mirror of
https://gitlab.com/freepascal.org/lazarus/lazarus.git
synced 2025-04-08 00:18:15 +02:00
568 lines
14 KiB
ObjectPascal
568 lines
14 KiB
ObjectPascal
{
|
|
***************************************************************************
|
|
* *
|
|
* This source is free software; you can redistribute it and/or modify *
|
|
* it under the terms of the GNU General Public License as published by *
|
|
* the Free Software Foundation; either version 2 of the License, or *
|
|
* (at your option) any later version. *
|
|
* *
|
|
* This code is distributed in the hope that it will be useful, but *
|
|
* WITHOUT ANY WARRANTY; without even the implied warranty of *
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU *
|
|
* General Public License for more details. *
|
|
* *
|
|
* A copy of the GNU General Public License is available on the World *
|
|
* Wide Web at <http://www.gnu.org/copyleft/gpl.html>. You can also *
|
|
* obtain it by writing to the Free Software Foundation, *
|
|
* Inc., 51 Franklin Street - Fifth Floor, Boston, MA 02110-1335, USA. *
|
|
* *
|
|
***************************************************************************
|
|
|
|
Author: Mattias Gaertner
|
|
|
|
Abstract:
|
|
function for testing if a xml fragment is correct for fpdoc
|
|
and if not to automatically repair it using heuristics.
|
|
}
|
|
unit CTXMLFixFragment;
|
|
|
|
{$mode objfpc}{$H+}
|
|
|
|
interface
|
|
|
|
uses
|
|
Classes, SysUtils, FileProcs, contnrs, BasicCodeTools;
|
|
|
|
type
|
|
TFixFPDocFlag = (
|
|
fffVerbose, // write debugln to stdout
|
|
fffRemoveWrongCloseTags // remove all close tags which were not open, default: convert to text
|
|
);
|
|
TFixFPDocFlags = set of TFixFPDocFlag;
|
|
PObjectList = ^TObjectList;
|
|
|
|
procedure FixFPDocFragment(var Fragment: string;
|
|
AllowTags, // for attribute values set this to false, so that all < are converted
|
|
Fix: boolean; // fix errors using heuristics creating valid xml
|
|
ErrorList: PObjectList = nil;
|
|
Flags: TFixFPDocFlags = []);
|
|
procedure FixFPDocAttributeValue(var Value: string);
|
|
|
|
implementation
|
|
|
|
const
|
|
LowerAlpha = ['a'..'z'];
|
|
UpperAlpha = ['A'..'Z'];
|
|
Alpha = LowerAlpha+UpperAlpha;
|
|
Digit = ['0'..'9'];
|
|
InvalidChars = [#0..#8,#11,#12,#14..#31,#127];
|
|
Space = [' ',#9,#10,#13];
|
|
|
|
type
|
|
TFPDocFragmentError = class
|
|
public
|
|
ErrorPos: integer;
|
|
Msg: string;
|
|
end;
|
|
|
|
procedure FixFPDocFragment(var Fragment: string; AllowTags, Fix: boolean; ErrorList: PObjectList;
|
|
Flags: TFixFPDocFlags);
|
|
{ - Fix all tags to lowercase to reduce git commits
|
|
- auto close comments
|
|
- remove #0 from comments
|
|
- convert special characters to &x;
|
|
- fix &name; lower case
|
|
- fix unclosed attribute values
|
|
- auto close unclosed tags
|
|
}
|
|
type
|
|
TStackItemTyp = (
|
|
sitTag,
|
|
sitComment
|
|
);
|
|
TStackItem = record
|
|
Typ: TStackItemTyp;
|
|
StartPos: integer; // position of <
|
|
EndPos: integer; // position behind >
|
|
CloseStartPos: integer; // position of <
|
|
end;
|
|
PStackItem = ^TStackItem;
|
|
var
|
|
Stack: PStackItem;
|
|
Capacity: integer;
|
|
Top: integer;
|
|
TopItem: PStackItem;
|
|
p: PChar;
|
|
|
|
function Rel(pt: PChar): integer;
|
|
begin
|
|
Result:=pt-PChar(Fragment)+1;
|
|
end;
|
|
|
|
function LineCol(pt: integer): string;
|
|
var
|
|
Line: Integer;
|
|
Col: Integer;
|
|
begin
|
|
SrcPosToLineCol(Fragment,pt,Line,Col);
|
|
Result:=IntToStr(Line)+','+IntToStr(Col);
|
|
end;
|
|
|
|
procedure Error(ErrorPos: PChar; ErrorMsg: string);
|
|
var
|
|
NewError: TFPDocFragmentError;
|
|
LineStart,LineEnd: integer;
|
|
begin
|
|
if fffVerbose in Flags then begin
|
|
debugln(['Error at ',LineCol(Rel(ErrorPos)),': ',ErrorMsg]);
|
|
GetLineStartEndAtPosition(Fragment,Rel(ErrorPos),LineStart,LineEnd);
|
|
debugln([' Line: ',copy(Fragment,LineStart,Rel(ErrorPos)-LineStart),'|',
|
|
copy(Fragment,Rel(ErrorPos),LineEnd-Rel(ErrorPos)+1)]);
|
|
end;
|
|
if not Fix then exit;
|
|
if ErrorList=nil then exit;
|
|
if ErrorList^=nil then
|
|
ErrorList^:=TObjectList.Create(true);
|
|
NewError:=TFPDocFragmentError.Create;
|
|
NewError.ErrorPos:=Rel(ErrorPos);
|
|
NewError.Msg:=ErrorMsg;
|
|
ErrorList^.Add(NewError);
|
|
end;
|
|
|
|
procedure Replace(StartPos, Len: integer; const NewTxt: string);
|
|
var
|
|
i: Integer;
|
|
Item: PStackItem;
|
|
Diff: Integer;
|
|
OldP: integer;
|
|
begin
|
|
OldP:=Rel(p);
|
|
Fragment:=copy(Fragment,1,StartPos-1)+NewTxt
|
|
+copy(Fragment,StartPos+Len,length(Fragment));
|
|
Diff:=length(NewTxt)-Len;
|
|
if Diff<>0 then begin
|
|
// adjust positions
|
|
if OldP>StartPos then
|
|
inc(OldP,Diff);
|
|
for i:=0 to Top do begin
|
|
Item:=@Stack[i];
|
|
if Item^.StartPos>StartPos then inc(Item^.StartPos,Diff);
|
|
if Item^.EndPos>StartPos then inc(Item^.EndPos,Diff);
|
|
if Item^.CloseStartPos>StartPos then inc(Item^.CloseStartPos,Diff);
|
|
end;
|
|
end;
|
|
p:=PChar(Fragment)+OldP-1;
|
|
//debugln(['Replace ',dbgstr(copy(Fragment,1,Rel(p)-1)),'|',dbgstr(copy(Fragment,Rel(p),length(Fragment)))]);
|
|
end;
|
|
|
|
procedure HandleSpecialChar;
|
|
var
|
|
c: char;
|
|
begin
|
|
c:=p^;
|
|
Error(p,'invalid character '+dbgstr(c));
|
|
if Fix then begin
|
|
case p^ of
|
|
#0..#31,#127:
|
|
// delete
|
|
Replace(Rel(p),1,'');
|
|
'<': Replace(Rel(p),1,'<');
|
|
'>': Replace(Rel(p),1,'>');
|
|
'&': Replace(Rel(p),1,'&');
|
|
'''': Replace(Rel(p),1,''');
|
|
'"': Replace(Rel(p),1,'"');
|
|
else
|
|
// convert
|
|
Replace(Rel(p),1,'&'+IntToStr(ord(c))+';');
|
|
end;
|
|
end
|
|
else begin
|
|
// skip
|
|
inc(p);
|
|
end;
|
|
end;
|
|
|
|
procedure Push(Typ: TStackItemTyp);
|
|
begin
|
|
inc(Top);
|
|
if Top=Capacity then begin
|
|
inc(Capacity);
|
|
ReAllocMem(Stack,SizeOf(TStackItem)*Capacity);
|
|
end;
|
|
TopItem:=@Stack[Top];
|
|
FillByte(TopItem^,SizeOf(TStackItem),0);
|
|
TopItem^.Typ:=Typ;
|
|
TopItem^.StartPos:=p-PChar(Fragment);
|
|
end;
|
|
|
|
procedure Pop;
|
|
begin
|
|
if Top<0 then raise Exception.Create('bug');
|
|
dec(Top);
|
|
if Top>=0 then
|
|
TopItem:=@Stack[Top]
|
|
else
|
|
TopItem:=nil;
|
|
end;
|
|
|
|
procedure ParseComment;
|
|
begin
|
|
// comment
|
|
Push(sitComment);
|
|
inc(p,4);
|
|
// parse comment
|
|
repeat
|
|
if p^ in InvalidChars then begin
|
|
// invalid character in comment => delete
|
|
if (p^=#0) and (p-PChar(Fragment)=length(Fragment)) then
|
|
begin
|
|
// reached end of fragment => close comment
|
|
Error(p,'comment end not found, start at '+LineCol(TopItem^.StartPos));
|
|
if Fix then begin
|
|
Replace(Rel(p),0,'-->');
|
|
inc(p,3);
|
|
end;
|
|
break;
|
|
end else begin
|
|
// invalid #0 character in comment => delete
|
|
Error(p,'invalid character');
|
|
if Fix then
|
|
Replace(Rel(p),1,'')
|
|
else
|
|
inc(p);
|
|
end;
|
|
end else if (p^='-') and (p[1]='-') and (p[2]='>') then
|
|
begin
|
|
// comment end found
|
|
inc(p,3);
|
|
break;
|
|
end else
|
|
inc(p);
|
|
until false;
|
|
Pop;
|
|
end;
|
|
|
|
procedure ParseAmpersand;
|
|
var
|
|
AmpPos: PChar;
|
|
i: Integer;
|
|
NeedLowercase: PChar;
|
|
len: integer;
|
|
begin
|
|
AmpPos:=p;
|
|
// & or &name; or &decimal;
|
|
case p[1] of
|
|
'0'..'9':
|
|
begin
|
|
// decimal number
|
|
inc(p);
|
|
i:=ord(p^)-ord('0');
|
|
while p^ in Digit do
|
|
begin
|
|
i:=i+10+ord(p^)-ord('0');
|
|
if i>$10FFFF then
|
|
break;
|
|
inc(p);
|
|
end;
|
|
if p^=';' then
|
|
begin
|
|
// ok
|
|
inc(p);
|
|
exit;
|
|
end;
|
|
end;
|
|
'a'..'z','A'..'Z':
|
|
begin
|
|
// name
|
|
inc(p);
|
|
NeedLowercase:=nil;
|
|
while p^ in Alpha do begin
|
|
if (NeedLowercase=nil) and (p^ in UpperAlpha) then
|
|
NeedLowercase:=p;
|
|
inc(p);
|
|
end;
|
|
if p^=';' then begin
|
|
if NeedLowercase<>nil then begin
|
|
Error(NeedLowercase,'character name must be lower case');
|
|
if Fix then begin
|
|
len:=(p-AmpPos)-1;
|
|
Replace(Rel(AmpPos)+1,len,lowercase(copy(Fragment,Rel(AmpPos)+1,len)));
|
|
end else
|
|
inc(p);
|
|
end else begin
|
|
// ok
|
|
inc(p);
|
|
end;
|
|
exit;
|
|
end;
|
|
end;
|
|
end;
|
|
p:=AmpPos;
|
|
// invalid character => convert or skip
|
|
HandleSpecialChar;
|
|
end;
|
|
|
|
procedure LowerCaseName;
|
|
var
|
|
Start: PChar;
|
|
NeedLowercase: PChar;
|
|
begin
|
|
Start:=p;
|
|
NeedLowercase:=nil;
|
|
repeat
|
|
case p^ of
|
|
'a'..'z': inc(p);
|
|
'A'..'Z':
|
|
begin
|
|
if NeedLowercase=nil then
|
|
NeedLowercase:=p;
|
|
inc(p);
|
|
end;
|
|
else break;
|
|
end;
|
|
until false;
|
|
if NeedLowercase<>nil then begin
|
|
Error(NeedLowercase,'names must be lower case');
|
|
if Fix then
|
|
Replace(Rel(Start),p-Start,lowercase(copy(Fragment,Rel(Start),p-Start)));
|
|
end;
|
|
end;
|
|
|
|
procedure ParseAttribute;
|
|
var
|
|
Quot: Char;
|
|
begin
|
|
// read attribute name
|
|
LowerCaseName;
|
|
while p^ in Space do inc(p); // skip space
|
|
if p^<>'=' then begin
|
|
// missing value
|
|
Error(p,'expected =');
|
|
if Fix then
|
|
Replace(Rel(p),0,'=');
|
|
end;
|
|
if p^='=' then begin
|
|
// read equal
|
|
inc(p);
|
|
while p^ in Space do inc(p); // skip space
|
|
if not (p^ in ['"','''']) then begin
|
|
// missing quote
|
|
Error(p,'expected "');
|
|
if Fix then
|
|
Replace(Rel(p),0,'"');
|
|
end;
|
|
end;
|
|
if p^ in ['"',''''] then begin
|
|
Quot:=p^;
|
|
// read value
|
|
inc(p);
|
|
repeat
|
|
case p^ of
|
|
'>',#0..#8,#11,#12,#14..#31,#127:
|
|
begin
|
|
// the > is not allowed in the quotes, probably the ending quot is missing
|
|
Error(p,'expected ending '+Quot);
|
|
if Fix then
|
|
Replace(Rel(p),0,Quot)
|
|
else
|
|
break;
|
|
end;
|
|
'&':
|
|
ParseAmpersand;
|
|
else
|
|
if p^=Quot then
|
|
begin
|
|
inc(p);
|
|
break;
|
|
end;
|
|
inc(p);
|
|
end;
|
|
until false;
|
|
end;
|
|
end;
|
|
|
|
procedure ParseCloseTag;
|
|
var
|
|
i: LongInt;
|
|
EndP: PChar;
|
|
begin
|
|
if (p^<>'<') or (p[1]<>'/') or (not (p[2] in Alpha)) then
|
|
begin
|
|
HandleSpecialChar;
|
|
exit;
|
|
end;
|
|
// p stands on < of </tagname>
|
|
i:=Top;
|
|
while (i>=0)
|
|
and (CompareIdentifiers(p+2,@Fragment[Stack[i].StartPos+1])<>0) do
|
|
dec(i);
|
|
if i<0 then begin
|
|
// no corresponding open tag
|
|
Error(p,'tag was never opened');
|
|
if fffRemoveWrongCloseTags in Flags then begin
|
|
EndP:=p+2;
|
|
while EndP^ in Alpha do inc(EndP);
|
|
if EndP='>' then
|
|
inc(EndP);
|
|
Replace(Rel(p),EndP-p,'');
|
|
end else
|
|
HandleSpecialChar;
|
|
exit;
|
|
end;
|
|
TopItem^.CloseStartPos:=Rel(p);
|
|
inc(p,2); // skip </
|
|
// read name
|
|
LowerCaseName;
|
|
// close unclosed sub tag
|
|
i:=Top;
|
|
while CompareIdentifiers(@Fragment[Stack[i].StartPos+1],
|
|
@Fragment[TopItem^.CloseStartPos+2])<>0
|
|
do begin
|
|
Error(@Fragment[TopItem^.StartPos],'tag must be closed');
|
|
if Fix then
|
|
Replace(TopItem^.EndPos-1,0,'/');
|
|
Pop;
|
|
dec(i);
|
|
end;
|
|
// skip space
|
|
while (p^ in Space) do inc(p);
|
|
if p^='/' then begin
|
|
// uneeded close /
|
|
Error(p,'invalid close /');
|
|
if Fix then
|
|
Replace(Rel(p),1,'')
|
|
else
|
|
inc(p);
|
|
end;
|
|
if p^<>'>' then begin
|
|
Error(p,'missing >');
|
|
if Fix then
|
|
Replace(Rel(p),0,'>');
|
|
end;
|
|
if p^='>' then inc(p);
|
|
// close tag
|
|
Pop;
|
|
end;
|
|
|
|
procedure ParseOpenTag;
|
|
begin
|
|
Push(sitTag);
|
|
TopItem^.StartPos:=Rel(p);
|
|
inc(p);
|
|
LowerCaseName;
|
|
repeat
|
|
while p^ in Space do inc(p); // skip space
|
|
case p^ of
|
|
'a'..'z','A'..'Z':
|
|
ParseAttribute;
|
|
'/':
|
|
begin
|
|
// end and close tag
|
|
inc(p);
|
|
if p^<>'>' then begin
|
|
Error(p,'expected >');
|
|
if Fix then begin
|
|
Replace(Rel(p),0,'>');
|
|
inc(p);
|
|
end;
|
|
end;
|
|
inc(p);
|
|
Pop;
|
|
exit;
|
|
end;
|
|
'>':
|
|
begin
|
|
// end tag
|
|
inc(p);
|
|
TopItem^.EndPos:=Rel(p);
|
|
exit;
|
|
end;
|
|
else
|
|
// invalid character => end tag
|
|
Error(p,'expected >');
|
|
if Fix then begin
|
|
Replace(Rel(p),0,'>');
|
|
inc(p);
|
|
end;
|
|
exit;
|
|
end;
|
|
until false;
|
|
end;
|
|
|
|
procedure ParseLowerThan;
|
|
begin
|
|
// comment, tag or 'lower than'
|
|
if not AllowTags then begin
|
|
// invalid character => convert or skip
|
|
HandleSpecialChar;
|
|
end else if (p[1]='!') and (p[2]='-') and (p[3]='-') then begin
|
|
// comment
|
|
ParseComment;
|
|
end else if p[1] in Alpha then begin
|
|
// start tag
|
|
ParseOpenTag;
|
|
end else if p[1]='/' then begin
|
|
// close tag
|
|
ParseCloseTag;
|
|
end else
|
|
// invalid character => convert or skip
|
|
HandleSpecialChar;
|
|
end;
|
|
|
|
begin
|
|
if Fragment='' then exit;
|
|
Top:=-1;
|
|
TopItem:=nil;
|
|
Capacity:=0;
|
|
Stack:=nil;
|
|
try
|
|
p:=PChar(Fragment);
|
|
repeat
|
|
case p^ of
|
|
#0..#8,#11,#12,#14..#31,#127:
|
|
begin
|
|
if (p^=#0) and (p-PChar(Fragment)>=length(Fragment)) then
|
|
begin
|
|
// reached end of fragment
|
|
break;
|
|
end else begin
|
|
// invalid character => convert or skip
|
|
HandleSpecialChar;
|
|
end;
|
|
end;
|
|
'<':
|
|
ParseLowerThan;
|
|
'>':
|
|
// invalid character => convert or skip
|
|
HandleSpecialChar;
|
|
'&':
|
|
ParseAmpersand;
|
|
'"','''':
|
|
if not AllowTags then
|
|
HandleSpecialChar
|
|
else
|
|
inc(p);
|
|
else
|
|
inc(p);
|
|
end;
|
|
until false;
|
|
while Top>=0 do begin
|
|
// fix unclosed tags
|
|
Error(@Fragment[TopItem^.StartPos],'tag must be closed');
|
|
if Fix then
|
|
Replace(TopItem^.EndPos-1,0,'/');
|
|
Pop;
|
|
end;
|
|
finally
|
|
ReAllocMem(Stack,0);
|
|
end;
|
|
end;
|
|
|
|
procedure FixFPDocAttributeValue(var Value: string);
|
|
begin
|
|
FixFPDocFragment(Value,false,true);
|
|
end;
|
|
|
|
end.
|
|
|