fpdoc repair: close tags

git-svn-id: trunk@28522 -
This commit is contained in:
mattias 2010-11-28 11:48:28 +00:00
parent fa2ce85234
commit 87922a9a0b
2 changed files with 235 additions and 121 deletions

View File

@ -92,9 +92,12 @@ function IsValidIdentPair(const NamePair: string;
out First, Second: string): boolean; out First, Second: string): boolean;
// line/code ends // line/code ends
procedure GetLineStartEndAtPosition(const Source:string; Position:integer; function SrcPosToLineCol(const s: string; Position: integer;
out LineStart,LineEnd:integer); out Line, Col: integer): boolean;
function GetLineStartPosition(const Source:string; Position:integer): integer; procedure GetLineStartEndAtPosition(const Source: string; Position:integer;
out LineStart,LineEnd:integer); // LineEnd at first line break character
function GetLineStartPosition(const Source: string; Position:integer): integer;
function GetLineInSrc(const Source: string; Position:integer): string;
function LineEndCount(const Txt: string): integer; inline; function LineEndCount(const Txt: string): integer; inline;
function LineEndCount(const Txt: string; out LengthOfLastLine:integer): integer; inline; function LineEndCount(const Txt: string; out LengthOfLastLine:integer): integer; inline;
function LineEndCount(const Txt: string; StartPos, EndPos: integer; function LineEndCount(const Txt: string; StartPos, EndPos: integer;
@ -122,8 +125,6 @@ function FindFirstLineEndAfterInCode(const Source: string;
function ChompLineEndsAtEnd(const s: string): string; function ChompLineEndsAtEnd(const s: string): string;
function ChompOneLineEndAtEnd(const s: string): string; function ChompOneLineEndAtEnd(const s: string): string;
function TrimLineEnds(const s: string; TrimStart, TrimEnd: boolean): string; function TrimLineEnds(const s: string; TrimStart, TrimEnd: boolean): string;
function SrcPosToLineCol(const s: string; Position: integer;
out Line, Col: integer): boolean;
// brackets // brackets
function GetBracketLvl(const Src: string; StartPos, EndPos: integer; function GetBracketLvl(const Src: string; StartPos, EndPos: integer;
@ -2782,6 +2783,7 @@ end;
function SrcPosToLineCol(const s: string; Position: integer; function SrcPosToLineCol(const s: string; Position: integer;
out Line, Col: integer): boolean; out Line, Col: integer): boolean;
// returns false if Postion<1 or >length(s)+1
var var
p: LongInt; p: LongInt;
l: Integer; l: Integer;
@ -2816,7 +2818,7 @@ begin
inc(Col); inc(Col);
end; end;
end; end;
if p=Position then Result:=true; Result:=p=Position;
end; end;
function GetBracketLvl(const Src: string; StartPos, EndPos: integer; function GetBracketLvl(const Src: string; StartPos, EndPos: integer;
@ -4082,6 +4084,14 @@ begin
dec(Result); dec(Result);
end; end;
function GetLineInSrc(const Source: string; Position: integer): string;
var
LineStart, LineEnd: integer;
begin
GetLineStartEndAtPosition(Source,Position,LineStart,LineEnd);
Result:=copy(Source,LineStart,LineEnd);
end;
function LineEndCount(const Txt: string): integer; function LineEndCount(const Txt: string): integer;
var var
LengthOfLastLine: integer; LengthOfLastLine: integer;

View File

@ -6,7 +6,7 @@ interface
uses uses
Classes, SysUtils, FileUtil, Forms, Controls, Graphics, Dialogs, LCLProc, Classes, SysUtils, FileUtil, Forms, Controls, Graphics, Dialogs, LCLProc,
contnrs, XMLRead; contnrs, BasicCodeTools, XMLRead;
type type
@ -15,17 +15,23 @@ type
TForm1 = class(TForm) TForm1 = class(TForm)
procedure FormCreate(Sender: TObject); procedure FormCreate(Sender: TObject);
private private
FVerbose: boolean;
public public
procedure TestComment; procedure TestComment;
procedure TestInvalidCharacters; procedure TestInvalidCharacters;
procedure TestOpenTag;
procedure TestAttribute;
procedure TestCloseTag;
procedure TestBugReports;
function Test(Title, Fragment, FixedFragment: string): boolean; function Test(Title, Fragment, FixedFragment: string): boolean;
property Verbose: boolean read FVerbose write FVerbose;
end; end;
var var
Form1: TForm1; Form1: TForm1;
procedure FixFPDocFragment(var Fragment: string; Fix: boolean; procedure FixFPDocFragment(var Fragment: string; Fix: boolean;
out ErrorList: TObjectList); out ErrorList: TObjectList; Verbose: boolean = false);
implementation implementation
@ -39,7 +45,7 @@ type
end; end;
procedure FixFPDocFragment(var Fragment: string; Fix: boolean; procedure FixFPDocFragment(var Fragment: string; Fix: boolean;
out ErrorList: TObjectList); out ErrorList: TObjectList; Verbose: boolean);
{ - Fix all tags to lowercase to reduce svn commits { - Fix all tags to lowercase to reduce svn commits
- auto close comments - auto close comments
- remove #0 from comments - remove #0 from comments
@ -55,10 +61,9 @@ type
); );
TStackItem = record TStackItem = record
Typ: TStackItemTyp; Typ: TStackItemTyp;
StartPos: integer; StartPos: integer; // position of <
NameStartPos: integer; EndPos: integer; // position behind >
NameEndPos: integer; CloseStartPos: integer; // position of <
EndPos: integer;
end; end;
PStackItem = ^TStackItem; PStackItem = ^TStackItem;
var var
@ -77,37 +82,23 @@ var
var var
Line: Integer; Line: Integer;
Col: Integer; Col: Integer;
i: Integer;
begin begin
Line:=1; SrcPosToLineCol(Fragment,pt,Line,Col);
Col:=1;
if pt>length(Fragment) then pt:=length(Fragment)+1;
i:=1;
while i<pt do begin
case Fragment[i] of
#10,#13:
begin
inc(Line);
inc(i);
if (i<=length(Fragment)) and (Fragment[i] in [#10,#13])
and (Fragment[i-1]<>Fragment[i])
then
inc(i);
Col:=1;
end;
else
inc(Col);
end;
inc(i);
end;
Result:=IntToStr(Line)+','+IntToStr(Col); Result:=IntToStr(Line)+','+IntToStr(Col);
end; end;
procedure Error(ErrorPos: PChar; ErrorMsg: string); procedure Error(ErrorPos: PChar; ErrorMsg: string);
var var
NewError: TFPDocFragmentError; NewError: TFPDocFragmentError;
LineStart,LineEnd: integer;
begin begin
debugln(['Error ',ErrorMsg]); if Verbose then begin
debugln(['Error at ',LineCol(Rel(ErrorPos)),': ',ErrorMsg]);
GetLineStartEndAtPosition(Fragment,Rel(ErrorPos),LineStart,LineEnd);
debugln([' Line: ',copy(Fragment,LineStart,Rel(ErrorPos)-LineStart),'|',
copy(Fragment,Rel(ErrorPos),LineEnd-Rel(ErrorPos)+1)]);
end;
if not Fix then exit;
if ErrorList=nil then if ErrorList=nil then
ErrorList:=TObjectList.Create(true); ErrorList:=TObjectList.Create(true);
NewError:=TFPDocFragmentError.Create; NewError:=TFPDocFragmentError.Create;
@ -135,19 +126,19 @@ var
Item:=@Stack[i]; Item:=@Stack[i];
if Item^.StartPos>StartPos then inc(Item^.StartPos,Diff); if Item^.StartPos>StartPos then inc(Item^.StartPos,Diff);
if Item^.EndPos>StartPos then inc(Item^.EndPos,Diff); if Item^.EndPos>StartPos then inc(Item^.EndPos,Diff);
if Item^.NameStartPos>StartPos then inc(Item^.NameStartPos,Diff); if Item^.CloseStartPos>StartPos then inc(Item^.CloseStartPos,Diff);
if Item^.NameEndPos>StartPos then inc(Item^.NameEndPos,Diff);
end; end;
end; end;
p:=PChar(Fragment)+OldP-1; p:=PChar(Fragment)+OldP-1;
debugln(['Replace ',dbgstr(copy(Fragment,1,Rel(p)-1)),'|',dbgstr(copy(Fragment,Rel(p),length(Fragment)))]); //debugln(['Replace ',dbgstr(copy(Fragment,1,Rel(p)-1)),'|',dbgstr(copy(Fragment,Rel(p),length(Fragment)))]);
end; end;
procedure HandleSpecialChar; procedure HandleSpecialChar;
var var
c: Integer; c: char;
begin begin
c:=ord(p^); c:=p^;
Error(p,'invalid character '+dbgstr(c));
if Fix then begin if Fix then begin
case p^ of case p^ of
#0..#31,#127: #0..#31,#127:
@ -160,12 +151,11 @@ var
'"': Replace(Rel(p),1,'&quot;'); '"': Replace(Rel(p),1,'&quot;');
else else
// convert // convert
Replace(Rel(p),1,'&'+IntToStr(c)+';'); Replace(Rel(p),1,'&'+IntToStr(ord(c))+';');
end; end;
end end
else begin else begin
// skip // skip
Error(p,'invalid character #'+IntToStr(c));
inc(p); inc(p);
end; end;
end; end;
@ -187,6 +177,10 @@ var
begin begin
if Top<0 then raise Exception.Create('bug'); if Top<0 then raise Exception.Create('bug');
dec(Top); dec(Top);
if Top>=0 then
TopItem:=@Stack[Top]
else
TopItem:=nil;
end; end;
procedure ParseComment; procedure ParseComment;
@ -201,20 +195,19 @@ var
if (p^=#0) and (p-PChar(Fragment)=length(Fragment)) then if (p^=#0) and (p-PChar(Fragment)=length(Fragment)) then
begin begin
// reached end of fragment => close comment // reached end of fragment => close comment
Error(p,'comment end not found, start at '+LineCol(TopItem^.StartPos));
if Fix then begin if Fix then begin
Replace(Rel(p),0,'-->'); Replace(Rel(p),0,'-->');
inc(p,3); inc(p,3);
end else end;
Error(p,'comment end not found, start at '+LineCol(TopItem^.StartPos));
break; break;
end else begin end else begin
// invalid #0 character in comment => delete // invalid #0 character in comment => delete
Error(p,'invalid character');
if Fix then if Fix then
Replace(Rel(p),1,'') Replace(Rel(p),1,'')
else begin else
Error(p,'invalid character');
inc(p); inc(p);
end;
end; end;
end else if (p^='-') and (p[1]='-') and (p[2]='>') then end else if (p^='-') and (p[1]='-') and (p[2]='>') then
begin begin
@ -268,13 +261,12 @@ var
end; end;
if p^=';' then begin if p^=';' then begin
if NeedLowercase<>nil then begin if NeedLowercase<>nil then begin
Error(NeedLowercase,'character name must be lower case');
if Fix then begin if Fix then begin
len:=(p-AmpPos)-1; len:=(p-AmpPos)-1;
Replace(Rel(AmpPos)+1,len,lowercase(copy(Fragment,Rel(AmpPos)+1,len))); Replace(Rel(AmpPos)+1,len,lowercase(copy(Fragment,Rel(AmpPos)+1,len)));
end else begin end else
Error(NeedLowercase,'special character name is not lower case');
inc(p); inc(p);
end;
end else begin end else begin
// ok // ok
inc(p); inc(p);
@ -300,124 +292,194 @@ var
'a'..'z': inc(p); 'a'..'z': inc(p);
'A'..'Z': 'A'..'Z':
begin begin
inc(p);
if NeedLowercase=nil then if NeedLowercase=nil then
NeedLowercase:=p; NeedLowercase:=p;
inc(p);
end; end;
else break; else break;
end; end;
until false; until false;
if NeedLowercase<>nil then begin if NeedLowercase<>nil then begin
if Fix then begin Error(NeedLowercase,'names must be lower case');
if Fix then
Replace(Rel(Start),p-Start,lowercase(copy(Fragment,Rel(Start),p-Start))); Replace(Rel(Start),p-Start,lowercase(copy(Fragment,Rel(Start),p-Start)));
end else begin
// all current tags must be lower case
Error(NeedLowercase,'tags must be lower case');
end;
end; end;
end; end;
procedure ParseAttribute; procedure ParseAttribute;
var
Quot: Char;
begin begin
// attribute name // read attribute name
LowerCaseName; LowerCaseName;
while p^ in [' ',#9,#10,#13] do inc(p); // skip space while p^ in [' ',#9,#10,#13] do inc(p); // skip space
if p^<>'=' then begin if p^<>'=' then begin
// missing value // missing value
if Fix then begin Error(p,'expected =');
if Fix then
Replace(Rel(p),0,'='); Replace(Rel(p),0,'=');
end else begin
Error(p,'expected =');
end;
end; end;
if p^='=' then begin if p^='=' then begin
// read equal
inc(p); inc(p);
while p^ in [' ',#9,#10,#13] do inc(p); // skip space while p^ in [' ',#9,#10,#13] do inc(p); // skip space
if p^<>'"' then begin if not (p^ in ['"','''']) then begin
// missing quotes // missing quote
if Fix then begin Error(p,'expected "');
if Fix then
Replace(Rel(p),0,'"'); Replace(Rel(p),0,'"');
end else begin
Error(p,'expected "');
end;
end; end;
end; end;
if p^='"' then begin if p^ in ['"',''''] then begin
Quot:=p^;
// read value // read value
inc(p); inc(p);
repeat repeat
case p^ of case p^ of
'>',#0..#8,#11,#12,#14..#31,#127: '>',#0..#8,#11,#12,#14..#31,#127:
// the > is not allowed in the quotes, probably the ending quot is missing begin
if Fix then begin // the > is not allowed in the quotes, probably the ending quot is missing
Replace(Rel(p),0,'"'); Error(p,'expected ending '+Quot);
end else begin if Fix then
Error(p,'expected ending "'); Replace(Rel(p),0,Quot)
break; else
break;
end; end;
'&': '&':
ParseAmpersand; ParseAmpersand;
'"': else
if p^=Quot then
begin begin
inc(p); inc(p);
break; break;
end end;
else
inc(p); inc(p);
end; end;
until false; until false;
end; end;
end; end;
procedure ParseCloseTag;
var
i: LongInt;
begin
if (p^<>'<') or (p[1]<>'/') or (not (p[2] in ['a'..'z','A'..'Z'])) then
begin
HandleSpecialChar;
exit;
end;
// p stands on < of </tagname>
i:=Top;
while (i>=0)
and (CompareIdentifiers(p+2,@Fragment[Stack[i].StartPos+1])<>0) do
dec(i);
if i<0 then begin
// no corresponding open tag
Error(p,'tag was never opened');
HandleSpecialChar;
exit;
end;
TopItem^.CloseStartPos:=Rel(p);
inc(p,2); // skip </
// read name
LowerCaseName;
// close unclosed sub tag
i:=Top;
while CompareIdentifiers(@Fragment[Stack[i].StartPos+1],
@Fragment[TopItem^.CloseStartPos+2])<>0
do begin
Error(@Fragment[TopItem^.StartPos],'tag must be closed');
if Fix then
Replace(TopItem^.EndPos-1,0,'/');
Pop;
dec(i);
end;
// skip space
while (p^ in [' ',#9,#10,#13]) do inc(p);
if p^='/' then begin
// uneeded close /
Error(p,'invalid close /');
if Fix then
Replace(Rel(p),1,'')
else
inc(p);
end;
if p^<>'>' then begin
Error(p,'missing >');
if Fix then
Replace(Rel(p),0,'>');
end;
if p^='>' then inc(p);
// close tag
Pop;
end;
procedure ParseOpenTag;
begin
Push(sitTag);
TopItem^.StartPos:=Rel(p);
inc(p);
LowerCaseName;
repeat
while p^ in [' ',#9,#10,#13] do inc(p); // skip space
case p^ of
'a'..'z','A'..'Z':
ParseAttribute;
'/':
begin
// end and close tag
inc(p);
if p^<>'>' then begin
Error(p,'expected >');
if Fix then begin
Replace(Rel(p),0,'>');
inc(p);
end;
end;
Pop;
exit;
end;
'>':
begin
// end tag
inc(p);
TopItem^.EndPos:=Rel(p);
exit;
end;
else
// invalid character => end tag
Error(p,'expected >');
if Fix then begin
Replace(Rel(p),0,'>');
inc(p);
end;
exit;
end;
until false;
end;
procedure ParseLowerThan; procedure ParseLowerThan;
begin begin
// comment, tag or 'lower than' // comment, tag or 'lower than'
if (p[1]='!') and (p[2]='-') and (p[3]='-') then begin if (p[1]='!') and (p[2]='-') and (p[3]='-') then begin
// comment // comment
ParseComment; ParseComment;
exit; end else if p[1] in ['a'..'z','A'..'Z'] then begin
end; // start tag
if p[1] in ['a'..'z','A'..'Z'] then begin ParseOpenTag;
// open tag
Push(sitTag);
TopItem^.StartPos:=Rel(p);
inc(p);
TopItem^.NameStartPos:=Rel(p);
LowerCaseName;
TopItem^.NameEndPos:=Rel(p);
while p^ in [' ',#9,#10,#13] do inc(p); // skip space
case p^ of
'a'..'z','A'..'Z':
ParseAttribute;
'/':
begin
inc(p);
if p^<>'>' then begin
if Fix then begin
Replace(Rel(p),0,'>');
end else begin
Error(p,'expected >');
end;
end;
exit;
end;
'>':
begin
// ToDo:
end;
end;
end else if p[1]='/' then begin end else if p[1]='/' then begin
// ToDo: close tag // close tag
RaiseGDBException('ToDo'); ParseCloseTag;
end; end else
// invalid character => convert or skip // invalid character => convert or skip
HandleSpecialChar; HandleSpecialChar;
end; end;
begin begin
ErrorList:=nil; ErrorList:=nil;
if Fragment='' then exit; if Fragment='' then exit;
Top:=-1; Top:=-1;
TopItem:=nil;
Capacity:=0; Capacity:=0;
Stack:=nil; Stack:=nil;
try try
@ -446,9 +508,12 @@ begin
inc(p); inc(p);
end; end;
until false; until false;
if Top>=0 then begin while Top>=0 do begin
// ToDo: fix unclosed tags // fix unclosed tags
debugln(['FixFPDocFragment ToDo: fix unclosed tags']); Error(@Fragment[TopItem^.StartPos],'tag must be closed');
if Fix then
Replace(TopItem^.EndPos-1,0,'/');
Pop;
end; end;
finally finally
ReAllocMem(Stack,0); ReAllocMem(Stack,0);
@ -459,8 +524,13 @@ end;
procedure TForm1.FormCreate(Sender: TObject); procedure TForm1.FormCreate(Sender: TObject);
begin begin
//TestComment; Verbose:=false;
//TestInvalidCharacters; TestComment;
TestInvalidCharacters;
TestOpenTag;
TestAttribute;
TestCloseTag;
TestBugReports;
end; end;
procedure TForm1.TestComment; procedure TForm1.TestComment;
@ -472,10 +542,44 @@ end;
procedure TForm1.TestInvalidCharacters; procedure TForm1.TestInvalidCharacters;
begin begin
Test('delete special characters','A'#0'B'#1#127,'AB'); Test('delete special characters','A'#0'B'#1#127,'AB');
Test('replace tag characters','LT<GT>AMP&','LT&lt;GT&gt;AMP&amp;'); Test('replace tag characters','LT< GT>AMP&','LT&lt; GT&gt;AMP&amp;');
Test('lower case special characters','&LT;','&lt;'); Test('lower case special characters','&LT;','&lt;');
end; end;
procedure TForm1.TestOpenTag;
begin
Test('missing tag name','<>','&lt;&gt;');
Test('lower case tag name','<A></a>','<a></a>');
Test('invalid character in tag','<a "></a>','<a >"&gt;</a>');
end;
procedure TForm1.TestAttribute;
begin
Test('lower case attribute name','<a Name=""></a>','<a name=""></a>');
Test('missing attribute equal','<a name ""></a>','<a name =""></a>');
Test('missing attribute value','<a name=></a>','<a name=""></a>');
Test('missing attribute quotes','<a name=1></a>','<a name="1"></a>');
Test('missing attribute ending quote','<a name="1></a>','<a name="1"></a>');
Test('invalid character in attribute value','<a name="&"></a>','<a name="&amp;"></a>');
Test('amp attribute value','<a name="&amp;"></a>','<a name="&amp;"></a>');
end;
procedure TForm1.TestCloseTag;
begin
Test('lower case close tag name','<a></A>','<a></a>');
Test('close open tag','<a>','<a/>');
Test('close open sub tag','<p><a></p>','<p><a/></p>');
Test('disable invalid close tag','</p>','&lt;/p&gt;');
end;
procedure TForm1.TestBugReports;
begin
Test('15120','operator <(TPoint, TPoint): Boolean',
'operator &lt;(TPoint, TPoint): Boolean');
Test('16671','<br>',
'<br/>');
end;
function TForm1.Test(Title, Fragment, FixedFragment: string): boolean; function TForm1.Test(Title, Fragment, FixedFragment: string): boolean;
var var
s: String; s: String;
@ -484,7 +588,7 @@ begin
Result:=true; Result:=true;
try try
s:=Fragment; s:=Fragment;
FixFPDocFragment(s,true,ErrorList); FixFPDocFragment(s,true,ErrorList,Verbose);
if s<>FixedFragment then begin if s<>FixedFragment then begin
Result:=false; Result:=false;
debugln(['failed: ',Title]); debugln(['failed: ',Title]);