mirror of
				https://gitlab.com/freepascal.org/lazarus/lazarus.git
				synced 2025-10-26 13:01:48 +01:00 
			
		
		
		
	
		
			
				
	
	
		
			549 lines
		
	
	
		
			14 KiB
		
	
	
	
		
			ObjectPascal
		
	
	
	
	
	
			
		
		
	
	
			549 lines
		
	
	
		
			14 KiB
		
	
	
	
		
			ObjectPascal
		
	
	
	
	
	
| {
 | |
|  ***************************************************************************
 | |
|  *                                                                         *
 | |
|  *   This source is free software; you can redistribute it and/or modify   *
 | |
|  *   it under the terms of the GNU General Public License as published by  *
 | |
|  *   the Free Software Foundation; either version 2 of the License, or     *
 | |
|  *   (at your option) any later version.                                   *
 | |
|  *                                                                         *
 | |
|  *   This code is distributed in the hope that it will be useful, but      *
 | |
|  *   WITHOUT ANY WARRANTY; without even the implied warranty of            *
 | |
|  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU     *
 | |
|  *   General Public License for more details.                              *
 | |
|  *                                                                         *
 | |
|  *   A copy of the GNU General Public License is available on the World    *
 | |
|  *   Wide Web at <http://www.gnu.org/copyleft/gpl.html>. You can also      *
 | |
|  *   obtain it by writing to the Free Software Foundation,                 *
 | |
|  *   Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.        *
 | |
|  *                                                                         *
 | |
|  ***************************************************************************
 | |
| 
 | |
|   Author: Mattias Gaertner
 | |
| 
 | |
|   Abstract:
 | |
|     function for testing if a xml fragment is correct for fpdoc
 | |
|     and if not to automatically repair it using heuristics.
 | |
| }
 | |
| unit CTXMLFixFragment;
 | |
| 
 | |
| {$mode objfpc}{$H+}
 | |
| 
 | |
| interface
 | |
| 
 | |
| uses
 | |
|   Classes, SysUtils, FileProcs, contnrs, BasicCodeTools;
 | |
| 
 | |
| type
 | |
|   PObjectList = ^TObjectList;
 | |
| 
 | |
| procedure FixFPDocFragment(var Fragment: string;
 | |
|   AllowTags,    // for attribute values set this to false, so that all < are converted
 | |
|   Fix: boolean; // fix errors using heuristics creating valid xml
 | |
|   ErrorList: PObjectList = nil;
 | |
|   Verbose: boolean = false // write debugln to stdout
 | |
|   );
 | |
| procedure FixFPDocAttributeValue(var Value: string);
 | |
| 
 | |
| implementation
 | |
| 
 | |
| 
 | |
| type
 | |
|   TFPDocFragmentError = class
 | |
|   public
 | |
|     ErrorPos: integer;
 | |
|     Msg: string;
 | |
|   end;
 | |
| 
 | |
| procedure FixFPDocFragment(var Fragment: string; AllowTags, Fix: boolean;
 | |
|   ErrorList: PObjectList; Verbose: boolean);
 | |
| { - Fix all tags to lowercase to reduce svn commits
 | |
|   - auto close comments
 | |
|   - remove #0 from comments
 | |
|   - convert special characters to &x;
 | |
|   - fix &name; lower case
 | |
|   - fix unclosed attribute values
 | |
|   - auto close unclosed tags
 | |
| }
 | |
| type
 | |
|   TStackItemTyp = (
 | |
|     sitTag,
 | |
|     sitComment
 | |
|     );
 | |
|   TStackItem = record
 | |
|     Typ: TStackItemTyp;
 | |
|     StartPos: integer; // position of <
 | |
|     EndPos: integer; // position behind >
 | |
|     CloseStartPos: integer; // position of <
 | |
|   end;
 | |
|   PStackItem = ^TStackItem;
 | |
| var
 | |
|   Stack: PStackItem;
 | |
|   Capacity: integer;
 | |
|   Top: integer;
 | |
|   TopItem: PStackItem;
 | |
|   p: PChar;
 | |
| 
 | |
|   function Rel(pt: PChar): integer;
 | |
|   begin
 | |
|     Result:=pt-PChar(Fragment)+1;
 | |
|   end;
 | |
| 
 | |
|   function LineCol(pt: integer): string;
 | |
|   var
 | |
|     Line: Integer;
 | |
|     Col: Integer;
 | |
|   begin
 | |
|     SrcPosToLineCol(Fragment,pt,Line,Col);
 | |
|     Result:=IntToStr(Line)+','+IntToStr(Col);
 | |
|   end;
 | |
| 
 | |
|   procedure Error(ErrorPos: PChar; ErrorMsg: string);
 | |
|   var
 | |
|     NewError: TFPDocFragmentError;
 | |
|     LineStart,LineEnd: integer;
 | |
|   begin
 | |
|     if Verbose then begin
 | |
|       debugln(['Error at ',LineCol(Rel(ErrorPos)),': ',ErrorMsg]);
 | |
|       GetLineStartEndAtPosition(Fragment,Rel(ErrorPos),LineStart,LineEnd);
 | |
|       debugln(['  Line: ',copy(Fragment,LineStart,Rel(ErrorPos)-LineStart),'|',
 | |
|                           copy(Fragment,Rel(ErrorPos),LineEnd-Rel(ErrorPos)+1)]);
 | |
|     end;
 | |
|     if not Fix then exit;
 | |
|     if ErrorList=nil then exit;
 | |
|     if ErrorList^=nil then
 | |
|       ErrorList^:=TObjectList.Create(true);
 | |
|     NewError:=TFPDocFragmentError.Create;
 | |
|     NewError.ErrorPos:=Rel(ErrorPos);
 | |
|     NewError.Msg:=ErrorMsg;
 | |
|     ErrorList^.Add(NewError);
 | |
|   end;
 | |
| 
 | |
|   procedure Replace(StartPos, Len: integer; const NewTxt: string);
 | |
|   var
 | |
|     i: Integer;
 | |
|     Item: PStackItem;
 | |
|     Diff: Integer;
 | |
|     OldP: integer;
 | |
|   begin
 | |
|     OldP:=Rel(p);
 | |
|     Fragment:=copy(Fragment,1,StartPos-1)+NewTxt
 | |
|                                   +copy(Fragment,StartPos+Len,length(Fragment));
 | |
|     Diff:=length(NewTxt)-Len;
 | |
|     if Diff<>0 then begin
 | |
|       // adjust positions
 | |
|       if OldP>StartPos then
 | |
|         inc(OldP,Diff);
 | |
|       for i:=0 to Top do begin
 | |
|         Item:=@Stack[i];
 | |
|         if Item^.StartPos>StartPos then inc(Item^.StartPos,Diff);
 | |
|         if Item^.EndPos>StartPos then inc(Item^.EndPos,Diff);
 | |
|         if Item^.CloseStartPos>StartPos then inc(Item^.CloseStartPos,Diff);
 | |
|       end;
 | |
|     end;
 | |
|     p:=PChar(Fragment)+OldP-1;
 | |
|     //debugln(['Replace ',dbgstr(copy(Fragment,1,Rel(p)-1)),'|',dbgstr(copy(Fragment,Rel(p),length(Fragment)))]);
 | |
|   end;
 | |
| 
 | |
|   procedure HandleSpecialChar;
 | |
|   var
 | |
|     c: char;
 | |
|   begin
 | |
|     c:=p^;
 | |
|     Error(p,'invalid character '+dbgstr(c));
 | |
|     if Fix then begin
 | |
|       case p^ of
 | |
|       #0..#31,#127:
 | |
|         // delete
 | |
|         Replace(Rel(p),1,'');
 | |
|       '<': Replace(Rel(p),1,'<');
 | |
|       '>': Replace(Rel(p),1,'>');
 | |
|       '&': Replace(Rel(p),1,'&');
 | |
|       '''': Replace(Rel(p),1,''');
 | |
|       '"': Replace(Rel(p),1,'"');
 | |
|       else
 | |
|         // convert
 | |
|         Replace(Rel(p),1,'&'+IntToStr(ord(c))+';');
 | |
|       end;
 | |
|     end
 | |
|     else begin
 | |
|       // skip
 | |
|       inc(p);
 | |
|     end;
 | |
|   end;
 | |
| 
 | |
|   procedure Push(Typ: TStackItemTyp);
 | |
|   begin
 | |
|     inc(Top);
 | |
|     if Top=Capacity then begin
 | |
|       inc(Capacity);
 | |
|       ReAllocMem(Stack,SizeOf(TStackItem)*Capacity);
 | |
|     end;
 | |
|     TopItem:=@Stack[Top];
 | |
|     FillByte(TopItem^,SizeOf(TStackItem),0);
 | |
|     TopItem^.Typ:=Typ;
 | |
|     TopItem^.StartPos:=p-PChar(Fragment);
 | |
|   end;
 | |
| 
 | |
|   procedure Pop;
 | |
|   begin
 | |
|     if Top<0 then raise Exception.Create('bug');
 | |
|     dec(Top);
 | |
|     if Top>=0 then
 | |
|       TopItem:=@Stack[Top]
 | |
|     else
 | |
|       TopItem:=nil;
 | |
|   end;
 | |
| 
 | |
|   procedure ParseComment;
 | |
|   begin
 | |
|     // comment
 | |
|     Push(sitComment);
 | |
|     inc(p,4);
 | |
|     // parse comment
 | |
|     repeat
 | |
|       if p^ in [#0..#8,#11,#12,#14..#31,#127] then begin
 | |
|         // invalid character in comment => delete
 | |
|         if (p^=#0) and (p-PChar(Fragment)=length(Fragment)) then
 | |
|         begin
 | |
|           // reached end of fragment => close comment
 | |
|           Error(p,'comment end not found, start at '+LineCol(TopItem^.StartPos));
 | |
|           if Fix then begin
 | |
|             Replace(Rel(p),0,'-->');
 | |
|             inc(p,3);
 | |
|           end;
 | |
|           break;
 | |
|         end else begin
 | |
|           // invalid #0 character in comment => delete
 | |
|           Error(p,'invalid character');
 | |
|           if Fix then
 | |
|             Replace(Rel(p),1,'')
 | |
|           else
 | |
|             inc(p);
 | |
|         end;
 | |
|       end else if (p^='-') and (p[1]='-') and (p[2]='>') then
 | |
|       begin
 | |
|         // comment end found
 | |
|         inc(p,3);
 | |
|         break;
 | |
|       end else
 | |
|         inc(p);
 | |
|     until false;
 | |
|     Pop;
 | |
|   end;
 | |
| 
 | |
|   procedure ParseAmpersand;
 | |
|   var
 | |
|     AmpPos: PChar;
 | |
|     i: Integer;
 | |
|     NeedLowercase: PChar;
 | |
|     len: integer;
 | |
|   begin
 | |
|     AmpPos:=p;
 | |
|     // & or &name; or &decimal;
 | |
|     case p[1] of
 | |
|     '0'..'9':
 | |
|       begin
 | |
|         // decimal number
 | |
|         inc(p);
 | |
|         i:=ord(p^)-ord('0');
 | |
|         while p^ in ['0'..'9'] do
 | |
|         begin
 | |
|           i:=i+10+ord(p^)-ord('0');
 | |
|           if i>$10FFFF then
 | |
|             break;
 | |
|           inc(p);
 | |
|         end;
 | |
|         if p^=';' then
 | |
|         begin
 | |
|           // ok
 | |
|           inc(p);
 | |
|           exit;
 | |
|         end;
 | |
|       end;
 | |
|     'a'..'z','A'..'Z':
 | |
|       begin
 | |
|         // name
 | |
|         inc(p);
 | |
|         NeedLowercase:=nil;
 | |
|         while p^ in ['a'..'z','A'..'Z'] do begin
 | |
|           if (NeedLowercase=nil) and (p^ in ['A'..'Z']) then
 | |
|             NeedLowercase:=p;
 | |
|           inc(p);
 | |
|         end;
 | |
|         if p^=';' then begin
 | |
|           if NeedLowercase<>nil then begin
 | |
|             Error(NeedLowercase,'character name must be lower case');
 | |
|             if Fix then begin
 | |
|               len:=(p-AmpPos)-1;
 | |
|               Replace(Rel(AmpPos)+1,len,lowercase(copy(Fragment,Rel(AmpPos)+1,len)));
 | |
|             end else
 | |
|               inc(p);
 | |
|           end else begin
 | |
|             // ok
 | |
|             inc(p);
 | |
|           end;
 | |
|           exit;
 | |
|         end;
 | |
|       end;
 | |
|     end;
 | |
|     p:=AmpPos;
 | |
|     // invalid character => convert or skip
 | |
|     HandleSpecialChar;
 | |
|   end;
 | |
| 
 | |
|   procedure LowerCaseName;
 | |
|   var
 | |
|     Start: PChar;
 | |
|     NeedLowercase: PChar;
 | |
|   begin
 | |
|     Start:=p;
 | |
|     NeedLowercase:=nil;
 | |
|     repeat
 | |
|       case p^ of
 | |
|       'a'..'z': inc(p);
 | |
|       'A'..'Z':
 | |
|         begin
 | |
|           if NeedLowercase=nil then
 | |
|             NeedLowercase:=p;
 | |
|           inc(p);
 | |
|         end;
 | |
|       else break;
 | |
|       end;
 | |
|     until false;
 | |
|     if NeedLowercase<>nil then begin
 | |
|       Error(NeedLowercase,'names must be lower case');
 | |
|       if Fix then
 | |
|         Replace(Rel(Start),p-Start,lowercase(copy(Fragment,Rel(Start),p-Start)));
 | |
|     end;
 | |
|   end;
 | |
| 
 | |
|   procedure ParseAttribute;
 | |
|   var
 | |
|     Quot: Char;
 | |
|   begin
 | |
|     // read attribute name
 | |
|     LowerCaseName;
 | |
|     while p^ in [' ',#9,#10,#13] do inc(p); // skip space
 | |
|     if p^<>'=' then begin
 | |
|       // missing value
 | |
|       Error(p,'expected =');
 | |
|       if Fix then
 | |
|         Replace(Rel(p),0,'=');
 | |
|     end;
 | |
|     if p^='=' then begin
 | |
|       // read equal
 | |
|       inc(p);
 | |
|       while p^ in [' ',#9,#10,#13] do inc(p); // skip space
 | |
|       if not (p^ in ['"','''']) then begin
 | |
|         // missing quote
 | |
|         Error(p,'expected "');
 | |
|         if Fix then
 | |
|           Replace(Rel(p),0,'"');
 | |
|       end;
 | |
|     end;
 | |
|     if p^ in ['"',''''] then begin
 | |
|       Quot:=p^;
 | |
|       // read value
 | |
|       inc(p);
 | |
|       repeat
 | |
|         case p^ of
 | |
|         '>',#0..#8,#11,#12,#14..#31,#127:
 | |
|           begin
 | |
|             // the > is not allowed in the quotes, probably the ending quot is missing
 | |
|             Error(p,'expected ending '+Quot);
 | |
|             if Fix then
 | |
|               Replace(Rel(p),0,Quot)
 | |
|             else
 | |
|               break;
 | |
|           end;
 | |
|         '&':
 | |
|           ParseAmpersand;
 | |
|         else
 | |
|           if p^=Quot then
 | |
|           begin
 | |
|             inc(p);
 | |
|             break;
 | |
|           end;
 | |
|           inc(p);
 | |
|         end;
 | |
|       until false;
 | |
|     end;
 | |
|   end;
 | |
| 
 | |
|   procedure ParseCloseTag;
 | |
|   var
 | |
|     i: LongInt;
 | |
|   begin
 | |
|     if (p^<>'<') or (p[1]<>'/') or (not (p[2] in ['a'..'z','A'..'Z'])) then
 | |
|     begin
 | |
|       HandleSpecialChar;
 | |
|       exit;
 | |
|     end;
 | |
|     // p stands on < of </tagname>
 | |
|     i:=Top;
 | |
|     while (i>=0)
 | |
|     and (CompareIdentifiers(p+2,@Fragment[Stack[i].StartPos+1])<>0) do
 | |
|       dec(i);
 | |
|     if i<0 then begin
 | |
|       // no corresponding open tag
 | |
|       Error(p,'tag was never opened');
 | |
|       HandleSpecialChar;
 | |
|       exit;
 | |
|     end;
 | |
|     TopItem^.CloseStartPos:=Rel(p);
 | |
|     inc(p,2); // skip </
 | |
|     // read name
 | |
|     LowerCaseName;
 | |
|     // close unclosed sub tag
 | |
|     i:=Top;
 | |
|     while CompareIdentifiers(@Fragment[Stack[i].StartPos+1],
 | |
|                              @Fragment[TopItem^.CloseStartPos+2])<>0
 | |
|     do begin
 | |
|       Error(@Fragment[TopItem^.StartPos],'tag must be closed');
 | |
|       if Fix then
 | |
|         Replace(TopItem^.EndPos-1,0,'/');
 | |
|       Pop;
 | |
|       dec(i);
 | |
|     end;
 | |
|     // skip space
 | |
|     while (p^ in [' ',#9,#10,#13]) do inc(p);
 | |
|     if p^='/' then begin
 | |
|       // uneeded close /
 | |
|       Error(p,'invalid close /');
 | |
|       if Fix then
 | |
|         Replace(Rel(p),1,'')
 | |
|       else
 | |
|         inc(p);
 | |
|     end;
 | |
|     if p^<>'>' then begin
 | |
|       Error(p,'missing >');
 | |
|       if Fix then
 | |
|         Replace(Rel(p),0,'>');
 | |
|     end;
 | |
|     if p^='>' then inc(p);
 | |
|     // close tag
 | |
|     Pop;
 | |
|   end;
 | |
| 
 | |
|   procedure ParseOpenTag;
 | |
|   begin
 | |
|     Push(sitTag);
 | |
|     TopItem^.StartPos:=Rel(p);
 | |
|     inc(p);
 | |
|     LowerCaseName;
 | |
|     repeat
 | |
|       while p^ in [' ',#9,#10,#13] do inc(p); // skip space
 | |
|       case p^ of
 | |
|       'a'..'z','A'..'Z':
 | |
|         ParseAttribute;
 | |
|       '/':
 | |
|         begin
 | |
|           // end and close tag
 | |
|           inc(p);
 | |
|           if p^<>'>' then begin
 | |
|             Error(p,'expected >');
 | |
|             if Fix then begin
 | |
|               Replace(Rel(p),0,'>');
 | |
|               inc(p);
 | |
|             end;
 | |
|           end;
 | |
|           inc(p);
 | |
|           Pop;
 | |
|           exit;
 | |
|         end;
 | |
|       '>':
 | |
|         begin
 | |
|           // end tag
 | |
|           inc(p);
 | |
|           TopItem^.EndPos:=Rel(p);
 | |
|           exit;
 | |
|         end;
 | |
|       else
 | |
|         // invalid character => end tag
 | |
|         Error(p,'expected >');
 | |
|         if Fix then begin
 | |
|           Replace(Rel(p),0,'>');
 | |
|           inc(p);
 | |
|         end;
 | |
|         exit;
 | |
|       end;
 | |
|     until false;
 | |
|   end;
 | |
| 
 | |
|   procedure ParseLowerThan;
 | |
|   begin
 | |
|     // comment, tag or 'lower than'
 | |
|     if not AllowTags then begin
 | |
|       // invalid character => convert or skip
 | |
|       HandleSpecialChar;
 | |
|     end else if (p[1]='!') and (p[2]='-') and (p[3]='-') then begin
 | |
|       // comment
 | |
|       ParseComment;
 | |
|     end else if p[1] in ['a'..'z','A'..'Z'] then begin
 | |
|       // start tag
 | |
|       ParseOpenTag;
 | |
|     end else if p[1]='/' then begin
 | |
|       // close tag
 | |
|       ParseCloseTag;
 | |
|     end else
 | |
|       // invalid character => convert or skip
 | |
|       HandleSpecialChar;
 | |
|   end;
 | |
| 
 | |
| begin
 | |
|   if Fragment='' then exit;
 | |
|   Top:=-1;
 | |
|   TopItem:=nil;
 | |
|   Capacity:=0;
 | |
|   Stack:=nil;
 | |
|   try
 | |
|     p:=PChar(Fragment);
 | |
|     repeat
 | |
|       case p^ of
 | |
|       #0..#8,#11,#12,#14..#31,#127:
 | |
|         begin
 | |
|           if (p^=#0) and (p-PChar(Fragment)>=length(Fragment)) then
 | |
|           begin
 | |
|             // reached end of fragment
 | |
|             break;
 | |
|           end else begin
 | |
|             // invalid character => convert or skip
 | |
|             HandleSpecialChar;
 | |
|           end;
 | |
|         end;
 | |
|       '<':
 | |
|         ParseLowerThan;
 | |
|       '>':
 | |
|         // invalid character => convert or skip
 | |
|         HandleSpecialChar;
 | |
|       '&':
 | |
|         ParseAmpersand;
 | |
|       '"','''':
 | |
|         if not AllowTags then
 | |
|           HandleSpecialChar
 | |
|         else
 | |
|           inc(p);
 | |
|       else
 | |
|         inc(p);
 | |
|       end;
 | |
|     until false;
 | |
|     while Top>=0 do begin
 | |
|       // fix unclosed tags
 | |
|       Error(@Fragment[TopItem^.StartPos],'tag must be closed');
 | |
|       if Fix then
 | |
|         Replace(TopItem^.EndPos-1,0,'/');
 | |
|       Pop;
 | |
|     end;
 | |
|   finally
 | |
|     ReAllocMem(Stack,0);
 | |
|   end;
 | |
| end;
 | |
| 
 | |
| procedure FixFPDocAttributeValue(var Value: string);
 | |
| begin
 | |
|   FixFPDocFragment(Value,false,true);
 | |
| end;
 | |
| 
 | |
| end.
 | |
| 
 | 
