mirror of
				https://gitlab.com/freepascal.org/fpc/source.git
				synced 2025-11-01 00:10:00 +01:00 
			
		
		
		
	
		
			
				
	
	
		
			1055 lines
		
	
	
		
			26 KiB
		
	
	
	
		
			ObjectPascal
		
	
	
	
	
	
			
		
		
	
	
			1055 lines
		
	
	
		
			26 KiB
		
	
	
	
		
			ObjectPascal
		
	
	
	
	
	
| {
 | |
|     This file is part of the Free Pascal Integrated Development Environment
 | |
|     Copyright (c) 2000 by Berczi Gabor
 | |
| 
 | |
|     HTML scanner objects
 | |
| 
 | |
|     See the file COPYING.FPC, included in this distribution,
 | |
|     for details about the copyright.
 | |
| 
 | |
|     This program is distributed in the hope that it will be useful,
 | |
|     but WITHOUT ANY WARRANTY; without even the implied warranty of
 | |
|     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 | |
| 
 | |
|  **********************************************************************}
 | |
| unit WHTMLScn;
 | |
| 
 | |
| interface
 | |
| 
 | |
| uses Objects,
 | |
|      WHTML;
 | |
| 
 | |
| const
 | |
|      HTMLIndexMagicNo = ord('H')+ord('H') shl 8+ord('I') shl 16+ord('X') shl 24;
 | |
|      HTMLIndexVersion = 2;
 | |
| 
 | |
| type
 | |
|      PHTMLLinkScanner = ^THTMLLinkScanner;
 | |
|      PHTMLLinkScanDocument = ^THTMLLinkScanDocument;
 | |
| 
 | |
|      TCustomHTMLLinkScanner = object(THTMLParser)
 | |
|        function    DocAddTextChar(C: char): boolean; virtual;
 | |
|        procedure   DocAnchor(Entered: boolean); virtual;
 | |
|      public
 | |
|     {a}function    CheckURL(const URL: string): boolean; virtual;
 | |
|     {a}function    CheckText(const Text: string): boolean; virtual;
 | |
|     {a}procedure   AddLink(const LinkText, LinkURL: string); virtual;
 | |
|     {a}procedure   AddRef(LinkURL: string); virtual;
 | |
|     {a}procedure   AddNameID(AName: string); virtual;
 | |
|     {a}procedure   AddID(AName: string); virtual;
 | |
|     {a}function    GetDocumentBaseURL: string; virtual;
 | |
|      private
 | |
|        CurLinkText: string;
 | |
|        CurURL: string;
 | |
|        CurName,
 | |
|        CurID: string;
 | |
|        CurDoc: string;
 | |
|        InAnchor,InNameAnchor,
 | |
|        HasHRef : boolean;
 | |
|        LastSynonym: PHTMLLinkScanDocument;
 | |
|      end;
 | |
| 
 | |
|      TNameIDState = (IsReferenced, IsFound,IsID);
 | |
|      TNameIDStates = set of TNameIDState;
 | |
| 
 | |
| 
 | |
|      PNameID  = ^TNameID;
 | |
|      TNameID  = object(TObject)
 | |
|        constructor Init(const AName : string; Astate : TNameIDState);
 | |
|        destructor  Done; virtual;
 | |
|        procedure SetState(Astate : TNameIDState; enabled : boolean);
 | |
|        procedure SetOrigin(const AOrigin : string);
 | |
|        procedure SetLine(ALine : sw_integer);
 | |
|        function GetLine : sw_integer;
 | |
|        function GetState : TNameIDStates;
 | |
|        function GetName : string;
 | |
|        function GetOrigin : string;
 | |
|      private
 | |
|        Name : pstring;
 | |
|        Origin : pstring;
 | |
|        Line : sw_integer;
 | |
|        State : TNameIDStates;
 | |
|      end;
 | |
| 
 | |
|      PNameIDCollection = ^TNameIDCollection;
 | |
|      TNameIDCollection = object(TSortedCollection)
 | |
|        function At(Index: sw_Integer): PNameID;
 | |
|        function Compare(Key1, Key2: Pointer): sw_Integer; virtual;
 | |
|      end;
 | |
| 
 | |
| 
 | |
|      THTMLLinkScanDocument = object(TObject)
 | |
|        constructor Init(const ADocName: string);
 | |
|        function    GetName: string;
 | |
|        function    GetUniqueName: string;
 | |
|        function    GetAliasCount: sw_integer;
 | |
|        function    GetAlias(Index: sw_integer): string;
 | |
|        procedure   AddAlias(const Alias: string);
 | |
|        constructor Load(var S: TStream);
 | |
|        procedure   Store(var S: TStream);
 | |
|        destructor  Done; virtual;
 | |
|      private
 | |
|        DocName: PString;
 | |
|        Synonym: PHTMLLinkScanDocument;
 | |
|        Aliases: PStringCollection;
 | |
|      end;
 | |
| 
 | |
|      PHTMLLinkScanDocumentCollection = ^THTMLLinkScanDocumentCollection;
 | |
|      THTMLLinkScanDocumentCollection = object(TSortedCollection)
 | |
|        constructor Init(AScanner: PHTMLLinkScanner; ALimit, ADelta: Integer);
 | |
|        function    Compare(Key1, Key2: Pointer): sw_Integer; virtual;
 | |
|        function    At(Index: sw_Integer): PHTMLLinkScanDocument;
 | |
|        function    SearchDocument(const DocName: string): PHTMLLinkScanDocument;
 | |
|        procedure   MoveAliasesToSynonym;
 | |
|      private
 | |
|        Scanner: PHTMLLinkScanner;
 | |
|      end;
 | |
| 
 | |
|      THTMLLinkScanner = object(TCustomHTMLLinkScanner)
 | |
|        constructor Init(const ABaseDir: string);
 | |
|        procedure   SetBaseDir(const ABaseDir: string);
 | |
|     {a}function    FindID(const AName : string) : PNameID; virtual;
 | |
|        function    GetDocumentCount: sw_integer;
 | |
|        function    GetDocumentURL(DocIndex: sw_integer): string;
 | |
|        function    GetUniqueDocumentURL(DocIndex: sw_integer): string;
 | |
|        function    GetDocumentAliasCount(DocIndex: sw_integer): sw_integer;
 | |
|        function    GetDocumentAlias(DocIndex, AliasIndex: sw_integer): string;
 | |
|        constructor LoadDocuments(var S: TStream);
 | |
|        procedure   StoreDocuments(var S: TStream);
 | |
|        destructor  Done; virtual;
 | |
|      public
 | |
|        procedure   AddLink(const LinkText, LinkURL: string); virtual;
 | |
|      private
 | |
|        Documents:  PHTMLLinkScanDocumentCollection;
 | |
|        BaseDir:    PString;
 | |
|        function    ExpandChildURL(const S: string): string;
 | |
|        function    NormalizeChildURL(const S: string): string;
 | |
|      end;
 | |
| 
 | |
|      THTMLLinkScanState = (ssScheduled,ssProcessing,ssScanned);
 | |
| 
 | |
|      PHTMLLinkScanFile = ^THTMLLinkScanFile;
 | |
|      THTMLLinkScanFile = object(TObject)
 | |
|        constructor Init(const ADocumentURL: string);
 | |
|        function    GetDocumentURL: string;
 | |
|        destructor  Done; virtual;
 | |
|        function    AddReferencedName (const AName : string) : PNameID;
 | |
|        function    AddFoundName (const AName : string) : PNameID;
 | |
|        procedure   CheckNameList;
 | |
|        function    FindID(const AName : string) : PNameID; virtual;
 | |
|      private
 | |
|        DocumentURL  : PString;
 | |
|        NameIDList   : PNameIDCollection;
 | |
|        Owner        : PHTMLLinkScanner;
 | |
|      public
 | |
|        State        : THTMLLinkScanState;
 | |
|      end;
 | |
| 
 | |
|      PHTMLLinkScanFileCollection = ^THTMLLinkScanFileCollection;
 | |
|      THTMLLinkScanFileCollection = object(TSortedCollection)
 | |
|        function   At(Index: sw_Integer): PHTMLLinkScanFile;
 | |
|        function   Compare(Key1, Key2: Pointer): sw_Integer; virtual;
 | |
|        function   SearchFile(const DocURL: string): PHTMLLinkScanFile;
 | |
|        function   FindFileWithState(AState: THTMLLinkScanState): PHTMLLinkScanFile;
 | |
|        procedure  CheckNameIDLists;
 | |
|      end;
 | |
| 
 | |
|      THTMLLinkScanOption = (soSubDocsOnly);
 | |
|      THTMLLinkScanOptions = set of THTMLLinkScanOption;
 | |
| 
 | |
|      THTMLFileLinkScanner = object(THTMLLinkScanner)
 | |
|        constructor Init(const ABaseDir: string);
 | |
|        procedure   ProcessDocument(const DocumentURL: string; AOptions: THTMLLinkScanOptions);
 | |
|        destructor  Done; virtual;
 | |
|      public
 | |
|        function    GetDocumentBaseURL: string; virtual;
 | |
|        function    FindID(const AName : string) : PNameID; virtual;
 | |
|        procedure   AddLink(const LinkText, LinkURL: string); virtual;
 | |
|        procedure   AddRef(LinkURL: string); virtual;
 | |
|        procedure   AddNameID(AName: string); virtual;
 | |
|        procedure   AddID(AName: string); virtual;
 | |
|        function    CheckURL(const URL: string): boolean; virtual;
 | |
|      private
 | |
|        Options: THTMLLinkScanOptions;
 | |
|        BaseURL: string;
 | |
|        CurBaseURL: string;
 | |
|        IDList   : PNameIDCollection;
 | |
|        DocumentFiles: PHTMLLinkScanFileCollection;
 | |
|        procedure   ScheduleDoc(const DocumentURL: string);
 | |
|      public
 | |
|        procedure   ProcessDoc(Doc: PHTMLLinkScanFile); virtual;
 | |
|      end;
 | |
| 
 | |
| procedure RegisterWHTMLScan;
 | |
| 
 | |
| implementation
 | |
| 
 | |
| uses
 | |
|   WUtils;
 | |
| 
 | |
| const
 | |
|   RHTMLLinkScanDocument: TStreamRec = (
 | |
|      ObjType: 19500;
 | |
|      VmtLink: Ofs(TypeOf(THTMLLinkScanDocument)^);
 | |
|      Load:    @THTMLLinkScanDocument.Load;
 | |
|      Store:   @THTMLLinkScanDocument.Store
 | |
|   );
 | |
| 
 | |
| const
 | |
|   CurrentHTMLIndexVersion : sw_integer = HTMLIndexVersion;
 | |
| 
 | |
| function TCustomHTMLLinkScanner.DocAddTextChar(C: char): boolean;
 | |
| var Added: boolean;
 | |
| begin
 | |
|   Added:=false;
 | |
|   if InAnchor then
 | |
|   begin
 | |
|     CurLinkText:=CurLinkText+C;
 | |
|     Added:=true;
 | |
|   end;
 | |
|   if ord(c)>32 then
 | |
|     LastSynonym:=nil;
 | |
|   DocAddTextChar:=Added;
 | |
| end;
 | |
| 
 | |
| procedure TCustomHTMLLinkScanner.DocAnchor(Entered: boolean);
 | |
| begin
 | |
|   if Entered then
 | |
|     begin
 | |
|       CurLinkText:='';
 | |
|       if DocGetTagParam('HREF',CurURL) then
 | |
|         HasHRef:=true
 | |
|       else
 | |
|         CurURL:='';
 | |
|       if not DocGetTagParam('NAME',CurName) then
 | |
|       if not DocGetTagParam('ID',CurName) then
 | |
|         CurName:='';
 | |
|       if not DocGetTagParam('ID',CurID) then
 | |
|         CurID:='';
 | |
|       if CurName<>'' then
 | |
|         begin
 | |
|           InNameAnchor:=true;
 | |
|           If Pos('#',CurName)=0 then
 | |
|             CurName:=CurDoc+'#'+CurName;
 | |
|           CurName:=Trim(CurName);
 | |
|           CurName:=CompleteURL(GetDocumentBaseURL,CurName);
 | |
|           if CurURL='' then
 | |
|             CurURL:=CurName;
 | |
|         end
 | |
|       else
 | |
|         CurName:='';
 | |
|       CurURL:=Trim(CurURL);
 | |
|       if pos('#',CurURL)=1 then
 | |
|         CurURL:=CurDoc+CurURL;
 | |
|       CurURL:=CompleteURL(GetDocumentBaseURL,CurURL);
 | |
|     end
 | |
|   else
 | |
|     begin
 | |
|       CurLinkText:=Trim(CurLinkText);
 | |
|       if HasHRef then
 | |
|         begin
 | |
|           if CheckURL(CurURL) and CheckText(CurLinkText) and
 | |
|              not DisableCrossIndexing then
 | |
|             begin
 | |
|               AddLink(CurLinkText,CurURL);
 | |
|     {$ifdef DEBUG}
 | |
|               DebugMessage(CurDoc,' Adding ScanLink "'+CurLinkText+'" to "'+
 | |
|                 CurURL+'"',Line,1);
 | |
|     {$endif DEBUG}
 | |
|             end;
 | |
|           { Be sure to parse referenced file,
 | |
|             even if that link is not valid }
 | |
|           AddRef(CurURL);
 | |
|         end;
 | |
|       if not HasHRef and InNameAnchor and CheckURL(CurName) and CheckText(CurLinkText) then
 | |
|         begin
 | |
|           AddLink(CurLinkText,CurName);
 | |
| {$ifdef DEBUG}
 | |
|           DebugMessage(CurDoc,' Adding ScanName "'+CurLinkText+'" to "'+CurName+'"',Line,1);
 | |
| {$endif DEBUG}
 | |
|         end;
 | |
|       if InNameAnchor then
 | |
|         begin
 | |
|           AddNameID(CurName);
 | |
|         end;
 | |
|       if not HasHRef and (CurID<>'') then
 | |
|         AddID(CurID);
 | |
|       InNameAnchor:=false;
 | |
|       HasHRef:=false;
 | |
|     end;
 | |
|   InAnchor:=Entered;
 | |
| end;
 | |
| 
 | |
| function TCustomHTMLLinkScanner.GetDocumentBaseURL: string;
 | |
| begin
 | |
|   { Abstract }
 | |
|   GetDocumentBaseURL:='';
 | |
| end;
 | |
| 
 | |
| function TCustomHTMLLinkScanner.CheckURL(const URL: string): boolean;
 | |
| begin
 | |
|   { Abstract }
 | |
|   CheckURL:=true;
 | |
| end;
 | |
| 
 | |
| function TCustomHTMLLinkScanner.CheckText(const Text: string): boolean;
 | |
| begin
 | |
|   { Abstract }
 | |
|   CheckText:=true;
 | |
| end;
 | |
| 
 | |
| procedure TCustomHTMLLinkScanner.AddLink(const LinkText, LinkURL: string);
 | |
| begin
 | |
|   { Abstract }
 | |
| end;
 | |
| 
 | |
| procedure TCustomHTMLLinkScanner.AddRef(LinkURL: string);
 | |
| begin
 | |
|   { Abstract }
 | |
| end;
 | |
| 
 | |
| procedure TCustomHTMLLinkScanner.AddNameID(AName: string);
 | |
| begin
 | |
|   { Abstract }
 | |
| end;
 | |
| 
 | |
| procedure TCustomHTMLLinkScanner.AddID(AName: string);
 | |
| begin
 | |
|   { Abstract }
 | |
| end;
 | |
| 
 | |
| 
 | |
| constructor TNameID.Init(const AName : string; Astate : TNameIDState);
 | |
| begin
 | |
|   inherited Init;
 | |
|   SetStr(Name,AName);
 | |
|   Origin:=nil;
 | |
|   State:=[AState];
 | |
| end;
 | |
| 
 | |
| destructor  TNameID.Done;
 | |
| begin
 | |
|   if assigned(Name) then
 | |
|     DisposeStr(Name);
 | |
|   Name:=nil;
 | |
|   if assigned(Origin) then
 | |
|     DisposeStr(Origin);
 | |
|   Origin:=nil;
 | |
|   inherited Done;
 | |
| end;
 | |
| 
 | |
| procedure TNameID.SetState(Astate : TNameIDState; enabled : boolean);
 | |
| begin
 | |
|   if enabled then
 | |
|     Include(State,AState)
 | |
|   else
 | |
|     Exclude(State,AState);
 | |
| end;
 | |
| 
 | |
| 
 | |
| function TNameID.GetState : TNameIDStates;
 | |
| begin
 | |
|   GetState:=State;
 | |
| end;
 | |
| 
 | |
| function TNameID.GetName : string;
 | |
| begin
 | |
|   GetName:=GetStr(Name);
 | |
| end;
 | |
| 
 | |
| function TNameID.GetOrigin : string;
 | |
| begin
 | |
|   GetOrigin:=GetStr(Origin);
 | |
| end;
 | |
| 
 | |
| procedure TNameID.SetOrigin(const AOrigin : string);
 | |
| begin
 | |
|   SetStr(Origin,AOrigin);
 | |
| end;
 | |
| procedure TNameID.SetLine(ALine : sw_integer);
 | |
| begin
 | |
|   Line:=ALine;
 | |
| end;
 | |
| 
 | |
| function TNameID.GetLine : sw_integer;
 | |
| begin
 | |
|   GetLine:=Line;
 | |
| end;
 | |
| 
 | |
| 
 | |
| function TNameIDCollection.At(Index: sw_Integer): PNameID;
 | |
| begin
 | |
|   At:=Inherited At(Index);
 | |
| end;
 | |
| 
 | |
| function TNameIDCollection.Compare(Key1, Key2: Pointer): sw_Integer;
 | |
| var
 | |
|   R: sw_integer;
 | |
|   K1: PNameID absolute Key1;
 | |
|   K2: PNameID absolute Key2;
 | |
|   S1,S2: string;
 | |
| begin
 | |
|   S1:=K1^.GetName;
 | |
|   S2:=K2^.GetName;
 | |
|   S1:=UpcaseStr(S1); S2:=UpcaseStr(S2);
 | |
|   if S1<S2 then R:=-1 else
 | |
|   if S1>S2 then R:= 1 else
 | |
|   R:=0;
 | |
|   Compare:=R;
 | |
| end;
 | |
| 
 | |
| 
 | |
| constructor THTMLLinkScanDocument.Init(const ADocName: string);
 | |
| begin
 | |
|   inherited Init;
 | |
|   SetStr(DocName,ADocName);
 | |
|   New(Aliases, Init(10,10));
 | |
| {$ifdef DEBUG}
 | |
|   DebugMessage('',' Adding New LinkScan document "'+ADocName+'"',1,1);
 | |
| {$endif DEBUG}
 | |
|   Synonym:=nil;
 | |
| end;
 | |
| 
 | |
| function THTMLLinkScanDocument.GetName: string;
 | |
| begin
 | |
|   GetName:=GetStr(DocName);
 | |
| end;
 | |
| 
 | |
| function THTMLLinkScanDocument.GetUniqueName: string;
 | |
| var
 | |
|   PD: PHTMLLinkScanDocument;
 | |
| begin
 | |
|   PD:=@Self;
 | |
|   while assigned(PD^.synonym) do
 | |
|     PD:=PD^.Synonym;
 | |
|   GetUniqueName:=GetStr(PD^.DocName);
 | |
| end;
 | |
| 
 | |
| 
 | |
| function THTMLLinkScanDocument.GetAliasCount: sw_integer;
 | |
| begin
 | |
|   GetAliasCount:=Aliases^.Count;
 | |
| end;
 | |
| 
 | |
| function THTMLLinkScanDocument.GetAlias(Index: sw_integer): string;
 | |
| begin
 | |
|   GetAlias:=GetStr(Aliases^.At(Index));
 | |
| end;
 | |
| 
 | |
| procedure THTMLLinkScanDocument.AddAlias(const Alias: string);
 | |
| begin
 | |
|   Aliases^.Insert(NewStr(Alias));
 | |
| {$ifdef DEBUG}
 | |
|   DebugMessage('',' Adding alias "'+Alias+'" to LinkScan document "'+GetStr(DocName)+'"',1,1);
 | |
| {$endif DEBUG}
 | |
| end;
 | |
| 
 | |
| constructor THTMLLinkScanDocument.Load(var S: TStream);
 | |
| var
 | |
|   i: sw_integer;
 | |
| begin
 | |
|   inherited Init;
 | |
|   DocName:=S.ReadStr;
 | |
|   if assigned(DocName) then
 | |
|     for i:=1 to Length(DocName^) do
 | |
|       if (DocName^[i]='\') or  (DocName^[i]='/') then
 | |
|         DocName^[i]:=DirSep;
 | |
|   New(Aliases, Load(S));
 | |
| end;
 | |
| 
 | |
| procedure THTMLLinkScanDocument.Store(var S: TStream);
 | |
| begin
 | |
|   S.WriteStr(DocName);
 | |
|   Aliases^.Store(S);
 | |
| end;
 | |
| 
 | |
| destructor THTMLLinkScanDocument.Done;
 | |
| begin
 | |
|   if Assigned(Aliases) then
 | |
|     Dispose(Aliases, Done);
 | |
|   Aliases:=nil;
 | |
|   if Assigned(DocName) then
 | |
|     DisposeStr(DocName);
 | |
|   DocName:=nil;
 | |
|   inherited Done;
 | |
| end;
 | |
| 
 | |
| constructor THTMLLinkScanDocumentCollection.Init(AScanner: PHTMLLinkScanner; ALimit, ADelta: Integer);
 | |
| begin
 | |
|   inherited Init(ALimit,ADelta);
 | |
|   Scanner:=AScanner;
 | |
| end;
 | |
| 
 | |
| function THTMLLinkScanDocumentCollection.Compare(Key1, Key2: Pointer): sw_Integer;
 | |
| var R: sw_integer;
 | |
|     K1: PHTMLLinkScanDocument absolute Key1;
 | |
|     K2: PHTMLLinkScanDocument absolute Key2;
 | |
|     S1,S2: string;
 | |
| begin
 | |
|   S1:=K1^.GetName; S2:=K2^.GetName;
 | |
|   if Assigned(Scanner) then
 | |
|    begin S1:=Scanner^.ExpandChildURL(S1); S2:=Scanner^.ExpandChildURL(S2); end;
 | |
|   S1:=UpcaseStr(S1); S2:=UpcaseStr(S2);
 | |
|   if S1<S2 then R:=-1 else
 | |
|   if S1>S2 then R:= 1 else
 | |
|   R:=0;
 | |
|   Compare:=R;
 | |
| end;
 | |
| 
 | |
| function THTMLLinkScanDocumentCollection.At(Index: sw_Integer): PHTMLLinkScanDocument;
 | |
| begin
 | |
|   At:=inherited At(Index);
 | |
| end;
 | |
| 
 | |
| function THTMLLinkScanDocumentCollection.SearchDocument(const DocName: string): PHTMLLinkScanDocument;
 | |
| var D,P: PHTMLLinkScanDocument;
 | |
|     Index: sw_integer;
 | |
| begin
 | |
|   New(D, Init(DocName));
 | |
|   if Search(D, Index)=false then P:=nil else
 | |
|     P:=At(Index);
 | |
|   Dispose(D, Done);
 | |
|   SearchDocument:=P;
 | |
| end;
 | |
| 
 | |
| procedure THTMLLinkScanDocumentCollection.MoveAliasesToSynonym;
 | |
|   procedure MoveAliases(P: PHTMLLinkScanDocument);
 | |
|   var
 | |
|     PD: PHTMLLinkScanDocument;
 | |
|     i: sw_integer;
 | |
|   begin
 | |
|     if not assigned(P^.synonym) then
 | |
|       exit;
 | |
|     PD:=P;
 | |
|     while assigned(PD^.synonym) do
 | |
|       PD:=PD^.Synonym;
 | |
| 
 | |
|     For i:=P^.GetAliasCount-1 downto 0 do
 | |
|       begin
 | |
|         PD^.AddAlias(P^.GetAlias(i));
 | |
|         P^.Aliases^.AtFree(i);
 | |
|       end;
 | |
|   end;
 | |
| begin
 | |
|   ForEach(@MoveAliases);
 | |
| end;
 | |
| 
 | |
| constructor THTMLLinkScanner.Init(const ABaseDir: string);
 | |
| begin
 | |
|   inherited Init;
 | |
|   New(Documents, Init(@Self,50,100));
 | |
|   SetBaseDir(ABaseDir);
 | |
| end;
 | |
| 
 | |
| procedure THTMLLinkScanner.SetBaseDir(const ABaseDir: string);
 | |
| begin
 | |
|   if Assigned(BaseDir) then DisposeStr(BaseDir);
 | |
|   BaseDir:=NewStr(CompleteDir(ABaseDir));
 | |
| end;
 | |
| 
 | |
| function THTMLLinkScanner.GetDocumentCount: sw_integer;
 | |
| begin
 | |
|   GetDocumentCount:=Documents^.Count;
 | |
| end;
 | |
| 
 | |
| function THTMLLinkScanner.ExpandChildURL(const S: string): string;
 | |
| begin
 | |
|   ExpandChildURL:=CompleteURL(GetStr(BaseDir),S);
 | |
| end;
 | |
| 
 | |
| function THTMLLinkScanner.NormalizeChildURL(const S: string): string;
 | |
| var URL: string;
 | |
| begin
 | |
|   URL:=S;
 | |
|   if GetStr(BaseDir)<>'' then
 | |
|    if copy(UpcaseStr(S),1,length(GetStr(BaseDir)))=UpcaseStr(GetStr(BaseDir)) then
 | |
|      URL:=copy(S,length(GetStr(BaseDir))+1,length(S));
 | |
|   NormalizeChildURL:=URL;
 | |
| end;
 | |
| 
 | |
| function THTMLLinkScanner.GetDocumentURL(DocIndex: sw_integer): string;
 | |
| begin
 | |
|   GetDocumentURL:=ExpandChildURL(Documents^.At(DocIndex)^.GetName);
 | |
| end;
 | |
| 
 | |
| function THTMLLinkScanner.GetUniqueDocumentURL(DocIndex: sw_integer): string;
 | |
| begin
 | |
|   GetUniqueDocumentURL:=ExpandChildURL(Documents^.At(DocIndex)^.GetUniqueName);
 | |
| end;
 | |
| 
 | |
| function THTMLLinkScanner.GetDocumentAliasCount(DocIndex: sw_integer): sw_integer;
 | |
| begin
 | |
|   GetDocumentAliasCount:=Documents^.At(DocIndex)^.GetAliasCount;
 | |
| end;
 | |
| 
 | |
| function THTMLLinkScanner.GetDocumentAlias(DocIndex, AliasIndex: sw_integer): string;
 | |
| begin
 | |
|   GetDocumentAlias:=Documents^.At(DocIndex)^.GetAlias(AliasIndex);
 | |
| end;
 | |
| 
 | |
| procedure THTMLLinkScanner.AddLink(const LinkText, LinkURL: string);
 | |
| var D: PHTMLLinkScanDocument;
 | |
|     DoInsert: boolean;
 | |
|     int: sw_integer;
 | |
|     Text: string;
 | |
|     error: word;
 | |
| begin
 | |
|   D:=Documents^.SearchDocument(LinkURL);
 | |
|   if D=nil then
 | |
|   begin
 | |
|     New(D, Init(NormalizeChildURL(LinkURL)));
 | |
|     Documents^.Insert(D);
 | |
|   end;
 | |
|   If assigned(LastSynonym) then
 | |
|     LastSynonym^.Synonym:=D;
 | |
|   DoInsert:=true;
 | |
|   If (length(LinkText)=0) or (Pos(',',LinkText)=1) then
 | |
|     DoInsert:=false;
 | |
|   Val(LinkText,int,error);
 | |
|   If (Error>1) and (LinkText[Error]=' ') then
 | |
|     Text:=Trim(Copy(LinkText,error+1,length(LinkText)))
 | |
|   else
 | |
|     Text:=LinkText;
 | |
|   IF DoInsert then
 | |
|     D^.AddAlias(Text);
 | |
|   If InNameAnchor then
 | |
|     LastSynonym:=D;
 | |
| end;
 | |
| 
 | |
| constructor THTMLLinkScanner.LoadDocuments(var S: TStream);
 | |
| var P,L: longint;
 | |
|     OK: boolean;
 | |
|     PS: PString;
 | |
| begin
 | |
|   OK:=false;
 | |
|   P:=S.GetPos;
 | |
|   S.Read(L,sizeof(L));
 | |
|   if (S.Status=stOK) and (L=HTMLIndexMagicNo) then
 | |
|   begin
 | |
|     S.Read(L,sizeof(L));
 | |
|     CurrentHTMLIndexVersion:=L;
 | |
|     OK:=(S.Status=stOK);
 | |
|   end;
 | |
|   if not OK then
 | |
|     begin
 | |
|       S.Reset;
 | |
|       S.Seek(P);
 | |
|     end
 | |
|   else
 | |
|     BaseDir:=S.ReadStr;
 | |
|   New(Documents, Load(S));
 | |
|   if not Assigned(Documents) then
 | |
|     Fail;
 | |
|   Documents^.MoveAliasesToSynonym;
 | |
|   CurrentHTMLIndexVersion:=HTMLIndexVersion;
 | |
| end;
 | |
| 
 | |
| function THTMLLinkScanner.FindID(const AName : string) : PNameID;
 | |
| begin
 | |
|   {abstract}FindID:=nil;
 | |
| end;
 | |
| 
 | |
| 
 | |
| procedure THTMLLinkScanner.StoreDocuments(var S: TStream);
 | |
| var L: longint;
 | |
| begin
 | |
|   L:=HTMLIndexMagicNo;
 | |
|   S.Write(L,sizeof(L));
 | |
|   L:=HTMLIndexVersion;
 | |
|   CurrentHTMLIndexVersion:=L;
 | |
|   S.Write(L,sizeof(L));
 | |
|   S.WriteStr(BaseDir);
 | |
|   Documents^.MoveAliasesToSynonym;
 | |
|   Documents^.Store(S);
 | |
| end;
 | |
| 
 | |
| destructor THTMLLinkScanner.Done;
 | |
| begin
 | |
|   if Assigned(Documents) then
 | |
|     Dispose(Documents, Done);
 | |
|   Documents:=nil;
 | |
|   if Assigned(BaseDir) then
 | |
|     DisposeStr(BaseDir);
 | |
|   BaseDir:=nil;
 | |
|   inherited Done;
 | |
| end;
 | |
| 
 | |
| constructor THTMLLinkScanFile.Init(const ADocumentURL: string);
 | |
| begin
 | |
|   inherited Init;
 | |
|   SetStr(DocumentURL,ADocumentURL);
 | |
|   New(NameIDList, Init(5,10));
 | |
| end;
 | |
| 
 | |
| function THTMLLinkScanFile.GetDocumentURL: string;
 | |
| begin
 | |
|   GetDocumentURL:=GetStr(DocumentURL);
 | |
| end;
 | |
| 
 | |
| function THTMLLinkScanFile.AddReferencedName (const AName : string) : PNameID;
 | |
| var
 | |
|   index : sw_integer;
 | |
|   PN : PNameID;
 | |
| begin
 | |
|   new(PN,init(AName,IsReferenced));
 | |
|   if not NameIDList^.Search(PN,Index) then
 | |
|     NameIDList^.Insert(PN)
 | |
|   else
 | |
|     begin
 | |
|       dispose(PN,Done);
 | |
|       PN:=NameIDList^.At(Index);
 | |
|       PN^.SetState(IsReferenced,true);
 | |
|     end;
 | |
|   AddReferencedName:=PN;
 | |
| end;
 | |
| 
 | |
| function THTMLLinkScanFile.AddFoundName (const AName : string) : PNameID;
 | |
| var
 | |
|   index : sw_integer;
 | |
|   PN : PNameID;
 | |
| begin
 | |
|   new(PN,init(AName,IsFound));
 | |
|   if not NameIDList^.Search(PN,Index) then
 | |
|     NameIDList^.Insert(PN)
 | |
|   else
 | |
|     begin
 | |
|       dispose(PN,Done);
 | |
|       PN:=NameIDList^.At(Index);
 | |
|       PN^.SetState(IsFound,true);
 | |
|     end;
 | |
|   AddFoundName:=PN;
 | |
| end;
 | |
| 
 | |
| procedure THTMLLinkScanFile.CheckNameList;
 | |
| var
 | |
|   i : sw_integer;
 | |
|   PN,PN2 : PNameID;
 | |
| begin
 | |
| {$ifdef DEBUG}
 | |
|   for i:=0 to NameIDList^.Count-1 do
 | |
|     begin
 | |
|       PN:=NameIDList^.At(i);
 | |
|       if not (IsFound in PN^.GetState) then
 | |
|         begin
 | |
|           if (IsReferenced in PN^.GetState) then
 | |
|             DebugMessage(GetDocumentURL,'Name "'+PN^.GetName+'" from "'+
 | |
|               PN^.GetOrigin+'" not found',1,1);
 | |
|           PN2:=Owner^.FindID(PN^.GetName);
 | |
|           if assigned(PN2) then
 | |
|             begin
 | |
|               DebugMessage('','ID found in "'+PN2^.GetOrigin+'"',1,1);
 | |
|               if not (IsFound in PN2^.GetState) then
 | |
|                 DebugMessage('','ID not found',1,1);
 | |
|             end;
 | |
|         end;
 | |
|     end;
 | |
| {$endif DEBUG}
 | |
| end;
 | |
| 
 | |
| 
 | |
| function  THTMLLinkScanFile.FindID(const AName : string) : PNameID;
 | |
| var
 | |
|   PN : PNameID;
 | |
|   Index : sw_integer;
 | |
| begin
 | |
|   new(PN,init(AName,IsID));
 | |
|   if NameIDList^.Search(PN,Index) then
 | |
|     begin
 | |
|       dispose(PN,done);
 | |
|       PN:=NameIDList^.At(Index);
 | |
|       if (IsID in PN^.GetState) then
 | |
|         FindId:=PN
 | |
|       else
 | |
|         FindID:=nil;
 | |
|     end
 | |
|   else
 | |
|     begin
 | |
|       dispose(PN,done);
 | |
|       PN:=nil;
 | |
|       FindID:=nil;
 | |
|     end;
 | |
| 
 | |
| end;
 | |
| destructor THTMLLinkScanFile.Done;
 | |
| begin
 | |
|   if Assigned(DocumentURL) then
 | |
|     DisposeStr(DocumentURL);
 | |
|   DocumentURL:=nil;
 | |
|   dispose(NameIDList,done);
 | |
|   NameIDList:=nil;
 | |
|   inherited Done;
 | |
| end;
 | |
| 
 | |
| function THTMLLinkScanFileCollection.At(Index: sw_Integer): PHTMLLinkScanFile;
 | |
| begin
 | |
|   At:=inherited At(Index);
 | |
| end;
 | |
| 
 | |
| function THTMLLinkScanFileCollection.Compare(Key1, Key2: Pointer): sw_Integer;
 | |
| var R: integer;
 | |
|     K1: PHTMLLinkScanFile absolute Key1;
 | |
|     K2: PHTMLLinkScanFile absolute Key2;
 | |
|     S1,S2: string;
 | |
| begin
 | |
|   S1:=UpcaseStr(K1^.GetDocumentURL); S2:=UpcaseStr(K2^.GetDocumentURL);
 | |
|   if S1<S2 then R:=-1 else
 | |
|   if S1>S2 then R:= 1 else
 | |
|   R:=0;
 | |
|   Compare:=R;
 | |
| end;
 | |
| 
 | |
| function THTMLLinkScanFileCollection.SearchFile(const DocURL: string): PHTMLLinkScanFile;
 | |
| var P,D: PHTMLLinkScanFile;
 | |
|     Index: sw_integer;
 | |
| begin
 | |
|   New(D, Init(DocURL));
 | |
|   if Search(D,Index)=false then P:=nil else
 | |
|     P:=At(Index);
 | |
|   Dispose(D, Done);
 | |
|   SearchFile:=P;
 | |
| end;
 | |
| 
 | |
| function THTMLLinkScanFileCollection.FindFileWithState(AState: THTMLLinkScanState): PHTMLLinkScanFile;
 | |
| var I: sw_integer;
 | |
|     P,D: PHTMLLinkScanFile;
 | |
| begin
 | |
|   P:=nil;
 | |
|   for I:=0 to Count-1 do
 | |
|   begin
 | |
|     D:=At(I);
 | |
|     if D^.State=AState then
 | |
|       begin
 | |
|         P:=D;
 | |
|         Break;
 | |
|       end;
 | |
|   end;
 | |
|   FindFileWithState:=P;
 | |
| end;
 | |
| 
 | |
| procedure THTMLLinkScanFileCollection.CheckNameIDLists;
 | |
| 
 | |
|   procedure DoCheckNameList(P : PHTMLLinkScanFile);
 | |
|     begin
 | |
|       P^.CheckNameList;
 | |
|     end;
 | |
| 
 | |
| begin
 | |
|   ForEach(@DoCheckNameList);
 | |
| end;
 | |
| 
 | |
| 
 | |
| constructor THTMLFileLinkScanner.Init(const ABaseDir: string);
 | |
| begin
 | |
|   inherited Init(ABaseDir);
 | |
|   New(DocumentFiles, Init(50,100));
 | |
|   New(IDList, Init(50,100));
 | |
| {$ifdef DEBUG}
 | |
|   DebugMessage('','THTMLFileLinkScanner Init "'+ABaseDir+'"',1,1);
 | |
| {$endif DEBUG}
 | |
| end;
 | |
| 
 | |
| procedure THTMLFileLinkScanner.ProcessDocument(const DocumentURL: string; AOptions: THTMLLinkScanOptions);
 | |
| var P: PHTMLLinkScanFile;
 | |
| begin
 | |
|   CurBaseURL:='';
 | |
|   Options:=AOptions;
 | |
|   ScheduleDoc(DocumentURL);
 | |
|   repeat
 | |
|     P:=DocumentFiles^.FindFileWithState(ssScheduled);
 | |
|     if Assigned(P) then
 | |
|       ProcessDoc(P);
 | |
|   until P=nil;
 | |
| {$ifdef DEBUG}
 | |
|   DebugMessage('','THTMLFileLinkScanner CheckNameList start ',1,1);
 | |
|   DocumentFiles^.CheckNameIDLists;
 | |
|   DebugMessage('','THTMLFileLinkScanner CheckNameList end ',1,1);
 | |
| {$endif DEBUG}
 | |
| end;
 | |
| 
 | |
| function THTMLFileLinkScanner.GetDocumentBaseURL: string;
 | |
| begin
 | |
|   GetDocumentBaseURL:=CurBaseURL;
 | |
| end;
 | |
| 
 | |
| function THTMLFileLinkScanner.CheckURL(const URL: string): boolean;
 | |
| var OK: boolean;
 | |
| begin
 | |
|   if soSubDocsOnly in Options then
 | |
|     OK:=UpcaseStr(copy(URL,1,length(BaseURL)))=UpcaseStr(BaseURL)
 | |
|   else
 | |
|     OK:=true;
 | |
|   CheckURL:=OK;
 | |
| end;
 | |
| 
 | |
| procedure THTMLFileLinkScanner.AddLink(const LinkText, LinkURL: string);
 | |
| var D: PHTMLLinkScanFile;
 | |
|     P: sw_integer;
 | |
|     DocURL: string;
 | |
| begin
 | |
|   P:=Pos('#',LinkURL);
 | |
|   if P=0 then DocURL:=LinkURL else DocURL:=copy(LinkURL,1,P-1);
 | |
|   D:=DocumentFiles^.SearchFile(DocURL);
 | |
|   if not Assigned(D) then
 | |
|       ScheduleDoc(DocURL);
 | |
|   inherited AddLink(LinkText,LinkURL);
 | |
| end;
 | |
| 
 | |
| procedure THTMLFileLinkScanner.AddRef(LinkURL: string);
 | |
| var D: PHTMLLinkScanFile;
 | |
|     P: sw_integer;
 | |
|     DocURL: string;
 | |
|     PN : PNameID;
 | |
| begin
 | |
| {$ifdef DEBUG}
 | |
|   DebugMessage(CurDoc,' Adding Ref to "'+
 | |
|     LinkURL+'"',Line,1);
 | |
| {$endif DEBUG}
 | |
|   P:=Pos('#',LinkURL);
 | |
|   if P=0 then DocURL:=LinkURL else DocURL:=copy(LinkURL,1,P-1);
 | |
|   D:=DocumentFiles^.SearchFile(DocURL);
 | |
|   if not Assigned(D) then
 | |
|       ScheduleDoc(DocURL);
 | |
|   D:=DocumentFiles^.SearchFile(DocURL);
 | |
|   if P>0 then
 | |
|     begin
 | |
|       PN:=D^.AddReferencedName(copy(LinkURL,P+1,length(LinkURL)));
 | |
|       PN^.SetOrigin(CurDoc);
 | |
|       PN^.SetLine(Line);
 | |
|     end;
 | |
| end;
 | |
| 
 | |
| procedure THTMLFileLinkScanner.AddNameID(AName : string);
 | |
| var D: PHTMLLinkScanFile;
 | |
|     P: sw_integer;
 | |
|     PN : PNameID;
 | |
|     DocURL: string;
 | |
| begin
 | |
| {$ifdef DEBUG}
 | |
|   DebugMessage(CurDoc,' Adding NameID "'+
 | |
|     CurName+'"',Line,1);
 | |
| {$endif DEBUG}
 | |
|   P:=Pos('#',AName);
 | |
|   if P=0 then DocURL:=AName else DocURL:=copy(AName,1,P-1);
 | |
|   D:=DocumentFiles^.SearchFile(DocURL);
 | |
|   if not Assigned(D) then
 | |
|       ScheduleDoc(DocURL);
 | |
|   D:=DocumentFiles^.SearchFile(DocURL);
 | |
|   PN:=D^.AddFoundName(copy(AName,P+1,length(AName)));
 | |
|   PN^.SetOrigin(CurDoc);
 | |
|   PN^.SetLine(Line);
 | |
| end;
 | |
| 
 | |
| procedure THTMLFileLinkScanner.AddID(AName : string);
 | |
| var
 | |
|   D: PHTMLLinkScanFile;
 | |
|   PN : PNameID;
 | |
|   index : sw_integer;
 | |
| begin
 | |
| {$ifdef DEBUG}
 | |
|   DebugMessage(CurDoc,' Adding Id "'+
 | |
|     AName+'"',Line,1);
 | |
| {$endif DEBUG}
 | |
|   D:=DocumentFiles^.SearchFile(CurDoc);
 | |
|   if not Assigned(D) then
 | |
|       ScheduleDoc(CurDoc);
 | |
|   D:=DocumentFiles^.SearchFile(CurDoc);
 | |
|   PN:=D^.AddFoundName(AName);
 | |
|   PN^.SetState(IsId,true);
 | |
|   PN^.SetOrigin(CurDoc);
 | |
|   PN^.SetLine(Line);
 | |
| 
 | |
|   new(PN,init(AName,IsID));
 | |
|   if IDList^ .Search(PN,index) then
 | |
|     begin
 | |
|       dispose(PN,done);
 | |
| {$ifdef DEBUG}
 | |
|       PN:=IDList^.At(Index);
 | |
|       DebugMessage(CurDoc,'ID "'+AName+'" already defined in "'+
 | |
|         PN^.GetOrigin+'('+IntToStr(PN^.GetLine)+')"',Line,1);
 | |
| {$endif DEBUG}
 | |
|     end
 | |
|   else
 | |
|     begin
 | |
|       IDList^.Insert(PN);
 | |
|       PN^.SetOrigin(CurDoc);
 | |
|       PN^.SetLine(Line);
 | |
|     end;
 | |
| end;
 | |
| 
 | |
| function THTMLFileLinkScanner.FindID(const AName : string) : PNameID;
 | |
| 
 | |
|   Function ContainsNamedID(D : PHTMLLinkScanFile) : boolean;
 | |
|     begin
 | |
|       ContainsNamedID:=D^.FindID(AName)<>nil;
 | |
|     end;
 | |
| var
 | |
|   D : PHTMLLinkScanFile;
 | |
| begin
 | |
|   D:=DocumentFiles^.FirstThat(@ContainsNamedID);
 | |
|   if assigned(D) then
 | |
|     FindID:=D^.FindID(AName)
 | |
|   else
 | |
|     FindID:=nil;
 | |
| end;
 | |
| 
 | |
| procedure THTMLFileLinkScanner.ProcessDoc(Doc: PHTMLLinkScanFile);
 | |
| var F: PDOSTextFile;
 | |
| begin
 | |
|   if Assigned(Doc)=false then Exit;
 | |
| 
 | |
|   Doc^.State:=ssProcessing;
 | |
|   CurDoc:=Doc^.GetDocumentURL;
 | |
|   New(F, Init(CurDoc));
 | |
|   if Assigned(F) then
 | |
|     begin
 | |
|       CurBaseURL:=CompleteURL(CurDoc,'');
 | |
| {$ifdef DEBUG}
 | |
|       DebugMessage(CurDoc,'Processing "'+CurDoc+'"',1,1);
 | |
| {$endif DEBUG}
 | |
|       Process(F);
 | |
| {$ifdef DEBUG}
 | |
|       DebugMessage(CurDoc,'Finished processing "'+CurDoc+'"',Line,1);
 | |
| {$endif DEBUG}
 | |
|       Dispose(F, Done);
 | |
|     end
 | |
|   else
 | |
|     begin
 | |
| {$ifdef DEBUG}
 | |
|       DebugMessage(CurDoc,'file not found',1,1);
 | |
| {$endif DEBUG}
 | |
|     end;
 | |
|   Doc^.State:=ssScanned;
 | |
|   CurDoc:='';
 | |
| end;
 | |
| 
 | |
| procedure THTMLFileLinkScanner.ScheduleDoc(const DocumentURL: string);
 | |
| var D: PHTMLLinkScanFile;
 | |
| begin
 | |
|   New(D, Init(DocumentURL));
 | |
|   D^.State:=ssScheduled;
 | |
|   D^.Owner:=@Self;
 | |
| {$ifdef DEBUG}
 | |
|       DebugMessage('','Scheduling file "'+DocumentURL+'"',1,1);
 | |
| {$endif DEBUG}
 | |
|   DocumentFiles^.Insert(D);
 | |
| end;
 | |
| 
 | |
| destructor THTMLFileLinkScanner.Done;
 | |
| begin
 | |
|   if Assigned(DocumentFiles) then
 | |
|     Dispose(DocumentFiles, Done);
 | |
|   DocumentFiles:=nil;
 | |
|   if Assigned(IDList) then
 | |
|     Dispose(IDList, Done);
 | |
|   IDList:=nil;
 | |
|   inherited Done;
 | |
| end;
 | |
| 
 | |
| procedure RegisterWHTMLScan;
 | |
| begin
 | |
|   RegisterType(RHTMLLinkScanDocument);
 | |
| end;
 | |
| 
 | |
| 
 | |
| END.
 | 
