fpc/packages/ide/whtmlscn.pas
Michaël Van Canneyt 98ea5cddda * PChar -> PAnsichar
2023-07-15 18:22:37 +02:00

1060 lines
26 KiB
ObjectPascal

{
This file is part of the Free Pascal Integrated Development Environment
Copyright (c) 2000 by Berczi Gabor
HTML scanner objects
See the file COPYING.FPC, included in this distribution,
for details about the copyright.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
**********************************************************************}
unit WHTMLScn;
{$ifdef cpullvm}
{$modeswitch nestedprocvars}
{$endif}
{$H-}
interface
uses Objects,
WHTML;
const
HTMLIndexMagicNo = ord('H')+ord('H') shl 8+ord('I') shl 16+ord('X') shl 24;
HTMLIndexVersion = 2;
type
PHTMLLinkScanner = ^THTMLLinkScanner;
PHTMLLinkScanDocument = ^THTMLLinkScanDocument;
TCustomHTMLLinkScanner = object(THTMLParser)
function DocAddTextChar(C: AnsiChar): boolean; virtual;
procedure DocAnchor(Entered: boolean); virtual;
public
{a}function CheckURL(const URL: string): boolean; virtual;
{a}function CheckText(const Text: string): boolean; virtual;
{a}procedure AddLink(const LinkText, LinkURL: string); virtual;
{a}procedure AddRef(LinkURL: string); virtual;
{a}procedure AddNameID(AName: string); virtual;
{a}procedure AddID(AName: string); virtual;
{a}function GetDocumentBaseURL: string; virtual;
private
CurLinkText: string;
CurURL: string;
CurName,
CurID: string;
CurDoc: string;
InAnchor,InNameAnchor,
HasHRef : boolean;
LastSynonym: PHTMLLinkScanDocument;
end;
TNameIDState = (IsReferenced, IsFound,IsID);
TNameIDStates = set of TNameIDState;
PNameID = ^TNameID;
TNameID = object(TObject)
constructor Init(const AName : string; Astate : TNameIDState);
destructor Done; virtual;
procedure SetState(Astate : TNameIDState; enabled : boolean);
procedure SetOrigin(const AOrigin : string);
procedure SetLine(ALine : sw_integer);
function GetLine : sw_integer;
function GetState : TNameIDStates;
function GetName : string;
function GetOrigin : string;
private
Name : pstring;
Origin : pstring;
Line : sw_integer;
State : TNameIDStates;
end;
PNameIDCollection = ^TNameIDCollection;
TNameIDCollection = object(TSortedCollection)
function At(Index: sw_Integer): PNameID;
function Compare(Key1, Key2: Pointer): sw_Integer; virtual;
end;
THTMLLinkScanDocument = object(TObject)
constructor Init(const ADocName: string);
function GetName: string;
function GetUniqueName: string;
function GetAliasCount: sw_integer;
function GetAlias(Index: sw_integer): string;
procedure AddAlias(const Alias: string);
constructor Load(var S: TStream);
procedure Store(var S: TStream);
destructor Done; virtual;
private
DocName: PString;
Synonym: PHTMLLinkScanDocument;
Aliases: PStringCollection;
end;
PHTMLLinkScanDocumentCollection = ^THTMLLinkScanDocumentCollection;
THTMLLinkScanDocumentCollection = object(TSortedCollection)
constructor Init(AScanner: PHTMLLinkScanner; ALimit, ADelta: Integer);
function Compare(Key1, Key2: Pointer): sw_Integer; virtual;
function At(Index: sw_Integer): PHTMLLinkScanDocument;
function SearchDocument(const DocName: string): PHTMLLinkScanDocument;
procedure MoveAliasesToSynonym;
private
Scanner: PHTMLLinkScanner;
end;
THTMLLinkScanner = object(TCustomHTMLLinkScanner)
constructor Init(const ABaseDir: string);
procedure SetBaseDir(const ABaseDir: string);
{a}function FindID(const AName : string) : PNameID; virtual;
function GetDocumentCount: sw_integer;
function GetDocumentURL(DocIndex: sw_integer): string;
function GetUniqueDocumentURL(DocIndex: sw_integer): string;
function GetDocumentAliasCount(DocIndex: sw_integer): sw_integer;
function GetDocumentAlias(DocIndex, AliasIndex: sw_integer): string;
constructor LoadDocuments(var S: TStream);
procedure StoreDocuments(var S: TStream);
destructor Done; virtual;
public
procedure AddLink(const LinkText, LinkURL: string); virtual;
private
Documents: PHTMLLinkScanDocumentCollection;
BaseDir: PString;
function ExpandChildURL(const S: string): string;
function NormalizeChildURL(const S: string): string;
end;
THTMLLinkScanState = (ssScheduled,ssProcessing,ssScanned);
PHTMLLinkScanFile = ^THTMLLinkScanFile;
THTMLLinkScanFile = object(TObject)
constructor Init(const ADocumentURL: string);
function GetDocumentURL: string;
destructor Done; virtual;
function AddReferencedName (const AName : string) : PNameID;
function AddFoundName (const AName : string) : PNameID;
procedure CheckNameList;
function FindID(const AName : string) : PNameID; virtual;
private
DocumentURL : PString;
NameIDList : PNameIDCollection;
Owner : PHTMLLinkScanner;
public
State : THTMLLinkScanState;
end;
PHTMLLinkScanFileCollection = ^THTMLLinkScanFileCollection;
THTMLLinkScanFileCollection = object(TSortedCollection)
function At(Index: sw_Integer): PHTMLLinkScanFile;
function Compare(Key1, Key2: Pointer): sw_Integer; virtual;
function SearchFile(const DocURL: string): PHTMLLinkScanFile;
function FindFileWithState(AState: THTMLLinkScanState): PHTMLLinkScanFile;
procedure CheckNameIDLists;
end;
THTMLLinkScanOption = (soSubDocsOnly);
THTMLLinkScanOptions = set of THTMLLinkScanOption;
THTMLFileLinkScanner = object(THTMLLinkScanner)
constructor Init(const ABaseDir: string);
procedure ProcessDocument(const DocumentURL: string; AOptions: THTMLLinkScanOptions);
destructor Done; virtual;
public
function GetDocumentBaseURL: string; virtual;
function FindID(const AName : string) : PNameID; virtual;
procedure AddLink(const LinkText, LinkURL: string); virtual;
procedure AddRef(LinkURL: string); virtual;
procedure AddNameID(AName: string); virtual;
procedure AddID(AName: string); virtual;
function CheckURL(const URL: string): boolean; virtual;
private
Options: THTMLLinkScanOptions;
BaseURL: string;
CurBaseURL: string;
IDList : PNameIDCollection;
DocumentFiles: PHTMLLinkScanFileCollection;
procedure ScheduleDoc(const DocumentURL: string);
public
procedure ProcessDoc(Doc: PHTMLLinkScanFile); virtual;
end;
procedure RegisterWHTMLScan;
implementation
uses
WUtils;
const
RHTMLLinkScanDocument: TStreamRec = (
ObjType: 19500;
VmtLink: Ofs(TypeOf(THTMLLinkScanDocument)^);
Load: @THTMLLinkScanDocument.Load;
Store: @THTMLLinkScanDocument.Store
);
const
CurrentHTMLIndexVersion : sw_integer = HTMLIndexVersion;
function TCustomHTMLLinkScanner.DocAddTextChar(C: AnsiChar): boolean;
var Added: boolean;
begin
Added:=false;
if InAnchor then
begin
CurLinkText:=CurLinkText+C;
Added:=true;
end;
if ord(c)>32 then
LastSynonym:=nil;
DocAddTextChar:=Added;
end;
procedure TCustomHTMLLinkScanner.DocAnchor(Entered: boolean);
begin
if Entered then
begin
CurLinkText:='';
if DocGetTagParam('HREF',CurURL) then
HasHRef:=true
else
CurURL:='';
if not DocGetTagParam('NAME',CurName) then
if not DocGetTagParam('ID',CurName) then
CurName:='';
if not DocGetTagParam('ID',CurID) then
CurID:='';
if CurName<>'' then
begin
InNameAnchor:=true;
If Pos('#',CurName)=0 then
CurName:=CurDoc+'#'+CurName;
CurName:=Trim(CurName);
CurName:=CompleteURL(GetDocumentBaseURL,CurName);
if CurURL='' then
CurURL:=CurName;
end
else
CurName:='';
CurURL:=Trim(CurURL);
if pos('#',CurURL)=1 then
CurURL:=CurDoc+CurURL;
CurURL:=CompleteURL(GetDocumentBaseURL,CurURL);
end
else
begin
CurLinkText:=Trim(CurLinkText);
if HasHRef then
begin
if CheckURL(CurURL) and CheckText(CurLinkText) and
not DisableCrossIndexing then
begin
AddLink(CurLinkText,CurURL);
{$ifdef DEBUG}
DebugMessage(CurDoc,' Adding ScanLink "'+CurLinkText+'" to "'+
CurURL+'"',Line,1);
{$endif DEBUG}
end;
{ Be sure to parse referenced file,
even if that link is not valid }
AddRef(CurURL);
end;
if not HasHRef and InNameAnchor and CheckURL(CurName) and CheckText(CurLinkText) then
begin
AddLink(CurLinkText,CurName);
{$ifdef DEBUG}
DebugMessage(CurDoc,' Adding ScanName "'+CurLinkText+'" to "'+CurName+'"',Line,1);
{$endif DEBUG}
end;
if InNameAnchor then
begin
AddNameID(CurName);
end;
if not HasHRef and (CurID<>'') then
AddID(CurID);
InNameAnchor:=false;
HasHRef:=false;
end;
InAnchor:=Entered;
end;
function TCustomHTMLLinkScanner.GetDocumentBaseURL: string;
begin
{ Abstract }
GetDocumentBaseURL:='';
end;
function TCustomHTMLLinkScanner.CheckURL(const URL: string): boolean;
begin
{ Abstract }
CheckURL:=true;
end;
function TCustomHTMLLinkScanner.CheckText(const Text: string): boolean;
begin
{ Abstract }
CheckText:=true;
end;
procedure TCustomHTMLLinkScanner.AddLink(const LinkText, LinkURL: string);
begin
{ Abstract }
end;
procedure TCustomHTMLLinkScanner.AddRef(LinkURL: string);
begin
{ Abstract }
end;
procedure TCustomHTMLLinkScanner.AddNameID(AName: string);
begin
{ Abstract }
end;
procedure TCustomHTMLLinkScanner.AddID(AName: string);
begin
{ Abstract }
end;
constructor TNameID.Init(const AName : string; Astate : TNameIDState);
begin
inherited Init;
SetStr(Name,AName);
Origin:=nil;
State:=[AState];
end;
destructor TNameID.Done;
begin
if assigned(Name) then
DisposeStr(Name);
Name:=nil;
if assigned(Origin) then
DisposeStr(Origin);
Origin:=nil;
inherited Done;
end;
procedure TNameID.SetState(Astate : TNameIDState; enabled : boolean);
begin
if enabled then
Include(State,AState)
else
Exclude(State,AState);
end;
function TNameID.GetState : TNameIDStates;
begin
GetState:=State;
end;
function TNameID.GetName : string;
begin
GetName:=GetStr(Name);
end;
function TNameID.GetOrigin : string;
begin
GetOrigin:=GetStr(Origin);
end;
procedure TNameID.SetOrigin(const AOrigin : string);
begin
SetStr(Origin,AOrigin);
end;
procedure TNameID.SetLine(ALine : sw_integer);
begin
Line:=ALine;
end;
function TNameID.GetLine : sw_integer;
begin
GetLine:=Line;
end;
function TNameIDCollection.At(Index: sw_Integer): PNameID;
begin
At:=Inherited At(Index);
end;
function TNameIDCollection.Compare(Key1, Key2: Pointer): sw_Integer;
var
R: sw_integer;
K1: PNameID absolute Key1;
K2: PNameID absolute Key2;
S1,S2: string;
begin
S1:=K1^.GetName;
S2:=K2^.GetName;
S1:=UpcaseStr(S1); S2:=UpcaseStr(S2);
if S1<S2 then R:=-1 else
if S1>S2 then R:= 1 else
R:=0;
Compare:=R;
end;
constructor THTMLLinkScanDocument.Init(const ADocName: string);
begin
inherited Init;
SetStr(DocName,ADocName);
New(Aliases, Init(10,10));
{$ifdef DEBUG}
DebugMessage('',' Adding New LinkScan document "'+ADocName+'"',1,1);
{$endif DEBUG}
Synonym:=nil;
end;
function THTMLLinkScanDocument.GetName: string;
begin
GetName:=GetStr(DocName);
end;
function THTMLLinkScanDocument.GetUniqueName: string;
var
PD: PHTMLLinkScanDocument;
begin
PD:=@Self;
while assigned(PD^.synonym) do
PD:=PD^.Synonym;
GetUniqueName:=GetStr(PD^.DocName);
end;
function THTMLLinkScanDocument.GetAliasCount: sw_integer;
begin
GetAliasCount:=Aliases^.Count;
end;
function THTMLLinkScanDocument.GetAlias(Index: sw_integer): string;
begin
GetAlias:=GetStr(Aliases^.At(Index));
end;
procedure THTMLLinkScanDocument.AddAlias(const Alias: string);
begin
Aliases^.Insert(NewStr(Alias));
{$ifdef DEBUG}
DebugMessage('',' Adding alias "'+Alias+'" to LinkScan document "'+GetStr(DocName)+'"',1,1);
{$endif DEBUG}
end;
constructor THTMLLinkScanDocument.Load(var S: TStream);
var
i: sw_integer;
begin
inherited Init;
DocName:=S.ReadStr;
if assigned(DocName) then
for i:=1 to Length(DocName^) do
if (DocName^[i]='\') or (DocName^[i]='/') then
DocName^[i]:=DirSep;
New(Aliases, Load(S));
end;
procedure THTMLLinkScanDocument.Store(var S: TStream);
begin
S.WriteStr(DocName);
Aliases^.Store(S);
end;
destructor THTMLLinkScanDocument.Done;
begin
if Assigned(Aliases) then
Dispose(Aliases, Done);
Aliases:=nil;
if Assigned(DocName) then
DisposeStr(DocName);
DocName:=nil;
inherited Done;
end;
constructor THTMLLinkScanDocumentCollection.Init(AScanner: PHTMLLinkScanner; ALimit, ADelta: Integer);
begin
inherited Init(ALimit,ADelta);
Scanner:=AScanner;
end;
function THTMLLinkScanDocumentCollection.Compare(Key1, Key2: Pointer): sw_Integer;
var R: sw_integer;
K1: PHTMLLinkScanDocument absolute Key1;
K2: PHTMLLinkScanDocument absolute Key2;
S1,S2: string;
begin
S1:=K1^.GetName; S2:=K2^.GetName;
if Assigned(Scanner) then
begin S1:=Scanner^.ExpandChildURL(S1); S2:=Scanner^.ExpandChildURL(S2); end;
S1:=UpcaseStr(S1); S2:=UpcaseStr(S2);
if S1<S2 then R:=-1 else
if S1>S2 then R:= 1 else
R:=0;
Compare:=R;
end;
function THTMLLinkScanDocumentCollection.At(Index: sw_Integer): PHTMLLinkScanDocument;
begin
At:=inherited At(Index);
end;
function THTMLLinkScanDocumentCollection.SearchDocument(const DocName: string): PHTMLLinkScanDocument;
var D,P: PHTMLLinkScanDocument;
Index: sw_integer;
begin
New(D, Init(DocName));
if Search(D, Index)=false then P:=nil else
P:=At(Index);
Dispose(D, Done);
SearchDocument:=P;
end;
procedure THTMLLinkScanDocumentCollection.MoveAliasesToSynonym;
procedure MoveAliases(P: PHTMLLinkScanDocument);
var
PD: PHTMLLinkScanDocument;
i: sw_integer;
begin
if not assigned(P^.synonym) then
exit;
PD:=P;
while assigned(PD^.synonym) do
PD:=PD^.Synonym;
For i:=P^.GetAliasCount-1 downto 0 do
begin
PD^.AddAlias(P^.GetAlias(i));
P^.Aliases^.AtFree(i);
end;
end;
begin
ForEach(TCallbackProcParam(@MoveAliases));
end;
constructor THTMLLinkScanner.Init(const ABaseDir: string);
begin
inherited Init;
New(Documents, Init(@Self,50,100));
SetBaseDir(ABaseDir);
end;
procedure THTMLLinkScanner.SetBaseDir(const ABaseDir: string);
begin
if Assigned(BaseDir) then DisposeStr(BaseDir);
BaseDir:=NewStr(CompleteDir(ABaseDir));
end;
function THTMLLinkScanner.GetDocumentCount: sw_integer;
begin
GetDocumentCount:=Documents^.Count;
end;
function THTMLLinkScanner.ExpandChildURL(const S: string): string;
begin
ExpandChildURL:=CompleteURL(GetStr(BaseDir),S);
end;
function THTMLLinkScanner.NormalizeChildURL(const S: string): string;
var URL: string;
begin
URL:=S;
if GetStr(BaseDir)<>'' then
if copy(UpcaseStr(S),1,length(GetStr(BaseDir)))=UpcaseStr(GetStr(BaseDir)) then
URL:=copy(S,length(GetStr(BaseDir))+1,length(S));
NormalizeChildURL:=URL;
end;
function THTMLLinkScanner.GetDocumentURL(DocIndex: sw_integer): string;
begin
GetDocumentURL:=ExpandChildURL(Documents^.At(DocIndex)^.GetName);
end;
function THTMLLinkScanner.GetUniqueDocumentURL(DocIndex: sw_integer): string;
begin
GetUniqueDocumentURL:=ExpandChildURL(Documents^.At(DocIndex)^.GetUniqueName);
end;
function THTMLLinkScanner.GetDocumentAliasCount(DocIndex: sw_integer): sw_integer;
begin
GetDocumentAliasCount:=Documents^.At(DocIndex)^.GetAliasCount;
end;
function THTMLLinkScanner.GetDocumentAlias(DocIndex, AliasIndex: sw_integer): string;
begin
GetDocumentAlias:=Documents^.At(DocIndex)^.GetAlias(AliasIndex);
end;
procedure THTMLLinkScanner.AddLink(const LinkText, LinkURL: string);
var D: PHTMLLinkScanDocument;
DoInsert: boolean;
int: sw_integer;
Text: string;
error: word;
begin
D:=Documents^.SearchDocument(LinkURL);
if D=nil then
begin
New(D, Init(NormalizeChildURL(LinkURL)));
Documents^.Insert(D);
end;
If assigned(LastSynonym) then
LastSynonym^.Synonym:=D;
DoInsert:=true;
If (length(LinkText)=0) or (Pos(',',LinkText)=1) then
DoInsert:=false;
Val(LinkText,int,error);
If (Error>1) and (LinkText[Error]=' ') then
Text:=Trim(Copy(LinkText,error+1,length(LinkText)))
else
Text:=LinkText;
IF DoInsert then
D^.AddAlias(Text);
If InNameAnchor then
LastSynonym:=D;
end;
constructor THTMLLinkScanner.LoadDocuments(var S: TStream);
var P,L: longint;
OK: boolean;
PS: PString;
begin
OK:=false;
P:=S.GetPos;
S.Read(L,sizeof(L));
if (S.Status=stOK) and (L=HTMLIndexMagicNo) then
begin
S.Read(L,sizeof(L));
CurrentHTMLIndexVersion:=L;
OK:=(S.Status=stOK);
end;
if not OK then
begin
S.Reset;
S.Seek(P);
end
else
BaseDir:=S.ReadStr;
New(Documents, Load(S));
if not Assigned(Documents) then
Fail;
Documents^.MoveAliasesToSynonym;
CurrentHTMLIndexVersion:=HTMLIndexVersion;
end;
function THTMLLinkScanner.FindID(const AName : string) : PNameID;
begin
{abstract}FindID:=nil;
end;
procedure THTMLLinkScanner.StoreDocuments(var S: TStream);
var L: longint;
begin
L:=HTMLIndexMagicNo;
S.Write(L,sizeof(L));
L:=HTMLIndexVersion;
CurrentHTMLIndexVersion:=L;
S.Write(L,sizeof(L));
S.WriteStr(BaseDir);
Documents^.MoveAliasesToSynonym;
Documents^.Store(S);
end;
destructor THTMLLinkScanner.Done;
begin
if Assigned(Documents) then
Dispose(Documents, Done);
Documents:=nil;
if Assigned(BaseDir) then
DisposeStr(BaseDir);
BaseDir:=nil;
inherited Done;
end;
constructor THTMLLinkScanFile.Init(const ADocumentURL: string);
begin
inherited Init;
SetStr(DocumentURL,ADocumentURL);
New(NameIDList, Init(5,10));
end;
function THTMLLinkScanFile.GetDocumentURL: string;
begin
GetDocumentURL:=GetStr(DocumentURL);
end;
function THTMLLinkScanFile.AddReferencedName (const AName : string) : PNameID;
var
index : sw_integer;
PN : PNameID;
begin
new(PN,init(AName,IsReferenced));
if not NameIDList^.Search(PN,Index) then
NameIDList^.Insert(PN)
else
begin
dispose(PN,Done);
PN:=NameIDList^.At(Index);
PN^.SetState(IsReferenced,true);
end;
AddReferencedName:=PN;
end;
function THTMLLinkScanFile.AddFoundName (const AName : string) : PNameID;
var
index : sw_integer;
PN : PNameID;
begin
new(PN,init(AName,IsFound));
if not NameIDList^.Search(PN,Index) then
NameIDList^.Insert(PN)
else
begin
dispose(PN,Done);
PN:=NameIDList^.At(Index);
PN^.SetState(IsFound,true);
end;
AddFoundName:=PN;
end;
procedure THTMLLinkScanFile.CheckNameList;
var
i : sw_integer;
PN,PN2 : PNameID;
begin
{$ifdef DEBUG}
for i:=0 to NameIDList^.Count-1 do
begin
PN:=NameIDList^.At(i);
if not (IsFound in PN^.GetState) then
begin
if (IsReferenced in PN^.GetState) then
DebugMessage(GetDocumentURL,'Name "'+PN^.GetName+'" from "'+
PN^.GetOrigin+'" not found',1,1);
PN2:=Owner^.FindID(PN^.GetName);
if assigned(PN2) then
begin
DebugMessage('','ID found in "'+PN2^.GetOrigin+'"',1,1);
if not (IsFound in PN2^.GetState) then
DebugMessage('','ID not found',1,1);
end;
end;
end;
{$endif DEBUG}
end;
function THTMLLinkScanFile.FindID(const AName : string) : PNameID;
var
PN : PNameID;
Index : sw_integer;
begin
new(PN,init(AName,IsID));
if NameIDList^.Search(PN,Index) then
begin
dispose(PN,done);
PN:=NameIDList^.At(Index);
if (IsID in PN^.GetState) then
FindId:=PN
else
FindID:=nil;
end
else
begin
dispose(PN,done);
PN:=nil;
FindID:=nil;
end;
end;
destructor THTMLLinkScanFile.Done;
begin
if Assigned(DocumentURL) then
DisposeStr(DocumentURL);
DocumentURL:=nil;
dispose(NameIDList,done);
NameIDList:=nil;
inherited Done;
end;
function THTMLLinkScanFileCollection.At(Index: sw_Integer): PHTMLLinkScanFile;
begin
At:=inherited At(Index);
end;
function THTMLLinkScanFileCollection.Compare(Key1, Key2: Pointer): sw_Integer;
var R: integer;
K1: PHTMLLinkScanFile absolute Key1;
K2: PHTMLLinkScanFile absolute Key2;
S1,S2: string;
begin
S1:=UpcaseStr(K1^.GetDocumentURL); S2:=UpcaseStr(K2^.GetDocumentURL);
if S1<S2 then R:=-1 else
if S1>S2 then R:= 1 else
R:=0;
Compare:=R;
end;
function THTMLLinkScanFileCollection.SearchFile(const DocURL: string): PHTMLLinkScanFile;
var P,D: PHTMLLinkScanFile;
Index: sw_integer;
begin
New(D, Init(DocURL));
if Search(D,Index)=false then P:=nil else
P:=At(Index);
Dispose(D, Done);
SearchFile:=P;
end;
function THTMLLinkScanFileCollection.FindFileWithState(AState: THTMLLinkScanState): PHTMLLinkScanFile;
var I: sw_integer;
P,D: PHTMLLinkScanFile;
begin
P:=nil;
for I:=0 to Count-1 do
begin
D:=At(I);
if D^.State=AState then
begin
P:=D;
Break;
end;
end;
FindFileWithState:=P;
end;
procedure THTMLLinkScanFileCollection.CheckNameIDLists;
procedure DoCheckNameList(P : PHTMLLinkScanFile);
begin
P^.CheckNameList;
end;
begin
ForEach(TCallbackProcParam(@DoCheckNameList));
end;
constructor THTMLFileLinkScanner.Init(const ABaseDir: string);
begin
inherited Init(ABaseDir);
New(DocumentFiles, Init(50,100));
New(IDList, Init(50,100));
{$ifdef DEBUG}
DebugMessage('','THTMLFileLinkScanner Init "'+ABaseDir+'"',1,1);
{$endif DEBUG}
end;
procedure THTMLFileLinkScanner.ProcessDocument(const DocumentURL: string; AOptions: THTMLLinkScanOptions);
var P: PHTMLLinkScanFile;
begin
CurBaseURL:='';
Options:=AOptions;
ScheduleDoc(DocumentURL);
repeat
P:=DocumentFiles^.FindFileWithState(ssScheduled);
if Assigned(P) then
ProcessDoc(P);
until P=nil;
{$ifdef DEBUG}
DebugMessage('','THTMLFileLinkScanner CheckNameList start ',1,1);
DocumentFiles^.CheckNameIDLists;
DebugMessage('','THTMLFileLinkScanner CheckNameList end ',1,1);
{$endif DEBUG}
end;
function THTMLFileLinkScanner.GetDocumentBaseURL: string;
begin
GetDocumentBaseURL:=CurBaseURL;
end;
function THTMLFileLinkScanner.CheckURL(const URL: string): boolean;
var OK: boolean;
begin
if soSubDocsOnly in Options then
OK:=UpcaseStr(copy(URL,1,length(BaseURL)))=UpcaseStr(BaseURL)
else
OK:=true;
CheckURL:=OK;
end;
procedure THTMLFileLinkScanner.AddLink(const LinkText, LinkURL: string);
var D: PHTMLLinkScanFile;
P: sw_integer;
DocURL: string;
begin
P:=Pos('#',LinkURL);
if P=0 then DocURL:=LinkURL else DocURL:=copy(LinkURL,1,P-1);
D:=DocumentFiles^.SearchFile(DocURL);
if not Assigned(D) then
ScheduleDoc(DocURL);
inherited AddLink(LinkText,LinkURL);
end;
procedure THTMLFileLinkScanner.AddRef(LinkURL: string);
var D: PHTMLLinkScanFile;
P: sw_integer;
DocURL: string;
PN : PNameID;
begin
{$ifdef DEBUG}
DebugMessage(CurDoc,' Adding Ref to "'+
LinkURL+'"',Line,1);
{$endif DEBUG}
P:=Pos('#',LinkURL);
if P=0 then DocURL:=LinkURL else DocURL:=copy(LinkURL,1,P-1);
D:=DocumentFiles^.SearchFile(DocURL);
if not Assigned(D) then
ScheduleDoc(DocURL);
D:=DocumentFiles^.SearchFile(DocURL);
if P>0 then
begin
PN:=D^.AddReferencedName(copy(LinkURL,P+1,length(LinkURL)));
PN^.SetOrigin(CurDoc);
PN^.SetLine(Line);
end;
end;
procedure THTMLFileLinkScanner.AddNameID(AName : string);
var D: PHTMLLinkScanFile;
P: sw_integer;
PN : PNameID;
DocURL: string;
begin
{$ifdef DEBUG}
DebugMessage(CurDoc,' Adding NameID "'+
CurName+'"',Line,1);
{$endif DEBUG}
P:=Pos('#',AName);
if P=0 then DocURL:=AName else DocURL:=copy(AName,1,P-1);
D:=DocumentFiles^.SearchFile(DocURL);
if not Assigned(D) then
ScheduleDoc(DocURL);
D:=DocumentFiles^.SearchFile(DocURL);
PN:=D^.AddFoundName(copy(AName,P+1,length(AName)));
PN^.SetOrigin(CurDoc);
PN^.SetLine(Line);
end;
procedure THTMLFileLinkScanner.AddID(AName : string);
var
D: PHTMLLinkScanFile;
PN : PNameID;
index : sw_integer;
begin
{$ifdef DEBUG}
DebugMessage(CurDoc,' Adding Id "'+
AName+'"',Line,1);
{$endif DEBUG}
D:=DocumentFiles^.SearchFile(CurDoc);
if not Assigned(D) then
ScheduleDoc(CurDoc);
D:=DocumentFiles^.SearchFile(CurDoc);
PN:=D^.AddFoundName(AName);
PN^.SetState(IsId,true);
PN^.SetOrigin(CurDoc);
PN^.SetLine(Line);
new(PN,init(AName,IsID));
if IDList^ .Search(PN,index) then
begin
dispose(PN,done);
{$ifdef DEBUG}
PN:=IDList^.At(Index);
DebugMessage(CurDoc,'ID "'+AName+'" already defined in "'+
PN^.GetOrigin+'('+IntToStr(PN^.GetLine)+')"',Line,1);
{$endif DEBUG}
end
else
begin
IDList^.Insert(PN);
PN^.SetOrigin(CurDoc);
PN^.SetLine(Line);
end;
end;
function THTMLFileLinkScanner.FindID(const AName : string) : PNameID;
Function ContainsNamedID(D : PHTMLLinkScanFile) : boolean;
begin
ContainsNamedID:=D^.FindID(AName)<>nil;
end;
var
D : PHTMLLinkScanFile;
begin
D:=DocumentFiles^.FirstThat(TCallbackFunBoolParam(@ContainsNamedID));
if assigned(D) then
FindID:=D^.FindID(AName)
else
FindID:=nil;
end;
procedure THTMLFileLinkScanner.ProcessDoc(Doc: PHTMLLinkScanFile);
var F: PDOSTextFile;
begin
if Assigned(Doc)=false then Exit;
Doc^.State:=ssProcessing;
CurDoc:=Doc^.GetDocumentURL;
New(F, Init(CurDoc));
if Assigned(F) then
begin
CurBaseURL:=CompleteURL(CurDoc,'');
{$ifdef DEBUG}
DebugMessage(CurDoc,'Processing "'+CurDoc+'"',1,1);
{$endif DEBUG}
Process(F);
{$ifdef DEBUG}
DebugMessage(CurDoc,'Finished processing "'+CurDoc+'"',Line,1);
{$endif DEBUG}
Dispose(F, Done);
end
else
begin
{$ifdef DEBUG}
DebugMessage(CurDoc,'file not found',1,1);
{$endif DEBUG}
end;
Doc^.State:=ssScanned;
CurDoc:='';
end;
procedure THTMLFileLinkScanner.ScheduleDoc(const DocumentURL: string);
var D: PHTMLLinkScanFile;
begin
New(D, Init(DocumentURL));
D^.State:=ssScheduled;
D^.Owner:=@Self;
{$ifdef DEBUG}
DebugMessage('','Scheduling file "'+DocumentURL+'"',1,1);
{$endif DEBUG}
DocumentFiles^.Insert(D);
end;
destructor THTMLFileLinkScanner.Done;
begin
if Assigned(DocumentFiles) then
Dispose(DocumentFiles, Done);
DocumentFiles:=nil;
if Assigned(IDList) then
Dispose(IDList, Done);
IDList:=nil;
inherited Done;
end;
procedure RegisterWHTMLScan;
begin
RegisterType(RHTMLLinkScanDocument);
end;
END.