* renamed lowercased

This commit is contained in:
pierre 2000-04-25 10:28:52 +00:00
parent bbe65b02c0
commit bf29b96e0e

View File

@ -1,450 +0,0 @@
{
$Id$
This file is part of the Free Pascal Integrated Development Environment
Copyright (c) 2000 by Berczi Gabor
HTML scanner objects
See the file COPYING.FPC, included in this distribution,
for details about the copyright.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
**********************************************************************}
unit WHTMLScn;
interface
uses Objects,
WHTML;
type
TCustomHTMLLinkScanner = object(THTMLParser)
function DocAddTextChar(C: char): boolean; virtual;
procedure DocAnchor(Entered: boolean); virtual;
public
{a}function CheckURL(const URL: string): boolean; virtual;
{a}function CheckText(const Text: string): boolean; virtual;
{a}procedure AddLink(const LinkText, LinkURL: string); virtual;
{a}function GetDocumentBaseURL: string; virtual;
private
CurLinkText: string;
CurURL: string;
InAnchor: boolean;
end;
PHTMLLinkScanDocument = ^THTMLLinkScanDocument;
THTMLLinkScanDocument = object(TObject)
constructor Init(const ADocName: string);
function GetName: string;
function GetAliasCount: sw_integer;
function GetAlias(Index: sw_integer): string;
procedure AddAlias(const Alias: string);
constructor Load(var S: TStream);
procedure Store(var S: TStream);
destructor Done; virtual;
private
DocName: PString;
Aliases: PStringCollection;
end;
PHTMLLinkScanDocumentCollection = ^THTMLLinkScanDocumentCollection;
THTMLLinkScanDocumentCollection = object(TSortedCollection)
function Compare(Key1, Key2: Pointer): sw_Integer; virtual;
function At(Index: sw_Integer): PHTMLLinkScanDocument;
function SearchDocument(const DocName: string): PHTMLLinkScanDocument;
end;
THTMLLinkScanner = object(TCustomHTMLLinkScanner)
constructor Init;
function GetDocumentCount: sw_integer;
function GetDocumentURL(DocIndex: sw_integer): string;
function GetDocumentAliasCount(DocIndex: sw_integer): sw_integer;
function GetDocumentAlias(DocIndex, AliasIndex: sw_integer): string;
procedure StoreDocuments(var S: TStream);
destructor Done; virtual;
public
procedure AddLink(const LinkText, LinkURL: string); virtual;
private
Documents: PHTMLLinkScanDocumentCollection;
end;
THTMLLinkScanState = (ssScheduled,ssProcessing,ssScanned);
PHTMLLinkScanFile = ^THTMLLinkScanFile;
THTMLLinkScanFile = object(TObject)
constructor Init(const ADocumentURL: string);
function GetDocumentURL: string;
destructor Done; virtual;
private
DocumentURL : PString;
public
State : THTMLLinkScanState;
end;
PHTMLLinkScanFileCollection = ^THTMLLinkScanFileCollection;
THTMLLinkScanFileCollection = object(TSortedCollection)
function At(Index: sw_Integer): PHTMLLinkScanFile;
function Compare(Key1, Key2: Pointer): sw_Integer; virtual;
function SearchFile(const DocURL: string): PHTMLLinkScanFile;
function FindFileWithState(AState: THTMLLinkScanState): PHTMLLinkScanFile;
end;
THTMLLinkScanOption = (soSubDocsOnly);
THTMLLinkScanOptions = set of THTMLLinkScanOption;
THTMLFileLinkScanner = object(THTMLLinkScanner)
constructor Init;
procedure ProcessDocument(const DocumentURL: string; AOptions: THTMLLinkScanOptions);
destructor Done; virtual;
public
function GetDocumentBaseURL: string; virtual;
procedure AddLink(const LinkText, LinkURL: string); virtual;
function CheckURL(const URL: string): boolean; virtual;
private
Options: THTMLLinkScanOptions;
BaseURL: string;
CurBaseURL: string;
DocumentFiles: PHTMLLinkScanFileCollection;
procedure ScheduleDoc(const DocumentURL: string);
public
procedure ProcessDoc(Doc: PHTMLLinkScanFile); virtual;
end;
procedure RegisterWHTMLScan;
implementation
uses WUtils;
const
RHTMLLinkScanDocument: TStreamRec = (
ObjType: 19500;
VmtLink: Ofs(TypeOf(THTMLLinkScanDocument)^);
Load: @THTMLLinkScanDocument.Load;
Store: @THTMLLinkScanDocument.Store
);
function TCustomHTMLLinkScanner.DocAddTextChar(C: char): boolean;
begin
if InAnchor then
CurLinkText:=CurLinkText+C;
end;
procedure TCustomHTMLLinkScanner.DocAnchor(Entered: boolean);
begin
if Entered then
begin
CurLinkText:='';
if DocGetTagParam('HREF',CurURL)=false then CurURL:='';
CurURL:=Trim(CurURL);
CurURL:=CompleteURL(GetDocumentBaseURL,CurURL);
end
else
begin
CurLinkText:=Trim(CurLinkText);
if CheckURL(CurURL) and CheckText(CurLinkText) then
AddLink(CurLinkText,CurURL);
end;
InAnchor:=Entered;
end;
function TCustomHTMLLinkScanner.GetDocumentBaseURL: string;
begin
{ Abstract }
GetDocumentBaseURL:='';
end;
function TCustomHTMLLinkScanner.CheckURL(const URL: string): boolean;
begin
{ Abstract }
CheckURL:=true;
end;
function TCustomHTMLLinkScanner.CheckText(const Text: string): boolean;
begin
{ Abstract }
CheckText:=true;
end;
procedure TCustomHTMLLinkScanner.AddLink(const LinkText, LinkURL: string);
begin
{ Abstract }
end;
constructor THTMLLinkScanDocument.Init(const ADocName: string);
begin
inherited Init;
SetStr(DocName,ADocName);
New(Aliases, Init(10,10));
end;
function THTMLLinkScanDocument.GetName: string;
begin
GetName:=GetStr(DocName);
end;
function THTMLLinkScanDocument.GetAliasCount: sw_integer;
begin
GetAliasCount:=Aliases^.Count;
end;
function THTMLLinkScanDocument.GetAlias(Index: sw_integer): string;
begin
GetAlias:=GetStr(Aliases^.At(Index));
end;
procedure THTMLLinkScanDocument.AddAlias(const Alias: string);
begin
Aliases^.Insert(NewStr(Alias));
end;
constructor THTMLLinkScanDocument.Load(var S: TStream);
begin
inherited Init;
DocName:=S.ReadStr;
New(Aliases, Load(S));
end;
procedure THTMLLinkScanDocument.Store(var S: TStream);
var I: integer;
begin
S.WriteStr(DocName);
Aliases^.Store(S);
end;
destructor THTMLLinkScanDocument.Done;
begin
inherited Done;
if Assigned(Aliases) then Dispose(Aliases, Done); Aliases:=nil;
if Assigned(DocName) then DisposeStr(DocName); DocName:=nil;
end;
function THTMLLinkScanDocumentCollection.Compare(Key1, Key2: Pointer): sw_Integer;
var R: sw_integer;
K1: PHTMLLinkScanDocument absolute Key1;
K2: PHTMLLinkScanDocument absolute Key2;
S1,S2: string;
begin
S1:=UpcaseStr(K1^.GetName); S2:=UpcaseStr(K2^.GetName);
if S1<S2 then R:=-1 else
if S1>S2 then R:= 1 else
R:=0;
Compare:=R;
end;
function THTMLLinkScanDocumentCollection.At(Index: sw_Integer): PHTMLLinkScanDocument;
begin
At:=inherited At(Index);
end;
function THTMLLinkScanDocumentCollection.SearchDocument(const DocName: string): PHTMLLinkScanDocument;
var D,P: PHTMLLinkScanDocument;
Index: sw_integer;
begin
New(D, Init(DocName));
if Search(D, Index)=false then P:=nil else
P:=At(Index);
Dispose(D, Done);
SearchDocument:=P;
end;
constructor THTMLLinkScanner.Init;
begin
inherited Init;
New(Documents, Init(50,100));
end;
function THTMLLinkScanner.GetDocumentCount: sw_integer;
begin
GetDocumentCount:=Documents^.Count;
end;
function THTMLLinkScanner.GetDocumentURL(DocIndex: sw_integer): string;
begin
GetDocumentURL:=Documents^.At(DocIndex)^.GetName;
end;
function THTMLLinkScanner.GetDocumentAliasCount(DocIndex: sw_integer): sw_integer;
begin
GetDocumentAliasCount:=Documents^.At(DocIndex)^.GetAliasCount;
end;
function THTMLLinkScanner.GetDocumentAlias(DocIndex, AliasIndex: sw_integer): string;
begin
GetDocumentAlias:=Documents^.At(DocIndex)^.GetAlias(AliasIndex);
end;
procedure THTMLLinkScanner.AddLink(const LinkText, LinkURL: string);
var D: PHTMLLinkScanDocument;
begin
D:=Documents^.SearchDocument(LinkURL);
if D=nil then
begin
New(D, Init(LinkURL));
Documents^.Insert(D);
end;
D^.AddAlias(LinkText);
end;
procedure THTMLLinkScanner.StoreDocuments(var S: TStream);
begin
Documents^.Store(S);
end;
destructor THTMLLinkScanner.Done;
begin
inherited Done;
if Assigned(Documents) then Dispose(Documents, Done); Documents:=nil;
end;
constructor THTMLLinkScanFile.Init(const ADocumentURL: string);
begin
inherited Init;
SetStr(DocumentURL,ADocumentURL);
end;
function THTMLLinkScanFile.GetDocumentURL: string;
begin
GetDocumentURL:=GetStr(DocumentURL);
end;
destructor THTMLLinkScanFile.Done;
begin
inherited Done;
if Assigned(DocumentURL) then DisposeStr(DocumentURL); DocumentURL:=nil;
end;
function THTMLLinkScanFileCollection.At(Index: sw_Integer): PHTMLLinkScanFile;
begin
At:=inherited At(Index);
end;
function THTMLLinkScanFileCollection.Compare(Key1, Key2: Pointer): sw_Integer;
var R: integer;
K1: PHTMLLinkScanFile absolute Key1;
K2: PHTMLLinkScanFile absolute Key2;
S1,S2: string;
begin
S1:=UpcaseStr(K1^.GetDocumentURL); S2:=UpcaseStr(K2^.GetDocumentURL);
if S1<S2 then R:=-1 else
if S1>S2 then R:= 1 else
R:=0;
Compare:=R;
end;
function THTMLLinkScanFileCollection.SearchFile(const DocURL: string): PHTMLLinkScanFile;
var P,D: PHTMLLinkScanFile;
Index: sw_integer;
begin
New(D, Init(DocURL));
if Search(D,Index)=false then P:=nil else
P:=At(Index);
Dispose(D, Done);
SearchFile:=P;
end;
function THTMLLinkScanFileCollection.FindFileWithState(AState: THTMLLinkScanState): PHTMLLinkScanFile;
var I: sw_integer;
P,D: PHTMLLinkScanFile;
begin
P:=nil;
for I:=0 to Count-1 do
begin
D:=At(I);
if D^.State=AState then
begin
P:=D;
Break;
end;
end;
FindFileWithState:=P;
end;
constructor THTMLFileLinkScanner.Init;
begin
inherited Init;
New(DocumentFiles, Init(50,100));
end;
procedure THTMLFileLinkScanner.ProcessDocument(const DocumentURL: string; AOptions: THTMLLinkScanOptions);
var P: PHTMLLinkScanFile;
begin
CurBaseURL:=''; Options:=AOptions;
ScheduleDoc(DocumentURL);
repeat
P:=DocumentFiles^.FindFileWithState(ssScheduled);
if Assigned(P) then
ProcessDoc(P);
until P=nil;
end;
function THTMLFileLinkScanner.GetDocumentBaseURL: string;
begin
GetDocumentBaseURL:=CurBaseURL;
end;
function THTMLFileLinkScanner.CheckURL(const URL: string): boolean;
var OK: boolean;
begin
if soSubDocsOnly in Options then
OK:=UpcaseStr(copy(URL,1,length(BaseURL)))=UpcaseStr(BaseURL)
else
OK:=true;
CheckURL:=OK;
end;
procedure THTMLFileLinkScanner.AddLink(const LinkText, LinkURL: string);
var D: PHTMLLinkScanFile;
begin
D:=DocumentFiles^.SearchFile(LinkURL);
if Assigned(D)=false then
ScheduleDoc(LinkURL);
inherited AddLink(LinkText,LinkURL);
end;
procedure THTMLFileLinkScanner.ProcessDoc(Doc: PHTMLLinkScanFile);
var F: PDOSTextFile;
begin
if Assigned(Doc)=false then Exit;
Doc^.State:=ssProcessing;
New(F, Init(Doc^.GetDocumentURL));
if Assigned(F) then
begin
CurBaseURL:=CompleteURL(Doc^.GetDocumentURL,'');
Process(F);
Dispose(F, Done);
end;
Doc^.State:=ssScanned;
end;
procedure THTMLFileLinkScanner.ScheduleDoc(const DocumentURL: string);
var D: PHTMLLinkScanFile;
begin
New(D, Init(DocumentURL));
D^.State:=ssScheduled;
DocumentFiles^.Insert(D);
end;
destructor THTMLFileLinkScanner.Done;
begin
inherited Done;
if Assigned(DocumentFiles) then Dispose(DocumentFiles, Done); DocumentFiles:=nil;
end;
procedure RegisterWHTMLScan;
begin
RegisterType(RHTMLLinkScanDocument);
end;
END.
{
$Log$
Revision 1.1 2000-04-25 08:42:32 pierre
* New Gabor changes : see fixes.txt
}