+ added lastsynonym and InNameAnchor fields to TCustomHTMLLinkScanner

these allow to eliminate double index entries pointing to the same
    html file location (which had two different names).
This commit is contained in:
pierre 2002-04-23 09:55:22 +00:00
parent aa66edd3c9
commit 63ec8ab5bf
2 changed files with 96 additions and 10 deletions

View File

@ -1192,11 +1192,11 @@ begin
begin
LS^.SetBaseDir(DirOf(IndexFileName));
for I:=0 to LS^.GetDocumentCount-1 do
for J:=0 to LS^.GetDocumentAliasCount(I)-1 do
begin
TLI:=TopicLinks^.AddItem(LS^.GetDocumentURL(I));
TLI:=EncodeHTMLCtx(ID,TLI+1);
IndexEntries^.Insert(NewIndexEntry(FormatAlias(LS^.GetDocumentAlias(I,J)),ID,TLI));
for J:=0 to LS^.GetDocumentAliasCount(I)-1 do
IndexEntries^.Insert(NewIndexEntry(FormatAlias(LS^.GetDocumentAlias(I,J)),ID,TLI));
end;
Dispose(LS, Done);
end;
@ -1233,7 +1233,12 @@ end;
END.
{
$Log$
Revision 1.4 2002-04-11 07:04:23 pierre
Revision 1.5 2002-04-23 09:55:22 pierre
+ added lastsynonym and InNameAnchor fields to TCustomHTMLLinkScanner
these allow to eliminate double index entries pointing to the same
html file location (which had two different names).
Revision 1.4 2002/04/11 07:04:23 pierre
+ handle tables
Revision 1.3 2002/03/20 17:16:11 pierre

View File

@ -22,10 +22,11 @@ uses Objects,
const
HTMLIndexMagicNo = ord('H')+ord('H') shl 8+ord('I') shl 16+ord('X') shl 24;
HTMLIndexVersion = 1;
HTMLIndexVersion = 2;
type
PHTMLLinkScanner = ^THTMLLinkScanner;
PHTMLLinkScanDocument = ^THTMLLinkScanDocument;
TCustomHTMLLinkScanner = object(THTMLParser)
function DocAddTextChar(C: char): boolean; virtual;
@ -39,13 +40,14 @@ type
CurLinkText: string;
CurURL: string;
CurDoc: string;
InAnchor: boolean;
InAnchor,InNameAnchor: boolean;
LastSynonym: PHTMLLinkScanDocument;
end;
PHTMLLinkScanDocument = ^THTMLLinkScanDocument;
THTMLLinkScanDocument = object(TObject)
constructor Init(const ADocName: string);
function GetName: string;
function GetUniqueName: string;
function GetAliasCount: sw_integer;
function GetAlias(Index: sw_integer): string;
procedure AddAlias(const Alias: string);
@ -54,6 +56,7 @@ type
destructor Done; virtual;
private
DocName: PString;
Synonym: PHTMLLinkScanDocument;
Aliases: PStringCollection;
end;
@ -63,6 +66,7 @@ type
function Compare(Key1, Key2: Pointer): sw_Integer; virtual;
function At(Index: sw_Integer): PHTMLLinkScanDocument;
function SearchDocument(const DocName: string): PHTMLLinkScanDocument;
procedure MoveAliasesToSynonym;
private
Scanner: PHTMLLinkScanner;
end;
@ -72,6 +76,7 @@ type
procedure SetBaseDir(const ABaseDir: string);
function GetDocumentCount: sw_integer;
function GetDocumentURL(DocIndex: sw_integer): string;
function GetUniqueDocumentURL(DocIndex: sw_integer): string;
function GetDocumentAliasCount(DocIndex: sw_integer): sw_integer;
function GetDocumentAlias(DocIndex, AliasIndex: sw_integer): string;
constructor LoadDocuments(var S: TStream);
@ -142,6 +147,9 @@ const
Store: @THTMLLinkScanDocument.Store
);
const
CurrentHTMLIndexVersion : sw_integer = HTMLIndexVersion;
function TCustomHTMLLinkScanner.DocAddTextChar(C: char): boolean;
var Added: boolean;
begin
@ -151,6 +159,8 @@ begin
CurLinkText:=CurLinkText+C;
Added:=true;
end;
if ord(c)>32 then
LastSynonym:=nil;
DocAddTextChar:=Added;
end;
@ -161,7 +171,11 @@ begin
CurLinkText:='';
if DocGetTagParam('HREF',CurURL)=false then
if DocGetTagParam('NAME',CurURL) then
CurURL:=CurDoc+'#'+CurURL
begin
InNameAnchor:=true;
If Pos('#',CurURL)=0 then
CurURL:=CurDoc+'#'+CurURL;
end
else
CurURL:='';
CurURL:=Trim(CurURL);
@ -170,8 +184,9 @@ begin
else
begin
CurLinkText:=Trim(CurLinkText);
if CheckURL(CurURL) and CheckText(CurLinkText) then
if CheckURL(CurURL) and CheckText(CurLinkText) or InNameAnchor then
AddLink(CurLinkText,CurURL);
InNameAnchor:=false;
end;
InAnchor:=Entered;
end;
@ -204,6 +219,7 @@ begin
inherited Init;
SetStr(DocName,ADocName);
New(Aliases, Init(10,10));
Synonym:=nil;
end;
function THTMLLinkScanDocument.GetName: string;
@ -211,6 +227,17 @@ begin
GetName:=GetStr(DocName);
end;
function THTMLLinkScanDocument.GetUniqueName: string;
var
PD: PHTMLLinkScanDocument;
begin
PD:=@Self;
while assigned(PD^.synonym) do
PD:=PD^.Synonym;
GetUniqueName:=GetStr(PD^.DocName);
end;
function THTMLLinkScanDocument.GetAliasCount: sw_integer;
begin
GetAliasCount:=Aliases^.Count;
@ -284,6 +311,28 @@ begin
SearchDocument:=P;
end;
procedure THTMLLinkScanDocumentCollection.MoveAliasesToSynonym;
procedure MoveAliases(P: PHTMLLinkScanDocument);
var
PD: PHTMLLinkScanDocument;
i: sw_integer;
begin
if not assigned(P^.synonym) then
exit;
PD:=P;
while assigned(PD^.synonym) do
PD:=PD^.Synonym;
For i:=P^.GetAliasCount-1 downto 0 do
begin
PD^.AddAlias(P^.GetAlias(i));
P^.Aliases^.AtFree(i);
end;
end;
begin
ForEach(@MoveAliases);
end;
constructor THTMLLinkScanner.Init(const ABaseDir: string);
begin
inherited Init;
@ -322,6 +371,11 @@ begin
GetDocumentURL:=ExpandChildURL(Documents^.At(DocIndex)^.GetName);
end;
function THTMLLinkScanner.GetUniqueDocumentURL(DocIndex: sw_integer): string;
begin
GetUniqueDocumentURL:=ExpandChildURL(Documents^.At(DocIndex)^.GetUniqueName);
end;
function THTMLLinkScanner.GetDocumentAliasCount(DocIndex: sw_integer): sw_integer;
begin
GetDocumentAliasCount:=Documents^.At(DocIndex)^.GetAliasCount;
@ -334,6 +388,10 @@ end;
procedure THTMLLinkScanner.AddLink(const LinkText, LinkURL: string);
var D: PHTMLLinkScanDocument;
DoInsert: boolean;
int: sw_integer;
Text: string;
error: word;
begin
D:=Documents^.SearchDocument(LinkURL);
if D=nil then
@ -341,7 +399,20 @@ begin
New(D, Init(NormalizeChildURL(LinkURL)));
Documents^.Insert(D);
end;
D^.AddAlias(LinkText);
If assigned(LastSynonym) then
LastSynonym^.Synonym:=D;
DoInsert:=true;
If (length(LinkText)=0) or (Pos(',',LinkText)=1) then
DoInsert:=false;
Val(LinkText,int,error);
If (Error>1) and (LinkText[Error]=' ') then
Text:=Trim(Copy(LinkText,error+1,length(LinkText)))
else
Text:=LinkText;
IF DoInsert then
D^.AddAlias(Text);
If InNameAnchor then
LastSynonym:=D;
end;
constructor THTMLLinkScanner.LoadDocuments(var S: TStream);
@ -355,6 +426,7 @@ begin
if (S.Status=stOK) and (L=HTMLIndexMagicNo) then
begin
S.Read(L,sizeof(L));
CurrentHTMLIndexVersion:=L;
OK:=(S.Status=stOK);
end;
if not OK then
@ -367,6 +439,8 @@ begin
New(Documents, Load(S));
if not Assigned(Documents) then
Fail;
Documents^.MoveAliasesToSynonym;
CurrentHTMLIndexVersion:=HTMLIndexVersion;
end;
procedure THTMLLinkScanner.StoreDocuments(var S: TStream);
@ -375,8 +449,10 @@ begin
L:=HTMLIndexMagicNo;
S.Write(L,sizeof(L));
L:=HTMLIndexVersion;
CurrentHTMLIndexVersion:=L;
S.Write(L,sizeof(L));
S.WriteStr(BaseDir);
Documents^.MoveAliasesToSynonym;
Documents^.Store(S);
end;
@ -537,7 +613,12 @@ end;
END.
{
$Log$
Revision 1.2 2002-04-11 07:06:31 pierre
Revision 1.3 2002-04-23 09:55:22 pierre
+ added lastsynonym and InNameAnchor fields to TCustomHTMLLinkScanner
these allow to eliminate double index entries pointing to the same
html file location (which had two different names).
Revision 1.2 2002/04/11 07:06:31 pierre
+ recreate the full target of an anchor that only has a NAME field
Revision 1.1 2001/08/04 11:30:26 peter