* htmlindexer can use generics Red black tree (for testing, under ifdef)

* some fixes to chm debug statements in chmreader

git-svn-id: trunk@13064 -
This commit is contained in:
marco 2009-04-30 15:37:22 +00:00
parent 2635212ebc
commit 1c3daf1568
2 changed files with 98 additions and 6 deletions

View File

@ -740,11 +740,13 @@ begin
GetDirectoryChunk(NextIndex, ChunkStream);
NextIndex := -1;
ReadQuickRefSection;
//WriteLn('In Block ', ChunkIndex);
{$IFDEF CHM_DEBUG}
WriteLn('In Block ', NextIndex);
{$endif}
case ChunkType(ChunkStream) of
ctUnknown: // something is wrong
begin
{$IFDEF CHM_DEBUG}WriteLn(ChunkIndex, ' << Unknown BlockType!');{$ENDIF}
{$IFDEF CHM_DEBUG}WriteLn(NextIndex, ' << Unknown BlockType!');{$ENDIF}
Break;
end;
ctPMGI: // we must follow the PMGI tree until we reach a PMGL block

View File

@ -21,9 +21,10 @@
unit HTMLIndexer;
{$MODE OBJFPC}{$H+}
interface
uses Classes, SysUtils, FastHTMLParser,avl_tree;
uses Classes, SysUtils, FastHTMLParser,{$ifdef userb}fos_redblacktree_gen{$else}avl_tree{$endif};
Type
{ TIndexDocument }
TIndexDocument = class(TObject)
private
@ -61,6 +62,10 @@ Type
{ TIndexedWordList }
{$ifdef userb}
TRBIndexTree = specialize TGFOS_RBTree<String,TIndexedWord>;
{$endif}
TForEachMethod = procedure (AWord:TIndexedWord) of object;
TForEachProcedure = Procedure (AWord:TIndexedWord;state:pointer);
TIndexedWordList = class(TObject)
@ -80,8 +85,13 @@ Type
FTotalWordLength: DWord;
FLongestWord: DWord;
FParser: THTMLParser;
{$ifdef userb}
FAVLTree : TRBIndexTree;
{$else}
FAVLTree : TAVLTree;
Spare :TIndexedWord;
{$endif}
function AddGetWord(AWord: String; IsTitle: Boolean): TIndexedWord;
// callbacks
procedure CBFoundTag(NoCaseTag, ActualTag: string);
@ -117,6 +127,14 @@ begin
Result := BNumber;
end;
const titlexlat : array [boolean] of char = ('0','1');
function makekey( n : string;istitle:boolean):string; inline;
begin
result:=n+'___'+titlexlat[istitle];
end;
Function CompareProcObj(Node1, Node2: Pointer): integer;
var n1,n2 : TIndexedWord;
begin
@ -133,10 +151,18 @@ end;
{ TIndexedWordList }
function TIndexedWordList.AddGetWord(AWord: String; IsTitle: Boolean): TIndexedWord;
var
{$ifdef userb}
key : string;
{$else}
n : TAVLTreeNode;
{$endif}
begin
Result := nil;
AWord := LowerCase(AWord);
{$ifdef userb}
key:=makekey(aword,istitle);
if not favltree.Find(key,result) then result:=nil;;
{$else}
if not assigned(spare) then
spare:=TIndexedWord.Create(AWord,IsTitle)
else
@ -144,18 +170,25 @@ begin
spare.TheWord:=aword;
spare.IsTitle:=IsTitle;
end;
n:=favltree.FindKey(Spare,@CompareProcObj);
if assigned(n) then
result:=TIndexedWord(n.Data);
{$endif}
if Result = nil then
begin
Inc(FTotalDifferentWordLength, Length(AWord));
Inc(FTotalDIfferentWords);
{$ifdef userb}
result:=TIndexedWord.Create(AWord,IsTitle);
favltree.add(key,result);
{$else}
Result := spare; // TIndexedWord.Create(AWord,IsTitle);
spare:=nil;
AddWord(Result);
{$endif}
// if IsTitle then
//WriteLn('Creating word: ', AWord);
FLongestWord := Max(FLongestWord, Length(AWord));
@ -256,17 +289,37 @@ begin
end;
end;
function defaultindexedword : TIndexedWord;
begin
result:=Tindexedword.create('',false);
end;
constructor TIndexedWordList.Create;
begin
inherited;
{$ifdef userb}
FAVLTree :=TRBIndexTree.create(@default_rb_string_compare,
@defaultindexedword,
@default_rb_string_undef );
{$else}
favltree:=TAVLTree.Create(@CompareProcObj);
spare:=nil;
{$endif}
end;
procedure FreeObject(const Obj:TIndexedWord);
begin
obj.free;
end;
destructor TIndexedWordList.Destroy;
begin
clear;
{$ifndef userb}
if assigned(spare) then spare.free;
{$endif}
favltree.free;
inherited Destroy;
end;
@ -305,36 +358,73 @@ end;
procedure TIndexedWordList.Clear;
begin
{$ifdef userb}
fAvlTree.ClearN(@FreeObject);
{$else}
fAvlTree.FreeAndClear;
{$endif}
end;
procedure TIndexedWordList.AddWord(const AWord: TIndexedWord);
begin
favltree.add(AWord);
{$ifdef userb}
favltree.add(makekey(aword.theword,aword.istitle),AWord);
{$else}
favltree.add(aword);
{$endif}
end;
procedure TIndexedWordList.ForEach(Proc:TForEachMethod);
{$ifdef userb}
var key : string;
val:TIndexedWord;
{$else}
var
AVLNode : TAVLTreeNode;
{$endif}
begin
{$ifdef userb}
if favltree.FirstNode(key,val) then
begin // Scan it forward
repeat
proc(val);
until not favltree.FindNext(key,val);
end;
{$else}
AVLNode:=fAVLTree.FindLowest;
while (AVLNode<>nil) do
begin
Proc(TIndexedWord(AVLNode.Data));
AVLNode:=FAVLTree.FindSuccessor(AVLNode)
end;
{$endif}
end;
procedure TIndexedWordList.ForEach(Proc:TForEachProcedure;state:pointer);
{$ifdef userb}
var key : string;
val:TIndexedWord;
{$else}
var
AVLNode : TAVLTreeNode;
{$endif}
begin
{$ifdef userb}
if favltree.FirstNode(key,val) then
begin // Scan it forward
repeat
proc(val,state);
until not favltree.FindNext(key,val);
end;
{$else}
AVLNode:=fAVLTree.FindLowest;
while (AVLNode<>nil) do
begin
Proc(TIndexedWord(AVLNode.Data),State);
AVLNode:=FAVLTree.FindSuccessor(AVLNode)
end;
{$endif}
end;
{ TIndexedWord }