* htmlindexer can use generics Red black tree (for testing, under ifdef)

* some fixes to chm debug statements in chmreader

git-svn-id: trunk@13064 -
This commit is contained in:
marco 2009-04-30 15:37:22 +00:00
parent 2635212ebc
commit 1c3daf1568
2 changed files with 98 additions and 6 deletions

View File

@ -740,11 +740,13 @@ begin
GetDirectoryChunk(NextIndex, ChunkStream); GetDirectoryChunk(NextIndex, ChunkStream);
NextIndex := -1; NextIndex := -1;
ReadQuickRefSection; ReadQuickRefSection;
//WriteLn('In Block ', ChunkIndex); {$IFDEF CHM_DEBUG}
WriteLn('In Block ', NextIndex);
{$endif}
case ChunkType(ChunkStream) of case ChunkType(ChunkStream) of
ctUnknown: // something is wrong ctUnknown: // something is wrong
begin begin
{$IFDEF CHM_DEBUG}WriteLn(ChunkIndex, ' << Unknown BlockType!');{$ENDIF} {$IFDEF CHM_DEBUG}WriteLn(NextIndex, ' << Unknown BlockType!');{$ENDIF}
Break; Break;
end; end;
ctPMGI: // we must follow the PMGI tree until we reach a PMGL block ctPMGI: // we must follow the PMGI tree until we reach a PMGL block

View File

@ -21,9 +21,10 @@
unit HTMLIndexer; unit HTMLIndexer;
{$MODE OBJFPC}{$H+} {$MODE OBJFPC}{$H+}
interface interface
uses Classes, SysUtils, FastHTMLParser,avl_tree; uses Classes, SysUtils, FastHTMLParser,{$ifdef userb}fos_redblacktree_gen{$else}avl_tree{$endif};
Type Type
{ TIndexDocument } { TIndexDocument }
TIndexDocument = class(TObject) TIndexDocument = class(TObject)
private private
@ -61,6 +62,10 @@ Type
{ TIndexedWordList } { TIndexedWordList }
{$ifdef userb}
TRBIndexTree = specialize TGFOS_RBTree<String,TIndexedWord>;
{$endif}
TForEachMethod = procedure (AWord:TIndexedWord) of object; TForEachMethod = procedure (AWord:TIndexedWord) of object;
TForEachProcedure = Procedure (AWord:TIndexedWord;state:pointer); TForEachProcedure = Procedure (AWord:TIndexedWord;state:pointer);
TIndexedWordList = class(TObject) TIndexedWordList = class(TObject)
@ -80,8 +85,13 @@ Type
FTotalWordLength: DWord; FTotalWordLength: DWord;
FLongestWord: DWord; FLongestWord: DWord;
FParser: THTMLParser; FParser: THTMLParser;
{$ifdef userb}
FAVLTree : TRBIndexTree;
{$else}
FAVLTree : TAVLTree; FAVLTree : TAVLTree;
Spare :TIndexedWord; Spare :TIndexedWord;
{$endif}
function AddGetWord(AWord: String; IsTitle: Boolean): TIndexedWord; function AddGetWord(AWord: String; IsTitle: Boolean): TIndexedWord;
// callbacks // callbacks
procedure CBFoundTag(NoCaseTag, ActualTag: string); procedure CBFoundTag(NoCaseTag, ActualTag: string);
@ -117,6 +127,14 @@ begin
Result := BNumber; Result := BNumber;
end; end;
const titlexlat : array [boolean] of char = ('0','1');
function makekey( n : string;istitle:boolean):string; inline;
begin
result:=n+'___'+titlexlat[istitle];
end;
Function CompareProcObj(Node1, Node2: Pointer): integer; Function CompareProcObj(Node1, Node2: Pointer): integer;
var n1,n2 : TIndexedWord; var n1,n2 : TIndexedWord;
begin begin
@ -133,10 +151,18 @@ end;
{ TIndexedWordList } { TIndexedWordList }
function TIndexedWordList.AddGetWord(AWord: String; IsTitle: Boolean): TIndexedWord; function TIndexedWordList.AddGetWord(AWord: String; IsTitle: Boolean): TIndexedWord;
var var
{$ifdef userb}
key : string;
{$else}
n : TAVLTreeNode; n : TAVLTreeNode;
{$endif}
begin begin
Result := nil; Result := nil;
AWord := LowerCase(AWord); AWord := LowerCase(AWord);
{$ifdef userb}
key:=makekey(aword,istitle);
if not favltree.Find(key,result) then result:=nil;;
{$else}
if not assigned(spare) then if not assigned(spare) then
spare:=TIndexedWord.Create(AWord,IsTitle) spare:=TIndexedWord.Create(AWord,IsTitle)
else else
@ -148,14 +174,21 @@ begin
n:=favltree.FindKey(Spare,@CompareProcObj); n:=favltree.FindKey(Spare,@CompareProcObj);
if assigned(n) then if assigned(n) then
result:=TIndexedWord(n.Data); result:=TIndexedWord(n.Data);
{$endif}
if Result = nil then if Result = nil then
begin begin
Inc(FTotalDifferentWordLength, Length(AWord)); Inc(FTotalDifferentWordLength, Length(AWord));
Inc(FTotalDIfferentWords); Inc(FTotalDIfferentWords);
{$ifdef userb}
result:=TIndexedWord.Create(AWord,IsTitle);
favltree.add(key,result);
{$else}
Result := spare; // TIndexedWord.Create(AWord,IsTitle); Result := spare; // TIndexedWord.Create(AWord,IsTitle);
spare:=nil; spare:=nil;
AddWord(Result); AddWord(Result);
{$endif}
// if IsTitle then // if IsTitle then
//WriteLn('Creating word: ', AWord); //WriteLn('Creating word: ', AWord);
FLongestWord := Max(FLongestWord, Length(AWord)); FLongestWord := Max(FLongestWord, Length(AWord));
@ -256,17 +289,37 @@ begin
end; end;
end; end;
function defaultindexedword : TIndexedWord;
begin
result:=Tindexedword.create('',false);
end;
constructor TIndexedWordList.Create; constructor TIndexedWordList.Create;
begin begin
inherited; inherited;
{$ifdef userb}
FAVLTree :=TRBIndexTree.create(@default_rb_string_compare,
@defaultindexedword,
@default_rb_string_undef );
{$else}
favltree:=TAVLTree.Create(@CompareProcObj); favltree:=TAVLTree.Create(@CompareProcObj);
spare:=nil; spare:=nil;
{$endif}
end; end;
procedure FreeObject(const Obj:TIndexedWord);
begin
obj.free;
end;
destructor TIndexedWordList.Destroy; destructor TIndexedWordList.Destroy;
begin begin
clear; clear;
{$ifndef userb}
if assigned(spare) then spare.free; if assigned(spare) then spare.free;
{$endif}
favltree.free; favltree.free;
inherited Destroy; inherited Destroy;
end; end;
@ -305,36 +358,73 @@ end;
procedure TIndexedWordList.Clear; procedure TIndexedWordList.Clear;
begin begin
{$ifdef userb}
fAvlTree.ClearN(@FreeObject);
{$else}
fAvlTree.FreeAndClear; fAvlTree.FreeAndClear;
{$endif}
end; end;
procedure TIndexedWordList.AddWord(const AWord: TIndexedWord); procedure TIndexedWordList.AddWord(const AWord: TIndexedWord);
begin begin
favltree.add(AWord); {$ifdef userb}
favltree.add(makekey(aword.theword,aword.istitle),AWord);
{$else}
favltree.add(aword);
{$endif}
end; end;
procedure TIndexedWordList.ForEach(Proc:TForEachMethod); procedure TIndexedWordList.ForEach(Proc:TForEachMethod);
{$ifdef userb}
var key : string;
val:TIndexedWord;
{$else}
var var
AVLNode : TAVLTreeNode; AVLNode : TAVLTreeNode;
{$endif}
begin begin
{$ifdef userb}
if favltree.FirstNode(key,val) then
begin // Scan it forward
repeat
proc(val);
until not favltree.FindNext(key,val);
end;
{$else}
AVLNode:=fAVLTree.FindLowest; AVLNode:=fAVLTree.FindLowest;
while (AVLNode<>nil) do while (AVLNode<>nil) do
begin begin
Proc(TIndexedWord(AVLNode.Data)); Proc(TIndexedWord(AVLNode.Data));
AVLNode:=FAVLTree.FindSuccessor(AVLNode) AVLNode:=FAVLTree.FindSuccessor(AVLNode)
end; end;
{$endif}
end; end;
procedure TIndexedWordList.ForEach(Proc:TForEachProcedure;state:pointer); procedure TIndexedWordList.ForEach(Proc:TForEachProcedure;state:pointer);
{$ifdef userb}
var key : string;
val:TIndexedWord;
{$else}
var var
AVLNode : TAVLTreeNode; AVLNode : TAVLTreeNode;
{$endif}
begin begin
{$ifdef userb}
if favltree.FirstNode(key,val) then
begin // Scan it forward
repeat
proc(val,state);
until not favltree.FindNext(key,val);
end;
{$else}
AVLNode:=fAVLTree.FindLowest; AVLNode:=fAVLTree.FindLowest;
while (AVLNode<>nil) do while (AVLNode<>nil) do
begin begin
Proc(TIndexedWord(AVLNode.Data),State); Proc(TIndexedWord(AVLNode.Data),State);
AVLNode:=FAVLTree.FindSuccessor(AVLNode) AVLNode:=FAVLTree.FindSuccessor(AVLNode)
end; end;
{$endif}
end; end;
{ TIndexedWord } { TIndexedWord }