mirror of
https://gitlab.com/freepascal.org/fpc/source.git
synced 2025-09-26 06:32:20 +02:00
* Fixed several chm bugs.
* Now searching for particular words is supported instead of a full dump of the index. * Generated files don't crash the MS reader when searching. git-svn-id: trunk@12119 -
This commit is contained in:
parent
17a08efb82
commit
d423812928
@ -124,14 +124,14 @@ type
|
|||||||
procedure MoveToRootNode;
|
procedure MoveToRootNode;
|
||||||
procedure MoveToNode(ANodeOffset: DWord; ANodeDepth: Integer);
|
procedure MoveToNode(ANodeOffset: DWord; ANodeDepth: Integer);
|
||||||
function ReadWordOrPartialWord(ALastWord: String): String; // returns the whole word using the last word as a base
|
function ReadWordOrPartialWord(ALastWord: String): String; // returns the whole word using the last word as a base
|
||||||
procedure ReadRootNodeEntry(ALastWord: String; out AWord: String; out ASubNodeStart: DWord);
|
function ReadIndexNodeEntry(ALastWord: String; out AWord: String; out ASubNodeStart: DWord): Boolean;
|
||||||
function ReadLeafNodeEntry(ALastWord: String; out AWord: String; out AInTitle: Boolean; out AWLCCount: DWord; out AWLCOffset: DWord; out AWLCSize: DWord): Boolean;
|
function ReadLeafNodeEntry(ALastWord: String; out AWord: String; out AInTitle: Boolean; out AWLCCount: DWord; out AWLCOffset: DWord; out AWLCSize: DWord): Boolean;
|
||||||
function ReadWLCEntries(AWLCCount: DWord; AWLCOffset: DWord; AWLCSize: DWord): TChmWLCTopicArray;
|
function ReadWLCEntries(AWLCCount: DWord; AWLCOffset: DWord; AWLCSize: DWord): TChmWLCTopicArray;
|
||||||
public
|
public
|
||||||
constructor Create(AStream: TStream; AFreeStreamOnDestroy: Boolean);
|
constructor Create(AStream: TStream; AFreeStreamOnDestroy: Boolean);
|
||||||
destructor Destroy; override;
|
destructor Destroy; override;
|
||||||
procedure DumpData(AFoundDataEvent: TChmSearchReaderFoundDataEvent);
|
procedure DumpData(AFoundDataEvent: TChmSearchReaderFoundDataEvent);
|
||||||
function LookupWord(AWord: String): TChmWLCTopicArray;
|
function LookupWord(AWord: String; out ATitleHits: TChmWLCTopicArray): TChmWLCTopicArray;
|
||||||
property FileIsValid: Boolean read FFileIsValid;
|
property FileIsValid: Boolean read FFileIsValid;
|
||||||
end;
|
end;
|
||||||
|
|
||||||
@ -172,6 +172,44 @@ type
|
|||||||
property LocRootSize: Byte read FLocRootSize write FLocRootSize;
|
property LocRootSize: Byte read FLocRootSize write FLocRootSize;
|
||||||
end;
|
end;
|
||||||
|
|
||||||
|
{ Reads a variable-length encoded unsigned integer from Stream.

  Each byte carries 7 payload bits in its low bits; a set high bit ($80)
  means another byte follows.  As implemented here the FIRST byte read
  supplies the LOWEST 7 bits of the value, the next byte bits 7..13, etc.
  (this is the inverse of WriteCompressedIntegerBE in this unit).

  Stream : source stream, read one byte at a time via ReadByte.
  Result : the decoded value, truncated to 32 bits.

  Malformed input with more groups than fit into a DWord is still consumed
  up to its terminating byte so the stream stays in sync, but payload bits
  beyond bit 31 are ignored instead of being shifted by an out-of-range
  count (whose result differs between targets). }
function GetCompressedIntegerBE(Stream: TStream): DWord;
var
  Buf: Byte;
  Shift: Integer;
begin
  Result := 0;
  Shift := 0;
  repeat
    Buf := Stream.ReadByte;
    // Only groups that start below bit 32 can contribute to a DWord.
    // The QWord widening plus mask keeps the arithmetic well defined on
    // every target and drops bits that would overflow 32 bits.
    if Shift < 32 then
      Result := Result or DWord((QWord(Buf and $7F) shl Shift) and $FFFFFFFF);
    Inc(Shift, 7);
  until (Buf and $80) = 0; // high bit clear marks the last byte
end;
|
||||||
|
|
||||||
|
{ Writes AInt to Stream as a variable-length integer.

  The value is emitted in 7-bit groups, lowest group first.  Every byte
  except the last has its high bit ($80) set to signal that more bytes
  follow.  A value of 0 is written as a single $00 byte. }
procedure WriteCompressedIntegerBE(Stream: TStream; AInt: DWord);
var
  Remaining: DWord;
  Group: Byte;
begin
  Remaining := AInt;
  repeat
    // take the current lowest 7 bits ...
    Group := Remaining and $7F;
    Remaining := Remaining shr 7;
    // ... and flag the byte when significant bits are still left
    if Remaining <> 0 then
      Group := Group or $80;
    Stream.WriteByte(Group);
  until Remaining = 0;
end;
|
||||||
|
|
||||||
function WriteScaleRootInt(ANumber: DWord; out Bits: DWord; Root: Integer): Byte;
|
function WriteScaleRootInt(ANumber: DWord; out Bits: DWord; Root: Integer): Byte;
|
||||||
var
|
var
|
||||||
Tmp: DWord;
|
Tmp: DWord;
|
||||||
@ -486,14 +524,14 @@ begin
|
|||||||
FBlockStream.WriteByte(Offset);
|
FBlockStream.WriteByte(Offset);
|
||||||
FBlockStream.Write(NewWord[1], Length(Trim(NewWord)));
|
FBlockStream.Write(NewWord[1], Length(Trim(NewWord)));
|
||||||
FBlockStream.WriteByte(Ord(AWord.IsTitle));
|
FBlockStream.WriteByte(Ord(AWord.IsTitle));
|
||||||
WriteCompressedInteger(FBlockStream, AWord.DocumentCount);
|
WriteCompressedIntegerBE(FBlockStream, AWord.DocumentCount);
|
||||||
FBlockStream.WriteDWord(NtoLE(DWord(FWriteStream.Position)));
|
FBlockStream.WriteDWord(NtoLE(DWord(FWriteStream.Position)));
|
||||||
FBlockStream.WriteWord(0);
|
FBlockStream.WriteWord(0);
|
||||||
|
|
||||||
// write WLC to FWriteStream so we can write the size of the wlc entries
|
// write WLC to FWriteStream so we can write the size of the wlc entries
|
||||||
WLCSize := WriteWLCEntries(AWord, FDocRootSize, FCodeRootSize, FLocRootSize);
|
WLCSize := WriteWLCEntries(AWord, FDocRootSize, FCodeRootSize, FLocRootSize);
|
||||||
|
|
||||||
WriteCompressedInteger(FBlockStream, WLCSize);
|
WriteCompressedIntegerBE(FBlockStream, WLCSize);
|
||||||
end;
|
end;
|
||||||
|
|
||||||
function Min(AValue, BValue: Byte): Byte;
|
function Min(AValue, BValue: Byte): Byte;
|
||||||
@ -530,7 +568,6 @@ function TLeafNode.WriteWLCEntries ( AWord: TIndexedWord ; ADocRootSize, ACodeRo
|
|||||||
var
|
var
|
||||||
LastDocIndex: DWord;
|
LastDocIndex: DWord;
|
||||||
LastLocCode: DWord;
|
LastLocCode: DWord;
|
||||||
WLCLastWord: String;
|
|
||||||
UsedBits: Byte;
|
UsedBits: Byte;
|
||||||
Buf: Byte;
|
Buf: Byte;
|
||||||
function NewDocDelta(ADocIndex: DWord): DWord;
|
function NewDocDelta(ADocIndex: DWord): DWord;
|
||||||
@ -720,7 +757,7 @@ begin
|
|||||||
while NodeDepth > 1 do
|
while NodeDepth > 1 do
|
||||||
begin
|
begin
|
||||||
LastWord := '';
|
LastWord := '';
|
||||||
ReadRootNodeEntry(LastWord, NewWord, NodeOffset);
|
ReadIndexNodeEntry(LastWord, NewWord, NodeOffset);
|
||||||
Dec(NodeDepth);
|
Dec(NodeDepth);
|
||||||
MoveToNode(NodeOffset, NodeDepth);
|
MoveToNode(NodeOffset, NodeDepth);
|
||||||
end;
|
end;
|
||||||
@ -761,28 +798,30 @@ begin
|
|||||||
FStream.Read(Result[1+CopyLastWordCharCount], WordLength-1);
|
FStream.Read(Result[1+CopyLastWordCharCount], WordLength-1);
|
||||||
end;
|
end;
|
||||||
|
|
||||||
// Reads the next entry of the currently active index node.
// (Old side of the diff: procedure ReadRootNodeEntry; new side: function
// ReadIndexNodeEntry, which additionally reports whether an entry was read.)
//   ALastWord     - word of the previous entry, passed on to ReadWordOrPartialWord
//   AWord         - receives the word returned by ReadWordOrPartialWord
//   ASubNodeStart - receives the DWord that follows the word (converted with LEtoN)
//   Result        - False when the stream position is no longer inside the used
//                   part of the node; in that case nothing is read and the out
//                   parameters are not assigned.
procedure TChmSearchReader.ReadRootNodeEntry (ALastWord: String; out AWord: String; out
|
function TChmSearchReader.ReadIndexNodeEntry (ALastWord: String; out AWord: String; out
|
||||||
ASubNodeStart: DWord ) ;
|
ASubNodeStart: DWord ): Boolean;
|
||||||
begin
|
begin
|
||||||

// used bytes of the node = FIFTI_NODE_SIZE - FActiveNodeFreeSpace
Result := FStream.Position - FActiveNodeStart < FIFTI_NODE_SIZE - FActiveNodeFreeSpace;
|
||||||

if not Result then
|
||||||

Exit;
|
||||||
AWord := ReadWordOrPartialWord(ALastWord);
|
AWord := ReadWordOrPartialWord(ALastWord);
|
||||||
ASubNodeStart := LEtoN(FStream.ReadDWord);
|
ASubNodeStart := LEtoN(FStream.ReadDWord);
|
||||||

FStream.ReadWord; // skip a 16-bit field; its value is discarded
|
||||||
end;
|
end;
|
||||||
|
|
||||||
// Reads the next entry of the currently active leaf node.
//   ALastWord  - word of the previous entry, passed on to ReadWordOrPartialWord
//   AWord      - receives the word returned by ReadWordOrPartialWord
//   AInTitle   - True when the byte following the word equals 1
//   AWLCCount  - variable-length integer read after the title flag
//   AWLCOffset - DWord read from the stream and converted with LEtoN
//   AWLCSize   - variable-length integer read after a skipped 16-bit field
//   Result     - False when the stream position is no longer inside the used
//                part of the node; in that case nothing is read and the out
//                parameters are not assigned.
// The new side of the diff switches both variable-length reads from
// GetCompressedInteger to GetCompressedIntegerBE and drops the unused local.
function TChmSearchReader.ReadLeafNodeEntry ( ALastWord: String; out
|
function TChmSearchReader.ReadLeafNodeEntry ( ALastWord: String; out
|
||||||
AWord: String; out AInTitle: Boolean; out AWLCCount: DWord; out
|
AWord: String; out AInTitle: Boolean; out AWLCCount: DWord; out
|
||||||
AWLCOffset: DWord; out AWLCSize: DWord ): Boolean;
|
AWLCOffset: DWord; out AWLCSize: DWord ): Boolean;
|
||||||
var
|
|
||||||
WordLength: Integer;
|
|
||||||
begin
|
begin
|
||||||
// used bytes of the node = FIFTI_NODE_SIZE - FActiveNodeFreeSpace
Result := FStream.Position - FActiveNodeStart < FIFTI_NODE_SIZE - FActiveNodeFreeSpace;
|
Result := FStream.Position - FActiveNodeStart < FIFTI_NODE_SIZE - FActiveNodeFreeSpace;
|
||||||
if not Result then
|
if not Result then
|
||||||
Exit;
|
Exit;
|
||||||
AWord := ReadWordOrPartialWord(ALastWord);
|
AWord := ReadWordOrPartialWord(ALastWord);
|
||||||
AInTitle := FStream.ReadByte = 1;
|
AInTitle := FStream.ReadByte = 1;
|
||||||
AWLCCount := GetCompressedInteger(FStream);
|
AWLCCount := GetCompressedIntegerBE(FStream);
|
||||||
AWLCOffset := LEtoN(FStream.ReadDWord);
|
AWLCOffset := LEtoN(FStream.ReadDWord);
|
||||||
// skip a 16-bit field; its value is discarded
FStream.ReadWord;
|
FStream.ReadWord;
|
||||||
AWLCSize := GetCompressedInteger(FStream);
|
AWLCSize := GetCompressedIntegerBE(FStream);
|
||||||
|
|
||||||
end;
|
end;
|
||||||
|
|
||||||
@ -857,13 +896,12 @@ var
|
|||||||
begin
|
begin
|
||||||
CachedStreamPos := FStream.Position;
|
CachedStreamPos := FStream.Position;
|
||||||
FStream.Position := AWLCOffset;
|
FStream.Position := AWLCOffset;
|
||||||
for i := 0 to AWLCSize-1 do
|
{for i := 0 to AWLCSize-1 do
|
||||||
begin
|
begin
|
||||||
Buf := FStream.ReadByte;
|
Buf := FStream.ReadByte;
|
||||||
Write(binStr(Buf, 8), ' ');
|
Write(binStr(Buf, 8), ' ');
|
||||||
end;
|
end;}
|
||||||
FStream.Position := AWLCOffset;
|
FStream.Position := AWLCOffset;
|
||||||
|
|
||||||
SetLength(Result, AWLCCount);
|
SetLength(Result, AWLCCount);
|
||||||
Buf := 0;
|
Buf := 0;
|
||||||
BitsInBuffer := 0;
|
BitsInBuffer := 0;
|
||||||
@ -932,7 +970,10 @@ begin
|
|||||||
end
|
end
|
||||||
else begin
|
else begin
|
||||||
LastWord := TheWord;
|
LastWord := TheWord;
|
||||||
|
//WriteLn('Reading Hits for ', TheWord ,' at ', hexstr(WLCOffset,8) );
|
||||||
FoundHits := ReadWLCEntries(WLCCount, WLCOffset, WLCSize);
|
FoundHits := ReadWLCEntries(WLCCount, WLCOffset, WLCSize);
|
||||||
|
//WriteLn('DONE Reading Hits for ', TheWord);
|
||||||
|
// AFoundDataEvent(Self, TheWord, 0,0);//FoundHits[i].TopicIndex ,-1);//FoundHits[i].LocationCodes[j]);
|
||||||
for i := 0 to High(FoundHits) do
|
for i := 0 to High(FoundHits) do
|
||||||
for j := 0 to High(FoundHits[i].LocationCodes) do
|
for j := 0 to High(FoundHits[i].LocationCodes) do
|
||||||
AFoundDataEvent(Self, TheWord, FoundHits[i].TopicIndex ,FoundHits[i].LocationCodes[j]);
|
AFoundDataEvent(Self, TheWord, FoundHits[i].TopicIndex ,FoundHits[i].LocationCodes[j]);
|
||||||
@ -940,16 +981,79 @@ begin
|
|||||||
until False; //FStream.Position - FActiveNodeStart >= FIFTI_NODE_SIZE - FActiveNodeFreeSpace
|
until False; //FStream.Position - FActiveNodeStart >= FIFTI_NODE_SIZE - FActiveNodeFreeSpace
|
||||||
end;
|
end;
|
||||||
|
|
||||||
{ Looks up a single word in the full text search index.

  AWord      : the word to search for; it is lowercased before comparing.
  ATitleHits : receives the hits of the entry flagged "in title"
               (empty when there is none).
  Result     : the hits of the entry that is not flagged "in title"
               (empty when there is none or the word is not indexed).

  The index nodes are walked from the root: within a node the first entry
  whose word compares >= AWord selects the child to descend into.  In the
  leaf node the entries are scanned in order until AWord is found or passed.
  A word can have two consecutive leaf entries (one for titles, one for the
  body), so after a match the following entry - possibly the first entry of
  the next leaf node - is checked as well. }
function TChmSearchReader.LookupWord(AWord: String; out ATitleHits: TChmWLCTopicArray): TChmWLCTopicArray;
var
  LastWord: String;
  NewWord: String;
  NodeLevel: Integer;
  NewNodePosition: DWord;
  InTitle: Boolean;
  WLCCount: DWord;
  WLCOffset: DWord;
  WLCSize: DWord;
  CompareResult: Integer;
  ReadNextResult: Boolean;
begin
  AWord := LowerCase(AWord);
  NodeLevel := FTreeDepth;
  MoveToRootNode;
  SetLength(Result, 0);
  SetLength(ATitleHits, 0);
  LastWord := '';

  // descend the index node tree until we find the leafnode
  while NodeLevel > 1 do
  begin
    if not ReadIndexNodeEntry(LastWord, NewWord, NewNodePosition) then
      Break;
    if ChmCompareText(NewWord, AWord) >= 0 then
    begin
      // entering a new node: its first entry has no predecessor
      LastWord := '';
      Dec(NodeLevel);
      MoveToNode(NewNodePosition, NodeLevel);
    end
    else
      // the next entry of this node is decoded relative to this one
      // (ReadWordOrPartialWord uses the last word as a base)
      LastWord := NewWord;
  end;

  if NodeLevel > 1 then
    Exit; // the entry we are looking for is > than the last entry of the last index node

  // now we are in a leafnode
  while ReadLeafNodeEntry(LastWord, NewWord, InTitle, WLCCount, WLCOffset, WLCSize) do
  begin
    LastWord := NewWord;
    CompareResult := ChmCompareText(AWord, NewWord);
    if CompareResult < 0 then
      Exit; // entries are ordered; we are already past the word

    if CompareResult = 0 then
    begin
      if InTitle then
        ATitleHits := ReadWLCEntries(WLCCount, WLCOffset, WLCSize)
      else
        Result := ReadWLCEntries(WLCCount, WLCOffset, WLCSize);

      // check if the next entry is the same word since there is an entry
      // for titles and for body
      ReadNextResult := ReadLeafNodeEntry(LastWord, NewWord, InTitle, WLCCount, WLCOffset, WLCSize);
      if (not ReadNextResult) and (FNextLeafNode <> 0) then
      begin
        // this leaf is exhausted; the sibling entry may start the next leaf
        MoveToNode(FNextLeafNode, 1);
        LastWord := '';
        ReadNextResult := ReadLeafNodeEntry(LastWord, NewWord, InTitle, WLCCount, WLCOffset, WLCSize);
      end;

      if ReadNextResult and (NewWord = AWord) then
      begin
        if InTitle then
          ATitleHits := ReadWLCEntries(WLCCount, WLCOffset, WLCSize)
        else
          Result := ReadWLCEntries(WLCCount, WLCOffset, WLCSize);
      end;
      Exit;
    end;
  end;
end;
|
||||||
|
|
||||||
|
|
||||||
end.
|
end.
|
||||||
|
|
||||||
|
@ -28,7 +28,7 @@ unit chmreader;
|
|||||||
interface
|
interface
|
||||||
|
|
||||||
uses
|
uses
|
||||||
Classes, SysUtils, chmbase, paslzx;
|
Classes, SysUtils, chmbase, paslzx, chmFIftiMain;
|
||||||
|
|
||||||
type
|
type
|
||||||
|
|
||||||
@ -99,14 +99,22 @@ type
|
|||||||
fTitle: String;
|
fTitle: String;
|
||||||
fPreferedFont: String;
|
fPreferedFont: String;
|
||||||
fContextList: TContextList;
|
fContextList: TContextList;
|
||||||
|
fTOPICSStream,
|
||||||
|
fURLSTRStream,
|
||||||
|
fURLTBLStream,
|
||||||
|
fStringsStream: TMemoryStream;
|
||||||
fLocaleID: DWord;
|
fLocaleID: DWord;
|
||||||
private
|
private
|
||||||
|
FSearchReader: TChmSearchReader;
|
||||||
procedure ReadCommonData;
|
procedure ReadCommonData;
|
||||||
|
function ReadStringsEntry(APosition: DWord): String;
|
||||||
|
function ReadURLSTR(APosition: DWord): String;
|
||||||
public
|
public
|
||||||
constructor Create(AStream: TStream; FreeStreamOnDestroy: Boolean); override;
|
constructor Create(AStream: TStream; FreeStreamOnDestroy: Boolean); override;
|
||||||
destructor Destroy; override;
|
destructor Destroy; override;
|
||||||
public
|
public
|
||||||
function GetContextUrl(Context: THelpContext): String;
|
function GetContextUrl(Context: THelpContext): String;
|
||||||
|
function LookupTopicByID(ATopicID: Integer; out ATitle: String): String; // returns a url
|
||||||
function HasContextList: Boolean;
|
function HasContextList: Boolean;
|
||||||
property DefaultPage: String read fDefaultPage;
|
property DefaultPage: String read fDefaultPage;
|
||||||
property IndexFile: String read fIndexFile;
|
property IndexFile: String read fIndexFile;
|
||||||
@ -114,6 +122,7 @@ type
|
|||||||
property Title: String read fTitle write fTitle;
|
property Title: String read fTitle write fTitle;
|
||||||
property PreferedFont: String read fPreferedFont;
|
property PreferedFont: String read fPreferedFont;
|
||||||
property LocaleID: dword read fLocaleID;
|
property LocaleID: dword read fLocaleID;
|
||||||
|
property SearchReader: TChmSearchReader read FSearchReader write FSearchReader;
|
||||||
end;
|
end;
|
||||||
|
|
||||||
{ TChmFileList }
|
{ TChmFileList }
|
||||||
@ -430,6 +439,41 @@ begin
|
|||||||
{$ENDIF}
|
{$ENDIF}
|
||||||
end;
|
end;
|
||||||
|
|
||||||
|
{ Returns the NUL-terminated string stored at byte offset APosition of the
  '/#STRINGS' object.

  The object is fetched with GetObject on first use and kept in
  fStringsStream.  An empty string is returned when the object is not
  available or APosition is not inside it.

  The scan for the terminating NUL is limited to the bytes that belong to
  the stream, so a file whose last string is not terminated cannot cause a
  read past the end of the stream data. }
function TChmReader.ReadStringsEntry ( APosition: DWord ) : String;
var
  Start: PChar;
  Avail: SizeInt;
  Len: SizeInt;
begin
  Result := '';
  if fStringsStream = nil then
    fStringsStream := GetObject('/#STRINGS');
  if fStringsStream = nil then
    Exit;
  if APosition < fStringsStream.Size-1 then
  begin
    Start := PChar(fStringsStream.Memory) + APosition;
    Avail := SizeInt(fStringsStream.Size - APosition);
    // length up to (not including) the first NUL, or everything that is
    // left when no terminator exists
    Len := IndexByte(Start^, Avail, 0);
    if Len < 0 then
      Len := Avail;
    SetString(Result, Start, Len);
  end;
end;
|
||||||
|
|
||||||
|
{ Resolves a '/#URLTBL' entry to its URL.

  APosition : byte offset of the entry inside '/#URLTBL'.
  Result    : '/' followed by the NUL-terminated string found in '/#URLSTR',
              or '' when either object is unavailable or the string offset
              lies outside '/#URLSTR'.

  The '/#URLTBL' entry is read as three DWords; only the third one (the
  offset into '/#URLSTR', little endian) is used.  At that offset two more
  DWords are skipped before the string itself starts.  Both objects are
  fetched with GetObject on first use and cached in fURLTBLStream /
  fURLSTRStream. }
function TChmReader.ReadURLSTR ( APosition: DWord ) : String;
var
  Start: PChar;
  Avail: SizeInt;
  Len: SizeInt;
begin
  // a String function result is not guaranteed to start out empty
  Result := '';
  if fURLSTRStream = nil then
    fURLSTRStream := GetObject('/#URLSTR');
  if fURLTBLStream = nil then
    fURLTBLStream := GetObject('/#URLTBL');
  if (fURLTBLStream <> nil) and (fURLSTRStream <> nil) then
  begin
    fURLTBLStream.Position := APosition;
    fURLTBLStream.ReadDWord; // unknown
    fURLTBLStream.ReadDWord; // TOPIC index #
    fURLSTRStream.Position := LEtoN(fURLTBLStream.ReadDWord);
    fURLSTRStream.ReadDWord; // skipped, value not used here
    fURLSTRStream.ReadDWord; // skipped, value not used here
    if fURLSTRStream.Position < fURLSTRStream.Size-1 then
    begin
      Start := PChar(fURLSTRStream.Memory) + fURLSTRStream.Position;
      Avail := SizeInt(fURLSTRStream.Size - fURLSTRStream.Position);
      // never scan for the terminator beyond the stream data
      Len := IndexByte(Start^, Avail, 0);
      if Len < 0 then
        Len := Avail;
      SetString(Result, Start, Len);
      Result := '/' + Result;
    end;
  end;
end;
|
||||||
|
|
||||||
constructor TChmReader.Create(AStream: TStream; FreeStreamOnDestroy: Boolean);
|
constructor TChmReader.Create(AStream: TStream; FreeStreamOnDestroy: Boolean);
|
||||||
begin
|
begin
|
||||||
inherited Create(AStream, FreeStreamOnDestroy);
|
inherited Create(AStream, FreeStreamOnDestroy);
|
||||||
@ -442,6 +486,11 @@ end;
|
|||||||
// Releases everything the reader owns before calling the inherited destructor:
// the context list and (new side of the diff) the search reader plus the
// cached fTOPICSStream / fURLSTRStream / fURLTBLStream / fStringsStream objects.
destructor TChmReader.Destroy;
|
destructor TChmReader.Destroy;
|
||||||
begin
|
begin
|
||||||
fContextList.Free;
|
fContextList.Free;
|
||||||

FreeAndNil(FSearchReader);
|
||||||

FreeAndNil(fTOPICSStream);
|
||||||

FreeAndNil(fURLSTRStream);
|
||||||

FreeAndNil(fURLTBLStream);
|
||||||

FreeAndNil(fStringsStream);
|
||||||
inherited Destroy;
|
inherited Destroy;
|
||||||
end;
|
end;
|
||||||
|
|
||||||
@ -787,6 +836,31 @@ begin
|
|||||||
Result := fContextList.GetURL(Context);
|
Result := fContextList.GetURL(Context);
|
||||||
end;
|
end;
|
||||||
|
|
||||||
|
{ Resolves a topic index to its URL and title.

  ATopicID : index of the topic; its record is read at byte offset
             ATopicID * 16 of the '/#TOPICS' object.
  ATitle   : receives the title read via ReadStringsEntry, or '' when the
             record's title offset is $FFFFFFFF or no title could be read.
  Result   : the URL returned by ReadURLSTR, or '' when '/#TOPICS' is not
             available or the record position could not be selected.

  The '/#TOPICS' object is fetched with GetObject on the first call only and
  cached in fTOPICSStream. }
function TChmReader.LookupTopicByID ( ATopicID: Integer; out ATitle: String) : String;
var
  TopicURLTBLOffset: DWord;
  TopicTitleOffset: DWord;
begin
  Result := '';
  ATitle := '';
  //WriteLn('Getting topic# ',ATopicID);
  // fetch the object only once; re-fetching on every call would leak the
  // previously cached stream
  if fTOPICSStream = nil then
    fTOPICSStream := GetObject('/#TOPICS');
  if fTOPICSStream = nil then
    Exit;
  fTOPICSStream.Position := ATopicID * 16;
  if fTOPICSStream.Position = ATopicID * 16 then
  begin
    fTOPICSStream.ReadDWord; // first field of the record, not used here
    TopicTitleOffset := LEtoN(fTOPICSStream.ReadDWord);
    TopicURLTBLOffset := LEtoN(fTOPICSStream.ReadDWord);
    if TopicTitleOffset <> $FFFFFFFF then
      ATitle := ReadStringsEntry(TopicTitleOffset);
    //WriteLn('Got a title: ', ATitle);
    Result := ReadURLSTR(TopicURLTBLOffset);
  end;
end;
|
||||||
|
|
||||||
function TChmReader.HasContextList: Boolean;
|
function TChmReader.HasContextList: Boolean;
|
||||||
begin
|
begin
|
||||||
Result := fContextList.Count > 0;
|
Result := fContextList.Count > 0;
|
||||||
|
@ -50,6 +50,7 @@ type
|
|||||||
FCurrentStream: TStream; // used to buffer the files that are to be compressed
|
FCurrentStream: TStream; // used to buffer the files that are to be compressed
|
||||||
FCurrentIndex: Integer;
|
FCurrentIndex: Integer;
|
||||||
FOnGetFileData: TGetDataFunc;
|
FOnGetFileData: TGetDataFunc;
|
||||||
|
FSearchTitlesOnly: Boolean;
|
||||||
FStringsStream: TMemoryStream; // the #STRINGS file
|
FStringsStream: TMemoryStream; // the #STRINGS file
|
||||||
FTopicsStream: TMemoryStream; // the #TOPICS file
|
FTopicsStream: TMemoryStream; // the #TOPICS file
|
||||||
FURLTBLStream: TMemoryStream; // the #URLTBL file. has offsets of strings in URLSTR
|
FURLTBLStream: TMemoryStream; // the #URLTBL file. has offsets of strings in URLSTR
|
||||||
@ -130,6 +131,7 @@ type
|
|||||||
property OutStream: TStream read FOutStream;
|
property OutStream: TStream read FOutStream;
|
||||||
property Title: String read FTitle write FTitle;
|
property Title: String read FTitle write FTitle;
|
||||||
property FullTextSearch: Boolean read FFullTextSearch write FFullTextSearch;
|
property FullTextSearch: Boolean read FFullTextSearch write FFullTextSearch;
|
||||||
|
property SearchTitlesOnly: Boolean read FSearchTitlesOnly write FSearchTitlesOnly;
|
||||||
property DefaultFont: String read FDefaultFont write FDefaultFont;
|
property DefaultFont: String read FDefaultFont write FDefaultFont;
|
||||||
property DefaultPage: String read FDefaultPage write FDefaultPage;
|
property DefaultPage: String read FDefaultPage write FDefaultPage;
|
||||||
property TempRawStream: TStream read FTempStream write SetTempRawStream;
|
property TempRawStream: TStream read FTempStream write SetTempRawStream;
|
||||||
@ -404,7 +406,6 @@ var
|
|||||||
Entry: TFileEntryRec;
|
Entry: TFileEntryRec;
|
||||||
TmpStr: String;
|
TmpStr: String;
|
||||||
TmpTitle: String;
|
TmpTitle: String;
|
||||||
TmpStream: TMemoryStream;
|
|
||||||
const
|
const
|
||||||
VersionStr = 'HHA Version 4.74.8702'; // does this matter?
|
VersionStr = 'HHA Version 4.74.8702'; // does this matter?
|
||||||
begin
|
begin
|
||||||
@ -591,9 +592,7 @@ end;
|
|||||||
|
|
||||||
procedure TChmWriter.WriteOBJINST;
|
procedure TChmWriter.WriteOBJINST;
|
||||||
var
|
var
|
||||||
Entry: TFileEntryRec;
|
|
||||||
i: Integer;
|
i: Integer;
|
||||||
TmpPos: Integer;
|
|
||||||
ObjStream: TMemoryStream;
|
ObjStream: TMemoryStream;
|
||||||
//Flags: Word;
|
//Flags: Word;
|
||||||
begin
|
begin
|
||||||
@ -832,7 +831,7 @@ function TChmWriter.AddURL ( AURL: String; TopicsIndex: DWord ) : LongWord;
|
|||||||
Len: LongWord;
|
Len: LongWord;
|
||||||
begin
|
begin
|
||||||
Rem := $4000 - (FURLSTRStream.Size mod $4000);
|
Rem := $4000 - (FURLSTRStream.Size mod $4000);
|
||||||
Len := 9 + Length(AString);
|
Len := 9 + Length(AString); // 2 dwords the string and NT
|
||||||
if Rem < Len then
|
if Rem < Len then
|
||||||
while Rem > 0 do
|
while Rem > 0 do
|
||||||
begin
|
begin
|
||||||
@ -847,17 +846,18 @@ function TChmWriter.AddURL ( AURL: String; TopicsIndex: DWord ) : LongWord;
|
|||||||
if FURLSTRStream.Size mod $4000 = 0 then
|
if FURLSTRStream.Size mod $4000 = 0 then
|
||||||
FURLSTRStream.WriteByte(0);
|
FURLSTRStream.WriteByte(0);
|
||||||
Result := FURLSTRStream.Position;
|
Result := FURLSTRStream.Position;
|
||||||
FURLSTRStream.WriteDWord(NToLE(DWord(0))); // URL Offset for topic??
|
FURLSTRStream.WriteDWord(NToLE(DWord(0))); // URL Offset for topic after the the "Local" value
|
||||||
FURLSTRStream.WriteDWord(NToLE(DWord(0))); // Offset of FrameName??
|
FURLSTRStream.WriteDWord(NToLE(DWord(0))); // Offset of FrameName??
|
||||||
FURLSTRStream.Write(AString[1], Length(AString));
|
FURLSTRStream.Write(AString[1], Length(AString));
|
||||||
FURLSTRStream.WriteByte(0); //NT
|
FURLSTRStream.WriteByte(0); //NT
|
||||||
end;
|
end;
|
||||||
begin
|
begin
|
||||||
if AURL[1] = '/' then Delete(AURL,1,1);
|
if AURL[1] = '/' then Delete(AURL,1,1);
|
||||||
if $1000 - (FURLTBLStream.Size mod $1000) = 4 then
|
//if $1000 - (FURLTBLStream.Size mod $1000) = 4 then // we are at 4092
|
||||||
FURLTBLStream.WriteDWord(NtoLE(DWord(4096)));
|
if FURLTBLStream.Size and $FFC = $FFC then // faster :)
|
||||||
|
FURLTBLStream.WriteDWord(0);
|
||||||
Result := FURLTBLStream.Position;
|
Result := FURLTBLStream.Position;
|
||||||
FURLTBLStream.WriteDWord($231e9f5c); //unknown
|
FURLTBLStream.WriteDWord(0);//($231e9f5c); //unknown
|
||||||
FURLTBLStream.WriteDWord(NtoLE(TopicsIndex)); // Index of topic in #TOPICS
|
FURLTBLStream.WriteDWord(NtoLE(TopicsIndex)); // Index of topic in #TOPICS
|
||||||
FURLTBLStream.WriteDWord(NtoLE(AddURLString(AURL)));
|
FURLTBLStream.WriteDWord(NtoLE(AddURLString(AURL)));
|
||||||
end;
|
end;
|
||||||
@ -1007,7 +1007,7 @@ type
|
|||||||
begin
|
begin
|
||||||
if Pos('.ht', AFileEntry.Name) > 0 then
|
if Pos('.ht', AFileEntry.Name) > 0 then
|
||||||
begin
|
begin
|
||||||
ATitle := FIndexedFiles.IndexFile(AStream, GetNewTopicsIndex);
|
ATitle := FIndexedFiles.IndexFile(AStream, GetNewTopicsIndex, FSearchTitlesOnly);
|
||||||
if ATitle <> '' then
|
if ATitle <> '' then
|
||||||
TopicEntry.StringsOffset := AddString(ATitle)
|
TopicEntry.StringsOffset := AddString(ATitle)
|
||||||
else
|
else
|
||||||
|
@ -68,6 +68,7 @@ Type
|
|||||||
|
|
||||||
TIndexedWordList = class(TObject)
|
TIndexedWordList = class(TObject)
|
||||||
private
|
private
|
||||||
|
FIndexTitlesOnly: Boolean;
|
||||||
FIndexedFileCount: DWord;
|
FIndexedFileCount: DWord;
|
||||||
//vars while processing page
|
//vars while processing page
|
||||||
FInTitle,
|
FInTitle,
|
||||||
@ -83,6 +84,7 @@ Type
|
|||||||
FLongestWord: DWord;
|
FLongestWord: DWord;
|
||||||
FFirstWord: TIndexedWord;
|
FFirstWord: TIndexedWord;
|
||||||
FCachedWord: TIndexedWord;
|
FCachedWord: TIndexedWord;
|
||||||
|
FParser: THTMLParser;
|
||||||
function AddGetWord(AWord: String; IsTitle: Boolean): TIndexedWord;
|
function AddGetWord(AWord: String; IsTitle: Boolean): TIndexedWord;
|
||||||
function GetWordForward(AWord: String; StartWord: TIndexedWord; out WrongWord: TIndexedWord; AIsTitle: Boolean): TIndexedWord;
|
function GetWordForward(AWord: String; StartWord: TIndexedWord; out WrongWord: TIndexedWord; AIsTitle: Boolean): TIndexedWord;
|
||||||
function GetWordBackward(AWord: String; StartWord: TIndexedWord; out WrongWord: TIndexedWord; AIsTitle: Boolean): TIndexedWord;
|
function GetWordBackward(AWord: String; StartWord: TIndexedWord; out WrongWord: TIndexedWord; AIsTitle: Boolean): TIndexedWord;
|
||||||
@ -95,7 +97,7 @@ Type
|
|||||||
public
|
public
|
||||||
constructor Create;
|
constructor Create;
|
||||||
destructor Destroy; override;
|
destructor Destroy; override;
|
||||||
function IndexFile(AStream: TStream; ATOPICIndex: Integer): String; // returns the documents <Title>
|
function IndexFile(AStream: TStream; ATOPICIndex: Integer; AIndexOnlyTitles: Boolean): String; // returns the documents <Title>
|
||||||
procedure Clear;
|
procedure Clear;
|
||||||
procedure AddWord(const AWord: TIndexedWord; StartingWord: TIndexedWord; AIsTitle: Boolean);
|
procedure AddWord(const AWord: TIndexedWord; StartingWord: TIndexedWord; AIsTitle: Boolean);
|
||||||
property FirstWord: TIndexedWord read FFirstWord;
|
property FirstWord: TIndexedWord read FFirstWord;
|
||||||
@ -231,7 +233,7 @@ begin
|
|||||||
else if NoCaseTag = '<BODY>' then FInBody := True
|
else if NoCaseTag = '<BODY>' then FInBody := True
|
||||||
else
|
else
|
||||||
end;
|
end;
|
||||||
|
if FInBody and FIndexTitlesOnly then FParser.Done := True;
|
||||||
end;
|
end;
|
||||||
|
|
||||||
procedure TIndexedWordList.CBFountText(Text: string);
|
procedure TIndexedWordList.CBFountText(Text: string);
|
||||||
@ -325,13 +327,13 @@ begin
|
|||||||
inherited Destroy;
|
inherited Destroy;
|
||||||
end;
|
end;
|
||||||
|
|
||||||
function TIndexedWordList.IndexFile(AStream: TStream; ATOPICIndex: Integer): String;
|
function TIndexedWordList.IndexFile(AStream: TStream; ATOPICIndex: Integer; AIndexOnlyTitles: Boolean): String;
|
||||||
var
|
var
|
||||||
TheFile: String;
|
TheFile: String;
|
||||||
Parser: THTMLParser;
|
|
||||||
begin
|
begin
|
||||||
FInBody := False;
|
FInBody := False;
|
||||||
FInTitle:= False;
|
FInTitle:= False;
|
||||||
|
FIndexTitlesOnly := AIndexOnlyTitles;
|
||||||
FWordCount := 0;
|
FWordCount := 0;
|
||||||
FTopicIndex := ATOPICIndex;
|
FTopicIndex := ATOPICIndex;
|
||||||
FIndexedFileCount := FIndexedFileCount +1;
|
FIndexedFileCount := FIndexedFileCount +1;
|
||||||
@ -341,11 +343,11 @@ begin
|
|||||||
AStream.Read(TheFile[1], AStream.Size);
|
AStream.Read(TheFile[1], AStream.Size);
|
||||||
TheFile[Length(TheFile)] := #0;
|
TheFile[Length(TheFile)] := #0;
|
||||||
|
|
||||||
Parser := THTMLParser.Create(@TheFile[1]);
|
FParser := THTMLParser.Create(@TheFile[1]);
|
||||||
Parser.OnFoundTag := @CBFoundTag;
|
FParser.OnFoundTag := @CBFoundTag;
|
||||||
Parser.OnFoundText := @CBFountText;
|
FParser.OnFoundText := @CBFountText;
|
||||||
Parser.Exec;
|
FParser.Exec;
|
||||||
Parser.Free;
|
FParser.Free;
|
||||||
|
|
||||||
Result := FDocTitle;
|
Result := FDocTitle;
|
||||||
FDocTitle := '';
|
FDocTitle := '';
|
||||||
|
Loading…
Reference in New Issue
Block a user