* Support for available words search

git-svn-id: trunk@39425 -
This commit is contained in:
michael 2018-07-10 07:27:44 +00:00
parent d7fa0b1998
commit 817b727435
4 changed files with 163 additions and 24 deletions

View File

@ -32,6 +32,8 @@ type
TokenType: TWordTokenType; TokenType: TWordTokenType;
end; end;
TUTF8StringArray = Array of UTF8String;
TIgnoreListDef = class; TIgnoreListDef = class;
{ TWordParser } { TWordParser }
@ -70,6 +72,7 @@ type
{ TCustomIndexDB } { TCustomIndexDB }
TAvailableMatch = (amAll,amExact,amContains,amStartsWith);
TCustomIndexDB = class(TComponent) TCustomIndexDB = class(TComponent)
public public
procedure CreateDB; virtual; abstract; procedure CreateDB; virtual; abstract;
@ -81,6 +84,7 @@ type
procedure DeleteWordsFromFile(URL: UTF8String); virtual; abstract; procedure DeleteWordsFromFile(URL: UTF8String); virtual; abstract;
procedure AddSearchData(ASearchData: TSearchWordData); virtual; abstract; procedure AddSearchData(ASearchData: TSearchWordData); virtual; abstract;
procedure FindSearchData(SearchWord: TWordParser; FPSearch: TFPSearch; SearchOptions: TSearchOptions); virtual; abstract; procedure FindSearchData(SearchWord: TWordParser; FPSearch: TFPSearch; SearchOptions: TSearchOptions); virtual; abstract;
Function GetAvailableWords(out aList : TUTF8StringArray; aContaining : UTF8String; Partial : TAvailableMatch) : integer;virtual; abstract;
procedure CreateIndexerTables; virtual; abstract; procedure CreateIndexerTables; virtual; abstract;
end; end;
@ -112,7 +116,7 @@ const
itMatches, itMatches, itMatches, itMatches, itMatches, itMatches, itMatches, itMatches, itMatches, itMatches, itMatches, itMatches,
itLanguages, itLanguages, itLanguages, itLanguages,
itFiles, itFiles, itFiles, itFiles, itFiles, itFiles); itFiles, itFiles, itFiles, itFiles, itFiles, itFiles);
SearchTermParam = 'SearchTerm';
DefaultTableNames: array[TIndexTable] of UTF8String = ('WORDS', 'FILELANGUAGES', 'FILENAMES', 'WORDMATCHES'); DefaultTableNames: array[TIndexTable] of UTF8String = ('WORDS', 'FILELANGUAGES', 'FILENAMES', 'WORDMATCHES');
DefaultIndexNames: array[TIndexIndex] of UTF8String = ('I_WORDS', 'I_WORDMATCHES', 'I_FILELANGUAGES', 'I_FILENAMES'); DefaultIndexNames: array[TIndexIndex] of UTF8String = ('I_WORDS', 'I_WORDMATCHES', 'I_FILELANGUAGES', 'I_FILENAMES');
DefaultFieldNames: array[TIndexField] of UTF8String = ( DefaultFieldNames: array[TIndexField] of UTF8String = (
@ -162,6 +166,7 @@ type
function GetUrlSQL(UseParams: boolean = True): UTF8String; virtual; function GetUrlSQL(UseParams: boolean = True): UTF8String; virtual;
function GetWordSQL(UseParams: boolean = True): UTF8String; virtual; function GetWordSQL(UseParams: boolean = True): UTF8String; virtual;
function InsertSQL(const TableType: TIndexTable; UseParams: boolean = True): UTF8String; virtual; function InsertSQL(const TableType: TIndexTable; UseParams: boolean = True): UTF8String; virtual;
Function AvailableWordsSQL(aContaining : UTF8String; Partial : TAvailableMatch) : UTF8String; virtual;
procedure FinishCreateTable(const TableType: TIndexTable); virtual; procedure FinishCreateTable(const TableType: TIndexTable); virtual;
procedure FinishDropTable(const TableType: TIndexTable); virtual; procedure FinishDropTable(const TableType: TIndexTable); virtual;
protected protected
@ -259,6 +264,7 @@ type
FOnProgress: TIndexProgressEvent; FOnProgress: TIndexProgressEvent;
FSearchPath: UTF8String; FSearchPath: UTF8String;
FSearchRecursive: boolean; FSearchRecursive: boolean;
FStripPath: String;
FUseIgnoreList: boolean; FUseIgnoreList: boolean;
ExcludeMaskPatternList: TStrings; ExcludeMaskPatternList: TStrings;
MaskPatternList: TStrings; MaskPatternList: TStrings;
@ -292,6 +298,7 @@ type
property SearchRecursive: boolean read FSearchRecursive write FSearchRecursive; property SearchRecursive: boolean read FSearchRecursive write FSearchRecursive;
property DetectLanguage: boolean read FDetectLanguage write FDetectLanguage; property DetectLanguage: boolean read FDetectLanguage write FDetectLanguage;
Property CodePage : TSystemCodePage Read FCodePage Write FCodePage; Property CodePage : TSystemCodePage Read FCodePage Write FCodePage;
Property StripPath : String Read FStripPath Write FStripPath;
end; end;
{ TFileReaderDef } { TFileReaderDef }
@ -349,7 +356,6 @@ type
end; end;
{ TFPSearch } { TFPSearch }
TFPSearch = class (TComponent) TFPSearch = class (TComponent)
private private
FCount: integer; FCount: integer;
@ -375,6 +381,7 @@ type
property Results[index: integer]: TSearchWordData read GetResults; property Results[index: integer]: TSearchWordData read GetResults;
property RankedResults[index: integer]: TSearchWordData read GetRankedResults; property RankedResults[index: integer]: TSearchWordData read GetRankedResults;
procedure SetSearchWord(AValue: UTF8String); procedure SetSearchWord(AValue: UTF8String);
Function GetAvailableWords(out aList : TUTF8StringArray; aContaining : UTF8String; Partial : TAvailableMatch) : Integer;
published published
property Database: TCustomIndexDB read FDatabase write SetDatabase; property Database: TCustomIndexDB read FDatabase write SetDatabase;
property Options: TSearchOptions read FOptions write FOptions; property Options: TSearchOptions read FOptions write FOptions;
@ -823,6 +830,12 @@ begin
FSearchWord.WildCardChar := '%'; //should come from DataBase FSearchWord.WildCardChar := '%'; //should come from DataBase
end; end;
Function TFPSearch.GetAvailableWords(out aList : TUTF8StringArray; aContaining: UTF8String; Partial: TAvailableMatch) : Integer;
begin
Database.Connect;
Result:=Database.GetAvailableWords(aList, aContaining, Partial);
end;
function TFPSearch.GetResults(index: integer): TSearchWordData; function TFPSearch.GetResults(index: integer): TSearchWordData;
begin begin
Result := ResultList[index]; Result := ResultList[index];
@ -1302,12 +1315,16 @@ Var
i: integer; i: integer;
Stub: TAddWordStub; Stub: TAddWordStub;
AWord: TSearchWordData; AWord: TSearchWordData;
U : String;
begin begin
// If reader must detect language, the stub cannot be used. // If reader must detect language, the stub cannot be used.
U:=AURL;
If (StripPath<>'') and (Pos(StripPath,aURL)=1) then
Delete(U,1,Length(StripPath));
if not DetectLanguage then if not DetectLanguage then
begin begin
Stub := TAddWordStub.Create(AURL, ADateTime, Database); Stub := TAddWordStub.Create(U, ADateTime, Database);
try try
Reader.OnAddSearchWord := @Stub.DoAddWord; Reader.OnAddSearchWord := @Stub.DoAddWord;
Reader.LoadFromStream(S); Reader.LoadFromStream(S);
@ -1322,7 +1339,7 @@ begin
for i := 0 to Reader.Count - 1 do for i := 0 to Reader.Count - 1 do
begin begin
AWord := Reader.SearchWord[i]; AWord := Reader.SearchWord[i];
AWord.URL := AURL; AWord.URL := U;
AWord.FileDate := ADateTime; AWord.FileDate := ADateTime;
AWord.Language := Reader.Language; AWord.Language := Reader.Language;
AWord.SearchWord := LowerCase(AWord.SearchWord); AWord.SearchWord := LowerCase(AWord.SearchWord);
@ -1627,6 +1644,17 @@ begin
Result := Format('INSERT INTO %s (%s) VALUES (%s)', [GetTableName(TableType), FL, VL]); Result := Format('INSERT INTO %s (%s) VALUES (%s)', [GetTableName(TableType), FL, VL]);
end; end;
function TSQLIndexDB.AvailableWordsSQL(aContaining: UTF8String; Partial: TAvailableMatch): UTF8String;
begin
Result:=Format('SELECT %s from %s ',[GetFieldName(ifWordsWord),GetTableName(itWords)]);
if not ((aContaining='') or (Partial=amAll)) then
if Partial = amExact then
Result:=Result+Format(' WHERE (%s = :%s)',[GetFieldName(ifWordsWord),SearchTermParam])
else
Result:=Result+Format(' WHERE (%s LIKE :%s)',[GetFieldName(ifWordsWord),SearchTermParam]);
end;
function TSQLIndexDB.CreateTableIndex(IndexType: TIndexIndex): UTF8String; function TSQLIndexDB.CreateTableIndex(IndexType: TIndexIndex): UTF8String;
var var
TIN: UTF8String; TIN: UTF8String;

View File

@ -85,7 +85,10 @@ Type
{ TWordItem } { TWordItem }
TWordItem = Class(TMatchedItem); TWordItem = Class(TMatchedItem)
Public
Function IsAvailableMatch(aContaining : UTF8string; aPartial : TAvailableMatch) : Boolean;
end;
{ TURLItem } { TURLItem }
@ -170,6 +173,7 @@ Type
procedure DeleteWordsFromFile(URL: UTF8string); override; procedure DeleteWordsFromFile(URL: UTF8string); override;
procedure AddSearchData(ASearchData: TSearchWordData); override; procedure AddSearchData(ASearchData: TSearchWordData); override;
procedure FindSearchData(SearchWord: TWordParser; FPSearch: TFPSearch; SearchOptions: TSearchOptions); override; procedure FindSearchData(SearchWord: TWordParser; FPSearch: TFPSearch; SearchOptions: TSearchOptions); override;
Function GetAvailableWords(out aList : TUTF8StringArray; aContaining : UTF8String; Partial : TAvailableMatch) : integer;override;
procedure CreateIndexerTables; override; procedure CreateIndexerTables; override;
Property Stream : TStream Read FStream Write FStream; Property Stream : TStream Read FStream Write FStream;
end; end;
@ -198,7 +202,7 @@ uses bufstream;
{ TMemIndexDB } { TMemIndexDB }
Resourcestring Resourcestring
SErrNoStream = 'No stream assigned'; // SErrNoStream = 'No stream assigned';
SInvalidStreamData = 'Invalid data at offset %d. Got %d, expected %d.'; SInvalidStreamData = 'Invalid data at offset %d. Got %d, expected %d.';
{ TFileIndexDB } { TFileIndexDB }
@ -210,6 +214,18 @@ Const
WordBlock = 3; WordBlock = 3;
MatchBlock = 4; MatchBlock = 4;
{ TWordItem }
function TWordItem.IsAvailableMatch(aContaining: UTF8string; aPartial: TAvailableMatch): Boolean;
begin
case aPartial of
amAll : Result:=True;
amExact : Result:=(Description=AContaining);
amContains : Result:=Pos(aContaining,Description)>0;
amStartsWith : Result:=Pos(aContaining,Description)=1;
end;
end;
{ TURLItem } { TURLItem }
function TURLItem.BlockSize: Integer; function TURLItem.BlockSize: Integer;
@ -220,8 +236,6 @@ end;
procedure TURLItem.WriteToStream(S: TStream); procedure TURLItem.WriteToStream(S: TStream);
Var
I : Integer;
begin begin
inherited WriteToStream(S); inherited WriteToStream(S);
@ -282,6 +296,7 @@ function TDescrItem.ReadStringFromStream(Astream: TStream): UTF8string;
Var Var
L : Integer; L : Integer;
begin begin
L:=0;
AStream.ReadBuffer(L,SizeOf(L)); AStream.ReadBuffer(L,SizeOf(L));
SetLength(Result,L); SetLength(Result,L);
if (L>0) then if (L>0) then
@ -291,6 +306,7 @@ end;
function TDescrItem.ReadFromStream(S: TStream) : Integer; function TDescrItem.ReadFromStream(S: TStream) : Integer;
begin begin
Result:=0;
S.ReadBuffer(Result,SizeOf(Result)); S.ReadBuffer(Result,SizeOf(Result));
Description:=ReadStringFromStream(S); Description:=ReadStringFromStream(S);
end; end;
@ -367,8 +383,6 @@ procedure TFileIndexDB.SaveToStream;
Var Var
I : Integer; I : Integer;
L : Integer;
U : TURLItem;
begin begin
Stream.WriteDWord(FileVersion); Stream.WriteDWord(FileVersion);
@ -551,16 +565,13 @@ end;
procedure TMemIndexDB.IntersectMatches(ListA,ListB : TFPList); procedure TMemIndexDB.IntersectMatches(ListA,ListB : TFPList);
Var Var
L : TFPList;
URL : TURLItem; URL : TURLItem;
I,J : Integer; I,J : Integer;
OK : Boolean;
begin begin
For I:=ListA.Count-1 downto 0 do For I:=ListA.Count-1 downto 0 do
begin begin
URL:=TMatch(ListA[i]).URL; URL:=TMatch(ListA[i]).URL;
OK:=False;
J:=ListB.Count-1; J:=ListB.Count-1;
While (J>=0) and (TMatch(ListB[i]).URL<>URL) do While (J>=0) and (TMatch(ListB[i]).URL<>URL) do
Dec(J); Dec(J);
@ -655,6 +666,24 @@ begin
end; end;
end; end;
function TMemIndexDB.GetAvailableWords(out aList: TUTF8StringArray; aContaining: UTF8String; Partial: TAvailableMatch): integer;
Var
I : integer;
begin
Result:=0;
aContaining:=LowerCase(aContaining);
SetLength(aList,FWords.Count);
For I:=0 to FWords.Count-1 do
if TWordItem(FWords[i]).IsAvailableMatch(aContaining,Partial) then
begin
aList[Result]:=FWords[i].Description;
Inc(Result);
end;
SetLength(aList,Result);
end;
procedure TMemIndexDB.CreateIndexerTables; procedure TMemIndexDB.CreateIndexerTables;
begin begin
Clear; Clear;
@ -719,9 +748,6 @@ end;
procedure TMatch.WriteToStream(S: TStream); procedure TMatch.WriteToStream(S: TStream);
Var
L : Integer;
begin begin
inherited WriteToStream(S); inherited WriteToStream(S);
S.WriteBuffer(FPosition,Sizeof(FPosition)); S.WriteBuffer(FPosition,Sizeof(FPosition));
@ -851,7 +877,7 @@ end;
function TMatchedItem.AddMatch(AMatch: TMatch): Integer; function TMatchedItem.AddMatch(AMatch: TMatch): Integer;
begin begin
FList.Add(AMatch); Result:=FList.Add(AMatch);
end; end;
procedure TMatchedItem.RemoveMatch(AMatch: TMatch); procedure TMatchedItem.RemoveMatch(AMatch: TMatch);

View File

@ -72,6 +72,7 @@ type
procedure CompactDB; override; procedure CompactDB; override;
procedure AddSearchData(ASearchData: TSearchWordData); override; procedure AddSearchData(ASearchData: TSearchWordData); override;
procedure FindSearchData(SearchWord: TWordParser; FPSearch: TFPSearch; SearchOptions: TSearchOptions); override; procedure FindSearchData(SearchWord: TWordParser; FPSearch: TFPSearch; SearchOptions: TSearchOptions); override;
function GetAvailableWords(out aList : TUTF8StringArray; aContaining : UTF8String; Partial : TAvailableMatch) : integer;override;
procedure DeleteWordsFromFile(URL: UTF8String); override; procedure DeleteWordsFromFile(URL: UTF8String); override;
Property NativeConnection : TSQLConnection Read FDB; Property NativeConnection : TSQLConnection Read FDB;
published published
@ -173,7 +174,7 @@ begin
Result.Transaction := Self.FDB.Transaction; Result.Transaction := Self.FDB.Transaction;
Result.SQL.Text := ASQL; Result.SQL.Text := ASQL;
Result.UsePrimaryKeyAsKey:=False; Result.UsePrimaryKeyAsKey:=False;
Result.UniDirectional:=True; // Result.UniDirectional:=True;
//Writeln('SQL :',ASQL); //Writeln('SQL :',ASQL);
end; end;
@ -186,8 +187,6 @@ begin
else else
begin begin
Q := CreateCachedQuery(cqtGetFileID, GetSearchFileSQL); Q := CreateCachedQuery(cqtGetFileID, GetSearchFileSQL);
If Length(URL)>255 then
Writeln('URL Length : ',Length(URL),' : ',URL);
Q.ParamByName(GetFieldName(ifFilesURL)).AsString := URL; Q.ParamByName(GetFieldName(ifFilesURL)).AsString := URL;
Q.Open; Q.Open;
try try
@ -240,7 +239,6 @@ var
begin begin
Q := CreateQuery(GetMatchSQL(SearchOptions,SearchWord,True)); Q := CreateQuery(GetMatchSQL(SearchOptions,SearchWord,True));
try try
Writeln(Q.SQL.Text);
WW := getFieldName(ifWordsWord); WW := getFieldName(ifWordsWord);
for i := 0 to SearchWord.Count - 1 do for i := 0 to SearchWord.Count - 1 do
If SearchWord.Token[i].TokenType=wtWord then If SearchWord.Token[i].TokenType=wtWord then
@ -261,6 +259,7 @@ begin
FC := Q.FieldByName(GetFieldName(ifMatchesContext)); FC := Q.FieldByName(GetFieldName(ifMatchesContext));
FP := Q.FieldByName(GetFieldName(ifMatchesPosition)); FP := Q.FieldByName(GetFieldName(ifMatchesPosition));
FW := Q.FieldByName(GetFieldName(ifWordsWord)); FW := Q.FieldByName(GetFieldName(ifWordsWord));
I:=0;
while not Q.EOF do while not Q.EOF do
begin begin
Res.FileDate := FD.AsDateTime; Res.FileDate := FD.AsDateTime;
@ -268,7 +267,9 @@ begin
Res.SearchWord := FW.AsString; Res.SearchWord := FW.AsString;
Res.Position := FP.AsInteger; Res.Position := FP.AsInteger;
Res.Context:=FC.aSString; Res.Context:=FC.aSString;
FPSearch.AddResult(Q.RecNo, Res); Res.Rank:=0;
FPSearch.AddResult(i, Res);
Inc(I);
Q.Next; Q.Next;
end; end;
finally finally
@ -276,6 +277,40 @@ begin
end; end;
end; end;
Function TSQLDBIndexDB.GetAvailableWords(out aList : TUTF8StringArray; aContaining: UTF8String; Partial: TAvailableMatch) : Integer;
Var
Q : TSQLQuery;
begin
Result:=0;
Q := CreateQuery(AvailableWordsSQL(aContaining,Partial));
try
Q.PacketRecords:=-1;
if (aContaining<>'') or (Partial<>amall) then
With Q.ParamByName(SearchTermParam) do
case Partial of
amExact : AsString:=aContaining;
amContains : AsString:='%'+aContaining+'%';
amStartsWith : AsString:=aContaining+'%';
end;
Q.Open;
SetLength(aList,Q.RecordCount);
Q.First;
While not Q.EOF do
begin
If Length(aList)<=Result then
SetLength(aList,Result+100);
aList[Result]:=Q.Fields[0].AsUTF8String;
Inc(Result);
Q.Next;
end;
SetLength(aList,Result);
finally
Q.Free;
end;
end;
procedure TSQLDBIndexDB.DeleteWordsFromFile(URL: UTF8String); procedure TSQLDBIndexDB.DeleteWordsFromFile(URL: UTF8String);
begin begin
inherited DeleteWordsFromFile(URL); inherited DeleteWordsFromFile(URL);
@ -293,8 +328,7 @@ begin
if not IgnoreErrors then if not IgnoreErrors then
raise raise
else else
Writeln(E.ClassName,' : ',E.Message); // Writeln(E.ClassName,' : ',E.Message);
end; end;
end; end;

View File

@ -39,6 +39,7 @@ type
QueryResult: UTF8string; QueryResult: UTF8string;
SearchWordID: TDatabaseID; SearchWordID: TDatabaseID;
URLID: TDatabaseID; URLID: TDatabaseID;
FMatchList : TUTF8StringArray;
procedure CheckSQLite(Rc: cint; pzErrMsg: PChar); procedure CheckSQLite(Rc: cint; pzErrMsg: PChar);
protected protected
class function AllowForeignKeyInTable: boolean; override; class function AllowForeignKeyInTable: boolean; override;
@ -60,6 +61,7 @@ type
procedure CreateDB; override; procedure CreateDB; override;
procedure DeleteWordsFromFile(URL: UTF8string); override; procedure DeleteWordsFromFile(URL: UTF8string); override;
procedure FindSearchData(SearchWord: TWordParser; FPSearch: TFPSearch; SearchOptions: TSearchOptions); override; procedure FindSearchData(SearchWord: TWordParser; FPSearch: TFPSearch; SearchOptions: TSearchOptions); override;
function GetAvailableWords(out aList : TUTF8StringArray; aContaining : UTF8String; Partial : TAvailableMatch) : integer;override;
published published
property FileName: UTF8string read FFileName write FFileName; property FileName: UTF8string read FFileName write FFileName;
end; end;
@ -90,6 +92,26 @@ begin
Result := 0; Result := 0;
end; end;
function WordListCallback(_para1: pointer; plArgc: longint; argv: PPchar; argcol: PPchar): longint; cdecl;
var
PVal: ^PChar;
S : UTF8String;
begin
PVal := argv;
S:=PVal^;
with TSQLiteIndexDB(_para1) do
begin
if Length(FMatchList)<=FRow then
SetLength(FMatchList,Length(FMatchList)+10);
FMatchList[FRow]:=S;
Inc(Frow);
end;
Result := 0;
end;
function IndexCallback(_para1: pointer; plArgc: longint; argv: PPchar; argcol: PPchar): longint; cdecl; function IndexCallback(_para1: pointer; plArgc: longint; argv: PPchar; argcol: PPchar): longint; cdecl;
begin begin
//store the query result //store the query result
@ -287,5 +309,34 @@ begin
CheckSQLite(rc, pzErrMsg); CheckSQLite(rc, pzErrMsg);
end; end;
function TSQLiteIndexDB.GetAvailableWords(out aList: TUTF8StringArray; aContaining: UTF8String; Partial: TAvailableMatch): integer;
Var
st,sql: UTF8string;
rc: cint;
pzErrMsg: PChar;
begin
Result:=0;
FRow:=0;
SetLength(FMatchList,0);
aContaining:=LowerCase(aContaining);
sql := AvailableWordsSQL(aContaining,Partial);
aContaining:=StringReplace(aContaining,'''','''''',[rfReplaceAll]);
case Partial of
amExact : st:=aContaining;
amContains : st:='%'+aContaining+'%';
amStartsWith : st:=aContaining+'%';
else
ST:='';
end;
sql:=StringReplace(SQL,':'+SearchTermParam,''''+ST+'''',[]);
rc := sqlite3_exec(db, PChar(sql), @WordListCallback, self, @pzErrMsg);
CheckSQLite(rc, pzErrMsg);
SetLength(FMatchList,FRow);
aList:=FMatchList;
FMatchList:=Nil;
end;
end. end.