* Support for available words search

git-svn-id: trunk@39425 -
This commit is contained in:
michael 2018-07-10 07:27:44 +00:00
parent d7fa0b1998
commit 817b727435
4 changed files with 163 additions and 24 deletions

View File

@ -32,6 +32,8 @@ type
TokenType: TWordTokenType;
end;
TUTF8StringArray = Array of UTF8String;
TIgnoreListDef = class;
{ TWordParser }
@ -70,6 +72,7 @@ type
{ TCustomIndexDB }
TAvailableMatch = (amAll,amExact,amContains,amStartsWith);
TCustomIndexDB = class(TComponent)
public
procedure CreateDB; virtual; abstract;
@ -81,6 +84,7 @@ type
procedure DeleteWordsFromFile(URL: UTF8String); virtual; abstract;
procedure AddSearchData(ASearchData: TSearchWordData); virtual; abstract;
procedure FindSearchData(SearchWord: TWordParser; FPSearch: TFPSearch; SearchOptions: TSearchOptions); virtual; abstract;
Function GetAvailableWords(out aList : TUTF8StringArray; aContaining : UTF8String; Partial : TAvailableMatch) : integer;virtual; abstract;
procedure CreateIndexerTables; virtual; abstract;
end;
@ -112,7 +116,7 @@ const
itMatches, itMatches, itMatches, itMatches, itMatches, itMatches,
itLanguages, itLanguages,
itFiles, itFiles, itFiles, itFiles, itFiles, itFiles);
SearchTermParam = 'SearchTerm';
DefaultTableNames: array[TIndexTable] of UTF8String = ('WORDS', 'FILELANGUAGES', 'FILENAMES', 'WORDMATCHES');
DefaultIndexNames: array[TIndexIndex] of UTF8String = ('I_WORDS', 'I_WORDMATCHES', 'I_FILELANGUAGES', 'I_FILENAMES');
DefaultFieldNames: array[TIndexField] of UTF8String = (
@ -162,6 +166,7 @@ type
function GetUrlSQL(UseParams: boolean = True): UTF8String; virtual;
function GetWordSQL(UseParams: boolean = True): UTF8String; virtual;
function InsertSQL(const TableType: TIndexTable; UseParams: boolean = True): UTF8String; virtual;
Function AvailableWordsSQL(aContaining : UTF8String; Partial : TAvailableMatch) : UTF8String; virtual;
procedure FinishCreateTable(const TableType: TIndexTable); virtual;
procedure FinishDropTable(const TableType: TIndexTable); virtual;
protected
@ -259,6 +264,7 @@ type
FOnProgress: TIndexProgressEvent;
FSearchPath: UTF8String;
FSearchRecursive: boolean;
FStripPath: String;
FUseIgnoreList: boolean;
ExcludeMaskPatternList: TStrings;
MaskPatternList: TStrings;
@ -292,6 +298,7 @@ type
property SearchRecursive: boolean read FSearchRecursive write FSearchRecursive;
property DetectLanguage: boolean read FDetectLanguage write FDetectLanguage;
Property CodePage : TSystemCodePage Read FCodePage Write FCodePage;
Property StripPath : String Read FStripPath Write FStripPath;
end;
{ TFileReaderDef }
@ -349,7 +356,6 @@ type
end;
{ TFPSearch }
TFPSearch = class (TComponent)
private
FCount: integer;
@ -375,6 +381,7 @@ type
property Results[index: integer]: TSearchWordData read GetResults;
property RankedResults[index: integer]: TSearchWordData read GetRankedResults;
procedure SetSearchWord(AValue: UTF8String);
Function GetAvailableWords(out aList : TUTF8StringArray; aContaining : UTF8String; Partial : TAvailableMatch) : Integer;
published
property Database: TCustomIndexDB read FDatabase write SetDatabase;
property Options: TSearchOptions read FOptions write FOptions;
@ -823,6 +830,12 @@ begin
FSearchWord.WildCardChar := '%'; //should come from DataBase
end;
Function TFPSearch.GetAvailableWords(out aList : TUTF8StringArray; aContaining: UTF8String; Partial: TAvailableMatch) : Integer;
begin
Database.Connect;
Result:=Database.GetAvailableWords(aList, aContaining, Partial);
end;
function TFPSearch.GetResults(index: integer): TSearchWordData;
begin
Result := ResultList[index];
@ -1302,12 +1315,16 @@ Var
i: integer;
Stub: TAddWordStub;
AWord: TSearchWordData;
U : String;
begin
// If reader must detect language, the stub cannot be used.
U:=AURL;
If (StripPath<>'') and (Pos(StripPath,aURL)=1) then
Delete(U,1,Length(StripPath));
if not DetectLanguage then
begin
Stub := TAddWordStub.Create(AURL, ADateTime, Database);
Stub := TAddWordStub.Create(U, ADateTime, Database);
try
Reader.OnAddSearchWord := @Stub.DoAddWord;
Reader.LoadFromStream(S);
@ -1322,7 +1339,7 @@ begin
for i := 0 to Reader.Count - 1 do
begin
AWord := Reader.SearchWord[i];
AWord.URL := AURL;
AWord.URL := U;
AWord.FileDate := ADateTime;
AWord.Language := Reader.Language;
AWord.SearchWord := LowerCase(AWord.SearchWord);
@ -1627,6 +1644,17 @@ begin
Result := Format('INSERT INTO %s (%s) VALUES (%s)', [GetTableName(TableType), FL, VL]);
end;
function TSQLIndexDB.AvailableWordsSQL(aContaining: UTF8String; Partial: TAvailableMatch): UTF8String;
begin
Result:=Format('SELECT %s from %s ',[GetFieldName(ifWordsWord),GetTableName(itWords)]);
if not ((aContaining='') or (Partial=amAll)) then
if Partial = amExact then
Result:=Result+Format(' WHERE (%s = :%s)',[GetFieldName(ifWordsWord),SearchTermParam])
else
Result:=Result+Format(' WHERE (%s LIKE :%s)',[GetFieldName(ifWordsWord),SearchTermParam]);
end;
function TSQLIndexDB.CreateTableIndex(IndexType: TIndexIndex): UTF8String;
var
TIN: UTF8String;

View File

@ -85,7 +85,10 @@ Type
{ TWordItem }
TWordItem = Class(TMatchedItem);
TWordItem = Class(TMatchedItem)
Public
Function IsAvailableMatch(aContaining : UTF8string; aPartial : TAvailableMatch) : Boolean;
end;
{ TURLItem }
@ -170,6 +173,7 @@ Type
procedure DeleteWordsFromFile(URL: UTF8string); override;
procedure AddSearchData(ASearchData: TSearchWordData); override;
procedure FindSearchData(SearchWord: TWordParser; FPSearch: TFPSearch; SearchOptions: TSearchOptions); override;
Function GetAvailableWords(out aList : TUTF8StringArray; aContaining : UTF8String; Partial : TAvailableMatch) : integer;override;
procedure CreateIndexerTables; override;
Property Stream : TStream Read FStream Write FStream;
end;
@ -198,7 +202,7 @@ uses bufstream;
{ TMemIndexDB }
Resourcestring
SErrNoStream = 'No stream assigned';
// SErrNoStream = 'No stream assigned';
SInvalidStreamData = 'Invalid data at offset %d. Got %d, expected %d.';
{ TFileIndexDB }
@ -210,6 +214,18 @@ Const
WordBlock = 3;
MatchBlock = 4;
{ TWordItem }
function TWordItem.IsAvailableMatch(aContaining: UTF8string; aPartial: TAvailableMatch): Boolean;
begin
case aPartial of
amAll : Result:=True;
amExact : Result:=(Description=AContaining);
amContains : Result:=Pos(aContaining,Description)>0;
amStartsWith : Result:=Pos(aContaining,Description)=1;
end;
end;
{ TURLItem }
function TURLItem.BlockSize: Integer;
@ -220,8 +236,6 @@ end;
procedure TURLItem.WriteToStream(S: TStream);
Var
I : Integer;
begin
inherited WriteToStream(S);
@ -282,6 +296,7 @@ function TDescrItem.ReadStringFromStream(Astream: TStream): UTF8string;
Var
L : Integer;
begin
L:=0;
AStream.ReadBuffer(L,SizeOf(L));
SetLength(Result,L);
if (L>0) then
@ -291,6 +306,7 @@ end;
function TDescrItem.ReadFromStream(S: TStream) : Integer;
begin
Result:=0;
S.ReadBuffer(Result,SizeOf(Result));
Description:=ReadStringFromStream(S);
end;
@ -367,8 +383,6 @@ procedure TFileIndexDB.SaveToStream;
Var
I : Integer;
L : Integer;
U : TURLItem;
begin
Stream.WriteDWord(FileVersion);
@ -551,16 +565,13 @@ end;
procedure TMemIndexDB.IntersectMatches(ListA,ListB : TFPList);
Var
L : TFPList;
URL : TURLItem;
I,J : Integer;
OK : Boolean;
begin
For I:=ListA.Count-1 downto 0 do
begin
URL:=TMatch(ListA[i]).URL;
OK:=False;
J:=ListB.Count-1;
While (J>=0) and (TMatch(ListB[i]).URL<>URL) do
Dec(J);
@ -655,6 +666,24 @@ begin
end;
end;
function TMemIndexDB.GetAvailableWords(out aList: TUTF8StringArray; aContaining: UTF8String; Partial: TAvailableMatch): integer;
Var
I : integer;
begin
Result:=0;
aContaining:=LowerCase(aContaining);
SetLength(aList,FWords.Count);
For I:=0 to FWords.Count-1 do
if TWordItem(FWords[i]).IsAvailableMatch(aContaining,Partial) then
begin
aList[Result]:=FWords[i].Description;
Inc(Result);
end;
SetLength(aList,Result);
end;
procedure TMemIndexDB.CreateIndexerTables;
begin
Clear;
@ -719,9 +748,6 @@ end;
procedure TMatch.WriteToStream(S: TStream);
Var
L : Integer;
begin
inherited WriteToStream(S);
S.WriteBuffer(FPosition,Sizeof(FPosition));
@ -851,7 +877,7 @@ end;
function TMatchedItem.AddMatch(AMatch: TMatch): Integer;
begin
FList.Add(AMatch);
Result:=FList.Add(AMatch);
end;
procedure TMatchedItem.RemoveMatch(AMatch: TMatch);

View File

@ -72,6 +72,7 @@ type
procedure CompactDB; override;
procedure AddSearchData(ASearchData: TSearchWordData); override;
procedure FindSearchData(SearchWord: TWordParser; FPSearch: TFPSearch; SearchOptions: TSearchOptions); override;
function GetAvailableWords(out aList : TUTF8StringArray; aContaining : UTF8String; Partial : TAvailableMatch) : integer;override;
procedure DeleteWordsFromFile(URL: UTF8String); override;
Property NativeConnection : TSQLConnection Read FDB;
published
@ -173,7 +174,7 @@ begin
Result.Transaction := Self.FDB.Transaction;
Result.SQL.Text := ASQL;
Result.UsePrimaryKeyAsKey:=False;
Result.UniDirectional:=True;
// Result.UniDirectional:=True;
//Writeln('SQL :',ASQL);
end;
@ -186,8 +187,6 @@ begin
else
begin
Q := CreateCachedQuery(cqtGetFileID, GetSearchFileSQL);
If Length(URL)>255 then
Writeln('URL Length : ',Length(URL),' : ',URL);
Q.ParamByName(GetFieldName(ifFilesURL)).AsString := URL;
Q.Open;
try
@ -240,7 +239,6 @@ var
begin
Q := CreateQuery(GetMatchSQL(SearchOptions,SearchWord,True));
try
Writeln(Q.SQL.Text);
WW := getFieldName(ifWordsWord);
for i := 0 to SearchWord.Count - 1 do
If SearchWord.Token[i].TokenType=wtWord then
@ -261,6 +259,7 @@ begin
FC := Q.FieldByName(GetFieldName(ifMatchesContext));
FP := Q.FieldByName(GetFieldName(ifMatchesPosition));
FW := Q.FieldByName(GetFieldName(ifWordsWord));
I:=0;
while not Q.EOF do
begin
Res.FileDate := FD.AsDateTime;
@ -268,7 +267,9 @@ begin
Res.SearchWord := FW.AsString;
Res.Position := FP.AsInteger;
Res.Context:=FC.aSString;
FPSearch.AddResult(Q.RecNo, Res);
Res.Rank:=0;
FPSearch.AddResult(i, Res);
Inc(I);
Q.Next;
end;
finally
@ -276,6 +277,40 @@ begin
end;
end;
Function TSQLDBIndexDB.GetAvailableWords(out aList : TUTF8StringArray; aContaining: UTF8String; Partial: TAvailableMatch) : Integer;
Var
Q : TSQLQuery;
begin
Result:=0;
Q := CreateQuery(AvailableWordsSQL(aContaining,Partial));
try
Q.PacketRecords:=-1;
if (aContaining<>'') or (Partial<>amall) then
With Q.ParamByName(SearchTermParam) do
case Partial of
amExact : AsString:=aContaining;
amContains : AsString:='%'+aContaining+'%';
amStartsWith : AsString:=aContaining+'%';
end;
Q.Open;
SetLength(aList,Q.RecordCount);
Q.First;
While not Q.EOF do
begin
If Length(aList)<=Result then
SetLength(aList,Result+100);
aList[Result]:=Q.Fields[0].AsUTF8String;
Inc(Result);
Q.Next;
end;
SetLength(aList,Result);
finally
Q.Free;
end;
end;
procedure TSQLDBIndexDB.DeleteWordsFromFile(URL: UTF8String);
begin
inherited DeleteWordsFromFile(URL);
@ -293,8 +328,7 @@ begin
if not IgnoreErrors then
raise
else
Writeln(E.ClassName,' : ',E.Message);
// Writeln(E.ClassName,' : ',E.Message);
end;
end;

View File

@ -39,6 +39,7 @@ type
QueryResult: UTF8string;
SearchWordID: TDatabaseID;
URLID: TDatabaseID;
FMatchList : TUTF8StringArray;
procedure CheckSQLite(Rc: cint; pzErrMsg: PChar);
protected
class function AllowForeignKeyInTable: boolean; override;
@ -60,6 +61,7 @@ type
procedure CreateDB; override;
procedure DeleteWordsFromFile(URL: UTF8string); override;
procedure FindSearchData(SearchWord: TWordParser; FPSearch: TFPSearch; SearchOptions: TSearchOptions); override;
function GetAvailableWords(out aList : TUTF8StringArray; aContaining : UTF8String; Partial : TAvailableMatch) : integer;override;
published
property FileName: UTF8string read FFileName write FFileName;
end;
@ -90,6 +92,26 @@ begin
Result := 0;
end;
function WordListCallback(_para1: pointer; plArgc: longint; argv: PPchar; argcol: PPchar): longint; cdecl;
var
PVal: ^PChar;
S : UTF8String;
begin
PVal := argv;
S:=PVal^;
with TSQLiteIndexDB(_para1) do
begin
if Length(FMatchList)<=FRow then
SetLength(FMatchList,Length(FMatchList)+10);
FMatchList[FRow]:=S;
Inc(Frow);
end;
Result := 0;
end;
function IndexCallback(_para1: pointer; plArgc: longint; argv: PPchar; argcol: PPchar): longint; cdecl;
begin
//store the query result
@ -287,5 +309,34 @@ begin
CheckSQLite(rc, pzErrMsg);
end;
function TSQLiteIndexDB.GetAvailableWords(out aList: TUTF8StringArray; aContaining: UTF8String; Partial: TAvailableMatch): integer;
Var
st,sql: UTF8string;
rc: cint;
pzErrMsg: PChar;
begin
Result:=0;
FRow:=0;
SetLength(FMatchList,0);
aContaining:=LowerCase(aContaining);
sql := AvailableWordsSQL(aContaining,Partial);
aContaining:=StringReplace(aContaining,'''','''''',[rfReplaceAll]);
case Partial of
amExact : st:=aContaining;
amContains : st:='%'+aContaining+'%';
amStartsWith : st:=aContaining+'%';
else
ST:='';
end;
sql:=StringReplace(SQL,':'+SearchTermParam,''''+ST+'''',[]);
rc := sqlite3_exec(db, PChar(sql), @WordListCallback, self, @pzErrMsg);
CheckSQLite(rc, pzErrMsg);
SetLength(FMatchList,FRow);
aList:=FMatchList;
FMatchList:=Nil;
end;
end.