fpc/packages/fpindexer/examples/docindexer.pp
michael ed4b94706b * HTML search database example
git-svn-id: trunk@39415 -
2018-07-08 19:46:15 +00:00

308 lines
7.5 KiB
ObjectPascal

program docindexer;
{$mode objfpc}{$H+}
{$IFDEF UNIX}
{$linklib pthread}
{$ENDIF}
uses
cwstring, cthreads, SysUtils, Classes, DateUtils, sqldb, SQLDBindexDB, FBindexDB, sqliteindexdb, pgindexdb, memindexdb, fpIndexer, inifiles,
// indexer readers
IReaderTXT, IReaderPAS, IReaderHTML, CustApp;
Type
{ TDocIndexerApplication }
TDocIndexerApplication = class(TCustomApplication)
Private
FDirs : TStringArray;
FCreateDB : Boolean;
FEmptyDB : Boolean;
FStripPath,
FLanguage,
FIgnoreList,
FConfig : String;
FCommitFiles,
FLogSQL : Boolean;
FCodePage : TSystemCodePage;
Protected
Procedure WriteLog(Const Msg : String); virtual;
Procedure WriteLog(Const Fmt : String; Const Args : Array of Const);
procedure IndexLog(Sender : TObject; Const ACurrent,ACount : Integer; Const AURL : UTF8String);
Procedure DBHook(Sender : TSQLConnection; EventType : TDBEventType; Const Msg : String);
function ParseOptions: Boolean; virtual;
function SetupDB : TCustomIndexDB; virtual;
procedure CreateDB(aDB : TCustomIndexDB);virtual;
procedure ClearDB(aDB : TCustomIndexDB);virtual;
procedure DoIndex(aDB: TCustomIndexDB);virtual;
procedure Usage(const Msg: String);virtual;
Procedure DoRun; override;
Public
Constructor Create(aOwner : TComponent); override;
end;
procedure TDocIndexerApplication.CreateDB(aDB : TCustomIndexDB);
begin
WriteLog('Creating database');
aDB.CreateDB;
end;
procedure TDocIndexerApplication.ClearDB(aDB: TCustomIndexDB);
begin
WriteLog('Clearing database tables');
aDB.CreateIndexerTables;
end;
function TDocIndexerApplication.SetupDB : TCustomIndexDB;
Const
SDatabase = 'Database';
KeyHostName = 'HostName';
KeyDatabaseName = 'DatabaseName';
KeyUser = 'User';
KeyPassword = 'Password';
KeyType = 'Type';
Procedure ConfigSQLDB(DB : TSQLDBIndexDB; aIni : TInifile);
begin
DB.HostName:= aIni.ReadString(SDatabase,KeyHostName,DB.HostName);
DB.DatabasePath := aIni.ReadString(SDatabase,KeyDatabaseName,DB.DatabasePath);
DB.UserName := aIni.ReadString(SDatabase,KeyUser,DB.UserName);
DB.Password := aIni.ReadString(SDatabase,KeyPassword,DB.Password);
end;
Procedure ConfigSQLIte(SDB : TSQLiteIndexDB; aIni : TInifile);
begin
SDB.FileName := aIni.ReadString(SDatabase,KeyDatabaseName,SDB.FileName);
end;
Procedure ConfigFile(FDB : TFileIndexDB; aIni : TInifile);
begin
FDB.FileName := aIni.ReadString(SDatabase,KeyDatabaseName,FDB.FileName);
end;
Var
Ini : TIniFile;
DB : TSQLDBIndexDB;
SDB : TSQLiteIndexDB;
FDB : TFileIndexDB;
begin
if FLogSQL then
GlobalDBLogHook:=@DBHook;
Result:=nil;
Ini:=TIniFile.Create(FConfig);
try
Case lowercase(Ini.ReadString(SDatabase,KeyType,'PostGres')) of
'postgres' :
begin
DB := TPGIndexDB.Create(nil);
ConfigSQLDB(DB,Ini);
Result:=DB;
end;
'firebird' :
begin
DB := TFBIndexDB.Create(nil);
ConfigSQLDB(DB,Ini);
Result:=DB;
end;
'sqlite' :
begin
SDB := TSQLiteIndexDB.Create(nil);
ConfigSQLite(SDB,Ini);
Result:=SDB;
end;
'file' :
begin
FDB := TFileIndexDB.Create(nil);
ConfigFile(FDB,Ini);
Result:=FDB;
end;
else
Raise Exception.CreateFmt('Unknown database type: "%s" ',[Ini.ReadString(SDatabase,KeyType,'PostGres')]);
end;
finally
ini.Free;
end;
end;
Procedure TDocIndexerApplication.DoIndex(aDB : TCustomIndexDB);
var
Indexer: TFPIndexer; //indexes files
start: TDateTime;
Dn,n: int64;
endtime: TDateTime;
D : String;
begin
//SetHeapTraceOutput('heap.trc');
start := Now;
Indexer := TFPIndexer.Create(Nil);
try
Indexer.CodePage:=FCodePage;
Indexer.Database:=aDB;
//setup parameters for indexing
Indexer.FileMask := '*.pas;*.html;readme.txt'; //semicolon separated list
Indexer.SearchRecursive := True;
Indexer.DetectLanguage := False;
if (FIgnoreList<>'') then
IgnoreListManager.LoadIgnoreWordsFromFile(FLanguage,FIgnoreList);
indexer.Language:=FLanguage;
Indexer.UseIgnoreList:=true;
Indexer.CommitFiles:=FCommitFiles;
Indexer.StripPath:=FStripPath;
Indexer.OnProgress:=@IndexLog;
N:=0;
DN:=0;
For D in FDirs do
begin
inc(DN);
IndexLog(Self,-1,-1,Format('Treating directory %d of %d: %s',[DN,Length(FDirs),D]));
Indexer.SearchPath:=D;
//execute the search
N := N+Indexer.Execute(False);
end;
endtime := Now;
if N <> 0 then
WriteLog('Endexing succesful')
else
WriteLog('Error indexing or no words found...');
WriteLog(Format('Done, indexed %d words in %d directories in %d sec.', [N,Length(FDirs),SecondsBetween(endtime,start)]));
finally
FreeAndNil(Indexer);
end;
end;
Procedure TDocIndexerApplication.Usage(Const Msg : String);
begin
If (Msg<>'') then
Writeln(Msg);
ExitCode:=Ord(Msg<>'')
end;
Function TDocIndexerApplication.ParseOptions : Boolean;
Var
Enc : String;
begin
Result:=True;
FConfig:=GetOptionValue('c','config');
If (FConfig='') then
begin
Usage('Need database connection configuration file');
Exit(False);
end;
FDirs:=GetOptionValues('d','directory');
if (Length(FDirs)=0) then
begin
SetLength(FDirs,1);
FDirs[0]:='.';
end;
FCreateDB:=HasOption('r','createdb');
FEmptyDB:=(Not FCreateDB) and HasOption('e','cleardb');
FLogSQL:=HasOption('q','querylog');
FCommitFiles:=HasOption('m','commit-files');
FLanguage:=GetOptionValue('l','language');
if FLanguage='' then
FLanguage:='english';
FIgnoreList:=GetOptionValue('i','ignore');
Enc:=getOptionValue('p','codepage');
FStripPath:=GetOptionValue('s','strip');
if Enc='' then
FCodePage:=CP_UTF8
else
begin
FCodePage := CodePageNameToCodePage(Enc);
if (FCodePage = $FFFF) then
begin
Usage('Invalid or unsupported encoding: '+Enc);
Exit(False);
end;
end;
end;
procedure TDocIndexerApplication.DoRun;
Var
S : String;
DB : TCustomIndexDB;
begin
Terminate;
S:=Checkoptions('hd:reqmc:l:i:p:s:',['help','directory','createdb','cleardb','querylog','commit-files','config','language','ignore-list','codepage','strip']);
if (S<>'') or HasOption('h','help') then
begin
Usage(S);
exit;
end;
If not ParseOptions then
exit;
DB:=SetupDB;
try
If FCreateDB then
DB.CreateDB
else
begin
DB.Connect;
if FEmptyDB then
ClearDB(DB);
end;
DoIndex(DB);
finally
DB.Free;
end;
end;
constructor TDocIndexerApplication.Create(aOwner: TComponent);
begin
inherited Create(aOwner);
StopOnException:=True;
FCodePage:=CP_UTF8;
end;
procedure TDocIndexerApplication.WriteLog(const Msg: String);
begin
Writeln(Msg);
end;
procedure TDocIndexerApplication.WriteLog(const Fmt: String; const Args: array of const);
begin
WriteLog(Format(Fmt,Args));
end;
procedure TDocIndexerApplication.IndexLog(Sender: TObject; const ACurrent, ACount: Integer; const AURL: UTF8String);
begin
if ACurrent=-1 then
WriteLog(AURL)
else
WriteLog('%5.2f%% [%d/%d] : %s',[(ACurrent/ACount*100),ACurrent,ACount,AURL]);
end;
procedure TDocIndexerApplication.DBHook(Sender: TSQLConnection; EventType: TDBEventType; const Msg: String);
Var
S : String;
begin
Str(EventType,S);
WriteLog('SQL [%s] : %s',[S,Msg]);
end;
begin
with TDocIndexerApplication.Create(Nil) do
try
Initialize;
Run;
finally
Free;
end;
end.