mirror of
https://gitlab.com/freepascal.org/lazarus/lazarus.git
synced 2025-08-20 11:19:23 +02:00
wiki test: get lang
git-svn-id: trunk@35712 -
This commit is contained in:
parent
b02630ac58
commit
94c3791dc7
@ -90,6 +90,8 @@ function WikiFilenameToPage(Filename: string): string;
|
|||||||
function WikiImageToFilename(Image: string; IsInternalLink, InsertCaseID: boolean;
|
function WikiImageToFilename(Image: string; IsInternalLink, InsertCaseID: boolean;
|
||||||
KeepScheme: boolean = false): string;
|
KeepScheme: boolean = false): string;
|
||||||
function WikiCreateCommonLanguageList(AddLazWikiLangs: boolean): TKeyWordFunctionList;
|
function WikiCreateCommonLanguageList(AddLazWikiLangs: boolean): TKeyWordFunctionList;
|
||||||
|
function GetWikiPageLanguage(const Page: string): string;
|
||||||
|
function WikiPageHasLanguage(const Page, Languages: string): boolean;
|
||||||
|
|
||||||
implementation
|
implementation
|
||||||
|
|
||||||
@ -364,5 +366,45 @@ begin
|
|||||||
end;
|
end;
|
||||||
end;
|
end;
|
||||||
|
|
||||||
|
function GetWikiPageLanguage(const Page: string): string;
|
||||||
|
begin
|
||||||
|
Result:=RightStr(Page,3);
|
||||||
|
if (Result='') or (Result[1]<>'/') then exit('');
|
||||||
|
Delete(Result,1,1);
|
||||||
|
end;
|
||||||
|
|
||||||
|
function WikiPageHasLanguage(const Page, Languages: string): boolean;
|
||||||
|
// * = fits any
|
||||||
|
// de = fits 'de' and original
|
||||||
|
// -,de = fits only 'de'
|
||||||
|
var
|
||||||
|
Lang: String;
|
||||||
|
p: PChar;
|
||||||
|
StartPos: PChar;
|
||||||
|
begin
|
||||||
|
Lang:=GetWikiPageLanguage(Page);
|
||||||
|
if (Languages='') then
|
||||||
|
exit(Lang='');
|
||||||
|
p:=PChar(Languages);
|
||||||
|
while p^<>#0 do begin
|
||||||
|
StartPos:=p;
|
||||||
|
while not (p^ in [#0,',']) do inc(p);
|
||||||
|
if p>StartPos then begin
|
||||||
|
if StartPos^='-' then begin
|
||||||
|
// not original language
|
||||||
|
if Lang='' then exit(false);
|
||||||
|
end else if StartPos^='*' then begin
|
||||||
|
// fit any
|
||||||
|
exit(true);
|
||||||
|
end else if (Lang<>'') and (CompareIdentifiers(StartPos,PChar(Lang))=0)
|
||||||
|
then begin
|
||||||
|
// fits specific
|
||||||
|
exit(true);
|
||||||
|
end;
|
||||||
|
end;
|
||||||
|
while p^=',' do inc(p);
|
||||||
|
end;
|
||||||
|
end;
|
||||||
|
|
||||||
end.
|
end.
|
||||||
|
|
||||||
|
@ -41,8 +41,10 @@ type
|
|||||||
public
|
public
|
||||||
Phrases: TStrings;
|
Phrases: TStrings;
|
||||||
LoPhrases: TStrings; // Phrases lowercase
|
LoPhrases: TStrings; // Phrases lowercase
|
||||||
constructor Create(const SearchText: string);
|
Languages: string; // comma separated list, '-' means not in the original, 'de' = german
|
||||||
|
constructor Create(const SearchText: string; const aLang: string = '');
|
||||||
destructor Destroy; override;
|
destructor Destroy; override;
|
||||||
|
function Equals(Obj: TObject): boolean; override;
|
||||||
end;
|
end;
|
||||||
|
|
||||||
TWHTextNodeType = (
|
TWHTextNodeType = (
|
||||||
@ -75,35 +77,42 @@ type
|
|||||||
end;
|
end;
|
||||||
|
|
||||||
TWHFitsCategory = (
|
TWHFitsCategory = (
|
||||||
whfcPageTitle,
|
whfcNone,
|
||||||
whfcHeader,
|
whfcLink,
|
||||||
whfcText,
|
whfcText,
|
||||||
whfcLink
|
whfcHeader,
|
||||||
|
whfcPageTitle
|
||||||
);
|
);
|
||||||
TWHFitsCategories = set of TWHFitsCategory;
|
TWHFitsCategories = set of TWHFitsCategory;
|
||||||
|
|
||||||
TWHFitsStringFlag = (
|
TWHFitsStringFlag = (
|
||||||
whfsWordStart,
|
whfsPart,
|
||||||
whfsWholeWord,
|
whfsWholeWord
|
||||||
whfsWhole
|
|
||||||
);
|
);
|
||||||
TWHFitsStringFlags = set of TWHFitsStringFlag;
|
TWHFitsStringFlags = set of TWHFitsStringFlag;
|
||||||
|
|
||||||
TWHFitsString = record
|
TWHPhrasePageFit = record
|
||||||
Flags: TWHFitsStringFlags;
|
Category: TWHFitsCategory;
|
||||||
Count: integer;
|
Quality: TWHFitsStringFlag;
|
||||||
end;
|
end;
|
||||||
PWHFitsString = ^TWHFitsString;
|
PWHPhrasePageFit = ^TWHPhrasePageFit;
|
||||||
|
|
||||||
{ TW2HelpPage
|
TWHScore = single;
|
||||||
for future extensions and descendants }
|
TWHScoring = class
|
||||||
|
public
|
||||||
|
Phrases: array[TWHFitsCategory,TWHFitsStringFlag] of TWHScore;
|
||||||
|
end;
|
||||||
|
|
||||||
|
{ TW2HelpPage }
|
||||||
|
|
||||||
TW2HelpPage = class(TW2HTMLPage)
|
TW2HelpPage = class(TW2HTMLPage)
|
||||||
public
|
public
|
||||||
TextRoot: TWHTextNode;
|
TextRoot: TWHTextNode;
|
||||||
CurNode: TWHTextNode;
|
CurNode: TWHTextNode;
|
||||||
|
Score: single;
|
||||||
destructor Destroy; override;
|
destructor Destroy; override;
|
||||||
procedure Search(Query: TWikiHelpQuery);
|
function GetScore(Query: TWikiHelpQuery; Scoring: TWHScoring): TWHScore;
|
||||||
|
procedure GetFit(Query: TWikiHelpQuery; Fit: PWHPhrasePageFit);
|
||||||
end;
|
end;
|
||||||
|
|
||||||
{ TWiki2HelpConverter }
|
{ TWiki2HelpConverter }
|
||||||
@ -127,7 +136,7 @@ type
|
|||||||
procedure LoadPages;
|
procedure LoadPages;
|
||||||
procedure ConvertInit; override;
|
procedure ConvertInit; override;
|
||||||
procedure ExtractAllTexts;
|
procedure ExtractAllTexts;
|
||||||
procedure Search(Query: TWikiHelpQuery);
|
procedure Search(Query: TWikiHelpQuery; Scoring: TWHScoring);
|
||||||
property Help: TWikiHelp read FHelp;
|
property Help: TWikiHelp read FHelp;
|
||||||
end;
|
end;
|
||||||
|
|
||||||
@ -156,6 +165,7 @@ type
|
|||||||
FQuery: TWikiHelpQuery;
|
FQuery: TWikiHelpQuery;
|
||||||
FLoadComplete: boolean;
|
FLoadComplete: boolean;
|
||||||
FLoading: boolean;
|
FLoading: boolean;
|
||||||
|
FScoring: TWHScoring;
|
||||||
FXMLDirectory: string;
|
FXMLDirectory: string;
|
||||||
FCritSec: TRTLCriticalSection;
|
FCritSec: TRTLCriticalSection;
|
||||||
FScanThread: TWikiHelpThread;
|
FScanThread: TWikiHelpThread;
|
||||||
@ -178,10 +188,11 @@ type
|
|||||||
property LoadComplete: boolean read FLoadComplete;
|
property LoadComplete: boolean read FLoadComplete;
|
||||||
|
|
||||||
// search
|
// search
|
||||||
procedure Search(const Term: string);
|
procedure Search(const Term: string; const Languages: string = '');
|
||||||
procedure Search(aQuery: TWikiHelpQuery);
|
procedure Search(aQuery: TWikiHelpQuery);
|
||||||
procedure TestSearch;
|
procedure TestSearch;
|
||||||
property Query: TWikiHelpQuery read FQuery;
|
property Query: TWikiHelpQuery read FQuery;
|
||||||
|
property Scoring: TWHScoring read FScoring;
|
||||||
public
|
public
|
||||||
property XMLDirectory: string read FXMLDirectory write SetXMLDirectory; // directory where the wiki xml files are
|
property XMLDirectory: string read FXMLDirectory write SetXMLDirectory; // directory where the wiki xml files are
|
||||||
property ImagesDirectory: string read GetImagesDirectory write SetImagesDirectory; // directory where the wiki image files are
|
property ImagesDirectory: string read GetImagesDirectory write SetImagesDirectory; // directory where the wiki image files are
|
||||||
@ -192,18 +203,20 @@ type
|
|||||||
var
|
var
|
||||||
WikiHelp: TWikiHelp = nil;
|
WikiHelp: TWikiHelp = nil;
|
||||||
|
|
||||||
function SearchTextToPhrases(const Txt: string): TStringList;
|
function SearchTextToPhrases(Txt: string): TStringList;
|
||||||
|
|
||||||
implementation
|
implementation
|
||||||
|
|
||||||
function SearchTextToPhrases(const Txt: string): TStringList;
|
function SearchTextToPhrases(Txt: string): TStringList;
|
||||||
var
|
var
|
||||||
p: PChar;
|
p: PChar;
|
||||||
StartPos: PChar;
|
StartPos: PChar;
|
||||||
Phrase: String;
|
Phrase: String;
|
||||||
begin
|
begin
|
||||||
Result:=TStringList.Create;
|
Result:=TStringList.Create;
|
||||||
|
Txt:=UTF8Trim(Txt);
|
||||||
if Txt='' then exit;
|
if Txt='' then exit;
|
||||||
|
Result.Add(Txt);
|
||||||
p:=PChar(Txt);
|
p:=PChar(Txt);
|
||||||
Phrase:='';
|
Phrase:='';
|
||||||
while p^<>#0 do begin
|
while p^<>#0 do begin
|
||||||
@ -218,7 +231,8 @@ begin
|
|||||||
// space => end phrase
|
// space => end phrase
|
||||||
inc(p);
|
inc(p);
|
||||||
if Phrase<>'' then begin
|
if Phrase<>'' then begin
|
||||||
Result.Add(Phrase);
|
if Result.IndexOf(Phrase)<0 then
|
||||||
|
Result.Add(Phrase);
|
||||||
Phrase:='';
|
Phrase:='';
|
||||||
end;
|
end;
|
||||||
end else begin
|
end else begin
|
||||||
@ -228,16 +242,18 @@ begin
|
|||||||
Phrase:=Phrase+SubString(StartPos,p-StartPos);
|
Phrase:=Phrase+SubString(StartPos,p-StartPos);
|
||||||
end;
|
end;
|
||||||
end;
|
end;
|
||||||
if Phrase<>'' then
|
if (Phrase<>'') and (Result.IndexOf(Phrase)<0) then
|
||||||
Result.Add(Phrase);
|
Result.Add(Phrase);
|
||||||
end;
|
end;
|
||||||
|
|
||||||
{ TWikiHelpQuery }
|
{ TWikiHelpQuery }
|
||||||
|
|
||||||
constructor TWikiHelpQuery.Create(const SearchText: string);
|
constructor TWikiHelpQuery.Create(const SearchText: string; const aLang: string
|
||||||
|
);
|
||||||
var
|
var
|
||||||
i: Integer;
|
i: Integer;
|
||||||
begin
|
begin
|
||||||
|
Languages:=aLang;
|
||||||
Phrases:=SearchTextToPhrases(SearchText);
|
Phrases:=SearchTextToPhrases(SearchText);
|
||||||
LoPhrases:=TStringList.Create;
|
LoPhrases:=TStringList.Create;
|
||||||
for i:=0 to Phrases.Count-1 do
|
for i:=0 to Phrases.Count-1 do
|
||||||
@ -250,6 +266,19 @@ begin
|
|||||||
inherited Destroy;
|
inherited Destroy;
|
||||||
end;
|
end;
|
||||||
|
|
||||||
|
function TWikiHelpQuery.Equals(Obj: TObject): boolean;
|
||||||
|
var
|
||||||
|
Src: TWikiHelpQuery;
|
||||||
|
begin
|
||||||
|
Result:=inherited Equals(Obj);
|
||||||
|
if Result then exit;
|
||||||
|
Result:=false;
|
||||||
|
if not (Obj is TWikiHelpQuery) then exit;
|
||||||
|
Src:=TWikiHelpQuery(Obj);
|
||||||
|
if not Phrases.Equals(Src.Phrases) then exit;
|
||||||
|
// LoPhrases is computed from Phrases
|
||||||
|
end;
|
||||||
|
|
||||||
{ TW2HelpPage }
|
{ TW2HelpPage }
|
||||||
|
|
||||||
destructor TW2HelpPage.Destroy;
|
destructor TW2HelpPage.Destroy;
|
||||||
@ -258,13 +287,82 @@ begin
|
|||||||
inherited Destroy;
|
inherited Destroy;
|
||||||
end;
|
end;
|
||||||
|
|
||||||
procedure TW2HelpPage.Search(Query: TWikiHelpQuery);
|
function TW2HelpPage.GetScore(Query: TWikiHelpQuery; Scoring: TWHScoring
|
||||||
|
): TWHScore;
|
||||||
|
var
|
||||||
|
PhrasesFit: PWHPhrasePageFit;
|
||||||
|
Size: Integer;
|
||||||
|
i: Integer;
|
||||||
|
Fit: PWHPhrasePageFit;
|
||||||
begin
|
begin
|
||||||
// check title
|
Result:=0;
|
||||||
//WikiPage.Title:=;
|
if (Query=nil) or (Query.LoPhrases.Count=0) then exit;
|
||||||
|
if not WikiPageHasLanguage(WikiDocumentName,Query.Languages) then begin
|
||||||
|
//debugln(['TW2HelpPage.GetScore lang does not fit ',WikiDocumentName,' "',GetWikiPageLanguage(WikiDocumentName),'" ',Query.Languages]);
|
||||||
|
exit;
|
||||||
|
end;
|
||||||
|
|
||||||
// check nodes
|
Size:=Query.LoPhrases.Count*SizeOf(TWHPhrasePageFit);
|
||||||
|
GetMem(PhrasesFit,Size);
|
||||||
|
try
|
||||||
|
FillByte(PhrasesFit^,Size,0);
|
||||||
|
GetFit(Query,PhrasesFit);
|
||||||
|
for i:=0 to Query.LoPhrases.Count-1 do begin
|
||||||
|
Fit:=@PhrasesFit[i];
|
||||||
|
Result+=Scoring.Phrases[Fit^.Category,Fit^.Quality];
|
||||||
|
end;
|
||||||
|
finally
|
||||||
|
FreeMem(PhrasesFit);
|
||||||
|
end;
|
||||||
|
end;
|
||||||
|
|
||||||
|
procedure TW2HelpPage.GetFit(Query: TWikiHelpQuery; Fit: PWHPhrasePageFit);
|
||||||
|
|
||||||
|
procedure CheckTxt(s: string; Category: TWHFitsCategory);
|
||||||
|
var
|
||||||
|
i: Integer;
|
||||||
|
Phrase: String;
|
||||||
|
FitsWholeWord: boolean;
|
||||||
|
FitsCount: SizeInt;
|
||||||
|
Quality: TWHFitsStringFlag;
|
||||||
|
begin
|
||||||
|
s:=UTF8LowerCase(s);
|
||||||
|
for i:=0 to Query.LoPhrases.Count-1 do begin
|
||||||
|
if Fit[i].Category>Category then continue;
|
||||||
|
if (Fit[i].Category=Category) and (Fit[i].Quality>=whfsWholeWord) then
|
||||||
|
continue;
|
||||||
|
Phrase:=Query.LoPhrases[i];
|
||||||
|
HasTxtWord(PChar(Phrase),PChar(s),FitsWholeWord,FitsCount);
|
||||||
|
if FitsCount<=0 then continue;
|
||||||
|
if FitsWholeWord then
|
||||||
|
Quality:=whfsWholeWord
|
||||||
|
else
|
||||||
|
Quality:=whfsPart;
|
||||||
|
Fit[i].Category:=Category;
|
||||||
|
Fit[i].Quality:=Quality;
|
||||||
|
end;
|
||||||
|
end;
|
||||||
|
|
||||||
|
procedure Traverse(Node: TWHTextNode);
|
||||||
|
var
|
||||||
|
i: Integer;
|
||||||
|
Category: TWHFitsCategory;
|
||||||
|
begin
|
||||||
|
if Node=nil then exit;
|
||||||
|
case Node.Typ of
|
||||||
|
whnTxt: Category:=whfcText;
|
||||||
|
whnHeader: Category:=whfcHeader;
|
||||||
|
whnLink: Category:=whfcLink;
|
||||||
|
else exit;
|
||||||
|
end;
|
||||||
|
CheckTxt(Node.Txt,Category);
|
||||||
|
for i:=0 to Node.Count-1 do
|
||||||
|
Traverse(Node[i]);
|
||||||
|
end;
|
||||||
|
|
||||||
|
begin
|
||||||
|
CheckTxt(WikiPage.Title,whfcPageTitle);
|
||||||
|
Traverse(TextRoot);
|
||||||
end;
|
end;
|
||||||
|
|
||||||
{ TWHTextNode }
|
{ TWHTextNode }
|
||||||
@ -500,14 +598,15 @@ begin
|
|||||||
ProcThreadPool.DoParallel(@ParallelExtractPageText,0,(Count-1) div PagesPerThread);
|
ProcThreadPool.DoParallel(@ParallelExtractPageText,0,(Count-1) div PagesPerThread);
|
||||||
end;
|
end;
|
||||||
|
|
||||||
procedure TWiki2HelpConverter.Search(Query: TWikiHelpQuery);
|
procedure TWiki2HelpConverter.Search(Query: TWikiHelpQuery; Scoring: TWHScoring
|
||||||
|
);
|
||||||
var
|
var
|
||||||
i: Integer;
|
i: Integer;
|
||||||
Page: TW2HelpPage;
|
Page: TW2HelpPage;
|
||||||
begin
|
begin
|
||||||
for i:=0 to Count-1 do begin
|
for i:=0 to Count-1 do begin
|
||||||
Page:=TW2HelpPage(Pages[i]);
|
Page:=TW2HelpPage(Pages[i]);
|
||||||
Page.Search(Query);
|
Page.GetScore(Query,Scoring);
|
||||||
end;
|
end;
|
||||||
end;
|
end;
|
||||||
|
|
||||||
@ -698,6 +797,15 @@ begin
|
|||||||
FConverter:=TWiki2HelpConverter.Create;
|
FConverter:=TWiki2HelpConverter.Create;
|
||||||
FConverter.LanguageTags:=WikiCreateCommonLanguageList(true);
|
FConverter.LanguageTags:=WikiCreateCommonLanguageList(true);
|
||||||
FConverter.FHelp:=Self;
|
FConverter.FHelp:=Self;
|
||||||
|
FScoring:=TWHScoring.Create;
|
||||||
|
FScoring.Phrases[whfcPageTitle,whfsWholeWord]:=128;
|
||||||
|
FScoring.Phrases[whfcPageTitle,whfsPart]:=64;
|
||||||
|
FScoring.Phrases[whfcHeader,whfsWholeWord]:=32;
|
||||||
|
FScoring.Phrases[whfcHeader,whfsPart]:=16;
|
||||||
|
FScoring.Phrases[whfcText,whfsWholeWord]:=8;
|
||||||
|
FScoring.Phrases[whfcText,whfsPart]:=4;
|
||||||
|
FScoring.Phrases[whfcLink,whfsWholeWord]:=2;
|
||||||
|
FScoring.Phrases[whfcLink,whfsPart]:=1;
|
||||||
end;
|
end;
|
||||||
|
|
||||||
destructor TWikiHelp.Destroy;
|
destructor TWikiHelp.Destroy;
|
||||||
@ -705,6 +813,7 @@ begin
|
|||||||
AbortLoading(true);
|
AbortLoading(true);
|
||||||
FConverter.LanguageTags.Free;
|
FConverter.LanguageTags.Free;
|
||||||
FreeAndNil(FConverter);
|
FreeAndNil(FConverter);
|
||||||
|
FreeAndNil(FScoring);
|
||||||
FreeAndNil(FQuery);
|
FreeAndNil(FQuery);
|
||||||
inherited Destroy;
|
inherited Destroy;
|
||||||
DoneCriticalsection(FCritSec);
|
DoneCriticalsection(FCritSec);
|
||||||
@ -754,15 +863,20 @@ begin
|
|||||||
end;
|
end;
|
||||||
end;
|
end;
|
||||||
|
|
||||||
procedure TWikiHelp.Search(const Term: string);
|
procedure TWikiHelp.Search(const Term: string; const Languages: string);
|
||||||
begin
|
begin
|
||||||
Search(TWikiHelpQuery.Create(Term));
|
Search(TWikiHelpQuery.Create(Term,Languages));
|
||||||
end;
|
end;
|
||||||
|
|
||||||
procedure TWikiHelp.Search(aQuery: TWikiHelpQuery);
|
procedure TWikiHelp.Search(aQuery: TWikiHelpQuery);
|
||||||
begin
|
begin
|
||||||
EnterCritSect;
|
EnterCritSect;
|
||||||
try
|
try
|
||||||
|
if (aQuery<>nil) and (FQuery<>nil) and (FQuery.Equals(aQuery)) then begin
|
||||||
|
// same query
|
||||||
|
FreeAndNil(aQuery);
|
||||||
|
exit;
|
||||||
|
end;
|
||||||
FreeAndNil(FQuery);
|
FreeAndNil(FQuery);
|
||||||
FQuery:=aQuery;
|
FQuery:=aQuery;
|
||||||
finally
|
finally
|
||||||
@ -772,9 +886,15 @@ begin
|
|||||||
end;
|
end;
|
||||||
|
|
||||||
procedure TWikiHelp.TestSearch;
|
procedure TWikiHelp.TestSearch;
|
||||||
|
var
|
||||||
|
StartTime: TDateTime;
|
||||||
|
EndTime: TDateTime;
|
||||||
begin
|
begin
|
||||||
debugln(['TWikiHelp.TestSearch ',Query.Phrases.Text]);
|
StartTime:=Now;
|
||||||
Converter.Search(Query);
|
debugln(['TWikiHelp.TestSearch START ',Query.Phrases.Text]);
|
||||||
|
Converter.Search(Query,Scoring);
|
||||||
|
EndTime:=Now;
|
||||||
|
debugln(['TWikiHelp.TestSearch END "',Query.Phrases.Text,'" ',dbgs(round(Abs(EndTime-StartTime)*86400000))+'msec']);
|
||||||
end;
|
end;
|
||||||
|
|
||||||
end.
|
end.
|
||||||
|
Loading…
Reference in New Issue
Block a user