mirror of
https://gitlab.com/freepascal.org/lazarus/lazarus.git
synced 2025-05-09 15:52:35 +02:00
972 lines
30 KiB
ObjectPascal
972 lines
30 KiB
ObjectPascal
{ Base classes for converters from wiki to something else.
|
|
|
|
Copyright (C) 2012 Mattias Gaertner mattias@freepascal.org
|
|
|
|
This source is free software; you can redistribute it and/or modify it under
|
|
the terms of the GNU General Public License as published by the Free
|
|
Software Foundation; either version 2 of the License, or (at your option)
|
|
any later version.
|
|
|
|
This code is distributed in the hope that it will be useful, but WITHOUT ANY
|
|
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
|
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
|
|
details.
|
|
|
|
A copy of the GNU General Public License is available on the World Wide Web
|
|
at <http://www.gnu.org/copyleft/gpl.html>. You can also obtain it by writing
|
|
to the Free Software Foundation, Inc., 51 Franklin Street - Fifth Floor,
|
|
Boston, MA 02110-1335, USA.
|
|
|
|
}
|
|
unit WikiFormat;
|
|
|
|
{$mode objfpc}{$H+}
|
|
|
|
{off $DEFINE VerboseWikiFileCode}
|
|
|
|
interface
|
|
|
|
uses
|
|
Classes, SysUtils, Laz_AVL_Tree,
|
|
// LazUtils
|
|
LazFileUtils, laz2_XMLRead, laz2_DOM, LazLoggerBase, LazUTF8, AvgLvlTree,
|
|
// Codetools
|
|
BasicCodeTools, KeywordFuncLists,
|
|
// LazWiki
|
|
WikiParser, WikiStrConsts;
|
|
|
|
type
|
|
TWiki2FormatConverter = class;
|
|
|
|
{ TW2FormatPage }
|
|
|
|
TW2FormatPage = class
|
|
public
|
|
Converter: TWiki2FormatConverter;
|
|
CategoryList: TStringList;
|
|
WikiFilename: string;
|
|
WikiErrorMsg: string;
|
|
WikiDoc: TXMLDocument;
|
|
WikiPage: TWikiPage;
|
|
WikiDocumentName: string; // the path in the Wiki
|
|
WikiLanguage: string;
|
|
constructor Create(TheConverter: TWiki2FormatConverter); virtual;
|
|
destructor Destroy; override;
|
|
procedure ParseWikiDoc(KeepWikiDoc: boolean);
|
|
procedure ClearConversion; virtual;
|
|
procedure ClearPageConnections; virtual;
|
|
end;
|
|
TW2FormatPageClass = class of TW2FormatPage;
|
|
|
|
{ TWiki2FormatConverter }
|
|
|
|
TWiki2FormatConverter = class
|
|
private
|
|
FCodeTags: TKeyWordFunctionList;
|
|
FNoWarnBaseURLs: TStringToStringTree;
|
|
FOnLog: TWikiOnLog;
|
|
FQuiet: boolean;
|
|
FTitle: string;
|
|
FVerbose: boolean;
|
|
FWarnMissingPageLinks: boolean;
|
|
procedure SetTitle(AValue: string);
|
|
protected
|
|
FImagesDir: string;
|
|
FOutputDir: string;
|
|
fPages: TFPList; // list of TW2FormatPage
|
|
fPagesSortFilename: TAvlTree; // TW2FormatPage sorted for WikiFilename
|
|
fPagesSortDocumentName: TAvlTree; // TW2FormatPage sorted for WikiDocumentName
|
|
FPageClass: TW2FormatPageClass;
|
|
function GetPages(Index: integer): TW2FormatPage;
|
|
procedure SetOutputDir(AValue: string);
|
|
procedure SetImagesDir(AValue: string);
|
|
public
|
|
constructor Create; virtual;
|
|
destructor Destroy; override;
|
|
procedure Clear; virtual;
|
|
function GetPageWithFilename(Filename: string): TW2FormatPage; virtual;
|
|
function GetPageWithDocumentName(DocumentName: string): TW2FormatPage; virtual;
|
|
function AddWikiPage(Filename: string; ParseNow: boolean = true): TW2FormatPage;
|
|
procedure Convert; virtual;
|
|
procedure Log(Msg: string);
|
|
property OnLog: TWikiOnLog read FOnLog write FOnLog;
|
|
function Count: integer;
|
|
property Pages[Index: integer]: TW2FormatPage read GetPages; default;
|
|
property PageClass: TW2FormatPageClass read FPageClass;
|
|
property OutputDir: string read FOutputDir write SetOutputDir;
|
|
property ImagesDir: string read FImagesDir write SetImagesDir;
|
|
property Title: string read FTitle write SetTitle;
|
|
property WarnMissingPageLinks: boolean read FWarnMissingPageLinks write FWarnMissingPageLinks; // warn if an internal link links to non existing page
|
|
property NoWarnBaseURLs: TStringToStringTree read FNoWarnBaseURLs;
|
|
property CodeTags: TKeyWordFunctionList read FCodeTags write FCodeTags;
|
|
function CollectAllLangCodes(Delimiter: char = ','): string;
|
|
procedure GetPageTranslations(DocumentName: string; out LangToPage: TStringToPointerTree);
|
|
property Verbose: boolean read FVerbose write FVerbose;
|
|
property Quiet: boolean read FQuiet write FQuiet;
|
|
end;
|
|
|
|
function WikiPageToFilename(DocumentName: string; IsInternalLink, AppendCaseID: boolean): string;
|
|
function WikiFilenameToPage(Filename: string): string;
|
|
function WikiImageToFilename(Image: string; IsInternalLink, InsertCaseID: boolean;
|
|
KeepScheme: boolean = false): string;
|
|
function WikiHeaderToLink(Header: string): string;
|
|
function WikiCreateCommonCodeTagList(AddLazWikiLangs: boolean): TKeyWordFunctionList;
|
|
|
|
function UTF8ToWikiFileCode(AnUTF8: string): string;
|
|
function WikiFileCodeToUTF8(aFileCode: string): string;
|
|
procedure TestWikiPageToFilename;
|
|
|
|
// language
|
|
function GetWikiPageLanguage(const DocumentName: string): string;
|
|
function GetWikiPageLanguageDelimiterPos(const DocumentName: string): integer;
|
|
function ChompWikiPageLanguage(const DocumentName: string): string;
|
|
function WikiPageHasLanguage(const DocumentName, Languages: string): boolean;
|
|
function WikiLangInLanguageFilter(const Lang, Languages: string): boolean;
|
|
function WikiLangCodeToCaption(ID: string): string;
|
|
function WikiLangCaptionToCode(Caption: string): string;
|
|
|
|
function ComparePagesWithFilenames(Page1, Page2: Pointer): integer;
|
|
function CompareFilenameWithPage(Filename, Page: Pointer): integer;
|
|
function ComparePagesWithDocumentNames(Page1, Page2: Pointer): integer;
|
|
function CompareDocumentNameWithPage(DocumentName, Page: Pointer): integer;
|
|
function CompareDocumentNameWithPageIgnoreLang(DocumentName, Page: Pointer): integer;
|
|
|
|
implementation
|
|
|
|
{ TWiki2FormatConverter }
|
|
|
|
procedure TWiki2FormatConverter.SetImagesDir(AValue: string);
|
|
var
|
|
NewValue: String;
|
|
begin
|
|
NewValue:=AppendPathDelim(TrimFilename(AValue));
|
|
if FImagesDir=NewValue then Exit;
|
|
FImagesDir:=NewValue;
|
|
end;
|
|
|
|
procedure TWiki2FormatConverter.SetTitle(AValue: string);
|
|
begin
|
|
if FTitle=AValue then Exit;
|
|
FTitle:=AValue;
|
|
end;
|
|
|
|
function TWiki2FormatConverter.GetPages(Index: integer): TW2FormatPage;
|
|
begin
|
|
Result:=TW2FormatPage(fPages[Index]);
|
|
end;
|
|
|
|
procedure TWiki2FormatConverter.SetOutputDir(AValue: string);
|
|
var
|
|
NewValue: String;
|
|
begin
|
|
NewValue:=TrimAndExpandDirectory(AValue);
|
|
if FOutputDir=NewValue then Exit;
|
|
FOutputDir:=NewValue;
|
|
end;
|
|
|
|
constructor TWiki2FormatConverter.Create;
|
|
begin
|
|
FPageClass:=TW2FormatPage;
|
|
fPages:=TFPList.Create;
|
|
fPagesSortFilename:=TAvlTree.Create(@ComparePagesWithFilenames);
|
|
fPagesSortDocumentName:=TAvlTree.Create(@ComparePagesWithDocumentNames);
|
|
FTitle:='FPC/Lazarus Wiki (offline, generated '+DatetoStr(Now)+')';
|
|
FImagesDir:='images';
|
|
FNoWarnBaseURLs:=TStringToStringTree.Create(true);
|
|
end;
|
|
|
|
destructor TWiki2FormatConverter.Destroy;
|
|
begin
|
|
Clear;
|
|
FreeAndNil(FNoWarnBaseURLs);
|
|
FreeAndNil(fPagesSortFilename);
|
|
FreeAndNil(fPagesSortDocumentName);
|
|
FreeAndNil(fPages);
|
|
inherited Destroy;
|
|
end;
|
|
|
|
procedure TWiki2FormatConverter.Clear;
|
|
var
|
|
i: Integer;
|
|
begin
|
|
fPagesSortFilename.Clear;
|
|
fPagesSortDocumentName.Clear;
|
|
for i:=0 to Count-1 do
|
|
Pages[i].Free;
|
|
fPages.Clear;
|
|
FNoWarnBaseURLs.Clear;
|
|
end;
|
|
|
|
function TWiki2FormatConverter.GetPageWithFilename(Filename: string): TW2FormatPage;
|
|
var
|
|
Node: TAvlTreeNode;
|
|
begin
|
|
if Filename='' then exit(nil);
|
|
Node:=fPagesSortFilename.FindKey(Pointer(Filename),@CompareFilenameWithPage);
|
|
if Node<>nil then
|
|
Result:=TW2FormatPage(Node.Data)
|
|
else
|
|
Result:=nil;
|
|
end;
|
|
|
|
function TWiki2FormatConverter.GetPageWithDocumentName(DocumentName: string
|
|
): TW2FormatPage;
|
|
var
|
|
Node: TAvlTreeNode;
|
|
begin
|
|
if DocumentName='' then exit(nil);
|
|
Node:=fPagesSortDocumentName.FindKey(Pointer(DocumentName),@CompareDocumentNameWithPage);
|
|
if Node<>nil then
|
|
Result:=TW2FormatPage(Node.Data)
|
|
else
|
|
Result:=nil;
|
|
end;
|
|
|
|
function TWiki2FormatConverter.AddWikiPage(Filename: string; ParseNow: boolean
|
|
): TW2FormatPage;
|
|
begin
|
|
Result:=GetPageWithFilename(Filename);
|
|
if Result=nil then begin
|
|
Result:=PageClass.Create(Self);
|
|
Result.WikiFilename:=Filename;
|
|
Result.WikiDocumentName:=WikiFilenameToPage(Filename);
|
|
Result.WikiLanguage:=GetWikiPageLanguage(Result.WikiDocumentName);
|
|
fPages.Add(Result);
|
|
fPagesSortFilename.Add(Result);
|
|
fPagesSortDocumentName.Add(Result);
|
|
end;
|
|
if ParseNow then
|
|
Result.ParseWikiDoc(false);
|
|
end;
|
|
|
|
procedure TWiki2FormatConverter.Convert;
|
|
var
|
|
i: Integer;
|
|
Page: TW2FormatPage;
|
|
begin
|
|
if (OutputDir<>'') and (not DirPathExists(OutputDir)) then
|
|
raise Exception.Create('fpdoc output directory not found: "'+OutputDir+'"');
|
|
|
|
// load wiki pages
|
|
for i:=0 to Count-1 do begin
|
|
Page:=Pages[i];
|
|
Page.WikiPage.LanguageTags:=CodeTags;
|
|
Page.ParseWikiDoc(false);
|
|
end;
|
|
end;
|
|
|
|
procedure TWiki2FormatConverter.Log(Msg: string);
|
|
begin
|
|
if Assigned(OnLog) then
|
|
OnLog(Msg)
|
|
else
|
|
debugln(Msg);
|
|
end;
|
|
|
|
function TWiki2FormatConverter.Count: integer;
|
|
begin
|
|
Result:=fPages.Count;
|
|
end;
|
|
|
|
function TWiki2FormatConverter.CollectAllLangCodes(Delimiter: char): string;
|
|
var
|
|
i: Integer;
|
|
Page: TW2FormatPage;
|
|
Lang: String;
|
|
Langs: TStringToStringTree;
|
|
begin
|
|
Result:='';
|
|
Langs:=TStringToStringTree.Create(false);
|
|
try
|
|
for i:=0 to Count-1 do begin
|
|
Page:=Pages[i];
|
|
Lang:=Page.WikiLanguage;
|
|
if Lang='' then continue;
|
|
if Langs.Contains(Lang) then continue;
|
|
Langs[Lang]:='1';
|
|
Result+=Delimiter+Lang;
|
|
end;
|
|
finally
|
|
Langs.Free;
|
|
end;
|
|
end;
|
|
|
|
procedure TWiki2FormatConverter.GetPageTranslations(DocumentName: string; out
|
|
LangToPage: TStringToPointerTree);
|
|
var
|
|
AVLNode: TAvlTreeNode;
|
|
Page: TW2FormatPage;
|
|
begin
|
|
LangToPage:=TStringToPointerTree.Create(false);
|
|
DocumentName:=ChompWikiPageLanguage(DocumentName);
|
|
AVLNode:=fPagesSortDocumentName.FindLeftMostKey(Pointer(DocumentName),@CompareDocumentNameWithPageIgnoreLang);
|
|
while (AVLNode<>nil)
|
|
and (CompareDocumentNameWithPageIgnoreLang(Pointer(DocumentName),AVLNode.Data)=0) do begin
|
|
Page:=TW2FormatPage(AVLNode.Data);
|
|
LangToPage[GetWikiPageLanguage(Page.WikiDocumentName)]:=Page;
|
|
AVLNode:=fPagesSortDocumentName.FindSuccessor(AVLNode);
|
|
end;
|
|
end;
|
|
|
|
{ TW2FormatPage }
|
|
|
|
constructor TW2FormatPage.Create(TheConverter: TWiki2FormatConverter);
|
|
begin
|
|
Converter:=TheConverter;
|
|
CategoryList := TStringList.Create;
|
|
end;
|
|
|
|
destructor TW2FormatPage.Destroy;
|
|
begin
|
|
ClearPageConnections;
|
|
ClearConversion;
|
|
FreeAndNil(CategoryList);
|
|
FreeAndNil(WikiDoc);
|
|
FreeAndNil(WikiPage);
|
|
inherited Destroy;
|
|
end;
|
|
|
|
procedure TW2FormatPage.ParseWikiDoc(KeepWikiDoc: boolean);
|
|
begin
|
|
try
|
|
if WikiDoc=nil then begin
|
|
try
|
|
ReadXMLFile(WikiDoc,WikiFilename,[]);
|
|
except
|
|
on E: Exception do
|
|
WikiErrorMsg:=E.Message;
|
|
end;
|
|
end;
|
|
if WikiErrorMsg<>'' then
|
|
raise Exception.Create(WikiFilename+': '+WikiErrorMsg);
|
|
if WikiPage=nil then begin
|
|
WikiPage:=TWikiPage.Create;
|
|
try
|
|
if Converter<>nil then
|
|
WikiPage.OnLog:=Converter.OnLog;
|
|
WikiPage.LoadFromDoc(WikiDoc);
|
|
except
|
|
on E: Exception do
|
|
WikiErrorMsg:=E.Message;
|
|
end;
|
|
end;
|
|
if WikiErrorMsg<>'' then
|
|
raise Exception.Create(WikiFilename+': '+WikiErrorMsg);
|
|
finally
|
|
if not KeepWikiDoc then
|
|
FreeAndNil(WikiDoc);
|
|
end;
|
|
end;
|
|
|
|
procedure TW2FormatPage.ClearConversion;
|
|
begin
|
|
|
|
end;
|
|
|
|
procedure TW2FormatPage.ClearPageConnections;
|
|
begin
|
|
end;
|
|
|
|
function WikiPageToFilename(DocumentName: string; IsInternalLink, AppendCaseID: boolean): string;
|
|
{ IsInternalLink:
|
|
AppendCaseID=true: append a string encoding upper/lower case of letters
|
|
}
|
|
begin
|
|
Result:=DocumentName;
|
|
// optional: convert title to wiki link
|
|
if IsInternalLink then
|
|
Result:=WikiTitleToPage(Result);
|
|
// convert special characaters
|
|
Result:=UTF8ToWikiFileCode(Result);
|
|
// append case if
|
|
if AppendCaseID and (Result<>'') then
|
|
Result:=Result+'.'+WikiPageToCaseID(Result);
|
|
end;
|
|
|
|
function WikiFilenameToPage(Filename: string): string;
|
|
var
|
|
Ext: String;
|
|
p: Integer;
|
|
begin
|
|
Result:=ExtractFileName(Filename);
|
|
if Result='' then exit;
|
|
// delete .xml
|
|
Ext:=lowercase(ExtractFileExt(Result));
|
|
if Ext='.xml' then
|
|
Result:=LeftStr(Result,length(Result)-length(Ext));
|
|
// delete case id
|
|
p:=length(Result);
|
|
while (p>=1) and (Result[p] in ['0'..'9','a'..'z']) do
|
|
dec(p);
|
|
if (p>=1) and (Result[p]='.') then
|
|
Delete(Result,p,length(Result));
|
|
// convert special characaters
|
|
Result:=WikiFileCodeToUTF8(Result);
|
|
end;
|
|
|
|
function WikiImageToFilename(Image: string;
|
|
IsInternalLink, InsertCaseID: boolean;
|
|
KeepScheme: boolean): string;
|
|
var
|
|
id: String;
|
|
Ext: String;
|
|
p: SizeInt;
|
|
begin
|
|
Result:=Image;
|
|
if not KeepScheme then begin
|
|
// delete 'Image:'
|
|
p:=Pos(':',Result);
|
|
if p>0 then
|
|
Delete(Result,1,p);
|
|
end;
|
|
if IsInternalLink then
|
|
Result:=WikiTitleToPage(Result);
|
|
Ext:=ExtractFileExt(Result);
|
|
// encode file name without extension
|
|
Result:=WikiPageToFilename(copy(Result,1,Length(Result)-length(Ext)),false,false);
|
|
// encode extension
|
|
Ext:=LowerCase(WikiPageToFilename(copy(Ext,2,length(Ext)),false,false));
|
|
id:='';
|
|
if InsertCaseID then
|
|
id:='.'+WikiPageToCaseID(Result+Ext); // Note: compute case id with extension
|
|
Result:=Result+id+'.'+Ext;
|
|
end;
|
|
|
|
function WikiHeaderToLink(Header: string): string;
|
|
var
|
|
i: Integer;
|
|
s: string;
|
|
begin
|
|
Result:=UTF8Trim(Header);
|
|
i:=1;
|
|
while i<=length(Result) do begin
|
|
s:=Result[i];
|
|
case s[1] of
|
|
'-','_',':','.','0'..'9','a'..'z','A'..'Z',#128..#255: ; // keep
|
|
' ': s:='_';
|
|
'+': s:=''; // delete
|
|
else s:='.'+HexStr(ord(s[1]),2); // non-literal
|
|
end;
|
|
if s<>Result[i] then
|
|
ReplaceSubstring(Result,i,1,s);
|
|
inc(i,length(s));
|
|
end;
|
|
end;
|
|
|
|
function WikiCreateCommonCodeTagList(AddLazWikiLangs: boolean): TKeyWordFunctionList;
|
|
begin
|
|
Result:=TKeyWordFunctionList.Create('LanguageTags');
|
|
with Result do begin
|
|
Add('code',@AllwaysTrue);
|
|
Add('source',@AllwaysTrue);
|
|
Add('syntaxhighlight',@AllwaysTrue);
|
|
Add('pascal',@AllwaysTrue);
|
|
Add('delphi',@AllwaysTrue);
|
|
if AddLazWikiLangs then begin
|
|
Add('bash',@AllwaysTrue);
|
|
Add('java',@AllwaysTrue);
|
|
Add('javascript',@AllwaysTrue);
|
|
Add('xml',@AllwaysTrue);
|
|
Add('perl',@AllwaysTrue);
|
|
Add('python',@AllwaysTrue);
|
|
Add('sql',@AllwaysTrue);
|
|
Add('objc',@AllwaysTrue);
|
|
end;
|
|
end;
|
|
Result.Sort; // init now, so that multiple threads can use it
|
|
end;
|
|
|
|
const
|
|
WFCAllowedChars = ['a'..'z','A'..'Z','0'..'9',',','!','#','%','(',')','-','_',' '];
|
|
function UTF8ToWikiFileCode(AnUTF8: string): string;
|
|
{ Keep a..z, A..Z, 0..9, ,!#-%()-_
|
|
Replace + with +-
|
|
Replace = with =-
|
|
Replace single invalid byte with =HexHex
|
|
Replace sequences of invalid bytes as +base64-
|
|
}
|
|
const
|
|
Base64Chars: array[0..63] of char =
|
|
'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789,_';
|
|
var
|
|
p: PChar;
|
|
CharLen: integer;
|
|
BufBits: byte;
|
|
BufBitLen: integer;
|
|
c: Char;
|
|
SrcBits: Integer;
|
|
i: Integer;
|
|
AtEnd: Boolean;
|
|
begin
|
|
{$ifdef VerboseWikiFileCode}
|
|
writeln('UTF8ToWikiFileCode START AnUTF8="',AnUTF8,'"');
|
|
{$endif}
|
|
Result:='';
|
|
if AnUTF8='' then exit;
|
|
p:=PChar(AnUTF8);
|
|
repeat
|
|
c:=p^;
|
|
if (c=#0) and (p-PChar(AnUTF8)=length(AnUTF8)) then break;
|
|
AtEnd:=(p[1]=#0) and (p+1-PChar(AnUTF8)=length(AnUTF8));
|
|
if c in WFCAllowedChars then begin
|
|
// common English character: keep
|
|
{$ifdef VerboseWikiFileCode}
|
|
writeln('UTF8ToWikiFileCode normal char "',c,'"');
|
|
{$endif}
|
|
Result+=c;
|
|
inc(p);
|
|
end else if (c in ['+','=']) and ((p[1] in WFCAllowedChars) or AtEnd) then begin
|
|
// replace '+' with '+-' and '=' with '=-'
|
|
{$ifdef VerboseWikiFileCode}
|
|
writeln('UTF8ToWikiFileCode single + or = "',c,'"');
|
|
{$endif}
|
|
Result+=c+'-';
|
|
inc(p);
|
|
end else if (p[1] in WFCAllowedChars) or AtEnd then begin
|
|
// replace single special byte with =HexHex
|
|
{$ifdef VerboseWikiFileCode}
|
|
writeln('UTF8ToWikiFileCode single special char "',HexStr(ord(c),2),'"');
|
|
{$endif}
|
|
Result+='='+HexStr(ord(c),2);
|
|
inc(p);
|
|
end else begin
|
|
// special characters -> encode base64
|
|
// start marker: '+'
|
|
Result+='+';
|
|
// encode as base64
|
|
BufBits:=0;
|
|
BufBitLen:=0;
|
|
repeat
|
|
if (p^=#0) and (p-PChar(AnUTF8)=length(AnUTF8)) then
|
|
break; // end of string
|
|
if (p^ in WFCAllowedChars) and (p[1] in WFCAllowedChars)
|
|
and (p[2] in WFCAllowedChars) then
|
|
break; // the next three are normal characters -> stop encoding as base64
|
|
CharLen:=UTF8CodepointSize(p);
|
|
{$ifdef VerboseWikiFileCode}
|
|
writeln('UTF8ToWikiFileCode sequence UTF8CharLen=',CharLen);
|
|
{$endif}
|
|
for i:=1 to CharLen do begin
|
|
SrcBits:=ord(p^);
|
|
BufBits:=BufBits or (SrcBits shr (2+BufBitLen));
|
|
{$ifdef VerboseWikiFileCode}
|
|
writeln('UTF8ToWikiFileCode sequence 6bitA Byte=',i,' SrcBits=',binstr(SrcBits,8),' BufBits=',binstr(BufBits,6),' BufBitLen=',6);
|
|
{$endif}
|
|
Result+=Base64Chars[BufBits];
|
|
BufBits:=(SrcBits shl (4-BufBitLen)) and %111111;
|
|
BufBitLen:=2+BufBitLen;
|
|
if BufBitLen=6 then begin
|
|
{$ifdef VerboseWikiFileCode}
|
|
writeln('UTF8ToWikiFileCode sequence 6bitB Byte=',i,' SrcBits=',binstr(SrcBits,8),' BufBits=',binstr(BufBits,6),' BufBitLen=',BufBitLen);
|
|
{$endif}
|
|
Result+=Base64Chars[BufBits];
|
|
BufBitLen:=0;
|
|
BufBits:=0;
|
|
end else begin
|
|
{$ifdef VerboseWikiFileCode}
|
|
writeln('UTF8ToWikiFileCode sequence <6bit Byte=',i,' SrcBits=',binstr(SrcBits,8),' BufBits=',binstr(BufBits,6),' BufBitLen=',BufBitLen);
|
|
{$endif}
|
|
end;
|
|
inc(p);
|
|
end;
|
|
until false;
|
|
if BufBitLen>0 then begin
|
|
{$ifdef VerboseWikiFileCode}
|
|
writeln('UTF8ToWikiFileCode sequence Paddi Byte=',i,' SrcBits=',binstr(SrcBits,8),' BufBits=',binstr(BufBits,6),' BufBitLen=',BufBitLen);
|
|
{$endif}
|
|
Result+=Base64Chars[BufBits];
|
|
end;
|
|
// end marker: '-'
|
|
Result+='-';
|
|
end;
|
|
until false;
|
|
end;
|
|
|
|
function WikiFileCodeToUTF8(aFileCode: string): string;
|
|
var
|
|
p: PChar;
|
|
SrcBits: Integer;
|
|
BufBits: byte;
|
|
BufBitLen: Integer;
|
|
c: Char;
|
|
begin
|
|
{$ifdef VerboseWikiFileCode}
|
|
writeln('WikiFileCodeToUTF8 Code="',aFileCode,'"');
|
|
{$endif}
|
|
Result:='';
|
|
if aFileCode='' then exit;
|
|
p:=PChar(aFileCode);
|
|
repeat
|
|
c:=p^;
|
|
if (c=#0) and (p-PChar(aFileCode)=length(aFileCode)) then break;
|
|
if c='+' then begin
|
|
inc(p);
|
|
if p^='-' then begin
|
|
{$ifdef VerboseWikiFileCode}
|
|
writeln('WikiFileCodeToUTF8 +- to +');
|
|
{$endif}
|
|
inc(p);
|
|
Result+='+'; // single '+'
|
|
end else begin
|
|
// decode base64, read til '-'
|
|
{$ifdef VerboseWikiFileCode}
|
|
writeln('WikiFileCodeToUTF8 base64 sequence');
|
|
{$endif}
|
|
BufBits:=0;
|
|
BufBitLen:=0;
|
|
repeat
|
|
c:=p^;
|
|
case c of
|
|
'-':
|
|
begin
|
|
inc(p);
|
|
break;
|
|
end;
|
|
'A'..'Z': SrcBits:=ord(c)-ord('A');
|
|
'a'..'z': SrcBits:=ord(c)-ord('a')+26;
|
|
'0'..'9': SrcBits:=ord(c)-ord('0')+52;
|
|
',': SrcBits:=62;
|
|
'_': SrcBits:=63;
|
|
else
|
|
raise Exception.Create('invalid wiki file code: invalid base64 character');
|
|
end;
|
|
{$ifdef VerboseWikiFileCode}
|
|
writeln('WikiFileCodeToUTF8 SrcBits=',binstr(SrcBits,6));
|
|
{$endif}
|
|
if BufBitLen=0 then begin
|
|
BufBits:=BufBits or (SrcBits shl 2);
|
|
BufBitLen:=6;
|
|
{$ifdef VerboseWikiFileCode}
|
|
writeln('WikiFileCodeToUTF8 new byte BufBits=',binstr(BufBits,8),' BufBitLen=',BufBitLen);
|
|
{$endif}
|
|
end else begin
|
|
BufBits:=BufBits or (SrcBits shr (BufBitLen-2));
|
|
{$ifdef VerboseWikiFileCode}
|
|
writeln('WikiFileCodeToUTF8 byte complete BufBits=',binstr(BufBits,8),' BufBitLen=',8);
|
|
{$endif}
|
|
Result+=chr(BufBits);
|
|
BufBitLen-=2;
|
|
BufBits:=(SrcBits shl (8-BufBitLen)) and $FF;
|
|
{$ifdef VerboseWikiFileCode}
|
|
writeln('WikiFileCodeToUTF8 rest byte BufBits=',binstr(BufBits,8),' BufBitLen=',BufBitLen);
|
|
{$endif}
|
|
end;
|
|
inc(p);
|
|
until false;
|
|
// Note: BufBitLen can be >0 (the last byte contains padding bits)
|
|
{$ifdef VerboseWikiFileCode}
|
|
writeln('WikiFileCodeToUTF8 D BufBits=',binstr(BufBits,8),' BufBitLen=',BufBitLen);
|
|
{$endif}
|
|
if (BufBits shr (8-BufBitLen))>0 then
|
|
raise Exception.Create('invalid wiki file code: padding bits not empty');
|
|
end;
|
|
end else if c='=' then begin
|
|
inc(p);
|
|
if p^='-' then begin
|
|
{$ifdef VerboseWikiFileCode}
|
|
writeln('WikiFileCodeToUTF8 =- to =');
|
|
{$endif}
|
|
Result+='=';
|
|
inc(p);
|
|
end else begin
|
|
// one byte as hex code
|
|
{$ifdef VerboseWikiFileCode}
|
|
writeln('WikiFileCodeToUTF8 =hex ',p^,p[1]);
|
|
{$endif}
|
|
SrcBits:=0;
|
|
case p^ of
|
|
'0'..'9': SrcBits:=ord(p^)-ord('0');
|
|
'A'..'F': SrcBits:=ord(p^)-ord('A')+10;
|
|
'a'..'f': SrcBits:=ord(p^)-ord('a')+10;
|
|
else
|
|
raise Exception.Create('invalid wiki file code: invalid hex code');
|
|
end;
|
|
inc(p);
|
|
SrcBits:=SrcBits*16;
|
|
case p^ of
|
|
'0'..'9': SrcBits+=ord(p^)-ord('0');
|
|
'A'..'F': SrcBits+=ord(p^)-ord('A')+10;
|
|
'a'..'f': SrcBits+=ord(p^)-ord('a')+10;
|
|
else
|
|
raise Exception.Create('invalid wiki file code: invalid hex code');
|
|
end;
|
|
inc(p);
|
|
{$ifdef VerboseWikiFileCode}
|
|
writeln('WikiFileCodeToUTF8 =hex byte=',SrcBits);
|
|
{$endif}
|
|
Result+=chr(SrcBits);
|
|
end;
|
|
end else if c in WFCAllowedChars then begin
|
|
// normal char
|
|
{$ifdef VerboseWikiFileCode}
|
|
writeln('WikiFileCodeToUTF8 normal char "',c,'"');
|
|
{$endif}
|
|
Result+=c;
|
|
inc(p);
|
|
end else
|
|
raise Exception.Create('invalid wiki file code: invalid character');
|
|
until false;
|
|
if FindInvalidUTF8Codepoint(PChar(Result),length(Result))>=0 then
|
|
raise Exception.Create('invalid wiki file code: result is not UTF-8');
|
|
end;
|
|
|
|
procedure TestWikiPageToFilename;
|
|
|
|
procedure t(PageName: string);
|
|
var
|
|
Filename: String;
|
|
NewPageName: String;
|
|
step: integer;
|
|
begin
|
|
step:=0;
|
|
try
|
|
Filename:=WikiPageToFilename(PageName,false,true);
|
|
NewPageName:=WikiFilenameToPage(Filename);
|
|
inc(step);
|
|
if PageName=NewPageName then
|
|
inc(step);
|
|
finally
|
|
if step<2 then begin
|
|
writeln('TestPageToFilename failed:');
|
|
writeln(' PageName ="',PageName,'"');
|
|
writeln(' NewPageName="',NewPageName,'"');
|
|
writeln(' Filename ="',Filename,'"');
|
|
end;
|
|
if step=0 then
|
|
raise Exception.Create('TestPageToFilename failed');
|
|
end;
|
|
end;
|
|
|
|
begin
|
|
t('');
|
|
t('a');
|
|
t('A');
|
|
t('A1');
|
|
t('3');
|
|
t('/');
|
|
t('A/B');
|
|
t('A+B');
|
|
t('A-B');
|
|
t('A=B');
|
|
t('A+=B');
|
|
t('A+-B');
|
|
t('A+');
|
|
t('A=');
|
|
t('A*');
|
|
t('A*$');
|
|
t('A*$%');
|
|
t('A*$*$');
|
|
t('A*$*$*');
|
|
t('A*$*$*$');
|
|
end;
|
|
|
|
function GetWikiPageLanguage(const DocumentName: string): string;
|
|
var
|
|
p: Integer;
|
|
begin
|
|
p:=GetWikiPageLanguageDelimiterPos(DocumentName);
|
|
if p>0 then
|
|
Result:=copy(DocumentName,p+1,length(DocumentName))
|
|
else
|
|
Result:='';
|
|
end;
|
|
|
|
function GetWikiPageLanguageDelimiterPos(const DocumentName: string): integer;
|
|
var
|
|
l: Integer;
|
|
p: PChar;
|
|
begin
|
|
Result:=0;
|
|
l:=length(DocumentName);
|
|
if l=0 then exit;
|
|
if (l>3) then begin
|
|
p:=PChar(DocumentName)+l-3;
|
|
if (p^='/')
|
|
and (p[1] in ['a'..'z']) and (p[2] in ['a'..'z']) then
|
|
// short form: /de
|
|
exit(l-2);
|
|
end;
|
|
if (l>6) then begin
|
|
p:=PChar(DocumentName)+l-6;
|
|
if (p^='/')
|
|
and (p[1] in ['a'..'z']) and (p[2] in ['a'..'z'])
|
|
and (p[3]='_')
|
|
and (p[4] in ['A'..'Z']) and (p[5] in ['A'..'Z'])
|
|
then
|
|
// long form: /zh_TW
|
|
exit(l-5);
|
|
end;
|
|
end;
|
|
|
|
function ChompWikiPageLanguage(const DocumentName: string): string;
|
|
var
|
|
p: Integer;
|
|
begin
|
|
p:=GetWikiPageLanguageDelimiterPos(DocumentName);
|
|
if p>0 then
|
|
Result:=LeftStr(DocumentName,p-1)
|
|
else
|
|
Result:=DocumentName;
|
|
end;
|
|
|
|
function WikiPageHasLanguage(const DocumentName, Languages: string): boolean;
|
|
begin
|
|
Result:=WikiLangInLanguageFilter(GetWikiPageLanguage(DocumentName),Languages);
|
|
end;
|
|
|
|
function WikiLangInLanguageFilter(const Lang, Languages: string): boolean;
|
|
// * = fits any
|
|
// de = fits 'de' and original
|
|
// -,de = fits only 'de'
|
|
var
|
|
p: PChar;
|
|
StartPos: PChar;
|
|
begin
|
|
if (Languages='') then
|
|
exit(Lang='');
|
|
p:=PChar(Languages);
|
|
while p^<>#0 do begin
|
|
StartPos:=p;
|
|
while not (p^ in [#0,',']) do inc(p);
|
|
if p>StartPos then begin
|
|
if StartPos^='-' then begin
|
|
// not original language
|
|
if Lang='' then exit(false);
|
|
end else if StartPos^='*' then begin
|
|
// fit any
|
|
exit(true);
|
|
end else if (Lang<>'') and (CompareIdentifiers(StartPos,PChar(Lang))=0)
|
|
then begin
|
|
// fits specific
|
|
exit(true);
|
|
end;
|
|
end;
|
|
while p^=',' do inc(p);
|
|
end;
|
|
Result:=false;
|
|
end;
|
|
|
|
function WikiLangCodeToCaption(ID: string): string;
|
|
begin
|
|
if ID='' then Result:=wrsLanguageEnglishOriginal
|
|
else if ID='*' then Result:=wrsAll
|
|
else if CompareText(ID,'af')=0 then Result:=wrsLanguageAfrikaans
|
|
else if CompareText(ID,'ar')=0 then Result:=wrsLanguageArabic
|
|
else if CompareText(ID,'ca')=0 then Result:=wrsLanguageCatalan
|
|
else if CompareText(ID,'cs')=0 then Result:=wrsLanguageCzech
|
|
else if CompareText(ID,'de')=0 then Result:=wrsLanguageGerman
|
|
else if CompareText(ID,'en')=0 then Result:=wrsLanguageEnglish
|
|
else if CompareText(ID,'es')=0 then Result:=wrsLanguageSpanish
|
|
else if CompareText(ID,'fa')=0 then Result:=wrsLanguagePersian
|
|
else if CompareText(ID,'fi')=0 then Result:=wrsLanguageFinnish
|
|
else if CompareText(ID,'fr')=0 then Result:=wrsLanguageFrench
|
|
else if CompareText(ID,'he')=0 then Result:=wrsLanguageHebrew
|
|
else if CompareText(ID,'hu')=0 then Result:=wrsLanguageHungarian
|
|
else if CompareText(ID,'id')=0 then Result:=wrsLanguageIndonesian
|
|
else if CompareText(ID,'it')=0 then Result:=wrsLanguageItalian
|
|
else if CompareText(ID,'ja')=0 then Result:=wrsLanguageJapanese
|
|
else if CompareText(ID,'ko')=0 then Result:=wrsLanguageKorean
|
|
else if CompareText(ID,'lt')=0 then Result:=wrsLanguageLithuanian
|
|
else if CompareText(ID,'mk')=0 then Result:=wrsLanguageMacedonien
|
|
else if CompareText(ID,'nl')=0 then Result:=wrsLanguageDutch
|
|
else if CompareText(ID,'pl')=0 then Result:=wrsLanguagePolish
|
|
else if CompareText(ID,'pt')=0 then Result:=wrsLanguagePortuguese
|
|
else if CompareText(ID,'pt_BR')=0 then Result:=wrsLanguagePortugueseBr
|
|
else if CompareText(ID,'ro')=0 then Result:=wrsLanguageRomanian
|
|
else if CompareText(ID,'ru')=0 then Result:=wrsLanguageRussian
|
|
else if CompareText(ID,'sk')=0 then Result:=wrsLanguageSlovak
|
|
else if CompareText(ID,'sq')=0 then Result:=wrsLanguageAlbania
|
|
else if CompareText(ID,'tr')=0 then Result:=wrsLanguageTurkish
|
|
else if CompareText(ID,'uk')=0 then Result:=wrsLanguageUkrainian
|
|
else if CompareText(ID,'zh_CN')=0 then Result:=wrsLanguageChinese
|
|
else if CompareText(ID,'zh_TW')=0 then Result:=wrsLanguageChineseTaiwan
|
|
else Result:=ID;
|
|
end;
|
|
|
|
function WikiLangCaptionToCode(Caption: string): string;
|
|
begin
|
|
if Caption=wrsLanguageEnglishOriginal then Result:=''
|
|
else if Caption=wrsAll then Result:='*'
|
|
else if Caption=wrsLanguageAfrikaans then Result:='af'
|
|
else if Caption=wrsLanguageAlbania then Result:='sq'
|
|
else if Caption=wrsLanguageArabic then Result:='ar'
|
|
else if Caption=wrsLanguageCatalan then Result:='ca'
|
|
else if Caption=wrsLanguageChinese then Result:='zh_CN'
|
|
else if Caption=wrsLanguageChineseTaiwan then Result:='zh_TW'
|
|
else if Caption=wrsLanguageCzech then Result:='cs'
|
|
else if Caption=wrsLanguageDutch then Result:='nl'
|
|
else if Caption=wrsLanguageEnglish then Result:='en'
|
|
else if Caption=wrsLanguageFinnish then Result:='fi'
|
|
else if Caption=wrsLanguageFrench then Result:='fr'
|
|
else if Caption=wrsLanguageGerman then Result:='de'
|
|
else if Caption=wrsLanguageHebrew then Result:='he'
|
|
else if Caption=wrsLanguageHungarian then Result:='hu'
|
|
else if Caption=wrsLanguageIndonesian then Result:='id'
|
|
else if Caption=wrsLanguageItalian then Result:='it'
|
|
else if Caption=wrsLanguageJapanese then Result:='ja'
|
|
else if Caption=wrsLanguageKorean then Result:='ko'
|
|
else if Caption=wrsLanguageLithuanian then Result:='lt'
|
|
else if Caption=wrsLanguageMacedonien then Result:='mk'
|
|
else if Caption=wrsLanguagePersian then Result:='fa'
|
|
else if Caption=wrsLanguagePolish then Result:='pl'
|
|
else if Caption=wrsLanguagePortuguese then Result:='pt'
|
|
else if Caption=wrsLanguagePortugueseBr then Result:='pt_BR'
|
|
else if Caption=wrsLanguageRomanian then Result:='ro'
|
|
else if Caption=wrsLanguageRussian then Result:='ru'
|
|
else if Caption=wrsLanguageSlovak then Result:='sk'
|
|
else if Caption=wrsLanguageSpanish then Result:='es'
|
|
else if Caption=wrsLanguageTurkish then Result:='tk'
|
|
else if Caption=wrsLanguageUkrainian then Result:='uk'
|
|
else Result:=Caption;
|
|
end;
|
|
|
|
function ComparePagesWithFilenames(Page1, Page2: Pointer): integer;
|
|
var
|
|
p1: TW2FormatPage absolute Page1;
|
|
p2: TW2FormatPage absolute Page2;
|
|
begin
|
|
Result:=CompareFilenames(p1.WikiFilename,p2.WikiFilename);
|
|
end;
|
|
|
|
function CompareFilenameWithPage(Filename, Page: Pointer): integer;
|
|
var
|
|
p: TW2FormatPage absolute Page;
|
|
begin
|
|
Result:=CompareFilenames(AnsiString(Filename),p.WikiFilename);
|
|
end;
|
|
|
|
function ComparePagesWithDocumentNames(Page1, Page2: Pointer): integer;
|
|
var
|
|
p1: TW2FormatPage absolute Page1;
|
|
p2: TW2FormatPage absolute Page2;
|
|
begin
|
|
Result:=BasicCodeTools.CompareTextCT(p1.WikiDocumentName,p2.WikiDocumentName,true);
|
|
end;
|
|
|
|
function CompareDocumentNameWithPage(DocumentName, Page: Pointer): integer;
|
|
var
|
|
p: TW2FormatPage absolute Page;
|
|
begin
|
|
Result:=BasicCodeTools.CompareTextCT(AnsiString(DocumentName),p.WikiDocumentName,true);
|
|
end;
|
|
|
|
function CompareDocumentNameWithPageIgnoreLang(DocumentName, Page: Pointer
|
|
): integer;
|
|
var
|
|
p: TW2FormatPage absolute Page;
|
|
DocName: String;
|
|
l1: Integer;
|
|
l2: Integer;
|
|
begin
|
|
DocName:=AnsiString(DocumentName);
|
|
l1:=GetWikiPageLanguageDelimiterPos(DocName)-1;
|
|
if l1<0 then l1:=length(DocName);
|
|
l2:=GetWikiPageLanguageDelimiterPos(p.WikiDocumentName)-1;
|
|
if l2<0 then l2:=length(p.WikiDocumentName);
|
|
Result:=BasicCodeTools.CompareText(PChar(DocName),l1,
|
|
PChar(p.WikiDocumentName),l2,true);
|
|
end;
|
|
|
|
end.
|
|
|