mirror of
https://gitlab.com/freepascal.org/fpc/source.git
synced 2025-12-04 02:37:09 +01:00
utils: add unicode utils from Inoussa (part of mantis #0022909)
git-svn-id: trunk@23748 -
This commit is contained in:
parent
364a874623
commit
ab8c293a36
17
.gitattributes
vendored
17
.gitattributes
vendored
@ -14520,3 +14520,20 @@ utils/tply/yaccsem.pas svneol=native#text/plain
|
||||
utils/tply/yacctabl.pas svneol=native#text/plain
|
||||
utils/tply/yylex.cod svneol=native#text/plain
|
||||
utils/tply/yyparse.cod svneol=native#text/plain
|
||||
utils/unicode/Makefile svneol=native#text/plain
|
||||
utils/unicode/Makefile.fpc svneol=native#text/plain
|
||||
utils/unicode/cldrhelper.pas svneol=native#text/pascal
|
||||
utils/unicode/cldrparser.lpi svneol=native#text/plain
|
||||
utils/unicode/cldrparser.lpr svneol=native#text/pascal
|
||||
utils/unicode/cldrtest.pas svneol=native#text/pascal
|
||||
utils/unicode/cldrxml.pas svneol=native#text/pascal
|
||||
utils/unicode/data/readme.txt svneol=native#text/plain
|
||||
utils/unicode/grbtree.pas svneol=native#text/pascal
|
||||
utils/unicode/helper.pas svneol=native#text/pascal
|
||||
utils/unicode/parse-collations.bat svneol=native#text/plain
|
||||
utils/unicode/trie.pas svneol=native#text/pascal
|
||||
utils/unicode/uca_test.pas svneol=native#text/pascal
|
||||
utils/unicode/unicodeset.pas svneol=native#text/pascal
|
||||
utils/unicode/unihelper.lpi svneol=native#text/plain
|
||||
utils/unicode/unihelper.lpr svneol=native#text/pascal
|
||||
utils/unicode/weight_derivation.inc svneol=native#text/pascal
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
#
|
||||
# Don't edit, this file is generated by FPCMake Version 2.0.0 [2013/01/16]
|
||||
# Don't edit, this file is generated by FPCMake Version 2.0.0 [2013/03/08]
|
||||
#
|
||||
default: all
|
||||
MAKEFILETARGETS=i386-linux i386-go32v2 i386-win32 i386-os2 i386-freebsd i386-beos i386-haiku i386-netbsd i386-solaris i386-qnx i386-netware i386-openbsd i386-wdosx i386-darwin i386-emx i386-watcom i386-netwlibc i386-wince i386-embedded i386-symbian i386-nativent i386-iphonesim i386-android m68k-linux m68k-freebsd m68k-netbsd m68k-amiga m68k-atari m68k-openbsd m68k-palmos m68k-embedded powerpc-linux powerpc-netbsd powerpc-amiga powerpc-macos powerpc-darwin powerpc-morphos powerpc-embedded powerpc-wii powerpc-aix sparc-linux sparc-netbsd sparc-solaris sparc-embedded x86_64-linux x86_64-freebsd x86_64-netbsd x86_64-solaris x86_64-openbsd x86_64-darwin x86_64-win64 x86_64-embedded arm-linux arm-palmos arm-darwin arm-wince arm-gba arm-nds arm-embedded arm-symbian arm-android powerpc64-linux powerpc64-darwin powerpc64-embedded powerpc64-aix avr-embedded armeb-linux armeb-embedded mips-linux mipsel-linux jvm-java jvm-android
|
||||
@ -268,6 +268,19 @@ ifdef CROSSCOMPILE
|
||||
ifndef DARWIN2DARWIN
|
||||
ifneq ($(CPU_TARGET),jvm)
|
||||
BINUTILSPREFIX=$(CPU_TARGET)-$(OS_TARGET)-
|
||||
ifeq ($(OS_TARGET),android)
|
||||
ifeq ($(CPU_TARGET),arm)
|
||||
BINUTILSPREFIX=arm-linux-androideabi-
|
||||
else
|
||||
ifeq ($(CPU_TARGET),i386)
|
||||
BINUTILSPREFIX=i686-linux-android-
|
||||
else
|
||||
ifeq ($(CPU_TARGET),mips)
|
||||
BINUTILSPREFIX=mipsel-linux-android-
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
@ -311,7 +324,7 @@ ifeq ($(FULL_TARGET),i386-go32v2)
|
||||
override TARGET_DIRS+=fppkg fpcm tply h2pas fprcp dxegen fpdoc fpcmkcfg pas2ut pas2fpm rmwait
|
||||
endif
|
||||
ifeq ($(FULL_TARGET),i386-win32)
|
||||
override TARGET_DIRS+=fppkg fpcm tply h2pas fprcp dxegen fpdoc fpcmkcfg pas2ut pas2fpm fpmc fpcres rmwait instantfpc importtl
|
||||
override TARGET_DIRS+=fppkg fpcm tply h2pas fprcp dxegen fpdoc fpcmkcfg pas2ut pas2fpm fpmc fpcres rmwait instantfpc importtl unicode
|
||||
endif
|
||||
ifeq ($(FULL_TARGET),i386-os2)
|
||||
override TARGET_DIRS+=fppkg fpcm tply h2pas fprcp dxegen fpdoc fpcmkcfg pas2ut pas2fpm fpmc fpcres rmwait
|
||||
@ -3379,6 +3392,7 @@ TARGET_DIRS_FPCRES=1
|
||||
TARGET_DIRS_RMWAIT=1
|
||||
TARGET_DIRS_INSTANTFPC=1
|
||||
TARGET_DIRS_IMPORTTL=1
|
||||
TARGET_DIRS_UNICODE=1
|
||||
endif
|
||||
ifeq ($(FULL_TARGET),i386-os2)
|
||||
TARGET_DIRS_FPPKG=1
|
||||
@ -4970,6 +4984,51 @@ importtl:
|
||||
$(MAKE) -C importtl all
|
||||
.PHONY: importtl_all importtl_debug importtl_smart importtl_release importtl_units importtl_examples importtl_shared importtl_install importtl_sourceinstall importtl_exampleinstall importtl_distinstall importtl_zipinstall importtl_zipsourceinstall importtl_zipexampleinstall importtl_zipdistinstall importtl_clean importtl_distclean importtl_cleanall importtl_info importtl_makefiles importtl
|
||||
endif
|
||||
ifdef TARGET_DIRS_UNICODE
|
||||
unicode_all:
|
||||
$(MAKE) -C unicode all
|
||||
unicode_debug:
|
||||
$(MAKE) -C unicode debug
|
||||
unicode_smart:
|
||||
$(MAKE) -C unicode smart
|
||||
unicode_release:
|
||||
$(MAKE) -C unicode release
|
||||
unicode_units:
|
||||
$(MAKE) -C unicode units
|
||||
unicode_examples:
|
||||
$(MAKE) -C unicode examples
|
||||
unicode_shared:
|
||||
$(MAKE) -C unicode shared
|
||||
unicode_install:
|
||||
$(MAKE) -C unicode install
|
||||
unicode_sourceinstall:
|
||||
$(MAKE) -C unicode sourceinstall
|
||||
unicode_exampleinstall:
|
||||
$(MAKE) -C unicode exampleinstall
|
||||
unicode_distinstall:
|
||||
$(MAKE) -C unicode distinstall
|
||||
unicode_zipinstall:
|
||||
$(MAKE) -C unicode zipinstall
|
||||
unicode_zipsourceinstall:
|
||||
$(MAKE) -C unicode zipsourceinstall
|
||||
unicode_zipexampleinstall:
|
||||
$(MAKE) -C unicode zipexampleinstall
|
||||
unicode_zipdistinstall:
|
||||
$(MAKE) -C unicode zipdistinstall
|
||||
unicode_clean:
|
||||
$(MAKE) -C unicode clean
|
||||
unicode_distclean:
|
||||
$(MAKE) -C unicode distclean
|
||||
unicode_cleanall:
|
||||
$(MAKE) -C unicode cleanall
|
||||
unicode_info:
|
||||
$(MAKE) -C unicode info
|
||||
unicode_makefiles:
|
||||
$(MAKE) -C unicode makefiles
|
||||
unicode:
|
||||
$(MAKE) -C unicode all
|
||||
.PHONY: unicode_all unicode_debug unicode_smart unicode_release unicode_units unicode_examples unicode_shared unicode_install unicode_sourceinstall unicode_exampleinstall unicode_distinstall unicode_zipinstall unicode_zipsourceinstall unicode_zipexampleinstall unicode_zipdistinstall unicode_clean unicode_distclean unicode_cleanall unicode_info unicode_makefiles unicode
|
||||
endif
|
||||
ifdef TARGET_DIRS_FPCRESLIPO
|
||||
fpcreslipo_all:
|
||||
$(MAKE) -C fpcreslipo all
|
||||
|
||||
@ -10,7 +10,7 @@ version=2.7.1
|
||||
dirs=fppkg fpcm tply h2pas fprcp dxegen fpdoc fpcmkcfg pas2ut pas2fpm
|
||||
programs=ppdep ptop rstconv data2inc delp bin2obj postw32 rmcvsdir
|
||||
programs_linux=grab_vcsa
|
||||
dirs_win32=fpmc fpcres rmwait instantfpc importtl
|
||||
dirs_win32=fpmc fpcres rmwait instantfpc importtl unicode
|
||||
dirs_win64=fpmc fpcres rmwait instantfpc importtl
|
||||
dirs_wince=fpcres rmwait instantfpc
|
||||
dirs_haiku=fpcres instantfpc
|
||||
|
||||
2154
utils/unicode/Makefile
Normal file
2154
utils/unicode/Makefile
Normal file
File diff suppressed because it is too large
Load Diff
23
utils/unicode/Makefile.fpc
Normal file
23
utils/unicode/Makefile.fpc
Normal file
@ -0,0 +1,23 @@
|
||||
#
|
||||
# Makefile.fpc for Unicode Utils
|
||||
#
|
||||
|
||||
[target]
|
||||
programs=cldrparser unihelper
|
||||
|
||||
[clean]
|
||||
units=cldrhelper cldrtest cldrxml grbtree helper trie uca_test unicodeset
|
||||
|
||||
[require]
|
||||
packages=rtl
|
||||
|
||||
[install]
|
||||
fpcpackage=y
|
||||
|
||||
[default]
|
||||
fpcdir=../..
|
||||
|
||||
[rules]
|
||||
.NOTPARALLEL:
|
||||
cldrparser$(EXEEXT): cldrparser.lpr cldrhelper.pas helper.pas cldrtest.pas cldrxml.pas unicodeset.pas
|
||||
unihelper$(EXEEXT): unihelper.lpr helper.pas uca_test.pas
|
||||
1634
utils/unicode/cldrhelper.pas
Normal file
1634
utils/unicode/cldrhelper.pas
Normal file
File diff suppressed because it is too large
Load Diff
105
utils/unicode/cldrparser.lpi
Normal file
105
utils/unicode/cldrparser.lpi
Normal file
@ -0,0 +1,105 @@
|
||||
<?xml version="1.0"?>
|
||||
<CONFIG>
|
||||
<ProjectOptions>
|
||||
<Version Value="9"/>
|
||||
<PathDelim Value="\"/>
|
||||
<General>
|
||||
<Flags>
|
||||
<MainUnitHasCreateFormStatements Value="False"/>
|
||||
<MainUnitHasTitleStatement Value="False"/>
|
||||
</Flags>
|
||||
<SessionStorage Value="InProjectDir"/>
|
||||
<MainUnit Value="0"/>
|
||||
<Title Value="cldrparser"/>
|
||||
<UseAppBundle Value="False"/>
|
||||
<ResourceType Value="res"/>
|
||||
</General>
|
||||
<i18n>
|
||||
<EnableI18N LFM="False"/>
|
||||
</i18n>
|
||||
<VersionInfo>
|
||||
<StringTable ProductVersion=""/>
|
||||
</VersionInfo>
|
||||
<BuildModes Count="1">
|
||||
<Item1 Name="Default" Default="True"/>
|
||||
</BuildModes>
|
||||
<PublishOptions>
|
||||
<Version Value="2"/>
|
||||
<IncludeFileFilter Value="*.(pas|pp|inc|lfm|lpr|lrs|lpi|lpk|sh|xml)"/>
|
||||
<ExcludeFileFilter Value="*.(bak|ppu|o|so);*~;backup"/>
|
||||
</PublishOptions>
|
||||
<RunParams>
|
||||
<local>
|
||||
<FormatVersion Value="1"/>
|
||||
<CommandLineParams Value="de.xml search"/>
|
||||
</local>
|
||||
</RunParams>
|
||||
<Units Count="7">
|
||||
<Unit0>
|
||||
<Filename Value="cldrparser.lpr"/>
|
||||
<IsPartOfProject Value="True"/>
|
||||
<UnitName Value="cldrparser"/>
|
||||
</Unit0>
|
||||
<Unit1>
|
||||
<Filename Value="cldrhelper.pas"/>
|
||||
<IsPartOfProject Value="True"/>
|
||||
<UnitName Value="cldrhelper"/>
|
||||
</Unit1>
|
||||
<Unit2>
|
||||
<Filename Value="..\helper.pas"/>
|
||||
<IsPartOfProject Value="True"/>
|
||||
<UnitName Value="helper"/>
|
||||
</Unit2>
|
||||
<Unit3>
|
||||
<Filename Value="cldrtest.pas"/>
|
||||
<IsPartOfProject Value="True"/>
|
||||
<UnitName Value="cldrtest"/>
|
||||
</Unit3>
|
||||
<Unit4>
|
||||
<Filename Value="cldrxml.pas"/>
|
||||
<IsPartOfProject Value="True"/>
|
||||
<UnitName Value="cldrxml"/>
|
||||
</Unit4>
|
||||
<Unit5>
|
||||
<Filename Value="..\tests\weight_derivation.inc"/>
|
||||
<IsPartOfProject Value="True"/>
|
||||
</Unit5>
|
||||
<Unit6>
|
||||
<Filename Value="unicodeset.pas"/>
|
||||
<IsPartOfProject Value="True"/>
|
||||
<UnitName Value="unicodeset"/>
|
||||
</Unit6>
|
||||
</Units>
|
||||
</ProjectOptions>
|
||||
<CompilerOptions>
|
||||
<Version Value="11"/>
|
||||
<PathDelim Value="\"/>
|
||||
<Target>
|
||||
<Filename Value="cldrparser"/>
|
||||
</Target>
|
||||
<SearchPaths>
|
||||
<IncludeFiles Value="$(ProjOutDir);..\src"/>
|
||||
<OtherUnitFiles Value="..\src;..\rbtree\src;..\trie"/>
|
||||
<UnitOutputDirectory Value="lib\$(TargetCPU)-$(TargetOS)"/>
|
||||
</SearchPaths>
|
||||
<Other>
|
||||
<CompilerMessages>
|
||||
<MsgFileName Value=""/>
|
||||
</CompilerMessages>
|
||||
<CompilerPath Value="$(CompPath)"/>
|
||||
</Other>
|
||||
</CompilerOptions>
|
||||
<Debugging>
|
||||
<Exceptions Count="3">
|
||||
<Item1>
|
||||
<Name Value="EAbort"/>
|
||||
</Item1>
|
||||
<Item2>
|
||||
<Name Value="ECodetoolError"/>
|
||||
</Item2>
|
||||
<Item3>
|
||||
<Name Value="EFOpenError"/>
|
||||
</Item3>
|
||||
</Exceptions>
|
||||
</Debugging>
|
||||
</CONFIG>
|
||||
205
utils/unicode/cldrparser.lpr
Normal file
205
utils/unicode/cldrparser.lpr
Normal file
@ -0,0 +1,205 @@
|
||||
{ Unicode CLDR's collation parser.
|
||||
Copyright (c) 2013 by Inoussa OUEDRAOGO
|
||||
|
||||
It creates units from CLDR's collation files.
|
||||
|
||||
The source code is distributed under the Library GNU
|
||||
General Public License with the following modification:
|
||||
|
||||
- object files and libraries linked into an application may be
|
||||
distributed without source code.
|
||||
|
||||
If you didn't receive a copy of the file COPYING, contact:
|
||||
Free Software Foundation
|
||||
675 Mass Ave
|
||||
Cambridge, MA 02139
|
||||
USA
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
||||
}
|
||||
program cldrparser;
|
||||
|
||||
{$mode objfpc}{$H+}
|
||||
|
||||
{ $define test_suite} // Define this to execute the parser test suite.
|
||||
{$define actual_parsing}
|
||||
|
||||
uses
|
||||
SysUtils, classes, getopts,
|
||||
cldrhelper, helper, cldrtest, cldrxml, unicodeset;
|
||||
|
||||
const
|
||||
SUsageText =
|
||||
'This program creates pascal units from CLDR''s collation files for usage ' + sLineBreak +
|
||||
'with the FreePascal Native Unicode Manager.' + sLineBreak + sLineBreak +
|
||||
'Usage : cldrparser <collationFileName> [<typeName>] [-d<dataDir>] [-o<outputDir>]' + sLineBreak + sLineBreak +
|
||||
' where :' + sLineBreak +
|
||||
' ' + sLineBreak +
|
||||
' - collationFileName : specify the target file.' + sLineBreak +
|
||||
' - typeName : optional, specify the collation'' type-name to be parse;' + sLineBreak +
|
||||
' If this argument is not supplied, a default type-name' + sLineBreak +
|
||||
' is chosen from the type-name list in the following order : ' + sLineBreak +
|
||||
' * the "default" specified type-name indicated by the collation' + sLineBreak +
|
||||
' * the type named "standard" ' + sLineBreak +
|
||||
' * the type named "search" ' + sLineBreak +
|
||||
' * the first type.' + sLineBreak +
|
||||
' - dataDir : specify the directory that contains the collation files.' + sLineBreak +
|
||||
' The default value is the program''s directory.' + sLineBreak +
|
||||
' - outputDir : specify the directory where the generated files will be stored.' + sLineBreak +
|
||||
' The default value is the program''s directory.' + sLineBreak +
|
||||
' ' + sLineBreak +
|
||||
' The program expects some files to be present in the <dataDir> folder : ' + sLineBreak +
|
||||
' - UCA_Rules_SHORT.xml found in the CollationAuxiliary.zip available on unicode.org' + sLineBreak +
|
||||
' - allkeys.txt this is the file allkeys_CLDR.txt contained in CollationAuxiliary.zip renamed to allkeys.txt' + sLineBreak +
|
||||
' The CollationAuxiliary.zip archive is provided by unicode in the "unicode collation algorithm data files" section.';
|
||||
|
||||
|
||||
function ParseOptions(
|
||||
var ADataDir, AOuputDir, ACollationFileName, ACollationTypeName : string
|
||||
) : Boolean;
|
||||
var
|
||||
c : Char;
|
||||
idx, k : Integer;
|
||||
s : string;
|
||||
begin
|
||||
if (ParamCount() = 0) then
|
||||
exit(False);
|
||||
Result := True;
|
||||
repeat
|
||||
c := GetOpt('d:o:h');
|
||||
case c of
|
||||
'd' : ADataDir := ExpandFileName(Trim(OptArg));
|
||||
'o' : AOuputDir := ExpandFileName(Trim(OptArg));
|
||||
'h', '?' :
|
||||
begin
|
||||
WriteLn(SUsageText);
|
||||
Result := False;
|
||||
end;
|
||||
end;
|
||||
until (c = EndOfOptions);
|
||||
idx := 0;
|
||||
for k := 1 to ParamCount() do begin
|
||||
s := Trim(ParamStr(k));
|
||||
if (s <> '') and (s[1] <> '-') then begin
|
||||
if (idx = 0) then
|
||||
ACollationFileName := s
|
||||
else if (idx = 1) then
|
||||
ACollationTypeName := s;
|
||||
Inc(idx);
|
||||
if (idx >= 2) then
|
||||
Break;
|
||||
end;
|
||||
end;
|
||||
end;
|
||||
|
||||
var
|
||||
orderedChars : TOrderedCharacters;
|
||||
ucaBook : TUCA_DataBook;
|
||||
stream, endianStream : TMemoryStream;
|
||||
s, collationFileName, collationTypeName : string;
|
||||
i , c: Integer;
|
||||
collation : TCldrCollation;
|
||||
dataPath, outputPath : string;
|
||||
begin
|
||||
{$ifdef test_suite}
|
||||
exec_tests();
|
||||
{$endif test_suite}
|
||||
|
||||
{$ifdef actual_parsing}
|
||||
dataPath := '';
|
||||
outputPath := '';
|
||||
collationFileName := '';
|
||||
collationTypeName := '';
|
||||
if not ParseOptions(dataPath,outputPath,collationFileName,collationTypeName) then
|
||||
Halt(1);
|
||||
if (dataPath <> '') and not(DirectoryExists(dataPath)) then begin
|
||||
WriteLn('This directory does not exist : ',dataPath);
|
||||
Halt(1);
|
||||
end;
|
||||
if (dataPath = '') then
|
||||
dataPath := ExtractFilePath(ParamStr(0))
|
||||
else
|
||||
dataPath := IncludeTrailingPathDelimiter(dataPath);
|
||||
if (outputPath = '') then
|
||||
outputPath := dataPath
|
||||
else
|
||||
outputPath := IncludeTrailingPathDelimiter(outputPath);
|
||||
if (ParamCount() = 0) then begin
|
||||
WriteLn(SUsageText);
|
||||
Halt(1);
|
||||
end;
|
||||
if not(
|
||||
FileExists(dataPath+'UCA_Rules_SHORT.xml') and
|
||||
FileExists(dataPath+'allkeys.txt')
|
||||
)
|
||||
then begin
|
||||
WriteLn(Format('File not found : %s or %s.',[dataPath+'UCA_Rules_SHORT.xml',dataPath+'allkeys.txt']));
|
||||
Halt(1);
|
||||
end;
|
||||
|
||||
collationFileName := dataPath + collationFileName;
|
||||
if not FileExists(collationFileName) then begin
|
||||
WriteLn('File not found : "',collationFileName,'"');
|
||||
Halt(1);
|
||||
end;
|
||||
|
||||
WriteLn(sLineBreak,'Collation Parsing ',QuotedStr(collationFileName),' ...');
|
||||
stream := nil;
|
||||
endianStream := nil;
|
||||
collation := TCldrCollation.Create();
|
||||
try
|
||||
ParseCollationDocument(collationFileName,collation);
|
||||
WriteLn(Format(' Collation Count = %d',[collation.ItemCount]));
|
||||
if (collation.ItemCount = 0) then begin
|
||||
WriteLn('No collation in this file.');
|
||||
end else begin
|
||||
for i := 0 to collation.ItemCount - 1 do
|
||||
WriteLn(Format(' Item[%d] = %d "resets"; Type = %s',[i, Length(collation.Items[i].Rules),collation.Items[i].TypeName]));
|
||||
if (collation.Find(collationTypeName) = nil) then
|
||||
collationTypeName := FindCollationDefaultItemName(collation);
|
||||
WriteLn('Collation Item Name : ',collationTypeName);
|
||||
|
||||
|
||||
s := dataPath + 'UCA_Rules_SHORT.xml';
|
||||
WriteLn;
|
||||
WriteLn('Parsing ',QuotedStr(s),' ...');
|
||||
FillByte(orderedChars,SizeOf(orderedChars),0);
|
||||
orderedChars.Clear();
|
||||
ParseInitialDocument(@orderedChars,s);
|
||||
WriteLn('File parsed, ',orderedChars.ActualLength,' characters.');
|
||||
|
||||
WriteLn('Loading CLDR root''s key table ...');
|
||||
stream := TMemoryStream.Create();
|
||||
s := dataPath + 'allkeys.txt';
|
||||
stream.LoadFromFile(s);
|
||||
ParseUCAFile(stream,ucaBook);
|
||||
c := FillInitialPositions(@orderedChars.Data[0],orderedChars.ActualLength,ucaBook.Lines);
|
||||
if (c > 0) then
|
||||
WriteLn(' Missed Initial Positions = ',c);
|
||||
WriteLn(' Loaded.');
|
||||
|
||||
WriteLn('Start generation ...');
|
||||
stream.Clear();
|
||||
endianStream := TMemoryStream.Create();
|
||||
s := COLLATION_FILE_PREFIX + ChangeFileExt(LowerCase(ExtractFileName(collationFileName)),'.pas');
|
||||
GenerateCdlrCollation(
|
||||
collation,collationTypeName,s,stream,endianStream,
|
||||
orderedChars,ucaBook.Lines
|
||||
);
|
||||
stream.SaveToFile(ExtractFilePath(collationFileName)+s);
|
||||
if (endianStream.Size > 0) then
|
||||
endianStream.SaveToFile(ExtractFilePath(collationFileName)+GenerateEndianIncludeFileName(s));
|
||||
end;
|
||||
finally
|
||||
endianStream.Free();
|
||||
stream.Free();
|
||||
collation.Free();
|
||||
end;
|
||||
|
||||
{$endif actual_parsing}
|
||||
|
||||
WriteLn(sLineBreak,'Finished.');
|
||||
end.
|
||||
|
||||
1920
utils/unicode/cldrtest.pas
Normal file
1920
utils/unicode/cldrtest.pas
Normal file
File diff suppressed because it is too large
Load Diff
634
utils/unicode/cldrxml.pas
Normal file
634
utils/unicode/cldrxml.pas
Normal file
@ -0,0 +1,634 @@
|
||||
{ Parser of the CLDR collation xml files.
|
||||
|
||||
Copyright (c) 2013 by Inoussa OUEDRAOGO
|
||||
|
||||
The source code is distributed under the Library GNU
|
||||
General Public License with the following modification:
|
||||
|
||||
- object files and libraries linked into an application may be
|
||||
distributed without source code.
|
||||
|
||||
If you didn't receive a copy of the file COPYING, contact:
|
||||
Free Software Foundation
|
||||
675 Mass Ave
|
||||
Cambridge, MA 02139
|
||||
USA
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
||||
}
|
||||
|
||||
unit cldrxml;
|
||||
|
||||
{$mode objfpc}{$H+}
|
||||
{$TypedAddress on}
|
||||
interface
|
||||
|
||||
uses
|
||||
Classes, SysUtils, DOM,
|
||||
cldrhelper;
|
||||
|
||||
procedure ParseInitialDocument(ASequence : POrderedCharacters; ADoc : TDOMDocument);overload;
|
||||
procedure ParseInitialDocument(ASequence : POrderedCharacters; AFileName : string);overload;
|
||||
|
||||
procedure ParseCollationDocument(ADoc : TDOMDocument; ACollation : TCldrCollation);
|
||||
procedure ParseCollationDocument(const AFileName : string; ACollation : TCldrCollation);
|
||||
|
||||
resourcestring
|
||||
sCaseNothandled = 'This case is not handled : "%s", Position = %d.';
|
||||
sCodePointExpected = 'Code Point node expected as child at this position "%d".';
|
||||
sCollationsNodeNotFound = '"collations" node not found.';
|
||||
sHexAttributeExpected = '"hex" attribute expected at this position "%d".';
|
||||
sInvalidResetClause = 'Invalid "Reset" clause.';
|
||||
sNodeNameAssertMessage = 'Expected NodeName "%s", got "%s".';
|
||||
sRulesNodeNotFound = '"rules" node not found.';
|
||||
sTextNodeChildExpected = '(Child) text node expected at this position "%d", but got "%s".';
|
||||
sUniqueChildNodeExpected = 'Unique child node expected at this position "%d".';
|
||||
sUnknownResetLogicalPosition = 'Unknown reset logical position : "%s".';
|
||||
|
||||
implementation
|
||||
uses
|
||||
typinfo, XMLRead, XPath, Helper, unicodeset;
|
||||
|
||||
const
|
||||
s_AT = 'at';
|
||||
s_BEFORE = 'before';
|
||||
s_CODEPOINT = 'codepoint';
|
||||
s_COLLATION = 'collation';
|
||||
s_COLLATIONS = 'collations';
|
||||
s_CONTEXT = 'context';
|
||||
s_DEFAULT = 'default';
|
||||
s_EXTEND = 'extend';
|
||||
s_HEX = 'hex';
|
||||
s_POSITION = 'position';
|
||||
s_RESET = 'reset';
|
||||
s_RULES = 'rules';
|
||||
s_STANDART = 'standard';
|
||||
s_TYPE = 'type';
|
||||
|
||||
procedure CheckNodeName(ANode : TDOMNode; const AExpectedName : DOMString);
|
||||
begin
|
||||
if (ANode.NodeName <> AExpectedName) then
|
||||
raise Exception.CreateFmt(sNodeNameAssertMessage,[AExpectedName,ANode.NodeName]);
|
||||
end;
|
||||
|
||||
function CharToReorderWeigthKind(const AChar : Char) : TReorderWeigthKind;inline;
|
||||
begin
|
||||
case AChar of
|
||||
'p' : Result := TReorderWeigthKind.PriMary;
|
||||
's' : Result := TReorderWeigthKind.Secondary;
|
||||
't' : Result := TReorderWeigthKind.Tertiary;
|
||||
'i' : Result := TReorderWeigthKind.Identity;
|
||||
else
|
||||
Result := TReorderWeigthKind.Identity;
|
||||
end;
|
||||
end;
|
||||
|
||||
function DomString2UnicodeCodePointArray(const AValue : DOMString): TUnicodeCodePointArray;
|
||||
var
|
||||
u4str : UCS4String;
|
||||
k : Integer;
|
||||
begin
|
||||
if (Length(AValue) = 0) then
|
||||
exit(nil);
|
||||
if (Length(AValue) = 1) then begin
|
||||
SetLength(Result,1);
|
||||
Result[0] := Ord(AValue[1])
|
||||
end else begin
|
||||
u4str := WideStringToUCS4String(AValue);
|
||||
k := Length(u4str) - 1; // remove the last #0
|
||||
SetLength(Result,k);
|
||||
for k := 0 to k - 1 do
|
||||
Result[k] := u4str[k];
|
||||
end;
|
||||
end;
|
||||
|
||||
function TryStrToLogicalReorder(
|
||||
const AValue : string;
|
||||
out AResult : TReorderLogicalReset
|
||||
) : Boolean;
|
||||
var
|
||||
s : string;
|
||||
i : Integer;
|
||||
begin
|
||||
s := StringReplace(AValue,' ','',[rfReplaceAll]);
|
||||
s := StringReplace(s,'_','',[rfReplaceAll]);
|
||||
i := GetEnumValue(TypeInfo(TReorderLogicalReset),s);
|
||||
Result := (i > -1);
|
||||
if Result then
|
||||
AResult := TReorderLogicalReset(i);
|
||||
end;
|
||||
|
||||
function ParseStatement(
|
||||
ARules : TDOMElement;
|
||||
AStartPosition : Integer;
|
||||
AStatement : PReorderSequence;
|
||||
var ANextPos : Integer
|
||||
) : Boolean;
|
||||
var
|
||||
startPosition : Integer;
|
||||
statement : PReorderSequence;
|
||||
elementActualCount : Integer;
|
||||
list : TDOMNodeList;
|
||||
inBlock : Boolean;
|
||||
|
||||
procedure SkipComments();
|
||||
begin
|
||||
while (startPosition < list.Count) do begin
|
||||
if (list[startPosition].NodeType <> COMMENT_NODE) then
|
||||
Break;
|
||||
Inc(startPosition);
|
||||
end;
|
||||
end;
|
||||
|
||||
function parse_reset() : Integer;
|
||||
var
|
||||
n, t : TDOMNode;
|
||||
s : string;
|
||||
logicalPos : TReorderLogicalReset;
|
||||
begin
|
||||
SkipComments();
|
||||
n := list[startPosition];
|
||||
CheckNodeName(n,s_RESET);
|
||||
if n.HasChildNodes() then begin
|
||||
n := n.FirstChild;
|
||||
if (n.NodeType = TEXT_NODE) then begin
|
||||
statement^.Reset := DomString2UnicodeCodePointArray(Trim(TDOMText(n).Data));
|
||||
Result := startPosition+1;
|
||||
end else begin
|
||||
if not TryStrToLogicalReorder(n.NodeName,logicalPos) then
|
||||
raise Exception.CreateFmt(sUnknownResetLogicalPosition,[n.NodeName]);
|
||||
statement^.LogicalPosition := logicalPos;
|
||||
Result := startPosition+1;
|
||||
end;
|
||||
end else if not n.HasChildNodes() then begin
|
||||
if (list[startPosition+1].NodeName = s_POSITION) then begin
|
||||
s := list[startPosition+1].Attributes.GetNamedItem(s_AT).NodeValue;
|
||||
if not TryStrToLogicalReorder(s,logicalPos) then
|
||||
raise Exception.CreateFmt(sUnknownResetLogicalPosition,[s]);
|
||||
statement^.LogicalPosition := logicalPos;
|
||||
Result := startPosition+2;
|
||||
end else begin
|
||||
t := list[startPosition+1];
|
||||
{if (t.NodeType <> TEXT_NODE) then
|
||||
raise Exception.CreateFmt(sTextNodeChildExpected,[(startPosition+1),(t.NodeName+'('+t.ClassName+')')]);}
|
||||
if (t.NodeType = TEXT_NODE) then
|
||||
statement^.Reset := DomString2UnicodeCodePointArray(Trim(TDOMText(t).Data))
|
||||
else
|
||||
statement^.Reset := DomString2UnicodeCodePointArray(' ');
|
||||
Result := startPosition+2;
|
||||
end;
|
||||
end;
|
||||
if (statement^.LogicalPosition = TReorderLogicalReset.None) and
|
||||
(Length(statement^.Reset) = 0)
|
||||
then
|
||||
raise Exception.Create(sInvalidResetClause);
|
||||
end;
|
||||
|
||||
procedure EnsureElementLength(const ALength : Integer);
|
||||
var
|
||||
k, d : Integer;
|
||||
begin
|
||||
k := Length(statement^.Elements);
|
||||
if (k < ALength) then begin
|
||||
k := ALength;
|
||||
if (k = 0) then begin
|
||||
k := 50;
|
||||
end else begin
|
||||
if (k < 10) then
|
||||
d := 10
|
||||
else
|
||||
d := 2;
|
||||
k := k * d;
|
||||
end;
|
||||
SetLength(statement^.Elements,k);
|
||||
end;
|
||||
end;
|
||||
|
||||
{procedure AddElement(AText : DOMString; AWeigthKind : TReorderWeigthKind);overload;
|
||||
var
|
||||
u4str : UCS4String;
|
||||
k : Integer;
|
||||
kp : PReorderUnit;
|
||||
begin
|
||||
u4str := WideStringToUCS4String(AText);
|
||||
EnsureElementLength(elementActualCount+1);
|
||||
kp := @statement^.Elements[elementActualCount];
|
||||
k := Length(u4str) - 1{null terminated};
|
||||
SetLength(kp^.Characters,k);
|
||||
for k := 0 to k - 1 do
|
||||
kp^.Characters[k] := u4str[k];
|
||||
kp^.WeigthKind:= AWeigthKind;
|
||||
elementActualCount := elementActualCount + 1;
|
||||
end;}
|
||||
|
||||
procedure AddElement(
|
||||
const AChars : array of UCS4Char;
|
||||
const AWeigthKind : TReorderWeigthKind;
|
||||
const AContext : DOMString
|
||||
);overload;
|
||||
var
|
||||
kp : PReorderUnit;
|
||||
k : Integer;
|
||||
begin
|
||||
EnsureElementLength(elementActualCount+1);
|
||||
kp := @statement^.Elements[elementActualCount];
|
||||
SetLength(kp^.Characters,Length(AChars));
|
||||
for k := 0 to Length(AChars) - 1 do
|
||||
kp^.Characters[k] := AChars[k];
|
||||
kp^.WeigthKind := AWeigthKind;
|
||||
elementActualCount := elementActualCount + 1;
|
||||
if (AContext <> '') then
|
||||
kp^.Context := DomString2UnicodeCodePointArray(AContext);
|
||||
end;
|
||||
|
||||
procedure ReadChars(
|
||||
ANode : TDOMNode;
|
||||
APos : Integer;
|
||||
var AChars : UCS4String
|
||||
);
|
||||
var
|
||||
t : TDOMNode;
|
||||
u4str : UCS4String;
|
||||
s : DOMString;
|
||||
begin
|
||||
if not ANode.HasChildNodes() then begin
|
||||
SetLength(AChars,1);
|
||||
AChars[0] := Ord(UnicodeChar(' '));
|
||||
exit;
|
||||
//raise Exception.CreateFmt(sCodePointExpected + ANode.ClassName,[APos]);
|
||||
end;
|
||||
t := ANode.FindNode(s_CODEPOINT);
|
||||
if (t = nil) then begin
|
||||
if (ANode.ChildNodes.Count <> 1) then
|
||||
raise Exception.CreateFmt(sUniqueChildNodeExpected,[APos]);
|
||||
t := ANode.ChildNodes[0];
|
||||
if not t.InheritsFrom(TDOMText) then
|
||||
raise Exception.CreateFmt(sTextNodeChildExpected,[APos,(t.NodeName+'('+t.ClassName+')')]);
|
||||
s := TDOMText(t).Data;
|
||||
if (Length(s) = 1) then begin
|
||||
SetLength(AChars,1);
|
||||
AChars[0] := Ord(s[1]);
|
||||
end else begin
|
||||
u4str := WideStringToUCS4String(s);
|
||||
AChars := u4str;
|
||||
SetLength(AChars,Length(AChars)-1);
|
||||
end;
|
||||
end else begin
|
||||
t := t.Attributes.GetNamedItem(s_HEX);
|
||||
if (t = nil) then
|
||||
raise Exception.CreateFmt(sHexAttributeExpected,[APos]);
|
||||
SetLength(AChars,1);
|
||||
AChars[0] := StrToInt('$'+t.NodeValue);
|
||||
end
|
||||
end;
|
||||
|
||||
procedure AddPrefixChars(const APrefix : array of UCS4Char; var ADest : TUnicodeCodePointArray);
|
||||
var
|
||||
k : Integer;
|
||||
begin
|
||||
k := Length(ADest);
|
||||
SetLength(ADest,(k+Length(APrefix)));
|
||||
Move(ADest[0],ADest[k+1],(SizeOf(k*ADest[0])));
|
||||
for k := 0 to k - 1 do
|
||||
ADest[k] := APrefix[k];
|
||||
end;
|
||||
|
||||
function ReadNextItem(const APos : Integer) : Integer;
|
||||
var
|
||||
n, t : TDOMNode;
|
||||
s, contextStr : DOMString;
|
||||
w : TReorderWeigthKind;
|
||||
isSimpleCharTag : Boolean;
|
||||
simpleCharTag : AnsiChar;
|
||||
last : PReorderUnit;
|
||||
u4str : UCS4String;
|
||||
k : Integer;
|
||||
begin
|
||||
contextStr := '';
|
||||
Result := APos;
|
||||
n := list[APos];
|
||||
isSimpleCharTag := (Length(n.NodeName) = 1) and (Ord(n.NodeName[1])<=127);
|
||||
if isSimpleCharTag then begin
|
||||
simpleCharTag := AnsiChar(n.NodeName[1]);
|
||||
if (simpleCharTag = 'x') then begin
|
||||
inBlock := True;
|
||||
n := n.FirstChild;
|
||||
if (n.NodeName = s_CONTEXT) then begin
|
||||
if n.HasChildNodes() then begin
|
||||
t := n.FirstChild;
|
||||
if (t.NodeType = TEXT_NODE) then
|
||||
contextStr := TDOMText(t).Data;
|
||||
end;
|
||||
n := n.NextSibling;
|
||||
end;
|
||||
isSimpleCharTag := (Length(n.NodeName) = 1) and (Ord(n.NodeName[1])<=127);
|
||||
if isSimpleCharTag then
|
||||
simpleCharTag := AnsiChar(n.NodeName[1]);
|
||||
end;
|
||||
end;
|
||||
if isSimpleCharTag and (simpleCharTag in ['p','s','t','i']) then begin
|
||||
w := CharToReorderWeigthKind(AnsiChar(n.NodeName[1]));
|
||||
ReadChars(n,APos,u4str);
|
||||
AddElement(u4str,w,contextStr);
|
||||
Result := Result + 1;
|
||||
if not inBlock then
|
||||
exit;
|
||||
last := @statement^.Elements[elementActualCount-1];
|
||||
n := n.NextSibling;
|
||||
if (n <> nil) and (n.NodeName = s_EXTEND) then begin
|
||||
ReadChars(n,APos,u4str);
|
||||
SetLength(last^.ExpansionChars,Length(u4str));
|
||||
for k := 0 to Length(u4str) - 1 do
|
||||
last^.ExpansionChars[k] := u4str[k];
|
||||
end;
|
||||
exit;
|
||||
end;
|
||||
if (Length(n.NodeName) = 2) and (n.NodeName[2] = 'c') and
|
||||
(Ord(n.NodeName[1])<=127) and (AnsiChar(n.NodeName[1]) in ['p','s','t','i'])
|
||||
then begin
|
||||
w := CharToReorderWeigthKind(AnsiChar(n.NodeName[1]));
|
||||
ReadChars(n,APos,u4str);
|
||||
for k := Low(u4str) to High(u4str) do
|
||||
AddElement(u4str[k],w,contextStr);
|
||||
Result := Result + 1;
|
||||
exit;
|
||||
end;
|
||||
raise Exception.CreateFmt(sCaseNothandled,[n.NodeName,APos]);
|
||||
end;
|
||||
|
||||
var
|
||||
i, c : Integer;
|
||||
n : TDOMNode;
|
||||
begin
|
||||
Result := False;
|
||||
inBlock := False;
|
||||
elementActualCount := 0;
|
||||
if (AStartPosition <= 0) then
|
||||
startPosition := 0
|
||||
else
|
||||
startPosition := AStartPosition;
|
||||
i := startPosition;
|
||||
list := ARules.ChildNodes;
|
||||
c := list.Count;
|
||||
if (c <= i) then
|
||||
exit;
|
||||
statement := AStatement;
|
||||
statement^.Clear();
|
||||
n := list[i];
|
||||
i := parse_reset();
|
||||
while (i < c) do begin
|
||||
n := list[i];
|
||||
if (n.NodeName = s_RESET) then
|
||||
Break;
|
||||
i := ReadNextItem(i);
|
||||
end;
|
||||
SetLength(statement^.Elements,elementActualCount);
|
||||
Result := (i > startPosition);
|
||||
if Result then
|
||||
ANextPos := i;
|
||||
end;
|
||||
|
||||
procedure ParseInitialDocument(ASequence : POrderedCharacters; ADoc : TDOMDocument);
|
||||
var
|
||||
n : TDOMNode;
|
||||
rulesElement : TDOMElement;
|
||||
i, c, nextPost : Integer;
|
||||
statement : TReorderSequence;
|
||||
p : PReorderUnit;
|
||||
begin
|
||||
n := ADoc.DocumentElement.FindNode(s_RULES);
|
||||
if (n = nil) then
|
||||
raise Exception.Create(sRulesNodeNotFound);
|
||||
rulesElement := n as TDOMElement;
|
||||
c := rulesElement.ChildNodes.Count;
|
||||
ASequence^.Clear();
|
||||
SetLength(ASequence^.Data,c+100);
|
||||
nextPost := 0;
|
||||
i := 0;
|
||||
while (i < c) do begin
|
||||
statement.Clear();
|
||||
if not ParseStatement(rulesElement,i,@statement,nextPost) then
|
||||
Break;
|
||||
i := nextPost;
|
||||
try
|
||||
ASequence^.ApplyStatement(@statement);
|
||||
except
|
||||
on e : Exception do begin
|
||||
e.Message := Format('%s Position = %d',[e.Message,i]);
|
||||
raise;
|
||||
end;
|
||||
end;
|
||||
end;
|
||||
if (ASequence^.ActualLength > 0) then begin
|
||||
p := @ASequence^.Data[0];
|
||||
for i := 0 to ASequence^.ActualLength - 1 do begin
|
||||
p^.Changed := False;
|
||||
Inc(p);
|
||||
end;
|
||||
end;
|
||||
end;
|
||||
|
||||
procedure ParseInitialDocument(ASequence : POrderedCharacters; AFileName : string);
|
||||
var
|
||||
doc : TXMLDocument;
|
||||
begin
|
||||
ReadXMLFile(doc,AFileName);
|
||||
try
|
||||
ParseInitialDocument(ASequence,doc);
|
||||
finally
|
||||
doc.Free();
|
||||
end;
|
||||
end;
|
||||
|
||||
function EvaluateXPathStr(const AExpression : string; AContextNode : TDOMNode): DOMString;
|
||||
var
|
||||
xv : TXPathVariable;
|
||||
begin
|
||||
xv := EvaluateXPathExpression(AExpression,AContextNode);
|
||||
try
|
||||
if (xv <> nil) then
|
||||
Result := xv.AsText
|
||||
else
|
||||
Result := '';
|
||||
finally
|
||||
xv.Free();
|
||||
end;
|
||||
end;
|
||||
|
||||
function ParseDeletion(
|
||||
const APattern : DOMString;
|
||||
ASequence : PReorderSequence
|
||||
) : Integer;
|
||||
var
|
||||
r : array of TReorderUnit;
|
||||
c, i : Integer;
|
||||
uset : TUnicodeSet;
|
||||
it : TUnicodeSet.TIterator;
|
||||
p : PReorderUnit;
|
||||
begin
|
||||
if (APattern = '') then
|
||||
exit(0);
|
||||
it := nil;
|
||||
uset := TUnicodeSet.Create();
|
||||
try
|
||||
uset.AddPattern(APattern);
|
||||
it := uset.CreateIterator();
|
||||
c := 0;
|
||||
it.Reset();
|
||||
while it.MoveNext() do begin
|
||||
Inc(c);
|
||||
end;
|
||||
SetLength(r,c);
|
||||
p := @r[0];
|
||||
i := 0;
|
||||
it.Reset();
|
||||
while it.MoveNext() do begin
|
||||
p^.Clear();
|
||||
p^.WeigthKind := TReorderWeigthKind.Deletion;
|
||||
p^.Characters := Copy(it.GetCurrent());
|
||||
Inc(p);
|
||||
Inc(i);
|
||||
end;
|
||||
ASequence^.Clear();
|
||||
ASequence^.Elements := r;
|
||||
finally
|
||||
it.Free();
|
||||
uset.Free();
|
||||
end;
|
||||
SetLength(r,0);
|
||||
end;
|
||||
|
||||
procedure ParseCollationItem(ACollationNode : TDOMElement; AItem : TCldrCollationItem);
|
||||
var
|
||||
n : TDOMNode;
|
||||
rulesElement : TDOMElement;
|
||||
i, c, nextPos : Integer;
|
||||
statementList : TReorderSequenceArray;
|
||||
sal : Integer;//statement actual length
|
||||
statement : PReorderSequence;
|
||||
s : DOMString;
|
||||
begin
|
||||
AItem.TypeName := ACollationNode.GetAttribute(s_TYPE);
|
||||
AItem.Base := EvaluateXPathStr('base',ACollationNode);
|
||||
AItem.Backwards := (EvaluateXPathStr('settings/@backwards',ACollationNode) = 'on');
|
||||
if AItem.Backwards then
|
||||
AItem.ChangedFields := AItem.ChangedFields + [TCollationField.BackWard];
|
||||
|
||||
SetLength(statementList,15);
|
||||
sal := 0;
|
||||
statement := @statementList[0];
|
||||
s := EvaluateXPathStr('suppress_contractions',ACollationNode);
|
||||
if (s <> '') then begin
|
||||
if (ParseDeletion(s,statement) > 0) then begin
|
||||
Inc(sal);
|
||||
Inc(statement);
|
||||
end else begin
|
||||
statement^.Clear();
|
||||
end;
|
||||
end;
|
||||
n := ACollationNode.FindNode(s_RULES);
|
||||
if (n <> nil) then begin
|
||||
rulesElement := n as TDOMElement;
|
||||
c := rulesElement.ChildNodes.Count;
|
||||
nextPos := 0;
|
||||
i := 0;
|
||||
while (i < c) do begin
|
||||
statement^.Clear();
|
||||
if not ParseStatement(rulesElement,i,statement,nextPos) then
|
||||
Break;
|
||||
i := nextPos;
|
||||
Inc(statement);
|
||||
Inc(sal);
|
||||
if (sal >= Length(statementList)) then begin
|
||||
SetLength(statementList,(sal*2));
|
||||
statement := @statementList[(sal-1)];
|
||||
end;
|
||||
end;
|
||||
end;
|
||||
SetLength(statementList,sal);
|
||||
AItem.Rules := statementList;
|
||||
end;
|
||||
|
||||
procedure ParseCollationDocument(ADoc : TDOMDocument; ACollation : TCldrCollation);
|
||||
var
|
||||
rulesNodes, n : TDOMNode;
|
||||
collationsElement, rulesElement : TDOMElement;
|
||||
i, c : Integer;
|
||||
item : TCldrCollationItem;
|
||||
nl : TDOMNodeList;
|
||||
begin
|
||||
n := ADoc.DocumentElement.FindNode(s_COLLATIONS);
|
||||
if (n = nil) then
|
||||
raise Exception.Create(sCollationsNodeNotFound);
|
||||
collationsElement := n as TDOMElement;
|
||||
ACollation.Clear();
|
||||
ACollation.Language := EvaluateXPathStr('identity/language/@type',ADoc.DocumentElement);
|
||||
ACollation.Version := EvaluateXPathStr('identity/version/@number',ADoc.DocumentElement);
|
||||
ACollation.DefaultType := EvaluateXPathStr('collations/default/@type',ADoc.DocumentElement);
|
||||
if collationsElement.HasChildNodes() then begin
|
||||
nl := collationsElement.ChildNodes;
|
||||
c := nl.Count;
|
||||
item := nil;
|
||||
try
|
||||
for i := 0 to c - 1 do begin
|
||||
n := nl[i];
|
||||
if (n.NodeName = s_COLLATION) then begin
|
||||
item := TCldrCollationItem.Create();
|
||||
ParseCollationItem((n as TDOMElement),item);
|
||||
ACollation.Add(item);
|
||||
item := nil;
|
||||
end
|
||||
end;
|
||||
except
|
||||
FreeAndNil(item);
|
||||
raise;
|
||||
end;
|
||||
end;
|
||||
end;
|
||||
|
||||
function ReadXMLFile(f: TStream) : TXMLDocument;
|
||||
var
|
||||
src : TXMLInputSource;
|
||||
parser: TDOMParser;
|
||||
begin
|
||||
src := TXMLInputSource.Create(f);
|
||||
Result := TXMLDocument.Create;
|
||||
parser := TDOMParser.Create();
|
||||
try
|
||||
parser.Options.IgnoreComments := True;
|
||||
parser.Parse(src, Result);
|
||||
finally
|
||||
src.Free();
|
||||
parser.Free;
|
||||
end;
|
||||
end;
|
||||
|
||||
function ReadXMLFile(const AFilename: String) : TXMLDocument;
|
||||
var
|
||||
FileStream: TStream;
|
||||
begin
|
||||
Result := nil;
|
||||
FileStream := TFileStream.Create(AFilename, fmOpenRead+fmShareDenyWrite);
|
||||
try
|
||||
Result := ReadXMLFile(FileStream);
|
||||
finally
|
||||
FileStream.Free;
|
||||
end;
|
||||
end;
|
||||
|
||||
procedure ParseCollationDocument(const AFileName : string; ACollation : TCldrCollation);
|
||||
var
|
||||
doc : TXMLDocument;
|
||||
begin
|
||||
doc := ReadXMLFile(AFileName);
|
||||
try
|
||||
ParseCollationDocument(doc,ACollation);
|
||||
ACollation.LocalID := ExtractFileName(ChangeFileExt(AFileName,''));
|
||||
finally
|
||||
doc.Free();
|
||||
end;
|
||||
end;
|
||||
|
||||
end.
|
||||
13
utils/unicode/data/readme.txt
Normal file
13
utils/unicode/data/readme.txt
Normal file
@ -0,0 +1,13 @@
|
||||
This folder requires the next files to be present:
|
||||
|
||||
Extracted from http://www.unicode.org/Public/6.2.0/ucd/UCD.zip:
|
||||
* UnicodeData.txt
|
||||
* HangulSyllableType.txt
|
||||
* PropList.txt
|
||||
|
||||
Extracted from http://www.unicode.org/Public/UCA/6.2.0/CollationAuxiliary.zip:
|
||||
* allkeys.txt : this file is actually the allkeys_CLDR.txt file renamed. It is the CLDR's root collation.
|
||||
* UCA_Rules_SHORT.xml
|
||||
|
||||
Extracted from http://www.unicode.org/Public/cldr/22/core.zip (see the "common\collation" folder):
|
||||
* all the language specific xml files (de.xml, es.xml, ...)
|
||||
667
utils/unicode/grbtree.pas
Normal file
667
utils/unicode/grbtree.pas
Normal file
@ -0,0 +1,667 @@
|
||||
{ Red Black Tree implementation.
|
||||
|
||||
Copyright (c) 2013 by Inoussa OUEDRAOGO
|
||||
|
||||
Inspired by ideas of Julienne Walker
|
||||
see http://www.eternallyconfuzzled.com/tuts/datastructures/jsw_tut_bst1.aspx
|
||||
|
||||
The source code is distributed under the Library GNU
|
||||
General Public License with the following modification:
|
||||
|
||||
- object files and libraries linked into an application may be
|
||||
distributed without source code.
|
||||
|
||||
If you didn't receive a copy of the file COPYING, contact:
|
||||
Free Software Foundation
|
||||
675 Mass Ave
|
||||
Cambridge, MA 02139
|
||||
USA
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. }
|
||||
|
||||
unit grbtree;
|
||||
|
||||
{$ifdef FPC}
|
||||
{$mode delphi}
|
||||
{$H+}
|
||||
{$endif FPC}
|
||||
{$TYPEDADDRESS ON}
|
||||
{$define RB_DEBUG}
|
||||
|
||||
interface
|
||||
|
||||
const
|
||||
HEIGHT_LIMIT = 64;
|
||||
|
||||
type
|
||||
|
||||
|
||||
{KCOMP = class
|
||||
public
|
||||
// Return
|
||||
// * if A>B then 1
|
||||
// * if A=B then 0
|
||||
// * if A<B then -1
|
||||
class function Compare(const A, B : TRBTreeNodeData) : Integer;
|
||||
end; }
|
||||
|
||||
TRBTree<T, KCOMP> = class
|
||||
public type
|
||||
TRBTreeNodeData = T;
|
||||
PRBTreeNode = ^TRBTreeNode;
|
||||
PRBTreeAllocator = ^TRBTreeAllocator;
|
||||
TRBTreeNode = record
|
||||
Links : array[Boolean] of PRBTreeNode;
|
||||
Data : TRBTreeNodeData;
|
||||
Red : Boolean;
|
||||
end;
|
||||
TRBTreeNodeCreator = function(AContext : Pointer) : PRBTreeNode;
|
||||
TRBTreeNodeDestructor = procedure(ANode : PRBTreeNode; AContext : Pointer);
|
||||
TRBTreeAllocator = record
|
||||
CreateNode : TRBTreeNodeCreator;
|
||||
FreeNode : TRBTreeNodeDestructor;
|
||||
end;
|
||||
|
||||
TRBTreeNodeComparator = KCOMP;
|
||||
ThisType = TRBTree<T,KCOMP>;
|
||||
|
||||
private type
|
||||
TBaseIterator = record
|
||||
Tree : ThisType;
|
||||
StartingNode : PRBTreeNode;
|
||||
StartingDir : Boolean;
|
||||
Current : PRBTreeNode;
|
||||
Top : NativeInt;
|
||||
Path : array[0..(HEIGHT_LIMIT-1)] of PRBTreeNode;
|
||||
end;
|
||||
PBaseIterator = ^TBaseIterator;
|
||||
public type
|
||||
|
||||
TIterator = class
|
||||
private
|
||||
FHandle : PBaseIterator;
|
||||
FResetState : Boolean;
|
||||
private
|
||||
public
|
||||
constructor Create(AHandle : PBaseIterator);
|
||||
destructor Destroy;override;
|
||||
procedure Reset();
|
||||
function MoveNext() : Boolean;inline;
|
||||
function MovePrevious() : Boolean;inline;
|
||||
function GetCurrent : TRBTreeNodeData;inline;
|
||||
function GetCurrentNode : PRBTreeNode;inline;
|
||||
end;
|
||||
|
||||
public var
|
||||
Root : PRBTreeNode;
|
||||
//FSize : Integer;
|
||||
Allocator : TRBTreeAllocator;
|
||||
Comparator : TRBTreeNodeComparator;
|
||||
private
|
||||
class function TreeCreateIterator() : PBaseIterator;static;inline;
|
||||
class procedure TreeFreeIterator(AItem : PBaseIterator);static;inline;
|
||||
class procedure TreeInitIterator(
|
||||
AIterator : PBaseIterator;
|
||||
const ATree : ThisType;
|
||||
const AStartingNode : PRBTreeNode;
|
||||
const ADirection : Boolean
|
||||
);static;
|
||||
class function TreeIteratorMove(
|
||||
AIterator : PBaseIterator;
|
||||
ADirection : Boolean
|
||||
) : PRBTreeNode;static;
|
||||
class function TreeIteratorMoveNext(AIterator : PBaseIterator) : PRBTreeNode;static;inline;
|
||||
class function TreeIteratorMovePrevious(AIterator : PBaseIterator) : PRBTreeNode;static;inline;
|
||||
function CreateIterator(
|
||||
const ANode : PRBTreeNode;
|
||||
const ADirection : Boolean
|
||||
) : TIterator;inline;
|
||||
private
|
||||
class function DefaultCreateNode(AContext : Pointer) : PRBTreeNode;static;
|
||||
class procedure DefaultFreeNode(ANode : PRBTreeNode; AContext : Pointer);static;
|
||||
|
||||
function InitNode(ANode : PRBTreeNode; AData : TRBTreeNodeData) : PRBTreeNode;inline;
|
||||
function IsRed(ANode : PRBTreeNode): Boolean;inline;
|
||||
function RotateDouble(ARoot : PRBTreeNode; const ADir : Boolean) : PRBTreeNode;inline;
|
||||
function RotateSingle(ARoot : PRBTreeNode; const ADir : Boolean) : PRBTreeNode;
|
||||
public
|
||||
constructor Create(const AAllocator : PRBTreeAllocator);overload;
|
||||
constructor Create();overload;
|
||||
destructor Destroy;override;
|
||||
procedure Clear();
|
||||
function FindNode(const AData : TRBTreeNodeData) : PRBTreeNode;
|
||||
function Insert(const AData : TRBTreeNodeData) : PRBTreeNode;
|
||||
function Remove(const AData : TRBTreeNodeData) : Boolean;
|
||||
function CreateForwardIterator(const ANode : PRBTreeNode) : TIterator;overload;inline;
|
||||
function CreateForwardIterator() : TIterator;overload;inline;
|
||||
function CreateBackwardIterator(const ANode : PRBTreeNode) : TIterator;overload;inline;
|
||||
function CreateBackwardIterator() : TIterator;overload;inline;
|
||||
{$ifdef RB_DEBUG}
|
||||
function SelfAssert(ARoot : PRBTreeNode; var AErrorMessage : string) : Boolean;overload;
|
||||
function SelfAssert(var AErrorMessage : string) : Boolean;overload;
|
||||
{$endif RB_DEBUG}
|
||||
end;
|
||||
|
||||
TOrdinalComparator<T> = class
|
||||
public type
|
||||
TOrdinalType = T;
|
||||
public
|
||||
// Return
|
||||
// * if A>B then 1
|
||||
// * if A=B then 0
|
||||
// * if A<B then -1
|
||||
class function Compare(const A, B : TOrdinalType) : Integer;static;inline;
|
||||
end;
|
||||
|
||||
implementation
|
||||
|
||||
{ TRBTree<T> }
|
||||
|
||||
function TRBTree<T,KCOMP>.IsRed(ANode : PRBTreeNode): Boolean;inline;
|
||||
begin
|
||||
Result := (ANode <> nil) and ANode^.Red;
|
||||
end;
|
||||
|
||||
function TRBTree<T,KCOMP>.InitNode(ANode: PRBTreeNode; AData: TRBTreeNodeData): PRBTreeNode;inline;
|
||||
begin
|
||||
Result := ANode;
|
||||
Result^.Data := AData;
|
||||
Result^.Red := True;
|
||||
Result^.Links[False] := nil;
|
||||
Result^.Links[True] := nil;
|
||||
end;
|
||||
|
||||
function TRBTree<T,KCOMP>.RotateDouble(ARoot: PRBTreeNode; const ADir: Boolean): PRBTreeNode;inline;
|
||||
begin
|
||||
ARoot^.Links[not ADir] := RotateSingle(ARoot^.Links[not ADir], not ADir );
|
||||
Result := RotateSingle(ARoot,ADir);
|
||||
end;
|
||||
|
||||
function TRBTree<T,KCOMP>.RotateSingle(ARoot: PRBTreeNode; const ADir: Boolean): PRBTreeNode;
|
||||
var
|
||||
t : PRBTreeNode;
|
||||
begin
|
||||
t := ARoot^.Links[not ADir];
|
||||
|
||||
ARoot^.Links[not ADir] := t^.Links[ADir];
|
||||
t^.Links[ADir] := ARoot;
|
||||
|
||||
ARoot^.Red := True;
|
||||
t^.Red := False;
|
||||
|
||||
Result := t;
|
||||
end;
|
||||
|
||||
class function TRBTree<T,KCOMP>.TreeCreateIterator() : PBaseIterator;static;
|
||||
begin
|
||||
Result := AllocMem(SizeOf(Result^));
|
||||
end;
|
||||
|
||||
class procedure TRBTree<T,KCOMP>.TreeFreeIterator(AItem : PBaseIterator);static;
|
||||
begin
|
||||
if (AItem <> nil) then
|
||||
FreeMem(AItem,SizeOf(AItem^));
|
||||
end;
|
||||
|
||||
class procedure TRBTree<T,KCOMP>.TreeInitIterator(
|
||||
AIterator : PBaseIterator;
|
||||
const ATree : ThisType;
|
||||
const AStartingNode : PRBTreeNode;
|
||||
const ADirection : Boolean
|
||||
);static;
|
||||
begin
|
||||
AIterator^.Tree := ATree;
|
||||
AIterator^.StartingNode := AStartingNode;
|
||||
AIterator^.StartingDir := ADirection;
|
||||
if (AStartingNode = nil) then
|
||||
AIterator^.Current := AIterator^.Tree.Root
|
||||
else
|
||||
AIterator^.Current := AStartingNode;
|
||||
AIterator^.Top := 0;
|
||||
|
||||
// Save the path for later traversal
|
||||
if (AIterator^.Current <> nil) then begin
|
||||
while (AIterator^.Current^.Links[ADirection] <> nil) do begin
|
||||
AIterator^.Path[AIterator^.Top] := AIterator^.Current;
|
||||
Inc(AIterator^.Top);
|
||||
AIterator^.Current := AIterator^.Current^.Links[ADirection];
|
||||
end;
|
||||
end;
|
||||
end;
|
||||
|
||||
class function TRBTree<T,KCOMP>.TreeIteratorMove(
|
||||
AIterator : PBaseIterator;
|
||||
ADirection : Boolean
|
||||
) : PRBTreeNode;static;
|
||||
var
|
||||
last : PRBTreeNode;
|
||||
begin
|
||||
Result := nil;
|
||||
if (AIterator^.Current = nil) then
|
||||
exit;
|
||||
|
||||
if (AIterator^.Current^.Links[ADirection] <> nil) then begin
|
||||
// Continue down this branch
|
||||
AIterator^.Path[AIterator^.Top] := AIterator^.Current;
|
||||
Inc(AIterator^.Top);
|
||||
AIterator^.Current := AIterator^.Current^.Links[ADirection];
|
||||
|
||||
while ( AIterator^.Current^.Links[not ADirection] <> nil) do begin
|
||||
AIterator^.Path[AIterator^.Top] := AIterator^.Current;
|
||||
Inc(AIterator^.Top);
|
||||
AIterator^.Current := AIterator^.Current^.Links[not ADirection];
|
||||
end;
|
||||
end else begin
|
||||
// Move to the next branch
|
||||
repeat
|
||||
if (AIterator^.Top = 0) then begin
|
||||
AIterator^.Current := nil;
|
||||
break;
|
||||
end;
|
||||
|
||||
last := AIterator^.Current;
|
||||
Dec(AIterator^.Top);
|
||||
AIterator^.Current := AIterator^.Path[AIterator^.Top];
|
||||
until (last <> AIterator^.Current^.Links[ADirection]);
|
||||
end;
|
||||
|
||||
Result := AIterator^.Current;
|
||||
end;
|
||||
|
||||
class function TRBTree<T,KCOMP>.TreeIteratorMoveNext(
|
||||
AIterator : PBaseIterator
|
||||
) : PRBTreeNode;static;
|
||||
begin
|
||||
Result := TreeIteratorMove(AIterator,True);
|
||||
end;
|
||||
|
||||
class function TRBTree<T,KCOMP>.TreeIteratorMovePrevious(
|
||||
AIterator : PBaseIterator
|
||||
) : PRBTreeNode;static;
|
||||
begin
|
||||
Result := TreeIteratorMove(AIterator,False);
|
||||
end;
|
||||
|
||||
function TRBTree<T,KCOMP>.CreateIterator(
|
||||
const ANode : PRBTreeNode;
|
||||
const ADirection : Boolean
|
||||
) : TIterator;
|
||||
var
|
||||
h : PBaseIterator;
|
||||
begin
|
||||
h := TreeCreateIterator();
|
||||
TreeInitIterator(h,Self,ANode,ADirection);
|
||||
Result := TIterator.Create(h);
|
||||
end;
|
||||
|
||||
class function TRBTree<T,KCOMP>.DefaultCreateNode(AContext: Pointer): PRBTreeNode;
|
||||
begin
|
||||
New(Result);
|
||||
end;
|
||||
|
||||
class procedure TRBTree<T,KCOMP>.DefaultFreeNode(ANode: PRBTreeNode; AContext: Pointer);
|
||||
begin
|
||||
Dispose(ANode);
|
||||
end;
|
||||
|
||||
constructor TRBTree<T,KCOMP>.Create(const AAllocator : PRBTreeAllocator);
|
||||
begin
|
||||
Root := nil;
|
||||
Allocator := AAllocator^;
|
||||
//Comparator := TRBTreeNodeComparator.Create();
|
||||
end;
|
||||
|
||||
constructor TRBTree < T, KCOMP > .Create();
|
||||
var
|
||||
a : TRBTreeAllocator;
|
||||
begin
|
||||
a.CreateNode := TRBTreeNodeCreator(DefaultCreateNode);
|
||||
a.FreeNode := TRBTreeNodeDestructor(DefaultFreeNode);
|
||||
Create(@a);
|
||||
end;
|
||||
|
||||
destructor TRBTree<T,KCOMP>.Destroy;
|
||||
begin
|
||||
Clear();
|
||||
//Comparator.Free();
|
||||
inherited;
|
||||
end;
|
||||
|
||||
procedure TRBTree<T,KCOMP>.Clear();
|
||||
var
|
||||
it, save : PRBTreeNode;
|
||||
begin
|
||||
it := Root;
|
||||
|
||||
while (it <> nil) do begin
|
||||
if (it^.Links[False] <> nil) then begin
|
||||
// Right rotation
|
||||
save := it^.Links[False];
|
||||
it^.Links[False] := save^.Links[True];
|
||||
save^.Links[True] := it;
|
||||
end else begin
|
||||
save := it^.Links[True];
|
||||
Allocator.FreeNode(it,Self);
|
||||
end;
|
||||
it := save;
|
||||
end;
|
||||
end;
|
||||
|
||||
function TRBTree<T,KCOMP>.FindNode(const AData: TRBTreeNodeData): PRBTreeNode;
|
||||
var
|
||||
it : PRBTreeNode;
|
||||
cp : TRBTreeNodeComparator;
|
||||
dir : Boolean;
|
||||
begin
|
||||
Result := nil;
|
||||
it := Root;
|
||||
if (it = nil) then
|
||||
exit;
|
||||
cp := Comparator;
|
||||
while (it <> nil) do begin
|
||||
if (cp.Compare(it^.Data,AData) = 0) then begin
|
||||
Result := it;
|
||||
Break;
|
||||
end;
|
||||
dir := (cp.Compare(it^.Data,AData) < 0);
|
||||
it := it^.Links[dir];
|
||||
end;
|
||||
end;
|
||||
|
||||
function TRBTree<T,KCOMP>.Insert(const AData: TRBTreeNodeData): PRBTreeNode;
|
||||
var
|
||||
head : TRBTreeNode;
|
||||
g, t : PRBTreeNode; // Grandparent & parent
|
||||
p, q : PRBTreeNode; // Iterator & parent
|
||||
dir, last, dir2 : Boolean;
|
||||
cp : TRBTreeNodeComparator;
|
||||
begin
|
||||
if (Root = nil) then begin
|
||||
// Empty tree case
|
||||
Root := InitNode(Allocator.CreateNode(Self),AData);
|
||||
Result := Root;
|
||||
end else begin
|
||||
FillChar(head,SizeOf(head),0); // False tree root
|
||||
|
||||
dir := False;
|
||||
last := False;
|
||||
|
||||
// Set up helpers
|
||||
t := @head;
|
||||
g := nil;
|
||||
p := nil;
|
||||
t^.Links[True] := Root;
|
||||
q := t^.Links[True];
|
||||
|
||||
// Search down the tree
|
||||
cp := Comparator;
|
||||
while True do begin
|
||||
if (q = nil) then begin
|
||||
// Insert new node at the bottom
|
||||
q := InitNode(Allocator.CreateNode(Self),AData);
|
||||
p^.Links[dir] := q;
|
||||
end else if IsRed(q^.Links[False]) and IsRed(q^.Links[True]) then begin
|
||||
// Color flip
|
||||
q^.Red := True;
|
||||
q^.Links[False]^.Red := False;
|
||||
q^.Links[True]^.Red := False;
|
||||
end;
|
||||
|
||||
// Fix red violation
|
||||
if IsRed(q) and IsRed(p) then begin
|
||||
dir2 := (t^.Links[True] = g);
|
||||
if (q = p^.Links[last]) then
|
||||
t^.Links[dir2] := RotateSingle(g, not last)
|
||||
else
|
||||
t^.Links[dir2] := RotateDouble(g, not last );
|
||||
end;
|
||||
|
||||
// Stop if found
|
||||
if (cp.Compare(q^.Data,AData) = 0) then
|
||||
break;
|
||||
|
||||
last := dir;
|
||||
dir := (cp.Compare(q^.Data,AData) < 0);
|
||||
|
||||
// Update helpers
|
||||
if (g <> nil) then
|
||||
t := g;
|
||||
g := p;
|
||||
p := q;
|
||||
q := q^.Links[dir];
|
||||
end;
|
||||
|
||||
// Update root
|
||||
Root := head.Links[True];
|
||||
end;
|
||||
|
||||
// Make root black
|
||||
Root^.Red := False;
|
||||
end;
|
||||
|
||||
function TRBTree<T,KCOMP>.Remove(const AData: TRBTreeNodeData): Boolean;
|
||||
var
|
||||
head : TRBTreeNode;
|
||||
q, p, g, f, s : PRBTreeNode;
|
||||
dir, last, dir2 : Boolean;
|
||||
cp : TRBTreeNodeComparator;
|
||||
begin
|
||||
Result := False;
|
||||
if (Root = nil) then
|
||||
exit;
|
||||
|
||||
FillChar(head,SizeOf(head),0); // False tree root
|
||||
f := nil;
|
||||
dir := True;
|
||||
|
||||
// Set up helpers
|
||||
q := @head;
|
||||
p := nil;
|
||||
g := nil;
|
||||
q^.Links[True] := Root;
|
||||
|
||||
// Search and push a red down
|
||||
cp := Comparator;
|
||||
while (q^.Links[dir] <> nil) do begin
|
||||
last := dir;
|
||||
|
||||
// Update helpers
|
||||
g := p;
|
||||
p := q;
|
||||
q := q^.Links[dir];
|
||||
dir := (cp.Compare(q^.Data,AData) < 0);
|
||||
|
||||
// Save found node
|
||||
if (cp.Compare(q^.Data,AData) = 0) then
|
||||
f := q;
|
||||
|
||||
// Push the red node down
|
||||
if not(IsRed(q)) and not(IsRed(q^.Links[dir])) then begin
|
||||
if IsRed(q^.Links[not dir]) then begin
|
||||
p^.Links[last] := RotateSingle(q,dir);
|
||||
p := p^.Links[last];
|
||||
end else if not IsRed(q^.Links[not dir]) then begin
|
||||
s := p^.Links[not last];
|
||||
|
||||
if (s <> nil) then begin
|
||||
if not(IsRed(s^.Links[not last])) and not(IsRed(s^.Links[last])) then begin
|
||||
// Color flip
|
||||
p^.Red := False;
|
||||
s^.Red := True;
|
||||
q^.Red := True;
|
||||
end else begin
|
||||
dir2 := (g^.Links[True] = p);
|
||||
|
||||
if IsRed(s^.Links[last]) then
|
||||
g^.Links[dir2] := RotateDouble(p,last)
|
||||
else if IsRed(s^.Links[not last]) then
|
||||
g^.Links[dir2] := RotateSingle(p,last);
|
||||
|
||||
// Ensure correct coloring
|
||||
g^.Links[dir2]^.Red := True;
|
||||
q^.Red := g^.Links[dir2]^.Red;
|
||||
g^.Links[dir2]^.Links[False]^.Red := False;
|
||||
g^.Links[dir2]^.Links[True]^.Red := False;
|
||||
end;
|
||||
end;
|
||||
end;
|
||||
end;
|
||||
end;
|
||||
|
||||
// Replace and remove if found
|
||||
if (f <> nil) then begin
|
||||
f^.Data := q^.Data;
|
||||
p^.Links[(p^.Links[True] = q)] :=
|
||||
q^.Links[(q^.Links[False] = nil)];
|
||||
Allocator.FreeNode(q,Self);
|
||||
Result := True;
|
||||
end;
|
||||
|
||||
// Update root and make it black
|
||||
Root := head.Links[True];
|
||||
if (Root <> nil) then
|
||||
Root^.Red := False;
|
||||
end;
|
||||
|
||||
function TRBTree<T,KCOMP>.CreateForwardIterator(const ANode : PRBTreeNode) : TIterator;
|
||||
begin
|
||||
Result := CreateIterator(ANode,False);
|
||||
end;
|
||||
|
||||
function TRBTree<T,KCOMP>.CreateForwardIterator() : TIterator;
|
||||
begin
|
||||
Result := CreateForwardIterator(Root);
|
||||
end;
|
||||
|
||||
function TRBTree<T,KCOMP>.CreateBackwardIterator(const ANode : PRBTreeNode) : TIterator;
|
||||
begin
|
||||
Result := CreateIterator(ANode,True);
|
||||
end;
|
||||
|
||||
function TRBTree<T,KCOMP>.CreateBackwardIterator() : TIterator;
|
||||
begin
|
||||
Result := CreateBackwardIterator(Root);
|
||||
end;
|
||||
|
||||
{$ifdef RB_DEBUG}
|
||||
function TRBTree<T,KCOMP>.SelfAssert(ARoot : PRBTreeNode; var AErrorMessage: string): Boolean;
|
||||
var
|
||||
lh, rh : Boolean;
|
||||
ln, rn : PRBTreeNode;
|
||||
e : string;
|
||||
begin
|
||||
AErrorMessage := '';
|
||||
if (ARoot = nil) then begin
|
||||
Result := True;
|
||||
exit;
|
||||
end;
|
||||
|
||||
e := '';
|
||||
ln := ARoot^.Links[False];
|
||||
rn := ARoot^.Links[True];
|
||||
|
||||
// Consecutive red links
|
||||
if IsRed(ARoot) then begin
|
||||
if IsRed(ln) or IsRed(rn) then begin
|
||||
AErrorMessage := 'Red violation';
|
||||
Result := False;
|
||||
exit;
|
||||
end;
|
||||
end;
|
||||
|
||||
lh := SelfAssert(ln,e);
|
||||
AErrorMessage := AErrorMessage + ' ' + e;
|
||||
|
||||
rh := SelfAssert(rn,e);
|
||||
AErrorMessage := AErrorMessage + ' ' + e;
|
||||
|
||||
// Invalid binary search tree
|
||||
if ( ( (ln <> nil) and (Comparator.Compare(ln^.Data,ARoot^.Data) >= 0) ) or
|
||||
( (rn <> nil) and (Comparator.Compare(rn^.Data,ARoot^.Data) <= 0) ) )
|
||||
then begin
|
||||
AErrorMessage := AErrorMessage + ' ' + 'Binary tree violation';
|
||||
Result := False;
|
||||
Exit;
|
||||
end;
|
||||
|
||||
// Black height mismatch
|
||||
if ( lh and rh and (lh <> rh) ) then begin
|
||||
AErrorMessage := AErrorMessage + ' ' + 'Black violation';
|
||||
Result := False;
|
||||
Exit;
|
||||
end;
|
||||
|
||||
Result := lh and rh;
|
||||
end;
|
||||
|
||||
function TRBTree<T,KCOMP>.SelfAssert(var AErrorMessage: string): Boolean;
|
||||
begin
|
||||
Result := Self.SelfAssert(Root, AErrorMessage);
|
||||
end;
|
||||
|
||||
{$endif RB_DEBUG}
|
||||
|
||||
constructor TRBTree<T,KCOMP>.TIterator.Create(AHandle : PBaseIterator);
|
||||
begin
|
||||
inherited Create();
|
||||
FHandle := AHandle;
|
||||
FResetState := True;
|
||||
end;
|
||||
|
||||
destructor TRBTree<T,KCOMP>.TIterator.Destroy();
|
||||
begin
|
||||
TreeFreeIterator(FHandle);
|
||||
inherited Destroy;
|
||||
end;
|
||||
|
||||
function TRBTree<T,KCOMP>.TIterator.MoveNext : Boolean;
|
||||
begin
|
||||
if FResetState then begin
|
||||
FResetState := False;
|
||||
Result := (FHandle^.Current <> nil);
|
||||
exit;
|
||||
end;
|
||||
Result := (TreeIteratorMoveNext(FHandle) <> nil);
|
||||
end;
|
||||
|
||||
function TRBTree<T,KCOMP>.TIterator.MovePrevious : Boolean;
|
||||
begin
|
||||
if FResetState then begin
|
||||
FResetState := False;
|
||||
Result := (FHandle^.Current <> nil);
|
||||
exit;
|
||||
end;
|
||||
Result := (TreeIteratorMovePrevious(FHandle) <> nil);
|
||||
end;
|
||||
|
||||
function TRBTree<T,KCOMP>.TIterator.GetCurrent : TRBTreeNodeData;
|
||||
begin
|
||||
Result := GetCurrentNode()^.Data;
|
||||
end;
|
||||
|
||||
function TRBTree<T,KCOMP>.TIterator.GetCurrentNode : PRBTreeNode;
|
||||
begin
|
||||
Result := FHandle^.Current;
|
||||
end;
|
||||
|
||||
procedure TRBTree<T,KCOMP>.TIterator.Reset();
|
||||
begin
|
||||
FResetState := True;
|
||||
TreeInitIterator(FHandle,FHandle^.Tree,FHandle^.StartingNode,FHandle^.StartingDir)
|
||||
end;
|
||||
|
||||
{ TOrdinalComparator<T> }
|
||||
|
||||
class function TOrdinalComparator<T>.Compare(const A, B: TOrdinalType): Integer;
|
||||
begin
|
||||
if (A = B) then
|
||||
exit(0);
|
||||
if (A > B) then
|
||||
exit(1);
|
||||
exit(-1);
|
||||
end;
|
||||
|
||||
end.
|
||||
|
||||
4036
utils/unicode/helper.pas
Normal file
4036
utils/unicode/helper.pas
Normal file
File diff suppressed because it is too large
Load Diff
17
utils/unicode/parse-collations.bat
Normal file
17
utils/unicode/parse-collations.bat
Normal file
@ -0,0 +1,17 @@
|
||||
cldrparser.exe de.xml -d.\data -o.\data
|
||||
echo
|
||||
cldrparser es.xml -d.\data -o.\data
|
||||
echo
|
||||
cldrparser fr_CA.xml -d.\data -o.\data
|
||||
echo
|
||||
cldrparser ja.xml -d.\data -o.\data
|
||||
echo
|
||||
cldrparser ko.xml -d.\data -o.\data
|
||||
echo
|
||||
cldrparser ru.xml -d.\data -o.\data
|
||||
echo
|
||||
cldrparser sv.xml -d.\data -o.\data
|
||||
echo
|
||||
cldrparser zh.xml -d.\data -o.\data
|
||||
|
||||
pause
|
||||
158
utils/unicode/trie.pas
Normal file
158
utils/unicode/trie.pas
Normal file
@ -0,0 +1,158 @@
|
||||
{ Simple TRIE implementation.
|
||||
|
||||
Copyright (c) 2012 by Inoussa OUEDRAOGO
|
||||
|
||||
The source code is distributed under the Library GNU
|
||||
General Public License with the following modification:
|
||||
|
||||
- object files and libraries linked into an application may be
|
||||
distributed without source code.
|
||||
|
||||
If you didn't receive a copy of the file COPYING, contact:
|
||||
Free Software Foundation
|
||||
675 Mass Ave
|
||||
Cambridge, MA 02139
|
||||
USA
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. }
|
||||
unit trie;
|
||||
|
||||
{$mode objfpc}{$H+}
|
||||
|
||||
interface
|
||||
|
||||
uses
|
||||
SysUtils;
|
||||
|
||||
const
|
||||
MAX_CHILD_COUNT = 256;
|
||||
type
|
||||
TKeyType = Cardinal;
|
||||
TDataType = Integer;
|
||||
PTrieNode = ^TTrieNode;
|
||||
TTrieNode = packed record
|
||||
Key : TKeyType;
|
||||
DataNode : Boolean;
|
||||
Data : TDataType;
|
||||
ChildCount : Byte;
|
||||
Children : array[0..(MAX_CHILD_COUNT-1)] of PTrieNode;
|
||||
end;
|
||||
|
||||
function CreateNode(
|
||||
const AKey : TKeyType;
|
||||
const AData : TDataType
|
||||
) : PTrieNode; overload;
|
||||
function CreateNode(const AKey : TKeyType) : PTrieNode;overload;
|
||||
procedure FreeNode(ANode : PTrieNode);
|
||||
function InsertWord(
|
||||
const ARoot : PTrieNode;
|
||||
const AWord : array of TKeyType;
|
||||
const AValue : TDataType
|
||||
) : Boolean;overload;
|
||||
function InsertWord(
|
||||
const ARoot : PTrieNode;
|
||||
const AWord : TKeyType;
|
||||
const AValue : TDataType
|
||||
) : Boolean;overload;
|
||||
|
||||
implementation
|
||||
|
||||
function CreateNode(
|
||||
const AKey : TKeyType;
|
||||
const AData : TDataType
|
||||
) : PTrieNode;
|
||||
begin
|
||||
New(Result);
|
||||
Result^.Key := AKey;
|
||||
Result^.DataNode := True;
|
||||
Result^.Data := AData;
|
||||
Result^.ChildCount := 0;
|
||||
end;
|
||||
|
||||
function CreateNode(const AKey : TKeyType) : PTrieNode;
|
||||
begin
|
||||
New(Result);
|
||||
Result^.Key := AKey;
|
||||
Result^.DataNode := False;
|
||||
Result^.ChildCount := 0;
|
||||
end;
|
||||
|
||||
procedure FreeNode(ANode : PTrieNode);
|
||||
var
|
||||
p : PTrieNode;
|
||||
i : Integer;
|
||||
begin
|
||||
if (ANode = nil) then
|
||||
exit;
|
||||
p := ANode;
|
||||
for i := 0 to p^.ChildCount - 1 do
|
||||
FreeNode(p^.Children[i]);
|
||||
Dispose(p);
|
||||
end;
|
||||
|
||||
function InsertWord(
|
||||
const ARoot : PTrieNode;
|
||||
const AWord : TKeyType;
|
||||
const AValue : TDataType
|
||||
) : Boolean;
|
||||
begin
|
||||
Result := InsertWord(ARoot,[AWord],AValue);
|
||||
end;
|
||||
|
||||
function InsertWord(
|
||||
const ARoot : PTrieNode;
|
||||
const AWord : array of TKeyType;
|
||||
const AValue : TDataType
|
||||
) : Boolean;
|
||||
var
|
||||
p, q : PTrieNode;
|
||||
i, k, c : Integer;
|
||||
searching : TKeyType;
|
||||
found : Boolean;
|
||||
begin
|
||||
Result := False;
|
||||
if (ARoot^.Key <> AWord[0]) then
|
||||
exit;
|
||||
p := ARoot;
|
||||
q := p;
|
||||
i := 1;
|
||||
c := Length(AWord);
|
||||
while (i < c) do begin
|
||||
searching := AWord[i];
|
||||
found := False;
|
||||
for k := 0 to p^.ChildCount - 1 do begin
|
||||
if (p^.Children[k]^.Key = searching) then begin
|
||||
q := p;
|
||||
p := p^.Children[k];
|
||||
found := True;
|
||||
Break;
|
||||
end;
|
||||
end;
|
||||
if not found then
|
||||
Break;
|
||||
Inc(i);
|
||||
end;
|
||||
if (i < c) then begin
|
||||
if (i = c) then
|
||||
i := i - 1;
|
||||
for i := i to c - 2 do begin
|
||||
k := p^.ChildCount;
|
||||
p^.Children[k] := CreateNode(AWord[i]);
|
||||
p^.ChildCount := k + 1;
|
||||
p := p^.Children[k];
|
||||
end;
|
||||
i := c - 1;
|
||||
k := p^.ChildCount;
|
||||
p^.Children[k] := CreateNode(AWord[i],AValue);
|
||||
p^.ChildCount := k + 1;
|
||||
p := p^.Children[k];
|
||||
Result := True;
|
||||
end;
|
||||
end;
|
||||
|
||||
|
||||
|
||||
end.
|
||||
|
||||
282
utils/unicode/uca_test.pas
Normal file
282
utils/unicode/uca_test.pas
Normal file
@ -0,0 +1,282 @@
|
||||
{ Unicode Collation Algorithm test routines for generated data.
|
||||
|
||||
Copyright (c) 2012 by Inoussa OUEDRAOGO
|
||||
|
||||
The source code is distributed under the Library GNU
|
||||
General Public License with the following modification:
|
||||
|
||||
- object files and libraries linked into an application may be
|
||||
distributed without source code.
|
||||
|
||||
If you didn't receive a copy of the file COPYING, contact:
|
||||
Free Software Foundation
|
||||
675 Mass Ave
|
||||
Cambridge, MA 02139
|
||||
USA
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
||||
}
|
||||
|
||||
unit uca_test;
|
||||
|
||||
{$mode objfpc}{$H+}
|
||||
|
||||
interface
|
||||
|
||||
uses
|
||||
SysUtils,
|
||||
helper;
|
||||
|
||||
procedure uca_CheckProp_1(
|
||||
ABook : TUCA_DataBook;
|
||||
APropBook : PUCA_PropBook
|
||||
);
|
||||
procedure uca_CheckProp_x(
|
||||
ABook : TUCA_DataBook;
|
||||
APropBook : PUCA_PropBook
|
||||
);
|
||||
|
||||
procedure uca_CheckProp_1y(
|
||||
const ABook : TUCA_DataBook;
|
||||
const APropBook : PUCA_PropBook;
|
||||
const AFirstTable : PucaBmpFirstTable;
|
||||
const ASecondTable : PucaBmpSecondTable
|
||||
);
|
||||
procedure uca_CheckProp_2y(
|
||||
const ABook : TUCA_DataBook;
|
||||
const APropBook : PUCA_PropBook;
|
||||
const AFirstTable : PucaOBmpFirstTable;
|
||||
const ASecondTable : PucaOBmpSecondTable
|
||||
);
|
||||
|
||||
implementation
|
||||
|
||||
function IndexOf(const ACodePoint : Cardinal; APropBook : PUCA_PropBook): Integer;
|
||||
var
|
||||
i : Integer;
|
||||
begin
|
||||
for i := 0 to Length(APropBook^.Index) - 1 do begin
|
||||
if (ACodePoint = APropBook^.Index[i].CodePoint) then
|
||||
exit(i);
|
||||
end;
|
||||
Result := -1;
|
||||
end;
|
||||
|
||||
function CompareWeigth(AExpect : PUCA_LineRec; AActual : PUCA_PropItemRec) : Boolean;
|
||||
var
|
||||
i, k : Integer;
|
||||
p : PUCA_PropWeights;
|
||||
pw : array of TUCA_PropWeights;
|
||||
begin
|
||||
Result := False;
|
||||
if (Length(AExpect^.Weights) <> AActual^.WeightLength) then
|
||||
exit;
|
||||
//p := PUCA_PropWeights(PtrUInt(AActual) + SizeOf(TUCA_PropItemRec));
|
||||
SetLength(pw,AActual^.WeightLength);
|
||||
p := @pw[0];
|
||||
AActual^.GetWeightArray(p);
|
||||
for i := 0 to Length(AExpect^.Weights) - 1 do begin
|
||||
//if (BoolToByte(AExpect^.Weights[i].Variable) <> p^.Variable) then
|
||||
//exit;
|
||||
for k := 0 to 3 - 1 do begin
|
||||
if (AExpect^.Weights[i].Weights[k] <> p^.Weights[k]) then
|
||||
exit;
|
||||
end;
|
||||
Inc(p);
|
||||
end;
|
||||
Result := True;
|
||||
end;
|
||||
|
||||
procedure uca_CheckProp_1(
|
||||
ABook : TUCA_DataBook;
|
||||
APropBook : PUCA_PropBook
|
||||
);
|
||||
var
|
||||
i, c, k : Integer;
|
||||
line : PUCA_LineRec;
|
||||
uc : Cardinal;
|
||||
p : PUCA_PropItemRec;
|
||||
begin
|
||||
WriteLn('uca_CheckProp_1 Start ... ');
|
||||
line := @ABook.Lines[0];
|
||||
c := Length(ABook.Lines);
|
||||
for i := 0 to c - 1 do begin
|
||||
if line^.Stored and (Length(line^.CodePoints) = 1) then begin
|
||||
uc := line^.CodePoints[0];
|
||||
k := IndexOf(uc,APropBook);
|
||||
if (k = -1) then begin
|
||||
WriteLn('Property not found for Code Point : ' + Format('%x',[uc]));
|
||||
end else begin
|
||||
p := PUCA_PropItemRec(PtrUInt(APropBook^.Items)+APropBook^.Index[k].Position);
|
||||
if not CompareWeigth(line,p) then
|
||||
WriteLn('CompareWeigth fail for Code Point : ' + Format('%x',[uc]));
|
||||
end;
|
||||
end;
|
||||
Inc(line);
|
||||
end;
|
||||
WriteLn('uca_CheckProp_1 End');
|
||||
end;
|
||||
|
||||
function FindWord(
|
||||
const AWord : array of Cardinal;
|
||||
const APropBook : PUCA_PropItemRec
|
||||
) : PUCA_PropItemRec;
|
||||
var
|
||||
cc : Cardinal;
|
||||
p : PUCA_PropItemRec;
|
||||
i, k, kc : Integer;
|
||||
ok : Boolean;
|
||||
begin
|
||||
Result := nil;
|
||||
p := APropBook;
|
||||
for i := 1 to Length(AWord) - 1 do begin
|
||||
ok := False;
|
||||
kc := p^.ChildCount - 1;
|
||||
p := PUCA_PropItemRec(PtrUInt(p) + p^.GetSelfOnlySize());
|
||||
if (i > 1) then
|
||||
p := PUCA_PropItemRec(PtrUInt(p) + SizeOf(UInt24));
|
||||
for k := 0 to kc do begin
|
||||
if (AWord[i] = p^.CodePoint) then begin
|
||||
ok := True;
|
||||
Break;
|
||||
end;
|
||||
p := PUCA_PropItemRec(PtrUInt(p) + p^.Size);
|
||||
end;
|
||||
if not ok then
|
||||
exit;
|
||||
end;
|
||||
Result := p;
|
||||
end;
|
||||
|
||||
function DumpCodePoints(const AValues : array of Cardinal) : string;
|
||||
var
|
||||
i : Integer;
|
||||
begin
|
||||
Result := '';
|
||||
for i := 0 to Length(AValues) - 1 do
|
||||
Result := Format('%s %x',[Result,AValues[i]]);
|
||||
Result := Trim(Result);
|
||||
end;
|
||||
|
||||
procedure uca_CheckProp_x(
|
||||
ABook : TUCA_DataBook;
|
||||
APropBook : PUCA_PropBook
|
||||
);
|
||||
var
|
||||
i, c, k : Integer;
|
||||
line : PUCA_LineRec;
|
||||
uc : Cardinal;
|
||||
p, q : PUCA_PropItemRec;
|
||||
begin
|
||||
WriteLn('uca_CheckProp_x Start ... ');
|
||||
line := @ABook.Lines[0];
|
||||
c := Length(ABook.Lines);
|
||||
for i := 0 to c - 1 do begin
|
||||
if line^.Stored and (Length(line^.CodePoints) > 1) then begin
|
||||
//WriteLn(' Code Point sequence : ' + DumpCodePoints(line^.CodePoints));
|
||||
uc := line^.CodePoints[0];
|
||||
k := IndexOf(uc,APropBook);
|
||||
if (k = -1) then begin
|
||||
WriteLn(' Property not found for Code Point : ' + Format('%x',[uc]));
|
||||
end else begin
|
||||
q := PUCA_PropItemRec(PtrUInt(APropBook^.Items)+APropBook^.Index[k].Position);
|
||||
p := FindWord(line^.CodePoints,q);
|
||||
if (p = nil) then
|
||||
WriteLn(' Data not found for Code Point sequence : ' + DumpCodePoints(line^.CodePoints))
|
||||
else if not CompareWeigth(line,p) then
|
||||
WriteLn(' CompareWeigth fail for Code Point sequence : ' + DumpCodePoints(line^.CodePoints));
|
||||
end;
|
||||
end;
|
||||
Inc(line);
|
||||
end;
|
||||
WriteLn('uca_CheckProp_x End');
|
||||
end;
|
||||
|
||||
|
||||
function GetPropPosition(
|
||||
const ABMPCodePoint : Word;
|
||||
const AFirstTable : PucaBmpFirstTable;
|
||||
const ASecondTable : PucaBmpSecondTable
|
||||
) : Integer; inline;overload;
|
||||
begin
|
||||
Result:=
|
||||
ASecondTable^[AFirstTable^[WordRec(ABMPCodePoint).Hi]][WordRec(ABMPCodePoint).Lo] - 1
|
||||
end;
|
||||
|
||||
procedure uca_CheckProp_1y(
|
||||
const ABook : TUCA_DataBook;
|
||||
const APropBook : PUCA_PropBook;
|
||||
const AFirstTable : PucaBmpFirstTable;
|
||||
const ASecondTable : PucaBmpSecondTable
|
||||
);
|
||||
var
|
||||
i, c, k : Integer;
|
||||
line : PUCA_LineRec;
|
||||
uc : Cardinal;
|
||||
p : PUCA_PropItemRec;
|
||||
ucw : Word;
|
||||
begin
|
||||
WriteLn('uca_CheckProp_1y Start (BMP) ... ');
|
||||
line := @ABook.Lines[0];
|
||||
c := Length(ABook.Lines);
|
||||
for i := 0 to c - 1 do begin
|
||||
if line^.Stored and (Length(line^.CodePoints) = 1) then begin
|
||||
uc := line^.CodePoints[0];
|
||||
if (uc <= High(Word)) then begin
|
||||
ucw := uc;
|
||||
k := GetPropPosition(ucw,AFirstTable,ASecondTable);
|
||||
if (k = -1) then begin
|
||||
WriteLn('Property not found for Code Point : ' + Format('%x',[uc]));
|
||||
end else begin
|
||||
p := PUCA_PropItemRec(PtrUInt(APropBook^.Items)+k);
|
||||
if not CompareWeigth(line,p) then
|
||||
WriteLn('CompareWeigth fail for Code Point : ' + Format('%x',[uc]));
|
||||
end;
|
||||
end;
|
||||
end;
|
||||
Inc(line);
|
||||
end;
|
||||
WriteLn('uca_CheckProp_1y End');
|
||||
end;
|
||||
|
||||
procedure uca_CheckProp_2y(
|
||||
const ABook : TUCA_DataBook;
|
||||
const APropBook : PUCA_PropBook;
|
||||
const AFirstTable : PucaOBmpFirstTable;
|
||||
const ASecondTable : PucaOBmpSecondTable
|
||||
);
|
||||
var
|
||||
i, c, k : Integer;
|
||||
line : PUCA_LineRec;
|
||||
uc : Cardinal;
|
||||
p : PUCA_PropItemRec;
|
||||
uchs, ucls : Word;
|
||||
begin
|
||||
WriteLn('uca_CheckProp_2y Start (>BMP) ... ');
|
||||
line := @ABook.Lines[0];
|
||||
c := Length(ABook.Lines);
|
||||
for i := 0 to c - 1 do begin
|
||||
if line^.Stored and (Length(line^.CodePoints) = 1) then begin
|
||||
uc := line^.CodePoints[0];
|
||||
if (uc > High(Word)) then begin
|
||||
FromUCS4(uc,uchs,ucls);
|
||||
k := GetPropPosition(uchs,ucls,AFirstTable,ASecondTable);
|
||||
if (k = -1) then begin
|
||||
WriteLn('Property not found for Code Point : ' + Format('%x',[uc]));
|
||||
end else begin
|
||||
p := PUCA_PropItemRec(PtrUInt(APropBook^.Items)+k);
|
||||
if not CompareWeigth(line,p) then
|
||||
WriteLn('CompareWeigth fail for Code Point : ' + Format('%x',[uc]));
|
||||
end;
|
||||
end;
|
||||
end;
|
||||
Inc(line);
|
||||
end;
|
||||
WriteLn('uca_CheckProp_2y End');
|
||||
end;
|
||||
|
||||
|
||||
end.
|
||||
|
||||
426
utils/unicode/unicodeset.pas
Normal file
426
utils/unicode/unicodeset.pas
Normal file
@ -0,0 +1,426 @@
|
||||
{ UnicodeSet implementation.
|
||||
|
||||
Copyright (c) 2013 by Inoussa OUEDRAOGO
|
||||
|
||||
The source code is distributed under the Library GNU
|
||||
General Public License with the following modification:
|
||||
|
||||
- object files and libraries linked into an application may be
|
||||
distributed without source code.
|
||||
|
||||
If you didn't receive a copy of the file COPYING, contact:
|
||||
Free Software Foundation
|
||||
675 Mass Ave
|
||||
Cambridge, MA 02139
|
||||
USA
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
||||
}
|
||||
unit unicodeset;
|
||||
|
||||
{$mode delphi}{$H+}
|
||||
{$scopedenums on}
|
||||
|
||||
interface
|
||||
|
||||
uses
|
||||
SysUtils,
|
||||
grbtree, helper;
|
||||
|
||||
type
|
||||
|
||||
EUnicodeSetException = class(Exception)
|
||||
end;
|
||||
|
||||
TUnicodeSet = class;
|
||||
|
||||
TPatternParser = class
|
||||
private
|
||||
FBufferStr : UnicodeString;
|
||||
FBuffer : PUnicodeChar;
|
||||
FBufferLength : Integer;
|
||||
FSet : TUnicodeSet;
|
||||
FPosition : Integer;
|
||||
private
|
||||
procedure Error(const AMsg : string; const AArgs : array of const);overload;inline;
|
||||
procedure Error(const AMsg : string);overload;inline;
|
||||
procedure SetBuffer(const APattern : PUnicodeChar; const ALength : Integer);
|
||||
procedure CheckEOF();inline;overload;
|
||||
procedure CheckEOF(ALength : Integer);overload;inline;
|
||||
procedure UnexpectedEOF();inline;
|
||||
function IsThis(AItem : UnicodeString; const APosition : Integer) : Boolean;overload;
|
||||
function IsThis(AItem : UnicodeString) : Boolean;overload;inline;
|
||||
procedure Expect(AItem : UnicodeString; const APosition : Integer);overload;inline;
|
||||
procedure Expect(AItem : UnicodeString);overload;inline;
|
||||
procedure SkipSpaces();inline;
|
||||
function NextChar() : TUnicodeCodePoint;
|
||||
procedure ParseItem();
|
||||
procedure DoParse();
|
||||
public
|
||||
procedure Parse(const APattern : PUnicodeChar; const ALength : Integer);overload;
|
||||
procedure Parse(const APattern : UnicodeString);overload;inline;
|
||||
property CurrentSet : TUnicodeSet read FSet write FSet;
|
||||
end;
|
||||
|
||||
TUnicodeCodePointArrayComparator = class
|
||||
public
|
||||
// Return
|
||||
// * if A>B then 1
|
||||
// * if A=B then 0
|
||||
// * if A<B then -1
|
||||
class function Compare(const A, B : TUnicodeCodePointArray) : Integer;static;inline;
|
||||
end;
|
||||
|
||||
TUnicodeSet = class
|
||||
private type
|
||||
TItem = TUnicodeCodePointArray;
|
||||
TTree = TRBTree<TItem,TUnicodeCodePointArrayComparator>;
|
||||
public type
|
||||
TIterator = TTree.TIterator;
|
||||
private
|
||||
FTree : TTree;
|
||||
FParser : TPatternParser;
|
||||
private
|
||||
procedure CreateParser();inline;
|
||||
public
|
||||
constructor Create();
|
||||
destructor Destroy;override;
|
||||
procedure Add(AChar : TUnicodeCodePoint);inline;overload;
|
||||
procedure Add(AString : TUnicodeCodePointArray);inline;overload;
|
||||
procedure AddRange(const AStart, AEnd : TUnicodeCodePoint);inline;
|
||||
procedure AddPattern(const APattern : UnicodeString);inline;
|
||||
function CreateIterator() : TIterator;
|
||||
function Contains(const AString : array of TUnicodeCodePoint) : Boolean;overload;
|
||||
function Contains(const AChar : TUnicodeCodePoint) : Boolean;inline;overload;
|
||||
function Contains(const AChar : UnicodeChar) : Boolean;inline;overload;
|
||||
function Contains(const AChar : AnsiChar) : Boolean;inline;overload;
|
||||
end;
|
||||
|
||||
resourcestring
|
||||
SInvalidLength = 'Invalid length value : "%d".';
|
||||
SInvalidPosition = 'Invalid position : "%d".';
|
||||
SInvalidRangeLimits = 'Invalid range limits : ["%x" , "%x"].';
|
||||
SExpectedBut = 'Expects "%s" but got "%s..." .';
|
||||
SUnexpectedEOF = 'Unexpected end of file.';
|
||||
|
||||
implementation
|
||||
uses
|
||||
unicodedata;
|
||||
|
||||
function ToArray(const AItem : TUnicodeCodePoint) : TUnicodeCodePointArray;inline;
|
||||
begin
|
||||
SetLength(Result,1);
|
||||
Result[Low(Result)] := AItem;
|
||||
end;
|
||||
|
||||
function CompareItem(const Item1, Item2 : TUnicodeCodePointArray): Integer;
|
||||
var
|
||||
a, b : ^TUnicodeCodePoint;
|
||||
i, ha, hb : Integer;
|
||||
begin
|
||||
if (Pointer(Item1) = Pointer(Item2)) then
|
||||
exit(0);
|
||||
if (Item1 = nil) then
|
||||
exit(-1);
|
||||
if (Item2 = nil) then
|
||||
exit(1);
|
||||
a := @Item1[0];
|
||||
b := @Item2[0];
|
||||
Result := 1;
|
||||
ha := Length(Item1) - 1;
|
||||
hb := Length(Item2) - 1;
|
||||
for i := 0 to ha do begin
|
||||
if (i > hb) then
|
||||
exit;
|
||||
if (a^ < b^) then
|
||||
exit(-1);
|
||||
if (a^ > b^) then
|
||||
exit(1);
|
||||
Inc(a);
|
||||
Inc(b);
|
||||
end;
|
||||
if (ha = hb) then
|
||||
exit(0);
|
||||
exit(-1);
|
||||
end;
|
||||
|
||||
{ TUnicodeCodePointArrayComparator }
|
||||
|
||||
class function TUnicodeCodePointArrayComparator.Compare(const A, B : TUnicodeCodePointArray): Integer;
|
||||
begin
|
||||
Result := CompareItem(A,B);
|
||||
end;
|
||||
|
||||
{ TPatternParser }
|
||||
|
||||
procedure TPatternParser.Error(const AMsg: string; const AArgs: array of const);
|
||||
begin
|
||||
raise EUnicodeSetException.CreateFmt(AMsg,AArgs);
|
||||
end;
|
||||
|
||||
procedure TPatternParser.Error(const AMsg: string);
|
||||
begin
|
||||
raise EUnicodeSetException.Create(AMsg);
|
||||
end;
|
||||
|
||||
procedure TPatternParser.SetBuffer(
|
||||
const APattern : PUnicodeChar;
|
||||
const ALength : Integer
|
||||
);
|
||||
begin
|
||||
FPosition := 0;
|
||||
if (ALength <= 1) then begin
|
||||
FBufferStr := '';
|
||||
FBuffer := nil;
|
||||
FBufferLength := 0;
|
||||
exit;
|
||||
end;
|
||||
FBufferLength := ALength;
|
||||
SetLength(FBufferStr,FBufferLength);
|
||||
FBuffer := @FBufferStr[1];
|
||||
Move(APattern^,FBuffer^,(FBufferLength*SizeOf(FBuffer^)));
|
||||
end;
|
||||
|
||||
procedure TPatternParser.CheckEOF();
|
||||
begin
|
||||
CheckEOF(0);
|
||||
end;
|
||||
|
||||
procedure TPatternParser.CheckEOF(ALength : Integer);
|
||||
begin
|
||||
if (ALength < 0) then
|
||||
Error(SInvalidLength,[ALength]);
|
||||
if ((FPosition+ALength) >= FBufferLength) then
|
||||
UnexpectedEOF();
|
||||
end;
|
||||
|
||||
procedure TPatternParser.UnexpectedEOF();
|
||||
begin
|
||||
Error(SUnexpectedEOF);
|
||||
end;
|
||||
|
||||
function TPatternParser.IsThis(AItem: UnicodeString; const APosition: Integer): Boolean;
|
||||
var
|
||||
i, k, c : Integer;
|
||||
begin
|
||||
if (APosition < 0) then
|
||||
Error(SInvalidPosition,[APosition]);
|
||||
Result := False;
|
||||
c := Length(AItem);
|
||||
if (c = 0) then
|
||||
exit;
|
||||
i := APosition;
|
||||
k := i + c;
|
||||
if (k >= FBufferLength) then
|
||||
exit;
|
||||
if CompareMem(@AItem[1], @FBuffer[APosition],c) then
|
||||
Result := True;
|
||||
end;
|
||||
|
||||
function TPatternParser.IsThis(AItem : UnicodeString) : Boolean;
|
||||
begin
|
||||
Result := IsThis(AItem,FPosition);
|
||||
end;
|
||||
|
||||
procedure TPatternParser.Expect(AItem: UnicodeString; const APosition: Integer);
|
||||
begin
|
||||
if not IsThis(AItem,APosition) then
|
||||
Error(SExpectedBut,[AItem,Copy(FBuffer,APosition,Length(AItem))]);
|
||||
end;
|
||||
|
||||
procedure TPatternParser.Expect(AItem: UnicodeString);
|
||||
begin
|
||||
Expect(AItem,FPosition);
|
||||
end;
|
||||
|
||||
procedure TPatternParser.SkipSpaces();
|
||||
begin
|
||||
while (FPosition < FBufferLength) do begin
|
||||
if (FBuffer[FPosition] <> ' ') then
|
||||
Break;
|
||||
Inc(FPosition);
|
||||
end;
|
||||
end;
|
||||
|
||||
function TPatternParser.NextChar(): TUnicodeCodePoint;
|
||||
var
|
||||
i : Integer;
|
||||
c : UnicodeChar;
|
||||
cp : TUnicodeCodePoint;
|
||||
s : UnicodeString;
|
||||
begin
|
||||
SkipSpaces();
|
||||
CheckEOF();
|
||||
c := FBuffer[FPosition];
|
||||
cp := Ord(c);
|
||||
Inc(FPosition);
|
||||
cp := Ord(c);
|
||||
if (c = '\') and (FPosition < FBufferLength) then begin
|
||||
if IsThis('\') then begin
|
||||
Inc(FPosition);
|
||||
CheckEOF();
|
||||
cp := Ord(FBuffer[FPosition]);
|
||||
Inc(FPosition);
|
||||
end else if IsThis('u') then begin
|
||||
Inc(FPosition);
|
||||
CheckEOF(4);
|
||||
s := Copy(FBufferStr,(FPosition+1),4);
|
||||
Inc(FPosition,4);
|
||||
if not TryStrToInt('$'+s,i) then
|
||||
Error(SExpectedBut,['\uXXXX',s]);
|
||||
cp := i;
|
||||
end;
|
||||
end;
|
||||
if (cp <= MAX_WORD) and UnicodeIsLowSurrogate(UnicodeChar(Word(cp))) then begin
|
||||
SkipSpaces();
|
||||
CheckEOF();
|
||||
c := UnicodeChar(Word(cp));
|
||||
if UnicodeIsSurrogatePair(c,FBuffer[FPosition]) then begin
|
||||
cp := ToUCS4(c,FBuffer[FPosition]);
|
||||
Inc(FPosition);
|
||||
end;
|
||||
end;
|
||||
Result := cp;
|
||||
end;
|
||||
|
||||
function CompareTo(const A : TUnicodeCodePoint; const B : UnicodeChar) : Boolean;inline;
|
||||
begin
|
||||
Result := (A = Ord(B));
|
||||
end;
|
||||
|
||||
procedure TPatternParser.ParseItem();
|
||||
var
|
||||
cp, lastCp : TUnicodeCodePoint;
|
||||
charCount, i : Integer;
|
||||
begin
|
||||
SkipSpaces();
|
||||
Expect('[');
|
||||
charCount := 0;
|
||||
Inc(FPosition);
|
||||
while (FPosition < FBufferLength) do begin
|
||||
lastCp := cp;
|
||||
cp := NextChar();
|
||||
if CompareTo(cp,']') then
|
||||
Break;
|
||||
if CompareTo(cp,'-') then begin
|
||||
if (charCount = 0) then
|
||||
Error(SExpectedBut,['<char>','-']);
|
||||
cp := NextChar();
|
||||
FSet.AddRange(lastCp,cp);
|
||||
end else begin
|
||||
FSet.Add(cp);
|
||||
end;
|
||||
Inc(charCount);
|
||||
end;
|
||||
end;
|
||||
|
||||
procedure TPatternParser.DoParse();
|
||||
begin
|
||||
SkipSpaces();
|
||||
while (FPosition < FBufferLength) do begin
|
||||
ParseItem();
|
||||
SkipSpaces();
|
||||
end;
|
||||
end;
|
||||
|
||||
procedure TPatternParser.Parse(const APattern: PUnicodeChar; const ALength: Integer);
|
||||
begin
|
||||
if (ALength < 2) then
|
||||
exit;
|
||||
SetBuffer(APattern,ALength);
|
||||
DoParse();
|
||||
end;
|
||||
|
||||
procedure TPatternParser.Parse(const APattern : UnicodeString);
|
||||
begin
|
||||
Parse(@APattern[1],Length(APattern));
|
||||
end;
|
||||
|
||||
{ TUnicodeSet }
|
||||
|
||||
procedure TUnicodeSet.CreateParser();
|
||||
begin
|
||||
if (FParser = nil) then begin
|
||||
FParser := TPatternParser.Create();
|
||||
FParser.CurrentSet := Self;
|
||||
end;
|
||||
end;
|
||||
|
||||
constructor TUnicodeSet.Create;
|
||||
begin
|
||||
FTree := TTree.Create();
|
||||
end;
|
||||
|
||||
destructor TUnicodeSet.Destroy;
|
||||
begin
|
||||
FParser.Free();
|
||||
FTree.Free();
|
||||
inherited Destroy;
|
||||
end;
|
||||
|
||||
procedure TUnicodeSet.Add(AChar: TUnicodeCodePoint);
|
||||
begin
|
||||
FTree.Insert(ToArray(AChar));
|
||||
end;
|
||||
|
||||
procedure TUnicodeSet.Add(AString: TUnicodeCodePointArray);
|
||||
begin
|
||||
if (AString <> nil) then
|
||||
FTree.Insert(AString);
|
||||
end;
|
||||
|
||||
procedure TUnicodeSet.AddRange(const AStart, AEnd : TUnicodeCodePoint);
|
||||
var
|
||||
i : Integer;
|
||||
begin
|
||||
if (AStart > AEnd) then
|
||||
raise EUnicodeSetException.CreateFmt(SInvalidRangeLimits,[AStart,AEnd]);
|
||||
for i := AStart to AEnd do
|
||||
Add(i);
|
||||
end;
|
||||
|
||||
procedure TUnicodeSet.AddPattern(const APattern : UnicodeString);
|
||||
begin
|
||||
CreateParser();
|
||||
FParser.Parse(APattern);
|
||||
end;
|
||||
|
||||
function TUnicodeSet.CreateIterator() : TIterator;
|
||||
begin
|
||||
Result := FTree.CreateForwardIterator();
|
||||
end;
|
||||
|
||||
function TUnicodeSet.Contains(const AString : array of TUnicodeCodePoint) : Boolean;
|
||||
var
|
||||
c : Integer;
|
||||
x : TUnicodeCodePointArray;
|
||||
begin
|
||||
Result := False;
|
||||
c := Length(AString);
|
||||
if (c = 0) then
|
||||
exit;
|
||||
SetLength(x,c);
|
||||
Move(AString[Low(AString)],x[Low(x)],(c*SizeOf(x[0])));
|
||||
if (FTree.FindNode(x) <> nil) then
|
||||
Result := True;
|
||||
end;
|
||||
|
||||
function TUnicodeSet.Contains(const AChar : TUnicodeCodePoint) : Boolean;
|
||||
begin
|
||||
Result := Contains([AChar]);
|
||||
end;
|
||||
|
||||
function TUnicodeSet.Contains(const AChar : UnicodeChar) : Boolean;
|
||||
begin
|
||||
Result := Contains(TUnicodeCodePoint(Ord(AChar)));
|
||||
end;
|
||||
|
||||
function TUnicodeSet.Contains(const AChar : AnsiChar) : Boolean;
|
||||
begin
|
||||
Result := Contains(TUnicodeCodePoint(Ord(AChar)));
|
||||
end;
|
||||
|
||||
end.
|
||||
|
||||
74
utils/unicode/unihelper.lpi
Normal file
74
utils/unicode/unihelper.lpi
Normal file
@ -0,0 +1,74 @@
|
||||
<?xml version="1.0"?>
|
||||
<CONFIG>
|
||||
<ProjectOptions>
|
||||
<Version Value="9"/>
|
||||
<PathDelim Value="\"/>
|
||||
<General>
|
||||
<Flags>
|
||||
<MainUnitHasCreateFormStatements Value="False"/>
|
||||
<MainUnitHasTitleStatement Value="False"/>
|
||||
</Flags>
|
||||
<SessionStorage Value="InProjectDir"/>
|
||||
<MainUnit Value="0"/>
|
||||
<Title Value="unihelper"/>
|
||||
<UseAppBundle Value="False"/>
|
||||
<ResourceType Value="res"/>
|
||||
</General>
|
||||
<i18n>
|
||||
<EnableI18N LFM="False"/>
|
||||
</i18n>
|
||||
<VersionInfo>
|
||||
<StringTable ProductVersion=""/>
|
||||
</VersionInfo>
|
||||
<BuildModes Count="1">
|
||||
<Item1 Name="Default" Default="True"/>
|
||||
</BuildModes>
|
||||
<PublishOptions>
|
||||
<Version Value="2"/>
|
||||
<IncludeFileFilter Value="*.(pas|pp|inc|lfm|lpr|lrs|lpi|lpk|sh|xml)"/>
|
||||
<ExcludeFileFilter Value="*.(bak|ppu|o|so);*~;backup"/>
|
||||
</PublishOptions>
|
||||
<RunParams>
|
||||
<local>
|
||||
<FormatVersion Value="1"/>
|
||||
</local>
|
||||
</RunParams>
|
||||
<Units Count="1">
|
||||
<Unit0>
|
||||
<Filename Value="unihelper.lpr"/>
|
||||
<IsPartOfProject Value="True"/>
|
||||
</Unit0>
|
||||
</Units>
|
||||
</ProjectOptions>
|
||||
<CompilerOptions>
|
||||
<Version Value="11"/>
|
||||
<PathDelim Value="\"/>
|
||||
<Target>
|
||||
<Filename Value="unihelper"/>
|
||||
</Target>
|
||||
<SearchPaths>
|
||||
<IncludeFiles Value="$(ProjOutDir)"/>
|
||||
<OtherUnitFiles Value="..\trie"/>
|
||||
<UnitOutputDirectory Value="lib\$(TargetCPU)-$(TargetOS)"/>
|
||||
</SearchPaths>
|
||||
<Other>
|
||||
<CompilerMessages>
|
||||
<MsgFileName Value=""/>
|
||||
</CompilerMessages>
|
||||
<CompilerPath Value="$(CompPath)"/>
|
||||
</Other>
|
||||
</CompilerOptions>
|
||||
<Debugging>
|
||||
<Exceptions Count="3">
|
||||
<Item1>
|
||||
<Name Value="EAbort"/>
|
||||
</Item1>
|
||||
<Item2>
|
||||
<Name Value="ECodetoolError"/>
|
||||
</Item2>
|
||||
<Item3>
|
||||
<Name Value="EFOpenError"/>
|
||||
</Item3>
|
||||
</Exceptions>
|
||||
</Debugging>
|
||||
</CONFIG>
|
||||
386
utils/unicode/unihelper.lpr
Normal file
386
utils/unicode/unihelper.lpr
Normal file
@ -0,0 +1,386 @@
|
||||
{ Unicode tables parser.
|
||||
|
||||
Copyright (c) 2012 by Inoussa OUEDRAOGO
|
||||
|
||||
The source code is distributed under the Library GNU
|
||||
General Public License with the following modification:
|
||||
|
||||
- object files and libraries linked into an application may be
|
||||
distributed without source code.
|
||||
|
||||
If you didn't receive a copy of the file COPYING, contact:
|
||||
Free Software Foundation
|
||||
675 Mass Ave
|
||||
Cambridge, MA 02139
|
||||
USA
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. }
|
||||
|
||||
{ This program generates tables as include-files for use
|
||||
with the unicode related sources. It expects the following
|
||||
unicode.org's files to be present in the same folder :
|
||||
* HangulSyllableType.txt
|
||||
* PropList.txt
|
||||
* UnicodeData.txt
|
||||
* allkeys.txt
|
||||
}
|
||||
|
||||
{$DEFINE UCA_TEST}
|
||||
program unihelper;
|
||||
|
||||
{$mode objfpc}{$H+}
|
||||
|
||||
uses
|
||||
SysUtils, Classes,
|
||||
helper, uca_test;
|
||||
|
||||
const
|
||||
SUsage =
|
||||
'This program generates tables as include-files for use ' + sLineBreak +
|
||||
' with the unicode related sources. It expects the following ' + sLineBreak +
|
||||
' unicode.org''s files to be present in the same folder : ' + sLineBreak +
|
||||
' * HangulSyllableType.txt ' + sLineBreak +
|
||||
' * PropList.txt ' + sLineBreak +
|
||||
' * UnicodeData.txt ' + sLineBreak +
|
||||
' * allkeys.txt : Note that this file is the one provided for the CLDR root.' + sLineBreak +
|
||||
'' + sLineBreak +
|
||||
'Usage : unihelper [<dataDir> <outputDir>] ' + sLineBreak +
|
||||
' where ' + sLineBreak +
|
||||
' dataDir : the directory where are stored the unicode files. The default' + sLineBreak +
|
||||
' value is the program''s directory.' + sLineBreak +
|
||||
' outputDir : The directory where the generated files will be stored. The' + sLineBreak +
|
||||
' default value is the program''s directory.'+sLineBreak;
|
||||
|
||||
function DumpCodePoint(ACodePoint : TCodePointRec) : string;
|
||||
begin
|
||||
Result := '';
|
||||
if (ACodePoint.LineType = 0) then
|
||||
WriteStr(Result,IntToHex(ACodePoint.CodePoint,4))
|
||||
else
|
||||
WriteStr(Result,IntToHex(ACodePoint.StartCodePoint,4),'..',IntToHex(ACodePoint.EndCodePoint,4));
|
||||
end;
|
||||
|
||||
var
|
||||
dataPath, outputPath : string;
|
||||
stream, binStream, binStream2 : TMemoryStream;
|
||||
hangulSyllables : TCodePointRecArray;
|
||||
ucaBook : TUCA_DataBook;
|
||||
ucaPropBook : PUCA_PropBook;
|
||||
propList : TPropListLineRecArray;
|
||||
whiteSpaceCodePoints : TCodePointRecArray;
|
||||
props : TPropRecArray;
|
||||
numericTable : TNumericValueArray;
|
||||
decomposition : TDecompositionArray;
|
||||
decompositionBook : TDecompositionBook;
|
||||
data : TDataLineRecArray;
|
||||
//----------------
|
||||
lvl3table1 : T3lvlBmp1Table;
|
||||
lvl3table2 : T3lvlBmp2Table;
|
||||
lvl3table3 : T3lvlBmp3Table;
|
||||
//----------------
|
||||
s : ansistring;
|
||||
i, k, h : Integer;
|
||||
p : PDataLineRec;
|
||||
r : TDataLineRecArray;
|
||||
olvl3table1 : T3lvlOBmp1Table;
|
||||
olvl3table2 : T3lvlOBmp2Table;
|
||||
olvl3table3 : T3lvlOBmp3Table;
|
||||
//----------------
|
||||
hs, ls : Word;
|
||||
ucaFirstTable : TucaBmpFirstTable;
|
||||
ucaSecondTable : TucaBmpSecondTable;
|
||||
ucaoFirstTable : TucaoBmpFirstTable;
|
||||
ucaoSecondTable : TucaOBmpSecondTable;
|
||||
WL : Integer;
|
||||
begin
|
||||
WriteLn(SUsage+sLineBreak);
|
||||
if (ParamCount > 0) then
|
||||
dataPath := IncludeTrailingPathDelimiter(ParamStr(1))
|
||||
else
|
||||
dataPath := ExtractFilePath(ParamStr(0));
|
||||
if (ParamCount > 1) then
|
||||
outputPath := IncludeTrailingPathDelimiter(ParamStr(2))
|
||||
else
|
||||
outputPath := dataPath;
|
||||
if not DirectoryExists(outputPath) then begin
|
||||
WriteLn('Directory not found : ',outputPath);
|
||||
if ForceDirectories(outputPath) then begin
|
||||
WriteLn(' directory created successfully');
|
||||
end else begin
|
||||
WriteLn(' fail to create directory.');
|
||||
Halt(1);
|
||||
end;
|
||||
end;
|
||||
if not(
|
||||
FileExists(dataPath + 'HangulSyllableType.txt') and
|
||||
FileExists(dataPath + 'PropList.txt') and
|
||||
FileExists(dataPath + 'UnicodeData.txt') and
|
||||
FileExists(dataPath + 'allkeys.txt')
|
||||
)
|
||||
then begin
|
||||
WriteLn('File(s) not found : HangulSyllableType.txt or PropList.txt or UnicodeData.txt or allkeys.txt .');
|
||||
Halt(1);
|
||||
end;
|
||||
|
||||
binStream2 := nil;
|
||||
binStream := nil;
|
||||
stream := TMemoryStream.Create();
|
||||
try
|
||||
binStream := TMemoryStream.Create();
|
||||
binStream2 := TMemoryStream.Create();
|
||||
WriteLn('Load file HangulSyllableType.txt ...', DateTimeToStr(Now));
|
||||
stream.LoadFromFile(dataPath + 'HangulSyllableType.txt');
|
||||
stream.Position := 0;
|
||||
hangulSyllables := nil;
|
||||
ParseHangulSyllableTypes(stream,hangulSyllables);
|
||||
stream.Clear();
|
||||
|
||||
WriteLn('Load file PropList.txt ...', DateTimeToStr(Now));
|
||||
stream.LoadFromFile(dataPath + 'PropList.txt');
|
||||
stream.Position := 0;
|
||||
propList := nil;
|
||||
ParseProps(stream,propList);
|
||||
stream.Clear();
|
||||
whiteSpaceCodePoints := FindCodePointsByProperty('White_Space',propList);
|
||||
writeln(' PropList Length = ',Length(propList));
|
||||
writeln(' White_Space Length = ',Length(whiteSpaceCodePoints));
|
||||
for i := Low(whiteSpaceCodePoints) to High(whiteSpaceCodePoints) do
|
||||
WriteLn(' ',DumpCodePoint(whiteSpaceCodePoints[i]):12,' , IsWhiteSpace = ',IsWhiteSpace(whiteSpaceCodePoints[i].CodePoint,whiteSpaceCodePoints));
|
||||
|
||||
WriteLn('Load file UnicodeData.txt ...', DateTimeToStr(Now));
|
||||
stream.LoadFromFile(dataPath + 'UnicodeData.txt');
|
||||
stream.Position := 0;
|
||||
WriteLn('Parse file ...', DateTimeToStr(Now));
|
||||
data := nil;
|
||||
props := nil;
|
||||
Parse_UnicodeData(stream,props,numericTable,data,decomposition,hangulSyllables,whiteSpaceCodePoints);
|
||||
WriteLn('Decomposition building ...');
|
||||
MakeDecomposition(decomposition,decompositionBook);
|
||||
|
||||
WriteLn('Load file UCA allkeys.txt ...', DateTimeToStr(Now));
|
||||
stream.LoadFromFile(dataPath + 'allkeys.txt');
|
||||
stream.Position := 0;
|
||||
ParseUCAFile(stream,ucaBook);
|
||||
{ $IFDEF UCA_TEST}
|
||||
k := 0; WL := 0; ;
|
||||
for i := 0 to Length(ucaBook.Lines) - 1 do begin
|
||||
h := GetPropID(ucaBook.Lines[i].CodePoints[0],data);
|
||||
if (h <> -1) and
|
||||
({props[h].HangulSyllable or} (props[h].DecompositionID <> -1))
|
||||
then begin
|
||||
Inc(k);
|
||||
ucaBook.Lines[i].Stored := False;
|
||||
end else begin
|
||||
ucaBook.Lines[i].Stored := True;
|
||||
if Length(ucaBook.Lines[i].Weights) > WL then
|
||||
WL := Length(ucaBook.Lines[i].Weights);
|
||||
end;
|
||||
end;
|
||||
WriteLn(
|
||||
'UCA, Version = ',ucaBook.Version,'; entries count = ',Length(ucaBook.Lines),' ; Hangul # = ',k,
|
||||
'Max Weights Length = ',WL
|
||||
);
|
||||
{ $ENDIF UCA_TEST}
|
||||
WriteLn('Construct UCA Property Book ...');
|
||||
ucaPropBook := nil;
|
||||
MakeUCA_Props(@ucaBook,ucaPropBook);
|
||||
{$IFDEF UCA_TEST}
|
||||
uca_CheckProp_1(ucaBook,ucaPropBook);
|
||||
uca_CheckProp_x(ucaBook,ucaPropBook);
|
||||
{$ENDIF UCA_TEST}
|
||||
WriteLn('Construct UCA BMP tables ...');
|
||||
MakeUCA_BmpTables(ucaFirstTable,ucaSecondTable,ucaPropBook);
|
||||
WriteLn(' UCA BMP Second Table Length = ',Length(ucaSecondTable));
|
||||
{$IFDEF UCA_TEST}
|
||||
uca_CheckProp_1y(ucaBook,ucaPropBook,@ucaFirstTable,@ucaSecondTable);
|
||||
{$ENDIF UCA_TEST}
|
||||
|
||||
WriteLn('Construct UCA OBMP tables ...');
|
||||
MakeUCA_OBmpTables(ucaoFirstTable,ucaoSecondTable,ucaPropBook);
|
||||
WriteLn(' UCA OBMP Second Table Length = ',Length(ucaoSecondTable));
|
||||
{$IFDEF UCA_TEST}
|
||||
uca_CheckProp_2y(ucaBook,ucaPropBook,@ucaoFirstTable,@ucaoSecondTable);
|
||||
{$ENDIF UCA_TEST}
|
||||
WriteLn('Generate UCA Props tables ...');
|
||||
binStream.Clear();
|
||||
GenerateLicenceText(binStream);
|
||||
GenerateUCA_PropTable(binStream,ucaPropBook);
|
||||
WriteLn('Generate UCA BMP tables ...');
|
||||
stream.Clear();
|
||||
GenerateLicenceText(stream);
|
||||
GenerateUCA_Head(stream,@ucaBook,ucaPropBook);
|
||||
GenerateUCA_BmpTables(stream,binStream,ucaFirstTable,ucaSecondTable,THIS_ENDIAN);
|
||||
WriteLn('Generate UCA OBMP tables ...');
|
||||
GenerateUCA_OBmpTables(stream,binStream,ucaoFirstTable,ucaoSecondTable,THIS_ENDIAN);
|
||||
stream.SaveToFile(outputPath + 'ucadata.inc');
|
||||
s := outputPath + 'ucadata.inc';
|
||||
s := GenerateEndianIncludeFileName(s);
|
||||
binStream.SaveToFile(s);
|
||||
binStream.Clear();
|
||||
|
||||
|
||||
stream.Clear();
|
||||
GenerateLicenceText(stream);
|
||||
WriteLn('File parsed ...', DateTimeToStr(Now));
|
||||
WriteLn(' Props Len = ',Length(props));
|
||||
WriteLn(' Data Len = ',Length(data));
|
||||
|
||||
{WriteLn('BMP Tables building ...', DateTimeToStr(Now));
|
||||
MakeBmpTables(firstTable,secondTable,props,data);
|
||||
WriteLn(' First Table length = ',Length(firstTable));
|
||||
WriteLn(' Second Table length = ',Length(secondTable));}
|
||||
|
||||
WriteLn('BMP Tables building ...', DateTimeToStr(Now));
|
||||
MakeBmpTables3Levels(lvl3table1,lvl3table2,lvl3table3,data);
|
||||
WriteLn(' 3 Levels Tables :');
|
||||
WriteLn(' Len 1 = ',Length(lvl3table1));
|
||||
WriteLn(' Len 2 = ',Length(lvl3table2));
|
||||
WriteLn(' Len 3 = ',Length(lvl3table3));
|
||||
for i := 0 to 255 do begin
|
||||
for k := 0 to 15 do begin
|
||||
for h := 0 to 15 do begin
|
||||
if lvl3table3[lvl3table2[lvl3table1[i]][k]][h] <>
|
||||
GetPropID(256*i + 16*k +h,data)
|
||||
then begin
|
||||
writeln('3 levels errors, i=',i,'; k=',k,'; h=',h);
|
||||
end;
|
||||
end;
|
||||
end;
|
||||
end;
|
||||
|
||||
binStream2.Clear();
|
||||
WriteLn('Source generation ...', DateTimeToStr(Now));
|
||||
WriteLn('BMP Tables sources ...', DateTimeToStr(Now));
|
||||
Generate3lvlBmpTables(stream,lvl3table1,lvl3table2,lvl3table3);
|
||||
WriteLn('Properties Table sources ...', DateTimeToStr(Now));
|
||||
binStream.Clear();
|
||||
GenerateNumericTable(binStream,numericTable,True);
|
||||
binStream.SaveToFile(outputPath + 'unicodenumtable.pas');
|
||||
binStream.Clear();
|
||||
GeneratePropTable(binStream,props,ekLittle);
|
||||
GeneratePropTable(binStream2,props,ekBig);
|
||||
//-------------------------------------------
|
||||
|
||||
r := Compress(data);
|
||||
|
||||
//-------------------
|
||||
WriteLn('OBMP Tables building ...', DateTimeToStr(Now));
|
||||
MakeOBmpTables3Levels(olvl3table1,olvl3table2,olvl3table3,r);
|
||||
WriteLn(' 3 Levels Tables :');
|
||||
WriteLn(' Len 1 = ',Length(olvl3table1));
|
||||
WriteLn(' Len 2 = ',Length(olvl3table2));
|
||||
WriteLn(' Len 3 = ',Length(olvl3table3));
|
||||
for i := 0 to 1023 do begin
|
||||
for k := 0 to 31 do begin
|
||||
for h := 0 to 31 do begin
|
||||
if olvl3table3[olvl3table2[olvl3table1[i]][k]][h] <>
|
||||
GetPropID(ToUCS4(HIGH_SURROGATE_BEGIN + i,LOW_SURROGATE_BEGIN + (k*32) + h),data)
|
||||
then begin
|
||||
writeln('3, OBMP levels errors, i=',i,'; k=',k,'; h=',h);
|
||||
end;
|
||||
end;
|
||||
end;
|
||||
end;
|
||||
WriteLn('OBMP Tables sources ...', DateTimeToStr(Now));
|
||||
Generate3lvlOBmpTables(stream,olvl3table1,olvl3table2,olvl3table3);
|
||||
|
||||
//---------------------
|
||||
WriteLn('Decomposition Table sources ...', DateTimeToStr(Now));
|
||||
GenerateDecompositionBookTable(binStream,decompositionBook,ekLittle);
|
||||
GenerateDecompositionBookTable(binStream2,decompositionBook,ekBig);
|
||||
stream.SaveToFile(outputPath + 'unicodedata.inc');
|
||||
binStream.SaveToFile(outputPath + 'unicodedata_le.inc');
|
||||
binStream2.SaveToFile(outputPath + 'unicodedata_be.inc');
|
||||
binStream.Clear();
|
||||
binStream2.Clear();
|
||||
|
||||
|
||||
h := -1;
|
||||
for i := Low(data) to High(data) do
|
||||
if (data[i].CodePoint > $FFFF) then begin
|
||||
h := i;
|
||||
Break;
|
||||
end;
|
||||
stream.Clear();
|
||||
for i := h to High(data) do begin
|
||||
p := @data[i];
|
||||
if (p^.LineType = 0) then begin
|
||||
FromUCS4(p^.CodePoint,hs,ls);
|
||||
//k := GetProp(hs,ls,props,ofirstTable,osecondTable)^.PropID;
|
||||
k := GetProp(
|
||||
(hs-HIGH_SURROGATE_BEGIN),(ls-LOW_SURROGATE_BEGIN),
|
||||
props,olvl3table1,olvl3table2,olvl3table3
|
||||
)^.PropID;
|
||||
if (p^.PropID <> k) then begin
|
||||
s := Format('#%d-%d #%d',[p^.CodePoint,p^.PropID,k]) + sLineBreak;
|
||||
stream.Write(s[1],Length(s));
|
||||
end;
|
||||
end else begin
|
||||
for h := p^.StartCodePoint to p^.EndCodePoint do begin
|
||||
FromUCS4(h,hs,ls);
|
||||
//k := GetProp(hs,ls,props,ofirstTable,osecondTable)^.PropID;
|
||||
k := GetProp(
|
||||
(hs-HIGH_SURROGATE_BEGIN),(ls-LOW_SURROGATE_BEGIN),
|
||||
props,olvl3table1,olvl3table2,olvl3table3
|
||||
)^.PropID;
|
||||
if (p^.PropID <> k) then begin
|
||||
s := Format('##%d;%d-%d #%d',[p^.StartCodePoint,p^.EndCodePoint,p^.PropID,k]) + sLineBreak;
|
||||
stream.Write(s[1],Length(s));
|
||||
Break
|
||||
end;
|
||||
end;
|
||||
end;
|
||||
end;
|
||||
stream.SaveToFile(outputPath + 'diff-obmp.txt');
|
||||
|
||||
stream.Clear();
|
||||
for i := Low(data) to High(data) do begin
|
||||
p := @data[i];
|
||||
if (p^.LineType = 0) then begin
|
||||
k := GetPropID(p^.CodePoint,r);
|
||||
if (p^.PropID <> k) then begin
|
||||
s := Format('#%d-%d #%d',[p^.CodePoint,p^.PropID,k]) + sLineBreak;
|
||||
stream.Write(s[1],Length(s));
|
||||
end;
|
||||
end else begin
|
||||
for h := p^.StartCodePoint to p^.EndCodePoint do begin
|
||||
k := GetPropID(h,r);
|
||||
if (p^.PropID <> k) then begin
|
||||
s := Format('##%d;%d-%d #%d',[p^.StartCodePoint,p^.EndCodePoint,p^.PropID,k]) + sLineBreak;
|
||||
stream.Write(s[1],Length(s));
|
||||
Break
|
||||
end;
|
||||
end;
|
||||
end;
|
||||
end;
|
||||
stream.SaveToFile(outputPath + 'diff.txt');
|
||||
stream.Clear();
|
||||
for i := Low(r) to High(r) do begin
|
||||
p := @r[i];
|
||||
if (p^.LineType = 0) then begin
|
||||
k := GetPropID(p^.CodePoint,data);
|
||||
if (p^.PropID <> k) then begin
|
||||
s := Format('#%d-%d #%d',[p^.CodePoint,p^.PropID,k]) + sLineBreak;
|
||||
stream.Write(s[1],Length(s));
|
||||
end;
|
||||
end else begin
|
||||
for h := p^.StartCodePoint to p^.EndCodePoint do begin
|
||||
k := GetPropID(h,r);
|
||||
if (p^.PropID <> k) then begin
|
||||
s := Format('##%d;%d-%d #%d',[p^.StartCodePoint,p^.EndCodePoint,p^.PropID,k]) + sLineBreak;
|
||||
stream.Write(s[1],Length(s));
|
||||
Break
|
||||
end;
|
||||
end;
|
||||
end;
|
||||
end;
|
||||
stream.SaveToFile(outputPath + 'diff2.txt');
|
||||
finally
|
||||
binStream2.Free();
|
||||
binStream.Free();
|
||||
stream.Free();
|
||||
end;
|
||||
end.
|
||||
|
||||
66
utils/unicode/weight_derivation.inc
Normal file
66
utils/unicode/weight_derivation.inc
Normal file
@ -0,0 +1,66 @@
|
||||
|
||||
function IsCJK_Unified_Ideographs(ACodePoint : Cardinal) : Boolean;inline;
|
||||
begin
|
||||
Result := (ACodePoint >= $4E00) and (ACodePoint <= $9FCC); // $9FFF
|
||||
end;
|
||||
|
||||
function IsCJK_Compatibility_Ideographs(ACodePoint : Cardinal) : Boolean;inline;
|
||||
begin
|
||||
Result := (ACodePoint >= $F900) and (ACodePoint <= $FAFF);
|
||||
end;
|
||||
|
||||
function IsCJK_Unified_Ideographs_Extension_A(ACodePoint : Cardinal) : Boolean;inline;
|
||||
begin
|
||||
Result := (ACodePoint >= $3400) and (ACodePoint <= $4DB5); // $4DBF
|
||||
end;
|
||||
|
||||
function IsCJK_Unified_Ideographs_Extension_B(ACodePoint : Cardinal) : Boolean;inline;
|
||||
begin
|
||||
Result := (ACodePoint >= $20000) and (ACodePoint <= $2A6D6); // $2A6DF
|
||||
end;
|
||||
|
||||
function IsCJK_Unified_Ideographs_Extension_C(ACodePoint : Cardinal) : Boolean;inline;
|
||||
begin
|
||||
Result := (ACodePoint >= $2A700) and (ACodePoint <= $2B734); // $2B73F
|
||||
end;
|
||||
|
||||
function IsCJK_Unified_Ideographs_Extension_D(ACodePoint : Cardinal) : Boolean;inline;
|
||||
begin
|
||||
Result := (ACodePoint >= $2B740) and (ACodePoint <= $2B81D); // $2B81F
|
||||
end;
|
||||
|
||||
function IsCJK_Compatibility_Ideographs_Supplement(ACodePoint : Cardinal) : Boolean;inline;
|
||||
begin
|
||||
Result := (ACodePoint >= $2F800) and (ACodePoint <= $2FA1F);
|
||||
end;
|
||||
|
||||
procedure DeriveWeight(const ACodePoint : Cardinal; AResult : PUCA_PropWeights);
|
||||
const
|
||||
BASE_1 = Word($FB40);
|
||||
BASE_2 = Word($FB80);
|
||||
BASE_3 = Word($FBC0);
|
||||
var
|
||||
base : Word;
|
||||
begin
|
||||
if IsCJK_Unified_Ideographs(ACodePoint) or IsCJK_Compatibility_Ideographs(ACodePoint) then
|
||||
base := BASE_1
|
||||
else if IsCJK_Unified_Ideographs_Extension_A(ACodePoint) or
|
||||
IsCJK_Unified_Ideographs_Extension_B(ACodePoint) or
|
||||
IsCJK_Unified_Ideographs_Extension_C(ACodePoint) or
|
||||
IsCJK_Unified_Ideographs_Extension_D(ACodePoint) or
|
||||
IsCJK_Compatibility_Ideographs_Supplement(ACodePoint)
|
||||
then begin
|
||||
base := BASE_2;
|
||||
end else begin
|
||||
base := BASE_3;
|
||||
end;
|
||||
|
||||
|
||||
AResult[0].Weights[0] := base + (ACodePoint shr 15);
|
||||
AResult[0].Weights[1] := $20;
|
||||
AResult[0].Weights[2] := $2;
|
||||
|
||||
AResult[1].Weights[0] := (ACodePoint and $7FFF) or $8000;
|
||||
AResult[1].Weights[1] := 0;
|
||||
AResult[1].Weights[2] := 0;
|
||||
end;
|
||||
Loading…
Reference in New Issue
Block a user