rtl, utils: apply patch of Inoussa:

This patch implements collation loading at runtime. This reduces the final executable's size, as the collation data is now stored externally. Note that it requires the external collation files to be shipped and the program to load the collations it needs using the "LoadCollation"/"RegisterCollation" procedure(s).

The external collation files are produced by "cldrparser" (while producing the static files). The external file of the root collation "ducet" is produced by "unihelper".

It is important to note that these files are endian specific :
 * collation_*_be.bco for big endian systems
 * collation_*_le.bco for little endian systems.

The root collation should at least be registered, either statically by using the "unicodeducet" unit or dynamically by making a call such as RegisterCollation(<collation dir>,'ducet').
It is possible, in the same application, to make use of both static and dynamic collations.

git-svn-id: trunk@25295 -
This commit is contained in:
paul 2013-08-19 13:42:11 +00:00
parent 721d695ae4
commit f285948fcb
5 changed files with 524 additions and 33 deletions

View File

@ -274,6 +274,7 @@ type
Props : PUCA_PropItemRec; Props : PUCA_PropItemRec;
VariableLowLimit : Word; VariableLowLimit : Word;
VariableHighLimit : Word; VariableHighLimit : Word;
Dynamic : Boolean;
public public
function IsVariable(const AWeight : PUCA_PropWeights) : Boolean; inline; function IsVariable(const AWeight : PUCA_PropWeights) : Boolean; inline;
end; end;
@ -320,8 +321,13 @@ type
function CompareSortKey(const A, B : TUCASortKey) : Integer;overload; function CompareSortKey(const A, B : TUCASortKey) : Integer;overload;
function CompareSortKey(const A : TUCASortKey; const B : array of Word) : Integer;overload; function CompareSortKey(const A : TUCASortKey; const B : array of Word) : Integer;overload;
function RegisterCollation(const ACollation : PUCA_DataBook) : Boolean; function RegisterCollation(const ACollation : PUCA_DataBook) : Boolean;overload;
function RegisterCollation(
const ADirectory,
ALanguage : string
) : Boolean;overload;
function UnregisterCollation(const AName : ansistring): Boolean; function UnregisterCollation(const AName : ansistring): Boolean;
procedure UnregisterCollations(const AFreeDynamicCollations : Boolean);
function FindCollation(const AName : ansistring): PUCA_DataBook;overload; function FindCollation(const AName : ansistring): PUCA_DataBook;overload;
function FindCollation(const AIndex : Integer): PUCA_DataBook;overload; function FindCollation(const AIndex : Integer): PUCA_DataBook;overload;
function GetCollationCount() : Integer; function GetCollationCount() : Integer;
@ -330,6 +336,29 @@ type
const ABaseName : ansistring; const ABaseName : ansistring;
const AChangedFields : TCollationFields const AChangedFields : TCollationFields
); );
function LoadCollation(
const AData : Pointer;
const ADataLength : Integer
) : PUCA_DataBook;overload;
function LoadCollation(const AFileName : string) : PUCA_DataBook;overload;
function LoadCollation(
const ADirectory,
ALanguage : string
) : PUCA_DataBook;overload;
procedure FreeCollation(AItem : PUCA_DataBook);
type
TEndianKind = (Little, Big);
const
ENDIAN_SUFFIX : array[TEndianKind] of string[2] = ('le','be');
{$IFDEF ENDIAN_LITTLE}
ENDIAN_NATIVE = TEndianKind.Little;
ENDIAN_NON_NATIVE = TEndianKind.Big;
{$ENDIF ENDIAN_LITTLE}
{$IFDEF ENDIAN_BIG}
ENDIAN_NATIVE = TEndianKind.Big;
ENDIAN_NON_NATIVE = TEndianKind.Little;
{$ENDIF ENDIAN_BIG}
resourcestring resourcestring
SCollationNotFound = 'Collation not found : "%s".'; SCollationNotFound = 'Collation not found : "%s".';
@ -535,6 +564,21 @@ begin
Result := a <= Cardinal(b); Result := a <= Cardinal(b);
end; end;
type
  TBitOrder = 0..7;

{ Tells whether bit number ABit (0 = least significant) of AData is set. }
function IsBitON(const AData : Byte; const ABit : TBitOrder) : Boolean ;inline;
begin
  // Shift the wanted bit down to position 0 and test it there.
  Result := ( ( ( AData shr ABit ) and 1 ) = 1 );
end;
{ Sets (AValue = True) or clears (AValue = False) bit number ABit of AData,
  leaving the other bits untouched. }
procedure SetBit(var AData : Byte; const ABit : TBitOrder; const AValue : Boolean);inline;
var
  mask : Byte;
begin
  mask := 1 shl (ABit mod 8);
  if not AValue then
    AData := AData and ( not mask )
  else
    AData := AData or mask;
end;
var var
CollationTable : array of PUCA_DataBook; CollationTable : array of PUCA_DataBook;
function IndexOfCollation(const AName : string) : Integer; function IndexOfCollation(const AName : string) : Integer;
@ -565,6 +609,23 @@ begin
end; end;
end; end;
{ Loads the collation of ALanguage from its external file located in
  ADirectory and registers it.
  Returns True when the collation has been loaded and registered. When the
  file cannot be loaded the result is False; when the registration is refused
  or raises, the freshly loaded collation is released before returning (or
  before the exception is re-raised). }
function RegisterCollation(const ADirectory, ALanguage : string) : Boolean;
var
  loaded : PUCA_DataBook;
begin
  Result := False;
  loaded := LoadCollation(ADirectory,ALanguage);
  if (loaded <> nil) then begin
    try
      Result := RegisterCollation(loaded);
    except
      // Do not leak the collation if the registration blows up.
      FreeCollation(loaded);
      raise;
    end;
    if not Result then
      FreeCollation(loaded);
  end;
end;
function UnregisterCollation(const AName : ansistring): Boolean; function UnregisterCollation(const AName : ansistring): Boolean;
var var
i, c : Integer; i, c : Integer;
@ -582,6 +643,21 @@ begin
end; end;
end; end;
{ Removes every collation from the registration table.

  AFreeDynamicCollations : when True, the entries flagged as Dynamic are
    released with FreeCollation before the table is emptied; when False the
    table is simply emptied and nothing is freed, so collations that are
    still referenced elsewhere stay valid. }
procedure UnregisterCollations(const AFreeDynamicCollations : Boolean);
var
  i : Integer;
  cl : PUCA_DataBook;
begin
  if AFreeDynamicCollations then begin
    for i := Low(CollationTable) to High(CollationTable) do begin
      cl := CollationTable[i];
      if (cl <> nil) and cl^.Dynamic then begin
        // Clear the slot first so the table never holds a dangling pointer.
        CollationTable[i] := nil;
        FreeCollation(cl);
      end;
    end;
  end;
  SetLength(CollationTable,0);
end;
function FindCollation(const AName : ansistring): PUCA_DataBook;overload; function FindCollation(const AName : ansistring): PUCA_DataBook;overload;
var var
i : Integer; i : Integer;
@ -632,6 +708,190 @@ begin
p^.VariableLowLimit := base^.VariableHighLimit; p^.VariableLowLimit := base^.VariableHighLimit;
end; end;
type
{ Header found at the very start of a serialized collation. LoadCollation
  copies it verbatim from the input data, so the field order and the packed
  layout are part of the binary format; the tools that write the files
  declare the same record. }
TSerializedCollationHeader = packed record
// Name of the base collation; when empty, LoadCollation falls back to
// ROOT_COLLATION_NAME unless the collation is the root one itself.
Base : TCollationName;
Version : TCollationName;
CollationName : TCollationName;
// Ordinal value of a TUCA_VariableKind.
VariableWeight : Byte;
// Bit set : bits 0..3 are loaded into Backwards[0..3].
Backwards : Byte;
// The lengths below are byte counts : LoadCollation allocates and reads
// exactly that many bytes for each table, in declaration order.
BMP_Table1Length : DWord;
BMP_Table2Length : DWord;
OBMP_Table1Length : DWord;
OBMP_Table2Length : DWord;
// Size in bytes of the property data that follows the tables.
PropCount : DWord;
VariableLowLimit : Word;
VariableHighLimit : Word;
// Bit set : bit "i" set means TCollationField(i) is a changed field.
ChangedFields : Byte;
end;
PSerializedCollationHeader = ^TSerializedCollationHeader;
{ Releases a collation created by LoadCollation.
  Nothing is done for a nil item or for an item that is not flagged Dynamic.
  The serialized header stored right after the TUCA_DataBook record provides
  the sizes handed to FreeMem for each table. }
procedure FreeCollation(AItem : PUCA_DataBook);
var
  hdr : PSerializedCollationHeader;
begin
  if (AItem <> nil) and AItem^.Dynamic then begin
    // The header lives in the same memory block, just behind the data book.
    hdr := PSerializedCollationHeader(PtrUInt(AItem) + SizeOf(TUCA_DataBook));
    if Assigned(AItem^.BMP_Table1) then
      FreeMem(AItem^.BMP_Table1,hdr^.BMP_Table1Length);
    if Assigned(AItem^.BMP_Table2) then
      FreeMem(AItem^.BMP_Table2,hdr^.BMP_Table2Length);
    if Assigned(AItem^.OBMP_Table1) then
      FreeMem(AItem^.OBMP_Table1,hdr^.OBMP_Table1Length);
    if Assigned(AItem^.OBMP_Table2) then
      FreeMem(AItem^.OBMP_Table2,hdr^.OBMP_Table2Length);
    if Assigned(AItem^.Props) then
      FreeMem(AItem^.Props,hdr^.PropCount);
    FreeMem(AItem,(SizeOf(TUCA_DataBook)+SizeOf(TSerializedCollationHeader)));
  end;
end;
{ Builds a dynamic collation from a serialized collation image held in memory.

  AData       : start of the image : a TSerializedCollationHeader followed by
                the BMP tables, the OBMP tables and the property data, in
                that order.
  ADataLength : number of bytes available at AData.

  Returns the new collation, flagged Dynamic, which must be released with
  FreeCollation. Returns nil when the image is missing, truncated, or
  declares table sizes larger than the data actually supplied; in that case
  everything allocated so far is released.
  Any exception raised while loading is propagated after the partially built
  collation has been released. }
function LoadCollation(
  const AData       : Pointer;
  const ADataLength : Integer
) : PUCA_DataBook;
var
  dataPointer : PByte;
  readedLength : LongInt;

  // True when ALength more bytes are still available in the input image.
  // Used before allocating, so that a corrupted header cannot trigger a
  // huge allocation.
  function CanRead(const ALength : DWord) : Boolean;
  begin
    Result := (ALength <= DWord(ADataLength - readedLength));
  end;

  // Copies ALength bytes from the current position to ADest and advances.
  // Fails, without copying, when the request is negative or exceeds the
  // remaining input (written as a subtraction to avoid integer overflow).
  function ReadBuffer(ADest : Pointer; ALength : LongInt) : Boolean;
  begin
    Result := (ALength >= 0) and (ALength <= (ADataLength - readedLength));
    if not Result then
      exit;
    Move(dataPointer^,ADest^,ALength);
    Inc(dataPointer,ALength);
    readedLength := readedLength + ALength;
  end;

var
  r : PUCA_DataBook;
  h : PSerializedCollationHeader;
  cfs : TCollationFields;
  i : Integer;
  baseName : TCollationName;
  loaded : Boolean;
begin
  Result := nil;
  if (AData = nil) or (ADataLength <= 0) then
    exit;
  readedLength := 0;
  dataPointer := AData;
  // One zero-filled block : the data book immediately followed by a copy of
  // the serialized header (FreeCollation relies on this layout).
  r := AllocMem((SizeOf(TUCA_DataBook)+SizeOf(TSerializedCollationHeader)));
  loaded := False;
  try
    h := PSerializedCollationHeader(PtrUInt(r) + SizeOf(TUCA_DataBook));
    if not ReadBuffer(h,SizeOf(TSerializedCollationHeader)) then
      exit;
    r^.Version := h^.Version;
    r^.CollationName := h^.CollationName;
    r^.VariableWeight := TUCA_VariableKind(h^.VariableWeight);
    r^.Backwards[0] := IsBitON(h^.Backwards,0);
    r^.Backwards[1] := IsBitON(h^.Backwards,1);
    r^.Backwards[2] := IsBitON(h^.Backwards,2);
    r^.Backwards[3] := IsBitON(h^.Backwards,3);
    if (h^.BMP_Table1Length > 0) then begin
      if not CanRead(h^.BMP_Table1Length) then
        exit;
      r^.BMP_Table1 := GetMem(h^.BMP_Table1Length);
      if not ReadBuffer(r^.BMP_Table1,h^.BMP_Table1Length) then
        exit;
    end;
    if (h^.BMP_Table2Length > 0) then begin
      if not CanRead(h^.BMP_Table2Length) then
        exit;
      r^.BMP_Table2 := GetMem(h^.BMP_Table2Length);
      if not ReadBuffer(r^.BMP_Table2,h^.BMP_Table2Length) then
        exit;
    end;
    if (h^.OBMP_Table1Length > 0) then begin
      if not CanRead(h^.OBMP_Table1Length) then
        exit;
      r^.OBMP_Table1 := GetMem(h^.OBMP_Table1Length);
      if not ReadBuffer(r^.OBMP_Table1,h^.OBMP_Table1Length) then
        exit;
    end;
    if (h^.OBMP_Table2Length > 0) then begin
      if not CanRead(h^.OBMP_Table2Length) then
        exit;
      r^.OBMP_Table2 := GetMem(h^.OBMP_Table2Length);
      if not ReadBuffer(r^.OBMP_Table2,h^.OBMP_Table2Length) then
        exit;
    end;
    r^.PropCount := h^.PropCount;
    if (h^.PropCount > 0) then begin
      if not CanRead(h^.PropCount) then
        exit;
      r^.Props := GetMem(h^.PropCount);
      if not ReadBuffer(r^.Props,h^.PropCount) then
        exit;
    end;
    r^.VariableLowLimit := h^.VariableLowLimit;
    r^.VariableHighLimit := h^.VariableHighLimit;

    // Rebuild the set of changed fields from its bit representation.
    cfs := [];
    for i := Ord(Low(TCollationField)) to Ord(High(TCollationField)) do begin
      if IsBitON(h^.ChangedFields,i) then
        cfs := cfs + [TCollationField(i)];
    end;
    // A collation without an explicit base derives from the root collation,
    // except the root collation itself which has no base at all.
    if (h^.Base <> '') then
      baseName := h^.Base
    else if (h^.CollationName <> ROOT_COLLATION_NAME) then
      baseName := ROOT_COLLATION_NAME
    else
      baseName := '';
    if (baseName <> '') then
      PrepareCollation(r,baseName,cfs);
    r^.Dynamic := True;
    Result := r;
    loaded := True;
  finally
    // Reached on success, on every early "exit" and on exceptions.
    if not loaded then begin
      Result := nil;
      // FreeCollation ignores items that are not flagged Dynamic, so flag
      // the partially built item to make sure its memory is really released.
      r^.Dynamic := True;
      FreeCollation(r);
    end;
  end;
end;
{$PUSH}
{ Loads a serialized collation from the file AFileName.
  The whole file is read into a temporary buffer which is then handed to the
  in-memory LoadCollation overload.
  Returns nil when the name is empty, when the file cannot be opened or read,
  or when it is smaller than a TSerializedCollationHeader.
  I/O checking is disabled inside this function : errors are detected through
  IOResult and no pending I/O error is left behind for the caller. }
function LoadCollation(const AFileName : string) : PUCA_DataBook;
const
  BLOCK_SIZE = 16*1024;
var
  f : File of Byte;
  locSize, locReaded, c : LongInt;
  locBuffer : PByte;
  locBlockSize : LongInt;
begin
  Result := nil;
{$I-}
  if (AFileName = '') then
    exit;
  Assign(f,AFileName);
  Reset(f);
  // Test the open result before entering the protected block, so that Close
  // is only ever called on a file that has really been opened.
  if (IOResult <> 0) then
    exit;
  try
    locSize := FileSize(f);
    if (IOResult <> 0) or (locSize < SizeOf(TSerializedCollationHeader)) then
      exit;
    locBuffer := GetMem(locSize);
    try
      locBlockSize := BLOCK_SIZE;
      locReaded := 0;
      while (locReaded < locSize) do begin
        // The last block is usually shorter than BLOCK_SIZE.
        if (locBlockSize > (locSize-locReaded)) then
          locBlockSize := locSize-locReaded;
        BlockRead(f,locBuffer[locReaded],locBlockSize,c);
        if (IOResult <> 0) or (c <= 0) then
          exit;
        locReaded := locReaded + c;
      end;
      Result := LoadCollation(locBuffer,locSize);
    finally
      FreeMem(locBuffer,locSize);
    end;
  finally
    Close(f);
    // Discard a possible error from Close : with $I- it would otherwise stay
    // pending and surface in the caller's next I/O operation.
    IOResult;
  end;
end;
{$POP}
{ Loads the collation of ALanguage from ADirectory.
  The file name is built as
    <ADirectory><separator>collation_<ALanguage>_<native endian suffix>.bco
  where the separator is only added when ADirectory is not empty and does not
  already end with one. The result is the one of the file based overload. }
function LoadCollation(const ADirectory, ALanguage : string) : PUCA_DataBook;
var
  dirPrefix : string;
begin
  dirPrefix := ADirectory;
  if (dirPrefix <> '') then begin
    if (dirPrefix[Length(dirPrefix)] <> DirectorySeparator) then
      dirPrefix := dirPrefix + DirectorySeparator;
  end;
  Result := LoadCollation(
              dirPrefix + 'collation_' + ALanguage + '_' +
              ENDIAN_SUFFIX[ENDIAN_NATIVE] + '.bco'
            );
end;
{$INCLUDE unicodedata.inc} {$INCLUDE unicodedata.inc}
{$IFDEF ENDIAN_LITTLE} {$IFDEF ENDIAN_LITTLE}
{$INCLUDE unicodedata_le.inc} {$INCLUDE unicodedata_le.inc}
@ -1026,21 +1286,6 @@ begin
end; end;
end; end;
type
TBitOrder = 0..7;
function IsBitON(const AData : Byte; const ABit : TBitOrder) : Boolean ;inline;
begin
Result := ( ( AData and ( 1 shl ABit ) ) <> 0 );
end;
procedure SetBit(var AData : Byte; const ABit : TBitOrder; const AValue : Boolean);inline;
begin
if AValue then
AData := AData or (1 shl (ABit mod 8))
else
AData := AData and ( not ( 1 shl ( ABit mod 8 ) ) );
end;
{ TUCA_PropItemContextTreeNodeRec } { TUCA_PropItemContextTreeNodeRec }
function TUCA_PropItemContextTreeNodeRec.GetLeftNode: PUCA_PropItemContextTreeNodeRec; function TUCA_PropItemContextTreeNodeRec.GetLeftNode: PUCA_PropItemContextTreeNodeRec;

View File

@ -208,14 +208,16 @@ type
) : Integer; ) : Integer;
function FindCollationDefaultItemName(ACollation : TCldrCollation) : string; function FindCollationDefaultItemName(ACollation : TCldrCollation) : string;
procedure GenerateCdlrCollation( procedure GenerateCdlrCollation(
ACollation : TCldrCollation; ACollation : TCldrCollation;
AItemName : string; AItemName : string;
AStoreName : string; AStoreName : string;
AStream, AStream,
ANativeEndianStream, ANativeEndianStream,
AOtherEndianStream : TStream; AOtherEndianStream,
ARootChars : TOrderedCharacters; ABinaryNativeEndianStream,
ARootWeigths : TUCA_LineRecArray ABinaryOtherEndianStream : TStream;
ARootChars : TOrderedCharacters;
ARootWeigths : TUCA_LineRecArray
); );
procedure GenerateUCA_CLDR_Head( procedure GenerateUCA_CLDR_Head(
@ -1635,14 +1637,16 @@ begin
end; end;
procedure GenerateCdlrCollation( procedure GenerateCdlrCollation(
ACollation : TCldrCollation; ACollation : TCldrCollation;
AItemName : string; AItemName : string;
AStoreName : string; AStoreName : string;
AStream, AStream,
ANativeEndianStream, ANativeEndianStream,
AOtherEndianStream : TStream; AOtherEndianStream,
ARootChars : TOrderedCharacters; ABinaryNativeEndianStream,
ARootWeigths : TUCA_LineRecArray ABinaryOtherEndianStream : TStream;
ARootChars : TOrderedCharacters;
ARootWeigths : TUCA_LineRecArray
); );
procedure AddLine(const ALine : ansistring; ADestStream : TStream); procedure AddLine(const ALine : ansistring; ADestStream : TStream);
@ -1665,6 +1669,8 @@ var
ucaoSecondTable : TucaOBmpSecondTable; ucaoSecondTable : TucaOBmpSecondTable;
locHasProps : Boolean; locHasProps : Boolean;
s : string; s : string;
serializedHeader : TSerializedCollationHeader;
e : TCollationField;
begin begin
locItem := ACollation.Find(AItemName); locItem := ACollation.Find(AItemName);
if (locItem = nil) then if (locItem = nil) then
@ -1707,6 +1713,43 @@ begin
AddLine('{$endif FPC_LITTLE_ENDIAN}',AStream); AddLine('{$endif FPC_LITTLE_ENDIAN}',AStream);
end; end;
GenerateUCA_CLDR_Registration(AStream,@locUcaBook); GenerateUCA_CLDR_Registration(AStream,@locUcaBook);
FillChar(serializedHeader,SizeOf(TSerializedCollationHeader),0);
serializedHeader.Base := locItem.Base;
serializedHeader.Version := ACollation.Version;
serializedHeader.CollationName := ACollation.Language;
serializedHeader.VariableWeight := Ord(locUcaBook.VariableWeight);
SetBit(serializedHeader.Backwards,0,locUcaBook.Backwards[0]);
SetBit(serializedHeader.Backwards,1,locUcaBook.Backwards[1]);
SetBit(serializedHeader.Backwards,2,locUcaBook.Backwards[2]);
SetBit(serializedHeader.Backwards,3,locUcaBook.Backwards[3]);
if locHasProps then begin
serializedHeader.BMP_Table1Length := Length(ucaFirstTable);
serializedHeader.BMP_Table2Length := Length(TucaBmpSecondTableItem) *
(Length(ucaSecondTable) * SizeOf(UInt24));
serializedHeader.OBMP_Table1Length := Length(ucaoFirstTable) * SizeOf(Word);
serializedHeader.OBMP_Table2Length := Length(TucaOBmpSecondTableItem) *
(Length(ucaoSecondTable) * SizeOf(UInt24));
serializedHeader.PropCount := locUcaProps^.ItemSize;
serializedHeader.VariableLowLimit := locUcaProps^.VariableLowLimit;
serializedHeader.VariableHighLimit := locUcaProps^.VariableHighLimit;
end else begin
serializedHeader.VariableLowLimit := High(Word);
serializedHeader.VariableHighLimit := 0;
end;
serializedHeader.ChangedFields := 0;
for e := Low(TCollationField) to High(TCollationField) do begin
if (e in locItem.ChangedFields) then
SetBit(serializedHeader.ChangedFields,Ord(e),True);
end;
ABinaryNativeEndianStream.Write(serializedHeader,SizeOf(serializedHeader));
ReverseRecordBytes(serializedHeader);
ABinaryOtherEndianStream.Write(serializedHeader,SizeOf(serializedHeader));
if locHasProps then begin
GenerateBinaryUCA_BmpTables(ABinaryNativeEndianStream,ABinaryOtherEndianStream,ucaFirstTable,ucaSecondTable);
GenerateBinaryUCA_OBmpTables(ABinaryNativeEndianStream,ABinaryOtherEndianStream,ucaoFirstTable,ucaoSecondTable);
GenerateBinaryUCA_PropTable(ABinaryNativeEndianStream,ABinaryOtherEndianStream,locUcaProps);
end;
finally finally
locSequence.Clear(); locSequence.Clear();
FreeUcaBook(locUcaProps); FreeUcaBook(locUcaProps);

View File

@ -22,9 +22,10 @@
program cldrparser; program cldrparser;
{$mode objfpc}{$H+} {$mode objfpc}{$H+}
{ $define WINCE_TEST}
uses uses
SysUtils, classes, getopts, SysUtils, classes, getopts,{$ifdef WINCE}StreamIO,{$endif}
cldrhelper, helper, cldrtest, cldrxml, unicodeset; cldrhelper, helper, cldrtest, cldrxml, unicodeset;
const const
@ -66,6 +67,12 @@ var
idx, k : Integer; idx, k : Integer;
s : string; s : string;
begin begin
{$ifdef WINCE_TEST}
ADataDir := ExtractFilePath(ParamStr(0))+'data';
AOuputDir := ADataDir;
ACollationFileName := 'sv.xml';
exit(True);
{$endif WINCE_TEST}
if (ParamCount() = 0) then if (ParamCount() = 0) then
exit(False); exit(False);
Result := True; Result := True;
@ -101,14 +108,41 @@ end;
var var
orderedChars : TOrderedCharacters; orderedChars : TOrderedCharacters;
ucaBook : TUCA_DataBook; ucaBook : TUCA_DataBook;
stream, streamNE, streamOE : TMemoryStream; stream, streamNE, streamOE, binaryStreamNE, binaryStreamOE : TMemoryStream;
s, collationFileName, collationTypeName : string; s, collationFileName, collationTypeName : string;
i , c: Integer; i , c: Integer;
collation : TCldrCollation; collation : TCldrCollation;
dataPath, outputPath : string; dataPath, outputPath : string;
collationItem : TCldrCollationItem; collationItem : TCldrCollationItem;
testSuiteFlag : Boolean; testSuiteFlag : Boolean;
{$ifdef WINCE}
fs : TFileStream;
{$endif WINCE}
begin begin
{$ifdef WINCE}
s := ExtractFilePath(ParamStr(0))+'cldr-log.txt';
DeleteFile(s);
fs := TFileStream.Create(s,fmCreate);
AssignStream(Output,fs);
Rewrite(Output);
s := ExtractFilePath(ParamStr(0))+'cldr-err.txt';
DeleteFile(s);
fs := TFileStream.Create(s,fmCreate);
AssignStream(ErrOutput,fs);
Rewrite(ErrOutput);
{$endif WINCE}
{$ifdef WINCE_TEST}
testSuiteFlag := True;
try
exec_tests();
except
on e : Exception do begin
WriteLn('Exception : '+e.Message);
raise;
end;
end;
exit;
{$endif WINCE_TEST}
dataPath := ''; dataPath := '';
outputPath := ''; outputPath := '';
collationFileName := ''; collationFileName := '';
@ -132,10 +166,12 @@ begin
outputPath := dataPath outputPath := dataPath
else else
outputPath := IncludeTrailingPathDelimiter(outputPath); outputPath := IncludeTrailingPathDelimiter(outputPath);
{$ifndef WINCE_TEST}
if (ParamCount() = 0) then begin if (ParamCount() = 0) then begin
WriteLn(SUsageText); WriteLn(SUsageText);
Halt(1); Halt(1);
end; end;
{$endif WINCE_TEST}
if not( if not(
FileExists(dataPath+'UCA_Rules_SHORT.xml') and FileExists(dataPath+'UCA_Rules_SHORT.xml') and
FileExists(dataPath+'allkeys.txt') FileExists(dataPath+'allkeys.txt')
@ -155,6 +191,8 @@ begin
stream := nil; stream := nil;
streamNE := nil; streamNE := nil;
streamOE := nil; streamOE := nil;
binaryStreamNE := nil;
binaryStreamOE := nil;
collation := TCldrCollation.Create(); collation := TCldrCollation.Create();
try try
ParseCollationDocument(collationFileName,collation,TCldrParserMode.HeaderParsing); ParseCollationDocument(collationFileName,collation,TCldrParserMode.HeaderParsing);
@ -194,9 +232,12 @@ begin
stream.Clear(); stream.Clear();
streamNE := TMemoryStream.Create(); streamNE := TMemoryStream.Create();
streamOE := TMemoryStream.Create(); streamOE := TMemoryStream.Create();
binaryStreamNE := TMemoryStream.Create();
binaryStreamOE := TMemoryStream.Create();
s := COLLATION_FILE_PREFIX + ChangeFileExt(LowerCase(ExtractFileName(collationFileName)),'.pas'); s := COLLATION_FILE_PREFIX + ChangeFileExt(LowerCase(ExtractFileName(collationFileName)),'.pas');
GenerateCdlrCollation( GenerateCdlrCollation(
collation,collationTypeName,s,stream,streamNE,streamOE, collation,collationTypeName,s,stream,streamNE,streamOE,
binaryStreamNE,binaryStreamOE,
orderedChars,ucaBook.Lines orderedChars,ucaBook.Lines
); );
stream.SaveToFile(ExtractFilePath(collationFileName)+s); stream.SaveToFile(ExtractFilePath(collationFileName)+s);
@ -204,8 +245,20 @@ begin
streamNE.SaveToFile(ExtractFilePath(collationFileName)+GenerateEndianIncludeFileName(s,ENDIAN_NATIVE)); streamNE.SaveToFile(ExtractFilePath(collationFileName)+GenerateEndianIncludeFileName(s,ENDIAN_NATIVE));
streamOE.SaveToFile(ExtractFilePath(collationFileName)+GenerateEndianIncludeFileName(s,ENDIAN_NON_NATIVE)); streamOE.SaveToFile(ExtractFilePath(collationFileName)+GenerateEndianIncludeFileName(s,ENDIAN_NON_NATIVE));
end; end;
if (binaryStreamNE.Size > 0) then begin
binaryStreamNE.SaveToFile(
ExtractFilePath(collationFileName) +
ChangeFileExt(s,Format('_%s.bco',[ENDIAN_SUFFIX[ENDIAN_NATIVE]]))
);
binaryStreamOE.SaveToFile(
ExtractFilePath(collationFileName) +
ChangeFileExt(s,Format('_%s.bco',[ENDIAN_SUFFIX[ENDIAN_NON_NATIVE]]))
);
end;
end; end;
finally finally
binaryStreamOE.Free();
binaryStreamNE.Free();
streamOE.Free(); streamOE.Free();
streamNE.Free(); streamNE.Free();
stream.Free(); stream.Free();

View File

@ -487,11 +487,23 @@ const
var AFirstTable : TucaBmpFirstTable; var AFirstTable : TucaBmpFirstTable;
var ASecondTable : TucaBmpSecondTable var ASecondTable : TucaBmpSecondTable
); );
procedure GenerateBinaryUCA_BmpTables(
ANativeEndianStream,
ANonNativeEndianStream : TStream;
var AFirstTable : TucaBmpFirstTable;
var ASecondTable : TucaBmpSecondTable
);
procedure GenerateUCA_PropTable( procedure GenerateUCA_PropTable(
ADest : TStream; ADest : TStream;
const APropBook : PUCA_PropBook; const APropBook : PUCA_PropBook;
const AEndian : TEndianKind const AEndian : TEndianKind
); );
procedure GenerateBinaryUCA_PropTable(
// WARNING : files must be generated for each endianess (Little / Big)
ANativeEndianStream,
ANonNativeEndianStream : TStream;
const APropBook : PUCA_PropBook
);
procedure GenerateUCA_OBmpTables( procedure GenerateUCA_OBmpTables(
AStream, AStream,
ANativeEndianStream, ANativeEndianStream,
@ -499,6 +511,12 @@ const
var AFirstTable : TucaOBmpFirstTable; var AFirstTable : TucaOBmpFirstTable;
var ASecondTable : TucaOBmpSecondTable var ASecondTable : TucaOBmpSecondTable
); );
procedure GenerateBinaryUCA_OBmpTables(
ANativeEndianStream,
ANonNativeEndianStream : TStream;
var AFirstTable : TucaOBmpFirstTable;
var ASecondTable : TucaOBmpSecondTable
);
procedure Parse_UnicodeData( procedure Parse_UnicodeData(
ADataAStream : TMemoryStream; ADataAStream : TMemoryStream;
@ -611,7 +629,6 @@ const
): PPropRec; inline;overload; ): PPropRec; inline;overload;
procedure FromUCS4(const AValue : TUnicodeCodePoint; var AHighS, ALowS : Word);inline; procedure FromUCS4(const AValue : TUnicodeCodePoint; var AHighS, ALowS : Word);inline;
function ToUCS4(const AHighS, ALowS : Word) : TUnicodeCodePoint; inline; function ToUCS4(const AHighS, ALowS : Word) : TUnicodeCodePoint; inline;
//--------------------
type type
TBitOrder = 0..7; TBitOrder = 0..7;
@ -640,6 +657,29 @@ type
const ADataLen : Integer const ADataLen : Integer
); );
type
TCollationName = string[128];
TSerializedCollationHeader = packed record
Base : TCollationName;
Version : TCollationName;
CollationName : TCollationName;
VariableWeight : Byte;
Backwards : Byte;
BMP_Table1Length : DWord;
BMP_Table2Length : DWord;
OBMP_Table1Length : DWord;
OBMP_Table2Length : DWord;
PropCount : DWord;
VariableLowLimit : Word;
VariableHighLimit : Word;
ChangedFields : Byte;
end;
PSerializedCollationHeader = ^TSerializedCollationHeader;
procedure ReverseRecordBytes(var AItem : TSerializedCollationHeader);
procedure ReverseBytes(var AData; const ALength : Integer);
procedure ReverseArray(var AValue; const AArrayLength, AItemSize : PtrInt);
resourcestring resourcestring
SInsufficientMemoryBuffer = 'Insufficient Memory Buffer'; SInsufficientMemoryBuffer = 'Insufficient Memory Buffer';
@ -3294,6 +3334,28 @@ begin
AddLine(ANonNativeEndianStream,' );' + sLineBreak); AddLine(ANonNativeEndianStream,' );' + sLineBreak);
end; end;
{ Writes the binary form of the BMP lookup tables to both streams.
  The first table is written unchanged to the two streams; every item of the
  second table is written as is to the native endian stream and with its
  bytes reversed to the non native endian stream. }
procedure GenerateBinaryUCA_BmpTables(
        ANativeEndianStream,
        ANonNativeEndianStream : TStream;
  var   AFirstTable            : TucaBmpFirstTable;
  var   ASecondTable           : TucaBmpSecondTable
);
var
  row, col : Integer;
  nativeItem, swappedItem : UInt24;
begin
  ANativeEndianStream.Write(AFirstTable[0],Length(AFirstTable));
  ANonNativeEndianStream.Write(AFirstTable[0],Length(AFirstTable));
  for row := Low(ASecondTable) to High(ASecondTable) do begin
    for col := Low(TucaBmpSecondTableItem) to High(TucaBmpSecondTableItem) do begin
      nativeItem := ASecondTable[row][col];
      ANativeEndianStream.Write(nativeItem,SizeOf(nativeItem));
      // Work on a copy so the native value is never altered.
      swappedItem := nativeItem;
      ReverseBytes(swappedItem,SizeOf(swappedItem));
      ANonNativeEndianStream.Write(swappedItem,SizeOf(swappedItem));
    end;
  end;
end;
procedure GenerateUCA_PropTable( procedure GenerateUCA_PropTable(
// WARNING : files must be generated for each endianess (Little / Big) // WARNING : files must be generated for each endianess (Little / Big)
ADest : TStream; ADest : TStream;
@ -3336,6 +3398,17 @@ begin
AddLine(' );' + sLineBreak); AddLine(' );' + sLineBreak);
end; end;
{ Writes the binary property data of APropBook : "Items" goes to the native
  endian stream and "ItemsOtherEndian" to the non native endian stream,
  "ItemSize" bytes each.
  WARNING : files must be generated for each endianess (Little / Big) }
procedure GenerateBinaryUCA_PropTable(
        ANativeEndianStream,
        ANonNativeEndianStream : TStream;
  const APropBook              : PUCA_PropBook
);
begin
  with APropBook^ do begin
    ANativeEndianStream.Write(Items^,ItemSize);
    ANonNativeEndianStream.Write(ItemsOtherEndian^,ItemSize);
  end;
end;
procedure GenerateUCA_OBmpTables( procedure GenerateUCA_OBmpTables(
AStream, AStream,
ANativeEndianStream, ANativeEndianStream,
@ -3410,7 +3483,34 @@ begin
AddLine(ANonNativeEndianStream,' );' + sLineBreak); AddLine(ANonNativeEndianStream,' );' + sLineBreak);
end; end;
//-------------------------------------------
{ Writes the binary form of the OBMP lookup tables to both streams.
  Each Word of the first table and each UInt24 item of the second table is
  written as is to the native endian stream and with its bytes reversed to
  the non native endian stream. }
procedure GenerateBinaryUCA_OBmpTables(
        ANativeEndianStream,
        ANonNativeEndianStream : TStream;
  var   AFirstTable            : TucaOBmpFirstTable;
  var   ASecondTable           : TucaOBmpSecondTable
);
var
  i, j : Integer;
  wordValue : Word;
  value : UInt24;
begin
  for i := Low(AFirstTable) to High(AFirstTable) do begin
    wordValue := AFirstTable[i];
    ANativeEndianStream.Write(wordValue,SizeOf(wordValue));
    // The local copy is reversed, the table itself is left untouched.
    ReverseBytes(wordValue,SizeOf(wordValue));
    ANonNativeEndianStream.Write(wordValue,SizeOf(wordValue));
  end;
  for i := Low(ASecondTable) to High(ASecondTable) do begin
    for j := Low(TucaOBmpSecondTableItem) to High(TucaOBmpSecondTableItem) do begin
      value := ASecondTable[i][j];
      ANativeEndianStream.Write(value,SizeOf(value));
      ReverseBytes(value,SizeOf(value));
      ANonNativeEndianStream.Write(value,SizeOf(value));
    end;
  end;
end;
type type
POBmpSecondTableItem = ^TOBmpSecondTableItem; POBmpSecondTableItem = ^TOBmpSecondTableItem;
@ -4103,6 +4203,17 @@ begin
Result := r; Result := r;
end; end;
{ Reverses, in place, the byte order of every multi-byte numeric field of a
  serialized collation header (the five DWord lengths and the two Word
  limits). The name fields and the single byte fields are left as they are. }
procedure ReverseRecordBytes(var AItem : TSerializedCollationHeader);
begin
  with AItem do begin
    ReverseBytes(BMP_Table1Length,SizeOf(BMP_Table1Length));
    ReverseBytes(BMP_Table2Length,SizeOf(BMP_Table2Length));
    ReverseBytes(OBMP_Table1Length,SizeOf(OBMP_Table1Length));
    ReverseBytes(OBMP_Table2Length,SizeOf(OBMP_Table2Length));
    ReverseBytes(PropCount,SizeOf(PropCount));
    ReverseBytes(VariableLowLimit,SizeOf(VariableLowLimit));
    ReverseBytes(VariableHighLimit,SizeOf(VariableHighLimit));
  end;
end;
procedure ReverseBytes(var AData; const ALength : Integer); procedure ReverseBytes(var AData; const ALength : Integer);
var var
i,j : PtrInt; i,j : PtrInt;

View File

@ -31,7 +31,7 @@
program unihelper; program unihelper;
{$mode objfpc}{$H+} {$mode objfpc}{$H+}
{$typedadress on} {$typedaddress on}
uses uses
SysUtils, Classes, SysUtils, Classes,
@ -66,6 +66,7 @@ end;
var var
dataPath, outputPath : string; dataPath, outputPath : string;
stream, binStreamNE, binStreamOE, tmpStream : TMemoryStream; stream, binStreamNE, binStreamOE, tmpStream : TMemoryStream;
binaryStreamNE, binaryStreamOE : TMemoryStream;
hangulSyllables : TCodePointRecArray; hangulSyllables : TCodePointRecArray;
ucaBook : TUCA_DataBook; ucaBook : TUCA_DataBook;
ucaPropBook : PUCA_PropBook; ucaPropBook : PUCA_PropBook;
@ -95,6 +96,7 @@ var
ucaoFirstTable : TucaoBmpFirstTable; ucaoFirstTable : TucaoBmpFirstTable;
ucaoSecondTable : TucaOBmpSecondTable; ucaoSecondTable : TucaOBmpSecondTable;
WL : Integer; WL : Integer;
serializedHeader : TSerializedCollationHeader;
begin begin
WriteLn(SUsage+sLineBreak); WriteLn(SUsage+sLineBreak);
if (ParamCount > 0) then if (ParamCount > 0) then
@ -125,6 +127,8 @@ begin
Halt(1); Halt(1);
end; end;
binaryStreamNE := nil;
binaryStreamOE := nil;
binStreamOE := nil; binStreamOE := nil;
binStreamNE := nil; binStreamNE := nil;
tmpStream := nil; tmpStream := nil;
@ -206,6 +210,8 @@ begin
{$IFDEF UCA_TEST} {$IFDEF UCA_TEST}
uca_CheckProp_2y(ucaBook,ucaPropBook,@ucaoFirstTable,@ucaoSecondTable); uca_CheckProp_2y(ucaBook,ucaPropBook,@ucaoFirstTable,@ucaoSecondTable);
{$ENDIF UCA_TEST} {$ENDIF UCA_TEST}
binaryStreamNE := TMemoryStream.Create();
binaryStreamOE := TMemoryStream.Create();
WriteLn('Generate UCA Props tables ...'); WriteLn('Generate UCA Props tables ...');
binStreamNE.Clear(); binStreamNE.Clear();
binStreamOE.Clear(); binStreamOE.Clear();
@ -226,6 +232,37 @@ begin
binStreamOE.SaveToFile(GenerateEndianIncludeFileName(s,ENDIAN_NON_NATIVE)); binStreamOE.SaveToFile(GenerateEndianIncludeFileName(s,ENDIAN_NON_NATIVE));
binStreamNE.Clear(); binStreamNE.Clear();
binStreamOE.Clear(); binStreamOE.Clear();
// Binary DUCET
FillChar(serializedHeader,SizeOf(TSerializedCollationHeader),0);
serializedHeader.Version := ucaBook.Version;
serializedHeader.CollationName := 'DUCET';//'Default Unicode Collation Element Table (DUCET)';
serializedHeader.VariableWeight := Ord(ucaBook.VariableWeight);
SetBit(serializedHeader.Backwards,0,ucaBook.Backwards[0]);
SetBit(serializedHeader.Backwards,1,ucaBook.Backwards[1]);
SetBit(serializedHeader.Backwards,2,ucaBook.Backwards[2]);
SetBit(serializedHeader.Backwards,3,ucaBook.Backwards[3]);
serializedHeader.BMP_Table1Length := Length(ucaFirstTable);
serializedHeader.BMP_Table2Length := Length(TucaBmpSecondTableItem) *
(Length(ucaSecondTable) * SizeOf(UInt24));
serializedHeader.OBMP_Table1Length := Length(ucaoFirstTable) * SizeOf(Word);
serializedHeader.OBMP_Table2Length := Length(TucaOBmpSecondTableItem) *
(Length(ucaoSecondTable) * SizeOf(UInt24));
serializedHeader.PropCount := ucaPropBook^.ItemSize;
serializedHeader.VariableLowLimit := ucaPropBook^.VariableLowLimit;
serializedHeader.VariableHighLimit := ucaPropBook^.VariableHighLimit;
binaryStreamNE.Write(serializedHeader,SizeOf(serializedHeader));
ReverseRecordBytes(serializedHeader);
binaryStreamOE.Write(serializedHeader,SizeOf(serializedHeader));
GenerateBinaryUCA_BmpTables(binaryStreamNE,binaryStreamOE,ucaFirstTable,ucaSecondTable);
GenerateBinaryUCA_OBmpTables(binaryStreamNE,binaryStreamOE,ucaoFirstTable,ucaoSecondTable);
GenerateBinaryUCA_PropTable(binaryStreamNE,binaryStreamOE,ucaPropBook);
binaryStreamNE.SaveToFile(
outputPath + Format('collation_ducet_%s.bco',[ENDIAN_SUFFIX[ENDIAN_NATIVE]])
);
binaryStreamOE.SaveToFile(
outputPath + Format('collation_ducet_%s.bco',[ENDIAN_SUFFIX[ENDIAN_NON_NATIVE]])
);
// Binary DUCET - END
stream.Clear(); stream.Clear();
@ -386,6 +423,8 @@ begin
end; end;
stream.SaveToFile(outputPath + 'diff2.txt'); stream.SaveToFile(outputPath + 'diff2.txt');
finally finally
binaryStreamOE.Free();
binaryStreamNE.Free();
tmpStream.Free(); tmpStream.Free();
binStreamOE.Free(); binStreamOE.Free();
binStreamNE.Free(); binStreamNE.Free();