fpc/utils/unicode/helper.pas
paul f285948fcb rtl, utils: apply patch of Inoussa:
This patch implements collation'loading at runtime. This reduce the final executable' size as the collation's data are now externaly stored. Note that It requires the external collation files to be shipped and the program to load the collations it needs using the "LoadCollation"/"RegisterCollation" procedure(s).

The external collation files are produced by "cldrparser" (while producing the static files). The root collation "ducet" 's external file is produced by "unihelper".

It is important to note that these files are endian specific :
 * collation_*_be.bco for big endian systems
 * collation_*_le.bco for little endian system.

The root collation should at be registered, be it staticaly by using the "unicodeducet" unit or dynamicaly by making a call sush as RegisterCollation(<collation dir>,'ducet'). 
It is possible, in the same application, to make use of static and dynamic.

git-svn-id: trunk@25295 -
2013-08-19 13:42:11 +00:00

4681 lines
134 KiB
ObjectPascal

{ Unicode parser helper unit.
Copyright (c) 2012 by Inoussa OUEDRAOGO
The source code is distributed under the Library GNU
General Public License with the following modification:
- object files and libraries linked into an application may be
distributed without source code.
If you didn't receive a copy of the file COPYING, contact:
Free Software Foundation
675 Mass Ave
Cambridge, MA 02139
USA
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. }
unit helper;
{$mode delphi}
{$H+}
{$PACKENUM 1}
{$pointermath on}
{$typedaddress on}
{$warn 4056 off} //Conversion between ordinals and pointers is not portable
{$macro on}
{$ifdef FPC_REQUIRES_PROPER_ALIGNMENT}
{$define X_PACKED:=}
{$else FPC_REQUIRES_PROPER_ALIGNMENT}
{$define X_PACKED:=packed}
{$endif FPC_REQUIRES_PROPER_ALIGNMENT}
interface
uses
Classes, SysUtils, StrUtils;
const
SLicenseText =
' { Unicode implementation tables. ' + sLineBreak +
' ' + sLineBreak +
' Copyright (c) 2013 by Inoussa OUEDRAOGO ' + sLineBreak +
' ' + sLineBreak +
' Permission is hereby granted, free of charge, to any person ' + sLineBreak +
' obtaining a copy of the Unicode data files and any associated ' + sLineBreak +
' documentation (the "Data Files") or Unicode software and any ' + sLineBreak +
' associated documentation (the "Software") to deal in the Data ' + sLineBreak +
' Files or Software without restriction, including without ' + sLineBreak +
' limitation the rights to use, copy, modify, merge, publish, ' + sLineBreak +
' distribute, and/or sell copies of the Data Files or Software, ' + sLineBreak +
' and to permit persons to whom the Data Files or Software are ' + sLineBreak +
' furnished to do so, provided that (a) the above copyright ' + sLineBreak +
' notice(s) and this permission notice appear with all copies ' + sLineBreak +
' of the Data Files or Software, (b) both the above copyright ' + sLineBreak +
' notice(s) and this permission notice appear in associated ' + sLineBreak +
' documentation, and (c) there is clear notice in each modified ' + sLineBreak +
' Data File or in the Software as well as in the documentation ' + sLineBreak +
' associated with the Data File(s) or Software that the data or ' + sLineBreak +
' software has been modified. ' + sLineBreak +
' ' + sLineBreak +
' ' + sLineBreak +
' This program is distributed in the hope that it will be useful, ' + sLineBreak +
' but WITHOUT ANY WARRANTY; without even the implied warranty of ' + sLineBreak +
' MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. }';
type
// Unicode General Category
TUnicodeCategory = (
ucUppercaseLetter, // Lu = Letter, uppercase
ucLowercaseLetter, // Ll = Letter, lowercase
ucTitlecaseLetter, // Lt = Letter, titlecase
ucModifierLetter, // Lm = Letter, modifier
ucOtherLetter, // Lo = Letter, other
ucNonSpacingMark, // Mn = Mark, nonspacing
ucCombiningMark, // Mc = Mark, spacing combining
ucEnclosingMark, // Me = Mark, enclosing
ucDecimalNumber, // Nd = Number, decimal digit
ucLetterNumber, // Nl = Number, letter
ucOtherNumber, // No = Number, other
ucConnectPunctuation, // Pc = Punctuation, connector
ucDashPunctuation, // Pd = Punctuation, dash
ucOpenPunctuation, // Ps = Punctuation, open
ucClosePunctuation, // Pe = Punctuation, close
ucInitialPunctuation, // Pi = Punctuation, initial quote (may behave like Ps or Pe depending on usage)
ucFinalPunctuation, // Pf = Punctuation, final quote (may behave like Ps or Pe depending on usage)
ucOtherPunctuation, // Po = Punctuation, other
ucMathSymbol, // Sm = Symbol, math
ucCurrencySymbol, // Sc = Symbol, currency
ucModifierSymbol, // Sk = Symbol, modifier
ucOtherSymbol, // So = Symbol, other
ucSpaceSeparator, // Zs = Separator, space
ucLineSeparator, // Zl = Separator, line
ucParagraphSeparator, // Zp = Separator, paragraph
ucControl, // Cc = Other, control
ucFormat, // Cf = Other, format
ucSurrogate, // Cs = Other, surrogate
ucPrivateUse, // Co = Other, private use
ucUnassigned // Cn = Other, not assigned (including noncharacters)
);
TUInt24Rec = packed record
public
{$ifdef FPC_LITTLE_ENDIAN}
byte0, byte1, byte2 : Byte;
{$else FPC_LITTLE_ENDIAN}
byte2, byte1, byte0 : Byte;
{$endif FPC_LITTLE_ENDIAN}
public
class operator Implicit(a : TUInt24Rec) : Cardinal;{$ifdef USE_INLINE}inline;{$ENDIF}
class operator Implicit(a : TUInt24Rec) : LongInt;{$ifdef USE_INLINE}inline;{$ENDIF}
class operator Implicit(a : TUInt24Rec) : Word;{$ifdef USE_INLINE}inline;{$ENDIF}
class operator Implicit(a : TUInt24Rec) : Byte;{$ifdef USE_INLINE}inline;{$ENDIF}
class operator Implicit(a : Cardinal) : TUInt24Rec;{$ifdef USE_INLINE}inline;{$ENDIF}
class operator Explicit(a : TUInt24Rec) : Cardinal;{$ifdef USE_INLINE}inline;{$ENDIF}
class operator Equal(a, b: TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
class operator Equal(a : TUInt24Rec; b : Cardinal): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
class operator Equal(a : Cardinal; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
class operator Equal(a : TUInt24Rec; b : LongInt): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
class operator Equal(a : LongInt; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
class operator Equal(a : TUInt24Rec; b : Word): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
class operator Equal(a : Word; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
class operator Equal(a : TUInt24Rec; b : Byte): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
class operator Equal(a : Byte; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
class operator NotEqual(a, b: TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
class operator NotEqual(a : TUInt24Rec; b : Cardinal): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
class operator NotEqual(a : Cardinal; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
class operator GreaterThan(a, b: TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
class operator GreaterThan(a : TUInt24Rec; b : Cardinal): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
class operator GreaterThan(a : Cardinal; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
class operator GreaterThanOrEqual(a, b: TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
class operator GreaterThanOrEqual(a : TUInt24Rec; b : Cardinal): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
class operator GreaterThanOrEqual(a : Cardinal; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
class operator LessThan(a, b: TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
class operator LessThan(a : TUInt24Rec; b : Cardinal): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
class operator LessThan(a : Cardinal; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
class operator LessThanOrEqual(a, b: TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
class operator LessThanOrEqual(a : TUInt24Rec; b : Cardinal): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
class operator LessThanOrEqual(a : Cardinal; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
end;
UInt24 = TUInt24Rec;
PUInt24 = ^UInt24;
TUnicodeCodePoint = Cardinal;
TUnicodeCodePointArray = array of TUnicodeCodePoint;
TDecompositionArray = array of TUnicodeCodePointArray;
TNumericValue = Double;
TNumericValueArray = array of TNumericValue;
TBlockItemRec = packed record
RangeStart : TUnicodeCodePoint;
RangeEnd : TUnicodeCodePoint;
Name : string[120];
CanonicalName : string[120];
end;
TBlocks = array of TBlockItemRec;
PPropRec = ^TPropRec;
{ TPropRec }
TPropRec = packed record
private
function GetCategory : TUnicodeCategory;inline;
procedure SetCategory(AValue : TUnicodeCategory);
function GetWhiteSpace : Boolean;inline;
procedure SetWhiteSpace(AValue : Boolean);
function GetHangulSyllable : Boolean;inline;
procedure SetHangulSyllable(AValue : Boolean);
public
CategoryData : Byte;
PropID : Word;
CCC : Byte; // Canonical Combining Class
NumericIndex : Byte;
SimpleUpperCase : UInt24;
SimpleLowerCase : UInt24;
DecompositionID : SmallInt;
public
property Category : TUnicodeCategory read GetCategory write SetCategory;
property WhiteSpace : Boolean read GetWhiteSpace write SetWhiteSpace;
property HangulSyllable : Boolean read GetHangulSyllable write SetHangulSyllable;
end;
TPropRecArray = array of TPropRec;
TDecompositionIndexRec = packed record
StartPosition : Word;
Length : Byte;
end;
TDecompositionBook = X_PACKED record
Index : array of TDecompositionIndexRec;
CodePoints : array of TUnicodeCodePoint;
end;
PDataLineRec = ^TDataLineRec;
TDataLineRec = record
PropID : Integer;
case LineType : Byte of
0 : (CodePoint : TUnicodeCodePoint);
1 : (StartCodePoint, EndCodePoint : TUnicodeCodePoint);
end;
TDataLineRecArray = array of TDataLineRec;
TCodePointRec = record
case LineType : Byte of
0 : (CodePoint : TUnicodeCodePoint);
1 : (StartCodePoint, EndCodePoint : TUnicodeCodePoint);
end;
TCodePointRecArray = array of TCodePointRec;
TPropListLineRec = packed record
CodePoint : TCodePointRec;
PropName : string[123];
end;
TPropListLineRecArray = array of TPropListLineRec;
TUCA_WeightRec = packed record
Weights : array[0..3] of Cardinal;
Variable : Boolean;
end;
TUCA_WeightRecArray = array of TUCA_WeightRec;
TUCA_LineContextItemRec = X_PACKED record
public
CodePoints : TUnicodeCodePointArray;
Weights : TUCA_WeightRecArray;
public
procedure Clear();
procedure Assign(ASource : TUCA_LineContextItemRec);
function Clone() : TUCA_LineContextItemRec;
end;
PUCA_LineContextItemRec = ^TUCA_LineContextItemRec;
TUCA_LineContextRec = X_PACKED record
public
Data : array of TUCA_LineContextItemRec;
public
procedure Clear();
procedure Assign(ASource : TUCA_LineContextRec);
function Clone() : TUCA_LineContextRec;
end;
PUCA_LineContextRec = ^TUCA_LineContextRec;
{ TUCA_LineRec }
TUCA_LineRec = X_PACKED record
public
CodePoints : TUnicodeCodePointArray;
Weights : TUCA_WeightRecArray;
Context : TUCA_LineContextRec;
//Variable : Boolean;
Deleted : Boolean;
Stored : Boolean;
public
procedure Clear();
procedure Assign(ASource : TUCA_LineRec);
function Clone() : TUCA_LineRec;
function HasContext() : Boolean;
end;
PUCA_LineRec = ^TUCA_LineRec;
TUCA_VariableKind = (
ucaShifted, ucaNonIgnorable, ucaBlanked, ucaShiftedTrimmed,
ucaIgnoreSP
);
TUCA_DataBook = X_PACKED record
Version : string;
VariableWeight : TUCA_VariableKind;
Backwards : array[0..3] of Boolean;
Lines : array of TUCA_LineRec;
end;
PUCA_DataBook = ^TUCA_DataBook;
TUCA_DataBookIndex = array of Integer;
type
TUCA_PropWeights = packed record
Weights : array[0..2] of Word;
//Variable : Byte;
end;
PUCA_PropWeights = ^TUCA_PropWeights;
TUCA_PropItemContextRec = packed record
CodePointCount : Byte;
WeightCount : Byte;
//CodePoints : UInt24;
//Weights : TUCA_PropWeights;
end;
PUCA_PropItemContextRec = ^TUCA_PropItemContextRec;
TUCA_PropItemContextTreeNodeRec = packed record
Left : Word;
Right : Word;
Data : TUCA_PropItemContextRec;
end;
PUCA_PropItemContextTreeNodeRec = ^TUCA_PropItemContextTreeNodeRec;
TUCA_PropItemContextTreeRec = packed record
public
Size : UInt24;
public
function GetData:PUCA_PropItemContextTreeNodeRec;inline;
property Data : PUCA_PropItemContextTreeNodeRec read GetData;
end;
PUCA_PropItemContextTreeRec = ^TUCA_PropItemContextTreeRec;
{ TUCA_PropItemRec }
TUCA_PropItemRec = packed record
private
const FLAG_VALID = 0;
const FLAG_CODEPOINT = 1;
const FLAG_CONTEXTUAL = 2;
const FLAG_DELETION = 3;
const FLAG_COMPRESS_WEIGHT_1 = 6;
const FLAG_COMPRESS_WEIGHT_2 = 7;
private
function GetWeightSize : Word;inline;
public
WeightLength : Byte;
ChildCount : Byte;
Size : Word;
Flags : Byte;
public
function HasCodePoint() : Boolean;inline;
function GetCodePoint() : UInt24;//inline;
property CodePoint : UInt24 read GetCodePoint;
//Weights : array[0..WeightLength] of TUCA_PropWeights;
procedure GetWeightArray(ADest : PUCA_PropWeights);
function GetSelfOnlySize() : Cardinal;inline;
procedure SetContextual(AValue : Boolean);inline;
function GetContextual() : Boolean;inline;
property Contextual : Boolean read GetContextual write setContextual;
function GetContext() : PUCA_PropItemContextTreeRec;
procedure SetDeleted(AValue : Boolean);inline;
function IsDeleted() : Boolean;inline;
function IsValid() : Boolean;inline;
function IsWeightCompress_1() : Boolean;inline;
function IsWeightCompress_2() : Boolean;inline;
end;
PUCA_PropItemRec = ^TUCA_PropItemRec;
TUCA_PropIndexItem = packed record
CodePoint : Cardinal;
Position : Integer;
end;
PUCA_PropIndexItem = ^TUCA_PropIndexItem;
TUCA_PropBook = X_PACKED record
ItemSize : Integer;
Index : array of TUCA_PropIndexItem;
Items : PUCA_PropItemRec; //Native Endian
ItemsOtherEndian : PUCA_PropItemRec;//Non Native Endian
VariableLowLimit : Word;
VariableHighLimit : Word;
end;
PUCA_PropBook = ^TUCA_PropBook;
TBmpFirstTable = array[0..255] of Byte;
TBmpSecondTableItem = array[0..255] of Word;
TBmpSecondTable = array of TBmpSecondTableItem;
T3lvlBmp1Table = array[0..255] of Byte;
T3lvlBmp2TableItem = array[0..15] of Word;
T3lvlBmp2Table = array of T3lvlBmp2TableItem;
T3lvlBmp3TableItem = array[0..15] of Word;
T3lvlBmp3Table = array of T3lvlBmp3TableItem;
TucaBmpFirstTable = array[0..255] of Byte;
TucaBmpSecondTableItem = array[0..255] of Cardinal;
TucaBmpSecondTable = array of TucaBmpSecondTableItem;
PucaBmpFirstTable = ^TucaBmpFirstTable;
PucaBmpSecondTable = ^TucaBmpSecondTable;
const
LOW_SURROGATE_BEGIN = Word($DC00);
LOW_SURROGATE_END = Word($DFFF);
LOW_SURROGATE_COUNT = LOW_SURROGATE_END - LOW_SURROGATE_BEGIN + 1;
HIGH_SURROGATE_BEGIN = Word($D800);
HIGH_SURROGATE_END = Word($DBFF);
HIGH_SURROGATE_COUNT = HIGH_SURROGATE_END - HIGH_SURROGATE_BEGIN + 1;
type
TOBmpFirstTable = array[0..(HIGH_SURROGATE_COUNT-1)] of Word;
TOBmpSecondTableItem = array[0..(LOW_SURROGATE_COUNT-1)] of Word;
TOBmpSecondTable = array of TOBmpSecondTableItem;
T3lvlOBmp1Table = array[0..1023] of Byte;
T3lvlOBmp2TableItem = array[0..31] of Word;
T3lvlOBmp2Table = array of T3lvlOBmp2TableItem;
T3lvlOBmp3TableItem = array[0..31] of Word;
T3lvlOBmp3Table = array of T3lvlOBmp3TableItem;
TucaOBmpFirstTable = array[0..(HIGH_SURROGATE_COUNT-1)] of Word;
TucaOBmpSecondTableItem = array[0..(LOW_SURROGATE_COUNT-1)] of Cardinal;
TucaOBmpSecondTable = array of TucaOBmpSecondTableItem;
PucaOBmpFirstTable = ^TucaOBmpFirstTable;
PucaOBmpSecondTable = ^TucaOBmpSecondTable;
type
TEndianKind = (ekLittle, ekBig);
const
ENDIAN_SUFFIX : array[TEndianKind] of string[2] = ('le','be');
{$IFDEF ENDIAN_LITTLE}
ENDIAN_NATIVE = ekLittle;
ENDIAN_NON_NATIVE = ekBig;
{$ENDIF ENDIAN_LITTLE}
{$IFDEF ENDIAN_BIG}
ENDIAN_NATIVE = ekBig;
ENDIAN_NON_NATIVE = ekLittle;
{$ENDIF ENDIAN_BIG}
procedure GenerateLicenceText(ADest : TStream);
function BoolToByte(AValue : Boolean): Byte;inline;
function IsHangulSyllable(
const ACodePoint : TUnicodeCodePoint;
const AHangulList : TCodePointRecArray
) : Boolean;
procedure ParseHangulSyllableTypes(
ADataAStream : TMemoryStream;
var ACodePointList : TCodePointRecArray
);
procedure ParseProps(
ADataAStream : TMemoryStream;
var APropList : TPropListLineRecArray
);
function FindCodePointsByProperty(
const APropName : string;
const APropList : TPropListLineRecArray
) : TCodePointRecArray;
procedure ParseBlokcs(
ADataAStream : TMemoryStream;
var ABlocks : TBlocks
);
procedure ParseUCAFile(
ADataAStream : TMemoryStream;
var ABook : TUCA_DataBook
);
procedure MakeUCA_Props(
ABook : PUCA_DataBook;
out AProps : PUCA_PropBook
);
procedure FreeUcaBook(var ABook : PUCA_PropBook);
procedure MakeUCA_BmpTables(
var AFirstTable : TucaBmpFirstTable;
var ASecondTable : TucaBmpSecondTable;
const APropBook : PUCA_PropBook
);
procedure MakeUCA_OBmpTables(
var AFirstTable : TucaOBmpFirstTable;
var ASecondTable : TucaOBmpSecondTable;
const APropBook : PUCA_PropBook
);
function GetPropPosition(
const AHighS,
ALowS : Word;
const AFirstTable : PucaOBmpFirstTable;
const ASecondTable : PucaOBmpSecondTable
): Integer;inline;overload;
procedure GenerateUCA_Head(
ADest : TStream;
ABook : PUCA_DataBook;
AProps : PUCA_PropBook
);
procedure GenerateUCA_BmpTables(
AStream,
ANativeEndianStream,
ANonNativeEndianStream : TStream;
var AFirstTable : TucaBmpFirstTable;
var ASecondTable : TucaBmpSecondTable
);
procedure GenerateBinaryUCA_BmpTables(
ANativeEndianStream,
ANonNativeEndianStream : TStream;
var AFirstTable : TucaBmpFirstTable;
var ASecondTable : TucaBmpSecondTable
);
procedure GenerateUCA_PropTable(
ADest : TStream;
const APropBook : PUCA_PropBook;
const AEndian : TEndianKind
);
procedure GenerateBinaryUCA_PropTable(
// WARNING : files must be generated for each endianess (Little / Big)
ANativeEndianStream,
ANonNativeEndianStream : TStream;
const APropBook : PUCA_PropBook
);
procedure GenerateUCA_OBmpTables(
AStream,
ANativeEndianStream,
ANonNativeEndianStream : TStream;
var AFirstTable : TucaOBmpFirstTable;
var ASecondTable : TucaOBmpSecondTable
);
procedure GenerateBinaryUCA_OBmpTables(
ANativeEndianStream,
ANonNativeEndianStream : TStream;
var AFirstTable : TucaOBmpFirstTable;
var ASecondTable : TucaOBmpSecondTable
);
procedure Parse_UnicodeData(
ADataAStream : TMemoryStream;
var APropList : TPropRecArray;
var ANumericTable : TNumericValueArray;
var ADataLineList : TDataLineRecArray;
var ADecomposition : TDecompositionArray;
const AHangulList : TCodePointRecArray;
const AWhiteSpaces : TCodePointRecArray
);
procedure MakeDecomposition(
const ARawData : TDecompositionArray;
var ABook : TDecompositionBook
);
procedure MakeBmpTables(
var AFirstTable : TBmpFirstTable;
var ASecondTable : TBmpSecondTable;
const ADataLineList : TDataLineRecArray
);
procedure MakeBmpTables3Levels(
var AFirstTable : T3lvlBmp1Table;
var ASecondTable : T3lvlBmp2Table;
var AThirdTable : T3lvlBmp3Table;
const ADataLineList : TDataLineRecArray
);
procedure GenerateBmpTables(
ADest : TStream;
var AFirstTable : TBmpFirstTable;
var ASecondTable : TBmpSecondTable
);
procedure Generate3lvlBmpTables(
ADest : TStream;
var AFirstTable : T3lvlBmp1Table;
var ASecondTable : T3lvlBmp2Table;
var AThirdTable : T3lvlBmp3Table
);
procedure GeneratePropTable(
ADest : TStream;
const APropList : TPropRecArray;
const AEndian : TEndianKind
);
procedure GenerateNumericTable(
ADest : TStream;
const ANumList : TNumericValueArray;
const ACompleteUnit : Boolean
);
procedure GenerateDecompositionBookTable(
ADest : TStream;
const ABook : TDecompositionBook;
const AEndian : TEndianKind
);
procedure GenerateOutBmpTable(
ADest : TStream;
const AList : TDataLineRecArray
);
function Compress(const AData : TDataLineRecArray) : TDataLineRecArray;
function EvaluateFloat(const AStr : string) : Double;
function StrToCategory(const AStr : string) : TUnicodeCategory;
function StringToCodePoint(ACP : string) : TUnicodeCodePoint;
function IsWhiteSpace(
const ACodePoint : TUnicodeCodePoint;
const AWhiteSpaces : TCodePointRecArray
) : Boolean;
function GetPropID(
ACodePoint : TUnicodeCodePoint;
const ADataLineList : TDataLineRecArray
) : Cardinal;
//--------------------
procedure MakeOBmpTables(
var AFirstTable : TOBmpFirstTable;
var ASecondTable : TOBmpSecondTable;
const ADataLineList : TDataLineRecArray
);
procedure MakeOBmpTables3Levels(
var AFirstTable : T3lvlOBmp1Table;
var ASecondTable : T3lvlOBmp2Table;
var AThirdTable : T3lvlOBmp3Table;
const ADataLineList : TDataLineRecArray
);
procedure GenerateOBmpTables(
ADest : TStream;
var AFirstTable : TOBmpFirstTable;
var ASecondTable : TOBmpSecondTable
);
procedure Generate3lvlOBmpTables(
ADest : TStream;
var AFirstTable : T3lvlOBmp1Table;
var ASecondTable : T3lvlOBmp2Table;
var AThirdTable : T3lvlOBmp3Table
);
function GetProp(
const AHighS,
ALowS : Word;
const AProps : TPropRecArray;
var AFirstTable : TOBmpFirstTable;
var ASecondTable : TOBmpSecondTable
): PPropRec; inline;overload;
function GetProp(
const AHighS,
ALowS : Word;
const AProps : TPropRecArray;
var AFirstTable : T3lvlOBmp1Table;
var ASecondTable : T3lvlOBmp2Table;
var AThirdTable : T3lvlOBmp3Table
): PPropRec; inline;overload;
procedure FromUCS4(const AValue : TUnicodeCodePoint; var AHighS, ALowS : Word);inline;
function ToUCS4(const AHighS, ALowS : Word) : TUnicodeCodePoint; inline;
type
TBitOrder = 0..7;
function IsBitON(const AData : Byte; const ABit : TBitOrder) : Boolean ;{$IFDEF USE_INLINE}inline;{$ENDIF}
procedure SetBit(var AData : Byte; const ABit : TBitOrder; const AValue : Boolean);{$IFDEF USE_INLINE}inline;{$ENDIF}
function GenerateEndianIncludeFileName(
const AStoreName : string;
const AEndian : TEndianKind
): string;inline;
procedure ReverseFromNativeEndian(
const AData : PUCA_PropItemRec;
const ADataLen : Cardinal;
const ADest : PUCA_PropItemRec
);
procedure ReverseToNativeEndian(
const AData : PUCA_PropItemRec;
const ADataLen : Cardinal;
const ADest : PUCA_PropItemRec
);
procedure CompareProps(
const AProp1,
AProp2 : PUCA_PropItemRec;
const ADataLen : Integer
);
type
TCollationName = string[128];
TSerializedCollationHeader = packed record
Base : TCollationName;
Version : TCollationName;
CollationName : TCollationName;
VariableWeight : Byte;
Backwards : Byte;
BMP_Table1Length : DWord;
BMP_Table2Length : DWord;
OBMP_Table1Length : DWord;
OBMP_Table2Length : DWord;
PropCount : DWord;
VariableLowLimit : Word;
VariableHighLimit : Word;
ChangedFields : Byte;
end;
PSerializedCollationHeader = ^TSerializedCollationHeader;
procedure ReverseRecordBytes(var AItem : TSerializedCollationHeader);
procedure ReverseBytes(var AData; const ALength : Integer);
procedure ReverseArray(var AValue; const AArrayLength, AItemSize : PtrInt);
resourcestring
SInsufficientMemoryBuffer = 'Insufficient Memory Buffer';
implementation
uses
typinfo, Math, AVL_Tree,
trie;
type
TCardinalRec = packed record
{$ifdef FPC_LITTLE_ENDIAN}
byte0, byte1, byte2, byte3 : Byte;
{$else FPC_LITTLE_ENDIAN}
byte3, byte2, byte1, byte0 : Byte;
{$endif FPC_LITTLE_ENDIAN}
end;
TWordRec = packed record
{$ifdef FPC_LITTLE_ENDIAN}
byte0, byte1 : Byte;
{$else FPC_LITTLE_ENDIAN}
byte1, byte0 : Byte;
{$endif FPC_LITTLE_ENDIAN}
end;
{ TUInt24Rec }
class operator TUInt24Rec.Explicit(a : TUInt24Rec) : Cardinal;
begin
TCardinalRec(Result).byte0 := a.byte0;
TCardinalRec(Result).byte1 := a.byte1;
TCardinalRec(Result).byte2 := a.byte2;
TCardinalRec(Result).byte3 := 0;
end;
class operator TUInt24Rec.Implicit(a : TUInt24Rec) : Cardinal;
begin
TCardinalRec(Result).byte0 := a.byte0;
TCardinalRec(Result).byte1 := a.byte1;
TCardinalRec(Result).byte2 := a.byte2;
TCardinalRec(Result).byte3 := 0;
end;
class operator TUInt24Rec.Implicit(a : TUInt24Rec) : LongInt;
begin
Result := Cardinal(a);
end;
class operator TUInt24Rec.Implicit(a : TUInt24Rec) : Word;
begin
{$IFOPT R+}
if (a.byte2 > 0) then
Error(reIntOverflow);
{$ENDIF R+}
TWordRec(Result).byte0 := a.byte0;
TWordRec(Result).byte1 := a.byte1;
end;
class operator TUInt24Rec.Implicit(a : TUInt24Rec) : Byte;
begin
{$IFOPT R+}
if (a.byte1 > 0) or (a.byte2 > 0) then
Error(reIntOverflow);
{$ENDIF R+}
Result := a.byte0;
end;
class operator TUInt24Rec.Implicit(a : Cardinal) : TUInt24Rec;
begin
{$IFOPT R+}
if (a > $FFFFFF) then
Error(reIntOverflow);
{$ENDIF R+}
Result.byte0 := TCardinalRec(a).byte0;
Result.byte1 := TCardinalRec(a).byte1;
Result.byte2 := TCardinalRec(a).byte2;
end;
class operator TUInt24Rec.Equal(a, b : TUInt24Rec) : Boolean;
begin
Result := (a.byte0 = b.byte0) and (a.byte1 = b.byte1) and (a.byte2 = b.byte2);
end;
class operator TUInt24Rec.Equal(a : TUInt24Rec; b : Cardinal) : Boolean;
begin
Result := (TCardinalRec(b).byte3 = 0) and
(a.byte0 = TCardinalRec(b).byte0) and
(a.byte1 = TCardinalRec(b).byte1) and
(a.byte2 = TCardinalRec(b).byte2);
end;
class operator TUInt24Rec.Equal(a : Cardinal; b : TUInt24Rec) : Boolean;
begin
Result := (b = a);
end;
class operator TUInt24Rec.Equal(a : TUInt24Rec; b : LongInt) : Boolean;
begin
Result := (LongInt(a) = b);
end;
class operator TUInt24Rec.Equal(a : LongInt; b : TUInt24Rec) : Boolean;
begin
Result := (b = a);
end;
class operator TUInt24Rec.Equal(a : TUInt24Rec; b : Word) : Boolean;
begin
Result := (a.byte2 = 0) and
(a.byte0 = TWordRec(b).byte0) and
(a.byte1 = TWordRec(b).byte1);
end;
class operator TUInt24Rec.Equal(a : Word; b : TUInt24Rec) : Boolean;
begin
Result := (b = a);
end;
class operator TUInt24Rec.Equal(a : TUInt24Rec; b : Byte) : Boolean;
begin
Result := (a.byte2 = 0) and
(a.byte1 = 0) and
(a.byte0 = b);
end;
class operator TUInt24Rec.Equal(a : Byte; b : TUInt24Rec) : Boolean;
begin
Result := (b = a);
end;
class operator TUInt24Rec.NotEqual(a, b : TUInt24Rec) : Boolean;
begin
Result := (a.byte0 <> b.byte0) or (a.byte1 <> b.byte1) or (a.byte2 <> b.byte2);
end;
class operator TUInt24Rec.NotEqual(a : TUInt24Rec; b : Cardinal) : Boolean;
begin
Result := (TCardinalRec(b).byte3 <> 0) or
(a.byte0 <> TCardinalRec(b).byte0) or
(a.byte1 <> TCardinalRec(b).byte1) or
(a.byte2 <> TCardinalRec(b).byte2);
end;
class operator TUInt24Rec.NotEqual(a : Cardinal; b : TUInt24Rec) : Boolean;
begin
Result := (b <> a);
end;
class operator TUInt24Rec.GreaterThan(a, b: TUInt24Rec): Boolean;
begin
Result := (a.byte2 > b.byte2) or
((a.byte2 = b.byte2) and (a.byte1 > b.byte1)) or
((a.byte2 = b.byte2) and (a.byte1 = b.byte1) and (a.byte0 > b.byte0));
end;
class operator TUInt24Rec.GreaterThan(a: TUInt24Rec; b: Cardinal): Boolean;
begin
Result := Cardinal(a) > b;
end;
class operator TUInt24Rec.GreaterThan(a: Cardinal; b: TUInt24Rec): Boolean;
begin
Result := a > Cardinal(b);
end;
class operator TUInt24Rec.GreaterThanOrEqual(a, b: TUInt24Rec): Boolean;
begin
Result := (a.byte2 > b.byte2) or
((a.byte2 = b.byte2) and (a.byte1 > b.byte1)) or
((a.byte2 = b.byte2) and (a.byte1 = b.byte1) and (a.byte0 >= b.byte0));
end;
class operator TUInt24Rec.GreaterThanOrEqual(a: TUInt24Rec; b: Cardinal): Boolean;
begin
Result := Cardinal(a) >= b;
end;
class operator TUInt24Rec.GreaterThanOrEqual(a: Cardinal; b: TUInt24Rec): Boolean;
begin
Result := a >= Cardinal(b);
end;
class operator TUInt24Rec.LessThan(a, b: TUInt24Rec): Boolean;
begin
Result := (b > a);
end;
class operator TUInt24Rec.LessThan(a: TUInt24Rec; b: Cardinal): Boolean;
begin
Result := Cardinal(a) < b;
end;
class operator TUInt24Rec.LessThan(a: Cardinal; b: TUInt24Rec): Boolean;
begin
Result := a < Cardinal(b);
end;
class operator TUInt24Rec.LessThanOrEqual(a, b: TUInt24Rec): Boolean;
begin
Result := (b >= a);
end;
class operator TUInt24Rec.LessThanOrEqual(a: TUInt24Rec; b: Cardinal): Boolean;
begin
Result := Cardinal(a) <= b;
end;
class operator TUInt24Rec.LessThanOrEqual(a: Cardinal; b: TUInt24Rec): Boolean;
begin
Result := a <= Cardinal(b);
end;
function GenerateEndianIncludeFileName(
const AStoreName : string;
const AEndian : TEndianKind
): string;inline;
begin
Result := ExtractFilePath(AStoreName) +
ChangeFileExt(ExtractFileName(AStoreName),Format('_%s.inc',[ENDIAN_SUFFIX[AEndian]]));
end;
function IsBitON(const AData : Byte; const ABit : TBitOrder) : Boolean ;
begin
Result := ( ( AData and ( 1 shl ABit ) ) <> 0 );
end;
procedure SetBit(var AData : Byte; const ABit : TBitOrder; const AValue : Boolean);
begin
if AValue then
AData := AData or (1 shl (ABit mod 8))
else
AData := AData and ( not ( 1 shl ( ABit mod 8 ) ) );
end;
var
FS : TFormatSettings;
function EvaluateFloat(const AStr : string) : Double;
var
s, n, d : string;
i : Integer;
begin
Result := 0;
s := Trim(AStr);
if (Length(s) > 0) then begin
i := Pos('/',s);
if (i < 1) then
Result := StrToFloat(s,FS)
else begin
n := Copy(s,1,i-1);
d := Copy(s,i+1,MaxInt);
Result := StrToInt(n) / StrToInt(d);
end;
end;
end;
function StrToCategory(const AStr : string) : TUnicodeCategory;
var
s : string;
begin
s := UpperCase(Trim(AStr));
if (s = 'LU') then
Result := ucUppercaseLetter
else if (s = 'LL') then
Result := ucLowercaseLetter
else if (s = 'LT') then
Result := ucTitlecaseLetter
else if (s = 'LM') then
Result := ucModifierLetter
else if (s = 'LO') then
Result := ucOtherLetter
else
if (s = 'MN') then
Result := ucNonSpacingMark
else if (s = 'MC') then
Result := ucCombiningMark
else if (s = 'ME') then
Result := ucEnclosingMark
else
if (s = 'ND') then
Result := ucDecimalNumber
else if (s = 'NL') then
Result := ucLetterNumber
else if (s = 'NO') then
Result := ucOtherNumber
else
if (s = 'PC') then
Result := ucConnectPunctuation
else if (s = 'PD') then
Result := ucDashPunctuation
else if (s = 'PS') then
Result := ucOpenPunctuation
else if (s = 'PE') then
Result := ucClosePunctuation
else if (s = 'PI') then
Result := ucInitialPunctuation
else if (s = 'PF') then
Result := ucFinalPunctuation
else if (s = 'PO') then
Result := ucOtherPunctuation
else
if (s = 'SM') then
Result := ucMathSymbol
else if (s = 'SC') then
Result := ucCurrencySymbol
else if (s = 'SK') then
Result := ucModifierSymbol
else if (s = 'SO') then
Result := ucOtherSymbol
else
if (s = 'ZS') then
Result := ucSpaceSeparator
else if (s = 'ZL') then
Result := ucLineSeparator
else if (s = 'ZP') then
Result := ucParagraphSeparator
else
if (s = 'CC') then
Result := ucControl
else if (s = 'CF') then
Result := ucFormat
else if (s = 'CS') then
Result := ucSurrogate
else if (s = 'CO') then
Result := ucPrivateUse
else
Result := ucUnassigned;
end;
function StringToCodePoint(ACP : string) : TUnicodeCodePoint;
var
s : string;
begin
s := Trim(ACP);
Result := 0;
if (Length(s) > 0) and (s <> '#') then
Result := StrToInt('$' + s);
end;
{function IsWhiteSpace(const ACodePoint : TUnicodeCodePoint) : Boolean;
begin
case ACodePoint of
$0009..$000D : Result := True;// White_Space # Cc [5] <control-0009>..<control-000D>
$0020 : Result := True;// White_Space # Zs SPACE
$0085 : Result := True;// White_Space # Cc <control-0085>
$00A0 : Result := True;// White_Space # Zs NO-BREAK SPACE
$1680 : Result := True;// White_Space # Zs OGHAM SPACE MARK
$180E : Result := True;// White_Space # Zs MONGOLIAN VOWEL SEPARATOR
$2000..$200A : Result := True;// White_Space # Zs [11] EN QUAD..HAIR SPACE
$2028 : Result := True;// White_Space # Zl LINE SEPARATOR
$2029 : Result := True;// White_Space # Zp PARAGRAPH SEPARATOR
$202F : Result := True;// White_Space # Zs NARROW NO-BREAK SPACE
$205F : Result := True;// White_Space # Zs MEDIUM MATHEMATICAL SPACE
$3000 : Result := True;// White_Space # Zs IDEOGRAPHIC SPACE
else
Result := False;
end;
end;}
function IsWhiteSpace(
const ACodePoint : TUnicodeCodePoint;
const AWhiteSpaces : TCodePointRecArray
) : Boolean;
var
i : Integer;
p : ^TCodePointRec;
begin
p := @AWhiteSpaces[Low(AWhiteSpaces)];
for i := Low(AWhiteSpaces) to High(AWhiteSpaces) do begin
if (p^.LineType = 0) then begin
if (p^.CodePoint = ACodePoint) then
exit(True);
end else begin
if (ACodePoint >= p^.StartCodePoint) and (ACodePoint <= p^.EndCodePoint) then
exit(True);
end;
Inc(p);
end;
Result := False;
end;
function NormalizeBlockName(const AName : string) : string;
var
i, c, k : Integer;
s : string;
begin
c := Length(AName);
SetLength(Result,c);
s := LowerCase(AName);
k := 0;
for i := 1 to c do begin
if (s[1] in ['a'..'z','0'..'9','-']) then begin
k := k + 1;
Result[k] := s[i];
end;
end;
SetLength(Result,k);
end;
procedure ParseBlokcs(
ADataAStream : TMemoryStream;
var ABlocks : TBlocks
);
const
LINE_LENGTH = 1024;
DATA_LENGTH = 25000;
var
p : PAnsiChar;
actualDataLen : Integer;
bufferLength, bufferPos, lineLength, linePos : Integer;
line : ansistring;
function NextLine() : Boolean;
var
locOldPos : Integer;
locOldPointer : PAnsiChar;
begin
Result := False;
locOldPointer := p;
locOldPos := bufferPos;
while (bufferPos < bufferLength) and (p^ <> #10) do begin
Inc(p);
Inc(bufferPos);
end;
if (locOldPos = bufferPos) and (p^ = #10) then begin
lineLength := 0;
Inc(p);
Inc(bufferPos);
linePos := 1;
Result := True;
end else if (locOldPos < bufferPos) then begin
lineLength := (bufferPos - locOldPos);
Move(locOldPointer^,line[1],lineLength);
if (p^ = #10) then begin
Dec(lineLength);
Inc(p);
Inc(bufferPos);
end;
linePos := 1;
Result := True;
end;
end;
function NextToken() : ansistring;
var
k : Integer;
begin
k := linePos;
if (linePos < lineLength) and (line[linePos] in [';','#','.']) then begin
Inc(linePos);
Result := Copy(line,k,(linePos-k));
exit;
end;
while (linePos < lineLength) and not(line[linePos] in [';','#','.']) do
Inc(linePos);
if (linePos > k) then begin
if (line[linePos] in [';','#','.']) then
Result := Copy(line,k,(linePos-k))
else
Result := Copy(line,k,(linePos-k+1));
Result := Trim(Result);
end else begin
Result := '';
end;
end;
procedure ParseLine();
var
locData : TBlockItemRec;
s : ansistring;
begin
s := NextToken();
if (s = '') or (s[1] = '#') then
exit;
locData.RangeStart := StrToInt('$'+s);
s := NextToken();
if (s <> '.') then
raise Exception.CreateFmt('"." expected but "%s" found.',[s]);
s := NextToken();
if (s <> '.') then
raise Exception.CreateFmt('"." expected but "%s" found.',[s]);
s := NextToken();
locData.RangeEnd := StrToInt('$'+s);
s := NextToken();
if (s <> ';') then
raise Exception.CreateFmt('";" expected but "%s" found.',[s]);
locData.Name := Trim(NextToken());
locData.CanonicalName := NormalizeBlockName(locData.Name);
if (Length(ABlocks) <= actualDataLen) then
SetLength(ABlocks,Length(ABlocks)*2);
ABlocks[actualDataLen] := locData;
Inc(actualDataLen);
end;
procedure Prepare();
begin
SetLength(ABlocks,DATA_LENGTH);
actualDataLen := 0;
bufferLength := ADataAStream.Size;
bufferPos := 0;
p := ADataAStream.Memory;
lineLength := 0;
SetLength(line,LINE_LENGTH);
end;
begin
Prepare();
while NextLine() do
ParseLine();
SetLength(ABlocks,actualDataLen);
end;
procedure ParseProps(
ADataAStream : TMemoryStream;
var APropList : TPropListLineRecArray
);
const
LINE_LENGTH = 1024;
DATA_LENGTH = 25000;
var
p : PAnsiChar;
actualDataLen : Integer;
bufferLength, bufferPos, lineLength, linePos : Integer;
line : ansistring;
function NextLine() : Boolean;
var
locOldPos : Integer;
locOldPointer : PAnsiChar;
begin
Result := False;
locOldPointer := p;
locOldPos := bufferPos;
while (bufferPos < bufferLength) and (p^ <> #10) do begin
Inc(p);
Inc(bufferPos);
end;
if (locOldPos = bufferPos) and (p^ = #10) then begin
lineLength := 0;
Inc(p);
Inc(bufferPos);
linePos := 1;
Result := True;
end else if (locOldPos < bufferPos) then begin
lineLength := (bufferPos - locOldPos);
Move(locOldPointer^,line[1],lineLength);
if (p^ = #10) then begin
Dec(lineLength);
Inc(p);
Inc(bufferPos);
end;
linePos := 1;
Result := True;
end;
end;
function NextToken() : ansistring;
var
k : Integer;
begin
k := linePos;
if (linePos < lineLength) and (line[linePos] in [';','#','.']) then begin
Inc(linePos);
Result := Copy(line,k,(linePos-k));
exit;
end;
while (linePos < lineLength) and not(line[linePos] in [';','#','.']) do
Inc(linePos);
if (linePos > k) then begin
if (line[linePos] in [';','#','.']) then
Result := Copy(line,k,(linePos-k))
else
Result := Copy(line,k,(linePos-k+1));
Result := Trim(Result);
end else begin
Result := '';
end;
end;
procedure ParseLine();
var
locCP : Cardinal;
locData : TPropListLineRec;
s : ansistring;
begin
s := NextToken();
if (s = '') or (s[1] = '#') then
exit;
locCP := StrToInt('$'+s);
s := NextToken();
if (s = ';') then begin
locData.CodePoint.LineType := 0;
locData.CodePoint.CodePoint := locCP;
end else begin
if (s = '') or (s <> '.') or (NextToken() <> '.') then
raise Exception.CreateFmt('Invalid line : "%s".',[Copy(line,1,lineLength)]);
locData.CodePoint.LineType := 1;
locData.CodePoint.StartCodePoint := locCP;
locData.CodePoint.EndCodePoint := StrToInt('$'+NextToken());
s := NextToken();
if (s <> ';') then
raise Exception.CreateFmt('"." expected but "%s" found.',[s]);
end;
locData.PropName := Trim(NextToken());
if (Length(APropList) <= actualDataLen) then
SetLength(APropList,Length(APropList)*2);
APropList[actualDataLen] := locData;
Inc(actualDataLen);
end;
procedure Prepare();
begin
SetLength(APropList,DATA_LENGTH);
actualDataLen := 0;
bufferLength := ADataAStream.Size;
bufferPos := 0;
p := ADataAStream.Memory;
lineLength := 0;
SetLength(line,LINE_LENGTH);
end;
begin
Prepare();
while NextLine() do
ParseLine();
SetLength(APropList,actualDataLen);
end;
function FindCodePointsByProperty(
const APropName : string;
const APropList : TPropListLineRecArray
) : TCodePointRecArray;
var
r : TCodePointRecArray;
i, k : Integer;
s : string;
begin
k := 0;
r := nil;
s := LowerCase(Trim(APropName));
for i := Low(APropList) to High(APropList) do begin
if (LowerCase(APropList[i].PropName) = s) then begin
if (k >= Length(r)) then begin
if (k = 0) then
SetLength(r,24)
else
SetLength(r,(2*Length(r)));
end;
r[k] := APropList[i].CodePoint;
Inc(k);
end;
end;
SetLength(r,k);
Result := r;
end;
procedure ParseHangulSyllableTypes(
ADataAStream : TMemoryStream;
var ACodePointList : TCodePointRecArray
);
const
LINE_LENGTH = 1024;
DATA_LENGTH = 25000;
var
p : PAnsiChar;
actualDataLen : Integer;
bufferLength, bufferPos, lineLength, linePos : Integer;
line : ansistring;
function NextLine() : Boolean;
var
locOldPos : Integer;
locOldPointer : PAnsiChar;
begin
Result := False;
locOldPointer := p;
locOldPos := bufferPos;
while (bufferPos < bufferLength) and (p^ <> #10) do begin
Inc(p);
Inc(bufferPos);
end;
if (locOldPos = bufferPos) and (p^ = #10) then begin
lineLength := 0;
Inc(p);
Inc(bufferPos);
linePos := 1;
Result := True;
end else if (locOldPos < bufferPos) then begin
lineLength := (bufferPos - locOldPos);
Move(locOldPointer^,line[1],lineLength);
if (p^ = #10) then begin
Dec(lineLength);
Inc(p);
Inc(bufferPos);
end;
linePos := 1;
Result := True;
end;
end;
function NextToken() : ansistring;
var
k : Integer;
begin
k := linePos;
if (linePos < lineLength) and (line[linePos] = '.') then begin
Inc(linePos);
while (linePos < lineLength) and (line[linePos] = '.') do begin
Inc(linePos);
end;
Result := Copy(line,k,(linePos-k));
exit;
end;
while (linePos < lineLength) and not(line[linePos] in [';','#','.']) do
Inc(linePos);
if (linePos > k) then begin
if (line[linePos] in [';','#','.']) then
Result := Copy(line,k,(linePos-k))
else
Result := Copy(line,k,(linePos-k+1));
Result := Trim(Result);
end else begin
Result := '';
end;
//Inc(linePos);
end;
procedure ParseLine();
var
locData : TCodePointRec;
s : ansistring;
begin
s := NextToken();
if (s = '') or (s[1] = '#') then
exit;
locData.CodePoint := StrToInt('$'+s);
s := NextToken();
if (s = '') or (s[1] in [';','#']) then begin
locData.LineType := 0;
end else begin
if (s <> '..') then
raise Exception.CreateFmt('Unknown line type : "%s"',[Copy(line,1,lineLength)]);
locData.StartCodePoint := locData.CodePoint;
locData.EndCodePoint := StrToInt('$'+NextToken());
locData.LineType := 1;
end;
if (Length(ACodePointList) <= actualDataLen) then
SetLength(ACodePointList,Length(ACodePointList)*2);
ACodePointList[actualDataLen] := locData;
Inc(actualDataLen);
end;
procedure Prepare();
begin
SetLength(ACodePointList,DATA_LENGTH);
actualDataLen := 0;
bufferLength := ADataAStream.Size;
bufferPos := 0;
p := ADataAStream.Memory;
lineLength := 0;
SetLength(line,LINE_LENGTH);
end;
begin
Prepare();
while NextLine() do
ParseLine();
SetLength(ACodePointList,actualDataLen);
end;
function IsHangulSyllable(
const ACodePoint : TUnicodeCodePoint;
const AHangulList : TCodePointRecArray
) : Boolean;
var
i : Integer;
p : ^TCodePointRec;
begin
Result := False;
p := @AHangulList[Low(AHangulList)];
for i := Low(AHangulList) to High(AHangulList) do begin
if ( (p^.LineType = 0) and (ACodePoint = p^.CodePoint) ) or
( (p^.LineType = 1) and (ACodePoint >= p^.StartCodePoint) and (ACodePoint <= p^.EndCodePoint) )
then begin
Result := True;
Break;
end;
Inc(p);
end;
end;
function IndexOf(
const AProp : TPropRec;
const APropList : TPropRecArray;
const AActualLen : Integer
) : Integer;overload;
var
i : Integer;
p : PPropRec;
begin
Result := -1;
if (AActualLen > 0) then begin
p := @APropList[0];
for i := 0 to AActualLen - 1 do begin
if (AProp.Category = p^.Category) and
(AProp.CCC = p^.CCC) and
(AProp.NumericIndex = p^.NumericIndex) and
(AProp.SimpleUpperCase = p^.SimpleUpperCase) and
(AProp.SimpleLowerCase = p^.SimpleLowerCase) and
(AProp.WhiteSpace = p^.WhiteSpace) and
//
(AProp.DecompositionID = p^.DecompositionID) and
(* ( (AProp.DecompositionID = -1 ) and (p^.DecompositionID = -1) ) or
( (AProp.DecompositionID <> -1 ) and (p^.DecompositionID <> -1) )
*)
(AProp.HangulSyllable = p^.HangulSyllable)
then begin
Result := i;
Break;
end;
Inc(p);
end;
end;
end;
function IndexOf(
const AItem : TUnicodeCodePointArray;
const AList : TDecompositionArray
) : Integer;overload;
var
p : TUnicodeCodePointArray;
i : Integer;
begin
Result := -1;
if (Length(AList) = 0) then
exit;
for i := Low(AList) to High(AList) do begin
p := AList[i];
if (Length(p) = Length(AItem)) then begin
if CompareMem(@p[0],@AItem[0],Length(AItem)*SizeOf(TUnicodeCodePoint)) then
exit(i);
end;
end;
Result := -1;
end;
function IndexOf(
const AItem : TNumericValue;
const AList : TNumericValueArray;
const AActualLen : Integer
) : Integer;overload;
var
p : ^TNumericValue;
i : Integer;
begin
Result := -1;
if (AActualLen = 0) then
exit;
p := @AList[Low(AList)];
for i := Low(AList) to AActualLen - 1 do begin
if (AItem = p^) then
exit(i);
Inc(p);
end;
Result := -1;
end;
procedure Parse_UnicodeData(
ADataAStream : TMemoryStream;
var APropList : TPropRecArray;
var ANumericTable : TNumericValueArray;
var ADataLineList : TDataLineRecArray;
var ADecomposition : TDecompositionArray;
const AHangulList : TCodePointRecArray;
const AWhiteSpaces : TCodePointRecArray
);
const
LINE_LENGTH = 1024;
PROP_LENGTH = 5000;
DATA_LENGTH = 25000;
var
p : PAnsiChar;
bufferLength, bufferPos : Integer;
actualPropLen, actualDataLen, actualNumLen : Integer;
line : ansistring;
lineLength, linePos : Integer;
function NextLine() : Boolean;
var
locOldPos : Integer;
locOldPointer : PAnsiChar;
begin
Result := False;
locOldPointer := p;
locOldPos := bufferPos;
while (bufferPos < bufferLength) and (p^ <> #10) do begin
Inc(p);
Inc(bufferPos);
end;
if (locOldPos < bufferPos) then begin
lineLength := (bufferPos - locOldPos);
Move(locOldPointer^,line[1],lineLength);
if (p^ = #10) then begin
Dec(lineLength);
Inc(p);
Inc(bufferPos);
end;
if (lineLength > 7) then begin
linePos := 1;
Result := True;
end;
end;
end;
function NextToken() : ansistring;
var
k : Integer;
begin
k := linePos;
while (linePos < lineLength) and not(line[linePos] in [';','#']) do
Inc(linePos);
if (linePos > k) then begin
if (line[linePos] in [';','#']) then
Result := Copy(line,k,(linePos-k))
else
Result := Copy(line,k,(linePos-k+1));
Result := Trim(Result);
end else begin
Result := '';
end;
Inc(linePos);
end;
function ParseCanonicalDecomposition(AStr : ansistring) : TUnicodeCodePointArray;
var
locStr, ks : ansistring;
k0,k : Integer;
begin
SetLength(Result,0);
locStr := UpperCase(Trim(AStr));
if (locStr = '') or (locStr[1] = '<') then
exit;
k0 := 1;
k := 1;
while (k <= Length(locStr)) do begin
while (k <= Length(locStr)) and (locStr[k] in ['0'..'9','A'..'F']) do
inc(k);
ks := Trim(Copy(locStr,k0,k-k0));
SetLength(Result,Length(Result)+1);
Result[Length(Result)-1] := StringToCodePoint(ks);
Inc(k);
k0 := k;
end;
end;
procedure ParseLine();
var
locCP : TUnicodeCodePoint;
locProp : TPropRec;
locData : TDataLineRec;
s : ansistring;
locRangeStart, locRangeEnd : Boolean;
k : Integer;
locDecompItem : TUnicodeCodePointArray;
numVal : TNumericValue;
begin
FillChar(locData,SizeOf(locData),#0);
FillChar(locProp,SizeOf(locProp),#0);
locCP := StrToInt('$'+NextToken());
s := NextToken();
locRangeStart := AnsiEndsText(', First>',s);
if locRangeStart then
locRangeEnd := False
else
locRangeEnd := AnsiEndsText(', Last>',s);
if locRangeStart then begin
locData.LineType := 1;
locData.StartCodePoint := locCP;
end else if locRangeEnd then begin
ADataLineList[actualDataLen - 1].EndCodePoint := locCP;
exit;
//locData.EndCodePoint := locCP;
end else begin
locData.LineType := 0;
locData.CodePoint := locCP;
end;
locProp.Category := StrToCategory(NextToken());
locProp.CCC := StrToInt(NextToken());//Canonical_Combining_Class
NextToken();//Bidi_Class
s := NextToken();//Decomposition_Type
locDecompItem := ParseCanonicalDecomposition(s);
if (Length(locDecompItem) = 0) then
locProp.DecompositionID := -1
else begin
locProp.DecompositionID := IndexOf(locDecompItem,ADecomposition);
if (locProp.DecompositionID = -1) then begin
k := Length(ADecomposition);
locProp.DecompositionID := k;
SetLength(ADecomposition,k+1);
ADecomposition[k] := locDecompItem;
end;
end;
numVal := EvaluateFloat(NextToken());
if (numVal <> Double(0.0)) then begin
NextToken();
NextToken();
end else begin
s := NextToken();
if (s <> '') then
numVal := EvaluateFloat(s);
s := NextToken();
if (numVal = Double(0.0)) then
numVal := EvaluateFloat(s);
end;
k := IndexOf(numVal,ANumericTable,actualNumLen);
if (k = -1) then begin
if (actualNumLen >= Length(ANumericTable)) then
SetLength(ANumericTable,(actualNumLen*2));
ANumericTable[actualNumLen] := numVal;
k := actualNumLen;
Inc(actualNumLen);
end;
locProp.NumericIndex := k;
NextToken();//Bidi_Mirroed
NextToken();//Unicode_l_Name
NextToken();//ISO_Comment
locProp.SimpleUpperCase := StringToCodePoint(NextToken());
locProp.SimpleLowerCase := StringToCodePoint(NextToken());
NextToken();//Simple_Title_Case_Mapping
locProp.WhiteSpace := IsWhiteSpace(locCP,AWhiteSpaces);
locProp.HangulSyllable := IsHangulSyllable(locCP,AHangulList);
k := IndexOf(locProp,APropList,actualPropLen);
if (k = -1) then begin
k := actualPropLen;
locProp.PropID := k{ + 1};
APropList[k] := locProp;
Inc(actualPropLen);
end;
locData.PropID := k;
ADataLineList[actualDataLen] := locData;
Inc(actualDataLen);
end;
procedure Prepare();
var
r : TPropRec;
begin
SetLength(APropList,PROP_LENGTH);
actualPropLen := 0;
SetLength(ADataLineList,DATA_LENGTH);
actualDataLen := 0;
bufferLength := ADataAStream.Size;
bufferPos := 0;
p := ADataAStream.Memory;
lineLength := 0;
SetLength(line,LINE_LENGTH);
SetLength(ANumericTable,500);
actualNumLen := 0;
FillChar(r,SizeOf(r),#0);
r.PropID := 0;
r.Category := ucUnassigned;
r.DecompositionID := -1;
r.NumericIndex := 0;
APropList[0] := r;
Inc(actualPropLen);
ANumericTable[0] := 0;
Inc(actualNumLen);
end;
begin
Prepare();
while NextLine() do
ParseLine();
SetLength(APropList,actualPropLen);
SetLength(ADataLineList,actualDataLen);
SetLength(ANumericTable,actualNumLen);
end;
function GetPropID(
ACodePoint : TUnicodeCodePoint;
const ADataLineList : TDataLineRecArray
) : Cardinal;
var
i : Integer;
p : PDataLineRec;
begin
Result := 0;
p := @ADataLineList[Low(ADataLineList)];
for i := Low(ADataLineList) to High(ADataLineList) do begin
if (p^.LineType = 0) then begin
if (p^.CodePoint = ACodePoint) then begin
Result := p^.PropID;
Break;
end;
end else begin
if (p^.StartCodePoint <= ACodePoint) and (p^.EndCodePoint >= ACodePoint) then begin
Result := p^.PropID;
Break;
end;
end;
Inc(p);
end;
end;
procedure MakeDecomposition(
const ARawData : TDecompositionArray;
var ABook : TDecompositionBook
);
var
i, c, locPos : Integer;
locItem : TUnicodeCodePointArray;
begin
c := 0;
for i := Low(ARawData) to High(ARawData) do
c := c + Length(ARawData[i]);
SetLength(ABook.CodePoints,c);
SetLength(ABook.Index,Length(ARawData));
locPos := 0;
for i := Low(ARawData) to High(ARawData) do begin
locItem := ARawData[i];
ABook.Index[i].StartPosition := locPos;
ABook.Index[i].Length := Length(locItem);
Move(locItem[0],ABook.CodePoints[locPos],(Length(locItem) * SizeOf(TUnicodeCodePoint)));
locPos := locPos + Length(locItem);
end;
end;
type
PBmpSecondTableItem = ^TBmpSecondTableItem;
function IndexOf(
const AItem : PBmpSecondTableItem;
const ATable : TBmpSecondTable;
const ATableActualLength : Integer
) : Integer;overload;
var
i : Integer;
p : PBmpSecondTableItem;
begin
Result := -1;
if (ATableActualLength > 0) then begin
p := @ATable[0];
for i := 0 to ATableActualLength - 1 do begin
if CompareMem(p,AItem,SizeOf(TBmpSecondTableItem)) then begin
Result := i;
Break;
end;
Inc(p);
end;
end;
end;
procedure MakeBmpTables(
var AFirstTable : TBmpFirstTable;
var ASecondTable : TBmpSecondTable;
const ADataLineList : TDataLineRecArray
);
var
locLowByte, locHighByte : Byte;
locTableItem : TBmpSecondTableItem;
locCP : TUnicodeCodePoint;
i, locSecondActualLen : Integer;
begin
SetLength(ASecondTable,120);
locSecondActualLen := 0;
for locHighByte := 0 to 255 do begin
FillChar(locTableItem,SizeOf(locTableItem),#0);
for locLowByte := 0 to 255 do begin
locCP := (locHighByte * 256) + locLowByte;
locTableItem[locLowByte] := GetPropID(locCP,ADataLineList)// - 1;
end;
i := IndexOf(@locTableItem,ASecondTable,locSecondActualLen);
if (i = -1) then begin
if (locSecondActualLen = Length(ASecondTable)) then
SetLength(ASecondTable,locSecondActualLen + 50);
i := locSecondActualLen;
ASecondTable[i] := locTableItem;
Inc(locSecondActualLen);
end;
AFirstTable[locHighByte] := i;
end;
SetLength(ASecondTable,locSecondActualLen);
end;
type
P3lvlBmp3TableItem = ^T3lvlBmp3TableItem;
function IndexOf(
const AItem : P3lvlBmp3TableItem;
const ATable : T3lvlBmp3Table;
const ATableActualLength : Integer
) : Integer;overload;
var
i : Integer;
p : P3lvlBmp3TableItem;
begin
Result := -1;
if (ATableActualLength > 0) then begin
p := @ATable[0];
for i := 0 to ATableActualLength - 1 do begin
if CompareMem(p,AItem,SizeOf(T3lvlBmp3TableItem)) then begin
Result := i;
Break;
end;
Inc(p);
end;
end;
end;
type
P3lvlBmp2TableItem = ^T3lvlBmp2TableItem;
function IndexOf(
const AItem : P3lvlBmp2TableItem;
const ATable : T3lvlBmp2Table
) : Integer;overload;
var
i : Integer;
p : P3lvlBmp2TableItem;
begin
Result := -1;
if (Length(ATable) > 0) then begin
p := @ATable[0];
for i := 0 to Length(ATable) - 1 do begin
if CompareMem(p,AItem,SizeOf(T3lvlBmp2TableItem)) then begin
Result := i;
Break;
end;
Inc(p);
end;
end;
end;
procedure MakeBmpTables3Levels(
var AFirstTable : T3lvlBmp1Table;
var ASecondTable : T3lvlBmp2Table;
var AThirdTable : T3lvlBmp3Table;
const ADataLineList : TDataLineRecArray
);
var
locLowByte0, locLowByte1, locHighByte : Byte;
locTableItem2 : T3lvlBmp2TableItem;
locTableItem3 : T3lvlBmp3TableItem;
locCP : TUnicodeCodePoint;
i, locThirdActualLen : Integer;
begin
SetLength(AThirdTable,120);
locThirdActualLen := 0;
for locHighByte := 0 to 255 do begin
FillChar(locTableItem2,SizeOf(locTableItem2),#0);
for locLowByte0 := 0 to 15 do begin
FillChar(locTableItem3,SizeOf(locTableItem3),#0);
for locLowByte1 := 0 to 15 do begin
locCP := (locHighByte * 256) + (locLowByte0*16) + locLowByte1;
locTableItem3[locLowByte1] := GetPropID(locCP,ADataLineList);
end;
i := IndexOf(@locTableItem3,AThirdTable,locThirdActualLen);
if (i = -1) then begin
if (locThirdActualLen = Length(AThirdTable)) then
SetLength(AThirdTable,locThirdActualLen + 50);
i := locThirdActualLen;
AThirdTable[i] := locTableItem3;
Inc(locThirdActualLen);
end;
locTableItem2[locLowByte0] := i;
end;
i := IndexOf(@locTableItem2,ASecondTable);
if (i = -1) then begin
i := Length(ASecondTable);
SetLength(ASecondTable,(i + 1));
ASecondTable[i] := locTableItem2;
end;
AFirstTable[locHighByte] := i;
end;
SetLength(AThirdTable,locThirdActualLen);
end;
procedure GenerateLicenceText(ADest : TStream);
var
s : ansistring;
begin
s := SLicenseText + sLineBreak + sLineBreak;
ADest.Write(s[1],Length(s));
end;
procedure GenerateBmpTables(
ADest : TStream;
var AFirstTable : TBmpFirstTable;
var ASecondTable : TBmpSecondTable
);
procedure AddLine(const ALine : ansistring);
var
buffer : ansistring;
begin
buffer := ALine + sLineBreak;
ADest.Write(buffer[1],Length(buffer));
end;
var
i, j, c : Integer;
locLine : string;
begin
AddLine('const');
AddLine(' UC_TABLE_1 : array[0..255] of Byte = (');
locLine := '';
for i := Low(AFirstTable) to High(AFirstTable) - 1 do begin
locLine := locLine + IntToStr(AFirstTable[i]) + ',';
if (((i+1) mod 16) = 0) then begin
locLine := ' ' + locLine;
AddLine(locLine);
locLine := '';
end;
end;
locLine := locLine + IntToStr(AFirstTable[High(AFirstTable)]);
locLine := ' ' + locLine;
AddLine(locLine);
AddLine(' );' + sLineBreak);
AddLine(' UC_TABLE_2 : array[0..(256*' + IntToStr(Length(ASecondTable)) +'-1)] of Word =(');
c := High(ASecondTable);
for i := Low(ASecondTable) to c do begin
locLine := '';
for j := Low(TBmpSecondTableItem) to High(TBmpSecondTableItem) do begin
locLine := locLine + IntToStr(ASecondTable[i][j]) + ',';
if (((j+1) mod 16) = 0) then begin
if (i = c) and (j = 255) then
Delete(locLine,Length(locLine),1);
locLine := ' ' + locLine;
AddLine(locLine);
locLine := '';
end;
end;
end;
AddLine(' );' + sLineBreak);
end;
//----------------------------------
procedure Generate3lvlBmpTables(
ADest : TStream;
var AFirstTable : T3lvlBmp1Table;
var ASecondTable : T3lvlBmp2Table;
var AThirdTable : T3lvlBmp3Table
);
procedure AddLine(const ALine : ansistring);
var
buffer : ansistring;
begin
buffer := ALine + sLineBreak;
ADest.Write(buffer[1],Length(buffer));
end;
var
i, j, c : Integer;
locLine : string;
begin
AddLine('const');
AddLine(' UC_TABLE_1 : array[0..255] of Byte = (');
locLine := '';
for i := Low(AFirstTable) to High(AFirstTable) - 1 do begin
locLine := locLine + IntToStr(AFirstTable[i]) + ',';
if (((i+1) mod 16) = 0) then begin
locLine := ' ' + locLine;
AddLine(locLine);
locLine := '';
end;
end;
locLine := locLine + IntToStr(AFirstTable[High(AFirstTable)]);
locLine := ' ' + locLine;
AddLine(locLine);
AddLine(' );' + sLineBreak);
AddLine(' UC_TABLE_2 : array[0..' + IntToStr(Length(ASecondTable)-1) +'] of array[0..15] of Word = (');
c := High(ASecondTable);
for i := Low(ASecondTable) to c do begin
locLine := '(';
for j := Low(T3lvlBmp2TableItem) to High(T3lvlBmp2TableItem) do
locLine := locLine + IntToStr(ASecondTable[i][j]) + ',';
Delete(locLine,Length(locLine),1);
locLine := ' ' + locLine + ')';
if (i < c) then
locLine := locLine + ',';
AddLine(locLine);
end;
AddLine(' );' + sLineBreak);
AddLine(' UC_TABLE_3 : array[0..' + IntToStr(Length(AThirdTable)-1) +'] of array[0..15] of Word = (');
c := High(AThirdTable);
for i := Low(AThirdTable) to c do begin
locLine := '(';
for j := Low(T3lvlBmp3TableItem) to High(T3lvlBmp3TableItem) do
locLine := locLine + IntToStr(AThirdTable[i][j]) + ',';
Delete(locLine,Length(locLine),1);
locLine := ' ' + locLine + ')';
if (i < c) then
locLine := locLine + ',';
AddLine(locLine);
end;
AddLine(' );' + sLineBreak);
end;
function UInt24ToStr(const AValue : UInt24; const AEndian : TEndianKind): string;inline;
begin
if (AEndian = ekBig) then
Result := Format(
'(byte2 : $%s; byte1 : $%s; byte0 : $%s;)',
[ IntToHex(AValue.byte2,2), IntToHex(AValue.byte1,2),
IntToHex(AValue.byte0,2)
]
)
else
Result := Format(
'(byte0 : $%s; byte1 : $%s; byte2 : $%s;)',
[ IntToHex(AValue.byte0,2), IntToHex(AValue.byte1,2),
IntToHex(AValue.byte2,2)
]
);
end;
procedure GeneratePropTable(
ADest : TStream;
const APropList : TPropRecArray;
const AEndian : TEndianKind
);
procedure AddLine(const ALine : ansistring);
var
buffer : ansistring;
begin
buffer := ALine + sLineBreak;
ADest.Write(buffer[1],Length(buffer));
end;
var
i : Integer;
locLine : string;
p : PPropRec;
begin
AddLine('');
AddLine('const');
AddLine(' UC_PROP_REC_COUNT = ' + IntToStr(Length(APropList)) + ';');
AddLine(' UC_PROP_ARRAY : array[0..(UC_PROP_REC_COUNT-1)] of TUC_Prop = (');
p := @APropList[0];
for i := Low(APropList) to High(APropList) - 1 do begin
locLine := ' (CategoryData : ' + IntToStr(p^.CategoryData) + ';' +
' CCC : ' + IntToStr(p^.CCC) + ';' +
' NumericIndex : ' + IntToStr(p^.NumericIndex) + ';' +
' SimpleUpperCase : ' + UInt24ToStr(p^.SimpleUpperCase,AEndian) + ';' +
' SimpleLowerCase : ' + UInt24ToStr(p^.SimpleLowerCase,AEndian) + ';' +
' DecompositionID : ' + IntToStr(p^.DecompositionID) + '),';
AddLine(locLine);
Inc(p);
end;
locLine := //' (Category : TUnicodeCategory.' + GetEnumName(pti,Ord(p^.Category)) + ';' +
' (CategoryData : ' + IntToStr(p^.CategoryData) + ';' +
' CCC : ' + IntToStr(p^.CCC) + ';' +
' NumericIndex : ' + IntToStr(p^.NumericIndex) + ';' +
' SimpleUpperCase : ' + UInt24ToStr(p^.SimpleUpperCase,AEndian) + ';' +
' SimpleLowerCase : ' + UInt24ToStr(p^.SimpleLowerCase,AEndian) + ';' +
' DecompositionID : ' + IntToStr(p^.DecompositionID) + ')';
AddLine(locLine);
AddLine(' );' + sLineBreak);
end;
procedure GenerateNumericTable(
ADest : TStream;
const ANumList : TNumericValueArray;
const ACompleteUnit : Boolean
);
procedure AddLine(const ALine : ansistring);
var
buffer : ansistring;
begin
buffer := ALine + sLineBreak;
ADest.Write(buffer[1],Length(buffer));
end;
var
i : Integer;
locLine : string;
p : ^TNumericValue;
begin
if ACompleteUnit then begin
GenerateLicenceText(ADest);
AddLine('unit unicodenumtable;');
AddLine('interface');
AddLine('');
end;
AddLine('');
AddLine('const');
AddLine(' UC_NUMERIC_COUNT = ' + IntToStr(Length(ANumList)) + ';');
AddLine(' UC_NUMERIC_ARRAY : array[0..(UC_NUMERIC_COUNT-1)] of Double = (');
locLine := '';
p := @ANumList[0];
for i := Low(ANumList) to High(ANumList) - 1 do begin
locLine := locLine + FloatToStr(p^,FS) + ' ,';
if (i > 0) and ((i mod 8) = 0) then begin
AddLine(' ' + locLine);
locLine := '';
end;
Inc(p);
end;
locLine := locLine + FloatToStr(p^,FS);
AddLine(' ' + locLine);
AddLine(' );' + sLineBreak);
if ACompleteUnit then begin
AddLine('');
AddLine('implementation');
AddLine('');
AddLine('end.');
end;
end;
procedure GenerateDecompositionBookTable(
ADest : TStream;
const ABook : TDecompositionBook;
const AEndian : TEndianKind
);
procedure AddLine(const ALine : ansistring);
var
buffer : ansistring;
begin
buffer := ALine + sLineBreak;
ADest.Write(buffer[1],Length(buffer));
end;
var
i, k : Integer;
p : ^TDecompositionIndexRec;
cp : ^TUnicodeCodePoint;
cp24 : UInt24;
locLine : string;
begin
AddLine('const');
AddLine(' UC_DEC_BOOK_INDEX_LENGTH = ' + IntToStr(Length(ABook.Index)) + ';');
AddLine(' UC_DEC_BOOK_DATA_LENGTH = ' + IntToStr(Length(ABook.CodePoints)) + ';');
AddLine('type');
AddLine(' TDecompositionIndexRec = packed record');
AddLine(' StartPosition : Word;');
AddLine(' Length : Byte;');
AddLine(' end;');
AddLine(' TDecompositionBookRec = packed record');
AddLine(' Index : array[0..(UC_DEC_BOOK_INDEX_LENGTH-1)] of TDecompositionIndexRec;');
AddLine(' CodePoints : array[0..(UC_DEC_BOOK_DATA_LENGTH-1)] of UInt24;');
AddLine(' end;');
AddLine('const');
AddLine(' UC_DEC_BOOK_DATA : TDecompositionBookRec = (');
p := @ABook.Index[0];
AddLine(' Index : (// Index BEGIN');
k := 0;
locLine := ' ';
for i := Low(ABook.Index) to High(ABook.Index) - 1 do begin
locLine := locLine + '(StartPosition : ' + IntToStr(p^.StartPosition) + ';' +
' Length : ' + IntToStr(p^.Length) + '), ';
k := k + 1;
if (k >= 2) then begin
AddLine(locLine);
locLine := ' ';
k := 0;
end;
Inc(p);
end;
locLine := locLine + '(StartPosition : ' + IntToStr(p^.StartPosition) + ';' +
' Length : ' + IntToStr(p^.Length) + ')';
AddLine(locLine);
AddLine(' ); // Index END');
cp := @ABook.CodePoints[0];
AddLine(' CodePoints : (// CodePoints BEGIN');
k := 0;
locLine := ' ';
for i := Low(ABook.CodePoints) to High(ABook.CodePoints) - 1 do begin
cp24 := cp^;
locLine := locLine + Format('%s,',[UInt24ToStr(cp24,AEndian)]);
Inc(k);
if (k >= 16) then begin
AddLine(locLine);
k := 0;
locLine := ' ';
end;
Inc(cp);
end;
cp24 := cp^;
locLine := locLine + Format('%s',[UInt24ToStr(cp24,AEndian)]);
AddLine(locLine);
AddLine(' ); // CodePoints END');
AddLine(' );' + sLineBreak);
end;
procedure GenerateOutBmpTable(
ADest : TStream;
const AList : TDataLineRecArray
);
procedure AddLine(const ALine : ansistring);
var
buffer : ansistring;
begin
buffer := ALine + sLineBreak;
ADest.Write(buffer[1],Length(buffer));
end;
var
i, j : Integer;
locLine : string;
p : PDataLineRec;
begin
AddLine('');
//AddLine(' UC_PROP_REC_COUNT = ' + IntToStr(Length(APropList)) + ';');
//AddLine(' UC_PROP_ARRAY : array[0..(UC_PROP_REC_COUNT-1)] of TUC_Prop = (');
j := -1;
p := @AList[0];
for i := 0 to Length(AList) - 1 do begin
if ((p^.LineType = 0) and (p^.CodePoint >$FFFF)) or
(p^.StartCodePoint > $FFFF)
then begin
j := i;
Break;
end;
Inc(p);
end;
if (j < 0) then
exit;
for i := j to Length(AList) - 2 do begin
locLine := ' (PropID : ' + IntToStr(p^.PropID) + ';' +
' CodePoint : ' + IntToStr(p^.CodePoint) + ';' +
' RangeEnd : ' + IntToStr(p^.EndCodePoint) + '),' ;
AddLine(locLine);
Inc(p);
end;
locLine := ' (PropID : ' + IntToStr(p^.PropID) + ';' +
' CodePoint : ' + IntToStr(p^.CodePoint) + ';' +
' RangeEnd : ' + IntToStr(p^.EndCodePoint) + ')' ;
AddLine(locLine);
AddLine(' );' + sLineBreak);
end;
function Compress(const AData : TDataLineRecArray) : TDataLineRecArray;
var
k, i, locResLen : Integer;
q, p, pr : PDataLineRec;
k_end : TUnicodeCodePoint;
begin
locResLen := 1;
SetLength(Result,Length(AData));
FillChar(Result[0],Length(Result),#0);
Result[0] := AData[0];
q := @AData[0];
k := 0;
while (k < Length(AData)) do begin
if (q^.LineType = 0) then
k_end := q^.CodePoint
else
k_end := q^.EndCodePoint;
if ((k+1) = Length(AData)) then begin
i := k;
end else begin
p := @AData[k+1];
i := k +1;
while (i < (Length(AData) {- 1})) do begin
if (p^.PropID <> q^.PropID) then begin
i := i - 1;
Break;
end;
if (p^.LineType = 0) then begin
if (p^.CodePoint <> (k_end + 1)) then begin
i := i - 1;
Break;
end;
Inc(k_end);
end else begin
if (p^.StartCodePoint <> (k_end + 1)) then begin
i := i - 1;
Break;
end;
k_end := p^.EndCodePoint;
end;
Inc(i);
Inc(p);
end;
end;
{if (i = k) then begin
Result[locResLen] := q^;
Inc(locResLen);
end else begin }
p := @AData[i];
pr := @Result[locResLen];
pr^.PropID := q^.PropID;
if (q^.LineType = 0) then
pr^.StartCodePoint := q^.CodePoint
else
pr^.StartCodePoint := q^.StartCodePoint;
pr^.LineType := 1;
if (p^.LineType = 0) then
pr^.EndCodePoint := p^.CodePoint
else
pr^.EndCodePoint := p^.EndCodePoint;
Inc(locResLen);
//end;
k := i + 1;
if (k = Length(AData)) then
Break;
q := @AData[k];
end;
SetLength(Result,locResLen);
end;
procedure ParseUCAFile(
ADataAStream : TMemoryStream;
var ABook : TUCA_DataBook
);
const
LINE_LENGTH = 1024;
DATA_LENGTH = 25000;
var
p : PAnsiChar;
actualDataLen : Integer;
bufferLength, bufferPos, lineLength, linePos : Integer;
line : ansistring;
function NextLine() : Boolean;
var
locOldPos : Integer;
locOldPointer : PAnsiChar;
begin
Result := False;
locOldPointer := p;
locOldPos := bufferPos;
while (bufferPos < bufferLength) and (p^ <> #10) do begin
Inc(p);
Inc(bufferPos);
end;
if (locOldPos = bufferPos) and (p^ = #10) then begin
lineLength := 0;
Inc(p);
Inc(bufferPos);
linePos := 1;
Result := True;
end else if (locOldPos < bufferPos) then begin
lineLength := (bufferPos - locOldPos) + 1;
Move(locOldPointer^,line[1],lineLength);
if (p^ = #10) then begin
Dec(lineLength);
Inc(p);
Inc(bufferPos);
end;
linePos := 1;
Result := True;
end;
end;
procedure SkipSpace();
begin
while (linePos < lineLength) and (line[linePos] in [' ',#9]) do
Inc(linePos);
end;
function NextToken() : ansistring;
const C_SEPARATORS = [';','#','.','[',']','*','@'];
var
k : Integer;
begin
SkipSpace();
k := linePos;
if (linePos <= lineLength) and (line[linePos] in C_SEPARATORS) then begin
Result := line[linePos];
Inc(linePos);
exit;
end;
while (linePos <= lineLength) and not(line[linePos] in (C_SEPARATORS+[' '])) do
Inc(linePos);
if (linePos > k) then begin
if (line[Min(linePos,lineLength)] in C_SEPARATORS) then
Result := Copy(line,k,(linePos-k))
else
Result := Copy(line,k,(linePos-k+1));
Result := Trim(Result);
end else begin
Result := '';
end;
end;
procedure CheckToken(const AToken : string);
var
a, b : string;
begin
a := LowerCase(Trim(AToken));
b := LowerCase(Trim(NextToken()));
if (a <> b) then
raise Exception.CreateFmt('Expected token "%s" but found "%s".',[a,b]);
end;
function ReadWeightBlock(var ADest : TUCA_WeightRec) : Boolean;
var
s :AnsiString;
k : Integer;
begin
Result := False;
s := NextToken();
if (s <> '[') then
exit;
s := NextToken();
if (s = '.') then
ADest.Variable := False
else begin
if (s <> '*') then
raise Exception.CreateFmt('Expected "%s" but found "%s".',['*',s]);
ADest.Variable := True;
end;
ADest.Weights[0] := StrToInt('$'+NextToken());
for k := 1 to 3 do begin
CheckToken('.');
ADest.Weights[k] := StrToInt('$'+NextToken());
end;
CheckToken(']');
Result := True;
end;
procedure ParseHeaderVar();
var
s,ss : string;
k : Integer;
begin
s := NextToken();
if (s = 'version') then begin
ss := '';
while True do begin
s := NextToken();
if (s = '') then
Break;
ss := ss + s;
end;
ABook.Version := ss;
end else if (s = 'variable') then begin
if (s = 'blanked') then
ABook.VariableWeight := ucaBlanked
else if (s = 'non-ignorable') then
ABook.VariableWeight := ucaNonIgnorable
else if (s = 'shifted') then
ABook.VariableWeight := ucaShifted
else if (s = 'shift-trimmed') then
ABook.VariableWeight := ucaShiftedTrimmed
else if (s = 'ignoresp') then
ABook.VariableWeight := ucaIgnoreSP
else
raise Exception.CreateFmt('Unknown "@variable" type : "%s".',[s]);
end else if (s = 'backwards') or (s = 'forwards') then begin
ss := s;
s := NextToken();
k := StrToInt(s);
if (k < 1) or (k > 4) then
raise Exception.CreateFmt('Invalid "%s" position : %d.',[ss,s]);
ABook.Backwards[k] := (s = 'backwards');
end;
end;
procedure ParseLine();
var
locData : ^TUCA_LineRec;
s : ansistring;
kc : Integer;
begin
if (Length(ABook.Lines) <= actualDataLen) then
SetLength(ABook.Lines,Length(ABook.Lines)*2);
locData := @ABook.Lines[actualDataLen];
s := NextToken();
if (s = '') or (s[1] = '#') then
exit;
if (s[1] = '@') then begin
ParseHeaderVar();
exit;
end;
SetLength(locData^.CodePoints,10);
locData^.CodePoints[0] := StrToInt('$'+s);
kc := 1;
while True do begin
s := Trim(NextToken());
if (s = '') then
exit;
if (s = ';') then
Break;
locData^.CodePoints[kc] := StrToInt('$'+s);
Inc(kc);
end;
if (kc = 0) then
exit;
SetLength(locData^.CodePoints,kc);
SetLength(locData^.Weights,24);
kc := 0;
while ReadWeightBlock(locData^.Weights[kc]) do begin
Inc(kc);
end;
SetLength(locData^.Weights,kc);
Inc(actualDataLen);
end;
procedure Prepare();
var
k : Integer;
begin
ABook.VariableWeight := ucaShifted;
for k := Low(ABook.Backwards) to High(ABook.Backwards) do
ABook.Backwards[k] := False;
SetLength(ABook.Lines,DATA_LENGTH);
actualDataLen := 0;
bufferLength := ADataAStream.Size;
bufferPos := 0;
p := ADataAStream.Memory;
lineLength := 0;
SetLength(line,LINE_LENGTH);
end;
begin
Prepare();
while NextLine() do
ParseLine();
SetLength(ABook.Lines,actualDataLen);
end;
procedure Dump(X : array of TUnicodeCodePoint; const ATitle : string = '');
var
i : Integer;
begin
Write(ATitle, ' ');
for i := 0 to Length(X) - 1 do
Write(X[i],' ');
WriteLn();
end;
function IsGreaterThan(A, B : PUCA_LineRec) : Integer;
var
i, hb : Integer;
begin
if (A=B) then
exit(0);
Result := 1;
hb := Length(B^.CodePoints) - 1;
for i := 0 to Length(A^.CodePoints) - 1 do begin
if (i > hb) then
exit;
if (A^.CodePoints[i] < B^.CodePoints[i]) then
exit(-1);
if (A^.CodePoints[i] > B^.CodePoints[i]) then
exit(1);
end;
if (Length(A^.CodePoints) = Length(B^.CodePoints)) then
exit(0);
exit(-1);
end;
Procedure QuickSort(var AList: TUCA_DataBookIndex; L, R : Longint;
ABook : PUCA_DataBook);
var
I, J : Longint;
P, Q : Integer;
begin
repeat
I := L;
J := R;
P := AList[ (L + R) div 2 ];
repeat
while IsGreaterThan(@ABook^.Lines[P], @ABook^.Lines[AList[i]]) > 0 do
I := I + 1;
while IsGreaterThan(@ABook^.Lines[P], @ABook^.Lines[AList[J]]) < 0 do
J := J - 1;
If I <= J then
begin
Q := AList[I];
AList[I] := AList[J];
AList[J] := Q;
I := I + 1;
J := J - 1;
end;
until I > J;
// sort the smaller range recursively
// sort the bigger range via the loop
// Reasons: memory usage is O(log(n)) instead of O(n) and loop is faster than recursion
if J - L < R - I then
begin
if L < J then
QuickSort(AList, L, J, ABook);
L := I;
end
else
begin
if I < R then
QuickSort(AList, I, R, ABook);
R := J;
end;
until L >= R;
end;
function CreateIndex(ABook : PUCA_DataBook) : TUCA_DataBookIndex;
var
r : TUCA_DataBookIndex;
i, c : Integer;
begin
c := Length(ABook^.Lines);
SetLength(r,c);
for i := 0 to c - 1 do
r[i] := i;
QuickSort(r,0,c-1,ABook);
Result := r;
end;
function ConstructContextTree(
const AContext : PUCA_LineContextRec;
var ADestBuffer;
const ADestBufferLength : Cardinal
) : PUCA_PropItemContextTreeRec;forward;
function ConstructItem(
AItem : PUCA_PropItemRec;
ACodePoint : Cardinal;
AValid : Byte;
AChildCount : Byte;
const AWeights : array of TUCA_WeightRec;
const AStoreCP : Boolean;
const AContext : PUCA_LineContextRec;
const ADeleted : Boolean
) : Cardinal;
var
i : Integer;
p : PUCA_PropItemRec;
pw : PUCA_PropWeights;
pb : PByte;
hasContext : Boolean;
contextTree : PUCA_PropItemContextTreeRec;
wl : Integer;
begin
p := AItem;
p^.Size := 0;
p^.Flags := 0;
p^.WeightLength := 0;
SetBit(p^.Flags,AItem^.FLAG_VALID,(AValid <> 0));
p^.ChildCount := AChildCount;
hasContext := (AContext <> nil) and (Length(AContext^.Data) > 0);
if hasContext then
wl := 0
else
wl := Length(AWeights);
p^.WeightLength := wl;
if (wl = 0) then begin
Result := SizeOf(TUCA_PropItemRec);
if ADeleted then
SetBit(AItem^.Flags,AItem^.FLAG_DELETION,True);
end else begin
Result := SizeOf(TUCA_PropItemRec) + (wl*SizeOf(TUCA_PropWeights));
pb := PByte(PtrUInt(p) + SizeOf(TUCA_PropItemRec));
Unaligned(PWord(pb)^) := AWeights[0].Weights[0];
pb := pb + 2;
if (AWeights[0].Weights[1] > High(Byte)) then begin
Unaligned(PWord(pb)^) := AWeights[0].Weights[1];
pb := pb + 2;
end else begin
SetBit(p^.Flags,p^.FLAG_COMPRESS_WEIGHT_1,True);
pb^ := AWeights[0].Weights[1];
pb := pb + 1;
Result := Result - 1;
end;
if (AWeights[0].Weights[2] > High(Byte)) then begin
Unaligned(PWord(pb)^) := AWeights[0].Weights[2];
pb := pb + 2;
end else begin
SetBit(p^.Flags,p^.FLAG_COMPRESS_WEIGHT_2,True);
pb^ := AWeights[0].Weights[2];
pb := pb + 1;
Result := Result - 1;
end;
pw := PUCA_PropWeights(pb);
for i := 1 to wl - 1 do begin
pw^.Weights[0] := AWeights[i].Weights[0];
pw^.Weights[1] := AWeights[i].Weights[1];
pw^.Weights[2] := AWeights[i].Weights[2];
//pw^.Variable := BoolToByte(AWeights[i].Variable);
Inc(pw);
end;
end;
hasContext := (AContext <> nil) and (Length(AContext^.Data) > 0);
if AStoreCP or hasContext then begin
Unaligned(PUInt24(PtrUInt(AItem)+Result)^) := ACodePoint;
Result := Result + SizeOf(UInt24);
SetBit(AItem^.Flags,AItem^.FLAG_CODEPOINT,True);
end;
if hasContext then begin
contextTree := ConstructContextTree(AContext,Unaligned(Pointer(PtrUInt(AItem)+Result)^),MaxInt);
Result := Result + Cardinal(contextTree^.Size);
SetBit(AItem^.Flags,AItem^.FLAG_CONTEXTUAL,True);
end;
p^.Size := Result;
end;
function CalcCharChildCount(
const ASearchStartPos : Integer;
const ALinePos : Integer;
const ABookLines : PUCA_LineRec;
const AMaxLength : Integer;
const ABookIndex : TUCA_DataBookIndex;
out ALineCount : Word
) : Byte;
var
locLinePos : Integer;
p : PUCA_LineRec;
procedure IncP();
begin
Inc(locLinePos);
p := @ABookLines[ABookIndex[locLinePos]];
end;
var
i, locTargetLen, locTargetBufferSize, r : Integer;
locTarget : array[0..127] of Cardinal;
locLastChar : Cardinal;
begin
locLinePos := ALinePos;
p := @ABookLines[ABookIndex[locLinePos]];
locTargetLen := ASearchStartPos;
locTargetBufferSize := (locTargetLen*SizeOf(Cardinal));
Move(p^.CodePoints[0],locTarget[0],locTargetBufferSize);
if (Length(p^.CodePoints) = ASearchStartPos) then begin
r := 0;
locLastChar := High(Cardinal);
end else begin
r := 1;
locLastChar := p^.CodePoints[ASearchStartPos];
end;
i := 1;
while (i < AMaxLength) do begin
IncP();
if (Length(p^.CodePoints) < locTargetLen) then
Break;
if not CompareMem(@locTarget[0],@p^.CodePoints[0],locTargetBufferSize) then
Break;
if (p^.CodePoints[ASearchStartPos] <> locLastChar) then begin
Inc(r);
locLastChar := p^.CodePoints[ASearchStartPos];
end;
Inc(i);
end;
ALineCount := i;
Result := r;
end;
function BuildTrie(
const ALinePos : Integer;
const ABookLines : PUCA_LineRec;
const AMaxLength : Integer;
const ABookIndex : TUCA_DataBookIndex
) : PTrieNode;
var
p : PUCA_LineRec;
root : PTrieNode;
ki, k, i : Integer;
key : array of TKeyType;
begin
k := ABookIndex[ALinePos];
p := @ABookLines[k];
if (Length(p^.CodePoints) = 1) then
root := CreateNode(p^.CodePoints[0],k)
else
root := CreateNode(p^.CodePoints[0]);
for i := ALinePos to ALinePos + AMaxLength - 1 do begin
k := ABookIndex[i];
p := @ABookLines[k];
if (Length(p^.CodePoints) = 1) then begin
InsertWord(root,p^.CodePoints[0],k);
end else begin
SetLength(key,Length(p^.CodePoints));
for ki := 0 to Length(p^.CodePoints) - 1 do
key[ki] := p^.CodePoints[ki];
InsertWord(root,key,k);
end;
end;
Result := root;
end;
function BoolToByte(AValue : Boolean): Byte;inline;
begin
if AValue then
Result := 1
else
Result := 0;
end;
function InternalConstructFromTrie(
const ATrie : PTrieNode;
const AItem : PUCA_PropItemRec;
const ALines : PUCA_LineRec;
const AStoreCp : Boolean
) : Cardinal;
var
i : Integer;
size : Cardinal;
p : PUCA_PropItemRec;
n : PTrieNode;
begin
if (ATrie = nil) then
exit(0);
p := AItem;
n := ATrie;
if n^.DataNode then
size := ConstructItem(p,n^.Key,1,n^.ChildCount,ALines[n^.Data].Weights,AStoreCp,@(ALines[n^.Data].Context),ALines[n^.Data].Deleted)
else
size := ConstructItem(p,n^.Key,0,n^.ChildCount,[],AStoreCp,nil,False);
Result := size;
if (n^.ChildCount > 0) then begin
for i := 0 to n^.ChildCount - 1 do begin
p := PUCA_PropItemRec(PtrUInt(p) + size);
size := InternalConstructFromTrie(n^.Children[i],p,ALines,True);
Result := Result + size;
end;
end;
AItem^.Size := Result;
end;
function ConstructFromTrie(
const ATrie : PTrieNode;
const AItem : PUCA_PropItemRec;
const ALines : PUCA_LineRec
) : Integer;
begin
Result := InternalConstructFromTrie(ATrie,AItem,ALines,False);
end;
procedure MakeUCA_Props(
ABook : PUCA_DataBook;
out AProps : PUCA_PropBook
);
var
propIndexCount : Integer;
procedure CapturePropIndex(AItem : PUCA_PropItemRec; ACodePoint : Cardinal);
begin
AProps^.Index[propIndexCount].CodePoint := ACodePoint;
AProps^.Index[propIndexCount].Position := PtrUInt(AItem) - PtrUInt(AProps^.Items);
propIndexCount := propIndexCount + 1;
end;
var
locIndex : TUCA_DataBookIndex;
i, c, k, kc : Integer;
p, p1, p2 : PUCA_PropItemRec;
lines, pl1, pl2 : PUCA_LineRec;
childCount, lineCount : Word;
size : Cardinal;
trieRoot : PTrieNode;
MaxChildCount, MaxSize : Cardinal;
childList : array of PUCA_PropItemRec;
begin
locIndex := CreateIndex(ABook);
i := Length(ABook^.Lines);
i := 30 * i * (SizeOf(TUCA_PropItemRec) + SizeOf(TUCA_PropWeights));
AProps := AllocMem(SizeOf(TUCA_DataBook));
AProps^.ItemSize := i;
AProps^.Items := AllocMem(i);
propIndexCount := 0;
SetLength(AProps^.Index,Length(ABook^.Lines));
p := AProps^.Items;
lines := @ABook^.Lines[0];
c := Length(locIndex);
i := 0;
MaxChildCount := 0; MaxSize := 0;
while (i < (c-1)) do begin
pl1 := @lines[locIndex[i]];
if not pl1^.Stored then begin
i := i + 1;
Continue;
end;
pl2 := @lines[locIndex[i+1]];
if (pl1^.CodePoints[0] <> pl2^.CodePoints[0]) then begin
if (Length(pl1^.CodePoints) = 1) then begin
size := ConstructItem(p,pl1^.CodePoints[0],1,0,pl1^.Weights,False,@pl1^.Context,pl1^.Deleted);
CapturePropIndex(p,pl1^.CodePoints[0]);
p := PUCA_PropItemRec(PtrUInt(p) + size);
if (size > MaxSize) then
MaxSize := size;
end else begin
kc := Length(pl1^.CodePoints);
SetLength(childList,kc);
for k := 0 to kc - 2 do begin
size := ConstructItem(p,pl1^.CodePoints[k],0,1,[],(k>0),nil,False);
if (k = 0) then
CapturePropIndex(p,pl1^.CodePoints[k]);
childList[k] := p;
p := PUCA_PropItemRec(PtrUInt(p) + size);
end;
size := ConstructItem(p,pl1^.CodePoints[kc-1],1,0,pl1^.Weights,True,@pl1^.Context,pl1^.Deleted);
childList[kc-1] := p;
p := PUCA_PropItemRec(PtrUInt(p) + size);
for k := kc - 2 downto 0 do begin
p1 := childList[k];
p2 := childList[k+1];
p1^.Size := p1^.Size + p2^.Size;
end;
if (p1^.Size > MaxSize) then
MaxSize := p1^.Size;
end;
lineCount := 1;
end else begin
childCount := CalcCharChildCount(1,i,lines,c,locIndex,lineCount);
if (childCount < 1) then
raise Exception.CreateFmt('Expected "child count > 1" but found %d.',[childCount]);
if (lineCount < 2) then
raise Exception.CreateFmt('Expected "line count > 2" but found %d.',[lineCount]);
if (childCount > MaxChildCount) then
MaxChildCount := childCount;
trieRoot := BuildTrie(i,lines,lineCount,locIndex);
size := ConstructFromTrie(trieRoot,p,lines);
CapturePropIndex(p,pl1^.CodePoints[0]);
FreeNode(trieRoot);
p := PUCA_PropItemRec(PtrUInt(p) + size);
if (size > MaxSize) then
MaxSize := size;
end;
i := i + lineCount;
end;
if (i = (c-1)) then begin
pl1 := @lines[locIndex[i]];
if (Length(pl1^.CodePoints) = 1) then begin
size := ConstructItem(p,pl1^.CodePoints[0],1,0,pl1^.Weights,False,@pl1^.Context,pl1^.Deleted);
CapturePropIndex(p,pl1^.CodePoints[0]);
p := PUCA_PropItemRec(PtrUInt(p) + size);
if (size > MaxSize) then
MaxSize := size;
end else begin
kc := Length(pl1^.CodePoints);
SetLength(childList,kc);
for k := 0 to kc - 2 do begin
size := ConstructItem(p,pl1^.CodePoints[k],0,1,[],(k>0),@pl1^.Context,pl1^.Deleted);
if (k = 0) then
CapturePropIndex(p,pl1^.CodePoints[0]);
childList[k] := p;
p := PUCA_PropItemRec(PtrUInt(p) + size);
end;
size := ConstructItem(p,pl1^.CodePoints[kc-1],1,0,pl1^.Weights,True,@pl1^.Context,pl1^.Deleted);
childList[kc-1] := p;
p := PUCA_PropItemRec(PtrUInt(p) + size);
for i := kc - 2 downto 0 do begin
p1 := childList[i];
p2 := childList[i+1];
p1^.Size := p1^.Size + p2^.Size;
end;
if (size > MaxSize) then
MaxSize := size;
end;
end;
c := Int64(PtrUInt(p)) - Int64(PtrUInt(AProps^.Items));
ReAllocMem(AProps^.Items,c);
AProps^.ItemSize := c;
SetLength(AProps^.Index,propIndexCount);
AProps^.ItemsOtherEndian := AllocMem(AProps^.ItemSize);
ReverseFromNativeEndian(AProps^.Items,AProps^.ItemSize,AProps^.ItemsOtherEndian);
k := 0;
c := High(Word);
for i := 0 to Length(ABook^.Lines) - 1 do begin
if (Length(ABook^.Lines[i].Weights) > 0) then begin
if (ABook^.Lines[i].Weights[0].Variable) then begin
if (ABook^.Lines[i].Weights[0].Weights[0] > k) then
k := ABook^.Lines[i].Weights[0].Weights[0];
if (ABook^.Lines[i].Weights[0].Weights[0] < c) then
c := ABook^.Lines[i].Weights[0].Weights[0];
end;
end;
end;
AProps^.VariableHighLimit := k;
AProps^.VariableLowLimit := c;
end;
procedure FreeUcaBook(var ABook : PUCA_PropBook);
var
p : PUCA_PropBook;
begin
if (ABook = nil) then
exit;
p := ABook;
ABook := nil;
p^.Index := nil;
FreeMem(p^.Items,p^.ItemSize);
FreeMem(p,SizeOf(p^));
end;
function IndexOf(const ACodePoint : Cardinal; APropBook : PUCA_PropBook): Integer;overload;
var
i : Integer;
begin
for i := 0 to Length(APropBook^.Index) - 1 do begin
if (ACodePoint = APropBook^.Index[i].CodePoint) then
exit(i);
end;
Result := -1;
end;
type
PucaBmpSecondTableItem = ^TucaBmpSecondTableItem;
function IndexOf(
const AItem : PucaBmpSecondTableItem;
const ATable : TucaBmpSecondTable;
const ATableActualLength : Integer
) : Integer;overload;
var
i : Integer;
p : PucaBmpSecondTableItem;
begin
Result := -1;
if (ATableActualLength > 0) then begin
p := @ATable[0];
for i := 0 to ATableActualLength - 1 do begin
if CompareMem(p,AItem,SizeOf(TucaBmpSecondTableItem)) then begin
Result := i;
Break;
end;
Inc(p);
end;
end;
end;
procedure MakeUCA_BmpTables(
var AFirstTable : TucaBmpFirstTable;
var ASecondTable : TucaBmpSecondTable;
const APropBook : PUCA_PropBook
);
var
locLowByte, locHighByte : Byte;
locTableItem : TucaBmpSecondTableItem;
locCP : TUnicodeCodePoint;
i, locSecondActualLen : Integer;
k : Integer;
begin
SetLength(ASecondTable,120);
locSecondActualLen := 0;
for locHighByte := 0 to 255 do begin
FillChar(locTableItem,SizeOf(locTableItem),#0);
for locLowByte := 0 to 255 do begin
locCP := (locHighByte * 256) + locLowByte;
k := IndexOf(locCP,APropBook);
if (k = -1) then
k := 0
else
k := APropBook^.Index[k].Position + 1;
locTableItem[locLowByte] := k;
end;
i := IndexOf(@locTableItem,ASecondTable,locSecondActualLen);
if (i = -1) then begin
if (locSecondActualLen = Length(ASecondTable)) then
SetLength(ASecondTable,locSecondActualLen + 50);
i := locSecondActualLen;
ASecondTable[i] := locTableItem;
Inc(locSecondActualLen);
end;
AFirstTable[locHighByte] := i;
end;
SetLength(ASecondTable,locSecondActualLen);
end;
function ToUCS4(const AHighS, ALowS : Word) : TUnicodeCodePoint; inline;
begin
//copied from utf16toutf32
Result := (UCS4Char(AHighS)-$d800) shl 10 + (UCS4Char(ALowS)-$dc00) + $10000;
end;
procedure FromUCS4(const AValue : TUnicodeCodePoint; var AHighS, ALowS : Word);
begin
AHighS := Word((AValue - $10000) shr 10 + $d800);
ALowS := Word((AValue - $10000) and $3ff + $dc00);
end;
type
PucaOBmpSecondTableItem = ^TucaOBmpSecondTableItem;
function IndexOf(
const AItem : PucaOBmpSecondTableItem;
const ATable : TucaOBmpSecondTable;
const ATableActualLength : Integer
) : Integer;overload;
var
i : Integer;
p : PucaOBmpSecondTableItem;
begin
Result := -1;
if (ATableActualLength > 0) then begin
p := @ATable[0];
for i := 0 to ATableActualLength - 1 do begin
if CompareMem(p,AItem,SizeOf(TucaOBmpSecondTableItem)) then begin
Result := i;
Break;
end;
Inc(p);
end;
end;
end;
procedure MakeUCA_OBmpTables(
var AFirstTable : TucaOBmpFirstTable;
var ASecondTable : TucaOBmpSecondTable;
const APropBook : PUCA_PropBook
);
var
locLowByte, locHighByte : Word;
locTableItem : TucaOBmpSecondTableItem;
locCP : TUnicodeCodePoint;
i, locSecondActualLen : Integer;
k : Integer;
begin
if (Length(ASecondTable) = 0) then
SetLength(ASecondTable,2000);
locSecondActualLen := 0;
for locHighByte := 0 to HIGH_SURROGATE_COUNT - 1 do begin
FillChar(locTableItem,SizeOf(locTableItem),#0);
for locLowByte := 0 to LOW_SURROGATE_COUNT - 1 do begin
locCP := ToUCS4(HIGH_SURROGATE_BEGIN + locHighByte,LOW_SURROGATE_BEGIN + locLowByte);
k := IndexOf(locCP,APropBook);
if (k = -1) then
k := 0
else
k := APropBook^.Index[k].Position + 1;
locTableItem[locLowByte] := k;
end;
i := IndexOf(@locTableItem,ASecondTable,locSecondActualLen);
if (i = -1) then begin
if (locSecondActualLen = Length(ASecondTable)) then
SetLength(ASecondTable,locSecondActualLen + 50);
i := locSecondActualLen;
ASecondTable[i] := locTableItem;
Inc(locSecondActualLen);
end;
AFirstTable[locHighByte] := i;
end;
SetLength(ASecondTable,locSecondActualLen);
end;
function GetPropPosition(
const AHighS,
ALowS : Word;
const AFirstTable : PucaOBmpFirstTable;
const ASecondTable : PucaOBmpSecondTable
): Integer;inline;overload;
begin
Result := ASecondTable^[AFirstTable^[AHighS-HIGH_SURROGATE_BEGIN]][ALowS-LOW_SURROGATE_BEGIN] - 1;
end;
procedure GenerateUCA_Head(
ADest : TStream;
ABook : PUCA_DataBook;
AProps : PUCA_PropBook
);
procedure AddLine(const ALine : ansistring);
var
buffer : ansistring;
begin
buffer := ALine + sLineBreak;
ADest.Write(buffer[1],Length(buffer));
end;
begin
AddLine('const');
AddLine(' VERSION_STRING = ' + QuotedStr(ABook^.Version) + ';');
AddLine(' VARIABLE_LOW_LIMIT = ' + IntToStr(AProps^.VariableLowLimit) + ';');
AddLine(' VARIABLE_HIGH_LIMIT = ' + IntToStr(AProps^.VariableHighLimit) + ';');
AddLine(' VARIABLE_WEIGHT = ' + IntToStr(Ord(ABook^.VariableWeight)) + ';');
AddLine(' BACKWARDS_0 = ' + BoolToStr(ABook^.Backwards[0],'True','False') + ';');
AddLine(' BACKWARDS_1 = ' + BoolToStr(ABook^.Backwards[1],'True','False') + ';');
AddLine(' BACKWARDS_2 = ' + BoolToStr(ABook^.Backwards[2],'True','False') + ';');
AddLine(' BACKWARDS_3 = ' + BoolToStr(ABook^.Backwards[3],'True','False') + ';');
AddLine(' PROP_COUNT = ' + IntToStr(Ord(AProps^.ItemSize)) + ';');
AddLine('');
end;
procedure GenerateUCA_BmpTables(
AStream,
ANativeEndianStream,
ANonNativeEndianStream : TStream;
var AFirstTable : TucaBmpFirstTable;
var ASecondTable : TucaBmpSecondTable
);
procedure AddLine(AOut : TStream; const ALine : ansistring);
var
buffer : ansistring;
begin
buffer := ALine + sLineBreak;
AOut.Write(buffer[1],Length(buffer));
end;
var
i, j, c : Integer;
locLine : string;
value : UInt24;
begin
AddLine(AStream,'const');
AddLine(AStream,' UCA_TABLE_1 : array[0..255] of Byte = (');
locLine := '';
for i := Low(AFirstTable) to High(AFirstTable) - 1 do begin
locLine := locLine + IntToStr(AFirstTable[i]) + ',';
if (((i+1) mod 16) = 0) then begin
locLine := ' ' + locLine;
AddLine(AStream,locLine);
locLine := '';
end;
end;
locLine := locLine + IntToStr(AFirstTable[High(AFirstTable)]);
locLine := ' ' + locLine;
AddLine(AStream,locLine);
AddLine(AStream,' );' + sLineBreak);
AddLine(ANativeEndianStream,'const');
AddLine(ANativeEndianStream,' UCA_TABLE_2 : array[0..(256*' + IntToStr(Length(ASecondTable)) +'-1)] of UInt24 =(');
c := High(ASecondTable);
for i := Low(ASecondTable) to c do begin
locLine := '';
for j := Low(TucaBmpSecondTableItem) to High(TucaBmpSecondTableItem) do begin
value := ASecondTable[i][j];
locLine := locLine + UInt24ToStr(value,ENDIAN_NATIVE) + ',';
if (((j+1) mod 2) = 0) then begin
if (i = c) and (j = 255) then
Delete(locLine,Length(locLine),1);
locLine := ' ' + locLine;
AddLine(ANativeEndianStream,locLine);
locLine := '';
end;
end;
end;
AddLine(ANativeEndianStream,' );' + sLineBreak);
AddLine(ANonNativeEndianStream,'const');
AddLine(ANonNativeEndianStream,' UCA_TABLE_2 : array[0..(256*' + IntToStr(Length(ASecondTable)) +'-1)] of UInt24 =(');
c := High(ASecondTable);
for i := Low(ASecondTable) to c do begin
locLine := '';
for j := Low(TucaBmpSecondTableItem) to High(TucaBmpSecondTableItem) do begin
value := ASecondTable[i][j];
locLine := locLine + UInt24ToStr(value,ENDIAN_NON_NATIVE) + ',';
if (((j+1) mod 2) = 0) then begin
if (i = c) and (j = 255) then
Delete(locLine,Length(locLine),1);
locLine := ' ' + locLine;
AddLine(ANonNativeEndianStream,locLine);
locLine := '';
end;
end;
end;
AddLine(ANonNativeEndianStream,' );' + sLineBreak);
end;
procedure GenerateBinaryUCA_BmpTables(
ANativeEndianStream,
ANonNativeEndianStream : TStream;
var AFirstTable : TucaBmpFirstTable;
var ASecondTable : TucaBmpSecondTable
);
var
i, j : Integer;
value : UInt24;
begin
ANativeEndianStream.Write(AFirstTable[0],Length(AFirstTable));
ANonNativeEndianStream.Write(AFirstTable[0],Length(AFirstTable));
for i := Low(ASecondTable) to High(ASecondTable) do begin
for j := Low(TucaBmpSecondTableItem) to High(TucaBmpSecondTableItem) do begin
value := ASecondTable[i][j];
ANativeEndianStream.Write(value,SizeOf(value));
ReverseBytes(value,SizeOf(value));
ANonNativeEndianStream.Write(value,SizeOf(value));
end;
end;
end;
procedure GenerateUCA_PropTable(
// WARNING : files must be generated for each endianess (Little / Big)
ADest : TStream;
const APropBook : PUCA_PropBook;
const AEndian : TEndianKind
);
procedure AddLine(const ALine : ansistring);
var
buffer : ansistring;
begin
buffer := ALine + sLineBreak;
ADest.Write(buffer[1],Length(buffer));
end;
var
i, c : Integer;
locLine : string;
p : PByte;
begin
c := APropBook^.ItemSize;
AddLine('const');
AddLine(' UCA_PROPS : array[0..' + IntToStr(c-1) + '] of Byte = (');
locLine := '';
if (AEndian = ENDIAN_NATIVE) then
p := PByte(APropBook^.Items)
else
p := PByte(APropBook^.ItemsOtherEndian);
for i := 0 to c - 2 do begin
locLine := locLine + IntToStr(p[i]) + ',';
if (((i+1) mod 60) = 0) then begin
locLine := ' ' + locLine;
AddLine(locLine);
locLine := '';
end;
end;
locLine := locLine + IntToStr(p[c-1]);
locLine := ' ' + locLine;
AddLine(locLine);
AddLine(' );' + sLineBreak);
end;
procedure GenerateBinaryUCA_PropTable(
// WARNING : files must be generated for each endianess (Little / Big)
ANativeEndianStream,
ANonNativeEndianStream : TStream;
const APropBook : PUCA_PropBook
);
begin
ANativeEndianStream.Write(APropBook^.Items^,APropBook^.ItemSize);
ANonNativeEndianStream.Write(APropBook^.ItemsOtherEndian^,APropBook^.ItemSize);
end;
procedure GenerateUCA_OBmpTables(
AStream,
ANativeEndianStream,
ANonNativeEndianStream : TStream;
var AFirstTable : TucaOBmpFirstTable;
var ASecondTable : TucaOBmpSecondTable
);
procedure AddLine(AOut : TStream; const ALine : ansistring);
var
buffer : ansistring;
begin
buffer := ALine + sLineBreak;
AOut.Write(buffer[1],Length(buffer));
end;
var
i, j, c : Integer;
locLine : string;
value : UInt24;
begin
AddLine(AStream,'const');
AddLine(AStream,' UCAO_TABLE_1 : array[0..' + IntToStr(HIGH_SURROGATE_COUNT-1) + '] of Word = (');
locLine := '';
for i := Low(AFirstTable) to High(AFirstTable) - 1 do begin
locLine := locLine + IntToStr(AFirstTable[i]) + ',';
if (((i+1) mod 16) = 0) then begin
locLine := ' ' + locLine;
AddLine(AStream,locLine);
locLine := '';
end;
end;
locLine := locLine + IntToStr(AFirstTable[High(AFirstTable)]);
locLine := ' ' + locLine;
AddLine(AStream,locLine);
AddLine(AStream,' );' + sLineBreak);
AddLine(ANativeEndianStream,' UCAO_TABLE_2 : array[0..('+IntToStr(LOW_SURROGATE_COUNT)+'*' + IntToStr(Length(ASecondTable)) +'-1)] of UInt24 =(');
c := High(ASecondTable);
for i := Low(ASecondTable) to c do begin
locLine := '';
for j := Low(TucaOBmpSecondTableItem) to High(TucaOBmpSecondTableItem) do begin
value := ASecondTable[i][j];
locLine := locLine + UInt24ToStr(value,ENDIAN_NATIVE) + ',';
if (((j+1) mod 2) = 0) then begin
if (i = c) and (j = High(TucaOBmpSecondTableItem)) then
Delete(locLine,Length(locLine),1);
locLine := ' ' + locLine;
AddLine(ANativeEndianStream,locLine);
locLine := '';
end;
end;
end;
AddLine(ANativeEndianStream,' );' + sLineBreak);
AddLine(ANonNativeEndianStream,' UCAO_TABLE_2 : array[0..('+IntToStr(LOW_SURROGATE_COUNT)+'*' + IntToStr(Length(ASecondTable)) +'-1)] of UInt24 =(');
c := High(ASecondTable);
for i := Low(ASecondTable) to c do begin
locLine := '';
for j := Low(TucaOBmpSecondTableItem) to High(TucaOBmpSecondTableItem) do begin
value := ASecondTable[i][j];
locLine := locLine + UInt24ToStr(value,ENDIAN_NON_NATIVE) + ',';
if (((j+1) mod 2) = 0) then begin
if (i = c) and (j = High(TucaOBmpSecondTableItem)) then
Delete(locLine,Length(locLine),1);
locLine := ' ' + locLine;
AddLine(ANonNativeEndianStream,locLine);
locLine := '';
end;
end;
end;
AddLine(ANonNativeEndianStream,' );' + sLineBreak);
end;
procedure GenerateBinaryUCA_OBmpTables(
ANativeEndianStream,
ANonNativeEndianStream : TStream;
var AFirstTable : TucaOBmpFirstTable;
var ASecondTable : TucaOBmpSecondTable
);
var
i, j : Integer;
locLine : string;
wordValue : Word;
value : UInt24;
begin
for i := Low(AFirstTable) to High(AFirstTable) do begin
wordValue := AFirstTable[i];
ANativeEndianStream.Write(wordValue,SizeOf(wordValue));
ReverseBytes(wordValue,SizeOf(wordValue));
ANonNativeEndianStream.Write(wordValue,SizeOf(wordValue));
end;
for i := Low(ASecondTable) to High(ASecondTable) do begin
for j := Low(TucaOBmpSecondTableItem) to High(TucaOBmpSecondTableItem) do begin
value := ASecondTable[i][j];
ANativeEndianStream.Write(value,SizeOf(value));
ReverseBytes(value,SizeOf(value));
ANonNativeEndianStream.Write(value,SizeOf(value));
end;
end;
end;
type
POBmpSecondTableItem = ^TOBmpSecondTableItem;
function IndexOf(
const AItem : POBmpSecondTableItem;
const ATable : TOBmpSecondTable;
const ATableActualLength : Integer
) : Integer;overload;
var
i : Integer;
p : POBmpSecondTableItem;
begin
Result := -1;
if (ATableActualLength > 0) then begin
p := @ATable[0];
for i := 0 to ATableActualLength - 1 do begin
if CompareMem(p,AItem,SizeOf(TOBmpSecondTableItem)) then begin
Result := i;
Break;
end;
Inc(p);
end;
end;
end;
procedure MakeOBmpTables(
var AFirstTable : TOBmpFirstTable;
var ASecondTable : TOBmpSecondTable;
const ADataLineList : TDataLineRecArray
);
var
locLowByte, locHighByte : Word;
locTableItem : TOBmpSecondTableItem;
locCP : TUnicodeCodePoint;
i, locSecondActualLen : Integer;
begin
SetLength(ASecondTable,2000);
locSecondActualLen := 0;
for locHighByte := 0 to HIGH_SURROGATE_COUNT - 1 do begin
FillChar(locTableItem,SizeOf(locTableItem),#0);
for locLowByte := 0 to LOW_SURROGATE_COUNT - 1 do begin
locCP := ToUCS4(HIGH_SURROGATE_BEGIN + locHighByte,LOW_SURROGATE_BEGIN + locLowByte);
locTableItem[locLowByte] := GetPropID(locCP,ADataLineList)// - 1;
end;
i := IndexOf(@locTableItem,ASecondTable,locSecondActualLen);
if (i = -1) then begin
if (locSecondActualLen = Length(ASecondTable)) then
SetLength(ASecondTable,locSecondActualLen + 50);
i := locSecondActualLen;
ASecondTable[i] := locTableItem;
Inc(locSecondActualLen);
end;
AFirstTable[locHighByte] := i;
end;
SetLength(ASecondTable,locSecondActualLen);
end;
type
P3lvlOBmp3TableItem = ^T3lvlOBmp3TableItem;
function IndexOf(
const AItem : P3lvlOBmp3TableItem;
const ATable : T3lvlOBmp3Table;
const ATableActualLength : Integer
) : Integer;overload;
var
i : Integer;
p : P3lvlOBmp3TableItem;
begin
Result := -1;
if (ATableActualLength > 0) then begin
p := @ATable[0];
for i := 0 to ATableActualLength - 1 do begin
if CompareMem(p,AItem,SizeOf(T3lvlOBmp3TableItem)) then begin
Result := i;
Break;
end;
Inc(p);
end;
end;
end;
type
P3lvlOBmp2TableItem = ^T3lvlOBmp2TableItem;
function IndexOf(
const AItem : P3lvlOBmp2TableItem;
const ATable : T3lvlOBmp2Table
) : Integer;overload;
var
i : Integer;
p : P3lvlOBmp2TableItem;
begin
Result := -1;
if (Length(ATable) > 0) then begin
p := @ATable[0];
for i := 0 to Length(ATable) - 1 do begin
if CompareMem(p,AItem,SizeOf(T3lvlOBmp2TableItem)) then begin
Result := i;
Break;
end;
Inc(p);
end;
end;
end;
procedure MakeOBmpTables3Levels(
var AFirstTable : T3lvlOBmp1Table;
var ASecondTable : T3lvlOBmp2Table;
var AThirdTable : T3lvlOBmp3Table;
const ADataLineList : TDataLineRecArray
);
var
locLowByte0, locLowByte1, locHighByte : Word;
locTableItem2 : T3lvlOBmp2TableItem;
locTableItem3 : T3lvlOBmp3TableItem;
locCP : TUnicodeCodePoint;
i, locThirdActualLen : Integer;
begin
SetLength(AThirdTable,120);
locThirdActualLen := 0;
for locHighByte := 0 to 1023 do begin
FillChar(locTableItem2,SizeOf(locTableItem2),#0);
for locLowByte0 := 0 to 31 do begin
FillChar(locTableItem3,SizeOf(locTableItem3),#0);
for locLowByte1 := 0 to 31 do begin
locCP := ToUCS4(HIGH_SURROGATE_BEGIN + locHighByte,LOW_SURROGATE_BEGIN + (locLowByte0*32) + locLowByte1);
locTableItem3[locLowByte1] := GetPropID(locCP,ADataLineList);
end;
i := IndexOf(@locTableItem3,AThirdTable,locThirdActualLen);
if (i = -1) then begin
if (locThirdActualLen = Length(AThirdTable)) then
SetLength(AThirdTable,locThirdActualLen + 50);
i := locThirdActualLen;
AThirdTable[i] := locTableItem3;
Inc(locThirdActualLen);
end;
locTableItem2[locLowByte0] := i;
end;
i := IndexOf(@locTableItem2,ASecondTable);
if (i = -1) then begin
i := Length(ASecondTable);
SetLength(ASecondTable,(i + 1));
ASecondTable[i] := locTableItem2;
end;
AFirstTable[locHighByte] := i;
end;
SetLength(AThirdTable,locThirdActualLen);
end;
procedure GenerateOBmpTables(
ADest : TStream;
var AFirstTable : TOBmpFirstTable;
var ASecondTable : TOBmpSecondTable
);
procedure AddLine(const ALine : ansistring);
var
buffer : ansistring;
begin
buffer := ALine + sLineBreak;
ADest.Write(buffer[1],Length(buffer));
end;
var
i, j, c : Integer;
locLine : string;
begin
AddLine('const');
AddLine(' UCO_TABLE_1 : array[0..' + IntToStr(HIGH_SURROGATE_COUNT-1) + '] of Word = (');
locLine := '';
for i := Low(AFirstTable) to High(AFirstTable) - 1 do begin
locLine := locLine + IntToStr(AFirstTable[i]) + ',';
if (((i+1) mod 16) = 0) then begin
locLine := ' ' + locLine;
AddLine(locLine);
locLine := '';
end;
end;
locLine := locLine + IntToStr(AFirstTable[High(AFirstTable)]);
locLine := ' ' + locLine;
AddLine(locLine);
AddLine(' );' + sLineBreak);
AddLine(' UCO_TABLE_2 : array[0..('+IntToStr(LOW_SURROGATE_COUNT)+'*' + IntToStr(Length(ASecondTable)) +'-1)] of Word =(');
c := High(ASecondTable);
for i := Low(ASecondTable) to c do begin
locLine := '';
for j := Low(TOBmpSecondTableItem) to High(TOBmpSecondTableItem) do begin
locLine := locLine + IntToStr(ASecondTable[i][j]) + ',';
if (((j+1) mod 16) = 0) then begin
if (i = c) and (j = High(TOBmpSecondTableItem)) then
Delete(locLine,Length(locLine),1);
locLine := ' ' + locLine;
AddLine(locLine);
locLine := '';
end;
end;
end;
AddLine(' );' + sLineBreak);
end;
//----------------------------------
procedure Generate3lvlOBmpTables(
ADest : TStream;
var AFirstTable : T3lvlOBmp1Table;
var ASecondTable : T3lvlOBmp2Table;
var AThirdTable : T3lvlOBmp3Table
);
procedure AddLine(const ALine : ansistring);
var
buffer : ansistring;
begin
buffer := ALine + sLineBreak;
ADest.Write(buffer[1],Length(buffer));
end;
var
i, j, c : Integer;
locLine : string;
begin
AddLine('const');
AddLine(' UCO_TABLE_1 : array[0..1023] of Word = (');
locLine := '';
for i := Low(AFirstTable) to High(AFirstTable) - 1 do begin
locLine := locLine + IntToStr(AFirstTable[i]) + ',';
if (((i+1) mod 16) = 0) then begin
locLine := ' ' + locLine;
AddLine(locLine);
locLine := '';
end;
end;
locLine := locLine + IntToStr(AFirstTable[High(AFirstTable)]);
locLine := ' ' + locLine;
AddLine(locLine);
AddLine(' );' + sLineBreak);
AddLine(' UCO_TABLE_2 : array[0..' + IntToStr(Length(ASecondTable)-1) +'] of array[0..31] of Word = (');
c := High(ASecondTable);
for i := Low(ASecondTable) to c do begin
locLine := '(';
for j := Low(T3lvlOBmp2TableItem) to High(T3lvlOBmp2TableItem) do
locLine := locLine + IntToStr(ASecondTable[i][j]) + ',';
Delete(locLine,Length(locLine),1);
locLine := ' ' + locLine + ')';
if (i < c) then
locLine := locLine + ',';
AddLine(locLine);
end;
AddLine(' );' + sLineBreak);
AddLine(' UCO_TABLE_3 : array[0..' + IntToStr(Length(AThirdTable)-1) +'] of array[0..31] of Word = (');
c := High(AThirdTable);
for i := Low(AThirdTable) to c do begin
locLine := '(';
for j := Low(T3lvlOBmp3TableItem) to High(T3lvlOBmp3TableItem) do
locLine := locLine + IntToStr(AThirdTable[i][j]) + ',';
Delete(locLine,Length(locLine),1);
locLine := ' ' + locLine + ')';
if (i < c) then
locLine := locLine + ',';
AddLine(locLine);
end;
AddLine(' );' + sLineBreak);
end;
function GetProp(
const AHighS,
ALowS : Word;
const AProps : TPropRecArray;
var AFirstTable : TOBmpFirstTable;
var ASecondTable : TOBmpSecondTable
): PPropRec;
begin
Result := @AProps[ASecondTable[AFirstTable[AHighS-HIGH_SURROGATE_BEGIN]][ALowS-LOW_SURROGATE_BEGIN]];
end;
function GetProp(
const AHighS,
ALowS : Word;
const AProps : TPropRecArray;
var AFirstTable : T3lvlOBmp1Table;
var ASecondTable : T3lvlOBmp2Table;
var AThirdTable : T3lvlOBmp3Table
): PPropRec;
begin
Result := @AProps[AThirdTable[ASecondTable[AFirstTable[AHighS]][ALowS div 32]][ALowS mod 32]];
//Result := @AProps[ASecondTable[AFirstTable[AHighS-HIGH_SURROGATE_BEGIN]][ALowS-LOW_SURROGATE_BEGIN]];
end;
{ TUCA_PropItemContextTreeRec }
function TUCA_PropItemContextTreeRec.GetData : PUCA_PropItemContextTreeNodeRec;
begin
if (Size = 0) then
Result := nil
else
Result := PUCA_PropItemContextTreeNodeRec(
PtrUInt(
PtrUInt(@Self) + SizeOf(UInt24){Size}
)
);
end;
{ TUCA_LineContextRec }
procedure TUCA_LineContextRec.Clear;
begin
Data := nil
end;
procedure TUCA_LineContextRec.Assign(ASource : TUCA_LineContextRec);
var
c, i : Integer;
begin
c := Length(ASource.Data);
SetLength(Self.Data,c);
for i := 0 to c-1 do
Self.Data[i].Assign(ASource.Data[i]);
end;
function TUCA_LineContextRec.Clone : TUCA_LineContextRec;
begin
Result.Clear();
Result.Assign(Self);
end;
{ TUCA_LineContextItemRec }
procedure TUCA_LineContextItemRec.Clear();
begin
CodePoints := nil;
Weights := nil;
end;
procedure TUCA_LineContextItemRec.Assign(ASource : TUCA_LineContextItemRec);
begin
Self.CodePoints := Copy(ASource.CodePoints);
Self.Weights := Copy(ASource.Weights);
end;
function TUCA_LineContextItemRec.Clone() : TUCA_LineContextItemRec;
begin
Result.Clear();
Result.Assign(Self);
end;
{ TUCA_LineRec }
procedure TUCA_LineRec.Clear;
begin
CodePoints := nil;
Weights := nil;
Deleted := False;
Stored := False;
Context.Clear();
end;
procedure TUCA_LineRec.Assign(ASource : TUCA_LineRec);
begin
Self.CodePoints := Copy(ASource.CodePoints);
Self.Weights := Copy(ASource.Weights);
Self.Deleted := ASource.Deleted;
Self.Stored := ASource.Stored;
Self.Context.Assign(ASource.Context);
end;
function TUCA_LineRec.Clone : TUCA_LineRec;
begin
Result.Clear();
Result.Assign(Self);
end;
function TUCA_LineRec.HasContext() : Boolean;
begin
Result := (Length(Context.Data) > 0);
end;
{ TPropRec }
function TPropRec.GetCategory: TUnicodeCategory;
begin
Result := TUnicodeCategory((CategoryData and Byte($F8)) shr 3);
end;
procedure TPropRec.SetCategory(AValue: TUnicodeCategory);
var
b : Byte;
begin
b := Ord(AValue);
b := b shl 3;
CategoryData := CategoryData or b;
//CategoryData := CategoryData or Byte(Byte(Ord(AValue)) shl 3);
end;
function TPropRec.GetWhiteSpace: Boolean;
begin
Result := IsBitON(CategoryData,0);
end;
procedure TPropRec.SetWhiteSpace(AValue: Boolean);
begin
SetBit(CategoryData,0,AValue);
end;
function TPropRec.GetHangulSyllable: Boolean;
begin
Result := IsBitON(CategoryData,1);
end;
procedure TPropRec.SetHangulSyllable(AValue: Boolean);
begin
SetBit(CategoryData,1,AValue);
end;
{ TUCA_PropItemRec }
function TUCA_PropItemRec.GetWeightSize : Word;
var
c : Integer;
begin
c := WeightLength;
if (c = 0) then
exit(0);
Result := c*SizeOf(TUCA_PropWeights);
if IsWeightCompress_1() then
Result := Result - 1;
if IsWeightCompress_2() then
Result := Result - 1;
end;
function TUCA_PropItemRec.HasCodePoint(): Boolean;
begin
Result := IsBitON(Flags,FLAG_CODEPOINT);
end;
procedure TUCA_PropItemRec.GetWeightArray(ADest: PUCA_PropWeights);
var
c : Integer;
p : PByte;
pd : PUCA_PropWeights;
begin
c := WeightLength;
p := PByte(PtrUInt(@Self) + SizeOf(TUCA_PropItemRec));
pd := ADest;
pd^.Weights[0] := PWord(p)^;
p := p + 2;
if not IsWeightCompress_1() then begin
pd^.Weights[1] := PWord(p)^;
p := p + 2;
end else begin
pd^.Weights[1] := p^;
p := p + 1;
end;
if not IsWeightCompress_2() then begin
pd^.Weights[2] := PWord(p)^;
p := p + 2;
end else begin
pd^.Weights[2] := p^;
p := p + 1;
end;
if (c > 1) then
Move(p^, (pd+1)^, ((c-1)*SizeOf(TUCA_PropWeights)));
end;
function TUCA_PropItemRec.GetSelfOnlySize() : Cardinal;
begin
Result := SizeOf(TUCA_PropItemRec);
if (WeightLength > 0) then begin
Result := Result + (WeightLength * Sizeof(TUCA_PropWeights));
if IsWeightCompress_1() then
Result := Result - 1;
if IsWeightCompress_2() then
Result := Result - 1;
end;
if HasCodePoint() then
Result := Result + SizeOf(UInt24);
if Contextual then
Result := Result + Cardinal(GetContext()^.Size);
end;
procedure TUCA_PropItemRec.SetContextual(AValue : Boolean);
begin
SetBit(Flags,FLAG_CONTEXTUAL,AValue);
end;
function TUCA_PropItemRec.GetContextual : Boolean;
begin
Result := IsBitON(Flags,FLAG_CONTEXTUAL);
end;
function TUCA_PropItemRec.GetContext() : PUCA_PropItemContextTreeRec;
var
p : PtrUInt;
begin
if not Contextual then
exit(nil);
p := PtrUInt(@Self) + SizeOf(TUCA_PropItemRec);
if IsBitON(Flags,FLAG_CODEPOINT) then
p := p + SizeOf(UInt24);
Result := PUCA_PropItemContextTreeRec(p);
end;
procedure TUCA_PropItemRec.SetDeleted(AValue: Boolean);
begin
SetBit(Flags,FLAG_DELETION,AValue);
end;
function TUCA_PropItemRec.IsDeleted: Boolean;
begin
Result := IsBitON(Flags,FLAG_DELETION);
end;
function TUCA_PropItemRec.IsValid() : Boolean;
begin
Result := IsBitON(Flags,FLAG_VALID);
end;
function TUCA_PropItemRec.IsWeightCompress_1 : Boolean;
begin
Result := IsBitON(Flags,FLAG_COMPRESS_WEIGHT_1);
end;
function TUCA_PropItemRec.IsWeightCompress_2 : Boolean;
begin
Result := IsBitON(Flags,FLAG_COMPRESS_WEIGHT_2);
end;
function TUCA_PropItemRec.GetCodePoint: UInt24;
begin
if HasCodePoint() then begin
if Contextual then
Result := PUInt24(
PtrUInt(@Self) + Self.GetSelfOnlySize()- SizeOf(UInt24) -
Cardinal(GetContext()^.Size)
)^
else
Result := PUInt24(PtrUInt(@Self) + Self.GetSelfOnlySize() - SizeOf(UInt24))^
end else begin
raise Exception.Create('TUCA_PropItemRec.GetCodePoint : "No code point available."');
end
end;
function avl_CompareCodePoints(Item1, Item2: Pointer): Integer;
var
a, b : PUCA_LineContextItemRec;
i, hb : Integer;
begin
if (Item1 = Item2) then
exit(0);
if (Item1 = nil) then
exit(-1);
if (Item2 = nil) then
exit(1);
a := Item1;
b := Item2;
if (a^.CodePoints = b^.CodePoints) then
exit(0);
Result := 1;
hb := Length(b^.CodePoints) - 1;
for i := 0 to Length(a^.CodePoints) - 1 do begin
if (i > hb) then
exit;
if (a^.CodePoints[i] < b^.CodePoints[i]) then
exit(-1);
if (a^.CodePoints[i] > b^.CodePoints[i]) then
exit(1);
end;
if (Length(a^.CodePoints) = Length(b^.CodePoints)) then
exit(0);
exit(-1);
end;
function ConstructAvlContextTree(AContext : PUCA_LineContextRec) : TAVLTree;
var
r : TAVLTree;
i : Integer;
begin
r := TAVLTree.Create(@avl_CompareCodePoints);
try
for i := 0 to Length(AContext^.Data) - 1 do
r.Add(@AContext^.Data[i]);
Result := r;
except
FreeAndNil(r);
raise;
end;
end;
function ConstructContextTree(
const AContext : PUCA_LineContextRec;
var ADestBuffer;
const ADestBufferLength : Cardinal
) : PUCA_PropItemContextTreeRec;
function CalcItemOnlySize(AItem : TAVLTreeNode) : Cardinal;
var
kitem : PUCA_LineContextItemRec;
begin
if (AItem = nil) then
exit(0);
kitem := AItem.Data;
Result := SizeOf(PUCA_PropItemContextTreeNodeRec^.Left) +
SizeOf(PUCA_PropItemContextTreeNodeRec^.Right) +
SizeOf(PUCA_PropItemContextRec^.CodePointCount) +
(Length(kitem^.CodePoints)*SizeOf(UInt24)) +
SizeOf(PUCA_PropItemContextRec^.WeightCount) +
(Length(kitem^.Weights)*SizeOf(TUCA_PropWeights));
end;
function CalcItemSize(AItem : TAVLTreeNode) : Cardinal;
begin
if (AItem = nil) then
exit(0);
Result := CalcItemOnlySize(AItem);
if (AItem.Left <> nil) then
Result := Result + CalcItemSize(AItem.Left);
if (AItem.Right <> nil) then
Result := Result + CalcItemSize(AItem.Right);
end;
function CalcSize(AData : TAVLTree) : Cardinal;
begin
Result := SizeOf(PUCA_PropItemContextTreeRec^.Size) + CalcItemSize(AData.Root);
end;
function ConstructItem(ASource : TAVLTreeNode; ADest : PUCA_PropItemContextTreeNodeRec) : Cardinal;
var
k : Integer;
kitem : PUCA_LineContextItemRec;
kpcp : PUInt24;
kpw : PUCA_PropWeights;
pextra : PtrUInt;
pnext : PUCA_PropItemContextTreeNodeRec;
begin
kitem := ASource.Data;
ADest^.Data.CodePointCount := Length(kitem^.CodePoints);
ADest^.Data.WeightCount := Length(kitem^.Weights);
pextra := PtrUInt(ADest)+SizeOf(ADest^.Left)+SizeOf(ADest^.Right)+
SizeOf(ADest^.Data.CodePointCount)+SizeOf(ADest^.Data.WeightCount);
if (ADest^.Data.CodePointCount > 0) then begin
kpcp := PUInt24(pextra);
for k := 0 to ADest^.Data.CodePointCount - 1 do begin
kpcp^ := kitem^.CodePoints[k];
Inc(kpcp);
end;
end;
if (ADest^.Data.WeightCount > 0) then begin
kpw := PUCA_PropWeights(pextra + (ADest^.Data.CodePointCount*SizeOf(UInt24)));
for k := 0 to ADest^.Data.WeightCount - 1 do begin
kpw^.Weights[0] := kitem^.Weights[k].Weights[0];
kpw^.Weights[1] := kitem^.Weights[k].Weights[1];
kpw^.Weights[2] := kitem^.Weights[k].Weights[2];
Inc(kpw);
end;
end;
Result := CalcItemOnlySize(ASource);
if (ASource.Left <> nil) then begin
pnext := PUCA_PropItemContextTreeNodeRec(PtrUInt(ADest) + Result);
ADest^.Left := Result;
Result := Result + ConstructItem(ASource.Left,pnext);
end else begin
ADest^.Left := 0;
end;
if (ASource.Right <> nil) then begin
pnext := PUCA_PropItemContextTreeNodeRec(PtrUInt(ADest) + Result);
ADest^.Right := Result;
Result := Result + ConstructItem(ASource.Right,pnext);
end else begin
ADest^.Right := 0;
end;
end;
var
c : PtrUInt;
r : PUCA_PropItemContextTreeRec;
p : PUCA_PropItemContextTreeNodeRec;
tempTree : TAVLTree;
begin
tempTree := ConstructAvlContextTree(AContext);
try
c := CalcSize(tempTree);
if (ADestBufferLength > 0) and (c > ADestBufferLength) then
raise Exception.Create(SInsufficientMemoryBuffer);
r := @ADestBuffer;
r^.Size := c;
p := PUCA_PropItemContextTreeNodeRec(PtrUInt(r) + SizeOf(r^.Size));
ConstructItem(tempTree.Root,p);
finally
tempTree.Free();
end;
Result := r;
end;
procedure ReverseRecordBytes(var AItem : TSerializedCollationHeader);
begin
ReverseBytes(AItem.BMP_Table1Length,SizeOf(AItem.BMP_Table1Length));
ReverseBytes(AItem.BMP_Table2Length,SizeOf(AItem.BMP_Table2Length));
ReverseBytes(AItem.OBMP_Table1Length,SizeOf(AItem.OBMP_Table1Length));
ReverseBytes(AItem.OBMP_Table2Length,SizeOf(AItem.OBMP_Table2Length));
ReverseBytes(AItem.PropCount,SizeOf(AItem.PropCount));
ReverseBytes(AItem.VariableLowLimit,SizeOf(AItem.VariableLowLimit));
ReverseBytes(AItem.VariableHighLimit,SizeOf(AItem.VariableHighLimit));
end;
procedure ReverseBytes(var AData; const ALength : Integer);
var
i,j : PtrInt;
c : Byte;
p : PByte;
begin
if (ALength = 1) then
exit;
p := @AData;
j := ALength div 2;
for i := 0 to Pred(j) do begin
c := p[i];
p[i] := p[(ALength - 1 ) - i];
p[(ALength - 1 ) - i] := c;
end;
end;
procedure ReverseArray(var AValue; const AArrayLength, AItemSize : PtrInt);
var
p : PByte;
i : PtrInt;
begin
if ( AArrayLength > 0 ) and ( AItemSize > 1 ) then begin
p := @AValue;
for i := 0 to Pred(AArrayLength) do begin
ReverseBytes(p^,AItemSize);
Inc(p,AItemSize);
end;
end;
end;
procedure ReverseContextNodeFromNativeEndian(s, d : PUCA_PropItemContextTreeNodeRec);
var
k : PtrUInt;
p_s, p_d : PByte;
begin
d^.Left := s^.Left;
ReverseBytes(d^.Left,SizeOf(d^.Left));
d^.Right := s^.Right;
ReverseBytes(d^.Right,SizeOf(d^.Right));
d^.Data.CodePointCount := s^.Data.CodePointCount;
ReverseBytes(d^.Data.CodePointCount,SizeOf(d^.Data.CodePointCount));
d^.Data.WeightCount := s^.Data.WeightCount;
ReverseBytes(d^.Data.WeightCount,SizeOf(d^.Data.WeightCount));
k := SizeOf(TUCA_PropItemContextTreeNodeRec);
p_s := PByte(PtrUInt(s) + k);
p_d := PByte(PtrUInt(d) + k);
k := (s^.Data.CodePointCount*SizeOf(UInt24));
Move(p_s^,p_d^, k);
ReverseArray(p_d^,s^.Data.CodePointCount,SizeOf(UInt24));
p_s := PByte(PtrUInt(p_s) + k);
p_d := PByte(PtrUInt(p_d) + k);
k := (s^.Data.WeightCount*SizeOf(TUCA_PropWeights));
Move(p_s^,p_d^,k);
ReverseArray(p_d^,(s^.Data.WeightCount*Length(TUCA_PropWeights.Weights)),SizeOf(TUCA_PropWeights.Weights[0]));
if (s^.Left > 0) then
ReverseContextNodeFromNativeEndian(
PUCA_PropItemContextTreeNodeRec(PtrUInt(s) + s^.Left),
PUCA_PropItemContextTreeNodeRec(PtrUInt(d) + s^.Left)
);
if (s^.Right > 0) then
ReverseContextNodeFromNativeEndian(
PUCA_PropItemContextTreeNodeRec(PtrUInt(s) + s^.Right),
PUCA_PropItemContextTreeNodeRec(PtrUInt(d) + s^.Right)
);
end;
procedure ReverseContextFromNativeEndian(s, d : PUCA_PropItemContextTreeRec);
var
k : PtrUInt;
begin
d^.Size := s^.Size;
ReverseBytes(d^.Size,SizeOf(d^.Size));
if (s^.Size = 0) then
exit;
k := SizeOf(s^.Size);
ReverseContextNodeFromNativeEndian(
PUCA_PropItemContextTreeNodeRec(PtrUInt(s)+k),
PUCA_PropItemContextTreeNodeRec(PtrUInt(d)+k)
);
end;
procedure ReverseFromNativeEndian(
const AData : PUCA_PropItemRec;
const ADataLen : Cardinal;
const ADest : PUCA_PropItemRec
);
var
s, d : PUCA_PropItemRec;
sCtx, dCtx : PUCA_PropItemContextTreeRec;
dataEnd : PtrUInt;
k, i : PtrUInt;
p_s, p_d : PByte;
pw_s, pw_d : PUCA_PropWeights;
begin
dataEnd := PtrUInt(AData) + ADataLen;
s := AData;
d := ADest;
while True do begin
d^.WeightLength := s^.WeightLength;
ReverseBytes(d^.WeightLength,SizeOf(d^.WeightLength));
d^.ChildCount := s^.ChildCount;
ReverseBytes(d^.ChildCount,SizeOf(d^.ChildCount));
d^.Size := s^.Size;
ReverseBytes(d^.Size,SizeOf(d^.Size));
d^.Flags := s^.Flags;
ReverseBytes(d^.Flags,SizeOf(d^.Flags));
if s^.Contextual then begin
k := SizeOf(TUCA_PropItemRec);
if s^.HasCodePoint() then
k := k + SizeOf(UInt24);
sCtx := PUCA_PropItemContextTreeRec(PtrUInt(s) + k);
dCtx := PUCA_PropItemContextTreeRec(PtrUInt(d) + k);
ReverseContextFromNativeEndian(sCtx,dCtx);
end;
if s^.HasCodePoint() then begin
if s^.Contextual then
k := s^.GetSelfOnlySize()- SizeOf(UInt24) - Cardinal(s^.GetContext()^.Size)
else
k := s^.GetSelfOnlySize() - SizeOf(UInt24);
p_s := PByte(PtrUInt(s) + k);
p_d := PByte(PtrUInt(d) + k);
Unaligned(PUInt24(p_d)^) := Unaligned(PUInt24(p_s)^);
ReverseBytes(p_d^,SizeOf(UInt24));
end;
if (s^.WeightLength > 0) then begin
k := SizeOf(TUCA_PropItemRec);
p_s := PByte(PtrUInt(s) + k);
p_d := PByte(PtrUInt(d) + k);
k := SizeOf(Word);
Unaligned(PWord(p_d)^) := Unaligned(PWord(p_s)^);
ReverseBytes(Unaligned(p_d^),k);
p_s := PByte(PtrUInt(p_s) + k);
p_d := PByte(PtrUInt(p_d) + k);
if s^.IsWeightCompress_1() then begin
k := SizeOf(Byte);
PByte(p_d)^ := PByte(p_s)^;
end else begin
k := SizeOf(Word);
Unaligned(PWord(p_d)^) := Unaligned(PWord(p_s)^);
end;
ReverseBytes(p_d^,k);
p_s := PByte(PtrUInt(p_s) + k);
p_d := PByte(PtrUInt(p_d) + k);
if s^.IsWeightCompress_2() then begin
k := SizeOf(Byte);
PByte(p_d)^ := PByte(p_s)^;
end else begin
k := SizeOf(Word);
Unaligned(PWord(p_d)^) := Unaligned(PWord(p_s)^);
end;
ReverseBytes(p_d^,k);
if (s^.WeightLength > 1) then begin
pw_s := PUCA_PropWeights(PtrUInt(p_s) + k);
pw_d := PUCA_PropWeights(PtrUInt(p_d) + k);
for i := 1 to s^.WeightLength - 1 do begin
pw_d^.Weights[0] := pw_s^.Weights[0];
pw_d^.Weights[1] := pw_s^.Weights[1];
pw_d^.Weights[2] := pw_s^.Weights[2];
ReverseArray(pw_d^,3,SizeOf(pw_s^.Weights[0]));
Inc(pw_s);
Inc(pw_d);
end;
end;
end;
k := s^.GetSelfOnlySize();
s := PUCA_PropItemRec(PtrUInt(s)+k);
d := PUCA_PropItemRec(PtrUInt(d)+k);
if (PtrUInt(s) >= dataEnd) then
Break;
end;
if ( (PtrUInt(s)-PtrUInt(AData)) <> (PtrUInt(d)-PtrUInt(ADest)) ) then
raise Exception.CreateFmt('Read data length(%d) differs from written data length(%d).',[(PtrUInt(s)-PtrUInt(AData)), (PtrUInt(d)-PtrUInt(ADest))]);
end;
//------------------------------------------------------------------------------
procedure ReverseContextNodeToNativeEndian(s, d : PUCA_PropItemContextTreeNodeRec);
var
k : PtrUInt;
p_s, p_d : PByte;
begin
d^.Left := s^.Left;
ReverseBytes(d^.Left,SizeOf(d^.Left));
d^.Right := s^.Right;
ReverseBytes(d^.Right,SizeOf(d^.Right));
d^.Data.CodePointCount := s^.Data.CodePointCount;
ReverseBytes(d^.Data.CodePointCount,SizeOf(d^.Data.CodePointCount));
d^.Data.WeightCount := s^.Data.WeightCount;
ReverseBytes(d^.Data.WeightCount,SizeOf(d^.Data.WeightCount));
k := SizeOf(TUCA_PropItemContextTreeNodeRec);
p_s := PByte(PtrUInt(s) + k);
p_d := PByte(PtrUInt(d) + k);
k := (d^.Data.CodePointCount*SizeOf(UInt24));
Move(p_s^,p_d^, k);
ReverseArray(p_d^,d^.Data.CodePointCount,SizeOf(UInt24));
p_s := PByte(PtrUInt(p_s) + k);
p_d := PByte(PtrUInt(p_d) + k);
k := (d^.Data.WeightCount*SizeOf(TUCA_PropWeights));
Move(p_s^,p_d^,k);
ReverseArray(p_d^,(d^.Data.WeightCount*Length(TUCA_PropWeights.Weights)),SizeOf(TUCA_PropWeights.Weights[0]));
if (d^.Left > 0) then
ReverseContextNodeToNativeEndian(
PUCA_PropItemContextTreeNodeRec(PtrUInt(s) + d^.Left),
PUCA_PropItemContextTreeNodeRec(PtrUInt(d) + d^.Left)
);
if (d^.Right > 0) then
ReverseContextNodeToNativeEndian(
PUCA_PropItemContextTreeNodeRec(PtrUInt(s) + d^.Right),
PUCA_PropItemContextTreeNodeRec(PtrUInt(d) + d^.Right)
);
end;
procedure ReverseContextToNativeEndian(s, d : PUCA_PropItemContextTreeRec);
var
k : PtrUInt;
begin
d^.Size := s^.Size;
ReverseBytes(d^.Size,SizeOf(d^.Size));
if (s^.Size = 0) then
exit;
k := SizeOf(s^.Size);
ReverseContextNodeToNativeEndian(
PUCA_PropItemContextTreeNodeRec(PtrUInt(s)+k),
PUCA_PropItemContextTreeNodeRec(PtrUInt(d)+k)
);
end;
procedure ReverseToNativeEndian(
const AData : PUCA_PropItemRec;
const ADataLen : Cardinal;
const ADest : PUCA_PropItemRec
);
var
s, d : PUCA_PropItemRec;
sCtx, dCtx : PUCA_PropItemContextTreeRec;
dataEnd : PtrUInt;
k, i : PtrUInt;
p_s, p_d : PByte;
pw_s, pw_d : PUCA_PropWeights;
begin
dataEnd := PtrUInt(AData) + ADataLen;
s := AData;
d := ADest;
while True do begin
d^.WeightLength := s^.WeightLength;
ReverseBytes(d^.WeightLength,SizeOf(d^.WeightLength));
d^.ChildCount := s^.ChildCount;
ReverseBytes(d^.ChildCount,SizeOf(d^.ChildCount));
d^.Size := s^.Size;
ReverseBytes(d^.Size,SizeOf(d^.Size));
d^.Flags := s^.Flags;
ReverseBytes(d^.Flags,SizeOf(d^.Flags));
if d^.Contextual then begin
k := SizeOf(TUCA_PropItemRec);
if d^.HasCodePoint() then
k := k + SizeOf(UInt24);
sCtx := PUCA_PropItemContextTreeRec(PtrUInt(s) + k);
dCtx := PUCA_PropItemContextTreeRec(PtrUInt(d) + k);
ReverseContextToNativeEndian(sCtx,dCtx);
end;
if d^.HasCodePoint() then begin
if d^.Contextual then
k := d^.GetSelfOnlySize()- SizeOf(UInt24) - Cardinal(d^.GetContext()^.Size)
else
k := d^.GetSelfOnlySize() - SizeOf(UInt24);
p_s := PByte(PtrUInt(s) + k);
p_d := PByte(PtrUInt(d) + k);
Unaligned(PUInt24(p_d)^) := Unaligned(PUInt24(p_s)^);
ReverseBytes(p_d^,SizeOf(UInt24));
end;
if (d^.WeightLength > 0) then begin
k := SizeOf(TUCA_PropItemRec);
p_s := PByte(PtrUInt(s) + k);
p_d := PByte(PtrUInt(d) + k);
k := SizeOf(Word);
Unaligned(PWord(p_d)^) := Unaligned(PWord(p_s)^);
ReverseBytes(p_d^,k);
p_s := PByte(PtrUInt(p_s) + k);
p_d := PByte(PtrUInt(p_d) + k);
if d^.IsWeightCompress_1() then begin
k := SizeOf(Byte);
PByte(p_d)^ := PByte(p_s)^;
end else begin
k := SizeOf(Word);
Unaligned(PWord(p_d)^) := Unaligned(PWord(p_s)^);
end;
ReverseBytes(p_d^,k);
p_s := PByte(PtrUInt(p_s) + k);
p_d := PByte(PtrUInt(p_d) + k);
if d^.IsWeightCompress_2() then begin
k := SizeOf(Byte);
PByte(p_d)^ := PByte(p_s)^;
end else begin
k := SizeOf(Word);
Unaligned(PWord(p_d)^) := Unaligned(PWord(p_s)^);
end;
ReverseBytes(p_d^,k);
if (d^.WeightLength > 1) then begin
pw_s := PUCA_PropWeights(PtrUInt(p_s) + k);
pw_d := PUCA_PropWeights(PtrUInt(p_d) + k);
for i := 1 to d^.WeightLength - 1 do begin
pw_d^.Weights[0] := pw_s^.Weights[0];
pw_d^.Weights[1] := pw_s^.Weights[1];
pw_d^.Weights[2] := pw_s^.Weights[2];
ReverseArray(pw_d^,3,SizeOf(pw_s^.Weights[0]));
Inc(pw_s);
Inc(pw_d);
end;
end;
end;
k := d^.GetSelfOnlySize();
s := PUCA_PropItemRec(PtrUInt(s)+k);
d := PUCA_PropItemRec(PtrUInt(d)+k);
if (PtrUInt(s) >= dataEnd) then
Break;
end;
if ( (PtrUInt(s)-PtrUInt(AData)) <> (PtrUInt(d)-PtrUInt(ADest)) ) then
raise Exception.CreateFmt('Read data length(%d) differs from written data length(%d).',[(PtrUInt(s)-PtrUInt(AData)), (PtrUInt(d)-PtrUInt(ADest))]);
end;
procedure Check(const ACondition : Boolean; const AMsg : string);overload;
begin
if not ACondition then
raise Exception.Create(AMsg);
end;
procedure Check(
const ACondition : Boolean;
const AFormatMsg : string;
const AArgs : array of const
);overload;
begin
Check(ACondition,Format(AFormatMsg,AArgs));
end;
procedure Check(const ACondition : Boolean);overload;
begin
Check(ACondition,'Check failed.')
end;
procedure CompareWeights(a, b : PUCA_PropWeights; const ALength : Integer);
var
i : Integer;
begin
if (ALength > 0) then begin
for i := 0 to ALength - 1 do begin
Check(a[i].Weights[0]=b[i].Weights[0]);
Check(a[i].Weights[1]=b[i].Weights[1]);
Check(a[i].Weights[2]=b[i].Weights[2]);
end;
end;
end;
procedure CompareCodePoints(a, b : PUInt24; const ALength : Integer);
var
i : Integer;
begin
if (ALength > 0) then begin
for i := 0 to ALength - 1 do
Check(a[i]=b[i]);
end;
end;
procedure CompareContextNode(AProp1, AProp2 : PUCA_PropItemContextTreeNodeRec);
var
a, b : PUCA_PropItemContextTreeNodeRec;
k : Cardinal;
begin
if (AProp1=nil) then begin
Check(AProp2=nil);
exit;
end;
a := AProp1;
b := AProp2;
Check(a^.Left=b^.Left);
Check(a^.Right=b^.Right);
Check(a^.Data.CodePointCount=b^.Data.CodePointCount);
Check(a^.Data.WeightCount=b^.Data.WeightCount);
k := SizeOf(a^.Data);
CompareCodePoints(
PUInt24(PtrUInt(a)+k),
PUInt24(PtrUInt(b)+k),
a^.Data.CodePointCount
);
k := SizeOf(a^.Data)+ (a^.Data.CodePointCount*SizeOf(UInt24));
CompareWeights(
PUCA_PropWeights(PtrUInt(a)+k),
PUCA_PropWeights(PtrUInt(b)+k),
a^.Data.WeightCount
);
if (a^.Left > 0) then begin
k := a^.Left;
CompareContextNode(
PUCA_PropItemContextTreeNodeRec(PtrUInt(a)+k),
PUCA_PropItemContextTreeNodeRec(PtrUInt(b)+k)
);
end;
if (a^.Right > 0) then begin
k := a^.Right;
CompareContextNode(
PUCA_PropItemContextTreeNodeRec(PtrUInt(a)+k),
PUCA_PropItemContextTreeNodeRec(PtrUInt(b)+k)
);
end;
end;
procedure CompareContext(AProp1, AProp2 : PUCA_PropItemContextTreeRec);
var
a, b : PUCA_PropItemContextTreeNodeRec;
k : Integer;
begin
if (AProp1=nil) then begin
Check(AProp2=nil);
exit;
end;
Check(AProp1^.Size=AProp2^.Size);
k := Cardinal(AProp1^.Size);
a := PUCA_PropItemContextTreeNodeRec(PtrUInt(AProp1)+k);
b := PUCA_PropItemContextTreeNodeRec(PtrUInt(AProp2)+k);
CompareContextNode(a,b);
end;
procedure CompareProps(const AProp1, AProp2 : PUCA_PropItemRec; const ADataLen : Integer);
var
a, b, pend : PUCA_PropItemRec;
wa, wb : array of TUCA_PropWeights;
k : Integer;
begin
if (ADataLen <= 0) then
exit;
a := PUCA_PropItemRec(AProp1);
b := PUCA_PropItemRec(AProp2);
pend := PUCA_PropItemRec(PtrUInt(AProp1)+ADataLen);
while (a<pend) do begin
Check(a^.WeightLength=b^.WeightLength);
Check(a^.ChildCount=b^.ChildCount);
Check(a^.Size=b^.Size);
Check(a^.Flags=b^.Flags);
if a^.HasCodePoint() then
Check(a^.CodePoint = b^.CodePoint);
if (a^.WeightLength > 0) then begin
k := a^.WeightLength;
SetLength(wa,k);
SetLength(wb,k);
a^.GetWeightArray(@wa[0]);
b^.GetWeightArray(@wb[0]);
CompareWeights(@wa[0],@wb[0],k);
end;
if a^.Contextual then
CompareContext(a^.GetContext(),b^.GetContext());
Check(a^.GetSelfOnlySize()=b^.GetSelfOnlySize());
k := a^.GetSelfOnlySize();
a := PUCA_PropItemRec(PtrUInt(a)+k);
b := PUCA_PropItemRec(PtrUInt(b)+k);
end;
end;
initialization
FS := DefaultFormatSettings;
FS.DecimalSeparator := '.';
end.