{ Unicode parser helper unit. Copyright (c) 2012-2015 by Inoussa OUEDRAOGO The source code is distributed under the Library GNU General Public License with the following modification: - object files and libraries linked into an application may be distributed without source code. If you didn't receive a copy of the file COPYING, contact: Free Software Foundation 675 Mass Ave Cambridge, MA 02139 USA This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. } unit helper; {$mode delphi} {$H+} {$PACKENUM 1} {$pointermath on} {$typedaddress on} {$warn 4056 off} //Conversion between ordinals and pointers is not portable {$macro on} {$ifdef FPC_REQUIRES_PROPER_ALIGNMENT} {$define X_PACKED:=} {$else FPC_REQUIRES_PROPER_ALIGNMENT} {$define X_PACKED:=packed} {$endif FPC_REQUIRES_PROPER_ALIGNMENT} interface uses Classes, SysUtils, StrUtils; const SLicenseText = ' { Unicode implementation tables. ' + sLineBreak + ' ' + sLineBreak + ' Copyright (c) 2013 - 2017 by Inoussa OUEDRAOGO ' + sLineBreak + ' ' + sLineBreak + ' Permission is hereby granted, free of charge, to any person ' + sLineBreak + ' obtaining a copy of the Unicode data files and any associated ' + sLineBreak + ' documentation (the "Data Files") or Unicode software and any ' + sLineBreak + ' associated documentation (the "Software") to deal in the Data ' + sLineBreak + ' Files or Software without restriction, including without ' + sLineBreak + ' limitation the rights to use, copy, modify, merge, publish, ' + sLineBreak + ' distribute, and/or sell copies of the Data Files or Software, ' + sLineBreak + ' and to permit persons to whom the Data Files or Software are ' + sLineBreak + ' furnished to do so, provided that (a) the above copyright ' + sLineBreak + ' notice(s) and this permission notice appear with all copies ' + sLineBreak + ' of the Data Files or Software, (b) both the above copyright ' + sLineBreak + ' notice(s) and this permission notice appear in associated ' + sLineBreak + ' documentation, and (c) there is clear notice in each modified ' + sLineBreak + ' Data File or in the Software as well as in the documentation ' + sLineBreak + ' associated with the Data File(s) or Software that the data or ' + sLineBreak + ' software has been modified. ' + sLineBreak + ' ' + sLineBreak + ' ' + sLineBreak + ' This program is distributed in the hope that it will be useful, ' + sLineBreak + ' but WITHOUT ANY WARRANTY; without even the implied warranty of ' + sLineBreak + ' MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. }'; WEIGHT_LEVEL_COUNT = 3; type // Unicode General Category TUnicodeCategory = ( ucUppercaseLetter, // Lu = Letter, uppercase ucLowercaseLetter, // Ll = Letter, lowercase ucTitlecaseLetter, // Lt = Letter, titlecase ucModifierLetter, // Lm = Letter, modifier ucOtherLetter, // Lo = Letter, other ucNonSpacingMark, // Mn = Mark, nonspacing ucCombiningMark, // Mc = Mark, spacing combining ucEnclosingMark, // Me = Mark, enclosing ucDecimalNumber, // Nd = Number, decimal digit ucLetterNumber, // Nl = Number, letter ucOtherNumber, // No = Number, other ucConnectPunctuation, // Pc = Punctuation, connector ucDashPunctuation, // Pd = Punctuation, dash ucOpenPunctuation, // Ps = Punctuation, open ucClosePunctuation, // Pe = Punctuation, close ucInitialPunctuation, // Pi = Punctuation, initial quote (may behave like Ps or Pe depending on usage) ucFinalPunctuation, // Pf = Punctuation, final quote (may behave like Ps or Pe depending on usage) ucOtherPunctuation, // Po = Punctuation, other ucMathSymbol, // Sm = Symbol, math ucCurrencySymbol, // Sc = Symbol, currency ucModifierSymbol, // Sk = Symbol, modifier ucOtherSymbol, // So = Symbol, other ucSpaceSeparator, // Zs = Separator, space ucLineSeparator, // Zl = Separator, line ucParagraphSeparator, // Zp = Separator, paragraph ucControl, // Cc = Other, control ucFormat, // Cf = Other, format ucSurrogate, // Cs = Other, surrogate ucPrivateUse, // Co = Other, private use ucUnassigned // Cn = Other, not assigned (including noncharacters) ); TUInt24Rec = packed record public {$ifdef FPC_LITTLE_ENDIAN} byte0, byte1, byte2 : Byte; {$else FPC_LITTLE_ENDIAN} byte2, byte1, byte0 : Byte; {$endif FPC_LITTLE_ENDIAN} public class operator Implicit(a : TUInt24Rec) : Cardinal;{$ifdef USE_INLINE}inline;{$ENDIF} class operator Implicit(a : TUInt24Rec) : LongInt;{$ifdef USE_INLINE}inline;{$ENDIF} class operator Implicit(a : TUInt24Rec) : Word;{$ifdef USE_INLINE}inline;{$ENDIF} class operator Implicit(a : TUInt24Rec) : Byte;{$ifdef USE_INLINE}inline;{$ENDIF} class operator Implicit(a : Cardinal) : TUInt24Rec;{$ifdef USE_INLINE}inline;{$ENDIF} class operator Explicit(a : TUInt24Rec) : Cardinal;{$ifdef USE_INLINE}inline;{$ENDIF} class operator Equal(a, b: TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF} class operator Equal(a : TUInt24Rec; b : Cardinal): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF} class operator Equal(a : Cardinal; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF} class operator Equal(a : TUInt24Rec; b : LongInt): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF} class operator Equal(a : LongInt; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF} class operator Equal(a : TUInt24Rec; b : Word): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF} class operator Equal(a : Word; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF} class operator Equal(a : TUInt24Rec; b : Byte): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF} class operator Equal(a : Byte; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF} class operator NotEqual(a, b: TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF} class operator NotEqual(a : TUInt24Rec; b : Cardinal): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF} class operator NotEqual(a : Cardinal; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF} class operator GreaterThan(a, b: TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF} class operator GreaterThan(a : TUInt24Rec; b : Cardinal): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF} class operator GreaterThan(a : Cardinal; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF} class operator GreaterThanOrEqual(a, b: TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF} class operator GreaterThanOrEqual(a : TUInt24Rec; b : Cardinal): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF} class operator GreaterThanOrEqual(a : Cardinal; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF} class operator LessThan(a, b: TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF} class operator LessThan(a : TUInt24Rec; b : Cardinal): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF} class operator LessThan(a : Cardinal; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF} class operator LessThanOrEqual(a, b: TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF} class operator LessThanOrEqual(a : TUInt24Rec; b : Cardinal): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF} class operator LessThanOrEqual(a : Cardinal; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF} end; UInt24 = TUInt24Rec; PUInt24 = ^UInt24; TUnicodeCodePoint = Cardinal; TUnicodeCodePointArray = array of TUnicodeCodePoint; TDecompositionArray = array of TUnicodeCodePointArray; TNumericValue = Double; TNumericValueArray = array of TNumericValue; TBlockItemRec = packed record RangeStart : TUnicodeCodePoint; RangeEnd : TUnicodeCodePoint; Name : string[120]; CanonicalName : string[120]; end; TBlocks = array of TBlockItemRec; PPropRec = ^TPropRec; { TPropRec } TPropRec = packed record private const FLAG_WHITE_SPACE = 0; const FLAG_HANGUL_SYLLABLE = 1; const FLAG_UNIFIED_IDEOGRAPH = 2; private function GetCategory : TUnicodeCategory;inline; procedure SetCategory(AValue : TUnicodeCategory); function GetWhiteSpace : Boolean;inline; procedure SetWhiteSpace(AValue : Boolean); function GetHangulSyllable : Boolean;inline; procedure SetHangulSyllable(AValue : Boolean); function GetUnifiedIdeograph : Boolean;inline; procedure SetUnifiedIdeograph(AValue : Boolean); public CategoryData : Byte; PropID : Word; CCC : Byte; // Canonical Combining Class NumericIndex : Byte; SimpleUpperCase : UInt24; SimpleLowerCase : UInt24; DecompositionID : SmallInt; public property Category : TUnicodeCategory read GetCategory write SetCategory; property WhiteSpace : Boolean read GetWhiteSpace write SetWhiteSpace; property HangulSyllable : Boolean read GetHangulSyllable write SetHangulSyllable; property UnifiedIdeograph : Boolean read GetUnifiedIdeograph write SetUnifiedIdeograph; end; TPropRecArray = array of TPropRec; TDecompositionIndexRec = packed record StartPosition : Word; Length : Byte; end; TDecompositionBook = X_PACKED record Index : array of TDecompositionIndexRec; CodePoints : array of TUnicodeCodePoint; end; PDataLineRec = ^TDataLineRec; TDataLineRec = record PropID : Integer; case LineType : Byte of 0 : (CodePoint : TUnicodeCodePoint); 1 : (StartCodePoint, EndCodePoint : TUnicodeCodePoint); end; TDataLineRecArray = array of TDataLineRec; TCodePointRec = record case LineType : Byte of 0 : (CodePoint : TUnicodeCodePoint); 1 : (StartCodePoint, EndCodePoint : TUnicodeCodePoint); end; TCodePointRecArray = array of TCodePointRec; TPropListLineRec = packed record CodePoint : TCodePointRec; PropName : string[123]; end; TPropListLineRecArray = array of TPropListLineRec; { TUCA_WeightRec } TUCA_WeightRec = packed record public Weights : array[0..3] of Cardinal; Variable : Boolean; public class operator Equal(a, b: TUCA_WeightRec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF} end; TUCA_WeightRecArray = array of TUCA_WeightRec; PUCA_LineContextItemRec = ^TUCA_LineContextItemRec; TUCA_LineContextItemRec = X_PACKED record public CodePoints : TUnicodeCodePointArray; Weights : TUCA_WeightRecArray; public procedure Clear(); procedure Assign(ASource : PUCA_LineContextItemRec); function Clone() : TUCA_LineContextItemRec; end; PUCA_LineContextRec = ^TUCA_LineContextRec; TUCA_LineContextRec = X_PACKED record public Data : array of TUCA_LineContextItemRec; public procedure Clear(); procedure Assign(ASource : PUCA_LineContextRec); function Clone() : TUCA_LineContextRec; end; PUCA_LineRec = ^TUCA_LineRec; TUCA_LineRec = X_PACKED record public CodePoints : TUnicodeCodePointArray; Weights : TUCA_WeightRecArray; Context : TUCA_LineContextRec; //Variable : Boolean; Deleted : Boolean; Stored : Boolean; public procedure Clear(); procedure Assign(ASource : PUCA_LineRec); function Clone() : TUCA_LineRec; function HasContext() : Boolean; end; TUCA_VariableKind = ( ucaShifted, ucaNonIgnorable, ucaBlanked, ucaShiftedTrimmed, ucaIgnoreSP ); TUCA_DataBook = X_PACKED record Version : string; VariableWeight : TUCA_VariableKind; Backwards : array[0..3] of Boolean; Lines : array of TUCA_LineRec; end; PUCA_DataBook = ^TUCA_DataBook; TUCA_DataBookIndex = array of Integer; type TUCA_PropWeights = packed record Weights : array[0..2] of Word; //Variable : Byte; end; PUCA_PropWeights = ^TUCA_PropWeights; TUCA_PropItemContextRec = packed record CodePointCount : Byte; WeightCount : Byte; //CodePoints : UInt24; //Weights : TUCA_PropWeights; end; PUCA_PropItemContextRec = ^TUCA_PropItemContextRec; TUCA_PropItemContextTreeNodeRec = packed record Left : Word; Right : Word; Data : TUCA_PropItemContextRec; end; PUCA_PropItemContextTreeNodeRec = ^TUCA_PropItemContextTreeNodeRec; TUCA_PropItemContextTreeRec = packed record public Size : UInt24; public function GetData:PUCA_PropItemContextTreeNodeRec;inline; property Data : PUCA_PropItemContextTreeNodeRec read GetData; end; PUCA_PropItemContextTreeRec = ^TUCA_PropItemContextTreeRec; { TUCA_PropItemRec } TUCA_PropItemRec = packed record private const FLAG_VALID = 0; const FLAG_CODEPOINT = 1; const FLAG_CONTEXTUAL = 2; const FLAG_DELETION = 3; const FLAG_COMPRESS_WEIGHT_1 = 6; const FLAG_COMPRESS_WEIGHT_2 = 7; private function GetWeightSize : Word;inline; public WeightLength : Byte; ChildCount : Byte; Size : Word; Flags : Byte; public function HasCodePoint() : Boolean;inline; function GetCodePoint() : UInt24;//inline; property CodePoint : UInt24 read GetCodePoint; //Weights : array[0..WeightLength] of TUCA_PropWeights; procedure GetWeightArray(ADest : PUCA_PropWeights); function GetSelfOnlySize() : Cardinal;inline; procedure SetContextual(AValue : Boolean);inline; function GetContextual() : Boolean;inline; property Contextual : Boolean read GetContextual write setContextual; function GetContext() : PUCA_PropItemContextTreeRec; procedure SetDeleted(AValue : Boolean);inline; function IsDeleted() : Boolean;inline; function IsValid() : Boolean;inline; function IsWeightCompress_1() : Boolean;inline; function IsWeightCompress_2() : Boolean;inline; end; PUCA_PropItemRec = ^TUCA_PropItemRec; TUCA_PropIndexItem = packed record CodePoint : Cardinal; Position : Integer; end; PUCA_PropIndexItem = ^TUCA_PropIndexItem; TUCA_PropBook = X_PACKED record ItemSize : Integer; Index : array of TUCA_PropIndexItem; Items : PUCA_PropItemRec; //Native Endian ItemsOtherEndian : PUCA_PropItemRec;//Non Native Endian VariableLowLimit : Word; VariableHighLimit : Word; end; PUCA_PropBook = ^TUCA_PropBook; TBmpFirstTable = array[0..255] of Byte; TBmpSecondTableItem = array[0..255] of Word; TBmpSecondTable = array of TBmpSecondTableItem; T3lvlBmp1Table = array[0..255] of Byte; T3lvlBmp2TableItem = array[0..15] of Word; T3lvlBmp2Table = array of T3lvlBmp2TableItem; T3lvlBmp3TableItem = array[0..15] of Word; T3lvlBmp3Table = array of T3lvlBmp3TableItem; TucaBmpFirstTable = array[0..255] of Byte; TucaBmpSecondTableItem = array[0..255] of Cardinal; TucaBmpSecondTable = array of TucaBmpSecondTableItem; PucaBmpFirstTable = ^TucaBmpFirstTable; PucaBmpSecondTable = ^TucaBmpSecondTable; const LOW_SURROGATE_BEGIN = Word($DC00); LOW_SURROGATE_END = Word($DFFF); LOW_SURROGATE_COUNT = LOW_SURROGATE_END - LOW_SURROGATE_BEGIN + 1; HIGH_SURROGATE_BEGIN = Word($D800); HIGH_SURROGATE_END = Word($DBFF); HIGH_SURROGATE_COUNT = HIGH_SURROGATE_END - HIGH_SURROGATE_BEGIN + 1; type TOBmpFirstTable = array[0..(HIGH_SURROGATE_COUNT-1)] of Word; TOBmpSecondTableItem = array[0..(LOW_SURROGATE_COUNT-1)] of Word; TOBmpSecondTable = array of TOBmpSecondTableItem; T3lvlOBmp1Table = array[0..1023] of Byte; T3lvlOBmp2TableItem = array[0..31] of Word; T3lvlOBmp2Table = array of T3lvlOBmp2TableItem; T3lvlOBmp3TableItem = array[0..31] of Word; T3lvlOBmp3Table = array of T3lvlOBmp3TableItem; TucaOBmpFirstTable = array[0..(HIGH_SURROGATE_COUNT-1)] of Word; TucaOBmpSecondTableItem = array[0..(LOW_SURROGATE_COUNT-1)] of Cardinal; TucaOBmpSecondTable = array of TucaOBmpSecondTableItem; PucaOBmpFirstTable = ^TucaOBmpFirstTable; PucaOBmpSecondTable = ^TucaOBmpSecondTable; type TEndianKind = (ekLittle, ekBig); const ENDIAN_SUFFIX : array[TEndianKind] of string[2] = ('le','be'); {$IFDEF ENDIAN_LITTLE} ENDIAN_NATIVE = ekLittle; ENDIAN_NON_NATIVE = ekBig; {$ENDIF ENDIAN_LITTLE} {$IFDEF ENDIAN_BIG} ENDIAN_NATIVE = ekBig; ENDIAN_NON_NATIVE = ekLittle; {$ENDIF ENDIAN_BIG} procedure GenerateLicenceText(ADest : TStream); function BoolToByte(AValue : Boolean): Byte;inline; function IsHangulSyllable( const ACodePoint : TUnicodeCodePoint; const AHangulList : TCodePointRecArray ) : Boolean; procedure ParseHangulSyllableTypes( ADataAStream : TMemoryStream; var ACodePointList : TCodePointRecArray ); procedure ParseProps( ADataAStream : TMemoryStream; var APropList : TPropListLineRecArray ); function FindCodePointsByProperty( const APropName : string; const APropList : TPropListLineRecArray ) : TCodePointRecArray; procedure ParseBlokcs( ADataAStream : TMemoryStream; var ABlocks : TBlocks ); procedure ParseUCAFile( ADataAStream : TMemoryStream; var ABook : TUCA_DataBook ); procedure MakeUCA_Props( ABook : PUCA_DataBook; out AProps : PUCA_PropBook ); procedure FreeUcaBook(var ABook : PUCA_PropBook); procedure MakeUCA_BmpTables( var AFirstTable : TucaBmpFirstTable; var ASecondTable : TucaBmpSecondTable; const APropBook : PUCA_PropBook ); procedure MakeUCA_OBmpTables( var AFirstTable : TucaOBmpFirstTable; var ASecondTable : TucaOBmpSecondTable; const APropBook : PUCA_PropBook ); function GetPropPosition( const AHighS, ALowS : Word; const AFirstTable : PucaOBmpFirstTable; const ASecondTable : PucaOBmpSecondTable ): Integer;inline;overload; procedure GenerateUCA_Head( ADest : TStream; ABook : PUCA_DataBook; AProps : PUCA_PropBook ); procedure GenerateUCA_BmpTables( AStream, ANativeEndianStream, ANonNativeEndianStream : TStream; var AFirstTable : TucaBmpFirstTable; var ASecondTable : TucaBmpSecondTable ); procedure GenerateBinaryUCA_BmpTables( ANativeEndianStream, ANonNativeEndianStream : TStream; var AFirstTable : TucaBmpFirstTable; var ASecondTable : TucaBmpSecondTable ); procedure GenerateUCA_PropTable( ADest : TStream; const APropBook : PUCA_PropBook; const AEndian : TEndianKind ); procedure GenerateBinaryUCA_PropTable( // WARNING : files must be generated for each endianess (Little / Big) ANativeEndianStream, ANonNativeEndianStream : TStream; const APropBook : PUCA_PropBook ); procedure GenerateUCA_OBmpTables( AStream, ANativeEndianStream, ANonNativeEndianStream : TStream; var AFirstTable : TucaOBmpFirstTable; var ASecondTable : TucaOBmpSecondTable ); procedure GenerateBinaryUCA_OBmpTables( ANativeEndianStream, ANonNativeEndianStream : TStream; var AFirstTable : TucaOBmpFirstTable; var ASecondTable : TucaOBmpSecondTable ); procedure Parse_UnicodeData( ADataAStream : TMemoryStream; var APropList : TPropRecArray; var ANumericTable : TNumericValueArray; var ADataLineList : TDataLineRecArray; var ADecomposition : TDecompositionArray; const AHangulList : TCodePointRecArray; const AWhiteSpaces : TCodePointRecArray; const AUnifiedIdeographs : TCodePointRecArray ); procedure MakeDecomposition( const ARawData : TDecompositionArray; var ABook : TDecompositionBook ); procedure MakeBmpTables( var AFirstTable : TBmpFirstTable; var ASecondTable : TBmpSecondTable; const ADataLineList : TDataLineRecArray ); procedure MakeBmpTables3Levels( var AFirstTable : T3lvlBmp1Table; var ASecondTable : T3lvlBmp2Table; var AThirdTable : T3lvlBmp3Table; const ADataLineList : TDataLineRecArray ); procedure GenerateBmpTables( ADest : TStream; var AFirstTable : TBmpFirstTable; var ASecondTable : TBmpSecondTable ); procedure Generate3lvlBmpTables( ADest : TStream; var AFirstTable : T3lvlBmp1Table; var ASecondTable : T3lvlBmp2Table; var AThirdTable : T3lvlBmp3Table ); procedure GeneratePropTable( ADest : TStream; const APropList : TPropRecArray; const AEndian : TEndianKind ); procedure GenerateNumericTable( ADest : TStream; const ANumList : TNumericValueArray; const ACompleteUnit : Boolean ); procedure GenerateDecompositionBookTable( ADest : TStream; const ABook : TDecompositionBook; const AEndian : TEndianKind ); procedure GenerateOutBmpTable( ADest : TStream; const AList : TDataLineRecArray ); function Compress(const AData : TDataLineRecArray) : TDataLineRecArray; function EvaluateFloat(const AStr : string) : Double; function StrToCategory(const AStr : string) : TUnicodeCategory; function StringToCodePoint(ACP : string) : TUnicodeCodePoint; function IsWhiteSpace( const ACodePoint : TUnicodeCodePoint; const AWhiteSpaces : TCodePointRecArray ) : Boolean;inline; function IsIncluded( const ACodePoint : TUnicodeCodePoint; const AList : TCodePointRecArray ) : Boolean; function GetPropID( ACodePoint : TUnicodeCodePoint; const ADataLineList : TDataLineRecArray ) : Cardinal; //-------------------- procedure MakeOBmpTables( var AFirstTable : TOBmpFirstTable; var ASecondTable : TOBmpSecondTable; const ADataLineList : TDataLineRecArray ); procedure MakeOBmpTables3Levels( var AFirstTable : T3lvlOBmp1Table; var ASecondTable : T3lvlOBmp2Table; var AThirdTable : T3lvlOBmp3Table; const ADataLineList : TDataLineRecArray ); procedure GenerateOBmpTables( ADest : TStream; var AFirstTable : TOBmpFirstTable; var ASecondTable : TOBmpSecondTable ); procedure Generate3lvlOBmpTables( ADest : TStream; var AFirstTable : T3lvlOBmp1Table; var ASecondTable : T3lvlOBmp2Table; var AThirdTable : T3lvlOBmp3Table ); function GetProp( const AHighS, ALowS : Word; const AProps : TPropRecArray; var AFirstTable : TOBmpFirstTable; var ASecondTable : TOBmpSecondTable ): PPropRec; inline;overload; function GetProp( const AHighS, ALowS : Word; const AProps : TPropRecArray; var AFirstTable : T3lvlOBmp1Table; var ASecondTable : T3lvlOBmp2Table; var AThirdTable : T3lvlOBmp3Table ): PPropRec; inline;overload; procedure FromUCS4(const AValue : TUnicodeCodePoint; var AHighS, ALowS : Word);inline; function ToUCS4(const AHighS, ALowS : Word) : TUnicodeCodePoint; inline; type TBitOrder = 0..7; function IsBitON(const AData : Byte; const ABit : TBitOrder) : Boolean ;{$IFDEF USE_INLINE}inline;{$ENDIF} procedure SetBit(var AData : Byte; const ABit : TBitOrder; const AValue : Boolean);{$IFDEF USE_INLINE}inline;{$ENDIF} function GenerateEndianIncludeFileName( const AStoreName : string; const AEndian : TEndianKind ): string;inline; procedure ReverseFromNativeEndian( const AData : PUCA_PropItemRec; const ADataLen : Cardinal; const ADest : PUCA_PropItemRec ); procedure ReverseToNativeEndian( const AData : PUCA_PropItemRec; const ADataLen : Cardinal; const ADest : PUCA_PropItemRec ); procedure CompareProps( const AProp1, AProp2 : PUCA_PropItemRec; const ADataLen : Integer ); type TCollationName = array[0..(128-1)] of Byte; TCollationVersion = TCollationName; TSerializedCollationHeader = packed record Base : TCollationName; Version : TCollationVersion; CollationName : TCollationName; CollationAliases : TCollationName; // ";" separated VariableWeight : Byte; Backwards : Byte; BMP_Table1Length : DWord; BMP_Table2Length : DWord; OBMP_Table1Length : DWord; OBMP_Table2Length : DWord; PropCount : DWord; VariableLowLimit : Word; VariableHighLimit : Word; NoNormalization : Byte; Strength : Byte; ChangedFields : Byte; end; PSerializedCollationHeader = ^TSerializedCollationHeader; procedure StringToByteArray(AStr : UnicodeString; var ABuffer : array of Byte);overload; procedure StringToByteArray(AStr : UnicodeString; ABuffer : PByte; const ABufferLength : Integer);overload; procedure ReverseRecordBytes(var AItem : TSerializedCollationHeader); procedure ReverseBytes(var AData; const ALength : Integer); procedure ReverseArray(var AValue; const AArrayLength, AItemSize : PtrInt); function CalcMaxLevel2Value(ALines : array of TUCA_LineRec) : Cardinal; procedure RewriteLevel2Values(ALines : PUCA_LineRec; ALength : Integer); function RewriteLevel2( const ALevel1Value : Cardinal; ALines : PUCA_LineRec; const ALinesLength : Integer ) : Integer; resourcestring SInsufficientMemoryBuffer = 'Insufficient Memory Buffer'; implementation uses typinfo, Math, AVL_Tree, trie; type TCardinalRec = packed record {$ifdef FPC_LITTLE_ENDIAN} byte0, byte1, byte2, byte3 : Byte; {$else FPC_LITTLE_ENDIAN} byte3, byte2, byte1, byte0 : Byte; {$endif FPC_LITTLE_ENDIAN} end; TWordRec = packed record {$ifdef FPC_LITTLE_ENDIAN} byte0, byte1 : Byte; {$else FPC_LITTLE_ENDIAN} byte1, byte0 : Byte; {$endif FPC_LITTLE_ENDIAN} end; { TUInt24Rec } class operator TUInt24Rec.Explicit(a : TUInt24Rec) : Cardinal; begin TCardinalRec(Result).byte0 := a.byte0; TCardinalRec(Result).byte1 := a.byte1; TCardinalRec(Result).byte2 := a.byte2; TCardinalRec(Result).byte3 := 0; end; class operator TUInt24Rec.Implicit(a : TUInt24Rec) : Cardinal; begin TCardinalRec(Result).byte0 := a.byte0; TCardinalRec(Result).byte1 := a.byte1; TCardinalRec(Result).byte2 := a.byte2; TCardinalRec(Result).byte3 := 0; end; class operator TUInt24Rec.Implicit(a : TUInt24Rec) : LongInt; begin Result := Cardinal(a); end; class operator TUInt24Rec.Implicit(a : TUInt24Rec) : Word; begin {$IFOPT R+} if (a.byte2 > 0) then Error(reIntOverflow); {$ENDIF R+} TWordRec(Result).byte0 := a.byte0; TWordRec(Result).byte1 := a.byte1; end; class operator TUInt24Rec.Implicit(a : TUInt24Rec) : Byte; begin {$IFOPT R+} if (a.byte1 > 0) or (a.byte2 > 0) then Error(reIntOverflow); {$ENDIF R+} Result := a.byte0; end; class operator TUInt24Rec.Implicit(a : Cardinal) : TUInt24Rec; begin {$IFOPT R+} if (a > $FFFFFF) then Error(reIntOverflow); {$ENDIF R+} Result.byte0 := TCardinalRec(a).byte0; Result.byte1 := TCardinalRec(a).byte1; Result.byte2 := TCardinalRec(a).byte2; end; class operator TUInt24Rec.Equal(a, b : TUInt24Rec) : Boolean; begin Result := (a.byte0 = b.byte0) and (a.byte1 = b.byte1) and (a.byte2 = b.byte2); end; class operator TUInt24Rec.Equal(a : TUInt24Rec; b : Cardinal) : Boolean; begin Result := (TCardinalRec(b).byte3 = 0) and (a.byte0 = TCardinalRec(b).byte0) and (a.byte1 = TCardinalRec(b).byte1) and (a.byte2 = TCardinalRec(b).byte2); end; class operator TUInt24Rec.Equal(a : Cardinal; b : TUInt24Rec) : Boolean; begin Result := (b = a); end; class operator TUInt24Rec.Equal(a : TUInt24Rec; b : LongInt) : Boolean; begin Result := (LongInt(a) = b); end; class operator TUInt24Rec.Equal(a : LongInt; b : TUInt24Rec) : Boolean; begin Result := (b = a); end; class operator TUInt24Rec.Equal(a : TUInt24Rec; b : Word) : Boolean; begin Result := (a.byte2 = 0) and (a.byte0 = TWordRec(b).byte0) and (a.byte1 = TWordRec(b).byte1); end; class operator TUInt24Rec.Equal(a : Word; b : TUInt24Rec) : Boolean; begin Result := (b = a); end; class operator TUInt24Rec.Equal(a : TUInt24Rec; b : Byte) : Boolean; begin Result := (a.byte2 = 0) and (a.byte1 = 0) and (a.byte0 = b); end; class operator TUInt24Rec.Equal(a : Byte; b : TUInt24Rec) : Boolean; begin Result := (b = a); end; class operator TUInt24Rec.NotEqual(a, b : TUInt24Rec) : Boolean; begin Result := (a.byte0 <> b.byte0) or (a.byte1 <> b.byte1) or (a.byte2 <> b.byte2); end; class operator TUInt24Rec.NotEqual(a : TUInt24Rec; b : Cardinal) : Boolean; begin Result := (TCardinalRec(b).byte3 <> 0) or (a.byte0 <> TCardinalRec(b).byte0) or (a.byte1 <> TCardinalRec(b).byte1) or (a.byte2 <> TCardinalRec(b).byte2); end; class operator TUInt24Rec.NotEqual(a : Cardinal; b : TUInt24Rec) : Boolean; begin Result := (b <> a); end; class operator TUInt24Rec.GreaterThan(a, b: TUInt24Rec): Boolean; begin Result := (a.byte2 > b.byte2) or ((a.byte2 = b.byte2) and (a.byte1 > b.byte1)) or ((a.byte2 = b.byte2) and (a.byte1 = b.byte1) and (a.byte0 > b.byte0)); end; class operator TUInt24Rec.GreaterThan(a: TUInt24Rec; b: Cardinal): Boolean; begin Result := Cardinal(a) > b; end; class operator TUInt24Rec.GreaterThan(a: Cardinal; b: TUInt24Rec): Boolean; begin Result := a > Cardinal(b); end; class operator TUInt24Rec.GreaterThanOrEqual(a, b: TUInt24Rec): Boolean; begin Result := (a.byte2 > b.byte2) or ((a.byte2 = b.byte2) and (a.byte1 > b.byte1)) or ((a.byte2 = b.byte2) and (a.byte1 = b.byte1) and (a.byte0 >= b.byte0)); end; class operator TUInt24Rec.GreaterThanOrEqual(a: TUInt24Rec; b: Cardinal): Boolean; begin Result := Cardinal(a) >= b; end; class operator TUInt24Rec.GreaterThanOrEqual(a: Cardinal; b: TUInt24Rec): Boolean; begin Result := a >= Cardinal(b); end; class operator TUInt24Rec.LessThan(a, b: TUInt24Rec): Boolean; begin Result := (b > a); end; class operator TUInt24Rec.LessThan(a: TUInt24Rec; b: Cardinal): Boolean; begin Result := Cardinal(a) < b; end; class operator TUInt24Rec.LessThan(a: Cardinal; b: TUInt24Rec): Boolean; begin Result := a < Cardinal(b); end; class operator TUInt24Rec.LessThanOrEqual(a, b: TUInt24Rec): Boolean; begin Result := (b >= a); end; class operator TUInt24Rec.LessThanOrEqual(a: TUInt24Rec; b: Cardinal): Boolean; begin Result := Cardinal(a) <= b; end; class operator TUInt24Rec.LessThanOrEqual(a: Cardinal; b: TUInt24Rec): Boolean; begin Result := a <= Cardinal(b); end; { TUCA_WeightRec } class operator TUCA_WeightRec.Equal(a, b : TUCA_WeightRec) : Boolean; begin Result := (a.Weights[0] = b.Weights[0]) and (a.Weights[1] = b.Weights[1]) and (a.Weights[2] = b.Weights[2]) and (a.Weights[3] = b.Weights[3]) and (a.Variable = b.Variable); end; procedure StringToByteArray(AStr : UnicodeString; var ABuffer : array of Byte); begin StringToByteArray(AStr,@(ABuffer[Low(ABuffer)]),Length(ABuffer)); end; procedure StringToByteArray(AStr : UnicodeString; ABuffer : PByte; const ABufferLength : Integer); var c, i, bl : Integer; ps : PWord; pb : PByte; begin if (ABufferLength < 1) then exit; c := Length(AStr); if (c > ABufferLength) then c := ABufferLength; bl := 0; pb := ABuffer; if (c > 0) then begin ps := PWord(@AStr[1]); for i := 1 to c do begin if (ps^ <= High(Byte)) then begin pb^ := ps^; bl := bl+1; Inc(pb); end; Inc(ps); end; end; if (bl < ABufferLength) then begin for i := bl+1 to ABufferLength do begin pb^:= 0; Inc(pb); end; end; end; function GenerateEndianIncludeFileName( const AStoreName : string; const AEndian : TEndianKind ): string;inline; begin Result := ExtractFilePath(AStoreName) + ChangeFileExt(ExtractFileName(AStoreName),Format('_%s.inc',[ENDIAN_SUFFIX[AEndian]])); end; function IsBitON(const AData : Byte; const ABit : TBitOrder) : Boolean ; begin Result := ( ( AData and ( 1 shl ABit ) ) <> 0 ); end; procedure SetBit(var AData : Byte; const ABit : TBitOrder; const AValue : Boolean); begin if AValue then AData := AData or (1 shl (ABit mod 8)) else AData := AData and ( not ( 1 shl ( ABit mod 8 ) ) ); end; var FS : TFormatSettings; function EvaluateFloat(const AStr : string) : Double; var s, n, d : string; i : Integer; begin Result := 0; s := Trim(AStr); if (Length(s) > 0) then begin i := Pos('/',s); if (i < 1) then Result := StrToFloat(s,FS) else begin n := Copy(s,1,i-1); d := Copy(s,i+1,MaxInt); Result := StrToInt(n) / StrToInt(d); end; end; end; function StrToCategory(const AStr : string) : TUnicodeCategory; var s : string; begin s := UpperCase(Trim(AStr)); if (s = 'LU') then Result := ucUppercaseLetter else if (s = 'LL') then Result := ucLowercaseLetter else if (s = 'LT') then Result := ucTitlecaseLetter else if (s = 'LM') then Result := ucModifierLetter else if (s = 'LO') then Result := ucOtherLetter else if (s = 'MN') then Result := ucNonSpacingMark else if (s = 'MC') then Result := ucCombiningMark else if (s = 'ME') then Result := ucEnclosingMark else if (s = 'ND') then Result := ucDecimalNumber else if (s = 'NL') then Result := ucLetterNumber else if (s = 'NO') then Result := ucOtherNumber else if (s = 'PC') then Result := ucConnectPunctuation else if (s = 'PD') then Result := ucDashPunctuation else if (s = 'PS') then Result := ucOpenPunctuation else if (s = 'PE') then Result := ucClosePunctuation else if (s = 'PI') then Result := ucInitialPunctuation else if (s = 'PF') then Result := ucFinalPunctuation else if (s = 'PO') then Result := ucOtherPunctuation else if (s = 'SM') then Result := ucMathSymbol else if (s = 'SC') then Result := ucCurrencySymbol else if (s = 'SK') then Result := ucModifierSymbol else if (s = 'SO') then Result := ucOtherSymbol else if (s = 'ZS') then Result := ucSpaceSeparator else if (s = 'ZL') then Result := ucLineSeparator else if (s = 'ZP') then Result := ucParagraphSeparator else if (s = 'CC') then Result := ucControl else if (s = 'CF') then Result := ucFormat else if (s = 'CS') then Result := ucSurrogate else if (s = 'CO') then Result := ucPrivateUse else Result := ucUnassigned; end; function StringToCodePoint(ACP : string) : TUnicodeCodePoint; var s : string; begin s := Trim(ACP); Result := 0; if (Length(s) > 0) and (s <> '#') then Result := StrToInt('$' + s); end; function IsIncluded( const ACodePoint : TUnicodeCodePoint; const AList : TCodePointRecArray ) : Boolean; var i : Integer; p : ^TCodePointRec; begin Result := False; p := @AList[Low(AList)]; for i := Low(AList) to High(AList) do begin if (p^.LineType = 0) then begin if (p^.CodePoint = ACodePoint) then begin Result := True; break; end; end else begin if (ACodePoint >= p^.StartCodePoint) and (ACodePoint <= p^.EndCodePoint) then begin Result := True; break; end; end; Inc(p); end; end; {function IsWhiteSpace(const ACodePoint : TUnicodeCodePoint) : Boolean; begin case ACodePoint of $0009..$000D : Result := True;// White_Space # Cc [5] .. $0020 : Result := True;// White_Space # Zs SPACE $0085 : Result := True;// White_Space # Cc $00A0 : Result := True;// White_Space # Zs NO-BREAK SPACE $1680 : Result := True;// White_Space # Zs OGHAM SPACE MARK $180E : Result := True;// White_Space # Zs MONGOLIAN VOWEL SEPARATOR $2000..$200A : Result := True;// White_Space # Zs [11] EN QUAD..HAIR SPACE $2028 : Result := True;// White_Space # Zl LINE SEPARATOR $2029 : Result := True;// White_Space # Zp PARAGRAPH SEPARATOR $202F : Result := True;// White_Space # Zs NARROW NO-BREAK SPACE $205F : Result := True;// White_Space # Zs MEDIUM MATHEMATICAL SPACE $3000 : Result := True;// White_Space # Zs IDEOGRAPHIC SPACE else Result := False; end; end;} function IsWhiteSpace( const ACodePoint : TUnicodeCodePoint; const AWhiteSpaces : TCodePointRecArray ) : Boolean; begin Result := IsIncluded(ACodePoint,AWhiteSpaces); end; function NormalizeBlockName(const AName : string) : string; var i, c, k : Integer; s : string; begin c := Length(AName); SetLength(Result,c); s := LowerCase(AName); k := 0; for i := 1 to c do begin if (s[1] in ['a'..'z','0'..'9','-']) then begin k := k + 1; Result[k] := s[i]; end; end; SetLength(Result,k); end; procedure ParseBlokcs( ADataAStream : TMemoryStream; var ABlocks : TBlocks ); const LINE_LENGTH = 1024; DATA_LENGTH = 25000; var p : PAnsiChar; actualDataLen : Integer; bufferLength, bufferPos, lineLength, linePos : Integer; line : ansistring; function NextLine() : Boolean; var locOldPos : Integer; locOldPointer : PAnsiChar; begin Result := False; locOldPointer := p; locOldPos := bufferPos; while (bufferPos < bufferLength) and (p^ <> #10) do begin Inc(p); Inc(bufferPos); end; if (locOldPos = bufferPos) and (p^ = #10) then begin lineLength := 0; Inc(p); Inc(bufferPos); linePos := 1; Result := True; end else if (locOldPos < bufferPos) then begin lineLength := (bufferPos - locOldPos); Move(locOldPointer^,line[1],lineLength); if (p^ = #10) then begin Dec(lineLength); Inc(p); Inc(bufferPos); end; linePos := 1; Result := True; end; end; function NextToken() : ansistring; var k : Integer; begin k := linePos; if (linePos < lineLength) and (line[linePos] in [';','#','.']) then begin Inc(linePos); Result := Copy(line,k,(linePos-k)); exit; end; while (linePos < lineLength) and not(line[linePos] in [';','#','.']) do Inc(linePos); if (linePos > k) then begin if (line[linePos] in [';','#','.']) then Result := Copy(line,k,(linePos-k)) else Result := Copy(line,k,(linePos-k+1)); Result := Trim(Result); end else begin Result := ''; end; end; procedure ParseLine(); var locData : TBlockItemRec; s : ansistring; begin s := NextToken(); if (s = '') or (s[1] = '#') then exit; locData.RangeStart := StrToInt('$'+s); s := NextToken(); if (s <> '.') then raise Exception.CreateFmt('"." expected but "%s" found.',[s]); s := NextToken(); if (s <> '.') then raise Exception.CreateFmt('"." expected but "%s" found.',[s]); s := NextToken(); locData.RangeEnd := StrToInt('$'+s); s := NextToken(); if (s <> ';') then raise Exception.CreateFmt('";" expected but "%s" found.',[s]); locData.Name := Trim(NextToken()); locData.CanonicalName := NormalizeBlockName(locData.Name); if (Length(ABlocks) <= actualDataLen) then SetLength(ABlocks,Length(ABlocks)*2); ABlocks[actualDataLen] := locData; Inc(actualDataLen); end; procedure Prepare(); begin SetLength(ABlocks,DATA_LENGTH); actualDataLen := 0; bufferLength := ADataAStream.Size; bufferPos := 0; p := ADataAStream.Memory; lineLength := 0; SetLength(line,LINE_LENGTH); end; begin Prepare(); while NextLine() do ParseLine(); SetLength(ABlocks,actualDataLen); end; procedure ParseProps( ADataAStream : TMemoryStream; var APropList : TPropListLineRecArray ); const LINE_LENGTH = 1024; DATA_LENGTH = 25000; var p : PAnsiChar; actualDataLen : Integer; bufferLength, bufferPos, lineLength, linePos : Integer; line : ansistring; function NextLine() : Boolean; var locOldPos : Integer; locOldPointer : PAnsiChar; begin Result := False; locOldPointer := p; locOldPos := bufferPos; while (bufferPos < bufferLength) and (p^ <> #10) do begin Inc(p); Inc(bufferPos); end; if (locOldPos = bufferPos) and (p^ = #10) then begin lineLength := 0; Inc(p); Inc(bufferPos); linePos := 1; Result := True; end else if (locOldPos < bufferPos) then begin lineLength := (bufferPos - locOldPos); Move(locOldPointer^,line[1],lineLength); if (p^ = #10) then begin Dec(lineLength); Inc(p); Inc(bufferPos); end; linePos := 1; Result := True; end; end; function NextToken() : ansistring; var k : Integer; begin k := linePos; if (linePos < lineLength) and (line[linePos] in [';','#','.']) then begin Inc(linePos); Result := Copy(line,k,(linePos-k)); exit; end; while (linePos < lineLength) and not(line[linePos] in [';','#','.']) do Inc(linePos); if (linePos > k) then begin if (line[linePos] in [';','#','.']) then Result := Copy(line,k,(linePos-k)) else Result := Copy(line,k,(linePos-k+1)); Result := Trim(Result); end else begin Result := ''; end; end; procedure ParseLine(); var locCP : Cardinal; locData : TPropListLineRec; s : ansistring; begin s := NextToken(); if (s = '') or (s[1] = '#') then exit; locCP := StrToInt('$'+s); s := NextToken(); if (s = ';') then begin locData.CodePoint.LineType := 0; locData.CodePoint.CodePoint := locCP; end else begin if (s = '') or (s <> '.') or (NextToken() <> '.') then raise Exception.CreateFmt('Invalid line : "%s".',[Copy(line,1,lineLength)]); locData.CodePoint.LineType := 1; locData.CodePoint.StartCodePoint := locCP; locData.CodePoint.EndCodePoint := StrToInt('$'+NextToken()); s := NextToken(); if (s <> ';') then raise Exception.CreateFmt('"." expected but "%s" found.',[s]); end; locData.PropName := Trim(NextToken()); if (Length(APropList) <= actualDataLen) then SetLength(APropList,Length(APropList)*2); APropList[actualDataLen] := locData; Inc(actualDataLen); end; procedure Prepare(); begin SetLength(APropList,DATA_LENGTH); actualDataLen := 0; bufferLength := ADataAStream.Size; bufferPos := 0; p := ADataAStream.Memory; lineLength := 0; SetLength(line,LINE_LENGTH); end; begin Prepare(); while NextLine() do ParseLine(); SetLength(APropList,actualDataLen); end; function FindCodePointsByProperty( const APropName : string; const APropList : TPropListLineRecArray ) : TCodePointRecArray; var r : TCodePointRecArray; i, k : Integer; s : string; begin k := 0; r := nil; s := LowerCase(Trim(APropName)); for i := Low(APropList) to High(APropList) do begin if (LowerCase(APropList[i].PropName) = s) then begin if (k >= Length(r)) then begin if (k = 0) then SetLength(r,24) else SetLength(r,(2*Length(r))); end; r[k] := APropList[i].CodePoint; Inc(k); end; end; SetLength(r,k); Result := r; end; procedure ParseHangulSyllableTypes( ADataAStream : TMemoryStream; var ACodePointList : TCodePointRecArray ); const LINE_LENGTH = 1024; DATA_LENGTH = 25000; var p : PAnsiChar; actualDataLen : Integer; bufferLength, bufferPos, lineLength, linePos : Integer; line : ansistring; function NextLine() : Boolean; var locOldPos : Integer; locOldPointer : PAnsiChar; begin Result := False; locOldPointer := p; locOldPos := bufferPos; while (bufferPos < bufferLength) and (p^ <> #10) do begin Inc(p); Inc(bufferPos); end; if (locOldPos = bufferPos) and (p^ = #10) then begin lineLength := 0; Inc(p); Inc(bufferPos); linePos := 1; Result := True; end else if (locOldPos < bufferPos) then begin lineLength := (bufferPos - locOldPos); Move(locOldPointer^,line[1],lineLength); if (p^ = #10) then begin Dec(lineLength); Inc(p); Inc(bufferPos); end; linePos := 1; Result := True; end; end; function NextToken() : ansistring; var k : Integer; begin k := linePos; if (linePos < lineLength) and (line[linePos] = '.') then begin Inc(linePos); while (linePos < lineLength) and (line[linePos] = '.') do begin Inc(linePos); end; Result := Copy(line,k,(linePos-k)); exit; end; while (linePos < lineLength) and not(line[linePos] in [';','#','.']) do Inc(linePos); if (linePos > k) then begin if (line[linePos] in [';','#','.']) then Result := Copy(line,k,(linePos-k)) else Result := Copy(line,k,(linePos-k+1)); Result := Trim(Result); end else begin Result := ''; end; //Inc(linePos); end; procedure ParseLine(); var locData : TCodePointRec; s : ansistring; begin s := NextToken(); if (s = '') or (s[1] = '#') then exit; locData.CodePoint := StrToInt('$'+s); s := NextToken(); if (s = '') or (s[1] in [';','#']) then begin locData.LineType := 0; end else begin if (s <> '..') then raise Exception.CreateFmt('Unknown line type : "%s"',[Copy(line,1,lineLength)]); locData.StartCodePoint := locData.CodePoint; locData.EndCodePoint := StrToInt('$'+NextToken()); locData.LineType := 1; end; if (Length(ACodePointList) <= actualDataLen) then SetLength(ACodePointList,Length(ACodePointList)*2); ACodePointList[actualDataLen] := locData; Inc(actualDataLen); end; procedure Prepare(); begin SetLength(ACodePointList,DATA_LENGTH); actualDataLen := 0; bufferLength := ADataAStream.Size; bufferPos := 0; p := ADataAStream.Memory; lineLength := 0; SetLength(line,LINE_LENGTH); end; begin Prepare(); while NextLine() do ParseLine(); SetLength(ACodePointList,actualDataLen); end; function IsHangulSyllable( const ACodePoint : TUnicodeCodePoint; const AHangulList : TCodePointRecArray ) : Boolean; var i : Integer; p : ^TCodePointRec; begin Result := False; p := @AHangulList[Low(AHangulList)]; for i := Low(AHangulList) to High(AHangulList) do begin if ( (p^.LineType = 0) and (ACodePoint = p^.CodePoint) ) or ( (p^.LineType = 1) and (ACodePoint >= p^.StartCodePoint) and (ACodePoint <= p^.EndCodePoint) ) then begin Result := True; Break; end; Inc(p); end; end; function IndexOf( const AProp : TPropRec; const APropList : TPropRecArray; const AActualLen : Integer ) : Integer;overload; var i : Integer; p : PPropRec; begin Result := -1; if (AActualLen > 0) then begin p := @APropList[0]; for i := 0 to AActualLen - 1 do begin if (AProp.Category = p^.Category) and (AProp.CCC = p^.CCC) and (AProp.NumericIndex = p^.NumericIndex) and (AProp.SimpleUpperCase = p^.SimpleUpperCase) and (AProp.SimpleLowerCase = p^.SimpleLowerCase) and (AProp.WhiteSpace = p^.WhiteSpace) and (AProp.UnifiedIdeograph = p^.UnifiedIdeograph) and // (AProp.DecompositionID = p^.DecompositionID) and (* ( (AProp.DecompositionID = -1 ) and (p^.DecompositionID = -1) ) or ( (AProp.DecompositionID <> -1 ) and (p^.DecompositionID <> -1) ) *) (AProp.HangulSyllable = p^.HangulSyllable) then begin Result := i; Break; end; Inc(p); end; end; end; function IndexOf( const AItem : TUnicodeCodePointArray; const AList : TDecompositionArray ) : Integer;overload; var p : TUnicodeCodePointArray; i : Integer; begin Result := -1; if (Length(AList) = 0) then exit; for i := Low(AList) to High(AList) do begin p := AList[i]; if (Length(p) = Length(AItem)) then begin if CompareMem(@p[0],@AItem[0],Length(AItem)*SizeOf(TUnicodeCodePoint)) then exit(i); end; end; Result := -1; end; function IndexOf( const AItem : TNumericValue; const AList : TNumericValueArray; const AActualLen : Integer ) : Integer;overload; var p : ^TNumericValue; i : Integer; begin Result := -1; if (AActualLen = 0) then exit; p := @AList[Low(AList)]; for i := Low(AList) to AActualLen - 1 do begin if (AItem = p^) then exit(i); Inc(p); end; Result := -1; end; procedure Parse_UnicodeData( ADataAStream : TMemoryStream; var APropList : TPropRecArray; var ANumericTable : TNumericValueArray; var ADataLineList : TDataLineRecArray; var ADecomposition : TDecompositionArray; const AHangulList : TCodePointRecArray; const AWhiteSpaces : TCodePointRecArray; const AUnifiedIdeographs : TCodePointRecArray ); const LINE_LENGTH = 1024; PROP_LENGTH = 5000; DATA_LENGTH = 25000; var p : PAnsiChar; bufferLength, bufferPos : Integer; actualPropLen, actualDataLen, actualNumLen : Integer; line : ansistring; lineLength, linePos : Integer; function NextLine() : Boolean; var locOldPos : Integer; locOldPointer : PAnsiChar; begin Result := False; locOldPointer := p; locOldPos := bufferPos; while (bufferPos < bufferLength) and (p^ <> #10) do begin Inc(p); Inc(bufferPos); end; if (locOldPos < bufferPos) then begin lineLength := (bufferPos - locOldPos); Move(locOldPointer^,line[1],lineLength); if (p^ = #10) then begin Dec(lineLength); Inc(p); Inc(bufferPos); end; if (lineLength > 7) then begin linePos := 1; Result := True; end; end; end; function NextToken() : ansistring; var k : Integer; begin k := linePos; while (linePos < lineLength) and not(line[linePos] in [';','#']) do Inc(linePos); if (linePos > k) then begin if (line[linePos] in [';','#']) then Result := Copy(line,k,(linePos-k)) else Result := Copy(line,k,(linePos-k+1)); Result := Trim(Result); end else begin Result := ''; end; Inc(linePos); end; function ParseCanonicalDecomposition(AStr : ansistring) : TUnicodeCodePointArray; var locStr, ks : ansistring; k0,k : Integer; begin SetLength(Result,0); locStr := UpperCase(Trim(AStr)); if (locStr = '') or (locStr[1] = '<') then exit; k0 := 1; k := 1; while (k <= Length(locStr)) do begin while (k <= Length(locStr)) and (locStr[k] in ['0'..'9','A'..'F']) do inc(k); ks := Trim(Copy(locStr,k0,k-k0)); SetLength(Result,Length(Result)+1); Result[Length(Result)-1] := StringToCodePoint(ks); Inc(k); k0 := k; end; end; procedure ParseLine(); var locCP : TUnicodeCodePoint; locProp : TPropRec; locData : TDataLineRec; s : ansistring; locRangeStart, locRangeEnd : Boolean; k : Integer; locDecompItem : TUnicodeCodePointArray; numVal : TNumericValue; begin FillChar(locData,SizeOf(locData),#0); FillChar(locProp,SizeOf(locProp),#0); locCP := StrToInt('$'+NextToken()); s := NextToken(); locRangeStart := AnsiEndsText(', First>',s); if locRangeStart then locRangeEnd := False else locRangeEnd := AnsiEndsText(', Last>',s); if locRangeStart then begin locData.LineType := 1; locData.StartCodePoint := locCP; end else if locRangeEnd then begin ADataLineList[actualDataLen - 1].EndCodePoint := locCP; exit; //locData.EndCodePoint := locCP; end else begin locData.LineType := 0; locData.CodePoint := locCP; end; locProp.Category := StrToCategory(NextToken()); locProp.CCC := StrToInt(NextToken());//Canonical_Combining_Class NextToken();//Bidi_Class s := NextToken();//Decomposition_Type locDecompItem := ParseCanonicalDecomposition(s); if (Length(locDecompItem) = 0) then locProp.DecompositionID := -1 else begin locProp.DecompositionID := IndexOf(locDecompItem,ADecomposition); if (locProp.DecompositionID = -1) then begin k := Length(ADecomposition); locProp.DecompositionID := k; SetLength(ADecomposition,k+1); ADecomposition[k] := locDecompItem; end; end; numVal := EvaluateFloat(NextToken()); if (numVal <> Double(0.0)) then begin NextToken(); NextToken(); end else begin s := NextToken(); if (s <> '') then numVal := EvaluateFloat(s); s := NextToken(); if (numVal = Double(0.0)) then numVal := EvaluateFloat(s); end; k := IndexOf(numVal,ANumericTable,actualNumLen); if (k = -1) then begin if (actualNumLen >= Length(ANumericTable)) then SetLength(ANumericTable,(actualNumLen*2)); ANumericTable[actualNumLen] := numVal; k := actualNumLen; Inc(actualNumLen); end; locProp.NumericIndex := k; NextToken();//Bidi_Mirroed NextToken();//Unicode_l_Name NextToken();//ISO_Comment locProp.SimpleUpperCase := StringToCodePoint(NextToken()); locProp.SimpleLowerCase := StringToCodePoint(NextToken()); NextToken();//Simple_Title_Case_Mapping locProp.WhiteSpace := IsWhiteSpace(locCP,AWhiteSpaces); locProp.HangulSyllable := IsHangulSyllable(locCP,AHangulList); locProp.UnifiedIdeograph := IsIncluded(locCP,AUnifiedIdeographs); k := IndexOf(locProp,APropList,actualPropLen); if (k = -1) then begin k := actualPropLen; locProp.PropID := k{ + 1}; APropList[k] := locProp; Inc(actualPropLen); end; locData.PropID := k; if (actualDataLen >= Length(ADataLineList)) then SetLength(ADataLineList,(2*Length(ADataLineList))); ADataLineList[actualDataLen] := locData; Inc(actualDataLen); end; procedure Prepare(); var r : TPropRec; begin SetLength(APropList,PROP_LENGTH); actualPropLen := 0; SetLength(ADataLineList,DATA_LENGTH); actualDataLen := 0; bufferLength := ADataAStream.Size; bufferPos := 0; p := ADataAStream.Memory; lineLength := 0; SetLength(line,LINE_LENGTH); SetLength(ANumericTable,500); actualNumLen := 0; FillChar(r,SizeOf(r),#0); r.PropID := 0; r.Category := ucUnassigned; r.DecompositionID := -1; r.NumericIndex := 0; APropList[0] := r; Inc(actualPropLen); ANumericTable[0] := 0; Inc(actualNumLen); end; begin Prepare(); while NextLine() do ParseLine(); SetLength(APropList,actualPropLen); SetLength(ADataLineList,actualDataLen); SetLength(ANumericTable,actualNumLen); end; function GetPropID( ACodePoint : TUnicodeCodePoint; const ADataLineList : TDataLineRecArray ) : Cardinal; var i : Integer; p : PDataLineRec; begin Result := 0; p := @ADataLineList[Low(ADataLineList)]; for i := Low(ADataLineList) to High(ADataLineList) do begin if (p^.LineType = 0) then begin if (p^.CodePoint = ACodePoint) then begin Result := p^.PropID; Break; end; end else begin if (p^.StartCodePoint <= ACodePoint) and (p^.EndCodePoint >= ACodePoint) then begin Result := p^.PropID; Break; end; end; Inc(p); end; end; procedure MakeDecomposition( const ARawData : TDecompositionArray; var ABook : TDecompositionBook ); var i, c, locPos : Integer; locItem : TUnicodeCodePointArray; begin c := 0; for i := Low(ARawData) to High(ARawData) do c := c + Length(ARawData[i]); SetLength(ABook.CodePoints,c); SetLength(ABook.Index,Length(ARawData)); locPos := 0; for i := Low(ARawData) to High(ARawData) do begin locItem := ARawData[i]; ABook.Index[i].StartPosition := locPos; ABook.Index[i].Length := Length(locItem); Move(locItem[0],ABook.CodePoints[locPos],(Length(locItem) * SizeOf(TUnicodeCodePoint))); locPos := locPos + Length(locItem); end; end; type PBmpSecondTableItem = ^TBmpSecondTableItem; function IndexOf( const AItem : PBmpSecondTableItem; const ATable : TBmpSecondTable; const ATableActualLength : Integer ) : Integer;overload; var i : Integer; p : PBmpSecondTableItem; begin Result := -1; if (ATableActualLength > 0) then begin p := @ATable[0]; for i := 0 to ATableActualLength - 1 do begin if CompareMem(p,AItem,SizeOf(TBmpSecondTableItem)) then begin Result := i; Break; end; Inc(p); end; end; end; procedure MakeBmpTables( var AFirstTable : TBmpFirstTable; var ASecondTable : TBmpSecondTable; const ADataLineList : TDataLineRecArray ); var locLowByte, locHighByte : Byte; locTableItem : TBmpSecondTableItem; locCP : TUnicodeCodePoint; i, locSecondActualLen : Integer; begin SetLength(ASecondTable,120); locSecondActualLen := 0; for locHighByte := 0 to 255 do begin FillChar(locTableItem,SizeOf(locTableItem),#0); for locLowByte := 0 to 255 do begin locCP := (locHighByte * 256) + locLowByte; locTableItem[locLowByte] := GetPropID(locCP,ADataLineList)// - 1; end; i := IndexOf(@locTableItem,ASecondTable,locSecondActualLen); if (i = -1) then begin if (locSecondActualLen = Length(ASecondTable)) then SetLength(ASecondTable,locSecondActualLen + 50); i := locSecondActualLen; ASecondTable[i] := locTableItem; Inc(locSecondActualLen); end; AFirstTable[locHighByte] := i; end; SetLength(ASecondTable,locSecondActualLen); end; type P3lvlBmp3TableItem = ^T3lvlBmp3TableItem; function IndexOf( const AItem : P3lvlBmp3TableItem; const ATable : T3lvlBmp3Table; const ATableActualLength : Integer ) : Integer;overload; var i : Integer; p : P3lvlBmp3TableItem; begin Result := -1; if (ATableActualLength > 0) then begin p := @ATable[0]; for i := 0 to ATableActualLength - 1 do begin if CompareMem(p,AItem,SizeOf(T3lvlBmp3TableItem)) then begin Result := i; Break; end; Inc(p); end; end; end; type P3lvlBmp2TableItem = ^T3lvlBmp2TableItem; function IndexOf( const AItem : P3lvlBmp2TableItem; const ATable : T3lvlBmp2Table ) : Integer;overload; var i : Integer; p : P3lvlBmp2TableItem; begin Result := -1; if (Length(ATable) > 0) then begin p := @ATable[0]; for i := 0 to Length(ATable) - 1 do begin if CompareMem(p,AItem,SizeOf(T3lvlBmp2TableItem)) then begin Result := i; Break; end; Inc(p); end; end; end; procedure MakeBmpTables3Levels( var AFirstTable : T3lvlBmp1Table; var ASecondTable : T3lvlBmp2Table; var AThirdTable : T3lvlBmp3Table; const ADataLineList : TDataLineRecArray ); var locLowByte0, locLowByte1, locHighByte : Byte; locTableItem2 : T3lvlBmp2TableItem; locTableItem3 : T3lvlBmp3TableItem; locCP : TUnicodeCodePoint; i, locThirdActualLen : Integer; begin SetLength(AThirdTable,120); locThirdActualLen := 0; for locHighByte := 0 to 255 do begin FillChar(locTableItem2,SizeOf(locTableItem2),#0); for locLowByte0 := 0 to 15 do begin FillChar(locTableItem3,SizeOf(locTableItem3),#0); for locLowByte1 := 0 to 15 do begin locCP := (locHighByte * 256) + (locLowByte0*16) + locLowByte1; locTableItem3[locLowByte1] := GetPropID(locCP,ADataLineList); end; i := IndexOf(@locTableItem3,AThirdTable,locThirdActualLen); if (i = -1) then begin if (locThirdActualLen = Length(AThirdTable)) then SetLength(AThirdTable,locThirdActualLen + 50); i := locThirdActualLen; AThirdTable[i] := locTableItem3; Inc(locThirdActualLen); end; locTableItem2[locLowByte0] := i; end; i := IndexOf(@locTableItem2,ASecondTable); if (i = -1) then begin i := Length(ASecondTable); SetLength(ASecondTable,(i + 1)); ASecondTable[i] := locTableItem2; end; AFirstTable[locHighByte] := i; end; SetLength(AThirdTable,locThirdActualLen); end; procedure GenerateLicenceText(ADest : TStream); var s : ansistring; begin s := SLicenseText + sLineBreak + sLineBreak; ADest.Write(s[1],Length(s)); end; procedure GenerateBmpTables( ADest : TStream; var AFirstTable : TBmpFirstTable; var ASecondTable : TBmpSecondTable ); procedure AddLine(const ALine : ansistring); var buffer : ansistring; begin buffer := ALine + sLineBreak; ADest.Write(buffer[1],Length(buffer)); end; var i, j, c : Integer; locLine : string; begin AddLine('const'); AddLine(' UC_TABLE_1 : array[0..255] of Byte = ('); locLine := ''; for i := Low(AFirstTable) to High(AFirstTable) - 1 do begin locLine := locLine + IntToStr(AFirstTable[i]) + ','; if (((i+1) mod 16) = 0) then begin locLine := ' ' + locLine; AddLine(locLine); locLine := ''; end; end; locLine := locLine + IntToStr(AFirstTable[High(AFirstTable)]); locLine := ' ' + locLine; AddLine(locLine); AddLine(' );' + sLineBreak); AddLine(' UC_TABLE_2 : array[0..(256*' + IntToStr(Length(ASecondTable)) +'-1)] of Word =('); c := High(ASecondTable); for i := Low(ASecondTable) to c do begin locLine := ''; for j := Low(TBmpSecondTableItem) to High(TBmpSecondTableItem) do begin locLine := locLine + IntToStr(ASecondTable[i][j]) + ','; if (((j+1) mod 16) = 0) then begin if (i = c) and (j = 255) then Delete(locLine,Length(locLine),1); locLine := ' ' + locLine; AddLine(locLine); locLine := ''; end; end; end; AddLine(' );' + sLineBreak); end; //---------------------------------- procedure Generate3lvlBmpTables( ADest : TStream; var AFirstTable : T3lvlBmp1Table; var ASecondTable : T3lvlBmp2Table; var AThirdTable : T3lvlBmp3Table ); procedure AddLine(const ALine : ansistring); var buffer : ansistring; begin buffer := ALine + sLineBreak; ADest.Write(buffer[1],Length(buffer)); end; var i, j, c : Integer; locLine : string; begin AddLine('const'); AddLine(' UC_TABLE_1 : array[0..255] of Byte = ('); locLine := ''; for i := Low(AFirstTable) to High(AFirstTable) - 1 do begin locLine := locLine + IntToStr(AFirstTable[i]) + ','; if (((i+1) mod 16) = 0) then begin locLine := ' ' + locLine; AddLine(locLine); locLine := ''; end; end; locLine := locLine + IntToStr(AFirstTable[High(AFirstTable)]); locLine := ' ' + locLine; AddLine(locLine); AddLine(' );' + sLineBreak); AddLine(' UC_TABLE_2 : array[0..' + IntToStr(Length(ASecondTable)-1) +'] of array[0..15] of Word = ('); c := High(ASecondTable); for i := Low(ASecondTable) to c do begin locLine := '('; for j := Low(T3lvlBmp2TableItem) to High(T3lvlBmp2TableItem) do locLine := locLine + IntToStr(ASecondTable[i][j]) + ','; Delete(locLine,Length(locLine),1); locLine := ' ' + locLine + ')'; if (i < c) then locLine := locLine + ','; AddLine(locLine); end; AddLine(' );' + sLineBreak); AddLine(' UC_TABLE_3 : array[0..' + IntToStr(Length(AThirdTable)-1) +'] of array[0..15] of Word = ('); c := High(AThirdTable); for i := Low(AThirdTable) to c do begin locLine := '('; for j := Low(T3lvlBmp3TableItem) to High(T3lvlBmp3TableItem) do locLine := locLine + IntToStr(AThirdTable[i][j]) + ','; Delete(locLine,Length(locLine),1); locLine := ' ' + locLine + ')'; if (i < c) then locLine := locLine + ','; AddLine(locLine); end; AddLine(' );' + sLineBreak); end; function UInt24ToStr(const AValue : UInt24; const AEndian : TEndianKind): string;inline; begin if (AEndian = ekBig) then Result := Format( '(c:$%s;b:$%s;a:$%s;)', [ IntToHex(AValue.byte2,1), IntToHex(AValue.byte1,1), IntToHex(AValue.byte0,1) ] ) else Result := Format( '(a:$%s;b:$%s;c:$%s;)', [ IntToHex(AValue.byte0,1), IntToHex(AValue.byte1,1), IntToHex(AValue.byte2,1) ] ); end; procedure GeneratePropTable( ADest : TStream; const APropList : TPropRecArray; const AEndian : TEndianKind ); procedure AddLine(const ALine : ansistring); var buffer : ansistring; begin buffer := ALine + sLineBreak; ADest.Write(buffer[1],Length(buffer)); end; var i : Integer; locLine : string; p : PPropRec; begin AddLine(''); AddLine('const'); AddLine(' UC_PROP_REC_COUNT = ' + IntToStr(Length(APropList)) + ';'); AddLine(' UC_PROP_ARRAY : array[0..(UC_PROP_REC_COUNT-1)] of TUC_Prop = ('); locLine := ''; p := @APropList[0]; for i := Low(APropList) to High(APropList) do begin //locLine := ' (CD:' + IntToStr(p^.CategoryData) + ';' + locLine := locLine + '(C:' + IntToStr(p^.CategoryData) + ';' + 'C3:' + IntToStr(p^.CCC) + ';' + 'N:' + IntToStr(p^.NumericIndex) + ';' + 'UC:' + UInt24ToStr(p^.SimpleUpperCase,AEndian) + ';' + 'LC:' + UInt24ToStr(p^.SimpleLowerCase,AEndian) + ';' + 'D:' + IntToStr(p^.DecompositionID) + ')'; if (i < High(APropList)) then locLine := locLine + ','; if (((i+1) mod 2) = 0) then begin locLine := ' ' + locLine; AddLine(locLine); locLine := ''; end; Inc(p); end; if (locLine <> '') then AddLine( ' ' + locLine); AddLine(' );' + sLineBreak); end; procedure GenerateNumericTable( ADest : TStream; const ANumList : TNumericValueArray; const ACompleteUnit : Boolean ); procedure AddLine(const ALine : ansistring); var buffer : ansistring; begin buffer := ALine + sLineBreak; ADest.Write(buffer[1],Length(buffer)); end; var i : Integer; locLine : string; p : ^TNumericValue; begin if ACompleteUnit then begin GenerateLicenceText(ADest); AddLine('unit unicodenumtable;'); AddLine('interface'); AddLine(''); end; AddLine(''); AddLine('const'); AddLine(' UC_NUMERIC_COUNT = ' + IntToStr(Length(ANumList)) + ';'); AddLine(' UC_NUMERIC_ARRAY : array[0..(UC_NUMERIC_COUNT-1)] of Double = ('); locLine := ''; p := @ANumList[0]; for i := Low(ANumList) to High(ANumList) - 1 do begin locLine := locLine + FloatToStr(p^,FS) + ' ,'; if (i > 0) and ((i mod 8) = 0) then begin AddLine(' ' + locLine); locLine := ''; end; Inc(p); end; locLine := locLine + FloatToStr(p^,FS); AddLine(' ' + locLine); AddLine(' );' + sLineBreak); if ACompleteUnit then begin AddLine(''); AddLine('implementation'); AddLine(''); AddLine('end.'); end; end; procedure GenerateDecompositionBookTable( ADest : TStream; const ABook : TDecompositionBook; const AEndian : TEndianKind ); procedure AddLine(const ALine : ansistring); var buffer : ansistring; begin buffer := ALine + sLineBreak; ADest.Write(buffer[1],Length(buffer)); end; var i, k : Integer; p : ^TDecompositionIndexRec; cp : ^TUnicodeCodePoint; cp24 : UInt24; locLine : string; begin AddLine('const'); AddLine(' UC_DEC_BOOK_INDEX_LENGTH = ' + IntToStr(Length(ABook.Index)) + ';'); AddLine(' UC_DEC_BOOK_DATA_LENGTH = ' + IntToStr(Length(ABook.CodePoints)) + ';'); AddLine('type'); AddLine(' TDecompositionIndexRec = packed record'); AddLine(' S : Word; //StartPosition'); AddLine(' L : Byte; //Length'); AddLine(' end;'); AddLine(' TDecompositionBookRec = packed record'); AddLine(' Index : array[0..(UC_DEC_BOOK_INDEX_LENGTH-1)] of TDecompositionIndexRec;'); AddLine(' CodePoints : array[0..(UC_DEC_BOOK_DATA_LENGTH-1)] of UInt24;'); AddLine(' end;'); AddLine('const'); AddLine(' UC_DEC_BOOK_DATA : TDecompositionBookRec = ('); p := @ABook.Index[0]; AddLine(' Index : (// Index BEGIN'); k := 0; locLine := ' '; for i := Low(ABook.Index) to High(ABook.Index) - 1 do begin locLine := locLine + '(S:' + IntToStr(p^.StartPosition) + ';' + 'L:' + IntToStr(p^.Length) + '),'; k := k + 1; if (k >= 9) then begin AddLine(locLine); locLine := ' '; k := 0; end; Inc(p); end; locLine := locLine + '(S:' + IntToStr(p^.StartPosition) + ';' + 'L:' + IntToStr(p^.Length) + ')'; AddLine(locLine); AddLine(' ); // Index END'); cp := @ABook.CodePoints[0]; AddLine(' CodePoints : (// CodePoints BEGIN'); k := 0; locLine := ' '; for i := Low(ABook.CodePoints) to High(ABook.CodePoints) - 1 do begin cp24 := cp^; locLine := locLine + Format('%s,',[UInt24ToStr(cp24,AEndian)]); Inc(k); if (k >= 16) then begin AddLine(locLine); k := 0; locLine := ' '; end; Inc(cp); end; cp24 := cp^; locLine := locLine + Format('%s',[UInt24ToStr(cp24,AEndian)]); AddLine(locLine); AddLine(' ); // CodePoints END'); AddLine(' );' + sLineBreak); end; procedure GenerateOutBmpTable( ADest : TStream; const AList : TDataLineRecArray ); procedure AddLine(const ALine : ansistring); var buffer : ansistring; begin buffer := ALine + sLineBreak; ADest.Write(buffer[1],Length(buffer)); end; var i, j : Integer; locLine : string; p : PDataLineRec; begin AddLine(''); //AddLine(' UC_PROP_REC_COUNT = ' + IntToStr(Length(APropList)) + ';'); //AddLine(' UC_PROP_ARRAY : array[0..(UC_PROP_REC_COUNT-1)] of TUC_Prop = ('); j := -1; p := @AList[0]; for i := 0 to Length(AList) - 1 do begin if ((p^.LineType = 0) and (p^.CodePoint >$FFFF)) or (p^.StartCodePoint > $FFFF) then begin j := i; Break; end; Inc(p); end; if (j < 0) then exit; for i := j to Length(AList) - 2 do begin locLine := ' (PropID : ' + IntToStr(p^.PropID) + ';' + ' CodePoint : ' + IntToStr(p^.CodePoint) + ';' + ' RangeEnd : ' + IntToStr(p^.EndCodePoint) + '),' ; AddLine(locLine); Inc(p); end; locLine := ' (PropID : ' + IntToStr(p^.PropID) + ';' + ' CodePoint : ' + IntToStr(p^.CodePoint) + ';' + ' RangeEnd : ' + IntToStr(p^.EndCodePoint) + ')' ; AddLine(locLine); AddLine(' );' + sLineBreak); end; function Compress(const AData : TDataLineRecArray) : TDataLineRecArray; var k, i, locResLen : Integer; q, p, pr : PDataLineRec; k_end : TUnicodeCodePoint; begin locResLen := 1; SetLength(Result,Length(AData)); FillChar(Result[0],Length(Result),#0); Result[0] := AData[0]; q := @AData[0]; k := 0; while (k < Length(AData)) do begin if (q^.LineType = 0) then k_end := q^.CodePoint else k_end := q^.EndCodePoint; if ((k+1) = Length(AData)) then begin i := k; end else begin p := @AData[k+1]; i := k +1; while (i < (Length(AData) {- 1})) do begin if (p^.PropID <> q^.PropID) then begin i := i - 1; Break; end; if (p^.LineType = 0) then begin if (p^.CodePoint <> (k_end + 1)) then begin i := i - 1; Break; end; Inc(k_end); end else begin if (p^.StartCodePoint <> (k_end + 1)) then begin i := i - 1; Break; end; k_end := p^.EndCodePoint; end; Inc(i); Inc(p); end; end; {if (i = k) then begin Result[locResLen] := q^; Inc(locResLen); end else begin } p := @AData[i]; pr := @Result[locResLen]; pr^.PropID := q^.PropID; if (q^.LineType = 0) then pr^.StartCodePoint := q^.CodePoint else pr^.StartCodePoint := q^.StartCodePoint; pr^.LineType := 1; if (p^.LineType = 0) then pr^.EndCodePoint := p^.CodePoint else pr^.EndCodePoint := p^.EndCodePoint; Inc(locResLen); //end; k := i + 1; if (k = Length(AData)) then Break; q := @AData[k]; end; SetLength(Result,locResLen); end; procedure ParseUCAFile( ADataAStream : TMemoryStream; var ABook : TUCA_DataBook ); const LINE_LENGTH = 1024; DATA_LENGTH = 25000; var p : PAnsiChar; actualDataLen : Integer; bufferLength, bufferPos, lineLength, linePos : Integer; line : ansistring; function NextLine() : Boolean; var locOldPos : Integer; locOldPointer : PAnsiChar; begin Result := False; locOldPointer := p; locOldPos := bufferPos; while (bufferPos < bufferLength) and (p^ <> #10) do begin Inc(p); Inc(bufferPos); end; if (locOldPos = bufferPos) and (p^ = #10) then begin lineLength := 0; Inc(p); Inc(bufferPos); linePos := 1; Result := True; end else if (locOldPos < bufferPos) then begin lineLength := (bufferPos - locOldPos) + 1; Move(locOldPointer^,line[1],lineLength); if (p^ = #10) then begin Dec(lineLength); Inc(p); Inc(bufferPos); end; linePos := 1; Result := True; end; end; procedure SkipSpace(); begin while (linePos < lineLength) and (line[linePos] in [' ',#9]) do Inc(linePos); end; function NextToken() : ansistring; const C_SEPARATORS = [';','#','.','[',']','*','@']; var k : Integer; begin SkipSpace(); k := linePos; if (linePos <= lineLength) and (line[linePos] in C_SEPARATORS) then begin Result := line[linePos]; Inc(linePos); exit; end; while (linePos <= lineLength) and not(line[linePos] in (C_SEPARATORS+[' '])) do Inc(linePos); if (linePos > k) then begin if (line[Min(linePos,lineLength)] in C_SEPARATORS) then Result := Copy(line,k,(linePos-k)) else Result := Copy(line,k,(linePos-k+1)); Result := Trim(Result); end else begin Result := ''; end; end; procedure CheckToken(const AToken : string); var a, b : string; begin a := LowerCase(Trim(AToken)); b := LowerCase(Trim(NextToken())); if (a <> b) then raise Exception.CreateFmt('Expected token "%s" but found "%s", Line = "%s".',[a,b,line]); end; function ReadWeightBlock(var ADest : TUCA_WeightRec) : Boolean; var s :AnsiString; k : Integer; begin Result := False; s := NextToken(); if (s <> '[') then exit; s := NextToken(); if (s = '.') then ADest.Variable := False else begin if (s <> '*') then raise Exception.CreateFmt('Expected "%s" but found "%s".',['*',s]); ADest.Variable := True; end; ADest.Weights[0] := StrToInt('$'+NextToken()); for k := 1 to WEIGHT_LEVEL_COUNT-1 do begin CheckToken('.'); ADest.Weights[k] := StrToInt('$'+NextToken()); end; CheckToken(']'); Result := True; end; procedure ParseHeaderVar(); var s,ss : string; k : Integer; begin s := NextToken(); if (s = 'version') then begin ss := ''; while True do begin s := NextToken(); if (s = '') then Break; ss := ss + s; end; ABook.Version := ss; end else if (s = 'variable') then begin if (s = 'blanked') then ABook.VariableWeight := ucaBlanked else if (s = 'non-ignorable') then ABook.VariableWeight := ucaNonIgnorable else if (s = 'shifted') then ABook.VariableWeight := ucaShifted else if (s = 'shift-trimmed') then ABook.VariableWeight := ucaShiftedTrimmed else if (s = 'ignoresp') then ABook.VariableWeight := ucaIgnoreSP else raise Exception.CreateFmt('Unknown "@variable" type : "%s".',[s]); end else if (s = 'backwards') or (s = 'forwards') then begin ss := s; s := NextToken(); k := StrToInt(s); if (k < 1) or (k > 4) then raise Exception.CreateFmt('Invalid "%s" position : %d.',[ss,s]); ABook.Backwards[k] := (s = 'backwards'); end; end; procedure ParseLine(); var locData : ^TUCA_LineRec; s : ansistring; kc : Integer; begin if (Length(ABook.Lines) <= actualDataLen) then SetLength(ABook.Lines,Length(ABook.Lines)*2); locData := @ABook.Lines[actualDataLen]; s := NextToken(); if (s = '') or (s[1] = '#') then exit; if (s[1] = '@') then begin ParseHeaderVar(); exit; end; SetLength(locData^.CodePoints,10); locData^.CodePoints[0] := StrToInt('$'+s); kc := 1; while True do begin s := Trim(NextToken()); if (s = '') then exit; if (s = ';') then Break; locData^.CodePoints[kc] := StrToInt('$'+s); Inc(kc); end; if (kc = 0) then exit; SetLength(locData^.CodePoints,kc); SetLength(locData^.Weights,24); kc := 0; while ReadWeightBlock(locData^.Weights[kc]) do begin Inc(kc); end; SetLength(locData^.Weights,kc); Inc(actualDataLen); end; procedure Prepare(); var k : Integer; begin ABook.VariableWeight := ucaShifted; for k := Low(ABook.Backwards) to High(ABook.Backwards) do ABook.Backwards[k] := False; SetLength(ABook.Lines,DATA_LENGTH); actualDataLen := 0; bufferLength := ADataAStream.Size; bufferPos := 0; p := ADataAStream.Memory; lineLength := 0; SetLength(line,LINE_LENGTH); end; begin Prepare(); while NextLine() do ParseLine(); SetLength(ABook.Lines,actualDataLen); end; procedure Dump(X : array of TUnicodeCodePoint; const ATitle : string = ''); var i : Integer; begin Write(ATitle, ' '); for i := 0 to Length(X) - 1 do Write(X[i],' '); WriteLn(); end; function IsGreaterThan(A, B : PUCA_LineRec) : Integer; var i, hb : Integer; begin if (A=B) then exit(0); Result := 1; hb := Length(B^.CodePoints) - 1; for i := 0 to Length(A^.CodePoints) - 1 do begin if (i > hb) then exit; if (A^.CodePoints[i] < B^.CodePoints[i]) then exit(-1); if (A^.CodePoints[i] > B^.CodePoints[i]) then exit(1); end; if (Length(A^.CodePoints) = Length(B^.CodePoints)) then exit(0); exit(-1); end; procedure QuickSort( var AList : TUCA_DataBookIndex; L, R : Longint; ABook : PUCA_DataBook );overload; var I, J : Longint; P, Q : Integer; begin repeat I := L; J := R; P := AList[ (L + R) div 2 ]; repeat while IsGreaterThan(@ABook^.Lines[P], @ABook^.Lines[AList[i]]) > 0 do I := I + 1; while IsGreaterThan(@ABook^.Lines[P], @ABook^.Lines[AList[J]]) < 0 do J := J - 1; If I <= J then begin Q := AList[I]; AList[I] := AList[J]; AList[J] := Q; I := I + 1; J := J - 1; end; until I > J; // sort the smaller range recursively // sort the bigger range via the loop // Reasons: memory usage is O(log(n)) instead of O(n) and loop is faster than recursion if J - L < R - I then begin if L < J then QuickSort(AList, L, J, ABook); L := I; end else begin if I < R then QuickSort(AList, I, R, ABook); R := J; end; until L >= R; end; function CreateIndex(ABook : PUCA_DataBook) : TUCA_DataBookIndex; var r : TUCA_DataBookIndex; i, c : Integer; begin c := Length(ABook^.Lines); SetLength(r,c); for i := 0 to c - 1 do r[i] := i; QuickSort(r,0,c-1,ABook); Result := r; end; function ConstructContextTree( const AContext : PUCA_LineContextRec; var ADestBuffer; const ADestBufferLength : Cardinal ) : PUCA_PropItemContextTreeRec;forward; function ConstructItem( AItem : PUCA_PropItemRec; ACodePoint : Cardinal; AValid : Byte; AChildCount : Byte; const AWeights : array of TUCA_WeightRec; const AStoreCP : Boolean; const AContext : PUCA_LineContextRec; const ADeleted : Boolean ) : Cardinal; var i : Integer; p : PUCA_PropItemRec; pw : PUCA_PropWeights; pb : PByte; hasContext : Boolean; contextTree : PUCA_PropItemContextTreeRec; wl : Integer; begin p := AItem; p^.Size := 0; p^.Flags := 0; p^.WeightLength := 0; SetBit(p^.Flags,AItem^.FLAG_VALID,(AValid <> 0)); p^.ChildCount := AChildCount; hasContext := (AContext <> nil) and (Length(AContext^.Data) > 0); if hasContext then wl := 0 else wl := Length(AWeights); p^.WeightLength := wl; if (wl = 0) then begin Result := SizeOf(TUCA_PropItemRec); if ADeleted then SetBit(AItem^.Flags,AItem^.FLAG_DELETION,True); end else begin Result := SizeOf(TUCA_PropItemRec) + (wl*SizeOf(TUCA_PropWeights)); pb := PByte(PtrUInt(p) + SizeOf(TUCA_PropItemRec)); Unaligned(PWord(pb)^) := AWeights[0].Weights[0]; pb := pb + 2; if (AWeights[0].Weights[1] > High(Byte)) then begin Unaligned(PWord(pb)^) := AWeights[0].Weights[1]; pb := pb + 2; end else begin SetBit(p^.Flags,p^.FLAG_COMPRESS_WEIGHT_1,True); pb^ := AWeights[0].Weights[1]; pb := pb + 1; Result := Result - 1; end; if (AWeights[0].Weights[2] > High(Byte)) then begin Unaligned(PWord(pb)^) := AWeights[0].Weights[2]; pb := pb + 2; end else begin SetBit(p^.Flags,p^.FLAG_COMPRESS_WEIGHT_2,True); pb^ := AWeights[0].Weights[2]; pb := pb + 1; Result := Result - 1; end; pw := PUCA_PropWeights(pb); for i := 1 to wl - 1 do begin pw^.Weights[0] := AWeights[i].Weights[0]; pw^.Weights[1] := AWeights[i].Weights[1]; pw^.Weights[2] := AWeights[i].Weights[2]; //pw^.Variable := BoolToByte(AWeights[i].Variable); Inc(pw); end; end; hasContext := (AContext <> nil) and (Length(AContext^.Data) > 0); if AStoreCP or hasContext then begin Unaligned(PUInt24(PtrUInt(AItem)+Result)^) := ACodePoint; Result := Result + SizeOf(UInt24); SetBit(AItem^.Flags,AItem^.FLAG_CODEPOINT,True); end; if hasContext then begin contextTree := ConstructContextTree(AContext,Unaligned(Pointer(PtrUInt(AItem)+Result)^),MaxInt); Result := Result + Cardinal(contextTree^.Size); SetBit(AItem^.Flags,AItem^.FLAG_CONTEXTUAL,True); end; p^.Size := Result; end; function CalcCharChildCount( const ASearchStartPos : Integer; const ALinePos : Integer; const ABookLines : PUCA_LineRec; const AMaxLength : Integer; const ABookIndex : TUCA_DataBookIndex; out ALineCount : Word ) : Byte; var locLinePos : Integer; p : PUCA_LineRec; procedure IncP(); begin Inc(locLinePos); p := @ABookLines[ABookIndex[locLinePos]]; end; var i, locTargetLen, locTargetBufferSize, r : Integer; locTarget : array[0..127] of Cardinal; locLastChar : Cardinal; begin locLinePos := ALinePos; p := @ABookLines[ABookIndex[locLinePos]]; locTargetLen := ASearchStartPos; locTargetBufferSize := (locTargetLen*SizeOf(Cardinal)); Move(p^.CodePoints[0],locTarget[0],locTargetBufferSize); if (Length(p^.CodePoints) = ASearchStartPos) then begin r := 0; locLastChar := High(Cardinal); end else begin r := 1; locLastChar := p^.CodePoints[ASearchStartPos]; end; i := 1; while (i < AMaxLength) do begin IncP(); if (Length(p^.CodePoints) < locTargetLen) then Break; if not CompareMem(@locTarget[0],@p^.CodePoints[0],locTargetBufferSize) then Break; if (p^.CodePoints[ASearchStartPos] <> locLastChar) then begin Inc(r); locLastChar := p^.CodePoints[ASearchStartPos]; end; Inc(i); end; ALineCount := i; Result := r; end; function BuildTrie( const ALinePos : Integer; const ABookLines : PUCA_LineRec; const AMaxLength : Integer; const ABookIndex : TUCA_DataBookIndex ) : PTrieNode; var p : PUCA_LineRec; root : PTrieNode; ki, k, i : Integer; key : array of TKeyType; begin k := ABookIndex[ALinePos]; p := @ABookLines[k]; if (Length(p^.CodePoints) = 1) then root := CreateNode(p^.CodePoints[0],k) else root := CreateNode(p^.CodePoints[0]); for i := ALinePos to ALinePos + AMaxLength - 1 do begin k := ABookIndex[i]; p := @ABookLines[k]; if (Length(p^.CodePoints) = 1) then begin InsertWord(root,p^.CodePoints[0],k); end else begin SetLength(key,Length(p^.CodePoints)); for ki := 0 to Length(p^.CodePoints) - 1 do key[ki] := p^.CodePoints[ki]; InsertWord(root,key,k); end; end; Result := root; end; function BoolToByte(AValue : Boolean): Byte;inline; begin if AValue then Result := 1 else Result := 0; end; function InternalConstructFromTrie( const ATrie : PTrieNode; const AItem : PUCA_PropItemRec; const ALines : PUCA_LineRec; const AStoreCp : Boolean ) : Cardinal; var i : Integer; size : Cardinal; p : PUCA_PropItemRec; n : PTrieNode; begin if (ATrie = nil) then exit(0); p := AItem; n := ATrie; if n^.DataNode then size := ConstructItem(p,n^.Key,1,n^.ChildCount,ALines[n^.Data].Weights,AStoreCp,@(ALines[n^.Data].Context),ALines[n^.Data].Deleted) else size := ConstructItem(p,n^.Key,0,n^.ChildCount,[],AStoreCp,nil,False); Result := size; if (n^.ChildCount > 0) then begin for i := 0 to n^.ChildCount - 1 do begin p := PUCA_PropItemRec(PtrUInt(p) + size); size := InternalConstructFromTrie(n^.Children[i],p,ALines,True); Result := Result + size; end; end; AItem^.Size := Result; end; function ConstructFromTrie( const ATrie : PTrieNode; const AItem : PUCA_PropItemRec; const ALines : PUCA_LineRec ) : Integer; begin Result := InternalConstructFromTrie(ATrie,AItem,ALines,False); end; procedure MakeUCA_Props( ABook : PUCA_DataBook; out AProps : PUCA_PropBook ); var propIndexCount : Integer; procedure CapturePropIndex(AItem : PUCA_PropItemRec; ACodePoint : Cardinal); begin AProps^.Index[propIndexCount].CodePoint := ACodePoint; AProps^.Index[propIndexCount].Position := PtrUInt(AItem) - PtrUInt(AProps^.Items); propIndexCount := propIndexCount + 1; end; var locIndex : TUCA_DataBookIndex; i, c, k, kc : Integer; p, p1, p2 : PUCA_PropItemRec; lines, pl1, pl2 : PUCA_LineRec; childCount, lineCount : Word; size : Cardinal; trieRoot : PTrieNode; MaxChildCount, MaxSize : Cardinal; childList : array of PUCA_PropItemRec; begin locIndex := CreateIndex(ABook); i := Length(ABook^.Lines); i := 30 * i * (SizeOf(TUCA_PropItemRec) + SizeOf(TUCA_PropWeights)); AProps := AllocMem(SizeOf(TUCA_PropBook)); AProps^.ItemSize := i; AProps^.Items := AllocMem(i); propIndexCount := 0; SetLength(AProps^.Index,Length(ABook^.Lines)); p := AProps^.Items; lines := @ABook^.Lines[0]; c := Length(locIndex); i := 0; MaxChildCount := 0; MaxSize := 0; while (i < (c-1)) do begin pl1 := @lines[locIndex[i]]; if not pl1^.Stored then begin i := i + 1; Continue; end; pl2 := @lines[locIndex[i+1]]; if (pl1^.CodePoints[0] <> pl2^.CodePoints[0]) then begin if (Length(pl1^.CodePoints) = 1) then begin size := ConstructItem(p,pl1^.CodePoints[0],1,0,pl1^.Weights,False,@pl1^.Context,pl1^.Deleted); CapturePropIndex(p,pl1^.CodePoints[0]); p := PUCA_PropItemRec(PtrUInt(p) + size); if (size > MaxSize) then MaxSize := size; end else begin kc := Length(pl1^.CodePoints); SetLength(childList,kc); for k := 0 to kc - 2 do begin size := ConstructItem(p,pl1^.CodePoints[k],0,1,[],(k>0),nil,False); if (k = 0) then CapturePropIndex(p,pl1^.CodePoints[k]); childList[k] := p; p := PUCA_PropItemRec(PtrUInt(p) + size); end; size := ConstructItem(p,pl1^.CodePoints[kc-1],1,0,pl1^.Weights,True,@pl1^.Context,pl1^.Deleted); childList[kc-1] := p; p := PUCA_PropItemRec(PtrUInt(p) + size); for k := kc - 2 downto 0 do begin p1 := childList[k]; p2 := childList[k+1]; p1^.Size := p1^.Size + p2^.Size; end; if (p1^.Size > MaxSize) then MaxSize := p1^.Size; end; lineCount := 1; end else begin childCount := CalcCharChildCount(1,i,lines,c,locIndex,lineCount); if (childCount < 1) then raise Exception.CreateFmt('Expected "child count > 1" but found %d.',[childCount]); if (lineCount < 2) then raise Exception.CreateFmt('Expected "line count > 2" but found %d.',[lineCount]); if (childCount > MaxChildCount) then MaxChildCount := childCount; trieRoot := BuildTrie(i,lines,lineCount,locIndex); size := ConstructFromTrie(trieRoot,p,lines); CapturePropIndex(p,pl1^.CodePoints[0]); FreeNode(trieRoot); p := PUCA_PropItemRec(PtrUInt(p) + size); if (size > MaxSize) then MaxSize := size; end; i := i + lineCount; end; if (i = (c-1)) then begin pl1 := @lines[locIndex[i]]; if (Length(pl1^.CodePoints) = 1) then begin size := ConstructItem(p,pl1^.CodePoints[0],1,0,pl1^.Weights,False,@pl1^.Context,pl1^.Deleted); CapturePropIndex(p,pl1^.CodePoints[0]); p := PUCA_PropItemRec(PtrUInt(p) + size); if (size > MaxSize) then MaxSize := size; end else begin kc := Length(pl1^.CodePoints); SetLength(childList,kc); for k := 0 to kc - 2 do begin size := ConstructItem(p,pl1^.CodePoints[k],0,1,[],(k>0),@pl1^.Context,pl1^.Deleted); if (k = 0) then CapturePropIndex(p,pl1^.CodePoints[0]); childList[k] := p; p := PUCA_PropItemRec(PtrUInt(p) + size); end; size := ConstructItem(p,pl1^.CodePoints[kc-1],1,0,pl1^.Weights,True,@pl1^.Context,pl1^.Deleted); childList[kc-1] := p; p := PUCA_PropItemRec(PtrUInt(p) + size); for i := kc - 2 downto 0 do begin p1 := childList[i]; p2 := childList[i+1]; p1^.Size := p1^.Size + p2^.Size; end; if (size > MaxSize) then MaxSize := size; end; end; //c := Int64(PtrUInt(p)) - Int64(PtrUInt(AProps^.Items)); c := UInt64(PtrUInt(p)) - UInt64(PtrUInt(AProps^.Items)); ReAllocMem(AProps^.Items,c); AProps^.ItemSize := c; SetLength(AProps^.Index,propIndexCount); AProps^.ItemsOtherEndian := AllocMem(AProps^.ItemSize); ReverseFromNativeEndian(AProps^.Items,AProps^.ItemSize,AProps^.ItemsOtherEndian); k := 0; c := High(Word); for i := 0 to Length(ABook^.Lines) - 1 do begin if (Length(ABook^.Lines[i].Weights) > 0) then begin if (ABook^.Lines[i].Weights[0].Variable) then begin if (ABook^.Lines[i].Weights[0].Weights[0] > k) then k := ABook^.Lines[i].Weights[0].Weights[0]; if (ABook^.Lines[i].Weights[0].Weights[0] < c) then c := ABook^.Lines[i].Weights[0].Weights[0]; end; end; end; AProps^.VariableHighLimit := k; AProps^.VariableLowLimit := c; end; procedure FreeUcaBook(var ABook : PUCA_PropBook); var p : PUCA_PropBook; begin if (ABook = nil) then exit; p := ABook; ABook := nil; p^.Index := nil; FreeMem(p^.Items,p^.ItemSize); FreeMem(p^.ItemsOtherEndian,p^.ItemSize); FreeMem(p,SizeOf(p^)); end; function IndexOf(const ACodePoint : Cardinal; APropBook : PUCA_PropBook): Integer;overload; var i : Integer; begin for i := 0 to Length(APropBook^.Index) - 1 do begin if (ACodePoint = APropBook^.Index[i].CodePoint) then exit(i); end; Result := -1; end; type PucaBmpSecondTableItem = ^TucaBmpSecondTableItem; function IndexOf( const AItem : PucaBmpSecondTableItem; const ATable : TucaBmpSecondTable; const ATableActualLength : Integer ) : Integer;overload; var i : Integer; p : PucaBmpSecondTableItem; begin Result := -1; if (ATableActualLength > 0) then begin p := @ATable[0]; for i := 0 to ATableActualLength - 1 do begin if CompareMem(p,AItem,SizeOf(TucaBmpSecondTableItem)) then begin Result := i; Break; end; Inc(p); end; end; end; procedure MakeUCA_BmpTables( var AFirstTable : TucaBmpFirstTable; var ASecondTable : TucaBmpSecondTable; const APropBook : PUCA_PropBook ); var locLowByte, locHighByte : Byte; locTableItem : TucaBmpSecondTableItem; locCP : TUnicodeCodePoint; i, locSecondActualLen : Integer; k : Integer; begin SetLength(ASecondTable,120); locSecondActualLen := 0; for locHighByte := 0 to 255 do begin FillChar(locTableItem,SizeOf(locTableItem),#0); for locLowByte := 0 to 255 do begin locCP := (locHighByte * 256) + locLowByte; k := IndexOf(locCP,APropBook); if (k = -1) then k := 0 else k := APropBook^.Index[k].Position + 1; locTableItem[locLowByte] := k; end; i := IndexOf(@locTableItem,ASecondTable,locSecondActualLen); if (i = -1) then begin if (locSecondActualLen = Length(ASecondTable)) then SetLength(ASecondTable,locSecondActualLen + 50); i := locSecondActualLen; ASecondTable[i] := locTableItem; Inc(locSecondActualLen); end; AFirstTable[locHighByte] := i; end; SetLength(ASecondTable,locSecondActualLen); end; function ToUCS4(const AHighS, ALowS : Word) : TUnicodeCodePoint; inline; begin //copied from utf16toutf32 Result := (UCS4Char(AHighS)-$d800) shl 10 + (UCS4Char(ALowS)-$dc00) + $10000; end; procedure FromUCS4(const AValue : TUnicodeCodePoint; var AHighS, ALowS : Word); begin AHighS := Word((AValue - $10000) shr 10 + $d800); ALowS := Word((AValue - $10000) and $3ff + $dc00); end; type PucaOBmpSecondTableItem = ^TucaOBmpSecondTableItem; function IndexOf( const AItem : PucaOBmpSecondTableItem; const ATable : TucaOBmpSecondTable; const ATableActualLength : Integer ) : Integer;overload; var i : Integer; p : PucaOBmpSecondTableItem; begin Result := -1; if (ATableActualLength > 0) then begin p := @ATable[0]; for i := 0 to ATableActualLength - 1 do begin if CompareMem(p,AItem,SizeOf(TucaOBmpSecondTableItem)) then begin Result := i; Break; end; Inc(p); end; end; end; procedure MakeUCA_OBmpTables( var AFirstTable : TucaOBmpFirstTable; var ASecondTable : TucaOBmpSecondTable; const APropBook : PUCA_PropBook ); var locLowByte, locHighByte : Word; locTableItem : TucaOBmpSecondTableItem; locCP : TUnicodeCodePoint; i, locSecondActualLen : Integer; k : Integer; begin if (Length(ASecondTable) = 0) then SetLength(ASecondTable,2000); locSecondActualLen := 0; for locHighByte := 0 to HIGH_SURROGATE_COUNT - 1 do begin FillChar(locTableItem,SizeOf(locTableItem),#0); for locLowByte := 0 to LOW_SURROGATE_COUNT - 1 do begin locCP := ToUCS4(HIGH_SURROGATE_BEGIN + locHighByte,LOW_SURROGATE_BEGIN + locLowByte); k := IndexOf(locCP,APropBook); if (k = -1) then k := 0 else k := APropBook^.Index[k].Position + 1; locTableItem[locLowByte] := k; end; i := IndexOf(@locTableItem,ASecondTable,locSecondActualLen); if (i = -1) then begin if (locSecondActualLen = Length(ASecondTable)) then SetLength(ASecondTable,locSecondActualLen + 50); i := locSecondActualLen; ASecondTable[i] := locTableItem; Inc(locSecondActualLen); end; AFirstTable[locHighByte] := i; end; SetLength(ASecondTable,locSecondActualLen); end; function GetPropPosition( const AHighS, ALowS : Word; const AFirstTable : PucaOBmpFirstTable; const ASecondTable : PucaOBmpSecondTable ): Integer;inline;overload; begin Result := ASecondTable^[AFirstTable^[AHighS-HIGH_SURROGATE_BEGIN]][ALowS-LOW_SURROGATE_BEGIN] - 1; end; procedure GenerateUCA_Head( ADest : TStream; ABook : PUCA_DataBook; AProps : PUCA_PropBook ); procedure AddLine(const ALine : ansistring); var buffer : ansistring; begin buffer := ALine + sLineBreak; ADest.Write(buffer[1],Length(buffer)); end; begin AddLine('const'); //AddLine(' VERSION_STRING = ' + QuotedStr(ABook^.Version) + ';'); AddLine(' VARIABLE_LOW_LIMIT = ' + IntToStr(AProps^.VariableLowLimit) + ';'); AddLine(' VARIABLE_HIGH_LIMIT = ' + IntToStr(AProps^.VariableHighLimit) + ';'); AddLine(' VARIABLE_WEIGHT = ' + IntToStr(Ord(ABook^.VariableWeight)) + ';'); AddLine(' BACKWARDS_0 = ' + BoolToStr(ABook^.Backwards[0],'True','False') + ';'); AddLine(' BACKWARDS_1 = ' + BoolToStr(ABook^.Backwards[1],'True','False') + ';'); AddLine(' BACKWARDS_2 = ' + BoolToStr(ABook^.Backwards[2],'True','False') + ';'); AddLine(' BACKWARDS_3 = ' + BoolToStr(ABook^.Backwards[3],'True','False') + ';'); AddLine(' PROP_COUNT = ' + IntToStr(Ord(AProps^.ItemSize)) + ';'); AddLine(''); end; procedure GenerateUCA_BmpTables( AStream, ANativeEndianStream, ANonNativeEndianStream : TStream; var AFirstTable : TucaBmpFirstTable; var ASecondTable : TucaBmpSecondTable ); procedure AddLine(AOut : TStream; const ALine : ansistring); var buffer : ansistring; begin buffer := ALine + sLineBreak; AOut.Write(buffer[1],Length(buffer)); end; var i, j, c : Integer; locLine : string; value : UInt24; begin AddLine(AStream,'const'); AddLine(AStream,' UCA_TABLE_1 : array[0..255] of Byte = ('); locLine := ''; for i := Low(AFirstTable) to High(AFirstTable) - 1 do begin locLine := locLine + IntToStr(AFirstTable[i]) + ','; if (((i+1) mod 16) = 0) then begin locLine := ' ' + locLine; AddLine(AStream,locLine); locLine := ''; end; end; locLine := locLine + IntToStr(AFirstTable[High(AFirstTable)]); locLine := ' ' + locLine; AddLine(AStream,locLine); AddLine(AStream,' );' + sLineBreak); AddLine(ANativeEndianStream,'const'); AddLine(ANativeEndianStream,' UCA_TABLE_2 : array[0..(256*' + IntToStr(Length(ASecondTable)) +'-1)] of UInt24 =('); c := High(ASecondTable); for i := Low(ASecondTable) to c do begin locLine := ''; for j := Low(TucaBmpSecondTableItem) to High(TucaBmpSecondTableItem) do begin value := ASecondTable[i][j]; locLine := locLine + UInt24ToStr(value,ENDIAN_NATIVE) + ','; if (((j+1) mod 7) = 0) then begin if (i = c) and (j = High(TucaBmpSecondTableItem)) then Delete(locLine,Length(locLine),1); locLine := ' ' + locLine; AddLine(ANativeEndianStream,locLine); locLine := ''; end; end; if (locLine <> '') then begin if (i = c) then Delete(locLine,Length(locLine),1); locLine := ' ' + locLine; AddLine(ANativeEndianStream,locLine); end; end; AddLine(ANativeEndianStream,' );' + sLineBreak); AddLine(ANonNativeEndianStream,'const'); AddLine(ANonNativeEndianStream,' UCA_TABLE_2 : array[0..(256*' + IntToStr(Length(ASecondTable)) +'-1)] of UInt24 =('); c := High(ASecondTable); for i := Low(ASecondTable) to c do begin locLine := ''; for j := Low(TucaBmpSecondTableItem) to High(TucaBmpSecondTableItem) do begin value := ASecondTable[i][j]; locLine := locLine + UInt24ToStr(value,ENDIAN_NON_NATIVE) + ','; if (((j+1) mod 7) = 0) then begin if (i = c) and (j = High(TucaBmpSecondTableItem)) then Delete(locLine,Length(locLine),1); locLine := ' ' + locLine; AddLine(ANonNativeEndianStream,locLine); locLine := ''; end; end; if (locLine <> '') then begin if (i = c) then Delete(locLine,Length(locLine),1); locLine := ' ' + locLine; AddLine(ANonNativeEndianStream,locLine); end; end; AddLine(ANonNativeEndianStream,' );' + sLineBreak); end; procedure GenerateBinaryUCA_BmpTables( ANativeEndianStream, ANonNativeEndianStream : TStream; var AFirstTable : TucaBmpFirstTable; var ASecondTable : TucaBmpSecondTable ); var i, j : Integer; value : UInt24; begin ANativeEndianStream.Write(AFirstTable[0],Length(AFirstTable)); ANonNativeEndianStream.Write(AFirstTable[0],Length(AFirstTable)); for i := Low(ASecondTable) to High(ASecondTable) do begin for j := Low(TucaBmpSecondTableItem) to High(TucaBmpSecondTableItem) do begin value := ASecondTable[i][j]; ANativeEndianStream.Write(value,SizeOf(value)); ReverseBytes(value,SizeOf(value)); ANonNativeEndianStream.Write(value,SizeOf(value)); end; end; end; procedure GenerateUCA_PropTable( // WARNING : files must be generated for each endianess (Little / Big) ADest : TStream; const APropBook : PUCA_PropBook; const AEndian : TEndianKind ); procedure AddLine(const ALine : ansistring); var buffer : ansistring; begin buffer := ALine + sLineBreak; ADest.Write(buffer[1],Length(buffer)); end; var i, c : Integer; locLine : string; p : PByte; begin c := APropBook^.ItemSize; AddLine('const'); AddLine(' UCA_PROPS : array[0..' + IntToStr(c-1) + '] of Byte = ('); locLine := ''; if (AEndian = ENDIAN_NATIVE) then p := PByte(APropBook^.Items) else p := PByte(APropBook^.ItemsOtherEndian); for i := 0 to c - 2 do begin locLine := locLine + IntToStr(p[i]) + ','; if (((i+1) mod 60) = 0) then begin locLine := ' ' + locLine; AddLine(locLine); locLine := ''; end; end; locLine := locLine + IntToStr(p[c-1]); locLine := ' ' + locLine; AddLine(locLine); AddLine(' );' + sLineBreak); end; procedure GenerateBinaryUCA_PropTable( // WARNING : files must be generated for each endianess (Little / Big) ANativeEndianStream, ANonNativeEndianStream : TStream; const APropBook : PUCA_PropBook ); begin ANativeEndianStream.Write(APropBook^.Items^,APropBook^.ItemSize); ANonNativeEndianStream.Write(APropBook^.ItemsOtherEndian^,APropBook^.ItemSize); end; procedure GenerateUCA_OBmpTables( AStream, ANativeEndianStream, ANonNativeEndianStream : TStream; var AFirstTable : TucaOBmpFirstTable; var ASecondTable : TucaOBmpSecondTable ); procedure AddLine(AOut : TStream; const ALine : ansistring); var buffer : ansistring; begin buffer := ALine + sLineBreak; AOut.Write(buffer[1],Length(buffer)); end; var i, j, c : Integer; locLine : string; value : UInt24; begin AddLine(AStream,'const'); AddLine(AStream,' UCAO_TABLE_1 : array[0..' + IntToStr(HIGH_SURROGATE_COUNT-1) + '] of Word = ('); locLine := ''; for i := Low(AFirstTable) to High(AFirstTable) - 1 do begin locLine := locLine + IntToStr(AFirstTable[i]) + ','; if (((i+1) mod 16) = 0) then begin locLine := ' ' + locLine; AddLine(AStream,locLine); locLine := ''; end; end; locLine := locLine + IntToStr(AFirstTable[High(AFirstTable)]); locLine := ' ' + locLine; AddLine(AStream,locLine); AddLine(AStream,' );' + sLineBreak); AddLine(ANativeEndianStream,' UCAO_TABLE_2 : array[0..('+IntToStr(LOW_SURROGATE_COUNT)+'*' + IntToStr(Length(ASecondTable)) +'-1)] of UInt24 =('); c := High(ASecondTable); for i := Low(ASecondTable) to c do begin locLine := ''; for j := Low(TucaOBmpSecondTableItem) to High(TucaOBmpSecondTableItem) do begin value := ASecondTable[i][j]; locLine := locLine + UInt24ToStr(value,ENDIAN_NATIVE) + ','; if (((j+1) mod 7) = 0) then begin if (i = c) and (j = High(TucaOBmpSecondTableItem)) then Delete(locLine,Length(locLine),1); locLine := ' ' + locLine; AddLine(ANativeEndianStream,locLine); locLine := ''; end; end; if (locLine <> '') then begin if (i = c) then Delete(locLine,Length(locLine),1); locLine := ' ' + locLine; AddLine(ANativeEndianStream,locLine); end; end; AddLine(ANativeEndianStream,' );' + sLineBreak); AddLine(ANonNativeEndianStream,' UCAO_TABLE_2 : array[0..('+IntToStr(LOW_SURROGATE_COUNT)+'*' + IntToStr(Length(ASecondTable)) +'-1)] of UInt24 =('); c := High(ASecondTable); for i := Low(ASecondTable) to c do begin locLine := ''; for j := Low(TucaOBmpSecondTableItem) to High(TucaOBmpSecondTableItem) do begin value := ASecondTable[i][j]; locLine := locLine + UInt24ToStr(value,ENDIAN_NON_NATIVE) + ','; if (((j+1) mod 7) = 0) then begin if (i = c) and (j = High(TucaOBmpSecondTableItem)) then Delete(locLine,Length(locLine),1); locLine := ' ' + locLine; AddLine(ANonNativeEndianStream,locLine); locLine := ''; end; end; if (locLine <> '') then begin if (i = c) then Delete(locLine,Length(locLine),1); locLine := ' ' + locLine; AddLine(ANonNativeEndianStream,locLine); end; end; AddLine(ANonNativeEndianStream,' );' + sLineBreak); end; procedure GenerateBinaryUCA_OBmpTables( ANativeEndianStream, ANonNativeEndianStream : TStream; var AFirstTable : TucaOBmpFirstTable; var ASecondTable : TucaOBmpSecondTable ); var i, j : Integer; locLine : string; wordValue : Word; value : UInt24; begin for i := Low(AFirstTable) to High(AFirstTable) do begin wordValue := AFirstTable[i]; ANativeEndianStream.Write(wordValue,SizeOf(wordValue)); ReverseBytes(wordValue,SizeOf(wordValue)); ANonNativeEndianStream.Write(wordValue,SizeOf(wordValue)); end; for i := Low(ASecondTable) to High(ASecondTable) do begin for j := Low(TucaOBmpSecondTableItem) to High(TucaOBmpSecondTableItem) do begin value := ASecondTable[i][j]; ANativeEndianStream.Write(value,SizeOf(value)); ReverseBytes(value,SizeOf(value)); ANonNativeEndianStream.Write(value,SizeOf(value)); end; end; end; type POBmpSecondTableItem = ^TOBmpSecondTableItem; function IndexOf( const AItem : POBmpSecondTableItem; const ATable : TOBmpSecondTable; const ATableActualLength : Integer ) : Integer;overload; var i : Integer; p : POBmpSecondTableItem; begin Result := -1; if (ATableActualLength > 0) then begin p := @ATable[0]; for i := 0 to ATableActualLength - 1 do begin if CompareMem(p,AItem,SizeOf(TOBmpSecondTableItem)) then begin Result := i; Break; end; Inc(p); end; end; end; procedure MakeOBmpTables( var AFirstTable : TOBmpFirstTable; var ASecondTable : TOBmpSecondTable; const ADataLineList : TDataLineRecArray ); var locLowByte, locHighByte : Word; locTableItem : TOBmpSecondTableItem; locCP : TUnicodeCodePoint; i, locSecondActualLen : Integer; begin SetLength(ASecondTable,2000); locSecondActualLen := 0; for locHighByte := 0 to HIGH_SURROGATE_COUNT - 1 do begin FillChar(locTableItem,SizeOf(locTableItem),#0); for locLowByte := 0 to LOW_SURROGATE_COUNT - 1 do begin locCP := ToUCS4(HIGH_SURROGATE_BEGIN + locHighByte,LOW_SURROGATE_BEGIN + locLowByte); locTableItem[locLowByte] := GetPropID(locCP,ADataLineList)// - 1; end; i := IndexOf(@locTableItem,ASecondTable,locSecondActualLen); if (i = -1) then begin if (locSecondActualLen = Length(ASecondTable)) then SetLength(ASecondTable,locSecondActualLen + 50); i := locSecondActualLen; ASecondTable[i] := locTableItem; Inc(locSecondActualLen); end; AFirstTable[locHighByte] := i; end; SetLength(ASecondTable,locSecondActualLen); end; type P3lvlOBmp3TableItem = ^T3lvlOBmp3TableItem; function IndexOf( const AItem : P3lvlOBmp3TableItem; const ATable : T3lvlOBmp3Table; const ATableActualLength : Integer ) : Integer;overload; var i : Integer; p : P3lvlOBmp3TableItem; begin Result := -1; if (ATableActualLength > 0) then begin p := @ATable[0]; for i := 0 to ATableActualLength - 1 do begin if CompareMem(p,AItem,SizeOf(T3lvlOBmp3TableItem)) then begin Result := i; Break; end; Inc(p); end; end; end; type P3lvlOBmp2TableItem = ^T3lvlOBmp2TableItem; function IndexOf( const AItem : P3lvlOBmp2TableItem; const ATable : T3lvlOBmp2Table ) : Integer;overload; var i : Integer; p : P3lvlOBmp2TableItem; begin Result := -1; if (Length(ATable) > 0) then begin p := @ATable[0]; for i := 0 to Length(ATable) - 1 do begin if CompareMem(p,AItem,SizeOf(T3lvlOBmp2TableItem)) then begin Result := i; Break; end; Inc(p); end; end; end; procedure MakeOBmpTables3Levels( var AFirstTable : T3lvlOBmp1Table; var ASecondTable : T3lvlOBmp2Table; var AThirdTable : T3lvlOBmp3Table; const ADataLineList : TDataLineRecArray ); var locLowByte0, locLowByte1, locHighByte : Word; locTableItem2 : T3lvlOBmp2TableItem; locTableItem3 : T3lvlOBmp3TableItem; locCP : TUnicodeCodePoint; i, locThirdActualLen : Integer; begin SetLength(AThirdTable,120); locThirdActualLen := 0; for locHighByte := 0 to 1023 do begin FillChar(locTableItem2,SizeOf(locTableItem2),#0); for locLowByte0 := 0 to 31 do begin FillChar(locTableItem3,SizeOf(locTableItem3),#0); for locLowByte1 := 0 to 31 do begin locCP := ToUCS4(HIGH_SURROGATE_BEGIN + locHighByte,LOW_SURROGATE_BEGIN + (locLowByte0*32) + locLowByte1); locTableItem3[locLowByte1] := GetPropID(locCP,ADataLineList); end; i := IndexOf(@locTableItem3,AThirdTable,locThirdActualLen); if (i = -1) then begin if (locThirdActualLen = Length(AThirdTable)) then SetLength(AThirdTable,locThirdActualLen + 50); i := locThirdActualLen; AThirdTable[i] := locTableItem3; Inc(locThirdActualLen); end; locTableItem2[locLowByte0] := i; end; i := IndexOf(@locTableItem2,ASecondTable); if (i = -1) then begin i := Length(ASecondTable); SetLength(ASecondTable,(i + 1)); ASecondTable[i] := locTableItem2; end; AFirstTable[locHighByte] := i; end; SetLength(AThirdTable,locThirdActualLen); end; procedure GenerateOBmpTables( ADest : TStream; var AFirstTable : TOBmpFirstTable; var ASecondTable : TOBmpSecondTable ); procedure AddLine(const ALine : ansistring); var buffer : ansistring; begin buffer := ALine + sLineBreak; ADest.Write(buffer[1],Length(buffer)); end; var i, j, c : Integer; locLine : string; begin AddLine('const'); AddLine(' UCO_TABLE_1 : array[0..' + IntToStr(HIGH_SURROGATE_COUNT-1) + '] of Word = ('); locLine := ''; for i := Low(AFirstTable) to High(AFirstTable) - 1 do begin locLine := locLine + IntToStr(AFirstTable[i]) + ','; if (((i+1) mod 20) = 0) then begin locLine := ' ' + locLine; AddLine(locLine); locLine := ''; end; end; locLine := locLine + IntToStr(AFirstTable[High(AFirstTable)]); locLine := ' ' + locLine; AddLine(locLine); AddLine(' );' + sLineBreak); AddLine(' UCO_TABLE_2 : array[0..('+IntToStr(LOW_SURROGATE_COUNT)+'*' + IntToStr(Length(ASecondTable)) +'-1)] of Word =('); c := High(ASecondTable); for i := Low(ASecondTable) to c do begin locLine := ''; for j := Low(TOBmpSecondTableItem) to High(TOBmpSecondTableItem) do begin locLine := locLine + IntToStr(ASecondTable[i][j]) + ','; if (((j+1) mod 16) = 0) then begin if (i = c) and (j = High(TOBmpSecondTableItem)) then Delete(locLine,Length(locLine),1); locLine := ' ' + locLine; AddLine(locLine); locLine := ''; end; end; end; AddLine(' );' + sLineBreak); end; //---------------------------------- procedure Generate3lvlOBmpTables( ADest : TStream; var AFirstTable : T3lvlOBmp1Table; var ASecondTable : T3lvlOBmp2Table; var AThirdTable : T3lvlOBmp3Table ); procedure AddLine(const ALine : ansistring); var buffer : ansistring; begin buffer := ALine + sLineBreak; ADest.Write(buffer[1],Length(buffer)); end; var i, j, c : Integer; locLine : string; begin AddLine('const'); AddLine(' UCO_TABLE_1 : array[0..1023] of Word = ('); locLine := ''; for i := Low(AFirstTable) to High(AFirstTable) - 1 do begin locLine := locLine + IntToStr(AFirstTable[i]) + ','; if (((i+1) mod 20) = 0) then begin locLine := ' ' + locLine; AddLine(locLine); locLine := ''; end; end; locLine := locLine + IntToStr(AFirstTable[High(AFirstTable)]); locLine := ' ' + locLine; AddLine(locLine); AddLine(' );' + sLineBreak); AddLine(' UCO_TABLE_2 : array[0..' + IntToStr(Length(ASecondTable)-1) +'] of array[0..31] of Word = ('); c := High(ASecondTable); for i := Low(ASecondTable) to c do begin locLine := '('; for j := Low(T3lvlOBmp2TableItem) to High(T3lvlOBmp2TableItem) do locLine := locLine + IntToStr(ASecondTable[i][j]) + ','; Delete(locLine,Length(locLine),1); locLine := ' ' + locLine + ')'; if (i < c) then locLine := locLine + ','; AddLine(locLine); end; AddLine(' );' + sLineBreak); AddLine(' UCO_TABLE_3 : array[0..' + IntToStr(Length(AThirdTable)-1) +'] of array[0..31] of Word = ('); c := High(AThirdTable); for i := Low(AThirdTable) to c do begin locLine := '('; for j := Low(T3lvlOBmp3TableItem) to High(T3lvlOBmp3TableItem) do locLine := locLine + IntToStr(AThirdTable[i][j]) + ','; Delete(locLine,Length(locLine),1); locLine := ' ' + locLine + ')'; if (i < c) then locLine := locLine + ','; AddLine(locLine); end; AddLine(' );' + sLineBreak); end; function GetProp( const AHighS, ALowS : Word; const AProps : TPropRecArray; var AFirstTable : TOBmpFirstTable; var ASecondTable : TOBmpSecondTable ): PPropRec; begin Result := @AProps[ASecondTable[AFirstTable[AHighS-HIGH_SURROGATE_BEGIN]][ALowS-LOW_SURROGATE_BEGIN]]; end; function GetProp( const AHighS, ALowS : Word; const AProps : TPropRecArray; var AFirstTable : T3lvlOBmp1Table; var ASecondTable : T3lvlOBmp2Table; var AThirdTable : T3lvlOBmp3Table ): PPropRec; begin Result := @AProps[AThirdTable[ASecondTable[AFirstTable[AHighS]][ALowS div 32]][ALowS mod 32]]; //Result := @AProps[ASecondTable[AFirstTable[AHighS-HIGH_SURROGATE_BEGIN]][ALowS-LOW_SURROGATE_BEGIN]]; end; { TUCA_PropItemContextTreeRec } function TUCA_PropItemContextTreeRec.GetData : PUCA_PropItemContextTreeNodeRec; begin if (Size = 0) then Result := nil else Result := PUCA_PropItemContextTreeNodeRec( PtrUInt( PtrUInt(@Self) + SizeOf(UInt24){Size} ) ); end; { TUCA_LineContextRec } procedure TUCA_LineContextRec.Clear; begin Data := nil end; procedure TUCA_LineContextRec.Assign(ASource : PUCA_LineContextRec); var c, i : Integer; begin if (ASource = nil) then begin Clear(); exit; end; c := Length(ASource^.Data); SetLength(Self.Data,c); for i := 0 to c-1 do Self.Data[i].Assign(@ASource^.Data[i]); end; function TUCA_LineContextRec.Clone : TUCA_LineContextRec; begin Result.Clear(); Result.Assign(@Self); end; { TUCA_LineContextItemRec } procedure TUCA_LineContextItemRec.Clear(); begin CodePoints := nil; Weights := nil; end; procedure TUCA_LineContextItemRec.Assign(ASource : PUCA_LineContextItemRec); begin if (ASource = nil) then begin Clear(); exit; end; Self.CodePoints := Copy(ASource^.CodePoints); Self.Weights := Copy(ASource^.Weights); end; function TUCA_LineContextItemRec.Clone() : TUCA_LineContextItemRec; begin Result.Clear(); Result.Assign(@Self); end; { TUCA_LineRec } procedure TUCA_LineRec.Clear; begin CodePoints := nil; Weights := nil; Deleted := False; Stored := False; Context.Clear(); end; procedure TUCA_LineRec.Assign(ASource : PUCA_LineRec); begin if (ASource = nil) then begin Clear(); exit; end; Self.CodePoints := Copy(ASource^.CodePoints); Self.Weights := Copy(ASource^.Weights); Self.Deleted := ASource^.Deleted; Self.Stored := ASource^.Stored; Self.Context.Assign(@ASource^.Context); end; function TUCA_LineRec.Clone : TUCA_LineRec; begin Result.Clear(); Result.Assign(@Self); end; function TUCA_LineRec.HasContext() : Boolean; begin Result := (Length(Context.Data) > 0); end; { TPropRec } function TPropRec.GetCategory: TUnicodeCategory; begin Result := TUnicodeCategory((CategoryData and Byte($F8)) shr 3); end; function TPropRec.GetUnifiedIdeograph : Boolean; begin Result := IsBitON(CategoryData,FLAG_UNIFIED_IDEOGRAPH); end; procedure TPropRec.SetCategory(AValue: TUnicodeCategory); var b : Byte; begin b := Ord(AValue); b := b shl 3; CategoryData := CategoryData or b; //CategoryData := CategoryData or Byte(Byte(Ord(AValue)) shl 3); end; function TPropRec.GetWhiteSpace: Boolean; begin Result := IsBitON(CategoryData,FLAG_WHITE_SPACE); end; procedure TPropRec.SetUnifiedIdeograph(AValue : Boolean); begin SetBit(CategoryData,FLAG_UNIFIED_IDEOGRAPH,AValue); end; procedure TPropRec.SetWhiteSpace(AValue: Boolean); begin SetBit(CategoryData,FLAG_WHITE_SPACE,AValue); end; function TPropRec.GetHangulSyllable: Boolean; begin Result := IsBitON(CategoryData,FLAG_HANGUL_SYLLABLE); end; procedure TPropRec.SetHangulSyllable(AValue: Boolean); begin SetBit(CategoryData,FLAG_HANGUL_SYLLABLE,AValue); end; { TUCA_PropItemRec } function TUCA_PropItemRec.GetWeightSize : Word; var c : Integer; begin c := WeightLength; if (c = 0) then exit(0); Result := c*SizeOf(TUCA_PropWeights); if IsWeightCompress_1() then Result := Result - 1; if IsWeightCompress_2() then Result := Result - 1; end; function TUCA_PropItemRec.HasCodePoint(): Boolean; begin Result := IsBitON(Flags,FLAG_CODEPOINT); end; procedure TUCA_PropItemRec.GetWeightArray(ADest: PUCA_PropWeights); var c : Integer; p : PByte; pd : PUCA_PropWeights; begin c := WeightLength; p := PByte(PtrUInt(@Self) + SizeOf(TUCA_PropItemRec)); pd := ADest; pd^.Weights[0] := PWord(p)^; p := p + 2; if not IsWeightCompress_1() then begin pd^.Weights[1] := PWord(p)^; p := p + 2; end else begin pd^.Weights[1] := p^; p := p + 1; end; if not IsWeightCompress_2() then begin pd^.Weights[2] := PWord(p)^; p := p + 2; end else begin pd^.Weights[2] := p^; p := p + 1; end; if (c > 1) then Move(p^, (pd+1)^, ((c-1)*SizeOf(TUCA_PropWeights))); end; function TUCA_PropItemRec.GetSelfOnlySize() : Cardinal; begin Result := SizeOf(TUCA_PropItemRec); if (WeightLength > 0) then begin Result := Result + (WeightLength * Sizeof(TUCA_PropWeights)); if IsWeightCompress_1() then Result := Result - 1; if IsWeightCompress_2() then Result := Result - 1; end; if HasCodePoint() then Result := Result + SizeOf(UInt24); if Contextual then Result := Result + Cardinal(GetContext()^.Size); end; procedure TUCA_PropItemRec.SetContextual(AValue : Boolean); begin SetBit(Flags,FLAG_CONTEXTUAL,AValue); end; function TUCA_PropItemRec.GetContextual : Boolean; begin Result := IsBitON(Flags,FLAG_CONTEXTUAL); end; function TUCA_PropItemRec.GetContext() : PUCA_PropItemContextTreeRec; var p : PtrUInt; begin if not Contextual then exit(nil); p := PtrUInt(@Self) + SizeOf(TUCA_PropItemRec); if IsBitON(Flags,FLAG_CODEPOINT) then p := p + SizeOf(UInt24); Result := PUCA_PropItemContextTreeRec(p); end; procedure TUCA_PropItemRec.SetDeleted(AValue: Boolean); begin SetBit(Flags,FLAG_DELETION,AValue); end; function TUCA_PropItemRec.IsDeleted: Boolean; begin Result := IsBitON(Flags,FLAG_DELETION); end; function TUCA_PropItemRec.IsValid() : Boolean; begin Result := IsBitON(Flags,FLAG_VALID); end; function TUCA_PropItemRec.IsWeightCompress_1 : Boolean; begin Result := IsBitON(Flags,FLAG_COMPRESS_WEIGHT_1); end; function TUCA_PropItemRec.IsWeightCompress_2 : Boolean; begin Result := IsBitON(Flags,FLAG_COMPRESS_WEIGHT_2); end; function TUCA_PropItemRec.GetCodePoint: UInt24; begin if HasCodePoint() then begin if Contextual then Result := PUInt24( PtrUInt(@Self) + Self.GetSelfOnlySize()- SizeOf(UInt24) - Cardinal(GetContext()^.Size) )^ else Result := PUInt24(PtrUInt(@Self) + Self.GetSelfOnlySize() - SizeOf(UInt24))^ end else begin raise Exception.Create('TUCA_PropItemRec.GetCodePoint : "No code point available."'); end end; function avl_CompareCodePoints(Item1, Item2: Pointer): Integer; var a, b : PUCA_LineContextItemRec; i, hb : Integer; begin if (Item1 = Item2) then exit(0); if (Item1 = nil) then exit(-1); if (Item2 = nil) then exit(1); a := Item1; b := Item2; if (a^.CodePoints = b^.CodePoints) then exit(0); Result := 1; hb := Length(b^.CodePoints) - 1; for i := 0 to Length(a^.CodePoints) - 1 do begin if (i > hb) then exit; if (a^.CodePoints[i] < b^.CodePoints[i]) then exit(-1); if (a^.CodePoints[i] > b^.CodePoints[i]) then exit(1); end; if (Length(a^.CodePoints) = Length(b^.CodePoints)) then exit(0); exit(-1); end; function ConstructAvlContextTree(AContext : PUCA_LineContextRec) : TAVLTree; var r : TAVLTree; i : Integer; begin r := TAVLTree.Create(@avl_CompareCodePoints); try for i := 0 to Length(AContext^.Data) - 1 do r.Add(@AContext^.Data[i]); Result := r; except FreeAndNil(r); raise; end; end; function ConstructContextTree( const AContext : PUCA_LineContextRec; var ADestBuffer; const ADestBufferLength : Cardinal ) : PUCA_PropItemContextTreeRec; function CalcItemOnlySize(AItem : TAVLTreeNode) : Cardinal; var kitem : PUCA_LineContextItemRec; begin if (AItem = nil) then exit(0); kitem := AItem.Data; Result := SizeOf(PUCA_PropItemContextTreeNodeRec^.Left) + SizeOf(PUCA_PropItemContextTreeNodeRec^.Right) + SizeOf(PUCA_PropItemContextRec^.CodePointCount) + (Length(kitem^.CodePoints)*SizeOf(UInt24)) + SizeOf(PUCA_PropItemContextRec^.WeightCount) + (Length(kitem^.Weights)*SizeOf(TUCA_PropWeights)); end; function CalcItemSize(AItem : TAVLTreeNode) : Cardinal; begin if (AItem = nil) then exit(0); Result := CalcItemOnlySize(AItem); if (AItem.Left <> nil) then Result := Result + CalcItemSize(AItem.Left); if (AItem.Right <> nil) then Result := Result + CalcItemSize(AItem.Right); end; function CalcSize(AData : TAVLTree) : Cardinal; begin Result := SizeOf(PUCA_PropItemContextTreeRec^.Size) + CalcItemSize(AData.Root); end; function ConstructItem(ASource : TAVLTreeNode; ADest : PUCA_PropItemContextTreeNodeRec) : Cardinal; var k : Integer; kitem : PUCA_LineContextItemRec; kpcp : PUInt24; kpw : PUCA_PropWeights; pextra : PtrUInt; pnext : PUCA_PropItemContextTreeNodeRec; begin kitem := ASource.Data; ADest^.Data.CodePointCount := Length(kitem^.CodePoints); ADest^.Data.WeightCount := Length(kitem^.Weights); pextra := PtrUInt(ADest)+SizeOf(ADest^.Left)+SizeOf(ADest^.Right)+ SizeOf(ADest^.Data.CodePointCount)+SizeOf(ADest^.Data.WeightCount); if (ADest^.Data.CodePointCount > 0) then begin kpcp := PUInt24(pextra); for k := 0 to ADest^.Data.CodePointCount - 1 do begin kpcp^ := kitem^.CodePoints[k]; Inc(kpcp); end; end; if (ADest^.Data.WeightCount > 0) then begin kpw := PUCA_PropWeights(pextra + (ADest^.Data.CodePointCount*SizeOf(UInt24))); for k := 0 to ADest^.Data.WeightCount - 1 do begin kpw^.Weights[0] := kitem^.Weights[k].Weights[0]; kpw^.Weights[1] := kitem^.Weights[k].Weights[1]; kpw^.Weights[2] := kitem^.Weights[k].Weights[2]; Inc(kpw); end; end; Result := CalcItemOnlySize(ASource); if (ASource.Left <> nil) then begin pnext := PUCA_PropItemContextTreeNodeRec(PtrUInt(ADest) + Result); ADest^.Left := Result; Result := Result + ConstructItem(ASource.Left,pnext); end else begin ADest^.Left := 0; end; if (ASource.Right <> nil) then begin pnext := PUCA_PropItemContextTreeNodeRec(PtrUInt(ADest) + Result); ADest^.Right := Result; Result := Result + ConstructItem(ASource.Right,pnext); end else begin ADest^.Right := 0; end; end; var c : PtrUInt; r : PUCA_PropItemContextTreeRec; p : PUCA_PropItemContextTreeNodeRec; tempTree : TAVLTree; begin tempTree := ConstructAvlContextTree(AContext); try c := CalcSize(tempTree); if (ADestBufferLength > 0) and (c > ADestBufferLength) then raise Exception.Create(SInsufficientMemoryBuffer); r := @ADestBuffer; r^.Size := c; p := PUCA_PropItemContextTreeNodeRec(PtrUInt(r) + SizeOf(r^.Size)); ConstructItem(tempTree.Root,p); finally tempTree.Free(); end; Result := r; end; procedure ReverseRecordBytes(var AItem : TSerializedCollationHeader); begin ReverseBytes(AItem.BMP_Table1Length,SizeOf(AItem.BMP_Table1Length)); ReverseBytes(AItem.BMP_Table2Length,SizeOf(AItem.BMP_Table2Length)); ReverseBytes(AItem.OBMP_Table1Length,SizeOf(AItem.OBMP_Table1Length)); ReverseBytes(AItem.OBMP_Table2Length,SizeOf(AItem.OBMP_Table2Length)); ReverseBytes(AItem.PropCount,SizeOf(AItem.PropCount)); ReverseBytes(AItem.VariableLowLimit,SizeOf(AItem.VariableLowLimit)); ReverseBytes(AItem.VariableHighLimit,SizeOf(AItem.VariableHighLimit)); end; procedure ReverseBytes(var AData; const ALength : Integer); var i,j : PtrInt; c : Byte; p : PByte; begin if (ALength = 1) then exit; p := @AData; j := ALength div 2; for i := 0 to Pred(j) do begin c := p[i]; p[i] := p[(ALength - 1 ) - i]; p[(ALength - 1 ) - i] := c; end; end; procedure ReverseArray(var AValue; const AArrayLength, AItemSize : PtrInt); var p : PByte; i : PtrInt; begin if ( AArrayLength > 0 ) and ( AItemSize > 1 ) then begin p := @AValue; for i := 0 to Pred(AArrayLength) do begin ReverseBytes(p^,AItemSize); Inc(p,AItemSize); end; end; end; procedure ReverseContextNodeFromNativeEndian(s, d : PUCA_PropItemContextTreeNodeRec); var k : PtrUInt; p_s, p_d : PByte; begin d^.Left := s^.Left; ReverseBytes(d^.Left,SizeOf(d^.Left)); d^.Right := s^.Right; ReverseBytes(d^.Right,SizeOf(d^.Right)); d^.Data.CodePointCount := s^.Data.CodePointCount; ReverseBytes(d^.Data.CodePointCount,SizeOf(d^.Data.CodePointCount)); d^.Data.WeightCount := s^.Data.WeightCount; ReverseBytes(d^.Data.WeightCount,SizeOf(d^.Data.WeightCount)); k := SizeOf(TUCA_PropItemContextTreeNodeRec); p_s := PByte(PtrUInt(s) + k); p_d := PByte(PtrUInt(d) + k); k := (s^.Data.CodePointCount*SizeOf(UInt24)); Move(p_s^,p_d^, k); ReverseArray(p_d^,s^.Data.CodePointCount,SizeOf(UInt24)); p_s := PByte(PtrUInt(p_s) + k); p_d := PByte(PtrUInt(p_d) + k); k := (s^.Data.WeightCount*SizeOf(TUCA_PropWeights)); Move(p_s^,p_d^,k); ReverseArray(p_d^,(s^.Data.WeightCount*Length(TUCA_PropWeights.Weights)),SizeOf(TUCA_PropWeights.Weights[0])); if (s^.Left > 0) then ReverseContextNodeFromNativeEndian( PUCA_PropItemContextTreeNodeRec(PtrUInt(s) + s^.Left), PUCA_PropItemContextTreeNodeRec(PtrUInt(d) + s^.Left) ); if (s^.Right > 0) then ReverseContextNodeFromNativeEndian( PUCA_PropItemContextTreeNodeRec(PtrUInt(s) + s^.Right), PUCA_PropItemContextTreeNodeRec(PtrUInt(d) + s^.Right) ); end; procedure ReverseContextFromNativeEndian(s, d : PUCA_PropItemContextTreeRec); var k : PtrUInt; begin d^.Size := s^.Size; ReverseBytes(d^.Size,SizeOf(d^.Size)); if (s^.Size = 0) then exit; k := SizeOf(s^.Size); ReverseContextNodeFromNativeEndian( PUCA_PropItemContextTreeNodeRec(PtrUInt(s)+k), PUCA_PropItemContextTreeNodeRec(PtrUInt(d)+k) ); end; procedure ReverseFromNativeEndian( const AData : PUCA_PropItemRec; const ADataLen : Cardinal; const ADest : PUCA_PropItemRec ); var s, d : PUCA_PropItemRec; sCtx, dCtx : PUCA_PropItemContextTreeRec; dataEnd : PtrUInt; k, i : PtrUInt; p_s, p_d : PByte; pw_s, pw_d : PUCA_PropWeights; begin dataEnd := PtrUInt(AData) + ADataLen; s := AData; d := ADest; while True do begin d^.WeightLength := s^.WeightLength; ReverseBytes(d^.WeightLength,SizeOf(d^.WeightLength)); d^.ChildCount := s^.ChildCount; ReverseBytes(d^.ChildCount,SizeOf(d^.ChildCount)); d^.Size := s^.Size; ReverseBytes(d^.Size,SizeOf(d^.Size)); d^.Flags := s^.Flags; ReverseBytes(d^.Flags,SizeOf(d^.Flags)); if s^.Contextual then begin k := SizeOf(TUCA_PropItemRec); if s^.HasCodePoint() then k := k + SizeOf(UInt24); sCtx := PUCA_PropItemContextTreeRec(PtrUInt(s) + k); dCtx := PUCA_PropItemContextTreeRec(PtrUInt(d) + k); ReverseContextFromNativeEndian(sCtx,dCtx); end; if s^.HasCodePoint() then begin if s^.Contextual then k := s^.GetSelfOnlySize()- SizeOf(UInt24) - Cardinal(s^.GetContext()^.Size) else k := s^.GetSelfOnlySize() - SizeOf(UInt24); p_s := PByte(PtrUInt(s) + k); p_d := PByte(PtrUInt(d) + k); Unaligned(PUInt24(p_d)^) := Unaligned(PUInt24(p_s)^); ReverseBytes(p_d^,SizeOf(UInt24)); end; if (s^.WeightLength > 0) then begin k := SizeOf(TUCA_PropItemRec); p_s := PByte(PtrUInt(s) + k); p_d := PByte(PtrUInt(d) + k); k := SizeOf(Word); Unaligned(PWord(p_d)^) := Unaligned(PWord(p_s)^); ReverseBytes(Unaligned(p_d^),k); p_s := PByte(PtrUInt(p_s) + k); p_d := PByte(PtrUInt(p_d) + k); if s^.IsWeightCompress_1() then begin k := SizeOf(Byte); PByte(p_d)^ := PByte(p_s)^; end else begin k := SizeOf(Word); Unaligned(PWord(p_d)^) := Unaligned(PWord(p_s)^); end; ReverseBytes(p_d^,k); p_s := PByte(PtrUInt(p_s) + k); p_d := PByte(PtrUInt(p_d) + k); if s^.IsWeightCompress_2() then begin k := SizeOf(Byte); PByte(p_d)^ := PByte(p_s)^; end else begin k := SizeOf(Word); Unaligned(PWord(p_d)^) := Unaligned(PWord(p_s)^); end; ReverseBytes(p_d^,k); if (s^.WeightLength > 1) then begin pw_s := PUCA_PropWeights(PtrUInt(p_s) + k); pw_d := PUCA_PropWeights(PtrUInt(p_d) + k); for i := 1 to s^.WeightLength - 1 do begin pw_d^.Weights[0] := pw_s^.Weights[0]; pw_d^.Weights[1] := pw_s^.Weights[1]; pw_d^.Weights[2] := pw_s^.Weights[2]; ReverseArray(pw_d^,3,SizeOf(pw_s^.Weights[0])); Inc(pw_s); Inc(pw_d); end; end; end; k := s^.GetSelfOnlySize(); s := PUCA_PropItemRec(PtrUInt(s)+k); d := PUCA_PropItemRec(PtrUInt(d)+k); if (PtrUInt(s) >= dataEnd) then Break; end; if ( (PtrUInt(s)-PtrUInt(AData)) <> (PtrUInt(d)-PtrUInt(ADest)) ) then raise Exception.CreateFmt('Read data length(%d) differs from written data length(%d).',[(PtrUInt(s)-PtrUInt(AData)), (PtrUInt(d)-PtrUInt(ADest))]); end; //------------------------------------------------------------------------------ procedure ReverseContextNodeToNativeEndian(s, d : PUCA_PropItemContextTreeNodeRec); var k : PtrUInt; p_s, p_d : PByte; begin d^.Left := s^.Left; ReverseBytes(d^.Left,SizeOf(d^.Left)); d^.Right := s^.Right; ReverseBytes(d^.Right,SizeOf(d^.Right)); d^.Data.CodePointCount := s^.Data.CodePointCount; ReverseBytes(d^.Data.CodePointCount,SizeOf(d^.Data.CodePointCount)); d^.Data.WeightCount := s^.Data.WeightCount; ReverseBytes(d^.Data.WeightCount,SizeOf(d^.Data.WeightCount)); k := SizeOf(TUCA_PropItemContextTreeNodeRec); p_s := PByte(PtrUInt(s) + k); p_d := PByte(PtrUInt(d) + k); k := (d^.Data.CodePointCount*SizeOf(UInt24)); Move(p_s^,p_d^, k); ReverseArray(p_d^,d^.Data.CodePointCount,SizeOf(UInt24)); p_s := PByte(PtrUInt(p_s) + k); p_d := PByte(PtrUInt(p_d) + k); k := (d^.Data.WeightCount*SizeOf(TUCA_PropWeights)); Move(p_s^,p_d^,k); ReverseArray(p_d^,(d^.Data.WeightCount*Length(TUCA_PropWeights.Weights)),SizeOf(TUCA_PropWeights.Weights[0])); if (d^.Left > 0) then ReverseContextNodeToNativeEndian( PUCA_PropItemContextTreeNodeRec(PtrUInt(s) + d^.Left), PUCA_PropItemContextTreeNodeRec(PtrUInt(d) + d^.Left) ); if (d^.Right > 0) then ReverseContextNodeToNativeEndian( PUCA_PropItemContextTreeNodeRec(PtrUInt(s) + d^.Right), PUCA_PropItemContextTreeNodeRec(PtrUInt(d) + d^.Right) ); end; procedure ReverseContextToNativeEndian(s, d : PUCA_PropItemContextTreeRec); var k : PtrUInt; begin d^.Size := s^.Size; ReverseBytes(d^.Size,SizeOf(d^.Size)); if (s^.Size = 0) then exit; k := SizeOf(s^.Size); ReverseContextNodeToNativeEndian( PUCA_PropItemContextTreeNodeRec(PtrUInt(s)+k), PUCA_PropItemContextTreeNodeRec(PtrUInt(d)+k) ); end; procedure ReverseToNativeEndian( const AData : PUCA_PropItemRec; const ADataLen : Cardinal; const ADest : PUCA_PropItemRec ); var s, d : PUCA_PropItemRec; sCtx, dCtx : PUCA_PropItemContextTreeRec; dataEnd : PtrUInt; k, i : PtrUInt; p_s, p_d : PByte; pw_s, pw_d : PUCA_PropWeights; begin dataEnd := PtrUInt(AData) + ADataLen; s := AData; d := ADest; while True do begin d^.WeightLength := s^.WeightLength; ReverseBytes(d^.WeightLength,SizeOf(d^.WeightLength)); d^.ChildCount := s^.ChildCount; ReverseBytes(d^.ChildCount,SizeOf(d^.ChildCount)); d^.Size := s^.Size; ReverseBytes(d^.Size,SizeOf(d^.Size)); d^.Flags := s^.Flags; ReverseBytes(d^.Flags,SizeOf(d^.Flags)); if d^.Contextual then begin k := SizeOf(TUCA_PropItemRec); if d^.HasCodePoint() then k := k + SizeOf(UInt24); sCtx := PUCA_PropItemContextTreeRec(PtrUInt(s) + k); dCtx := PUCA_PropItemContextTreeRec(PtrUInt(d) + k); ReverseContextToNativeEndian(sCtx,dCtx); end; if d^.HasCodePoint() then begin if d^.Contextual then k := d^.GetSelfOnlySize()- SizeOf(UInt24) - Cardinal(d^.GetContext()^.Size) else k := d^.GetSelfOnlySize() - SizeOf(UInt24); p_s := PByte(PtrUInt(s) + k); p_d := PByte(PtrUInt(d) + k); Unaligned(PUInt24(p_d)^) := Unaligned(PUInt24(p_s)^); ReverseBytes(p_d^,SizeOf(UInt24)); end; if (d^.WeightLength > 0) then begin k := SizeOf(TUCA_PropItemRec); p_s := PByte(PtrUInt(s) + k); p_d := PByte(PtrUInt(d) + k); k := SizeOf(Word); Unaligned(PWord(p_d)^) := Unaligned(PWord(p_s)^); ReverseBytes(p_d^,k); p_s := PByte(PtrUInt(p_s) + k); p_d := PByte(PtrUInt(p_d) + k); if d^.IsWeightCompress_1() then begin k := SizeOf(Byte); PByte(p_d)^ := PByte(p_s)^; end else begin k := SizeOf(Word); Unaligned(PWord(p_d)^) := Unaligned(PWord(p_s)^); end; ReverseBytes(p_d^,k); p_s := PByte(PtrUInt(p_s) + k); p_d := PByte(PtrUInt(p_d) + k); if d^.IsWeightCompress_2() then begin k := SizeOf(Byte); PByte(p_d)^ := PByte(p_s)^; end else begin k := SizeOf(Word); Unaligned(PWord(p_d)^) := Unaligned(PWord(p_s)^); end; ReverseBytes(p_d^,k); if (d^.WeightLength > 1) then begin pw_s := PUCA_PropWeights(PtrUInt(p_s) + k); pw_d := PUCA_PropWeights(PtrUInt(p_d) + k); for i := 1 to d^.WeightLength - 1 do begin pw_d^.Weights[0] := pw_s^.Weights[0]; pw_d^.Weights[1] := pw_s^.Weights[1]; pw_d^.Weights[2] := pw_s^.Weights[2]; ReverseArray(pw_d^,3,SizeOf(pw_s^.Weights[0])); Inc(pw_s); Inc(pw_d); end; end; end; k := d^.GetSelfOnlySize(); s := PUCA_PropItemRec(PtrUInt(s)+k); d := PUCA_PropItemRec(PtrUInt(d)+k); if (PtrUInt(s) >= dataEnd) then Break; end; if ( (PtrUInt(s)-PtrUInt(AData)) <> (PtrUInt(d)-PtrUInt(ADest)) ) then raise Exception.CreateFmt('Read data length(%d) differs from written data length(%d).',[(PtrUInt(s)-PtrUInt(AData)), (PtrUInt(d)-PtrUInt(ADest))]); end; procedure Check(const ACondition : Boolean; const AMsg : string);overload; begin if not ACondition then raise Exception.Create(AMsg); end; procedure Check( const ACondition : Boolean; const AFormatMsg : string; const AArgs : array of const );overload; begin Check(ACondition,Format(AFormatMsg,AArgs)); end; procedure Check(const ACondition : Boolean);overload; begin Check(ACondition,'Check failed.') end; procedure CompareWeights(a, b : PUCA_PropWeights; const ALength : Integer); var i : Integer; begin if (ALength > 0) then begin for i := 0 to ALength - 1 do begin Check(a[i].Weights[0]=b[i].Weights[0]); Check(a[i].Weights[1]=b[i].Weights[1]); Check(a[i].Weights[2]=b[i].Weights[2]); end; end; end; procedure CompareCodePoints(a, b : PUInt24; const ALength : Integer); var i : Integer; begin if (ALength > 0) then begin for i := 0 to ALength - 1 do Check(a[i]=b[i]); end; end; procedure CompareContextNode(AProp1, AProp2 : PUCA_PropItemContextTreeNodeRec); var a, b : PUCA_PropItemContextTreeNodeRec; k : Cardinal; begin if (AProp1=nil) then begin Check(AProp2=nil); exit; end; a := AProp1; b := AProp2; Check(a^.Left=b^.Left); Check(a^.Right=b^.Right); Check(a^.Data.CodePointCount=b^.Data.CodePointCount); Check(a^.Data.WeightCount=b^.Data.WeightCount); k := SizeOf(a^.Data); CompareCodePoints( PUInt24(PtrUInt(a)+k), PUInt24(PtrUInt(b)+k), a^.Data.CodePointCount ); k := SizeOf(a^.Data)+ (a^.Data.CodePointCount*SizeOf(UInt24)); CompareWeights( PUCA_PropWeights(PtrUInt(a)+k), PUCA_PropWeights(PtrUInt(b)+k), a^.Data.WeightCount ); if (a^.Left > 0) then begin k := a^.Left; CompareContextNode( PUCA_PropItemContextTreeNodeRec(PtrUInt(a)+k), PUCA_PropItemContextTreeNodeRec(PtrUInt(b)+k) ); end; if (a^.Right > 0) then begin k := a^.Right; CompareContextNode( PUCA_PropItemContextTreeNodeRec(PtrUInt(a)+k), PUCA_PropItemContextTreeNodeRec(PtrUInt(b)+k) ); end; end; procedure CompareContext(AProp1, AProp2 : PUCA_PropItemContextTreeRec); var a, b : PUCA_PropItemContextTreeNodeRec; k : Integer; begin if (AProp1=nil) then begin Check(AProp2=nil); exit; end; Check(AProp1^.Size=AProp2^.Size); k := Cardinal(AProp1^.Size); a := PUCA_PropItemContextTreeNodeRec(PtrUInt(AProp1)+k); b := PUCA_PropItemContextTreeNodeRec(PtrUInt(AProp2)+k); CompareContextNode(a,b); end; procedure CompareProps(const AProp1, AProp2 : PUCA_PropItemRec; const ADataLen : Integer); var a, b, pend : PUCA_PropItemRec; wa, wb : array of TUCA_PropWeights; k : Integer; begin if (ADataLen <= 0) then exit; a := PUCA_PropItemRec(AProp1); b := PUCA_PropItemRec(AProp2); pend := PUCA_PropItemRec(PtrUInt(AProp1)+ADataLen); while (a 0) then begin k := a^.WeightLength; SetLength(wa,k); SetLength(wb,k); a^.GetWeightArray(@wa[0]); b^.GetWeightArray(@wb[0]); CompareWeights(@wa[0],@wb[0],k); end; if a^.Contextual then CompareContext(a^.GetContext(),b^.GetContext()); Check(a^.GetSelfOnlySize()=b^.GetSelfOnlySize()); k := a^.GetSelfOnlySize(); a := PUCA_PropItemRec(PtrUInt(a)+k); b := PUCA_PropItemRec(PtrUInt(b)+k); end; end; Procedure QuickSort(AList : PCardinal; L, R : Longint);overload; var I, J : Longint; P, Q : Cardinal; begin repeat I := L; J := R; P := AList[ (L + R) div 2 ]; repeat while (P > AList[i]) do I := I + 1; while (P < AList[J]) do J := J - 1; If I <= J then begin Q := AList[I]; AList[I] := AList[J]; AList[J] := Q; I := I + 1; J := J - 1; end; until I > J; if J - L < R - I then begin if L < J then QuickSort(AList, L, J); L := I; end else begin if I < R then QuickSort(AList, I, R); R := J; end; until L >= R; end; function CalcMaxLevel2Count( const ALevel1Value : Cardinal; ALines : array of TUCA_LineRec ) : Integer; var i, c, k : Integer; ac : Integer; items : array of Cardinal; p : PUCA_LineRec; pw : ^TUCA_WeightRec; begin c := Length(ALines); if (c < 1) then exit(0); SetLength(items,0); ac := 0; p := @ALines[Low(ALines)]; for i := 0 to c-1 do begin if (Length(p^.Weights) > 0) then begin pw := @p^.Weights[Low(p^.Weights)]; for k := 0 to Length(p^.Weights)-1 do begin if (pw^.Weights[0] = ALevel1Value) then begin if (ac = 0) or (IndexDWord(items[0],ac,pw^.Weights[1]) < 0) then begin if (ac >= Length(items)) then SetLength(items,Length(items)+256); items[ac] := pw^.Weights[1]; ac := ac+1; end; end; Inc(pw); end; end; Inc(p); end; Result := ac; end; function RewriteLevel2( const ALevel1Value : Cardinal; ALines : PUCA_LineRec; const ALinesLength : Integer ) : Integer; var i, c, k : Integer; ac : Integer; items : array of Cardinal; p : PUCA_LineRec; pw : ^TUCA_WeightRec; newValue : Int64; begin c := ALinesLength; if (c < 1) then exit(0); SetLength(items,256); ac := 0; p := ALines; for i := 0 to c-1 do begin if (Length(p^.Weights) > 0) then begin for k := 0 to Length(p^.Weights)-1 do begin pw := @p^.Weights[k]; if (pw^.Weights[0] = ALevel1Value) then begin if (ac = 0) or (IndexDWord(items[0],ac,pw^.Weights[1]) < 0) then begin if (ac >= Length(items)) then SetLength(items,Length(items)+256); items[ac] := pw^.Weights[1]; ac := ac+1; end; end; end; end; Inc(p); end; SetLength(items,ac); if (ac > 1) then QuickSort(@items[0],0,(ac-1)); p := ALines; for i := 0 to c-1 do begin if (Length(p^.Weights) > 0) then begin for k := 0 to Length(p^.Weights)-1 do begin pw := @p^.Weights[k]; if (pw^.Weights[0] = ALevel1Value) then begin newValue := IndexDWord(items[0],ac,pw^.Weights[1]); if (newValue < 0) then raise Exception.CreateFmt('level 2 value %d missed in rewrite of level 1 value of %d.',[pw^.Weights[1],ALevel1Value]); pw^.Weights[1] := newValue;//+1; end; end; end; Inc(p); end; if (Length(items) > 0) then Result := items[Length(items)-1] else Result := 0; end; procedure RewriteLevel2Values(ALines : PUCA_LineRec; ALength : Integer); var c, i, ac, k : Integer; p : PUCA_LineRec; level1List : array of Cardinal; pw : ^TUCA_WeightRec; begin c := ALength; if (c < 1) then exit; ac := 0; SetLength(level1List,c); p := ALines; for i := 0 to c-1 do begin if (Length(p^.Weights) > 0) then begin for k := 0 to Length(p^.Weights)-1 do begin pw := @p^.Weights[k]; if (ac = 0) or (IndexDWord(level1List[0],ac,pw^.Weights[0]) < 0) then begin if (ac >= Length(level1List)) then SetLength(level1List,ac+1000); level1List[ac] := pw^.Weights[0]; RewriteLevel2(level1List[ac],ALines,ALength); ac := ac+1; end; end; end; Inc(p); end; end; function CalcMaxLevel2Value(ALines : array of TUCA_LineRec) : Cardinal; var i, c, k, tempValue : Integer; p : PUCA_LineRec; maxLevel : Cardinal; maxValue : Integer; begin c := Length(ALines); if (c < 2) then exit(0); maxLevel := 0; maxValue := CalcMaxLevel2Count(maxLevel,ALines); p := @ALines[Low(ALines)+1]; for i := 1 to c-1 do begin if (Length(p^.Weights) > 0) then begin for k := 0 to Length(p^.Weights)-1 do begin if (p^.Weights[k].Weights[0] <> maxLevel) then begin tempValue := CalcMaxLevel2Count(p^.Weights[k].Weights[0],ALines); if (tempValue > maxValue) then begin maxLevel := p^.Weights[k].Weights[0]; maxValue := tempValue; end; end; end; end; Inc(p); end; Result := maxValue; end; initialization FS := DefaultFormatSettings; FS.DecimalSeparator := '.'; end.