mirror of
https://gitlab.com/freepascal.org/fpc/source.git
synced 2025-04-12 18:10:29 +02:00
5022 lines
143 KiB
ObjectPascal
5022 lines
143 KiB
ObjectPascal
{ Unicode parser helper unit.
|
|
|
|
Copyright (c) 2012-2015 by Inoussa OUEDRAOGO
|
|
|
|
The source code is distributed under the Library GNU
|
|
General Public License with the following modification:
|
|
|
|
- object files and libraries linked into an application may be
|
|
distributed without source code.
|
|
|
|
If you didn't receive a copy of the file COPYING, contact:
|
|
Free Software Foundation
|
|
675 Mass Ave
|
|
Cambridge, MA 02139
|
|
USA
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. }
|
|
unit helper;
|
|
|
|
{$mode delphi}
|
|
{$H+}
|
|
{$PACKENUM 1}
|
|
{$pointermath on}
|
|
{$typedaddress on}
|
|
{$warn 4056 off} //Conversion between ordinals and pointers is not portable
|
|
|
|
{$macro on}
|
|
{$ifdef FPC_REQUIRES_PROPER_ALIGNMENT}
|
|
{$define X_PACKED:=}
|
|
{$else FPC_REQUIRES_PROPER_ALIGNMENT}
|
|
{$define X_PACKED:=packed}
|
|
{$endif FPC_REQUIRES_PROPER_ALIGNMENT}
|
|
|
|
interface
|
|
|
|
uses
|
|
Classes, SysUtils, StrUtils;
|
|
|
|
const
|
|
SLicenseText =
|
|
' { Unicode implementation tables. ' + sLineBreak +
|
|
' ' + sLineBreak +
|
|
' Copyright (c) 2013 - 2017 by Inoussa OUEDRAOGO ' + sLineBreak +
|
|
' ' + sLineBreak +
|
|
' Permission is hereby granted, free of charge, to any person ' + sLineBreak +
|
|
' obtaining a copy of the Unicode data files and any associated ' + sLineBreak +
|
|
' documentation (the "Data Files") or Unicode software and any ' + sLineBreak +
|
|
' associated documentation (the "Software") to deal in the Data ' + sLineBreak +
|
|
' Files or Software without restriction, including without ' + sLineBreak +
|
|
' limitation the rights to use, copy, modify, merge, publish, ' + sLineBreak +
|
|
' distribute, and/or sell copies of the Data Files or Software, ' + sLineBreak +
|
|
' and to permit persons to whom the Data Files or Software are ' + sLineBreak +
|
|
' furnished to do so, provided that (a) the above copyright ' + sLineBreak +
|
|
' notice(s) and this permission notice appear with all copies ' + sLineBreak +
|
|
' of the Data Files or Software, (b) both the above copyright ' + sLineBreak +
|
|
' notice(s) and this permission notice appear in associated ' + sLineBreak +
|
|
' documentation, and (c) there is clear notice in each modified ' + sLineBreak +
|
|
' Data File or in the Software as well as in the documentation ' + sLineBreak +
|
|
' associated with the Data File(s) or Software that the data or ' + sLineBreak +
|
|
' software has been modified. ' + sLineBreak +
|
|
' ' + sLineBreak +
|
|
' ' + sLineBreak +
|
|
' This program is distributed in the hope that it will be useful, ' + sLineBreak +
|
|
' but WITHOUT ANY WARRANTY; without even the implied warranty of ' + sLineBreak +
|
|
' MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. }';
|
|
|
|
WEIGHT_LEVEL_COUNT = 3;
|
|
|
|
type
|
|
// Unicode General Category
|
|
TUnicodeCategory = (
|
|
ucUppercaseLetter, // Lu = Letter, uppercase
|
|
ucLowercaseLetter, // Ll = Letter, lowercase
|
|
ucTitlecaseLetter, // Lt = Letter, titlecase
|
|
ucModifierLetter, // Lm = Letter, modifier
|
|
ucOtherLetter, // Lo = Letter, other
|
|
|
|
ucNonSpacingMark, // Mn = Mark, nonspacing
|
|
ucCombiningMark, // Mc = Mark, spacing combining
|
|
ucEnclosingMark, // Me = Mark, enclosing
|
|
|
|
ucDecimalNumber, // Nd = Number, decimal digit
|
|
ucLetterNumber, // Nl = Number, letter
|
|
ucOtherNumber, // No = Number, other
|
|
|
|
ucConnectPunctuation, // Pc = Punctuation, connector
|
|
ucDashPunctuation, // Pd = Punctuation, dash
|
|
ucOpenPunctuation, // Ps = Punctuation, open
|
|
ucClosePunctuation, // Pe = Punctuation, close
|
|
ucInitialPunctuation, // Pi = Punctuation, initial quote (may behave like Ps or Pe depending on usage)
|
|
ucFinalPunctuation, // Pf = Punctuation, final quote (may behave like Ps or Pe depending on usage)
|
|
ucOtherPunctuation, // Po = Punctuation, other
|
|
|
|
ucMathSymbol, // Sm = Symbol, math
|
|
ucCurrencySymbol, // Sc = Symbol, currency
|
|
ucModifierSymbol, // Sk = Symbol, modifier
|
|
ucOtherSymbol, // So = Symbol, other
|
|
|
|
ucSpaceSeparator, // Zs = Separator, space
|
|
ucLineSeparator, // Zl = Separator, line
|
|
ucParagraphSeparator, // Zp = Separator, paragraph
|
|
|
|
ucControl, // Cc = Other, control
|
|
ucFormat, // Cf = Other, format
|
|
ucSurrogate, // Cs = Other, surrogate
|
|
ucPrivateUse, // Co = Other, private use
|
|
ucUnassigned // Cn = Other, not assigned (including noncharacters)
|
|
);
|
|
|
|
|
|
TUInt24Rec = packed record
|
|
public
|
|
{$ifdef FPC_LITTLE_ENDIAN}
|
|
byte0, byte1, byte2 : Byte;
|
|
{$else FPC_LITTLE_ENDIAN}
|
|
byte2, byte1, byte0 : Byte;
|
|
{$endif FPC_LITTLE_ENDIAN}
|
|
public
|
|
class operator Implicit(a : TUInt24Rec) : Cardinal;{$ifdef USE_INLINE}inline;{$ENDIF}
|
|
class operator Implicit(a : TUInt24Rec) : LongInt;{$ifdef USE_INLINE}inline;{$ENDIF}
|
|
class operator Implicit(a : TUInt24Rec) : Word;{$ifdef USE_INLINE}inline;{$ENDIF}
|
|
class operator Implicit(a : TUInt24Rec) : Byte;{$ifdef USE_INLINE}inline;{$ENDIF}
|
|
class operator Implicit(a : Cardinal) : TUInt24Rec;{$ifdef USE_INLINE}inline;{$ENDIF}
|
|
|
|
class operator Explicit(a : TUInt24Rec) : Cardinal;{$ifdef USE_INLINE}inline;{$ENDIF}
|
|
|
|
class operator Equal(a, b: TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
|
|
|
|
class operator Equal(a : TUInt24Rec; b : Cardinal): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
|
|
class operator Equal(a : Cardinal; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
|
|
|
|
class operator Equal(a : TUInt24Rec; b : LongInt): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
|
|
class operator Equal(a : LongInt; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
|
|
|
|
class operator Equal(a : TUInt24Rec; b : Word): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
|
|
class operator Equal(a : Word; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
|
|
|
|
class operator Equal(a : TUInt24Rec; b : Byte): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
|
|
class operator Equal(a : Byte; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
|
|
|
|
class operator NotEqual(a, b: TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
|
|
class operator NotEqual(a : TUInt24Rec; b : Cardinal): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
|
|
class operator NotEqual(a : Cardinal; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
|
|
class operator GreaterThan(a, b: TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
|
|
class operator GreaterThan(a : TUInt24Rec; b : Cardinal): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
|
|
class operator GreaterThan(a : Cardinal; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
|
|
class operator GreaterThanOrEqual(a, b: TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
|
|
class operator GreaterThanOrEqual(a : TUInt24Rec; b : Cardinal): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
|
|
class operator GreaterThanOrEqual(a : Cardinal; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
|
|
class operator LessThan(a, b: TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
|
|
class operator LessThan(a : TUInt24Rec; b : Cardinal): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
|
|
class operator LessThan(a : Cardinal; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
|
|
class operator LessThanOrEqual(a, b: TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
|
|
class operator LessThanOrEqual(a : TUInt24Rec; b : Cardinal): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
|
|
class operator LessThanOrEqual(a : Cardinal; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
|
|
end;
|
|
|
|
UInt24 = TUInt24Rec;
|
|
PUInt24 = ^UInt24;
|
|
TUnicodeCodePoint = Cardinal;
|
|
TUnicodeCodePointArray = array of TUnicodeCodePoint;
|
|
TDecompositionArray = array of TUnicodeCodePointArray;
|
|
TNumericValue = Double;
|
|
TNumericValueArray = array of TNumericValue;
|
|
|
|
TBlockItemRec = packed record
|
|
RangeStart : TUnicodeCodePoint;
|
|
RangeEnd : TUnicodeCodePoint;
|
|
Name : string[120];
|
|
CanonicalName : string[120];
|
|
end;
|
|
TBlocks = array of TBlockItemRec;
|
|
|
|
PPropRec = ^TPropRec;
|
|
|
|
{ TPropRec }
|
|
|
|
TPropRec = packed record
|
|
private
|
|
const FLAG_WHITE_SPACE = 0;
|
|
const FLAG_HANGUL_SYLLABLE = 1;
|
|
const FLAG_UNIFIED_IDEOGRAPH = 2;
|
|
private
|
|
function GetCategory : TUnicodeCategory;inline;
|
|
procedure SetCategory(AValue : TUnicodeCategory);
|
|
function GetWhiteSpace : Boolean;inline;
|
|
procedure SetWhiteSpace(AValue : Boolean);
|
|
function GetHangulSyllable : Boolean;inline;
|
|
procedure SetHangulSyllable(AValue : Boolean);
|
|
function GetUnifiedIdeograph : Boolean;inline;
|
|
procedure SetUnifiedIdeograph(AValue : Boolean);
|
|
public
|
|
CategoryData : Byte;
|
|
|
|
PropID : Word;
|
|
CCC : Byte; // Canonical Combining Class
|
|
NumericIndex : Byte;
|
|
SimpleUpperCase : UInt24;
|
|
SimpleLowerCase : UInt24;
|
|
DecompositionID : SmallInt;
|
|
public
|
|
property Category : TUnicodeCategory read GetCategory write SetCategory;
|
|
property WhiteSpace : Boolean read GetWhiteSpace write SetWhiteSpace;
|
|
property HangulSyllable : Boolean read GetHangulSyllable write SetHangulSyllable;
|
|
property UnifiedIdeograph : Boolean read GetUnifiedIdeograph write SetUnifiedIdeograph;
|
|
end;
|
|
TPropRecArray = array of TPropRec;
|
|
|
|
TDecompositionIndexRec = packed record
|
|
StartPosition : Word;
|
|
Length : Byte;
|
|
end;
|
|
TDecompositionBook = X_PACKED record
|
|
Index : array of TDecompositionIndexRec;
|
|
CodePoints : array of TUnicodeCodePoint;
|
|
end;
|
|
|
|
PDataLineRec = ^TDataLineRec;
|
|
TDataLineRec = record
|
|
PropID : Integer;
|
|
case LineType : Byte of
|
|
0 : (CodePoint : TUnicodeCodePoint);
|
|
1 : (StartCodePoint, EndCodePoint : TUnicodeCodePoint);
|
|
end;
|
|
TDataLineRecArray = array of TDataLineRec;
|
|
|
|
TCodePointRec = record
|
|
case LineType : Byte of
|
|
0 : (CodePoint : TUnicodeCodePoint);
|
|
1 : (StartCodePoint, EndCodePoint : TUnicodeCodePoint);
|
|
end;
|
|
TCodePointRecArray = array of TCodePointRec;
|
|
|
|
TPropListLineRec = packed record
|
|
CodePoint : TCodePointRec;
|
|
PropName : string[123];
|
|
end;
|
|
TPropListLineRecArray = array of TPropListLineRec;
|
|
|
|
{ TUCA_WeightRec }
|
|
|
|
TUCA_WeightRec = packed record
|
|
public
|
|
Weights : array[0..3] of Cardinal;
|
|
Variable : Boolean;
|
|
public
|
|
class operator Equal(a, b: TUCA_WeightRec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
|
|
end;
|
|
TUCA_WeightRecArray = array of TUCA_WeightRec;
|
|
|
|
PUCA_LineContextItemRec = ^TUCA_LineContextItemRec;
|
|
TUCA_LineContextItemRec = X_PACKED record
|
|
public
|
|
CodePoints : TUnicodeCodePointArray;
|
|
Weights : TUCA_WeightRecArray;
|
|
public
|
|
procedure Clear();
|
|
procedure Assign(ASource : PUCA_LineContextItemRec);
|
|
function Clone() : TUCA_LineContextItemRec;
|
|
end;
|
|
|
|
PUCA_LineContextRec = ^TUCA_LineContextRec;
|
|
TUCA_LineContextRec = X_PACKED record
|
|
public
|
|
Data : array of TUCA_LineContextItemRec;
|
|
public
|
|
procedure Clear();
|
|
procedure Assign(ASource : PUCA_LineContextRec);
|
|
function Clone() : TUCA_LineContextRec;
|
|
end;
|
|
|
|
PUCA_LineRec = ^TUCA_LineRec;
|
|
TUCA_LineRec = X_PACKED record
|
|
public
|
|
CodePoints : TUnicodeCodePointArray;
|
|
Weights : TUCA_WeightRecArray;
|
|
Context : TUCA_LineContextRec;
|
|
//Variable : Boolean;
|
|
Deleted : Boolean;
|
|
Stored : Boolean;
|
|
public
|
|
procedure Clear();
|
|
procedure Assign(ASource : PUCA_LineRec);
|
|
function Clone() : TUCA_LineRec;
|
|
function HasContext() : Boolean;
|
|
end;
|
|
TUCA_VariableKind = (
|
|
ucaShifted, ucaNonIgnorable, ucaBlanked, ucaShiftedTrimmed,
|
|
ucaIgnoreSP
|
|
);
|
|
TUCA_DataBook = X_PACKED record
|
|
Version : string;
|
|
VariableWeight : TUCA_VariableKind;
|
|
Backwards : array[0..3] of Boolean;
|
|
Lines : array of TUCA_LineRec;
|
|
end;
|
|
PUCA_DataBook = ^TUCA_DataBook;
|
|
TUCA_DataBookIndex = array of Integer;
|
|
|
|
type
|
|
TUCA_PropWeights = packed record
|
|
Weights : array[0..2] of Word;
|
|
//Variable : Byte;
|
|
end;
|
|
PUCA_PropWeights = ^TUCA_PropWeights;
|
|
|
|
TUCA_PropItemContextRec = packed record
|
|
CodePointCount : Byte;
|
|
WeightCount : Byte;
|
|
//CodePoints : UInt24;
|
|
//Weights : TUCA_PropWeights;
|
|
end;
|
|
PUCA_PropItemContextRec = ^TUCA_PropItemContextRec;
|
|
TUCA_PropItemContextTreeNodeRec = packed record
|
|
Left : Word;
|
|
Right : Word;
|
|
Data : TUCA_PropItemContextRec;
|
|
end;
|
|
PUCA_PropItemContextTreeNodeRec = ^TUCA_PropItemContextTreeNodeRec;
|
|
|
|
TUCA_PropItemContextTreeRec = packed record
|
|
public
|
|
Size : UInt24;
|
|
public
|
|
function GetData:PUCA_PropItemContextTreeNodeRec;inline;
|
|
property Data : PUCA_PropItemContextTreeNodeRec read GetData;
|
|
end;
|
|
PUCA_PropItemContextTreeRec = ^TUCA_PropItemContextTreeRec;
|
|
|
|
{ TUCA_PropItemRec }
|
|
|
|
TUCA_PropItemRec = packed record
|
|
private
|
|
const FLAG_VALID = 0;
|
|
const FLAG_CODEPOINT = 1;
|
|
const FLAG_CONTEXTUAL = 2;
|
|
const FLAG_DELETION = 3;
|
|
const FLAG_COMPRESS_WEIGHT_1 = 6;
|
|
const FLAG_COMPRESS_WEIGHT_2 = 7;
|
|
private
|
|
function GetWeightSize : Word;inline;
|
|
public
|
|
WeightLength : Byte;
|
|
ChildCount : Byte;
|
|
Size : Word;
|
|
Flags : Byte;
|
|
public
|
|
function HasCodePoint() : Boolean;inline;
|
|
function GetCodePoint() : UInt24;//inline;
|
|
property CodePoint : UInt24 read GetCodePoint;
|
|
//Weights : array[0..WeightLength] of TUCA_PropWeights;
|
|
procedure GetWeightArray(ADest : PUCA_PropWeights);
|
|
function GetSelfOnlySize() : Cardinal;inline;
|
|
|
|
procedure SetContextual(AValue : Boolean);inline;
|
|
function GetContextual() : Boolean;inline;
|
|
property Contextual : Boolean read GetContextual write setContextual;
|
|
function GetContext() : PUCA_PropItemContextTreeRec;
|
|
procedure SetDeleted(AValue : Boolean);inline;
|
|
function IsDeleted() : Boolean;inline;
|
|
function IsValid() : Boolean;inline;
|
|
function IsWeightCompress_1() : Boolean;inline;
|
|
function IsWeightCompress_2() : Boolean;inline;
|
|
end;
|
|
PUCA_PropItemRec = ^TUCA_PropItemRec;
|
|
TUCA_PropIndexItem = packed record
|
|
CodePoint : Cardinal;
|
|
Position : Integer;
|
|
end;
|
|
PUCA_PropIndexItem = ^TUCA_PropIndexItem;
|
|
TUCA_PropBook = X_PACKED record
|
|
ItemSize : Integer;
|
|
Index : array of TUCA_PropIndexItem;
|
|
Items : PUCA_PropItemRec; //Native Endian
|
|
ItemsOtherEndian : PUCA_PropItemRec;//Non Native Endian
|
|
VariableLowLimit : Word;
|
|
VariableHighLimit : Word;
|
|
end;
|
|
PUCA_PropBook = ^TUCA_PropBook;
|
|
|
|
TBmpFirstTable = array[0..255] of Byte;
|
|
TBmpSecondTableItem = array[0..255] of Word;
|
|
TBmpSecondTable = array of TBmpSecondTableItem;
|
|
|
|
T3lvlBmp1Table = array[0..255] of Byte;
|
|
T3lvlBmp2TableItem = array[0..15] of Word;
|
|
T3lvlBmp2Table = array of T3lvlBmp2TableItem;
|
|
T3lvlBmp3TableItem = array[0..15] of Word;
|
|
T3lvlBmp3Table = array of T3lvlBmp3TableItem;
|
|
|
|
TucaBmpFirstTable = array[0..255] of Byte;
|
|
TucaBmpSecondTableItem = array[0..255] of Cardinal;
|
|
TucaBmpSecondTable = array of TucaBmpSecondTableItem;
|
|
PucaBmpFirstTable = ^TucaBmpFirstTable;
|
|
PucaBmpSecondTable = ^TucaBmpSecondTable;
|
|
|
|
const
|
|
LOW_SURROGATE_BEGIN = Word($DC00);
|
|
LOW_SURROGATE_END = Word($DFFF);
|
|
LOW_SURROGATE_COUNT = LOW_SURROGATE_END - LOW_SURROGATE_BEGIN + 1;
|
|
|
|
HIGH_SURROGATE_BEGIN = Word($D800);
|
|
HIGH_SURROGATE_END = Word($DBFF);
|
|
HIGH_SURROGATE_COUNT = HIGH_SURROGATE_END - HIGH_SURROGATE_BEGIN + 1;
|
|
type
|
|
TOBmpFirstTable = array[0..(HIGH_SURROGATE_COUNT-1)] of Word;
|
|
TOBmpSecondTableItem = array[0..(LOW_SURROGATE_COUNT-1)] of Word;
|
|
TOBmpSecondTable = array of TOBmpSecondTableItem;
|
|
|
|
T3lvlOBmp1Table = array[0..1023] of Byte;
|
|
T3lvlOBmp2TableItem = array[0..31] of Word;
|
|
T3lvlOBmp2Table = array of T3lvlOBmp2TableItem;
|
|
T3lvlOBmp3TableItem = array[0..31] of Word;
|
|
T3lvlOBmp3Table = array of T3lvlOBmp3TableItem;
|
|
|
|
TucaOBmpFirstTable = array[0..(HIGH_SURROGATE_COUNT-1)] of Word;
|
|
TucaOBmpSecondTableItem = array[0..(LOW_SURROGATE_COUNT-1)] of Cardinal;
|
|
TucaOBmpSecondTable = array of TucaOBmpSecondTableItem;
|
|
PucaOBmpFirstTable = ^TucaOBmpFirstTable;
|
|
PucaOBmpSecondTable = ^TucaOBmpSecondTable;
|
|
|
|
type
|
|
TEndianKind = (ekLittle, ekBig);
|
|
const
|
|
ENDIAN_SUFFIX : array[TEndianKind] of string[2] = ('le','be');
|
|
{$IFDEF ENDIAN_LITTLE}
|
|
ENDIAN_NATIVE = ekLittle;
|
|
ENDIAN_NON_NATIVE = ekBig;
|
|
{$ENDIF ENDIAN_LITTLE}
|
|
{$IFDEF ENDIAN_BIG}
|
|
ENDIAN_NATIVE = ekBig;
|
|
ENDIAN_NON_NATIVE = ekLittle;
|
|
{$ENDIF ENDIAN_BIG}
|
|
|
|
procedure GenerateLicenceText(ADest : TStream);
|
|
|
|
function BoolToByte(AValue : Boolean): Byte;inline;
|
|
|
|
function IsHangulSyllable(
|
|
const ACodePoint : TUnicodeCodePoint;
|
|
const AHangulList : TCodePointRecArray
|
|
) : Boolean;
|
|
procedure ParseHangulSyllableTypes(
|
|
ADataAStream : TMemoryStream;
|
|
var ACodePointList : TCodePointRecArray
|
|
);
|
|
|
|
procedure ParseProps(
|
|
ADataAStream : TMemoryStream;
|
|
var APropList : TPropListLineRecArray
|
|
);
|
|
function FindCodePointsByProperty(
|
|
const APropName : string;
|
|
const APropList : TPropListLineRecArray
|
|
) : TCodePointRecArray;
|
|
|
|
procedure ParseBlokcs(
|
|
ADataAStream : TMemoryStream;
|
|
var ABlocks : TBlocks
|
|
);
|
|
procedure ParseUCAFile(
|
|
ADataAStream : TMemoryStream;
|
|
var ABook : TUCA_DataBook
|
|
);
|
|
procedure MakeUCA_Props(
|
|
ABook : PUCA_DataBook;
|
|
out AProps : PUCA_PropBook
|
|
);
|
|
procedure FreeUcaBook(var ABook : PUCA_PropBook);
|
|
procedure MakeUCA_BmpTables(
|
|
var AFirstTable : TucaBmpFirstTable;
|
|
var ASecondTable : TucaBmpSecondTable;
|
|
const APropBook : PUCA_PropBook
|
|
);
|
|
procedure MakeUCA_OBmpTables(
|
|
var AFirstTable : TucaOBmpFirstTable;
|
|
var ASecondTable : TucaOBmpSecondTable;
|
|
const APropBook : PUCA_PropBook
|
|
);
|
|
function GetPropPosition(
|
|
const AHighS,
|
|
ALowS : Word;
|
|
const AFirstTable : PucaOBmpFirstTable;
|
|
const ASecondTable : PucaOBmpSecondTable
|
|
): Integer;inline;overload;
|
|
|
|
procedure GenerateUCA_Head(
|
|
ADest : TStream;
|
|
ABook : PUCA_DataBook;
|
|
AProps : PUCA_PropBook
|
|
);
|
|
procedure GenerateUCA_BmpTables(
|
|
AStream,
|
|
ANativeEndianStream,
|
|
ANonNativeEndianStream : TStream;
|
|
var AFirstTable : TucaBmpFirstTable;
|
|
var ASecondTable : TucaBmpSecondTable
|
|
);
|
|
procedure GenerateBinaryUCA_BmpTables(
|
|
ANativeEndianStream,
|
|
ANonNativeEndianStream : TStream;
|
|
var AFirstTable : TucaBmpFirstTable;
|
|
var ASecondTable : TucaBmpSecondTable
|
|
);
|
|
procedure GenerateUCA_PropTable(
|
|
ADest : TStream;
|
|
const APropBook : PUCA_PropBook;
|
|
const AEndian : TEndianKind
|
|
);
|
|
procedure GenerateBinaryUCA_PropTable(
|
|
// WARNING : files must be generated for each endianess (Little / Big)
|
|
ANativeEndianStream,
|
|
ANonNativeEndianStream : TStream;
|
|
const APropBook : PUCA_PropBook
|
|
);
|
|
procedure GenerateUCA_OBmpTables(
|
|
AStream,
|
|
ANativeEndianStream,
|
|
ANonNativeEndianStream : TStream;
|
|
var AFirstTable : TucaOBmpFirstTable;
|
|
var ASecondTable : TucaOBmpSecondTable
|
|
);
|
|
procedure GenerateBinaryUCA_OBmpTables(
|
|
ANativeEndianStream,
|
|
ANonNativeEndianStream : TStream;
|
|
var AFirstTable : TucaOBmpFirstTable;
|
|
var ASecondTable : TucaOBmpSecondTable
|
|
);
|
|
|
|
procedure Parse_UnicodeData(
|
|
ADataAStream : TMemoryStream;
|
|
var APropList : TPropRecArray;
|
|
var ANumericTable : TNumericValueArray;
|
|
var ADataLineList : TDataLineRecArray;
|
|
var ADecomposition : TDecompositionArray;
|
|
const AHangulList : TCodePointRecArray;
|
|
const AWhiteSpaces : TCodePointRecArray;
|
|
const AUnifiedIdeographs : TCodePointRecArray
|
|
);
|
|
procedure MakeDecomposition(
|
|
const ARawData : TDecompositionArray;
|
|
var ABook : TDecompositionBook
|
|
);
|
|
|
|
procedure MakeBmpTables(
|
|
var AFirstTable : TBmpFirstTable;
|
|
var ASecondTable : TBmpSecondTable;
|
|
const ADataLineList : TDataLineRecArray
|
|
);
|
|
procedure MakeBmpTables3Levels(
|
|
var AFirstTable : T3lvlBmp1Table;
|
|
var ASecondTable : T3lvlBmp2Table;
|
|
var AThirdTable : T3lvlBmp3Table;
|
|
const ADataLineList : TDataLineRecArray
|
|
);
|
|
procedure GenerateBmpTables(
|
|
ADest : TStream;
|
|
var AFirstTable : TBmpFirstTable;
|
|
var ASecondTable : TBmpSecondTable
|
|
);
|
|
procedure Generate3lvlBmpTables(
|
|
ADest : TStream;
|
|
var AFirstTable : T3lvlBmp1Table;
|
|
var ASecondTable : T3lvlBmp2Table;
|
|
var AThirdTable : T3lvlBmp3Table
|
|
);
|
|
procedure GeneratePropTable(
|
|
ADest : TStream;
|
|
const APropList : TPropRecArray;
|
|
const AEndian : TEndianKind
|
|
);
|
|
procedure GenerateNumericTable(
|
|
ADest : TStream;
|
|
const ANumList : TNumericValueArray;
|
|
const ACompleteUnit : Boolean
|
|
);
|
|
procedure GenerateDecompositionBookTable(
|
|
ADest : TStream;
|
|
const ABook : TDecompositionBook;
|
|
const AEndian : TEndianKind
|
|
);
|
|
procedure GenerateOutBmpTable(
|
|
ADest : TStream;
|
|
const AList : TDataLineRecArray
|
|
);
|
|
|
|
function Compress(const AData : TDataLineRecArray) : TDataLineRecArray;
|
|
|
|
function EvaluateFloat(const AStr : string) : Double;
|
|
function StrToCategory(const AStr : string) : TUnicodeCategory;
|
|
function StringToCodePoint(ACP : string) : TUnicodeCodePoint;
|
|
function IsWhiteSpace(
|
|
const ACodePoint : TUnicodeCodePoint;
|
|
const AWhiteSpaces : TCodePointRecArray
|
|
) : Boolean;inline;
|
|
function IsIncluded(
|
|
const ACodePoint : TUnicodeCodePoint;
|
|
const AList : TCodePointRecArray
|
|
) : Boolean;
|
|
|
|
function GetPropID(
|
|
ACodePoint : TUnicodeCodePoint;
|
|
const ADataLineList : TDataLineRecArray
|
|
) : Cardinal;
|
|
|
|
//--------------------
|
|
procedure MakeOBmpTables(
|
|
var AFirstTable : TOBmpFirstTable;
|
|
var ASecondTable : TOBmpSecondTable;
|
|
const ADataLineList : TDataLineRecArray
|
|
);
|
|
procedure MakeOBmpTables3Levels(
|
|
var AFirstTable : T3lvlOBmp1Table;
|
|
var ASecondTable : T3lvlOBmp2Table;
|
|
var AThirdTable : T3lvlOBmp3Table;
|
|
const ADataLineList : TDataLineRecArray
|
|
);
|
|
procedure GenerateOBmpTables(
|
|
ADest : TStream;
|
|
var AFirstTable : TOBmpFirstTable;
|
|
var ASecondTable : TOBmpSecondTable
|
|
);
|
|
procedure Generate3lvlOBmpTables(
|
|
ADest : TStream;
|
|
var AFirstTable : T3lvlOBmp1Table;
|
|
var ASecondTable : T3lvlOBmp2Table;
|
|
var AThirdTable : T3lvlOBmp3Table
|
|
);
|
|
function GetProp(
|
|
const AHighS,
|
|
ALowS : Word;
|
|
const AProps : TPropRecArray;
|
|
var AFirstTable : TOBmpFirstTable;
|
|
var ASecondTable : TOBmpSecondTable
|
|
): PPropRec; inline;overload;
|
|
function GetProp(
|
|
const AHighS,
|
|
ALowS : Word;
|
|
const AProps : TPropRecArray;
|
|
var AFirstTable : T3lvlOBmp1Table;
|
|
var ASecondTable : T3lvlOBmp2Table;
|
|
var AThirdTable : T3lvlOBmp3Table
|
|
): PPropRec; inline;overload;
|
|
procedure FromUCS4(const AValue : TUnicodeCodePoint; var AHighS, ALowS : Word);inline;
|
|
function ToUCS4(const AHighS, ALowS : Word) : TUnicodeCodePoint; inline;
|
|
|
|
type
|
|
TBitOrder = 0..7;
|
|
|
|
function IsBitON(const AData : Byte; const ABit : TBitOrder) : Boolean ;{$IFDEF USE_INLINE}inline;{$ENDIF}
|
|
procedure SetBit(var AData : Byte; const ABit : TBitOrder; const AValue : Boolean);{$IFDEF USE_INLINE}inline;{$ENDIF}
|
|
|
|
function GenerateEndianIncludeFileName(
|
|
const AStoreName : string;
|
|
const AEndian : TEndianKind
|
|
): string;inline;
|
|
|
|
procedure ReverseFromNativeEndian(
|
|
const AData : PUCA_PropItemRec;
|
|
const ADataLen : Cardinal;
|
|
const ADest : PUCA_PropItemRec
|
|
);
|
|
procedure ReverseToNativeEndian(
|
|
const AData : PUCA_PropItemRec;
|
|
const ADataLen : Cardinal;
|
|
const ADest : PUCA_PropItemRec
|
|
);
|
|
procedure CompareProps(
|
|
const AProp1,
|
|
AProp2 : PUCA_PropItemRec;
|
|
const ADataLen : Integer
|
|
);
|
|
|
|
type
|
|
TCollationName = array[0..(128-1)] of Byte;
|
|
TCollationVersion = TCollationName;
|
|
TSerializedCollationHeader = packed record
|
|
Base : TCollationName;
|
|
Version : TCollationVersion;
|
|
CollationName : TCollationName;
|
|
CollationAliases : TCollationName; // ";" separated
|
|
VariableWeight : Byte;
|
|
Backwards : Byte;
|
|
BMP_Table1Length : DWord;
|
|
BMP_Table2Length : DWord;
|
|
OBMP_Table1Length : DWord;
|
|
OBMP_Table2Length : DWord;
|
|
PropCount : DWord;
|
|
VariableLowLimit : Word;
|
|
VariableHighLimit : Word;
|
|
NoNormalization : Byte;
|
|
Strength : Byte;
|
|
ChangedFields : Byte;
|
|
end;
|
|
PSerializedCollationHeader = ^TSerializedCollationHeader;
|
|
|
|
procedure StringToByteArray(AStr : UnicodeString; var ABuffer : array of Byte);overload;
|
|
procedure StringToByteArray(AStr : UnicodeString; ABuffer : PByte; const ABufferLength : Integer);overload;
|
|
|
|
procedure ReverseRecordBytes(var AItem : TSerializedCollationHeader);
|
|
procedure ReverseBytes(var AData; const ALength : Integer);
|
|
procedure ReverseArray(var AValue; const AArrayLength, AItemSize : PtrInt);
|
|
|
|
function CalcMaxLevel2Value(ALines : array of TUCA_LineRec) : Cardinal;
|
|
procedure RewriteLevel2Values(ALines : PUCA_LineRec; ALength : Integer);
|
|
function RewriteLevel2(
|
|
const ALevel1Value : Cardinal;
|
|
ALines : PUCA_LineRec;
|
|
const ALinesLength : Integer
|
|
) : Integer;
|
|
|
|
resourcestring
|
|
SInsufficientMemoryBuffer = 'Insufficient Memory Buffer';
|
|
|
|
implementation
|
|
uses
|
|
typinfo, Math, AVL_Tree,
|
|
trie;
|
|
|
|
|
|
type
|
|
|
|
TCardinalRec = packed record
|
|
{$ifdef FPC_LITTLE_ENDIAN}
|
|
byte0, byte1, byte2, byte3 : Byte;
|
|
{$else FPC_LITTLE_ENDIAN}
|
|
byte3, byte2, byte1, byte0 : Byte;
|
|
{$endif FPC_LITTLE_ENDIAN}
|
|
end;
|
|
|
|
TWordRec = packed record
|
|
{$ifdef FPC_LITTLE_ENDIAN}
|
|
byte0, byte1 : Byte;
|
|
{$else FPC_LITTLE_ENDIAN}
|
|
byte1, byte0 : Byte;
|
|
{$endif FPC_LITTLE_ENDIAN}
|
|
end;
|
|
|
|
{ TUInt24Rec }
|
|
|
|
class operator TUInt24Rec.Explicit(a : TUInt24Rec) : Cardinal;
|
|
begin
|
|
TCardinalRec(Result).byte0 := a.byte0;
|
|
TCardinalRec(Result).byte1 := a.byte1;
|
|
TCardinalRec(Result).byte2 := a.byte2;
|
|
TCardinalRec(Result).byte3 := 0;
|
|
end;
|
|
|
|
class operator TUInt24Rec.Implicit(a : TUInt24Rec) : Cardinal;
|
|
begin
|
|
TCardinalRec(Result).byte0 := a.byte0;
|
|
TCardinalRec(Result).byte1 := a.byte1;
|
|
TCardinalRec(Result).byte2 := a.byte2;
|
|
TCardinalRec(Result).byte3 := 0;
|
|
end;
|
|
|
|
class operator TUInt24Rec.Implicit(a : TUInt24Rec) : LongInt;
|
|
begin
|
|
Result := Cardinal(a);
|
|
end;
|
|
|
|
class operator TUInt24Rec.Implicit(a : TUInt24Rec) : Word;
|
|
begin
|
|
{$IFOPT R+}
|
|
if (a.byte2 > 0) then
|
|
Error(reIntOverflow);
|
|
{$ENDIF R+}
|
|
TWordRec(Result).byte0 := a.byte0;
|
|
TWordRec(Result).byte1 := a.byte1;
|
|
end;
|
|
|
|
class operator TUInt24Rec.Implicit(a : TUInt24Rec) : Byte;
|
|
begin
|
|
{$IFOPT R+}
|
|
if (a.byte1 > 0) or (a.byte2 > 0) then
|
|
Error(reIntOverflow);
|
|
{$ENDIF R+}
|
|
Result := a.byte0;
|
|
end;
|
|
|
|
class operator TUInt24Rec.Implicit(a : Cardinal) : TUInt24Rec;
|
|
begin
|
|
{$IFOPT R+}
|
|
if (a > $FFFFFF) then
|
|
Error(reIntOverflow);
|
|
{$ENDIF R+}
|
|
Result.byte0 := TCardinalRec(a).byte0;
|
|
Result.byte1 := TCardinalRec(a).byte1;
|
|
Result.byte2 := TCardinalRec(a).byte2;
|
|
end;
|
|
|
|
class operator TUInt24Rec.Equal(a, b : TUInt24Rec) : Boolean;
|
|
begin
|
|
Result := (a.byte0 = b.byte0) and (a.byte1 = b.byte1) and (a.byte2 = b.byte2);
|
|
end;
|
|
|
|
class operator TUInt24Rec.Equal(a : TUInt24Rec; b : Cardinal) : Boolean;
|
|
begin
|
|
Result := (TCardinalRec(b).byte3 = 0) and
|
|
(a.byte0 = TCardinalRec(b).byte0) and
|
|
(a.byte1 = TCardinalRec(b).byte1) and
|
|
(a.byte2 = TCardinalRec(b).byte2);
|
|
end;
|
|
|
|
class operator TUInt24Rec.Equal(a : Cardinal; b : TUInt24Rec) : Boolean;
|
|
begin
|
|
Result := (b = a);
|
|
end;
|
|
|
|
class operator TUInt24Rec.Equal(a : TUInt24Rec; b : LongInt) : Boolean;
|
|
begin
|
|
Result := (LongInt(a) = b);
|
|
end;
|
|
|
|
class operator TUInt24Rec.Equal(a : LongInt; b : TUInt24Rec) : Boolean;
|
|
begin
|
|
Result := (b = a);
|
|
end;
|
|
|
|
class operator TUInt24Rec.Equal(a : TUInt24Rec; b : Word) : Boolean;
|
|
begin
|
|
Result := (a.byte2 = 0) and
|
|
(a.byte0 = TWordRec(b).byte0) and
|
|
(a.byte1 = TWordRec(b).byte1);
|
|
end;
|
|
|
|
class operator TUInt24Rec.Equal(a : Word; b : TUInt24Rec) : Boolean;
|
|
begin
|
|
Result := (b = a);
|
|
end;
|
|
|
|
class operator TUInt24Rec.Equal(a : TUInt24Rec; b : Byte) : Boolean;
|
|
begin
|
|
Result := (a.byte2 = 0) and
|
|
(a.byte1 = 0) and
|
|
(a.byte0 = b);
|
|
end;
|
|
|
|
class operator TUInt24Rec.Equal(a : Byte; b : TUInt24Rec) : Boolean;
|
|
begin
|
|
Result := (b = a);
|
|
end;
|
|
|
|
class operator TUInt24Rec.NotEqual(a, b : TUInt24Rec) : Boolean;
|
|
begin
|
|
Result := (a.byte0 <> b.byte0) or (a.byte1 <> b.byte1) or (a.byte2 <> b.byte2);
|
|
end;
|
|
|
|
class operator TUInt24Rec.NotEqual(a : TUInt24Rec; b : Cardinal) : Boolean;
|
|
begin
|
|
Result := (TCardinalRec(b).byte3 <> 0) or
|
|
(a.byte0 <> TCardinalRec(b).byte0) or
|
|
(a.byte1 <> TCardinalRec(b).byte1) or
|
|
(a.byte2 <> TCardinalRec(b).byte2);
|
|
end;
|
|
|
|
class operator TUInt24Rec.NotEqual(a : Cardinal; b : TUInt24Rec) : Boolean;
|
|
begin
|
|
Result := (b <> a);
|
|
end;
|
|
|
|
class operator TUInt24Rec.GreaterThan(a, b: TUInt24Rec): Boolean;
|
|
begin
|
|
Result := (a.byte2 > b.byte2) or
|
|
((a.byte2 = b.byte2) and (a.byte1 > b.byte1)) or
|
|
((a.byte2 = b.byte2) and (a.byte1 = b.byte1) and (a.byte0 > b.byte0));
|
|
end;
|
|
|
|
class operator TUInt24Rec.GreaterThan(a: TUInt24Rec; b: Cardinal): Boolean;
|
|
begin
|
|
Result := Cardinal(a) > b;
|
|
end;
|
|
|
|
class operator TUInt24Rec.GreaterThan(a: Cardinal; b: TUInt24Rec): Boolean;
|
|
begin
|
|
Result := a > Cardinal(b);
|
|
end;
|
|
|
|
class operator TUInt24Rec.GreaterThanOrEqual(a, b: TUInt24Rec): Boolean;
|
|
begin
|
|
Result := (a.byte2 > b.byte2) or
|
|
((a.byte2 = b.byte2) and (a.byte1 > b.byte1)) or
|
|
((a.byte2 = b.byte2) and (a.byte1 = b.byte1) and (a.byte0 >= b.byte0));
|
|
end;
|
|
|
|
class operator TUInt24Rec.GreaterThanOrEqual(a: TUInt24Rec; b: Cardinal): Boolean;
|
|
begin
|
|
Result := Cardinal(a) >= b;
|
|
end;
|
|
|
|
class operator TUInt24Rec.GreaterThanOrEqual(a: Cardinal; b: TUInt24Rec): Boolean;
|
|
begin
|
|
Result := a >= Cardinal(b);
|
|
end;
|
|
|
|
class operator TUInt24Rec.LessThan(a, b: TUInt24Rec): Boolean;
|
|
begin
|
|
Result := (b > a);
|
|
end;
|
|
|
|
class operator TUInt24Rec.LessThan(a: TUInt24Rec; b: Cardinal): Boolean;
|
|
begin
|
|
Result := Cardinal(a) < b;
|
|
end;
|
|
|
|
class operator TUInt24Rec.LessThan(a: Cardinal; b: TUInt24Rec): Boolean;
|
|
begin
|
|
Result := a < Cardinal(b);
|
|
end;
|
|
|
|
class operator TUInt24Rec.LessThanOrEqual(a, b: TUInt24Rec): Boolean;
|
|
begin
|
|
Result := (b >= a);
|
|
end;
|
|
|
|
class operator TUInt24Rec.LessThanOrEqual(a: TUInt24Rec; b: Cardinal): Boolean;
|
|
begin
|
|
Result := Cardinal(a) <= b;
|
|
end;
|
|
|
|
class operator TUInt24Rec.LessThanOrEqual(a: Cardinal; b: TUInt24Rec): Boolean;
|
|
begin
|
|
Result := a <= Cardinal(b);
|
|
end;
|
|
|
|
{ TUCA_WeightRec }
|
|
|
|
class operator TUCA_WeightRec.Equal(a, b : TUCA_WeightRec) : Boolean;
|
|
begin
|
|
Result := (a.Weights[0] = b.Weights[0]) and (a.Weights[1] = b.Weights[1]) and
|
|
(a.Weights[2] = b.Weights[2]) and (a.Weights[3] = b.Weights[3]) and
|
|
(a.Variable = b.Variable);
|
|
end;
|
|
|
|
procedure StringToByteArray(AStr : UnicodeString; var ABuffer : array of Byte);
|
|
begin
|
|
StringToByteArray(AStr,@(ABuffer[Low(ABuffer)]),Length(ABuffer));
|
|
end;
|
|
|
|
procedure StringToByteArray(AStr : UnicodeString; ABuffer : PByte; const ABufferLength : Integer);
|
|
var
|
|
c, i, bl : Integer;
|
|
ps : PWord;
|
|
pb : PByte;
|
|
begin
|
|
if (ABufferLength < 1) then
|
|
exit;
|
|
c := Length(AStr);
|
|
if (c > ABufferLength) then
|
|
c := ABufferLength;
|
|
bl := 0;
|
|
pb := ABuffer;
|
|
if (c > 0) then begin
|
|
ps := PWord(@AStr[1]);
|
|
for i := 1 to c do begin
|
|
if (ps^ <= High(Byte)) then begin
|
|
pb^ := ps^;
|
|
bl := bl+1;
|
|
Inc(pb);
|
|
end;
|
|
Inc(ps);
|
|
end;
|
|
end;
|
|
if (bl < ABufferLength) then begin
|
|
for i := bl+1 to ABufferLength do begin
|
|
pb^:= 0;
|
|
Inc(pb);
|
|
end;
|
|
end;
|
|
end;
|
|
|
|
function GenerateEndianIncludeFileName(
|
|
const AStoreName : string;
|
|
const AEndian : TEndianKind
|
|
): string;inline;
|
|
begin
|
|
Result := ExtractFilePath(AStoreName) +
|
|
ChangeFileExt(ExtractFileName(AStoreName),Format('_%s.inc',[ENDIAN_SUFFIX[AEndian]]));
|
|
end;
|
|
|
|
function IsBitON(const AData : Byte; const ABit : TBitOrder) : Boolean ;
|
|
begin
|
|
Result := ( ( AData and ( 1 shl ABit ) ) <> 0 );
|
|
end;
|
|
|
|
procedure SetBit(var AData : Byte; const ABit : TBitOrder; const AValue : Boolean);
|
|
begin
|
|
if AValue then
|
|
AData := AData or (1 shl (ABit mod 8))
|
|
else
|
|
AData := AData and ( not ( 1 shl ( ABit mod 8 ) ) );
|
|
end;
|
|
|
|
var
|
|
FS : TFormatSettings;
|
|
|
|
function EvaluateFloat(const AStr : string) : Double;
|
|
var
|
|
s, n, d : string;
|
|
i : Integer;
|
|
begin
|
|
Result := 0;
|
|
s := Trim(AStr);
|
|
if (Length(s) > 0) then begin
|
|
i := Pos('/',s);
|
|
if (i < 1) then
|
|
Result := StrToFloat(s,FS)
|
|
else begin
|
|
n := Copy(s,1,i-1);
|
|
d := Copy(s,i+1,MaxInt);
|
|
Result := StrToInt(n) / StrToInt(d);
|
|
end;
|
|
end;
|
|
end;
|
|
|
|
function StrToCategory(const AStr : string) : TUnicodeCategory;
|
|
var
|
|
s : string;
|
|
begin
|
|
s := UpperCase(Trim(AStr));
|
|
if (s = 'LU') then
|
|
Result := ucUppercaseLetter
|
|
else if (s = 'LL') then
|
|
Result := ucLowercaseLetter
|
|
else if (s = 'LT') then
|
|
Result := ucTitlecaseLetter
|
|
else if (s = 'LM') then
|
|
Result := ucModifierLetter
|
|
else if (s = 'LO') then
|
|
Result := ucOtherLetter
|
|
else
|
|
|
|
if (s = 'MN') then
|
|
Result := ucNonSpacingMark
|
|
else if (s = 'MC') then
|
|
Result := ucCombiningMark
|
|
else if (s = 'ME') then
|
|
Result := ucEnclosingMark
|
|
else
|
|
|
|
if (s = 'ND') then
|
|
Result := ucDecimalNumber
|
|
else if (s = 'NL') then
|
|
Result := ucLetterNumber
|
|
else if (s = 'NO') then
|
|
Result := ucOtherNumber
|
|
else
|
|
|
|
if (s = 'PC') then
|
|
Result := ucConnectPunctuation
|
|
else if (s = 'PD') then
|
|
Result := ucDashPunctuation
|
|
else if (s = 'PS') then
|
|
Result := ucOpenPunctuation
|
|
else if (s = 'PE') then
|
|
Result := ucClosePunctuation
|
|
else if (s = 'PI') then
|
|
Result := ucInitialPunctuation
|
|
else if (s = 'PF') then
|
|
Result := ucFinalPunctuation
|
|
else if (s = 'PO') then
|
|
Result := ucOtherPunctuation
|
|
else
|
|
|
|
if (s = 'SM') then
|
|
Result := ucMathSymbol
|
|
else if (s = 'SC') then
|
|
Result := ucCurrencySymbol
|
|
else if (s = 'SK') then
|
|
Result := ucModifierSymbol
|
|
else if (s = 'SO') then
|
|
Result := ucOtherSymbol
|
|
else
|
|
|
|
if (s = 'ZS') then
|
|
Result := ucSpaceSeparator
|
|
else if (s = 'ZL') then
|
|
Result := ucLineSeparator
|
|
else if (s = 'ZP') then
|
|
Result := ucParagraphSeparator
|
|
else
|
|
|
|
if (s = 'CC') then
|
|
Result := ucControl
|
|
else if (s = 'CF') then
|
|
Result := ucFormat
|
|
else if (s = 'CS') then
|
|
Result := ucSurrogate
|
|
else if (s = 'CO') then
|
|
Result := ucPrivateUse
|
|
else
|
|
Result := ucUnassigned;
|
|
end;
|
|
|
|
function StringToCodePoint(ACP : string) : TUnicodeCodePoint;
|
|
var
|
|
s : string;
|
|
begin
|
|
s := Trim(ACP);
|
|
Result := 0;
|
|
if (Length(s) > 0) and (s <> '#') then
|
|
Result := StrToInt('$' + s);
|
|
end;
|
|
|
|
function IsIncluded(
|
|
const ACodePoint : TUnicodeCodePoint;
|
|
const AList : TCodePointRecArray
|
|
) : Boolean;
|
|
var
|
|
i : Integer;
|
|
p : ^TCodePointRec;
|
|
begin
|
|
Result := False;
|
|
p := @AList[Low(AList)];
|
|
for i := Low(AList) to High(AList) do begin
|
|
if (p^.LineType = 0) then begin
|
|
if (p^.CodePoint = ACodePoint) then begin
|
|
Result := True;
|
|
break;
|
|
end;
|
|
end else begin
|
|
if (ACodePoint >= p^.StartCodePoint) and (ACodePoint <= p^.EndCodePoint) then begin
|
|
Result := True;
|
|
break;
|
|
end;
|
|
end;
|
|
Inc(p);
|
|
end;
|
|
end;
|
|
|
|
{function IsWhiteSpace(const ACodePoint : TUnicodeCodePoint) : Boolean;
|
|
begin
|
|
case ACodePoint of
|
|
$0009..$000D : Result := True;// White_Space # Cc [5] <control-0009>..<control-000D>
|
|
$0020 : Result := True;// White_Space # Zs SPACE
|
|
$0085 : Result := True;// White_Space # Cc <control-0085>
|
|
$00A0 : Result := True;// White_Space # Zs NO-BREAK SPACE
|
|
$1680 : Result := True;// White_Space # Zs OGHAM SPACE MARK
|
|
$180E : Result := True;// White_Space # Zs MONGOLIAN VOWEL SEPARATOR
|
|
$2000..$200A : Result := True;// White_Space # Zs [11] EN QUAD..HAIR SPACE
|
|
$2028 : Result := True;// White_Space # Zl LINE SEPARATOR
|
|
$2029 : Result := True;// White_Space # Zp PARAGRAPH SEPARATOR
|
|
$202F : Result := True;// White_Space # Zs NARROW NO-BREAK SPACE
|
|
$205F : Result := True;// White_Space # Zs MEDIUM MATHEMATICAL SPACE
|
|
$3000 : Result := True;// White_Space # Zs IDEOGRAPHIC SPACE
|
|
else
|
|
Result := False;
|
|
end;
|
|
end;}
|
|
function IsWhiteSpace(
|
|
const ACodePoint : TUnicodeCodePoint;
|
|
const AWhiteSpaces : TCodePointRecArray
|
|
) : Boolean;
|
|
begin
|
|
Result := IsIncluded(ACodePoint,AWhiteSpaces);
|
|
end;
|
|
|
|
function NormalizeBlockName(const AName : string) : string;
|
|
var
|
|
i, c, k : Integer;
|
|
s : string;
|
|
begin
|
|
c := Length(AName);
|
|
SetLength(Result,c);
|
|
s := LowerCase(AName);
|
|
k := 0;
|
|
for i := 1 to c do begin
|
|
if (s[1] in ['a'..'z','0'..'9','-']) then begin
|
|
k := k + 1;
|
|
Result[k] := s[i];
|
|
end;
|
|
end;
|
|
SetLength(Result,k);
|
|
end;
|
|
|
|
procedure ParseBlokcs(
|
|
ADataAStream : TMemoryStream;
|
|
var ABlocks : TBlocks
|
|
);
|
|
const
|
|
LINE_LENGTH = 1024;
|
|
DATA_LENGTH = 25000;
|
|
var
|
|
p : PAnsiChar;
|
|
actualDataLen : Integer;
|
|
bufferLength, bufferPos, lineLength, linePos : Integer;
|
|
line : ansistring;
|
|
|
|
function NextLine() : Boolean;
|
|
var
|
|
locOldPos : Integer;
|
|
locOldPointer : PAnsiChar;
|
|
begin
|
|
Result := False;
|
|
locOldPointer := p;
|
|
locOldPos := bufferPos;
|
|
while (bufferPos < bufferLength) and (p^ <> #10) do begin
|
|
Inc(p);
|
|
Inc(bufferPos);
|
|
end;
|
|
if (locOldPos = bufferPos) and (p^ = #10) then begin
|
|
lineLength := 0;
|
|
Inc(p);
|
|
Inc(bufferPos);
|
|
linePos := 1;
|
|
Result := True;
|
|
end else if (locOldPos < bufferPos) then begin
|
|
lineLength := (bufferPos - locOldPos);
|
|
Move(locOldPointer^,line[1],lineLength);
|
|
if (p^ = #10) then begin
|
|
Dec(lineLength);
|
|
Inc(p);
|
|
Inc(bufferPos);
|
|
end;
|
|
linePos := 1;
|
|
Result := True;
|
|
end;
|
|
end;
|
|
|
|
function NextToken() : ansistring;
|
|
var
|
|
k : Integer;
|
|
begin
|
|
k := linePos;
|
|
if (linePos < lineLength) and (line[linePos] in [';','#','.']) then begin
|
|
Inc(linePos);
|
|
Result := Copy(line,k,(linePos-k));
|
|
exit;
|
|
end;
|
|
while (linePos < lineLength) and not(line[linePos] in [';','#','.']) do
|
|
Inc(linePos);
|
|
if (linePos > k) then begin
|
|
if (line[linePos] in [';','#','.']) then
|
|
Result := Copy(line,k,(linePos-k))
|
|
else
|
|
Result := Copy(line,k,(linePos-k+1));
|
|
Result := Trim(Result);
|
|
end else begin
|
|
Result := '';
|
|
end;
|
|
end;
|
|
|
|
procedure ParseLine();
|
|
var
|
|
locData : TBlockItemRec;
|
|
s : ansistring;
|
|
begin
|
|
s := NextToken();
|
|
if (s = '') or (s[1] = '#') then
|
|
exit;
|
|
locData.RangeStart := StrToInt('$'+s);
|
|
s := NextToken();
|
|
if (s <> '.') then
|
|
raise Exception.CreateFmt('"." expected but "%s" found.',[s]);
|
|
s := NextToken();
|
|
if (s <> '.') then
|
|
raise Exception.CreateFmt('"." expected but "%s" found.',[s]);
|
|
s := NextToken();
|
|
locData.RangeEnd := StrToInt('$'+s);
|
|
s := NextToken();
|
|
if (s <> ';') then
|
|
raise Exception.CreateFmt('";" expected but "%s" found.',[s]);
|
|
locData.Name := Trim(NextToken());
|
|
locData.CanonicalName := NormalizeBlockName(locData.Name);
|
|
if (Length(ABlocks) <= actualDataLen) then
|
|
SetLength(ABlocks,Length(ABlocks)*2);
|
|
ABlocks[actualDataLen] := locData;
|
|
Inc(actualDataLen);
|
|
end;
|
|
|
|
procedure Prepare();
|
|
begin
|
|
SetLength(ABlocks,DATA_LENGTH);
|
|
actualDataLen := 0;
|
|
bufferLength := ADataAStream.Size;
|
|
bufferPos := 0;
|
|
p := ADataAStream.Memory;
|
|
lineLength := 0;
|
|
SetLength(line,LINE_LENGTH);
|
|
end;
|
|
|
|
begin
|
|
Prepare();
|
|
while NextLine() do
|
|
ParseLine();
|
|
SetLength(ABlocks,actualDataLen);
|
|
end;
|
|
|
|
procedure ParseProps(
|
|
ADataAStream : TMemoryStream;
|
|
var APropList : TPropListLineRecArray
|
|
);
|
|
const
|
|
LINE_LENGTH = 1024;
|
|
DATA_LENGTH = 25000;
|
|
var
|
|
p : PAnsiChar;
|
|
actualDataLen : Integer;
|
|
bufferLength, bufferPos, lineLength, linePos : Integer;
|
|
line : ansistring;
|
|
|
|
function NextLine() : Boolean;
|
|
var
|
|
locOldPos : Integer;
|
|
locOldPointer : PAnsiChar;
|
|
begin
|
|
Result := False;
|
|
locOldPointer := p;
|
|
locOldPos := bufferPos;
|
|
while (bufferPos < bufferLength) and (p^ <> #10) do begin
|
|
Inc(p);
|
|
Inc(bufferPos);
|
|
end;
|
|
if (locOldPos = bufferPos) and (p^ = #10) then begin
|
|
lineLength := 0;
|
|
Inc(p);
|
|
Inc(bufferPos);
|
|
linePos := 1;
|
|
Result := True;
|
|
end else if (locOldPos < bufferPos) then begin
|
|
lineLength := (bufferPos - locOldPos);
|
|
Move(locOldPointer^,line[1],lineLength);
|
|
if (p^ = #10) then begin
|
|
Dec(lineLength);
|
|
Inc(p);
|
|
Inc(bufferPos);
|
|
end;
|
|
linePos := 1;
|
|
Result := True;
|
|
end;
|
|
end;
|
|
|
|
function NextToken() : ansistring;
|
|
var
|
|
k : Integer;
|
|
begin
|
|
k := linePos;
|
|
if (linePos < lineLength) and (line[linePos] in [';','#','.']) then begin
|
|
Inc(linePos);
|
|
Result := Copy(line,k,(linePos-k));
|
|
exit;
|
|
end;
|
|
while (linePos < lineLength) and not(line[linePos] in [';','#','.']) do
|
|
Inc(linePos);
|
|
if (linePos > k) then begin
|
|
if (line[linePos] in [';','#','.']) then
|
|
Result := Copy(line,k,(linePos-k))
|
|
else
|
|
Result := Copy(line,k,(linePos-k+1));
|
|
Result := Trim(Result);
|
|
end else begin
|
|
Result := '';
|
|
end;
|
|
end;
|
|
|
|
procedure ParseLine();
|
|
var
|
|
locCP : Cardinal;
|
|
locData : TPropListLineRec;
|
|
s : ansistring;
|
|
begin
|
|
s := NextToken();
|
|
if (s = '') or (s[1] = '#') then
|
|
exit;
|
|
locCP := StrToInt('$'+s);
|
|
s := NextToken();
|
|
if (s = ';') then begin
|
|
locData.CodePoint.LineType := 0;
|
|
locData.CodePoint.CodePoint := locCP;
|
|
end else begin
|
|
if (s = '') or (s <> '.') or (NextToken() <> '.') then
|
|
raise Exception.CreateFmt('Invalid line : "%s".',[Copy(line,1,lineLength)]);
|
|
locData.CodePoint.LineType := 1;
|
|
locData.CodePoint.StartCodePoint := locCP;
|
|
locData.CodePoint.EndCodePoint := StrToInt('$'+NextToken());
|
|
s := NextToken();
|
|
if (s <> ';') then
|
|
raise Exception.CreateFmt('"." expected but "%s" found.',[s]);
|
|
end;
|
|
locData.PropName := Trim(NextToken());
|
|
if (Length(APropList) <= actualDataLen) then
|
|
SetLength(APropList,Length(APropList)*2);
|
|
APropList[actualDataLen] := locData;
|
|
Inc(actualDataLen);
|
|
end;
|
|
|
|
procedure Prepare();
|
|
begin
|
|
SetLength(APropList,DATA_LENGTH);
|
|
actualDataLen := 0;
|
|
bufferLength := ADataAStream.Size;
|
|
bufferPos := 0;
|
|
p := ADataAStream.Memory;
|
|
lineLength := 0;
|
|
SetLength(line,LINE_LENGTH);
|
|
end;
|
|
|
|
begin
|
|
Prepare();
|
|
while NextLine() do
|
|
ParseLine();
|
|
SetLength(APropList,actualDataLen);
|
|
end;
|
|
|
|
function FindCodePointsByProperty(
|
|
const APropName : string;
|
|
const APropList : TPropListLineRecArray
|
|
) : TCodePointRecArray;
|
|
var
|
|
r : TCodePointRecArray;
|
|
i, k : Integer;
|
|
s : string;
|
|
begin
|
|
k := 0;
|
|
r := nil;
|
|
s := LowerCase(Trim(APropName));
|
|
for i := Low(APropList) to High(APropList) do begin
|
|
if (LowerCase(APropList[i].PropName) = s) then begin
|
|
if (k >= Length(r)) then begin
|
|
if (k = 0) then
|
|
SetLength(r,24)
|
|
else
|
|
SetLength(r,(2*Length(r)));
|
|
end;
|
|
r[k] := APropList[i].CodePoint;
|
|
Inc(k);
|
|
end;
|
|
end;
|
|
SetLength(r,k);
|
|
Result := r;
|
|
end;
|
|
|
|
procedure ParseHangulSyllableTypes(
|
|
ADataAStream : TMemoryStream;
|
|
var ACodePointList : TCodePointRecArray
|
|
);
|
|
const
|
|
LINE_LENGTH = 1024;
|
|
DATA_LENGTH = 25000;
|
|
var
|
|
p : PAnsiChar;
|
|
actualDataLen : Integer;
|
|
bufferLength, bufferPos, lineLength, linePos : Integer;
|
|
line : ansistring;
|
|
|
|
function NextLine() : Boolean;
|
|
var
|
|
locOldPos : Integer;
|
|
locOldPointer : PAnsiChar;
|
|
begin
|
|
Result := False;
|
|
locOldPointer := p;
|
|
locOldPos := bufferPos;
|
|
while (bufferPos < bufferLength) and (p^ <> #10) do begin
|
|
Inc(p);
|
|
Inc(bufferPos);
|
|
end;
|
|
if (locOldPos = bufferPos) and (p^ = #10) then begin
|
|
lineLength := 0;
|
|
Inc(p);
|
|
Inc(bufferPos);
|
|
linePos := 1;
|
|
Result := True;
|
|
end else if (locOldPos < bufferPos) then begin
|
|
lineLength := (bufferPos - locOldPos);
|
|
Move(locOldPointer^,line[1],lineLength);
|
|
if (p^ = #10) then begin
|
|
Dec(lineLength);
|
|
Inc(p);
|
|
Inc(bufferPos);
|
|
end;
|
|
linePos := 1;
|
|
Result := True;
|
|
end;
|
|
end;
|
|
|
|
function NextToken() : ansistring;
|
|
var
|
|
k : Integer;
|
|
begin
|
|
k := linePos;
|
|
if (linePos < lineLength) and (line[linePos] = '.') then begin
|
|
Inc(linePos);
|
|
while (linePos < lineLength) and (line[linePos] = '.') do begin
|
|
Inc(linePos);
|
|
end;
|
|
Result := Copy(line,k,(linePos-k));
|
|
exit;
|
|
end;
|
|
while (linePos < lineLength) and not(line[linePos] in [';','#','.']) do
|
|
Inc(linePos);
|
|
if (linePos > k) then begin
|
|
if (line[linePos] in [';','#','.']) then
|
|
Result := Copy(line,k,(linePos-k))
|
|
else
|
|
Result := Copy(line,k,(linePos-k+1));
|
|
Result := Trim(Result);
|
|
end else begin
|
|
Result := '';
|
|
end;
|
|
//Inc(linePos);
|
|
end;
|
|
|
|
procedure ParseLine();
|
|
var
|
|
locData : TCodePointRec;
|
|
s : ansistring;
|
|
begin
|
|
s := NextToken();
|
|
if (s = '') or (s[1] = '#') then
|
|
exit;
|
|
locData.CodePoint := StrToInt('$'+s);
|
|
s := NextToken();
|
|
if (s = '') or (s[1] in [';','#']) then begin
|
|
locData.LineType := 0;
|
|
end else begin
|
|
if (s <> '..') then
|
|
raise Exception.CreateFmt('Unknown line type : "%s"',[Copy(line,1,lineLength)]);
|
|
locData.StartCodePoint := locData.CodePoint;
|
|
locData.EndCodePoint := StrToInt('$'+NextToken());
|
|
locData.LineType := 1;
|
|
end;
|
|
if (Length(ACodePointList) <= actualDataLen) then
|
|
SetLength(ACodePointList,Length(ACodePointList)*2);
|
|
ACodePointList[actualDataLen] := locData;
|
|
Inc(actualDataLen);
|
|
end;
|
|
|
|
procedure Prepare();
|
|
begin
|
|
SetLength(ACodePointList,DATA_LENGTH);
|
|
actualDataLen := 0;
|
|
bufferLength := ADataAStream.Size;
|
|
bufferPos := 0;
|
|
p := ADataAStream.Memory;
|
|
lineLength := 0;
|
|
SetLength(line,LINE_LENGTH);
|
|
end;
|
|
|
|
begin
|
|
Prepare();
|
|
while NextLine() do
|
|
ParseLine();
|
|
SetLength(ACodePointList,actualDataLen);
|
|
end;
|
|
|
|
function IsHangulSyllable(
|
|
const ACodePoint : TUnicodeCodePoint;
|
|
const AHangulList : TCodePointRecArray
|
|
) : Boolean;
|
|
var
|
|
i : Integer;
|
|
p : ^TCodePointRec;
|
|
begin
|
|
Result := False;
|
|
p := @AHangulList[Low(AHangulList)];
|
|
for i := Low(AHangulList) to High(AHangulList) do begin
|
|
if ( (p^.LineType = 0) and (ACodePoint = p^.CodePoint) ) or
|
|
( (p^.LineType = 1) and (ACodePoint >= p^.StartCodePoint) and (ACodePoint <= p^.EndCodePoint) )
|
|
then begin
|
|
Result := True;
|
|
Break;
|
|
end;
|
|
Inc(p);
|
|
end;
|
|
end;
|
|
|
|
function IndexOf(
|
|
const AProp : TPropRec;
|
|
const APropList : TPropRecArray;
|
|
const AActualLen : Integer
|
|
) : Integer;overload;
|
|
var
|
|
i : Integer;
|
|
p : PPropRec;
|
|
begin
|
|
Result := -1;
|
|
if (AActualLen > 0) then begin
|
|
p := @APropList[0];
|
|
for i := 0 to AActualLen - 1 do begin
|
|
if (AProp.Category = p^.Category) and
|
|
(AProp.CCC = p^.CCC) and
|
|
(AProp.NumericIndex = p^.NumericIndex) and
|
|
(AProp.SimpleUpperCase = p^.SimpleUpperCase) and
|
|
(AProp.SimpleLowerCase = p^.SimpleLowerCase) and
|
|
(AProp.WhiteSpace = p^.WhiteSpace) and
|
|
(AProp.UnifiedIdeograph = p^.UnifiedIdeograph) and
|
|
//
|
|
(AProp.DecompositionID = p^.DecompositionID) and
|
|
(* ( (AProp.DecompositionID = -1 ) and (p^.DecompositionID = -1) ) or
|
|
( (AProp.DecompositionID <> -1 ) and (p^.DecompositionID <> -1) )
|
|
*)
|
|
(AProp.HangulSyllable = p^.HangulSyllable)
|
|
then begin
|
|
Result := i;
|
|
Break;
|
|
end;
|
|
Inc(p);
|
|
end;
|
|
end;
|
|
end;
|
|
|
|
function IndexOf(
|
|
const AItem : TUnicodeCodePointArray;
|
|
const AList : TDecompositionArray
|
|
) : Integer;overload;
|
|
var
|
|
p : TUnicodeCodePointArray;
|
|
i : Integer;
|
|
begin
|
|
Result := -1;
|
|
if (Length(AList) = 0) then
|
|
exit;
|
|
for i := Low(AList) to High(AList) do begin
|
|
p := AList[i];
|
|
if (Length(p) = Length(AItem)) then begin
|
|
if CompareMem(@p[0],@AItem[0],Length(AItem)*SizeOf(TUnicodeCodePoint)) then
|
|
exit(i);
|
|
end;
|
|
end;
|
|
Result := -1;
|
|
end;
|
|
|
|
function IndexOf(
|
|
const AItem : TNumericValue;
|
|
const AList : TNumericValueArray;
|
|
const AActualLen : Integer
|
|
) : Integer;overload;
|
|
var
|
|
p : ^TNumericValue;
|
|
i : Integer;
|
|
begin
|
|
Result := -1;
|
|
if (AActualLen = 0) then
|
|
exit;
|
|
p := @AList[Low(AList)];
|
|
for i := Low(AList) to AActualLen - 1 do begin
|
|
if (AItem = p^) then
|
|
exit(i);
|
|
Inc(p);
|
|
end;
|
|
Result := -1;
|
|
end;
|
|
|
|
procedure Parse_UnicodeData(
|
|
ADataAStream : TMemoryStream;
|
|
var APropList : TPropRecArray;
|
|
var ANumericTable : TNumericValueArray;
|
|
var ADataLineList : TDataLineRecArray;
|
|
var ADecomposition : TDecompositionArray;
|
|
const AHangulList : TCodePointRecArray;
|
|
const AWhiteSpaces : TCodePointRecArray;
|
|
const AUnifiedIdeographs : TCodePointRecArray
|
|
);
|
|
const
|
|
LINE_LENGTH = 1024;
|
|
PROP_LENGTH = 5000;
|
|
DATA_LENGTH = 25000;
|
|
var
|
|
p : PAnsiChar;
|
|
bufferLength, bufferPos : Integer;
|
|
actualPropLen, actualDataLen, actualNumLen : Integer;
|
|
line : ansistring;
|
|
lineLength, linePos : Integer;
|
|
|
|
function NextLine() : Boolean;
|
|
var
|
|
locOldPos : Integer;
|
|
locOldPointer : PAnsiChar;
|
|
begin
|
|
Result := False;
|
|
locOldPointer := p;
|
|
locOldPos := bufferPos;
|
|
while (bufferPos < bufferLength) and (p^ <> #10) do begin
|
|
Inc(p);
|
|
Inc(bufferPos);
|
|
end;
|
|
if (locOldPos < bufferPos) then begin
|
|
lineLength := (bufferPos - locOldPos);
|
|
Move(locOldPointer^,line[1],lineLength);
|
|
if (p^ = #10) then begin
|
|
Dec(lineLength);
|
|
Inc(p);
|
|
Inc(bufferPos);
|
|
end;
|
|
if (lineLength > 7) then begin
|
|
linePos := 1;
|
|
Result := True;
|
|
end;
|
|
end;
|
|
end;
|
|
|
|
function NextToken() : ansistring;
|
|
var
|
|
k : Integer;
|
|
begin
|
|
k := linePos;
|
|
while (linePos < lineLength) and not(line[linePos] in [';','#']) do
|
|
Inc(linePos);
|
|
if (linePos > k) then begin
|
|
if (line[linePos] in [';','#']) then
|
|
Result := Copy(line,k,(linePos-k))
|
|
else
|
|
Result := Copy(line,k,(linePos-k+1));
|
|
Result := Trim(Result);
|
|
end else begin
|
|
Result := '';
|
|
end;
|
|
Inc(linePos);
|
|
end;
|
|
|
|
function ParseCanonicalDecomposition(AStr : ansistring) : TUnicodeCodePointArray;
|
|
var
|
|
locStr, ks : ansistring;
|
|
k0,k : Integer;
|
|
begin
|
|
SetLength(Result,0);
|
|
locStr := UpperCase(Trim(AStr));
|
|
if (locStr = '') or (locStr[1] = '<') then
|
|
exit;
|
|
k0 := 1;
|
|
k := 1;
|
|
while (k <= Length(locStr)) do begin
|
|
while (k <= Length(locStr)) and (locStr[k] in ['0'..'9','A'..'F']) do
|
|
inc(k);
|
|
ks := Trim(Copy(locStr,k0,k-k0));
|
|
SetLength(Result,Length(Result)+1);
|
|
Result[Length(Result)-1] := StringToCodePoint(ks);
|
|
Inc(k);
|
|
k0 := k;
|
|
end;
|
|
end;
|
|
|
|
procedure ParseLine();
|
|
var
|
|
locCP : TUnicodeCodePoint;
|
|
locProp : TPropRec;
|
|
locData : TDataLineRec;
|
|
s : ansistring;
|
|
locRangeStart, locRangeEnd : Boolean;
|
|
k : Integer;
|
|
locDecompItem : TUnicodeCodePointArray;
|
|
numVal : TNumericValue;
|
|
begin
|
|
FillChar(locData,SizeOf(locData),#0);
|
|
FillChar(locProp,SizeOf(locProp),#0);
|
|
locCP := StrToInt('$'+NextToken());
|
|
s := NextToken();
|
|
locRangeStart := AnsiEndsText(', First>',s);
|
|
if locRangeStart then
|
|
locRangeEnd := False
|
|
else
|
|
locRangeEnd := AnsiEndsText(', Last>',s);
|
|
if locRangeStart then begin
|
|
locData.LineType := 1;
|
|
locData.StartCodePoint := locCP;
|
|
end else if locRangeEnd then begin
|
|
ADataLineList[actualDataLen - 1].EndCodePoint := locCP;
|
|
exit;
|
|
//locData.EndCodePoint := locCP;
|
|
end else begin
|
|
locData.LineType := 0;
|
|
locData.CodePoint := locCP;
|
|
end;
|
|
locProp.Category := StrToCategory(NextToken());
|
|
locProp.CCC := StrToInt(NextToken());//Canonical_Combining_Class
|
|
NextToken();//Bidi_Class
|
|
s := NextToken();//Decomposition_Type
|
|
locDecompItem := ParseCanonicalDecomposition(s);
|
|
if (Length(locDecompItem) = 0) then
|
|
locProp.DecompositionID := -1
|
|
else begin
|
|
locProp.DecompositionID := IndexOf(locDecompItem,ADecomposition);
|
|
if (locProp.DecompositionID = -1) then begin
|
|
k := Length(ADecomposition);
|
|
locProp.DecompositionID := k;
|
|
SetLength(ADecomposition,k+1);
|
|
ADecomposition[k] := locDecompItem;
|
|
end;
|
|
end;
|
|
|
|
numVal := EvaluateFloat(NextToken());
|
|
if (numVal <> Double(0.0)) then begin
|
|
NextToken();
|
|
NextToken();
|
|
end else begin
|
|
s := NextToken();
|
|
if (s <> '') then
|
|
numVal := EvaluateFloat(s);
|
|
s := NextToken();
|
|
if (numVal = Double(0.0)) then
|
|
numVal := EvaluateFloat(s);
|
|
end;
|
|
k := IndexOf(numVal,ANumericTable,actualNumLen);
|
|
if (k = -1) then begin
|
|
if (actualNumLen >= Length(ANumericTable)) then
|
|
SetLength(ANumericTable,(actualNumLen*2));
|
|
ANumericTable[actualNumLen] := numVal;
|
|
k := actualNumLen;
|
|
Inc(actualNumLen);
|
|
end;
|
|
locProp.NumericIndex := k;
|
|
|
|
NextToken();//Bidi_Mirroed
|
|
NextToken();//Unicode_l_Name
|
|
NextToken();//ISO_Comment
|
|
locProp.SimpleUpperCase := StringToCodePoint(NextToken());
|
|
locProp.SimpleLowerCase := StringToCodePoint(NextToken());
|
|
NextToken();//Simple_Title_Case_Mapping
|
|
locProp.WhiteSpace := IsWhiteSpace(locCP,AWhiteSpaces);
|
|
locProp.HangulSyllable := IsHangulSyllable(locCP,AHangulList);
|
|
locProp.UnifiedIdeograph := IsIncluded(locCP,AUnifiedIdeographs);
|
|
k := IndexOf(locProp,APropList,actualPropLen);
|
|
if (k = -1) then begin
|
|
k := actualPropLen;
|
|
locProp.PropID := k{ + 1};
|
|
APropList[k] := locProp;
|
|
Inc(actualPropLen);
|
|
end;
|
|
locData.PropID := k;
|
|
if (actualDataLen >= Length(ADataLineList)) then
|
|
SetLength(ADataLineList,(2*Length(ADataLineList)));
|
|
ADataLineList[actualDataLen] := locData;
|
|
Inc(actualDataLen);
|
|
end;
|
|
|
|
procedure Prepare();
|
|
var
|
|
r : TPropRec;
|
|
begin
|
|
SetLength(APropList,PROP_LENGTH);
|
|
actualPropLen := 0;
|
|
SetLength(ADataLineList,DATA_LENGTH);
|
|
actualDataLen := 0;
|
|
bufferLength := ADataAStream.Size;
|
|
bufferPos := 0;
|
|
p := ADataAStream.Memory;
|
|
lineLength := 0;
|
|
SetLength(line,LINE_LENGTH);
|
|
SetLength(ANumericTable,500);
|
|
actualNumLen := 0;
|
|
|
|
FillChar(r,SizeOf(r),#0);
|
|
r.PropID := 0;
|
|
r.Category := ucUnassigned;
|
|
r.DecompositionID := -1;
|
|
r.NumericIndex := 0;
|
|
APropList[0] := r;
|
|
Inc(actualPropLen);
|
|
ANumericTable[0] := 0;
|
|
Inc(actualNumLen);
|
|
end;
|
|
|
|
begin
|
|
Prepare();
|
|
while NextLine() do
|
|
ParseLine();
|
|
SetLength(APropList,actualPropLen);
|
|
SetLength(ADataLineList,actualDataLen);
|
|
SetLength(ANumericTable,actualNumLen);
|
|
end;
|
|
|
|
function GetPropID(
|
|
ACodePoint : TUnicodeCodePoint;
|
|
const ADataLineList : TDataLineRecArray
|
|
) : Cardinal;
|
|
var
|
|
i : Integer;
|
|
p : PDataLineRec;
|
|
begin
|
|
Result := 0;
|
|
p := @ADataLineList[Low(ADataLineList)];
|
|
for i := Low(ADataLineList) to High(ADataLineList) do begin
|
|
if (p^.LineType = 0) then begin
|
|
if (p^.CodePoint = ACodePoint) then begin
|
|
Result := p^.PropID;
|
|
Break;
|
|
end;
|
|
end else begin
|
|
if (p^.StartCodePoint <= ACodePoint) and (p^.EndCodePoint >= ACodePoint) then begin
|
|
Result := p^.PropID;
|
|
Break;
|
|
end;
|
|
end;
|
|
Inc(p);
|
|
end;
|
|
end;
|
|
|
|
procedure MakeDecomposition(
|
|
const ARawData : TDecompositionArray;
|
|
var ABook : TDecompositionBook
|
|
);
|
|
var
|
|
i, c, locPos : Integer;
|
|
locItem : TUnicodeCodePointArray;
|
|
begin
|
|
c := 0;
|
|
for i := Low(ARawData) to High(ARawData) do
|
|
c := c + Length(ARawData[i]);
|
|
SetLength(ABook.CodePoints,c);
|
|
SetLength(ABook.Index,Length(ARawData));
|
|
locPos := 0;
|
|
for i := Low(ARawData) to High(ARawData) do begin
|
|
locItem := ARawData[i];
|
|
ABook.Index[i].StartPosition := locPos;
|
|
ABook.Index[i].Length := Length(locItem);
|
|
Move(locItem[0],ABook.CodePoints[locPos],(Length(locItem) * SizeOf(TUnicodeCodePoint)));
|
|
locPos := locPos + Length(locItem);
|
|
end;
|
|
end;
|
|
|
|
type
|
|
PBmpSecondTableItem = ^TBmpSecondTableItem;
|
|
function IndexOf(
|
|
const AItem : PBmpSecondTableItem;
|
|
const ATable : TBmpSecondTable;
|
|
const ATableActualLength : Integer
|
|
) : Integer;overload;
|
|
var
|
|
i : Integer;
|
|
p : PBmpSecondTableItem;
|
|
begin
|
|
Result := -1;
|
|
if (ATableActualLength > 0) then begin
|
|
p := @ATable[0];
|
|
for i := 0 to ATableActualLength - 1 do begin
|
|
if CompareMem(p,AItem,SizeOf(TBmpSecondTableItem)) then begin
|
|
Result := i;
|
|
Break;
|
|
end;
|
|
Inc(p);
|
|
end;
|
|
end;
|
|
end;
|
|
|
|
procedure MakeBmpTables(
|
|
var AFirstTable : TBmpFirstTable;
|
|
var ASecondTable : TBmpSecondTable;
|
|
const ADataLineList : TDataLineRecArray
|
|
);
|
|
var
|
|
locLowByte, locHighByte : Byte;
|
|
locTableItem : TBmpSecondTableItem;
|
|
locCP : TUnicodeCodePoint;
|
|
i, locSecondActualLen : Integer;
|
|
begin
|
|
SetLength(ASecondTable,120);
|
|
locSecondActualLen := 0;
|
|
for locHighByte := 0 to 255 do begin
|
|
FillChar(locTableItem,SizeOf(locTableItem),#0);
|
|
for locLowByte := 0 to 255 do begin
|
|
locCP := (locHighByte * 256) + locLowByte;
|
|
locTableItem[locLowByte] := GetPropID(locCP,ADataLineList)// - 1;
|
|
end;
|
|
i := IndexOf(@locTableItem,ASecondTable,locSecondActualLen);
|
|
if (i = -1) then begin
|
|
if (locSecondActualLen = Length(ASecondTable)) then
|
|
SetLength(ASecondTable,locSecondActualLen + 50);
|
|
i := locSecondActualLen;
|
|
ASecondTable[i] := locTableItem;
|
|
Inc(locSecondActualLen);
|
|
end;
|
|
AFirstTable[locHighByte] := i;
|
|
end;
|
|
SetLength(ASecondTable,locSecondActualLen);
|
|
end;
|
|
|
|
type
|
|
P3lvlBmp3TableItem = ^T3lvlBmp3TableItem;
|
|
function IndexOf(
|
|
const AItem : P3lvlBmp3TableItem;
|
|
const ATable : T3lvlBmp3Table;
|
|
const ATableActualLength : Integer
|
|
) : Integer;overload;
|
|
var
|
|
i : Integer;
|
|
p : P3lvlBmp3TableItem;
|
|
begin
|
|
Result := -1;
|
|
if (ATableActualLength > 0) then begin
|
|
p := @ATable[0];
|
|
for i := 0 to ATableActualLength - 1 do begin
|
|
if CompareMem(p,AItem,SizeOf(T3lvlBmp3TableItem)) then begin
|
|
Result := i;
|
|
Break;
|
|
end;
|
|
Inc(p);
|
|
end;
|
|
end;
|
|
end;
|
|
|
|
type
|
|
P3lvlBmp2TableItem = ^T3lvlBmp2TableItem;
|
|
function IndexOf(
|
|
const AItem : P3lvlBmp2TableItem;
|
|
const ATable : T3lvlBmp2Table
|
|
) : Integer;overload;
|
|
var
|
|
i : Integer;
|
|
p : P3lvlBmp2TableItem;
|
|
begin
|
|
Result := -1;
|
|
if (Length(ATable) > 0) then begin
|
|
p := @ATable[0];
|
|
for i := 0 to Length(ATable) - 1 do begin
|
|
if CompareMem(p,AItem,SizeOf(T3lvlBmp2TableItem)) then begin
|
|
Result := i;
|
|
Break;
|
|
end;
|
|
Inc(p);
|
|
end;
|
|
end;
|
|
end;
|
|
procedure MakeBmpTables3Levels(
|
|
var AFirstTable : T3lvlBmp1Table;
|
|
var ASecondTable : T3lvlBmp2Table;
|
|
var AThirdTable : T3lvlBmp3Table;
|
|
const ADataLineList : TDataLineRecArray
|
|
);
|
|
var
|
|
locLowByte0, locLowByte1, locHighByte : Byte;
|
|
locTableItem2 : T3lvlBmp2TableItem;
|
|
locTableItem3 : T3lvlBmp3TableItem;
|
|
locCP : TUnicodeCodePoint;
|
|
i, locThirdActualLen : Integer;
|
|
begin
|
|
SetLength(AThirdTable,120);
|
|
locThirdActualLen := 0;
|
|
for locHighByte := 0 to 255 do begin
|
|
FillChar(locTableItem2,SizeOf(locTableItem2),#0);
|
|
for locLowByte0 := 0 to 15 do begin
|
|
FillChar(locTableItem3,SizeOf(locTableItem3),#0);
|
|
for locLowByte1 := 0 to 15 do begin
|
|
locCP := (locHighByte * 256) + (locLowByte0*16) + locLowByte1;
|
|
locTableItem3[locLowByte1] := GetPropID(locCP,ADataLineList);
|
|
end;
|
|
i := IndexOf(@locTableItem3,AThirdTable,locThirdActualLen);
|
|
if (i = -1) then begin
|
|
if (locThirdActualLen = Length(AThirdTable)) then
|
|
SetLength(AThirdTable,locThirdActualLen + 50);
|
|
i := locThirdActualLen;
|
|
AThirdTable[i] := locTableItem3;
|
|
Inc(locThirdActualLen);
|
|
end;
|
|
locTableItem2[locLowByte0] := i;
|
|
end;
|
|
i := IndexOf(@locTableItem2,ASecondTable);
|
|
if (i = -1) then begin
|
|
i := Length(ASecondTable);
|
|
SetLength(ASecondTable,(i + 1));
|
|
ASecondTable[i] := locTableItem2;
|
|
end;
|
|
AFirstTable[locHighByte] := i;
|
|
end;
|
|
SetLength(AThirdTable,locThirdActualLen);
|
|
end;
|
|
|
|
procedure GenerateLicenceText(ADest : TStream);
|
|
var
|
|
s : ansistring;
|
|
begin
|
|
s := SLicenseText + sLineBreak + sLineBreak;
|
|
ADest.Write(s[1],Length(s));
|
|
end;
|
|
|
|
procedure GenerateBmpTables(
|
|
ADest : TStream;
|
|
var AFirstTable : TBmpFirstTable;
|
|
var ASecondTable : TBmpSecondTable
|
|
);
|
|
procedure AddLine(const ALine : ansistring);
|
|
var
|
|
buffer : ansistring;
|
|
begin
|
|
buffer := ALine + sLineBreak;
|
|
ADest.Write(buffer[1],Length(buffer));
|
|
end;
|
|
|
|
var
|
|
i, j, c : Integer;
|
|
locLine : string;
|
|
begin
|
|
AddLine('const');
|
|
AddLine(' UC_TABLE_1 : array[0..255] of Byte = (');
|
|
locLine := '';
|
|
for i := Low(AFirstTable) to High(AFirstTable) - 1 do begin
|
|
locLine := locLine + IntToStr(AFirstTable[i]) + ',';
|
|
if (((i+1) mod 16) = 0) then begin
|
|
locLine := ' ' + locLine;
|
|
AddLine(locLine);
|
|
locLine := '';
|
|
end;
|
|
end;
|
|
locLine := locLine + IntToStr(AFirstTable[High(AFirstTable)]);
|
|
locLine := ' ' + locLine;
|
|
AddLine(locLine);
|
|
AddLine(' );' + sLineBreak);
|
|
|
|
AddLine(' UC_TABLE_2 : array[0..(256*' + IntToStr(Length(ASecondTable)) +'-1)] of Word =(');
|
|
c := High(ASecondTable);
|
|
for i := Low(ASecondTable) to c do begin
|
|
locLine := '';
|
|
for j := Low(TBmpSecondTableItem) to High(TBmpSecondTableItem) do begin
|
|
locLine := locLine + IntToStr(ASecondTable[i][j]) + ',';
|
|
if (((j+1) mod 16) = 0) then begin
|
|
if (i = c) and (j = 255) then
|
|
Delete(locLine,Length(locLine),1);
|
|
locLine := ' ' + locLine;
|
|
AddLine(locLine);
|
|
locLine := '';
|
|
end;
|
|
end;
|
|
end;
|
|
AddLine(' );' + sLineBreak);
|
|
end;
|
|
|
|
//----------------------------------
|
|
procedure Generate3lvlBmpTables(
|
|
ADest : TStream;
|
|
var AFirstTable : T3lvlBmp1Table;
|
|
var ASecondTable : T3lvlBmp2Table;
|
|
var AThirdTable : T3lvlBmp3Table
|
|
);
|
|
|
|
procedure AddLine(const ALine : ansistring);
|
|
var
|
|
buffer : ansistring;
|
|
begin
|
|
buffer := ALine + sLineBreak;
|
|
ADest.Write(buffer[1],Length(buffer));
|
|
end;
|
|
|
|
var
|
|
i, j, c : Integer;
|
|
locLine : string;
|
|
begin
|
|
AddLine('const');
|
|
AddLine(' UC_TABLE_1 : array[0..255] of Byte = (');
|
|
locLine := '';
|
|
for i := Low(AFirstTable) to High(AFirstTable) - 1 do begin
|
|
locLine := locLine + IntToStr(AFirstTable[i]) + ',';
|
|
if (((i+1) mod 16) = 0) then begin
|
|
locLine := ' ' + locLine;
|
|
AddLine(locLine);
|
|
locLine := '';
|
|
end;
|
|
end;
|
|
locLine := locLine + IntToStr(AFirstTable[High(AFirstTable)]);
|
|
locLine := ' ' + locLine;
|
|
AddLine(locLine);
|
|
AddLine(' );' + sLineBreak);
|
|
|
|
AddLine(' UC_TABLE_2 : array[0..' + IntToStr(Length(ASecondTable)-1) +'] of array[0..15] of Word = (');
|
|
c := High(ASecondTable);
|
|
for i := Low(ASecondTable) to c do begin
|
|
locLine := '(';
|
|
for j := Low(T3lvlBmp2TableItem) to High(T3lvlBmp2TableItem) do
|
|
locLine := locLine + IntToStr(ASecondTable[i][j]) + ',';
|
|
Delete(locLine,Length(locLine),1);
|
|
locLine := ' ' + locLine + ')';
|
|
if (i < c) then
|
|
locLine := locLine + ',';
|
|
AddLine(locLine);
|
|
end;
|
|
AddLine(' );' + sLineBreak);
|
|
|
|
AddLine(' UC_TABLE_3 : array[0..' + IntToStr(Length(AThirdTable)-1) +'] of array[0..15] of Word = (');
|
|
c := High(AThirdTable);
|
|
for i := Low(AThirdTable) to c do begin
|
|
locLine := '(';
|
|
for j := Low(T3lvlBmp3TableItem) to High(T3lvlBmp3TableItem) do
|
|
locLine := locLine + IntToStr(AThirdTable[i][j]) + ',';
|
|
Delete(locLine,Length(locLine),1);
|
|
locLine := ' ' + locLine + ')';
|
|
if (i < c) then
|
|
locLine := locLine + ',';
|
|
AddLine(locLine);
|
|
end;
|
|
AddLine(' );' + sLineBreak);
|
|
end;
|
|
|
|
function UInt24ToStr(const AValue : UInt24; const AEndian : TEndianKind): string;inline;
|
|
begin
|
|
if (AEndian = ekBig) then
|
|
Result := Format(
|
|
'(c:$%s;b:$%s;a:$%s;)',
|
|
[ IntToHex(AValue.byte2,1), IntToHex(AValue.byte1,1),
|
|
IntToHex(AValue.byte0,1)
|
|
]
|
|
)
|
|
else
|
|
Result := Format(
|
|
'(a:$%s;b:$%s;c:$%s;)',
|
|
[ IntToHex(AValue.byte0,1), IntToHex(AValue.byte1,1),
|
|
IntToHex(AValue.byte2,1)
|
|
]
|
|
);
|
|
end;
|
|
|
|
procedure GeneratePropTable(
|
|
ADest : TStream;
|
|
const APropList : TPropRecArray;
|
|
const AEndian : TEndianKind
|
|
);
|
|
|
|
procedure AddLine(const ALine : ansistring);
|
|
var
|
|
buffer : ansistring;
|
|
begin
|
|
buffer := ALine + sLineBreak;
|
|
ADest.Write(buffer[1],Length(buffer));
|
|
end;
|
|
|
|
var
|
|
i : Integer;
|
|
locLine : string;
|
|
p : PPropRec;
|
|
begin
|
|
AddLine('');
|
|
AddLine('const');
|
|
AddLine(' UC_PROP_REC_COUNT = ' + IntToStr(Length(APropList)) + ';');
|
|
AddLine(' UC_PROP_ARRAY : array[0..(UC_PROP_REC_COUNT-1)] of TUC_Prop = (');
|
|
locLine := '';
|
|
p := @APropList[0];
|
|
for i := Low(APropList) to High(APropList) do begin //locLine := ' (CD:' + IntToStr(p^.CategoryData) + ';' +
|
|
locLine := locLine + '(C:' + IntToStr(p^.CategoryData) + ';' +
|
|
'C3:' + IntToStr(p^.CCC) + ';' +
|
|
'N:' + IntToStr(p^.NumericIndex) + ';' +
|
|
'UC:' + UInt24ToStr(p^.SimpleUpperCase,AEndian) + ';' +
|
|
'LC:' + UInt24ToStr(p^.SimpleLowerCase,AEndian) + ';' +
|
|
'D:' + IntToStr(p^.DecompositionID) + ')';
|
|
if (i < High(APropList)) then
|
|
locLine := locLine + ',';
|
|
if (((i+1) mod 2) = 0) then begin
|
|
locLine := ' ' + locLine;
|
|
AddLine(locLine);
|
|
locLine := '';
|
|
end;
|
|
Inc(p);
|
|
end;
|
|
if (locLine <> '') then
|
|
AddLine( ' ' + locLine);
|
|
AddLine(' );' + sLineBreak);
|
|
end;
|
|
|
|
procedure GenerateNumericTable(
|
|
ADest : TStream;
|
|
const ANumList : TNumericValueArray;
|
|
const ACompleteUnit : Boolean
|
|
);
|
|
|
|
procedure AddLine(const ALine : ansistring);
|
|
var
|
|
buffer : ansistring;
|
|
begin
|
|
buffer := ALine + sLineBreak;
|
|
ADest.Write(buffer[1],Length(buffer));
|
|
end;
|
|
|
|
var
|
|
i : Integer;
|
|
locLine : string;
|
|
p : ^TNumericValue;
|
|
begin
|
|
if ACompleteUnit then begin
|
|
GenerateLicenceText(ADest);
|
|
AddLine('unit unicodenumtable;');
|
|
AddLine('interface');
|
|
AddLine('');
|
|
end;
|
|
AddLine('');
|
|
AddLine('const');
|
|
AddLine(' UC_NUMERIC_COUNT = ' + IntToStr(Length(ANumList)) + ';');
|
|
AddLine(' UC_NUMERIC_ARRAY : array[0..(UC_NUMERIC_COUNT-1)] of Double = (');
|
|
locLine := '';
|
|
p := @ANumList[0];
|
|
for i := Low(ANumList) to High(ANumList) - 1 do begin
|
|
locLine := locLine + FloatToStr(p^,FS) + ' ,';
|
|
if (i > 0) and ((i mod 8) = 0) then begin
|
|
AddLine(' ' + locLine);
|
|
locLine := '';
|
|
end;
|
|
Inc(p);
|
|
end;
|
|
locLine := locLine + FloatToStr(p^,FS);
|
|
AddLine(' ' + locLine);
|
|
AddLine(' );' + sLineBreak);
|
|
if ACompleteUnit then begin
|
|
AddLine('');
|
|
AddLine('implementation');
|
|
AddLine('');
|
|
AddLine('end.');
|
|
end;
|
|
end;
|
|
|
|
procedure GenerateDecompositionBookTable(
|
|
ADest : TStream;
|
|
const ABook : TDecompositionBook;
|
|
const AEndian : TEndianKind
|
|
);
|
|
|
|
procedure AddLine(const ALine : ansistring);
|
|
var
|
|
buffer : ansistring;
|
|
begin
|
|
buffer := ALine + sLineBreak;
|
|
ADest.Write(buffer[1],Length(buffer));
|
|
end;
|
|
|
|
var
|
|
i, k : Integer;
|
|
p : ^TDecompositionIndexRec;
|
|
cp : ^TUnicodeCodePoint;
|
|
cp24 : UInt24;
|
|
locLine : string;
|
|
begin
|
|
AddLine('const');
|
|
AddLine(' UC_DEC_BOOK_INDEX_LENGTH = ' + IntToStr(Length(ABook.Index)) + ';');
|
|
AddLine(' UC_DEC_BOOK_DATA_LENGTH = ' + IntToStr(Length(ABook.CodePoints)) + ';');
|
|
AddLine('type');
|
|
AddLine(' TDecompositionIndexRec = packed record');
|
|
AddLine(' S : Word; //StartPosition');
|
|
AddLine(' L : Byte; //Length');
|
|
AddLine(' end;');
|
|
AddLine(' TDecompositionBookRec = packed record');
|
|
AddLine(' Index : array[0..(UC_DEC_BOOK_INDEX_LENGTH-1)] of TDecompositionIndexRec;');
|
|
AddLine(' CodePoints : array[0..(UC_DEC_BOOK_DATA_LENGTH-1)] of UInt24;');
|
|
AddLine(' end;');
|
|
AddLine('const');
|
|
AddLine(' UC_DEC_BOOK_DATA : TDecompositionBookRec = (');
|
|
p := @ABook.Index[0];
|
|
AddLine(' Index : (// Index BEGIN');
|
|
k := 0;
|
|
locLine := ' ';
|
|
for i := Low(ABook.Index) to High(ABook.Index) - 1 do begin
|
|
locLine := locLine + '(S:' + IntToStr(p^.StartPosition) + ';' +
|
|
'L:' + IntToStr(p^.Length) + '),';
|
|
k := k + 1;
|
|
if (k >= 9) then begin
|
|
AddLine(locLine);
|
|
locLine := ' ';
|
|
k := 0;
|
|
end;
|
|
Inc(p);
|
|
end;
|
|
locLine := locLine + '(S:' + IntToStr(p^.StartPosition) + ';' +
|
|
'L:' + IntToStr(p^.Length) + ')';
|
|
AddLine(locLine);
|
|
AddLine(' ); // Index END');
|
|
|
|
cp := @ABook.CodePoints[0];
|
|
AddLine(' CodePoints : (// CodePoints BEGIN');
|
|
k := 0;
|
|
locLine := ' ';
|
|
for i := Low(ABook.CodePoints) to High(ABook.CodePoints) - 1 do begin
|
|
cp24 := cp^;
|
|
locLine := locLine + Format('%s,',[UInt24ToStr(cp24,AEndian)]);
|
|
Inc(k);
|
|
if (k >= 16) then begin
|
|
AddLine(locLine);
|
|
k := 0;
|
|
locLine := ' ';
|
|
end;
|
|
Inc(cp);
|
|
end;
|
|
cp24 := cp^;
|
|
locLine := locLine + Format('%s',[UInt24ToStr(cp24,AEndian)]);
|
|
AddLine(locLine);
|
|
AddLine(' ); // CodePoints END');
|
|
AddLine(' );' + sLineBreak);
|
|
end;
|
|
|
|
procedure GenerateOutBmpTable(
|
|
ADest : TStream;
|
|
const AList : TDataLineRecArray
|
|
);
|
|
procedure AddLine(const ALine : ansistring);
|
|
var
|
|
buffer : ansistring;
|
|
begin
|
|
buffer := ALine + sLineBreak;
|
|
ADest.Write(buffer[1],Length(buffer));
|
|
end;
|
|
|
|
var
|
|
i, j : Integer;
|
|
locLine : string;
|
|
p : PDataLineRec;
|
|
begin
|
|
AddLine('');
|
|
//AddLine(' UC_PROP_REC_COUNT = ' + IntToStr(Length(APropList)) + ';');
|
|
//AddLine(' UC_PROP_ARRAY : array[0..(UC_PROP_REC_COUNT-1)] of TUC_Prop = (');
|
|
j := -1;
|
|
p := @AList[0];
|
|
for i := 0 to Length(AList) - 1 do begin
|
|
if ((p^.LineType = 0) and (p^.CodePoint >$FFFF)) or
|
|
(p^.StartCodePoint > $FFFF)
|
|
then begin
|
|
j := i;
|
|
Break;
|
|
end;
|
|
Inc(p);
|
|
end;
|
|
if (j < 0) then
|
|
exit;
|
|
|
|
for i := j to Length(AList) - 2 do begin
|
|
locLine := ' (PropID : ' + IntToStr(p^.PropID) + ';' +
|
|
' CodePoint : ' + IntToStr(p^.CodePoint) + ';' +
|
|
' RangeEnd : ' + IntToStr(p^.EndCodePoint) + '),' ;
|
|
AddLine(locLine);
|
|
Inc(p);
|
|
end;
|
|
locLine := ' (PropID : ' + IntToStr(p^.PropID) + ';' +
|
|
' CodePoint : ' + IntToStr(p^.CodePoint) + ';' +
|
|
' RangeEnd : ' + IntToStr(p^.EndCodePoint) + ')' ;
|
|
AddLine(locLine);
|
|
AddLine(' );' + sLineBreak);
|
|
end;
|
|
|
|
function Compress(const AData : TDataLineRecArray) : TDataLineRecArray;
|
|
var
|
|
k, i, locResLen : Integer;
|
|
q, p, pr : PDataLineRec;
|
|
k_end : TUnicodeCodePoint;
|
|
begin
|
|
locResLen := 1;
|
|
SetLength(Result,Length(AData));
|
|
FillChar(Result[0],Length(Result),#0);
|
|
Result[0] := AData[0];
|
|
q := @AData[0];
|
|
k := 0;
|
|
while (k < Length(AData)) do begin
|
|
if (q^.LineType = 0) then
|
|
k_end := q^.CodePoint
|
|
else
|
|
k_end := q^.EndCodePoint;
|
|
if ((k+1) = Length(AData)) then begin
|
|
i := k;
|
|
end else begin
|
|
p := @AData[k+1];
|
|
i := k +1;
|
|
while (i < (Length(AData) {- 1})) do begin
|
|
if (p^.PropID <> q^.PropID) then begin
|
|
i := i - 1;
|
|
Break;
|
|
end;
|
|
if (p^.LineType = 0) then begin
|
|
if (p^.CodePoint <> (k_end + 1)) then begin
|
|
i := i - 1;
|
|
Break;
|
|
end;
|
|
Inc(k_end);
|
|
end else begin
|
|
if (p^.StartCodePoint <> (k_end + 1)) then begin
|
|
i := i - 1;
|
|
Break;
|
|
end;
|
|
k_end := p^.EndCodePoint;
|
|
end;
|
|
Inc(i);
|
|
Inc(p);
|
|
end;
|
|
end;
|
|
{if (i = k) then begin
|
|
Result[locResLen] := q^;
|
|
Inc(locResLen);
|
|
end else begin }
|
|
p := @AData[i];
|
|
pr := @Result[locResLen];
|
|
pr^.PropID := q^.PropID;
|
|
if (q^.LineType = 0) then
|
|
pr^.StartCodePoint := q^.CodePoint
|
|
else
|
|
pr^.StartCodePoint := q^.StartCodePoint;
|
|
pr^.LineType := 1;
|
|
if (p^.LineType = 0) then
|
|
pr^.EndCodePoint := p^.CodePoint
|
|
else
|
|
pr^.EndCodePoint := p^.EndCodePoint;
|
|
Inc(locResLen);
|
|
//end;
|
|
k := i + 1;
|
|
if (k = Length(AData)) then
|
|
Break;
|
|
q := @AData[k];
|
|
end;
|
|
SetLength(Result,locResLen);
|
|
end;
|
|
|
|
procedure ParseUCAFile(
|
|
ADataAStream : TMemoryStream;
|
|
var ABook : TUCA_DataBook
|
|
);
|
|
const
|
|
LINE_LENGTH = 1024;
|
|
DATA_LENGTH = 25000;
|
|
var
|
|
p : PAnsiChar;
|
|
actualDataLen : Integer;
|
|
bufferLength, bufferPos, lineLength, linePos : Integer;
|
|
line : ansistring;
|
|
|
|
function NextLine() : Boolean;
|
|
var
|
|
locOldPos : Integer;
|
|
locOldPointer : PAnsiChar;
|
|
begin
|
|
Result := False;
|
|
locOldPointer := p;
|
|
locOldPos := bufferPos;
|
|
while (bufferPos < bufferLength) and (p^ <> #10) do begin
|
|
Inc(p);
|
|
Inc(bufferPos);
|
|
end;
|
|
if (locOldPos = bufferPos) and (p^ = #10) then begin
|
|
lineLength := 0;
|
|
Inc(p);
|
|
Inc(bufferPos);
|
|
linePos := 1;
|
|
Result := True;
|
|
end else if (locOldPos < bufferPos) then begin
|
|
lineLength := (bufferPos - locOldPos) + 1;
|
|
Move(locOldPointer^,line[1],lineLength);
|
|
if (p^ = #10) then begin
|
|
Dec(lineLength);
|
|
Inc(p);
|
|
Inc(bufferPos);
|
|
end;
|
|
linePos := 1;
|
|
Result := True;
|
|
end;
|
|
end;
|
|
|
|
procedure SkipSpace();
|
|
begin
|
|
while (linePos < lineLength) and (line[linePos] in [' ',#9]) do
|
|
Inc(linePos);
|
|
end;
|
|
|
|
function NextToken() : ansistring;
|
|
const C_SEPARATORS = [';','#','.','[',']','*','@'];
|
|
var
|
|
k : Integer;
|
|
begin
|
|
SkipSpace();
|
|
k := linePos;
|
|
if (linePos <= lineLength) and (line[linePos] in C_SEPARATORS) then begin
|
|
Result := line[linePos];
|
|
Inc(linePos);
|
|
exit;
|
|
end;
|
|
while (linePos <= lineLength) and not(line[linePos] in (C_SEPARATORS+[' '])) do
|
|
Inc(linePos);
|
|
if (linePos > k) then begin
|
|
if (line[Min(linePos,lineLength)] in C_SEPARATORS) then
|
|
Result := Copy(line,k,(linePos-k))
|
|
else
|
|
Result := Copy(line,k,(linePos-k+1));
|
|
Result := Trim(Result);
|
|
end else begin
|
|
Result := '';
|
|
end;
|
|
end;
|
|
|
|
procedure CheckToken(const AToken : string);
|
|
var
|
|
a, b : string;
|
|
begin
|
|
a := LowerCase(Trim(AToken));
|
|
b := LowerCase(Trim(NextToken()));
|
|
if (a <> b) then
|
|
raise Exception.CreateFmt('Expected token "%s" but found "%s", Line = "%s".',[a,b,line]);
|
|
end;
|
|
|
|
function ReadWeightBlock(var ADest : TUCA_WeightRec) : Boolean;
|
|
var
|
|
s :AnsiString;
|
|
k : Integer;
|
|
begin
|
|
Result := False;
|
|
s := NextToken();
|
|
if (s <> '[') then
|
|
exit;
|
|
s := NextToken();
|
|
if (s = '.') then
|
|
ADest.Variable := False
|
|
else begin
|
|
if (s <> '*') then
|
|
raise Exception.CreateFmt('Expected "%s" but found "%s".',['*',s]);
|
|
ADest.Variable := True;
|
|
end;
|
|
ADest.Weights[0] := StrToInt('$'+NextToken());
|
|
for k := 1 to WEIGHT_LEVEL_COUNT-1 do begin
|
|
CheckToken('.');
|
|
ADest.Weights[k] := StrToInt('$'+NextToken());
|
|
end;
|
|
CheckToken(']');
|
|
Result := True;
|
|
end;
|
|
|
|
procedure ParseHeaderVar();
|
|
var
|
|
s,ss : string;
|
|
k : Integer;
|
|
begin
|
|
s := NextToken();
|
|
if (s = 'version') then begin
|
|
ss := '';
|
|
while True do begin
|
|
s := NextToken();
|
|
if (s = '') then
|
|
Break;
|
|
ss := ss + s;
|
|
end;
|
|
ABook.Version := ss;
|
|
end else if (s = 'variable') then begin
|
|
if (s = 'blanked') then
|
|
ABook.VariableWeight := ucaBlanked
|
|
else if (s = 'non-ignorable') then
|
|
ABook.VariableWeight := ucaNonIgnorable
|
|
else if (s = 'shifted') then
|
|
ABook.VariableWeight := ucaShifted
|
|
else if (s = 'shift-trimmed') then
|
|
ABook.VariableWeight := ucaShiftedTrimmed
|
|
else if (s = 'ignoresp') then
|
|
ABook.VariableWeight := ucaIgnoreSP
|
|
else
|
|
raise Exception.CreateFmt('Unknown "@variable" type : "%s".',[s]);
|
|
end else if (s = 'backwards') or (s = 'forwards') then begin
|
|
ss := s;
|
|
s := NextToken();
|
|
k := StrToInt(s);
|
|
if (k < 1) or (k > 4) then
|
|
raise Exception.CreateFmt('Invalid "%s" position : %d.',[ss,s]);
|
|
ABook.Backwards[k] := (s = 'backwards');
|
|
end;
|
|
end;
|
|
|
|
procedure ParseLine();
|
|
var
|
|
locData : ^TUCA_LineRec;
|
|
s : ansistring;
|
|
kc : Integer;
|
|
begin
|
|
if (Length(ABook.Lines) <= actualDataLen) then
|
|
SetLength(ABook.Lines,Length(ABook.Lines)*2);
|
|
locData := @ABook.Lines[actualDataLen];
|
|
s := NextToken();
|
|
if (s = '') or (s[1] = '#') then
|
|
exit;
|
|
if (s[1] = '@') then begin
|
|
ParseHeaderVar();
|
|
exit;
|
|
end;
|
|
SetLength(locData^.CodePoints,10);
|
|
locData^.CodePoints[0] := StrToInt('$'+s);
|
|
kc := 1;
|
|
while True do begin
|
|
s := Trim(NextToken());
|
|
if (s = '') then
|
|
exit;
|
|
if (s = ';') then
|
|
Break;
|
|
locData^.CodePoints[kc] := StrToInt('$'+s);
|
|
Inc(kc);
|
|
end;
|
|
if (kc = 0) then
|
|
exit;
|
|
SetLength(locData^.CodePoints,kc);
|
|
SetLength(locData^.Weights,24);
|
|
kc := 0;
|
|
while ReadWeightBlock(locData^.Weights[kc]) do begin
|
|
Inc(kc);
|
|
end;
|
|
SetLength(locData^.Weights,kc);
|
|
Inc(actualDataLen);
|
|
end;
|
|
|
|
procedure Prepare();
|
|
var
|
|
k : Integer;
|
|
begin
|
|
ABook.VariableWeight := ucaShifted;
|
|
for k := Low(ABook.Backwards) to High(ABook.Backwards) do
|
|
ABook.Backwards[k] := False;
|
|
SetLength(ABook.Lines,DATA_LENGTH);
|
|
actualDataLen := 0;
|
|
bufferLength := ADataAStream.Size;
|
|
bufferPos := 0;
|
|
p := ADataAStream.Memory;
|
|
lineLength := 0;
|
|
SetLength(line,LINE_LENGTH);
|
|
end;
|
|
|
|
begin
|
|
Prepare();
|
|
while NextLine() do
|
|
ParseLine();
|
|
SetLength(ABook.Lines,actualDataLen);
|
|
end;
|
|
|
|
procedure Dump(X : array of TUnicodeCodePoint; const ATitle : string = '');
|
|
var
|
|
i : Integer;
|
|
begin
|
|
Write(ATitle, ' ');
|
|
for i := 0 to Length(X) - 1 do
|
|
Write(X[i],' ');
|
|
WriteLn();
|
|
end;
|
|
|
|
function IsGreaterThan(A, B : PUCA_LineRec) : Integer;
|
|
var
|
|
i, hb : Integer;
|
|
begin
|
|
if (A=B) then
|
|
exit(0);
|
|
Result := 1;
|
|
hb := Length(B^.CodePoints) - 1;
|
|
for i := 0 to Length(A^.CodePoints) - 1 do begin
|
|
if (i > hb) then
|
|
exit;
|
|
if (A^.CodePoints[i] < B^.CodePoints[i]) then
|
|
exit(-1);
|
|
if (A^.CodePoints[i] > B^.CodePoints[i]) then
|
|
exit(1);
|
|
end;
|
|
if (Length(A^.CodePoints) = Length(B^.CodePoints)) then
|
|
exit(0);
|
|
exit(-1);
|
|
end;
|
|
|
|
procedure QuickSort(
|
|
var AList : TUCA_DataBookIndex;
|
|
L, R : Longint;
|
|
ABook : PUCA_DataBook
|
|
);overload;
|
|
var
|
|
I, J : Longint;
|
|
P, Q : Integer;
|
|
begin
|
|
repeat
|
|
I := L;
|
|
J := R;
|
|
P := AList[ (L + R) div 2 ];
|
|
repeat
|
|
while IsGreaterThan(@ABook^.Lines[P], @ABook^.Lines[AList[i]]) > 0 do
|
|
I := I + 1;
|
|
while IsGreaterThan(@ABook^.Lines[P], @ABook^.Lines[AList[J]]) < 0 do
|
|
J := J - 1;
|
|
If I <= J then
|
|
begin
|
|
Q := AList[I];
|
|
AList[I] := AList[J];
|
|
AList[J] := Q;
|
|
I := I + 1;
|
|
J := J - 1;
|
|
end;
|
|
until I > J;
|
|
// sort the smaller range recursively
|
|
// sort the bigger range via the loop
|
|
// Reasons: memory usage is O(log(n)) instead of O(n) and loop is faster than recursion
|
|
if J - L < R - I then
|
|
begin
|
|
if L < J then
|
|
QuickSort(AList, L, J, ABook);
|
|
L := I;
|
|
end
|
|
else
|
|
begin
|
|
if I < R then
|
|
QuickSort(AList, I, R, ABook);
|
|
R := J;
|
|
end;
|
|
until L >= R;
|
|
end;
|
|
|
|
function CreateIndex(ABook : PUCA_DataBook) : TUCA_DataBookIndex;
|
|
var
|
|
r : TUCA_DataBookIndex;
|
|
i, c : Integer;
|
|
begin
|
|
c := Length(ABook^.Lines);
|
|
SetLength(r,c);
|
|
for i := 0 to c - 1 do
|
|
r[i] := i;
|
|
QuickSort(r,0,c-1,ABook);
|
|
Result := r;
|
|
end;
|
|
|
|
function ConstructContextTree(
|
|
const AContext : PUCA_LineContextRec;
|
|
var ADestBuffer;
|
|
const ADestBufferLength : Cardinal
|
|
) : PUCA_PropItemContextTreeRec;forward;
|
|
function ConstructItem(
|
|
AItem : PUCA_PropItemRec;
|
|
ACodePoint : Cardinal;
|
|
AValid : Byte;
|
|
AChildCount : Byte;
|
|
const AWeights : array of TUCA_WeightRec;
|
|
const AStoreCP : Boolean;
|
|
const AContext : PUCA_LineContextRec;
|
|
const ADeleted : Boolean
|
|
) : Cardinal;
|
|
var
|
|
i : Integer;
|
|
p : PUCA_PropItemRec;
|
|
pw : PUCA_PropWeights;
|
|
pb : PByte;
|
|
hasContext : Boolean;
|
|
contextTree : PUCA_PropItemContextTreeRec;
|
|
wl : Integer;
|
|
begin
|
|
p := AItem;
|
|
p^.Size := 0;
|
|
p^.Flags := 0;
|
|
p^.WeightLength := 0;
|
|
SetBit(p^.Flags,AItem^.FLAG_VALID,(AValid <> 0));
|
|
p^.ChildCount := AChildCount;
|
|
hasContext := (AContext <> nil) and (Length(AContext^.Data) > 0);
|
|
if hasContext then
|
|
wl := 0
|
|
else
|
|
wl := Length(AWeights);
|
|
p^.WeightLength := wl;
|
|
if (wl = 0) then begin
|
|
Result := SizeOf(TUCA_PropItemRec);
|
|
if ADeleted then
|
|
SetBit(AItem^.Flags,AItem^.FLAG_DELETION,True);
|
|
end else begin
|
|
Result := SizeOf(TUCA_PropItemRec) + (wl*SizeOf(TUCA_PropWeights));
|
|
pb := PByte(PtrUInt(p) + SizeOf(TUCA_PropItemRec));
|
|
Unaligned(PWord(pb)^) := AWeights[0].Weights[0];
|
|
pb := pb + 2;
|
|
if (AWeights[0].Weights[1] > High(Byte)) then begin
|
|
Unaligned(PWord(pb)^) := AWeights[0].Weights[1];
|
|
pb := pb + 2;
|
|
end else begin
|
|
SetBit(p^.Flags,p^.FLAG_COMPRESS_WEIGHT_1,True);
|
|
pb^ := AWeights[0].Weights[1];
|
|
pb := pb + 1;
|
|
Result := Result - 1;
|
|
end;
|
|
if (AWeights[0].Weights[2] > High(Byte)) then begin
|
|
Unaligned(PWord(pb)^) := AWeights[0].Weights[2];
|
|
pb := pb + 2;
|
|
end else begin
|
|
SetBit(p^.Flags,p^.FLAG_COMPRESS_WEIGHT_2,True);
|
|
pb^ := AWeights[0].Weights[2];
|
|
pb := pb + 1;
|
|
Result := Result - 1;
|
|
end;
|
|
pw := PUCA_PropWeights(pb);
|
|
for i := 1 to wl - 1 do begin
|
|
pw^.Weights[0] := AWeights[i].Weights[0];
|
|
pw^.Weights[1] := AWeights[i].Weights[1];
|
|
pw^.Weights[2] := AWeights[i].Weights[2];
|
|
//pw^.Variable := BoolToByte(AWeights[i].Variable);
|
|
Inc(pw);
|
|
end;
|
|
end;
|
|
hasContext := (AContext <> nil) and (Length(AContext^.Data) > 0);
|
|
if AStoreCP or hasContext then begin
|
|
Unaligned(PUInt24(PtrUInt(AItem)+Result)^) := ACodePoint;
|
|
Result := Result + SizeOf(UInt24);
|
|
SetBit(AItem^.Flags,AItem^.FLAG_CODEPOINT,True);
|
|
end;
|
|
if hasContext then begin
|
|
contextTree := ConstructContextTree(AContext,Unaligned(Pointer(PtrUInt(AItem)+Result)^),MaxInt);
|
|
Result := Result + Cardinal(contextTree^.Size);
|
|
SetBit(AItem^.Flags,AItem^.FLAG_CONTEXTUAL,True);
|
|
end;
|
|
p^.Size := Result;
|
|
end;
|
|
|
|
function CalcCharChildCount(
|
|
const ASearchStartPos : Integer;
|
|
const ALinePos : Integer;
|
|
const ABookLines : PUCA_LineRec;
|
|
const AMaxLength : Integer;
|
|
const ABookIndex : TUCA_DataBookIndex;
|
|
out ALineCount : Word
|
|
) : Byte;
|
|
var
|
|
locLinePos : Integer;
|
|
p : PUCA_LineRec;
|
|
|
|
procedure IncP();
|
|
begin
|
|
Inc(locLinePos);
|
|
p := @ABookLines[ABookIndex[locLinePos]];
|
|
end;
|
|
|
|
var
|
|
i, locTargetLen, locTargetBufferSize, r : Integer;
|
|
locTarget : array[0..127] of Cardinal;
|
|
locLastChar : Cardinal;
|
|
begin
|
|
locLinePos := ALinePos;
|
|
p := @ABookLines[ABookIndex[locLinePos]];
|
|
locTargetLen := ASearchStartPos;
|
|
locTargetBufferSize := (locTargetLen*SizeOf(Cardinal));
|
|
Move(p^.CodePoints[0],locTarget[0],locTargetBufferSize);
|
|
if (Length(p^.CodePoints) = ASearchStartPos) then begin
|
|
r := 0;
|
|
locLastChar := High(Cardinal);
|
|
end else begin
|
|
r := 1;
|
|
locLastChar := p^.CodePoints[ASearchStartPos];
|
|
end;
|
|
i := 1;
|
|
while (i < AMaxLength) do begin
|
|
IncP();
|
|
if (Length(p^.CodePoints) < locTargetLen) then
|
|
Break;
|
|
if not CompareMem(@locTarget[0],@p^.CodePoints[0],locTargetBufferSize) then
|
|
Break;
|
|
if (p^.CodePoints[ASearchStartPos] <> locLastChar) then begin
|
|
Inc(r);
|
|
locLastChar := p^.CodePoints[ASearchStartPos];
|
|
end;
|
|
Inc(i);
|
|
end;
|
|
ALineCount := i;
|
|
Result := r;
|
|
end;
|
|
|
|
function BuildTrie(
|
|
const ALinePos : Integer;
|
|
const ABookLines : PUCA_LineRec;
|
|
const AMaxLength : Integer;
|
|
const ABookIndex : TUCA_DataBookIndex
|
|
) : PTrieNode;
|
|
var
|
|
p : PUCA_LineRec;
|
|
root : PTrieNode;
|
|
ki, k, i : Integer;
|
|
key : array of TKeyType;
|
|
begin
|
|
k := ABookIndex[ALinePos];
|
|
p := @ABookLines[k];
|
|
if (Length(p^.CodePoints) = 1) then
|
|
root := CreateNode(p^.CodePoints[0],k)
|
|
else
|
|
root := CreateNode(p^.CodePoints[0]);
|
|
|
|
for i := ALinePos to ALinePos + AMaxLength - 1 do begin
|
|
k := ABookIndex[i];
|
|
p := @ABookLines[k];
|
|
if (Length(p^.CodePoints) = 1) then begin
|
|
InsertWord(root,p^.CodePoints[0],k);
|
|
end else begin
|
|
SetLength(key,Length(p^.CodePoints));
|
|
for ki := 0 to Length(p^.CodePoints) - 1 do
|
|
key[ki] := p^.CodePoints[ki];
|
|
InsertWord(root,key,k);
|
|
end;
|
|
end;
|
|
Result := root;
|
|
end;
|
|
|
|
function BoolToByte(AValue : Boolean): Byte;inline;
|
|
begin
|
|
if AValue then
|
|
Result := 1
|
|
else
|
|
Result := 0;
|
|
end;
|
|
|
|
function InternalConstructFromTrie(
|
|
const ATrie : PTrieNode;
|
|
const AItem : PUCA_PropItemRec;
|
|
const ALines : PUCA_LineRec;
|
|
const AStoreCp : Boolean
|
|
) : Cardinal;
|
|
var
|
|
i : Integer;
|
|
size : Cardinal;
|
|
p : PUCA_PropItemRec;
|
|
n : PTrieNode;
|
|
begin
|
|
if (ATrie = nil) then
|
|
exit(0);
|
|
p := AItem;
|
|
n := ATrie;
|
|
if n^.DataNode then
|
|
size := ConstructItem(p,n^.Key,1,n^.ChildCount,ALines[n^.Data].Weights,AStoreCp,@(ALines[n^.Data].Context),ALines[n^.Data].Deleted)
|
|
else
|
|
size := ConstructItem(p,n^.Key,0,n^.ChildCount,[],AStoreCp,nil,False);
|
|
Result := size;
|
|
if (n^.ChildCount > 0) then begin
|
|
for i := 0 to n^.ChildCount - 1 do begin
|
|
p := PUCA_PropItemRec(PtrUInt(p) + size);
|
|
size := InternalConstructFromTrie(n^.Children[i],p,ALines,True);
|
|
Result := Result + size;
|
|
end;
|
|
end;
|
|
AItem^.Size := Result;
|
|
end;
|
|
|
|
function ConstructFromTrie(
|
|
const ATrie : PTrieNode;
|
|
const AItem : PUCA_PropItemRec;
|
|
const ALines : PUCA_LineRec
|
|
) : Integer;
|
|
begin
|
|
Result := InternalConstructFromTrie(ATrie,AItem,ALines,False);
|
|
end;
|
|
|
|
procedure MakeUCA_Props(
|
|
ABook : PUCA_DataBook;
|
|
out AProps : PUCA_PropBook
|
|
);
|
|
var
|
|
propIndexCount : Integer;
|
|
|
|
procedure CapturePropIndex(AItem : PUCA_PropItemRec; ACodePoint : Cardinal);
|
|
begin
|
|
AProps^.Index[propIndexCount].CodePoint := ACodePoint;
|
|
AProps^.Index[propIndexCount].Position := PtrUInt(AItem) - PtrUInt(AProps^.Items);
|
|
propIndexCount := propIndexCount + 1;
|
|
end;
|
|
|
|
var
|
|
locIndex : TUCA_DataBookIndex;
|
|
i, c, k, kc : Integer;
|
|
p, p1, p2 : PUCA_PropItemRec;
|
|
lines, pl1, pl2 : PUCA_LineRec;
|
|
childCount, lineCount : Word;
|
|
size : Cardinal;
|
|
trieRoot : PTrieNode;
|
|
MaxChildCount, MaxSize : Cardinal;
|
|
childList : array of PUCA_PropItemRec;
|
|
begin
|
|
locIndex := CreateIndex(ABook);
|
|
i := Length(ABook^.Lines);
|
|
i := 30 * i * (SizeOf(TUCA_PropItemRec) + SizeOf(TUCA_PropWeights));
|
|
AProps := AllocMem(SizeOf(TUCA_PropBook));
|
|
AProps^.ItemSize := i;
|
|
AProps^.Items := AllocMem(i);
|
|
propIndexCount := 0;
|
|
SetLength(AProps^.Index,Length(ABook^.Lines));
|
|
p := AProps^.Items;
|
|
lines := @ABook^.Lines[0];
|
|
c := Length(locIndex);
|
|
i := 0;
|
|
MaxChildCount := 0; MaxSize := 0;
|
|
while (i < (c-1)) do begin
|
|
pl1 := @lines[locIndex[i]];
|
|
if not pl1^.Stored then begin
|
|
i := i + 1;
|
|
Continue;
|
|
end;
|
|
pl2 := @lines[locIndex[i+1]];
|
|
if (pl1^.CodePoints[0] <> pl2^.CodePoints[0]) then begin
|
|
if (Length(pl1^.CodePoints) = 1) then begin
|
|
size := ConstructItem(p,pl1^.CodePoints[0],1,0,pl1^.Weights,False,@pl1^.Context,pl1^.Deleted);
|
|
CapturePropIndex(p,pl1^.CodePoints[0]);
|
|
p := PUCA_PropItemRec(PtrUInt(p) + size);
|
|
if (size > MaxSize) then
|
|
MaxSize := size;
|
|
end else begin
|
|
kc := Length(pl1^.CodePoints);
|
|
SetLength(childList,kc);
|
|
for k := 0 to kc - 2 do begin
|
|
size := ConstructItem(p,pl1^.CodePoints[k],0,1,[],(k>0),nil,False);
|
|
if (k = 0) then
|
|
CapturePropIndex(p,pl1^.CodePoints[k]);
|
|
childList[k] := p;
|
|
p := PUCA_PropItemRec(PtrUInt(p) + size);
|
|
end;
|
|
size := ConstructItem(p,pl1^.CodePoints[kc-1],1,0,pl1^.Weights,True,@pl1^.Context,pl1^.Deleted);
|
|
childList[kc-1] := p;
|
|
p := PUCA_PropItemRec(PtrUInt(p) + size);
|
|
for k := kc - 2 downto 0 do begin
|
|
p1 := childList[k];
|
|
p2 := childList[k+1];
|
|
p1^.Size := p1^.Size + p2^.Size;
|
|
end;
|
|
if (p1^.Size > MaxSize) then
|
|
MaxSize := p1^.Size;
|
|
end;
|
|
lineCount := 1;
|
|
end else begin
|
|
childCount := CalcCharChildCount(1,i,lines,c,locIndex,lineCount);
|
|
if (childCount < 1) then
|
|
raise Exception.CreateFmt('Expected "child count > 1" but found %d.',[childCount]);
|
|
if (lineCount < 2) then
|
|
raise Exception.CreateFmt('Expected "line count > 2" but found %d.',[lineCount]);
|
|
if (childCount > MaxChildCount) then
|
|
MaxChildCount := childCount;
|
|
trieRoot := BuildTrie(i,lines,lineCount,locIndex);
|
|
size := ConstructFromTrie(trieRoot,p,lines);
|
|
CapturePropIndex(p,pl1^.CodePoints[0]);
|
|
FreeNode(trieRoot);
|
|
p := PUCA_PropItemRec(PtrUInt(p) + size);
|
|
if (size > MaxSize) then
|
|
MaxSize := size;
|
|
end;
|
|
i := i + lineCount;
|
|
end;
|
|
if (i = (c-1)) then begin
|
|
pl1 := @lines[locIndex[i]];
|
|
if (Length(pl1^.CodePoints) = 1) then begin
|
|
size := ConstructItem(p,pl1^.CodePoints[0],1,0,pl1^.Weights,False,@pl1^.Context,pl1^.Deleted);
|
|
CapturePropIndex(p,pl1^.CodePoints[0]);
|
|
p := PUCA_PropItemRec(PtrUInt(p) + size);
|
|
if (size > MaxSize) then
|
|
MaxSize := size;
|
|
end else begin
|
|
kc := Length(pl1^.CodePoints);
|
|
SetLength(childList,kc);
|
|
for k := 0 to kc - 2 do begin
|
|
size := ConstructItem(p,pl1^.CodePoints[k],0,1,[],(k>0),@pl1^.Context,pl1^.Deleted);
|
|
if (k = 0) then
|
|
CapturePropIndex(p,pl1^.CodePoints[0]);
|
|
childList[k] := p;
|
|
p := PUCA_PropItemRec(PtrUInt(p) + size);
|
|
end;
|
|
size := ConstructItem(p,pl1^.CodePoints[kc-1],1,0,pl1^.Weights,True,@pl1^.Context,pl1^.Deleted);
|
|
childList[kc-1] := p;
|
|
p := PUCA_PropItemRec(PtrUInt(p) + size);
|
|
for i := kc - 2 downto 0 do begin
|
|
p1 := childList[i];
|
|
p2 := childList[i+1];
|
|
p1^.Size := p1^.Size + p2^.Size;
|
|
end;
|
|
if (size > MaxSize) then
|
|
MaxSize := size;
|
|
end;
|
|
end;
|
|
//c := Int64(PtrUInt(p)) - Int64(PtrUInt(AProps^.Items));
|
|
c := UInt64(PtrUInt(p)) - UInt64(PtrUInt(AProps^.Items));
|
|
ReAllocMem(AProps^.Items,c);
|
|
AProps^.ItemSize := c;
|
|
SetLength(AProps^.Index,propIndexCount);
|
|
AProps^.ItemsOtherEndian := AllocMem(AProps^.ItemSize);
|
|
ReverseFromNativeEndian(AProps^.Items,AProps^.ItemSize,AProps^.ItemsOtherEndian);
|
|
|
|
k := 0;
|
|
c := High(Word);
|
|
for i := 0 to Length(ABook^.Lines) - 1 do begin
|
|
if (Length(ABook^.Lines[i].Weights) > 0) then begin
|
|
if (ABook^.Lines[i].Weights[0].Variable) then begin
|
|
if (ABook^.Lines[i].Weights[0].Weights[0] > k) then
|
|
k := ABook^.Lines[i].Weights[0].Weights[0];
|
|
if (ABook^.Lines[i].Weights[0].Weights[0] < c) then
|
|
c := ABook^.Lines[i].Weights[0].Weights[0];
|
|
end;
|
|
end;
|
|
end;
|
|
AProps^.VariableHighLimit := k;
|
|
AProps^.VariableLowLimit := c;
|
|
end;
|
|
|
|
procedure FreeUcaBook(var ABook : PUCA_PropBook);
|
|
var
|
|
p : PUCA_PropBook;
|
|
begin
|
|
if (ABook = nil) then
|
|
exit;
|
|
p := ABook;
|
|
ABook := nil;
|
|
p^.Index := nil;
|
|
FreeMem(p^.Items,p^.ItemSize);
|
|
FreeMem(p^.ItemsOtherEndian,p^.ItemSize);
|
|
FreeMem(p,SizeOf(p^));
|
|
end;
|
|
|
|
function IndexOf(const ACodePoint : Cardinal; APropBook : PUCA_PropBook): Integer;overload;
|
|
var
|
|
i : Integer;
|
|
begin
|
|
for i := 0 to Length(APropBook^.Index) - 1 do begin
|
|
if (ACodePoint = APropBook^.Index[i].CodePoint) then
|
|
exit(i);
|
|
end;
|
|
Result := -1;
|
|
end;
|
|
|
|
type
|
|
PucaBmpSecondTableItem = ^TucaBmpSecondTableItem;
|
|
function IndexOf(
|
|
const AItem : PucaBmpSecondTableItem;
|
|
const ATable : TucaBmpSecondTable;
|
|
const ATableActualLength : Integer
|
|
) : Integer;overload;
|
|
var
|
|
i : Integer;
|
|
p : PucaBmpSecondTableItem;
|
|
begin
|
|
Result := -1;
|
|
if (ATableActualLength > 0) then begin
|
|
p := @ATable[0];
|
|
for i := 0 to ATableActualLength - 1 do begin
|
|
if CompareMem(p,AItem,SizeOf(TucaBmpSecondTableItem)) then begin
|
|
Result := i;
|
|
Break;
|
|
end;
|
|
Inc(p);
|
|
end;
|
|
end;
|
|
end;
|
|
|
|
procedure MakeUCA_BmpTables(
|
|
var AFirstTable : TucaBmpFirstTable;
|
|
var ASecondTable : TucaBmpSecondTable;
|
|
const APropBook : PUCA_PropBook
|
|
);
|
|
var
|
|
locLowByte, locHighByte : Byte;
|
|
locTableItem : TucaBmpSecondTableItem;
|
|
locCP : TUnicodeCodePoint;
|
|
i, locSecondActualLen : Integer;
|
|
k : Integer;
|
|
begin
|
|
SetLength(ASecondTable,120);
|
|
locSecondActualLen := 0;
|
|
for locHighByte := 0 to 255 do begin
|
|
FillChar(locTableItem,SizeOf(locTableItem),#0);
|
|
for locLowByte := 0 to 255 do begin
|
|
locCP := (locHighByte * 256) + locLowByte;
|
|
k := IndexOf(locCP,APropBook);
|
|
if (k = -1) then
|
|
k := 0
|
|
else
|
|
k := APropBook^.Index[k].Position + 1;
|
|
locTableItem[locLowByte] := k;
|
|
end;
|
|
i := IndexOf(@locTableItem,ASecondTable,locSecondActualLen);
|
|
if (i = -1) then begin
|
|
if (locSecondActualLen = Length(ASecondTable)) then
|
|
SetLength(ASecondTable,locSecondActualLen + 50);
|
|
i := locSecondActualLen;
|
|
ASecondTable[i] := locTableItem;
|
|
Inc(locSecondActualLen);
|
|
end;
|
|
AFirstTable[locHighByte] := i;
|
|
end;
|
|
SetLength(ASecondTable,locSecondActualLen);
|
|
end;
|
|
|
|
function ToUCS4(const AHighS, ALowS : Word) : TUnicodeCodePoint; inline;
|
|
begin
|
|
//copied from utf16toutf32
|
|
Result := (UCS4Char(AHighS)-$d800) shl 10 + (UCS4Char(ALowS)-$dc00) + $10000;
|
|
end;
|
|
|
|
procedure FromUCS4(const AValue : TUnicodeCodePoint; var AHighS, ALowS : Word);
|
|
begin
|
|
AHighS := Word((AValue - $10000) shr 10 + $d800);
|
|
ALowS := Word((AValue - $10000) and $3ff + $dc00);
|
|
end;
|
|
|
|
type
|
|
PucaOBmpSecondTableItem = ^TucaOBmpSecondTableItem;
|
|
function IndexOf(
|
|
const AItem : PucaOBmpSecondTableItem;
|
|
const ATable : TucaOBmpSecondTable;
|
|
const ATableActualLength : Integer
|
|
) : Integer;overload;
|
|
var
|
|
i : Integer;
|
|
p : PucaOBmpSecondTableItem;
|
|
begin
|
|
Result := -1;
|
|
if (ATableActualLength > 0) then begin
|
|
p := @ATable[0];
|
|
for i := 0 to ATableActualLength - 1 do begin
|
|
if CompareMem(p,AItem,SizeOf(TucaOBmpSecondTableItem)) then begin
|
|
Result := i;
|
|
Break;
|
|
end;
|
|
Inc(p);
|
|
end;
|
|
end;
|
|
end;
|
|
|
|
procedure MakeUCA_OBmpTables(
|
|
var AFirstTable : TucaOBmpFirstTable;
|
|
var ASecondTable : TucaOBmpSecondTable;
|
|
const APropBook : PUCA_PropBook
|
|
);
|
|
var
|
|
locLowByte, locHighByte : Word;
|
|
locTableItem : TucaOBmpSecondTableItem;
|
|
locCP : TUnicodeCodePoint;
|
|
i, locSecondActualLen : Integer;
|
|
k : Integer;
|
|
begin
|
|
if (Length(ASecondTable) = 0) then
|
|
SetLength(ASecondTable,2000);
|
|
locSecondActualLen := 0;
|
|
for locHighByte := 0 to HIGH_SURROGATE_COUNT - 1 do begin
|
|
FillChar(locTableItem,SizeOf(locTableItem),#0);
|
|
for locLowByte := 0 to LOW_SURROGATE_COUNT - 1 do begin
|
|
locCP := ToUCS4(HIGH_SURROGATE_BEGIN + locHighByte,LOW_SURROGATE_BEGIN + locLowByte);
|
|
k := IndexOf(locCP,APropBook);
|
|
if (k = -1) then
|
|
k := 0
|
|
else
|
|
k := APropBook^.Index[k].Position + 1;
|
|
locTableItem[locLowByte] := k;
|
|
end;
|
|
i := IndexOf(@locTableItem,ASecondTable,locSecondActualLen);
|
|
if (i = -1) then begin
|
|
if (locSecondActualLen = Length(ASecondTable)) then
|
|
SetLength(ASecondTable,locSecondActualLen + 50);
|
|
i := locSecondActualLen;
|
|
ASecondTable[i] := locTableItem;
|
|
Inc(locSecondActualLen);
|
|
end;
|
|
AFirstTable[locHighByte] := i;
|
|
end;
|
|
SetLength(ASecondTable,locSecondActualLen);
|
|
end;
|
|
|
|
function GetPropPosition(
|
|
const AHighS,
|
|
ALowS : Word;
|
|
const AFirstTable : PucaOBmpFirstTable;
|
|
const ASecondTable : PucaOBmpSecondTable
|
|
): Integer;inline;overload;
|
|
begin
|
|
Result := ASecondTable^[AFirstTable^[AHighS-HIGH_SURROGATE_BEGIN]][ALowS-LOW_SURROGATE_BEGIN] - 1;
|
|
end;
|
|
|
|
procedure GenerateUCA_Head(
|
|
ADest : TStream;
|
|
ABook : PUCA_DataBook;
|
|
AProps : PUCA_PropBook
|
|
);
|
|
|
|
procedure AddLine(const ALine : ansistring);
|
|
var
|
|
buffer : ansistring;
|
|
begin
|
|
buffer := ALine + sLineBreak;
|
|
ADest.Write(buffer[1],Length(buffer));
|
|
end;
|
|
|
|
begin
|
|
AddLine('const');
|
|
//AddLine(' VERSION_STRING = ' + QuotedStr(ABook^.Version) + ';');
|
|
AddLine(' VARIABLE_LOW_LIMIT = ' + IntToStr(AProps^.VariableLowLimit) + ';');
|
|
AddLine(' VARIABLE_HIGH_LIMIT = ' + IntToStr(AProps^.VariableHighLimit) + ';');
|
|
AddLine(' VARIABLE_WEIGHT = ' + IntToStr(Ord(ABook^.VariableWeight)) + ';');
|
|
AddLine(' BACKWARDS_0 = ' + BoolToStr(ABook^.Backwards[0],'True','False') + ';');
|
|
AddLine(' BACKWARDS_1 = ' + BoolToStr(ABook^.Backwards[1],'True','False') + ';');
|
|
AddLine(' BACKWARDS_2 = ' + BoolToStr(ABook^.Backwards[2],'True','False') + ';');
|
|
AddLine(' BACKWARDS_3 = ' + BoolToStr(ABook^.Backwards[3],'True','False') + ';');
|
|
AddLine(' PROP_COUNT = ' + IntToStr(Ord(AProps^.ItemSize)) + ';');
|
|
|
|
AddLine('');
|
|
end;
|
|
|
|
procedure GenerateUCA_BmpTables(
|
|
AStream,
|
|
ANativeEndianStream,
|
|
ANonNativeEndianStream : TStream;
|
|
var AFirstTable : TucaBmpFirstTable;
|
|
var ASecondTable : TucaBmpSecondTable
|
|
);
|
|
|
|
procedure AddLine(AOut : TStream; const ALine : ansistring);
|
|
var
|
|
buffer : ansistring;
|
|
begin
|
|
buffer := ALine + sLineBreak;
|
|
AOut.Write(buffer[1],Length(buffer));
|
|
end;
|
|
|
|
var
|
|
i, j, c : Integer;
|
|
locLine : string;
|
|
value : UInt24;
|
|
begin
|
|
AddLine(AStream,'const');
|
|
AddLine(AStream,' UCA_TABLE_1 : array[0..255] of Byte = (');
|
|
locLine := '';
|
|
for i := Low(AFirstTable) to High(AFirstTable) - 1 do begin
|
|
locLine := locLine + IntToStr(AFirstTable[i]) + ',';
|
|
if (((i+1) mod 16) = 0) then begin
|
|
locLine := ' ' + locLine;
|
|
AddLine(AStream,locLine);
|
|
locLine := '';
|
|
end;
|
|
end;
|
|
locLine := locLine + IntToStr(AFirstTable[High(AFirstTable)]);
|
|
locLine := ' ' + locLine;
|
|
AddLine(AStream,locLine);
|
|
AddLine(AStream,' );' + sLineBreak);
|
|
|
|
AddLine(ANativeEndianStream,'const');
|
|
AddLine(ANativeEndianStream,' UCA_TABLE_2 : array[0..(256*' + IntToStr(Length(ASecondTable)) +'-1)] of UInt24 =(');
|
|
c := High(ASecondTable);
|
|
for i := Low(ASecondTable) to c do begin
|
|
locLine := '';
|
|
for j := Low(TucaBmpSecondTableItem) to High(TucaBmpSecondTableItem) do begin
|
|
value := ASecondTable[i][j];
|
|
locLine := locLine + UInt24ToStr(value,ENDIAN_NATIVE) + ',';
|
|
if (((j+1) mod 7) = 0) then begin
|
|
if (i = c) and (j = High(TucaBmpSecondTableItem)) then
|
|
Delete(locLine,Length(locLine),1);
|
|
locLine := ' ' + locLine;
|
|
AddLine(ANativeEndianStream,locLine);
|
|
locLine := '';
|
|
end;
|
|
end;
|
|
if (locLine <> '') then begin
|
|
if (i = c) then
|
|
Delete(locLine,Length(locLine),1);
|
|
locLine := ' ' + locLine;
|
|
AddLine(ANativeEndianStream,locLine);
|
|
end;
|
|
end;
|
|
AddLine(ANativeEndianStream,' );' + sLineBreak);
|
|
|
|
AddLine(ANonNativeEndianStream,'const');
|
|
AddLine(ANonNativeEndianStream,' UCA_TABLE_2 : array[0..(256*' + IntToStr(Length(ASecondTable)) +'-1)] of UInt24 =(');
|
|
c := High(ASecondTable);
|
|
for i := Low(ASecondTable) to c do begin
|
|
locLine := '';
|
|
for j := Low(TucaBmpSecondTableItem) to High(TucaBmpSecondTableItem) do begin
|
|
value := ASecondTable[i][j];
|
|
locLine := locLine + UInt24ToStr(value,ENDIAN_NON_NATIVE) + ',';
|
|
if (((j+1) mod 7) = 0) then begin
|
|
if (i = c) and (j = High(TucaBmpSecondTableItem)) then
|
|
Delete(locLine,Length(locLine),1);
|
|
locLine := ' ' + locLine;
|
|
AddLine(ANonNativeEndianStream,locLine);
|
|
locLine := '';
|
|
end;
|
|
end;
|
|
if (locLine <> '') then begin
|
|
if (i = c) then
|
|
Delete(locLine,Length(locLine),1);
|
|
locLine := ' ' + locLine;
|
|
AddLine(ANonNativeEndianStream,locLine);
|
|
end;
|
|
end;
|
|
AddLine(ANonNativeEndianStream,' );' + sLineBreak);
|
|
end;
|
|
|
|
procedure GenerateBinaryUCA_BmpTables(
|
|
ANativeEndianStream,
|
|
ANonNativeEndianStream : TStream;
|
|
var AFirstTable : TucaBmpFirstTable;
|
|
var ASecondTable : TucaBmpSecondTable
|
|
);
|
|
var
|
|
i, j : Integer;
|
|
value : UInt24;
|
|
begin
|
|
ANativeEndianStream.Write(AFirstTable[0],Length(AFirstTable));
|
|
ANonNativeEndianStream.Write(AFirstTable[0],Length(AFirstTable));
|
|
for i := Low(ASecondTable) to High(ASecondTable) do begin
|
|
for j := Low(TucaBmpSecondTableItem) to High(TucaBmpSecondTableItem) do begin
|
|
value := ASecondTable[i][j];
|
|
ANativeEndianStream.Write(value,SizeOf(value));
|
|
ReverseBytes(value,SizeOf(value));
|
|
ANonNativeEndianStream.Write(value,SizeOf(value));
|
|
end;
|
|
end;
|
|
end;
|
|
|
|
procedure GenerateUCA_PropTable(
|
|
// WARNING : files must be generated for each endianess (Little / Big)
|
|
ADest : TStream;
|
|
const APropBook : PUCA_PropBook;
|
|
const AEndian : TEndianKind
|
|
);
|
|
|
|
procedure AddLine(const ALine : ansistring);
|
|
var
|
|
buffer : ansistring;
|
|
begin
|
|
buffer := ALine + sLineBreak;
|
|
ADest.Write(buffer[1],Length(buffer));
|
|
end;
|
|
|
|
var
|
|
i, c : Integer;
|
|
locLine : string;
|
|
p : PByte;
|
|
begin
|
|
c := APropBook^.ItemSize;
|
|
AddLine('const');
|
|
AddLine(' UCA_PROPS : array[0..' + IntToStr(c-1) + '] of Byte = (');
|
|
locLine := '';
|
|
if (AEndian = ENDIAN_NATIVE) then
|
|
p := PByte(APropBook^.Items)
|
|
else
|
|
p := PByte(APropBook^.ItemsOtherEndian);
|
|
for i := 0 to c - 2 do begin
|
|
locLine := locLine + IntToStr(p[i]) + ',';
|
|
if (((i+1) mod 60) = 0) then begin
|
|
locLine := ' ' + locLine;
|
|
AddLine(locLine);
|
|
locLine := '';
|
|
end;
|
|
end;
|
|
locLine := locLine + IntToStr(p[c-1]);
|
|
locLine := ' ' + locLine;
|
|
AddLine(locLine);
|
|
AddLine(' );' + sLineBreak);
|
|
end;
|
|
|
|
procedure GenerateBinaryUCA_PropTable(
|
|
// WARNING : files must be generated for each endianess (Little / Big)
|
|
ANativeEndianStream,
|
|
ANonNativeEndianStream : TStream;
|
|
const APropBook : PUCA_PropBook
|
|
);
|
|
begin
|
|
ANativeEndianStream.Write(APropBook^.Items^,APropBook^.ItemSize);
|
|
ANonNativeEndianStream.Write(APropBook^.ItemsOtherEndian^,APropBook^.ItemSize);
|
|
end;
|
|
|
|
procedure GenerateUCA_OBmpTables(
|
|
AStream,
|
|
ANativeEndianStream,
|
|
ANonNativeEndianStream : TStream;
|
|
var AFirstTable : TucaOBmpFirstTable;
|
|
var ASecondTable : TucaOBmpSecondTable
|
|
);
|
|
|
|
procedure AddLine(AOut : TStream; const ALine : ansistring);
|
|
var
|
|
buffer : ansistring;
|
|
begin
|
|
buffer := ALine + sLineBreak;
|
|
AOut.Write(buffer[1],Length(buffer));
|
|
end;
|
|
|
|
var
|
|
i, j, c : Integer;
|
|
locLine : string;
|
|
value : UInt24;
|
|
begin
|
|
AddLine(AStream,'const');
|
|
AddLine(AStream,' UCAO_TABLE_1 : array[0..' + IntToStr(HIGH_SURROGATE_COUNT-1) + '] of Word = (');
|
|
locLine := '';
|
|
for i := Low(AFirstTable) to High(AFirstTable) - 1 do begin
|
|
locLine := locLine + IntToStr(AFirstTable[i]) + ',';
|
|
if (((i+1) mod 16) = 0) then begin
|
|
locLine := ' ' + locLine;
|
|
AddLine(AStream,locLine);
|
|
locLine := '';
|
|
end;
|
|
end;
|
|
locLine := locLine + IntToStr(AFirstTable[High(AFirstTable)]);
|
|
locLine := ' ' + locLine;
|
|
AddLine(AStream,locLine);
|
|
AddLine(AStream,' );' + sLineBreak);
|
|
|
|
AddLine(ANativeEndianStream,' UCAO_TABLE_2 : array[0..('+IntToStr(LOW_SURROGATE_COUNT)+'*' + IntToStr(Length(ASecondTable)) +'-1)] of UInt24 =(');
|
|
c := High(ASecondTable);
|
|
for i := Low(ASecondTable) to c do begin
|
|
locLine := '';
|
|
for j := Low(TucaOBmpSecondTableItem) to High(TucaOBmpSecondTableItem) do begin
|
|
value := ASecondTable[i][j];
|
|
locLine := locLine + UInt24ToStr(value,ENDIAN_NATIVE) + ',';
|
|
if (((j+1) mod 7) = 0) then begin
|
|
if (i = c) and (j = High(TucaOBmpSecondTableItem)) then
|
|
Delete(locLine,Length(locLine),1);
|
|
locLine := ' ' + locLine;
|
|
AddLine(ANativeEndianStream,locLine);
|
|
locLine := '';
|
|
end;
|
|
end;
|
|
if (locLine <> '') then begin
|
|
if (i = c) then
|
|
Delete(locLine,Length(locLine),1);
|
|
locLine := ' ' + locLine;
|
|
AddLine(ANativeEndianStream,locLine);
|
|
end;
|
|
end;
|
|
AddLine(ANativeEndianStream,' );' + sLineBreak);
|
|
|
|
AddLine(ANonNativeEndianStream,' UCAO_TABLE_2 : array[0..('+IntToStr(LOW_SURROGATE_COUNT)+'*' + IntToStr(Length(ASecondTable)) +'-1)] of UInt24 =(');
|
|
c := High(ASecondTable);
|
|
for i := Low(ASecondTable) to c do begin
|
|
locLine := '';
|
|
for j := Low(TucaOBmpSecondTableItem) to High(TucaOBmpSecondTableItem) do begin
|
|
value := ASecondTable[i][j];
|
|
locLine := locLine + UInt24ToStr(value,ENDIAN_NON_NATIVE) + ',';
|
|
if (((j+1) mod 7) = 0) then begin
|
|
if (i = c) and (j = High(TucaOBmpSecondTableItem)) then
|
|
Delete(locLine,Length(locLine),1);
|
|
locLine := ' ' + locLine;
|
|
AddLine(ANonNativeEndianStream,locLine);
|
|
locLine := '';
|
|
end;
|
|
end;
|
|
if (locLine <> '') then begin
|
|
if (i = c) then
|
|
Delete(locLine,Length(locLine),1);
|
|
locLine := ' ' + locLine;
|
|
AddLine(ANonNativeEndianStream,locLine);
|
|
end;
|
|
end;
|
|
AddLine(ANonNativeEndianStream,' );' + sLineBreak);
|
|
end;
|
|
|
|
procedure GenerateBinaryUCA_OBmpTables(
|
|
ANativeEndianStream,
|
|
ANonNativeEndianStream : TStream;
|
|
var AFirstTable : TucaOBmpFirstTable;
|
|
var ASecondTable : TucaOBmpSecondTable
|
|
);
|
|
var
|
|
i, j : Integer;
|
|
locLine : string;
|
|
wordValue : Word;
|
|
value : UInt24;
|
|
begin
|
|
for i := Low(AFirstTable) to High(AFirstTable) do begin
|
|
wordValue := AFirstTable[i];
|
|
ANativeEndianStream.Write(wordValue,SizeOf(wordValue));
|
|
ReverseBytes(wordValue,SizeOf(wordValue));
|
|
ANonNativeEndianStream.Write(wordValue,SizeOf(wordValue));
|
|
end;
|
|
|
|
for i := Low(ASecondTable) to High(ASecondTable) do begin
|
|
for j := Low(TucaOBmpSecondTableItem) to High(TucaOBmpSecondTableItem) do begin
|
|
value := ASecondTable[i][j];
|
|
ANativeEndianStream.Write(value,SizeOf(value));
|
|
ReverseBytes(value,SizeOf(value));
|
|
ANonNativeEndianStream.Write(value,SizeOf(value));
|
|
end;
|
|
end;
|
|
end;
|
|
|
|
type
|
|
POBmpSecondTableItem = ^TOBmpSecondTableItem;
|
|
function IndexOf(
|
|
const AItem : POBmpSecondTableItem;
|
|
const ATable : TOBmpSecondTable;
|
|
const ATableActualLength : Integer
|
|
) : Integer;overload;
|
|
var
|
|
i : Integer;
|
|
p : POBmpSecondTableItem;
|
|
begin
|
|
Result := -1;
|
|
if (ATableActualLength > 0) then begin
|
|
p := @ATable[0];
|
|
for i := 0 to ATableActualLength - 1 do begin
|
|
if CompareMem(p,AItem,SizeOf(TOBmpSecondTableItem)) then begin
|
|
Result := i;
|
|
Break;
|
|
end;
|
|
Inc(p);
|
|
end;
|
|
end;
|
|
end;
|
|
|
|
procedure MakeOBmpTables(
|
|
var AFirstTable : TOBmpFirstTable;
|
|
var ASecondTable : TOBmpSecondTable;
|
|
const ADataLineList : TDataLineRecArray
|
|
);
|
|
var
|
|
locLowByte, locHighByte : Word;
|
|
locTableItem : TOBmpSecondTableItem;
|
|
locCP : TUnicodeCodePoint;
|
|
i, locSecondActualLen : Integer;
|
|
begin
|
|
SetLength(ASecondTable,2000);
|
|
locSecondActualLen := 0;
|
|
for locHighByte := 0 to HIGH_SURROGATE_COUNT - 1 do begin
|
|
FillChar(locTableItem,SizeOf(locTableItem),#0);
|
|
for locLowByte := 0 to LOW_SURROGATE_COUNT - 1 do begin
|
|
locCP := ToUCS4(HIGH_SURROGATE_BEGIN + locHighByte,LOW_SURROGATE_BEGIN + locLowByte);
|
|
locTableItem[locLowByte] := GetPropID(locCP,ADataLineList)// - 1;
|
|
end;
|
|
i := IndexOf(@locTableItem,ASecondTable,locSecondActualLen);
|
|
if (i = -1) then begin
|
|
if (locSecondActualLen = Length(ASecondTable)) then
|
|
SetLength(ASecondTable,locSecondActualLen + 50);
|
|
i := locSecondActualLen;
|
|
ASecondTable[i] := locTableItem;
|
|
Inc(locSecondActualLen);
|
|
end;
|
|
AFirstTable[locHighByte] := i;
|
|
end;
|
|
SetLength(ASecondTable,locSecondActualLen);
|
|
end;
|
|
|
|
type
|
|
P3lvlOBmp3TableItem = ^T3lvlOBmp3TableItem;
|
|
function IndexOf(
|
|
const AItem : P3lvlOBmp3TableItem;
|
|
const ATable : T3lvlOBmp3Table;
|
|
const ATableActualLength : Integer
|
|
) : Integer;overload;
|
|
var
|
|
i : Integer;
|
|
p : P3lvlOBmp3TableItem;
|
|
begin
|
|
Result := -1;
|
|
if (ATableActualLength > 0) then begin
|
|
p := @ATable[0];
|
|
for i := 0 to ATableActualLength - 1 do begin
|
|
if CompareMem(p,AItem,SizeOf(T3lvlOBmp3TableItem)) then begin
|
|
Result := i;
|
|
Break;
|
|
end;
|
|
Inc(p);
|
|
end;
|
|
end;
|
|
end;
|
|
|
|
type
|
|
P3lvlOBmp2TableItem = ^T3lvlOBmp2TableItem;
|
|
function IndexOf(
|
|
const AItem : P3lvlOBmp2TableItem;
|
|
const ATable : T3lvlOBmp2Table
|
|
) : Integer;overload;
|
|
var
|
|
i : Integer;
|
|
p : P3lvlOBmp2TableItem;
|
|
begin
|
|
Result := -1;
|
|
if (Length(ATable) > 0) then begin
|
|
p := @ATable[0];
|
|
for i := 0 to Length(ATable) - 1 do begin
|
|
if CompareMem(p,AItem,SizeOf(T3lvlOBmp2TableItem)) then begin
|
|
Result := i;
|
|
Break;
|
|
end;
|
|
Inc(p);
|
|
end;
|
|
end;
|
|
end;
|
|
procedure MakeOBmpTables3Levels(
|
|
var AFirstTable : T3lvlOBmp1Table;
|
|
var ASecondTable : T3lvlOBmp2Table;
|
|
var AThirdTable : T3lvlOBmp3Table;
|
|
const ADataLineList : TDataLineRecArray
|
|
);
|
|
var
|
|
locLowByte0, locLowByte1, locHighByte : Word;
|
|
locTableItem2 : T3lvlOBmp2TableItem;
|
|
locTableItem3 : T3lvlOBmp3TableItem;
|
|
locCP : TUnicodeCodePoint;
|
|
i, locThirdActualLen : Integer;
|
|
begin
|
|
SetLength(AThirdTable,120);
|
|
locThirdActualLen := 0;
|
|
for locHighByte := 0 to 1023 do begin
|
|
FillChar(locTableItem2,SizeOf(locTableItem2),#0);
|
|
for locLowByte0 := 0 to 31 do begin
|
|
FillChar(locTableItem3,SizeOf(locTableItem3),#0);
|
|
for locLowByte1 := 0 to 31 do begin
|
|
locCP := ToUCS4(HIGH_SURROGATE_BEGIN + locHighByte,LOW_SURROGATE_BEGIN + (locLowByte0*32) + locLowByte1);
|
|
locTableItem3[locLowByte1] := GetPropID(locCP,ADataLineList);
|
|
end;
|
|
i := IndexOf(@locTableItem3,AThirdTable,locThirdActualLen);
|
|
if (i = -1) then begin
|
|
if (locThirdActualLen = Length(AThirdTable)) then
|
|
SetLength(AThirdTable,locThirdActualLen + 50);
|
|
i := locThirdActualLen;
|
|
AThirdTable[i] := locTableItem3;
|
|
Inc(locThirdActualLen);
|
|
end;
|
|
locTableItem2[locLowByte0] := i;
|
|
end;
|
|
i := IndexOf(@locTableItem2,ASecondTable);
|
|
if (i = -1) then begin
|
|
i := Length(ASecondTable);
|
|
SetLength(ASecondTable,(i + 1));
|
|
ASecondTable[i] := locTableItem2;
|
|
end;
|
|
AFirstTable[locHighByte] := i;
|
|
end;
|
|
SetLength(AThirdTable,locThirdActualLen);
|
|
end;
|
|
|
|
procedure GenerateOBmpTables(
|
|
ADest : TStream;
|
|
var AFirstTable : TOBmpFirstTable;
|
|
var ASecondTable : TOBmpSecondTable
|
|
);
|
|
procedure AddLine(const ALine : ansistring);
|
|
var
|
|
buffer : ansistring;
|
|
begin
|
|
buffer := ALine + sLineBreak;
|
|
ADest.Write(buffer[1],Length(buffer));
|
|
end;
|
|
|
|
var
|
|
i, j, c : Integer;
|
|
locLine : string;
|
|
begin
|
|
AddLine('const');
|
|
AddLine(' UCO_TABLE_1 : array[0..' + IntToStr(HIGH_SURROGATE_COUNT-1) + '] of Word = (');
|
|
locLine := '';
|
|
for i := Low(AFirstTable) to High(AFirstTable) - 1 do begin
|
|
locLine := locLine + IntToStr(AFirstTable[i]) + ',';
|
|
if (((i+1) mod 20) = 0) then begin
|
|
locLine := ' ' + locLine;
|
|
AddLine(locLine);
|
|
locLine := '';
|
|
end;
|
|
end;
|
|
locLine := locLine + IntToStr(AFirstTable[High(AFirstTable)]);
|
|
locLine := ' ' + locLine;
|
|
AddLine(locLine);
|
|
AddLine(' );' + sLineBreak);
|
|
|
|
AddLine(' UCO_TABLE_2 : array[0..('+IntToStr(LOW_SURROGATE_COUNT)+'*' + IntToStr(Length(ASecondTable)) +'-1)] of Word =(');
|
|
c := High(ASecondTable);
|
|
for i := Low(ASecondTable) to c do begin
|
|
locLine := '';
|
|
for j := Low(TOBmpSecondTableItem) to High(TOBmpSecondTableItem) do begin
|
|
locLine := locLine + IntToStr(ASecondTable[i][j]) + ',';
|
|
if (((j+1) mod 16) = 0) then begin
|
|
if (i = c) and (j = High(TOBmpSecondTableItem)) then
|
|
Delete(locLine,Length(locLine),1);
|
|
locLine := ' ' + locLine;
|
|
AddLine(locLine);
|
|
locLine := '';
|
|
end;
|
|
end;
|
|
end;
|
|
AddLine(' );' + sLineBreak);
|
|
end;
|
|
|
|
|
|
//----------------------------------
|
|
procedure Generate3lvlOBmpTables(
|
|
ADest : TStream;
|
|
var AFirstTable : T3lvlOBmp1Table;
|
|
var ASecondTable : T3lvlOBmp2Table;
|
|
var AThirdTable : T3lvlOBmp3Table
|
|
);
|
|
|
|
procedure AddLine(const ALine : ansistring);
|
|
var
|
|
buffer : ansistring;
|
|
begin
|
|
buffer := ALine + sLineBreak;
|
|
ADest.Write(buffer[1],Length(buffer));
|
|
end;
|
|
|
|
var
|
|
i, j, c : Integer;
|
|
locLine : string;
|
|
begin
|
|
AddLine('const');
|
|
AddLine(' UCO_TABLE_1 : array[0..1023] of Word = (');
|
|
locLine := '';
|
|
for i := Low(AFirstTable) to High(AFirstTable) - 1 do begin
|
|
locLine := locLine + IntToStr(AFirstTable[i]) + ',';
|
|
if (((i+1) mod 20) = 0) then begin
|
|
locLine := ' ' + locLine;
|
|
AddLine(locLine);
|
|
locLine := '';
|
|
end;
|
|
end;
|
|
locLine := locLine + IntToStr(AFirstTable[High(AFirstTable)]);
|
|
locLine := ' ' + locLine;
|
|
AddLine(locLine);
|
|
AddLine(' );' + sLineBreak);
|
|
|
|
AddLine(' UCO_TABLE_2 : array[0..' + IntToStr(Length(ASecondTable)-1) +'] of array[0..31] of Word = (');
|
|
c := High(ASecondTable);
|
|
for i := Low(ASecondTable) to c do begin
|
|
locLine := '(';
|
|
for j := Low(T3lvlOBmp2TableItem) to High(T3lvlOBmp2TableItem) do
|
|
locLine := locLine + IntToStr(ASecondTable[i][j]) + ',';
|
|
Delete(locLine,Length(locLine),1);
|
|
locLine := ' ' + locLine + ')';
|
|
if (i < c) then
|
|
locLine := locLine + ',';
|
|
AddLine(locLine);
|
|
end;
|
|
AddLine(' );' + sLineBreak);
|
|
|
|
AddLine(' UCO_TABLE_3 : array[0..' + IntToStr(Length(AThirdTable)-1) +'] of array[0..31] of Word = (');
|
|
c := High(AThirdTable);
|
|
for i := Low(AThirdTable) to c do begin
|
|
locLine := '(';
|
|
for j := Low(T3lvlOBmp3TableItem) to High(T3lvlOBmp3TableItem) do
|
|
locLine := locLine + IntToStr(AThirdTable[i][j]) + ',';
|
|
Delete(locLine,Length(locLine),1);
|
|
locLine := ' ' + locLine + ')';
|
|
if (i < c) then
|
|
locLine := locLine + ',';
|
|
AddLine(locLine);
|
|
end;
|
|
AddLine(' );' + sLineBreak);
|
|
end;
|
|
|
|
function GetProp(
|
|
const AHighS,
|
|
ALowS : Word;
|
|
const AProps : TPropRecArray;
|
|
var AFirstTable : TOBmpFirstTable;
|
|
var ASecondTable : TOBmpSecondTable
|
|
): PPropRec;
|
|
begin
|
|
Result := @AProps[ASecondTable[AFirstTable[AHighS-HIGH_SURROGATE_BEGIN]][ALowS-LOW_SURROGATE_BEGIN]];
|
|
end;
|
|
|
|
function GetProp(
|
|
const AHighS,
|
|
ALowS : Word;
|
|
const AProps : TPropRecArray;
|
|
var AFirstTable : T3lvlOBmp1Table;
|
|
var ASecondTable : T3lvlOBmp2Table;
|
|
var AThirdTable : T3lvlOBmp3Table
|
|
): PPropRec;
|
|
begin
|
|
Result := @AProps[AThirdTable[ASecondTable[AFirstTable[AHighS]][ALowS div 32]][ALowS mod 32]];
|
|
//Result := @AProps[ASecondTable[AFirstTable[AHighS-HIGH_SURROGATE_BEGIN]][ALowS-LOW_SURROGATE_BEGIN]];
|
|
end;
|
|
|
|
{ TUCA_PropItemContextTreeRec }
|
|
|
|
function TUCA_PropItemContextTreeRec.GetData : PUCA_PropItemContextTreeNodeRec;
|
|
begin
|
|
if (Size = 0) then
|
|
Result := nil
|
|
else
|
|
Result := PUCA_PropItemContextTreeNodeRec(
|
|
PtrUInt(
|
|
PtrUInt(@Self) + SizeOf(UInt24){Size}
|
|
)
|
|
);
|
|
end;
|
|
|
|
{ TUCA_LineContextRec }
|
|
|
|
procedure TUCA_LineContextRec.Clear;
|
|
begin
|
|
Data := nil
|
|
end;
|
|
|
|
procedure TUCA_LineContextRec.Assign(ASource : PUCA_LineContextRec);
|
|
var
|
|
c, i : Integer;
|
|
begin
|
|
if (ASource = nil) then begin
|
|
Clear();
|
|
exit;
|
|
end;
|
|
c := Length(ASource^.Data);
|
|
SetLength(Self.Data,c);
|
|
for i := 0 to c-1 do
|
|
Self.Data[i].Assign(@ASource^.Data[i]);
|
|
end;
|
|
|
|
function TUCA_LineContextRec.Clone : TUCA_LineContextRec;
|
|
begin
|
|
Result.Clear();
|
|
Result.Assign(@Self);
|
|
end;
|
|
|
|
{ TUCA_LineContextItemRec }
|
|
|
|
procedure TUCA_LineContextItemRec.Clear();
|
|
begin
|
|
CodePoints := nil;
|
|
Weights := nil;
|
|
end;
|
|
|
|
procedure TUCA_LineContextItemRec.Assign(ASource : PUCA_LineContextItemRec);
|
|
begin
|
|
if (ASource = nil) then begin
|
|
Clear();
|
|
exit;
|
|
end;
|
|
Self.CodePoints := Copy(ASource^.CodePoints);
|
|
Self.Weights := Copy(ASource^.Weights);
|
|
end;
|
|
|
|
function TUCA_LineContextItemRec.Clone() : TUCA_LineContextItemRec;
|
|
begin
|
|
Result.Clear();
|
|
Result.Assign(@Self);
|
|
end;
|
|
|
|
{ TUCA_LineRec }
|
|
|
|
procedure TUCA_LineRec.Clear;
|
|
begin
|
|
CodePoints := nil;
|
|
Weights := nil;
|
|
Deleted := False;
|
|
Stored := False;
|
|
Context.Clear();
|
|
end;
|
|
|
|
procedure TUCA_LineRec.Assign(ASource : PUCA_LineRec);
|
|
begin
|
|
if (ASource = nil) then begin
|
|
Clear();
|
|
exit;
|
|
end;
|
|
Self.CodePoints := Copy(ASource^.CodePoints);
|
|
Self.Weights := Copy(ASource^.Weights);
|
|
Self.Deleted := ASource^.Deleted;
|
|
Self.Stored := ASource^.Stored;
|
|
Self.Context.Assign(@ASource^.Context);
|
|
end;
|
|
|
|
function TUCA_LineRec.Clone : TUCA_LineRec;
|
|
begin
|
|
Result.Clear();
|
|
Result.Assign(@Self);
|
|
end;
|
|
|
|
function TUCA_LineRec.HasContext() : Boolean;
|
|
begin
|
|
Result := (Length(Context.Data) > 0);
|
|
end;
|
|
|
|
{ TPropRec }
|
|
|
|
function TPropRec.GetCategory: TUnicodeCategory;
|
|
begin
|
|
Result := TUnicodeCategory((CategoryData and Byte($F8)) shr 3);
|
|
end;
|
|
|
|
function TPropRec.GetUnifiedIdeograph : Boolean;
|
|
begin
|
|
Result := IsBitON(CategoryData,FLAG_UNIFIED_IDEOGRAPH);
|
|
end;
|
|
|
|
procedure TPropRec.SetCategory(AValue: TUnicodeCategory);
|
|
var
|
|
b : Byte;
|
|
begin
|
|
b := Ord(AValue);
|
|
b := b shl 3;
|
|
CategoryData := CategoryData or b;
|
|
//CategoryData := CategoryData or Byte(Byte(Ord(AValue)) shl 3);
|
|
end;
|
|
|
|
function TPropRec.GetWhiteSpace: Boolean;
|
|
begin
|
|
Result := IsBitON(CategoryData,FLAG_WHITE_SPACE);
|
|
end;
|
|
|
|
procedure TPropRec.SetUnifiedIdeograph(AValue : Boolean);
|
|
begin
|
|
SetBit(CategoryData,FLAG_UNIFIED_IDEOGRAPH,AValue);
|
|
end;
|
|
|
|
procedure TPropRec.SetWhiteSpace(AValue: Boolean);
|
|
begin
|
|
SetBit(CategoryData,FLAG_WHITE_SPACE,AValue);
|
|
end;
|
|
|
|
function TPropRec.GetHangulSyllable: Boolean;
|
|
begin
|
|
Result := IsBitON(CategoryData,FLAG_HANGUL_SYLLABLE);
|
|
end;
|
|
|
|
procedure TPropRec.SetHangulSyllable(AValue: Boolean);
|
|
begin
|
|
SetBit(CategoryData,FLAG_HANGUL_SYLLABLE,AValue);
|
|
end;
|
|
|
|
{ TUCA_PropItemRec }
|
|
|
|
function TUCA_PropItemRec.GetWeightSize : Word;
|
|
var
|
|
c : Integer;
|
|
begin
|
|
c := WeightLength;
|
|
if (c = 0) then
|
|
exit(0);
|
|
Result := c*SizeOf(TUCA_PropWeights);
|
|
if IsWeightCompress_1() then
|
|
Result := Result - 1;
|
|
if IsWeightCompress_2() then
|
|
Result := Result - 1;
|
|
end;
|
|
|
|
function TUCA_PropItemRec.HasCodePoint(): Boolean;
|
|
begin
|
|
Result := IsBitON(Flags,FLAG_CODEPOINT);
|
|
end;
|
|
|
|
procedure TUCA_PropItemRec.GetWeightArray(ADest: PUCA_PropWeights);
|
|
var
|
|
c : Integer;
|
|
p : PByte;
|
|
pd : PUCA_PropWeights;
|
|
begin
|
|
c := WeightLength;
|
|
p := PByte(PtrUInt(@Self) + SizeOf(TUCA_PropItemRec));
|
|
pd := ADest;
|
|
pd^.Weights[0] := PWord(p)^;
|
|
p := p + 2;
|
|
if not IsWeightCompress_1() then begin
|
|
pd^.Weights[1] := PWord(p)^;
|
|
p := p + 2;
|
|
end else begin
|
|
pd^.Weights[1] := p^;
|
|
p := p + 1;
|
|
end;
|
|
if not IsWeightCompress_2() then begin
|
|
pd^.Weights[2] := PWord(p)^;
|
|
p := p + 2;
|
|
end else begin
|
|
pd^.Weights[2] := p^;
|
|
p := p + 1;
|
|
end;
|
|
if (c > 1) then
|
|
Move(p^, (pd+1)^, ((c-1)*SizeOf(TUCA_PropWeights)));
|
|
end;
|
|
|
|
function TUCA_PropItemRec.GetSelfOnlySize() : Cardinal;
|
|
begin
|
|
Result := SizeOf(TUCA_PropItemRec);
|
|
if (WeightLength > 0) then begin
|
|
Result := Result + (WeightLength * Sizeof(TUCA_PropWeights));
|
|
if IsWeightCompress_1() then
|
|
Result := Result - 1;
|
|
if IsWeightCompress_2() then
|
|
Result := Result - 1;
|
|
end;
|
|
if HasCodePoint() then
|
|
Result := Result + SizeOf(UInt24);
|
|
if Contextual then
|
|
Result := Result + Cardinal(GetContext()^.Size);
|
|
end;
|
|
|
|
procedure TUCA_PropItemRec.SetContextual(AValue : Boolean);
|
|
begin
|
|
SetBit(Flags,FLAG_CONTEXTUAL,AValue);
|
|
end;
|
|
|
|
function TUCA_PropItemRec.GetContextual : Boolean;
|
|
begin
|
|
Result := IsBitON(Flags,FLAG_CONTEXTUAL);
|
|
end;
|
|
|
|
function TUCA_PropItemRec.GetContext() : PUCA_PropItemContextTreeRec;
|
|
var
|
|
p : PtrUInt;
|
|
begin
|
|
if not Contextual then
|
|
exit(nil);
|
|
p := PtrUInt(@Self) + SizeOf(TUCA_PropItemRec);
|
|
if IsBitON(Flags,FLAG_CODEPOINT) then
|
|
p := p + SizeOf(UInt24);
|
|
Result := PUCA_PropItemContextTreeRec(p);
|
|
end;
|
|
|
|
procedure TUCA_PropItemRec.SetDeleted(AValue: Boolean);
|
|
begin
|
|
SetBit(Flags,FLAG_DELETION,AValue);
|
|
end;
|
|
|
|
function TUCA_PropItemRec.IsDeleted: Boolean;
|
|
begin
|
|
Result := IsBitON(Flags,FLAG_DELETION);
|
|
end;
|
|
|
|
function TUCA_PropItemRec.IsValid() : Boolean;
|
|
begin
|
|
Result := IsBitON(Flags,FLAG_VALID);
|
|
end;
|
|
|
|
function TUCA_PropItemRec.IsWeightCompress_1 : Boolean;
|
|
begin
|
|
Result := IsBitON(Flags,FLAG_COMPRESS_WEIGHT_1);
|
|
end;
|
|
|
|
function TUCA_PropItemRec.IsWeightCompress_2 : Boolean;
|
|
begin
|
|
Result := IsBitON(Flags,FLAG_COMPRESS_WEIGHT_2);
|
|
end;
|
|
|
|
function TUCA_PropItemRec.GetCodePoint: UInt24;
|
|
begin
|
|
if HasCodePoint() then begin
|
|
if Contextual then
|
|
Result := PUInt24(
|
|
PtrUInt(@Self) + Self.GetSelfOnlySize()- SizeOf(UInt24) -
|
|
Cardinal(GetContext()^.Size)
|
|
)^
|
|
else
|
|
Result := PUInt24(PtrUInt(@Self) + Self.GetSelfOnlySize() - SizeOf(UInt24))^
|
|
end else begin
|
|
raise Exception.Create('TUCA_PropItemRec.GetCodePoint : "No code point available."');
|
|
end
|
|
end;
|
|
|
|
function avl_CompareCodePoints(Item1, Item2: Pointer): Integer;
|
|
var
|
|
a, b : PUCA_LineContextItemRec;
|
|
i, hb : Integer;
|
|
begin
|
|
if (Item1 = Item2) then
|
|
exit(0);
|
|
if (Item1 = nil) then
|
|
exit(-1);
|
|
if (Item2 = nil) then
|
|
exit(1);
|
|
a := Item1;
|
|
b := Item2;
|
|
if (a^.CodePoints = b^.CodePoints) then
|
|
exit(0);
|
|
Result := 1;
|
|
hb := Length(b^.CodePoints) - 1;
|
|
for i := 0 to Length(a^.CodePoints) - 1 do begin
|
|
if (i > hb) then
|
|
exit;
|
|
if (a^.CodePoints[i] < b^.CodePoints[i]) then
|
|
exit(-1);
|
|
if (a^.CodePoints[i] > b^.CodePoints[i]) then
|
|
exit(1);
|
|
end;
|
|
if (Length(a^.CodePoints) = Length(b^.CodePoints)) then
|
|
exit(0);
|
|
exit(-1);
|
|
end;
|
|
|
|
function ConstructAvlContextTree(AContext : PUCA_LineContextRec) : TAVLTree;
|
|
var
|
|
r : TAVLTree;
|
|
i : Integer;
|
|
begin
|
|
r := TAVLTree.Create(@avl_CompareCodePoints);
|
|
try
|
|
for i := 0 to Length(AContext^.Data) - 1 do
|
|
r.Add(@AContext^.Data[i]);
|
|
Result := r;
|
|
except
|
|
FreeAndNil(r);
|
|
raise;
|
|
end;
|
|
end;
|
|
|
|
function ConstructContextTree(
|
|
const AContext : PUCA_LineContextRec;
|
|
var ADestBuffer;
|
|
const ADestBufferLength : Cardinal
|
|
) : PUCA_PropItemContextTreeRec;
|
|
|
|
function CalcItemOnlySize(AItem : TAVLTreeNode) : Cardinal;
|
|
var
|
|
kitem : PUCA_LineContextItemRec;
|
|
begin
|
|
if (AItem = nil) then
|
|
exit(0);
|
|
kitem := AItem.Data;
|
|
Result := SizeOf(PUCA_PropItemContextTreeNodeRec^.Left) +
|
|
SizeOf(PUCA_PropItemContextTreeNodeRec^.Right) +
|
|
SizeOf(PUCA_PropItemContextRec^.CodePointCount) +
|
|
(Length(kitem^.CodePoints)*SizeOf(UInt24)) +
|
|
SizeOf(PUCA_PropItemContextRec^.WeightCount) +
|
|
(Length(kitem^.Weights)*SizeOf(TUCA_PropWeights));
|
|
end;
|
|
|
|
function CalcItemSize(AItem : TAVLTreeNode) : Cardinal;
|
|
begin
|
|
if (AItem = nil) then
|
|
exit(0);
|
|
Result := CalcItemOnlySize(AItem);
|
|
if (AItem.Left <> nil) then
|
|
Result := Result + CalcItemSize(AItem.Left);
|
|
if (AItem.Right <> nil) then
|
|
Result := Result + CalcItemSize(AItem.Right);
|
|
end;
|
|
|
|
function CalcSize(AData : TAVLTree) : Cardinal;
|
|
begin
|
|
Result := SizeOf(PUCA_PropItemContextTreeRec^.Size) + CalcItemSize(AData.Root);
|
|
end;
|
|
|
|
function ConstructItem(ASource : TAVLTreeNode; ADest : PUCA_PropItemContextTreeNodeRec) : Cardinal;
|
|
var
|
|
k : Integer;
|
|
kitem : PUCA_LineContextItemRec;
|
|
kpcp : PUInt24;
|
|
kpw : PUCA_PropWeights;
|
|
pextra : PtrUInt;
|
|
pnext : PUCA_PropItemContextTreeNodeRec;
|
|
begin
|
|
kitem := ASource.Data;
|
|
ADest^.Data.CodePointCount := Length(kitem^.CodePoints);
|
|
ADest^.Data.WeightCount := Length(kitem^.Weights);
|
|
pextra := PtrUInt(ADest)+SizeOf(ADest^.Left)+SizeOf(ADest^.Right)+
|
|
SizeOf(ADest^.Data.CodePointCount)+SizeOf(ADest^.Data.WeightCount);
|
|
if (ADest^.Data.CodePointCount > 0) then begin
|
|
kpcp := PUInt24(pextra);
|
|
for k := 0 to ADest^.Data.CodePointCount - 1 do begin
|
|
kpcp^ := kitem^.CodePoints[k];
|
|
Inc(kpcp);
|
|
end;
|
|
end;
|
|
if (ADest^.Data.WeightCount > 0) then begin
|
|
kpw := PUCA_PropWeights(pextra + (ADest^.Data.CodePointCount*SizeOf(UInt24)));
|
|
for k := 0 to ADest^.Data.WeightCount - 1 do begin
|
|
kpw^.Weights[0] := kitem^.Weights[k].Weights[0];
|
|
kpw^.Weights[1] := kitem^.Weights[k].Weights[1];
|
|
kpw^.Weights[2] := kitem^.Weights[k].Weights[2];
|
|
Inc(kpw);
|
|
end;
|
|
end;
|
|
Result := CalcItemOnlySize(ASource);
|
|
if (ASource.Left <> nil) then begin
|
|
pnext := PUCA_PropItemContextTreeNodeRec(PtrUInt(ADest) + Result);
|
|
ADest^.Left := Result;
|
|
Result := Result + ConstructItem(ASource.Left,pnext);
|
|
end else begin
|
|
ADest^.Left := 0;
|
|
end;
|
|
if (ASource.Right <> nil) then begin
|
|
pnext := PUCA_PropItemContextTreeNodeRec(PtrUInt(ADest) + Result);
|
|
ADest^.Right := Result;
|
|
Result := Result + ConstructItem(ASource.Right,pnext);
|
|
end else begin
|
|
ADest^.Right := 0;
|
|
end;
|
|
end;
|
|
|
|
var
|
|
c : PtrUInt;
|
|
r : PUCA_PropItemContextTreeRec;
|
|
p : PUCA_PropItemContextTreeNodeRec;
|
|
tempTree : TAVLTree;
|
|
begin
|
|
tempTree := ConstructAvlContextTree(AContext);
|
|
try
|
|
c := CalcSize(tempTree);
|
|
if (ADestBufferLength > 0) and (c > ADestBufferLength) then
|
|
raise Exception.Create(SInsufficientMemoryBuffer);
|
|
r := @ADestBuffer;
|
|
r^.Size := c;
|
|
p := PUCA_PropItemContextTreeNodeRec(PtrUInt(r) + SizeOf(r^.Size));
|
|
ConstructItem(tempTree.Root,p);
|
|
finally
|
|
tempTree.Free();
|
|
end;
|
|
Result := r;
|
|
end;
|
|
|
|
procedure ReverseRecordBytes(var AItem : TSerializedCollationHeader);
|
|
begin
|
|
ReverseBytes(AItem.BMP_Table1Length,SizeOf(AItem.BMP_Table1Length));
|
|
ReverseBytes(AItem.BMP_Table2Length,SizeOf(AItem.BMP_Table2Length));
|
|
ReverseBytes(AItem.OBMP_Table1Length,SizeOf(AItem.OBMP_Table1Length));
|
|
ReverseBytes(AItem.OBMP_Table2Length,SizeOf(AItem.OBMP_Table2Length));
|
|
ReverseBytes(AItem.PropCount,SizeOf(AItem.PropCount));
|
|
ReverseBytes(AItem.VariableLowLimit,SizeOf(AItem.VariableLowLimit));
|
|
ReverseBytes(AItem.VariableHighLimit,SizeOf(AItem.VariableHighLimit));
|
|
end;
|
|
|
|
procedure ReverseBytes(var AData; const ALength : Integer);
|
|
var
|
|
i,j : PtrInt;
|
|
c : Byte;
|
|
p : PByte;
|
|
begin
|
|
if (ALength = 1) then
|
|
exit;
|
|
p := @AData;
|
|
j := ALength div 2;
|
|
for i := 0 to Pred(j) do begin
|
|
c := p[i];
|
|
p[i] := p[(ALength - 1 ) - i];
|
|
p[(ALength - 1 ) - i] := c;
|
|
end;
|
|
end;
|
|
|
|
procedure ReverseArray(var AValue; const AArrayLength, AItemSize : PtrInt);
|
|
var
|
|
p : PByte;
|
|
i : PtrInt;
|
|
begin
|
|
if ( AArrayLength > 0 ) and ( AItemSize > 1 ) then begin
|
|
p := @AValue;
|
|
for i := 0 to Pred(AArrayLength) do begin
|
|
ReverseBytes(p^,AItemSize);
|
|
Inc(p,AItemSize);
|
|
end;
|
|
end;
|
|
end;
|
|
|
|
procedure ReverseContextNodeFromNativeEndian(s, d : PUCA_PropItemContextTreeNodeRec);
|
|
var
|
|
k : PtrUInt;
|
|
p_s, p_d : PByte;
|
|
begin
|
|
d^.Left := s^.Left;
|
|
ReverseBytes(d^.Left,SizeOf(d^.Left));
|
|
d^.Right := s^.Right;
|
|
ReverseBytes(d^.Right,SizeOf(d^.Right));
|
|
d^.Data.CodePointCount := s^.Data.CodePointCount;
|
|
ReverseBytes(d^.Data.CodePointCount,SizeOf(d^.Data.CodePointCount));
|
|
d^.Data.WeightCount := s^.Data.WeightCount;
|
|
ReverseBytes(d^.Data.WeightCount,SizeOf(d^.Data.WeightCount));
|
|
|
|
k := SizeOf(TUCA_PropItemContextTreeNodeRec);
|
|
p_s := PByte(PtrUInt(s) + k);
|
|
p_d := PByte(PtrUInt(d) + k);
|
|
k := (s^.Data.CodePointCount*SizeOf(UInt24));
|
|
Move(p_s^,p_d^, k);
|
|
ReverseArray(p_d^,s^.Data.CodePointCount,SizeOf(UInt24));
|
|
p_s := PByte(PtrUInt(p_s) + k);
|
|
p_d := PByte(PtrUInt(p_d) + k);
|
|
k := (s^.Data.WeightCount*SizeOf(TUCA_PropWeights));
|
|
Move(p_s^,p_d^,k);
|
|
ReverseArray(p_d^,(s^.Data.WeightCount*Length(TUCA_PropWeights.Weights)),SizeOf(TUCA_PropWeights.Weights[0]));
|
|
if (s^.Left > 0) then
|
|
ReverseContextNodeFromNativeEndian(
|
|
PUCA_PropItemContextTreeNodeRec(PtrUInt(s) + s^.Left),
|
|
PUCA_PropItemContextTreeNodeRec(PtrUInt(d) + s^.Left)
|
|
);
|
|
if (s^.Right > 0) then
|
|
ReverseContextNodeFromNativeEndian(
|
|
PUCA_PropItemContextTreeNodeRec(PtrUInt(s) + s^.Right),
|
|
PUCA_PropItemContextTreeNodeRec(PtrUInt(d) + s^.Right)
|
|
);
|
|
end;
|
|
|
|
procedure ReverseContextFromNativeEndian(s, d : PUCA_PropItemContextTreeRec);
|
|
var
|
|
k : PtrUInt;
|
|
begin
|
|
d^.Size := s^.Size;
|
|
ReverseBytes(d^.Size,SizeOf(d^.Size));
|
|
if (s^.Size = 0) then
|
|
exit;
|
|
k := SizeOf(s^.Size);
|
|
ReverseContextNodeFromNativeEndian(
|
|
PUCA_PropItemContextTreeNodeRec(PtrUInt(s)+k),
|
|
PUCA_PropItemContextTreeNodeRec(PtrUInt(d)+k)
|
|
);
|
|
end;
|
|
|
|
procedure ReverseFromNativeEndian(
|
|
const AData : PUCA_PropItemRec;
|
|
const ADataLen : Cardinal;
|
|
const ADest : PUCA_PropItemRec
|
|
);
|
|
var
|
|
s, d : PUCA_PropItemRec;
|
|
sCtx, dCtx : PUCA_PropItemContextTreeRec;
|
|
dataEnd : PtrUInt;
|
|
k, i : PtrUInt;
|
|
p_s, p_d : PByte;
|
|
pw_s, pw_d : PUCA_PropWeights;
|
|
begin
|
|
dataEnd := PtrUInt(AData) + ADataLen;
|
|
s := AData;
|
|
d := ADest;
|
|
while True do begin
|
|
d^.WeightLength := s^.WeightLength;
|
|
ReverseBytes(d^.WeightLength,SizeOf(d^.WeightLength));
|
|
d^.ChildCount := s^.ChildCount;
|
|
ReverseBytes(d^.ChildCount,SizeOf(d^.ChildCount));
|
|
d^.Size := s^.Size;
|
|
ReverseBytes(d^.Size,SizeOf(d^.Size));
|
|
d^.Flags := s^.Flags;
|
|
ReverseBytes(d^.Flags,SizeOf(d^.Flags));
|
|
if s^.Contextual then begin
|
|
k := SizeOf(TUCA_PropItemRec);
|
|
if s^.HasCodePoint() then
|
|
k := k + SizeOf(UInt24);
|
|
sCtx := PUCA_PropItemContextTreeRec(PtrUInt(s) + k);
|
|
dCtx := PUCA_PropItemContextTreeRec(PtrUInt(d) + k);
|
|
ReverseContextFromNativeEndian(sCtx,dCtx);
|
|
end;
|
|
if s^.HasCodePoint() then begin
|
|
if s^.Contextual then
|
|
k := s^.GetSelfOnlySize()- SizeOf(UInt24) - Cardinal(s^.GetContext()^.Size)
|
|
else
|
|
k := s^.GetSelfOnlySize() - SizeOf(UInt24);
|
|
p_s := PByte(PtrUInt(s) + k);
|
|
p_d := PByte(PtrUInt(d) + k);
|
|
Unaligned(PUInt24(p_d)^) := Unaligned(PUInt24(p_s)^);
|
|
ReverseBytes(p_d^,SizeOf(UInt24));
|
|
end;
|
|
if (s^.WeightLength > 0) then begin
|
|
k := SizeOf(TUCA_PropItemRec);
|
|
p_s := PByte(PtrUInt(s) + k);
|
|
p_d := PByte(PtrUInt(d) + k);
|
|
k := SizeOf(Word);
|
|
Unaligned(PWord(p_d)^) := Unaligned(PWord(p_s)^);
|
|
ReverseBytes(Unaligned(p_d^),k);
|
|
p_s := PByte(PtrUInt(p_s) + k);
|
|
p_d := PByte(PtrUInt(p_d) + k);
|
|
if s^.IsWeightCompress_1() then begin
|
|
k := SizeOf(Byte);
|
|
PByte(p_d)^ := PByte(p_s)^;
|
|
end else begin
|
|
k := SizeOf(Word);
|
|
Unaligned(PWord(p_d)^) := Unaligned(PWord(p_s)^);
|
|
end;
|
|
ReverseBytes(p_d^,k);
|
|
p_s := PByte(PtrUInt(p_s) + k);
|
|
p_d := PByte(PtrUInt(p_d) + k);
|
|
if s^.IsWeightCompress_2() then begin
|
|
k := SizeOf(Byte);
|
|
PByte(p_d)^ := PByte(p_s)^;
|
|
end else begin
|
|
k := SizeOf(Word);
|
|
Unaligned(PWord(p_d)^) := Unaligned(PWord(p_s)^);
|
|
end;
|
|
ReverseBytes(p_d^,k);
|
|
if (s^.WeightLength > 1) then begin
|
|
pw_s := PUCA_PropWeights(PtrUInt(p_s) + k);
|
|
pw_d := PUCA_PropWeights(PtrUInt(p_d) + k);
|
|
for i := 1 to s^.WeightLength - 1 do begin
|
|
pw_d^.Weights[0] := pw_s^.Weights[0];
|
|
pw_d^.Weights[1] := pw_s^.Weights[1];
|
|
pw_d^.Weights[2] := pw_s^.Weights[2];
|
|
ReverseArray(pw_d^,3,SizeOf(pw_s^.Weights[0]));
|
|
Inc(pw_s);
|
|
Inc(pw_d);
|
|
end;
|
|
end;
|
|
end;
|
|
k := s^.GetSelfOnlySize();
|
|
s := PUCA_PropItemRec(PtrUInt(s)+k);
|
|
d := PUCA_PropItemRec(PtrUInt(d)+k);
|
|
if (PtrUInt(s) >= dataEnd) then
|
|
Break;
|
|
end;
|
|
if ( (PtrUInt(s)-PtrUInt(AData)) <> (PtrUInt(d)-PtrUInt(ADest)) ) then
|
|
raise Exception.CreateFmt('Read data length(%d) differs from written data length(%d).',[(PtrUInt(s)-PtrUInt(AData)), (PtrUInt(d)-PtrUInt(ADest))]);
|
|
end;
|
|
//------------------------------------------------------------------------------
|
|
|
|
procedure ReverseContextNodeToNativeEndian(s, d : PUCA_PropItemContextTreeNodeRec);
|
|
var
|
|
k : PtrUInt;
|
|
p_s, p_d : PByte;
|
|
begin
|
|
d^.Left := s^.Left;
|
|
ReverseBytes(d^.Left,SizeOf(d^.Left));
|
|
d^.Right := s^.Right;
|
|
ReverseBytes(d^.Right,SizeOf(d^.Right));
|
|
d^.Data.CodePointCount := s^.Data.CodePointCount;
|
|
ReverseBytes(d^.Data.CodePointCount,SizeOf(d^.Data.CodePointCount));
|
|
d^.Data.WeightCount := s^.Data.WeightCount;
|
|
ReverseBytes(d^.Data.WeightCount,SizeOf(d^.Data.WeightCount));
|
|
|
|
k := SizeOf(TUCA_PropItemContextTreeNodeRec);
|
|
p_s := PByte(PtrUInt(s) + k);
|
|
p_d := PByte(PtrUInt(d) + k);
|
|
k := (d^.Data.CodePointCount*SizeOf(UInt24));
|
|
Move(p_s^,p_d^, k);
|
|
ReverseArray(p_d^,d^.Data.CodePointCount,SizeOf(UInt24));
|
|
p_s := PByte(PtrUInt(p_s) + k);
|
|
p_d := PByte(PtrUInt(p_d) + k);
|
|
k := (d^.Data.WeightCount*SizeOf(TUCA_PropWeights));
|
|
Move(p_s^,p_d^,k);
|
|
ReverseArray(p_d^,(d^.Data.WeightCount*Length(TUCA_PropWeights.Weights)),SizeOf(TUCA_PropWeights.Weights[0]));
|
|
if (d^.Left > 0) then
|
|
ReverseContextNodeToNativeEndian(
|
|
PUCA_PropItemContextTreeNodeRec(PtrUInt(s) + d^.Left),
|
|
PUCA_PropItemContextTreeNodeRec(PtrUInt(d) + d^.Left)
|
|
);
|
|
if (d^.Right > 0) then
|
|
ReverseContextNodeToNativeEndian(
|
|
PUCA_PropItemContextTreeNodeRec(PtrUInt(s) + d^.Right),
|
|
PUCA_PropItemContextTreeNodeRec(PtrUInt(d) + d^.Right)
|
|
);
|
|
end;
|
|
|
|
procedure ReverseContextToNativeEndian(s, d : PUCA_PropItemContextTreeRec);
|
|
var
|
|
k : PtrUInt;
|
|
begin
|
|
d^.Size := s^.Size;
|
|
ReverseBytes(d^.Size,SizeOf(d^.Size));
|
|
if (s^.Size = 0) then
|
|
exit;
|
|
k := SizeOf(s^.Size);
|
|
ReverseContextNodeToNativeEndian(
|
|
PUCA_PropItemContextTreeNodeRec(PtrUInt(s)+k),
|
|
PUCA_PropItemContextTreeNodeRec(PtrUInt(d)+k)
|
|
);
|
|
end;
|
|
|
|
procedure ReverseToNativeEndian(
|
|
const AData : PUCA_PropItemRec;
|
|
const ADataLen : Cardinal;
|
|
const ADest : PUCA_PropItemRec
|
|
);
|
|
var
|
|
s, d : PUCA_PropItemRec;
|
|
sCtx, dCtx : PUCA_PropItemContextTreeRec;
|
|
dataEnd : PtrUInt;
|
|
k, i : PtrUInt;
|
|
p_s, p_d : PByte;
|
|
pw_s, pw_d : PUCA_PropWeights;
|
|
begin
|
|
dataEnd := PtrUInt(AData) + ADataLen;
|
|
s := AData;
|
|
d := ADest;
|
|
while True do begin
|
|
d^.WeightLength := s^.WeightLength;
|
|
ReverseBytes(d^.WeightLength,SizeOf(d^.WeightLength));
|
|
d^.ChildCount := s^.ChildCount;
|
|
ReverseBytes(d^.ChildCount,SizeOf(d^.ChildCount));
|
|
d^.Size := s^.Size;
|
|
ReverseBytes(d^.Size,SizeOf(d^.Size));
|
|
d^.Flags := s^.Flags;
|
|
ReverseBytes(d^.Flags,SizeOf(d^.Flags));
|
|
if d^.Contextual then begin
|
|
k := SizeOf(TUCA_PropItemRec);
|
|
if d^.HasCodePoint() then
|
|
k := k + SizeOf(UInt24);
|
|
sCtx := PUCA_PropItemContextTreeRec(PtrUInt(s) + k);
|
|
dCtx := PUCA_PropItemContextTreeRec(PtrUInt(d) + k);
|
|
ReverseContextToNativeEndian(sCtx,dCtx);
|
|
end;
|
|
if d^.HasCodePoint() then begin
|
|
if d^.Contextual then
|
|
k := d^.GetSelfOnlySize()- SizeOf(UInt24) - Cardinal(d^.GetContext()^.Size)
|
|
else
|
|
k := d^.GetSelfOnlySize() - SizeOf(UInt24);
|
|
p_s := PByte(PtrUInt(s) + k);
|
|
p_d := PByte(PtrUInt(d) + k);
|
|
Unaligned(PUInt24(p_d)^) := Unaligned(PUInt24(p_s)^);
|
|
ReverseBytes(p_d^,SizeOf(UInt24));
|
|
end;
|
|
if (d^.WeightLength > 0) then begin
|
|
k := SizeOf(TUCA_PropItemRec);
|
|
p_s := PByte(PtrUInt(s) + k);
|
|
p_d := PByte(PtrUInt(d) + k);
|
|
k := SizeOf(Word);
|
|
Unaligned(PWord(p_d)^) := Unaligned(PWord(p_s)^);
|
|
ReverseBytes(p_d^,k);
|
|
p_s := PByte(PtrUInt(p_s) + k);
|
|
p_d := PByte(PtrUInt(p_d) + k);
|
|
if d^.IsWeightCompress_1() then begin
|
|
k := SizeOf(Byte);
|
|
PByte(p_d)^ := PByte(p_s)^;
|
|
end else begin
|
|
k := SizeOf(Word);
|
|
Unaligned(PWord(p_d)^) := Unaligned(PWord(p_s)^);
|
|
end;
|
|
ReverseBytes(p_d^,k);
|
|
p_s := PByte(PtrUInt(p_s) + k);
|
|
p_d := PByte(PtrUInt(p_d) + k);
|
|
if d^.IsWeightCompress_2() then begin
|
|
k := SizeOf(Byte);
|
|
PByte(p_d)^ := PByte(p_s)^;
|
|
end else begin
|
|
k := SizeOf(Word);
|
|
Unaligned(PWord(p_d)^) := Unaligned(PWord(p_s)^);
|
|
end;
|
|
ReverseBytes(p_d^,k);
|
|
if (d^.WeightLength > 1) then begin
|
|
pw_s := PUCA_PropWeights(PtrUInt(p_s) + k);
|
|
pw_d := PUCA_PropWeights(PtrUInt(p_d) + k);
|
|
for i := 1 to d^.WeightLength - 1 do begin
|
|
pw_d^.Weights[0] := pw_s^.Weights[0];
|
|
pw_d^.Weights[1] := pw_s^.Weights[1];
|
|
pw_d^.Weights[2] := pw_s^.Weights[2];
|
|
ReverseArray(pw_d^,3,SizeOf(pw_s^.Weights[0]));
|
|
Inc(pw_s);
|
|
Inc(pw_d);
|
|
end;
|
|
end;
|
|
end;
|
|
k := d^.GetSelfOnlySize();
|
|
s := PUCA_PropItemRec(PtrUInt(s)+k);
|
|
d := PUCA_PropItemRec(PtrUInt(d)+k);
|
|
if (PtrUInt(s) >= dataEnd) then
|
|
Break;
|
|
end;
|
|
if ( (PtrUInt(s)-PtrUInt(AData)) <> (PtrUInt(d)-PtrUInt(ADest)) ) then
|
|
raise Exception.CreateFmt('Read data length(%d) differs from written data length(%d).',[(PtrUInt(s)-PtrUInt(AData)), (PtrUInt(d)-PtrUInt(ADest))]);
|
|
end;
|
|
|
|
procedure Check(const ACondition : Boolean; const AMsg : string);overload;
|
|
begin
|
|
if not ACondition then
|
|
raise Exception.Create(AMsg);
|
|
end;
|
|
|
|
procedure Check(
|
|
const ACondition : Boolean;
|
|
const AFormatMsg : string;
|
|
const AArgs : array of const
|
|
);overload;
|
|
begin
|
|
Check(ACondition,Format(AFormatMsg,AArgs));
|
|
end;
|
|
|
|
procedure Check(const ACondition : Boolean);overload;
|
|
begin
|
|
Check(ACondition,'Check failed.')
|
|
end;
|
|
|
|
procedure CompareWeights(a, b : PUCA_PropWeights; const ALength : Integer);
|
|
var
|
|
i : Integer;
|
|
begin
|
|
if (ALength > 0) then begin
|
|
for i := 0 to ALength - 1 do begin
|
|
Check(a[i].Weights[0]=b[i].Weights[0]);
|
|
Check(a[i].Weights[1]=b[i].Weights[1]);
|
|
Check(a[i].Weights[2]=b[i].Weights[2]);
|
|
end;
|
|
end;
|
|
end;
|
|
|
|
procedure CompareCodePoints(a, b : PUInt24; const ALength : Integer);
|
|
var
|
|
i : Integer;
|
|
begin
|
|
if (ALength > 0) then begin
|
|
for i := 0 to ALength - 1 do
|
|
Check(a[i]=b[i]);
|
|
end;
|
|
end;
|
|
|
|
procedure CompareContextNode(AProp1, AProp2 : PUCA_PropItemContextTreeNodeRec);
|
|
var
|
|
a, b : PUCA_PropItemContextTreeNodeRec;
|
|
k : Cardinal;
|
|
begin
|
|
if (AProp1=nil) then begin
|
|
Check(AProp2=nil);
|
|
exit;
|
|
end;
|
|
a := AProp1;
|
|
b := AProp2;
|
|
Check(a^.Left=b^.Left);
|
|
Check(a^.Right=b^.Right);
|
|
Check(a^.Data.CodePointCount=b^.Data.CodePointCount);
|
|
Check(a^.Data.WeightCount=b^.Data.WeightCount);
|
|
k := SizeOf(a^.Data);
|
|
CompareCodePoints(
|
|
PUInt24(PtrUInt(a)+k),
|
|
PUInt24(PtrUInt(b)+k),
|
|
a^.Data.CodePointCount
|
|
);
|
|
k := SizeOf(a^.Data)+ (a^.Data.CodePointCount*SizeOf(UInt24));
|
|
CompareWeights(
|
|
PUCA_PropWeights(PtrUInt(a)+k),
|
|
PUCA_PropWeights(PtrUInt(b)+k),
|
|
a^.Data.WeightCount
|
|
);
|
|
if (a^.Left > 0) then begin
|
|
k := a^.Left;
|
|
CompareContextNode(
|
|
PUCA_PropItemContextTreeNodeRec(PtrUInt(a)+k),
|
|
PUCA_PropItemContextTreeNodeRec(PtrUInt(b)+k)
|
|
);
|
|
end;
|
|
if (a^.Right > 0) then begin
|
|
k := a^.Right;
|
|
CompareContextNode(
|
|
PUCA_PropItemContextTreeNodeRec(PtrUInt(a)+k),
|
|
PUCA_PropItemContextTreeNodeRec(PtrUInt(b)+k)
|
|
);
|
|
end;
|
|
end;
|
|
|
|
procedure CompareContext(AProp1, AProp2 : PUCA_PropItemContextTreeRec);
|
|
var
|
|
a, b : PUCA_PropItemContextTreeNodeRec;
|
|
k : Integer;
|
|
begin
|
|
if (AProp1=nil) then begin
|
|
Check(AProp2=nil);
|
|
exit;
|
|
end;
|
|
Check(AProp1^.Size=AProp2^.Size);
|
|
k := Cardinal(AProp1^.Size);
|
|
a := PUCA_PropItemContextTreeNodeRec(PtrUInt(AProp1)+k);
|
|
b := PUCA_PropItemContextTreeNodeRec(PtrUInt(AProp2)+k);
|
|
CompareContextNode(a,b);
|
|
end;
|
|
|
|
procedure CompareProps(const AProp1, AProp2 : PUCA_PropItemRec; const ADataLen : Integer);
|
|
var
|
|
a, b, pend : PUCA_PropItemRec;
|
|
wa, wb : array of TUCA_PropWeights;
|
|
k : Integer;
|
|
begin
|
|
if (ADataLen <= 0) then
|
|
exit;
|
|
a := PUCA_PropItemRec(AProp1);
|
|
b := PUCA_PropItemRec(AProp2);
|
|
pend := PUCA_PropItemRec(PtrUInt(AProp1)+ADataLen);
|
|
while (a<pend) do begin
|
|
Check(a^.WeightLength=b^.WeightLength);
|
|
Check(a^.ChildCount=b^.ChildCount);
|
|
Check(a^.Size=b^.Size);
|
|
Check(a^.Flags=b^.Flags);
|
|
if a^.HasCodePoint() then
|
|
Check(a^.CodePoint = b^.CodePoint);
|
|
if (a^.WeightLength > 0) then begin
|
|
k := a^.WeightLength;
|
|
SetLength(wa,k);
|
|
SetLength(wb,k);
|
|
a^.GetWeightArray(@wa[0]);
|
|
b^.GetWeightArray(@wb[0]);
|
|
CompareWeights(@wa[0],@wb[0],k);
|
|
end;
|
|
if a^.Contextual then
|
|
CompareContext(a^.GetContext(),b^.GetContext());
|
|
Check(a^.GetSelfOnlySize()=b^.GetSelfOnlySize());
|
|
k := a^.GetSelfOnlySize();
|
|
a := PUCA_PropItemRec(PtrUInt(a)+k);
|
|
b := PUCA_PropItemRec(PtrUInt(b)+k);
|
|
end;
|
|
end;
|
|
|
|
Procedure QuickSort(AList : PCardinal; L, R : Longint);overload;
|
|
var
|
|
I, J : Longint;
|
|
P, Q : Cardinal;
|
|
begin
|
|
repeat
|
|
I := L;
|
|
J := R;
|
|
P := AList[ (L + R) div 2 ];
|
|
repeat
|
|
while (P > AList[i]) do
|
|
I := I + 1;
|
|
while (P < AList[J]) do
|
|
J := J - 1;
|
|
If I <= J then
|
|
begin
|
|
Q := AList[I];
|
|
AList[I] := AList[J];
|
|
AList[J] := Q;
|
|
I := I + 1;
|
|
J := J - 1;
|
|
end;
|
|
until I > J;
|
|
if J - L < R - I then
|
|
begin
|
|
if L < J then
|
|
QuickSort(AList, L, J);
|
|
L := I;
|
|
end
|
|
else
|
|
begin
|
|
if I < R then
|
|
QuickSort(AList, I, R);
|
|
R := J;
|
|
end;
|
|
until L >= R;
|
|
end;
|
|
|
|
function CalcMaxLevel2Count(
|
|
const ALevel1Value : Cardinal;
|
|
ALines : array of TUCA_LineRec
|
|
) : Integer;
|
|
var
|
|
i, c, k : Integer;
|
|
ac : Integer;
|
|
items : array of Cardinal;
|
|
p : PUCA_LineRec;
|
|
pw : ^TUCA_WeightRec;
|
|
begin
|
|
c := Length(ALines);
|
|
if (c < 1) then
|
|
exit(0);
|
|
SetLength(items,0);
|
|
ac := 0;
|
|
p := @ALines[Low(ALines)];
|
|
for i := 0 to c-1 do begin
|
|
if (Length(p^.Weights) > 0) then begin
|
|
pw := @p^.Weights[Low(p^.Weights)];
|
|
for k := 0 to Length(p^.Weights)-1 do begin
|
|
if (pw^.Weights[0] = ALevel1Value) then begin
|
|
if (ac = 0) or (IndexDWord(items[0],ac,pw^.Weights[1]) < 0) then begin
|
|
if (ac >= Length(items)) then
|
|
SetLength(items,Length(items)+256);
|
|
items[ac] := pw^.Weights[1];
|
|
ac := ac+1;
|
|
end;
|
|
end;
|
|
Inc(pw);
|
|
end;
|
|
end;
|
|
Inc(p);
|
|
end;
|
|
Result := ac;
|
|
end;
|
|
|
|
function RewriteLevel2(
|
|
const ALevel1Value : Cardinal;
|
|
ALines : PUCA_LineRec;
|
|
const ALinesLength : Integer
|
|
) : Integer;
|
|
var
|
|
i, c, k : Integer;
|
|
ac : Integer;
|
|
items : array of Cardinal;
|
|
p : PUCA_LineRec;
|
|
pw : ^TUCA_WeightRec;
|
|
newValue : Int64;
|
|
begin
|
|
c := ALinesLength;
|
|
if (c < 1) then
|
|
exit(0);
|
|
SetLength(items,256);
|
|
ac := 0;
|
|
p := ALines;
|
|
for i := 0 to c-1 do begin
|
|
if (Length(p^.Weights) > 0) then begin
|
|
for k := 0 to Length(p^.Weights)-1 do begin
|
|
pw := @p^.Weights[k];
|
|
if (pw^.Weights[0] = ALevel1Value) then begin
|
|
if (ac = 0) or (IndexDWord(items[0],ac,pw^.Weights[1]) < 0) then begin
|
|
if (ac >= Length(items)) then
|
|
SetLength(items,Length(items)+256);
|
|
items[ac] := pw^.Weights[1];
|
|
ac := ac+1;
|
|
end;
|
|
end;
|
|
end;
|
|
end;
|
|
Inc(p);
|
|
end;
|
|
SetLength(items,ac);
|
|
if (ac > 1) then
|
|
QuickSort(@items[0],0,(ac-1));
|
|
|
|
p := ALines;
|
|
for i := 0 to c-1 do begin
|
|
if (Length(p^.Weights) > 0) then begin
|
|
for k := 0 to Length(p^.Weights)-1 do begin
|
|
pw := @p^.Weights[k];
|
|
if (pw^.Weights[0] = ALevel1Value) then begin
|
|
newValue := IndexDWord(items[0],ac,pw^.Weights[1]);
|
|
if (newValue < 0) then
|
|
raise Exception.CreateFmt('level 2 value %d missed in rewrite of level 1 value of %d.',[pw^.Weights[1],ALevel1Value]);
|
|
pw^.Weights[1] := newValue;//+1;
|
|
end;
|
|
end;
|
|
end;
|
|
Inc(p);
|
|
end;
|
|
if (Length(items) > 0) then
|
|
Result := items[Length(items)-1]
|
|
else
|
|
Result := 0;
|
|
end;
|
|
|
|
procedure RewriteLevel2Values(ALines : PUCA_LineRec; ALength : Integer);
|
|
var
|
|
c, i, ac, k : Integer;
|
|
p : PUCA_LineRec;
|
|
level1List : array of Cardinal;
|
|
pw : ^TUCA_WeightRec;
|
|
begin
|
|
c := ALength;
|
|
if (c < 1) then
|
|
exit;
|
|
ac := 0;
|
|
SetLength(level1List,c);
|
|
p := ALines;
|
|
for i := 0 to c-1 do begin
|
|
if (Length(p^.Weights) > 0) then begin
|
|
for k := 0 to Length(p^.Weights)-1 do begin
|
|
pw := @p^.Weights[k];
|
|
if (ac = 0) or (IndexDWord(level1List[0],ac,pw^.Weights[0]) < 0) then begin
|
|
if (ac >= Length(level1List)) then
|
|
SetLength(level1List,ac+1000);
|
|
level1List[ac] := pw^.Weights[0];
|
|
RewriteLevel2(level1List[ac],ALines,ALength);
|
|
ac := ac+1;
|
|
end;
|
|
end;
|
|
end;
|
|
Inc(p);
|
|
end;
|
|
end;
|
|
|
|
function CalcMaxLevel2Value(ALines : array of TUCA_LineRec) : Cardinal;
|
|
var
|
|
i, c, k, tempValue : Integer;
|
|
p : PUCA_LineRec;
|
|
maxLevel : Cardinal;
|
|
maxValue : Integer;
|
|
begin
|
|
c := Length(ALines);
|
|
if (c < 2) then
|
|
exit(0);
|
|
maxLevel := 0;
|
|
maxValue := CalcMaxLevel2Count(maxLevel,ALines);
|
|
p := @ALines[Low(ALines)+1];
|
|
for i := 1 to c-1 do begin
|
|
if (Length(p^.Weights) > 0) then begin
|
|
for k := 0 to Length(p^.Weights)-1 do begin
|
|
if (p^.Weights[k].Weights[0] <> maxLevel) then begin
|
|
tempValue := CalcMaxLevel2Count(p^.Weights[k].Weights[0],ALines);
|
|
if (tempValue > maxValue) then begin
|
|
maxLevel := p^.Weights[k].Weights[0];
|
|
maxValue := tempValue;
|
|
end;
|
|
end;
|
|
end;
|
|
end;
|
|
Inc(p);
|
|
end;
|
|
Result := maxValue;
|
|
end;
|
|
|
|
initialization
|
|
FS := DefaultFormatSettings;
|
|
FS.DecimalSeparator := '.';
|
|
|
|
end.
|