mirror of
https://gitlab.com/freepascal.org/fpc/source.git
synced 2025-04-05 09:18:15 +02:00
4202 lines
115 KiB
ObjectPascal
4202 lines
115 KiB
ObjectPascal
{ Unicode tables unit.
|
||
|
||
Copyright (c) 2013 by Inoussa OUEDRAOGO
|
||
|
||
The source code is distributed under the Library GNU
|
||
General Public License with the following modification:
|
||
|
||
- object files and libraries linked into an application may be
|
||
distributed without source code.
|
||
|
||
If you didn't receive a copy of the file COPYING, contact:
|
||
Free Software Foundation
|
||
675 Mass Ave
|
||
Cambridge, MA 02139
|
||
USA
|
||
|
||
This program is distributed in the hope that it will be useful,
|
||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
||
|
||
-------------------------------------------------------------------------------
|
||
|
||
Overview of the Unicode Collation Algorithm(UCA) data layout :
|
||
============================================================
|
||
|
||
The UCA data(see “TUCA_DataBook”) are organized into index data
|
||
(see the “TUCA_DataBook” fields “BMP_Table1”, “BMP_Table2”,
|
||
“OBMP_Table1” and “OBMP_Table2”) and actual properties data(see
|
||
the “Props” field of “TUCA_DataBook”). The index is a 3 level
|
||
tables designed to minimize the overhaul data size. The
|
||
properties’ data contain the actual (used) UCA’s properties
|
||
for the customized code points(or sequence of code points)
|
||
data (see TUCA_PropItemRec).
|
||
To get the properties’ record of a code point, one goes
|
||
through the index data to get its offset into the “Props”
|
||
serialized data, see the “GetPropUCA” procedure.
|
||
The “TUCA_PropItemRec” record, that represents the actual
|
||
properties, contains a fixed part and a variable part. The
|
||
fixed part is directly expressed as fields of the record :
|
||
“WeightLength”, “ChildCount”, “Size”, “Flags”. The
|
||
variable part depends on some values of the fixed part; For
|
||
example “WeightLength” specify the number of weight[1] item,
|
||
it can be zero or not null; The “Flags” fields does contains
|
||
some bit states to indicate for example if the record’s owner,
|
||
that is the target code point, is present(it is not always
|
||
necessary to store the code point as you are required to have
|
||
this information in the first place in order to get the
|
||
“TUCA_PropItemRec” record).
|
||
|
||
The data, as it is organized now, is as follow for each code point :
|
||
* the fixed part is serialized,
|
||
* if there are weight item array, they are serialized
|
||
(see the "WeigthLength")
|
||
* the code point is serialized (if needed)
|
||
* the context[2] array is serialized
|
||
* The children[3] record are serialized.
|
||
|
||
The “Size” represent the size of the whole record, including its
|
||
children records(see [3]). The “GetSelfOnlySize” returns the size
|
||
of the queried record, excluding the size of its children.
|
||
|
||
|
||
Notes :
|
||
|
||
[1] : A weight item is an array of 3 words. A code point/sequence of code
|
||
point may have zero or multiple items.
|
||
|
||
[2] : There are characters(mostly japanese ones) that do not have their
|
||
own weighs; There inherit the weights of the preceding character
|
||
in the string that you will be evaluating.
|
||
[3] : Some unicode characters are expressed using more than one code point.
|
||
In that case the properties records are serialized as a trie. The
|
||
trie data structure is useful when many characters’ expression have
|
||
the same starting code point(s).
|
||
|
||
[4] TUCA_PropItemRec serialization :
|
||
TUCA_PropItemRec :
|
||
WeightLength, ChildCount, Size, Flags [weight item array]
|
||
[Code Point] [Context data]
|
||
[Child 0] [Child 1] .. [Child n]
|
||
|
||
each [Child k] is a TUCA_PropItemRec.
|
||
}
|
||
|
||
{$IFNDEF FPC_DOTTEDUNITS}
|
||
unit unicodedata;
|
||
{$ENDIF FPC_DOTTEDUNITS}
|
||
{$IFDEF FPC}
|
||
{$mode delphi}
|
||
{$H+}
|
||
{$PACKENUM 1}
|
||
{$warn 4056 off} //Conversion between ordinals and pointers is not portable
|
||
{$DEFINE HAS_PUSH}
|
||
{$DEFINE HAS_COMPARE_BYTE}
|
||
{$DEFINE INLINE_SUPPORT_PRIVATE_VARS}
|
||
{$DEFINE HAS_UNALIGNED}
|
||
{$ENDIF FPC}
|
||
|
||
{$IFNDEF FPC}
|
||
{$UNDEF HAS_COMPARE_BYTE}
|
||
{$UNDEF HAS_PUSH}
|
||
{$DEFINE ENDIAN_LITTLE}
|
||
{$ENDIF !FPC}
|
||
|
||
{$SCOPEDENUMS ON}
|
||
{$pointermath on}
|
||
{$define USE_INLINE}
|
||
{ $define uni_debug}
|
||
|
||
interface
|
||
{$IFNDEF FPC}
|
||
type
|
||
UnicodeChar = WideChar;
|
||
PUnicodeChar = ^UnicodeChar;
|
||
SizeInt = NativeInt;
|
||
DWord = UInt32;
|
||
PDWord = ^DWord;
|
||
PtrInt = NativeInt;
|
||
PtrUInt = NativeUInt;
|
||
{$ENDIF !FPC}
|
||
|
||
{$IF not Declared(reCodesetConversion)}
|
||
const reCodesetConversion = reRangeError;
|
||
{$IFEND reCodesetConversion}
|
||
|
||
{$IF not Declared(DirectorySeparator)}
|
||
{$IFDEF MSWINDOWS}
|
||
const DirectorySeparator = '\';
|
||
{$ELSE}
|
||
const DirectorySeparator = '/';
|
||
{$ENDIF MSWINDOWS}
|
||
|
||
{$IFEND DirectorySeparator}
|
||
|
||
const
|
||
MAX_WORD = High(Word);
|
||
LOW_SURROGATE_BEGIN = Word($DC00);
|
||
LOW_SURROGATE_END = Word($DFFF);
|
||
|
||
HIGH_SURROGATE_BEGIN = Word($D800);
|
||
HIGH_SURROGATE_END = Word($DBFF);
|
||
HIGH_SURROGATE_COUNT = HIGH_SURROGATE_END - HIGH_SURROGATE_BEGIN + 1;
|
||
UCS4_HALF_BASE = LongWord($10000);
|
||
UCS4_HALF_MASK = Word($3FF);
|
||
MAX_LEGAL_UTF32 = $10FFFF;
|
||
|
||
const
|
||
// Unicode General Category
|
||
UGC_UppercaseLetter = 0;
|
||
UGC_LowercaseLetter = 1;
|
||
UGC_TitlecaseLetter = 2;
|
||
UGC_ModifierLetter = 3;
|
||
UGC_OtherLetter = 4;
|
||
|
||
UGC_NonSpacingMark = 5;
|
||
UGC_CombiningMark = 6;
|
||
UGC_EnclosingMark = 7;
|
||
|
||
UGC_DecimalNumber = 8;
|
||
UGC_LetterNumber = 9;
|
||
UGC_OtherNumber = 10;
|
||
|
||
UGC_ConnectPunctuation = 11;
|
||
UGC_DashPunctuation = 12;
|
||
UGC_OpenPunctuation = 13;
|
||
UGC_ClosePunctuation = 14;
|
||
UGC_InitialPunctuation = 15;
|
||
UGC_FinalPunctuation = 16;
|
||
UGC_OtherPunctuation = 17;
|
||
|
||
UGC_MathSymbol = 18;
|
||
UGC_CurrencySymbol = 19;
|
||
UGC_ModifierSymbol = 20;
|
||
UGC_OtherSymbol = 21;
|
||
|
||
UGC_SpaceSeparator = 22;
|
||
UGC_LineSeparator = 23;
|
||
UGC_ParagraphSeparator = 24;
|
||
|
||
UGC_Control = 25;
|
||
UGC_Format = 26;
|
||
UGC_Surrogate = 27;
|
||
UGC_PrivateUse = 28;
|
||
UGC_Unassigned = 29;
|
||
|
||
// Names
|
||
|
||
UnicodeCategoryNames: array[0..29] of string[2] = (
|
||
'Lu',
|
||
'Ll',
|
||
'Lt',
|
||
'Lm',
|
||
'Lo',
|
||
'Mn',
|
||
'Mc',
|
||
'Me',
|
||
'Nd',
|
||
'Nl',
|
||
'No',
|
||
'Pc',
|
||
'Pd',
|
||
'Ps',
|
||
'Pe',
|
||
'Pi',
|
||
'Pf',
|
||
'Po',
|
||
'Sm',
|
||
'Sc',
|
||
'Sk',
|
||
'So',
|
||
'Zs',
|
||
'Zl',
|
||
'Zp',
|
||
'Cc',
|
||
'Cf',
|
||
'Cs',
|
||
'Co',
|
||
'Cn'
|
||
);
|
||
|
||
type
|
||
|
||
TUInt24Rec = packed record
|
||
public
|
||
{$ifdef ENDIAN_LITTLE}
|
||
a, b, c : Byte;
|
||
{$else ENDIAN_LITTLE}
|
||
c, b, a : Byte;
|
||
{$endif ENDIAN_LITTLE}
|
||
public
|
||
property byte0 : Byte read a write a;
|
||
property byte1 : Byte read b write b;
|
||
property byte2 : Byte read c write c;
|
||
public
|
||
class operator Implicit(a : TUInt24Rec) : Cardinal;{$ifdef USE_INLINE}inline;{$ENDIF}
|
||
class operator Implicit(a : TUInt24Rec) : LongInt;{$ifdef USE_INLINE}inline;{$ENDIF}
|
||
class operator Implicit(a : TUInt24Rec) : Word;{$ifdef USE_INLINE}inline;{$ENDIF}
|
||
class operator Implicit(a : TUInt24Rec) : Byte;{$ifdef USE_INLINE}inline;{$ENDIF}
|
||
class operator Implicit(a : Cardinal) : TUInt24Rec;{$ifdef USE_INLINE}inline;{$ENDIF}
|
||
class operator Equal(a, b: TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
|
||
|
||
class operator Equal(a : TUInt24Rec; b : Cardinal): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
|
||
class operator Equal(a : Cardinal; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
|
||
|
||
class operator Equal(a : TUInt24Rec; b : LongInt): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
|
||
class operator Equal(a : LongInt; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
|
||
|
||
class operator Equal(a : TUInt24Rec; b : Word): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
|
||
class operator Equal(a : Word; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
|
||
|
||
class operator Equal(a : TUInt24Rec; b : Byte): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
|
||
class operator Equal(a : Byte; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
|
||
|
||
class operator NotEqual(a, b: TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
|
||
class operator NotEqual(a : TUInt24Rec; b : Cardinal): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
|
||
class operator NotEqual(a : Cardinal; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
|
||
class operator GreaterThan(a, b: TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
|
||
class operator GreaterThan(a : TUInt24Rec; b : Cardinal): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
|
||
class operator GreaterThan(a : Cardinal; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
|
||
class operator GreaterThanOrEqual(a, b: TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
|
||
class operator GreaterThanOrEqual(a : TUInt24Rec; b : Cardinal): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
|
||
class operator GreaterThanOrEqual(a : Cardinal; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
|
||
class operator LessThan(a, b: TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
|
||
class operator LessThan(a : TUInt24Rec; b : Cardinal): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
|
||
class operator LessThan(a : Cardinal; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
|
||
class operator LessThanOrEqual(a, b: TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
|
||
class operator LessThanOrEqual(a : TUInt24Rec; b : Cardinal): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
|
||
class operator LessThanOrEqual(a : Cardinal; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
|
||
end;
|
||
|
||
UInt24 = TUInt24Rec;
|
||
PUInt24 = ^UInt24;
|
||
|
||
const
|
||
ZERO_UINT24 : UInt24 =
|
||
{$ifdef ENDIAN_LITTLE}
|
||
(a : 0; b : 0; c : 0;);
|
||
{$else ENDIAN_LITTLE}
|
||
(c : 0; b : 0; a : 0;);
|
||
{$endif ENDIAN_LITTLE}
|
||
|
||
type
|
||
PUC_Prop = ^TUC_Prop;
|
||
|
||
{ TUC_Prop }
|
||
|
||
{ On alignment-sensitive targets, at least some of them, assembler uses to forcibly align data >1 byte.
|
||
This breaks intended layout of initialized constants/variables.
|
||
A proper solution is to patch compiler to emit always unaligned directives for words/dwords/etc,
|
||
but for now just declare this record as "unpacked". This causes bloat, but it's better than having
|
||
entire unit not working at all. }
|
||
TUC_Prop = {$ifndef FPC_REQUIRES_PROPER_ALIGNMENT}packed{$endif} record
|
||
private
|
||
function GetCategory : Byte;inline;
|
||
procedure SetCategory(AValue : Byte);
|
||
function GetWhiteSpace : Boolean;inline;
|
||
procedure SetWhiteSpace(AValue : Boolean);
|
||
function GetHangulSyllable : Boolean;inline;
|
||
procedure SetHangulSyllable(AValue : Boolean);
|
||
function GetNumericValue: Double;inline;
|
||
function GetUnifiedIdeograph : Boolean;inline;
|
||
public //Shortned names
|
||
C : Byte; //CategoryData
|
||
C3 : Byte; //Canonical Combining Class
|
||
N : Byte; //NumericIndex
|
||
UC : UInt24; //SimpleUpperCase
|
||
LC : UInt24; //SimpleLowerCase
|
||
D : SmallInt; //DecompositionID
|
||
public
|
||
property CategoryData : Byte read C write C;
|
||
property NumericIndex : Byte read N write N;
|
||
property SimpleUpperCase : UInt24 read UC write UC;
|
||
property SimpleLowerCase : UInt24 read LC write LC;
|
||
property DecompositionID : SmallInt read D write D;
|
||
public
|
||
property Category : Byte read GetCategory write SetCategory;
|
||
property WhiteSpace : Boolean read GetWhiteSpace write SetWhiteSpace;
|
||
property HangulSyllable : Boolean read GetHangulSyllable write SetHangulSyllable;
|
||
property UnifiedIdeograph : Boolean read GetUnifiedIdeograph;
|
||
property NumericValue : Double read GetNumericValue;
|
||
end;
|
||
|
||
type
|
||
TUCA_PropWeights = packed record
|
||
Weights : array[0..2] of Word;
|
||
end;
|
||
PUCA_PropWeights = ^TUCA_PropWeights;
|
||
|
||
|
||
TUCA_PropItemContextRec = packed record
|
||
public
|
||
CodePointCount : Byte;
|
||
WeightCount : Byte;
|
||
public
|
||
function GetCodePoints() : PUInt24;inline;
|
||
function GetWeights() : PUCA_PropWeights;inline;
|
||
end;
|
||
PUCA_PropItemContextRec = ^TUCA_PropItemContextRec;
|
||
|
||
PUCA_PropItemContextTreeNodeRec = ^TUCA_PropItemContextTreeNodeRec;
|
||
TUCA_PropItemContextTreeNodeRec = packed record
|
||
public
|
||
Left : Word;
|
||
Right : Word;
|
||
Data : TUCA_PropItemContextRec;
|
||
public
|
||
function GetLeftNode() : PUCA_PropItemContextTreeNodeRec;inline;
|
||
function GetRightNode() : PUCA_PropItemContextTreeNodeRec;inline;
|
||
end;
|
||
|
||
|
||
{ TUCA_PropItemContextTreeRec }
|
||
|
||
TUCA_PropItemContextTreeRec = packed record
|
||
public
|
||
Size : UInt24;
|
||
public
|
||
function GetData:PUCA_PropItemContextTreeNodeRec;inline;
|
||
property Data : PUCA_PropItemContextTreeNodeRec read GetData;
|
||
|
||
function Find(
|
||
const AChars : PUInt24;
|
||
const ACharCount : Integer;
|
||
out ANode : PUCA_PropItemContextTreeNodeRec
|
||
) : Boolean;
|
||
end;
|
||
PUCA_PropItemContextTreeRec = ^TUCA_PropItemContextTreeRec;
|
||
|
||
{ TUCA_PropItemRec }
|
||
|
||
TUCA_PropItemRec = packed record
|
||
private
|
||
const FLAG_VALID = 0;
|
||
const FLAG_CODEPOINT = 1;
|
||
const FLAG_CONTEXTUAL = 2;
|
||
const FLAG_DELETION = 3;
|
||
const FLAG_COMPRESS_WEIGHT_1 = 6;
|
||
const FLAG_COMPRESS_WEIGHT_2 = 7;
|
||
private
|
||
function GetCodePoint() : UInt24;inline;
|
||
public
|
||
WeightLength : Byte;
|
||
ChildCount : Byte;
|
||
Size : Word;
|
||
Flags : Byte;
|
||
public
|
||
function HasCodePoint() : Boolean;inline;
|
||
property CodePoint : UInt24 read GetCodePoint;
|
||
//Weights : array[0..WeightLength] of TUCA_PropWeights;
|
||
|
||
function IsValid() : Boolean;inline;
|
||
//function GetWeightArray() : PUCA_PropWeights;inline;
|
||
procedure GetWeightArray(ADest : PUCA_PropWeights);
|
||
function GetSelfOnlySize() : Cardinal;inline;
|
||
|
||
function GetContextual() : Boolean;inline;
|
||
property Contextual : Boolean read GetContextual;
|
||
function GetContext() : PUCA_PropItemContextTreeRec;
|
||
|
||
function IsDeleted() : Boolean;inline;
|
||
function IsWeightCompress_1() : Boolean;inline;
|
||
function IsWeightCompress_2() : Boolean;inline;
|
||
end;
|
||
PUCA_PropItemRec = ^TUCA_PropItemRec;
|
||
|
||
TUCA_VariableKind = (
|
||
ucaShifted, ucaNonIgnorable, ucaBlanked, ucaShiftedTrimmed,
|
||
ucaIgnoreSP // This one is not implemented !
|
||
);
|
||
|
||
TCollationName = array[0..(128-1)] of Byte;
|
||
TCollationVersion = TCollationName;
|
||
|
||
PUCA_DataBook = ^TUCA_DataBook;
|
||
TUCA_DataBook = record
|
||
public
|
||
Base : PUCA_DataBook;
|
||
Version : TCollationVersion;
|
||
CollationName : TCollationName;
|
||
VariableWeight : TUCA_VariableKind;
|
||
Backwards : array[0..3] of Boolean;
|
||
BMP_Table1 : PByte;
|
||
BMP_Table2 : PUInt24;
|
||
OBMP_Table1 : PWord;
|
||
OBMP_Table2 : PUInt24;
|
||
PropCount : Integer;
|
||
Props : PUCA_PropItemRec;
|
||
VariableLowLimit : Word;
|
||
VariableHighLimit : Word;
|
||
NoNormalization : Boolean;
|
||
ComparisonStrength : Byte;
|
||
Dynamic : Boolean;
|
||
public
|
||
function IsVariable(const AWeight : PUCA_PropWeights) : Boolean; inline;
|
||
end;
|
||
|
||
TUnicodeStringArray = array of UnicodeString;
|
||
TCollationTableItem = record
|
||
Collation : PUCA_DataBook;
|
||
Aliases : TUnicodeStringArray;
|
||
end;
|
||
PCollationTableItem = ^TCollationTableItem;
|
||
|
||
TCollationTableItemArray = array of TCollationTableItem;
|
||
|
||
{ TCollationTable }
|
||
|
||
TCollationTable = record
|
||
private
|
||
FItems : TCollationTableItemArray;
|
||
FCount : Integer;
|
||
private
|
||
function GetCapacity : Integer;
|
||
function GetCount : Integer;
|
||
function GetItem(const AIndex : Integer) : PCollationTableItem;
|
||
procedure Grow();
|
||
procedure ClearItem(AItem : PCollationTableItem);
|
||
procedure AddAlias(
|
||
AItem : PCollationTableItem;
|
||
AAlias : UnicodeString
|
||
);overload;
|
||
public
|
||
class function NormalizeName(AName : UnicodeString) : UnicodeString;static;
|
||
procedure Clear();
|
||
function IndexOf(AName : UnicodeString) : Integer;overload;
|
||
function IndexOf(ACollation : PUCA_DataBook) : Integer;overload;
|
||
function Find(AName : UnicodeString) : PCollationTableItem;overload;
|
||
function Find(ACollation : PUCA_DataBook) : PCollationTableItem;overload;
|
||
function Add(ACollation : PUCA_DataBook) : Integer;
|
||
function AddAlias(AName, AAlias : UnicodeString) : Boolean;overload;
|
||
function Remove(AIndex : Integer) : PUCA_DataBook;
|
||
property Item[const AIndex : Integer] : PCollationTableItem read GetItem;default;
|
||
property Count : Integer read GetCount;
|
||
property Capacity : Integer read GetCapacity;
|
||
end;
|
||
|
||
TCollationField = (
|
||
BackWard, VariableLowLimit, VariableHighLimit, Alternate, Normalization,
|
||
Strength
|
||
);
|
||
TCollationFields = set of TCollationField;
|
||
|
||
const
|
||
ROOT_COLLATION_NAME = 'DUCET';
|
||
ERROR_INVALID_CODEPOINT_SEQUENCE = 1;
|
||
|
||
procedure FromUCS4(const AValue : UCS4Char; out AHighS, ALowS : UnicodeChar);
|
||
function ToUCS4(const AHighS, ALowS : UnicodeChar) : UCS4Char;inline;
|
||
function UnicodeIsSurrogatePair(
|
||
const AHighSurrogate,
|
||
ALowSurrogate : UnicodeChar
|
||
) : Boolean;inline;
|
||
function UnicodeIsHighSurrogate(const AValue : UnicodeChar) : Boolean;inline;
|
||
function UnicodeIsLowSurrogate(const AValue : UnicodeChar) : Boolean;inline;
|
||
function UnicodeToUpper(
|
||
const AString : UnicodeString;
|
||
const AIgnoreInvalidSequence : Boolean;
|
||
out AResultString : UnicodeString
|
||
) : Integer;
|
||
function UnicodeToLower(
|
||
const AString : UnicodeString;
|
||
const AIgnoreInvalidSequence : Boolean;
|
||
out AResultString : UnicodeString
|
||
) : Integer;
|
||
|
||
function GetProps(const ACodePoint : Word) : PUC_Prop;overload;{$IFDEF INLINE_SUPPORT_PRIVATE_VARS}inline;{$ENDIF}
|
||
function GetProps(const AHighS, ALowS : UnicodeChar): PUC_Prop;overload;{$IFDEF INLINE_SUPPORT_PRIVATE_VARS}inline;{$ENDIF}
|
||
function GetProps(const ACodePoint : Cardinal) : PUC_Prop;overload;inline;
|
||
function GetPropUCA(const AHighS, ALowS : UnicodeChar; const ABook : PUCA_DataBook): PUCA_PropItemRec; overload;{$IFDEF INLINE_SUPPORT_PRIVATE_VARS}inline;{$ENDIF}
|
||
function GetPropUCA(const AChar : UnicodeChar; const ABook : PUCA_DataBook) : PUCA_PropItemRec; overload;{$IFDEF INLINE_SUPPORT_PRIVATE_VARS}inline;{$ENDIF}
|
||
|
||
|
||
function NormalizeNFD(const AString : UnicodeString) : UnicodeString;inline;overload;
|
||
function NormalizeNFD(const AStr : PUnicodeChar; ALength : SizeInt) : UnicodeString;overload;
|
||
procedure CanonicalOrder(var AString : UnicodeString);inline;overload;
|
||
procedure CanonicalOrder(AStr : PUnicodeChar; const ALength : SizeInt);overload;
|
||
|
||
type
|
||
TUCASortKeyItem = Word;
|
||
TUCASortKey = array of TUCASortKeyItem;
|
||
TCategoryMask = set of 0..31;
|
||
const
|
||
DEFAULT_UCA_COMPARISON_STRENGTH = 3;
|
||
|
||
function ComputeSortKey(
|
||
const AString : UnicodeString;
|
||
const ACollation : PUCA_DataBook
|
||
) : TUCASortKey;inline;overload;
|
||
function ComputeSortKey(
|
||
const AStr : PUnicodeChar;
|
||
const ALength : SizeInt;
|
||
const ACollation : PUCA_DataBook
|
||
) : TUCASortKey;overload;
|
||
function CompareSortKey(const A, B : TUCASortKey) : Integer;overload;
|
||
function CompareSortKey(const A : TUCASortKey; const B : array of Word) : Integer;overload;
|
||
function IncrementalCompareString(
|
||
const AStrA : PUnicodeChar;
|
||
const ALengthA : SizeInt;
|
||
const AStrB : PUnicodeChar;
|
||
const ALengthB : SizeInt;
|
||
const ACollation : PUCA_DataBook
|
||
) : Integer;overload;
|
||
function IncrementalCompareString(
|
||
const AStrA,
|
||
AStrB : UnicodeString;
|
||
const ACollation : PUCA_DataBook
|
||
) : Integer;inline;overload;
|
||
function FilterString(
|
||
const AStr : PUnicodeChar;
|
||
const ALength : SizeInt;
|
||
const AExcludedMask : TCategoryMask
|
||
) : UnicodeString;overload;
|
||
function FilterString(
|
||
const AStr : UnicodeString;
|
||
const AExcludedMask : TCategoryMask
|
||
) : UnicodeString;overload;inline;
|
||
|
||
function RegisterCollation(const ACollation : PUCA_DataBook) : Boolean;overload;
|
||
function RegisterCollation(
|
||
const ACollation : PUCA_DataBook;
|
||
const AAliasList : array of UnicodeString
|
||
) : Boolean;overload;
|
||
function RegisterCollation(
|
||
ADirectory, ALanguage : UnicodeString
|
||
) : Boolean;overload;
|
||
function AddAliasCollation(
|
||
ACollation : PUCA_DataBook;
|
||
AALias : UnicodeString
|
||
) : Boolean;
|
||
function UnregisterCollation(AName : UnicodeString): Boolean;
|
||
procedure UnregisterCollations(const AFreeDynamicCollations : Boolean);
|
||
function FindCollation(AName : UnicodeString): PUCA_DataBook;overload;
|
||
function FindCollation(const AIndex : Integer): PUCA_DataBook;overload;
|
||
function GetCollationCount() : Integer;
|
||
procedure PrepareCollation(
|
||
ACollation : PUCA_DataBook;
|
||
const ABaseName : UnicodeString;
|
||
const AChangedFields : TCollationFields
|
||
);
|
||
function LoadCollation(
|
||
const AData : Pointer;
|
||
const ADataLength : Integer;
|
||
var AAliases : TUnicodeStringArray
|
||
) : PUCA_DataBook;overload;
|
||
function LoadCollation(
|
||
const AData : Pointer;
|
||
const ADataLength : Integer
|
||
) : PUCA_DataBook;overload;
|
||
function LoadCollation(
|
||
const AFileName : UnicodeString;
|
||
var AAliases : TUnicodeStringArray
|
||
) : PUCA_DataBook;overload;
|
||
function LoadCollation(
|
||
const AFileName : UnicodeString
|
||
) : PUCA_DataBook;overload;
|
||
function LoadCollation(
|
||
const ADirectory,
|
||
ALanguage : UnicodeString;
|
||
var AAliases : TUnicodeStringArray
|
||
) : PUCA_DataBook;overload;
|
||
function LoadCollation(
|
||
const ADirectory,
|
||
ALanguage : UnicodeString
|
||
) : PUCA_DataBook;overload;
|
||
procedure FreeCollation(AItem : PUCA_DataBook);
|
||
|
||
type
|
||
TSetOfByte = set of Byte;
|
||
|
||
function BytesToString(
|
||
const ABytes : array of Byte;
|
||
const AValideChars : TSetOfByte
|
||
) : UnicodeString;
|
||
function BytesToName(
|
||
const ABytes : array of Byte
|
||
) : UnicodeString;
|
||
|
||
type
|
||
TEndianKind = (Little, Big);
|
||
const
|
||
ENDIAN_SUFFIX : array[TEndianKind] of UnicodeString = ('le','be');
|
||
{$IFDEF ENDIAN_LITTLE}
|
||
ENDIAN_NATIVE = TEndianKind.Little;
|
||
ENDIAN_NON_NATIVE = TEndianKind.Big;
|
||
{$ENDIF ENDIAN_LITTLE}
|
||
{$IFDEF ENDIAN_BIG}
|
||
ENDIAN_NATIVE = TEndianKind.Big;
|
||
ENDIAN_NON_NATIVE = TEndianKind.Little;
|
||
{$ENDIF ENDIAN_BIG}
|
||
|
||
resourcestring
|
||
SCollationNotFound = 'Collation not found : "%s".';
|
||
|
||
implementation
|
||
|
||
type
|
||
|
||
TCardinalRec = packed record
|
||
{$ifdef ENDIAN_LITTLE}
|
||
byte0, byte1, byte2, byte3 : Byte;
|
||
{$else ENDIAN_LITTLE}
|
||
byte3, byte2, byte1, byte0 : Byte;
|
||
{$endif ENDIAN_LITTLE}
|
||
end;
|
||
|
||
TWordRec = packed record
|
||
{$ifdef ENDIAN_LITTLE}
|
||
byte0, byte1 : Byte;
|
||
{$else ENDIAN_LITTLE}
|
||
byte1, byte0 : Byte;
|
||
{$endif ENDIAN_LITTLE}
|
||
end;
|
||
|
||
const
|
||
BYTES_OF_VALID_NAME_CHARS : set of Byte = [
|
||
Ord('a')..Ord('z'), Ord('A')..Ord('Z'), Ord('-'),Ord('_')
|
||
];
|
||
|
||
function BytesToString(
|
||
const ABytes : array of Byte;
|
||
const AValideChars : TSetOfByte
|
||
) : UnicodeString;
|
||
var
|
||
c, i, rl : Integer;
|
||
pr : PWord;
|
||
begin
|
||
rl := 0;
|
||
c := Length(ABytes);
|
||
if (c > 0) then begin
|
||
for i := 0 to c-1 do begin
|
||
if not(ABytes[i] in AValideChars) then
|
||
break;
|
||
rl := rl+1;
|
||
end;
|
||
end;
|
||
SetLength(Result,rl);
|
||
if (rl > 0) then begin
|
||
pr := PWord(@Result[1]);
|
||
for i := 0 to rl-1 do begin
|
||
pr^ := ABytes[i];
|
||
Inc(pr);
|
||
end;
|
||
end;
|
||
end;
|
||
|
||
function BytesToName(
|
||
const ABytes : array of Byte
|
||
) : UnicodeString;
|
||
begin
|
||
Result := BytesToString(ABytes,BYTES_OF_VALID_NAME_CHARS);
|
||
end;
|
||
|
||
{ TCollationTable }
|
||
|
||
function TCollationTable.GetCapacity : Integer;
|
||
begin
|
||
Result := Length(FItems);
|
||
end;
|
||
|
||
function TCollationTable.GetCount : Integer;
|
||
begin
|
||
if (FCount < 0) or (Length(FItems) < 1) or (FCount > Length(FItems)) then
|
||
FCount := 0;
|
||
Result := FCount;
|
||
end;
|
||
|
||
function TCollationTable.GetItem(const AIndex : Integer) : PCollationTableItem;
|
||
begin
|
||
if (AIndex < 0) or (AIndex >= Count) then
|
||
Error(reRangeError);
|
||
Result := @FItems[AIndex];
|
||
end;
|
||
|
||
procedure TCollationTable.Grow();
|
||
var
|
||
c0, c1 : Integer;
|
||
begin
|
||
c0 := Length(FItems);
|
||
if (c0 < 1) then begin
|
||
c0 := 1;
|
||
if (FCount < 0) then
|
||
FCount := 0;
|
||
end;
|
||
c1 := 2*c0;
|
||
c0 := Length(FItems);
|
||
SetLength(FItems,c1);
|
||
FillChar(FItems[c0],((c1-c0)*SizeOf(TCollationTableItem)),#0);
|
||
end;
|
||
|
||
procedure TCollationTable.ClearItem(AItem : PCollationTableItem);
|
||
begin
|
||
if (AItem = nil) then
|
||
exit;
|
||
AItem^.Collation := nil;
|
||
SetLength(AItem^.Aliases,0);
|
||
end;
|
||
|
||
procedure TCollationTable.AddAlias(
|
||
AItem : PCollationTableItem;
|
||
AAlias : UnicodeString
|
||
);
|
||
var
|
||
n : UnicodeString;
|
||
c, i : Integer;
|
||
begin
|
||
n := NormalizeName(AAlias);
|
||
if (n = '') then
|
||
exit;
|
||
c := Length(AItem^.Aliases);
|
||
if (c > 0) then begin
|
||
for i := 0 to c-1 do begin
|
||
if (AItem^.Aliases[i] = n) then
|
||
exit;
|
||
end;
|
||
end;
|
||
SetLength(AItem^.Aliases,(c+1));
|
||
AItem^.Aliases[c] := n;
|
||
end;
|
||
|
||
class function TCollationTable.NormalizeName(
|
||
AName : UnicodeString
|
||
) : UnicodeString;
|
||
var
|
||
r : UnicodeString;
|
||
c, i, rl : Integer;
|
||
cx : Word;
|
||
begin
|
||
c := Length(AName);
|
||
rl := 0;
|
||
SetLength(r,c);
|
||
for i := 1 to c do begin
|
||
case Ord(AName[i]) of
|
||
Ord('a')..Ord('z') : cx := Ord(AName[i]);
|
||
Ord('A')..Ord('Z') : cx := Ord(AName[i])+(Ord('a')-Ord('A'));
|
||
Ord('0')..Ord('9'),
|
||
Ord('-'), Ord('_') : cx := Ord(AName[i]);
|
||
else
|
||
cx := 0;
|
||
end;
|
||
if (cx > 0) then begin
|
||
rl := rl+1;
|
||
r[rl] := UnicodeChar(cx);
|
||
end;
|
||
end;
|
||
SetLength(r,rl);
|
||
Result := r;
|
||
end;
|
||
|
||
procedure TCollationTable.Clear();
|
||
var
|
||
p : PCollationTableItem;
|
||
i : Integer;
|
||
begin
|
||
if (Count < 1) then
|
||
exit;
|
||
p := @FItems[0];
|
||
for i := 0 to Count-1 do begin;
|
||
ClearItem(p);
|
||
Inc(p);
|
||
end;
|
||
FCount := 0;
|
||
end;
|
||
|
||
function TCollationTable.IndexOf(AName : UnicodeString) : Integer;
|
||
var
|
||
c, i, k : Integer;
|
||
p : PCollationTableItem;
|
||
n : UnicodeString;
|
||
begin
|
||
c := Count;
|
||
if (c > 0) then begin
|
||
// Names
|
||
n := NormalizeName(AName);
|
||
p := @FItems[0];
|
||
for i := 0 to c-1 do begin
|
||
if (Length(p^.Aliases) > 0) and (p^.Aliases[0] = n) then
|
||
exit(i);
|
||
Inc(p);
|
||
end;
|
||
// Aliases
|
||
p := @FItems[0];
|
||
for i := 0 to c-1 do begin
|
||
if (Length(p^.Aliases) > 1) then begin
|
||
for k := 1 to Length(p^.Aliases)-1 do begin
|
||
if (p^.Aliases[k] = n) then
|
||
exit(i);
|
||
end;
|
||
end;
|
||
Inc(p);
|
||
end;
|
||
end;
|
||
Result := -1;
|
||
end;
|
||
|
||
function TCollationTable.IndexOf(ACollation : PUCA_DataBook) : Integer;
|
||
var
|
||
c, i : Integer;
|
||
p : PCollationTableItem;
|
||
begin
|
||
c := Count;
|
||
if (c > 0) then begin
|
||
p := @FItems[0];
|
||
for i := 0 to c-1 do begin
|
||
if (p^.Collation = ACollation) then
|
||
exit(i);
|
||
Inc(p);
|
||
end;
|
||
end;
|
||
Result := -1;
|
||
end;
|
||
|
||
function TCollationTable.Find(AName : UnicodeString) : PCollationTableItem;
|
||
var
|
||
i : Integer;
|
||
begin
|
||
i := IndexOf(AName);
|
||
if (i >= 0) then
|
||
Result := @FItems[i]
|
||
else
|
||
Result := nil;
|
||
end;
|
||
|
||
function TCollationTable.Find(ACollation : PUCA_DataBook) : PCollationTableItem;
|
||
var
|
||
i : Integer;
|
||
begin
|
||
i := IndexOf(ACollation);
|
||
if (i >= 0) then
|
||
Result := @FItems[i]
|
||
else
|
||
Result := nil;
|
||
end;
|
||
|
||
function TCollationTable.Add(ACollation : PUCA_DataBook) : Integer;
|
||
var
|
||
c : Integer;
|
||
p : PCollationTableItem;
|
||
begin
|
||
Result := IndexOf(ACollation);
|
||
if (Result < 0) then begin
|
||
c := Count;
|
||
if (c >= Capacity) then
|
||
Grow();
|
||
p := @FItems[c];
|
||
p^.Collation := ACollation;
|
||
SetLength(p^.Aliases,1);
|
||
p^.Aliases[0] := NormalizeName(BytesToName(ACollation^.CollationName));
|
||
FCount := FCount+1;
|
||
Result := c;
|
||
end;
|
||
end;
|
||
|
||
function TCollationTable.AddAlias(AName, AAlias : UnicodeString) : Boolean;
|
||
var
|
||
p : PCollationTableItem;
|
||
begin
|
||
p := Find(AName);
|
||
Result := (p <> nil);
|
||
if Result then
|
||
AddAlias(p,AAlias);
|
||
end;
|
||
|
||
function TCollationTable.Remove(AIndex : Integer) : PUCA_DataBook;
|
||
var
|
||
p, q : PCollationTableItem;
|
||
c, i : Integer;
|
||
begin
|
||
if (AIndex < 0) or (AIndex >= Count) then
|
||
Error(reRangeError);
|
||
p := @FItems[AIndex];
|
||
Result := p^.Collation;
|
||
ClearItem(p);
|
||
c := Count;
|
||
if (AIndex < (c-1)) then begin
|
||
for i := AIndex+1 to c-1 do begin
|
||
q := p;
|
||
Inc(p);
|
||
Move(p^,q^,SizeOf(TCollationTableItem));
|
||
end;
|
||
FillChar(p^,SizeOf(TCollationTableItem),#0);
|
||
end;
|
||
FCount := FCount-1;
|
||
end;
|
||
|
||
{ TUInt24Rec }
|
||
|
||
class operator TUInt24Rec.Implicit(a : TUInt24Rec) : Cardinal;
|
||
begin
|
||
TCardinalRec(Result).byte0 := a.byte0;
|
||
TCardinalRec(Result).byte1 := a.byte1;
|
||
TCardinalRec(Result).byte2 := a.byte2;
|
||
TCardinalRec(Result).byte3 := 0;
|
||
end;
|
||
|
||
class operator TUInt24Rec.Implicit(a : TUInt24Rec) : LongInt;
|
||
begin
|
||
Result := Cardinal(a);
|
||
end;
|
||
|
||
class operator TUInt24Rec.Implicit(a : TUInt24Rec) : Word;
|
||
begin
|
||
{$IFOPT R+}
|
||
if (a > $FFFF) then
|
||
Error(reIntOverflow);
|
||
{$ENDIF R+}
|
||
TWordRec(Result).byte0 := a.byte0;
|
||
TWordRec(Result).byte1 := a.byte1;
|
||
end;
|
||
|
||
class operator TUInt24Rec.Implicit(a : TUInt24Rec) : Byte;
|
||
begin
|
||
{$IFOPT R+}
|
||
if (a > $FF) then
|
||
Error(reIntOverflow);
|
||
{$ENDIF R+}
|
||
Result := a.byte0;
|
||
end;
|
||
|
||
class operator TUInt24Rec.Implicit(a : Cardinal) : TUInt24Rec;
|
||
begin
|
||
{$IFOPT R+}
|
||
if (a > $FFFFFF) then
|
||
Error(reIntOverflow);
|
||
{$ENDIF R+}
|
||
Result.byte0 := TCardinalRec(a).byte0;
|
||
Result.byte1 := TCardinalRec(a).byte1;
|
||
Result.byte2 := TCardinalRec(a).byte2;
|
||
end;
|
||
|
||
class operator TUInt24Rec.Equal(a, b : TUInt24Rec) : Boolean;
|
||
begin
|
||
Result := (a.byte0 = b.byte0) and (a.byte1 = b.byte1) and (a.byte2 = b.byte2);
|
||
end;
|
||
|
||
class operator TUInt24Rec.Equal(a : TUInt24Rec; b : Cardinal) : Boolean;
|
||
begin
|
||
Result := (TCardinalRec(b).byte3 = 0) and
|
||
(a.byte0 = TCardinalRec(b).byte0) and
|
||
(a.byte1 = TCardinalRec(b).byte1) and
|
||
(a.byte2 = TCardinalRec(b).byte2);
|
||
end;
|
||
|
||
class operator TUInt24Rec.Equal(a : Cardinal; b : TUInt24Rec) : Boolean;
|
||
begin
|
||
Result := (b = a);
|
||
end;
|
||
|
||
class operator TUInt24Rec.Equal(a : TUInt24Rec; b : LongInt) : Boolean;
|
||
begin
|
||
Result := (LongInt(a) = b);
|
||
end;
|
||
|
||
class operator TUInt24Rec.Equal(a : LongInt; b : TUInt24Rec) : Boolean;
|
||
begin
|
||
Result := (b = a);
|
||
end;
|
||
|
||
class operator TUInt24Rec.Equal(a : TUInt24Rec; b : Word) : Boolean;
|
||
begin
|
||
Result := (a.byte2 = 0) and
|
||
(a.byte0 = TWordRec(b).byte0) and
|
||
(a.byte1 = TWordRec(b).byte1);
|
||
end;
|
||
|
||
class operator TUInt24Rec.Equal(a : Word; b : TUInt24Rec) : Boolean;
|
||
begin
|
||
Result := (b = a);
|
||
end;
|
||
|
||
class operator TUInt24Rec.Equal(a : TUInt24Rec; b : Byte) : Boolean;
|
||
begin
|
||
Result := (a.byte2 = 0) and
|
||
(a.byte1 = 0) and
|
||
(a.byte0 = b);
|
||
end;
|
||
|
||
class operator TUInt24Rec.Equal(a : Byte; b : TUInt24Rec) : Boolean;
|
||
begin
|
||
Result := (b = a);
|
||
end;
|
||
|
||
class operator TUInt24Rec.NotEqual(a, b : TUInt24Rec) : Boolean;
|
||
begin
|
||
Result := (a.byte0 <> b.byte0) or (a.byte1 <> b.byte1) or (a.byte2 <> b.byte2);
|
||
end;
|
||
|
||
class operator TUInt24Rec.NotEqual(a : TUInt24Rec; b : Cardinal) : Boolean;
|
||
begin
|
||
Result := (TCardinalRec(b).byte3 <> 0) or
|
||
(a.byte0 <> TCardinalRec(b).byte0) or
|
||
(a.byte1 <> TCardinalRec(b).byte1) or
|
||
(a.byte2 <> TCardinalRec(b).byte2);
|
||
end;
|
||
|
||
class operator TUInt24Rec.NotEqual(a : Cardinal; b : TUInt24Rec) : Boolean;
|
||
begin
|
||
Result := (b <> a);
|
||
end;
|
||
|
||
class operator TUInt24Rec.GreaterThan(a, b: TUInt24Rec): Boolean;
|
||
begin
|
||
Result := (a.byte2 > b.byte2) or
|
||
((a.byte2 = b.byte2) and (a.byte1 > b.byte1)) or
|
||
((a.byte2 = b.byte2) and (a.byte1 = b.byte1) and (a.byte0 > b.byte0));
|
||
end;
|
||
|
||
class operator TUInt24Rec.GreaterThan(a: TUInt24Rec; b: Cardinal): Boolean;
|
||
begin
|
||
Result := Cardinal(a) > b;
|
||
end;
|
||
|
||
class operator TUInt24Rec.GreaterThan(a: Cardinal; b: TUInt24Rec): Boolean;
|
||
begin
|
||
Result := a > Cardinal(b);
|
||
end;
|
||
|
||
class operator TUInt24Rec.GreaterThanOrEqual(a, b: TUInt24Rec): Boolean;
|
||
begin
|
||
Result := (a.byte2 > b.byte2) or
|
||
((a.byte2 = b.byte2) and (a.byte1 > b.byte1)) or
|
||
((a.byte2 = b.byte2) and (a.byte1 = b.byte1) and (a.byte0 >= b.byte0));
|
||
end;
|
||
|
||
class operator TUInt24Rec.GreaterThanOrEqual(a: TUInt24Rec; b: Cardinal): Boolean;
|
||
begin
|
||
Result := Cardinal(a) >= b;
|
||
end;
|
||
|
||
class operator TUInt24Rec.GreaterThanOrEqual(a: Cardinal; b: TUInt24Rec): Boolean;
|
||
begin
|
||
Result := a >= Cardinal(b);
|
||
end;
|
||
|
||
class operator TUInt24Rec.LessThan(a, b: TUInt24Rec): Boolean;
|
||
begin
|
||
Result := (b > a);
|
||
end;
|
||
|
||
class operator TUInt24Rec.LessThan(a: TUInt24Rec; b: Cardinal): Boolean;
|
||
begin
|
||
Result := Cardinal(a) < b;
|
||
end;
|
||
|
||
class operator TUInt24Rec.LessThan(a: Cardinal; b: TUInt24Rec): Boolean;
|
||
begin
|
||
Result := a < Cardinal(b);
|
||
end;
|
||
|
||
class operator TUInt24Rec.LessThanOrEqual(a, b: TUInt24Rec): Boolean;
|
||
begin
|
||
Result := (b >= a);
|
||
end;
|
||
|
||
class operator TUInt24Rec.LessThanOrEqual(a: TUInt24Rec; b: Cardinal): Boolean;
|
||
begin
|
||
Result := Cardinal(a) <= b;
|
||
end;
|
||
|
||
class operator TUInt24Rec.LessThanOrEqual(a: Cardinal; b: TUInt24Rec): Boolean;
|
||
begin
|
||
Result := a <= Cardinal(b);
|
||
end;
|
||
|
||
type
|
||
TBitOrder = 0..7;
|
||
function IsBitON(const AData : Byte; const ABit : TBitOrder) : Boolean ;inline;
|
||
begin
|
||
Result := ( ( AData and ( 1 shl ABit ) ) <> 0 );
|
||
end;
|
||
|
||
procedure SetBit(var AData : Byte; const ABit : TBitOrder; const AValue : Boolean);inline;
|
||
begin
|
||
if AValue then
|
||
AData := AData or (1 shl (ABit mod 8))
|
||
else
|
||
AData := AData and ( not ( 1 shl ( ABit mod 8 ) ) );
|
||
end;
|
||
|
||
{$IFNDEF HAS_COMPARE_BYTE}
|
||
function CompareByte(const A, B; ALength : SizeInt):SizeInt;
|
||
var
|
||
pa, pb : PByte;
|
||
i : Integer;
|
||
begin
|
||
if (ALength < 1) then
|
||
exit(0);
|
||
pa := PByte(@A);
|
||
pb := PByte(@B);
|
||
if (pa = pb) then
|
||
exit(0);
|
||
for i := 1 to ALength do begin
|
||
if (pa^ <> pb^) then
|
||
exit(i);
|
||
pa := pa+1;
|
||
pb := pb+1;
|
||
end;
|
||
Result := 0;
|
||
end;
|
||
{$ENDIF HAS_COMPARE_BYTE}
|
||
|
||
function IndexInArrayDWord(const ABuffer : array of DWord; AItem : DWord) : SizeInt;
|
||
var
|
||
c, i : Integer;
|
||
p : PDWord;
|
||
begin
|
||
Result := -1;
|
||
c := Length(ABuffer);
|
||
if (c < 1) then
|
||
exit;
|
||
p := @ABuffer[Low(ABuffer)];
|
||
for i := 1 to c do begin
|
||
if (p^ = AItem) then begin
|
||
Result := i-1;
|
||
break;
|
||
end;
|
||
p := p+1;
|
||
end;
|
||
end;
|
||
|
||
var
|
||
CollationTable : TCollationTable;
|
||
function IndexOfCollation(AName : UnicodeString) : Integer;
|
||
begin
|
||
Result := CollationTable.IndexOf(AName);
|
||
end;
|
||
|
||
function RegisterCollation(const ACollation : PUCA_DataBook) : Boolean;
|
||
begin
|
||
Result := RegisterCollation(ACollation,[]);
|
||
end;
|
||
|
||
function RegisterCollation(
|
||
const ACollation : PUCA_DataBook;
|
||
const AAliasList : array of UnicodeString
|
||
) : Boolean;
|
||
var
|
||
i : Integer;
|
||
p : PCollationTableItem;
|
||
begin
|
||
Result := (CollationTable.IndexOf(BytesToName(ACollation^.CollationName)) = -1);
|
||
if Result then begin
|
||
i := CollationTable.Add(ACollation);
|
||
if (Length(AAliasList) > 0) then begin
|
||
p := CollationTable[i];
|
||
for i := Low(AAliasList) to High(AAliasList) do
|
||
CollationTable.AddAlias(p,AAliasList[i]);
|
||
end;
|
||
end;
|
||
end;
|
||
|
||
function RegisterCollation(ADirectory, ALanguage : UnicodeString) : Boolean;
|
||
var
|
||
cl : PUCA_DataBook;
|
||
al : TUnicodeStringArray;
|
||
begin
|
||
al := nil;
|
||
cl := LoadCollation(ADirectory,ALanguage,al);
|
||
if (cl = nil) then
|
||
exit(False);
|
||
try
|
||
Result := RegisterCollation(cl,al);
|
||
except
|
||
FreeCollation(cl);
|
||
raise;
|
||
end;
|
||
if not Result then
|
||
FreeCollation(cl);
|
||
end;
|
||
|
||
function AddAliasCollation(
|
||
ACollation : PUCA_DataBook;
|
||
AALias : UnicodeString
|
||
) : Boolean;
|
||
var
|
||
p : PCollationTableItem;
|
||
begin
|
||
Result := False;
|
||
if (ACollation <> nil) then begin
|
||
p := CollationTable.Find(ACollation);
|
||
if (p <> nil) then begin
|
||
CollationTable.AddAlias(p,AALias);
|
||
Result := True;
|
||
end;
|
||
end;
|
||
end;
|
||
|
||
function UnregisterCollation(AName : UnicodeString): Boolean;
|
||
var
|
||
i : Integer;
|
||
begin
|
||
i := CollationTable.IndexOf(AName);
|
||
Result := (i >= 0);
|
||
if Result then
|
||
CollationTable.Remove(i);
|
||
end;
|
||
|
||
procedure UnregisterCollations(const AFreeDynamicCollations : Boolean);
|
||
var
|
||
i : Integer;
|
||
p : PCollationTableItem;
|
||
begin
|
||
if AFreeDynamicCollations then begin
|
||
for i := 0 to CollationTable.Count-1 do begin
|
||
p := CollationTable[i];
|
||
if p^.Collation.Dynamic then begin
|
||
FreeCollation(p^.Collation);
|
||
p^.Collation := nil;
|
||
end;
|
||
end;
|
||
end;
|
||
CollationTable.Clear();
|
||
end;
|
||
|
||
function FindCollation(AName : UnicodeString): PUCA_DataBook;overload;
|
||
var
|
||
p : PCollationTableItem;
|
||
begin
|
||
p := CollationTable.Find(AName);
|
||
if (p <> nil) then
|
||
Result := p^.Collation
|
||
else
|
||
Result := nil;
|
||
end;
|
||
|
||
function GetCollationCount() : Integer;
|
||
begin
|
||
Result := CollationTable.Count;
|
||
end;
|
||
|
||
function FindCollation(const AIndex : Integer): PUCA_DataBook;overload;
|
||
var
|
||
p : PCollationTableItem;
|
||
begin
|
||
p := CollationTable[AIndex];
|
||
if (p <> nil) then
|
||
Result := p^.Collation
|
||
else
|
||
Result := nil;
|
||
end;
|
||
|
||
|
||
procedure PrepareCollation(
|
||
ACollation : PUCA_DataBook;
|
||
const ABaseName : UnicodeString;
|
||
const AChangedFields : TCollationFields
|
||
);
|
||
var
|
||
s : UnicodeString;
|
||
p, base : PUCA_DataBook;
|
||
begin
|
||
if (ABaseName <> '') then
|
||
s := ABaseName
|
||
else
|
||
s := ROOT_COLLATION_NAME;
|
||
p := ACollation;
|
||
base := FindCollation(s);
|
||
if (base = nil) then
|
||
Error(reCodesetConversion);
|
||
p^.Base := base;
|
||
if not(TCollationField.BackWard in AChangedFields) then
|
||
p^.Backwards := base^.Backwards;
|
||
if not(TCollationField.VariableLowLimit in AChangedFields) then
|
||
p^.VariableLowLimit := base^.VariableLowLimit;
|
||
if not(TCollationField.VariableHighLimit in AChangedFields) then
|
||
p^.VariableLowLimit := base^.VariableHighLimit;
|
||
if not(TCollationField.Alternate in AChangedFields) then
|
||
p^.VariableWeight := base^.VariableWeight;
|
||
if not(TCollationField.Normalization in AChangedFields) then
|
||
p^.NoNormalization := base^.NoNormalization;
|
||
if not(TCollationField.Strength in AChangedFields) then
|
||
p^.ComparisonStrength := base^.ComparisonStrength;
|
||
end;
|
||
|
||
type
|
||
TSerializedCollationHeader = packed record
|
||
Base : TCollationName;
|
||
Version : TCollationVersion;
|
||
CollationName : TCollationName;
|
||
CollationAliases : TCollationName; // ";" separated
|
||
VariableWeight : Byte;
|
||
Backwards : Byte;
|
||
BMP_Table1Length : DWord;
|
||
BMP_Table2Length : DWord;
|
||
OBMP_Table1Length : DWord;
|
||
OBMP_Table2Length : DWord;
|
||
PropCount : DWord;
|
||
VariableLowLimit : Word;
|
||
VariableHighLimit : Word;
|
||
NoNormalization : Byte;
|
||
Strength : Byte;
|
||
ChangedFields : Byte;
|
||
end;
|
||
PSerializedCollationHeader = ^TSerializedCollationHeader;
|
||
|
||
procedure FreeCollation(AItem : PUCA_DataBook);
|
||
var
|
||
h : PSerializedCollationHeader;
|
||
begin
|
||
if (AItem = nil) or not(AItem^.Dynamic) then
|
||
exit;
|
||
h := PSerializedCollationHeader(PtrUInt(AItem) + SizeOf(TUCA_DataBook));
|
||
if (AItem^.BMP_Table1 <> nil) then
|
||
FreeMem(AItem^.BMP_Table1,h^.BMP_Table1Length);
|
||
if (AItem^.BMP_Table2 <> nil) then
|
||
FreeMem(AItem^.BMP_Table2,h^.BMP_Table2Length);
|
||
if (AItem^.OBMP_Table1 <> nil) then
|
||
FreeMem(AItem^.OBMP_Table1,h^.OBMP_Table1Length);
|
||
if (AItem^.OBMP_Table2 <> nil) then
|
||
FreeMem(AItem^.OBMP_Table2,h^.OBMP_Table2Length);
|
||
if (AItem^.Props <> nil) then
|
||
FreeMem(AItem^.Props,h^.PropCount);
|
||
FreeMem(AItem,(SizeOf(TUCA_DataBook)+SizeOf(TSerializedCollationHeader)));
|
||
end;
|
||
|
||
function ParseAliases(AStr : UnicodeString) : TUnicodeStringArray;
|
||
var
|
||
r : TUnicodeStringArray;
|
||
c, k, i : Integer;
|
||
s : UnicodeString;
|
||
begin
|
||
SetLength(r,0);
|
||
c := Length(AStr);
|
||
k := 1;
|
||
for i := 1 to c do begin
|
||
if (AStr[i] <> ';') then begin
|
||
k := i;
|
||
break;
|
||
end;
|
||
end;
|
||
|
||
s := '';
|
||
for i := 1 to c do begin
|
||
if (AStr[i] = ';') then begin
|
||
s := Copy(AStr,k,(i-k));
|
||
end else if (i = c) then begin
|
||
s := Copy(AStr,k,(i+1-k));
|
||
end;
|
||
if (s <> '') then begin
|
||
SetLength(r,(Length(r)+1));
|
||
r[High(r)] := s;
|
||
s := '';
|
||
k := i+1;
|
||
end;
|
||
end;
|
||
Result := r;
|
||
end;
|
||
|
||
function LoadCollation(
|
||
const AData : Pointer;
|
||
const ADataLength : Integer;
|
||
var AAliases : TUnicodeStringArray
|
||
) : PUCA_DataBook;
|
||
var
|
||
dataPointer : PByte;
|
||
readedLength : LongInt;
|
||
|
||
function ReadBuffer(ADest : Pointer; ALength : LongInt) : Boolean;
|
||
begin
|
||
Result := (readedLength + ALength) <= ADataLength;
|
||
if not result then
|
||
exit;
|
||
Move(dataPointer^,ADest^,ALength);
|
||
Inc(dataPointer,ALength);
|
||
readedLength := readedLength + ALength;
|
||
end;
|
||
|
||
var
|
||
r : PUCA_DataBook;
|
||
h : PSerializedCollationHeader;
|
||
cfs : TCollationFields;
|
||
i : Integer;
|
||
baseName, s : UnicodeString;
|
||
begin
|
||
Result := nil;
|
||
readedLength := 0;
|
||
AAliases := nil;
|
||
dataPointer := AData;
|
||
r := AllocMem((SizeOf(TUCA_DataBook)+SizeOf(TSerializedCollationHeader)));
|
||
try
|
||
h := PSerializedCollationHeader(PtrUInt(r) + SizeOf(TUCA_DataBook));
|
||
if not ReadBuffer(h,SizeOf(TSerializedCollationHeader)) then
|
||
exit;
|
||
r^.Version := h^.Version;
|
||
r^.CollationName := h^.CollationName;
|
||
r^.VariableWeight := TUCA_VariableKind(h^.VariableWeight);
|
||
r^.Backwards[0] := IsBitON(h^.Backwards,0);
|
||
r^.Backwards[1] := IsBitON(h^.Backwards,1);
|
||
r^.Backwards[2] := IsBitON(h^.Backwards,2);
|
||
r^.Backwards[3] := IsBitON(h^.Backwards,3);
|
||
if (h^.BMP_Table1Length > 0) then begin
|
||
r^.BMP_Table1 := GetMemory(h^.BMP_Table1Length);
|
||
if not ReadBuffer(r^.BMP_Table1,h^.BMP_Table1Length) then
|
||
exit;
|
||
end;
|
||
if (h^.BMP_Table2Length > 0) then begin
|
||
r^.BMP_Table2 := GetMemory(h^.BMP_Table2Length);
|
||
if not ReadBuffer(r^.BMP_Table2,h^.BMP_Table2Length) then
|
||
exit;
|
||
end;
|
||
if (h^.OBMP_Table1Length > 0) then begin
|
||
r^.OBMP_Table1 := GetMemory(h^.OBMP_Table1Length);
|
||
if not ReadBuffer(r^.OBMP_Table1,h^.OBMP_Table1Length) then
|
||
exit;
|
||
end;
|
||
if (h^.OBMP_Table2Length > 0) then begin
|
||
r^.OBMP_Table2 := GetMemory(h^.OBMP_Table2Length);
|
||
if not ReadBuffer(r^.OBMP_Table2,h^.OBMP_Table2Length) then
|
||
exit;
|
||
end;
|
||
r^.PropCount := h^.PropCount;
|
||
if (h^.PropCount > 0) then begin
|
||
r^.Props := GetMemory(h^.PropCount);
|
||
if not ReadBuffer(r^.Props,h^.PropCount) then
|
||
exit;
|
||
end;
|
||
r^.VariableLowLimit := h^.VariableLowLimit;
|
||
r^.VariableHighLimit := h^.VariableHighLimit;
|
||
r^.NoNormalization := (h^.NoNormalization <> 0);
|
||
r^.ComparisonStrength := h^.Strength;
|
||
|
||
cfs := [];
|
||
for i := Ord(Low(TCollationField)) to Ord(High(TCollationField)) do begin
|
||
if IsBitON(h^.ChangedFields,i) then
|
||
cfs := cfs + [TCollationField(i)];
|
||
end;
|
||
baseName := BytesToName(h^.Base);
|
||
if (baseName = '') then begin
|
||
if (BytesToName(h^.CollationName) <> ROOT_COLLATION_NAME) then
|
||
baseName := ROOT_COLLATION_NAME
|
||
else
|
||
baseName := '';
|
||
end;
|
||
if (baseName <> '') then
|
||
PrepareCollation(r,baseName,cfs);
|
||
s := BytesToString(h^.CollationAliases,(BYTES_OF_VALID_NAME_CHARS+[Ord(';')]));
|
||
if (s <> '') then
|
||
AAliases := ParseAliases(s);
|
||
r^.Dynamic := True;
|
||
Result := r;
|
||
except
|
||
FreeCollation(r);
|
||
raise;
|
||
end;
|
||
end;
|
||
|
||
function LoadCollation(
|
||
const AData : Pointer;
|
||
const ADataLength : Integer
|
||
) : PUCA_DataBook;
|
||
var
|
||
al : TUnicodeStringArray;
|
||
begin
|
||
al := nil;
|
||
Result := LoadCollation(AData,ADataLength,al);
|
||
end;
|
||
|
||
{$IFDEF HAS_PUSH}
|
||
{$PUSH}
|
||
{$ENDIF HAS_PUSH}
|
||
|
||
{$IFNDEF HAS_PUSH}
|
||
{$IFOPT I+}
|
||
{$DEFINE I_PLUS}
|
||
{$ELSE}
|
||
{$UNDEF I_PLUS}
|
||
{$ENDIF}
|
||
{$ENDIF HAS_PUSH}
|
||
function LoadCollation(
|
||
const AFileName : UnicodeString;
|
||
var AAliases : TUnicodeStringArray
|
||
) : PUCA_DataBook;
|
||
const
|
||
BLOCK_SIZE = 16*1024;
|
||
var
|
||
f : File of Byte;
|
||
locSize, locReaded, c : LongInt;
|
||
locBuffer : PByte;
|
||
locBlockSize : LongInt;
|
||
begin
|
||
Result := nil;
|
||
{$I-}
|
||
if (AFileName = '') then
|
||
exit;
|
||
Assign(f,AFileName);
|
||
Reset(f);
|
||
try
|
||
if (IOResult <> 0) then
|
||
exit;
|
||
locSize := FileSize(f);
|
||
if (locSize < SizeOf(TSerializedCollationHeader)) then
|
||
exit;
|
||
locBuffer := GetMemory(locSize);
|
||
try
|
||
locBlockSize := BLOCK_SIZE;
|
||
locReaded := 0;
|
||
while (locReaded < locSize) do begin
|
||
if (locBlockSize > (locSize-locReaded)) then
|
||
locBlockSize := locSize-locReaded;
|
||
BlockRead(f,locBuffer[locReaded],locBlockSize,c);
|
||
if (IOResult <> 0) or (c <= 0) then
|
||
exit;
|
||
locReaded := locReaded + c;
|
||
end;
|
||
Result := LoadCollation(locBuffer,locSize,AAliases);
|
||
finally
|
||
FreeMemory(locBuffer);
|
||
end;
|
||
finally
|
||
Close(f);
|
||
end;
|
||
end;
|
||
|
||
function LoadCollation(
|
||
const AFileName : UnicodeString
|
||
) : PUCA_DataBook;
|
||
var
|
||
al : TUnicodeStringArray;
|
||
begin
|
||
al := nil;
|
||
Result := LoadCollation(AFileName,al);
|
||
end;
|
||
|
||
{$IFDEF HAS_PUSH}
|
||
{$POP}
|
||
{$ELSE}
|
||
{$IFDEF I_PLUS}
|
||
{$I+}
|
||
{$ELSE}
|
||
{$I-}
|
||
{$ENDIF}
|
||
{$ENDIF HAS_PUSH}
|
||
function LoadCollation(
|
||
const ADirectory,
|
||
ALanguage : UnicodeString;
|
||
var AAliases : TUnicodeStringArray
|
||
) : PUCA_DataBook;
|
||
var
|
||
fileName : UnicodeString;
|
||
begin
|
||
fileName := ADirectory;
|
||
if (fileName <> '') then begin
|
||
if (fileName[Length(fileName)] <> DirectorySeparator) then
|
||
fileName := fileName + DirectorySeparator;
|
||
end;
|
||
fileName := fileName + 'collation_' + ALanguage + '_' + ENDIAN_SUFFIX[ENDIAN_NATIVE] + '.bco';
|
||
Result := LoadCollation(fileName,AAliases);
|
||
end;
|
||
|
||
function LoadCollation(
|
||
const ADirectory,
|
||
ALanguage : UnicodeString
|
||
) : PUCA_DataBook;
|
||
var
|
||
al : TUnicodeStringArray;
|
||
begin
|
||
al := nil;
|
||
Result := LoadCollation(ADirectory,ALanguage,al);
|
||
end;
|
||
|
||
{$INCLUDE unicodedata.inc}
|
||
{$IFDEF ENDIAN_LITTLE}
|
||
{$INCLUDE unicodedata_le.inc}
|
||
{$ENDIF ENDIAN_LITTLE}
|
||
{$IFDEF ENDIAN_BIG}
|
||
{$INCLUDE unicodedata_be.inc}
|
||
{$ENDIF ENDIAN_BIG}
|
||
|
||
procedure FromUCS4(const AValue : UCS4Char; out AHighS, ALowS : UnicodeChar);
|
||
begin
|
||
AHighS := UnicodeChar((AValue - $10000) shr 10 + $d800);
|
||
ALowS := UnicodeChar((AValue - $10000) and $3ff + $dc00);
|
||
end;
|
||
|
||
function ToUCS4(const AHighS, ALowS : UnicodeChar) : UCS4Char;inline;
|
||
begin
|
||
Result := (UCS4Char(Word(AHighS)) - HIGH_SURROGATE_BEGIN) shl 10 +
|
||
(UCS4Char(Word(ALowS)) - LOW_SURROGATE_BEGIN) + UCS4_HALF_BASE;
|
||
end;
|
||
|
||
function UnicodeIsSurrogatePair(
|
||
const AHighSurrogate,
|
||
ALowSurrogate : UnicodeChar
|
||
) : Boolean;
|
||
begin
|
||
Result :=
|
||
( (Word(AHighSurrogate) >= HIGH_SURROGATE_BEGIN) and
|
||
(Word(AHighSurrogate) <= HIGH_SURROGATE_END)
|
||
) and
|
||
( (Word(ALowSurrogate) >= LOW_SURROGATE_BEGIN) and
|
||
(Word(ALowSurrogate) <= LOW_SURROGATE_END)
|
||
)
|
||
end;
|
||
|
||
function UnicodeIsHighSurrogate(const AValue : UnicodeChar) : Boolean;
|
||
begin
|
||
Result := (Word(AValue) >= HIGH_SURROGATE_BEGIN) and
|
||
(Word(AValue) <= HIGH_SURROGATE_END);
|
||
end;
|
||
|
||
function UnicodeIsLowSurrogate(const AValue : UnicodeChar) : Boolean;
|
||
begin
|
||
Result := (Word(AValue) >= LOW_SURROGATE_BEGIN) and
|
||
(Word(AValue) <= LOW_SURROGATE_END);
|
||
end;
|
||
|
||
function GetProps(const ACodePoint : Word) : PUC_Prop;overload;{$IFDEF INLINE_SUPPORT_PRIVATE_VARS}inline;{$ENDIF}
|
||
begin
|
||
Result:=
|
||
@UC_PROP_ARRAY[
|
||
UC_TABLE_3[
|
||
UC_TABLE_2[UC_TABLE_1[hi(ACodePoint)]]
|
||
[lo(ACodePoint) shr 4]
|
||
][lo(ACodePoint) and $F]
|
||
]; {
|
||
@UC_PROP_ARRAY[
|
||
UC_TABLE_2[
|
||
(UC_TABLE_1[WordRec(ACodePoint).Hi] * 256) +
|
||
WordRec(ACodePoint).Lo
|
||
]
|
||
];}
|
||
end;
|
||
|
||
function GetProps(const AHighS, ALowS : UnicodeChar): PUC_Prop;overload;{$IFDEF INLINE_SUPPORT_PRIVATE_VARS}inline;{$ENDIF}
|
||
begin
|
||
Result:=
|
||
@UC_PROP_ARRAY[
|
||
UCO_TABLE_3[
|
||
UCO_TABLE_2[UCO_TABLE_1[Word(AHighS)-HIGH_SURROGATE_BEGIN]]
|
||
[(Word(ALowS) - LOW_SURROGATE_BEGIN) div 32]
|
||
][(Word(ALowS) - LOW_SURROGATE_BEGIN) mod 32]
|
||
]; {
|
||
Result:=
|
||
@UC_PROP_ARRAY[
|
||
UCO_TABLE_2[
|
||
(UCO_TABLE_1[Word(AHighS)-HIGH_SURROGATE_BEGIN] * HIGH_SURROGATE_COUNT) +
|
||
Word(ALowS) - LOW_SURROGATE_BEGIN
|
||
]
|
||
]; }
|
||
end;
|
||
|
||
function GetProps(const ACodePoint : Cardinal) : PUC_Prop;inline;
|
||
var
|
||
l, h : UnicodeChar;
|
||
begin
|
||
if (ACodePoint <= High(Word)) then
|
||
exit(GetProps(Word(ACodePoint)));
|
||
FromUCS4(ACodePoint,h,l);
|
||
Result := GetProps(h,l);
|
||
end;
|
||
|
||
function UnicodeToUpper(
|
||
const AString : UnicodeString;
|
||
const AIgnoreInvalidSequence : Boolean;
|
||
out AResultString : UnicodeString
|
||
) : Integer;
|
||
var
|
||
i, c : SizeInt;
|
||
pp, pr : PUnicodeChar;
|
||
pu : PUC_Prop;
|
||
locIsSurrogate : Boolean;
|
||
r : UnicodeString;
|
||
begin
|
||
c := Length(AString);
|
||
SetLength(r,2*c);
|
||
if (c > 0) then begin
|
||
pp := @AString[1];
|
||
pr := @r[1];
|
||
i := 1;
|
||
while (i <= c) do begin
|
||
pu := GetProps(Word(pp^));
|
||
locIsSurrogate := (pu^.Category = UGC_Surrogate);
|
||
if locIsSurrogate then begin
|
||
if (i = c) or not(UnicodeIsSurrogatePair(pp[0],pp[1])) then begin
|
||
if AIgnoreInvalidSequence then begin
|
||
pr^ := pp^;
|
||
Inc(pp);
|
||
Inc(pr);
|
||
Inc(i);
|
||
Continue;
|
||
end;
|
||
exit(ERROR_INVALID_CODEPOINT_SEQUENCE);
|
||
end;
|
||
pu := GetProps(pp^,AString[i+1]);
|
||
end;
|
||
if (pu^.SimpleUpperCase = 0) then begin
|
||
pr^ := pp^;
|
||
if locIsSurrogate then begin
|
||
Inc(pp);
|
||
Inc(pr);
|
||
Inc(i);
|
||
pr^ := pp^;
|
||
end;
|
||
end else begin
|
||
if (pu^.SimpleUpperCase <= $FFFF) then begin
|
||
pr^ := UnicodeChar(Word(pu^.SimpleUpperCase));
|
||
end else begin
|
||
FromUCS4(UCS4Char(Cardinal(pu^.SimpleUpperCase)),pr^,PUnicodeChar(PtrUInt(pr)+SizeOf(UnicodeChar))^);
|
||
Inc(pr);
|
||
end;
|
||
if locIsSurrogate then begin
|
||
Inc(pp);
|
||
Inc(i);
|
||
end;
|
||
end;
|
||
Inc(pp);
|
||
Inc(pr);
|
||
Inc(i);
|
||
end;
|
||
Dec(pp);
|
||
i := ((PtrUInt(pr) - PtrUInt(@r[1])) div SizeOf(UnicodeChar));
|
||
SetLength(r,i);
|
||
AResultString := r;
|
||
end;
|
||
Result := 0;
|
||
end;
|
||
|
||
function UnicodeToLower(
|
||
const AString : UnicodeString;
|
||
const AIgnoreInvalidSequence : Boolean;
|
||
out AResultString : UnicodeString
|
||
) : Integer;
|
||
var
|
||
i, c : SizeInt;
|
||
pp, pr : PUnicodeChar;
|
||
pu : PUC_Prop;
|
||
locIsSurrogate : Boolean;
|
||
r : UnicodeString;
|
||
begin
|
||
c := Length(AString);
|
||
SetLength(r,2*c);
|
||
if (c > 0) then begin
|
||
pp := @AString[1];
|
||
pr := @r[1];
|
||
i := 1;
|
||
while (i <= c) do begin
|
||
pu := GetProps(Word(pp^));
|
||
locIsSurrogate := (pu^.Category = UGC_Surrogate);
|
||
if locIsSurrogate then begin
|
||
if (i = c) or not(UnicodeIsSurrogatePair(pp[0],pp[1])) then begin
|
||
if AIgnoreInvalidSequence then begin
|
||
pr^ := pp^;
|
||
Inc(pp);
|
||
Inc(pr);
|
||
Inc(i);
|
||
Continue;
|
||
end;
|
||
exit(ERROR_INVALID_CODEPOINT_SEQUENCE);
|
||
end;
|
||
pu := GetProps(pp^,AString[i+1]);
|
||
end;
|
||
if (pu^.SimpleLowerCase = 0) then begin
|
||
pr^ := pp^;
|
||
if locIsSurrogate then begin
|
||
Inc(pp);
|
||
Inc(pr);
|
||
Inc(i);
|
||
pr^ := pp^;
|
||
end;
|
||
end else begin
|
||
if (pu^.SimpleLowerCase <= $FFFF) then begin
|
||
pr^ := UnicodeChar(Word(pu^.SimpleLowerCase));
|
||
end else begin
|
||
FromUCS4(UCS4Char(Cardinal(pu^.SimpleLowerCase)),pr^,PUnicodeChar(PtrUInt(pr)+SizeOf(UnicodeChar))^);
|
||
Inc(pr);
|
||
end;
|
||
if locIsSurrogate then begin
|
||
Inc(pp);
|
||
Inc(i);
|
||
end;
|
||
end;
|
||
Inc(pp);
|
||
Inc(pr);
|
||
Inc(i);
|
||
end;
|
||
Dec(pp);
|
||
i := ((PtrUInt(pr) - PtrUInt(@r[1])) div SizeOf(UnicodeChar));
|
||
SetLength(r,i);
|
||
AResultString := r;
|
||
end;
|
||
Result := 0;
|
||
end;
|
||
|
||
//----------------------------------------------------------------------
|
||
function DecomposeHangul(const AChar : Cardinal; ABuffer : PCardinal) : Integer;
|
||
const
|
||
SBase = $AC00;
|
||
LBase = $1100;
|
||
VBase = $1161;
|
||
TBase = $11A7;
|
||
LCount = 19;
|
||
VCount = 21;
|
||
TCount = 28;
|
||
NCount = VCount * TCount; // 588
|
||
SCount = LCount * NCount; // 11172
|
||
var
|
||
SIndex, L, V, T : Integer;
|
||
begin
|
||
SIndex := AChar - SBase;
|
||
if (SIndex < 0) or (SIndex >= SCount) then begin
|
||
ABuffer^ := AChar;
|
||
exit(1);
|
||
end;
|
||
L := LBase + SIndex div NCount;
|
||
V := VBase + (SIndex mod NCount) div TCount;
|
||
T := TBase + SIndex mod TCount;
|
||
ABuffer[0] := L;
|
||
ABuffer[1] := V;
|
||
Result := 2;
|
||
if (T <> TBase) then begin
|
||
ABuffer[2] := T;
|
||
Inc(Result);
|
||
end;
|
||
end;
|
||
|
||
function Decompose(const ADecomposeIndex : Integer; ABuffer : PUnicodeChar) : Integer;
|
||
var
|
||
locStack : array[0..23] of Cardinal;
|
||
locStackIdx : Integer;
|
||
ResultBuffer : array[0..23] of Cardinal;
|
||
ResultIdx : Integer;
|
||
|
||
procedure AddCompositionToStack(const AIndex : Integer);
|
||
var
|
||
pdecIdx : ^TDecompositionIndexRec;
|
||
k, kc : Integer;
|
||
pu : ^UInt24;
|
||
begin
|
||
pdecIdx := @(UC_DEC_BOOK_DATA.Index[AIndex]);
|
||
pu := @(UC_DEC_BOOK_DATA.CodePoints[pdecIdx^.S]);
|
||
kc := pdecIdx^.L;
|
||
Inc(pu,kc);
|
||
for k := 1 to kc do begin
|
||
Dec(pu);
|
||
locStack[locStackIdx + k] := pu^;
|
||
end;
|
||
locStackIdx := locStackIdx + kc;
|
||
end;
|
||
|
||
procedure AddResult(const AChar : Cardinal);{$IFDEF INLINE_SUPPORT_PRIVATE_VARS}inline;{$ENDIF}
|
||
begin
|
||
Inc(ResultIdx);
|
||
ResultBuffer[ResultIdx] := AChar;
|
||
end;
|
||
|
||
function PopStack() : Cardinal;{$IFDEF INLINE_SUPPORT_PRIVATE_VARS}inline;{$ENDIF}
|
||
begin
|
||
Result := locStack[locStackIdx];
|
||
Dec(locStackIdx);
|
||
end;
|
||
|
||
var
|
||
cu : Cardinal;
|
||
decIdx : SmallInt;
|
||
locIsWord : Boolean;
|
||
i : Integer;
|
||
p : PUnicodeChar;
|
||
begin
|
||
ResultIdx := -1;
|
||
locStackIdx := -1;
|
||
AddCompositionToStack(ADecomposeIndex);
|
||
while (locStackIdx >= 0) do begin
|
||
cu := PopStack();
|
||
locIsWord := (cu <= MAX_WORD);
|
||
if locIsWord then
|
||
decIdx := GetProps(Word(cu))^.DecompositionID
|
||
else
|
||
decIdx := GetProps(cu)^.DecompositionID;
|
||
if (decIdx = -1) then
|
||
AddResult(cu)
|
||
else
|
||
AddCompositionToStack(decIdx);
|
||
end;
|
||
p := ABuffer;
|
||
Result := 0;
|
||
for i := 0 to ResultIdx do begin
|
||
cu := ResultBuffer[i];
|
||
if (cu <= MAX_WORD) then begin
|
||
p[0] := UnicodeChar(Word(cu));
|
||
Inc(p);
|
||
end else begin
|
||
FromUCS4(cu,p[0],p[1]);
|
||
Inc(p,2);
|
||
Inc(Result);
|
||
end;
|
||
end;
|
||
Result := Result + ResultIdx + 1;
|
||
end;
|
||
|
||
procedure CanonicalOrder(var AString : UnicodeString);
|
||
begin
|
||
CanonicalOrder(@AString[1],Length(AString));
|
||
end;
|
||
|
||
procedure CanonicalOrder(AStr : PUnicodeChar; const ALength : SizeInt);
|
||
var
|
||
i, c : SizeInt;
|
||
p, q : PUnicodeChar;
|
||
locIsSurrogateP, locIsSurrogateQ : Boolean;
|
||
|
||
procedure Swap();
|
||
var
|
||
t, t1 : UnicodeChar;
|
||
begin
|
||
if not locIsSurrogateP then begin
|
||
if not locIsSurrogateQ then begin
|
||
t := p^;
|
||
p^ := q^;
|
||
q^ := t;
|
||
exit;
|
||
end;
|
||
t := p^;
|
||
p[0] := q[0];
|
||
p[1] := q[1];
|
||
q[1] := t;
|
||
exit;
|
||
end;
|
||
if not locIsSurrogateQ then begin
|
||
t := q[0];
|
||
p[2] := p[1];
|
||
p[1] := p[0];
|
||
p[0] := t;
|
||
exit;
|
||
end;
|
||
t := p[0];
|
||
t1 := p[1];
|
||
p[0] := q[0];
|
||
p[1] := q[1];
|
||
q[0] := t;
|
||
q[1] := t1;
|
||
end;
|
||
|
||
var
|
||
pu : PUC_Prop;
|
||
cccp, cccq : Byte;
|
||
begin
|
||
c := ALength;
|
||
if (c < 2) then
|
||
exit;
|
||
p := AStr;
|
||
i := 1;
|
||
while (i < c) do begin
|
||
pu := GetProps(Word(p^));
|
||
locIsSurrogateP := (pu^.Category = UGC_Surrogate);
|
||
if locIsSurrogateP then begin
|
||
if (i = (c - 1)) then
|
||
Break;
|
||
if not UnicodeIsSurrogatePair(p[0],p[1]) then begin
|
||
Inc(p);
|
||
Inc(i);
|
||
Continue;
|
||
end;
|
||
pu := GetProps(p[0],p[1]);
|
||
end;
|
||
if (pu^.C3 > 0) then begin
|
||
cccp := pu^.C3;
|
||
if locIsSurrogateP then
|
||
q := p + 2
|
||
else
|
||
q := p + 1;
|
||
pu := GetProps(Word(q^));
|
||
locIsSurrogateQ := (pu^.Category = UGC_Surrogate);
|
||
if locIsSurrogateQ then begin
|
||
if (i = c) then
|
||
Break;
|
||
if not UnicodeIsSurrogatePair(q[0],q[1]) then begin
|
||
Inc(p);
|
||
Inc(i);
|
||
Continue;
|
||
end;
|
||
pu := GetProps(q[0],q[1]);
|
||
end;
|
||
cccq := pu^.C3;
|
||
if (cccq > 0) and (cccp > cccq) then begin
|
||
Swap();
|
||
if (i > 1) then begin
|
||
Dec(p);
|
||
Dec(i);
|
||
pu := GetProps(Word(p^));
|
||
if (pu^.Category = UGC_Surrogate) then begin
|
||
if (i > 1) then begin
|
||
Dec(p);
|
||
Dec(i);
|
||
end;
|
||
end;
|
||
Continue;
|
||
end;
|
||
end;
|
||
end;
|
||
if locIsSurrogateP then begin
|
||
Inc(p);
|
||
Inc(i);
|
||
end;
|
||
Inc(p);
|
||
Inc(i);
|
||
end;
|
||
end;
|
||
|
||
//Canonical Decomposition
|
||
function NormalizeNFD(const AString : UnicodeString) : UnicodeString;
|
||
begin
|
||
Result := NormalizeNFD(@AString[1],Length(AString));
|
||
end;
|
||
|
||
function NormalizeNFD(const AStr : PUnicodeChar; ALength : SizeInt) : UnicodeString;
|
||
const MAX_EXPAND = 3;
|
||
var
|
||
i, c, kc, k : SizeInt;
|
||
pp, pr : PUnicodeChar;
|
||
pu : PUC_Prop;
|
||
locIsSurrogate : Boolean;
|
||
cpArray : array[0..7] of Cardinal;
|
||
cp : Cardinal;
|
||
begin
|
||
c := ALength;
|
||
SetLength(Result,(MAX_EXPAND*c));
|
||
if (c > 0) then begin
|
||
pp := AStr;
|
||
pr := @Result[1];
|
||
i := 1;
|
||
while (i <= c) do begin
|
||
pu := GetProps(Word(pp^));
|
||
locIsSurrogate := (pu^.Category = UGC_Surrogate);
|
||
if locIsSurrogate then begin
|
||
if (i = c) then
|
||
Break;
|
||
if not UnicodeIsSurrogatePair(pp[0],pp[1]) then begin
|
||
pr^ := pp^;
|
||
Inc(pp);
|
||
Inc(pr);
|
||
Inc(i);
|
||
Continue;
|
||
end;
|
||
pu := GetProps(pp[0],pp[1]);
|
||
end;
|
||
if pu^.HangulSyllable then begin
|
||
if locIsSurrogate then begin
|
||
cp := ToUCS4(pp[0],pp[1]);
|
||
Inc(pp);
|
||
Inc(i);
|
||
end else begin
|
||
cp := Word(pp^);
|
||
end;
|
||
kc := DecomposeHangul(cp,@cpArray[0]);
|
||
for k := 0 to kc - 1 do begin
|
||
if (cpArray[k] <= MAX_WORD) then begin
|
||
pr^ := UnicodeChar(Word(cpArray[k]));
|
||
pr := pr + 1;
|
||
end else begin
|
||
FromUCS4(cpArray[k],pr[0],pr[1]);
|
||
pr := pr + 2;
|
||
end;
|
||
end;
|
||
if (kc > 0) then
|
||
Dec(pr);
|
||
end else begin
|
||
if (pu^.DecompositionID = -1) then begin
|
||
pr^ := pp^;
|
||
if locIsSurrogate then begin
|
||
Inc(pp);
|
||
Inc(pr);
|
||
Inc(i);
|
||
pr^ := pp^;
|
||
end;
|
||
end else begin
|
||
k := Decompose(pu^.DecompositionID,pr);
|
||
pr := pr + (k - 1);
|
||
if locIsSurrogate then begin
|
||
Inc(pp);
|
||
Inc(i);
|
||
end;
|
||
end;
|
||
end;
|
||
Inc(pp);
|
||
Inc(pr);
|
||
Inc(i);
|
||
end;
|
||
Dec(pp);
|
||
i := ((PtrUInt(pr) - PtrUInt(@Result[1])) div SizeOf(UnicodeChar));
|
||
SetLength(Result,i);
|
||
CanonicalOrder(@Result[1],Length(Result));
|
||
end;
|
||
end;
|
||
|
||
{ TUCA_PropItemContextTreeNodeRec }
|
||
|
||
function TUCA_PropItemContextTreeNodeRec.GetLeftNode: PUCA_PropItemContextTreeNodeRec;
|
||
begin
|
||
if (Self.Left = 0) then
|
||
Result := nil
|
||
else
|
||
Result := PUCA_PropItemContextTreeNodeRec(PtrUInt(@Self) + Self.Left);
|
||
end;
|
||
|
||
function TUCA_PropItemContextTreeNodeRec.GetRightNode: PUCA_PropItemContextTreeNodeRec;
|
||
begin
|
||
if (Self.Right = 0) then
|
||
Result := nil
|
||
else
|
||
Result := PUCA_PropItemContextTreeNodeRec(PtrUInt(@Self) + Self.Right);
|
||
end;
|
||
|
||
{ TUCA_PropItemContextRec }
|
||
|
||
function TUCA_PropItemContextRec.GetCodePoints() : PUInt24;
|
||
begin
|
||
Result := PUInt24(
|
||
PtrUInt(@Self) + SizeOf(Self.CodePointCount) +
|
||
SizeOf(Self.WeightCount)
|
||
);
|
||
end;
|
||
|
||
function TUCA_PropItemContextRec.GetWeights: PUCA_PropWeights;
|
||
begin
|
||
Result := PUCA_PropWeights(
|
||
PtrUInt(@Self) +
|
||
SizeOf(Self.CodePointCount) + SizeOf(Self.WeightCount) +
|
||
(Self.CodePointCount*SizeOf(UInt24))
|
||
);
|
||
end;
|
||
|
||
{ TUCA_PropItemContextTreeRec }
|
||
|
||
function TUCA_PropItemContextTreeRec.GetData: PUCA_PropItemContextTreeNodeRec;
|
||
begin
|
||
if (Size = 0) then
|
||
Result := nil
|
||
else
|
||
Result := PUCA_PropItemContextTreeNodeRec(
|
||
PtrUInt(
|
||
PtrUInt(@Self) + SizeOf(UInt24){Size}
|
||
)
|
||
);
|
||
end;
|
||
|
||
function CompareCodePoints(
|
||
A : PUInt24; LA : Integer;
|
||
B : PUInt24; LB : Integer
|
||
) : Integer;
|
||
var
|
||
i, hb : Integer;
|
||
begin
|
||
if (A = B) then
|
||
exit(0);
|
||
Result := 1;
|
||
hb := LB - 1;
|
||
for i := 0 to LA - 1 do begin
|
||
if (i > hb) then
|
||
exit;
|
||
if (A[i] < B[i]) then
|
||
exit(-1);
|
||
if (A[i] > B[i]) then
|
||
exit(1);
|
||
end;
|
||
if (LA = LB) then
|
||
exit(0);
|
||
exit(-1);
|
||
end;
|
||
|
||
function TUCA_PropItemContextTreeRec.Find(
|
||
const AChars : PUInt24;
|
||
const ACharCount : Integer;
|
||
out ANode : PUCA_PropItemContextTreeNodeRec
|
||
) : Boolean;
|
||
var
|
||
t : PUCA_PropItemContextTreeNodeRec;
|
||
begin
|
||
t := Data;
|
||
while (t <> nil) do begin
|
||
case CompareCodePoints(AChars,ACharCount,t^.Data.GetCodePoints(),t^.Data.CodePointCount) of
|
||
0 : Break;
|
||
-1 : t := t^.GetLeftNode();
|
||
else
|
||
t := t^.GetRightNode();
|
||
end;
|
||
end;
|
||
Result := (t <> nil);
|
||
if Result then
|
||
ANode := t;
|
||
end;
|
||
|
||
{ TUC_Prop }
|
||
|
||
function TUC_Prop.GetCategory: Byte;
|
||
begin
|
||
Result := Byte((C and Byte($F8)) shr 3);
|
||
end;
|
||
|
||
function TUC_Prop.GetNumericValue: Double;
|
||
begin
|
||
Result := UC_NUMERIC_ARRAY[NumericIndex];
|
||
end;
|
||
|
||
function TUC_Prop.GetUnifiedIdeograph : Boolean;
|
||
begin
|
||
Result := IsBitON(C,2);
|
||
end;
|
||
|
||
procedure TUC_Prop.SetCategory(AValue: Byte);
|
||
begin
|
||
C := Byte(C or Byte(AValue shl 3));
|
||
end;
|
||
|
||
function TUC_Prop.GetWhiteSpace: Boolean;
|
||
begin
|
||
Result := IsBitON(C,0);
|
||
end;
|
||
|
||
procedure TUC_Prop.SetWhiteSpace(AValue: Boolean);
|
||
begin
|
||
SetBit(C,0,AValue);
|
||
end;
|
||
|
||
function TUC_Prop.GetHangulSyllable: Boolean;
|
||
begin
|
||
Result := IsBitON(C,1);
|
||
end;
|
||
|
||
procedure TUC_Prop.SetHangulSyllable(AValue: Boolean);
|
||
begin
|
||
SetBit(C,1,AValue);
|
||
end;
|
||
|
||
{ TUCA_DataBook }
|
||
|
||
function TUCA_DataBook.IsVariable(const AWeight: PUCA_PropWeights): Boolean;
|
||
begin
|
||
Result := (AWeight^.Weights[0] >= Self.VariableLowLimit) and
|
||
(AWeight^.Weights[0] <= Self.VariableHighLimit);
|
||
end;
|
||
|
||
{ TUCA_PropItemRec }
|
||
|
||
function TUCA_PropItemRec.IsWeightCompress_1 : Boolean;
|
||
begin
|
||
Result := IsBitON(Flags,FLAG_COMPRESS_WEIGHT_1);
|
||
end;
|
||
|
||
function TUCA_PropItemRec.IsWeightCompress_2 : Boolean;
|
||
begin
|
||
Result := IsBitON(Flags,FLAG_COMPRESS_WEIGHT_2);
|
||
end;
|
||
|
||
function TUCA_PropItemRec.GetCodePoint() : UInt24;
|
||
begin
|
||
if HasCodePoint() then begin
|
||
if Contextual then
|
||
Result := {$IFDEF HAS_UNALIGNED}Unaligned{$ENDIF}(
|
||
PUInt24(
|
||
PtrUInt(@Self) + Self.GetSelfOnlySize()- SizeOf(UInt24) -
|
||
Cardinal(GetContext()^.Size)
|
||
)^
|
||
)
|
||
else
|
||
Result := {$IFDEF HAS_UNALIGNED}Unaligned{$ENDIF}(
|
||
PUInt24(PtrUInt(@Self) + Self.GetSelfOnlySize() - SizeOf(UInt24))^
|
||
)
|
||
end else begin
|
||
{$ifdef uni_debug}
|
||
raise EUnicodeException.Create('TUCA_PropItemRec.GetCodePoint : "No code point available."');
|
||
{$else uni_debug}
|
||
Result := ZERO_UINT24;
|
||
{$endif uni_debug}
|
||
end
|
||
end;
|
||
|
||
function TUCA_PropItemRec.HasCodePoint() : Boolean;
|
||
begin
|
||
Result := IsBitON(Flags,FLAG_CODEPOINT);
|
||
end;
|
||
|
||
function TUCA_PropItemRec.IsValid() : Boolean;
|
||
begin
|
||
Result := IsBitON(Flags,FLAG_VALID);
|
||
end;
|
||
|
||
{function TUCA_PropItemRec.GetWeightArray: PUCA_PropWeights;
|
||
begin
|
||
Result := PUCA_PropWeights(PtrUInt(@Self) + SizeOf(TUCA_PropItemRec));
|
||
end;}
|
||
|
||
procedure TUCA_PropItemRec.GetWeightArray(ADest: PUCA_PropWeights);
|
||
var
|
||
c : Integer;
|
||
p : PByte;
|
||
pd : PUCA_PropWeights;
|
||
begin
|
||
c := WeightLength;
|
||
p := PByte(PtrUInt(@Self) + SizeOf(TUCA_PropItemRec));
|
||
pd := ADest;
|
||
pd^.Weights[0] := {$IFDEF HAS_UNALIGNED}Unaligned{$ENDIF}(PWord(p)^);
|
||
p := p + 2;
|
||
if not IsWeightCompress_1() then begin
|
||
pd^.Weights[1] := {$IFDEF HAS_UNALIGNED}Unaligned{$ENDIF}(PWord(p)^);
|
||
p := p + 2;
|
||
end else begin
|
||
pd^.Weights[1] := p^;
|
||
p := p + 1;
|
||
end;
|
||
if not IsWeightCompress_2() then begin
|
||
pd^.Weights[2] := {$IFDEF HAS_UNALIGNED}Unaligned{$ENDIF}(PWord(p)^);
|
||
p := p + 2;
|
||
end else begin
|
||
pd^.Weights[2] := p^;
|
||
p := p + 1;
|
||
end;
|
||
if (c > 1) then
|
||
Move(p^, (pd+1)^, ((c-1)*SizeOf(TUCA_PropWeights)));
|
||
end;
|
||
|
||
function TUCA_PropItemRec.GetSelfOnlySize() : Cardinal;
|
||
begin
|
||
Result := SizeOf(TUCA_PropItemRec);
|
||
if (WeightLength > 0) then begin
|
||
Result := Result + (WeightLength * Sizeof(TUCA_PropWeights));
|
||
if IsWeightCompress_1() then
|
||
Result := Result - 1;
|
||
if IsWeightCompress_2() then
|
||
Result := Result - 1;
|
||
end;
|
||
if HasCodePoint() then
|
||
Result := Result + SizeOf(UInt24);
|
||
if Contextual then
|
||
Result := Result + Cardinal(GetContext()^.Size);
|
||
end;
|
||
|
||
function TUCA_PropItemRec.GetContextual: Boolean;
|
||
begin
|
||
Result := IsBitON(Flags,FLAG_CONTEXTUAL);
|
||
end;
|
||
|
||
function TUCA_PropItemRec.GetContext: PUCA_PropItemContextTreeRec;
|
||
var
|
||
p : PtrUInt;
|
||
begin
|
||
if not Contextual then
|
||
exit(nil);
|
||
p := PtrUInt(@Self) + SizeOf(TUCA_PropItemRec);
|
||
if IsBitON(Flags,FLAG_CODEPOINT) then
|
||
p := p + SizeOf(UInt24);
|
||
Result := PUCA_PropItemContextTreeRec(p);
|
||
end;
|
||
|
||
function TUCA_PropItemRec.IsDeleted() : Boolean;
|
||
begin
|
||
Result := IsBitON(Flags,FLAG_DELETION);
|
||
end;
|
||
|
||
|
||
function GetPropUCA(const AChar : UnicodeChar; const ABook : PUCA_DataBook) : PUCA_PropItemRec;
|
||
var
|
||
i : Cardinal;
|
||
begin
|
||
if (ABook^.BMP_Table2 = nil) then
|
||
exit(nil);
|
||
i := PUInt24(
|
||
PtrUInt(ABook^.BMP_Table2) +
|
||
( ((ABook^.BMP_Table1[Hi(Word(AChar))] * 256) + Lo(Word(AChar))) *
|
||
SizeOf(UInt24)
|
||
)
|
||
)^;
|
||
{i := ABook^.BMP_Table2[
|
||
(ABook^.BMP_Table1[Hi(Word(AChar))] * 256) +
|
||
Lo(Word(AChar))
|
||
];}
|
||
if (i > 0) then
|
||
Result:= PUCA_PropItemRec(PtrUInt(ABook^.Props) + i - 1)
|
||
else
|
||
Result := nil;
|
||
end;
|
||
|
||
function GetPropUCA(const AHighS, ALowS : UnicodeChar; const ABook : PUCA_DataBook): PUCA_PropItemRec;
|
||
var
|
||
i : Cardinal;
|
||
begin
|
||
if (ABook^.OBMP_Table2 = nil) then
|
||
exit(nil);
|
||
i := PUInt24(
|
||
PtrUInt(ABook^.OBMP_Table2) +
|
||
( (ABook^.OBMP_Table1[Word(AHighS)-HIGH_SURROGATE_BEGIN] * HIGH_SURROGATE_COUNT) +
|
||
Word(ALowS) - LOW_SURROGATE_BEGIN
|
||
) *
|
||
SizeOf(UInt24)
|
||
)^;
|
||
{i := ABook^.OBMP_Table2[
|
||
(ABook^.OBMP_Table1[Word(AHighS)-HIGH_SURROGATE_BEGIN] * HIGH_SURROGATE_COUNT) +
|
||
Word(ALowS) - LOW_SURROGATE_BEGIN
|
||
]; }
|
||
if (i > 0) then
|
||
Result:= PUCA_PropItemRec(PtrUInt(ABook^.Props) + i - 1)
|
||
else
|
||
Result := nil;
|
||
end;
|
||
|
||
{$UNDEF UNI_BUILD_TIME}
|
||
{$include weight_derivation.inc}
|
||
function CompareSortKey(const A : TUCASortKey; const B : array of Word) : Integer;
|
||
var
|
||
bb : TUCASortKey;
|
||
begin
|
||
SetLength(bb,Length(B));
|
||
if (Length(bb) > 0) then
|
||
Move(B[0],bb[0],(Length(bb)*SizeOf(B[0])));
|
||
Result := CompareSortKey(A,bb);
|
||
end;
|
||
|
||
function CompareSortKey(const A, B : TUCASortKey) : Integer;
|
||
var
|
||
i, hb : Integer;
|
||
begin
|
||
if (Pointer(A) = Pointer(B)) then
|
||
exit(0);
|
||
Result := 1;
|
||
hb := Length(B) - 1;
|
||
for i := 0 to Length(A) - 1 do begin
|
||
if (i > hb) then
|
||
exit;
|
||
if (A[i] < B[i]) then
|
||
exit(-1);
|
||
if (A[i] > B[i]) then
|
||
exit(1);
|
||
end;
|
||
if (Length(A) = Length(B)) then
|
||
exit(0);
|
||
exit(-1);
|
||
end;
|
||
|
||
type
|
||
TUCA_PropWeightsArray = array of TUCA_PropWeights;
|
||
function FormKeyBlanked(const ACEList : TUCA_PropWeightsArray; const ACollation : PUCA_DataBook) : TUCASortKey;
|
||
var
|
||
r : TUCASortKey;
|
||
i, c, k, ral, levelCount : Integer;
|
||
pce : PUCA_PropWeights;
|
||
begin
|
||
c := Length(ACEList);
|
||
if (c = 0) then
|
||
exit(nil);
|
||
levelCount := Length(ACEList[0].Weights);
|
||
if (ACollation^.ComparisonStrength > 0) and
|
||
(ACollation^.ComparisonStrength < levelCount)
|
||
then begin
|
||
levelCount := ACollation^.ComparisonStrength;
|
||
end;
|
||
SetLength(r,(levelCount*c + levelCount));
|
||
ral := 0;
|
||
for i := 0 to levelCount - 1 do begin
|
||
if not ACollation^.Backwards[i] then begin
|
||
pce := @ACEList[0];
|
||
for k := 0 to c - 1 do begin
|
||
if not(ACollation^.IsVariable(pce)) and (pce^.Weights[i] <> 0) then begin
|
||
r[ral] := pce^.Weights[i];
|
||
ral := ral + 1;
|
||
end;
|
||
pce := pce + 1;
|
||
end;
|
||
end else begin
|
||
pce := @ACEList[c-1];
|
||
for k := 0 to c - 1 do begin
|
||
if not(ACollation^.IsVariable(pce)) and (pce^.Weights[i] <> 0) then begin
|
||
r[ral] := pce^.Weights[i];
|
||
ral := ral + 1;
|
||
end;
|
||
pce := pce - 1;
|
||
end;
|
||
end;
|
||
r[ral] := 0;
|
||
ral := ral + 1;
|
||
end;
|
||
ral := ral - 1;
|
||
SetLength(r,ral);
|
||
Result := r;
|
||
end;
|
||
|
||
function FormKeyNonIgnorable(const ACEList : TUCA_PropWeightsArray; const ACollation : PUCA_DataBook) : TUCASortKey;
|
||
var
|
||
r : TUCASortKey;
|
||
i, c, k, ral, levelCount : Integer;
|
||
pce : PUCA_PropWeights;
|
||
begin
|
||
c := Length(ACEList);
|
||
if (c = 0) then
|
||
exit(nil);
|
||
levelCount := Length(ACEList[0].Weights);
|
||
if (ACollation^.ComparisonStrength > 0) and
|
||
(ACollation^.ComparisonStrength < levelCount)
|
||
then begin
|
||
levelCount := ACollation^.ComparisonStrength;
|
||
end;
|
||
SetLength(r,(levelCount*c + levelCount));
|
||
ral := 0;
|
||
for i := 0 to levelCount - 1 do begin
|
||
if not ACollation^.Backwards[i] then begin
|
||
pce := @ACEList[0];
|
||
for k := 0 to c - 1 do begin
|
||
if (pce^.Weights[i] <> 0) then begin
|
||
r[ral] := pce^.Weights[i];
|
||
ral := ral + 1;
|
||
end;
|
||
pce := pce + 1;
|
||
end;
|
||
end else begin
|
||
pce := @ACEList[c-1];
|
||
for k := 0 to c - 1 do begin
|
||
if (pce^.Weights[i] <> 0) then begin
|
||
r[ral] := pce^.Weights[i];
|
||
ral := ral + 1;
|
||
end;
|
||
pce := pce - 1;
|
||
end;
|
||
end;
|
||
r[ral] := 0;
|
||
ral := ral + 1;
|
||
end;
|
||
ral := ral - 1;
|
||
SetLength(r,ral);
|
||
Result := r;
|
||
end;
|
||
|
||
function FormKeyShifted(const ACEList : TUCA_PropWeightsArray; const ACollation : PUCA_DataBook) : TUCASortKey;
|
||
var
|
||
r : TUCASortKey;
|
||
i, c, k, ral, levelCount : Integer;
|
||
pce : PUCA_PropWeights;
|
||
variableState : Boolean;
|
||
begin
|
||
c := Length(ACEList);
|
||
if (c = 0) then
|
||
exit(nil);
|
||
levelCount := Length(ACEList[0].Weights);
|
||
if (ACollation^.ComparisonStrength > 0) and
|
||
(ACollation^.ComparisonStrength < levelCount)
|
||
then begin
|
||
levelCount := ACollation^.ComparisonStrength;
|
||
end;
|
||
SetLength(r,(levelCount*c + levelCount));
|
||
ral := 0;
|
||
variableState := False;
|
||
for i := 0 to levelCount - 1 do begin
|
||
if not ACollation^.Backwards[i] then begin
|
||
variableState := False;
|
||
pce := @ACEList[0];
|
||
for k := 0 to c - 1 do begin
|
||
if not ACollation^.IsVariable(pce) then begin
|
||
if (pce^.Weights[0] <> 0) then
|
||
variableState := False;
|
||
if (pce^.Weights[i] <> 0) and not(variableState) then begin
|
||
r[ral] := pce^.Weights[i];
|
||
ral := ral + 1;
|
||
end;
|
||
end else begin
|
||
variableState := True;
|
||
end;
|
||
pce := pce + 1;
|
||
end;
|
||
end else begin
|
||
pce := @ACEList[c-1];
|
||
for k := 0 to c - 1 do begin
|
||
if not ACollation^.IsVariable(pce) then begin
|
||
if (pce^.Weights[0] <> 0) then
|
||
variableState := False;
|
||
if (pce^.Weights[i] <> 0) and not(variableState) then begin
|
||
r[ral] := pce^.Weights[i];
|
||
ral := ral + 1;
|
||
end;
|
||
end else begin
|
||
variableState := True;
|
||
end;
|
||
pce := pce - 1;
|
||
end;
|
||
end;
|
||
r[ral] := 0;
|
||
ral := ral + 1;
|
||
end;
|
||
ral := ral - 1;
|
||
SetLength(r,ral);
|
||
Result := r;
|
||
end;
|
||
|
||
function FormKeyShiftedTrimmed(
|
||
const ACEList : TUCA_PropWeightsArray;
|
||
const ACollation : PUCA_DataBook
|
||
) : TUCASortKey;
|
||
var
|
||
i : Integer;
|
||
p : ^TUCASortKeyItem;
|
||
begin
|
||
Result := FormKeyShifted(ACEList,ACollation);
|
||
i := Length(Result) - 1;
|
||
if (i >= 0) then begin
|
||
p := @Result[i];
|
||
while (i >= 0) do begin
|
||
if (p^ <> $FFFF) then
|
||
Break;
|
||
Dec(i);
|
||
Dec(p);
|
||
end;
|
||
if ((i+1) < Length(Result)) then
|
||
SetLength(Result,(i+1));
|
||
end;
|
||
end;
|
||
|
||
function FindChild(
|
||
const ACodePoint : Cardinal;
|
||
const AParent : PUCA_PropItemRec
|
||
) : PUCA_PropItemRec;inline;
|
||
var
|
||
k : Integer;
|
||
begin
|
||
Result := PUCA_PropItemRec(PtrUInt(AParent) + AParent^.GetSelfOnlySize());
|
||
for k := 0 to AParent^.ChildCount - 1 do begin
|
||
if (ACodePoint = Result^.CodePoint) then
|
||
exit;
|
||
Result := PUCA_PropItemRec(PtrUInt(Result) + Result^.Size);
|
||
end;
|
||
Result := nil;
|
||
end;
|
||
|
||
function ComputeSortKey(
|
||
const AString : UnicodeString;
|
||
const ACollation : PUCA_DataBook
|
||
) : TUCASortKey;
|
||
begin
|
||
Result := ComputeSortKey(@AString[1],Length(AString),ACollation);
|
||
end;
|
||
|
||
function ComputeRawSortKey(
|
||
const AStr : PUnicodeChar;
|
||
const ALength : SizeInt;
|
||
const ACollation : PUCA_DataBook
|
||
) : TUCA_PropWeightsArray;
|
||
var
|
||
r : TUCA_PropWeightsArray;
|
||
ral {used length of "r"}: Integer;
|
||
rl {capacity of "r"} : Integer;
|
||
|
||
procedure GrowKey(const AMinGrow : Integer = 0);{$IFDEF INLINE_SUPPORT_PRIVATE_VARS}inline;{$ENDIF}
|
||
begin
|
||
if (rl < AMinGrow) then
|
||
rl := rl + AMinGrow
|
||
else
|
||
rl := 2 * rl;
|
||
SetLength(r,rl);
|
||
end;
|
||
|
||
var
|
||
i : Integer;
|
||
s : UnicodeString;
|
||
psBase : PUnicodeChar;
|
||
ps : PUnicodeChar;
|
||
cp : Cardinal;
|
||
cl : PUCA_DataBook;
|
||
pp : PUCA_PropItemRec;
|
||
ppLevel : Byte;
|
||
removedCharIndex : array of DWord;
|
||
removedCharIndexLength : DWord;
|
||
locHistory : array[0..24] of record
|
||
i : Integer;
|
||
cl : PUCA_DataBook;
|
||
pp : PUCA_PropItemRec;
|
||
ppLevel : Byte;
|
||
cp : Cardinal;
|
||
removedCharIndexLength : DWord;
|
||
end;
|
||
locHistoryTop : Integer;
|
||
suppressState : record
|
||
cl : PUCA_DataBook;
|
||
CharCount : Integer;
|
||
end;
|
||
LastKeyOwner : record
|
||
Length : Integer;
|
||
Chars : array[0..24] of UInt24;
|
||
end;
|
||
|
||
procedure SaveKeyOwner();
|
||
var
|
||
k : Integer;
|
||
kppLevel : Byte;
|
||
begin
|
||
k := 0;
|
||
kppLevel := High(Byte);
|
||
while (k <= locHistoryTop) do begin
|
||
if (kppLevel <> locHistory[k].ppLevel) then begin
|
||
LastKeyOwner.Chars[k] := locHistory[k].cp;
|
||
kppLevel := locHistory[k].ppLevel;
|
||
end;
|
||
k := k + 1;
|
||
end;
|
||
if (k = 0) or (kppLevel <> ppLevel) then begin
|
||
LastKeyOwner.Chars[k] := cp;
|
||
k := k + 1;
|
||
end;
|
||
LastKeyOwner.Length := k;
|
||
end;
|
||
|
||
procedure AddWeights(AItem : PUCA_PropItemRec);{$IFDEF INLINE_SUPPORT_PRIVATE_VARS}inline;{$ENDIF}
|
||
begin
|
||
SaveKeyOwner();
|
||
if ((ral + AItem^.WeightLength) > rl) then
|
||
GrowKey(AItem^.WeightLength);
|
||
AItem^.GetWeightArray(@r[ral]);
|
||
ral := ral + AItem^.WeightLength;
|
||
end;
|
||
|
||
procedure AddContextWeights(AItem : PUCA_PropItemContextRec);{$IFDEF INLINE_SUPPORT_PRIVATE_VARS}inline;{$ENDIF}
|
||
begin
|
||
if ((ral + AItem^.WeightCount) > rl) then
|
||
GrowKey(AItem^.WeightCount);
|
||
Move(AItem^.GetWeights()^,r[ral],(AItem^.WeightCount*SizeOf(r[0])));
|
||
ral := ral + AItem^.WeightCount;
|
||
end;
|
||
|
||
procedure AddComputedWeights(ACodePoint : Cardinal);{$IFDEF INLINE_SUPPORT_PRIVATE_VARS}inline;{$ENDIF}
|
||
begin
|
||
SaveKeyOwner();
|
||
if ((ral + 2) > rl) then
|
||
GrowKey();
|
||
DeriveWeight(ACodePoint,@r[ral]);
|
||
ral := ral + 2;
|
||
end;
|
||
|
||
procedure RecordDeletion();{$IFDEF INLINE_SUPPORT_PRIVATE_VARS}inline;{$ENDIF}
|
||
begin
|
||
if pp^.IsValid() and pp^.IsDeleted() (*pp^.GetWeightLength() = 0*) then begin
|
||
if (suppressState.cl = nil) or
|
||
(suppressState.CharCount > ppLevel)
|
||
then begin
|
||
suppressState.cl := cl;
|
||
suppressState.CharCount := ppLevel;
|
||
end;
|
||
end;
|
||
end;
|
||
|
||
procedure RecordStep();{$IFDEF INLINE_SUPPORT_PRIVATE_VARS}inline;{$ENDIF}
|
||
begin
|
||
Inc(locHistoryTop);
|
||
locHistory[locHistoryTop].i := i;
|
||
locHistory[locHistoryTop].cl := cl;
|
||
locHistory[locHistoryTop].pp := pp;
|
||
locHistory[locHistoryTop].ppLevel := ppLevel;
|
||
locHistory[locHistoryTop].cp := cp;
|
||
locHistory[locHistoryTop].removedCharIndexLength := removedCharIndexLength;
|
||
RecordDeletion();
|
||
end;
|
||
|
||
procedure ClearHistory();{$IFDEF INLINE_SUPPORT_PRIVATE_VARS}inline;{$ENDIF}
|
||
begin
|
||
locHistoryTop := -1;
|
||
end;
|
||
|
||
function HasHistory() : Boolean;{$IFDEF INLINE_SUPPORT_PRIVATE_VARS}inline;{$ENDIF}
|
||
begin
|
||
Result := (locHistoryTop >= 0);
|
||
end;
|
||
|
||
function GetHistoryLength() : Integer;{$IFDEF INLINE_SUPPORT_PRIVATE_VARS}inline;{$ENDIF}
|
||
begin
|
||
Result := (locHistoryTop + 1);
|
||
end;
|
||
|
||
procedure GoBack();{$IFDEF INLINE_SUPPORT_PRIVATE_VARS}inline;{$ENDIF}
|
||
begin
|
||
Assert(locHistoryTop >= 0);
|
||
i := locHistory[locHistoryTop].i;
|
||
cp := locHistory[locHistoryTop].cp;
|
||
cl := locHistory[locHistoryTop].cl;
|
||
pp := locHistory[locHistoryTop].pp;
|
||
ppLevel := locHistory[locHistoryTop].ppLevel;
|
||
removedCharIndexLength := locHistory[locHistoryTop].removedCharIndexLength;
|
||
ps := psBase + (i-1);
|
||
Dec(locHistoryTop);
|
||
end;
|
||
|
||
var
|
||
c : Integer;
|
||
lastUnblockedNonstarterCCC : Byte;
|
||
|
||
function IsUnblockedNonstarter(const AStartFrom : Integer) : Boolean;
|
||
var
|
||
k : DWord;
|
||
pk : PUnicodeChar;
|
||
puk : PUC_Prop;
|
||
begin
|
||
k := AStartFrom;
|
||
if (k > c) then
|
||
exit(False);
|
||
if (removedCharIndexLength>0) and
|
||
(IndexInArrayDWord(removedCharIndex,k) >= 0)
|
||
then begin
|
||
exit(False);
|
||
end;
|
||
{if (k = (i+1)) or
|
||
( (k = (i+2)) and UnicodeIsHighSurrogate(s[i]) )
|
||
then
|
||
lastUnblockedNonstarterCCC := 0;}
|
||
pk := psBase + k-1;
|
||
if UnicodeIsHighSurrogate(pk^) then begin
|
||
if (k = c) then
|
||
exit(False);
|
||
if UnicodeIsLowSurrogate(pk[1]) then
|
||
puk := GetProps(pk[0],pk[1])
|
||
else
|
||
puk := GetProps(Word(pk^));
|
||
end else begin
|
||
puk := GetProps(Word(pk^));
|
||
end;
|
||
if (puk^.C3 = 0) or (lastUnblockedNonstarterCCC >= puk^.C3) then
|
||
exit(False);
|
||
lastUnblockedNonstarterCCC := puk^.C3;
|
||
Result := True;
|
||
end;
|
||
|
||
procedure RemoveChar(APos : Integer);{$IFDEF INLINE_SUPPORT_PRIVATE_VARS}inline;{$ENDIF}
|
||
begin
|
||
if (removedCharIndexLength >= Length(removedCharIndex)) then
|
||
SetLength(removedCharIndex,(2*removedCharIndexLength + 2));
|
||
removedCharIndex[removedCharIndexLength] := APos;
|
||
Inc(removedCharIndexLength);
|
||
if UnicodeIsHighSurrogate(psBase[APos]) and (APos < c) and UnicodeIsLowSurrogate(psBase[APos+1]) then begin
|
||
if (removedCharIndexLength >= Length(removedCharIndex)) then
|
||
SetLength(removedCharIndex,(2*removedCharIndexLength + 2));
|
||
removedCharIndex[removedCharIndexLength] := APos+1;
|
||
Inc(removedCharIndexLength);
|
||
end;
|
||
end;
|
||
|
||
procedure Inc_I();{$IFDEF INLINE_SUPPORT_PRIVATE_VARS}inline;{$ENDIF}
|
||
begin
|
||
if (removedCharIndexLength = 0) then begin
|
||
Inc(i);
|
||
Inc(ps);
|
||
exit;
|
||
end;
|
||
while True do begin
|
||
Inc(i);
|
||
Inc(ps);
|
||
if (IndexInArrayDWord(removedCharIndex,i) = -1) then
|
||
Break;
|
||
end;
|
||
end;
|
||
|
||
var
|
||
surrogateState : Boolean;
|
||
|
||
function MoveToNextChar() : Boolean;{$IFDEF INLINE_SUPPORT_PRIVATE_VARS}inline;{$ENDIF}
|
||
begin
|
||
Result := True;
|
||
if UnicodeIsHighSurrogate(ps[0]) then begin
|
||
if (i = c) then
|
||
exit(False);
|
||
if UnicodeIsLowSurrogate(ps[1]) then begin
|
||
surrogateState := True;
|
||
cp := ToUCS4(ps[0],ps[1]);
|
||
end else begin
|
||
surrogateState := False;
|
||
cp := Word(ps[0]);
|
||
end;
|
||
end else begin
|
||
surrogateState := False;
|
||
cp := Word(ps[0]);
|
||
end;
|
||
end;
|
||
|
||
procedure ClearPP(const AClearSuppressInfo : Boolean = True);{$IFDEF INLINE_SUPPORT_PRIVATE_VARS}inline;{$ENDIF}
|
||
begin
|
||
cl := nil;
|
||
pp := nil;
|
||
ppLevel := 0;
|
||
if AClearSuppressInfo then begin
|
||
suppressState.cl := nil;
|
||
suppressState.CharCount := 0;
|
||
end;
|
||
end;
|
||
|
||
function FindPropUCA() : Boolean;
|
||
var
|
||
candidateCL : PUCA_DataBook;
|
||
begin
|
||
pp := nil;
|
||
if (cl = nil) then
|
||
candidateCL := ACollation
|
||
else
|
||
candidateCL := cl;
|
||
if surrogateState then begin
|
||
while (candidateCL <> nil) do begin
|
||
pp := GetPropUCA(ps[0],ps[1],candidateCL);
|
||
if (pp <> nil) then
|
||
break;
|
||
candidateCL := candidateCL^.Base;
|
||
end;
|
||
end else begin
|
||
while (candidateCL <> nil) do begin
|
||
pp := GetPropUCA(ps[0],candidateCL);
|
||
if (pp <> nil) then
|
||
break;
|
||
candidateCL := candidateCL^.Base;
|
||
end;
|
||
end;
|
||
cl := candidateCL;
|
||
Result := (pp <> nil);
|
||
end;
|
||
|
||
procedure AddWeightsAndClear();{$IFDEF INLINE_SUPPORT_PRIVATE_VARS}inline;{$ENDIF}
|
||
var
|
||
ctxNode : PUCA_PropItemContextTreeNodeRec;
|
||
begin
|
||
if (pp^.WeightLength > 0) then begin
|
||
AddWeights(pp);
|
||
end else
|
||
if (LastKeyOwner.Length > 0) and pp^.Contextual and
|
||
pp^.GetContext()^.Find(@LastKeyOwner.Chars[0],LastKeyOwner.Length,ctxNode) and
|
||
(ctxNode^.Data.WeightCount > 0)
|
||
then begin
|
||
AddContextWeights(@ctxNode^.Data);
|
||
end;
|
||
//AddWeights(pp);
|
||
ClearHistory();
|
||
ClearPP();
|
||
end;
|
||
|
||
procedure StartMatch();
|
||
|
||
procedure HandleLastChar();
|
||
var
|
||
ctxNode : PUCA_PropItemContextTreeNodeRec;
|
||
begin
|
||
while True do begin
|
||
if pp^.IsValid() then begin
|
||
if (pp^.WeightLength > 0) then
|
||
AddWeights(pp)
|
||
else
|
||
if (LastKeyOwner.Length > 0) and pp^.Contextual and
|
||
pp^.GetContext()^.Find(@LastKeyOwner.Chars[0],LastKeyOwner.Length,ctxNode) and
|
||
(ctxNode^.Data.WeightCount > 0)
|
||
then
|
||
AddContextWeights(@ctxNode^.Data)
|
||
else
|
||
AddComputedWeights(cp){handle deletion of code point};
|
||
break;
|
||
end;
|
||
if (cl^.Base = nil) then begin
|
||
AddComputedWeights(cp);
|
||
break;
|
||
end;
|
||
cl := cl^.Base;
|
||
if not FindPropUCA() then begin
|
||
AddComputedWeights(cp);
|
||
break;
|
||
end;
|
||
end;
|
||
end;
|
||
var
|
||
tmpCtxNode : PUCA_PropItemContextTreeNodeRec;
|
||
begin
|
||
ppLevel := 0;
|
||
if not FindPropUCA() then begin
|
||
AddComputedWeights(cp);
|
||
ClearHistory();
|
||
ClearPP();
|
||
end else begin
|
||
if (i = c) then begin
|
||
HandleLastChar();
|
||
end else begin
|
||
if pp^.IsValid()then begin
|
||
if (pp^.ChildCount = 0) then begin
|
||
if (pp^.WeightLength > 0) then
|
||
AddWeights(pp)
|
||
else
|
||
if (LastKeyOwner.Length > 0) and pp^.Contextual and
|
||
pp^.GetContext()^.Find(@LastKeyOwner.Chars[0],LastKeyOwner.Length,tmpCtxNode) and
|
||
(tmpCtxNode^.Data.WeightCount > 0)
|
||
then
|
||
AddContextWeights(@tmpCtxNode^.Data)
|
||
else
|
||
AddComputedWeights(cp){handle deletion of code point};
|
||
ClearPP();
|
||
ClearHistory();
|
||
end else begin
|
||
RecordStep();
|
||
end
|
||
end else begin
|
||
if (pp^.ChildCount = 0) then begin
|
||
AddComputedWeights(cp);
|
||
ClearPP();
|
||
ClearHistory();
|
||
end else begin
|
||
RecordStep();
|
||
end;
|
||
end ;
|
||
end;
|
||
end;
|
||
end;
|
||
|
||
function TryPermutation() : Boolean;
|
||
var
|
||
kk, kkidx : Integer;
|
||
b : Boolean;
|
||
puk : PUC_Prop;
|
||
ppk : PUCA_PropItemRec;
|
||
begin
|
||
Result := False;
|
||
puk := GetProps(cp);
|
||
if (puk^.C3 = 0) then
|
||
exit;
|
||
lastUnblockedNonstarterCCC := puk^.C3;
|
||
if surrogateState then
|
||
kk := i + 2
|
||
else
|
||
kk := i + 1;
|
||
while IsUnblockedNonstarter(kk) do begin
|
||
kkidx := kk-1;
|
||
b := UnicodeIsHighSurrogate(psBase[kkidx]) and (kk<c) and UnicodeIsLowSurrogate(psBase[kkidx+1]);
|
||
if b then
|
||
ppk := FindChild(ToUCS4(psBase[kkidx],psBase[kkidx+1]),pp)
|
||
else
|
||
ppk := FindChild(Word(psBase[kkidx]),pp);
|
||
if (ppk <> nil) then begin
|
||
pp := ppk;
|
||
RemoveChar(kk);
|
||
Inc(ppLevel);
|
||
RecordStep();
|
||
Result := True;
|
||
if (pp^.ChildCount = 0 ) then
|
||
Break;
|
||
end;
|
||
if b then
|
||
Inc(kk);
|
||
Inc(kk);
|
||
end;
|
||
end;
|
||
|
||
procedure AdvanceCharPos();{$IFDEF INLINE_SUPPORT_PRIVATE_VARS}inline;{$ENDIF}
|
||
begin
|
||
if UnicodeIsHighSurrogate(ps[0]) and (i<c) and UnicodeIsLowSurrogate(ps[1]) then begin
|
||
Inc(i);
|
||
Inc(ps);
|
||
end;
|
||
Inc_I();
|
||
end;
|
||
|
||
var
|
||
ok : Boolean;
|
||
pp1 : PUCA_PropItemRec;
|
||
cltemp : PUCA_DataBook;
|
||
ctxNode : PUCA_PropItemContextTreeNodeRec;
|
||
begin
|
||
if (ALength = 0) then
|
||
exit(nil);
|
||
s := '';
|
||
if ACollation^.NoNormalization then begin
|
||
psBase := AStr;
|
||
c := ALength;
|
||
end else begin
|
||
s := NormalizeNFD(AStr,ALength);
|
||
c := Length(s);
|
||
psBase := @s[1];
|
||
end;
|
||
rl := 3*c;
|
||
SetLength(r,rl);
|
||
ral := 0;
|
||
ps := psBase;
|
||
ClearPP();
|
||
locHistoryTop := -1;
|
||
removedCharIndexLength := 0;
|
||
FillChar(suppressState,SizeOf(suppressState),#0);
|
||
LastKeyOwner.Length := 0;
|
||
i := 1;
|
||
while (i <= c) and MoveToNextChar() do begin
|
||
if (pp = nil) then begin // Start Matching
|
||
StartMatch();
|
||
end else begin
|
||
pp1 := FindChild(cp,pp);
|
||
if (pp1 <> nil) then begin
|
||
Inc(ppLevel);
|
||
pp := pp1;
|
||
if (pp^.ChildCount = 0) or (i = c) then begin
|
||
ok := False;
|
||
if pp^.IsValid() and (suppressState.CharCount = 0) then begin
|
||
if (pp^.WeightLength > 0) then begin
|
||
AddWeightsAndClear();
|
||
ok := True;
|
||
end else
|
||
if (LastKeyOwner.Length > 0) and pp^.Contextual and
|
||
pp^.GetContext()^.Find(@LastKeyOwner.Chars[0],LastKeyOwner.Length,ctxNode) and
|
||
(ctxNode^.Data.WeightCount > 0)
|
||
then begin
|
||
AddContextWeights(@ctxNode^.Data);
|
||
ClearHistory();
|
||
ClearPP();
|
||
ok := True;
|
||
end
|
||
end;
|
||
if not ok then begin
|
||
RecordDeletion();
|
||
ok := False;
|
||
while HasHistory() do begin
|
||
GoBack();
|
||
if pp^.IsValid() and
|
||
( ( (cl = suppressState.cl) and (ppLevel <> suppressState.CharCount) ) or
|
||
( (cl <> suppressState.cl) and (ppLevel < suppressState.CharCount) )
|
||
)
|
||
then begin
|
||
AddWeightsAndClear();
|
||
ok := True;
|
||
Break;
|
||
end;
|
||
end;
|
||
if not ok then begin
|
||
cltemp := cl^.Base;
|
||
if (cltemp <> nil) then begin
|
||
ClearPP(False);
|
||
cl := cltemp;
|
||
Continue;
|
||
end;
|
||
end;
|
||
|
||
if not ok then begin
|
||
AddComputedWeights(cp);
|
||
ClearHistory();
|
||
ClearPP();
|
||
end;
|
||
end;
|
||
end else begin
|
||
RecordStep();
|
||
end;
|
||
end else begin
|
||
// permutations !
|
||
ok := False;
|
||
if TryPermutation() and pp^.IsValid() then begin
|
||
if (suppressState.CharCount = 0) then begin
|
||
AddWeightsAndClear();
|
||
Continue;
|
||
end;
|
||
while True do begin
|
||
if pp^.IsValid() and
|
||
(pp^.WeightLength > 0) and
|
||
( ( (cl = suppressState.cl) and (ppLevel <> suppressState.CharCount) ) or
|
||
( (cl <> suppressState.cl) and (ppLevel < suppressState.CharCount) )
|
||
)
|
||
then begin
|
||
AddWeightsAndClear();
|
||
ok := True;
|
||
break;
|
||
end;
|
||
if not HasHistory() then
|
||
break;
|
||
GoBack();
|
||
if (pp = nil) then
|
||
break;
|
||
end;
|
||
end;
|
||
if not ok then begin
|
||
if pp^.IsValid() and (suppressState.CharCount = 0) then begin
|
||
if (pp^.WeightLength > 0) then begin
|
||
AddWeightsAndClear();
|
||
ok := True;
|
||
end else
|
||
if (LastKeyOwner.Length > 0) and pp^.Contextual and
|
||
pp^.GetContext()^.Find(@LastKeyOwner.Chars[0],LastKeyOwner.Length,ctxNode) and
|
||
(ctxNode^.Data.WeightCount > 0)
|
||
then begin
|
||
AddContextWeights(@ctxNode^.Data);
|
||
ClearHistory();
|
||
ClearPP();
|
||
ok := True;
|
||
end
|
||
end;
|
||
if ok then
|
||
Continue;
|
||
end;
|
||
if not ok then begin
|
||
if (cl^.Base <> nil) then begin
|
||
cltemp := cl^.Base;
|
||
while HasHistory() do
|
||
GoBack();
|
||
pp := nil;
|
||
ppLevel := 0;
|
||
cl := cltemp;
|
||
Continue;
|
||
end;
|
||
|
||
//walk back
|
||
ok := False;
|
||
while HasHistory() do begin
|
||
GoBack();
|
||
if pp^.IsValid() and
|
||
(pp^.WeightLength > 0) and
|
||
( (suppressState.CharCount = 0) or
|
||
( ( (cl = suppressState.cl) and (ppLevel <> suppressState.CharCount) ) or
|
||
( (cl <> suppressState.cl) and (ppLevel < suppressState.CharCount) )
|
||
)
|
||
)
|
||
then begin
|
||
AddWeightsAndClear();
|
||
ok := True;
|
||
Break;
|
||
end;
|
||
end;
|
||
if ok then begin
|
||
AdvanceCharPos();
|
||
Continue;
|
||
end;
|
||
if (pp <> nil) then begin
|
||
AddComputedWeights(cp);
|
||
ClearHistory();
|
||
ClearPP();
|
||
end;
|
||
end;
|
||
end;
|
||
end;
|
||
if surrogateState then begin
|
||
Inc(ps);
|
||
Inc(i);
|
||
end;
|
||
//
|
||
Inc_I();
|
||
end;
|
||
SetLength(r,ral);
|
||
Result := r;
|
||
end;
|
||
|
||
type
|
||
TComputeKeyContext = record
|
||
Collation : PUCA_DataBook;
|
||
r : TUCA_PropWeightsArray;
|
||
ral {used length of "r"}: Integer;
|
||
rl {capacity of "r"} : Integer;
|
||
i : Integer;
|
||
s : UnicodeString;
|
||
ps : PUnicodeChar;
|
||
cp : Cardinal;
|
||
cl : PUCA_DataBook;
|
||
pp : PUCA_PropItemRec;
|
||
ppLevel : Byte;
|
||
removedCharIndex : array of DWord;
|
||
removedCharIndexLength : DWord;
|
||
locHistoryTop : Integer;
|
||
locHistory : array[0..24] of record
|
||
i : Integer;
|
||
cl : PUCA_DataBook;
|
||
pp : PUCA_PropItemRec;
|
||
ppLevel : Byte;
|
||
cp : Cardinal;
|
||
removedCharIndexLength : DWord;
|
||
end;
|
||
suppressState : record
|
||
cl : PUCA_DataBook;
|
||
CharCount : Integer;
|
||
end;
|
||
LastKeyOwner : record
|
||
Length : Integer;
|
||
Chars : array[0..24] of UInt24;
|
||
end;
|
||
c : Integer;
|
||
lastUnblockedNonstarterCCC : Byte;
|
||
surrogateState : Boolean;
|
||
Finished : Boolean;
|
||
end;
|
||
PComputeKeyContext = ^TComputeKeyContext;
|
||
|
||
procedure ClearPP(AContext : PComputeKeyContext; const AClearSuppressInfo : Boolean = True);inline;
|
||
begin
|
||
AContext^.cl := nil;
|
||
AContext^.pp := nil;
|
||
AContext^.ppLevel := 0;
|
||
if AClearSuppressInfo then begin
|
||
AContext^.suppressState.cl := nil;
|
||
AContext^.suppressState.CharCount := 0;
|
||
end;
|
||
end;
|
||
|
||
procedure InitContext(
|
||
AContext : PComputeKeyContext;
|
||
const AStr : PUnicodeChar;
|
||
const ALength : SizeInt;
|
||
const ACollation : PUCA_DataBook
|
||
);
|
||
begin
|
||
AContext^.Collation := ACollation;
|
||
AContext^.c := ALength;
|
||
AContext^.s := NormalizeNFD(AStr,AContext^.c);
|
||
AContext^.c := Length(AContext^.s);
|
||
AContext^.rl := 3*AContext^.c;
|
||
SetLength(AContext^.r,AContext^.rl);
|
||
AContext^.ral := 0;
|
||
AContext^.ps := @AContext^.s[1];
|
||
ClearPP(AContext);
|
||
AContext^.locHistoryTop := -1;
|
||
AContext^.removedCharIndexLength := 0;
|
||
FillChar(AContext^.suppressState,SizeOf(AContext^.suppressState),#0);
|
||
AContext^.LastKeyOwner.Length := 0;
|
||
AContext^.i := 1;
|
||
AContext^.Finished := False;
|
||
end;
|
||
|
||
function FormKey(
|
||
const AWeightArray : TUCA_PropWeightsArray;
|
||
const ACollation : PUCA_DataBook
|
||
) : TUCASortKey;inline;
|
||
begin
|
||
case ACollation.VariableWeight of
|
||
TUCA_VariableKind.ucaShifted : Result := FormKeyShifted(AWeightArray,ACollation);
|
||
TUCA_VariableKind.ucaBlanked : Result := FormKeyBlanked(AWeightArray,ACollation);
|
||
TUCA_VariableKind.ucaNonIgnorable : Result := FormKeyNonIgnorable(AWeightArray,ACollation);
|
||
TUCA_VariableKind.ucaShiftedTrimmed : Result := FormKeyShiftedTrimmed(AWeightArray,ACollation);
|
||
else
|
||
Result := FormKeyShifted(AWeightArray,ACollation);
|
||
end;
|
||
end;
|
||
|
||
function ComputeRawSortKeyNextItem(
|
||
const AContext : PComputeKeyContext
|
||
) : Boolean;forward;
|
||
function IncrementalCompareString_NonIgnorable(
|
||
const AStrA : PUnicodeChar;
|
||
const ALengthA : SizeInt;
|
||
const AStrB : PUnicodeChar;
|
||
const ALengthB : SizeInt;
|
||
const ACollation : PUCA_DataBook
|
||
) : Integer;
|
||
var
|
||
ctxA, ctxB : TComputeKeyContext;
|
||
lastKeyIndexA, keyIndexA, lengthMaxA : Integer;
|
||
keyIndexB : Integer;
|
||
keyA, keyB : TUCASortKey;
|
||
begin
|
||
if ( (ALengthA = 0) and (ALengthB = 0) ) or
|
||
( (PtrUInt(AStrA) = PtrUInt(AStrB)) and
|
||
(ALengthA = ALengthB)
|
||
)
|
||
then
|
||
exit(0);
|
||
if (ALengthA = 0) then
|
||
exit(-1);
|
||
if (ALengthB = 0) then
|
||
exit(1);
|
||
|
||
InitContext(@ctxA,AStrA,ALengthA,ACollation);
|
||
InitContext(@ctxB,AStrB,ALengthB,ACollation);
|
||
lastKeyIndexA := -1;
|
||
keyIndexA := -1;
|
||
lengthMaxA := 0;
|
||
keyIndexB := -1;
|
||
while True do begin
|
||
if not ComputeRawSortKeyNextItem(@ctxA) then
|
||
Break;
|
||
if (ctxA.ral = lengthMaxA) then
|
||
Continue;
|
||
lengthMaxA := ctxA.ral;
|
||
keyIndexA := lastKeyIndexA + 1;
|
||
while (keyIndexA < lengthMaxA) and (ctxA.r[keyIndexA].Weights[0] = 0) do begin
|
||
Inc(keyIndexA);
|
||
end;
|
||
if (keyIndexA = lengthMaxA) then begin
|
||
lastKeyIndexA := keyIndexA-1;
|
||
Continue;
|
||
end;
|
||
|
||
while (keyIndexA < lengthMaxA) do begin
|
||
if (ctxA.r[keyIndexA].Weights[0] = 0) then begin
|
||
Inc(keyIndexA);
|
||
Continue;
|
||
end;
|
||
Inc(keyIndexB);
|
||
while (ctxB.ral <= keyIndexB) or (ctxB.r[keyIndexB].Weights[0] = 0) do begin
|
||
if (ctxB.ral <= keyIndexB) then begin
|
||
if not ComputeRawSortKeyNextItem(@ctxB) then
|
||
Break;
|
||
Continue;
|
||
end;
|
||
Inc(keyIndexB);
|
||
end;
|
||
if (ctxB.ral <= keyIndexB) then
|
||
exit(1);
|
||
if (ctxA.r[keyIndexA].Weights[0] > ctxB.r[keyIndexB].Weights[0]) then
|
||
exit(1);
|
||
if (ctxA.r[keyIndexA].Weights[0] < ctxB.r[keyIndexB].Weights[0]) then
|
||
exit(-1);
|
||
Inc(keyIndexA);
|
||
end;
|
||
lastKeyIndexA := keyIndexA - 1;
|
||
end;
|
||
//Key(A) is completed !
|
||
Inc(keyIndexB);
|
||
while (ctxB.ral <= keyIndexB) or (ctxB.r[keyIndexB].Weights[0] = 0) do begin
|
||
if (ctxB.ral <= keyIndexB) then begin
|
||
if not ComputeRawSortKeyNextItem(@ctxB) then
|
||
Break;
|
||
Continue;
|
||
end;
|
||
Inc(keyIndexB);
|
||
end;
|
||
if (ctxB.ral > keyIndexB) then begin
|
||
//B has at least one more primary weight that A
|
||
exit(-1);
|
||
end;
|
||
while ComputeRawSortKeyNextItem(@ctxB) do begin
|
||
//
|
||
end;
|
||
//Key(B) is completed !
|
||
keyA := FormKey(ctxA.r,ctxA.Collation);
|
||
keyB := FormKey(ctxB.r,ctxB.Collation);
|
||
Result := CompareSortKey(keyA,keyB);
|
||
end;
|
||
|
||
function IncrementalCompareString_Shift(
|
||
const AStrA : PUnicodeChar;
|
||
const ALengthA : SizeInt;
|
||
const AStrB : PUnicodeChar;
|
||
const ALengthB : SizeInt;
|
||
const ACollation : PUCA_DataBook
|
||
) : Integer;
|
||
var
|
||
ctxA, ctxB : TComputeKeyContext;
|
||
lastKeyIndexA, keyIndexA, lengthMaxA : Integer;
|
||
keyIndexB : Integer;
|
||
keyA, keyB : TUCASortKey;
|
||
begin
|
||
if ( (ALengthA = 0) and (ALengthB = 0) ) or
|
||
( (PtrUInt(AStrA) = PtrUInt(AStrB)) and
|
||
(ALengthA = ALengthB)
|
||
)
|
||
then
|
||
exit(0);
|
||
if (ALengthA = 0) then
|
||
exit(-1);
|
||
if (ALengthB = 0) then
|
||
exit(1);
|
||
|
||
InitContext(@ctxA,AStrA,ALengthA,ACollation);
|
||
InitContext(@ctxB,AStrB,ALengthB,ACollation);
|
||
lastKeyIndexA := -1;
|
||
keyIndexA := -1;
|
||
lengthMaxA := 0;
|
||
keyIndexB := -1;
|
||
while True do begin
|
||
if not ComputeRawSortKeyNextItem(@ctxA) then
|
||
Break;
|
||
if (ctxA.ral = lengthMaxA) then
|
||
Continue;
|
||
lengthMaxA := ctxA.ral;
|
||
keyIndexA := lastKeyIndexA + 1;
|
||
while (keyIndexA < lengthMaxA) and
|
||
( (ctxA.r[keyIndexA].Weights[0] = 0) or
|
||
ctxA.Collation^.IsVariable(@ctxA.r[keyIndexA].Weights)
|
||
)
|
||
do begin
|
||
Inc(keyIndexA);
|
||
end;
|
||
if (keyIndexA = lengthMaxA) then begin
|
||
lastKeyIndexA := keyIndexA-1;
|
||
Continue;
|
||
end;
|
||
|
||
while (keyIndexA < lengthMaxA) do begin
|
||
if (ctxA.r[keyIndexA].Weights[0] = 0) or
|
||
ctxA.Collation^.IsVariable(@ctxA.r[keyIndexA].Weights)
|
||
then begin
|
||
Inc(keyIndexA);
|
||
Continue;
|
||
end;
|
||
Inc(keyIndexB);
|
||
while (ctxB.ral <= keyIndexB) or
|
||
(ctxB.r[keyIndexB].Weights[0] = 0) or
|
||
ctxB.Collation^.IsVariable(@ctxB.r[keyIndexB].Weights)
|
||
do begin
|
||
if (ctxB.ral <= keyIndexB) then begin
|
||
if not ComputeRawSortKeyNextItem(@ctxB) then
|
||
Break;
|
||
Continue;
|
||
end;
|
||
Inc(keyIndexB);
|
||
end;
|
||
if (ctxB.ral <= keyIndexB) then
|
||
exit(1);
|
||
if (ctxA.r[keyIndexA].Weights[0] > ctxB.r[keyIndexB].Weights[0]) then
|
||
exit(1);
|
||
if (ctxA.r[keyIndexA].Weights[0] < ctxB.r[keyIndexB].Weights[0]) then
|
||
exit(-1);
|
||
Inc(keyIndexA);
|
||
end;
|
||
lastKeyIndexA := keyIndexA - 1;
|
||
end;
|
||
//Key(A) is completed !
|
||
Inc(keyIndexB);
|
||
while (ctxB.ral <= keyIndexB) or
|
||
(ctxB.r[keyIndexB].Weights[0] = 0) or
|
||
ctxB.Collation^.IsVariable(@ctxB.r[keyIndexB].Weights)
|
||
do begin
|
||
if (ctxB.ral <= keyIndexB) then begin
|
||
if not ComputeRawSortKeyNextItem(@ctxB) then
|
||
Break;
|
||
Continue;
|
||
end;
|
||
Inc(keyIndexB);
|
||
end;
|
||
if (ctxB.ral > keyIndexB) then begin
|
||
//B has at least one more primary weight that A
|
||
exit(-1);
|
||
end;
|
||
while ComputeRawSortKeyNextItem(@ctxB) do begin
|
||
//
|
||
end;
|
||
//Key(B) is completed !
|
||
keyA := FormKey(ctxA.r,ctxA.Collation);
|
||
keyB := FormKey(ctxB.r,ctxB.Collation);
|
||
Result := CompareSortKey(keyA,keyB);
|
||
end;
|
||
|
||
function IncrementalCompareString(
|
||
const AStrA : PUnicodeChar;
|
||
const ALengthA : SizeInt;
|
||
const AStrB : PUnicodeChar;
|
||
const ALengthB : SizeInt;
|
||
const ACollation : PUCA_DataBook
|
||
) : Integer;
|
||
begin
|
||
case ACollation^.VariableWeight of
|
||
TUCA_VariableKind.ucaNonIgnorable :
|
||
begin
|
||
Result := IncrementalCompareString_NonIgnorable(
|
||
AStrA,ALengthA,AStrB,ALengthB,ACollation
|
||
);
|
||
end;
|
||
TUCA_VariableKind.ucaBlanked,
|
||
TUCA_VariableKind.ucaShiftedTrimmed,
|
||
TUCA_VariableKind.ucaIgnoreSP,
|
||
TUCA_VariableKind.ucaShifted:
|
||
begin
|
||
Result := IncrementalCompareString_Shift(
|
||
AStrA,ALengthA,AStrB,ALengthB,ACollation
|
||
);
|
||
end;
|
||
else
|
||
begin
|
||
Result := IncrementalCompareString_Shift(
|
||
AStrA,ALengthA,AStrB,ALengthB,ACollation
|
||
);
|
||
end;
|
||
end;
|
||
end;
|
||
|
||
function IncrementalCompareString(
|
||
const AStrA,
|
||
AStrB : UnicodeString;
|
||
const ACollation : PUCA_DataBook
|
||
) : Integer;
|
||
begin
|
||
Result := IncrementalCompareString(
|
||
Pointer(AStrA),Length(AStrA),Pointer(AStrB),Length(AStrB),
|
||
ACollation
|
||
);
|
||
end;
|
||
|
||
function FilterString(
|
||
const AStr : PUnicodeChar;
|
||
const ALength : SizeInt;
|
||
const AExcludedMask : TCategoryMask
|
||
) : UnicodeString;
|
||
var
|
||
i, c : SizeInt;
|
||
pp, pr : PUnicodeChar;
|
||
pu : PUC_Prop;
|
||
locIsSurrogate : Boolean;
|
||
begin
|
||
c := ALength;
|
||
SetLength(Result,(2*c));
|
||
if (c > 0) then begin
|
||
pp := AStr;
|
||
pr := @Result[1];
|
||
i := 1;
|
||
while (i <= c) do begin
|
||
pu := GetProps(Word(pp^));
|
||
locIsSurrogate := (pu^.Category = UGC_Surrogate);
|
||
if locIsSurrogate then begin
|
||
if (i = c) then
|
||
Break;
|
||
if not UnicodeIsSurrogatePair(pp[0],pp[1]) then begin
|
||
Inc(pp);
|
||
Inc(i);
|
||
Continue;
|
||
end;
|
||
pu := GetProps(pp[0],pp[1]);
|
||
end;
|
||
if not(pu^.Category in AExcludedMask) then begin
|
||
pr^ := pp^;
|
||
Inc(pr);
|
||
if locIsSurrogate then begin
|
||
Inc(pp);
|
||
Inc(pr);
|
||
Inc(i);
|
||
pr^ := pp^;
|
||
end;
|
||
end;
|
||
Inc(pp);
|
||
Inc(i);
|
||
end;
|
||
i := ((PtrUInt(pr) - PtrUInt(@Result[1])) div SizeOf(UnicodeChar));
|
||
SetLength(Result,i);
|
||
end;
|
||
end;
|
||
|
||
function FilterString(
|
||
const AStr : UnicodeString;
|
||
const AExcludedMask : TCategoryMask
|
||
) : UnicodeString;
|
||
begin
|
||
if (AStr = '') then
|
||
Result := ''
|
||
else
|
||
Result := FilterString(@AStr[1],Length(AStr),AExcludedMask);
|
||
end;
|
||
|
||
function ComputeRawSortKeyNextItem(
|
||
const AContext : PComputeKeyContext
|
||
) : Boolean;
|
||
var
|
||
ctx : PComputeKeyContext;
|
||
|
||
procedure GrowKey(const AMinGrow : Integer = 0);{$IFDEF INLINE_SUPPORT_PRIVATE_VARS}inline;{$ENDIF}
|
||
begin
|
||
if (ctx^.rl < AMinGrow) then
|
||
ctx^.rl := ctx^.rl + AMinGrow
|
||
else
|
||
ctx^.rl := 2 * ctx^.rl;
|
||
SetLength(ctx^.r,ctx^.rl);
|
||
end;
|
||
|
||
procedure SaveKeyOwner();
|
||
var
|
||
k : Integer;
|
||
kppLevel : Byte;
|
||
begin
|
||
k := 0;
|
||
kppLevel := High(Byte);
|
||
while (k <= ctx^.locHistoryTop) do begin
|
||
if (kppLevel <> ctx^.locHistory[k].ppLevel) then begin
|
||
ctx^.LastKeyOwner.Chars[k] := ctx^.locHistory[k].cp;
|
||
kppLevel := ctx^.locHistory[k].ppLevel;
|
||
end;
|
||
k := k + 1;
|
||
end;
|
||
if (k = 0) or (kppLevel <> ctx^.ppLevel) then begin
|
||
ctx^.LastKeyOwner.Chars[k] := ctx^.cp;
|
||
k := k + 1;
|
||
end;
|
||
ctx^.LastKeyOwner.Length := k;
|
||
end;
|
||
|
||
procedure AddWeights(AItem : PUCA_PropItemRec);{$IFDEF INLINE_SUPPORT_PRIVATE_VARS}inline;{$ENDIF}
|
||
begin
|
||
SaveKeyOwner();
|
||
if ((ctx^.ral + AItem^.WeightLength) > ctx^.rl) then
|
||
GrowKey(AItem^.WeightLength);
|
||
AItem^.GetWeightArray(@ctx^.r[ctx^.ral]);
|
||
ctx^.ral := ctx^.ral + AItem^.WeightLength;
|
||
end;
|
||
|
||
procedure AddContextWeights(AItem : PUCA_PropItemContextRec);{$IFDEF INLINE_SUPPORT_PRIVATE_VARS}inline;{$ENDIF}
|
||
begin
|
||
if ((ctx^.ral + AItem^.WeightCount) > ctx^.rl) then
|
||
GrowKey(AItem^.WeightCount);
|
||
Move(AItem^.GetWeights()^,ctx^.r[ctx^.ral],(AItem^.WeightCount*SizeOf(ctx^.r[0])));
|
||
ctx^.ral := ctx^.ral + AItem^.WeightCount;
|
||
end;
|
||
|
||
procedure AddComputedWeights(ACodePoint : Cardinal);{$IFDEF INLINE_SUPPORT_PRIVATE_VARS}inline;{$ENDIF}
|
||
begin
|
||
SaveKeyOwner();
|
||
if ((ctx^.ral + 2) > ctx^.rl) then
|
||
GrowKey();
|
||
DeriveWeight(ACodePoint,@ctx^.r[ctx^.ral]);
|
||
ctx^.ral := ctx^.ral + 2;
|
||
end;
|
||
|
||
procedure RecordDeletion();{$IFDEF INLINE_SUPPORT_PRIVATE_VARS}inline;{$ENDIF}
|
||
begin
|
||
if ctx^.pp^.IsValid() and ctx^.pp^.IsDeleted() (*pp^.GetWeightLength() = 0*) then begin
|
||
if (ctx^.suppressState.cl = nil) or
|
||
(ctx^.suppressState.CharCount > ctx^.ppLevel)
|
||
then begin
|
||
ctx^.suppressState.cl := ctx^.cl;
|
||
ctx^.suppressState.CharCount := ctx^.ppLevel;
|
||
end;
|
||
end;
|
||
end;
|
||
|
||
procedure RecordStep();{$IFDEF INLINE_SUPPORT_PRIVATE_VARS}inline;{$ENDIF}
|
||
begin
|
||
Inc(ctx^.locHistoryTop);
|
||
ctx^.locHistory[ctx^.locHistoryTop].i := ctx^.i;
|
||
ctx^.locHistory[ctx^.locHistoryTop].cl := ctx^.cl;
|
||
ctx^.locHistory[ctx^.locHistoryTop].pp := ctx^.pp;
|
||
ctx^.locHistory[ctx^.locHistoryTop].ppLevel := ctx^.ppLevel;
|
||
ctx^.locHistory[ctx^.locHistoryTop].cp := ctx^.cp;
|
||
ctx^.locHistory[ctx^.locHistoryTop].removedCharIndexLength := ctx^.removedCharIndexLength;
|
||
RecordDeletion();
|
||
end;
|
||
|
||
procedure ClearHistory();{$IFDEF INLINE_SUPPORT_PRIVATE_VARS}inline;{$ENDIF}
|
||
begin
|
||
ctx^.locHistoryTop := -1;
|
||
end;
|
||
|
||
function HasHistory() : Boolean;{$IFDEF INLINE_SUPPORT_PRIVATE_VARS}inline;{$ENDIF}
|
||
begin
|
||
Result := (ctx^.locHistoryTop >= 0);
|
||
end;
|
||
|
||
function GetHistoryLength() : Integer;{$IFDEF INLINE_SUPPORT_PRIVATE_VARS}inline;{$ENDIF}
|
||
begin
|
||
Result := (ctx^.locHistoryTop + 1);
|
||
end;
|
||
|
||
procedure GoBack();{$IFDEF INLINE_SUPPORT_PRIVATE_VARS}inline;{$ENDIF}
|
||
begin
|
||
Assert(ctx^.locHistoryTop >= 0);
|
||
ctx^.i := ctx^.locHistory[ctx^.locHistoryTop].i;
|
||
ctx^.cp := ctx^.locHistory[ctx^.locHistoryTop].cp;
|
||
ctx^.cl := ctx^.locHistory[ctx^.locHistoryTop].cl;
|
||
ctx^.pp := ctx^.locHistory[ctx^.locHistoryTop].pp;
|
||
ctx^.ppLevel := ctx^.locHistory[ctx^.locHistoryTop].ppLevel;
|
||
ctx^.removedCharIndexLength := ctx^.locHistory[ctx^.locHistoryTop].removedCharIndexLength;
|
||
ctx^.ps := @ctx^.s[ctx^.i];
|
||
Dec(ctx^.locHistoryTop);
|
||
end;
|
||
|
||
function IsUnblockedNonstarter(const AStartFrom : Integer) : Boolean;
|
||
var
|
||
k : DWord;
|
||
pk : PUnicodeChar;
|
||
puk : PUC_Prop;
|
||
begin
|
||
k := AStartFrom;
|
||
if (k > ctx^.c) then
|
||
exit(False);
|
||
if (ctx^.removedCharIndexLength>0) and
|
||
(IndexInArrayDWord(ctx^.removedCharIndex,k) >= 0)
|
||
then begin
|
||
exit(False);
|
||
end;
|
||
{if (k = (i+1)) or
|
||
( (k = (i+2)) and UnicodeIsHighSurrogate(s[i]) )
|
||
then
|
||
lastUnblockedNonstarterCCC := 0;}
|
||
pk := @ctx^.s[k];
|
||
if UnicodeIsHighSurrogate(pk^) then begin
|
||
if (k = ctx^.c) then
|
||
exit(False);
|
||
if UnicodeIsLowSurrogate(pk[1]) then
|
||
puk := GetProps(pk[0],pk[1])
|
||
else
|
||
puk := GetProps(Word(pk^));
|
||
end else begin
|
||
puk := GetProps(Word(pk^));
|
||
end;
|
||
if (puk^.C3 = 0) or (ctx^.lastUnblockedNonstarterCCC >= puk^.C3) then
|
||
exit(False);
|
||
ctx^.lastUnblockedNonstarterCCC := puk^.C3;
|
||
Result := True;
|
||
end;
|
||
|
||
procedure RemoveChar(APos : Integer);{$IFDEF INLINE_SUPPORT_PRIVATE_VARS}inline;{$ENDIF}
|
||
begin
|
||
if (ctx^.removedCharIndexLength >= Length(ctx^.removedCharIndex)) then
|
||
SetLength(ctx^.removedCharIndex,(2*ctx^.removedCharIndexLength + 2));
|
||
ctx^.removedCharIndex[ctx^.removedCharIndexLength] := APos;
|
||
Inc(ctx^.removedCharIndexLength);
|
||
if UnicodeIsHighSurrogate(ctx^.s[APos]) and (APos < ctx^.c) and UnicodeIsLowSurrogate(ctx^.s[APos+1]) then begin
|
||
if (ctx^.removedCharIndexLength >= Length(ctx^.removedCharIndex)) then
|
||
SetLength(ctx^.removedCharIndex,(2*ctx^.removedCharIndexLength + 2));
|
||
ctx^.removedCharIndex[ctx^.removedCharIndexLength] := APos+1;
|
||
Inc(ctx^.removedCharIndexLength);
|
||
end;
|
||
end;
|
||
|
||
procedure Inc_I();{$IFDEF INLINE_SUPPORT_PRIVATE_VARS}inline;{$ENDIF}
|
||
begin
|
||
if (ctx^.removedCharIndexLength = 0) then begin
|
||
Inc(ctx^.i);
|
||
Inc(ctx^.ps);
|
||
exit;
|
||
end;
|
||
while True do begin
|
||
Inc(ctx^.i);
|
||
Inc(ctx^.ps);
|
||
if (IndexInArrayDWord(ctx^.removedCharIndex,ctx^.i) = -1) then
|
||
Break;
|
||
end;
|
||
end;
|
||
|
||
function MoveToNextChar() : Boolean;{$IFDEF INLINE_SUPPORT_PRIVATE_VARS}inline;{$ENDIF}
|
||
begin
|
||
Result := True;
|
||
if UnicodeIsHighSurrogate(ctx^.ps[0]) then begin
|
||
if (ctx^.i = ctx^.c) then
|
||
exit(False);
|
||
if UnicodeIsLowSurrogate(ctx^.ps[1]) then begin
|
||
ctx^.surrogateState := True;
|
||
ctx^.cp := ToUCS4(ctx^.ps[0],ctx^.ps[1]);
|
||
end else begin
|
||
ctx^.surrogateState := False;
|
||
ctx^.cp := Word(ctx^.ps[0]);
|
||
end;
|
||
end else begin
|
||
ctx^.surrogateState := False;
|
||
ctx^.cp := Word(ctx^.ps[0]);
|
||
end;
|
||
end;
|
||
|
||
function FindPropUCA() : Boolean;
|
||
var
|
||
candidateCL : PUCA_DataBook;
|
||
begin
|
||
ctx^.pp := nil;
|
||
if (ctx^.cl = nil) then
|
||
candidateCL := ctx^.Collation
|
||
else
|
||
candidateCL := ctx^.cl;
|
||
if ctx^.surrogateState then begin
|
||
while (candidateCL <> nil) do begin
|
||
ctx^.pp := GetPropUCA(ctx^.ps[0],ctx^.ps[1],candidateCL);
|
||
if (ctx^.pp <> nil) then
|
||
break;
|
||
candidateCL := candidateCL^.Base;
|
||
end;
|
||
end else begin
|
||
while (candidateCL <> nil) do begin
|
||
ctx^.pp := GetPropUCA(ctx^.ps[0],candidateCL);
|
||
if (ctx^.pp <> nil) then
|
||
break;
|
||
candidateCL := candidateCL^.Base;
|
||
end;
|
||
end;
|
||
ctx^.cl := candidateCL;
|
||
Result := (ctx^.pp <> nil);
|
||
end;
|
||
|
||
procedure AddWeightsAndClear();{$IFDEF INLINE_SUPPORT_PRIVATE_VARS}inline;{$ENDIF}
|
||
var
|
||
ctxNode : PUCA_PropItemContextTreeNodeRec;
|
||
begin
|
||
if (ctx^.pp^.WeightLength > 0) then begin
|
||
AddWeights(ctx^.pp);
|
||
end else
|
||
if (ctx^.LastKeyOwner.Length > 0) and ctx^.pp^.Contextual and
|
||
ctx^.pp^.GetContext()^.Find(@ctx^.LastKeyOwner.Chars[0],ctx^.LastKeyOwner.Length,ctxNode) and
|
||
(ctxNode^.Data.WeightCount > 0)
|
||
then begin
|
||
AddContextWeights(@ctxNode^.Data);
|
||
end;
|
||
//AddWeights(pp);
|
||
ClearHistory();
|
||
ClearPP(ctx);
|
||
end;
|
||
|
||
function StartMatch() : Boolean;
|
||
|
||
procedure HandleLastChar();
|
||
var
|
||
ctxNode : PUCA_PropItemContextTreeNodeRec;
|
||
begin
|
||
while True do begin
|
||
if ctx^.pp^.IsValid() then begin
|
||
if (ctx^.pp^.WeightLength > 0) then
|
||
AddWeights(ctx^.pp)
|
||
else
|
||
if (ctx^.LastKeyOwner.Length > 0) and ctx^.pp^.Contextual and
|
||
ctx^.pp^.GetContext()^.Find(@ctx^.LastKeyOwner.Chars[0],ctx^.LastKeyOwner.Length,ctxNode) and
|
||
(ctxNode^.Data.WeightCount > 0)
|
||
then
|
||
AddContextWeights(@ctxNode^.Data)
|
||
else
|
||
AddComputedWeights(ctx^.cp){handle deletion of code point};
|
||
break;
|
||
end;
|
||
if (ctx^.cl^.Base = nil) then begin
|
||
AddComputedWeights(ctx^.cp);
|
||
break;
|
||
end;
|
||
ctx^.cl := ctx^.cl^.Base;
|
||
if not FindPropUCA() then begin
|
||
AddComputedWeights(ctx^.cp);
|
||
break;
|
||
end;
|
||
end;
|
||
end;
|
||
var
|
||
tmpCtxNode : PUCA_PropItemContextTreeNodeRec;
|
||
begin
|
||
Result := False;
|
||
ctx^.ppLevel := 0;
|
||
if not FindPropUCA() then begin
|
||
AddComputedWeights(ctx^.cp);
|
||
ClearHistory();
|
||
ClearPP(ctx);
|
||
Result := True;
|
||
end else begin
|
||
if (ctx^.i = ctx^.c) then begin
|
||
HandleLastChar();
|
||
Result := True;
|
||
end else begin
|
||
if ctx^.pp^.IsValid()then begin
|
||
if (ctx^.pp^.ChildCount = 0) then begin
|
||
if (ctx^.pp^.WeightLength > 0) then
|
||
AddWeights(ctx^.pp)
|
||
else
|
||
if (ctx^.LastKeyOwner.Length > 0) and ctx^.pp^.Contextual and
|
||
ctx^.pp^.GetContext()^.Find(@ctx^.LastKeyOwner.Chars[0],ctx^.LastKeyOwner.Length,tmpCtxNode) and
|
||
(tmpCtxNode^.Data.WeightCount > 0)
|
||
then
|
||
AddContextWeights(@tmpCtxNode^.Data)
|
||
else
|
||
AddComputedWeights(ctx^.cp){handle deletion of code point};
|
||
ClearPP(ctx);
|
||
ClearHistory();
|
||
Result := True;
|
||
end else begin
|
||
RecordStep();
|
||
end
|
||
end else begin
|
||
if (ctx^.pp^.ChildCount = 0) then begin
|
||
AddComputedWeights(ctx^.cp);
|
||
ClearPP(ctx);
|
||
ClearHistory();
|
||
Result := True;
|
||
end else begin
|
||
RecordStep();
|
||
end;
|
||
end;
|
||
end;
|
||
end;
|
||
end;
|
||
|
||
function TryPermutation() : Boolean;
|
||
var
|
||
kk : Integer;
|
||
b : Boolean;
|
||
puk : PUC_Prop;
|
||
ppk : PUCA_PropItemRec;
|
||
begin
|
||
Result := False;
|
||
puk := GetProps(ctx^.cp);
|
||
if (puk^.C3 = 0) then
|
||
exit;
|
||
ctx^.lastUnblockedNonstarterCCC := puk^.C3;
|
||
if ctx^.surrogateState then
|
||
kk := ctx^.i + 2
|
||
else
|
||
kk := ctx^.i + 1;
|
||
while IsUnblockedNonstarter(kk) do begin
|
||
b := UnicodeIsHighSurrogate(ctx^.s[kk]) and (kk<ctx^.c) and UnicodeIsLowSurrogate(ctx^.s[kk+1]);
|
||
if b then
|
||
ppk := FindChild(ToUCS4(ctx^.s[kk],ctx^.s[kk+1]),ctx^.pp)
|
||
else
|
||
ppk := FindChild(Word(ctx^.s[kk]),ctx^.pp);
|
||
if (ppk <> nil) then begin
|
||
ctx^.pp := ppk;
|
||
RemoveChar(kk);
|
||
Inc(ctx^.ppLevel);
|
||
RecordStep();
|
||
Result := True;
|
||
if (ctx^.pp^.ChildCount = 0 ) then
|
||
Break;
|
||
end;
|
||
if b then
|
||
Inc(kk);
|
||
Inc(kk);
|
||
end;
|
||
end;
|
||
|
||
procedure AdvanceCharPos();{$IFDEF INLINE_SUPPORT_PRIVATE_VARS}inline;{$ENDIF}
|
||
begin
|
||
if UnicodeIsHighSurrogate(ctx^.ps[0]) and (ctx^.i<ctx^.c) and UnicodeIsLowSurrogate(ctx^.ps[1]) then begin
|
||
Inc(ctx^.i);
|
||
Inc(ctx^.ps);
|
||
end;
|
||
Inc_I();
|
||
end;
|
||
|
||
var
|
||
ok : Boolean;
|
||
pp1 : PUCA_PropItemRec;
|
||
cltemp : PUCA_DataBook;
|
||
ctxNode : PUCA_PropItemContextTreeNodeRec;
|
||
begin
|
||
if AContext^.Finished then
|
||
exit(False);
|
||
ctx := AContext;
|
||
while (ctx^.i <= ctx^.c) and MoveToNextChar() do begin
|
||
ok := False;
|
||
if (ctx^.pp = nil) then begin // Start Matching
|
||
ok := StartMatch();
|
||
end else begin
|
||
pp1 := FindChild(ctx^.cp,ctx^.pp);
|
||
if (pp1 <> nil) then begin
|
||
Inc(ctx^.ppLevel);
|
||
ctx^.pp := pp1;
|
||
if (ctx^.pp^.ChildCount = 0) or (ctx^.i = ctx^.c) then begin
|
||
ok := False;
|
||
if ctx^.pp^.IsValid() and (ctx^.suppressState.CharCount = 0) then begin
|
||
if (ctx^.pp^.WeightLength > 0) then begin
|
||
AddWeightsAndClear();
|
||
ok := True;
|
||
end else
|
||
if (ctx^.LastKeyOwner.Length > 0) and ctx^.pp^.Contextual and
|
||
ctx^.pp^.GetContext()^.Find(@ctx^.LastKeyOwner.Chars[0],ctx^.LastKeyOwner.Length,ctxNode) and
|
||
(ctxNode^.Data.WeightCount > 0)
|
||
then begin
|
||
AddContextWeights(@ctxNode^.Data);
|
||
ClearHistory();
|
||
ClearPP(ctx);
|
||
ok := True;
|
||
end
|
||
end;
|
||
if not ok then begin
|
||
RecordDeletion();
|
||
while HasHistory() do begin
|
||
GoBack();
|
||
if ctx^.pp^.IsValid() and
|
||
( ( (ctx^.cl = ctx^.suppressState.cl) and (ctx^.ppLevel <> ctx^.suppressState.CharCount) ) or
|
||
( (ctx^.cl <> ctx^.suppressState.cl) and (ctx^.ppLevel < ctx^.suppressState.CharCount) )
|
||
)
|
||
then begin
|
||
AddWeightsAndClear();
|
||
ok := True;
|
||
Break;
|
||
end;
|
||
end;
|
||
if not ok then begin
|
||
cltemp := ctx^.cl^.Base;
|
||
if (cltemp <> nil) then begin
|
||
ClearPP(ctx,False);
|
||
ctx^.cl := cltemp;
|
||
Continue;
|
||
end;
|
||
end;
|
||
|
||
if not ok then begin
|
||
AddComputedWeights(ctx^.cp);
|
||
ClearHistory();
|
||
ClearPP(ctx);
|
||
ok := True;
|
||
end;
|
||
end;
|
||
end else begin
|
||
RecordStep();
|
||
end;
|
||
end else begin
|
||
// permutations !
|
||
ok := False;
|
||
if TryPermutation() and ctx^.pp^.IsValid() then begin
|
||
if (ctx^.suppressState.CharCount = 0) then begin
|
||
AddWeightsAndClear();
|
||
//ok := True;
|
||
exit(True);// Continue;
|
||
end;
|
||
while True do begin
|
||
if ctx^.pp^.IsValid() and
|
||
(ctx^.pp^.WeightLength > 0) and
|
||
( ( (ctx^.cl = ctx^.suppressState.cl) and (ctx^.ppLevel <> ctx^.suppressState.CharCount) ) or
|
||
( (ctx^.cl <> ctx^.suppressState.cl) and (ctx^.ppLevel < ctx^.suppressState.CharCount) )
|
||
)
|
||
then begin
|
||
AddWeightsAndClear();
|
||
ok := True;
|
||
break;
|
||
end;
|
||
if not HasHistory() then
|
||
break;
|
||
GoBack();
|
||
if (ctx^.pp = nil) then
|
||
break;
|
||
end;
|
||
end;
|
||
if not ok then begin
|
||
if ctx^.pp^.IsValid() and (ctx^.suppressState.CharCount = 0) then begin
|
||
if (ctx^.pp^.WeightLength > 0) then begin
|
||
AddWeightsAndClear();
|
||
ok := True;
|
||
end else
|
||
if (ctx^.LastKeyOwner.Length > 0) and ctx^.pp^.Contextual and
|
||
ctx^.pp^.GetContext()^.Find(@ctx^.LastKeyOwner.Chars[0],ctx^.LastKeyOwner.Length,ctxNode) and
|
||
(ctxNode^.Data.WeightCount > 0)
|
||
then begin
|
||
AddContextWeights(@ctxNode^.Data);
|
||
ClearHistory();
|
||
ClearPP(ctx);
|
||
ok := True;
|
||
end
|
||
end;
|
||
if ok then
|
||
exit(True);// Continue;
|
||
end;
|
||
if not ok then begin
|
||
if (ctx^.cl^.Base <> nil) then begin
|
||
cltemp := ctx^.cl^.Base;
|
||
while HasHistory() do
|
||
GoBack();
|
||
ctx^.pp := nil;
|
||
ctx^.ppLevel := 0;
|
||
ctx^.cl := cltemp;
|
||
Continue;
|
||
end;
|
||
|
||
//walk back
|
||
ok := False;
|
||
while HasHistory() do begin
|
||
GoBack();
|
||
if ctx^.pp^.IsValid() and
|
||
(ctx^.pp^.WeightLength > 0) and
|
||
( (ctx^.suppressState.CharCount = 0) or
|
||
( ( (ctx^.cl = ctx^.suppressState.cl) and (ctx^.ppLevel <> ctx^.suppressState.CharCount) ) or
|
||
( (ctx^.cl <> ctx^.suppressState.cl) and (ctx^.ppLevel < ctx^.suppressState.CharCount) )
|
||
)
|
||
)
|
||
then begin
|
||
AddWeightsAndClear();
|
||
ok := True;
|
||
Break;
|
||
end;
|
||
end;
|
||
if ok then begin
|
||
AdvanceCharPos();
|
||
exit(True);// Continue;
|
||
end;
|
||
if (ctx^.pp <> nil) then begin
|
||
AddComputedWeights(ctx^.cp);
|
||
ClearHistory();
|
||
ClearPP(ctx);
|
||
ok := True;
|
||
end;
|
||
end;
|
||
end;
|
||
end;
|
||
if ctx^.surrogateState then begin
|
||
Inc(ctx^.ps);
|
||
Inc(ctx^.i);
|
||
end;
|
||
//
|
||
Inc_I();
|
||
if ok then
|
||
exit(True);
|
||
end;
|
||
SetLength(ctx^.r,ctx^.ral);
|
||
ctx^.Finished := True;
|
||
Result := True;
|
||
end;
|
||
|
||
function ComputeSortKey(
|
||
const AStr : PUnicodeChar;
|
||
const ALength : SizeInt;
|
||
const ACollation : PUCA_DataBook
|
||
) : TUCASortKey;
|
||
var
|
||
r : TUCA_PropWeightsArray;
|
||
begin
|
||
r := ComputeRawSortKey(AStr,ALength,ACollation);
|
||
Result := FormKey(r,ACollation);
|
||
end;
|
||
|
||
end.
|