mirror of
				https://gitlab.com/freepascal.org/fpc/source.git
				synced 2025-10-31 11:31:39 +01:00 
			
		
		
		
	
		
			
				
	
	
		
			3502 lines
		
	
	
		
			98 KiB
		
	
	
	
		
			ObjectPascal
		
	
	
	
	
	
			
		
		
	
	
			3502 lines
		
	
	
		
			98 KiB
		
	
	
	
		
			ObjectPascal
		
	
	
	
	
	
| {   Unicode tables unit.
 | ||
| 
 | ||
|     Copyright (c) 2013 by Inoussa OUEDRAOGO
 | ||
| 
 | ||
|     The source code is distributed under the Library GNU
 | ||
|     General Public License with the following modification:
 | ||
| 
 | ||
|         - object files and libraries linked into an application may be
 | ||
|           distributed without source code.
 | ||
| 
 | ||
|     If you didn't receive a copy of the file COPYING, contact:
 | ||
|           Free Software Foundation
 | ||
|           675 Mass Ave
 | ||
|           Cambridge, MA  02139
 | ||
|           USA
 | ||
| 
 | ||
|     This program is distributed in the hope that it will be useful,
 | ||
|     but WITHOUT ANY WARRANTY; without even the implied warranty of
 | ||
|     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 | ||
| 
 | ||
| -------------------------------------------------------------------------------
 | ||
| 
 | ||
|   Overview of the Unicode Collation Algorithm(UCA) data layout :
 | ||
|   ============================================================
 | ||
| 
 | ||
|     The UCA data(see “TUCA_DataBook”) are organized into index data
 | ||
|     (see the “TUCA_DataBook” fields “BMP_Table1”, “BMP_Table2”,
 | ||
|     “OBMP_Table1” and “OBMP_Table2”) and actual properties data(see
 | ||
|     the “Props” field of  “TUCA_DataBook”). The index is a 3 level
 | ||
|     tables designed to minimize the overhaul data size. The
 | ||
|     properties’ data contain the actual (used) UCA’s properties
 | ||
|     for the customized code points(or sequence of code points)
 | ||
|     data (see TUCA_PropItemRec).
 | ||
|     To get the properties’ record of a code point, one goes
 | ||
|     through the index data to get its offset into the “Props”
 | ||
|     serialized data, see the “GetPropUCA” procedure.
 | ||
|     The “TUCA_PropItemRec” record, that represents the actual
 | ||
|     properties, contains a fixed part and a variable part. The
 | ||
|     fixed part is directly expressed as fields of the record :
 | ||
|       “WeightLength”, “ChildCount”, “Size”, “Flags”. The
 | ||
|     variable part depends on some values of the fixed part; For
 | ||
|     example “WeightLength” specify the number of weight[1] item,
 | ||
|     it can be zero or not null; The “Flags” fields does contains
 | ||
|     some bit states to indicate for example if the record’s owner,
 | ||
|     that is the target code point, is present(it is not always
 | ||
|     necessary to store the code point as you are required to have
 | ||
|     this information in the first place in order to get the
 | ||
|     “TUCA_PropItemRec” record).
 | ||
| 
 | ||
|     The data, as it is organized now, is as follow for each code point :
 | ||
|       * the fixed part is serialized,
 | ||
|       * if there are weight item array, they are serialized
 | ||
|           (see the "WeigthLength")
 | ||
|       * the code point is serialized (if needed)
 | ||
|       * the context[2] array is serialized
 | ||
|       * The children[3] record are serialized.
 | ||
| 
 | ||
|     The “Size” represent the size of the whole record, including its
 | ||
|     children records(see [3]). The “GetSelfOnlySize” returns the size
 | ||
|     of the queried record, excluding the size of its children.
 | ||
| 
 | ||
| 
 | ||
|     Notes :
 | ||
| 
 | ||
|     [1] : A weight item is an array of 3 words. A code point/sequence of code
 | ||
|           point may have zero or multiple items.
 | ||
| 
 | ||
|     [2] :  There are characters(mostly japanese ones) that do not have their
 | ||
|            own weighs; There inherit the weights of the preceding character
 | ||
|            in the string that you will be evaluating.
 | ||
|     [3] :  Some unicode characters are expressed using more than one code point.
 | ||
|            In that case the properties records are serialized as a trie. The
 | ||
|            trie data structure is useful when many characters’ expression have
 | ||
|            the same starting code point(s).
 | ||
| 
 | ||
|     [4] TUCA_PropItemRec serialization :
 | ||
|             TUCA_PropItemRec :
 | ||
|               WeightLength, ChildCount, Size, Flags [weight item array]
 | ||
|     [Code Point] [Context data]
 | ||
|               [Child 0] [Child 1] .. [Child n]
 | ||
| 
 | ||
|         each [Child k] is a TUCA_PropItemRec.
 | ||
| }
 | ||
| 
 | ||
| unit unicodedata;
 | ||
| {$mode delphi}
 | ||
| {$H+}
 | ||
| {$PACKENUM 1}
 | ||
| {$SCOPEDENUMS ON}
 | ||
| {$pointermath on}
 | ||
| {$define USE_INLINE}
 | ||
| {$warn 4056 off}  //Conversion between ordinals and pointers is not portable
 | ||
| { $define uni_debug}
 | ||
| 
 | ||
| interface
 | ||
| 
 | ||
| const
 | ||
|   MAX_WORD = High(Word);
 | ||
|   LOW_SURROGATE_BEGIN  = Word($DC00);
 | ||
|   LOW_SURROGATE_END    = Word($DFFF);
 | ||
| 
 | ||
|   HIGH_SURROGATE_BEGIN = Word($D800);
 | ||
|   HIGH_SURROGATE_END   = Word($DBFF);
 | ||
|   HIGH_SURROGATE_COUNT = HIGH_SURROGATE_END - HIGH_SURROGATE_BEGIN + 1;
 | ||
|   UCS4_HALF_BASE       = LongWord($10000);
 | ||
|   UCS4_HALF_MASK       = Word($3FF);
 | ||
|   MAX_LEGAL_UTF32      = $10FFFF;
 | ||
| 
 | ||
| const
 | ||
|     // Unicode General Category
 | ||
|     UGC_UppercaseLetter         = 0;
 | ||
|     UGC_LowercaseLetter         = 1;
 | ||
|     UGC_TitlecaseLetter         = 2;
 | ||
|     UGC_ModifierLetter          = 3;
 | ||
|     UGC_OtherLetter             = 4;
 | ||
| 
 | ||
|     UGC_NonSpacingMark          = 5;
 | ||
|     UGC_CombiningMark           = 6;
 | ||
|     UGC_EnclosingMark           = 7;
 | ||
| 
 | ||
|     UGC_DecimalNumber           = 8;
 | ||
|     UGC_LetterNumber            = 9;
 | ||
|     UGC_OtherNumber             = 10;
 | ||
| 
 | ||
|     UGC_ConnectPunctuation      = 11;
 | ||
|     UGC_DashPunctuation         = 12;
 | ||
|     UGC_OpenPunctuation         = 13;
 | ||
|     UGC_ClosePunctuation        = 14;
 | ||
|     UGC_InitialPunctuation      = 15;
 | ||
|     UGC_FinalPunctuation        = 16;
 | ||
|     UGC_OtherPunctuation        = 17;
 | ||
| 
 | ||
|     UGC_MathSymbol              = 18;
 | ||
|     UGC_CurrencySymbol          = 19;
 | ||
|     UGC_ModifierSymbol          = 20;
 | ||
|     UGC_OtherSymbol             = 21;
 | ||
| 
 | ||
|     UGC_SpaceSeparator          = 22;
 | ||
|     UGC_LineSeparator           = 23;
 | ||
|     UGC_ParagraphSeparator      = 24;
 | ||
| 
 | ||
|     UGC_Control                 = 25;
 | ||
|     UGC_Format                  = 26;
 | ||
|     UGC_Surrogate               = 27;
 | ||
|     UGC_PrivateUse              = 28;
 | ||
|     UGC_Unassigned              = 29;
 | ||
| 
 | ||
| type
 | ||
| 
 | ||
|   TUInt24Rec = packed record
 | ||
|   public
 | ||
|   {$ifdef FPC_LITTLE_ENDIAN}
 | ||
|     byte0, byte1, byte2 : Byte;
 | ||
|   {$else FPC_LITTLE_ENDIAN}
 | ||
|     byte2, byte1, byte0 : Byte;
 | ||
|   {$endif FPC_LITTLE_ENDIAN}
 | ||
|   public
 | ||
|     class operator Implicit(a : TUInt24Rec) : Cardinal;{$ifdef USE_INLINE}inline;{$ENDIF}
 | ||
|     class operator Implicit(a : TUInt24Rec) : LongInt;{$ifdef USE_INLINE}inline;{$ENDIF}
 | ||
|     class operator Implicit(a : TUInt24Rec) : Word;{$ifdef USE_INLINE}inline;{$ENDIF}
 | ||
|     class operator Implicit(a : TUInt24Rec) : Byte;{$ifdef USE_INLINE}inline;{$ENDIF}
 | ||
|     class operator Implicit(a : Cardinal) : TUInt24Rec;{$ifdef USE_INLINE}inline;{$ENDIF}
 | ||
|     class operator Equal(a, b: TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
 | ||
| 
 | ||
|     class operator Equal(a : TUInt24Rec; b : Cardinal): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
 | ||
|     class operator Equal(a : Cardinal; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
 | ||
| 
 | ||
|     class operator Equal(a : TUInt24Rec; b : LongInt): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
 | ||
|     class operator Equal(a : LongInt; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
 | ||
| 
 | ||
|     class operator Equal(a : TUInt24Rec; b : Word): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
 | ||
|     class operator Equal(a : Word; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
 | ||
| 
 | ||
|     class operator Equal(a : TUInt24Rec; b : Byte): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
 | ||
|     class operator Equal(a : Byte; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
 | ||
| 
 | ||
|     class operator NotEqual(a, b: TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
 | ||
|     class operator NotEqual(a : TUInt24Rec; b : Cardinal): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
 | ||
|     class operator NotEqual(a : Cardinal; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
 | ||
|     class operator GreaterThan(a, b: TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
 | ||
|     class operator GreaterThan(a : TUInt24Rec; b : Cardinal): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
 | ||
|     class operator GreaterThan(a : Cardinal; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
 | ||
|     class operator GreaterThanOrEqual(a, b: TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
 | ||
|     class operator GreaterThanOrEqual(a : TUInt24Rec; b : Cardinal): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
 | ||
|     class operator GreaterThanOrEqual(a : Cardinal; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
 | ||
|     class operator LessThan(a, b: TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
 | ||
|     class operator LessThan(a : TUInt24Rec; b : Cardinal): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
 | ||
|     class operator LessThan(a : Cardinal; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
 | ||
|     class operator LessThanOrEqual(a, b: TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
 | ||
|     class operator LessThanOrEqual(a : TUInt24Rec; b : Cardinal): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
 | ||
|     class operator LessThanOrEqual(a : Cardinal; b : TUInt24Rec): Boolean;{$ifdef USE_INLINE}inline;{$ENDIF}
 | ||
|   end;
 | ||
| 
 | ||
|   UInt24 = TUInt24Rec;
 | ||
|   PUInt24 = ^UInt24;
 | ||
| 
 | ||
| const
 | ||
|   ZERO_UINT24 : UInt24 =
 | ||
|   {$ifdef FPC_LITTLE_ENDIAN}
 | ||
|     (byte0 : 0; byte1 : 0; byte2 : 0;);
 | ||
|   {$else FPC_LITTLE_ENDIAN}
 | ||
|     (byte2 : 0; byte1 : 0; byte0 : 0;);
 | ||
|   {$endif FPC_LITTLE_ENDIAN}
 | ||
| 
 | ||
| type
 | ||
|   PUC_Prop = ^TUC_Prop;
 | ||
| 
 | ||
|   { TUC_Prop }
 | ||
| 
 | ||
|   { On alignment-sensitive targets, at least some of them, assembler uses to forcibly align data >1 byte.
 | ||
|     This breaks intended layout of initialized constants/variables.
 | ||
|     A proper solution is to patch compiler to emit always unaligned directives for words/dwords/etc,
 | ||
|     but for now just declare this record as "unpacked". This causes bloat, but it's better than having
 | ||
|     entire unit not working at all. }
 | ||
|   TUC_Prop = {$ifndef FPC_REQUIRES_PROPER_ALIGNMENT}packed{$endif} record
 | ||
|   private
 | ||
|     function GetCategory : Byte;inline;
 | ||
|     procedure SetCategory(AValue : Byte);
 | ||
|     function GetWhiteSpace : Boolean;inline;
 | ||
|     procedure SetWhiteSpace(AValue : Boolean);
 | ||
|     function GetHangulSyllable : Boolean;inline;
 | ||
|     procedure SetHangulSyllable(AValue : Boolean);
 | ||
|     function GetNumericValue: Double;inline;
 | ||
|   public
 | ||
|     CategoryData    : Byte;
 | ||
|   public
 | ||
|     CCC             : Byte;
 | ||
|     NumericIndex    : Byte;
 | ||
|     SimpleUpperCase : UInt24;
 | ||
|     SimpleLowerCase : UInt24;
 | ||
|     DecompositionID : SmallInt;
 | ||
|   public
 | ||
|     property Category : Byte read GetCategory write SetCategory;
 | ||
|     property WhiteSpace : Boolean read GetWhiteSpace write SetWhiteSpace;
 | ||
|     property HangulSyllable : Boolean read GetHangulSyllable write SetHangulSyllable;
 | ||
|     property NumericValue : Double read GetNumericValue;
 | ||
|   end;
 | ||
| 
 | ||
| type
 | ||
|   TUCA_PropWeights = packed record
 | ||
|     Weights  : array[0..2] of Word;
 | ||
|   end;
 | ||
|   PUCA_PropWeights = ^TUCA_PropWeights;
 | ||
| 
 | ||
| 
 | ||
|   TUCA_PropItemContextRec = packed record
 | ||
|   public
 | ||
|     CodePointCount : Byte;
 | ||
|     WeightCount    : Byte;
 | ||
|   public
 | ||
|     function GetCodePoints() : PUInt24;inline;
 | ||
|     function GetWeights() : PUCA_PropWeights;inline;
 | ||
|   end;
 | ||
|   PUCA_PropItemContextRec = ^TUCA_PropItemContextRec;
 | ||
| 
 | ||
|   PUCA_PropItemContextTreeNodeRec = ^TUCA_PropItemContextTreeNodeRec;
 | ||
|   TUCA_PropItemContextTreeNodeRec = packed record
 | ||
|   public
 | ||
|     Left    : Word;
 | ||
|     Right   : Word;
 | ||
|     Data    : TUCA_PropItemContextRec;
 | ||
|   public
 | ||
|     function GetLeftNode() : PUCA_PropItemContextTreeNodeRec;inline;
 | ||
|     function GetRightNode() : PUCA_PropItemContextTreeNodeRec;inline;
 | ||
|   end;
 | ||
| 
 | ||
| 
 | ||
|   { TUCA_PropItemContextTreeRec }
 | ||
| 
 | ||
|   TUCA_PropItemContextTreeRec = packed record
 | ||
|   public
 | ||
|     Size : UInt24;
 | ||
|   public
 | ||
|     function GetData:PUCA_PropItemContextTreeNodeRec;inline;
 | ||
|     property Data : PUCA_PropItemContextTreeNodeRec read GetData;
 | ||
| 
 | ||
|     function Find(
 | ||
|       const AChars     : PUInt24;
 | ||
|       const ACharCount : Integer;
 | ||
|       out   ANode      : PUCA_PropItemContextTreeNodeRec
 | ||
|     ) : Boolean;
 | ||
|   end;
 | ||
|   PUCA_PropItemContextTreeRec = ^TUCA_PropItemContextTreeRec;
 | ||
| 
 | ||
|   { TUCA_PropItemRec }
 | ||
| 
 | ||
|   TUCA_PropItemRec = packed record
 | ||
|   private
 | ||
|     const FLAG_VALID      = 0;
 | ||
|     const FLAG_CODEPOINT  = 1;
 | ||
|     const FLAG_CONTEXTUAL = 2;
 | ||
|     const FLAG_DELETION   = 3;
 | ||
|     const FLAG_COMPRESS_WEIGHT_1 = 6;
 | ||
|     const FLAG_COMPRESS_WEIGHT_2 = 7;
 | ||
|   private
 | ||
|     function GetCodePoint() : UInt24;inline;
 | ||
|   public
 | ||
|     WeightLength : Byte;
 | ||
|     ChildCount   : Byte;
 | ||
|     Size         : Word;
 | ||
|     Flags        : Byte;
 | ||
|   public
 | ||
|     function HasCodePoint() : Boolean;inline;
 | ||
|     property CodePoint : UInt24 read GetCodePoint;
 | ||
|     //Weights    : array[0..WeightLength] of TUCA_PropWeights;
 | ||
| 
 | ||
|     function IsValid() : Boolean;inline;
 | ||
|     //function GetWeightArray() : PUCA_PropWeights;inline;
 | ||
|     procedure GetWeightArray(ADest : PUCA_PropWeights);
 | ||
|     function GetSelfOnlySize() : Cardinal;inline;
 | ||
| 
 | ||
|     function GetContextual() : Boolean;inline;
 | ||
|     property Contextual : Boolean read GetContextual;
 | ||
|     function GetContext() : PUCA_PropItemContextTreeRec;
 | ||
| 
 | ||
|     function IsDeleted() : Boolean;inline;
 | ||
|     function IsWeightCompress_1() : Boolean;inline;
 | ||
|     function IsWeightCompress_2() : Boolean;inline;
 | ||
|   end;
 | ||
|   PUCA_PropItemRec = ^TUCA_PropItemRec;
 | ||
| 
 | ||
|   TUCA_VariableKind = (
 | ||
|     ucaShifted, ucaNonIgnorable, ucaBlanked, ucaShiftedTrimmed,
 | ||
|     ucaIgnoreSP // This one is not implemented !
 | ||
|   );
 | ||
| 
 | ||
|   TCollationName = string[128];
 | ||
| 
 | ||
|   PUCA_DataBook = ^TUCA_DataBook;
 | ||
|   TUCA_DataBook = record
 | ||
|   public
 | ||
|     Base               : PUCA_DataBook;
 | ||
|     Version            : TCollationName;
 | ||
|     CollationName      : TCollationName;
 | ||
|     VariableWeight     : TUCA_VariableKind;
 | ||
|     Backwards          : array[0..3] of Boolean;
 | ||
|     BMP_Table1         : PByte;
 | ||
|     BMP_Table2         : PUInt24;
 | ||
|     OBMP_Table1        : PWord;
 | ||
|     OBMP_Table2        : PUInt24;
 | ||
|     PropCount          : Integer;
 | ||
|     Props              : PUCA_PropItemRec;
 | ||
|     VariableLowLimit   : Word;
 | ||
|     VariableHighLimit  : Word;
 | ||
|     Dynamic            : Boolean;
 | ||
|   public
 | ||
|     function IsVariable(const AWeight : PUCA_PropWeights) : Boolean; inline;
 | ||
|   end;
 | ||
| 
 | ||
|   TCollationField = (BackWard, VariableLowLimit, VariableHighLimit);
 | ||
|   TCollationFields = set of TCollationField;
 | ||
| 
 | ||
| const
 | ||
|   ROOT_COLLATION_NAME = 'DUCET';
 | ||
|   ERROR_INVALID_CODEPOINT_SEQUENCE = 1;
 | ||
| 
 | ||
|   procedure FromUCS4(const AValue : UCS4Char; var AHighS, ALowS : UnicodeChar);inline;
 | ||
|   function ToUCS4(const AHighS, ALowS : UnicodeChar) : UCS4Char;inline;
 | ||
|   function UnicodeIsSurrogatePair(
 | ||
|     const AHighSurrogate,
 | ||
|           ALowSurrogate   : UnicodeChar
 | ||
|   ) : Boolean;inline;
 | ||
|   function UnicodeIsHighSurrogate(const AValue : UnicodeChar) : Boolean;inline;
 | ||
|   function UnicodeIsLowSurrogate(const AValue : UnicodeChar) : Boolean;inline;
 | ||
|   function UnicodeToUpper(
 | ||
|     const AString                : UnicodeString;
 | ||
|     const AIgnoreInvalidSequence : Boolean;
 | ||
|     out   AResultString          : UnicodeString
 | ||
|   ) : Integer;
 | ||
|   function UnicodeToLower(
 | ||
|     const AString                : UnicodeString;
 | ||
|     const AIgnoreInvalidSequence : Boolean;
 | ||
|     out   AResultString          : UnicodeString
 | ||
|   ) : Integer;
 | ||
| 
 | ||
|   function GetProps(const ACodePoint : Word) : PUC_Prop;overload;inline;
 | ||
|   function GetProps(const AHighS, ALowS : UnicodeChar): PUC_Prop;overload;inline;
 | ||
|   function GetProps(const ACodePoint : Cardinal) : PUC_Prop;overload;inline;
 | ||
|   function GetPropUCA(const AHighS, ALowS : UnicodeChar; const ABook : PUCA_DataBook): PUCA_PropItemRec; inline; overload;
 | ||
|   function GetPropUCA(const AChar : UnicodeChar; const ABook : PUCA_DataBook) : PUCA_PropItemRec; inline; overload;
 | ||
| 
 | ||
| 
 | ||
|   function NormalizeNFD(const AString : UnicodeString) : UnicodeString;inline;overload;
 | ||
|   function NormalizeNFD(const AStr : PUnicodeChar; ALength : SizeInt) : UnicodeString;overload;
 | ||
|   function DecomposeCanonical(const AString : UnicodeString) : UnicodeString;inline;overload;
 | ||
|   function DecomposeCanonical(const AStr : PUnicodeChar; ALength : SizeInt) : UnicodeString;overload;
 | ||
|   procedure CanonicalOrder(var AString : UnicodeString);inline;overload;
 | ||
|   procedure CanonicalOrder(AStr : PUnicodeChar; const ALength : SizeInt);overload;
 | ||
| 
 | ||
| type
 | ||
|   TUCASortKeyItem = Word;
 | ||
|   TUCASortKey = array of TUCASortKeyItem;
 | ||
|   function ComputeSortKey(
 | ||
|     const AString    : UnicodeString;
 | ||
|     const ACollation : PUCA_DataBook
 | ||
|   ) : TUCASortKey;inline;overload;
 | ||
|   function ComputeSortKey(
 | ||
|     const AStr       : PUnicodeChar;
 | ||
|     const ALength    : SizeInt;
 | ||
|     const ACollation : PUCA_DataBook
 | ||
|   ) : TUCASortKey;overload;
 | ||
|   function CompareSortKey(const A, B : TUCASortKey) : Integer;overload;
 | ||
|   function CompareSortKey(const A : TUCASortKey; const B : array of Word) : Integer;overload;
 | ||
|   function IncrementalCompareString(
 | ||
|     const AStrA      : PUnicodeChar;
 | ||
|     const ALengthA   : SizeInt;
 | ||
|     const AStrB      : PUnicodeChar;
 | ||
|     const ALengthB   : SizeInt;
 | ||
|     const ACollation : PUCA_DataBook
 | ||
|   ) : Integer;overload;
 | ||
|   function IncrementalCompareString(
 | ||
|     const AStrA,
 | ||
|           AStrB      : UnicodeString;
 | ||
|     const ACollation : PUCA_DataBook
 | ||
|   ) : Integer;inline;overload;
 | ||
| 
 | ||
|   function RegisterCollation(const ACollation : PUCA_DataBook) : Boolean;overload;
 | ||
|   function RegisterCollation(
 | ||
|     const ADirectory,
 | ||
|           ALanguage : string
 | ||
|   ) : Boolean;overload;
 | ||
|   function UnregisterCollation(const AName : ansistring): Boolean;
 | ||
|   procedure UnregisterCollations(const AFreeDynamicCollations : Boolean);
 | ||
|   function FindCollation(const AName : ansistring): PUCA_DataBook;overload;
 | ||
|   function FindCollation(const AIndex : Integer): PUCA_DataBook;overload;
 | ||
|   function GetCollationCount() : Integer;
 | ||
|   procedure PrepareCollation(
 | ||
|           ACollation     : PUCA_DataBook;
 | ||
|     const ABaseName      : ansistring;
 | ||
|     const AChangedFields : TCollationFields
 | ||
|   );
 | ||
|   function LoadCollation(
 | ||
|     const AData       : Pointer;
 | ||
|     const ADataLength : Integer
 | ||
|   ) : PUCA_DataBook;overload;
 | ||
|   function LoadCollation(const AFileName : string) : PUCA_DataBook;overload;
 | ||
|   function LoadCollation(
 | ||
|     const ADirectory,
 | ||
|           ALanguage : string
 | ||
|   ) : PUCA_DataBook;overload;
 | ||
|   procedure FreeCollation(AItem : PUCA_DataBook);
 | ||
| 
 | ||
| type
 | ||
|   TEndianKind = (Little, Big);
 | ||
| const
 | ||
|   ENDIAN_SUFFIX : array[TEndianKind] of string[2] = ('le','be');
 | ||
| {$IFDEF ENDIAN_LITTLE}
 | ||
|   ENDIAN_NATIVE     = TEndianKind.Little;
 | ||
|   ENDIAN_NON_NATIVE = TEndianKind.Big;
 | ||
| {$ENDIF ENDIAN_LITTLE}
 | ||
| {$IFDEF ENDIAN_BIG}
 | ||
|   ENDIAN_NATIVE = TEndianKind.Big;
 | ||
|   ENDIAN_NON_NATIVE = TEndianKind.Little;
 | ||
| {$ENDIF ENDIAN_BIG}
 | ||
| 
 | ||
| resourcestring
 | ||
|   SCollationNotFound = 'Collation not found : "%s".';
 | ||
| 
 | ||
| implementation
 | ||
| uses
 | ||
|   unicodenumtable;
 | ||
| 
 | ||
| type
 | ||
| 
 | ||
|   TCardinalRec = packed record
 | ||
|   {$ifdef FPC_LITTLE_ENDIAN}
 | ||
|     byte0, byte1, byte2, byte3 : Byte;
 | ||
|   {$else FPC_LITTLE_ENDIAN}
 | ||
|     byte3, byte2, byte1, byte0 : Byte;
 | ||
|   {$endif FPC_LITTLE_ENDIAN}
 | ||
|   end;
 | ||
| 
 | ||
|   TWordRec = packed record
 | ||
|   {$ifdef FPC_LITTLE_ENDIAN}
 | ||
|     byte0, byte1 : Byte;
 | ||
|   {$else FPC_LITTLE_ENDIAN}
 | ||
|     byte1, byte0 : Byte;
 | ||
|   {$endif FPC_LITTLE_ENDIAN}
 | ||
|   end;
 | ||
| 
 | ||
| { TUInt24Rec }
 | ||
| 
 | ||
| class operator TUInt24Rec.Implicit(a : TUInt24Rec) : Cardinal;
 | ||
| begin
 | ||
|   TCardinalRec(Result).byte0 := a.byte0;
 | ||
|   TCardinalRec(Result).byte1 := a.byte1;
 | ||
|   TCardinalRec(Result).byte2 := a.byte2;
 | ||
|   TCardinalRec(Result).byte3 := 0;
 | ||
| end;
 | ||
| 
 | ||
| class operator TUInt24Rec.Implicit(a : TUInt24Rec) : LongInt;
 | ||
| begin
 | ||
|   Result := Cardinal(a);
 | ||
| end;
 | ||
| 
 | ||
| class operator TUInt24Rec.Implicit(a : TUInt24Rec) : Word;
 | ||
| begin
 | ||
| {$IFOPT R+}
 | ||
|   if (a > $FFFF) then
 | ||
|     Error(reIntOverflow);
 | ||
| {$ENDIF R+}
 | ||
|   TWordRec(Result).byte0 := a.byte0;
 | ||
|   TWordRec(Result).byte1 := a.byte1;
 | ||
| end;
 | ||
| 
 | ||
| class operator TUInt24Rec.Implicit(a : TUInt24Rec) : Byte;
 | ||
| begin
 | ||
| {$IFOPT R+}
 | ||
|   if (a > $FF) then
 | ||
|     Error(reIntOverflow);
 | ||
| {$ENDIF R+}
 | ||
|   Result := a.byte0;
 | ||
| end;
 | ||
| 
 | ||
| class operator TUInt24Rec.Implicit(a : Cardinal) : TUInt24Rec;
 | ||
| begin
 | ||
| {$IFOPT R+}
 | ||
|   if (a > $FFFFFF) then
 | ||
|     Error(reIntOverflow);
 | ||
| {$ENDIF R+}
 | ||
|   Result.byte0 := TCardinalRec(a).byte0;
 | ||
|   Result.byte1 := TCardinalRec(a).byte1;
 | ||
|   Result.byte2 := TCardinalRec(a).byte2;
 | ||
| end;
 | ||
| 
 | ||
| class operator TUInt24Rec.Equal(a, b : TUInt24Rec) : Boolean;
 | ||
| begin
 | ||
|   Result := (a.byte0 = b.byte0) and (a.byte1 = b.byte1) and (a.byte2 = b.byte2);
 | ||
| end;
 | ||
| 
 | ||
| class operator TUInt24Rec.Equal(a : TUInt24Rec; b : Cardinal) : Boolean;
 | ||
| begin
 | ||
|   Result := (TCardinalRec(b).byte3 = 0) and
 | ||
|             (a.byte0 = TCardinalRec(b).byte0) and
 | ||
|             (a.byte1 = TCardinalRec(b).byte1) and
 | ||
|             (a.byte2 = TCardinalRec(b).byte2);
 | ||
| end;
 | ||
| 
 | ||
| class operator TUInt24Rec.Equal(a : Cardinal; b : TUInt24Rec) : Boolean;
 | ||
| begin
 | ||
|   Result := (b = a);
 | ||
| end;
 | ||
| 
 | ||
| class operator TUInt24Rec.Equal(a : TUInt24Rec; b : LongInt) : Boolean;
 | ||
| begin
 | ||
|   Result := (LongInt(a) = b);
 | ||
| end;
 | ||
| 
 | ||
| class operator TUInt24Rec.Equal(a : LongInt; b : TUInt24Rec) : Boolean;
 | ||
| begin
 | ||
|   Result := (b = a);
 | ||
| end;
 | ||
| 
 | ||
| class operator TUInt24Rec.Equal(a : TUInt24Rec; b : Word) : Boolean;
 | ||
| begin
 | ||
|   Result := (a.byte2 = 0) and
 | ||
|             (a.byte0 = TWordRec(b).byte0) and
 | ||
|             (a.byte1 = TWordRec(b).byte1);
 | ||
| end;
 | ||
| 
 | ||
| class operator TUInt24Rec.Equal(a : Word; b : TUInt24Rec) : Boolean;
 | ||
| begin
 | ||
|   Result := (b = a);
 | ||
| end;
 | ||
| 
 | ||
| class operator TUInt24Rec.Equal(a : TUInt24Rec; b : Byte) : Boolean;
 | ||
| begin
 | ||
|   Result := (a.byte2 = 0) and
 | ||
|             (a.byte1 = 0) and
 | ||
|             (a.byte0 = b);
 | ||
| end;
 | ||
| 
 | ||
| class operator TUInt24Rec.Equal(a : Byte; b : TUInt24Rec) : Boolean;
 | ||
| begin
 | ||
|   Result := (b = a);
 | ||
| end;
 | ||
| 
 | ||
| class operator TUInt24Rec.NotEqual(a, b : TUInt24Rec) : Boolean;
 | ||
| begin
 | ||
|   Result := (a.byte0 <> b.byte0) or (a.byte1 <> b.byte1) or (a.byte2 <> b.byte2);
 | ||
| end;
 | ||
| 
 | ||
| class operator TUInt24Rec.NotEqual(a : TUInt24Rec; b : Cardinal) : Boolean;
 | ||
| begin
 | ||
|   Result := (TCardinalRec(b).byte3 <> 0) or
 | ||
|             (a.byte0 <> TCardinalRec(b).byte0) or
 | ||
|             (a.byte1 <> TCardinalRec(b).byte1) or
 | ||
|             (a.byte2 <> TCardinalRec(b).byte2);
 | ||
| end;
 | ||
| 
 | ||
| class operator TUInt24Rec.NotEqual(a : Cardinal; b : TUInt24Rec) : Boolean;
 | ||
| begin
 | ||
|   Result := (b <> a);
 | ||
| end;
 | ||
| 
 | ||
| class operator TUInt24Rec.GreaterThan(a, b: TUInt24Rec): Boolean;
 | ||
| begin
 | ||
|   Result := (a.byte2 > b.byte2) or
 | ||
|             ((a.byte2 = b.byte2) and (a.byte1 > b.byte1)) or
 | ||
|             ((a.byte2 = b.byte2) and (a.byte1 = b.byte1) and (a.byte0 > b.byte0));
 | ||
| end;
 | ||
| 
 | ||
| class operator TUInt24Rec.GreaterThan(a: TUInt24Rec; b: Cardinal): Boolean;
 | ||
| begin
 | ||
|   Result := Cardinal(a) > b;
 | ||
| end;
 | ||
| 
 | ||
| class operator TUInt24Rec.GreaterThan(a: Cardinal; b: TUInt24Rec): Boolean;
 | ||
| begin
 | ||
|   Result := a > Cardinal(b);
 | ||
| end;
 | ||
| 
 | ||
| class operator TUInt24Rec.GreaterThanOrEqual(a, b: TUInt24Rec): Boolean;
 | ||
| begin
 | ||
|   Result := (a.byte2 > b.byte2) or
 | ||
|             ((a.byte2 = b.byte2) and (a.byte1 > b.byte1)) or
 | ||
|             ((a.byte2 = b.byte2) and (a.byte1 = b.byte1) and (a.byte0 >= b.byte0));
 | ||
| end;
 | ||
| 
 | ||
| class operator TUInt24Rec.GreaterThanOrEqual(a: TUInt24Rec; b: Cardinal): Boolean;
 | ||
| begin
 | ||
|   Result := Cardinal(a) >= b;
 | ||
| end;
 | ||
| 
 | ||
| class operator TUInt24Rec.GreaterThanOrEqual(a: Cardinal; b: TUInt24Rec): Boolean;
 | ||
| begin
 | ||
|   Result := a >= Cardinal(b);
 | ||
| end;
 | ||
| 
 | ||
| class operator TUInt24Rec.LessThan(a, b: TUInt24Rec): Boolean;
 | ||
| begin
 | ||
|   Result := (b > a);
 | ||
| end;
 | ||
| 
 | ||
| class operator TUInt24Rec.LessThan(a: TUInt24Rec; b: Cardinal): Boolean;
 | ||
| begin
 | ||
|   Result := Cardinal(a) < b;
 | ||
| end;
 | ||
| 
 | ||
| class operator TUInt24Rec.LessThan(a: Cardinal; b: TUInt24Rec): Boolean;
 | ||
| begin
 | ||
|   Result := a < Cardinal(b);
 | ||
| end;
 | ||
| 
 | ||
| class operator TUInt24Rec.LessThanOrEqual(a, b: TUInt24Rec): Boolean;
 | ||
| begin
 | ||
|   Result := (b >= a);
 | ||
| end;
 | ||
| 
 | ||
| class operator TUInt24Rec.LessThanOrEqual(a: TUInt24Rec; b: Cardinal): Boolean;
 | ||
| begin
 | ||
|   Result := Cardinal(a) <= b;
 | ||
| end;
 | ||
| 
 | ||
| class operator TUInt24Rec.LessThanOrEqual(a: Cardinal; b: TUInt24Rec): Boolean;
 | ||
| begin
 | ||
|   Result := a <= Cardinal(b);
 | ||
| end;
 | ||
| 
 | ||
| type
 | ||
|   TBitOrder = 0..7;
 | ||
| function IsBitON(const AData : Byte; const ABit : TBitOrder) : Boolean ;inline;
 | ||
| begin
 | ||
|   Result := ( ( AData and ( 1 shl ABit ) ) <> 0 );
 | ||
| end;
 | ||
| 
 | ||
| procedure SetBit(var AData : Byte; const ABit : TBitOrder; const AValue : Boolean);inline;
 | ||
| begin
 | ||
|   if AValue then
 | ||
|     AData := AData or (1 shl (ABit mod 8))
 | ||
|   else
 | ||
|     AData := AData and ( not ( 1 shl ( ABit mod 8 ) ) );
 | ||
| end;
 | ||
| 
 | ||
| var
 | ||
|   CollationTable : array of PUCA_DataBook;
 | ||
| function IndexOfCollation(const AName : string) : Integer;
 | ||
| var
 | ||
|   i, c : Integer;
 | ||
|   p : Pointer;
 | ||
| begin
 | ||
|   c := Length(AName);
 | ||
|   p := @AName[1];
 | ||
|   for i := 0 to Length(CollationTable) - 1 do begin
 | ||
|     if (Length(CollationTable[i]^.CollationName) = c) and
 | ||
|        (CompareByte((CollationTable[i]^.CollationName[1]),p^,c)=0)
 | ||
|     then
 | ||
|       exit(i);
 | ||
|   end;
 | ||
|   Result := -1;
 | ||
| end;
 | ||
| 
 | ||
| function RegisterCollation(const ACollation : PUCA_DataBook) : Boolean;
 | ||
| var
 | ||
|   i : Integer;
 | ||
| begin
 | ||
|   Result := (IndexOfCollation(ACollation^.CollationName) = -1);
 | ||
|   if Result then begin
 | ||
|     i := Length(CollationTable);
 | ||
|     SetLength(CollationTable,(i+1));
 | ||
|     CollationTable[i] := ACollation;
 | ||
|   end;
 | ||
| end;
 | ||
| 
 | ||
| function RegisterCollation(const ADirectory, ALanguage : string) : Boolean;
 | ||
| var
 | ||
|   cl : PUCA_DataBook;
 | ||
| begin
 | ||
|   cl := LoadCollation(ADirectory,ALanguage);
 | ||
|   if (cl = nil) then
 | ||
|     exit(False);
 | ||
|   try
 | ||
|     Result := RegisterCollation(cl);
 | ||
|   except
 | ||
|     FreeCollation(cl);
 | ||
|     raise;
 | ||
|   end;
 | ||
|   if not Result then
 | ||
|     FreeCollation(cl);
 | ||
| end;
 | ||
| 
 | ||
| function UnregisterCollation(const AName : ansistring): Boolean;
 | ||
| var
 | ||
|   i, c : Integer;
 | ||
| begin
 | ||
|   i := IndexOfCollation(AName);
 | ||
|   Result := (i >= 0);
 | ||
|   if Result then begin
 | ||
|     c := Length(CollationTable);
 | ||
|     if (c = 1) then begin
 | ||
|       SetLength(CollationTable,0);
 | ||
|     end else begin
 | ||
|       CollationTable[i] := CollationTable[c-1];
 | ||
|       SetLength(CollationTable,(c-1));
 | ||
|     end;
 | ||
|   end;
 | ||
| end;
 | ||
| 
 | ||
| procedure UnregisterCollations(const AFreeDynamicCollations : Boolean);
 | ||
| var
 | ||
|   i : Integer;
 | ||
|   cl : PUCA_DataBook;
 | ||
| begin
 | ||
|   if AFreeDynamicCollations then begin
 | ||
|     for i := Low(CollationTable) to High(CollationTable) do begin
 | ||
|       if CollationTable[i].Dynamic then begin
 | ||
|         cl := CollationTable[i];
 | ||
|         CollationTable[i] := nil;
 | ||
|         FreeCollation(cl);
 | ||
|       end;
 | ||
|     end;
 | ||
|   end;
 | ||
|   SetLength(CollationTable,0);
 | ||
| end;
 | ||
| 
 | ||
| function FindCollation(const AName : ansistring): PUCA_DataBook;overload;
 | ||
| var
 | ||
|   i : Integer;
 | ||
| begin
 | ||
|   i := IndexOfCollation(AName);
 | ||
|   if (i = -1) then
 | ||
|     Result := nil
 | ||
|   else
 | ||
|     Result := CollationTable[i];
 | ||
| end;
 | ||
| 
 | ||
| function GetCollationCount() : Integer;
 | ||
| begin
 | ||
|   Result := Length(CollationTable);
 | ||
| end;
 | ||
| 
 | ||
| function FindCollation(const AIndex : Integer): PUCA_DataBook;overload;
 | ||
| begin
 | ||
|   if (AIndex < 0) or (AIndex >= Length(CollationTable)) then
 | ||
|     Result := nil
 | ||
|   else
 | ||
|     Result := CollationTable[AIndex];
 | ||
| end;
 | ||
| 
 | ||
| procedure PrepareCollation(
 | ||
|         ACollation     : PUCA_DataBook;
 | ||
|   const ABaseName      : ansistring;
 | ||
|   const AChangedFields : TCollationFields
 | ||
| );
 | ||
| var
 | ||
|   s : ansistring;
 | ||
|   p, base : PUCA_DataBook;
 | ||
| begin
 | ||
|   if (ABaseName <> '') then
 | ||
|     s := ABaseName
 | ||
|   else
 | ||
|     s := ROOT_COLLATION_NAME;
 | ||
|   p := ACollation;
 | ||
|   base := FindCollation(s);
 | ||
|   if (base = nil) then
 | ||
|     Error(reCodesetConversion);
 | ||
|   p^.Base := base;
 | ||
|   if not(TCollationField.BackWard in AChangedFields) then
 | ||
|     p^.Backwards := base^.Backwards;
 | ||
|   if not(TCollationField.VariableLowLimit in AChangedFields) then
 | ||
|     p^.VariableLowLimit := base^.VariableLowLimit;
 | ||
|   if not(TCollationField.VariableHighLimit in AChangedFields) then
 | ||
|     p^.VariableLowLimit := base^.VariableHighLimit;
 | ||
| end;
 | ||
| 
 | ||
| type
 | ||
|   TSerializedCollationHeader = packed record
 | ||
|     Base               : TCollationName;
 | ||
|     Version            : TCollationName;
 | ||
|     CollationName      : TCollationName;
 | ||
|     VariableWeight     : Byte;
 | ||
|     Backwards          : Byte;
 | ||
|     BMP_Table1Length   : DWord;
 | ||
|     BMP_Table2Length   : DWord;
 | ||
|     OBMP_Table1Length  : DWord;
 | ||
|     OBMP_Table2Length  : DWord;
 | ||
|     PropCount          : DWord;
 | ||
|     VariableLowLimit   : Word;
 | ||
|     VariableHighLimit  : Word;
 | ||
|     ChangedFields      : Byte;
 | ||
|   end;
 | ||
|   PSerializedCollationHeader = ^TSerializedCollationHeader;
 | ||
| 
 | ||
| procedure FreeCollation(AItem : PUCA_DataBook);
 | ||
| var
 | ||
|   h : PSerializedCollationHeader;
 | ||
| begin
 | ||
|   if (AItem = nil) or not(AItem^.Dynamic) then
 | ||
|     exit;
 | ||
|   h := PSerializedCollationHeader(PtrUInt(AItem) + SizeOf(TUCA_DataBook));
 | ||
|   if (AItem^.BMP_Table1 <> nil) then
 | ||
|     FreeMem(AItem^.BMP_Table1,h^.BMP_Table1Length);
 | ||
|   if (AItem^.BMP_Table2 <> nil) then
 | ||
|     FreeMem(AItem^.BMP_Table2,h^.BMP_Table2Length);
 | ||
|   if (AItem^.OBMP_Table1 <> nil) then
 | ||
|     FreeMem(AItem^.OBMP_Table1,h^.OBMP_Table1Length);
 | ||
|   if (AItem^.OBMP_Table2 <> nil) then
 | ||
|     FreeMem(AItem^.OBMP_Table2,h^.OBMP_Table2Length);
 | ||
|   if (AItem^.Props <> nil) then
 | ||
|     FreeMem(AItem^.Props,h^.PropCount);
 | ||
|   FreeMem(AItem,(SizeOf(TUCA_DataBook)+SizeOf(TSerializedCollationHeader)));
 | ||
| end;
 | ||
| 
 | ||
| function LoadCollation(
 | ||
|   const AData       : Pointer;
 | ||
|   const ADataLength : Integer
 | ||
| ) : PUCA_DataBook;
 | ||
| var
 | ||
|   dataPointer : PByte;
 | ||
|   readedLength : LongInt;
 | ||
| 
 | ||
|   function ReadBuffer(ADest : Pointer; ALength : LongInt) : Boolean;
 | ||
|   begin
 | ||
|     Result := (readedLength + ALength) <= ADataLength;
 | ||
|     if not result then
 | ||
|       exit;
 | ||
|     Move(dataPointer^,ADest^,ALength);
 | ||
|     Inc(dataPointer,ALength);
 | ||
|     readedLength := readedLength + ALength;
 | ||
|   end;
 | ||
| 
 | ||
| var
 | ||
|   r : PUCA_DataBook;
 | ||
|   h : PSerializedCollationHeader;
 | ||
|   cfs : TCollationFields;
 | ||
|   i : Integer;
 | ||
|   baseName : TCollationName;
 | ||
| begin
 | ||
|   readedLength := 0;
 | ||
|   dataPointer := AData;
 | ||
|   r := AllocMem((SizeOf(TUCA_DataBook)+SizeOf(TSerializedCollationHeader)));
 | ||
|   try
 | ||
|     h := PSerializedCollationHeader(PtrUInt(r) + SizeOf(TUCA_DataBook));
 | ||
|     if not ReadBuffer(h,SizeOf(TSerializedCollationHeader)) then
 | ||
|       exit;
 | ||
|     r^.Version := h^.Version;
 | ||
|     r^.CollationName := h^.CollationName;
 | ||
|     r^.VariableWeight := TUCA_VariableKind(h^.VariableWeight);
 | ||
|     r^.Backwards[0] := IsBitON(h^.Backwards,0);
 | ||
|     r^.Backwards[1] := IsBitON(h^.Backwards,1);
 | ||
|     r^.Backwards[2] := IsBitON(h^.Backwards,2);
 | ||
|     r^.Backwards[3] := IsBitON(h^.Backwards,3);
 | ||
|     if (h^.BMP_Table1Length > 0) then begin
 | ||
|       r^.BMP_Table1 := GetMem(h^.BMP_Table1Length);
 | ||
|         if not ReadBuffer(r^.BMP_Table1,h^.BMP_Table1Length) then
 | ||
|           exit;
 | ||
|     end;
 | ||
|     if (h^.BMP_Table2Length > 0) then begin
 | ||
|       r^.BMP_Table2 := GetMem(h^.BMP_Table2Length);
 | ||
|         if not ReadBuffer(r^.BMP_Table2,h^.BMP_Table2Length) then
 | ||
|           exit;
 | ||
|     end;
 | ||
|     if (h^.OBMP_Table1Length > 0) then begin
 | ||
|       r^.OBMP_Table1 := GetMem(h^.OBMP_Table1Length);
 | ||
|         if not ReadBuffer(r^.OBMP_Table1,h^.OBMP_Table1Length) then
 | ||
|           exit;
 | ||
|     end;
 | ||
|     if (h^.OBMP_Table2Length > 0) then begin
 | ||
|       r^.OBMP_Table2 := GetMem(h^.OBMP_Table2Length);
 | ||
|         if not ReadBuffer(r^.OBMP_Table2,h^.OBMP_Table2Length) then
 | ||
|           exit;
 | ||
|     end;
 | ||
|     r^.PropCount := h^.PropCount;
 | ||
|     if (h^.PropCount > 0) then begin
 | ||
|       r^.Props := GetMem(h^.PropCount);
 | ||
|         if not ReadBuffer(r^.Props,h^.PropCount) then
 | ||
|           exit;
 | ||
|     end;
 | ||
|     r^.VariableLowLimit := h^.VariableLowLimit;
 | ||
|     r^.VariableHighLimit := h^.VariableHighLimit;
 | ||
| 
 | ||
|     cfs := [];
 | ||
|     for i := Ord(Low(TCollationField)) to Ord(High(TCollationField)) do begin
 | ||
|       if IsBitON(h^.ChangedFields,i) then
 | ||
|         cfs := cfs + [TCollationField(i)];
 | ||
|     end;
 | ||
|     if (h^.Base <> '') then
 | ||
|       baseName := h^.Base
 | ||
|     else if (h^.CollationName <> ROOT_COLLATION_NAME) then
 | ||
|       baseName := ROOT_COLLATION_NAME
 | ||
|     else
 | ||
|       baseName := '';
 | ||
|     if (baseName <> '') then
 | ||
|       PrepareCollation(r,baseName,cfs);
 | ||
|     r^.Dynamic := True;
 | ||
|     Result := r;
 | ||
|   except
 | ||
|     FreeCollation(r);
 | ||
|     raise;
 | ||
|   end;
 | ||
| end;
 | ||
| 
 | ||
| {$PUSH}
 | ||
| function LoadCollation(const AFileName : string) : PUCA_DataBook;
 | ||
| const
 | ||
|   BLOCK_SIZE = 16*1024;
 | ||
| var
 | ||
|   f : File of Byte;
 | ||
|   locSize, locReaded, c : LongInt;
 | ||
|   locBuffer : PByte;
 | ||
|   locBlockSize : LongInt;
 | ||
| begin
 | ||
|   Result := nil;
 | ||
| {$I-}
 | ||
|   if (AFileName = '') then
 | ||
|     exit;
 | ||
|   Assign(f,AFileName);
 | ||
|   Reset(f);
 | ||
|   try
 | ||
|     if (IOResult <> 0) then
 | ||
|       exit;
 | ||
|     locSize := FileSize(f);
 | ||
|     if (locSize < SizeOf(TSerializedCollationHeader)) then
 | ||
|       exit;
 | ||
|     locBuffer := GetMem(locSize);
 | ||
|     try
 | ||
|       locBlockSize := BLOCK_SIZE;
 | ||
|       locReaded := 0;
 | ||
|       while (locReaded < locSize) do begin
 | ||
|         if (locBlockSize > (locSize-locReaded)) then
 | ||
|           locBlockSize := locSize-locReaded;
 | ||
|         BlockRead(f,locBuffer[locReaded],locBlockSize,c);
 | ||
|         if (IOResult <> 0) or (c <= 0) then
 | ||
|           exit;
 | ||
|         locReaded := locReaded + c;
 | ||
|       end;
 | ||
|       Result := LoadCollation(locBuffer,locSize);
 | ||
|     finally
 | ||
|       FreeMem(locBuffer,locSize);
 | ||
|     end;
 | ||
|   finally
 | ||
|     Close(f);
 | ||
|   end;
 | ||
| end;
 | ||
| {$POP}
 | ||
| 
 | ||
| function LoadCollation(const ADirectory, ALanguage : string) : PUCA_DataBook;
 | ||
| var
 | ||
|   fileName : string;
 | ||
| begin
 | ||
|   fileName := ADirectory;
 | ||
|   if (fileName <> '') then begin
 | ||
|     if (fileName[Length(fileName)] <> DirectorySeparator) then
 | ||
|       fileName := fileName + DirectorySeparator;
 | ||
|   end;
 | ||
|   fileName := fileName + 'collation_' + ALanguage + '_' + ENDIAN_SUFFIX[ENDIAN_NATIVE] + '.bco';
 | ||
|   Result := LoadCollation(fileName);
 | ||
| end;
 | ||
| 
 | ||
| {$INCLUDE unicodedata.inc}
 | ||
| {$IFDEF ENDIAN_LITTLE}
 | ||
|   {$INCLUDE unicodedata_le.inc}
 | ||
| {$ENDIF ENDIAN_LITTLE}
 | ||
| {$IFDEF ENDIAN_BIG}
 | ||
|   {$INCLUDE unicodedata_be.inc}
 | ||
| {$ENDIF ENDIAN_BIG}
 | ||
| 
 | ||
| procedure FromUCS4(const AValue : UCS4Char; var AHighS, ALowS : UnicodeChar);inline;
 | ||
| begin
 | ||
|   AHighS := UnicodeChar((AValue - $10000) shr 10 + $d800);
 | ||
|   ALowS := UnicodeChar((AValue - $10000) and $3ff + $dc00);
 | ||
| end;
 | ||
| 
 | ||
| function ToUCS4(const AHighS, ALowS : UnicodeChar) : UCS4Char;inline;
 | ||
| begin
 | ||
|   Result := (UCS4Char(Word(AHighS)) - HIGH_SURROGATE_BEGIN) shl 10 +
 | ||
|             (UCS4Char(Word(ALowS)) - LOW_SURROGATE_BEGIN) + UCS4_HALF_BASE;
 | ||
| end;
 | ||
| 
 | ||
| function UnicodeIsSurrogatePair(
 | ||
|   const AHighSurrogate,
 | ||
|         ALowSurrogate   : UnicodeChar
 | ||
| ) : Boolean;
 | ||
| begin
 | ||
|   Result :=
 | ||
|     ( (Word(AHighSurrogate) >= HIGH_SURROGATE_BEGIN) and
 | ||
|       (Word(AHighSurrogate) <= HIGH_SURROGATE_END)
 | ||
|     ) and
 | ||
|     ( (Word(ALowSurrogate) >= LOW_SURROGATE_BEGIN) and
 | ||
|       (Word(ALowSurrogate) <= LOW_SURROGATE_END)
 | ||
|     )
 | ||
| end;
 | ||
| 
 | ||
| function UnicodeIsHighSurrogate(const AValue : UnicodeChar) : Boolean;
 | ||
| begin
 | ||
|   Result := (Word(AValue) >= HIGH_SURROGATE_BEGIN) and
 | ||
|             (Word(AValue) <= HIGH_SURROGATE_END);
 | ||
| end;
 | ||
| 
 | ||
| function UnicodeIsLowSurrogate(const AValue : UnicodeChar) : Boolean;
 | ||
| begin
 | ||
|   Result := (Word(AValue) >= LOW_SURROGATE_BEGIN) and
 | ||
|             (Word(AValue) <= LOW_SURROGATE_END);
 | ||
| end;
 | ||
| 
 | ||
| function GetProps(const ACodePoint : Word) : PUC_Prop;overload;inline;
 | ||
| begin
 | ||
|   Result:=
 | ||
|     @UC_PROP_ARRAY[
 | ||
|        UC_TABLE_3[
 | ||
|          UC_TABLE_2[UC_TABLE_1[hi(ACodePoint)]]
 | ||
|            [lo(ACodePoint) shr 4]
 | ||
|        ][lo(ACodePoint) and $F]
 | ||
|      ];    {
 | ||
|     @UC_PROP_ARRAY[
 | ||
|        UC_TABLE_2[
 | ||
|          (UC_TABLE_1[WordRec(ACodePoint).Hi] * 256) +
 | ||
|          WordRec(ACodePoint).Lo
 | ||
|        ]
 | ||
|      ];}
 | ||
| end;
 | ||
| 
 | ||
| function GetProps(const AHighS, ALowS : UnicodeChar): PUC_Prop;overload;inline;
 | ||
| begin
 | ||
|   Result:=
 | ||
|     @UC_PROP_ARRAY[
 | ||
|        UCO_TABLE_3[
 | ||
|          UCO_TABLE_2[UCO_TABLE_1[Word(AHighS)-HIGH_SURROGATE_BEGIN]]
 | ||
|            [(Word(ALowS) - LOW_SURROGATE_BEGIN) div 32]
 | ||
|        ][(Word(ALowS) - LOW_SURROGATE_BEGIN) mod 32]
 | ||
|      ];    {
 | ||
|   Result:=
 | ||
|     @UC_PROP_ARRAY[
 | ||
|        UCO_TABLE_2[
 | ||
|          (UCO_TABLE_1[Word(AHighS)-HIGH_SURROGATE_BEGIN] * HIGH_SURROGATE_COUNT) +
 | ||
|          Word(ALowS) - LOW_SURROGATE_BEGIN
 | ||
|        ]
 | ||
|      ]; }
 | ||
| end;
 | ||
| 
 | ||
| function GetProps(const ACodePoint : Cardinal) : PUC_Prop;inline;
 | ||
| var
 | ||
|   l, h : UnicodeChar;
 | ||
| begin
 | ||
|   if (ACodePoint <= High(Word)) then
 | ||
|     exit(GetProps(Word(ACodePoint)));
 | ||
|   FromUCS4(ACodePoint,h,l);
 | ||
|   Result := GetProps(h,l);
 | ||
| end;
 | ||
| 
 | ||
| function UnicodeToUpper(
 | ||
|   const AString                : UnicodeString;
 | ||
|   const AIgnoreInvalidSequence : Boolean;
 | ||
|   out   AResultString          : UnicodeString
 | ||
| ) : Integer;
 | ||
| var
 | ||
|   i, c : SizeInt;
 | ||
|   pp, pr : PUnicodeChar;
 | ||
|   pu : PUC_Prop;
 | ||
|   locIsSurrogate : Boolean;
 | ||
|   r : UnicodeString;
 | ||
| begin
 | ||
|   c := Length(AString);
 | ||
|   SetLength(r,2*c);
 | ||
|   if (c > 0) then begin
 | ||
|     pp := @AString[1];
 | ||
|     pr := @r[1];
 | ||
|     i := 1;
 | ||
|     while (i <= c) do begin
 | ||
|       pu := GetProps(Word(pp^));
 | ||
|       locIsSurrogate := (pu^.Category = UGC_Surrogate);
 | ||
|       if locIsSurrogate then begin
 | ||
|         if (i = c) or not(UnicodeIsSurrogatePair(pp[0],pp[1])) then begin
 | ||
|           if AIgnoreInvalidSequence then begin
 | ||
|             pr^ := pp^;
 | ||
|             Inc(pp);
 | ||
|             Inc(pr);
 | ||
|             Inc(i);
 | ||
|             Continue;
 | ||
|           end;
 | ||
|           exit(ERROR_INVALID_CODEPOINT_SEQUENCE);
 | ||
|         end;
 | ||
|         pu := GetProps(pp^,AString[i+1]);
 | ||
|       end;
 | ||
|       if (pu^.SimpleUpperCase = 0) then begin
 | ||
|         pr^ := pp^;
 | ||
|         if locIsSurrogate then begin
 | ||
|           Inc(pp);
 | ||
|           Inc(pr);
 | ||
|           Inc(i);
 | ||
|           pr^ := pp^;
 | ||
|         end;
 | ||
|       end else begin
 | ||
|         if (pu^.SimpleUpperCase <= $FFFF) then begin
 | ||
|           pr^ := UnicodeChar(Word(pu^.SimpleUpperCase));
 | ||
|         end else begin
 | ||
|           FromUCS4(UCS4Char(Cardinal(pu^.SimpleUpperCase)),pr^,PUnicodeChar(PtrUInt(pr)+SizeOf(UnicodeChar))^);
 | ||
|           Inc(pr);
 | ||
|         end;
 | ||
|         if locIsSurrogate then begin
 | ||
|           Inc(pp);
 | ||
|           Inc(i);
 | ||
|         end;
 | ||
|       end;
 | ||
|       Inc(pp);
 | ||
|       Inc(pr);
 | ||
|       Inc(i);
 | ||
|     end;
 | ||
|     Dec(pp);
 | ||
|     i := ((PtrUInt(pr) - PtrUInt(@r[1])) div SizeOf(UnicodeChar));
 | ||
|     SetLength(r,i);
 | ||
|     AResultString := r;
 | ||
|   end;
 | ||
|   Result := 0;
 | ||
| end;
 | ||
| 
 | ||
| function UnicodeToLower(
 | ||
|   const AString                : UnicodeString;
 | ||
|   const AIgnoreInvalidSequence : Boolean;
 | ||
|   out   AResultString          : UnicodeString
 | ||
| ) : Integer;
 | ||
| var
 | ||
|   i, c : SizeInt;
 | ||
|   pp, pr : PUnicodeChar;
 | ||
|   pu : PUC_Prop;
 | ||
|   locIsSurrogate : Boolean;
 | ||
|   r : UnicodeString;
 | ||
| begin
 | ||
|   c := Length(AString);
 | ||
|   SetLength(r,2*c);
 | ||
|   if (c > 0) then begin
 | ||
|     pp := @AString[1];
 | ||
|     pr := @r[1];
 | ||
|     i := 1;
 | ||
|     while (i <= c) do begin
 | ||
|       pu := GetProps(Word(pp^));
 | ||
|       locIsSurrogate := (pu^.Category = UGC_Surrogate);
 | ||
|       if locIsSurrogate then begin
 | ||
|         if (i = c) or not(UnicodeIsSurrogatePair(pp[0],pp[1])) then begin
 | ||
|           if AIgnoreInvalidSequence then begin
 | ||
|             pr^ := pp^;
 | ||
|             Inc(pp);
 | ||
|             Inc(pr);
 | ||
|             Inc(i);
 | ||
|             Continue;
 | ||
|           end;
 | ||
|           exit(ERROR_INVALID_CODEPOINT_SEQUENCE);
 | ||
|         end;
 | ||
|         pu := GetProps(pp^,AString[i+1]);
 | ||
|       end;
 | ||
|       if (pu^.SimpleLowerCase = 0) then begin
 | ||
|         pr^ := pp^;
 | ||
|         if locIsSurrogate then begin
 | ||
|           Inc(pp);
 | ||
|           Inc(pr);
 | ||
|           Inc(i);
 | ||
|           pr^ := pp^;
 | ||
|         end;
 | ||
|       end else begin
 | ||
|         if (pu^.SimpleLowerCase <= $FFFF) then begin
 | ||
|           pr^ := UnicodeChar(Word(pu^.SimpleLowerCase));
 | ||
|         end else begin
 | ||
|           FromUCS4(UCS4Char(Cardinal(pu^.SimpleLowerCase)),pr^,PUnicodeChar(PtrUInt(pr)+SizeOf(UnicodeChar))^);
 | ||
|           Inc(pr);
 | ||
|         end;
 | ||
|         if locIsSurrogate then begin
 | ||
|           Inc(pp);
 | ||
|           Inc(i);
 | ||
|         end;
 | ||
|       end;
 | ||
|       Inc(pp);
 | ||
|       Inc(pr);
 | ||
|       Inc(i);
 | ||
|     end;
 | ||
|     Dec(pp);
 | ||
|     i := ((PtrUInt(pr) - PtrUInt(@r[1])) div SizeOf(UnicodeChar));
 | ||
|     SetLength(r,i);
 | ||
|     AResultString := r;
 | ||
|   end;
 | ||
|   Result := 0;
 | ||
| end;
 | ||
| 
 | ||
| //----------------------------------------------------------------------
 | ||
| function DecomposeHangul(const AChar : Cardinal; ABuffer : PCardinal) : Integer;
 | ||
| const
 | ||
|   SBase  = $AC00;
 | ||
|   LBase  = $1100;
 | ||
|   VBase  = $1161;
 | ||
|   TBase  = $11A7;
 | ||
|   LCount = 19;
 | ||
|   VCount = 21;
 | ||
|   TCount = 28;
 | ||
|   NCount = VCount * TCount;   // 588
 | ||
|   SCount = LCount * NCount;   // 11172
 | ||
| var
 | ||
|   SIndex, L, V, T : Integer;
 | ||
| begin
 | ||
|   SIndex := AChar - SBase;
 | ||
|   if (SIndex < 0) or (SIndex >= SCount) then begin
 | ||
|     ABuffer^ := AChar;
 | ||
|     exit(1);
 | ||
|   end;
 | ||
|   L := LBase + SIndex div NCount;
 | ||
|   V := VBase + (SIndex mod NCount) div TCount;
 | ||
|   T := TBase + SIndex mod TCount;
 | ||
|   ABuffer[0] := L;
 | ||
|   ABuffer[1] := V;
 | ||
|   Result := 2;
 | ||
|   if (T <> TBase) then begin
 | ||
|     ABuffer[2] := T;
 | ||
|     Inc(Result);
 | ||
|   end;
 | ||
| end;
 | ||
| 
 | ||
| function Decompose(const ADecomposeIndex : Integer; ABuffer : PUnicodeChar) : Integer;
 | ||
| var
 | ||
|   locStack : array[0..23] of Cardinal;
 | ||
|   locStackIdx : Integer;
 | ||
|   ResultBuffer : array[0..23] of Cardinal;
 | ||
|   ResultIdx : Integer;
 | ||
| 
 | ||
|   procedure AddCompositionToStack(const AIndex : Integer);
 | ||
|   var
 | ||
|     pdecIdx : ^TDecompositionIndexRec;
 | ||
|     k, kc : Integer;
 | ||
|     pu : ^UInt24;
 | ||
|   begin
 | ||
|     pdecIdx := @(UC_DEC_BOOK_DATA.Index[AIndex]);
 | ||
|     pu := @(UC_DEC_BOOK_DATA.CodePoints[pdecIdx^.StartPosition]);
 | ||
|     kc := pdecIdx^.Length;
 | ||
|     Inc(pu,kc);
 | ||
|     for k := 1 to kc do begin
 | ||
|       Dec(pu);
 | ||
|       locStack[locStackIdx + k] := pu^;
 | ||
|     end;
 | ||
|     locStackIdx := locStackIdx + kc;
 | ||
|   end;
 | ||
| 
 | ||
|   procedure AddResult(const AChar : Cardinal);inline;
 | ||
|   begin
 | ||
|     Inc(ResultIdx);
 | ||
|     ResultBuffer[ResultIdx] := AChar;
 | ||
|   end;
 | ||
| 
 | ||
|   function PopStack() : Cardinal;inline;
 | ||
|   begin
 | ||
|     Result := locStack[locStackIdx];
 | ||
|     Dec(locStackIdx);
 | ||
|   end;
 | ||
| 
 | ||
| var
 | ||
|   cu : Cardinal;
 | ||
|   decIdx : SmallInt;
 | ||
|   locIsWord : Boolean;
 | ||
|   i : Integer;
 | ||
|   p : PUnicodeChar;
 | ||
| begin
 | ||
|   ResultIdx := -1;
 | ||
|   locStackIdx := -1;
 | ||
|   AddCompositionToStack(ADecomposeIndex);
 | ||
|   while (locStackIdx >= 0) do begin
 | ||
|     cu := PopStack();
 | ||
|     locIsWord := (cu <= MAX_WORD);
 | ||
|     if locIsWord then
 | ||
|       decIdx := GetProps(Word(cu))^.DecompositionID
 | ||
|     else
 | ||
|       decIdx := GetProps(cu)^.DecompositionID;
 | ||
|     if (decIdx = -1) then
 | ||
|       AddResult(cu)
 | ||
|     else
 | ||
|       AddCompositionToStack(decIdx);
 | ||
|   end;
 | ||
|   p := ABuffer;
 | ||
|   Result := 0;
 | ||
|   for i := 0 to ResultIdx do begin
 | ||
|     cu := ResultBuffer[i];
 | ||
|     if (cu <= MAX_WORD) then begin
 | ||
|       p[0] := UnicodeChar(Word(cu));
 | ||
|       Inc(p);
 | ||
|     end else begin
 | ||
|       FromUCS4(cu,p[0],p[1]);
 | ||
|       Inc(p,2);
 | ||
|       Inc(Result);
 | ||
|     end;
 | ||
|   end;
 | ||
|   Result := Result + ResultIdx + 1;
 | ||
| end;
 | ||
| 
 | ||
| procedure CanonicalOrder(var AString : UnicodeString);
 | ||
| begin
 | ||
|   CanonicalOrder(@AString[1],Length(AString));
 | ||
| end;
 | ||
| 
 | ||
| procedure CanonicalOrder(AStr : PUnicodeChar; const ALength : SizeInt);
 | ||
| var
 | ||
|   i, c : SizeInt;
 | ||
|   p, q : PUnicodeChar;
 | ||
|   locIsSurrogateP, locIsSurrogateQ : Boolean;
 | ||
| 
 | ||
|   procedure Swap();
 | ||
|   var
 | ||
|     t, t1 : UnicodeChar;
 | ||
|   begin
 | ||
|     if not locIsSurrogateP then begin
 | ||
|       if not locIsSurrogateQ then begin
 | ||
|         t := p^;
 | ||
|         p^ := q^;
 | ||
|         q^ := t;
 | ||
|         exit;
 | ||
|       end;
 | ||
|       t := p^;
 | ||
|       p[0] := q[0];
 | ||
|       p[1] := q[1];
 | ||
|       q[1] := t;
 | ||
|       exit;
 | ||
|     end;
 | ||
|     if not locIsSurrogateQ then begin
 | ||
|       t := q[0];
 | ||
|       p[2] := p[1];
 | ||
|       p[1] := p[0];
 | ||
|       p[0] := t;
 | ||
|       exit;
 | ||
|     end;
 | ||
|     t := p[0];
 | ||
|     t1 := p[1];
 | ||
|     p[0] := q[0];
 | ||
|     p[1] := q[1];
 | ||
|     q[0] := t;
 | ||
|     q[1] := t1;
 | ||
|   end;
 | ||
| 
 | ||
| var
 | ||
|   pu : PUC_Prop;
 | ||
|   cccp, cccq : Byte;
 | ||
| begin
 | ||
|   c := ALength;
 | ||
|   if (c < 2) then
 | ||
|     exit;
 | ||
|   p := AStr;
 | ||
|   i := 1;
 | ||
|   while (i < c) do begin
 | ||
|     pu := GetProps(Word(p^));
 | ||
|     locIsSurrogateP := (pu^.Category = UGC_Surrogate);
 | ||
|     if locIsSurrogateP then begin
 | ||
|       if (i = (c - 1)) then
 | ||
|         Break;
 | ||
|       if not UnicodeIsSurrogatePair(p[0],p[1]) then begin
 | ||
|         Inc(p);
 | ||
|         Inc(i);
 | ||
|         Continue;
 | ||
|       end;
 | ||
|       pu := GetProps(p[0],p[1]);
 | ||
|     end;
 | ||
|     if (pu^.CCC > 0) then begin
 | ||
|       cccp := pu^.CCC;
 | ||
|       if locIsSurrogateP then
 | ||
|         q := p + 2
 | ||
|       else
 | ||
|         q := p + 1;
 | ||
|       pu := GetProps(Word(q^));
 | ||
|       locIsSurrogateQ := (pu^.Category = UGC_Surrogate);
 | ||
|       if locIsSurrogateQ then begin
 | ||
|         if (i = c) then
 | ||
|           Break;
 | ||
|         if not UnicodeIsSurrogatePair(q[0],q[1]) then begin
 | ||
|           Inc(p);
 | ||
|           Inc(i);
 | ||
|           Continue;
 | ||
|         end;
 | ||
|         pu := GetProps(q[0],q[1]);
 | ||
|       end;
 | ||
|       cccq := pu^.CCC;
 | ||
|       if (cccq > 0) and (cccp > cccq) then begin
 | ||
|         Swap();
 | ||
|         if (i > 1) then begin
 | ||
|           Dec(p);
 | ||
|           Dec(i);
 | ||
|           pu := GetProps(Word(p^));
 | ||
|           if (pu^.Category = UGC_Surrogate) then begin
 | ||
|             if (i > 1) then begin
 | ||
|               Dec(p);
 | ||
|               Dec(i);
 | ||
|             end;
 | ||
|           end;
 | ||
|           Continue;
 | ||
|         end;
 | ||
|       end;
 | ||
|     end;
 | ||
|     if locIsSurrogateP then begin
 | ||
|       Inc(p);
 | ||
|       Inc(i);
 | ||
|     end;
 | ||
|     Inc(p);
 | ||
|     Inc(i);
 | ||
|   end;
 | ||
| end;
 | ||
| 
 | ||
| function DecomposeCanonical(const AString : UnicodeString) : UnicodeString;
 | ||
| begin
 | ||
|   Result := DecomposeCanonical(@AString[1],Length(AString));
 | ||
| end;
 | ||
| 
 | ||
| function DecomposeCanonical(const AStr : PUnicodeChar; ALength : SizeInt) : UnicodeString;
 | ||
| const MAX_EXPAND = 3;
 | ||
| var
 | ||
|   i, c, kc, k : SizeInt;
 | ||
|   pp, pr : PUnicodeChar;
 | ||
|   pu : PUC_Prop;
 | ||
|   locIsSurrogate : Boolean;
 | ||
|   cpArray : array[0..7] of Cardinal;
 | ||
|   cp : Cardinal;
 | ||
| begin
 | ||
|   c := ALength;
 | ||
|   SetLength(Result,(MAX_EXPAND*c));
 | ||
|   if (c > 0) then begin
 | ||
|     pp := AStr;
 | ||
|     pr := @Result[1];
 | ||
|     i := 1;
 | ||
|     while (i <= c) do begin
 | ||
|       pu := GetProps(Word(pp^));
 | ||
|       locIsSurrogate := (pu^.Category = UGC_Surrogate);
 | ||
|       if locIsSurrogate then begin
 | ||
|         if (i = c) then
 | ||
|           Break;
 | ||
|         if not UnicodeIsSurrogatePair(pp[0],pp[1]) then begin
 | ||
|           pr^ := pp^;
 | ||
|           Inc(pp);
 | ||
|           Inc(pr);
 | ||
|           Inc(i);
 | ||
|           Continue;
 | ||
|         end;
 | ||
|         pu := GetProps(pp[0],pp[1]);
 | ||
|       end;
 | ||
|       if pu^.HangulSyllable then begin
 | ||
|         if locIsSurrogate then begin
 | ||
|           cp := ToUCS4(pp[0],pp[1]);
 | ||
|           Inc(pp);
 | ||
|           Inc(i);
 | ||
|         end else begin
 | ||
|           cp := Word(pp^);
 | ||
|         end;
 | ||
|         kc := DecomposeHangul(cp,@cpArray[0]);
 | ||
|         for k := 0 to kc - 1 do begin
 | ||
|           if (cpArray[k] <= MAX_WORD) then begin
 | ||
|             pr^ := UnicodeChar(Word(cpArray[k]));
 | ||
|             pr := pr + 1;
 | ||
|           end else begin
 | ||
|             FromUCS4(cpArray[k],pr[0],pr[1]);
 | ||
|             pr := pr + 2;
 | ||
|           end;
 | ||
|         end;
 | ||
|         if (kc > 0) then
 | ||
|           Dec(pr);
 | ||
|       end else begin
 | ||
|         if (pu^.DecompositionID = -1) then begin
 | ||
|           pr^ := pp^;
 | ||
|           if locIsSurrogate then begin
 | ||
|             Inc(pp);
 | ||
|             Inc(pr);
 | ||
|             Inc(i);
 | ||
|             pr^ := pp^;
 | ||
|           end;
 | ||
|         end else begin
 | ||
|           k := Decompose(pu^.DecompositionID,pr);
 | ||
|           pr := pr + (k - 1);
 | ||
|           if locIsSurrogate then begin
 | ||
|             Inc(pp);
 | ||
|             Inc(i);
 | ||
|           end;
 | ||
|         end;
 | ||
|       end;
 | ||
|       Inc(pp);
 | ||
|       Inc(pr);
 | ||
|       Inc(i);
 | ||
|     end;
 | ||
|     Dec(pp);
 | ||
|     i := ((PtrUInt(pr) - PtrUInt(@Result[1])) div SizeOf(UnicodeChar));
 | ||
|     SetLength(Result,i);
 | ||
|     CanonicalOrder(@Result[1],Length(Result));
 | ||
|   end;
 | ||
| end;
 | ||
| 
 | ||
| function NormalizeNFD(const AStr : PUnicodeChar; ALength : SizeInt) : UnicodeString;
 | ||
| begin
 | ||
|   if (ALength < 1) then
 | ||
|     exit('');
 | ||
|   Result := DecomposeCanonical(AStr,ALength);
 | ||
|   CanonicalOrder(@Result[1],Length(Result));
 | ||
| end;
 | ||
| 
 | ||
| //Canonical Decomposition + _ordering_
 | ||
| function NormalizeNFD(const AString : UnicodeString) : UnicodeString;
 | ||
| begin
 | ||
|   Result := NormalizeNFD(@AString[1],Length(AString));
 | ||
| end;
 | ||
| 
 | ||
| { TUCA_PropItemContextTreeNodeRec }
 | ||
| 
 | ||
| function TUCA_PropItemContextTreeNodeRec.GetLeftNode: PUCA_PropItemContextTreeNodeRec;
 | ||
| begin
 | ||
|   if (Self.Left = 0) then
 | ||
|     Result := nil
 | ||
|   else
 | ||
|     Result := PUCA_PropItemContextTreeNodeRec(PtrUInt(@Self) + Self.Left);
 | ||
| end;
 | ||
| 
 | ||
| function TUCA_PropItemContextTreeNodeRec.GetRightNode: PUCA_PropItemContextTreeNodeRec;
 | ||
| begin
 | ||
|   if (Self.Right = 0) then
 | ||
|     Result := nil
 | ||
|   else
 | ||
|     Result := PUCA_PropItemContextTreeNodeRec(PtrUInt(@Self) + Self.Right);
 | ||
| end;
 | ||
| 
 | ||
| { TUCA_PropItemContextRec }
 | ||
| 
 | ||
| function TUCA_PropItemContextRec.GetCodePoints() : PUInt24;
 | ||
| begin
 | ||
|   Result := PUInt24(
 | ||
|               PtrUInt(@Self) + SizeOf(Self.CodePointCount) +
 | ||
|               SizeOf(Self.WeightCount)
 | ||
|             );
 | ||
| end;
 | ||
| 
 | ||
| function TUCA_PropItemContextRec.GetWeights: PUCA_PropWeights;
 | ||
| begin
 | ||
|   Result := PUCA_PropWeights(
 | ||
|               PtrUInt(@Self) +
 | ||
|                 SizeOf(Self.CodePointCount) + SizeOf(Self.WeightCount) +
 | ||
|                 (Self.CodePointCount*SizeOf(UInt24))
 | ||
|             );
 | ||
| end;
 | ||
| 
 | ||
| { TUCA_PropItemContextTreeRec }
 | ||
| 
 | ||
| function TUCA_PropItemContextTreeRec.GetData: PUCA_PropItemContextTreeNodeRec;
 | ||
| begin
 | ||
|   if (Size = 0) then
 | ||
|     Result := nil
 | ||
|   else
 | ||
|     Result := PUCA_PropItemContextTreeNodeRec(
 | ||
|                 PtrUInt(
 | ||
|                   PtrUInt(@Self) + SizeOf(UInt24){Size}
 | ||
|                 )
 | ||
|               );
 | ||
| end;
 | ||
| 
 | ||
| function CompareCodePoints(
 | ||
|   A : PUInt24; LA : Integer;
 | ||
|   B : PUInt24; LB : Integer
 | ||
| ) : Integer;
 | ||
| var
 | ||
|   i, hb : Integer;
 | ||
| begin
 | ||
|   if (A = B) then
 | ||
|     exit(0);
 | ||
|   Result := 1;
 | ||
|   hb := LB - 1;
 | ||
|   for i := 0 to LA - 1 do begin
 | ||
|     if (i > hb) then
 | ||
|       exit;
 | ||
|     if (A[i] < B[i]) then
 | ||
|       exit(-1);
 | ||
|     if (A[i] > B[i]) then
 | ||
|       exit(1);
 | ||
|   end;
 | ||
|   if (LA = LB) then
 | ||
|     exit(0);
 | ||
|   exit(-1);
 | ||
| end;
 | ||
| 
 | ||
| function TUCA_PropItemContextTreeRec.Find(
 | ||
|   const AChars     : PUInt24;
 | ||
|   const ACharCount : Integer;
 | ||
|   out   ANode      : PUCA_PropItemContextTreeNodeRec
 | ||
| ) : Boolean;
 | ||
| var
 | ||
|   t : PUCA_PropItemContextTreeNodeRec;
 | ||
| begin
 | ||
|   t := Data;
 | ||
|   while (t <> nil) do begin
 | ||
|     case CompareCodePoints(AChars,ACharCount,t^.Data.GetCodePoints(),t^.Data.CodePointCount) of
 | ||
|        0   : Break;
 | ||
|       -1   : t := t^.GetLeftNode();
 | ||
|       else
 | ||
|         t := t^.GetRightNode();
 | ||
|     end;
 | ||
|   end;
 | ||
|   Result := (t <> nil);
 | ||
|   if Result then
 | ||
|     ANode := t;
 | ||
| end;
 | ||
| 
 | ||
| { TUC_Prop }
 | ||
| 
 | ||
| function TUC_Prop.GetCategory: Byte;
 | ||
| begin
 | ||
|   Result := Byte((CategoryData and Byte($F8)) shr 3);
 | ||
| end;
 | ||
| 
 | ||
| function TUC_Prop.GetNumericValue: Double;
 | ||
| begin
 | ||
|   Result := UC_NUMERIC_ARRAY[NumericIndex];
 | ||
| end;
 | ||
| 
 | ||
| procedure TUC_Prop.SetCategory(AValue: Byte);
 | ||
| begin
 | ||
|   CategoryData := Byte(CategoryData or Byte(AValue shl 3));
 | ||
| end;
 | ||
| 
 | ||
| function TUC_Prop.GetWhiteSpace: Boolean;
 | ||
| begin
 | ||
|   Result := IsBitON(CategoryData,0);
 | ||
| end;
 | ||
| 
 | ||
| procedure TUC_Prop.SetWhiteSpace(AValue: Boolean);
 | ||
| begin
 | ||
|   SetBit(CategoryData,0,AValue);
 | ||
| end;
 | ||
| 
 | ||
| function TUC_Prop.GetHangulSyllable: Boolean;
 | ||
| begin
 | ||
|   Result := IsBitON(CategoryData,1);
 | ||
| end;
 | ||
| 
 | ||
| procedure TUC_Prop.SetHangulSyllable(AValue: Boolean);
 | ||
| begin
 | ||
|    SetBit(CategoryData,1,AValue);
 | ||
| end;
 | ||
| 
 | ||
| { TUCA_DataBook }
 | ||
| 
 | ||
| function TUCA_DataBook.IsVariable(const AWeight: PUCA_PropWeights): Boolean;
 | ||
| begin
 | ||
|   Result := (AWeight^.Weights[0] >= Self.VariableLowLimit) and
 | ||
|             (AWeight^.Weights[0] <= Self.VariableHighLimit);
 | ||
| end;
 | ||
| 
 | ||
| { TUCA_PropItemRec }
 | ||
| 
 | ||
| function TUCA_PropItemRec.IsWeightCompress_1 : Boolean;
 | ||
| begin
 | ||
|   Result := IsBitON(Flags,FLAG_COMPRESS_WEIGHT_1);
 | ||
| end;
 | ||
| 
 | ||
| function TUCA_PropItemRec.IsWeightCompress_2 : Boolean;
 | ||
| begin
 | ||
|   Result := IsBitON(Flags,FLAG_COMPRESS_WEIGHT_2);
 | ||
| end;
 | ||
| 
 | ||
| function TUCA_PropItemRec.GetCodePoint() : UInt24;
 | ||
| begin
 | ||
|   if HasCodePoint() then begin
 | ||
|     if Contextual then
 | ||
|       Result := Unaligned(
 | ||
|                   PUInt24(
 | ||
|                     PtrUInt(@Self) + Self.GetSelfOnlySize()- SizeOf(UInt24) -
 | ||
|                     Cardinal(GetContext()^.Size)
 | ||
|                   )^
 | ||
|                 )
 | ||
|     else
 | ||
|       Result := Unaligned(PUInt24(PtrUInt(@Self) + Self.GetSelfOnlySize() - SizeOf(UInt24))^)
 | ||
|   end else begin
 | ||
|   {$ifdef uni_debug}
 | ||
|     raise EUnicodeException.Create('TUCA_PropItemRec.GetCodePoint : "No code point available."');
 | ||
|   {$else uni_debug}
 | ||
|     Result := ZERO_UINT24;
 | ||
|   {$endif uni_debug}
 | ||
|   end
 | ||
| end;
 | ||
| 
 | ||
| function TUCA_PropItemRec.HasCodePoint() : Boolean;
 | ||
| begin
 | ||
|   Result := IsBitON(Flags,FLAG_CODEPOINT);
 | ||
| end;
 | ||
| 
 | ||
| function TUCA_PropItemRec.IsValid() : Boolean;
 | ||
| begin
 | ||
|   Result := IsBitON(Flags,FLAG_VALID);
 | ||
| end;
 | ||
| 
 | ||
| {function TUCA_PropItemRec.GetWeightArray: PUCA_PropWeights;
 | ||
| begin
 | ||
|   Result := PUCA_PropWeights(PtrUInt(@Self) + SizeOf(TUCA_PropItemRec));
 | ||
| end;}
 | ||
| 
 | ||
| procedure TUCA_PropItemRec.GetWeightArray(ADest: PUCA_PropWeights);
 | ||
| var
 | ||
|   c : Integer;
 | ||
|   p : PByte;
 | ||
|   pd : PUCA_PropWeights;
 | ||
| begin
 | ||
|   c := WeightLength;
 | ||
|   p := PByte(PtrUInt(@Self) + SizeOf(TUCA_PropItemRec));
 | ||
|   pd := ADest;
 | ||
|   pd^.Weights[0] := Unaligned(PWord(p)^);
 | ||
|   p := p + 2;
 | ||
|   if not IsWeightCompress_1() then begin
 | ||
|     pd^.Weights[1] := Unaligned(PWord(p)^);
 | ||
|     p := p + 2;
 | ||
|   end else begin
 | ||
|     pd^.Weights[1] := p^;
 | ||
|     p := p + 1;
 | ||
|   end;
 | ||
|   if not IsWeightCompress_2() then begin
 | ||
|     pd^.Weights[2] := Unaligned(PWord(p)^);
 | ||
|     p := p + 2;
 | ||
|   end else begin
 | ||
|     pd^.Weights[2] := p^;
 | ||
|     p := p + 1;
 | ||
|   end;
 | ||
|   if (c > 1) then
 | ||
|     Move(p^, (pd+1)^, ((c-1)*SizeOf(TUCA_PropWeights)));
 | ||
| end;
 | ||
| 
 | ||
| function TUCA_PropItemRec.GetSelfOnlySize() : Cardinal;
 | ||
| begin
 | ||
|   Result := SizeOf(TUCA_PropItemRec);
 | ||
|   if (WeightLength > 0) then begin
 | ||
|     Result := Result + (WeightLength * Sizeof(TUCA_PropWeights));
 | ||
|     if IsWeightCompress_1() then
 | ||
|       Result := Result - 1;
 | ||
|     if IsWeightCompress_2() then
 | ||
|       Result := Result - 1;
 | ||
|   end;
 | ||
|   if HasCodePoint() then
 | ||
|     Result := Result + SizeOf(UInt24);
 | ||
|   if Contextual then
 | ||
|     Result := Result + Cardinal(GetContext()^.Size);
 | ||
| end;
 | ||
| 
 | ||
| function TUCA_PropItemRec.GetContextual: Boolean;
 | ||
| begin
 | ||
|   Result := IsBitON(Flags,FLAG_CONTEXTUAL);
 | ||
| end;
 | ||
| 
 | ||
| function TUCA_PropItemRec.GetContext: PUCA_PropItemContextTreeRec;
 | ||
| var
 | ||
|   p : PtrUInt;
 | ||
| begin
 | ||
|   if not Contextual then
 | ||
|     exit(nil);
 | ||
|   p := PtrUInt(@Self) + SizeOf(TUCA_PropItemRec);
 | ||
|   if IsBitON(Flags,FLAG_CODEPOINT) then
 | ||
|     p := p + SizeOf(UInt24);
 | ||
|   Result := PUCA_PropItemContextTreeRec(p);
 | ||
| end;
 | ||
| 
 | ||
| function TUCA_PropItemRec.IsDeleted() : Boolean;
 | ||
| begin
 | ||
|   Result := IsBitON(Flags,FLAG_DELETION);
 | ||
| end;
 | ||
| 
 | ||
| 
 | ||
| function GetPropUCA(const AChar : UnicodeChar; const ABook : PUCA_DataBook) : PUCA_PropItemRec;
 | ||
| var
 | ||
|   i : Cardinal;
 | ||
| begin
 | ||
|   if (ABook^.BMP_Table2 = nil) then
 | ||
|     exit(nil);
 | ||
|   i := ABook^.BMP_Table2[
 | ||
|          (ABook^.BMP_Table1[Hi(Word(AChar))] * 256) +
 | ||
|          Lo(Word(AChar))
 | ||
|        ];
 | ||
|   if (i > 0) then
 | ||
|     Result:= PUCA_PropItemRec(PtrUInt(ABook^.Props) + i - 1)
 | ||
|   else
 | ||
|     Result := nil;
 | ||
| end;
 | ||
| 
 | ||
| function GetPropUCA(const AHighS, ALowS : UnicodeChar; const ABook : PUCA_DataBook): PUCA_PropItemRec;
 | ||
| var
 | ||
|   i : Cardinal;
 | ||
| begin
 | ||
|   if (ABook^.OBMP_Table2 = nil) then
 | ||
|     exit(nil);
 | ||
|   i := ABook^.OBMP_Table2[
 | ||
|          (ABook^.OBMP_Table1[Word(AHighS)-HIGH_SURROGATE_BEGIN] * HIGH_SURROGATE_COUNT) +
 | ||
|          Word(ALowS) - LOW_SURROGATE_BEGIN
 | ||
|        ];
 | ||
|   if (i > 0) then
 | ||
|     Result:= PUCA_PropItemRec(PtrUInt(ABook^.Props) + i - 1)
 | ||
|   else
 | ||
|     Result := nil;
 | ||
| end;
 | ||
| 
 | ||
| {$include weight_derivation.inc}
 | ||
| 
 | ||
| function CompareSortKey(const A : TUCASortKey; const B : array of Word) : Integer;
 | ||
| var
 | ||
|   bb : TUCASortKey;
 | ||
| begin
 | ||
|   SetLength(bb,Length(B));
 | ||
|   if (Length(bb) > 0) then
 | ||
|     Move(B[0],bb[0],(Length(bb)*SizeOf(B[0])));
 | ||
|   Result := CompareSortKey(A,bb);
 | ||
| end;
 | ||
| 
 | ||
| function CompareSortKey(const A, B : TUCASortKey) : Integer;
 | ||
| var
 | ||
|   i, hb : Integer;
 | ||
| begin
 | ||
|   if (Pointer(A) = Pointer(B)) then
 | ||
|     exit(0);
 | ||
|   Result := 1;
 | ||
|   hb := Length(B) - 1;
 | ||
|   for i := 0 to Length(A) - 1 do begin
 | ||
|     if (i > hb) then
 | ||
|       exit;
 | ||
|     if (A[i] < B[i]) then
 | ||
|       exit(-1);
 | ||
|     if (A[i] > B[i]) then
 | ||
|       exit(1);
 | ||
|   end;
 | ||
|   if (Length(A) = Length(B)) then
 | ||
|     exit(0);
 | ||
|   exit(-1);
 | ||
| end;
 | ||
| 
 | ||
| type
 | ||
|   TUCA_PropWeightsArray = array of TUCA_PropWeights;
 | ||
| function FormKeyBlanked(const ACEList : TUCA_PropWeightsArray; const ACollation : PUCA_DataBook) : TUCASortKey;
 | ||
| var
 | ||
|   r : TUCASortKey;
 | ||
|   i, c, k, ral, levelCount : Integer;
 | ||
|   pce : PUCA_PropWeights;
 | ||
| begin
 | ||
|   c := Length(ACEList);
 | ||
|   if (c = 0) then
 | ||
|     exit(nil);
 | ||
|   levelCount := Length(ACEList[0].Weights);
 | ||
|   SetLength(r,(levelCount*c + levelCount));
 | ||
|   ral := 0;
 | ||
|   for i := 0 to levelCount - 1 do begin
 | ||
|     if not ACollation^.Backwards[i] then begin
 | ||
|       pce := @ACEList[0];
 | ||
|       for k := 0 to c - 1 do begin
 | ||
|         if not(ACollation^.IsVariable(pce)) and (pce^.Weights[i] <> 0) then begin
 | ||
|           r[ral] := pce^.Weights[i];
 | ||
|           ral := ral + 1;
 | ||
|         end;
 | ||
|         pce := pce + 1;
 | ||
|       end;
 | ||
|     end else begin
 | ||
|       pce := @ACEList[c-1];
 | ||
|       for k := 0 to c - 1 do begin
 | ||
|         if not(ACollation^.IsVariable(pce)) and (pce^.Weights[i] <> 0) then begin
 | ||
|           r[ral] := pce^.Weights[i];
 | ||
|           ral := ral + 1;
 | ||
|         end;
 | ||
|         pce := pce - 1;
 | ||
|       end;
 | ||
|     end;
 | ||
|     r[ral] := 0;
 | ||
|     ral := ral + 1;
 | ||
|   end;
 | ||
|   ral := ral - 1;
 | ||
|   SetLength(r,ral);
 | ||
|   Result := r;
 | ||
| end;
 | ||
| 
 | ||
| function FormKeyNonIgnorable(const ACEList : TUCA_PropWeightsArray; const ACollation : PUCA_DataBook) : TUCASortKey;
 | ||
| var
 | ||
|   r : TUCASortKey;
 | ||
|   i, c, k, ral, levelCount : Integer;
 | ||
|   pce : PUCA_PropWeights;
 | ||
| begin
 | ||
|   c := Length(ACEList);
 | ||
|   if (c = 0) then
 | ||
|     exit(nil);
 | ||
|   levelCount := Length(ACEList[0].Weights);
 | ||
|   SetLength(r,(levelCount*c + levelCount));
 | ||
|   ral := 0;
 | ||
|   for i := 0 to levelCount - 1 do begin
 | ||
|     if not ACollation^.Backwards[i] then begin
 | ||
|       pce := @ACEList[0];
 | ||
|       for k := 0 to c - 1 do begin
 | ||
|         if (pce^.Weights[i] <> 0) then begin
 | ||
|           r[ral] := pce^.Weights[i];
 | ||
|           ral := ral + 1;
 | ||
|         end;
 | ||
|         pce := pce + 1;
 | ||
|       end;
 | ||
|     end else begin
 | ||
|       pce := @ACEList[c-1];
 | ||
|       for k := 0 to c - 1 do begin
 | ||
|         if (pce^.Weights[i] <> 0) then begin
 | ||
|           r[ral] := pce^.Weights[i];
 | ||
|           ral := ral + 1;
 | ||
|         end;
 | ||
|         pce := pce - 1;
 | ||
|       end;
 | ||
|     end;
 | ||
|     r[ral] := 0;
 | ||
|     ral := ral + 1;
 | ||
|   end;
 | ||
|   ral := ral - 1;
 | ||
|   SetLength(r,ral);
 | ||
|   Result := r;
 | ||
| end;
 | ||
| 
 | ||
| function FormKeyShifted(const ACEList : TUCA_PropWeightsArray; const ACollation : PUCA_DataBook) : TUCASortKey;
 | ||
| var
 | ||
|   r : TUCASortKey;
 | ||
|   i, c, k, ral, levelCount : Integer;
 | ||
|   pce : PUCA_PropWeights;
 | ||
|   variableState : Boolean;
 | ||
| begin
 | ||
|   c := Length(ACEList);
 | ||
|   if (c = 0) then
 | ||
|     exit(nil);
 | ||
|   levelCount := Length(ACEList[0].Weights);
 | ||
|   SetLength(r,(levelCount*c + levelCount));
 | ||
|   ral := 0;
 | ||
|   for i := 0 to levelCount - 1 do begin
 | ||
|     if not ACollation^.Backwards[i] then begin
 | ||
|       variableState := False;
 | ||
|       pce := @ACEList[0];
 | ||
|       for k := 0 to c - 1 do begin
 | ||
|         if not ACollation^.IsVariable(pce) then begin
 | ||
|           if (pce^.Weights[0] <> 0) then
 | ||
|             variableState := False;
 | ||
|           if (pce^.Weights[i] <> 0) and not(variableState) then begin
 | ||
|             r[ral] := pce^.Weights[i];
 | ||
|             ral := ral + 1;
 | ||
|           end;
 | ||
|         end else begin
 | ||
|           variableState := True;
 | ||
|         end;
 | ||
|         pce := pce + 1;
 | ||
|       end;
 | ||
|     end else begin
 | ||
|       pce := @ACEList[c-1];
 | ||
|       for k := 0 to c - 1 do begin
 | ||
|         if not ACollation^.IsVariable(pce) then begin
 | ||
|           if (pce^.Weights[0] <> 0) then
 | ||
|             variableState := False;
 | ||
|           if (pce^.Weights[i] <> 0) and not(variableState) then begin
 | ||
|             r[ral] := pce^.Weights[i];
 | ||
|             ral := ral + 1;
 | ||
|           end;
 | ||
|         end else begin
 | ||
|           variableState := True;
 | ||
|         end;
 | ||
|         pce := pce - 1;
 | ||
|       end;
 | ||
|     end;
 | ||
|     r[ral] := 0;
 | ||
|     ral := ral + 1;
 | ||
|   end;
 | ||
|   ral := ral - 1;
 | ||
|   SetLength(r,ral);
 | ||
|   Result := r;
 | ||
| end;
 | ||
| 
 | ||
| function FormKeyShiftedTrimmed(
 | ||
|   const ACEList : TUCA_PropWeightsArray;
 | ||
|   const ACollation : PUCA_DataBook
 | ||
| ) : TUCASortKey;
 | ||
| var
 | ||
|   i : Integer;
 | ||
|   p : ^TUCASortKeyItem;
 | ||
| begin
 | ||
|   Result := FormKeyShifted(ACEList,ACollation);
 | ||
|   i := Length(Result) - 1;
 | ||
|   if (i >= 0) then begin
 | ||
|     p := @Result[i];
 | ||
|     while (i >= 0) do begin
 | ||
|       if (p^ <> $FFFF) then
 | ||
|         Break;
 | ||
|       Dec(i);
 | ||
|       Dec(p);
 | ||
|     end;
 | ||
|     if ((i+1) < Length(Result)) then
 | ||
|       SetLength(Result,(i+1));
 | ||
|   end;
 | ||
| end;
 | ||
| 
 | ||
| function FindChild(
 | ||
|   const ACodePoint : Cardinal;
 | ||
|   const AParent    : PUCA_PropItemRec
 | ||
| ) : PUCA_PropItemRec;inline;
 | ||
| var
 | ||
|   k : Integer;
 | ||
| begin
 | ||
|   Result := PUCA_PropItemRec(PtrUInt(AParent) + AParent^.GetSelfOnlySize());
 | ||
|   for k := 0 to AParent^.ChildCount - 1 do begin
 | ||
|     if (ACodePoint = Result^.CodePoint) then
 | ||
|       exit;
 | ||
|     Result := PUCA_PropItemRec(PtrUInt(Result) + Result^.Size);
 | ||
|   end;
 | ||
|   Result := nil;
 | ||
| end;
 | ||
| 
 | ||
| function ComputeSortKey(
 | ||
|   const AString    : UnicodeString;
 | ||
|   const ACollation : PUCA_DataBook
 | ||
| ) : TUCASortKey;
 | ||
| begin
 | ||
|   Result := ComputeSortKey(@AString[1],Length(AString),ACollation);
 | ||
| end;
 | ||
| 
 | ||
| function ComputeRawSortKey(
 | ||
|   const AStr       : PUnicodeChar;
 | ||
|   const ALength    : SizeInt;
 | ||
|   const ACollation : PUCA_DataBook
 | ||
| ) : TUCA_PropWeightsArray;
 | ||
| var
 | ||
|   r : TUCA_PropWeightsArray;
 | ||
|   ral {used length of "r"}: Integer;
 | ||
|   rl  {capacity of "r"} : Integer;
 | ||
| 
 | ||
|   procedure GrowKey(const AMinGrow : Integer = 0);inline;
 | ||
|   begin
 | ||
|     if (rl < AMinGrow) then
 | ||
|       rl := rl + AMinGrow
 | ||
|     else
 | ||
|       rl := 2 * rl;
 | ||
|     SetLength(r,rl);
 | ||
|   end;
 | ||
| 
 | ||
| var
 | ||
|   i : Integer;
 | ||
|   s : UnicodeString;
 | ||
|   ps : PUnicodeChar;
 | ||
|   cp : Cardinal;
 | ||
|   cl : PUCA_DataBook;
 | ||
|   pp : PUCA_PropItemRec;
 | ||
|   ppLevel : Byte;
 | ||
|   removedCharIndex : array of DWord;
 | ||
|   removedCharIndexLength : DWord;
 | ||
|   locHistory : array[0..24] of record
 | ||
|                                  i  : Integer;
 | ||
|                                  cl : PUCA_DataBook;
 | ||
|                                  pp : PUCA_PropItemRec;
 | ||
|                                  ppLevel : Byte;
 | ||
|                                  cp      : Cardinal;
 | ||
|                                  removedCharIndexLength : DWord;
 | ||
|                                end;
 | ||
|   locHistoryTop : Integer;
 | ||
|   suppressState : record
 | ||
|                     cl : PUCA_DataBook;
 | ||
|                     CharCount : Integer;
 | ||
|                   end;
 | ||
|   LastKeyOwner : record
 | ||
|                     Length : Integer;
 | ||
|                     Chars  : array[0..24] of UInt24;
 | ||
|                  end;
 | ||
| 
 | ||
|   procedure SaveKeyOwner();
 | ||
|   var
 | ||
|     k : Integer;
 | ||
|     kppLevel : Byte;
 | ||
|   begin
 | ||
|     k := 0;
 | ||
|     kppLevel := High(Byte);
 | ||
|     while (k <= locHistoryTop) do begin
 | ||
|       if (kppLevel <> locHistory[k].ppLevel) then begin
 | ||
|         LastKeyOwner.Chars[k] := locHistory[k].cp;
 | ||
|         kppLevel := locHistory[k].ppLevel;
 | ||
|       end;
 | ||
|       k := k + 1;
 | ||
|     end;
 | ||
|     if (k = 0) or (kppLevel <> ppLevel) then begin
 | ||
|       LastKeyOwner.Chars[k] := cp;
 | ||
|       k := k + 1;
 | ||
|     end;
 | ||
|     LastKeyOwner.Length := k;
 | ||
|   end;
 | ||
| 
 | ||
|   procedure AddWeights(AItem : PUCA_PropItemRec);inline;
 | ||
|   begin
 | ||
|     SaveKeyOwner();
 | ||
|     if ((ral + AItem^.WeightLength) > rl) then
 | ||
|       GrowKey(AItem^.WeightLength);
 | ||
|     AItem^.GetWeightArray(@r[ral]);
 | ||
|     ral := ral + AItem^.WeightLength;
 | ||
|   end;
 | ||
| 
 | ||
|   procedure AddContextWeights(AItem : PUCA_PropItemContextRec);inline;
 | ||
|   begin
 | ||
|     if ((ral + AItem^.WeightCount) > rl) then
 | ||
|       GrowKey(AItem^.WeightCount);
 | ||
|     Move(AItem^.GetWeights()^,r[ral],(AItem^.WeightCount*SizeOf(r[0])));
 | ||
|     ral := ral + AItem^.WeightCount;
 | ||
|   end;
 | ||
| 
 | ||
|   procedure AddComputedWeights(ACodePoint : Cardinal);inline;
 | ||
|   begin
 | ||
|     SaveKeyOwner();
 | ||
|     if ((ral + 2) > rl) then
 | ||
|       GrowKey();
 | ||
|     DeriveWeight(ACodePoint,@r[ral]);
 | ||
|     ral := ral + 2;
 | ||
|   end;
 | ||
| 
 | ||
|   procedure RecordDeletion();inline;
 | ||
|   begin
 | ||
|     if pp^.IsValid() and pp^.IsDeleted() (*pp^.GetWeightLength() = 0*) then begin
 | ||
|       if (suppressState.cl = nil) or
 | ||
|          (suppressState.CharCount > ppLevel)
 | ||
|       then begin
 | ||
|         suppressState.cl := cl;
 | ||
|         suppressState.CharCount := ppLevel;
 | ||
|       end;
 | ||
|     end;
 | ||
|   end;
 | ||
| 
 | ||
|   procedure RecordStep();inline;
 | ||
|   begin
 | ||
|     Inc(locHistoryTop);
 | ||
|     locHistory[locHistoryTop].i := i;
 | ||
|     locHistory[locHistoryTop].cl := cl;
 | ||
|     locHistory[locHistoryTop].pp := pp;
 | ||
|     locHistory[locHistoryTop].ppLevel := ppLevel;
 | ||
|     locHistory[locHistoryTop].cp := cp;
 | ||
|     locHistory[locHistoryTop].removedCharIndexLength := removedCharIndexLength;
 | ||
|     RecordDeletion();
 | ||
|   end;
 | ||
| 
 | ||
|   procedure ClearHistory();inline;
 | ||
|   begin
 | ||
|     locHistoryTop := -1;
 | ||
|   end;
 | ||
| 
 | ||
|   function HasHistory() : Boolean;inline;
 | ||
|   begin
 | ||
|     Result := (locHistoryTop >= 0);
 | ||
|   end;
 | ||
| 
 | ||
|   function GetHistoryLength() : Integer;inline;
 | ||
|   begin
 | ||
|     Result := (locHistoryTop + 1);
 | ||
|   end;
 | ||
| 
 | ||
|   procedure GoBack();inline;
 | ||
|   begin
 | ||
|     Assert(locHistoryTop >= 0);
 | ||
|     i := locHistory[locHistoryTop].i;
 | ||
|     cp := locHistory[locHistoryTop].cp;
 | ||
|     cl := locHistory[locHistoryTop].cl;
 | ||
|     pp := locHistory[locHistoryTop].pp;
 | ||
|     ppLevel := locHistory[locHistoryTop].ppLevel;
 | ||
|     removedCharIndexLength := locHistory[locHistoryTop].removedCharIndexLength;
 | ||
|     ps := @s[i];
 | ||
|     Dec(locHistoryTop);
 | ||
|   end;
 | ||
| 
 | ||
| var
 | ||
|   c : Integer;
 | ||
|   lastUnblockedNonstarterCCC : Byte;
 | ||
| 
 | ||
|   function IsUnblockedNonstarter(const AStartFrom : Integer) : Boolean;
 | ||
|   var
 | ||
|     k : DWord;
 | ||
|     pk : PUnicodeChar;
 | ||
|     puk : PUC_Prop;
 | ||
|   begin
 | ||
|     k := AStartFrom;
 | ||
|     if (k > c) then
 | ||
|       exit(False);
 | ||
|     if (removedCharIndexLength>0) and
 | ||
|        (IndexDWord(removedCharIndex[0],removedCharIndexLength,k) >= 0)
 | ||
|     then begin
 | ||
|       exit(False);
 | ||
|     end;
 | ||
|     {if (k = (i+1)) or
 | ||
|        ( (k = (i+2)) and UnicodeIsHighSurrogate(s[i]) )
 | ||
|     then
 | ||
|       lastUnblockedNonstarterCCC := 0;}
 | ||
|     pk := @s[k];
 | ||
|     if UnicodeIsHighSurrogate(pk^) then begin
 | ||
|       if (k = c) then
 | ||
|         exit(False);
 | ||
|       if UnicodeIsLowSurrogate(pk[1]) then
 | ||
|         puk := GetProps(pk[0],pk[1])
 | ||
|       else
 | ||
|         puk := GetProps(Word(pk^));
 | ||
|     end else begin
 | ||
|       puk := GetProps(Word(pk^));
 | ||
|     end;
 | ||
|     if (puk^.CCC = 0) or (lastUnblockedNonstarterCCC >= puk^.CCC) then
 | ||
|       exit(False);
 | ||
|     lastUnblockedNonstarterCCC := puk^.CCC;
 | ||
|     Result := True;
 | ||
|   end;
 | ||
| 
 | ||
|   procedure RemoveChar(APos : Integer);inline;
 | ||
|   begin
 | ||
|     if (removedCharIndexLength >= Length(removedCharIndex)) then
 | ||
|       SetLength(removedCharIndex,(2*removedCharIndexLength + 2));
 | ||
|     removedCharIndex[removedCharIndexLength] := APos;
 | ||
|     Inc(removedCharIndexLength);
 | ||
|     if UnicodeIsHighSurrogate(s[APos]) and (APos < c) and UnicodeIsLowSurrogate(s[APos+1]) then begin
 | ||
|       if (removedCharIndexLength >= Length(removedCharIndex)) then
 | ||
|           SetLength(removedCharIndex,(2*removedCharIndexLength + 2));
 | ||
|         removedCharIndex[removedCharIndexLength] := APos+1;
 | ||
|         Inc(removedCharIndexLength);
 | ||
|     end;
 | ||
|   end;
 | ||
| 
 | ||
|   procedure Inc_I();inline;
 | ||
|   begin
 | ||
|     if (removedCharIndexLength = 0) then begin
 | ||
|       Inc(i);
 | ||
|       Inc(ps);
 | ||
|       exit;
 | ||
|     end;
 | ||
|     while True do begin
 | ||
|       Inc(i);
 | ||
|       Inc(ps);
 | ||
|       if (IndexDWord(removedCharIndex[0],removedCharIndexLength,i) = -1) then
 | ||
|         Break;
 | ||
|     end;
 | ||
|   end;
 | ||
| 
 | ||
| var
 | ||
|   surrogateState : Boolean;
 | ||
| 
 | ||
|   function MoveToNextChar() : Boolean;inline;
 | ||
|   begin
 | ||
|     Result := True;
 | ||
|     if UnicodeIsHighSurrogate(ps[0]) then begin
 | ||
|       if (i = c) then
 | ||
|         exit(False);
 | ||
|       if UnicodeIsLowSurrogate(ps[1]) then begin
 | ||
|         surrogateState := True;
 | ||
|         cp := ToUCS4(ps[0],ps[1]);
 | ||
|       end else begin
 | ||
|         surrogateState := False;
 | ||
|         cp := Word(ps[0]);
 | ||
|       end;
 | ||
|     end else begin
 | ||
|       surrogateState := False;
 | ||
|       cp := Word(ps[0]);
 | ||
|     end;
 | ||
|   end;
 | ||
| 
 | ||
|   procedure ClearPP(const AClearSuppressInfo : Boolean = True);inline;
 | ||
|   begin
 | ||
|     cl := nil;
 | ||
|     pp := nil;
 | ||
|     ppLevel := 0;
 | ||
|     if AClearSuppressInfo then begin
 | ||
|       suppressState.cl := nil;
 | ||
|       suppressState.CharCount := 0;
 | ||
|     end;
 | ||
|   end;
 | ||
| 
 | ||
|   function FindPropUCA() : Boolean;
 | ||
|   var
 | ||
|     candidateCL : PUCA_DataBook;
 | ||
|   begin
 | ||
|     pp := nil;
 | ||
|     if (cl = nil) then
 | ||
|       candidateCL := ACollation
 | ||
|     else
 | ||
|       candidateCL := cl;
 | ||
|     if surrogateState then begin
 | ||
|       while (candidateCL <> nil) do begin
 | ||
|         pp := GetPropUCA(ps[0],ps[1],candidateCL);
 | ||
|         if (pp <> nil) then
 | ||
|           break;
 | ||
|         candidateCL := candidateCL^.Base;
 | ||
|       end;
 | ||
|     end else begin
 | ||
|       while (candidateCL <> nil) do begin
 | ||
|         pp := GetPropUCA(ps[0],candidateCL);
 | ||
|         if (pp <> nil) then
 | ||
|           break;
 | ||
|         candidateCL := candidateCL^.Base;
 | ||
|       end;
 | ||
|     end;
 | ||
|     cl := candidateCL;
 | ||
|     Result := (pp <> nil);
 | ||
|   end;
 | ||
| 
 | ||
|   procedure AddWeightsAndClear();inline;
 | ||
|   var
 | ||
|     ctxNode : PUCA_PropItemContextTreeNodeRec;
 | ||
|   begin
 | ||
|     if (pp^.WeightLength > 0) then begin
 | ||
|       AddWeights(pp);
 | ||
|     end else
 | ||
|     if (LastKeyOwner.Length > 0) and pp^.Contextual and
 | ||
|        pp^.GetContext()^.Find(@LastKeyOwner.Chars[0],LastKeyOwner.Length,ctxNode) and
 | ||
|        (ctxNode^.Data.WeightCount > 0)
 | ||
|     then begin
 | ||
|       AddContextWeights(@ctxNode^.Data);
 | ||
|     end;
 | ||
|     //AddWeights(pp);
 | ||
|     ClearHistory();
 | ||
|     ClearPP();
 | ||
|   end;
 | ||
| 
 | ||
|   procedure StartMatch();
 | ||
| 
 | ||
|     procedure HandleLastChar();
 | ||
|     var
 | ||
|       ctxNode : PUCA_PropItemContextTreeNodeRec;
 | ||
|     begin
 | ||
|       while True do begin
 | ||
|         if pp^.IsValid() then begin
 | ||
|           if (pp^.WeightLength > 0) then
 | ||
|             AddWeights(pp)
 | ||
|           else
 | ||
|           if (LastKeyOwner.Length > 0) and pp^.Contextual and
 | ||
|              pp^.GetContext()^.Find(@LastKeyOwner.Chars[0],LastKeyOwner.Length,ctxNode) and
 | ||
|              (ctxNode^.Data.WeightCount > 0)
 | ||
|           then
 | ||
|             AddContextWeights(@ctxNode^.Data)
 | ||
|           else
 | ||
|             AddComputedWeights(cp){handle deletion of code point};
 | ||
|           break;
 | ||
|         end;
 | ||
|         if (cl^.Base = nil) then begin
 | ||
|           AddComputedWeights(cp);
 | ||
|           break;
 | ||
|         end;
 | ||
|         cl := cl^.Base;
 | ||
|         if not FindPropUCA() then begin
 | ||
|           AddComputedWeights(cp);
 | ||
|           break;
 | ||
|         end;
 | ||
|       end;
 | ||
|     end;
 | ||
|   var
 | ||
|     tmpCtxNode : PUCA_PropItemContextTreeNodeRec;
 | ||
|   begin
 | ||
|     ppLevel := 0;
 | ||
|     if not FindPropUCA() then begin
 | ||
|       AddComputedWeights(cp);
 | ||
|       ClearHistory();
 | ||
|       ClearPP();
 | ||
|     end else begin
 | ||
|       if (i = c) then begin
 | ||
|         HandleLastChar();
 | ||
|       end else begin
 | ||
|         if pp^.IsValid()then begin
 | ||
|           if (pp^.ChildCount = 0) then begin
 | ||
|             if (pp^.WeightLength > 0) then
 | ||
|               AddWeights(pp)
 | ||
|             else
 | ||
|             if (LastKeyOwner.Length > 0) and pp^.Contextual and
 | ||
|                pp^.GetContext()^.Find(@LastKeyOwner.Chars[0],LastKeyOwner.Length,tmpCtxNode) and
 | ||
|                (tmpCtxNode^.Data.WeightCount > 0)
 | ||
|             then
 | ||
|               AddContextWeights(@tmpCtxNode^.Data)
 | ||
|             else
 | ||
|               AddComputedWeights(cp){handle deletion of code point};
 | ||
|             ClearPP();
 | ||
|             ClearHistory();
 | ||
|           end else begin
 | ||
|             RecordStep();
 | ||
|           end
 | ||
|         end else begin
 | ||
|           if (pp^.ChildCount = 0) then begin
 | ||
|             AddComputedWeights(cp);
 | ||
|             ClearPP();
 | ||
|             ClearHistory();
 | ||
|           end else begin
 | ||
|             RecordStep();
 | ||
|           end;
 | ||
|         end ;
 | ||
|       end;
 | ||
|     end;
 | ||
|   end;
 | ||
| 
 | ||
|   function TryPermutation() : Boolean;
 | ||
|   var
 | ||
|     kk : Integer;
 | ||
|     b : Boolean;
 | ||
|     puk : PUC_Prop;
 | ||
|     ppk : PUCA_PropItemRec;
 | ||
|   begin
 | ||
|     Result := False;
 | ||
|     puk := GetProps(cp);
 | ||
|     if (puk^.CCC = 0) then
 | ||
|       exit;
 | ||
|     lastUnblockedNonstarterCCC := puk^.CCC;
 | ||
|     if surrogateState then
 | ||
|       kk := i + 2
 | ||
|     else
 | ||
|       kk := i + 1;
 | ||
|     while IsUnblockedNonstarter(kk) do begin
 | ||
|       b := UnicodeIsHighSurrogate(s[kk]) and (kk<c) and UnicodeIsLowSurrogate(s[kk+1]);
 | ||
|       if b then
 | ||
|         ppk := FindChild(ToUCS4(s[kk],s[kk+1]),pp)
 | ||
|       else
 | ||
|         ppk := FindChild(Word(s[kk]),pp);
 | ||
|       if (ppk <> nil) then begin
 | ||
|         pp := ppk;
 | ||
|         RemoveChar(kk);
 | ||
|         Inc(ppLevel);
 | ||
|         RecordStep();
 | ||
|         Result := True;
 | ||
|         if (pp^.ChildCount = 0 ) then
 | ||
|           Break;
 | ||
|       end;
 | ||
|       if b then
 | ||
|         Inc(kk);
 | ||
|       Inc(kk);
 | ||
|     end;
 | ||
|   end;
 | ||
| 
 | ||
|   procedure AdvanceCharPos();inline;
 | ||
|   begin
 | ||
|     if UnicodeIsHighSurrogate(ps[0]) and (i<c) and UnicodeIsLowSurrogate(ps[1]) then begin
 | ||
|       Inc(i);
 | ||
|       Inc(ps);
 | ||
|     end;
 | ||
|     Inc_I();
 | ||
|   end;
 | ||
| 
 | ||
| var
 | ||
|   ok : Boolean;
 | ||
|   pp1 : PUCA_PropItemRec;
 | ||
|   cltemp : PUCA_DataBook;
 | ||
|   ctxNode : PUCA_PropItemContextTreeNodeRec;
 | ||
| begin
 | ||
|   if (ALength = 0) then
 | ||
|     exit(nil);
 | ||
|   c := ALength;
 | ||
|   s := NormalizeNFD(AStr,c);
 | ||
|   c := Length(s);
 | ||
|   rl := 3*c;
 | ||
|   SetLength(r,rl);
 | ||
|   ral := 0;
 | ||
|   ps := @s[1];
 | ||
|   ClearPP();
 | ||
|   locHistoryTop := -1;
 | ||
|   removedCharIndexLength := 0;
 | ||
|   FillByte(suppressState,SizeOf(suppressState),0);
 | ||
|   LastKeyOwner.Length := 0;
 | ||
|   i := 1;
 | ||
|   while (i <= c) and MoveToNextChar() do begin
 | ||
|     if (pp = nil) then begin // Start Matching
 | ||
|       StartMatch();
 | ||
|     end else begin
 | ||
|       pp1 := FindChild(cp,pp);
 | ||
|       if (pp1 <> nil) then begin
 | ||
|         Inc(ppLevel);
 | ||
|         pp := pp1;
 | ||
|         if (pp^.ChildCount = 0) or (i = c) then begin
 | ||
|           ok := False;
 | ||
|           if pp^.IsValid() and (suppressState.CharCount = 0) then begin
 | ||
|             if (pp^.WeightLength > 0) then begin
 | ||
|               AddWeightsAndClear();
 | ||
|               ok := True;
 | ||
|             end else
 | ||
|             if (LastKeyOwner.Length > 0) and pp^.Contextual and
 | ||
|                pp^.GetContext()^.Find(@LastKeyOwner.Chars[0],LastKeyOwner.Length,ctxNode) and
 | ||
|                (ctxNode^.Data.WeightCount > 0)
 | ||
|             then begin
 | ||
|               AddContextWeights(@ctxNode^.Data);
 | ||
|               ClearHistory();
 | ||
|               ClearPP();
 | ||
|               ok := True;
 | ||
|             end
 | ||
|           end;
 | ||
|           if not ok then begin
 | ||
|             RecordDeletion();
 | ||
|             ok := False;
 | ||
|             while HasHistory() do begin
 | ||
|               GoBack();
 | ||
|               if pp^.IsValid() and
 | ||
|                  ( ( (cl = suppressState.cl) and (ppLevel <> suppressState.CharCount) ) or
 | ||
|                    ( (cl <> suppressState.cl) and (ppLevel < suppressState.CharCount) )
 | ||
|                  )
 | ||
|               then begin
 | ||
|                 AddWeightsAndClear();
 | ||
|                 ok := True;
 | ||
|                 Break;
 | ||
|               end;
 | ||
|             end;
 | ||
|             if not ok then begin
 | ||
|               cltemp := cl^.Base;
 | ||
|               if (cltemp <> nil) then begin
 | ||
|                 ClearPP(False);
 | ||
|                 cl := cltemp;
 | ||
|                 Continue;
 | ||
|               end;
 | ||
|             end;
 | ||
| 
 | ||
|             if not ok then begin
 | ||
|               AddComputedWeights(cp);
 | ||
|               ClearHistory();
 | ||
|               ClearPP();
 | ||
|             end;
 | ||
|           end;
 | ||
|         end else begin
 | ||
|           RecordStep();
 | ||
|         end;
 | ||
|       end else begin
 | ||
|         // permutations !
 | ||
|         ok := False;
 | ||
|         if TryPermutation() and pp^.IsValid() then begin
 | ||
|           if (suppressState.CharCount = 0) then begin
 | ||
|             AddWeightsAndClear();
 | ||
|             Continue;
 | ||
|           end;
 | ||
|           while True do begin
 | ||
|             if pp^.IsValid() and
 | ||
|                (pp^.WeightLength > 0) and
 | ||
|                ( ( (cl = suppressState.cl) and (ppLevel <> suppressState.CharCount) ) or
 | ||
|                  ( (cl <> suppressState.cl) and (ppLevel < suppressState.CharCount) )
 | ||
|                )
 | ||
|             then begin
 | ||
|               AddWeightsAndClear();
 | ||
|               ok := True;
 | ||
|               break;
 | ||
|             end;
 | ||
|             if not HasHistory() then
 | ||
|               break;
 | ||
|             GoBack();
 | ||
|             if (pp = nil) then
 | ||
|               break;
 | ||
|           end;
 | ||
|         end;
 | ||
|         if not ok then begin
 | ||
|           if pp^.IsValid() and (suppressState.CharCount = 0) then begin
 | ||
|             if (pp^.WeightLength > 0) then begin
 | ||
|               AddWeightsAndClear();
 | ||
|               ok := True;
 | ||
|             end else
 | ||
|             if (LastKeyOwner.Length > 0) and pp^.Contextual and
 | ||
|                pp^.GetContext()^.Find(@LastKeyOwner.Chars[0],LastKeyOwner.Length,ctxNode) and
 | ||
|                (ctxNode^.Data.WeightCount > 0)
 | ||
|             then begin
 | ||
|               AddContextWeights(@ctxNode^.Data);
 | ||
|               ClearHistory();
 | ||
|               ClearPP();
 | ||
|               ok := True;
 | ||
|             end
 | ||
|           end;
 | ||
|           if ok then
 | ||
|             Continue;
 | ||
|         end;
 | ||
|         if not ok then begin
 | ||
|           if (cl^.Base <> nil) then begin
 | ||
|             cltemp := cl^.Base;
 | ||
|             while HasHistory() do
 | ||
|               GoBack();
 | ||
|             pp := nil;
 | ||
|             ppLevel := 0;
 | ||
|             cl := cltemp;
 | ||
|             Continue;
 | ||
|           end;
 | ||
| 
 | ||
|           //walk back
 | ||
|           ok := False;
 | ||
|           while HasHistory() do begin
 | ||
|             GoBack();
 | ||
|             if pp^.IsValid() and
 | ||
|                (pp^.WeightLength > 0) and
 | ||
|                ( (suppressState.CharCount = 0) or
 | ||
|                  ( ( (cl = suppressState.cl) and (ppLevel <> suppressState.CharCount) ) or
 | ||
|                    ( (cl <> suppressState.cl) and (ppLevel < suppressState.CharCount) )
 | ||
|                  )
 | ||
|                )
 | ||
|             then begin
 | ||
|               AddWeightsAndClear();
 | ||
|               ok := True;
 | ||
|               Break;
 | ||
|             end;
 | ||
|           end;
 | ||
|           if ok then begin
 | ||
|             AdvanceCharPos();
 | ||
|             Continue;
 | ||
|           end;
 | ||
|           if (pp <> nil) then begin
 | ||
|             AddComputedWeights(cp);
 | ||
|             ClearHistory();
 | ||
|             ClearPP();
 | ||
|           end;
 | ||
|         end;
 | ||
|       end;
 | ||
|     end;
 | ||
|     if surrogateState then begin
 | ||
|       Inc(ps);
 | ||
|       Inc(i);
 | ||
|     end;
 | ||
|     //
 | ||
|     Inc_I();
 | ||
|   end;
 | ||
|   SetLength(r,ral);
 | ||
|   Result := r;
 | ||
| end;
 | ||
| 
 | ||
| type
 | ||
|   TComputeKeyContext = record
 | ||
|     Collation : PUCA_DataBook;
 | ||
|     r : TUCA_PropWeightsArray;
 | ||
|     ral {used length of "r"}: Integer;
 | ||
|     rl  {capacity of "r"} : Integer;
 | ||
|     i : Integer;
 | ||
|     s : UnicodeString;
 | ||
|     ps : PUnicodeChar;
 | ||
|     cp : Cardinal;
 | ||
|     cl : PUCA_DataBook;
 | ||
|     pp : PUCA_PropItemRec;
 | ||
|     ppLevel : Byte;
 | ||
|     removedCharIndex : array of DWord;
 | ||
|     removedCharIndexLength : DWord;
 | ||
|     locHistoryTop : Integer;
 | ||
|     locHistory : array[0..24] of record
 | ||
|                                    i  : Integer;
 | ||
|                                    cl : PUCA_DataBook;
 | ||
|                                    pp : PUCA_PropItemRec;
 | ||
|                                    ppLevel : Byte;
 | ||
|                                    cp      : Cardinal;
 | ||
|                                    removedCharIndexLength : DWord;
 | ||
|                                  end;
 | ||
|     suppressState : record
 | ||
|                       cl : PUCA_DataBook;
 | ||
|                       CharCount : Integer;
 | ||
|                     end;
 | ||
|     LastKeyOwner : record
 | ||
|                       Length : Integer;
 | ||
|                       Chars  : array[0..24] of UInt24;
 | ||
|                    end;
 | ||
|     c : Integer;
 | ||
|     lastUnblockedNonstarterCCC : Byte;
 | ||
|     surrogateState : Boolean;
 | ||
|     Finished : Boolean;
 | ||
|   end;
 | ||
|   PComputeKeyContext = ^TComputeKeyContext;
 | ||
| 
 | ||
| procedure ClearPP(AContext : PComputeKeyContext; const AClearSuppressInfo : Boolean = True);inline;
 | ||
| begin
 | ||
|   AContext^.cl := nil;
 | ||
|   AContext^.pp := nil;
 | ||
|   AContext^.ppLevel := 0;
 | ||
|   if AClearSuppressInfo then begin
 | ||
|     AContext^.suppressState.cl := nil;
 | ||
|     AContext^.suppressState.CharCount := 0;
 | ||
|   end;
 | ||
| end;
 | ||
| 
 | ||
| procedure InitContext(
 | ||
|         AContext   : PComputeKeyContext;
 | ||
|   const AStr       : PUnicodeChar;
 | ||
|   const ALength    : SizeInt;
 | ||
|   const ACollation : PUCA_DataBook
 | ||
| );
 | ||
| begin
 | ||
|   AContext^.Collation := ACollation;
 | ||
|   AContext^.c := ALength;
 | ||
|   AContext^.s := NormalizeNFD(AStr,AContext^.c);
 | ||
|   AContext^.c := Length(AContext^.s);
 | ||
|   AContext^.rl := 3*AContext^.c;
 | ||
|   SetLength(AContext^.r,AContext^.rl);
 | ||
|   AContext^.ral := 0;
 | ||
|   AContext^.ps := @AContext^.s[1];
 | ||
|   ClearPP(AContext);
 | ||
|   AContext^.locHistoryTop := -1;
 | ||
|   AContext^.removedCharIndexLength := 0;
 | ||
|   FillByte(AContext^.suppressState,SizeOf(AContext^.suppressState),0);
 | ||
|   AContext^.LastKeyOwner.Length := 0;
 | ||
|   AContext^.i := 1;
 | ||
|   AContext^.Finished := False;
 | ||
| end;
 | ||
| 
 | ||
| function FormKey(
 | ||
|   const AWeightArray  : TUCA_PropWeightsArray;
 | ||
|   const ACollation    : PUCA_DataBook
 | ||
| ) : TUCASortKey;inline;
 | ||
| begin
 | ||
|   case ACollation.VariableWeight of
 | ||
|     TUCA_VariableKind.ucaShifted        : Result := FormKeyShifted(AWeightArray,ACollation);
 | ||
|     TUCA_VariableKind.ucaBlanked        : Result := FormKeyBlanked(AWeightArray,ACollation);
 | ||
|     TUCA_VariableKind.ucaNonIgnorable   : Result := FormKeyNonIgnorable(AWeightArray,ACollation);
 | ||
|     TUCA_VariableKind.ucaShiftedTrimmed : Result := FormKeyShiftedTrimmed(AWeightArray,ACollation);
 | ||
|     else
 | ||
|       Result := FormKeyShifted(AWeightArray,ACollation);
 | ||
|   end;
 | ||
| end;
 | ||
| 
 | ||
| function ComputeRawSortKeyNextItem(
 | ||
|   const AContext   : PComputeKeyContext
 | ||
| ) : Boolean;forward;
 | ||
| function IncrementalCompareString_NonIgnorable(
 | ||
|   const AStrA      : PUnicodeChar;
 | ||
|   const ALengthA   : SizeInt;
 | ||
|   const AStrB      : PUnicodeChar;
 | ||
|   const ALengthB   : SizeInt;
 | ||
|   const ACollation : PUCA_DataBook
 | ||
| ) : Integer;
 | ||
| var
 | ||
|   ctxA, ctxB : TComputeKeyContext;
 | ||
|   lastKeyIndexA, keyIndexA, lengthMaxA : Integer;
 | ||
|   keyIndexB : Integer;
 | ||
|   keyA, keyB : TUCASortKey;
 | ||
| begin
 | ||
|   if ( (ALengthA = 0) and (ALengthB = 0) ) or
 | ||
|      ( (PtrUInt(AStrA) = PtrUInt(AStrB)) and
 | ||
|        (ALengthA = ALengthB)
 | ||
|      )
 | ||
|   then
 | ||
|     exit(0);
 | ||
|   if (ALengthA = 0) then
 | ||
|     exit(-1);
 | ||
|   if (ALengthB = 0) then
 | ||
|     exit(1);
 | ||
| 
 | ||
|   InitContext(@ctxA,AStrA,ALengthA,ACollation);
 | ||
|   InitContext(@ctxB,AStrB,ALengthB,ACollation);
 | ||
|   lastKeyIndexA := -1;
 | ||
|   keyIndexA := -1;
 | ||
|   lengthMaxA := 0;
 | ||
|   keyIndexB := -1;
 | ||
|   while True do begin
 | ||
|     if not ComputeRawSortKeyNextItem(@ctxA) then
 | ||
|       Break;
 | ||
|     if (ctxA.ral = lengthMaxA) then
 | ||
|       Continue;
 | ||
|     lengthMaxA := ctxA.ral;
 | ||
|     keyIndexA := lastKeyIndexA + 1;
 | ||
|     while (keyIndexA < lengthMaxA) and (ctxA.r[keyIndexA].Weights[0] = 0) do begin
 | ||
|       Inc(keyIndexA);
 | ||
|     end;
 | ||
|     if (keyIndexA = lengthMaxA) then begin
 | ||
|       lastKeyIndexA := keyIndexA-1;
 | ||
|       Continue;
 | ||
|     end;
 | ||
| 
 | ||
|     while (keyIndexA < lengthMaxA) do begin
 | ||
|       if (ctxA.r[keyIndexA].Weights[0] = 0) then begin
 | ||
|         Inc(keyIndexA);
 | ||
|         Continue;
 | ||
|       end;
 | ||
|       Inc(keyIndexB);
 | ||
|       while (ctxB.ral <= keyIndexB) or (ctxB.r[keyIndexB].Weights[0] = 0) do begin
 | ||
|         if (ctxB.ral <= keyIndexB) then begin
 | ||
|           if not ComputeRawSortKeyNextItem(@ctxB) then
 | ||
|             Break;
 | ||
|           Continue;
 | ||
|         end;
 | ||
|         Inc(keyIndexB);
 | ||
|       end;
 | ||
|       if (ctxB.ral <= keyIndexB) then
 | ||
|         exit(1);
 | ||
|       if (ctxA.r[keyIndexA].Weights[0] > ctxB.r[keyIndexB].Weights[0]) then
 | ||
|         exit(1);
 | ||
|       if (ctxA.r[keyIndexA].Weights[0] < ctxB.r[keyIndexB].Weights[0]) then
 | ||
|         exit(-1);
 | ||
|       Inc(keyIndexA);
 | ||
|     end;
 | ||
|     lastKeyIndexA := keyIndexA - 1;
 | ||
|   end;
 | ||
|   //Key(A) is completed !
 | ||
|   Inc(keyIndexB);
 | ||
|   while (ctxB.ral <= keyIndexB) or (ctxB.r[keyIndexB].Weights[0] = 0) do begin
 | ||
|     if (ctxB.ral <= keyIndexB) then begin
 | ||
|       if not ComputeRawSortKeyNextItem(@ctxB) then
 | ||
|         Break;
 | ||
|       Continue;
 | ||
|     end;
 | ||
|     Inc(keyIndexB);
 | ||
|   end;
 | ||
|   if (ctxB.ral > keyIndexB) then begin
 | ||
|     //B has at least one more primary weight that A
 | ||
|     exit(-1);
 | ||
|   end;
 | ||
|   while ComputeRawSortKeyNextItem(@ctxB) do begin
 | ||
|     //
 | ||
|   end;
 | ||
|   //Key(B) is completed !
 | ||
|   keyA := FormKey(ctxA.r,ctxA.Collation);
 | ||
|   keyB := FormKey(ctxB.r,ctxB.Collation);
 | ||
|   Result := CompareSortKey(keyA,keyB);
 | ||
| end;
 | ||
| 
 | ||
| function IncrementalCompareString_Shift(
 | ||
|   const AStrA      : PUnicodeChar;
 | ||
|   const ALengthA   : SizeInt;
 | ||
|   const AStrB      : PUnicodeChar;
 | ||
|   const ALengthB   : SizeInt;
 | ||
|   const ACollation : PUCA_DataBook
 | ||
| ) : Integer;
 | ||
| var
 | ||
|   ctxA, ctxB : TComputeKeyContext;
 | ||
|   lastKeyIndexA, keyIndexA, lengthMaxA : Integer;
 | ||
|   keyIndexB : Integer;
 | ||
|   keyA, keyB : TUCASortKey;
 | ||
| begin
 | ||
|   if ( (ALengthA = 0) and (ALengthB = 0) ) or
 | ||
|      ( (PtrUInt(AStrA) = PtrUInt(AStrB)) and
 | ||
|        (ALengthA = ALengthB)
 | ||
|      )
 | ||
|   then
 | ||
|     exit(0);
 | ||
|   if (ALengthA = 0) then
 | ||
|     exit(-1);
 | ||
|   if (ALengthB = 0) then
 | ||
|     exit(1);
 | ||
| 
 | ||
|   InitContext(@ctxA,AStrA,ALengthA,ACollation);
 | ||
|   InitContext(@ctxB,AStrB,ALengthB,ACollation);
 | ||
|   lastKeyIndexA := -1;
 | ||
|   keyIndexA := -1;
 | ||
|   lengthMaxA := 0;
 | ||
|   keyIndexB := -1;
 | ||
|   while True do begin
 | ||
|     if not ComputeRawSortKeyNextItem(@ctxA) then
 | ||
|       Break;
 | ||
|     if (ctxA.ral = lengthMaxA) then
 | ||
|       Continue;
 | ||
|     lengthMaxA := ctxA.ral;
 | ||
|     keyIndexA := lastKeyIndexA + 1;
 | ||
|     while (keyIndexA < lengthMaxA) and
 | ||
|           ( (ctxA.r[keyIndexA].Weights[0] = 0) or
 | ||
|             ctxA.Collation^.IsVariable(@ctxA.r[keyIndexA].Weights)
 | ||
|           )
 | ||
|     do begin
 | ||
|       Inc(keyIndexA);
 | ||
|     end;
 | ||
|     if (keyIndexA = lengthMaxA) then begin
 | ||
|       lastKeyIndexA := keyIndexA-1;
 | ||
|       Continue;
 | ||
|     end;
 | ||
| 
 | ||
|     while (keyIndexA < lengthMaxA) do begin
 | ||
|       if (ctxA.r[keyIndexA].Weights[0] = 0) or
 | ||
|          ctxA.Collation^.IsVariable(@ctxA.r[keyIndexA].Weights)
 | ||
|       then begin
 | ||
|         Inc(keyIndexA);
 | ||
|         Continue;
 | ||
|       end;
 | ||
|       Inc(keyIndexB);
 | ||
|       while (ctxB.ral <= keyIndexB) or
 | ||
|             (ctxB.r[keyIndexB].Weights[0] = 0) or
 | ||
|             ctxB.Collation^.IsVariable(@ctxB.r[keyIndexB].Weights)
 | ||
|       do begin
 | ||
|         if (ctxB.ral <= keyIndexB) then begin
 | ||
|           if not ComputeRawSortKeyNextItem(@ctxB) then
 | ||
|             Break;
 | ||
|           Continue;
 | ||
|         end;
 | ||
|         Inc(keyIndexB);
 | ||
|       end;
 | ||
|       if (ctxB.ral <= keyIndexB) then
 | ||
|         exit(1);
 | ||
|       if (ctxA.r[keyIndexA].Weights[0] > ctxB.r[keyIndexB].Weights[0]) then
 | ||
|         exit(1);
 | ||
|       if (ctxA.r[keyIndexA].Weights[0] < ctxB.r[keyIndexB].Weights[0]) then
 | ||
|         exit(-1);
 | ||
|       Inc(keyIndexA);
 | ||
|     end;
 | ||
|     lastKeyIndexA := keyIndexA - 1;
 | ||
|   end;
 | ||
|   //Key(A) is completed !
 | ||
|   Inc(keyIndexB);
 | ||
|   while (ctxB.ral <= keyIndexB) or
 | ||
|         (ctxB.r[keyIndexB].Weights[0] = 0) or
 | ||
|         ctxB.Collation^.IsVariable(@ctxB.r[keyIndexB].Weights)
 | ||
|   do begin
 | ||
|     if (ctxB.ral <= keyIndexB) then begin
 | ||
|       if not ComputeRawSortKeyNextItem(@ctxB) then
 | ||
|         Break;
 | ||
|       Continue;
 | ||
|     end;
 | ||
|     Inc(keyIndexB);
 | ||
|   end;
 | ||
|   if (ctxB.ral > keyIndexB) then begin
 | ||
|     //B has at least one more primary weight that A
 | ||
|     exit(-1);
 | ||
|   end;
 | ||
|   while ComputeRawSortKeyNextItem(@ctxB) do begin
 | ||
|     //
 | ||
|   end;
 | ||
|   //Key(B) is completed !
 | ||
|   keyA := FormKey(ctxA.r,ctxA.Collation);
 | ||
|   keyB := FormKey(ctxB.r,ctxB.Collation);
 | ||
|   Result := CompareSortKey(keyA,keyB);
 | ||
| end;
 | ||
| 
 | ||
| function IncrementalCompareString(
 | ||
|   const AStrA      : PUnicodeChar;
 | ||
|   const ALengthA   : SizeInt;
 | ||
|   const AStrB      : PUnicodeChar;
 | ||
|   const ALengthB   : SizeInt;
 | ||
|   const ACollation : PUCA_DataBook
 | ||
| ) : Integer;
 | ||
| begin
 | ||
|   case ACollation^.VariableWeight of
 | ||
|     TUCA_VariableKind.ucaNonIgnorable :
 | ||
|       begin
 | ||
|         Result := IncrementalCompareString_NonIgnorable(
 | ||
|                     AStrA,ALengthA,AStrB,ALengthB,ACollation
 | ||
|                   );
 | ||
|       end;
 | ||
|     TUCA_VariableKind.ucaBlanked,
 | ||
|     TUCA_VariableKind.ucaShiftedTrimmed,
 | ||
|     TUCA_VariableKind.ucaIgnoreSP,
 | ||
|     TUCA_VariableKind.ucaShifted:
 | ||
|       begin
 | ||
|         Result := IncrementalCompareString_Shift(
 | ||
|                     AStrA,ALengthA,AStrB,ALengthB,ACollation
 | ||
|                   );
 | ||
|       end;
 | ||
|     else
 | ||
|       begin
 | ||
|         Result := IncrementalCompareString_Shift(
 | ||
|                     AStrA,ALengthA,AStrB,ALengthB,ACollation
 | ||
|                   );
 | ||
|       end;
 | ||
|   end;
 | ||
| end;
 | ||
| 
 | ||
| function IncrementalCompareString(
 | ||
|   const AStrA,
 | ||
|         AStrB      : UnicodeString;
 | ||
|   const ACollation : PUCA_DataBook
 | ||
| ) : Integer;
 | ||
| begin
 | ||
|   Result := IncrementalCompareString(
 | ||
|               Pointer(AStrA),Length(AStrA),Pointer(AStrB),Length(AStrB),
 | ||
|               ACollation
 | ||
|             );
 | ||
| end;
 | ||
| 
 | ||
| function ComputeRawSortKeyNextItem(
 | ||
|   const AContext : PComputeKeyContext
 | ||
| ) : Boolean;
 | ||
| var
 | ||
|   ctx : PComputeKeyContext;
 | ||
| 
 | ||
|   procedure GrowKey(const AMinGrow : Integer = 0);inline;
 | ||
|   begin
 | ||
|     if (ctx^.rl < AMinGrow) then
 | ||
|       ctx^.rl := ctx^.rl + AMinGrow
 | ||
|     else
 | ||
|       ctx^.rl := 2 * ctx^.rl;
 | ||
|     SetLength(ctx^.r,ctx^.rl);
 | ||
|   end;
 | ||
| 
 | ||
|   procedure SaveKeyOwner();
 | ||
|   var
 | ||
|     k : Integer;
 | ||
|     kppLevel : Byte;
 | ||
|   begin
 | ||
|     k := 0;
 | ||
|     kppLevel := High(Byte);
 | ||
|     while (k <= ctx^.locHistoryTop) do begin
 | ||
|       if (kppLevel <> ctx^.locHistory[k].ppLevel) then begin
 | ||
|         ctx^.LastKeyOwner.Chars[k] := ctx^.locHistory[k].cp;
 | ||
|         kppLevel := ctx^.locHistory[k].ppLevel;
 | ||
|       end;
 | ||
|       k := k + 1;
 | ||
|     end;
 | ||
|     if (k = 0) or (kppLevel <> ctx^.ppLevel) then begin
 | ||
|       ctx^.LastKeyOwner.Chars[k] := ctx^.cp;
 | ||
|       k := k + 1;
 | ||
|     end;
 | ||
|     ctx^.LastKeyOwner.Length := k;
 | ||
|   end;
 | ||
| 
 | ||
|   procedure AddWeights(AItem : PUCA_PropItemRec);inline;
 | ||
|   begin
 | ||
|     SaveKeyOwner();
 | ||
|     if ((ctx^.ral + AItem^.WeightLength) > ctx^.rl) then
 | ||
|       GrowKey(AItem^.WeightLength);
 | ||
|     AItem^.GetWeightArray(@ctx^.r[ctx^.ral]);
 | ||
|     ctx^.ral := ctx^.ral + AItem^.WeightLength;
 | ||
|   end;
 | ||
| 
 | ||
|   procedure AddContextWeights(AItem : PUCA_PropItemContextRec);inline;
 | ||
|   begin
 | ||
|     if ((ctx^.ral + AItem^.WeightCount) > ctx^.rl) then
 | ||
|       GrowKey(AItem^.WeightCount);
 | ||
|     Move(AItem^.GetWeights()^,ctx^.r[ctx^.ral],(AItem^.WeightCount*SizeOf(ctx^.r[0])));
 | ||
|     ctx^.ral := ctx^.ral + AItem^.WeightCount;
 | ||
|   end;
 | ||
| 
 | ||
|   procedure AddComputedWeights(ACodePoint : Cardinal);inline;
 | ||
|   begin
 | ||
|     SaveKeyOwner();
 | ||
|     if ((ctx^.ral + 2) > ctx^.rl) then
 | ||
|       GrowKey();
 | ||
|     DeriveWeight(ACodePoint,@ctx^.r[ctx^.ral]);
 | ||
|     ctx^.ral := ctx^.ral + 2;
 | ||
|   end;
 | ||
| 
 | ||
|   procedure RecordDeletion();inline;
 | ||
|   begin
 | ||
|     if ctx^.pp^.IsValid() and ctx^.pp^.IsDeleted() (*pp^.GetWeightLength() = 0*) then begin
 | ||
|       if (ctx^.suppressState.cl = nil) or
 | ||
|          (ctx^.suppressState.CharCount > ctx^.ppLevel)
 | ||
|       then begin
 | ||
|         ctx^.suppressState.cl := ctx^.cl;
 | ||
|         ctx^.suppressState.CharCount := ctx^.ppLevel;
 | ||
|       end;
 | ||
|     end;
 | ||
|   end;
 | ||
| 
 | ||
|   procedure RecordStep();inline;
 | ||
|   begin
 | ||
|     Inc(ctx^.locHistoryTop);
 | ||
|     ctx^.locHistory[ctx^.locHistoryTop].i := ctx^.i;
 | ||
|     ctx^.locHistory[ctx^.locHistoryTop].cl := ctx^.cl;
 | ||
|     ctx^.locHistory[ctx^.locHistoryTop].pp := ctx^.pp;
 | ||
|     ctx^.locHistory[ctx^.locHistoryTop].ppLevel := ctx^.ppLevel;
 | ||
|     ctx^.locHistory[ctx^.locHistoryTop].cp := ctx^.cp;
 | ||
|     ctx^.locHistory[ctx^.locHistoryTop].removedCharIndexLength := ctx^.removedCharIndexLength;
 | ||
|     RecordDeletion();
 | ||
|   end;
 | ||
| 
 | ||
|   procedure ClearHistory();inline;
 | ||
|   begin
 | ||
|     ctx^.locHistoryTop := -1;
 | ||
|   end;
 | ||
| 
 | ||
|   function HasHistory() : Boolean;inline;
 | ||
|   begin
 | ||
|     Result := (ctx^.locHistoryTop >= 0);
 | ||
|   end;
 | ||
| 
 | ||
|   function GetHistoryLength() : Integer;inline;
 | ||
|   begin
 | ||
|     Result := (ctx^.locHistoryTop + 1);
 | ||
|   end;
 | ||
| 
 | ||
|   procedure GoBack();inline;
 | ||
|   begin
 | ||
|     Assert(ctx^.locHistoryTop >= 0);
 | ||
|     ctx^.i := ctx^.locHistory[ctx^.locHistoryTop].i;
 | ||
|     ctx^.cp := ctx^.locHistory[ctx^.locHistoryTop].cp;
 | ||
|     ctx^.cl := ctx^.locHistory[ctx^.locHistoryTop].cl;
 | ||
|     ctx^.pp := ctx^.locHistory[ctx^.locHistoryTop].pp;
 | ||
|     ctx^.ppLevel := ctx^.locHistory[ctx^.locHistoryTop].ppLevel;
 | ||
|     ctx^.removedCharIndexLength := ctx^.locHistory[ctx^.locHistoryTop].removedCharIndexLength;
 | ||
|     ctx^.ps := @ctx^.s[ctx^.i];
 | ||
|     Dec(ctx^.locHistoryTop);
 | ||
|   end;
 | ||
| 
 | ||
|   function IsUnblockedNonstarter(const AStartFrom : Integer) : Boolean;
 | ||
|   var
 | ||
|     k : DWord;
 | ||
|     pk : PUnicodeChar;
 | ||
|     puk : PUC_Prop;
 | ||
|   begin
 | ||
|     k := AStartFrom;
 | ||
|     if (k > ctx^.c) then
 | ||
|       exit(False);
 | ||
|     if (ctx^.removedCharIndexLength>0) and
 | ||
|        (IndexDWord(ctx^.removedCharIndex[0],ctx^.removedCharIndexLength,k) >= 0)
 | ||
|     then begin
 | ||
|       exit(False);
 | ||
|     end;
 | ||
|     {if (k = (i+1)) or
 | ||
|        ( (k = (i+2)) and UnicodeIsHighSurrogate(s[i]) )
 | ||
|     then
 | ||
|       lastUnblockedNonstarterCCC := 0;}
 | ||
|     pk := @ctx^.s[k];
 | ||
|     if UnicodeIsHighSurrogate(pk^) then begin
 | ||
|       if (k = ctx^.c) then
 | ||
|         exit(False);
 | ||
|       if UnicodeIsLowSurrogate(pk[1]) then
 | ||
|         puk := GetProps(pk[0],pk[1])
 | ||
|       else
 | ||
|         puk := GetProps(Word(pk^));
 | ||
|     end else begin
 | ||
|       puk := GetProps(Word(pk^));
 | ||
|     end;
 | ||
|     if (puk^.CCC = 0) or (ctx^.lastUnblockedNonstarterCCC >= puk^.CCC) then
 | ||
|       exit(False);
 | ||
|     ctx^.lastUnblockedNonstarterCCC := puk^.CCC;
 | ||
|     Result := True;
 | ||
|   end;
 | ||
| 
 | ||
|   procedure RemoveChar(APos : Integer);inline;
 | ||
|   begin
 | ||
|     if (ctx^.removedCharIndexLength >= Length(ctx^.removedCharIndex)) then
 | ||
|       SetLength(ctx^.removedCharIndex,(2*ctx^.removedCharIndexLength + 2));
 | ||
|     ctx^.removedCharIndex[ctx^.removedCharIndexLength] := APos;
 | ||
|     Inc(ctx^.removedCharIndexLength);
 | ||
|     if UnicodeIsHighSurrogate(ctx^.s[APos]) and (APos < ctx^.c) and UnicodeIsLowSurrogate(ctx^.s[APos+1]) then begin
 | ||
|       if (ctx^.removedCharIndexLength >= Length(ctx^.removedCharIndex)) then
 | ||
|           SetLength(ctx^.removedCharIndex,(2*ctx^.removedCharIndexLength + 2));
 | ||
|         ctx^.removedCharIndex[ctx^.removedCharIndexLength] := APos+1;
 | ||
|         Inc(ctx^.removedCharIndexLength);
 | ||
|     end;
 | ||
|   end;
 | ||
| 
 | ||
|   procedure Inc_I();inline;
 | ||
|   begin
 | ||
|     if (ctx^.removedCharIndexLength = 0) then begin
 | ||
|       Inc(ctx^.i);
 | ||
|       Inc(ctx^.ps);
 | ||
|       exit;
 | ||
|     end;
 | ||
|     while True do begin
 | ||
|       Inc(ctx^.i);
 | ||
|       Inc(ctx^.ps);
 | ||
|       if (IndexDWord(ctx^.removedCharIndex[0],ctx^.removedCharIndexLength,ctx^.i) = -1) then
 | ||
|         Break;
 | ||
|     end;
 | ||
|   end;
 | ||
| 
 | ||
|   function MoveToNextChar() : Boolean;inline;
 | ||
|   begin
 | ||
|     Result := True;
 | ||
|     if UnicodeIsHighSurrogate(ctx^.ps[0]) then begin
 | ||
|       if (ctx^.i = ctx^.c) then
 | ||
|         exit(False);
 | ||
|       if UnicodeIsLowSurrogate(ctx^.ps[1]) then begin
 | ||
|         ctx^.surrogateState := True;
 | ||
|         ctx^.cp := ToUCS4(ctx^.ps[0],ctx^.ps[1]);
 | ||
|       end else begin
 | ||
|         ctx^.surrogateState := False;
 | ||
|         ctx^.cp := Word(ctx^.ps[0]);
 | ||
|       end;
 | ||
|     end else begin
 | ||
|       ctx^.surrogateState := False;
 | ||
|       ctx^.cp := Word(ctx^.ps[0]);
 | ||
|     end;
 | ||
|   end;
 | ||
| 
 | ||
|   function FindPropUCA() : Boolean;
 | ||
|   var
 | ||
|     candidateCL : PUCA_DataBook;
 | ||
|   begin
 | ||
|     ctx^.pp := nil;
 | ||
|     if (ctx^.cl = nil) then
 | ||
|       candidateCL := ctx^.Collation
 | ||
|     else
 | ||
|       candidateCL := ctx^.cl;
 | ||
|     if ctx^.surrogateState then begin
 | ||
|       while (candidateCL <> nil) do begin
 | ||
|         ctx^.pp := GetPropUCA(ctx^.ps[0],ctx^.ps[1],candidateCL);
 | ||
|         if (ctx^.pp <> nil) then
 | ||
|           break;
 | ||
|         candidateCL := candidateCL^.Base;
 | ||
|       end;
 | ||
|     end else begin
 | ||
|       while (candidateCL <> nil) do begin
 | ||
|         ctx^.pp := GetPropUCA(ctx^.ps[0],candidateCL);
 | ||
|         if (ctx^.pp <> nil) then
 | ||
|           break;
 | ||
|         candidateCL := candidateCL^.Base;
 | ||
|       end;
 | ||
|     end;
 | ||
|     ctx^.cl := candidateCL;
 | ||
|     Result := (ctx^.pp <> nil);
 | ||
|   end;
 | ||
| 
 | ||
|   procedure AddWeightsAndClear();inline;
 | ||
|   var
 | ||
|     ctxNode : PUCA_PropItemContextTreeNodeRec;
 | ||
|   begin
 | ||
|     if (ctx^.pp^.WeightLength > 0) then begin
 | ||
|       AddWeights(ctx^.pp);
 | ||
|     end else
 | ||
|     if (ctx^.LastKeyOwner.Length > 0) and ctx^.pp^.Contextual and
 | ||
|        ctx^.pp^.GetContext()^.Find(@ctx^.LastKeyOwner.Chars[0],ctx^.LastKeyOwner.Length,ctxNode) and
 | ||
|        (ctxNode^.Data.WeightCount > 0)
 | ||
|     then begin
 | ||
|       AddContextWeights(@ctxNode^.Data);
 | ||
|     end;
 | ||
|     //AddWeights(pp);
 | ||
|     ClearHistory();
 | ||
|     ClearPP(ctx);
 | ||
|   end;
 | ||
| 
 | ||
|   function StartMatch() : Boolean;
 | ||
| 
 | ||
|     procedure HandleLastChar();
 | ||
|     var
 | ||
|       ctxNode : PUCA_PropItemContextTreeNodeRec;
 | ||
|     begin
 | ||
|       while True do begin
 | ||
|         if ctx^.pp^.IsValid() then begin
 | ||
|           if (ctx^.pp^.WeightLength > 0) then
 | ||
|             AddWeights(ctx^.pp)
 | ||
|           else
 | ||
|           if (ctx^.LastKeyOwner.Length > 0) and ctx^.pp^.Contextual and
 | ||
|              ctx^.pp^.GetContext()^.Find(@ctx^.LastKeyOwner.Chars[0],ctx^.LastKeyOwner.Length,ctxNode) and
 | ||
|              (ctxNode^.Data.WeightCount > 0)
 | ||
|           then
 | ||
|             AddContextWeights(@ctxNode^.Data)
 | ||
|           else
 | ||
|             AddComputedWeights(ctx^.cp){handle deletion of code point};
 | ||
|           break;
 | ||
|         end;
 | ||
|         if (ctx^.cl^.Base = nil) then begin
 | ||
|           AddComputedWeights(ctx^.cp);
 | ||
|           break;
 | ||
|         end;
 | ||
|         ctx^.cl := ctx^.cl^.Base;
 | ||
|         if not FindPropUCA() then begin
 | ||
|           AddComputedWeights(ctx^.cp);
 | ||
|           break;
 | ||
|         end;
 | ||
|       end;
 | ||
|     end;
 | ||
|   var
 | ||
|     tmpCtxNode : PUCA_PropItemContextTreeNodeRec;
 | ||
|   begin
 | ||
|     Result := False;
 | ||
|     ctx^.ppLevel := 0;
 | ||
|     if not FindPropUCA() then begin
 | ||
|       AddComputedWeights(ctx^.cp);
 | ||
|       ClearHistory();
 | ||
|       ClearPP(ctx);
 | ||
|       Result := True;
 | ||
|     end else begin
 | ||
|       if (ctx^.i = ctx^.c) then begin
 | ||
|         HandleLastChar();
 | ||
|         Result := True;
 | ||
|       end else begin
 | ||
|         if ctx^.pp^.IsValid()then begin
 | ||
|           if (ctx^.pp^.ChildCount = 0) then begin
 | ||
|             if (ctx^.pp^.WeightLength > 0) then
 | ||
|               AddWeights(ctx^.pp)
 | ||
|             else
 | ||
|             if (ctx^.LastKeyOwner.Length > 0) and ctx^.pp^.Contextual and
 | ||
|                ctx^.pp^.GetContext()^.Find(@ctx^.LastKeyOwner.Chars[0],ctx^.LastKeyOwner.Length,tmpCtxNode) and
 | ||
|                (tmpCtxNode^.Data.WeightCount > 0)
 | ||
|             then
 | ||
|               AddContextWeights(@tmpCtxNode^.Data)
 | ||
|             else
 | ||
|               AddComputedWeights(ctx^.cp){handle deletion of code point};
 | ||
|             ClearPP(ctx);
 | ||
|             ClearHistory();
 | ||
|             Result := True;
 | ||
|           end else begin
 | ||
|             RecordStep();
 | ||
|           end
 | ||
|         end else begin
 | ||
|           if (ctx^.pp^.ChildCount = 0) then begin
 | ||
|             AddComputedWeights(ctx^.cp);
 | ||
|             ClearPP(ctx);
 | ||
|             ClearHistory();
 | ||
|             Result := True;
 | ||
|           end else begin
 | ||
|             RecordStep();
 | ||
|           end;
 | ||
|         end;
 | ||
|       end;
 | ||
|     end;
 | ||
|   end;
 | ||
| 
 | ||
|   function TryPermutation() : Boolean;
 | ||
|   var
 | ||
|     kk : Integer;
 | ||
|     b : Boolean;
 | ||
|     puk : PUC_Prop;
 | ||
|     ppk : PUCA_PropItemRec;
 | ||
|   begin
 | ||
|     Result := False;
 | ||
|     puk := GetProps(ctx^.cp);
 | ||
|     if (puk^.CCC = 0) then
 | ||
|       exit;
 | ||
|     ctx^.lastUnblockedNonstarterCCC := puk^.CCC;
 | ||
|     if ctx^.surrogateState then
 | ||
|       kk := ctx^.i + 2
 | ||
|     else
 | ||
|       kk := ctx^.i + 1;
 | ||
|     while IsUnblockedNonstarter(kk) do begin
 | ||
|       b := UnicodeIsHighSurrogate(ctx^.s[kk]) and (kk<ctx^.c) and UnicodeIsLowSurrogate(ctx^.s[kk+1]);
 | ||
|       if b then
 | ||
|         ppk := FindChild(ToUCS4(ctx^.s[kk],ctx^.s[kk+1]),ctx^.pp)
 | ||
|       else
 | ||
|         ppk := FindChild(Word(ctx^.s[kk]),ctx^.pp);
 | ||
|       if (ppk <> nil) then begin
 | ||
|         ctx^.pp := ppk;
 | ||
|         RemoveChar(kk);
 | ||
|         Inc(ctx^.ppLevel);
 | ||
|         RecordStep();
 | ||
|         Result := True;
 | ||
|         if (ctx^.pp^.ChildCount = 0 ) then
 | ||
|           Break;
 | ||
|       end;
 | ||
|       if b then
 | ||
|         Inc(kk);
 | ||
|       Inc(kk);
 | ||
|     end;
 | ||
|   end;
 | ||
| 
 | ||
|   procedure AdvanceCharPos();inline;
 | ||
|   begin
 | ||
|     if UnicodeIsHighSurrogate(ctx^.ps[0]) and (ctx^.i<ctx^.c) and UnicodeIsLowSurrogate(ctx^.ps[1]) then begin
 | ||
|       Inc(ctx^.i);
 | ||
|       Inc(ctx^.ps);
 | ||
|     end;
 | ||
|     Inc_I();
 | ||
|   end;
 | ||
| 
 | ||
| var
 | ||
|   ok : Boolean;
 | ||
|   pp1 : PUCA_PropItemRec;
 | ||
|   cltemp : PUCA_DataBook;
 | ||
|   ctxNode : PUCA_PropItemContextTreeNodeRec;
 | ||
| begin
 | ||
|   if AContext^.Finished then
 | ||
|     exit(False);
 | ||
|   ctx := AContext;
 | ||
|   while (ctx^.i <= ctx^.c) and MoveToNextChar() do begin
 | ||
|     ok := False;
 | ||
|     if (ctx^.pp = nil) then begin // Start Matching
 | ||
|       ok := StartMatch();
 | ||
|     end else begin
 | ||
|       pp1 := FindChild(ctx^.cp,ctx^.pp);
 | ||
|       if (pp1 <> nil) then begin
 | ||
|         Inc(ctx^.ppLevel);
 | ||
|         ctx^.pp := pp1;
 | ||
|         if (ctx^.pp^.ChildCount = 0) or (ctx^.i = ctx^.c) then begin
 | ||
|           ok := False;
 | ||
|           if ctx^.pp^.IsValid() and (ctx^.suppressState.CharCount = 0) then begin
 | ||
|             if (ctx^.pp^.WeightLength > 0) then begin
 | ||
|               AddWeightsAndClear();
 | ||
|               ok := True;
 | ||
|             end else
 | ||
|             if (ctx^.LastKeyOwner.Length > 0) and ctx^.pp^.Contextual and
 | ||
|                ctx^.pp^.GetContext()^.Find(@ctx^.LastKeyOwner.Chars[0],ctx^.LastKeyOwner.Length,ctxNode) and
 | ||
|                (ctxNode^.Data.WeightCount > 0)
 | ||
|             then begin
 | ||
|               AddContextWeights(@ctxNode^.Data);
 | ||
|               ClearHistory();
 | ||
|               ClearPP(ctx);
 | ||
|               ok := True;
 | ||
|             end
 | ||
|           end;
 | ||
|           if not ok then begin
 | ||
|             RecordDeletion();
 | ||
|             while HasHistory() do begin
 | ||
|               GoBack();
 | ||
|               if ctx^.pp^.IsValid() and
 | ||
|                  ( ( (ctx^.cl = ctx^.suppressState.cl) and (ctx^.ppLevel <> ctx^.suppressState.CharCount) ) or
 | ||
|                    ( (ctx^.cl <> ctx^.suppressState.cl) and (ctx^.ppLevel < ctx^.suppressState.CharCount) )
 | ||
|                  )
 | ||
|               then begin
 | ||
|                 AddWeightsAndClear();
 | ||
|                 ok := True;
 | ||
|                 Break;
 | ||
|               end;
 | ||
|             end;
 | ||
|             if not ok then begin
 | ||
|               cltemp := ctx^.cl^.Base;
 | ||
|               if (cltemp <> nil) then begin
 | ||
|                 ClearPP(ctx,False);
 | ||
|                 ctx^.cl := cltemp;
 | ||
|                 Continue;
 | ||
|               end;
 | ||
|             end;
 | ||
| 
 | ||
|             if not ok then begin
 | ||
|               AddComputedWeights(ctx^.cp);
 | ||
|               ClearHistory();
 | ||
|               ClearPP(ctx);
 | ||
|               ok := True;
 | ||
|             end;
 | ||
|           end;
 | ||
|         end else begin
 | ||
|           RecordStep();
 | ||
|         end;
 | ||
|       end else begin
 | ||
|         // permutations !
 | ||
|         ok := False;
 | ||
|         if TryPermutation() and ctx^.pp^.IsValid() then begin
 | ||
|           if (ctx^.suppressState.CharCount = 0) then begin
 | ||
|             AddWeightsAndClear();
 | ||
|             ok := True;
 | ||
|             exit(True);// Continue;
 | ||
|           end;
 | ||
|           while True do begin
 | ||
|             if ctx^.pp^.IsValid() and
 | ||
|                (ctx^.pp^.WeightLength > 0) and
 | ||
|                ( ( (ctx^.cl = ctx^.suppressState.cl) and (ctx^.ppLevel <> ctx^.suppressState.CharCount) ) or
 | ||
|                  ( (ctx^.cl <> ctx^.suppressState.cl) and (ctx^.ppLevel < ctx^.suppressState.CharCount) )
 | ||
|                )
 | ||
|             then begin
 | ||
|               AddWeightsAndClear();
 | ||
|               ok := True;
 | ||
|               break;
 | ||
|             end;
 | ||
|             if not HasHistory() then
 | ||
|               break;
 | ||
|             GoBack();
 | ||
|             if (ctx^.pp = nil) then
 | ||
|               break;
 | ||
|           end;
 | ||
|         end;
 | ||
|         if not ok then begin
 | ||
|           if ctx^.pp^.IsValid() and (ctx^.suppressState.CharCount = 0) then begin
 | ||
|             if (ctx^.pp^.WeightLength > 0) then begin
 | ||
|               AddWeightsAndClear();
 | ||
|               ok := True;
 | ||
|             end else
 | ||
|             if (ctx^.LastKeyOwner.Length > 0) and ctx^.pp^.Contextual and
 | ||
|                ctx^.pp^.GetContext()^.Find(@ctx^.LastKeyOwner.Chars[0],ctx^.LastKeyOwner.Length,ctxNode) and
 | ||
|                (ctxNode^.Data.WeightCount > 0)
 | ||
|             then begin
 | ||
|               AddContextWeights(@ctxNode^.Data);
 | ||
|               ClearHistory();
 | ||
|               ClearPP(ctx);
 | ||
|               ok := True;
 | ||
|             end
 | ||
|           end;
 | ||
|           if ok then
 | ||
|             exit(True);// Continue;
 | ||
|         end;
 | ||
|         if not ok then begin
 | ||
|           if (ctx^.cl^.Base <> nil) then begin
 | ||
|             cltemp := ctx^.cl^.Base;
 | ||
|             while HasHistory() do
 | ||
|               GoBack();
 | ||
|             ctx^.pp := nil;
 | ||
|             ctx^.ppLevel := 0;
 | ||
|             ctx^.cl := cltemp;
 | ||
|             Continue;
 | ||
|           end;
 | ||
| 
 | ||
|           //walk back
 | ||
|           ok := False;
 | ||
|           while HasHistory() do begin
 | ||
|             GoBack();
 | ||
|             if ctx^.pp^.IsValid() and
 | ||
|                (ctx^.pp^.WeightLength > 0) and
 | ||
|                ( (ctx^.suppressState.CharCount = 0) or
 | ||
|                  ( ( (ctx^.cl = ctx^.suppressState.cl) and (ctx^.ppLevel <> ctx^.suppressState.CharCount) ) or
 | ||
|                    ( (ctx^.cl <> ctx^.suppressState.cl) and (ctx^.ppLevel < ctx^.suppressState.CharCount) )
 | ||
|                  )
 | ||
|                )
 | ||
|             then begin
 | ||
|               AddWeightsAndClear();
 | ||
|               ok := True;
 | ||
|               Break;
 | ||
|             end;
 | ||
|           end;
 | ||
|           if ok then begin
 | ||
|             AdvanceCharPos();
 | ||
|             exit(True);// Continue;
 | ||
|           end;
 | ||
|           if (ctx^.pp <> nil) then begin
 | ||
|             AddComputedWeights(ctx^.cp);
 | ||
|             ClearHistory();
 | ||
|             ClearPP(ctx);
 | ||
|             ok := True;
 | ||
|           end;
 | ||
|         end;
 | ||
|       end;
 | ||
|     end;
 | ||
|     if ctx^.surrogateState then begin
 | ||
|       Inc(ctx^.ps);
 | ||
|       Inc(ctx^.i);
 | ||
|     end;
 | ||
|     //
 | ||
|     Inc_I();
 | ||
|     if ok then
 | ||
|       exit(True);
 | ||
|   end;
 | ||
|   SetLength(ctx^.r,ctx^.ral);
 | ||
|   ctx^.Finished := True;
 | ||
|   Result := True;
 | ||
| end;
 | ||
| 
 | ||
| function ComputeSortKey(
 | ||
|   const AStr       : PUnicodeChar;
 | ||
|   const ALength    : SizeInt;
 | ||
|   const ACollation : PUCA_DataBook
 | ||
| ) : TUCASortKey;
 | ||
| var
 | ||
|   r : TUCA_PropWeightsArray;
 | ||
| begin
 | ||
|   r := ComputeRawSortKey(AStr,ALength,ACollation);
 | ||
|   Result := FormKey(r,ACollation);
 | ||
| end;
 | ||
| 
 | ||
| end.
 | 
