* apply Inoussa changes to character.pas and related files (part of mantis #0022909)

git-svn-id: trunk@23657 -
This commit is contained in:
paul 2013-02-25 01:46:33 +00:00
parent db6385ef33
commit e549954de6
9 changed files with 13017 additions and 4715 deletions

6
.gitattributes vendored
View File

@ -8435,10 +8435,14 @@ rtl/objpas/sysutils/syswideh.inc svneol=native#text/plain
rtl/objpas/types.pp svneol=native#text/plain
rtl/objpas/typinfo.pp svneol=native#text/plain
rtl/objpas/unicodedata.inc svneol=native#text/pascal
rtl/objpas/unicodedata2.inc svneol=native#text/pascal
rtl/objpas/unicodedata.pas svneol=native#text/pascal
rtl/objpas/unicodedata_be.inc svneol=native#text/pascal
rtl/objpas/unicodedata_le.inc svneol=native#text/pascal
rtl/objpas/unicodenumtable.pas svneol=native#text/pascal
rtl/objpas/utf8bidi.pp svneol=native#text/plain
rtl/objpas/varutilh.inc svneol=native#text/plain
rtl/objpas/varutils.inc svneol=native#text/plain
rtl/objpas/weight_derivation.inc svneol=native#text/pascal
rtl/openbsd/Makefile svneol=native#text/plain
rtl/openbsd/Makefile.fpc svneol=native#text/plain
rtl/openbsd/classes.pp svneol=native#text/plain

View File

@ -1,3 +1,22 @@
{ Unicode "Character" properties handler.
Copyright (c) 2012 by Inoussa OUEDRAOGO
The source code is distributed under the Library GNU
General Public License with the following modification:
- object files and libraries linked into an application may be
distributed without source code.
If you didn't receive a copy of the file COPYING, contact:
Free Software Foundation
675 Mass Ave
Cambridge, MA 02139
USA
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. }
unit character;
interface
@ -6,6 +25,8 @@ interface
{$H+}
{$PACKENUM 1}
{$SCOPEDENUMS ON}
uses
unicodedata;
type
// Unicode General Category
@ -49,6 +70,9 @@ type
);
TUnicodeCategorySet = set of TUnicodeCategory;
TCharacterOption = (coIgnoreInvalidSequence);
TCharacterOptions = set of TCharacterOption;
{ TCharacter }
TCharacter = class sealed
@ -112,10 +136,12 @@ type
class function IsWhiteSpace(const AString : UnicodeString; AIndex : Integer) : Boolean; overload; static;
class function ToLower(AChar : UnicodeChar) : UnicodeChar; overload; static;
class function ToLower(const AString : UnicodeString) : UnicodeString; overload; static;
class function ToLower(const AString : UnicodeString) : UnicodeString; inline;overload; static;
class function ToLower(const AString : UnicodeString; const AOptions : TCharacterOptions) : UnicodeString; overload; static;
class function ToUpper(AChar : UnicodeChar) : UnicodeChar; overload; static;
class function ToUpper(const AString : UnicodeString) : UnicodeString; overload; static;
class function ToUpper(const AString : UnicodeString) : UnicodeString; inline; overload; static;
class function ToUpper(const AString : UnicodeString; const AOptions : TCharacterOptions) : UnicodeString; overload; static;
end;
// flat functions
@ -169,30 +195,6 @@ uses
SysUtils,
RtlConsts;
type
PUC_Prop = ^TUC_Prop;
TUC_Prop = packed record
Category : TUnicodeCategory;
NumericValue : Double;
SimpleUpperCase : DWord;
SimpleLowerCase : DWord;
WhiteSpace : Boolean;
end;
{$INCLUDE unicodedata.inc} // For BMP code points
{$INCLUDE unicodedata2.inc} // For other planes
const
LOW_SURROGATE_BEGIN = Word($DC00);
LOW_SURROGATE_END = Word($DFFF);
HIGH_SURROGATE_BEGIN = Word($D800);
HIGH_SURROGATE_END = Word($DBFF);
HIGH_SURROGATE_COUNT = HIGH_SURROGATE_END - HIGH_SURROGATE_BEGIN + 1;
UCS4_HALF_BASE = LongWord($10000);
UCS4_HALF_MASK = Word($3FF);
MAX_LEGAL_UTF32 = $10FFFF;
const
LETTER_CATEGORIES = [
TUnicodeCategory.ucUppercaseLetter, TUnicodeCategory.ucLowercaseLetter,
@ -221,34 +223,6 @@ const
TUnicodeCategory.ucModifierSymbol, TUnicodeCategory.ucOtherSymbol
];
function GetProps(const ACodePoint : Word) : PUC_Prop; inline;
begin
Result:=
@UC_PROP_ARRAY[
UC_TABLE_2[
(UC_TABLE_1[WordRec(ACodePoint).Hi] * 256) +
WordRec(ACodePoint).Lo
]
];
end;
function GetProps(const AHighS, ALowS : UnicodeChar): PUC_Prop; inline;
begin
Result:=
@UC_PROP_ARRAY[
UCO_TABLE_2[
(UCO_TABLE_1[Word(AHighS)-HIGH_SURROGATE_BEGIN] * HIGH_SURROGATE_COUNT) +
Word(ALowS) - LOW_SURROGATE_BEGIN
]
];
end;
procedure FromUCS4(const AValue : UCS4Char; var AHighS, ALowS : UnicodeChar);
begin
AHighS := UnicodeChar((AValue - $10000) shr 10 + $d800);
ALowS := UnicodeChar((AValue - $10000) and $3ff + $dc00);
end;
function ConvertFromUtf32(AChar: UCS4Char): UnicodeString;
begin
Result := TCharacter.ConvertFromUtf32(AChar);
@ -472,12 +446,12 @@ begin
if (AIndex < 1) or (AIndex > Length(AString)) then
raise EArgumentOutOfRangeException.CreateFmt(SStringIndexOutOfRange, [AIndex, Length(AString)]);
pu := GetProps(Word(AString[AIndex]));
if (pu^.Category = TUnicodeCategory.ucSurrogate) then begin
if (TUnicodeCategory(pu^.Category) = TUnicodeCategory.ucSurrogate) then begin
if not IsSurrogatePair(AString,AIndex) then
raise EArgumentException.Create(SInvalidUnicodeCodePointSequence);
pu := GetProps(AString[AIndex],AString[AIndex+1]);
end;
Result := (pu^.Category = ACategory);
Result := (TUnicodeCategory(pu^.Category) = ACategory);
end;
class function TCharacter.TestCategory(
@ -491,12 +465,12 @@ begin
if (AIndex < 1) or (AIndex > Length(AString)) then
raise EArgumentOutOfRangeException.CreateFmt(SStringIndexOutOfRange, [AIndex, Length(AString)]);
pu := GetProps(Word(AString[AIndex]));
if (pu^.Category = TUnicodeCategory.ucSurrogate) then begin
if (TUnicodeCategory(pu^.Category) = TUnicodeCategory.ucSurrogate) then begin
if not IsSurrogatePair(AString,AIndex) then
raise EArgumentException.Create(SInvalidUnicodeCodePointSequence);
pu := GetProps(AString[AIndex],AString[AIndex+1]);
end;
Result := (pu^.Category in ACategory);
Result := (TUnicodeCategory(pu^.Category) in ACategory);
end;
constructor TCharacter.Create;
@ -558,7 +532,7 @@ begin
raise EArgumentOutOfRangeException.CreateFmt(SHighSurrogateOutOfRange, [Word(AHighSurrogate)]);
if not IsLowSurrogate(ALowSurrogate) then
raise EArgumentOutOfRangeException.CreateFmt(SLowSurrogateOutOfRange, [Word(ALowSurrogate)]);
Result := (UCS4Char(AHighSurrogate) - HIGH_SURROGATE_BEGIN) shl 10 + (UCS4Char(ALowSurrogate) - LOW_SURROGATE_BEGIN) + UCS4_HALF_BASE;
Result := ToUCS4(AHighSurrogate, ALowSurrogate);
end;
class function TCharacter.GetNumericValue(AChar : UnicodeChar) : Double;
@ -576,7 +550,7 @@ begin
if (AIndex < 1) or (AIndex > Length(AString)) then
raise EArgumentOutOfRangeException.CreateFmt(SStringIndexOutOfRange, [AIndex, Length(AString)]);
pu := GetProps(Word(AString[AIndex]));
if (pu^.Category = TUnicodeCategory.ucSurrogate) then begin
if (TUnicodeCategory(pu^.Category) = TUnicodeCategory.ucSurrogate) then begin
if not IsSurrogatePair(AString,AIndex) then
raise EArgumentException.Create(SInvalidUnicodeCodePointSequence);
pu := GetProps(AString[AIndex],AString[AIndex+1]);
@ -586,7 +560,7 @@ end;
class function TCharacter.GetUnicodeCategory(AChar : UnicodeChar) : TUnicodeCategory;
begin
Result := GetProps(Word(AChar))^.Category;
Result := TUnicodeCategory(GetProps(Word(AChar))^.Category);
end;
class function TCharacter.GetUnicodeCategory(
@ -599,17 +573,17 @@ begin
if (AIndex < 1) or (AIndex > Length(AString)) then
raise EArgumentOutOfRangeException.CreateFmt(SStringIndexOutOfRange, [AIndex, Length(AString)]);
pu := GetProps(Word(AString[AIndex]));
if (pu^.Category = TUnicodeCategory.ucSurrogate) then begin
if (TUnicodeCategory(pu^.Category) = TUnicodeCategory.ucSurrogate) then begin
if not IsSurrogatePair(AString,AIndex) then
raise EArgumentException.Create(SInvalidUnicodeCodePointSequence);
pu := GetProps(AString[AIndex],AString[AIndex+1]);
end;
Result := pu^.Category;
Result := TUnicodeCategory(pu^.Category);
end;
class function TCharacter.IsControl(AChar : UnicodeChar) : Boolean;
begin
Result := (GetProps(Word(AChar))^.Category = TUnicodeCategory.ucControl);
Result := (TUnicodeCategory(GetProps(Word(AChar))^.Category) = TUnicodeCategory.ucControl);
end;
class function TCharacter.IsControl(
@ -622,7 +596,7 @@ end;
class function TCharacter.IsDigit(AChar : UnicodeChar) : Boolean;
begin
Result := (GetProps(Word(AChar))^.Category = TUnicodeCategory.ucDecimalNumber);
Result := (TUnicodeCategory(GetProps(Word(AChar))^.Category) = TUnicodeCategory.ucDecimalNumber);
end;
class function TCharacter.IsDigit(
@ -635,7 +609,7 @@ end;
class function TCharacter.IsSurrogate(AChar : UnicodeChar) : Boolean;
begin
Result := (GetProps(Word(AChar))^.Category = TUnicodeCategory.ucSurrogate);
Result := (TUnicodeCategory(GetProps(Word(AChar))^.Category) = TUnicodeCategory.ucSurrogate);
end;
class function TCharacter.IsSurrogate(
@ -650,7 +624,7 @@ end;
class function TCharacter.IsHighSurrogate(AChar : UnicodeChar) : Boolean;
begin
Result := (GetProps(Word(AChar))^.Category = TUnicodeCategory.ucSurrogate) and
Result := (TUnicodeCategory(GetProps(Word(AChar))^.Category) = TUnicodeCategory.ucSurrogate) and
(Word(AChar) >= HIGH_SURROGATE_BEGIN) and
(Word(AChar) <= HIGH_SURROGATE_END);
end;
@ -667,7 +641,7 @@ end;
class function TCharacter.IsLowSurrogate(AChar : UnicodeChar) : Boolean;
begin
Result := (GetProps(Word(AChar))^.Category = TUnicodeCategory.ucSurrogate) and
Result := (TUnicodeCategory(GetProps(Word(AChar))^.Category) = TUnicodeCategory.ucSurrogate) and
(Word(AChar) >= LOW_SURROGATE_BEGIN) and
(Word(AChar) <= LOW_SURROGATE_END);
end;
@ -687,13 +661,7 @@ class function TCharacter.IsSurrogatePair(
ALowSurrogate : UnicodeChar
) : Boolean;
begin
Result :=
( (Word(AHighSurrogate) >= HIGH_SURROGATE_BEGIN) and
(Word(AHighSurrogate) <= HIGH_SURROGATE_END)
) and
( (Word(ALowSurrogate) >= LOW_SURROGATE_BEGIN) and
(Word(ALowSurrogate) <= LOW_SURROGATE_END)
)
Result := UnicodeIsSurrogatePair(AHighSurrogate,ALowSurrogate);
end;
class function TCharacter.IsSurrogatePair(
@ -714,7 +682,7 @@ end;
class function TCharacter.IsLetter(AChar : UnicodeChar) : Boolean;
begin
Result := (GetProps(Word(AChar))^.Category in LETTER_CATEGORIES);
Result := (TUnicodeCategory(GetProps(Word(AChar))^.Category) in LETTER_CATEGORIES);
end;
class function TCharacter.IsLetter(
@ -727,7 +695,7 @@ end;
class function TCharacter.IsLetterOrDigit(AChar : UnicodeChar) : Boolean;
begin
Result := (GetProps(Word(AChar))^.Category in LETTER_OR_DIGIT_CATEGORIES);
Result := (TUnicodeCategory(GetProps(Word(AChar))^.Category) in LETTER_OR_DIGIT_CATEGORIES);
end;
class function TCharacter.IsLetterOrDigit(
@ -740,7 +708,7 @@ end;
class function TCharacter.IsLower(AChar : UnicodeChar) : Boolean;
begin
Result := (GetProps(Word(AChar))^.Category = TUnicodeCategory.ucLowercaseLetter);
Result := (TUnicodeCategory(GetProps(Word(AChar))^.Category) = TUnicodeCategory.ucLowercaseLetter);
end;
class function TCharacter.IsLower(
@ -753,7 +721,7 @@ end;
class function TCharacter.IsNumber(AChar : UnicodeChar) : Boolean;
begin
Result := (GetProps(Word(AChar))^.Category in NUMBER_CATEGORIES);
Result := (TUnicodeCategory(GetProps(Word(AChar))^.Category) in NUMBER_CATEGORIES);
end;
class function TCharacter.IsNumber(
@ -766,7 +734,7 @@ end;
class function TCharacter.IsPunctuation(AChar : UnicodeChar) : Boolean;
begin
Result := (GetProps(Word(AChar))^.Category in PUNCTUATION_CATEGORIES);
Result := (TUnicodeCategory(GetProps(Word(AChar))^.Category) in PUNCTUATION_CATEGORIES);
end;
class function TCharacter.IsPunctuation(
@ -779,7 +747,7 @@ end;
class function TCharacter.IsSeparator(AChar: UnicodeChar): Boolean;
begin
Result := (GetProps(Word(AChar))^.Category in SEPARATOR_CATEGORIES);
Result := (TUnicodeCategory(GetProps(Word(AChar))^.Category) in SEPARATOR_CATEGORIES);
end;
class function TCharacter.IsSeparator(
@ -792,7 +760,7 @@ end;
class function TCharacter.IsSymbol(AChar: UnicodeChar): Boolean;
begin
Result := (GetProps(Word(AChar))^.Category in SYMBOL_CATEGORIES);
Result := (TUnicodeCategory(GetProps(Word(AChar))^.Category) in SYMBOL_CATEGORIES);
end;
class function TCharacter.IsSymbol(
@ -805,7 +773,7 @@ end;
class function TCharacter.IsUpper(AChar : UnicodeChar) : Boolean;
begin
Result := (GetProps(Word(AChar))^.Category = TUnicodeCategory.ucUppercaseLetter);
Result := (TUnicodeCategory(GetProps(Word(AChar))^.Category) = TUnicodeCategory.ucUppercaseLetter);
end;
class function TCharacter.IsUpper(
@ -831,7 +799,7 @@ begin
if (AIndex < 1) or (AIndex > Length(AString)) then
raise EArgumentOutOfRangeException.CreateFmt(SStringIndexOutOfRange, [AIndex, Length(AString)]);
pu := GetProps(Word(AString[AIndex]));
if (pu^.Category = TUnicodeCategory.ucSurrogate) then begin
if (TUnicodeCategory(pu^.Category) = TUnicodeCategory.ucSurrogate) then begin
if not IsSurrogatePair(AString,AIndex) then
raise EArgumentException.Create(SInvalidUnicodeCodePointSequence);
pu := GetProps(AString[AIndex],AString[AIndex+1]);
@ -841,28 +809,43 @@ end;
class function TCharacter.ToLower(AChar : UnicodeChar) : UnicodeChar;
begin
Result := UnicodeChar(GetProps(Word(AChar))^.SimpleLowerCase);
Result := UnicodeChar(Word(GetProps(Word(AChar))^.SimpleLowerCase));
if (Result = UnicodeChar(0)) then
Result := AChar;
end;
class function TCharacter.ToLower(const AString : UnicodeString) : UnicodeString;
begin
Result := ToLower(AString,[]);
end;
class function TCharacter.ToLower(const AString : UnicodeString; const AOptions : TCharacterOptions) : UnicodeString;
var
i, c : SizeInt;
pp, pr : PUnicodeChar;
pu : PUC_Prop;
locIsSurrogate : Boolean;
locIsSurrogate, locIgnoreInvalid : Boolean;
begin
c := Length(AString);
SetLength(Result,2*c);
if (c > 0) then begin
locIgnoreInvalid := (TCharacterOption.coIgnoreInvalidSequence in AOptions);
pp := @AString[1];
pr := @Result[1];
i := 1;
while (i <= c) do begin
pu := GetProps(Word(pp^));
locIsSurrogate := (pu^.Category = TUnicodeCategory.ucSurrogate);
locIsSurrogate := (TUnicodeCategory(pu^.Category) = TUnicodeCategory.ucSurrogate);
if locIsSurrogate then begin
if locIgnoreInvalid then begin
if (i = c) or not(IsSurrogatePair(pp[0],pp[1])) then begin
pr^ := pp^;
Inc(pp);
Inc(pr);
Inc(i);
Continue;
end;
end;
if not IsSurrogatePair(AString,i) then
raise EArgumentException.Create(SInvalidUnicodeCodePointSequence);
pu := GetProps(pp^,AString[i+1]);
@ -879,7 +862,7 @@ begin
if (pu^.SimpleLowerCase <= $FFFF) then begin
pr^ := UnicodeChar(Word(pu^.SimpleLowerCase));
end else begin
FromUCS4(UCS4Char(pu^.SimpleLowerCase),pr^,PUnicodeChar(PtrUInt(pr)+SizeOf(UnicodeChar))^);
FromUCS4(UCS4Char(Cardinal(pu^.SimpleLowerCase)),pr^,PUnicodeChar(PtrUInt(pr)+SizeOf(UnicodeChar))^);
Inc(pr);
end;
if locIsSurrogate then begin
@ -899,28 +882,43 @@ end;
class function TCharacter.ToUpper(AChar : UnicodeChar) : UnicodeChar;
begin
Result := UnicodeChar(GetProps(Word(AChar))^.SimpleUpperCase);
Result := UnicodeChar(Word(GetProps(Word(AChar))^.SimpleUpperCase));
if (Result = UnicodeChar(0)) then
Result := AChar;
end;
class function TCharacter.ToUpper(const AString : UnicodeString) : UnicodeString;
begin
Result := ToUpper(AString,[]);
end;
class function TCharacter.ToUpper(const AString : UnicodeString; const AOptions : TCharacterOptions) : UnicodeString;
var
i, c : SizeInt;
pp, pr : PUnicodeChar;
pu : PUC_Prop;
locIsSurrogate : Boolean;
locIsSurrogate, locIgnoreInvalid : Boolean;
begin
c := Length(AString);
SetLength(Result,2*c);
if (c > 0) then begin
locIgnoreInvalid := (TCharacterOption.coIgnoreInvalidSequence in AOptions);
pp := @AString[1];
pr := @Result[1];
i := 1;
while (i <= c) do begin
pu := GetProps(Word(pp^));
locIsSurrogate := (pu^.Category = TUnicodeCategory.ucSurrogate);
locIsSurrogate := (TUnicodeCategory(pu^.Category) = TUnicodeCategory.ucSurrogate);
if locIsSurrogate then begin
if locIgnoreInvalid then begin
if (i = c) or not(IsSurrogatePair(pp[0],pp[1])) then begin
pr^ := pp^;
Inc(pp);
Inc(pr);
Inc(i);
Continue;
end;
end;
if not IsSurrogatePair(AString,i) then
raise EArgumentException.Create(SInvalidUnicodeCodePointSequence);
pu := GetProps(pp^,AString[i+1]);
@ -937,7 +935,7 @@ begin
if (pu^.SimpleUpperCase <= $FFFF) then begin
pr^ := UnicodeChar(Word(pu^.SimpleUpperCase));
end else begin
FromUCS4(UCS4Char(pu^.SimpleUpperCase),pr^,PUnicodeChar(PtrUInt(pr)+SizeOf(UnicodeChar))^);
FromUCS4(UCS4Char(Cardinal(pu^.SimpleUpperCase)),pr^,PUnicodeChar(PtrUInt(pr)+SizeOf(UnicodeChar))^);
Inc(pr);
end;
if locIsSurrogate then begin
@ -954,5 +952,6 @@ begin
SetLength(Result,i)
end;
end;
{$endif VER2_4}
end.

File diff suppressed because it is too large Load Diff

2467
rtl/objpas/unicodedata.pas Normal file

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,53 @@
{ Unicode implementation tables.
Copyright (c) 2013 by Inoussa OUEDRAOGO
Permission is hereby granted, free of charge, to any person
obtaining a copy of the Unicode data files and any associated
documentation (the "Data Files") or Unicode software and any
associated documentation (the "Software") to deal in the Data
Files or Software without restriction, including without
limitation the rights to use, copy, modify, merge, publish,
distribute, and/or sell copies of the Data Files or Software,
and to permit persons to whom the Data Files or Software are
furnished to do so, provided that (a) the above copyright
notice(s) and this permission notice appear with all copies
of the Data Files or Software, (b) both the above copyright
notice(s) and this permission notice appear in associated
documentation, and (c) there is clear notice in each modified
Data File or in the Software as well as in the documentation
associated with the Data File(s) or Software that the data or
software has been modified.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. }
unit unicodenumtable;
interface
const
UC_NUMERIC_COUNT = 112;
UC_NUMERIC_ARRAY : array[0..(UC_NUMERIC_COUNT-1)] of Double = (
0 ,1 ,2 ,3 ,4 ,5 ,6 ,7 ,8 ,
9 ,0.25 ,0.5 ,0.75 ,0.0625 ,0.125 ,0.1875 ,16 ,
10 ,100 ,1000 ,1.5 ,2.5 ,3.5 ,4.5 ,5.5 ,
6.5 ,7.5 ,8.5 ,-0.5 ,20 ,30 ,40 ,50 ,
60 ,70 ,80 ,90 ,10000 ,17 ,18 ,19 ,
0.142857142857143 ,0.111111111111111 ,0.1 ,0.333333333333333 ,0.666666666666667 ,0.2 ,0.4 ,0.6 ,
0.8 ,0.166666666666667 ,0.833333333333334 ,0.375 ,0.625 ,0.875 ,11 ,12 ,
500 ,5000 ,50000 ,100000 ,13 ,14 ,15 ,21 ,
22 ,23 ,24 ,25 ,26 ,27 ,28 ,29 ,
31 ,32 ,33 ,34 ,35 ,36 ,37 ,38 ,
39 ,41 ,42 ,43 ,44 ,45 ,46 ,47 ,
48 ,49 ,200 ,300 ,400 ,600 ,700 ,800 ,
900 ,2000 ,3000 ,4000 ,6000 ,7000 ,8000 ,9000 ,
20000 ,30000 ,40000 ,60000 ,70000 ,80000 ,90000
);
implementation
end.

View File

@ -0,0 +1,66 @@
function IsCJK_Unified_Ideographs(ACodePoint : Cardinal) : Boolean;inline;
begin
Result := (ACodePoint >= $4E00) and (ACodePoint <= $9FCC); // $9FFF
end;
function IsCJK_Compatibility_Ideographs(ACodePoint : Cardinal) : Boolean;inline;
begin
Result := (ACodePoint >= $F900) and (ACodePoint <= $FAFF);
end;
function IsCJK_Unified_Ideographs_Extension_A(ACodePoint : Cardinal) : Boolean;inline;
begin
Result := (ACodePoint >= $3400) and (ACodePoint <= $4DB5); // $4DBF
end;
function IsCJK_Unified_Ideographs_Extension_B(ACodePoint : Cardinal) : Boolean;inline;
begin
Result := (ACodePoint >= $20000) and (ACodePoint <= $2A6D6); // $2A6DF
end;
function IsCJK_Unified_Ideographs_Extension_C(ACodePoint : Cardinal) : Boolean;inline;
begin
Result := (ACodePoint >= $2A700) and (ACodePoint <= $2B734); // $2B73F
end;
function IsCJK_Unified_Ideographs_Extension_D(ACodePoint : Cardinal) : Boolean;inline;
begin
Result := (ACodePoint >= $2B740) and (ACodePoint <= $2B81D); // $2B81F
end;
function IsCJK_Compatibility_Ideographs_Supplement(ACodePoint : Cardinal) : Boolean;inline;
begin
Result := (ACodePoint >= $2F800) and (ACodePoint <= $2FA1F);
end;
procedure DeriveWeight(const ACodePoint : Cardinal; AResult : PUCA_PropWeights);
const
BASE_1 = Word($FB40);
BASE_2 = Word($FB80);
BASE_3 = Word($FBC0);
var
base : Word;
begin
if IsCJK_Unified_Ideographs(ACodePoint) or IsCJK_Compatibility_Ideographs(ACodePoint) then
base := BASE_1
else if IsCJK_Unified_Ideographs_Extension_A(ACodePoint) or
IsCJK_Unified_Ideographs_Extension_B(ACodePoint) or
IsCJK_Unified_Ideographs_Extension_C(ACodePoint) or
IsCJK_Unified_Ideographs_Extension_D(ACodePoint) or
IsCJK_Compatibility_Ideographs_Supplement(ACodePoint)
then begin
base := BASE_2;
end else begin
base := BASE_3;
end;
AResult[0].Weights[0] := base + (ACodePoint shr 15);
AResult[0].Weights[1] := $20;
AResult[0].Weights[2] := $2;
AResult[1].Weights[0] := (ACodePoint and $7FFF) or $8000;
AResult[1].Weights[1] := 0;
AResult[1].Weights[2] := 0;
end;