mirror of
https://gitlab.com/freepascal.org/fpc/source.git
synced 2025-04-06 07:47:59 +02:00
Merge branch 'ucd_separate' into 'main'
UnicodeData Alternative Any% WR See merge request freepascal.org/fpc/source!179
This commit is contained in:
commit
322438fd08
@ -1303,8 +1303,8 @@ System.CharSet$(PPUEXT) : $(NSINC)/System.CharSet.pp $(CHARSET_DEPS)
|
||||
# unicodedata
|
||||
#
|
||||
|
||||
UNICODEDATA_DEPS = $(OBJPASDIR)/unicodedata.pas $(OBJPASDIR)/unicodedata.inc \
|
||||
$(OBJPASDIR)/unicodedata_le.inc $(OBJPASDIR)/unicodedata_be.inc \
|
||||
UNICODEDATA_DEPS = $(OBJPASDIR)/unicodedata.pas $(OBJPASDIR)/unicodedata_props.inc \
|
||||
$(OBJPASDIR)/weight_derivation.inc \
|
||||
$(SYSTEMUNIT)$(PPUEXT) $(OBJPASUNIT)$(PPUEXT)
|
||||
|
||||
unicodedata$(PPUEXT) : $(UNICODEDATA_DEPS)
|
||||
|
@ -3227,8 +3227,8 @@ charset$(PPUEXT) : $(CHARSET_DEPS)
|
||||
$(COMPILER) $(CHARSET_OPT) $<
|
||||
System.CharSet$(PPUEXT) : $(NSINC)/System.CharSet.pp $(CHARSET_DEPS)
|
||||
$(COMPILER) $(CHARSET_OPT) $<
|
||||
UNICODEDATA_DEPS = $(OBJPASDIR)/unicodedata.pas $(OBJPASDIR)/unicodedata.inc \
|
||||
$(OBJPASDIR)/unicodedata_le.inc $(OBJPASDIR)/unicodedata_be.inc \
|
||||
UNICODEDATA_DEPS = $(OBJPASDIR)/unicodedata.pas $(OBJPASDIR)/unicodedata_props.inc \
|
||||
$(OBJPASDIR)/weight_derivation.inc \
|
||||
$(SYSTEMUNIT)$(PPUEXT) $(OBJPASUNIT)$(PPUEXT)
|
||||
unicodedata$(PPUEXT) : $(UNICODEDATA_DEPS)
|
||||
$(COMPILER) -Fi$(OBJPASDIR) $(UNICODEDATA_OPT) $<
|
||||
|
@ -750,18 +750,8 @@ class function TCharacter.TestCategory(
|
||||
AIndex : Integer;
|
||||
ACategory : TUnicodeCategory
|
||||
) : Boolean;
|
||||
var
|
||||
pu : PUC_Prop;
|
||||
begin
|
||||
if (AIndex < 1) or (AIndex > Length(AString)) then
|
||||
raise EArgumentOutOfRangeException.CreateFmt(SStringIndexOutOfRange, [AIndex, Length(AString)]);
|
||||
pu := GetProps(Word(AString[AIndex]));
|
||||
if (TUnicodeCategory(pu^.Category) = TUnicodeCategory.ucSurrogate) then begin
|
||||
if not IsSurrogatePair(AString,AIndex) then
|
||||
raise EArgumentException.Create(SInvalidUnicodeCodePointSequence);
|
||||
pu := GetProps(AString[AIndex],AString[AIndex+1]);
|
||||
end;
|
||||
Result := (TUnicodeCategory(pu^.Category) = ACategory);
|
||||
Result := GetUnicodeCategory(AString, AIndex) = ACategory;
|
||||
end;
|
||||
|
||||
class function TCharacter.TestCategory(
|
||||
@ -769,18 +759,8 @@ class function TCharacter.TestCategory(
|
||||
AIndex : Integer;
|
||||
ACategory : TUnicodeCategorySet
|
||||
) : Boolean;
|
||||
var
|
||||
pu : PUC_Prop;
|
||||
begin
|
||||
if (AIndex < 1) or (AIndex > Length(AString)) then
|
||||
raise EArgumentOutOfRangeException.CreateFmt(SStringIndexOutOfRange, [AIndex, Length(AString)]);
|
||||
pu := GetProps(Word(AString[AIndex]));
|
||||
if (TUnicodeCategory(pu^.Category) = TUnicodeCategory.ucSurrogate) then begin
|
||||
if not IsSurrogatePair(AString,AIndex) then
|
||||
raise EArgumentException.Create(SInvalidUnicodeCodePointSequence);
|
||||
pu := GetProps(AString[AIndex],AString[AIndex+1]);
|
||||
end;
|
||||
Result := (TUnicodeCategory(pu^.Category) in ACategory);
|
||||
Result := GetUnicodeCategory(AString, AIndex) in ACategory;
|
||||
end;
|
||||
|
||||
constructor TCharacter.Create;
|
||||
@ -847,7 +827,7 @@ end;
|
||||
|
||||
class function TCharacter.GetNumericValue(AChar : UnicodeChar) : Double;
|
||||
begin
|
||||
Result := GetProps(Word(AChar))^.NumericValue;
|
||||
Result := UnicodeData.GetNumericValue(Word(AChar));
|
||||
end;
|
||||
|
||||
class function TCharacter.GetNumericValue(
|
||||
@ -855,29 +835,29 @@ class function TCharacter.GetNumericValue(
|
||||
AIndex : Integer
|
||||
) : Double;
|
||||
var
|
||||
pu : PUC_Prop;
|
||||
cp : uint32;
|
||||
begin
|
||||
if (AIndex < 1) or (AIndex > Length(AString)) then
|
||||
raise EArgumentOutOfRangeException.CreateFmt(SStringIndexOutOfRange, [AIndex, Length(AString)]);
|
||||
pu := GetProps(Word(AString[AIndex]));
|
||||
if (TUnicodeCategory(pu^.Category) = TUnicodeCategory.ucSurrogate) then begin
|
||||
cp := Word(AString[AIndex]);
|
||||
if (cp >= SURROGATES_BEGIN) and (cp <= SURROGATES_END) then begin
|
||||
if not IsSurrogatePair(AString,AIndex) then
|
||||
raise EArgumentException.Create(SInvalidUnicodeCodePointSequence);
|
||||
pu := GetProps(AString[AIndex],AString[AIndex+1]);
|
||||
cp := ToUCS4(UnicodeChar(cp),AString[AIndex+1]);
|
||||
end;
|
||||
Result := pu^.NumericValue;
|
||||
Result := UnicodeData.GetNumericValue(cp);
|
||||
end;
|
||||
|
||||
class function TCharacter.GetNumericValue(aChar: UCS4Char): Double;
|
||||
begin
|
||||
Result := GetProps(AChar)^.NumericValue;
|
||||
Result := UnicodeData.GetNumericValue(aChar);
|
||||
end;
|
||||
|
||||
|
||||
|
||||
class function TCharacter.GetUnicodeCategory(AChar : UnicodeChar) : TUnicodeCategory;
|
||||
begin
|
||||
Result := TUnicodeCategory(GetProps(Word(AChar))^.Category);
|
||||
Result := TUnicodeCategory(UnicodeData.GetCategory(Word(AChar)));
|
||||
end;
|
||||
|
||||
class function TCharacter.GetUnicodeCategory(
|
||||
@ -885,17 +865,17 @@ class function TCharacter.GetUnicodeCategory(
|
||||
AIndex : Integer
|
||||
) : TUnicodeCategory;
|
||||
var
|
||||
pu : PUC_Prop;
|
||||
begin
|
||||
cp : uint32;
|
||||
begin
|
||||
if (AIndex < 1) or (AIndex > Length(AString)) then
|
||||
raise EArgumentOutOfRangeException.CreateFmt(SStringIndexOutOfRange, [AIndex, Length(AString)]);
|
||||
pu := GetProps(Word(AString[AIndex]));
|
||||
if (TUnicodeCategory(pu^.Category) = TUnicodeCategory.ucSurrogate) then begin
|
||||
cp := Word(AString[AIndex]);
|
||||
if (cp >= SURROGATES_BEGIN) and (cp <= SURROGATES_END) then begin
|
||||
if not IsSurrogatePair(AString,AIndex) then
|
||||
raise EArgumentException.Create(SInvalidUnicodeCodePointSequence);
|
||||
pu := GetProps(AString[AIndex],AString[AIndex+1]);
|
||||
cp := ToUCS4(UnicodeChar(cp),AString[AIndex+1]);
|
||||
end;
|
||||
Result := TUnicodeCategory(pu^.Category);
|
||||
Result := TUnicodeCategory(UnicodeData.GetCategory(cp));
|
||||
end;
|
||||
|
||||
|
||||
@ -915,7 +895,7 @@ end;
|
||||
|
||||
class function TCharacter.IsControl(AChar : UnicodeChar) : Boolean;
|
||||
begin
|
||||
Result := (TUnicodeCategory(GetProps(Word(AChar))^.Category) = TUnicodeCategory.ucControl);
|
||||
Result := UnicodeData.GetCategory(Word(AChar)) = ord(TUnicodeCategory.ucControl);
|
||||
end;
|
||||
|
||||
class function TCharacter.IsControl(
|
||||
@ -933,7 +913,7 @@ end;
|
||||
|
||||
class function TCharacter.IsDigit(AChar : UnicodeChar) : Boolean;
|
||||
begin
|
||||
Result := (TUnicodeCategory(GetProps(Word(AChar))^.Category) = TUnicodeCategory.ucDecimalNumber);
|
||||
Result := UnicodeData.GetCategory(Word(AChar)) = ord(TUnicodeCategory.ucDecimalNumber);
|
||||
end;
|
||||
|
||||
class function TCharacter.IsDigit(
|
||||
@ -951,7 +931,7 @@ end;
|
||||
|
||||
class function TCharacter.IsSurrogate(AChar : UnicodeChar) : Boolean;
|
||||
begin
|
||||
Result := (TUnicodeCategory(GetProps(Word(AChar))^.Category) = TUnicodeCategory.ucSurrogate);
|
||||
Result := (Word(AChar) >= SURROGATES_BEGIN) and (Word(AChar) <= SURROGATES_END);
|
||||
end;
|
||||
|
||||
class function TCharacter.IsSurrogate(
|
||||
@ -972,8 +952,7 @@ end;
|
||||
|
||||
class function TCharacter.IsHighSurrogate(AChar : UnicodeChar) : Boolean;
|
||||
begin
|
||||
Result := (TUnicodeCategory(GetProps(Word(AChar))^.Category) = TUnicodeCategory.ucSurrogate) and
|
||||
(Word(AChar) >= HIGH_SURROGATE_BEGIN) and
|
||||
Result := (Word(AChar) >= HIGH_SURROGATE_BEGIN) and
|
||||
(Word(AChar) <= HIGH_SURROGATE_END);
|
||||
end;
|
||||
|
||||
@ -994,8 +973,7 @@ end;
|
||||
|
||||
class function TCharacter.IsLowSurrogate(AChar : UnicodeChar) : Boolean;
|
||||
begin
|
||||
Result := (TUnicodeCategory(GetProps(Word(AChar))^.Category) = TUnicodeCategory.ucSurrogate) and
|
||||
(Word(AChar) >= LOW_SURROGATE_BEGIN) and
|
||||
Result := (Word(AChar) >= LOW_SURROGATE_BEGIN) and
|
||||
(Word(AChar) <= LOW_SURROGATE_END);
|
||||
end;
|
||||
|
||||
@ -1041,7 +1019,7 @@ end;
|
||||
|
||||
class function TCharacter.IsLetter(AChar : UnicodeChar) : Boolean;
|
||||
begin
|
||||
Result := (TUnicodeCategory(GetProps(Word(AChar))^.Category) in LETTER_CATEGORIES);
|
||||
Result := (TUnicodeCategory(UnicodeData.GetCategory(Word(AChar))) in LETTER_CATEGORIES);
|
||||
end;
|
||||
|
||||
class function TCharacter.IsLetter(
|
||||
@ -1060,7 +1038,7 @@ end;
|
||||
|
||||
class function TCharacter.IsLetterOrDigit(AChar : UnicodeChar) : Boolean;
|
||||
begin
|
||||
Result := (TUnicodeCategory(GetProps(Word(AChar))^.Category) in LETTER_OR_DIGIT_CATEGORIES);
|
||||
Result := (TUnicodeCategory(UnicodeData.GetCategory(Word(AChar))) in LETTER_OR_DIGIT_CATEGORIES);
|
||||
end;
|
||||
|
||||
class function TCharacter.IsLetterOrDigit(
|
||||
@ -1078,7 +1056,7 @@ end;
|
||||
|
||||
class function TCharacter.IsLower(AChar : UnicodeChar) : Boolean;
|
||||
begin
|
||||
Result := (TUnicodeCategory(GetProps(Word(AChar))^.Category) = TUnicodeCategory.ucLowercaseLetter);
|
||||
Result := (TUnicodeCategory(UnicodeData.GetCategory(Word(AChar))) = TUnicodeCategory.ucLowercaseLetter);
|
||||
end;
|
||||
|
||||
class function TCharacter.IsLower(
|
||||
@ -1097,7 +1075,7 @@ end;
|
||||
|
||||
class function TCharacter.IsNumber(AChar : UnicodeChar) : Boolean;
|
||||
begin
|
||||
Result := (TUnicodeCategory(GetProps(Word(AChar))^.Category) in NUMBER_CATEGORIES);
|
||||
Result := (TUnicodeCategory(UnicodeData.GetCategory(Word(AChar))) in NUMBER_CATEGORIES);
|
||||
end;
|
||||
|
||||
class function TCharacter.IsNumber(
|
||||
@ -1115,7 +1093,7 @@ end;
|
||||
|
||||
class function TCharacter.IsPunctuation(AChar : UnicodeChar) : Boolean;
|
||||
begin
|
||||
Result := (TUnicodeCategory(GetProps(Word(AChar))^.Category) in PUNCTUATION_CATEGORIES);
|
||||
Result := (TUnicodeCategory(UnicodeData.GetCategory(Word(AChar))) in PUNCTUATION_CATEGORIES);
|
||||
end;
|
||||
|
||||
class function TCharacter.IsPunctuation(
|
||||
@ -1133,7 +1111,7 @@ end;
|
||||
|
||||
class function TCharacter.IsSeparator(AChar: UnicodeChar): Boolean;
|
||||
begin
|
||||
Result := (TUnicodeCategory(GetProps(Word(AChar))^.Category) in SEPARATOR_CATEGORIES);
|
||||
Result := (TUnicodeCategory(UnicodeData.GetCategory(Word(AChar))) in SEPARATOR_CATEGORIES);
|
||||
end;
|
||||
|
||||
class function TCharacter.IsSeparator(
|
||||
@ -1153,7 +1131,7 @@ end;
|
||||
|
||||
class function TCharacter.IsSymbol(AChar: UnicodeChar): Boolean;
|
||||
begin
|
||||
Result := (TUnicodeCategory(GetProps(Word(AChar))^.Category) in SYMBOL_CATEGORIES);
|
||||
Result := (TUnicodeCategory(UnicodeData.GetCategory(Word(AChar))) in SYMBOL_CATEGORIES);
|
||||
end;
|
||||
|
||||
class function TCharacter.IsSymbol(aChar: UCS4Char): Boolean;
|
||||
@ -1173,7 +1151,7 @@ end;
|
||||
|
||||
class function TCharacter.IsUpper(AChar : UnicodeChar) : Boolean;
|
||||
begin
|
||||
Result := (TUnicodeCategory(GetProps(Word(AChar))^.Category) = TUnicodeCategory.ucUppercaseLetter);
|
||||
Result := (TUnicodeCategory(UnicodeData.GetCategory(Word(AChar))) = TUnicodeCategory.ucUppercaseLetter);
|
||||
end;
|
||||
|
||||
class function TCharacter.IsUpper(
|
||||
@ -1191,7 +1169,7 @@ end;
|
||||
|
||||
class function TCharacter.IsWhiteSpace(AChar : UnicodeChar) : Boolean;
|
||||
begin
|
||||
Result := GetProps(Word(AChar))^.WhiteSpace;
|
||||
Result := UnicodeData.IsWhiteSpace(Word(AChar));
|
||||
end;
|
||||
|
||||
class function TCharacter.IsWhiteSpace(
|
||||
@ -1199,17 +1177,17 @@ class function TCharacter.IsWhiteSpace(
|
||||
AIndex : Integer
|
||||
) : Boolean;
|
||||
var
|
||||
pu : PUC_Prop;
|
||||
cp : uint32;
|
||||
begin
|
||||
if (AIndex < 1) or (AIndex > Length(AString)) then
|
||||
raise EArgumentOutOfRangeException.CreateFmt(SStringIndexOutOfRange, [AIndex, Length(AString)]);
|
||||
pu := GetProps(Word(AString[AIndex]));
|
||||
if (TUnicodeCategory(pu^.Category) = TUnicodeCategory.ucSurrogate) then begin
|
||||
cp := Word(AString[AIndex]);
|
||||
if (cp >= SURROGATES_BEGIN) and (cp <= SURROGATES_END) then begin
|
||||
if not IsSurrogatePair(AString,AIndex) then
|
||||
raise EArgumentException.Create(SInvalidUnicodeCodePointSequence);
|
||||
pu := GetProps(AString[AIndex],AString[AIndex+1]);
|
||||
cp := ToUCS4(UnicodeChar(cp),AString[AIndex+1]);
|
||||
end;
|
||||
Result := pu^.WhiteSpace;
|
||||
Result := UnicodeData.IsWhiteSpace(cp);
|
||||
end;
|
||||
|
||||
class function TCharacter.IsWhiteSpace(aChar: UCS4Char): Boolean;
|
||||
@ -1219,9 +1197,7 @@ end;
|
||||
|
||||
class function TCharacter.ToLower(AChar : UnicodeChar) : UnicodeChar;
|
||||
begin
|
||||
Result := UnicodeChar(Word(GetProps(Word(AChar))^.SimpleLowerCase));
|
||||
if (Result = UnicodeChar(0)) then
|
||||
Result := AChar;
|
||||
Result := UnicodeChar(UnicodeData.GetSimpleLowerCase(Word(AChar)));
|
||||
end;
|
||||
|
||||
class function TCharacter.ToLower(const AString : UnicodeString) : UnicodeString;
|
||||
@ -1248,9 +1224,7 @@ end;
|
||||
|
||||
class function TCharacter.ToUpper(AChar : UnicodeChar) : UnicodeChar;
|
||||
begin
|
||||
Result := UnicodeChar(Word(GetProps(Word(AChar))^.SimpleUpperCase));
|
||||
if (Result = UnicodeChar(0)) then
|
||||
Result := AChar;
|
||||
Result := UnicodeChar(UnicodeData.GetSimpleUpperCase(Word(AChar)));
|
||||
end;
|
||||
|
||||
class function TCharacter.ToUpper(const AString : UnicodeString) : UnicodeString;
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -104,6 +104,7 @@ unit unicodedata;
|
||||
|
||||
{$SCOPEDENUMS ON}
|
||||
{$pointermath on}
|
||||
{$coperators on}
|
||||
{$define USE_INLINE}
|
||||
{ $define uni_debug}
|
||||
|
||||
@ -140,6 +141,8 @@ const
|
||||
HIGH_SURROGATE_BEGIN = Word($D800);
|
||||
HIGH_SURROGATE_END = Word($DBFF);
|
||||
HIGH_SURROGATE_COUNT = HIGH_SURROGATE_END - HIGH_SURROGATE_BEGIN + 1;
|
||||
SURROGATES_BEGIN = HIGH_SURROGATE_BEGIN;
|
||||
SURROGATES_END = LOW_SURROGATE_END;
|
||||
UCS4_HALF_BASE = LongWord($10000);
|
||||
UCS4_HALF_MASK = Word($3FF);
|
||||
MAX_LEGAL_UTF32 = $10FFFF;
|
||||
@ -186,36 +189,13 @@ const
|
||||
// Names
|
||||
|
||||
UnicodeCategoryNames: array[0..29] of string[2] = (
|
||||
'Lu',
|
||||
'Ll',
|
||||
'Lt',
|
||||
'Lm',
|
||||
'Lo',
|
||||
'Mn',
|
||||
'Mc',
|
||||
'Me',
|
||||
'Nd',
|
||||
'Nl',
|
||||
'No',
|
||||
'Pc',
|
||||
'Pd',
|
||||
'Ps',
|
||||
'Pe',
|
||||
'Pi',
|
||||
'Pf',
|
||||
'Po',
|
||||
'Sm',
|
||||
'Sc',
|
||||
'Sk',
|
||||
'So',
|
||||
'Zs',
|
||||
'Zl',
|
||||
'Zp',
|
||||
'Cc',
|
||||
'Cf',
|
||||
'Cs',
|
||||
'Co',
|
||||
'Cn'
|
||||
'Lu', 'Ll', 'Lt', 'Lm', 'Lo',
|
||||
'Mn', 'Mc', 'Me',
|
||||
'Nd', 'Nl', 'No',
|
||||
'Pc', 'Pd', 'Ps', 'Pe', 'Pi', 'Pf', 'Po',
|
||||
'Sm', 'Sc', 'Sk', 'So',
|
||||
'Zs', 'Zl', 'Zp',
|
||||
'Cc', 'Cf', 'Cs', 'Co', 'Cn'
|
||||
);
|
||||
|
||||
type
|
||||
@ -279,46 +259,45 @@ const
|
||||
(c : 0; b : 0; a : 0;);
|
||||
{$endif ENDIAN_LITTLE}
|
||||
|
||||
function GetCategory(c: uint32): uint8;
|
||||
function GetCCC(c: uint32): uint8;
|
||||
function GetNumericValue(c: uint32): double;
|
||||
function GetSimpleUpperCase(c: uint32): uint32;
|
||||
function GetSimpleLowerCase(c: uint32): uint32;
|
||||
function GetSimpleTitleCase(c: uint32): uint32;
|
||||
function IsWhiteSpace(c: uint32): boolean;
|
||||
function IsHangulSyllable(c: uint32): boolean;
|
||||
function IsUnifiedIdeograph(const ACodePoint : Cardinal) : boolean;
|
||||
|
||||
type
|
||||
PUC_Prop = ^TUC_Prop;
|
||||
|
||||
{ TUC_Prop }
|
||||
|
||||
{ On alignment-sensitive targets, at least some of them, assembler uses to forcibly align data >1 byte.
|
||||
This breaks intended layout of initialized constants/variables.
|
||||
A proper solution is to patch compiler to emit always unaligned directives for words/dwords/etc,
|
||||
but for now just declare this record as "unpacked". This causes bloat, but it's better than having
|
||||
entire unit not working at all. }
|
||||
TUC_Prop = {$ifndef FPC_REQUIRES_PROPER_ALIGNMENT}packed{$endif} record
|
||||
{ Once upon a time, UnicodeData.txt database was stored as the array of TUC_Prop.
|
||||
This is a compatibility wrapper that disguises the codepoint index as @Self
|
||||
and emulates accessing its properties by redirecting them to specific functions. }
|
||||
TUC_Prop = record
|
||||
private
|
||||
function GetCategory : Byte;inline;
|
||||
procedure SetCategory(AValue : Byte);
|
||||
function GetWhiteSpace : Boolean;inline;
|
||||
procedure SetWhiteSpace(AValue : Boolean);
|
||||
function GetHangulSyllable : Boolean;inline;
|
||||
procedure SetHangulSyllable(AValue : Boolean);
|
||||
function GetNumericValue: Double;inline;
|
||||
function GetCategory : Byte; inline;
|
||||
function GetCCC : Byte; inline;
|
||||
function GetWhiteSpace : Boolean; inline;
|
||||
function GetHangulSyllable : Boolean; inline;
|
||||
function GetNumericValue: Double; inline;
|
||||
function GetUnifiedIdeograph : Boolean;inline;
|
||||
public //Shortned names
|
||||
C : Byte; //CategoryData
|
||||
C3 : Byte; //Canonical Combining Class
|
||||
N : Byte; //NumericIndex
|
||||
UC : UInt24; //SimpleUpperCase
|
||||
LC : UInt24; //SimpleLowerCase
|
||||
D : SmallInt; //DecompositionID
|
||||
function GetSimpleUpperCase : uint32; inline;
|
||||
function GetSimpleLowerCase : uint32; inline;
|
||||
function GetSimpleTitleCase : uint32; inline;
|
||||
public
|
||||
property CategoryData : Byte read C write C;
|
||||
property NumericIndex : Byte read N write N;
|
||||
property SimpleUpperCase : UInt24 read UC write UC;
|
||||
property SimpleLowerCase : UInt24 read LC write LC;
|
||||
property DecompositionID : SmallInt read D write D;
|
||||
public
|
||||
property Category : Byte read GetCategory write SetCategory;
|
||||
property WhiteSpace : Boolean read GetWhiteSpace write SetWhiteSpace;
|
||||
property HangulSyllable : Boolean read GetHangulSyllable write SetHangulSyllable;
|
||||
property UnifiedIdeograph : Boolean read GetUnifiedIdeograph;
|
||||
property NumericValue : Double read GetNumericValue;
|
||||
end;
|
||||
property Category : Byte read GetCategory; deprecated 'Use GetCategory()';
|
||||
property CCC : Byte read GetCCC; deprecated 'Use GetCCC()';
|
||||
property WhiteSpace : Boolean read GetWhiteSpace; deprecated 'Use IsWhiteSpace()';
|
||||
property HangulSyllable : Boolean read GetHangulSyllable; deprecated 'Use IsHangulSyllable()';
|
||||
property NumericValue : Double read GetNumericValue; deprecated 'Use GetNumericValue()';
|
||||
property UnifiedIdeograph : Boolean read GetUnifiedIdeograph; deprecated 'Use IsUnifiedIdeograph()';
|
||||
property SimpleUpperCase : uint32 read GetSimpleUpperCase; deprecated 'Use GetSimpleUpperCase()';
|
||||
property SimpleLowerCase : uint32 read GetSimpleLowerCase; deprecated 'Use GetSimpleLowerCase()';
|
||||
end deprecated 'Use GetCategory(), GetCCC(), etc., this is a wrapper.';
|
||||
|
||||
type
|
||||
TUCA_PropWeights = packed record
|
||||
@ -503,9 +482,10 @@ const
|
||||
out AResultString : UnicodeString
|
||||
) : Integer;
|
||||
|
||||
function GetProps(const ACodePoint : Word) : PUC_Prop;overload;{$IFDEF INLINE_SUPPORT_PRIVATE_VARS}inline;{$ENDIF}
|
||||
function GetProps(const AHighS, ALowS : UnicodeChar): PUC_Prop;overload;{$IFDEF INLINE_SUPPORT_PRIVATE_VARS}inline;{$ENDIF}
|
||||
function GetProps(const ACodePoint : Cardinal) : PUC_Prop;overload;inline;
|
||||
function GetProps(const ACodePoint : Word) : PUC_Prop;overload;inline; deprecated 'Use GetCategory(), GetCCC(), etc., this is a wrapper.';
|
||||
function GetProps(const AHighS, ALowS : UnicodeChar): PUC_Prop;overload;inline; deprecated 'Use GetCategory(), GetCCC(), etc., this is a wrapper.';
|
||||
function GetProps(const ACodePoint : Cardinal) : PUC_Prop;overload;inline; deprecated 'Use GetCategory(), GetCCC(), etc., this is a wrapper.';
|
||||
|
||||
function GetPropUCA(const AHighS, ALowS : UnicodeChar; const ABook : PUCA_DataBook): PUCA_PropItemRec; overload;{$IFDEF INLINE_SUPPORT_PRIVATE_VARS}inline;{$ENDIF}
|
||||
function GetPropUCA(const AChar : UnicodeChar; const ABook : PUCA_DataBook) : PUCA_PropItemRec; overload;{$IFDEF INLINE_SUPPORT_PRIVATE_VARS}inline;{$ENDIF}
|
||||
|
||||
@ -633,8 +613,29 @@ resourcestring
|
||||
|
||||
implementation
|
||||
|
||||
type
|
||||
{$include unicodedata_props.inc}
|
||||
|
||||
function IsWhiteSpace(c: uint32): boolean;
|
||||
begin
|
||||
result := false;
|
||||
case c of
|
||||
$20 {Space}, $9 .. $D {Tab, Line feed, Line tab, Form feed, Carriage return},
|
||||
$85 {Next line}, $A0 {No-break space}, $1680 {ogham space mark}, $2000 {en quad} .. $200A {hair space},
|
||||
$2028 {line separator}, $2029 {paragraph separator},
|
||||
$202F {narrow no-break space}, $205F {medium mathematical space},
|
||||
$3000 {ideographic space}: result := true;
|
||||
end;
|
||||
end;
|
||||
|
||||
function IsHangulSyllable(c: uint32): boolean;
|
||||
begin
|
||||
result := false;
|
||||
case c of
|
||||
$1100..$11FF, $A960..$A97C, $AC00..$D7A3, $D7B0..$D7C6, $D7CB..$D7FB: result := true;
|
||||
end;
|
||||
end;
|
||||
|
||||
type
|
||||
TCardinalRec = packed record
|
||||
{$ifdef ENDIAN_LITTLE}
|
||||
byte0, byte1, byte2, byte3 : Byte;
|
||||
@ -1597,14 +1598,6 @@ begin
|
||||
Result := LoadCollation(ADirectory,ALanguage,al);
|
||||
end;
|
||||
|
||||
{$INCLUDE unicodedata.inc}
|
||||
{$IFDEF ENDIAN_LITTLE}
|
||||
{$INCLUDE unicodedata_le.inc}
|
||||
{$ENDIF ENDIAN_LITTLE}
|
||||
{$IFDEF ENDIAN_BIG}
|
||||
{$INCLUDE unicodedata_be.inc}
|
||||
{$ENDIF ENDIAN_BIG}
|
||||
|
||||
procedure FromUCS4(const AValue : UCS4Char; out AHighS, ALowS : UnicodeChar);
|
||||
begin
|
||||
AHighS := UnicodeChar((AValue - $10000) shr 10 + $d800);
|
||||
@ -1643,49 +1636,19 @@ begin
|
||||
(Word(AValue) <= LOW_SURROGATE_END);
|
||||
end;
|
||||
|
||||
function GetProps(const ACodePoint : Word) : PUC_Prop;overload;{$IFDEF INLINE_SUPPORT_PRIVATE_VARS}inline;{$ENDIF}
|
||||
function GetProps(const ACodePoint : Word) : PUC_Prop;
|
||||
begin
|
||||
Result:=
|
||||
@UC_PROP_ARRAY[
|
||||
UC_TABLE_3[
|
||||
UC_TABLE_2[UC_TABLE_1[hi(ACodePoint)]]
|
||||
[lo(ACodePoint) shr 4]
|
||||
][lo(ACodePoint) and $F]
|
||||
]; {
|
||||
@UC_PROP_ARRAY[
|
||||
UC_TABLE_2[
|
||||
(UC_TABLE_1[WordRec(ACodePoint).Hi] * 256) +
|
||||
WordRec(ACodePoint).Lo
|
||||
]
|
||||
];}
|
||||
Result:=PUC_Prop(PtrUint(ACodePoint));
|
||||
end;
|
||||
|
||||
function GetProps(const AHighS, ALowS : UnicodeChar): PUC_Prop;overload;{$IFDEF INLINE_SUPPORT_PRIVATE_VARS}inline;{$ENDIF}
|
||||
function GetProps(const AHighS, ALowS : UnicodeChar): PUC_Prop;
|
||||
begin
|
||||
Result:=
|
||||
@UC_PROP_ARRAY[
|
||||
UCO_TABLE_3[
|
||||
UCO_TABLE_2[UCO_TABLE_1[Word(AHighS)-HIGH_SURROGATE_BEGIN]]
|
||||
[(Word(ALowS) - LOW_SURROGATE_BEGIN) div 32]
|
||||
][(Word(ALowS) - LOW_SURROGATE_BEGIN) mod 32]
|
||||
]; {
|
||||
Result:=
|
||||
@UC_PROP_ARRAY[
|
||||
UCO_TABLE_2[
|
||||
(UCO_TABLE_1[Word(AHighS)-HIGH_SURROGATE_BEGIN] * HIGH_SURROGATE_COUNT) +
|
||||
Word(ALowS) - LOW_SURROGATE_BEGIN
|
||||
]
|
||||
]; }
|
||||
Result:=PUC_Prop(PtrUint(ToUCS4(AHighS, ALowS)));
|
||||
end;
|
||||
|
||||
function GetProps(const ACodePoint : Cardinal) : PUC_Prop;inline;
|
||||
var
|
||||
l, h : UnicodeChar;
|
||||
function GetProps(const ACodePoint : Cardinal) : PUC_Prop;
|
||||
begin
|
||||
if (ACodePoint <= High(Word)) then
|
||||
exit(GetProps(Word(ACodePoint)));
|
||||
FromUCS4(ACodePoint,h,l);
|
||||
Result := GetProps(h,l);
|
||||
result:=PUC_Prop(PtrUint(ACodePoint));
|
||||
end;
|
||||
|
||||
function UnicodeToUpper(
|
||||
@ -1696,7 +1659,7 @@ function UnicodeToUpper(
|
||||
var
|
||||
i, c : SizeInt;
|
||||
pp, pr : PUnicodeChar;
|
||||
pu : PUC_Prop;
|
||||
cp, cpUp : uint32;
|
||||
locIsSurrogate : Boolean;
|
||||
r : UnicodeString;
|
||||
begin
|
||||
@ -1707,10 +1670,10 @@ begin
|
||||
pr := @r[1];
|
||||
i := 1;
|
||||
while (i <= c) do begin
|
||||
pu := GetProps(Word(pp^));
|
||||
locIsSurrogate := (pu^.Category = UGC_Surrogate);
|
||||
cp := Word(pp^);
|
||||
locIsSurrogate := (cp >= SURROGATES_BEGIN) and (cp <= SURROGATES_END);
|
||||
if locIsSurrogate then begin
|
||||
if (i = c) or not(UnicodeIsSurrogatePair(pp[0],pp[1])) then begin
|
||||
if (i = c) or not(UnicodeIsSurrogatePair(UnicodeChar(cp),pp[1])) then begin
|
||||
if AIgnoreInvalidSequence then begin
|
||||
pr^ := pp^;
|
||||
Inc(pp);
|
||||
@ -1720,9 +1683,10 @@ begin
|
||||
end;
|
||||
exit(ERROR_INVALID_CODEPOINT_SEQUENCE);
|
||||
end;
|
||||
pu := GetProps(pp^,AString[i+1]);
|
||||
cp := ToUCS4(UnicodeChar(cp),pp[1]);
|
||||
end;
|
||||
if (pu^.SimpleUpperCase = 0) then begin
|
||||
cpUp := GetSimpleUpperCase(cp);
|
||||
if (cpUp = cp) then begin
|
||||
pr^ := pp^;
|
||||
if locIsSurrogate then begin
|
||||
Inc(pp);
|
||||
@ -1731,10 +1695,10 @@ begin
|
||||
pr^ := pp^;
|
||||
end;
|
||||
end else begin
|
||||
if (pu^.SimpleUpperCase <= $FFFF) then begin
|
||||
pr^ := UnicodeChar(Word(pu^.SimpleUpperCase));
|
||||
if (cpUp <= $FFFF) then begin
|
||||
pr^ := UnicodeChar(cpUp);
|
||||
end else begin
|
||||
FromUCS4(UCS4Char(Cardinal(pu^.SimpleUpperCase)),pr^,PUnicodeChar(PtrUInt(pr)+SizeOf(UnicodeChar))^);
|
||||
FromUCS4(cpUp,pr^,PUnicodeChar(PtrUInt(pr)+SizeOf(UnicodeChar))^);
|
||||
Inc(pr);
|
||||
end;
|
||||
if locIsSurrogate then begin
|
||||
@ -1762,7 +1726,7 @@ function UnicodeToLower(
|
||||
var
|
||||
i, c : SizeInt;
|
||||
pp, pr : PUnicodeChar;
|
||||
pu : PUC_Prop;
|
||||
cp, cpLo : uint32;
|
||||
locIsSurrogate : Boolean;
|
||||
r : UnicodeString;
|
||||
begin
|
||||
@ -1773,10 +1737,10 @@ begin
|
||||
pr := @r[1];
|
||||
i := 1;
|
||||
while (i <= c) do begin
|
||||
pu := GetProps(Word(pp^));
|
||||
locIsSurrogate := (pu^.Category = UGC_Surrogate);
|
||||
cp := Word(pp^);
|
||||
locIsSurrogate := (cp >= SURROGATES_BEGIN) and (cp <= SURROGATES_END);
|
||||
if locIsSurrogate then begin
|
||||
if (i = c) or not(UnicodeIsSurrogatePair(pp[0],pp[1])) then begin
|
||||
if (i = c) or not(UnicodeIsSurrogatePair(UnicodeChar(cp),pp[1])) then begin
|
||||
if AIgnoreInvalidSequence then begin
|
||||
pr^ := pp^;
|
||||
Inc(pp);
|
||||
@ -1786,9 +1750,10 @@ begin
|
||||
end;
|
||||
exit(ERROR_INVALID_CODEPOINT_SEQUENCE);
|
||||
end;
|
||||
pu := GetProps(pp^,AString[i+1]);
|
||||
cpLo := ToUCS4(UnicodeChar(cp),AString[i+1]);
|
||||
end;
|
||||
if (pu^.SimpleLowerCase = 0) then begin
|
||||
cpLo := GetSimpleLowerCase(cp);
|
||||
if (cp = cpLo) then begin
|
||||
pr^ := pp^;
|
||||
if locIsSurrogate then begin
|
||||
Inc(pp);
|
||||
@ -1797,10 +1762,10 @@ begin
|
||||
pr^ := pp^;
|
||||
end;
|
||||
end else begin
|
||||
if (pu^.SimpleLowerCase <= $FFFF) then begin
|
||||
pr^ := UnicodeChar(Word(pu^.SimpleLowerCase));
|
||||
if (cpLo <= $FFFF) then begin
|
||||
pr^ := UnicodeChar(cpLo);
|
||||
end else begin
|
||||
FromUCS4(UCS4Char(Cardinal(pu^.SimpleLowerCase)),pr^,PUnicodeChar(PtrUInt(pr)+SizeOf(UnicodeChar))^);
|
||||
FromUCS4(cpLo,pr^,PUnicodeChar(PtrUInt(pr)+SizeOf(UnicodeChar))^);
|
||||
Inc(pr);
|
||||
end;
|
||||
if locIsSurrogate then begin
|
||||
@ -1852,28 +1817,16 @@ begin
|
||||
end;
|
||||
end;
|
||||
|
||||
function Decompose(const ADecomposeIndex : Integer; ABuffer : PUnicodeChar) : Integer;
|
||||
function Decompose(const ADecomposeIndex : SizeUint; ABuffer : PUnicodeChar) : Integer;
|
||||
var
|
||||
locStack : array[0..23] of Cardinal;
|
||||
locStackIdx : Integer;
|
||||
ResultBuffer : array[0..23] of Cardinal;
|
||||
ResultIdx : Integer;
|
||||
|
||||
procedure AddCompositionToStack(const AIndex : Integer);
|
||||
var
|
||||
pdecIdx : ^TDecompositionIndexRec;
|
||||
k, kc : Integer;
|
||||
pu : ^UInt24;
|
||||
procedure AddCompositionToStack(AIndex : SizeUint);
|
||||
begin
|
||||
pdecIdx := @(UC_DEC_BOOK_DATA.Index[AIndex]);
|
||||
pu := @(UC_DEC_BOOK_DATA.CodePoints[pdecIdx^.S]);
|
||||
kc := pdecIdx^.L;
|
||||
Inc(pu,kc);
|
||||
for k := 1 to kc do begin
|
||||
Dec(pu);
|
||||
locStack[locStackIdx + k] := pu^;
|
||||
end;
|
||||
locStackIdx := locStackIdx + kc;
|
||||
locStackIdx += integer(DecomposeCommonRev(AIndex, @locStack[locStackIdx + 1]));
|
||||
end;
|
||||
|
||||
procedure AddResult(const AChar : Cardinal);{$IFDEF INLINE_SUPPORT_PRIVATE_VARS}inline;{$ENDIF}
|
||||
@ -1891,7 +1844,6 @@ var
|
||||
var
|
||||
cu : Cardinal;
|
||||
decIdx : SmallInt;
|
||||
locIsWord : Boolean;
|
||||
i : Integer;
|
||||
p : PUnicodeChar;
|
||||
begin
|
||||
@ -1900,12 +1852,8 @@ begin
|
||||
AddCompositionToStack(ADecomposeIndex);
|
||||
while (locStackIdx >= 0) do begin
|
||||
cu := PopStack();
|
||||
locIsWord := (cu <= MAX_WORD);
|
||||
if locIsWord then
|
||||
decIdx := GetProps(Word(cu))^.DecompositionID
|
||||
else
|
||||
decIdx := GetProps(cu)^.DecompositionID;
|
||||
if (decIdx = -1) then
|
||||
decIdx := GetDecompositionIndex(cu);
|
||||
if decIdx = 0 then
|
||||
AddResult(cu)
|
||||
else
|
||||
AddCompositionToStack(decIdx);
|
||||
@ -1970,8 +1918,8 @@ var
|
||||
end;
|
||||
|
||||
var
|
||||
pu : PUC_Prop;
|
||||
cccp, cccq : Byte;
|
||||
cp : uint32;
|
||||
cpccc, cccp, cccq : Byte;
|
||||
begin
|
||||
c := ALength;
|
||||
if (c < 2) then
|
||||
@ -1979,44 +1927,45 @@ begin
|
||||
p := AStr;
|
||||
i := 1;
|
||||
while (i < c) do begin
|
||||
pu := GetProps(Word(p^));
|
||||
locIsSurrogateP := (pu^.Category = UGC_Surrogate);
|
||||
cp := Word(p^);
|
||||
locIsSurrogateP := (cp >= SURROGATES_BEGIN) and (cp <= SURROGATES_END);
|
||||
if locIsSurrogateP then begin
|
||||
if (i = (c - 1)) then
|
||||
Break;
|
||||
if not UnicodeIsSurrogatePair(p[0],p[1]) then begin
|
||||
if not UnicodeIsSurrogatePair(UnicodeChar(cp),p[1]) then begin
|
||||
Inc(p);
|
||||
Inc(i);
|
||||
Continue;
|
||||
end;
|
||||
pu := GetProps(p[0],p[1]);
|
||||
cp := ToUCS4(UnicodeChar(cp),p[1]);
|
||||
end;
|
||||
if (pu^.C3 > 0) then begin
|
||||
cccp := pu^.C3;
|
||||
cpccc := GetCCC(cp);
|
||||
if (cpccc > 0) then begin
|
||||
cccp := cpccc;
|
||||
if locIsSurrogateP then
|
||||
q := p + 2
|
||||
else
|
||||
q := p + 1;
|
||||
pu := GetProps(Word(q^));
|
||||
locIsSurrogateQ := (pu^.Category = UGC_Surrogate);
|
||||
cp := Word(q^);
|
||||
locIsSurrogateQ := (cp >= SURROGATES_BEGIN) and (cp <= SURROGATES_END);
|
||||
if locIsSurrogateQ then begin
|
||||
if (i = c) then
|
||||
Break;
|
||||
if not UnicodeIsSurrogatePair(q[0],q[1]) then begin
|
||||
if not UnicodeIsSurrogatePair(UnicodeChar(cp),q[1]) then begin
|
||||
Inc(p);
|
||||
Inc(i);
|
||||
Continue;
|
||||
end;
|
||||
pu := GetProps(q[0],q[1]);
|
||||
cp := ToUCS4(UnicodeChar(cp),q[1]);
|
||||
end;
|
||||
cccq := pu^.C3;
|
||||
cccq := GetCCC(cp);
|
||||
if (cccq > 0) and (cccp > cccq) then begin
|
||||
Swap();
|
||||
if (i > 1) then begin
|
||||
Dec(p);
|
||||
Dec(i);
|
||||
pu := GetProps(Word(p^));
|
||||
if (pu^.Category = UGC_Surrogate) then begin
|
||||
cp := Word(p^);
|
||||
if (cp >= SURROGATES_BEGIN) and (cp <= SURROGATES_END) then begin
|
||||
if (i > 1) then begin
|
||||
Dec(p);
|
||||
Dec(i);
|
||||
@ -2044,9 +1993,8 @@ end;
|
||||
function NormalizeNFD(const AStr : PUnicodeChar; ALength : SizeInt) : UnicodeString;
|
||||
const MAX_EXPAND = 3;
|
||||
var
|
||||
i, c, kc, k : SizeInt;
|
||||
i, c, kc, k, decIdx : SizeInt;
|
||||
pp, pr : PUnicodeChar;
|
||||
pu : PUC_Prop;
|
||||
locIsSurrogate : Boolean;
|
||||
cpArray : array[0..7] of Cardinal;
|
||||
cp : Cardinal;
|
||||
@ -2058,27 +2006,33 @@ begin
|
||||
pr := @Result[1];
|
||||
i := 1;
|
||||
while (i <= c) do begin
|
||||
pu := GetProps(Word(pp^));
|
||||
locIsSurrogate := (pu^.Category = UGC_Surrogate);
|
||||
cp := Word(pp^);
|
||||
locIsSurrogate := (cp >= SURROGATES_BEGIN) and (cp <= SURROGATES_END);
|
||||
if locIsSurrogate then begin
|
||||
if (i = c) then
|
||||
Break;
|
||||
if not UnicodeIsSurrogatePair(pp[0],pp[1]) then begin
|
||||
if not UnicodeIsSurrogatePair(UnicodeChar(cp),pp[1]) then begin
|
||||
pr^ := pp^;
|
||||
Inc(pp);
|
||||
Inc(pr);
|
||||
Inc(i);
|
||||
Continue;
|
||||
end;
|
||||
pu := GetProps(pp[0],pp[1]);
|
||||
cp := ToUCS4(UnicodeChar(cp), pp[1]);
|
||||
end;
|
||||
if pu^.HangulSyllable then begin
|
||||
decIdx := GetDecompositionIndex(cp);
|
||||
if decIdx = 0 then begin
|
||||
pr^ := pp^;
|
||||
if locIsSurrogate then begin
|
||||
Inc(pp);
|
||||
Inc(pr);
|
||||
Inc(i);
|
||||
pr^ := pp^;
|
||||
end;
|
||||
end else if decIdx = HangulSyllable then begin
|
||||
if locIsSurrogate then begin
|
||||
cp := ToUCS4(pp[0],pp[1]);
|
||||
Inc(pp);
|
||||
Inc(i);
|
||||
end else begin
|
||||
cp := Word(pp^);
|
||||
end;
|
||||
kc := DecomposeHangul(cp,@cpArray[0]);
|
||||
for k := 0 to kc - 1 do begin
|
||||
@ -2093,21 +2047,11 @@ begin
|
||||
if (kc > 0) then
|
||||
Dec(pr);
|
||||
end else begin
|
||||
if (pu^.DecompositionID = -1) then begin
|
||||
pr^ := pp^;
|
||||
if locIsSurrogate then begin
|
||||
Inc(pp);
|
||||
Inc(pr);
|
||||
Inc(i);
|
||||
pr^ := pp^;
|
||||
end;
|
||||
end else begin
|
||||
k := Decompose(pu^.DecompositionID,pr);
|
||||
pr := pr + (k - 1);
|
||||
if locIsSurrogate then begin
|
||||
Inc(pp);
|
||||
Inc(i);
|
||||
end;
|
||||
k := Decompose(decIdx,pr);
|
||||
pr := pr + (k - 1);
|
||||
if locIsSurrogate then begin
|
||||
Inc(pp);
|
||||
Inc(i);
|
||||
end;
|
||||
end;
|
||||
Inc(pp);
|
||||
@ -2115,7 +2059,7 @@ begin
|
||||
Inc(i);
|
||||
end;
|
||||
Dec(pp);
|
||||
i := ((PtrUInt(pr) - PtrUInt(@Result[1])) div SizeOf(UnicodeChar));
|
||||
i := (SizeUint(PtrUInt(pr) - PtrUInt(@Result[1])) div SizeOf(UnicodeChar));
|
||||
SetLength(Result,i);
|
||||
CanonicalOrder(@Result[1],Length(Result));
|
||||
end;
|
||||
@ -2218,46 +2162,49 @@ begin
|
||||
ANode := t;
|
||||
end;
|
||||
|
||||
{ TUC_Prop }
|
||||
|
||||
function TUC_Prop.GetCategory: Byte;
|
||||
function TUC_Prop.GetCategory : Byte;
|
||||
begin
|
||||
Result := Byte((C and Byte($F8)) shr 3);
|
||||
result := UnicodeData.GetCategory(PtrUint(@self));
|
||||
end;
|
||||
|
||||
function TUC_Prop.GetNumericValue: Double;
|
||||
function TUC_Prop.GetCCC : Byte;
|
||||
begin
|
||||
Result := UC_NUMERIC_ARRAY[NumericIndex];
|
||||
result := UnicodeData.GetCCC(PtrUint(@self));
|
||||
end;
|
||||
|
||||
function TUC_Prop.GetWhiteSpace : Boolean;
|
||||
begin
|
||||
result := IsWhiteSpace(PtrUint(@self));
|
||||
end;
|
||||
|
||||
function TUC_Prop.GetHangulSyllable : Boolean;
|
||||
begin
|
||||
result := IsHangulSyllable(PtrUint(@self));
|
||||
end;
|
||||
|
||||
function TUC_Prop.GetNumericValue : Double;
|
||||
begin
|
||||
result := UnicodeData.GetNumericValue(PtrUint(@self));
|
||||
end;
|
||||
|
||||
function TUC_Prop.GetUnifiedIdeograph : Boolean;
|
||||
begin
|
||||
Result := IsBitON(C,2);
|
||||
result := IsUnifiedIdeograph(PtrUint(@self));
|
||||
end;
|
||||
|
||||
procedure TUC_Prop.SetCategory(AValue: Byte);
|
||||
function TUC_Prop.GetSimpleUpperCase : uint32;
|
||||
begin
|
||||
C := Byte(C or Byte(AValue shl 3));
|
||||
result := UnicodeData.GetSimpleUpperCase(PtrUint(@self));
|
||||
end;
|
||||
|
||||
function TUC_Prop.GetWhiteSpace: Boolean;
|
||||
function TUC_Prop.GetSimpleLowerCase : uint32;
|
||||
begin
|
||||
Result := IsBitON(C,0);
|
||||
result := UnicodeData.GetSimpleLowerCase(PtrUint(@self));
|
||||
end;
|
||||
|
||||
procedure TUC_Prop.SetWhiteSpace(AValue: Boolean);
|
||||
function TUC_Prop.GetSimpleTitleCase : uint32;
|
||||
begin
|
||||
SetBit(C,0,AValue);
|
||||
end;
|
||||
|
||||
function TUC_Prop.GetHangulSyllable: Boolean;
|
||||
begin
|
||||
Result := IsBitON(C,1);
|
||||
end;
|
||||
|
||||
procedure TUC_Prop.SetHangulSyllable(AValue: Boolean);
|
||||
begin
|
||||
SetBit(C,1,AValue);
|
||||
result := UnicodeData.GetSimpleTitleCase(PtrUint(@self));
|
||||
end;
|
||||
|
||||
{ TUCA_DataBook }
|
||||
@ -2818,7 +2765,8 @@ var
|
||||
var
|
||||
k : DWord;
|
||||
pk : PUnicodeChar;
|
||||
puk : PUC_Prop;
|
||||
cp : uint32;
|
||||
cpccc : byte;
|
||||
begin
|
||||
k := AStartFrom;
|
||||
if (k > c) then
|
||||
@ -2837,15 +2785,16 @@ var
|
||||
if (k = c) then
|
||||
exit(False);
|
||||
if UnicodeIsLowSurrogate(pk[1]) then
|
||||
puk := GetProps(pk[0],pk[1])
|
||||
cp := ToUCS4(pk[0],pk[1])
|
||||
else
|
||||
puk := GetProps(Word(pk^));
|
||||
cp := Word(pk^);
|
||||
end else begin
|
||||
puk := GetProps(Word(pk^));
|
||||
cp := Word(pk^);
|
||||
end;
|
||||
if (puk^.C3 = 0) or (lastUnblockedNonstarterCCC >= puk^.C3) then
|
||||
cpccc := GetCCC(cp);
|
||||
if (cpccc = 0) or (lastUnblockedNonstarterCCC >= cpccc) then
|
||||
exit(False);
|
||||
lastUnblockedNonstarterCCC := puk^.C3;
|
||||
lastUnblockedNonstarterCCC := cpccc;
|
||||
Result := True;
|
||||
end;
|
||||
|
||||
@ -3034,14 +2983,14 @@ var
|
||||
var
|
||||
kk, kkidx : Integer;
|
||||
b : Boolean;
|
||||
puk : PUC_Prop;
|
||||
cpccc : byte;
|
||||
ppk : PUCA_PropItemRec;
|
||||
begin
|
||||
Result := False;
|
||||
puk := GetProps(cp);
|
||||
if (puk^.C3 = 0) then
|
||||
cpccc := GetCCC(cp);
|
||||
if (cpccc = 0) then
|
||||
exit;
|
||||
lastUnblockedNonstarterCCC := puk^.C3;
|
||||
lastUnblockedNonstarterCCC := cpccc;
|
||||
if surrogateState then
|
||||
kk := i + 2
|
||||
else
|
||||
@ -3599,7 +3548,7 @@ function FilterString(
|
||||
var
|
||||
i, c : SizeInt;
|
||||
pp, pr : PUnicodeChar;
|
||||
pu : PUC_Prop;
|
||||
cp : uint32;
|
||||
locIsSurrogate : Boolean;
|
||||
begin
|
||||
c := ALength;
|
||||
@ -3609,19 +3558,19 @@ begin
|
||||
pr := @Result[1];
|
||||
i := 1;
|
||||
while (i <= c) do begin
|
||||
pu := GetProps(Word(pp^));
|
||||
locIsSurrogate := (pu^.Category = UGC_Surrogate);
|
||||
cp := Word(pp^);
|
||||
locIsSurrogate := (cp >= SURROGATES_BEGIN) and (cp <= SURROGATES_END);
|
||||
if locIsSurrogate then begin
|
||||
if (i = c) then
|
||||
Break;
|
||||
if not UnicodeIsSurrogatePair(pp[0],pp[1]) then begin
|
||||
if not UnicodeIsSurrogatePair(UnicodeChar(cp),pp[1]) then begin
|
||||
Inc(pp);
|
||||
Inc(i);
|
||||
Continue;
|
||||
end;
|
||||
pu := GetProps(pp[0],pp[1]);
|
||||
cp := ToUCS4(UnicodeChar(cp),pp[1]);
|
||||
end;
|
||||
if not(pu^.Category in AExcludedMask) then begin
|
||||
if not(GetCategory(cp) in AExcludedMask) then begin
|
||||
pr^ := pp^;
|
||||
Inc(pr);
|
||||
if locIsSurrogate then begin
|
||||
@ -3768,7 +3717,8 @@ var
|
||||
var
|
||||
k : DWord;
|
||||
pk : PUnicodeChar;
|
||||
puk : PUC_Prop;
|
||||
cp : uint32;
|
||||
cpccc : byte;
|
||||
begin
|
||||
k := AStartFrom;
|
||||
if (k > ctx^.c) then
|
||||
@ -3787,15 +3737,16 @@ var
|
||||
if (k = ctx^.c) then
|
||||
exit(False);
|
||||
if UnicodeIsLowSurrogate(pk[1]) then
|
||||
puk := GetProps(pk[0],pk[1])
|
||||
cp := ToUCS4(pk[0],pk[1])
|
||||
else
|
||||
puk := GetProps(Word(pk^));
|
||||
cp := Word(pk^);
|
||||
end else begin
|
||||
puk := GetProps(Word(pk^));
|
||||
cp := Word(pk^);
|
||||
end;
|
||||
if (puk^.C3 = 0) or (ctx^.lastUnblockedNonstarterCCC >= puk^.C3) then
|
||||
cpccc := GetCCC(cp);
|
||||
if (cpccc = 0) or (ctx^.lastUnblockedNonstarterCCC >= cpccc) then
|
||||
exit(False);
|
||||
ctx^.lastUnblockedNonstarterCCC := puk^.C3;
|
||||
ctx^.lastUnblockedNonstarterCCC := cpccc;
|
||||
Result := True;
|
||||
end;
|
||||
|
||||
@ -3975,14 +3926,14 @@ var
|
||||
var
|
||||
kk : Integer;
|
||||
b : Boolean;
|
||||
puk : PUC_Prop;
|
||||
cpccc : Byte;
|
||||
ppk : PUCA_PropItemRec;
|
||||
begin
|
||||
Result := False;
|
||||
puk := GetProps(ctx^.cp);
|
||||
if (puk^.C3 = 0) then
|
||||
cpccc := GetCCC(ctx^.cp);
|
||||
if (cpccc = 0) then
|
||||
exit;
|
||||
ctx^.lastUnblockedNonstarterCCC := puk^.C3;
|
||||
ctx^.lastUnblockedNonstarterCCC := cpccc;
|
||||
if ctx^.surrogateState then
|
||||
kk := ctx^.i + 2
|
||||
else
|
||||
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
1326
rtl/objpas/unicodedata_props.inc
Normal file
1326
rtl/objpas/unicodedata_props.inc
Normal file
File diff suppressed because it is too large
Load Diff
@ -76,11 +76,13 @@ end;
|
||||
|
||||
{$IFNDEF UNI_BUILD_TIME}
|
||||
function isUnifiedIdeograph(const ACodePoint : Cardinal) : boolean;
|
||||
var
|
||||
p : PUC_Prop;
|
||||
begin
|
||||
p := GetProps(ACodePoint);
|
||||
Result := (p <> nil) and p^.UnifiedIdeograph;
|
||||
result := false;
|
||||
case ACodePoint of
|
||||
$3400..$4DBF, $4E00..$9FFF, $FA0E..$FA0F, $FA11, $FA13..$FA14, $FA1F, $FA21, $FA23..$FA24, $FA27..$FA29,
|
||||
$20000..$2A6DF, $2A700..$2B738, $2B740..$2B81D, $2B820..$2CEA1, $2CEB0..$2EBE0, $30000..$3134A:
|
||||
result := true;
|
||||
end;
|
||||
end;
|
||||
{$ENDIF UNI_BUILD_TIME}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user