mirror of
https://gitlab.com/freepascal.org/fpc/source.git
synced 2025-04-21 15:49:26 +02:00
rtl: update character.pas by Inoussa OUEDRAOGO (issue #0020302)
* This new version contains all the unicode code points (BMP and other planes) character properties. git-svn-id: trunk@19207 -
This commit is contained in:
parent
64f8ec7a9a
commit
3b9fd15af7
4
.gitattributes
vendored
4
.gitattributes
vendored
@ -7820,6 +7820,7 @@ rtl/objpas/sysutils/syswideh.inc svneol=native#text/plain
|
||||
rtl/objpas/types.pp svneol=native#text/plain
|
||||
rtl/objpas/typinfo.pp svneol=native#text/plain
|
||||
rtl/objpas/unicodedata.inc svneol=native#text/pascal
|
||||
rtl/objpas/unicodedata2.inc svneol=native#text/pascal
|
||||
rtl/objpas/utf8bidi.pp svneol=native#text/plain
|
||||
rtl/objpas/varutilh.inc svneol=native#text/plain
|
||||
rtl/objpas/varutils.inc svneol=native#text/plain
|
||||
@ -10621,7 +10622,10 @@ tests/test/units/character/tisdigit.pp svneol=native#text/pascal
|
||||
tests/test/units/character/tisdigit2.pp svneol=native#text/pascal
|
||||
tests/test/units/character/tishighsurrogate.pp svneol=native#text/pascal
|
||||
tests/test/units/character/tisletter.pp svneol=native#text/pascal
|
||||
tests/test/units/character/tisletter2.pp svneol=native#text/pascal
|
||||
tests/test/units/character/tisletterordigit.pp svneol=native#text/pascal
|
||||
tests/test/units/character/tisletterordigit2.pp svneol=native#text/pascal
|
||||
tests/test/units/character/tislower2.pp svneol=native#text/pascal
|
||||
tests/test/units/character/tislowsurrogate.pp svneol=native#text/pascal
|
||||
tests/test/units/character/tisnumber.pp svneol=native#text/pascal
|
||||
tests/test/units/character/tisnumber2.pp svneol=native#text/pascal
|
||||
|
@ -175,7 +175,8 @@ type
|
||||
WhiteSpace : Boolean;
|
||||
end;
|
||||
|
||||
{$INCLUDE unicodedata.inc}
|
||||
{$INCLUDE unicodedata.inc} // For BMP code points
|
||||
{$INCLUDE unicodedata2.inc} // For other planes
|
||||
|
||||
const
|
||||
LOW_SURROGATE_BEGIN = Word($DC00);
|
||||
@ -183,7 +184,7 @@ const
|
||||
|
||||
HIGH_SURROGATE_BEGIN = Word($D800);
|
||||
HIGH_SURROGATE_END = Word($DBFF);
|
||||
|
||||
HIGH_SURROGATE_COUNT = HIGH_SURROGATE_END - HIGH_SURROGATE_BEGIN + 1;
|
||||
UCS4_HALF_BASE = LongWord($10000);
|
||||
UCS4_HALF_MASK = Word($3FF);
|
||||
MAX_LEGAL_UTF32 = $10FFFF;
|
||||
@ -216,7 +217,7 @@ const
|
||||
TUnicodeCategory.ucModifierSymbol, TUnicodeCategory.ucOtherSymbol
|
||||
];
|
||||
|
||||
class function GetProps(const ACodePoint : Word) : PUC_Prop; inline;
|
||||
function GetProps(const ACodePoint : Word) : PUC_Prop; inline;
|
||||
begin
|
||||
Result:=
|
||||
@UC_PROP_ARRAY[
|
||||
@ -227,6 +228,23 @@ begin
|
||||
];
|
||||
end;
|
||||
|
||||
function GetProps(const AHighS, ALowS : UnicodeChar): PUC_Prop; inline;
|
||||
begin
|
||||
Result:=
|
||||
@UC_PROP_ARRAY[
|
||||
UCO_TABLE_2[
|
||||
(UCO_TABLE_1[Word(AHighS)-HIGH_SURROGATE_BEGIN] * HIGH_SURROGATE_COUNT) +
|
||||
Word(ALowS) - LOW_SURROGATE_BEGIN
|
||||
]
|
||||
];
|
||||
end;
|
||||
|
||||
procedure FromUCS4(const AValue : UCS4Char; var AHighS, ALowS : UnicodeChar);
|
||||
begin
|
||||
AHighS := UnicodeChar((AValue - $10000) shr 10 + $d800);
|
||||
ALowS := UnicodeChar((AValue - $10000) and $3ff + $dc00);
|
||||
end;
|
||||
|
||||
function ConvertFromUtf32(AChar: UCS4Char): UnicodeString;
|
||||
begin
|
||||
Result := TCharacter.ConvertFromUtf32(AChar);
|
||||
@ -444,7 +462,7 @@ begin
|
||||
raise ENoConstructException.CreateFmt(SClassCantBeConstructed, [ClassName]);
|
||||
end;
|
||||
|
||||
class function TCharacter.ConvertFromUtf32(AChar : UCS4Char) : UnicodeString; static;
|
||||
class function TCharacter.ConvertFromUtf32(AChar : UCS4Char) : UnicodeString;
|
||||
begin
|
||||
if AChar < UCS4_HALF_BASE then
|
||||
begin
|
||||
@ -463,7 +481,7 @@ begin
|
||||
end;
|
||||
end;
|
||||
|
||||
class function TCharacter.ConvertToUtf32(const AString : UnicodeString; AIndex : Integer) : UCS4Char; overload; static;
|
||||
class function TCharacter.ConvertToUtf32(const AString : UnicodeString; AIndex : Integer) : UCS4Char; overload;
|
||||
begin
|
||||
if (AIndex < 1) or (AIndex > Length(AString)) then
|
||||
raise EArgumentOutOfRangeException.CreateFmt(SStringIndexOutOfRange, [AIndex, Length(AString)]);
|
||||
@ -476,7 +494,7 @@ begin
|
||||
end;
|
||||
end;
|
||||
|
||||
class function TCharacter.ConvertToUtf32(const AString : UnicodeString; AIndex : Integer; out ACharLength : Integer) : UCS4Char; overload; static;
|
||||
class function TCharacter.ConvertToUtf32(const AString : UnicodeString; AIndex : Integer; out ACharLength : Integer) : UCS4Char; overload;
|
||||
begin
|
||||
if (AIndex < 1) or (AIndex > Length(AString)) then
|
||||
raise EArgumentOutOfRangeException.CreateFmt(SStringIndexOutOfRange, [AIndex, Length(AString)]);
|
||||
@ -492,7 +510,7 @@ begin
|
||||
ACharLength := 1;
|
||||
end;
|
||||
|
||||
class function TCharacter.ConvertToUtf32(const AHighSurrogate, ALowSurrogate : UnicodeChar) : UCS4Char; overload; static;
|
||||
class function TCharacter.ConvertToUtf32(const AHighSurrogate, ALowSurrogate : UnicodeChar) : UCS4Char; overload;
|
||||
begin
|
||||
if not IsHighSurrogate(AHighSurrogate) then
|
||||
raise EArgumentOutOfRangeException.CreateFmt(SHighSurrogateOutOfRange, [Word(AHighSurrogate)]);
|
||||
@ -501,7 +519,7 @@ begin
|
||||
Result := (UCS4Char(AHighSurrogate) - HIGH_SURROGATE_BEGIN) shl 10 + (UCS4Char(ALowSurrogate) - LOW_SURROGATE_BEGIN) + UCS4_HALF_BASE;
|
||||
end;
|
||||
|
||||
class function TCharacter.GetNumericValue(AChar : UnicodeChar) : Double; static;
|
||||
class function TCharacter.GetNumericValue(AChar : UnicodeChar) : Double;
|
||||
begin
|
||||
Result := GetProps(Word(AChar))^.NumericValue;
|
||||
end;
|
||||
@ -509,14 +527,14 @@ end;
|
||||
class function TCharacter.GetNumericValue(
|
||||
const AString : UnicodeString;
|
||||
AIndex : Integer
|
||||
) : Double; static;
|
||||
) : Double;
|
||||
begin
|
||||
if (AIndex < 1) or (AIndex > Length(AString)) then
|
||||
raise EArgumentOutOfRangeException.CreateFmt(SStringIndexOutOfRange, [AIndex, Length(AString)]);
|
||||
Result := GetNumericValue(AString[AIndex]);
|
||||
end;
|
||||
|
||||
class function TCharacter.GetUnicodeCategory(AChar : UnicodeChar) : TUnicodeCategory; static;
|
||||
class function TCharacter.GetUnicodeCategory(AChar : UnicodeChar) : TUnicodeCategory;
|
||||
begin
|
||||
Result := GetProps(Word(AChar))^.Category;
|
||||
end;
|
||||
@ -524,14 +542,14 @@ end;
|
||||
class function TCharacter.GetUnicodeCategory(
|
||||
const AString : UnicodeString;
|
||||
AIndex : Integer
|
||||
) : TUnicodeCategory; static;
|
||||
) : TUnicodeCategory;
|
||||
begin
|
||||
if (AIndex < 1) or (AIndex > Length(AString)) then
|
||||
raise EArgumentOutOfRangeException.CreateFmt(SStringIndexOutOfRange, [AIndex, Length(AString)]);
|
||||
Result := GetUnicodeCategory(AString[AIndex]);
|
||||
end;
|
||||
|
||||
class function TCharacter.IsControl(AChar : UnicodeChar) : Boolean; static;
|
||||
class function TCharacter.IsControl(AChar : UnicodeChar) : Boolean;
|
||||
begin
|
||||
Result := (GetProps(Word(AChar))^.Category = TUnicodeCategory.ucControl);
|
||||
end;
|
||||
@ -539,14 +557,14 @@ end;
|
||||
class function TCharacter.IsControl(
|
||||
const AString : UnicodeString;
|
||||
AIndex : Integer
|
||||
) : Boolean; static;
|
||||
) : Boolean;
|
||||
begin
|
||||
if (AIndex < 1) or (AIndex > Length(AString)) then
|
||||
raise EArgumentOutOfRangeException.CreateFmt(SStringIndexOutOfRange, [AIndex, Length(AString)]);
|
||||
Result := IsControl(AString[AIndex]);
|
||||
end;
|
||||
|
||||
class function TCharacter.IsDigit(AChar : UnicodeChar) : Boolean; static;
|
||||
class function TCharacter.IsDigit(AChar : UnicodeChar) : Boolean;
|
||||
begin
|
||||
Result := (GetProps(Word(AChar))^.Category = TUnicodeCategory.ucDecimalNumber);
|
||||
end;
|
||||
@ -554,14 +572,14 @@ end;
|
||||
class function TCharacter.IsDigit(
|
||||
const AString : UnicodeString;
|
||||
AIndex : Integer
|
||||
) : Boolean; static;
|
||||
) : Boolean;
|
||||
begin
|
||||
if (AIndex < 1) or (AIndex > Length(AString)) then
|
||||
raise EArgumentOutOfRangeException.CreateFmt(SStringIndexOutOfRange, [AIndex, Length(AString)]);
|
||||
Result := IsDigit(AString[AIndex]);
|
||||
end;
|
||||
|
||||
class function TCharacter.IsSurrogate(AChar : UnicodeChar) : Boolean; static;
|
||||
class function TCharacter.IsSurrogate(AChar : UnicodeChar) : Boolean;
|
||||
begin
|
||||
Result := (GetProps(Word(AChar))^.Category = TUnicodeCategory.ucSurrogate);
|
||||
end;
|
||||
@ -569,14 +587,14 @@ end;
|
||||
class function TCharacter.IsSurrogate(
|
||||
const AString : UnicodeString;
|
||||
AIndex : Integer
|
||||
) : Boolean; static;
|
||||
) : Boolean;
|
||||
begin
|
||||
if (AIndex < 1) or (AIndex > Length(AString)) then
|
||||
raise EArgumentOutOfRangeException.CreateFmt(SStringIndexOutOfRange, [AIndex, Length(AString)]);
|
||||
Result := IsSurrogate(AString[AIndex]);
|
||||
end;
|
||||
|
||||
class function TCharacter.IsHighSurrogate(AChar : UnicodeChar) : Boolean; static;
|
||||
class function TCharacter.IsHighSurrogate(AChar : UnicodeChar) : Boolean;
|
||||
begin
|
||||
Result := (GetProps(Word(AChar))^.Category = TUnicodeCategory.ucSurrogate) and
|
||||
(Word(AChar) >= HIGH_SURROGATE_BEGIN) and
|
||||
@ -586,14 +604,14 @@ end;
|
||||
class function TCharacter.IsHighSurrogate(
|
||||
const AString : UnicodeString;
|
||||
AIndex : Integer
|
||||
) : Boolean; static;
|
||||
) : Boolean;
|
||||
begin
|
||||
if (AIndex < 1) or (AIndex > Length(AString)) then
|
||||
raise EArgumentOutOfRangeException.CreateFmt(SStringIndexOutOfRange, [AIndex, Length(AString)]);
|
||||
Result := IsHighSurrogate(AString[AIndex]);
|
||||
end;
|
||||
|
||||
class function TCharacter.IsLowSurrogate(AChar : UnicodeChar) : Boolean; static;
|
||||
class function TCharacter.IsLowSurrogate(AChar : UnicodeChar) : Boolean;
|
||||
begin
|
||||
Result := (GetProps(Word(AChar))^.Category = TUnicodeCategory.ucSurrogate) and
|
||||
(Word(AChar) >= LOW_SURROGATE_BEGIN) and
|
||||
@ -603,7 +621,7 @@ end;
|
||||
class function TCharacter.IsLowSurrogate(
|
||||
const AString : UnicodeString;
|
||||
AIndex : Integer
|
||||
) : Boolean; static;
|
||||
) : Boolean;
|
||||
begin
|
||||
if (AIndex < 1) or (AIndex > Length(AString)) then
|
||||
raise EArgumentOutOfRangeException.CreateFmt(SStringIndexOutOfRange, [AIndex, Length(AString)]);
|
||||
@ -613,7 +631,7 @@ end;
|
||||
class function TCharacter.IsSurrogatePair(
|
||||
const AHighSurrogate,
|
||||
ALowSurrogate : UnicodeChar
|
||||
) : Boolean;static;
|
||||
) : Boolean;
|
||||
begin
|
||||
Result :=
|
||||
( (Word(AHighSurrogate) >= HIGH_SURROGATE_BEGIN) and
|
||||
@ -627,14 +645,14 @@ end;
|
||||
class function TCharacter.IsSurrogatePair(
|
||||
const AString : UnicodeString;
|
||||
AIndex : Integer
|
||||
) : Boolean;static;
|
||||
) : Boolean;
|
||||
begin
|
||||
if (AIndex < 1) or (AIndex > Length(AString)) then
|
||||
raise EArgumentOutOfRangeException.CreateFmt(SStringIndexOutOfRange, [AIndex, Length(AString)]);
|
||||
Result := IsSurrogatePair(AString[AIndex],AString[AIndex+1]);
|
||||
end;
|
||||
|
||||
class function TCharacter.IsLetter(AChar : UnicodeChar) : Boolean; static;
|
||||
class function TCharacter.IsLetter(AChar : UnicodeChar) : Boolean;
|
||||
begin
|
||||
Result := (GetProps(Word(AChar))^.Category in LETTER_CATEGORIES);
|
||||
end;
|
||||
@ -642,14 +660,27 @@ end;
|
||||
class function TCharacter.IsLetter(
|
||||
const AString : UnicodeString;
|
||||
AIndex : Integer
|
||||
) : Boolean; static;
|
||||
begin
|
||||
) : Boolean;
|
||||
var
|
||||
c : UnicodeChar;
|
||||
begin
|
||||
if (AIndex < 1) or (AIndex > Length(AString)) then
|
||||
raise EArgumentOutOfRangeException.CreateFmt(SStringIndexOutOfRange, [AIndex, Length(AString)]);
|
||||
Result := IsLetter(AString[AIndex]);
|
||||
c := AString[AIndex];
|
||||
if IsHighSurrogate(c) then
|
||||
begin
|
||||
if Length(AString) < Succ(AIndex) then
|
||||
raise EArgumentException.CreateFmt(SInvalidHighSurrogate, [AIndex]);
|
||||
if IsLowSurrogate(AString,Succ(AIndex)) then
|
||||
Result := (GetProps(c, AString[Succ(AIndex)])^.Category in LETTER_CATEGORIES)
|
||||
else
|
||||
raise EArgumentOutOfRangeException.CreateFmt(SLowSurrogateOutOfRange, [Word(AString[Succ(AIndex)])]);
|
||||
end
|
||||
else
|
||||
Result := IsLetter(c);
|
||||
end;
|
||||
|
||||
class function TCharacter.IsLetterOrDigit(AChar : UnicodeChar) : Boolean; static;
|
||||
class function TCharacter.IsLetterOrDigit(AChar : UnicodeChar) : Boolean;
|
||||
begin
|
||||
Result := (GetProps(Word(AChar))^.Category in LETTER_OR_DIGIT_CATEGORIES);
|
||||
end;
|
||||
@ -657,14 +688,27 @@ end;
|
||||
class function TCharacter.IsLetterOrDigit(
|
||||
const AString : UnicodeString;
|
||||
AIndex : Integer
|
||||
) : Boolean; static;
|
||||
begin
|
||||
) : Boolean;
|
||||
var
|
||||
c : UnicodeChar;
|
||||
begin
|
||||
if (AIndex < 1) or (AIndex > Length(AString)) then
|
||||
raise EArgumentOutOfRangeException.CreateFmt(SStringIndexOutOfRange, [AIndex, Length(AString)]);
|
||||
Result := IsLetterOrDigit(AString[AIndex]);
|
||||
c := AString[AIndex];
|
||||
if IsHighSurrogate(c) then
|
||||
begin
|
||||
if Length(AString) < Succ(AIndex) then
|
||||
raise EArgumentException.CreateFmt(SInvalidHighSurrogate, [AIndex]);
|
||||
if IsLowSurrogate(AString, Succ(AIndex)) then
|
||||
Result := (GetProps(c, AString[Succ(AIndex)])^.Category in LETTER_OR_DIGIT_CATEGORIES)
|
||||
else
|
||||
raise EArgumentOutOfRangeException.CreateFmt(SLowSurrogateOutOfRange, [Word(AString[Succ(AIndex)])]);
|
||||
end
|
||||
else
|
||||
Result := IsLetterOrDigit(c);
|
||||
end;
|
||||
|
||||
class function TCharacter.IsLower(AChar : UnicodeChar) : Boolean; static;
|
||||
class function TCharacter.IsLower(AChar : UnicodeChar) : Boolean;
|
||||
begin
|
||||
Result := (GetProps(Word(AChar))^.Category = TUnicodeCategory.ucLowercaseLetter);
|
||||
end;
|
||||
@ -672,14 +716,27 @@ end;
|
||||
class function TCharacter.IsLower(
|
||||
const AString : UnicodeString;
|
||||
AIndex : Integer
|
||||
) : Boolean; static;
|
||||
begin
|
||||
) : Boolean;
|
||||
var
|
||||
c : UnicodeChar;
|
||||
begin
|
||||
if (AIndex < 1) or (AIndex > Length(AString)) then
|
||||
raise EArgumentOutOfRangeException.CreateFmt(SStringIndexOutOfRange, [AIndex, Length(AString)]);
|
||||
Result := IsLower(AString[AIndex]);
|
||||
c := AString[AIndex];
|
||||
if IsHighSurrogate(c) then
|
||||
begin
|
||||
if Length(AString) < Succ(AIndex) then
|
||||
raise EArgumentException.CreateFmt(SInvalidHighSurrogate, [AIndex]);
|
||||
if IsLowSurrogate(AString, Succ(AIndex)) then
|
||||
Result := (GetProps(c, AString[Succ(AIndex)])^.Category = TUnicodeCategory.ucLowercaseLetter)
|
||||
else
|
||||
raise EArgumentOutOfRangeException.CreateFmt(SLowSurrogateOutOfRange, [Word(AString[Succ(AIndex)])]);
|
||||
end
|
||||
else
|
||||
Result := IsLower(c);
|
||||
end;
|
||||
|
||||
class function TCharacter.IsNumber(AChar : UnicodeChar) : Boolean; static;
|
||||
class function TCharacter.IsNumber(AChar : UnicodeChar) : Boolean;
|
||||
begin
|
||||
Result := (GetProps(Word(AChar))^.Category in NUMBER_CATEGORIES);
|
||||
end;
|
||||
@ -687,14 +744,27 @@ end;
|
||||
class function TCharacter.IsNumber(
|
||||
const AString : UnicodeString;
|
||||
AIndex : Integer
|
||||
) : Boolean;static;
|
||||
) : Boolean;
|
||||
var
|
||||
c : UnicodeChar;
|
||||
begin
|
||||
if (AIndex < 1) or (AIndex > Length(AString)) then
|
||||
raise EArgumentOutOfRangeException.CreateFmt(SStringIndexOutOfRange, [AIndex, Length(AString)]);
|
||||
Result := IsNumber(AString[AIndex]);
|
||||
c := AString[AIndex];
|
||||
if IsHighSurrogate(c) then
|
||||
begin
|
||||
if Length(AString) < Succ(AIndex) then
|
||||
raise EArgumentException.CreateFmt(SInvalidHighSurrogate, [AIndex]);
|
||||
if IsLowSurrogate(AString, Succ(AIndex)) then
|
||||
Result := (GetProps(c, AString[Succ(AIndex)])^.Category in NUMBER_CATEGORIES)
|
||||
else
|
||||
raise EArgumentOutOfRangeException.CreateFmt(SLowSurrogateOutOfRange, [Word(AString[Succ(AIndex)])]);
|
||||
end
|
||||
else
|
||||
Result := IsNumber(c);
|
||||
end;
|
||||
|
||||
class function TCharacter.IsPunctuation(AChar : UnicodeChar) : Boolean;static;
|
||||
class function TCharacter.IsPunctuation(AChar : UnicodeChar) : Boolean;
|
||||
begin
|
||||
Result := (GetProps(Word(AChar))^.Category in PUNCTUATION_CATEGORIES);
|
||||
end;
|
||||
@ -702,14 +772,27 @@ end;
|
||||
class function TCharacter.IsPunctuation(
|
||||
const AString : UnicodeString;
|
||||
AIndex : Integer
|
||||
) : Boolean;static;
|
||||
) : Boolean;
|
||||
var
|
||||
c : UnicodeChar;
|
||||
begin
|
||||
if (AIndex < 1) or (AIndex > Length(AString)) then
|
||||
raise EArgumentOutOfRangeException.CreateFmt(SStringIndexOutOfRange, [AIndex, Length(AString)]);
|
||||
Result := IsPunctuation(AString[AIndex]);
|
||||
c := AString[AIndex];
|
||||
if IsHighSurrogate(c) then
|
||||
begin
|
||||
if Length(AString) < Succ(AIndex) then
|
||||
raise EArgumentException.CreateFmt(SInvalidHighSurrogate, [AIndex]);
|
||||
if IsLowSurrogate(AString, Succ(AIndex)) then
|
||||
Result := (GetProps(c, AString[Succ(AIndex)])^.Category in PUNCTUATION_CATEGORIES)
|
||||
else
|
||||
raise EArgumentOutOfRangeException.CreateFmt(SLowSurrogateOutOfRange, [Word(AString[Succ(AIndex)])]);
|
||||
end
|
||||
else
|
||||
Result := IsPunctuation(c);
|
||||
end;
|
||||
|
||||
class function TCharacter.IsSeparator(AChar: UnicodeChar): Boolean;static;
|
||||
class function TCharacter.IsSeparator(AChar: UnicodeChar): Boolean;
|
||||
begin
|
||||
Result := (GetProps(Word(AChar))^.Category in SEPARATOR_CATEGORIES);
|
||||
end;
|
||||
@ -717,14 +800,27 @@ end;
|
||||
class function TCharacter.IsSeparator(
|
||||
const AString : UnicodeString;
|
||||
AIndex : Integer
|
||||
) : Boolean;static;
|
||||
) : Boolean;
|
||||
var
|
||||
c : UnicodeChar;
|
||||
begin
|
||||
if (AIndex < 1) or (AIndex > Length(AString)) then
|
||||
raise EArgumentOutOfRangeException.CreateFmt(SStringIndexOutOfRange, [AIndex, Length(AString)]);
|
||||
Result := IsSeparator(AString[AIndex]);
|
||||
c := AString[AIndex];
|
||||
if IsHighSurrogate(c) then
|
||||
begin
|
||||
if Length(AString) < Succ(AIndex) then
|
||||
raise EArgumentException.CreateFmt(SInvalidHighSurrogate, [AIndex]);
|
||||
if IsLowSurrogate(AString, Succ(AIndex)) then
|
||||
Result := (GetProps(c, AString[Succ(AIndex)])^.Category in SEPARATOR_CATEGORIES)
|
||||
else
|
||||
raise EArgumentOutOfRangeException.CreateFmt(SLowSurrogateOutOfRange, [Word(AString[Succ(AIndex)])]);
|
||||
end
|
||||
else
|
||||
Result := IsSeparator(c);
|
||||
end;
|
||||
|
||||
class function TCharacter.IsSymbol(AChar: UnicodeChar): Boolean;static;
|
||||
class function TCharacter.IsSymbol(AChar: UnicodeChar): Boolean;
|
||||
begin
|
||||
Result := (GetProps(Word(AChar))^.Category in SYMBOL_CATEGORIES);
|
||||
end;
|
||||
@ -732,14 +828,27 @@ end;
|
||||
class function TCharacter.IsSymbol(
|
||||
const AString : UnicodeString;
|
||||
AIndex : Integer
|
||||
) : Boolean;static;
|
||||
) : Boolean;
|
||||
var
|
||||
c : UnicodeChar;
|
||||
begin
|
||||
if (AIndex < 1) or (AIndex > Length(AString)) then
|
||||
raise EArgumentOutOfRangeException.CreateFmt(SStringIndexOutOfRange, [AIndex, Length(AString)]);
|
||||
Result := IsSymbol(AString[AIndex]);
|
||||
c := AString[AIndex];
|
||||
if IsHighSurrogate(c) then
|
||||
begin
|
||||
if Length(AString) < Succ(AIndex) then
|
||||
raise EArgumentException.CreateFmt(SInvalidHighSurrogate, [AIndex]);
|
||||
if IsLowSurrogate(AString, Succ(AIndex)) then
|
||||
Result := (GetProps(c, AString[Succ(AIndex)])^.Category in SYMBOL_CATEGORIES)
|
||||
else
|
||||
raise EArgumentOutOfRangeException.CreateFmt(SLowSurrogateOutOfRange, [Word(AString[Succ(AIndex)])]);
|
||||
end
|
||||
else
|
||||
Result := IsSymbol(c);
|
||||
end;
|
||||
|
||||
class function TCharacter.IsUpper(AChar : UnicodeChar) : Boolean;static;
|
||||
class function TCharacter.IsUpper(AChar : UnicodeChar) : Boolean;
|
||||
begin
|
||||
Result := (GetProps(Word(AChar))^.Category = TUnicodeCategory.ucUppercaseLetter);
|
||||
end;
|
||||
@ -747,14 +856,27 @@ end;
|
||||
class function TCharacter.IsUpper(
|
||||
const AString : UnicodeString;
|
||||
AIndex : Integer
|
||||
) : Boolean;static;
|
||||
) : Boolean;
|
||||
var
|
||||
c : UnicodeChar;
|
||||
begin
|
||||
if (AIndex < 1) or (AIndex > Length(AString)) then
|
||||
raise EArgumentOutOfRangeException.CreateFmt(SStringIndexOutOfRange, [AIndex, Length(AString)]);
|
||||
Result := IsUpper(AString[AIndex]);
|
||||
c := AString[AIndex];
|
||||
if IsHighSurrogate(c) then
|
||||
begin
|
||||
if Length(AString) < Succ(AIndex) then
|
||||
raise EArgumentException.CreateFmt(SInvalidHighSurrogate, [AIndex]);
|
||||
if IsLowSurrogate(AString,Succ(AIndex)) then
|
||||
Result := (GetProps(c,AString[Succ(AIndex)])^.Category = TUnicodeCategory.ucUppercaseLetter)
|
||||
else
|
||||
raise EArgumentOutOfRangeException.CreateFmt(SLowSurrogateOutOfRange, [Word(AString[Succ(AIndex)])]);
|
||||
end
|
||||
else
|
||||
Result := IsUpper(c);
|
||||
end;
|
||||
|
||||
class function TCharacter.IsWhiteSpace(AChar : UnicodeChar) : Boolean;static;
|
||||
class function TCharacter.IsWhiteSpace(AChar : UnicodeChar) : Boolean;
|
||||
begin
|
||||
Result := GetProps(Word(AChar))^.WhiteSpace;
|
||||
end;
|
||||
@ -762,46 +884,87 @@ end;
|
||||
class function TCharacter.IsWhiteSpace(
|
||||
const AString : UnicodeString;
|
||||
AIndex : Integer
|
||||
) : Boolean;static;
|
||||
) : Boolean;
|
||||
var
|
||||
c : UnicodeChar;
|
||||
begin
|
||||
if (AIndex < 1) or (AIndex > Length(AString)) then
|
||||
raise EArgumentOutOfRangeException.CreateFmt(SStringIndexOutOfRange, [AIndex, Length(AString)]);
|
||||
Result := IsWhiteSpace(AString[AIndex]);
|
||||
c := AString[AIndex];
|
||||
if IsHighSurrogate(c) then
|
||||
begin
|
||||
if Length(AString) < Succ(AIndex) then
|
||||
raise EArgumentException.CreateFmt(SInvalidHighSurrogate, [AIndex]);
|
||||
if IsLowSurrogate(AString, Succ(AIndex)) then
|
||||
Result := GetProps(c,AString[AIndex+1])^.WhiteSpace
|
||||
else
|
||||
raise EArgumentOutOfRangeException.CreateFmt(SLowSurrogateOutOfRange, [Word(AString[Succ(AIndex)])]);
|
||||
end
|
||||
else
|
||||
Result := IsWhiteSpace(c);
|
||||
end;
|
||||
|
||||
class function TCharacter.ToLower(AChar : UnicodeChar) : UnicodeChar;static;
|
||||
class function TCharacter.ToLower(AChar : UnicodeChar) : UnicodeChar;
|
||||
begin
|
||||
Result := UnicodeChar(GetProps(Word(AChar))^.SimpleLowerCase);
|
||||
if (Result = UnicodeChar(0)) then
|
||||
Result := AChar;
|
||||
end;
|
||||
|
||||
class function TCharacter.ToLower(const AString : UnicodeString) : UnicodeString;static;
|
||||
class function TCharacter.ToLower(const AString : UnicodeString) : UnicodeString;
|
||||
var
|
||||
i, c : SizeInt;
|
||||
pp, pr : PUnicodeChar;
|
||||
pu : PUC_Prop;
|
||||
locIsSurrogate : Boolean;
|
||||
begin
|
||||
c := Length(AString);
|
||||
SetLength(Result,c);
|
||||
SetLength(Result,2*c);
|
||||
if (c > 0) then begin
|
||||
pp := @AString[1];
|
||||
pr := @Result[1];
|
||||
for i := 1 to c do begin
|
||||
pr^ := ToLower(pp^);
|
||||
i := 1;
|
||||
while (i <= c) do begin
|
||||
pu := GetProps(Word(pp^));
|
||||
locIsSurrogate := (pu^.Category = TUnicodeCategory.ucSurrogate);
|
||||
if locIsSurrogate then begin
|
||||
if not IsSurrogatePair(AString,i) then
|
||||
raise EArgumentException.Create(SInvalidUnicodeCodePointSequence);
|
||||
pu := GetProps(pp^,AString[i+1]);
|
||||
end;
|
||||
if (pu^.SimpleLowerCase = 0) then begin
|
||||
pr^ := pp^;
|
||||
if locIsSurrogate then begin
|
||||
Inc(pp);
|
||||
Inc(pr);
|
||||
pr^ := pp^;
|
||||
end;
|
||||
end else begin
|
||||
if (pu^.SimpleLowerCase <= $FFFF) then begin
|
||||
pr^ := UnicodeChar(Word(pu^.SimpleLowerCase));
|
||||
end else begin
|
||||
FromUCS4(UCS4Char(pu^.SimpleLowerCase),pr^,(pr+1)^);
|
||||
Inc(pr);
|
||||
end;
|
||||
end;
|
||||
Inc(pp);
|
||||
Inc(pr);
|
||||
Inc(i);
|
||||
end;
|
||||
Dec(pp);
|
||||
i := ((pr - (@Result[1])) div SizeOf(UnicodeChar));
|
||||
SetLength(Result,i)
|
||||
end;
|
||||
end;
|
||||
|
||||
class function TCharacter.ToUpper(AChar : UnicodeChar) : UnicodeChar;static;
|
||||
class function TCharacter.ToUpper(AChar : UnicodeChar) : UnicodeChar;
|
||||
begin
|
||||
Result := UnicodeChar(GetProps(Word(AChar))^.SimpleUpperCase);
|
||||
if (Result = UnicodeChar(0)) then
|
||||
Result := AChar;
|
||||
end;
|
||||
|
||||
class function TCharacter.ToUpper(const AString : UnicodeString) : UnicodeString;static;
|
||||
class function TCharacter.ToUpper(const AString : UnicodeString) : UnicodeString;
|
||||
var
|
||||
i, c : SizeInt;
|
||||
pp, pr : PUnicodeChar;
|
||||
|
@ -294,6 +294,7 @@ ResourceString
|
||||
SLowSurrogateOutOfRange = 'Low surrogate $%x out of range [$DC00 - $DFFF]';
|
||||
SInvalidUTF32Char = 'Invalid UTF32 character $%x. Valid UTF32 character must be in range [$0 - $10FFFF] except surrogate range [$D800-$DFFF]';
|
||||
SInvalidHighSurrogate = 'Invalid high surrogate at index %d. High surrogate must be followed by a low surrogate pair';
|
||||
SInvalidUnicodeCodePointSequence = 'Invalid unicode code point sequence';
|
||||
SClassCantBeConstructed = 'Class %s can not be constructed';
|
||||
|
||||
{ ---------------------------------------------------------------------
|
||||
|
@ -2598,12 +2598,12 @@ const
|
||||
(Category : TUnicodeCategory.ucOtherNumber; NumericValue : 0.11111111111111; SimpleUpperCase : 0; SimpleLowerCase : 0; WhiteSpace : False),
|
||||
(Category : TUnicodeCategory.ucOtherNumber; NumericValue : 0.1; SimpleUpperCase : 0; SimpleLowerCase : 0; WhiteSpace : False),
|
||||
(Category : TUnicodeCategory.ucOtherNumber; NumericValue : 0.33333333333333; SimpleUpperCase : 0; SimpleLowerCase : 0; WhiteSpace : False),
|
||||
(Category : TUnicodeCategory.ucOtherNumber; NumericValue : 0.66666666666667; SimpleUpperCase : 0; SimpleLowerCase : 0; WhiteSpace : False),
|
||||
(Category : TUnicodeCategory.ucOtherNumber; NumericValue : 0.66666666666666; SimpleUpperCase : 0; SimpleLowerCase : 0; WhiteSpace : False),
|
||||
(Category : TUnicodeCategory.ucOtherNumber; NumericValue : 0.2; SimpleUpperCase : 0; SimpleLowerCase : 0; WhiteSpace : False),
|
||||
(Category : TUnicodeCategory.ucOtherNumber; NumericValue : 0.4; SimpleUpperCase : 0; SimpleLowerCase : 0; WhiteSpace : False),
|
||||
(Category : TUnicodeCategory.ucOtherNumber; NumericValue : 0.6; SimpleUpperCase : 0; SimpleLowerCase : 0; WhiteSpace : False),
|
||||
(Category : TUnicodeCategory.ucOtherNumber; NumericValue : 0.8; SimpleUpperCase : 0; SimpleLowerCase : 0; WhiteSpace : False),
|
||||
(Category : TUnicodeCategory.ucOtherNumber; NumericValue : 0.16666666666667; SimpleUpperCase : 0; SimpleLowerCase : 0; WhiteSpace : False),
|
||||
(Category : TUnicodeCategory.ucOtherNumber; NumericValue : 0.16666666666666; SimpleUpperCase : 0; SimpleLowerCase : 0; WhiteSpace : False),
|
||||
(Category : TUnicodeCategory.ucOtherNumber; NumericValue : 0.83333333333333; SimpleUpperCase : 0; SimpleLowerCase : 0; WhiteSpace : False),
|
||||
(Category : TUnicodeCategory.ucOtherNumber; NumericValue : 0.375; SimpleUpperCase : 0; SimpleLowerCase : 0; WhiteSpace : False),
|
||||
(Category : TUnicodeCategory.ucOtherNumber; NumericValue : 0.625; SimpleUpperCase : 0; SimpleLowerCase : 0; WhiteSpace : False),
|
||||
@ -3338,10 +3338,10 @@ const
|
||||
(Category : TUnicodeCategory.ucLowercaseLetter; NumericValue : 0; SimpleUpperCase : 66598; SimpleLowerCase : 0; WhiteSpace : False),
|
||||
(Category : TUnicodeCategory.ucLowercaseLetter; NumericValue : 0; SimpleUpperCase : 66599; SimpleLowerCase : 0; WhiteSpace : False),
|
||||
(Category : TUnicodeCategory.ucLetterNumber; NumericValue : 0.33333333333333; SimpleUpperCase : 0; SimpleLowerCase : 0; WhiteSpace : False),
|
||||
(Category : TUnicodeCategory.ucLetterNumber; NumericValue : 0.66666666666667; SimpleUpperCase : 0; SimpleLowerCase : 0; WhiteSpace : False),
|
||||
(Category : TUnicodeCategory.ucLetterNumber; NumericValue : 0.66666666666666; SimpleUpperCase : 0; SimpleLowerCase : 0; WhiteSpace : False),
|
||||
(Category : TUnicodeCategory.ucLetterNumber; NumericValue : 0.83333333333333; SimpleUpperCase : 0; SimpleLowerCase : 0; WhiteSpace : False),
|
||||
(Category : TUnicodeCategory.ucLetterNumber; NumericValue : 0.125; SimpleUpperCase : 0; SimpleLowerCase : 0; WhiteSpace : False),
|
||||
(Category : TUnicodeCategory.ucLetterNumber; NumericValue : 0.16666666666667; SimpleUpperCase : 0; SimpleLowerCase : 0; WhiteSpace : False),
|
||||
(Category : TUnicodeCategory.ucLetterNumber; NumericValue : 0.16666666666666; SimpleUpperCase : 0; SimpleLowerCase : 0; WhiteSpace : False),
|
||||
(Category : TUnicodeCategory.ucOtherLetter; NumericValue : 9; SimpleUpperCase : 0; SimpleLowerCase : 0; WhiteSpace : False)
|
||||
);
|
||||
|
||||
|
1286
rtl/objpas/unicodedata2.inc
Normal file
1286
rtl/objpas/unicodedata2.inc
Normal file
File diff suppressed because it is too large
Load Diff
32
tests/test/units/character/tisletter2.pp
Normal file
32
tests/test/units/character/tisletter2.pp
Normal file
@ -0,0 +1,32 @@
|
||||
program tisletter;
|
||||
{$ifndef FPC}
|
||||
{$APPTYPE CONSOLE}
|
||||
{$endif}
|
||||
|
||||
uses
|
||||
SysUtils,
|
||||
character;
|
||||
|
||||
{$ifndef FPC}
|
||||
type UnicodeChar = WideChar;
|
||||
{$endif}
|
||||
|
||||
procedure DoError(ACode : Integer);
|
||||
begin
|
||||
WriteLn('Error #',ACode);
|
||||
Halt(Acode);
|
||||
end;
|
||||
|
||||
var
|
||||
s : UnicodeString;
|
||||
begin
|
||||
s := UnicodeChar($D835) + UnicodeChar($DD75); //1D575;MATHEMATICAL BOLD FRAKTUR CAPITAL J
|
||||
if not TCharacter.IsLetter(s,1) then
|
||||
DoError(1);
|
||||
|
||||
s := UnicodeChar($D835) + UnicodeChar($DFED); //1D7ED;MATHEMATICAL SANS-SERIF BOLD DIGIT ONE;Nd;0;EN;<font> 0031;1;1;1;N;;;;;
|
||||
if TCharacter.IsLetter(s,1) then
|
||||
DoError(1);
|
||||
|
||||
WriteLn('ok');
|
||||
end.
|
36
tests/test/units/character/tisletterordigit2.pp
Normal file
36
tests/test/units/character/tisletterordigit2.pp
Normal file
@ -0,0 +1,36 @@
|
||||
program tisletterordigit2;
|
||||
{$ifndef FPC}
|
||||
{$APPTYPE CONSOLE}
|
||||
{$endif}
|
||||
|
||||
uses
|
||||
SysUtils,
|
||||
character;
|
||||
|
||||
{$ifndef FPC}
|
||||
type UnicodeChar = WideChar;
|
||||
{$endif}
|
||||
|
||||
procedure DoError(ACode : Integer);
|
||||
begin
|
||||
WriteLn('Error #',ACode);
|
||||
Halt(Acode);
|
||||
end;
|
||||
|
||||
var
|
||||
s : UnicodeString;
|
||||
begin
|
||||
s := UnicodeChar($D835) + UnicodeChar($DD75); //1D575;MATHEMATICAL BOLD FRAKTUR CAPITAL J
|
||||
if not TCharacter.IsLetterOrDigit(s,1) then
|
||||
DoError(1);
|
||||
|
||||
s := UnicodeChar($D835) + UnicodeChar($DFED); //1D7ED;MATHEMATICAL SANS-SERIF BOLD DIGIT ONE;Nd;0;EN;<font> 0031;1;1;1;N;;;;;
|
||||
if not TCharacter.IsLetterOrDigit(s,1) then
|
||||
DoError(2);
|
||||
|
||||
s := UnicodeChar($D83C) + UnicodeChar($DC00); //1F000;MAHJONG TILE EAST WIND;So;0;ON;;;;;N;;;;;
|
||||
if TCharacter.IsLetterOrDigit(s,1) then
|
||||
DoError(3);
|
||||
|
||||
WriteLn('ok');
|
||||
end.
|
32
tests/test/units/character/tislower2.pp
Normal file
32
tests/test/units/character/tislower2.pp
Normal file
@ -0,0 +1,32 @@
|
||||
program tislower2;
|
||||
{$ifndef FPC}
|
||||
{$APPTYPE CONSOLE}
|
||||
{$endif}
|
||||
|
||||
uses
|
||||
SysUtils,
|
||||
character;
|
||||
|
||||
{$ifndef FPC}
|
||||
type UnicodeChar = WideChar;
|
||||
{$endif}
|
||||
|
||||
procedure DoError(ACode : Integer);
|
||||
begin
|
||||
WriteLn('Error #',ACode);
|
||||
Halt(Acode);
|
||||
end;
|
||||
|
||||
var
|
||||
s : UnicodeString;
|
||||
begin
|
||||
s := UnicodeChar($D835) + UnicodeChar($DFCB); //1D7CB;MATHEMATICAL BOLD SMALL DIGAMMA;Ll;0;L;<font> 03DD;;;;N;;;;;
|
||||
if not TCharacter.IsLower(s,1) then
|
||||
DoError(1);
|
||||
|
||||
s := UnicodeChar($D835) + UnicodeChar($DFED); //1D7ED;MATHEMATICAL SANS-SERIF BOLD DIGIT ONE;Nd;0;EN;<font> 0031;1;1;1;N;;;;;
|
||||
if TCharacter.IsLower(s,1) then
|
||||
DoError(2);
|
||||
|
||||
WriteLn('ok');
|
||||
end.
|
@ -38,40 +38,38 @@ end;
|
||||
|
||||
var
|
||||
e, i, j : Integer;
|
||||
uc, s : UnicodeString;
|
||||
uc, s, s2 : UnicodeString;
|
||||
begin
|
||||
e := 1;
|
||||
for i := Ord('a') to Ord('z') do begin
|
||||
uc := UnicodeChar(i);
|
||||
if (TCharacter.ToLower(uc) <> uc) then
|
||||
DoError(e,i);
|
||||
end;
|
||||
s := 'azerty';
|
||||
if (TCharacter.ToLower(s) <> s) then begin
|
||||
WriteLn(s);
|
||||
s2 := TCharacter.ToLower(s);
|
||||
WriteLn('"',s2,'"');
|
||||
DoError(e,s2);
|
||||
end;
|
||||
|
||||
Inc(e);
|
||||
for i := Ord('0') to Ord('9') do begin
|
||||
uc := UnicodeChar(i);
|
||||
if (TCharacter.ToLower(uc) <> uc) then
|
||||
DoError(e,i);
|
||||
end;
|
||||
s := '0123456789';
|
||||
if (TCharacter.ToLower(s) <> s) then
|
||||
DoError(e,s);
|
||||
|
||||
Inc(e);
|
||||
if (TCharacter.ToLower('azerty') <> 'azerty') then
|
||||
DoError(e,'azerty');
|
||||
if (TCharacter.ToLower('AZERTY') <> 'azerty') then
|
||||
DoError(e,'AZERTY');
|
||||
if (TCharacter.ToLower('AzERty') <> 'azerty') then
|
||||
DoError(e,'AzERty');
|
||||
s := 'AZERTY'; s2:= 'azerty';
|
||||
if (TCharacter.ToLower(s) <> s2) then begin
|
||||
WriteLn(s);
|
||||
s2 := TCharacter.ToLower(s);
|
||||
WriteLn('"',s2,'"');
|
||||
DoError(e,s2);
|
||||
end;
|
||||
s := 'AzERty';
|
||||
if (TCharacter.ToLower(s) <> s2) then begin
|
||||
WriteLn(s);
|
||||
s2 := TCharacter.ToLower(s);
|
||||
WriteLn('"',s2,'"');
|
||||
DoError(e,s2);
|
||||
end;
|
||||
|
||||
Inc(e);
|
||||
j := Ord('a');
|
||||
for i := Ord('A') to Ord('Z') do begin
|
||||
uc := UnicodeChar(i);
|
||||
s := UnicodeChar(j);
|
||||
if (TCharacter.ToLower(uc) <> s) then
|
||||
DoError(e,i);
|
||||
Inc(j);
|
||||
end;
|
||||
|
||||
WriteLn('ok');
|
||||
end.
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user