{ fpc/rtl/objpas/character.pas
2018-03-23 22:06:36 +00:00

856 lines
31 KiB
ObjectPascal }

{ Unicode "Character" properties handler.
Copyright (c) 2012 by Inoussa OUEDRAOGO
The source code is distributed under the Library GNU
General Public License with the following modification:
- object files and libraries linked into an application may be
distributed without source code.
If you didn't receive a copy of the file COPYING, contact:
Free Software Foundation
675 Mass Ave
Cambridge, MA 02139
USA
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. }
unit Character;
interface
{$ifndef VER2_4}
{$mode objfpc}
{$H+}
{$PACKENUM 1}
{$SCOPEDENUMS ON}
uses
unicodedata;
type
// Unicode General Category.
// Because {$SCOPEDENUMS ON} is active, members must be written qualified
// (e.g. TUnicodeCategory.ucControl).  The implementation typecasts the
// Category field of the unicodedata property record straight to this type,
// so the declaration order of the members must not be changed.
TUnicodeCategory = (
ucUppercaseLetter, // Lu = Letter, uppercase
ucLowercaseLetter, // Ll = Letter, lowercase
ucTitlecaseLetter, // Lt = Letter, titlecase
ucModifierLetter, // Lm = Letter, modifier
ucOtherLetter, // Lo = Letter, other
ucNonSpacingMark, // Mn = Mark, nonspacing
ucCombiningMark, // Mc = Mark, spacing combining
ucEnclosingMark, // Me = Mark, enclosing
ucDecimalNumber, // Nd = Number, decimal digit
ucLetterNumber, // Nl = Number, letter
ucOtherNumber, // No = Number, other
ucConnectPunctuation, // Pc = Punctuation, connector
ucDashPunctuation, // Pd = Punctuation, dash
ucOpenPunctuation, // Ps = Punctuation, open
ucClosePunctuation, // Pe = Punctuation, close
ucInitialPunctuation, // Pi = Punctuation, initial quote (may behave like Ps or Pe depending on usage)
ucFinalPunctuation, // Pf = Punctuation, final quote (may behave like Ps or Pe depending on usage)
ucOtherPunctuation, // Po = Punctuation, other
ucMathSymbol, // Sm = Symbol, math
ucCurrencySymbol, // Sc = Symbol, currency
ucModifierSymbol, // Sk = Symbol, modifier
ucOtherSymbol, // So = Symbol, other
ucSpaceSeparator, // Zs = Separator, space
ucLineSeparator, // Zl = Separator, line
ucParagraphSeparator, // Zp = Separator, paragraph
ucControl, // Cc = Other, control
ucFormat, // Cf = Other, format
ucSurrogate, // Cs = Other, surrogate
ucPrivateUse, // Co = Other, private use
ucUnassigned // Cn = Other, not assigned (including noncharacters)
);
// Set of general categories; used by the implementation to test a character
// against a whole family of categories at once.
TUnicodeCategorySet = set of TUnicodeCategory;
// Options for the string overloads of ToLower/ToUpper.  The presence of
// coIgnoreInvalidSequence is forwarded as a Boolean to UnicodeToLower /
// UnicodeToUpper (presumably it lets invalid surrogate sequences pass
// through instead of failing - confirm against unicodedata).
TCharacterOption = (coIgnoreInvalidSequence);
TCharacterOptions = set of TCharacterOption;
{ TCharacter }
{ Static helper class exposing Unicode character properties and conversions.
  It is not meant to be instantiated: the constructor raises
  ENoConstructException.  Most routines come in two flavours:
    - (AChar)           : works on a single UTF-16 code unit;
    - (AString, AIndex) : works on the character starting at the 1-based
                          position AIndex and raises
                          EArgumentOutOfRangeException when AIndex is outside
                          1..Length(AString). }
TCharacter = class sealed
private
// Shared implementation of the (AString, AIndex) category predicates:
// resolves a surrogate pair at AIndex if needed, then tests the category.
class function TestCategory(const AString : UnicodeString; AIndex : Integer; ACategory : TUnicodeCategory) : Boolean; overload; static;
class function TestCategory(const AString : UnicodeString; AIndex : Integer; ACategory : TUnicodeCategorySet) : Boolean; overload; static;
public
// Always raises ENoConstructException.
constructor Create;
// UTF-32 <-> UTF-16 conversions.
class function ConvertFromUtf32(AChar : UCS4Char) : UnicodeString; static;
class function ConvertToUtf32(const AString : UnicodeString; AIndex : Integer) : UCS4Char; overload; static;
class function ConvertToUtf32(const AString : UnicodeString; AIndex : Integer; out ACharLength : Integer) : UCS4Char; overload; static;
class function ConvertToUtf32(const AHighSurrogate, ALowSurrogate : UnicodeChar) : UCS4Char; overload; static;
// Property lookups.
class function GetNumericValue(AChar : UnicodeChar) : Double; static; overload;
class function GetNumericValue(const AString : UnicodeString; AIndex : Integer) : Double; overload; static;
class function GetUnicodeCategory(AChar : UnicodeChar) : TUnicodeCategory; overload; static; inline;
class function GetUnicodeCategory(const AString : UnicodeString; AIndex : Integer) : TUnicodeCategory; overload; static;
// Category predicates.
class function IsControl(AChar : UnicodeChar) : Boolean; overload; static; inline;
class function IsControl(const AString : UnicodeString; AIndex : Integer) : Boolean; overload; static; inline;
class function IsDigit(AChar : UnicodeChar) : Boolean; overload; static; inline;
class function IsDigit(const AString : UnicodeString; AIndex : Integer) : Boolean; overload; static; inline;
// Surrogate predicates (operate on UTF-16 code units).
class function IsSurrogate(AChar : UnicodeChar) : Boolean; overload; static; inline;
class function IsSurrogate(const AString : UnicodeString; AIndex : Integer) : Boolean; overload; static;
class function IsHighSurrogate(AChar : UnicodeChar) : Boolean; overload; static; inline;
class function IsHighSurrogate(const AString : UnicodeString; AIndex : Integer) : Boolean; overload; static;
class function IsLowSurrogate(AChar : UnicodeChar) : Boolean; overload; static; inline;
class function IsLowSurrogate(const AString : UnicodeString; AIndex : Integer) : Boolean; overload; static;
class function IsSurrogatePair(const AHighSurrogate, ALowSurrogate : UnicodeChar) : Boolean; overload; static; inline;
class function IsSurrogatePair(const AString : UnicodeString; AIndex : Integer) : Boolean; overload; static;
// More category predicates.
class function IsLetter(AChar : UnicodeChar) : Boolean; overload; static; inline;
class function IsLetter(const AString : UnicodeString; AIndex : Integer) : Boolean; overload; static; inline;
class function IsLetterOrDigit(AChar : UnicodeChar) : Boolean; overload; static; inline;
class function IsLetterOrDigit(const AString : UnicodeString; AIndex : Integer) : Boolean; overload; static; inline;
class function IsLower(AChar : UnicodeChar) : Boolean; overload; static; inline;
class function IsLower(const AString : UnicodeString; AIndex : Integer) : Boolean; overload; static; inline;
class function IsNumber(AChar : UnicodeChar) : Boolean; overload; static; inline;
class function IsNumber(const AString : UnicodeString; AIndex : Integer) : Boolean; overload; static;
class function IsPunctuation(AChar : UnicodeChar) : Boolean; overload; static; inline;
class function IsPunctuation(const AString : UnicodeString; AIndex : Integer) : Boolean; overload; static; inline;
class function IsSeparator(AChar : UnicodeChar) : Boolean; overload; static; inline;
class function IsSeparator(const AString : UnicodeString; AIndex : Integer) : Boolean; overload; static; inline;
class function IsSymbol(AChar : UnicodeChar) : Boolean; overload; static; inline;
class function IsSymbol(const AString : UnicodeString; AIndex : Integer) : Boolean; overload; static; inline;
class function IsUpper(AChar : UnicodeChar) : Boolean; overload; static; inline;
class function IsUpper(const AString : UnicodeString; AIndex : Integer) : Boolean; overload; static; inline;
class function IsWhiteSpace(AChar : UnicodeChar) : Boolean; overload; static; inline;
class function IsWhiteSpace(const AString : UnicodeString; AIndex : Integer) : Boolean; overload; static;
// Case conversion.  The AChar overloads use the simple case mapping of a
// single code unit; the string overloads raise EArgumentException when the
// underlying UnicodeToLower/UnicodeToUpper call reports a non-zero status.
class function ToLower(AChar : UnicodeChar) : UnicodeChar; overload; static;
class function ToLower(const AString : UnicodeString) : UnicodeString; inline;overload; static;
class function ToLower(const AString : UnicodeString; const AOptions : TCharacterOptions) : UnicodeString; overload; static;
class function ToUpper(AChar : UnicodeChar) : UnicodeChar; overload; static;
class function ToUpper(const AString : UnicodeString) : UnicodeString; inline; overload; static;
class function ToUpper(const AString : UnicodeString; const AOptions : TCharacterOptions) : UnicodeString; overload; static;
end;
// Flat (unit-level) functions.  Each one forwards to the TCharacter class
// method of the same name and signature, so the helpers can be called
// without the "TCharacter." qualifier.  Note that there is no flat
// counterpart for the ToLower/ToUpper overloads taking TCharacterOptions.
function ConvertFromUtf32(AChar : UCS4Char) : UnicodeString;
function ConvertToUtf32(const AString : UnicodeString; AIndex : Integer) : UCS4Char; overload;
function ConvertToUtf32(const AString : UnicodeString; AIndex : Integer; out ACharLength : Integer) : UCS4Char; overload;
function ConvertToUtf32(const AHighSurrogate, ALowSurrogate : UnicodeChar) : UCS4Char; overload;
function GetNumericValue(AChar : UnicodeChar) : Double; overload;
function GetNumericValue(const AString : UnicodeString; AIndex : Integer) : Double; overload;
function GetUnicodeCategory(AChar : UnicodeChar) : TUnicodeCategory; overload;
function GetUnicodeCategory(const AString : UnicodeString; AIndex : Integer) : TUnicodeCategory; overload;
function IsControl(AChar : UnicodeChar) : Boolean; overload;
function IsControl(const AString : UnicodeString; AIndex : Integer) : Boolean; overload;
function IsDigit(AChar : UnicodeChar) : Boolean; overload;
function IsDigit(const AString : UnicodeString; AIndex : Integer) : Boolean; overload;
function IsSurrogate(AChar : UnicodeChar) : Boolean; overload;
function IsSurrogate(const AString : UnicodeString; AIndex : Integer) : Boolean; overload;
function IsHighSurrogate(AChar : UnicodeChar) : Boolean; overload;
function IsHighSurrogate(const AString : UnicodeString; AIndex : Integer) : Boolean; overload;
function IsLowSurrogate(AChar : UnicodeChar) : Boolean; overload;
function IsLowSurrogate(const AString : UnicodeString; AIndex : Integer) : Boolean; overload;
function IsSurrogatePair(const AHighSurrogate, ALowSurrogate : UnicodeChar) : Boolean; overload;
function IsSurrogatePair(const AString : UnicodeString; AIndex : Integer) : Boolean; overload;
function IsLetter(AChar : UnicodeChar) : Boolean; overload;
function IsLetter(const AString : UnicodeString; AIndex : Integer) : Boolean; overload;
function IsLetterOrDigit(AChar : UnicodeChar) : Boolean; overload;
function IsLetterOrDigit(const AString : UnicodeString; AIndex : Integer) : Boolean; overload;
function IsLower(AChar : UnicodeChar) : Boolean; overload;
function IsLower(const AString : UnicodeString; AIndex : Integer) : Boolean; overload;
function IsNumber(AChar : UnicodeChar) : Boolean; overload;
function IsNumber(const AString : UnicodeString; AIndex : Integer) : Boolean; overload;
function IsPunctuation(AChar : UnicodeChar) : Boolean; overload;
function IsPunctuation(const AString : UnicodeString; AIndex : Integer) : Boolean; overload;
function IsSeparator(AChar : UnicodeChar) : Boolean; overload;
function IsSeparator(const AString : UnicodeString; AIndex : Integer) : Boolean; overload;
function IsSymbol(AChar : UnicodeChar) : Boolean; overload;
function IsSymbol(const AString : UnicodeString; AIndex : Integer) : Boolean; overload;
function IsUpper(AChar : UnicodeChar) : Boolean; overload;
function IsUpper(const AString : UnicodeString; AIndex : Integer) : Boolean; overload;
function IsWhiteSpace(AChar : UnicodeChar) : Boolean; overload;
function IsWhiteSpace(const AString : UnicodeString; AIndex : Integer) : Boolean; overload;
function ToLower(AChar : UnicodeChar) : UnicodeChar; overload;
function ToLower(const AString : UnicodeString) : UnicodeString; overload;
function ToUpper(AChar : UnicodeChar) : UnicodeChar; overload;
function ToUpper(const AString : UnicodeString) : UnicodeString; overload;
{$endif VER2_4}
implementation
{$ifndef VER2_4}
uses
SysUtils,
RtlConsts;
const
  { General categories treated as "letter" by IsLetter: Lu, Ll, Lt, Lm, Lo. }
  LETTER_CATEGORIES = [
    TUnicodeCategory.ucUppercaseLetter,
    TUnicodeCategory.ucLowercaseLetter,
    TUnicodeCategory.ucTitlecaseLetter,
    TUnicodeCategory.ucModifierLetter,
    TUnicodeCategory.ucOtherLetter
  ];
{ General categories accepted by IsLetterOrDigit: every letter category plus
  decimal digits (Nd).
  Letter-numbers (Nl, e.g. the Roman numeral characters) are intentionally
  NOT part of this set: IsDigit only accepts ucDecimalNumber and IsLetter only
  accepts LETTER_CATEGORIES, so including ucLetterNumber made
  IsLetterOrDigit(c) return True for characters for which both IsLetter(c)
  and IsDigit(c) return False.  Use IsNumber to match Nl/No characters. }
LETTER_OR_DIGIT_CATEGORIES =
  LETTER_CATEGORIES +
  [TUnicodeCategory.ucDecimalNumber];
{ General categories treated as "number" by IsNumber: Nd, Nl, No. }
NUMBER_CATEGORIES = [
  TUnicodeCategory.ucDecimalNumber,
  TUnicodeCategory.ucLetterNumber,
  TUnicodeCategory.ucOtherNumber
];
{ General categories treated as "punctuation" by IsPunctuation:
  Pc, Pd, Ps, Pe, Pi, Pf, Po. }
PUNCTUATION_CATEGORIES = [
  TUnicodeCategory.ucConnectPunctuation,
  TUnicodeCategory.ucDashPunctuation,
  TUnicodeCategory.ucOpenPunctuation,
  TUnicodeCategory.ucClosePunctuation,
  TUnicodeCategory.ucInitialPunctuation,
  TUnicodeCategory.ucFinalPunctuation,
  TUnicodeCategory.ucOtherPunctuation
];
{ General categories treated as "separator" by IsSeparator: Zs, Zl, Zp. }
SEPARATOR_CATEGORIES = [
  TUnicodeCategory.ucSpaceSeparator,
  TUnicodeCategory.ucLineSeparator,
  TUnicodeCategory.ucParagraphSeparator
];
{ General categories treated as "symbol" by IsSymbol: Sm, Sc, Sk, So. }
SYMBOL_CATEGORIES = [
  TUnicodeCategory.ucMathSymbol,
  TUnicodeCategory.ucCurrencySymbol,
  TUnicodeCategory.ucModifierSymbol,
  TUnicodeCategory.ucOtherSymbol
];
{ Unit-level shortcut for TCharacter.ConvertFromUtf32: returns the UTF-16
  string for the UTF-32 code point AChar. }
function ConvertFromUtf32(AChar: UCS4Char): UnicodeString;
begin
  Exit(TCharacter.ConvertFromUtf32(AChar));
end;
{ Unit-level shortcut for TCharacter.ConvertToUtf32(AString, AIndex). }
function ConvertToUtf32(const AString: UnicodeString; AIndex: Integer): UCS4Char;
begin
  Exit(TCharacter.ConvertToUtf32(AString, AIndex));
end;
{ Unit-level shortcut for TCharacter.ConvertToUtf32(AString, AIndex,
  ACharLength); ACharLength is filled in by the class method. }
function ConvertToUtf32(const AString: UnicodeString; AIndex: Integer; out ACharLength: Integer): UCS4Char;
begin
  Exit(TCharacter.ConvertToUtf32(AString, AIndex, ACharLength));
end;
{ Unit-level shortcut for TCharacter.ConvertToUtf32(AHighSurrogate,
  ALowSurrogate). }
function ConvertToUtf32(const AHighSurrogate, ALowSurrogate: UnicodeChar): UCS4Char;
begin
  Exit(TCharacter.ConvertToUtf32(AHighSurrogate, ALowSurrogate));
end;
{ Unit-level shortcut for TCharacter.GetNumericValue(AChar). }
function GetNumericValue(AChar: UnicodeChar): Double;
begin
  Exit(TCharacter.GetNumericValue(AChar));
end;
{ Unit-level shortcut for TCharacter.GetNumericValue(AString, AIndex). }
function GetNumericValue(const AString: UnicodeString; AIndex: Integer): Double;
begin
  Exit(TCharacter.GetNumericValue(AString, AIndex));
end;
{ Unit-level shortcut for TCharacter.GetUnicodeCategory(AChar). }
function GetUnicodeCategory(AChar: UnicodeChar): TUnicodeCategory;
begin
  Exit(TCharacter.GetUnicodeCategory(AChar));
end;
{ Unit-level shortcut for TCharacter.GetUnicodeCategory(AString, AIndex). }
function GetUnicodeCategory(const AString: UnicodeString; AIndex: Integer): TUnicodeCategory;
begin
  Exit(TCharacter.GetUnicodeCategory(AString, AIndex));
end;
{ Unit-level shortcut for TCharacter.IsControl(AChar). }
function IsControl(AChar: UnicodeChar): Boolean;
begin
  Exit(TCharacter.IsControl(AChar));
end;
{ Unit-level shortcut for TCharacter.IsControl(AString, AIndex). }
function IsControl(const AString: UnicodeString; AIndex: Integer): Boolean;
begin
  Exit(TCharacter.IsControl(AString, AIndex));
end;
{ Unit-level shortcut for TCharacter.IsDigit(AChar). }
function IsDigit(AChar: UnicodeChar): Boolean;
begin
  Exit(TCharacter.IsDigit(AChar));
end;
{ Unit-level shortcut for TCharacter.IsDigit(AString, AIndex). }
function IsDigit(const AString: UnicodeString; AIndex: Integer): Boolean;
begin
  Exit(TCharacter.IsDigit(AString, AIndex));
end;
{ Unit-level shortcut for TCharacter.IsSurrogate(AChar). }
function IsSurrogate(AChar: UnicodeChar): Boolean;
begin
  Exit(TCharacter.IsSurrogate(AChar));
end;
{ Unit-level shortcut for TCharacter.IsSurrogate(AString, AIndex). }
function IsSurrogate(const AString: UnicodeString; AIndex: Integer): Boolean;
begin
  Exit(TCharacter.IsSurrogate(AString, AIndex));
end;
{ Unit-level shortcut for TCharacter.IsHighSurrogate(AChar). }
function IsHighSurrogate(AChar: UnicodeChar): Boolean;
begin
  Exit(TCharacter.IsHighSurrogate(AChar));
end;
{ Unit-level shortcut for TCharacter.IsHighSurrogate(AString, AIndex). }
function IsHighSurrogate(const AString: UnicodeString; AIndex: Integer): Boolean;
begin
  Exit(TCharacter.IsHighSurrogate(AString, AIndex));
end;
{ Unit-level shortcut for TCharacter.IsLowSurrogate(AChar). }
function IsLowSurrogate(AChar: UnicodeChar): Boolean;
begin
  Exit(TCharacter.IsLowSurrogate(AChar));
end;
{ Unit-level shortcut for TCharacter.IsLowSurrogate(AString, AIndex). }
function IsLowSurrogate(const AString: UnicodeString; AIndex: Integer): Boolean;
begin
  Exit(TCharacter.IsLowSurrogate(AString, AIndex));
end;
{ Unit-level shortcut for TCharacter.IsSurrogatePair(AHighSurrogate,
  ALowSurrogate). }
function IsSurrogatePair(const AHighSurrogate, ALowSurrogate: UnicodeChar): Boolean;
begin
  Exit(TCharacter.IsSurrogatePair(AHighSurrogate, ALowSurrogate));
end;
{ Unit-level shortcut for TCharacter.IsSurrogatePair(AString, AIndex). }
function IsSurrogatePair(const AString: UnicodeString; AIndex: Integer): Boolean;
begin
  Exit(TCharacter.IsSurrogatePair(AString, AIndex));
end;
{ Unit-level shortcut for TCharacter.IsLetter(AChar). }
function IsLetter(AChar: UnicodeChar): Boolean;
begin
  Exit(TCharacter.IsLetter(AChar));
end;
{ Unit-level shortcut for TCharacter.IsLetter(AString, AIndex). }
function IsLetter(const AString: UnicodeString; AIndex: Integer): Boolean;
begin
  Exit(TCharacter.IsLetter(AString, AIndex));
end;
{ Unit-level shortcut for TCharacter.IsLetterOrDigit(AChar). }
function IsLetterOrDigit(AChar: UnicodeChar): Boolean;
begin
  Exit(TCharacter.IsLetterOrDigit(AChar));
end;
{ Unit-level shortcut for TCharacter.IsLetterOrDigit(AString, AIndex). }
function IsLetterOrDigit(const AString: UnicodeString; AIndex: Integer): Boolean;
begin
  Exit(TCharacter.IsLetterOrDigit(AString, AIndex));
end;
{ Unit-level shortcut for TCharacter.IsLower(AChar). }
function IsLower(AChar: UnicodeChar): Boolean;
begin
  Exit(TCharacter.IsLower(AChar));
end;
{ Unit-level shortcut for TCharacter.IsLower(AString, AIndex). }
function IsLower(const AString: UnicodeString; AIndex: Integer): Boolean;
begin
  Exit(TCharacter.IsLower(AString, AIndex));
end;
{ Unit-level shortcut for TCharacter.IsNumber(AChar). }
function IsNumber(AChar: UnicodeChar): Boolean;
begin
  Exit(TCharacter.IsNumber(AChar));
end;
{ Unit-level shortcut for TCharacter.IsNumber(AString, AIndex). }
function IsNumber(const AString: UnicodeString; AIndex: Integer): Boolean;
begin
  Exit(TCharacter.IsNumber(AString, AIndex));
end;
{ Unit-level shortcut for TCharacter.IsPunctuation(AChar). }
function IsPunctuation(AChar: UnicodeChar): Boolean;
begin
  Exit(TCharacter.IsPunctuation(AChar));
end;
{ Unit-level shortcut for TCharacter.IsPunctuation(AString, AIndex). }
function IsPunctuation(const AString: UnicodeString; AIndex: Integer): Boolean;
begin
  Exit(TCharacter.IsPunctuation(AString, AIndex));
end;
{ Unit-level shortcut for TCharacter.IsSeparator(AChar). }
function IsSeparator(AChar: UnicodeChar): Boolean;
begin
  Exit(TCharacter.IsSeparator(AChar));
end;
{ Unit-level shortcut for TCharacter.IsSeparator(AString, AIndex). }
function IsSeparator(const AString: UnicodeString; AIndex: Integer): Boolean;
begin
  Exit(TCharacter.IsSeparator(AString, AIndex));
end;
{ Unit-level shortcut for TCharacter.IsSymbol(AChar). }
function IsSymbol(AChar: UnicodeChar): Boolean;
begin
  Exit(TCharacter.IsSymbol(AChar));
end;
{ Unit-level shortcut for TCharacter.IsSymbol(AString, AIndex). }
function IsSymbol(const AString: UnicodeString; AIndex: Integer): Boolean;
begin
  Exit(TCharacter.IsSymbol(AString, AIndex));
end;
{ Unit-level shortcut for TCharacter.IsUpper(AChar). }
function IsUpper(AChar: UnicodeChar): Boolean;
begin
  Exit(TCharacter.IsUpper(AChar));
end;
{ Unit-level shortcut for TCharacter.IsUpper(AString, AIndex). }
function IsUpper(const AString: UnicodeString; AIndex: Integer): Boolean;
begin
  Exit(TCharacter.IsUpper(AString, AIndex));
end;
{ Unit-level shortcut for TCharacter.IsWhiteSpace(AChar). }
function IsWhiteSpace(AChar: UnicodeChar): Boolean;
begin
  Exit(TCharacter.IsWhiteSpace(AChar));
end;
{ Unit-level shortcut for TCharacter.IsWhiteSpace(AString, AIndex). }
function IsWhiteSpace(const AString: UnicodeString; AIndex: Integer): Boolean;
begin
  Exit(TCharacter.IsWhiteSpace(AString, AIndex));
end;
{ Unit-level shortcut for TCharacter.ToLower(AChar). }
function ToLower(AChar: UnicodeChar): UnicodeChar;
begin
  Exit(TCharacter.ToLower(AChar));
end;
{ Unit-level shortcut for TCharacter.ToLower(AString). }
function ToLower(const AString: UnicodeString): UnicodeString;
begin
  Exit(TCharacter.ToLower(AString));
end;
{ Unit-level shortcut for TCharacter.ToUpper(AChar). }
function ToUpper(AChar: UnicodeChar): UnicodeChar;
begin
  Exit(TCharacter.ToUpper(AChar));
end;
{ Unit-level shortcut for TCharacter.ToUpper(AString). }
function ToUpper(const AString: UnicodeString): UnicodeString;
begin
  Exit(TCharacter.ToUpper(AString));
end;
{ TCharacter }
{ Tests whether the character starting at AString[AIndex] has exactly the
  general category ACategory.
  If the code unit at AIndex is a surrogate, the properties of the code point
  formed with the following code unit are used instead.
  Raises EArgumentOutOfRangeException when AIndex is outside
  1..Length(AString), and EArgumentException when the surrogate at AIndex is
  not reported as the start of a surrogate pair by IsSurrogatePair. }
class function TCharacter.TestCategory(
  const AString   : UnicodeString;
        AIndex    : Integer;
        ACategory : TUnicodeCategory
) : Boolean;
var
  props  : PUC_Prop;
  strLen : SizeInt;
begin
  strLen := Length(AString);
  if (AIndex < 1) or (AIndex > strLen) then
    raise EArgumentOutOfRangeException.CreateFmt(SStringIndexOutOfRange, [AIndex, strLen]);

  props := GetProps(Word(AString[AIndex]));
  if TUnicodeCategory(props^.Category) = TUnicodeCategory.ucSurrogate then
  begin
    // A surrogate only has a meaningful category as part of a pair.
    if not IsSurrogatePair(AString, AIndex) then
      raise EArgumentException.Create(SInvalidUnicodeCodePointSequence);
    props := GetProps(AString[AIndex], AString[AIndex + 1]);
  end;

  Result := TUnicodeCategory(props^.Category) = ACategory;
end;
{ Tests whether the general category of the character starting at
  AString[AIndex] is a member of the set ACategory.
  If the code unit at AIndex is a surrogate, the properties of the code point
  formed with the following code unit are used instead.
  Raises EArgumentOutOfRangeException when AIndex is outside
  1..Length(AString), and EArgumentException when the surrogate at AIndex is
  not reported as the start of a surrogate pair by IsSurrogatePair. }
class function TCharacter.TestCategory(
  const AString   : UnicodeString;
        AIndex    : Integer;
        ACategory : TUnicodeCategorySet
) : Boolean;
var
  props  : PUC_Prop;
  strLen : SizeInt;
begin
  strLen := Length(AString);
  if (AIndex < 1) or (AIndex > strLen) then
    raise EArgumentOutOfRangeException.CreateFmt(SStringIndexOutOfRange, [AIndex, strLen]);

  props := GetProps(Word(AString[AIndex]));
  if TUnicodeCategory(props^.Category) = TUnicodeCategory.ucSurrogate then
  begin
    // A surrogate only has a meaningful category as part of a pair.
    if not IsSurrogatePair(AString, AIndex) then
      raise EArgumentException.Create(SInvalidUnicodeCodePointSequence);
    props := GetProps(AString[AIndex], AString[AIndex + 1]);
  end;

  Result := TUnicodeCategory(props^.Category) in ACategory;
end;
{ TCharacter only offers static class methods; constructing an instance is an
  error and always raises ENoConstructException naming the class. }
constructor TCharacter.Create;
begin
  raise ENoConstructException.CreateFmt(
    SClassCantBeConstructed,
    [ClassName]
  );
end;
{ Encodes the UTF-32 code point AChar as a UTF-16 string.
  Values below UCS4_HALF_BASE yield a one-unit string; larger values yield a
  high/low surrogate pair.
  Raises EArgumentOutOfRangeException when AChar is itself a surrogate code
  unit or exceeds MAX_LEGAL_UTF32. }
class function TCharacter.ConvertFromUtf32(AChar : UCS4Char) : UnicodeString;
var
  offset : UCS4Char;
begin
  if AChar < UCS4_HALF_BASE then
  begin
    // Single code unit: only surrogate values are illegal here.
    if IsSurrogate(UnicodeChar(AChar)) then
      raise EArgumentOutOfRangeException.CreateFmt(SInvalidUTF32Char, [AChar]);
    Result := UnicodeChar(AChar);
    Exit;
  end;

  if AChar > MAX_LEGAL_UTF32 then
    raise EArgumentOutOfRangeException.CreateFmt(SInvalidUTF32Char, [AChar]);

  // Split the offset from UCS4_HALF_BASE into the high part (shifted right
  // by 10) and the low part (masked with UCS4_HALF_MASK).
  offset := AChar - UCS4_HALF_BASE;
  SetLength(Result, 2);
  Result[1] := UnicodeChar((offset shr 10) + HIGH_SURROGATE_BEGIN);
  Result[2] := UnicodeChar((offset and UCS4_HALF_MASK) + LOW_SURROGATE_BEGIN);
end;
{ Returns the UTF-32 code point of the character starting at AString[AIndex].
  This is the three-argument overload with the reported length discarded;
  that overload performs the same checks and raises the same exceptions:
  EArgumentOutOfRangeException for an AIndex outside 1..Length(AString) (or,
  through the surrogate overload, for a high surrogate followed by a
  non-low-surrogate) and EArgumentException for a high surrogate in the last
  position. }
class function TCharacter.ConvertToUtf32(const AString : UnicodeString; AIndex : Integer) : UCS4Char; overload;
var
  unusedLength : Integer;
begin
  Result := ConvertToUtf32(AString, AIndex, unusedLength);
end;
{ Returns the UTF-32 code point of the character starting at AString[AIndex]
  and stores in ACharLength how many UTF-16 code units it occupies (1 or 2).
  A code unit that is not a high surrogate is returned unchanged with
  length 1 (this includes a stand-alone low surrogate).
  Raises EArgumentOutOfRangeException when AIndex is outside
  1..Length(AString), EArgumentException when a high surrogate is the last
  code unit, and whatever the surrogate overload raises when the following
  code unit is not a low surrogate. }
class function TCharacter.ConvertToUtf32(const AString : UnicodeString; AIndex : Integer; out ACharLength : Integer) : UCS4Char; overload;
var
  lead : UnicodeChar;
begin
  if (AIndex < 1) or (AIndex > Length(AString)) then
    raise EArgumentOutOfRangeException.CreateFmt(SStringIndexOutOfRange, [AIndex, Length(AString)]);

  lead := AString[AIndex];
  if not IsHighSurrogate(lead) then
  begin
    Result := Word(lead);
    ACharLength := 1;
    Exit;
  end;

  // High surrogate: it needs a partner in the next position.
  if Length(AString) < Succ(AIndex) then
    raise EArgumentException.CreateFmt(SInvalidHighSurrogate, [AIndex]);
  Result := ConvertToUtf32(lead, AString[Succ(AIndex)]);
  ACharLength := 2;
end;
{ Combines a high and a low surrogate into the UTF-32 code point they encode
  (via ToUCS4).
  Raises EArgumentOutOfRangeException when AHighSurrogate is not a high
  surrogate or - checked second - ALowSurrogate is not a low surrogate. }
class function TCharacter.ConvertToUtf32(const AHighSurrogate, ALowSurrogate : UnicodeChar) : UCS4Char; overload;
begin
  if not IsHighSurrogate(AHighSurrogate) then
    raise EArgumentOutOfRangeException.CreateFmt(SHighSurrogateOutOfRange, [Word(AHighSurrogate)])
  else if not IsLowSurrogate(ALowSurrogate) then
    raise EArgumentOutOfRangeException.CreateFmt(SLowSurrogateOutOfRange, [Word(ALowSurrogate)])
  else
    Result := ToUCS4(AHighSurrogate, ALowSurrogate);
end;
{ Returns the NumericValue field of the property record that GetProps
  supplies for the single code unit AChar. }
class function TCharacter.GetNumericValue(AChar : UnicodeChar) : Double;
var
  props : PUC_Prop;
begin
  props := GetProps(Word(AChar));
  Result := props^.NumericValue;
end;
{ Returns the NumericValue property of the character starting at
  AString[AIndex]; a surrogate at AIndex is resolved together with the
  following code unit.
  Raises EArgumentOutOfRangeException when AIndex is outside
  1..Length(AString), and EArgumentException when the surrogate at AIndex is
  not reported as the start of a surrogate pair by IsSurrogatePair. }
class function TCharacter.GetNumericValue(
  const AString : UnicodeString;
        AIndex  : Integer
) : Double;
var
  props  : PUC_Prop;
  strLen : SizeInt;
begin
  strLen := Length(AString);
  if (AIndex < 1) or (AIndex > strLen) then
    raise EArgumentOutOfRangeException.CreateFmt(SStringIndexOutOfRange, [AIndex, strLen]);

  props := GetProps(Word(AString[AIndex]));
  if TUnicodeCategory(props^.Category) = TUnicodeCategory.ucSurrogate then
  begin
    if not IsSurrogatePair(AString, AIndex) then
      raise EArgumentException.Create(SInvalidUnicodeCodePointSequence);
    props := GetProps(AString[AIndex], AString[AIndex + 1]);
  end;

  Result := props^.NumericValue;
end;
{ Returns the general category of the single code unit AChar, taken from the
  Category field of its GetProps record. }
class function TCharacter.GetUnicodeCategory(AChar : UnicodeChar) : TUnicodeCategory;
var
  props : PUC_Prop;
begin
  props := GetProps(Word(AChar));
  Result := TUnicodeCategory(props^.Category);
end;
{ Returns the general category of the character starting at AString[AIndex];
  a surrogate at AIndex is resolved together with the following code unit.
  Raises EArgumentOutOfRangeException when AIndex is outside
  1..Length(AString), and EArgumentException when the surrogate at AIndex is
  not reported as the start of a surrogate pair by IsSurrogatePair. }
class function TCharacter.GetUnicodeCategory(
  const AString : UnicodeString;
        AIndex  : Integer
) : TUnicodeCategory;
var
  props  : PUC_Prop;
  strLen : SizeInt;
begin
  strLen := Length(AString);
  if (AIndex < 1) or (AIndex > strLen) then
    raise EArgumentOutOfRangeException.CreateFmt(SStringIndexOutOfRange, [AIndex, strLen]);

  props := GetProps(Word(AString[AIndex]));
  if TUnicodeCategory(props^.Category) = TUnicodeCategory.ucSurrogate then
  begin
    if not IsSurrogatePair(AString, AIndex) then
      raise EArgumentException.Create(SInvalidUnicodeCodePointSequence);
    props := GetProps(AString[AIndex], AString[AIndex + 1]);
  end;

  Result := TUnicodeCategory(props^.Category);
end;
{ True when the general category of the code unit AChar is ucControl (Cc). }
class function TCharacter.IsControl(AChar : UnicodeChar) : Boolean;
var
  cat : TUnicodeCategory;
begin
  cat := TUnicodeCategory(GetProps(Word(AChar))^.Category);
  Result := (cat = TUnicodeCategory.ucControl);
end;
{ True when the character starting at AString[AIndex] has general category
  ucControl (Cc).  Index and surrogate validation - and the exceptions that
  go with it - are those of TestCategory. }
class function TCharacter.IsControl(
  const AString : UnicodeString;
        AIndex  : Integer
) : Boolean;
begin
  Result := TestCategory(AString, AIndex, TUnicodeCategory.ucControl);
end;
{ True when the general category of the code unit AChar is ucDecimalNumber
  (Nd). }
class function TCharacter.IsDigit(AChar : UnicodeChar) : Boolean;
var
  cat : TUnicodeCategory;
begin
  cat := TUnicodeCategory(GetProps(Word(AChar))^.Category);
  Result := (cat = TUnicodeCategory.ucDecimalNumber);
end;
{ True when the character starting at AString[AIndex] has general category
  ucDecimalNumber (Nd).  Index and surrogate validation - and the exceptions
  that go with it - are those of TestCategory. }
class function TCharacter.IsDigit(
  const AString : UnicodeString;
        AIndex  : Integer
) : Boolean;
begin
  Result := TestCategory(AString, AIndex, TUnicodeCategory.ucDecimalNumber);
end;
{ True when the general category of the code unit AChar is ucSurrogate (Cs),
  i.e. it is a high or a low surrogate. }
class function TCharacter.IsSurrogate(AChar : UnicodeChar) : Boolean;
var
  cat : TUnicodeCategory;
begin
  cat := TUnicodeCategory(GetProps(Word(AChar))^.Category);
  Result := (cat = TUnicodeCategory.ucSurrogate);
end;
{ True when the code unit AString[AIndex] is a surrogate.
  Raises EArgumentOutOfRangeException when AIndex is outside
  1..Length(AString). }
class function TCharacter.IsSurrogate(
  const AString : UnicodeString;
        AIndex  : Integer
) : Boolean;
begin
  if (AIndex >= 1) and (AIndex <= Length(AString)) then
    Result := IsSurrogate(AString[AIndex])
  else
    raise EArgumentOutOfRangeException.CreateFmt(SStringIndexOutOfRange, [AIndex, Length(AString)]);
end;
{ True when AChar has general category ucSurrogate and its code lies in
  HIGH_SURROGATE_BEGIN..HIGH_SURROGATE_END. }
class function TCharacter.IsHighSurrogate(AChar : UnicodeChar) : Boolean;
var
  code : Word;
begin
  code := Word(AChar);
  Result :=
    (TUnicodeCategory(GetProps(code)^.Category) = TUnicodeCategory.ucSurrogate) and
    (code >= HIGH_SURROGATE_BEGIN) and
    (code <= HIGH_SURROGATE_END);
end;
{ True when the code unit AString[AIndex] is a high surrogate.
  Raises EArgumentOutOfRangeException when AIndex is outside
  1..Length(AString). }
class function TCharacter.IsHighSurrogate(
  const AString : UnicodeString;
        AIndex  : Integer
) : Boolean;
begin
  if (AIndex >= 1) and (AIndex <= Length(AString)) then
    Result := IsHighSurrogate(AString[AIndex])
  else
    raise EArgumentOutOfRangeException.CreateFmt(SStringIndexOutOfRange, [AIndex, Length(AString)]);
end;
{ True when AChar has general category ucSurrogate and its code lies in
  LOW_SURROGATE_BEGIN..LOW_SURROGATE_END. }
class function TCharacter.IsLowSurrogate(AChar : UnicodeChar) : Boolean;
var
  code : Word;
begin
  code := Word(AChar);
  Result :=
    (TUnicodeCategory(GetProps(code)^.Category) = TUnicodeCategory.ucSurrogate) and
    (code >= LOW_SURROGATE_BEGIN) and
    (code <= LOW_SURROGATE_END);
end;
{ True when the code unit AString[AIndex] is a low surrogate.
  Raises EArgumentOutOfRangeException when AIndex is outside
  1..Length(AString). }
class function TCharacter.IsLowSurrogate(
  const AString : UnicodeString;
        AIndex  : Integer
) : Boolean;
begin
  if (AIndex >= 1) and (AIndex <= Length(AString)) then
    Result := IsLowSurrogate(AString[AIndex])
  else
    raise EArgumentOutOfRangeException.CreateFmt(SStringIndexOutOfRange, [AIndex, Length(AString)]);
end;
{ Reports whether the two code units form a surrogate pair; the decision is
  delegated to UnicodeIsSurrogatePair from the unicodedata unit. }
class function TCharacter.IsSurrogatePair(
  const AHighSurrogate,
        ALowSurrogate : UnicodeChar
) : Boolean;
begin
  Result := UnicodeIsSurrogatePair(AHighSurrogate, ALowSurrogate);
end;
{ Reports whether the code units at AIndex and AIndex+1 form a surrogate pair.

  Returns False when AString[AIndex] is not a high surrogate, when it is the
  last code unit of the string (there is no partner to pair it with), or when
  the following code unit does not complete the pair.

  Raises EArgumentOutOfRangeException only when AIndex itself is outside
  1..Length(AString).  Earlier this routine also raised that exception for a
  high surrogate in the last position, quoting index AIndex+1 which the
  caller never supplied; as a predicate it now answers False instead, which
  lets callers such as TestCategory report the invalid sequence themselves. }
class function TCharacter.IsSurrogatePair(
  const AString : UnicodeString;
        AIndex  : Integer
) : Boolean;
begin
  if (AIndex < 1) or (AIndex > Length(AString)) then
    raise EArgumentOutOfRangeException.CreateFmt(SStringIndexOutOfRange, [AIndex, Length(AString)]);

  // Not a high surrogate: cannot start a pair.
  if not IsHighSurrogate(AString[AIndex]) then
    Exit(False);

  // Dangling high surrogate at the end of the string: no pair.
  if AIndex >= Length(AString) then
    Exit(False);

  Result := IsSurrogatePair(AString[AIndex], AString[AIndex + 1]);
end;
{ True when the general category of the code unit AChar belongs to
  LETTER_CATEGORIES. }
class function TCharacter.IsLetter(AChar : UnicodeChar) : Boolean;
var
  cat : TUnicodeCategory;
begin
  cat := TUnicodeCategory(GetProps(Word(AChar))^.Category);
  Result := (cat in LETTER_CATEGORIES);
end;
{ True when the general category of the character starting at
  AString[AIndex] belongs to LETTER_CATEGORIES.  Index and surrogate
  validation - and the exceptions that go with it - are those of
  TestCategory. }
class function TCharacter.IsLetter(
  const AString : UnicodeString;
        AIndex  : Integer
) : Boolean;
begin
  Result := TestCategory(AString, AIndex, LETTER_CATEGORIES);
end;
{ True when the general category of the code unit AChar belongs to
  LETTER_OR_DIGIT_CATEGORIES. }
class function TCharacter.IsLetterOrDigit(AChar : UnicodeChar) : Boolean;
var
  cat : TUnicodeCategory;
begin
  cat := TUnicodeCategory(GetProps(Word(AChar))^.Category);
  Result := (cat in LETTER_OR_DIGIT_CATEGORIES);
end;
{ True when the general category of the character starting at
  AString[AIndex] belongs to LETTER_OR_DIGIT_CATEGORIES.  Index and surrogate
  validation - and the exceptions that go with it - are those of
  TestCategory. }
class function TCharacter.IsLetterOrDigit(
  const AString : UnicodeString;
        AIndex  : Integer
) : Boolean;
begin
  Result := TestCategory(AString, AIndex, LETTER_OR_DIGIT_CATEGORIES);
end;
{ True when the general category of the code unit AChar is
  ucLowercaseLetter (Ll). }
class function TCharacter.IsLower(AChar : UnicodeChar) : Boolean;
var
  cat : TUnicodeCategory;
begin
  cat := TUnicodeCategory(GetProps(Word(AChar))^.Category);
  Result := (cat = TUnicodeCategory.ucLowercaseLetter);
end;
{ True when the character starting at AString[AIndex] has general category
  ucLowercaseLetter (Ll).  Index and surrogate validation - and the
  exceptions that go with it - are those of TestCategory. }
class function TCharacter.IsLower(
  const AString : UnicodeString;
        AIndex  : Integer
) : Boolean;
begin
  Result := TestCategory(AString, AIndex, TUnicodeCategory.ucLowercaseLetter);
end;
{ True when the general category of the code unit AChar belongs to
  NUMBER_CATEGORIES. }
class function TCharacter.IsNumber(AChar : UnicodeChar) : Boolean;
var
  cat : TUnicodeCategory;
begin
  cat := TUnicodeCategory(GetProps(Word(AChar))^.Category);
  Result := (cat in NUMBER_CATEGORIES);
end;
{ True when the general category of the character starting at
  AString[AIndex] belongs to NUMBER_CATEGORIES.  Index and surrogate
  validation - and the exceptions that go with it - are those of
  TestCategory. }
class function TCharacter.IsNumber(
  const AString : UnicodeString;
        AIndex  : Integer
) : Boolean;
begin
  Result := TestCategory(AString, AIndex, NUMBER_CATEGORIES);
end;
{ True when the general category of the code unit AChar belongs to
  PUNCTUATION_CATEGORIES. }
class function TCharacter.IsPunctuation(AChar : UnicodeChar) : Boolean;
var
  cat : TUnicodeCategory;
begin
  cat := TUnicodeCategory(GetProps(Word(AChar))^.Category);
  Result := (cat in PUNCTUATION_CATEGORIES);
end;
{ True when the general category of the character starting at
  AString[AIndex] belongs to PUNCTUATION_CATEGORIES.  Index and surrogate
  validation - and the exceptions that go with it - are those of
  TestCategory. }
class function TCharacter.IsPunctuation(
  const AString : UnicodeString;
        AIndex  : Integer
) : Boolean;
begin
  Result := TestCategory(AString, AIndex, PUNCTUATION_CATEGORIES);
end;
{ True when the general category of the code unit AChar belongs to
  SEPARATOR_CATEGORIES. }
class function TCharacter.IsSeparator(AChar: UnicodeChar): Boolean;
var
  cat : TUnicodeCategory;
begin
  cat := TUnicodeCategory(GetProps(Word(AChar))^.Category);
  Result := (cat in SEPARATOR_CATEGORIES);
end;
{ True when the general category of the character starting at
  AString[AIndex] belongs to SEPARATOR_CATEGORIES.  Index and surrogate
  validation - and the exceptions that go with it - are those of
  TestCategory. }
class function TCharacter.IsSeparator(
  const AString : UnicodeString;
        AIndex  : Integer
) : Boolean;
begin
  Result := TestCategory(AString, AIndex, SEPARATOR_CATEGORIES);
end;
{ True when the general category of the code unit AChar belongs to
  SYMBOL_CATEGORIES. }
class function TCharacter.IsSymbol(AChar: UnicodeChar): Boolean;
var
  cat : TUnicodeCategory;
begin
  cat := TUnicodeCategory(GetProps(Word(AChar))^.Category);
  Result := (cat in SYMBOL_CATEGORIES);
end;
{ True when the general category of the character starting at
  AString[AIndex] belongs to SYMBOL_CATEGORIES.  Index and surrogate
  validation - and the exceptions that go with it - are those of
  TestCategory. }
class function TCharacter.IsSymbol(
  const AString : UnicodeString;
        AIndex  : Integer
) : Boolean;
begin
  Result := TestCategory(AString, AIndex, SYMBOL_CATEGORIES);
end;
{ True when the general category of the code unit AChar is
  ucUppercaseLetter (Lu). }
class function TCharacter.IsUpper(AChar : UnicodeChar) : Boolean;
var
  cat : TUnicodeCategory;
begin
  cat := TUnicodeCategory(GetProps(Word(AChar))^.Category);
  Result := (cat = TUnicodeCategory.ucUppercaseLetter);
end;
{ True when the character starting at AString[AIndex] has general category
  ucUppercaseLetter (Lu).  Index and surrogate validation - and the
  exceptions that go with it - are those of TestCategory. }
class function TCharacter.IsUpper(
  const AString : UnicodeString;
        AIndex  : Integer
) : Boolean;
begin
  Result := TestCategory(AString, AIndex, TUnicodeCategory.ucUppercaseLetter);
end;
{ Returns the WhiteSpace flag of the property record that GetProps supplies
  for the single code unit AChar. }
class function TCharacter.IsWhiteSpace(AChar : UnicodeChar) : Boolean;
var
  props : PUC_Prop;
begin
  props := GetProps(Word(AChar));
  Result := props^.WhiteSpace;
end;
{ Returns the WhiteSpace flag of the character starting at AString[AIndex];
  a surrogate at AIndex is resolved together with the following code unit.
  Raises EArgumentOutOfRangeException when AIndex is outside
  1..Length(AString), and EArgumentException when the surrogate at AIndex is
  not reported as the start of a surrogate pair by IsSurrogatePair. }
class function TCharacter.IsWhiteSpace(
  const AString : UnicodeString;
        AIndex  : Integer
) : Boolean;
var
  props  : PUC_Prop;
  strLen : SizeInt;
begin
  strLen := Length(AString);
  if (AIndex < 1) or (AIndex > strLen) then
    raise EArgumentOutOfRangeException.CreateFmt(SStringIndexOutOfRange, [AIndex, strLen]);

  props := GetProps(Word(AString[AIndex]));
  if TUnicodeCategory(props^.Category) = TUnicodeCategory.ucSurrogate then
  begin
    if not IsSurrogatePair(AString, AIndex) then
      raise EArgumentException.Create(SInvalidUnicodeCodePointSequence);
    props := GetProps(AString[AIndex], AString[AIndex + 1]);
  end;

  Result := props^.WhiteSpace;
end;
{ Returns the simple lower-case mapping of the code unit AChar (the
  SimpleLowerCase property truncated to 16 bits).  A stored mapping of 0
  leaves the character unchanged. }
class function TCharacter.ToLower(AChar : UnicodeChar) : UnicodeChar;
var
  mapped : UnicodeChar;
begin
  mapped := UnicodeChar(Word(GetProps(Word(AChar))^.SimpleLowerCase));
  if mapped = UnicodeChar(0) then
    Result := AChar
  else
    Result := mapped;
end;
{ Lower-cases AString with an empty option set; see the overload taking
  TCharacterOptions for the conversion itself and the exception it raises. }
class function TCharacter.ToLower(const AString : UnicodeString) : UnicodeString;
begin
  Result := ToLower(AString, []);
end;
{ Lower-cases AString through UnicodeToLower.  Whether
  coIgnoreInvalidSequence is present in AOptions is forwarded as the Boolean
  second argument.
  Raises EArgumentException when UnicodeToLower returns a non-zero status. }
class function TCharacter.ToLower(const AString : UnicodeString; const AOptions : TCharacterOptions) : UnicodeString;
var
  ignoreInvalid : Boolean;
begin
  ignoreInvalid := TCharacterOption.coIgnoreInvalidSequence in AOptions;
  if UnicodeToLower(AString, ignoreInvalid, Result) <> 0 then
    raise EArgumentException.Create(SInvalidUnicodeCodePointSequence);
end;
{ Returns the simple upper-case mapping of the code unit AChar (the
  SimpleUpperCase property truncated to 16 bits).  A stored mapping of 0
  leaves the character unchanged. }
class function TCharacter.ToUpper(AChar : UnicodeChar) : UnicodeChar;
var
  mapped : UnicodeChar;
begin
  mapped := UnicodeChar(Word(GetProps(Word(AChar))^.SimpleUpperCase));
  if mapped = UnicodeChar(0) then
    Result := AChar
  else
    Result := mapped;
end;
{ Upper-cases AString with an empty option set; see the overload taking
  TCharacterOptions for the conversion itself and the exception it raises. }
class function TCharacter.ToUpper(const AString : UnicodeString) : UnicodeString;
begin
  Result := ToUpper(AString, []);
end;
{ Upper-cases AString through UnicodeToUpper.  Whether
  coIgnoreInvalidSequence is present in AOptions is forwarded as the Boolean
  second argument.
  Raises EArgumentException when UnicodeToUpper returns a non-zero status. }
class function TCharacter.ToUpper(const AString : UnicodeString; const AOptions : TCharacterOptions) : UnicodeString;
var
  ignoreInvalid : Boolean;
begin
  ignoreInvalid := TCharacterOption.coIgnoreInvalidSequence in AOptions;
  if UnicodeToUpper(AString, ignoreInvalid, Result) <> 0 then
    raise EArgumentException.Create(SInvalidUnicodeCodePointSequence);
end;
{$endif VER2_4}
end.