{ Mirror of https://gitlab.com/freepascal.org/fpc/source.git
  (synced 2025-04-07 08:48:08 +02:00; 856 lines, 31 KiB, ObjectPascal). }
{   Unicode "Character" properties handler.

    Copyright (c) 2012 by Inoussa OUEDRAOGO

    The source code is distributed under the Library GNU
    General Public License with the following modification:

        - object files and libraries linked into an application may be
          distributed without source code.

    If you didn't receive a copy of the file COPYING, contact:
          Free Software Foundation
          675 Mass Ave
          Cambridge, MA 02139
          USA

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. }
|
|
unit Character;
|
|
|
|
interface
|
|
{$ifndef VER2_4}
|
|
{$mode objfpc}
|
|
{$H+}
|
|
{$PACKENUM 1}
|
|
{$SCOPEDENUMS ON}
|
|
uses
|
|
unicodedata;
|
|
|
|
type
|
|
// Unicode General Category
|
|
TUnicodeCategory = (
  // --- Letters (L*) ---
  ucUppercaseLetter,      // Lu : uppercase letter
  ucLowercaseLetter,      // Ll : lowercase letter
  ucTitlecaseLetter,      // Lt : titlecase letter
  ucModifierLetter,       // Lm : modifier letter
  ucOtherLetter,          // Lo : other letter

  // --- Marks (M*) ---
  ucNonSpacingMark,       // Mn : nonspacing mark
  ucCombiningMark,        // Mc : spacing combining mark
  ucEnclosingMark,        // Me : enclosing mark

  // --- Numbers (N*) ---
  ucDecimalNumber,        // Nd : decimal digit
  ucLetterNumber,         // Nl : letter number
  ucOtherNumber,          // No : other number

  // --- Punctuation (P*) ---
  ucConnectPunctuation,   // Pc : connector punctuation
  ucDashPunctuation,      // Pd : dash punctuation
  ucOpenPunctuation,      // Ps : opening punctuation
  ucClosePunctuation,     // Pe : closing punctuation
  ucInitialPunctuation,   // Pi : initial quote (may act like Ps or Pe depending on usage)
  ucFinalPunctuation,     // Pf : final quote (may act like Ps or Pe depending on usage)
  ucOtherPunctuation,     // Po : other punctuation

  // --- Symbols (S*) ---
  ucMathSymbol,           // Sm : math symbol
  ucCurrencySymbol,       // Sc : currency symbol
  ucModifierSymbol,       // Sk : modifier symbol
  ucOtherSymbol,          // So : other symbol

  // --- Separators (Z*) ---
  ucSpaceSeparator,       // Zs : space separator
  ucLineSeparator,        // Zl : line separator
  ucParagraphSeparator,   // Zp : paragraph separator

  // --- Other (C*) ---
  ucControl,              // Cc : control
  ucFormat,               // Cf : format
  ucSurrogate,            // Cs : surrogate
  ucPrivateUse,           // Co : private use
  ucUnassigned            // Cn : not assigned (including noncharacters)
);
|
|
// Set of general categories; used for "category in [...]" membership tests.
TUnicodeCategorySet = set of TUnicodeCategory;

// Options accepted by the string overloads of TCharacter.ToLower/ToUpper.
// coIgnoreInvalidSequence is forwarded as the "ignore invalid sequence"
// flag of UnicodeToLower/UnicodeToUpper.
TCharacterOption = (coIgnoreInvalidSequence);
TCharacterOptions = set of TCharacterOption;
|
|
|
|
{ TCharacter }
|
|
|
|
TCharacter = class sealed
private
  // Shared helpers behind the string-indexed Is* predicates: test the
  // category of the code point starting at AString[AIndex] against a
  // single category or a set of categories.
  class function TestCategory(const AString : UnicodeString; AIndex : Integer; ACategory : TUnicodeCategory) : Boolean; overload; static;
  class function TestCategory(const AString : UnicodeString; AIndex : Integer; ACategory : TUnicodeCategorySet) : Boolean; overload; static;
public
  // Always raises ENoConstructException: the class only offers static members.
  constructor Create;

  // UTF-32 <-> UTF-16 conversion
  class function ConvertFromUtf32(AChar : UCS4Char) : UnicodeString; static;
  class function ConvertToUtf32(const AString : UnicodeString; AIndex : Integer) : UCS4Char; overload; static;
  class function ConvertToUtf32(const AString : UnicodeString; AIndex : Integer; out ACharLength : Integer) : UCS4Char; overload; static;
  class function ConvertToUtf32(const AHighSurrogate, ALowSurrogate : UnicodeChar) : UCS4Char; overload; static;

  // Property lookup
  class function GetNumericValue(AChar : UnicodeChar) : Double; overload; static;
  class function GetNumericValue(const AString : UnicodeString; AIndex : Integer) : Double; overload; static;

  class function GetUnicodeCategory(AChar : UnicodeChar) : TUnicodeCategory; overload; static; inline;
  class function GetUnicodeCategory(const AString : UnicodeString; AIndex : Integer) : TUnicodeCategory; overload; static;

  // Category predicates
  class function IsControl(AChar : UnicodeChar) : Boolean; overload; static; inline;
  class function IsControl(const AString : UnicodeString; AIndex : Integer) : Boolean; overload; static; inline;

  class function IsDigit(AChar : UnicodeChar) : Boolean; overload; static; inline;
  class function IsDigit(const AString : UnicodeString; AIndex : Integer) : Boolean; overload; static; inline;

  // Surrogate predicates
  class function IsSurrogate(AChar : UnicodeChar) : Boolean; overload; static; inline;
  class function IsSurrogate(const AString : UnicodeString; AIndex : Integer) : Boolean; overload; static;
  class function IsHighSurrogate(AChar : UnicodeChar) : Boolean; overload; static; inline;
  class function IsHighSurrogate(const AString : UnicodeString; AIndex : Integer) : Boolean; overload; static;
  class function IsLowSurrogate(AChar : UnicodeChar) : Boolean; overload; static; inline;
  class function IsLowSurrogate(const AString : UnicodeString; AIndex : Integer) : Boolean; overload; static;
  class function IsSurrogatePair(const AHighSurrogate, ALowSurrogate : UnicodeChar) : Boolean; overload; static; inline;
  class function IsSurrogatePair(const AString : UnicodeString; AIndex : Integer) : Boolean; overload; static;

  class function IsLetter(AChar : UnicodeChar) : Boolean; overload; static; inline;
  class function IsLetter(const AString : UnicodeString; AIndex : Integer) : Boolean; overload; static; inline;

  class function IsLetterOrDigit(AChar : UnicodeChar) : Boolean; overload; static; inline;
  class function IsLetterOrDigit(const AString : UnicodeString; AIndex : Integer) : Boolean; overload; static; inline;

  class function IsLower(AChar : UnicodeChar) : Boolean; overload; static; inline;
  class function IsLower(const AString : UnicodeString; AIndex : Integer) : Boolean; overload; static; inline;

  class function IsNumber(AChar : UnicodeChar) : Boolean; overload; static; inline;
  class function IsNumber(const AString : UnicodeString; AIndex : Integer) : Boolean; overload; static;

  class function IsPunctuation(AChar : UnicodeChar) : Boolean; overload; static; inline;
  class function IsPunctuation(const AString : UnicodeString; AIndex : Integer) : Boolean; overload; static; inline;

  class function IsSeparator(AChar : UnicodeChar) : Boolean; overload; static; inline;
  class function IsSeparator(const AString : UnicodeString; AIndex : Integer) : Boolean; overload; static; inline;

  class function IsSymbol(AChar : UnicodeChar) : Boolean; overload; static; inline;
  class function IsSymbol(const AString : UnicodeString; AIndex : Integer) : Boolean; overload; static; inline;

  class function IsUpper(AChar : UnicodeChar) : Boolean; overload; static; inline;
  class function IsUpper(const AString : UnicodeString; AIndex : Integer) : Boolean; overload; static; inline;

  class function IsWhiteSpace(AChar : UnicodeChar) : Boolean; overload; static; inline;
  class function IsWhiteSpace(const AString : UnicodeString; AIndex : Integer) : Boolean; overload; static;

  // Case conversion
  class function ToLower(AChar : UnicodeChar) : UnicodeChar; overload; static;
  class function ToLower(const AString : UnicodeString) : UnicodeString; overload; static; inline;
  class function ToLower(const AString : UnicodeString; const AOptions : TCharacterOptions) : UnicodeString; overload; static;

  class function ToUpper(AChar : UnicodeChar) : UnicodeChar; overload; static;
  class function ToUpper(const AString : UnicodeString) : UnicodeString; overload; static; inline;
  class function ToUpper(const AString : UnicodeString; const AOptions : TCharacterOptions) : UnicodeString; overload; static;
end;
|
|
|
|
// flat functions
|
|
// Unit-level counterparts of the TCharacter class functions; each one is
// implemented by forwarding to the TCharacter method of the same name.

// -- UTF-32 <-> UTF-16 conversion
function ConvertFromUtf32(AChar : UCS4Char) : UnicodeString;
function ConvertToUtf32(const AString : UnicodeString; AIndex : Integer) : UCS4Char; overload;
function ConvertToUtf32(const AString : UnicodeString; AIndex : Integer; out ACharLength : Integer) : UCS4Char; overload;
function ConvertToUtf32(const AHighSurrogate, ALowSurrogate : UnicodeChar) : UCS4Char; overload;

// -- property lookup
function GetNumericValue(AChar : UnicodeChar) : Double; overload;
function GetNumericValue(const AString : UnicodeString; AIndex : Integer) : Double; overload;
function GetUnicodeCategory(AChar : UnicodeChar) : TUnicodeCategory; overload;
function GetUnicodeCategory(const AString : UnicodeString; AIndex : Integer) : TUnicodeCategory; overload;

// -- predicates
function IsControl(AChar : UnicodeChar) : Boolean; overload;
function IsControl(const AString : UnicodeString; AIndex : Integer) : Boolean; overload;
function IsDigit(AChar : UnicodeChar) : Boolean; overload;
function IsDigit(const AString : UnicodeString; AIndex : Integer) : Boolean; overload;
function IsSurrogate(AChar : UnicodeChar) : Boolean; overload;
function IsSurrogate(const AString : UnicodeString; AIndex : Integer) : Boolean; overload;
function IsHighSurrogate(AChar : UnicodeChar) : Boolean; overload;
function IsHighSurrogate(const AString : UnicodeString; AIndex : Integer) : Boolean; overload;
function IsLowSurrogate(AChar : UnicodeChar) : Boolean; overload;
function IsLowSurrogate(const AString : UnicodeString; AIndex : Integer) : Boolean; overload;
function IsSurrogatePair(const AHighSurrogate, ALowSurrogate : UnicodeChar) : Boolean; overload;
function IsSurrogatePair(const AString : UnicodeString; AIndex : Integer) : Boolean; overload;
function IsLetter(AChar : UnicodeChar) : Boolean; overload;
function IsLetter(const AString : UnicodeString; AIndex : Integer) : Boolean; overload;
function IsLetterOrDigit(AChar : UnicodeChar) : Boolean; overload;
function IsLetterOrDigit(const AString : UnicodeString; AIndex : Integer) : Boolean; overload;
function IsLower(AChar : UnicodeChar) : Boolean; overload;
function IsLower(const AString : UnicodeString; AIndex : Integer) : Boolean; overload;
function IsNumber(AChar : UnicodeChar) : Boolean; overload;
function IsNumber(const AString : UnicodeString; AIndex : Integer) : Boolean; overload;
function IsPunctuation(AChar : UnicodeChar) : Boolean; overload;
function IsPunctuation(const AString : UnicodeString; AIndex : Integer) : Boolean; overload;
function IsSeparator(AChar : UnicodeChar) : Boolean; overload;
function IsSeparator(const AString : UnicodeString; AIndex : Integer) : Boolean; overload;
function IsSymbol(AChar : UnicodeChar) : Boolean; overload;
function IsSymbol(const AString : UnicodeString; AIndex : Integer) : Boolean; overload;
function IsUpper(AChar : UnicodeChar) : Boolean; overload;
function IsUpper(const AString : UnicodeString; AIndex : Integer) : Boolean; overload;
function IsWhiteSpace(AChar : UnicodeChar) : Boolean; overload;
function IsWhiteSpace(const AString : UnicodeString; AIndex : Integer) : Boolean; overload;

// -- case conversion
function ToLower(AChar : UnicodeChar) : UnicodeChar; overload;
function ToLower(const AString : UnicodeString) : UnicodeString; overload;
function ToUpper(AChar : UnicodeChar) : UnicodeChar; overload;
function ToUpper(const AString : UnicodeString) : UnicodeString; overload;
|
|
{$endif VER2_4}
|
|
|
|
implementation
|
|
{$ifndef VER2_4}
|
|
uses
|
|
SysUtils,
|
|
RtlConsts;
|
|
|
|
const
  // Category groups backing the Is* predicates.

  // Lu, Ll, Lt, Lm, Lo
  LETTER_CATEGORIES = [
    TUnicodeCategory.ucUppercaseLetter,
    TUnicodeCategory.ucLowercaseLetter,
    TUnicodeCategory.ucTitlecaseLetter,
    TUnicodeCategory.ucModifierLetter,
    TUnicodeCategory.ucOtherLetter
  ];

  // All letters plus Nd and Nl (note: No is not part of this group)
  LETTER_OR_DIGIT_CATEGORIES =
    LETTER_CATEGORIES +
    [TUnicodeCategory.ucDecimalNumber, TUnicodeCategory.ucLetterNumber];

  // Nd, Nl, No
  NUMBER_CATEGORIES = [
    TUnicodeCategory.ucDecimalNumber,
    TUnicodeCategory.ucLetterNumber,
    TUnicodeCategory.ucOtherNumber
  ];

  // Pc, Pd, Ps, Pe, Pi, Pf, Po
  PUNCTUATION_CATEGORIES = [
    TUnicodeCategory.ucConnectPunctuation,
    TUnicodeCategory.ucDashPunctuation,
    TUnicodeCategory.ucOpenPunctuation,
    TUnicodeCategory.ucClosePunctuation,
    TUnicodeCategory.ucInitialPunctuation,
    TUnicodeCategory.ucFinalPunctuation,
    TUnicodeCategory.ucOtherPunctuation
  ];

  // Zs, Zl, Zp
  SEPARATOR_CATEGORIES = [
    TUnicodeCategory.ucSpaceSeparator,
    TUnicodeCategory.ucLineSeparator,
    TUnicodeCategory.ucParagraphSeparator
  ];

  // Sm, Sc, Sk, So
  SYMBOL_CATEGORIES = [
    TUnicodeCategory.ucMathSymbol,
    TUnicodeCategory.ucCurrencySymbol,
    TUnicodeCategory.ucModifierSymbol,
    TUnicodeCategory.ucOtherSymbol
  ];
|
|
|
|
{ Flat alias: forwards to TCharacter.ConvertFromUtf32. }
function ConvertFromUtf32(AChar: UCS4Char): UnicodeString;
begin
  Exit(TCharacter.ConvertFromUtf32(AChar));
end;
|
|
|
|
{ Flat alias: forwards to TCharacter.ConvertToUtf32(AString, AIndex). }
function ConvertToUtf32(const AString: UnicodeString; AIndex: Integer): UCS4Char;
begin
  Exit(TCharacter.ConvertToUtf32(AString, AIndex));
end;

{ Flat alias: forwards to TCharacter.ConvertToUtf32(AString, AIndex, ACharLength);
  ACharLength is filled in by the class method. }
function ConvertToUtf32(const AString: UnicodeString; AIndex: Integer; out ACharLength: Integer): UCS4Char;
begin
  Exit(TCharacter.ConvertToUtf32(AString, AIndex, ACharLength));
end;

{ Flat alias: forwards to TCharacter.ConvertToUtf32(AHighSurrogate, ALowSurrogate). }
function ConvertToUtf32(const AHighSurrogate, ALowSurrogate: UnicodeChar): UCS4Char;
begin
  Exit(TCharacter.ConvertToUtf32(AHighSurrogate, ALowSurrogate));
end;
|
|
|
|
{ Flat alias: forwards to TCharacter.GetNumericValue(AChar). }
function GetNumericValue(AChar: UnicodeChar): Double;
begin
  Exit(TCharacter.GetNumericValue(AChar));
end;

{ Flat alias: forwards to TCharacter.GetNumericValue(AString, AIndex). }
function GetNumericValue(const AString: UnicodeString; AIndex: Integer): Double;
begin
  Exit(TCharacter.GetNumericValue(AString, AIndex));
end;
|
|
|
|
{ Flat alias: forwards to TCharacter.GetUnicodeCategory(AChar). }
function GetUnicodeCategory(AChar: UnicodeChar): TUnicodeCategory;
begin
  Exit(TCharacter.GetUnicodeCategory(AChar));
end;

{ Flat alias: forwards to TCharacter.GetUnicodeCategory(AString, AIndex). }
function GetUnicodeCategory(const AString: UnicodeString; AIndex: Integer): TUnicodeCategory;
begin
  Exit(TCharacter.GetUnicodeCategory(AString, AIndex));
end;
|
|
|
|
{ Flat alias: forwards to TCharacter.IsControl(AChar). }
function IsControl(AChar: UnicodeChar): Boolean;
begin
  Exit(TCharacter.IsControl(AChar));
end;

{ Flat alias: forwards to TCharacter.IsControl(AString, AIndex). }
function IsControl(const AString: UnicodeString; AIndex: Integer): Boolean;
begin
  Exit(TCharacter.IsControl(AString, AIndex));
end;
|
|
|
|
{ Flat alias: forwards to TCharacter.IsDigit(AChar). }
function IsDigit(AChar: UnicodeChar): Boolean;
begin
  Exit(TCharacter.IsDigit(AChar));
end;

{ Flat alias: forwards to TCharacter.IsDigit(AString, AIndex). }
function IsDigit(const AString: UnicodeString; AIndex: Integer): Boolean;
begin
  Exit(TCharacter.IsDigit(AString, AIndex));
end;
|
|
|
|
{ Flat alias: forwards to TCharacter.IsSurrogate(AChar). }
function IsSurrogate(AChar: UnicodeChar): Boolean;
begin
  Exit(TCharacter.IsSurrogate(AChar));
end;

{ Flat alias: forwards to TCharacter.IsSurrogate(AString, AIndex). }
function IsSurrogate(const AString: UnicodeString; AIndex: Integer): Boolean;
begin
  Exit(TCharacter.IsSurrogate(AString, AIndex));
end;
|
|
|
|
{ Flat alias: forwards to TCharacter.IsHighSurrogate(AChar). }
function IsHighSurrogate(AChar: UnicodeChar): Boolean;
begin
  Exit(TCharacter.IsHighSurrogate(AChar));
end;

{ Flat alias: forwards to TCharacter.IsHighSurrogate(AString, AIndex). }
function IsHighSurrogate(const AString: UnicodeString; AIndex: Integer): Boolean;
begin
  Exit(TCharacter.IsHighSurrogate(AString, AIndex));
end;
|
|
|
|
{ Flat alias: forwards to TCharacter.IsLowSurrogate(AChar). }
function IsLowSurrogate(AChar: UnicodeChar): Boolean;
begin
  Exit(TCharacter.IsLowSurrogate(AChar));
end;

{ Flat alias: forwards to TCharacter.IsLowSurrogate(AString, AIndex). }
function IsLowSurrogate(const AString: UnicodeString; AIndex: Integer): Boolean;
begin
  Exit(TCharacter.IsLowSurrogate(AString, AIndex));
end;
|
|
|
|
{ Flat alias: forwards to TCharacter.IsSurrogatePair(AHighSurrogate, ALowSurrogate). }
function IsSurrogatePair(const AHighSurrogate, ALowSurrogate: UnicodeChar): Boolean;
begin
  Exit(TCharacter.IsSurrogatePair(AHighSurrogate, ALowSurrogate));
end;

{ Flat alias: forwards to TCharacter.IsSurrogatePair(AString, AIndex). }
function IsSurrogatePair(const AString: UnicodeString; AIndex: Integer): Boolean;
begin
  Exit(TCharacter.IsSurrogatePair(AString, AIndex));
end;
|
|
|
|
{ Flat alias: forwards to TCharacter.IsLetter(AChar). }
function IsLetter(AChar: UnicodeChar): Boolean;
begin
  Exit(TCharacter.IsLetter(AChar));
end;

{ Flat alias: forwards to TCharacter.IsLetter(AString, AIndex). }
function IsLetter(const AString: UnicodeString; AIndex: Integer): Boolean;
begin
  Exit(TCharacter.IsLetter(AString, AIndex));
end;
|
|
|
|
{ Flat alias: forwards to TCharacter.IsLetterOrDigit(AChar). }
function IsLetterOrDigit(AChar: UnicodeChar): Boolean;
begin
  Exit(TCharacter.IsLetterOrDigit(AChar));
end;

{ Flat alias: forwards to TCharacter.IsLetterOrDigit(AString, AIndex). }
function IsLetterOrDigit(const AString: UnicodeString; AIndex: Integer): Boolean;
begin
  Exit(TCharacter.IsLetterOrDigit(AString, AIndex));
end;
|
|
|
|
{ Flat alias: forwards to TCharacter.IsLower(AChar). }
function IsLower(AChar: UnicodeChar): Boolean;
begin
  Exit(TCharacter.IsLower(AChar));
end;

{ Flat alias: forwards to TCharacter.IsLower(AString, AIndex). }
function IsLower(const AString: UnicodeString; AIndex: Integer): Boolean;
begin
  Exit(TCharacter.IsLower(AString, AIndex));
end;
|
|
|
|
{ Flat alias: forwards to TCharacter.IsNumber(AChar). }
function IsNumber(AChar: UnicodeChar): Boolean;
begin
  Exit(TCharacter.IsNumber(AChar));
end;

{ Flat alias: forwards to TCharacter.IsNumber(AString, AIndex). }
function IsNumber(const AString: UnicodeString; AIndex: Integer): Boolean;
begin
  Exit(TCharacter.IsNumber(AString, AIndex));
end;
|
|
|
|
{ Flat alias: forwards to TCharacter.IsPunctuation(AChar). }
function IsPunctuation(AChar: UnicodeChar): Boolean;
begin
  Exit(TCharacter.IsPunctuation(AChar));
end;

{ Flat alias: forwards to TCharacter.IsPunctuation(AString, AIndex). }
function IsPunctuation(const AString: UnicodeString; AIndex: Integer): Boolean;
begin
  Exit(TCharacter.IsPunctuation(AString, AIndex));
end;
|
|
|
|
{ Flat alias: forwards to TCharacter.IsSeparator(AChar). }
function IsSeparator(AChar: UnicodeChar): Boolean;
begin
  Exit(TCharacter.IsSeparator(AChar));
end;

{ Flat alias: forwards to TCharacter.IsSeparator(AString, AIndex). }
function IsSeparator(const AString: UnicodeString; AIndex: Integer): Boolean;
begin
  Exit(TCharacter.IsSeparator(AString, AIndex));
end;
|
|
|
|
{ Flat alias: forwards to TCharacter.IsSymbol(AChar). }
function IsSymbol(AChar: UnicodeChar): Boolean;
begin
  Exit(TCharacter.IsSymbol(AChar));
end;

{ Flat alias: forwards to TCharacter.IsSymbol(AString, AIndex). }
function IsSymbol(const AString: UnicodeString; AIndex: Integer): Boolean;
begin
  Exit(TCharacter.IsSymbol(AString, AIndex));
end;
|
|
|
|
{ Flat alias: forwards to TCharacter.IsUpper(AChar). }
function IsUpper(AChar: UnicodeChar): Boolean;
begin
  Exit(TCharacter.IsUpper(AChar));
end;

{ Flat alias: forwards to TCharacter.IsUpper(AString, AIndex). }
function IsUpper(const AString: UnicodeString; AIndex: Integer): Boolean;
begin
  Exit(TCharacter.IsUpper(AString, AIndex));
end;
|
|
|
|
{ Flat alias: forwards to TCharacter.IsWhiteSpace(AChar). }
function IsWhiteSpace(AChar: UnicodeChar): Boolean;
begin
  Exit(TCharacter.IsWhiteSpace(AChar));
end;

{ Flat alias: forwards to TCharacter.IsWhiteSpace(AString, AIndex). }
function IsWhiteSpace(const AString: UnicodeString; AIndex: Integer): Boolean;
begin
  Exit(TCharacter.IsWhiteSpace(AString, AIndex));
end;
|
|
|
|
{ Flat alias: forwards to TCharacter.ToLower(AChar). }
function ToLower(AChar: UnicodeChar): UnicodeChar;
begin
  Exit(TCharacter.ToLower(AChar));
end;

{ Flat alias: forwards to TCharacter.ToLower(AString). }
function ToLower(const AString: UnicodeString): UnicodeString;
begin
  Exit(TCharacter.ToLower(AString));
end;
|
|
|
|
{ Flat alias: forwards to TCharacter.ToUpper(AChar). }
function ToUpper(AChar: UnicodeChar): UnicodeChar;
begin
  Exit(TCharacter.ToUpper(AChar));
end;

{ Flat alias: forwards to TCharacter.ToUpper(AString). }
function ToUpper(const AString: UnicodeString): UnicodeString;
begin
  Exit(TCharacter.ToUpper(AString));
end;
|
|
|
|
{ TCharacter }
|
|
|
|
{ Tests whether the code point starting at AString[AIndex] has exactly the
  general category ACategory.

  The index validation and surrogate handling are those of
  GetUnicodeCategory(AString, AIndex): EArgumentOutOfRangeException for an
  index outside 1..Length(AString), EArgumentException when a surrogate at
  AIndex does not start a valid pair. }
class function TCharacter.TestCategory(
  const AString   : UnicodeString;
        AIndex    : Integer;
        ACategory : TUnicodeCategory
) : Boolean;
begin
  Result := (GetUnicodeCategory(AString, AIndex) = ACategory);
end;

{ Same as above, but succeeds when the category is a member of the set
  ACategory. }
class function TCharacter.TestCategory(
  const AString   : UnicodeString;
        AIndex    : Integer;
        ACategory : TUnicodeCategorySet
) : Boolean;
begin
  Result := (GetUnicodeCategory(AString, AIndex) in ACategory);
end;
|
|
|
|
{ TCharacter only exposes static class functions, so construction is
  rejected: always raises ENoConstructException naming the class. }
constructor TCharacter.Create;
begin
  raise ENoConstructException.CreateFmt(
    SClassCantBeConstructed,
    [ClassName]
  );
end;
|
|
|
|
{ Encodes the UTF-32 value AChar as a UTF-16 string.

  Values below UCS4_HALF_BASE yield a one-unit string, unless the value is a
  surrogate, which is rejected. Larger values up to MAX_LEGAL_UTF32 yield a
  two-unit (high, low) surrogate sequence.

  Raises EArgumentOutOfRangeException (SInvalidUTF32Char) for a surrogate
  value or a value above MAX_LEGAL_UTF32. }
class function TCharacter.ConvertFromUtf32(AChar : UCS4Char) : UnicodeString;
var
  offset : UCS4Char;
begin
  if AChar < UCS4_HALF_BASE then
  begin
    // Single code unit; surrogate values are not legal on their own.
    if IsSurrogate(UnicodeChar(AChar)) then
      raise EArgumentOutOfRangeException.CreateFmt(SInvalidUTF32Char, [AChar]);
    Result := UnicodeChar(AChar);
    Exit;
  end;

  if AChar > MAX_LEGAL_UTF32 then
    raise EArgumentOutOfRangeException.CreateFmt(SInvalidUTF32Char, [AChar]);

  // Split the offset from UCS4_HALF_BASE into a high part (shifted right by
  // 10 bits) and a low part (masked with UCS4_HALF_MASK).
  offset := AChar - UCS4_HALF_BASE;
  SetLength(Result, 2);
  Result[1] := UnicodeChar((offset shr 10) + HIGH_SURROGATE_BEGIN);
  Result[2] := UnicodeChar((offset and UCS4_HALF_MASK) + LOW_SURROGATE_BEGIN);
end;
|
|
|
|
{ Returns the UTF-32 value of the code point starting at AString[AIndex].
  Same checks and result as the overload with ACharLength; the length is
  simply discarded. }
class function TCharacter.ConvertToUtf32(const AString : UnicodeString; AIndex : Integer) : UCS4Char; overload;
var
  unusedLength : Integer;
begin
  Result := ConvertToUtf32(AString, AIndex, unusedLength);
end;

{ Returns the UTF-32 value of the code point starting at AString[AIndex] and
  stores in ACharLength the number of UTF-16 units it occupies (1 or 2).

  Raises EArgumentOutOfRangeException when AIndex is outside
  1..Length(AString), EArgumentException (SInvalidHighSurrogate) when a high
  surrogate is the last unit of the string, and whatever the
  (high, low) overload raises when the following unit is not a low
  surrogate. ACharLength is only assigned when no exception is raised. }
class function TCharacter.ConvertToUtf32(const AString : UnicodeString; AIndex : Integer; out ACharLength : Integer) : UCS4Char; overload;
var
  lead : UnicodeChar;
begin
  if not ((AIndex >= 1) and (AIndex <= Length(AString))) then
    raise EArgumentOutOfRangeException.CreateFmt(SStringIndexOutOfRange, [AIndex, Length(AString)]);

  lead := AString[AIndex];
  if IsHighSurrogate(lead) then
  begin
    // A high surrogate needs a following unit to complete the pair.
    if AIndex >= Length(AString) then
      raise EArgumentException.CreateFmt(SInvalidHighSurrogate, [AIndex]);
    Result := ConvertToUtf32(lead, AString[AIndex + 1]);
    ACharLength := 2;
  end
  else
  begin
    Result := Word(lead);
    ACharLength := 1;
  end;
end;

{ Combines a (high, low) surrogate pair into its UTF-32 value via ToUCS4.

  Raises EArgumentOutOfRangeException when AHighSurrogate is not a high
  surrogate (SHighSurrogateOutOfRange) or ALowSurrogate is not a low
  surrogate (SLowSurrogateOutOfRange); the high unit is checked first. }
class function TCharacter.ConvertToUtf32(const AHighSurrogate, ALowSurrogate : UnicodeChar) : UCS4Char; overload;
begin
  if not IsHighSurrogate(AHighSurrogate) then
    raise EArgumentOutOfRangeException.CreateFmt(
      SHighSurrogateOutOfRange, [Word(AHighSurrogate)]);
  if not IsLowSurrogate(ALowSurrogate) then
    raise EArgumentOutOfRangeException.CreateFmt(
      SLowSurrogateOutOfRange, [Word(ALowSurrogate)]);
  Result := ToUCS4(AHighSurrogate, ALowSurrogate);
end;
|
|
|
|
{ Returns the NumericValue property recorded for the UTF-16 unit AChar. }
class function TCharacter.GetNumericValue(AChar : UnicodeChar) : Double;
var
  props : PUC_Prop;
begin
  props := GetProps(Word(AChar));
  Result := props^.NumericValue;
end;

{ Returns the NumericValue property of the code point starting at
  AString[AIndex]; a surrogate at AIndex is looked up together with the
  following unit.

  Raises EArgumentOutOfRangeException when AIndex is outside
  1..Length(AString) and EArgumentException
  (SInvalidUnicodeCodePointSequence) when a surrogate at AIndex does not
  start a valid pair. }
class function TCharacter.GetNumericValue(const AString : UnicodeString; AIndex : Integer) : Double;
var
  props : PUC_Prop;
begin
  if not ((AIndex >= 1) and (AIndex <= Length(AString))) then
    raise EArgumentOutOfRangeException.CreateFmt(SStringIndexOutOfRange, [AIndex, Length(AString)]);

  props := GetProps(Word(AString[AIndex]));
  if TUnicodeCategory(props^.Category) = TUnicodeCategory.ucSurrogate then
  begin
    // Surrogates carry no properties of their own: use the pair's entry.
    if not IsSurrogatePair(AString, AIndex) then
      raise EArgumentException.Create(SInvalidUnicodeCodePointSequence);
    props := GetProps(AString[AIndex], AString[AIndex+1]);
  end;
  Result := props^.NumericValue;
end;
|
|
|
|
{ Returns the general category recorded for the UTF-16 unit AChar. }
class function TCharacter.GetUnicodeCategory(AChar : UnicodeChar) : TUnicodeCategory;
begin
  Result := TUnicodeCategory(GetProps(Word(AChar))^.Category);
end;

{ Returns the general category of the code point starting at
  AString[AIndex]; a surrogate at AIndex is looked up together with the
  following unit.

  Raises EArgumentOutOfRangeException when AIndex is outside
  1..Length(AString) and EArgumentException
  (SInvalidUnicodeCodePointSequence) when a surrogate at AIndex does not
  start a valid pair. }
class function TCharacter.GetUnicodeCategory(const AString : UnicodeString; AIndex : Integer) : TUnicodeCategory;
var
  props : PUC_Prop;
begin
  if not ((AIndex >= 1) and (AIndex <= Length(AString))) then
    raise EArgumentOutOfRangeException.CreateFmt(SStringIndexOutOfRange, [AIndex, Length(AString)]);

  props := GetProps(Word(AString[AIndex]));
  if TUnicodeCategory(props^.Category) = TUnicodeCategory.ucSurrogate then
  begin
    // Resolve the pair so the category of the full code point is reported.
    if not IsSurrogatePair(AString, AIndex) then
      raise EArgumentException.Create(SInvalidUnicodeCodePointSequence);
    props := GetProps(AString[AIndex], AString[AIndex+1]);
  end;
  Result := TUnicodeCategory(props^.Category);
end;
|
|
|
|
{ True when the general category of AChar is ucControl (Cc). }
class function TCharacter.IsControl(AChar : UnicodeChar) : Boolean;
begin
  Result := GetUnicodeCategory(AChar) = TUnicodeCategory.ucControl;
end;

{ String variant: tests the code point starting at AString[AIndex] through
  TestCategory, which validates AIndex and resolves surrogate pairs. }
class function TCharacter.IsControl(const AString : UnicodeString; AIndex : Integer) : Boolean;
begin
  Result := TestCategory(AString, AIndex, TUnicodeCategory.ucControl);
end;
|
|
|
|
{ True when the general category of AChar is ucDecimalNumber (Nd). }
class function TCharacter.IsDigit(AChar : UnicodeChar) : Boolean;
begin
  Result := GetUnicodeCategory(AChar) = TUnicodeCategory.ucDecimalNumber;
end;

{ String variant: tests the code point starting at AString[AIndex] through
  TestCategory, which validates AIndex and resolves surrogate pairs. }
class function TCharacter.IsDigit(const AString : UnicodeString; AIndex : Integer) : Boolean;
begin
  Result := TestCategory(AString, AIndex, TUnicodeCategory.ucDecimalNumber);
end;
|
|
|
|
{ True when the general category of AChar is ucSurrogate (Cs). }
class function TCharacter.IsSurrogate(AChar : UnicodeChar) : Boolean;
begin
  Result := GetUnicodeCategory(AChar) = TUnicodeCategory.ucSurrogate;
end;

{ True when the UTF-16 unit at AString[AIndex] is a surrogate.
  Raises EArgumentOutOfRangeException when AIndex is outside
  1..Length(AString). }
class function TCharacter.IsSurrogate(const AString : UnicodeString; AIndex : Integer) : Boolean;
begin
  if not ((AIndex >= 1) and (AIndex <= Length(AString))) then
    raise EArgumentOutOfRangeException.CreateFmt(SStringIndexOutOfRange, [AIndex, Length(AString)]);
  Result := IsSurrogate(AString[AIndex]);
end;
|
|
|
|
{ True when AChar lies in HIGH_SURROGATE_BEGIN..HIGH_SURROGATE_END and its
  general category is ucSurrogate. }
class function TCharacter.IsHighSurrogate(AChar : UnicodeChar) : Boolean;
var
  code : Word;
begin
  code := Word(AChar);
  Result := (code >= HIGH_SURROGATE_BEGIN) and
            (code <= HIGH_SURROGATE_END) and
            (TUnicodeCategory(GetProps(code)^.Category) = TUnicodeCategory.ucSurrogate);
end;

{ True when the UTF-16 unit at AString[AIndex] is a high surrogate.
  Raises EArgumentOutOfRangeException when AIndex is outside
  1..Length(AString). }
class function TCharacter.IsHighSurrogate(const AString : UnicodeString; AIndex : Integer) : Boolean;
begin
  if not ((AIndex >= 1) and (AIndex <= Length(AString))) then
    raise EArgumentOutOfRangeException.CreateFmt(SStringIndexOutOfRange, [AIndex, Length(AString)]);
  Result := IsHighSurrogate(AString[AIndex]);
end;
|
|
|
|
{ True when AChar lies in LOW_SURROGATE_BEGIN..LOW_SURROGATE_END and its
  general category is ucSurrogate. }
class function TCharacter.IsLowSurrogate(AChar : UnicodeChar) : Boolean;
var
  code : Word;
begin
  code := Word(AChar);
  Result := (code >= LOW_SURROGATE_BEGIN) and
            (code <= LOW_SURROGATE_END) and
            (TUnicodeCategory(GetProps(code)^.Category) = TUnicodeCategory.ucSurrogate);
end;

{ True when the UTF-16 unit at AString[AIndex] is a low surrogate.
  Raises EArgumentOutOfRangeException when AIndex is outside
  1..Length(AString). }
class function TCharacter.IsLowSurrogate(const AString : UnicodeString; AIndex : Integer) : Boolean;
begin
  if not ((AIndex >= 1) and (AIndex <= Length(AString))) then
    raise EArgumentOutOfRangeException.CreateFmt(SStringIndexOutOfRange, [AIndex, Length(AString)]);
  Result := IsLowSurrogate(AString[AIndex]);
end;
|
|
|
|
{ True when (AHighSurrogate, ALowSurrogate) form a surrogate pair, as decided
  by UnicodeIsSurrogatePair. }
class function TCharacter.IsSurrogatePair(
  const AHighSurrogate,
        ALowSurrogate   : UnicodeChar
) : Boolean;
begin
  Result := UnicodeIsSurrogatePair(AHighSurrogate,ALowSurrogate);
end;

{ True when AString[AIndex] and AString[AIndex+1] form a surrogate pair.

  Raises EArgumentOutOfRangeException only when AIndex itself is outside
  1..Length(AString).

  Change from the previous implementation: a high surrogate sitting at the
  very end of the string now yields False instead of raising
  EArgumentOutOfRangeException for AIndex+1. The caller passed a valid
  index, and a truncated pair is simply "not a pair". Callers in this unit
  that need an error (TestCategory, GetUnicodeCategory, GetNumericValue,
  IsWhiteSpace) already turn a False result into
  EArgumentException(SInvalidUnicodeCodePointSequence), which matches how
  ConvertToUtf32 reports a dangling high surrogate as an argument error
  rather than an index error. }
class function TCharacter.IsSurrogatePair(
  const AString : UnicodeString;
        AIndex  : Integer
) : Boolean;
begin
  if (AIndex < 1) or (AIndex > Length(AString)) then
    raise EArgumentOutOfRangeException.CreateFmt(SStringIndexOutOfRange, [AIndex, Length(AString)]);

  if AIndex >= Length(AString) then
    // Last unit of the string: nothing follows, so it cannot start a pair.
    Result := False
  else if not IsHighSurrogate(AString[AIndex]) then
    Result := False
  else
    Result := IsSurrogatePair(AString[AIndex],AString[AIndex+1]);
end;
|
|
|
|
{ True when the general category of AChar is in LETTER_CATEGORIES. }
class function TCharacter.IsLetter(AChar : UnicodeChar) : Boolean;
begin
  Result := GetUnicodeCategory(AChar) in LETTER_CATEGORIES;
end;

{ String variant: tests the code point starting at AString[AIndex] through
  TestCategory, which validates AIndex and resolves surrogate pairs. }
class function TCharacter.IsLetter(const AString : UnicodeString; AIndex : Integer) : Boolean;
begin
  Result := TestCategory(AString, AIndex, LETTER_CATEGORIES);
end;
|
|
|
|
{ True when the general category of AChar is in LETTER_OR_DIGIT_CATEGORIES. }
class function TCharacter.IsLetterOrDigit(AChar : UnicodeChar) : Boolean;
begin
  Result := GetUnicodeCategory(AChar) in LETTER_OR_DIGIT_CATEGORIES;
end;

{ String variant: tests the code point starting at AString[AIndex] through
  TestCategory, which validates AIndex and resolves surrogate pairs. }
class function TCharacter.IsLetterOrDigit(const AString : UnicodeString; AIndex : Integer) : Boolean;
begin
  Result := TestCategory(AString, AIndex, LETTER_OR_DIGIT_CATEGORIES);
end;
|
|
|
|
{ True when the general category of AChar is ucLowercaseLetter (Ll). }
class function TCharacter.IsLower(AChar : UnicodeChar) : Boolean;
begin
  Result := GetUnicodeCategory(AChar) = TUnicodeCategory.ucLowercaseLetter;
end;

{ String variant: tests the code point starting at AString[AIndex] through
  TestCategory, which validates AIndex and resolves surrogate pairs. }
class function TCharacter.IsLower(const AString : UnicodeString; AIndex : Integer) : Boolean;
begin
  Result := TestCategory(AString, AIndex, TUnicodeCategory.ucLowercaseLetter);
end;
|
|
|
|
{ True when the general category of AChar is in NUMBER_CATEGORIES. }
class function TCharacter.IsNumber(AChar : UnicodeChar) : Boolean;
begin
  Result := GetUnicodeCategory(AChar) in NUMBER_CATEGORIES;
end;

{ String variant: tests the code point starting at AString[AIndex] through
  TestCategory, which validates AIndex and resolves surrogate pairs. }
class function TCharacter.IsNumber(const AString : UnicodeString; AIndex : Integer) : Boolean;
begin
  Result := TestCategory(AString, AIndex, NUMBER_CATEGORIES);
end;
|
|
|
|
{ True when the general category of AChar is in PUNCTUATION_CATEGORIES. }
class function TCharacter.IsPunctuation(AChar : UnicodeChar) : Boolean;
begin
  Result := GetUnicodeCategory(AChar) in PUNCTUATION_CATEGORIES;
end;

{ String variant: tests the code point starting at AString[AIndex] through
  TestCategory, which validates AIndex and resolves surrogate pairs. }
class function TCharacter.IsPunctuation(const AString : UnicodeString; AIndex : Integer) : Boolean;
begin
  Result := TestCategory(AString, AIndex, PUNCTUATION_CATEGORIES);
end;
|
|
|
|
{ True when the general category of AChar is in SEPARATOR_CATEGORIES. }
class function TCharacter.IsSeparator(AChar: UnicodeChar): Boolean;
begin
  Result := GetUnicodeCategory(AChar) in SEPARATOR_CATEGORIES;
end;

{ String variant: tests the code point starting at AString[AIndex] through
  TestCategory, which validates AIndex and resolves surrogate pairs. }
class function TCharacter.IsSeparator(const AString : UnicodeString; AIndex : Integer) : Boolean;
begin
  Result := TestCategory(AString, AIndex, SEPARATOR_CATEGORIES);
end;
|
|
|
|
{ True when the general category of AChar is in SYMBOL_CATEGORIES. }
class function TCharacter.IsSymbol(AChar: UnicodeChar): Boolean;
begin
  Result := GetUnicodeCategory(AChar) in SYMBOL_CATEGORIES;
end;

{ String variant: tests the code point starting at AString[AIndex] through
  TestCategory, which validates AIndex and resolves surrogate pairs. }
class function TCharacter.IsSymbol(const AString : UnicodeString; AIndex : Integer) : Boolean;
begin
  Result := TestCategory(AString, AIndex, SYMBOL_CATEGORIES);
end;
|
|
|
|
{ True when the general category of AChar is ucUppercaseLetter (Lu). }
class function TCharacter.IsUpper(AChar : UnicodeChar) : Boolean;
begin
  Result := GetUnicodeCategory(AChar) = TUnicodeCategory.ucUppercaseLetter;
end;

{ String variant: tests the code point starting at AString[AIndex] through
  TestCategory, which validates AIndex and resolves surrogate pairs. }
class function TCharacter.IsUpper(const AString : UnicodeString; AIndex : Integer) : Boolean;
begin
  Result := TestCategory(AString, AIndex, TUnicodeCategory.ucUppercaseLetter);
end;
|
|
|
|
{ Returns the WhiteSpace property recorded for the UTF-16 unit AChar. }
class function TCharacter.IsWhiteSpace(AChar : UnicodeChar) : Boolean;
begin
  Result := GetProps(Word(AChar))^.WhiteSpace;
end;

{ Returns the WhiteSpace property of the code point starting at
  AString[AIndex]; a surrogate at AIndex is looked up together with the
  following unit.

  Raises EArgumentOutOfRangeException when AIndex is outside
  1..Length(AString) and EArgumentException
  (SInvalidUnicodeCodePointSequence) when a surrogate at AIndex does not
  start a valid pair. }
class function TCharacter.IsWhiteSpace(const AString : UnicodeString; AIndex : Integer) : Boolean;
var
  props : PUC_Prop;
begin
  if not ((AIndex >= 1) and (AIndex <= Length(AString))) then
    raise EArgumentOutOfRangeException.CreateFmt(SStringIndexOutOfRange, [AIndex, Length(AString)]);

  props := GetProps(Word(AString[AIndex]));
  if TUnicodeCategory(props^.Category) = TUnicodeCategory.ucSurrogate then
  begin
    // Look the property up on the full code point, not on the lone surrogate.
    if not IsSurrogatePair(AString, AIndex) then
      raise EArgumentException.Create(SInvalidUnicodeCodePointSequence);
    props := GetProps(AString[AIndex], AString[AIndex+1]);
  end;
  Result := props^.WhiteSpace;
end;
|
|
|
|
{ Returns the SimpleLowerCase mapping recorded for AChar, or AChar itself
  when that mapping is 0. }
class function TCharacter.ToLower(AChar : UnicodeChar) : UnicodeChar;
var
  mapped : UnicodeChar;
begin
  mapped := UnicodeChar(Word(GetProps(Word(AChar))^.SimpleLowerCase));
  if mapped = UnicodeChar(0) then
    Result := AChar
  else
    Result := mapped;
end;

{ Lowercases AString with no options set. }
class function TCharacter.ToLower(const AString : UnicodeString) : UnicodeString;
begin
  Result := ToLower(AString, []);
end;

{ Lowercases AString through UnicodeToLower. coIgnoreInvalidSequence in
  AOptions is passed on as the "ignore invalid sequence" flag.
  Raises EArgumentException (SInvalidUnicodeCodePointSequence) when
  UnicodeToLower returns a non-zero status. }
class function TCharacter.ToLower(const AString : UnicodeString; const AOptions : TCharacterOptions) : UnicodeString;
var
  ignoreInvalid : Boolean;
begin
  ignoreInvalid := TCharacterOption.coIgnoreInvalidSequence in AOptions;
  if UnicodeToLower(AString, ignoreInvalid, Result) <> 0 then
    raise EArgumentException.Create(SInvalidUnicodeCodePointSequence);
end;
|
|
|
|
{ Returns the SimpleUpperCase mapping recorded for AChar, or AChar itself
  when that mapping is 0. }
class function TCharacter.ToUpper(AChar : UnicodeChar) : UnicodeChar;
var
  mapped : UnicodeChar;
begin
  mapped := UnicodeChar(Word(GetProps(Word(AChar))^.SimpleUpperCase));
  if mapped = UnicodeChar(0) then
    Result := AChar
  else
    Result := mapped;
end;

{ Uppercases AString with no options set. }
class function TCharacter.ToUpper(const AString : UnicodeString) : UnicodeString;
begin
  Result := ToUpper(AString, []);
end;

{ Uppercases AString through UnicodeToUpper. coIgnoreInvalidSequence in
  AOptions is passed on as the "ignore invalid sequence" flag.
  Raises EArgumentException (SInvalidUnicodeCodePointSequence) when
  UnicodeToUpper returns a non-zero status. }
class function TCharacter.ToUpper(const AString : UnicodeString; const AOptions : TCharacterOptions) : UnicodeString;
var
  ignoreInvalid : Boolean;
begin
  ignoreInvalid := TCharacterOption.coIgnoreInvalidSequence in AOptions;
  if UnicodeToUpper(AString, ignoreInvalid, Result) <> 0 then
    raise EArgumentException.Create(SInvalidUnicodeCodePointSequence);
end;
|
|
|
|
{$endif VER2_4}
|
|
end.
|