mirror of
https://gitlab.com/freepascal.org/fpc/source.git
synced 2025-09-10 22:09:18 +02:00
rtl:
* apply Inoussa changes to character.pas and related files (part of mantis #0022909) git-svn-id: trunk@23657 -
This commit is contained in:
parent
db6385ef33
commit
e549954de6
6
.gitattributes
vendored
6
.gitattributes
vendored
@ -8435,10 +8435,14 @@ rtl/objpas/sysutils/syswideh.inc svneol=native#text/plain
|
||||
rtl/objpas/types.pp svneol=native#text/plain
|
||||
rtl/objpas/typinfo.pp svneol=native#text/plain
|
||||
rtl/objpas/unicodedata.inc svneol=native#text/pascal
|
||||
rtl/objpas/unicodedata2.inc svneol=native#text/pascal
|
||||
rtl/objpas/unicodedata.pas svneol=native#text/pascal
|
||||
rtl/objpas/unicodedata_be.inc svneol=native#text/pascal
|
||||
rtl/objpas/unicodedata_le.inc svneol=native#text/pascal
|
||||
rtl/objpas/unicodenumtable.pas svneol=native#text/pascal
|
||||
rtl/objpas/utf8bidi.pp svneol=native#text/plain
|
||||
rtl/objpas/varutilh.inc svneol=native#text/plain
|
||||
rtl/objpas/varutils.inc svneol=native#text/plain
|
||||
rtl/objpas/weight_derivation.inc svneol=native#text/pascal
|
||||
rtl/openbsd/Makefile svneol=native#text/plain
|
||||
rtl/openbsd/Makefile.fpc svneol=native#text/plain
|
||||
rtl/openbsd/classes.pp svneol=native#text/plain
|
||||
|
@ -1,3 +1,22 @@
|
||||
{ Unicode "Character" properties handler.
|
||||
|
||||
Copyright (c) 2012 by Inoussa OUEDRAOGO
|
||||
|
||||
The source code is distributed under the Library GNU
|
||||
General Public License with the following modification:
|
||||
|
||||
- object files and libraries linked into an application may be
|
||||
distributed without source code.
|
||||
|
||||
If you didn't receive a copy of the file COPYING, contact:
|
||||
Free Software Foundation
|
||||
675 Mass Ave
|
||||
Cambridge, MA 02139
|
||||
USA
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. }
|
||||
unit character;
|
||||
|
||||
interface
|
||||
@ -6,6 +25,8 @@ interface
|
||||
{$H+}
|
||||
{$PACKENUM 1}
|
||||
{$SCOPEDENUMS ON}
|
||||
uses
|
||||
unicodedata;
|
||||
|
||||
type
|
||||
// Unicode General Category
|
||||
@ -49,6 +70,9 @@ type
|
||||
);
|
||||
TUnicodeCategorySet = set of TUnicodeCategory;
|
||||
|
||||
TCharacterOption = (coIgnoreInvalidSequence);
|
||||
TCharacterOptions = set of TCharacterOption;
|
||||
|
||||
{ TCharacter }
|
||||
|
||||
TCharacter = class sealed
|
||||
@ -112,10 +136,12 @@ type
|
||||
class function IsWhiteSpace(const AString : UnicodeString; AIndex : Integer) : Boolean; overload; static;
|
||||
|
||||
class function ToLower(AChar : UnicodeChar) : UnicodeChar; overload; static;
|
||||
class function ToLower(const AString : UnicodeString) : UnicodeString; overload; static;
|
||||
class function ToLower(const AString : UnicodeString) : UnicodeString; inline;overload; static;
|
||||
class function ToLower(const AString : UnicodeString; const AOptions : TCharacterOptions) : UnicodeString; overload; static;
|
||||
|
||||
class function ToUpper(AChar : UnicodeChar) : UnicodeChar; overload; static;
|
||||
class function ToUpper(const AString : UnicodeString) : UnicodeString; overload; static;
|
||||
class function ToUpper(const AString : UnicodeString) : UnicodeString; inline; overload; static;
|
||||
class function ToUpper(const AString : UnicodeString; const AOptions : TCharacterOptions) : UnicodeString; overload; static;
|
||||
end;
|
||||
|
||||
// flat functions
|
||||
@ -169,30 +195,6 @@ uses
|
||||
SysUtils,
|
||||
RtlConsts;
|
||||
|
||||
type
|
||||
PUC_Prop = ^TUC_Prop;
|
||||
TUC_Prop = packed record
|
||||
Category : TUnicodeCategory;
|
||||
NumericValue : Double;
|
||||
SimpleUpperCase : DWord;
|
||||
SimpleLowerCase : DWord;
|
||||
WhiteSpace : Boolean;
|
||||
end;
|
||||
|
||||
{$INCLUDE unicodedata.inc} // For BMP code points
|
||||
{$INCLUDE unicodedata2.inc} // For other planes
|
||||
|
||||
const
|
||||
LOW_SURROGATE_BEGIN = Word($DC00);
|
||||
LOW_SURROGATE_END = Word($DFFF);
|
||||
|
||||
HIGH_SURROGATE_BEGIN = Word($D800);
|
||||
HIGH_SURROGATE_END = Word($DBFF);
|
||||
HIGH_SURROGATE_COUNT = HIGH_SURROGATE_END - HIGH_SURROGATE_BEGIN + 1;
|
||||
UCS4_HALF_BASE = LongWord($10000);
|
||||
UCS4_HALF_MASK = Word($3FF);
|
||||
MAX_LEGAL_UTF32 = $10FFFF;
|
||||
|
||||
const
|
||||
LETTER_CATEGORIES = [
|
||||
TUnicodeCategory.ucUppercaseLetter, TUnicodeCategory.ucLowercaseLetter,
|
||||
@ -221,34 +223,6 @@ const
|
||||
TUnicodeCategory.ucModifierSymbol, TUnicodeCategory.ucOtherSymbol
|
||||
];
|
||||
|
||||
function GetProps(const ACodePoint : Word) : PUC_Prop; inline;
|
||||
begin
|
||||
Result:=
|
||||
@UC_PROP_ARRAY[
|
||||
UC_TABLE_2[
|
||||
(UC_TABLE_1[WordRec(ACodePoint).Hi] * 256) +
|
||||
WordRec(ACodePoint).Lo
|
||||
]
|
||||
];
|
||||
end;
|
||||
|
||||
function GetProps(const AHighS, ALowS : UnicodeChar): PUC_Prop; inline;
|
||||
begin
|
||||
Result:=
|
||||
@UC_PROP_ARRAY[
|
||||
UCO_TABLE_2[
|
||||
(UCO_TABLE_1[Word(AHighS)-HIGH_SURROGATE_BEGIN] * HIGH_SURROGATE_COUNT) +
|
||||
Word(ALowS) - LOW_SURROGATE_BEGIN
|
||||
]
|
||||
];
|
||||
end;
|
||||
|
||||
procedure FromUCS4(const AValue : UCS4Char; var AHighS, ALowS : UnicodeChar);
|
||||
begin
|
||||
AHighS := UnicodeChar((AValue - $10000) shr 10 + $d800);
|
||||
ALowS := UnicodeChar((AValue - $10000) and $3ff + $dc00);
|
||||
end;
|
||||
|
||||
function ConvertFromUtf32(AChar: UCS4Char): UnicodeString;
|
||||
begin
|
||||
Result := TCharacter.ConvertFromUtf32(AChar);
|
||||
@ -472,12 +446,12 @@ begin
|
||||
if (AIndex < 1) or (AIndex > Length(AString)) then
|
||||
raise EArgumentOutOfRangeException.CreateFmt(SStringIndexOutOfRange, [AIndex, Length(AString)]);
|
||||
pu := GetProps(Word(AString[AIndex]));
|
||||
if (pu^.Category = TUnicodeCategory.ucSurrogate) then begin
|
||||
if (TUnicodeCategory(pu^.Category) = TUnicodeCategory.ucSurrogate) then begin
|
||||
if not IsSurrogatePair(AString,AIndex) then
|
||||
raise EArgumentException.Create(SInvalidUnicodeCodePointSequence);
|
||||
pu := GetProps(AString[AIndex],AString[AIndex+1]);
|
||||
end;
|
||||
Result := (pu^.Category = ACategory);
|
||||
Result := (TUnicodeCategory(pu^.Category) = ACategory);
|
||||
end;
|
||||
|
||||
class function TCharacter.TestCategory(
|
||||
@ -491,12 +465,12 @@ begin
|
||||
if (AIndex < 1) or (AIndex > Length(AString)) then
|
||||
raise EArgumentOutOfRangeException.CreateFmt(SStringIndexOutOfRange, [AIndex, Length(AString)]);
|
||||
pu := GetProps(Word(AString[AIndex]));
|
||||
if (pu^.Category = TUnicodeCategory.ucSurrogate) then begin
|
||||
if (TUnicodeCategory(pu^.Category) = TUnicodeCategory.ucSurrogate) then begin
|
||||
if not IsSurrogatePair(AString,AIndex) then
|
||||
raise EArgumentException.Create(SInvalidUnicodeCodePointSequence);
|
||||
pu := GetProps(AString[AIndex],AString[AIndex+1]);
|
||||
end;
|
||||
Result := (pu^.Category in ACategory);
|
||||
Result := (TUnicodeCategory(pu^.Category) in ACategory);
|
||||
end;
|
||||
|
||||
constructor TCharacter.Create;
|
||||
@ -558,7 +532,7 @@ begin
|
||||
raise EArgumentOutOfRangeException.CreateFmt(SHighSurrogateOutOfRange, [Word(AHighSurrogate)]);
|
||||
if not IsLowSurrogate(ALowSurrogate) then
|
||||
raise EArgumentOutOfRangeException.CreateFmt(SLowSurrogateOutOfRange, [Word(ALowSurrogate)]);
|
||||
Result := (UCS4Char(AHighSurrogate) - HIGH_SURROGATE_BEGIN) shl 10 + (UCS4Char(ALowSurrogate) - LOW_SURROGATE_BEGIN) + UCS4_HALF_BASE;
|
||||
Result := ToUCS4(AHighSurrogate, ALowSurrogate);
|
||||
end;
|
||||
|
||||
class function TCharacter.GetNumericValue(AChar : UnicodeChar) : Double;
|
||||
@ -576,7 +550,7 @@ begin
|
||||
if (AIndex < 1) or (AIndex > Length(AString)) then
|
||||
raise EArgumentOutOfRangeException.CreateFmt(SStringIndexOutOfRange, [AIndex, Length(AString)]);
|
||||
pu := GetProps(Word(AString[AIndex]));
|
||||
if (pu^.Category = TUnicodeCategory.ucSurrogate) then begin
|
||||
if (TUnicodeCategory(pu^.Category) = TUnicodeCategory.ucSurrogate) then begin
|
||||
if not IsSurrogatePair(AString,AIndex) then
|
||||
raise EArgumentException.Create(SInvalidUnicodeCodePointSequence);
|
||||
pu := GetProps(AString[AIndex],AString[AIndex+1]);
|
||||
@ -586,7 +560,7 @@ end;
|
||||
|
||||
class function TCharacter.GetUnicodeCategory(AChar : UnicodeChar) : TUnicodeCategory;
|
||||
begin
|
||||
Result := GetProps(Word(AChar))^.Category;
|
||||
Result := TUnicodeCategory(GetProps(Word(AChar))^.Category);
|
||||
end;
|
||||
|
||||
class function TCharacter.GetUnicodeCategory(
|
||||
@ -599,17 +573,17 @@ begin
|
||||
if (AIndex < 1) or (AIndex > Length(AString)) then
|
||||
raise EArgumentOutOfRangeException.CreateFmt(SStringIndexOutOfRange, [AIndex, Length(AString)]);
|
||||
pu := GetProps(Word(AString[AIndex]));
|
||||
if (pu^.Category = TUnicodeCategory.ucSurrogate) then begin
|
||||
if (TUnicodeCategory(pu^.Category) = TUnicodeCategory.ucSurrogate) then begin
|
||||
if not IsSurrogatePair(AString,AIndex) then
|
||||
raise EArgumentException.Create(SInvalidUnicodeCodePointSequence);
|
||||
pu := GetProps(AString[AIndex],AString[AIndex+1]);
|
||||
end;
|
||||
Result := pu^.Category;
|
||||
Result := TUnicodeCategory(pu^.Category);
|
||||
end;
|
||||
|
||||
class function TCharacter.IsControl(AChar : UnicodeChar) : Boolean;
|
||||
begin
|
||||
Result := (GetProps(Word(AChar))^.Category = TUnicodeCategory.ucControl);
|
||||
Result := (TUnicodeCategory(GetProps(Word(AChar))^.Category) = TUnicodeCategory.ucControl);
|
||||
end;
|
||||
|
||||
class function TCharacter.IsControl(
|
||||
@ -622,7 +596,7 @@ end;
|
||||
|
||||
class function TCharacter.IsDigit(AChar : UnicodeChar) : Boolean;
|
||||
begin
|
||||
Result := (GetProps(Word(AChar))^.Category = TUnicodeCategory.ucDecimalNumber);
|
||||
Result := (TUnicodeCategory(GetProps(Word(AChar))^.Category) = TUnicodeCategory.ucDecimalNumber);
|
||||
end;
|
||||
|
||||
class function TCharacter.IsDigit(
|
||||
@ -635,7 +609,7 @@ end;
|
||||
|
||||
class function TCharacter.IsSurrogate(AChar : UnicodeChar) : Boolean;
|
||||
begin
|
||||
Result := (GetProps(Word(AChar))^.Category = TUnicodeCategory.ucSurrogate);
|
||||
Result := (TUnicodeCategory(GetProps(Word(AChar))^.Category) = TUnicodeCategory.ucSurrogate);
|
||||
end;
|
||||
|
||||
class function TCharacter.IsSurrogate(
|
||||
@ -650,7 +624,7 @@ end;
|
||||
|
||||
class function TCharacter.IsHighSurrogate(AChar : UnicodeChar) : Boolean;
|
||||
begin
|
||||
Result := (GetProps(Word(AChar))^.Category = TUnicodeCategory.ucSurrogate) and
|
||||
Result := (TUnicodeCategory(GetProps(Word(AChar))^.Category) = TUnicodeCategory.ucSurrogate) and
|
||||
(Word(AChar) >= HIGH_SURROGATE_BEGIN) and
|
||||
(Word(AChar) <= HIGH_SURROGATE_END);
|
||||
end;
|
||||
@ -667,7 +641,7 @@ end;
|
||||
|
||||
class function TCharacter.IsLowSurrogate(AChar : UnicodeChar) : Boolean;
|
||||
begin
|
||||
Result := (GetProps(Word(AChar))^.Category = TUnicodeCategory.ucSurrogate) and
|
||||
Result := (TUnicodeCategory(GetProps(Word(AChar))^.Category) = TUnicodeCategory.ucSurrogate) and
|
||||
(Word(AChar) >= LOW_SURROGATE_BEGIN) and
|
||||
(Word(AChar) <= LOW_SURROGATE_END);
|
||||
end;
|
||||
@ -687,13 +661,7 @@ class function TCharacter.IsSurrogatePair(
|
||||
ALowSurrogate : UnicodeChar
|
||||
) : Boolean;
|
||||
begin
|
||||
Result :=
|
||||
( (Word(AHighSurrogate) >= HIGH_SURROGATE_BEGIN) and
|
||||
(Word(AHighSurrogate) <= HIGH_SURROGATE_END)
|
||||
) and
|
||||
( (Word(ALowSurrogate) >= LOW_SURROGATE_BEGIN) and
|
||||
(Word(ALowSurrogate) <= LOW_SURROGATE_END)
|
||||
)
|
||||
Result := UnicodeIsSurrogatePair(AHighSurrogate,ALowSurrogate);
|
||||
end;
|
||||
|
||||
class function TCharacter.IsSurrogatePair(
|
||||
@ -714,7 +682,7 @@ end;
|
||||
|
||||
class function TCharacter.IsLetter(AChar : UnicodeChar) : Boolean;
|
||||
begin
|
||||
Result := (GetProps(Word(AChar))^.Category in LETTER_CATEGORIES);
|
||||
Result := (TUnicodeCategory(GetProps(Word(AChar))^.Category) in LETTER_CATEGORIES);
|
||||
end;
|
||||
|
||||
class function TCharacter.IsLetter(
|
||||
@ -727,7 +695,7 @@ end;
|
||||
|
||||
class function TCharacter.IsLetterOrDigit(AChar : UnicodeChar) : Boolean;
|
||||
begin
|
||||
Result := (GetProps(Word(AChar))^.Category in LETTER_OR_DIGIT_CATEGORIES);
|
||||
Result := (TUnicodeCategory(GetProps(Word(AChar))^.Category) in LETTER_OR_DIGIT_CATEGORIES);
|
||||
end;
|
||||
|
||||
class function TCharacter.IsLetterOrDigit(
|
||||
@ -740,7 +708,7 @@ end;
|
||||
|
||||
class function TCharacter.IsLower(AChar : UnicodeChar) : Boolean;
|
||||
begin
|
||||
Result := (GetProps(Word(AChar))^.Category = TUnicodeCategory.ucLowercaseLetter);
|
||||
Result := (TUnicodeCategory(GetProps(Word(AChar))^.Category) = TUnicodeCategory.ucLowercaseLetter);
|
||||
end;
|
||||
|
||||
class function TCharacter.IsLower(
|
||||
@ -753,7 +721,7 @@ end;
|
||||
|
||||
class function TCharacter.IsNumber(AChar : UnicodeChar) : Boolean;
|
||||
begin
|
||||
Result := (GetProps(Word(AChar))^.Category in NUMBER_CATEGORIES);
|
||||
Result := (TUnicodeCategory(GetProps(Word(AChar))^.Category) in NUMBER_CATEGORIES);
|
||||
end;
|
||||
|
||||
class function TCharacter.IsNumber(
|
||||
@ -766,7 +734,7 @@ end;
|
||||
|
||||
class function TCharacter.IsPunctuation(AChar : UnicodeChar) : Boolean;
|
||||
begin
|
||||
Result := (GetProps(Word(AChar))^.Category in PUNCTUATION_CATEGORIES);
|
||||
Result := (TUnicodeCategory(GetProps(Word(AChar))^.Category) in PUNCTUATION_CATEGORIES);
|
||||
end;
|
||||
|
||||
class function TCharacter.IsPunctuation(
|
||||
@ -779,7 +747,7 @@ end;
|
||||
|
||||
class function TCharacter.IsSeparator(AChar: UnicodeChar): Boolean;
|
||||
begin
|
||||
Result := (GetProps(Word(AChar))^.Category in SEPARATOR_CATEGORIES);
|
||||
Result := (TUnicodeCategory(GetProps(Word(AChar))^.Category) in SEPARATOR_CATEGORIES);
|
||||
end;
|
||||
|
||||
class function TCharacter.IsSeparator(
|
||||
@ -792,7 +760,7 @@ end;
|
||||
|
||||
class function TCharacter.IsSymbol(AChar: UnicodeChar): Boolean;
|
||||
begin
|
||||
Result := (GetProps(Word(AChar))^.Category in SYMBOL_CATEGORIES);
|
||||
Result := (TUnicodeCategory(GetProps(Word(AChar))^.Category) in SYMBOL_CATEGORIES);
|
||||
end;
|
||||
|
||||
class function TCharacter.IsSymbol(
|
||||
@ -805,7 +773,7 @@ end;
|
||||
|
||||
class function TCharacter.IsUpper(AChar : UnicodeChar) : Boolean;
|
||||
begin
|
||||
Result := (GetProps(Word(AChar))^.Category = TUnicodeCategory.ucUppercaseLetter);
|
||||
Result := (TUnicodeCategory(GetProps(Word(AChar))^.Category) = TUnicodeCategory.ucUppercaseLetter);
|
||||
end;
|
||||
|
||||
class function TCharacter.IsUpper(
|
||||
@ -831,7 +799,7 @@ begin
|
||||
if (AIndex < 1) or (AIndex > Length(AString)) then
|
||||
raise EArgumentOutOfRangeException.CreateFmt(SStringIndexOutOfRange, [AIndex, Length(AString)]);
|
||||
pu := GetProps(Word(AString[AIndex]));
|
||||
if (pu^.Category = TUnicodeCategory.ucSurrogate) then begin
|
||||
if (TUnicodeCategory(pu^.Category) = TUnicodeCategory.ucSurrogate) then begin
|
||||
if not IsSurrogatePair(AString,AIndex) then
|
||||
raise EArgumentException.Create(SInvalidUnicodeCodePointSequence);
|
||||
pu := GetProps(AString[AIndex],AString[AIndex+1]);
|
||||
@ -841,28 +809,43 @@ end;
|
||||
|
||||
class function TCharacter.ToLower(AChar : UnicodeChar) : UnicodeChar;
|
||||
begin
|
||||
Result := UnicodeChar(GetProps(Word(AChar))^.SimpleLowerCase);
|
||||
Result := UnicodeChar(Word(GetProps(Word(AChar))^.SimpleLowerCase));
|
||||
if (Result = UnicodeChar(0)) then
|
||||
Result := AChar;
|
||||
end;
|
||||
|
||||
class function TCharacter.ToLower(const AString : UnicodeString) : UnicodeString;
|
||||
begin
|
||||
Result := ToLower(AString,[]);
|
||||
end;
|
||||
|
||||
class function TCharacter.ToLower(const AString : UnicodeString; const AOptions : TCharacterOptions) : UnicodeString;
|
||||
var
|
||||
i, c : SizeInt;
|
||||
pp, pr : PUnicodeChar;
|
||||
pu : PUC_Prop;
|
||||
locIsSurrogate : Boolean;
|
||||
locIsSurrogate, locIgnoreInvalid : Boolean;
|
||||
begin
|
||||
c := Length(AString);
|
||||
SetLength(Result,2*c);
|
||||
if (c > 0) then begin
|
||||
locIgnoreInvalid := (TCharacterOption.coIgnoreInvalidSequence in AOptions);
|
||||
pp := @AString[1];
|
||||
pr := @Result[1];
|
||||
i := 1;
|
||||
while (i <= c) do begin
|
||||
pu := GetProps(Word(pp^));
|
||||
locIsSurrogate := (pu^.Category = TUnicodeCategory.ucSurrogate);
|
||||
locIsSurrogate := (TUnicodeCategory(pu^.Category) = TUnicodeCategory.ucSurrogate);
|
||||
if locIsSurrogate then begin
|
||||
if locIgnoreInvalid then begin
|
||||
if (i = c) or not(IsSurrogatePair(pp[0],pp[1])) then begin
|
||||
pr^ := pp^;
|
||||
Inc(pp);
|
||||
Inc(pr);
|
||||
Inc(i);
|
||||
Continue;
|
||||
end;
|
||||
end;
|
||||
if not IsSurrogatePair(AString,i) then
|
||||
raise EArgumentException.Create(SInvalidUnicodeCodePointSequence);
|
||||
pu := GetProps(pp^,AString[i+1]);
|
||||
@ -879,7 +862,7 @@ begin
|
||||
if (pu^.SimpleLowerCase <= $FFFF) then begin
|
||||
pr^ := UnicodeChar(Word(pu^.SimpleLowerCase));
|
||||
end else begin
|
||||
FromUCS4(UCS4Char(pu^.SimpleLowerCase),pr^,PUnicodeChar(PtrUInt(pr)+SizeOf(UnicodeChar))^);
|
||||
FromUCS4(UCS4Char(Cardinal(pu^.SimpleLowerCase)),pr^,PUnicodeChar(PtrUInt(pr)+SizeOf(UnicodeChar))^);
|
||||
Inc(pr);
|
||||
end;
|
||||
if locIsSurrogate then begin
|
||||
@ -899,28 +882,43 @@ end;
|
||||
|
||||
class function TCharacter.ToUpper(AChar : UnicodeChar) : UnicodeChar;
|
||||
begin
|
||||
Result := UnicodeChar(GetProps(Word(AChar))^.SimpleUpperCase);
|
||||
Result := UnicodeChar(Word(GetProps(Word(AChar))^.SimpleUpperCase));
|
||||
if (Result = UnicodeChar(0)) then
|
||||
Result := AChar;
|
||||
end;
|
||||
|
||||
class function TCharacter.ToUpper(const AString : UnicodeString) : UnicodeString;
|
||||
begin
|
||||
Result := ToUpper(AString,[]);
|
||||
end;
|
||||
|
||||
class function TCharacter.ToUpper(const AString : UnicodeString; const AOptions : TCharacterOptions) : UnicodeString;
|
||||
var
|
||||
i, c : SizeInt;
|
||||
pp, pr : PUnicodeChar;
|
||||
pu : PUC_Prop;
|
||||
locIsSurrogate : Boolean;
|
||||
locIsSurrogate, locIgnoreInvalid : Boolean;
|
||||
begin
|
||||
c := Length(AString);
|
||||
SetLength(Result,2*c);
|
||||
if (c > 0) then begin
|
||||
locIgnoreInvalid := (TCharacterOption.coIgnoreInvalidSequence in AOptions);
|
||||
pp := @AString[1];
|
||||
pr := @Result[1];
|
||||
i := 1;
|
||||
while (i <= c) do begin
|
||||
pu := GetProps(Word(pp^));
|
||||
locIsSurrogate := (pu^.Category = TUnicodeCategory.ucSurrogate);
|
||||
locIsSurrogate := (TUnicodeCategory(pu^.Category) = TUnicodeCategory.ucSurrogate);
|
||||
if locIsSurrogate then begin
|
||||
if locIgnoreInvalid then begin
|
||||
if (i = c) or not(IsSurrogatePair(pp[0],pp[1])) then begin
|
||||
pr^ := pp^;
|
||||
Inc(pp);
|
||||
Inc(pr);
|
||||
Inc(i);
|
||||
Continue;
|
||||
end;
|
||||
end;
|
||||
if not IsSurrogatePair(AString,i) then
|
||||
raise EArgumentException.Create(SInvalidUnicodeCodePointSequence);
|
||||
pu := GetProps(pp^,AString[i+1]);
|
||||
@ -937,7 +935,7 @@ begin
|
||||
if (pu^.SimpleUpperCase <= $FFFF) then begin
|
||||
pr^ := UnicodeChar(Word(pu^.SimpleUpperCase));
|
||||
end else begin
|
||||
FromUCS4(UCS4Char(pu^.SimpleUpperCase),pr^,PUnicodeChar(PtrUInt(pr)+SizeOf(UnicodeChar))^);
|
||||
FromUCS4(UCS4Char(Cardinal(pu^.SimpleUpperCase)),pr^,PUnicodeChar(PtrUInt(pr)+SizeOf(UnicodeChar))^);
|
||||
Inc(pr);
|
||||
end;
|
||||
if locIsSurrogate then begin
|
||||
@ -954,5 +952,6 @@ begin
|
||||
SetLength(Result,i)
|
||||
end;
|
||||
end;
|
||||
|
||||
{$endif VER2_4}
|
||||
end.
|
||||
|
File diff suppressed because it is too large
Load Diff
2467
rtl/objpas/unicodedata.pas
Normal file
2467
rtl/objpas/unicodedata.pas
Normal file
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
4700
rtl/objpas/unicodedata_be.inc
Normal file
4700
rtl/objpas/unicodedata_be.inc
Normal file
File diff suppressed because it is too large
Load Diff
4700
rtl/objpas/unicodedata_le.inc
Normal file
4700
rtl/objpas/unicodedata_le.inc
Normal file
File diff suppressed because it is too large
Load Diff
53
rtl/objpas/unicodenumtable.pas
Normal file
53
rtl/objpas/unicodenumtable.pas
Normal file
@ -0,0 +1,53 @@
|
||||
{ Unicode implementation tables.
|
||||
|
||||
Copyright (c) 2013 by Inoussa OUEDRAOGO
|
||||
|
||||
Permission is hereby granted, free of charge, to any person
|
||||
obtaining a copy of the Unicode data files and any associated
|
||||
documentation (the "Data Files") or Unicode software and any
|
||||
associated documentation (the "Software") to deal in the Data
|
||||
Files or Software without restriction, including without
|
||||
limitation the rights to use, copy, modify, merge, publish,
|
||||
distribute, and/or sell copies of the Data Files or Software,
|
||||
and to permit persons to whom the Data Files or Software are
|
||||
furnished to do so, provided that (a) the above copyright
|
||||
notice(s) and this permission notice appear with all copies
|
||||
of the Data Files or Software, (b) both the above copyright
|
||||
notice(s) and this permission notice appear in associated
|
||||
documentation, and (c) there is clear notice in each modified
|
||||
Data File or in the Software as well as in the documentation
|
||||
associated with the Data File(s) or Software that the data or
|
||||
software has been modified.
|
||||
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. }
|
||||
|
||||
unit unicodenumtable;
|
||||
interface
|
||||
|
||||
|
||||
const
|
||||
UC_NUMERIC_COUNT = 112;
|
||||
UC_NUMERIC_ARRAY : array[0..(UC_NUMERIC_COUNT-1)] of Double = (
|
||||
0 ,1 ,2 ,3 ,4 ,5 ,6 ,7 ,8 ,
|
||||
9 ,0.25 ,0.5 ,0.75 ,0.0625 ,0.125 ,0.1875 ,16 ,
|
||||
10 ,100 ,1000 ,1.5 ,2.5 ,3.5 ,4.5 ,5.5 ,
|
||||
6.5 ,7.5 ,8.5 ,-0.5 ,20 ,30 ,40 ,50 ,
|
||||
60 ,70 ,80 ,90 ,10000 ,17 ,18 ,19 ,
|
||||
0.142857142857143 ,0.111111111111111 ,0.1 ,0.333333333333333 ,0.666666666666667 ,0.2 ,0.4 ,0.6 ,
|
||||
0.8 ,0.166666666666667 ,0.833333333333334 ,0.375 ,0.625 ,0.875 ,11 ,12 ,
|
||||
500 ,5000 ,50000 ,100000 ,13 ,14 ,15 ,21 ,
|
||||
22 ,23 ,24 ,25 ,26 ,27 ,28 ,29 ,
|
||||
31 ,32 ,33 ,34 ,35 ,36 ,37 ,38 ,
|
||||
39 ,41 ,42 ,43 ,44 ,45 ,46 ,47 ,
|
||||
48 ,49 ,200 ,300 ,400 ,600 ,700 ,800 ,
|
||||
900 ,2000 ,3000 ,4000 ,6000 ,7000 ,8000 ,9000 ,
|
||||
20000 ,30000 ,40000 ,60000 ,70000 ,80000 ,90000
|
||||
);
|
||||
|
||||
|
||||
implementation
|
||||
|
||||
end.
|
66
rtl/objpas/weight_derivation.inc
Normal file
66
rtl/objpas/weight_derivation.inc
Normal file
@ -0,0 +1,66 @@
|
||||
|
||||
function IsCJK_Unified_Ideographs(ACodePoint : Cardinal) : Boolean;inline;
|
||||
begin
|
||||
Result := (ACodePoint >= $4E00) and (ACodePoint <= $9FCC); // $9FFF
|
||||
end;
|
||||
|
||||
function IsCJK_Compatibility_Ideographs(ACodePoint : Cardinal) : Boolean;inline;
|
||||
begin
|
||||
Result := (ACodePoint >= $F900) and (ACodePoint <= $FAFF);
|
||||
end;
|
||||
|
||||
function IsCJK_Unified_Ideographs_Extension_A(ACodePoint : Cardinal) : Boolean;inline;
|
||||
begin
|
||||
Result := (ACodePoint >= $3400) and (ACodePoint <= $4DB5); // $4DBF
|
||||
end;
|
||||
|
||||
function IsCJK_Unified_Ideographs_Extension_B(ACodePoint : Cardinal) : Boolean;inline;
|
||||
begin
|
||||
Result := (ACodePoint >= $20000) and (ACodePoint <= $2A6D6); // $2A6DF
|
||||
end;
|
||||
|
||||
function IsCJK_Unified_Ideographs_Extension_C(ACodePoint : Cardinal) : Boolean;inline;
|
||||
begin
|
||||
Result := (ACodePoint >= $2A700) and (ACodePoint <= $2B734); // $2B73F
|
||||
end;
|
||||
|
||||
function IsCJK_Unified_Ideographs_Extension_D(ACodePoint : Cardinal) : Boolean;inline;
|
||||
begin
|
||||
Result := (ACodePoint >= $2B740) and (ACodePoint <= $2B81D); // $2B81F
|
||||
end;
|
||||
|
||||
function IsCJK_Compatibility_Ideographs_Supplement(ACodePoint : Cardinal) : Boolean;inline;
|
||||
begin
|
||||
Result := (ACodePoint >= $2F800) and (ACodePoint <= $2FA1F);
|
||||
end;
|
||||
|
||||
procedure DeriveWeight(const ACodePoint : Cardinal; AResult : PUCA_PropWeights);
|
||||
const
|
||||
BASE_1 = Word($FB40);
|
||||
BASE_2 = Word($FB80);
|
||||
BASE_3 = Word($FBC0);
|
||||
var
|
||||
base : Word;
|
||||
begin
|
||||
if IsCJK_Unified_Ideographs(ACodePoint) or IsCJK_Compatibility_Ideographs(ACodePoint) then
|
||||
base := BASE_1
|
||||
else if IsCJK_Unified_Ideographs_Extension_A(ACodePoint) or
|
||||
IsCJK_Unified_Ideographs_Extension_B(ACodePoint) or
|
||||
IsCJK_Unified_Ideographs_Extension_C(ACodePoint) or
|
||||
IsCJK_Unified_Ideographs_Extension_D(ACodePoint) or
|
||||
IsCJK_Compatibility_Ideographs_Supplement(ACodePoint)
|
||||
then begin
|
||||
base := BASE_2;
|
||||
end else begin
|
||||
base := BASE_3;
|
||||
end;
|
||||
|
||||
|
||||
AResult[0].Weights[0] := base + (ACodePoint shr 15);
|
||||
AResult[0].Weights[1] := $20;
|
||||
AResult[0].Weights[2] := $2;
|
||||
|
||||
AResult[1].Weights[0] := (ACodePoint and $7FFF) or $8000;
|
||||
AResult[1].Weights[1] := 0;
|
||||
AResult[1].Weights[2] := 0;
|
||||
end;
|
Loading…
Reference in New Issue
Block a user