Starts the new UTF8UpperCase

git-svn-id: trunk@32728 -
This commit is contained in:
sekelsenmat 2011-10-07 07:48:27 +00:00
parent 2333a0cfbe
commit f1cbb0b645

View File

@ -63,8 +63,9 @@ procedure UTF8Insert(const source: String; var s: string; StartCharIndex: PtrInt
function UnicodeLowercase(u: cardinal): cardinal;
function UTF8LowerCase(const s: utf8string): utf8string;
//function UTF8UpperCase(const s: String): String;
//function FindInvalidUTF8Character(p: PChar; Count: PtrInt;
// Upper-cases a UTF-8 encoded string; forwards to the two-argument overload with ALocale = ''.
function UTF8UpperCase(const AInStr: utf8string): utf8string;
// Upper-cases a UTF-8 encoded string. ALocale is the locale; pass '' to ignore the locale.
function UTF8UpperCase(const AInStr, ALocale: utf8string): utf8string;
{function FindInvalidUTF8Character(p: PChar; Count: PtrInt;
// StopOnNonASCII: Boolean = false): PtrInt;
//function ValidUTF8String(const s: String): String;
@ -74,7 +75,7 @@ function UTF8LowerCase(const s: utf8string): utf8string;
//function UTF16Length(const s: widestring): PtrInt;
//function UTF16Length(p: PWideChar; WordCount: PtrInt): PtrInt;
//function UTF16CharacterToUnicode(p: PWideChar; out CharLen: integer): Cardinal;
//function UnicodeToUTF16(u: cardinal): widestring;
//function UnicodeToUTF16(u: cardinal): widestring;}
//compare functions
@ -1137,6 +1138,98 @@ begin
end;
end;
{
  Locale-independent convenience overload: upper-cases AInStr by delegating
  to the two-argument UTF8UpperCase with an empty locale string.
}
function UTF8UpperCase(const AInStr: utf8string): utf8string;
const
  // An empty locale tells the two-argument overload to ignore the locale.
  NoLocale = '';
begin
  Result := UTF8UpperCase(AInStr, NoLocale);
end;
{
  Converts a UTF-8 encoded string to upper case.

  AInStr  - The input string
  ALocale - The locale. Use '' for maximum speed if one desires to ignore the
            locale. 'tr' (and the legacy spelling 'tu', kept for backward
            compatibility) selects the Turkish rules:
              'i'    (U+0069) -> U+0130 (capital dotted I, bytes C4 B0)
              U+0131 (small dotless i, bytes C4 B1) -> 'I'

  Returns the converted string. Work in progress: only ASCII 'a'..'z' and the
  two Turkish special cases are converted; every other byte sequence is copied
  to the result unchanged.
}
function UTF8UpperCase(const AInStr, ALocale: utf8string): utf8string;
var
  i, InLen, InCounter, OutCounter: PtrInt;
  CharLen: integer;
  // Language identification
  IsTurkish: Boolean;
begin
  InLen := Length(AInStr);

  // Language identification ('tr' is the ISO 639-1 code; 'tu' was the value
  // historically checked here and is still accepted)
  IsTurkish := (ALocale = 'tr') or (ALocale = 'tu');

  // The output can be longer than the input: with Turkish rules every 'i'
  // (1 byte) becomes U+0130 (2 bytes). Reserve the worst case up front and
  // trim to the real size at the end.
  if IsTurkish then
    SetLength(Result, InLen * 2)
  else
    SetLength(Result, InLen);

  InCounter := 1;  // next byte to read from AInStr
  OutCounter := 1; // next byte to write in Result
  while InCounter <= InLen do
  begin
    case AInStr[InCounter] of
      { First ASCII chars }
      'a'..'z':
        begin
          if IsTurkish and (AInStr[InCounter] = 'i') then
          begin
            // Special turkish handling
            // small dotted i to capital dotted i
            Result[OutCounter] := #$C4;
            Result[OutCounter + 1] := #$B0;
            inc(OutCounter, 2);
          end
          else
          begin
            Result[OutCounter] := chr(ord(AInStr[InCounter]) - 32);
            inc(OutCounter);
          end;
          inc(InCounter);
        end;

      { Now chars with multiple bytes (lead bytes of 2, 3 and 4 byte sequences) }
      #$C0..#$F7:
        begin
          CharLen := UTF8CharacterLength(@AInStr[InCounter]);
          // Never step backwards/stand still and never run past the end of
          // the input on a truncated or invalid sequence.
          if CharLen < 1 then
            CharLen := 1;
          if CharLen > InLen - InCounter + 1 then
            CharLen := InLen - InCounter + 1;

          // Special turkish handling
          // small undotted i to capital undotted i
          if IsTurkish and (CharLen = 2)
            and (AInStr[InCounter] = #$C4) and (AInStr[InCounter + 1] = #$B1) then
          begin
            Result[OutCounter] := 'I';
            inc(OutCounter);
          end
          else
          begin
            // TODO: upper-case mappings for other multi-byte characters.
            // Until then the character is copied through unchanged.
            for i := 0 to CharLen - 1 do
              Result[OutCounter + i] := AInStr[InCounter + i];
            inc(OutCounter, CharLen);
          end;
          inc(InCounter, CharLen);
        end;
    else
      begin
        // Everything else (other ASCII, stray continuation bytes) is copied
        // verbatim; this must be an explicit copy because input and output
        // positions can differ after a Turkish conversion.
        Result[OutCounter] := AInStr[InCounter];
        inc(InCounter);
        inc(OutCounter);
      end;
    end; // case
  end; // while

  // Drop the unused part of the worst-case allocation
  SetLength(Result, OutCounter - 1);
end;
{------------------------------------------------------------------------------
Name: UTF8CompareStr
Params: S1, S2 - UTF8 encoded strings