Starts the new UTF8UpperCase

git-svn-id: trunk@32728 -
2025-08-18 22:59:07 +02:00 · 2011-10-07 07:48:27 +00:00 · 2011-10-07 07:48:27 +00:00 · f1cbb0b645
commit f1cbb0b645
parent 2333a0cfbe
1 changed files with 96 additions and 3 deletions
--- a/components/lazutils/lazutf8.pas
+++ b/components/lazutils/lazutf8.pas
@ -63,8 +63,9 @@ procedure UTF8Insert(const source: String; var s: string; StartCharIndex: PtrInt
 function UnicodeLowercase(u: cardinal): cardinal;
 function UTF8LowerCase(const s: utf8string): utf8string;
-//function UTF8UpperCase(const s: String): String;
+function UTF8UpperCase(const AInStr: utf8string): utf8string;
-//function FindInvalidUTF8Character(p: PChar; Count: PtrInt;
+function UTF8UpperCase(const AInStr, ALocale: utf8string): utf8string;
 {function FindInvalidUTF8Character(p: PChar; Count: PtrInt;
 //                                  StopOnNonASCII: Boolean = false): PtrInt;
 //function ValidUTF8String(const s: String): String;
@ -74,7 +75,7 @@ function UTF8LowerCase(const s: utf8string): utf8string;
 //function UTF16Length(const s: widestring): PtrInt;
 //function UTF16Length(p: PWideChar; WordCount: PtrInt): PtrInt;
 //function UTF16CharacterToUnicode(p: PWideChar; out CharLen: integer): Cardinal;
-//function UnicodeToUTF16(u: cardinal): widestring;
+//function UnicodeToUTF16(u: cardinal): widestring;}
 //compare functions
@ -1137,6 +1138,98 @@ begin
  end;
 end;
 // Convenience overload: upper-cases AInStr without any locale-specific rules
 // by delegating to the two-argument overload with an empty locale ('').
 function UTF8UpperCase(const AInStr: utf8string): utf8string;
 begin
  Result := UTF8UpperCase(AInStr, '');
 end;
 {
   UTF8UpperCase (locale-aware overload)

   AInStr  - The UTF-8 encoded input string
   ALocale - The locale. Use '' for maximum speed if one desires to ignore the
             locale. 'tr' (ISO 639-1) selects the Turkish dotted / dotless i
             rules; the earlier spelling 'tu' is still accepted so existing
             callers keep working.

   Returns a new string in which ASCII 'a'..'z' are upper-cased. With the
   Turkish locale, 'i' becomes U+0130 (bytes C4 B0) and U+0131 (bytes C4 B1)
   becomes 'I'. Every other byte sequence is copied through unchanged.
 }
 function UTF8UpperCase(const AInStr, ALocale: utf8string): utf8string;
 var
   i, InLen, InCounter, OutCounter: PtrInt;
   CharLen: integer;
   // Language identification
   IsTurkish: Boolean;
   // The output is assembled here and only assigned to Result at the very end,
   // so AInStr is never modified while it is still being read (a caller may
   // use the same variable as both argument and destination).
   Buf: utf8string;
 begin
   // Language identification
   IsTurkish := (ALocale = 'tr') or (ALocale = 'tu');
   InLen := Length(AInStr);
   // Worst case growth: Turkish 'i' (1 byte) -> U+0130 (2 bytes).
   // Without the Turkish rules the output is never longer than the input.
   if IsTurkish then
     SetLength(Buf, InLen * 2)
   else
     SetLength(Buf, InLen);
   InCounter := 1;  // read position in AInStr
   OutCounter := 1; // write position in Buf
   while InCounter <= InLen do
   begin
     case AInStr[InCounter] of
     { First ASCII chars }
     'a'..'z':
       begin
         if IsTurkish and (AInStr[InCounter] = 'i') then
         begin
           // Special turkish handling:
           // small dotted i to capital dotted i (U+0130)
           Buf[OutCounter] := #$C4;
           Buf[OutCounter+1] := #$B0;
           inc(OutCounter, 2);
         end
         else
         begin
           Buf[OutCounter] := chr(ord(AInStr[InCounter]) - 32);
           inc(OutCounter);
         end;
         inc(InCounter);
       end;
     { Now chars with multiple bytes }
     #192..#240:
       begin
         CharLen := UTF8CharacterLength(@AInStr[InCounter]);
         // Never step backwards/stand still, and never read past the end of
         // a truncated (invalid) input string.
         if CharLen < 1 then
           CharLen := 1;
         if InCounter + CharLen - 1 > InLen then
           CharLen := InLen - InCounter + 1;
         if IsTurkish and (CharLen = 2)
           and (AInStr[InCounter] = #$C4) and (AInStr[InCounter+1] = #$B1) then
         begin
           // Special turkish handling:
           // small undotted i (U+0131) to capital undotted i
           Buf[OutCounter] := 'I';
           inc(InCounter, 2);
           inc(OutCounter);
         end
         else
         begin
           // No conversion known for this character yet: copy it verbatim
           for i := 0 to CharLen-1 do
             Buf[OutCounter+i] := AInStr[InCounter+i];
           inc(InCounter, CharLen);
           inc(OutCounter, CharLen);
         end;
       end;
     else
       // Any other byte is copied unchanged; this must be an explicit copy
       // because read and write positions may differ after a Turkish change
       Buf[OutCounter] := AInStr[InCounter];
       inc(InCounter);
       inc(OutCounter);
     end; // case
   end; // while
   // Drop the unused tail of the worst-case allocation
   SetLength(Buf, OutCounter - 1);
   Result := Buf;
 end;
 {------------------------------------------------------------------------------
  Name:    UTF8CompareStr
  Params: S1, S2 - UTF8 encoded strings