lazutf8: some optimizations for UTF8LowerCase (Mattias)

git-svn-id: trunk@32756 -
2025-08-15 01:39:25 +02:00 · 2011-10-07 14:49:27 +00:00 · 2011-10-07 14:49:27 +00:00 · 1e24d21f65
commit 1e24d21f65
parent 493cba2f66
1 changed file with 43 additions and 27 deletions
--- a/components/lazutils/lazutf8.pas
+++ b/components/lazutils/lazutf8.pas
@@ -1113,40 +1113,56 @@ var
  OldCode: LongWord;
  NewCode: LongWord;
  NewCharLen: integer;
  Changed: Boolean;
  p: PChar;
 begin
  Result:=s;
-  i:=1;
+  if Result='' then exit;
-  while i<=length(Result) do begin
+  Changed:=false;
-    case Result[i] of
+  p:=PChar(Result);
-    { First ASCII chars }
+  repeat
-    'A'..'Z':
+    case p^ of
    #0:
      if p-PChar(Result)=length(Result) then
        exit
      else
        inc(p);
    'A'..'Z': // First ASCII chars
      begin
-        Result[i]:=chr(ord(Result[i])+32);
+        if not Changed then begin
-        inc(i);
+          i:=p-PChar(Result)+1;
      end;
    { Now chars with multiple bytes }
    #192..#240:
      begin
        OldCode:=UTF8CharacterToUnicode(@Result[i],CharLen);
        NewCode:=UnicodeLowercase(OldCode);
        if NewCode=OldCode then begin
          inc(i,CharLen);
        end else begin
          UniqueString(Result);
-          NewCharLen:=UnicodeToUTF8(NewCode,@Result[i]);
+          Changed:=true;
-          if CharLen=NewCharLen then begin
+          p:=@Result[i];
-            inc(i,NewCharLen);
+        end;
-          end else begin
+        p^:=chr(ord(p^)+32);
        inc(p);
      end;
    #192..#240: // Now chars with multiple bytes
      begin
        OldCode:=UTF8CharacterToUnicode(p,CharLen);
        NewCode:=UnicodeLowercase(OldCode);
        if NewCode<>OldCode then begin
          if not Changed then begin
            i:=p-PChar(Result)+1;
            UniqueString(Result);
            Changed:=true;
            p:=@Result[i];
          end;
          NewCharLen:=UnicodeToUTF8(NewCode,p);
          if CharLen<>NewCharLen then begin
            // string size changed => use slower function
            Result:=UTF8LowercaseDynLength(s);
            exit;
          end;
        end;
        inc(p,CharLen);
      end;
    else
-      inc(i);
+      inc(p);
    end;
-  end;
+  until false;
 end;
 {$endif}
@@ -1242,9 +1258,9 @@ begin
        end;
        // $C4B1 turkish lowercase undotted ı
        $C4B2..$C4B6: if OldChar mod 2 = 0 then NewChar := OldChar + 1;
-        //$C4B7: ĸ => K ?
+        // $C4B7: ĸ => K ?
        $C4B8..$C588: if OldChar mod 2 = 1 then NewChar := OldChar + 1;
-        //$C589 ŉ => ?
+        // $C589 ŉ => ?
        $C58A..$C5B7: if OldChar mod 2 = 0 then NewChar := OldChar + 1;
        $C5B8:        NewChar := $C3BF; // Ÿ
        $C5B9..$C8B3: if OldChar mod 2 = 1 then NewChar := OldChar + 1;
@@ -1361,11 +1377,11 @@ begin
          CharProcessed := True;
        end;
        $C4B2..$C4B6: if OldChar mod 2 = 1 then NewChar := OldChar - 1;
-        //$C4B7: ĸ => K ?
+        // $C4B7: ĸ => K ?
        $C4B8..$C588: if OldChar mod 2 = 0 then NewChar := OldChar - 1;
-        //$C589 ŉ => ?
+        // $C589 ŉ => ?
        $C58A..$C5B7: if OldChar mod 2 = 1 then NewChar := OldChar - 1;
-        //$C5B8: // Ÿ already uppercase
+        // $C5B8: // Ÿ already uppercase
        $C5B9..$C8B3: if OldChar mod 2 = 0 then NewChar := OldChar - 1;
        //
        $CEB1..$CEBF: NewChar := OldChar - $20; // Greek Characters