lazutf8: some optimizations for UTF8LowerCase (author: Mattias)

git-svn-id: trunk@32756 -
This commit is contained in:
mattias 2011-10-07 14:49:27 +00:00
parent 493cba2f66
commit 1e24d21f65

View File

@@ -1113,40 +1113,56 @@ var
OldCode: LongWord; OldCode: LongWord;
NewCode: LongWord; NewCode: LongWord;
NewCharLen: integer; NewCharLen: integer;
Changed: Boolean;
p: PChar;
begin begin
Result:=s; Result:=s;
i:=1; if Result='' then exit;
while i<=length(Result) do begin Changed:=false;
case Result[i] of p:=PChar(Result);
{ First ASCII chars } repeat
'A'..'Z': case p^ of
#0:
if p-PChar(Result)=length(Result) then
exit
else
inc(p);
'A'..'Z': // First ASCII chars
begin begin
Result[i]:=chr(ord(Result[i])+32); if not Changed then begin
inc(i); i:=p-PChar(Result)+1;
end;
{ Now chars with multiple bytes }
#192..#240:
begin
OldCode:=UTF8CharacterToUnicode(@Result[i],CharLen);
NewCode:=UnicodeLowercase(OldCode);
if NewCode=OldCode then begin
inc(i,CharLen);
end else begin
UniqueString(Result); UniqueString(Result);
NewCharLen:=UnicodeToUTF8(NewCode,@Result[i]); Changed:=true;
if CharLen=NewCharLen then begin p:=@Result[i];
inc(i,NewCharLen); end;
end else begin p^:=chr(ord(p^)+32);
inc(p);
end;
#192..#240: // Now chars with multiple bytes
begin
OldCode:=UTF8CharacterToUnicode(p,CharLen);
NewCode:=UnicodeLowercase(OldCode);
if NewCode<>OldCode then begin
if not Changed then begin
i:=p-PChar(Result)+1;
UniqueString(Result);
Changed:=true;
p:=@Result[i];
end;
NewCharLen:=UnicodeToUTF8(NewCode,p);
if CharLen<>NewCharLen then begin
// string size changed => use slower function // string size changed => use slower function
Result:=UTF8LowercaseDynLength(s); Result:=UTF8LowercaseDynLength(s);
exit; exit;
end; end;
end; end;
inc(p,CharLen);
end; end;
else else
inc(i); inc(p);
end; end;
end; until false;
end; end;
{$endif} {$endif}
@@ -1242,9 +1258,9 @@ begin
end; end;
// $C4B1 turkish lowercase undotted ı // $C4B1 turkish lowercase undotted ı
$C4B2..$C4B6: if OldChar mod 2 = 0 then NewChar := OldChar + 1; $C4B2..$C4B6: if OldChar mod 2 = 0 then NewChar := OldChar + 1;
//$C4B7: ĸ => K ? // $C4B7: ĸ => K ?
$C4B8..$C588: if OldChar mod 2 = 1 then NewChar := OldChar + 1; $C4B8..$C588: if OldChar mod 2 = 1 then NewChar := OldChar + 1;
//$C589 ʼn => ? // $C589 ʼn => ?
$C58A..$C5B7: if OldChar mod 2 = 0 then NewChar := OldChar + 1; $C58A..$C5B7: if OldChar mod 2 = 0 then NewChar := OldChar + 1;
$C5B8: NewChar := $C3BF; // Ÿ $C5B8: NewChar := $C3BF; // Ÿ
$C5B9..$C8B3: if OldChar mod 2 = 1 then NewChar := OldChar + 1; $C5B9..$C8B3: if OldChar mod 2 = 1 then NewChar := OldChar + 1;
@@ -1361,11 +1377,11 @@ begin
CharProcessed := True; CharProcessed := True;
end; end;
$C4B2..$C4B6: if OldChar mod 2 = 1 then NewChar := OldChar - 1; $C4B2..$C4B6: if OldChar mod 2 = 1 then NewChar := OldChar - 1;
//$C4B7: ĸ => K ? // $C4B7: ĸ => K ?
$C4B8..$C588: if OldChar mod 2 = 0 then NewChar := OldChar - 1; $C4B8..$C588: if OldChar mod 2 = 0 then NewChar := OldChar - 1;
//$C589 ʼn => ? // $C589 ʼn => ?
$C58A..$C5B7: if OldChar mod 2 = 1 then NewChar := OldChar - 1; $C58A..$C5B7: if OldChar mod 2 = 1 then NewChar := OldChar - 1;
//$C5B8: // Ÿ already uppercase // $C5B8: // Ÿ already uppercase
$C5B9..$C8B3: if OldChar mod 2 = 0 then NewChar := OldChar - 1; $C5B9..$C8B3: if OldChar mod 2 = 0 then NewChar := OldChar - 1;
// //
$CEB1..$CEBF: NewChar := OldChar - $20; // Greek Characters $CEB1..$CEBF: NewChar := OldChar - $20; // Greek Characters