mirror of
https://gitlab.com/freepascal.org/lazarus/lazarus.git
synced 2025-08-15 01:39:25 +02:00
lazutf8: some optimizations for UTF8LowerCase (Mattias)
git-svn-id: trunk@32756 -
This commit is contained in:
parent
493cba2f66
commit
1e24d21f65
@ -1113,40 +1113,56 @@ var
|
|||||||
OldCode: LongWord;
|
OldCode: LongWord;
|
||||||
NewCode: LongWord;
|
NewCode: LongWord;
|
||||||
NewCharLen: integer;
|
NewCharLen: integer;
|
||||||
|
Changed: Boolean;
|
||||||
|
p: PChar;
|
||||||
begin
|
begin
|
||||||
Result:=s;
|
Result:=s;
|
||||||
i:=1;
|
if Result='' then exit;
|
||||||
while i<=length(Result) do begin
|
Changed:=false;
|
||||||
case Result[i] of
|
p:=PChar(Result);
|
||||||
{ First ASCII chars }
|
repeat
|
||||||
'A'..'Z':
|
case p^ of
|
||||||
|
#0:
|
||||||
|
if p-PChar(Result)=length(Result) then
|
||||||
|
exit
|
||||||
|
else
|
||||||
|
inc(p);
|
||||||
|
'A'..'Z': // First ASCII chars
|
||||||
begin
|
begin
|
||||||
Result[i]:=chr(ord(Result[i])+32);
|
if not Changed then begin
|
||||||
inc(i);
|
i:=p-PChar(Result)+1;
|
||||||
end;
|
|
||||||
{ Now chars with multiple bytes }
|
|
||||||
#192..#240:
|
|
||||||
begin
|
|
||||||
OldCode:=UTF8CharacterToUnicode(@Result[i],CharLen);
|
|
||||||
NewCode:=UnicodeLowercase(OldCode);
|
|
||||||
if NewCode=OldCode then begin
|
|
||||||
inc(i,CharLen);
|
|
||||||
end else begin
|
|
||||||
UniqueString(Result);
|
UniqueString(Result);
|
||||||
NewCharLen:=UnicodeToUTF8(NewCode,@Result[i]);
|
Changed:=true;
|
||||||
if CharLen=NewCharLen then begin
|
p:=@Result[i];
|
||||||
inc(i,NewCharLen);
|
end;
|
||||||
end else begin
|
p^:=chr(ord(p^)+32);
|
||||||
|
inc(p);
|
||||||
|
end;
|
||||||
|
|
||||||
|
#192..#240: // Now chars with multiple bytes
|
||||||
|
begin
|
||||||
|
OldCode:=UTF8CharacterToUnicode(p,CharLen);
|
||||||
|
NewCode:=UnicodeLowercase(OldCode);
|
||||||
|
if NewCode<>OldCode then begin
|
||||||
|
if not Changed then begin
|
||||||
|
i:=p-PChar(Result)+1;
|
||||||
|
UniqueString(Result);
|
||||||
|
Changed:=true;
|
||||||
|
p:=@Result[i];
|
||||||
|
end;
|
||||||
|
NewCharLen:=UnicodeToUTF8(NewCode,p);
|
||||||
|
if CharLen<>NewCharLen then begin
|
||||||
// string size changed => use slower function
|
// string size changed => use slower function
|
||||||
Result:=UTF8LowercaseDynLength(s);
|
Result:=UTF8LowercaseDynLength(s);
|
||||||
exit;
|
exit;
|
||||||
end;
|
end;
|
||||||
end;
|
end;
|
||||||
|
inc(p,CharLen);
|
||||||
end;
|
end;
|
||||||
else
|
else
|
||||||
inc(i);
|
inc(p);
|
||||||
end;
|
end;
|
||||||
end;
|
until false;
|
||||||
end;
|
end;
|
||||||
{$endif}
|
{$endif}
|
||||||
|
|
||||||
@ -1242,9 +1258,9 @@ begin
|
|||||||
end;
|
end;
|
||||||
// $C4B1 turkish lowercase undotted ı
|
// $C4B1 turkish lowercase undotted ı
|
||||||
$C4B2..$C4B6: if OldChar mod 2 = 0 then NewChar := OldChar + 1;
|
$C4B2..$C4B6: if OldChar mod 2 = 0 then NewChar := OldChar + 1;
|
||||||
//$C4B7: ĸ => K ?
|
// $C4B7: ĸ => K ?
|
||||||
$C4B8..$C588: if OldChar mod 2 = 1 then NewChar := OldChar + 1;
|
$C4B8..$C588: if OldChar mod 2 = 1 then NewChar := OldChar + 1;
|
||||||
//$C589 ʼn => ?
|
// $C589 ʼn => ?
|
||||||
$C58A..$C5B7: if OldChar mod 2 = 0 then NewChar := OldChar + 1;
|
$C58A..$C5B7: if OldChar mod 2 = 0 then NewChar := OldChar + 1;
|
||||||
$C5B8: NewChar := $C3BF; // Ÿ
|
$C5B8: NewChar := $C3BF; // Ÿ
|
||||||
$C5B9..$C8B3: if OldChar mod 2 = 1 then NewChar := OldChar + 1;
|
$C5B9..$C8B3: if OldChar mod 2 = 1 then NewChar := OldChar + 1;
|
||||||
@ -1361,11 +1377,11 @@ begin
|
|||||||
CharProcessed := True;
|
CharProcessed := True;
|
||||||
end;
|
end;
|
||||||
$C4B2..$C4B6: if OldChar mod 2 = 1 then NewChar := OldChar - 1;
|
$C4B2..$C4B6: if OldChar mod 2 = 1 then NewChar := OldChar - 1;
|
||||||
//$C4B7: ĸ => K ?
|
// $C4B7: ĸ => K ?
|
||||||
$C4B8..$C588: if OldChar mod 2 = 0 then NewChar := OldChar - 1;
|
$C4B8..$C588: if OldChar mod 2 = 0 then NewChar := OldChar - 1;
|
||||||
//$C589 ʼn => ?
|
// $C589 ʼn => ?
|
||||||
$C58A..$C5B7: if OldChar mod 2 = 1 then NewChar := OldChar - 1;
|
$C58A..$C5B7: if OldChar mod 2 = 1 then NewChar := OldChar - 1;
|
||||||
//$C5B8: // Ÿ already uppercase
|
// $C5B8: // Ÿ already uppercase
|
||||||
$C5B9..$C8B3: if OldChar mod 2 = 0 then NewChar := OldChar - 1;
|
$C5B9..$C8B3: if OldChar mod 2 = 0 then NewChar := OldChar - 1;
|
||||||
//
|
//
|
||||||
$CEB1..$CEBF: NewChar := OldChar - $20; // Greek Characters
|
$CEB1..$CEBF: NewChar := OldChar - $20; // Greek Characters
|
||||||
|
Loading…
Reference in New Issue
Block a user