* Improved UCS-4 to UTF-16 conversions: removed the function call made for every character and the repeated memory reallocations, and shared as much code as possible between the WideString and UnicodeString versions.

git-svn-id: trunk@20270 -
This commit is contained in:
sergei 2012-02-06 19:55:56 +00:00
parent e94508d5eb
commit a1cb87bcf8

View File

@ -1895,35 +1895,27 @@ function WideStringToUCS4String(const s : WideString) : UCS4String;
end; end;
{ concatenates an utf-32 char to a unicodestring. S *must* be unique when entering. } { dest should point to previously allocated wide/unicodestring }
procedure ConcatUTF32ToUnicodeStr(const nc: UCS4Char; var S: UnicodeString; var index: SizeInt); procedure UCS4Decode(const s: UCS4String; dest: PWideChar);
var var
p : PUnicodeChar; i: sizeint;
nc: UCS4Char;
begin begin
{ if nc > $ffff, we need two places } for i:=0 to length(s)-2 do { -2 because s contains explicit terminating #0 }
if (index+ord(nc > $ffff)>length(s)) then
if (length(s) < 10*256) then
setlength(s,length(s)+10)
else
setlength(s,length(s)+length(s) shr 8);
{ we know that s is unique -> avoid uniquestring calls}
p:=@s[index];
if (nc<$ffff) then
begin begin
p^:=unicodechar(nc); nc:=s[i];
inc(index); if (nc<$ffff) then
end dest^:=widechar(nc)
else if (dword(nc)<=$10ffff) then else if (dword(nc)<=$10ffff) then
begin begin
p^:=unicodechar((nc - $10000) shr 10 + $d800); dest^:=widechar(nc shr 10 + $d7c0);
(p+1)^:=unicodechar((nc - $10000) and $3ff + $dc00); { subtracting $10000 doesn't change low 10 bits }
inc(index,2); dest[1]:=widechar(nc and $3ff + $dc00);
end inc(dest);
else end
{ invalid code point } else { invalid code point }
begin dest^:='?';
p^:='?'; inc(dest);
inc(index);
end; end;
end; end;
@ -1931,65 +1923,26 @@ end;
function UCS4StringToUnicodeString(const s : UCS4String) : UnicodeString; function UCS4StringToUnicodeString(const s : UCS4String) : UnicodeString;
var var
i : SizeInt; i : SizeInt;
resindex : SizeInt; reslen : SizeInt;
begin begin
{ skip terminating #0 } reslen:=0;
SetLength(result,length(s)-1); for i:=0 to length(s)-2 do { skip terminating #0 }
resindex:=1; Inc(reslen,1+ord((s[i]>$ffff) and (s[i]<=$10ffff)));
for i:=0 to high(s)-1 do SetLength(result,reslen);
ConcatUTF32ToUnicodeStr(s[i],result,resindex); UCS4Decode(s,pointer(result));
{ adjust result length (may be too big due to growing }
{ for surrogate pairs) }
setlength(result,resindex-1);
end; end;
{ Appends the UTF-32 code point nc to the widestring S as UTF-16.

  nc    - code point to append
  S     - destination string; it *must* be unique when entering, because the
          characters are stored through a raw pointer without a uniquestring
          call. S is grown when there is no room left, so it may end up longer
          than the data written; the caller has to trim it to index-1.
  index - 1-based position in S at which to write; on return it is advanced
          by the number of UTF-16 code units written (1 or 2).

  Code points up to and including $ffff are stored as a single code unit,
  code points in $10000..$10ffff as a surrogate pair, and anything larger
  is replaced by '?'. }
procedure ConcatUTF32ToWideStr(const nc: UCS4Char; var S: WideString; var index: SizeInt);
var
  p : PWideChar;
begin
  { if nc > $ffff, we need two places }
  if (index+ord(nc > $ffff)>length(s)) then
    { grow in steps: by 10 characters while the string is small, }
    { by 1/256th of its length afterwards                        }
    if (length(s) < 10*256) then
      setlength(s,length(s)+10)
    else
      setlength(s,length(s)+length(s) shr 8);
  { we know that s is unique -> avoid uniquestring calls }
  p:=@s[index];
  { $ffff itself belongs to the BMP and must be stored as one code unit; }
  { sending it to the surrogate branch would make nc - $10000 negative   }
  if (nc<=$ffff) then
    begin
      p^:=widechar(nc);
      inc(index);
    end
  else if (dword(nc)<=$10ffff) then
    begin
      { high surrogate: top 10 bits of the offset from $10000 }
      p^:=widechar((nc - $10000) shr 10 + $d800);
      { low surrogate: bottom 10 bits of the offset }
      (p+1)^:=widechar((nc - $10000) and $3ff + $dc00);
      inc(index,2);
    end
  else
    { invalid code point }
    begin
      p^:='?';
      inc(index);
    end;
end;
function UCS4StringToWideString(const s : UCS4String) : WideString; function UCS4StringToWideString(const s : UCS4String) : WideString;
var var
i : SizeInt; i : SizeInt;
resindex : SizeInt; reslen : SizeInt;
begin begin
{ skip terminating #0 } reslen:=0;
SetLength(result,length(s)-1); for i:=0 to length(s)-2 do { skip terminating #0 }
resindex:=1; Inc(reslen,1+ord((s[i]>$ffff) and (s[i]<=$10ffff)));
for i:=0 to high(s)-1 do SetLength(result,reslen);
ConcatUTF32ToWideStr(s[i],result,resindex); UCS4Decode(s,pointer(result));
{ adjust result length (may be too big due to growing }
{ for surrogate pairs) }
setlength(result,resindex-1);
end; end;