LCL UTF-16 conversion bug: do not ignore code point values > $DFFF and <= $FFFF, unrolled by Brad Campbell

git-svn-id: trunk@16868 -
This commit is contained in:
tombo 2008-10-04 12:40:46 +00:00
parent b57df0675a
commit 8c3ef8f770
3 changed files with 19 additions and 11 deletions

View File

@ -20,6 +20,7 @@ Bob Wingard
Boguslaw Brandys
Boris Arko
Boris Glavin
Brad Campbell
Chris Rorden
Christian Iversen
Christian Ulrich

View File

@ -3636,7 +3636,7 @@ function UTF16CharacterLength(p: PWideChar): integer;
// The endianess of the machine will be taken.
begin
if p<>nil then begin
if ord(p[0])<$D800 then
if (ord(p[0]) < $D800) or (ord(p[0]) > $DFFF) then
Result:=1
else
Result:=2;
@ -3670,7 +3670,7 @@ var
begin
if p<>nil then begin
w1:=ord(p[0]);
if w1<$D800 then begin
if (w1 < $D800) or (w1 > $DFFF) then begin
// is 1 word character
Result:=w1;
CharLen:=1;
@ -3695,7 +3695,10 @@ end;
function UnicodeToUTF16(u: cardinal): widestring;
begin
if u<$D800 then
// u should be <= $10FFFF to fit into UTF-16
if u < $10000 then
// Note: codepoints $D800 - $DFFF are reserved
Result:=widechar(u)
else
Result:=widechar($D800+((u - $10000) shr 10))+widechar($DC00+((u - $10000) and $3ff));
@ -3847,7 +3850,7 @@ begin
if ((B2 and %11000000) = %10000000) and ((B3 and %11000000) = %10000000) then
begin
W := ((B1 and %00011111) shl 12) or ((B2 and %00111111) shl 6) or (B3 and %00111111);
if W < $D800 then // to single wide char UTF-16 char
if (W < $D800) or (W > $DFFF) then // to single wide char UTF-16 char
begin
Dest[DestI] := WideChar(W);
Inc(DestI);
@ -3985,7 +3988,7 @@ begin
W1 := Word(Src[SrcI]);
Inc(SrcI);
if W1 < $D800 then // single wide char UTF-16 char
if (W1 < $D800) or (W1 > $DFFF) then // single wide char UTF-16 char
begin
if W1 < $0080 then // to single byte UTF-8 char
begin

View File

@ -20,8 +20,8 @@ type
end;
const
Limits: Array [0..8] of Cardinal =
(0, $7F, $80, $7FF, $800, $10000, $10FFFF, $1FFFFF, $D7FF);
Limits: Array [0..9] of Cardinal =
(0, $7F, $80, $7FF, $800, $D7FF, $E000, $FFFF, $10000, $10FFFF);
implementation
@ -34,9 +34,9 @@ var
SUTF8, S1UTF8: UTF8String;
SUTF16, S1UTF16, R: WideString;
begin
for U := 0 to $1FFFFF do // test each unicode char
for U := 0 to $10FFFF do // test each unicode char
begin
if (U >= $D800) and (U <= $FFFF) then Continue;
if (U >= $D800) and (U <= $DFFF) then Continue;
SUTF8 := UnicodeToUTF8(U);
SUTF16 := UnicodeToUTF16(U);
@ -71,9 +71,9 @@ var
SUTF8, S1UTF8, R: UTF8String;
SUTF16, S1UTF16: WideString;
begin
for U := 0 to $1FFFFF do
for U := 0 to $10FFFF do
begin
if (U >= $D800) and (U <= $FFFF) then Continue;
if (U >= $D800) and (U <= $DFFF) then Continue;
SUTF8 := UnicodeToUTF8(U);
SUTF16 := UnicodeToUTF16(U);
@ -107,6 +107,8 @@ var
begin
AssertEquals(0, UTF16CharacterToUnicode(#0, L));
AssertEquals($D7FF, UTF16CharacterToUnicode(#$D7FF, L));
AssertEquals($E000, UTF16CharacterToUnicode(#$E000, L));
AssertEquals($FFFF, UTF16CharacterToUnicode(#$FFFF, L));
AssertEquals($10000, UTF16CharacterToUnicode(#$D800#$DC00, L));
AssertEquals($10001, UTF16CharacterToUnicode(#$D800#$DC01, L));
AssertEquals($10FFFD, UTF16CharacterToUnicode(#$DBFF#$DFFD, L));
@ -116,6 +118,8 @@ procedure TTestUnicode.TestUnicodeToUTF16;
begin
AssertEquals(#0, UnicodeToUTF16(0));
AssertEquals(#$D7FF, UnicodeToUTF16($D7FF));
AssertEquals(#$E000, UnicodeToUTF16($E000));
AssertEquals(#$FFFF, UnicodeToUTF16($FFFF));
AssertEquals(#$D800#$DC00, UnicodeToUTF16($10000));
AssertEquals(#$D800#$DC01, UnicodeToUTF16($10001));
AssertEquals(#$DBFF#$DFFD, UnicodeToUTF16($10FFFD));