LCL UTF-16 conversion bug: do not ignore code point values > $DFFF and <= $FFFF, unrolled by Brad Campbell

git-svn-id: trunk@16868 -
This commit is contained in:
tombo 2008-10-04 12:40:46 +00:00
parent b57df0675a
commit 8c3ef8f770
3 changed files with 19 additions and 11 deletions

View File

@ -20,6 +20,7 @@ Bob Wingard
Boguslaw Brandys Boguslaw Brandys
Boris Arko Boris Arko
Boris Glavin Boris Glavin
Brad Campbell
Chris Rorden Chris Rorden
Christian Iversen Christian Iversen
Christian Ulrich Christian Ulrich

View File

@ -3636,7 +3636,7 @@ function UTF16CharacterLength(p: PWideChar): integer;
// The endianess of the machine will be taken. // The endianess of the machine will be taken.
begin begin
if p<>nil then begin if p<>nil then begin
if ord(p[0])<$D800 then if (ord(p[0]) < $D800) or (ord(p[0]) > $DFFF) then
Result:=1 Result:=1
else else
Result:=2; Result:=2;
@ -3670,7 +3670,7 @@ var
begin begin
if p<>nil then begin if p<>nil then begin
w1:=ord(p[0]); w1:=ord(p[0]);
if w1<$D800 then begin if (w1 < $D800) or (w1 > $DFFF) then begin
// is 1 word character // is 1 word character
Result:=w1; Result:=w1;
CharLen:=1; CharLen:=1;
@ -3695,7 +3695,10 @@ end;
function UnicodeToUTF16(u: cardinal): widestring; function UnicodeToUTF16(u: cardinal): widestring;
begin begin
if u<$D800 then // u should be <= $10FFFF to fit into UTF-16
if u < $10000 then
// Note: codepoints $D800 - $DFFF are reserved
Result:=widechar(u) Result:=widechar(u)
else else
Result:=widechar($D800+((u - $10000) shr 10))+widechar($DC00+((u - $10000) and $3ff)); Result:=widechar($D800+((u - $10000) shr 10))+widechar($DC00+((u - $10000) and $3ff));
@ -3847,7 +3850,7 @@ begin
if ((B2 and %11000000) = %10000000) and ((B3 and %11000000) = %10000000) then if ((B2 and %11000000) = %10000000) and ((B3 and %11000000) = %10000000) then
begin begin
W := ((B1 and %00011111) shl 12) or ((B2 and %00111111) shl 6) or (B3 and %00111111); W := ((B1 and %00011111) shl 12) or ((B2 and %00111111) shl 6) or (B3 and %00111111);
if W < $D800 then // to single wide char UTF-16 char if (W < $D800) or (W > $DFFF) then // to single wide char UTF-16 char
begin begin
Dest[DestI] := WideChar(W); Dest[DestI] := WideChar(W);
Inc(DestI); Inc(DestI);
@ -3985,7 +3988,7 @@ begin
W1 := Word(Src[SrcI]); W1 := Word(Src[SrcI]);
Inc(SrcI); Inc(SrcI);
if W1 < $D800 then // single wide char UTF-16 char if (W1 < $D800) or (W1 > $DFFF) then // single wide char UTF-16 char
begin begin
if W1 < $0080 then // to single byte UTF-8 char if W1 < $0080 then // to single byte UTF-8 char
begin begin

View File

@ -20,8 +20,8 @@ type
end; end;
const const
Limits: Array [0..8] of Cardinal = Limits: Array [0..9] of Cardinal =
(0, $7F, $80, $7FF, $800, $10000, $10FFFF, $1FFFFF, $D7FF); (0, $7F, $80, $7FF, $800, $D7FF, $E000, $FFFF, $10000, $10FFFF);
implementation implementation
@ -34,9 +34,9 @@ var
SUTF8, S1UTF8: UTF8String; SUTF8, S1UTF8: UTF8String;
SUTF16, S1UTF16, R: WideString; SUTF16, S1UTF16, R: WideString;
begin begin
for U := 0 to $1FFFFF do // test each unicode char for U := 0 to $10FFFF do // test each unicode char
begin begin
if (U >= $D800) and (U <= $FFFF) then Continue; if (U >= $D800) and (U <= $DFFF) then Continue;
SUTF8 := UnicodeToUTF8(U); SUTF8 := UnicodeToUTF8(U);
SUTF16 := UnicodeToUTF16(U); SUTF16 := UnicodeToUTF16(U);
@ -71,9 +71,9 @@ var
SUTF8, S1UTF8, R: UTF8String; SUTF8, S1UTF8, R: UTF8String;
SUTF16, S1UTF16: WideString; SUTF16, S1UTF16: WideString;
begin begin
for U := 0 to $1FFFFF do for U := 0 to $10FFFF do
begin begin
if (U >= $D800) and (U <= $FFFF) then Continue; if (U >= $D800) and (U <= $DFFF) then Continue;
SUTF8 := UnicodeToUTF8(U); SUTF8 := UnicodeToUTF8(U);
SUTF16 := UnicodeToUTF16(U); SUTF16 := UnicodeToUTF16(U);
@ -107,6 +107,8 @@ var
begin begin
AssertEquals(0, UTF16CharacterToUnicode(#0, L)); AssertEquals(0, UTF16CharacterToUnicode(#0, L));
AssertEquals($D7FF, UTF16CharacterToUnicode(#$D7FF, L)); AssertEquals($D7FF, UTF16CharacterToUnicode(#$D7FF, L));
AssertEquals($E000, UTF16CharacterToUnicode(#$E000, L));
AssertEquals($FFFF, UTF16CharacterToUnicode(#$FFFF, L));
AssertEquals($10000, UTF16CharacterToUnicode(#$D800#$DC00, L)); AssertEquals($10000, UTF16CharacterToUnicode(#$D800#$DC00, L));
AssertEquals($10001, UTF16CharacterToUnicode(#$D800#$DC01, L)); AssertEquals($10001, UTF16CharacterToUnicode(#$D800#$DC01, L));
AssertEquals($10FFFD, UTF16CharacterToUnicode(#$DBFF#$DFFD, L)); AssertEquals($10FFFD, UTF16CharacterToUnicode(#$DBFF#$DFFD, L));
@ -116,6 +118,8 @@ procedure TTestUnicode.TestUnicodeToUTF16;
begin begin
AssertEquals(#0, UnicodeToUTF16(0)); AssertEquals(#0, UnicodeToUTF16(0));
AssertEquals(#$D7FF, UnicodeToUTF16($D7FF)); AssertEquals(#$D7FF, UnicodeToUTF16($D7FF));
AssertEquals(#$E000, UnicodeToUTF16($E000));
AssertEquals(#$FFFF, UnicodeToUTF16($FFFF));
AssertEquals(#$D800#$DC00, UnicodeToUTF16($10000)); AssertEquals(#$D800#$DC00, UnicodeToUTF16($10000));
AssertEquals(#$D800#$DC01, UnicodeToUTF16($10001)); AssertEquals(#$D800#$DC01, UnicodeToUTF16($10001));
AssertEquals(#$DBFF#$DFFD, UnicodeToUTF16($10FFFD)); AssertEquals(#$DBFF#$DFFD, UnicodeToUTF16($10FFFD));