mirror of
https://gitlab.com/freepascal.org/lazarus/lazarus.git
synced 2025-04-06 21:18:01 +02:00
LCL UTF-16 conversion bug: do not ignore code point values > $DFFF and <= $FFFF, unrolled by Brad Campbell
git-svn-id: trunk@16868 -
This commit is contained in:
parent
b57df0675a
commit
8c3ef8f770
@ -20,6 +20,7 @@ Bob Wingard
|
||||
Boguslaw Brandys
|
||||
Boris Arko
|
||||
Boris Glavin
|
||||
Brad Campbell
|
||||
Chris Rorden
|
||||
Christian Iversen
|
||||
Christian Ulrich
|
||||
|
@ -3636,7 +3636,7 @@ function UTF16CharacterLength(p: PWideChar): integer;
|
||||
// The endianess of the machine will be taken.
|
||||
begin
|
||||
if p<>nil then begin
|
||||
if ord(p[0])<$D800 then
|
||||
if (ord(p[0]) < $D800) or (ord(p[0]) > $DFFF) then
|
||||
Result:=1
|
||||
else
|
||||
Result:=2;
|
||||
@ -3670,7 +3670,7 @@ var
|
||||
begin
|
||||
if p<>nil then begin
|
||||
w1:=ord(p[0]);
|
||||
if w1<$D800 then begin
|
||||
if (w1 < $D800) or (w1 > $DFFF) then begin
|
||||
// is 1 word character
|
||||
Result:=w1;
|
||||
CharLen:=1;
|
||||
@ -3695,7 +3695,10 @@ end;
|
||||
|
||||
function UnicodeToUTF16(u: cardinal): widestring;
|
||||
begin
|
||||
if u<$D800 then
|
||||
// u should be <= $10FFFF to fit into UTF-16
|
||||
|
||||
if u < $10000 then
|
||||
// Note: codepoints $D800 - $DFFF are reserved
|
||||
Result:=widechar(u)
|
||||
else
|
||||
Result:=widechar($D800+((u - $10000) shr 10))+widechar($DC00+((u - $10000) and $3ff));
|
||||
@ -3847,7 +3850,7 @@ begin
|
||||
if ((B2 and %11000000) = %10000000) and ((B3 and %11000000) = %10000000) then
|
||||
begin
|
||||
W := ((B1 and %00011111) shl 12) or ((B2 and %00111111) shl 6) or (B3 and %00111111);
|
||||
if W < $D800 then // to single wide char UTF-16 char
|
||||
if (W < $D800) or (W > $DFFF) then // to single wide char UTF-16 char
|
||||
begin
|
||||
Dest[DestI] := WideChar(W);
|
||||
Inc(DestI);
|
||||
@ -3985,7 +3988,7 @@ begin
|
||||
W1 := Word(Src[SrcI]);
|
||||
Inc(SrcI);
|
||||
|
||||
if W1 < $D800 then // single wide char UTF-16 char
|
||||
if (W1 < $D800) or (W1 > $DFFF) then // single wide char UTF-16 char
|
||||
begin
|
||||
if W1 < $0080 then // to single byte UTF-8 char
|
||||
begin
|
||||
|
@ -20,8 +20,8 @@ type
|
||||
end;
|
||||
|
||||
const
|
||||
Limits: Array [0..8] of Cardinal =
|
||||
(0, $7F, $80, $7FF, $800, $10000, $10FFFF, $1FFFFF, $D7FF);
|
||||
Limits: Array [0..9] of Cardinal =
|
||||
(0, $7F, $80, $7FF, $800, $D7FF, $E000, $FFFF, $10000, $10FFFF);
|
||||
|
||||
implementation
|
||||
|
||||
@ -34,9 +34,9 @@ var
|
||||
SUTF8, S1UTF8: UTF8String;
|
||||
SUTF16, S1UTF16, R: WideString;
|
||||
begin
|
||||
for U := 0 to $1FFFFF do // test each unicode char
|
||||
for U := 0 to $10FFFF do // test each unicode char
|
||||
begin
|
||||
if (U >= $D800) and (U <= $FFFF) then Continue;
|
||||
if (U >= $D800) and (U <= $DFFF) then Continue;
|
||||
|
||||
SUTF8 := UnicodeToUTF8(U);
|
||||
SUTF16 := UnicodeToUTF16(U);
|
||||
@ -71,9 +71,9 @@ var
|
||||
SUTF8, S1UTF8, R: UTF8String;
|
||||
SUTF16, S1UTF16: WideString;
|
||||
begin
|
||||
for U := 0 to $1FFFFF do
|
||||
for U := 0 to $10FFFF do
|
||||
begin
|
||||
if (U >= $D800) and (U <= $FFFF) then Continue;
|
||||
if (U >= $D800) and (U <= $DFFF) then Continue;
|
||||
|
||||
SUTF8 := UnicodeToUTF8(U);
|
||||
SUTF16 := UnicodeToUTF16(U);
|
||||
@ -107,6 +107,8 @@ var
|
||||
begin
|
||||
AssertEquals(0, UTF16CharacterToUnicode(#0, L));
|
||||
AssertEquals($D7FF, UTF16CharacterToUnicode(#$D7FF, L));
|
||||
AssertEquals($E000, UTF16CharacterToUnicode(#$E000, L));
|
||||
AssertEquals($FFFF, UTF16CharacterToUnicode(#$FFFF, L));
|
||||
AssertEquals($10000, UTF16CharacterToUnicode(#$D800#$DC00, L));
|
||||
AssertEquals($10001, UTF16CharacterToUnicode(#$D800#$DC01, L));
|
||||
AssertEquals($10FFFD, UTF16CharacterToUnicode(#$DBFF#$DFFD, L));
|
||||
@ -116,6 +118,8 @@ procedure TTestUnicode.TestUnicodeToUTF16;
|
||||
begin
|
||||
AssertEquals(#0, UnicodeToUTF16(0));
|
||||
AssertEquals(#$D7FF, UnicodeToUTF16($D7FF));
|
||||
AssertEquals(#$E000, UnicodeToUTF16($E000));
|
||||
AssertEquals(#$FFFF, UnicodeToUTF16($FFFF));
|
||||
AssertEquals(#$D800#$DC00, UnicodeToUTF16($10000));
|
||||
AssertEquals(#$D800#$DC01, UnicodeToUTF16($10001));
|
||||
AssertEquals(#$DBFF#$DFFD, UnicodeToUTF16($10FFFD));
|
||||
|
Loading…
Reference in New Issue
Block a user