mirror of
https://gitlab.com/freepascal.org/lazarus/lazarus.git
synced 2025-08-24 19:39:17 +02:00
LCL UTF-16 conversion bug: do not ignore code point values > $DFFF and <= $FFFF, unrolled by Brad Campbell
git-svn-id: trunk@16868 -
This commit is contained in:
parent
b57df0675a
commit
8c3ef8f770
@ -20,6 +20,7 @@ Bob Wingard
|
|||||||
Boguslaw Brandys
|
Boguslaw Brandys
|
||||||
Boris Arko
|
Boris Arko
|
||||||
Boris Glavin
|
Boris Glavin
|
||||||
|
Brad Campbell
|
||||||
Chris Rorden
|
Chris Rorden
|
||||||
Christian Iversen
|
Christian Iversen
|
||||||
Christian Ulrich
|
Christian Ulrich
|
||||||
|
@ -3636,7 +3636,7 @@ function UTF16CharacterLength(p: PWideChar): integer;
|
|||||||
// The endianness of the machine will be taken.
|
// The endianness of the machine will be taken.
|
||||||
begin
|
begin
|
||||||
if p<>nil then begin
|
if p<>nil then begin
|
||||||
if ord(p[0])<$D800 then
|
if (ord(p[0]) < $D800) or (ord(p[0]) > $DFFF) then
|
||||||
Result:=1
|
Result:=1
|
||||||
else
|
else
|
||||||
Result:=2;
|
Result:=2;
|
||||||
@ -3670,7 +3670,7 @@ var
|
|||||||
begin
|
begin
|
||||||
if p<>nil then begin
|
if p<>nil then begin
|
||||||
w1:=ord(p[0]);
|
w1:=ord(p[0]);
|
||||||
if w1<$D800 then begin
|
if (w1 < $D800) or (w1 > $DFFF) then begin
|
||||||
// is 1 word character
|
// is 1 word character
|
||||||
Result:=w1;
|
Result:=w1;
|
||||||
CharLen:=1;
|
CharLen:=1;
|
||||||
@ -3695,7 +3695,10 @@ end;
|
|||||||
|
|
||||||
function UnicodeToUTF16(u: cardinal): widestring;
|
function UnicodeToUTF16(u: cardinal): widestring;
|
||||||
begin
|
begin
|
||||||
if u<$D800 then
|
// u should be <= $10FFFF to fit into UTF-16
|
||||||
|
|
||||||
|
if u < $10000 then
|
||||||
|
// Note: codepoints $D800 - $DFFF are reserved
|
||||||
Result:=widechar(u)
|
Result:=widechar(u)
|
||||||
else
|
else
|
||||||
Result:=widechar($D800+((u - $10000) shr 10))+widechar($DC00+((u - $10000) and $3ff));
|
Result:=widechar($D800+((u - $10000) shr 10))+widechar($DC00+((u - $10000) and $3ff));
|
||||||
@ -3847,7 +3850,7 @@ begin
|
|||||||
if ((B2 and %11000000) = %10000000) and ((B3 and %11000000) = %10000000) then
|
if ((B2 and %11000000) = %10000000) and ((B3 and %11000000) = %10000000) then
|
||||||
begin
|
begin
|
||||||
W := ((B1 and %00011111) shl 12) or ((B2 and %00111111) shl 6) or (B3 and %00111111);
|
W := ((B1 and %00011111) shl 12) or ((B2 and %00111111) shl 6) or (B3 and %00111111);
|
||||||
if W < $D800 then // to single wide char UTF-16 char
|
if (W < $D800) or (W > $DFFF) then // to single wide char UTF-16 char
|
||||||
begin
|
begin
|
||||||
Dest[DestI] := WideChar(W);
|
Dest[DestI] := WideChar(W);
|
||||||
Inc(DestI);
|
Inc(DestI);
|
||||||
@ -3985,7 +3988,7 @@ begin
|
|||||||
W1 := Word(Src[SrcI]);
|
W1 := Word(Src[SrcI]);
|
||||||
Inc(SrcI);
|
Inc(SrcI);
|
||||||
|
|
||||||
if W1 < $D800 then // single wide char UTF-16 char
|
if (W1 < $D800) or (W1 > $DFFF) then // single wide char UTF-16 char
|
||||||
begin
|
begin
|
||||||
if W1 < $0080 then // to single byte UTF-8 char
|
if W1 < $0080 then // to single byte UTF-8 char
|
||||||
begin
|
begin
|
||||||
|
@ -20,8 +20,8 @@ type
|
|||||||
end;
|
end;
|
||||||
|
|
||||||
const
|
const
|
||||||
Limits: Array [0..8] of Cardinal =
|
Limits: Array [0..9] of Cardinal =
|
||||||
(0, $7F, $80, $7FF, $800, $10000, $10FFFF, $1FFFFF, $D7FF);
|
(0, $7F, $80, $7FF, $800, $D7FF, $E000, $FFFF, $10000, $10FFFF);
|
||||||
|
|
||||||
implementation
|
implementation
|
||||||
|
|
||||||
@ -34,9 +34,9 @@ var
|
|||||||
SUTF8, S1UTF8: UTF8String;
|
SUTF8, S1UTF8: UTF8String;
|
||||||
SUTF16, S1UTF16, R: WideString;
|
SUTF16, S1UTF16, R: WideString;
|
||||||
begin
|
begin
|
||||||
for U := 0 to $1FFFFF do // test each unicode char
|
for U := 0 to $10FFFF do // test each unicode char
|
||||||
begin
|
begin
|
||||||
if (U >= $D800) and (U <= $FFFF) then Continue;
|
if (U >= $D800) and (U <= $DFFF) then Continue;
|
||||||
|
|
||||||
SUTF8 := UnicodeToUTF8(U);
|
SUTF8 := UnicodeToUTF8(U);
|
||||||
SUTF16 := UnicodeToUTF16(U);
|
SUTF16 := UnicodeToUTF16(U);
|
||||||
@ -71,9 +71,9 @@ var
|
|||||||
SUTF8, S1UTF8, R: UTF8String;
|
SUTF8, S1UTF8, R: UTF8String;
|
||||||
SUTF16, S1UTF16: WideString;
|
SUTF16, S1UTF16: WideString;
|
||||||
begin
|
begin
|
||||||
for U := 0 to $1FFFFF do
|
for U := 0 to $10FFFF do
|
||||||
begin
|
begin
|
||||||
if (U >= $D800) and (U <= $FFFF) then Continue;
|
if (U >= $D800) and (U <= $DFFF) then Continue;
|
||||||
|
|
||||||
SUTF8 := UnicodeToUTF8(U);
|
SUTF8 := UnicodeToUTF8(U);
|
||||||
SUTF16 := UnicodeToUTF16(U);
|
SUTF16 := UnicodeToUTF16(U);
|
||||||
@ -107,6 +107,8 @@ var
|
|||||||
begin
|
begin
|
||||||
AssertEquals(0, UTF16CharacterToUnicode(#0, L));
|
AssertEquals(0, UTF16CharacterToUnicode(#0, L));
|
||||||
AssertEquals($D7FF, UTF16CharacterToUnicode(#$D7FF, L));
|
AssertEquals($D7FF, UTF16CharacterToUnicode(#$D7FF, L));
|
||||||
|
AssertEquals($E000, UTF16CharacterToUnicode(#$E000, L));
|
||||||
|
AssertEquals($FFFF, UTF16CharacterToUnicode(#$FFFF, L));
|
||||||
AssertEquals($10000, UTF16CharacterToUnicode(#$D800#$DC00, L));
|
AssertEquals($10000, UTF16CharacterToUnicode(#$D800#$DC00, L));
|
||||||
AssertEquals($10001, UTF16CharacterToUnicode(#$D800#$DC01, L));
|
AssertEquals($10001, UTF16CharacterToUnicode(#$D800#$DC01, L));
|
||||||
AssertEquals($10FFFD, UTF16CharacterToUnicode(#$DBFF#$DFFD, L));
|
AssertEquals($10FFFD, UTF16CharacterToUnicode(#$DBFF#$DFFD, L));
|
||||||
@ -116,6 +118,8 @@ procedure TTestUnicode.TestUnicodeToUTF16;
|
|||||||
begin
|
begin
|
||||||
AssertEquals(#0, UnicodeToUTF16(0));
|
AssertEquals(#0, UnicodeToUTF16(0));
|
||||||
AssertEquals(#$D7FF, UnicodeToUTF16($D7FF));
|
AssertEquals(#$D7FF, UnicodeToUTF16($D7FF));
|
||||||
|
AssertEquals(#$E000, UnicodeToUTF16($E000));
|
||||||
|
AssertEquals(#$FFFF, UnicodeToUTF16($FFFF));
|
||||||
AssertEquals(#$D800#$DC00, UnicodeToUTF16($10000));
|
AssertEquals(#$D800#$DC00, UnicodeToUTF16($10000));
|
||||||
AssertEquals(#$D800#$DC01, UnicodeToUTF16($10001));
|
AssertEquals(#$D800#$DC01, UnicodeToUTF16($10001));
|
||||||
AssertEquals(#$DBFF#$DFFD, UnicodeToUTF16($10FFFD));
|
AssertEquals(#$DBFF#$DFFD, UnicodeToUTF16($10FFFD));
|
||||||
|
Loading…
Reference in New Issue
Block a user