+ add support for Unicode code point constants > $FFFF; they are converted to a surrogate pair so they are in

fact a UnicodeString constant
+ added tests

git-svn-id: trunk@39123 -
This commit is contained in:
svenbarth 2018-05-27 14:06:19 +00:00
parent c64455cfd4
commit f077c7d950
8 changed files with 98 additions and 6 deletions

6
.gitattributes vendored
View File

@ -13770,12 +13770,18 @@ tests/test/tweaklib2.pp svneol=native#text/plain
tests/test/tweaklib3.pp svneol=native#text/plain
tests/test/tweaklib4.pp svneol=native#text/plain
tests/test/twide1.pp svneol=native#text/plain
tests/test/twide10.pp svneol=native#text/pascal
tests/test/twide11.pp svneol=native#text/pascal
tests/test/twide12.pp svneol=native#text/pascal
tests/test/twide13.pp svneol=native#text/pascal
tests/test/twide2.pp svneol=native#text/plain
tests/test/twide3.pp svneol=native#text/plain
tests/test/twide4.pp svneol=native#text/plain
tests/test/twide5.pp svneol=native#text/plain
tests/test/twide6.pp svneol=native#text/plain
tests/test/twide7.pp svneol=native#text/plain
tests/test/twide8.pp svneol=native#text/pascal
tests/test/twide9.pp svneol=native#text/pascal
tests/test/twrstr1.pp svneol=native#text/plain
tests/test/twrstr2.pp svneol=native#text/plain
tests/test/twrstr3.pp svneol=native#text/plain

View File

@ -5104,7 +5104,7 @@ type
begin
readchar; { read leading $ }
asciinr:='$';
while (upcase(c) in ['A'..'F','0'..'9']) and (length(asciinr)<=5) do
while (upcase(c) in ['A'..'F','0'..'9']) and (length(asciinr)<=7) do
begin
asciinr:=asciinr+c;
readchar;
@ -5114,7 +5114,7 @@ type
begin
readchar; { read leading $ }
asciinr:='&';
while (upcase(c) in ['0'..'7']) and (length(asciinr)<=7) do
while (upcase(c) in ['0'..'7']) and (length(asciinr)<=8) do
begin
asciinr:=asciinr+c;
readchar;
@ -5124,7 +5124,7 @@ type
begin
readchar; { read leading $ }
asciinr:='%';
while (upcase(c) in ['0','1']) and (length(asciinr)<=17) do
while (upcase(c) in ['0','1']) and (length(asciinr)<=22) do
begin
asciinr:=asciinr+c;
readchar;
@ -5133,7 +5133,7 @@ type
else
begin
asciinr:='';
while (c in ['0'..'9']) and (length(asciinr)<=5) do
while (c in ['0'..'9']) and (length(asciinr)<=8) do
begin
asciinr:=asciinr+c;
readchar;
@ -5145,7 +5145,7 @@ type
Message(scan_e_illegal_char_const)
else if (m<0) or (m>255) or (length(asciinr)>3) then
begin
if (m>=0) and (m<=65535) then
if (m>=0) and (m<=$10FFFF) then
begin
if not iswidestring then
begin
@ -5156,7 +5156,15 @@ type
iswidestring:=true;
len:=0;
end;
concatwidestringchar(patternw,tcompilerwidechar(m));
if m<=$FFFF then
concatwidestringchar(patternw,tcompilerwidechar(m))
else
begin
{ split into surrogate pair }
dec(m,$10000);
concatwidestringchar(patternw,tcompilerwidechar((m shr 10) + $D800));
concatwidestringchar(patternw,tcompilerwidechar((m and $3FF) + $DC00));
end;
end
else
Message(scan_e_illegal_char_const)

10
tests/test/twide10.pp Normal file
View File

@ -0,0 +1,10 @@
{ %FAIL }
program twide10;
var
s: UnicodeString;
begin
{ this is greater than the highest defined Unicode code point }
s := #$110000;
end.

10
tests/test/twide11.pp Normal file
View File

@ -0,0 +1,10 @@
{ %FAIL }
program twide11;
var
s: UnicodeString;
begin
{ this is greater than the highest defined Unicode code point }
s := #1114112;
end.

10
tests/test/twide12.pp Normal file
View File

@ -0,0 +1,10 @@
{ %FAIL }
program twide12;
var
s: UnicodeString;
begin
{ this is greater than the highest defined Unicode code point }
s := #&4200000;
end.

10
tests/test/twide13.pp Normal file
View File

@ -0,0 +1,10 @@
{ %FAIL }
program twide13;
var
s: UnicodeString;
begin
{ this is greater than the highest defined Unicode code point }
s := #%100010000000000000000;
end.

28
tests/test/twide8.pp Normal file
View File

@ -0,0 +1,28 @@
program twide8;
procedure Check(const aStr: UnicodeString; aIndex: LongInt);
const
Char1 = #$DBFF;
Char2 = #$DFFF;
begin
if Length(aStr) <> 2 then
Halt(aIndex * 3);
if aStr[1] <> Char1 then
Halt(aIndex * 3 + 1);
if aStr[2] <> Char2 then
Halt(aIndex * 3 + 2);
end;
var
s: UnicodeString;
begin
s := #$10FFFF;
Check(s, 1);
s := #1114111;
Check(s, 2);
s := #&4177777;
Check(s, 3);
s := #%100001111111111111111;
Check(s, 4);
Writeln('ok');
end.

10
tests/test/twide9.pp Normal file
View File

@ -0,0 +1,10 @@
{ %FAIL }
program twide9;
var
u: UnicodeChar;
begin
{ fails, because a code point > $FFFF decodes to a surrogate pair, thus a string constant }
u := #$10FFFF;
end.