From f077c7d950b300a719b10b34692149a0d31eb354 Mon Sep 17 00:00:00 2001 From: svenbarth Date: Sun, 27 May 2018 14:06:19 +0000 Subject: [PATCH] + add support for Unicode code point constants > $FFFF; they are converted to a surrogate pair so they are in fact a UnicodeString constant + added tests git-svn-id: trunk@39123 - --- .gitattributes | 6 ++++++ compiler/scanner.pas | 20 ++++++++++++++------ tests/test/twide10.pp | 10 ++++++++++ tests/test/twide11.pp | 10 ++++++++++ tests/test/twide12.pp | 10 ++++++++++ tests/test/twide13.pp | 10 ++++++++++ tests/test/twide8.pp | 28 ++++++++++++++++++++++++++++ tests/test/twide9.pp | 10 ++++++++++ 8 files changed, 98 insertions(+), 6 deletions(-) create mode 100644 tests/test/twide10.pp create mode 100644 tests/test/twide11.pp create mode 100644 tests/test/twide12.pp create mode 100644 tests/test/twide13.pp create mode 100644 tests/test/twide8.pp create mode 100644 tests/test/twide9.pp diff --git a/.gitattributes b/.gitattributes index 96e66c86e3..501d46e65b 100644 --- a/.gitattributes +++ b/.gitattributes @@ -13770,12 +13770,18 @@ tests/test/tweaklib2.pp svneol=native#text/plain tests/test/tweaklib3.pp svneol=native#text/plain tests/test/tweaklib4.pp svneol=native#text/plain tests/test/twide1.pp svneol=native#text/plain +tests/test/twide10.pp svneol=native#text/pascal +tests/test/twide11.pp svneol=native#text/pascal +tests/test/twide12.pp svneol=native#text/pascal +tests/test/twide13.pp svneol=native#text/pascal tests/test/twide2.pp svneol=native#text/plain tests/test/twide3.pp svneol=native#text/plain tests/test/twide4.pp svneol=native#text/plain tests/test/twide5.pp svneol=native#text/plain tests/test/twide6.pp svneol=native#text/plain tests/test/twide7.pp svneol=native#text/plain +tests/test/twide8.pp svneol=native#text/pascal +tests/test/twide9.pp svneol=native#text/pascal tests/test/twrstr1.pp svneol=native#text/plain tests/test/twrstr2.pp svneol=native#text/plain tests/test/twrstr3.pp svneol=native#text/plain diff --git a/compiler/scanner.pas b/compiler/scanner.pas index c8368f6221..62cc177da3 100644 --- a/compiler/scanner.pas +++ b/compiler/scanner.pas @@ -5104,7 +5104,7 @@ type begin readchar; { read leading $ } asciinr:='$'; - while (upcase(c) in ['A'..'F','0'..'9']) and (length(asciinr)<=5) do + while (upcase(c) in ['A'..'F','0'..'9']) and (length(asciinr)<=7) do begin asciinr:=asciinr+c; readchar; @@ -5114,7 +5114,7 @@ type begin readchar; { read leading $ } asciinr:='&'; - while (upcase(c) in ['0'..'7']) and (length(asciinr)<=7) do + while (upcase(c) in ['0'..'7']) and (length(asciinr)<=8) do begin asciinr:=asciinr+c; readchar; @@ -5124,7 +5124,7 @@ type begin readchar; { read leading $ } asciinr:='%'; - while (upcase(c) in ['0','1']) and (length(asciinr)<=17) do + while (upcase(c) in ['0','1']) and (length(asciinr)<=22) do begin asciinr:=asciinr+c; readchar; @@ -5133,7 +5133,7 @@ type else begin asciinr:=''; - while (c in ['0'..'9']) and (length(asciinr)<=5) do + while (c in ['0'..'9']) and (length(asciinr)<=8) do begin asciinr:=asciinr+c; readchar; @@ -5145,7 +5145,7 @@ type Message(scan_e_illegal_char_const) else if (m<0) or (m>255) or (length(asciinr)>3) then begin - if (m>=0) and (m<=65535) then + if (m>=0) and (m<=$10FFFF) then begin if not iswidestring then begin @@ -5156,7 +5156,15 @@ type iswidestring:=true; len:=0; end; - concatwidestringchar(patternw,tcompilerwidechar(m)); + if m<=$FFFF then + concatwidestringchar(patternw,tcompilerwidechar(m)) + else + begin + { split into surrogate pair } + dec(m,$10000); + concatwidestringchar(patternw,tcompilerwidechar((m shr 10) + $D800)); + concatwidestringchar(patternw,tcompilerwidechar((m and $3FF) + $DC00)); + end; end else Message(scan_e_illegal_char_const) diff --git a/tests/test/twide10.pp b/tests/test/twide10.pp new file mode 100644 index 0000000000..a73aa8f5a4 --- /dev/null +++ b/tests/test/twide10.pp @@ -0,0 +1,10 @@ +{ %FAIL } + +program twide10; + +var + s: UnicodeString; +begin + { this is greater than the highest defined Unicode code point } + s := #$110000; +end. diff --git a/tests/test/twide11.pp b/tests/test/twide11.pp new file mode 100644 index 0000000000..f93cdf1638 --- /dev/null +++ b/tests/test/twide11.pp @@ -0,0 +1,10 @@ +{ %FAIL } + +program twide11; + +var + s: UnicodeString; +begin + { this is greater than the highest defined Unicode code point } + s := #1114112; +end. diff --git a/tests/test/twide12.pp b/tests/test/twide12.pp new file mode 100644 index 0000000000..d26be797b3 --- /dev/null +++ b/tests/test/twide12.pp @@ -0,0 +1,10 @@ +{ %FAIL } + +program twide12; + +var + s: UnicodeString; +begin + { this is greater than the highest defined Unicode code point } + s := #&4200000; +end. diff --git a/tests/test/twide13.pp b/tests/test/twide13.pp new file mode 100644 index 0000000000..b353688758 --- /dev/null +++ b/tests/test/twide13.pp @@ -0,0 +1,10 @@ +{ %FAIL } + +program twide13; + +var + s: UnicodeString; +begin + { this is greater than the highest defined Unicode code point } + s := #%100010000000000000000; +end. diff --git a/tests/test/twide8.pp b/tests/test/twide8.pp new file mode 100644 index 0000000000..1fd03e1142 --- /dev/null +++ b/tests/test/twide8.pp @@ -0,0 +1,28 @@ +program twide8; + +procedure Check(const aStr: UnicodeString; aIndex: LongInt); +const + Char1 = #$DBFF; + Char2 = #$DFFF; +begin + if Length(aStr) <> 2 then + Halt(aIndex * 3); + if aStr[1] <> Char1 then + Halt(aIndex * 3 + 1); + if aStr[2] <> Char2 then + Halt(aIndex * 3 + 2); +end; + +var + s: UnicodeString; +begin + s := #$10FFFF; + Check(s, 1); + s := #1114111; + Check(s, 2); + s := #&4177777; + Check(s, 3); + s := #%100001111111111111111; + Check(s, 4); + Writeln('ok'); +end. diff --git a/tests/test/twide9.pp b/tests/test/twide9.pp new file mode 100644 index 0000000000..366ef309fe --- /dev/null +++ b/tests/test/twide9.pp @@ -0,0 +1,10 @@ +{ %FAIL } + +program twide9; + +var + u: UnicodeChar; +begin + { fails, because a code point > $FFFF decodes to a surrogate pair, thus a string constant } + u := #$10FFFF; +end.