mirror of
https://gitlab.com/freepascal.org/fpc/source.git
synced 2025-12-16 02:30:41 +01:00
* implement support for 4 Byte UTF-8 codepoints that result in a surrogate pair for UTF-16
git-svn-id: trunk@36116 -
This commit is contained in:
parent
af48d176ec
commit
c552b2957a
1
.gitattributes
vendored
1
.gitattributes
vendored
@ -12291,6 +12291,7 @@ tests/test/tcpstr26b.pp svneol=native#text/plain
|
||||
tests/test/tcpstr26c.pp svneol=native#text/plain
|
||||
tests/test/tcpstr26d.pp svneol=native#text/plain
|
||||
tests/test/tcpstr27.pp svneol=native#text/plain
|
||||
tests/test/tcpstr28.pp svneol=native#text/pascal
|
||||
tests/test/tcpstr2a.pp svneol=native#text/plain
|
||||
tests/test/tcpstr3.pp svneol=native#text/plain
|
||||
tests/test/tcpstr4.pp svneol=native#text/plain
|
||||
|
||||
@ -4586,6 +4586,7 @@ type
|
||||
procedure tscannerfile.readtoken(allowrecordtoken:boolean);
|
||||
var
|
||||
code : integer;
|
||||
d : cardinal;
|
||||
len,
|
||||
low,high,mid : longint;
|
||||
w : word;
|
||||
@ -5160,9 +5161,35 @@ type
|
||||
iswidestring:=true;
|
||||
len:=0;
|
||||
end;
|
||||
{ four or more chars aren't handled }
|
||||
{ four chars }
|
||||
if (ord(c) and $f0)=$f0 then
|
||||
message(scan_e_utf8_bigger_than_65535)
|
||||
begin
|
||||
{ this always represents a surrogate pair, so
|
||||
read as 32-bit value and then split into
|
||||
the corresponding pair of two wchars }
|
||||
d:=ord(c) and $f;
|
||||
readchar;
|
||||
if (ord(c) and $c0)<>$80 then
|
||||
message(scan_e_utf8_malformed);
|
||||
d:=(d shl 6) or (ord(c) and $3f);
|
||||
readchar;
|
||||
if (ord(c) and $c0)<>$80 then
|
||||
message(scan_e_utf8_malformed);
|
||||
d:=(d shl 6) or (ord(c) and $3f);
|
||||
readchar;
|
||||
if (ord(c) and $c0)<>$80 then
|
||||
message(scan_e_utf8_malformed);
|
||||
d:=(d shl 6) or (ord(c) and $3f);
|
||||
if d<$10000 then
|
||||
message(scan_e_utf8_malformed);
|
||||
d:=d-$10000;
|
||||
{ high surrogate }
|
||||
w:=$d800+(d shr 10);
|
||||
concatwidestringchar(patternw,w);
|
||||
{ low surrogate }
|
||||
w:=$dc00+(d and $3ff);
|
||||
concatwidestringchar(patternw,w);
|
||||
end
|
||||
{ three chars }
|
||||
else if (ord(c) and $e0)=$e0 then
|
||||
begin
|
||||
|
||||
23
tests/test/tcpstr28.pp
Normal file
23
tests/test/tcpstr28.pp
Normal file
@ -0,0 +1,23 @@
|
||||
program tcpstr28;
|
||||
|
||||
{$codepage utf8}
|
||||
|
||||
const
|
||||
Test = '𝄞𤽜';
|
||||
UTF8Test = UTF8String(Test);
|
||||
UTF16Test = UnicodeString(Test);
|
||||
|
||||
var
|
||||
utf8str: UTF8String = Test;
|
||||
utf16str: UnicodeString = Test;
|
||||
|
||||
begin
|
||||
if Length(UTF8Test) <> 8 then
|
||||
Halt(1);
|
||||
if Length(utf8str) <> 8 then
|
||||
Halt(1);
|
||||
if Length(UTF16Test) <> 4 then
|
||||
Halt(1);
|
||||
if Length(utf16str) <> 4 then
|
||||
Halt(1);
|
||||
end.
|
||||
Loading…
Reference in New Issue
Block a user