pastojs: fixed UTF-16 chars

This commit is contained in:
mattias 2020-12-21 15:52:44 +00:00
parent a9b4ca33c3
commit 3c51a0d851
2 changed files with 79 additions and 10 deletions

View File

@ -6197,7 +6197,8 @@ begin
cInterfaceToString:=cTypeConversion+1;
{$IFDEF FPC_HAS_CPSTRING}
ExprEvaluator.DefaultStringCodePage:=CP_UTF8;
ExprEvaluator.DefaultSourceCodePage:=CP_UTF8;
ExprEvaluator.DefaultStringCodePage:=CP_UTF16;
{$ENDIF}
FExternalNames:=TPasResHashList.Create;
StoreSrcColumns:=true;
@ -6513,10 +6514,10 @@ function TPas2JSResolver.ExtractPasStringLiteral(El: TPasElement;
S is a Pascal string literal e.g. 'Line'#10
'' empty string
'''' => "'"
#decimal #0..255 is UTF-8 byte, #01..0255 is UTF-16, #256+ is UTF-16
#$hex #$0..$ff is UTF-8 byte, #$01..$0FF is UTF-16, #$100+ is UTF-16
#decimal
#$hex
^l l is a letter a-z
Invalid UTF-8 sequences give an error
Note that invalid UTF-8 sequences are checked by the scanner
}
var
p, StartP, i, l: integer;
@ -6562,7 +6563,7 @@ begin
end;
'#':
begin
// byte or word sequence
// word sequence
inc(p);
if p>l then
RaiseInternalError(20170207155121);
@ -6587,7 +6588,6 @@ begin
end;
if p=StartP then
RaiseInternalError(20170207164956);
Result:=Result+CodePointToJSString(i);
end
else
begin
@ -6607,8 +6607,8 @@ begin
end;
if p=StartP then
RaiseInternalError(20170207171148);
Result:=Result+CodePointToJSString(i);
end;
Result:=Result+CodePointToJSString(i);
end;
'^':
begin

View File

@ -282,6 +282,7 @@ type
Procedure TestChar_Compare;
Procedure TestChar_BuiltInProcs;
Procedure TestStringConst;
Procedure TestStringConst_InvalidUTF16;
Procedure TestStringConstSurrogate;
Procedure TestString_Length;
Procedure TestString_Compare;
@ -983,6 +984,28 @@ var
end;
end;
{ Returns true if s contains at least one character from the set
  #0..#31 (control characters), #127 (DEL) or #128..#255;
  returns false otherwise, including for the empty string. }
function HasSpecialChar(s: string): boolean;
const
  SpecialChars = [#0..#31,#127..#255];
var
  Idx: Integer;
begin
  Result:=false;
  Idx:=1;
  // stop scanning as soon as the first special character is seen
  while (Idx<=length(s)) and not Result do
    begin
    Result:=s[Idx] in SpecialChars;
    inc(Idx);
    end;
end;
{ Returns a copy of s in which every character from the set
  #0..#31, #127..#255 is replaced by '#' followed by its ordinal value
  as two hexadecimal digits. All other characters are copied unchanged. }
function HashSpecialChars(s: string): string;
const
  SpecialChars = [#0..#31,#127..#255];
var
  Idx: Integer;
  Piece: string;
begin
  Result:='';
  Idx:=0;
  while Idx<length(s) do
    begin
    inc(Idx);
    if s[Idx] in SpecialChars then
      Piece:='#'+hexstr(ord(s[Idx]),2) // escaped form, e.g. #0A
    else
      Piece:=s[Idx];
    Result:=Result+Piece;
    end;
end;
procedure DiffFound;
var
ActLineStartP, ActLineEndP, p, StartPos: PChar;
@ -1011,8 +1034,12 @@ var
ActLineEndP:=FindLineEnd(ActualP);
ActLine:=copy(Actual,ActLineStartP-PChar(Actual)+1,ActLineEndP-ActLineStartP);
writeln('- ',ActLine);
if HasSpecialChar(ActLine) then
writeln('- ',HashSpecialChars(ActLine));
// write expected line, prefixed with '+ ' (actual lines use '- ')
writeln('+ ',ExpLine);
// if the expected line contains control or non-ASCII bytes, print it a
// second time with those bytes escaped; keep the '+ ' prefix so the
// escaped copy is not mistaken for an actual line
if HasSpecialChar(ExpLine) then
  writeln('+ ',HashSpecialChars(ExpLine));
// write empty line with pointer ^
for i:=1 to 2+ExpectedP-StartPos do write(' ');
writeln('^');
@ -7609,11 +7636,16 @@ begin
' s:=''"''''"'';',
' s:=#$20AC;', // euro
' s:=#$10437;', // outside BMP
//' s:=#$F0#$90#$90#$B7;', // as UTF-8
' s:=''abc''#$20AC;', // ascii,#
' s:=''ä''#$20AC;', // non ascii,#
' s:=#$20AC''abc'';', // #, ascii
' s:=#$20AC''ä'';', // #, non ascii
' s:=default(string);',
' s:=concat(s);',
' s:=concat(s,''a'',s);',
//' s:=#0250#269;',
' s:=#250#269;',
//' s:=#$2F804;',
// ToDo: \uD87E\uDC04 -> \u{2F804}
'']);
ConvertProgram;
CheckSource('TestStringConst',
@ -7635,10 +7667,47 @@ begin
'$mod.s=''"\''"'';',
'$mod.s="€";',
'$mod.s="'#$F0#$90#$90#$B7'";',
//'$mod.s="'#$F0#$90#$90#$B7'";',
'$mod.s = "abc€";',
'$mod.s = "ä€";',
'$mod.s = "€abc";',
'$mod.s = "ۊ";',
'$mod.s="";',
'$mod.s = $mod.s;',
'$mod.s = $mod.s.concat("a", $mod.s);',
'$mod.s = "úč";',
'']));
end;
{ Converts a program whose char/string constants are built from #-codes
  and compares the generated JavaScript with the expected source:
  - the lone surrogate #$D87E is expected as the escape \uD87E,
  - #0#1#2 are expected as \x00\x01\x02,
  - #127 is expected as the raw character,
  - #128, #255 and #256 are expected as the characters U+0080, U+00FF and
    U+0100, written here as their UTF-8 byte sequences.
  The first argument of CheckSource is presumably the label reported on
  failure (verify against CheckSource); it names this test instead of
  reusing the 'TestStringConst' label of the sibling test. }
procedure TTestModule.TestStringConst_InvalidUTF16;
begin
  StartProgram(false);
  Add([
  'const',
  ' a: char = #$D87E;',
  ' b: string = #$D87E;',
  ' c: string = #$D87E#43;',
  'begin',
  ' c:=''abc''#$D87E;',
  ' c:=#0#1#2;',
  ' c:=#127;',
  ' c:=#128;',
  ' c:=#255;',
  ' c:=#256;',
  '']);
  ConvertProgram;
  CheckSource('TestStringConst_InvalidUTF16',
    LinesToStr([
    // expected declarations of the constants
    'this.a = "\uD87E";',
    'this.b = "\uD87E";',
    'this.c = "\uD87E+";',
    '']),
    LinesToStr([
    // expected statements of the main begin..end block
    '$mod.c = "abc\uD87E";',
    '$mod.c = "\x00\x01\x02";',
    '$mod.c = "'#127'";',
    '$mod.c = "'#$c2#$80'";',
    '$mod.c = "'#$c3#$BF'";',
    '$mod.c = "'#$c4#$80'";',
    '']));
end;