fcl-js: escape unicode whitespaces in string literals

This commit is contained in:
mattias 2024-09-10 10:28:45 +02:00
parent 1a3d651cfb
commit af206dfecd
4 changed files with 54 additions and 31 deletions

View File

@ -284,6 +284,7 @@ begin
// conversion magic
SetCodePage(RawByteString(Result), CP_ACP, False);
end;
{$endif}
{$ifndef FPC_DOTTEDUNITS}
function LeftStr(const s: UnicodeString; Count: SizeInt): UnicodeString; overload;
@ -292,8 +293,6 @@ begin
end;
{$endif}
{$endif}
function QuoteJSString(const S: TJSString; Quote: TJSChar): TJSString;
var
i, j, Count: Integer;
@ -525,17 +524,17 @@ end;
procedure TJSWriter.Write(const U: UnicodeString);
Var
S : UTF8String;
s : TJSWriterString;
begin
//system.writeln('TJSWriter.Write unicodestring=',U);
WriteIndent;
if UseUTF8 then
begin
S:=UTF16ToUTF8(U);
if S='' then exit;
FLinePos:=FLinePos+Writer.Write(S);
FLastChar:=AnsiChar(S[length(S)]);
s:=UTF16ToUTF8(U);
if s='' then exit;
FLinePos:=FLinePos+Writer.Write(s);
FLastChar:=AnsiChar(s[length(s)]);
end
else if U<>'' then
begin
@ -604,7 +603,7 @@ function TJSWriter.EscapeString(const S: TJSString; Quote: TJSEscapeQuote
Var
I,J,L : Integer;
R: TJSString;
c: WideChar;
c: Word;
begin
//system.writeln('TJSWriter.EscapeString "',S,'"');
I:=1;
@ -613,27 +612,30 @@ begin
L:=Length(S);
While I<=L do
begin
c:=S[I];
if (c in [#0..#31,'"','''','\'])
or (c>=#$ff00) or ((c>=#$D800) and (c<=#$DFFF)) then
c:=ord(S[I]);
if (c in [0..31,ord('"'),ord(''''),ord('\')])
or (c>=$ff00)
or ((c>=$D800) and (c<=$DFFF))
or (c=$1680) or ((c>=$2000) and (c<=$200A)) or (c=$2028) or (c=$205F) or (c=$3000) // whitespaces
then
begin
R:=R+Copy(S,J,I-J);
Case c of
'\' : R:=R+'\\';
'"' : if Quote=jseqSingle then R:=R+'"' else R:=R+'\"';
'''': if Quote=jseqDouble then R:=R+'''' else R:=R+'\''';
#0..#7,#11,#14..#31: R:=R+'\x'+TJSString(hexStr(ord(c),2));
#8 : R:=R+'\b';
#9 : R:=R+'\t';
#10 : R:=R+'\n';
#12 : R:=R+'\f';
#13 : R:=R+'\r';
#$D800..#$DBFF:
ord('\') : R:=R+'\\';
ord('"') : if Quote=jseqSingle then R:=R+'"' else R:=R+'\"';
ord(''''): if Quote=jseqDouble then R:=R+'''' else R:=R+'\''';
0..7,11,14..31: R:=R+'\x'+TJSString(hexStr(ord(c),2));
8 : R:=R+'\b';
9 : R:=R+'\t';
10 : R:=R+'\n';
12 : R:=R+'\f';
13 : R:=R+'\r';
$D800..$DBFF:
begin
if (I<L) then
begin
c:=S[I+1];
if (c>=#$DC00) and (c<=#$DFFF) then
c:=ord(S[I+1]);
if (c>=$DC00) and (c<=$DFFF) then
begin
// surrogate, two WideChar codepoint
R:=R+Copy(S,I,2);
@ -647,14 +649,15 @@ begin
end
else
// high surrogate without low surrogate at end of string, cannot be encoded as UTF-8 -> encode as hex
R:=R+'\u'+TJSString(HexStr(ord(c),4));
R:=R+'\u'+TJSString(HexStr(c,4));
end;
#$DC00..#$DFFF:
$DC00..$DFFF:
begin
// low surrogate without high surrogate, cannot be encoded as UTF-8 -> encode as hex
R:=R+'\u'+TJSString(HexStr(ord(c),4));
R:=R+'\u'+TJSString(HexStr(c,4));
end;
#$FF00..#$FFFF: R:=R+'\u'+TJSString(HexStr(ord(c),4));
else
R:=R+'\u'+TJSString(HexStr(c,4));
end;
J:=I+1;
end;
@ -2249,7 +2252,7 @@ begin
continue;
end;
else
// ignore low/high surrogate, CurColumn is AnsiChar index, not codepoint
// ignore low/high surrogate, CurColumn is WideChar index, not codepoint
inc(FCurColumn);
end;
inc(p);

View File

@ -108,9 +108,6 @@
<OptimizationLevel Value="0"/>
</Optimizations>
</CodeGeneration>
<Other>
<CustomOptions Value="-tunicodertl"/>
</Other>
</CompilerOptions>
<Debugging>
<Exceptions Count="6">

1
packages/pastojs/tests/.gitignore vendored Normal file
View File

@ -0,0 +1 @@
testpas2js

View File

@ -325,6 +325,7 @@ type
Procedure TestStringConst;
Procedure TestStringConst_InvalidUTF16;
Procedure TestStringConstSurrogate;
Procedure TestStringConstWhitespaces;
Procedure TestStringConst_Multiline;
Procedure TestString_Length;
Procedure TestString_Compare;
@ -8863,6 +8864,27 @@ begin
]));
end;
procedure TTestModule.TestStringConstWhitespaces;
begin
StartProgram(false);
Add([
'var',
' s: string;',
'begin',
' s:=#$2028;', // line separator not supported by some editors, e.g. vsc
' s:=''Medium Mathematical Space ''#$205f',
'']);
ConvertProgram;
CheckSource('TestStringConstSurrogate',
LinesToStr([
'this.s="";'
]),
LinesToStr([
'$mod.s="\u2028";',
'$mod.s="Medium Mathematical Space \u205F";'
]));
end;
procedure TTestModule.TestStringConst_Multiline;
begin
StartProgram(false);