* Fix 4-byte unicode characters

git-svn-id: trunk@40058 -
This commit is contained in:
michael 2018-10-27 17:16:13 +00:00
parent 645a528584
commit 8e0442fb1f
6 changed files with 139 additions and 43 deletions

View File

@ -875,14 +875,29 @@ end;
function JSONStringToString(const S: TJSONStringType): TJSONStringType; function JSONStringToString(const S: TJSONStringType): TJSONStringType;
Var Var
I,J,L : Integer; I,J,L,U1,U2 : Integer;
w : String; App,W : String;
Procedure MaybeAppendUnicode;
Var
U : String;
begin
if (U1<>0) then
begin
U:=UTF8Encode(WideChar(U1));
Result:=Result+U;
U1:=0;
end;
end;
begin begin
I:=1; I:=1;
J:=1; J:=1;
L:=Length(S); L:=Length(S);
Result:=''; Result:='';
U1:=0;
While (I<=L) do While (I<=L) do
begin begin
if (S[I]='\') then if (S[I]='\') then
@ -891,25 +906,41 @@ begin
If I<L then If I<L then
begin begin
Inc(I); Inc(I);
App:='';
Case S[I] of Case S[I] of
'\','"','/' '\','"','/'
: Result:=Result+S[I]; : App:=S[I];
'b' : Result:=Result+#8; 'b' : App:=#8;
't' : Result:=Result+#9; 't' : App:=#9;
'n' : Result:=Result+#10; 'n' : App:=#10;
'f' : Result:=Result+#12; 'f' : App:=#12;
'r' : Result:=Result+#13; 'r' : App:=#13;
'u' : begin 'u' : begin
W:=Copy(S,I+1,4); W:=Copy(S,I+1,4);
Inc(I,4); Inc(I,4);
Result:=Result+TJSONStringType(WideChar(StrToInt('$'+W))); u2:=StrToInt('$'+W);
if (U1<>0) then
begin
App:=UTF8Encode(WideChar(U1)+WideChar(U2));
U2:=0;
end
else
U1:=U2;
end; end;
end; end;
if App<>'' then
begin
MaybeAppendUnicode;
Result:=Result+App;
end;
end; end;
J:=I+1; J:=I+1;
end; end
else
MaybeAppendUnicode;
Inc(I); Inc(I);
end; end;
MaybeAppendUnicode;
Result:=Result+Copy(S,J,I-J+1); Result:=Result+Copy(S,J,I-J+1);
end; end;

View File

@ -218,11 +218,31 @@ var
TokenStart: PChar; TokenStart: PChar;
it : TJSONToken; it : TJSONToken;
I : Integer; I : Integer;
OldLength, SectionLength, tstart,tcol, u: Integer; OldLength, SectionLength, tstart,tcol, u1,u2: Integer;
C , c2: char; C , c2: char;
S : String; S : String;
IsStar,EOC: Boolean; IsStar,EOC: Boolean;
Procedure MaybeAppendUnicode;
Var
u : String;
begin
// if there is a leftover \u, append
if (u1<>0) then
begin
if (joUTF8 in Options) or (DefaultSystemCodePage=CP_UTF8) then
U:=Utf8Encode(WideString(WideChar(u1))) // ToDo: use faster function
else
U:=String(WideChar(u1)); // WideChar converts the encoding. Should it warn on loss?
FCurTokenString:=FCurTokenString+U;
OldLength:=Length(FCurTokenString);
u1:=0;
end;
end;
begin begin
if FTokenStr = nil then if FTokenStr = nil then
if not FetchLine then if not FetchLine then
@ -262,6 +282,7 @@ begin
TokenStart := FTokenStr; TokenStart := FTokenStr;
OldLength := 0; OldLength := 0;
FCurTokenString := ''; FCurTokenString := '';
u1:=0;
while not (FTokenStr[0] in [#0,C]) do while not (FTokenStr[0] in [#0,C]) do
begin begin
if (FTokenStr[0]='\') then if (FTokenStr[0]='\') then
@ -282,43 +303,64 @@ begin
'/' : S:='/'; '/' : S:='/';
'u' : begin 'u' : begin
S:='0000'; S:='0000';
u:=0; u2:=0;
For I:=1 to 4 do For I:=1 to 4 do
begin begin
Inc(FTokenStr); Inc(FTokenStr);
c2:=FTokenStr^; c2:=FTokenStr^;
Case c2 of Case c2 of
'0'..'9': u:=u*16+ord(c2)-ord('0'); '0'..'9': u2:=u2*16+ord(c2)-ord('0');
'A'..'F': u:=u*16+ord(c2)-ord('A')+10; 'A'..'F': u2:=u2*16+ord(c2)-ord('A')+10;
'a'..'f': u:=u*16+ord(c2)-ord('a')+10; 'a'..'f': u2:=u2*16+ord(c2)-ord('a')+10;
else else
Error(SErrInvalidCharacter, [CurRow,CurColumn,FTokenStr[0]]); Error(SErrInvalidCharacter, [CurRow,CurColumn,FTokenStr[0]]);
end; end;
end; end;
// ToDo: 4-bytes UTF16 // ToDo: 4-bytes UTF16
if (joUTF8 in Options) or (DefaultSystemCodePage=CP_UTF8) then if u1<>0 then
S:=Utf8Encode(WideString(WideChar(u))) // ToDo: use faster function begin
if (joUTF8 in Options) or (DefaultSystemCodePage=CP_UTF8) then
S:=Utf8Encode(WideString(WideChar(u1)+WideChar(u2))) // ToDo: use faster function
else
S:=String(WideChar(u1)+WideChar(u2)); // WideChar converts the encoding. Should it warn on loss?
u1:=0;
end
else else
S:=String(WideChar(u)); // WideChar converts the encoding. Should it warn on loss? begin
S:='';
u1:=u2;
end
end; end;
#0 : Error(SErrOpenString); #0 : Error(SErrOpenString);
else else
Error(SErrInvalidCharacter, [CurRow,CurColumn,FTokenStr[0]]); Error(SErrInvalidCharacter, [CurRow,CurColumn,FTokenStr[0]]);
end; end;
SetLength(FCurTokenString, OldLength + SectionLength+1+Length(S)); I:=Length(S);
if SectionLength > 0 then if (SectionLength+I>0) then
Move(TokenStart^, FCurTokenString[OldLength + 1], SectionLength); begin
Move(S[1],FCurTokenString[OldLength + SectionLength+1],Length(S)); // If length=1, we know it was not \uXX, but u1 can be nonzero, and we must first append it.
Inc(OldLength, SectionLength+Length(S)); // example: \u00f8\"
if I=1 then
MaybeAppendUnicode;
SetLength(FCurTokenString, OldLength + SectionLength+Length(S));
if SectionLength > 0 then
Move(TokenStart^, FCurTokenString[OldLength + 1], SectionLength);
if I>0 then
Move(S[1],FCurTokenString[OldLength + SectionLength+1],i);
Inc(OldLength, SectionLength+Length(S));
end;
// Next char // Next char
TokenStart := FTokenStr+1; TokenStart := FTokenStr+1;
end; end
else
MaybeAppendUnicode;
if FTokenStr[0] = #0 then if FTokenStr[0] = #0 then
Error(SErrOpenString); Error(SErrOpenString);
Inc(FTokenStr); Inc(FTokenStr);
end; end;
if FTokenStr[0] = #0 then if FTokenStr[0] = #0 then
Error(SErrOpenString); Error(SErrOpenString);
MaybeAppendUnicode;
SectionLength := FTokenStr - TokenStart; SectionLength := FTokenStr - TokenStart;
SetLength(FCurTokenString, OldLength + SectionLength); SetLength(FCurTokenString, OldLength + SectionLength);
if SectionLength > 0 then if SectionLength > 0 then

View File

@ -15,20 +15,17 @@
</BuildModes> </BuildModes>
<PublishOptions> <PublishOptions>
<Version Value="2"/> <Version Value="2"/>
<IgnoreBinaries Value="False"/>
<IncludeFileFilter Value="*.(pas|pp|inc|lfm|lpr|lrs|lpi|lpk|sh|xml)"/>
<ExcludeFileFilter Value="*.(bak|ppu|ppw|o|so);*~;backup"/>
</PublishOptions> </PublishOptions>
<RunParams> <RunParams>
<local> <local>
<CommandLineParams Value="--suite=TTestParser.TestArray"/> <CommandLineParams Value="--suite=TTestJSONString.TestJSONStringToString"/>
<LaunchingApplication PathPlusParams="/usr/X11R6/bin/xterm -T 'Lazarus Run Output' -e $(LazarusDir)/tools/runwait.sh $(TargetCmdLine)"/> <LaunchingApplication PathPlusParams="/usr/X11R6/bin/xterm -T 'Lazarus Run Output' -e $(LazarusDir)/tools/runwait.sh $(TargetCmdLine)"/>
</local> </local>
<FormatVersion Value="2"/> <FormatVersion Value="2"/>
<Modes Count="1"> <Modes Count="1">
<Mode0 Name="default"> <Mode0 Name="default">
<local> <local>
<CommandLineParams Value="--suite=TTestParser.TestArray"/> <CommandLineParams Value="--suite=TTestJSONString.TestJSONStringToString"/>
<LaunchingApplication PathPlusParams="/usr/X11R6/bin/xterm -T 'Lazarus Run Output' -e $(LazarusDir)/tools/runwait.sh $(TargetCmdLine)"/> <LaunchingApplication PathPlusParams="/usr/X11R6/bin/xterm -T 'Lazarus Run Output' -e $(LazarusDir)/tools/runwait.sh $(TargetCmdLine)"/>
</local> </local>
</Mode0> </Mode0>

View File

@ -17,6 +17,9 @@
program testjson; program testjson;
uses uses
{$ifdef unix}
cwstring,
{$endif}
Classes, testjsondata, testjsonparser, testjsonrtti, consoletestrunner, testjsonreader; Classes, testjsondata, testjsonparser, testjsonrtti, consoletestrunner, testjsonreader;
type type

View File

@ -3993,6 +3993,11 @@ begin
end; end;
procedure TTestJSONString.TestJSONStringToString; procedure TTestJSONString.TestJSONStringToString;
Const
// Glowing star in UTF8
GlowingStar = #$F0#$9F#$8C#$9F;
begin begin
TestFrom('',''); TestFrom('','');
TestFrom('A','A'); TestFrom('A','A');
@ -4029,6 +4034,9 @@ begin
TestFrom('\n\n',#10#10); TestFrom('\n\n',#10#10);
TestFrom('\f\f',#12#12); TestFrom('\f\f',#12#12);
TestFrom('\r\r',#13#13); TestFrom('\r\r',#13#13);
TestFrom('\u00f8','ø'); // this is ø
TestFrom('\u00f8\"','ø"'); // this is ø"
TestFrom('\ud83c\udf1f',GlowingStar);
end; end;
procedure TTestJSONString.TestStringToJSONString; procedure TTestJSONString.TestStringToJSONString;

View File

@ -37,6 +37,7 @@ type
procedure DoTestFloat(F: TJSONFloat); overload; procedure DoTestFloat(F: TJSONFloat); overload;
procedure DoTestFloat(F: TJSONFloat; S: String); overload; procedure DoTestFloat(F: TJSONFloat; S: String); overload;
procedure DoTestObject(S: String; const ElNames: array of String; DoJSONTest : Boolean = True); procedure DoTestObject(S: String; const ElNames: array of String; DoJSONTest : Boolean = True);
procedure DoTestString(S : String; AResult : String);
procedure DoTestString(S : String); procedure DoTestString(S : String);
procedure DoTestArray(S: String; ACount: Integer; IgnoreJSON: Boolean=False); procedure DoTestArray(S: String; ACount: Integer; IgnoreJSON: Boolean=False);
Procedure DoTestClass(S : String; AClass : TJSONDataClass); Procedure DoTestClass(S : String; AClass : TJSONDataClass);
@ -79,7 +80,7 @@ Var
J : TJSONData; J : TJSONData;
begin begin
P:=TJSONParser.Create(''); P:=TJSONParser.Create('',[joUTF8]);
Try Try
J:=P.Parse; J:=P.Parse;
If (J<>Nil) then If (J<>Nil) then
@ -97,7 +98,7 @@ Var
J : TJSONData; J : TJSONData;
begin begin
P:=TJSONParser.Create('1'); P:=TJSONParser.Create('1',[joUTF8]);
Try Try
J:=P.Parse; J:=P.Parse;
If (J=Nil) then If (J=Nil) then
@ -117,7 +118,7 @@ Var
J : TJSONData; J : TJSONData;
begin begin
P:=TJSONParser.Create('123456789012345'); P:=TJSONParser.Create('123456789012345',[joUTF8]);
Try Try
J:=P.Parse; J:=P.Parse;
If (J=Nil) then If (J=Nil) then
@ -137,7 +138,7 @@ Var
J : TJSONData; J : TJSONData;
begin begin
P:=TJSONParser.Create('null'); P:=TJSONParser.Create('null',[joUTF8]);
Try Try
J:=P.Parse; J:=P.Parse;
If (J=Nil) then If (J=Nil) then
@ -156,7 +157,7 @@ Var
J : TJSONData; J : TJSONData;
begin begin
P:=TJSONParser.Create('true'); P:=TJSONParser.Create('true',[joUTF8]);
Try Try
J:=P.Parse; J:=P.Parse;
If (J=Nil) then If (J=Nil) then
@ -176,7 +177,7 @@ Var
J : TJSONData; J : TJSONData;
begin begin
P:=TJSONParser.Create('false'); P:=TJSONParser.Create('false',[joUTF8]);
Try Try
J:=P.Parse; J:=P.Parse;
If (J=Nil) then If (J=Nil) then
@ -206,10 +207,18 @@ end;
procedure TTestParser.TestString; procedure TTestParser.TestString;
Const
// Glowing star in UTF8
GlowingStar = #$F0#$9F#$8C#$9F;
begin begin
DoTestString('A string'); DoTestString('A string');
DoTestString(''); DoTestString('');
DoTestString('\"'); DoTestString('\"');
DoTestString('\u00f8','ø'); // this is ø
DoTestString('\u00f8\"','ø"'); // this is ø"
// Writeln(GlowingStar);
DoTestString('\ud83c\udf1f',GlowingStar);
end; end;
@ -348,7 +357,7 @@ Var
begin begin
J:=Nil; J:=Nil;
P:=TJSONParser.Create(S); P:=TJSONParser.Create(S,[joUTF8]);
Try Try
P.Options:=FOptions; P.Options:=FOptions;
J:=P.Parse; J:=P.Parse;
@ -400,7 +409,7 @@ Var
D : TJSONData; D : TJSONData;
begin begin
P:=TJSONParser.Create(S); P:=TJSONParser.Create(S,[joUTF8]);
try try
D:=P.Parse; D:=P.Parse;
try try
@ -536,7 +545,7 @@ Var
begin begin
ParseOK:=False; ParseOK:=False;
P:=TJSONParser.Create(S); P:=TJSONParser.Create(S,[joUTF8]);
P.OPtions:=Options; P.OPtions:=Options;
J:=Nil; J:=Nil;
Try Try
@ -561,24 +570,30 @@ end;
procedure TTestParser.DoTestString(S: String); procedure TTestParser.DoTestString(S: String);
begin
DoTestString(S,JSONStringToString(S));
end;
procedure TTestParser.DoTestString(S: String; AResult : String);
Var Var
P : TJSONParser; P : TJSONParser;
J : TJSONData; J : TJSONData;
begin begin
P:=TJSONParser.Create('"'+S+'"'); P:=TJSONParser.Create('"'+S+'"',[joUTF8]);
Try Try
J:=P.Parse; J:=P.Parse;
If (J=Nil) then If (J=Nil) then
Fail('Parse of string "'+S+'" fails'); Fail('Parse of string "'+S+'" fails');
TestJSONType(J,jtString); TestJSONType(J,jtString);
TestAsString(J,JSONStringToString(S)); TestAsString(J,aResult);
TestJSON(J,'"'+S+'"'); if Pos('\u',S)=0 then
TestJSON(J,'"'+S+'"');
Finally Finally
FreeAndNil(J); FreeAndNil(J);
FreeAndNil(P); FreeAndNil(P);
end; end;
end; end;
procedure TTestParser.DoTestFloat(F : TJSONFloat); procedure TTestParser.DoTestFloat(F : TJSONFloat);
@ -598,7 +613,7 @@ Var
J : TJSONData; J : TJSONData;
begin begin
P:=TJSONParser.Create(S); P:=TJSONParser.Create(S,[joUTF8]);
Try Try
J:=P.Parse; J:=P.Parse;
If (J=Nil) then If (J=Nil) then