* Fix 4-byte unicode characters

git-svn-id: trunk@40058 -
This commit is contained in:
michael 2018-10-27 17:16:13 +00:00
parent 645a528584
commit 8e0442fb1f
6 changed files with 139 additions and 43 deletions

View File

@ -875,14 +875,29 @@ end;
function JSONStringToString(const S: TJSONStringType): TJSONStringType;
Var
I,J,L : Integer;
w : String;
I,J,L,U1,U2 : Integer;
App,W : String;
Procedure MaybeAppendUnicode;
Var
U : String;
begin
if (U1<>0) then
begin
U:=UTF8Encode(WideChar(U1));
Result:=Result+U;
U1:=0;
end;
end;
begin
I:=1;
J:=1;
L:=Length(S);
Result:='';
U1:=0;
While (I<=L) do
begin
if (S[I]='\') then
@ -891,25 +906,41 @@ begin
If I<L then
begin
Inc(I);
App:='';
Case S[I] of
'\','"','/'
: Result:=Result+S[I];
'b' : Result:=Result+#8;
't' : Result:=Result+#9;
'n' : Result:=Result+#10;
'f' : Result:=Result+#12;
'r' : Result:=Result+#13;
: App:=S[I];
'b' : App:=#8;
't' : App:=#9;
'n' : App:=#10;
'f' : App:=#12;
'r' : App:=#13;
'u' : begin
W:=Copy(S,I+1,4);
Inc(I,4);
Result:=Result+TJSONStringType(WideChar(StrToInt('$'+W)));
u2:=StrToInt('$'+W);
if (U1<>0) then
begin
App:=UTF8Encode(WideChar(U1)+WideChar(U2));
U2:=0;
end
else
U1:=U2;
end;
end;
if App<>'' then
begin
MaybeAppendUnicode;
Result:=Result+App;
end;
end;
J:=I+1;
end;
end
else
MaybeAppendUnicode;
Inc(I);
end;
MaybeAppendUnicode;
Result:=Result+Copy(S,J,I-J+1);
end;

View File

@ -218,11 +218,31 @@ var
TokenStart: PChar;
it : TJSONToken;
I : Integer;
OldLength, SectionLength, tstart,tcol, u: Integer;
OldLength, SectionLength, tstart,tcol, u1,u2: Integer;
C , c2: char;
S : String;
IsStar,EOC: Boolean;
Procedure MaybeAppendUnicode;
Var
u : String;
begin
// if there is a leftover \u, append
if (u1<>0) then
begin
if (joUTF8 in Options) or (DefaultSystemCodePage=CP_UTF8) then
U:=Utf8Encode(WideString(WideChar(u1))) // ToDo: use faster function
else
U:=String(WideChar(u1)); // WideChar converts the encoding. Should it warn on loss?
FCurTokenString:=FCurTokenString+U;
OldLength:=Length(FCurTokenString);
u1:=0;
end;
end;
begin
if FTokenStr = nil then
if not FetchLine then
@ -262,6 +282,7 @@ begin
TokenStart := FTokenStr;
OldLength := 0;
FCurTokenString := '';
u1:=0;
while not (FTokenStr[0] in [#0,C]) do
begin
if (FTokenStr[0]='\') then
@ -282,43 +303,64 @@ begin
'/' : S:='/';
'u' : begin
S:='0000';
u:=0;
u2:=0;
For I:=1 to 4 do
begin
Inc(FTokenStr);
c2:=FTokenStr^;
Case c2 of
'0'..'9': u:=u*16+ord(c2)-ord('0');
'A'..'F': u:=u*16+ord(c2)-ord('A')+10;
'a'..'f': u:=u*16+ord(c2)-ord('a')+10;
'0'..'9': u2:=u2*16+ord(c2)-ord('0');
'A'..'F': u2:=u2*16+ord(c2)-ord('A')+10;
'a'..'f': u2:=u2*16+ord(c2)-ord('a')+10;
else
Error(SErrInvalidCharacter, [CurRow,CurColumn,FTokenStr[0]]);
end;
end;
// ToDo: 4-bytes UTF16
if (joUTF8 in Options) or (DefaultSystemCodePage=CP_UTF8) then
S:=Utf8Encode(WideString(WideChar(u))) // ToDo: use faster function
if u1<>0 then
begin
if (joUTF8 in Options) or (DefaultSystemCodePage=CP_UTF8) then
S:=Utf8Encode(WideString(WideChar(u1)+WideChar(u2))) // ToDo: use faster function
else
S:=String(WideChar(u1)+WideChar(u2)); // WideChar converts the encoding. Should it warn on loss?
u1:=0;
end
else
S:=String(WideChar(u)); // WideChar converts the encoding. Should it warn on loss?
begin
S:='';
u1:=u2;
end
end;
#0 : Error(SErrOpenString);
else
Error(SErrInvalidCharacter, [CurRow,CurColumn,FTokenStr[0]]);
end;
SetLength(FCurTokenString, OldLength + SectionLength+1+Length(S));
if SectionLength > 0 then
Move(TokenStart^, FCurTokenString[OldLength + 1], SectionLength);
Move(S[1],FCurTokenString[OldLength + SectionLength+1],Length(S));
Inc(OldLength, SectionLength+Length(S));
I:=Length(S);
if (SectionLength+I>0) then
begin
// If length=1, we know it was not \uXX, but u1 can be nonzero, and we must first append it.
// example: \u00f8\"
if I=1 then
MaybeAppendUnicode;
SetLength(FCurTokenString, OldLength + SectionLength+Length(S));
if SectionLength > 0 then
Move(TokenStart^, FCurTokenString[OldLength + 1], SectionLength);
if I>0 then
Move(S[1],FCurTokenString[OldLength + SectionLength+1],i);
Inc(OldLength, SectionLength+Length(S));
end;
// Next char
TokenStart := FTokenStr+1;
end;
end
else
MaybeAppendUnicode;
if FTokenStr[0] = #0 then
Error(SErrOpenString);
Inc(FTokenStr);
end;
if FTokenStr[0] = #0 then
Error(SErrOpenString);
MaybeAppendUnicode;
SectionLength := FTokenStr - TokenStart;
SetLength(FCurTokenString, OldLength + SectionLength);
if SectionLength > 0 then

View File

@ -15,20 +15,17 @@
</BuildModes>
<PublishOptions>
<Version Value="2"/>
<IgnoreBinaries Value="False"/>
<IncludeFileFilter Value="*.(pas|pp|inc|lfm|lpr|lrs|lpi|lpk|sh|xml)"/>
<ExcludeFileFilter Value="*.(bak|ppu|ppw|o|so);*~;backup"/>
</PublishOptions>
<RunParams>
<local>
<CommandLineParams Value="--suite=TTestParser.TestArray"/>
<CommandLineParams Value="--suite=TTestJSONString.TestJSONStringToString"/>
<LaunchingApplication PathPlusParams="/usr/X11R6/bin/xterm -T 'Lazarus Run Output' -e $(LazarusDir)/tools/runwait.sh $(TargetCmdLine)"/>
</local>
<FormatVersion Value="2"/>
<Modes Count="1">
<Mode0 Name="default">
<local>
<CommandLineParams Value="--suite=TTestParser.TestArray"/>
<CommandLineParams Value="--suite=TTestJSONString.TestJSONStringToString"/>
<LaunchingApplication PathPlusParams="/usr/X11R6/bin/xterm -T 'Lazarus Run Output' -e $(LazarusDir)/tools/runwait.sh $(TargetCmdLine)"/>
</local>
</Mode0>

View File

@ -17,6 +17,9 @@
program testjson;
uses
{$ifdef unix}
cwstring,
{$endif}
Classes, testjsondata, testjsonparser, testjsonrtti, consoletestrunner, testjsonreader;
type

View File

@ -3993,6 +3993,11 @@ begin
end;
procedure TTestJSONString.TestJSONStringToString;
Const
// Glowing star in UTF8
GlowingStar = #$F0#$9F#$8C#$9F;
begin
TestFrom('','');
TestFrom('A','A');
@ -4029,6 +4034,9 @@ begin
TestFrom('\n\n',#10#10);
TestFrom('\f\f',#12#12);
TestFrom('\r\r',#13#13);
TestFrom('\u00f8','ø'); // this is ø
TestFrom('\u00f8\"','ø"'); // this is ø"
TestFrom('\ud83c\udf1f',GlowingStar);
end;
procedure TTestJSONString.TestStringToJSONString;

View File

@ -37,6 +37,7 @@ type
procedure DoTestFloat(F: TJSONFloat); overload;
procedure DoTestFloat(F: TJSONFloat; S: String); overload;
procedure DoTestObject(S: String; const ElNames: array of String; DoJSONTest : Boolean = True);
procedure DoTestString(S : String; AResult : String);
procedure DoTestString(S : String);
procedure DoTestArray(S: String; ACount: Integer; IgnoreJSON: Boolean=False);
Procedure DoTestClass(S : String; AClass : TJSONDataClass);
@ -79,7 +80,7 @@ Var
J : TJSONData;
begin
P:=TJSONParser.Create('');
P:=TJSONParser.Create('',[joUTF8]);
Try
J:=P.Parse;
If (J<>Nil) then
@ -97,7 +98,7 @@ Var
J : TJSONData;
begin
P:=TJSONParser.Create('1');
P:=TJSONParser.Create('1',[joUTF8]);
Try
J:=P.Parse;
If (J=Nil) then
@ -117,7 +118,7 @@ Var
J : TJSONData;
begin
P:=TJSONParser.Create('123456789012345');
P:=TJSONParser.Create('123456789012345',[joUTF8]);
Try
J:=P.Parse;
If (J=Nil) then
@ -137,7 +138,7 @@ Var
J : TJSONData;
begin
P:=TJSONParser.Create('null');
P:=TJSONParser.Create('null',[joUTF8]);
Try
J:=P.Parse;
If (J=Nil) then
@ -156,7 +157,7 @@ Var
J : TJSONData;
begin
P:=TJSONParser.Create('true');
P:=TJSONParser.Create('true',[joUTF8]);
Try
J:=P.Parse;
If (J=Nil) then
@ -176,7 +177,7 @@ Var
J : TJSONData;
begin
P:=TJSONParser.Create('false');
P:=TJSONParser.Create('false',[joUTF8]);
Try
J:=P.Parse;
If (J=Nil) then
@ -206,10 +207,18 @@ end;
procedure TTestParser.TestString;
Const
// Glowing star in UTF8
GlowingStar = #$F0#$9F#$8C#$9F;
begin
DoTestString('A string');
DoTestString('');
DoTestString('\"');
DoTestString('\u00f8','ø'); // this is ø
DoTestString('\u00f8\"','ø"'); // this is ø"
// Writeln(GlowingStar);
DoTestString('\ud83c\udf1f',GlowingStar);
end;
@ -348,7 +357,7 @@ Var
begin
J:=Nil;
P:=TJSONParser.Create(S);
P:=TJSONParser.Create(S,[joUTF8]);
Try
P.Options:=FOptions;
J:=P.Parse;
@ -400,7 +409,7 @@ Var
D : TJSONData;
begin
P:=TJSONParser.Create(S);
P:=TJSONParser.Create(S,[joUTF8]);
try
D:=P.Parse;
try
@ -536,7 +545,7 @@ Var
begin
ParseOK:=False;
P:=TJSONParser.Create(S);
P:=TJSONParser.Create(S,[joUTF8]);
P.OPtions:=Options;
J:=Nil;
Try
@ -561,24 +570,30 @@ end;
procedure TTestParser.DoTestString(S: String);
begin
DoTestString(S,JSONStringToString(S));
end;
procedure TTestParser.DoTestString(S: String; AResult : String);
Var
P : TJSONParser;
J : TJSONData;
begin
P:=TJSONParser.Create('"'+S+'"');
P:=TJSONParser.Create('"'+S+'"',[joUTF8]);
Try
J:=P.Parse;
If (J=Nil) then
Fail('Parse of string "'+S+'" fails');
TestJSONType(J,jtString);
TestAsString(J,JSONStringToString(S));
TestJSON(J,'"'+S+'"');
TestAsString(J,aResult);
if Pos('\u',S)=0 then
TestJSON(J,'"'+S+'"');
Finally
FreeAndNil(J);
FreeAndNil(P);
end;
end;
procedure TTestParser.DoTestFloat(F : TJSONFloat);
@ -598,7 +613,7 @@ Var
J : TJSONData;
begin
P:=TJSONParser.Create(S);
P:=TJSONParser.Create(S,[joUTF8]);
Try
J:=P.Parse;
If (J=Nil) then