From 8e0442fb1f7a2d84fcb1d87aa18e1ee4575ca71e Mon Sep 17 00:00:00 2001 From: michael Date: Sat, 27 Oct 2018 17:16:13 +0000 Subject: [PATCH] * Fix 4-byte unicode characters git-svn-id: trunk@40058 - --- packages/fcl-json/src/fpjson.pp | 51 +++++++++++++---- packages/fcl-json/src/jsonscanner.pp | 70 ++++++++++++++++++----- packages/fcl-json/tests/testjson.lpi | 7 +-- packages/fcl-json/tests/testjson.pp | 3 + packages/fcl-json/tests/testjsondata.pp | 8 +++ packages/fcl-json/tests/testjsonparser.pp | 43 +++++++++----- 6 files changed, 139 insertions(+), 43 deletions(-) diff --git a/packages/fcl-json/src/fpjson.pp b/packages/fcl-json/src/fpjson.pp index 491187b11b..9b34d9ce96 100644 --- a/packages/fcl-json/src/fpjson.pp +++ b/packages/fcl-json/src/fpjson.pp @@ -875,14 +875,29 @@ end; function JSONStringToString(const S: TJSONStringType): TJSONStringType; Var - I,J,L : Integer; - w : String; + I,J,L,U1,U2 : Integer; + App,W : String; + + Procedure MaybeAppendUnicode; + + Var + U : String; + + begin + if (U1<>0) then + begin + U:=UTF8Encode(WideChar(U1)); + Result:=Result+U; + U1:=0; + end; + end; begin I:=1; J:=1; L:=Length(S); Result:=''; + U1:=0; While (I<=L) do begin if (S[I]='\') then @@ -891,25 +906,41 @@ begin If I0) then + begin + App:=UTF8Encode(WideChar(U1)+WideChar(U2)); + U2:=0; + end + else + U1:=U2; end; end; + if App<>'' then + begin + MaybeAppendUnicode; + Result:=Result+App; + end; end; J:=I+1; - end; + end + else + MaybeAppendUnicode; Inc(I); end; + MaybeAppendUnicode; Result:=Result+Copy(S,J,I-J+1); end; diff --git a/packages/fcl-json/src/jsonscanner.pp b/packages/fcl-json/src/jsonscanner.pp index 226e797223..eedf88a664 100644 --- a/packages/fcl-json/src/jsonscanner.pp +++ b/packages/fcl-json/src/jsonscanner.pp @@ -218,11 +218,31 @@ var TokenStart: PChar; it : TJSONToken; I : Integer; - OldLength, SectionLength, tstart,tcol, u: Integer; + OldLength, SectionLength, tstart,tcol, u1,u2: Integer; C , c2: char; S : String; IsStar,EOC: Boolean; + Procedure MaybeAppendUnicode; + + Var + u : String; + + begin + // if there is a leftover \u, append + if (u1<>0) then + begin + if (joUTF8 in Options) or (DefaultSystemCodePage=CP_UTF8) then + U:=Utf8Encode(WideString(WideChar(u1))) // ToDo: use faster function + else + U:=String(WideChar(u1)); // WideChar converts the encoding. Should it warn on loss? + FCurTokenString:=FCurTokenString+U; + OldLength:=Length(FCurTokenString); + u1:=0; + end; + end; + + begin if FTokenStr = nil then if not FetchLine then @@ -262,6 +282,7 @@ begin TokenStart := FTokenStr; OldLength := 0; FCurTokenString := ''; + u1:=0; while not (FTokenStr[0] in [#0,C]) do begin if (FTokenStr[0]='\') then @@ -282,43 +303,64 @@ begin '/' : S:='/'; 'u' : begin S:='0000'; - u:=0; + u2:=0; For I:=1 to 4 do begin Inc(FTokenStr); c2:=FTokenStr^; Case c2 of - '0'..'9': u:=u*16+ord(c2)-ord('0'); - 'A'..'F': u:=u*16+ord(c2)-ord('A')+10; - 'a'..'f': u:=u*16+ord(c2)-ord('a')+10; + '0'..'9': u2:=u2*16+ord(c2)-ord('0'); + 'A'..'F': u2:=u2*16+ord(c2)-ord('A')+10; + 'a'..'f': u2:=u2*16+ord(c2)-ord('a')+10; else Error(SErrInvalidCharacter, [CurRow,CurColumn,FTokenStr[0]]); end; end; // ToDo: 4-bytes UTF16 - if (joUTF8 in Options) or (DefaultSystemCodePage=CP_UTF8) then - S:=Utf8Encode(WideString(WideChar(u))) // ToDo: use faster function + if u1<>0 then + begin + if (joUTF8 in Options) or (DefaultSystemCodePage=CP_UTF8) then + S:=Utf8Encode(WideString(WideChar(u1)+WideChar(u2))) // ToDo: use faster function + else + S:=String(WideChar(u1)+WideChar(u2)); // WideChar converts the encoding. Should it warn on loss? + u1:=0; + end else - S:=String(WideChar(u)); // WideChar converts the encoding. Should it warn on loss? + begin + S:=''; + u1:=u2; + end end; #0 : Error(SErrOpenString); else Error(SErrInvalidCharacter, [CurRow,CurColumn,FTokenStr[0]]); end; - SetLength(FCurTokenString, OldLength + SectionLength+1+Length(S)); - if SectionLength > 0 then - Move(TokenStart^, FCurTokenString[OldLength + 1], SectionLength); - Move(S[1],FCurTokenString[OldLength + SectionLength+1],Length(S)); - Inc(OldLength, SectionLength+Length(S)); + I:=Length(S); + if (SectionLength+I>0) then + begin + // If length=1, we know it was not \uXX, but u1 can be nonzero, and we must first append it. + // example: \u00f8\" + if I=1 then + MaybeAppendUnicode; + SetLength(FCurTokenString, OldLength + SectionLength+Length(S)); + if SectionLength > 0 then + Move(TokenStart^, FCurTokenString[OldLength + 1], SectionLength); + if I>0 then + Move(S[1],FCurTokenString[OldLength + SectionLength+1],i); + Inc(OldLength, SectionLength+Length(S)); + end; // Next char TokenStart := FTokenStr+1; - end; + end + else + MaybeAppendUnicode; if FTokenStr[0] = #0 then Error(SErrOpenString); Inc(FTokenStr); end; if FTokenStr[0] = #0 then Error(SErrOpenString); + MaybeAppendUnicode; SectionLength := FTokenStr - TokenStart; SetLength(FCurTokenString, OldLength + SectionLength); if SectionLength > 0 then diff --git a/packages/fcl-json/tests/testjson.lpi b/packages/fcl-json/tests/testjson.lpi index a37fbc0927..953cfb216d 100644 --- a/packages/fcl-json/tests/testjson.lpi +++ b/packages/fcl-json/tests/testjson.lpi @@ -15,20 +15,17 @@ - - - - + - + diff --git a/packages/fcl-json/tests/testjson.pp b/packages/fcl-json/tests/testjson.pp index 57316e9a7b..4144287514 100644 --- a/packages/fcl-json/tests/testjson.pp +++ b/packages/fcl-json/tests/testjson.pp @@ -17,6 +17,9 @@ program testjson; uses + {$ifdef unix} + cwstring, + {$endif} Classes, testjsondata, testjsonparser, testjsonrtti, consoletestrunner, testjsonreader; type diff --git a/packages/fcl-json/tests/testjsondata.pp b/packages/fcl-json/tests/testjsondata.pp index df16a77ed6..5e347de354 100644 --- a/packages/fcl-json/tests/testjsondata.pp +++ b/packages/fcl-json/tests/testjsondata.pp @@ -3993,6 +3993,11 @@ begin end; procedure TTestJSONString.TestJSONStringToString; + +Const + // Glowing star in UTF8 + GlowingStar = #$F0#$9F#$8C#$9F; + begin TestFrom('',''); TestFrom('A','A'); @@ -4029,6 +4034,9 @@ begin TestFrom('\n\n',#10#10); TestFrom('\f\f',#12#12); TestFrom('\r\r',#13#13); + TestFrom('\u00f8','ø'); // this is ø + TestFrom('\u00f8\"','ø"'); // this is ø" + TestFrom('\ud83c\udf1f',GlowingStar); end; procedure TTestJSONString.TestStringToJSONString; diff --git a/packages/fcl-json/tests/testjsonparser.pp b/packages/fcl-json/tests/testjsonparser.pp index 8b0fff5e45..c3b5ed7db9 100644 --- a/packages/fcl-json/tests/testjsonparser.pp +++ b/packages/fcl-json/tests/testjsonparser.pp @@ -37,6 +37,7 @@ type procedure DoTestFloat(F: TJSONFloat); overload; procedure DoTestFloat(F: TJSONFloat; S: String); overload; procedure DoTestObject(S: String; const ElNames: array of String; DoJSONTest : Boolean = True); + procedure DoTestString(S : String; AResult : String); procedure DoTestString(S : String); procedure DoTestArray(S: String; ACount: Integer; IgnoreJSON: Boolean=False); Procedure DoTestClass(S : String; AClass : TJSONDataClass); @@ -79,7 +80,7 @@ Var J : TJSONData; begin - P:=TJSONParser.Create(''); + P:=TJSONParser.Create('',[joUTF8]); Try J:=P.Parse; If (J<>Nil) then @@ -97,7 +98,7 @@ Var J : TJSONData; begin - P:=TJSONParser.Create('1'); + P:=TJSONParser.Create('1',[joUTF8]); Try J:=P.Parse; If (J=Nil) then @@ -117,7 +118,7 @@ Var J : TJSONData; begin - P:=TJSONParser.Create('123456789012345'); + P:=TJSONParser.Create('123456789012345',[joUTF8]); Try J:=P.Parse; If (J=Nil) then @@ -137,7 +138,7 @@ Var J : TJSONData; begin - P:=TJSONParser.Create('null'); + P:=TJSONParser.Create('null',[joUTF8]); Try J:=P.Parse; If (J=Nil) then @@ -156,7 +157,7 @@ Var J : TJSONData; begin - P:=TJSONParser.Create('true'); + P:=TJSONParser.Create('true',[joUTF8]); Try J:=P.Parse; If (J=Nil) then @@ -176,7 +177,7 @@ Var J : TJSONData; begin - P:=TJSONParser.Create('false'); + P:=TJSONParser.Create('false',[joUTF8]); Try J:=P.Parse; If (J=Nil) then @@ -206,10 +207,18 @@ end; procedure TTestParser.TestString; +Const + // Glowing star in UTF8 + GlowingStar = #$F0#$9F#$8C#$9F; + begin DoTestString('A string'); DoTestString(''); DoTestString('\"'); + DoTestString('\u00f8','ø'); // this is ø + DoTestString('\u00f8\"','ø"'); // this is ø" +// Writeln(GlowingStar); + DoTestString('\ud83c\udf1f',GlowingStar); end; @@ -348,7 +357,7 @@ Var begin J:=Nil; - P:=TJSONParser.Create(S); + P:=TJSONParser.Create(S,[joUTF8]); Try P.Options:=FOptions; J:=P.Parse; @@ -400,7 +409,7 @@ Var D : TJSONData; begin - P:=TJSONParser.Create(S); + P:=TJSONParser.Create(S,[joUTF8]); try D:=P.Parse; try @@ -536,7 +545,7 @@ Var begin ParseOK:=False; - P:=TJSONParser.Create(S); + P:=TJSONParser.Create(S,[joUTF8]); P.OPtions:=Options; J:=Nil; Try @@ -561,24 +570,30 @@ end; procedure TTestParser.DoTestString(S: String); +begin + DoTestString(S,JSONStringToString(S)); +end; + +procedure TTestParser.DoTestString(S: String; AResult : String); + Var P : TJSONParser; J : TJSONData; begin - P:=TJSONParser.Create('"'+S+'"'); + P:=TJSONParser.Create('"'+S+'"',[joUTF8]); Try J:=P.Parse; If (J=Nil) then Fail('Parse of string "'+S+'" fails'); TestJSONType(J,jtString); - TestAsString(J,JSONStringToString(S)); - TestJSON(J,'"'+S+'"'); + TestAsString(J,aResult); + if Pos('\u',S)=0 then + TestJSON(J,'"'+S+'"'); Finally FreeAndNil(J); FreeAndNil(P); end; - end; procedure TTestParser.DoTestFloat(F : TJSONFloat); @@ -598,7 +613,7 @@ Var J : TJSONData; begin - P:=TJSONParser.Create(S); + P:=TJSONParser.Create(S,[joUTF8]); Try J:=P.Parse; If (J=Nil) then