* Fix bug ID #38624

git-svn-id: trunk@48980 -
(cherry picked from commit 5e913147ab)
This commit is contained in:
michael 2021-03-15 15:13:08 +00:00 committed by Florian Klämpfl
parent 32023bf708
commit 8c338e1174
4 changed files with 39 additions and 10 deletions

View File

@ -1013,12 +1013,22 @@ begin
Inc(I,4); Inc(I,4);
if (U1<>0) then if (U1<>0) then
begin begin
App:={$IFDEF FPC_HAS_CPSTRING}UTF8Encode({$ENDIF}WideChar(U1)+WideChar(U2){$IFDEF FPC_HAS_CPSTRING}){$ENDIF}; if ((U1>=$D800) and (U1<=$DBFF)) and
U1:=0; ((U2>=$DC00) and (U2<=$DFFF)) then
U2:=0; begin
end App:={$IFDEF FPC_HAS_CPSTRING}UTF8Encode({$ENDIF}WideChar(U1)+WideChar(U2){$IFDEF FPC_HAS_CPSTRING}){$ENDIF};
U2:=0;
end
else
begin
App:={$IFDEF FPC_HAS_CPSTRING}UTF8Encode({$ENDIF}WideChar(U1){$IFDEF FPC_HAS_CPSTRING}){$ENDIF};
Result:=Result+App;
App:='';
end;
end
else else
U1:=U2; App:='';
U1:=U2;
end; end;
end; end;
if App<>'' then if App<>'' then

View File

@ -354,9 +354,11 @@ begin
Error(SErrInvalidCharacter, [CurRow,CurColumn,FTokenStr[0]]); Error(SErrInvalidCharacter, [CurRow,CurColumn,FTokenStr[0]]);
end; end;
end; end;
// ToDo: 4-bytes UTF16
if u1<>0 then if u1<>0 then
begin begin
// 4bytes, compose.
if not ((u2>=$DC00) and (u2<=$DFFF)) then
Error(SErrInvalidCharacter, [CurRow,CurColumn,IntToStr(u2)]);
if (joUTF8 in Options) or (DefaultSystemCodePage=CP_UTF8) then if (joUTF8 in Options) or (DefaultSystemCodePage=CP_UTF8) then
S:=Utf8Encode(WideString(WideChar(u1)+WideChar(u2))) // ToDo: use faster function S:=Utf8Encode(WideString(WideChar(u1)+WideChar(u2))) // ToDo: use faster function
else else
@ -365,9 +367,23 @@ begin
end end
else else
begin begin
S:=''; // Surrogate start
u1:=u2; if (u2>=$D800) and (U2<=$DBFF) then
end begin
u1:=u2;
S:='';
end
else
begin
if (joUTF8 in Options) or (DefaultSystemCodePage=CP_UTF8) then
S:=Utf8Encode(WideString(WideChar(u2))) // ToDo: use faster function
else
S:=String(WideChar(u1))+String(WideChar(u2)); // WideChar converts the encoding. Should it warn on loss?
U1:=0;
U2:=0;
end;
end;
Writeln(' U2 : ',U2,' : >',S,'<');
end; end;
#0 : Error(SErrOpenString,[FCurRow]); #0 : Error(SErrOpenString,[FCurRow]);
else else

View File

@ -4038,6 +4038,7 @@ Const
// Glowing star in UTF8 // Glowing star in UTF8
GlowingStar = #$F0#$9F#$8C#$9F; GlowingStar = #$F0#$9F#$8C#$9F;
Chinese = #$95e8#$88ab#$8111#$5b50#$6324#$574f#$4e86; Chinese = #$95e8#$88ab#$8111#$5b50#$6324#$574f#$4e86;
Chinese4b = #$95e8#$d867#$de3d#$88ab#$8111#$5b50#$6324#$574f#$4e86;
begin begin
TestFrom('',''); TestFrom('','');
@ -4082,6 +4083,7 @@ begin
TestFrom('\u0041\u0042\u0043','ABC'); TestFrom('\u0041\u0042\u0043','ABC');
TestFrom('\u0041\u0042\u0043\u0044','ABCD'); TestFrom('\u0041\u0042\u0043\u0044','ABCD');
TestFrom('\u95e8\u88ab\u8111\u5b50\u6324\u574f\u4e86',Utf8Encode(Chinese)); TestFrom('\u95e8\u88ab\u8111\u5b50\u6324\u574f\u4e86',Utf8Encode(Chinese));
TestFrom('\u95e8\ud867\ude3d\u88ab\u8111\u5b50\u6324\u574f\u4e86',Utf8Encode(Chinese4b));
end; end;
procedure TTestJSONString.TestStringToJSONString; procedure TTestJSONString.TestStringToJSONString;

View File

@ -317,6 +317,7 @@ procedure TBaseTestReader.TestString;
const const
GlowingStar = #$F0#$9F#$8C#$9F; GlowingStar = #$F0#$9F#$8C#$9F;
Chinese = #$95e8#$88ab#$8111#$5b50#$6324#$574f#$4e86; Chinese = #$95e8#$88ab#$8111#$5b50#$6324#$574f#$4e86;
Chinese4b = #$95e8#$d867#$de3d#$88ab#$8111#$5b50#$6324#$574f#$4e86;
begin begin
DoTestString('A string'); DoTestString('A string');
@ -329,7 +330,7 @@ begin
DoTestString('\u0041\u0042\u0043','ABC'); DoTestString('\u0041\u0042\u0043','ABC');
DoTestString('\u0041\u0042\u0043\u0044','ABCD'); DoTestString('\u0041\u0042\u0043\u0044','ABCD');
DoTestString('\u95e8\u88ab\u8111\u5b50\u6324\u574f\u4e86',Utf8Encode(Chinese)); DoTestString('\u95e8\u88ab\u8111\u5b50\u6324\u574f\u4e86',Utf8Encode(Chinese));
DoTestString('\u95e8\ud867\ude3d\u88ab\u8111\u5b50\u6324\u574f\u4e86',Utf8Encode(Chinese4b));
end; end;