* Fix bug ID #38624

git-svn-id: trunk@48980 -
This commit is contained in:
michael 2021-03-15 15:13:08 +00:00
parent ffa69353f3
commit 5e913147ab
4 changed files with 39 additions and 10 deletions

View File

@ -1013,12 +1013,22 @@ begin
Inc(I,4);
if (U1<>0) then
begin
App:={$IFDEF FPC_HAS_CPSTRING}UTF8Encode({$ENDIF}WideChar(U1)+WideChar(U2){$IFDEF FPC_HAS_CPSTRING}){$ENDIF};
U1:=0;
U2:=0;
end
if ((U1>=$D800) and (U1<=$DBFF)) and
((U2>=$DC00) and (U2<=$DFFF)) then
begin
App:={$IFDEF FPC_HAS_CPSTRING}UTF8Encode({$ENDIF}WideChar(U1)+WideChar(U2){$IFDEF FPC_HAS_CPSTRING}){$ENDIF};
U2:=0;
end
else
begin
App:={$IFDEF FPC_HAS_CPSTRING}UTF8Encode({$ENDIF}WideChar(U1){$IFDEF FPC_HAS_CPSTRING}){$ENDIF};
Result:=Result+App;
App:='';
end;
end
else
U1:=U2;
App:='';
U1:=U2;
end;
end;
if App<>'' then

View File

@ -354,9 +354,11 @@ begin
Error(SErrInvalidCharacter, [CurRow,CurColumn,FTokenStr[0]]);
end;
end;
// ToDo: 4-bytes UTF16
if u1<>0 then
begin
// 4bytes, compose.
if not ((u2>=$DC00) and (u2<=$DFFF)) then
Error(SErrInvalidCharacter, [CurRow,CurColumn,IntToStr(u2)]);
if (joUTF8 in Options) or (DefaultSystemCodePage=CP_UTF8) then
S:=Utf8Encode(WideString(WideChar(u1)+WideChar(u2))) // ToDo: use faster function
else
@ -365,9 +367,23 @@ begin
end
else
begin
S:='';
u1:=u2;
end
// Surrogate start
if (u2>=$D800) and (U2<=$DBFF) then
begin
u1:=u2;
S:='';
end
else
begin
if (joUTF8 in Options) or (DefaultSystemCodePage=CP_UTF8) then
S:=Utf8Encode(WideString(WideChar(u2))) // ToDo: use faster function
else
S:=String(WideChar(u1))+String(WideChar(u2)); // WideChar converts the encoding. Should it warn on loss?
U1:=0;
U2:=0;
end;
end;
Writeln(' U2 : ',U2,' : >',S,'<');
end;
#0 : Error(SErrOpenString,[FCurRow]);
else

View File

@ -4038,6 +4038,7 @@ Const
// Glowing star in UTF8
GlowingStar = #$F0#$9F#$8C#$9F;
Chinese = #$95e8#$88ab#$8111#$5b50#$6324#$574f#$4e86;
Chinese4b = #$95e8#$d867#$de3d#$88ab#$8111#$5b50#$6324#$574f#$4e86;
begin
TestFrom('','');
@ -4082,6 +4083,7 @@ begin
TestFrom('\u0041\u0042\u0043','ABC');
TestFrom('\u0041\u0042\u0043\u0044','ABCD');
TestFrom('\u95e8\u88ab\u8111\u5b50\u6324\u574f\u4e86',Utf8Encode(Chinese));
TestFrom('\u95e8\ud867\ude3d\u88ab\u8111\u5b50\u6324\u574f\u4e86',Utf8Encode(Chinese4b));
end;
procedure TTestJSONString.TestStringToJSONString;

View File

@ -317,6 +317,7 @@ procedure TBaseTestReader.TestString;
const
GlowingStar = #$F0#$9F#$8C#$9F;
Chinese = #$95e8#$88ab#$8111#$5b50#$6324#$574f#$4e86;
Chinese4b = #$95e8#$d867#$de3d#$88ab#$8111#$5b50#$6324#$574f#$4e86;
begin
DoTestString('A string');
@ -329,7 +330,7 @@ begin
DoTestString('\u0041\u0042\u0043','ABC');
DoTestString('\u0041\u0042\u0043\u0044','ABCD');
DoTestString('\u95e8\u88ab\u8111\u5b50\u6324\u574f\u4e86',Utf8Encode(Chinese));
DoTestString('\u95e8\ud867\ude3d\u88ab\u8111\u5b50\u6324\u574f\u4e86',Utf8Encode(Chinese4b));
end;