* Fix bug ID #38008: allow UTF-8 to Unicode conversion to either raise an error on invalid input or ignore it

git-svn-id: trunk@47391 -
This commit is contained in:
michael 2020-11-12 09:17:09 +00:00
parent d9784412a4
commit 257ef24a1e
2 changed files with 26 additions and 66 deletions

View File

@ -134,7 +134,8 @@ var
function UnicodeToUtf8(Dest: PChar; Source: PUnicodeChar; MaxBytes: SizeInt): SizeInt;{$ifdef SYSTEMINLINE}inline;{$endif}
function UnicodeToUtf8(Dest: PChar; MaxDestBytes: SizeUInt; Source: PUnicodeChar; SourceChars: SizeUInt): SizeUInt;
// Utf8ToUnicode overload group.
// The overload with IgnoreInvalid is the full implementation: when
// IgnoreInvalid is False it calls HandleError(231) if a decoded code point
// equals UNICODE_INVALID. The wrappers without that parameter pass True.
// NOTE(review): the two four-parameter lines below differ only in the inline
// directive and appear to be the before/after forms of the same declaration.
function Utf8ToUnicode(Dest: PUnicodeChar; Source: PChar; MaxChars: SizeInt): SizeInt;{$ifdef SYSTEMINLINE}inline;{$endif}
function Utf8ToUnicode(Dest: PUnicodeChar; MaxDestChars: SizeUInt; Source: PChar; SourceBytes: SizeUInt): SizeUInt;
function Utf8ToUnicode(Dest: PUnicodeChar; MaxDestChars: SizeUInt; Source: PChar; SourceBytes: SizeUInt): SizeUInt;{$ifdef SYSTEMINLINE}inline;{$endif}
function Utf8ToUnicode(Dest: PUnicodeChar; MaxDestChars: SizeUInt; Source: PChar; SourceBytes: SizeUInt; IgnoreInvalid : Boolean): SizeUInt;
function UTF8Encode(const s : RawByteString) : RawByteString; inline;
function UTF8Encode(const s : UnicodeString) : RawByteString;
function UTF8Decode(const s : RawByteString): UnicodeString;

View File

@ -1792,13 +1792,20 @@ end;
// Overload without an explicit source length.
// Returns 0 when Source is nil; otherwise forwards to a longer overload,
// passing length(Source) as the number of source bytes.
// NOTE(review): the two consecutive "Result:=" lines appear to be the
// before/after pair of this change; the second one passes IgnoreInvalid=True.
function Utf8ToUnicode(Dest: PUnicodeChar; Source: PChar; MaxChars: SizeInt): SizeInt;{$ifdef SYSTEMINLINE}inline;{$endif}
begin
if assigned(Source) then
Result:=Utf8ToUnicode(Dest,MaxChars,Source,length(Source))
Result:=Utf8ToUnicode(Dest,MaxChars,Source,length(Source),True)
else
Result:=0;
end;
// Four-parameter overload: a thin wrapper that delegates to the
// five-parameter overload with IgnoreInvalid=True, so calls through this
// signature never take the HandleError(231) path guarded by "Not IgnoreInvalid".
// NOTE(review): the two header lines differ only in the inline directive and
// appear to be the before/after forms of the same header.
function UTF8ToUnicode(Dest: PUnicodeChar; MaxDestChars: SizeUInt; Source: PChar; SourceBytes: SizeUInt): SizeUInt;
function UTF8ToUnicode(Dest: PUnicodeChar; MaxDestChars: SizeUInt; Source: PChar; SourceBytes: SizeUInt): SizeUInt;{$ifdef SYSTEMINLINE}inline;{$endif}
begin
Result:=Utf8ToUnicode(Dest,MaxDestChars,Source,SourceBytes,True);
end;
function Utf8ToUnicode(Dest: PUnicodeChar; MaxDestChars: SizeUInt; Source: PChar; SourceBytes: SizeUInt; IgnoreInvalid : Boolean): SizeUInt;
{$ifdef EXCLUDE_COMPLEX_PROCS}
begin
runerror(217);
@ -1832,44 +1839,12 @@ end;
IBYTE:=byte(Source[InputUTF8]);
if (IBYTE and $80) = 0 then
begin
//One character US-ASCII, convert it to unicode
(*
if IBYTE = 10 then
begin
If (PreChar<>13) and FALSE then
begin
//Expand to crlf, conform UTF-8.
//This procedure will break the memory allocation by
//FPC for the widestring, so never use it. Condition never true due to the "and FALSE".
if OutputUnicode+1<MaxDestChars then
begin
Dest[OutputUnicode]:=WideChar(13);
inc(OutputUnicode);
Dest[OutputUnicode]:=WideChar(10);
inc(OutputUnicode);
PreChar:=10;
end
else
begin
Dest[OutputUnicode]:=WideChar(13);
inc(OutputUnicode);
end;
end
else
begin
Dest[OutputUnicode]:=WideChar(IBYTE);
inc(OutputUnicode);
PreChar:=IBYTE;
end;
end
else
*)
begin
Dest[OutputUnicode]:=WideChar(IBYTE);
inc(OutputUnicode);
PreChar:=IBYTE;
end;
inc(InputUTF8);
// One character US-ASCII, convert it to unicode
// Commented code to convert LF to CRLF has been removed
Dest[OutputUnicode]:=WideChar(IBYTE);
inc(OutputUnicode);
PreChar:=IBYTE;
inc(InputUTF8);
end
else
begin
@ -1961,6 +1936,8 @@ end;
end;
if CharLen > 0 then
begin
if (UC=UNICODE_INVALID) and Not IgnoreInvalid then
HandleError(231); // Will be converted to EConversionError in sysutils
PreChar:=UC;
Dest[OutputUnicode]:=WideChar(UC);
inc(OutputUnicode);
@ -1977,31 +1954,11 @@ end;
IBYTE:=byte(Source[InputUTF8]);
if (IBYTE and $80) = 0 then
begin
//One character US-ASCII, convert it to unicode
(*
if IBYTE = 10 then
begin
if (PreChar<>13) and FALSE then
begin
//Expand to crlf, conform UTF-8.
//This procedure will break the memory allocation by
//FPC for the widestring, so never use it. Condition never true due to the "and FALSE".
inc(OutputUnicode,2);
PreChar:=10;
end
else
begin
inc(OutputUnicode);
PreChar:=IBYTE;
end;
end
else
*)
begin
inc(OutputUnicode);
PreChar:=IBYTE;
end;
inc(InputUTF8);
// One character US-ASCII, convert it to unicode
// Commented code to convert LF to CRLF has been removed
inc(OutputUnicode);
PreChar:=IBYTE;
inc(InputUTF8);
end
else
begin
@ -2077,6 +2034,8 @@ end;
end;
if CharLen > 0 then
begin
if (UC=UNICODE_INVALID) and Not IgnoreInvalid then
HandleError(231); // Will be converted to EConversionError in sysutils
PreChar:=UC;
inc(OutputUnicode);
end;