mirror of
https://gitlab.com/freepascal.org/fpc/source.git
synced 2025-04-20 20:49:49 +02:00
* Fix bug ID #38008: allow the UTF-8 to Unicode conversion to either raise an error on invalid input or ignore it
git-svn-id: trunk@47391 -
This commit is contained in:
parent
d9784412a4
commit
257ef24a1e
@ -134,7 +134,8 @@ var
|
||||
function UnicodeToUtf8(Dest: PChar; Source: PUnicodeChar; MaxBytes: SizeInt): SizeInt;{$ifdef SYSTEMINLINE}inline;{$endif}
|
||||
function UnicodeToUtf8(Dest: PChar; MaxDestBytes: SizeUInt; Source: PUnicodeChar; SourceChars: SizeUInt): SizeUInt;
|
||||
function Utf8ToUnicode(Dest: PUnicodeChar; Source: PChar; MaxChars: SizeInt): SizeInt;{$ifdef SYSTEMINLINE}inline;{$endif}
|
||||
function Utf8ToUnicode(Dest: PUnicodeChar; MaxDestChars: SizeUInt; Source: PChar; SourceBytes: SizeUInt): SizeUInt;
|
||||
function Utf8ToUnicode(Dest: PUnicodeChar; MaxDestChars: SizeUInt; Source: PChar; SourceBytes: SizeUInt): SizeUInt;{$ifdef SYSTEMINLINE}inline;{$endif}
|
||||
function Utf8ToUnicode(Dest: PUnicodeChar; MaxDestChars: SizeUInt; Source: PChar; SourceBytes: SizeUInt; IgnoreInvalid : Boolean): SizeUInt;
|
||||
function UTF8Encode(const s : RawByteString) : RawByteString; inline;
|
||||
function UTF8Encode(const s : UnicodeString) : RawByteString;
|
||||
function UTF8Decode(const s : RawByteString): UnicodeString;
|
||||
|
@ -1792,13 +1792,20 @@ end;
|
||||
function Utf8ToUnicode(Dest: PUnicodeChar; Source: PChar; MaxChars: SizeInt): SizeInt;{$ifdef SYSTEMINLINE}inline;{$endif}
|
||||
begin
|
||||
if assigned(Source) then
|
||||
Result:=Utf8ToUnicode(Dest,MaxChars,Source,length(Source))
|
||||
Result:=Utf8ToUnicode(Dest,MaxChars,Source,length(Source),True)
|
||||
else
|
||||
Result:=0;
|
||||
end;
|
||||
|
||||
|
||||
function UTF8ToUnicode(Dest: PUnicodeChar; MaxDestChars: SizeUInt; Source: PChar; SourceBytes: SizeUInt): SizeUInt;
|
||||
function UTF8ToUnicode(Dest: PUnicodeChar; MaxDestChars: SizeUInt; Source: PChar; SourceBytes: SizeUInt): SizeUInt;{$ifdef SYSTEMINLINE}inline;{$endif}
|
||||
|
||||
begin
|
||||
Result:=Utf8ToUnicode(Dest,MaxDestChars,Source,SourceBytes,True);
|
||||
end;
|
||||
|
||||
function Utf8ToUnicode(Dest: PUnicodeChar; MaxDestChars: SizeUInt; Source: PChar; SourceBytes: SizeUInt; IgnoreInvalid : Boolean): SizeUInt;
|
||||
|
||||
{$ifdef EXCLUDE_COMPLEX_PROCS}
|
||||
begin
|
||||
runerror(217);
|
||||
@ -1832,44 +1839,12 @@ end;
|
||||
IBYTE:=byte(Source[InputUTF8]);
|
||||
if (IBYTE and $80) = 0 then
|
||||
begin
|
||||
//One character US-ASCII, convert it to unicode
|
||||
(*
|
||||
if IBYTE = 10 then
|
||||
begin
|
||||
If (PreChar<>13) and FALSE then
|
||||
begin
|
||||
//Expand to crlf, conform UTF-8.
|
||||
//This procedure will break the memory alocation by
|
||||
//FPC for the widestring, so never use it. Condition never true due the "and FALSE".
|
||||
if OutputUnicode+1<MaxDestChars then
|
||||
begin
|
||||
Dest[OutputUnicode]:=WideChar(13);
|
||||
inc(OutputUnicode);
|
||||
Dest[OutputUnicode]:=WideChar(10);
|
||||
inc(OutputUnicode);
|
||||
PreChar:=10;
|
||||
end
|
||||
else
|
||||
begin
|
||||
Dest[OutputUnicode]:=WideChar(13);
|
||||
inc(OutputUnicode);
|
||||
end;
|
||||
end
|
||||
else
|
||||
begin
|
||||
Dest[OutputUnicode]:=WideChar(IBYTE);
|
||||
inc(OutputUnicode);
|
||||
PreChar:=IBYTE;
|
||||
end;
|
||||
end
|
||||
else
|
||||
*)
|
||||
begin
|
||||
Dest[OutputUnicode]:=WideChar(IBYTE);
|
||||
inc(OutputUnicode);
|
||||
PreChar:=IBYTE;
|
||||
end;
|
||||
inc(InputUTF8);
|
||||
// One character US-ASCII, convert it to unicode
|
||||
// Commented code to convert LF to CRLF has been removed
|
||||
Dest[OutputUnicode]:=WideChar(IBYTE);
|
||||
inc(OutputUnicode);
|
||||
PreChar:=IBYTE;
|
||||
inc(InputUTF8);
|
||||
end
|
||||
else
|
||||
begin
|
||||
@ -1961,6 +1936,8 @@ end;
|
||||
end;
|
||||
if CharLen > 0 then
|
||||
begin
|
||||
if (UC=UNICODE_INVALID) and Not IgnoreInvalid then
|
||||
HandleError(231); // Will be converted to EConversionError in sysutils
|
||||
PreChar:=UC;
|
||||
Dest[OutputUnicode]:=WideChar(UC);
|
||||
inc(OutputUnicode);
|
||||
@ -1977,31 +1954,11 @@ end;
|
||||
IBYTE:=byte(Source[InputUTF8]);
|
||||
if (IBYTE and $80) = 0 then
|
||||
begin
|
||||
//One character US-ASCII, convert it to unicode
|
||||
(*
|
||||
if IBYTE = 10 then
|
||||
begin
|
||||
if (PreChar<>13) and FALSE then
|
||||
begin
|
||||
//Expand to crlf, conform UTF-8.
|
||||
//This procedure will break the memory alocation by
|
||||
//FPC for the widestring, so never use it. Condition never true due the "and FALSE".
|
||||
inc(OutputUnicode,2);
|
||||
PreChar:=10;
|
||||
end
|
||||
else
|
||||
begin
|
||||
inc(OutputUnicode);
|
||||
PreChar:=IBYTE;
|
||||
end;
|
||||
end
|
||||
else
|
||||
*)
|
||||
begin
|
||||
inc(OutputUnicode);
|
||||
PreChar:=IBYTE;
|
||||
end;
|
||||
inc(InputUTF8);
|
||||
// One character US-ASCII: only counted here, nothing is written to Dest in this pass
|
||||
// Commented code to convert LF to CRLF has been removed
|
||||
inc(OutputUnicode);
|
||||
PreChar:=IBYTE;
|
||||
inc(InputUTF8);
|
||||
end
|
||||
else
|
||||
begin
|
||||
@ -2077,6 +2034,8 @@ end;
|
||||
end;
|
||||
if CharLen > 0 then
|
||||
begin
|
||||
if (UC=UNICODE_INVALID) and Not IgnoreInvalid then
|
||||
HandleError(231); // Will be converted to EConversionError in sysutils
|
||||
PreChar:=UC;
|
||||
inc(OutputUnicode);
|
||||
end;
|
||||
|
Loading…
Reference in New Issue
Block a user