mirror of
https://gitlab.com/freepascal.org/lazarus/lazarus.git
synced 2025-08-08 08:16:18 +02:00
LazUtf8: faster implementation of Utf8EscapeControlChars, based on an idea by Alexey Torgashin. Issue #39573.
This commit is contained in:
parent
a80c5252e0
commit
242f0ac056
@ -2937,31 +2937,62 @@ const
|
|||||||
'[CAN]', '[EM]' , '[SUB]', '[ESC]', '[FS]' , '[GS]' , '[RS]' , '[US]');
|
'[CAN]', '[EM]' , '[SUB]', '[ESC]', '[FS]' , '[GS]' , '[RS]' , '[US]');
|
||||||
var
|
var
|
||||||
Ch: Char;
|
Ch: Char;
|
||||||
i: Integer;
|
i,ResLen: Integer;
|
||||||
|
SubLen: SizeInt;
|
||||||
|
const
|
||||||
|
MaxGrowFactor: array[TEscapeMode] of integer = (3, 4, 5, 5, 5);
|
||||||
begin
|
begin
|
||||||
if FindInvalidUTF8Codepoint(PChar(S), Length(S)) <> -1 then
|
if FindInvalidUTF8Codepoint(PChar(S), Length(S)) <> -1 then
|
||||||
begin
|
begin
|
||||||
UTF8FixBroken(S);
|
UTF8FixBroken(S);
|
||||||
end;
|
end;
|
||||||
Result := '';
|
Result := '';
|
||||||
|
SetLength(Result, Length(S)*MaxGrowFactor[EscapeMode]);
|
||||||
|
ResLen := 0;
|
||||||
//a byte < 127 cannot be part of a multi-byte codepoint, so this is safe
|
//a byte < 127 cannot be part of a multi-byte codepoint, so this is safe
|
||||||
for i := 1 to Length(S) do
|
for i := 1 to Length(S) do
|
||||||
begin
|
begin
|
||||||
|
Inc(ResLen);
|
||||||
Ch := S[i];
|
Ch := S[i];
|
||||||
if (Ch < #32) then
|
if (Ch < #32) then
|
||||||
begin
|
begin
|
||||||
case EscapeMode of
|
case EscapeMode of
|
||||||
emPascal: Result := Result + PascalEscapeStrings[Ch];
|
emPascal:
|
||||||
emHexPascal: Result := Result + HexEscapePascalStrings[Ch];
|
begin
|
||||||
emHexC: Result := Result + HexEscapeCStrings[Ch];
|
Move(PascalEscapeStrings[Ch][1], Result[ResLen], 3);
|
||||||
emC: Result := Result + CEscapeStrings[Ch];
|
Inc(ResLen, 3-1);
|
||||||
emAsciiControlNames: Result := Result + AsciiControlStrings[Ch];
|
end;
|
||||||
|
emHexPascal:
|
||||||
|
begin
|
||||||
|
Move(HexEscapePascalStrings[Ch][1], Result[ResLen], 4);
|
||||||
|
Inc(ResLen, 4-1);
|
||||||
|
end;
|
||||||
|
emHexC:
|
||||||
|
begin
|
||||||
|
Move(HexEscapeCStrings[Ch][1], Result[ResLen], 5);
|
||||||
|
Inc(ResLen, 5-1);
|
||||||
|
end;
|
||||||
|
emC:
|
||||||
|
begin
|
||||||
|
SubLen := Length(CEscapeStrings[Ch]);
|
||||||
|
Move(CEscapeStrings[Ch][1], Result[ResLen], SubLen);
|
||||||
|
Inc(ResLen, SubLen-1);
|
||||||
|
end;
|
||||||
|
emAsciiControlNames:
|
||||||
|
begin
|
||||||
|
SubLen := Length(AsciiControlStrings[Ch]);
|
||||||
|
Move(AsciiControlStrings[Ch][1], Result[ResLen], SubLen);
|
||||||
|
Inc(ResLen, SubLen-1);
|
||||||
|
end;
|
||||||
end;//case
|
end;//case
|
||||||
end
|
end
|
||||||
else
|
else
|
||||||
Result := Result + Ch;
|
begin
|
||||||
|
Result[ResLen] := Ch;
|
||||||
end;
|
end;
|
||||||
end;
|
end;
|
||||||
|
SetLength(Result, ResLen);
|
||||||
|
end;
|
||||||
|
|
||||||
function UTF8StringOfChar(AUtf8Char: String; N: Integer): String;
|
function UTF8StringOfChar(AUtf8Char: String; N: Integer): String;
|
||||||
var
|
var
|
||||||
|
Loading…
Reference in New Issue
Block a user