From 242f0ac05629b99b96537c236f06f29621788df2 Mon Sep 17 00:00:00 2001
From: Bart <9132501-flyingsheep@users.noreply.gitlab.com>
Date: Wed, 19 Jan 2022 15:38:43 +0100
Subject: [PATCH] LazUtf8: faster implementation of Utf8EscapeControlChars,
 based upon an idea by Alexey Torgashin. Issue #39573.

---
 components/lazutils/lazutf8.pas | 45 ++++++++++++++++++++++++++++-----
 1 file changed, 38 insertions(+), 7 deletions(-)

diff --git a/components/lazutils/lazutf8.pas b/components/lazutils/lazutf8.pas
index 3ead9ef89b..1cd1e32e98 100644
--- a/components/lazutils/lazutf8.pas
+++ b/components/lazutils/lazutf8.pas
@@ -2937,30 +2937,61 @@ const
     '[CAN]', '[EM]' , '[SUB]', '[ESC]', '[FS]' , '[GS]' , '[RS]' , '[US]');
 var
   Ch: Char;
-  i: Integer;
+  i,ResLen: Integer;
+  SubLen: SizeInt;
+const
+  MaxGrowFactor: array[TEscapeMode] of integer = (3, 4, 5, 5, 5);
 begin
   if FindInvalidUTF8Codepoint(PChar(S), Length(S)) <> -1 then
   begin
     UTF8FixBroken(S);
   end;
   Result := '';
+  SetLength(Result, Length(S)*MaxGrowFactor[EscapeMode]);
+  ResLen := 0;
   //a byte < 127 cannot be part of a multi-byte codepoint, so this is safe
   for i := 1 to Length(S) do
   begin
+    Inc(ResLen);
     Ch := S[i];
     if (Ch < #32) then
     begin
       case EscapeMode of
-        emPascal: Result := Result + PascalEscapeStrings[Ch];
-        emHexPascal: Result := Result + HexEscapePascalStrings[Ch];
-        emHexC: Result := Result + HexEscapeCStrings[Ch];
-        emC: Result := Result + CEscapeStrings[Ch];
-        emAsciiControlNames: Result := Result + AsciiControlStrings[Ch];
+        emPascal:
+        begin
+          Move(PascalEscapeStrings[Ch][1], Result[ResLen], 3);
+          Inc(ResLen, 3-1);
+        end;
+        emHexPascal:
+        begin
+          Move(HexEscapePascalStrings[Ch][1], Result[ResLen], 4);
+          Inc(ResLen, 4-1);
+        end;
+        emHexC:
+        begin
+          Move(HexEscapeCStrings[Ch][1], Result[ResLen], 5);
+          Inc(ResLen, 5-1);
+        end;
+        emC:
+        begin
+          SubLen := Length(CEscapeStrings[Ch]);
+          Move(CEscapeStrings[Ch][1], Result[ResLen], SubLen);
+          Inc(ResLen, SubLen-1);
+        end;
+        emAsciiControlNames:
+        begin
+          SubLen := Length(AsciiControlStrings[Ch]);
+          Move(AsciiControlStrings[Ch][1], Result[ResLen], SubLen);
+          Inc(ResLen, SubLen-1);
+        end;
       end;//case
     end
     else
-      Result := Result + Ch;
+    begin
+      Result[ResLen] := Ch;
+    end;
   end;
+  SetLength(Result, ResLen);
 end;
 
 function UTF8StringOfChar(AUtf8Char: String; N: Integer): String;