From 8e168bc2f5dde2bac520b378c9a59795b4d4e78a Mon Sep 17 00:00:00 2001 From: Bart <9132501-flyingsheep@users.noreply.gitlab.com> Date: Sun, 13 Jul 2025 14:46:29 +0200 Subject: [PATCH] LazUtf8: implement custom escape strings for Utf8EscapeStrings. This also allows to remove unwanted control characters from the output. Remove unneeded exception (if a user typecasts something else into the EscapeMode param, (s)he's on is own). MaxGrowFactors to Cardinal (disallow them to be negative) --- components/lazutils/lazutf8.pas | 42 ++++++++++++++++++++++++--------- 1 file changed, 31 insertions(+), 11 deletions(-) diff --git a/components/lazutils/lazutf8.pas b/components/lazutils/lazutf8.pas index 7c591e3b78..20a02b544a 100644 --- a/components/lazutils/lazutf8.pas +++ b/components/lazutils/lazutf8.pas @@ -168,10 +168,15 @@ type emC, // C-style character encoding: "\n" (if present, otherwise hex) emHexC, // C-style character encoding: "\0x0D" emAsciiControlNames, // abbreviation of ASCII control characters in square brackets: "[CR]" - emPercent // percent encoding (control characters only): "%0D" + emPercent, // percent encoding (control characters only): "%0D" + emCustom // user defined scheme ); + TControlChars = #0..#31; + TEscapedStrings = array[TControlChars] of string; + PEscapedStrings = ^TEscapedStrings; -function Utf8EscapeControlChars(S: String; EscapeMode: TEscapeMode = emPascal): String; + +function Utf8EscapeControlChars(S: String; EscapeMode: TEscapeMode = emPascal; CustomStrings: PEscapedStrings = nil; AMaxGrowFactor: Cardinal = 0): String; type TUTF8TrimFlag = ( @@ -3022,13 +3027,14 @@ end; human readable format. Mainly used for logging purposes. Parameters: - S : Input string. Must be UTF8 encoded. - EscapeMode: controls the human readable format for escape characters. + S : Input string. Must be UTF8 encoded. + EscapeMode : controls the human readable format for escape characters. + CustomStrings : only applicable if EscapeMode=emCustom: pointer to a fixed array of strings + AMaxGrowFactor: max growfactor for EscapeMode=emCustom. + If non-zero, no error checking is done on it's value + If set to zero, it will be calculated on the fly (which is time consuming) } -function Utf8EscapeControlChars(S: String; EscapeMode: TEscapeMode = emPascal): String; -type - TEscapedStrings = array[#0..#31] of string; - PEscapedStrings = ^TEscapedStrings; +function Utf8EscapeControlChars(S: String; EscapeMode: TEscapeMode; CustomStrings: PEscapedStrings; AMaxGrowFactor: Cardinal): String; const // lookup tables are almost 2x faster than using IntToStr or IntToHex EscapedStringsOfPascal: TEscapedStrings = ( @@ -3067,7 +3073,7 @@ var i, ResLen: Integer; SLen, SubLen: SizeInt; const - MaxGrowFactor: array[TEscapeMode] of integer = (3, 4, 5, 5, 5, 3); + MaxGrowFactor: array[TEscapeMode] of Cardinal = (3, 4, 5, 5, 5, 3, 0); begin Result := ''; case EscapeMode of @@ -3077,8 +3083,22 @@ begin emHexC : EscapedStrings := @EscapedStringsOfCHex; emAsciiControlNames: EscapedStrings := @EscapedStringsOfAscii; emPercent : EscapedStrings := @EscapedStringsOfPercent; - else - raise Exception.Create('Invalid EscapeMode is specified'); + emCustom : begin + if not Assigned(EscapedStrings) then + Raise EInvalidOp.Create('Utf8EscapeControlChars: CustomStrings cannot be nil when EscapeMode=emCustom.'); + EscapedStrings := CustomStrings; + MaxGrowFactor[emCustom] := AMaxGrowFactor; + if (MaxGrowFactor[emCustom] = 0) then + begin + MaxGrowFactor[emCustom] := 1; + for Ch := #0 to #31 do + begin + SLen := Length(EscapedStrings^[Ch]); + if (SLen > MaxGrowFactor[emCustom]) then + MaxGrowFactor[emCustom] := SLen; + end; + end; + end; end; if FindInvalidUTF8Codepoint(PChar(S), Length(S)) >= 0 then