LazUtils, LCL: Initialize UnicodeTables in LazUTF16 only when needed. Deprecate copied UTF16 functions in LCLProc.

2025-04-08 07:21:34 +02:00 · 2023-07-03 15:29:56 +03:00 · 2023-07-03 15:29:56 +03:00 · dd161fc03b
commit dd161fc03b
parent 9553dd176c
5 changed files with 54 additions and 98 deletions
--- a/components/lazutils/lazutf16.pas
+++ b/components/lazutils/lazutf16.pas
@ -36,18 +36,20 @@ type
 {$ENDIF}

 function UTF16CharacterLength(p: PWideChar): integer;
-function UTF16Length(const s: widestring): PtrInt; overload;
+function UTF16Length(const s: UnicodeString): PtrInt; overload;
 function UTF16Length(p: PWideChar; WordCount: PtrInt): PtrInt; overload;
-function UTF16Copy(const s: UnicodeString; StartCharIndex, CharCount: PtrInt): Unicodestring;
+function UTF16Copy(const s: UnicodeString; StartCharIndex, CharCount: PtrInt): UnicodeString;
 function UTF16CharStart(P: PWideChar; Len, CharIndex: PtrInt): PWideChar;
 function UTF16Pos(const SearchForText, SearchInText: UnicodeString; StartPos: PtrInt = 1): PtrInt;
 function UTF16CharacterToUnicode(p: PWideChar; out CharLen: integer): Cardinal;
-function UnicodeToUTF16(u: cardinal): widestring;
+function UnicodeToUTF16(u: cardinal): UnicodeString;
 function IsUTF16CharValid(AChar, ANextChar: WideChar): Boolean;
-function IsUTF16StringValid(AWideStr: widestring): Boolean;
-function Utf16StringReplace(const S, OldPattern, NewPattern: WideString; Flags: TReplaceFlags): WideString; Inline;
-function Utf16StringReplace(const S, OldPattern, NewPattern: WideString; Flags: TReplaceFlags; out Count: Integer): WideString;
+function IsUTF16StringValid(AStr: UnicodeString): Boolean;
+function Utf16StringReplace(const S, OldPattern, NewPattern: UnicodeString; Flags: TReplaceFlags): UnicodeString; Inline;
+function Utf16StringReplace(const S, OldPattern, NewPattern: UnicodeString; Flags: TReplaceFlags; out Count: Integer): UnicodeString;

+// The following functions use UnicodeTables which are initialized
+//  only when the functions are called.
 function UnicodeLowercase(u: cardinal): cardinal;
 {$IFDEF FPC}
 function UTF8LowerCaseViaTables(const s: string): string;
@ -69,7 +71,7 @@ begin
  end;
 end;

-function UTF16Length(const s: widestring): PtrInt;
+function UTF16Length(const s: UnicodeString): PtrInt;
 begin
  Result:=UTF16Length(PWideChar(s),length(s));
 end;
@ -87,7 +89,7 @@ begin
  end;
 end;

-function UTF16Copy(const s: UnicodeString; StartCharIndex, CharCount: PtrInt): Unicodestring;
+function UTF16Copy(const s: UnicodeString; StartCharIndex, CharCount: PtrInt): UnicodeString;
 // returns substring
 var
  StartPos: PWideChar;
@ -218,15 +220,14 @@ begin
  end;
 end;

-function UnicodeToUTF16(u: cardinal): widestring;
+function UnicodeToUTF16(u: cardinal): UnicodeString;
 begin
  // u should be <= $10FFFF to fit into UTF-16
-
  if u < $10000 then
    // Note: codepoints $D800 - $DFFF are reserved
-    Result:=system.widechar(u)
+    Result:=widechar(u)
  else
-    Result:=system.widechar($D800+((u - $10000) shr 10))+system.widechar($DC00+((u - $10000) and $3ff));
+    Result:=widechar($D800+((u - $10000) shr 10))+widechar($DC00+((u - $10000) and $3ff));
 end;

 // Specification here: http://unicode.org/faq/utf_bom.html#utf16-7
@ -244,20 +245,20 @@ begin
  Result := not Result;
 end;

-function IsUTF16StringValid(AWideStr: widestring): Boolean;
+function IsUTF16StringValid(AStr: UnicodeString): Boolean;
 var
  i: Integer;
 begin
  Result := True;
-  for i := 1 to Length(AWideStr)-1 do
+  for i := 1 to Length(AStr)-1 do
  begin
-    Result := Result and IsUTF16CharValid(AWideStr[i], AWideStr[i+1]);
+    Result := Result and IsUTF16CharValid(AStr[i], AStr[i+1]);
    if not Result then Exit;
  end;
 end;

-function Utf16StringReplace(const S, OldPattern, NewPattern: WideString;
-  Flags: TReplaceFlags): WideString;
+function Utf16StringReplace(const S, OldPattern, NewPattern: UnicodeString;
+  Flags: TReplaceFlags): UnicodeString;
 var
  DummyCount: Integer;
 begin
@ -265,10 +266,10 @@ begin
 end;

 //Same as SysUtil.StringReplace but for WideStrings/UnicodeStrings, since it's not available in fpc yet
-function Utf16StringReplace(const S, OldPattern, NewPattern: WideString;
-                            Flags: TReplaceFlags; out Count: Integer): WideString;
+function Utf16StringReplace(const S, OldPattern, NewPattern: UnicodeString;
+  Flags: TReplaceFlags; out Count: Integer): UnicodeString;
 var
-  Srch, OldP, RemS: WideString; // Srch and OldP can contain WideUpperCase versions of S,OldPattern
+  Srch, OldP, RemS: UnicodeString; // Srch and OldP can contain WideUpperCase versions of S,OldPattern
  P: Integer;
 begin
  Srch:=S;
@ -317,6 +318,7 @@ var
  UnicodeLower1E00_1FFC: array[$1E00..$1FFC] of word;
  UnicodeLower2126_2183: array[$2126..$2183] of word;
  UnicodeLower2C60_2CE2: array[$2C60..$2CE2] of word;
+  UnicodeTablesInitialized: Boolean;

 procedure InitUnicodeTables;
 var
@ -997,10 +999,14 @@ begin
  UnicodeLower2C60_2CE2[$2CDE]:=$2CDF;
  UnicodeLower2C60_2CE2[$2CE0]:=$2CE1;
  UnicodeLower2C60_2CE2[$2CE2]:=$2CE3;
+
+  UnicodeTablesInitialized:=True;
 end;

 function UnicodeLowercase(u: cardinal): cardinal;
 begin
+  if not UnicodeTablesInitialized then
+    InitUnicodeTables;      // Initialize only when needed.
  if u<$00C0 then begin
    // most common
    if (u>=$0041) and (u<=$0061) then
@ -1027,6 +1033,7 @@ begin
 end;

 {$IFDEF FPC}
+
 function UTF8LowercaseDynLength(const s: string): string;
 var
  Buf: shortstring;
@ -1094,6 +1101,8 @@ var
  Changed: Boolean;
  p: PChar;
 begin
+  if not UnicodeTablesInitialized then
+    InitUnicodeTables;      // Initialize only when needed.
  Result:=s;
  if Result='' then exit;
  Changed:=false;
@ -1142,9 +1151,8 @@ begin
    end;
  until false;
 end;
+
 {$ENDIF}

-initialization
-  InitUnicodeTables;
 end.

--- a/lcl/interfaces/qt/qtwidgets.pas
+++ b/lcl/interfaces/qt/qtwidgets.pas
@ -27,8 +27,10 @@ uses
  qtobjects, qtint,
  // Free Pascal
  Classes, SysUtils, Types,
+  // LazUtils
+  LazUtilities, LazLoggerBase, LazUTF16,
  // LCL
-  LCLType, LCLProc, LazUTF8, LCLIntf, LMessages, Graphics, Forms, Controls,
+  LCLType, LazUTF8, LCLIntf, LMessages, Graphics, Forms, Controls,
  ComCtrls, ExtCtrls, StdCtrls, Menus, Dialogs, ImgList;

 type
--- a/lcl/interfaces/qt5/qtwidgets.pas
+++ b/lcl/interfaces/qt5/qtwidgets.pas
@ -28,9 +28,9 @@ uses
  // Free Pascal
  Classes, SysUtils, Types,
  // LazUtils
-  LazLoggerBase,
+  LazLoggerBase, LazUTF16,
  // LCL
-  LCLType, LCLProc, LazUTF8, LazStringUtils, LCLIntf, LMessages, Graphics, Forms, Controls,
+  LCLType, LazUTF8, LazStringUtils, LCLIntf, LMessages, Graphics, Forms, Controls,
  ComCtrls, ExtCtrls, StdCtrls, Menus, Dialogs, ImgList;

 type
--- a/lcl/interfaces/qt6/qtwidgets.pas
+++ b/lcl/interfaces/qt6/qtwidgets.pas
@ -27,8 +27,10 @@ uses
  qtobjects, qtint,
  // Free Pascal
  Classes, SysUtils, Types,
+  // LazUtils
+  LazLoggerBase, LazUTF16,
  // LCL
-  LCLType, LCLProc, LazUTF8, LazStringUtils, LCLIntf, LMessages, Graphics, Forms, Controls,
+  LCLType, LazUTF8, LazStringUtils, LCLIntf, LMessages, Graphics, Forms, Controls,
  ComCtrls, ExtCtrls, StdCtrls, Menus, Dialogs, ImgList;

 type
--- a/lcl/lclproc.pas
+++ b/lcl/lclproc.pas
@ -28,7 +28,7 @@ uses
  {$IFDEF Darwin}MacOSAll, {$ENDIF}
  Classes, SysUtils, Math, Types, Laz_AVL_Tree,
  // LazUtils
-  LazFileUtils, LazUtilities, LazMethodList, LazUTF8, LazLoggerBase, LazTracer,
+  LazFileUtils, LazUtilities, LazMethodList, LazUTF8, LazUTF16, LazLoggerBase, LazTracer,
  GraphMath,
  // LCL
  LCLStrConsts, LCLType;
@ -43,7 +43,7 @@ type
    Item: Pointer;
    IsDestroyed: boolean;
    Info: string;
-    CreationStack: TStackTracePointers; // stack trace at creationg
+    CreationStack: TStackTracePointers; // stack trace at creation
    DestructionStack: TStackTracePointers;// stack trace at destruction
    function AsString(WithStackTraces: boolean): string;
    destructor Destroy; override;
@ -225,20 +225,12 @@ procedure DbgAppendToFileWithoutLn(FileName, S: String);
 function ClassCase(const AClass: TClass; const ACase: array of TClass {; const ADescendant: Boolean = True}): Integer; overload;
 function ClassCase(const AClass: TClass; const ACase: array of TClass; const ADescendant: Boolean): Integer; overload;

-// MWE: define (missing) UTF16string similar to UTF8
-//      strictly spoken, a widestring <> utf16string
-// todo: use it in existing functions
-type
-  UTF16String = type UnicodeString;
-  PUTF16String = ^UTF16String;
-
-// Felipe: Don't substitute with calls to lazutf16 because lazutf16 includes
-// some initialization code and tables, which are not necessary for the LCL
-function UTF16CharacterLength(p: PWideChar): integer;
-function UTF16Length(const s: UTF16String): PtrInt; inline;
-function UTF16Length(p: PWideChar; WordCount: PtrInt): PtrInt;
-function UTF16CharacterToUnicode(p: PWideChar; out CharLen: integer): Cardinal;
-function UnicodeToUTF16(u: cardinal): UTF16String;
+// Deprecated in Lazarus 3.99 July 2023.
+function UTF16CharacterLength(p: PWideChar): integer; deprecated 'Use LazUTF16.UTF16CharacterLength instead';
+function UTF16Length(const s: UnicodeString): PtrInt; deprecated 'Use LazUTF16.UTF16Length instead';
+function UTF16Length(p: PWideChar; WordCount: PtrInt): PtrInt; deprecated 'Use LazUTF16.UTF16Length instead';
+function UTF16CharacterToUnicode(p: PWideChar; out CharLen: integer): Cardinal; deprecated 'Use LazUTF16.UTF16CharacterToUnicode instead';
+function UnicodeToUTF16(u: cardinal): UnicodeString; deprecated 'Use LazUTF16.UnicodeToUTF16 instead';

 // identifier
 function CreateFirstIdentifier(const Identifier: string): string;
@ -1317,7 +1309,7 @@ begin
  Result:=LazLoggerBase.dbgObjMem(AnObject);
 end;

-function dbghex(i: Int64): string;
+function dbgHex(i: Int64): string;
 begin
  Result:=LazLoggerBase.dbghex(i);
 end;
@ -1731,76 +1723,28 @@ begin
 end;

 function UTF16CharacterLength(p: PWideChar): integer;
-// returns length of UTF16 character in number of words
-// The endianess of the machine will be taken.
 begin
-  if p<>nil then begin
-    if (ord(p[0]) < $D800) or (ord(p[0]) > $DFFF) then
-      Result:=1
-    else
-      Result:=2;
-  end else begin
-    Result:=0;
-  end;
+  Result:=LazUTF16.UTF16CharacterLength(p);
 end;

-function UTF16Length(const s: UTF16String): PtrInt;
+function UTF16Length(const s: UnicodeString): PtrInt;
 begin
-  Result:=UTF16Length(PWideChar(s),length(s));
+  Result:=LazUTF16.UTF16Length(s);
 end;

 function UTF16Length(p: PWideChar; WordCount: PtrInt): PtrInt;
-var
-  CharLen: LongInt;
 begin
-  Result:=0;
-  while (WordCount>0) do begin
-    inc(Result);
-    CharLen:=UTF16CharacterLength(p);
-    inc(p,CharLen);
-    dec(WordCount,CharLen);
-  end;
+  Result:=LazUTF16.UTF16Length(p, WordCount);
 end;

 function UTF16CharacterToUnicode(p: PWideChar; out CharLen: integer): Cardinal;
-var
-  w1: cardinal;
-  w2: Cardinal;
 begin
-  if p<>nil then begin
-    w1:=ord(p[0]);
-    if (w1 < $D800) or (w1 > $DFFF) then begin
-      // is 1 word character
-      Result:=w1;
-      CharLen:=1;
-    end else begin
-      // could be 2 word character
-      w2:=ord(p[1]);
-      if (w2>=$DC00) then begin
-        // is 2 word character
-        Result:=(w1-$D800) shl 10 + (w2-$DC00) + $10000;
-        CharLen:=2;
-      end else begin
-        // invalid character
-        Result:=w1;
-        CharLen:=1;
-      end;
-    end;
-  end else begin
-    Result:=0;
-    CharLen:=0;
-  end;
+  Result:=LazUTF16.UTF16CharacterToUnicode(p, CharLen);
 end;

-function UnicodeToUTF16(u: cardinal): UTF16String;
+function UnicodeToUTF16(u: cardinal): UnicodeString;
 begin
-  // u should be <= $10FFFF to fit into UTF-16
-
-  if u < $10000 then
-    // Note: codepoints $D800 - $DFFF are reserved
-    Result:=system.widechar(u)
-  else
-    Result:=system.widechar($D800+((u - $10000) shr 10))+system.widechar($DC00+((u - $10000) and $3ff));
+  Result:=LazUTF16.UnicodeToUTF16(u);
 end;

 function CreateFirstIdentifier(const Identifier: string): string;