LazUtils, LCL: Initialize UnicodeTables in LazUTF16 only when needed. Deprecate copied UTF16 functions in LCLProc.

This commit is contained in:
Juha 2023-07-03 15:29:56 +03:00
parent 9553dd176c
commit dd161fc03b
5 changed files with 54 additions and 98 deletions

View File

@ -36,18 +36,20 @@ type
{$ENDIF} {$ENDIF}
function UTF16CharacterLength(p: PWideChar): integer; function UTF16CharacterLength(p: PWideChar): integer;
function UTF16Length(const s: widestring): PtrInt; overload; function UTF16Length(const s: UnicodeString): PtrInt; overload;
function UTF16Length(p: PWideChar; WordCount: PtrInt): PtrInt; overload; function UTF16Length(p: PWideChar; WordCount: PtrInt): PtrInt; overload;
function UTF16Copy(const s: UnicodeString; StartCharIndex, CharCount: PtrInt): Unicodestring; function UTF16Copy(const s: UnicodeString; StartCharIndex, CharCount: PtrInt): UnicodeString;
function UTF16CharStart(P: PWideChar; Len, CharIndex: PtrInt): PWideChar; function UTF16CharStart(P: PWideChar; Len, CharIndex: PtrInt): PWideChar;
function UTF16Pos(const SearchForText, SearchInText: UnicodeString; StartPos: PtrInt = 1): PtrInt; function UTF16Pos(const SearchForText, SearchInText: UnicodeString; StartPos: PtrInt = 1): PtrInt;
function UTF16CharacterToUnicode(p: PWideChar; out CharLen: integer): Cardinal; function UTF16CharacterToUnicode(p: PWideChar; out CharLen: integer): Cardinal;
function UnicodeToUTF16(u: cardinal): widestring; function UnicodeToUTF16(u: cardinal): UnicodeString;
function IsUTF16CharValid(AChar, ANextChar: WideChar): Boolean; function IsUTF16CharValid(AChar, ANextChar: WideChar): Boolean;
function IsUTF16StringValid(AWideStr: widestring): Boolean; function IsUTF16StringValid(AStr: UnicodeString): Boolean;
function Utf16StringReplace(const S, OldPattern, NewPattern: WideString; Flags: TReplaceFlags): WideString; Inline; function Utf16StringReplace(const S, OldPattern, NewPattern: UnicodeString; Flags: TReplaceFlags): UnicodeString; Inline;
function Utf16StringReplace(const S, OldPattern, NewPattern: WideString; Flags: TReplaceFlags; out Count: Integer): WideString; function Utf16StringReplace(const S, OldPattern, NewPattern: UnicodeString; Flags: TReplaceFlags; out Count: Integer): UnicodeString;
// The following functions use UnicodeTables which are initialized
// only when the functions are called.
function UnicodeLowercase(u: cardinal): cardinal; function UnicodeLowercase(u: cardinal): cardinal;
{$IFDEF FPC} {$IFDEF FPC}
function UTF8LowerCaseViaTables(const s: string): string; function UTF8LowerCaseViaTables(const s: string): string;
@ -69,7 +71,7 @@ begin
end; end;
end; end;
function UTF16Length(const s: widestring): PtrInt; function UTF16Length(const s: UnicodeString): PtrInt;
begin begin
Result:=UTF16Length(PWideChar(s),length(s)); Result:=UTF16Length(PWideChar(s),length(s));
end; end;
@ -87,7 +89,7 @@ begin
end; end;
end; end;
function UTF16Copy(const s: UnicodeString; StartCharIndex, CharCount: PtrInt): Unicodestring; function UTF16Copy(const s: UnicodeString; StartCharIndex, CharCount: PtrInt): UnicodeString;
// returns substring // returns substring
var var
StartPos: PWideChar; StartPos: PWideChar;
@ -218,15 +220,14 @@ begin
end; end;
end; end;
function UnicodeToUTF16(u: cardinal): widestring; function UnicodeToUTF16(u: cardinal): UnicodeString;
begin begin
// u should be <= $10FFFF to fit into UTF-16 // u should be <= $10FFFF to fit into UTF-16
if u < $10000 then if u < $10000 then
// Note: codepoints $D800 - $DFFF are reserved // Note: codepoints $D800 - $DFFF are reserved
Result:=system.widechar(u) Result:=widechar(u)
else else
Result:=system.widechar($D800+((u - $10000) shr 10))+system.widechar($DC00+((u - $10000) and $3ff)); Result:=widechar($D800+((u - $10000) shr 10))+widechar($DC00+((u - $10000) and $3ff));
end; end;
// Specification here: http://unicode.org/faq/utf_bom.html#utf16-7 // Specification here: http://unicode.org/faq/utf_bom.html#utf16-7
@ -244,20 +245,20 @@ begin
Result := not Result; Result := not Result;
end; end;
function IsUTF16StringValid(AWideStr: widestring): Boolean; function IsUTF16StringValid(AStr: UnicodeString): Boolean;
var var
i: Integer; i: Integer;
begin begin
Result := True; Result := True;
for i := 1 to Length(AWideStr)-1 do for i := 1 to Length(AStr)-1 do
begin begin
Result := Result and IsUTF16CharValid(AWideStr[i], AWideStr[i+1]); Result := Result and IsUTF16CharValid(AStr[i], AStr[i+1]);
if not Result then Exit; if not Result then Exit;
end; end;
end; end;
function Utf16StringReplace(const S, OldPattern, NewPattern: WideString; function Utf16StringReplace(const S, OldPattern, NewPattern: UnicodeString;
Flags: TReplaceFlags): WideString; Flags: TReplaceFlags): UnicodeString;
var var
DummyCount: Integer; DummyCount: Integer;
begin begin
@ -265,10 +266,10 @@ begin
end; end;
//Same as SysUtil.StringReplace but for WideStrings/UnicodeStrings, since it's not available in fpc yet //Same as SysUtil.StringReplace but for WideStrings/UnicodeStrings, since it's not available in fpc yet
function Utf16StringReplace(const S, OldPattern, NewPattern: WideString; function Utf16StringReplace(const S, OldPattern, NewPattern: UnicodeString;
Flags: TReplaceFlags; out Count: Integer): WideString; Flags: TReplaceFlags; out Count: Integer): UnicodeString;
var var
Srch, OldP, RemS: WideString; // Srch and OldP can contain WideUpperCase versions of S,OldPattern Srch, OldP, RemS: UnicodeString; // Srch and OldP can contain WideUpperCase versions of S,OldPattern
P: Integer; P: Integer;
begin begin
Srch:=S; Srch:=S;
@ -317,6 +318,7 @@ var
UnicodeLower1E00_1FFC: array[$1E00..$1FFC] of word; UnicodeLower1E00_1FFC: array[$1E00..$1FFC] of word;
UnicodeLower2126_2183: array[$2126..$2183] of word; UnicodeLower2126_2183: array[$2126..$2183] of word;
UnicodeLower2C60_2CE2: array[$2C60..$2CE2] of word; UnicodeLower2C60_2CE2: array[$2C60..$2CE2] of word;
UnicodeTablesInitialized: Boolean;
procedure InitUnicodeTables; procedure InitUnicodeTables;
var var
@ -997,10 +999,14 @@ begin
UnicodeLower2C60_2CE2[$2CDE]:=$2CDF; UnicodeLower2C60_2CE2[$2CDE]:=$2CDF;
UnicodeLower2C60_2CE2[$2CE0]:=$2CE1; UnicodeLower2C60_2CE2[$2CE0]:=$2CE1;
UnicodeLower2C60_2CE2[$2CE2]:=$2CE3; UnicodeLower2C60_2CE2[$2CE2]:=$2CE3;
UnicodeTablesInitialized:=True;
end; end;
function UnicodeLowercase(u: cardinal): cardinal; function UnicodeLowercase(u: cardinal): cardinal;
begin begin
if not UnicodeTablesInitialized then
InitUnicodeTables; // Initialize only when needed.
if u<$00C0 then begin if u<$00C0 then begin
// most common // most common
if (u>=$0041) and (u<=$0061) then if (u>=$0041) and (u<=$0061) then
@ -1027,6 +1033,7 @@ begin
end; end;
{$IFDEF FPC} {$IFDEF FPC}
function UTF8LowercaseDynLength(const s: string): string; function UTF8LowercaseDynLength(const s: string): string;
var var
Buf: shortstring; Buf: shortstring;
@ -1094,6 +1101,8 @@ var
Changed: Boolean; Changed: Boolean;
p: PChar; p: PChar;
begin begin
if not UnicodeTablesInitialized then
InitUnicodeTables; // Initialize only when needed.
Result:=s; Result:=s;
if Result='' then exit; if Result='' then exit;
Changed:=false; Changed:=false;
@ -1142,9 +1151,8 @@ begin
end; end;
until false; until false;
end; end;
{$ENDIF} {$ENDIF}
initialization
InitUnicodeTables;
end. end.

View File

@ -27,8 +27,10 @@ uses
qtobjects, qtint, qtobjects, qtint,
// Free Pascal // Free Pascal
Classes, SysUtils, Types, Classes, SysUtils, Types,
// LazUtils
LazUtilities, LazLoggerBase, LazUTF16,
// LCL // LCL
LCLType, LCLProc, LazUTF8, LCLIntf, LMessages, Graphics, Forms, Controls, LCLType, LazUTF8, LCLIntf, LMessages, Graphics, Forms, Controls,
ComCtrls, ExtCtrls, StdCtrls, Menus, Dialogs, ImgList; ComCtrls, ExtCtrls, StdCtrls, Menus, Dialogs, ImgList;
type type

View File

@ -28,9 +28,9 @@ uses
// Free Pascal // Free Pascal
Classes, SysUtils, Types, Classes, SysUtils, Types,
// LazUtils // LazUtils
LazLoggerBase, LazLoggerBase, LazUTF16,
// LCL // LCL
LCLType, LCLProc, LazUTF8, LazStringUtils, LCLIntf, LMessages, Graphics, Forms, Controls, LCLType, LazUTF8, LazStringUtils, LCLIntf, LMessages, Graphics, Forms, Controls,
ComCtrls, ExtCtrls, StdCtrls, Menus, Dialogs, ImgList; ComCtrls, ExtCtrls, StdCtrls, Menus, Dialogs, ImgList;
type type

View File

@ -27,8 +27,10 @@ uses
qtobjects, qtint, qtobjects, qtint,
// Free Pascal // Free Pascal
Classes, SysUtils, Types, Classes, SysUtils, Types,
// LazUtils
LazLoggerBase, LazUTF16,
// LCL // LCL
LCLType, LCLProc, LazUTF8, LazStringUtils, LCLIntf, LMessages, Graphics, Forms, Controls, LCLType, LazUTF8, LazStringUtils, LCLIntf, LMessages, Graphics, Forms, Controls,
ComCtrls, ExtCtrls, StdCtrls, Menus, Dialogs, ImgList; ComCtrls, ExtCtrls, StdCtrls, Menus, Dialogs, ImgList;
type type

View File

@ -28,7 +28,7 @@ uses
{$IFDEF Darwin}MacOSAll, {$ENDIF} {$IFDEF Darwin}MacOSAll, {$ENDIF}
Classes, SysUtils, Math, Types, Laz_AVL_Tree, Classes, SysUtils, Math, Types, Laz_AVL_Tree,
// LazUtils // LazUtils
LazFileUtils, LazUtilities, LazMethodList, LazUTF8, LazLoggerBase, LazTracer, LazFileUtils, LazUtilities, LazMethodList, LazUTF8, LazUTF16, LazLoggerBase, LazTracer,
GraphMath, GraphMath,
// LCL // LCL
LCLStrConsts, LCLType; LCLStrConsts, LCLType;
@ -43,7 +43,7 @@ type
Item: Pointer; Item: Pointer;
IsDestroyed: boolean; IsDestroyed: boolean;
Info: string; Info: string;
CreationStack: TStackTracePointers; // stack trace at creationg CreationStack: TStackTracePointers; // stack trace at creation
DestructionStack: TStackTracePointers;// stack trace at destruction DestructionStack: TStackTracePointers;// stack trace at destruction
function AsString(WithStackTraces: boolean): string; function AsString(WithStackTraces: boolean): string;
destructor Destroy; override; destructor Destroy; override;
@ -225,20 +225,12 @@ procedure DbgAppendToFileWithoutLn(FileName, S: String);
function ClassCase(const AClass: TClass; const ACase: array of TClass {; const ADescendant: Boolean = True}): Integer; overload; function ClassCase(const AClass: TClass; const ACase: array of TClass {; const ADescendant: Boolean = True}): Integer; overload;
function ClassCase(const AClass: TClass; const ACase: array of TClass; const ADescendant: Boolean): Integer; overload; function ClassCase(const AClass: TClass; const ACase: array of TClass; const ADescendant: Boolean): Integer; overload;
// MWE: define (missing) UTF16string similar to UTF8 // Deprecated in Lazarus 3.99 July 2023.
// strictly spoken, a widestring <> utf16string function UTF16CharacterLength(p: PWideChar): integer; deprecated 'Use LazUTF16.UTF16CharacterLength instead';
// todo: use it in existing functions function UTF16Length(const s: UnicodeString): PtrInt; deprecated 'Use LazUTF16.UTF16Length instead';
type function UTF16Length(p: PWideChar; WordCount: PtrInt): PtrInt; deprecated 'Use LazUTF16.UTF16Length instead';
UTF16String = type UnicodeString; function UTF16CharacterToUnicode(p: PWideChar; out CharLen: integer): Cardinal; deprecated 'Use LazUTF16.UTF16CharacterToUnicode instead';
PUTF16String = ^UTF16String; function UnicodeToUTF16(u: cardinal): UnicodeString; deprecated 'Use LazUTF16.UnicodeToUTF16 instead';
// Felipe: Don't substitute with calls to lazutf16 because lazutf16 includes
// some initialization code and tables, which are not necessary for the LCL
function UTF16CharacterLength(p: PWideChar): integer;
function UTF16Length(const s: UTF16String): PtrInt; inline;
function UTF16Length(p: PWideChar; WordCount: PtrInt): PtrInt;
function UTF16CharacterToUnicode(p: PWideChar; out CharLen: integer): Cardinal;
function UnicodeToUTF16(u: cardinal): UTF16String;
// identifier // identifier
function CreateFirstIdentifier(const Identifier: string): string; function CreateFirstIdentifier(const Identifier: string): string;
@ -1317,7 +1309,7 @@ begin
Result:=LazLoggerBase.dbgObjMem(AnObject); Result:=LazLoggerBase.dbgObjMem(AnObject);
end; end;
function dbghex(i: Int64): string; function dbgHex(i: Int64): string;
begin begin
Result:=LazLoggerBase.dbghex(i); Result:=LazLoggerBase.dbghex(i);
end; end;
@ -1731,76 +1723,28 @@ begin
end; end;
function UTF16CharacterLength(p: PWideChar): integer; function UTF16CharacterLength(p: PWideChar): integer;
// returns length of UTF16 character in number of words
// The endianess of the machine will be taken.
begin begin
if p<>nil then begin Result:=LazUTF16.UTF16CharacterLength(p);
if (ord(p[0]) < $D800) or (ord(p[0]) > $DFFF) then
Result:=1
else
Result:=2;
end else begin
Result:=0;
end;
end; end;
function UTF16Length(const s: UTF16String): PtrInt; function UTF16Length(const s: UnicodeString): PtrInt;
begin begin
Result:=UTF16Length(PWideChar(s),length(s)); Result:=LazUTF16.UTF16Length(s);
end; end;
function UTF16Length(p: PWideChar; WordCount: PtrInt): PtrInt; function UTF16Length(p: PWideChar; WordCount: PtrInt): PtrInt;
var
CharLen: LongInt;
begin begin
Result:=0; Result:=LazUTF16.UTF16Length(p, WordCount);
while (WordCount>0) do begin
inc(Result);
CharLen:=UTF16CharacterLength(p);
inc(p,CharLen);
dec(WordCount,CharLen);
end;
end; end;
function UTF16CharacterToUnicode(p: PWideChar; out CharLen: integer): Cardinal; function UTF16CharacterToUnicode(p: PWideChar; out CharLen: integer): Cardinal;
var
w1: cardinal;
w2: Cardinal;
begin begin
if p<>nil then begin Result:=LazUTF16.UTF16CharacterToUnicode(p, CharLen);
w1:=ord(p[0]);
if (w1 < $D800) or (w1 > $DFFF) then begin
// is 1 word character
Result:=w1;
CharLen:=1;
end else begin
// could be 2 word character
w2:=ord(p[1]);
if (w2>=$DC00) then begin
// is 2 word character
Result:=(w1-$D800) shl 10 + (w2-$DC00) + $10000;
CharLen:=2;
end else begin
// invalid character
Result:=w1;
CharLen:=1;
end;
end;
end else begin
Result:=0;
CharLen:=0;
end;
end; end;
function UnicodeToUTF16(u: cardinal): UTF16String; function UnicodeToUTF16(u: cardinal): UnicodeString;
begin begin
// u should be <= $10FFFF to fit into UTF-16 Result:=LazUTF16.UnicodeToUTF16(u);
if u < $10000 then
// Note: codepoints $D800 - $DFFF are reserved
Result:=system.widechar(u)
else
Result:=system.widechar($D800+((u - $10000) shr 10))+system.widechar($DC00+((u - $10000) and $3ff));
end; end;
function CreateFirstIdentifier(const Identifier: string): string; function CreateFirstIdentifier(const Identifier: string): string;