LazUtils, LCL: Initialize UnicodeTables in LazUTF16 only when needed. Deprecate copied UTF16 functions in LCLProc.

This commit is contained in:
Juha 2023-07-03 15:29:56 +03:00
parent 9553dd176c
commit dd161fc03b
5 changed files with 54 additions and 98 deletions

View File

@ -36,18 +36,20 @@ type
{$ENDIF}
function UTF16CharacterLength(p: PWideChar): integer;
function UTF16Length(const s: widestring): PtrInt; overload;
function UTF16Length(const s: UnicodeString): PtrInt; overload;
function UTF16Length(p: PWideChar; WordCount: PtrInt): PtrInt; overload;
function UTF16Copy(const s: UnicodeString; StartCharIndex, CharCount: PtrInt): Unicodestring;
function UTF16Copy(const s: UnicodeString; StartCharIndex, CharCount: PtrInt): UnicodeString;
function UTF16CharStart(P: PWideChar; Len, CharIndex: PtrInt): PWideChar;
function UTF16Pos(const SearchForText, SearchInText: UnicodeString; StartPos: PtrInt = 1): PtrInt;
function UTF16CharacterToUnicode(p: PWideChar; out CharLen: integer): Cardinal;
function UnicodeToUTF16(u: cardinal): widestring;
function UnicodeToUTF16(u: cardinal): UnicodeString;
function IsUTF16CharValid(AChar, ANextChar: WideChar): Boolean;
function IsUTF16StringValid(AWideStr: widestring): Boolean;
function Utf16StringReplace(const S, OldPattern, NewPattern: WideString; Flags: TReplaceFlags): WideString; Inline;
function Utf16StringReplace(const S, OldPattern, NewPattern: WideString; Flags: TReplaceFlags; out Count: Integer): WideString;
function IsUTF16StringValid(AStr: UnicodeString): Boolean;
function Utf16StringReplace(const S, OldPattern, NewPattern: UnicodeString; Flags: TReplaceFlags): UnicodeString; Inline;
function Utf16StringReplace(const S, OldPattern, NewPattern: UnicodeString; Flags: TReplaceFlags; out Count: Integer): UnicodeString;
// The following functions use UnicodeTables which are initialized
// only when the functions are called.
function UnicodeLowercase(u: cardinal): cardinal;
{$IFDEF FPC}
function UTF8LowerCaseViaTables(const s: string): string;
@ -69,7 +71,7 @@ begin
end;
end;
function UTF16Length(const s: widestring): PtrInt;
function UTF16Length(const s: UnicodeString): PtrInt;
begin
Result:=UTF16Length(PWideChar(s),length(s));
end;
@ -87,7 +89,7 @@ begin
end;
end;
function UTF16Copy(const s: UnicodeString; StartCharIndex, CharCount: PtrInt): Unicodestring;
function UTF16Copy(const s: UnicodeString; StartCharIndex, CharCount: PtrInt): UnicodeString;
// returns substring
var
StartPos: PWideChar;
@ -218,15 +220,14 @@ begin
end;
end;
function UnicodeToUTF16(u: cardinal): widestring;
function UnicodeToUTF16(u: cardinal): UnicodeString;
begin
// u should be <= $10FFFF to fit into UTF-16
if u < $10000 then
// Note: codepoints $D800 - $DFFF are reserved
Result:=system.widechar(u)
Result:=widechar(u)
else
Result:=system.widechar($D800+((u - $10000) shr 10))+system.widechar($DC00+((u - $10000) and $3ff));
Result:=widechar($D800+((u - $10000) shr 10))+widechar($DC00+((u - $10000) and $3ff));
end;
// Specification here: http://unicode.org/faq/utf_bom.html#utf16-7
@ -244,20 +245,20 @@ begin
Result := not Result;
end;
function IsUTF16StringValid(AWideStr: widestring): Boolean;
function IsUTF16StringValid(AStr: UnicodeString): Boolean;
var
i: Integer;
begin
Result := True;
for i := 1 to Length(AWideStr)-1 do
for i := 1 to Length(AStr)-1 do
begin
Result := Result and IsUTF16CharValid(AWideStr[i], AWideStr[i+1]);
Result := Result and IsUTF16CharValid(AStr[i], AStr[i+1]);
if not Result then Exit;
end;
end;
function Utf16StringReplace(const S, OldPattern, NewPattern: WideString;
Flags: TReplaceFlags): WideString;
function Utf16StringReplace(const S, OldPattern, NewPattern: UnicodeString;
Flags: TReplaceFlags): UnicodeString;
var
DummyCount: Integer;
begin
@ -265,10 +266,10 @@ begin
end;
//Same as SysUtil.StringReplace but for WideStrings/UnicodeStrings, since it's not available in fpc yet
function Utf16StringReplace(const S, OldPattern, NewPattern: WideString;
Flags: TReplaceFlags; out Count: Integer): WideString;
function Utf16StringReplace(const S, OldPattern, NewPattern: UnicodeString;
Flags: TReplaceFlags; out Count: Integer): UnicodeString;
var
Srch, OldP, RemS: WideString; // Srch and OldP can contain WideUpperCase versions of S,OldPattern
Srch, OldP, RemS: UnicodeString; // Srch and OldP can contain WideUpperCase versions of S,OldPattern
P: Integer;
begin
Srch:=S;
@ -317,6 +318,7 @@ var
UnicodeLower1E00_1FFC: array[$1E00..$1FFC] of word;
UnicodeLower2126_2183: array[$2126..$2183] of word;
UnicodeLower2C60_2CE2: array[$2C60..$2CE2] of word;
UnicodeTablesInitialized: Boolean;
procedure InitUnicodeTables;
var
@ -997,10 +999,14 @@ begin
UnicodeLower2C60_2CE2[$2CDE]:=$2CDF;
UnicodeLower2C60_2CE2[$2CE0]:=$2CE1;
UnicodeLower2C60_2CE2[$2CE2]:=$2CE3;
UnicodeTablesInitialized:=True;
end;
function UnicodeLowercase(u: cardinal): cardinal;
begin
if not UnicodeTablesInitialized then
InitUnicodeTables; // Initialize only when needed.
if u<$00C0 then begin
// most common
if (u>=$0041) and (u<=$0061) then
@ -1027,6 +1033,7 @@ begin
end;
{$IFDEF FPC}
function UTF8LowercaseDynLength(const s: string): string;
var
Buf: shortstring;
@ -1094,6 +1101,8 @@ var
Changed: Boolean;
p: PChar;
begin
if not UnicodeTablesInitialized then
InitUnicodeTables; // Initialize only when needed.
Result:=s;
if Result='' then exit;
Changed:=false;
@ -1142,9 +1151,8 @@ begin
end;
until false;
end;
{$ENDIF}
initialization
InitUnicodeTables;
end.

View File

@ -27,8 +27,10 @@ uses
qtobjects, qtint,
// Free Pascal
Classes, SysUtils, Types,
// LazUtils
LazUtilities, LazLoggerBase, LazUTF16,
// LCL
LCLType, LCLProc, LazUTF8, LCLIntf, LMessages, Graphics, Forms, Controls,
LCLType, LazUTF8, LCLIntf, LMessages, Graphics, Forms, Controls,
ComCtrls, ExtCtrls, StdCtrls, Menus, Dialogs, ImgList;
type

View File

@ -28,9 +28,9 @@ uses
// Free Pascal
Classes, SysUtils, Types,
// LazUtils
LazLoggerBase,
LazLoggerBase, LazUTF16,
// LCL
LCLType, LCLProc, LazUTF8, LazStringUtils, LCLIntf, LMessages, Graphics, Forms, Controls,
LCLType, LazUTF8, LazStringUtils, LCLIntf, LMessages, Graphics, Forms, Controls,
ComCtrls, ExtCtrls, StdCtrls, Menus, Dialogs, ImgList;
type

View File

@ -27,8 +27,10 @@ uses
qtobjects, qtint,
// Free Pascal
Classes, SysUtils, Types,
// LazUtils
LazLoggerBase, LazUTF16,
// LCL
LCLType, LCLProc, LazUTF8, LazStringUtils, LCLIntf, LMessages, Graphics, Forms, Controls,
LCLType, LazUTF8, LazStringUtils, LCLIntf, LMessages, Graphics, Forms, Controls,
ComCtrls, ExtCtrls, StdCtrls, Menus, Dialogs, ImgList;
type

View File

@ -28,7 +28,7 @@ uses
{$IFDEF Darwin}MacOSAll, {$ENDIF}
Classes, SysUtils, Math, Types, Laz_AVL_Tree,
// LazUtils
LazFileUtils, LazUtilities, LazMethodList, LazUTF8, LazLoggerBase, LazTracer,
LazFileUtils, LazUtilities, LazMethodList, LazUTF8, LazUTF16, LazLoggerBase, LazTracer,
GraphMath,
// LCL
LCLStrConsts, LCLType;
@ -43,7 +43,7 @@ type
Item: Pointer;
IsDestroyed: boolean;
Info: string;
CreationStack: TStackTracePointers; // stack trace at creationg
CreationStack: TStackTracePointers; // stack trace at creation
DestructionStack: TStackTracePointers;// stack trace at destruction
function AsString(WithStackTraces: boolean): string;
destructor Destroy; override;
@ -225,20 +225,12 @@ procedure DbgAppendToFileWithoutLn(FileName, S: String);
function ClassCase(const AClass: TClass; const ACase: array of TClass {; const ADescendant: Boolean = True}): Integer; overload;
function ClassCase(const AClass: TClass; const ACase: array of TClass; const ADescendant: Boolean): Integer; overload;
// MWE: define (missing) UTF16string similar to UTF8
// strictly spoken, a widestring <> utf16string
// todo: use it in existing functions
type
UTF16String = type UnicodeString;
PUTF16String = ^UTF16String;
// Felipe: Don't substitute with calls to lazutf16 because lazutf16 includes
// some initialization code and tables, which are not necessary for the LCL
function UTF16CharacterLength(p: PWideChar): integer;
function UTF16Length(const s: UTF16String): PtrInt; inline;
function UTF16Length(p: PWideChar; WordCount: PtrInt): PtrInt;
function UTF16CharacterToUnicode(p: PWideChar; out CharLen: integer): Cardinal;
function UnicodeToUTF16(u: cardinal): UTF16String;
// Deprecated in Lazarus 3.99 July 2023.
function UTF16CharacterLength(p: PWideChar): integer; deprecated 'Use LazUTF16.UTF16CharacterLength instead';
function UTF16Length(const s: UnicodeString): PtrInt; deprecated 'Use LazUTF16.UTF16Length instead';
function UTF16Length(p: PWideChar; WordCount: PtrInt): PtrInt; deprecated 'Use LazUTF16.UTF16Length instead';
function UTF16CharacterToUnicode(p: PWideChar; out CharLen: integer): Cardinal; deprecated 'Use LazUTF16.UTF16CharacterToUnicode instead';
function UnicodeToUTF16(u: cardinal): UnicodeString; deprecated 'Use LazUTF16.UnicodeToUTF16 instead';
// identifier
function CreateFirstIdentifier(const Identifier: string): string;
@ -1317,7 +1309,7 @@ begin
Result:=LazLoggerBase.dbgObjMem(AnObject);
end;
function dbghex(i: Int64): string;
function dbgHex(i: Int64): string;
begin
Result:=LazLoggerBase.dbghex(i);
end;
@ -1731,76 +1723,28 @@ begin
end;
function UTF16CharacterLength(p: PWideChar): integer;
// returns length of UTF16 character in number of words
// The endianess of the machine will be taken.
begin
if p<>nil then begin
if (ord(p[0]) < $D800) or (ord(p[0]) > $DFFF) then
Result:=1
else
Result:=2;
end else begin
Result:=0;
end;
Result:=LazUTF16.UTF16CharacterLength(p);
end;
function UTF16Length(const s: UTF16String): PtrInt;
function UTF16Length(const s: UnicodeString): PtrInt;
begin
Result:=UTF16Length(PWideChar(s),length(s));
Result:=LazUTF16.UTF16Length(s);
end;
function UTF16Length(p: PWideChar; WordCount: PtrInt): PtrInt;
var
CharLen: LongInt;
begin
Result:=0;
while (WordCount>0) do begin
inc(Result);
CharLen:=UTF16CharacterLength(p);
inc(p,CharLen);
dec(WordCount,CharLen);
end;
Result:=LazUTF16.UTF16Length(p, WordCount);
end;
function UTF16CharacterToUnicode(p: PWideChar; out CharLen: integer): Cardinal;
var
w1: cardinal;
w2: Cardinal;
begin
if p<>nil then begin
w1:=ord(p[0]);
if (w1 < $D800) or (w1 > $DFFF) then begin
// is 1 word character
Result:=w1;
CharLen:=1;
end else begin
// could be 2 word character
w2:=ord(p[1]);
if (w2>=$DC00) then begin
// is 2 word character
Result:=(w1-$D800) shl 10 + (w2-$DC00) + $10000;
CharLen:=2;
end else begin
// invalid character
Result:=w1;
CharLen:=1;
end;
end;
end else begin
Result:=0;
CharLen:=0;
end;
Result:=LazUTF16.UTF16CharacterToUnicode(p, CharLen);
end;
function UnicodeToUTF16(u: cardinal): UTF16String;
function UnicodeToUTF16(u: cardinal): UnicodeString;
begin
// u should be <= $10FFFF to fit into UTF-16
if u < $10000 then
// Note: codepoints $D800 - $DFFF are reserved
Result:=system.widechar(u)
else
Result:=system.widechar($D800+((u - $10000) shr 10))+system.widechar($DC00+((u - $10000) and $3ff));
Result:=LazUTF16.UnicodeToUTF16(u);
end;
function CreateFirstIdentifier(const Identifier: string): string;