mirror of
https://gitlab.com/freepascal.org/lazarus/lazarus.git
synced 2025-04-08 07:21:34 +02:00
LazUtils, LCL: Initialize UnicodeTables in LazUTF16 only when needed. Deprecate copied UTF16 functions in LCLProc.
This commit is contained in:
parent
9553dd176c
commit
dd161fc03b
@ -36,18 +36,20 @@ type
|
||||
{$ENDIF}
|
||||
|
||||
function UTF16CharacterLength(p: PWideChar): integer;
|
||||
function UTF16Length(const s: widestring): PtrInt; overload;
|
||||
function UTF16Length(const s: UnicodeString): PtrInt; overload;
|
||||
function UTF16Length(p: PWideChar; WordCount: PtrInt): PtrInt; overload;
|
||||
function UTF16Copy(const s: UnicodeString; StartCharIndex, CharCount: PtrInt): Unicodestring;
|
||||
function UTF16Copy(const s: UnicodeString; StartCharIndex, CharCount: PtrInt): UnicodeString;
|
||||
function UTF16CharStart(P: PWideChar; Len, CharIndex: PtrInt): PWideChar;
|
||||
function UTF16Pos(const SearchForText, SearchInText: UnicodeString; StartPos: PtrInt = 1): PtrInt;
|
||||
function UTF16CharacterToUnicode(p: PWideChar; out CharLen: integer): Cardinal;
|
||||
function UnicodeToUTF16(u: cardinal): widestring;
|
||||
function UnicodeToUTF16(u: cardinal): UnicodeString;
|
||||
function IsUTF16CharValid(AChar, ANextChar: WideChar): Boolean;
|
||||
function IsUTF16StringValid(AWideStr: widestring): Boolean;
|
||||
function Utf16StringReplace(const S, OldPattern, NewPattern: WideString; Flags: TReplaceFlags): WideString; Inline;
|
||||
function Utf16StringReplace(const S, OldPattern, NewPattern: WideString; Flags: TReplaceFlags; out Count: Integer): WideString;
|
||||
function IsUTF16StringValid(AStr: UnicodeString): Boolean;
|
||||
function Utf16StringReplace(const S, OldPattern, NewPattern: UnicodeString; Flags: TReplaceFlags): UnicodeString; Inline;
|
||||
function Utf16StringReplace(const S, OldPattern, NewPattern: UnicodeString; Flags: TReplaceFlags; out Count: Integer): UnicodeString;
|
||||
|
||||
// The following functions use UnicodeTables which are initialized
|
||||
// only when the functions are called.
|
||||
function UnicodeLowercase(u: cardinal): cardinal;
|
||||
{$IFDEF FPC}
|
||||
function UTF8LowerCaseViaTables(const s: string): string;
|
||||
@ -69,7 +71,7 @@ begin
|
||||
end;
|
||||
end;
|
||||
|
||||
function UTF16Length(const s: widestring): PtrInt;
|
||||
function UTF16Length(const s: UnicodeString): PtrInt;
|
||||
begin
|
||||
Result:=UTF16Length(PWideChar(s),length(s));
|
||||
end;
|
||||
@ -87,7 +89,7 @@ begin
|
||||
end;
|
||||
end;
|
||||
|
||||
function UTF16Copy(const s: UnicodeString; StartCharIndex, CharCount: PtrInt): Unicodestring;
|
||||
function UTF16Copy(const s: UnicodeString; StartCharIndex, CharCount: PtrInt): UnicodeString;
|
||||
// returns substring
|
||||
var
|
||||
StartPos: PWideChar;
|
||||
@ -218,15 +220,14 @@ begin
|
||||
end;
|
||||
end;
|
||||
|
||||
function UnicodeToUTF16(u: cardinal): widestring;
|
||||
function UnicodeToUTF16(u: cardinal): UnicodeString;
|
||||
begin
|
||||
// u should be <= $10FFFF to fit into UTF-16
|
||||
|
||||
if u < $10000 then
|
||||
// Note: codepoints $D800 - $DFFF are reserved
|
||||
Result:=system.widechar(u)
|
||||
Result:=widechar(u)
|
||||
else
|
||||
Result:=system.widechar($D800+((u - $10000) shr 10))+system.widechar($DC00+((u - $10000) and $3ff));
|
||||
Result:=widechar($D800+((u - $10000) shr 10))+widechar($DC00+((u - $10000) and $3ff));
|
||||
end;
|
||||
|
||||
// Specification here: http://unicode.org/faq/utf_bom.html#utf16-7
|
||||
@ -244,20 +245,20 @@ begin
|
||||
Result := not Result;
|
||||
end;
|
||||
|
||||
function IsUTF16StringValid(AWideStr: widestring): Boolean;
|
||||
function IsUTF16StringValid(AStr: UnicodeString): Boolean;
|
||||
var
|
||||
i: Integer;
|
||||
begin
|
||||
Result := True;
|
||||
for i := 1 to Length(AWideStr)-1 do
|
||||
for i := 1 to Length(AStr)-1 do
|
||||
begin
|
||||
Result := Result and IsUTF16CharValid(AWideStr[i], AWideStr[i+1]);
|
||||
Result := Result and IsUTF16CharValid(AStr[i], AStr[i+1]);
|
||||
if not Result then Exit;
|
||||
end;
|
||||
end;
|
||||
|
||||
function Utf16StringReplace(const S, OldPattern, NewPattern: WideString;
|
||||
Flags: TReplaceFlags): WideString;
|
||||
function Utf16StringReplace(const S, OldPattern, NewPattern: UnicodeString;
|
||||
Flags: TReplaceFlags): UnicodeString;
|
||||
var
|
||||
DummyCount: Integer;
|
||||
begin
|
||||
@ -265,10 +266,10 @@ begin
|
||||
end;
|
||||
|
||||
//Same as SysUtil.StringReplace but for WideStrings/UnicodeStrings, since it's not available in fpc yet
|
||||
function Utf16StringReplace(const S, OldPattern, NewPattern: WideString;
|
||||
Flags: TReplaceFlags; out Count: Integer): WideString;
|
||||
function Utf16StringReplace(const S, OldPattern, NewPattern: UnicodeString;
|
||||
Flags: TReplaceFlags; out Count: Integer): UnicodeString;
|
||||
var
|
||||
Srch, OldP, RemS: WideString; // Srch and OldP can contain WideUpperCase versions of S,OldPattern
|
||||
Srch, OldP, RemS: UnicodeString; // Srch and OldP can contain WideUpperCase versions of S,OldPattern
|
||||
P: Integer;
|
||||
begin
|
||||
Srch:=S;
|
||||
@ -317,6 +318,7 @@ var
|
||||
UnicodeLower1E00_1FFC: array[$1E00..$1FFC] of word;
|
||||
UnicodeLower2126_2183: array[$2126..$2183] of word;
|
||||
UnicodeLower2C60_2CE2: array[$2C60..$2CE2] of word;
|
||||
UnicodeTablesInitialized: Boolean;
|
||||
|
||||
procedure InitUnicodeTables;
|
||||
var
|
||||
@ -997,10 +999,14 @@ begin
|
||||
UnicodeLower2C60_2CE2[$2CDE]:=$2CDF;
|
||||
UnicodeLower2C60_2CE2[$2CE0]:=$2CE1;
|
||||
UnicodeLower2C60_2CE2[$2CE2]:=$2CE3;
|
||||
|
||||
UnicodeTablesInitialized:=True;
|
||||
end;
|
||||
|
||||
function UnicodeLowercase(u: cardinal): cardinal;
|
||||
begin
|
||||
if not UnicodeTablesInitialized then
|
||||
InitUnicodeTables; // Initialize only when needed.
|
||||
if u<$00C0 then begin
|
||||
// most common
|
||||
if (u>=$0041) and (u<=$0061) then
|
||||
@ -1027,6 +1033,7 @@ begin
|
||||
end;
|
||||
|
||||
{$IFDEF FPC}
|
||||
|
||||
function UTF8LowercaseDynLength(const s: string): string;
|
||||
var
|
||||
Buf: shortstring;
|
||||
@ -1094,6 +1101,8 @@ var
|
||||
Changed: Boolean;
|
||||
p: PChar;
|
||||
begin
|
||||
if not UnicodeTablesInitialized then
|
||||
InitUnicodeTables; // Initialize only when needed.
|
||||
Result:=s;
|
||||
if Result='' then exit;
|
||||
Changed:=false;
|
||||
@ -1142,9 +1151,8 @@ begin
|
||||
end;
|
||||
until false;
|
||||
end;
|
||||
|
||||
{$ENDIF}
|
||||
|
||||
initialization
|
||||
InitUnicodeTables;
|
||||
end.
|
||||
|
||||
|
@ -27,8 +27,10 @@ uses
|
||||
qtobjects, qtint,
|
||||
// Free Pascal
|
||||
Classes, SysUtils, Types,
|
||||
// LazUtils
|
||||
LazUtilities, LazLoggerBase, LazUTF16,
|
||||
// LCL
|
||||
LCLType, LCLProc, LazUTF8, LCLIntf, LMessages, Graphics, Forms, Controls,
|
||||
LCLType, LazUTF8, LCLIntf, LMessages, Graphics, Forms, Controls,
|
||||
ComCtrls, ExtCtrls, StdCtrls, Menus, Dialogs, ImgList;
|
||||
|
||||
type
|
||||
|
@ -28,9 +28,9 @@ uses
|
||||
// Free Pascal
|
||||
Classes, SysUtils, Types,
|
||||
// LazUtils
|
||||
LazLoggerBase,
|
||||
LazLoggerBase, LazUTF16,
|
||||
// LCL
|
||||
LCLType, LCLProc, LazUTF8, LazStringUtils, LCLIntf, LMessages, Graphics, Forms, Controls,
|
||||
LCLType, LazUTF8, LazStringUtils, LCLIntf, LMessages, Graphics, Forms, Controls,
|
||||
ComCtrls, ExtCtrls, StdCtrls, Menus, Dialogs, ImgList;
|
||||
|
||||
type
|
||||
|
@ -27,8 +27,10 @@ uses
|
||||
qtobjects, qtint,
|
||||
// Free Pascal
|
||||
Classes, SysUtils, Types,
|
||||
// LazUtils
|
||||
LazLoggerBase, LazUTF16,
|
||||
// LCL
|
||||
LCLType, LCLProc, LazUTF8, LazStringUtils, LCLIntf, LMessages, Graphics, Forms, Controls,
|
||||
LCLType, LazUTF8, LazStringUtils, LCLIntf, LMessages, Graphics, Forms, Controls,
|
||||
ComCtrls, ExtCtrls, StdCtrls, Menus, Dialogs, ImgList;
|
||||
|
||||
type
|
||||
|
@ -28,7 +28,7 @@ uses
|
||||
{$IFDEF Darwin}MacOSAll, {$ENDIF}
|
||||
Classes, SysUtils, Math, Types, Laz_AVL_Tree,
|
||||
// LazUtils
|
||||
LazFileUtils, LazUtilities, LazMethodList, LazUTF8, LazLoggerBase, LazTracer,
|
||||
LazFileUtils, LazUtilities, LazMethodList, LazUTF8, LazUTF16, LazLoggerBase, LazTracer,
|
||||
GraphMath,
|
||||
// LCL
|
||||
LCLStrConsts, LCLType;
|
||||
@ -43,7 +43,7 @@ type
|
||||
Item: Pointer;
|
||||
IsDestroyed: boolean;
|
||||
Info: string;
|
||||
CreationStack: TStackTracePointers; // stack trace at creationg
|
||||
CreationStack: TStackTracePointers; // stack trace at creation
|
||||
DestructionStack: TStackTracePointers;// stack trace at destruction
|
||||
function AsString(WithStackTraces: boolean): string;
|
||||
destructor Destroy; override;
|
||||
@ -225,20 +225,12 @@ procedure DbgAppendToFileWithoutLn(FileName, S: String);
|
||||
function ClassCase(const AClass: TClass; const ACase: array of TClass {; const ADescendant: Boolean = True}): Integer; overload;
|
||||
function ClassCase(const AClass: TClass; const ACase: array of TClass; const ADescendant: Boolean): Integer; overload;
|
||||
|
||||
// MWE: define (missing) UTF16string similar to UTF8
|
||||
// strictly spoken, a widestring <> utf16string
|
||||
// todo: use it in existing functions
|
||||
type
|
||||
UTF16String = type UnicodeString;
|
||||
PUTF16String = ^UTF16String;
|
||||
|
||||
// Felipe: Don't substitute with calls to lazutf16 because lazutf16 includes
|
||||
// some initialization code and tables, which are not necessary for the LCL
|
||||
function UTF16CharacterLength(p: PWideChar): integer;
|
||||
function UTF16Length(const s: UTF16String): PtrInt; inline;
|
||||
function UTF16Length(p: PWideChar; WordCount: PtrInt): PtrInt;
|
||||
function UTF16CharacterToUnicode(p: PWideChar; out CharLen: integer): Cardinal;
|
||||
function UnicodeToUTF16(u: cardinal): UTF16String;
|
||||
// Deprecated in Lazarus 3.99 July 2023.
|
||||
function UTF16CharacterLength(p: PWideChar): integer; deprecated 'Use LazUTF16.UTF16CharacterLength instead';
|
||||
function UTF16Length(const s: UnicodeString): PtrInt; deprecated 'Use LazUTF16.UTF16Length instead';
|
||||
function UTF16Length(p: PWideChar; WordCount: PtrInt): PtrInt; deprecated 'Use LazUTF16.UTF16Length instead';
|
||||
function UTF16CharacterToUnicode(p: PWideChar; out CharLen: integer): Cardinal; deprecated 'Use LazUTF16.UTF16CharacterToUnicode instead';
|
||||
function UnicodeToUTF16(u: cardinal): UnicodeString; deprecated 'Use LazUTF16.UnicodeToUTF16 instead';
|
||||
|
||||
// identifier
|
||||
function CreateFirstIdentifier(const Identifier: string): string;
|
||||
@ -1317,7 +1309,7 @@ begin
|
||||
Result:=LazLoggerBase.dbgObjMem(AnObject);
|
||||
end;
|
||||
|
||||
function dbghex(i: Int64): string;
|
||||
function dbgHex(i: Int64): string;
|
||||
begin
|
||||
Result:=LazLoggerBase.dbghex(i);
|
||||
end;
|
||||
@ -1731,76 +1723,28 @@ begin
|
||||
end;
|
||||
|
||||
function UTF16CharacterLength(p: PWideChar): integer;
|
||||
// returns length of UTF16 character in number of words
|
||||
// The endianess of the machine will be taken.
|
||||
begin
|
||||
if p<>nil then begin
|
||||
if (ord(p[0]) < $D800) or (ord(p[0]) > $DFFF) then
|
||||
Result:=1
|
||||
else
|
||||
Result:=2;
|
||||
end else begin
|
||||
Result:=0;
|
||||
end;
|
||||
Result:=LazUTF16.UTF16CharacterLength(p);
|
||||
end;
|
||||
|
||||
function UTF16Length(const s: UTF16String): PtrInt;
|
||||
function UTF16Length(const s: UnicodeString): PtrInt;
|
||||
begin
|
||||
Result:=UTF16Length(PWideChar(s),length(s));
|
||||
Result:=LazUTF16.UTF16Length(s);
|
||||
end;
|
||||
|
||||
function UTF16Length(p: PWideChar; WordCount: PtrInt): PtrInt;
|
||||
var
|
||||
CharLen: LongInt;
|
||||
begin
|
||||
Result:=0;
|
||||
while (WordCount>0) do begin
|
||||
inc(Result);
|
||||
CharLen:=UTF16CharacterLength(p);
|
||||
inc(p,CharLen);
|
||||
dec(WordCount,CharLen);
|
||||
end;
|
||||
Result:=LazUTF16.UTF16Length(p, WordCount);
|
||||
end;
|
||||
|
||||
function UTF16CharacterToUnicode(p: PWideChar; out CharLen: integer): Cardinal;
|
||||
var
|
||||
w1: cardinal;
|
||||
w2: Cardinal;
|
||||
begin
|
||||
if p<>nil then begin
|
||||
w1:=ord(p[0]);
|
||||
if (w1 < $D800) or (w1 > $DFFF) then begin
|
||||
// is 1 word character
|
||||
Result:=w1;
|
||||
CharLen:=1;
|
||||
end else begin
|
||||
// could be 2 word character
|
||||
w2:=ord(p[1]);
|
||||
if (w2>=$DC00) then begin
|
||||
// is 2 word character
|
||||
Result:=(w1-$D800) shl 10 + (w2-$DC00) + $10000;
|
||||
CharLen:=2;
|
||||
end else begin
|
||||
// invalid character
|
||||
Result:=w1;
|
||||
CharLen:=1;
|
||||
end;
|
||||
end;
|
||||
end else begin
|
||||
Result:=0;
|
||||
CharLen:=0;
|
||||
end;
|
||||
Result:=LazUTF16.UTF16CharacterToUnicode(p, CharLen);
|
||||
end;
|
||||
|
||||
function UnicodeToUTF16(u: cardinal): UTF16String;
|
||||
function UnicodeToUTF16(u: cardinal): UnicodeString;
|
||||
begin
|
||||
// u should be <= $10FFFF to fit into UTF-16
|
||||
|
||||
if u < $10000 then
|
||||
// Note: codepoints $D800 - $DFFF are reserved
|
||||
Result:=system.widechar(u)
|
||||
else
|
||||
Result:=system.widechar($D800+((u - $10000) shr 10))+system.widechar($DC00+((u - $10000) and $3ff));
|
||||
Result:=LazUTF16.UnicodeToUTF16(u);
|
||||
end;
|
||||
|
||||
function CreateFirstIdentifier(const Identifier: string): string;
|
||||
|
Loading…
Reference in New Issue
Block a user