mirror of
https://gitlab.com/freepascal.org/fpc/source.git
synced 2025-04-06 10:47:57 +02:00
193 lines
11 KiB
PHP
193 lines
11 KiB
PHP
{
|
|
This file is part of the Free Pascal run time library.
|
|
Copyright (c) 1999-2005 by Florian Klaempfl,
|
|
member of the Free Pascal development team.
|
|
|
|
This file implements support routines for UnicodeStrings with FPC
|
|
|
|
See the file COPYING.FPC, included in this distribution,
|
|
for details about the copyright.
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
|
|
|
**********************************************************************}
|
|
|
|
|
|
Procedure UniqueString (Var S : UnicodeString);{$ifdef SYSTEMINLINE}inline;{$endif}
|
|
Function Pos (Const Substr : UnicodeString; Const Source : UnicodeString; Offset: Sizeint = 1) : SizeInt;
|
|
Function Pos (c : AnsiChar; Const s : UnicodeString; Offset: Sizeint = 1) : SizeInt;
|
|
Function Pos (c : UnicodeChar; Const s : UnicodeString; Offset: Sizeint = 1) : SizeInt;
|
|
Function Pos (const c : RawByteString; Const s : UnicodeString; Offset: Sizeint = 1) : SizeInt;
|
|
Function Pos (const c : UnicodeString; Const s : RawByteString; Offset: Sizeint = 1) : SizeInt;
|
|
Function Pos (const c : ShortString; Const s : UnicodeString; Offset: Sizeint = 1) : SizeInt;
|
|
Function StringOfChar(c : Unicodechar;l : SizeInt) : UnicodeString;
|
|
Function UpCase(const s : UnicodeString) : UnicodeString;
|
|
Function UpCase(c:UnicodeChar):UnicodeChar;
|
|
Function LowerCase(const s : UnicodeString) : UnicodeString;
|
|
Function LowerCase(c:UnicodeChar):UnicodeChar;
|
|
|
|
Procedure fpc_setstring_unicodestr_pwidechar(Out S : UnicodeString; Buf : PUnicodeChar; Len : SizeInt); compilerproc;
|
|
Procedure fpc_setstring_unicodestr_pansichar(Out S : UnicodeString; Buf : PAnsiChar; Len : SizeInt); compilerproc;
|
|
|
|
function WideCharToString(S : PWideChar) : UnicodeString;
|
|
function StringToWideChar(const Src : RawByteString;Dest : PWideChar;DestSize : SizeInt) : PWideChar;
|
|
function WideCharLenToString(S : PWideChar;Len : SizeInt) : UnicodeString;
|
|
procedure WideCharLenToStrVar(Src : PWideChar;Len : SizeInt;out Dest : UnicodeString);
|
|
procedure WideCharLenToStrVar(Src : PWideChar;Len : SizeInt;out Dest : AnsiString);
|
|
procedure WideCharToStrVar(S : PWideChar;out Dest : UnicodeString);
|
|
procedure WideCharToStrVar(S : PWideChar;out Dest : AnsiString);
|
|
|
|
function UnicodeCharToString(S : PUnicodeChar) : UnicodeString;
|
|
function StringToUnicodeChar(const Src : RawByteString;Dest : PUnicodeChar;DestSize : SizeInt) : PUnicodeChar;
|
|
function UnicodeCharLenToString(S : PUnicodeChar;Len : SizeInt) : UnicodeString;
|
|
procedure UnicodeCharLenToStrVar(Src : PUnicodeChar;Len : SizeInt;out Dest : UnicodeString);
|
|
procedure UnicodeCharLenToStrVar(Src : PUnicodeChar;Len : SizeInt;out Dest : AnsiString);
|
|
procedure UnicodeCharToStrVar(S : PUnicodeChar;out Dest : AnsiString);
|
|
|
|
function UnicodeFromLocaleChars(CodePage, Flags: Cardinal; LocaleStr: PAnsiChar;
|
|
LocaleStrLen: SizeInt; UnicodeStr: PWideChar; UnicodeStrLen: SizeInt): SizeInt; overload;
|
|
|
|
function UnicodeFromLocaleChars(const LocaleName: AnsiString; Flags: Cardinal;
|
|
LocaleStr: PAnsiChar; LocaleStrLen: SizeInt; UnicodeStr: PWideChar;
|
|
UnicodeStrLen: SizeInt): SizeInt; overload;
|
|
|
|
|
|
procedure DefaultUnicode2AnsiMove(source:punicodechar;var dest:RawByteString;cp : TSystemCodePage;len:SizeInt);
|
|
procedure DefaultAnsi2UnicodeMove(source:pansichar;cp : TSystemCodePage;var dest:unicodestring;len:SizeInt);
|
|
|
|
Type
|
|
{ please only enable options below after creating a test program for them that
|
|
passes on Windows and committing it, so it can be used to verify the
|
|
functionality on other platforms }
|
|
{ Comparison flags; combined into the set type TCompareOptions, which the
  string manager compare hooks take as their Options parameter.
  Only coIgnoreCase is currently enabled. Every other value is commented out
  and must stay disabled until the test program requested above exists. }
TCompareOption = ({coLingIgnoreCase, coLingIgnoreDiacritic, }coIgnoreCase{,
|
|
coIgnoreKanaType, coIgnoreNonSpace, coIgnoreSymbols, coIgnoreWidth,
|
|
coLingCasing, coDigitAsNumbers, coStringSort});
|
|
TCompareOptions = set of TCompareOption;
|
|
{ Selects which standard system code page is being asked for. A value of this
  type is the stdcp argument of TUnicodeStringManager.GetStandardCodePageProc,
  which returns the matching TSystemCodePage. }
TStandardCodePageEnum = (
|
|
scpAnsi, // system Ansi code page (GetACP on Windows)
|
|
scpConsoleInput, // system console input code page (GetConsoleCP on Windows)
|
|
scpConsoleOutput, // system console output code page (GetConsoleOutputCP on Windows)
|
|
scpFileSystemSingleByte // file system code page used by single byte OS FileSystem APIs (GetACP on Windows)
|
|
);
|
|
|
|
{$ifndef FPC_HAS_BUILTIN_WIDESTR_MANAGER}
|
|
{ Hooks for internationalization.
|
|
Please add new procedures at the end; this makes it easier to detect new procedures. }
|
|
{ Table of procedural-variable hooks; the global variable widestringmanager
  has this type. It is only declared here when FPC_HAS_BUILTIN_WIDESTR_MANAGER
  is not defined. Fields are grouped as: wide string conversion and case
  mapping, wide string comparison, code point helpers on PAnsiChar data,
  AnsiString case mapping and comparison, thread init and finalization,
  UnicodeString variants of the conversion, case and compare hooks, and
  finally the standard code page query.
  Field order is part of the layout: append new hooks at the end only. }
TUnicodeStringManager = record
|
|
Wide2AnsiMoveProc : procedure(source:pwidechar;var dest:RawByteString;cp : TSystemCodePage;len:SizeInt);
|
|
Ansi2WideMoveProc : procedure(source:pansichar;cp : TSystemCodePage;var dest:widestring;len:SizeInt);
|
|
|
|
// UpperUTF8 : procedure(p:PUTF8String);
|
|
|
|
UpperWideStringProc : function(const S: WideString): WideString;
|
|
// UpperUCS4 : procedure(p:PUCS4Char);
|
|
// LowerUTF8 : procedure(p:PUTF8String);
|
|
LowerWideStringProc : function(const S: WideString): WideString;
|
|
// LowerUCS4 : procedure(p:PUCS4Char);
|
|
{
|
|
CompUTF8 : function(p1,p2:PUTF8String) : shortint;
|
|
CompUCS2 : function(p1,p2:PUCS2Char) : shortint;
|
|
CompUCS4 : function(p1,p2:PUCS4Char) : shortint;
|
|
}
|
|
CompareWideStringProc : function(const s1, s2 : WideString; Options : TCompareOptions) : PtrInt;
|
|
// CompareTextWideStringProc is CompareWideStringProc with coIgnoreCase in options.
|
|
// CompareTextWideStringProc : function(const s1, s2 : WideString): PtrInt;
|
|
{ return value: number of code points in the string. Whenever an invalid
|
|
code point is encountered, all characters part of this invalid code point
|
|
are considered to form one "character" and the next character is
|
|
considered to be the start of a new (possibly also invalid) code point }
|
|
CharLengthPCharProc : function(const Str: PAnsiChar): PtrInt;
|
|
{ return value:
|
|
-1 if incomplete or invalid code point
|
|
0 if NULL character,
|
|
> 0 if that's the length in bytes of the code point }
|
|
// NOTE(review): the parameter name MaxLookAead looks like a misspelling of
// MaxLookAhead; it is left unchanged here because this edit only touches comments.
CodePointLengthProc : function(const Str: PAnsiChar; MaxLookAead: PtrInt): Ptrint;
|
|
|
|
UpperAnsiStringProc : function(const s : ansistring) : ansistring;
|
|
LowerAnsiStringProc : function(const s : ansistring) : ansistring;
|
|
CompareStrAnsiStringProc : function(const S1, S2: ansistring): PtrInt;
|
|
CompareTextAnsiStringProc : function(const S1, S2: ansistring): PtrInt;
|
|
StrCompAnsiStringProc : function(S1, S2: PAnsiChar): PtrInt;
|
|
StrICompAnsiStringProc : function(S1, S2: PAnsiChar): PtrInt;
|
|
StrLCompAnsiStringProc : function(S1, S2: PAnsiChar; MaxLen: PtrUInt): PtrInt;
|
|
StrLICompAnsiStringProc : function(S1, S2: PAnsiChar; MaxLen: PtrUInt): PtrInt;
|
|
StrLowerAnsiStringProc : function(Str: PAnsiChar): PAnsiChar;
|
|
StrUpperAnsiStringProc : function(Str: PAnsiChar): PAnsiChar;
|
|
ThreadInitProc : procedure;
|
|
ThreadFiniProc : procedure;
|
|
|
|
{ this is only different on windows }
|
|
Unicode2AnsiMoveProc : procedure(source:punicodechar;var dest:RawByteString;cp : TSystemCodePage;len:SizeInt);
|
|
Ansi2UnicodeMoveProc : procedure(source:pansichar;cp : TSystemCodePage;var dest:unicodestring;len:SizeInt);
|
|
UpperUnicodeStringProc : function(const S: UnicodeString): UnicodeString;
|
|
LowerUnicodeStringProc : function(const S: UnicodeString): UnicodeString;
|
|
CompareUnicodeStringProc : function(const s1, s2 : UnicodeString; Options : TCompareOptions) : PtrInt;
|
|
// CompareTextUnicodeStringProc is CompareUnicodeStringProc with coIgnoreCase in options.
|
|
// CompareTextUnicodeStringProc : function(const s1, s2 : UnicodeString): PtrInt;
|
|
|
|
{ codepage retrieve function }
|
|
GetStandardCodePageProc: function(const stdcp: TStandardCodePageEnum): TSystemCodePage;
|
|
end;
|
|
{$endif FPC_HAS_BUILTIN_WIDESTR_MANAGER}
|
|
|
|
var
|
|
widestringmanager : TUnicodeStringManager;
|
|
|
|
function UnicodeToUtf8(Dest: PAnsiChar; Source: PUnicodeChar; MaxBytes: SizeInt): SizeInt;{$ifdef SYSTEMINLINE}inline;{$endif}
|
|
function UnicodeToUtf8(Dest: PAnsiChar; MaxDestBytes: SizeUInt; Source: PUnicodeChar; SourceChars: SizeUInt): SizeUInt;
|
|
function Utf8ToUnicode(Dest: PUnicodeChar; Source: PAnsiChar; MaxChars: SizeInt): SizeInt;{$ifdef SYSTEMINLINE}inline;{$endif}
|
|
function Utf8ToUnicode(Dest: PUnicodeChar; MaxDestChars: SizeUInt; Source: PAnsiChar; SourceBytes: SizeUInt): SizeUInt;{$ifdef SYSTEMINLINE}inline;{$endif}
|
|
function Utf8ToUnicode(Dest: PUnicodeChar; MaxDestChars: SizeUInt; Source: PAnsiChar; SourceBytes: SizeUInt; IgnoreInvalid : Boolean): SizeUInt;
|
|
function UTF8Encode(const s : RawByteString) : RawByteString; inline;
|
|
function UTF8Encode(const s : UnicodeString) : RawByteString;
|
|
function UTF8Decode(const s : RawByteString): UnicodeString;
|
|
function UTF8ToString(const s : RawByteString): UnicodeString;inline;
|
|
function UTF8ToString(const S: ShortString): unicodestring;
|
|
function UTF8ToString(const S: PAnsiChar): unicodestring;
|
|
// Delphi compatibility overloads...
|
|
function UTF8ToUnicodeString(const s : RawByteString): UnicodeString;inline;
|
|
function UTF8ToUnicodeString(const S: ShortString): unicodestring;inline;
|
|
function UTF8ToUnicodeString(const S: PAnsiChar): unicodestring;inline;
|
|
|
|
{ byte and ansichar are the same on the JVM, and "array of" and "pointer to"
|
|
are as well }
|
|
{$ifndef CPUJVM}
|
|
function UTF8ToString(const S: array of AnsiChar): unicodestring;
|
|
function UTF8ToString(const S: array of Byte): unicodestring;
|
|
{$endif not CPUJVM}
|
|
function AnsiToUtf8(const s : RawByteString): RawByteString;{$ifdef SYSTEMINLINE}inline;{$endif}
|
|
function Utf8ToAnsi(const s : RawByteString) : RawByteString;{$ifdef SYSTEMINLINE}inline;{$endif}
|
|
{$ifdef FPC_HAS_FEATURE_DYNARRAYS}
|
|
function UnicodeStringToUCS4String(const s : UnicodeString) : UCS4String;
|
|
function UCS4StringToUnicodeString(const s : UCS4String) : UnicodeString;
|
|
function WideStringToUCS4String(const s : WideString) : UCS4String;
|
|
function UCS4StringToWideString(const s : UCS4String) : WideString;
|
|
{$endif FPC_HAS_FEATURE_DYNARRAYS}
|
|
|
|
Procedure GetWideStringManager (Out Manager : TUnicodeStringManager);
|
|
Procedure SetWideStringManager (Const New : TUnicodeStringManager);
|
|
Procedure SetWideStringManager (Const New : TUnicodeStringManager; Out Old: TUnicodeStringManager);
|
|
|
|
Procedure GetUnicodeStringManager (Out Manager : TUnicodeStringManager);
|
|
Procedure SetUnicodeStringManager (Const New : TUnicodeStringManager);
|
|
Procedure SetUnicodeStringManager (Const New : TUnicodeStringManager; Out Old: TUnicodeStringManager);
|
|
|
|
function StringElementSize(const S : UnicodeString): Word; overload;
|
|
function StringRefCount(const S : UnicodeString): SizeInt; overload;
|
|
function StringCodePage(const S : UnicodeString): TSystemCodePage; overload;
|
|
|
|
Function ToSingleByteFileSystemEncodedFileName(const Str: UnicodeString): RawByteString;
|
|
Function ToSingleByteFileSystemEncodedFileName(const arr: array of widechar): RawByteString;
|
|
Function ToSingleByteFileSystemEncodedFileName(const Str: RawByteString): RawByteString;
|
|
|
|
Type
|
|
{ Procedural type of the LocaleNameToCodePageCallBack variable. The callback
  receives a locale name and has an out parameter for a code page; presumably
  it lets a program supply its own locale name to code page mapping - confirm
  against the implementation of LocaleNameToCodePage.
  NOTE(review): aHandled is declared as a plain by-value Boolean, so a value
  assigned to it inside the callback never reaches the caller. Confirm whether
  var or out was intended; changing it would alter the callback signature. }
TLocaleNameToCodePageCallBack = Procedure (const localename : shortstring; out codepage : TSystemCodePage; aHandled : Boolean);
|
|
|
|
|
|
Var
|
|
LocaleNameToCodePageCallBack : TLocaleNameToCodePageCallBack;
|
|
|
|
Function LocaleNameToCodePage(const localename : shortstring; out codepage : TSystemCodePage) : Boolean;
|