mirror of
https://gitlab.com/freepascal.org/fpc/source.git
synced 2025-04-25 09:59:14 +02:00
564 lines
15 KiB
ObjectPascal
564 lines
15 KiB
ObjectPascal
{
|
|
This file is part of the Free Pascal run time library.
|
|
Copyright (c) 2013 by Yury Sidorov,
|
|
member of the Free Pascal development team.
|
|
|
|
Wide string support for Android
|
|
|
|
See the file COPYING.FPC, included in this distribution,
|
|
for details about the copyright.
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
|
**********************************************************************}
|
|
|
|
{$mode objfpc}
|
|
{$inline on}
|
|
{$implicitexceptions off}
|
|
|
|
unit cwstring;
|
|
|
|
interface
|
|
|
|
procedure SetCWidestringManager;
|
|
|
|
implementation
|
|
|
|
uses dynlibs;
|
|
|
|
type
|
|
UErrorCode = SizeInt;
|
|
int32_t = longint;
|
|
uint32_t = longword;
|
|
PUConverter = pointer;
|
|
PUCollator = pointer;
|
|
UBool = LongBool;
|
|
|
|
var
|
|
hlibICU: TLibHandle;
|
|
hlibICUi18n: TLibHandle;
|
|
ucnv_open: function (converterName: PAnsiChar; var pErrorCode: UErrorCode): PUConverter; cdecl;
|
|
ucnv_close: procedure (converter: PUConverter); cdecl;
|
|
ucnv_setSubstChars: procedure (converter: PUConverter; subChars: PAnsiChar; len: byte; var pErrorCode: UErrorCode); cdecl;
|
|
ucnv_setFallback: procedure (cnv: PUConverter; usesFallback: UBool); cdecl;
|
|
ucnv_fromUChars: function (cnv: PUConverter; dest: PAnsiChar; destCapacity: int32_t; src: PUnicodeChar; srcLength: int32_t; var pErrorCode: UErrorCode): int32_t; cdecl;
|
|
ucnv_toUChars: function (cnv: PUConverter; dest: PUnicodeChar; destCapacity: int32_t; src: PAnsiChar; srcLength: int32_t; var pErrorCode: UErrorCode): int32_t; cdecl;
|
|
u_strToUpper: function (dest: PUnicodeChar; destCapacity: int32_t; src: PUnicodeChar; srcLength: int32_t; locale: PAnsiChar; var pErrorCode: UErrorCode): int32_t; cdecl;
|
|
u_strToLower: function (dest: PUnicodeChar; destCapacity: int32_t; src: PUnicodeChar; srcLength: int32_t; locale: PAnsiChar; var pErrorCode: UErrorCode): int32_t; cdecl;
|
|
u_strCompare: function (s1: PUnicodeChar; length1: int32_t; s2: PUnicodeChar; length2: int32_t; codePointOrder: UBool): int32_t; cdecl;
|
|
u_strCaseCompare: function (s1: PUnicodeChar; length1: int32_t; s2: PUnicodeChar; length2: int32_t; options: uint32_t; var pErrorCode: UErrorCode): int32_t; cdecl;
|
|
|
|
ucol_open: function(loc: PAnsiChar; var status: UErrorCode): PUCollator; cdecl;
|
|
ucol_close: procedure (coll: PUCollator); cdecl;
|
|
ucol_strcoll: function (coll: PUCollator; source: PUnicodeChar; sourceLength: int32_t; target: PUnicodeChar; targetLength: int32_t): int32_t; cdecl;
|
|
ucol_setStrength: procedure (coll: PUCollator; strength: int32_t); cdecl;
|
|
u_errorName: function (code: UErrorCode): PAnsiChar; cdecl;
|
|
|
|
threadvar
|
|
ThreadDataInited: boolean;
|
|
DefConv, LastConv: PUConverter;
|
|
LastCP: TSystemCodePage;
|
|
DefColl: PUCollator;
|
|
|
|
function OpenConverter(const name: ansistring): PUConverter;
|
|
var
|
|
err: UErrorCode;
|
|
begin
|
|
err:=0;
|
|
Result:=ucnv_open(PAnsiChar(name), err);
|
|
if Result <> nil then begin
|
|
ucnv_setSubstChars(Result, '?', 1, err);
|
|
ucnv_setFallback(Result, True);
|
|
end;
|
|
end;
|
|
|
|
procedure InitThreadData;
|
|
var
|
|
err: UErrorCode;
|
|
col: PUCollator;
|
|
begin
|
|
if (hlibICU = 0) or ThreadDataInited then
|
|
exit;
|
|
ThreadDataInited:=True;
|
|
DefConv:=OpenConverter('utf8');
|
|
err:=0;
|
|
col:=ucol_open(nil, err);
|
|
if col <> nil then
|
|
ucol_setStrength(col, 2);
|
|
DefColl:=col;
|
|
end;
|
|
|
|
function GetConverter(cp: TSystemCodePage): PUConverter;
|
|
var
|
|
s: ansistring;
|
|
begin
|
|
if hlibICU = 0 then begin
|
|
Result:=nil;
|
|
exit;
|
|
end;
|
|
InitThreadData;
|
|
if (cp = DefaultSystemCodePage) or (cp = CP_ACP) then
|
|
Result:=DefConv
|
|
else begin
|
|
if cp <> LastCP then begin
|
|
Str(cp, s);
|
|
LastConv:=OpenConverter('cp' + s);
|
|
LastCP:=cp;
|
|
end;
|
|
Result:=LastConv;
|
|
end;
|
|
end;
|
|
|
|
procedure Unicode2AnsiMove(source: PUnicodeChar; var dest: RawByteString; cp: TSystemCodePage; len: SizeInt);
|
|
var
|
|
len2: SizeInt;
|
|
conv: PUConverter;
|
|
err: UErrorCode;
|
|
begin
|
|
if len = 0 then begin
|
|
dest:='';
|
|
exit;
|
|
end;
|
|
conv:=GetConverter(cp);
|
|
if conv = nil then begin
|
|
DefaultUnicode2AnsiMove(source,dest,DefaultSystemCodePage,len);
|
|
exit;
|
|
end;
|
|
|
|
len2:=len*3;
|
|
SetLength(dest, len2);
|
|
err:=0;
|
|
len2:=ucnv_fromUChars(conv, PAnsiChar(dest), len2, source, len, err);
|
|
if len2 > Length(dest) then begin
|
|
SetLength(dest, len2);
|
|
err:=0;
|
|
len2:=ucnv_fromUChars(conv, PAnsiChar(dest), len2, source, len, err);
|
|
end;
|
|
SetLength(dest, len2);
|
|
SetCodePage(dest, cp, False);
|
|
end;
|
|
|
|
procedure Ansi2UnicodeMove(source:pchar;cp : TSystemCodePage;var dest:unicodestring;len:SizeInt);
|
|
var
|
|
len2: SizeInt;
|
|
conv: PUConverter;
|
|
err: UErrorCode;
|
|
begin
|
|
if len = 0 then begin
|
|
dest:='';
|
|
exit;
|
|
end;
|
|
conv:=GetConverter(cp);
|
|
if conv = nil then begin
|
|
DefaultAnsi2UnicodeMove(source,DefaultSystemCodePage,dest,len);
|
|
exit;
|
|
end;
|
|
|
|
len2:=len;
|
|
SetLength(dest, len2);
|
|
err:=0;
|
|
len2:=ucnv_toUChars(conv, PUnicodeChar(dest), len2, source, len, err);
|
|
if len2 > Length(dest) then begin
|
|
SetLength(dest, len2);
|
|
err:=0;
|
|
len2:=ucnv_toUChars(conv, PUnicodeChar(dest), len2, source, len, err);
|
|
end;
|
|
SetLength(dest, len2);
|
|
end;
|
|
|
|
function UpperUnicodeString(const s : UnicodeString) : UnicodeString;
|
|
var
|
|
len, len2: SizeInt;
|
|
err: UErrorCode;
|
|
begin
|
|
if hlibICU = 0 then begin
|
|
// fallback implementation
|
|
Result:=UnicodeString(UpCase(AnsiString(s)));
|
|
exit;
|
|
end;
|
|
len:=Length(s);
|
|
SetLength(Result, len);
|
|
if len = 0 then
|
|
exit;
|
|
err:=0;
|
|
len2:=u_strToUpper(PUnicodeChar(Result), len, PUnicodeChar(s), len, nil, err);
|
|
if len2 > len then begin
|
|
SetLength(Result, len2);
|
|
err:=0;
|
|
len2:=u_strToUpper(PUnicodeChar(Result), len2, PUnicodeChar(s), len, nil, err);
|
|
end;
|
|
SetLength(Result, len2);
|
|
end;
|
|
|
|
function LowerUnicodeString(const s : UnicodeString) : UnicodeString;
|
|
var
|
|
len, len2: SizeInt;
|
|
err: UErrorCode;
|
|
begin
|
|
if hlibICU = 0 then begin
|
|
// fallback implementation
|
|
Result:=UnicodeString(LowerCase(AnsiString(s)));
|
|
exit;
|
|
end;
|
|
len:=Length(s);
|
|
SetLength(Result, len);
|
|
if len = 0 then
|
|
exit;
|
|
err:=0;
|
|
len2:=u_strToLower(PUnicodeChar(Result), len, PUnicodeChar(s), len, nil, err);
|
|
if len2 > len then begin
|
|
SetLength(Result, len2);
|
|
err:=0;
|
|
len2:=u_strToLower(PUnicodeChar(Result), len2, PUnicodeChar(s), len, nil, err);
|
|
end;
|
|
SetLength(Result, len2);
|
|
end;
|
|
|
|
function _CompareStr(const S1, S2: UnicodeString): PtrInt;
|
|
var
|
|
count, count1, count2: SizeInt;
|
|
begin
|
|
result := 0;
|
|
Count1 := Length(S1);
|
|
Count2 := Length(S2);
|
|
if Count1>Count2 then
|
|
Count:=Count2
|
|
else
|
|
Count:=Count1;
|
|
result := CompareByte(PUnicodeChar(S1)^, PUnicodeChar(S2)^, Count*SizeOf(UnicodeChar));
|
|
if result=0 then
|
|
result:=Count1 - Count2;
|
|
end;
|
|
|
|
function CompareUnicodeString(const s1, s2 : UnicodeString) : PtrInt;
|
|
begin
|
|
if hlibICU = 0 then begin
|
|
// fallback implementation
|
|
Result:=_CompareStr(s1, s2);
|
|
exit;
|
|
end;
|
|
InitThreadData;
|
|
if DefColl <> nil then
|
|
Result:=ucol_strcoll(DefColl, PUnicodeChar(s1), Length(s1), PUnicodeChar(s2), Length(s2))
|
|
else
|
|
Result:=u_strCompare(PUnicodeChar(s1), Length(s1), PUnicodeChar(s2), Length(s2), True);
|
|
end;
|
|
|
|
function CompareTextUnicodeString(const s1, s2 : UnicodeString): PtrInt;
|
|
const
|
|
U_COMPARE_CODE_POINT_ORDER = $8000;
|
|
var
|
|
err: UErrorCode;
|
|
begin
|
|
if hlibICU = 0 then begin
|
|
// fallback implementation
|
|
Result:=_CompareStr(UpperUnicodeString(s1), UpperUnicodeString(s2));
|
|
exit;
|
|
end;
|
|
err:=0;
|
|
Result:=u_strCaseCompare(PUnicodeChar(s1), Length(s1), PUnicodeChar(s2), Length(s2), U_COMPARE_CODE_POINT_ORDER, err);
|
|
end;
|
|
|
|
function UpperAnsiString(const s : AnsiString) : AnsiString;
|
|
begin
|
|
Result:=AnsiString(UpperUnicodeString(UnicodeString(s)));
|
|
end;
|
|
|
|
function LowerAnsiString(const s : AnsiString) : AnsiString;
|
|
begin
|
|
Result:=AnsiString(LowerUnicodeString(UnicodeString(s)));
|
|
end;
|
|
|
|
function CompareStrAnsiString(const s1, s2: ansistring): PtrInt;
|
|
begin
|
|
Result:=CompareUnicodeString(UnicodeString(s1), UnicodeString(s2));
|
|
end;
|
|
|
|
function StrCompAnsi(s1,s2 : PChar): PtrInt;
|
|
begin
|
|
Result:=CompareUnicodeString(UnicodeString(s1), UnicodeString(s2));
|
|
end;
|
|
|
|
function AnsiCompareText(const S1, S2: ansistring): PtrInt;
|
|
begin
|
|
Result:=CompareTextUnicodeString(UnicodeString(s1), UnicodeString(s2));
|
|
end;
|
|
|
|
function AnsiStrIComp(S1, S2: PChar): PtrInt;
|
|
begin
|
|
Result:=CompareTextUnicodeString(UnicodeString(s1), UnicodeString(s2));
|
|
end;
|
|
|
|
function AnsiStrLComp(S1, S2: PChar; MaxLen: PtrUInt): PtrInt;
|
|
var
|
|
as1, as2: ansistring;
|
|
begin
|
|
SetString(as1, S1, MaxLen);
|
|
SetString(as2, S2, MaxLen);
|
|
Result:=CompareUnicodeString(UnicodeString(as1), UnicodeString(as2));
|
|
end;
|
|
|
|
function AnsiStrLIComp(S1, S2: PChar; MaxLen: PtrUInt): PtrInt;
|
|
var
|
|
as1, as2: ansistring;
|
|
begin
|
|
SetString(as1, S1, MaxLen);
|
|
SetString(as2, S2, MaxLen);
|
|
Result:=CompareTextUnicodeString(UnicodeString(as1), UnicodeString(as2));
|
|
end;
|
|
|
|
function AnsiStrLower(Str: PChar): PChar;
|
|
var
|
|
s, res: ansistring;
|
|
begin
|
|
s:=Str;
|
|
res:=LowerAnsiString(s);
|
|
if Length(res) > Length(s) then
|
|
SetLength(res, Length(s));
|
|
Move(PAnsiChar(res)^, Str, Length(res) + 1);
|
|
Result:=Str;
|
|
end;
|
|
|
|
function AnsiStrUpper(Str: PChar): PChar;
|
|
var
|
|
s, res: ansistring;
|
|
begin
|
|
s:=Str;
|
|
res:=UpperAnsiString(s);
|
|
if Length(res) > Length(s) then
|
|
SetLength(res, Length(s));
|
|
Move(PAnsiChar(res)^, Str, Length(res) + 1);
|
|
Result:=Str;
|
|
end;
|
|
|
|
function CodePointLength(const Str: PChar; MaxLookAead: PtrInt): Ptrint;
|
|
var
|
|
c: byte;
|
|
begin
|
|
// Only UTF-8 encoding is supported
|
|
c:=byte(Str^);
|
|
if c = 0 then
|
|
Result:=0
|
|
else begin
|
|
Result:=1;
|
|
if c < $80 then
|
|
exit; // 1-byte ASCII char
|
|
while c and $C0 = $C0 do begin
|
|
Inc(Result);
|
|
c:=c shl 1;
|
|
end;
|
|
if Result > 6 then
|
|
Result:=1 // Invalid code point
|
|
else
|
|
if Result > MaxLookAead then
|
|
Result:=-1; // Incomplete code point
|
|
end;
|
|
end;
|
|
|
|
function GetStandardCodePage(const stdcp: TStandardCodePageEnum): TSystemCodePage;
|
|
begin
|
|
Result := CP_UTF8; // Android always uses UTF-8
|
|
end;
|
|
|
|
procedure SetStdIOCodePage(var T: Text); inline;
|
|
begin
|
|
case TextRec(T).Mode of
|
|
fmInput:TextRec(T).CodePage:=DefaultSystemCodePage;
|
|
fmOutput:TextRec(T).CodePage:=DefaultSystemCodePage;
|
|
end;
|
|
end;
|
|
|
|
procedure SetStdIOCodePages; inline;
|
|
begin
|
|
SetStdIOCodePage(Input);
|
|
SetStdIOCodePage(Output);
|
|
SetStdIOCodePage(ErrOutput);
|
|
SetStdIOCodePage(StdOut);
|
|
SetStdIOCodePage(StdErr);
|
|
end;
|
|
|
|
procedure Ansi2WideMove(source:pchar; cp:TSystemCodePage; var dest:widestring; len:SizeInt);
|
|
var
|
|
us: UnicodeString;
|
|
begin
|
|
Ansi2UnicodeMove(source,cp,us,len);
|
|
dest:=us;
|
|
end;
|
|
|
|
function UpperWideString(const s : WideString) : WideString;
|
|
begin
|
|
Result:=UpperUnicodeString(s);
|
|
end;
|
|
|
|
function LowerWideString(const s : WideString) : WideString;
|
|
begin
|
|
Result:=LowerUnicodeString(s);
|
|
end;
|
|
|
|
function CompareWideString(const s1, s2 : WideString) : PtrInt;
|
|
begin
|
|
Result:=CompareUnicodeString(s1, s2);
|
|
end;
|
|
|
|
function CompareTextWideString(const s1, s2 : WideString): PtrInt;
|
|
begin
|
|
Result:=CompareTextUnicodeString(s1, s2);
|
|
end;
|
|
|
|
Procedure SetCWideStringManager;
|
|
Var
|
|
CWideStringManager : TUnicodeStringManager;
|
|
begin
|
|
CWideStringManager:=widestringmanager;
|
|
With CWideStringManager do
|
|
begin
|
|
Wide2AnsiMoveProc:=@Unicode2AnsiMove;
|
|
Ansi2WideMoveProc:=@Ansi2WideMove;
|
|
UpperWideStringProc:=@UpperWideString;
|
|
LowerWideStringProc:=@LowerWideString;
|
|
CompareWideStringProc:=@CompareWideString;
|
|
CompareTextWideStringProc:=@CompareTextWideString;
|
|
|
|
UpperAnsiStringProc:=@UpperAnsiString;
|
|
LowerAnsiStringProc:=@LowerAnsiString;
|
|
CompareStrAnsiStringProc:=@CompareStrAnsiString;
|
|
CompareTextAnsiStringProc:=@AnsiCompareText;
|
|
StrCompAnsiStringProc:=@StrCompAnsi;
|
|
StrICompAnsiStringProc:=@AnsiStrIComp;
|
|
StrLCompAnsiStringProc:=@AnsiStrLComp;
|
|
StrLICompAnsiStringProc:=@AnsiStrLIComp;
|
|
StrLowerAnsiStringProc:=@AnsiStrLower;
|
|
StrUpperAnsiStringProc:=@AnsiStrUpper;
|
|
|
|
Unicode2AnsiMoveProc:=@Unicode2AnsiMove;
|
|
Ansi2UnicodeMoveProc:=@Ansi2UnicodeMove;
|
|
UpperUnicodeStringProc:=@UpperUnicodeString;
|
|
LowerUnicodeStringProc:=@LowerUnicodeString;
|
|
CompareUnicodeStringProc:=@CompareUnicodeString;
|
|
CompareTextUnicodeStringProc:=@CompareTextUnicodeString;
|
|
|
|
GetStandardCodePageProc:=@GetStandardCodePage;
|
|
CodePointLengthProc:=@CodePointLength;
|
|
end;
|
|
SetUnicodeStringManager(CWideStringManager);
|
|
end;
|
|
|
|
procedure UnloadICU;
|
|
begin
|
|
if hlibICUi18n <> 0 then begin
|
|
if DefColl <> nil then
|
|
ucol_close(DefColl);
|
|
UnloadLibrary(hlibICUi18n);
|
|
hlibICUi18n:=0;
|
|
end;
|
|
if hlibICU <> 0 then begin
|
|
if DefConv <> nil then
|
|
ucnv_close(DefConv);
|
|
if LastConv <> nil then
|
|
ucnv_close(LastConv);
|
|
UnloadLibrary(hlibICU);
|
|
hlibICU:=0;
|
|
end;
|
|
end;
|
|
|
|
procedure LoadICU;
|
|
var
|
|
LibVer: ansistring;
|
|
|
|
function _GetProc(const Name: AnsiString; out ProcPtr; hLib: TLibHandle = 0): boolean;
|
|
var
|
|
p: pointer;
|
|
begin
|
|
if hLib = 0 then
|
|
hLib:=hlibICU;
|
|
p:=GetProcedureAddress(hlib, Name + LibVer);
|
|
if p = nil then begin
|
|
// unload lib on failure
|
|
UnloadICU;
|
|
Result:=False;
|
|
end
|
|
else begin
|
|
pointer(ProcPtr):=p;
|
|
Result:=True;
|
|
end;
|
|
end;
|
|
|
|
const
|
|
ICUver: array [1..5] of ansistring = ('3_8', '4_2', '44', '46', '48');
|
|
TestProcName = 'ucnv_open';
|
|
|
|
var
|
|
i: longint;
|
|
s: ansistring;
|
|
begin
|
|
hlibICU:=LoadLibrary('libicuuc.so');
|
|
hlibICUi18n:=LoadLibrary('libicui18n.so');
|
|
if (hlibICU = 0) or (hlibICUi18n = 0) then begin
|
|
UnloadICU;
|
|
exit;
|
|
end;
|
|
// Finding ICU version using known versions table
|
|
for i:=High(ICUver) downto Low(ICUver) do begin
|
|
s:='_' + ICUver[i];
|
|
if GetProcedureAddress(hlibICU, TestProcName + s) <> nil then begin
|
|
LibVer:=s;
|
|
break;
|
|
end;
|
|
end;
|
|
|
|
if LibVer = '' then begin
|
|
// Finding unknown ICU version
|
|
Val(ICUver[High(ICUver)], i);
|
|
repeat
|
|
Inc(i, 2);
|
|
Str(i, s);
|
|
s:='_' + s;
|
|
if GetProcedureAddress(hlibICU, TestProcName + s) <> nil then begin
|
|
LibVer:=s;
|
|
break;
|
|
end;
|
|
until i >= 100;
|
|
end;
|
|
|
|
if LibVer = '' then begin
|
|
// Trying versionless name
|
|
if GetProcedureAddress(hlibICU, TestProcName) = nil then begin
|
|
// Unable to get ICU version
|
|
UnloadICU;
|
|
exit;
|
|
end;
|
|
end;
|
|
|
|
if not _GetProc('ucnv_open', ucnv_open) then exit;
|
|
if not _GetProc('ucnv_close', ucnv_close) then exit;
|
|
if not _GetProc('ucnv_setSubstChars', ucnv_setSubstChars) then exit;
|
|
if not _GetProc('ucnv_setFallback', ucnv_setFallback) then exit;
|
|
if not _GetProc('ucnv_fromUChars', ucnv_fromUChars) then exit;
|
|
if not _GetProc('ucnv_toUChars', ucnv_toUChars) then exit;
|
|
if not _GetProc('u_strToUpper', u_strToUpper) then exit;
|
|
if not _GetProc('u_strToLower', u_strToLower) then exit;
|
|
if not _GetProc('u_strCompare', u_strCompare) then exit;
|
|
if not _GetProc('u_strCaseCompare', u_strCaseCompare) then exit;
|
|
|
|
if not _GetProc('u_errorName', u_errorName) then exit;
|
|
|
|
if not _GetProc('ucol_open', ucol_open, hlibICUi18n) then exit;
|
|
if not _GetProc('ucol_close', ucol_close, hlibICUi18n) then exit;
|
|
if not _GetProc('ucol_strcoll', ucol_strcoll, hlibICUi18n) then exit;
|
|
if not _GetProc('ucol_setStrength', ucol_setStrength, hlibICUi18n) then exit;
|
|
end;
|
|
|
|
initialization
|
|
DefaultSystemCodePage:=GetStandardCodePage(scpAnsi);
|
|
DefaultUnicodeCodePage:=CP_UTF16;
|
|
LoadICU;
|
|
SetCWideStringManager;
|
|
SetStdIOCodePages;
|
|
|
|
finalization
|
|
UnloadICU;
|
|
|
|
end.
|
|
|