fpc/rtl/android/cwstring.pp
2023-07-27 19:04:03 +02:00

644 lines
17 KiB
ObjectPascal

{
This file is part of the Free Pascal run time library.
Copyright (c) 2013 by Yury Sidorov,
member of the Free Pascal development team.
Wide string support for Android
See the file COPYING.FPC, included in this distribution,
for details about the copyright.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
**********************************************************************}
{$mode objfpc}
{$inline on}
{$implicitexceptions off}
{$IFNDEF FPC_DOTTEDUNITS}
unit cwstring;
{$ENDIF FPC_DOTTEDUNITS}
interface
procedure SetCWidestringManager;
implementation
{$IFDEF FPC_DOTTEDUNITS}
uses System.DynLibs;
{$ELSE FPC_DOTTEDUNITS}
uses dynlibs;
{$ENDIF FPC_DOTTEDUNITS}
type
UErrorCode = SizeInt;
int32_t = longint;
uint32_t = longword;
PUConverter = pointer;
PUCollator = pointer;
UBool = LongBool;
var
hlibICU: TLibHandle;
hlibICUi18n: TLibHandle;
LibVer: ansistring;
ucnv_open: function (converterName: PAnsiChar; var pErrorCode: UErrorCode): PUConverter; cdecl;
ucnv_close: procedure (converter: PUConverter); cdecl;
ucnv_setSubstChars: procedure (converter: PUConverter; subChars: PAnsiChar; len: byte; var pErrorCode: UErrorCode); cdecl;
ucnv_setFallback: procedure (cnv: PUConverter; usesFallback: UBool); cdecl;
ucnv_fromUChars: function (cnv: PUConverter; dest: PAnsiChar; destCapacity: int32_t; src: PUnicodeChar; srcLength: int32_t; var pErrorCode: UErrorCode): int32_t; cdecl;
ucnv_toUChars: function (cnv: PUConverter; dest: PUnicodeChar; destCapacity: int32_t; src: PAnsiChar; srcLength: int32_t; var pErrorCode: UErrorCode): int32_t; cdecl;
u_strToUpper: function (dest: PUnicodeChar; destCapacity: int32_t; src: PUnicodeChar; srcLength: int32_t; locale: PAnsiChar; var pErrorCode: UErrorCode): int32_t; cdecl;
u_strToLower: function (dest: PUnicodeChar; destCapacity: int32_t; src: PUnicodeChar; srcLength: int32_t; locale: PAnsiChar; var pErrorCode: UErrorCode): int32_t; cdecl;
u_strCompare: function (s1: PUnicodeChar; length1: int32_t; s2: PUnicodeChar; length2: int32_t; codePointOrder: UBool): int32_t; cdecl;
u_strCaseCompare: function (s1: PUnicodeChar; length1: int32_t; s2: PUnicodeChar; length2: int32_t; options: uint32_t; var pErrorCode: UErrorCode): int32_t; cdecl;
u_getDataDirectory: function(): PAnsiChar; cdecl;
u_setDataDirectory: procedure(directory: PAnsiChar); cdecl;
u_init: procedure(var status: UErrorCode); cdecl;
ucol_open: function(loc: PAnsiChar; var status: UErrorCode): PUCollator; cdecl;
ucol_close: procedure (coll: PUCollator); cdecl;
ucol_strcoll: function (coll: PUCollator; source: PUnicodeChar; sourceLength: int32_t; target: PUnicodeChar; targetLength: int32_t): int32_t; cdecl;
ucol_setStrength: procedure (coll: PUCollator; strength: int32_t); cdecl;
threadvar
ThreadDataInited: boolean;
DefConv, LastConv: PUConverter;
LastCP: TSystemCodePage;
DefColl: PUCollator;
function MaskExceptions: dword;
begin
{$if defined(cpux86_64) or defined(cpui386)}
Result:=GetMXCSR;
SetMXCSR(Result or %0000000010000000 {MM_MaskInvalidOp} or %0001000000000000 {MM_MaskPrecision});
{$else}
Result:=0;
{$endif}
end;
procedure UnmaskExceptions(oldmask: dword);
begin
{$if defined(cpux86_64) or defined(cpui386)}
SetMXCSR(oldmask);
{$endif}
end;
function OpenConverter(const name: ansistring): PUConverter;
var
err: UErrorCode;
oldmask: dword;
begin
{ ucnv_open() must be called with some SSE exception masked on x86_64-android. }
oldmask:=MaskExceptions;
err:=0;
Result:=ucnv_open(PAnsiChar(name), err);
if Result <> nil then begin
ucnv_setSubstChars(Result, '?', 1, err);
ucnv_setFallback(Result, True);
end;
UnmaskExceptions(oldmask);
end;
procedure InitThreadData;
var
err: UErrorCode;
col: PUCollator;
begin
if (hlibICU = 0) or ThreadDataInited then
exit;
ThreadDataInited:=True;
DefConv:=OpenConverter('utf8');
err:=0;
col:=ucol_open(nil, err);
if col <> nil then
ucol_setStrength(col, 2);
DefColl:=col;
end;
function GetConverter(cp: TSystemCodePage): PUConverter;
var
s: ansistring;
begin
if hlibICU = 0 then begin
Result:=nil;
exit;
end;
InitThreadData;
if (cp = CP_UTF8) or (cp = CP_ACP) then
Result:=DefConv
else begin
if cp <> LastCP then begin
Str(cp, s);
LastConv:=OpenConverter('cp' + s);
LastCP:=cp;
end;
Result:=LastConv;
end;
end;
procedure Unicode2AnsiMove(source: PUnicodeChar; var dest: RawByteString; cp: TSystemCodePage; len: SizeInt);
var
len2: SizeInt;
conv: PUConverter;
err: UErrorCode;
begin
if len = 0 then begin
dest:='';
exit;
end;
conv:=GetConverter(cp);
if (conv = nil) and not ( (cp = CP_UTF8) or (cp = CP_ACP) ) then begin
// fallback implementation
DefaultUnicode2AnsiMove(source,dest,cp,len);
exit;
end;
len2:=len*3;
SetLength(dest, len2);
err:=0;
if conv <> nil then
len2:=ucnv_fromUChars(conv, PAnsiChar(dest), len2, source, len, err)
else begin
// Use UTF-8 conversion from RTL
cp:=CP_UTF8;
len2:=UnicodeToUtf8(PAnsiChar(dest), len2, source, len) - 1;
end;
if len2 > Length(dest) then begin
SetLength(dest, len2);
err:=0;
if conv <> nil then
len2:=ucnv_fromUChars(conv, PAnsiChar(dest), len2, source, len, err)
else
len2:=UnicodeToUtf8(PAnsiChar(dest), len2, source, len) - 1;
end;
if len2 < 0 then
len2:=0;
SetLength(dest, len2);
SetCodePage(dest, cp, False);
end;
procedure Ansi2UnicodeMove(source:PAnsiChar;cp : TSystemCodePage;var dest:unicodestring;len:SizeInt);
var
len2: SizeInt;
conv: PUConverter;
err: UErrorCode;
begin
if len = 0 then begin
dest:='';
exit;
end;
conv:=GetConverter(cp);
if (conv = nil) and not ( (cp = CP_UTF8) or (cp = CP_ACP) ) then begin
// fallback implementation
DefaultAnsi2UnicodeMove(source,cp,dest,len);
exit;
end;
len2:=len;
SetLength(dest, len2);
err:=0;
if conv <> nil then
len2:=ucnv_toUChars(conv, PUnicodeChar(dest), len2, source, len, err)
else
// Use UTF-8 conversion from RTL
len2:=Utf8ToUnicode(PUnicodeChar(dest), len2, source, len) - 1;
if len2 > Length(dest) then begin
SetLength(dest, len2);
err:=0;
if conv <> nil then
len2:=ucnv_toUChars(conv, PUnicodeChar(dest), len2, source, len, err)
else
len2:=Utf8ToUnicode(PUnicodeChar(dest), len2, source, len) - 1;
end;
if len2 < 0 then
len2:=0;
SetLength(dest, len2);
end;
function UpperUnicodeString(const s : UnicodeString) : UnicodeString;
var
len, len2: SizeInt;
err: UErrorCode;
begin
if hlibICU = 0 then begin
// fallback implementation
Result:=UnicodeString(UpCase(AnsiString(s)));
exit;
end;
len:=Length(s);
SetLength(Result, len);
if len = 0 then
exit;
err:=0;
len2:=u_strToUpper(PUnicodeChar(Result), len, PUnicodeChar(s), len, nil, err);
if len2 > len then begin
SetLength(Result, len2);
err:=0;
len2:=u_strToUpper(PUnicodeChar(Result), len2, PUnicodeChar(s), len, nil, err);
end;
SetLength(Result, len2);
end;
function LowerUnicodeString(const s : UnicodeString) : UnicodeString;
var
len, len2: SizeInt;
err: UErrorCode;
begin
if hlibICU = 0 then begin
// fallback implementation
Result:=UnicodeString(LowerCase(AnsiString(s)));
exit;
end;
len:=Length(s);
SetLength(Result, len);
if len = 0 then
exit;
err:=0;
len2:=u_strToLower(PUnicodeChar(Result), len, PUnicodeChar(s), len, nil, err);
if len2 > len then begin
SetLength(Result, len2);
err:=0;
len2:=u_strToLower(PUnicodeChar(Result), len2, PUnicodeChar(s), len, nil, err);
end;
SetLength(Result, len2);
end;
function _CompareStr(const S1, S2: UnicodeString): PtrInt;
var
count, count1, count2: SizeInt;
begin
result := 0;
Count1 := Length(S1);
Count2 := Length(S2);
if Count1>Count2 then
Count:=Count2
else
Count:=Count1;
result := CompareByte(PUnicodeChar(S1)^, PUnicodeChar(S2)^, Count*SizeOf(UnicodeChar));
if result=0 then
result:=Count1 - Count2;
end;
function CompareUnicodeString(const s1, s2 : UnicodeString; Options : TCompareOptions) : PtrInt;
const
U_COMPARE_CODE_POINT_ORDER = $8000;
var
err: UErrorCode;
begin
if hlibICU = 0 then begin
// fallback implementation
Result:=_CompareStr(s1, s2);
exit;
end;
if (coIgnoreCase in Options) then begin
err:=0;
Result:=u_strCaseCompare(PUnicodeChar(s1), Length(s1), PUnicodeChar(s2), Length(s2), U_COMPARE_CODE_POINT_ORDER, err);
end
else begin
InitThreadData;
if DefColl <> nil then
Result:=ucol_strcoll(DefColl, PUnicodeChar(s1), Length(s1), PUnicodeChar(s2), Length(s2))
else
Result:=u_strCompare(PUnicodeChar(s1), Length(s1), PUnicodeChar(s2), Length(s2), True);
end;
end;
function UpperAnsiString(const s : AnsiString) : AnsiString;
begin
Result:=AnsiString(UpperUnicodeString(UnicodeString(s)));
end;
function LowerAnsiString(const s : AnsiString) : AnsiString;
begin
Result:=AnsiString(LowerUnicodeString(UnicodeString(s)));
end;
function CompareStrAnsiString(const s1, s2: ansistring): PtrInt;
begin
Result:=CompareUnicodeString(UnicodeString(s1), UnicodeString(s2), []);
end;
function StrCompAnsi(s1,s2 : PAnsiChar): PtrInt;
begin
Result:=CompareUnicodeString(UnicodeString(s1), UnicodeString(s2), []);
end;
function AnsiCompareText(const S1, S2: ansistring): PtrInt;
begin
Result:=CompareUnicodeString(UnicodeString(s1), UnicodeString(s2), [coIgnoreCase]);
end;
function AnsiStrIComp(S1, S2: PAnsiChar): PtrInt;
begin
Result:=CompareUnicodeString(UnicodeString(s1), UnicodeString(s2), [coIgnoreCase]);
end;
function AnsiStrLComp(S1, S2: PAnsiChar; MaxLen: PtrUInt): PtrInt;
var
as1, as2: ansistring;
begin
SetString(as1, S1, MaxLen);
SetString(as2, S2, MaxLen);
Result:=CompareUnicodeString(UnicodeString(as1), UnicodeString(as2), []);
end;
function AnsiStrLIComp(S1, S2: PAnsiChar; MaxLen: PtrUInt): PtrInt;
var
as1, as2: ansistring;
begin
SetString(as1, S1, MaxLen);
SetString(as2, S2, MaxLen);
Result:=CompareUnicodeString(UnicodeString(as1), UnicodeString(as2), [coIgnoreCase]);
end;
function AnsiStrLower(Str: PAnsiChar): PAnsiChar;
var
s, res: ansistring;
begin
s:=Str;
res:=LowerAnsiString(s);
if Length(res) > Length(s) then
SetLength(res, Length(s));
Move(PAnsiChar(res)^, Str, Length(res) + 1);
Result:=Str;
end;
function AnsiStrUpper(Str: PAnsiChar): PAnsiChar;
var
s, res: ansistring;
begin
s:=Str;
res:=UpperAnsiString(s);
if Length(res) > Length(s) then
SetLength(res, Length(s));
Move(PAnsiChar(res)^, Str, Length(res) + 1);
Result:=Str;
end;
function CodePointLength(const Str: PAnsiChar; MaxLookAead: PtrInt): Ptrint;
var
c: byte;
begin
// Only UTF-8 encoding is supported
c:=byte(Str^);
if c = 0 then
Result:=0
else begin
Result:=1;
if c < $80 then
exit; // 1-byte ASCII char
while c and $C0 = $C0 do begin
Inc(Result);
c:=c shl 1;
end;
if Result > 6 then
Result:=1 // Invalid code point
else
if Result > MaxLookAead then
Result:=-1; // Incomplete code point
end;
end;
function GetStandardCodePage(const stdcp: TStandardCodePageEnum): TSystemCodePage;
begin
Result := CP_UTF8; // Android always uses UTF-8
end;
procedure SetStdIOCodePage(var T: Text); inline;
begin
case TextRec(T).Mode of
fmInput:TextRec(T).CodePage:=DefaultSystemCodePage;
fmOutput:TextRec(T).CodePage:=DefaultSystemCodePage;
end;
end;
procedure SetStdIOCodePages; inline;
begin
SetStdIOCodePage(Input);
SetStdIOCodePage(Output);
SetStdIOCodePage(ErrOutput);
SetStdIOCodePage(StdOut);
SetStdIOCodePage(StdErr);
end;
procedure Ansi2WideMove(source:PAnsiChar; cp:TSystemCodePage; var dest:widestring; len:SizeInt);
var
us: UnicodeString;
begin
Ansi2UnicodeMove(source,cp,us,len);
dest:=us;
end;
function UpperWideString(const s : WideString) : WideString;
begin
Result:=UpperUnicodeString(s);
end;
function LowerWideString(const s : WideString) : WideString;
begin
Result:=LowerUnicodeString(s);
end;
function CompareWideString(const s1, s2 : WideString; Options : TCompareOptions) : PtrInt;
begin
Result:=CompareUnicodeString(s1, s2, Options);
end;
Procedure SetCWideStringManager;
Var
CWideStringManager : TUnicodeStringManager;
begin
CWideStringManager:=widestringmanager;
With CWideStringManager do
begin
Wide2AnsiMoveProc:=@Unicode2AnsiMove;
Ansi2WideMoveProc:=@Ansi2WideMove;
UpperWideStringProc:=@UpperWideString;
LowerWideStringProc:=@LowerWideString;
CompareWideStringProc:=@CompareWideString;
UpperAnsiStringProc:=@UpperAnsiString;
LowerAnsiStringProc:=@LowerAnsiString;
CompareStrAnsiStringProc:=@CompareStrAnsiString;
CompareTextAnsiStringProc:=@AnsiCompareText;
StrCompAnsiStringProc:=@StrCompAnsi;
StrICompAnsiStringProc:=@AnsiStrIComp;
StrLCompAnsiStringProc:=@AnsiStrLComp;
StrLICompAnsiStringProc:=@AnsiStrLIComp;
StrLowerAnsiStringProc:=@AnsiStrLower;
StrUpperAnsiStringProc:=@AnsiStrUpper;
Unicode2AnsiMoveProc:=@Unicode2AnsiMove;
Ansi2UnicodeMoveProc:=@Ansi2UnicodeMove;
UpperUnicodeStringProc:=@UpperUnicodeString;
LowerUnicodeStringProc:=@LowerUnicodeString;
CompareUnicodeStringProc:=@CompareUnicodeString;
GetStandardCodePageProc:=@GetStandardCodePage;
CodePointLengthProc:=@CodePointLength;
end;
SetUnicodeStringManager(CWideStringManager);
end;
procedure UnloadICU;
begin
if DefColl <> nil then
ucol_close(DefColl);
if DefConv <> nil then
ucnv_close(DefConv);
if LastConv <> nil then
ucnv_close(LastConv);
if LibVer = '_3_8' then
exit; // ICU v3.8 on Android 1.5-2.1 is buggy and can't be unloaded properly
if hlibICU <> 0 then begin
UnloadLibrary(hlibICU);
hlibICU:=0;
end;
if hlibICUi18n <> 0 then begin
UnloadLibrary(hlibICUi18n);
hlibICUi18n:=0;
end;
end;
function GetIcuProc(const Name: AnsiString; out ProcPtr; libId: longint = 0): boolean;
var
p: pointer;
hLib: TLibHandle;
begin
Result:=False;
if libId = 0 then
hLib:=hlibICU
else
hLib:=hlibICUi18n;
if hLib = 0 then
exit;
p:=GetProcedureAddress(hlib, Name + LibVer);
if p = nil then
exit;
pointer(ProcPtr):=p;
Result:=True;
end;
function LoadICU: boolean;
const
ICUver: array [1..15] of ansistring =
('3_8', '4_2', '44', '46', '48', '50', '51', '53', '55', '56', '58', '60',
'63', '66', '68');
TestProcName = 'ucnv_open';
var
i: longint;
s: ansistring;
dir: PAnsiChar;
err: UErrorCode;
begin
Result:=False;
{$ifdef android}
hlibICU:=LoadLibrary('libicuuc.so');
hlibICUi18n:=LoadLibrary('libicui18n.so');
{$else}
hlibICU:=LoadLibrary('icuuc40.dll');
hlibICUi18n:=LoadLibrary('icuin40.dll');
LibVer:='_4_0';
{$endif android}
if (hlibICU = 0) or (hlibICUi18n = 0) then begin
UnloadICU;
exit;
end;
// Finding ICU version using known versions table
for i:=High(ICUver) downto Low(ICUver) do begin
s:='_' + ICUver[i];
if GetProcedureAddress(hlibICU, TestProcName + s) <> nil then begin
LibVer:=s;
break;
end;
end;
if LibVer = '' then begin
// Finding unknown ICU version
Val(ICUver[High(ICUver)], i);
repeat
Inc(i);
Str(i, s);
s:='_' + s;
if GetProcedureAddress(hlibICU, TestProcName + s) <> nil then begin
LibVer:=s;
break;
end;
until i >= 100;
end;
if LibVer = '' then begin
// Trying versionless name
if GetProcedureAddress(hlibICU, TestProcName) = nil then begin
// Unable to get ICU version
SysLogWrite(ANDROID_LOG_ERROR, 'cwstring: Unable to get ICU version.');
UnloadICU;
exit;
end;
end;
if not GetIcuProc('ucnv_open', ucnv_open) then exit;
if not GetIcuProc('ucnv_close', ucnv_close) then exit;
if not GetIcuProc('ucnv_setSubstChars', ucnv_setSubstChars) then exit;
if not GetIcuProc('ucnv_setFallback', ucnv_setFallback) then exit;
if not GetIcuProc('ucnv_fromUChars', ucnv_fromUChars) then exit;
if not GetIcuProc('ucnv_toUChars', ucnv_toUChars) then exit;
if not GetIcuProc('u_strToUpper', u_strToUpper) then exit;
if not GetIcuProc('u_strToLower', u_strToLower) then exit;
if not GetIcuProc('u_strCompare', u_strCompare) then exit;
if not GetIcuProc('u_strCaseCompare', u_strCaseCompare) then exit;
if not GetIcuProc('u_getDataDirectory', u_getDataDirectory) then exit;
if not GetIcuProc('u_setDataDirectory', u_setDataDirectory) then exit;
if not GetIcuProc('u_init', u_init) then exit;
if not GetIcuProc('ucol_open', ucol_open, 1) then exit;
if not GetIcuProc('ucol_close', ucol_close, 1) then exit;
if not GetIcuProc('ucol_strcoll', ucol_strcoll, 1) then exit;
if not GetIcuProc('ucol_setStrength', ucol_setStrength, 1) then exit;
// Checking if ICU data dir is set
dir:=u_getDataDirectory();
if (dir = nil) or (dir^ = #0) then
u_setDataDirectory('/system/usr/icu');
err:=0;
u_init(err);
Result:=True;
end;
var
oldm: TUnicodeStringManager;
{$ifdef android}
SysGetIcuProc: pointer; external name 'ANDROID_GET_ICU_PROC';
{$endif android}
initialization
GetUnicodeStringManager(oldm);
DefaultSystemCodePage:=GetStandardCodePage(scpAnsi);
DefaultUnicodeCodePage:=CP_UTF16;
if LoadICU then begin
SetCWideStringManager;
{$ifdef android}
SysGetIcuProc:=@GetIcuProc;
SetStdIOCodePages;
{$endif android}
end
else
SysLogWrite(ANDROID_LOG_ERROR, 'cwstring: Failed to load ICU.');
finalization
SetUnicodeStringManager(oldm);
UnloadICU;
end.