{ This file is part of the Free Pascal run time library. Copyright (c) 2013 by Yury Sidorov, member of the Free Pascal development team. Wide string support for Android See the file COPYING.FPC, included in this distribution, for details about the copyright. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. **********************************************************************} {$mode objfpc} {$inline on} {$implicitexceptions off} unit cwstring; interface procedure SetCWidestringManager; implementation uses dynlibs; type UErrorCode = SizeInt; int32_t = longint; uint32_t = longword; PUConverter = pointer; PUCollator = pointer; UBool = LongBool; var hlibICU: TLibHandle; hlibICUi18n: TLibHandle; ucnv_open: function (converterName: PAnsiChar; var pErrorCode: UErrorCode): PUConverter; cdecl; ucnv_close: procedure (converter: PUConverter); cdecl; ucnv_setSubstChars: procedure (converter: PUConverter; subChars: PAnsiChar; len: byte; var pErrorCode: UErrorCode); cdecl; ucnv_setFallback: procedure (cnv: PUConverter; usesFallback: UBool); cdecl; ucnv_fromUChars: function (cnv: PUConverter; dest: PAnsiChar; destCapacity: int32_t; src: PUnicodeChar; srcLength: int32_t; var pErrorCode: UErrorCode): int32_t; cdecl; ucnv_toUChars: function (cnv: PUConverter; dest: PUnicodeChar; destCapacity: int32_t; src: PAnsiChar; srcLength: int32_t; var pErrorCode: UErrorCode): int32_t; cdecl; u_strToUpper: function (dest: PUnicodeChar; destCapacity: int32_t; src: PUnicodeChar; srcLength: int32_t; locale: PAnsiChar; var pErrorCode: UErrorCode): int32_t; cdecl; u_strToLower: function (dest: PUnicodeChar; destCapacity: int32_t; src: PUnicodeChar; srcLength: int32_t; locale: PAnsiChar; var pErrorCode: UErrorCode): int32_t; cdecl; u_strCompare: function (s1: PUnicodeChar; length1: int32_t; s2: PUnicodeChar; length2: int32_t; codePointOrder: UBool): int32_t; cdecl; u_strCaseCompare: function (s1: PUnicodeChar; length1: int32_t; s2: PUnicodeChar; length2: int32_t; options: uint32_t; var pErrorCode: UErrorCode): int32_t; cdecl; ucol_open: function(loc: PAnsiChar; var status: UErrorCode): PUCollator; cdecl; ucol_close: procedure (coll: PUCollator); cdecl; ucol_strcoll: function (coll: PUCollator; source: PUnicodeChar; sourceLength: int32_t; target: PUnicodeChar; targetLength: int32_t): int32_t; cdecl; ucol_setStrength: procedure (coll: PUCollator; strength: int32_t); cdecl; u_errorName: function (code: UErrorCode): PAnsiChar; cdecl; DefConv, LastConv: PUConverter; LastCP: TSystemCodePage; DefColl: PUCollator; function OpenConverter(const name: ansistring): PUConverter; var err: UErrorCode; begin err:=0; Result:=ucnv_open(PAnsiChar(name), err); if Result <> nil then begin ucnv_setSubstChars(Result, '?', 1, err); ucnv_setFallback(Result, True); end; end; function GetConverter(cp: TSystemCodePage): PUConverter; var s: ansistring; begin if hlibICU = 0 then begin Result:=nil; exit; end; if (cp = DefaultSystemCodePage) or (cp = CP_ACP) then Result:=DefConv else begin if cp <> LastCP then begin Str(cp, s); LastConv:=OpenConverter('cp' + s); LastCP:=cp; end; Result:=LastConv; end; end; procedure Unicode2AnsiMove(source: PUnicodeChar; var dest: RawByteString; cp: TSystemCodePage; len: SizeInt); var len2: SizeInt; conv: PUConverter; err: UErrorCode; begin if len = 0 then begin dest:=''; exit; end; conv:=GetConverter(cp); if conv = nil then begin DefaultUnicode2AnsiMove(source,dest,DefaultSystemCodePage,len); exit; end; len2:=len*3; SetLength(dest, len2); err:=0; len2:=ucnv_fromUChars(conv, PAnsiChar(dest), len2, source, len, err); if len2 > Length(dest) then begin SetLength(dest, len2); err:=0; len2:=ucnv_fromUChars(conv, PAnsiChar(dest), len2, source, len, err); end; SetLength(dest, len2); SetCodePage(dest, cp, False); end; procedure Ansi2UnicodeMove(source:pchar;cp : TSystemCodePage;var dest:unicodestring;len:SizeInt); var len2: SizeInt; conv: PUConverter; err: UErrorCode; begin if len = 0 then begin dest:=''; exit; end; conv:=GetConverter(cp); if conv = nil then begin DefaultAnsi2UnicodeMove(source,DefaultSystemCodePage,dest,len); exit; end; len2:=len; SetLength(dest, len2); err:=0; len2:=ucnv_toUChars(conv, PUnicodeChar(dest), len2, source, len, err); if len2 > Length(dest) then begin SetLength(dest, len2); err:=0; len2:=ucnv_toUChars(conv, PUnicodeChar(dest), len2, source, len, err); end; SetLength(dest, len2); end; function UpperUnicodeString(const s : UnicodeString) : UnicodeString; var len, len2: SizeInt; err: UErrorCode; begin if hlibICU = 0 then begin // fallback implementation Result:=UnicodeString(UpCase(AnsiString(s))); exit; end; len:=Length(s); SetLength(Result, len); if len = 0 then exit; err:=0; len2:=u_strToUpper(PUnicodeChar(Result), len, PUnicodeChar(s), len, nil, err); if len2 > len then begin SetLength(Result, len2); err:=0; len2:=u_strToUpper(PUnicodeChar(Result), len2, PUnicodeChar(s), len, nil, err); end; SetLength(Result, len2); end; function LowerUnicodeString(const s : UnicodeString) : UnicodeString; var len, len2: SizeInt; err: UErrorCode; begin if hlibICU = 0 then begin // fallback implementation Result:=UnicodeString(LowerCase(AnsiString(s))); exit; end; len:=Length(s); SetLength(Result, len); if len = 0 then exit; err:=0; len2:=u_strToLower(PUnicodeChar(Result), len, PUnicodeChar(s), len, nil, err); if len2 > len then begin SetLength(Result, len2); err:=0; len2:=u_strToLower(PUnicodeChar(Result), len2, PUnicodeChar(s), len, nil, err); end; SetLength(Result, len2); end; function _CompareStr(const S1, S2: UnicodeString): PtrInt; var count, count1, count2: SizeInt; begin result := 0; Count1 := Length(S1); Count2 := Length(S2); if Count1>Count2 then Count:=Count2 else Count:=Count1; result := CompareByte(PUnicodeChar(S1)^, PUnicodeChar(S2)^, Count*SizeOf(UnicodeChar)); if result=0 then result:=Count1 - Count2; end; function CompareUnicodeString(const s1, s2 : UnicodeString) : PtrInt; begin if hlibICU = 0 then begin // fallback implementation Result:=_CompareStr(s1, s2); exit; end; if DefColl <> nil then Result:=ucol_strcoll(DefColl, PUnicodeChar(s1), Length(s1), PUnicodeChar(s2), Length(s2)) else Result:=u_strCompare(PUnicodeChar(s1), Length(s1), PUnicodeChar(s2), Length(s2), True); end; function CompareTextUnicodeString(const s1, s2 : UnicodeString): PtrInt; const U_COMPARE_CODE_POINT_ORDER = $8000; var err: UErrorCode; begin if hlibICU = 0 then begin // fallback implementation Result:=_CompareStr(UpperUnicodeString(s1), UpperUnicodeString(s2)); exit; end; err:=0; Result:=u_strCaseCompare(PUnicodeChar(s1), Length(s1), PUnicodeChar(s2), Length(s2), U_COMPARE_CODE_POINT_ORDER, err); end; function UpperAnsiString(const s : AnsiString) : AnsiString; begin Result:=AnsiString(UpperUnicodeString(UnicodeString(s))); end; function LowerAnsiString(const s : AnsiString) : AnsiString; begin Result:=AnsiString(LowerUnicodeString(UnicodeString(s))); end; function CompareStrAnsiString(const s1, s2: ansistring): PtrInt; begin Result:=CompareUnicodeString(UnicodeString(s1), UnicodeString(s2)); end; function StrCompAnsi(s1,s2 : PChar): PtrInt; begin Result:=CompareUnicodeString(UnicodeString(s1), UnicodeString(s2)); end; function AnsiCompareText(const S1, S2: ansistring): PtrInt; begin Result:=CompareTextUnicodeString(UnicodeString(s1), UnicodeString(s2)); end; function AnsiStrIComp(S1, S2: PChar): PtrInt; begin Result:=CompareTextUnicodeString(UnicodeString(s1), UnicodeString(s2)); end; function AnsiStrLComp(S1, S2: PChar; MaxLen: PtrUInt): PtrInt; begin Result:=CompareUnicodeString(UnicodeString(Copy(s1, 1, MaxLen)), UnicodeString(Copy(s2, 1, MaxLen))); end; function AnsiStrLIComp(S1, S2: PChar; MaxLen: PtrUInt): PtrInt; begin Result:=CompareTextUnicodeString(UnicodeString(Copy(s1, 1, MaxLen)), UnicodeString(Copy(s2, 1, MaxLen))); end; function AnsiStrLower(Str: PChar): PChar; var s, res: ansistring; begin s:=Str; res:=LowerAnsiString(s); if Length(res) > Length(s) then SetLength(res, Length(s)); Move(PAnsiChar(res)^, Str, Length(res) + 1); Result:=Str; end; function AnsiStrUpper(Str: PChar): PChar; var s, res: ansistring; begin s:=Str; res:=UpperAnsiString(s); if Length(res) > Length(s) then SetLength(res, Length(s)); Move(PAnsiChar(res)^, Str, Length(res) + 1); Result:=Str; end; function CodePointLength(const Str: PChar; MaxLookAead: PtrInt): Ptrint; var c: byte; begin // Only UTF-8 encoding is supported c:=byte(Str^); if c = 0 then Result:=0 else begin Result:=1; if c < $80 then exit; // 1-byte ASCII char while c and $C0 = $C0 do begin Inc(Result); c:=c shl 1; end; if Result > 6 then Result:=1 // Invalid code point else if Result > MaxLookAead then Result:=-1; // Incomplete code point end; end; function GetStandardCodePage(const stdcp: TStandardCodePageEnum): TSystemCodePage; begin Result := CP_UTF8; // Android always uses UTF-8 end; {$i textrec.inc} procedure SetStdIOCodePage(var T: Text); inline; begin case TextRec(T).Mode of fmInput:TextRec(T).CodePage:=DefaultSystemCodePage; fmOutput:TextRec(T).CodePage:=DefaultSystemCodePage; end; end; procedure SetStdIOCodePages; inline; begin SetStdIOCodePage(Input); SetStdIOCodePage(Output); SetStdIOCodePage(ErrOutput); SetStdIOCodePage(StdOut); SetStdIOCodePage(StdErr); end; procedure Ansi2WideMove(source:pchar; cp:TSystemCodePage; var dest:widestring; len:SizeInt); var us: UnicodeString; begin Ansi2UnicodeMove(source,cp,us,len); dest:=us; end; function UpperWideString(const s : WideString) : WideString; begin Result:=UpperUnicodeString(s); end; function LowerWideString(const s : WideString) : WideString; begin Result:=LowerUnicodeString(s); end; function CompareWideString(const s1, s2 : WideString) : PtrInt; begin Result:=CompareUnicodeString(s1, s2); end; function CompareTextWideString(const s1, s2 : WideString): PtrInt; begin Result:=CompareTextUnicodeString(s1, s2); end; Procedure SetCWideStringManager; Var CWideStringManager : TUnicodeStringManager; begin CWideStringManager:=widestringmanager; With CWideStringManager do begin Wide2AnsiMoveProc:=@Unicode2AnsiMove; Ansi2WideMoveProc:=@Ansi2WideMove; UpperWideStringProc:=@UpperWideString; LowerWideStringProc:=@LowerWideString; CompareWideStringProc:=@CompareWideString; CompareTextWideStringProc:=@CompareTextWideString; UpperAnsiStringProc:=@UpperAnsiString; LowerAnsiStringProc:=@LowerAnsiString; CompareStrAnsiStringProc:=@CompareStrAnsiString; CompareTextAnsiStringProc:=@AnsiCompareText; StrCompAnsiStringProc:=@StrCompAnsi; StrICompAnsiStringProc:=@AnsiStrIComp; StrLCompAnsiStringProc:=@AnsiStrLComp; StrLICompAnsiStringProc:=@AnsiStrLIComp; StrLowerAnsiStringProc:=@AnsiStrLower; StrUpperAnsiStringProc:=@AnsiStrUpper; Unicode2AnsiMoveProc:=@Unicode2AnsiMove; Ansi2UnicodeMoveProc:=@Ansi2UnicodeMove; UpperUnicodeStringProc:=@UpperUnicodeString; LowerUnicodeStringProc:=@LowerUnicodeString; CompareUnicodeStringProc:=@CompareUnicodeString; CompareTextUnicodeStringProc:=@CompareTextUnicodeString; GetStandardCodePageProc:=@GetStandardCodePage; CodePointLengthProc:=@CodePointLength; end; SetUnicodeStringManager(CWideStringManager); end; procedure UnloadICU; begin if hlibICUi18n <> 0 then begin if DefColl <> nil then ucol_close(DefColl); UnloadLibrary(hlibICUi18n); hlibICUi18n:=0; end; if hlibICU <> 0 then begin if DefConv <> nil then ucnv_close(DefConv); if LastConv <> nil then ucnv_close(LastConv); UnloadLibrary(hlibICU); hlibICU:=0; end; end; procedure LoadICU; var LibVer: ansistring; function _GetProc(const Name: AnsiString; out ProcPtr; hLib: TLibHandle = 0): boolean; var p: pointer; begin if hLib = 0 then hLib:=hlibICU; p:=GetProcedureAddress(hlib, Name + LibVer); if p = nil then begin // unload lib on failure UnloadICU; Result:=False; end else begin pointer(ProcPtr):=p; Result:=True; end; end; const ICUver: array [1..4] of ansistring = ('3_8', '4_2', '44', '46'); TestProcName = 'ucnv_open'; var err: UErrorCode; i: longint; s: ansistring; begin hlibICU:=LoadLibrary('libicuuc.so'); hlibICUi18n:=LoadLibrary('libicui18n.so'); if (hlibICU = 0) or (hlibICUi18n = 0) then begin UnloadICU; exit; end; // Finding ICU version using known versions table for i:=High(ICUver) downto Low(ICUver) do begin s:='_' + ICUver[i]; if GetProcedureAddress(hlibICU, TestProcName + s) <> nil then begin LibVer:=s; break; end; end; if LibVer = '' then begin // Finding unknown ICU version Val(ICUver[High(ICUver)], i); for i:=i + 1 to 100 do begin Str(i, s); s:='_' + s; if GetProcedureAddress(hlibICU, TestProcName + s) <> nil then begin LibVer:=s; break; end; end; end; if LibVer = '' then begin // Trying versionless name if GetProcedureAddress(hlibICU, TestProcName) = nil then begin // Unable to get ICU version UnloadICU; exit; end; end; if not _GetProc('ucnv_open', ucnv_open) then exit; if not _GetProc('ucnv_close', ucnv_close) then exit; if not _GetProc('ucnv_setSubstChars', ucnv_setSubstChars) then exit; if not _GetProc('ucnv_setFallback', ucnv_setFallback) then exit; if not _GetProc('ucnv_fromUChars', ucnv_fromUChars) then exit; if not _GetProc('ucnv_toUChars', ucnv_toUChars) then exit; if not _GetProc('u_strToUpper', u_strToUpper) then exit; if not _GetProc('u_strToLower', u_strToLower) then exit; if not _GetProc('u_strCompare', u_strCompare) then exit; if not _GetProc('u_strCaseCompare', u_strCaseCompare) then exit; if not _GetProc('u_errorName', u_errorName) then exit; if not _GetProc('ucol_open', ucol_open, hlibICUi18n) then exit; if not _GetProc('ucol_close', ucol_close, hlibICUi18n) then exit; if not _GetProc('ucol_strcoll', ucol_strcoll, hlibICUi18n) then exit; if not _GetProc('ucol_setStrength', ucol_setStrength, hlibICUi18n) then exit; DefConv:=OpenConverter('utf8'); err:=0; DefColl:=ucol_open(nil, err); if DefColl <> nil then ucol_setStrength(DefColl, 2); end; initialization DefaultSystemCodePage:=GetStandardCodePage(scpAnsi); DefaultUnicodeCodePage:=CP_UTF16; LoadICU; SetCWideStringManager; SetStdIOCodePages; finalization UnloadICU; end.