fpc/rtl/java/ustrings.inc
Jonas Maebe c75246706d + stubbed ansistring support (using ansistrings compiles, but does not
generate working code)

git-svn-id: branches/jvmbackend@18499 -
2011-08-20 08:04:57 +00:00

1858 lines
48 KiB
PHP

{
This file is part of the Free Pascal run time library.
Copyright (c) 1999-2005 by Florian Klaempfl,
Copyright (c) 2011 by Jonas Maebe,
members of the Free Pascal development team.
This file implements support routines for UTF-8 strings with FPC
See the file COPYING.FPC, included in this distribution,
for details about the copyright.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
**********************************************************************}
{$i wustrings.inc}
{
This file contains the implementation of the UnicodeString type,
which on the Java platforms is an alias for java.lang.String
}
Function NewUnicodeString(Len : SizeInt) : JLString;
{
Allocate a new UnicodeString on the heap.
initialize it to zero length and reference count 1.
}
var
data: array of jchar;
begin
setlength(data,len);
result:=JLString.create(data);
end;
(*
procedure fpc_UnicodeStr_To_ShortStr (out res: ShortString;const S2 : UnicodeString); [Public, alias: 'FPC_UNICODESTR_TO_SHORTSTR'];compilerproc;
{
Converts a UnicodeString to a ShortString;
}
Var
Size : SizeInt;
temp : ansistring;
begin
res:='';
Size:=Length(S2);
if Size>0 then
begin
If Size>high(res) then
Size:=high(res);
widestringmanager.Unicode2AnsiMoveProc(PUnicodeChar(S2),temp,Size);
res:=temp;
end;
end;
Function fpc_ShortStr_To_UnicodeStr (Const S2 : ShortString): UnicodeString;compilerproc;
{
Converts a ShortString to a UnicodeString;
}
Var
Size : SizeInt;
begin
result:='';
Size:=Length(S2);
if Size>0 then
begin
widestringmanager.Ansi2UnicodeMoveProc(PChar(@S2[1]),result,Size);
{ Terminating Zero }
PUnicodeChar(Pointer(fpc_ShortStr_To_UnicodeStr)+Size*sizeof(UnicodeChar))^:=#0;
end;
end;
*)
Function fpc_UnicodeStr_To_AnsiStr (const S2 : UnicodeString): AnsiString; compilerproc;
{
Converts a UnicodeString to an AnsiString
}
{$ifdef nounsupported}
Var
Size : SizeInt;
{$endif}
begin
{$ifdef nounsupported}
result:='';
Size:=Length(S2);
if Size>0 then
widestringmanager.Unicode2AnsiMoveProc(PUnicodeChar(Pointer(S2)),result,Size);
{$endif}
end;
Function fpc_AnsiStr_To_UnicodeStr (Const S2 : AnsiString): UnicodeString; compilerproc;
{
Converts an AnsiString to a UnicodeString;
}
{$ifdef nounsupported}
Var
Size : SizeInt;
{$endif}
begin
{$ifdef nounsupported}
result:='';
Size:=Length(S2);
if Size>0 then
widestringmanager.Ansi2UnicodeMoveProc(PChar(S2),result,Size);
{$endif}
end;
Function fpc_UnicodeStr_To_WideStr (const S2 : UnicodeString): WideString; compilerproc;
begin
result:=s2;
end;
Function fpc_WideStr_To_UnicodeStr (Const S2 : WideString): UnicodeString; compilerproc;
begin
result:=s2;
end;
function fpc_UnicodeStr_Concat (const S1,S2 : UnicodeString): UnicodeString; compilerproc;
Var
sb: JLStringBuilder;
begin
{ only assign if s1 or s2 is empty }
if (length(S1)=0) then
begin
result:=s2;
exit;
end;
if (length(S2)=0) then
begin
result:=s1;
exit;
end;
sb:=JLStringBuilder.create(S1);
sb.append(s2);
result:=sb.toString;
end;
function fpc_UnicodeStr_Concat_multi (const sarr:array of Unicodestring): unicodestring; compilerproc;
Var
i : Longint;
Size,NewSize : SizeInt;
sb: JLStringBuilder;
begin
{ First calculate size of the result so we can allocate a StringBuilder of
the right size }
NewSize:=0;
for i:=low(sarr) to high(sarr) do
inc(Newsize,length(sarr[i]));
sb:=JLStringBuilder.create(NewSize);
for i:=low(sarr) to high(sarr) do
begin
if length(sarr[i])>0 then
sb.append(sarr[i]);
end;
result:=sb.toString;
end;
Function fpc_Char_To_UChar(const c : AnsiChar): UnicodeChar; compilerproc;
var
str: JLString;
arr: array of jbyte;
begin
setlength(arr,1);
arr[0]:=ord(c);
result:=JLString.create(arr,0,1).charAt(0);
end;
Function fpc_Char_To_UnicodeStr(const c : AnsiChar): UnicodeString; compilerproc;
{
Converts a AnsiChar to a UnicodeString;
}
var
str: JLString;
arr: array of jbyte;
begin
setlength(arr,1);
arr[0]:=ord(c);
result:=JLString.create(arr,0,1);
end;
Function fpc_UChar_To_Char(const c : UnicodeChar): AnsiChar; compilerproc;
{
Converts a UnicodeChar to a AnsiChar;
}
var
arrb: array of jbyte;
arrw: array of jchar;
str: JLString;
begin
setlength(arrw,1);
arrw[0]:=c;
str:=JLString.create(arrw);
arrb:=str.getbytes();
result:=chr(arrb[0]);
end;
Function fpc_WChar_To_UnicodeStr(const c : WideChar): UnicodeString; compilerproc;
{
Converts a WideChar to a UnicodeString;
}
var
arrw: array of jchar;
begin
setlength(arrw,1);
arrw[0]:=c;
result:=JLString.create(arrw);
end;
Function fpc_Char_To_WChar(const c : AnsiChar): WideChar; compilerproc;
{
Converts a AnsiChar to a WideChar;
}
var
str: JLString;
arr: array of jbyte;
begin
setlength(arr,1);
arr[0]:=ord(c);
result:=JLString.create(arr,0,1).charAt(0);
end;
Function fpc_WChar_To_Char(const c : WideChar): AnsiChar; compilerproc;
{
Converts a WideChar to a AnsiChar;
}
var
arrb: array of jbyte;
arrw: array of jchar;
begin
setlength(arrw,1);
arrw[0]:=c;
arrb:=JLString.create(arrw).getbytes();
result:=chr(arrb[0]);
end;
(*
{$ifndef FPC_STRTOSHORTSTRINGPROC}
Function fpc_WChar_To_ShortStr(const c : WideChar): ShortString; compilerproc;
{
Converts a WideChar to a ShortString;
}
var
s: ansistring;
begin
widestringmanager.Wide2AnsiMoveProc(@c, s, 1);
fpc_WChar_To_ShortStr:= s;
end;
{$else FPC_STRTOSHORTSTRINGPROC}
procedure fpc_WChar_To_ShortStr(out res : shortstring;const c : WideChar) compilerproc;
{
Converts a WideChar to a ShortString;
}
var
s: ansistring;
begin
widestringmanager.Wide2AnsiMoveProc(@c,s,1);
res:=s;
end;
{$endif FPC_STRTOSHORTSTRINGPROC}
*)
Function fpc_UChar_To_UnicodeStr(const c : UnicodeChar): UnicodeString; compilerproc;
{
Converts a UnicodeChar to a UnicodeString;
}
var
arr: array of UnicodeChar;
begin
setlength(arr,1);
arr[0]:=c;
result:=JLString.create(arr);
end;
(*
Function fpc_UChar_To_AnsiStr(const c : UnicodeChar): AnsiString; compilerproc;
{
Converts a UnicodeChar to a AnsiString;
}
begin
widestringmanager.Unicode2AnsiMoveProc(@c, fpc_UChar_To_AnsiStr, 1);
end;
{$ifndef FPC_STRTOSHORTSTRINGPROC}
Function fpc_UChar_To_ShortStr(const c : UnicodeChar): ShortString; compilerproc;
{
Converts a UnicodeChar to a ShortString;
}
var
s: ansistring;
begin
widestringmanager.Unicode2AnsiMoveProc(@c, s, 1);
fpc_UChar_To_ShortStr:= s;
end;
{$else FPC_STRTOSHORTSTRINGPROC}
procedure fpc_UChar_To_ShortStr(out res : shortstring;const c : UnicodeChar) compilerproc;
{
Converts a UnicodeChar to a ShortString;
}
var
s: ansistring;
begin
widestringmanager.Unicode2AnsiMoveProc(@c,s,1);
res:=s;
end;
{$endif FPC_STRTOSHORTSTRINGPROC}
Function fpc_PChar_To_UnicodeStr(const p : pchar): UnicodeString; compilerproc;
Var
L : SizeInt;
begin
if (not assigned(p)) or (p[0]=#0) Then
begin
fpc_pchar_to_unicodestr := '';
exit;
end;
l:=IndexChar(p^,-1,#0);
widestringmanager.Ansi2UnicodeMoveProc(P,fpc_PChar_To_UnicodeStr,l);
end;
*)
Function fpc_CharArray_To_UnicodeStr(const arr: array of ansichar; zerobased: boolean = true): UnicodeString; compilerproc;
var
i,j : SizeInt;
localarr: array of jbyte;
foundnull: boolean;
begin
if (zerobased) then
begin
if (arr[0]=#0) Then
begin
fpc_chararray_to_unicodestr := '';
exit;
end;
foundnull:=false;
for i:=low(arr) to high(arr) do
if arr[i]=#0 then
begin
foundnull:=true;
break;
end;
if not foundnull then
i := high(arr)+1;
end
else
i := high(arr)+1;
setlength(localarr,i);
for j:=0 to i-1 do
localarr[j]:=ord(arr[j]);
result:=JLString.create(localarr,0,i);
end;
(*
function fpc_UnicodeCharArray_To_ShortStr(const arr: array of unicodechar; zerobased: boolean = true): shortstring;[public,alias:'FPC_UNICODECHARARRAY_TO_SHORTSTR']; compilerproc;
var
l: longint;
index: longint;
len: byte;
temp: ansistring;
foundnull: boolean;
begin
l := high(arr)+1;
if l>=256 then
l:=255
else if l<0 then
l:=0;
if zerobased then
begin
foundnull:=false;
for index:=low(arr) to l-1 do
if arr[index]=#0 then
begin
foundnull:=true;
break;
end;
if not foundnull then
len := l
else
len := index;
end
else
len := l;
result:=JLString.create(arr,0,l);
end;
Function fpc_UnicodeCharArray_To_AnsiStr(const arr: array of unicodechar; zerobased: boolean = true): AnsiString; compilerproc;
var
i : SizeInt;
begin
if (zerobased) then
begin
i:=IndexWord(arr,high(arr)+1,0);
if i = -1 then
i := high(arr)+1;
end
else
i := high(arr)+1;
SetLength(fpc_UnicodeCharArray_To_AnsiStr,i);
widestringmanager.Unicode2AnsiMoveProc (punicodechar(@arr),fpc_UnicodeCharArray_To_AnsiStr,i);
end;
*)
Function fpc_UnicodeCharArray_To_UnicodeStr(const arr: array of unicodechar; zerobased: boolean = true): UnicodeString; compilerproc;
var
i : SizeInt;
foundnull : boolean;
begin
if (zerobased) then
begin
foundnull:=false;
for i:=low(arr) to high(arr) do
if arr[i]=#0 then
begin
foundnull:=true;
break;
end;
if not foundnull then
i := high(arr)+1;
end
else
i := high(arr)+1;
result:=JLString.create(arr,0,i);
end;
Function fpc_WideCharArray_To_UnicodeStr(const arr: array of widechar; zerobased: boolean = true): UnicodeString; compilerproc;
var
i : SizeInt;
foundnull : boolean;
begin
if (zerobased) then
begin
foundnull:=false;
for i:=low(arr) to high(arr) do
if arr[i]=#0 then
begin
foundnull:=true;
break;
end;
if not foundnull then
i := high(arr)+1;
end
else
i := high(arr)+1;
result:=JLString.create(arr,0,i);
end;
(*
{ due to their names, the following procedures should be in wstrings.inc,
however, the compiler generates code using this functions on all platforms }
{$ifndef FPC_STRTOSHORTSTRINGPROC}
function fpc_WideCharArray_To_ShortStr(const arr: array of widechar; zerobased: boolean = true): shortstring;[public,alias:'FPC_WIDECHARARRAY_TO_SHORTSTR']; compilerproc;
var
l: longint;
index: longint;
len: byte;
temp: ansistring;
begin
l := high(arr)+1;
if l>=256 then
l:=255
else if l<0 then
l:=0;
if zerobased then
begin
index:=IndexWord(arr[0],l,0);
if (index < 0) then
len := l
else
len := index;
end
else
len := l;
widestringmanager.Wide2AnsiMoveProc (pwidechar(@arr),temp,len);
fpc_WideCharArray_To_ShortStr := temp;
end;
{$else FPC_STRTOSHORTSTRINGPROC}
procedure fpc_WideCharArray_To_ShortStr(out res : shortstring;const arr: array of widechar; zerobased: boolean = true);[public,alias:'FPC_WIDECHARARRAY_TO_SHORTSTR']; compilerproc;
var
l: longint;
index: ptrint;
len: byte;
temp: ansistring;
begin
l := high(arr)+1;
if l>=high(res)+1 then
l:=high(res)
else if l<0 then
l:=0;
if zerobased then
begin
index:=IndexWord(arr[0],l,0);
if index<0 then
len:=l
else
len:=index;
end
else
len:=l;
widestringmanager.Wide2AnsiMoveProc (pwidechar(@arr),temp,len);
res:=temp;
end;
{$endif FPC_STRTOSHORTSTRINGPROC}
*)
Function fpc_WideCharArray_To_AnsiStr(const arr: array of widechar; zerobased: boolean = true): AnsiString; compilerproc;
{$ifdef nounsupported}
var
i : SizeInt;
{$endif}
begin
{$ifdef nounsupported}
if (zerobased) then
begin
i:=IndexWord(arr,high(arr)+1,0);
if i = -1 then
i := high(arr)+1;
end
else
i := high(arr)+1;
SetLength(fpc_WideCharArray_To_AnsiStr,i);
widestringmanager.Wide2AnsiMoveProc (pwidechar(@arr),fpc_WideCharArray_To_AnsiStr,i);
{$endif}
end;
Function fpc_WideCharArray_To_WideStr(const arr: array of widechar; zerobased: boolean = true): WideString; compilerproc;
var
i : SizeInt;
foundnull : boolean;
begin
if (zerobased) then
begin
foundnull:=false;
for i:=low(arr) to high(arr) do
if arr[i]=#0 then
begin
foundnull:=true;
break;
end;
if not foundnull then
i := high(arr)+1;
end
else
i := high(arr)+1;
result:=JLString.create(arr,0,i);
end;
procedure fpc_unicodestr_to_chararray(out res: array of AnsiChar; const src: UnicodeString); compilerproc;
var
i, len: SizeInt;
temp: array of jbyte;
begin
len := length(src);
{ make sure we don't dereference src if it can be nil (JM) }
if len > 0 then
begin
temp:=JLString(src).getBytes;
if len > length(temp) then
len := length(temp);
for i := 0 to len-1 do
res[i] := chr(temp[i]);
end;
end;
procedure fpc_unicodestr_to_unicodechararray(out res: array of unicodechar; const src: UnicodeString); compilerproc;
var
len: SizeInt;
begin
len := length(src);
{ make sure we don't dereference src if it can be nil (JM) }
if len > 0 then
begin
if len > high(res)+1 then
len := high(res)+1;
JLString(src).getChars(0,len,res,0);
end;
end;
function fpc_unicodestr_setchar(const s: UnicodeString; const index: longint; const ch: unicodechar): UnicodeString; compilerproc;
var
sb: JLStringBuilder;
begin
sb:=JLStringBuilder.create(s);
{ string indexes are 1-based in Pascal, 0-based in Java }
sb.setCharAt(index-1,ch);
result:=sb.toString();
end;
(*
procedure fpc_ansistr_to_unicodechararray(out res: array of unicodechar; const src: AnsiString); compilerproc;
var
len: SizeInt;
temp: unicodestring;
begin
len := length(src);
{ make sure we don't dereference src if it can be nil (JM) }
if len > 0 then
widestringmanager.ansi2unicodemoveproc(pchar(@src[1]),temp,len);
len := length(temp);
if len > length(res) then
len := length(res);
{$r-}
move(temp[1],res[0],len*sizeof(unicodechar));
fillchar(res[len],(length(res)-len)*SizeOf(UnicodeChar),0);
{$ifdef RangeCheckWasOn}
{$r+}
{$endif}
end;
procedure fpc_shortstr_to_unicodechararray(out res: array of unicodechar; const src: ShortString); compilerproc;
var
len: longint;
temp : unicodestring;
begin
len := length(src);
{ make sure we don't access char 1 if length is 0 (JM) }
if len > 0 then
widestringmanager.ansi2unicodemoveproc(pchar(@src[1]),temp,len);
len := length(temp);
if len > length(res) then
len := length(res);
{$r-}
move(temp[1],res[0],len*sizeof(unicodechar));
fillchar(res[len],(length(res)-len)*SizeOf(UnicodeChar),0);
{$ifdef RangeCheckWasOn}
{$r+}
{$endif}
end;
*)
procedure fpc_ansistr_to_widechararray(out res: array of widechar; const src: AnsiString); compilerproc;
{$ifdef nounsupported}
var
len: SizeInt;
temp: widestring;
{$endif}
begin
{$ifdef nounsupported}
len := length(src);
{ make sure we don't dereference src if it can be nil (JM) }
if len > 0 then
widestringmanager.ansi2widemoveproc(pchar(@src[1]),temp,len);
len := length(temp);
if len > length(res) then
len := length(res);
{$r-}
move(temp[1],res[0],len*sizeof(widechar));
fillchar(res[len],(length(res)-len)*SizeOf(WideChar),0);
{$ifdef RangeCheckWasOn}
{$r+}
{$endif}
{$endif}
end;
(*
procedure fpc_shortstr_to_widechararray(out res: array of widechar; const src: ShortString); compilerproc;
var
len: longint;
temp : widestring;
begin
len := length(src);
{ make sure we don't access char 1 if length is 0 (JM) }
if len > 0 then
widestringmanager.ansi2widemoveproc(pchar(@src[1]),temp,len);
len := length(temp);
if len > length(res) then
len := length(res);
{$r-}
move(temp[1],res[0],len*sizeof(widechar));
fillchar(res[len],(length(res)-len)*SizeOf(WideChar),0);
{$ifdef RangeCheckWasOn}
{$r+}
{$endif}
end;
*)
procedure fpc_unicodestr_to_widechararray(out res: array of widechar; const src: UnicodeString); compilerproc;
var
i, len: SizeInt;
begin
len := length(src);
if len > length(res) then
len := length(res);
for i:=0 to len-1 do
res[i]:=src[i+1];
end;
Function fpc_UnicodeStr_Compare(const S1,S2 : UnicodeString): SizeInt; compilerproc;
{
Compares 2 UnicodeStrings;
The result is
<0 if S1<S2
0 if S1=S2
>0 if S1>S2
}
Var
MaxI,Temp : SizeInt;
begin
if JLObject(S1)=JLObject(S2) then
begin
result:=0;
exit;
end;
result:=JLString(S1).compareTo(S2);
end;
Function fpc_UnicodeStr_Compare_Equal(const S1,S2 : UnicodeString): SizeInt; compilerproc;
{
Compares 2 UnicodeStrings for equality only;
The result is
0 if S1=S2
<>0 if S1<>S2
}
Var
MaxI : SizeInt;
begin
result:=ord(not JLString(S1).equals(JLString(S2)));
end;
function fpc_UnicodeStr_SetLength(const S : UnicodeString; l : SizeInt): UnicodeString; compilerproc;
{
Sets The length of string S to L.
Makes sure S is unique, and contains enough room.
Returns new val
}
Var
movelen: SizeInt;
chars: array of widechar;
strlen: SizeInt;
begin
if (l>0) then
begin
if JLObject(S)=nil then
begin
{ Need a completely new string...}
result:=NewUnicodeString(l);
end
{ no need to create a new string, since Java strings are immutable }
else
begin
strlen:=length(s);
if l=strlen then
result:=s
else if (l<strlen) then
result:=JLString(s).substring(0,l)
else
begin
setlength(chars,l);
JLString(s).getChars(0,strlen,chars,0);
result:=JLString.create(chars,0,l)
end;
end
end
else
begin
result:='';
end;
end;
{*****************************************************************************
Public functions, In interface.
*****************************************************************************}
(*
function UnicodeCharToString(S : PUnicodeChar) : AnsiString;
begin
result:=UnicodeCharLenToString(s,Length(UnicodeString(s)));
end;
function StringToUnicodeChar(const Src : AnsiString;Dest : PUnicodeChar;DestSize : SizeInt) : PUnicodeChar;
var
temp:unicodestring;
begin
widestringmanager.Ansi2UnicodeMoveProc(PChar(Src),temp,Length(Src));
if Length(temp)<DestSize then
move(temp[1],Dest^,Length(temp)*SizeOf(UnicodeChar))
else
move(temp[1],Dest^,(DestSize-1)*SizeOf(UnicodeChar));
Dest[DestSize-1]:=#0;
result:=Dest;
end;
function WideCharToString(S : PWideChar) : AnsiString;
begin
result:=WideCharLenToString(s,Length(WideString(s)));
end;
function StringToWideChar(const Src : AnsiString;Dest : PWideChar;DestSize : SizeInt) : PWideChar;
var
temp:widestring;
begin
widestringmanager.Ansi2WideMoveProc(PChar(Src),temp,Length(Src));
if Length(temp)<DestSize then
move(temp[1],Dest^,Length(temp)*SizeOf(WideChar))
else
move(temp[1],Dest^,(DestSize-1)*SizeOf(WideChar));
Dest[DestSize-1]:=#0;
result:=Dest;
end;
function UnicodeCharLenToString(S : PUnicodeChar;Len : SizeInt) : AnsiString;
begin
//SetLength(result,Len);
widestringmanager.Unicode2AnsiMoveproc(S,result,Len);
end;
procedure UnicodeCharLenToStrVar(Src : PUnicodeChar;Len : SizeInt;out Dest : AnsiString);
begin
Dest:=UnicodeCharLenToString(Src,Len);
end;
procedure UnicodeCharToStrVar(S : PUnicodeChar;out Dest : AnsiString);
begin
Dest:=UnicodeCharToString(S);
end;
function WideCharLenToString(S : PWideChar;Len : SizeInt) : AnsiString;
begin
//SetLength(result,Len);
widestringmanager.Wide2AnsiMoveproc(S,result,Len);
end;
procedure WideCharLenToStrVar(Src : PWideChar;Len : SizeInt;out Dest : AnsiString);
begin
Dest:=WideCharLenToString(Src,Len);
end;
procedure WideCharToStrVar(S : PWideChar;out Dest : AnsiString);
begin
Dest:=WideCharToString(S);
end;
*)
Function fpc_unicodestr_Unique(const S : JLObject): JLObject; compilerproc;
begin
result:=s;
end;
Function Fpc_UnicodeStr_Copy (Const S : UnicodeString; Index,Size : SizeInt) : UnicodeString;compilerproc;
begin
dec(index);
if Index < 0 then
Index := 0;
{ Check Size. Accounts for Zero-length S, the double check is needed because
Size can be maxint and will get <0 when adding index }
if (Size>Length(S)) or
(Index+Size>Length(S)) then
Size:=Length(S)-Index;
If Size>0 then
result:=JLString(s).subString(Index,Size)
else
result:='';
end;
Function Pos (Const Substr : UnicodeString; Const Source : UnicodeString) : SizeInt;
begin
Pos:=0;
if Length(SubStr)>0 then
Pos:=JLString(Source).indexOf(SubStr)+1;
end;
{ Faster version for a unicodechar alone }
Function Pos (c : UnicodeChar; Const s : UnicodeString) : SizeInt;
begin
Pos:=0;
if length(S)>0 then
Pos:=JLString(s).indexOf(ord(c))+1;
end;
(*
Function Pos (c : AnsiString; Const s : UnicodeString) : SizeInt;{$ifdef SYSTEMINLINE}inline;{$endif}
begin
result:=Pos(UnicodeString(c),s);
end;
Function Pos (c : ShortString; Const s : UnicodeString) : SizeInt;{$ifdef SYSTEMINLINE}inline;{$endif}
begin
result:=Pos(UnicodeString(c),s);
end;
Function Pos (c : UnicodeString; Const s : AnsiString) : SizeInt;{$ifdef SYSTEMINLINE}inline;{$endif}
begin
result:=Pos(c,UnicodeString(s));
end;
*)
{ Faster version for a char alone. Must be implemented because }
{ pos(c: char; const s: shortstring) also exists, so otherwise }
{ using pos(char,pchar) will always call the shortstring version }
{ (exact match for first argument), also with $h+ (JM) }
Function Pos (c : AnsiChar; Const s : UnicodeString) : SizeInt;
var
i: SizeInt;
wc : unicodechar;
begin
wc:=c;
result:=Pos(wc,s);
end;
(*
Procedure Delete (Var S : UnicodeString; Index,Size: SizeInt);
Var
LS : SizeInt;
begin
LS:=Length(S);
if (Index>LS) or (Index<=0) or (Size<=0) then
exit;
UniqueString (S);
{ (Size+Index) will overflow if Size=MaxInt. }
if Size>LS-Index then
Size:=LS-Index+1;
if Size<=LS-Index then
begin
Dec(Index);
Move(PUnicodeChar(S)[Index+Size],PUnicodeChar(S)[Index],(LS-Index-Size+1)*sizeof(UnicodeChar));
end;
Setlength(s,LS-Size);
end;
Procedure Insert (Const Source : UnicodeString; Var S : UnicodeString; Index : SizeInt);
var
Temp : UnicodeString;
LS : SizeInt;
begin
If Length(Source)=0 then
exit;
if index <= 0 then
index := 1;
Ls:=Length(S);
if index > LS then
index := LS+1;
Dec(Index);
Pointer(Temp) := NewUnicodeString(Length(Source)+LS);
SetLength(Temp,Length(Source)+LS);
If Index>0 then
move (PUnicodeChar(S)^,PUnicodeChar(Temp)^,Index*sizeof(UnicodeChar));
Move (PUnicodeChar(Source)^,PUnicodeChar(Temp)[Index],Length(Source)*sizeof(UnicodeChar));
If (LS-Index)>0 then
Move(PUnicodeChar(S)[Index],PUnicodeChar(temp)[Length(Source)+index],(LS-Index)*sizeof(UnicodeChar));
S:=Temp;
end;
*)
Function UpCase(c:UnicodeChar):UnicodeChar;
begin
result:=JLCharacter.toUpperCase(c);
end;
function UpCase(const s : UnicodeString) : UnicodeString;
begin
result:=JLString(s).toUpperCase;
end;
(*
Procedure SetString (Out S : UnicodeString; Buf : PUnicodeChar; Len : SizeInt);
begin
SetLength(S,Len);
If (Buf<>Nil) and (Len>0) then
Move (Buf[0],S[1],Len*sizeof(UnicodeChar));
end;
Procedure SetString (Out S : UnicodeString; Buf : PChar; Len : SizeInt);
var
BufLen: SizeInt;
begin
SetLength(S,Len);
If (Buf<>Nil) and (Len>0) then
widestringmanager.Ansi2UnicodeMoveProc(Buf,S,Len);
end;
{$ifndef FPUNONE}
Function fpc_Val_Real_UnicodeStr(Const S : UnicodeString; out Code : ValSInt): ValReal; [public, alias:'FPC_VAL_REAL_UNICODESTR']; compilerproc;
Var
SS : String;
begin
fpc_Val_Real_UnicodeStr := 0;
if length(S) > 255 then
code := 256
else
begin
SS := S;
Val(SS,fpc_Val_Real_UnicodeStr,code);
end;
end;
{$endif}
function fpc_val_enum_unicodestr(str2ordindex:pointer;const s:unicodestring;out code:valsint):longint;compilerproc;
var ss:shortstring;
begin
if length(s)>255 then
code:=256
else
begin
ss:=s;
val(ss,fpc_val_enum_unicodestr,code);
end;
end;
Function fpc_Val_Currency_UnicodeStr(Const S : UnicodeString; out Code : ValSInt): Currency; [public, alias:'FPC_VAL_CURRENCY_UNICODESTR']; compilerproc;
Var
SS : String;
begin
if length(S) > 255 then
begin
fpc_Val_Currency_UnicodeStr:=0;
code := 256;
end
else
begin
SS := S;
Val(SS,fpc_Val_Currency_UnicodeStr,code);
end;
end;
Function fpc_Val_UInt_UnicodeStr (Const S : UnicodeString; out Code : ValSInt): ValUInt; [public, alias:'FPC_VAL_UINT_UNICODESTR']; compilerproc;
Var
SS : ShortString;
begin
fpc_Val_UInt_UnicodeStr := 0;
if length(S) > 255 then
code := 256
else
begin
SS := S;
Val(SS,fpc_Val_UInt_UnicodeStr,code);
end;
end;
Function fpc_Val_SInt_UnicodeStr (DestSize: SizeInt; Const S : UnicodeString; out Code : ValSInt): ValSInt; [public, alias:'FPC_VAL_SINT_UNICODESTR']; compilerproc;
Var
SS : ShortString;
begin
fpc_Val_SInt_UnicodeStr:=0;
if length(S)>255 then
code:=256
else
begin
SS := S;
fpc_Val_SInt_UnicodeStr := int_Val_SInt_ShortStr(DestSize,SS,Code);
end;
end;
{$ifndef CPU64}
Function fpc_Val_qword_UnicodeStr (Const S : UnicodeString; out Code : ValSInt): qword; [public, alias:'FPC_VAL_QWORD_UNICODESTR']; compilerproc;
Var
SS : ShortString;
begin
fpc_Val_qword_UnicodeStr:=0;
if length(S)>255 then
code:=256
else
begin
SS := S;
Val(SS,fpc_Val_qword_UnicodeStr,Code);
end;
end;
Function fpc_Val_int64_UnicodeStr (Const S : UnicodeString; out Code : ValSInt): Int64; [public, alias:'FPC_VAL_INT64_UNICODESTR']; compilerproc;
Var
SS : ShortString;
begin
fpc_Val_int64_UnicodeStr:=0;
if length(S)>255 then
code:=256
else
begin
SS := S;
Val(SS,fpc_Val_int64_UnicodeStr,Code);
end;
end;
{$endif CPU64}
{$ifndef FPUNONE}
procedure fpc_UnicodeStr_Float(d : ValReal;len,fr,rt : SizeInt;out s : UnicodeString);compilerproc;
var
ss : shortstring;
begin
str_real(len,fr,d,treal_type(rt),ss);
s:=ss;
end;
{$endif}
procedure fpc_unicodestr_enum(ordinal,len:sizeint;typinfo,ord2strindex:pointer;out s:unicodestring);compilerproc;
var ss:shortstring;
begin
fpc_shortstr_enum(ordinal,len,typinfo,ord2strindex,ss);
s:=ss;
end;
procedure fpc_unicodestr_bool(b : boolean;len:sizeint;out s:unicodestring);compilerproc;
var ss:shortstring;
begin
fpc_shortstr_bool(b,len,ss);
s:=ss;
end;
{$ifdef FPC_HAS_STR_CURRENCY}
procedure fpc_UnicodeStr_Currency(c : Currency;len,fr : SizeInt;out s : UnicodeString);compilerproc;
var
ss : shortstring;
begin
str(c:len:fr,ss);
s:=ss;
end;
{$endif FPC_HAS_STR_CURRENCY}
Procedure fpc_UnicodeStr_SInt(v : ValSint; Len : SizeInt; out S : UnicodeString);compilerproc;
Var
SS : ShortString;
begin
Str (v:Len,SS);
S:=SS;
end;
Procedure fpc_UnicodeStr_UInt(v : ValUInt;Len : SizeInt; out S : UnicodeString);compilerproc;
Var
SS : ShortString;
begin
str(v:Len,SS);
S:=SS;
end;
{$ifndef CPU64}
Procedure fpc_UnicodeStr_Int64(v : Int64; Len : SizeInt; out S : UnicodeString);compilerproc;
Var
SS : ShortString;
begin
Str (v:Len,SS);
S:=SS;
end;
Procedure fpc_UnicodeStr_Qword(v : Qword;Len : SizeInt; out S : UnicodeString);compilerproc;
Var
SS : ShortString;
begin
str(v:Len,SS);
S:=SS;
end;
{$endif CPU64}
*)
(*
{ converts an utf-16 code point or surrogate pair to utf-32 }
function utf16toutf32(const S: UnicodeString; const index: SizeInt; out len: longint): UCS4Char; [public, alias: 'FPC_UTF16TOUTF32'];
var
w: unicodechar;
begin
{ UTF-16 points in the range #$0-#$D7FF and #$E000-#$FFFF }
{ are the same in UTF-32 }
w:=s[index];
if (w<=#$d7ff) or
(w>=#$e000) then
begin
result:=UCS4Char(w);
len:=1;
end
{ valid surrogate pair? }
else if (w<=#$dbff) and
{ w>=#$d7ff check not needed, checked above }
(index<length(s)) and
(s[index+1]>=#$dc00) and
(s[index+1]<=#$dfff) then
{ convert the surrogate pair to UTF-32 }
begin
result:=(UCS4Char(w)-$d800) shl 10 + (UCS4Char(s[index+1])-$dc00) + $10000;
len:=2;
end
else
{ invalid surrogate -> do nothing }
begin
result:=UCS4Char(w);
len:=1;
end;
end;
function UnicodeToUtf8(Dest: PChar; Source: PUnicodeChar; MaxBytes: SizeInt): SizeInt;{$ifdef SYSTEMINLINE}inline;{$endif}
begin
if assigned(Source) then
Result:=UnicodeToUtf8(Dest,MaxBytes,Source,IndexWord(Source^,-1,0))
else
Result:=0;
end;
function UnicodeToUtf8(Dest: PChar; MaxDestBytes: SizeUInt; Source: PUnicodeChar; SourceChars: SizeUInt): SizeUInt;
var
i,j : SizeUInt;
w : word;
lw : longword;
len : longint;
begin
result:=0;
if source=nil then
exit;
i:=0;
j:=0;
if assigned(Dest) then
begin
while (i<SourceChars) and (j<MaxDestBytes) do
begin
w:=word(Source[i]);
case w of
0..$7f:
begin
Dest[j]:=char(w);
inc(j);
end;
$80..$7ff:
begin
if j+1>=MaxDestBytes then
break;
Dest[j]:=char($c0 or (w shr 6));
Dest[j+1]:=char($80 or (w and $3f));
inc(j,2);
end;
$800..$d7ff,$e000..$ffff:
begin
if j+2>=MaxDestBytes then
break;
Dest[j]:=char($e0 or (w shr 12));
Dest[j+1]:=char($80 or ((w shr 6) and $3f));
Dest[j+2]:=char($80 or (w and $3f));
inc(j,3);
end;
$d800..$dbff:
{High Surrogates}
begin
if j+3>=MaxDestBytes then
break;
if (i<sourcechars-1) and
(word(Source[i+1]) >= $dc00) and
(word(Source[i+1]) <= $dfff) then
begin
lw:=longword(utf16toutf32(Source[i] + Source[i+1], 1, len));
Dest[j]:=char($f0 or (lw shr 18));
Dest[j+1]:=char($80 or ((lw shr 12) and $3f));
Dest[j+2]:=char($80 or ((lw shr 6) and $3f));
Dest[j+3]:=char($80 or (lw and $3f));
inc(j,4);
inc(i);
end;
end;
end;
inc(i);
end;
if j>SizeUInt(MaxDestBytes-1) then
j:=MaxDestBytes-1;
Dest[j]:=#0;
end
else
begin
while i<SourceChars do
begin
case word(Source[i]) of
$0..$7f:
inc(j);
$80..$7ff:
inc(j,2);
$800..$d7ff,$e000..$ffff:
inc(j,3);
$d800..$dbff:
begin
if (i<sourcechars-1) and
(word(Source[i+1]) >= $dc00) and
(word(Source[i+1]) <= $dfff) then
begin
inc(j,4);
inc(i);
end;
end;
end;
inc(i);
end;
end;
result:=j+1;
end;
function Utf8ToUnicode(Dest: PUnicodeChar; Source: PChar; MaxChars: SizeInt): SizeInt;{$ifdef SYSTEMINLINE}inline;{$endif}
begin
if assigned(Source) then
Result:=Utf8ToUnicode(Dest,MaxChars,Source,strlen(Source))
else
Result:=0;
end;
function UTF8ToUnicode(Dest: PUnicodeChar; MaxDestChars: SizeUInt; Source: PChar; SourceBytes: SizeUInt): SizeUInt;
const
UNICODE_INVALID=63;
var
InputUTF8: SizeUInt;
IBYTE: BYTE;
OutputUnicode: SizeUInt;
PRECHAR: SizeUInt;
TempBYTE: BYTE;
CharLen: SizeUint;
LookAhead: SizeUInt;
UC: SizeUInt;
begin
if not assigned(Source) then
begin
result:=0;
exit;
end;
result:=SizeUInt(-1);
InputUTF8:=0;
OutputUnicode:=0;
PreChar:=0;
if Assigned(Dest) Then
begin
while (OutputUnicode<MaxDestChars) and (InputUTF8<SourceBytes) do
begin
IBYTE:=byte(Source[InputUTF8]);
if (IBYTE and $80) = 0 then
begin
//One character US-ASCII, convert it to unicode
if IBYTE = 10 then
begin
If (PreChar<>13) and FALSE then
begin
//Expand to crlf, conform UTF-8.
//This procedure will break the memory alocation by
//FPC for the widestring, so never use it. Condition never true due the "and FALSE".
if OutputUnicode+1<MaxDestChars then
begin
Dest[OutputUnicode]:=WideChar(13);
inc(OutputUnicode);
Dest[OutputUnicode]:=WideChar(10);
inc(OutputUnicode);
PreChar:=10;
end
else
begin
Dest[OutputUnicode]:=WideChar(13);
inc(OutputUnicode);
end;
end
else
begin
Dest[OutputUnicode]:=WideChar(IBYTE);
inc(OutputUnicode);
PreChar:=IBYTE;
end;
end
else
begin
Dest[OutputUnicode]:=WideChar(IBYTE);
inc(OutputUnicode);
PreChar:=IBYTE;
end;
inc(InputUTF8);
end
else
begin
TempByte:=IBYTE;
CharLen:=0;
while (TempBYTE and $80)<>0 do
begin
TempBYTE:=(TempBYTE shl 1) and $FE;
inc(CharLen);
end;
//Test for the "CharLen" conforms UTF-8 string
//This means the 10xxxxxx pattern.
if SizeUInt(InputUTF8+CharLen-1)>SourceBytes then
begin
//Insuficient chars in string to decode
//UTF-8 array. Fallback to single char.
CharLen:= 1;
end;
for LookAhead := 1 to CharLen-1 do
begin
if ((byte(Source[InputUTF8+LookAhead]) and $80)<>$80) or
((byte(Source[InputUTF8+LookAhead]) and $40)<>$00) then
begin
//Invalid UTF-8 sequence, fallback.
CharLen:= LookAhead;
break;
end;
end;
UC:=$FFFF;
case CharLen of
1: begin
//Not valid UTF-8 sequence
UC:=UNICODE_INVALID;
end;
2: begin
//Two bytes UTF, convert it
UC:=(byte(Source[InputUTF8]) and $1F) shl 6;
UC:=UC or (byte(Source[InputUTF8+1]) and $3F);
if UC <= $7F then
begin
//Invalid UTF sequence.
UC:=UNICODE_INVALID;
end;
end;
3: begin
//Three bytes, convert it to unicode
UC:= (byte(Source[InputUTF8]) and $0F) shl 12;
UC:= UC or ((byte(Source[InputUTF8+1]) and $3F) shl 6);
UC:= UC or ((byte(Source[InputUTF8+2]) and $3F));
if (UC <= $7FF) or (UC >= $FFFE) or ((UC >= $D800) and (UC <= $DFFF)) then
begin
//Invalid UTF-8 sequence
UC:= UNICODE_INVALID;
End;
end;
4: begin
//Four bytes, convert it to two unicode characters
UC:= (byte(Source[InputUTF8]) and $07) shl 18;
UC:= UC or ((byte(Source[InputUTF8+1]) and $3F) shl 12);
UC:= UC or ((byte(Source[InputUTF8+2]) and $3F) shl 6);
UC:= UC or ((byte(Source[InputUTF8+3]) and $3F));
if (UC < $10000) or (UC > $10FFFF) then
begin
UC:= UNICODE_INVALID;
end
else
begin
{ only store pair if room }
dec(UC,$10000);
if (OutputUnicode<MaxDestChars-1) then
begin
Dest[OutputUnicode]:=WideChar(UC shr 10 + $D800);
inc(OutputUnicode);
UC:=(UC and $3ff) + $DC00;
end
else
begin
InputUTF8:= InputUTF8 + CharLen;
{ don't store anything }
CharLen:=0;
end;
end;
end;
5,6,7: begin
//Invalid UTF8 to unicode conversion,
//mask it as invalid UNICODE too.
UC:=UNICODE_INVALID;
end;
end;
if CharLen > 0 then
begin
PreChar:=UC;
Dest[OutputUnicode]:=WideChar(UC);
inc(OutputUnicode);
end;
InputUTF8:= InputUTF8 + CharLen;
end;
end;
Result:=OutputUnicode+1;
end
else
begin
while (InputUTF8<SourceBytes) do
begin
IBYTE:=byte(Source[InputUTF8]);
if (IBYTE and $80) = 0 then
begin
//One character US-ASCII, convert it to unicode
if IBYTE = 10 then
begin
if (PreChar<>13) and FALSE then
begin
//Expand to crlf, conform UTF-8.
//This procedure will break the memory alocation by
//FPC for the widestring, so never use it. Condition never true due the "and FALSE".
inc(OutputUnicode,2);
PreChar:=10;
end
else
begin
inc(OutputUnicode);
PreChar:=IBYTE;
end;
end
else
begin
inc(OutputUnicode);
PreChar:=IBYTE;
end;
inc(InputUTF8);
end
else
begin
TempByte:=IBYTE;
CharLen:=0;
while (TempBYTE and $80)<>0 do
begin
TempBYTE:=(TempBYTE shl 1) and $FE;
inc(CharLen);
end;
//Test for the "CharLen" conforms UTF-8 string
//This means the 10xxxxxx pattern.
if SizeUInt(InputUTF8+CharLen-1)>SourceBytes then
begin
//Insuficient chars in string to decode
//UTF-8 array. Fallback to single char.
CharLen:= 1;
end;
for LookAhead := 1 to CharLen-1 do
begin
if ((byte(Source[InputUTF8+LookAhead]) and $80)<>$80) or
((byte(Source[InputUTF8+LookAhead]) and $40)<>$00) then
begin
//Invalid UTF-8 sequence, fallback.
CharLen:= LookAhead;
break;
end;
end;
UC:=$FFFF;
case CharLen of
1: begin
//Not valid UTF-8 sequence
UC:=UNICODE_INVALID;
end;
2: begin
//Two bytes UTF, convert it
UC:=(byte(Source[InputUTF8]) and $1F) shl 6;
UC:=UC or (byte(Source[InputUTF8+1]) and $3F);
if UC <= $7F then
begin
//Invalid UTF sequence.
UC:=UNICODE_INVALID;
end;
end;
3: begin
//Three bytes, convert it to unicode
UC:= (byte(Source[InputUTF8]) and $0F) shl 12;
UC:= UC or ((byte(Source[InputUTF8+1]) and $3F) shl 6);
UC:= UC or ((byte(Source[InputUTF8+2]) and $3F));
If (UC <= $7FF) or (UC >= $FFFE) or ((UC >= $D800) and (UC <= $DFFF)) then
begin
//Invalid UTF-8 sequence
UC:= UNICODE_INVALID;
end;
end;
4: begin
//Four bytes, convert it to two unicode characters
UC:= (byte(Source[InputUTF8]) and $07) shl 18;
UC:= UC or ((byte(Source[InputUTF8+1]) and $3F) shl 12);
UC:= UC or ((byte(Source[InputUTF8+2]) and $3F) shl 6);
UC:= UC or ((byte(Source[InputUTF8+3]) and $3F));
if (UC < $10000) or (UC > $10FFFF) then
UC:= UNICODE_INVALID
else
{ extra character character }
inc(OutputUnicode);
end;
5,6,7: begin
//Invalid UTF8 to unicode conversion,
//mask it as invalid UNICODE too.
UC:=UNICODE_INVALID;
end;
end;
if CharLen > 0 then
begin
PreChar:=UC;
inc(OutputUnicode);
end;
InputUTF8:= InputUTF8 + CharLen;
end;
end;
Result:=OutputUnicode+1;
end;
end;
function UTF8Encode(const s : Ansistring) : UTF8String; inline;
begin
Result:=UTF8Encode(UnicodeString(s));
end;
function UTF8Encode(const s : UnicodeString) : UTF8String;
var
i : SizeInt;
hs : UTF8String;
begin
result:='';
if s='' then
exit;
SetLength(hs,length(s)*3);
i:=UnicodeToUtf8(pchar(hs),length(hs)+1,PUnicodeChar(s),length(s));
if i>0 then
begin
SetLength(hs,i-1);
result:=hs;
end;
end;
function UTF8Decode(const s : UTF8String): UnicodeString;
var
i : SizeInt;
hs : UnicodeString;
begin
result:='';
if s='' then
exit;
SetLength(hs,length(s));
i:=Utf8ToUnicode(PUnicodeChar(hs),length(hs)+1,pchar(s),length(s));
if i>0 then
begin
SetLength(hs,i-1);
result:=hs;
end;
end;
function AnsiToUtf8(const s : ansistring): UTF8String;{$ifdef SYSTEMINLINE}inline;{$endif}
begin
Result:=Utf8Encode(s);
end;
function Utf8ToAnsi(const s : UTF8String) : ansistring;{$ifdef SYSTEMINLINE}inline;{$endif}
begin
Result:=Utf8Decode(s);
end;
function UnicodeStringToUCS4String(const s : UnicodeString) : UCS4String;
var
i, slen,
destindex : SizeInt;
len : longint;
begin
slen:=length(s);
setlength(result,slen+1);
i:=1;
destindex:=0;
while (i<=slen) do
begin
result[destindex]:=utf16toutf32(s,i,len);
inc(destindex);
inc(i,len);
end;
{ destindex <= slen (surrogate pairs may have been merged) }
{ destindex+1 for terminating #0 (dynamic arrays are }
{ implicitely filled with zero) }
setlength(result,destindex+1);
end;
{ concatenates an utf-32 char to a unicodestring. S *must* be unique when entering. }
procedure ConcatUTF32ToUnicodeStr(const nc: UCS4Char; var S: UnicodeString; var index: SizeInt);
var
p : PUnicodeChar;
begin
{ if nc > $ffff, we need two places }
if (index+ord(nc > $ffff)>length(s)) then
if (length(s) < 10*256) then
setlength(s,length(s)+10)
else
setlength(s,length(s)+length(s) shr 8);
{ we know that s is unique -> avoid uniquestring calls}
p:=@s[index];
if (nc<$ffff) then
begin
p^:=unicodechar(nc);
inc(index);
end
else if (dword(nc)<=$10ffff) then
begin
p^:=unicodechar((nc - $10000) shr 10 + $d800);
(p+1)^:=unicodechar((nc - $10000) and $3ff + $dc00);
inc(index,2);
end
else
{ invalid code point }
begin
p^:='?';
inc(index);
end;
end;
function UCS4StringToUnicodeString(const s : UCS4String) : UnicodeString;
var
i : SizeInt;
resindex : SizeInt;
begin
{ skip terminating #0 }
SetLength(result,length(s)-1);
resindex:=1;
for i:=0 to high(s)-1 do
ConcatUTF32ToUnicodeStr(s[i],result,resindex);
{ adjust result length (may be too big due to growing }
{ for surrogate pairs) }
setlength(result,resindex-1);
end;
function WideStringToUCS4String(const s : WideString) : UCS4String;
var
i, slen,
destindex : SizeInt;
len : longint;
begin
slen:=length(s);
setlength(result,slen+1);
i:=1;
destindex:=0;
while (i<=slen) do
begin
result[destindex]:=utf16toutf32(s,i,len);
inc(destindex);
inc(i,len);
end;
{ destindex <= slen (surrogate pairs may have been merged) }
{ destindex+1 for terminating #0 (dynamic arrays are }
{ implicitely filled with zero) }
setlength(result,destindex+1);
end;
{ concatenates an utf-32 char to a widestring. S *must* be unique when entering. }
procedure ConcatUTF32ToWideStr(const nc: UCS4Char; var S: WideString; var index: SizeInt);
var
p : PWideChar;
begin
{ if nc > $ffff, we need two places }
if (index+ord(nc > $ffff)>length(s)) then
if (length(s) < 10*256) then
setlength(s,length(s)+10)
else
setlength(s,length(s)+length(s) shr 8);
{ we know that s is unique -> avoid uniquestring calls}
p:=@s[index];
if (nc<$ffff) then
begin
p^:=widechar(nc);
inc(index);
end
else if (dword(nc)<=$10ffff) then
begin
p^:=widechar((nc - $10000) shr 10 + $d800);
(p+1)^:=widechar((nc - $10000) and $3ff + $dc00);
inc(index,2);
end
else
{ invalid code point }
begin
p^:='?';
inc(index);
end;
end;
function UCS4StringToWideString(const s : UCS4String) : WideString;
var
i : SizeInt;
resindex : SizeInt;
begin
{ skip terminating #0 }
SetLength(result,length(s)-1);
resindex:=1;
for i:=0 to high(s)-1 do
ConcatUTF32ToWideStr(s[i],result,resindex);
{ adjust result length (may be too big due to growing }
{ for surrogate pairs) }
setlength(result,resindex-1);
end;
const
SNoUnicodestrings = 'This binary has no unicodestrings support compiled in.';
SRecompileWithUnicodestrings = 'Recompile the application with a unicodestrings-manager in the program uses clause.';
*)
function CompareUnicodeString(const s1, s2 : UnicodeString) : PtrInt;
begin
widestringmanager.collator.setStrength(JTCollator.IDENTICAL);
result:=widestringmanager.collator.compare(s1,s2);
end;
function CompareTextUnicodeString(const s1, s2 : UnicodeString): PtrInt;
begin
widestringmanager.collator.setStrength(JTCollator.TERTIARY);
result:=widestringmanager.collator.compare(s1,s2);
end;
constructor TUnicodeStringManager.create;
begin
end;
procedure initunicodestringmanager;
begin
widestringmanager:=TUnicodeStringManager.create;
widestringmanager.collator:=JTCollator.getInstance;
end;