fpc/rtl/inc/wstrings.inc
Jonas Maebe 6a0755e897 * fixed setstring for unicode/widestring: don't stop at embedded #0
characters, don't expect that buffer is null-terminated (mantis #14740)

git-svn-id: trunk@13826 -
2009-10-09 16:12:03 +00:00

1722 lines
46 KiB
PHP

{
This file is part of the Free Pascal run time library.
Copyright (c) 1999-2005 by Florian Klaempfl,
member of the Free Pascal development team.
This file implements support routines for WideStrings with FPC
See the file COPYING.FPC, included in this distribution,
for details about the copyright.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
**********************************************************************}
{
This file contains the implementation of the WideString type,
and all things that are needed for it.
WideString is defined as a 'silent' pwidechar :
a pwidechar that points to :
@-4 : DWord for size (TWideRec.Len); size=number of bytes, not the number of chars. Divide or multiply
with sizeof(WideChar) to convert. This is needed to be compatible with Delphi and
Windows COM BSTR.
@ : String + Terminating #0;
Pwidechar(Widestring) is a valid typecast.
So WS[i] is converted to the address @WS+i-1.
There is no reference count field: TWideRec only stores the size, "incrementing
the reference count" makes a private copy and "decrementing" it frees the string.
Constants are recognised by their address (see IsWideStringConstant) and are
never disposed of.
}
{ Header that precedes the character data of a heap allocated WideString. }
Type
PWideRec = ^TWideRec;
TWideRec = Packed Record
Len : DWord; { size of the character data in bytes, not in characters }
First : WideChar; { first character; a WideString pointer points at this field }
end;
Const
{ Size of the header plus one character slot. }
WideRecLen = SizeOf(TWideRec);
{ Distance in bytes from the start of the allocation to the first character. }
WideFirstOff = SizeOf(TWideRec)-sizeof(WideChar);
{
Default WideChar <-> Char conversion: wide to ansi copies the code units
below 256 unchanged and replaces all others by '?'; ansi to wide copies
every byte value unchanged.
These routines can be overwritten for the Current Locale
}
procedure DefaultWide2AnsiMove(source:pwidechar;var dest:ansistring;len:SizeInt);
{
  Fallback wide -> ansi conversion.
  dest is resized to len characters; every code unit below 256 is copied
  unchanged, every other code unit is stored as '?'.
}
var
  idx  : SizeInt;
  code : word;
begin
  setlength(dest,len);
  idx:=1;
  while idx<=len do
    begin
      code:=word(source^);
      if code>=256 then
        dest[idx]:='?'
      else
        dest[idx]:=char(code);
      inc(source);
      inc(idx);
    end;
end;
procedure DefaultAnsi2WideMove(source:pchar;var dest:widestring;len:SizeInt);
{
  Fallback ansi -> wide conversion.
  dest is resized to len characters and every source byte is widened to a
  WideChar with the same ordinal value.
}
var
  idx : SizeInt;
begin
  setlength(dest,len);
  idx:=1;
  while idx<=len do
    begin
      dest[idx]:=widechar(byte(source^));
      inc(source);
      inc(idx);
    end;
end;
(*
Procedure UniqueWideString(Var S : WideString); [Public,Alias : 'FPC_WIDESTR_UNIQUE'];
{
Make sure reference count of S is 1,
using copy-on-write semantics.
}
begin
end;
*)
{****************************************************************************
Internal functions, not in interface.
****************************************************************************}
procedure WideStringError;
{
  Reports run-time error 204 for the calling frame.
  Used by the allocation routines when no memory could be obtained.
}
begin
  HandleErrorFrame(204,get_frame);
end;
{$ifdef WideStrDebug}
Procedure DumpWideRec(S : Pointer);
{
  Debug helper: prints the header of the WideString whose character data
  starts at S, or a notice when S is nil.
  TWideRec only has a Len field (size in bytes); the former 'ref' output
  referred to a field that does not exist and prevented compilation.
}
begin
  If S=Nil then
    Writeln ('String is nil')
  Else
    With PWideRec(S-WideFirstOff)^ do
      Writeln ('(Len:',len,')');
end;
{$endif}
Function NewWideString(Len : SizeInt) : Pointer;
{
  Allocates a WideString with room for Len characters and returns a pointer
  to its first character.
  With winwidestringalloc (Windows) the memory comes from SysAllocStringLen;
  otherwise it comes from the heap, the byte size is stored in the header
  and the first character is set to #0.
  Calls WideStringError when the allocation fails.
}
Var
  Mem : Pointer;
begin
{$ifdef MSWINDOWS}
  if winwidestringalloc then
    begin
      Mem:=SysAllocStringLen(nil,Len);
      if Mem=nil then
        WideStringError;
    end
  else
{$endif MSWINDOWS}
    begin
      GetMem(Mem,Len*sizeof(WideChar)+WideRecLen);
      if Mem=Nil then
        WideStringError
      else
        begin
          PWideRec(Mem)^.Len:=Len*2;       { size in bytes }
          PWideRec(Mem)^.First:=#0;        { terminating #0 }
          inc(Mem,WideFirstOff);           { now points to the characters }
        end;
    end;
  NewWideString:=Mem;
end;
Procedure DisposeWideString(Var S : Pointer);
{
  Releases the WideString whose character data starts at S and sets S to nil.
  A nil S is ignored. Strings allocated through the Windows allocator are
  released with SysFreeString, heap strings with FreeMem on the start of
  their allocation.
}
begin
  if S<>Nil then
    begin
{$ifdef MSWINDOWS}
      if winwidestringalloc then
        SysFreeString(S)
      else
{$endif MSWINDOWS}
        begin
          { step back from the characters to the start of the allocation }
          Dec (S,WideFirstOff);
          Freemem(S);
        end;
      S:=Nil;
    end;
end;
var
  { linker supplied symbols delimiting the data section }
  __data_start: byte; external name '__data_start__';
  __data_end: byte; external name '__data_end__';

function IsWideStringConstant(S: pointer): boolean;{$ifdef SYSTEMINLINE}inline;{$endif}
{
  Returns True when S lies inside [__data_start, __data_end), i.e. when the
  widestring is a constant located in the data section.
}
begin
  IsWideStringConstant:=(S<@__data_end) and (S>=@__data_start);
end;
Procedure fpc_WideStr_Decr_Ref (Var S : Pointer);[Public,Alias:'FPC_WIDESTR_DECR_REF']; compilerproc;
{
  Releases the widestring S.
  No reference count is kept: every non-nil, non-constant string is
  disposed of immediately (DisposeWideString also sets S to nil).
  Nil strings and constants located in the data section are left untouched.
}
Begin
  { Zero string }
  if S=Nil then
    exit;
  if not IsWideStringConstant(S) then
    DisposeWideString(S);
end;

{ alias for internal use }
Procedure fpc_WideStr_Decr_Ref (Var S : Pointer);[external name 'FPC_WIDESTR_DECR_REF'];
Procedure fpc_WideStr_Incr_Ref(Var S : Pointer);[Public,Alias:'FPC_WIDESTR_INCR_REF']; compilerproc;
{
  Replaces S by a freshly allocated copy of the string it points to.
  A nil S is left unchanged.
}
var
  Dup       : pointer;
  CharCount : SizeInt;
Begin
  if S<>Nil then
    begin
      CharCount:=length(WideString(S));
      Dup:=NewWidestring(CharCount);
      { copy the characters together with the terminating #0 }
      move(S^,Dup^,(CharCount+1)*sizeof(widechar));
      S:=Dup;
    end;
end;

{ alias for internal use }
Procedure fpc_WideStr_Incr_Ref (Var S : Pointer);[external name 'FPC_WIDESTR_INCR_REF'];
{$ifndef FPC_STRTOSHORTSTRINGPROC}
function fpc_WideStr_To_ShortStr (high_of_res: SizeInt;const S2 : WideString): shortstring;[Public, alias: 'FPC_WIDESTR_TO_SHORTSTR']; compilerproc;
{
  Converts a WideString to a ShortString.
  At most high_of_res characters are handed to the widestring manager for
  conversion; an empty S2 yields an empty result.
}
Var
  Count     : SizeInt;
  Converted : ansistring;
begin
  result:='';
  Count:=Length(S2);
  if Count<=0 then
    exit;
  if Count>high_of_res then
    Count:=high_of_res;
  widestringmanager.Wide2AnsiMoveProc(PWideChar(S2),Converted,Count);
  result:=Converted;
end;
{$else FPC_STRTOSHORTSTRINGPROC}
procedure fpc_WideStr_To_ShortStr (out res: ShortString;const S2 : WideString); [Public, alias: 'FPC_WIDESTR_TO_SHORTSTR'];compilerproc;
{
  Converts a WideString to a ShortString.
  At most high(res) characters are handed to the widestring manager for
  conversion; an empty S2 yields an empty res.
}
Var
  Count     : SizeInt;
  Converted : ansistring;
begin
  res:='';
  Count:=Length(S2);
  if Count<=0 then
    exit;
  if Count>high(res) then
    Count:=high(res);
  widestringmanager.Wide2AnsiMoveProc(PWideChar(S2),Converted,Count);
  res:=Converted;
end;
{$endif FPC_STRTOSHORTSTRINGPROC}
Function fpc_ShortStr_To_WideStr (Const S2 : ShortString): WideString;compilerproc;
{
  Converts a ShortString to a WideString through the widestring manager.
}
Var
  Count : SizeInt;
begin
  result:='';
  Count:=Length(S2);
  if Count<=0 then
    exit;
  widestringmanager.Ansi2WideMoveProc(PChar(@S2[1]),result,Count);
  { Terminating Zero }
  PWideChar(Pointer(fpc_ShortStr_To_WideStr)+Count*sizeof(WideChar))^:=#0;
end;
Function fpc_WideStr_To_AnsiStr (const S2 : WideString): AnsiString; compilerproc;
{
  Converts a WideString to an AnsiString through the widestring manager.
  An empty S2 yields an empty result.
}
Var
  Count : SizeInt;
begin
  result:='';
  Count:=Length(S2);
  if Count<=0 then
    exit;
  widestringmanager.Wide2AnsiMoveProc(PWideChar(Pointer(S2)),result,Count);
end;
Function fpc_AnsiStr_To_WideStr (Const S2 : AnsiString): WideString; compilerproc;
{
  Converts an AnsiString to a WideString through the widestring manager.
  An empty S2 yields an empty result.
}
Var
  Count : SizeInt;
begin
  result:='';
  Count:=Length(S2);
  if Count<=0 then
    exit;
  widestringmanager.Ansi2WideMoveProc(PChar(S2),result,Count);
end;
Function fpc_PWideChar_To_WideStr(const p : pwidechar): widestring; compilerproc;
{
  Converts a #0 terminated PWideChar to a WideString.
  A nil pointer yields an empty string.
}
var
  Count : SizeInt;
begin
  result:='';
  if p=nil then
    exit;
  { number of characters in front of the terminating #0 }
  Count:=IndexWord(p^, -1, 0);
  Setlength(result,Count);
  if Count<=0 then
    exit;
  Move(p^,PWideChar(Pointer(result))^,Count*sizeof(WideChar));
  { Terminating Zero }
  PWideChar(Pointer(result)+Count*sizeof(WideChar))^:=#0;
end;
{ checked against the ansistring routine, 2001-05-27 (FK) }
Procedure fpc_WideStr_Assign (Var S1 : Pointer;S2 : Pointer);[Public,Alias:'FPC_WIDESTR_ASSIGN']; compilerproc;
{
  Assigns S2 to S1 (S1:=S2) by copying the characters of S2.
  - identical pointers: nothing to do
  - S2 nil: S1 is released and set to nil
  - S1 is a constant: a new string is allocated for the copy
  - otherwise S1 is resized (or reallocated by the Windows allocator) and
    the characters are copied into it
}
begin
  if S1=S2 then
    exit;
  if S2=nil then
    begin
      { Free S1 }
      fpc_widestr_decr_ref (S1);
      S1:=nil;
      exit;
    end;
  if IsWideStringConstant(S1) then
    begin
      S1:=NewWidestring(length(WideString(S2)));
      move(s2^,s1^,(length(WideString(s1))+1)*sizeof(widechar));
    end
  else
{$ifdef MSWINDOWS}
    if winwidestringalloc then
      begin
        if SysReAllocStringLen(S1, S2, Length(WideString(S2))) = 0 then
          WideStringError;
      end
    else
{$endif MSWINDOWS}
      begin
        SetLength(WideString(S1),length(WideString(S2)));
        { copy the characters together with the terminating #0 }
        move(s2^,s1^,(length(WideString(s1))+1)*sizeof(widechar));
      end;
end;

{ alias for internal use }
Procedure fpc_WideStr_Assign (Var S1 : Pointer;S2 : Pointer);[external name 'FPC_WIDESTR_ASSIGN'];
{$ifndef STR_CONCAT_PROCS}
function fpc_WideStr_Concat (const S1,S2 : WideString): WideString; compilerproc;
{
  Returns S1 followed by S2.
  When one operand is empty the other one is simply assigned.
}
Var
  Len1,Len2 : SizeInt;
  Dst       : pwidechar;
begin
  { only assign if s1 or s2 is empty }
  if (S1='') then
    begin
      result:=s2;
      exit;
    end;
  if (S2='') then
    begin
      result:=s1;
      exit;
    end;
  Len1:=Length(S1);
  Len2:=length(S2);
  SetLength(result,Len1+Len2);
  Dst:=pwidechar(result);
  Move(S1[1],Dst^,Len1*sizeof(WideChar));
  inc(Dst,Len1);
  { the second move also copies the terminating #0 of S2 }
  Move(S2[1],Dst^,(Len2+1)*sizeof(WideChar));
end;
function fpc_WideStr_Concat_multi (const sarr:array of Widestring): widestring; compilerproc;
{
  Returns the concatenation of all strings in sarr.
}
Var
  idx      : Longint;
  Src      : pointer;
  Dst      : pwidechar;
  PartLen  : SizeInt;
  TotalLen : SizeInt;
begin
  { First calculate size of the result so we can do
    a single call to SetLength() }
  TotalLen:=0;
  for idx:=low(sarr) to high(sarr) do
    inc(TotalLen,length(sarr[idx]));
  SetLength(result,TotalLen);
  Dst:=pwidechar(result);
  for idx:=low(sarr) to high(sarr) do
    begin
      Src:=pointer(sarr[idx]);
      if not assigned(Src) then
        continue;
      PartLen:=length(widestring(Src));
      { copy including the terminating #0; the next part overwrites it }
      Move(pwidechar(Src)^,Dst^,(PartLen+1)*sizeof(WideChar));
      inc(Dst,PartLen);
    end;
end;
{$else STR_CONCAT_PROCS}
procedure fpc_WideStr_Concat (var DestS:Widestring;const S1,S2 : WideString); compilerproc;
{
Stores S1 followed by S2 in DestS.
DestS may be the same string as S1 and/or S2; each aliasing case is handled
separately because SetLength(DestS) can move or resize the shared buffer.
}
Var
Size,Location : SizeInt;
same : boolean;
begin
{ only assign if s1 or s2 is empty }
if (S1='') then
begin
DestS:=s2;
exit;
end;
if (S2='') then
begin
DestS:=s1;
exit;
end;
{ Location = length of S1 = offset at which S2 is appended; Size = length of S2 }
Location:=Length(S1);
Size:=length(S2);
{ Use Pointer() typecasts to prevent extra conversion code }
if Pointer(DestS)=Pointer(S1) then
begin
{ DestS aliases S1: grow in place and append. Remember whether S2 is the
same string too, because after SetLength only DestS is used as source }
same:=Pointer(S1)=Pointer(S2);
SetLength(DestS,Size+Location);
if same then
{ DestS:=DestS+DestS: duplicate the first half; SetLength already wrote the terminator }
Move(Pointer(DestS)^,(Pointer(DestS)+Location*sizeof(WideChar))^,(Size)*sizeof(WideChar))
else
{ append S2 including its terminating #0 }
Move(Pointer(S2)^,(Pointer(DestS)+Location*sizeof(WideChar))^,(Size+1)*sizeof(WideChar));
end
else if Pointer(DestS)=Pointer(S2) then
begin
{ DestS aliases S2: grow, shift the old contents (with terminator) to the
back first, then copy S1 in front of them }
SetLength(DestS,Size+Location);
Move(Pointer(DestS)^,(Pointer(DestS)+Location*sizeof(WideChar))^,(Size+1)*sizeof(WideChar));
Move(Pointer(S1)^,Pointer(DestS)^,Location*sizeof(WideChar));
end
else
begin
{ no aliasing: start from an empty string and copy both parts }
DestS:='';
SetLength(DestS,Size+Location);
Move(Pointer(S1)^,Pointer(DestS)^,Location*sizeof(WideChar));
Move(Pointer(S2)^,(Pointer(DestS)+Location*sizeof(WideChar))^,(Size+1)*sizeof(WideChar));
end;
end;
procedure fpc_WideStr_Concat_multi (var DestS:Widestring;const sarr:array of Widestring); compilerproc;
{
  Stores the concatenation of all strings in sarr in DestS.
  The result is built in a temporary so that DestS may itself be one of the
  elements of sarr.
  An empty array yields an empty string. The guard used to test
  high(sarr)=0, which is true for an array with exactly ONE element and made
  that case return '' instead of the element.
}
Var
  i           : Longint;
  p,pc        : pointer;
  Size,NewLen : SizeInt;
  DestTmp     : Widestring;
begin
  if high(sarr)<0 then
    begin
      DestS:='';
      exit;
    end;
  { First calculate size of the result so we can do
    a single call to SetLength() }
  NewLen:=0;
  for i:=low(sarr) to high(sarr) do
    inc(NewLen,length(sarr[i]));
  SetLength(DestTmp,NewLen);
  pc:=pwidechar(DestTmp);
  for i:=low(sarr) to high(sarr) do
    begin
      p:=pointer(sarr[i]);
      if assigned(p) then
        begin
          Size:=length(widestring(p));
          { copy including the terminating #0; the next part overwrites it }
          Move(p^,pc^,(Size+1)*sizeof(WideChar));
          { pc is an untyped pointer, so advance in bytes }
          inc(pc,size*sizeof(WideChar));
        end;
    end;
  DestS:=DestTmp;
end;
{$endif STR_CONCAT_PROCS}
Function fpc_Char_To_WideStr(const c : Char): WideString; compilerproc;
{
  Builds a one character WideString from a Char.
}
begin
  Setlength(result,1);
  result[1]:=c;
  { Terminating Zero }
  PWideChar(Pointer(result)+sizeof(WideChar))^:=#0;
end;
Function fpc_WChar_To_WideStr(const c : WideChar): WideString; compilerproc;
{
  Builds a one character WideString from a WideChar.
}
begin
  Setlength (result,1);
  result[1]:= c;
end;
Function fpc_WChar_To_AnsiStr(const c : WideChar): AnsiString; compilerproc;
{
  Converts a single WideChar to an AnsiString through the widestring manager.
}
begin
  widestringmanager.Wide2AnsiMoveProc(@c, result, 1);
end;
Function fpc_UChar_To_WideStr(const c : WideChar): WideString; compilerproc;
{
  Builds a one character WideString from a WideChar.
}
begin
  Setlength (result,1);
  result[1]:= c;
end;
Function fpc_PChar_To_WideStr(const p : pchar): WideString; compilerproc;
{
  Converts a #0 terminated PChar to a WideString through the widestring
  manager. A nil pointer or an empty string yields an empty result.
}
Var
  Count : SizeInt;
begin
  if assigned(p) and (p[0]<>#0) then
    begin
      { number of characters in front of the terminating #0 }
      Count:=IndexChar(p^,-1,#0);
      widestringmanager.Ansi2WideMoveProc(P,fpc_PChar_To_WideStr,Count);
    end
  else
    fpc_pchar_to_widestr := '';
end;
Function fpc_CharArray_To_WideStr(const arr: array of char; zerobased: boolean = true): WideString; compilerproc;
{
  Converts a char array to a WideString through the widestring manager.
  When zerobased is true the conversion stops at the first #0 in the array
  (an array starting with #0 gives an empty string); otherwise, or when no
  #0 is found, the whole array is converted.
}
var
  Count : SizeInt;
begin
  Count:=high(arr)+1;
  if zerobased then
    begin
      if arr[0]=#0 then
        begin
          fpc_chararray_to_widestr := '';
          exit;
        end;
      Count:=IndexChar(arr,high(arr)+1,#0);
      if Count = -1 then
        Count := high(arr)+1;
    end;
  SetLength(fpc_CharArray_To_WideStr,Count);
  widestringmanager.Ansi2WideMoveProc (pchar(@arr),fpc_CharArray_To_WideStr,Count);
end;
{$ifndef FPC_STRTOCHARARRAYPROC}
{ inside the compiler, the resulttype is modified to that of the actual }
{ chararray we're converting to (JM) }
function fpc_widestr_to_chararray(arraysize: SizeInt; const src: WideString): fpc_big_chararray;[public,alias: 'FPC_WIDESTR_TO_CHARARRAY']; compilerproc;
{
  Converts src through the widestring manager and stores at most arraysize
  characters in the result; the remainder of the array is zero filled.
}
var
  count: SizeInt;
  converted: ansistring;
begin
  { make sure we don't dereference src if it can be nil (JM) }
  if length(src) > 0 then
    widestringmanager.wide2ansimoveproc(pwidechar(@src[1]),converted,length(src));
  count := length(converted);
  if count > arraysize then
    count := arraysize;
{$r-}
  move(converted[1],fpc_widestr_to_chararray[0],count);
  fillchar(fpc_widestr_to_chararray[count],arraysize-count,0);
{$ifdef RangeCheckWasOn}
{$r+}
{$endif}
end;
{ inside the compiler, the resulttype is modified to that of the actual }
{ widechararray we're converting to (JM) }
function fpc_widestr_to_widechararray(arraysize: SizeInt; const src: WideString): fpc_big_widechararray;[public,alias: 'FPC_WIDESTR_TO_WIDECHARARRAY']; compilerproc;
{
  Copies at most arraysize characters of src into the result; the remainder
  of the array is zero filled.
}
var
  count: SizeInt;
begin
  count := length(src);
  if arraysize < count then
    count := arraysize;
{$r-}
  { make sure we don't try to access element 1 of the widestring if it's nil }
  if count > 0 then
    move(src[1],fpc_widestr_to_widechararray[0],count*SizeOf(WideChar));
  fillchar(fpc_widestr_to_widechararray[count],(arraysize-count)*SizeOf(WideChar),0);
{$ifdef RangeCheckWasOn}
{$r+}
{$endif}
end;
{ inside the compiler, the resulttype is modified to that of the actual }
{ chararray we're converting to (JM) }
function fpc_ansistr_to_widechararray(arraysize: SizeInt; const src: AnsiString): fpc_big_widechararray;[public,alias: 'FPC_ANSISTR_TO_WIDECHARARRAY']; compilerproc;
{
  Converts src through the widestring manager and stores at most arraysize
  wide characters in the result; the remainder of the array is zero filled.
}
var
  count: SizeInt;
  converted: widestring;
begin
  { make sure we don't dereference src if it can be nil (JM) }
  if length(src) > 0 then
    widestringmanager.ansi2widemoveproc(pchar(@src[1]),converted,length(src));
  count := length(converted);
  if count > arraysize then
    count := arraysize;
{$r-}
  move(converted[1],fpc_ansistr_to_widechararray[0],count*sizeof(widechar));
  fillchar(fpc_ansistr_to_widechararray[count],(arraysize-count)*SizeOf(WideChar),0);
{$ifdef RangeCheckWasOn}
{$r+}
{$endif}
end;
function fpc_shortstr_to_widechararray(arraysize: SizeInt; const src: ShortString): fpc_big_widechararray;[public,alias: 'FPC_SHORTSTR_TO_WIDECHARARRAY']; compilerproc;
{
  Converts src through the widestring manager and stores at most arraysize
  wide characters in the result; the remainder of the array is zero filled.
}
var
  count: longint;
  converted : widestring;
begin
  count := length(src);
  { make sure we don't access char 1 if length is 0 (JM) }
  if count > 0 then
    widestringmanager.ansi2widemoveproc(pchar(@src[1]),converted,count);
  count := length(converted);
  if count > arraysize then
    count := arraysize;
{$r-}
  move(converted[1],fpc_shortstr_to_widechararray[0],count*sizeof(widechar));
  fillchar(fpc_shortstr_to_widechararray[count],(arraysize-count)*SizeOf(WideChar),0);
{$ifdef RangeCheckWasOn}
{$r+}
{$endif}
end;
{$else ndef FPC_STRTOCHARARRAYPROC}
procedure fpc_widestr_to_chararray(out res: array of char; const src: WideString); compilerproc;
{
  Converts src through the widestring manager and stores at most
  length(res) characters in res; the remainder of res is zero filled.
}
var
  count: SizeInt;
  converted: ansistring;
begin
  { make sure we don't dereference src if it can be nil (JM) }
  if length(src) > 0 then
    widestringmanager.wide2ansimoveproc(pwidechar(@src[1]),converted,length(src));
  count := length(converted);
  if count > length(res) then
    count := length(res);
{$r-}
  move(converted[1],res[0],count);
  fillchar(res[count],length(res)-count,0);
{$ifdef RangeCheckWasOn}
{$r+}
{$endif}
end;
procedure fpc_widestr_to_widechararray(out res: array of widechar; const src: WideString); compilerproc;
{
  Copies at most length(res) characters of src into res; the remainder of
  res is zero filled.
}
var
  count: SizeInt;
begin
  count := length(src);
  if length(res) < count then
    count := length(res);
{$r-}
  { make sure we don't try to access element 1 of the widestring if it's nil }
  if count > 0 then
    move(src[1],res[0],count*SizeOf(WideChar));
  fillchar(res[count],(length(res)-count)*SizeOf(WideChar),0);
{$ifdef RangeCheckWasOn}
{$r+}
{$endif}
end;
{$endif ndef FPC_STRTOCHARARRAYPROC}
Function fpc_WideStr_Compare(const S1,S2 : WideString): SizeInt;[Public,Alias : 'FPC_WIDESTR_COMPARE']; compilerproc;
{
  Compares two WideStrings code unit by code unit.
  Result:
   <0 if S1<S2
    0 if S1=S2
   >0 if S1>S2
  When the common prefix is equal the length difference decides.
}
Var
  Common,Diff : SizeInt;
begin
  if pointer(S1)=pointer(S2) then
    begin
      fpc_WideStr_Compare:=0;
      exit;
    end;
  { number of characters both strings have }
  Common:=Length(S1);
  if Length(S2)<Common then
    Common:=Length(S2);
  Diff:=CompareWord(S1[1],S2[1],Common);
  if Diff=0 then
    Diff:=Length(S1)-Length(S2);
  fpc_WideStr_Compare:=Diff;
end;
Function fpc_WideStr_Compare_Equal(const S1,S2 : WideString): SizeInt;[Public,Alias : 'FPC_WIDESTR_COMPARE_EQUAL']; compilerproc;
{
  Compares two WideStrings for equality only.
  Result:
    0 if S1=S2
   <>0 if S1<>S2
  Strings of different length are reported as -1 without looking at their
  contents.
}
Var
  Len : SizeInt;
begin
  if pointer(S1)=pointer(S2) then
    begin
      fpc_WideStr_Compare_Equal:=0;
      exit;
    end;
  Len:=Length(S1);
  if Len=Length(S2) then
    fpc_WideStr_Compare_Equal:=CompareWord(S1[1],S2[1],Len)
  else
    fpc_WideStr_Compare_Equal:=-1;
end;
Procedure fpc_WideStr_CheckZero(p : pointer);[Public,Alias : 'FPC_WIDESTR_CHECKZERO']; compilerproc;
{
  Reports run-time error 201 for the calling frame when p is nil.
}
begin
  if p<>nil then
    exit;
  HandleErrorFrame(201,get_frame);
end;
Procedure fpc_WideStr_CheckRange(len,index : SizeInt);[Public,Alias : 'FPC_WIDESTR_RANGECHECK']; compilerproc;
{
  Reports run-time error 201 for the calling frame when index is outside
  1..(len div 2). len is halved before the comparison.
}
begin
  if (Index<1) or (index>len div 2) then
    HandleErrorFrame(201,get_frame);
end;
Procedure fpc_WideStr_SetLength(Var S : WideString; l : SizeInt);[Public,Alias : 'FPC_WIDESTR_SETLENGTH']; compilerproc;
{
Sets The length of string S to L.
Makes sure S is unique, and contains enough room.
A length of zero or less releases the string and leaves S nil.
}
Var
Temp : Pointer;
movelen: SizeInt;
begin
if (l>0) then
begin
if Pointer(S)=nil then
begin
{ Need a complete new string...}
Pointer(s):=NewWideString(l);
end
{ windows doesn't support reallocing widestrings, this code
is anyways subject to be removed because widestrings shouldn't be
ref. counted anymore (FK) }
else
if
{$ifdef MSWINDOWS}
not winwidestringalloc and
{$endif MSWINDOWS}
not IsWideStringConstant(pointer(S))
then
begin
{ Heap string: step back to the start of the allocation, grow the block
only when it is too small for l characters plus header, then point at
the characters again. A block that is already large enough is kept. }
Dec(Pointer(S),WideFirstOff);
if SizeUInt(L*sizeof(WideChar)+WideRecLen)>MemSize(Pointer(S)) then
reallocmem(pointer(S), L*sizeof(WideChar)+WideRecLen);
Inc(Pointer(S), WideFirstOff);
end
else
begin
{ Reallocation is needed... }
{ (constant string or Windows allocator: build a new string and copy) }
Temp:=Pointer(NewWideString(L));
if Length(S)>0 then
begin
{ copy min(l, old length+1) characters }
if l < succ(length(s)) then
movelen := l
{ also move terminating null }
else
movelen := succ(length(s));
Move(Pointer(S)^,Temp^,movelen * Sizeof(WideChar));
end;
{ release the old string (constants are skipped by decr_ref) }
fpc_widestr_decr_ref(Pointer(S));
Pointer(S):=Temp;
end;
{ Force nil termination in case it gets shorter }
PWord(Pointer(S)+l*sizeof(WideChar))^:=0;
{ store the new size in bytes in the header; skipped for strings owned by
the Windows allocator }
{$ifdef MSWINDOWS}
if not winwidestringalloc then
{$endif MSWINDOWS}
PWideRec(Pointer(S)-WideFirstOff)^.Len:=l*sizeof(WideChar);
end
else
begin
{ Length=0 }
if Pointer(S)<>nil then
fpc_widestr_decr_ref (Pointer(S));
Pointer(S):=Nil;
end;
end;
{*****************************************************************************
Public functions, In interface.
*****************************************************************************}
Function fpc_widestr_Unique(Var S : Pointer): Pointer; [Public,Alias : 'FPC_WIDESTR_UNIQUE']; compilerproc;
{
  Returns S unchanged; no copy is made here.
}
begin
  fpc_widestr_Unique:=S;
end;
Function Fpc_WideStr_Copy (Const S : WideString; Index,Size : SizeInt) : WideString;compilerproc;
{
  Returns up to Size characters of S starting at the 1-based position Index.
  An Index below 1 is treated as 1, Size is limited to the characters
  available after Index, and an empty string is returned when nothing
  remains.
}
var
  NewStr : Pointer;
  SrcLen : SizeInt;
begin
  NewStr:=Nil;
  { switch to a 0-based offset, never negative }
  dec(Index);
  if Index<0 then
    Index:=0;
  SrcLen:=Length(S);
  { Check Size. Accounts for Zero-length S, the double check is needed because
    Size can be maxint and will get <0 when adding index }
  if (Size>SrcLen) or
     (Index+Size>SrcLen) then
    Size:=SrcLen-Index;
  if Size>0 then
    begin
      NewStr:=Pointer(NewWideString (Size));
      if NewStr<>Nil then
        begin
          Move (PWideChar(S)[Index],NewStr^,Size*sizeof(WideChar));
          PWideRec(NewStr-WideFirstOff)^.Len:=Size*sizeof(WideChar);
          PWideChar(NewStr+Size*sizeof(WideChar))^:=#0;
        end;
    end;
  { release whatever the result held before installing the new string }
  fpc_widestr_decr_ref(Pointer(fpc_widestr_copy));
  Pointer(fpc_widestr_Copy):=NewStr;
end;
Function Pos (Const Substr : WideString; Const Source : WideString) : SizeInt;
{
  Returns the 1-based position of the first occurrence of Substr in Source,
  or 0 when Substr is empty or does not occur.
}
var
  Start,LastStart,SubLen : SizeInt;
  Cur : pwidechar;
begin
  Pos:=0;
  SubLen:=Length(SubStr);
  if SubLen<=0 then
    exit;
  { last 1-based position at which Substr still fits }
  LastStart:=Length(source)-SubLen+1;
  Cur:=@source[1];
  for Start:=1 to LastStart do
    begin
      { cheap first character test before the full comparison }
      if (SubStr[1]=Cur^) and
         (CompareWord(Substr[1],Cur^,SubLen)=0) then
        begin
          Pos:=Start;
          exit;
        end;
      inc(Cur);
    end;
end;
{ Faster version for a widechar alone }
Function Pos (c : WideChar; Const s : WideString) : SizeInt;
{
  Returns the 1-based position of the first occurrence of c in s, or 0 when
  c does not occur.
}
var
  idx : SizeInt;
  Cur : pwidechar;
begin
  pos:=0;
  Cur:=@s[1];
  for idx:=1 to length(s) do
    begin
      if Cur^=c then
        begin
          pos:=idx;
          exit;
        end;
      inc(Cur);
    end;
end;
{ Mixed-type overloads: convert the non-wide operand to a WideString and
  forward to the WideString implementations of Pos. }

{ WideChar in AnsiString }
Function Pos (c : WideChar; Const s : AnsiString) : SizeInt;
begin
  result:=Pos(c,WideString(s));
end;

{ AnsiString in WideString }
Function Pos (c : AnsiString; Const s : WideString) : SizeInt;{$ifdef SYSTEMINLINE}inline;{$endif}
begin
  result:=Pos(WideString(c),s);
end;

{ ShortString in WideString }
Function Pos (c : ShortString; Const s : WideString) : SizeInt;{$ifdef SYSTEMINLINE}inline;{$endif}
begin
  result:=Pos(WideString(c),s);
end;

{ WideString in AnsiString }
Function Pos (c : WideString; Const s : AnsiString) : SizeInt;{$ifdef SYSTEMINLINE}inline;{$endif}
begin
  result:=Pos(c,WideString(s));
end;
{ Faster version for a char alone. Must be implemented because }
{ pos(c: char; const s: shortstring) also exists, so otherwise }
{ using pos(char,pchar) will always call the shortstring version }
{ (exact match for first argument), also with $h+ (JM) }
Function Pos (c : Char; Const s : WideString) : SizeInt;
{
  Returns the 1-based position of the first occurrence of c in s, or 0 when
  c does not occur. c is converted to a WideChar once, before the scan.
}
var
  idx    : SizeInt;
  Needle : widechar;
  Cur    : pwidechar;
begin
  pos:=0;
  Needle:=c;
  Cur:=@s[1];
  for idx:=1 to length(s) do
    begin
      if Cur^=Needle then
        begin
          pos:=idx;
          exit;
        end;
      inc(Cur);
    end;
end;
Procedure Delete (Var S : WideString; Index,Size: SizeInt);
{
  Removes Size characters from S, starting at the 1-based position Index.
  Nothing happens when S is empty, Index is outside 1..Length(S) or Size is
  not positive. A Size reaching past the end of S deletes up to the end.

  The range checks are written as "Size>LS-Index" instead of
  "Size+Index>LS": the sum overflows for very large sizes (e.g.
  Delete(S,2,MaxInt)), which skipped the clamping and emptied the whole
  string.
}
Var
  LS : SizeInt;
begin
  LS:=Length(S);
  if (LS=0) or (Index<=0) or (Index>LS) or (Size<=0) then
    exit;
  UniqueString (S);
  { clamp Size to the characters available from Index on }
  if Size>LS-Index then
    Size:=LS-Index+1;
  if Size<=LS-Index then
    begin
      { move the tail, including the terminating #0, over the deleted part }
      Dec(Index);
      Move(PWideChar(S)[Index+Size],PWideChar(S)[Index],(LS-Index-Size+1)*sizeof(WideChar));
    end;
  Setlength(s,LS-Size);
end;
Procedure Insert (Const Source : WideString; Var S : WideString; Index : SizeInt);
{
  Inserts Source into S in front of the 1-based position Index.
  Index is limited to 1..Length(S)+1; an empty Source leaves S unchanged.
  The result is assembled in a temporary string and then assigned to S.
}
var
  Merged : WideString;
  SrcLen,DstLen : SizeInt;
begin
  SrcLen:=Length(Source);
  if SrcLen=0 then
    exit;
  if index <= 0 then
    index := 1;
  DstLen:=Length(S);
  if index > DstLen then
    index := DstLen+1;
  { from here on Index is the 0-based insertion offset }
  Dec(Index);
  Pointer(Merged) := NewWideString(SrcLen+DstLen);
  SetLength(Merged,SrcLen+DstLen);
  { head of S, the inserted text, tail of S }
  if Index>0 then
    move (PWideChar(S)^,PWideChar(Merged)^,Index*sizeof(WideChar));
  Move (PWideChar(Source)^,PWideChar(Merged)[Index],SrcLen*sizeof(WideChar));
  if (DstLen-Index)>0 then
    Move(PWideChar(S)[Index],PWideChar(Merged)[SrcLen+index],(DstLen-Index)*sizeof(WideChar));
  S:=Merged;
end;
function UpCase(const s : WideString) : WideString;
{
  Returns s converted by the installed widestring manager's
  UpperWideStringProc.
}
begin
  UpCase:=widestringmanager.UpperWideStringProc(s);
end;
Procedure SetString (Out S : WideString; Buf : PWideChar; Len : SizeInt);
{
  Sets S to a string of Len characters and, when Buf is not nil, fills it
  with the first Len characters of Buf. Embedded #0 characters are copied
  like any other character.
}
begin
  SetLength(S,Len);
  if (Len<=0) or (Buf=Nil) then
    exit;
  Move (Buf[0],S[1],Len*sizeof(WideChar));
end;
Procedure SetString (Out S : WideString; Buf : PChar; Len : SizeInt);
{
  Sets S to the wide conversion of the first Len characters of Buf.
  When Buf is nil (or Len is not positive) S is only resized to Len.

  Exactly Len bytes of Buf are used: the buffer is not expected to be
  #0 terminated and embedded #0 characters do not end the conversion.
  The former IndexByte(Buf^,Len+1,0) scan read one byte past the buffer and
  truncated the result at the first embedded #0.
}
begin
  SetLength(S,Len);
  If (Buf<>Nil) and (Len>0) then
    widestringmanager.Ansi2WideMoveProc(Buf,S,Len);
end;
{$ifndef FPUNONE}
Function fpc_Val_Real_WideStr(Const S : WideString; out Code : ValSInt): ValReal; [public, alias:'FPC_VAL_REAL_WIDESTR']; compilerproc;
{
  Val() for floating point values with a WideString source.
  Strings longer than 255 characters are rejected with Code=256 and a
  result of 0; shorter ones are converted to a short string and parsed.
}
Var
  Short : String;
begin
  fpc_Val_Real_WideStr := 0;
  if length(S) <= 255 then
    begin
      Short := S;
      Val(Short,fpc_Val_Real_WideStr,code);
    end
  else
    code := 256;
end;
{$endif}
function fpc_val_enum_widestr(str2ordindex:pointer;const s:widestring;out code:valsint):longint;compilerproc;
{
  Val() for enumeration values with a WideString source.
  Strings longer than 255 characters are rejected with code=256; the result
  is now 0 in that case, like in the other Val helpers of this file
  (it used to be left undefined).
  NOTE(review): str2ordindex is not used and the string is parsed with the
  ordinary Val for a longint - confirm whether the enum name lookup was
  meant to be used here.
}
var ss:shortstring;
begin
  fpc_val_enum_widestr:=0;
  if length(s)>255 then
    code:=256
  else
    begin
      ss:=s;
      val(ss,fpc_val_enum_widestr,code);
    end;
end;
Function fpc_Val_Currency_WideStr(Const S : WideString; out Code : ValSInt): Currency; [public, alias:'FPC_VAL_CURRENCY_WIDESTR']; compilerproc;
{
  Val() for Currency values with a WideString source.
  Strings longer than 255 characters are rejected with Code=256 and a
  result of 0; shorter ones are converted to a short string and parsed.
}
Var
  Short : String;
begin
  if length(S) <= 255 then
    begin
      Short := S;
      Val(Short,fpc_Val_Currency_WideStr,code);
    end
  else
    begin
      fpc_Val_Currency_WideStr:=0;
      code := 256;
    end;
end;
Function fpc_Val_UInt_WideStr (Const S : WideString; out Code : ValSInt): ValUInt; [public, alias:'FPC_VAL_UINT_WIDESTR']; compilerproc;
{
  Val() for unsigned integers with a WideString source.
  Strings longer than 255 characters are rejected with Code=256 and a
  result of 0; shorter ones are converted to a short string and parsed.
}
Var
  Short : ShortString;
begin
  fpc_Val_UInt_WideStr := 0;
  if length(S) <= 255 then
    begin
      Short := S;
      Val(Short,fpc_Val_UInt_WideStr,code);
    end
  else
    code := 256;
end;
Function fpc_Val_SInt_WideStr (DestSize: SizeInt; Const S : WideString; out Code : ValSInt): ValSInt; [public, alias:'FPC_VAL_SINT_WIDESTR']; compilerproc;
{
  Val() for signed integers with a WideString source.
  Strings longer than 255 characters are rejected with Code=256 and a
  result of 0; shorter ones are converted to a short string and handed to
  int_Val_SInt_ShortStr together with DestSize.
}
Var
  Short : ShortString;
begin
  fpc_Val_SInt_WideStr:=0;
  if length(S)<=255 then
    begin
      Short := S;
      fpc_Val_SInt_WideStr := int_Val_SInt_ShortStr(DestSize,Short,Code);
    end
  else
    code:=256;
end;
{$ifndef CPU64}
Function fpc_Val_qword_WideStr (Const S : WideString; out Code : ValSInt): qword; [public, alias:'FPC_VAL_QWORD_WIDESTR']; compilerproc;
{
  Val() for QWord values with a WideString source.
  Strings longer than 255 characters are rejected with Code=256 and a
  result of 0; shorter ones are converted to a short string and parsed.
}
Var
  Short : ShortString;
begin
  fpc_Val_qword_WideStr:=0;
  if length(S)<=255 then
    begin
      Short := S;
      Val(Short,fpc_Val_qword_WideStr,Code);
    end
  else
    code:=256;
end;
Function fpc_Val_int64_WideStr (Const S : WideString; out Code : ValSInt): Int64; [public, alias:'FPC_VAL_INT64_WIDESTR']; compilerproc;
{
  Val() for Int64 values with a WideString source.
  Strings longer than 255 characters are rejected with Code=256 and a
  result of 0; shorter ones are converted to a short string and parsed.
}
Var
  Short : ShortString;
begin
  fpc_Val_int64_WideStr:=0;
  if length(S)<=255 then
    begin
      Short := S;
      Val(Short,fpc_Val_int64_WideStr,Code);
    end
  else
    code:=256;
end;
{$endif CPU64}
{$ifndef FPUNONE}
procedure fpc_WideStr_Float(d : ValReal;len,fr,rt : SizeInt;out s : WideString);compilerproc;
{
  Str() for floating point values with a WideString destination: formats
  into a short string with str_real and assigns the result to s.
}
var
  Buf : shortstring;
begin
  str_real(len,fr,d,treal_type(rt),Buf);
  s:=Buf;
end;
{$endif}
procedure fpc_widestr_enum(ordinal,len:sizeint;typinfo,ord2strindex:pointer;out s:widestring);compilerproc;
{
  Str() for enumeration values with a WideString destination: formats into
  a short string with fpc_shortstr_enum and assigns the result to s.
}
var
  Buf : shortstring;
begin
  fpc_shortstr_enum(ordinal,len,typinfo,ord2strindex,Buf);
  s:=Buf;
end;
{$ifdef FPC_HAS_STR_CURRENCY}
procedure fpc_WideStr_Currency(c : Currency;len,fr : SizeInt;out s : WideString);compilerproc;
{
  Str() for Currency values with a WideString destination: formats into a
  short string and assigns the result to s.
}
var
  Buf : shortstring;
begin
  str(c:len:fr,Buf);
  s:=Buf;
end;
{$endif FPC_HAS_STR_CURRENCY}
Procedure fpc_WideStr_SInt(v : ValSint; Len : SizeInt; out S : WideString);compilerproc;
{
  Str() for signed integers with a WideString destination: formats into a
  short string and assigns the result to S.
}
Var
  Buf : ShortString;
begin
  Str (v:Len,Buf);
  S:=Buf;
end;
Procedure fpc_WideStr_UInt(v : ValUInt;Len : SizeInt; out S : WideString);compilerproc;
{
  Str() for unsigned integers with a WideString destination: formats into a
  short string and assigns the result to S.
}
Var
  Buf : ShortString;
begin
  str(v:Len,Buf);
  S:=Buf;
end;
{$ifndef CPU64}
Procedure fpc_WideStr_Int64(v : Int64; Len : SizeInt; out S : WideString);compilerproc;
{
  Str() for Int64 values with a WideString destination: formats into a
  short string and assigns the result to S.
}
Var
  Buf : ShortString;
begin
  Str (v:Len,Buf);
  S:=Buf;
end;
Procedure fpc_WideStr_Qword(v : Qword;Len : SizeInt; out S : WideString);compilerproc;
{
  Str() for QWord values with a WideString destination: formats into a
  short string and assigns the result to S.
}
Var
  Buf : ShortString;
begin
  str(v:Len,Buf);
  S:=Buf;
end;
{$endif CPU64}
{ converts an utf-16 code point or surrogate pair to utf-32 }
function utf16toutf32(const S: WideString; const index: SizeInt; out len: longint): UCS4Char; [public, alias: 'FPC_WIDETOUTF32'];
{
  Decodes the UTF-16 code point that starts at S[index].
  Returns the UTF-32 value and stores the number of code units consumed in
  len: 2 for a valid surrogate pair, 1 otherwise. An unpaired or invalid
  surrogate is returned unchanged with len=1.
}
var
  lead,trail : widechar;
begin
  lead:=s[index];
  { default: the code unit stands for itself. This covers #$0-#$D7FF and
    #$E000-#$FFFF as well as invalid surrogates }
  result:=UCS4Char(lead);
  len:=1;
  if (lead<=#$d7ff) or (lead>=#$e000) then
    exit;
  { high surrogate that is followed by another code unit? }
  if (lead<=#$dbff) and (index<length(s)) then
    begin
      trail:=s[index+1];
      if (trail>=#$dc00) and (trail<=#$dfff) then
        begin
          { convert the surrogate pair to UTF-32 }
          result:=(UCS4Char(lead)-$d800) shl 10 + (UCS4Char(trail)-$dc00) + $10000;
          len:=2;
        end;
    end;
end;
function UnicodeToUtf8(Dest: PChar; Source: PWideChar; MaxBytes: SizeInt): SizeInt;{$ifdef SYSTEMINLINE}inline;{$endif}
{
  Converts the #0 terminated wide string Source to UTF-8.
  The length of Source is determined here and the work is delegated to the
  overload that takes explicit lengths. A nil Source returns 0.
}
begin
  if not assigned(Source) then
    Result:=0
  else
    Result:=UnicodeToUtf8(Dest,MaxBytes,Source,IndexWord(Source^,-1,0));
end;
function UnicodeToUtf8(Dest: PChar; MaxDestBytes: SizeUInt; Source: PWideChar; SourceChars: SizeUInt): SizeUInt;
{
  Converts SourceChars UTF-16 code units from Source to UTF-8.

  Dest assigned: at most MaxDestBytes bytes are written to Dest, followed by
  a terminating #0 (which replaces the last byte when the buffer is full).
  Dest nil: nothing is written, only the required size is computed.
  The result is the number of bytes produced plus one for the terminator;
  a nil Source returns 0.
  Unpaired surrogates produce no output.

  A zero-sized destination is never written to: the terminator used to be
  stored in Dest[0] even when MaxDestBytes was 0.
}
var
  i,j : SizeUInt;
  w   : word;
  lw  : longword;
  len : longint;
begin
  result:=0;
  if source=nil then
    exit;
  i:=0;
  j:=0;
  if assigned(Dest) then
    begin
      while (i<SourceChars) and (j<MaxDestBytes) do
        begin
          w:=word(Source[i]);
          case w of
            0..$7f:
              begin
                Dest[j]:=char(w);
                inc(j);
              end;
            $80..$7ff:
              begin
                { stop when the two byte sequence does not fit }
                if j+1>=MaxDestBytes then
                  break;
                Dest[j]:=char($c0 or (w shr 6));
                Dest[j+1]:=char($80 or (w and $3f));
                inc(j,2);
              end;
            $800..$d7ff,$e000..$ffff:
              begin
                { stop when the three byte sequence does not fit }
                if j+2>=MaxDestBytes then
                  break;
                Dest[j]:=char($e0 or (w shr 12));
                Dest[j+1]:=char($80 or ((w shr 6) and $3f));
                Dest[j+2]:=char($80 or (w and $3f));
                inc(j,3);
              end;
            $d800..$dbff:
              {High Surrogates}
              begin
                { stop when the four byte sequence does not fit }
                if j+3>=MaxDestBytes then
                  break;
                if (i<sourcechars-1) and
                   (word(Source[i+1]) >= $dc00) and
                   (word(Source[i+1]) <= $dfff) then
                  begin
                    lw:=longword(utf16toutf32(Source[i] + Source[i+1], 1, len));
                    Dest[j]:=char($f0 or (lw shr 18));
                    Dest[j+1]:=char($80 or ((lw shr 12) and $3f));
                    Dest[j+2]:=char($80 or ((lw shr 6) and $3f));
                    Dest[j+3]:=char($80 or (lw and $3f));
                    inc(j,4);
                    { skip the low surrogate as well }
                    inc(i);
                  end;
              end;
          end;
          inc(i);
        end;

      { terminate the output inside the buffer; never touch an empty buffer }
      if MaxDestBytes>0 then
        begin
          if j>SizeUInt(MaxDestBytes-1) then
            j:=MaxDestBytes-1;
          Dest[j]:=#0;
        end;
    end
  else
    begin
      { no destination: only count the bytes that would be produced }
      while i<SourceChars do
        begin
          case word(Source[i]) of
            $0..$7f:
              inc(j);
            $80..$7ff:
              inc(j,2);
            $800..$d7ff,$e000..$ffff:
              inc(j,3);
            $d800..$dbff:
              begin
                if (i<sourcechars-1) and
                   (word(Source[i+1]) >= $dc00) and
                   (word(Source[i+1]) <= $dfff) then
                  begin
                    inc(j,4);
                    inc(i);
                  end;
              end;
          end;
          inc(i);
        end;
    end;
  result:=j+1;
end;
function Utf8ToUnicode(Dest: PWideChar; Source: PChar; MaxChars: SizeInt): SizeInt;{$ifdef SYSTEMINLINE}inline;{$endif}
{
  Converts the #0 terminated UTF-8 string Source to UTF-16.
  The length of Source is determined with strlen and the work is delegated
  to the overload that takes explicit lengths. A nil Source returns 0.
}
begin
  if not assigned(Source) then
    Result:=0
  else
    Result:=Utf8ToUnicode(Dest,MaxChars,Source,strlen(Source));
end;
function UTF8ToUnicode(Dest: PWideChar; MaxDestChars: SizeUInt; Source: PChar; SourceBytes: SizeUInt): SizeUInt;
const
UNICODE_INVALID=63;
var
InputUTF8: SizeUInt;
IBYTE: BYTE;
OutputUnicode: SizeUInt;
PRECHAR: SizeUInt;
TempBYTE: BYTE;
CharLen: SizeUint;
LookAhead: SizeUInt;
UC: SizeUInt;
begin
if not assigned(Source) then
begin
result:=0;
exit;
end;
result:=SizeUInt(-1);
InputUTF8:=0;
OutputUnicode:=0;
PreChar:=0;
if Assigned(Dest) Then
begin
while (OutputUnicode<MaxDestChars) and (InputUTF8<SourceBytes) do
begin
IBYTE:=byte(Source[InputUTF8]);
if (IBYTE and $80) = 0 then
begin
//One character US-ASCII, convert it to unicode
if IBYTE = 10 then
begin
If (PreChar<>13) and FALSE then
begin
//Expand to crlf, conform UTF-8.
//This procedure will break the memory alocation by
//FPC for the widestring, so never use it. Condition never true due the "and FALSE".
if OutputUnicode+1<MaxDestChars then
begin
Dest[OutputUnicode]:=WideChar(13);
inc(OutputUnicode);
Dest[OutputUnicode]:=WideChar(10);
inc(OutputUnicode);
PreChar:=10;
end
else
begin
Dest[OutputUnicode]:=WideChar(13);
inc(OutputUnicode);
end;
end
else
begin
Dest[OutputUnicode]:=WideChar(IBYTE);
inc(OutputUnicode);
PreChar:=IBYTE;
end;
end
else
begin
Dest[OutputUnicode]:=WideChar(IBYTE);
inc(OutputUnicode);
PreChar:=IBYTE;
end;
inc(InputUTF8);
end
else
begin
TempByte:=IBYTE;
CharLen:=0;
while (TempBYTE and $80)<>0 do
begin
TempBYTE:=(TempBYTE shl 1) and $FE;
inc(CharLen);
end;
//Test for the "CharLen" conforms UTF-8 string
//This means the 10xxxxxx pattern.
if SizeUInt(InputUTF8+CharLen-1)>SourceBytes then
begin
//Insuficient chars in string to decode
//UTF-8 array. Fallback to single char.
CharLen:= 1;
end;
for LookAhead := 1 to CharLen-1 do
begin
if ((byte(Source[InputUTF8+LookAhead]) and $80)<>$80) or
((byte(Source[InputUTF8+LookAhead]) and $40)<>$00) then
begin
//Invalid UTF-8 sequence, fallback.
CharLen:= LookAhead;
break;
end;
end;
UC:=$FFFF;
case CharLen of
1: begin
//Not valid UTF-8 sequence
UC:=UNICODE_INVALID;
end;
2: begin
//Two bytes UTF, convert it
UC:=(byte(Source[InputUTF8]) and $1F) shl 6;
UC:=UC or (byte(Source[InputUTF8+1]) and $3F);
if UC <= $7F then
begin
//Invalid UTF sequence.
UC:=UNICODE_INVALID;
end;
end;
3: begin
//Three bytes, convert it to unicode
UC:= (byte(Source[InputUTF8]) and $0F) shl 12;
UC:= UC or ((byte(Source[InputUTF8+1]) and $3F) shl 6);
UC:= UC or ((byte(Source[InputUTF8+2]) and $3F));
if (UC <= $7FF) or (UC >= $FFFE) or ((UC >= $D800) and (UC <= $DFFF)) then
begin
//Invalid UTF-8 sequence
UC:= UNICODE_INVALID;
End;
end;
4: begin
//Four bytes, convert it to two unicode characters
UC:= (byte(Source[InputUTF8]) and $07) shl 18;
UC:= UC or ((byte(Source[InputUTF8+1]) and $3F) shl 12);
UC:= UC or ((byte(Source[InputUTF8+2]) and $3F) shl 6);
UC:= UC or ((byte(Source[InputUTF8+3]) and $3F));
if (UC < $10000) or (UC > $10FFFF) then
begin
UC:= UNICODE_INVALID;
end
else
begin
{ only store pair if room }
dec(UC,$10000);
if (OutputUnicode<MaxDestChars-1) then
begin
Dest[OutputUnicode]:=WideChar(UC shr 10 + $D800);
inc(OutputUnicode);
UC:=(UC and $3ff) + $DC00;
end
else
begin
InputUTF8:= InputUTF8 + CharLen;
{ don't store anything }
CharLen:=0;
end;
end;
end;
5,6,7: begin
//Invalid UTF8 to unicode conversion,
//mask it as invalid UNICODE too.
UC:=UNICODE_INVALID;
end;
end;
if CharLen > 0 then
begin
PreChar:=UC;
Dest[OutputUnicode]:=WideChar(UC);
inc(OutputUnicode);
end;
InputUTF8:= InputUTF8 + CharLen;
end;
end;
Result:=OutputUnicode+1;
end
else
begin
while (InputUTF8<SourceBytes) do
begin
IBYTE:=byte(Source[InputUTF8]);
if (IBYTE and $80) = 0 then
begin
//One character US-ASCII, convert it to unicode
if IBYTE = 10 then
begin
if (PreChar<>13) and FALSE then
begin
//Expand to crlf, conform UTF-8.
//This procedure will break the memory alocation by
//FPC for the widestring, so never use it. Condition never true due the "and FALSE".
inc(OutputUnicode,2);
PreChar:=10;
end
else
begin
inc(OutputUnicode);
PreChar:=IBYTE;
end;
end
else
begin
inc(OutputUnicode);
PreChar:=IBYTE;
end;
inc(InputUTF8);
end
else
begin
TempByte:=IBYTE;
CharLen:=0;
while (TempBYTE and $80)<>0 do
begin
TempBYTE:=(TempBYTE shl 1) and $FE;
inc(CharLen);
end;
//Test for the "CharLen" conforms UTF-8 string
//This means the 10xxxxxx pattern.
if SizeUInt(InputUTF8+CharLen-1)>SourceBytes then
begin
//Insuficient chars in string to decode
//UTF-8 array. Fallback to single char.
CharLen:= 1;
end;
for LookAhead := 1 to CharLen-1 do
begin
if ((byte(Source[InputUTF8+LookAhead]) and $80)<>$80) or
((byte(Source[InputUTF8+LookAhead]) and $40)<>$00) then
begin
//Invalid UTF-8 sequence, fallback.
CharLen:= LookAhead;
break;
end;
end;
UC:=$FFFF;
case CharLen of
1: begin
//Not valid UTF-8 sequence
UC:=UNICODE_INVALID;
end;
2: begin
//Two bytes UTF, convert it
UC:=(byte(Source[InputUTF8]) and $1F) shl 6;
UC:=UC or (byte(Source[InputUTF8+1]) and $3F);
if UC <= $7F then
begin
//Invalid UTF sequence.
UC:=UNICODE_INVALID;
end;
end;
3: begin
//Three bytes, convert it to unicode
UC:= (byte(Source[InputUTF8]) and $0F) shl 12;
UC:= UC or ((byte(Source[InputUTF8+1]) and $3F) shl 6);
UC:= UC or ((byte(Source[InputUTF8+2]) and $3F));
If (UC <= $7FF) or (UC >= $FFFE) or ((UC >= $D800) and (UC <= $DFFF)) then
begin
//Invalid UTF-8 sequence
UC:= UNICODE_INVALID;
end;
end;
4: begin
//Four bytes, convert it to two unicode characters
UC:= (byte(Source[InputUTF8]) and $07) shl 18;
UC:= UC or ((byte(Source[InputUTF8+1]) and $3F) shl 12);
UC:= UC or ((byte(Source[InputUTF8+2]) and $3F) shl 6);
UC:= UC or ((byte(Source[InputUTF8+3]) and $3F));
if (UC < $10000) or (UC > $10FFFF) then
UC:= UNICODE_INVALID
else
{ extra character character }
inc(OutputUnicode);
end;
5,6,7: begin
//Invalid UTF8 to unicode conversion,
//mask it as invalid UNICODE too.
UC:=UNICODE_INVALID;
end;
end;
if CharLen > 0 then
begin
PreChar:=UC;
inc(OutputUnicode);
end;
InputUTF8:= InputUTF8 + CharLen;
end;
end;
Result:=OutputUnicode+1;
end;
end;
{ Encodes the WideString s as an UTF8String.

  An empty s gives an empty result. Otherwise a scratch string sized at three
  bytes per WideChar of s is filled by UnicodeToUtf8 and then shortened to the
  size that routine reports. When UnicodeToUtf8 reports 0, the result stays
  empty. }
function UTF8Encode(const s : WideString) : UTF8String;
  var
    written : SizeInt;
    buf : UTF8String;
  begin
    result:='';
    if s<>'' then
      begin
        SetLength(buf,length(s)*3);
        { the capacity passed is one larger than the string length, i.e. it
          counts the #0 that follows the string data }
        written:=UnicodeToUtf8(pchar(buf),length(buf)+1,PWideChar(s),length(s));
        { NOTE(review): the value returned by UnicodeToUtf8 is treated as
          including the terminating #0, hence the -1 below; confirm against
          its implementation }
        if written>0 then
          begin
            SetLength(buf,written-1);
            result:=buf;
          end;
      end;
  end;
{ Diagnostic texts that unimplementedwidestring writes to StdErr, one per
  line, before it reports the run-time error. }
const
SNoWidestrings = 'This binary has no widestrings support compiled in.';
SRecompileWithWidestrings = 'Recompile the application with a widestrings-manager in the program uses clause.';
{ Shared failure path of the placeholder widestring routines.

  When console I/O is compiled in and the program is a console application,
  the two explanatory messages are written to StdErr first. In every case
  error code 233 and the current frame (get_frame) are then handed to
  HandleErrorFrame. }
procedure unimplementedwidestring;
  begin
{$ifdef FPC_HAS_FEATURE_CONSOLEIO}
    { the explanation is only printed when IsConsole is set }
    if IsConsole then
      begin
        WriteLn(StdErr,SNoWidestrings);
        WriteLn(StdErr,SRecompileWithWidestrings);
      end;
{$endif FPC_HAS_FEATURE_CONSOLEIO}
    HandleErrorFrame(233,get_frame);
  end;
{$warnings off}
{ Placeholder case-conversion routine: it performs no conversion of s and
  only calls unimplementedwidestring. The function result is never assigned
  here; presumably that is why this group of routines is compiled with
  warnings switched off. }
function GenericWideCase(const s: WideString): WideString;
  begin
    unimplementedwidestring;
  end;
{ Placeholder comparison routine: s1 and s2 are not inspected, the call only
  ends up in unimplementedwidestring. The function result is never assigned
  here; presumably that is why this group of routines is compiled with
  warnings switched off. }
function CompareWideString(const s1, s2: WideString): PtrInt;
  begin
    unimplementedwidestring;
  end;
{ Placeholder text-comparison routine: s1 and s2 are not inspected, the call
  only ends up in unimplementedwidestring. The function result is never
  assigned here; presumably that is why this group of routines is compiled
  with warnings switched off. }
function CompareTextWideString(const s1, s2: WideString): PtrInt;
  begin
    unimplementedwidestring;
  end;
{$warnings on}
{ Forward declaration: initwidestringmanager takes the address of this
  function, so the name has to be known before its body, which must follow
  later in the same compilation unit. }
function CharLengthPChar(const Str: PChar): PtrInt;forward;
{ Resets the global widestringmanager record and fills in its entries.

  The whole record is zeroed first. The comparison and character-length
  entries are always set; the conversion and case-mapping entries are only
  set when HAS_WIDESTRINGMANAGER is not defined. }
procedure initwidestringmanager;
  begin
    { start from a record with every entry cleared }
    FillChar(widestringmanager,SizeOf(widestringmanager),0);

    { entries installed unconditionally }
    widestringmanager.CompareWideStringProc:=@CompareWideString;
    widestringmanager.CompareTextWideStringProc:=@CompareTextWideString;
    widestringmanager.CharLengthPCharProc:=@CharLengthPChar;

{$ifndef HAS_WIDESTRINGMANAGER}
    { default conversions plus the placeholder case mapping, which is used
      for both upper and lower case }
    widestringmanager.Wide2AnsiMoveProc:=@defaultWide2AnsiMove;
    widestringmanager.Ansi2WideMoveProc:=@defaultAnsi2WideMove;
    widestringmanager.UpperWideStringProc:=@GenericWideCase;
    widestringmanager.LowerWideStringProc:=@GenericWideCase;
{$endif HAS_WIDESTRINGMANAGER}
  end;