merge r13481 from cpstrnew branch by florian

+ support parsing of strings with code page specification
+ added encoding and elementsize field to ansi- and unicodestring records
+ some basic rtl support routines for encoding aware strings
+ DefaultSystemCodePage
+ DefaultUnicodeCodePage
+ ppu writing/loading of code page aware strings

git-svn-id: trunk@19080 -
This commit is contained in:
paul 2011-09-17 10:37:36 +00:00
parent a95512f0ec
commit 8a4634a7b1
12 changed files with 479 additions and 359 deletions

View File

@ -357,7 +357,7 @@ implementation
(tstringdef(def_from).len=tstringdef(def_to).len)) and
{ for ansi- and unicodestrings also the encoding must match }
(not(tstringdef(def_from).stringtype in [st_ansistring,st_unicodestring]) or
(tstringdef(def_from).stringencoding=tstringdef(def_to).stringencoding))then
(tstringdef(def_from).encoding=tstringdef(def_to).encoding))then
eq:=te_equal
else
begin

View File

@ -102,6 +102,9 @@ interface
MathPiExtended : textendedrec = (bytes : (64,0,201,15,218,162,33,104,194,53));
{$endif FPC_LITTLE_ENDIAN}
{$endif}
CP_UTF8 = 65001;
CP_UTF16 = 1200;
type
tcodepagestring = string[20];

View File

@ -513,7 +513,7 @@ interface
end;
{$endif}
tstringencoding = dword;
tstringencoding = word;
const
{ link options }

View File

@ -375,7 +375,7 @@ scanner_e_illegal_alignment_directive=02088_E_Illegal alignment directive
#
# Parser
#
# 03310 is the last used one
# 03314 is the last used one
#
% \section{Parser messages}
% This section lists all parser messages. The parser takes care of the
@ -1400,6 +1400,9 @@ parser_e_mapping_no_implements=03312_E_Interface "$1" can't be delegated by "$2"
% has to implement the interface directly. Delegation is not possible.
parser_e_implements_no_mapping=03313_E_Interface "$1" can't have method resolutions, "$2" already delegates it
% Method resoulution is only possible for interfaces that are implemented directly, not by delegation.
parser_e_invalid_codepage=03314_E_Invalid codepage
% When declaring a string with a given codepage, the range of valid codepages values is limited
% to 0 to 65535.
% \end{description}
# Type Checking
#

View File

@ -405,6 +405,7 @@ const
parser_e_duplicate_implements_clause=03311;
parser_e_mapping_no_implements=03312;
parser_e_implements_no_mapping=03313;
parser_e_invalid_codepage=03314;
type_e_mismatch=04000;
type_e_incompatible_types=04001;
type_e_not_equal_types=04002;
@ -903,9 +904,9 @@ const
option_info=11024;
option_help_pages=11025;
MsgTxtSize = 61274;
MsgTxtSize = 61299;
MsgIdxMax : array[1..20] of longint=(
26,89,314,104,85,54,112,23,202,63,
26,89,315,104,85,54,112,23,202,63,
50,20,1,1,1,1,1,1,1,1
);

File diff suppressed because it is too large Load Diff

View File

@ -96,51 +96,76 @@ implementation
begin
def:=cshortstringtype;
consume(_STRING);
if (token=_LECKKLAMMER) then
if token=_LECKKLAMMER then
begin
if not(allowtypedef) then
Message(parser_e_no_local_para_def);
consume(_LECKKLAMMER);
p:=comp_expr(true,false);
if not is_constintnode(p) then
begin
Message(parser_e_illegal_expression);
{ error recovery }
consume(_RECKKLAMMER);
end
else
begin
if (tordconstnode(p).value<=0) then
begin
Message(parser_e_invalid_string_size);
tordconstnode(p).value:=255;
end;
if not(allowtypedef) then
Message(parser_e_no_local_para_def);
consume(_LECKKLAMMER);
p:=comp_expr(true,false);
if not is_constintnode(p) then
begin
Message(parser_e_illegal_expression);
{ error recovery }
consume(_RECKKLAMMER);
if tordconstnode(p).value>255 then
end
else
begin
if (tordconstnode(p).value<=0) then
begin
{ longstring is currently unsupported (CEC)! }
{ t:=tstringdef.createlong(tordconstnode(p).value))}
Message(parser_e_invalid_string_size);
tordconstnode(p).value:=255;
def:=tstringdef.createshort(int64(tordconstnode(p).value));
end;
if tordconstnode(p).value>255 then
begin
{ longstring is currently unsupported (CEC)! }
{ t:=tstringdef.createlong(tordconstnode(p).value))}
Message(parser_e_invalid_string_size);
tordconstnode(p).value:=255;
def:=tstringdef.createshort(int64(tordconstnode(p).value));
end
else
if tordconstnode(p).value<>255 then
def:=tstringdef.createshort(int64(tordconstnode(p).value));
end;
p.free;
else
if tordconstnode(p).value<>255 then
def:=tstringdef.createshort(int64(tordconstnode(p).value));
consume(_RECKKLAMMER);
end;
p.free;
end
else if try_to_consume(_GT) then
begin
consume(_LT);
end
else
begin
if cs_ansistrings in current_settings.localswitches then
def:=cansistringtype
else
def:=cshortstringtype;
end;
else if token=_LSHARPBRACKET then
begin
if not(allowtypedef) then
Message(parser_e_no_local_para_def);
consume(_LSHARPBRACKET);
p:=comp_expr(true,false);
if not is_constintnode(p) then
begin
Message(parser_e_illegal_expression);
{ error recovery }
end
else
begin
if (tordconstnode(p).value<0) or (tordconstnode(p).value>65535) then
begin
Message(parser_e_invalid_codepage);
tordconstnode(p).value:=0;
end;
if tordconstnode(p).value=CP_UTF16 then
def:=tstringdef.createunicode
else
begin
def:=tstringdef.createansi;
tstringdef(def).encoding:=int64(tordconstnode(p).value);
end;
consume(_RSHARPBRACKET);
end;
p.free;
end
else
begin
if cs_ansistrings in current_settings.localswitches then
def:=cansistringtype
else
def:=cshortstringtype;
end;
end;

View File

@ -1457,6 +1457,7 @@ implementation
inherited ppuload(stringdef,ppufile);
stringtype:=st_ansistring;
len:=ppufile.getaint;
encoding:=ppufile.getword;
savesize:=sizeof(pint);
end;
@ -1483,6 +1484,7 @@ implementation
begin
inherited create(stringdef);
stringtype:=st_unicodestring;
encoding:=CP_UTF16;
len:=-1;
savesize:=sizeof(pint);
end;
@ -1493,6 +1495,7 @@ implementation
inherited ppuload(stringdef,ppufile);
stringtype:=st_unicodestring;
len:=ppufile.getaint;
encoding:=ppufile.getword;
savesize:=sizeof(pint);
end;
@ -1502,6 +1505,7 @@ implementation
result:=tstringdef.create(typ);
result.typ:=stringdef;
tstringdef(result).stringtype:=stringtype;
tstringdef(result).encoding:=encoding;
tstringdef(result).len:=len;
tstringdef(result).savesize:=savesize;
end;
@ -1529,6 +1533,8 @@ implementation
end
else
ppufile.putaint(len);
if stringtype in [st_ansistring,st_unicodestring] then
ppufile.putword(encoding);
case stringtype of
st_shortstring : ppufile.writeentry(ibshortstringdef);
st_longstring : ppufile.writeentry(iblongstringdef);

View File

@ -36,22 +36,26 @@
Type
PAnsiRec = ^TAnsiRec;
TAnsiRec = Packed Record
Ref,
Len : SizeInt;
First : Char;
CodePage : TSystemCodePage;
ElementSize : Word;
{$ifdef CPU64}
{ align fields }
Dummy : DWord;
{$endif CPU64}
Ref : SizeInt;
Len : SizeInt;
First : Char;
end;
Const
AnsiRecLen = SizeOf(TAnsiRec);
FirstOff = SizeOf(TAnsiRec)-1;
AnsiFirstOff = SizeOf(TAnsiRec)-1;
{****************************************************************************
Internal functions, not in interface.
****************************************************************************}
Function NewAnsiString(Len : SizeInt) : Pointer;
{
Allocate a new AnsiString on the heap.
@ -66,8 +70,10 @@ begin
begin
PAnsiRec(P)^.Ref:=1; { Set reference count }
PAnsiRec(P)^.Len:=0; { Initial length }
PAnsiRec(P)^.CodePage:=DefaultSystemCodePage;
PAnsiRec(P)^.ElementSize:=SizeOf(AnsiChar);
PAnsiRec(P)^.First:=#0; { Terminating #0 }
inc(p,firstoff); { Points to string now }
inc(p,AnsiFirstOff); { Points to string now }
end;
NewAnsiString:=P;
end;
@ -80,7 +86,7 @@ Procedure DisposeAnsiString(Var S : Pointer); {$IFNDEF VER2_0} Inline; {$ENDIF}
begin
If S=Nil then
exit;
Dec (S,FirstOff);
Dec (S,AnsiFirstOff);
FreeMem (S);
S:=Nil;
end;
@ -99,7 +105,7 @@ Begin
{ Zero string }
If S=Nil then exit;
{ check for constant strings ...}
l:=@PAnsiRec(S-FirstOff)^.Ref;
l:=@PAnsiRec(S-AnsiFirstOff)^.Ref;
If l^<0 then exit;
{ declocked does a MT safe dec and returns true, if the counter is 0 }
If declocked(l^) then
@ -117,8 +123,8 @@ Begin
If S=Nil then
exit;
{ Let's be paranoid : Constant string ??}
If PAnsiRec(S-FirstOff)^.Ref<0 then exit;
inclocked(PAnsiRec(S-FirstOff)^.Ref);
If PAnsiRec(S-AnsiFirstOff)^.Ref<0 then exit;
inclocked(PAnsiRec(S-AnsiFirstOff)^.Ref);
end;
@ -133,8 +139,8 @@ begin
if DestS=S2 then
exit;
If S2<>nil then
If PAnsiRec(S2-FirstOff)^.Ref>0 then
inclocked(PAnsiRec(S2-FirstOff)^.ref);
If PAnsiRec(S2-AnsiFirstOff)^.Ref>0 then
inclocked(PAnsiRec(S2-AnsiFirstOff)^.Ref);
{ Decrease the reference count on the old S1 }
fpc_ansistr_decr_ref (DestS);
{ And finally, have DestS pointing to S2 (or its copy) }
@ -576,17 +582,17 @@ begin
begin
GetMem(Pointer(S),AnsiRecLen+L);
PAnsiRec(S)^.Ref:=1;
inc(Pointer(S),firstoff);
inc(Pointer(S),AnsiFirstOff);
end
else if PAnsiRec(Pointer(S)-FirstOff)^.Ref=1 then
else if PAnsiRec(Pointer(S)-AnsiFirstOff)^.Ref=1 then
begin
Dec(Pointer(S),FirstOff);
Dec(Pointer(S),AnsiFirstOff);
lens:=MemSize(Pointer(s));
lena:=AnsiRecLen+L;
{ allow shrinking string if that saves at least half of current size }
if (lena>lens) or ((lens>32) and (lena<=(lens div 2))) then
reallocmem(pointer(S),AnsiRecLen+L);
Inc(Pointer(S),FirstOff);
Inc(Pointer(S),AnsiFirstOff);
end
else
begin
@ -601,14 +607,14 @@ begin
movelen := lens;
Move(Pointer(S)^,Temp^,movelen);
{ ref count dropped to zero in the mean time? }
If (PAnsiRec(Pointer(S)-FirstOff)^.Ref > 0) and
declocked(PAnsiRec(Pointer(S)-FirstOff)^.Ref) then
freemem(PAnsiRec(Pointer(s)-FirstOff));
If (PAnsiRec(Pointer(S)-AnsiFirstOff)^.Ref > 0) and
declocked(PAnsiRec(Pointer(S)-AnsiFirstOff)^.Ref) then
freemem(PAnsiRec(Pointer(s)-AnsiFirstOff));
Pointer(S):=Temp;
end;
{ Force nil termination in case it gets shorter }
PByte(Pointer(S)+l)^:=0;
PAnsiRec(Pointer(S)-FirstOff)^.Len:=l;
PAnsiRec(Pointer(S)-AnsiFirstOff)^.Len:=l;
end
else
begin
@ -655,10 +661,10 @@ Var
SNew : Pointer;
L : SizeInt;
begin
L:=PAnsiRec(Pointer(S)-FirstOff)^.len;
L:=PAnsiRec(Pointer(S)-AnsiFirstOff)^.len;
SNew:=NewAnsiString (L);
Move (Pointer(S)^,SNew^,L+1);
PAnsiRec(SNew-FirstOff)^.len:=L;
PAnsiRec(SNew-AnsiFirstOff)^.len:=L;
fpc_ansistr_decr_ref (Pointer(S)); { Thread safe }
pointer(S):=SNew;
pointer(result):=SNew;
@ -677,7 +683,7 @@ begin
pointer(result) := pointer(s);
If Pointer(S)=Nil then
exit;
if PAnsiRec(Pointer(S)-Firstoff)^.Ref<>1 then
if PAnsiRec(Pointer(S)-AnsiFirstOff)^.Ref<>1 then
result:=fpc_truely_ansistr_unique(s);
end;
{$endif FPC_SYSTEM_HAS_ANSISTR_UNIQUE}
@ -745,7 +751,7 @@ begin
if ResultAddress<>Nil then
begin
Move (Pointer(Pointer(S)+index)^,ResultAddress^,Size);
PAnsiRec(ResultAddress-FirstOff)^.Len:=Size;
PAnsiRec(ResultAddress-AnsiFirstOff)^.Len:=Size;
PByte(ResultAddress+Size)^:=0;
end;
end;
@ -1094,3 +1100,31 @@ begin
for i := 1 to length (s) do
result[i] := lowercase(s[i]);
end;
function StringCodePage(const S: RawByteString): TSystemCodePage; overload;
begin
if assigned(S) then
Result:=PAnsiRec(pointer(S)-AnsiFirstOff)^.CodePage
else
Result:=SizeOf(AnsiChar);
end;
function StringElementSize(const S: RawByteString): Word; overload;
begin
if assigned(S) then
Result:=PAnsiRec(pointer(S)-AnsiFirstOff)^.ElementSize
else
Result:=SizeOf(AnsiChar);
end;
function StringRefCount(const S: RawByteString): SizeInt; overload;
begin
if assigned(S) then
Result:=PAnsiRec(pointer(S)-AnsiFirstOff)^.Ref
else
Result:=SizeOf(AnsiChar);
end;

View File

@ -290,9 +290,11 @@ Type
PUCS4CharArray = ^TUCS4CharArray;
UCS4String = array of UCS4Char;
UTF8String = type ansistring;
UTF8String = String<65001>;
PUTF8String = ^UTF8String;
RawByteString = String<$ffff>;
HRESULT = type Longint;
{$ifndef FPUNONE}
TDateTime = type Double;
@ -361,6 +363,8 @@ Type
PUnicodeChar = ^UnicodeChar;
PUnicodeString = ^UnicodeString;
TSystemCodePage = Word;
{ Needed for fpc_get_output }
PText = ^Text;
@ -483,6 +487,11 @@ var
{ Threading support }
fpc_threadvar_relocate_proc : pointer; public name 'FPC_THREADVAR_RELOCATE';
DefaultSystemCodePage,
DefaultUnicodeCodePage,
UTF8CompareLocale : TSystemCodePage;
{$ifndef HAS_CMDLINE}
{Value should be changed during system initialization as appropriate.}
var cmdline:Pchar=nil;
@ -871,6 +880,10 @@ Procedure Delete (var S : AnsiString; Index,Size: SizeInt);
Function StringOfChar(c : char;l : SizeInt) : AnsiString;
function upcase(const s : ansistring) : ansistring;
function lowercase(const s : ansistring) : ansistring;
function StringCodePage(const S : RawByteString): Word; overload;
function StringElementSize(const S : RawByteString): Word; overload;
function StringRefCount(const S : RawByteString): SizeInt; overload;
{$endif FPC_HAS_FEATURE_ANSISTRINGS}

View File

@ -125,4 +125,6 @@ Procedure GetUnicodeStringManager (Var Manager : TUnicodeStringManager);
Procedure SetUnicodeStringManager (Const New : TUnicodeStringManager);
Procedure SetUnicodeStringManager (Const New : TUnicodeStringManager; Var Old: TUnicodeStringManager);
function StringElementSize(const S : UnicodeString): Word; overload;
function StringRefCount(const S : UnicodeString): SizeInt; overload;
function StringCodePage(const S : UnicodeString): Word; overload;

View File

@ -37,9 +37,15 @@
Type
PUnicodeRec = ^TUnicodeRec;
TUnicodeRec = Packed Record
Ref : SizeInt;
Len : SizeInt;
First : UnicodeChar;
CodePage : TSystemCodePage;
ElementSize : Word;
{$ifdef CPU64}
{ align fields }
Dummy : DWord;
{$endif CPU64}
Ref : SizeInt;
Len : SizeInt;
First : UnicodeChar;
end;
Const
@ -139,11 +145,12 @@ Procedure SetWideStringManager (Const New : TUnicodeStringManager);
begin
widestringmanager:=New;
end;
{****************************************************************************
{****************************************************************************
Internal functions, not in interface.
****************************************************************************}
procedure UnicodeStringError;
begin
HandleErrorFrame(204,get_frame);
@ -178,10 +185,12 @@ begin
GetMem(P,Len*sizeof(UnicodeChar)+UnicodeRecLen);
If P<>Nil then
begin
PUnicodeRec(P)^.Len:=Len*2; { Initial length }
PUnicodeRec(P)^.Ref:=1; { Initial Refcount }
PUnicodeRec(P)^.First:=#0; { Terminating #0 }
inc(p,UnicodeFirstOff); { Points to string now }
PUnicodeRec(P)^.Len:=Len*2; { Initial length }
PUnicodeRec(P)^.Ref:=1; { Initial Refcount }
PUnicodeRec(P)^.CodePage:=DefaultUnicodeCodePage;
PUnicodeRec(P)^.ElementSize:=SizeOf(UnicodeChar);
PUnicodeRec(P)^.First:=#0; { Terminating #0 }
inc(p,UnicodeFirstOff); { Points to string now }
end
else
UnicodeStringError;
@ -1338,20 +1347,17 @@ begin
if (l>0) then
begin
if Pointer(S)=nil then
begin
{ Need a complete new string...}
Pointer(s):=NewUnicodeString(l);
end
{ windows doesn't support reallocing unicodestrings, this code
is anyways subject to be removed because unicodestrings shouldn't be
ref. counted anymore (FK) }
begin
{ Need a complete new string...}
Pointer(s):=NewUnicodeString(l);
end
else
if (PUnicodeRec(Pointer(S)-UnicodeFirstOff)^.Ref = 1) then
begin
Dec(Pointer(S),UnicodeFirstOff);
if SizeUInt(L*sizeof(UnicodeChar)+UnicodeRecLen)>MemSize(Pointer(S)) then
begin
Dec(Pointer(S),UnicodeFirstOff);
if SizeUInt(L*sizeof(UnicodeChar)+UnicodeRecLen)>MemSize(Pointer(S)) then
reallocmem(pointer(S), L*sizeof(UnicodeChar)+UnicodeRecLen);
Inc(Pointer(S), UnicodeFirstOff);
Inc(Pointer(S), UnicodeFirstOff);
end
else
begin
@ -1612,7 +1618,6 @@ begin
end;
Procedure Delete (Var S : UnicodeString; Index,Size: SizeInt);
Var
LS : SizeInt;
@ -2522,6 +2527,34 @@ procedure unimplementedunicodestring;
HandleErrorFrame(233,get_frame);
end;
function StringElementSize(const S: UnicodeString): Word; overload;
begin
if assigned(S) then
Result:=PUnicodeRec(pointer(S)-UnicodeFirstOff)^.ElementSize
else
Result:=SizeOf(UnicodeChar);
end;
function StringRefCount(const S: UnicodeString): SizeInt; overload;
begin
if assigned(S) then
Result:=PUnicodeRec(pointer(S)-UnicodeFirstOff)^.Ref
else
Result:=SizeOf(UnicodeChar);
end;
function StringCodePage(const S: UnicodeString): TSystemCodePage; overload;
begin
if assigned(S) then
Result:=PUnicodeRec(pointer(S)-UnicodeFirstOff)^.CodePage
else
Result:=SizeOf(UnicodeChar);
end;
{$warnings off}
function GenericUnicodeCase(const s : UnicodeString) : UnicodeString;
begin