compiler:

- add helper function getansistringcodepage which returns explicitly set codepage or 0 in other case
  - add helper function getansistringdef which return a def with explicitly set codepage or cansistringtype in other case
  - change tstoreddef.createnai constructor to allow set codepage in constructor
  - don't convert string constants to rawbytestring. if string constant already has a codepage - preserve it or convert to ansistring codepage (delphi compatible)
  - don't perform string conversion from ansistring to strings with explicitly set codepage (by directive or by compiler switch) and vice versa (delphi compatible)
  + test which covers most of the cases

git-svn-id: trunk@19510 -
This commit is contained in:
paul 2011-10-19 02:45:52 +00:00
parent 212b14a46b
commit c6ca9e5091
18 changed files with 161 additions and 36 deletions

1
.gitattributes vendored
View File

@ -9978,6 +9978,7 @@ tests/test/tcpstr13.pp svneol=native#text/pascal
tests/test/tcpstr14.pp svneol=native#text/pascal tests/test/tcpstr14.pp svneol=native#text/pascal
tests/test/tcpstr15.pp svneol=native#text/pascal tests/test/tcpstr15.pp svneol=native#text/pascal
tests/test/tcpstr16.pp svneol=native#text/pascal tests/test/tcpstr16.pp svneol=native#text/pascal
tests/test/tcpstr17.pp svneol=native#text/pascal
tests/test/tcpstr2.pp svneol=native#text/plain tests/test/tcpstr2.pp svneol=native#text/plain
tests/test/tcpstr2a.pp svneol=native#text/plain tests/test/tcpstr2a.pp svneol=native#text/plain
tests/test/tcpstr3.pp svneol=native#text/plain tests/test/tcpstr3.pp svneol=native#text/plain

View File

@ -150,7 +150,7 @@ uses
make_mangledname('RESSTR',current_module.localsymtable,'START'),AT_DATA,0)); make_mangledname('RESSTR',current_module.localsymtable,'START'),AT_DATA,0));
{ Write unitname entry } { Write unitname entry }
namelab:=emit_ansistring_const(current_asmdata.asmlists[al_const],@current_module.localsymtable.name^[1],length(current_module.localsymtable.name^),tstringdef(cansistringtype).encoding,False); namelab:=emit_ansistring_const(current_asmdata.asmlists[al_const],@current_module.localsymtable.name^[1],length(current_module.localsymtable.name^),getansistringcodepage,False);
current_asmdata.asmlists[al_resourcestrings].concat(tai_const.create_sym(namelab)); current_asmdata.asmlists[al_resourcestrings].concat(tai_const.create_sym(namelab));
current_asmdata.asmlists[al_resourcestrings].concat(tai_const.create_sym(nil)); current_asmdata.asmlists[al_resourcestrings].concat(tai_const.create_sym(nil));
current_asmdata.asmlists[al_resourcestrings].concat(tai_const.create_sym(nil)); current_asmdata.asmlists[al_resourcestrings].concat(tai_const.create_sym(nil));
@ -166,12 +166,12 @@ uses
new_section(current_asmdata.asmlists[al_const],sec_rodata,make_mangledname('RESSTR',current_module.localsymtable,'d_'+r.name),sizeof(pint)); new_section(current_asmdata.asmlists[al_const],sec_rodata,make_mangledname('RESSTR',current_module.localsymtable,'d_'+r.name),sizeof(pint));
{ Write default value } { Write default value }
if assigned(R.value) and (R.len<>0) then if assigned(R.value) and (R.len<>0) then
valuelab:=emit_ansistring_const(current_asmdata.asmlists[al_const],R.Value,R.Len,tstringdef(cansistringtype).encoding,False) valuelab:=emit_ansistring_const(current_asmdata.asmlists[al_const],R.Value,R.Len,getansistringcodepage,False)
else else
valuelab:=nil; valuelab:=nil;
{ Append the name as a ansistring. } { Append the name as a ansistring. }
current_asmdata.asmlists[al_const].concat(cai_align.Create(const_align(sizeof(pint)))); current_asmdata.asmlists[al_const].concat(cai_align.Create(const_align(sizeof(pint))));
namelab:=emit_ansistring_const(current_asmdata.asmlists[al_const],@R.Name[1],length(R.name),tstringdef(cansistringtype).encoding,False); namelab:=emit_ansistring_const(current_asmdata.asmlists[al_const],@R.Name[1],length(R.name),getansistringcodepage,False);
{ {
Resourcestring index: Resourcestring index:

View File

@ -366,10 +366,15 @@ implementation
else if (tstringdef(def_from).stringtype=tstringdef(def_to).stringtype) and else if (tstringdef(def_from).stringtype=tstringdef(def_to).stringtype) and
(tstringdef(def_from).stringtype=st_ansistring) then (tstringdef(def_from).stringtype=st_ansistring) then
begin begin
{ don't convert ansistrings if any conditions is true:
1) same encoding
2) from explicit codepage ansistring to ansistring and vice versa
3) from any ansistring to rawbytestring }
if (tstringdef(def_from).encoding=tstringdef(def_to).encoding) or if (tstringdef(def_from).encoding=tstringdef(def_to).encoding) or
((tstringdef(def_to).encoding=0) and (tstringdef(def_from).encoding=getansistringcodepage)) or
((tstringdef(def_to).encoding=getansistringcodepage) and (tstringdef(def_from).encoding=0)) or
(tstringdef(def_to).encoding=globals.CP_NONE) then (tstringdef(def_to).encoding=globals.CP_NONE) then
begin begin
//doconv := tc_string_2_string;
eq:=te_equal; eq:=te_equal;
end end
else else

View File

@ -169,6 +169,9 @@ interface
{# Returns true if p is an ansi string type } {# Returns true if p is an ansi string type }
function is_ansistring(p : tdef) : boolean; function is_ansistring(p : tdef) : boolean;
{# Returns true if p is an ansi string type with codepage 0 }
function is_rawbytestring(p : tdef) : boolean;
{# Returns true if p is a long string type } {# Returns true if p is a long string type }
function is_longstring(p : tdef) : boolean; function is_longstring(p : tdef) : boolean;
@ -617,6 +620,14 @@ implementation
(tstringdef(p).stringtype=st_ansistring); (tstringdef(p).stringtype=st_ansistring);
end; end;
{ true if p is an ansi string def with codepage CP_NONE }
function is_rawbytestring(p : tdef) : boolean;
begin
is_rawbytestring:=(p.typ=stringdef) and
(tstringdef(p).stringtype=st_ansistring) and
(tstringdef(p).encoding=globals.CP_NONE);
end;
{ true if p is an long string def } { true if p is an long string def }
function is_longstring(p : tdef) : boolean; function is_longstring(p : tdef) : boolean;
begin begin

View File

@ -143,6 +143,7 @@ interface
checkforwarddefs, checkforwarddefs,
deflist, deflist,
symlist : TFPObjectList; symlist : TFPObjectList;
ansistrdef : tobject; { an ansistring def redefined for the current module }
wpoinfo : tunitwpoinfobase; { whole program optimization-related information that is generated during the current run for this unit } wpoinfo : tunitwpoinfobase; { whole program optimization-related information that is generated during the current run for this unit }
globalsymtable, { pointer to the global symtable of this unit } globalsymtable, { pointer to the global symtable of this unit }
localsymtable : TSymtable;{ pointer to the local symtable of this unit } localsymtable : TSymtable;{ pointer to the local symtable of this unit }
@ -523,6 +524,7 @@ implementation
derefdataintflen:=0; derefdataintflen:=0;
deflist:=TFPObjectList.Create(false); deflist:=TFPObjectList.Create(false);
symlist:=TFPObjectList.Create(false); symlist:=TFPObjectList.Create(false);
ansistrdef:=nil;
wpoinfo:=nil; wpoinfo:=nil;
checkforwarddefs:=TFPObjectList.Create(false); checkforwarddefs:=TFPObjectList.Create(false);
extendeddefs := TFPHashObjectList.Create(true); extendeddefs := TFPHashObjectList.Create(true);
@ -634,6 +636,7 @@ implementation
derefdata.free; derefdata.free;
deflist.free; deflist.free;
symlist.free; symlist.free;
ansistrdef:=nil;
wpoinfo.free; wpoinfo.free;
checkforwarddefs.free; checkforwarddefs.free;
globalsymtable.free; globalsymtable.free;

View File

@ -1665,8 +1665,8 @@ implementation
inserttypeconv(left,rd) inserttypeconv(left,rd)
else else
begin begin
inserttypeconv(left,cansistringtype); inserttypeconv(left,getansistringdef);
inserttypeconv(right,cansistringtype); inserttypeconv(right,getansistringdef);
end; end;
end; end;
st_longstring : st_longstring :

View File

@ -258,7 +258,6 @@ implementation
href: treference; href: treference;
pool: THashSet; pool: THashSet;
entry: PHashSetItem; entry: PHashSetItem;
cp: tstringencoding;
const const
PoolMap: array[tconststringtype] of TConstPoolType = ( PoolMap: array[tconststringtype] of TConstPoolType = (
@ -286,16 +285,7 @@ implementation
entry := pool.FindOrAdd(pcompilerwidestring(value_str)^.data,len*cwidechartype.size) entry := pool.FindOrAdd(pcompilerwidestring(value_str)^.data,len*cwidechartype.size)
else else
if cst_type = cst_ansistring then if cst_type = cst_ansistring then
begin entry := PHashSetItem(TTagHashSet(pool).FindOrAdd(value_str,len,tstringdef(resultdef).encoding))
cp:=tstringdef(resultdef).encoding;
{ force output of RawByteString constants in CP_ACP codepage }
if cp=CP_NONE then
cp:=0;
{ for delphiuncode mode output CP_ACP constants in the compiler codepage }
if (cp=0) and (cs_explicit_codepage in current_settings.moduleswitches) then
cp:=current_settings.sourcecodepage;
entry := PHashSetItem(TTagHashSet(pool).FindOrAdd(value_str,len,cp))
end
else else
entry := pool.FindOrAdd(value_str,len); entry := pool.FindOrAdd(value_str,len);
@ -310,7 +300,7 @@ implementation
if len=0 then if len=0 then
InternalError(2008032301) { empty string should be handled above } InternalError(2008032301) { empty string should be handled above }
else else
lastlabel:=emit_ansistring_const(current_asmdata.AsmLists[al_typedconsts],value_str,len,cp); lastlabel:=emit_ansistring_const(current_asmdata.AsmLists[al_typedconsts],value_str,len,tstringdef(resultdef).encoding);
end; end;
cst_unicodestring, cst_unicodestring,
cst_widestring: cst_widestring:

View File

@ -269,7 +269,12 @@ implementation
remain too so that not too many/few bits are laoded } remain too so that not too many/few bits are laoded }
if equal_defs(p.resultdef,def) and if equal_defs(p.resultdef,def) and
not is_bitpacked_access(p) then not is_bitpacked_access(p) then
p.resultdef:=def begin
{ don't replace encoded string constants to rawbytestring encoding.
preserve the codepage }
if not (is_rawbytestring(def) and (p.nodetype=stringconstn)) then
p.resultdef:=def
end
else else
begin begin
case convtype of case convtype of
@ -598,7 +603,7 @@ implementation
(p.nodetype=stringconstn) and (p.nodetype=stringconstn) and
{ don't cast to AnsiString if already casted to Wide/UnicodeString, issue #18266 } { don't cast to AnsiString if already casted to Wide/UnicodeString, issue #18266 }
(tstringconstnode(p).cst_type in [cst_conststring,cst_shortstring,cst_longstring]) then (tstringconstnode(p).cst_type in [cst_conststring,cst_shortstring,cst_longstring]) then
p:=ctypeconvnode.create_internal(p,cansistringtype) p:=ctypeconvnode.create_internal(p,getansistringdef)
else else
case p.resultdef.typ of case p.resultdef.typ of
enumdef : enumdef :
@ -994,7 +999,7 @@ implementation
else else
begin begin
if tstringconstnode(left).len>255 then if tstringconstnode(left).len>255 then
inserttypeconv(left,cansistringtype) inserttypeconv(left,getansistringdef)
else else
inserttypeconv(left,cshortstringtype); inserttypeconv(left,cshortstringtype);
end; end;
@ -1381,7 +1386,7 @@ implementation
(is_widestring(left.resultdef) or (is_widestring(left.resultdef) or
is_unicodestring(left.resultdef)) then is_unicodestring(left.resultdef)) then
begin begin
inserttypeconv(left,cansistringtype); inserttypeconv(left,getansistringdef);
{ the second pass of second_cstring_to_pchar expects a } { the second pass of second_cstring_to_pchar expects a }
{ strinconstn, but this may become a call to the } { strinconstn, but this may become a call to the }
{ widestring manager in case left contains "high ascii" } { widestring manager in case left contains "high ascii" }
@ -2286,8 +2291,13 @@ implementation
) )
) then ) then
begin begin
tstringconstnode(left).changestringtype(resultdef); { convert ansistring and rawbytestring constants to explicit source encoding if set }
if is_ansistring(resultdef) and ((tstringdef(resultdef).encoding=0)or(tstringdef(resultdef).encoding=globals.CP_NONE)) then
tstringconstnode(left).changestringtype(getansistringdef)
else
tstringconstnode(left).changestringtype(resultdef);
result:=left; result:=left;
resultdef:=left.resultdef;
left:=nil; left:=nil;
exit; exit;
end; end;

View File

@ -926,7 +926,7 @@ implementation
cst_shortstring : cst_shortstring :
resultdef:=cshortstringtype; resultdef:=cshortstringtype;
cst_ansistring : cst_ansistring :
resultdef:=cansistringtype; resultdef:=getansistringdef;
cst_unicodestring : cst_unicodestring :
resultdef:=cunicodestringtype; resultdef:=cunicodestringtype;
cst_widestring : cst_widestring :

View File

@ -378,7 +378,7 @@ implementation
if (tstringconstnode(n).len<=255) then if (tstringconstnode(n).len<=255) then
inserttypeconv(n,cshortstringtype) inserttypeconv(n,cshortstringtype)
else else
inserttypeconv(n,cansistringtype) inserttypeconv(n,getansistringdef)
else if is_widechararray(n.resultdef) then else if is_widechararray(n.resultdef) then
inserttypeconv(n,cwidestringtype); inserttypeconv(n,cwidestringtype);
end; end;
@ -967,7 +967,7 @@ implementation
{ (if you want to optimize to use shortstring, keep in mind that } { (if you want to optimize to use shortstring, keep in mind that }
{ readstr internally always uses ansistring, and to account for } { readstr internally always uses ansistring, and to account for }
{ chararrays with > 255 characters) } { chararrays with > 255 characters) }
inserttypeconv(filepara.left,cansistringtype); inserttypeconv(filepara.left,getansistringdef);
filepara.resultdef:=filepara.left.resultdef; filepara.resultdef:=filepara.left.resultdef;
if codegenerror then if codegenerror then
exit; exit;
@ -2270,7 +2270,7 @@ implementation
case left.resultdef.typ of case left.resultdef.typ of
variantdef: variantdef:
begin begin
inserttypeconv(left,cansistringtype); inserttypeconv(left,getansistringdef);
end; end;
stringdef : stringdef :

View File

@ -259,7 +259,7 @@ implementation
constsym: constsym:
begin begin
if tconstsym(symtableentry).consttyp=constresourcestring then if tconstsym(symtableentry).consttyp=constresourcestring then
resultdef:=cansistringtype resultdef:=getansistringdef
else else
internalerror(22799); internalerror(22799);
end; end;

View File

@ -782,7 +782,7 @@ implementation
(tstringconstnode(left).cst_type=cst_conststring) then (tstringconstnode(left).cst_type=cst_conststring) then
begin begin
if tstringconstnode(left).len>255 then if tstringconstnode(left).len>255 then
inserttypeconv(left,cansistringtype) inserttypeconv(left,getansistringdef)
else else
inserttypeconv(left,cshortstringtype); inserttypeconv(left,cshortstringtype);
end; end;

View File

@ -133,7 +133,7 @@ implementation
else else
begin begin
if cs_ansistrings in current_settings.localswitches then if cs_ansistrings in current_settings.localswitches then
def:=cansistringtype def:=getansistringdef
else else
def:=cshortstringtype; def:=cshortstringtype;
end; end;
@ -1608,7 +1608,7 @@ implementation
begin begin
p1:=cloadnode.create(srsym,srsymtable); p1:=cloadnode.create(srsym,srsymtable);
do_typecheckpass(p1); do_typecheckpass(p1);
p1.resultdef:=cansistringtype; p1.resultdef:=getansistringdef;
end end
else else
p1:=genconstsymtree(tconstsym(srsym)); p1:=genconstsymtree(tconstsym(srsym));

View File

@ -167,7 +167,7 @@ implementation
cshortstringtype:=tstringdef.createshort(255); cshortstringtype:=tstringdef.createshort(255);
{ should we give a length to the default long and ansi string definition ?? } { should we give a length to the default long and ansi string definition ?? }
clongstringtype:=tstringdef.createlong(-1); clongstringtype:=tstringdef.createlong(-1);
cansistringtype:=tstringdef.createansi; cansistringtype:=tstringdef.createansi(0);
if target_info.system in systems_windows then if target_info.system in systems_windows then
cwidestringtype:=tstringdef.createwide cwidestringtype:=tstringdef.createwide
else else

View File

@ -896,7 +896,7 @@ implementation
1: 1:
begin begin
if (tstringconstnode(n).cst_type in [cst_unicodestring,cst_widestring]) then if (tstringconstnode(n).cst_type in [cst_unicodestring,cst_widestring]) then
inserttypeconv(n,cansistringtype); inserttypeconv(n,getansistringdef);
if n.nodetype<>stringconstn then if n.nodetype<>stringconstn then
internalerror(2010033003); internalerror(2010033003);
ca:=pointer(tstringconstnode(n).value_str); ca:=pointer(tstringconstnode(n).value_str);

View File

@ -354,6 +354,12 @@ implementation
init_settings.sourcecodepage:=DefaultSystemCodePage; init_settings.sourcecodepage:=DefaultSystemCodePage;
include(init_settings.moduleswitches,cs_explicit_codepage); include(init_settings.moduleswitches,cs_explicit_codepage);
end; end;
end
else
begin
exclude(current_settings.moduleswitches,cs_explicit_codepage);
if changeinit then
exclude(init_settings.moduleswitches,cs_explicit_codepage);
end; end;
end; end;

View File

@ -595,7 +595,7 @@ interface
constructor loadshort(ppufile:tcompilerppufile); constructor loadshort(ppufile:tcompilerppufile);
constructor createlong(l : asizeint); constructor createlong(l : asizeint);
constructor loadlong(ppufile:tcompilerppufile); constructor loadlong(ppufile:tcompilerppufile);
constructor createansi; constructor createansi(aencoding:tstringencoding);
constructor loadansi(ppufile:tcompilerppufile); constructor loadansi(ppufile:tcompilerppufile);
constructor createwide; constructor createwide;
constructor loadwide(ppufile:tcompilerppufile); constructor loadwide(ppufile:tcompilerppufile);
@ -826,6 +826,9 @@ interface
function use_vectorfpu(def : tdef) : boolean; function use_vectorfpu(def : tdef) : boolean;
function getansistringcodepage:tstringencoding; inline;
function getansistringdef:tstringdef; inline;
implementation implementation
uses uses
@ -848,6 +851,37 @@ implementation
Helpers Helpers
****************************************************************************} ****************************************************************************}
function getansistringcodepage:tstringencoding; inline;
begin
if cs_explicit_codepage in current_settings.moduleswitches then
result:=current_settings.sourcecodepage
else
result:=0;
end;
function getansistringdef:tstringdef; inline;
begin
{ if codepage is explicitly defined in this mudule we need to return
a replacement for ansistring def }
if cs_explicit_codepage in current_settings.moduleswitches then
begin
if not assigned(current_module) then
internalerror(2011101301);
{ codepage can be redeclared only once per unit so we don't need a list of
redefined ansistring but only one pointer }
if not assigned(current_module.ansistrdef) then
begin
{ if we did not create it yet we need to do this now }
symtablestack.push(current_module.localsymtable);
current_module.ansistrdef:=tstringdef.createansi(current_settings.sourcecodepage);
symtablestack.pop(current_module.localsymtable);
end;
result:=tstringdef(current_module.ansistrdef);
end
else
result:=tstringdef(cansistringtype);
end;
function make_mangledname(const typeprefix:string;st:TSymtable;const suffix:string):string; function make_mangledname(const typeprefix:string;st:TSymtable;const suffix:string):string;
var var
s,hs, s,hs,
@ -1448,11 +1482,11 @@ implementation
end; end;
constructor tstringdef.createansi; constructor tstringdef.createansi(aencoding:tstringencoding);
begin begin
inherited create(stringdef); inherited create(stringdef);
stringtype:=st_ansistring; stringtype:=st_ansistring;
encoding:=0; encoding:=aencoding;
len:=-1; len:=-1;
savesize:=sizeof(pint); savesize:=sizeof(pint);
end; end;

65
tests/test/tcpstr17.pp Normal file
View File

@ -0,0 +1,65 @@
// to have correct test result with delphi set codepage option to 65001
program tcpstr17;
{$ifdef FPC}
{$mode delphi}
{$codepage utf8}
{$endif}
{$apptype console}
type
TOEMStr = type AnsiString(866);
{$ifndef FPC}
TSystemCodePage = Word;
const
CP_UTF8 = 65001;
{$endif}
procedure TestCodeConvRaw(const s: rawbytestring; const CodePage: TSystemCodePage);
begin
WriteLn(StringCodePage(s), ' ',s);
if CodePage <> StringCodePage(s) then
halt(1);
end;
procedure TestCodeConvAnsi(const s: ansistring; const CodePage: TSystemCodePage);
begin
WriteLn(StringCodePage(s), ' ',s);
if CodePage <> StringCodePage(s) then
halt(2);
end;
procedure TestCodeConvUTF(const s: utf8string; const CodePage: TSystemCodePage);
begin
WriteLn(StringCodePage(s), ' ',s);
if CodePage <> StringCodePage(s) then
halt(3);
end;
var
u: unicodestring;
u8: utf8string;
s: ansistring;
oemstr: TOEMStr;
begin
u := #$0141#$00F3#$0064#$017A;
u8 := u;
TestCodeConvRaw(u8, CP_UTF8);
// if UTF8 codepage is set in options S will have UTF8 codepage
s := u8;
TestCodeConvRaw(s, CP_UTF8);
TestCodeConvAnsi(u8, CP_UTF8);
TestCodeConvAnsi(s, CP_UTF8);
// converts to 866
oemstr := u8;
TestCodeConvRaw(oemstr, 866);
TestCodeConvAnsi(oemstr, DefaultSystemCodePage);
s := 'test';
TestCodeConvRaw(s, CP_UTF8);
// converts to System codepage
s := oemstr;
TestCodeConvRaw(s, DefaultSystemCodePage);
TestCodeConvUTF(s, DefaultSystemCodePage);
// outputs in source codepage instead of OEM
TestCodeConvRaw('привет', CP_UTF8);
// outputs in OEM codepage
TestCodeConvRaw(TOEMStr('привет'), 866);
end.