compiler:

- add helper function getansistringcodepage which returns explicitly set codepage or 0 in other case
  - add helper function getansistringdef which return a def with explicitly set codepage or cansistringtype in other case
  - change tstoreddef.createnai constructor to allow set codepage in constructor
  - don't convert string constants to rawbytestring. if string constant already has a codepage - preserve it or convert to ansistring codepage (delphi compatible)
  - don't perform string conversion from ansistring to strings with explicitly set codepage (by directive or by compiler switch) and vice versa (delphi compatible)
  + test which covers most of the cases

git-svn-id: trunk@19510 -
This commit is contained in:
paul 2011-10-19 02:45:52 +00:00
parent 212b14a46b
commit c6ca9e5091
18 changed files with 161 additions and 36 deletions

1
.gitattributes vendored
View File

@ -9978,6 +9978,7 @@ tests/test/tcpstr13.pp svneol=native#text/pascal
tests/test/tcpstr14.pp svneol=native#text/pascal
tests/test/tcpstr15.pp svneol=native#text/pascal
tests/test/tcpstr16.pp svneol=native#text/pascal
tests/test/tcpstr17.pp svneol=native#text/pascal
tests/test/tcpstr2.pp svneol=native#text/plain
tests/test/tcpstr2a.pp svneol=native#text/plain
tests/test/tcpstr3.pp svneol=native#text/plain

View File

@ -150,7 +150,7 @@ uses
make_mangledname('RESSTR',current_module.localsymtable,'START'),AT_DATA,0));
{ Write unitname entry }
namelab:=emit_ansistring_const(current_asmdata.asmlists[al_const],@current_module.localsymtable.name^[1],length(current_module.localsymtable.name^),tstringdef(cansistringtype).encoding,False);
namelab:=emit_ansistring_const(current_asmdata.asmlists[al_const],@current_module.localsymtable.name^[1],length(current_module.localsymtable.name^),getansistringcodepage,False);
current_asmdata.asmlists[al_resourcestrings].concat(tai_const.create_sym(namelab));
current_asmdata.asmlists[al_resourcestrings].concat(tai_const.create_sym(nil));
current_asmdata.asmlists[al_resourcestrings].concat(tai_const.create_sym(nil));
@ -166,12 +166,12 @@ uses
new_section(current_asmdata.asmlists[al_const],sec_rodata,make_mangledname('RESSTR',current_module.localsymtable,'d_'+r.name),sizeof(pint));
{ Write default value }
if assigned(R.value) and (R.len<>0) then
valuelab:=emit_ansistring_const(current_asmdata.asmlists[al_const],R.Value,R.Len,tstringdef(cansistringtype).encoding,False)
valuelab:=emit_ansistring_const(current_asmdata.asmlists[al_const],R.Value,R.Len,getansistringcodepage,False)
else
valuelab:=nil;
{ Append the name as a ansistring. }
current_asmdata.asmlists[al_const].concat(cai_align.Create(const_align(sizeof(pint))));
namelab:=emit_ansistring_const(current_asmdata.asmlists[al_const],@R.Name[1],length(R.name),tstringdef(cansistringtype).encoding,False);
namelab:=emit_ansistring_const(current_asmdata.asmlists[al_const],@R.Name[1],length(R.name),getansistringcodepage,False);
{
Resourcestring index:

View File

@ -366,10 +366,15 @@ implementation
else if (tstringdef(def_from).stringtype=tstringdef(def_to).stringtype) and
(tstringdef(def_from).stringtype=st_ansistring) then
begin
{ don't convert ansistrings if any conditions is true:
1) same encoding
2) from explicit codepage ansistring to ansistring and vice versa
3) from any ansistring to rawbytestring }
if (tstringdef(def_from).encoding=tstringdef(def_to).encoding) or
((tstringdef(def_to).encoding=0) and (tstringdef(def_from).encoding=getansistringcodepage)) or
((tstringdef(def_to).encoding=getansistringcodepage) and (tstringdef(def_from).encoding=0)) or
(tstringdef(def_to).encoding=globals.CP_NONE) then
begin
//doconv := tc_string_2_string;
eq:=te_equal;
end
else

View File

@ -169,6 +169,9 @@ interface
{# Returns true if p is an ansi string type }
function is_ansistring(p : tdef) : boolean;
{# Returns true if p is an ansi string type with codepage 0 }
function is_rawbytestring(p : tdef) : boolean;
{# Returns true if p is a long string type }
function is_longstring(p : tdef) : boolean;
@ -617,6 +620,14 @@ implementation
(tstringdef(p).stringtype=st_ansistring);
end;
{ true if p is an ansi string def with codepage CP_NONE }
function is_rawbytestring(p : tdef) : boolean;
begin
is_rawbytestring:=(p.typ=stringdef) and
(tstringdef(p).stringtype=st_ansistring) and
(tstringdef(p).encoding=globals.CP_NONE);
end;
{ true if p is an long string def }
function is_longstring(p : tdef) : boolean;
begin

View File

@ -143,6 +143,7 @@ interface
checkforwarddefs,
deflist,
symlist : TFPObjectList;
ansistrdef : tobject; { an ansistring def redefined for the current module }
wpoinfo : tunitwpoinfobase; { whole program optimization-related information that is generated during the current run for this unit }
globalsymtable, { pointer to the global symtable of this unit }
localsymtable : TSymtable;{ pointer to the local symtable of this unit }
@ -523,6 +524,7 @@ implementation
derefdataintflen:=0;
deflist:=TFPObjectList.Create(false);
symlist:=TFPObjectList.Create(false);
ansistrdef:=nil;
wpoinfo:=nil;
checkforwarddefs:=TFPObjectList.Create(false);
extendeddefs := TFPHashObjectList.Create(true);
@ -634,6 +636,7 @@ implementation
derefdata.free;
deflist.free;
symlist.free;
ansistrdef:=nil;
wpoinfo.free;
checkforwarddefs.free;
globalsymtable.free;

View File

@ -1665,8 +1665,8 @@ implementation
inserttypeconv(left,rd)
else
begin
inserttypeconv(left,cansistringtype);
inserttypeconv(right,cansistringtype);
inserttypeconv(left,getansistringdef);
inserttypeconv(right,getansistringdef);
end;
end;
st_longstring :

View File

@ -258,7 +258,6 @@ implementation
href: treference;
pool: THashSet;
entry: PHashSetItem;
cp: tstringencoding;
const
PoolMap: array[tconststringtype] of TConstPoolType = (
@ -286,16 +285,7 @@ implementation
entry := pool.FindOrAdd(pcompilerwidestring(value_str)^.data,len*cwidechartype.size)
else
if cst_type = cst_ansistring then
begin
cp:=tstringdef(resultdef).encoding;
{ force output of RawByteString constants in CP_ACP codepage }
if cp=CP_NONE then
cp:=0;
{ for delphiuncode mode output CP_ACP constants in the compiler codepage }
if (cp=0) and (cs_explicit_codepage in current_settings.moduleswitches) then
cp:=current_settings.sourcecodepage;
entry := PHashSetItem(TTagHashSet(pool).FindOrAdd(value_str,len,cp))
end
entry := PHashSetItem(TTagHashSet(pool).FindOrAdd(value_str,len,tstringdef(resultdef).encoding))
else
entry := pool.FindOrAdd(value_str,len);
@ -310,7 +300,7 @@ implementation
if len=0 then
InternalError(2008032301) { empty string should be handled above }
else
lastlabel:=emit_ansistring_const(current_asmdata.AsmLists[al_typedconsts],value_str,len,cp);
lastlabel:=emit_ansistring_const(current_asmdata.AsmLists[al_typedconsts],value_str,len,tstringdef(resultdef).encoding);
end;
cst_unicodestring,
cst_widestring:

View File

@ -269,7 +269,12 @@ implementation
remain too so that not too many/few bits are laoded }
if equal_defs(p.resultdef,def) and
not is_bitpacked_access(p) then
p.resultdef:=def
begin
{ don't replace encoded string constants to rawbytestring encoding.
preserve the codepage }
if not (is_rawbytestring(def) and (p.nodetype=stringconstn)) then
p.resultdef:=def
end
else
begin
case convtype of
@ -598,7 +603,7 @@ implementation
(p.nodetype=stringconstn) and
{ don't cast to AnsiString if already casted to Wide/UnicodeString, issue #18266 }
(tstringconstnode(p).cst_type in [cst_conststring,cst_shortstring,cst_longstring]) then
p:=ctypeconvnode.create_internal(p,cansistringtype)
p:=ctypeconvnode.create_internal(p,getansistringdef)
else
case p.resultdef.typ of
enumdef :
@ -994,7 +999,7 @@ implementation
else
begin
if tstringconstnode(left).len>255 then
inserttypeconv(left,cansistringtype)
inserttypeconv(left,getansistringdef)
else
inserttypeconv(left,cshortstringtype);
end;
@ -1381,7 +1386,7 @@ implementation
(is_widestring(left.resultdef) or
is_unicodestring(left.resultdef)) then
begin
inserttypeconv(left,cansistringtype);
inserttypeconv(left,getansistringdef);
{ the second pass of second_cstring_to_pchar expects a }
{ strinconstn, but this may become a call to the }
{ widestring manager in case left contains "high ascii" }
@ -2286,8 +2291,13 @@ implementation
)
) then
begin
tstringconstnode(left).changestringtype(resultdef);
{ convert ansistring and rawbytestring constants to explicit source encoding if set }
if is_ansistring(resultdef) and ((tstringdef(resultdef).encoding=0)or(tstringdef(resultdef).encoding=globals.CP_NONE)) then
tstringconstnode(left).changestringtype(getansistringdef)
else
tstringconstnode(left).changestringtype(resultdef);
result:=left;
resultdef:=left.resultdef;
left:=nil;
exit;
end;

View File

@ -926,7 +926,7 @@ implementation
cst_shortstring :
resultdef:=cshortstringtype;
cst_ansistring :
resultdef:=cansistringtype;
resultdef:=getansistringdef;
cst_unicodestring :
resultdef:=cunicodestringtype;
cst_widestring :

View File

@ -378,7 +378,7 @@ implementation
if (tstringconstnode(n).len<=255) then
inserttypeconv(n,cshortstringtype)
else
inserttypeconv(n,cansistringtype)
inserttypeconv(n,getansistringdef)
else if is_widechararray(n.resultdef) then
inserttypeconv(n,cwidestringtype);
end;
@ -967,7 +967,7 @@ implementation
{ (if you want to optimize to use shortstring, keep in mind that }
{ readstr internally always uses ansistring, and to account for }
{ chararrays with > 255 characters) }
inserttypeconv(filepara.left,cansistringtype);
inserttypeconv(filepara.left,getansistringdef);
filepara.resultdef:=filepara.left.resultdef;
if codegenerror then
exit;
@ -2270,7 +2270,7 @@ implementation
case left.resultdef.typ of
variantdef:
begin
inserttypeconv(left,cansistringtype);
inserttypeconv(left,getansistringdef);
end;
stringdef :

View File

@ -259,7 +259,7 @@ implementation
constsym:
begin
if tconstsym(symtableentry).consttyp=constresourcestring then
resultdef:=cansistringtype
resultdef:=getansistringdef
else
internalerror(22799);
end;

View File

@ -782,7 +782,7 @@ implementation
(tstringconstnode(left).cst_type=cst_conststring) then
begin
if tstringconstnode(left).len>255 then
inserttypeconv(left,cansistringtype)
inserttypeconv(left,getansistringdef)
else
inserttypeconv(left,cshortstringtype);
end;

View File

@ -133,7 +133,7 @@ implementation
else
begin
if cs_ansistrings in current_settings.localswitches then
def:=cansistringtype
def:=getansistringdef
else
def:=cshortstringtype;
end;
@ -1608,7 +1608,7 @@ implementation
begin
p1:=cloadnode.create(srsym,srsymtable);
do_typecheckpass(p1);
p1.resultdef:=cansistringtype;
p1.resultdef:=getansistringdef;
end
else
p1:=genconstsymtree(tconstsym(srsym));

View File

@ -167,7 +167,7 @@ implementation
cshortstringtype:=tstringdef.createshort(255);
{ should we give a length to the default long and ansi string definition ?? }
clongstringtype:=tstringdef.createlong(-1);
cansistringtype:=tstringdef.createansi;
cansistringtype:=tstringdef.createansi(0);
if target_info.system in systems_windows then
cwidestringtype:=tstringdef.createwide
else

View File

@ -896,7 +896,7 @@ implementation
1:
begin
if (tstringconstnode(n).cst_type in [cst_unicodestring,cst_widestring]) then
inserttypeconv(n,cansistringtype);
inserttypeconv(n,getansistringdef);
if n.nodetype<>stringconstn then
internalerror(2010033003);
ca:=pointer(tstringconstnode(n).value_str);

View File

@ -354,6 +354,12 @@ implementation
init_settings.sourcecodepage:=DefaultSystemCodePage;
include(init_settings.moduleswitches,cs_explicit_codepage);
end;
end
else
begin
exclude(current_settings.moduleswitches,cs_explicit_codepage);
if changeinit then
exclude(init_settings.moduleswitches,cs_explicit_codepage);
end;
end;

View File

@ -595,7 +595,7 @@ interface
constructor loadshort(ppufile:tcompilerppufile);
constructor createlong(l : asizeint);
constructor loadlong(ppufile:tcompilerppufile);
constructor createansi;
constructor createansi(aencoding:tstringencoding);
constructor loadansi(ppufile:tcompilerppufile);
constructor createwide;
constructor loadwide(ppufile:tcompilerppufile);
@ -826,6 +826,9 @@ interface
function use_vectorfpu(def : tdef) : boolean;
function getansistringcodepage:tstringencoding; inline;
function getansistringdef:tstringdef; inline;
implementation
uses
@ -848,6 +851,37 @@ implementation
Helpers
****************************************************************************}
function getansistringcodepage:tstringencoding; inline;
begin
if cs_explicit_codepage in current_settings.moduleswitches then
result:=current_settings.sourcecodepage
else
result:=0;
end;
function getansistringdef:tstringdef; inline;
begin
{ if codepage is explicitly defined in this mudule we need to return
a replacement for ansistring def }
if cs_explicit_codepage in current_settings.moduleswitches then
begin
if not assigned(current_module) then
internalerror(2011101301);
{ codepage can be redeclared only once per unit so we don't need a list of
redefined ansistring but only one pointer }
if not assigned(current_module.ansistrdef) then
begin
{ if we did not create it yet we need to do this now }
symtablestack.push(current_module.localsymtable);
current_module.ansistrdef:=tstringdef.createansi(current_settings.sourcecodepage);
symtablestack.pop(current_module.localsymtable);
end;
result:=tstringdef(current_module.ansistrdef);
end
else
result:=tstringdef(cansistringtype);
end;
function make_mangledname(const typeprefix:string;st:TSymtable;const suffix:string):string;
var
s,hs,
@ -1448,11 +1482,11 @@ implementation
end;
constructor tstringdef.createansi;
constructor tstringdef.createansi(aencoding:tstringencoding);
begin
inherited create(stringdef);
stringtype:=st_ansistring;
encoding:=0;
encoding:=aencoding;
len:=-1;
savesize:=sizeof(pint);
end;

65
tests/test/tcpstr17.pp Normal file
View File

@ -0,0 +1,65 @@
// to have correct test result with delphi set codepage option to 65001
program tcpstr17;
{$ifdef FPC}
{$mode delphi}
{$codepage utf8}
{$endif}
{$apptype console}
type
TOEMStr = type AnsiString(866);
{$ifndef FPC}
TSystemCodePage = Word;
const
CP_UTF8 = 65001;
{$endif}
procedure TestCodeConvRaw(const s: rawbytestring; const CodePage: TSystemCodePage);
begin
WriteLn(StringCodePage(s), ' ',s);
if CodePage <> StringCodePage(s) then
halt(1);
end;
procedure TestCodeConvAnsi(const s: ansistring; const CodePage: TSystemCodePage);
begin
WriteLn(StringCodePage(s), ' ',s);
if CodePage <> StringCodePage(s) then
halt(2);
end;
procedure TestCodeConvUTF(const s: utf8string; const CodePage: TSystemCodePage);
begin
WriteLn(StringCodePage(s), ' ',s);
if CodePage <> StringCodePage(s) then
halt(3);
end;
var
u: unicodestring;
u8: utf8string;
s: ansistring;
oemstr: TOEMStr;
begin
u := #$0141#$00F3#$0064#$017A;
u8 := u;
TestCodeConvRaw(u8, CP_UTF8);
// if UTF8 codepage is set in options S will have UTF8 codepage
s := u8;
TestCodeConvRaw(s, CP_UTF8);
TestCodeConvAnsi(u8, CP_UTF8);
TestCodeConvAnsi(s, CP_UTF8);
// converts to 866
oemstr := u8;
TestCodeConvRaw(oemstr, 866);
TestCodeConvAnsi(oemstr, DefaultSystemCodePage);
s := 'test';
TestCodeConvRaw(s, CP_UTF8);
// converts to System codepage
s := oemstr;
TestCodeConvRaw(s, DefaultSystemCodePage);
TestCodeConvUTF(s, DefaultSystemCodePage);
// outputs in source codepage instead of OEM
TestCodeConvRaw('привет', CP_UTF8);
// outputs in OEM codepage
TestCodeConvRaw(TOEMStr('привет'), 866);
end.