From c6ca9e50917650be9649d94831fd146479d05ce6 Mon Sep 17 00:00:00 2001 From: paul <paul@idefix.freepascal.org> Date: Wed, 19 Oct 2011 02:45:52 +0000 Subject: [PATCH] compiler: - add helper function getansistringcodepage which returns explicitly set codepage or 0 in other case - add helper function getansistringdef which return a def with explicitly set codepage or cansistringtype in other case - change tstoreddef.createnai constructor to allow set codepage in constructor - don't convert string constants to rawbytestring. if string constant already has a codepage - preserve it or convert to ansistring codepage (delphi compatible) - don't perform string conversion from ansistring to strings with explicitly set codepage (by directive or by compiler switch) and vice versa (delphi compatible) + test which covers most of the cases git-svn-id: trunk@19510 - --- .gitattributes | 1 + compiler/cresstr.pas | 6 ++-- compiler/defcmp.pas | 7 ++++- compiler/defutil.pas | 11 +++++++ compiler/fmodule.pas | 3 ++ compiler/nadd.pas | 4 +-- compiler/ncgcon.pas | 14 ++------- compiler/ncnv.pas | 20 +++++++++---- compiler/ncon.pas | 2 +- compiler/ninl.pas | 6 ++-- compiler/nld.pas | 2 +- compiler/nmem.pas | 2 +- compiler/pexpr.pas | 4 +-- compiler/psystem.pas | 2 +- compiler/ptconst.pas | 2 +- compiler/scanner.pas | 6 ++++ compiler/symdef.pas | 40 ++++++++++++++++++++++++-- tests/test/tcpstr17.pp | 65 ++++++++++++++++++++++++++++++++++++++++++ 18 files changed, 161 insertions(+), 36 deletions(-) create mode 100644 tests/test/tcpstr17.pp diff --git a/.gitattributes b/.gitattributes index 7c93ed651c..ccc194732d 100644 --- a/.gitattributes +++ b/.gitattributes @@ -9978,6 +9978,7 @@ tests/test/tcpstr13.pp svneol=native#text/pascal tests/test/tcpstr14.pp svneol=native#text/pascal tests/test/tcpstr15.pp svneol=native#text/pascal tests/test/tcpstr16.pp svneol=native#text/pascal +tests/test/tcpstr17.pp svneol=native#text/pascal tests/test/tcpstr2.pp svneol=native#text/plain tests/test/tcpstr2a.pp svneol=native#text/plain tests/test/tcpstr3.pp svneol=native#text/plain diff --git a/compiler/cresstr.pas b/compiler/cresstr.pas index b110030ec1..a4c4109265 100644 --- a/compiler/cresstr.pas +++ b/compiler/cresstr.pas @@ -150,7 +150,7 @@ uses make_mangledname('RESSTR',current_module.localsymtable,'START'),AT_DATA,0)); { Write unitname entry } - namelab:=emit_ansistring_const(current_asmdata.asmlists[al_const],@current_module.localsymtable.name^[1],length(current_module.localsymtable.name^),tstringdef(cansistringtype).encoding,False); + namelab:=emit_ansistring_const(current_asmdata.asmlists[al_const],@current_module.localsymtable.name^[1],length(current_module.localsymtable.name^),getansistringcodepage,False); current_asmdata.asmlists[al_resourcestrings].concat(tai_const.create_sym(namelab)); current_asmdata.asmlists[al_resourcestrings].concat(tai_const.create_sym(nil)); current_asmdata.asmlists[al_resourcestrings].concat(tai_const.create_sym(nil)); @@ -166,12 +166,12 @@ uses new_section(current_asmdata.asmlists[al_const],sec_rodata,make_mangledname('RESSTR',current_module.localsymtable,'d_'+r.name),sizeof(pint)); { Write default value } if assigned(R.value) and (R.len<>0) then - valuelab:=emit_ansistring_const(current_asmdata.asmlists[al_const],R.Value,R.Len,tstringdef(cansistringtype).encoding,False) + valuelab:=emit_ansistring_const(current_asmdata.asmlists[al_const],R.Value,R.Len,getansistringcodepage,False) else valuelab:=nil; { Append the name as a ansistring. } current_asmdata.asmlists[al_const].concat(cai_align.Create(const_align(sizeof(pint)))); - namelab:=emit_ansistring_const(current_asmdata.asmlists[al_const],@R.Name[1],length(R.name),tstringdef(cansistringtype).encoding,False); + namelab:=emit_ansistring_const(current_asmdata.asmlists[al_const],@R.Name[1],length(R.name),getansistringcodepage,False); { Resourcestring index: diff --git a/compiler/defcmp.pas b/compiler/defcmp.pas index 19661d33d5..25c4ca4f11 100644 --- a/compiler/defcmp.pas +++ b/compiler/defcmp.pas @@ -366,10 +366,15 @@ implementation else if (tstringdef(def_from).stringtype=tstringdef(def_to).stringtype) and (tstringdef(def_from).stringtype=st_ansistring) then begin + { don't convert ansistrings if any conditions is true: + 1) same encoding + 2) from explicit codepage ansistring to ansistring and vice versa + 3) from any ansistring to rawbytestring } if (tstringdef(def_from).encoding=tstringdef(def_to).encoding) or + ((tstringdef(def_to).encoding=0) and (tstringdef(def_from).encoding=getansistringcodepage)) or + ((tstringdef(def_to).encoding=getansistringcodepage) and (tstringdef(def_from).encoding=0)) or (tstringdef(def_to).encoding=globals.CP_NONE) then begin - //doconv := tc_string_2_string; eq:=te_equal; end else diff --git a/compiler/defutil.pas b/compiler/defutil.pas index fa0b447624..5459ef9d6d 100644 --- a/compiler/defutil.pas +++ b/compiler/defutil.pas @@ -169,6 +169,9 @@ interface {# Returns true if p is an ansi string type } function is_ansistring(p : tdef) : boolean; + {# Returns true if p is an ansi string type with codepage 0 } + function is_rawbytestring(p : tdef) : boolean; + {# Returns true if p is a long string type } function is_longstring(p : tdef) : boolean; @@ -617,6 +620,14 @@ implementation (tstringdef(p).stringtype=st_ansistring); end; + { true if p is an ansi string def with codepage CP_NONE } + function is_rawbytestring(p : tdef) : boolean; + begin + is_rawbytestring:=(p.typ=stringdef) and + (tstringdef(p).stringtype=st_ansistring) and + (tstringdef(p).encoding=globals.CP_NONE); + end; + { true if p is an long string def } function is_longstring(p : tdef) : boolean; begin diff --git a/compiler/fmodule.pas b/compiler/fmodule.pas index 98257e0215..7134218995 100644 --- a/compiler/fmodule.pas +++ b/compiler/fmodule.pas @@ -143,6 +143,7 @@ interface checkforwarddefs, deflist, symlist : TFPObjectList; + ansistrdef : tobject; { an ansistring def redefined for the current module } wpoinfo : tunitwpoinfobase; { whole program optimization-related information that is generated during the current run for this unit } globalsymtable, { pointer to the global symtable of this unit } localsymtable : TSymtable;{ pointer to the local symtable of this unit } @@ -523,6 +524,7 @@ implementation derefdataintflen:=0; deflist:=TFPObjectList.Create(false); symlist:=TFPObjectList.Create(false); + ansistrdef:=nil; wpoinfo:=nil; checkforwarddefs:=TFPObjectList.Create(false); extendeddefs := TFPHashObjectList.Create(true); @@ -634,6 +636,7 @@ implementation derefdata.free; deflist.free; symlist.free; + ansistrdef:=nil; wpoinfo.free; checkforwarddefs.free; globalsymtable.free; diff --git a/compiler/nadd.pas b/compiler/nadd.pas index 57831d30a7..8768c3dda5 100644 --- a/compiler/nadd.pas +++ b/compiler/nadd.pas @@ -1665,8 +1665,8 @@ implementation inserttypeconv(left,rd) else begin - inserttypeconv(left,cansistringtype); - inserttypeconv(right,cansistringtype); + inserttypeconv(left,getansistringdef); + inserttypeconv(right,getansistringdef); end; end; st_longstring : diff --git a/compiler/ncgcon.pas b/compiler/ncgcon.pas index 5e57403331..b1057d220b 100644 --- a/compiler/ncgcon.pas +++ b/compiler/ncgcon.pas @@ -258,7 +258,6 @@ implementation href: treference; pool: THashSet; entry: PHashSetItem; - cp: tstringencoding; const PoolMap: array[tconststringtype] of TConstPoolType = ( @@ -286,16 +285,7 @@ implementation entry := pool.FindOrAdd(pcompilerwidestring(value_str)^.data,len*cwidechartype.size) else if cst_type = cst_ansistring then - begin - cp:=tstringdef(resultdef).encoding; - { force output of RawByteString constants in CP_ACP codepage } - if cp=CP_NONE then - cp:=0; - { for delphiuncode mode output CP_ACP constants in the compiler codepage } - if (cp=0) and (cs_explicit_codepage in current_settings.moduleswitches) then - cp:=current_settings.sourcecodepage; - entry := PHashSetItem(TTagHashSet(pool).FindOrAdd(value_str,len,cp)) - end + entry := PHashSetItem(TTagHashSet(pool).FindOrAdd(value_str,len,tstringdef(resultdef).encoding)) else entry := pool.FindOrAdd(value_str,len); @@ -310,7 +300,7 @@ implementation if len=0 then InternalError(2008032301) { empty string should be handled above } else - lastlabel:=emit_ansistring_const(current_asmdata.AsmLists[al_typedconsts],value_str,len,cp); + lastlabel:=emit_ansistring_const(current_asmdata.AsmLists[al_typedconsts],value_str,len,tstringdef(resultdef).encoding); end; cst_unicodestring, cst_widestring: diff --git a/compiler/ncnv.pas b/compiler/ncnv.pas index 45f584ecf3..cbb2a5c50a 100644 --- a/compiler/ncnv.pas +++ b/compiler/ncnv.pas @@ -269,7 +269,12 @@ implementation remain too so that not too many/few bits are laoded } if equal_defs(p.resultdef,def) and not is_bitpacked_access(p) then - p.resultdef:=def + begin + { don't replace encoded string constants to rawbytestring encoding. + preserve the codepage } + if not (is_rawbytestring(def) and (p.nodetype=stringconstn)) then + p.resultdef:=def + end else begin case convtype of @@ -598,7 +603,7 @@ implementation (p.nodetype=stringconstn) and { don't cast to AnsiString if already casted to Wide/UnicodeString, issue #18266 } (tstringconstnode(p).cst_type in [cst_conststring,cst_shortstring,cst_longstring]) then - p:=ctypeconvnode.create_internal(p,cansistringtype) + p:=ctypeconvnode.create_internal(p,getansistringdef) else case p.resultdef.typ of enumdef : @@ -994,7 +999,7 @@ implementation else begin if tstringconstnode(left).len>255 then - inserttypeconv(left,cansistringtype) + inserttypeconv(left,getansistringdef) else inserttypeconv(left,cshortstringtype); end; @@ -1381,7 +1386,7 @@ implementation (is_widestring(left.resultdef) or is_unicodestring(left.resultdef)) then begin - inserttypeconv(left,cansistringtype); + inserttypeconv(left,getansistringdef); { the second pass of second_cstring_to_pchar expects a } { strinconstn, but this may become a call to the } { widestring manager in case left contains "high ascii" } @@ -2286,8 +2291,13 @@ implementation ) ) then begin - tstringconstnode(left).changestringtype(resultdef); + { convert ansistring and rawbytestring constants to explicit source encoding if set } + if is_ansistring(resultdef) and ((tstringdef(resultdef).encoding=0)or(tstringdef(resultdef).encoding=globals.CP_NONE)) then + tstringconstnode(left).changestringtype(getansistringdef) + else + tstringconstnode(left).changestringtype(resultdef); result:=left; + resultdef:=left.resultdef; left:=nil; exit; end; diff --git a/compiler/ncon.pas b/compiler/ncon.pas index cfaaa33660..60141f3b61 100644 --- a/compiler/ncon.pas +++ b/compiler/ncon.pas @@ -926,7 +926,7 @@ implementation cst_shortstring : resultdef:=cshortstringtype; cst_ansistring : - resultdef:=cansistringtype; + resultdef:=getansistringdef; cst_unicodestring : resultdef:=cunicodestringtype; cst_widestring : diff --git a/compiler/ninl.pas b/compiler/ninl.pas index e394dbc6f8..e4cb5a19ea 100644 --- a/compiler/ninl.pas +++ b/compiler/ninl.pas @@ -378,7 +378,7 @@ implementation if (tstringconstnode(n).len<=255) then inserttypeconv(n,cshortstringtype) else - inserttypeconv(n,cansistringtype) + inserttypeconv(n,getansistringdef) else if is_widechararray(n.resultdef) then inserttypeconv(n,cwidestringtype); end; @@ -967,7 +967,7 @@ implementation { (if you want to optimize to use shortstring, keep in mind that } { readstr internally always uses ansistring, and to account for } { chararrays with > 255 characters) } - inserttypeconv(filepara.left,cansistringtype); + inserttypeconv(filepara.left,getansistringdef); filepara.resultdef:=filepara.left.resultdef; if codegenerror then exit; @@ -2270,7 +2270,7 @@ implementation case left.resultdef.typ of variantdef: begin - inserttypeconv(left,cansistringtype); + inserttypeconv(left,getansistringdef); end; stringdef : diff --git a/compiler/nld.pas b/compiler/nld.pas index bfa30ce741..0290eb1959 100644 --- a/compiler/nld.pas +++ b/compiler/nld.pas @@ -259,7 +259,7 @@ implementation constsym: begin if tconstsym(symtableentry).consttyp=constresourcestring then - resultdef:=cansistringtype + resultdef:=getansistringdef else internalerror(22799); end; diff --git a/compiler/nmem.pas b/compiler/nmem.pas index 45825ee74a..2fd10ac56b 100644 --- a/compiler/nmem.pas +++ b/compiler/nmem.pas @@ -782,7 +782,7 @@ implementation (tstringconstnode(left).cst_type=cst_conststring) then begin if tstringconstnode(left).len>255 then - inserttypeconv(left,cansistringtype) + inserttypeconv(left,getansistringdef) else inserttypeconv(left,cshortstringtype); end; diff --git a/compiler/pexpr.pas b/compiler/pexpr.pas index 6913edb5ba..2162a66eb0 100644 --- a/compiler/pexpr.pas +++ b/compiler/pexpr.pas @@ -133,7 +133,7 @@ implementation else begin if cs_ansistrings in current_settings.localswitches then - def:=cansistringtype + def:=getansistringdef else def:=cshortstringtype; end; @@ -1608,7 +1608,7 @@ implementation begin p1:=cloadnode.create(srsym,srsymtable); do_typecheckpass(p1); - p1.resultdef:=cansistringtype; + p1.resultdef:=getansistringdef; end else p1:=genconstsymtree(tconstsym(srsym)); diff --git a/compiler/psystem.pas b/compiler/psystem.pas index dcfe438047..393af3cb55 100644 --- a/compiler/psystem.pas +++ b/compiler/psystem.pas @@ -167,7 +167,7 @@ implementation cshortstringtype:=tstringdef.createshort(255); { should we give a length to the default long and ansi string definition ?? } clongstringtype:=tstringdef.createlong(-1); - cansistringtype:=tstringdef.createansi; + cansistringtype:=tstringdef.createansi(0); if target_info.system in systems_windows then cwidestringtype:=tstringdef.createwide else diff --git a/compiler/ptconst.pas b/compiler/ptconst.pas index b7316d8075..2d59dba903 100644 --- a/compiler/ptconst.pas +++ b/compiler/ptconst.pas @@ -896,7 +896,7 @@ implementation 1: begin if (tstringconstnode(n).cst_type in [cst_unicodestring,cst_widestring]) then - inserttypeconv(n,cansistringtype); + inserttypeconv(n,getansistringdef); if n.nodetype<>stringconstn then internalerror(2010033003); ca:=pointer(tstringconstnode(n).value_str); diff --git a/compiler/scanner.pas b/compiler/scanner.pas index b6afcb3d41..0da0339988 100644 --- a/compiler/scanner.pas +++ b/compiler/scanner.pas @@ -354,6 +354,12 @@ implementation init_settings.sourcecodepage:=DefaultSystemCodePage; include(init_settings.moduleswitches,cs_explicit_codepage); end; + end + else + begin + exclude(current_settings.moduleswitches,cs_explicit_codepage); + if changeinit then + exclude(init_settings.moduleswitches,cs_explicit_codepage); end; end; diff --git a/compiler/symdef.pas b/compiler/symdef.pas index bb56514d71..87b5f5308b 100644 --- a/compiler/symdef.pas +++ b/compiler/symdef.pas @@ -595,7 +595,7 @@ interface constructor loadshort(ppufile:tcompilerppufile); constructor createlong(l : asizeint); constructor loadlong(ppufile:tcompilerppufile); - constructor createansi; + constructor createansi(aencoding:tstringencoding); constructor loadansi(ppufile:tcompilerppufile); constructor createwide; constructor loadwide(ppufile:tcompilerppufile); @@ -826,6 +826,9 @@ interface function use_vectorfpu(def : tdef) : boolean; + function getansistringcodepage:tstringencoding; inline; + function getansistringdef:tstringdef; inline; + implementation uses @@ -848,6 +851,37 @@ implementation Helpers ****************************************************************************} + function getansistringcodepage:tstringencoding; inline; + begin + if cs_explicit_codepage in current_settings.moduleswitches then + result:=current_settings.sourcecodepage + else + result:=0; + end; + + function getansistringdef:tstringdef; inline; + begin + { if codepage is explicitly defined in this mudule we need to return + a replacement for ansistring def } + if cs_explicit_codepage in current_settings.moduleswitches then + begin + if not assigned(current_module) then + internalerror(2011101301); + { codepage can be redeclared only once per unit so we don't need a list of + redefined ansistring but only one pointer } + if not assigned(current_module.ansistrdef) then + begin + { if we did not create it yet we need to do this now } + symtablestack.push(current_module.localsymtable); + current_module.ansistrdef:=tstringdef.createansi(current_settings.sourcecodepage); + symtablestack.pop(current_module.localsymtable); + end; + result:=tstringdef(current_module.ansistrdef); + end + else + result:=tstringdef(cansistringtype); + end; + function make_mangledname(const typeprefix:string;st:TSymtable;const suffix:string):string; var s,hs, @@ -1448,11 +1482,11 @@ implementation end; - constructor tstringdef.createansi; + constructor tstringdef.createansi(aencoding:tstringencoding); begin inherited create(stringdef); stringtype:=st_ansistring; - encoding:=0; + encoding:=aencoding; len:=-1; savesize:=sizeof(pint); end; diff --git a/tests/test/tcpstr17.pp b/tests/test/tcpstr17.pp new file mode 100644 index 0000000000..cfbce0b301 --- /dev/null +++ b/tests/test/tcpstr17.pp @@ -0,0 +1,65 @@ +// to have correct test result with delphi set codepage option to 65001 +program tcpstr17; +{$ifdef FPC} + {$mode delphi} + {$codepage utf8} +{$endif} +{$apptype console} +type + TOEMStr = type AnsiString(866); +{$ifndef FPC} + TSystemCodePage = Word; +const + CP_UTF8 = 65001; +{$endif} + +procedure TestCodeConvRaw(const s: rawbytestring; const CodePage: TSystemCodePage); +begin + WriteLn(StringCodePage(s), ' ',s); + if CodePage <> StringCodePage(s) then + halt(1); +end; + +procedure TestCodeConvAnsi(const s: ansistring; const CodePage: TSystemCodePage); +begin + WriteLn(StringCodePage(s), ' ',s); + if CodePage <> StringCodePage(s) then + halt(2); +end; + +procedure TestCodeConvUTF(const s: utf8string; const CodePage: TSystemCodePage); +begin + WriteLn(StringCodePage(s), ' ',s); + if CodePage <> StringCodePage(s) then + halt(3); +end; + +var + u: unicodestring; + u8: utf8string; + s: ansistring; + oemstr: TOEMStr; +begin + u := #$0141#$00F3#$0064#$017A; + u8 := u; + TestCodeConvRaw(u8, CP_UTF8); + // if UTF8 codepage is set in options S will have UTF8 codepage + s := u8; + TestCodeConvRaw(s, CP_UTF8); + TestCodeConvAnsi(u8, CP_UTF8); + TestCodeConvAnsi(s, CP_UTF8); + // converts to 866 + oemstr := u8; + TestCodeConvRaw(oemstr, 866); + TestCodeConvAnsi(oemstr, DefaultSystemCodePage); + s := 'test'; + TestCodeConvRaw(s, CP_UTF8); + // converts to System codepage + s := oemstr; + TestCodeConvRaw(s, DefaultSystemCodePage); + TestCodeConvUTF(s, DefaultSystemCodePage); + // outputs in source codepage instead of OEM + TestCodeConvRaw('привет', CP_UTF8); + // outputs in OEM codepage + TestCodeConvRaw(TOEMStr('привет'), 866); +end.