From c6ca9e50917650be9649d94831fd146479d05ce6 Mon Sep 17 00:00:00 2001
From: paul <paul@idefix.freepascal.org>
Date: Wed, 19 Oct 2011 02:45:52 +0000
Subject: [PATCH] compiler:   - add helper function getansistringcodepage which
 returns explicitly set codepage or 0 in other case   - add helper function
 getansistringdef which return a def with explicitly set codepage or
 cansistringtype in other case   - change tstoreddef.createnai constructor to
 allow set codepage in constructor   - don't convert string constants to
 rawbytestring. if string constant already has a codepage - preserve it or
 convert to ansistring codepage (delphi compatible)   - don't perform string
 conversion from ansistring to strings with explicitly set codepage (by
 directive or by compiler switch) and vice versa (delphi compatible)   + test
 which covers most of the cases

git-svn-id: trunk@19510 -
---
 .gitattributes         |  1 +
 compiler/cresstr.pas   |  6 ++--
 compiler/defcmp.pas    |  7 ++++-
 compiler/defutil.pas   | 11 +++++++
 compiler/fmodule.pas   |  3 ++
 compiler/nadd.pas      |  4 +--
 compiler/ncgcon.pas    | 14 ++-------
 compiler/ncnv.pas      | 20 +++++++++----
 compiler/ncon.pas      |  2 +-
 compiler/ninl.pas      |  6 ++--
 compiler/nld.pas       |  2 +-
 compiler/nmem.pas      |  2 +-
 compiler/pexpr.pas     |  4 +--
 compiler/psystem.pas   |  2 +-
 compiler/ptconst.pas   |  2 +-
 compiler/scanner.pas   |  6 ++++
 compiler/symdef.pas    | 40 ++++++++++++++++++++++++--
 tests/test/tcpstr17.pp | 65 ++++++++++++++++++++++++++++++++++++++++++
 18 files changed, 161 insertions(+), 36 deletions(-)
 create mode 100644 tests/test/tcpstr17.pp

diff --git a/.gitattributes b/.gitattributes
index 7c93ed651c..ccc194732d 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -9978,6 +9978,7 @@ tests/test/tcpstr13.pp svneol=native#text/pascal
 tests/test/tcpstr14.pp svneol=native#text/pascal
 tests/test/tcpstr15.pp svneol=native#text/pascal
 tests/test/tcpstr16.pp svneol=native#text/pascal
+tests/test/tcpstr17.pp svneol=native#text/pascal
 tests/test/tcpstr2.pp svneol=native#text/plain
 tests/test/tcpstr2a.pp svneol=native#text/plain
 tests/test/tcpstr3.pp svneol=native#text/plain
diff --git a/compiler/cresstr.pas b/compiler/cresstr.pas
index b110030ec1..a4c4109265 100644
--- a/compiler/cresstr.pas
+++ b/compiler/cresstr.pas
@@ -150,7 +150,7 @@ uses
           make_mangledname('RESSTR',current_module.localsymtable,'START'),AT_DATA,0));
 
         { Write unitname entry }
-        namelab:=emit_ansistring_const(current_asmdata.asmlists[al_const],@current_module.localsymtable.name^[1],length(current_module.localsymtable.name^),tstringdef(cansistringtype).encoding,False);
+        namelab:=emit_ansistring_const(current_asmdata.asmlists[al_const],@current_module.localsymtable.name^[1],length(current_module.localsymtable.name^),getansistringcodepage,False);
         current_asmdata.asmlists[al_resourcestrings].concat(tai_const.create_sym(namelab));
         current_asmdata.asmlists[al_resourcestrings].concat(tai_const.create_sym(nil));
         current_asmdata.asmlists[al_resourcestrings].concat(tai_const.create_sym(nil));
@@ -166,12 +166,12 @@ uses
             new_section(current_asmdata.asmlists[al_const],sec_rodata,make_mangledname('RESSTR',current_module.localsymtable,'d_'+r.name),sizeof(pint));
             { Write default value }
             if assigned(R.value) and (R.len<>0) then
-              valuelab:=emit_ansistring_const(current_asmdata.asmlists[al_const],R.Value,R.Len,tstringdef(cansistringtype).encoding,False)
+              valuelab:=emit_ansistring_const(current_asmdata.asmlists[al_const],R.Value,R.Len,getansistringcodepage,False)
             else
               valuelab:=nil;
             { Append the name as a ansistring. }
             current_asmdata.asmlists[al_const].concat(cai_align.Create(const_align(sizeof(pint))));
-            namelab:=emit_ansistring_const(current_asmdata.asmlists[al_const],@R.Name[1],length(R.name),tstringdef(cansistringtype).encoding,False);
+            namelab:=emit_ansistring_const(current_asmdata.asmlists[al_const],@R.Name[1],length(R.name),getansistringcodepage,False);
 
             {
               Resourcestring index:
diff --git a/compiler/defcmp.pas b/compiler/defcmp.pas
index 19661d33d5..25c4ca4f11 100644
--- a/compiler/defcmp.pas
+++ b/compiler/defcmp.pas
@@ -366,10 +366,15 @@ implementation
                      else if (tstringdef(def_from).stringtype=tstringdef(def_to).stringtype) and
                              (tstringdef(def_from).stringtype=st_ansistring) then 
                       begin
+                        { don't convert ansistrings if any conditions is true:
+                          1) same encoding
+                          2) from explicit codepage ansistring to ansistring and vice versa
+                          3) from any ansistring to rawbytestring }
                         if (tstringdef(def_from).encoding=tstringdef(def_to).encoding) or
+                           ((tstringdef(def_to).encoding=0) and (tstringdef(def_from).encoding=getansistringcodepage)) or
+                           ((tstringdef(def_to).encoding=getansistringcodepage) and (tstringdef(def_from).encoding=0)) or
                            (tstringdef(def_to).encoding=globals.CP_NONE) then
                          begin
-                           //doconv := tc_string_2_string;
                            eq:=te_equal;
                          end
                         else
diff --git a/compiler/defutil.pas b/compiler/defutil.pas
index fa0b447624..5459ef9d6d 100644
--- a/compiler/defutil.pas
+++ b/compiler/defutil.pas
@@ -169,6 +169,9 @@ interface
     {# Returns true if p is an ansi string type }
     function is_ansistring(p : tdef) : boolean;
 
+    {# Returns true if p is an ansi string type with codepage 0 }
+    function is_rawbytestring(p : tdef) : boolean;
+
     {# Returns true if p is a long string type }
     function is_longstring(p : tdef) : boolean;
 
@@ -617,6 +620,14 @@ implementation
                         (tstringdef(p).stringtype=st_ansistring);
       end;
 
+    { true if p is an ansi string def with codepage CP_NONE }
+    function is_rawbytestring(p : tdef) : boolean;
+      begin
+        is_rawbytestring:=(p.typ=stringdef) and
+                       (tstringdef(p).stringtype=st_ansistring) and
+                       (tstringdef(p).encoding=globals.CP_NONE);
+      end;
+
     { true if p is an long string def }
     function is_longstring(p : tdef) : boolean;
       begin
diff --git a/compiler/fmodule.pas b/compiler/fmodule.pas
index 98257e0215..7134218995 100644
--- a/compiler/fmodule.pas
+++ b/compiler/fmodule.pas
@@ -143,6 +143,7 @@ interface
         checkforwarddefs,
         deflist,
         symlist       : TFPObjectList;
+        ansistrdef    : tobject; { an ansistring def redefined for the current module }
         wpoinfo       : tunitwpoinfobase; { whole program optimization-related information that is generated during the current run for this unit }
         globalsymtable,           { pointer to the global symtable of this unit }
         localsymtable : TSymtable;{ pointer to the local symtable of this unit }
@@ -523,6 +524,7 @@ implementation
         derefdataintflen:=0;
         deflist:=TFPObjectList.Create(false);
         symlist:=TFPObjectList.Create(false);
+        ansistrdef:=nil;
         wpoinfo:=nil;
         checkforwarddefs:=TFPObjectList.Create(false);
         extendeddefs := TFPHashObjectList.Create(true);
@@ -634,6 +636,7 @@ implementation
         derefdata.free;
         deflist.free;
         symlist.free;
+        ansistrdef:=nil;
         wpoinfo.free;
         checkforwarddefs.free;
         globalsymtable.free;
diff --git a/compiler/nadd.pas b/compiler/nadd.pas
index 57831d30a7..8768c3dda5 100644
--- a/compiler/nadd.pas
+++ b/compiler/nadd.pas
@@ -1665,8 +1665,8 @@ implementation
                         inserttypeconv(left,rd)
                       else
                         begin
-                          inserttypeconv(left,cansistringtype);
-                          inserttypeconv(right,cansistringtype);
+                          inserttypeconv(left,getansistringdef);
+                          inserttypeconv(right,getansistringdef);
                         end;
                     end;
                   st_longstring :
diff --git a/compiler/ncgcon.pas b/compiler/ncgcon.pas
index 5e57403331..b1057d220b 100644
--- a/compiler/ncgcon.pas
+++ b/compiler/ncgcon.pas
@@ -258,7 +258,6 @@ implementation
          href: treference;
          pool: THashSet;
          entry: PHashSetItem;
-         cp: tstringencoding;
 
       const
         PoolMap: array[tconststringtype] of TConstPoolType = (
@@ -286,16 +285,7 @@ implementation
                 entry := pool.FindOrAdd(pcompilerwidestring(value_str)^.data,len*cwidechartype.size)
               else
               if cst_type = cst_ansistring then
-                begin
-                  cp:=tstringdef(resultdef).encoding;
-                  { force output of RawByteString constants in CP_ACP codepage }
-                  if cp=CP_NONE then
-                    cp:=0;
-                  { for delphiuncode mode output CP_ACP constants in the compiler codepage }
-                  if (cp=0) and (cs_explicit_codepage in current_settings.moduleswitches) then
-                    cp:=current_settings.sourcecodepage;
-                  entry := PHashSetItem(TTagHashSet(pool).FindOrAdd(value_str,len,cp))
-                end
+                entry := PHashSetItem(TTagHashSet(pool).FindOrAdd(value_str,len,tstringdef(resultdef).encoding))
               else
                 entry := pool.FindOrAdd(value_str,len);
 
@@ -310,7 +300,7 @@ implementation
                            if len=0 then
                              InternalError(2008032301)   { empty string should be handled above }
                            else
-                             lastlabel:=emit_ansistring_const(current_asmdata.AsmLists[al_typedconsts],value_str,len,cp);
+                             lastlabel:=emit_ansistring_const(current_asmdata.AsmLists[al_typedconsts],value_str,len,tstringdef(resultdef).encoding);
                         end;
                       cst_unicodestring,
                       cst_widestring:
diff --git a/compiler/ncnv.pas b/compiler/ncnv.pas
index 45f584ecf3..cbb2a5c50a 100644
--- a/compiler/ncnv.pas
+++ b/compiler/ncnv.pas
@@ -269,7 +269,12 @@ implementation
           remain too so that not too many/few bits are laoded }
         if equal_defs(p.resultdef,def) and
            not is_bitpacked_access(p) then
-          p.resultdef:=def
+          begin
+            { don't replace encoded string constants to rawbytestring encoding.
+              preserve the codepage }
+            if not (is_rawbytestring(def) and (p.nodetype=stringconstn)) then
+              p.resultdef:=def
+          end
         else
          begin
            case convtype of
@@ -598,7 +603,7 @@ implementation
            (p.nodetype=stringconstn) and
            { don't cast to AnsiString if already casted to Wide/UnicodeString, issue #18266 }
            (tstringconstnode(p).cst_type in [cst_conststring,cst_shortstring,cst_longstring]) then
-          p:=ctypeconvnode.create_internal(p,cansistringtype)
+          p:=ctypeconvnode.create_internal(p,getansistringdef)
         else
           case p.resultdef.typ of
             enumdef :
@@ -994,7 +999,7 @@ implementation
              else
                begin
                  if tstringconstnode(left).len>255 then
-                   inserttypeconv(left,cansistringtype)
+                   inserttypeconv(left,getansistringdef)
                  else
                    inserttypeconv(left,cshortstringtype);
                end;
@@ -1381,7 +1386,7 @@ implementation
               (is_widestring(left.resultdef) or
                is_unicodestring(left.resultdef)) then
              begin
-               inserttypeconv(left,cansistringtype);
+               inserttypeconv(left,getansistringdef);
                { the second pass of second_cstring_to_pchar expects a  }
                { strinconstn, but this may become a call to the        }
                { widestring manager in case left contains "high ascii" }
@@ -2286,8 +2291,13 @@ implementation
                 )
               ) then
               begin
-                tstringconstnode(left).changestringtype(resultdef);
+                { convert ansistring and rawbytestring constants to explicit source encoding if set }
+                if is_ansistring(resultdef) and ((tstringdef(resultdef).encoding=0)or(tstringdef(resultdef).encoding=globals.CP_NONE)) then
+                  tstringconstnode(left).changestringtype(getansistringdef)
+                else
+                  tstringconstnode(left).changestringtype(resultdef);
                 result:=left;
+                resultdef:=left.resultdef;
                 left:=nil;
                 exit;
               end;
diff --git a/compiler/ncon.pas b/compiler/ncon.pas
index cfaaa33660..60141f3b61 100644
--- a/compiler/ncon.pas
+++ b/compiler/ncon.pas
@@ -926,7 +926,7 @@ implementation
           cst_shortstring :
             resultdef:=cshortstringtype;
           cst_ansistring :
-            resultdef:=cansistringtype;
+            resultdef:=getansistringdef;
           cst_unicodestring :
             resultdef:=cunicodestringtype;
           cst_widestring :
diff --git a/compiler/ninl.pas b/compiler/ninl.pas
index e394dbc6f8..e4cb5a19ea 100644
--- a/compiler/ninl.pas
+++ b/compiler/ninl.pas
@@ -378,7 +378,7 @@ implementation
             if (tstringconstnode(n).len<=255) then
               inserttypeconv(n,cshortstringtype)
             else
-              inserttypeconv(n,cansistringtype)
+              inserttypeconv(n,getansistringdef)
           else if is_widechararray(n.resultdef) then
             inserttypeconv(n,cwidestringtype);
       end;
@@ -967,7 +967,7 @@ implementation
                 { (if you want to optimize to use shortstring, keep in mind that    }
                 {  readstr internally always uses ansistring, and to account for    }
                 {  chararrays with > 255 characters)                                }
-                inserttypeconv(filepara.left,cansistringtype);
+                inserttypeconv(filepara.left,getansistringdef);
                 filepara.resultdef:=filepara.left.resultdef;
                 if codegenerror then
                   exit;
@@ -2270,7 +2270,7 @@ implementation
                   case left.resultdef.typ of
                     variantdef:
                       begin
-                        inserttypeconv(left,cansistringtype);
+                        inserttypeconv(left,getansistringdef);
                       end;
 
                     stringdef :
diff --git a/compiler/nld.pas b/compiler/nld.pas
index bfa30ce741..0290eb1959 100644
--- a/compiler/nld.pas
+++ b/compiler/nld.pas
@@ -259,7 +259,7 @@ implementation
            constsym:
              begin
                if tconstsym(symtableentry).consttyp=constresourcestring then
-                 resultdef:=cansistringtype
+                 resultdef:=getansistringdef
                else
                  internalerror(22799);
              end;
diff --git a/compiler/nmem.pas b/compiler/nmem.pas
index 45825ee74a..2fd10ac56b 100644
--- a/compiler/nmem.pas
+++ b/compiler/nmem.pas
@@ -782,7 +782,7 @@ implementation
             (tstringconstnode(left).cst_type=cst_conststring) then
            begin
              if tstringconstnode(left).len>255 then
-               inserttypeconv(left,cansistringtype)
+               inserttypeconv(left,getansistringdef)
              else
                inserttypeconv(left,cshortstringtype);
            end;
diff --git a/compiler/pexpr.pas b/compiler/pexpr.pas
index 6913edb5ba..2162a66eb0 100644
--- a/compiler/pexpr.pas
+++ b/compiler/pexpr.pas
@@ -133,7 +133,7 @@ implementation
          else
            begin
              if cs_ansistrings in current_settings.localswitches then
-               def:=cansistringtype
+               def:=getansistringdef
              else
                def:=cshortstringtype;
            end;
@@ -1608,7 +1608,7 @@ implementation
                       begin
                         p1:=cloadnode.create(srsym,srsymtable);
                         do_typecheckpass(p1);
-                        p1.resultdef:=cansistringtype;
+                        p1.resultdef:=getansistringdef;
                       end
                     else
                       p1:=genconstsymtree(tconstsym(srsym));
diff --git a/compiler/psystem.pas b/compiler/psystem.pas
index dcfe438047..393af3cb55 100644
--- a/compiler/psystem.pas
+++ b/compiler/psystem.pas
@@ -167,7 +167,7 @@ implementation
         cshortstringtype:=tstringdef.createshort(255);
         { should we give a length to the default long and ansi string definition ?? }
         clongstringtype:=tstringdef.createlong(-1);
-        cansistringtype:=tstringdef.createansi;
+        cansistringtype:=tstringdef.createansi(0);
         if target_info.system in systems_windows then
           cwidestringtype:=tstringdef.createwide
         else
diff --git a/compiler/ptconst.pas b/compiler/ptconst.pas
index b7316d8075..2d59dba903 100644
--- a/compiler/ptconst.pas
+++ b/compiler/ptconst.pas
@@ -896,7 +896,7 @@ implementation
                       1:
                         begin
                           if (tstringconstnode(n).cst_type in [cst_unicodestring,cst_widestring]) then
-                            inserttypeconv(n,cansistringtype);
+                            inserttypeconv(n,getansistringdef);
                           if n.nodetype<>stringconstn then
                             internalerror(2010033003);
                           ca:=pointer(tstringconstnode(n).value_str);
diff --git a/compiler/scanner.pas b/compiler/scanner.pas
index b6afcb3d41..0da0339988 100644
--- a/compiler/scanner.pas
+++ b/compiler/scanner.pas
@@ -354,6 +354,12 @@ implementation
               init_settings.sourcecodepage:=DefaultSystemCodePage;
               include(init_settings.moduleswitches,cs_explicit_codepage);
             end;
+          end
+        else
+          begin
+            exclude(current_settings.moduleswitches,cs_explicit_codepage);
+            if changeinit then
+              exclude(init_settings.moduleswitches,cs_explicit_codepage);
           end;
       end;
 
diff --git a/compiler/symdef.pas b/compiler/symdef.pas
index bb56514d71..87b5f5308b 100644
--- a/compiler/symdef.pas
+++ b/compiler/symdef.pas
@@ -595,7 +595,7 @@ interface
           constructor loadshort(ppufile:tcompilerppufile);
           constructor createlong(l : asizeint);
           constructor loadlong(ppufile:tcompilerppufile);
-          constructor createansi;
+          constructor createansi(aencoding:tstringencoding);
           constructor loadansi(ppufile:tcompilerppufile);
           constructor createwide;
           constructor loadwide(ppufile:tcompilerppufile);
@@ -826,6 +826,9 @@ interface
 
     function use_vectorfpu(def : tdef) : boolean;
 
+    function getansistringcodepage:tstringencoding; inline;
+    function getansistringdef:tstringdef; inline;
+
 implementation
 
     uses
@@ -848,6 +851,37 @@ implementation
                                   Helpers
 ****************************************************************************}
 
+    function getansistringcodepage:tstringencoding; inline;
+      begin
+        if cs_explicit_codepage in current_settings.moduleswitches then
+          result:=current_settings.sourcecodepage
+        else
+          result:=0;
+      end;
+
+    function getansistringdef:tstringdef; inline;
+      begin
+        { if codepage is explicitly defined in this mudule we need to return
+          a replacement for ansistring def }
+        if cs_explicit_codepage in current_settings.moduleswitches then
+          begin
+            if not assigned(current_module) then
+              internalerror(2011101301);
+            { codepage can be redeclared only once per unit so we don't need a list of
+              redefined ansistring but only one pointer }
+            if not assigned(current_module.ansistrdef) then
+              begin
+                { if we did not create it yet we need to do this now }
+                symtablestack.push(current_module.localsymtable);
+                current_module.ansistrdef:=tstringdef.createansi(current_settings.sourcecodepage);
+                symtablestack.pop(current_module.localsymtable);
+              end;
+            result:=tstringdef(current_module.ansistrdef);
+          end
+        else
+          result:=tstringdef(cansistringtype);
+      end;
+
     function make_mangledname(const typeprefix:string;st:TSymtable;const suffix:string):string;
       var
         s,hs,
@@ -1448,11 +1482,11 @@ implementation
       end;
 
 
-    constructor tstringdef.createansi;
+    constructor tstringdef.createansi(aencoding:tstringencoding);
       begin
          inherited create(stringdef);
          stringtype:=st_ansistring;
-         encoding:=0;
+         encoding:=aencoding;
          len:=-1;
          savesize:=sizeof(pint);
       end;
diff --git a/tests/test/tcpstr17.pp b/tests/test/tcpstr17.pp
new file mode 100644
index 0000000000..cfbce0b301
--- /dev/null
+++ b/tests/test/tcpstr17.pp
@@ -0,0 +1,65 @@
+// to have correct test result with delphi set codepage option to 65001
+program tcpstr17;
+{$ifdef FPC}
+  {$mode delphi}
+  {$codepage utf8}
+{$endif}
+{$apptype console}
+type
+  TOEMStr = type AnsiString(866);
+{$ifndef FPC}
+  TSystemCodePage = Word;
+const
+  CP_UTF8 = 65001;
+{$endif}
+
+procedure TestCodeConvRaw(const s: rawbytestring; const CodePage: TSystemCodePage);
+begin
+  WriteLn(StringCodePage(s), ' ',s);
+  if CodePage <> StringCodePage(s) then
+    halt(1);
+end;
+
+procedure TestCodeConvAnsi(const s: ansistring; const CodePage: TSystemCodePage);
+begin
+  WriteLn(StringCodePage(s), ' ',s);
+  if CodePage <> StringCodePage(s) then
+    halt(2);
+end;
+
+procedure TestCodeConvUTF(const s: utf8string; const CodePage: TSystemCodePage);
+begin
+  WriteLn(StringCodePage(s), ' ',s);
+  if CodePage <> StringCodePage(s) then
+    halt(3);
+end;
+
+var
+  u: unicodestring;
+  u8: utf8string;
+  s: ansistring;
+  oemstr: TOEMStr;
+begin
+  u := #$0141#$00F3#$0064#$017A;
+  u8 := u;
+  TestCodeConvRaw(u8, CP_UTF8);
+  // if UTF8 codepage is set in options S will have UTF8 codepage
+  s := u8;
+  TestCodeConvRaw(s, CP_UTF8);
+  TestCodeConvAnsi(u8, CP_UTF8);
+  TestCodeConvAnsi(s, CP_UTF8);
+  // converts to 866
+  oemstr := u8;
+  TestCodeConvRaw(oemstr, 866);
+  TestCodeConvAnsi(oemstr, DefaultSystemCodePage);
+  s := 'test';
+  TestCodeConvRaw(s, CP_UTF8);
+  // converts to System codepage
+  s := oemstr;
+  TestCodeConvRaw(s, DefaultSystemCodePage);
+  TestCodeConvUTF(s, DefaultSystemCodePage);
+  // outputs in source codepage instead of OEM
+  TestCodeConvRaw('привет', CP_UTF8);
+  // outputs in OEM codepage
+  TestCodeConvRaw(TOEMStr('привет'), 866);
+end.