mirror of
https://gitlab.com/freepascal.org/fpc/source.git
synced 2025-04-13 00:09:32 +02:00
compiler:
- add helper function getansistringcodepage which returns explicitly set codepage or 0 in other case - add helper function getansistringdef which return a def with explicitly set codepage or cansistringtype in other case - change tstoreddef.createnai constructor to allow set codepage in constructor - don't convert string constants to rawbytestring. if string constant already has a codepage - preserve it or convert to ansistring codepage (delphi compatible) - don't perform string conversion from ansistring to strings with explicitly set codepage (by directive or by compiler switch) and vice versa (delphi compatible) + test which covers most of the cases git-svn-id: trunk@19510 -
This commit is contained in:
parent
212b14a46b
commit
c6ca9e5091
1
.gitattributes
vendored
1
.gitattributes
vendored
@ -9978,6 +9978,7 @@ tests/test/tcpstr13.pp svneol=native#text/pascal
|
||||
tests/test/tcpstr14.pp svneol=native#text/pascal
|
||||
tests/test/tcpstr15.pp svneol=native#text/pascal
|
||||
tests/test/tcpstr16.pp svneol=native#text/pascal
|
||||
tests/test/tcpstr17.pp svneol=native#text/pascal
|
||||
tests/test/tcpstr2.pp svneol=native#text/plain
|
||||
tests/test/tcpstr2a.pp svneol=native#text/plain
|
||||
tests/test/tcpstr3.pp svneol=native#text/plain
|
||||
|
@ -150,7 +150,7 @@ uses
|
||||
make_mangledname('RESSTR',current_module.localsymtable,'START'),AT_DATA,0));
|
||||
|
||||
{ Write unitname entry }
|
||||
namelab:=emit_ansistring_const(current_asmdata.asmlists[al_const],@current_module.localsymtable.name^[1],length(current_module.localsymtable.name^),tstringdef(cansistringtype).encoding,False);
|
||||
namelab:=emit_ansistring_const(current_asmdata.asmlists[al_const],@current_module.localsymtable.name^[1],length(current_module.localsymtable.name^),getansistringcodepage,False);
|
||||
current_asmdata.asmlists[al_resourcestrings].concat(tai_const.create_sym(namelab));
|
||||
current_asmdata.asmlists[al_resourcestrings].concat(tai_const.create_sym(nil));
|
||||
current_asmdata.asmlists[al_resourcestrings].concat(tai_const.create_sym(nil));
|
||||
@ -166,12 +166,12 @@ uses
|
||||
new_section(current_asmdata.asmlists[al_const],sec_rodata,make_mangledname('RESSTR',current_module.localsymtable,'d_'+r.name),sizeof(pint));
|
||||
{ Write default value }
|
||||
if assigned(R.value) and (R.len<>0) then
|
||||
valuelab:=emit_ansistring_const(current_asmdata.asmlists[al_const],R.Value,R.Len,tstringdef(cansistringtype).encoding,False)
|
||||
valuelab:=emit_ansistring_const(current_asmdata.asmlists[al_const],R.Value,R.Len,getansistringcodepage,False)
|
||||
else
|
||||
valuelab:=nil;
|
||||
{ Append the name as a ansistring. }
|
||||
current_asmdata.asmlists[al_const].concat(cai_align.Create(const_align(sizeof(pint))));
|
||||
namelab:=emit_ansistring_const(current_asmdata.asmlists[al_const],@R.Name[1],length(R.name),tstringdef(cansistringtype).encoding,False);
|
||||
namelab:=emit_ansistring_const(current_asmdata.asmlists[al_const],@R.Name[1],length(R.name),getansistringcodepage,False);
|
||||
|
||||
{
|
||||
Resourcestring index:
|
||||
|
@ -366,10 +366,15 @@ implementation
|
||||
else if (tstringdef(def_from).stringtype=tstringdef(def_to).stringtype) and
|
||||
(tstringdef(def_from).stringtype=st_ansistring) then
|
||||
begin
|
||||
{ don't convert ansistrings if any conditions is true:
|
||||
1) same encoding
|
||||
2) from explicit codepage ansistring to ansistring and vice versa
|
||||
3) from any ansistring to rawbytestring }
|
||||
if (tstringdef(def_from).encoding=tstringdef(def_to).encoding) or
|
||||
((tstringdef(def_to).encoding=0) and (tstringdef(def_from).encoding=getansistringcodepage)) or
|
||||
((tstringdef(def_to).encoding=getansistringcodepage) and (tstringdef(def_from).encoding=0)) or
|
||||
(tstringdef(def_to).encoding=globals.CP_NONE) then
|
||||
begin
|
||||
//doconv := tc_string_2_string;
|
||||
eq:=te_equal;
|
||||
end
|
||||
else
|
||||
|
@ -169,6 +169,9 @@ interface
|
||||
{# Returns true if p is an ansi string type }
|
||||
function is_ansistring(p : tdef) : boolean;
|
||||
|
||||
{# Returns true if p is an ansi string type with codepage 0 }
|
||||
function is_rawbytestring(p : tdef) : boolean;
|
||||
|
||||
{# Returns true if p is a long string type }
|
||||
function is_longstring(p : tdef) : boolean;
|
||||
|
||||
@ -617,6 +620,14 @@ implementation
|
||||
(tstringdef(p).stringtype=st_ansistring);
|
||||
end;
|
||||
|
||||
{ true if p is an ansi string def with codepage CP_NONE }
|
||||
function is_rawbytestring(p : tdef) : boolean;
|
||||
begin
|
||||
is_rawbytestring:=(p.typ=stringdef) and
|
||||
(tstringdef(p).stringtype=st_ansistring) and
|
||||
(tstringdef(p).encoding=globals.CP_NONE);
|
||||
end;
|
||||
|
||||
{ true if p is an long string def }
|
||||
function is_longstring(p : tdef) : boolean;
|
||||
begin
|
||||
|
@ -143,6 +143,7 @@ interface
|
||||
checkforwarddefs,
|
||||
deflist,
|
||||
symlist : TFPObjectList;
|
||||
ansistrdef : tobject; { an ansistring def redefined for the current module }
|
||||
wpoinfo : tunitwpoinfobase; { whole program optimization-related information that is generated during the current run for this unit }
|
||||
globalsymtable, { pointer to the global symtable of this unit }
|
||||
localsymtable : TSymtable;{ pointer to the local symtable of this unit }
|
||||
@ -523,6 +524,7 @@ implementation
|
||||
derefdataintflen:=0;
|
||||
deflist:=TFPObjectList.Create(false);
|
||||
symlist:=TFPObjectList.Create(false);
|
||||
ansistrdef:=nil;
|
||||
wpoinfo:=nil;
|
||||
checkforwarddefs:=TFPObjectList.Create(false);
|
||||
extendeddefs := TFPHashObjectList.Create(true);
|
||||
@ -634,6 +636,7 @@ implementation
|
||||
derefdata.free;
|
||||
deflist.free;
|
||||
symlist.free;
|
||||
ansistrdef:=nil;
|
||||
wpoinfo.free;
|
||||
checkforwarddefs.free;
|
||||
globalsymtable.free;
|
||||
|
@ -1665,8 +1665,8 @@ implementation
|
||||
inserttypeconv(left,rd)
|
||||
else
|
||||
begin
|
||||
inserttypeconv(left,cansistringtype);
|
||||
inserttypeconv(right,cansistringtype);
|
||||
inserttypeconv(left,getansistringdef);
|
||||
inserttypeconv(right,getansistringdef);
|
||||
end;
|
||||
end;
|
||||
st_longstring :
|
||||
|
@ -258,7 +258,6 @@ implementation
|
||||
href: treference;
|
||||
pool: THashSet;
|
||||
entry: PHashSetItem;
|
||||
cp: tstringencoding;
|
||||
|
||||
const
|
||||
PoolMap: array[tconststringtype] of TConstPoolType = (
|
||||
@ -286,16 +285,7 @@ implementation
|
||||
entry := pool.FindOrAdd(pcompilerwidestring(value_str)^.data,len*cwidechartype.size)
|
||||
else
|
||||
if cst_type = cst_ansistring then
|
||||
begin
|
||||
cp:=tstringdef(resultdef).encoding;
|
||||
{ force output of RawByteString constants in CP_ACP codepage }
|
||||
if cp=CP_NONE then
|
||||
cp:=0;
|
||||
{ for delphiuncode mode output CP_ACP constants in the compiler codepage }
|
||||
if (cp=0) and (cs_explicit_codepage in current_settings.moduleswitches) then
|
||||
cp:=current_settings.sourcecodepage;
|
||||
entry := PHashSetItem(TTagHashSet(pool).FindOrAdd(value_str,len,cp))
|
||||
end
|
||||
entry := PHashSetItem(TTagHashSet(pool).FindOrAdd(value_str,len,tstringdef(resultdef).encoding))
|
||||
else
|
||||
entry := pool.FindOrAdd(value_str,len);
|
||||
|
||||
@ -310,7 +300,7 @@ implementation
|
||||
if len=0 then
|
||||
InternalError(2008032301) { empty string should be handled above }
|
||||
else
|
||||
lastlabel:=emit_ansistring_const(current_asmdata.AsmLists[al_typedconsts],value_str,len,cp);
|
||||
lastlabel:=emit_ansistring_const(current_asmdata.AsmLists[al_typedconsts],value_str,len,tstringdef(resultdef).encoding);
|
||||
end;
|
||||
cst_unicodestring,
|
||||
cst_widestring:
|
||||
|
@ -269,7 +269,12 @@ implementation
|
||||
remain too so that not too many/few bits are laoded }
|
||||
if equal_defs(p.resultdef,def) and
|
||||
not is_bitpacked_access(p) then
|
||||
p.resultdef:=def
|
||||
begin
|
||||
{ don't replace encoded string constants to rawbytestring encoding.
|
||||
preserve the codepage }
|
||||
if not (is_rawbytestring(def) and (p.nodetype=stringconstn)) then
|
||||
p.resultdef:=def
|
||||
end
|
||||
else
|
||||
begin
|
||||
case convtype of
|
||||
@ -598,7 +603,7 @@ implementation
|
||||
(p.nodetype=stringconstn) and
|
||||
{ don't cast to AnsiString if already casted to Wide/UnicodeString, issue #18266 }
|
||||
(tstringconstnode(p).cst_type in [cst_conststring,cst_shortstring,cst_longstring]) then
|
||||
p:=ctypeconvnode.create_internal(p,cansistringtype)
|
||||
p:=ctypeconvnode.create_internal(p,getansistringdef)
|
||||
else
|
||||
case p.resultdef.typ of
|
||||
enumdef :
|
||||
@ -994,7 +999,7 @@ implementation
|
||||
else
|
||||
begin
|
||||
if tstringconstnode(left).len>255 then
|
||||
inserttypeconv(left,cansistringtype)
|
||||
inserttypeconv(left,getansistringdef)
|
||||
else
|
||||
inserttypeconv(left,cshortstringtype);
|
||||
end;
|
||||
@ -1381,7 +1386,7 @@ implementation
|
||||
(is_widestring(left.resultdef) or
|
||||
is_unicodestring(left.resultdef)) then
|
||||
begin
|
||||
inserttypeconv(left,cansistringtype);
|
||||
inserttypeconv(left,getansistringdef);
|
||||
{ the second pass of second_cstring_to_pchar expects a }
|
||||
{ strinconstn, but this may become a call to the }
|
||||
{ widestring manager in case left contains "high ascii" }
|
||||
@ -2286,8 +2291,13 @@ implementation
|
||||
)
|
||||
) then
|
||||
begin
|
||||
tstringconstnode(left).changestringtype(resultdef);
|
||||
{ convert ansistring and rawbytestring constants to explicit source encoding if set }
|
||||
if is_ansistring(resultdef) and ((tstringdef(resultdef).encoding=0)or(tstringdef(resultdef).encoding=globals.CP_NONE)) then
|
||||
tstringconstnode(left).changestringtype(getansistringdef)
|
||||
else
|
||||
tstringconstnode(left).changestringtype(resultdef);
|
||||
result:=left;
|
||||
resultdef:=left.resultdef;
|
||||
left:=nil;
|
||||
exit;
|
||||
end;
|
||||
|
@ -926,7 +926,7 @@ implementation
|
||||
cst_shortstring :
|
||||
resultdef:=cshortstringtype;
|
||||
cst_ansistring :
|
||||
resultdef:=cansistringtype;
|
||||
resultdef:=getansistringdef;
|
||||
cst_unicodestring :
|
||||
resultdef:=cunicodestringtype;
|
||||
cst_widestring :
|
||||
|
@ -378,7 +378,7 @@ implementation
|
||||
if (tstringconstnode(n).len<=255) then
|
||||
inserttypeconv(n,cshortstringtype)
|
||||
else
|
||||
inserttypeconv(n,cansistringtype)
|
||||
inserttypeconv(n,getansistringdef)
|
||||
else if is_widechararray(n.resultdef) then
|
||||
inserttypeconv(n,cwidestringtype);
|
||||
end;
|
||||
@ -967,7 +967,7 @@ implementation
|
||||
{ (if you want to optimize to use shortstring, keep in mind that }
|
||||
{ readstr internally always uses ansistring, and to account for }
|
||||
{ chararrays with > 255 characters) }
|
||||
inserttypeconv(filepara.left,cansistringtype);
|
||||
inserttypeconv(filepara.left,getansistringdef);
|
||||
filepara.resultdef:=filepara.left.resultdef;
|
||||
if codegenerror then
|
||||
exit;
|
||||
@ -2270,7 +2270,7 @@ implementation
|
||||
case left.resultdef.typ of
|
||||
variantdef:
|
||||
begin
|
||||
inserttypeconv(left,cansistringtype);
|
||||
inserttypeconv(left,getansistringdef);
|
||||
end;
|
||||
|
||||
stringdef :
|
||||
|
@ -259,7 +259,7 @@ implementation
|
||||
constsym:
|
||||
begin
|
||||
if tconstsym(symtableentry).consttyp=constresourcestring then
|
||||
resultdef:=cansistringtype
|
||||
resultdef:=getansistringdef
|
||||
else
|
||||
internalerror(22799);
|
||||
end;
|
||||
|
@ -782,7 +782,7 @@ implementation
|
||||
(tstringconstnode(left).cst_type=cst_conststring) then
|
||||
begin
|
||||
if tstringconstnode(left).len>255 then
|
||||
inserttypeconv(left,cansistringtype)
|
||||
inserttypeconv(left,getansistringdef)
|
||||
else
|
||||
inserttypeconv(left,cshortstringtype);
|
||||
end;
|
||||
|
@ -133,7 +133,7 @@ implementation
|
||||
else
|
||||
begin
|
||||
if cs_ansistrings in current_settings.localswitches then
|
||||
def:=cansistringtype
|
||||
def:=getansistringdef
|
||||
else
|
||||
def:=cshortstringtype;
|
||||
end;
|
||||
@ -1608,7 +1608,7 @@ implementation
|
||||
begin
|
||||
p1:=cloadnode.create(srsym,srsymtable);
|
||||
do_typecheckpass(p1);
|
||||
p1.resultdef:=cansistringtype;
|
||||
p1.resultdef:=getansistringdef;
|
||||
end
|
||||
else
|
||||
p1:=genconstsymtree(tconstsym(srsym));
|
||||
|
@ -167,7 +167,7 @@ implementation
|
||||
cshortstringtype:=tstringdef.createshort(255);
|
||||
{ should we give a length to the default long and ansi string definition ?? }
|
||||
clongstringtype:=tstringdef.createlong(-1);
|
||||
cansistringtype:=tstringdef.createansi;
|
||||
cansistringtype:=tstringdef.createansi(0);
|
||||
if target_info.system in systems_windows then
|
||||
cwidestringtype:=tstringdef.createwide
|
||||
else
|
||||
|
@ -896,7 +896,7 @@ implementation
|
||||
1:
|
||||
begin
|
||||
if (tstringconstnode(n).cst_type in [cst_unicodestring,cst_widestring]) then
|
||||
inserttypeconv(n,cansistringtype);
|
||||
inserttypeconv(n,getansistringdef);
|
||||
if n.nodetype<>stringconstn then
|
||||
internalerror(2010033003);
|
||||
ca:=pointer(tstringconstnode(n).value_str);
|
||||
|
@ -354,6 +354,12 @@ implementation
|
||||
init_settings.sourcecodepage:=DefaultSystemCodePage;
|
||||
include(init_settings.moduleswitches,cs_explicit_codepage);
|
||||
end;
|
||||
end
|
||||
else
|
||||
begin
|
||||
exclude(current_settings.moduleswitches,cs_explicit_codepage);
|
||||
if changeinit then
|
||||
exclude(init_settings.moduleswitches,cs_explicit_codepage);
|
||||
end;
|
||||
end;
|
||||
|
||||
|
@ -595,7 +595,7 @@ interface
|
||||
constructor loadshort(ppufile:tcompilerppufile);
|
||||
constructor createlong(l : asizeint);
|
||||
constructor loadlong(ppufile:tcompilerppufile);
|
||||
constructor createansi;
|
||||
constructor createansi(aencoding:tstringencoding);
|
||||
constructor loadansi(ppufile:tcompilerppufile);
|
||||
constructor createwide;
|
||||
constructor loadwide(ppufile:tcompilerppufile);
|
||||
@ -826,6 +826,9 @@ interface
|
||||
|
||||
function use_vectorfpu(def : tdef) : boolean;
|
||||
|
||||
function getansistringcodepage:tstringencoding; inline;
|
||||
function getansistringdef:tstringdef; inline;
|
||||
|
||||
implementation
|
||||
|
||||
uses
|
||||
@ -848,6 +851,37 @@ implementation
|
||||
Helpers
|
||||
****************************************************************************}
|
||||
|
||||
function getansistringcodepage:tstringencoding; inline;
|
||||
begin
|
||||
if cs_explicit_codepage in current_settings.moduleswitches then
|
||||
result:=current_settings.sourcecodepage
|
||||
else
|
||||
result:=0;
|
||||
end;
|
||||
|
||||
function getansistringdef:tstringdef; inline;
|
||||
begin
|
||||
{ if codepage is explicitly defined in this mudule we need to return
|
||||
a replacement for ansistring def }
|
||||
if cs_explicit_codepage in current_settings.moduleswitches then
|
||||
begin
|
||||
if not assigned(current_module) then
|
||||
internalerror(2011101301);
|
||||
{ codepage can be redeclared only once per unit so we don't need a list of
|
||||
redefined ansistring but only one pointer }
|
||||
if not assigned(current_module.ansistrdef) then
|
||||
begin
|
||||
{ if we did not create it yet we need to do this now }
|
||||
symtablestack.push(current_module.localsymtable);
|
||||
current_module.ansistrdef:=tstringdef.createansi(current_settings.sourcecodepage);
|
||||
symtablestack.pop(current_module.localsymtable);
|
||||
end;
|
||||
result:=tstringdef(current_module.ansistrdef);
|
||||
end
|
||||
else
|
||||
result:=tstringdef(cansistringtype);
|
||||
end;
|
||||
|
||||
function make_mangledname(const typeprefix:string;st:TSymtable;const suffix:string):string;
|
||||
var
|
||||
s,hs,
|
||||
@ -1448,11 +1482,11 @@ implementation
|
||||
end;
|
||||
|
||||
|
||||
constructor tstringdef.createansi;
|
||||
constructor tstringdef.createansi(aencoding:tstringencoding);
|
||||
begin
|
||||
inherited create(stringdef);
|
||||
stringtype:=st_ansistring;
|
||||
encoding:=0;
|
||||
encoding:=aencoding;
|
||||
len:=-1;
|
||||
savesize:=sizeof(pint);
|
||||
end;
|
||||
|
65
tests/test/tcpstr17.pp
Normal file
65
tests/test/tcpstr17.pp
Normal file
@ -0,0 +1,65 @@
|
||||
// to have correct test result with delphi set codepage option to 65001
|
||||
program tcpstr17;
|
||||
{$ifdef FPC}
|
||||
{$mode delphi}
|
||||
{$codepage utf8}
|
||||
{$endif}
|
||||
{$apptype console}
|
||||
type
|
||||
TOEMStr = type AnsiString(866);
|
||||
{$ifndef FPC}
|
||||
TSystemCodePage = Word;
|
||||
const
|
||||
CP_UTF8 = 65001;
|
||||
{$endif}
|
||||
|
||||
procedure TestCodeConvRaw(const s: rawbytestring; const CodePage: TSystemCodePage);
|
||||
begin
|
||||
WriteLn(StringCodePage(s), ' ',s);
|
||||
if CodePage <> StringCodePage(s) then
|
||||
halt(1);
|
||||
end;
|
||||
|
||||
procedure TestCodeConvAnsi(const s: ansistring; const CodePage: TSystemCodePage);
|
||||
begin
|
||||
WriteLn(StringCodePage(s), ' ',s);
|
||||
if CodePage <> StringCodePage(s) then
|
||||
halt(2);
|
||||
end;
|
||||
|
||||
procedure TestCodeConvUTF(const s: utf8string; const CodePage: TSystemCodePage);
|
||||
begin
|
||||
WriteLn(StringCodePage(s), ' ',s);
|
||||
if CodePage <> StringCodePage(s) then
|
||||
halt(3);
|
||||
end;
|
||||
|
||||
var
|
||||
u: unicodestring;
|
||||
u8: utf8string;
|
||||
s: ansistring;
|
||||
oemstr: TOEMStr;
|
||||
begin
|
||||
u := #$0141#$00F3#$0064#$017A;
|
||||
u8 := u;
|
||||
TestCodeConvRaw(u8, CP_UTF8);
|
||||
// if UTF8 codepage is set in options S will have UTF8 codepage
|
||||
s := u8;
|
||||
TestCodeConvRaw(s, CP_UTF8);
|
||||
TestCodeConvAnsi(u8, CP_UTF8);
|
||||
TestCodeConvAnsi(s, CP_UTF8);
|
||||
// converts to 866
|
||||
oemstr := u8;
|
||||
TestCodeConvRaw(oemstr, 866);
|
||||
TestCodeConvAnsi(oemstr, DefaultSystemCodePage);
|
||||
s := 'test';
|
||||
TestCodeConvRaw(s, CP_UTF8);
|
||||
// converts to System codepage
|
||||
s := oemstr;
|
||||
TestCodeConvRaw(s, DefaultSystemCodePage);
|
||||
TestCodeConvUTF(s, DefaultSystemCodePage);
|
||||
// outputs in source codepage instead of OEM
|
||||
TestCodeConvRaw('привет', CP_UTF8);
|
||||
// outputs in OEM codepage
|
||||
TestCodeConvRaw(TOEMStr('привет'), 866);
|
||||
end.
|
Loading…
Reference in New Issue
Block a user