From 50514538063c0d0c5925da6e5a0430ceaa08506e Mon Sep 17 00:00:00 2001 From: Jonas Maebe Date: Fri, 31 May 2013 12:05:14 +0000 Subject: [PATCH] + support for LOC_(C)MMREGISTER in hlcg o migrated location_force_mmregister_scalar from ncgutil to hlcgobj git-svn-id: trunk@24661 - --- compiler/arm/narmadd.pas | 16 +-- compiler/arm/narmcnv.pas | 4 +- compiler/arm/narminl.pas | 2 +- compiler/arm/narmmat.pas | 4 +- compiler/hlcg2ll.pas | 244 ++++++++++++++++++++++++++++++------- compiler/hlcgobj.pas | 257 ++++++++++++++++++++++++++------------- compiler/ncgadd.pas | 2 +- compiler/ncgcnv.pas | 7 +- compiler/ncgld.pas | 22 ++-- compiler/ncgutil.pas | 51 +------- compiler/x86/nx86add.pas | 8 +- compiler/x86/nx86inl.pas | 10 +- compiler/x86/nx86mat.pas | 2 +- 13 files changed, 416 insertions(+), 213 deletions(-) diff --git a/compiler/arm/narmadd.pas b/compiler/arm/narmadd.pas index aa21d89b0f..b5fcf54e0b 100644 --- a/compiler/arm/narmadd.pas +++ b/compiler/arm/narmadd.pas @@ -170,8 +170,8 @@ interface begin { force mmreg as location, left right doesn't matter as both will be in a fpureg } - location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,true); - location_force_mmregscalar(current_asmdata.CurrAsmList,right.location,true); + hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true); + hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,right.location,right.resultdef,true); location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef)); if left.location.loc<>LOC_CMMREGISTER then @@ -214,8 +214,8 @@ interface begin { force mmreg as location, left right doesn't matter as both will be in a fpureg } - location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,true); - location_force_mmregscalar(current_asmdata.CurrAsmList,right.location,true); + hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true); + hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,right.location,right.resultdef,true); location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef)); if left.location.loc<>LOC_CMMREGISTER then @@ -284,8 +284,8 @@ interface fpu_vfpv3, fpu_vfpv3_d16: begin - location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,true); - location_force_mmregscalar(current_asmdata.CurrAsmList,right.location,true); + hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true); + hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,right.location,right.resultdef,true); if (tfloatdef(left.resultdef).floattype=s32real) then if nodetype in [equaln,unequaln] then @@ -303,8 +303,8 @@ interface end; fpu_fpv4_s16: begin - location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,true); - location_force_mmregscalar(current_asmdata.CurrAsmList,right.location,true); + hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true); + hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,right.location,right.resultdef,true); if nodetype in [equaln,unequaln] then op:=A_VCMP diff --git a/compiler/arm/narmcnv.pas b/compiler/arm/narmcnv.pas index 3a1365c196..589c785bac 100644 --- a/compiler/arm/narmcnv.pas +++ b/compiler/arm/narmcnv.pas @@ -246,7 +246,7 @@ implementation begin location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef)); signed:=left.location.size=OS_S32; - location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,false); + hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,false); if (left.location.size<>OS_F32) then internalerror(2009112703); if left.location.size<>location.size then @@ -260,7 +260,7 @@ implementation begin location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef)); signed:=left.location.size=OS_S32; - location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,false); + hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,false); if (left.location.size<>OS_F32) then internalerror(2009112703); if left.location.size<>location.size then diff --git a/compiler/arm/narminl.pas b/compiler/arm/narminl.pas index 6fd2be2199..c7676c3d0b 100644 --- a/compiler/arm/narminl.pas +++ b/compiler/arm/narminl.pas @@ -88,7 +88,7 @@ implementation fpu_vfpv3_d16, fpu_fpv4_s16: begin - location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,true); + hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true); location_copy(location,left.location); if left.location.loc=LOC_CMMREGISTER then begin diff --git a/compiler/arm/narmmat.pas b/compiler/arm/narmmat.pas index 9712da8ec3..25441c1fe1 100644 --- a/compiler/arm/narmmat.pas +++ b/compiler/arm/narmmat.pas @@ -390,7 +390,7 @@ implementation fpu_vfpv3, fpu_vfpv3_d16: begin - location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,true); + hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true); location:=left.location; if (left.location.loc=LOC_CMMREGISTER) then location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size); @@ -403,7 +403,7 @@ implementation end; fpu_fpv4_s16: begin - location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,true); + hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true); location:=left.location; if (left.location.loc=LOC_CMMREGISTER) then location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size); diff --git a/compiler/hlcg2ll.pas b/compiler/hlcg2ll.pas index eddcaa97be..297320b6ef 100644 --- a/compiler/hlcg2ll.pas +++ b/compiler/hlcg2ll.pas @@ -67,8 +67,8 @@ unit hlcg2ll; {# Gets a register suitable to do integer operations on.} function getaddressregister(list:TAsmList;size:tdef):Tregister;override; function getfpuregister(list:TAsmList;size:tdef):Tregister;override; -// we don't have high level defs yet that translate into all mm cgsizes -// function getmmregister(list:TAsmList;size:tdef):Tregister;override; + { warning: only works correctly for fpu types currently } + function getmmregister(list:TAsmList;size:tdef):Tregister;override; function getflagregister(list:TAsmList;size:tdef):Tregister;override; {Does the generic cg need SIMD registers, like getmmxregister? Or should the cpu specific child cg object have such a method?} @@ -188,14 +188,10 @@ unit hlcg2ll; procedure a_loadfpu_ref_cgpara(list : TAsmList;fromsize : tdef;const ref : treference;const cgpara : TCGPara);override; { vector register move instructions } -// we don't have high level defs yet that translate into all mm cgsizes -{ procedure a_loadmm_reg_reg(list: TAsmList; fromsize, tosize: tdef;reg1, reg2: tregister;shuffle : pmmshuffle); override; procedure a_loadmm_ref_reg(list: TAsmList; fromsize, tosize: tdef;const ref: treference; reg: tregister;shuffle : pmmshuffle); override; procedure a_loadmm_reg_ref(list: TAsmList; fromsize, tosize: tdef;reg: tregister; const ref: treference;shuffle : pmmshuffle); override; -} - procedure a_loadmm_loc_reg(list: TAsmList; fromsize, tosize: tcgsize; const loc: tlocation; const reg: tregister;shuffle : pmmshuffle);override; -{ + procedure a_loadmm_loc_reg(list: TAsmList; fromsize, tosize: tdef; const loc: tlocation; const reg: tregister;shuffle : pmmshuffle);override; procedure a_loadmm_reg_loc(list: TAsmList; fromsize, tosize: tdef; const reg: tregister; const loc: tlocation;shuffle : pmmshuffle);override; procedure a_loadmm_reg_cgpara(list: TAsmList; fromsize: tdef; reg: tregister;const cgpara : TCGPara;shuffle : pmmshuffle); override; procedure a_loadmm_ref_cgpara(list: TAsmList; fromsize: tdef; const ref: treference;const cgpara : TCGPara;shuffle : pmmshuffle); override; @@ -204,10 +200,8 @@ unit hlcg2ll; procedure a_opmm_ref_reg(list: TAsmList; Op: TOpCG; size : tdef;const ref: treference; reg: tregister;shuffle : pmmshuffle); override; procedure a_opmm_loc_reg(list: TAsmList; Op: TOpCG; size : tdef;const loc: tlocation; reg: tregister;shuffle : pmmshuffle); override; procedure a_opmm_reg_ref(list: TAsmList; Op: TOpCG; size : tdef;reg: tregister;const ref: treference; shuffle : pmmshuffle); override; -} -// we don't have high level defs yet that translate into all mm cgsizes -// procedure a_loadmm_intreg_reg(list: TAsmList; fromsize, tosize : tdef; intreg, mmreg: tregister; shuffle: pmmshuffle); override; -// procedure a_loadmm_reg_intreg(list: TAsmList; fromsize, tosize : tdef; mmreg, intreg: tregister; shuffle : pmmshuffle); override; + procedure a_loadmm_intreg_reg(list: TAsmList; fromsize, tosize : tdef; intreg, mmreg: tregister; shuffle: pmmshuffle); override; + procedure a_loadmm_reg_intreg(list: TAsmList; fromsize, tosize : tdef; mmreg, intreg: tregister; shuffle : pmmshuffle); override; { basic arithmetic operations } { note: for operators which require only one argument (not, neg), use } @@ -322,7 +316,7 @@ unit hlcg2ll; procedure location_force_reg(list:TAsmList;var l:tlocation;src_size,dst_size:tdef;maybeconst:boolean);override; procedure location_force_fpureg(list:TAsmList;var l: tlocation;size: tdef;maybeconst:boolean);override; procedure location_force_mem(list:TAsmList;var l:tlocation;size:tdef);override; -// procedure location_force_mmregscalar(list:TAsmList;var l: tlocation;size:tdef;maybeconst:boolean);override; + procedure location_force_mmregscalar(list:TAsmList;var l: tlocation;size:tdef;maybeconst:boolean);override; // procedure location_force_mmreg(list:TAsmList;var l: tlocation;size:tdef;maybeconst:boolean);override; procedure maketojumpbool(list:TAsmList; p : tnode);override; @@ -338,6 +332,7 @@ unit hlcg2ll; protected procedure initialize_regvars(p: TObject; arg: pointer); override; + function getmmcgsize(reg: tregister; size: tcgsize): tcgsize; virtual; end; @@ -385,6 +380,12 @@ implementation begin result:=cg.getfpuregister(list,def_cgsize(size)); end; + + function thlcg2ll.getmmregister(list: TAsmList; size: tdef): Tregister; + begin + result:=cg.getmmregister(list,def_cgsize(size)); + end; + (* function thlcg2ll.getmmregister(list: TAsmList; size: tdef): Tregister; begin @@ -659,93 +660,178 @@ implementation cg.a_loadfpu_ref_cgpara(list,def_cgsize(fromsize),ref,cgpara); end; - procedure thlcg2ll.a_loadmm_loc_reg(list: TAsmList; fromsize, tosize: tcgsize; const loc: tlocation; const reg: tregister; shuffle: pmmshuffle); + procedure thlcg2ll.a_loadmm_loc_reg(list: TAsmList; fromsize, tosize: tdef; const loc: tlocation; const reg: tregister;shuffle : pmmshuffle); var tmpreg: tregister; + tocgsize: tcgsize; begin + { no vector support yet } + if shuffle<>mms_movescalar then + internalerror(2012071225); + { sanity check } + if def_cgsize(fromsize)<>loc.size then + internalerror(2012071226); + tocgsize:=getmmcgsize(reg,def_cgsize(tosize)); case loc.loc of LOC_SUBSETREG,LOC_CSUBSETREG, LOC_SUBSETREF,LOC_CSUBSETREF: begin tmpreg:=cg.getintregister(list,loc.size); - a_load_loc_reg(list,tcgsize2orddef(fromsize),tcgsize2orddef(fromsize),loc,tmpreg); - cg.a_loadmm_intreg_reg(list,loc.size,tosize,tmpreg,reg,shuffle); + a_load_loc_reg(list,fromsize,fromsize,loc,tmpreg); + cg.a_loadmm_intreg_reg(list,def_cgsize(fromsize),tocgsize,tmpreg,reg,shuffle); end else - cg.a_loadmm_loc_reg(list,tosize,loc,reg,shuffle); + cg.a_loadmm_loc_reg(list,tocgsize,loc,reg,shuffle); end; end; -(* procedure thlcg2ll.a_loadmm_reg_reg(list: TAsmList; fromsize, tosize: tdef; reg1, reg2: tregister; shuffle: pmmshuffle); + var + fromcgsize: tcgsize; + tocgsize: tcgsize; begin - cg.a_loadmm_reg_reg(list,def_cgsize(fromsize),def_cgsize(tosize),reg1,reg2,shuffle); + { no vector support yet } + if shuffle<>mms_movescalar then + internalerror(2012062305); + fromcgsize:=getmmcgsize(reg1,def_cgsize(fromsize)); + tocgsize:=getmmcgsize(reg2,def_cgsize(tosize)); + { records may be stored in mmregisters, but def_cgsize will return an + integer size for them... } + cg.a_loadmm_reg_reg(list,fromcgsize,tocgsize,reg1,reg2,shuffle); end; procedure thlcg2ll.a_loadmm_ref_reg(list: TAsmList; fromsize, tosize: tdef; const ref: treference; reg: tregister; shuffle: pmmshuffle); + var + tocgsize: tcgsize; begin - cg.a_loadmm_ref_reg(list,def_cgsize(fromsize),def_cgsize(tosize),ref,reg,shuffle); + { no vector support yet } + if shuffle<>mms_movescalar then + internalerror(2012062306); + { records may be stored in mmregisters, but def_cgsize will return an + integer size for them... } + tocgsize:=getmmcgsize(reg,def_cgsize(tosize)); + cg.a_loadmm_ref_reg(list,def_cgsize(fromsize),tocgsize,ref,reg,shuffle); end; procedure thlcg2ll.a_loadmm_reg_ref(list: TAsmList; fromsize, tosize: tdef; reg: tregister; const ref: treference; shuffle: pmmshuffle); + var + fromcgsize: tcgsize; begin - cg.a_loadmm_reg_ref(list,def_cgsize(fromsize),def_cgsize(tosize),reg,ref,shuffle); - end; - - procedure thlcg2ll.a_loadmm_loc_reg(list: TAsmList; fromsize, tosize: tdef; const loc: tlocation; const reg: tregister; shuffle: pmmshuffle); - begin -{$ifdef extdebug} - if def_cgsize(fromsize)<>loc.size then - internalerror(2010112103); -{$endif} - cg.a_loadmm_loc_reg(list,def_cgsize(tosize),loc,reg,shuffle); + { no vector support yet } + if shuffle<>mms_movescalar then + internalerror(2012062307); + { records may be stored in mmregisters, but def_cgsize will return an + integer size for them... } + fromcgsize:=getmmcgsize(reg,def_cgsize(fromsize)); + cg.a_loadmm_reg_ref(list,fromcgsize,def_cgsize(tosize),reg,ref,shuffle); end; procedure thlcg2ll.a_loadmm_reg_loc(list: TAsmList; fromsize, tosize: tdef; const reg: tregister; const loc: tlocation; shuffle: pmmshuffle); + var + fromcgsize: tcgsize; begin -{$ifdef extdebug} + { no vector support yet } + if shuffle<>mms_movescalar then + internalerror(2012071215); + { sanity check } if def_cgsize(tosize)<>loc.size then - internalerror(2010112104); -{$endif} - cg.a_loadmm_reg_loc(list,def_cgsize(fromsize),reg,loc,shuffle); + internalerror(2012071216); + { records may be stored in mmregisters, but def_cgsize will return an + integer size for them... } + fromcgsize:=getmmcgsize(reg,def_cgsize(fromsize)); + cg.a_loadmm_reg_loc(list,fromcgsize,reg,loc,shuffle); end; procedure thlcg2ll.a_loadmm_reg_cgpara(list: TAsmList; fromsize: tdef; reg: tregister; const cgpara: TCGPara; shuffle: pmmshuffle); + var + fromcgsize: tcgsize; begin - cg.a_loadmm_reg_cgpara(list,def_cgsize(fromsize),reg,cgpara,shuffle); + { no vector support yet } + if shuffle<>mms_movescalar then + internalerror(2012071217); + { records may be stored in mmregisters, but def_cgsize will return an + integer size for them... } + fromcgsize:=getmmcgsize(reg,def_cgsize(fromsize)); + cg.a_loadmm_reg_cgpara(list,fromcgsize,reg,cgpara,shuffle); end; procedure thlcg2ll.a_loadmm_ref_cgpara(list: TAsmList; fromsize: tdef; const ref: treference; const cgpara: TCGPara; shuffle: pmmshuffle); begin + { no vector support yet } + if shuffle<>mms_movescalar then + internalerror(2012071218); cg.a_loadmm_ref_cgpara(list,def_cgsize(fromsize),ref,cgpara,shuffle); end; procedure thlcg2ll.a_loadmm_loc_cgpara(list: TAsmList; fromsize: tdef; const loc: tlocation; const cgpara: TCGPara; shuffle: pmmshuffle); begin -{$ifdef extdebug} + { no vector support yet } + if shuffle<>mms_movescalar then + internalerror(2012071219); + { sanity check } if def_cgsize(fromsize)<>loc.size then - internalerror(2010112105); -{$endif} + internalerror(2012071220); cg.a_loadmm_loc_cgpara(list,loc,cgpara,shuffle); end; + procedure thlcg2ll.a_opmm_reg_reg(list: TAsmList; Op: TOpCG; size: tdef; src, dst: tregister; shuffle: pmmshuffle); + begin + { no vector support yet } + if shuffle<>mms_movescalar then + internalerror(2012071221); + cg.a_opmm_reg_reg(list,op,def_cgsize(size),src,dst,shuffle); + end; + + procedure thlcg2ll.a_opmm_ref_reg(list: TAsmList; Op: TOpCG; size: tdef; const ref: treference; reg: tregister; shuffle: pmmshuffle); + begin + { no vector support yet } + if shuffle<>mms_movescalar then + internalerror(2012071222); + cg.a_opmm_ref_reg(list,op,def_cgsize(size),ref,reg,shuffle); + end; + procedure thlcg2ll.a_opmm_loc_reg(list: TAsmList; Op: TOpCG; size: tdef; const loc: tlocation; reg: tregister; shuffle: pmmshuffle); begin + { no vector support yet } + if shuffle<>mms_movescalar then + internalerror(2012071223); cg.a_opmm_loc_reg(list,op,def_cgsize(size),loc,reg,shuffle); end; -*) -(* - procedure thlcg2ll.a_loadmm_intreg_reg(list: TAsmList; fromsize, tosize: tdef; intreg, mmreg: tregister; shuffle: pmmshuffle); + procedure thlcg2ll.a_opmm_reg_ref(list: TAsmList; Op: TOpCG; size: tdef; reg: tregister; const ref: treference; shuffle: pmmshuffle); begin - cg.a_loadmm_intreg_reg(list,def_cgsize(fromsize),def_cgsize(tosize),intreg,mmreg,shuffle); + { no vector support yet } + if shuffle<>mms_movescalar then + internalerror(2012071224); + cg.a_opmm_reg_ref(list,op,def_cgsize(size),reg,ref,shuffle); + end; + + procedure thlcg2ll.a_loadmm_intreg_reg(list: TAsmList; fromsize, tosize: tdef; intreg, mmreg: tregister; shuffle: pmmshuffle); + var + tocgsize: tcgsize; + begin + { no vector support yet } + if shuffle<>mms_movescalar then + internalerror(2012071227); + { records may be stored in mmregisters, but def_cgsize will return an + integer size for them... } + tocgsize:=getmmcgsize(mmreg,def_cgsize(tosize)); + cg.a_loadmm_intreg_reg(list,def_cgsize(fromsize),tocgsize,intreg,mmreg,shuffle); end; procedure thlcg2ll.a_loadmm_reg_intreg(list: TAsmList; fromsize, tosize: tdef; mmreg, intreg: tregister; shuffle: pmmshuffle); + var + fromcgsize: tcgsize; begin - cg.a_loadmm_reg_intreg(list,def_cgsize(fromsize),def_cgsize(tosize),mmreg,intreg,shuffle); + { no vector support yet } + if shuffle<>mms_movescalar then + internalerror(2012071228); + { records may be stored in mmregisters, but def_cgsize will return an + integer size for them... } + fromcgsize:=getmmcgsize(mmreg,def_cgsize(fromsize)); + cg.a_loadmm_reg_intreg(list,fromcgsize,def_cgsize(tosize),mmreg,intreg,shuffle); end; -*) + procedure thlcg2ll.a_op_const_reg(list: TAsmList; Op: TOpCG; size: tdef; a: tcgint; reg: TRegister); begin cg.a_op_const_reg(list,op,def_cgsize(size),a,reg); @@ -1222,6 +1308,61 @@ implementation inherited; end; end; + + procedure thlcg2ll.location_force_mmregscalar(list: TAsmList; var l: tlocation; size: tdef; maybeconst: boolean); + var + reg : tregister; + href : treference; + newsize : tdef; + begin + if (l.loc<>LOC_MMREGISTER) and + ((l.loc<>LOC_CMMREGISTER) or (not maybeconst)) then + begin + { if it's in an fpu register, store to memory first } + if (l.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER]) then + begin + tg.GetTemp(list,tcgsize2size[l.size],tcgsize2size[l.size],tt_normal,href); + cg.a_loadfpu_reg_ref(list,l.size,l.size,l.register,href); + location_reset_ref(l,LOC_REFERENCE,l.size,0); + l.reference:=href; + end; +{$ifndef cpu64bitalu} + if (l.loc in [LOC_REGISTER,LOC_CREGISTER]) and + (l.size in [OS_64,OS_S64]) then + begin + reg:=cg.getmmregister(list,OS_F64); + cg64.a_loadmm_intreg64_reg(list,OS_F64,l.register64,reg); + l.size:=OS_F64; + size:=s64floattype; + end + else +{$endif not cpu64bitalu} + begin + { on ARM, CFP values may be located in integer registers, + and its second_int_to_real() also uses this routine to + force integer (memory) values in an mmregister } + if (l.size in [OS_32,OS_S32]) then + begin + size:=tcgsize2orddef(l.size); + newsize:=s32floattype; + end + else if (l.size in [OS_64,OS_S64]) then + begin + size:=tcgsize2orddef(l.size); + newsize:=s64floattype; + end + else + newsize:=size; + reg:=getmmregister(list,newsize); + a_loadmm_loc_reg(list,size,newsize,l,reg,mms_movescalar); + l.size:=def_cgsize(newsize); + end; + location_freetemp(list,l); + location_reset(l,LOC_MMREGISTER,l.size); + l.register:=reg; + end; + end; + (* procedure thlcg2ll.location_force_mmregscalar(list: TAsmList; var l: tlocation; size: tdef; maybeconst: boolean); begin @@ -1282,7 +1423,7 @@ implementation LOC_CMMREGISTER: begin tmploc:=l; - location_force_mmregscalar(list,tmploc,false); + location_force_mmregscalar(list,tmploc,size,false); cg.a_loadmm_reg_cgpara(list,tmploc.size,tmploc.register,cgpara,mms_movescalar); end; { Some targets pass floats in normal registers } @@ -1440,4 +1581,19 @@ implementation inherited initialize_regvars(p, arg); end; + function thlcg2ll.getmmcgsize(reg: tregister; size: tcgsize): tcgsize; + begin + result:=size; + if getregtype(reg)=R_MMREGISTER then + begin + case size of + OS_32: + result:=OS_F32; + OS_64: + result:=OS_F64; + end; + end; + end; + + end. diff --git a/compiler/hlcgobj.pas b/compiler/hlcgobj.pas index 1d2ce5f406..3d22782475 100644 --- a/compiler/hlcgobj.pas +++ b/compiler/hlcgobj.pas @@ -69,8 +69,8 @@ unit hlcgobj; {# Gets a register suitable to do integer operations on.} function getaddressregister(list:TAsmList;size:tdef):Tregister;virtual; function getfpuregister(list:TAsmList;size:tdef):Tregister;virtual; -// we don't have high level defs yet that translate into all mm cgsizes -// function getmmregister(list:TAsmList;size:tdef):Tregister;virtual; + { warning: only works correctly for fpu types currently } + function getmmregister(list:TAsmList;size:tdef):Tregister;virtual; function getflagregister(list:TAsmList;size:tdef):Tregister;virtual; function getregisterfordef(list: TAsmList;size:tdef):Tregister;virtual; {Does the generic cg need SIMD registers, like getmmxregister? Or should @@ -292,27 +292,26 @@ unit hlcgobj; procedure a_loadfpu_ref_cgpara(list : TAsmList;fromsize : tdef;const ref : treference;const cgpara : TCGPara);virtual; { vector register move instructions } -// we don't have high level defs yet that translate into all mm cgsizes -{ - procedure a_loadmm_reg_reg(list: TAsmList; fromsize, tosize: tdef;reg1, reg2: tregister;shuffle : pmmshuffle); virtual; - procedure a_loadmm_ref_reg(list: TAsmList; fromsize, tosize: tdef;const ref: treference; reg: tregister;shuffle : pmmshuffle); virtual; - procedure a_loadmm_reg_ref(list: TAsmList; fromsize, tosize: tdef;reg: tregister; const ref: treference;shuffle : pmmshuffle); virtual; -} + procedure a_loadmm_reg_reg(list: TAsmList; fromsize, tosize: tdef;reg1, reg2: tregister;shuffle : pmmshuffle); virtual; abstract; + procedure a_loadmm_ref_reg(list: TAsmList; fromsize, tosize: tdef;const ref: treference; reg: tregister;shuffle : pmmshuffle); virtual; abstract; + procedure a_loadmm_reg_ref(list: TAsmList; fromsize, tosize: tdef;reg: tregister; const ref: treference;shuffle : pmmshuffle); virtual; abstract; + procedure a_loadmm_ref_ref(list: TAsmList; fromsize, tosize: tdef; const fromref, toref: treference; shuffle: pmmshuffle); virtual; { required for subsetreg/ref; still tcgsize rather than tdef because of reason mentioned above } - procedure a_loadmm_loc_reg(list: TAsmList; fromsize, tosize: tcgsize; const loc: tlocation; const reg: tregister;shuffle : pmmshuffle);virtual; abstract; -{ + procedure a_loadmm_loc_reg(list: TAsmList; fromsize, tosize: tdef; const loc: tlocation; const reg: tregister; shuffle : pmmshuffle);virtual; procedure a_loadmm_reg_loc(list: TAsmList; fromsize, tosize: tdef; const reg: tregister; const loc: tlocation;shuffle : pmmshuffle);virtual; procedure a_loadmm_reg_cgpara(list: TAsmList; fromsize: tdef; reg: tregister;const cgpara : TCGPara;shuffle : pmmshuffle); virtual; procedure a_loadmm_ref_cgpara(list: TAsmList; fromsize: tdef; const ref: treference;const cgpara : TCGPara;shuffle : pmmshuffle); virtual; procedure a_loadmm_loc_cgpara(list: TAsmList; fromsize: tdef; const loc: tlocation; const cgpara : TCGPara;shuffle : pmmshuffle); virtual; - procedure a_opmm_reg_reg(list: TAsmList; Op: TOpCG; size : tdef;src,dst: tregister;shuffle : pmmshuffle); virtual; + procedure a_opmm_reg_reg(list: TAsmList; Op: TOpCG; size : tdef;src,dst: tregister;shuffle : pmmshuffle); virtual; abstract; procedure a_opmm_ref_reg(list: TAsmList; Op: TOpCG; size : tdef;const ref: treference; reg: tregister;shuffle : pmmshuffle); virtual; procedure a_opmm_loc_reg(list: TAsmList; Op: TOpCG; size : tdef;const loc: tlocation; reg: tregister;shuffle : pmmshuffle); virtual; procedure a_opmm_reg_ref(list: TAsmList; Op: TOpCG; size : tdef;reg: tregister;const ref: treference; shuffle : pmmshuffle); virtual; -} -// we don't have high level defs yet that translate into all mm cgsizes -// procedure a_loadmm_intreg_reg(list: TAsmList; fromsize, tosize : tdef; intreg, mmreg: tregister; shuffle: pmmshuffle); virtual; -// procedure a_loadmm_reg_intreg(list: TAsmList; fromsize, tosize : tdef; mmreg, intreg: tregister; shuffle : pmmshuffle); virtual; + { requires a temp that is interpreted in two different ways, and we + don't have a way (yet) to tag a treference with tdef information so + targets like LLVM can insert the necessary bitcast + } + procedure a_loadmm_intreg_reg(list: TAsmList; fromsize, tosize : tdef; intreg, mmreg: tregister; shuffle: pmmshuffle); virtual; abstract; + procedure a_loadmm_reg_intreg(list: TAsmList; fromsize, tosize : tdef; mmreg, intreg: tregister; shuffle : pmmshuffle); virtual; abstract; { basic arithmetic operations } { note: for operators which require only one argument (not, neg), use } @@ -473,7 +472,7 @@ unit hlcgobj; procedure location_force_reg(list:TAsmList;var l:tlocation;src_size,dst_size:tdef;maybeconst:boolean);virtual; procedure location_force_fpureg(list:TAsmList;var l: tlocation;size: tdef;maybeconst:boolean);virtual; procedure location_force_mem(list:TAsmList;var l:tlocation;size:tdef);virtual; -// procedure location_force_mmregscalar(list:TAsmList;var l: tlocation;size:tdef;maybeconst:boolean);virtual;abstract; + procedure location_force_mmregscalar(list:TAsmList;var l: tlocation;size:tdef;maybeconst:boolean);virtual; // procedure location_force_mmreg(list:TAsmList;var l: tlocation;size:tdef;maybeconst:boolean);virtual;abstract; { Retrieve the location of the data pointed to in location l, when the location is @@ -607,6 +606,12 @@ implementation begin result:=cg.getfpuregister(list,def_cgsize(size)); end; + + function thlcgobj.getmmregister(list: TAsmList; size: tdef): Tregister; + begin + result:=cg.getmmregister(list,def_cgsize(size)); + end; + (* function thlcgobj.getmmregister(list: TAsmList; size: tdef): Tregister; begin @@ -771,10 +776,8 @@ implementation reference_reset_base(ref,cgpara.location^.reference.index,cgpara.location^.reference.offset,cgpara.alignment); a_load_reg_ref(list,size,cgpara.def,r,ref); end; -(* LOC_MMREGISTER,LOC_CMMREGISTER: a_loadmm_intreg_reg(list,size,cgpara.def,r,cgpara.location^.register,mms_movescalar); -*) LOC_FPUREGISTER,LOC_CFPUREGISTER: begin tg.gethltemp(list,size,size.size,tt_normal,ref); @@ -942,10 +945,8 @@ implementation a_load_reg_subsetreg(list,fromsize,tosize,reg,loc.sreg); LOC_SUBSETREF,LOC_CSUBSETREF: a_load_reg_subsetref(list,fromsize,tosize,reg,loc.sref); - { we don't have enough type information to handle these here LOC_MMREGISTER,LOC_CMMREGISTER: - a_loadmm_intreg_reg(list,fromsize,loc.size,reg,loc.register,mms_movescalar); - } + a_loadmm_intreg_reg(list,fromsize,tosize,reg,loc.register,mms_movescalar); else internalerror(2010120402); end; @@ -2309,24 +2310,23 @@ implementation internalerror(2010120423); end; end; -(* - procedure thlcgobj.a_loadmm_reg_reg(list: TAsmList; fromsize, tosize: tdef; reg1, reg2: tregister; shuffle: pmmshuffle); - begin - cg.a_loadmm_reg_reg(list,def_cgsize(fromsize),def_cgsize(tosize),reg1,reg2,shuffle); - end; - procedure thlcgobj.a_loadmm_ref_reg(list: TAsmList; fromsize, tosize: tdef; const ref: treference; reg: tregister; shuffle: pmmshuffle); + procedure thlcgobj.a_loadmm_ref_ref(list: TAsmList; fromsize, tosize: tdef; const fromref, toref: treference; shuffle: pmmshuffle); + var + reg: tregister; begin - cg.a_loadmm_ref_reg(list,def_cgsize(fromsize),def_cgsize(tosize),ref,reg,shuffle); - end; - - procedure thlcgobj.a_loadmm_reg_ref(list: TAsmList; fromsize, tosize: tdef; reg: tregister; const ref: treference; shuffle: pmmshuffle); - begin - cg.a_loadmm_reg_ref(list,def_cgsize(fromsize),def_cgsize(tosize),reg,ref,shuffle); + reg:=getmmregister(list,tosize); + a_loadmm_ref_reg(list,fromsize,tosize,fromref,reg,shuffle); + a_loadmm_reg_ref(list,tosize,tosize,reg,toref,shuffle); end; procedure thlcgobj.a_loadmm_loc_reg(list: TAsmList; fromsize, tosize: tdef; const loc: tlocation; const reg: tregister; shuffle: pmmshuffle); + var + tmpreg: tregister; begin + { no vector support yet } + if shuffle<>mms_movescalar then + internalerror(2012062302); case loc.loc of LOC_MMREGISTER,LOC_CMMREGISTER: a_loadmm_reg_reg(list,fromsize,tosize,loc.register,reg,shuffle); @@ -2334,6 +2334,13 @@ implementation a_loadmm_ref_reg(list,fromsize,tosize,loc.reference,reg,shuffle); LOC_REGISTER,LOC_CREGISTER: a_loadmm_intreg_reg(list,fromsize,tosize,loc.register,reg,shuffle); + LOC_SUBSETREG,LOC_CSUBSETREG, + LOC_SUBSETREF,LOC_CSUBSETREF: + begin + tmpreg:=getintregister(list,fromsize); + a_load_loc_reg(list,fromsize,fromsize,loc,tmpreg); + a_loadmm_intreg_reg(list,fromsize,tosize,tmpreg,reg,shuffle); + end else internalerror(2010120414); end; @@ -2341,6 +2348,9 @@ implementation procedure thlcgobj.a_loadmm_reg_loc(list: TAsmList; fromsize, tosize: tdef; const reg: tregister; const loc: tlocation; shuffle: pmmshuffle); begin + { no vector support yet } + if shuffle<>mms_movescalar then + internalerror(2012062303); case loc.loc of LOC_MMREGISTER,LOC_CMMREGISTER: a_loadmm_reg_reg(list,fromsize,tosize,reg,loc.register,shuffle); @@ -2355,6 +2365,9 @@ implementation var href : treference; begin + { no vector support yet } + if shuffle<>mms_movescalar then + internalerror(2012062304); cgpara.check_simple_location; paramanager.alloccgpara(list,cgpara); case cgpara.location^.loc of @@ -2369,11 +2382,11 @@ implementation begin if assigned(shuffle) and not shufflescalar(shuffle) then - internalerror(2009112510); - a_loadmm_reg_intreg(list,deomsize,cgpara.def,reg,cgpara.location^.register,mms_movescalar); + internalerror(2012071205); + a_loadmm_reg_intreg(list,fromsize,cgpara.def,reg,cgpara.location^.register,mms_movescalar); end else - internalerror(2010120427); + internalerror(2012071204); end; end; @@ -2382,9 +2395,12 @@ implementation hr : tregister; hs : tmmshuffle; begin + { no vector support yet } + if shuffle<>mms_movescalar then + internalerror(2012062308); cgpara.check_simple_location; - hr:=cg.getmmregister(list,cgpara.size); - a_loadmm_ref_reg(list,deomsize,cgpara.def,ref,hr,shuffle); + hr:=getmmregister(list,cgpara.def); + a_loadmm_ref_reg(list,fromsize,cgpara.def,ref,hr,shuffle); if realshuffle(shuffle) then begin hs:=shuffle^; @@ -2399,31 +2415,68 @@ implementation begin {$ifdef extdebug} if def_cgsize(fromsize)<>loc.size then - internalerror(2010112105); + internalerror(2012071203); {$endif} - cg.a_loadmm_loc_cgpara(list,loc,cgpara,shuffle); - end; - - procedure thlcgobj.a_opmm_reg_reg(list: TAsmList; Op: TOpCG; size: tdef; src, dst: tregister; shuffle: pmmshuffle); - begin - cg.a_opmm_reg_reg(list,op,def_cgsize(size),src,dst,shuffle); + case loc.loc of + LOC_MMREGISTER,LOC_CMMREGISTER: + a_loadmm_reg_cgpara(list,fromsize,loc.register,cgpara,shuffle); + LOC_REFERENCE,LOC_CREFERENCE: + a_loadmm_ref_cgpara(list,fromsize,loc.reference,cgpara,shuffle); + else + internalerror(2012071202); + end; end; procedure thlcgobj.a_opmm_ref_reg(list: TAsmList; Op: TOpCG; size: tdef; const ref: treference; reg: tregister; shuffle: pmmshuffle); + var + hr : tregister; + hs : tmmshuffle; begin - cg.a_opmm_ref_reg(list,op,def_cgsize(size),ref,reg,shuffle) + hr:=getmmregister(list,size); + a_loadmm_ref_reg(list,size,size,ref,hr,shuffle); + if realshuffle(shuffle) then + begin + hs:=shuffle^; + removeshuffles(hs); + a_opmm_reg_reg(list,op,size,hr,reg,@hs); + end + else + a_opmm_reg_reg(list,op,size,hr,reg,shuffle); end; procedure thlcgobj.a_opmm_loc_reg(list: TAsmList; Op: TOpCG; size: tdef; const loc: tlocation; reg: tregister; shuffle: pmmshuffle); begin - cg.a_opmm_loc_reg(list,op,def_cgsize(size),loc,reg,shuffle); + case loc.loc of + LOC_CMMREGISTER,LOC_MMREGISTER: + a_opmm_reg_reg(list,op,size,loc.register,reg,shuffle); + LOC_CREFERENCE,LOC_REFERENCE: + a_opmm_ref_reg(list,op,size,loc.reference,reg,shuffle); + else + internalerror(2012071201); + end; end; procedure thlcgobj.a_opmm_reg_ref(list: TAsmList; Op: TOpCG; size: tdef; reg: tregister; const ref: treference; shuffle: pmmshuffle); + var + hr : tregister; + hs : tmmshuffle; begin - cg.a_opmm_reg_ref(list,op,def_cgsize(size),reg,ref,shuffle); + hr:=getmmregister(list,size); + a_loadmm_ref_reg(list,size,size,ref,hr,shuffle); + if realshuffle(shuffle) then + begin + hs:=shuffle^; + removeshuffles(hs); + a_opmm_reg_reg(list,op,size,reg,hr,@hs); + a_loadmm_reg_ref(list,size,size,hr,ref,@hs); + end + else + begin + a_opmm_reg_reg(list,op,size,reg,hr,shuffle); + a_loadmm_reg_ref(list,size,size,hr,ref,shuffle); + end; end; -*) + (* procedure thlcgobj.a_loadmm_intreg_reg(list: TAsmList; fromsize, tosize: tdef; intreg, mmreg: tregister; shuffle: pmmshuffle); begin @@ -2801,12 +2854,9 @@ implementation procedure thlcgobj.g_concatcopy(list: TAsmList; size: tdef; const source, dest: treference); begin -{ if use_vectorfpu(size) then - a_loadmm_ref_ref() - else - } - if size.typ<>floatdef then + a_loadmm_ref_ref(list,size,size,source,dest,mms_movescalar) + else if size.typ<>floatdef then a_load_ref_ref(list,size,size,source,dest) else a_loadfpu_ref_ref(list,size,size,source,dest); @@ -3560,16 +3610,17 @@ implementation location_reset_ref(l,LOC_REFERENCE,l.size,0); l.reference:=r; end; -(* LOC_MMREGISTER, LOC_CMMREGISTER: begin + { vectors can't be represented yet using tdef } + if size.typ<>floatdef then + internalerror(2012062301); tg.gethltemp(list,size,size.size,tt_normal,r); - cg.a_loadmm_reg_ref(list,l.size,l.size,l.register,r,mms_movescalar); + a_loadmm_reg_ref(list,size,size,l.register,r,mms_movescalar); location_reset_ref(l,LOC_REFERENCE,l.size,0); l.reference:=r; end; -*) LOC_CONSTANT, LOC_REGISTER, LOC_CREGISTER, @@ -3582,7 +3633,7 @@ implementation not is_open_array(size) then forcesize:=size.size else - forcesize:=voidpointertype.size; + forcesize:=sizeof(pint); tg.gethltemp(list,size,forcesize,tt_normal,r); a_load_loc_ref(list,size,size,l,r); location_reset_ref(l,LOC_REFERENCE,l.size,0); @@ -3595,6 +3646,55 @@ implementation end; end; + procedure thlcgobj.location_force_mmregscalar(list: TAsmList; var l: tlocation; size: tdef; maybeconst: boolean); + var + reg : tregister; + href : treference; + newsize : tdef; + begin + if (l.loc<>LOC_MMREGISTER) and + ((l.loc<>LOC_CMMREGISTER) or (not maybeconst)) then + begin + { if it's in an fpu register, store to memory first } + if (l.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER]) then + begin + tg.gethltemp(list,size,-1,tt_normal,href); + hlcg.a_loadfpu_reg_ref(list,size,size,l.register,href); + location_reset_ref(l,LOC_REFERENCE,l.size,0); + l.reference:=href; + end; + { on ARM, CFP values may be located in integer registers, + and its second_int_to_real() also uses this routine to + force integer (memory) values in an mmregister } + if (l.size in [OS_32,OS_S32]) then + begin + size:=tcgsize2orddef(l.size); + newsize:=s32floattype; + end + else if (l.size in [OS_64,OS_S64]) then + begin + size:=tcgsize2orddef(l.size); + newsize:=s64floattype; + end + else + newsize:=size; + case size.size of + 4: + newsize:=s32floattype; + 8: + newsize:=s64floattype; + else + newsize:=size; + end; + reg:=hlcg.getmmregister(list,newsize); + hlcg.a_loadmm_loc_reg(list,size,newsize,l,reg,mms_movescalar); + l.size:=def_cgsize(newsize); + location_freetemp(list,l); + location_reset(l,LOC_MMREGISTER,l.size); + l.register:=reg; + end; + end; + procedure thlcgobj.location_get_data_ref(list: TAsmList; def: tdef; const l: tlocation; var ref: treference; loadref: boolean; alignment: longint); begin case l.loc of @@ -3972,14 +4072,12 @@ implementation a_load_const_reg(TAsmList(arg),tstaticvarsym(p).vardef,0, tstaticvarsym(p).initialloc.register); end; -(* LOC_CMMREGISTER : { clear the whole register } - cg.a_opmm_reg_reg(TAsmList(arg),OP_XOR,reg_cgsize(tstaticvarsym(p).initialloc.register), + a_opmm_reg_reg(TAsmList(arg),OP_XOR,tstaticvarsym(p).vardef, tstaticvarsym(p).initialloc.register, tstaticvarsym(p).initialloc.register, nil); -*) LOC_CFPUREGISTER : begin { initialize fpu regvar by loading from memory } @@ -4312,9 +4410,10 @@ implementation end; procedure thlcgobj.gen_loadfpu_loc_cgpara(list: TAsmList; size: tdef; const l: tlocation; const cgpara: tcgpara; locintsize: longint); + var + tmploc: tlocation; begin case l.loc of -(* LOC_MMREGISTER, LOC_CMMREGISTER: case cgpara.location^.loc of @@ -4324,30 +4423,27 @@ implementation LOC_CMMREGISTER, LOC_REGISTER, LOC_CREGISTER : - cg.a_loadmm_reg_cgpara(list,locsize,l.register,cgpara,mms_movescalar); + a_loadmm_reg_cgpara(list,size,l.register,cgpara,mms_movescalar); LOC_FPUREGISTER, LOC_CFPUREGISTER: begin tmploc:=l; - location_force_fpureg(list,tmploc,false); - cg.a_loadfpu_reg_cgpara(list,tmploc.size,tmploc.register,cgpara); + location_force_fpureg(list,tmploc,size,false); + a_loadfpu_reg_cgpara(list,size,tmploc.register,cgpara); end; else internalerror(200204249); end; -*) LOC_FPUREGISTER, LOC_CFPUREGISTER: case cgpara.location^.loc of -(* LOC_MMREGISTER, LOC_CMMREGISTER: begin tmploc:=l; - location_force_mmregscalar(list,tmploc,false); - cg.a_loadmm_reg_cgpara(list,tmploc.size,tmploc.register,cgpara,mms_movescalar); + location_force_mmregscalar(list,tmploc,size,false); + a_loadmm_reg_cgpara(list,size,tmploc.register,cgpara,mms_movescalar); end; -*) { Some targets pass floats in normal registers } LOC_REGISTER, LOC_CREGISTER, @@ -4362,11 +4458,9 @@ implementation LOC_REFERENCE, LOC_CREFERENCE: case cgpara.location^.loc of -(* LOC_MMREGISTER, LOC_CMMREGISTER: - cg.a_loadmm_ref_cgpara(list,locsize,l.reference,cgpara,mms_movescalar); -*) + a_loadmm_ref_cgpara(list,size,l.reference,cgpara,mms_movescalar); { Some targets pass floats in normal registers } LOC_REGISTER, LOC_CREGISTER, @@ -4416,19 +4510,18 @@ implementation begin a_load_loc_cgpara(list,vardef,l,cgpara); end; -(* LOC_MMREGISTER, LOC_CMMREGISTER: begin - case l.size of - OS_F32, - OS_F64: - cg.a_loadmm_loc_cgpara(list,l,cgpara,mms_movescalar); - else - cg.a_loadmm_loc_cgpara(list,l,cgpara,nil); - end; + if use_vectorfpu(vardef) then + a_loadmm_loc_cgpara(list,vardef,l,cgpara,mms_movescalar) + else + { no vector support yet } + internalerror(2012071212); + { + cg.a_loadmm_loc_cgpara(list,l,cgpara,nil); + } end; -*) else internalerror(2011010213); end; diff --git a/compiler/ncgadd.pas b/compiler/ncgadd.pas index 8c56f06ef0..1340552eeb 100644 --- a/compiler/ncgadd.pas +++ b/compiler/ncgadd.pas @@ -153,7 +153,7 @@ interface if use_vectorfpu(left.resultdef) then begin tmpreg := cg.getmmregister(current_asmdata.CurrAsmList,left.location.size); - hlcg.a_loadmm_loc_reg(current_asmdata.CurrAsmList,left.location.size,left.location.size,left.location,tmpreg,mms_movescalar); + hlcg.a_loadmm_loc_reg(current_asmdata.CurrAsmList,left.resultdef,left.resultdef,left.location,tmpreg,mms_movescalar); location_freetemp(current_asmdata.CurrAsmList,left.location); location_reset(left.location,LOC_MMREGISTER,left.location.size); left.location.register:=tmpreg; diff --git a/compiler/ncgcnv.pas b/compiler/ncgcnv.pas index a6b7128260..a6ef003058 100644 --- a/compiler/ncgcnv.pas +++ b/compiler/ncgcnv.pas @@ -407,11 +407,12 @@ interface cg.a_loadfpu_reg_ref(current_asmdata.CurrAsmList,left.location.size,location.size,left.location.register,tr); location_reset_ref(left.location,LOC_REFERENCE,location.size,tr.alignment); left.location.reference:=tr; + left.resultdef:=resultdef; end; {$endif x86} { ARM VFP values are in integer registers when they are function results } if (left.location.loc in [LOC_REGISTER,LOC_CREGISTER]) then - location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,false); + hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,false); case left.location.loc of LOC_FPUREGISTER, LOC_CFPUREGISTER: @@ -427,7 +428,7 @@ interface end; LOC_MMREGISTER: begin - location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,false); + hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,false); location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size); cg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,left.location.size,location.size,left.location.register,location.register,mms_movescalar); end @@ -442,7 +443,7 @@ interface if expectloc=LOC_MMREGISTER then begin location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size); - hlcg.a_loadmm_loc_reg(current_asmdata.CurrAsmList,left.location.size,location.size,left.location,location.register,mms_movescalar) + hlcg.a_loadmm_loc_reg(current_asmdata.CurrAsmList,left.resultdef,resultdef,left.location,location.register,mms_movescalar) end else begin diff --git a/compiler/ncgld.pas b/compiler/ncgld.pas index 71a5ed54d8..b2101c3a79 100644 --- a/compiler/ncgld.pas +++ b/compiler/ncgld.pas @@ -818,11 +818,12 @@ implementation releaseright:=true; location_reset_ref(right.location,LOC_REFERENCE,left.location.size,0); right.location.reference:=href; + right.resultdef:=left.resultdef; end; {$endif} - cg.a_loadmm_ref_reg(current_asmdata.CurrAsmList, - right.location.size, - left.location.size, + hlcg.a_loadmm_ref_reg(current_asmdata.CurrAsmList, + right.resultdef, + left.resultdef, right.location.reference, left.location.register,mms_movescalar); end; @@ -862,10 +863,10 @@ implementation case left.location.loc of LOC_CMMREGISTER, LOC_MMREGISTER: - cg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,right.location.size,left.location.size,right.location.register,left.location.register,mms_movescalar); + hlcg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,right.resultdef,left.resultdef,right.location.register,left.location.register,mms_movescalar); LOC_REFERENCE, LOC_CREFERENCE: - cg.a_loadmm_reg_ref(current_asmdata.CurrAsmList,right.location.size,left.location.size,right.location.register,left.location.reference,mms_movescalar); + hlcg.a_loadmm_reg_ref(current_asmdata.CurrAsmList,right.resultdef,left.resultdef,right.location.register,left.location.reference,mms_movescalar); else internalerror(2009112601); end; @@ -899,15 +900,16 @@ implementation begin { perform size conversion if needed (the mm-code cannot convert an } { extended into a double/single, since sse doesn't support extended) } - tg.gethltemp(current_asmdata.CurrAsmList,left.resultdef, left.resultdef.size,tt_normal,href); + tg.gethltemp(current_asmdata.CurrAsmList,left.resultdef,left.resultdef.size,tt_normal,href); cg.a_loadfpu_reg_ref(current_asmdata.CurrAsmList,right.location.size,left.location.size,right.location.register,href); location_reset_ref(right.location,LOC_REFERENCE,left.location.size,0); right.location.reference:=href; + right.resultdef:=left.resultdef; end; {$endif} - location_force_mmregscalar(current_asmdata.CurrAsmList,right.location,false); - cg.a_loadmm_reg_reg(current_asmdata.CurrAsmList, - right.location.size,left.location.size, + hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,right.location,right.resultdef,false); + hlcg.a_loadmm_reg_reg(current_asmdata.CurrAsmList, + right.resultdef,left.resultdef, right.location.register,left.location.register,mms_movescalar); end else @@ -1295,7 +1297,7 @@ implementation case hp.left.location.loc of LOC_MMREGISTER, LOC_CMMREGISTER: - cg.a_loadmm_reg_ref(current_asmdata.CurrAsmList,hp.left.location.size,hp.left.location.size, + hlcg.a_loadmm_reg_ref(current_asmdata.CurrAsmList,hp.left.resultdef,hp.left.resultdef, hp.left.location.register,href,mms_movescalar); LOC_FPUREGISTER, LOC_CFPUREGISTER : diff --git a/compiler/ncgutil.pas b/compiler/ncgutil.pas index 14bcdf10b3..f4e96485cb 100644 --- a/compiler/ncgutil.pas +++ b/compiler/ncgutil.pas @@ -61,7 +61,6 @@ interface // procedure remove_non_regvars_from_loc(const t: tlocation; var regs:Tsuperregisterset); procedure location_force_fpureg(list:TAsmList;var l: tlocation;maybeconst:boolean); - procedure location_force_mmregscalar(list:TAsmList;var l: tlocation;maybeconst:boolean); procedure location_force_mmreg(list:TAsmList;var l: tlocation;maybeconst:boolean); procedure location_allocate_register(list:TAsmList;out l: tlocation;def: tdef;constant: boolean); @@ -512,54 +511,6 @@ implementation end; - procedure location_force_mmregscalar(list:TAsmList;var l: tlocation;maybeconst:boolean); - var - reg : tregister; - href : treference; - newsize : tcgsize; - begin - if (l.loc<>LOC_MMREGISTER) and - ((l.loc<>LOC_CMMREGISTER) or (not maybeconst)) then - begin - { if it's in an fpu register, store to memory first } - if (l.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER]) then - begin - tg.GetTemp(list,tcgsize2size[l.size],tcgsize2size[l.size],tt_normal,href); - cg.a_loadfpu_reg_ref(list,l.size,l.size,l.register,href); - location_reset_ref(l,LOC_REFERENCE,l.size,0); - l.reference:=href; - end; -{$ifndef cpu64bitalu} - if (l.loc in [LOC_REGISTER,LOC_CREGISTER]) and - (l.size in [OS_64,OS_S64]) then - begin - reg:=cg.getmmregister(list,OS_F64); - cg64.a_loadmm_intreg64_reg(list,OS_F64,l.register64,reg); - l.size:=OS_F64 - end - else -{$endif not cpu64bitalu} - begin - { on ARM, CFP values may be located in integer registers, - and its second_int_to_real() also uses this routine to - force integer (memory) values in an mmregister } - if (l.size in [OS_32,OS_S32]) then - newsize:=OS_F32 - else if (l.size in [OS_64,OS_S64]) then - newsize:=OS_F64 - else - newsize:=l.size; - reg:=cg.getmmregister(list,newsize); - hlcg.a_loadmm_loc_reg(list,l.size,newsize,l,reg,mms_movescalar); - l.size:=newsize; - end; - location_freetemp(list,l); - location_reset(l,LOC_MMREGISTER,l.size); - l.register:=reg; - end; - end; - - procedure register_maybe_adjust_setbase(list: TAsmList; var l: tlocation; setbase: aint); var tmpreg: tregister; @@ -594,7 +545,7 @@ implementation ((l.loc<>LOC_CMMREGISTER) or (not maybeconst)) then begin reg:=cg.getmmregister(list,OS_VECTOR); - hlcg.a_loadmm_loc_reg(list,l.size,OS_VECTOR,l,reg,nil); + cg.a_loadmm_loc_reg(list,OS_VECTOR,l,reg,nil); location_freetemp(list,l); location_reset(l,LOC_MMREGISTER,OS_VECTOR); l.register:=reg; diff --git a/compiler/x86/nx86add.pas b/compiler/x86/nx86add.pas index bc92d9807e..042844b750 100644 --- a/compiler/x86/nx86add.pas +++ b/compiler/x86/nx86add.pas @@ -726,8 +726,8 @@ unit nx86add; if nf_swapped in flags then swapleftright; - location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,false); - location_force_mmregscalar(current_asmdata.CurrAsmList,right.location,true); + hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,false); + hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,right.location,right.resultdef,true); location:=left.location; if is_double(resultdef) then begin @@ -781,7 +781,7 @@ unit nx86add; if (nf_swapped in flags) then swapleftright; - location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,false); + hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,false); location.register:=left.location.register; { force floating point reg. location to be written to memory, we don't force it to mm register because writing to memory @@ -836,7 +836,7 @@ unit nx86add; end else begin - location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,false); + hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,false); { force floating point reg. location to be written to memory, we don't force it to mm register because writing to memory allows probably shorter code because there is no direct fpu->mm register diff --git a/compiler/x86/nx86inl.pas b/compiler/x86/nx86inl.pas index e07dda9936..23de7eea8f 100644 --- a/compiler/x86/nx86inl.pas +++ b/compiler/x86/nx86inl.pas @@ -259,7 +259,7 @@ implementation if use_vectorfpu(resultdef) then begin secondpass(left); - location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,false); + hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,false); location:=left.location; case tfloatdef(resultdef).floattype of s32real: @@ -286,7 +286,7 @@ implementation if use_vectorfpu(left.resultdef) then begin secondpass(left); - location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,false); + hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,false); location_reset(location,LOC_REGISTER,OS_S64); location.register:=cg.getintregister(current_asmdata.CurrAsmList,OS_S64); case left.location.size of @@ -320,7 +320,7 @@ implementation not((left.location.loc=LOC_FPUREGISTER) and (current_settings.fputype>=fpu_sse3)) then begin secondpass(left); - location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,false); + hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,false); location_reset(location,LOC_REGISTER,OS_S64); location.register:=cg.getintregister(current_asmdata.CurrAsmList,OS_S64); case left.location.size of @@ -371,7 +371,7 @@ implementation if use_vectorfpu(resultdef) then begin secondpass(left); - location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,false); + hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,false); location:=left.location; cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,OP_MUL,left.location.size,left.location,left.location.register,mms_movescalar); end @@ -388,7 +388,7 @@ implementation if use_vectorfpu(resultdef) then begin secondpass(left); - location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,false); + hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,false); location:=left.location; case tfloatdef(resultdef).floattype of s32real: diff --git a/compiler/x86/nx86mat.pas b/compiler/x86/nx86mat.pas index 90b133f587..0e2f651c27 100644 --- a/compiler/x86/nx86mat.pas +++ b/compiler/x86/nx86mat.pas @@ -154,7 +154,7 @@ interface if expectloc=LOC_MMREGISTER then begin - location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,false); + hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,false); location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef)); { make life of register allocator easier }