From d7fe9914a79453141c419fdfa3e9bac379868fcc Mon Sep 17 00:00:00 2001 From: florian Date: Sat, 12 Sep 2020 21:23:57 +0000 Subject: [PATCH] + introduce tcg.a_loadfpu_reg_intreg + make use of it in tcgtypeconvnode.second_nothing + Xtensa: implementation of tcgcpu.a_loadfpu_intreg_reg and tcgcpu.a_loadfpu_reg_intreg git-svn-id: trunk@46858 - --- compiler/cgobj.pas | 20 +++++++++++++++++++- compiler/fpcdefs.inc | 1 + compiler/hlcg2ll.pas | 14 +------------- compiler/ncgcnv.pas | 15 +++++++++++++-- compiler/xtensa/cgcpu.pas | 21 +++++++++++++++++++++ 5 files changed, 55 insertions(+), 16 deletions(-) diff --git a/compiler/cgobj.pas b/compiler/cgobj.pas index af34b49e03..7f5e287c40 100644 --- a/compiler/cgobj.pas +++ b/compiler/cgobj.pas @@ -282,6 +282,7 @@ unit cgobj; procedure a_loadfpu_ref_cgpara(list : TAsmList;size : tcgsize;const ref : treference;const cgpara : TCGPara);virtual; procedure a_loadfpu_intreg_reg(list: TAsmList; fromsize, tosize : tcgsize; intreg, fpureg: tregister); virtual; + procedure a_loadfpu_reg_intreg(list: TAsmList; fromsize, tosize: tcgsize; fpureg, intreg: tregister); virtual; { vector register move instructions } procedure a_loadmm_reg_reg(list: TAsmList; fromsize, tosize : tcgsize;reg1, reg2: tregister;shuffle : pmmshuffle); virtual; @@ -1437,7 +1438,9 @@ implementation begin case getregtype(reg) of R_FPUREGISTER: - a_loadfpu_reg_reg(list,paraloc.size,regsize,paraloc.register,reg) + a_loadfpu_reg_reg(list,paraloc.size,regsize,paraloc.register,reg); + R_INTREGISTER: + a_loadfpu_reg_intreg(list,paraloc.size,regsize,paraloc.register,reg); else internalerror(2015031401); end; @@ -1974,6 +1977,21 @@ implementation end; + procedure tcg.a_loadfpu_reg_intreg(list : TAsmList; fromsize,tosize : tcgsize; fpureg,intreg : tregister); + var + tmpref: treference; + begin + if not(tcgsize2size[fromsize] in [4,8]) or + not(tcgsize2size[tosize] in [4,8]) or + (tcgsize2size[fromsize]<>tcgsize2size[tosize]) then + internalerror(2020091201);
tg.gettemp(list,tcgsize2size[fromsize],tcgsize2size[fromsize],tt_normal,tmpref); + a_loadfpu_reg_ref(list,fromsize,fromsize,fpureg,tmpref); + a_load_ref_reg(list,tosize,tosize,tmpref,intreg); + tg.ungettemp(list,tmpref); + end; + + procedure tcg.a_op_const_ref(list : TAsmList; Op: TOpCG; size: TCGSize; a: tcgint; const ref: TReference); var tmpreg : tregister; diff --git a/compiler/fpcdefs.inc b/compiler/fpcdefs.inc index b448022c24..e201b2d6d1 100644 --- a/compiler/fpcdefs.inc +++ b/compiler/fpcdefs.inc @@ -341,6 +341,7 @@ {$define cpuneedsdivhelper} {$define cpucapabilities} {$define cpurequiresproperalignment} + {$define cpufloatintregmov} {$endif xtensa} { Stabs is not officially supported on 64 bit targets by gdb, except on Mac OS X diff --git a/compiler/hlcg2ll.pas b/compiler/hlcg2ll.pas index 8dc8b93588..a6c18e98c8 100644 --- a/compiler/hlcg2ll.pas +++ b/compiler/hlcg2ll.pas @@ -2019,19 +2019,7 @@ implementation begin unget_para(paraloc^); gen_alloc_regloc(list,destloc,vardef); - { we can't directly move regular registers into fpu - registers } - if getregtype(paraloc^.register)=R_FPUREGISTER then - begin - { store everything first to memory, then load it in - destloc } - tg.gettemp(list,tcgsize2size[paraloc^.size],para.intsize,tt_persistent,tempref); - cg.a_load_cgparaloc_ref(list,paraloc^,tempref,tcgsize2size[paraloc^.size],tempref.alignment); - cg.a_load_ref_reg(list,int_cgsize(tcgsize2size[paraloc^.size]),destloc.size,tempref,destloc.register); - tg.ungettemp(list,tempref); - end - else - cg.a_load_cgparaloc_anyreg(list,destloc.size,paraloc^,destloc.register,sizeof(aint)); + cg.a_load_cgparaloc_anyreg(list,destloc.size,paraloc^,destloc.register,sizeof(aint)); end; end; end; diff --git a/compiler/ncgcnv.pas b/compiler/ncgcnv.pas index 98b2f7dad8..2a232eeedf 100644 --- a/compiler/ncgcnv.pas +++ b/compiler/ncgcnv.pas @@ -806,8 +806,19 @@ interface (location.loc=LOC_CONSTANT) ) or ((resultdef.typ=floatdef) xor (location.loc in 
[LOC_CFPUREGISTER,LOC_FPUREGISTER,LOC_CMMREGISTER,LOC_MMREGISTER])) then - hlcg.location_force_mem(current_asmdata.CurrAsmList,location,left.resultdef); - + begin + { check if the CPU supports direct moves between int and fpu registers and take advantage of it } +{$ifdef cpufloatintregmov} + if (resultdef.typ<>floatdef) and (location.loc in [LOC_CFPUREGISTER,LOC_FPUREGISTER]) then + begin + location_reset(location,LOC_REGISTER,def_cgsize(resultdef)); + location.register:=cg.getintregister(current_asmdata.CurrAsmList,location.size); + cg.a_loadfpu_reg_intreg(current_asmdata.CurrAsmList,left.location.size,location.size,left.location.register,location.register); + end + else +{$endif cpufloatintregmov} + hlcg.location_force_mem(current_asmdata.CurrAsmList,location,left.resultdef); + end; { but use the new size, but we don't know the size of all arrays } newsize:=def_cgsize(resultdef); location.size:=newsize; diff --git a/compiler/xtensa/cgcpu.pas b/compiler/xtensa/cgcpu.pas index 6bc71bca57..2c1f2d5ea4 100644 --- a/compiler/xtensa/cgcpu.pas +++ b/compiler/xtensa/cgcpu.pas @@ -77,6 +77,9 @@ interface procedure a_loadfpu_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; reg: tregister);override; procedure a_loadfpu_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference);override; + procedure a_loadfpu_intreg_reg(list: TAsmList; fromsize, tosize: tcgsize; intreg, fpureg: tregister);override; + procedure a_loadfpu_reg_intreg(list: TAsmList; fromsize, tosize: tcgsize; fpureg, intreg: tregister);override; + procedure maybeadjustresult(list: TAsmList; op: TOpCg; size: tcgsize; dst: tregister); procedure g_overflowcheck(list: TAsmList; const Loc:tlocation; def:tdef);override; @@ -1196,6 +1199,24 @@ implementation end; + procedure tcgcpu.a_loadfpu_intreg_reg(list : TAsmList; fromsize,tosize : tcgsize; intreg,fpureg : tregister); + begin + if not(tcgsize2size[fromsize]=4) or + not(tcgsize2size[tosize]=4) then + 
internalerror(2020091102); + list.concat(taicpu.op_reg_reg(A_WFR,fpureg,intreg)); + end; + + + procedure tcgcpu.a_loadfpu_reg_intreg(list : TAsmList; fromsize,tosize : tcgsize; fpureg,intreg : tregister); + begin + if not(tcgsize2size[fromsize]=4) or + not(tcgsize2size[tosize]=4) then + internalerror(2020091202); + list.concat(taicpu.op_reg_reg(A_RFR,intreg,fpureg)); + end; + + procedure tcgcpu.maybeadjustresult(list : TAsmList; op : TOpCg; size : tcgsize; dst : tregister); const overflowops = [OP_MUL,OP_SHL,OP_ADD,OP_SUB,OP_NEG];