From d94460e3b3ca9a4f7448bd9e6f9f262d83d2a3f1 Mon Sep 17 00:00:00 2001 From: sergei Date: Mon, 26 Sep 2011 12:58:59 +0000 Subject: [PATCH] * Made the x86-specific scalefactor optimization of tvecnode available on x86_64 as well, by moving update_reference_reg_mul method from ti386vecnode to newly introduced tx86vecnode. git-svn-id: trunk@19245 - --- .gitattributes | 1 + compiler/i386/n386mem.pas | 64 +-------------------- compiler/x86/nx86mem.pas | 108 ++++++++++++++++++++++++++++++++++++ compiler/x86_64/cpunode.pas | 1 + 4 files changed, 112 insertions(+), 62 deletions(-) create mode 100644 compiler/x86/nx86mem.pas diff --git a/.gitattributes b/.gitattributes index 8a5fcb429d..b36c797f29 100644 --- a/.gitattributes +++ b/.gitattributes @@ -627,6 +627,7 @@ compiler/x86/nx86cnv.pas svneol=native#text/plain compiler/x86/nx86con.pas svneol=native#text/plain compiler/x86/nx86inl.pas svneol=native#text/plain compiler/x86/nx86mat.pas svneol=native#text/plain +compiler/x86/nx86mem.pas svneol=native#text/plain compiler/x86/nx86set.pas svneol=native#text/plain compiler/x86/rax86.pas svneol=native#text/plain compiler/x86/rax86att.pas svneol=native#text/plain diff --git a/compiler/i386/n386mem.pas b/compiler/i386/n386mem.pas index a2da5ff298..fc9fb07853 100644 --- a/compiler/i386/n386mem.pas +++ b/compiler/i386/n386mem.pas @@ -28,7 +28,7 @@ interface uses globtype, cgbase,cpuinfo,cpubase, - node,nmem,ncgmem; + node,nmem,ncgmem,nx86mem; type ti386addrnode = class(tcgaddrnode) @@ -39,8 +39,7 @@ interface procedure pass_generate_code;override; end; - ti386vecnode = class(tcgvecnode) - procedure update_reference_reg_mul(maybe_const_reg:tregister;l:aint);override; + ti386vecnode = class(tx86vecnode) procedure pass_generate_code;override; end; @@ -84,65 +83,6 @@ implementation TI386VECNODE *****************************************************************************} - { this routine must, like any other routine, not change the contents } - { of base/index registers of references, 
as these may be regvars. } - { The register allocator can coalesce one LOC_REGISTER being moved } - { into another (as their live ranges won't overlap), but not a } - { LOC_CREGISTER moved into a LOC_(C)REGISTER most of the time (as } - { the live range of the LOC_CREGISTER will most likely overlap the } - { the live range of the target LOC_(C)REGISTER) } - { The passed register may be a LOC_CREGISTER as well. } - procedure ti386vecnode.update_reference_reg_mul(maybe_const_reg:tregister;l:aint); - var - l2 : integer; - hreg : tregister; - begin - { Optimized for x86 to use the index register and scalefactor } - if location.reference.index=NR_NO then - begin - { no preparations needed } - end - else if location.reference.base=NR_NO then - begin - if (location.reference.scalefactor > 1) then - hreg:=cg.getaddressregister(current_asmdata.CurrAsmList); - case location.reference.scalefactor of - 0,1 : hreg:=location.reference.index; - 2 : cg.a_op_const_reg_reg(current_asmdata.CurrAsmList,OP_SHL,OS_ADDR,1,location.reference.index,hreg); - 4 : cg.a_op_const_reg_reg(current_asmdata.CurrAsmList,OP_SHL,OS_ADDR,2,location.reference.index,hreg); - 8 : cg.a_op_const_reg_reg(current_asmdata.CurrAsmList,OP_SHL,OS_ADDR,3,location.reference.index,hreg); - else - internalerror(2008091401); - end; - location.reference.base:=hreg; - end - else - begin - hreg:=cg.getaddressregister(current_asmdata.CurrAsmList); - cg.a_loadaddr_ref_reg(current_asmdata.CurrAsmList,location.reference,hreg); - reference_reset_base(location.reference,hreg,0,location.reference.alignment); - end; - { insert the new index register and scalefactor or - do the multiplication manual } - case l of - 1,2,4,8 : - begin - location.reference.scalefactor:=l; - hreg:=maybe_const_reg; - end; - else - begin - hreg:=cg.getaddressregister(current_asmdata.CurrAsmList); - if ispowerof2(l,l2) then - cg.a_op_const_reg_reg(current_asmdata.CurrAsmList,OP_SHL,OS_ADDR,l2,maybe_const_reg,hreg) - else - 
cg.a_op_const_reg_reg(current_asmdata.CurrAsmList,OP_IMUL,OS_ADDR,l,maybe_const_reg,hreg); - end; - end; - location.reference.index:=hreg; - end; - - procedure ti386vecnode.pass_generate_code; begin inherited pass_generate_code; diff --git a/compiler/x86/nx86mem.pas b/compiler/x86/nx86mem.pas new file mode 100644 index 0000000000..d29bbb62b0 --- /dev/null +++ b/compiler/x86/nx86mem.pas @@ -0,0 +1,108 @@ +{ + Copyright (c) 1998-2002 by Florian Klaempfl + + Generate x86 assembler for in memory related nodes + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + + **************************************************************************** +} +unit nx86mem; + +{$i fpcdefs.inc} + +interface + uses + globtype, + cgbase,cpuinfo,cpubase, + node,nmem,ncgmem; + + type + tx86vecnode = class(tcgvecnode) + procedure update_reference_reg_mul(maybe_const_reg:tregister;l:aint);override; + end; + +implementation + + uses + cutils,verbose, + aasmtai,aasmdata, + cgutils,cgobj; + +{***************************************************************************** + TX86VECNODE +*****************************************************************************} + + { this routine must, like any other routine, not change the contents } + { of base/index registers of references, as these may be regvars. 
} + { The register allocator can coalesce one LOC_REGISTER being moved } + { into another (as their live ranges won't overlap), but not a } + { LOC_CREGISTER moved into a LOC_(C)REGISTER most of the time (as } + { the live range of the LOC_CREGISTER will most likely overlap the } + { live range of the target LOC_(C)REGISTER). } + { The passed register may be a LOC_CREGISTER as well. } + procedure tx86vecnode.update_reference_reg_mul(maybe_const_reg:tregister;l:aint); + var + l2 : integer; + hreg : tregister; + begin + { Optimized for x86 to use the index register and scalefactor: an index already present in the reference is first folded into the base, then maybe_const_reg (multiplied by l) is installed as the new index } + if location.reference.index=NR_NO then + begin + { no preparations needed: the index slot is still free } + end + else if location.reference.base=NR_NO then + begin { only an index is present: move it, scaled, into the base slot } + if (location.reference.scalefactor > 1) then + hreg:=cg.getaddressregister(current_asmdata.CurrAsmList); + case location.reference.scalefactor of + 0,1 : hreg:=location.reference.index; { unscaled index is reused as the base as-is, no new register } + 2 : cg.a_op_const_reg_reg(current_asmdata.CurrAsmList,OP_SHL,OS_ADDR,1,location.reference.index,hreg); + 4 : cg.a_op_const_reg_reg(current_asmdata.CurrAsmList,OP_SHL,OS_ADDR,2,location.reference.index,hreg); + 8 : cg.a_op_const_reg_reg(current_asmdata.CurrAsmList,OP_SHL,OS_ADDR,3,location.reference.index,hreg); + else + internalerror(2008091401); + end; + location.reference.base:=hreg; + end + else + begin { base and index are both occupied: load the address of the reference into a new register and rebuild the reference on that register } + hreg:=cg.getaddressregister(current_asmdata.CurrAsmList); + cg.a_loadaddr_ref_reg(current_asmdata.CurrAsmList,location.reference,hreg); + reference_reset_base(location.reference,hreg,0,location.reference.alignment); + end; + { insert the new index register and scalefactor or + do the multiplication manually } + case l of + 1,2,4,8 : + begin + location.reference.scalefactor:=l; + hreg:=maybe_const_reg; + end; + else + begin { NOTE(review): scalefactor is not assigned on this path; confirm that a value > 1 left over from the base=NR_NO case above cannot end up applied to the new index } + hreg:=cg.getaddressregister(current_asmdata.CurrAsmList); + if ispowerof2(l,l2) then + cg.a_op_const_reg_reg(current_asmdata.CurrAsmList,OP_SHL,OS_ADDR,l2,maybe_const_reg,hreg) + else + 
cg.a_op_const_reg_reg(current_asmdata.CurrAsmList,OP_IMUL,OS_ADDR,l,maybe_const_reg,hreg); + end; + end; + location.reference.index:=hreg; + end; + +begin + cvecnode:=tx86vecnode; +end. diff --git a/compiler/x86_64/cpunode.pas b/compiler/x86_64/cpunode.pas index fcd2d07314..8a0199654e 100644 --- a/compiler/x86_64/cpunode.pas +++ b/compiler/x86_64/cpunode.pas @@ -49,6 +49,7 @@ unit cpunode; get the correct class pointer } nx86set, nx86con, + nx86mem, nx64add, nx64cal, nx64cnv,