* Made the x86-specific scalefactor optimization of tvecnode available on x86_64 as well, by moving the update_reference_reg_mul method from ti386vecnode to the newly introduced tx86vecnode.

git-svn-id: trunk@19245 -
This commit is contained in:
sergei 2011-09-26 12:58:59 +00:00
parent e2357b5809
commit d94460e3b3
4 changed files with 112 additions and 62 deletions

1
.gitattributes vendored
View File

@ -627,6 +627,7 @@ compiler/x86/nx86cnv.pas svneol=native#text/plain
compiler/x86/nx86con.pas svneol=native#text/plain
compiler/x86/nx86inl.pas svneol=native#text/plain
compiler/x86/nx86mat.pas svneol=native#text/plain
compiler/x86/nx86mem.pas svneol=native#text/plain
compiler/x86/nx86set.pas svneol=native#text/plain
compiler/x86/rax86.pas svneol=native#text/plain
compiler/x86/rax86att.pas svneol=native#text/plain

View File

@ -28,7 +28,7 @@ interface
uses
globtype,
cgbase,cpuinfo,cpubase,
node,nmem,ncgmem;
node,nmem,ncgmem,nx86mem;
type
ti386addrnode = class(tcgaddrnode)
@ -39,8 +39,7 @@ interface
procedure pass_generate_code;override;
end;
ti386vecnode = class(tcgvecnode)
procedure update_reference_reg_mul(maybe_const_reg:tregister;l:aint);override;
ti386vecnode = class(tx86vecnode)
procedure pass_generate_code;override;
end;
@ -84,65 +83,6 @@ implementation
TI386VECNODE
*****************************************************************************}
{ Adds the term maybe_const_reg*l to location.reference.                }
{   maybe_const_reg : register holding the value to be multiplied by l  }
{   l               : the constant multiplier                           }
{ An index that is already present in the reference is first folded     }
{ into the base, so that the index field is free for the new term.      }
{ this routine must, like any other routine, not change the contents }
{ of base/index registers of references, as these may be regvars. }
{ The register allocator can coalesce one LOC_REGISTER being moved }
{ into another (as their live ranges won't overlap), but not a }
{ LOC_CREGISTER moved into a LOC_(C)REGISTER most of the time (as }
{ the live range of the LOC_CREGISTER will most likely overlap the }
{ live range of the target LOC_(C)REGISTER) }
{ The passed register may be a LOC_CREGISTER as well. }
procedure ti386vecnode.update_reference_reg_mul(maybe_const_reg:tregister;l:aint);
var
{ second argument of ispowerof2; used as the shift count afterwards }
l2 : integer;
{ scratch register: first the new base (if any), finally the new index }
hreg : tregister;
begin
{ Optimized for x86 to use the index register and scalefactor }
if location.reference.index=NR_NO then
begin
{ no preparations needed }
end
else if location.reference.base=NR_NO then
begin
{ index but no base: the scaled index becomes the base. A fresh register }
{ is only requested when a shift has to be emitted; for a scalefactor of }
{ 0 or 1 the index register itself is reused as base.                    }
if (location.reference.scalefactor > 1) then
hreg:=cg.getaddressregister(current_asmdata.CurrAsmList);
case location.reference.scalefactor of
0,1 : hreg:=location.reference.index;
2 : cg.a_op_const_reg_reg(current_asmdata.CurrAsmList,OP_SHL,OS_ADDR,1,location.reference.index,hreg);
4 : cg.a_op_const_reg_reg(current_asmdata.CurrAsmList,OP_SHL,OS_ADDR,2,location.reference.index,hreg);
8 : cg.a_op_const_reg_reg(current_asmdata.CurrAsmList,OP_SHL,OS_ADDR,3,location.reference.index,hreg);
else
{ any other scalefactor value is reported as an internal error }
internalerror(2008091401);
end;
location.reference.base:=hreg;
end
else
begin
{ base and index are both in use: a_loadaddr_ref_reg is called with the  }
{ whole reference and a fresh register, and the reference is then reset  }
{ through reference_reset_base with that register as its base argument.  }
hreg:=cg.getaddressregister(current_asmdata.CurrAsmList);
cg.a_loadaddr_ref_reg(current_asmdata.CurrAsmList,location.reference,hreg);
reference_reset_base(location.reference,hreg,0,location.reference.alignment);
end;
{ insert the new index register and scalefactor or
do the multiplication manually }
case l of
1,2,4,8 :
begin
{ l fits the scalefactor field: maybe_const_reg is used unmodified as index }
location.reference.scalefactor:=l;
hreg:=maybe_const_reg;
end;
else
begin
{ l does not fit: a new register receives maybe_const_reg shifted left by }
{ l2 when ispowerof2 succeeds, and maybe_const_reg times l otherwise.     }
{ NOTE(review): scalefactor is not written on this path; if it is still   }
{ greater than 1 from the no-base branch above, the new index would be    }
{ scaled by the old factor - confirm that this combination cannot occur.  }
hreg:=cg.getaddressregister(current_asmdata.CurrAsmList);
if ispowerof2(l,l2) then
cg.a_op_const_reg_reg(current_asmdata.CurrAsmList,OP_SHL,OS_ADDR,l2,maybe_const_reg,hreg)
else
cg.a_op_const_reg_reg(current_asmdata.CurrAsmList,OP_IMUL,OS_ADDR,l,maybe_const_reg,hreg);
end;
end;
location.reference.index:=hreg;
end;
procedure ti386vecnode.pass_generate_code;
begin
inherited pass_generate_code;

108
compiler/x86/nx86mem.pas Normal file
View File

@ -0,0 +1,108 @@
{
Copyright (c) 1998-2002 by Florian Klaempfl
Generate x86 assembler for in memory related nodes
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
****************************************************************************
}
unit nx86mem;
{$i fpcdefs.inc}
interface
uses
globtype,
cgbase,cpuinfo,cpubase,
node,nmem,ncgmem;
type
{ Vector node for x86 targets. It only overrides update_reference_reg_mul, }
{ whose implementation uses the index register and scalefactor of an x86   }
{ reference instead of always multiplying explicitly.                      }
tx86vecnode = class(tcgvecnode)
{ adds maybe_const_reg*l to location.reference }
procedure update_reference_reg_mul(maybe_const_reg:tregister;l:aint);override;
end;
implementation
uses
cutils,verbose,
aasmtai,aasmdata,
cgutils,cgobj;
{*****************************************************************************
TX86VECNODE
*****************************************************************************}
{ Adds the term maybe_const_reg*l to location.reference.                }
{   maybe_const_reg : register holding the value to be multiplied by l  }
{   l               : the constant multiplier                           }
{ An index that is already present in the reference is first folded     }
{ into the base, so that the index field is free for the new term.      }
{ this routine must, like any other routine, not change the contents }
{ of base/index registers of references, as these may be regvars. }
{ The register allocator can coalesce one LOC_REGISTER being moved }
{ into another (as their live ranges won't overlap), but not a }
{ LOC_CREGISTER moved into a LOC_(C)REGISTER most of the time (as }
{ the live range of the LOC_CREGISTER will most likely overlap the }
{ live range of the target LOC_(C)REGISTER) }
{ The passed register may be a LOC_CREGISTER as well. }
procedure tx86vecnode.update_reference_reg_mul(maybe_const_reg:tregister;l:aint);
var
{ second argument of ispowerof2; used as the shift count afterwards }
l2 : integer;
{ scratch register: first the new base (if any), finally the new index }
hreg : tregister;
begin
{ Optimized for x86 to use the index register and scalefactor }
if location.reference.index=NR_NO then
begin
{ no preparations needed }
end
else if location.reference.base=NR_NO then
begin
{ index but no base: the scaled index becomes the base. A fresh register }
{ is only requested when a shift has to be emitted; for a scalefactor of }
{ 0 or 1 the index register itself is reused as base.                    }
if (location.reference.scalefactor > 1) then
hreg:=cg.getaddressregister(current_asmdata.CurrAsmList);
case location.reference.scalefactor of
0,1 : hreg:=location.reference.index;
2 : cg.a_op_const_reg_reg(current_asmdata.CurrAsmList,OP_SHL,OS_ADDR,1,location.reference.index,hreg);
4 : cg.a_op_const_reg_reg(current_asmdata.CurrAsmList,OP_SHL,OS_ADDR,2,location.reference.index,hreg);
8 : cg.a_op_const_reg_reg(current_asmdata.CurrAsmList,OP_SHL,OS_ADDR,3,location.reference.index,hreg);
else
{ any other scalefactor value is reported as an internal error }
internalerror(2008091401);
end;
location.reference.base:=hreg;
end
else
begin
{ base and index are both in use: a_loadaddr_ref_reg is called with the  }
{ whole reference and a fresh register, and the reference is then reset  }
{ through reference_reset_base with that register as its base argument.  }
hreg:=cg.getaddressregister(current_asmdata.CurrAsmList);
cg.a_loadaddr_ref_reg(current_asmdata.CurrAsmList,location.reference,hreg);
reference_reset_base(location.reference,hreg,0,location.reference.alignment);
end;
{ insert the new index register and scalefactor or
do the multiplication manually }
case l of
1,2,4,8 :
begin
{ l fits the scalefactor field: maybe_const_reg is used unmodified as index }
location.reference.scalefactor:=l;
hreg:=maybe_const_reg;
end;
else
begin
{ l does not fit: a new register receives maybe_const_reg shifted left by }
{ l2 when ispowerof2 succeeds, and maybe_const_reg times l otherwise.     }
{ NOTE(review): scalefactor is not written on this path; if it is still   }
{ greater than 1 from the no-base branch above, the new index would be    }
{ scaled by the old factor - confirm that this combination cannot occur.  }
hreg:=cg.getaddressregister(current_asmdata.CurrAsmList);
if ispowerof2(l,l2) then
cg.a_op_const_reg_reg(current_asmdata.CurrAsmList,OP_SHL,OS_ADDR,l2,maybe_const_reg,hreg)
else
cg.a_op_const_reg_reg(current_asmdata.CurrAsmList,OP_IMUL,OS_ADDR,l,maybe_const_reg,hreg);
end;
end;
location.reference.index:=hreg;
end;
{ Unit initialization: assigns tx86vecnode to cvecnode. cvecnode is not    }
{ declared in this unit - presumably it is the class reference through     }
{ which vector nodes are created; verify against its declaration.          }
begin
cvecnode:=tx86vecnode;
end.

View File

@ -49,6 +49,7 @@ unit cpunode;
get the correct class pointer }
nx86set,
nx86con,
nx86mem,
nx64add,
nx64cal,
nx64cnv,