Consolidate do_spill_read/do_spill_written on arm

ARM load/store instructions cannot encode an arbitrary offset, so spill
locations need special handling when abs(offset) exceeds 4095.

The code for do_spill_read and do_spill_written used to be very similar.
I've partially factored out the code into spilling_create_load_store.

The former code loaded the offset from a constant pool, which is a waste
of memory bandwidth and cache lines. The new code tries to find a way to
adjust the base register so the memory location can be reached more
easily; this allows us to handle at least +-1MB with just a single
additional ADD or SUB instruction. If that fails, we resort to the normal
constant loading code, which on its own will fall back to loading the
constant from a constant pool.

So instead of:
ldr r1, =16388
ldr r0, [r13, r1]

which will use at least 4 cycles (2 instruction cycles + 2 stall
cycles) on most cores.

We try to generate:
add r1, r13, #16384
ldr r0, [r1, #4]

which most armv5+ cores will execute in 2 cycles. We'll also save on
DCache usage.

git-svn-id: trunk@21889 -
This commit is contained in:
masta 2012-07-12 01:11:23 +00:00
parent bf8ec92b5c
commit e2a744e19b

View File

@ -35,6 +35,9 @@ unit rgcpu;
type
trgcpu = class(trgobj)
private
procedure spilling_create_load_store(list: TAsmList; pos: tai; const spilltemp:treference;tempreg:tregister; is_store: boolean);
public
procedure do_spill_read(list:TAsmList;pos:tai;const spilltemp:treference;tempreg:tregister);override;
procedure do_spill_written(list:TAsmList;pos:tai;const spilltemp:treference;tempreg:tregister);override;
procedure add_constraints(reg:tregister);override;
@ -122,13 +125,70 @@ unit rgcpu;
end;
end;
procedure trgcpu.do_spill_read(list:TAsmList;pos:tai;const spilltemp:treference;tempreg:tregister);
procedure trgcpu.spilling_create_load_store(list: TAsmList; pos: tai; const spilltemp:treference;tempreg:tregister; is_store: boolean);
  { Emits the instructions that load tempreg from, or store tempreg to,
    the spill slot described by spilltemp, and inserts them into list
    directly after pos.

    Both callers visible in this unit (do_spill_read / do_spill_written)
    use this routine only when abs(spilltemp.offset) > 4095.

    Strategy:
      1. Split abs(offset) into its low 12 bits and the remaining high
         part. If the high part passes is_shifter_const, emit a single
         ADD (positive offset) or SUB (negative offset) of the frame
         pointer into a helper register and address the slot as
         helper register +/- low part.
      2. Otherwise load the complete offset into the helper register with
         cg.a_load_const_reg and use it as an index on the frame pointer.

    Parameters:
      list      - instruction list that receives the generated code
      pos       - the generated code is inserted after this entry
      spilltemp - reference to the spill location; must not carry an
                  index register
      tempreg   - register to be loaded or stored
      is_store  - true: store tempreg to the slot, false: load it

    NOTE(review): the low-part mask of 12 bits matches the 4095 limit the
    callers test for; whether every non-integer tempreg load/store can
    encode an offset that large is not visible from here - confirm. }
  var
    tmpref : treference;
    helplist : TAsmList;
    hreg : tregister;
    immshift: byte;
    a, hipart, lopart: aint;
  begin
    { Reject spill references that carry an index register before
      anything is allocated or emitted. }
    if spilltemp.index<>NR_NO then
      internalerror(200401263);

    helplist:=TAsmList.create;

    { Helper register that holds either the adjusted base address or the
      complete offset. }
    if getregtype(tempreg)=R_INTREGISTER then
      hreg:=getregisterinline(helplist,[R_SUBWHOLE])
    else
      hreg:=cg.getintregister(helplist,OS_ADDR);

    { Strip the bits that can be folded into the load/store itself and
      check whether the remainder can be applied with one ADD or SUB. }
    a:=abs(spilltemp.offset);
    hipart:=a and not($FFF);
    lopart:=a and $FFF;

    if is_shifter_const(hipart, immshift) then
      begin
        if spilltemp.offset > 0 then
          begin
            {$ifdef DEBUG_SPILLING}
            helplist.concat(tai_comment.create(strpnew('Spilling: Use ADD to fix spill offset')));
            {$endif}
            helplist.concat(taicpu.op_reg_reg_const(A_ADD, hreg, current_procinfo.framepointer,
                                                     hipart));
            reference_reset_base(tmpref, hreg, lopart, sizeof(aint));
          end
        else
          begin
            {$ifdef DEBUG_SPILLING}
            helplist.concat(tai_comment.create(strpnew('Spilling: Use SUB to fix spill offset')));
            {$endif}
            helplist.concat(taicpu.op_reg_reg_const(A_SUB, hreg, current_procinfo.framepointer,
                                                     hipart));
            reference_reset_base(tmpref, hreg, -lopart, sizeof(aint));
          end;
      end
    else
      begin
        {$ifdef DEBUG_SPILLING}
        helplist.concat(tai_comment.create(strpnew('Spilling: Use a_load_const_reg to fix spill offset')));
        {$endif}
        { Generic fallback: materialise the whole offset and use it as
          an index register on the frame pointer. }
        cg.a_load_const_reg(helplist,OS_ADDR,spilltemp.offset,hreg);
        reference_reset_base(tmpref,current_procinfo.framepointer,0,sizeof(aint));
        tmpref.index:=hreg;
      end;

    if is_store then
      helplist.concat(spilling_create_store(tempreg,tmpref))
    else
      helplist.concat(spilling_create_load(tmpref,tempreg));

    { Only the inline-allocated helper register is released here; this
      mirrors the allocation branch above. }
    if getregtype(tempreg)=R_INTREGISTER then
      ungetregisterinline(helplist,hreg);

    list.insertlistafter(pos,helplist);
    helplist.free;
  end;
procedure trgcpu.do_spill_read(list:TAsmList;pos:tai;const spilltemp:treference;tempreg:tregister);
begin
{ don't load spilled register between
mov lr,pc
@ -145,85 +205,16 @@ unit rgcpu;
pos:=tai(pos.previous);
if abs(spilltemp.offset)>4095 then
begin
helplist:=TAsmList.create;
reference_reset(tmpref,sizeof(aint));
{ create consts entry }
current_asmdata.getjumplabel(l);
cg.a_label(current_procinfo.aktlocaldata,l);
tmpref.symboldata:=current_procinfo.aktlocaldata.last;
current_procinfo.aktlocaldata.concat(tai_const.Create_32bit(spilltemp.offset));
{ load consts entry }
if getregtype(tempreg)=R_INTREGISTER then
hreg:=getregisterinline(helplist,[R_SUBWHOLE])
else
hreg:=cg.getintregister(helplist,OS_ADDR);
tmpref.symbol:=l;
tmpref.base:=NR_R15;
helplist.concat(taicpu.op_reg_ref(A_LDR,hreg,tmpref));
reference_reset_base(tmpref,current_procinfo.framepointer,0,sizeof(aint));
tmpref.index:=hreg;
if spilltemp.index<>NR_NO then
internalerror(200401263);
helplist.concat(spilling_create_load(tmpref,tempreg));
if getregtype(tempreg)=R_INTREGISTER then
ungetregisterinline(helplist,hreg);
list.insertlistafter(pos,helplist);
helplist.free;
end
spilling_create_load_store(list, pos, spilltemp, tempreg, false)
else
inherited do_spill_read(list,pos,spilltemp,tempreg);
end;
procedure trgcpu.do_spill_written(list:TAsmList;pos:tai;const spilltemp:treference;tempreg:tregister);
var
tmpref : treference;
helplist : TAsmList;
l : tasmlabel;
hreg : tregister;
begin
if abs(spilltemp.offset)>4095 then
begin
helplist:=TAsmList.create;
reference_reset(tmpref,sizeof(aint));
{ create consts entry }
current_asmdata.getjumplabel(l);
cg.a_label(current_procinfo.aktlocaldata,l);
tmpref.symboldata:=current_procinfo.aktlocaldata.last;
current_procinfo.aktlocaldata.concat(tai_const.Create_32bit(spilltemp.offset));
{ load consts entry }
if getregtype(tempreg)=R_INTREGISTER then
hreg:=getregisterinline(helplist,[R_SUBWHOLE])
else
hreg:=cg.getintregister(helplist,OS_ADDR);
tmpref.symbol:=l;
tmpref.base:=NR_R15;
helplist.concat(taicpu.op_reg_ref(A_LDR,hreg,tmpref));
if spilltemp.index<>NR_NO then
internalerror(200401263);
reference_reset_base(tmpref,current_procinfo.framepointer,0,sizeof(pint));
tmpref.index:=hreg;
helplist.concat(spilling_create_store(tempreg,tmpref));
if getregtype(tempreg)=R_INTREGISTER then
ungetregisterinline(helplist,hreg);
list.insertlistafter(pos,helplist);
helplist.free;
end
spilling_create_load_store(list, pos, spilltemp, tempreg, true)
else
inherited do_spill_written(list,pos,spilltemp,tempreg);
end;