* Implemented stackframe optimization for ARM CPU.

git-svn-id: trunk@5374 -
This commit is contained in:
yury 2006-11-14 16:18:49 +00:00
parent c16249f103
commit 8abe2d365e
4 changed files with 75 additions and 26 deletions

View File

@ -1156,7 +1156,8 @@ unit cgcpu;
firstfloatreg,lastfloatreg,
r : byte;
i : aint;
again : tasmlabel;
again : tasmlabel;
regs : tcpuregisterset;
begin
LocalSize:=align(LocalSize,4);
if not(nostackframe) then
@ -1171,19 +1172,28 @@ unit cgcpu;
lastfloatreg:=r;
end;
a_reg_alloc(list,NR_STACK_POINTER_REG);
a_reg_alloc(list,NR_FRAME_POINTER_REG);
a_reg_alloc(list,NR_R12);
if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
begin
a_reg_alloc(list,NR_FRAME_POINTER_REG);
a_reg_alloc(list,NR_R12);
list.concat(taicpu.op_reg_reg(A_MOV,NR_R12,NR_STACK_POINTER_REG));
list.concat(taicpu.op_reg_reg(A_MOV,NR_R12,NR_STACK_POINTER_REG));
end;
{ save int registers }
reference_reset(ref);
ref.index:=NR_STACK_POINTER_REG;
ref.addressmode:=AM_PREINDEXED;
list.concat(setoppostfix(taicpu.op_ref_regset(A_STM,ref,
rg[R_INTREGISTER].used_in_proc-paramanager.get_volatile_registers_int(pocall_stdcall)+[RS_R11,RS_R12,RS_R14,RS_R15]),
PF_FD));
regs:=rg[R_INTREGISTER].used_in_proc-paramanager.get_volatile_registers_int(pocall_stdcall);
if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
regs:=regs+[RS_R11,RS_R12,RS_R14,RS_R15]
else
if (regs<>[]) or (pi_do_call in current_procinfo.flags) then
include(regs,RS_R14);
if regs<>[] then
list.concat(setoppostfix(taicpu.op_ref_regset(A_STM,ref,regs),PF_FD));
list.concat(taicpu.op_reg_reg_const(A_SUB,NR_FRAME_POINTER_REG,NR_R12,4));
if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
list.concat(taicpu.op_reg_reg_const(A_SUB,NR_FRAME_POINTER_REG,NR_R12,4));
{ allocate necessary stack size
not necessary according to Yury Sidorov
@ -1248,17 +1258,20 @@ unit cgcpu;
end
else
}
if not(is_shifter_const(localsize,shift)) then
begin
a_load_const_reg(list,OS_ADDR,LocalSize,NR_R12);
list.concat(taicpu.op_reg_reg_reg(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,NR_R12));
a_reg_dealloc(list,NR_R12);
end
else
begin
a_reg_dealloc(list,NR_R12);
list.concat(taicpu.op_reg_reg_const(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,LocalSize));
end;
if LocalSize<>0 then
if not(is_shifter_const(localsize,shift)) then
begin
if current_procinfo.framepointer=NR_STACK_POINTER_REG then
a_reg_alloc(list,NR_R12);
a_load_const_reg(list,OS_ADDR,LocalSize,NR_R12);
list.concat(taicpu.op_reg_reg_reg(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,NR_R12));
a_reg_dealloc(list,NR_R12);
end
else
begin
a_reg_dealloc(list,NR_R12);
list.concat(taicpu.op_reg_reg_const(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,LocalSize));
end;
if firstfloatreg<>RS_NO then
begin
@ -1287,6 +1300,8 @@ unit cgcpu;
firstfloatreg,lastfloatreg,
r : byte;
shift : byte;
regs : tcpuregisterset;
LocalSize : longint;
begin
if not(nostackframe) then
begin
@ -1320,7 +1335,37 @@ unit cgcpu;
end;
if (current_procinfo.framepointer=NR_STACK_POINTER_REG) then
list.concat(taicpu.op_reg_reg(A_MOV,NR_R15,NR_R14))
begin
LocalSize:=current_procinfo.calc_stackframe_size;
if LocalSize<>0 then
if not(is_shifter_const(LocalSize,shift)) then
begin
a_reg_alloc(list,NR_R12);
a_load_const_reg(list,OS_ADDR,LocalSize,NR_R12);
list.concat(taicpu.op_reg_reg_reg(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,NR_R12));
a_reg_dealloc(list,NR_R12);
end
else
begin
list.concat(taicpu.op_reg_reg_const(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,LocalSize));
end;
regs:=rg[R_INTREGISTER].used_in_proc-paramanager.get_volatile_registers_int(pocall_stdcall);
if (pi_do_call in current_procinfo.flags) or (regs<>[]) then
begin
exclude(regs,RS_R14);
include(regs,RS_R15);
end;
if regs=[] then
list.concat(taicpu.op_reg_reg(A_MOV,NR_R15,NR_R14))
else
begin
reference_reset(ref);
ref.index:=NR_STACK_POINTER_REG;
ref.addressmode:=AM_PREINDEXED;
list.concat(setoppostfix(taicpu.op_ref_regset(A_LDM,ref,regs),PF_FD));
end;
end
else
begin
{ restore int registers and return }

View File

@ -88,7 +88,7 @@ Const
genericlevel3optimizerswitches-
{ no need to write info about those }
[cs_opt_level1,cs_opt_level2,cs_opt_level3]+
[cs_opt_regvar,cs_opt_loopunroll,cs_opt_tailrecursion];
[cs_opt_regvar,cs_opt_loopunroll,cs_opt_tailrecursion,cs_opt_stackframe];
level1optimizerswitches = genericlevel1optimizerswitches;
level2optimizerswitches = genericlevel2optimizerswitches + level1optimizerswitches + [cs_opt_regvar,cs_opt_stackframe,cs_opt_tailrecursion];

View File

@ -62,7 +62,8 @@ unit cpupi;
is especially a problem when taking the address of a local. For now,
this extra memory should hurt less than generating all local contants with offsets
>256 as non shifter constants }
tg.setfirsttemp(-12-28);
if tg.direction = -1 then
tg.setfirsttemp(-12-28);
end;

View File

@ -745,7 +745,7 @@ implementation
{ set the start offset to the start of the temp area in the stack }
tg:=ttgobj.create;
{$ifdef x86}
{$if defined(x86) or defined(arm)}
{ try to strip the stack frame }
{ set the framepointer to esp if:
- no assembler directive, those are handled elsewhere
@ -761,7 +761,11 @@ implementation
not(po_assembler in procdef.procoptions) and
((flags*[pi_has_assembler_block,pi_uses_exceptions,pi_is_assembler,
pi_needs_implicit_finally,pi_has_implicit_finally,pi_has_stackparameter,
pi_needs_stackframe])=[]) then
pi_needs_stackframe])=[])
{$ifdef arm}
and ((cs_fp_emulation in current_settings.moduleswitches) or not (pi_uses_fpu in flags))
{$endif arm}
then
begin
{ we need the parameter info here to determine if the procedure gets
parameters on the stack
@ -777,8 +781,7 @@ implementation
tg.direction:=1;
end;
end;
{$endif x86}
{$endif}
{ Create register allocator }
cg.init_register_allocators;