* Implemented stackframe optimization for ARM CPU.

git-svn-id: trunk@5374 -
This commit is contained in:
yury 2006-11-14 16:18:49 +00:00
parent c16249f103
commit 8abe2d365e
4 changed files with 75 additions and 26 deletions

View File

@ -1156,7 +1156,8 @@ unit cgcpu;
firstfloatreg,lastfloatreg, firstfloatreg,lastfloatreg,
r : byte; r : byte;
i : aint; i : aint;
again : tasmlabel; again : tasmlabel;
regs : tcpuregisterset;
begin begin
LocalSize:=align(LocalSize,4); LocalSize:=align(LocalSize,4);
if not(nostackframe) then if not(nostackframe) then
@ -1171,19 +1172,28 @@ unit cgcpu;
lastfloatreg:=r; lastfloatreg:=r;
end; end;
a_reg_alloc(list,NR_STACK_POINTER_REG); a_reg_alloc(list,NR_STACK_POINTER_REG);
a_reg_alloc(list,NR_FRAME_POINTER_REG); if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
a_reg_alloc(list,NR_R12); begin
a_reg_alloc(list,NR_FRAME_POINTER_REG);
a_reg_alloc(list,NR_R12);
list.concat(taicpu.op_reg_reg(A_MOV,NR_R12,NR_STACK_POINTER_REG)); list.concat(taicpu.op_reg_reg(A_MOV,NR_R12,NR_STACK_POINTER_REG));
end;
{ save int registers } { save int registers }
reference_reset(ref); reference_reset(ref);
ref.index:=NR_STACK_POINTER_REG; ref.index:=NR_STACK_POINTER_REG;
ref.addressmode:=AM_PREINDEXED; ref.addressmode:=AM_PREINDEXED;
list.concat(setoppostfix(taicpu.op_ref_regset(A_STM,ref, regs:=rg[R_INTREGISTER].used_in_proc-paramanager.get_volatile_registers_int(pocall_stdcall);
rg[R_INTREGISTER].used_in_proc-paramanager.get_volatile_registers_int(pocall_stdcall)+[RS_R11,RS_R12,RS_R14,RS_R15]), if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
PF_FD)); regs:=regs+[RS_R11,RS_R12,RS_R14,RS_R15]
else
if (regs<>[]) or (pi_do_call in current_procinfo.flags) then
include(regs,RS_R14);
if regs<>[] then
list.concat(setoppostfix(taicpu.op_ref_regset(A_STM,ref,regs),PF_FD));
list.concat(taicpu.op_reg_reg_const(A_SUB,NR_FRAME_POINTER_REG,NR_R12,4)); if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
list.concat(taicpu.op_reg_reg_const(A_SUB,NR_FRAME_POINTER_REG,NR_R12,4));
{ allocate necessary stack size { allocate necessary stack size
not necessary according to Yury Sidorov not necessary according to Yury Sidorov
@ -1248,17 +1258,20 @@ unit cgcpu;
end end
else else
} }
if not(is_shifter_const(localsize,shift)) then if LocalSize<>0 then
begin if not(is_shifter_const(localsize,shift)) then
a_load_const_reg(list,OS_ADDR,LocalSize,NR_R12); begin
list.concat(taicpu.op_reg_reg_reg(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,NR_R12)); if current_procinfo.framepointer=NR_STACK_POINTER_REG then
a_reg_dealloc(list,NR_R12); a_reg_alloc(list,NR_R12);
end a_load_const_reg(list,OS_ADDR,LocalSize,NR_R12);
else list.concat(taicpu.op_reg_reg_reg(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,NR_R12));
begin a_reg_dealloc(list,NR_R12);
a_reg_dealloc(list,NR_R12); end
list.concat(taicpu.op_reg_reg_const(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,LocalSize)); else
end; begin
a_reg_dealloc(list,NR_R12);
list.concat(taicpu.op_reg_reg_const(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,LocalSize));
end;
if firstfloatreg<>RS_NO then if firstfloatreg<>RS_NO then
begin begin
@ -1287,6 +1300,8 @@ unit cgcpu;
firstfloatreg,lastfloatreg, firstfloatreg,lastfloatreg,
r : byte; r : byte;
shift : byte; shift : byte;
regs : tcpuregisterset;
LocalSize : longint;
begin begin
if not(nostackframe) then if not(nostackframe) then
begin begin
@ -1320,7 +1335,37 @@ unit cgcpu;
end; end;
if (current_procinfo.framepointer=NR_STACK_POINTER_REG) then if (current_procinfo.framepointer=NR_STACK_POINTER_REG) then
list.concat(taicpu.op_reg_reg(A_MOV,NR_R15,NR_R14)) begin
LocalSize:=current_procinfo.calc_stackframe_size;
if LocalSize<>0 then
if not(is_shifter_const(LocalSize,shift)) then
begin
a_reg_alloc(list,NR_R12);
a_load_const_reg(list,OS_ADDR,LocalSize,NR_R12);
list.concat(taicpu.op_reg_reg_reg(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,NR_R12));
a_reg_dealloc(list,NR_R12);
end
else
begin
list.concat(taicpu.op_reg_reg_const(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,LocalSize));
end;
regs:=rg[R_INTREGISTER].used_in_proc-paramanager.get_volatile_registers_int(pocall_stdcall);
if (pi_do_call in current_procinfo.flags) or (regs<>[]) then
begin
exclude(regs,RS_R14);
include(regs,RS_R15);
end;
if regs=[] then
list.concat(taicpu.op_reg_reg(A_MOV,NR_R15,NR_R14))
else
begin
reference_reset(ref);
ref.index:=NR_STACK_POINTER_REG;
ref.addressmode:=AM_PREINDEXED;
list.concat(setoppostfix(taicpu.op_ref_regset(A_LDM,ref,regs),PF_FD));
end;
end
else else
begin begin
{ restore int registers and return } { restore int registers and return }

View File

@ -88,7 +88,7 @@ Const
genericlevel3optimizerswitches- genericlevel3optimizerswitches-
{ no need to write info about those } { no need to write info about those }
[cs_opt_level1,cs_opt_level2,cs_opt_level3]+ [cs_opt_level1,cs_opt_level2,cs_opt_level3]+
[cs_opt_regvar,cs_opt_loopunroll,cs_opt_tailrecursion]; [cs_opt_regvar,cs_opt_loopunroll,cs_opt_tailrecursion,cs_opt_stackframe];
level1optimizerswitches = genericlevel1optimizerswitches; level1optimizerswitches = genericlevel1optimizerswitches;
level2optimizerswitches = genericlevel2optimizerswitches + level1optimizerswitches + [cs_opt_regvar,cs_opt_stackframe,cs_opt_tailrecursion]; level2optimizerswitches = genericlevel2optimizerswitches + level1optimizerswitches + [cs_opt_regvar,cs_opt_stackframe,cs_opt_tailrecursion];

View File

@ -62,7 +62,8 @@ unit cpupi;
is especially a problem when taking the address of a local. For now, is especially a problem when taking the address of a local. For now,
this extra memory should hurt less than generating all local constants with offsets this extra memory should hurt less than generating all local constants with offsets
>256 as non shifter constants } >256 as non shifter constants }
tg.setfirsttemp(-12-28); if tg.direction = -1 then
tg.setfirsttemp(-12-28);
end; end;

View File

@ -745,7 +745,7 @@ implementation
{ set the start offset to the start of the temp area in the stack } { set the start offset to the start of the temp area in the stack }
tg:=ttgobj.create; tg:=ttgobj.create;
{$ifdef x86} {$if defined(x86) or defined(arm)}
{ try to strip the stack frame } { try to strip the stack frame }
{ set the framepointer to esp if: { set the framepointer to esp if:
- no assembler directive, those are handled elsewhere - no assembler directive, those are handled elsewhere
@ -761,7 +761,11 @@ implementation
not(po_assembler in procdef.procoptions) and not(po_assembler in procdef.procoptions) and
((flags*[pi_has_assembler_block,pi_uses_exceptions,pi_is_assembler, ((flags*[pi_has_assembler_block,pi_uses_exceptions,pi_is_assembler,
pi_needs_implicit_finally,pi_has_implicit_finally,pi_has_stackparameter, pi_needs_implicit_finally,pi_has_implicit_finally,pi_has_stackparameter,
pi_needs_stackframe])=[]) then pi_needs_stackframe])=[])
{$ifdef arm}
and ((cs_fp_emulation in current_settings.moduleswitches) or not (pi_uses_fpu in flags))
{$endif arm}
then
begin begin
{ we need the parameter info here to determine if the procedure gets { we need the parameter info here to determine if the procedure gets
parameters on the stack parameters on the stack
@ -777,8 +781,7 @@ implementation
tg.direction:=1; tg.direction:=1;
end; end;
end; end;
{$endif x86} {$endif}
{ Create register allocator } { Create register allocator }
cg.init_register_allocators; cg.init_register_allocators;