* avoid creating a stack frame on aarch64 if possible

This commit is contained in:
Florian Klämpfl 2018-10-23 22:35:34 +02:00 committed by florian
parent e0a78c2485
commit a47f153dae
5 changed files with 63 additions and 34 deletions

View File

@ -1872,6 +1872,13 @@ implementation
ref: treference;
totalstackframesize: longint;
begin
{ on aarch64, we need to store the link register and generate a frame pointer if the subroutine either
- receives parameters on the stack
- is not a leaf procedure
- has nested procedures
- helpers retrieve the stack pointer
}
hitem:=list.last;
{ pi_has_unwind_info may already be set at this point if there are
SEH directives in assembler body. In this case, .seh_endprologue
@ -1885,28 +1892,30 @@ implementation
if target_info.system=system_aarch64_win64 then
include(current_procinfo.flags,pi_has_unwind_info);
{ save stack pointer and return address }
reference_reset_base(ref,NR_SP,-16,ctempposinvalid,16,[]);
ref.addressmode:=AM_PREINDEXED;
list.concat(taicpu.op_reg_reg_ref(A_STP,NR_FP,NR_LR,ref));
current_asmdata.asmcfi.cfa_def_cfa_offset(list,2*sizeof(pint));
current_asmdata.asmcfi.cfa_offset(list,NR_FP,-16);
current_asmdata.asmcfi.cfa_offset(list,NR_LR,-8);
if target_info.system=system_aarch64_win64 then
list.concat(cai_seh_directive.create_offset(ash_savefplr_x,16));
{ initialise frame pointer }
if current_procinfo.procdef.proctypeoption<>potype_exceptfilter then
if not(pi_no_framepointer_needed in current_procinfo.flags) then
begin
a_load_reg_reg(list,OS_ADDR,OS_ADDR,NR_SP,NR_FP);
current_asmdata.asmcfi.cfa_def_cfa_register(list,NR_FP);
{ save stack pointer and return address }
reference_reset_base(ref,NR_SP,-16,ctempposinvalid,16,[]);
ref.addressmode:=AM_PREINDEXED;
list.concat(taicpu.op_reg_reg_ref(A_STP,NR_FP,NR_LR,ref));
current_asmdata.asmcfi.cfa_def_cfa_offset(list,2*sizeof(pint));
current_asmdata.asmcfi.cfa_offset(list,NR_FP,-16);
current_asmdata.asmcfi.cfa_offset(list,NR_LR,-8);
if target_info.system=system_aarch64_win64 then
list.concat(cai_seh_directive.create(ash_setfp));
end
else
begin
gen_load_frame_for_exceptfilter(list);
localsize:=current_procinfo.maxpushedparasize;
list.concat(cai_seh_directive.create_offset(ash_savefplr_x,16));
{ initialise frame pointer }
if current_procinfo.procdef.proctypeoption<>potype_exceptfilter then
begin
a_load_reg_reg(list,OS_ADDR,OS_ADDR,NR_SP,NR_FP);
current_asmdata.asmcfi.cfa_def_cfa_register(list,NR_FP);
if target_info.system=system_aarch64_win64 then
list.concat(cai_seh_directive.create(ash_setfp));
end
else
begin
gen_load_frame_for_exceptfilter(list);
localsize:=current_procinfo.maxpushedparasize;
end;
end;
totalstackframesize:=localsize;
@ -2081,7 +2090,6 @@ implementation
end;
procedure tcgaarch64.g_proc_exit(list : TAsmList;parasize:longint;nostackframe:boolean);
var
ref: treference;
@ -2122,13 +2130,22 @@ implementation
load_regs(list,R_INTREGISTER,RS_X19,RS_X28,R_SUBWHOLE);
end
else if current_procinfo.final_localsize<>0 then
{ restore stack pointer }
a_load_reg_reg(list,OS_ADDR,OS_ADDR,NR_FP,NR_SP);
begin
{ restore stack pointer }
if pi_no_framepointer_needed in current_procinfo.flags then
handle_reg_imm12_reg(list,A_ADD,OS_ADDR,current_procinfo.framepointer,current_procinfo.final_localsize,
current_procinfo.framepointer,NR_IP0,false,true)
else
a_load_reg_reg(list,OS_ADDR,OS_ADDR,NR_FP,NR_SP);
end;
{ restore framepointer and return address }
reference_reset_base(ref,NR_SP,16,ctempposinvalid,16,[]);
ref.addressmode:=AM_POSTINDEXED;
list.concat(taicpu.op_reg_reg_ref(A_LDP,NR_FP,NR_LR,ref));
if not(pi_no_framepointer_needed in current_procinfo.flags) then
begin
{ restore framepointer and return address }
reference_reset_base(ref,NR_SP,16,ctempposinvalid,16,[]);
ref.addressmode:=AM_POSTINDEXED;
list.concat(taicpu.op_reg_reg_ref(A_LDP,NR_FP,NR_LR,ref));
end;
end;
{ return }

View File

@ -1133,7 +1133,8 @@ Implementation
GetNextInstruction(p, hp1) and
(hp1.typ = ait_instruction) and
(taicpu(hp1).condition = C_None) and
(taicpu(hp1).oppostfix = taicpu(p).oppostfix) then
(taicpu(hp1).oppostfix = taicpu(p).oppostfix) and
(taicpu(hp1).ops>0) and (taicpu(hp1).oper[0]^.typ=top_reg) then
begin
{ Saves constant dereferencing and makes it easier to change the size if necessary }
SrcReg := taicpu(p).oper[0]^.reg;

View File

@ -786,7 +786,9 @@ interface
{ subroutine uses get_frame }
pi_uses_get_frame,
{ x86 only: subroutine uses ymm registers, requires vzeroupper call }
pi_uses_ymm
pi_uses_ymm,
{ set if no frame pointer is needed; the rules for when this applies are target specific }
pi_no_framepointer_needed
);
tprocinfoflags=set of tprocinfoflag;

View File

@ -1046,7 +1046,7 @@ implementation
end;
{$if defined(i386) or defined(x86_64) or defined(arm) or defined(riscv32) or defined(riscv64) or defined(m68k)}
{$if defined(i386) or defined(x86_64) or defined(arm) or defined(aarch64) or defined(riscv32) or defined(riscv64) or defined(m68k)}
const
exception_flags: array[boolean] of tprocinfoflags = (
[],
@ -1058,7 +1058,7 @@ implementation
begin
tg:=tgobjclass.create;
{$if defined(i386) or defined(x86_64) or defined(arm) or defined(m68k)}
{$if defined(i386) or defined(x86_64) or defined(arm) or defined(aarch64) or defined(m68k)}
{$if defined(arm)}
{ frame and stack pointer must always be the same on arm thumb, so it makes no
sense to fiddle with a frame pointer }
@ -1102,11 +1102,16 @@ implementation
not(cs_generate_stackframes in current_settings.localswitches) and
not(cs_profile in current_settings.moduleswitches) and
not(po_assembler in procdef.procoptions) and
{$if defined(aarch64)}
{ on aarch64, it must be a leaf subroutine }
not(pi_do_call in flags) and
{$endif defined(aarch64)}
not ((pi_has_stackparameter in flags)
{$ifndef arm} { Outgoing parameter(s) on stack do not need stackframe on x86 targets
{$if defined(i386) or defined(x86_64)}
{ Outgoing parameter(s) on stack do not need stackframe on x86 targets
with fixed stack. On ARM it fails, see bug #25050 }
and (not paramanager.use_fixed_stack)
{$endif arm}
{$endif defined(i386) or defined(x86_64)}
) and
((flags*([pi_has_assembler_block,pi_is_assembler,
pi_needs_stackframe]+
@ -1137,6 +1142,7 @@ implementation
{ Only need to set the framepointer }
framepointer:=NR_STACK_POINTER_REG;
tg.direction:=1;
Include(flags,pi_no_framepointer_needed)
end
{$if defined(arm)}
{ On arm, the stack frame size can be estimated to avoid using an extra frame pointer,

View File

@ -1661,7 +1661,10 @@ const
(mask:pi_uses_get_frame;
str:' uses get_frame'),
(mask:pi_uses_ymm;
str:' uses ymm register (x86 only)')
str:' uses ymm register (x86 only)'),
(mask:pi_no_framepointer_needed;
str:' set if no frame pointer is needed, the rules when this applies is target specific'
)
);
var
procinfooptions : tprocinfoflags;