mirror of
https://gitlab.com/freepascal.org/fpc/source.git
synced 2025-04-06 00:47:52 +02:00
* avoid creating a stack frame on aarch64 if possible
This commit is contained in:
parent
e0a78c2485
commit
a47f153dae
@ -1872,6 +1872,13 @@ implementation
|
||||
ref: treference;
|
||||
totalstackframesize: longint;
|
||||
begin
|
||||
{ on aarch64, we need to store the link register and generate a frame pointer if the subroutine either
|
||||
- receives parameters on the stack
|
||||
- is not a leaf procedure
|
||||
- has nested procedures
|
||||
- helpers retrieve the stack pointer
|
||||
}
|
||||
|
||||
hitem:=list.last;
|
||||
{ pi_has_unwind_info may already be set at this point if there are
|
||||
SEH directives in assembler body. In this case, .seh_endprologue
|
||||
@ -1885,28 +1892,30 @@ implementation
|
||||
|
||||
if target_info.system=system_aarch64_win64 then
|
||||
include(current_procinfo.flags,pi_has_unwind_info);
|
||||
|
||||
{ save stack pointer and return address }
|
||||
reference_reset_base(ref,NR_SP,-16,ctempposinvalid,16,[]);
|
||||
ref.addressmode:=AM_PREINDEXED;
|
||||
list.concat(taicpu.op_reg_reg_ref(A_STP,NR_FP,NR_LR,ref));
|
||||
current_asmdata.asmcfi.cfa_def_cfa_offset(list,2*sizeof(pint));
|
||||
current_asmdata.asmcfi.cfa_offset(list,NR_FP,-16);
|
||||
current_asmdata.asmcfi.cfa_offset(list,NR_LR,-8);
|
||||
if target_info.system=system_aarch64_win64 then
|
||||
list.concat(cai_seh_directive.create_offset(ash_savefplr_x,16));
|
||||
{ initialise frame pointer }
|
||||
if current_procinfo.procdef.proctypeoption<>potype_exceptfilter then
|
||||
if not(pi_no_framepointer_needed in current_procinfo.flags) then
|
||||
begin
|
||||
a_load_reg_reg(list,OS_ADDR,OS_ADDR,NR_SP,NR_FP);
|
||||
current_asmdata.asmcfi.cfa_def_cfa_register(list,NR_FP);
|
||||
{ save frame pointer and return address }
|
||||
reference_reset_base(ref,NR_SP,-16,ctempposinvalid,16,[]);
|
||||
ref.addressmode:=AM_PREINDEXED;
|
||||
list.concat(taicpu.op_reg_reg_ref(A_STP,NR_FP,NR_LR,ref));
|
||||
current_asmdata.asmcfi.cfa_def_cfa_offset(list,2*sizeof(pint));
|
||||
current_asmdata.asmcfi.cfa_offset(list,NR_FP,-16);
|
||||
current_asmdata.asmcfi.cfa_offset(list,NR_LR,-8);
|
||||
if target_info.system=system_aarch64_win64 then
|
||||
list.concat(cai_seh_directive.create(ash_setfp));
|
||||
end
|
||||
else
|
||||
begin
|
||||
gen_load_frame_for_exceptfilter(list);
|
||||
localsize:=current_procinfo.maxpushedparasize;
|
||||
list.concat(cai_seh_directive.create_offset(ash_savefplr_x,16));
|
||||
{ initialise frame pointer }
|
||||
if current_procinfo.procdef.proctypeoption<>potype_exceptfilter then
|
||||
begin
|
||||
a_load_reg_reg(list,OS_ADDR,OS_ADDR,NR_SP,NR_FP);
|
||||
current_asmdata.asmcfi.cfa_def_cfa_register(list,NR_FP);
|
||||
if target_info.system=system_aarch64_win64 then
|
||||
list.concat(cai_seh_directive.create(ash_setfp));
|
||||
end
|
||||
else
|
||||
begin
|
||||
gen_load_frame_for_exceptfilter(list);
|
||||
localsize:=current_procinfo.maxpushedparasize;
|
||||
end;
|
||||
end;
|
||||
|
||||
totalstackframesize:=localsize;
|
||||
@ -2081,7 +2090,6 @@ implementation
|
||||
end;
|
||||
|
||||
|
||||
|
||||
procedure tcgaarch64.g_proc_exit(list : TAsmList;parasize:longint;nostackframe:boolean);
|
||||
var
|
||||
ref: treference;
|
||||
@ -2122,13 +2130,22 @@ implementation
|
||||
load_regs(list,R_INTREGISTER,RS_X19,RS_X28,R_SUBWHOLE);
|
||||
end
|
||||
else if current_procinfo.final_localsize<>0 then
|
||||
{ restore stack pointer }
|
||||
a_load_reg_reg(list,OS_ADDR,OS_ADDR,NR_FP,NR_SP);
|
||||
begin
|
||||
{ restore stack pointer }
|
||||
if pi_no_framepointer_needed in current_procinfo.flags then
|
||||
handle_reg_imm12_reg(list,A_ADD,OS_ADDR,current_procinfo.framepointer,current_procinfo.final_localsize,
|
||||
current_procinfo.framepointer,NR_IP0,false,true)
|
||||
else
|
||||
a_load_reg_reg(list,OS_ADDR,OS_ADDR,NR_FP,NR_SP);
|
||||
end;
|
||||
|
||||
{ restore framepointer and return address }
|
||||
reference_reset_base(ref,NR_SP,16,ctempposinvalid,16,[]);
|
||||
ref.addressmode:=AM_POSTINDEXED;
|
||||
list.concat(taicpu.op_reg_reg_ref(A_LDP,NR_FP,NR_LR,ref));
|
||||
if not(pi_no_framepointer_needed in current_procinfo.flags) then
|
||||
begin
|
||||
{ restore framepointer and return address }
|
||||
reference_reset_base(ref,NR_SP,16,ctempposinvalid,16,[]);
|
||||
ref.addressmode:=AM_POSTINDEXED;
|
||||
list.concat(taicpu.op_reg_reg_ref(A_LDP,NR_FP,NR_LR,ref));
|
||||
end;
|
||||
end;
|
||||
|
||||
{ return }
|
||||
|
@ -1133,7 +1133,8 @@ Implementation
|
||||
GetNextInstruction(p, hp1) and
|
||||
(hp1.typ = ait_instruction) and
|
||||
(taicpu(hp1).condition = C_None) and
|
||||
(taicpu(hp1).oppostfix = taicpu(p).oppostfix) then
|
||||
(taicpu(hp1).oppostfix = taicpu(p).oppostfix) and
|
||||
(taicpu(hp1).ops>0) and (taicpu(hp1).oper[0]^.typ=top_reg) then
|
||||
begin
|
||||
{ Saves constant dereferencing and makes it easier to change the size if necessary }
|
||||
SrcReg := taicpu(p).oper[0]^.reg;
|
||||
|
@ -786,7 +786,9 @@ interface
|
||||
{ subroutine uses get_frame }
|
||||
pi_uses_get_frame,
|
||||
{ x86 only: subroutine uses ymm registers, requires vzeroupper call }
|
||||
pi_uses_ymm
|
||||
pi_uses_ymm,
|
||||
{ set if no frame pointer is needed; the rules for when this applies are target specific }
|
||||
pi_no_framepointer_needed
|
||||
);
|
||||
tprocinfoflags=set of tprocinfoflag;
|
||||
|
||||
|
@ -1046,7 +1046,7 @@ implementation
|
||||
end;
|
||||
|
||||
|
||||
{$if defined(i386) or defined(x86_64) or defined(arm) or defined(riscv32) or defined(riscv64) or defined(m68k)}
|
||||
{$if defined(i386) or defined(x86_64) or defined(arm) or defined(aarch64) or defined(riscv32) or defined(riscv64) or defined(m68k)}
|
||||
const
|
||||
exception_flags: array[boolean] of tprocinfoflags = (
|
||||
[],
|
||||
@ -1058,7 +1058,7 @@ implementation
|
||||
begin
|
||||
tg:=tgobjclass.create;
|
||||
|
||||
{$if defined(i386) or defined(x86_64) or defined(arm) or defined(m68k)}
|
||||
{$if defined(i386) or defined(x86_64) or defined(arm) or defined(aarch64) or defined(m68k)}
|
||||
{$if defined(arm)}
|
||||
{ frame and stack pointer must be always the same on arm thumb so it makes no
|
||||
sense to fiddle with a frame pointer }
|
||||
@ -1102,11 +1102,16 @@ implementation
|
||||
not(cs_generate_stackframes in current_settings.localswitches) and
|
||||
not(cs_profile in current_settings.moduleswitches) and
|
||||
not(po_assembler in procdef.procoptions) and
|
||||
{$if defined(aarch64)}
|
||||
{ on aarch64, it must be a leaf subroutine }
|
||||
not(pi_do_call in flags) and
|
||||
{$endif defined(aarch64)}
|
||||
not ((pi_has_stackparameter in flags)
|
||||
{$ifndef arm} { Outgoing parameter(s) on stack do not need stackframe on x86 targets
|
||||
{$if defined(i386) or defined(x86_64)}
|
||||
{ Outgoing parameter(s) on stack do not need stackframe on x86 targets
|
||||
with fixed stack. On ARM it fails, see bug #25050 }
|
||||
and (not paramanager.use_fixed_stack)
|
||||
{$endif arm}
|
||||
{$endif defined(i386) or defined(x86_64)}
|
||||
) and
|
||||
((flags*([pi_has_assembler_block,pi_is_assembler,
|
||||
pi_needs_stackframe]+
|
||||
@ -1137,6 +1142,7 @@ implementation
|
||||
{ Only need to set the framepointer }
|
||||
framepointer:=NR_STACK_POINTER_REG;
|
||||
tg.direction:=1;
|
||||
Include(flags,pi_no_framepointer_needed)
|
||||
end
|
||||
{$if defined(arm)}
|
||||
{ On arm, the stack frame size can be estimated to avoid using an extra frame pointer,
|
||||
|
@ -1661,7 +1661,10 @@ const
|
||||
(mask:pi_uses_get_frame;
|
||||
str:' uses get_frame'),
|
||||
(mask:pi_uses_ymm;
|
||||
str:' uses ymm register (x86 only)')
|
||||
str:' uses ymm register (x86 only)'),
|
||||
(mask:pi_no_framepointer_needed;
|
||||
str:' set if no frame pointer is needed, the rules when this applies is target specific'
|
||||
)
|
||||
);
|
||||
var
|
||||
procinfooptions : tprocinfoflags;
|
||||
|
Loading…
Reference in New Issue
Block a user