mirror of
https://gitlab.com/freepascal.org/fpc/source.git
synced 2025-04-07 01:48:00 +02:00
* switched to using the stack pointer as base register for the temp allocator
instead of the frame pointer register:
1) we know the exact offsets of the temps from the stack pointer after pass 1 (based on the required parameter stack size for called routines), while we don't know them for the frame pointer (they depend on the number of saved registers)
2) temp offsets from the stack pointer are positive while those from the frame pointer are negative, and we can directly encode much bigger positive offsets in the instructions
o move the stack pointer register to a virtual register in loadparentfpn, because many instructions cannot directly operate on/with the stack pointer
o add the necessary register interference edges for the stack pointer register
git-svn-id: trunk@29938 -
This commit is contained in:
parent
5bb89cc2f0
commit
41fba0c4f7
2
.gitattributes
vendored
2
.gitattributes
vendored
@ -31,6 +31,7 @@ compiler/aarch64/ncpuadd.pas svneol=native#text/plain
|
||||
compiler/aarch64/ncpucnv.pas svneol=native#text/plain
|
||||
compiler/aarch64/ncpuinl.pas svneol=native#text/plain
|
||||
compiler/aarch64/ncpumat.pas svneol=native#text/plain
|
||||
compiler/aarch64/ncpumem.pas svneol=native#text/plain
|
||||
compiler/aarch64/ra64con.inc svneol=native#text/plain
|
||||
compiler/aarch64/ra64dwa.inc svneol=native#text/plain
|
||||
compiler/aarch64/ra64nor.inc svneol=native#text/plain
|
||||
@ -11945,6 +11946,7 @@ tests/test/tmsg3.pp svneol=native#text/plain
|
||||
tests/test/tmsg4.pp svneol=native#text/plain
|
||||
tests/test/tmt1.pp svneol=native#text/plain
|
||||
tests/test/tmul1.pp svneol=native#text/pascal
|
||||
tests/test/tnest1.pp svneol=native#text/plain
|
||||
tests/test/tnoext1.pp svneol=native#text/plain
|
||||
tests/test/tnoext2.pp svneol=native#text/plain
|
||||
tests/test/tnoext3.pp svneol=native#text/plain
|
||||
|
@ -95,7 +95,7 @@ interface
|
||||
procedure g_adjust_self_value(list: TAsmList; procdef: tprocdef; ioffset: tcgint);override;
|
||||
procedure g_intf_wrapper(list: TAsmList; procdef: tprocdef; const labelname: string; ioffset: longint);override;
|
||||
private
|
||||
procedure save_regs(list: TAsmList; rt: tregistertype; lowsr, highsr: tsuperregister; sub: tsubregister);
|
||||
function save_regs(list: TAsmList; rt: tregistertype; lowsr, highsr: tsuperregister; sub: tsubregister): longint;
|
||||
procedure load_regs(list: TAsmList; rt: tregistertype; lowsr, highsr: tsuperregister; sub: tsubregister);
|
||||
end;
|
||||
|
||||
@ -519,7 +519,7 @@ implementation
|
||||
begin
|
||||
inherited init_register_allocators;
|
||||
|
||||
rg[R_INTREGISTER]:=Trgcpu.create(R_INTREGISTER,R_SUBWHOLE,
|
||||
rg[R_INTREGISTER]:=trgintcpu.create(R_INTREGISTER,R_SUBWHOLE,
|
||||
[RS_X0,RS_X1,RS_X2,RS_X3,RS_X4,RS_X5,RS_X6,RS_X7,RS_X8,
|
||||
RS_X9,RS_X10,RS_X11,RS_X12,RS_X13,RS_X14,RS_X15,RS_X16,RS_X17,
|
||||
RS_X19,RS_X20,RS_X21,RS_X22,RS_X23,RS_X24,RS_X25,RS_X26,RS_X27,RS_X28
|
||||
@ -1399,12 +1399,13 @@ implementation
|
||||
|
||||
{ *********** entry/exit code and address loading ************ }
|
||||
|
||||
procedure tcgaarch64.save_regs(list: TAsmList; rt: tregistertype; lowsr, highsr: tsuperregister; sub: tsubregister);
|
||||
function tcgaarch64.save_regs(list: TAsmList; rt: tregistertype; lowsr, highsr: tsuperregister; sub: tsubregister): longint;
|
||||
var
|
||||
ref: treference;
|
||||
sr: tsuperregister;
|
||||
pairreg: tregister;
|
||||
begin
|
||||
result:=0;
|
||||
reference_reset_base(ref,NR_SP,-16,16);
|
||||
ref.addressmode:=AM_PREINDEXED;
|
||||
pairreg:=NR_NO;
|
||||
@ -1415,18 +1416,38 @@ implementation
|
||||
pairreg:=newreg(rt,sr,sub)
|
||||
else
|
||||
begin
|
||||
inc(result,16);
|
||||
list.concat(taicpu.op_reg_reg_ref(A_STP,pairreg,newreg(rt,sr,sub),ref));
|
||||
pairreg:=NR_NO
|
||||
end;
|
||||
{ one left -> store twice (stack must be 16 bytes aligned) }
|
||||
if pairreg<>NR_NO then
|
||||
list.concat(taicpu.op_reg_reg_ref(A_STP,pairreg,pairreg,ref));
|
||||
begin
|
||||
list.concat(taicpu.op_reg_reg_ref(A_STP,pairreg,pairreg,ref));
|
||||
inc(result,16);
|
||||
end;
|
||||
end;
|
||||
|
||||
|
||||
{ Symbol-list callback: rebases one SP-relative variable onto the frame pointer.

  p   - the symbol being visited; only paravarsym/localvarsym entries whose
        location is a LOC_REFERENCE based on NR_STACK_POINTER_REG are touched
  arg - pointer to a longint that is subtracted from the symbol's offset
        when its base register is switched to NR_FRAME_POINTER_REG }
procedure FixupOffsets(p:TObject;arg:pointer);
|
||||
var
|
||||
{ typed view of p (shares its storage through "absolute") }
sym: tabstractnormalvarsym absolute p;
|
||||
begin
|
||||
if (tsym(p).typ in [paravarsym,localvarsym]) and
|
||||
(sym.localloc.loc=LOC_REFERENCE) and
|
||||
(sym.localloc.reference.base=NR_STACK_POINTER_REG) then
|
||||
begin
|
||||
sym.localloc.reference.base:=NR_FRAME_POINTER_REG;
|
||||
{ the offset was relative to SP; make it relative to FP by removing the
  amount passed in via arg }
dec(sym.localloc.reference.offset,PLongint(arg)^);
|
||||
end;
|
||||
end;
|
||||
|
||||
|
||||
|
||||
procedure tcgaarch64.g_proc_entry(list: TAsmList; localsize: longint; nostackframe: boolean);
|
||||
var
|
||||
ref: treference;
|
||||
totalstackframesize: longint;
|
||||
begin
|
||||
if nostackframe then
|
||||
exit;
|
||||
@ -1440,12 +1461,15 @@ implementation
|
||||
{ initialise frame pointer }
|
||||
a_load_reg_reg(list,OS_ADDR,OS_ADDR,NR_SP,NR_FP);
|
||||
|
||||
totalstackframesize:=localsize;
|
||||
{ save modified integer registers }
|
||||
save_regs(list,R_INTREGISTER,RS_X19,RS_X28,R_SUBWHOLE);
|
||||
inc(totalstackframesize,
|
||||
save_regs(list,R_INTREGISTER,RS_X19,RS_X28,R_SUBWHOLE));
|
||||
{ only the lower 64 bits of the modified vector registers need to be
|
||||
saved; if the caller needs the upper 64 bits, it has to save them
|
||||
itself }
|
||||
save_regs(list,R_MMREGISTER,RS_D8,RS_D15,R_SUBMMD);
|
||||
inc(totalstackframesize,
|
||||
save_regs(list,R_MMREGISTER,RS_D8,RS_D15,R_SUBMMD));
|
||||
|
||||
{ allocate stack space }
|
||||
if localsize<>0 then
|
||||
@ -1454,6 +1478,37 @@ implementation
|
||||
current_procinfo.final_localsize:=localsize;
|
||||
handle_reg_imm12_reg(list,A_SUB,OS_ADDR,NR_SP,localsize,NR_SP,NR_IP0,false,true);
|
||||
end;
|
||||
{ By default, we use the frame pointer to access parameters passed via
|
||||
the stack and the stack pointer to address local variables and temps
|
||||
because
|
||||
a) we can use bigger positive than negative offsets (so accessing
|
||||
locals via negative offsets from the frame pointer would be less
|
||||
efficient)
|
||||
b) we don't know the local size while generating the code, so
|
||||
accessing the parameters via the stack pointer is not possible
|
||||
without copying them
|
||||
The problem with this is the get_frame() intrinsic:
|
||||
a) it must return the same value as what we pass as parentfp
|
||||
parameter, since that's how it's used in the TP-style objects unit
|
||||
b) its return value must be usable to access all local data from a
|
||||
routine (locals and parameters), since it's all that nested
|
||||
routines have access to
|
||||
c) its return value must be usable to construct a backtrace, as it's
|
||||
also used by the exception handling routines
|
||||
|
||||
The solution we use here, based on something similar that's done in
|
||||
the MIPS port, is to generate all accesses to locals in the routine
|
||||
itself SP-relative, and then after the code is generated and the local
|
||||
size is known (namely, here), we change all SP-relative variables/
|
||||
parameters into FP-relative ones. This means that they'll be accessed
|
||||
less efficiently from nested routines, but those accesses are indirect
|
||||
anyway and at least this way they can be accessed at all
|
||||
}
|
||||
if current_procinfo.has_nestedprocs then
|
||||
begin
|
||||
current_procinfo.procdef.localst.SymList.ForEachCall(@FixupOffsets,@totalstackframesize);
|
||||
current_procinfo.procdef.parast.SymList.ForEachCall(@FixupOffsets,@totalstackframesize);
|
||||
end;
|
||||
end;
|
||||
|
||||
|
||||
|
@ -31,7 +31,7 @@ implementation
|
||||
|
||||
uses
|
||||
ncgbas,ncgflw,ncgcal,ncgcnv,ncgld,ncgmem,ncgcon,ncgset,ncgobjc,
|
||||
ncpuadd,ncpumat,ncpuinl,ncpucnv,{ncpuset,}
|
||||
ncpuadd,ncpumat,ncpumem,ncpuinl,ncpucnv,{ncpuset,}
|
||||
{ this is not really a node }
|
||||
rgcpu,
|
||||
{ symtable }
|
||||
|
@ -535,10 +535,6 @@ unit cpupara;
|
||||
begin
|
||||
paraloc^.size:=paracgsize;
|
||||
paraloc^.loc:=LOC_REFERENCE;
|
||||
if side=callerside then
|
||||
paraloc^.reference.index:=NR_STACK_POINTER_REG
|
||||
else
|
||||
paraloc^.reference.index:=NR_FRAME_POINTER_REG;
|
||||
|
||||
{ the current stack offset may not be properly aligned in
|
||||
case we're on Darwin and have allocated a non-variadic argument
|
||||
@ -563,6 +559,13 @@ unit cpupara;
|
||||
paraloc^.reference.offset:=curstackoffset
|
||||
else
|
||||
paraloc^.reference.offset:=curstackoffset+stackslotlen-paralen;
|
||||
if side=callerside then
|
||||
paraloc^.reference.index:=NR_STACK_POINTER_REG
|
||||
else
|
||||
begin
|
||||
paraloc^.reference.index:=NR_FRAME_POINTER_REG;
|
||||
inc(paraloc^.reference.offset,16);
|
||||
end;
|
||||
inc(curstackoffset,stackslotlen);
|
||||
paralen:=0
|
||||
end;
|
||||
|
@ -26,18 +26,42 @@ unit cpupi;
|
||||
interface
|
||||
|
||||
uses
|
||||
procinfo,
|
||||
psub;
|
||||
|
||||
type
|
||||
taarch64procinfo=class(tcgprocinfo)
|
||||
{ no need to override anything, as the ABI requires us to use a frame
|
||||
pointer at all times }
|
||||
constructor create(aparent: tprocinfo); override;
|
||||
procedure set_first_temp_offset; override;
|
||||
end;
|
||||
|
||||
implementation
|
||||
|
||||
uses
|
||||
procinfo;
|
||||
tgobj,
|
||||
cpubase;
|
||||
|
||||
{ Creates the procinfo via the inherited constructor and then selects the
  stack pointer as this routine's "framepointer" register. }
constructor taarch64procinfo.create(aparent: tprocinfo);
|
||||
begin
|
||||
inherited;
|
||||
{ use the stack pointer as framepointer, because
|
||||
1) we exactly know the offsets of the temps from the stack pointer
|
||||
after pass 1 (based on the required parameter stack size for called
|
||||
routines), while we don't know it for the frame pointer (it depends
|
||||
on the number of saved registers)
|
||||
2) temp offsets from the stack pointer are positive while those from
|
||||
the frame pointer are negative, and we can directly encode much
|
||||
bigger positive offsets in the instructions
|
||||
}
|
||||
framepointer:=NR_STACK_POINTER_REG;
|
||||
end;
|
||||
|
||||
{ Tells the temp generator where the first temp may be placed: at
  maxpushedparasize passed through align(..,16), so that temps start after
  the area used for outgoing stack parameters. }
procedure taarch64procinfo.set_first_temp_offset;
|
||||
begin
|
||||
{ leave room for allocated parameters }
|
||||
tg.setfirsttemp(align(maxpushedparasize,16));
|
||||
end;
|
||||
|
||||
|
||||
begin
|
||||
cprocinfo:=taarch64procinfo;
|
||||
|
@ -41,6 +41,7 @@ interface
|
||||
procedure second_abs_long; override;
|
||||
procedure second_round_real; override;
|
||||
procedure second_trunc_real; override;
|
||||
procedure second_get_frame; override;
|
||||
private
|
||||
procedure load_fpu_location;
|
||||
end;
|
||||
@ -167,6 +168,17 @@ implementation
|
||||
current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_FCVTZS,location.register,left.location.register));
|
||||
end;
|
||||
|
||||
|
||||
{ Code generation for the get_frame intrinsic: emits no instructions, it only
  sets the node's location to NR_FRAME_POINTER_REG as an address-sized
  LOC_CREGISTER. }
procedure taarch64inlinenode.second_get_frame;
|
||||
begin
|
||||
location_reset(location,LOC_CREGISTER,OS_ADDR);
|
||||
{ this routine is used to get the frame pointer for backtracing
|
||||
purposes. current_procinfo.framepointer is set to SP because that one
|
||||
is used to access temps. On most platforms these two frame pointers
|
||||
are the same, but not on AArch64. }
|
||||
location.register:=NR_FRAME_POINTER_REG;
|
||||
end;
|
||||
|
||||
begin
|
||||
cinlinenode:=taarch64inlinenode;
|
||||
end.
|
||||
|
66
compiler/aarch64/ncpumem.pas
Normal file
66
compiler/aarch64/ncpumem.pas
Normal file
@ -0,0 +1,66 @@
|
||||
{
|
||||
Copyright (c) 2014 by Jonas Maebe
|
||||
|
||||
Generate AArch64 code for in memory related nodes
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
|
||||
****************************************************************************
|
||||
}
|
||||
unit ncpumem;
|
||||
|
||||
{$i fpcdefs.inc}
|
||||
|
||||
interface
|
||||
|
||||
uses
|
||||
globtype,
|
||||
node,nmem,ncgmem;
|
||||
|
||||
type
|
||||
{ AArch64 variant of the load-parent-frame-pointer node; it only overrides
  code generation so that the stack pointer is never handed out directly as
  the node's result register. }
taarch64loadparentfpnode = class(tcgloadparentfpnode)
|
||||
procedure pass_generate_code; override;
|
||||
end;
|
||||
|
||||
implementation
|
||||
|
||||
uses
|
||||
aasmdata,cgbase,cpubase,
|
||||
cgobj;
|
||||
|
||||
{ taarch64loadparentfpnode }
|
||||
|
||||
{ Runs the inherited code generation and then post-processes the result when
  it is the stack pointer held in a (C)REGISTER location:
    - kind=lpf_forpara: the result is replaced by NR_FRAME_POINTER_REG
    - otherwise: SP is copied into a freshly allocated address register and
      the location becomes LOC_REGISTER
  Any other result location is left as produced by the inherited method. }
procedure taarch64loadparentfpnode.pass_generate_code;
|
||||
begin
|
||||
inherited pass_generate_code;
|
||||
{ see the comments in tcgaarch64.g_proc_entry }
|
||||
if (location.loc in [LOC_REGISTER,LOC_CREGISTER]) and
|
||||
(location.register=NR_STACK_POINTER_REG) then
|
||||
{ note: the "else" below belongs to this inner "if" }
if (kind=lpf_forpara) then
|
||||
location.register:=NR_FRAME_POINTER_REG
|
||||
else
|
||||
begin
|
||||
{ load stack pointer in a different register, as many instructions
|
||||
cannot directly work with the stack pointer. The register
|
||||
allocator can merge them if possible }
|
||||
location.register:=cg.getaddressregister(current_asmdata.CurrAsmList);
|
||||
cg.a_load_reg_reg(current_asmdata.CurrAsmList,OS_ADDR,OS_ADDR,NR_STACK_POINTER_REG,location.register);
|
||||
location.loc:=LOC_REGISTER;
|
||||
end;
|
||||
end;
|
||||
|
||||
begin
|
||||
cloadparentfpnode:=taarch64loadparentfpnode;
|
||||
end.
|
@ -40,6 +40,10 @@ unit rgcpu;
|
||||
procedure do_spill_op(list: tasmlist; op: tasmop; pos: tai; const spilltemp: treference; tempreg: tregister);
|
||||
end;
|
||||
|
||||
{ Register allocator class for the integer registers: extends trgcpu with
  CPU-specific interference edges involving the stack pointer (see
  add_cpu_interferences). }
trgintcpu=class(trgcpu)
|
||||
procedure add_cpu_interferences(p: tai); override;
|
||||
end;
|
||||
|
||||
|
||||
implementation
|
||||
|
||||
@ -97,4 +101,62 @@ implementation
|
||||
end;
|
||||
|
||||
|
||||
{ Adds interference edges between RS_SP and those operands of p that are
  treated here as positions where the stack pointer may not be used.
  Entries of the list that are not instructions are ignored.

  Summary of the cases handled below:
    - MOV: no edges are added
    - ADD/SUB/CMP/CMN whose last operand is not a shifterop, or whose
      shifterop is accepted by valid_shifter_operand: only the last
      non-shifterop operand gets an edge (if it is a register)
    - AND/EOR/ORR/TST with a constant as last operand: no edges are added
    - everything else (including the cases above when their condition does
      not hold): every integer register operand and every reference index
      register gets an edge with RS_SP }
procedure trgintcpu.add_cpu_interferences(p: tai);
|
||||
var
|
||||
i: longint;
|
||||
begin
|
||||
if p.typ=ait_instruction then
|
||||
begin
|
||||
{ add interferences for instructions that can have SP as a register
|
||||
operand }
|
||||
case taicpu(p).opcode of
|
||||
A_MOV:
|
||||
{ all operands can be SP }
|
||||
exit;
|
||||
A_ADD,
|
||||
A_SUB,
|
||||
A_CMP,
|
||||
A_CMN:
|
||||
{ ok as destination or first source in immediate or extended
|
||||
register form }
|
||||
{ NOTE(review): the false/true arguments presumably select the check for
  the extended register form -- confirm against valid_shifter_operand }
if (taicpu(p).oper[taicpu(p).ops-1]^.typ<>top_shifterop) or
|
||||
valid_shifter_operand(taicpu(p).opcode,false,true,
|
||||
reg_cgsize(taicpu(p).oper[0]^.reg) in [OS_64,OS_S64],
|
||||
taicpu(p).oper[taicpu(p).ops-1]^.shifterop^.shiftmode,
|
||||
taicpu(p).oper[taicpu(p).ops-1]^.shifterop^.shiftimm) then
|
||||
begin
|
||||
{ i = index of the last operand that is not the shifterop }
if taicpu(p).oper[taicpu(p).ops-1]^.typ=top_shifterop then
|
||||
i:=taicpu(p).ops-2
|
||||
else
|
||||
i:=taicpu(p).ops-1;
|
||||
if (taicpu(p).oper[i]^.typ=top_reg) then
|
||||
add_edge(getsupreg(taicpu(p).oper[i]^.reg),RS_SP);
|
||||
exit;
|
||||
end;
|
||||
A_AND,
|
||||
A_EOR,
|
||||
A_ORR,
|
||||
A_TST:
|
||||
{ ok in immediate form }
|
||||
if taicpu(p).oper[taicpu(p).ops-1]^.typ=top_const then
|
||||
exit;
|
||||
end;
|
||||
{ add interferences for other registers }
|
||||
{ reached for all opcodes not listed above, and for the listed ones whose
  condition did not lead to an exit }
for i:=0 to taicpu(p).ops-1 do
|
||||
begin
|
||||
case taicpu(p).oper[i]^.typ of
|
||||
top_reg:
|
||||
if getregtype(taicpu(p).oper[i]^.reg)=R_INTREGISTER then
|
||||
add_edge(getsupreg(taicpu(p).oper[i]^.reg),RS_SP);
|
||||
top_ref:
|
||||
begin
|
||||
{ sp can always be base, never be index }
|
||||
if taicpu(p).oper[i]^.ref^.index<>NR_NO then
|
||||
add_edge(getsupreg(taicpu(p).oper[i]^.ref^.index),RS_SP);
|
||||
end;
|
||||
end;
|
||||
end;
|
||||
end;
|
||||
end;
|
||||
|
||||
end.
|
||||
|
@ -242,6 +242,7 @@
|
||||
{$define cpurox}
|
||||
{$define cputargethasfixedstack}
|
||||
{$define cpurefshaveindexreg}
|
||||
{$define SUPPORT_GET_FRAME}
|
||||
{$endif aarch64}
|
||||
|
||||
{$IFDEF MACOS}
|
||||
|
@ -179,7 +179,7 @@ implementation
|
||||
tempfreelist:=nil;
|
||||
templist:=nil;
|
||||
{ we could create a new child class for this but I don't know if it is worth the effort (FK) }
|
||||
{$if defined(powerpc) or defined(powerpc64) or defined(avr) or defined(jvm)}
|
||||
{$if defined(powerpc) or defined(powerpc64) or defined(avr) or defined(jvm) or defined(aarch64)}
|
||||
direction:=1;
|
||||
{$else}
|
||||
direction:=-1;
|
||||
|
@ -82,12 +82,6 @@ procedure fpc_cpuinit;
|
||||
String
|
||||
****************************************************************************}
|
||||
|
||||
{$define FPC_SYSTEM_HAS_GET_FRAME}
|
||||
function get_frame:pointer;assembler; nostackframe;
|
||||
asm
|
||||
mov x0, x29
|
||||
end;
|
||||
|
||||
{$define FPC_SYSTEM_HAS_GET_CALLER_ADDR}
|
||||
function get_caller_addr(framebp:pointer;addr:pointer=nil):pointer;assembler; nostackframe;
|
||||
asm
|
||||
|
38
tests/test/tnest1.pp
Normal file
38
tests/test/tnest1.pp
Normal file
@ -0,0 +1,38 @@
|
||||
{$inline on}
|
||||
|
||||
{ Test: an inlined nested procedure writes to the parameters (l1, l2) and to
  two locals (d1, d2) of its parent routine. The parent then verifies that
  the neighbouring locals a1/a2 still hold their marker values and that the
  four written variables hold the new values. Each failed check halts with
  its own exit code (1..6). }
procedure test(l1, l2: longint);
|
||||
|
||||
var
|
||||
{ a1 and a2 are declared around d1/d2 and act as guard values }
a1: cardinal;
|
||||
d1, d2: double;
|
||||
a2: cardinal;
|
||||
|
||||
{ modifies the parent's parameters and locals only }
procedure nested; inline;
|
||||
begin
|
||||
l1:=1;
|
||||
l2:=2;
|
||||
d1:=3.0;
|
||||
d2:=4.0;
|
||||
end;
|
||||
|
||||
begin
|
||||
a1:=$deadbeef;
|
||||
a2:=$cafe0000;
|
||||
nested;
|
||||
{ guard values must be untouched }
if a1<>$deadbeef then
|
||||
halt(1);
|
||||
if a2<>$cafe0000 then
|
||||
halt(2);
|
||||
{ values written by the nested procedure must be visible here }
if l1<>1 then
|
||||
halt(3);
|
||||
if l2<>2 then
|
||||
halt(4);
|
||||
if d1<>3.0 then
|
||||
halt(5);
|
||||
if d2<>4.0 then
|
||||
halt(6);
|
||||
end;
|
||||
|
||||
begin
|
||||
test(5,6);
|
||||
end.
|
Loading…
Reference in New Issue
Block a user