From bba4b02eb26c69fd42e861d8d4c040c1c7b6998f Mon Sep 17 00:00:00 2001 From: Jonas Maebe Date: Thu, 29 Mar 2012 20:54:33 +0000 Subject: [PATCH] * use r7 instead of r11 as frame pointer on Darwin/iOS, and make sure r7 always points to the previous r7 on the stack (with the saved return address coming right after it) so that the debugger and crashreporter can use it for backtraces as specified in the ABI o changed NR_FRAME_POINTER_REG and RS_FRAME_POINTER_REG from a symbolic into a typed constant, and added a new method to tprocinfo that can be used to initialize it (so it can be inited to r7/r11 depending on the target platform) * allow using r9 on Darwin, it was only used by the system on iOS up to 2.x, which we no longer support * prefer using r9 and r12 before r4..r11 on Darwin, because they are volatile and hence do not have to be saved git-svn-id: trunk@20661 - --- compiler/arm/cgcpu.pas | 172 +++++++++++++++++++++++++++++---------- compiler/arm/cpubase.pas | 6 +- compiler/arm/cpupara.pas | 11 ++- compiler/arm/cpupi.pas | 22 ++++- compiler/procinfo.pas | 11 +++ rtl/arm/arm.inc | 12 +++ 6 files changed, 183 insertions(+), 51 deletions(-) diff --git a/compiler/arm/cgcpu.pas b/compiler/arm/cgcpu.pas index f98b907d6c..21ae1c3adf 100644 --- a/compiler/arm/cgcpu.pas +++ b/compiler/arm/cgcpu.pas @@ -209,17 +209,18 @@ unit cgcpu; procedure tarmcgarm.init_register_allocators; begin inherited init_register_allocators; - { currently, we save R14 always, so we can use it } + { currently, we always save R14, so we can use it } if (target_info.system<>system_arm_darwin) then rg[R_INTREGISTER]:=trgintcpu.create(R_INTREGISTER,R_SUBWHOLE, - [RS_R0,RS_R1,RS_R2,RS_R3,RS_R4,RS_R5,RS_R6,RS_R7,RS_R8, - RS_R9,RS_R10,RS_R12,RS_R14],first_int_imreg,[]) + [RS_R0,RS_R1,RS_R2,RS_R3,RS_R12,RS_R4,RS_R5,RS_R6,RS_R7,RS_R8, + RS_R9,RS_R10,RS_R14],first_int_imreg,[]) else - { r9 is not (always) available on Darwin according to the llvm code - generator. 
} + { r7 is not available on Darwin, it's used as frame pointer (always, + for backtrace support -- also in gcc/clang -> R11 can be used). + r9 is volatile } rg[R_INTREGISTER]:=trgintcpu.create(R_INTREGISTER,R_SUBWHOLE, - [RS_R0,RS_R1,RS_R2,RS_R3,RS_R4,RS_R5,RS_R6,RS_R7,RS_R8, - RS_R10,RS_R12,RS_R14],first_int_imreg,[]); + [RS_R0,RS_R1,RS_R2,RS_R3,RS_R9,RS_R12,RS_R4,RS_R5,RS_R6,RS_R8, + RS_R10,RS_R11,RS_R14],first_int_imreg,[]); rg[R_FPUREGISTER]:=trgcpu.create(R_FPUREGISTER,R_SUBNONE, [RS_F0,RS_F1,RS_F2,RS_F3,RS_F4,RS_F5,RS_F6,RS_F7],first_fpu_imreg,[]); { The register allocator currently cannot deal with multiple @@ -1410,7 +1411,8 @@ unit cgcpu; firstfloatreg,lastfloatreg, r : byte; mmregs, - regs : tcpuregisterset; + regs, saveregs : tcpuregisterset; + r7offset, stackmisalignment : pint; postfix: toppostfix; begin @@ -1446,41 +1448,91 @@ unit cgcpu; end; a_reg_alloc(list,NR_STACK_POINTER_REG); if current_procinfo.framepointer<>NR_STACK_POINTER_REG then - begin - a_reg_alloc(list,NR_FRAME_POINTER_REG); - a_reg_alloc(list,NR_R12); - - list.concat(taicpu.op_reg_reg(A_MOV,NR_R12,NR_STACK_POINTER_REG)); - end; + a_reg_alloc(list,NR_FRAME_POINTER_REG); { save int registers } reference_reset(ref,4); ref.index:=NR_STACK_POINTER_REG; ref.addressmode:=AM_PREINDEXED; regs:=rg[R_INTREGISTER].used_in_proc-paramanager.get_volatile_registers_int(pocall_stdcall); - { the (old) ARM APCS requires saving both the stack pointer (to - crawl the stack) and the PC (to identify the function this - stack frame belongs to) -> also save R12 (= copy of R13 on entry) - and R15 -- still needs updating for EABI and Darwin, they don't - need that } - if current_procinfo.framepointer<>NR_STACK_POINTER_REG then - regs:=regs+[RS_FRAME_POINTER_REG,RS_R12,RS_R14,RS_R15] - else - if (regs<>[]) or (pi_do_call in current_procinfo.flags) then - include(regs,RS_R14); - if regs<>[] then - begin - for r:=RS_R0 to RS_R15 do - if (r in regs) then - inc(stackmisalignment,4); - 
list.concat(setoppostfix(taicpu.op_ref_regset(A_STM,ref,R_INTREGISTER,R_SUBWHOLE,regs),PF_FD)); - end; - - if current_procinfo.framepointer<>NR_STACK_POINTER_REG then + if not(target_info.system in systems_darwin) then begin - { the framepointer now points to the saved R15, so the saved - framepointer is at R11-12 (for get_caller_frame) } - list.concat(taicpu.op_reg_reg_const(A_SUB,NR_FRAME_POINTER_REG,NR_R12,4)); - a_reg_dealloc(list,NR_R12); + a_reg_alloc(list,NR_STACK_POINTER_REG); + if current_procinfo.framepointer<>NR_STACK_POINTER_REG then + begin + a_reg_alloc(list,NR_R12); + list.concat(taicpu.op_reg_reg(A_MOV,NR_R12,NR_STACK_POINTER_REG)); + end; + { the (old) ARM APCS requires saving both the stack pointer (to + crawl the stack) and the PC (to identify the function this + stack frame belongs to) -> also save R12 (= copy of R13 on entry) + and R15 -- still needs updating for EABI and Darwin, they don't + need that } + if current_procinfo.framepointer<>NR_STACK_POINTER_REG then + regs:=regs+[RS_FRAME_POINTER_REG,RS_R12,RS_R14,RS_R15] + else + if (regs<>[]) or (pi_do_call in current_procinfo.flags) then + include(regs,RS_R14); + if regs<>[] then + begin + for r:=RS_R0 to RS_R15 do + if r in regs then + inc(stackmisalignment,4); + list.concat(setoppostfix(taicpu.op_ref_regset(A_STM,ref,R_INTREGISTER,R_SUBWHOLE,regs),PF_FD)); + end; + + if current_procinfo.framepointer<>NR_STACK_POINTER_REG then + begin + { the framepointer now points to the saved R15, so the saved + framepointer is at R11-12 (for get_caller_frame) } + list.concat(taicpu.op_reg_reg_const(A_SUB,NR_FRAME_POINTER_REG,NR_R12,4)); + a_reg_dealloc(list,NR_R12); + end; + end + else + begin + { always save r14 if we use r7 as the framepointer, because + the parameter offsets are hardcoded in advance and always + assume that r14 sits on the stack right behind the saved r7 + } + if current_procinfo.framepointer=NR_FRAME_POINTER_REG then + include(regs,RS_FRAME_POINTER_REG); + if (regs<>[]) or 
(pi_do_call in current_procinfo.flags) then + include(regs,RS_R14); + if regs<>[] then + begin + { on Darwin, you first have to save [r4-r7,lr], and then + [r8,r10,r11] and make r7 point to the previously saved + r7 so that you can perform a stack crawl based on it + ([r7] is previous stack frame, [r7+4] is return address + } + include(regs,RS_FRAME_POINTER_REG); + saveregs:=regs-[RS_R8,RS_R10,RS_R11]; + r7offset:=0; + for r:=RS_R0 to RS_R15 do + if r in saveregs then + begin + inc(stackmisalignment,4); + if r0 then + list.concat(taicpu.op_reg_reg_const(A_ADD,NR_FRAME_POINTER_REG,NR_R13,r7offset)) + else + list.concat(taicpu.op_reg_reg(A_MOV,NR_R7,NR_R13)); + { now save the rest (if any) } + saveregs:=regs-saveregs; + if saveregs<>[] then + begin + for r:=RS_R8 to RS_R11 do + if r in saveregs then + inc(stackmisalignment,4); + list.concat(setoppostfix(taicpu.op_ref_regset(A_STM,ref,R_INTREGISTER,R_SUBWHOLE,saveregs),PF_FD)); + end; + end; end; stackmisalignment:=stackmisalignment mod current_settings.alignment.localalignmax; @@ -1564,6 +1616,7 @@ unit cgcpu; r, shift : byte; mmregs, + saveregs, regs : tcpuregisterset; stackmisalignment: pint; mmpostfix: toppostfix; @@ -1649,22 +1702,47 @@ unit cgcpu; end; regs:=rg[R_INTREGISTER].used_in_proc-paramanager.get_volatile_registers_int(pocall_stdcall) ; - if (pi_do_call in current_procinfo.flags) or (regs<>[]) then + if (pi_do_call in current_procinfo.flags) or + (regs<>[]) or + ((target_info.system in systems_darwin) and + (current_procinfo.framepointer<>NR_STACK_POINTER_REG)) then begin exclude(regs,RS_R14); include(regs,RS_R15); + if (target_info.system in systems_darwin) then + include(regs,RS_FRAME_POINTER_REG); end; - { restore saved stack pointer to SP (R13) and saved lr to PC (R15). 
- The saved PC came after that but is discarded, since we restore - the stack pointer } - if (current_procinfo.framepointer<>NR_STACK_POINTER_REG) then - regs:=regs+[RS_FRAME_POINTER_REG,RS_R13,RS_R15]; + if not(target_info.system in systems_darwin) then + begin + { restore saved stack pointer to SP (R13) and saved lr to PC (R15). + The saved PC came after that but is discarded, since we restore + the stack pointer } + if (current_procinfo.framepointer<>NR_STACK_POINTER_REG) then + regs:=regs+[RS_FRAME_POINTER_REG,RS_R13,RS_R15]; + end + else + begin + { restore R8-R11 already if necessary (they've been stored + before the others) } + saveregs:=regs*[RS_R8,RS_R10,RS_R11]; + if saveregs<>[] then + begin + reference_reset(ref,4); + ref.index:=NR_STACK_POINTER_REG; + ref.addressmode:=AM_PREINDEXED; + for r:=RS_R8 to RS_R11 do + if r in saveregs then + inc(stackmisalignment,4); + regs:=regs-saveregs; + end; + end; for r:=RS_R0 to RS_R15 do - if (r in regs) then + if r in regs then inc(stackmisalignment,4); stackmisalignment:=stackmisalignment mod current_settings.alignment.localalignmax; - if (current_procinfo.framepointer=NR_STACK_POINTER_REG) then + if (current_procinfo.framepointer=NR_STACK_POINTER_REG) or + (target_info.system in systems_darwin) then begin LocalSize:=current_procinfo.calc_stackframe_size; if (LocalSize<>0) or @@ -1686,6 +1764,10 @@ unit cgcpu; end; end; + if (target_info.system in systems_darwin) and + (saveregs<>[]) then + list.concat(setoppostfix(taicpu.op_ref_regset(A_LDM,ref,R_INTREGISTER,R_SUBWHOLE,saveregs),PF_FD)); + if regs=[] then begin if (current_settings.cputype framepointer + 4 = stack pointer + on entry } + if not(target_info.system in systems_darwin) then + inc(paraloc^.reference.offset,4) + else + inc(paraloc^.reference.offset,8); end; end; dec(paralen,tcgsize2size[paraloc^.size]); diff --git a/compiler/arm/cpupi.pas b/compiler/arm/cpupi.pas index 6eafbc1263..c5c6a92eed 100644 --- a/compiler/arm/cpupi.pas +++ 
b/compiler/arm/cpupi.pas @@ -38,6 +38,7 @@ unit cpupi; // procedure after_pass1;override; procedure set_first_temp_offset;override; function calc_stackframe_size:longint;override; + procedure init_framepointer; override; end; @@ -70,8 +71,10 @@ unit cpupi; register (= last register in list above) -> + 4 } tg.setfirsttemp(-28-16+4) else - { on Darwin r9 is not usable -> one less register to save } - tg.setfirsttemp(-24-16+4) + { on Darwin first r4-r7,r14 are saved, then r7 is adjusted to + point to the saved r7, and next r8,r10,r11 gets saved -> -24 + (r4-r6 and r8,r10,r11) } + tg.setfirsttemp(-24) end else tg.setfirsttemp(maxpushedparasize); @@ -124,6 +127,21 @@ unit cpupi; end; + procedure tarmprocinfo.init_framepointer; + begin + if not(target_info.system in systems_darwin) then + begin + RS_FRAME_POINTER_REG:=RS_R11; + NR_FRAME_POINTER_REG:=NR_R11; + end + else + begin + RS_FRAME_POINTER_REG:=RS_R7; + NR_FRAME_POINTER_REG:=NR_R7; + end; + end; + + begin cprocinfo:=tarmprocinfo; end. diff --git a/compiler/procinfo.pas b/compiler/procinfo.pas index e494b2d59d..521dc28b4f 100644 --- a/compiler/procinfo.pas +++ b/compiler/procinfo.pas @@ -140,6 +140,9 @@ unit procinfo; { Allocate got register } procedure allocate_got_register(list: TAsmList);virtual; + { get frame pointer } + procedure init_framepointer; virtual; + { Destroy the entire procinfo tree, starting from the outermost parent } procedure destroy_tree; @@ -182,6 +185,7 @@ implementation procdef:=nil; para_stack_size:=0; flags:=[]; + init_framepointer; framepointer:=NR_FRAME_POINTER_REG; maxpushedparasize:=0; { asmlists } @@ -289,4 +293,11 @@ implementation end; + procedure tprocinfo.init_framepointer; + begin + { most targets use a constant, but some have a typed constant that must + be initialized } + end; + + end. 
diff --git a/rtl/arm/arm.inc b/rtl/arm/arm.inc index bc808375c3..76860a81b2 100644 --- a/rtl/arm/arm.inc +++ b/rtl/arm/arm.inc @@ -101,7 +101,11 @@ end; {$define FPC_SYSTEM_HAS_GET_FRAME} function get_frame:pointer;assembler;nostackframe; asm +{$ifndef darwin} mov r0,r11 +{$else} + mov r0,r7 +{$endif} end; {$ENDIF not INTERNAL_BACKTRACE} @@ -109,7 +113,11 @@ end; function get_caller_addr(framebp:pointer):pointer;assembler;nostackframe; asm cmp r0,#0 +{$ifndef darwin} ldrne r0,[r0,#-4] +{$else} + ldrne r0,[r0,#4] +{$endif} end; @@ -117,7 +125,11 @@ end; function get_caller_frame(framebp:pointer):pointer;assembler;nostackframe; asm cmp r0,#0 +{$ifndef darwin} ldrne r0,[r0,#-12] +{$else} + ldrne r0,[r0] +{$endif} end;