diff --git a/compiler/arm/cgcpu.pas b/compiler/arm/cgcpu.pas index a9f92dc02e..d82ded7c61 100644 --- a/compiler/arm/cgcpu.pas +++ b/compiler/arm/cgcpu.pas @@ -1156,7 +1156,8 @@ unit cgcpu; firstfloatreg,lastfloatreg, r : byte; i : aint; - again : tasmlabel; + again : tasmlabel; + regs : tcpuregisterset; begin LocalSize:=align(LocalSize,4); if not(nostackframe) then @@ -1171,19 +1172,28 @@ unit cgcpu; lastfloatreg:=r; end; a_reg_alloc(list,NR_STACK_POINTER_REG); - a_reg_alloc(list,NR_FRAME_POINTER_REG); - a_reg_alloc(list,NR_R12); + if current_procinfo.framepointer<>NR_STACK_POINTER_REG then + begin + a_reg_alloc(list,NR_FRAME_POINTER_REG); + a_reg_alloc(list,NR_R12); - list.concat(taicpu.op_reg_reg(A_MOV,NR_R12,NR_STACK_POINTER_REG)); + list.concat(taicpu.op_reg_reg(A_MOV,NR_R12,NR_STACK_POINTER_REG)); + end; { save int registers } reference_reset(ref); ref.index:=NR_STACK_POINTER_REG; ref.addressmode:=AM_PREINDEXED; - list.concat(setoppostfix(taicpu.op_ref_regset(A_STM,ref, - rg[R_INTREGISTER].used_in_proc-paramanager.get_volatile_registers_int(pocall_stdcall)+[RS_R11,RS_R12,RS_R14,RS_R15]), - PF_FD)); + regs:=rg[R_INTREGISTER].used_in_proc-paramanager.get_volatile_registers_int(pocall_stdcall); + if current_procinfo.framepointer<>NR_STACK_POINTER_REG then + regs:=regs+[RS_R11,RS_R12,RS_R14,RS_R15] + else + if (regs<>[]) or (pi_do_call in current_procinfo.flags) then + include(regs,RS_R14); + if regs<>[] then + list.concat(setoppostfix(taicpu.op_ref_regset(A_STM,ref,regs),PF_FD)); - list.concat(taicpu.op_reg_reg_const(A_SUB,NR_FRAME_POINTER_REG,NR_R12,4)); + if current_procinfo.framepointer<>NR_STACK_POINTER_REG then + list.concat(taicpu.op_reg_reg_const(A_SUB,NR_FRAME_POINTER_REG,NR_R12,4)); { allocate necessary stack size not necessary according to Yury Sidorov @@ -1248,17 +1258,20 @@ unit cgcpu; end else } - if not(is_shifter_const(localsize,shift)) then - begin - a_load_const_reg(list,OS_ADDR,LocalSize,NR_R12); - list.concat(taicpu.op_reg_reg_reg(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,NR_R12)); - a_reg_dealloc(list,NR_R12); - end - else - begin - a_reg_dealloc(list,NR_R12); - list.concat(taicpu.op_reg_reg_const(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,LocalSize)); - end; + if LocalSize<>0 then + if not(is_shifter_const(localsize,shift)) then + begin + if current_procinfo.framepointer=NR_STACK_POINTER_REG then + a_reg_alloc(list,NR_R12); + a_load_const_reg(list,OS_ADDR,LocalSize,NR_R12); + list.concat(taicpu.op_reg_reg_reg(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,NR_R12)); + a_reg_dealloc(list,NR_R12); + end + else + begin + a_reg_dealloc(list,NR_R12); + list.concat(taicpu.op_reg_reg_const(A_SUB,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,LocalSize)); + end; if firstfloatreg<>RS_NO then begin @@ -1287,6 +1300,8 @@ unit cgcpu; firstfloatreg,lastfloatreg, r : byte; shift : byte; + regs : tcpuregisterset; + LocalSize : longint; begin if not(nostackframe) then begin @@ -1320,7 +1335,37 @@ unit cgcpu; end; if (current_procinfo.framepointer=NR_STACK_POINTER_REG) then - list.concat(taicpu.op_reg_reg(A_MOV,NR_R15,NR_R14)) + begin + LocalSize:=current_procinfo.calc_stackframe_size; + if LocalSize<>0 then + if not(is_shifter_const(LocalSize,shift)) then + begin + a_reg_alloc(list,NR_R12); + a_load_const_reg(list,OS_ADDR,LocalSize,NR_R12); + list.concat(taicpu.op_reg_reg_reg(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,NR_R12)); + a_reg_dealloc(list,NR_R12); + end + else + begin + list.concat(taicpu.op_reg_reg_const(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,LocalSize)); + end; + + regs:=rg[R_INTREGISTER].used_in_proc-paramanager.get_volatile_registers_int(pocall_stdcall); + if (pi_do_call in current_procinfo.flags) or (regs<>[]) then + begin + exclude(regs,RS_R14); + include(regs,RS_R15); + end; + if regs=[] then + list.concat(taicpu.op_reg_reg(A_MOV,NR_R15,NR_R14)) + else + begin + reference_reset(ref); + ref.index:=NR_STACK_POINTER_REG; + ref.addressmode:=AM_PREINDEXED; + list.concat(setoppostfix(taicpu.op_ref_regset(A_LDM,ref,regs),PF_FD)); + end; + end else begin { restore int registers and return } diff --git a/compiler/arm/cpuinfo.pas b/compiler/arm/cpuinfo.pas index 808f7c5f54..ba58231ad4 100644 --- a/compiler/arm/cpuinfo.pas +++ b/compiler/arm/cpuinfo.pas @@ -88,7 +88,7 @@ Const genericlevel3optimizerswitches- { no need to write info about those } [cs_opt_level1,cs_opt_level2,cs_opt_level3]+ - [cs_opt_regvar,cs_opt_loopunroll,cs_opt_tailrecursion]; + [cs_opt_regvar,cs_opt_loopunroll,cs_opt_tailrecursion,cs_opt_stackframe]; level1optimizerswitches = genericlevel1optimizerswitches; level2optimizerswitches = genericlevel2optimizerswitches + level1optimizerswitches + [cs_opt_regvar,cs_opt_stackframe,cs_opt_tailrecursion]; diff --git a/compiler/arm/cpupi.pas b/compiler/arm/cpupi.pas index 64ddb29850..e2a63c3e17 100644 --- a/compiler/arm/cpupi.pas +++ b/compiler/arm/cpupi.pas @@ -62,7 +62,8 @@ unit cpupi; is especially a problem when taking the address of a local. For now, this extra memory should hurt less than generating all local contants with offsets >256 as non shifter constants } - tg.setfirsttemp(-12-28); + if tg.direction = -1 then + tg.setfirsttemp(-12-28); end; diff --git a/compiler/psub.pas b/compiler/psub.pas index 9a1fa252f9..ecaf873e96 100644 --- a/compiler/psub.pas +++ b/compiler/psub.pas @@ -745,7 +745,7 @@ implementation { set the start offset to the start of the temp area in the stack } tg:=ttgobj.create; -{$ifdef x86} +{$if defined(x86) or defined(arm)} { try to strip the stack frame } { set the framepointer to esp if: - no assembler directive, those are handled elsewhere @@ -761,7 +761,11 @@ implementation not(po_assembler in procdef.procoptions) and ((flags*[pi_has_assembler_block,pi_uses_exceptions,pi_is_assembler, pi_needs_implicit_finally,pi_has_implicit_finally,pi_has_stackparameter, - pi_needs_stackframe])=[]) then + pi_needs_stackframe])=[]) + {$ifdef arm} + and ((cs_fp_emulation in current_settings.moduleswitches) or not (pi_uses_fpu in flags)) + {$endif arm} + then begin { we need the parameter info here to determine if the procedure gets parameters on the stack @@ -777,8 +781,7 @@ implementation tg.direction:=1; end; end; -{$endif x86} - +{$endif} { Create register allocator } cg.init_register_allocators;