* use lea to adjust the stack pointer; on all modern CPUs this is as fast as or faster than add/sub

git-svn-id: trunk@25010 -
This commit is contained in:
florian 2013-06-28 17:06:57 +00:00
parent 7810d6637a
commit 94cf650d9a
3 changed files with 41 additions and 11 deletions

View File

@ -293,6 +293,16 @@ unit cgcpu;
procedure tcg386.g_proc_exit(list : TAsmList;parasize:longint;nostackframe:boolean);
procedure increase_fp(a : tcgint);
  { Appends a single LEA instruction to list that loads the address
    [current_procinfo.framepointer + a] back into
    current_procinfo.framepointer, i.e. adds a to that register. }
  var
    fp_ref : treference;
  begin
    { memory operand: base = current frame pointer register, offset = a
      (the trailing 0 is presumably the alignment argument - TODO confirm) }
    reference_reset_base(fp_ref,current_procinfo.framepointer,a,0);
    { lea is normally the better choice compared to an add }
    list.concat(
      Taicpu.op_ref_reg(A_LEA,TCGSize2OpSize[OS_ADDR],fp_ref,current_procinfo.framepointer));
  end;
var
stacksize : longint;
begin
@ -304,7 +314,7 @@ unit cgcpu;
{ remove stackframe }
if not nostackframe then
begin
if (current_procinfo.framepointer=NR_STACK_POINTER_REG) then
if current_procinfo.framepointer=NR_STACK_POINTER_REG then
begin
stacksize:=current_procinfo.calc_stackframe_size;
if (target_info.stackalign>4) and
@ -314,8 +324,8 @@ unit cgcpu;
{ if you (think you) know what you are doing }
(po_assembler in current_procinfo.procdef.procoptions)) then
stacksize := align(stacksize+sizeof(aint),target_info.stackalign) - sizeof(aint);
if (stacksize<>0) then
cg.a_op_const_reg(list,OP_ADD,OS_ADDR,stacksize,current_procinfo.framepointer);
if stacksize<>0 then
increase_fp(stacksize);
end
else
list.concat(Taicpu.op_none(A_LEAVE,S_NO));

View File

@ -2318,6 +2318,16 @@ unit cgx86;
procedure tcgx86.g_stackpointer_alloc(list : TAsmList;localsize : longint);
procedure decrease_sp(a : tcgint);
  { Appends a single LEA instruction to list that loads the address
    [NR_STACK_POINTER_REG - a] back into NR_STACK_POINTER_REG,
    i.e. subtracts a from the stack pointer. }
  var
    sp_ref : treference;
  begin
    { memory operand: base = stack pointer, offset = -a
      (the trailing 0 is presumably the alignment argument - TODO confirm) }
    reference_reset_base(sp_ref,NR_STACK_POINTER_REG,-a,0);
    { lea is normally the better choice compared to a sub when
      adjusting the stack pointer }
    list.concat(
      Taicpu.op_ref_reg(A_LEA,TCGSize2OpSize[OS_ADDR],sp_ref,NR_STACK_POINTER_REG));
  end;
{$ifdef x86}
{$ifndef NOTARGETWIN}
var
@ -2338,7 +2348,7 @@ unit cgx86;
begin
if localsize div winstackpagesize<=5 then
begin
list.concat(Taicpu.Op_const_reg(A_SUB,S_L,localsize-4,NR_ESP));
decrease_sp(localsize-4);
for i:=1 to localsize div winstackpagesize do
begin
reference_reset_base(href,NR_ESP,localsize-i*winstackpagesize,4);
@ -2353,11 +2363,11 @@ unit cgx86;
list.concat(Taicpu.op_reg(A_PUSH,S_L,NR_EDI));
list.concat(Taicpu.op_const_reg(A_MOV,S_L,localsize div winstackpagesize,NR_EDI));
a_label(list,again);
list.concat(Taicpu.op_const_reg(A_SUB,S_L,winstackpagesize-4,NR_ESP));
decrease_sp(winstackpagesize-4);
list.concat(Taicpu.op_reg(A_PUSH,S_L,NR_EAX));
list.concat(Taicpu.op_reg(A_DEC,S_L,NR_EDI));
a_jmp_cond(list,OC_NE,again);
list.concat(Taicpu.op_const_reg(A_SUB,S_L,localsize mod winstackpagesize - 4,NR_ESP));
decrease_sp(localsize mod winstackpagesize-4);
reference_reset_base(href,NR_ESP,localsize-4,4);
list.concat(Taicpu.op_ref_reg(A_MOV,S_L,href,NR_EDI));
ungetcpuregister(list,NR_EDI);
@ -2375,7 +2385,7 @@ unit cgx86;
begin
if localsize div winstackpagesize<=5 then
begin
list.concat(Taicpu.Op_const_reg(A_SUB,S_Q,localsize,NR_RSP));
decrease_sp(localsize);
for i:=1 to localsize div winstackpagesize do
begin
reference_reset_base(href,NR_RSP,localsize-i*winstackpagesize+4,4);
@ -2390,19 +2400,19 @@ unit cgx86;
getcpuregister(list,NR_R10);
list.concat(Taicpu.op_const_reg(A_MOV,S_Q,localsize div winstackpagesize,NR_R10));
a_label(list,again);
list.concat(Taicpu.op_const_reg(A_SUB,S_Q,winstackpagesize,NR_RSP));
decrease_sp(winstackpagesize);
reference_reset_base(href,NR_RSP,0,4);
list.concat(Taicpu.op_reg_ref(A_MOV,S_L,NR_EAX,href));
list.concat(Taicpu.op_reg(A_DEC,S_Q,NR_R10));
a_jmp_cond(list,OC_NE,again);
list.concat(Taicpu.op_const_reg(A_SUB,S_Q,localsize mod winstackpagesize,NR_RSP));
decrease_sp(localsize mod winstackpagesize);
ungetcpuregister(list,NR_R10);
end
end
else
{$endif NOTARGETWIN}
{$endif x86_64}
list.concat(Taicpu.Op_const_reg(A_SUB,tcgsize2opsize[OS_ADDR],localsize,NR_STACK_POINTER_REG));
decrease_sp(localsize);
end;
end;

View File

@ -177,6 +177,16 @@ unit cgcpu;
procedure tcgx86_64.g_proc_exit(list : TAsmList;parasize:longint;nostackframe:boolean);
procedure increase_sp(a : tcgint);
  { Appends a single LEA instruction to list that loads the address
    [NR_STACK_POINTER_REG + a] back into NR_STACK_POINTER_REG,
    i.e. adds a to the stack pointer. }
  var
    sp_ref : treference;
  begin
    { memory operand: base = stack pointer, offset = a
      (the trailing 0 is presumably the alignment argument - TODO confirm) }
    reference_reset_base(sp_ref,NR_STACK_POINTER_REG,a,0);
    { lea is normally the better choice compared to an add }
    list.concat(
      Taicpu.op_ref_reg(A_LEA,TCGSize2OpSize[OS_ADDR],sp_ref,NR_STACK_POINTER_REG));
  end;
var
href : treference;
begin
@ -195,7 +205,7 @@ unit cgcpu;
(current_procinfo.procdef.proctypeoption=potype_exceptfilter) then
begin
if (current_procinfo.final_localsize<>0) then
cg.a_op_const_reg(list,OP_ADD,OS_ADDR,current_procinfo.final_localsize,NR_STACK_POINTER_REG);
increase_sp(current_procinfo.final_localsize);
if (current_procinfo.procdef.proctypeoption=potype_exceptfilter) then
list.concat(Taicpu.op_reg(A_POP,tcgsize2opsize[OS_ADDR],NR_FRAME_POINTER_REG));
end