+ Implemented saving/restoring registers using PUSH/POP instructions on i386 targets with a non-fixed stack. This reduces code size (e.g. the compiler itself becomes about 20 KB smaller).

* This should also be applicable to i8086 targets (untested): copy the changes from i386/cgcpu.pas to i8086/cgcpu.pas and remove the "$ifdef i386" lines in cgx86.pas.

git-svn-id: trunk@25224 -
This commit is contained in:
sergei 2013-08-07 09:59:07 +00:00
parent 1ce93f7430
commit 5c4abfa75a
4 changed files with 96 additions and 6 deletions

View File

@ -581,7 +581,9 @@ interface
{ subroutine contains inherited call }
pi_has_inherited,
{ subroutine has nested exit }
pi_has_nested_exit
pi_has_nested_exit,
{ allocates memory on stack, so stack is unbalanced on exit }
pi_has_stack_allocs
);
tprocinfoflags=set of tprocinfoflag;

View File

@ -326,9 +326,15 @@ unit cgcpu;
stacksize := align(stacksize+sizeof(aint),target_info.stackalign) - sizeof(aint);
if stacksize<>0 then
increase_fp(stacksize);
if (not paramanager.use_fixed_stack) then
internal_restore_regs(list,true);
end
else
list.concat(Taicpu.op_none(A_LEAVE,S_NO));
begin
if (not paramanager.use_fixed_stack) then
internal_restore_regs(list,not (pi_has_stack_allocs in current_procinfo.flags));
list.concat(Taicpu.op_none(A_LEAVE,S_NO));
end;
list.concat(tai_regalloc.dealloc(current_procinfo.framepointer,nil));
end;
@ -492,6 +498,7 @@ unit cgcpu;
{ patch the new address, but don't use a_load_reg_reg, that will add a move instruction
that can confuse the reg allocator }
list.concat(Taicpu.Op_reg_reg(A_MOV,S_L,NR_ESP,destreg));
include(current_procinfo.flags,pi_has_stack_allocs);
end;

View File

@ -1183,7 +1183,10 @@ const
(mask:pi_has_inherited;
str:' subroutine contains inherited call '),
(mask:pi_has_nested_exit;
str:' subroutine contains a nested subroutine which calls the exit of the current one ')
str:' subroutine contains a nested subroutine which calls the exit of the current one '),
(mask:pi_has_stack_allocs;
str:' allocates memory on stack, so stack may be unbalanced on exit ')
);
var
procinfooptions : tprocinfoflags;

View File

@ -117,6 +117,8 @@ unit cgx86;
procedure g_profilecode(list : TAsmList);override;
procedure g_stackpointer_alloc(list : TAsmList;localsize : longint);override;
procedure g_proc_entry(list : TAsmList;localsize : longint;nostackframe:boolean);override;
procedure g_save_registers(list: TAsmList); override;
procedure g_restore_registers(list: TAsmList); override;
procedure g_overflowcheck(list: TAsmList; const l:tlocation;def:tdef);override;
@ -138,6 +140,7 @@ unit cgx86;
procedure floatloadops(t : tcgsize;var op : tasmop;var s : topsize);
procedure floatstoreops(t : tcgsize;var op : tasmop;var s : topsize);
procedure internal_restore_regs(list: TAsmList; use_pop: boolean);
end;
const
@ -2382,7 +2385,11 @@ unit cgx86;
else
begin
current_asmdata.getjumplabel(again);
getcpuregister(list,NR_EDI);
{ Using a_reg_alloc instead of getcpuregister, so this procedure
does not change "used_in_proc" state of EDI and therefore can be
called after saving registers with "push" instruction
without creating an unbalanced "pop edi" in epilogue }
a_reg_alloc(list,NR_EDI);
list.concat(Taicpu.op_reg(A_PUSH,S_L,NR_EDI));
list.concat(Taicpu.op_const_reg(A_MOV,S_L,localsize div winstackpagesize,NR_EDI));
a_label(list,again);
@ -2396,7 +2403,7 @@ unit cgx86;
decrease_sp(localsize mod winstackpagesize-4);
reference_reset_base(href,NR_ESP,localsize-4,4);
list.concat(Taicpu.op_ref_reg(A_MOV,S_L,href,NR_EDI));
ungetcpuregister(list,NR_EDI);
a_reg_dealloc(list,NR_EDI);
end
end
else
@ -2453,6 +2460,18 @@ unit cgx86;
{$ifdef i8086}
dgroup: treference;
{$endif i8086}
procedure push_regs;
  { Emits one PUSH instruction for every entry of saved_standard_registers
    that the integer register allocator reports as used in the current
    procedure. Registers are pushed in ascending index order. }
  var
    idx: longint;
    savedreg: tregister;
  begin
    for idx := low(saved_standard_registers) to high(saved_standard_registers) do
      begin
        { registers the procedure never touches need no save slot }
        if not (saved_standard_registers[idx] in rg[R_INTREGISTER].used_in_proc) then
          continue;
        savedreg := newreg(R_INTREGISTER,saved_standard_registers[idx],R_SUBWHOLE);
        list.concat(Taicpu.Op_reg(A_PUSH,tcgsize2opsize[OS_ADDR],savedreg));
      end;
  end;
begin
{$ifdef i8086}
{ interrupt support for i8086 }
@ -2500,7 +2519,13 @@ unit cgx86;
stackmisalignment := sizeof(pint);
list.concat(tai_regalloc.alloc(current_procinfo.framepointer,nil));
if current_procinfo.framepointer=NR_STACK_POINTER_REG then
CGmessage(cg_d_stackframe_omited)
begin
{$ifdef i386}
if (not paramanager.use_fixed_stack) then
push_regs;
{$endif i386}
CGmessage(cg_d_stackframe_omited);
end
else
begin
{ push <frame_pointer> }
@ -2528,10 +2553,63 @@ unit cgx86;
current_asmdata.asmcfi.cfa_def_cfa_offset(list,localsize+sizeof(pint));
current_procinfo.final_localsize:=localsize;
end;
{$ifdef i386}
if (not paramanager.use_fixed_stack) and
(current_procinfo.framepointer<>NR_STACK_POINTER_REG) then
begin
reference_reset_base(current_procinfo.save_regs_ref,
current_procinfo.framepointer,
-(localsize+sizeof(aint)),sizeof(aint));
push_regs;
end;
{$endif i386}
end;
end;
procedure tcgx86.g_save_registers(list: TAsmList);
  { Delegates to the inherited register-saving code. On i386 this only
    happens when the parameter manager reports a fixed stack; otherwise
    nothing is emitted here. }
  begin
{$ifdef i386}
    if not paramanager.use_fixed_stack then
      exit;
{$endif i386}
    inherited g_save_registers(list);
  end;
procedure tcgx86.g_restore_registers(list: TAsmList);
  { Delegates to the inherited register-restoring code. On i386 this only
    happens when the parameter manager reports a fixed stack; otherwise
    nothing is emitted here. }
  begin
{$ifdef i386}
    if not paramanager.use_fixed_stack then
      exit;
{$endif i386}
    inherited g_restore_registers(list);
  end;
procedure tcgx86.internal_restore_regs(list: TAsmList; use_pop: boolean);
  { Emits code that restores every entry of saved_standard_registers which
    is marked as used in the current procedure.

    list    - assembler list the instructions are appended to
    use_pop - true:  restore with POP instructions
              false: restore with loads relative to
                     current_procinfo.save_regs_ref

    The registers are saved by PUSH in ascending index order. POP hands back
    the most recently pushed value first, so the registers must be restored
    in descending index order; popping them in ascending order would leave
    each register with another register's saved value as soon as two or
    more registers are saved. }
  var
    r: longint;
    hreg: tregister;
    href: treference;
  begin
    href:=current_procinfo.save_regs_ref;
    { save_regs_ref addresses the slot of the first saved register and every
      further saved register lies sizeof(aint) below the previous one.
      Move href one slot past the last saved register, so that the
      descending loop below can step back up slot by slot. }
    for r:=low(saved_standard_registers) to high(saved_standard_registers) do
      if saved_standard_registers[r] in rg[R_INTREGISTER].used_in_proc then
        dec(href.offset,sizeof(aint));
    for r:=high(saved_standard_registers) downto low(saved_standard_registers) do
      if saved_standard_registers[r] in rg[R_INTREGISTER].used_in_proc then
        begin
          { href now addresses the slot belonging to this register }
          inc(href.offset,sizeof(aint));
          hreg:=newreg(R_INTREGISTER,saved_standard_registers[r],R_SUBWHOLE);
          { Allocate register so the optimizer does not remove the load }
          a_reg_alloc(list,hreg);
          if use_pop then
            list.concat(Taicpu.Op_reg(A_POP,tcgsize2opsize[OS_ADDR],hreg))
          else
            a_load_ref_reg(list,OS_ADDR,OS_ADDR,href,hreg);
        end;
  end;
{ produces if necessary overflowcode }
procedure tcgx86.g_overflowcheck(list: TAsmList; const l:tlocation;def:tdef);
var