* x86_64: reworked register saving/restoring to use PUSH/POP instructions for procedures with an RSP-based frame and for SEH finalization procedures. In these cases XMM registers are also saved/restored without involving tempgen. This prevents SEH finalization procedures from saving registers in the stack frame of their parent procedures, which fixes incorrect unwind bytecode (Mantis issue). It also reduces executable size (for the compiler itself, by about 100 KB).

git-svn-id: trunk@25389 -
This commit is contained in:
sergei 2013-08-30 07:54:02 +00:00
parent fcaad5baf2
commit e41149a7ec

View File

@ -40,9 +40,14 @@ unit cgcpu;
procedure g_proc_exit(list : TAsmList;parasize:longint;nostackframe:boolean);override;
procedure g_intf_wrapper(list: TAsmList; procdef: tprocdef; const labelname: string; ioffset: longint);override;
procedure g_local_unwind(list: TAsmList; l: TAsmLabel);override;
procedure g_save_registers(list: TAsmList);override;
procedure g_restore_registers(list: TAsmList);override;
procedure a_loadmm_intreg_reg(list: TAsmList; fromsize, tosize : tcgsize;intreg, mmreg: tregister; shuffle: pmmshuffle); override;
procedure a_loadmm_reg_intreg(list: TAsmList; fromsize, tosize : tcgsize;mmreg, intreg: tregister;shuffle : pmmshuffle); override;
private
function use_push: boolean;
function saved_xmm_reg_size: longint;
end;
procedure create_codegen;
@ -103,6 +108,29 @@ unit cgcpu;
end;
function tcgx86_64.use_push: boolean;
  { Tells whether callee-saved registers of the current procedure are to be
    saved with PUSH and restored with POP.
    Returns true when the current procedure uses the stack pointer as its
    frame pointer, or when it is an exception filter procedure
    (potype_exceptfilter); false otherwise. }
  begin
    if current_procinfo.framepointer=NR_STACK_POINTER_REG then
      result:=true
    else
      result:=(current_procinfo.procdef.proctypeoption=potype_exceptfilter);
  end;
function tcgx86_64.saved_xmm_reg_size: longint;
  { Computes the number of bytes needed to store the MM registers from
    saved_mm_registers that are used in the current procedure.
    The result is 0 unless the target is x86_64-win64 and the procedure
    uses MM registers at all; otherwise each used register from the list
    contributes tcgsize2size[OS_VECTOR] bytes. }
  var
    idx: longint;
  begin
    result:=0;
    { Only win64 with MM registers in use has anything to account for }
    if (target_info.system=system_x86_64_win64) and
       uses_registers(R_MMREGISTER) then
      for idx:=low(saved_mm_registers) to high(saved_mm_registers) do
        if saved_mm_registers[idx] in rg[R_MMREGISTER].used_in_proc then
          result:=result+tcgsize2size[OS_VECTOR];
  end;
procedure tcgx86_64.g_proc_entry(list : TAsmList;localsize:longint;nostackframe:boolean);
var
hitem: tlinkedlistitem;
@ -113,7 +141,31 @@ unit cgcpu;
suppress_endprologue: boolean;
stackmisalignment: longint;
para: tparavarsym;
xmmsize: longint;
procedure push_one_reg(reg: tregister);
  { Appends a PUSH of 'reg' to the enclosing procedure's instruction list.
    On x86_64-win64 it also appends the matching ash_pushreg SEH directive
    and flags the current procedure as having unwind info. }
  begin
    list.concat(taicpu.op_reg(A_PUSH,tcgsize2opsize[OS_ADDR],reg));
    { Other targets need no SEH bookkeeping }
    if target_info.system<>system_x86_64_win64 then
      exit;
    list.concat(cai_seh_directive.create_reg(ash_pushreg,reg));
    include(current_procinfo.flags,pi_has_unwind_info);
  end;
procedure push_regs;
  { Pushes every register from saved_standard_registers that is used in
    the current procedure, in list order, and grows stackmisalignment by
    sizeof(pint) for each register pushed. }
  var
    idx: longint;
  begin
    for idx:=low(saved_standard_registers) to high(saved_standard_registers) do
      begin
        if saved_standard_registers[idx] in rg[R_INTREGISTER].used_in_proc then
          begin
            push_one_reg(newreg(R_INTREGISTER,saved_standard_registers[idx],R_SUBWHOLE));
            { account for the slot just pushed }
            inc(stackmisalignment,sizeof(pint));
          end;
      end;
  end;
begin
hitem:=list.last;
{ pi_has_unwind_info may already be set at this point if there are
SEH directives in assembler body. In this case, .seh_endprologue
@ -127,17 +179,15 @@ unit cgcpu;
stackmisalignment := sizeof(pint);
list.concat(tai_regalloc.alloc(current_procinfo.framepointer,nil));
if current_procinfo.framepointer=NR_STACK_POINTER_REG then
CGmessage(cg_d_stackframe_omited)
begin
push_regs;
CGmessage(cg_d_stackframe_omited);
end
else
begin
{ push <frame_pointer> }
inc(stackmisalignment,sizeof(pint));
list.concat(Taicpu.op_reg(A_PUSH,tcgsize2opsize[OS_ADDR],NR_FRAME_POINTER_REG));
if (target_info.system=system_x86_64_win64) then
begin
list.concat(cai_seh_directive.create_reg(ash_pushreg,NR_FRAME_POINTER_REG));
include(current_procinfo.flags,pi_has_unwind_info);
end;
push_one_reg(NR_FRAME_POINTER_REG);
{ Return address and FP are both on stack }
current_asmdata.asmcfi.cfa_def_cfa_offset(list,2*sizeof(pint));
current_asmdata.asmcfi.cfa_offset(list,NR_FRAME_POINTER_REG,-(2*sizeof(pint)));
@ -145,6 +195,7 @@ unit cgcpu;
list.concat(Taicpu.op_reg_reg(A_MOV,tcgsize2opsize[OS_ADDR],NR_STACK_POINTER_REG,NR_FRAME_POINTER_REG))
else
begin
push_regs;
{ load framepointer from hidden $parentfp parameter }
para:=tparavarsym(current_procinfo.procdef.paras[0]);
if not (vo_is_parentfp in para.varoptions) then
@ -170,6 +221,14 @@ unit cgcpu;
}
end;
xmmsize:=saved_xmm_reg_size;
if use_push and (xmmsize<>0) then
begin
localsize:=align(localsize,target_info.stackalign)+xmmsize;
reference_reset_base(current_procinfo.save_regs_ref,NR_STACK_POINTER_REG,
localsize-xmmsize,tcgsize2size[OS_VECTOR]);
end;
{ allocate stackframe space }
if (localsize<>0) or
((target_info.stackalign>sizeof(pint)) and
@ -188,6 +247,16 @@ unit cgcpu;
if localsize<>0 then
list.concat(cai_seh_directive.create_offset(ash_stackalloc,localsize));
include(current_procinfo.flags,pi_has_unwind_info);
if use_push and (xmmsize<>0) then
begin
href:=current_procinfo.save_regs_ref;
for r:=low(saved_mm_registers) to high(saved_mm_registers) do
if saved_mm_registers[r] in rg[R_MMREGISTER].used_in_proc then
begin
a_loadmm_reg_ref(list,OS_VECTOR,OS_VECTOR,newreg(R_MMREGISTER,saved_mm_registers[r],R_SUBMMWHOLE),href,nil);
inc(href.offset,tcgsize2size[OS_VECTOR]);
end;
end;
end;
end;
end;
@ -209,6 +278,8 @@ unit cgcpu;
since registers are not modified before they are saved, and saves do not
change RSP, 'logically' all saves can happen at the end of prologue. }
href:=current_procinfo.save_regs_ref;
if (not use_push) then
begin
for r:=low(saved_standard_registers) to high(saved_standard_registers) do
if saved_standard_registers[r] in rg[R_INTREGISTER].used_in_proc then
begin
@ -217,6 +288,7 @@ unit cgcpu;
href.offset+frame_offset));
inc(href.offset,sizeof(aint));
end;
end;
if uses_registers(R_MMREGISTER) then
begin
if (href.offset mod tcgsize2size[OS_VECTOR])<>0 then
@ -256,6 +328,8 @@ unit cgcpu;
var
href : treference;
hreg : tregister;
r : longint;
begin
{ Release PIC register }
if cs_create_pic in current_settings.moduleswitches then
@ -268,11 +342,26 @@ unit cgcpu;
{ remove stackframe }
if not nostackframe then
begin
if (current_procinfo.framepointer=NR_STACK_POINTER_REG) or
(current_procinfo.procdef.proctypeoption=potype_exceptfilter) then
if use_push then
begin
if (saved_xmm_reg_size<>0) then
begin
href:=current_procinfo.save_regs_ref;
for r:=low(saved_mm_registers) to high(saved_mm_registers) do
if saved_mm_registers[r] in rg[R_MMREGISTER].used_in_proc then
begin
{ Allocate register so the optimizer does not remove the load }
hreg:=newreg(R_MMREGISTER,saved_mm_registers[r],R_SUBMMWHOLE);
a_reg_alloc(list,hreg);
a_loadmm_ref_reg(list,OS_VECTOR,OS_VECTOR,href,hreg,nil);
inc(href.offset,tcgsize2size[OS_VECTOR]);
end;
end;
if (current_procinfo.final_localsize<>0) then
increase_sp(current_procinfo.final_localsize);
internal_restore_regs(list,true);
if (current_procinfo.procdef.proctypeoption=potype_exceptfilter) then
list.concat(Taicpu.op_reg(A_POP,tcgsize2opsize[OS_ADDR],NR_FRAME_POINTER_REG));
end
@ -300,6 +389,20 @@ unit cgcpu;
end;
procedure tcgx86_64.g_save_registers(list: TAsmList);
  { Does nothing when use_push is true for the current procedure;
    otherwise defers to the inherited implementation. }
  begin
    if use_push then
      exit;
    inherited g_save_registers(list);
  end;
procedure tcgx86_64.g_restore_registers(list: TAsmList);
  { Does nothing when use_push is true for the current procedure;
    otherwise defers to the inherited implementation. }
  begin
    if use_push then
      exit;
    inherited g_restore_registers(list);
  end;
procedure tcgx86_64.g_intf_wrapper(list: TAsmList; procdef: tprocdef; const labelname: string; ioffset: longint);
var
make_global : boolean;