* x86_64: reworked register saving/restoring to use PUSH/POP instructions for procedures with an RSP-based frame and for SEH finalization procedures. In these cases XMM registers are also saved/restored without involving tempgen. This prevents SEH finalization procedures from saving registers in the stack frame of their parent procedures, which fixes incorrect unwind bytecode (Mantis #24791). It also reduces executable size (for the compiler itself, by about 100 KB).

git-svn-id: trunk@25389 -
This commit is contained in:
sergei 2013-08-30 07:54:02 +00:00
parent fcaad5baf2
commit e41149a7ec

View File

@ -40,9 +40,14 @@ unit cgcpu;
procedure g_proc_exit(list : TAsmList;parasize:longint;nostackframe:boolean);override; procedure g_proc_exit(list : TAsmList;parasize:longint;nostackframe:boolean);override;
procedure g_intf_wrapper(list: TAsmList; procdef: tprocdef; const labelname: string; ioffset: longint);override; procedure g_intf_wrapper(list: TAsmList; procdef: tprocdef; const labelname: string; ioffset: longint);override;
procedure g_local_unwind(list: TAsmList; l: TAsmLabel);override; procedure g_local_unwind(list: TAsmList; l: TAsmLabel);override;
procedure g_save_registers(list: TAsmList);override;
procedure g_restore_registers(list: TAsmList);override;
procedure a_loadmm_intreg_reg(list: TAsmList; fromsize, tosize : tcgsize;intreg, mmreg: tregister; shuffle: pmmshuffle); override; procedure a_loadmm_intreg_reg(list: TAsmList; fromsize, tosize : tcgsize;intreg, mmreg: tregister; shuffle: pmmshuffle); override;
procedure a_loadmm_reg_intreg(list: TAsmList; fromsize, tosize : tcgsize;mmreg, intreg: tregister;shuffle : pmmshuffle); override; procedure a_loadmm_reg_intreg(list: TAsmList; fromsize, tosize : tcgsize;mmreg, intreg: tregister;shuffle : pmmshuffle); override;
private
function use_push: boolean;
function saved_xmm_reg_size: longint;
end; end;
procedure create_codegen; procedure create_codegen;
@ -103,6 +108,29 @@ unit cgcpu;
end; end;
function tcgx86_64.use_push: boolean;
  { Decides whether the current procedure bypasses the inherited
    g_save_registers/g_restore_registers handling (both are skipped when this
    returns true).
    True when the frame pointer of the current procedure is the stack pointer
    register, or when the procedure's proctypeoption is potype_exceptfilter. }
  begin
    if current_procinfo.framepointer=NR_STACK_POINTER_REG then
      result:=true
    else
      result:=current_procinfo.procdef.proctypeoption=potype_exceptfilter;
  end;
function tcgx86_64.saved_xmm_reg_size: longint;
  { Returns the number of bytes required to store all registers listed in
    saved_mm_registers that are members of rg[R_MMREGISTER].used_in_proc,
    counting tcgsize2size[OS_VECTOR] bytes per register.
    The result is 0 when the target is not system_x86_64_win64 or when
    uses_registers(R_MMREGISTER) is false. }
  var
    idx: longint;
  begin
    result:=0;
    { the target check comes first, so uses_registers is consulted only on win64 }
    if (target_info.system=system_x86_64_win64) and
       uses_registers(R_MMREGISTER) then
      for idx:=low(saved_mm_registers) to high(saved_mm_registers) do
        if saved_mm_registers[idx] in rg[R_MMREGISTER].used_in_proc then
          result:=result+tcgsize2size[OS_VECTOR];
  end;
procedure tcgx86_64.g_proc_entry(list : TAsmList;localsize:longint;nostackframe:boolean); procedure tcgx86_64.g_proc_entry(list : TAsmList;localsize:longint;nostackframe:boolean);
var var
hitem: tlinkedlistitem; hitem: tlinkedlistitem;
@ -113,6 +141,30 @@ unit cgcpu;
suppress_endprologue: boolean; suppress_endprologue: boolean;
stackmisalignment: longint; stackmisalignment: longint;
para: tparavarsym; para: tparavarsym;
xmmsize: longint;
procedure push_one_reg(reg: tregister);
  { Appends a PUSH of 'reg' (address-sized operand) to the enclosing routine's
    'list'. When targeting system_x86_64_win64 it additionally appends the
    matching ash_pushreg SEH directive and sets pi_has_unwind_info in the
    flags of the current procedure. }
  begin
    list.concat(taicpu.op_reg(A_PUSH,tcgsize2opsize[OS_ADDR],reg));
    { the SEH directive and the unwind flag are emitted for win64 only }
    if target_info.system<>system_x86_64_win64 then
      exit;
    list.concat(cai_seh_directive.create_reg(ash_pushreg,reg));
    include(current_procinfo.flags,pi_has_unwind_info);
  end;
procedure push_regs;
  { Walks saved_standard_registers in ascending index order and pushes each
    entry that is a member of rg[R_INTREGISTER].used_in_proc.
    The enclosing routine's 'stackmisalignment' is increased by sizeof(pint)
    for every register pushed. }
  var
    idx: longint;
  begin
    for idx:=low(saved_standard_registers) to high(saved_standard_registers) do
      begin
        { registers not in used_in_proc are not pushed }
        if not (saved_standard_registers[idx] in rg[R_INTREGISTER].used_in_proc) then
          continue;
        inc(stackmisalignment,sizeof(pint));
        push_one_reg(newreg(R_INTREGISTER,saved_standard_registers[idx],R_SUBWHOLE));
      end;
  end;
begin begin
hitem:=list.last; hitem:=list.last;
{ pi_has_unwind_info may already be set at this point if there are { pi_has_unwind_info may already be set at this point if there are
@ -127,17 +179,15 @@ unit cgcpu;
stackmisalignment := sizeof(pint); stackmisalignment := sizeof(pint);
list.concat(tai_regalloc.alloc(current_procinfo.framepointer,nil)); list.concat(tai_regalloc.alloc(current_procinfo.framepointer,nil));
if current_procinfo.framepointer=NR_STACK_POINTER_REG then if current_procinfo.framepointer=NR_STACK_POINTER_REG then
CGmessage(cg_d_stackframe_omited) begin
push_regs;
CGmessage(cg_d_stackframe_omited);
end
else else
begin begin
{ push <frame_pointer> } { push <frame_pointer> }
inc(stackmisalignment,sizeof(pint)); inc(stackmisalignment,sizeof(pint));
list.concat(Taicpu.op_reg(A_PUSH,tcgsize2opsize[OS_ADDR],NR_FRAME_POINTER_REG)); push_one_reg(NR_FRAME_POINTER_REG);
if (target_info.system=system_x86_64_win64) then
begin
list.concat(cai_seh_directive.create_reg(ash_pushreg,NR_FRAME_POINTER_REG));
include(current_procinfo.flags,pi_has_unwind_info);
end;
{ Return address and FP are both on stack } { Return address and FP are both on stack }
current_asmdata.asmcfi.cfa_def_cfa_offset(list,2*sizeof(pint)); current_asmdata.asmcfi.cfa_def_cfa_offset(list,2*sizeof(pint));
current_asmdata.asmcfi.cfa_offset(list,NR_FRAME_POINTER_REG,-(2*sizeof(pint))); current_asmdata.asmcfi.cfa_offset(list,NR_FRAME_POINTER_REG,-(2*sizeof(pint)));
@ -145,6 +195,7 @@ unit cgcpu;
list.concat(Taicpu.op_reg_reg(A_MOV,tcgsize2opsize[OS_ADDR],NR_STACK_POINTER_REG,NR_FRAME_POINTER_REG)) list.concat(Taicpu.op_reg_reg(A_MOV,tcgsize2opsize[OS_ADDR],NR_STACK_POINTER_REG,NR_FRAME_POINTER_REG))
else else
begin begin
push_regs;
{ load framepointer from hidden $parentfp parameter } { load framepointer from hidden $parentfp parameter }
para:=tparavarsym(current_procinfo.procdef.paras[0]); para:=tparavarsym(current_procinfo.procdef.paras[0]);
if not (vo_is_parentfp in para.varoptions) then if not (vo_is_parentfp in para.varoptions) then
@ -170,6 +221,14 @@ unit cgcpu;
} }
end; end;
xmmsize:=saved_xmm_reg_size;
if use_push and (xmmsize<>0) then
begin
localsize:=align(localsize,target_info.stackalign)+xmmsize;
reference_reset_base(current_procinfo.save_regs_ref,NR_STACK_POINTER_REG,
localsize-xmmsize,tcgsize2size[OS_VECTOR]);
end;
{ allocate stackframe space } { allocate stackframe space }
if (localsize<>0) or if (localsize<>0) or
((target_info.stackalign>sizeof(pint)) and ((target_info.stackalign>sizeof(pint)) and
@ -188,6 +247,16 @@ unit cgcpu;
if localsize<>0 then if localsize<>0 then
list.concat(cai_seh_directive.create_offset(ash_stackalloc,localsize)); list.concat(cai_seh_directive.create_offset(ash_stackalloc,localsize));
include(current_procinfo.flags,pi_has_unwind_info); include(current_procinfo.flags,pi_has_unwind_info);
if use_push and (xmmsize<>0) then
begin
href:=current_procinfo.save_regs_ref;
for r:=low(saved_mm_registers) to high(saved_mm_registers) do
if saved_mm_registers[r] in rg[R_MMREGISTER].used_in_proc then
begin
a_loadmm_reg_ref(list,OS_VECTOR,OS_VECTOR,newreg(R_MMREGISTER,saved_mm_registers[r],R_SUBMMWHOLE),href,nil);
inc(href.offset,tcgsize2size[OS_VECTOR]);
end;
end;
end; end;
end; end;
end; end;
@ -209,6 +278,8 @@ unit cgcpu;
since registers are not modified before they are saved, and saves do not since registers are not modified before they are saved, and saves do not
change RSP, 'logically' all saves can happen at the end of prologue. } change RSP, 'logically' all saves can happen at the end of prologue. }
href:=current_procinfo.save_regs_ref; href:=current_procinfo.save_regs_ref;
if (not use_push) then
begin
for r:=low(saved_standard_registers) to high(saved_standard_registers) do for r:=low(saved_standard_registers) to high(saved_standard_registers) do
if saved_standard_registers[r] in rg[R_INTREGISTER].used_in_proc then if saved_standard_registers[r] in rg[R_INTREGISTER].used_in_proc then
begin begin
@ -217,6 +288,7 @@ unit cgcpu;
href.offset+frame_offset)); href.offset+frame_offset));
inc(href.offset,sizeof(aint)); inc(href.offset,sizeof(aint));
end; end;
end;
if uses_registers(R_MMREGISTER) then if uses_registers(R_MMREGISTER) then
begin begin
if (href.offset mod tcgsize2size[OS_VECTOR])<>0 then if (href.offset mod tcgsize2size[OS_VECTOR])<>0 then
@ -256,6 +328,8 @@ unit cgcpu;
var var
href : treference; href : treference;
hreg : tregister;
r : longint;
begin begin
{ Release PIC register } { Release PIC register }
if cs_create_pic in current_settings.moduleswitches then if cs_create_pic in current_settings.moduleswitches then
@ -268,11 +342,26 @@ unit cgcpu;
{ remove stackframe } { remove stackframe }
if not nostackframe then if not nostackframe then
begin begin
if (current_procinfo.framepointer=NR_STACK_POINTER_REG) or if use_push then
(current_procinfo.procdef.proctypeoption=potype_exceptfilter) then
begin begin
if (saved_xmm_reg_size<>0) then
begin
href:=current_procinfo.save_regs_ref;
for r:=low(saved_mm_registers) to high(saved_mm_registers) do
if saved_mm_registers[r] in rg[R_MMREGISTER].used_in_proc then
begin
{ Allocate register so the optimizer does not remove the load }
hreg:=newreg(R_MMREGISTER,saved_mm_registers[r],R_SUBMMWHOLE);
a_reg_alloc(list,hreg);
a_loadmm_ref_reg(list,OS_VECTOR,OS_VECTOR,href,hreg,nil);
inc(href.offset,tcgsize2size[OS_VECTOR]);
end;
end;
if (current_procinfo.final_localsize<>0) then if (current_procinfo.final_localsize<>0) then
increase_sp(current_procinfo.final_localsize); increase_sp(current_procinfo.final_localsize);
internal_restore_regs(list,true);
if (current_procinfo.procdef.proctypeoption=potype_exceptfilter) then if (current_procinfo.procdef.proctypeoption=potype_exceptfilter) then
list.concat(Taicpu.op_reg(A_POP,tcgsize2opsize[OS_ADDR],NR_FRAME_POINTER_REG)); list.concat(Taicpu.op_reg(A_POP,tcgsize2opsize[OS_ADDR],NR_FRAME_POINTER_REG));
end end
@ -300,6 +389,20 @@ unit cgcpu;
end; end;
procedure tcgx86_64.g_save_registers(list: TAsmList);
  { Override that emits nothing when use_push is true; otherwise it defers to
    the inherited implementation. }
  begin
    if use_push then
      exit;
    inherited g_save_registers(list);
  end;
procedure tcgx86_64.g_restore_registers(list: TAsmList);
  { Override that emits nothing when use_push is true; otherwise it defers to
    the inherited implementation. }
  begin
    if use_push then
      exit;
    inherited g_restore_registers(list);
  end;
procedure tcgx86_64.g_intf_wrapper(list: TAsmList; procdef: tprocdef; const labelname: string; ioffset: longint); procedure tcgx86_64.g_intf_wrapper(list: TAsmList; procdef: tprocdef; const labelname: string; ioffset: longint);
var var
make_global : boolean; make_global : boolean;