* x86_64: reworked register saving/restoring to use PUSH/POP instructions for procedures with an RSP-based frame and for SEH finalization procedures. XMM registers are also saved/restored without involving tempgen in the mentioned cases. This prevents SEH finalization procedures from saving registers in the stack frame of their parent procedures, fixing incorrect unwind bytecode (Mantis #24791). It also reduces executable size (for the compiler itself, by about 100 KB).

git-svn-id: trunk@25389 -
This commit is contained in:
sergei 2013-08-30 07:54:02 +00:00
parent fcaad5baf2
commit e41149a7ec

View File

@ -40,9 +40,14 @@ unit cgcpu;
procedure g_proc_exit(list : TAsmList;parasize:longint;nostackframe:boolean);override;
procedure g_intf_wrapper(list: TAsmList; procdef: tprocdef; const labelname: string; ioffset: longint);override;
procedure g_local_unwind(list: TAsmList; l: TAsmLabel);override;
procedure g_save_registers(list: TAsmList);override;
procedure g_restore_registers(list: TAsmList);override;
procedure a_loadmm_intreg_reg(list: TAsmList; fromsize, tosize : tcgsize;intreg, mmreg: tregister; shuffle: pmmshuffle); override;
procedure a_loadmm_reg_intreg(list: TAsmList; fromsize, tosize : tcgsize;mmreg, intreg: tregister;shuffle : pmmshuffle); override;
private
function use_push: boolean;
function saved_xmm_reg_size: longint;
end;
procedure create_codegen;
@ -103,6 +108,29 @@ unit cgcpu;
end;
function tcgx86_64.use_push: boolean;
  begin
    { True when the current procedure addresses its frame through the stack
      pointer register, or when it is an exception filter procedure
      (potype_exceptfilter). Callers use this to choose PUSH/POP based
      register saving instead of the inherited save/restore code. }
    if current_procinfo.framepointer=NR_STACK_POINTER_REG then
      result:=true
    else
      result:=current_procinfo.procdef.proctypeoption=potype_exceptfilter;
  end;
function tcgx86_64.saved_xmm_reg_size: longint;
  var
    idx: longint;
  begin
    { Returns the number of bytes needed to hold every entry of
      saved_mm_registers that is used in the current procedure, counting
      tcgsize2size[OS_VECTOR] bytes per register. The result is 0 when the
      target is not system_x86_64_win64 or when the procedure uses no
      MM registers at all. }
    result:=0;
    if (target_info.system=system_x86_64_win64) and
       uses_registers(R_MMREGISTER) then
      for idx:=low(saved_mm_registers) to high(saved_mm_registers) do
        if saved_mm_registers[idx] in rg[R_MMREGISTER].used_in_proc then
          inc(result,tcgsize2size[OS_VECTOR]);
  end;
procedure tcgx86_64.g_proc_entry(list : TAsmList;localsize:longint;nostackframe:boolean);
var
hitem: tlinkedlistitem;
@ -113,7 +141,31 @@ unit cgcpu;
suppress_endprologue: boolean;
stackmisalignment: longint;
para: tparavarsym;
xmmsize: longint;
procedure push_one_reg(reg: tregister);
  begin
    { Append a PUSH of 'reg' to the enclosing routine's 'list'. }
    list.concat(taicpu.op_reg(A_PUSH,tcgsize2opsize[OS_ADDR],reg));
    { Only the system_x86_64_win64 target gets the matching SEH directive }
    if target_info.system<>system_x86_64_win64 then
      exit;
    list.concat(cai_seh_directive.create_reg(ash_pushreg,reg));
    { Record that this procedure now carries unwind information }
    include(current_procinfo.flags,pi_has_unwind_info);
  end;
procedure push_regs;
  var
    idx: longint;
  begin
    { Push every entry of saved_standard_registers that is used in the
      current procedure, adding sizeof(pint) to the enclosing routine's
      stackmisalignment for each register pushed. }
    for idx:=low(saved_standard_registers) to high(saved_standard_registers) do
      begin
        { Registers not used in this procedure are not pushed }
        if not (saved_standard_registers[idx] in rg[R_INTREGISTER].used_in_proc) then
          continue;
        inc(stackmisalignment,sizeof(pint));
        push_one_reg(newreg(R_INTREGISTER,saved_standard_registers[idx],R_SUBWHOLE));
      end;
  end;
begin
hitem:=list.last;
{ pi_has_unwind_info may already be set at this point if there are
SEH directives in assembler body. In this case, .seh_endprologue
@ -127,17 +179,15 @@ unit cgcpu;
stackmisalignment := sizeof(pint);
list.concat(tai_regalloc.alloc(current_procinfo.framepointer,nil));
if current_procinfo.framepointer=NR_STACK_POINTER_REG then
CGmessage(cg_d_stackframe_omited)
begin
push_regs;
CGmessage(cg_d_stackframe_omited);
end
else
begin
{ push <frame_pointer> }
inc(stackmisalignment,sizeof(pint));
list.concat(Taicpu.op_reg(A_PUSH,tcgsize2opsize[OS_ADDR],NR_FRAME_POINTER_REG));
if (target_info.system=system_x86_64_win64) then
begin
list.concat(cai_seh_directive.create_reg(ash_pushreg,NR_FRAME_POINTER_REG));
include(current_procinfo.flags,pi_has_unwind_info);
end;
push_one_reg(NR_FRAME_POINTER_REG);
{ Return address and FP are both on stack }
current_asmdata.asmcfi.cfa_def_cfa_offset(list,2*sizeof(pint));
current_asmdata.asmcfi.cfa_offset(list,NR_FRAME_POINTER_REG,-(2*sizeof(pint)));
@ -145,6 +195,7 @@ unit cgcpu;
list.concat(Taicpu.op_reg_reg(A_MOV,tcgsize2opsize[OS_ADDR],NR_STACK_POINTER_REG,NR_FRAME_POINTER_REG))
else
begin
push_regs;
{ load framepointer from hidden $parentfp parameter }
para:=tparavarsym(current_procinfo.procdef.paras[0]);
if not (vo_is_parentfp in para.varoptions) then
@ -170,6 +221,14 @@ unit cgcpu;
}
end;
xmmsize:=saved_xmm_reg_size;
if use_push and (xmmsize<>0) then
begin
localsize:=align(localsize,target_info.stackalign)+xmmsize;
reference_reset_base(current_procinfo.save_regs_ref,NR_STACK_POINTER_REG,
localsize-xmmsize,tcgsize2size[OS_VECTOR]);
end;
{ allocate stackframe space }
if (localsize<>0) or
((target_info.stackalign>sizeof(pint)) and
@ -188,6 +247,16 @@ unit cgcpu;
if localsize<>0 then
list.concat(cai_seh_directive.create_offset(ash_stackalloc,localsize));
include(current_procinfo.flags,pi_has_unwind_info);
if use_push and (xmmsize<>0) then
begin
href:=current_procinfo.save_regs_ref;
for r:=low(saved_mm_registers) to high(saved_mm_registers) do
if saved_mm_registers[r] in rg[R_MMREGISTER].used_in_proc then
begin
a_loadmm_reg_ref(list,OS_VECTOR,OS_VECTOR,newreg(R_MMREGISTER,saved_mm_registers[r],R_SUBMMWHOLE),href,nil);
inc(href.offset,tcgsize2size[OS_VECTOR]);
end;
end;
end;
end;
end;
@ -209,6 +278,8 @@ unit cgcpu;
since registers are not modified before they are saved, and saves do not
change RSP, 'logically' all saves can happen at the end of prologue. }
href:=current_procinfo.save_regs_ref;
if (not use_push) then
begin
for r:=low(saved_standard_registers) to high(saved_standard_registers) do
if saved_standard_registers[r] in rg[R_INTREGISTER].used_in_proc then
begin
@ -217,6 +288,7 @@ unit cgcpu;
href.offset+frame_offset));
inc(href.offset,sizeof(aint));
end;
end;
if uses_registers(R_MMREGISTER) then
begin
if (href.offset mod tcgsize2size[OS_VECTOR])<>0 then
@ -256,6 +328,8 @@ unit cgcpu;
var
href : treference;
hreg : tregister;
r : longint;
begin
{ Release PIC register }
if cs_create_pic in current_settings.moduleswitches then
@ -268,11 +342,26 @@ unit cgcpu;
{ remove stackframe }
if not nostackframe then
begin
if (current_procinfo.framepointer=NR_STACK_POINTER_REG) or
(current_procinfo.procdef.proctypeoption=potype_exceptfilter) then
if use_push then
begin
if (saved_xmm_reg_size<>0) then
begin
href:=current_procinfo.save_regs_ref;
for r:=low(saved_mm_registers) to high(saved_mm_registers) do
if saved_mm_registers[r] in rg[R_MMREGISTER].used_in_proc then
begin
{ Allocate register so the optimizer does not remove the load }
hreg:=newreg(R_MMREGISTER,saved_mm_registers[r],R_SUBMMWHOLE);
a_reg_alloc(list,hreg);
a_loadmm_ref_reg(list,OS_VECTOR,OS_VECTOR,href,hreg,nil);
inc(href.offset,tcgsize2size[OS_VECTOR]);
end;
end;
if (current_procinfo.final_localsize<>0) then
increase_sp(current_procinfo.final_localsize);
internal_restore_regs(list,true);
if (current_procinfo.procdef.proctypeoption=potype_exceptfilter) then
list.concat(Taicpu.op_reg(A_POP,tcgsize2opsize[OS_ADDR],NR_FRAME_POINTER_REG));
end
@ -300,6 +389,20 @@ unit cgcpu;
end;
procedure tcgx86_64.g_save_registers(list: TAsmList);
  begin
    { When use_push is true nothing is emitted here; in every other case
      the inherited implementation does the saving. }
    if use_push then
      exit;
    inherited g_save_registers(list);
  end;
procedure tcgx86_64.g_restore_registers(list: TAsmList);
  begin
    { When use_push is true nothing is emitted here; in every other case
      the inherited implementation does the restoring. }
    if use_push then
      exit;
    inherited g_restore_registers(list);
  end;
procedure tcgx86_64.g_intf_wrapper(list: TAsmList; procdef: tprocdef; const labelname: string; ioffset: longint);
var
make_global : boolean;