+ implemented x86-64 mov optimization to test register usage tracking framework

git-svn-id: trunk@20892 -
This commit is contained in:
florian 2012-04-15 20:30:45 +00:00
parent 3c33bf4e6d
commit fc673340fe

View File

@ -41,6 +41,7 @@ uses
cutils,
verbose,
cgbase, cgutils,
aoptobj,
aasmbase, aasmdata, aasmcpu;
function isFoldableArithOp(hp1: taicpu; reg: tregister): boolean;
@ -49,11 +50,11 @@ begin
case hp1.opcode of
A_ADD, A_SUB, A_OR, A_XOR, A_AND, A_SHL, A_SHR, A_SAR:
isFoldableArithOp :=
(taicpu(hp1).oper[1]^.typ = top_reg) and
(taicpu(hp1).oper[1]^.reg = reg) and
((taicpu(hp1).oper[0]^.typ = top_const) or
((taicpu(hp1).oper[0]^.typ = top_reg) and
(taicpu(hp1).oper[0]^.reg<>reg))) and
(taicpu(hp1).oper[1]^.typ = top_reg) and
(taicpu(hp1).oper[1]^.reg = reg);
(taicpu(hp1).oper[0]^.reg<>reg)));
A_INC, A_DEC:
isFoldableArithOp :=
(taicpu(hp1).oper[0]^.typ = top_reg) and
@ -65,6 +66,8 @@ function TCpuAsmOptimizer.PeepHoleOptPass1Cpu(var p: tai): boolean;
var
next1: tai;
hp1, hp2: tai;
GetNextIntruction_p : boolean;
TmpUsedRegs : TAllUsedRegs;
begin
Result := False;
case p.typ of
@ -92,7 +95,7 @@ begin
taicpu(hp1).oper[0]^.val);
asml.remove(p);
p.Free;
p := hp1;
p:=hp1;
end;
(* else
{change "and x, reg; jxx" to "test x, reg", if reg is deallocated before the
@ -108,8 +111,11 @@ begin
A_MOV:
{ removes superfluous And's after mov's }
begin
if not(cs_opt_level3 in current_settings.optimizerswitches) then
exit;
GetNextIntruction_p:=GetNextInstruction(p, hp1);
if (taicpu(p).oper[1]^.typ = top_reg) and
GetNextInstruction(p, hp1) and
GetNextIntruction_p and
(tai(hp1).typ = ait_instruction) and
(taicpu(hp1).opcode = A_AND) and
(taicpu(hp1).oper[0]^.typ = top_const) and
@ -122,7 +128,65 @@ begin
asml.remove(hp1);
hp1.free;
end;
end;
end
else if (taicpu(p).oper[1]^.typ = top_reg) and
GetNextIntruction_p and
(hp1.typ = ait_instruction) and
GetNextInstruction(hp1, hp2) and
(hp2.typ = ait_instruction) and
(taicpu(hp2).opcode = A_MOV) and
(taicpu(hp2).oper[0]^.typ = top_reg) and
OpsEqual(taicpu(hp2).oper[1]^, taicpu(p).oper[0]^) and
(IsFoldableArithOp(taicpu(hp1), taicpu(p).oper[1]^.reg) or
((taicpu(p).opsize=S_L) and (taicpu(hp1).opsize=S_Q) and
IsFoldableArithOp(taicpu(hp1), newreg(R_INTREGISTER,getsupreg(taicpu(p).oper[1]^.reg),R_SUBQ)))
) then
{ change mov reg/ref, reg2 }
{ add/sub/or/... reg3/$const, reg2 }
{ mov reg2, reg/ref }
{ to add/sub/or/... reg3/$const, reg/ref }
begin
CopyUsedRegs(TmpUsedRegs);
UpdateUsedRegs(TmpUsedRegs, tai(p.next));
UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
If not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp2,TmpUsedRegs)) then
begin
{ by example:
movswl %si,%eax movswl %si,%eax p
decl %eax addl %edx,%eax hp1
movw %ax,%si movw %ax,%si hp2
->
movswl %si,%eax movswl %si,%eax p
decw %eax addw %edx,%eax hp1
movw %ax,%si movw %ax,%si hp2
}
taicpu(hp1).changeopsize(taicpu(hp2).opsize);
{
->
movswl %si,%eax movswl %si,%eax p
decw %si addw %dx,%si hp1
movw %ax,%si movw %ax,%si hp2
}
case taicpu(hp1).ops of
1:
taicpu(hp1).loadoper(0, taicpu(hp2).oper[1]^);
2:
taicpu(hp1).loadoper(1, taicpu(hp2).oper[1]^);
else
internalerror(2008042701);
end;
{
->
decw %si addw %dx,%si p
}
asml.remove(p);
asml.remove(hp2);
p.Free;
hp2.Free;
p := hp1;
end;
ReleaseUsedRegs(TmpUsedRegs);
end
end;
A_MOVSX,
A_MOVZX:
@ -190,26 +254,28 @@ begin
(taicpu(hp1).oper[0]^.typ = top_const) and
(taicpu(hp1).oper[1]^.typ = top_reg) and
(taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
case taicpu(p).opsize of
S_BL, S_BW, S_BQ:
if (taicpu(hp1).oper[0]^.val = $ff) then
begin
asml.remove(hp1);
hp1.Free;
begin
case taicpu(p).opsize of
S_BL, S_BW, S_BQ:
if (taicpu(hp1).oper[0]^.val = $ff) then
begin
asml.remove(hp1);
hp1.Free;
end;
S_WL, S_WQ:
if (taicpu(hp1).oper[0]^.val = $ffff) then
begin
asml.remove(hp1);
hp1.Free;
end;
S_LQ:
if (taicpu(hp1).oper[0]^.val = $ffffffff) then
begin
asml.remove(hp1);
hp1.Free;
end;
end;
S_WL, S_WQ:
if (taicpu(hp1).oper[0]^.val = $ffff) then
begin
asml.remove(hp1);
hp1.Free;
end;
S_LQ:
if (taicpu(hp1).oper[0]^.val = $ffffffff) then
begin
asml.remove(hp1);
hp1.Free;
end;
end;
end;
{ changes some movzx constructs to faster synonyms (all examples
are given with eax/ax, but are also valid for other registers)}
if (taicpu(p).oper[1]^.typ = top_reg) then