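* i386: For integer comparisons with zero, emit "test $-1,%reg" instead of "test %reg,%reg". This form is more spilling-friendly: if %reg has to be spilled, the instruction simply becomes "test $-1,spilltemp" and does not require a register, whereas "test %reg,%reg" would still need the value to be loaded into a register.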

* Improved the peephole optimizer to remove these instructions if they are preceded by a flag-setting instruction that operates on the same location and followed by a conditional jump, and to change them back into "test %reg,%reg" otherwise.

git-svn-id: trunk@27617 -
This commit is contained in:
sergei 2014-04-20 19:16:58 +00:00
parent 1fa0e64480
commit 2ee0c8de45
2 changed files with 30 additions and 14 deletions

View File

@ -2292,6 +2292,7 @@ end;
procedure PostPeepHoleOpts(asml: TAsmList; BlockStart, BlockEnd: tai);
var
p,hp1,hp2: tai;
IsTestConstX: boolean;
begin
p := BlockStart;
while (p <> BlockEnd) Do
@ -2397,22 +2398,22 @@ See test/tgadint64 in the test suite.
A_TEST, A_OR:
{removes the line marked with (x) from the sequence
and/or/xor/add/sub/... $x, %y
test/or %y, %y (x)
test/or %y, %y | test $-1, %y (x)
j(n)z _Label
as the first instruction already adjusts the ZF}
as the first instruction already adjusts the ZF
%y operand may also be a reference }
begin
if OpsEqual(taicpu(p).oper[0]^,taicpu(p).oper[1]^) then
if GetLastInstruction(p, hp1) and
IsTestConstX:=(taicpu(p).opcode=A_TEST) and
MatchOperand(taicpu(p).oper[0]^,-1);
if (OpsEqual(taicpu(p).oper[0]^,taicpu(p).oper[1]^) or IsTestConstX) and
GetLastInstruction(p, hp1) and
(tai(hp1).typ = ait_instruction) and
GetNextInstruction(p,hp2) and
(hp2.typ = ait_instruction) and
((taicpu(hp2).opcode = A_SETcc) or
(taicpu(hp2).opcode = A_Jcc) or
(taicpu(hp2).opcode = A_CMOVcc)) then
MatchInstruction(hp2,A_SETcc,A_Jcc,A_CMOVcc,[]) then
case taicpu(hp1).opcode Of
A_ADD, A_SUB, A_OR, A_XOR, A_AND:
begin
if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[0]^) and
if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[1]^) and
{ does not work in case of overflow for G(E)/L(E)/C_O/C_NO }
{ and in case of carry for A(E)/B(E)/C/NC }
((taicpu(hp2).condition in [C_Z,C_NZ,C_E,C_NE]) or
@ -2428,7 +2429,7 @@ See test/tgadint64 in the test suite.
end;
A_SHL, A_SAL, A_SHR, A_SAR:
begin
if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[0]^) and
if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[1]^) and
{ SHL/SAL/SHR/SAR with a value of 0 do not change the flags }
{ therefore, it's only safe to do this optimization for }
{ shifts by a (nonzero) constant }
@ -2447,7 +2448,7 @@ See test/tgadint64 in the test suite.
end;
A_DEC, A_INC, A_NEG:
begin
if OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[0]^) and
if OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) and
{ does not work in case of overflow for G(E)/L(E)/C_O/C_NO }
{ and in case of carry for A(E)/B(E)/C/NC }
(taicpu(hp2).condition in [C_Z,C_NZ,C_E,C_NE]) then
@ -2472,7 +2473,15 @@ See test/tgadint64 in the test suite.
continue
end;
end
end
else
{ change "test $-1,%reg" into "test %reg,%reg" }
if IsTestConstX and (taicpu(p).oper[1]^.typ=top_reg) then
taicpu(p).loadoper(0,taicpu(p).oper[1]^);
end { case }
else
{ change "test $-1,%reg" into "test %reg,%reg" }
if IsTestConstX and (taicpu(p).oper[1]^.typ=top_reg) then
taicpu(p).loadoper(0,taicpu(p).oper[1]^);
end;
end;
end;

View File

@ -66,7 +66,7 @@ unit nx86add;
implementation
uses
globtype,globals,
globtype,globals,systems,
verbose,cutils,
cpuinfo,
aasmbase,aasmtai,aasmdata,aasmcpu,
@ -131,7 +131,14 @@ unit nx86add;
(right.location.loc=LOC_CONSTANT) and
(right.location.value=0) then
begin
emit_reg_reg(A_TEST,TCGSize2Opsize[opsize],left.location.register,left.location.register);
{ 'test $-1,%reg' is transformable into 'test $-1,spilltemp' if %reg needs
spilling, while 'test %reg,%reg' still requires loading into a register.
If spilling is not necessary, it is changed back into 'test %reg,%reg' by
the peephole optimizer (this optimization is currently available only for i386). }
if (target_info.cpu=cpu_i386) then
emit_const_reg(A_TEST,TCGSize2Opsize[opsize],aint(-1),left.location.register)
else
emit_reg_reg(A_TEST,TCGSize2Opsize[opsize],left.location.register,left.location.register);
end
else
if (op=A_ADD) and