* manually merged merge request 69 by J. Gareth "Kit" Moreton:

x86: CMP/MOV refactoring and expansion
      This merge request refactors the SwapMovCmp routine (now the Boolean function TrySwapMovCmp), and the calls to it,
      to be more self-contained: the preliminary checks that ensure moving the MOV instruction is
      actually a sound idea are now built in. It also makes the routine more general-purpose, so it can handle instructions
      that are not MOV operations. This generalisation is primarily for future expansion,
      but it also cleans up the code for the x86 peephole optimizer.
This commit is contained in:
florian 2021-10-17 10:22:30 +02:00
parent 4012c3dbd4
commit ea6529ff63

View File

@ -193,7 +193,7 @@ unit aoptx86;
procedure ConvertJumpToRET(const p: tai; const ret_p: tai);
function CheckJumpMovTransferOpt(var p: tai; hp1: tai; LoopCount: Integer; out Count: Integer): Boolean;
procedure SwapMovCmp(var p, hp1: tai);
function TrySwapMovCmp(var p, hp1: tai): Boolean;
{ Processor-dependent reference optimisation }
class procedure OptimizeRefs(var p: taicpu); static;
@ -772,6 +772,16 @@ unit aoptx86;
Result:=([Ch_W0IntFlag,Ch_W1IntFlag,Ch_WFlags]*insprop[taicpu(p1).opcode].Ch)<>[];
R_SUBFLAGDIRECTION:
Result:=([Ch_RDirFlag,Ch_W0DirFlag,Ch_W1DirFlag,Ch_WFlags]*insprop[taicpu(p1).opcode].Ch)<>[];
R_SUBW,R_SUBD,R_SUBQ:
{ Everything except the direction bits }
Result:=
([Ch_RCarryFlag,Ch_RParityFlag,Ch_RAuxiliaryFlag,Ch_RZeroFlag,Ch_RSignFlag,Ch_ROverflowFlag,
Ch_WCarryFlag,Ch_WParityFlag,Ch_WAuxiliaryFlag,Ch_WZeroFlag,Ch_WSignFlag,Ch_WOverflowFlag,
Ch_W0CarryFlag,Ch_W0ParityFlag,Ch_W0AuxiliaryFlag,Ch_W0ZeroFlag,Ch_W0SignFlag,Ch_W0OverflowFlag,
Ch_W1CarryFlag,Ch_W1ParityFlag,Ch_W1AuxiliaryFlag,Ch_W1ZeroFlag,Ch_W1SignFlag,Ch_W1OverflowFlag,
Ch_WUCarryFlag,Ch_WUParityFlag,Ch_WUAuxiliaryFlag,Ch_WUZeroFlag,Ch_WUSignFlag,Ch_WUOverflowFlag,
Ch_RWCarryFlag,Ch_RWParityFlag,Ch_RWAuxiliaryFlag,Ch_RWZeroFlag,Ch_RWSignFlag,Ch_RWOverflowFlag
]*insprop[taicpu(p1).opcode].Ch)<>[];
else
;
end;
@ -4171,33 +4181,8 @@ unit aoptx86;
Result := False;
if GetNextInstruction(p, hp1) and
MatchInstruction(hp1,A_MOV,[]) and
(
(taicpu(p).oper[0]^.typ <> top_reg) or
not RegInInstruction(taicpu(p).oper[0]^.reg, hp1)
) and
(
(taicpu(p).oper[1]^.typ <> top_reg) or
not RegInInstruction(taicpu(p).oper[1]^.reg, hp1)
) and
(
{ Make sure the register written to doesn't appear in the
test instruction (in a reference, say) }
(taicpu(hp1).oper[1]^.typ <> top_reg) or
not RegInInstruction(taicpu(hp1).oper[1]^.reg, p)
) then
TrySwapMovCmp(p, hp1) then
begin
{ If we have something like:
test %reg1,%reg1
mov 0,%reg2
And no registers are shared (the two %reg1's can be different, as
long as neither of them are also %reg2), move the MOV command to
before the comparison as this means it can be optimised without
worrying about the FLAGS register. (This combination is generated
by "J(c)Mov1JmpMov0 -> Set(~c)", among other things).
}
SwapMovCmp(p, hp1);
Result := True;
Exit;
end;
@ -5730,32 +5715,8 @@ unit aoptx86;
end;
end;
if MatchInstruction(hp1,A_MOV,[]) and
(
(taicpu(p).oper[0]^.typ <> top_reg) or
not RegInInstruction(taicpu(p).oper[0]^.reg, hp1)
) and
(
(taicpu(p).oper[1]^.typ <> top_reg) or
not RegInInstruction(taicpu(p).oper[1]^.reg, hp1)
) and
(
{ Make sure the register written to doesn't appear in the
cmp instruction (in a reference, say) }
(taicpu(hp1).oper[1]^.typ <> top_reg) or
not RegInInstruction(taicpu(hp1).oper[1]^.reg, p)
) then
if TrySwapMovCmp(p, hp1) then
begin
{ If we have something like:
cmp ###,%reg1
mov 0,%reg2
And no registers are shared, move the MOV command to before the
comparison as this means it can be optimised without worrying
about the FLAGS register. (This combination is generated by
"J(c)Mov1JmpMov0 -> Set(~c)", among other things).
}
SwapMovCmp(p, hp1);
Result := True;
Exit;
end;
@ -6524,11 +6485,86 @@ unit aoptx86;
end;
procedure TX86AsmOptimizer.SwapMovCmp(var p, hp1: tai);
function TX86AsmOptimizer.TrySwapMovCmp(var p, hp1: tai): Boolean;
var
hp2: tai;
X: Integer;
const
WriteOp: array[0..3] of set of TInsChange = (
[Ch_Wop1, Ch_RWop1, Ch_Mop1],
[Ch_Wop2, Ch_RWop2, Ch_Mop2],
[Ch_Wop3, Ch_RWop3, Ch_Mop3],
[Ch_Wop4, Ch_RWop4, Ch_Mop4]);
RegWriteFlags: array[0..7] of set of TInsChange = (
{ The order is important: EAX, ECX, EDX, EBX, ESI, EDI, EBP, ESP }
[Ch_WEAX, Ch_RWEAX, Ch_MEAX{$ifdef x86_64}, Ch_WRAX, Ch_RWRAX, Ch_MRAX{$endif x86_64}],
[Ch_WECX, Ch_RWECX, Ch_MECX{$ifdef x86_64}, Ch_WRCX, Ch_RWRCX, Ch_MRCX{$endif x86_64}],
[Ch_WEDX, Ch_RWEDX, Ch_MEDX{$ifdef x86_64}, Ch_WRDX, Ch_RWRDX, Ch_MRDX{$endif x86_64}],
[Ch_WEBX, Ch_RWEBX, Ch_MEBX{$ifdef x86_64}, Ch_WRBX, Ch_RWRBX, Ch_MRBX{$endif x86_64}],
[Ch_WESI, Ch_RWESI, Ch_MESI{$ifdef x86_64}, Ch_WRSI, Ch_RWRSI, Ch_MRSI{$endif x86_64}],
[Ch_WEDI, Ch_RWEDI, Ch_MEDI{$ifdef x86_64}, Ch_WRDI, Ch_RWRDI, Ch_MRDI{$endif x86_64}],
[Ch_WEBP, Ch_RWEBP, Ch_MEBP{$ifdef x86_64}, Ch_WRBP, Ch_RWRBP, Ch_MRBP{$endif x86_64}],
[Ch_WESP, Ch_RWESP, Ch_MESP{$ifdef x86_64}, Ch_WRSP, Ch_RWRSP, Ch_MRSP{$endif x86_64}]);
begin
{ If we have something like:
cmp ###,%reg1
mov 0,%reg2
And no modified registers are shared, move the instruction to before
the comparison as this means it can be optimised without worrying
about the FLAGS register. (CMP/MOV is generated by
"J(c)Mov1JmpMov0 -> Set(~c)", among other things).
As long as the second instruction doesn't use the flags or one of the
registers used by CMP or TEST (also check any references that use the
registers), then it can be moved prior to the comparison.
}
Result := False;
if (hp1.typ <> ait_instruction) or
taicpu(hp1).is_jmp or
RegInInstruction(NR_DEFAULTFLAGS, hp1) then
Exit;
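{ NOP is a pipeline fence, likely marking the beginning of the function
epilogue, so drop out. Similarly, drop out if POP is encountered.
NOTE(review): RET is not matched by the check below - confirm that it is
rejected elsewhere (e.g. by the is_jmp test above) or add it here }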
if MatchInstruction(hp1, A_NOP, A_POP, []) then
Exit;
if (taicpu(hp1).opcode = A_MOVSS) and
(taicpu(hp1).ops = 0) then
{ Wrong MOVSS }
Exit;
{ Check for writes to specific registers first }
{ EAX, ECX, EDX, EBX, ESI, EDI, EBP, ESP in that order }
for X := 0 to 7 do
if (RegWriteFlags[X] * InsProp[taicpu(hp1).opcode].Ch <> [])
and RegInInstruction(newreg(R_INTREGISTER, TSuperRegister(X), R_SUBWHOLE), p) then
Exit;
for X := 0 to taicpu(hp1).ops - 1 do
begin
{ Check to see if this operand writes to something }
if ((WriteOp[X] * InsProp[taicpu(hp1).opcode].Ch) <> []) and
{ And matches something in the CMP/TEST instruction }
(
MatchOperand(taicpu(hp1).oper[X]^, taicpu(p).oper[0]^) or
MatchOperand(taicpu(hp1).oper[X]^, taicpu(p).oper[1]^) or
(
{ If it's a register, make sure the register written to doesn't
appear in the cmp instruction as part of a reference }
(taicpu(hp1).oper[X]^.typ = top_reg) and
RegInInstruction(taicpu(hp1).oper[X]^.reg, p)
)
) then
Exit;
end;
{ The instruction can be safely moved }
asml.Remove(hp1);
{ Try to insert after the last instruction where the FLAGS register is not yet in use }
@ -6537,9 +6573,9 @@ unit aoptx86;
else
asml.InsertAfter(hp1, hp2);
DebugMsg(SPeepholeOptimization + 'Swapped ' + debug_op2str(taicpu(p).opcode) + ' and mov instructions to improve optimisation potential', hp1);
DebugMsg(SPeepholeOptimization + 'Swapped ' + debug_op2str(taicpu(p).opcode) + ' and ' + debug_op2str(taicpu(hp1).opcode) + ' instructions to improve optimisation potential', hp1);
for X := 0 to 1 do
for X := 0 to taicpu(hp1).ops - 1 do
case taicpu(hp1).oper[X]^.typ of
top_reg:
AllocRegBetween(taicpu(hp1).oper[X]^.reg, hp1, p, UsedRegs);
@ -6553,6 +6589,12 @@ unit aoptx86;
else
;
end;
if taicpu(hp1).opcode = A_LEA then
{ The flags will be overwritten by the CMP/TEST instruction }
ConvertLEA(taicpu(hp1));
Result := True;
end;