mirror of
https://gitlab.com/freepascal.org/fpc/source.git
synced 2025-04-17 10:39:33 +02:00
* manually merged merge request 69 by J. Gareth "Kit" Moreton:
x86: CMP/MOV refactoring and expansion This merge request refactors the SwapMovCmp routine, and calls to it, to be more self-contained, having the preliminary checks built-in to ensure that moving the MOV instruction is actually a sound idea, while also making it more general-purpose so it can handle instructions that are not MOV operations. This feature is primarily for future expansion, but also cleans up the code for the x86 peephole optimizer.
This commit is contained in:
parent
4012c3dbd4
commit
ea6529ff63
@ -193,7 +193,7 @@ unit aoptx86;
|
||||
procedure ConvertJumpToRET(const p: tai; const ret_p: tai);
|
||||
|
||||
function CheckJumpMovTransferOpt(var p: tai; hp1: tai; LoopCount: Integer; out Count: Integer): Boolean;
|
||||
procedure SwapMovCmp(var p, hp1: tai);
|
||||
function TrySwapMovCmp(var p, hp1: tai): Boolean;
|
||||
|
||||
{ Processor-dependent reference optimisation }
|
||||
class procedure OptimizeRefs(var p: taicpu); static;
|
||||
@ -772,6 +772,16 @@ unit aoptx86;
|
||||
Result:=([Ch_W0IntFlag,Ch_W1IntFlag,Ch_WFlags]*insprop[taicpu(p1).opcode].Ch)<>[];
|
||||
R_SUBFLAGDIRECTION:
|
||||
Result:=([Ch_RDirFlag,Ch_W0DirFlag,Ch_W1DirFlag,Ch_WFlags]*insprop[taicpu(p1).opcode].Ch)<>[];
|
||||
R_SUBW,R_SUBD,R_SUBQ:
|
||||
{ Everything except the direction bits }
|
||||
Result:=
|
||||
([Ch_RCarryFlag,Ch_RParityFlag,Ch_RAuxiliaryFlag,Ch_RZeroFlag,Ch_RSignFlag,Ch_ROverflowFlag,
|
||||
Ch_WCarryFlag,Ch_WParityFlag,Ch_WAuxiliaryFlag,Ch_WZeroFlag,Ch_WSignFlag,Ch_WOverflowFlag,
|
||||
Ch_W0CarryFlag,Ch_W0ParityFlag,Ch_W0AuxiliaryFlag,Ch_W0ZeroFlag,Ch_W0SignFlag,Ch_W0OverflowFlag,
|
||||
Ch_W1CarryFlag,Ch_W1ParityFlag,Ch_W1AuxiliaryFlag,Ch_W1ZeroFlag,Ch_W1SignFlag,Ch_W1OverflowFlag,
|
||||
Ch_WUCarryFlag,Ch_WUParityFlag,Ch_WUAuxiliaryFlag,Ch_WUZeroFlag,Ch_WUSignFlag,Ch_WUOverflowFlag,
|
||||
Ch_RWCarryFlag,Ch_RWParityFlag,Ch_RWAuxiliaryFlag,Ch_RWZeroFlag,Ch_RWSignFlag,Ch_RWOverflowFlag
|
||||
]*insprop[taicpu(p1).opcode].Ch)<>[];
|
||||
else
|
||||
;
|
||||
end;
|
||||
@ -4171,33 +4181,8 @@ unit aoptx86;
|
||||
Result := False;
|
||||
|
||||
if GetNextInstruction(p, hp1) and
|
||||
MatchInstruction(hp1,A_MOV,[]) and
|
||||
(
|
||||
(taicpu(p).oper[0]^.typ <> top_reg) or
|
||||
not RegInInstruction(taicpu(p).oper[0]^.reg, hp1)
|
||||
) and
|
||||
(
|
||||
(taicpu(p).oper[1]^.typ <> top_reg) or
|
||||
not RegInInstruction(taicpu(p).oper[1]^.reg, hp1)
|
||||
) and
|
||||
(
|
||||
{ Make sure the register written to doesn't appear in the
|
||||
test instruction (in a reference, say) }
|
||||
(taicpu(hp1).oper[1]^.typ <> top_reg) or
|
||||
not RegInInstruction(taicpu(hp1).oper[1]^.reg, p)
|
||||
) then
|
||||
TrySwapMovCmp(p, hp1) then
|
||||
begin
|
||||
{ If we have something like:
|
||||
test %reg1,%reg1
|
||||
mov 0,%reg2
|
||||
|
||||
And no registers are shared (the two %reg1's can be different, as
|
||||
long as neither of them are also %reg2), move the MOV command to
|
||||
before the comparison as this means it can be optimised without
|
||||
worrying about the FLAGS register. (This combination is generated
|
||||
by "J(c)Mov1JmpMov0 -> Set(~c)", among other things).
|
||||
}
|
||||
SwapMovCmp(p, hp1);
|
||||
Result := True;
|
||||
Exit;
|
||||
end;
|
||||
@ -5730,32 +5715,8 @@ unit aoptx86;
|
||||
end;
|
||||
end;
|
||||
|
||||
if MatchInstruction(hp1,A_MOV,[]) and
|
||||
(
|
||||
(taicpu(p).oper[0]^.typ <> top_reg) or
|
||||
not RegInInstruction(taicpu(p).oper[0]^.reg, hp1)
|
||||
) and
|
||||
(
|
||||
(taicpu(p).oper[1]^.typ <> top_reg) or
|
||||
not RegInInstruction(taicpu(p).oper[1]^.reg, hp1)
|
||||
) and
|
||||
(
|
||||
{ Make sure the register written to doesn't appear in the
|
||||
cmp instruction (in a reference, say) }
|
||||
(taicpu(hp1).oper[1]^.typ <> top_reg) or
|
||||
not RegInInstruction(taicpu(hp1).oper[1]^.reg, p)
|
||||
) then
|
||||
if TrySwapMovCmp(p, hp1) then
|
||||
begin
|
||||
{ If we have something like:
|
||||
cmp ###,%reg1
|
||||
mov 0,%reg2
|
||||
|
||||
And no registers are shared, move the MOV command to before the
|
||||
comparison as this means it can be optimised without worrying
|
||||
about the FLAGS register. (This combination is generated by
|
||||
"J(c)Mov1JmpMov0 -> Set(~c)", among other things).
|
||||
}
|
||||
SwapMovCmp(p, hp1);
|
||||
Result := True;
|
||||
Exit;
|
||||
end;
|
||||
@ -6524,11 +6485,86 @@ unit aoptx86;
|
||||
end;
|
||||
|
||||
|
||||
procedure TX86AsmOptimizer.SwapMovCmp(var p, hp1: tai);
|
||||
function TX86AsmOptimizer.TrySwapMovCmp(var p, hp1: tai): Boolean;
|
||||
var
|
||||
hp2: tai;
|
||||
X: Integer;
|
||||
const
|
||||
WriteOp: array[0..3] of set of TInsChange = (
|
||||
[Ch_Wop1, Ch_RWop1, Ch_Mop1],
|
||||
[Ch_Wop2, Ch_RWop2, Ch_Mop2],
|
||||
[Ch_Wop3, Ch_RWop3, Ch_Mop3],
|
||||
[Ch_Wop4, Ch_RWop4, Ch_Mop4]);
|
||||
|
||||
RegWriteFlags: array[0..7] of set of TInsChange = (
|
||||
{ The order is important: EAX, ECX, EDX, EBX, ESI, EDI, EBP, ESP }
|
||||
[Ch_WEAX, Ch_RWEAX, Ch_MEAX{$ifdef x86_64}, Ch_WRAX, Ch_RWRAX, Ch_MRAX{$endif x86_64}],
|
||||
[Ch_WECX, Ch_RWECX, Ch_MECX{$ifdef x86_64}, Ch_WRCX, Ch_RWRCX, Ch_MRCX{$endif x86_64}],
|
||||
[Ch_WEDX, Ch_RWEDX, Ch_MEDX{$ifdef x86_64}, Ch_WRDX, Ch_RWRDX, Ch_MRDX{$endif x86_64}],
|
||||
[Ch_WEBX, Ch_RWEBX, Ch_MEBX{$ifdef x86_64}, Ch_WRBX, Ch_RWRBX, Ch_MRBX{$endif x86_64}],
|
||||
[Ch_WESI, Ch_RWESI, Ch_MESI{$ifdef x86_64}, Ch_WRSI, Ch_RWRSI, Ch_MRSI{$endif x86_64}],
|
||||
[Ch_WEDI, Ch_RWEDI, Ch_MEDI{$ifdef x86_64}, Ch_WRDI, Ch_RWRDI, Ch_MRDI{$endif x86_64}],
|
||||
[Ch_WEBP, Ch_RWEBP, Ch_MEBP{$ifdef x86_64}, Ch_WRBP, Ch_RWRBP, Ch_MRBP{$endif x86_64}],
|
||||
[Ch_WESP, Ch_RWESP, Ch_MESP{$ifdef x86_64}, Ch_WRSP, Ch_RWRSP, Ch_MRSP{$endif x86_64}]);
|
||||
|
||||
begin
|
||||
{ If we have something like:
|
||||
cmp ###,%reg1
|
||||
mov 0,%reg2
|
||||
|
||||
And no modified registers are shared, move the instruction to before
|
||||
the comparison as this means it can be optimised without worrying
|
||||
about the FLAGS register. (CMP/MOV is generated by
|
||||
"J(c)Mov1JmpMov0 -> Set(~c)", among other things).
|
||||
|
||||
As long as the second instruction doesn't use the flags or one of the
|
||||
registers used by CMP or TEST (also check any references that use the
|
||||
registers), then it can be moved prior to the comparison.
|
||||
}
|
||||
|
||||
Result := False;
|
||||
if (hp1.typ <> ait_instruction) or
|
||||
taicpu(hp1).is_jmp or
|
||||
RegInInstruction(NR_DEFAULTFLAGS, hp1) then
|
||||
Exit;
|
||||
|
||||
{ NOP is a pipeline fence, likely marking the beginning of the function
|
||||
epilogue, so drop out. Similarly, drop out if POP or RET are
|
||||
encountered }
|
||||
if MatchInstruction(hp1, A_NOP, A_POP, []) then
|
||||
Exit;
|
||||
|
||||
if (taicpu(hp1).opcode = A_MOVSS) and
|
||||
(taicpu(hp1).ops = 0) then
|
||||
{ Wrong MOVSS }
|
||||
Exit;
|
||||
|
||||
{ Check for writes to specific registers first }
|
||||
{ EAX, ECX, EDX, EBX, ESI, EDI, EBP, ESP in that order }
|
||||
for X := 0 to 7 do
|
||||
if (RegWriteFlags[X] * InsProp[taicpu(hp1).opcode].Ch <> [])
|
||||
and RegInInstruction(newreg(R_INTREGISTER, TSuperRegister(X), R_SUBWHOLE), p) then
|
||||
Exit;
|
||||
|
||||
for X := 0 to taicpu(hp1).ops - 1 do
|
||||
begin
|
||||
{ Check to see if this operand writes to something }
|
||||
if ((WriteOp[X] * InsProp[taicpu(hp1).opcode].Ch) <> []) and
|
||||
{ And matches something in the CMP/TEST instruction }
|
||||
(
|
||||
MatchOperand(taicpu(hp1).oper[X]^, taicpu(p).oper[0]^) or
|
||||
MatchOperand(taicpu(hp1).oper[X]^, taicpu(p).oper[1]^) or
|
||||
(
|
||||
{ If it's a register, make sure the register written to doesn't
|
||||
appear in the cmp instruction as part of a reference }
|
||||
(taicpu(hp1).oper[X]^.typ = top_reg) and
|
||||
RegInInstruction(taicpu(hp1).oper[X]^.reg, p)
|
||||
)
|
||||
) then
|
||||
Exit;
|
||||
end;
|
||||
|
||||
{ The instruction can be safely moved }
|
||||
asml.Remove(hp1);
|
||||
|
||||
{ Try to insert after the last instructions where the FLAGS register is not yet in use }
|
||||
@ -6537,9 +6573,9 @@ unit aoptx86;
|
||||
else
|
||||
asml.InsertAfter(hp1, hp2);
|
||||
|
||||
DebugMsg(SPeepholeOptimization + 'Swapped ' + debug_op2str(taicpu(p).opcode) + ' and mov instructions to improve optimisation potential', hp1);
|
||||
DebugMsg(SPeepholeOptimization + 'Swapped ' + debug_op2str(taicpu(p).opcode) + ' and ' + debug_op2str(taicpu(hp1).opcode) + ' instructions to improve optimisation potential', hp1);
|
||||
|
||||
for X := 0 to 1 do
|
||||
for X := 0 to taicpu(hp1).ops - 1 do
|
||||
case taicpu(hp1).oper[X]^.typ of
|
||||
top_reg:
|
||||
AllocRegBetween(taicpu(hp1).oper[X]^.reg, hp1, p, UsedRegs);
|
||||
@ -6553,6 +6589,12 @@ unit aoptx86;
|
||||
else
|
||||
;
|
||||
end;
|
||||
|
||||
if taicpu(hp1).opcode = A_LEA then
|
||||
{ The flags will be overwritten by the CMP/TEST instruction }
|
||||
ConvertLEA(taicpu(hp1));
|
||||
|
||||
Result := True;
|
||||
end;
|
||||
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user