mirror of
https://gitlab.com/freepascal.org/fpc/source.git
synced 2025-08-11 21:29:42 +02:00
* x86: Introduced TrySwapMovOp method, and redesigned TrySwapMovCmp
to use it while also trying to move one more instruction back
This commit is contained in:
parent
6af886c2b9
commit
5f3749dc49
@ -211,6 +211,7 @@ unit aoptx86;
|
|||||||
procedure ConvertJumpToRET(const p: tai; const ret_p: tai);
|
procedure ConvertJumpToRET(const p: tai; const ret_p: tai);
|
||||||
|
|
||||||
function CheckJumpMovTransferOpt(var p: tai; hp1: tai; LoopCount: Integer; out Count: Integer): Boolean;
|
function CheckJumpMovTransferOpt(var p: tai; hp1: tai; LoopCount: Integer; out Count: Integer): Boolean;
|
||||||
|
function TrySwapMovOp(var p, hp1: tai): Boolean;
|
||||||
function TrySwapMovCmp(var p, hp1: tai): Boolean;
|
function TrySwapMovCmp(var p, hp1: tai): Boolean;
|
||||||
|
|
||||||
{ Processor-dependent reference optimisation }
|
{ Processor-dependent reference optimisation }
|
||||||
@ -8453,10 +8454,10 @@ unit aoptx86;
|
|||||||
Break;
|
Break;
|
||||||
|
|
||||||
case taicpu(hp2).opcode of
|
case taicpu(hp2).opcode of
|
||||||
A_MOVSS:
|
A_MOVSD:
|
||||||
begin
|
begin
|
||||||
if taicpu(hp2).ops = 0 then
|
if taicpu(hp2).ops = 0 then
|
||||||
{ Wrong MOVSS }
|
{ Wrong MOVSD }
|
||||||
Break;
|
Break;
|
||||||
Inc(Count);
|
Inc(Count);
|
||||||
if Count >= 5 then
|
if Count >= 5 then
|
||||||
@ -8475,7 +8476,7 @@ unit aoptx86;
|
|||||||
A_MOVZX,
|
A_MOVZX,
|
||||||
A_MOVAPS,
|
A_MOVAPS,
|
||||||
A_MOVUPS,
|
A_MOVUPS,
|
||||||
A_MOVSD,
|
A_MOVSS,
|
||||||
A_MOVAPD,
|
A_MOVAPD,
|
||||||
A_MOVUPD,
|
A_MOVUPD,
|
||||||
A_MOVDQA,
|
A_MOVDQA,
|
||||||
@ -8626,41 +8627,38 @@ unit aoptx86;
|
|||||||
end;
|
end;
|
||||||
|
|
||||||
|
|
||||||
function TX86AsmOptimizer.TrySwapMovCmp(var p, hp1: tai): Boolean;
|
const
|
||||||
|
WriteOp: array[0..3] of set of TInsChange = (
|
||||||
|
[Ch_Wop1, Ch_RWop1, Ch_Mop1],
|
||||||
|
[Ch_Wop2, Ch_RWop2, Ch_Mop2],
|
||||||
|
[Ch_Wop3, Ch_RWop3, Ch_Mop3],
|
||||||
|
[Ch_Wop4, Ch_RWop4, Ch_Mop4]);
|
||||||
|
|
||||||
|
RegWriteFlags: array[0..7] of set of TInsChange = (
|
||||||
|
{ The order is important: EAX, ECX, EDX, EBX, ESI, EDI, EBP, ESP }
|
||||||
|
[Ch_WEAX, Ch_RWEAX, Ch_MEAX{$ifdef x86_64}, Ch_WRAX, Ch_RWRAX, Ch_MRAX{$endif x86_64}],
|
||||||
|
[Ch_WECX, Ch_RWECX, Ch_MECX{$ifdef x86_64}, Ch_WRCX, Ch_RWRCX, Ch_MRCX{$endif x86_64}],
|
||||||
|
[Ch_WEDX, Ch_RWEDX, Ch_MEDX{$ifdef x86_64}, Ch_WRDX, Ch_RWRDX, Ch_MRDX{$endif x86_64}],
|
||||||
|
[Ch_WEBX, Ch_RWEBX, Ch_MEBX{$ifdef x86_64}, Ch_WRBX, Ch_RWRBX, Ch_MRBX{$endif x86_64}],
|
||||||
|
[Ch_WESI, Ch_RWESI, Ch_MESI{$ifdef x86_64}, Ch_WRSI, Ch_RWRSI, Ch_MRSI{$endif x86_64}],
|
||||||
|
[Ch_WEDI, Ch_RWEDI, Ch_MEDI{$ifdef x86_64}, Ch_WRDI, Ch_RWRDI, Ch_MRDI{$endif x86_64}],
|
||||||
|
[Ch_WEBP, Ch_RWEBP, Ch_MEBP{$ifdef x86_64}, Ch_WRBP, Ch_RWRBP, Ch_MRBP{$endif x86_64}],
|
||||||
|
[Ch_WESP, Ch_RWESP, Ch_MESP{$ifdef x86_64}, Ch_WRSP, Ch_RWRSP, Ch_MRSP{$endif x86_64}]);
|
||||||
|
|
||||||
|
|
||||||
|
function TX86AsmOptimizer.TrySwapMovOp(var p, hp1: tai): Boolean;
|
||||||
var
|
var
|
||||||
hp2: tai;
|
hp2: tai;
|
||||||
X: Integer;
|
X: Integer;
|
||||||
const
|
|
||||||
WriteOp: array[0..3] of set of TInsChange = (
|
|
||||||
[Ch_Wop1, Ch_RWop1, Ch_Mop1],
|
|
||||||
[Ch_Wop2, Ch_RWop2, Ch_Mop2],
|
|
||||||
[Ch_Wop3, Ch_RWop3, Ch_Mop3],
|
|
||||||
[Ch_Wop4, Ch_RWop4, Ch_Mop4]);
|
|
||||||
|
|
||||||
RegWriteFlags: array[0..7] of set of TInsChange = (
|
|
||||||
{ The order is important: EAX, ECX, EDX, EBX, ESI, EDI, EBP, ESP }
|
|
||||||
[Ch_WEAX, Ch_RWEAX, Ch_MEAX{$ifdef x86_64}, Ch_WRAX, Ch_RWRAX, Ch_MRAX{$endif x86_64}],
|
|
||||||
[Ch_WECX, Ch_RWECX, Ch_MECX{$ifdef x86_64}, Ch_WRCX, Ch_RWRCX, Ch_MRCX{$endif x86_64}],
|
|
||||||
[Ch_WEDX, Ch_RWEDX, Ch_MEDX{$ifdef x86_64}, Ch_WRDX, Ch_RWRDX, Ch_MRDX{$endif x86_64}],
|
|
||||||
[Ch_WEBX, Ch_RWEBX, Ch_MEBX{$ifdef x86_64}, Ch_WRBX, Ch_RWRBX, Ch_MRBX{$endif x86_64}],
|
|
||||||
[Ch_WESI, Ch_RWESI, Ch_MESI{$ifdef x86_64}, Ch_WRSI, Ch_RWRSI, Ch_MRSI{$endif x86_64}],
|
|
||||||
[Ch_WEDI, Ch_RWEDI, Ch_MEDI{$ifdef x86_64}, Ch_WRDI, Ch_RWRDI, Ch_MRDI{$endif x86_64}],
|
|
||||||
[Ch_WEBP, Ch_RWEBP, Ch_MEBP{$ifdef x86_64}, Ch_WRBP, Ch_RWRBP, Ch_MRBP{$endif x86_64}],
|
|
||||||
[Ch_WESP, Ch_RWESP, Ch_MESP{$ifdef x86_64}, Ch_WRSP, Ch_RWRSP, Ch_MRSP{$endif x86_64}]);
|
|
||||||
|
|
||||||
begin
|
begin
|
||||||
{ If we have something like:
|
{ If we have something like:
|
||||||
cmp ###,%reg1
|
op ###,###
|
||||||
mov 0,%reg2
|
mov ###,###
|
||||||
|
|
||||||
And no modified registers are shared, move the instruction to before
|
Try to move the MOV instruction to before OP as long as OP and MOV don't
|
||||||
the comparison as this means it can be optimised without worrying
|
interfere with regard to what they write to.
|
||||||
about the FLAGS register. (CMP/MOV is generated by
|
|
||||||
"J(c)Mov1JmpMov0 -> Set(~c)", among other things).
|
|
||||||
|
|
||||||
As long as the second instruction doesn't use the flags or one of the
|
NOTE: p must be a 2-operand instruction
|
||||||
registers used by CMP or TEST (also check any references that use the
|
|
||||||
registers), then it can be moved prior to the comparison.
|
|
||||||
}
|
}
|
||||||
|
|
||||||
Result := False;
|
Result := False;
|
||||||
@ -8672,12 +8670,12 @@ unit aoptx86;
|
|||||||
{ NOP is a pipeline fence, likely marking the beginning of the function
|
{ NOP is a pipeline fence, likely marking the beginning of the function
|
||||||
epilogue, so drop out. Similarly, drop out if POP or RET are
|
epilogue, so drop out. Similarly, drop out if POP or RET are
|
||||||
encountered }
|
encountered }
|
||||||
if MatchInstruction(hp1, A_NOP, A_POP, []) then
|
if MatchInstruction(hp1, A_NOP, A_POP, A_RET, []) then
|
||||||
Exit;
|
Exit;
|
||||||
|
|
||||||
if (taicpu(hp1).opcode = A_MOVSS) and
|
if (taicpu(hp1).opcode = A_MOVSD) and
|
||||||
(taicpu(hp1).ops = 0) then
|
(taicpu(hp1).ops = 0) then
|
||||||
{ Wrong MOVSS }
|
{ Wrong MOVSD }
|
||||||
Exit;
|
Exit;
|
||||||
|
|
||||||
{ Check for writes to specific registers first }
|
{ Check for writes to specific registers first }
|
||||||
@ -8705,6 +8703,25 @@ unit aoptx86;
|
|||||||
Exit;
|
Exit;
|
||||||
end;
|
end;
|
||||||
|
|
||||||
|
{ Check p to make sure it doesn't write to something that affects hp1 }
|
||||||
|
|
||||||
|
{ Check for writes to specific registers first }
|
||||||
|
{ EAX, ECX, EDX, EBX, ESI, EDI, EBP, ESP in that order }
|
||||||
|
for X := 0 to 7 do
|
||||||
|
if (RegWriteFlags[X] * InsProp[taicpu(p).opcode].Ch <> [])
|
||||||
|
and RegInInstruction(newreg(R_INTREGISTER, TSuperRegister(X), R_SUBWHOLE), hp1) then
|
||||||
|
Exit;
|
||||||
|
|
||||||
|
for X := 0 to taicpu(p).ops - 1 do
|
||||||
|
begin
|
||||||
|
{ Check to see if this operand writes to something }
|
||||||
|
if ((WriteOp[X] * InsProp[taicpu(p).opcode].Ch) <> []) and
|
||||||
|
{ And matches something in hp1 }
|
||||||
|
(taicpu(p).oper[X]^.typ = top_reg) and
|
||||||
|
RegInInstruction(taicpu(p).oper[X]^.reg, hp1) then
|
||||||
|
Exit;
|
||||||
|
end;
|
||||||
|
|
||||||
{ The instruction can be safely moved }
|
{ The instruction can be safely moved }
|
||||||
asml.Remove(hp1);
|
asml.Remove(hp1);
|
||||||
|
|
||||||
@ -8712,6 +8729,17 @@ unit aoptx86;
|
|||||||
can be optimised into "xor %reg,%reg" later }
|
can be optimised into "xor %reg,%reg" later }
|
||||||
if SetAndTest(FindRegAllocBackward(NR_DEFAULTFLAGS, tai(p.Previous)), hp2) then
|
if SetAndTest(FindRegAllocBackward(NR_DEFAULTFLAGS, tai(p.Previous)), hp2) then
|
||||||
asml.InsertBefore(hp1, hp2)
|
asml.InsertBefore(hp1, hp2)
|
||||||
|
|
||||||
|
{ Failing that, try to insert after the last instruction where the
|
||||||
|
FLAGS register is not yet in use }
|
||||||
|
else if GetLastInstruction(p, hp2) and
|
||||||
|
(
|
||||||
|
(hp2.typ <> ait_instruction) or
|
||||||
|
{ Don't insert after an instruction that uses the flags when p doesn't use them }
|
||||||
|
RegInInstruction(NR_DEFAULTFLAGS, p) or
|
||||||
|
not RegInInstruction(NR_DEFAULTFLAGS, hp2)
|
||||||
|
) then
|
||||||
|
asml.InsertAfter(hp1, hp2)
|
||||||
else
|
else
|
||||||
{ Note, if p.Previous is nil (even if it should logically never be the
|
{ Note, if p.Previous is nil (even if it should logically never be the
|
||||||
case), FindRegAllocBackward immediately exits with False and so we
|
case), FindRegAllocBackward immediately exits with False and so we
|
||||||
@ -8721,26 +8749,90 @@ unit aoptx86;
|
|||||||
|
|
||||||
DebugMsg(SPeepholeOptimization + 'Swapped ' + debug_op2str(taicpu(p).opcode) + ' and ' + debug_op2str(taicpu(hp1).opcode) + ' instructions to improve optimisation potential', hp1);
|
DebugMsg(SPeepholeOptimization + 'Swapped ' + debug_op2str(taicpu(p).opcode) + ' and ' + debug_op2str(taicpu(hp1).opcode) + ' instructions to improve optimisation potential', hp1);
|
||||||
|
|
||||||
|
{ We can't trust UsedRegs because we're looking backwards, although we
|
||||||
|
know the registers are allocated after p at the very least, so manually
|
||||||
|
create tai_regalloc objects if needed }
|
||||||
for X := 0 to taicpu(hp1).ops - 1 do
|
for X := 0 to taicpu(hp1).ops - 1 do
|
||||||
case taicpu(hp1).oper[X]^.typ of
|
case taicpu(hp1).oper[X]^.typ of
|
||||||
top_reg:
|
top_reg:
|
||||||
AllocRegBetween(taicpu(hp1).oper[X]^.reg, hp1, p, UsedRegs);
|
begin
|
||||||
|
asml.InsertBefore(tai_regalloc.alloc(taicpu(hp1).oper[X]^.reg, nil), hp1);
|
||||||
|
IncludeRegInUsedRegs(taicpu(hp1).oper[X]^.reg, UsedRegs);
|
||||||
|
AllocRegBetween(taicpu(hp1).oper[X]^.reg, hp1, p, UsedRegs);
|
||||||
|
end;
|
||||||
top_ref:
|
top_ref:
|
||||||
begin
|
begin
|
||||||
if taicpu(hp1).oper[X]^.ref^.base <> NR_NO then
|
if taicpu(hp1).oper[X]^.ref^.base <> NR_NO then
|
||||||
AllocRegBetween(taicpu(hp1).oper[X]^.ref^.base, hp1, p, UsedRegs);
|
begin
|
||||||
|
asml.InsertBefore(tai_regalloc.alloc(taicpu(hp1).oper[X]^.ref^.base, nil), hp1);
|
||||||
|
IncludeRegInUsedRegs(taicpu(hp1).oper[X]^.ref^.base, UsedRegs);
|
||||||
|
AllocRegBetween(taicpu(hp1).oper[X]^.ref^.base, hp1, p, UsedRegs);
|
||||||
|
end;
|
||||||
if taicpu(hp1).oper[X]^.ref^.index <> NR_NO then
|
if taicpu(hp1).oper[X]^.ref^.index <> NR_NO then
|
||||||
AllocRegBetween(taicpu(hp1).oper[X]^.ref^.index, hp1, p, UsedRegs);
|
begin
|
||||||
|
asml.InsertBefore(tai_regalloc.alloc(taicpu(hp1).oper[X]^.ref^.index, nil), hp1);
|
||||||
|
IncludeRegInUsedRegs(taicpu(hp1).oper[X]^.ref^.index, UsedRegs);
|
||||||
|
AllocRegBetween(taicpu(hp1).oper[X]^.ref^.index, hp1, p, UsedRegs);
|
||||||
|
end;
|
||||||
end;
|
end;
|
||||||
else
|
else
|
||||||
;
|
;
|
||||||
end;
|
end;
|
||||||
|
|
||||||
|
Result := True;
|
||||||
|
end;
|
||||||
|
|
||||||
|
|
||||||
|
function TX86AsmOptimizer.TrySwapMovCmp(var p, hp1: tai): Boolean;
|
||||||
|
var
|
||||||
|
hp2: tai;
|
||||||
|
X: Integer;
|
||||||
|
begin
|
||||||
|
{ If we have something like:
|
||||||
|
cmp ###,%reg1
|
||||||
|
mov 0,%reg2
|
||||||
|
|
||||||
|
And no modified registers are shared, move the instruction to before
|
||||||
|
the comparison as this means it can be optimised without worrying
|
||||||
|
about the FLAGS register. (CMP/MOV is generated by
|
||||||
|
"J(c)Mov1JmpMov0 -> Set(~c)", among other things).
|
||||||
|
|
||||||
|
As long as the second instruction doesn't use the flags or one of the
|
||||||
|
registers used by CMP or TEST (also check any references that use the
|
||||||
|
registers), then it can be moved prior to the comparison.
|
||||||
|
}
|
||||||
|
|
||||||
|
Result := False;
|
||||||
|
if not TrySwapMovOp(p, hp1) then
|
||||||
|
Exit;
|
||||||
|
|
||||||
if taicpu(hp1).opcode = A_LEA then
|
if taicpu(hp1).opcode = A_LEA then
|
||||||
{ The flags will be overwritten by the CMP/TEST instruction }
|
{ The flags will be overwritten by the CMP/TEST instruction }
|
||||||
ConvertLEA(taicpu(hp1));
|
ConvertLEA(taicpu(hp1));
|
||||||
|
|
||||||
Result := True;
|
Result := True;
|
||||||
|
|
||||||
|
{ Can we move it one further back? }
|
||||||
|
if GetLastInstruction(hp1, hp2) and (hp2.typ = ait_instruction) and
|
||||||
|
{ Check to see if CMP/TEST is a comparison against zero }
|
||||||
|
(
|
||||||
|
(
|
||||||
|
(taicpu(p).opcode = A_CMP) and
|
||||||
|
MatchOperand(taicpu(p).oper[0]^, 0)
|
||||||
|
) or
|
||||||
|
(
|
||||||
|
(taicpu(p).opcode = A_TEST) and
|
||||||
|
(
|
||||||
|
OpsEqual(taicpu(p).oper[0]^, taicpu(p).oper[1]^) or
|
||||||
|
MatchOperand(taicpu(p).oper[0]^, -1)
|
||||||
|
)
|
||||||
|
)
|
||||||
|
) and
|
||||||
|
{ These instructions set the zero flag if the result is zero }
|
||||||
|
MatchInstruction(hp2, [A_ADD, A_SUB, A_OR, A_XOR, A_AND, A_POPCNT, A_LZCNT], []) and
|
||||||
|
OpsEqual(taicpu(hp2).oper[1]^, taicpu(p).oper[1]^) then
|
||||||
|
{ Looks like we can - if successful, this benefits PostPeepholeOptTestOr }
|
||||||
|
TrySwapMovOp(hp2, hp1);
|
||||||
end;
|
end;
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user