mirror of
https://gitlab.com/freepascal.org/fpc/source.git
synced 2025-04-20 19:09:23 +02:00
* x86: Introduced TrySwapMovOp method, and redesigned TrySwapMovCmp
to use it while also trying to move one more instruction back
This commit is contained in:
parent
6af886c2b9
commit
5f3749dc49
@ -211,6 +211,7 @@ unit aoptx86;
|
||||
procedure ConvertJumpToRET(const p: tai; const ret_p: tai);
|
||||
|
||||
function CheckJumpMovTransferOpt(var p: tai; hp1: tai; LoopCount: Integer; out Count: Integer): Boolean;
|
||||
function TrySwapMovOp(var p, hp1: tai): Boolean;
|
||||
function TrySwapMovCmp(var p, hp1: tai): Boolean;
|
||||
|
||||
{ Processor-dependent reference optimisation }
|
||||
@ -8453,10 +8454,10 @@ unit aoptx86;
|
||||
Break;
|
||||
|
||||
case taicpu(hp2).opcode of
|
||||
A_MOVSS:
|
||||
A_MOVSD:
|
||||
begin
|
||||
if taicpu(hp2).ops = 0 then
|
||||
{ Wrong MOVSS }
|
||||
{ Wrong MOVSD }
|
||||
Break;
|
||||
Inc(Count);
|
||||
if Count >= 5 then
|
||||
@ -8475,7 +8476,7 @@ unit aoptx86;
|
||||
A_MOVZX,
|
||||
A_MOVAPS,
|
||||
A_MOVUPS,
|
||||
A_MOVSD,
|
||||
A_MOVSS,
|
||||
A_MOVAPD,
|
||||
A_MOVUPD,
|
||||
A_MOVDQA,
|
||||
@ -8626,41 +8627,38 @@ unit aoptx86;
|
||||
end;
|
||||
|
||||
|
||||
function TX86AsmOptimizer.TrySwapMovCmp(var p, hp1: tai): Boolean;
|
||||
const
|
||||
WriteOp: array[0..3] of set of TInsChange = (
|
||||
[Ch_Wop1, Ch_RWop1, Ch_Mop1],
|
||||
[Ch_Wop2, Ch_RWop2, Ch_Mop2],
|
||||
[Ch_Wop3, Ch_RWop3, Ch_Mop3],
|
||||
[Ch_Wop4, Ch_RWop4, Ch_Mop4]);
|
||||
|
||||
RegWriteFlags: array[0..7] of set of TInsChange = (
|
||||
{ The order is important: EAX, ECX, EDX, EBX, ESI, EDI, EBP, ESP }
|
||||
[Ch_WEAX, Ch_RWEAX, Ch_MEAX{$ifdef x86_64}, Ch_WRAX, Ch_RWRAX, Ch_MRAX{$endif x86_64}],
|
||||
[Ch_WECX, Ch_RWECX, Ch_MECX{$ifdef x86_64}, Ch_WRCX, Ch_RWRCX, Ch_MRCX{$endif x86_64}],
|
||||
[Ch_WEDX, Ch_RWEDX, Ch_MEDX{$ifdef x86_64}, Ch_WRDX, Ch_RWRDX, Ch_MRDX{$endif x86_64}],
|
||||
[Ch_WEBX, Ch_RWEBX, Ch_MEBX{$ifdef x86_64}, Ch_WRBX, Ch_RWRBX, Ch_MRBX{$endif x86_64}],
|
||||
[Ch_WESI, Ch_RWESI, Ch_MESI{$ifdef x86_64}, Ch_WRSI, Ch_RWRSI, Ch_MRSI{$endif x86_64}],
|
||||
[Ch_WEDI, Ch_RWEDI, Ch_MEDI{$ifdef x86_64}, Ch_WRDI, Ch_RWRDI, Ch_MRDI{$endif x86_64}],
|
||||
[Ch_WEBP, Ch_RWEBP, Ch_MEBP{$ifdef x86_64}, Ch_WRBP, Ch_RWRBP, Ch_MRBP{$endif x86_64}],
|
||||
[Ch_WESP, Ch_RWESP, Ch_MESP{$ifdef x86_64}, Ch_WRSP, Ch_RWRSP, Ch_MRSP{$endif x86_64}]);
|
||||
|
||||
|
||||
function TX86AsmOptimizer.TrySwapMovOp(var p, hp1: tai): Boolean;
|
||||
var
|
||||
hp2: tai;
|
||||
X: Integer;
|
||||
const
|
||||
WriteOp: array[0..3] of set of TInsChange = (
|
||||
[Ch_Wop1, Ch_RWop1, Ch_Mop1],
|
||||
[Ch_Wop2, Ch_RWop2, Ch_Mop2],
|
||||
[Ch_Wop3, Ch_RWop3, Ch_Mop3],
|
||||
[Ch_Wop4, Ch_RWop4, Ch_Mop4]);
|
||||
|
||||
RegWriteFlags: array[0..7] of set of TInsChange = (
|
||||
{ The order is important: EAX, ECX, EDX, EBX, ESI, EDI, EBP, ESP }
|
||||
[Ch_WEAX, Ch_RWEAX, Ch_MEAX{$ifdef x86_64}, Ch_WRAX, Ch_RWRAX, Ch_MRAX{$endif x86_64}],
|
||||
[Ch_WECX, Ch_RWECX, Ch_MECX{$ifdef x86_64}, Ch_WRCX, Ch_RWRCX, Ch_MRCX{$endif x86_64}],
|
||||
[Ch_WEDX, Ch_RWEDX, Ch_MEDX{$ifdef x86_64}, Ch_WRDX, Ch_RWRDX, Ch_MRDX{$endif x86_64}],
|
||||
[Ch_WEBX, Ch_RWEBX, Ch_MEBX{$ifdef x86_64}, Ch_WRBX, Ch_RWRBX, Ch_MRBX{$endif x86_64}],
|
||||
[Ch_WESI, Ch_RWESI, Ch_MESI{$ifdef x86_64}, Ch_WRSI, Ch_RWRSI, Ch_MRSI{$endif x86_64}],
|
||||
[Ch_WEDI, Ch_RWEDI, Ch_MEDI{$ifdef x86_64}, Ch_WRDI, Ch_RWRDI, Ch_MRDI{$endif x86_64}],
|
||||
[Ch_WEBP, Ch_RWEBP, Ch_MEBP{$ifdef x86_64}, Ch_WRBP, Ch_RWRBP, Ch_MRBP{$endif x86_64}],
|
||||
[Ch_WESP, Ch_RWESP, Ch_MESP{$ifdef x86_64}, Ch_WRSP, Ch_RWRSP, Ch_MRSP{$endif x86_64}]);
|
||||
|
||||
begin
|
||||
{ If we have something like:
|
||||
cmp ###,%reg1
|
||||
mov 0,%reg2
|
||||
op ###,###
|
||||
mov ###,###
|
||||
|
||||
And no modified registers are shared, move the instruction to before
|
||||
the comparison as this means it can be optimised without worrying
|
||||
about the FLAGS register. (CMP/MOV is generated by
|
||||
"J(c)Mov1JmpMov0 -> Set(~c)", among other things).
|
||||
Try to move the MOV instruction to before OP as long as OP and MOV don't
|
||||
interfere in regards to what they write to.
|
||||
|
||||
As long as the second instruction doesn't use the flags or one of the
|
||||
registers used by CMP or TEST (also check any references that use the
|
||||
registers), then it can be moved prior to the comparison.
|
||||
NOTE: p must be a 2-operand instruction
|
||||
}
|
||||
|
||||
Result := False;
|
||||
@ -8672,12 +8670,12 @@ unit aoptx86;
|
||||
{ NOP is a pipeline fence, likely marking the beginning of the function
|
||||
epilogue, so drop out. Similarly, drop out if POP or RET are
|
||||
encountered }
|
||||
if MatchInstruction(hp1, A_NOP, A_POP, []) then
|
||||
if MatchInstruction(hp1, A_NOP, A_POP, A_RET, []) then
|
||||
Exit;
|
||||
|
||||
if (taicpu(hp1).opcode = A_MOVSS) and
|
||||
if (taicpu(hp1).opcode = A_MOVSD) and
|
||||
(taicpu(hp1).ops = 0) then
|
||||
{ Wrong MOVSS }
|
||||
{ Wrong MOVSD }
|
||||
Exit;
|
||||
|
||||
{ Check for writes to specific registers first }
|
||||
@ -8705,6 +8703,25 @@ unit aoptx86;
|
||||
Exit;
|
||||
end;
|
||||
|
||||
{ Check p to make sure it doesn't write to something that affects hp1 }
|
||||
|
||||
{ Check for writes to specific registers first }
|
||||
{ EAX, ECX, EDX, EBX, ESI, EDI, EBP, ESP in that order }
|
||||
for X := 0 to 7 do
|
||||
if (RegWriteFlags[X] * InsProp[taicpu(p).opcode].Ch <> [])
|
||||
and RegInInstruction(newreg(R_INTREGISTER, TSuperRegister(X), R_SUBWHOLE), hp1) then
|
||||
Exit;
|
||||
|
||||
for X := 0 to taicpu(p).ops - 1 do
|
||||
begin
|
||||
{ Check to see if this operand writes to something }
|
||||
if ((WriteOp[X] * InsProp[taicpu(p).opcode].Ch) <> []) and
|
||||
{ And matches something in hp1 }
|
||||
(taicpu(p).oper[X]^.typ = top_reg) and
|
||||
RegInInstruction(taicpu(p).oper[X]^.reg, hp1) then
|
||||
Exit;
|
||||
end;
|
||||
|
||||
{ The instruction can be safely moved }
|
||||
asml.Remove(hp1);
|
||||
|
||||
@ -8712,6 +8729,17 @@ unit aoptx86;
|
||||
can be optimised into "xor %reg,%reg" later }
|
||||
if SetAndTest(FindRegAllocBackward(NR_DEFAULTFLAGS, tai(p.Previous)), hp2) then
|
||||
asml.InsertBefore(hp1, hp2)
|
||||
|
||||
{ Failing that, try to insert after the last instructions where the
|
||||
FLAGS register is not yet in use }
|
||||
else if GetLastInstruction(p, hp2) and
|
||||
(
|
||||
(hp2.typ <> ait_instruction) or
|
||||
{ Don't insert after an instruction that uses the flags when p doesn't use them }
|
||||
RegInInstruction(NR_DEFAULTFLAGS, p) or
|
||||
not RegInInstruction(NR_DEFAULTFLAGS, hp2)
|
||||
) then
|
||||
asml.InsertAfter(hp1, hp2)
|
||||
else
|
||||
{ Note, if p.Previous is nil (even if it should logically never be the
|
||||
case), FindRegAllocBackward immediately exits with False and so we
|
||||
@ -8721,26 +8749,90 @@ unit aoptx86;
|
||||
|
||||
DebugMsg(SPeepholeOptimization + 'Swapped ' + debug_op2str(taicpu(p).opcode) + ' and ' + debug_op2str(taicpu(hp1).opcode) + ' instructions to improve optimisation potential', hp1);
|
||||
|
||||
{ We can't trust UsedRegs because we're looking backwards, although we
|
||||
know the registers are allocated after p at the very least, so manually
|
||||
create tai_regalloc objects if needed }
|
||||
for X := 0 to taicpu(hp1).ops - 1 do
|
||||
case taicpu(hp1).oper[X]^.typ of
|
||||
top_reg:
|
||||
AllocRegBetween(taicpu(hp1).oper[X]^.reg, hp1, p, UsedRegs);
|
||||
begin
|
||||
asml.InsertBefore(tai_regalloc.alloc(taicpu(hp1).oper[X]^.reg, nil), hp1);
|
||||
IncludeRegInUsedRegs(taicpu(hp1).oper[X]^.reg, UsedRegs);
|
||||
AllocRegBetween(taicpu(hp1).oper[X]^.reg, hp1, p, UsedRegs);
|
||||
end;
|
||||
top_ref:
|
||||
begin
|
||||
if taicpu(hp1).oper[X]^.ref^.base <> NR_NO then
|
||||
AllocRegBetween(taicpu(hp1).oper[X]^.ref^.base, hp1, p, UsedRegs);
|
||||
begin
|
||||
asml.InsertBefore(tai_regalloc.alloc(taicpu(hp1).oper[X]^.ref^.base, nil), hp1);
|
||||
IncludeRegInUsedRegs(taicpu(hp1).oper[X]^.ref^.base, UsedRegs);
|
||||
AllocRegBetween(taicpu(hp1).oper[X]^.ref^.base, hp1, p, UsedRegs);
|
||||
end;
|
||||
if taicpu(hp1).oper[X]^.ref^.index <> NR_NO then
|
||||
AllocRegBetween(taicpu(hp1).oper[X]^.ref^.index, hp1, p, UsedRegs);
|
||||
begin
|
||||
asml.InsertBefore(tai_regalloc.alloc(taicpu(hp1).oper[X]^.ref^.index, nil), hp1);
|
||||
IncludeRegInUsedRegs(taicpu(hp1).oper[X]^.ref^.index, UsedRegs);
|
||||
AllocRegBetween(taicpu(hp1).oper[X]^.ref^.index, hp1, p, UsedRegs);
|
||||
end;
|
||||
end;
|
||||
else
|
||||
;
|
||||
end;
|
||||
|
||||
Result := True;
|
||||
end;
|
||||
|
||||
|
||||
function TX86AsmOptimizer.TrySwapMovCmp(var p, hp1: tai): Boolean;
  var
    hp2: tai;
  begin
    { Purpose: given a comparison p (CMP or TEST) that is followed by hp1,
      e.g.

        cmp ###,%reg1
        mov 0,%reg2

      try to relocate hp1 so that it sits before the comparison.  When no
      modified registers are shared this means hp1 can be optimised without
      worrying about the FLAGS register.  (CMP/MOV is generated by
      "J(c)Mov1JmpMov0 -> Set(~c)", among other things).

      As long as the second instruction doesn't use the flags or one of the
      registers used by CMP or TEST (also check any references that use the
      registers), then it can be moved prior to the comparison.  The actual
      safety checks and the move itself are delegated to TrySwapMovOp.

      Parameters:
        p   - the CMP/TEST instruction
        hp1 - the instruction following p that is a candidate for moving

      Returns True if hp1 was moved in front of p (regardless of whether the
      additional "one further back" move below also succeeded), False if
      nothing was changed.
    }

    Result := False;
    if not TrySwapMovOp(p, hp1) then
      Exit;

    if taicpu(hp1).opcode = A_LEA then
      { The flags will be overwritten by the CMP/TEST instruction }
      ConvertLEA(taicpu(hp1));

    { From this point on the instruction stream has been modified }
    Result := True;

    { Can we move it one further back?  hp2 is the instruction that now
      precedes hp1 }
    if GetLastInstruction(hp1, hp2) and (hp2.typ = ait_instruction) and
      { Check to see if CMP/TEST is a comparison against zero }
      (
        (
          (taicpu(p).opcode = A_CMP) and
          MatchOperand(taicpu(p).oper[0]^, 0)
        ) or
        (
          (taicpu(p).opcode = A_TEST) and
          (
            { TEST with two identical operands, or with a first operand of -1 }
            OpsEqual(taicpu(p).oper[0]^, taicpu(p).oper[1]^) or
            MatchOperand(taicpu(p).oper[0]^, -1)
          )
        )
      ) and
      { These instructions set the zero flag if the result is zero }
      MatchInstruction(hp2, [A_ADD, A_SUB, A_OR, A_XOR, A_AND, A_POPCNT, A_LZCNT], []) and
      { ... and hp2 must write to the same operand that p is testing }
      OpsEqual(taicpu(hp2).oper[1]^, taicpu(p).oper[1]^) then
      { Looks like we can - if successful, this benefits PostPeepholeOptTestOr.
        The return value is deliberately ignored: Result is already True
        because of the first swap }
      TrySwapMovOp(hp2, hp1);
  end;
|
||||
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user