* merge request 75 by J. Gareth "Kit" Moreton manually applied:

This merge request makes a number of improvements to the DeepMOVOpt method and supporting functions:

      * ReplaceRegisterInInstruction now replaces registers in references that are written to
        (since the registers themselves won't change)
      * RegModifiedByInstruction will no longer return True for a register that appears in a reference
        that's written to (for the same reason as above) - special operations like MOVSS
        (the 0-operand version) aren't affected.
      * DeepMOVOpt returning True will now always set the Result of OptPass1MOV to True even though p
        wasn't directly modified, since this often caused missed optimisations.
      * Some of the speed-ups in the patch from #32916 have also been applied in order to make
        the general DeepMOVOpt run faster, notably it tries to avoid calling UpdateUsedRegs where possible.
This commit is contained in:
florian 2021-10-17 09:50:47 +02:00
parent 5c75ef30ce
commit 4012c3dbd4

View File

@ -115,7 +115,7 @@ unit aoptx86;
{ Returns true if the reference only refers to ESP or EBP (or their 64-bit equivalents),
or writes to a global symbol }
class function IsRefSafe(const ref: PReference): Boolean; static; inline;
class function IsRefSafe(const ref: PReference): Boolean; static;
{ Returns true if the given MOV instruction can be safely converted to CMOV }
@ -785,6 +785,14 @@ unit aoptx86;
function TX86AsmOptimizer.RegModifiedByInstruction(Reg: TRegister; p1: tai): boolean;
const
WriteOps: array[0..3] of set of TInsChange =
([CH_RWOP1,CH_WOP1,CH_MOP1],
[Ch_RWOP2,Ch_WOP2,Ch_MOP2],
[Ch_RWOP3,Ch_WOP3,Ch_MOP3],
[Ch_RWOP4,Ch_WOP4,Ch_MOP4]);
var
OperIdx: Integer;
begin
Result := False;
if p1.typ <> ait_instruction then
@ -909,22 +917,12 @@ unit aoptx86;
end;
end;
end;
if ([CH_RWOP1,CH_WOP1,CH_MOP1]*Ch<>[]) and reginop(reg,taicpu(p1).oper[0]^) then
begin
Result := true;
exit
end;
if ([Ch_RWOP2,Ch_WOP2,Ch_MOP2]*Ch<>[]) and reginop(reg,taicpu(p1).oper[1]^) then
begin
Result := true;
exit
end;
if ([Ch_RWOP3,Ch_WOP3,Ch_MOP3]*Ch<>[]) and reginop(reg,taicpu(p1).oper[2]^) then
begin
Result := true;
exit
end;
if ([Ch_RWOP4,Ch_WOP4,Ch_MOP4]*Ch<>[]) and reginop(reg,taicpu(p1).oper[3]^) then
for OperIdx := 0 to taicpu(p1).ops - 1 do
if (WriteOps[OperIdx]*Ch<>[]) and
{ The register doesn't get modified inside a reference }
(taicpu(p1).oper[OperIdx]^.typ = top_reg) and
SuperRegistersEqual(reg,taicpu(p1).oper[OperIdx]^.reg) then
begin
Result := true;
exit
@ -2199,9 +2197,10 @@ unit aoptx86;
Result := False;
for OperIdx := 0 to p.ops - 1 do
if (ReadFlag[OperIdx] in InsProp[p.Opcode].Ch) and
if (ReadFlag[OperIdx] in InsProp[p.Opcode].Ch) then
begin
{ The shift and rotate instructions can only use CL }
not (
if not (
(OperIdx = 0) and
{ This second condition just helps to avoid unnecessarily
calling MatchInstruction for 10 different opcodes }
@ -2209,22 +2208,27 @@ unit aoptx86;
MatchInstruction(p, [A_RCL, A_RCR, A_ROL, A_ROR, A_SAL, A_SAR, A_SHL, A_SHLD, A_SHR, A_SHRD], [])
) then
Result := ReplaceRegisterInOper(p, OperIdx, AOldReg, ANewReg) or Result;
end
else if p.oper[OperIdx]^.typ = top_ref then
{ It's okay to replace registers in references that get written to }
Result := ReplaceRegisterInOper(p, OperIdx, AOldReg, ANewReg) or Result;
end;
class function TX86AsmOptimizer.IsRefSafe(const ref: PReference): Boolean; inline;
class function TX86AsmOptimizer.IsRefSafe(const ref: PReference): Boolean;
begin
with ref^ do
Result :=
(ref^.index = NR_NO) and
(index = NR_NO) and
(
{$ifdef x86_64}
(
(ref^.base = NR_RIP) and
(ref^.refaddr in [addr_pic, addr_pic_no_got])
(base = NR_RIP) and
(refaddr in [addr_pic, addr_pic_no_got])
) or
{$endif x86_64}
(ref^.base = NR_STACK_POINTER_REG) or
(ref^.base = current_procinfo.framepointer)
(base = NR_STACK_POINTER_REG) or
(base = current_procinfo.framepointer)
);
end;
@ -2416,6 +2420,9 @@ unit aoptx86;
if RegReadByInstruction(CurrentReg, hp1) and
DeepMOVOpt(taicpu(p), taicpu(hp1)) then
begin
{ A change has occurred, just not in p }
Result := True;
TransferUsedRegs(TmpUsedRegs);
UpdateUsedRegs(TmpUsedRegs, tai(p.Next));
@ -3359,11 +3366,30 @@ unit aoptx86;
{ Saves on a large number of dereferences }
ActiveReg := taicpu(p).oper[1]^.reg;
TransferUsedRegs(TmpUsedRegs);
UpdateUsedRegs(TmpUsedRegs, tai(p.Next));
while GetNextInstructionUsingRegCond(hp3,hp2,ActiveReg,CrossJump) and
{ GetNextInstructionUsingRegCond only searches one instruction ahead unless -O3 is specified }
(hp2.typ=ait_instruction) do
begin
case taicpu(hp2).opcode of
A_POP:
if MatchOperand(taicpu(hp2).oper[0]^,ActiveReg) then
begin
if not CrossJump and
not RegUsedBetween(ActiveReg, p, hp2) then
begin
{ We can remove the original MOV since the register
wasn't used between it and its popping from the stack }
DebugMsg(SPeepholeOptimization + 'Mov2Nop 3c done',p);
RemoveCurrentp(p, hp1);
Result := True;
Exit;
end;
{ Can't go any further }
Break;
end;
A_MOV:
if MatchOperand(taicpu(hp2).oper[0]^,ActiveReg) and
((taicpu(p).oper[0]^.typ=top_const) or
@ -3377,9 +3403,6 @@ unit aoptx86;
mov %treg, y
}
TransferUsedRegs(TmpUsedRegs);
TmpUsedRegs[R_INTREGISTER].Update(tai(p.Next));
{ We don't need to call UpdateUsedRegs for every instruction between
p and hp2 because the register we're concerned about will not
become deallocated (otherwise GetNextInstructionUsingReg would
@ -3387,8 +3410,8 @@ unit aoptx86;
TempRegUsed :=
CrossJump { Assume the register is in use if it crossed a conditional jump } or
RegUsedAfterInstruction(ActiveReg, hp2, TmpUsedRegs) or
RegReadByInstruction(ActiveReg, hp1);
RegReadByInstruction(ActiveReg, hp3) or
RegUsedAfterInstruction(ActiveReg, hp2, TmpUsedRegs);
case taicpu(p).oper[0]^.typ Of
top_reg:
@ -3557,40 +3580,41 @@ unit aoptx86;
Exit;
end;
else
if MatchOpType(taicpu(p), top_reg, top_reg) then
{ Move down to the MatchOpType if-block below };
end;
{ Also catches MOV/S/Z instructions that aren't modified }
if taicpu(p).oper[0]^.typ = top_reg then
begin
TransferUsedRegs(TmpUsedRegs);
TmpUsedRegs[R_INTREGISTER].Update(tai(p.Next));
CurrentReg := taicpu(p).oper[0]^.reg;
if
not RegModifiedByInstruction(taicpu(p).oper[0]^.reg, hp1) and
not RegModifiedBetween(taicpu(p).oper[0]^.reg, hp1, hp2) and
DeepMovOpt(taicpu(p), taicpu(hp2)) then
not RegModifiedByInstruction(CurrentReg, hp3) and
not RegModifiedBetween(CurrentReg, hp3, hp2) and
DeepMOVOpt(taicpu(p), taicpu(hp2)) then
begin
Result := True;
{ Just in case something didn't get modified (e.g. an
implicit register) }
if not RegReadByInstruction(ActiveReg, hp2) and
implicit register). Also, if it does read from this
register, then there's no longer an advantage to
changing the register on subsequent instructions.}
if not RegReadByInstruction(ActiveReg, hp2) then
begin
{ If a conditional jump was crossed, do not delete
the original MOV no matter what }
not CrossJump then
if not CrossJump and
{ RegEndOfLife returns True if the register is
deallocated before the next instruction or has
been loaded with a new value }
RegEndOfLife(ActiveReg, taicpu(hp2)) then
begin
TransferUsedRegs(TmpUsedRegs);
UpdateUsedRegs(TmpUsedRegs, tai(p.Next));
UpdateUsedRegs(TmpUsedRegs, tai(hp1.Next));
if
{ Make sure the original register isn't still present
and has been written to (e.g. with SHRX) }
RegLoadedWithNewValue(ActiveReg, hp2) or
not RegUsedAfterInstruction(ActiveReg, hp2, TmpUsedRegs) then
begin
RegUsedAfterInstruction(ActiveReg, hp2, TmpUsedRegs);
{ We can remove the original MOV }
DebugMsg(SPeepholeOptimization + 'Mov2Nop 3b done',p);
RemoveCurrentp(p, hp1);
Result := True;
Exit;
end
else
end;
if not RegModifiedByInstruction(ActiveReg, hp2) then
begin
{ See if there's more we can optimise }
hp3 := hp2;
@ -3599,7 +3623,6 @@ unit aoptx86;
end;
end;
end;
end;
{ Break out of the while loop under normal circumstances }
Break;