* merge request 75 by J. Gareth "Kit" Moreton manually applied:

This merge request makes a number of improvements to the DeepMOVOpt method and supporting functions:

      * ReplaceRegisterInInstruction now replaces registers in references that are written to
        (since writing to a reference doesn't change the registers used to address it)
      * RegModifiedByInstruction will no longer return True for a register that appears in a reference
        that's written to (for the same reason as above). Special operations like MOVSS
        (the 0-operand version) aren't affected.
      * DeepMOVOpt returning True will now always set the Result of OptPass1MOV to True even though p
        wasn't directly modified, since failing to do so often caused missed optimisations.
      * Some of the speed-ups in the patch from #32916 have also been applied in order to make
        the general DeepMOVOpt run faster; notably, it tries to avoid calling UpdateUsedRegs where possible.
This commit is contained in:
florian 2021-10-17 09:50:47 +02:00
parent 5c75ef30ce
commit 4012c3dbd4

View File

@ -115,7 +115,7 @@ unit aoptx86;
{ Returns true if the reference only refers to ESP or EBP (or their 64-bit equivalents), { Returns true if the reference only refers to ESP or EBP (or their 64-bit equivalents),
or writes to a global symbol } or writes to a global symbol }
class function IsRefSafe(const ref: PReference): Boolean; static; inline; class function IsRefSafe(const ref: PReference): Boolean; static;
{ Returns true if the given MOV instruction can be safely converted to CMOV } { Returns true if the given MOV instruction can be safely converted to CMOV }
@ -785,6 +785,14 @@ unit aoptx86;
function TX86AsmOptimizer.RegModifiedByInstruction(Reg: TRegister; p1: tai): boolean; function TX86AsmOptimizer.RegModifiedByInstruction(Reg: TRegister; p1: tai): boolean;
const
WriteOps: array[0..3] of set of TInsChange =
([CH_RWOP1,CH_WOP1,CH_MOP1],
[Ch_RWOP2,Ch_WOP2,Ch_MOP2],
[Ch_RWOP3,Ch_WOP3,Ch_MOP3],
[Ch_RWOP4,Ch_WOP4,Ch_MOP4]);
var
OperIdx: Integer;
begin begin
Result := False; Result := False;
if p1.typ <> ait_instruction then if p1.typ <> ait_instruction then
@ -909,22 +917,12 @@ unit aoptx86;
end; end;
end; end;
end; end;
if ([CH_RWOP1,CH_WOP1,CH_MOP1]*Ch<>[]) and reginop(reg,taicpu(p1).oper[0]^) then
begin for OperIdx := 0 to taicpu(p1).ops - 1 do
Result := true; if (WriteOps[OperIdx]*Ch<>[]) and
exit { The register doesn't get modified inside a reference }
end; (taicpu(p1).oper[OperIdx]^.typ = top_reg) and
if ([Ch_RWOP2,Ch_WOP2,Ch_MOP2]*Ch<>[]) and reginop(reg,taicpu(p1).oper[1]^) then SuperRegistersEqual(reg,taicpu(p1).oper[OperIdx]^.reg) then
begin
Result := true;
exit
end;
if ([Ch_RWOP3,Ch_WOP3,Ch_MOP3]*Ch<>[]) and reginop(reg,taicpu(p1).oper[2]^) then
begin
Result := true;
exit
end;
if ([Ch_RWOP4,Ch_WOP4,Ch_MOP4]*Ch<>[]) and reginop(reg,taicpu(p1).oper[3]^) then
begin begin
Result := true; Result := true;
exit exit
@ -2199,9 +2197,10 @@ unit aoptx86;
Result := False; Result := False;
for OperIdx := 0 to p.ops - 1 do for OperIdx := 0 to p.ops - 1 do
if (ReadFlag[OperIdx] in InsProp[p.Opcode].Ch) and if (ReadFlag[OperIdx] in InsProp[p.Opcode].Ch) then
begin
{ The shift and rotate instructions can only use CL } { The shift and rotate instructions can only use CL }
not ( if not (
(OperIdx = 0) and (OperIdx = 0) and
{ This second condition just helps to avoid unnecessarily { This second condition just helps to avoid unnecessarily
calling MatchInstruction for 10 different opcodes } calling MatchInstruction for 10 different opcodes }
@ -2209,22 +2208,27 @@ unit aoptx86;
MatchInstruction(p, [A_RCL, A_RCR, A_ROL, A_ROR, A_SAL, A_SAR, A_SHL, A_SHLD, A_SHR, A_SHRD], []) MatchInstruction(p, [A_RCL, A_RCR, A_ROL, A_ROR, A_SAL, A_SAR, A_SHL, A_SHLD, A_SHR, A_SHRD], [])
) then ) then
Result := ReplaceRegisterInOper(p, OperIdx, AOldReg, ANewReg) or Result; Result := ReplaceRegisterInOper(p, OperIdx, AOldReg, ANewReg) or Result;
end
else if p.oper[OperIdx]^.typ = top_ref then
{ It's okay to replace registers in references that get written to }
Result := ReplaceRegisterInOper(p, OperIdx, AOldReg, ANewReg) or Result;
end; end;
class function TX86AsmOptimizer.IsRefSafe(const ref: PReference): Boolean; inline; class function TX86AsmOptimizer.IsRefSafe(const ref: PReference): Boolean;
begin begin
with ref^ do
Result := Result :=
(ref^.index = NR_NO) and (index = NR_NO) and
( (
{$ifdef x86_64} {$ifdef x86_64}
( (
(ref^.base = NR_RIP) and (base = NR_RIP) and
(ref^.refaddr in [addr_pic, addr_pic_no_got]) (refaddr in [addr_pic, addr_pic_no_got])
) or ) or
{$endif x86_64} {$endif x86_64}
(ref^.base = NR_STACK_POINTER_REG) or (base = NR_STACK_POINTER_REG) or
(ref^.base = current_procinfo.framepointer) (base = current_procinfo.framepointer)
); );
end; end;
@ -2416,6 +2420,9 @@ unit aoptx86;
if RegReadByInstruction(CurrentReg, hp1) and if RegReadByInstruction(CurrentReg, hp1) and
DeepMOVOpt(taicpu(p), taicpu(hp1)) then DeepMOVOpt(taicpu(p), taicpu(hp1)) then
begin begin
{ A change has occurred, just not in p }
Result := True;
TransferUsedRegs(TmpUsedRegs); TransferUsedRegs(TmpUsedRegs);
UpdateUsedRegs(TmpUsedRegs, tai(p.Next)); UpdateUsedRegs(TmpUsedRegs, tai(p.Next));
@ -3359,11 +3366,30 @@ unit aoptx86;
{ Saves on a large number of dereferences } { Saves on a large number of dereferences }
ActiveReg := taicpu(p).oper[1]^.reg; ActiveReg := taicpu(p).oper[1]^.reg;
TransferUsedRegs(TmpUsedRegs);
UpdateUsedRegs(TmpUsedRegs, tai(p.Next));
while GetNextInstructionUsingRegCond(hp3,hp2,ActiveReg,CrossJump) and while GetNextInstructionUsingRegCond(hp3,hp2,ActiveReg,CrossJump) and
{ GetNextInstructionUsingRegCond only searches one instruction ahead unless -O3 is specified } { GetNextInstructionUsingRegCond only searches one instruction ahead unless -O3 is specified }
(hp2.typ=ait_instruction) do (hp2.typ=ait_instruction) do
begin begin
case taicpu(hp2).opcode of case taicpu(hp2).opcode of
A_POP:
if MatchOperand(taicpu(hp2).oper[0]^,ActiveReg) then
begin
if not CrossJump and
not RegUsedBetween(ActiveReg, p, hp2) then
begin
{ We can remove the original MOV since the register
wasn't used between it and its popping from the stack }
DebugMsg(SPeepholeOptimization + 'Mov2Nop 3c done',p);
RemoveCurrentp(p, hp1);
Result := True;
Exit;
end;
{ Can't go any further }
Break;
end;
A_MOV: A_MOV:
if MatchOperand(taicpu(hp2).oper[0]^,ActiveReg) and if MatchOperand(taicpu(hp2).oper[0]^,ActiveReg) and
((taicpu(p).oper[0]^.typ=top_const) or ((taicpu(p).oper[0]^.typ=top_const) or
@ -3377,9 +3403,6 @@ unit aoptx86;
mov %treg, y mov %treg, y
} }
TransferUsedRegs(TmpUsedRegs);
TmpUsedRegs[R_INTREGISTER].Update(tai(p.Next));
{ We don't need to call UpdateUsedRegs for every instruction between { We don't need to call UpdateUsedRegs for every instruction between
p and hp2 because the register we're concerned about will not p and hp2 because the register we're concerned about will not
become deallocated (otherwise GetNextInstructionUsingReg would become deallocated (otherwise GetNextInstructionUsingReg would
@ -3387,8 +3410,8 @@ unit aoptx86;
TempRegUsed := TempRegUsed :=
CrossJump { Assume the register is in use if it crossed a conditional jump } or CrossJump { Assume the register is in use if it crossed a conditional jump } or
RegUsedAfterInstruction(ActiveReg, hp2, TmpUsedRegs) or RegReadByInstruction(ActiveReg, hp3) or
RegReadByInstruction(ActiveReg, hp1); RegUsedAfterInstruction(ActiveReg, hp2, TmpUsedRegs);
case taicpu(p).oper[0]^.typ Of case taicpu(p).oper[0]^.typ Of
top_reg: top_reg:
@ -3557,40 +3580,41 @@ unit aoptx86;
Exit; Exit;
end; end;
else else
if MatchOpType(taicpu(p), top_reg, top_reg) then { Move down to the MatchOpType if-block below };
end;
{ Also catches MOV/S/Z instructions that aren't modified }
if taicpu(p).oper[0]^.typ = top_reg then
begin begin
TransferUsedRegs(TmpUsedRegs); CurrentReg := taicpu(p).oper[0]^.reg;
TmpUsedRegs[R_INTREGISTER].Update(tai(p.Next));
if if
not RegModifiedByInstruction(taicpu(p).oper[0]^.reg, hp1) and not RegModifiedByInstruction(CurrentReg, hp3) and
not RegModifiedBetween(taicpu(p).oper[0]^.reg, hp1, hp2) and not RegModifiedBetween(CurrentReg, hp3, hp2) and
DeepMovOpt(taicpu(p), taicpu(hp2)) then DeepMOVOpt(taicpu(p), taicpu(hp2)) then
begin begin
Result := True;
{ Just in case something didn't get modified (e.g. an { Just in case something didn't get modified (e.g. an
implicit register) } implicit register). Also, if it does read from this
if not RegReadByInstruction(ActiveReg, hp2) and register, then there's no longer an advantage to
changing the register on subsequent instructions.}
if not RegReadByInstruction(ActiveReg, hp2) then
begin
{ If a conditional jump was crossed, do not delete { If a conditional jump was crossed, do not delete
the original MOV no matter what } the original MOV no matter what }
not CrossJump then if not CrossJump and
{ RegEndOfLife returns True if the register is
deallocated before the next instruction or has
been loaded with a new value }
RegEndOfLife(ActiveReg, taicpu(hp2)) then
begin begin
TransferUsedRegs(TmpUsedRegs);
UpdateUsedRegs(TmpUsedRegs, tai(p.Next));
UpdateUsedRegs(TmpUsedRegs, tai(hp1.Next));
if
{ Make sure the original register isn't still present
and has been written to (e.g. with SHRX) }
RegLoadedWithNewValue(ActiveReg, hp2) or
not RegUsedAfterInstruction(ActiveReg, hp2, TmpUsedRegs) then
begin
RegUsedAfterInstruction(ActiveReg, hp2, TmpUsedRegs);
{ We can remove the original MOV } { We can remove the original MOV }
DebugMsg(SPeepholeOptimization + 'Mov2Nop 3b done',p); DebugMsg(SPeepholeOptimization + 'Mov2Nop 3b done',p);
RemoveCurrentp(p, hp1); RemoveCurrentp(p, hp1);
Result := True;
Exit; Exit;
end end;
else
if not RegModifiedByInstruction(ActiveReg, hp2) then
begin begin
{ See if there's more we can optimise } { See if there's more we can optimise }
hp3 := hp2; hp3 := hp2;
@ -3599,7 +3623,6 @@ unit aoptx86;
end; end;
end; end;
end; end;
end;
{ Break out of the while loop under normal circumstances } { Break out of the while loop under normal circumstances }
Break; Break;