mirror of
https://gitlab.com/freepascal.org/fpc/source.git
synced 2025-08-30 12:20:19 +02:00
* merge request 75 by J. Gareth "Kit" Moreton manually applied:
This merge request makes a number of improvements to the DeepMOVOpt method and supporting functions:
* ReplaceRegisterInInstruction now replaces registers in references that are written to (since the registers themselves won't change).
* RegModifiedByInstruction will no longer return True for a register that appears in a reference that's written to (for the same reason as above) - special operations like MOVSS (the 0-operand version) aren't affected.
* DeepMOVOpt returning True will now always set the Result of OptPass1MOV to True even though p wasn't directly modified, since this often caused missed optimisations.
* Some of the speed-ups in the patch from #32916 have also been applied in order to make the general DeepMOVOpt run faster, notably it tries to avoid calling UpdateUsedRegs where possible.
This commit is contained in:
parent
5c75ef30ce
commit
4012c3dbd4
@ -115,7 +115,7 @@ unit aoptx86;
|
|||||||
|
|
||||||
{ Returns true if the reference only refers to ESP or EBP (or their 64-bit equivalents),
|
{ Returns true if the reference only refers to ESP or EBP (or their 64-bit equivalents),
|
||||||
or writes to a global symbol }
|
or writes to a global symbol }
|
||||||
class function IsRefSafe(const ref: PReference): Boolean; static; inline;
|
class function IsRefSafe(const ref: PReference): Boolean; static;
|
||||||
|
|
||||||
|
|
||||||
{ Returns true if the given MOV instruction can be safely converted to CMOV }
|
{ Returns true if the given MOV instruction can be safely converted to CMOV }
|
||||||
@ -785,6 +785,14 @@ unit aoptx86;
|
|||||||
|
|
||||||
|
|
||||||
function TX86AsmOptimizer.RegModifiedByInstruction(Reg: TRegister; p1: tai): boolean;
|
function TX86AsmOptimizer.RegModifiedByInstruction(Reg: TRegister; p1: tai): boolean;
|
||||||
|
const
|
||||||
|
WriteOps: array[0..3] of set of TInsChange =
|
||||||
|
([CH_RWOP1,CH_WOP1,CH_MOP1],
|
||||||
|
[Ch_RWOP2,Ch_WOP2,Ch_MOP2],
|
||||||
|
[Ch_RWOP3,Ch_WOP3,Ch_MOP3],
|
||||||
|
[Ch_RWOP4,Ch_WOP4,Ch_MOP4]);
|
||||||
|
var
|
||||||
|
OperIdx: Integer;
|
||||||
begin
|
begin
|
||||||
Result := False;
|
Result := False;
|
||||||
if p1.typ <> ait_instruction then
|
if p1.typ <> ait_instruction then
|
||||||
@ -909,22 +917,12 @@ unit aoptx86;
|
|||||||
end;
|
end;
|
||||||
end;
|
end;
|
||||||
end;
|
end;
|
||||||
if ([CH_RWOP1,CH_WOP1,CH_MOP1]*Ch<>[]) and reginop(reg,taicpu(p1).oper[0]^) then
|
|
||||||
begin
|
for OperIdx := 0 to taicpu(p1).ops - 1 do
|
||||||
Result := true;
|
if (WriteOps[OperIdx]*Ch<>[]) and
|
||||||
exit
|
{ The register doesn't get modified inside a reference }
|
||||||
end;
|
(taicpu(p1).oper[OperIdx]^.typ = top_reg) and
|
||||||
if ([Ch_RWOP2,Ch_WOP2,Ch_MOP2]*Ch<>[]) and reginop(reg,taicpu(p1).oper[1]^) then
|
SuperRegistersEqual(reg,taicpu(p1).oper[OperIdx]^.reg) then
|
||||||
begin
|
|
||||||
Result := true;
|
|
||||||
exit
|
|
||||||
end;
|
|
||||||
if ([Ch_RWOP3,Ch_WOP3,Ch_MOP3]*Ch<>[]) and reginop(reg,taicpu(p1).oper[2]^) then
|
|
||||||
begin
|
|
||||||
Result := true;
|
|
||||||
exit
|
|
||||||
end;
|
|
||||||
if ([Ch_RWOP4,Ch_WOP4,Ch_MOP4]*Ch<>[]) and reginop(reg,taicpu(p1).oper[3]^) then
|
|
||||||
begin
|
begin
|
||||||
Result := true;
|
Result := true;
|
||||||
exit
|
exit
|
||||||
@ -2199,9 +2197,10 @@ unit aoptx86;
|
|||||||
Result := False;
|
Result := False;
|
||||||
|
|
||||||
for OperIdx := 0 to p.ops - 1 do
|
for OperIdx := 0 to p.ops - 1 do
|
||||||
if (ReadFlag[OperIdx] in InsProp[p.Opcode].Ch) and
|
if (ReadFlag[OperIdx] in InsProp[p.Opcode].Ch) then
|
||||||
|
begin
|
||||||
{ The shift and rotate instructions can only use CL }
|
{ The shift and rotate instructions can only use CL }
|
||||||
not (
|
if not (
|
||||||
(OperIdx = 0) and
|
(OperIdx = 0) and
|
||||||
{ This second condition just helps to avoid unnecessarily
|
{ This second condition just helps to avoid unnecessarily
|
||||||
calling MatchInstruction for 10 different opcodes }
|
calling MatchInstruction for 10 different opcodes }
|
||||||
@ -2209,22 +2208,27 @@ unit aoptx86;
|
|||||||
MatchInstruction(p, [A_RCL, A_RCR, A_ROL, A_ROR, A_SAL, A_SAR, A_SHL, A_SHLD, A_SHR, A_SHRD], [])
|
MatchInstruction(p, [A_RCL, A_RCR, A_ROL, A_ROR, A_SAL, A_SAR, A_SHL, A_SHLD, A_SHR, A_SHRD], [])
|
||||||
) then
|
) then
|
||||||
Result := ReplaceRegisterInOper(p, OperIdx, AOldReg, ANewReg) or Result;
|
Result := ReplaceRegisterInOper(p, OperIdx, AOldReg, ANewReg) or Result;
|
||||||
|
end
|
||||||
|
else if p.oper[OperIdx]^.typ = top_ref then
|
||||||
|
{ It's okay to replace registers in references that get written to }
|
||||||
|
Result := ReplaceRegisterInOper(p, OperIdx, AOldReg, ANewReg) or Result;
|
||||||
end;
|
end;
|
||||||
|
|
||||||
|
|
||||||
class function TX86AsmOptimizer.IsRefSafe(const ref: PReference): Boolean; inline;
|
class function TX86AsmOptimizer.IsRefSafe(const ref: PReference): Boolean;
|
||||||
begin
|
begin
|
||||||
|
with ref^ do
|
||||||
Result :=
|
Result :=
|
||||||
(ref^.index = NR_NO) and
|
(index = NR_NO) and
|
||||||
(
|
(
|
||||||
{$ifdef x86_64}
|
{$ifdef x86_64}
|
||||||
(
|
(
|
||||||
(ref^.base = NR_RIP) and
|
(base = NR_RIP) and
|
||||||
(ref^.refaddr in [addr_pic, addr_pic_no_got])
|
(refaddr in [addr_pic, addr_pic_no_got])
|
||||||
) or
|
) or
|
||||||
{$endif x86_64}
|
{$endif x86_64}
|
||||||
(ref^.base = NR_STACK_POINTER_REG) or
|
(base = NR_STACK_POINTER_REG) or
|
||||||
(ref^.base = current_procinfo.framepointer)
|
(base = current_procinfo.framepointer)
|
||||||
);
|
);
|
||||||
end;
|
end;
|
||||||
|
|
||||||
@ -2416,6 +2420,9 @@ unit aoptx86;
|
|||||||
if RegReadByInstruction(CurrentReg, hp1) and
|
if RegReadByInstruction(CurrentReg, hp1) and
|
||||||
DeepMOVOpt(taicpu(p), taicpu(hp1)) then
|
DeepMOVOpt(taicpu(p), taicpu(hp1)) then
|
||||||
begin
|
begin
|
||||||
|
{ A change has occurred, just not in p }
|
||||||
|
Result := True;
|
||||||
|
|
||||||
TransferUsedRegs(TmpUsedRegs);
|
TransferUsedRegs(TmpUsedRegs);
|
||||||
UpdateUsedRegs(TmpUsedRegs, tai(p.Next));
|
UpdateUsedRegs(TmpUsedRegs, tai(p.Next));
|
||||||
|
|
||||||
@ -3359,11 +3366,30 @@ unit aoptx86;
|
|||||||
{ Saves on a large number of dereferences }
|
{ Saves on a large number of dereferences }
|
||||||
ActiveReg := taicpu(p).oper[1]^.reg;
|
ActiveReg := taicpu(p).oper[1]^.reg;
|
||||||
|
|
||||||
|
TransferUsedRegs(TmpUsedRegs);
|
||||||
|
UpdateUsedRegs(TmpUsedRegs, tai(p.Next));
|
||||||
|
|
||||||
while GetNextInstructionUsingRegCond(hp3,hp2,ActiveReg,CrossJump) and
|
while GetNextInstructionUsingRegCond(hp3,hp2,ActiveReg,CrossJump) and
|
||||||
{ GetNextInstructionUsingRegCond only searches one instruction ahead unless -O3 is specified }
|
{ GetNextInstructionUsingRegCond only searches one instruction ahead unless -O3 is specified }
|
||||||
(hp2.typ=ait_instruction) do
|
(hp2.typ=ait_instruction) do
|
||||||
begin
|
begin
|
||||||
case taicpu(hp2).opcode of
|
case taicpu(hp2).opcode of
|
||||||
|
A_POP:
|
||||||
|
if MatchOperand(taicpu(hp2).oper[0]^,ActiveReg) then
|
||||||
|
begin
|
||||||
|
if not CrossJump and
|
||||||
|
not RegUsedBetween(ActiveReg, p, hp2) then
|
||||||
|
begin
|
||||||
|
{ We can remove the original MOV since the register
|
||||||
|
wasn't used between it and its popping from the stack }
|
||||||
|
DebugMsg(SPeepholeOptimization + 'Mov2Nop 3c done',p);
|
||||||
|
RemoveCurrentp(p, hp1);
|
||||||
|
Result := True;
|
||||||
|
Exit;
|
||||||
|
end;
|
||||||
|
{ Can't go any further }
|
||||||
|
Break;
|
||||||
|
end;
|
||||||
A_MOV:
|
A_MOV:
|
||||||
if MatchOperand(taicpu(hp2).oper[0]^,ActiveReg) and
|
if MatchOperand(taicpu(hp2).oper[0]^,ActiveReg) and
|
||||||
((taicpu(p).oper[0]^.typ=top_const) or
|
((taicpu(p).oper[0]^.typ=top_const) or
|
||||||
@ -3377,9 +3403,6 @@ unit aoptx86;
|
|||||||
mov %treg, y
|
mov %treg, y
|
||||||
}
|
}
|
||||||
|
|
||||||
TransferUsedRegs(TmpUsedRegs);
|
|
||||||
TmpUsedRegs[R_INTREGISTER].Update(tai(p.Next));
|
|
||||||
|
|
||||||
{ We don't need to call UpdateUsedRegs for every instruction between
|
{ We don't need to call UpdateUsedRegs for every instruction between
|
||||||
p and hp2 because the register we're concerned about will not
|
p and hp2 because the register we're concerned about will not
|
||||||
become deallocated (otherwise GetNextInstructionUsingReg would
|
become deallocated (otherwise GetNextInstructionUsingReg would
|
||||||
@ -3387,8 +3410,8 @@ unit aoptx86;
|
|||||||
|
|
||||||
TempRegUsed :=
|
TempRegUsed :=
|
||||||
CrossJump { Assume the register is in use if it crossed a conditional jump } or
|
CrossJump { Assume the register is in use if it crossed a conditional jump } or
|
||||||
RegUsedAfterInstruction(ActiveReg, hp2, TmpUsedRegs) or
|
RegReadByInstruction(ActiveReg, hp3) or
|
||||||
RegReadByInstruction(ActiveReg, hp1);
|
RegUsedAfterInstruction(ActiveReg, hp2, TmpUsedRegs);
|
||||||
|
|
||||||
case taicpu(p).oper[0]^.typ Of
|
case taicpu(p).oper[0]^.typ Of
|
||||||
top_reg:
|
top_reg:
|
||||||
@ -3557,40 +3580,41 @@ unit aoptx86;
|
|||||||
Exit;
|
Exit;
|
||||||
end;
|
end;
|
||||||
else
|
else
|
||||||
if MatchOpType(taicpu(p), top_reg, top_reg) then
|
{ Move down to the MatchOpType if-block below };
|
||||||
|
end;
|
||||||
|
|
||||||
|
{ Also catches MOV/S/Z instructions that aren't modified }
|
||||||
|
if taicpu(p).oper[0]^.typ = top_reg then
|
||||||
begin
|
begin
|
||||||
TransferUsedRegs(TmpUsedRegs);
|
CurrentReg := taicpu(p).oper[0]^.reg;
|
||||||
TmpUsedRegs[R_INTREGISTER].Update(tai(p.Next));
|
|
||||||
if
|
if
|
||||||
not RegModifiedByInstruction(taicpu(p).oper[0]^.reg, hp1) and
|
not RegModifiedByInstruction(CurrentReg, hp3) and
|
||||||
not RegModifiedBetween(taicpu(p).oper[0]^.reg, hp1, hp2) and
|
not RegModifiedBetween(CurrentReg, hp3, hp2) and
|
||||||
DeepMovOpt(taicpu(p), taicpu(hp2)) then
|
DeepMOVOpt(taicpu(p), taicpu(hp2)) then
|
||||||
begin
|
begin
|
||||||
|
Result := True;
|
||||||
|
|
||||||
{ Just in case something didn't get modified (e.g. an
|
{ Just in case something didn't get modified (e.g. an
|
||||||
implicit register) }
|
implicit register). Also, if it does read from this
|
||||||
if not RegReadByInstruction(ActiveReg, hp2) and
|
register, then there's no longer an advantage to
|
||||||
|
changing the register on subsequent instructions.}
|
||||||
|
if not RegReadByInstruction(ActiveReg, hp2) then
|
||||||
|
begin
|
||||||
{ If a conditional jump was crossed, do not delete
|
{ If a conditional jump was crossed, do not delete
|
||||||
the original MOV no matter what }
|
the original MOV no matter what }
|
||||||
not CrossJump then
|
if not CrossJump and
|
||||||
|
{ RegEndOfLife returns True if the register is
|
||||||
|
deallocated before the next instruction or has
|
||||||
|
been loaded with a new value }
|
||||||
|
RegEndOfLife(ActiveReg, taicpu(hp2)) then
|
||||||
begin
|
begin
|
||||||
TransferUsedRegs(TmpUsedRegs);
|
|
||||||
UpdateUsedRegs(TmpUsedRegs, tai(p.Next));
|
|
||||||
UpdateUsedRegs(TmpUsedRegs, tai(hp1.Next));
|
|
||||||
|
|
||||||
if
|
|
||||||
{ Make sure the original register isn't still present
|
|
||||||
and has been written to (e.g. with SHRX) }
|
|
||||||
RegLoadedWithNewValue(ActiveReg, hp2) or
|
|
||||||
not RegUsedAfterInstruction(ActiveReg, hp2, TmpUsedRegs) then
|
|
||||||
begin
|
|
||||||
RegUsedAfterInstruction(ActiveReg, hp2, TmpUsedRegs);
|
|
||||||
{ We can remove the original MOV }
|
{ We can remove the original MOV }
|
||||||
DebugMsg(SPeepholeOptimization + 'Mov2Nop 3b done',p);
|
DebugMsg(SPeepholeOptimization + 'Mov2Nop 3b done',p);
|
||||||
RemoveCurrentp(p, hp1);
|
RemoveCurrentp(p, hp1);
|
||||||
Result := True;
|
|
||||||
Exit;
|
Exit;
|
||||||
end
|
end;
|
||||||
else
|
|
||||||
|
if not RegModifiedByInstruction(ActiveReg, hp2) then
|
||||||
begin
|
begin
|
||||||
{ See if there's more we can optimise }
|
{ See if there's more we can optimise }
|
||||||
hp3 := hp2;
|
hp3 := hp2;
|
||||||
@ -3599,7 +3623,6 @@ unit aoptx86;
|
|||||||
end;
|
end;
|
||||||
end;
|
end;
|
||||||
end;
|
end;
|
||||||
end;
|
|
||||||
|
|
||||||
{ Break out of the while loop under normal circumstances }
|
{ Break out of the while loop under normal circumstances }
|
||||||
Break;
|
Break;
|
||||||
|
Loading…
Reference in New Issue
Block a user