From 4012c3dbd47f661805bb7a831c6c687807ede3b4 Mon Sep 17 00:00:00 2001 From: florian Date: Sun, 17 Oct 2021 09:50:47 +0200 Subject: [PATCH] * merge request 75 by J. Gareth "Kit" Moreton manually applied: This merge request makes a number of improvements to the DeepMOVOpt method and supporting functions: * ReplaceRegisterInInstruction now replaces registers in references that are written to (since the registers themselves won't change) * RegModifiedByInstruction will no longer return True for a register that appears in a reference that's written to (for the same reason as above) - special operations like MOVSS (the 0-operand version) aren't affected. * DeepMOVOpt returning True will now always set the Result of OptPass1MOV to True even though p wasn't directly modified, since this often caused missed optimisations. * Some of the speed-ups in the patch from #32916 have also been applied in order to make the general DeepMOVOpt run faster, notably it tries to avoid calling UpdateUsedRegs where possible. --- compiler/x86/aoptx86.pas | 193 ++++++++++++++++++++++----------------- 1 file changed, 108 insertions(+), 85 deletions(-) diff --git a/compiler/x86/aoptx86.pas b/compiler/x86/aoptx86.pas index ac169c31d9..2d4c8104ad 100644 --- a/compiler/x86/aoptx86.pas +++ b/compiler/x86/aoptx86.pas @@ -115,7 +115,7 @@ unit aoptx86; { Returns true if the reference only refers to ESP or EBP (or their 64-bit equivalents), or writes to a global symbol } - class function IsRefSafe(const ref: PReference): Boolean; static; inline; + class function IsRefSafe(const ref: PReference): Boolean; static; { Returns true if the given MOV instruction can be safely converted to CMOV } @@ -785,6 +785,14 @@ unit aoptx86; function TX86AsmOptimizer.RegModifiedByInstruction(Reg: TRegister; p1: tai): boolean; + const + WriteOps: array[0..3] of set of TInsChange = + ([CH_RWOP1,CH_WOP1,CH_MOP1], + [Ch_RWOP2,Ch_WOP2,Ch_MOP2], + [Ch_RWOP3,Ch_WOP3,Ch_MOP3], + [Ch_RWOP4,Ch_WOP4,Ch_MOP4]); + var + OperIdx: Integer; begin Result := False; if p1.typ <> ait_instruction then @@ -909,26 +917,16 @@ unit aoptx86; end; end; end; - if ([CH_RWOP1,CH_WOP1,CH_MOP1]*Ch<>[]) and reginop(reg,taicpu(p1).oper[0]^) then - begin - Result := true; - exit - end; - if ([Ch_RWOP2,Ch_WOP2,Ch_MOP2]*Ch<>[]) and reginop(reg,taicpu(p1).oper[1]^) then - begin - Result := true; - exit - end; - if ([Ch_RWOP3,Ch_WOP3,Ch_MOP3]*Ch<>[]) and reginop(reg,taicpu(p1).oper[2]^) then - begin - Result := true; - exit - end; - if ([Ch_RWOP4,Ch_WOP4,Ch_MOP4]*Ch<>[]) and reginop(reg,taicpu(p1).oper[3]^) then - begin - Result := true; - exit - end; + + for OperIdx := 0 to taicpu(p1).ops - 1 do + if (WriteOps[OperIdx]*Ch<>[]) and + { The register doesn't get modified inside a reference } + (taicpu(p1).oper[OperIdx]^.typ = top_reg) and + SuperRegistersEqual(reg,taicpu(p1).oper[OperIdx]^.reg) then + begin + Result := true; + exit + end; end; end; @@ -2199,33 +2197,39 @@ unit aoptx86; Result := False; for OperIdx := 0 to p.ops - 1 do - if (ReadFlag[OperIdx] in InsProp[p.Opcode].Ch) and - { The shift and rotate instructions can only use CL } - not ( - (OperIdx = 0) and - { This second condition just helps to avoid unnecessarily - calling MatchInstruction for 10 different opcodes } - (p.oper[0]^.reg = NR_CL) and - MatchInstruction(p, [A_RCL, A_RCR, A_ROL, A_ROR, A_SAL, A_SAR, A_SHL, A_SHLD, A_SHR, A_SHRD], []) - ) then + if (ReadFlag[OperIdx] in InsProp[p.Opcode].Ch) then + begin + { The shift and rotate instructions can only use CL } + if not ( + (OperIdx = 0) and + { This second condition just helps to avoid unnecessarily + calling MatchInstruction for 10 different opcodes } + (p.oper[0]^.reg = NR_CL) and + MatchInstruction(p, [A_RCL, A_RCR, A_ROL, A_ROR, A_SAL, A_SAR, A_SHL, A_SHLD, A_SHR, A_SHRD], []) + ) then + Result := ReplaceRegisterInOper(p, OperIdx, AOldReg, ANewReg) or Result; + end + else if p.oper[OperIdx]^.typ = top_ref then + { It's okay to replace registers in references that get written to } Result := ReplaceRegisterInOper(p, OperIdx, AOldReg, ANewReg) or Result; end; - class function TX86AsmOptimizer.IsRefSafe(const ref: PReference): Boolean; inline; + class function TX86AsmOptimizer.IsRefSafe(const ref: PReference): Boolean; begin - Result := - (ref^.index = NR_NO) and - ( -{$ifdef x86_64} + with ref^ do + Result := + (index = NR_NO) and ( - (ref^.base = NR_RIP) and - (ref^.refaddr in [addr_pic, addr_pic_no_got]) - ) or +{$ifdef x86_64} + ( + (base = NR_RIP) and + (refaddr in [addr_pic, addr_pic_no_got]) + ) or {$endif x86_64} - (ref^.base = NR_STACK_POINTER_REG) or - (ref^.base = current_procinfo.framepointer) - ); + (base = NR_STACK_POINTER_REG) or + (base = current_procinfo.framepointer) + ); end; @@ -2416,6 +2420,9 @@ unit aoptx86; if RegReadByInstruction(CurrentReg, hp1) and DeepMOVOpt(taicpu(p), taicpu(hp1)) then begin + { A change has occurred, just not in p } + Result := True; + TransferUsedRegs(TmpUsedRegs); UpdateUsedRegs(TmpUsedRegs, tai(p.Next)); @@ -3359,11 +3366,30 @@ unit aoptx86; { Saves on a large number of dereferences } ActiveReg := taicpu(p).oper[1]^.reg; + TransferUsedRegs(TmpUsedRegs); + UpdateUsedRegs(TmpUsedRegs, tai(p.Next)); + while GetNextInstructionUsingRegCond(hp3,hp2,ActiveReg,CrossJump) and { GetNextInstructionUsingRegCond only searches one instruction ahead unless -O3 is specified } (hp2.typ=ait_instruction) do begin case taicpu(hp2).opcode of + A_POP: + if MatchOperand(taicpu(hp2).oper[0]^,ActiveReg) then + begin + if not CrossJump and + not RegUsedBetween(ActiveReg, p, hp2) then + begin + { We can remove the original MOV since the register + wasn't used between it and its popping from the stack } + DebugMsg(SPeepholeOptimization + 'Mov2Nop 3c done',p); + RemoveCurrentp(p, hp1); + Result := True; + Exit; + end; + { Can't go any further } + Break; + end; A_MOV: if MatchOperand(taicpu(hp2).oper[0]^,ActiveReg) and ((taicpu(p).oper[0]^.typ=top_const) or @@ -3377,9 +3403,6 @@ unit aoptx86; mov %treg, y } - TransferUsedRegs(TmpUsedRegs); - TmpUsedRegs[R_INTREGISTER].Update(tai(p.Next)); - { We don't need to call UpdateUsedRegs for every instruction between p and hp2 because the register we're concerned about will not become deallocated (otherwise GetNextInstructionUsingReg would @@ -3387,8 +3410,8 @@ unit aoptx86; TempRegUsed := CrossJump { Assume the register is in use if it crossed a conditional jump } or - RegUsedAfterInstruction(ActiveReg, hp2, TmpUsedRegs) or - RegReadByInstruction(ActiveReg, hp1); + RegReadByInstruction(ActiveReg, hp3) or + RegUsedAfterInstruction(ActiveReg, hp2, TmpUsedRegs); case taicpu(p).oper[0]^.typ Of top_reg: @@ -3557,49 +3580,49 @@ unit aoptx86; Exit; end; else - if MatchOpType(taicpu(p), top_reg, top_reg) then - begin - TransferUsedRegs(TmpUsedRegs); - TmpUsedRegs[R_INTREGISTER].Update(tai(p.Next)); - if - not RegModifiedByInstruction(taicpu(p).oper[0]^.reg, hp1) and - not RegModifiedBetween(taicpu(p).oper[0]^.reg, hp1, hp2) and - DeepMovOpt(taicpu(p), taicpu(hp2)) then - begin - { Just in case something didn't get modified (e.g. an - implicit register) } - if not RegReadByInstruction(ActiveReg, hp2) and - { If a conditional jump was crossed, do not delete - the original MOV no matter what } - not CrossJump then - begin - TransferUsedRegs(TmpUsedRegs); - UpdateUsedRegs(TmpUsedRegs, tai(p.Next)); - UpdateUsedRegs(TmpUsedRegs, tai(hp1.Next)); + { Move down to the MatchOpType if-block below }; + end; - if - { Make sure the original register isn't still present - and has been written to (e.g. with SHRX) } - RegLoadedWithNewValue(ActiveReg, hp2) or - not RegUsedAfterInstruction(ActiveReg, hp2, TmpUsedRegs) then - begin - RegUsedAfterInstruction(ActiveReg, hp2, TmpUsedRegs); - { We can remove the original MOV } - DebugMsg(SPeepholeOptimization + 'Mov2Nop 3b done',p); - RemoveCurrentp(p, hp1); - Result := True; - Exit; - end - else - begin - { See if there's more we can optimise } - hp3 := hp2; - Continue; - end; + { Also catches MOV/S/Z instructions that aren't modified } + if taicpu(p).oper[0]^.typ = top_reg then + begin + CurrentReg := taicpu(p).oper[0]^.reg; + if + not RegModifiedByInstruction(CurrentReg, hp3) and + not RegModifiedBetween(CurrentReg, hp3, hp2) and + DeepMOVOpt(taicpu(p), taicpu(hp2)) then + begin + Result := True; + + { Just in case something didn't get modified (e.g. an + implicit register). Also, if it does read from this + register, then there's no longer an advantage to + changing the register on subsequent instructions.} + if not RegReadByInstruction(ActiveReg, hp2) then + begin + { If a conditional jump was crossed, do not delete + the original MOV no matter what } + if not CrossJump and + { RegEndOfLife returns True if the register is + deallocated before the next instruction or has + been loaded with a new value } + RegEndOfLife(ActiveReg, taicpu(hp2)) then + begin + { We can remove the original MOV } + DebugMsg(SPeepholeOptimization + 'Mov2Nop 3b done',p); + RemoveCurrentp(p, hp1); + Exit; + end; + + if not RegModifiedByInstruction(ActiveReg, hp2) then + begin + { See if there's more we can optimise } + hp3 := hp2; + Continue; end; end; end; - end; + end; { Break out of the while loop under normal circumstances } Break;