From a807e185912a5ab61e3bbfcb7358e442256562ac Mon Sep 17 00:00:00 2001 From: florian Date: Fri, 24 Jan 2020 20:41:11 +0000 Subject: [PATCH] * patch by J. Gareth Moreton: x86 CMP/TEST/Jcc optimisations, resolves #36624 git-svn-id: trunk@44029 - --- compiler/x86/aoptx86.pas | 206 +++++++++++++++++++++++++++++++++------ 1 file changed, 174 insertions(+), 32 deletions(-) diff --git a/compiler/x86/aoptx86.pas b/compiler/x86/aoptx86.pas index dec2e0c4cb..9d4bea764e 100644 --- a/compiler/x86/aoptx86.pas +++ b/compiler/x86/aoptx86.pas @@ -3303,43 +3303,185 @@ unit aoptx86; function TX86AsmOptimizer.OptPass1Cmp(var p: tai): boolean; var v: TCGInt; - hp1, hp2, hp3, hp4: tai; + hp1, hp2: tai; begin Result:=false; - { cmp register,$8000 neg register - je target --> jo target - .... only if register is deallocated before jump.} - case Taicpu(p).opsize of - S_B: v:=$80; - S_W: v:=$8000; - S_L: v:=qword($80000000); - { actually, this will never happen: cmp with 64 bit constants is not possible } - S_Q : v:=Int64($8000000000000000); - else - internalerror(2013112905); - end; - if MatchOpType(taicpu(p),Top_const,top_reg) and - (taicpu(p).oper[0]^.val=v) and - GetNextInstruction(p, hp1) and - MatchInstruction(hp1,A_Jcc,[]) and - (Taicpu(hp1).condition in [C_E,C_NE]) then + if taicpu(p).oper[0]^.typ = top_const then begin - TransferUsedRegs(TmpUsedRegs); - UpdateUsedRegs(TmpUsedRegs,tai(p.next)); - if not(RegInUsedRegs(Taicpu(p).oper[1]^.reg, TmpUsedRegs)) then + { Though GetNextInstruction can be factored out, it is an expensive + call, so delay calling it until we have first checked cheaper + conditions that are independent of it. } + + if (taicpu(p).oper[0]^.val = 0) and + (taicpu(p).oper[1]^.typ = top_reg) and + GetNextInstruction(p, hp1) and + MatchInstruction(hp1,A_Jcc,A_SETcc,[]) then begin - DebugMsg(SPeepholeOptimization + 'CmpJe2NegJo done',p); - Taicpu(p).opcode:=A_NEG; - Taicpu(p).loadoper(0,Taicpu(p).oper[1]^); - Taicpu(p).clearop(1); - Taicpu(p).ops:=1; - if Taicpu(hp1).condition=C_E then - Taicpu(hp1).condition:=C_O - else - Taicpu(hp1).condition:=C_NO; - Result:=true; - exit; + hp2 := p; + { When dealing with "cmp $0,%reg", only ZF and SF contain + anything meaningful once it's converted to "test %reg,%reg"; + additionally, some jumps will always (or never) branch, so + evaluate every jump immediately following the + comparison, optimising the conditions if possible. + Similarly with SETcc... those that are always set to 0 or 1 + are changed to MOV instructions } + while GetNextInstruction(hp2, hp1) and + MatchInstruction(hp1,A_Jcc,A_SETcc,[]) do + begin + case taicpu(hp1).condition of + C_B, C_C, C_NAE, C_O: + { For B/NAE: + Will never branch since an unsigned integer can never be below zero + For C/O: + Result cannot overflow because 0 is being subtracted + } + begin + if taicpu(hp1).opcode = A_Jcc then + begin + DebugMsg(SPeepholeOptimization + 'Cmpcc2Testcc - condition B/C/NAE/O --> Never (jump removed)', hp1); + TAsmLabel(taicpu(hp1).oper[0]^.ref^.symbol).decrefs; + AsmL.Remove(hp1); + hp1.Free; + { Since hp1 was deleted, hp2 must not be updated } + Continue; + end + else + begin + DebugMsg(SPeepholeOptimization + 'Cmpcc2Testcc - condition B/C/NAE/O --> Never (set -> mov 0)', hp1); + { Convert "set(c) %reg" instruction to "movb 0,%reg" } + taicpu(hp1).opcode := A_MOV; + taicpu(hp1).condition := C_None; + taicpu(hp1).opsize := S_B; + taicpu(hp1).allocate_oper(2); + taicpu(hp1).loadreg(1,taicpu(hp1).oper[0]^.reg); + taicpu(hp1).loadconst(0, 0); + end; + end; + C_BE, C_NA: + begin + { Will only branch if equal to zero } + DebugMsg(SPeepholeOptimization + 'Cmpcc2Testcc - condition BE/NA --> E', hp1); + taicpu(hp1).condition := C_E; + end; + C_A, C_NBE: + begin + { Will only branch if not equal to zero } + DebugMsg(SPeepholeOptimization + 'Cmpcc2Testcc - condition A/NBE --> NE', hp1); + taicpu(hp1).condition := C_NE; + end; + C_AE, C_NB, C_NC, C_NO: + begin + { Will always branch } + DebugMsg(SPeepholeOptimization + 'Cmpcc2Testcc - condition AE/NB/NC/NO --> Always', hp1); + if taicpu(hp1).opcode = A_Jcc then + begin + MakeUnconditional(taicpu(hp1)); + { Any jumps/set that follow will now be dead code } + RemoveDeadCodeAfterJump(taicpu(hp1)); + Break; + end + else + begin + { Convert "set(c) %reg" instruction to "movb 1,%reg" } + taicpu(hp1).opcode := A_MOV; + taicpu(hp1).condition := C_None; + taicpu(hp1).opsize := S_B; + taicpu(hp1).allocate_oper(2); + taicpu(hp1).loadreg(1,taicpu(hp1).oper[0]^.reg); + taicpu(hp1).loadconst(0, 1); + end; + end; + C_None: + InternalError(2020012201); + C_P, C_PE, C_NP, C_PO: + { We can't handle parity checks and they should never be generated + after a general-purpose CMP (it's used in some floating-point + comparisons that don't use CMP) } + InternalError(2020012202); + else + { Zero/Equality, Sign, their complements and all of the + signed comparisons do not need to be converted }; + end; + hp2 := hp1; + end; + + { Convert the instruction to a TEST } + + taicpu(p).opcode := A_TEST; + taicpu(p).loadreg(0,taicpu(p).oper[1]^.reg); + Result := True; + Exit; + end + else if (taicpu(p).oper[0]^.val = 1) and + GetNextInstruction(p, hp1) and + MatchInstruction(hp1,A_Jcc,A_SETcc,[]) and + (taicpu(hp1).condition in [C_L, C_NGE]) then + begin + { Convert; To: + cmp $1,r/m cmp $0,r/m + jl @lbl jle @lbl + } + DebugMsg(SPeepholeOptimization + 'Cmp1Jl2Cmp0Jle', p); + taicpu(p).oper[0]^.val := 0; + taicpu(hp1).condition := C_LE; + + { If the instruction is now "cmp $0,%reg", convert it to a + TEST (and effectively do the work of the "cmp $0,%reg" in + the block above) + + If it's a reference, we can get away with not setting + Result to True because he haven't evaluated the jump + in this pass yet. + } + if (taicpu(p).oper[1]^.typ = top_reg) then + begin + taicpu(p).opcode := A_TEST; + taicpu(p).loadreg(0,taicpu(p).oper[1]^.reg); + Result := True; + end; + + Exit; + end + else if (taicpu(p).oper[1]^.typ = top_reg) then + begin + { cmp register,$8000 neg register + je target --> jo target + + .... only if register is deallocated before jump.} + case Taicpu(p).opsize of + S_B: v:=$80; + S_W: v:=$8000; + S_L: v:=qword($80000000); + { S_Q will never happen: cmp with 64 bit constants is not possible } + S_Q: + Exit; + else + internalerror(2013112905); + end; + + if (taicpu(p).oper[0]^.val=v) and + GetNextInstruction(p, hp1) and + MatchInstruction(hp1,A_Jcc,A_SETcc,[]) and + (Taicpu(hp1).condition in [C_E,C_NE]) then + begin + TransferUsedRegs(TmpUsedRegs); + UpdateUsedRegs(TmpUsedRegs,tai(p.next)); + if not(RegInUsedRegs(Taicpu(p).oper[1]^.reg, TmpUsedRegs)) then + begin + DebugMsg(SPeepholeOptimization + 'CmpJe2NegJo done',p); + Taicpu(p).opcode:=A_NEG; + Taicpu(p).loadoper(0,Taicpu(p).oper[1]^); + Taicpu(p).clearop(1); + Taicpu(p).ops:=1; + if Taicpu(hp1).condition=C_E then + Taicpu(hp1).condition:=C_O + else + Taicpu(hp1).condition:=C_NO; + Result:=true; + exit; + end; + end; end; end; end;