From 11b341cc97b521fce9a74405eed2718cbc226e00 Mon Sep 17 00:00:00 2001 From: "J. Gareth \"Curious Kit\" Moreton" Date: Thu, 14 Mar 2024 13:39:28 +0000 Subject: [PATCH] * x86: Added new OptPass1CMOVcc peephole optimisation routine to dust up min/max code --- compiler/i386/aoptcpu.pas | 2 ++ compiler/x86/aoptx86.pas | 58 +++++++++++++++++++++++++++++++++++++ compiler/x86/nx86inl.pas | 2 +- compiler/x86_64/aoptcpu.pas | 2 ++ 4 files changed, 63 insertions(+), 1 deletion(-) diff --git a/compiler/i386/aoptcpu.pas b/compiler/i386/aoptcpu.pas index 9dfff43c65..bb216d68b2 100644 --- a/compiler/i386/aoptcpu.pas +++ b/compiler/i386/aoptcpu.pas @@ -161,6 +161,8 @@ unit aoptcpu; Result:=OptPass1ADD(p); A_AND: Result:=OptPass1And(p); + A_CMOVcc: + Result:=OptPass1CMOVcc(p); A_IMUL: Result:=OptPass1Imul(p); A_CMP: diff --git a/compiler/x86/aoptx86.pas b/compiler/x86/aoptx86.pas index 125928e070..1acc46d02a 100644 --- a/compiler/x86/aoptx86.pas +++ b/compiler/x86/aoptx86.pas @@ -167,6 +167,7 @@ unit aoptx86; function OptPass1Test(var p: tai): boolean; function OptPass1Add(var p: tai): boolean; function OptPass1AND(var p : tai) : boolean; + function OptPass1CMOVcc(var p: tai): Boolean; function OptPass1_V_MOVAP(var p : tai) : boolean; function OptPass1VOP(var p : tai) : boolean; function OptPass1MOV(var p : tai) : boolean; @@ -2299,6 +2300,57 @@ unit aoptx86; end; + function TX86AsmOptimizer.OptPass1CMOVcc(var p: tai): Boolean; + var + hp1: tai; + operswap: poper; + begin + Result := False; + + { Optimise: + cmov(c) %reg1,%reg2 + mov %reg2,%reg1 + (%reg2 dealloc.) 
+ + To: + cmov(~c) %reg2,%reg1 + } + if (taicpu(p).oper[0]^.typ = top_reg) then + while GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[1]^.reg) and + MatchInstruction(hp1, A_MOV, [taicpu(p).opsize]) and + MatchOperand(taicpu(hp1).oper[0]^, taicpu(p).oper[1]^.reg) and + MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) do + begin + TransferUsedRegs(TmpUsedRegs); + UpdateUsedRegsBetween(TmpUsedRegs, p, hp1); + if not RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs) then + begin + DebugMsg(SPeepholeOptimization + 'CMOV(c) %reg1,%reg2; MOV %reg2,%reg1 -> CMOV(~c) %reg2,%reg1 (CMovMov2CMov)', p); + + { Save time by swapping the pointers (they're both registers, so + we don't need to worry about reference counts) } + operswap := taicpu(p).oper[0]; + taicpu(p).oper[0] := taicpu(p).oper[1]; + taicpu(p).oper[1] := operswap; + + taicpu(p).condition := inverse_cond(taicpu(p).condition); + + RemoveInstruction(hp1); + + { It's still a CMOV, so we can look further ahead } + Include(OptsToCheck, aoc_ForceNewIteration); + + { But first, let's see if this will get optimised again + (probably won't happen, but best to be sure) } + Continue; + end; + + Break; + end; + + end; + + function TX86AsmOptimizer.OptPass1_V_MOVAP(var p : tai) : boolean; var hp1,hp2 : tai; @@ -9693,7 +9745,13 @@ unit aoptx86; hp1, hp2: tai; FoundComparison: Boolean; begin + { TODO: run the pass 1 optimisations as well, since they may have some effect + after the CMOV blocks are created in OptPass2Jcc (the call below is currently commented out) } Result := False; +{ Result := OptPass1CMOVcc(p); + if Result then + Exit;} + { Sometimes, the CMOV optimisations in OptPass2Jcc are a bit overzealous and make a slightly inefficent result on branching-type blocks, notably when setting a function result then jumping to the function epilogue. 
diff --git a/compiler/x86/nx86inl.pas b/compiler/x86/nx86inl.pas index 0aab517fb3..e9ce54e967 100644 --- a/compiler/x86/nx86inl.pas +++ b/compiler/x86/nx86inl.pas @@ -1591,7 +1591,7 @@ implementation {$endif i8086} var {$ifndef i8086} - memop, + memop : integer; gotmem : boolean; op: TAsmOp; {$endif i8086} diff --git a/compiler/x86_64/aoptcpu.pas b/compiler/x86_64/aoptcpu.pas index ae9e76d48a..ef6b055fdd 100644 --- a/compiler/x86_64/aoptcpu.pas +++ b/compiler/x86_64/aoptcpu.pas @@ -94,6 +94,8 @@ uses Result:=OptPass1ADD(p); A_AND: Result:=OptPass1AND(p); + A_CMOVcc: + Result:=OptPass1CMOVcc(p); A_IMUL: Result:=OptPass1Imul(p); A_MOV: