mirror of
https://gitlab.com/freepascal.org/fpc/source.git
synced 2025-04-05 17:08:01 +02:00
* x86: Added new OptPass1CMOVcc peephole optimisation routine to dust up min/max code
This commit is contained in:
parent
c79361c010
commit
11b341cc97
@ -161,6 +161,8 @@ unit aoptcpu;
|
||||
Result:=OptPass1ADD(p);
|
||||
A_AND:
|
||||
Result:=OptPass1And(p);
|
||||
A_CMOVcc:
|
||||
Result:=OptPass1CMOVcc(p);
|
||||
A_IMUL:
|
||||
Result:=OptPass1Imul(p);
|
||||
A_CMP:
|
||||
|
@ -167,6 +167,7 @@ unit aoptx86;
|
||||
function OptPass1Test(var p: tai): boolean;
|
||||
function OptPass1Add(var p: tai): boolean;
|
||||
function OptPass1AND(var p : tai) : boolean;
|
||||
function OptPass1CMOVcc(var p: tai): Boolean;
|
||||
function OptPass1_V_MOVAP(var p : tai) : boolean;
|
||||
function OptPass1VOP(var p : tai) : boolean;
|
||||
function OptPass1MOV(var p : tai) : boolean;
|
||||
@ -2299,6 +2300,57 @@ unit aoptx86;
|
||||
end;
|
||||
|
||||
|
||||
function TX86AsmOptimizer.OptPass1CMOVcc(var p: tai): Boolean;
|
||||
var
|
||||
hp1: tai;
|
||||
operswap: poper;
|
||||
begin
|
||||
Result := False;
|
||||
|
||||
{ Optimise:
|
||||
cmov(c) %reg1,%reg2
|
||||
mov %reg2,%reg1
|
||||
(%reg2 dealloc.)
|
||||
|
||||
To:
|
||||
cmov(~c) %reg2,%reg1
|
||||
}
|
||||
if (taicpu(p).oper[0]^.typ = top_reg) then
|
||||
while GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[1]^.reg) and
|
||||
MatchInstruction(hp1, A_MOV, [taicpu(p).opsize]) and
|
||||
MatchOperand(taicpu(hp1).oper[0]^, taicpu(p).oper[1]^.reg) and
|
||||
MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) do
|
||||
begin
|
||||
TransferUsedRegs(TmpUsedRegs);
|
||||
UpdateUsedRegsBetween(TmpUsedRegs, p, hp1);
|
||||
if not RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs) then
|
||||
begin
|
||||
DebugMsg(SPeepholeOptimization + 'CMOV(c) %reg1,%reg2; MOV %reg2,%reg1 -> CMOV(~c) %reg2,%reg1 (CMovMov2CMov)', p);
|
||||
|
||||
{ Save time by swapping the pointers (they're both registers, so
|
||||
we don't need to worry about reference counts) }
|
||||
operswap := taicpu(p).oper[0];
|
||||
taicpu(p).oper[0] := taicpu(p).oper[1];
|
||||
taicpu(p).oper[1] := operswap;
|
||||
|
||||
taicpu(p).condition := inverse_cond(taicpu(p).condition);
|
||||
|
||||
RemoveInstruction(hp1);
|
||||
|
||||
{ It's still a CMOV, so we can look further ahead }
|
||||
Include(OptsToCheck, aoc_ForceNewIteration);
|
||||
|
||||
{ But first, let's see if this will get optimised again
|
||||
(probably won't happen, but best to be sure) }
|
||||
Continue;
|
||||
end;
|
||||
|
||||
Break;
|
||||
end;
|
||||
|
||||
end;
|
||||
|
||||
|
||||
function TX86AsmOptimizer.OptPass1_V_MOVAP(var p : tai) : boolean;
|
||||
var
|
||||
hp1,hp2 : tai;
|
||||
@ -9693,7 +9745,13 @@ unit aoptx86;
|
||||
hp1, hp2: tai;
|
||||
FoundComparison: Boolean;
|
||||
begin
|
||||
{ Run the pass 1 optimisations as well, since they may have some effect
|
||||
after the CMOV blocks are created in OptPass2Jcc }
|
||||
Result := False;
|
||||
{ Result := OptPass1CMOVcc(p);
|
||||
if Result then
|
||||
Exit;}
|
||||
|
||||
{ Sometimes, the CMOV optimisations in OptPass2Jcc are a bit overzealous
|
||||
and make a slightly inefficent result on branching-type blocks, notably
|
||||
when setting a function result then jumping to the function epilogue.
|
||||
|
@ -1591,7 +1591,7 @@ implementation
|
||||
{$endif i8086}
|
||||
var
|
||||
{$ifndef i8086}
|
||||
memop,
|
||||
memop : integer;
|
||||
gotmem : boolean;
|
||||
op: TAsmOp;
|
||||
{$endif i8086}
|
||||
|
@ -94,6 +94,8 @@ uses
|
||||
Result:=OptPass1ADD(p);
|
||||
A_AND:
|
||||
Result:=OptPass1AND(p);
|
||||
A_CMOVcc:
|
||||
Result:=OptPass1CMOVcc(p);
|
||||
A_IMUL:
|
||||
Result:=OptPass1Imul(p);
|
||||
A_MOV:
|
||||
|
Loading…
Reference in New Issue
Block a user