* x86: Improvement to CMP/JE/CMP/@Lbl/SETE -> CMP/SETE/CMP/SETE/OR
    optimisation to reduce pass counts (and work better with improved
    FLAGS register allocation).
This commit is contained in:
J. Gareth "Curious Kit" Moreton 2022-04-22 01:10:24 +01:00 committed by FPK
parent 1fd2d3a83d
commit 62d5ddffb8

View File

@ -6829,29 +6829,45 @@ unit aoptx86;
{ The first operand of CMP instructions can only be a register or { The first operand of CMP instructions can only be a register or
immediate anyway, so no need to check } immediate anyway, so no need to check }
GetNextInstruction(hp2, p_label) and GetNextInstruction(hp2, p_label) and
(
(p_label.typ = ait_label) or
(
{ Sometimes there's a zero-distance jump before the label, so deal with it here
to potentially cut down on the iterations of Pass 1 }
MatchInstruction(p_label, A_Jcc, []) and
IsJumpToLabel(taicpu(p_label)) and
{ Use p_dist to hold the jump briefly }
SetAndTest(p_label, p_dist) and
GetNextInstruction(p_dist, p_label) and
(p_label.typ = ait_label) and (p_label.typ = ait_label) and
(tai_label(p_label).labsym.getrefs >= 2) and
(JumpTargetOp(taicpu(p_dist))^.ref^.symbol = tai_label(p_label).labsym) and
{ We might as well collapse the jump now }
CollapseZeroDistJump(p_dist, tai_label(p_label).labsym)
)
) and
(tai_label(p_label).labsym.getrefs = 1) and (tai_label(p_label).labsym.getrefs = 1) and
(JumpTargetOp(taicpu(hp1))^.ref^.symbol = tai_label(p_label).labsym) and (JumpTargetOp(taicpu(hp1))^.ref^.symbol = tai_label(p_label).labsym) and
GetNextInstruction(p_label, p_dist) and GetNextInstruction(p_label, p_dist) and
MatchInstruction(p_dist, A_SETcc, []) and MatchInstruction(p_dist, A_SETcc, []) and
(taicpu(p_dist).condition in [C_E, C_Z]) and (taicpu(p_dist).condition in [C_E, C_Z]) and
(taicpu(p_dist).oper[0]^.typ = top_reg) then (taicpu(p_dist).oper[0]^.typ = top_reg) and
{ Get the instruction after the SETcc instruction so we can
allocate a new register over the entire range }
GetNextInstruction(p_dist, hp1_dist) then
begin begin
TransferUsedRegs(TmpUsedRegs); TransferUsedRegs(TmpUsedRegs);
UpdateUsedRegs(TmpUsedRegs, tai(hp1.Next)); UpdateUsedRegs(TmpUsedRegs, tai(hp1.Next));
UpdateUsedRegs(TmpUsedRegs, tai(hp2.Next)); UpdateUsedRegs(TmpUsedRegs, tai(hp2.Next));
UpdateUsedRegs(TmpUsedRegs, tai(p_label.Next)); UpdateUsedRegs(TmpUsedRegs, tai(p_label.Next));
UpdateUsedRegs(TmpUsedRegs, tai(p_dist.Next)); // UpdateUsedRegs(TmpUsedRegs, tai(p_dist.Next));
if not RegInUsedRegs(NR_DEFAULTFLAGS, TmpUsedRegs) and
{ Get the instruction after the SETcc instruction so we can
allocate a new register over the entire range }
GetNextInstruction(p_dist, hp1_dist) then
begin
{ Register can appear in p if it's not used afterwards, so only { Register can appear in p if it's not used afterwards, so only
allocate between hp1 and hp1_dist } allocate between hp1 and hp1_dist }
NewReg := GetIntRegisterBetween(R_SUBL, TmpUsedRegs, hp1, hp1_dist); NewReg := GetIntRegisterBetween(R_SUBL, TmpUsedRegs, hp1, hp1_dist);
if NewReg <> NR_NO then if (NewReg <> NR_NO) and
{ RegUsedAfterInstruction modifies TmpUsedRegs }
not RegUsedAfterInstruction(NR_DEFAULTFLAGS, p_dist, TmpUsedRegs) then
begin begin
DebugMsg(SPeepholeOptimization + 'CMP/JE/CMP/@Lbl/SETE -> CMP/SETE/CMP/SETE/OR, removing conditional branch', p); DebugMsg(SPeepholeOptimization + 'CMP/JE/CMP/@Lbl/SETE -> CMP/SETE/CMP/SETE/OR, removing conditional branch', p);
@ -6863,12 +6879,16 @@ unit aoptx86;
{ This is now a dead label } { This is now a dead label }
tai_label(p_label).labsym.decrefs; tai_label(p_label).labsym.decrefs;
{ Prefer adding before the next instruction so the FLAGS hp2 := taicpu.op_reg_reg(A_OR, S_B, NewReg, taicpu(p_dist).oper[0]^.reg);
register is deallocated first }
AsmL.InsertBefore( { Try to add the instruction right after the flags get deallocated, since
taicpu.op_reg_reg(A_OR, S_B, NewReg, taicpu(p_dist).oper[0]^.reg), the flags may become allocated again before the next instruction
hp1_dist (reuse p_dist, not hp1, since that needs to remain as the
); instruction immediately after p) }
if SetAndTest(FindRegDealloc(NR_DEFAULTFLAGS, tai(p_dist.Next)), p_dist) then
AsmL.InsertAfter(hp2, p_dist)
else
AsmL.InsertBefore(hp2, hp1_dist);
Result := True; Result := True;
{ Don't exit yet, as p wasn't changed and hp1, while { Don't exit yet, as p wasn't changed and hp1, while
@ -6876,7 +6896,6 @@ unit aoptx86;
SETcc optimisation below } SETcc optimisation below }
end; end;
end; end;
end;
if taicpu(p).oper[0]^.typ = top_const then if taicpu(p).oper[0]^.typ = top_const then
begin begin