mirror of
https://gitlab.com/freepascal.org/fpc/source.git
synced 2025-08-18 01:09:06 +02:00
* x86: Improvement to CMP/JE/CMP/@Lbl/SETE ->
CMP/SETE/CMP/SETE/OR optimisation to reduce pass counts (and work better with improved FLAGS register allocation).
This commit is contained in:
parent
1fd2d3a83d
commit
62d5ddffb8
@ -6829,29 +6829,45 @@ unit aoptx86;
|
|||||||
{ The first operand of CMP instructions can only be a register or
|
{ The first operand of CMP instructions can only be a register or
|
||||||
immediate anyway, so no need to check }
|
immediate anyway, so no need to check }
|
||||||
GetNextInstruction(hp2, p_label) and
|
GetNextInstruction(hp2, p_label) and
|
||||||
|
(
|
||||||
|
(p_label.typ = ait_label) or
|
||||||
|
(
|
||||||
|
{ Sometimes there's a zero-distance jump before the label, so deal with it here
|
||||||
|
to potentially cut down on the iterations of Pass 1 }
|
||||||
|
MatchInstruction(p_label, A_Jcc, []) and
|
||||||
|
IsJumpToLabel(taicpu(p_label)) and
|
||||||
|
{ Use p_dist to hold the jump briefly }
|
||||||
|
SetAndTest(p_label, p_dist) and
|
||||||
|
GetNextInstruction(p_dist, p_label) and
|
||||||
(p_label.typ = ait_label) and
|
(p_label.typ = ait_label) and
|
||||||
|
(tai_label(p_label).labsym.getrefs >= 2) and
|
||||||
|
(JumpTargetOp(taicpu(p_dist))^.ref^.symbol = tai_label(p_label).labsym) and
|
||||||
|
{ We might as well collapse the jump now }
|
||||||
|
CollapseZeroDistJump(p_dist, tai_label(p_label).labsym)
|
||||||
|
)
|
||||||
|
) and
|
||||||
(tai_label(p_label).labsym.getrefs = 1) and
|
(tai_label(p_label).labsym.getrefs = 1) and
|
||||||
(JumpTargetOp(taicpu(hp1))^.ref^.symbol = tai_label(p_label).labsym) and
|
(JumpTargetOp(taicpu(hp1))^.ref^.symbol = tai_label(p_label).labsym) and
|
||||||
GetNextInstruction(p_label, p_dist) and
|
GetNextInstruction(p_label, p_dist) and
|
||||||
MatchInstruction(p_dist, A_SETcc, []) and
|
MatchInstruction(p_dist, A_SETcc, []) and
|
||||||
(taicpu(p_dist).condition in [C_E, C_Z]) and
|
(taicpu(p_dist).condition in [C_E, C_Z]) and
|
||||||
(taicpu(p_dist).oper[0]^.typ = top_reg) then
|
(taicpu(p_dist).oper[0]^.typ = top_reg) and
|
||||||
|
{ Get the instruction after the SETcc instruction so we can
|
||||||
|
allocate a new register over the entire range }
|
||||||
|
GetNextInstruction(p_dist, hp1_dist) then
|
||||||
begin
|
begin
|
||||||
TransferUsedRegs(TmpUsedRegs);
|
TransferUsedRegs(TmpUsedRegs);
|
||||||
UpdateUsedRegs(TmpUsedRegs, tai(hp1.Next));
|
UpdateUsedRegs(TmpUsedRegs, tai(hp1.Next));
|
||||||
UpdateUsedRegs(TmpUsedRegs, tai(hp2.Next));
|
UpdateUsedRegs(TmpUsedRegs, tai(hp2.Next));
|
||||||
UpdateUsedRegs(TmpUsedRegs, tai(p_label.Next));
|
UpdateUsedRegs(TmpUsedRegs, tai(p_label.Next));
|
||||||
UpdateUsedRegs(TmpUsedRegs, tai(p_dist.Next));
|
// UpdateUsedRegs(TmpUsedRegs, tai(p_dist.Next));
|
||||||
|
|
||||||
if not RegInUsedRegs(NR_DEFAULTFLAGS, TmpUsedRegs) and
|
|
||||||
{ Get the instruction after the SETcc instruction so we can
|
|
||||||
allocate a new register over the entire range }
|
|
||||||
GetNextInstruction(p_dist, hp1_dist) then
|
|
||||||
begin
|
|
||||||
{ Register can appear in p if it's not used afterwards, so only
|
{ Register can appear in p if it's not used afterwards, so only
|
||||||
allocate between hp1 and hp1_dist }
|
allocate between hp1 and hp1_dist }
|
||||||
NewReg := GetIntRegisterBetween(R_SUBL, TmpUsedRegs, hp1, hp1_dist);
|
NewReg := GetIntRegisterBetween(R_SUBL, TmpUsedRegs, hp1, hp1_dist);
|
||||||
if NewReg <> NR_NO then
|
if (NewReg <> NR_NO) and
|
||||||
|
{ RegUsedAfterInstruction modifies TmpUsedRegs }
|
||||||
|
not RegUsedAfterInstruction(NR_DEFAULTFLAGS, p_dist, TmpUsedRegs) then
|
||||||
begin
|
begin
|
||||||
DebugMsg(SPeepholeOptimization + 'CMP/JE/CMP/@Lbl/SETE -> CMP/SETE/CMP/SETE/OR, removing conditional branch', p);
|
DebugMsg(SPeepholeOptimization + 'CMP/JE/CMP/@Lbl/SETE -> CMP/SETE/CMP/SETE/OR, removing conditional branch', p);
|
||||||
|
|
||||||
@ -6863,12 +6879,16 @@ unit aoptx86;
|
|||||||
{ This is now a dead label }
|
{ This is now a dead label }
|
||||||
tai_label(p_label).labsym.decrefs;
|
tai_label(p_label).labsym.decrefs;
|
||||||
|
|
||||||
{ Prefer adding before the next instruction so the FLAGS
|
hp2 := taicpu.op_reg_reg(A_OR, S_B, NewReg, taicpu(p_dist).oper[0]^.reg);
|
||||||
register is deallocated first }
|
|
||||||
AsmL.InsertBefore(
|
{ Try to add the instruction right after the flags get deallocated, since
|
||||||
taicpu.op_reg_reg(A_OR, S_B, NewReg, taicpu(p_dist).oper[0]^.reg),
|
the flags may become allocated again before the next instruction
|
||||||
hp1_dist
|
(reuse p_dist, not hp1, since that needs to remain as the
|
||||||
);
|
instruction immediately after p) }
|
||||||
|
if SetAndTest(FindRegDealloc(NR_DEFAULTFLAGS, tai(p_dist.Next)), p_dist) then
|
||||||
|
AsmL.InsertAfter(hp2, p_dist)
|
||||||
|
else
|
||||||
|
AsmL.InsertBefore(hp2, hp1_dist);
|
||||||
|
|
||||||
Result := True;
|
Result := True;
|
||||||
{ Don't exit yet, as p wasn't changed and hp1, while
|
{ Don't exit yet, as p wasn't changed and hp1, while
|
||||||
@ -6876,7 +6896,6 @@ unit aoptx86;
|
|||||||
SETcc optimisation below }
|
SETcc optimisation below }
|
||||||
end;
|
end;
|
||||||
end;
|
end;
|
||||||
end;
|
|
||||||
|
|
||||||
if taicpu(p).oper[0]^.typ = top_const then
|
if taicpu(p).oper[0]^.typ = top_const then
|
||||||
begin
|
begin
|
||||||
|
Loading…
Reference in New Issue
Block a user