* x86: STC/CLC optimisations can now handle multiple Jcc instructions

This commit is contained in:
J. Gareth "Curious Kit" Moreton 2024-01-28 21:03:42 +00:00 committed by FPK
parent de03a1b894
commit 2055ef4e88

View File

@ -9412,9 +9412,9 @@ unit aoptx86;
function TX86AsmOptimizer.OptPass1STCCLC(var p: tai): Boolean; function TX86AsmOptimizer.OptPass1STCCLC(var p: tai): Boolean;
var var
hp1, hp2, p_dist, hp1_dist: tai; hp1, hp2, p_last, p_dist, hp1_dist: tai;
JumpLabel: TAsmLabel; JumpLabel: TAsmLabel;
Tmp: string; TmpBool: Boolean;
begin begin
Result := False; Result := False;
{ Look for: { Look for:
@ -9430,12 +9430,16 @@ unit aoptx86;
mov $0/$1,%reg (depending on if the carry bit is cleared or not) mov $0/$1,%reg (depending on if the carry bit is cleared or not)
j(c) .L2 j(c) .L2
} }
if not GetNextInstruction(p, hp1) then p_last := p;
Exit;
if (hp1.typ = ait_instruction) and while GetNextInstruction(p_last, hp1) and
IsJumpToLabel(taicpu(hp1)) then (hp1.typ = ait_instruction) and
IsJumpToLabel(taicpu(hp1)) do
begin begin
if DoJumpOptimizations(hp1, TmpBool) then
{ Re-evaluate from p_last. Probably could be faster, but it's guaranteed to be correct }
Continue;
JumpLabel := TAsmLabel(taicpu(hp1).oper[0]^.ref^.symbol); JumpLabel := TAsmLabel(taicpu(hp1).oper[0]^.ref^.symbol);
if not Assigned(JumpLabel) then if not Assigned(JumpLabel) then
InternalError(2024012801); InternalError(2024012801);
@ -9471,25 +9475,25 @@ unit aoptx86;
Asml.Remove(hp1); Asml.Remove(hp1);
Asml.InsertAfter(hp1, hp2); Asml.InsertAfter(hp1, hp2);
RemoveCurrentP(p, hp1); RemoveCurrentP(p); { hp1 may not be the immediate next instruction }
RemoveDeadCodeAfterJump(p); { Might as well do it now } Result := True;
Exit;
end end
else else
begin begin
JumpLabel.DecRefs;
if (taicpu(p).opcode = A_STC) then if (taicpu(p).opcode = A_STC) then
DebugMsg(SPeepholeOptimization + 'STC; JNC -> NOP (Deterministic jump) (StcJnc2Nop)', p) DebugMsg(SPeepholeOptimization + 'STC; JNC -> NOP (Deterministic jump) (StcJnc2Nop)', p)
else else
DebugMsg(SPeepholeOptimization + 'CLC; JC -> NOP (Deterministic jump) (ClcJc2Nop)', p); DebugMsg(SPeepholeOptimization + 'CLC; JC -> NOP (Deterministic jump) (ClcJc2Nop)', p);
{ In this case, the jump is deterministic in that it will never be taken } { In this case, the jump is deterministic in that it will never be taken }
RemoveCurrentP(p, tai(hp1.Next)); { hp1 will get removed too } JumpLabel.DecRefs;
RemoveInstruction(hp1); RemoveInstruction(hp1);
end;
Result := True; RemoveCurrentP(p); { hp1 may not have been the immediate next instruction }
Exit; Result := True;
Exit;
end;
end; end;
end; end;
@ -9504,36 +9508,42 @@ unit aoptx86;
begin begin
case taicpu(p_dist).opcode of case taicpu(p_dist).opcode of
A_Jcc: A_Jcc:
{ clc + jc = False; clc + jnc = True; stc + jc = True; stc + jnc = False } begin
if ((taicpu(p).opcode = A_STC) xor (taicpu(p_dist).condition = C_NC)) then if DoJumpOptimizations(p_dist, TmpBool) then
begin { Re-evaluate from p_last. Probably could be faster, but it's guaranteed to be correct }
DebugMsg(SPeepholeOptimization + 'STC/CLC; JMP/Jcc; ... J(N)C -> JMP/Jcc (StcClcJ(c)2Jmp)', p); Continue;
JumpLabel.decrefs; { clc + jc = False; clc + jnc = True; stc + jc = True; stc + jnc = False }
taicpu(hp1).loadsymbol(0, taicpu(p_dist).oper[0]^.ref^.symbol, 0); if ((taicpu(p).opcode = A_STC) xor (taicpu(p_dist).condition = C_NC)) then
begin
DebugMsg(SPeepholeOptimization + 'STC/CLC; JMP/Jcc; ... J(N)C -> JMP/Jcc (StcClcJ(c)2Jmp)', p);
RemoveCurrentP(p, hp1); JumpLabel.decrefs;
Result := True; taicpu(hp1).loadsymbol(0, taicpu(p_dist).oper[0]^.ref^.symbol, 0);
Exit;
end
else if GetNextInstruction(p_dist, hp1_dist) and
(hp1_dist.typ = ait_label) then
begin
DebugMsg(SPeepholeOptimization + 'STC/CLC; JMP/Jcc; ... J(N)C; .Lbl -> JMP/Jcc .Lbl (StcClcJ(~c)Lbl2Jmp)', p);
JumpLabel.decrefs; RemoveCurrentP(p); { hp1 may not be the immediate next instruction }
taicpu(hp1).loadsymbol(0, tai_label(hp1_dist).labsym, 0); Result := True;
Exit;
end
else if GetNextInstruction(p_dist, hp1_dist) and
(hp1_dist.typ = ait_label) then
begin
DebugMsg(SPeepholeOptimization + 'STC/CLC; JMP/Jcc; ... J(N)C; .Lbl -> JMP/Jcc .Lbl (StcClcJ(~c)Lbl2Jmp)', p);
RemoveCurrentP(p, hp1); JumpLabel.decrefs;
Result := True; taicpu(hp1).loadsymbol(0, tai_label(hp1_dist).labsym, 0);
Exit;
end; RemoveCurrentP(p); { hp1 may not be the immediate next instruction }
Result := True;
Exit;
end;
end;
A_SETcc: A_SETcc:
if { Make sure the flags aren't used again } if { Make sure the flags aren't used again }
SetAndTest(FindRegDealloc(NR_DEFAULTFLAGS, tai(p_dist.Next)), hp2) and SetAndTest(FindRegDealloc(NR_DEFAULTFLAGS, tai(p_dist.Next)), hp2) and
GetNextInstruction(hp2, hp1_dist) and GetNextInstruction(hp2, hp1_dist) and
(Hp1_dist.typ = ait_instruction) and (hp1_dist.typ = ait_instruction) and
IsJumpToLabel(taicpu(hp1_dist)) and IsJumpToLabel(taicpu(hp1_dist)) and
not (taicpu(hp1_dist).condition in [C_AE, C_NB, C_NC, C_B, C_C, C_NAE, C_BE, C_NA]) and not (taicpu(hp1_dist).condition in [C_AE, C_NB, C_NC, C_B, C_C, C_NAE, C_BE, C_NA]) and
{ This works if hp1_dist or both are regular JMP instructions } { This works if hp1_dist or both are regular JMP instructions }
@ -9557,11 +9567,11 @@ unit aoptx86;
taicpu(hp1).loadsymbol(0, taicpu(hp1_dist).oper[0]^.ref^.symbol, 0); taicpu(hp1).loadsymbol(0, taicpu(hp1_dist).oper[0]^.ref^.symbol, 0);
{ If a flag allocation is found, try to move it to after the MOV so "mov $0,%reg" gets optimised to "xor %reg,%reg" } { If a flag allocation is found, try to move it to after the MOV so "mov $0,%reg" gets optimised to "xor %reg,%reg" }
if SetAndTest(FindRegAllocBackward(NR_DEFAULTFLAGS, tai(p.Previous)), hp1) and if SetAndTest(FindRegAllocBackward(NR_DEFAULTFLAGS, tai(p.Previous)), hp2) and
(tai_regalloc(hp1).ratype = ra_alloc) then (tai_regalloc(hp2).ratype = ra_alloc) then
begin begin
Asml.Remove(hp1); Asml.Remove(hp2);
Asml.InsertAfter(hp1, p); Asml.InsertAfter(hp2, p);
end; end;
Result := True; Result := True;
@ -9571,6 +9581,8 @@ unit aoptx86;
; ;
end; end;
end; end;
p_last := hp1;
end; end;
end; end;