* patch by J. Gareth Moreton to fix faulty conditional jump logic, resolves #38985

git-svn-id: trunk@49576 -
2025-08-15 07:09:18 +02:00 · 2021-07-03 21:05:27 +00:00 · 2021-07-03 21:05:27 +00:00 · 94a15faa7f
commit 94a15faa7f
parent b463d511bf
3 changed files with 36 additions and 38 deletions
--- a/compiler/aoptobj.pas
+++ b/compiler/aoptobj.pas
@ -2002,10 +2002,7 @@ Unit AoptObj;
 {$ifdef cpudelayslot}
                        RemoveDelaySlot(p);
 {$endif cpudelayslot}
-                        UpdateUsedRegs(tai(p.Next));
+                        RemoveCurrentP(p, hp1);
                        AsmL.Remove(p);
                        p.Free;
                        p := hp1;
                        Result := True;
                        Exit;
@ -2043,8 +2040,7 @@ Unit AoptObj;
 {$ifdef cpudelayslot}
                            RemoveDelaySlot(hp1);
 {$endif cpudelayslot}
-                            asml.remove(hp1);
+                            RemoveInstruction(hp1);
                            hp1.free;
                            stoploop := False;
@ -2081,7 +2077,7 @@ Unit AoptObj;
                else
                  begin
                    { Do not try to optimize if the test generating the condition
-                      is the same instruction, like 'bne	$v0,$zero,.Lj3' for MIPS }
+                      is the same instruction, like 'bne $v0,$zero,.Lj3' for MIPS }
                    if (taicpu(p).ops>1) or (taicpu(hp1).ops>1) then
                      exit;
@ -2089,7 +2085,8 @@ Unit AoptObj;
                        jmp<cond1>    @Lbl1
                        jmp<cond2>    @Lbl2
-                        Remove 2nd jump if conditions are equal or cond2 is fully covered by cond1
+                        Remove 2nd jump if conditions are equal or cond2 is a subset of cond1
                        (as if the first jump didn't branch, then neither will the 2nd)
                    }
                    if condition_in(taicpu(hp1).condition, taicpu(p).condition) then
@ -2098,9 +2095,10 @@ Unit AoptObj;
                        NCJLabel.decrefs;
                        GetNextInstruction(hp1, hp2);
-
+{$ifdef cpudelayslot}
-                        AsmL.Remove(hp1);
+                        RemoveDelaySlot(hp1);
-                        hp1.Free;
+{$endif cpudelayslot}
                        RemoveInstruction(hp1);
                        hp1 := hp2;
@ -2114,9 +2112,9 @@ Unit AoptObj;
                        jmp<cond1>  @Lbl1
                        jmp<cond2>  @Lbl2
-                      And inv(cond2) is a subset of cond1 (e.g. je followed by jne, or jae followed by jbe) )
+                      And inv(cond1) is a subset of cond2 (e.g. je followed by jne, or jae followed by jbe) )
                    }
-                    if condition_in(inverse_cond(taicpu(hp1).condition), taicpu(p).condition) then
+                    if condition_in(inverse_cond(taicpu(p).condition), taicpu(hp1).condition) then
                      begin
                        GetNextInstruction(hp1, hp2);
@ -2124,16 +2122,16 @@ Unit AoptObj;
                          the first jump completely }
                        if FindLabel(CJLabel, hp2) then
                          begin
                            { However, to be absolutely correct, cond2 must be changed to inv(cond1) }
                            taicpu(hp1).condition := inverse_cond(taicpu(p).condition);
                            DebugMsg(SPeepholeOptimization+'jmp<cond> before jmp<inv_cond> - removed first jump',p);
                            CJLabel.decrefs;
 {$ifdef cpudelayslot}
                            RemoveDelaySlot(p);
 {$endif cpudelayslot}
-                            UpdateUsedRegs(tai(p.Next));
+                            RemoveCurrentP(p, hp1);
                            AsmL.Remove(p);
                            p.Free;
                            p := hp1;
                            Result := True;
                            Exit;
@ -2146,7 +2144,7 @@ Unit AoptObj;
                            improve this particular optimisation to work on AVR,
                            please do. [Kit] }
                          begin
-                            { Since cond1 is a subset of inv(cond2), jmp<cond2> will always branch if
+                            { Since inv(cond1) is a subset of cond2, jmp<cond2> will always branch if
                              jmp<cond1> does not, so change jmp<cond2> to an unconditional jump. }
                            DebugMsg(SPeepholeOptimization+'jmp<cond> before jmp<inv_cond> - made second jump unconditional',p);
--- a/compiler/x86/aoptx86.pas
+++ b/compiler/x86/aoptx86.pas
@ -3406,7 +3406,7 @@ unit aoptx86;
            if (taicpu(p).oper[0]^.typ = top_reg) and
              (taicpu(p).oper[0]^.reg = taicpu(p).oper[1]^.reg) and
              MatchInstruction(hp1, A_JCC, []) and
-              (taicpu(hp1).oper[0]^.typ = top_ref) then
+              IsJumpToLabel(taicpu(hp1)) then
              begin
                JumpLabel := TAsmLabel(taicpu(hp1).oper[0]^.ref^.symbol);
                p_label := nil;
@ -3422,7 +3422,7 @@ unit aoptx86;
                  SuperRegistersEqual(taicpu(p_dist).oper[0]^.reg, taicpu(p).oper[0]^.reg) and
                  SuperRegistersEqual(taicpu(p_dist).oper[1]^.reg, taicpu(p).oper[1]^.reg) and
                  GetNextInstruction(p_dist, hp1_dist) and
-                  MatchInstruction(hp1_dist, A_JCC, []) then
+                  MatchInstruction(hp1_dist, A_JCC, []) then { This doesn't have to be an explicit label }
                  begin
                    JumpLabel_dist := TAsmLabel(taicpu(hp1_dist).oper[0]^.ref^.symbol);
@ -3430,9 +3430,10 @@ unit aoptx86;
                      { This is an infinite loop }
                      Exit;
-                    { Best optimisation when the second condition is a subset (or equal) to the first }
+                    { Best optimisation when the first condition is a subset (or equal) of the second }
-                    if condition_in(taicpu(hp1_dist).condition, taicpu(hp1).condition) then
+                    if condition_in(taicpu(hp1).condition, taicpu(hp1_dist).condition) then
                      begin
                        { Any registers used here will already be allocated }
                        if Assigned(JumpLabel_dist) then
                          JumpLabel_dist.IncRefs;
--- a/compiler/x86/cpubase.pas
+++ b/compiler/x86/cpubase.pas
@ -697,33 +697,32 @@ implementation
        if not Result then
          case Subset of
            C_A,  C_NBE:
-              Result := (c in [C_A,  C_AE, C_NB, C_NBE]);
+              Result := (c in [C_A,  C_AE, C_NB, C_NC, C_NBE,C_NE, C_NZ]);
-            C_AE, C_NB:
+            C_AE, C_NB, C_NC:
-              Result := (c in [C_AE, C_NB]);
+              { C_A  / C_NBE: CF = 0 and ZF = 0; not a subset because ZF has to be zero as well
-            C_B,  C_NAE:
+                C_AE / C_NB:  CF = 0 }
              Result := (c in [C_AE, C_NB, C_NC]);
            C_B,  C_C,  C_NAE:
              { C_B  / C_NAE: CF = 1
                C_BE / C_NA:  CF = 1 or ZF = 1 }
              Result := (c in [C_B,  C_BE, C_C,  C_NA, C_NAE]);
            C_BE, C_NA:
              Result := (c in [C_BE, C_NA]);
            C_C:
              { C_B  / C_NAE: CF = 1
                C_BE / C_NA:  CF = 1 or ZF = 1 }
              Result := (c in [C_B,  C_BE, C_NA, C_NAE]);
            C_E,  C_Z:
-              Result := (c in [C_AE, C_BE, C_E,  C_NA, C_NB, C_NG, C_NL]);
+              Result := (c in [C_AE, C_BE, C_E,  C_NA, C_NG, C_Z]);
            C_G,  C_NLE:
-              Result := (c in [C_G,  C_GE, C_NL, C_NLE]);
+              { Not-equal can be considered equivalent to less than or greater than }
              Result := (c in [C_G,  C_GE, C_NE, C_NL, C_NLE,C_NZ]);
            C_GE, C_NL:
              Result := (c in [C_GE, C_NL]);
            C_L,  C_NGE:
-              Result := (c in [C_L,  C_LE, C_NG, C_NGE]);
+              Result := (c in [C_L,  C_LE, C_NE, C_NG, C_NGE,C_NZ]);
            C_LE, C_NG:
              Result := (c in [C_LE, C_NG]);
            C_NC:
              { C_A  / C_NBE: CF = 0 and ZF = 0; not a subset because ZF has to be zero as well
                C_AE / C_NB:  CF = 0 }
              Result := (c in [C_AE, C_NB]);
            C_NE, C_NZ:
-              Result := (c in [C_NE, C_NZ, C_A,  C_B,  C_NAE,C_NBE,C_L,  C_G,  C_NLE,C_NGE]);
+              { Note that not equal is NOT a subset of greater/less than because
                not equal is less than OR greater than. Same with above and below }
              Result := (c in [C_NE, C_NZ]);
            C_NP, C_PO:
              Result := (c in [C_NP, C_PO]);
            C_P,  C_PE: