From 116c861af6c13f6d3d0d0d91b6dc4e0c1eb88550 Mon Sep 17 00:00:00 2001
From: "J. Gareth \"Curious Kit\" Moreton" <gareth@moreton-family.com>
Date: Mon, 27 Dec 2021 16:18:13 +0000
Subject: [PATCH] MOV/CMP optimisation is now in both Pass 1 and Pass 2 to
 catch more eventualities

---
 compiler/x86/aoptx86.pas | 123 +++++++++++++++++++++++----------------
 1 file changed, 74 insertions(+), 49 deletions(-)

diff --git a/compiler/x86/aoptx86.pas b/compiler/x86/aoptx86.pas
index 39967d8103..85a3791c66 100644
--- a/compiler/x86/aoptx86.pas
+++ b/compiler/x86/aoptx86.pas
@@ -137,6 +137,7 @@ unit aoptx86;
         procedure RemoveLastDeallocForFuncRes(p : tai);
 
         function DoSubAddOpt(var p : tai) : Boolean;
+        function DoMovCmpMemOpt(var p : tai; const hp1: tai; UpdateTmpUsedRegs: Boolean) : Boolean;
 
         function PrePeepholeOptSxx(var p : tai) : boolean;
         function PrePeepholeOptIMUL(var p : tai) : boolean;
@@ -3878,50 +3879,10 @@ unit aoptx86;
                 Exit;
               end;
 
-            if MatchOpType(taicpu(p),top_ref,top_reg) and
-              { The x86 assemblers have difficulty comparing values against absolute addresses }
-              (taicpu(p).oper[0]^.ref^.refaddr in [addr_no, addr_pic, addr_pic_no_got]) and
-              (taicpu(hp1).oper[0]^.typ <> top_ref) and
-              MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[1]^.reg) and
-              (
-                (
-                  (taicpu(hp1).opcode = A_TEST)
-                ) or (
-                  (taicpu(hp1).opcode = A_CMP) and
-                  { A sanity check more than anything }
-                  not MatchOperand(taicpu(hp1).oper[0]^, taicpu(p).oper[1]^.reg)
-                )
-              ) then
+            if DoMovCmpMemOpt(p, hp1, True) then
               begin
-                { change
-                    mov      mem, %reg
-                    cmp/test x,   %reg / test %reg,%reg
-                    (reg deallocated)
-
-                    to
-
-                    cmp/test x,   mem  / cmp  0,   mem
-                }
-                TransferUsedRegs(TmpUsedRegs);
-                UpdateUsedRegs(TmpUsedRegs, tai(p.Next));
-                if not RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs) then
-                  begin
-                    { Convert test %reg,%reg or test $-1,%reg to cmp $0,mem }
-                    if (taicpu(hp1).opcode = A_TEST) and
-                      (
-                        MatchOperand(taicpu(hp1).oper[0]^, taicpu(p).oper[1]^.reg) or
-                        MatchOperand(taicpu(hp1).oper[0]^, -1)
-                      ) then
-                      begin
-                        taicpu(hp1).opcode := A_CMP;
-                        taicpu(hp1).loadconst(0, 0);
-                      end;
-                    taicpu(hp1).loadref(1, taicpu(p).oper[0]^.ref^);
-                    DebugMsg(SPeepholeOptimization + 'MOV/CMP -> CMP (memory check)', p);
-                    RemoveCurrentP(p, hp1);
-                    Result := True;
-                    Exit;
-                  end;
+                Result := True;
+                Exit;
               end;
           end;
 
@@ -5226,6 +5187,59 @@ unit aoptx86;
       end;
 
 
+    function TX86AsmOptimizer.DoMovCmpMemOpt(var p : tai; const hp1: tai; UpdateTmpUsedRegs: Boolean) : Boolean;
+      begin { Tries to fold "mov mem,%reg" (p) into the CMP/TEST that follows it (hp1); returns True if it did }
+        Result := False;
+        if UpdateTmpUsedRegs then
+          TransferUsedRegs(TmpUsedRegs); { NOTE(review): runs before the pattern check, so TmpUsedRegs changes even when nothing matches }
+        { hp1's operands are read before its opcode is tested below, so callers must already know it is a two-operand instruction }
+        if MatchOpType(taicpu(p),top_ref,top_reg) and
+          { The x86 assemblers have difficulty comparing values against absolute addresses }
+          (taicpu(p).oper[0]^.ref^.refaddr <> addr_full) and
+          (taicpu(hp1).oper[0]^.typ <> top_ref) and
+          MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[1]^.reg) and
+          (
+            (
+              (taicpu(hp1).opcode = A_TEST)
+            ) or (
+              (taicpu(hp1).opcode = A_CMP) and
+              { A sanity check more than anything: only operand 1 is replaced below, so "cmp %reg,%reg" would still read %reg after the MOV is gone }
+              not MatchOperand(taicpu(hp1).oper[0]^, taicpu(p).oper[1]^.reg)
+            )
+          ) then
+          begin
+            { change
+                mov      mem, %reg
+                cmp/test x,   %reg / test %reg,%reg
+                (reg deallocated)
+
+                to
+
+                cmp/test x,   mem  / cmp  0,   mem
+            }
+            UpdateUsedRegs(TmpUsedRegs, tai(p.Next)); { always executed, whatever UpdateTmpUsedRegs is; presumably advances the tracked state past p - confirm }
+            if not RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs) then
+              begin
+                { Convert test %reg,%reg or test $-1,%reg to cmp $0,mem }
+                if (taicpu(hp1).opcode = A_TEST) and
+                  (
+                    MatchOperand(taicpu(hp1).oper[0]^, taicpu(p).oper[1]^.reg) or
+                    MatchOperand(taicpu(hp1).oper[0]^, -1)
+                  ) then
+                  begin
+                    taicpu(hp1).opcode := A_CMP;
+                    taicpu(hp1).loadconst(0, 0);
+                  end;
+                taicpu(hp1).loadref(1, taicpu(p).oper[0]^.ref^); { operand 1 of hp1 now uses the MOV's memory reference instead of %reg }
+                DebugMsg(SPeepholeOptimization + 'MOV/CMP -> CMP (memory check)', p);
+                RemoveCurrentP(p, hp1); { presumably deletes the MOV and leaves p pointing at hp1 - confirm against RemoveCurrentP }
+                Result := True;
+                Exit;
+              end;
+          end;
+      end;
+
+
     function TX86AsmOptimizer.OptPass1Sub(var p : tai) : boolean;
       var
         hp1, hp2: tai;
@@ -6113,6 +6127,7 @@ unit aoptx86;
 
                    end;
                end;
+
              { Search for:
                  cmp   ###,###
                  j(c1) @lbl1
@@ -6120,16 +6135,16 @@ unit aoptx86;
 
                Remove second cmp
              }
-
              if GetNextInstruction(p_jump, hp2) and
                (
                  (
-                   MatchInstruction(hp2, A_CMP, []) and
+                   MatchInstruction(hp2, A_CMP, [taicpu(p).opsize]) and
                    (
                      (
                        MatchOpType(taicpu(p), top_const, top_reg) and
+                       MatchOpType(taicpu(hp2), top_const, top_reg) and
                        (taicpu(hp2).oper[0]^.val = taicpu(p).oper[0]^.val) and
-                       SuperRegistersEqual(taicpu(p).oper[1]^.reg, taicpu(hp2).oper[1]^.reg)
+                       Reg1WriteOverwritesReg2Entirely(taicpu(hp2).oper[1]^.reg, taicpu(p).oper[1]^.reg)
                      ) or (
                        MatchOperand(taicpu(hp2).oper[0]^, taicpu(p).oper[0]^) and
                        MatchOperand(taicpu(hp2).oper[1]^, taicpu(p).oper[1]^)
@@ -6142,7 +6157,7 @@ unit aoptx86;
                    MatchInstruction(hp2, A_TEST, []) and
                    MatchOpType(taicpu(hp2), top_reg, top_reg) and
                    (taicpu(hp2).oper[0]^.reg = taicpu(hp2).oper[1]^.reg) and
-                   SuperRegistersEqual(taicpu(p).oper[1]^.reg, taicpu(hp2).oper[1]^.reg)
+                   Reg1WriteOverwritesReg2Entirely(taicpu(hp2).oper[1]^.reg, taicpu(p).oper[1]^.reg)
                  )
                ) then
                begin
@@ -6175,6 +6190,10 @@ unit aoptx86;
            a conditional branch.
          }
          if not (cs_opt_size in current_settings.optimizerswitches) and
+           (
+             (hp1 = p_jump) or
+             GetNextInstruction(p, hp1)
+           ) and
            MatchInstruction(hp1, A_Jcc, []) and
            IsJumpToLabel(taicpu(hp1)) and
            (taicpu(hp1).condition in [C_E, C_Z]) and
@@ -6182,7 +6201,7 @@ unit aoptx86;
            MatchInstruction(hp2, A_CMP, A_TEST, [taicpu(p).opsize]) and
            MatchOperand(taicpu(p).oper[1]^, taicpu(hp2).oper[1]^) and
            { The first operand of CMP instructions can only be a register or
-             operand anyway, so no need to check }
+             immediate anyway, so no need to check }
            GetNextInstruction(hp2, p_label) and
            (p_label.typ = ait_label) and
            (tai_label(p_label).labsym.getrefs = 1) and
@@ -7516,7 +7535,13 @@ unit aoptx86;
         if not GetNextInstruction(p, hp1) then
           Exit;
 
-        if MatchInstruction(hp1, A_JMP, [S_NO]) then
+        if MatchInstruction(hp1, A_CMP, A_TEST, [taicpu(p).opsize])
+          and DoMovCmpMemOpt(p, hp1, True) then
+          begin
+            Result := True;
+            Exit;
+          end
+        else if MatchInstruction(hp1, A_JMP, [S_NO]) then
           begin
             { Sometimes the MOVs that OptPass2JMP produces can be improved
               further, but we can't just put this jump optimisation in pass 1