From cd3f064a3318f9a23c0437234c3c1852c46ab364 Mon Sep 17 00:00:00 2001
From: Jonas Maebe <jonas@freepascal.org>
Date: Wed, 12 Oct 2005 19:47:21 +0000
Subject: [PATCH]   + enabled postpeepholeopts phase   + optimize "integer op"
 followed by comparison of target register with zero     to a variant of that
 integer op which sets the flags (ppc)   + change rlwinm. instructions which
 do nothing but an "and" operation into     andi./andis., since the rlwinm. is
 cracked on the G5 while andi./andis.     isn't

git-svn-id: trunk@1361 -
---
 compiler/aopt.pas            |   4 +-
 compiler/aoptobj.pas         |  19 +++++
 compiler/powerpc/aoptcpu.pas | 138 ++++++++++++++++++++++++++++++++++-
 3 files changed, 158 insertions(+), 3 deletions(-)

diff --git a/compiler/aopt.pas b/compiler/aopt.pas
index f0d0c6ea66..c3d9e6ec0a 100644
--- a/compiler/aopt.pas
+++ b/compiler/aopt.pas
@@ -219,7 +219,9 @@ Unit aopt;
               End;
             { more peephole optimizations }
       {      PeepHoleOptPass2;}
-            { free memory�}
+            { if pass = last_pass then }
+            PostPeepHoleOpts;
+            { free memory }
             clear;
             { continue where we left off, BlockEnd is either the start of an }
             { assembler block or nil}
diff --git a/compiler/aoptobj.pas b/compiler/aoptobj.pas
index f5f5186f1d..49a1698a0e 100644
--- a/compiler/aoptobj.pas
+++ b/compiler/aoptobj.pas
@@ -298,6 +298,7 @@ Unit AoptObj;
         { processor dependent methods }
         // if it returns true, perform a "continue"
         function PeepHoleOptPass1Cpu(var p: tai): boolean; virtual;
+        function PostPeepHoleOptsCpu(var p: tai): boolean; virtual;
       End;
 
        Function ArrayRefsEq(const r1, r2: TReference): Boolean;
@@ -1094,7 +1095,19 @@ Unit AoptObj;
 
 
     procedure TAOptObj.PostPeepHoleOpts;
+      var
+        p: tai; { item of the current block that is being examined }
       begin
+        p := BlockStart;
+        //!!!! UsedRegs := [];  (disabled: this loop does not track used registers)
+        while (p <> BlockEnd) Do
+          begin
+            //!!!! UpDateUsedRegs(UsedRegs, tai(p.next));  (disabled, see above)
+            if PostPeepHoleOptsCpu(p) then
+              continue; { the cpu hook returned true: examine p again, do not advance }
+            //!!!!!!!! updateUsedRegs(UsedRegs,p);  (disabled, see above)
+            p:=tai(p.next);
+          end;
       end;
 
 
@@ -1103,4 +1116,10 @@ Unit AoptObj;
         result := false;
       end;
 
+
+    function TAOptObj.PostPeepHoleOptsCpu(var p: tai): boolean;
+      begin
+        result := false; { default hook: changes nothing, so the caller just advances }
+      end;
+
 End.
diff --git a/compiler/powerpc/aoptcpu.pas b/compiler/powerpc/aoptcpu.pas
index 98825b5a59..00b5463f60 100644
--- a/compiler/powerpc/aoptcpu.pas
+++ b/compiler/powerpc/aoptcpu.pas
@@ -34,12 +34,15 @@ Type
   TCpuAsmOptimizer = class(TAsmOptimizer)
     { uses the same constructor as TAopObj }
     function PeepHoleOptPass1Cpu(var p: tai): boolean; override;
+    { cpu-specific hook called for every item by the PostPeepHoleOpts loop }
+    function PostPeepHoleOptsCpu(var p: tai): boolean; override;
+
   End;
 
 Implementation
 
   uses
-    cutils, aasmcpu;
+    cutils, aasmcpu, cgbase;
 
   function TCpuAsmOptimizer.PeepHoleOptPass1Cpu(var p: tai): boolean;
     var
@@ -146,7 +149,138 @@ Implementation
       end;
     end;
 
+
+  const { modifyflags[op] = variant of op that also sets the flags, a_none if there is none }
+    modifyflags: array[tasmop] of tasmop =
+      (a_none, a_add_, a_add_, a_addo_, a_addo_, a_addc_, a_addc_, a_addco_, a_addco_,
+      a_adde_, a_adde_, a_addeo_, a_addeo_, {a_addi could be addic_ if sure doesn't disturb carry} a_none, a_addic_, a_addic_, a_none,
+      a_addme_, a_addme_, a_addmeo_, a_addmeo_, a_addze_, a_addze_, a_addzeo_,
+      a_addzeo_, a_and_, a_and_, a_andc_, a_andc_, a_andi_, a_andis_, a_none,
+      a_none, a_none, a_none, a_none, a_none, a_none, a_none, a_none, a_none, a_none,
+      a_none, a_none, a_none, a_none, a_none, a_cntlzw_, a_cntlzw_, a_none,
+      a_none, a_none, a_none, a_none, a_none, a_none, a_none, a_none,
+      a_none, a_none, a_none, a_none, a_none, a_none, a_divw_, a_divw_, a_divwo_, a_divwo_,
+      a_divwu_, a_divwu_, a_divwuo_, a_divwuo_, a_none, a_none, a_none, a_eqv_,
+      a_eqv_, a_extsb_, a_extsb_, a_extsh_, a_extsh_, a_none, a_none, a_none,
+      a_none, a_none, a_none, a_none, a_none, a_none, a_none, a_none,
+      a_none, a_none, a_none, a_none, a_none, a_none, a_none, a_none,
+      a_none, a_none, a_none, a_none, a_none, a_none, a_none, a_none,
+      a_none, a_none, a_none, a_none, a_none, a_none, a_none,
+      a_none, a_none, a_none, a_none, a_none, a_none,
+      a_none, a_none, a_none, a_none, a_none, a_none, a_none,
+      a_none, a_none, a_none, a_none, a_none, a_none, a_none, a_none,
+      a_none, a_none, a_none, a_none, a_none, a_none, a_none, a_none,
+      a_none, a_none, a_none, a_none, a_none, a_none, a_none, a_none, a_none,
+      a_none, a_none, a_none, a_none, a_none, a_none, a_none, a_none, a_none,
+      a_none, a_none, a_none, a_none, a_none, a_none, a_none, a_none, a_none,
+      a_none, a_none, a_none, {was a_mffs, a_mffs_, a_mfmsr, a_mfspr, a_mfsr: none of them sets cr0} a_none, a_none, a_none, a_none, a_none,
+      {was a_mfsrin, a_mftb, a_mtcrf: none of them sets cr0} a_none, a_none, a_none, a_none, a_none, a_none, a_none,
+      a_none, a_none, a_none, a_none, a_none, a_none, a_mulhw_,
+      a_mulhw_, a_mulhwu_, a_mulhwu_, a_none, a_mullw_, a_mullw_, a_mullwo_,
+      a_mullwo_, a_nand_, a_nand_, a_neg_, a_neg_, a_nego_, a_nego_, a_nor_, a_nor_,
+      a_or_, a_or_, a_orc_, a_orc_, a_none, a_none, a_none, a_rlwimi_, a_rlwimi_,
+      a_rlwinm_, a_rlwinm_, a_rlwnm_, a_rlwnm_, a_none, a_slw_, a_slw_, a_sraw_, a_sraw_,
+      a_srawi_, a_srawi_,a_srw_, a_srw_, a_none, a_none, a_none, a_none, a_none,
+      a_none, a_none, a_none, a_none, a_none, a_none, a_none, a_none,
+      a_none, a_none, a_none, a_none, a_none, a_none, a_none, a_none, a_none,
+      a_none, a_none, a_none, a_none, a_none, a_subf_, a_subf_, a_subfo_,
+      a_subfo_, a_subfc_, a_subfc_, a_subfco_, a_subfco_, a_subfe_, a_subfe_,
+      a_subfeo_, a_subfeo_, a_none, a_subfme_, a_subfme_, a_subfmeo_, a_subfmeo_,
+      a_subfze_, a_subfze_, a_subfzeo_, a_subfzeo_, a_none, a_none, a_none,
+      a_none, a_none, a_none, a_xor_, a_xor_, a_none, a_none,
+      { simplified mnemonics }
+      a_none, a_none, a_subic_, a_subic_, a_sub_, a_sub_, a_subo_, a_subo_,
+      a_subc_, a_subc_, a_subco_, a_subco_, a_none, a_none, a_none, a_none,
+      a_extlwi_, a_extlwi_, a_extrwi_, a_extrwi_, a_inslwi_, a_inslwi_, a_insrwi_,
+      a_insrwi_, a_rotlwi_, a_rotlwi_, a_rotlw_, a_rotlw_, a_slwi_, a_slwi_,
+      a_srwi_, a_srwi_, a_clrlwi_, a_clrlwi_, a_clrrwi_, a_clrrwi_, a_clrslwi_,
+      a_clrslwi_, a_none, a_none, a_none, a_none, a_none, a_none, a_none,
+      a_none, a_none {move to special purpose reg}, a_none {move from special purpose reg},
+      a_none, a_none, a_none, a_none, a_mr_, a_mr_, a_not_, a_not_, a_none, a_none, a_none,
+      a_none, a_none);
+
+  { Switches p to the flag-setting variant of its opcode; returns false if none exists. }
+  function changetomodifyflags(p: taicpu): boolean;
+    var
+      flagop: tasmop;
+    begin
+      flagop := modifyflags[p.opcode];
+      result := flagop <> a_none;
+      if result then p.opcode := flagop;
+    end;
+
+
+
+  { Rewrites a "rlwinm." that only masks bits inside one halfword into andi./andis. }
+  { and folds "op rD,...; cmpwi [cr0,]rD,0" into the flag-setting variant of op.     }
+  { Returns true only if the compare was removed; p itself is never replaced.        }
+  function TCpuAsmOptimizer.PostPeepHoleOptsCpu(var p: tai): boolean;
+    var next1: tai;
+    begin
+      result := false;
+      case p.typ of
+        ait_instruction:
+          begin
+            case taicpu(p).opcode of
+              A_RLWINM_:
+                begin
+                  // rlwinm_ is cracked on the G5, andi_/andis_ aren't
+                  if (taicpu(p).oper[2]^.val = 0) and
+                     // MB > ME is a wrap-around mask touching both halfwords: keep rlwinm.
+                     (taicpu(p).oper[3]^.val <= taicpu(p).oper[4]^.val) then
+                    if (taicpu(p).oper[3]^.val < 16) and (taicpu(p).oper[4]^.val < 16) then
+                      begin
+                        taicpu(p).opcode := A_ANDIS_;
+                        taicpu(p).oper[2]^.val :=
+                          ((1 shl (16-taicpu(p).oper[3]^.val)) - 1) and
+                          not((1 shl (15-taicpu(p).oper[4]^.val)) - 1);
+                        taicpu(p).clearop(3);
+                        taicpu(p).clearop(4);
+                        taicpu(p).ops := 3;
+                        taicpu(p).opercnt := 2; // NOTE(review): ops is 3 but opercnt 2 - confirm intended
+                      end
+                    else if (taicpu(p).oper[3]^.val >= 16) and (taicpu(p).oper[4]^.val >= 16) then
+                      begin
+                        taicpu(p).opcode := A_ANDI_;
+                        taicpu(p).oper[2]^.val :=
+                          ((1 shl (32-taicpu(p).oper[3]^.val)) - 1) and
+                          not((1 shl (31-taicpu(p).oper[4]^.val)) - 1);
+                        taicpu(p).clearop(3);
+                        taicpu(p).clearop(4);
+                        taicpu(p).ops := 3;
+                        taicpu(p).opercnt := 2;
+                      end;
+                end;
+            end;
+            // change "integer operation with destination reg" followed by a
+            // comparison to zero of that reg, with a variant of that integer
+            // operation which sets the flags (if it exists)
+            if not(result) and
+               (taicpu(p).ops >= 2) and
+               (taicpu(p).oper[0]^.typ = top_reg) and (taicpu(p).oper[1]^.typ = top_reg) and
+               getnextinstruction(p,next1) and
+               (next1.typ = ait_instruction) and
+               // record forms compare signed with 0, so an unsigned cmplwi can't be folded
+               (taicpu(next1).opcode = A_CMPWI) and
+               // make sure the result goes to cr0
+               (((taicpu(next1).ops = 2) and
+                 (taicpu(next1).oper[1]^.val = 0) and
+                 (taicpu(next1).oper[0]^.reg = taicpu(p).oper[0]^.reg)) or
+                ((taicpu(next1).ops = 3) and
+                 (taicpu(next1).oper[2]^.val = 0) and
+                 (taicpu(next1).oper[0]^.typ = top_reg) and
+                 (getsupreg(taicpu(next1).oper[0]^.reg) = RS_CR0) and
+                 (taicpu(next1).oper[1]^.reg = taicpu(p).oper[0]^.reg))) and
+               changetomodifyflags(taicpu(p)) then
+              begin
+                asml.remove(next1);
+                next1.free;
+                result := true;
+              end;
+          end;
+      end;
+    end;
+
 begin
   casmoptimizer:=TCpuAsmOptimizer;
 End.
-