From 588cccb3cad9e43f2d0bc1683cfbe3b4ccd5e237 Mon Sep 17 00:00:00 2001
From: peter <peter@freepascal.org>
Date: Mon, 13 Mar 2006 16:37:08 +0000
Subject: [PATCH]   * remove some obsolete i386 specific code   * use
 optimize_op_const to remove useless opcodes

git-svn-id: trunk@2910 -
---
 compiler/cgbase.pas       |   3 +-
 compiler/cgobj.pas        |  72 +++++++-----
 compiler/i386/n386add.pas |   2 +
 compiler/i386/n386mat.pas | 235 ++++++++++++++++----------------------
 compiler/m68k/cgcpu.pas   |   8 +-
 compiler/ncgadd.pas       |  10 +-
 compiler/x86/cgx86.pas    | 146 ++++++-----------------
 compiler/x86/nx86add.pas  |  58 +---------
 8 files changed, 195 insertions(+), 339 deletions(-)

diff --git a/compiler/cgbase.pas b/compiler/cgbase.pas
index 9b2f580018..343ce3e930 100644
--- a/compiler/cgbase.pas
+++ b/compiler/cgbase.pas
@@ -81,6 +81,7 @@ interface
        topcg =
        (
           OP_NONE,
+          OP_MOVE,      { replaced operation with direct load }
           OP_ADD,       { simple addition          }
           OP_AND,       { simple logical and       }
           OP_DIV,       { simple unsigned division }
@@ -593,7 +594,7 @@ implementation
     function commutativeop(op: topcg): boolean;{$ifdef USEINLINE}inline;{$endif}
       const
         list: array[topcg] of boolean =
-          (true,true,true,false,false,true,true,false,false,
+          (true,false,true,true,false,false,true,true,false,false,
            true,false,false,false,false,true);
       begin
         commutativeop := list[op];
diff --git a/compiler/cgobj.pas b/compiler/cgobj.pas
index c0da9640c1..c57b0e0251 100644
--- a/compiler/cgobj.pas
+++ b/compiler/cgobj.pas
@@ -285,18 +285,16 @@ unit cgobj;
           procedure g_flags2ref(list: taasmoutput; size: TCgSize; const f: tresflags; const ref:TReference); virtual;
 
           {
-             This routine tries to optimize the const_reg opcode, and should be
-             called at the start of a_op_const_reg. It returns the actual opcode
-             to emit, and the constant value to emit. If this routine returns
-             TRUE, @var(no) instruction should be emitted (.eg : imul reg by 1 )
+             This routine tries to optimize the op_const_reg/ref opcode, and should be
+             called at the start of a_op_const_reg/ref. It returns the actual opcode
+             to emit, and the constant value to emit. This routine can return OP_NONE to
+             remove the opcode and OP_MOVE to replace it with a simple load
 
              @param(op The opcode to emit, returns the opcode which must be emitted)
              @param(a  The constant which should be emitted, returns the constant which must
                     be emitted)
-             @param(reg The register to emit the opcode with, returns the register with
-                   which the opcode will be emitted)
           }
-          function optimize_op_const_reg(list: taasmoutput; var op: topcg; var a : aint; var reg: tregister): boolean;virtual;
+          procedure optimize_op_const(var op: topcg; var a : aint);virtual;
 
          {#
              This routine is used in exception management nodes. It should
@@ -936,55 +934,69 @@ implementation
       end;
 
 
-    function tcg.optimize_op_const_reg(list: taasmoutput; var op: topcg; var a : aint; var reg:tregister): boolean;
+    procedure tcg.optimize_op_const(var op: topcg; var a : aint);
       var
         powerval : longint;
       begin
-        optimize_op_const_reg := false;
         case op of
-          { or with zero returns same result }
-          OP_OR : if a = 0 then optimize_op_const_reg := true;
-          { and with max returns same result }
-          OP_AND : if (a = high(a)) then optimize_op_const_reg := true;
-          { division by 1 returns result }
+          OP_OR :
+            begin
+              { or with zero returns same result }
+              if a = 0 then
+                op:=OP_NONE
+              else
+              { or with max returns max }
+                if a = -1 then
+                  op:=OP_MOVE;
+            end;
+          OP_AND :
+            begin
+              { and with max returns same result }
+              if (a = -1) then
+                op:=OP_NONE
+              else
+              { and with 0 returns 0 }
+                if a=0 then
+                  op:=OP_MOVE;
+            end;
           OP_DIV :
             begin
+              { division by 1 returns result }
               if a = 1 then
-                optimize_op_const_reg := true
+                op:=OP_NONE
               else if ispowerof2(int64(a), powerval) then
                 begin
                   a := powerval;
                   op:= OP_SHR;
                 end;
-              exit;
             end;
           OP_IDIV:
             begin
               if a = 1 then
-                optimize_op_const_reg := true
-              else if ispowerof2(int64(a), powerval) then
-                begin
-                  a := powerval;
-                  op:= OP_SAR;
-                end;
-               exit;
+                op:=OP_NONE;
             end;
-        OP_MUL,OP_IMUL:
+         OP_MUL,OP_IMUL:
             begin
                if a = 1 then
-                  optimize_op_const_reg := true
+                 op:=OP_NONE
+               else
+                 if a=0 then
+                   op:=OP_MOVE
                else if ispowerof2(int64(a), powerval) then
                  begin
                    a := powerval;
                    op:= OP_SHL;
                  end;
-               exit;
+            end;
+        OP_ADD,OP_SUB:
+            begin
+               if a = 0 then
+                 op:=OP_NONE;
             end;
         OP_SAR,OP_SHL,OP_SHR:
            begin
               if a = 0 then
-                 optimize_op_const_reg := true;
-              exit;
+                op:=OP_NONE;
            end;
         end;
       end;
@@ -1729,7 +1741,7 @@ implementation
 {$endif}
                 if to_signed then
                   begin
-                    { calculation of the low/high ranges must not overflow 64 bit 
+                    { calculation of the low/high ranges must not overflow 64 bit
                      otherwise we end up comparing with zero for 64 bit data types on
                      64 bit processors }
                     if (lto = (int64(-1) << (tosize * 8 - 1))) and
@@ -1738,7 +1750,7 @@ implementation
                   end
                 else
                   begin
-                    { calculation of the low/high ranges must not overflow 64 bit 
+                    { calculation of the low/high ranges must not overflow 64 bit
                      otherwise we end up having all zeros for 64 bit data types on
                      64 bit processors }
                     if (lto = 0) and
diff --git a/compiler/i386/n386add.pas b/compiler/i386/n386add.pas
index 92e9ef29ea..aa13fbe8da 100644
--- a/compiler/i386/n386add.pas
+++ b/compiler/i386/n386add.pas
@@ -349,6 +349,8 @@ interface
         hl4 : tasmlabel;
 
     begin
+      pass_left_right;
+
       {The location.register will be filled in later (JM)}
       location_reset(location,LOC_REGISTER,OS_INT);
       {Get a temp register and load the left value into it
diff --git a/compiler/i386/n386mat.pas b/compiler/i386/n386mat.pas
index cba99c7e93..4349067762 100644
--- a/compiler/i386/n386mat.pas
+++ b/compiler/i386/n386mat.pas
@@ -33,9 +33,8 @@ interface
          procedure pass_2;override;
       end;
 
-      ti386shlshrnode = class(tshlshrnode)
-         procedure pass_2;override;
-         { everything will be handled in pass_2 }
+      ti386shlshrnode = class(tcgshlshrnode)
+         procedure second_64bit;override;
          function first_shlshr64bitint: tnode; override;
       end;
 
@@ -165,7 +164,7 @@ implementation
                         m_high:=m_high shr 1;
                         dec(l);
                       end;
-                    m:=m_high;
+                    m:=dword(m_high);
                     s:=l;
                     if (m_high shr 31)<>0 then
                       a:=1
@@ -223,7 +222,7 @@ implementation
                     d:=tordconstnode(right).value;
                     if d>=$80000000 then
                       begin
-                        emit_const_reg(A_CMP,S_L,d,hreg1);
+                        emit_const_reg(A_CMP,S_L,aint(d),hreg1);
                         location.register:=cg.getintregister(exprasmlist,OS_INT);
                         emit_const_reg(A_MOV,S_L,0,location.register);
                         emit_const_reg(A_SBB,S_L,-1,location.register);
@@ -359,147 +358,111 @@ implementation
 
 
     function ti386shlshrnode.first_shlshr64bitint: tnode;
+      begin
+        result := nil;
+      end;
 
-    begin
-      result := nil;
-    end;
-
-    procedure ti386shlshrnode.pass_2;
-
-    var hreg64hi,hreg64lo:Tregister;
-        op:Tasmop;
+    procedure ti386shlshrnode.second_64bit;
+      var
+        hreg64hi,hreg64lo:Tregister;
         v : TConstExprInt;
         l1,l2,l3:Tasmlabel;
+      begin
+        location_reset(location,LOC_REGISTER,OS_64);
 
-    begin
-      secondpass(left);
-      secondpass(right);
+        { load left operator in a register }
+        location_force_reg(exprasmlist,left.location,OS_64,false);
+        hreg64hi:=left.location.register64.reghi;
+        hreg64lo:=left.location.register64.reglo;
 
-      { determine operator }
-      if nodetype=shln then
-        op:=A_SHL
-      else
-        op:=A_SHR;
-
-      if is_64bitint(left.resulttype.def) then
-        begin
-          location_reset(location,LOC_REGISTER,OS_64);
-
-          { load left operator in a register }
-          location_force_reg(exprasmlist,left.location,OS_64,false);
-          hreg64hi:=left.location.register64.reghi;
-          hreg64lo:=left.location.register64.reglo;
-
-          { shifting by a constant directly coded: }
-          if (right.nodetype=ordconstn) then
-            begin
-              v:=Tordconstnode(right).value and 63;
-              if v>31 then
-                begin
-                  if nodetype=shln then
-                    begin
-                      emit_reg_reg(A_XOR,S_L,hreg64hi,hreg64hi);
-                      if ((v and 31) <> 0) then
-                        emit_const_reg(A_SHL,S_L,v and 31,hreg64lo);
-                    end
-                  else
-                    begin
-                      emit_reg_reg(A_XOR,S_L,hreg64lo,hreg64lo);
-                      if ((v and 31) <> 0) then
-                        emit_const_reg(A_SHR,S_L,v and 31,hreg64hi);
-                    end;
-                  location.register64.reghi:=hreg64lo;
-                  location.register64.reglo:=hreg64hi;
-                end
-              else
-                begin
-                  if nodetype=shln then
-                    begin
-                      emit_const_reg_reg(A_SHLD,S_L,v and 31,hreg64lo,hreg64hi);
+        { shifting by a constant directly coded: }
+        if (right.nodetype=ordconstn) then
+          begin
+            v:=Tordconstnode(right).value and 63;
+            if v>31 then
+              begin
+                if nodetype=shln then
+                  begin
+                    emit_reg_reg(A_XOR,S_L,hreg64hi,hreg64hi);
+                    if ((v and 31) <> 0) then
                       emit_const_reg(A_SHL,S_L,v and 31,hreg64lo);
-                    end
-                  else
-                    begin
-                      emit_const_reg_reg(A_SHRD,S_L,v and 31,hreg64hi,hreg64lo);
+                  end
+                else
+                  begin
+                    emit_reg_reg(A_XOR,S_L,hreg64lo,hreg64lo);
+                    if ((v and 31) <> 0) then
                       emit_const_reg(A_SHR,S_L,v and 31,hreg64hi);
-                    end;
-                  location.register64.reglo:=hreg64lo;
-                  location.register64.reghi:=hreg64hi;
-                end;
-            end
-          else
-            begin
-              { load right operators in a register }
-              cg.getcpuregister(exprasmlist,NR_ECX);
-              cg.a_load_loc_reg(exprasmlist,OS_32,right.location,NR_ECX);
+                  end;
+                location.register64.reghi:=hreg64lo;
+                location.register64.reglo:=hreg64hi;
+              end
+            else
+              begin
+                if nodetype=shln then
+                  begin
+                    emit_const_reg_reg(A_SHLD,S_L,v and 31,hreg64lo,hreg64hi);
+                    emit_const_reg(A_SHL,S_L,v and 31,hreg64lo);
+                  end
+                else
+                  begin
+                    emit_const_reg_reg(A_SHRD,S_L,v and 31,hreg64hi,hreg64lo);
+                    emit_const_reg(A_SHR,S_L,v and 31,hreg64hi);
+                  end;
+                location.register64.reglo:=hreg64lo;
+                location.register64.reghi:=hreg64hi;
+              end;
+          end
+        else
+          begin
+            { load right operators in a register }
+            cg.getcpuregister(exprasmlist,NR_ECX);
+            cg.a_load_loc_reg(exprasmlist,OS_32,right.location,NR_ECX);
 
-              { left operator is already in a register }
-              { hence are both in a register }
-              { is it in the case ECX ? }
+            { left operator is already in a register }
+            { hence are both in a register }
+            { is it in the case ECX ? }
 
-              { the damned shift instructions work only til a count of 32 }
-              { so we've to do some tricks here                           }
-              objectlibrary.getjumplabel(l1);
-              objectlibrary.getjumplabel(l2);
-              objectlibrary.getjumplabel(l3);
-              emit_const_reg(A_CMP,S_L,64,NR_ECX);
-              cg.a_jmp_flags(exprasmlist,F_L,l1);
-              emit_reg_reg(A_XOR,S_L,hreg64lo,hreg64lo);
-              emit_reg_reg(A_XOR,S_L,hreg64hi,hreg64hi);
-              cg.a_jmp_always(exprasmlist,l3);
-              cg.a_label(exprasmlist,l1);
-              emit_const_reg(A_CMP,S_L,32,NR_ECX);
-              cg.a_jmp_flags(exprasmlist,F_L,l2);
-              emit_const_reg(A_SUB,S_L,32,NR_ECX);
-              if nodetype=shln then
-                begin
-                  emit_reg_reg(A_SHL,S_L,NR_CL,hreg64lo);
-                  emit_reg_reg(A_MOV,S_L,hreg64lo,hreg64hi);
-                  emit_reg_reg(A_XOR,S_L,hreg64lo,hreg64lo);
-                  cg.a_jmp_always(exprasmlist,l3);
-                  cg.a_label(exprasmlist,l2);
-                  emit_reg_reg_reg(A_SHLD,S_L,NR_CL,hreg64lo,hreg64hi);
-                  emit_reg_reg(A_SHL,S_L,NR_CL,hreg64lo);
-                end
-              else
-                begin
-                  emit_reg_reg(A_SHR,S_L,NR_CL,hreg64hi);
-                  emit_reg_reg(A_MOV,S_L,hreg64hi,hreg64lo);
-                  emit_reg_reg(A_XOR,S_L,hreg64hi,hreg64hi);
-                  cg.a_jmp_always(exprasmlist,l3);
-                  cg.a_label(exprasmlist,l2);
-                  emit_reg_reg_reg(A_SHRD,S_L,NR_CL,hreg64hi,hreg64lo);
-                  emit_reg_reg(A_SHR,S_L,NR_CL,hreg64hi);
-                end;
-              cg.a_label(exprasmlist,l3);
+            { the damned shift instructions work only til a count of 32 }
+            { so we've to do some tricks here                           }
+            objectlibrary.getjumplabel(l1);
+            objectlibrary.getjumplabel(l2);
+            objectlibrary.getjumplabel(l3);
+            emit_const_reg(A_CMP,S_L,64,NR_ECX);
+            cg.a_jmp_flags(exprasmlist,F_L,l1);
+            emit_reg_reg(A_XOR,S_L,hreg64lo,hreg64lo);
+            emit_reg_reg(A_XOR,S_L,hreg64hi,hreg64hi);
+            cg.a_jmp_always(exprasmlist,l3);
+            cg.a_label(exprasmlist,l1);
+            emit_const_reg(A_CMP,S_L,32,NR_ECX);
+            cg.a_jmp_flags(exprasmlist,F_L,l2);
+            emit_const_reg(A_SUB,S_L,32,NR_ECX);
+            if nodetype=shln then
+              begin
+                emit_reg_reg(A_SHL,S_L,NR_CL,hreg64lo);
+                emit_reg_reg(A_MOV,S_L,hreg64lo,hreg64hi);
+                emit_reg_reg(A_XOR,S_L,hreg64lo,hreg64lo);
+                cg.a_jmp_always(exprasmlist,l3);
+                cg.a_label(exprasmlist,l2);
+                emit_reg_reg_reg(A_SHLD,S_L,NR_CL,hreg64lo,hreg64hi);
+                emit_reg_reg(A_SHL,S_L,NR_CL,hreg64lo);
+              end
+            else
+              begin
+                emit_reg_reg(A_SHR,S_L,NR_CL,hreg64hi);
+                emit_reg_reg(A_MOV,S_L,hreg64hi,hreg64lo);
+                emit_reg_reg(A_XOR,S_L,hreg64hi,hreg64hi);
+                cg.a_jmp_always(exprasmlist,l3);
+                cg.a_label(exprasmlist,l2);
+                emit_reg_reg_reg(A_SHRD,S_L,NR_CL,hreg64hi,hreg64lo);
+                emit_reg_reg(A_SHR,S_L,NR_CL,hreg64hi);
+              end;
+            cg.a_label(exprasmlist,l3);
 
-              cg.ungetcpuregister(exprasmlist,NR_ECX);
-              location.register64.reglo:=hreg64lo;
-              location.register64.reghi:=hreg64hi;
-            end;
-        end
-      else
-        begin
-          { load left operators in a register }
-          location_copy(location,left.location);
-          location_force_reg(exprasmlist,location,OS_INT,false);
-
-          { shifting by a constant directly coded: }
-          if (right.nodetype=ordconstn) then
-            { l shl 32 should 0 imho, but neither TP nor Delphi do it in this way (FK)}
-            emit_const_reg(op,S_L,tordconstnode(right).value and 31,location.register)
-          else
-            begin
-              { load right operators in a ECX }
-              cg.getcpuregister(exprasmlist,NR_ECX);
-              cg.a_load_loc_reg(exprasmlist,OS_32,right.location,NR_ECX);
-
-              { right operand is in ECX }
-              cg.ungetcpuregister(exprasmlist,NR_ECX);
-              emit_reg_reg(op,S_L,NR_CL,location.register);
-            end;
-        end;
-    end;
+            cg.ungetcpuregister(exprasmlist,NR_ECX);
+            location.register64.reglo:=hreg64lo;
+            location.register64.reghi:=hreg64hi;
+          end;
+      end;
 
 
 begin
diff --git a/compiler/m68k/cgcpu.pas b/compiler/m68k/cgcpu.pas
index 69f0248960..dc12e36800 100644
--- a/compiler/m68k/cgcpu.pas
+++ b/compiler/m68k/cgcpu.pas
@@ -404,11 +404,13 @@ unit cgcpu;
        opcode : tasmop;
        r,r2 : Tregister;
       begin
-        { need to emit opcode? }
-        if optimize_op_const_reg(list, op, a, reg) then
-           exit;
+        optimize_op_const(op, a);
         opcode := topcg2tasmop[op];
         case op of
+          OP_NONE :
+              begin
+                { Opcode is optimized away }
+              end;
           OP_ADD :
               begin
                 if (a >= 1) and (a <= 8) then
diff --git a/compiler/ncgadd.pas b/compiler/ncgadd.pas
index f85f03cbd2..4a3f9f3b99 100644
--- a/compiler/ncgadd.pas
+++ b/compiler/ncgadd.pas
@@ -459,8 +459,7 @@ interface
         ovloc.loc:=LOC_VOID;
 
         pass_left_right;
-        force_reg_left_right(false,(cs_check_overflow in aktlocalswitches) and
-                                   (nodetype in [addn,subn]));
+        force_reg_left_right(false,true);
         set_result_location_reg;
 
         { assume no overflow checking is required }
@@ -634,8 +633,7 @@ interface
         ovloc.loc:=LOC_VOID;
 
         pass_left_right;
-        force_reg_left_right(false,(cs_check_overflow in aktlocalswitches) and
-                                   (nodetype in [addn,subn,muln]));
+        force_reg_left_right(false,true);
         set_result_location_reg;
 
         { determine if the comparison will be unsigned }
@@ -680,7 +678,7 @@ interface
 
        if nodetype<>subn then
         begin
-          if (right.location.loc >LOC_CONSTANT) then
+          if (right.location.loc<>LOC_CONSTANT) then
             cg.a_op_reg_reg_reg_checkoverflow(exprasmlist,cgop,location.size,
                left.location.register,right.location.register,
                location.register,checkoverflow and (cs_check_overflow in aktlocalswitches),ovloc)
@@ -708,7 +706,7 @@ interface
             begin
               tmpreg:=cg.getintregister(exprasmlist,location.size);
               cg.a_load_const_reg(exprasmlist,location.size,
-                aword(left.location.value),tmpreg);
+                left.location.value,tmpreg);
               cg.a_op_reg_reg_reg_checkoverflow(exprasmlist,OP_SUB,location.size,
                 right.location.register,tmpreg,location.register,checkoverflow and (cs_check_overflow in aktlocalswitches),ovloc);
             end;
diff --git a/compiler/x86/cgx86.pas b/compiler/x86/cgx86.pas
index 10448e4088..619c1e67cb 100644
--- a/compiler/x86/cgx86.pas
+++ b/compiler/x86/cgx86.pas
@@ -63,11 +63,6 @@ unit cgx86;
         procedure a_op_ref_reg(list : taasmoutput; Op: TOpCG; size: TCGSize; const ref: TReference; reg: TRegister); override;
         procedure a_op_reg_ref(list : taasmoutput; Op: TOpCG; size: TCGSize;reg: TRegister; const ref: TReference); override;
 
-        procedure a_op_const_reg_reg(list: taasmoutput; op: TOpCg;
-          size: tcgsize; a: aint; src, dst: tregister); override;
-        procedure a_op_reg_reg_reg(list: taasmoutput; op: TOpCg;
-          size: tcgsize; src1, src2, dst: tregister); override;
-
         { move instructions }
         procedure a_load_const_reg(list : taasmoutput; tosize: tcgsize; a : aint;reg : tregister);override;
         procedure a_load_const_ref(list : taasmoutput; tosize: tcgsize; a : aint;const ref : treference);override;
@@ -160,8 +155,8 @@ unit cgx86;
        fmodule;
 
     const
-      TOpCG2AsmOp: Array[topcg] of TAsmOp = (A_NONE,A_ADD,A_AND,A_DIV,
-                            A_IDIV,A_MUL, A_IMUL, A_NEG,A_NOT,A_OR,
+      TOpCG2AsmOp: Array[topcg] of TAsmOp = (A_NONE,A_MOV,A_ADD,A_AND,A_DIV,
+                            A_IDIV,A_IMUL,A_MUL,A_NEG,A_NOT,A_OR,
                             A_SAR,A_SHL,A_SHR,A_SUB,A_XOR);
 
       TOpCmp2AsmCond: Array[topcmp] of TAsmCond = (C_NONE,
@@ -572,7 +567,7 @@ unit cgx86;
         sym : tasmsymbol;
         r : treference;
       begin
- 
+
         if (target_info.system <> system_i386_darwin) then
           begin
             sym:=objectlibrary.newasmsymbol(s,AB_EXTERNAL,AT_FUNCTION);
@@ -984,10 +979,10 @@ unit cgx86;
         opmm2asmop : array[0..1,OS_F32..OS_F64,topcg] of tasmop = (
           ( { scalar }
             ( { OS_F32 }
-              A_NOP,A_ADDSS,A_NOP,A_DIVSS,A_NOP,A_NOP,A_MULSS,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_SUBSS,A_NOP
+              A_NOP,A_NOP,A_ADDSS,A_NOP,A_DIVSS,A_NOP,A_NOP,A_MULSS,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_SUBSS,A_NOP
             ),
             ( { OS_F64 }
-              A_NOP,A_ADDSD,A_NOP,A_DIVSD,A_NOP,A_NOP,A_MULSD,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_SUBSD,A_NOP
+              A_NOP,A_NOP,A_ADDSD,A_NOP,A_DIVSD,A_NOP,A_NOP,A_MULSD,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_SUBSD,A_NOP
             )
           ),
           ( { vectorized/packed }
@@ -995,10 +990,10 @@ unit cgx86;
               these
             }
             ( { OS_F32 }
-              A_NOP,A_ADDPS,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_XORPS
+              A_NOP,A_NOP,A_ADDPS,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_XORPS
             ),
             ( { OS_F64 }
-              A_NOP,A_ADDPD,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_XORPD
+              A_NOP,A_NOP,A_ADDPD,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_XORPD
             )
           )
         );
@@ -1062,9 +1057,11 @@ unit cgx86;
         tmpreg : tregister;
 {$endif x86_64}
       begin
+        optimize_op_const(op, a);
 {$ifdef x86_64}
         { x86_64 only supports signed 32 bits constants directly }
-        if (size in [OS_S64,OS_64]) and
+        if not(op in [OP_NONE,OP_MOVE]) and
+           (size in [OS_S64,OS_64]) and
             ((a<low(longint)) or (a>high(longint))) then
           begin
             tmpreg:=getintregister(list,size);
@@ -1075,6 +1072,15 @@ unit cgx86;
 {$endif x86_64}
         check_register_size(size,reg);
         case op of
+          OP_NONE :
+            begin
+              { Opcode is optimized away }
+            end;
+          OP_MOVE :
+            begin
+              { Optimized, replaced with a simple load }
+              a_load_const_reg(list,size,a,reg);
+            end;
           OP_DIV, OP_IDIV:
             begin
               if ispowerof2(int64(a),power) then
@@ -1155,11 +1161,13 @@ unit cgx86;
 {$endif x86_64}
         tmpref  : treference;
       begin
+        optimize_op_const(op, a);
         tmpref:=ref;
         make_simple_ref(list,tmpref);
 {$ifdef x86_64}
         { x86_64 only supports signed 32 bits constants directly }
-        if (size in [OS_S64,OS_64]) and
+        if not(op in [OP_NONE,OP_MOVE]) and
+           (size in [OS_S64,OS_64]) and
             ((a<low(longint)) or (a>high(longint))) then
           begin
             tmpreg:=getintregister(list,size);
@@ -1169,6 +1177,15 @@ unit cgx86;
           end;
 {$endif x86_64}
         Case Op of
+          OP_NONE :
+            begin
+              { Opcode is optimized away }
+            end;
+          OP_MOVE :
+            begin
+              { Optimized, replaced with a simple load }
+              a_load_const_ref(list,size,a,ref);
+            end;
           OP_DIV, OP_IDIV:
             Begin
               if ispowerof2(int64(a),power) then
@@ -1266,10 +1283,11 @@ unit cgx86;
             internalerror(200109233);
           OP_SHR,OP_SHL,OP_SAR:
             begin
-              getcpuregister(list,NR_CL);
-              a_load_reg_reg(list,OS_8,OS_8,makeregsize(list,src,OS_8),NR_CL);
-              list.concat(taicpu.op_reg_reg(Topcg2asmop[op],tcgsize2opsize[size],NR_CL,src));
-              ungetcpuregister(list,NR_CL);
+              { Use ecx to load the value, that allows better coalescing }
+              getcpuregister(list,NR_ECX);
+              a_load_reg_reg(list,size,OS_32,src,NR_ECX);
+              list.concat(taicpu.op_reg_reg(Topcg2asmop[op],tcgsize2opsize[size],NR_CL,dst));
+              ungetcpuregister(list,NR_ECX);
             end;
           else
             begin
@@ -1338,98 +1356,6 @@ unit cgx86;
       end;
 
 
-    procedure tcgx86.a_op_const_reg_reg(list: taasmoutput; op: TOpCg; size: tcgsize; a: aint; src, dst: tregister);
-      var
-        tmpref: treference;
-        power: longint;
-{$ifdef x86_64}
-        tmpreg : tregister;
-{$endif x86_64}
-      begin
-{$ifdef x86_64}
-        { x86_64 only supports signed 32 bits constants directly }
-        if (size in [OS_S64,OS_64]) and
-            ((a<low(longint)) or (a>high(longint))) then
-          begin
-            tmpreg:=getintregister(list,size);
-            a_load_const_reg(list,size,a,tmpreg);
-            a_op_reg_reg_reg(list,op,size,tmpreg,src,dst);
-            exit;
-          end;
-{$endif x86_64}
-        check_register_size(size,src);
-        check_register_size(size,dst);
-        if tcgsize2size[size]<>tcgsize2size[OS_INT] then
-          begin
-            inherited a_op_const_reg_reg(list,op,size,a,src,dst);
-            exit;
-          end;
-        { if we get here, we have to do a 32 bit calculation, guaranteed }
-        case op of
-          OP_DIV, OP_IDIV, OP_MUL, OP_AND, OP_OR, OP_XOR, OP_SHL, OP_SHR,
-          OP_SAR:
-            { can't do anything special for these }
-            inherited a_op_const_reg_reg(list,op,size,a,src,dst);
-          OP_IMUL:
-            begin
-              if not(cs_check_overflow in aktlocalswitches) and
-                 ispowerof2(int64(a),power) then
-                { can be done with a shift }
-                begin
-                  inherited a_op_const_reg_reg(list,op,size,a,src,dst);
-                  exit;
-                end;
-              list.concat(taicpu.op_const_reg_reg(A_IMUL,tcgsize2opsize[size],a,src,dst));
-            end;
-          OP_ADD, OP_SUB:
-            if (a = 0) then
-              a_load_reg_reg(list,size,size,src,dst)
-            else
-              begin
-                reference_reset(tmpref);
-                tmpref.base := src;
-                tmpref.offset := longint(a);
-                if op = OP_SUB then
-                  tmpref.offset := -tmpref.offset;
-                list.concat(taicpu.op_ref_reg(A_LEA,tcgsize2opsize[size],tmpref,dst));
-              end
-          else internalerror(200112302);
-        end;
-      end;
-
-
-    procedure tcgx86.a_op_reg_reg_reg(list: taasmoutput; op: TOpCg;size: tcgsize; src1, src2, dst: tregister);
-      var
-        tmpref: treference;
-      begin
-        check_register_size(size,src1);
-        check_register_size(size,src2);
-        check_register_size(size,dst);
-        if tcgsize2size[size]<>tcgsize2size[OS_INT] then
-          begin
-            inherited a_op_reg_reg_reg(list,op,size,src1,src2,dst);
-            exit;
-          end;
-        { if we get here, we have to do a 32 bit calculation, guaranteed }
-        Case Op of
-          OP_DIV, OP_IDIV, OP_MUL, OP_AND, OP_OR, OP_XOR, OP_SHL, OP_SHR,
-          OP_SAR,OP_SUB,OP_NOT,OP_NEG:
-            { can't do anything special for these }
-            inherited a_op_reg_reg_reg(list,op,size,src1,src2,dst);
-          OP_IMUL:
-            list.concat(taicpu.op_reg_reg_reg(A_IMUL,tcgsize2opsize[size],src1,src2,dst));
-          OP_ADD:
-            begin
-              reference_reset(tmpref);
-              tmpref.base := src1;
-              tmpref.index := src2;
-              tmpref.scalefactor := 1;
-              list.concat(taicpu.op_ref_reg(A_LEA,tcgsize2opsize[size],tmpref,dst));
-            end
-          else internalerror(200112303);
-        end;
-      end;
-
 {*************** compare instructructions ****************}
 
     procedure tcgx86.a_cmp_const_reg_label(list : taasmoutput;size : tcgsize;cmp_op : topcmp;a : aint;reg : tregister;
diff --git a/compiler/x86/nx86add.pas b/compiler/x86/nx86add.pas
index 66f8fdf78c..3a05998d31 100644
--- a/compiler/x86/nx86add.pas
+++ b/compiler/x86/nx86add.pas
@@ -978,65 +978,17 @@ unit nx86add;
 *****************************************************************************}
 
     procedure tx86addnode.second_addordinal;
-      var
-         mboverflow : boolean;
-         op : tasmop;
-         opsize : tcgsize;
-         { true, if unsigned types are compared }
-         unsigned : boolean;
-         { true, if for sets subtractions the extra not should generated }
-         extra_not : boolean;
       begin
-         { defaults }
-         extra_not:=false;
-         mboverflow:=false;
-         unsigned:=not(is_signed(left.resulttype.def)) or
-                   not(is_signed(right.resulttype.def));
-         opsize:=def_cgsize(left.resulttype.def);
-
-         pass_left_right;
-
-         case nodetype of
-           addn :
-             begin
-               op:=A_ADD;
-               mboverflow:=true;
-             end;
-           muln :
-             begin
-               if unsigned then
-                 op:=A_MUL
-               else
-                 op:=A_IMUL;
-               mboverflow:=true;
-             end;
-           subn :
-             begin
-               op:=A_SUB;
-               mboverflow:=true;
-             end;
-           xorn :
-             op:=A_XOR;
-           orn :
-             op:=A_OR;
-           andn :
-             op:=A_AND;
-           else
-             internalerror(200304229);
-         end;
-
-         { filter MUL, which requires special handling }
-         if op=A_MUL then
+         { filter unsigned MUL opcode, which requires special handling }
+         if (nodetype=muln) and
+            (not(is_signed(left.resulttype.def)) or
+             not(is_signed(right.resulttype.def))) then
            begin
              second_mul;
              exit;
            end;
 
-         left_must_be_reg(opsize,false);
-         emit_generic_code(op,opsize,unsigned,extra_not,mboverflow);
-         location_freetemp(exprasmlist,right.location);
-
-         set_result_location_reg;
+         inherited second_addordinal;
       end;