* remove some obsolete i386 specific code

* use a_optimize_const to remove useless opcodes git-svn-id: trunk@2910 -
2025-08-17 17:09:09 +02:00 · 2006-03-13 16:37:08 +00:00 · 2006-03-13 16:37:08 +00:00 · 588cccb3ca
commit 588cccb3ca
parent 2b9ccf6579
8 changed files with 195 additions and 339 deletions
--- a/compiler/cgbase.pas
+++ b/compiler/cgbase.pas
@ -81,6 +81,7 @@ interface
       topcg =
       (
          OP_NONE,
+          OP_MOVE,      { replaced operation with direct load }
          OP_ADD,       { simple addition          }
          OP_AND,       { simple logical and       }
          OP_DIV,       { simple unsigned division }
@ -593,7 +594,7 @@ implementation
    function commutativeop(op: topcg): boolean;{$ifdef USEINLINE}inline;{$endif}
      const
        list: array[topcg] of boolean =
-          (true,true,true,false,false,true,true,false,false,
+          (true,false,true,true,false,false,true,true,false,false,
           true,false,false,false,false,true);
      begin
        commutativeop := list[op];
--- a/compiler/cgobj.pas
+++ b/compiler/cgobj.pas
@ -285,18 +285,16 @@ unit cgobj;
          procedure g_flags2ref(list: taasmoutput; size: TCgSize; const f: tresflags; const ref:TReference); virtual;

          {
-             This routine tries to optimize the const_reg opcode, and should be
-             called at the start of a_op_const_reg. It returns the actual opcode
-             to emit, and the constant value to emit. If this routine returns
-             TRUE, @var(no) instruction should be emitted (.eg : imul reg by 1 )
+             This routine tries to optimize the op_const_reg/ref opcode, and should be
+             called at the start of a_op_const_reg/ref. It returns the actual opcode
+             to emit, and the constant value to emit. This routine can set op to OP_NONE to
+             remove the opcode and to OP_MOVE to replace it with a simple load

             @param(op The opcode to emit, returns the opcode which must be emitted)
             @param(a  The constant which should be emitted, returns the constant which must
                    be emitted)
-             @param(reg The register to emit the opcode with, returns the register with
-                   which the opcode will be emitted)
          }
-          function optimize_op_const_reg(list: taasmoutput; var op: topcg; var a : aint; var reg: tregister): boolean;virtual;
+          procedure optimize_op_const(var op: topcg; var a : aint);virtual;

         {#
             This routine is used in exception management nodes. It should
@ -936,55 +934,69 @@ implementation
      end;


-    function tcg.optimize_op_const_reg(list: taasmoutput; var op: topcg; var a : aint; var reg:tregister): boolean;
+    procedure tcg.optimize_op_const(var op: topcg; var a : aint);
      var
        powerval : longint;
      begin
-        optimize_op_const_reg := false;
        case op of
-          { or with zero returns same result }
-          OP_OR : if a = 0 then optimize_op_const_reg := true;
-          { and with max returns same result }
-          OP_AND : if (a = high(a)) then optimize_op_const_reg := true;
-          { division by 1 returns result }
+          OP_OR :
+            begin
+              { or with zero returns same result }
+              if a = 0 then
+                op:=OP_NONE
+              else
+              { or with max returns max }
+                if a = -1 then
+                  op:=OP_MOVE;
+            end;
+          OP_AND :
+            begin
+              { and with max returns same result }
+              if (a = -1) then
+                op:=OP_NONE
+              else
+              { and with 0 returns 0 }
+                if a=0 then
+                  op:=OP_MOVE;
+            end;
          OP_DIV :
            begin
+              { division by 1 returns result }
              if a = 1 then
-                optimize_op_const_reg := true
+                op:=OP_NONE
              else if ispowerof2(int64(a), powerval) then
                begin
                  a := powerval;
                  op:= OP_SHR;
                end;
-              exit;
            end;
          OP_IDIV:
            begin
              if a = 1 then
-                optimize_op_const_reg := true
-              else if ispowerof2(int64(a), powerval) then
-                begin
-                  a := powerval;
-                  op:= OP_SAR;
-                end;
-               exit;
+                op:=OP_NONE;
            end;
-        OP_MUL,OP_IMUL:
+         OP_MUL,OP_IMUL:
            begin
               if a = 1 then
-                  optimize_op_const_reg := true
+                 op:=OP_NONE
+               else
+                 if a=0 then
+                   op:=OP_MOVE
               else if ispowerof2(int64(a), powerval) then
                 begin
                   a := powerval;
                   op:= OP_SHL;
                 end;
-               exit;
+            end;
+        OP_ADD,OP_SUB:
+            begin
+               if a = 0 then
+                 op:=OP_NONE;
            end;
        OP_SAR,OP_SHL,OP_SHR:
           begin
              if a = 0 then
-                 optimize_op_const_reg := true;
-              exit;
+                op:=OP_NONE;
           end;
        end;
      end;
@ -1729,7 +1741,7 @@ implementation
 {$endif}
                if to_signed then
                  begin
-                    { calculation of the low/high ranges must not overflow 64 bit 
+                    { calculation of the low/high ranges must not overflow 64 bit
                     otherwise we end up comparing with zero for 64 bit data types on
                     64 bit processors }
                    if (lto = (int64(-1) << (tosize * 8 - 1))) and
@ -1738,7 +1750,7 @@ implementation
                  end
                else
                  begin
-                    { calculation of the low/high ranges must not overflow 64 bit 
+                    { calculation of the low/high ranges must not overflow 64 bit
                     otherwise we end up having all zeros for 64 bit data types on
                     64 bit processors }
                    if (lto = 0) and
--- a/compiler/i386/n386add.pas
+++ b/compiler/i386/n386add.pas
@ -349,6 +349,8 @@ interface
        hl4 : tasmlabel;

    begin
+      pass_left_right;
+
      {The location.register will be filled in later (JM)}
      location_reset(location,LOC_REGISTER,OS_INT);
      {Get a temp register and load the left value into it
--- a/compiler/i386/n386mat.pas
+++ b/compiler/i386/n386mat.pas
@ -33,9 +33,8 @@ interface
         procedure pass_2;override;
      end;

-      ti386shlshrnode = class(tshlshrnode)
-         procedure pass_2;override;
-         { everything will be handled in pass_2 }
+      ti386shlshrnode = class(tcgshlshrnode)
+         procedure second_64bit;override;
         function first_shlshr64bitint: tnode; override;
      end;

@ -165,7 +164,7 @@ implementation
                        m_high:=m_high shr 1;
                        dec(l);
                      end;
-                    m:=m_high;
+                    m:=dword(m_high);
                    s:=l;
                    if (m_high shr 31)<>0 then
                      a:=1
@ -223,7 +222,7 @@ implementation
                    d:=tordconstnode(right).value;
                    if d>=$80000000 then
                      begin
-                        emit_const_reg(A_CMP,S_L,d,hreg1);
+                        emit_const_reg(A_CMP,S_L,aint(d),hreg1);
                        location.register:=cg.getintregister(exprasmlist,OS_INT);
                        emit_const_reg(A_MOV,S_L,0,location.register);
                        emit_const_reg(A_SBB,S_L,-1,location.register);
@ -359,147 +358,111 @@ implementation


    function ti386shlshrnode.first_shlshr64bitint: tnode;
+      begin
+        result := nil;
+      end;

-    begin
-      result := nil;
-    end;
-
-    procedure ti386shlshrnode.pass_2;
-
-    var hreg64hi,hreg64lo:Tregister;
-        op:Tasmop;
+    procedure ti386shlshrnode.second_64bit;
+      var
+        hreg64hi,hreg64lo:Tregister;
        v : TConstExprInt;
        l1,l2,l3:Tasmlabel;
+      begin
+        location_reset(location,LOC_REGISTER,OS_64);

-    begin
-      secondpass(left);
-      secondpass(right);
+        { load left operator in a register }
+        location_force_reg(exprasmlist,left.location,OS_64,false);
+        hreg64hi:=left.location.register64.reghi;
+        hreg64lo:=left.location.register64.reglo;

-      { determine operator }
-      if nodetype=shln then
-        op:=A_SHL
-      else
-        op:=A_SHR;
-
-      if is_64bitint(left.resulttype.def) then
-        begin
-          location_reset(location,LOC_REGISTER,OS_64);
-
-          { load left operator in a register }
-          location_force_reg(exprasmlist,left.location,OS_64,false);
-          hreg64hi:=left.location.register64.reghi;
-          hreg64lo:=left.location.register64.reglo;
-
-          { shifting by a constant directly coded: }
-          if (right.nodetype=ordconstn) then
-            begin
-              v:=Tordconstnode(right).value and 63;
-              if v>31 then
-                begin
-                  if nodetype=shln then
-                    begin
-                      emit_reg_reg(A_XOR,S_L,hreg64hi,hreg64hi);
-                      if ((v and 31) <> 0) then
-                        emit_const_reg(A_SHL,S_L,v and 31,hreg64lo);
-                    end
-                  else
-                    begin
-                      emit_reg_reg(A_XOR,S_L,hreg64lo,hreg64lo);
-                      if ((v and 31) <> 0) then
-                        emit_const_reg(A_SHR,S_L,v and 31,hreg64hi);
-                    end;
-                  location.register64.reghi:=hreg64lo;
-                  location.register64.reglo:=hreg64hi;
-                end
-              else
-                begin
-                  if nodetype=shln then
-                    begin
-                      emit_const_reg_reg(A_SHLD,S_L,v and 31,hreg64lo,hreg64hi);
+        { shifting by a constant directly coded: }
+        if (right.nodetype=ordconstn) then
+          begin
+            v:=Tordconstnode(right).value and 63;
+            if v>31 then
+              begin
+                if nodetype=shln then
+                  begin
+                    emit_reg_reg(A_XOR,S_L,hreg64hi,hreg64hi);
+                    if ((v and 31) <> 0) then
                      emit_const_reg(A_SHL,S_L,v and 31,hreg64lo);
-                    end
-                  else
-                    begin
-                      emit_const_reg_reg(A_SHRD,S_L,v and 31,hreg64hi,hreg64lo);
+                  end
+                else
+                  begin
+                    emit_reg_reg(A_XOR,S_L,hreg64lo,hreg64lo);
+                    if ((v and 31) <> 0) then
                      emit_const_reg(A_SHR,S_L,v and 31,hreg64hi);
-                    end;
-                  location.register64.reglo:=hreg64lo;
-                  location.register64.reghi:=hreg64hi;
-                end;
-            end
-          else
-            begin
-              { load right operators in a register }
-              cg.getcpuregister(exprasmlist,NR_ECX);
-              cg.a_load_loc_reg(exprasmlist,OS_32,right.location,NR_ECX);
+                  end;
+                location.register64.reghi:=hreg64lo;
+                location.register64.reglo:=hreg64hi;
+              end
+            else
+              begin
+                if nodetype=shln then
+                  begin
+                    emit_const_reg_reg(A_SHLD,S_L,v and 31,hreg64lo,hreg64hi);
+                    emit_const_reg(A_SHL,S_L,v and 31,hreg64lo);
+                  end
+                else
+                  begin
+                    emit_const_reg_reg(A_SHRD,S_L,v and 31,hreg64hi,hreg64lo);
+                    emit_const_reg(A_SHR,S_L,v and 31,hreg64hi);
+                  end;
+                location.register64.reglo:=hreg64lo;
+                location.register64.reghi:=hreg64hi;
+              end;
+          end
+        else
+          begin
+            { load right operators in a register }
+            cg.getcpuregister(exprasmlist,NR_ECX);
+            cg.a_load_loc_reg(exprasmlist,OS_32,right.location,NR_ECX);

-              { left operator is already in a register }
-              { hence are both in a register }
-              { is it in the case ECX ? }
+            { left operator is already in a register }
+            { hence are both in a register }
+            { is it in the case ECX ? }

-              { the damned shift instructions work only til a count of 32 }
-              { so we've to do some tricks here                           }
-              objectlibrary.getjumplabel(l1);
-              objectlibrary.getjumplabel(l2);
-              objectlibrary.getjumplabel(l3);
-              emit_const_reg(A_CMP,S_L,64,NR_ECX);
-              cg.a_jmp_flags(exprasmlist,F_L,l1);
-              emit_reg_reg(A_XOR,S_L,hreg64lo,hreg64lo);
-              emit_reg_reg(A_XOR,S_L,hreg64hi,hreg64hi);
-              cg.a_jmp_always(exprasmlist,l3);
-              cg.a_label(exprasmlist,l1);
-              emit_const_reg(A_CMP,S_L,32,NR_ECX);
-              cg.a_jmp_flags(exprasmlist,F_L,l2);
-              emit_const_reg(A_SUB,S_L,32,NR_ECX);
-              if nodetype=shln then
-                begin
-                  emit_reg_reg(A_SHL,S_L,NR_CL,hreg64lo);
-                  emit_reg_reg(A_MOV,S_L,hreg64lo,hreg64hi);
-                  emit_reg_reg(A_XOR,S_L,hreg64lo,hreg64lo);
-                  cg.a_jmp_always(exprasmlist,l3);
-                  cg.a_label(exprasmlist,l2);
-                  emit_reg_reg_reg(A_SHLD,S_L,NR_CL,hreg64lo,hreg64hi);
-                  emit_reg_reg(A_SHL,S_L,NR_CL,hreg64lo);
-                end
-              else
-                begin
-                  emit_reg_reg(A_SHR,S_L,NR_CL,hreg64hi);
-                  emit_reg_reg(A_MOV,S_L,hreg64hi,hreg64lo);
-                  emit_reg_reg(A_XOR,S_L,hreg64hi,hreg64hi);
-                  cg.a_jmp_always(exprasmlist,l3);
-                  cg.a_label(exprasmlist,l2);
-                  emit_reg_reg_reg(A_SHRD,S_L,NR_CL,hreg64hi,hreg64lo);
-                  emit_reg_reg(A_SHR,S_L,NR_CL,hreg64hi);
-                end;
-              cg.a_label(exprasmlist,l3);
+            { the damned shift instructions work only til a count of 32 }
+            { so we've to do some tricks here                           }
+            objectlibrary.getjumplabel(l1);
+            objectlibrary.getjumplabel(l2);
+            objectlibrary.getjumplabel(l3);
+            emit_const_reg(A_CMP,S_L,64,NR_ECX);
+            cg.a_jmp_flags(exprasmlist,F_L,l1);
+            emit_reg_reg(A_XOR,S_L,hreg64lo,hreg64lo);
+            emit_reg_reg(A_XOR,S_L,hreg64hi,hreg64hi);
+            cg.a_jmp_always(exprasmlist,l3);
+            cg.a_label(exprasmlist,l1);
+            emit_const_reg(A_CMP,S_L,32,NR_ECX);
+            cg.a_jmp_flags(exprasmlist,F_L,l2);
+            emit_const_reg(A_SUB,S_L,32,NR_ECX);
+            if nodetype=shln then
+              begin
+                emit_reg_reg(A_SHL,S_L,NR_CL,hreg64lo);
+                emit_reg_reg(A_MOV,S_L,hreg64lo,hreg64hi);
+                emit_reg_reg(A_XOR,S_L,hreg64lo,hreg64lo);
+                cg.a_jmp_always(exprasmlist,l3);
+                cg.a_label(exprasmlist,l2);
+                emit_reg_reg_reg(A_SHLD,S_L,NR_CL,hreg64lo,hreg64hi);
+                emit_reg_reg(A_SHL,S_L,NR_CL,hreg64lo);
+              end
+            else
+              begin
+                emit_reg_reg(A_SHR,S_L,NR_CL,hreg64hi);
+                emit_reg_reg(A_MOV,S_L,hreg64hi,hreg64lo);
+                emit_reg_reg(A_XOR,S_L,hreg64hi,hreg64hi);
+                cg.a_jmp_always(exprasmlist,l3);
+                cg.a_label(exprasmlist,l2);
+                emit_reg_reg_reg(A_SHRD,S_L,NR_CL,hreg64hi,hreg64lo);
+                emit_reg_reg(A_SHR,S_L,NR_CL,hreg64hi);
+              end;
+            cg.a_label(exprasmlist,l3);

-              cg.ungetcpuregister(exprasmlist,NR_ECX);
-              location.register64.reglo:=hreg64lo;
-              location.register64.reghi:=hreg64hi;
-            end;
-        end
-      else
-        begin
-          { load left operators in a register }
-          location_copy(location,left.location);
-          location_force_reg(exprasmlist,location,OS_INT,false);
-
-          { shifting by a constant directly coded: }
-          if (right.nodetype=ordconstn) then
-            { l shl 32 should 0 imho, but neither TP nor Delphi do it in this way (FK)}
-            emit_const_reg(op,S_L,tordconstnode(right).value and 31,location.register)
-          else
-            begin
-              { load right operators in a ECX }
-              cg.getcpuregister(exprasmlist,NR_ECX);
-              cg.a_load_loc_reg(exprasmlist,OS_32,right.location,NR_ECX);
-
-              { right operand is in ECX }
-              cg.ungetcpuregister(exprasmlist,NR_ECX);
-              emit_reg_reg(op,S_L,NR_CL,location.register);
-            end;
-        end;
-    end;
+            cg.ungetcpuregister(exprasmlist,NR_ECX);
+            location.register64.reglo:=hreg64lo;
+            location.register64.reghi:=hreg64hi;
+          end;
+      end;


 begin
--- a/compiler/m68k/cgcpu.pas
+++ b/compiler/m68k/cgcpu.pas
@ -404,11 +404,13 @@ unit cgcpu;
       opcode : tasmop;
       r,r2 : Tregister;
      begin
-        { need to emit opcode? }
-        if optimize_op_const_reg(list, op, a, reg) then
-           exit;
+        optimize_op_const(op, a);   { may rewrite op/a, e.g. fold op to OP_NONE }
        opcode := topcg2tasmop[op];
        case op of
+          OP_NONE :
+              begin
+                { Opcode is optimized away }
+              end;
          OP_ADD :
              begin
                if (a >= 1) and (a <= 8) then
--- a/compiler/ncgadd.pas
+++ b/compiler/ncgadd.pas
@ -459,8 +459,7 @@ interface
        ovloc.loc:=LOC_VOID;

        pass_left_right;
-        force_reg_left_right(false,(cs_check_overflow in aktlocalswitches) and
-                                   (nodetype in [addn,subn]));
+        force_reg_left_right(false,true);
        set_result_location_reg;

        { assume no overflow checking is required }
@ -634,8 +633,7 @@ interface
        ovloc.loc:=LOC_VOID;

        pass_left_right;
-        force_reg_left_right(false,(cs_check_overflow in aktlocalswitches) and
-                                   (nodetype in [addn,subn,muln]));
+        force_reg_left_right(false,true);
        set_result_location_reg;

        { determine if the comparison will be unsigned }
@ -680,7 +678,7 @@ interface

       if nodetype<>subn then
        begin
-          if (right.location.loc >LOC_CONSTANT) then
+          if (right.location.loc<>LOC_CONSTANT) then
            cg.a_op_reg_reg_reg_checkoverflow(exprasmlist,cgop,location.size,
               left.location.register,right.location.register,
               location.register,checkoverflow and (cs_check_overflow in aktlocalswitches),ovloc)
@ -708,7 +706,7 @@ interface
            begin
              tmpreg:=cg.getintregister(exprasmlist,location.size);
              cg.a_load_const_reg(exprasmlist,location.size,
-                aword(left.location.value),tmpreg);
+                left.location.value,tmpreg);
              cg.a_op_reg_reg_reg_checkoverflow(exprasmlist,OP_SUB,location.size,
                right.location.register,tmpreg,location.register,checkoverflow and (cs_check_overflow in aktlocalswitches),ovloc);
            end;
--- a/compiler/x86/cgx86.pas
+++ b/compiler/x86/cgx86.pas
@ -63,11 +63,6 @@ unit cgx86;
        procedure a_op_ref_reg(list : taasmoutput; Op: TOpCG; size: TCGSize; const ref: TReference; reg: TRegister); override;
        procedure a_op_reg_ref(list : taasmoutput; Op: TOpCG; size: TCGSize;reg: TRegister; const ref: TReference); override;

-        procedure a_op_const_reg_reg(list: taasmoutput; op: TOpCg;
-          size: tcgsize; a: aint; src, dst: tregister); override;
-        procedure a_op_reg_reg_reg(list: taasmoutput; op: TOpCg;
-          size: tcgsize; src1, src2, dst: tregister); override;
-
        { move instructions }
        procedure a_load_const_reg(list : taasmoutput; tosize: tcgsize; a : aint;reg : tregister);override;
        procedure a_load_const_ref(list : taasmoutput; tosize: tcgsize; a : aint;const ref : treference);override;
@ -160,8 +155,8 @@ unit cgx86;
       fmodule;

    const
-      TOpCG2AsmOp: Array[topcg] of TAsmOp = (A_NONE,A_ADD,A_AND,A_DIV,
-                            A_IDIV,A_MUL, A_IMUL, A_NEG,A_NOT,A_OR,
+      TOpCG2AsmOp: Array[topcg] of TAsmOp = (A_NONE,A_MOV,A_ADD,A_AND,A_DIV,
+                            A_IDIV,A_IMUL,A_MUL,A_NEG,A_NOT,A_OR,
                            A_SAR,A_SHL,A_SHR,A_SUB,A_XOR);

      TOpCmp2AsmCond: Array[topcmp] of TAsmCond = (C_NONE,
@ -572,7 +567,7 @@ unit cgx86;
        sym : tasmsymbol;
        r : treference;
      begin
- 
+
        if (target_info.system <> system_i386_darwin) then
          begin
            sym:=objectlibrary.newasmsymbol(s,AB_EXTERNAL,AT_FUNCTION);
@ -984,10 +979,10 @@ unit cgx86;
        opmm2asmop : array[0..1,OS_F32..OS_F64,topcg] of tasmop = (
          ( { scalar }
            ( { OS_F32 }
-              A_NOP,A_ADDSS,A_NOP,A_DIVSS,A_NOP,A_NOP,A_MULSS,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_SUBSS,A_NOP
+              A_NOP,A_NOP,A_ADDSS,A_NOP,A_DIVSS,A_NOP,A_NOP,A_MULSS,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_SUBSS,A_NOP
            ),
            ( { OS_F64 }
-              A_NOP,A_ADDSD,A_NOP,A_DIVSD,A_NOP,A_NOP,A_MULSD,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_SUBSD,A_NOP
+              A_NOP,A_NOP,A_ADDSD,A_NOP,A_DIVSD,A_NOP,A_NOP,A_MULSD,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_SUBSD,A_NOP
            )
          ),
          ( { vectorized/packed }
@ -995,10 +990,10 @@ unit cgx86;
              these
            }
            ( { OS_F32 }
-              A_NOP,A_ADDPS,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_XORPS
+              A_NOP,A_NOP,A_ADDPS,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_XORPS
            ),
            ( { OS_F64 }
-              A_NOP,A_ADDPD,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_XORPD
+              A_NOP,A_NOP,A_ADDPD,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_XORPD
            )
          )
        );
@ -1062,9 +1057,11 @@ unit cgx86;
        tmpreg : tregister;
 {$endif x86_64}
      begin
+        optimize_op_const(op, a);
 {$ifdef x86_64}
        { x86_64 only supports signed 32 bits constants directly }
-        if (size in [OS_S64,OS_64]) and
+        if not(op in [OP_NONE,OP_MOVE]) and   { folded ops skip the temp-register path }
+           (size in [OS_S64,OS_64]) and
            ((a<low(longint)) or (a>high(longint))) then
          begin
            tmpreg:=getintregister(list,size);
@ -1075,6 +1072,15 @@ unit cgx86;
 {$endif x86_64}
        check_register_size(size,reg);
        case op of
+          OP_NONE :
+            begin
+              { Opcode is optimized away }
+            end;
+          OP_MOVE :
+            begin
+              { Optimized, replaced with a simple load }
+              a_load_const_reg(list,size,a,reg);
+            end;
          OP_DIV, OP_IDIV:
            begin
              if ispowerof2(int64(a),power) then
@ -1155,11 +1161,13 @@ unit cgx86;
 {$endif x86_64}
        tmpref  : treference;
      begin
+        optimize_op_const(op, a);
        tmpref:=ref;
        make_simple_ref(list,tmpref);
 {$ifdef x86_64}
        { x86_64 only supports signed 32 bits constants directly }
-        if (size in [OS_S64,OS_64]) and
+        if not(op in [OP_NONE,OP_MOVE]) and   { folded ops skip the temp-register path }
+           (size in [OS_S64,OS_64]) and
            ((a<low(longint)) or (a>high(longint))) then
          begin
            tmpreg:=getintregister(list,size);
@ -1169,6 +1177,15 @@ unit cgx86;
          end;
 {$endif x86_64}
        Case Op of
+          OP_NONE :
+            begin
+              { Opcode is optimized away }
+            end;
+          OP_MOVE :
+            begin
+              { Optimized, replaced with a simple load }
+              a_load_const_ref(list,size,a,ref);
+            end;
          OP_DIV, OP_IDIV:
            Begin
              if ispowerof2(int64(a),power) then
@ -1266,10 +1283,11 @@ unit cgx86;
            internalerror(200109233);
          OP_SHR,OP_SHL,OP_SAR:
            begin
-              getcpuregister(list,NR_CL);
-              a_load_reg_reg(list,OS_8,OS_8,makeregsize(list,src,OS_8),NR_CL);
-              list.concat(taicpu.op_reg_reg(Topcg2asmop[op],tcgsize2opsize[size],NR_CL,src));
-              ungetcpuregister(list,NR_CL);
+              { Use ecx to load the value, that allows better coalescing }
+              getcpuregister(list,NR_ECX);
+              a_load_reg_reg(list,size,OS_32,src,NR_ECX);
+              list.concat(taicpu.op_reg_reg(Topcg2asmop[op],tcgsize2opsize[size],NR_CL,dst));
+              ungetcpuregister(list,NR_ECX);
            end;
          else
            begin
@ -1338,98 +1356,6 @@ unit cgx86;
      end;


-    procedure tcgx86.a_op_const_reg_reg(list: taasmoutput; op: TOpCg; size: tcgsize; a: aint; src, dst: tregister);
-      var
-        tmpref: treference;
-        power: longint;
-{$ifdef x86_64}
-        tmpreg : tregister;
-{$endif x86_64}
-      begin
-{$ifdef x86_64}
-        { x86_64 only supports signed 32 bits constants directly }
-        if (size in [OS_S64,OS_64]) and
-            ((a<low(longint)) or (a>high(longint))) then
-          begin
-            tmpreg:=getintregister(list,size);
-            a_load_const_reg(list,size,a,tmpreg);
-            a_op_reg_reg_reg(list,op,size,tmpreg,src,dst);
-            exit;
-          end;
-{$endif x86_64}
-        check_register_size(size,src);
-        check_register_size(size,dst);
-        if tcgsize2size[size]<>tcgsize2size[OS_INT] then
-          begin
-            inherited a_op_const_reg_reg(list,op,size,a,src,dst);
-            exit;
-          end;
-        { if we get here, we have to do a 32 bit calculation, guaranteed }
-        case op of
-          OP_DIV, OP_IDIV, OP_MUL, OP_AND, OP_OR, OP_XOR, OP_SHL, OP_SHR,
-          OP_SAR:
-            { can't do anything special for these }
-            inherited a_op_const_reg_reg(list,op,size,a,src,dst);
-          OP_IMUL:
-            begin
-              if not(cs_check_overflow in aktlocalswitches) and
-                 ispowerof2(int64(a),power) then
-                { can be done with a shift }
-                begin
-                  inherited a_op_const_reg_reg(list,op,size,a,src,dst);
-                  exit;
-                end;
-              list.concat(taicpu.op_const_reg_reg(A_IMUL,tcgsize2opsize[size],a,src,dst));
-            end;
-          OP_ADD, OP_SUB:
-            if (a = 0) then
-              a_load_reg_reg(list,size,size,src,dst)
-            else
-              begin
-                reference_reset(tmpref);
-                tmpref.base := src;
-                tmpref.offset := longint(a);
-                if op = OP_SUB then
-                  tmpref.offset := -tmpref.offset;
-                list.concat(taicpu.op_ref_reg(A_LEA,tcgsize2opsize[size],tmpref,dst));
-              end
-          else internalerror(200112302);
-        end;
-      end;
-
-
-    procedure tcgx86.a_op_reg_reg_reg(list: taasmoutput; op: TOpCg;size: tcgsize; src1, src2, dst: tregister);
-      var
-        tmpref: treference;
-      begin
-        check_register_size(size,src1);
-        check_register_size(size,src2);
-        check_register_size(size,dst);
-        if tcgsize2size[size]<>tcgsize2size[OS_INT] then
-          begin
-            inherited a_op_reg_reg_reg(list,op,size,src1,src2,dst);
-            exit;
-          end;
-        { if we get here, we have to do a 32 bit calculation, guaranteed }
-        Case Op of
-          OP_DIV, OP_IDIV, OP_MUL, OP_AND, OP_OR, OP_XOR, OP_SHL, OP_SHR,
-          OP_SAR,OP_SUB,OP_NOT,OP_NEG:
-            { can't do anything special for these }
-            inherited a_op_reg_reg_reg(list,op,size,src1,src2,dst);
-          OP_IMUL:
-            list.concat(taicpu.op_reg_reg_reg(A_IMUL,tcgsize2opsize[size],src1,src2,dst));
-          OP_ADD:
-            begin
-              reference_reset(tmpref);
-              tmpref.base := src1;
-              tmpref.index := src2;
-              tmpref.scalefactor := 1;
-              list.concat(taicpu.op_ref_reg(A_LEA,tcgsize2opsize[size],tmpref,dst));
-            end
-          else internalerror(200112303);
-        end;
-      end;
-
 {*************** compare instructructions ****************}

    procedure tcgx86.a_cmp_const_reg_label(list : taasmoutput;size : tcgsize;cmp_op : topcmp;a : aint;reg : tregister;
--- a/compiler/x86/nx86add.pas
+++ b/compiler/x86/nx86add.pas
@ -978,65 +978,17 @@ unit nx86add;
 *****************************************************************************}

    procedure tx86addnode.second_addordinal;
-      var
-         mboverflow : boolean;
-         op : tasmop;
-         opsize : tcgsize;
-         { true, if unsigned types are compared }
-         unsigned : boolean;
-         { true, if for sets subtractions the extra not should generated }
-         extra_not : boolean;
      begin
-         { defaults }
-         extra_not:=false;
-         mboverflow:=false;
-         unsigned:=not(is_signed(left.resulttype.def)) or
-                   not(is_signed(right.resulttype.def));
-         opsize:=def_cgsize(left.resulttype.def);
-
-         pass_left_right;
-
-         case nodetype of
-           addn :
-             begin
-               op:=A_ADD;
-               mboverflow:=true;
-             end;
-           muln :
-             begin
-               if unsigned then
-                 op:=A_MUL
-               else
-                 op:=A_IMUL;
-               mboverflow:=true;
-             end;
-           subn :
-             begin
-               op:=A_SUB;
-               mboverflow:=true;
-             end;
-           xorn :
-             op:=A_XOR;
-           orn :
-             op:=A_OR;
-           andn :
-             op:=A_AND;
-           else
-             internalerror(200304229);
-         end;
-
-         { filter MUL, which requires special handling }
-         if op=A_MUL then
+         { filter unsigned MUL opcode, which requires special handling }
+         if (nodetype=muln) and
+            (not(is_signed(left.resulttype.def)) or
+             not(is_signed(right.resulttype.def))) then
           begin
             second_mul;
             exit;
           end;

-         left_must_be_reg(opsize,false);
-         emit_generic_code(op,opsize,unsigned,extra_not,mboverflow);
-         location_freetemp(exprasmlist,right.location);
-
-         set_result_location_reg;
+         inherited second_addordinal;
      end;