* implementation of 32x32->64 multiplication for i386 based on patch

by Sergei Gorelkin git-svn-id: trunk@12028 -
2025-11-03 20:49:35 +01:00 · 2008-11-06 21:03:10 +00:00 · 2008-11-06 21:03:10 +00:00 · 53e52ac6a9
commit 53e52ac6a9
parent 84112032c3
3 changed files with 79 additions and 34 deletions
--- a/compiler/i386/n386add.pas
+++ b/compiler/i386/n386add.pas
@ -30,9 +30,11 @@ interface

    type
       ti386addnode = class(tx86addnode)
+         function use_generic_mul32to64: boolean; override;
+         procedure second_addordinal; override;
         procedure second_add64bit;override;
         procedure second_cmp64bit;override;
-         procedure second_mul;override;
+         procedure second_mul(unsigned: boolean);
       end;

  implementation
@ -46,6 +48,29 @@ interface
      ncon,nset,cgutils,tgobj,
      cga,ncgutil,cgobj,cg64f32,cgx86;

+{*****************************************************************************
+                                use_generic_mul32to64
+*****************************************************************************}
+
+    function ti386addnode.use_generic_mul32to64: boolean;
+    begin
+      result := False;
+    end;
+
+    { handles all unsigned multiplications, and 32->64 bit signed ones.
+      32bit-only signed mul is handled by generic codegen }
+    procedure ti386addnode.second_addordinal;
+    var
+      unsigned: boolean;
+    begin
+      unsigned:=not(is_signed(left.resultdef)) or
+                not(is_signed(right.resultdef));
+      if (nodetype=muln) and (unsigned or is_64bit(resultdef)) then
+        second_mul(unsigned)
+      else
+        inherited second_addordinal;
+    end;
+
 {*****************************************************************************
                                Add64bit
 *****************************************************************************}
@ -343,20 +368,24 @@ interface
                                x86 MUL
 *****************************************************************************}

-    procedure ti386addnode.second_mul;
+    procedure ti386addnode.second_mul(unsigned: boolean);

    var reg:Tregister;
        ref:Treference;
        use_ref:boolean;
        hl4 : tasmlabel;

+    const
+      asmops: array[boolean] of tasmop = (A_IMUL, A_MUL);
+
    begin
      pass_left_right;

      {The location.register will be filled in later (JM)}
      location_reset(location,LOC_REGISTER,def_cgsize(resultdef));
      { Mul supports registers and references, so if not register/reference,
-        load the location into a register}
+        load the location into a register.
+        The variant of IMUL which is capable of doing 32->64 bits has the same restrictions. }
      use_ref:=false;
      if left.location.loc in [LOC_REGISTER,LOC_CREGISTER] then
        reg:=left.location.register
@ -379,22 +408,36 @@ interface
      {Also allocate EDX, since it is also modified by a mul (JM).}
      cg.getcpuregister(current_asmdata.CurrAsmList,NR_EDX);
      if use_ref then
-        emit_ref(A_MUL,S_L,ref)
+        emit_ref(asmops[unsigned],S_L,ref)
      else
-        emit_reg(A_MUL,S_L,reg);
-      if cs_check_overflow in current_settings.localswitches  then
-       begin
-         current_asmdata.getjumplabel(hl4);
-         cg.a_jmp_flags(current_asmdata.CurrAsmList,F_AE,hl4);
-         cg.a_call_name(current_asmdata.CurrAsmList,'FPC_OVERFLOW',false);
-         cg.a_label(current_asmdata.CurrAsmList,hl4);
-       end;
+        emit_reg(asmops[unsigned],S_L,reg);
+      if (cs_check_overflow in current_settings.localswitches) and
+        { 32->64 bit cannot overflow }
+        (not is_64bit(resultdef)) then
+        begin
+          current_asmdata.getjumplabel(hl4);
+          cg.a_jmp_flags(current_asmdata.CurrAsmList,F_AE,hl4);
+          cg.a_call_name(current_asmdata.CurrAsmList,'FPC_OVERFLOW',false);
+          cg.a_label(current_asmdata.CurrAsmList,hl4);
+        end;
      {Free EAX,EDX}
      cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_EDX);
-      cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_EAX);
-      {Allocate a new register and store the result in EAX in it.}
-      location.register:=cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
-      cg.a_load_reg_reg(current_asmdata.CurrAsmList,OS_INT,OS_INT,NR_EAX,location.register);
+      if is_64bit(resultdef) then
+      begin
+        {Allocate a couple of registers and store EDX:EAX into it}
+        location.register64.reghi := cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
+        cg.a_load_reg_reg(current_asmdata.CurrAsmList, OS_INT, OS_INT, NR_EDX, location.register64.reghi);
+        cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_EAX);
+        location.register64.reglo := cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
+        cg.a_load_reg_reg(current_asmdata.CurrAsmList, OS_INT, OS_INT, NR_EAX, location.register64.reglo);
+      end
+      else
+      begin
+        {Allocate a new register and store the result in EAX in it.}
+        location.register:=cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
+        cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_EAX);
+        cg.a_load_reg_reg(current_asmdata.CurrAsmList,OS_INT,OS_INT,NR_EAX,location.register);
+      end;
      location_freetemp(current_asmdata.CurrAsmList,left.location);
      location_freetemp(current_asmdata.CurrAsmList,right.location);
    end;
--- a/compiler/x86/nx86add.pas
+++ b/compiler/x86/nx86add.pas
@ -41,12 +41,10 @@ unit nx86add;

        procedure second_cmpfloatsse;
        procedure second_addfloatsse;
-        procedure second_mul;virtual;abstract;
      public
        procedure second_addfloat;override;
        procedure second_addsmallset;override;
        procedure second_add64bit;override;
-        procedure second_addordinal;override;
        procedure second_cmpfloat;override;
        procedure second_cmpsmallset;override;
        procedure second_cmp64bit;override;
@ -993,21 +991,6 @@ unit nx86add;
                                  AddOrdinal
 *****************************************************************************}

-    procedure tx86addnode.second_addordinal;
-      begin
-         { filter unsigned MUL opcode, which requires special handling }
-         if (nodetype=muln) and
-            (not(is_signed(left.resultdef)) or
-             not(is_signed(right.resultdef))) then
-           begin
-             second_mul;
-             exit;
-           end;
-
-         inherited second_addordinal;
-      end;
-
-
    procedure tx86addnode.second_cmpordinal;
      var
         opsize : tcgsize;
--- a/compiler/x86_64/nx64add.pas
+++ b/compiler/x86_64/nx64add.pas
@ -30,7 +30,8 @@ interface

    type
       tx8664addnode = class(tx86addnode)
-          procedure second_mul;override;
+          procedure second_addordinal; override;
+          procedure second_mul;
       end;

  implementation
@ -42,6 +43,24 @@ interface
      cgbase,cgutils,cga,cgobj,
      tgobj;

+{*****************************************************************************
+                                Addordinal
+*****************************************************************************}
+
+    procedure tx8664addnode.second_addordinal;
+    begin
+      { filter unsigned MUL opcode, which requires special handling }
+      if (nodetype=muln) and
+        (not(is_signed(left.resultdef)) or
+         not(is_signed(right.resultdef))) then
+      begin
+        second_mul;
+        exit;
+      end;
+
+      inherited second_addordinal;
+    end;
+
 {*****************************************************************************
                                MUL
 *****************************************************************************}