* made multiplying, dividing and mod'ing of int64 and qword processor

independent with compilerprocs (+ small optimizations by using shift/and where possible)
2025-12-10 00:07:23 +01:00 · 2001-09-05 15:22:09 +00:00 · 2001-09-05 15:22:09 +00:00 · a39cd8a580
commit a39cd8a580
parent b81a4bb773
6 changed files with 181 additions and 124 deletions
--- a/compiler/i386/n386add.pas
+++ b/compiler/i386/n386add.pas
@ -1235,54 +1235,8 @@ interface

                   if nodetype=muln then
                     begin
-                        regstopush := $ff;
-                        remove_non_regvars_from_loc(location,regstopush);
-                        remove_non_regvars_from_loc(right.location,regstopush);
-                        
-                        { ugly hack because in *this* case, the pushed register }
-                        { must not be allocated later on (JM)                   }
-                        unusedregisters:=unused;
-                        usablecount:=usablereg32;
-                        pushusedregisters(pushedreg,regstopush);
-                        unused:=unusedregisters;
-                        usablereg32:=usablecount;
-                        
-                        if cs_check_overflow in aktlocalswitches then
-                          push_int(1)
-                        else
-                          push_int(0);
-                        { the left operand is in hloc, because the
-                          location of left is location but location
-                          is already destroyed
-                          
-                          not anymore... I had to change this because the
-                          regalloc info was completely wrong otherwise (JM)
-                        }
-                        emit_pushq_loc(location);
-                        release_qword_loc(location);
-                        clear_location(location);
-                        emit_pushq_loc(right.location);
-                        release_qword_loc(right.location);
-                        saveregvars($ff);
-                        if torddef(resulttype.def).typ=u64bit then
-                          emitcall('FPC_MUL_QWORD')
-                        else
-                          emitcall('FPC_MUL_INT64');
-                        { make sure we don't overwrite any results (JM) }
-                        if R_EDX in unused then
-                          begin
-                             location.registerhigh:=getexplicitregister32(R_EDX);
-                             location.registerlow:=getexplicitregister32(R_EAX);
-                          end
-                        else
-                          begin
-                             location.registerlow:=getexplicitregister32(R_EAX);
-                             location.registerhigh:=getexplicitregister32(R_EDX);
-                          end;
-                        location.loc := LOC_REGISTER;
-                        emit_reg_reg(A_MOV,S_L,R_EAX,location.registerlow);
-                        emit_reg_reg(A_MOV,S_L,R_EDX,location.registerhigh);
-                        popusedregisters(pushedreg);
+                       { should be handled in pass_1 (JM) }
+                       internalerror(200109051);
                     end
                   else
                     begin
@ -1890,7 +1844,12 @@ begin
 end.
 {
  $Log$
-  Revision 1.22  2001-09-04 11:38:55  jonas
+  Revision 1.23  2001-09-05 15:22:09  jonas
+    * made multiplying, dividing and mod'ing of int64 and qword processor
+      independent with compilerprocs (+ small optimizations by using shift/and
+      where possible)
+
+  Revision 1.22  2001/09/04 11:38:55  jonas
    + searchsystype() and searchsystype() functions in symtable
    * changed ninl and nadd to use these functions
    * i386 set comparison functions now return their results in al instead
--- a/compiler/i386/n386mat.pas
+++ b/compiler/i386/n386mat.pas
@ -88,50 +88,8 @@ implementation

         if is_64bitint(resulttype.def) then
           begin
-              regstopush := $ff;
-              remove_non_regvars_from_loc(location,regstopush);
-              remove_non_regvars_from_loc(right.location,regstopush);
-              
-              { ugly hack because in *this* case, the pushed register }
-              { must not be allocated later on (JM)                   }
-              unusedregisters:=unused;
-              usablecount:=usablereg32;
-              pushusedregisters(pushedreg,regstopush);
-              unused:=unusedregisters;
-              usablereg32:=usablecount;
-              
-              emit_pushq_loc(location);
-              release_qword_loc(location);
-              clear_location(location);
-              emit_pushq_loc(right.location);
-              release_qword_loc(right.location);
-              if torddef(resulttype.def).typ=u64bit then
-                typename:='QWORD'
-              else
-                typename:='INT64';
-              if nodetype=divn then
-                opname:='DIV_'
-              else
-                opname:='MOD_';
-              saveregvars($ff);
-              emitcall('FPC_'+opname+typename);
-
-              { make sure we don't overwrite any results (JM) }
-              if R_EDX in unused then
-                begin
-                   location.registerhigh:=getexplicitregister32(R_EDX);
-                   location.registerlow:=getexplicitregister32(R_EAX);
-                end
-              else
-                begin
-                   location.registerlow:=getexplicitregister32(R_EAX);
-                   location.registerhigh:=getexplicitregister32(R_EDX);
-                end;
-              location.loc:=LOC_REGISTER;
-              emit_reg_reg(A_MOV,S_L,R_EAX,location.registerlow);
-              emit_reg_reg(A_MOV,S_L,R_EDX,location.registerhigh);
-
-              popusedregisters(pushedreg);
+             { should be handled in pass_1 (JM) }
+             internalerror(200109052);
           end
         else
           begin
@ -1058,7 +1016,12 @@ begin
 end.
 {
  $Log$
-  Revision 1.15  2001-08-29 12:03:23  jonas
+  Revision 1.16  2001-09-05 15:22:10  jonas
+    * made multiplying, dividing and mod'ing of int64 and qword processor
+      independent with compilerprocs (+ small optimizations by using shift/and
+      where possible)
+
+  Revision 1.15  2001/08/29 12:03:23  jonas
    * fixed wrong regalloc info around FPC_MUL/DIV/MOD_INT64/QWORD calls
    * fixed partial result overwriting with the above calls too

--- a/compiler/nadd.pas
+++ b/compiler/nadd.pas
@ -39,6 +39,9 @@ interface
          { parts explicitely in the code generator (JM)    }
          function first_addstring: tnode; virtual;
          function first_addset: tnode; virtual;
+          { only implements "muln" nodes, the rest always has to be done in }
+          { the code generator for performance reasons (JM)                 }
+          function first_add64bitint: tnode; virtual;
       end;
       taddnodeclass = class of taddnode;

@ -1265,6 +1268,53 @@ implementation
      end;


+    function taddnode.first_add64bitint: tnode;
+      var
+        procname: string[31];
+        temp: tnode;
+        power: longint;
+      begin
+        result := nil;
+        { create helper calls mul }
+        if nodetype <> muln then
+          exit;
+        
+        { make sure that if there is a constant, that it's on the right }
+        if left.nodetype = ordconstn then
+          begin
+            temp := right;
+            right := left;
+            left := temp;
+          end;
+
+        { can we use a shift instead of a mul? }
+        if (right.nodetype = ordconstn) and
+           ispowerof2(tordconstnode(right).value,power) then
+          begin
+            tordconstnode(right).value := power;
+            result := cshlshrnode.create(shln,left,right);
+            { left and right are reused }
+            left := nil;
+            right := nil;
+            { return firstpassed new node }
+            firstpass(result);
+            exit;
+          end;
+
+        { otherwise, create the parameters for the helper }
+        right := ccallparanode.create(
+          cordconstnode.create(ord(cs_check_overflow in aktlocalswitches),booltype),
+          ccallparanode.create(right,ccallparanode.create(left,nil)));
+        left := nil;
+        if torddef(resulttype.def).typ = s64bit then
+          procname := 'fpc_mul_int64'
+        else
+          procname := 'fpc_mul_qword';
+        result := ccallnode.createintern(procname,right);
+        right := nil;
+        firstpass(result);
+      end;
+
    function taddnode.pass_1 : tnode;
      var
         hp      : tnode;
@ -1329,7 +1379,12 @@ implementation
               end
              { is there a 64 bit type ? }
             else if (torddef(ld).typ in [s64bit,u64bit]) then
-               calcregisters(self,2,0,0)
+               begin
+                 result := first_add64bitint;
+                 if assigned(result) then
+                   exit;
+                 calcregisters(self,2,0,0)
+               end
             { is there a cardinal? }
             else if (torddef(ld).typ=u32bit) then
               begin
@ -1527,7 +1582,12 @@ begin
 end.
 {
  $Log$
-  Revision 1.38  2001-09-04 11:38:54  jonas
+  Revision 1.39  2001-09-05 15:22:09  jonas
+    * made multiplying, dividing and mod'ing of int64 and qword processor
+      independent with compilerprocs (+ small optimizations by using shift/and
+      where possible)
+
+  Revision 1.38  2001/09/04 11:38:54  jonas
    + searchsystype() and searchsystype() functions in symtable
    * changed ninl and nadd to use these functions
    * i386 set comparison functions now return their results in al instead
--- a/compiler/nmat.pas
+++ b/compiler/nmat.pas
@ -33,6 +33,10 @@ interface
       tmoddivnode = class(tbinopnode)
          function pass_1 : tnode;override;
          function det_resulttype:tnode;override;
+         protected
+          { override the following if you want to implement }
+          { parts explicitely in the code generator (JM)    }
+          function first_moddiv64bitint: tnode; virtual;
       end;
       tmoddivnodeclass = class of tmoddivnode;

@ -67,14 +71,14 @@ implementation

    uses
      systems,tokens,
-      verbose,globals,
+      verbose,globals,cutils,
 {$ifdef support_mmx}
      globtype,
 {$endif}
      symconst,symtype,symtable,symdef,types,
      htypechk,pass_1,cpubase,cpuinfo,
      cgbase,
-      ncon,ncnv,ncal;
+      ncon,ncnv,ncal,nadd;

 {****************************************************************************
                              TMODDIVNODE
@ -188,6 +192,52 @@ implementation
      end;


+    function tmoddivnode.first_moddiv64bitint: tnode;
+      var
+        procname: string[31];
+        power: longint;
+      begin
+        result := nil;
+        
+        { divide/mod an unsigned number by a constant which is a power of 2? }
+        if (right.nodetype = ordconstn) and
+           not is_signed(resulttype.def) and
+           ispowerof2(tordconstnode(right).value,power) then
+          begin
+            if nodetype = divn then
+              begin
+                tordconstnode(right).value := power;
+                result := cshlshrnode.create(shrn,left,right)
+              end
+            else
+              begin
+                dec(tordconstnode(right).value);
+                result := caddnode.create(andn,left,right);
+              end;
+            { left and right are reused }
+            left := nil;
+            right := nil;
+            firstpass(result);
+            exit;
+          end;
+          
+        { otherwise create a call to a helper }
+        if nodetype = divn then
+          procname := 'fpc_div_'
+        else
+          procname := 'fpc_mod_';
+        if is_signed(resulttype.def) then
+          procname := procname + 'int64'
+        else
+          procname := procname + 'qword';
+
+        result := ccallnode.createintern(procname,ccallparanode.create(left,
+          ccallparanode.create(right,nil)));
+        left := nil;
+        right := nil;
+        firstpass(result);
+      end;
+
    function tmoddivnode.pass_1 : tnode;
      begin
         result:=nil;
@ -200,6 +250,9 @@ implementation
         if (left.resulttype.def.deftype=orddef) and (right.resulttype.def.deftype=orddef) and
            (is_64bitint(left.resulttype.def) or is_64bitint(right.resulttype.def)) then
           begin
+             result := first_moddiv64bitint;
+             if assigned(result) then
+               exit;
             calcregisters(self,2,0,0);
           end
         else
@ -587,7 +640,12 @@ begin
 end.
 {
  $Log$
-  Revision 1.22  2001-09-02 21:12:07  peter
+  Revision 1.23  2001-09-05 15:22:09  jonas
+    * made multiplying, dividing and mod'ing of int64 and qword processor
+      independent with compilerprocs (+ small optimizations by using shift/and
+      where possible)
+
+  Revision 1.22  2001/09/02 21:12:07  peter
    * move class of definitions into type section for delphi

  Revision 1.21  2001/08/26 13:36:41  florian
--- a/rtl/inc/compproc.inc
+++ b/rtl/inc/compproc.inc
@ -157,6 +157,13 @@ Function fpc_val_qword_shortstr(Const S: ShortString; var Code: ValSInt): QWord;
 procedure fpc_widestr_qword(v : qword;len : longint;var s : widestring); compilerproc;
 procedure fpc_widestr_int64(v : int64;len : longint;var s : widestring); compilerproc;

+function fpc_div_qword(n,z : qword) : qword; compilerproc;
+function fpc_mod_qword(n,z : qword) : qword; compilerproc;
+function fpc_div_int64(n,z : int64) : int64; compilerproc;
+function fpc_mod_int64(n,z : int64) : int64; compilerproc;
+function fpc_mul_qword(f1,f2 : qword;checkoverflow : longbool) : qword; compilerproc;
+function fpc_mul_int64(f1,f2 : int64;checkoverflow : longbool) : int64; compilerproc;
+
 function fpc_do_is(aclass : tclass;aobject : tobject) : boolean; compilerproc;
 procedure fpc_do_as(aclass : tclass;aobject : tobject); compilerproc;
 procedure fpc_intf_decr_ref(var i: pointer); compilerproc;
@ -240,7 +247,12 @@ Procedure fpc_typed_read(TypeSize : Longint;var f : TypedFile;var Buf); compiler

 {
  $Log$
-  Revision 1.9  2001-09-04 11:38:55  jonas
+  Revision 1.10  2001-09-05 15:22:09  jonas
+    * made multiplying, dividing and mod'ing of int64 and qword processor
+      independent with compilerprocs (+ small optimizations by using shift/and
+      where possible)
+
+  Revision 1.9  2001/09/04 11:38:55  jonas
    + searchsystype() and searchsystype() functions in symtable
    * changed ninl and nadd to use these functions
    * i386 set comparison functions now return their results in al instead
--- a/rtl/inc/int64.inc
+++ b/rtl/inc/int64.inc
@ -50,14 +50,14 @@
         count_leading_zeros:=r;
      end;

-    function divqword(n,z : qword) : qword;[public,alias: 'FPC_DIV_QWORD'];
+    function fpc_div_qword(n,z : qword) : qword;[public,alias: 'FPC_DIV_QWORD']; {$ifdef hascompilerproc} compilerproc; {$endif}

      var
         shift,lzz,lzn : longint;
         { one : qword; }

      begin
-         divqword:=0;
+         fpc_div_qword:=0;
         if n=0 then
           HandleErrorFrame(200,get_frame);
         lzz:=count_leading_zeros(z);
@ -73,20 +73,20 @@
           if z>=n then
             begin
                z:=z-n;
-                divqword:=divqword+(qword(1) shl shift);
+                fpc_div_qword:=fpc_div_qword+(qword(1) shl shift);
             end;
           dec(shift);
           n:=n shr 1;
         until shift<0;
      end;

-    function modqword(n,z : qword) : qword;[public,alias: 'FPC_MOD_QWORD'];
+    function fpc_mod_qword(n,z : qword) : qword;[public,alias: 'FPC_MOD_QWORD']; {$ifdef hascompilerproc} compilerproc; {$endif}

      var
         shift,lzz,lzn : longint;

      begin
-         modqword:=0;
+         fpc_mod_qword:=0;
         if n=0 then
           HandleErrorFrame(200,get_frame);
         lzz:=count_leading_zeros(z);
@ -96,7 +96,7 @@
         { the d is greater than the n            }
         if lzn<lzz then
           begin
-              modqword:=z;
+              fpc_mod_qword:=z;
              exit;
           end;
         shift:=lzn-lzz;
@ -107,10 +107,10 @@
           dec(shift);
           n:=n shr 1;
         until shift<0;
-         modqword:=z;
+         fpc_mod_qword:=z;
      end;

-    function divint64(n,z : int64) : int64;[public,alias: 'FPC_DIV_INT64'];
+    function fpc_div_int64(n,z : int64) : int64;[public,alias: 'FPC_DIV_INT64']; {$ifdef hascompilerproc} compilerproc; {$endif}

      var
         sign : boolean;
@ -128,7 +128,7 @@
           begin
              // the c:=comp(...) is necessary to shut up the compiler
              c:=comp(comp(z)/comp(n));
-              divint64:=qword(c);
+              fpc_div_int64:=qword(c);
           end
         else
 {$endif}
@ -151,13 +151,13 @@

              { the div is coded by the compiler as call to divqword }
              if sign then
-                divint64:=-(q1 div q2)
+                fpc_div_int64:=-(q1 div q2)
              else
-                divint64:=q1 div q2;
+                fpc_div_int64:=q1 div q2;
           end;
      end;

-    function modint64(n,z : int64) : int64;[public,alias: 'FPC_MOD_INT64'];
+    function fpc_mod_int64(n,z : int64) : int64;[public,alias: 'FPC_MOD_INT64']; {$ifdef hascompilerproc} compilerproc; {$endif}

      var
         signed : boolean;
@ -185,15 +185,15 @@
           zq:=z;
         r:=zq mod nq;
         if signed then
-           modint64:=-int64(r)
+           fpc_mod_int64:=-int64(r)
         else
-           modint64:=r;
+           fpc_mod_int64:=r;
      end;

    { multiplies two qwords
      the longbool for checkoverflow avoids a misaligned stack
    }
-    function mulqword(f1,f2 : qword;checkoverflow : longbool) : qword;[public,alias: 'FPC_MUL_QWORD'];
+    function fpc_mul_qword(f1,f2 : qword;checkoverflow : longbool) : qword;[public,alias: 'FPC_MUL_QWORD']; {$ifdef hascompilerproc} compilerproc; {$endif}

      var
         _f1,bitpos : qword;
@ -226,12 +226,12 @@
                 movl %eax,r
                 movl %edx,r+4
              end;
-              mulqword:=r;
+              fpc_mul_qword:=r;
           end
         else
 {$endif i386}
           begin
-              mulqword:=0;
+              fpc_mul_qword:=0;
              bitpos:=1;

              // store f1 for overflow checking
@ -240,7 +240,7 @@
              for l:=0 to 63 do
                begin
                   if (f2 and bitpos)<>0 then
-                     mulqword:=mulqword+f1;
+                     fpc_mul_qword:=fpc_mul_qword+f1;

                   f1:=f1 shl 1;
                   bitpos:=bitpos shl 1;
@ -249,7 +249,7 @@
              { if one of the operands is greater than the result an }
              { overflow occurs                                      }
              if checkoverflow and (_f1 <> 0) and (f2 <>0) and
-                 ((_f1>mulqword) or (f2>mulqword)) then
+                 ((_f1>fpc_mul_qword) or (f2>fpc_mul_qword)) then
                HandleErrorFrame(215,get_frame);
           end;
      end;
@ -261,7 +261,7 @@
         ... using the comp multiplication
       the longbool for checkoverflow avoids a misaligned stack
     }
-    function mulint64(f1,f2 : int64;checkoverflow : longbool) : int64;[public,alias: 'FPC_MUL_INT64'];
+    function fpc_mul_int64(f1,f2 : int64;checkoverflow : longbool) : int64;[public,alias: 'FPC_MUL_INT64']; {$ifdef hascompilerproc} compilerproc; {$endif}

      var
         sign : boolean;
@ -277,7 +277,7 @@
           begin
              // the c:=comp(...) is necessary to shut up the compiler
              c:=comp(comp(f1)*comp(f2));
-              mulint64:=int64(c);
+              fpc_mul_int64:=int64(c);
           end
         else
 {$endif}
@ -310,9 +310,9 @@
                HandleErrorFrame(215,get_frame);

              if sign then
-                mulint64:=-q3
+                fpc_mul_int64:=-q3
              else
-                mulint64:=q3;
+                fpc_mul_int64:=q3;
           end;
      end;

@ -508,7 +508,12 @@

 {
  $Log$
-  Revision 1.11  2001-08-13 12:40:16  jonas
+  Revision 1.12  2001-09-05 15:22:09  jonas
+    * made multiplying, dividing and mod'ing of int64 and qword processor
+      independent with compilerprocs (+ small optimizations by using shift/and
+      where possible)
+
+  Revision 1.11  2001/08/13 12:40:16  jonas
    * renamed some str(x,y) and val(x,y) helpers so the naming scheme is the
      same for all string types
    + added the str(x,y) and val(x,y,z) helpers for int64/qword to