+ Support (as target-independent as possible) optimization of division by constants:

The code generator gets two new methods, a_mul_reg_reg_pair and g_div_const_reg_reg. The first one is basically 32x32 to 64 bits multiplication (or any other size, with result having twice the size of arguments), which must be implemented for every target. The second one actually does the job, its default implementation taken from powerpc64 and is sufficiently good for all three-address targets. + Enabled optimized division for MIPS target, target-specific changes are under 30 lines. git-svn-id: trunk@27904 -
2025-04-08 09:28:19 +02:00 · 2014-06-08 22:50:24 +00:00 · 2014-06-08 22:50:24 +00:00 · cd27d64cd5
commit cd27d64cd5
parent 73d7f2aa18
3 changed files with 103 additions and 9 deletions
--- a/compiler/cgobj.pas
+++ b/compiler/cgobj.pas
@ -249,6 +249,10 @@ unit cgobj;
          { bit scan instructions }
          procedure a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; size: tcgsize; src, dst: TRegister); virtual; abstract;

+          { Multiplication with doubling result size.
+            dstlo or dsthi may be NR_NO, in which case corresponding half of result is discarded. }
+          procedure a_mul_reg_reg_pair(list: TAsmList; size: tcgsize; src1,src2,dstlo,dsthi: TRegister);virtual;
+
          { fpu move instructions }
          procedure a_loadfpu_reg_reg(list: TAsmList; fromsize, tosize:tcgsize; reg1, reg2: tregister); virtual; abstract;
          procedure a_loadfpu_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; reg: tregister); virtual; abstract;
@ -458,6 +462,9 @@ unit cgobj;
          { Generate code to exit an unwind-protected region. The default implementation
            produces a simple jump to destination label. }
          procedure g_local_unwind(list: TAsmList; l: TAsmLabel);virtual;
+          { Generate code for integer division by constant,
+            generic version is suitable for 3-address CPUs }
+          procedure g_div_const_reg_reg(list:tasmlist; size: TCgSize; a: tcgint; src,dst: tregister); virtual;

         protected
          function g_indirect_sym_load(list:TAsmList;const symname: string; const flags: tindsymflags): tregister;virtual;
@ -2501,6 +2508,69 @@ implementation
        internalerror(200807238);
      end;

+
+    procedure tcg.a_mul_reg_reg_pair(list: TAsmList; size: TCgSize; src1,src2,dstlo,dsthi: TRegister);
+      begin
+        internalerror(2014060801);
+      end;
+
+
+    procedure tcg.g_div_const_reg_reg(list:tasmlist; size: TCgSize; a: tcgint; src,dst: tregister);
+      var
+        divreg: tregister;
+        magic: aInt;
+        u_magic: aWord;
+        u_shift: byte;
+        u_add: boolean;
+      begin
+        divreg:=getintregister(list,OS_INT);
+        if (size in [OS_S32,OS_S64]) then
+          begin
+            calc_divconst_magic_signed(tcgsize2size[size]*8,a,magic,u_shift);
+            { load magic value }
+            a_load_const_reg(list,OS_INT,magic,divreg);
+            { multiply, discarding low bits }
+            a_mul_reg_reg_pair(list,size,src,divreg,NR_NO,dst);
+            { add/subtract numerator }
+            if (a>0) and (magic<0) then
+              a_op_reg_reg_reg(list,OP_ADD,OS_INT,src,dst,dst)
+            else if (a<0) and (magic>0) then
+              a_op_reg_reg_reg(list,OP_SUB,OS_INT,src,dst,dst);
+            { shift shift places to the right (arithmetic) }
+            a_op_const_reg_reg(list,OP_SAR,OS_INT,u_shift,dst,dst);
+            { extract and add sign bit }
+            if (a>=0) then
+              a_op_const_reg_reg(list,OP_SHR,OS_INT,tcgsize2size[size]*8-1,src,divreg)
+            else
+              a_op_const_reg_reg(list,OP_SHR,OS_INT,tcgsize2size[size]*8-1,dst,divreg);
+            a_op_reg_reg_reg(list,OP_ADD,OS_INT,dst,divreg,dst);
+          end
+        else if (size in [OS_32,OS_64]) then
+          begin
+            calc_divconst_magic_unsigned(tcgsize2size[size]*8,a,u_magic,u_add,u_shift);
+            { load magic in divreg }
+            a_load_const_reg(list,OS_INT,tcgint(u_magic),divreg);
+            { multiply, discarding low bits }
+            a_mul_reg_reg_pair(list,size,src,divreg,NR_NO,dst);
+            if (u_add) then
+              begin
+                { Calculate "(numerator+result) shr u_shift", avoiding possible overflow }
+                a_op_reg_reg_reg(list,OP_SUB,OS_INT,dst,src,divreg);
+                { divreg=(numerator-result) }
+                a_op_const_reg_reg(list,OP_SHR,OS_INT,1,divreg,divreg);
+                { divreg=(numerator-result)/2 }
+                a_op_reg_reg_reg(list,OP_ADD,OS_INT,divreg,dst,divreg);
+                { divreg=(numerator+result)/2, already shifted by 1, so decrease u_shift. }
+                a_op_const_reg_reg(list,OP_SHR,OS_INT,u_shift-1,divreg,dst);
+              end
+            else
+              a_op_const_reg_reg(list,OP_SHR,OS_INT,u_shift,dst,dst);
+          end
+        else
+          InternalError(2014060601);
+      end;
+
+
 {*****************************************************************************
                                    TCG64
 *****************************************************************************}
--- a/compiler/mips/cgcpu.pas
+++ b/compiler/mips/cgcpu.pas
@ -76,6 +76,7 @@ type
    procedure g_flags2reg(list: tasmlist; size: TCgSize; const f: TResFlags; reg: tregister); override;
    procedure a_jmp_always(List: tasmlist; l: TAsmLabel); override;
    procedure a_jmp_name(list: tasmlist; const s: string); override;
+    procedure a_mul_reg_reg_pair(list: tasmlist; size: tcgsize; src1,src2,dstlo,dsthi: tregister); override;
    procedure g_overflowCheck(List: tasmlist; const Loc: TLocation; def: TDef); override;
    procedure g_overflowCheck_loc(List: tasmlist; const Loc: TLocation; def: TDef; ovloc: tlocation); override;
    procedure g_proc_entry(list: tasmlist; localsize: longint; nostackframe: boolean); override;
@ -1154,6 +1155,24 @@ procedure TCGMIPS.g_flags2reg(list: tasmlist; size: tcgsize; const f: tresflags;
  end;


+procedure TCGMIPS.a_mul_reg_reg_pair(list: tasmlist; size: tcgsize; src1,src2,dstlo,dsthi: tregister);
+var
+  asmop: tasmop;
+begin
+  case size of
+    OS_32:  asmop:=A_MULTU;
+    OS_S32: asmop:=A_MULT;
+  else
+    InternalError(2014060802);
+  end;
+  list.concat(taicpu.op_reg_reg(asmop,src1,src2));
+  if (dstlo<>NR_NO) then
+    list.concat(taicpu.op_reg(A_MFLO,dstlo));
+  if (dsthi<>NR_NO) then
+    list.concat(taicpu.op_reg(A_MFHI,dsthi));
+end;
+
+
 procedure TCGMIPS.g_overflowCheck(List: tasmlist; const Loc: TLocation; def: TDef);
 begin
 // this is an empty procedure
--- a/compiler/mips/ncpumat.pas
+++ b/compiler/mips/ncpumat.pas
@ -86,16 +86,21 @@ begin
  numerator := left.location.Register;

  if (nodetype = divn) and
-    (right.nodetype = ordconstn) and
-    ispowerof2(tordconstnode(right).Value.svalue, power) then
+    (right.nodetype = ordconstn) then
  begin
-    tmpreg := cg.GetIntRegister(current_asmdata.CurrAsmList, OS_INT);
-    cg.a_op_const_reg_reg(current_asmdata.CurrAsmList, OP_SAR, OS_INT, 31, numerator, tmpreg);
-    { if signed, tmpreg=right value-1, otherwise 0 }
-    cg.a_op_const_reg(current_asmdata.CurrAsmList, OP_AND, OS_INT, tordconstnode(right).Value.svalue - 1, tmpreg);
-    { add left value }
-    cg.a_op_reg_reg(current_asmdata.CurrAsmList, OP_ADD, OS_INT, numerator, tmpreg);
-    cg.a_op_const_reg_reg(current_asmdata.CurrAsmList, OP_SAR, OS_INT, aword(power), tmpreg, location.register);
+    if ispowerof2(tordconstnode(right).Value.svalue, power) then
+    begin
+      tmpreg := cg.GetIntRegister(current_asmdata.CurrAsmList, OS_INT);
+      cg.a_op_const_reg_reg(current_asmdata.CurrAsmList, OP_SAR, OS_INT, 31, numerator, tmpreg);
+      { if signed, tmpreg=right value-1, otherwise 0 }
+      cg.a_op_const_reg(current_asmdata.CurrAsmList, OP_AND, OS_INT, tordconstnode(right).Value.svalue - 1, tmpreg);
+      { add left value }
+      cg.a_op_reg_reg(current_asmdata.CurrAsmList, OP_ADD, OS_INT, numerator, tmpreg);
+      cg.a_op_const_reg_reg(current_asmdata.CurrAsmList, OP_SAR, OS_INT, aword(power), tmpreg, location.register);
+    end
+    else
+      cg.g_div_const_reg_reg(current_asmdata.CurrAsmList,def_cgsize(resultdef),
+        tordconstnode(right).value.svalue,numerator,location.register);
  end
  else
  begin