+ experimental -Sv option to support vector arithmetics

git-svn-id: trunk@4825 -
2025-04-08 11:48:04 +02:00 · 2006-10-07 21:39:48 +00:00 · 2006-10-07 21:39:48 +00:00 · fb5e396881
commit fb5e396881
parent 537c10517c
14 changed files with 151 additions and 13 deletions
--- a/compiler/cgbase.pas
+++ b/compiler/cgbase.pas
@ -157,7 +157,8 @@ interface
        R_SUBFD,   { = 7; Float that allocates 2 FPU registers }
        R_SUBFQ,   { = 8; Float that allocates 4 FPU registers }
        R_SUBMMS,  { = 9; single scalar in multi media register }
-        R_SUBMMD   { = 10; double scalar in multi media register }
+        R_SUBMMD,  { = 10; double scalar in multi media register }
+        R_SUBMMWHOLE  { = 11; complete MM register, size depends on CPU }
      );

      TSuperRegister = type word;
@ -563,6 +564,8 @@ implementation
            result:=result+'md';
          R_SUBMMS:
            result:=result+'ms';
+          R_SUBMMWHOLE:
+            result:=result+'ma';
          else
            internalerror(200308252);
        end;
--- a/compiler/defutil.pas
+++ b/compiler/defutil.pas
@ -208,12 +208,18 @@ interface
    }
    procedure getrange(def : tdef;var l : TConstExprInt;var h : TConstExprInt);

+    { returns true if the type is a vector, i.e. a non-special array of s32real or s64real elements }
+    function is_vector(p : tdef) : boolean;
+
    { some type helper routines for MMX support }
    function is_mmx_able_array(p : tdef) : boolean;

    {# returns the mmx type }
    function mmx_type(p : tdef) : tmmxtype;

+    { returns whether the passed type (array) fits into an mm register }
+    function fits_in_mm_register(p : tdef) : boolean;
+
    {# From a definition return the abstract code generator size enum. It is
       to note that the value returned can be @var(OS_NO) }
    function def_cgsize(def: tdef): tcgsize;
@ -808,6 +814,42 @@ implementation
      end;


+    { Returns true when p is an array that is_special_array does not
+      classify as special and whose element type is an s32real or s64real
+      float. }
+    function is_vector(p : tdef) : boolean;
+      var
+        elemdef : tdef;
+      begin
+        result:=false;
+        { only arrays can be vectors }
+        if p.deftype<>arraydef then
+          exit;
+        if is_special_array(p) then
+          exit;
+        { p is known to be an arraydef from here on, so the typecast is safe }
+        elemdef:=tarraydef(p).elementtype.def;
+        if elemdef.deftype<>floatdef then
+          exit;
+        result:=tfloatdef(elemdef).typ in [s32real,s64real];
+      end;
+
+
+    { Returns whether the passed type fits into an mm register.
+      On x86 this is true for vector types (see is_vector) declared as
+      array[0..3] of s32real or array[0..1] of s64real; on every other
+      target the result is always false. }
+    function fits_in_mm_register(p : tdef) : boolean;
+      begin
+{$ifdef x86}
+        { "and" binds tighter than "or", so both alternatives must be grouped
+          under the is_vector guard; otherwise the second alternative would
+          typecast p to tarraydef even if p is not an array at all.
+          is_vector already guarantees that the element type is a floatdef,
+          so only the range and the float type need to be checked here }
+        result:= is_vector(p) and
+                 (tarraydef(p).lowrange=0) and
+                 (
+                  (
+                   (tarraydef(p).highrange=3) and
+                   (tfloatdef(tarraydef(p).elementtype.def).typ=s32real)
+                  ) or
+                  (
+                   (tarraydef(p).highrange=1) and
+                   (tfloatdef(tarraydef(p).elementtype.def).typ=s64real)
+                  )
+                 );
+{$else x86}
+        result:=false;
+{$endif x86}
+      end;
+
+
    function is_mmx_able_array(p : tdef) : boolean;
      begin
 {$ifdef SUPPORT_MMX}
--- a/compiler/globtype.pas
+++ b/compiler/globtype.pas
@ -127,6 +127,7 @@ than 255 characters. That's why using Ansi Strings}
         cs_load_objpas_unit,
         cs_load_gpc_unit,
         cs_load_fpcylix_unit,
+         cs_support_vectors,
         { browser }
         cs_browser_log,
         { debuginfo }
--- a/compiler/htypechk.pas
+++ b/compiler/htypechk.pas
@ -229,9 +229,11 @@ implementation
              end;
            arraydef :
              begin
-                { not mmx }
-                if (cs_mmx in aktlocalswitches) and
-                   is_mmx_able_array(ld) then
+                { not vector/mmx }
+                if ((cs_mmx in aktlocalswitches) and
+                   is_mmx_able_array(ld)) or
+                   ((cs_support_vectors in aktglobalswitches) and
+                   is_vector(ld)) then
                 begin
                   allowed:=false;
                   exit;
--- a/compiler/i386/cpubase.inc
+++ b/compiler/i386/cpubase.inc
@ -100,7 +100,7 @@
      {# the maximum float size for a processor,           }
      OS_FLOAT = OS_F80;
      {# the size of a vector register for a processor     }
-      OS_VECTOR = OS_M64;
+      OS_VECTOR = OS_M128;

 {*****************************************************************************
                          Generic Register names
--- a/compiler/nadd.pas
+++ b/compiler/nadd.pas
@ -760,7 +760,7 @@ implementation
        lt:=left.nodetype;

         { but an int/int gives real/real! }
-         if nodetype=slashn then
+         if (nodetype=slashn) and not(is_vector(left.resulttype.def)) and not(is_vector(right.resulttype.def)) then
          begin
            if is_currency(left.resulttype.def) and
               is_currency(right.resulttype.def) then
@ -1402,6 +1402,18 @@ implementation
              end;
            end
 {$endif SUPPORT_MMX}
+         { vector support, this must be before the zero based array
+           check }
+         else if (cs_support_vectors in aktglobalswitches) and
+                 is_vector(ld) and
+                 is_vector(rd) and
+                 equal_defs(ld,rd) then
+            begin
+              if not(nodetype in [addn,subn,xorn,orn,andn,muln,slashn]) then
+                CGMessage3(type_e_operator_not_supported_for_types,node2opstr(nodetype),ld.typename,rd.typename);
+              { both defs must be equal, so taking left or right as resulttype doesn't matter }
+              resulttype:=left.resulttype;
+            end

         { this is a little bit dangerous, also the left type }
         { pointer to should be checked! This broke the mmx support      }
--- a/compiler/ncgadd.pas
+++ b/compiler/ncgadd.pas
@ -56,6 +56,7 @@ interface
          procedure second_opmmx;virtual;abstract;
 {$endif SUPPORT_MMX}
 {$endif x86}
+          procedure second_opvector;virtual;abstract;
          procedure second_add64bit;virtual;
          procedure second_addordinal;virtual;
          procedure second_cmpfloat;virtual;abstract;
@ -777,6 +778,10 @@ interface
              { support dynarr=nil }
              if is_dynamic_array(left.resulttype.def) then
                second_opordinal
+              else
+                if (cs_support_vectors in aktglobalswitches) and
+                   is_vector(left.resulttype.def) then
+                  second_opvector
 {$ifdef SUPPORT_MMX}
              else
                if is_mmx_able_array(left.resulttype.def) then
--- a/compiler/ncgutil.pas
+++ b/compiler/ncgutil.pas
@ -64,6 +64,7 @@ interface
    procedure location_force_fpureg(list:TAsmList;var l: tlocation;maybeconst:boolean);
    procedure location_force_mem(list:TAsmList;var l:tlocation);
    procedure location_force_mmregscalar(list:TAsmList;var l: tlocation;maybeconst:boolean);
+    procedure location_force_mmreg(list:TAsmList;var l: tlocation;maybeconst:boolean);

    { Retrieve the location of the data pointed to in location l, when the location is
      a register it is expected to contain the address of the data }
@ -727,6 +728,23 @@ implementation
      end;


+    { Forces the location l into an mm register of size OS_VECTOR.
+      Nothing is done if l is already a LOC_MMREGISTER, or if it is a
+      LOC_CMMREGISTER and maybeconst is true. Otherwise a new mm register is
+      allocated, l is loaded into it, any temp held by l is released and l is
+      reset to a LOC_MMREGISTER referring to the new register. }
+    procedure location_force_mmreg(list:TAsmList;var l: tlocation;maybeconst:boolean);
+      var
+        reg : tregister;
+      begin
+        if (l.loc<>LOC_MMREGISTER) and
+           ((l.loc<>LOC_CMMREGISTER) or (not maybeconst)) then
+          begin
+            reg:=cg.getmmregister(list,OS_VECTOR);
+            cg.a_loadmm_loc_reg(list,OS_VECTOR,l,reg,nil);
+            { the old location is no longer needed once its value is in reg }
+            location_freetemp(list,l);
+            location_reset(l,LOC_MMREGISTER,OS_VECTOR);
+            l.register:=reg;
+          end;
+      end;
+
+
    procedure location_force_mem(list:TAsmList;var l:tlocation);
      var
        r : treference;
--- a/compiler/options.pas
+++ b/compiler/options.pas
@ -1052,6 +1052,8 @@ begin
                         include(initglobalswitches,cs_constructor_name);
                       't' :
                         include(initmoduleswitches,cs_static_keyword);
+                       'v' :
+                         include(initglobalswitches,cs_support_vectors);
                       '-' :
                         begin
                           exclude(initglobalswitches,cs_constructor_name);
--- a/compiler/x86/aasmcpu.pas
+++ b/compiler/x86/aasmcpu.pas
@ -2435,6 +2435,8 @@ implementation
                result:=taicpu.op_ref_reg(A_MOVSD,reg2opsize(r),ref,r);
              R_SUBMMS:
                result:=taicpu.op_ref_reg(A_MOVSS,reg2opsize(r),ref,r);
+              R_SUBMMWHOLE:
+                result:=taicpu.op_ref_reg(A_MOVQ,S_NO,ref,r);
              else
                internalerror(200506043);
            end;
@ -2455,6 +2457,8 @@ implementation
                result:=taicpu.op_reg_ref(A_MOVSD,reg2opsize(r),r,ref);
              R_SUBMMS:
                result:=taicpu.op_reg_ref(A_MOVSS,reg2opsize(r),r,ref);
+              R_SUBMMWHOLE:
+                result:=taicpu.op_reg_ref(A_MOVQ,S_NO,r,ref);
              else
                internalerror(200506042);
            end;
--- a/compiler/x86/cgx86.pas
+++ b/compiler/x86/cgx86.pas
@ -195,6 +195,8 @@ unit cgx86;
            result:=rg[R_MMREGISTER].getregister(list,R_SUBMMD);
          OS_F32:
            result:=rg[R_MMREGISTER].getregister(list,R_SUBMMS);
+          OS_M128:
+            result:=rg[R_MMREGISTER].getregister(list,R_SUBMMWHOLE);
          else
            internalerror(200506041);
        end;
@ -1003,10 +1005,10 @@ unit cgx86;
              these
            }
            ( { OS_F32 }
-              A_NOP,A_NOP,A_ADDPS,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_XORPS
+              A_NOP,A_NOP,A_ADDPS,A_NOP,A_DIVPS,A_NOP,A_NOP,A_MULPS,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_SUBPS,A_XORPS
            ),
            ( { OS_F64 }
-              A_NOP,A_NOP,A_ADDPD,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_XORPD
+              A_NOP,A_NOP,A_ADDPD,A_NOP,A_DIVPD,A_NOP,A_NOP,A_MULPD,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_SUBPD,A_XORPD
            )
          )
        );
@ -1041,7 +1043,7 @@ unit cgx86;
        else
          internalerror(200312211);
        if asmop=A_NOP then
-          internalerror(200312215);
+          internalerror(200312216);
        case loc.loc of
          LOC_CREFERENCE,LOC_REFERENCE:
            begin
--- a/compiler/x86/cpubase.pas
+++ b/compiler/x86/cpubase.pas
@ -319,7 +319,7 @@ implementation

    function reg_cgsize(const reg: tregister): tcgsize;
      const subreg2cgsize:array[Tsubregister] of Tcgsize =
-            (OS_NO,OS_8,OS_8,OS_16,OS_32,OS_64,OS_NO,OS_NO,OS_NO,OS_F32,OS_F64);
+            (OS_NO,OS_8,OS_8,OS_16,OS_32,OS_64,OS_NO,OS_NO,OS_NO,OS_F32,OS_F64,OS_M128);
      begin
        case getregtype(reg) of
          R_INTREGISTER :
@ -346,7 +346,7 @@ implementation
    function reg2opsize(r:Tregister):topsize;
      const
        subreg2opsize : array[tsubregister] of topsize =
-          (S_NO,S_B,S_B,S_W,S_L,S_Q,S_NO,S_NO,S_NO,S_NO,S_NO);
+          (S_NO,S_B,S_B,S_W,S_L,S_Q,S_NO,S_NO,S_NO,S_NO,S_NO,S_NO);
      begin
        reg2opsize:=S_L;
        case getregtype(r) of
@ -429,7 +429,7 @@ implementation
        { for the name the sub reg doesn't matter }
        hr:=r;
        case getsubreg(hr) of
-          R_SUBMMS,R_SUBMMD:
+          R_SUBMMS,R_SUBMMD,R_SUBMMWHOLE:
            setsubreg(hr,R_SUBNONE);
        end;
        result:=findreg_by_number_table(hr,regnumber_index);
--- a/compiler/x86/nx86add.pas
+++ b/compiler/x86/nx86add.pas
@ -55,6 +55,7 @@ unit nx86add;
        procedure second_opmmxset;override;
        procedure second_opmmx;override;
 {$endif SUPPORT_MMX}
+        procedure second_opvector;override;
      end;


@ -810,6 +811,52 @@ unit nx86add;
      end;


+    { Generates code for an add, subtract, multiply or divide node whose
+      operands are vectors. Only operand types accepted by
+      fits_in_mm_register are handled; any other node type or operand type
+      ends in an internal error. }
+    procedure tx86addnode.second_opvector;
+      var
+        op : topcg;
+      begin
+        pass_left_right;
+        if (nf_swaped in flags) then
+          swapleftright;
+
+        { NOTE(review): the type check in nadd also appears to admit
+          xorn/orn/andn for vectors; those node types are not mapped here and
+          hit the internalerror below - confirm whether that is intended }
+        case nodetype of
+          addn :
+            op:=OP_ADD;
+          muln :
+            op:=OP_MUL;
+          subn :
+            op:=OP_SUB;
+          slashn :
+            op:=OP_DIV;
+          else
+            internalerror(200610071);
+        end;
+
+        if fits_in_mm_register(left.resulttype.def) then
+          begin
+            location_reset(location,LOC_MMREGISTER,def_cgsize(resulttype.def));
+            { we can use only right as left operand if the operation is commutative }
+            if (right.location.loc=LOC_MMREGISTER) and (op in [OP_ADD,OP_MUL]) then
+              begin
+                location.register:=right.location.register;
+                { the operand def is an array, so the float type must be
+                  taken from its element def and not from the array def
+                  itself }
+                cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,op,
+                  tfloat2tcgsize[tfloatdef(tarraydef(left.resulttype.def).elementtype.def).typ],left.location,location.register,nil);
+              end
+            else
+              begin
+                { the result is computed in place in the register holding left }
+                location_force_mmreg(current_asmdata.CurrAsmList,left.location,false);
+                location.register:=left.location.register;
+                cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,op,
+                  tfloat2tcgsize[tfloatdef(tarraydef(left.resulttype.def).elementtype.def).typ],right.location,location.register,nil);
+              end;
+          end
+        else
+          begin
+            { not yet supported }
+            internalerror(200610072);
+          end
+      end;
+
+
    procedure tx86addnode.second_addfloat;
      var
        op : TAsmOp;
--- a/compiler/x86_64/cpubase.inc
+++ b/compiler/x86_64/cpubase.inc
@ -82,7 +82,7 @@ const
      { the maximum float size for a processor,           }
      OS_FLOAT = OS_F80;
      { the size of a vector register for a processor     }
-      OS_VECTOR = OS_M64;
+      OS_VECTOR = OS_M128;

 {*****************************************************************************
                          Generic Register names