+ fma support for arm

git-svn-id: trunk@33181 -
2025-04-13 01:29:28 +02:00 · 2016-03-06 13:33:14 +00:00 · 2016-03-06 13:33:14 +00:00 · 43746685a5
commit 43746685a5
parent 096e1c45d6
1 changed files with 105 additions and 1 deletions
--- a/compiler/arm/narminl.pas
+++ b/compiler/arm/narminl.pas
@ -33,6 +33,7 @@ interface
        function first_abs_real: tnode; override;
        function first_sqr_real: tnode; override;
        function first_sqrt_real: tnode; override;
+        function first_fma : tnode; override;
        { atn,sin,cos,lgn isn't supported by the linux fpe
        function first_arctan_real: tnode; override;
        function first_ln_real: tnode; override;
@ -50,6 +51,7 @@ interface
        }
        procedure second_prefetch; override;
        procedure second_abs_long; override;
+        procedure second_fma; override;
      private
        procedure load_fpu_location(out singleprec: boolean);
      end;
@ -61,7 +63,8 @@ implementation
      globtype,verbose,globals,
      cpuinfo, defutil,symdef,aasmdata,aasmcpu,
      cgbase,cgutils,pass_1,pass_2,
-      cpubase,ncgutil,cgobj,cgcpu, hlcgobj;
+      cpubase,ncgutil,cgobj,cgcpu, hlcgobj,
+      ncal;

 {*****************************************************************************
                              tarminlinenode
@ -202,6 +205,19 @@ implementation
      end;


+     function tarminlinenode.first_fma : tnode;
+       begin
+         if (true) and
+           ((is_double(resultdef)) or (is_single(resultdef))) then
+           begin
+             expectloc:=LOC_MMREGISTER;
+             Result:=nil;
+           end
+         else
+           Result:=inherited first_fma;
+       end;
+
+
    { atn,sin,cos,lgn isn't supported by the linux fpe
    function tarminlinenode.first_arctan_real: tnode;
      begin
@ -404,6 +420,94 @@ implementation
        cg.a_reg_dealloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
      end;

+
+    procedure tarminlinenode.second_fma;
+      const
+        op : array[false..true,false..true] of TAsmOp =
+          { positive product }
+          (
+           { positive third operand }
+           (A_VFMA,
+           { negative third operand }
+            A_VFNMS),
+           { negative product }
+            { positive third operand }
+            (A_VFMS,
+             A_VFNMA)
+           );
+
+      var
+        paraarray : array[1..3] of tnode;
+        i : integer;
+        negop3,
+        negproduct : boolean;
+        hp : tnode;
+        oppostfix : TOpPostfix;
+      begin
+         if current_settings.fputype in [fpu_vfpv4] then
+           begin
+             negop3:=false;
+             negproduct:=false;
+             paraarray[1]:=tcallparanode(tcallparanode(tcallparanode(parameters).nextpara).nextpara).paravalue;
+             paraarray[2]:=tcallparanode(tcallparanode(parameters).nextpara).paravalue;
+             paraarray[3]:=tcallparanode(parameters).paravalue;
+
+             { check if a neg. node can be removed
+               this is possible because changing the sign of
+               a floating point number does not affect its absolute
+               value in any way
+             }
+             if paraarray[1].nodetype=unaryminusn then
+               begin
+                 paraarray[1]:=tunarynode(paraarray[1]).left;
+                 { do not release the unused unary minus node, it is kept and release together with the other nodes,
+                   only no code is generated for it }
+                 negproduct:=not(negproduct);
+               end;
+
+             if paraarray[2].nodetype=unaryminusn then
+               begin
+                 paraarray[2]:=tunarynode(paraarray[2]).left;
+                 { do not release the unused unary minus node, it is kept and release together with the other nodes,
+                   only no code is generated for it }
+                 negproduct:=not(negproduct);
+               end;
+
+             if paraarray[3].nodetype=unaryminusn then
+               begin
+                 paraarray[3]:=tunarynode(paraarray[3]).left;
+                 { do not release the unused unary minus node, it is kept and release together with the other nodes,
+                   only no code is generated for it }
+                 negop3:=true;
+               end;
+
+              for i:=1 to 3 do
+               secondpass(paraarray[i]);
+
+             { no memory operand is allowed }
+             for i:=1 to 3 do
+               begin
+                 if not(paraarray[i].location.loc in [LOC_MMREGISTER,LOC_CMMREGISTER]) then
+                   hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,paraarray[i].location,paraarray[i].resultdef,true);
+               end;
+
+             location_reset(location,LOC_MMREGISTER,paraarray[1].location.size);
+             location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
+
+             hlcg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,paraarray[3].resultdef,resultdef,
+               paraarray[3].location.register,location.register,mms_movescalar);
+             if is_double(resultdef) then
+               oppostfix:=PF_F64
+             else
+               oppostfix:=PF_F32;
+             current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg_reg(op[negproduct,negop3],
+               location.register,paraarray[1].location.register,paraarray[2].location.register),oppostfix));
+           end
+         else
+           internalerror(2014032301);
+      end;
+
+
 begin
  cinlinenode:=tarminlinenode;
 end.