{
    Copyright (c) 1998-2002 by Florian Klaempfl

    Generates ARM inline nodes

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

 ****************************************************************************
}
unit narminl;

{$i fpcdefs.inc}

interface

    uses
      node,ninl,ncginl;

    type
      tarminlinenode = class(tcgInlineNode)
        function first_abs_real: tnode; override;
        function first_sqr_real: tnode; override;
        function first_sqrt_real: tnode; override;
        function first_fma : tnode; override;
        { atn,sin,cos,lgn isn't supported by the linux fpe
        function first_arctan_real: tnode; override;
        function first_ln_real: tnode; override;
        function first_cos_real: tnode; override;
        function first_sin_real: tnode; override;
        }
        procedure second_abs_real; override;
        procedure second_sqr_real; override;
        procedure second_sqrt_real; override;
        { atn,sin,cos,lgn isn't supported by the linux fpe
        procedure second_arctan_real; override;
        procedure second_ln_real; override;
        procedure second_cos_real; override;
        procedure second_sin_real; override;
        }
        procedure second_prefetch; override;
        procedure second_abs_long; override;
        procedure second_fma; override;
      private
        procedure load_fpu_location(out singleprec: boolean);
      end;


implementation

    uses
      globtype,verbose,globals,
      procinfo,
      cpuinfo, defutil,symdef,aasmdata,aasmcpu,
      cgbase,cgutils,pass_1,pass_2,
      cpubase,ncgutil,cgobj,cgcpu, hlcgobj,
      nutils,ncal;

{*****************************************************************************
                              tarminlinenode
*****************************************************************************}

    procedure tarminlinenode.load_fpu_location(out singleprec: boolean);
      begin
        secondpass(left);
        case current_settings.fputype of
          fpu_fpa,
          fpu_fpa10,
          fpu_fpa11:
            begin
              hlcg.location_force_fpureg(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
              location_copy(location,left.location);
              if left.location.loc=LOC_CFPUREGISTER then
                begin
                 location.register:=cg.getfpuregister(current_asmdata.CurrAsmList,location.size);
                 location.loc := LOC_FPUREGISTER;
               end;
            end;
          fpu_soft:
            begin
              hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,left.resultdef,false);
              location_copy(location,left.location);
            end
          else if FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype] then
            begin
              hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
              location_copy(location,left.location);
              if left.location.loc=LOC_CMMREGISTER then
                begin
                 location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
                 location.loc := LOC_MMREGISTER;
               end;
            end
          else
            internalerror(2009111801);
        end;
        singleprec:=tfloatdef(left.resultdef).floattype=s32real;
      end;


    function tarminlinenode.first_abs_real : tnode;
      begin
        if (cs_fp_emulation in current_settings.moduleswitches) then
          begin
            firstpass(left);
            expectloc:=LOC_REGISTER;
            first_abs_real:=nil;
          end
        else
          begin
            case current_settings.fputype of
              fpu_fpa,
              fpu_fpa10,
              fpu_fpa11:
                expectloc:=LOC_FPUREGISTER;
              else if FPUARM_HAS_VFP_DOUBLE in fpu_capabilities[current_settings.fputype] then
                expectloc:=LOC_MMREGISTER
              else if FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype] then
                begin
                  if tfloatdef(left.resultdef).floattype=s32real then
                    expectloc:=LOC_MMREGISTER
                  else
                    exit(inherited first_abs_real);
                end
              else
                internalerror(2009112401);
            end;
            if ([FPUARM_HAS_VFP_EXTENSION,FPUARM_HAS_VFP_DOUBLE]*fpu_capabilities[current_settings.fputype]<>[]) and
              needs_check_for_fpu_exceptions then
              Include(current_procinfo.flags,pi_do_call);
            first_abs_real:=nil;
          end;
      end;


    function tarminlinenode.first_sqr_real : tnode;
      begin
        if (cs_fp_emulation in current_settings.moduleswitches) then
          result:=inherited first_sqr_real
        else
          begin
            case current_settings.fputype of
              fpu_fpa,
              fpu_fpa10,
              fpu_fpa11:
                expectloc:=LOC_FPUREGISTER;
              else if FPUARM_HAS_VFP_DOUBLE in fpu_capabilities[current_settings.fputype] then
                expectloc:=LOC_MMREGISTER
              else if FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype] then
                begin
                  if tfloatdef(left.resultdef).floattype=s32real then
                    expectloc:=LOC_MMREGISTER
                  else
                    exit(inherited first_sqr_real);
                end
              else
                internalerror(2009112402);
            end;
            if ([FPUARM_HAS_VFP_EXTENSION,FPUARM_HAS_VFP_DOUBLE]*fpu_capabilities[current_settings.fputype]<>[]) and
              needs_check_for_fpu_exceptions then
              Include(current_procinfo.flags,pi_do_call);
            first_sqr_real:=nil;
          end;
      end;


    function tarminlinenode.first_sqrt_real : tnode;
      begin
        if cs_fp_emulation in current_settings.moduleswitches then
          result:=inherited first_sqrt_real
        else
          begin
            case current_settings.fputype of
              fpu_fpa,
              fpu_fpa10,
              fpu_fpa11:
                expectloc:=LOC_FPUREGISTER;
              else if FPUARM_HAS_VFP_DOUBLE in fpu_capabilities[current_settings.fputype] then
                expectloc:=LOC_MMREGISTER
              else if FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype] then
                begin
                  if tfloatdef(left.resultdef).floattype=s32real then
                    expectloc:=LOC_MMREGISTER
                  else
                    exit(inherited first_sqrt_real);
                end
              else
                internalerror(2009112403);
            end;
            if ([FPUARM_HAS_VFP_EXTENSION,FPUARM_HAS_VFP_DOUBLE]*fpu_capabilities[current_settings.fputype]<>[]) and
              needs_check_for_fpu_exceptions then
              Include(current_procinfo.flags,pi_do_call);
            first_sqrt_real := nil;
          end;
      end;


     function tarminlinenode.first_fma : tnode;
       begin
         if ((is_double(resultdef)) or (is_single(resultdef))) then
           begin
             expectloc:=LOC_MMREGISTER;
             Result:=nil;
             if ([FPUARM_HAS_VFP_EXTENSION,FPUARM_HAS_VFP_DOUBLE]*fpu_capabilities[current_settings.fputype]<>[]) and
               needs_check_for_fpu_exceptions then
               Include(current_procinfo.flags,pi_do_call);
           end
         else
           Result:=inherited first_fma;
       end;


    { atn,sin,cos,lgn isn't supported by the linux fpe
    function tarminlinenode.first_arctan_real: tnode;
      begin
        expectloc:=LOC_FPUREGISTER;
        result:=nil;
      end;


    function tarminlinenode.first_ln_real: tnode;
      begin
        expectloc:=LOC_FPUREGISTER;
        result:=nil;
      end;

    function tarminlinenode.first_cos_real: tnode;
      begin
        expectloc:=LOC_FPUREGISTER;
        result:=nil;
      end;


    function tarminlinenode.first_sin_real: tnode;
      begin
        expectloc:=LOC_FPUREGISTER;
        result:=nil;
      end;
    }


    procedure tarminlinenode.second_abs_real;
      var
        singleprec: boolean;
        pf: TOpPostfix;
      begin
        load_fpu_location(singleprec);
        case current_settings.fputype of
          fpu_fpa,
          fpu_fpa10,
          fpu_fpa11:
            current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg(A_ABS,location.register,left.location.register),get_fpu_postfix(resultdef)));
          fpu_soft:
            begin
              if singleprec then
                cg.a_op_const_reg(current_asmdata.CurrAsmList,OP_AND,OS_32,tcgint($7fffffff),location.register)
              else
                cg.a_op_const_reg(current_asmdata.CurrAsmList,OP_AND,OS_32,tcgint($7fffffff),location.registerhi);
            end
          else if FPUARM_HAS_VFP_DOUBLE in fpu_capabilities[current_settings.fputype] then
            begin
              if singleprec then
                pf:=PF_F32
              else
                pf:=PF_F64;
              current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg(A_VABS,location.register,left.location.register),pf));
              cg.maybe_check_for_fpu_exception(current_asmdata.CurrAsmList);
            end
          else if FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype] then
            begin
              current_asmdata.CurrAsmList.Concat(setoppostfix(taicpu.op_reg_reg(A_VABS,location.register,left.location.register), PF_F32));
              cg.maybe_check_for_fpu_exception(current_asmdata.CurrAsmList);
            end
          else
            internalerror(2009111402);
        end;
      end;


    procedure tarminlinenode.second_sqr_real;
      var
        singleprec: boolean;
        pf: TOpPostfix;
      begin
        load_fpu_location(singleprec);
        case current_settings.fputype of
          fpu_fpa,
          fpu_fpa10,
          fpu_fpa11:
            current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg_reg(A_MUF,location.register,left.location.register,left.location.register),get_fpu_postfix(resultdef)));
          else if FPUARM_HAS_VFP_DOUBLE in fpu_capabilities[current_settings.fputype] then
            begin
              if singleprec then
                pf:=PF_F32
              else
                pf:=PF_F64;
              current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg_reg(A_VMUL,location.register,left.location.register,left.location.register),pf));
              cg.maybe_check_for_fpu_exception(current_asmdata.CurrAsmList);
            end
          else if FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype] then
            begin
              current_asmdata.CurrAsmList.Concat(setoppostfix(taicpu.op_reg_reg_reg(A_VMUL,location.register,left.location.register,left.location.register), PF_F32));
              cg.maybe_check_for_fpu_exception(current_asmdata.CurrAsmList);
            end
          else
            internalerror(2009111403);
        end;
      end;


    procedure tarminlinenode.second_sqrt_real;
      var
        singleprec: boolean;
        pf: TOpPostfix;
      begin
        load_fpu_location(singleprec);
        case current_settings.fputype of
          fpu_fpa,
          fpu_fpa10,
          fpu_fpa11:
            current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg(A_SQT,location.register,left.location.register),get_fpu_postfix(resultdef)));
          else if FPUARM_HAS_VFP_DOUBLE in fpu_capabilities[current_settings.fputype] then
            begin
              if singleprec then
                pf:=PF_F32
              else
                pf:=PF_F64;
              current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg(A_VSQRT,location.register,left.location.register),pf));
              cg.maybe_check_for_fpu_exception(current_asmdata.CurrAsmList);
            end
          else if FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype] then
            begin
              current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg(A_VSQRT,location.register,left.location.register), PF_F32));
              cg.maybe_check_for_fpu_exception(current_asmdata.CurrAsmList);
            end
          else
            internalerror(2009111405);
        end;
      end;


    { atn, sin, cos, lgn isn't supported by the linux fpe
    procedure tarminlinenode.second_arctan_real;
      begin
        load_fpu_location;
        current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg(A_ATN,location.register,left.location.register),get_fpu_postfix(resultdef)));
      end;


    procedure tarminlinenode.second_ln_real;
      begin
        load_fpu_location;
        current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg(A_LGN,location.register,left.location.register),get_fpu_postfix(resultdef)));
      end;

    procedure tarminlinenode.second_cos_real;
      begin
        load_fpu_location;
        current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg(A_COS,location.register,left.location.register),get_fpu_postfix(resultdef)));
      end;


    procedure tarminlinenode.second_sin_real;
      begin
        load_fpu_location;
        current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg(A_SIN,location.register,left.location.register),get_fpu_postfix(resultdef)));
      end;
    }

    procedure tarminlinenode.second_prefetch;
      var
        ref : treference;
        r : tregister;
        checkpointer_used : boolean;
      begin
        if not(GenerateThumbCode) and (CPUARM_HAS_EDSP in cpu_capabilities[current_settings.cputype]) then
          begin
             { do not call Checkpointer for left node }
             checkpointer_used:=(cs_checkpointer in current_settings.localswitches);
             if checkpointer_used then
               node_change_local_switch(left,cs_checkpointer,false);
             secondpass(left);
             if checkpointer_used then
               node_change_local_switch(left,cs_checkpointer,false);
            case left.location.loc of
              LOC_CREFERENCE,
              LOC_REFERENCE:
                begin
                  r:=cg.getintregister(current_asmdata.CurrAsmList,OS_ADDR);
                  cg.a_loadaddr_ref_reg(current_asmdata.CurrAsmList,left.location.reference,r);
                  reference_reset_base(ref,r,0,location.reference.temppos,left.location.reference.alignment,location.reference.volatility);
                  { since the address might be nil we can't use ldr for older cpus }
                  current_asmdata.CurrAsmList.concat(taicpu.op_ref(A_PLD,ref));
                end;
              else
                { nothing to prefetch };
            end;
          end;
      end;

    procedure tarminlinenode.second_abs_long;
      var
        opsize : tcgsize;
      begin
        if GenerateThumbCode then
          begin
            inherited second_abs_long;
            exit;
          end;

        secondpass(left);
        opsize:=def_cgsize(left.resultdef);
        hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,left.resultdef,true);
        location:=left.location;
        location.register:=cg.getintregister(current_asmdata.CurrAsmList,opsize);

        cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
        current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg(A_MOV,location.register,left.location.register), PF_S));

        if GenerateThumb2Code then
          current_asmdata.CurrAsmList.concat(taicpu.op_cond(A_IT,C_MI));

        current_asmdata.CurrAsmList.concat(setcondition(taicpu.op_reg_reg_const(A_RSB,location.register,location.register, 0), C_MI));

        cg.a_reg_dealloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
      end;


    procedure tarminlinenode.second_fma;
      const
        op : array[false..true,false..true] of TAsmOp =
          { positive product }
          (
           { positive third operand }
           (A_VFMA,
           { negative third operand }
            A_VFNMS),
           { negative product }
            { positive third operand }
            (A_VFMS,
             A_VFNMA)
           );

      var
        paraarray : array[1..3] of tnode;
        i : integer;
        negop3,
        negproduct : boolean;
        oppostfix : TOpPostfix;
      begin
         if FPUARM_HAS_FMA in fpu_capabilities[current_settings.fputype] then
           begin
             negop3:=false;
             negproduct:=false;
             paraarray[1]:=tcallparanode(tcallparanode(tcallparanode(parameters).nextpara).nextpara).paravalue;
             paraarray[2]:=tcallparanode(tcallparanode(parameters).nextpara).paravalue;
             paraarray[3]:=tcallparanode(parameters).paravalue;

             { check if a neg. node can be removed
               this is possible because changing the sign of
               a floating point number does not affect its absolute
               value in any way
             }
             if paraarray[1].nodetype=unaryminusn then
               begin
                 paraarray[1]:=tunarynode(paraarray[1]).left;
                 { do not release the unused unary minus node, it is kept and release together with the other nodes,
                   only no code is generated for it }
                 negproduct:=not(negproduct);
               end;

             if paraarray[2].nodetype=unaryminusn then
               begin
                 paraarray[2]:=tunarynode(paraarray[2]).left;
                 { do not release the unused unary minus node, it is kept and release together with the other nodes,
                   only no code is generated for it }
                 negproduct:=not(negproduct);
               end;

             if paraarray[3].nodetype=unaryminusn then
               begin
                 paraarray[3]:=tunarynode(paraarray[3]).left;
                 { do not release the unused unary minus node, it is kept and release together with the other nodes,
                   only no code is generated for it }
                 negop3:=true;
               end;

              for i:=1 to 3 do
               secondpass(paraarray[i]);

             { no memory operand is allowed }
             for i:=1 to 3 do
               begin
                 if not(paraarray[i].location.loc in [LOC_MMREGISTER,LOC_CMMREGISTER]) then
                   hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,paraarray[i].location,paraarray[i].resultdef,true);
               end;

             location_reset(location,LOC_MMREGISTER,paraarray[1].location.size);
             location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);

             hlcg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,paraarray[3].resultdef,resultdef,
               paraarray[3].location.register,location.register,mms_movescalar);
             if is_double(resultdef) then
               oppostfix:=PF_F64
             else
               oppostfix:=PF_F32;
             current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg_reg(op[negproduct,negop3],
               location.register,paraarray[1].location.register,paraarray[2].location.register),oppostfix));
             cg.maybe_check_for_fpu_exception(current_asmdata.CurrAsmList);
           end
         else
           internalerror(2014032301);
      end;


begin
  cinlinenode:=tarminlinenode;
end.