* use constrained LLVM fp intrinsics for add/sub/mul/slash/fma/sqrt
  operations when fastmath is not enabled

git-svn-id: trunk@43819 -
2025-09-01 13:11:06 +02:00 · 2019-12-30 15:05:09 +00:00 · 2019-12-30 15:05:09 +00:00 · 797077855e
commit 797077855e
parent b355ba3d39
3 changed files with 204 additions and 96 deletions
--- a/compiler/llvm/nllvmadd.pas
+++ b/compiler/llvm/nllvmadd.pas
@ -47,20 +47,75 @@ interface
 implementation
     uses
-       verbose,globtype,
+       verbose,globtype,globals,cutils,
       aasmdata,
       symconst,symtype,symdef,defutil,
       llvmbase,aasmllvm,
-       cgbase,cgutils,
+       cgbase,cgutils,pass_1,
       hlcgobj,
-       nadd
+       nadd,ncal,ncnv,ncon
       ;
 { tllvmaddnode }
  function tllvmaddnode.pass_1: tnode;
    var
      intrname: string;
      iscompcurrency: boolean;
    begin
      result:=inherited pass_1;
      if not assigned(result) and
         is_fpu(left.resultdef) and
         not(cs_opt_fastmath in current_settings.optimizerswitches) then
        begin
          case nodetype of
            addn:
              begin
                intrname:='LLVM_EXPERIMENTAL_CONSTRAINED_FADD';
              end;
            subn:
              begin
                intrname:='LLVM_EXPERIMENTAL_CONSTRAINED_FSUB';
              end;
            muln:
              begin
                intrname:='LLVM_EXPERIMENTAL_CONSTRAINED_FMUL';
              end;
            slashn:
              begin
                intrname:='LLVM_EXPERIMENTAL_CONSTRAINED_FDIV';
              end;
            else
              begin
                intrname:='';
              end;
          end;
          if intrname<>'' then
            begin
              iscompcurrency:=tfloatdef(left.resultdef).floattype in [s64currency,s64comp];
              if iscompcurrency then
                begin
                  inserttypeconv_internal(left,s80floattype);
                  inserttypeconv_internal(right,s80floattype);
                end;
              result:=ccallnode.createintern(intrname,
                ccallparanode.create(cstringconstnode.createpchar(ansistring2pchar('fpexcept.strict'),length('fpexcept.strict'),llvm_metadatatype),
                  ccallparanode.create(cstringconstnode.createpchar(ansistring2pchar('round.dynamic'),length('round.dynamic'),llvm_metadatatype),
                    ccallparanode.create(right,
                      ccallparanode.create(left,nil)
                    )
                  )
                )
              );
              if iscompcurrency then
                begin
                  result:=ctypeconvnode.create_internal(result,resultdef);
                end;
              left:=nil;
              right:=nil;
              exit;
            end;
        end;
      { there are no flags in LLVM }
      if expectloc=LOC_FLAGS then
        expectloc:=LOC_REGISTER;
@ -226,29 +281,26 @@ implementation
      op    : tllvmop;
      llvmfpcmp : tllvmfpcmp;
      size  : tdef;
      cmpop,
      singleprec : boolean;
    begin
      pass_left_right;
-      cmpop:=false;
+      { get the operands in the correct order; there are no special cases here,
-      singleprec:=tfloatdef(left.resultdef).floattype=s32real;
+        everything is register-based }
-      { avoid uninitialised warning }
+      if nf_swapped in flags then
-      llvmfpcmp:=lfc_invalid;
+        swapleftright;
-      case nodetype of
+
-        addn :
+      { put both operands in a register }
-          op:=la_fadd;
+      hlcg.location_force_fpureg(current_asmdata.CurrAsmList,right.location,right.resultdef,true);
-        muln :
+      hlcg.location_force_fpureg(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
-          op:=la_fmul;
+
-        subn :
+      { see comment in thlcgllvm.a_loadfpu_ref_reg }
-          op:=la_fsub;
+      if tfloatdef(left.resultdef).floattype in [s64comp,s64currency] then
-        slashn :
+        size:=sc80floattype
-          op:=la_fdiv;
+      else
-        ltn,lten,gtn,gten,
+        size:=left.resultdef;
-        equaln,unequaln :
+
      if nodetype in [ltn,lten,gtn,gten,equaln,unequaln] then
        begin
            op:=la_fcmp;
            cmpop:=true;
          case nodetype of
            ltn:
              llvmfpcmp:=lfc_olt;
@ -265,51 +317,34 @@ implementation
            else
              internalerror(2015031506);
          end;
          end;
        else
          internalerror(2013102401);
      end;
      { get the operands in the correct order; there are no special cases here,
        everything is register-based }
      if nf_swapped in flags then
        swapleftright;
      { put both operands in a register }
      hlcg.location_force_fpureg(current_asmdata.CurrAsmList,right.location,right.resultdef,true);
      hlcg.location_force_fpureg(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
      { initialize the result location }
      if not cmpop then
        begin
          location_reset(location,LOC_FPUREGISTER,def_cgsize(resultdef));
          location.register:=hlcg.getfpuregister(current_asmdata.CurrAsmList,resultdef);
        end
      else
        begin
          location_reset(location,LOC_REGISTER,OS_8);
          location.register:=hlcg.getintregister(current_asmdata.CurrAsmList,llvmbool1type);
        end;
-      { see comment in thlcgllvm.a_loadfpu_ref_reg }
+          current_asmdata.CurrAsmList.concat(taillvm.op_reg_fpcond_size_reg_reg(la_fcmp ,
      if tfloatdef(left.resultdef).floattype in [s64comp,s64currency] then
        size:=sc80floattype
      else
        size:=left.resultdef;
      { emit the actual operation }
      if not cmpop then
        begin
          current_asmdata.CurrAsmList.concat(taillvm.op_reg_size_reg_reg(op,location.register,size,
            left.location.register,right.location.register))
        end
      else
        begin
          current_asmdata.CurrAsmList.concat(taillvm.op_reg_fpcond_size_reg_reg(op,
            location.register,llvmfpcmp,size,left.location.register,right.location.register));
          tmpreg:=hlcg.getintregister(current_asmdata.CurrAsmList,resultdef);
          hlcg.a_load_reg_reg(current_asmdata.CurrAsmList,llvmbool1type,resultdef,location.register,tmpreg);
          location.register:=tmpreg;
        end
      else
        begin
          case nodetype of
            addn :
              op:=la_fadd;
            muln :
              op:=la_fmul;
            subn :
              op:=la_fsub;
            slashn :
              op:=la_fdiv;
            else
              internalerror(2013102401);
          end;
          location_reset(location,LOC_FPUREGISTER,def_cgsize(resultdef));
          location.register:=hlcg.getfpuregister(current_asmdata.CurrAsmList,resultdef);
          current_asmdata.CurrAsmList.concat(taillvm.op_reg_size_reg_reg(op,location.register,size,
            left.location.register,right.location.register))
        end;
    end;
--- a/compiler/llvm/nllvminl.pas
+++ b/compiler/llvm/nllvminl.pas
@ -52,7 +52,7 @@ interface
 implementation
     uses
-       verbose,globals,globtype,constexp,
+       verbose,globals,globtype,constexp,cutils,
       aasmbase, aasmdata,
       symconst,symtype,symdef,defutil,
       compinnr,
@ -219,7 +219,9 @@ implementation
    { Replaces an FMA inline node by an internal call node.

      With cs_opt_fastmath enabled, the routine name is chosen per
      inlinenumber (part of that case statement is elided by the diff hunk
      below) and called with the original parameter list in "left".

      Without fastmath, every supported FMA variant maps to the single name
      'LLVM_EXPERIMENTAL_CONSTRAINED_FMA', and two extra string-constant
      parameters typed as llvm_metadatatype ('round.dynamic' and
      'fpexcept.strict') are chained in front of the original parameter list.

      procname is declared as string[40] because the constrained name is
      longer than the previous string[15] limit. }
    function tllvminlinenode.first_fma: tnode;
      var
-        procname: string[15];
+        procname: string[40];
      begin
        if cs_opt_fastmath in current_settings.optimizerswitches then
          begin
            case inlinenumber of
              in_fma_single:
@ -234,6 +236,26 @ implementation
                internalerror(2018122101);
            end;
            result:=ccallnode.createintern(procname,left);
          end
        else
          begin
            { all floating point sizes share one routine name here; which
              declaration is used is presumably decided by overload
              resolution on the operand types (confirm against llvmintr.inc) }
            case inlinenumber of
              in_fma_single,
              in_fma_double,
              in_fma_extended,
              in_fma_float128:
                procname:='LLVM_EXPERIMENTAL_CONSTRAINED_FMA';
              else
                internalerror(2019122811);
            end;
            { wrap the existing parameter chain with the exception-behaviour
              and rounding-mode metadata strings }
            result:=ccallnode.createintern(procname,
              ccallparanode.create(cstringconstnode.createpchar(ansistring2pchar('fpexcept.strict'),length('fpexcept.strict'),llvm_metadatatype),
                ccallparanode.create(cstringconstnode.createpchar(ansistring2pchar('round.dynamic'),length('round.dynamic'),llvm_metadatatype),
                  left
                )
              )
            );
          end;
        { "left" was handed to the new call node in both branches; it is
          cleared here, presumably so this node no longer references it
          (confirm) }
        left:=nil;
      end;
@ -250,10 +272,12 @@ implementation
    { Replaces a floating point sqrt inline node by an internal call node.

      Raises internalerror 2018121601 if the operand is not a floatdef.

      With cs_opt_fastmath enabled, a type-specific routine name such as
      'llvm_sqrt_f32' is selected from the operand's floattype (part of that
      case statement is elided by the diff hunk below) and called with the
      operand as its only parameter.

      Without fastmath, s32real/s64real/s80real/sc80real/s128real all map to
      'LLVM_EXPERIMENTAL_CONSTRAINED_SQRT', called with the operand plus two
      string-constant parameters typed as llvm_metadatatype ('round.dynamic'
      and 'fpexcept.strict'); any other floattype raises internalerror
      2019122810.

      intrinsic is declared as string[40] because the constrained name is
      longer than the previous string[20] limit. }
    function tllvminlinenode.first_sqrt_real: tnode;
      var
-        intrinsic: string[20];
+        intrinsic: string[40];
      begin
        if left.resultdef.typ<>floatdef then
          internalerror(2018121601);
        if cs_opt_fastmath in current_settings.optimizerswitches then
          begin
            case tfloatdef(left.resultdef).floattype of
              s32real:
                intrinsic:='llvm_sqrt_f32';
@ -267,6 +291,26 @ implementation
                internalerror(2018121602);
            end;
            result:=ccallnode.createintern(intrinsic, ccallparanode.create(left,nil));
          end
        else
          begin
            { one routine name for all supported float types; the matching
              declaration is presumably picked by overload resolution on the
              operand type (confirm against llvmintr.inc) }
            case tfloatdef(left.resultdef).floattype of
              s32real,
              s64real,
              s80real,sc80real,
              s128real:
                intrinsic:='LLVM_EXPERIMENTAL_CONSTRAINED_SQRT';
              else
                internalerror(2019122810);
            end;
            { the operand is chained together with the exception-behaviour
              and rounding-mode metadata strings }
            result:=ccallnode.createintern(intrinsic,
              ccallparanode.create(cstringconstnode.createpchar(ansistring2pchar('fpexcept.strict'),length('fpexcept.strict'),llvm_metadatatype),
                ccallparanode.create(cstringconstnode.createpchar(ansistring2pchar('round.dynamic'),length('round.dynamic'),llvm_metadatatype),
                  ccallparanode.create(left,nil)
                )
              )
            );
          end;
        { "left" was handed to the new call node in both branches; it is
          cleared here, presumably so this node no longer references it
          (confirm) }
        left:=nil;
      end;
--- a/rtl/inc/llvmintr.inc
+++ b/rtl/inc/llvmintr.inc
@ -41,19 +41,48 @@ function llvm_ctpop(src: UInt32): UInt32; external name 'llvm.ctpop.i32';
 { External bindings to LLVM intrinsics; each declaration's "external name"
   is the LLVM symbol it binds to. }
 function llvm_ctpop(src: UInt64): UInt64; external name 'llvm.ctpop.i64';
 { Square root. The plain variants are compilerprocs with one Pascal name per
   operand type. The constrained variants share the single Pascal name
   llvm_experimental_constrained_sqrt across operand types and take two extra
   LLVMMetadata parameters (rounding, exceptions) after the operand. }
 function llvm_sqrt_f32(val: single): single; compilerproc; external name 'llvm.sqrt.f32';
 function llvm_experimental_constrained_sqrt(val: single; rounding, exceptions: LLVMMetadata): single; external name 'llvm.experimental.constrained.sqrt.f32';
 function llvm_sqrt_f64(val: double): double; compilerproc; external name 'llvm.sqrt.f64';
 function llvm_experimental_constrained_sqrt(val: double; rounding, exceptions: LLVMMetadata): double; external name 'llvm.experimental.constrained.sqrt.f64';
 {$ifdef SUPPORT_EXTENDED}
-function llvm_sqrt_f80(val: extended): extended; compilerproc; external name 'llvm.sqrt.f80';
+function llvm_sqrt_f80(val: cextended): cextended; compilerproc; external name 'llvm.sqrt.x86_fp80';
 function llvm_experimental_constrained_sqrt(val: cextended; rounding, exceptions: LLVMMetadata): cextended; external name 'llvm.experimental.constrained.sqrt.x86_fp80';
 {$endif}
 {$ifdef SUPPORT_FLOAT128}
 function llvm_sqrt_f128(val: float128): float128; compilerproc; external name 'llvm.sqrt.f128';
 function llvm_experimental_constrained_sqrt(val: float128; rounding, exceptions: LLVMMetadata): float128; external name 'llvm.experimental.constrained.sqrt.f128';
 {$endif}
 { Fused multiply-add, declared with the same pattern as sqrt above: plain
   compilerproc variants per type, plus constrained variants under one shared
   name that take the rounding and exceptions metadata after a, b and c. }
 function llvm_fma_f32(a, b, c: single): single; compilerproc; external name 'llvm.fma.f32';
 function llvm_experimental_constrained_fma(a, b, c: single; rounding, exceptions: LLVMMetadata): single; external name 'llvm.experimental.constrained.fma.f32';
 function llvm_fma_f64(a, b, c: double): double; compilerproc; external name 'llvm.fma.f64';
 function llvm_experimental_constrained_fma(a, b, c: double; rounding, exceptions: LLVMMetadata): double; external name 'llvm.experimental.constrained.fma.f64';
 {$ifdef SUPPORT_EXTENDED}
-function llvm_fma_f80(a, b, c: extended): extended; compilerproc; external name 'llvm.fma.f80';
+function llvm_fma_f80(a, b, c: cextended): cextended; compilerproc; external name 'llvm.fma.x86_fp80';
 function llvm_experimental_constrained_fma(a, b, c: cextended; rounding, exceptions: LLVMMetadata): cextended; external name 'llvm.experimental.constrained.fma.x86_fp80';
 {$endif}
 {$ifdef SUPPORT_FLOAT128}
 function llvm_fma_f128(a, b, c: float128): float128; compilerproc; external name 'llvm.fma.f128';
 function llvm_experimental_constrained_fma(a, b, c: float128; rounding, exceptions: LLVMMetadata): float128; external name 'llvm.experimental.constrained.fma.f128';
 {$endif}
 { Constrained add/sub/mul/div, one shared Pascal name per operation with a
   declaration per operand type; only constrained variants are declared here.
   NOTE(review): the x86_fp80 variants below are declared with "extended",
   whereas the x86_fp80 sqrt and fma declarations above use "cextended" --
   confirm that this difference is intended. }
 function llvm_experimental_constrained_fadd(a, b: single; rounding, exceptions: LLVMMetadata): single; external name 'llvm.experimental.constrained.fadd.f32';
 function llvm_experimental_constrained_fsub(a, b: single; rounding, exceptions: LLVMMetadata): single; external name 'llvm.experimental.constrained.fsub.f32';
 function llvm_experimental_constrained_fmul(a, b: single; rounding, exceptions: LLVMMetadata): single; external name 'llvm.experimental.constrained.fmul.f32';
 function llvm_experimental_constrained_fdiv(a, b: single; rounding, exceptions: LLVMMetadata): single; external name 'llvm.experimental.constrained.fdiv.f32';
 function llvm_experimental_constrained_fadd(a, b: double; rounding, exceptions: LLVMMetadata): double; external name 'llvm.experimental.constrained.fadd.f64';
 function llvm_experimental_constrained_fsub(a, b: double; rounding, exceptions: LLVMMetadata): double; external name 'llvm.experimental.constrained.fsub.f64';
 function llvm_experimental_constrained_fmul(a, b: double; rounding, exceptions: LLVMMetadata): double; external name 'llvm.experimental.constrained.fmul.f64';
 function llvm_experimental_constrained_fdiv(a, b: double; rounding, exceptions: LLVMMetadata): double; external name 'llvm.experimental.constrained.fdiv.f64';
 {$ifdef SUPPORT_EXTENDED}
 function llvm_experimental_constrained_fadd(a, b: extended; rounding, exceptions: LLVMMetadata): extended; external name 'llvm.experimental.constrained.fadd.x86_fp80';
 function llvm_experimental_constrained_fsub(a, b: extended; rounding, exceptions: LLVMMetadata): extended; external name 'llvm.experimental.constrained.fsub.x86_fp80';
 function llvm_experimental_constrained_fmul(a, b: extended; rounding, exceptions: LLVMMetadata): extended; external name 'llvm.experimental.constrained.fmul.x86_fp80';
 function llvm_experimental_constrained_fdiv(a, b: extended; rounding, exceptions: LLVMMetadata): extended; external name 'llvm.experimental.constrained.fdiv.x86_fp80';
 {$endif}
 {$ifdef SUPPORT_FLOAT128}
 function llvm_experimental_constrained_fadd(a, b: float128; rounding, exceptions: LLVMMetadata): float128; external name 'llvm.experimental.constrained.fadd.f128';
 function llvm_experimental_constrained_fsub(a, b: float128; rounding, exceptions: LLVMMetadata): float128; external name 'llvm.experimental.constrained.fsub.f128';
 function llvm_experimental_constrained_fmul(a, b: float128; rounding, exceptions: LLVMMetadata): float128; external name 'llvm.experimental.constrained.fmul.f128';
 function llvm_experimental_constrained_fdiv(a, b: float128; rounding, exceptions: LLVMMetadata): float128; external name 'llvm.experimental.constrained.fdiv.f128';
 {$endif}