From a6a17efa42d9de2a1206311b536b5de0487e6a67 Mon Sep 17 00:00:00 2001
From: Jonas Maebe <jonas@freepascal.org>
Date: Mon, 30 Dec 2019 15:05:13 +0000
Subject: [PATCH]   * use LLVM constrained fpext/fptrunc intrinsics when
 fastmath is not enabled     for accurate exception behaviour

git-svn-id: trunk@43820 -
---
 compiler/llvm/hlcgllvm.pas | 54 +++++++++++++++++++++++++++++++++++---
 compiler/llvm/llvmdef.pas  | 19 ++++++++++++++
 compiler/llvm/llvminfo.pas |  5 ++--
 rtl/inc/llvmintr.inc       | 19 ++++++++++++++
 4 files changed, 92 insertions(+), 5 deletions(-)

diff --git a/compiler/llvm/hlcgllvm.pas b/compiler/llvm/hlcgllvm.pas
index 318cbe3f64..75d1b4a8e5 100644
--- a/compiler/llvm/hlcgllvm.pas
+++ b/compiler/llvm/hlcgllvm.pas
@@ -96,6 +96,9 @@ uses
       procedure a_loadfpu_ref_reg(list: TAsmList; fromsize, tosize: tdef; const ref: treference; reg: tregister); override;
       procedure a_loadfpu_reg_ref(list: TAsmList; fromsize, tosize: tdef; reg: tregister; const ref: treference); override;
       procedure a_loadfpu_reg_reg(list: TAsmList; fromsize, tosize: tdef; reg1, reg2: tregister); override;
+     protected
+      procedure gen_fpconstrained_intrinsic(list: TAsmList; const intrinsic: TIDString; fromsize, tosize: tdef; fromreg, toreg: tregister);
+     public
 
       procedure gen_proc_symbol(list: TAsmList); override;
       procedure handle_external_proc(list: TAsmList; pd: tprocdef; const importname: TSymStr); override;
@@ -165,7 +168,7 @@ implementation
     verbose,cutils,globals,fmodule,constexp,systems,
     defutil,llvmdef,llvmsym,
     aasmtai,aasmcpu,
-    aasmllvm,llvmbase,llvminfo,tgllvm,
+    aasmllvm,aasmllvmmetadata,llvmbase,llvminfo,tgllvm,
     symtable,symllvm,
     paramgr,
     pass_2,procinfo,llvmpi,cpuinfo,cgobj,cgllvm,cghlcpu,
@@ -1321,10 +1324,55 @@ implementation
   procedure thlcgllvm.a_loadfpu_reg_reg(list: TAsmList; fromsize, tosize: tdef; reg1, reg2: tregister);
     var
       op: tllvmop;
+      wrappername: TIDString; { name of the system unit wrapper of the constrained intrinsic }
     begin
       op:=llvmconvop(fromsize,tosize,true);
-      { reg2 = bitcast fromllsize reg1 to tollsize }
-      list.concat(taillvm.op_reg_size_reg_size(op,reg2,fromsize,reg1,tosize));
+      if not(cs_opt_fastmath in current_settings.optimizerswitches) and
+         (llvmflag_constrained_fptrunc_fpext in llvmversion_properties[current_settings.llvmversion]) and
+         (op in [la_fptrunc,la_fpext]) then
+        begin
+          if op=la_fpext then
+            wrappername:='llvm_experimental_constrained_fpext'
+          else
+            wrappername:='llvm_experimental_constrained_fptrunc';
+          wrappername:=wrappername+llvmfloatintrinsicsuffix(tfloatdef(tosize))+llvmfloatintrinsicsuffix(tfloatdef(fromsize));
+          gen_fpconstrained_intrinsic(list,wrappername,fromsize,tosize,reg1,reg2);
+        end
+      else
+        { fastmath, no constrained intrinsic support, or not an fptrunc/fpext: emit the plain conversion instruction }
+        list.concat(taillvm.op_reg_size_reg_size(op,reg2,fromsize,reg1,tosize));
+    end;
+
+
+  procedure thlcgllvm.gen_fpconstrained_intrinsic(list: TAsmList; const intrinsic: TIDString; fromsize, tosize: tdef; fromreg, toreg: tregister);
+    { emits a call to the system unit routine "intrinsic" with fromreg (of type fromsize) as value and stores the result (of type tosize) in toreg }
+    var
+      intrinsicpd: tprocdef;
+      valuepara, roundingpara, exceptionpara, resultpara: tcgpara;
+      regloc: tlocation;
+    begin
+      valuepara.init;
+      roundingpara.init;
+      exceptionpara.init;
+      intrinsicpd:=search_system_proc(intrinsic);
+      { parameter 1 = value to convert, 2 = rounding metadata, 3 = exception metadata }
+      paramanager.getcgtempparaloc(list,intrinsicpd,1,valuepara);
+      paramanager.getcgtempparaloc(list,intrinsicpd,2,roundingpara);
+      paramanager.getcgtempparaloc(list,intrinsicpd,3,exceptionpara);
+      location_reset(regloc,valuepara.location^.loc,def_cgsize(fromsize));
+      regloc.register:=fromreg;
+      gen_load_loc_cgpara(list,fromsize,regloc,valuepara);
+      a_load_reg_cgpara(list,llvm_metadatatype,tllvmmetadata.getstringreg('round.dynamic'),roundingpara);
+      a_load_reg_cgpara(list,llvm_metadatatype,tllvmmetadata.getstringreg('fpexcept.strict'),exceptionpara);
+      resultpara:=g_call_system_proc(list,intrinsicpd,[@valuepara,@roundingpara,@exceptionpara],nil);
+      { move the call result into toreg, then release the parameter locations }
+      location_reset(regloc,resultpara.location^.loc,def_cgsize(tosize));
+      regloc.register:=toreg;
+      gen_load_cgpara_loc(list,tosize,resultpara,regloc,false);
+      valuepara.done;
+      roundingpara.done;
+      exceptionpara.done;
+      resultpara.resetiftemp;
     end;
 
 
diff --git a/compiler/llvm/llvmdef.pas b/compiler/llvm/llvmdef.pas
index ea8757a7dc..34d3d99af2 100644
--- a/compiler/llvm/llvmdef.pas
+++ b/compiler/llvm/llvmdef.pas
@@ -109,6 +109,8 @@ interface
 
     function llvmasmsymname(const sym: TAsmSymbol): TSymStr;
 
+    function llvmfloatintrinsicsuffix(def: tfloatdef): TIDString;
+
 
 implementation
 
@@ -290,6 +292,23 @@ implementation
         result:='label %'+sym.name;
     end;
 
+  function llvmfloatintrinsicsuffix(def: tfloatdef): TIDString;
+    { returns the suffix that identifies def's float type in the names of the LLVM intrinsic wrappers }
+    begin
+      case def.floattype of
+        s128real:
+          result:='_f128';
+        s80real,sc80real:
+          result:='_f80';
+        s64real:
+          result:='_f64';
+        s32real:
+          result:='_f32';
+        else
+          internalerror(2019122902); { comp/currency need to be converted to s(c)80real first }
+      end;
+    end;
+
 
   function llvmbyvalparaloc(paraloc: pcgparalocation): boolean;
     begin
diff --git a/compiler/llvm/llvminfo.pas b/compiler/llvm/llvminfo.pas
index 13503ba24b..05b82c1031 100644
--- a/compiler/llvm/llvminfo.pas
+++ b/compiler/llvm/llvminfo.pas
@@ -47,7 +47,8 @@ Type
 type
    tllvmversionflag = (
      llvmflag_memcpy_indiv_align,  { memcpy intrinsic supports separate alignment for source and dest }
-     llvmflag_null_pointer_valid   { supports "llvmflag_null_pointer_valid" attribute, which indicates access to nil should not be optimized as undefined behaviour }
+     llvmflag_null_pointer_valid,  { supports "llvmflag_null_pointer_valid" attribute, which indicates access to nil should not be optimized as undefined behaviour }
+     llvmflag_constrained_fptrunc_fpext { supports the llvm.experimental.constrained.fptrunc/fpext intrinsics; thlcgllvm.a_loadfpu_reg_reg only uses them when this flag is set }
    );
    tllvmversionflags = set of tllvmversionflag;
 
@@ -70,7 +71,7 @@ Const
        { llvmver_7_0     } [llvmflag_memcpy_indiv_align,llvmflag_null_pointer_valid],
        { llvmver_7_1     } [llvmflag_memcpy_indiv_align,llvmflag_null_pointer_valid],
        { llvmver_8_0     } [llvmflag_memcpy_indiv_align,llvmflag_null_pointer_valid],
-       { llvmver_9_0     } [llvmflag_memcpy_indiv_align,llvmflag_null_pointer_valid]
+       { llvmver_9_0     } [llvmflag_memcpy_indiv_align,llvmflag_null_pointer_valid,llvmflag_constrained_fptrunc_fpext]
      );
 
    { Supported optimizations, only used for information }
diff --git a/rtl/inc/llvmintr.inc b/rtl/inc/llvmintr.inc
index 3aac1d1ab2..880be2ce51 100644
--- a/rtl/inc/llvmintr.inc
+++ b/rtl/inc/llvmintr.inc
@@ -86,3 +86,22 @@ function llvm_experimental_constrained_fsub(a, b: float128; rounding, exceptions
 function llvm_experimental_constrained_fmul(a, b: float128; rounding, exceptions: LLVMMetadata): float128; external name 'llvm.experimental.constrained.fmul.f128';
 function llvm_experimental_constrained_fdiv(a, b: float128; rounding, exceptions: LLVMMetadata): float128; external name 'llvm.experimental.constrained.fdiv.f128';
 {$endif}
+{ constrained fptrunc/fpext wrappers; the compiler looks them up as llvm_experimental_constrained_<op>_<result suffix>_<operand suffix> (see thlcgllvm.a_loadfpu_reg_reg) }
+function llvm_experimental_constrained_fptrunc_f32_f64(a: double; rounding, exceptions: LLVMMetadata): single; compilerproc; external name 'llvm.experimental.constrained.fptrunc.f32.f64';
+function llvm_experimental_constrained_fpext_f64_f32(a: single; rounding, exceptions: LLVMMetadata): double; compilerproc; external name 'llvm.experimental.constrained.fpext.f64.f32';
+{$ifdef SUPPORT_EXTENDED}
+function llvm_experimental_constrained_fptrunc_f32_f80(a: extended; rounding, exceptions: LLVMMetadata): single; compilerproc; external name 'llvm.experimental.constrained.fptrunc.f32.x86_fp80';
+function llvm_experimental_constrained_fptrunc_f64_f80(a: extended; rounding, exceptions: LLVMMetadata): double; compilerproc; external name 'llvm.experimental.constrained.fptrunc.f64.x86_fp80';
+function llvm_experimental_constrained_fpext_f80_f32(a: single; rounding, exceptions: LLVMMetadata): extended; compilerproc; external name 'llvm.experimental.constrained.fpext.x86_fp80.f32';
+function llvm_experimental_constrained_fpext_f80_f64(a: double; rounding, exceptions: LLVMMetadata): extended; compilerproc; external name 'llvm.experimental.constrained.fpext.x86_fp80.f64';
+{$ifdef SUPPORT_FLOAT128} { extended is narrower than float128: extended->float128 is an fpext, float128->extended an fptrunc }
+function llvm_experimental_constrained_fpext_f128_f80(a: extended; rounding, exceptions: LLVMMetadata): float128; compilerproc; external name 'llvm.experimental.constrained.fpext.f128.x86_fp80';
+function llvm_experimental_constrained_fptrunc_f80_f128(a: float128; rounding, exceptions: LLVMMetadata): extended; compilerproc; external name 'llvm.experimental.constrained.fptrunc.x86_fp80.f128';
+{$endif}
+{$endif}
+{$ifdef SUPPORT_FLOAT128}
+function llvm_experimental_constrained_fptrunc_f32_f128(a: float128; rounding, exceptions: LLVMMetadata): single; compilerproc; external name 'llvm.experimental.constrained.fptrunc.f32.f128';
+function llvm_experimental_constrained_fptrunc_f64_f128(a: float128; rounding, exceptions: LLVMMetadata): double; compilerproc; external name 'llvm.experimental.constrained.fptrunc.f64.f128';
+function llvm_experimental_constrained_fpext_f128_f32(a: single; rounding, exceptions: LLVMMetadata): float128; compilerproc; external name 'llvm.experimental.constrained.fpext.f128.f32';
+function llvm_experimental_constrained_fpext_f128_f64(a: double; rounding, exceptions: LLVMMetadata): float128; compilerproc; external name 'llvm.experimental.constrained.fpext.f128.f64';
+{$endif}