* use constrained LLVM fp intrinsics for add/sub/mul/slash/fma/sqrt operations when fastmath is not enabled

git-svn-id: trunk@43819 -
This commit is contained in:
Jonas Maebe 2019-12-30 15:05:09 +00:00
parent b355ba3d39
commit 797077855e
3 changed files with 204 additions and 96 deletions

View File

@ -47,20 +47,75 @@ interface
implementation
uses
verbose,globtype,
verbose,globtype,globals,cutils,
aasmdata,
symconst,symtype,symdef,defutil,
llvmbase,aasmllvm,
cgbase,cgutils,
cgbase,cgutils,pass_1,
hlcgobj,
nadd
nadd,ncal,ncnv,ncon
;
{ tllvmaddnode }
function tllvmaddnode.pass_1: tnode;
var
intrname: string;
iscompcurrency: boolean;
begin
result:=inherited pass_1;
if not assigned(result) and
is_fpu(left.resultdef) and
not(cs_opt_fastmath in current_settings.optimizerswitches) then
begin
case nodetype of
addn:
begin
intrname:='LLVM_EXPERIMENTAL_CONSTRAINED_FADD';
end;
subn:
begin
intrname:='LLVM_EXPERIMENTAL_CONSTRAINED_FSUB';
end;
muln:
begin
intrname:='LLVM_EXPERIMENTAL_CONSTRAINED_FMUL';
end;
slashn:
begin
intrname:='LLVM_EXPERIMENTAL_CONSTRAINED_FDIV';
end;
else
begin
intrname:='';
end;
end;
if intrname<>'' then
begin
iscompcurrency:=tfloatdef(left.resultdef).floattype in [s64currency,s64comp];
if iscompcurrency then
begin
inserttypeconv_internal(left,s80floattype);
inserttypeconv_internal(right,s80floattype);
end;
result:=ccallnode.createintern(intrname,
ccallparanode.create(cstringconstnode.createpchar(ansistring2pchar('fpexcept.strict'),length('fpexcept.strict'),llvm_metadatatype),
ccallparanode.create(cstringconstnode.createpchar(ansistring2pchar('round.dynamic'),length('round.dynamic'),llvm_metadatatype),
ccallparanode.create(right,
ccallparanode.create(left,nil)
)
)
)
);
if iscompcurrency then
begin
result:=ctypeconvnode.create_internal(result,resultdef);
end;
left:=nil;
right:=nil;
exit;
end;
end;
{ there are no flags in LLVM }
if expectloc=LOC_FLAGS then
expectloc:=LOC_REGISTER;
@ -225,51 +280,10 @@ implementation
tmpreg: tregister;
op : tllvmop;
llvmfpcmp : tllvmfpcmp;
size : tdef;
cmpop,
singleprec : boolean;
size : tdef;
begin
pass_left_right;
cmpop:=false;
singleprec:=tfloatdef(left.resultdef).floattype=s32real;
{ avoid uninitialised warning }
llvmfpcmp:=lfc_invalid;
case nodetype of
addn :
op:=la_fadd;
muln :
op:=la_fmul;
subn :
op:=la_fsub;
slashn :
op:=la_fdiv;
ltn,lten,gtn,gten,
equaln,unequaln :
begin
op:=la_fcmp;
cmpop:=true;
case nodetype of
ltn:
llvmfpcmp:=lfc_olt;
lten:
llvmfpcmp:=lfc_ole;
gtn:
llvmfpcmp:=lfc_ogt;
gten:
llvmfpcmp:=lfc_oge;
equaln:
llvmfpcmp:=lfc_oeq;
unequaln:
llvmfpcmp:=lfc_une;
else
internalerror(2015031506);
end;
end;
else
internalerror(2013102401);
end;
{ get the operands in the correct order; there are no special cases here,
everything is register-based }
if nf_swapped in flags then
@ -279,37 +293,58 @@ implementation
hlcg.location_force_fpureg(current_asmdata.CurrAsmList,right.location,right.resultdef,true);
hlcg.location_force_fpureg(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
{ initialize the result location }
if not cmpop then
begin
location_reset(location,LOC_FPUREGISTER,def_cgsize(resultdef));
location.register:=hlcg.getfpuregister(current_asmdata.CurrAsmList,resultdef);
end
else
begin
location_reset(location,LOC_REGISTER,OS_8);
location.register:=hlcg.getintregister(current_asmdata.CurrAsmList,llvmbool1type);
end;
{ see comment in thlcgllvm.a_loadfpu_ref_reg }
if tfloatdef(left.resultdef).floattype in [s64comp,s64currency] then
size:=sc80floattype
else
size:=left.resultdef;
{ emit the actual operation }
if not cmpop then
if nodetype in [ltn,lten,gtn,gten,equaln,unequaln] then
begin
current_asmdata.CurrAsmList.concat(taillvm.op_reg_size_reg_reg(op,location.register,size,
left.location.register,right.location.register))
end
else
begin
current_asmdata.CurrAsmList.concat(taillvm.op_reg_fpcond_size_reg_reg(op,
case nodetype of
ltn:
llvmfpcmp:=lfc_olt;
lten:
llvmfpcmp:=lfc_ole;
gtn:
llvmfpcmp:=lfc_ogt;
gten:
llvmfpcmp:=lfc_oge;
equaln:
llvmfpcmp:=lfc_oeq;
unequaln:
llvmfpcmp:=lfc_une;
else
internalerror(2015031506);
end;
location_reset(location,LOC_REGISTER,OS_8);
location.register:=hlcg.getintregister(current_asmdata.CurrAsmList,llvmbool1type);
current_asmdata.CurrAsmList.concat(taillvm.op_reg_fpcond_size_reg_reg(la_fcmp ,
location.register,llvmfpcmp,size,left.location.register,right.location.register));
tmpreg:=hlcg.getintregister(current_asmdata.CurrAsmList,resultdef);
hlcg.a_load_reg_reg(current_asmdata.CurrAsmList,llvmbool1type,resultdef,location.register,tmpreg);
location.register:=tmpreg;
end
else
begin
case nodetype of
addn :
op:=la_fadd;
muln :
op:=la_fmul;
subn :
op:=la_fsub;
slashn :
op:=la_fdiv;
else
internalerror(2013102401);
end;
location_reset(location,LOC_FPUREGISTER,def_cgsize(resultdef));
location.register:=hlcg.getfpuregister(current_asmdata.CurrAsmList,resultdef);
current_asmdata.CurrAsmList.concat(taillvm.op_reg_size_reg_reg(op,location.register,size,
left.location.register,right.location.register))
end;
end;

View File

@ -52,7 +52,7 @@ interface
implementation
uses
verbose,globals,globtype,constexp,
verbose,globals,globtype,constexp,cutils,
aasmbase, aasmdata,
symconst,symtype,symdef,defutil,
compinnr,
@ -219,21 +219,43 @@ implementation
function tllvminlinenode.first_fma: tnode;
var
procname: string[15];
procname: string[40];
begin
case inlinenumber of
in_fma_single:
procname:='llvm_fma_f32';
in_fma_double:
procname:='llvm_fma_f64';
in_fma_extended:
procname:='llvm_fma_f80';
in_fma_float128:
procname:='llvm_fma_f128';
else
internalerror(2018122101);
end;
result:=ccallnode.createintern(procname,left);
if cs_opt_fastmath in current_settings.optimizerswitches then
begin
case inlinenumber of
in_fma_single:
procname:='llvm_fma_f32';
in_fma_double:
procname:='llvm_fma_f64';
in_fma_extended:
procname:='llvm_fma_f80';
in_fma_float128:
procname:='llvm_fma_f128';
else
internalerror(2018122101);
end;
result:=ccallnode.createintern(procname,left);
end
else
begin
case inlinenumber of
in_fma_single,
in_fma_double,
in_fma_extended,
in_fma_float128:
procname:='LLVM_EXPERIMENTAL_CONSTRAINED_FMA';
else
internalerror(2019122811);
end;
result:=ccallnode.createintern(procname,
ccallparanode.create(cstringconstnode.createpchar(ansistring2pchar('fpexcept.strict'),length('fpexcept.strict'),llvm_metadatatype),
ccallparanode.create(cstringconstnode.createpchar(ansistring2pchar('round.dynamic'),length('round.dynamic'),llvm_metadatatype),
left
)
)
);
end;
left:=nil;
end;
@ -250,23 +272,45 @@ implementation
function tllvminlinenode.first_sqrt_real: tnode;
var
intrinsic: string[20];
intrinsic: string[40];
begin
if left.resultdef.typ<>floatdef then
internalerror(2018121601);
case tfloatdef(left.resultdef).floattype of
s32real:
intrinsic:='llvm_sqrt_f32';
s64real:
intrinsic:='llvm_sqrt_f64';
s80real,sc80real:
intrinsic:='llvm_sqrt_f80';
s128real:
intrinsic:='llvm_sqrt_f128';
else
internalerror(2018121602);
end;
result:=ccallnode.createintern(intrinsic, ccallparanode.create(left,nil));
if cs_opt_fastmath in current_settings.optimizerswitches then
begin
case tfloatdef(left.resultdef).floattype of
s32real:
intrinsic:='llvm_sqrt_f32';
s64real:
intrinsic:='llvm_sqrt_f64';
s80real,sc80real:
intrinsic:='llvm_sqrt_f80';
s128real:
intrinsic:='llvm_sqrt_f128';
else
internalerror(2018121602);
end;
result:=ccallnode.createintern(intrinsic, ccallparanode.create(left,nil));
end
else
begin
case tfloatdef(left.resultdef).floattype of
s32real,
s64real,
s80real,sc80real,
s128real:
intrinsic:='LLVM_EXPERIMENTAL_CONSTRAINED_SQRT';
else
internalerror(2019122810);
end;
result:=ccallnode.createintern(intrinsic,
ccallparanode.create(cstringconstnode.createpchar(ansistring2pchar('fpexcept.strict'),length('fpexcept.strict'),llvm_metadatatype),
ccallparanode.create(cstringconstnode.createpchar(ansistring2pchar('round.dynamic'),length('round.dynamic'),llvm_metadatatype),
ccallparanode.create(left,nil)
)
)
);
end;
left:=nil;
end;

View File

@ -41,19 +41,48 @@ function llvm_ctpop(src: UInt32): UInt32; external name 'llvm.ctpop.i32';
function llvm_ctpop(src: UInt64): UInt64; external name 'llvm.ctpop.i64';
function llvm_sqrt_f32(val: single): single; compilerproc; external name 'llvm.sqrt.f32';
function llvm_experimental_constrained_sqrt(val: single; rounding, exceptions: LLVMMetadata): single; external name 'llvm.experimental.constrained.sqrt.f32';
function llvm_sqrt_f64(val: double): double; compilerproc; external name 'llvm.sqrt.f64';
function llvm_experimental_constrained_sqrt(val: double; rounding, exceptions: LLVMMetadata): double; external name 'llvm.experimental.constrained.sqrt.f64';
{$ifdef SUPPORT_EXTENDED}
function llvm_sqrt_f80(val: extended): extended; compilerproc; external name 'llvm.sqrt.f80';
function llvm_sqrt_f80(val: cextended): cextended; compilerproc; external name 'llvm.sqrt.x86_fp80';
function llvm_experimental_constrained_sqrt(val: cextended; rounding, exceptions: LLVMMetadata): cextended; external name 'llvm.experimental.constrained.sqrt.x86_fp80';
{$endif}
{$ifdef SUPPORT_FLOAT128}
function llvm_sqrt_f128(val: float128): float128; compilerproc; external name 'llvm.sqrt.f128';
function llvm_experimental_constrained_sqrt(val: float128; rounding, exceptions: LLVMMetadata): float128; external name 'llvm.experimental.constrained.sqrt.f128';
{$endif}
function llvm_fma_f32(a, b, c: single): single; compilerproc; external name 'llvm.fma.f32';
function llvm_experimental_constrained_fma(a, b, c: single; rounding, exceptions: LLVMMetadata): single; external name 'llvm.experimental.constrained.fma.f32';
function llvm_fma_f64(a, b, c: double): double; compilerproc; external name 'llvm.fma.f64';
function llvm_experimental_constrained_fma(a, b, c: double; rounding, exceptions: LLVMMetadata): double; external name 'llvm.experimental.constrained.fma.f64';
{$ifdef SUPPORT_EXTENDED}
function llvm_fma_f80(a, b, c: extended): extended; compilerproc; external name 'llvm.fma.f80';
function llvm_fma_f80(a, b, c: cextended): cextended; compilerproc; external name 'llvm.fma.x86_fp80';
function llvm_experimental_constrained_fma(a, b, c: cextended; rounding, exceptions: LLVMMetadata): cextended; external name 'llvm.experimental.constrained.fma.x86_fp80';
{$endif}
{$ifdef SUPPORT_FLOAT128}
function llvm_fma_f128(a, b, c: float128): float128; compilerproc; external name 'llvm.fma.f128';
function llvm_experimental_constrained_fma(a, b, c: float128; rounding, exceptions: LLVMMetadata): float128; external name 'llvm.experimental.constrained.fma.f128';
{$endif}
function llvm_experimental_constrained_fadd(a, b: single; rounding, exceptions: LLVMMetadata): single; external name 'llvm.experimental.constrained.fadd.f32';
function llvm_experimental_constrained_fsub(a, b: single; rounding, exceptions: LLVMMetadata): single; external name 'llvm.experimental.constrained.fsub.f32';
function llvm_experimental_constrained_fmul(a, b: single; rounding, exceptions: LLVMMetadata): single; external name 'llvm.experimental.constrained.fmul.f32';
function llvm_experimental_constrained_fdiv(a, b: single; rounding, exceptions: LLVMMetadata): single; external name 'llvm.experimental.constrained.fdiv.f32';
function llvm_experimental_constrained_fadd(a, b: double; rounding, exceptions: LLVMMetadata): double; external name 'llvm.experimental.constrained.fadd.f64';
function llvm_experimental_constrained_fsub(a, b: double; rounding, exceptions: LLVMMetadata): double; external name 'llvm.experimental.constrained.fsub.f64';
function llvm_experimental_constrained_fmul(a, b: double; rounding, exceptions: LLVMMetadata): double; external name 'llvm.experimental.constrained.fmul.f64';
function llvm_experimental_constrained_fdiv(a, b: double; rounding, exceptions: LLVMMetadata): double; external name 'llvm.experimental.constrained.fdiv.f64';
{$ifdef SUPPORT_EXTENDED}
function llvm_experimental_constrained_fadd(a, b: extended; rounding, exceptions: LLVMMetadata): extended; external name 'llvm.experimental.constrained.fadd.x86_fp80';
function llvm_experimental_constrained_fsub(a, b: extended; rounding, exceptions: LLVMMetadata): extended; external name 'llvm.experimental.constrained.fsub.x86_fp80';
function llvm_experimental_constrained_fmul(a, b: extended; rounding, exceptions: LLVMMetadata): extended; external name 'llvm.experimental.constrained.fmul.x86_fp80';
function llvm_experimental_constrained_fdiv(a, b: extended; rounding, exceptions: LLVMMetadata): extended; external name 'llvm.experimental.constrained.fdiv.x86_fp80';
{$endif}
{$ifdef SUPPORT_FLOAT128}
function llvm_experimental_constrained_fadd(a, b: float128; rounding, exceptions: LLVMMetadata): float128; external name 'llvm.experimental.constrained.fadd.f128';
function llvm_experimental_constrained_fsub(a, b: float128; rounding, exceptions: LLVMMetadata): float128; external name 'llvm.experimental.constrained.fsub.f128';
function llvm_experimental_constrained_fmul(a, b: float128; rounding, exceptions: LLVMMetadata): float128; external name 'llvm.experimental.constrained.fmul.f128';
function llvm_experimental_constrained_fdiv(a, b: float128; rounding, exceptions: LLVMMetadata): float128; external name 'llvm.experimental.constrained.fdiv.f128';
{$endif}