* use constrained LLVM floating point (fp) intrinsics for add/sub/mul/slash (floating point division)/fma/sqrt operations when fastmath is not enabled

git-svn-id: trunk@43819 -
This commit is contained in:
Jonas Maebe 2019-12-30 15:05:09 +00:00
parent b355ba3d39
commit 797077855e
3 changed files with 204 additions and 96 deletions

View File

@ -47,20 +47,75 @@ interface
implementation implementation
uses uses
verbose,globtype, verbose,globtype,globals,cutils,
aasmdata, aasmdata,
symconst,symtype,symdef,defutil, symconst,symtype,symdef,defutil,
llvmbase,aasmllvm, llvmbase,aasmllvm,
cgbase,cgutils, cgbase,cgutils,pass_1,
hlcgobj, hlcgobj,
nadd nadd,ncal,ncnv,ncon
; ;
{ tllvmaddnode } { tllvmaddnode }
function tllvmaddnode.pass_1: tnode; function tllvmaddnode.pass_1: tnode;
var
intrname: string;
iscompcurrency: boolean;
begin begin
result:=inherited pass_1; result:=inherited pass_1;
if not assigned(result) and
is_fpu(left.resultdef) and
not(cs_opt_fastmath in current_settings.optimizerswitches) then
begin
case nodetype of
addn:
begin
intrname:='LLVM_EXPERIMENTAL_CONSTRAINED_FADD';
end;
subn:
begin
intrname:='LLVM_EXPERIMENTAL_CONSTRAINED_FSUB';
end;
muln:
begin
intrname:='LLVM_EXPERIMENTAL_CONSTRAINED_FMUL';
end;
slashn:
begin
intrname:='LLVM_EXPERIMENTAL_CONSTRAINED_FDIV';
end;
else
begin
intrname:='';
end;
end;
if intrname<>'' then
begin
iscompcurrency:=tfloatdef(left.resultdef).floattype in [s64currency,s64comp];
if iscompcurrency then
begin
inserttypeconv_internal(left,s80floattype);
inserttypeconv_internal(right,s80floattype);
end;
result:=ccallnode.createintern(intrname,
ccallparanode.create(cstringconstnode.createpchar(ansistring2pchar('fpexcept.strict'),length('fpexcept.strict'),llvm_metadatatype),
ccallparanode.create(cstringconstnode.createpchar(ansistring2pchar('round.dynamic'),length('round.dynamic'),llvm_metadatatype),
ccallparanode.create(right,
ccallparanode.create(left,nil)
)
)
)
);
if iscompcurrency then
begin
result:=ctypeconvnode.create_internal(result,resultdef);
end;
left:=nil;
right:=nil;
exit;
end;
end;
{ there are no flags in LLVM } { there are no flags in LLVM }
if expectloc=LOC_FLAGS then if expectloc=LOC_FLAGS then
expectloc:=LOC_REGISTER; expectloc:=LOC_REGISTER;
@ -226,29 +281,26 @@ implementation
op : tllvmop; op : tllvmop;
llvmfpcmp : tllvmfpcmp; llvmfpcmp : tllvmfpcmp;
size : tdef; size : tdef;
cmpop,
singleprec : boolean;
begin begin
pass_left_right; pass_left_right;
cmpop:=false; { get the operands in the correct order; there are no special cases here,
singleprec:=tfloatdef(left.resultdef).floattype=s32real; everything is register-based }
{ avoid uninitialised warning } if nf_swapped in flags then
llvmfpcmp:=lfc_invalid; swapleftright;
case nodetype of
addn : { put both operands in a register }
op:=la_fadd; hlcg.location_force_fpureg(current_asmdata.CurrAsmList,right.location,right.resultdef,true);
muln : hlcg.location_force_fpureg(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
op:=la_fmul;
subn : { see comment in thlcgllvm.a_loadfpu_ref_reg }
op:=la_fsub; if tfloatdef(left.resultdef).floattype in [s64comp,s64currency] then
slashn : size:=sc80floattype
op:=la_fdiv; else
ltn,lten,gtn,gten, size:=left.resultdef;
equaln,unequaln :
if nodetype in [ltn,lten,gtn,gten,equaln,unequaln] then
begin begin
op:=la_fcmp;
cmpop:=true;
case nodetype of case nodetype of
ltn: ltn:
llvmfpcmp:=lfc_olt; llvmfpcmp:=lfc_olt;
@ -265,51 +317,34 @@ implementation
else else
internalerror(2015031506); internalerror(2015031506);
end; end;
end;
else
internalerror(2013102401);
end;
{ get the operands in the correct order; there are no special cases here,
everything is register-based }
if nf_swapped in flags then
swapleftright;
{ put both operands in a register }
hlcg.location_force_fpureg(current_asmdata.CurrAsmList,right.location,right.resultdef,true);
hlcg.location_force_fpureg(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
{ initialize the result location }
if not cmpop then
begin
location_reset(location,LOC_FPUREGISTER,def_cgsize(resultdef));
location.register:=hlcg.getfpuregister(current_asmdata.CurrAsmList,resultdef);
end
else
begin
location_reset(location,LOC_REGISTER,OS_8); location_reset(location,LOC_REGISTER,OS_8);
location.register:=hlcg.getintregister(current_asmdata.CurrAsmList,llvmbool1type); location.register:=hlcg.getintregister(current_asmdata.CurrAsmList,llvmbool1type);
end;
{ see comment in thlcgllvm.a_loadfpu_ref_reg } current_asmdata.CurrAsmList.concat(taillvm.op_reg_fpcond_size_reg_reg(la_fcmp ,
if tfloatdef(left.resultdef).floattype in [s64comp,s64currency] then
size:=sc80floattype
else
size:=left.resultdef;
{ emit the actual operation }
if not cmpop then
begin
current_asmdata.CurrAsmList.concat(taillvm.op_reg_size_reg_reg(op,location.register,size,
left.location.register,right.location.register))
end
else
begin
current_asmdata.CurrAsmList.concat(taillvm.op_reg_fpcond_size_reg_reg(op,
location.register,llvmfpcmp,size,left.location.register,right.location.register)); location.register,llvmfpcmp,size,left.location.register,right.location.register));
tmpreg:=hlcg.getintregister(current_asmdata.CurrAsmList,resultdef); tmpreg:=hlcg.getintregister(current_asmdata.CurrAsmList,resultdef);
hlcg.a_load_reg_reg(current_asmdata.CurrAsmList,llvmbool1type,resultdef,location.register,tmpreg); hlcg.a_load_reg_reg(current_asmdata.CurrAsmList,llvmbool1type,resultdef,location.register,tmpreg);
location.register:=tmpreg; location.register:=tmpreg;
end
else
begin
case nodetype of
addn :
op:=la_fadd;
muln :
op:=la_fmul;
subn :
op:=la_fsub;
slashn :
op:=la_fdiv;
else
internalerror(2013102401);
end;
location_reset(location,LOC_FPUREGISTER,def_cgsize(resultdef));
location.register:=hlcg.getfpuregister(current_asmdata.CurrAsmList,resultdef);
current_asmdata.CurrAsmList.concat(taillvm.op_reg_size_reg_reg(op,location.register,size,
left.location.register,right.location.register))
end; end;
end; end;

View File

@ -52,7 +52,7 @@ interface
implementation implementation
uses uses
verbose,globals,globtype,constexp, verbose,globals,globtype,constexp,cutils,
aasmbase, aasmdata, aasmbase, aasmdata,
symconst,symtype,symdef,defutil, symconst,symtype,symdef,defutil,
compinnr, compinnr,
@ -219,7 +219,9 @@ implementation
function tllvminlinenode.first_fma: tnode; function tllvminlinenode.first_fma: tnode;
var var
procname: string[15]; procname: string[40];
begin
if cs_opt_fastmath in current_settings.optimizerswitches then
begin begin
case inlinenumber of case inlinenumber of
in_fma_single: in_fma_single:
@ -234,6 +236,26 @@ implementation
internalerror(2018122101); internalerror(2018122101);
end; end;
result:=ccallnode.createintern(procname,left); result:=ccallnode.createintern(procname,left);
end
else
begin
case inlinenumber of
in_fma_single,
in_fma_double,
in_fma_extended,
in_fma_float128:
procname:='LLVM_EXPERIMENTAL_CONSTRAINED_FMA';
else
internalerror(2019122811);
end;
result:=ccallnode.createintern(procname,
ccallparanode.create(cstringconstnode.createpchar(ansistring2pchar('fpexcept.strict'),length('fpexcept.strict'),llvm_metadatatype),
ccallparanode.create(cstringconstnode.createpchar(ansistring2pchar('round.dynamic'),length('round.dynamic'),llvm_metadatatype),
left
)
)
);
end;
left:=nil; left:=nil;
end; end;
@ -250,10 +272,12 @@ implementation
function tllvminlinenode.first_sqrt_real: tnode; function tllvminlinenode.first_sqrt_real: tnode;
var var
intrinsic: string[20]; intrinsic: string[40];
begin begin
if left.resultdef.typ<>floatdef then if left.resultdef.typ<>floatdef then
internalerror(2018121601); internalerror(2018121601);
if cs_opt_fastmath in current_settings.optimizerswitches then
begin
case tfloatdef(left.resultdef).floattype of case tfloatdef(left.resultdef).floattype of
s32real: s32real:
intrinsic:='llvm_sqrt_f32'; intrinsic:='llvm_sqrt_f32';
@ -267,6 +291,26 @@ implementation
internalerror(2018121602); internalerror(2018121602);
end; end;
result:=ccallnode.createintern(intrinsic, ccallparanode.create(left,nil)); result:=ccallnode.createintern(intrinsic, ccallparanode.create(left,nil));
end
else
begin
case tfloatdef(left.resultdef).floattype of
s32real,
s64real,
s80real,sc80real,
s128real:
intrinsic:='LLVM_EXPERIMENTAL_CONSTRAINED_SQRT';
else
internalerror(2019122810);
end;
result:=ccallnode.createintern(intrinsic,
ccallparanode.create(cstringconstnode.createpchar(ansistring2pchar('fpexcept.strict'),length('fpexcept.strict'),llvm_metadatatype),
ccallparanode.create(cstringconstnode.createpchar(ansistring2pchar('round.dynamic'),length('round.dynamic'),llvm_metadatatype),
ccallparanode.create(left,nil)
)
)
);
end;
left:=nil; left:=nil;
end; end;

View File

@ -41,19 +41,48 @@ function llvm_ctpop(src: UInt32): UInt32; external name 'llvm.ctpop.i32';
function llvm_ctpop(src: UInt64): UInt64; external name 'llvm.ctpop.i64'; function llvm_ctpop(src: UInt64): UInt64; external name 'llvm.ctpop.i64';
function llvm_sqrt_f32(val: single): single; compilerproc; external name 'llvm.sqrt.f32'; function llvm_sqrt_f32(val: single): single; compilerproc; external name 'llvm.sqrt.f32';
function llvm_experimental_constrained_sqrt(val: single; rounding, exceptions: LLVMMetadata): single; external name 'llvm.experimental.constrained.sqrt.f32';
function llvm_sqrt_f64(val: double): double; compilerproc; external name 'llvm.sqrt.f64'; function llvm_sqrt_f64(val: double): double; compilerproc; external name 'llvm.sqrt.f64';
function llvm_experimental_constrained_sqrt(val: double; rounding, exceptions: LLVMMetadata): double; external name 'llvm.experimental.constrained.sqrt.f64';
{$ifdef SUPPORT_EXTENDED} {$ifdef SUPPORT_EXTENDED}
function llvm_sqrt_f80(val: extended): extended; compilerproc; external name 'llvm.sqrt.f80'; function llvm_sqrt_f80(val: cextended): cextended; compilerproc; external name 'llvm.sqrt.x86_fp80';
function llvm_experimental_constrained_sqrt(val: cextended; rounding, exceptions: LLVMMetadata): cextended; external name 'llvm.experimental.constrained.sqrt.x86_fp80';
{$endif} {$endif}
{$ifdef SUPPORT_FLOAT128} {$ifdef SUPPORT_FLOAT128}
function llvm_sqrt_f128(val: float128): float128; compilerproc; external name 'llvm.sqrt.f128'; function llvm_sqrt_f128(val: float128): float128; compilerproc; external name 'llvm.sqrt.f128';
function llvm_experimental_constrained_sqrt(val: float128; rounding, exceptions: LLVMMetadata): float128; external name 'llvm.experimental.constrained.sqrt.f128';
{$endif} {$endif}
function llvm_fma_f32(a, b, c: single): single; compilerproc; external name 'llvm.fma.f32'; function llvm_fma_f32(a, b, c: single): single; compilerproc; external name 'llvm.fma.f32';
function llvm_experimental_constrained_fma(a, b, c: single; rounding, exceptions: LLVMMetadata): single; external name 'llvm.experimental.constrained.fma.f32';
function llvm_fma_f64(a, b, c: double): double; compilerproc; external name 'llvm.fma.f64'; function llvm_fma_f64(a, b, c: double): double; compilerproc; external name 'llvm.fma.f64';
function llvm_experimental_constrained_fma(a, b, c: double; rounding, exceptions: LLVMMetadata): double; external name 'llvm.experimental.constrained.fma.f64';
{$ifdef SUPPORT_EXTENDED} {$ifdef SUPPORT_EXTENDED}
function llvm_fma_f80(a, b, c: extended): extended; compilerproc; external name 'llvm.fma.f80'; function llvm_fma_f80(a, b, c: cextended): cextended; compilerproc; external name 'llvm.fma.x86_fp80';
function llvm_experimental_constrained_fma(a, b, c: cextended; rounding, exceptions: LLVMMetadata): cextended; external name 'llvm.experimental.constrained.fma.x86_fp80';
{$endif} {$endif}
{$ifdef SUPPORT_FLOAT128} {$ifdef SUPPORT_FLOAT128}
function llvm_fma_f128(a, b, c: float128): float128; compilerproc; external name 'llvm.fma.f128'; function llvm_fma_f128(a, b, c: float128): float128; compilerproc; external name 'llvm.fma.f128';
function llvm_experimental_constrained_fma(a, b, c: float128; rounding, exceptions: LLVMMetadata): float128; external name 'llvm.experimental.constrained.fma.f128';
{$endif}
{ Overloaded external declarations that bind the LLVM constrained floating
  point arithmetic intrinsics (fadd, fsub, fmul, fdiv) for single (f32) and
  double (f64) operands.
    a, b       : the two operands of the operation
    rounding   : LLVM metadata argument
    exceptions : LLVM metadata argument
  The add node code in this change builds calls to these routines with the
  metadata strings 'round.dynamic' and 'fpexcept.strict'; presumably those
  map to rounding and exceptions respectively -- TODO confirm against the
  parameter ordering of ccallparanode chains. }
function llvm_experimental_constrained_fadd(a, b: single; rounding, exceptions: LLVMMetadata): single; external name 'llvm.experimental.constrained.fadd.f32';
function llvm_experimental_constrained_fsub(a, b: single; rounding, exceptions: LLVMMetadata): single; external name 'llvm.experimental.constrained.fsub.f32';
function llvm_experimental_constrained_fmul(a, b: single; rounding, exceptions: LLVMMetadata): single; external name 'llvm.experimental.constrained.fmul.f32';
function llvm_experimental_constrained_fdiv(a, b: single; rounding, exceptions: LLVMMetadata): single; external name 'llvm.experimental.constrained.fdiv.f32';
function llvm_experimental_constrained_fadd(a, b: double; rounding, exceptions: LLVMMetadata): double; external name 'llvm.experimental.constrained.fadd.f64';
function llvm_experimental_constrained_fsub(a, b: double; rounding, exceptions: LLVMMetadata): double; external name 'llvm.experimental.constrained.fsub.f64';
function llvm_experimental_constrained_fmul(a, b: double; rounding, exceptions: LLVMMetadata): double; external name 'llvm.experimental.constrained.fmul.f64';
function llvm_experimental_constrained_fdiv(a, b: double; rounding, exceptions: LLVMMetadata): double; external name 'llvm.experimental.constrained.fdiv.f64';
{ Constrained fadd/fsub/fmul/fdiv overloads bound to the x86_fp80 variants of
  the LLVM intrinsics; only declared when SUPPORT_EXTENDED is defined.
  NOTE(review): these overloads are declared with the type extended, while the
  constrained sqrt and fma overloads for x86_fp80 in this same file are
  declared with cextended -- confirm that the difference is intentional. }
{$ifdef SUPPORT_EXTENDED}
function llvm_experimental_constrained_fadd(a, b: extended; rounding, exceptions: LLVMMetadata): extended; external name 'llvm.experimental.constrained.fadd.x86_fp80';
function llvm_experimental_constrained_fsub(a, b: extended; rounding, exceptions: LLVMMetadata): extended; external name 'llvm.experimental.constrained.fsub.x86_fp80';
function llvm_experimental_constrained_fmul(a, b: extended; rounding, exceptions: LLVMMetadata): extended; external name 'llvm.experimental.constrained.fmul.x86_fp80';
function llvm_experimental_constrained_fdiv(a, b: extended; rounding, exceptions: LLVMMetadata): extended; external name 'llvm.experimental.constrained.fdiv.x86_fp80';
{$endif}
{ Constrained fadd/fsub/fmul/fdiv overloads bound to the f128 variants of the
  LLVM intrinsics; only declared when SUPPORT_FLOAT128 is defined. The
  parameters follow the same layout as the other constrained arithmetic
  overloads: two operands followed by the rounding and exceptions metadata. }
{$ifdef SUPPORT_FLOAT128}
function llvm_experimental_constrained_fadd(a, b: float128; rounding, exceptions: LLVMMetadata): float128; external name 'llvm.experimental.constrained.fadd.f128';
function llvm_experimental_constrained_fsub(a, b: float128; rounding, exceptions: LLVMMetadata): float128; external name 'llvm.experimental.constrained.fsub.f128';
function llvm_experimental_constrained_fmul(a, b: float128; rounding, exceptions: LLVMMetadata): float128; external name 'llvm.experimental.constrained.fmul.f128';
function llvm_experimental_constrained_fdiv(a, b: float128; rounding, exceptions: LLVMMetadata): float128; external name 'llvm.experimental.constrained.fdiv.f128';
{$endif} {$endif}