mirror of
https://gitlab.com/freepascal.org/fpc/source.git
synced 2025-04-14 14:59:32 +02:00
* use constrained LLVM fp intrinsics for add/sub/mul/slash/fma/sqrt
operations when fastmath is not enabled
git-svn-id: trunk@43819 -
This commit is contained in:
parent
b355ba3d39
commit
797077855e
@ -47,20 +47,75 @@ interface
|
||||
implementation
|
||||
|
||||
uses
|
||||
verbose,globtype,
|
||||
verbose,globtype,globals,cutils,
|
||||
aasmdata,
|
||||
symconst,symtype,symdef,defutil,
|
||||
llvmbase,aasmllvm,
|
||||
cgbase,cgutils,
|
||||
cgbase,cgutils,pass_1,
|
||||
hlcgobj,
|
||||
nadd
|
||||
nadd,ncal,ncnv,ncon
|
||||
;
|
||||
|
||||
{ tllvmaddnode }
|
||||
|
||||
function tllvmaddnode.pass_1: tnode;
|
||||
var
|
||||
intrname: string;
|
||||
iscompcurrency: boolean;
|
||||
begin
|
||||
result:=inherited pass_1;
|
||||
if not assigned(result) and
|
||||
is_fpu(left.resultdef) and
|
||||
not(cs_opt_fastmath in current_settings.optimizerswitches) then
|
||||
begin
|
||||
case nodetype of
|
||||
addn:
|
||||
begin
|
||||
intrname:='LLVM_EXPERIMENTAL_CONSTRAINED_FADD';
|
||||
end;
|
||||
subn:
|
||||
begin
|
||||
intrname:='LLVM_EXPERIMENTAL_CONSTRAINED_FSUB';
|
||||
end;
|
||||
muln:
|
||||
begin
|
||||
intrname:='LLVM_EXPERIMENTAL_CONSTRAINED_FMUL';
|
||||
end;
|
||||
slashn:
|
||||
begin
|
||||
intrname:='LLVM_EXPERIMENTAL_CONSTRAINED_FDIV';
|
||||
end;
|
||||
else
|
||||
begin
|
||||
intrname:='';
|
||||
end;
|
||||
end;
|
||||
if intrname<>'' then
|
||||
begin
|
||||
iscompcurrency:=tfloatdef(left.resultdef).floattype in [s64currency,s64comp];
|
||||
if iscompcurrency then
|
||||
begin
|
||||
inserttypeconv_internal(left,s80floattype);
|
||||
inserttypeconv_internal(right,s80floattype);
|
||||
end;
|
||||
result:=ccallnode.createintern(intrname,
|
||||
ccallparanode.create(cstringconstnode.createpchar(ansistring2pchar('fpexcept.strict'),length('fpexcept.strict'),llvm_metadatatype),
|
||||
ccallparanode.create(cstringconstnode.createpchar(ansistring2pchar('round.dynamic'),length('round.dynamic'),llvm_metadatatype),
|
||||
ccallparanode.create(right,
|
||||
ccallparanode.create(left,nil)
|
||||
)
|
||||
)
|
||||
)
|
||||
);
|
||||
if iscompcurrency then
|
||||
begin
|
||||
result:=ctypeconvnode.create_internal(result,resultdef);
|
||||
end;
|
||||
left:=nil;
|
||||
right:=nil;
|
||||
exit;
|
||||
end;
|
||||
end;
|
||||
{ there are no flags in LLVM }
|
||||
if expectloc=LOC_FLAGS then
|
||||
expectloc:=LOC_REGISTER;
|
||||
@ -225,51 +280,10 @@ implementation
|
||||
tmpreg: tregister;
|
||||
op : tllvmop;
|
||||
llvmfpcmp : tllvmfpcmp;
|
||||
size : tdef;
|
||||
cmpop,
|
||||
singleprec : boolean;
|
||||
size : tdef;
|
||||
begin
|
||||
pass_left_right;
|
||||
|
||||
cmpop:=false;
|
||||
singleprec:=tfloatdef(left.resultdef).floattype=s32real;
|
||||
{ avoid uninitialised warning }
|
||||
llvmfpcmp:=lfc_invalid;
|
||||
case nodetype of
|
||||
addn :
|
||||
op:=la_fadd;
|
||||
muln :
|
||||
op:=la_fmul;
|
||||
subn :
|
||||
op:=la_fsub;
|
||||
slashn :
|
||||
op:=la_fdiv;
|
||||
ltn,lten,gtn,gten,
|
||||
equaln,unequaln :
|
||||
begin
|
||||
op:=la_fcmp;
|
||||
cmpop:=true;
|
||||
case nodetype of
|
||||
ltn:
|
||||
llvmfpcmp:=lfc_olt;
|
||||
lten:
|
||||
llvmfpcmp:=lfc_ole;
|
||||
gtn:
|
||||
llvmfpcmp:=lfc_ogt;
|
||||
gten:
|
||||
llvmfpcmp:=lfc_oge;
|
||||
equaln:
|
||||
llvmfpcmp:=lfc_oeq;
|
||||
unequaln:
|
||||
llvmfpcmp:=lfc_une;
|
||||
else
|
||||
internalerror(2015031506);
|
||||
end;
|
||||
end;
|
||||
else
|
||||
internalerror(2013102401);
|
||||
end;
|
||||
|
||||
{ get the operands in the correct order; there are no special cases here,
|
||||
everything is register-based }
|
||||
if nf_swapped in flags then
|
||||
@ -279,37 +293,58 @@ implementation
|
||||
hlcg.location_force_fpureg(current_asmdata.CurrAsmList,right.location,right.resultdef,true);
|
||||
hlcg.location_force_fpureg(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
|
||||
|
||||
{ initialize the result location }
|
||||
if not cmpop then
|
||||
begin
|
||||
location_reset(location,LOC_FPUREGISTER,def_cgsize(resultdef));
|
||||
location.register:=hlcg.getfpuregister(current_asmdata.CurrAsmList,resultdef);
|
||||
end
|
||||
else
|
||||
begin
|
||||
location_reset(location,LOC_REGISTER,OS_8);
|
||||
location.register:=hlcg.getintregister(current_asmdata.CurrAsmList,llvmbool1type);
|
||||
end;
|
||||
|
||||
{ see comment in thlcgllvm.a_loadfpu_ref_reg }
|
||||
if tfloatdef(left.resultdef).floattype in [s64comp,s64currency] then
|
||||
size:=sc80floattype
|
||||
else
|
||||
size:=left.resultdef;
|
||||
|
||||
{ emit the actual operation }
|
||||
if not cmpop then
|
||||
if nodetype in [ltn,lten,gtn,gten,equaln,unequaln] then
|
||||
begin
|
||||
current_asmdata.CurrAsmList.concat(taillvm.op_reg_size_reg_reg(op,location.register,size,
|
||||
left.location.register,right.location.register))
|
||||
end
|
||||
else
|
||||
begin
|
||||
current_asmdata.CurrAsmList.concat(taillvm.op_reg_fpcond_size_reg_reg(op,
|
||||
case nodetype of
|
||||
ltn:
|
||||
llvmfpcmp:=lfc_olt;
|
||||
lten:
|
||||
llvmfpcmp:=lfc_ole;
|
||||
gtn:
|
||||
llvmfpcmp:=lfc_ogt;
|
||||
gten:
|
||||
llvmfpcmp:=lfc_oge;
|
||||
equaln:
|
||||
llvmfpcmp:=lfc_oeq;
|
||||
unequaln:
|
||||
llvmfpcmp:=lfc_une;
|
||||
else
|
||||
internalerror(2015031506);
|
||||
end;
|
||||
location_reset(location,LOC_REGISTER,OS_8);
|
||||
location.register:=hlcg.getintregister(current_asmdata.CurrAsmList,llvmbool1type);
|
||||
|
||||
current_asmdata.CurrAsmList.concat(taillvm.op_reg_fpcond_size_reg_reg(la_fcmp ,
|
||||
location.register,llvmfpcmp,size,left.location.register,right.location.register));
|
||||
tmpreg:=hlcg.getintregister(current_asmdata.CurrAsmList,resultdef);
|
||||
hlcg.a_load_reg_reg(current_asmdata.CurrAsmList,llvmbool1type,resultdef,location.register,tmpreg);
|
||||
location.register:=tmpreg;
|
||||
end
|
||||
else
|
||||
begin
|
||||
case nodetype of
|
||||
addn :
|
||||
op:=la_fadd;
|
||||
muln :
|
||||
op:=la_fmul;
|
||||
subn :
|
||||
op:=la_fsub;
|
||||
slashn :
|
||||
op:=la_fdiv;
|
||||
else
|
||||
internalerror(2013102401);
|
||||
end;
|
||||
location_reset(location,LOC_FPUREGISTER,def_cgsize(resultdef));
|
||||
location.register:=hlcg.getfpuregister(current_asmdata.CurrAsmList,resultdef);
|
||||
|
||||
current_asmdata.CurrAsmList.concat(taillvm.op_reg_size_reg_reg(op,location.register,size,
|
||||
left.location.register,right.location.register))
|
||||
end;
|
||||
end;
|
||||
|
||||
|
@ -52,7 +52,7 @@ interface
|
||||
implementation
|
||||
|
||||
uses
|
||||
verbose,globals,globtype,constexp,
|
||||
verbose,globals,globtype,constexp,cutils,
|
||||
aasmbase, aasmdata,
|
||||
symconst,symtype,symdef,defutil,
|
||||
compinnr,
|
||||
@ -219,21 +219,43 @@ implementation
|
||||
|
||||
{ Returns a call node that invokes an LLVM fused-multiply-add intrinsic
  wrapper, chosen from inlinenumber and the fastmath optimizer switch.

  NOTE(review): this text looks like a rendered diff whose +/- markers were
  lost, so removed and added lines appear together (for example the two
  procname declarations and the first case/result pair below) -- confirm
  against the repository which lines are current.

  - cs_opt_fastmath set: calls llvm_fma_f32/f64/f80/f128 with the original
    parameter list (left).
  - otherwise: calls LLVM_EXPERIMENTAL_CONSTRAINED_FMA, with the string
    constants 'round.dynamic' and 'fpexcept.strict' (typed as
    llvm_metadatatype) chained in front of left.

  An inlinenumber other than the four in_fma_* values ends in internalerror. }
function tllvminlinenode.first_fma: tnode;
|
||||
var
|
||||
procname: string[15];
|
||||
{ 'LLVM_EXPERIMENTAL_CONSTRAINED_FMA' is 33 characters long, so it does not
  fit in string[15] }
procname: string[40];
|
||||
begin
|
||||
case inlinenumber of
|
||||
in_fma_single:
|
||||
procname:='llvm_fma_f32';
|
||||
in_fma_double:
|
||||
procname:='llvm_fma_f64';
|
||||
in_fma_extended:
|
||||
procname:='llvm_fma_f80';
|
||||
in_fma_float128:
|
||||
procname:='llvm_fma_f128';
|
||||
else
|
||||
internalerror(2018122101);
|
||||
end;
|
||||
result:=ccallnode.createintern(procname,left);
|
||||
{ fastmath: the unconstrained per-type intrinsic is used }
if cs_opt_fastmath in current_settings.optimizerswitches then
|
||||
begin
|
||||
case inlinenumber of
|
||||
in_fma_single:
|
||||
procname:='llvm_fma_f32';
|
||||
in_fma_double:
|
||||
procname:='llvm_fma_f64';
|
||||
in_fma_extended:
|
||||
procname:='llvm_fma_f80';
|
||||
in_fma_float128:
|
||||
procname:='llvm_fma_f128';
|
||||
else
|
||||
internalerror(2018122101);
|
||||
end;
|
||||
result:=ccallnode.createintern(procname,left);
|
||||
end
|
||||
else
|
||||
begin
|
||||
{ all four float sizes use the same routine name here; presumably the
  callee is picked by parameter type -- TODO confirm }
case inlinenumber of
|
||||
in_fma_single,
|
||||
in_fma_double,
|
||||
in_fma_extended,
|
||||
in_fma_float128:
|
||||
procname:='LLVM_EXPERIMENTAL_CONSTRAINED_FMA';
|
||||
else
|
||||
internalerror(2019122811);
|
||||
end;
|
||||
{ the two metadata strings are chained ahead of the existing parameter
  list held in left }
result:=ccallnode.createintern(procname,
|
||||
ccallparanode.create(cstringconstnode.createpchar(ansistring2pchar('fpexcept.strict'),length('fpexcept.strict'),llvm_metadatatype),
|
||||
ccallparanode.create(cstringconstnode.createpchar(ansistring2pchar('round.dynamic'),length('round.dynamic'),llvm_metadatatype),
|
||||
left
|
||||
)
|
||||
)
|
||||
);
|
||||
end;
|
||||
{ left was passed into the new call node above; presumably it is cleared
  so that this node no longer refers to it -- TODO confirm }
left:=nil;
|
||||
end;
|
||||
|
||||
@ -250,23 +272,45 @@ implementation
|
||||
|
||||
{ Returns a call node that invokes an LLVM square-root intrinsic wrapper,
  chosen from the float type of left and the fastmath optimizer switch.

  NOTE(review): this text looks like a rendered diff whose +/- markers were
  lost, so removed and added lines appear together (for example the two
  intrinsic declarations and the first case/result pair below) -- confirm
  against the repository which lines are current.

  - left.resultdef must be a floatdef, otherwise internalerror(2018121601).
  - cs_opt_fastmath set: calls llvm_sqrt_f32/f64/f80/f128 with left as the
    only parameter.
  - otherwise: calls LLVM_EXPERIMENTAL_CONSTRAINED_SQRT with left plus the
    string constants 'round.dynamic' and 'fpexcept.strict' (typed as
    llvm_metadatatype).

  Float types outside s32real, s64real, s80real, sc80real and s128real end
  in internalerror. }
function tllvminlinenode.first_sqrt_real: tnode;
|
||||
var
|
||||
intrinsic: string[20];
|
||||
{ 'LLVM_EXPERIMENTAL_CONSTRAINED_SQRT' is 34 characters long, so it does not
  fit in string[20] }
intrinsic: string[40];
|
||||
begin
|
||||
if left.resultdef.typ<>floatdef then
|
||||
internalerror(2018121601);
|
||||
case tfloatdef(left.resultdef).floattype of
|
||||
s32real:
|
||||
intrinsic:='llvm_sqrt_f32';
|
||||
s64real:
|
||||
intrinsic:='llvm_sqrt_f64';
|
||||
s80real,sc80real:
|
||||
intrinsic:='llvm_sqrt_f80';
|
||||
s128real:
|
||||
intrinsic:='llvm_sqrt_f128';
|
||||
else
|
||||
internalerror(2018121602);
|
||||
end;
|
||||
result:=ccallnode.createintern(intrinsic, ccallparanode.create(left,nil));
|
||||
{ fastmath: the unconstrained per-type intrinsic is used }
if cs_opt_fastmath in current_settings.optimizerswitches then
|
||||
begin
|
||||
case tfloatdef(left.resultdef).floattype of
|
||||
s32real:
|
||||
intrinsic:='llvm_sqrt_f32';
|
||||
s64real:
|
||||
intrinsic:='llvm_sqrt_f64';
|
||||
s80real,sc80real:
|
||||
intrinsic:='llvm_sqrt_f80';
|
||||
s128real:
|
||||
intrinsic:='llvm_sqrt_f128';
|
||||
else
|
||||
internalerror(2018121602);
|
||||
end;
|
||||
result:=ccallnode.createintern(intrinsic, ccallparanode.create(left,nil));
|
||||
end
|
||||
else
|
||||
begin
|
||||
{ every supported float type uses the same routine name here; presumably
  the callee is picked by parameter type -- TODO confirm }
case tfloatdef(left.resultdef).floattype of
|
||||
s32real,
|
||||
s64real,
|
||||
s80real,sc80real,
|
||||
s128real:
|
||||
intrinsic:='LLVM_EXPERIMENTAL_CONSTRAINED_SQRT';
|
||||
else
|
||||
internalerror(2019122810);
|
||||
end;
|
||||
{ parameter chain: the two metadata strings wrap the operand node }
result:=ccallnode.createintern(intrinsic,
|
||||
ccallparanode.create(cstringconstnode.createpchar(ansistring2pchar('fpexcept.strict'),length('fpexcept.strict'),llvm_metadatatype),
|
||||
ccallparanode.create(cstringconstnode.createpchar(ansistring2pchar('round.dynamic'),length('round.dynamic'),llvm_metadatatype),
|
||||
ccallparanode.create(left,nil)
|
||||
)
|
||||
)
|
||||
);
|
||||
end;
|
||||
{ left was passed into the new call node above; presumably it is cleared
  so that this node no longer refers to it -- TODO confirm }
left:=nil;
|
||||
end;
|
||||
|
||||
|
@ -41,19 +41,48 @@ function llvm_ctpop(src: UInt32): UInt32; external name 'llvm.ctpop.i32';
|
||||
function llvm_ctpop(src: UInt64): UInt64; external name 'llvm.ctpop.i64';
|
||||
|
||||
{ Square-root intrinsic bindings.
  The llvm_sqrt_* routines are compilerprocs bound by external name to
  llvm.sqrt.<type>. The llvm_experimental_constrained_sqrt declarations reuse
  one Pascal name for every operand type, take two extra LLVMMetadata
  parameters (rounding, exceptions) and are bound to
  llvm.experimental.constrained.sqrt.<type>.
  NOTE(review): llvm_sqrt_f80 is declared twice below (extended with
  'llvm.sqrt.f80' and cextended with 'llvm.sqrt.x86_fp80'); this looks like
  the removed and added line of a diff shown together -- confirm which one is
  current. }
function llvm_sqrt_f32(val: single): single; compilerproc; external name 'llvm.sqrt.f32';
|
||||
function llvm_experimental_constrained_sqrt(val: single; rounding, exceptions: LLVMMetadata): single; external name 'llvm.experimental.constrained.sqrt.f32';
|
||||
function llvm_sqrt_f64(val: double): double; compilerproc; external name 'llvm.sqrt.f64';
|
||||
function llvm_experimental_constrained_sqrt(val: double; rounding, exceptions: LLVMMetadata): double; external name 'llvm.experimental.constrained.sqrt.f64';
|
||||
{$ifdef SUPPORT_EXTENDED}
|
||||
function llvm_sqrt_f80(val: extended): extended; compilerproc; external name 'llvm.sqrt.f80';
|
||||
function llvm_sqrt_f80(val: cextended): cextended; compilerproc; external name 'llvm.sqrt.x86_fp80';
|
||||
function llvm_experimental_constrained_sqrt(val: cextended; rounding, exceptions: LLVMMetadata): cextended; external name 'llvm.experimental.constrained.sqrt.x86_fp80';
|
||||
{$endif}
|
||||
{$ifdef SUPPORT_FLOAT128}
|
||||
function llvm_sqrt_f128(val: float128): float128; compilerproc; external name 'llvm.sqrt.f128';
|
||||
function llvm_experimental_constrained_sqrt(val: float128; rounding, exceptions: LLVMMetadata): float128; external name 'llvm.experimental.constrained.sqrt.f128';
|
||||
{$endif}
|
||||
|
||||
{ Fused multiply-add intrinsic bindings.
  The llvm_fma_* routines are compilerprocs taking three operands and bound
  by external name to llvm.fma.<type>. The llvm_experimental_constrained_fma
  declarations reuse one Pascal name for every operand type, take two extra
  LLVMMetadata parameters (rounding, exceptions) and are bound to
  llvm.experimental.constrained.fma.<type>.
  NOTE(review): llvm_fma_f80 is declared twice below (extended with
  'llvm.fma.f80' and cextended with 'llvm.fma.x86_fp80'); this looks like the
  removed and added line of a diff shown together -- confirm which one is
  current. }
function llvm_fma_f32(a, b, c: single): single; compilerproc; external name 'llvm.fma.f32';
|
||||
function llvm_experimental_constrained_fma(a, b, c: single; rounding, exceptions: LLVMMetadata): single; external name 'llvm.experimental.constrained.fma.f32';
|
||||
function llvm_fma_f64(a, b, c: double): double; compilerproc; external name 'llvm.fma.f64';
|
||||
function llvm_experimental_constrained_fma(a, b, c: double; rounding, exceptions: LLVMMetadata): double; external name 'llvm.experimental.constrained.fma.f64';
|
||||
{$ifdef SUPPORT_EXTENDED}
|
||||
function llvm_fma_f80(a, b, c: extended): extended; compilerproc; external name 'llvm.fma.f80';
|
||||
function llvm_fma_f80(a, b, c: cextended): cextended; compilerproc; external name 'llvm.fma.x86_fp80';
|
||||
function llvm_experimental_constrained_fma(a, b, c: cextended; rounding, exceptions: LLVMMetadata): cextended; external name 'llvm.experimental.constrained.fma.x86_fp80';
|
||||
{$endif}
|
||||
{$ifdef SUPPORT_FLOAT128}
|
||||
function llvm_fma_f128(a, b, c: float128): float128; compilerproc; external name 'llvm.fma.f128';
|
||||
function llvm_experimental_constrained_fma(a, b, c: float128; rounding, exceptions: LLVMMetadata): float128; external name 'llvm.experimental.constrained.fma.f128';
|
||||
{$endif}
|
||||
|
||||
{ Constrained floating-point add/sub/mul/div intrinsic bindings.
  Each routine takes two operands plus two LLVMMetadata parameters
  (rounding, exceptions) and is bound by external name to
  llvm.experimental.constrained.<op>.<type>. The same four Pascal names are
  declared once per operand type (single, double, and, under the respective
  ifdefs, extended and float128).
  NOTE(review): the x86_fp80 variants here are declared with extended, while
  the constrained sqrt and fma x86_fp80 variants in this file use cextended
  -- confirm whether the difference is intentional. }
function llvm_experimental_constrained_fadd(a, b: single; rounding, exceptions: LLVMMetadata): single; external name 'llvm.experimental.constrained.fadd.f32';
|
||||
function llvm_experimental_constrained_fsub(a, b: single; rounding, exceptions: LLVMMetadata): single; external name 'llvm.experimental.constrained.fsub.f32';
|
||||
function llvm_experimental_constrained_fmul(a, b: single; rounding, exceptions: LLVMMetadata): single; external name 'llvm.experimental.constrained.fmul.f32';
|
||||
function llvm_experimental_constrained_fdiv(a, b: single; rounding, exceptions: LLVMMetadata): single; external name 'llvm.experimental.constrained.fdiv.f32';
|
||||
function llvm_experimental_constrained_fadd(a, b: double; rounding, exceptions: LLVMMetadata): double; external name 'llvm.experimental.constrained.fadd.f64';
|
||||
function llvm_experimental_constrained_fsub(a, b: double; rounding, exceptions: LLVMMetadata): double; external name 'llvm.experimental.constrained.fsub.f64';
|
||||
function llvm_experimental_constrained_fmul(a, b: double; rounding, exceptions: LLVMMetadata): double; external name 'llvm.experimental.constrained.fmul.f64';
|
||||
function llvm_experimental_constrained_fdiv(a, b: double; rounding, exceptions: LLVMMetadata): double; external name 'llvm.experimental.constrained.fdiv.f64';
|
||||
{$ifdef SUPPORT_EXTENDED}
|
||||
function llvm_experimental_constrained_fadd(a, b: extended; rounding, exceptions: LLVMMetadata): extended; external name 'llvm.experimental.constrained.fadd.x86_fp80';
|
||||
function llvm_experimental_constrained_fsub(a, b: extended; rounding, exceptions: LLVMMetadata): extended; external name 'llvm.experimental.constrained.fsub.x86_fp80';
|
||||
function llvm_experimental_constrained_fmul(a, b: extended; rounding, exceptions: LLVMMetadata): extended; external name 'llvm.experimental.constrained.fmul.x86_fp80';
|
||||
function llvm_experimental_constrained_fdiv(a, b: extended; rounding, exceptions: LLVMMetadata): extended; external name 'llvm.experimental.constrained.fdiv.x86_fp80';
|
||||
{$endif}
|
||||
{$ifdef SUPPORT_FLOAT128}
|
||||
function llvm_experimental_constrained_fadd(a, b: float128; rounding, exceptions: LLVMMetadata): float128; external name 'llvm.experimental.constrained.fadd.f128';
|
||||
function llvm_experimental_constrained_fsub(a, b: float128; rounding, exceptions: LLVMMetadata): float128; external name 'llvm.experimental.constrained.fsub.f128';
|
||||
function llvm_experimental_constrained_fmul(a, b: float128; rounding, exceptions: LLVMMetadata): float128; external name 'llvm.experimental.constrained.fmul.f128';
|
||||
function llvm_experimental_constrained_fdiv(a, b: float128; rounding, exceptions: LLVMMetadata): float128; external name 'llvm.experimental.constrained.fdiv.f128';
|
||||
{$endif}
|
||||
|
Loading…
Reference in New Issue
Block a user