mirror of
https://gitlab.com/freepascal.org/fpc/source.git
synced 2025-11-07 23:39:24 +01:00
+ support for FMA intrinsic: if there is no hardware support, the compiler throws an error.
Currently it is implemented only for x86-CPUs supporting the FMA extension. While it would be possible to implement it in software or simulate fma(<single>,<single>,<single>) using double operations, it makes no sense in my opinion to do so. git-svn-id: trunk@27564 -
This commit is contained in:
parent
d404d15c1e
commit
d88d644925
1
.gitattributes
vendored
1
.gitattributes
vendored
@ -11382,6 +11382,7 @@ tests/test/textthr.pp svneol=native#text/plain
|
|||||||
tests/test/tfillchr.pp svneol=native#text/plain
|
tests/test/tfillchr.pp svneol=native#text/plain
|
||||||
tests/test/tfinal1.pp svneol=native#text/pascal
|
tests/test/tfinal1.pp svneol=native#text/pascal
|
||||||
tests/test/tfinal2.pp svneol=native#text/pascal
|
tests/test/tfinal2.pp svneol=native#text/pascal
|
||||||
|
tests/test/tfma1.pp svneol=native#text/pascal
|
||||||
tests/test/tforin1.pp svneol=native#text/pascal
|
tests/test/tforin1.pp svneol=native#text/pascal
|
||||||
tests/test/tforin10.pp svneol=native#text/plain
|
tests/test/tforin10.pp svneol=native#text/plain
|
||||||
tests/test/tforin11.pp svneol=native#text/plain
|
tests/test/tforin11.pp svneol=native#text/plain
|
||||||
|
|||||||
@ -115,6 +115,10 @@ const
|
|||||||
in_arctan_real = 130;
|
in_arctan_real = 130;
|
||||||
in_ln_real = 131;
|
in_ln_real = 131;
|
||||||
in_sin_real = 132;
|
in_sin_real = 132;
|
||||||
|
in_fma_single = 133;
|
||||||
|
in_fma_double = 134;
|
||||||
|
in_fma_extended = 135;
|
||||||
|
in_fma_float128 = 136;
|
||||||
|
|
||||||
{ MMX functions }
|
{ MMX functions }
|
||||||
{ these contants are used by the mmx unit }
|
{ these contants are used by the mmx unit }
|
||||||
|
|||||||
@ -137,7 +137,9 @@ type
|
|||||||
CPUX86_HAS_POPCNT,
|
CPUX86_HAS_POPCNT,
|
||||||
CPUX86_HAS_AVXUNIT,
|
CPUX86_HAS_AVXUNIT,
|
||||||
CPUX86_HAS_LZCNT,
|
CPUX86_HAS_LZCNT,
|
||||||
CPUX86_HAS_MOVBE
|
CPUX86_HAS_MOVBE,
|
||||||
|
CPUX86_HAS_FMA,
|
||||||
|
CPUX86_HAS_FMA4
|
||||||
);
|
);
|
||||||
|
|
||||||
const
|
const
|
||||||
@ -151,7 +153,7 @@ type
|
|||||||
{ cpu_PentiumM } [CPUX86_HAS_SSEUNIT],
|
{ cpu_PentiumM } [CPUX86_HAS_SSEUNIT],
|
||||||
{ cpu_core_i } [CPUX86_HAS_SSEUNIT,CPUX86_HAS_POPCNT],
|
{ cpu_core_i } [CPUX86_HAS_SSEUNIT,CPUX86_HAS_POPCNT],
|
||||||
{ cpu_core_avx } [CPUX86_HAS_SSEUNIT,CPUX86_HAS_POPCNT,CPUX86_HAS_AVXUNIT],
|
{ cpu_core_avx } [CPUX86_HAS_SSEUNIT,CPUX86_HAS_POPCNT,CPUX86_HAS_AVXUNIT],
|
||||||
{ cpu_core_avx2 } [CPUX86_HAS_SSEUNIT,CPUX86_HAS_POPCNT,CPUX86_HAS_AVXUNIT,CPUX86_HAS_BMI1,CPUX86_HAS_BMI2,CPUX86_HAS_LZCNT,CPUX86_HAS_MOVBE]
|
{ cpu_core_avx2 } [CPUX86_HAS_SSEUNIT,CPUX86_HAS_POPCNT,CPUX86_HAS_AVXUNIT,CPUX86_HAS_BMI1,CPUX86_HAS_BMI2,CPUX86_HAS_LZCNT,CPUX86_HAS_MOVBE,CPUX86_HAS_FMA]
|
||||||
);
|
);
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@ -2358,6 +2358,11 @@ cg_e_mod_only_defined_for_pos_quotient=06054_E_In ISO mode, the mod operator is
|
|||||||
% In ISO pascal, only positive values are allowed for the quotient: \var{n mod m} is only valid if \var{m>0}.
|
% In ISO pascal, only positive values are allowed for the quotient: \var{n mod m} is only valid if \var{m>0}.
|
||||||
cg_d_autoinlining=06055_DL_Auto inlining: $1
|
cg_d_autoinlining=06055_DL_Auto inlining: $1
|
||||||
% Due to auto inlining turned on, the compiler auto inlines this subroutine.
|
% Due to auto inlining turned on, the compiler auto inlines this subroutine.
|
||||||
|
cg_e_function_not_support_by_selected_instruction_set=06056_E_The function used, is not supported by the selected instruction set: $1
|
||||||
|
% Some functions cannot be implemented efficiently for certain instruction sets, one example is fused multiply/add.
|
||||||
|
% To avoid very inefficient code, the compiler complains in this case, so either select another instruction set
|
||||||
|
% or replace the function call by alternative code
|
||||||
|
%
|
||||||
% \end{description}
|
% \end{description}
|
||||||
# EndOfTeX
|
# EndOfTeX
|
||||||
#
|
#
|
||||||
@ -2615,7 +2620,6 @@ asmr_e_mixing_regtypes=07108_E_All registers in a register set must be of the sa
|
|||||||
asmr_e_empty_regset=07109_E_A register set cannot be empty
|
asmr_e_empty_regset=07109_E_A register set cannot be empty
|
||||||
% Instructions on the ARM architecture that take a register set as argument require that such a set
|
% Instructions on the ARM architecture that take a register set as argument require that such a set
|
||||||
% contains at least one register.
|
% contains at least one register.
|
||||||
|
|
||||||
asmr_w_useless_got_for_local=07110_W_@GOTPCREL is useless and potentially dangereous for local symbols
|
asmr_w_useless_got_for_local=07110_W_@GOTPCREL is useless and potentially dangereous for local symbols
|
||||||
% The use of @GOTPCREL supposes an extra indirection that is
|
% The use of @GOTPCREL supposes an extra indirection that is
|
||||||
% not present if the symbol is local, which might lead to wrong asembler code
|
% not present if the symbol is local, which might lead to wrong asembler code
|
||||||
|
|||||||
@ -654,6 +654,7 @@ const
|
|||||||
cg_e_goto_across_procedures_with_exceptions_not_allowed=06053;
|
cg_e_goto_across_procedures_with_exceptions_not_allowed=06053;
|
||||||
cg_e_mod_only_defined_for_pos_quotient=06054;
|
cg_e_mod_only_defined_for_pos_quotient=06054;
|
||||||
cg_d_autoinlining=06055;
|
cg_d_autoinlining=06055;
|
||||||
|
cg_e_function_not_support_by_selected_instruction_set=06056;
|
||||||
asmr_d_start_reading=07000;
|
asmr_d_start_reading=07000;
|
||||||
asmr_d_finish_reading=07001;
|
asmr_d_finish_reading=07001;
|
||||||
asmr_e_none_label_contain_at=07002;
|
asmr_e_none_label_contain_at=07002;
|
||||||
@ -985,9 +986,9 @@ const
|
|||||||
option_info=11024;
|
option_info=11024;
|
||||||
option_help_pages=11025;
|
option_help_pages=11025;
|
||||||
|
|
||||||
MsgTxtSize = 71162;
|
MsgTxtSize = 71242;
|
||||||
|
|
||||||
MsgIdxMax : array[1..20] of longint=(
|
MsgIdxMax : array[1..20] of longint=(
|
||||||
26,96,337,121,89,56,126,27,202,64,
|
26,96,337,121,89,57,126,27,202,64,
|
||||||
57,20,1,1,1,1,1,1,1,1
|
57,20,1,1,1,1,1,1,1,1
|
||||||
);
|
);
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
@ -220,6 +220,7 @@ interface
|
|||||||
{ a refcounted into a non-refcounted type }
|
{ a refcounted into a non-refcounted type }
|
||||||
function can_be_inlined: boolean;
|
function can_be_inlined: boolean;
|
||||||
|
|
||||||
|
property paravalue : tnode read left write left;
|
||||||
property nextpara : tnode read right write right;
|
property nextpara : tnode read right write right;
|
||||||
{ third is reused to store the parameter name (only while parsing
|
{ third is reused to store the parameter name (only while parsing
|
||||||
vardispatch calls, never in real node tree) and copy of 'high'
|
vardispatch calls, never in real node tree) and copy of 'high'
|
||||||
|
|||||||
@ -60,6 +60,7 @@ interface
|
|||||||
procedure second_box; virtual; abstract;
|
procedure second_box; virtual; abstract;
|
||||||
procedure second_popcnt; virtual;
|
procedure second_popcnt; virtual;
|
||||||
procedure second_seg; virtual; abstract;
|
procedure second_seg; virtual; abstract;
|
||||||
|
procedure second_fma; virtual;
|
||||||
end;
|
end;
|
||||||
|
|
||||||
implementation
|
implementation
|
||||||
@ -190,6 +191,11 @@ implementation
|
|||||||
second_popcnt;
|
second_popcnt;
|
||||||
in_seg_x:
|
in_seg_x:
|
||||||
second_seg;
|
second_seg;
|
||||||
|
in_fma_single,
|
||||||
|
in_fma_double,
|
||||||
|
in_fma_extended,
|
||||||
|
in_fma_float128:
|
||||||
|
second_fma;
|
||||||
else internalerror(9);
|
else internalerror(9);
|
||||||
end;
|
end;
|
||||||
end;
|
end;
|
||||||
@ -768,6 +774,12 @@ implementation
|
|||||||
end;
|
end;
|
||||||
|
|
||||||
|
|
||||||
|
procedure tcginlinenode.second_fma;
|
||||||
|
begin
|
||||||
|
internalerror(2014032701);
|
||||||
|
end;
|
||||||
|
|
||||||
|
|
||||||
begin
|
begin
|
||||||
cinlinenode:=tcginlinenode;
|
cinlinenode:=tcginlinenode;
|
||||||
end. s
|
end. s
|
||||||
|
|||||||
@ -45,6 +45,7 @@ interface
|
|||||||
{ pack and unpack are changed into for-loops by the compiler }
|
{ pack and unpack are changed into for-loops by the compiler }
|
||||||
function first_pack_unpack: tnode; virtual;
|
function first_pack_unpack: tnode; virtual;
|
||||||
|
|
||||||
|
property parameters : tnode read left write left;
|
||||||
protected
|
protected
|
||||||
{ All the following routines currently
|
{ All the following routines currently
|
||||||
call compilerprocs, unless they are
|
call compilerprocs, unless they are
|
||||||
@ -83,6 +84,7 @@ interface
|
|||||||
function typecheck_seg: tnode; virtual;
|
function typecheck_seg: tnode; virtual;
|
||||||
function first_seg: tnode; virtual;
|
function first_seg: tnode; virtual;
|
||||||
function first_sar: tnode; virtual;
|
function first_sar: tnode; virtual;
|
||||||
|
function first_fma : tnode; virtual;
|
||||||
private
|
private
|
||||||
function handle_str: tnode;
|
function handle_str: tnode;
|
||||||
function handle_reset_rewrite_typed: tnode;
|
function handle_reset_rewrite_typed: tnode;
|
||||||
@ -3245,6 +3247,16 @@ implementation
|
|||||||
begin
|
begin
|
||||||
result:=handle_unbox;
|
result:=handle_unbox;
|
||||||
end;
|
end;
|
||||||
|
in_fma_single,
|
||||||
|
in_fma_double,
|
||||||
|
in_fma_extended,
|
||||||
|
in_fma_float128:
|
||||||
|
begin
|
||||||
|
set_varstate(tcallparanode(left).left,vs_read,[vsf_must_be_valid]);
|
||||||
|
set_varstate(tcallparanode(tcallparanode(left).right).left,vs_read,[vsf_must_be_valid]);
|
||||||
|
set_varstate(tcallparanode(tcallparanode(tcallparanode(left).right).right).left,vs_read,[vsf_must_be_valid]);
|
||||||
|
resultdef:=tcallparanode(left).left.resultdef;
|
||||||
|
end;
|
||||||
else
|
else
|
||||||
internalerror(8);
|
internalerror(8);
|
||||||
end;
|
end;
|
||||||
@ -3659,6 +3671,11 @@ implementation
|
|||||||
result:=first_box;
|
result:=first_box;
|
||||||
in_unbox_x_y:
|
in_unbox_x_y:
|
||||||
result:=first_unbox;
|
result:=first_unbox;
|
||||||
|
in_fma_single,
|
||||||
|
in_fma_double,
|
||||||
|
in_fma_extended,
|
||||||
|
in_fma_float128:
|
||||||
|
result:=first_fma;
|
||||||
else
|
else
|
||||||
internalerror(89);
|
internalerror(89);
|
||||||
end;
|
end;
|
||||||
@ -4218,4 +4235,12 @@ implementation
|
|||||||
result := loop;
|
result := loop;
|
||||||
end;
|
end;
|
||||||
|
|
||||||
|
|
||||||
|
function tinlinenode.first_fma: tnode;
|
||||||
|
begin
|
||||||
|
CGMessage1(cg_e_function_not_support_by_selected_instruction_set,'FMA');
|
||||||
|
result:=nil;
|
||||||
|
end;
|
||||||
|
|
||||||
end.
|
end.
|
||||||
|
|
||||||
|
|||||||
@ -44,6 +44,7 @@ interface
|
|||||||
|
|
||||||
procedure emit_const_reg_reg(i : tasmop;s : topsize;c : longint;reg1,reg2 : tregister);
|
procedure emit_const_reg_reg(i : tasmop;s : topsize;c : longint;reg1,reg2 : tregister);
|
||||||
procedure emit_reg_reg_reg(i : tasmop;s : topsize;reg1,reg2,reg3 : tregister);
|
procedure emit_reg_reg_reg(i : tasmop;s : topsize;reg1,reg2,reg3 : tregister);
|
||||||
|
procedure emit_ref_reg_reg(i : tasmop;s : topsize;ref : treference;reg1,reg2 : tregister);
|
||||||
|
|
||||||
|
|
||||||
procedure emit_sym(i : tasmop;s : topsize;op : tasmsymbol);
|
procedure emit_sym(i : tasmop;s : topsize;op : tasmsymbol);
|
||||||
@ -124,6 +125,12 @@ implementation
|
|||||||
current_asmdata.CurrAsmList.concat(Taicpu.Op_reg_reg_reg(i,s,reg1,reg2,reg3));
|
current_asmdata.CurrAsmList.concat(Taicpu.Op_reg_reg_reg(i,s,reg1,reg2,reg3));
|
||||||
end;
|
end;
|
||||||
|
|
||||||
|
procedure emit_ref_reg_reg(i : tasmop;s : topsize;ref : treference;reg1,reg2 : tregister);
|
||||||
|
begin
|
||||||
|
tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,ref);
|
||||||
|
current_asmdata.CurrAsmList.concat(Taicpu.Op_ref_reg_reg(i,s,ref,reg1,reg2));
|
||||||
|
end;
|
||||||
|
|
||||||
procedure emit_sym(i : tasmop;s : topsize;op : tasmsymbol);
|
procedure emit_sym(i : tasmop;s : topsize;op : tasmsymbol);
|
||||||
begin
|
begin
|
||||||
current_asmdata.CurrAsmList.concat(Taicpu.Op_sym(i,s,op));
|
current_asmdata.CurrAsmList.concat(Taicpu.Op_sym(i,s,op));
|
||||||
|
|||||||
@ -185,7 +185,7 @@ unit cgx86;
|
|||||||
|
|
||||||
function UseAVX: boolean;
|
function UseAVX: boolean;
|
||||||
begin
|
begin
|
||||||
Result:=current_settings.fputype in fpu_avx_instructionsets;
|
Result:=(current_settings.fputype in fpu_avx_instructionsets) or (CPUX86_HAS_AVXUNIT in cpu_capabilities[current_settings.cputype]);
|
||||||
end;
|
end;
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@ -45,6 +45,7 @@ interface
|
|||||||
function first_round_real: tnode; override;
|
function first_round_real: tnode; override;
|
||||||
function first_trunc_real: tnode; override;
|
function first_trunc_real: tnode; override;
|
||||||
function first_popcnt: tnode; override;
|
function first_popcnt: tnode; override;
|
||||||
|
function first_fma: tnode; override;
|
||||||
{ second pass override to generate these nodes }
|
{ second pass override to generate these nodes }
|
||||||
procedure second_IncludeExclude;override;
|
procedure second_IncludeExclude;override;
|
||||||
procedure second_pi; override;
|
procedure second_pi; override;
|
||||||
@ -64,6 +65,7 @@ interface
|
|||||||
procedure second_abs_long;override;
|
procedure second_abs_long;override;
|
||||||
{$endif not i8086}
|
{$endif not i8086}
|
||||||
procedure second_popcnt;override;
|
procedure second_popcnt;override;
|
||||||
|
procedure second_fma;override;
|
||||||
private
|
private
|
||||||
procedure load_fpu_location(lnode: tnode);
|
procedure load_fpu_location(lnode: tnode);
|
||||||
end;
|
end;
|
||||||
@ -247,7 +249,20 @@ implementation
|
|||||||
end;
|
end;
|
||||||
|
|
||||||
|
|
||||||
procedure tx86inlinenode.second_Pi;
|
function tx86inlinenode.first_fma : tnode;
|
||||||
|
begin
|
||||||
|
if ((cpu_capabilities[current_settings.cputype]*[CPUX86_HAS_FMA,CPUX86_HAS_FMA4])<>[]) and
|
||||||
|
((is_double(resultdef)) or (is_single(resultdef))) then
|
||||||
|
begin
|
||||||
|
expectloc:=LOC_MMREGISTER;
|
||||||
|
Result:=nil;
|
||||||
|
end
|
||||||
|
else
|
||||||
|
Result:=inherited first_fma;
|
||||||
|
end;
|
||||||
|
|
||||||
|
|
||||||
|
procedure tx86inlinenode.second_pi;
|
||||||
begin
|
begin
|
||||||
location_reset(location,LOC_FPUREGISTER,def_cgsize(resultdef));
|
location_reset(location,LOC_FPUREGISTER,def_cgsize(resultdef));
|
||||||
emit_none(A_FLDPI,S_NO);
|
emit_none(A_FLDPI,S_NO);
|
||||||
@ -741,4 +756,85 @@ implementation
|
|||||||
else
|
else
|
||||||
emit_ref_reg(A_POPCNT,TCGSize2OpSize[opsize],left.location.reference,location.register);
|
emit_ref_reg(A_POPCNT,TCGSize2OpSize[opsize],left.location.reference,location.register);
|
||||||
end;
|
end;
|
||||||
|
|
||||||
|
|
||||||
|
procedure tx86inlinenode.second_fma;
|
||||||
|
const
|
||||||
|
op : array[s32real..s64real,0..3] of TAsmOp = ((A_VFMADD231SS,A_VFMADD231SS,A_VFMADD231SS,A_VFMADD213SS),
|
||||||
|
(A_VFMADD231SD,A_VFMADD231SD,A_VFMADD231SD,A_VFMADD213SD));
|
||||||
|
var
|
||||||
|
paraarray : array[1..3] of tnode;
|
||||||
|
memop,
|
||||||
|
i : integer;
|
||||||
|
gotmem : boolean;
|
||||||
|
begin
|
||||||
|
if (cpu_capabilities[current_settings.cputype]*[CPUX86_HAS_FMA,CPUX86_HAS_FMA4])<>[] then
|
||||||
|
begin
|
||||||
|
paraarray[1]:=tcallparanode(tcallparanode(tcallparanode(parameters).nextpara).nextpara).paravalue;
|
||||||
|
paraarray[2]:=tcallparanode(tcallparanode(parameters).nextpara).paravalue;
|
||||||
|
paraarray[3]:=tcallparanode(parameters).paravalue;
|
||||||
|
|
||||||
|
for i:=1 to 3 do
|
||||||
|
secondpass(paraarray[i]);
|
||||||
|
|
||||||
|
{ only one memory operand is allowed }
|
||||||
|
gotmem:=false;
|
||||||
|
memop:=0;
|
||||||
|
for i:=1 to 3 do
|
||||||
|
begin
|
||||||
|
if not(paraarray[i].location.loc in [LOC_MMREGISTER,LOC_CMMREGISTER]) then
|
||||||
|
begin
|
||||||
|
if (paraarray[i].location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) and not(gotmem) then
|
||||||
|
begin
|
||||||
|
memop:=i;
|
||||||
|
gotmem:=true;
|
||||||
|
end
|
||||||
|
else
|
||||||
|
hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,paraarray[i].location,paraarray[i].resultdef,true);
|
||||||
|
end;
|
||||||
|
end;
|
||||||
|
|
||||||
|
location_reset(location,LOC_MMREGISTER,paraarray[1].location.size);
|
||||||
|
location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
|
||||||
|
|
||||||
|
if gotmem then
|
||||||
|
begin
|
||||||
|
case memop of
|
||||||
|
1:
|
||||||
|
begin
|
||||||
|
hlcg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,paraarray[3].resultdef,resultdef,
|
||||||
|
paraarray[3].location.register,location.register,mms_movescalar);
|
||||||
|
emit_ref_reg_reg(op[tfloatdef(resultdef).floattype,memop],S_NO,
|
||||||
|
paraarray[1].location.reference,paraarray[2].location.register,location.register);
|
||||||
|
end;
|
||||||
|
2:
|
||||||
|
begin
|
||||||
|
hlcg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,paraarray[3].resultdef,resultdef,
|
||||||
|
paraarray[3].location.register,location.register,mms_movescalar);
|
||||||
|
emit_ref_reg_reg(op[tfloatdef(resultdef).floattype,memop],S_NO,
|
||||||
|
paraarray[2].location.reference,paraarray[1].location.register,location.register);
|
||||||
|
end;
|
||||||
|
3:
|
||||||
|
begin
|
||||||
|
hlcg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,paraarray[1].resultdef,resultdef,
|
||||||
|
paraarray[1].location.register,location.register,mms_movescalar);
|
||||||
|
emit_ref_reg_reg(op[tfloatdef(resultdef).floattype,memop],S_NO,
|
||||||
|
paraarray[3].location.reference,paraarray[2].location.register,location.register);
|
||||||
|
end
|
||||||
|
else
|
||||||
|
internalerror(2014041301);
|
||||||
|
end;
|
||||||
|
end
|
||||||
|
else
|
||||||
|
begin
|
||||||
|
hlcg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,paraarray[3].resultdef,resultdef,
|
||||||
|
paraarray[3].location.register,location.register,mms_movescalar);
|
||||||
|
emit_reg_reg_reg(op[tfloatdef(resultdef).floattype,0],S_NO,
|
||||||
|
paraarray[1].location.register,paraarray[2].location.register,location.register);
|
||||||
|
end;
|
||||||
|
end
|
||||||
|
else
|
||||||
|
internalerror(2014032301);
|
||||||
|
end;
|
||||||
|
|
||||||
end.
|
end.
|
||||||
|
|||||||
@ -125,7 +125,9 @@ type
|
|||||||
CPUX86_HAS_POPCNT,
|
CPUX86_HAS_POPCNT,
|
||||||
CPUX86_HAS_AVXUNIT,
|
CPUX86_HAS_AVXUNIT,
|
||||||
CPUX86_HAS_LZCNT,
|
CPUX86_HAS_LZCNT,
|
||||||
CPUX86_HAS_MOVBE
|
CPUX86_HAS_MOVBE,
|
||||||
|
CPUX86_HAS_FMA,
|
||||||
|
CPUX86_HAS_FMA4
|
||||||
);
|
);
|
||||||
|
|
||||||
const
|
const
|
||||||
@ -134,7 +136,7 @@ type
|
|||||||
{ Athlon64 } [CPUX86_HAS_SSEUNIT],
|
{ Athlon64 } [CPUX86_HAS_SSEUNIT],
|
||||||
{ cpu_core_i } [CPUX86_HAS_SSEUNIT,CPUX86_HAS_POPCNT],
|
{ cpu_core_i } [CPUX86_HAS_SSEUNIT,CPUX86_HAS_POPCNT],
|
||||||
{ cpu_core_avx } [CPUX86_HAS_SSEUNIT,CPUX86_HAS_POPCNT,CPUX86_HAS_AVXUNIT],
|
{ cpu_core_avx } [CPUX86_HAS_SSEUNIT,CPUX86_HAS_POPCNT,CPUX86_HAS_AVXUNIT],
|
||||||
{ cpu_core_avx2 } [CPUX86_HAS_SSEUNIT,CPUX86_HAS_POPCNT,CPUX86_HAS_AVXUNIT,CPUX86_HAS_BMI1,CPUX86_HAS_BMI2,CPUX86_HAS_LZCNT,CPUX86_HAS_MOVBE]
|
{ cpu_core_avx2 } [CPUX86_HAS_SSEUNIT,CPUX86_HAS_POPCNT,CPUX86_HAS_AVXUNIT,CPUX86_HAS_BMI1,CPUX86_HAS_BMI2,CPUX86_HAS_LZCNT,CPUX86_HAS_MOVBE,CPUX86_HAS_FMA]
|
||||||
);
|
);
|
||||||
|
|
||||||
Implementation
|
Implementation
|
||||||
|
|||||||
@ -115,6 +115,10 @@ const
|
|||||||
fpc_in_arctan_real = 130;
|
fpc_in_arctan_real = 130;
|
||||||
fpc_in_ln_real = 131;
|
fpc_in_ln_real = 131;
|
||||||
fpc_in_sin_real = 132;
|
fpc_in_sin_real = 132;
|
||||||
|
fpc_in_fma_single = 133;
|
||||||
|
fpc_in_fma_double = 134;
|
||||||
|
fpc_in_fma_extended = 135;
|
||||||
|
fpc_in_fma_float128 = 136;
|
||||||
|
|
||||||
{ MMX functions }
|
{ MMX functions }
|
||||||
{ these contants are used by the mmx unit }
|
{ these contants are used by the mmx unit }
|
||||||
|
|||||||
@ -109,3 +109,15 @@ procedure float_raise(i: TFPUExceptionMask);
|
|||||||
operator := (b:real48) e:extended;
|
operator := (b:real48) e:extended;
|
||||||
{$endif SUPPORT_EXTENDED}
|
{$endif SUPPORT_EXTENDED}
|
||||||
|
|
||||||
|
|
||||||
|
function fma(s1,s2,s3 : single) : single;[internproc:fpc_in_fma_single];
|
||||||
|
{$ifdef SUPPORT_DOUBLE}
|
||||||
|
function fma(d1,d2,d3 : double) : double;[internproc:fpc_in_fma_double];
|
||||||
|
{$endif SUPPORT_DOUBLE}
|
||||||
|
{$ifdef SUPPORT_EXTENDED}
|
||||||
|
function fma(e1,e2,e3 : extended) : extended;[internproc:fpc_in_fma_extended];
|
||||||
|
{$endif SUPPORT_EXTENDED}
|
||||||
|
{$ifdef SUPPORT_FLOAT128}
|
||||||
|
function fma(f1,f2,f3 : float128) : float128;[internproc:fpc_in_fma_float128];
|
||||||
|
{$endif SUPPORT_FLOAT128}
|
||||||
|
|
||||||
|
|||||||
121
tests/test/tfma1.pp
Normal file
121
tests/test/tfma1.pp
Normal file
@ -0,0 +1,121 @@
|
|||||||
|
{ %CPU=i386,x86_64 }
|
||||||
|
{ %OPT=-Cfavx2 -Cpcoreavx2 }
|
||||||
|
uses
|
||||||
|
cpu;
|
||||||
|
var
|
||||||
|
d0,d1,d2,d3 : double;
|
||||||
|
s0,s1,s2,s3 : single;
|
||||||
|
|
||||||
|
procedure testsingle;
|
||||||
|
var
|
||||||
|
l0,l1,l2,l3 : single;
|
||||||
|
begin
|
||||||
|
l1:=2;
|
||||||
|
l2:=3;
|
||||||
|
l3:=4;
|
||||||
|
l0:=fma(l1,l2,l3);
|
||||||
|
writeln(l0);
|
||||||
|
if l0<>10.0 then
|
||||||
|
halt(1);
|
||||||
|
|
||||||
|
l0:=fma(s1,l2,l3);
|
||||||
|
writeln(l0);
|
||||||
|
if l0<>10.0 then
|
||||||
|
halt(1);
|
||||||
|
|
||||||
|
l0:=fma(l1,s2,l3);
|
||||||
|
writeln(l0);
|
||||||
|
if l0<>10.0 then
|
||||||
|
halt(1);
|
||||||
|
|
||||||
|
l0:=fma(l1,l2,s3);
|
||||||
|
writeln(l0);
|
||||||
|
if l0<>10.0 then
|
||||||
|
halt(1);
|
||||||
|
|
||||||
|
l0:=fma(s1,s2,l3);
|
||||||
|
writeln(l0);
|
||||||
|
if l0<>10.0 then
|
||||||
|
halt(1);
|
||||||
|
|
||||||
|
l0:=fma(s1,l2,s3);
|
||||||
|
writeln(l0);
|
||||||
|
if l0<>10.0 then
|
||||||
|
halt(1);
|
||||||
|
|
||||||
|
l0:=fma(l1,s2,s3);
|
||||||
|
writeln(l0);
|
||||||
|
if l0<>10.0 then
|
||||||
|
halt(1);
|
||||||
|
end;
|
||||||
|
|
||||||
|
procedure testdouble;
|
||||||
|
var
|
||||||
|
l0,l1,l2,l3 : double;
|
||||||
|
begin
|
||||||
|
l1:=2;
|
||||||
|
l2:=3;
|
||||||
|
l3:=4;
|
||||||
|
l0:=fma(l1,l2,l3);
|
||||||
|
writeln(l0);
|
||||||
|
if l0<>10.0 then
|
||||||
|
halt(1);
|
||||||
|
|
||||||
|
l0:=fma(d1,l2,l3);
|
||||||
|
writeln(l0);
|
||||||
|
if l0<>10.0 then
|
||||||
|
halt(1);
|
||||||
|
|
||||||
|
l0:=fma(l1,d2,l3);
|
||||||
|
writeln(l0);
|
||||||
|
if l0<>10.0 then
|
||||||
|
halt(1);
|
||||||
|
|
||||||
|
l0:=fma(l1,l2,d3);
|
||||||
|
writeln(l0);
|
||||||
|
if l0<>10.0 then
|
||||||
|
halt(1);
|
||||||
|
|
||||||
|
l0:=fma(d1,d2,l3);
|
||||||
|
writeln(l0);
|
||||||
|
if l0<>10.0 then
|
||||||
|
halt(1);
|
||||||
|
|
||||||
|
l0:=fma(d1,l2,d3);
|
||||||
|
writeln(l0);
|
||||||
|
if l0<>10.0 then
|
||||||
|
halt(1);
|
||||||
|
|
||||||
|
l0:=fma(l1,d2,d3);
|
||||||
|
writeln(l0);
|
||||||
|
if l0<>10.0 then
|
||||||
|
halt(1);
|
||||||
|
end;
|
||||||
|
|
||||||
|
begin
|
||||||
|
if AVXSupport and FMASupport then
|
||||||
|
begin
|
||||||
|
d1:=2;
|
||||||
|
d2:=3;
|
||||||
|
d3:=4;
|
||||||
|
d0:=fma(d1,d2,d3);
|
||||||
|
writeln(d0);
|
||||||
|
if d0<>10.0 then
|
||||||
|
halt(1);
|
||||||
|
|
||||||
|
s1:=2;
|
||||||
|
s2:=3;
|
||||||
|
s3:=4;
|
||||||
|
s0:=fma(s1,s2,s3);
|
||||||
|
writeln(s0);
|
||||||
|
if s0<>10.0 then
|
||||||
|
halt(1);
|
||||||
|
|
||||||
|
testsingle;
|
||||||
|
testdouble;
|
||||||
|
|
||||||
|
writeln('ok');
|
||||||
|
end
|
||||||
|
else
|
||||||
|
writeln('Skipped because not supported by the CPU');
|
||||||
|
end.
|
||||||
Loading…
Reference in New Issue
Block a user