mirror of
https://gitlab.com/freepascal.org/fpc/source.git
synced 2025-08-25 21:52:17 +02:00
+ make use of vfnmsub*/vfmsub*/vfnmadd* instructions if possible
git-svn-id: trunk@27721 -
This commit is contained in:
parent
6e7dd647fd
commit
8207e0ef22
@ -762,22 +762,80 @@ implementation
|
||||
|
||||
procedure tx86inlinenode.second_fma;
|
||||
const
|
||||
op : array[s32real..s64real,0..3] of TAsmOp = ((A_VFMADD231SS,A_VFMADD231SS,A_VFMADD231SS,A_VFMADD213SS),
|
||||
(A_VFMADD231SD,A_VFMADD231SD,A_VFMADD231SD,A_VFMADD213SD));
|
||||
op : array[false..true,false..true,s32real..s64real,0..3] of TAsmOp =
|
||||
(
|
||||
{ positive product }
|
||||
(
|
||||
{ positive third operand }
|
||||
((A_VFMADD231SS,A_VFMADD231SS,A_VFMADD231SS,A_VFMADD213SS),
|
||||
(A_VFMADD231SD,A_VFMADD231SD,A_VFMADD231SD,A_VFMADD213SD)
|
||||
),
|
||||
{ negative third operand }
|
||||
((A_VFMSUB231SS,A_VFMSUB231SS,A_VFMSUB231SS,A_VFMSUB213SS),
|
||||
(A_VFMSUB231SD,A_VFMSUB231SD,A_VFMSUB231SD,A_VFMSUB213SD)
|
||||
)
|
||||
),
|
||||
{ negative product }
|
||||
(
|
||||
{ positive third operand }
|
||||
((A_VFNMADD231SS,A_VFNMADD231SS,A_VFNMADD231SS,A_VFNMADD213SS),
|
||||
(A_VFNMADD231SD,A_VFNMADD231SD,A_VFNMADD231SD,A_VFNMADD213SD)
|
||||
),
|
||||
{ negative third operand }
|
||||
((A_VFNMSUB231SS,A_VFNMSUB231SS,A_VFNMSUB231SS,A_VFNMSUB213SS),
|
||||
(A_VFNMSUB231SD,A_VFNMSUB231SD,A_VFNMSUB231SD,A_VFNMSUB213SD)
|
||||
)
|
||||
)
|
||||
);
|
||||
|
||||
var
|
||||
paraarray : array[1..3] of tnode;
|
||||
memop,
|
||||
i : integer;
|
||||
negop3,
|
||||
negproduct,
|
||||
gotmem : boolean;
|
||||
hp : tnode;
|
||||
begin
|
||||
{$ifndef i8086}
|
||||
if (cpu_capabilities[current_settings.cputype]*[CPUX86_HAS_FMA,CPUX86_HAS_FMA4])<>[] then
|
||||
begin
|
||||
negop3:=false;
|
||||
negproduct:=false;
|
||||
paraarray[1]:=tcallparanode(tcallparanode(tcallparanode(parameters).nextpara).nextpara).paravalue;
|
||||
paraarray[2]:=tcallparanode(tcallparanode(parameters).nextpara).paravalue;
|
||||
paraarray[3]:=tcallparanode(parameters).paravalue;
|
||||
|
||||
for i:=1 to 3 do
|
||||
{ check if a neg. node can be removed
|
||||
this is possible because changing the sign of
|
||||
a floating point number does not affect its absolute
|
||||
value in any way
|
||||
}
|
||||
if paraarray[1].nodetype=unaryminusn then
|
||||
begin
|
||||
paraarray[1]:=tunarynode(paraarray[1]).left;
|
||||
{ do not release the unused unary minus node, it is kept and released together with the other nodes,
|
||||
only no code is generated for it }
|
||||
negproduct:=not(negproduct);
|
||||
end;
|
||||
|
||||
if paraarray[2].nodetype=unaryminusn then
|
||||
begin
|
||||
paraarray[2]:=tunarynode(paraarray[2]).left;
|
||||
{ do not release the unused unary minus node, it is kept and released together with the other nodes,
|
||||
only no code is generated for it }
|
||||
negproduct:=not(negproduct);
|
||||
end;
|
||||
|
||||
if paraarray[3].nodetype=unaryminusn then
|
||||
begin
|
||||
paraarray[3]:=tunarynode(paraarray[3]).left;
|
||||
{ do not release the unused unary minus node, it is kept and released together with the other nodes,
|
||||
only no code is generated for it }
|
||||
negop3:=true;
|
||||
end;
|
||||
|
||||
for i:=1 to 3 do
|
||||
secondpass(paraarray[i]);
|
||||
|
||||
{ only one memory operand is allowed }
|
||||
@ -807,21 +865,21 @@ implementation
|
||||
begin
|
||||
hlcg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,paraarray[3].resultdef,resultdef,
|
||||
paraarray[3].location.register,location.register,mms_movescalar);
|
||||
emit_ref_reg_reg(op[tfloatdef(resultdef).floattype,memop],S_NO,
|
||||
emit_ref_reg_reg(op[negproduct,negop3,tfloatdef(resultdef).floattype,memop],S_NO,
|
||||
paraarray[1].location.reference,paraarray[2].location.register,location.register);
|
||||
end;
|
||||
2:
|
||||
begin
|
||||
hlcg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,paraarray[3].resultdef,resultdef,
|
||||
paraarray[3].location.register,location.register,mms_movescalar);
|
||||
emit_ref_reg_reg(op[tfloatdef(resultdef).floattype,memop],S_NO,
|
||||
emit_ref_reg_reg(op[negproduct,negop3,tfloatdef(resultdef).floattype,memop],S_NO,
|
||||
paraarray[2].location.reference,paraarray[1].location.register,location.register);
|
||||
end;
|
||||
3:
|
||||
begin
|
||||
hlcg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,paraarray[1].resultdef,resultdef,
|
||||
paraarray[1].location.register,location.register,mms_movescalar);
|
||||
emit_ref_reg_reg(op[tfloatdef(resultdef).floattype,memop],S_NO,
|
||||
emit_ref_reg_reg(op[negproduct,negop3,tfloatdef(resultdef).floattype,memop],S_NO,
|
||||
paraarray[3].location.reference,paraarray[2].location.register,location.register);
|
||||
end
|
||||
else
|
||||
@ -836,21 +894,21 @@ implementation
|
||||
begin
|
||||
hlcg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,paraarray[1].resultdef,resultdef,
|
||||
paraarray[1].location.register,location.register,mms_movescalar);
|
||||
emit_reg_reg_reg(op[tfloatdef(resultdef).floattype,3],S_NO,
|
||||
emit_reg_reg_reg(op[negproduct,negop3,tfloatdef(resultdef).floattype,3],S_NO,
|
||||
paraarray[3].location.register,paraarray[2].location.register,location.register);
|
||||
end
|
||||
else if paraarray[2].location.loc=LOC_MMREGISTER then
|
||||
begin
|
||||
hlcg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,paraarray[2].resultdef,resultdef,
|
||||
paraarray[2].location.register,location.register,mms_movescalar);
|
||||
emit_reg_reg_reg(op[tfloatdef(resultdef).floattype,3],S_NO,
|
||||
emit_reg_reg_reg(op[negproduct,negop3,tfloatdef(resultdef).floattype,3],S_NO,
|
||||
paraarray[3].location.register,paraarray[1].location.register,location.register);
|
||||
end
|
||||
else
|
||||
begin
|
||||
hlcg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,paraarray[3].resultdef,resultdef,
|
||||
paraarray[3].location.register,location.register,mms_movescalar);
|
||||
emit_reg_reg_reg(op[tfloatdef(resultdef).floattype,0],S_NO,
|
||||
emit_reg_reg_reg(op[negproduct,negop3,tfloatdef(resultdef).floattype,0],S_NO,
|
||||
paraarray[1].location.register,paraarray[2].location.register,location.register);
|
||||
end;
|
||||
end;
|
||||
|
@ -14,6 +14,7 @@ procedure testsingle;
|
||||
l2:=3;
|
||||
l3:=4;
|
||||
s0:=0;
|
||||
|
||||
l0:=fma(l1,l2,l3);
|
||||
writeln(l0);
|
||||
if l0<>10.0 then
|
||||
@ -58,8 +59,193 @@ procedure testsingle;
|
||||
writeln(l0);
|
||||
if l0<>10.0 then
|
||||
halt(1);
|
||||
|
||||
{ first operand negative }
|
||||
l0:=fma(-l1,l2,l3);
|
||||
writeln(l0);
|
||||
if l0<>-2.0 then
|
||||
halt(1);
|
||||
|
||||
l0:=fma(-(l1+1.0),l2,l3);
|
||||
writeln(l0);
|
||||
if l0<>-5.0 then
|
||||
halt(1);
|
||||
|
||||
l0:=fma(-l1,l1+1.0,l3);
|
||||
writeln(l0);
|
||||
if l0<>-2.0 then
|
||||
halt(1);
|
||||
|
||||
l0:=fma(-s1,l2,l3);
|
||||
writeln(l0);
|
||||
if l0<>-2.0 then
|
||||
halt(1);
|
||||
|
||||
l0:=fma(-l1,s2,l3);
|
||||
writeln(l0);
|
||||
if l0<>-2.0 then
|
||||
halt(1);
|
||||
|
||||
l0:=fma(-l1,l2,s3);
|
||||
writeln(l0);
|
||||
if l0<>-2.0 then
|
||||
halt(1);
|
||||
|
||||
l0:=fma(-s1,s2,l3);
|
||||
writeln(l0);
|
||||
if l0<>-2.0 then
|
||||
halt(1);
|
||||
|
||||
l0:=fma(-s1,l2,s3);
|
||||
writeln(l0);
|
||||
if l0<>-2.0 then
|
||||
halt(1);
|
||||
|
||||
l0:=fma(-l1,s2,s3);
|
||||
writeln(l0);
|
||||
if l0<>-2.0 then
|
||||
halt(1);
|
||||
|
||||
{ second operand negative }
|
||||
l0:=fma(l1,-l2,l3);
|
||||
writeln(l0);
|
||||
if l0<>-2.0 then
|
||||
halt(1);
|
||||
|
||||
l0:=fma(l1+1.0,-l2,l3);
|
||||
writeln(l0);
|
||||
if l0<>-5.0 then
|
||||
halt(1);
|
||||
|
||||
l0:=fma(l1,-(l1+1.0),l3);
|
||||
writeln(l0);
|
||||
if l0<>-2.0 then
|
||||
halt(1);
|
||||
|
||||
l0:=fma(s1,-l2,l3);
|
||||
writeln(l0);
|
||||
if l0<>-2.0 then
|
||||
halt(1);
|
||||
|
||||
l0:=fma(l1,-s2,l3);
|
||||
writeln(l0);
|
||||
if l0<>-2.0 then
|
||||
halt(1);
|
||||
|
||||
l0:=fma(l1,-l2,s3);
|
||||
writeln(l0);
|
||||
if l0<>-2.0 then
|
||||
halt(1);
|
||||
|
||||
l0:=fma(s1,-s2,l3);
|
||||
writeln(l0);
|
||||
if l0<>-2.0 then
|
||||
halt(1);
|
||||
|
||||
l0:=fma(s1,-l2,s3);
|
||||
writeln(l0);
|
||||
if l0<>-2.0 then
|
||||
halt(1);
|
||||
|
||||
l0:=fma(l1,-s2,s3);
|
||||
writeln(l0);
|
||||
if l0<>-2.0 then
|
||||
halt(1);
|
||||
|
||||
{ third operand negative }
|
||||
l0:=fma(l1,l2,-l3);
|
||||
writeln(l0);
|
||||
if l0<>2.0 then
|
||||
halt(1);
|
||||
|
||||
l0:=fma(l1+1.0,l2,-l3);
|
||||
writeln(l0);
|
||||
if l0<>5.0 then
|
||||
halt(1);
|
||||
|
||||
l0:=fma(l1,l1+1.0,-l3);
|
||||
writeln(l0);
|
||||
if l0<>2.0 then
|
||||
halt(1);
|
||||
|
||||
l0:=fma(s1,l2,-l3);
|
||||
writeln(l0);
|
||||
if l0<>2.0 then
|
||||
halt(1);
|
||||
|
||||
l0:=fma(l1,s2,-l3);
|
||||
writeln(l0);
|
||||
if l0<>2.0 then
|
||||
halt(1);
|
||||
|
||||
l0:=fma(l1,l2,-s3);
|
||||
writeln(l0);
|
||||
if l0<>2.0 then
|
||||
halt(1);
|
||||
|
||||
l0:=fma(s1,s2,-l3);
|
||||
writeln(l0);
|
||||
if l0<>2.0 then
|
||||
halt(1);
|
||||
|
||||
l0:=fma(s1,l2,-s3);
|
||||
writeln(l0);
|
||||
if l0<>2.0 then
|
||||
halt(1);
|
||||
|
||||
l0:=fma(l1,s2,-s3);
|
||||
writeln(l0);
|
||||
if l0<>2.0 then
|
||||
halt(1);
|
||||
|
||||
{ first and third operand negative }
|
||||
l0:=fma(-l1,l2,-l3);
|
||||
writeln(l0);
|
||||
if l0<>-10.0 then
|
||||
halt(1);
|
||||
|
||||
l0:=fma(-(l1+1.0),l2,-l3);
|
||||
writeln(l0);
|
||||
if l0<>-13.0 then
|
||||
halt(1);
|
||||
|
||||
l0:=fma(-l1,l1+1.0,-l3);
|
||||
writeln(l0);
|
||||
if l0<>-10.0 then
|
||||
halt(1);
|
||||
|
||||
l0:=fma(-s1,l2,-l3);
|
||||
writeln(l0);
|
||||
if l0<>-10.0 then
|
||||
halt(1);
|
||||
|
||||
l0:=fma(-l1,s2,-l3);
|
||||
writeln(l0);
|
||||
if l0<>-10.0 then
|
||||
halt(1);
|
||||
|
||||
l0:=fma(-l1,l2,-s3);
|
||||
writeln(l0);
|
||||
if l0<>-10.0 then
|
||||
halt(1);
|
||||
|
||||
l0:=fma(-s1,s2,-l3);
|
||||
writeln(l0);
|
||||
if l0<>-10.0 then
|
||||
halt(1);
|
||||
|
||||
l0:=fma(-s1,l2,-s3);
|
||||
writeln(l0);
|
||||
if l0<>-10.0 then
|
||||
halt(1);
|
||||
|
||||
l0:=fma(-l1,s2,-s3);
|
||||
writeln(l0);
|
||||
if l0<>-10.0 then
|
||||
halt(1);
|
||||
end;
|
||||
|
||||
|
||||
procedure testdouble;
|
||||
var
|
||||
l0,l1,l2,l3 : double;
|
||||
@ -68,6 +254,7 @@ procedure testdouble;
|
||||
l2:=3;
|
||||
l3:=4;
|
||||
d0:=0;
|
||||
|
||||
l0:=fma(l1,l2,l3);
|
||||
writeln(l0);
|
||||
if l0<>10.0 then
|
||||
@ -78,7 +265,6 @@ procedure testdouble;
|
||||
if l0<>13.0 then
|
||||
halt(1);
|
||||
|
||||
|
||||
l0:=fma(l1,l1+1.0,l3);
|
||||
writeln(l0);
|
||||
if l0<>10.0 then
|
||||
@ -113,6 +299,190 @@ procedure testdouble;
|
||||
writeln(l0);
|
||||
if l0<>10.0 then
|
||||
halt(1);
|
||||
|
||||
{ first operand negative }
|
||||
l0:=fma(-l1,l2,l3);
|
||||
writeln(l0);
|
||||
if l0<>-2.0 then
|
||||
halt(1);
|
||||
|
||||
l0:=fma(-(l1+1.0),l2,l3);
|
||||
writeln(l0);
|
||||
if l0<>-5.0 then
|
||||
halt(1);
|
||||
|
||||
l0:=fma(-l1,l1+1.0,l3);
|
||||
writeln(l0);
|
||||
if l0<>-2.0 then
|
||||
halt(1);
|
||||
|
||||
l0:=fma(-d1,l2,l3);
|
||||
writeln(l0);
|
||||
if l0<>-2.0 then
|
||||
halt(1);
|
||||
|
||||
l0:=fma(-l1,d2,l3);
|
||||
writeln(l0);
|
||||
if l0<>-2.0 then
|
||||
halt(1);
|
||||
|
||||
l0:=fma(-l1,l2,d3);
|
||||
writeln(l0);
|
||||
if l0<>-2.0 then
|
||||
halt(1);
|
||||
|
||||
l0:=fma(-d1,d2,l3);
|
||||
writeln(l0);
|
||||
if l0<>-2.0 then
|
||||
halt(1);
|
||||
|
||||
l0:=fma(-d1,l2,d3);
|
||||
writeln(l0);
|
||||
if l0<>-2.0 then
|
||||
halt(1);
|
||||
|
||||
l0:=fma(-l1,d2,d3);
|
||||
writeln(l0);
|
||||
if l0<>-2.0 then
|
||||
halt(1);
|
||||
|
||||
{ second operand negative }
|
||||
l0:=fma(l1,-l2,l3);
|
||||
writeln(l0);
|
||||
if l0<>-2.0 then
|
||||
halt(1);
|
||||
|
||||
l0:=fma(l1+1.0,-l2,l3);
|
||||
writeln(l0);
|
||||
if l0<>-5.0 then
|
||||
halt(1);
|
||||
|
||||
l0:=fma(l1,-(l1+1.0),l3);
|
||||
writeln(l0);
|
||||
if l0<>-2.0 then
|
||||
halt(1);
|
||||
|
||||
l0:=fma(d1,-l2,l3);
|
||||
writeln(l0);
|
||||
if l0<>-2.0 then
|
||||
halt(1);
|
||||
|
||||
l0:=fma(l1,-d2,l3);
|
||||
writeln(l0);
|
||||
if l0<>-2.0 then
|
||||
halt(1);
|
||||
|
||||
l0:=fma(l1,-l2,d3);
|
||||
writeln(l0);
|
||||
if l0<>-2.0 then
|
||||
halt(1);
|
||||
|
||||
l0:=fma(d1,-d2,l3);
|
||||
writeln(l0);
|
||||
if l0<>-2.0 then
|
||||
halt(1);
|
||||
|
||||
l0:=fma(d1,-l2,d3);
|
||||
writeln(l0);
|
||||
if l0<>-2.0 then
|
||||
halt(1);
|
||||
|
||||
l0:=fma(l1,-d2,d3);
|
||||
writeln(l0);
|
||||
if l0<>-2.0 then
|
||||
halt(1);
|
||||
|
||||
{ third operand negative }
|
||||
l0:=fma(l1,l2,-l3);
|
||||
writeln(l0);
|
||||
if l0<>2.0 then
|
||||
halt(1);
|
||||
|
||||
l0:=fma(l1+1.0,l2,-l3);
|
||||
writeln(l0);
|
||||
if l0<>5.0 then
|
||||
halt(1);
|
||||
|
||||
l0:=fma(l1,l1+1.0,-l3);
|
||||
writeln(l0);
|
||||
if l0<>2.0 then
|
||||
halt(1);
|
||||
|
||||
l0:=fma(d1,l2,-l3);
|
||||
writeln(l0);
|
||||
if l0<>2.0 then
|
||||
halt(1);
|
||||
|
||||
l0:=fma(l1,d2,-l3);
|
||||
writeln(l0);
|
||||
if l0<>2.0 then
|
||||
halt(1);
|
||||
|
||||
l0:=fma(l1,l2,-d3);
|
||||
writeln(l0);
|
||||
if l0<>2.0 then
|
||||
halt(1);
|
||||
|
||||
l0:=fma(d1,d2,-l3);
|
||||
writeln(l0);
|
||||
if l0<>2.0 then
|
||||
halt(1);
|
||||
|
||||
l0:=fma(d1,l2,-d3);
|
||||
writeln(l0);
|
||||
if l0<>2.0 then
|
||||
halt(1);
|
||||
|
||||
l0:=fma(l1,d2,-d3);
|
||||
writeln(l0);
|
||||
if l0<>2.0 then
|
||||
halt(1);
|
||||
|
||||
{ first and third operand negative }
|
||||
l0:=fma(-l1,l2,-l3);
|
||||
writeln(l0);
|
||||
if l0<>-10.0 then
|
||||
halt(1);
|
||||
|
||||
l0:=fma(-(l1+1.0),l2,-l3);
|
||||
writeln(l0);
|
||||
if l0<>-13.0 then
|
||||
halt(1);
|
||||
|
||||
l0:=fma(-l1,l1+1.0,-l3);
|
||||
writeln(l0);
|
||||
if l0<>-10.0 then
|
||||
halt(1);
|
||||
|
||||
l0:=fma(-d1,l2,-l3);
|
||||
writeln(l0);
|
||||
if l0<>-10.0 then
|
||||
halt(1);
|
||||
|
||||
l0:=fma(-l1,d2,-l3);
|
||||
writeln(l0);
|
||||
if l0<>-10.0 then
|
||||
halt(1);
|
||||
|
||||
l0:=fma(-l1,l2,-d3);
|
||||
writeln(l0);
|
||||
if l0<>-10.0 then
|
||||
halt(1);
|
||||
|
||||
l0:=fma(-d1,d2,-l3);
|
||||
writeln(l0);
|
||||
if l0<>-10.0 then
|
||||
halt(1);
|
||||
|
||||
l0:=fma(-d1,l2,-d3);
|
||||
writeln(l0);
|
||||
if l0<>-10.0 then
|
||||
halt(1);
|
||||
|
||||
l0:=fma(-l1,d2,-d3);
|
||||
writeln(l0);
|
||||
if l0<>-10.0 then
|
||||
halt(1);
|
||||
end;
|
||||
|
||||
begin
|
||||
|
Loading…
Reference in New Issue
Block a user