mirror of
https://gitlab.com/freepascal.org/fpc/source.git
synced 2025-08-11 09:26:15 +02:00
* make use of mulps/mulpd and haddps/haddpd/hsubpd/hsubps to optimize x*x+y*y and x*x-y*y where x and y might be single or double
git-svn-id: trunk@18790 -
This commit is contained in:
parent
13ac5d185f
commit
46cc0209de
1
.gitattributes
vendored
1
.gitattributes
vendored
@ -10448,6 +10448,7 @@ tests/test/tset5a.pp svneol=native#text/plain
|
|||||||
tests/test/tset6.pp svneol=native#text/plain
|
tests/test/tset6.pp svneol=native#text/plain
|
||||||
tests/test/tset7.pp svneol=native#text/plain
|
tests/test/tset7.pp svneol=native#text/plain
|
||||||
tests/test/tsetsize.pp svneol=native#text/plain
|
tests/test/tsetsize.pp svneol=native#text/plain
|
||||||
|
tests/test/tshuffle1.pp svneol=native#text/pascal
|
||||||
tests/test/tstack.pp svneol=native#text/plain
|
tests/test/tstack.pp svneol=native#text/plain
|
||||||
tests/test/tstatic1.pp svneol=native#text/pascal
|
tests/test/tstatic1.pp svneol=native#text/pascal
|
||||||
tests/test/tstatic2.pp svneol=native#text/pascal
|
tests/test/tstatic2.pp svneol=native#text/pascal
|
||||||
|
@ -66,7 +66,7 @@ unit nx86add;
|
|||||||
symconst,symdef,
|
symconst,symdef,
|
||||||
cgobj,cgx86,cga,cgutils,
|
cgobj,cgx86,cga,cgutils,
|
||||||
paramgr,tgobj,ncgutil,
|
paramgr,tgobj,ncgutil,
|
||||||
ncon,nset,
|
ncon,nset,ninl,
|
||||||
defutil;
|
defutil;
|
||||||
|
|
||||||
|
|
||||||
@ -660,7 +660,28 @@ unit nx86add;
|
|||||||
procedure tx86addnode.second_addfloatsse;
|
procedure tx86addnode.second_addfloatsse;
|
||||||
var
|
var
|
||||||
op : topcg;
|
op : topcg;
|
||||||
|
sqr_sum : boolean;
|
||||||
|
tmp : tnode;
|
||||||
begin
|
begin
|
||||||
|
sqr_sum:=false;
|
||||||
|
if (current_settings.fputype>=fpu_sse3) and
|
||||||
|
use_vectorfpu(resultdef) and
|
||||||
|
(nodetype in [addn,subn]) and
|
||||||
|
(left.nodetype=inlinen) and (tinlinenode(left).inlinenumber=in_sqr_real) and
|
||||||
|
(right.nodetype=inlinen) and (tinlinenode(right).inlinenumber=in_sqr_real) then
|
||||||
|
begin
|
||||||
|
sqr_sum:=true;
|
||||||
|
tmp:=tinlinenode(left).left;
|
||||||
|
tinlinenode(left).left:=nil;
|
||||||
|
left.free;
|
||||||
|
left:=tmp;
|
||||||
|
|
||||||
|
tmp:=tinlinenode(right).left;
|
||||||
|
tinlinenode(right).left:=nil;
|
||||||
|
right.free;
|
||||||
|
right:=tmp;
|
||||||
|
end;
|
||||||
|
|
||||||
pass_left_right;
|
pass_left_right;
|
||||||
check_left_and_right_fpureg(false);
|
check_left_and_right_fpureg(false);
|
||||||
|
|
||||||
@ -687,8 +708,51 @@ unit nx86add;
|
|||||||
end;
|
end;
|
||||||
|
|
||||||
location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
|
location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
|
||||||
|
|
||||||
|
if sqr_sum then
|
||||||
|
begin
|
||||||
|
if nf_swapped in flags then
|
||||||
|
swapleftright;
|
||||||
|
|
||||||
|
location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,false);
|
||||||
|
location_force_mmregscalar(current_asmdata.CurrAsmList,right.location,true);
|
||||||
|
location:=left.location;
|
||||||
|
if is_double(resultdef) then
|
||||||
|
begin
|
||||||
|
current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(A_SHUFPD,S_NO,%00,right.location.register,location.register));
|
||||||
|
current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_MULPD,S_NO,location.register,location.register));
|
||||||
|
case nodetype of
|
||||||
|
addn:
|
||||||
|
current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_HADDPD,S_NO,location.register,location.register));
|
||||||
|
subn:
|
||||||
|
current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_HSUBPD,S_NO,location.register,location.register));
|
||||||
|
else
|
||||||
|
internalerror(201108162);
|
||||||
|
end;
|
||||||
|
end
|
||||||
|
else
|
||||||
|
begin
|
||||||
|
current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_UNPCKLPS,S_NO,right.location.register,location.register));
|
||||||
|
{ ensure that bits 64..127 contain valid values }
|
||||||
|
current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(A_SHUFPD,S_NO,%00,location.register,location.register));
|
||||||
|
{ the data is now in bits 0..63 and duplicated in bits 64..127 }
|
||||||
|
current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_MULPS,S_NO,location.register,location.register));
|
||||||
|
case nodetype of
|
||||||
|
addn:
|
||||||
|
begin
|
||||||
|
current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_HADDPS,S_NO,location.register,location.register));
|
||||||
|
end;
|
||||||
|
subn:
|
||||||
|
begin
|
||||||
|
current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_HSUBPS,S_NO,location.register,location.register));
|
||||||
|
end;
|
||||||
|
else
|
||||||
|
internalerror(201108163);
|
||||||
|
end;
|
||||||
|
end
|
||||||
|
end
|
||||||
{ we can use only right as left operand if the operation is commutative }
|
{ we can use only right as left operand if the operation is commutative }
|
||||||
if (right.location.loc=LOC_MMREGISTER) and (op in [OP_ADD,OP_MUL]) then
|
else if (right.location.loc=LOC_MMREGISTER) and (op in [OP_ADD,OP_MUL]) then
|
||||||
begin
|
begin
|
||||||
location.register:=right.location.register;
|
location.register:=right.location.register;
|
||||||
{ force floating point reg. location to be written to memory,
|
{ force floating point reg. location to be written to memory,
|
||||||
|
89
tests/test/tshuffle1.pp
Normal file
89
tests/test/tshuffle1.pp
Normal file
@ -0,0 +1,89 @@
|
|||||||
|
{ %cpu=i386,x86_64 }
|
||||||
|
{ %opt=-Cfsse3 -O3 }
|
||||||
|
{$mode objfpc}
|
||||||
|
uses
|
||||||
|
cpu;
|
||||||
|
|
||||||
|
function test_double : longint;
|
||||||
|
var
|
||||||
|
f,f1,f2 : double;
|
||||||
|
i : longint;
|
||||||
|
begin
|
||||||
|
result:=0;
|
||||||
|
f1:=1;
|
||||||
|
f2:=2;
|
||||||
|
f:=f1*f1+f2*f2;
|
||||||
|
if f<>5 then
|
||||||
|
result:=1;
|
||||||
|
f:=f1*f1-f2*f2;
|
||||||
|
if f<>-3 then
|
||||||
|
result:=1;
|
||||||
|
{ fool ssa }
|
||||||
|
for i:=1 to 3 do
|
||||||
|
begin
|
||||||
|
f:=f1*f1+f2*f2;
|
||||||
|
if f<>5 then
|
||||||
|
result:=1;
|
||||||
|
f:=f1*f1-f2*f2;
|
||||||
|
if f<>-3 then
|
||||||
|
result:=1;
|
||||||
|
end;
|
||||||
|
end;
|
||||||
|
|
||||||
|
|
||||||
|
function test_single : longint;
|
||||||
|
var
|
||||||
|
f,f1,f2 : single;
|
||||||
|
i : longint;
|
||||||
|
begin
|
||||||
|
result:=0;
|
||||||
|
f1:=1;
|
||||||
|
f2:=2;
|
||||||
|
f:=f1*f1+f2*f2;
|
||||||
|
if f<>5 then
|
||||||
|
result:=1;
|
||||||
|
f:=f1*f1-f2*f2;
|
||||||
|
if f<>-3 then
|
||||||
|
result:=1;
|
||||||
|
{ fool ssa }
|
||||||
|
for i:=1 to 3 do
|
||||||
|
begin
|
||||||
|
f:=f1*f1+f2*f2;
|
||||||
|
if f<>5 then
|
||||||
|
result:=1;
|
||||||
|
f:=f1*f1-f2*f2;
|
||||||
|
if f<>-3 then
|
||||||
|
result:=1;
|
||||||
|
end;
|
||||||
|
end;
|
||||||
|
|
||||||
|
var
|
||||||
|
f,f1,f2 : double;
|
||||||
|
i : longint;
|
||||||
|
begin
|
||||||
|
if not(is_sse3_cpu) then
|
||||||
|
halt(0);
|
||||||
|
f1:=1;
|
||||||
|
f2:=2;
|
||||||
|
f:=f1*f1+f2*f2;
|
||||||
|
if f<>5 then
|
||||||
|
halt(1);
|
||||||
|
f:=f1*f1-f2*f2;
|
||||||
|
if f<>-3 then
|
||||||
|
halt(1);
|
||||||
|
{ fool ssa }
|
||||||
|
for i:=1 to 3 do
|
||||||
|
begin
|
||||||
|
f:=f1*f1+f2*f2;
|
||||||
|
if f<>5 then
|
||||||
|
halt(1);
|
||||||
|
f:=f1*f1-f2*f2;
|
||||||
|
if f<>-3 then
|
||||||
|
halt(1);
|
||||||
|
end;
|
||||||
|
if test_double<>0 then
|
||||||
|
halt(1);
|
||||||
|
if test_single<>0 then
|
||||||
|
halt(1);
|
||||||
|
writeln('ok');
|
||||||
|
end.
|
Loading…
Reference in New Issue
Block a user