* improved the optimization of signed modulus by power-of-2, so it uses fewer
operations. Previously generated code:
    sign:=sar(left,sizeof(left)*8-1);
    result:=((((left xor sign)-sign) and right) xor sign)-sign;
  New code:
    mask:=sar(left,sizeof(left)*8-1) and ((1 shl power)-1);
    result:=((left+mask) and right)-mask;

git-svn-id: trunk@36847 -
This commit is contained in:
nickysn 2017-08-04 16:20:50 +00:00
parent 8ffc035a86
commit ee79ff3635

View File

@ -582,28 +582,33 @@ implementation
addstatement(statements,resulttemp); addstatement(statements,resulttemp);
addstatement(statements,temp); addstatement(statements,temp);
addstatement(statements,cassignmentnode.create(ctemprefnode.create(temp),left)); addstatement(statements,cassignmentnode.create(ctemprefnode.create(temp),left));
{ sign:=sar(left,sizeof(left)*8-1); } { mask:=sar(left,sizeof(left)*8-1) and ((1 shl power)-1); }
addstatement(statements,cassignmentnode.create(ctemprefnode.create(resulttemp), if power=1 then
cinlinenode.create(in_sar_x_y,false, masknode:=
ccallparanode.create(cordconstnode.create(shiftval,u8inttype,false), cshlshrnode.create(shrn,
ccallparanode.create(ctemprefnode.create(temp),nil) ctemprefnode.create(temp),
) cordconstnode.create(shiftval,u8inttype,false)
))); )
else
masknode:=
caddnode.create(andn,
cinlinenode.create(in_sar_x_y,false,
ccallparanode.create(cordconstnode.create(shiftval,u8inttype,false),
ccallparanode.create(ctemprefnode.create(temp),nil))
),
cordconstnode.create(tcgint((qword(1) shl power)-1),
right.resultdef,false)
);
addstatement(statements,cassignmentnode.create(ctemprefnode.create(resulttemp),masknode));
{ result:=((((left xor sign)-sign) and right) xor sign)-sign; } { result:=((left+mask) and right)-mask; }
addstatement(statements,cassignmentnode.create(ctemprefnode.create(resulttemp), addstatement(statements,cassignmentnode.create(ctemprefnode.create(resulttemp),
caddnode.create(subn, caddnode.create(subn,
caddnode.create(xorn, caddnode.create(andn,
caddnode.create(andn, right,
right, caddnode.create(addn,
caddnode.create(subn, ctemprefnode.create(temp),
caddnode.create(xorn, ctemprefnode.create(resulttemp))),
ctemprefnode.create(resulttemp),
ctemprefnode.create(temp)),
ctemprefnode.create(resulttemp))
),
ctemprefnode.create(resulttemp)
),
ctemprefnode.create(resulttemp)) ctemprefnode.create(resulttemp))
)); ));