* better heuristics to decide when a mul by a constant shall be replaced by shift/add/sub sequences

git-svn-id: trunk@22300 -
This commit is contained in:
florian 2012-09-03 13:39:52 +00:00
parent 2f8027c63f
commit 59012afe26
2 changed files with 52 additions and 3 deletions

View File

@ -665,7 +665,9 @@ unit cgcpu;
bitsset : byte; bitsset : byte;
negative : boolean; negative : boolean;
first : boolean; first : boolean;
b,
cycles : byte; cycles : byte;
maxeffort : byte;
begin begin
result:=true; result:=true;
cycles:=0; cycles:=0;
@ -677,6 +679,28 @@ unit cgcpu;
multiplier:=dword(abs(a)); multiplier:=dword(abs(a));
bitsset:=popcnt(multiplier and $fffffffe); bitsset:=popcnt(multiplier and $fffffffe);
{ heuristics to estimate how many instructions are reasonable to replace the mul,
this is currently based on XScale timings }
{ in the simplest case, we need a mov to load the constant and a mul to carry out the
actual multiplication; this requires at least 1+4 cycles.
Because the first shift imm. might cause a stall, and because replacing the mul needs
more instructions, we generate at most 3 instructions to replace this mul }
maxeffort:=3;
{ if the constant is not a shifter operand, we need either a mov/mvn/bic/orr sequence or
an ldr to load it, so generating one more operation to replace the mul is beneficial }
if not(is_shifter_const(dword(a),b)) and not(is_shifter_const(not(dword(a)),b)) then
inc(maxeffort);
{ if the upper 5 bits are all set or clear, mul is one cycle faster }
if ((dword(a) and $f8000000)=0) or ((dword(a) and $f8000000)=$f8000000) then
dec(maxeffort);
{ if the upper 17 bits are all set or clear, mul is another cycle faster }
if ((dword(a) and $ffff8000)=0) or ((dword(a) and $ffff8000)=$ffff8000) then
dec(maxeffort);
{ most simple cases } { most simple cases }
if a=1 then if a=1 then
a_load_reg_reg(list,OS_32,OS_32,src,dst) a_load_reg_reg(list,OS_32,OS_32,src,dst)
@ -690,8 +714,8 @@ unit cgcpu;
however, the least significant bit is for free, it can be hidden in the initial however, the least significant bit is for free, it can be hidden in the initial
instruction instruction
} }
else if (bitsset+cycles<=3) and else if (bitsset+cycles<=maxeffort) and
(bitsset>popcnt(dword(nextpowerof2(multiplier,power)-multiplier) and $fffffffe)) then (bitsset<=popcnt(dword(nextpowerof2(multiplier,power)-multiplier) and $fffffffe)) then
begin begin
first:=true; first:=true;
while multiplier<>0 do while multiplier<>0 do
@ -714,7 +738,7 @@ unit cgcpu;
list.concat(taicpu.op_reg_reg_const(A_RSB,dst,dst,0)); list.concat(taicpu.op_reg_reg_const(A_RSB,dst,dst,0));
end end
{ subtract from the next greater power of two? } { subtract from the next greater power of two? }
else if popcnt(dword(nextpowerof2(multiplier,power)-multiplier) and $fffffffe)+cycles<=3 then else if popcnt(dword(nextpowerof2(multiplier,power)-multiplier) and $fffffffe)+cycles+1<=maxeffort then
begin begin
first:=true; first:=true;
while multiplier<>0 do while multiplier<>0 do

View File

@ -2,6 +2,31 @@ var
i : longint; i : longint;
begin begin
i:=5;
i:=i*$80010;
if i<>2621520 then
halt(1);
i:=5;
i:=i*$18000010;
if i<>2013266000 then
halt(1);
i:=5;
i:=i*$18ffffef;
if i<>2097151915 then
halt(1);
i:=5;
i:=i*$7ffef;
if i<>2621355 then
halt(1);
i:=5;
i:=i*$6fffffcf;
if i<>805306123 then
halt(1);
i:=5; i:=5;
i:=i*10; i:=i*10;
i:=i*62; i:=i*62;