fpc/compiler/optunrol.pas
florian 85d63d9fa9 * settings refactored
git-svn-id: trunk@5094 -
2006-10-30 18:02:58 +00:00

170 lines
5.5 KiB
ObjectPascal

{
Loop unrolling
Copyright (c) 2005 by Florian Klaempfl
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
****************************************************************************
}
unit optunrol;
{$i fpcdefs.inc}
interface
uses
node;
function unroll_loop(node : tnode) : tnode;
implementation
uses
globtype,globals,
cpuinfo,
nutils,
nbas,nflw,ncon,ninl,ncal;
var
nodecount : aint;
function donodecount(var n: tnode; arg: pointer): foreachnoderesult;
begin
inc(nodecount);
result:=fen_false;
end;
{ rough estimation how large the tree "node" is }
function countnodes(node : tnode) : aint;
begin
nodecount:=0;
foreachnodestatic(node,@donodecount,nil);
result:=nodecount;
end;
function number_unrolls(node : tnode) : integer;
begin
{$ifdef i386}
{ multiply by 2 for CPUs with a long pipeline }
if current_settings.cputype in [cpu_Pentium4] then
number_unrolls:=60 div countnodes(node)
else
{$endif i386}
number_unrolls:=30 div countnodes(node);
if number_unrolls=0 then
number_unrolls:=1;
end;
function unroll_loop(node : tnode) : tnode;
var
unrolls,i : integer;
counts : qword;
unrollstatement : tstatementnode;
unrollblock : tblocknode;
begin
result:=nil;
if (cs_opt_size in current_settings.optimizerswitches) then
exit;
if not(node.nodetype in [forn]) then
exit;
unrolls:=number_unrolls(tfornode(node).t2);
if unrolls>1 then
begin
{ number of executions known? }
if (tfornode(node).right.nodetype=ordconstn) and (tfornode(node).t1.nodetype=ordconstn) then
begin
if lnf_backward in tfornode(node).loopflags then
counts:=tordconstnode(tfornode(node).right).value-tordconstnode(tfornode(node).t1).value+1
else
counts:=tordconstnode(tfornode(node).t1).value-tordconstnode(tfornode(node).right).value+1;
{ don't unroll more than we need }
if unrolls>counts then
unrolls:=counts;
{ create block statement }
unrollblock:=internalstatements(unrollstatement);
{ let's unroll (and rock of course) }
for i:=1 to unrolls do
begin
{ create and insert copy of the statement block }
addstatement(unrollstatement,tfornode(tfornode(node).t2).getcopy);
{ set and insert entry label? }
if (counts mod unrolls<>0) and
((counts mod unrolls)=unrolls-i) then
begin
tfornode(node).entrylabel:=clabelnode.create(cnothingnode.create);
addstatement(unrollstatement,tfornode(node).entrylabel);
end;
{ for itself increases at the last iteration }
if i<unrolls then
begin
{ insert incrementation of counter var }
addstatement(unrollstatement,
geninlinenode(in_inc_x,false,ccallparanode.create(tfornode(node).left.getcopy,nil)));
end;
end;
{ can we get rid of the for statement? }
if unrolls=counts then
result:=unrollblock;
end
else
begin
{ unrolling is a little bit more tricky if we don't know the
loop count at compile time, but the solution is to use a jump table
which is indexed by "loop count mod unrolls" at run time and which
jumps then at the appropriate place inside the loop. Because
a module division is expensive, we can use only unroll counts dividable
by 2 }
case unrolls of
1..2:
;
3:
unrolls:=2;
4..7:
unrolls:=4;
{ unrolls>4 already make no sense imo, but who knows (FK) }
8..15:
unrolls:=8;
16..31:
unrolls:=16;
32..63:
unrolls:=32;
64..$7fff:
unrolls:=64;
else
exit;
end;
{ we don't handle this yet }
exit;
end;
if not(assigned(result)) then
begin
tfornode(node).t2.free;
tfornode(node).t2:=unrollblock;
end;
end;
end;
end.