mirror of
https://gitlab.com/freepascal.org/fpc/source.git
synced 2025-04-12 16:49:35 +02:00
170 lines
5.5 KiB
ObjectPascal
170 lines
5.5 KiB
ObjectPascal
{
|
|
Loop unrolling
|
|
|
|
Copyright (c) 2005 by Florian Klaempfl
|
|
|
|
This program is free software; you can redistribute it and/or modify
|
|
it under the terms of the GNU General Public License as published by
|
|
the Free Software Foundation; either version 2 of the License, or
|
|
(at your option) any later version.
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with this program; if not, write to the Free Software
|
|
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
|
|
|
****************************************************************************
|
|
}
|
|
unit optunrol;
|
|
|
|
{$i fpcdefs.inc}
|
|
|
|
interface
|
|
|
|
uses
|
|
node;
|
|
|
|
function unroll_loop(node : tnode) : tnode;
|
|
|
|
implementation
|
|
|
|
uses
  globtype,globals,
  cpuinfo,
  nutils,
  nbas,nflw,ncon,ninl,ncal,nld;
|
|
|
|
{ Unit-level counter shared by the two routines below: donodecount increments
  it, countnodes resets it to 0 before a walk and reads it back afterwards. }
var
|
|
nodecount : aint;
|
|
|
|
{ Callback handed to foreachnodestatic by countnodes.
  Adds one to the unit-level nodecount on every invocation and always
  returns fen_false. Neither n nor arg is inspected. }
function donodecount(var n: tnode; arg: pointer): foreachnoderesult;
  begin
    donodecount:=fen_false;
    inc(nodecount);
  end;
|
|
|
|
|
|
{ Rough estimate of the size of the tree "node".
  Resets the unit-level nodecount, lets foreachnodestatic invoke donodecount
  on the tree, and returns the total that donodecount accumulated. }
function countnodes(node : tnode) : aint;
  begin
    nodecount:=0;
    foreachnodestatic(node,@donodecount,nil);
    countnodes:=nodecount;
  end;
|
|
|
|
|
|
{ Decides how many copies of the loop body "node" an unrolled loop should hold.

  The answer is a fixed node budget divided by the estimated size of the body
  (see countnodes): 30 nodes by default, 60 on i386 builds when the selected
  cpu type is cpu_Pentium4. The result is never smaller than 1; a result of 1
  means "do not unroll". }
function number_unrolls(node : tnode) : integer;
  var
    budget : integer;
    bodysize : aint;
  begin
    budget:=30;
{$ifdef i386}
    { multiply by 2 for CPUs with a long pipeline }
    if current_settings.cputype in [cpu_Pentium4] then
      budget:=60;
{$endif i386}

    { count the body only once, the walk visits the whole tree }
    bodysize:=countnodes(node);

    { a count of 0 would make the division below fail; presumably this happens
      when the walk visits no node at all (e.g. an unassigned body) - in that
      case there is nothing worth unrolling }
    if bodysize<=0 then
      begin
        result:=1;
        exit;
      end;

    result:=budget div bodysize;

    { bodies larger than the budget are not unrolled }
    if result=0 then
      result:=1;
  end;
|
|
|
|
|
|
{ Tries to unroll the for loop "node".

  Returns nil when the caller should keep using "node". That is the case when
  nothing was changed, and also when the loop was unrolled in place, i.e. its
  body (t2) was replaced by a block holding several copies of the old body.
  Returns a new statement block when the whole for statement can be replaced:
  the block assigns the start value to the counter and then runs every
  iteration back to back.

  Nothing is done when optimizing for size, when "node" is not a for node,
  when number_unrolls does not ask for more than one copy, when the loop
  bounds are not both ordinal constants, or when the constant bounds describe
  a loop that never executes.

  NOTE(review): the body is copied verbatim; a break or continue inside it is
  not treated specially here - confirm that callers/code generation cope. }
function unroll_loop(node : tnode) : tnode;
  var
    unrolls,i : integer;
    counts : qword;
    unrollstatement,newforstatement : tstatementnode;
    unrollblock : tblocknode;
    fornode : tfornode;
    backward : boolean;
  begin
    result:=nil;
    if (cs_opt_size in current_settings.optimizerswitches) then
      exit;
    if not(node.nodetype in [forn]) then
      exit;
    fornode:=tfornode(node);
    unrolls:=number_unrolls(fornode.t2);
    if unrolls<=1 then
      exit;

    { number of executions known? }
    if (fornode.right.nodetype<>ordconstn) or (fornode.t1.nodetype<>ordconstn) then
      begin
        { unrolling is a little bit more tricky if we don't know the
          loop count at compile time, but the solution is to use a jump table
          which is indexed by "loop count mod unrolls" at run time and which
          jumps then to the appropriate place inside the loop. Because
          a modulo division is expensive, we only use unroll counts which are
          a power of 2 }
        case unrolls of
          1..2:
            ;
          3:
            unrolls:=2;
          4..7:
            unrolls:=4;
          { unrolls>4 already make no sense imo, but who knows (FK) }
          8..15:
            unrolls:=8;
          16..31:
            unrolls:=16;
          32..63:
            unrolls:=32;
          64..$7fff:
            unrolls:=64;
          else
            exit;
        end;
        { we don't handle this yet }
        exit;
      end;

    backward:=lnf_backward in fornode.loopflags;

    { right holds the start value, t1 the end value. A loop whose end lies
      before its start never executes; leave it alone instead of letting the
      unsigned iteration count below become 0 or wrap around }
    if backward then
      begin
        if tordconstnode(fornode.right).value<tordconstnode(fornode.t1).value then
          exit;
        counts:=tordconstnode(fornode.right).value-tordconstnode(fornode.t1).value+1;
      end
    else
      begin
        if tordconstnode(fornode.t1).value<tordconstnode(fornode.right).value then
          exit;
        counts:=tordconstnode(fornode.t1).value-tordconstnode(fornode.right).value+1;
      end;

    { don't unroll more than we need }
    if unrolls>counts then
      unrolls:=counts;

    { create block statement }
    unrollblock:=internalstatements(unrollstatement);

    { let's unroll (and rock of course) }
    for i:=1 to unrolls do
      begin
        { create and insert copy of the statement block }
        addstatement(unrollstatement,fornode.t2.getcopy);

        { set and insert entry label? Only needed when the iteration count is
          not a multiple of the unroll factor: the label is placed so that
          "counts mod unrolls" copies of the body follow it }
        if (counts mod unrolls<>0) and
          ((counts mod unrolls)=unrolls-i) then
          begin
            fornode.entrylabel:=clabelnode.create(cnothingnode.create);
            addstatement(unrollstatement,fornode.entrylabel);
          end;

        { for itself steps the counter at the last iteration }
        if i<unrolls then
          begin
            { step the counter var between two copies, following the
              direction of the loop }
            if backward then
              addstatement(unrollstatement,
                geninlinenode(in_dec_x,false,ccallparanode.create(fornode.left.getcopy,nil)))
            else
              addstatement(unrollstatement,
                geninlinenode(in_inc_x,false,ccallparanode.create(fornode.left.getcopy,nil)));
          end;
      end;

    { can we get rid of the for statement? }
    if unrolls=counts then
      begin
        { the for node no longer initialises the counter for us, so the
          replacement has to start with the initial assignment }
        result:=internalstatements(newforstatement);
        addstatement(newforstatement,
          cassignmentnode.create(fornode.left.getcopy,fornode.right.getcopy));
        addstatement(newforstatement,unrollblock);
      end
    else
      begin
        { keep the for node, but let it iterate over the unrolled body }
        fornode.t2.free;
        fornode.t2:=unrollblock;
      end;
  end;
|
|
|
|
end.
|