* Register allocator speed boost

This commit is contained in:
daniel 2004-02-08 14:26:28 +00:00
parent f735bb4e9a
commit 9868155d53

View File

@ -123,7 +123,7 @@ unit rgobj;
end;
Tmovelist=record
count:cardinal;
count,sorted_until:cardinal;
data:array[0..$ffff] of Tlinkedlistitem;
end;
Pmovelist=^Tmovelist;
@ -311,6 +311,42 @@ implementation
globals,verbose,tgobj,procinfo;
procedure sort_movelist(ml:Pmovelist);
{Ok, sorting pointers is silly, but it does the job to make Trgobj.combine
faster.}
var h,i,p:word;
t:Tlinkedlistitem;
begin
with ml^ do
begin
if count<2 then
exit;
p:=1;
while 2*p<count do
p:=2*p;
while p<>0 do
begin
for h:=p to count-1 do
begin
i:=h;
t:=data[i];
repeat
if ptrint(data[i-p])<=ptrint(t) then
break;
data[i]:=data[i-p];
dec(i,p);
until i<p;
data[i]:=t;
end;
p:=p shr 1;
end;
sorted_until:=count-1;
end;
end;
{******************************************************************************
tinterferencebitmap
******************************************************************************}
@ -664,11 +700,12 @@ implementation
begin
getmem(movelist,64);
movelist^.count:=0;
movelist^.sorted_until:=0;
end
else
begin
cursize:=memsize(movelist);
if (4*(movelist^.count+1)=cursize) then
if (4*(movelist^.count+2)=cursize) then
reallocmem(movelist,cursize*2);
end;
movelist^.data[movelist^.count]:=data;
@ -742,9 +779,9 @@ implementation
registers in it cause. This allows simplify to execute in
constant time.}
var p,h,i,j,leni,lenj:word;
var p,h,i,leni,lent:word;
t:Tsuperregister;
adji,adjj:Psuperregisterworklist;
adji,adjt:Psuperregisterworklist;
begin
with simplifyworklist do
@ -756,30 +793,25 @@ implementation
p:=2*p;
while p<>0 do
begin
for h:=0 to length-p-1 do
for h:=p to length-1 do
begin
i:=h;
t:=buf^[i];
adjt:=reginfo[buf^[i]].adjlist;
lent:=0;
if adjt<>nil then
lent:=adjt^.length;
repeat
j:=i+p;
adji:=reginfo[buf^[i]].adjlist;
adjj:=reginfo[buf^[j]].adjlist;
if adji=nil then
leni:=0
else
adji:=reginfo[buf^[i-p]].adjlist;
leni:=0;
if adji<>nil then
leni:=adji^.length;
if adjj=nil then
lenj:=0
else
lenj:=adjj^.length;
if lenj>=leni then
break;
t:=buf^[i];
buf^[i]:=buf^[j];
buf^[j]:=t;
if i<p then
if leni<=lent then
break;
buf^[i]:=buf^[i-p];
dec(i,p)
until false;
until i<p;
buf^[i]:=t;
end;
p:=p shr 1;
end;
@ -1009,12 +1041,9 @@ implementation
procedure trgobj.combine(u,v:Tsuperregister);
var adj : Psuperregisterworklist;
i : word;
i,n,p,q:cardinal;
t : tsuperregister;
n,o : cardinal;
decrement : boolean;
{ moves:Tsuperregisterset;}
vm:Pmovelist;
searched:Tlinkedlistitem;
label l1;
@ -1028,50 +1057,53 @@ implementation
{Combine both movelists. Since the movelists are sets, only add
elements that are not already present. The movelists cannot be
empty by definition; nodes are only coalesced if there is a move
between them.}
between them. To prevent quadratic time blowup (movelists of
especially machine registers can get very large because of moves
generated during calls) we need to go into disgusting complexity.
{ Nice attempt; it didn't work.
supregset_reset(moves,false);
supregset_include(moves,u);
(See webtbs/tw2242 for an example that stresses this.)
We want to sort the movelist to be able to search logarithmically.
Unfortunately, sorting the movelist every time before searching
is counter-productive, since the movelist usually grows with a few
items at a time. Therefore, we split the movelist into a sorted
and an unsorted part and search through both. If the unsorted part
becomes too large, we sort.}
{We have to weigh the cost of sorting the list against searching
the cost of the unsorted part. I use factor of 8 here; if the
number of items is less than 8 times the numer of unsorted items,
we'll sort the list.}
with reginfo[u].movelist^ do
for n:=0 to count-1 do
begin
if Tmoveins(data[n]).x=u then
supregset_include(moves,Tmoveins(data[n]).y)
else
supregset_include(moves,Tmoveins(data[n]).x)
end;
with reginfo[v].movelist^ do
for n:=0 to count-1 do
begin
if Tmoveins(data[n]).x=v then
begin
if supregset_in(moves,Tmoveins(data[n]).y) then
add_to_movelist(u,data[n]);
end
else
begin
if supregset_in(moves,Tmoveins(data[n]).x) then
add_to_movelist(u,data[n]);
if count<8*(count-sorted_until) then
sort_movelist(reginfo[u].movelist);
for n:=0 to reginfo[v].movelist^.count-1 do
begin
{Binary search the sorted part of the list.}
searched:=reginfo[v].movelist^.data[n];
p:=0;
q:=reginfo[u].movelist^.sorted_until;
i:=0;
if q<>0 then
repeat
i:=(p+q) shr 1;
if ptrint(searched)>ptrint(reginfo[u].movelist^.data[i]) then
p:=i+1
else
q:=i;
until p=q;
with reginfo[u].movelist^ do
if searched<>data[i] then
begin
{Linear search the unsorted part of the list.}
for i:=sorted_until+1 to count-1 do
if searched=data[i] then
goto l1;
{Not found -> add}
add_to_movelist(u,searched);
l1:
end;
end;}
{This loop is a performance bottleneck for large procedures and therefore
optimized by hand as much as possible. This is because machine registers
generally collect large movelists (for example around procedure calls data
is moved into machine registers). The loop below is unfortunately quadratic,
and guess what this means when a procedure has collected several thousand
moves.... Test webtbs/tw2242 is a good example to illustrate this.}
vm:=reginfo[v].movelist;
for n:=0 to vm^.count-1 do
with reginfo[u].movelist^ do
begin
for o:=0 to count-1 do
if data[o]=vm^.data[n] then
goto l1; {Continue outer loop.}
add_to_movelist(u,vm^.data[n]);
l1:
end;
end;
enable_moves(v);
@ -1080,26 +1112,27 @@ implementation
for i:=1 to adj^.length do
begin
t:=adj^.buf^[i-1];
if not(ri_coalesced in reginfo[t].flags) then
begin
{t has a connection to v. Since we are adding v to u, we
need to connect t to u. However, beware if t was already
connected to u...}
if (ibitmap[t,u]) and not (ri_selected in reginfo[t].flags) then
{... because in that case, we are actually removing an edge
and the degree of t decreases.}
decrement_degree(t)
else
begin
add_edge(t,u);
{We have added an edge to t and u. So their degree increases.
However, v is added to u. That means its neighbours will
no longer point to v, but to u instead. Therefore, only the
degree of u increases.}
if (u>=first_imaginary) and not (ri_selected in reginfo[t].flags) then
inc(reginfo[u].degree);
end;
end;
with reginfo[t] do
if not(ri_coalesced in flags) then
begin
{t has a connection to v. Since we are adding v to u, we
need to connect t to u. However, beware if t was already
connected to u...}
if (ibitmap[t,u]) and not (ri_selected in flags) then
{... because in that case, we are actually removing an edge
and the degree of t decreases.}
decrement_degree(t)
else
begin
add_edge(t,u);
{We have added an edge to t and u. So their degree increases.
However, v is added to u. That means its neighbours will
no longer point to v, but to u instead. Therefore, only the
degree of u increases.}
if (u>=first_imaginary) and not (ri_selected in flags) then
inc(reginfo[u].degree);
end;
end;
end;
if (reginfo[u].degree>=usable_registers_cnt) and freezeworklist.delete(u) then
spillworklist.add(u);
@ -1968,7 +2001,10 @@ implementation
end.
{
$Log$
Revision 1.118 2004-02-07 23:28:34 daniel
Revision 1.119 2004-02-08 14:26:28 daniel
* Register allocator speed boost
Revision 1.118 2004/02/07 23:28:34 daniel
* Take advantage of our new with statement optimization
Revision 1.117 2004/02/06 13:34:46 daniel