* Register allocator speed boost

This commit is contained in:
daniel 2004-02-08 14:26:28 +00:00
parent f735bb4e9a
commit 9868155d53

View File

@ -123,7 +123,7 @@ unit rgobj;
end; end;
Tmovelist=record Tmovelist=record
count:cardinal; count,sorted_until:cardinal;
data:array[0..$ffff] of Tlinkedlistitem; data:array[0..$ffff] of Tlinkedlistitem;
end; end;
Pmovelist=^Tmovelist; Pmovelist=^Tmovelist;
@ -311,6 +311,42 @@ implementation
globals,verbose,tgobj,procinfo; globals,verbose,tgobj,procinfo;
procedure sort_movelist(ml:Pmovelist);

{Ok, sorting pointers is silly, but it does the job to make Trgobj.combine
 faster.

 Sorts ml^.data[0..count-1] in place, in ascending order of the items'
 pointer values compared as ptrint, and then sets ml^.sorted_until to
 count-1, the index of the last sorted entry.

 Lists with fewer than two entries are left completely untouched; in that
 case sorted_until is not updated either.

 The algorithm is a Shell sort: a gapped insertion sort whose gap starts at
 the largest power of two below count and is halved after every pass.}

{The indices are cardinal, like count itself. With 16-bit word indices the
 loops below would be truncated as soon as a list holds more than 65535
 entries.}
var h,i,p:cardinal;
    t:Tlinkedlistitem;

begin
  with ml^ do
    begin
      if count<2 then
        exit;
      {Find the largest power of two that is still smaller than count.}
      p:=1;
      while 2*p<count do
        p:=2*p;
      while p<>0 do
        begin
          {Insertion sort of the elements that lie p positions apart.}
          for h:=p to count-1 do
            begin
              i:=h;
              t:=data[i];
              {Shift larger elements up by p until the slot for t is found.
               i>=p holds on entry of every iteration, so i-p cannot
               underflow.}
              repeat
                if ptrint(data[i-p])<=ptrint(t) then
                  break;
                data[i]:=data[i-p];
                dec(i,p);
              until i<p;
              data[i]:=t;
            end;
          p:=p shr 1;
        end;
      {Everything up to and including the last entry is sorted now.}
      sorted_until:=count-1;
    end;
end;
{****************************************************************************** {******************************************************************************
tinterferencebitmap tinterferencebitmap
******************************************************************************} ******************************************************************************}
@ -664,11 +700,12 @@ implementation
begin begin
getmem(movelist,64); getmem(movelist,64);
movelist^.count:=0; movelist^.count:=0;
movelist^.sorted_until:=0;
end end
else else
begin begin
cursize:=memsize(movelist); cursize:=memsize(movelist);
if (4*(movelist^.count+1)=cursize) then if (4*(movelist^.count+2)=cursize) then
reallocmem(movelist,cursize*2); reallocmem(movelist,cursize*2);
end; end;
movelist^.data[movelist^.count]:=data; movelist^.data[movelist^.count]:=data;
@ -742,9 +779,9 @@ implementation
registers in it cause. This allows simplify to execute in registers in it cause. This allows simplify to execute in
constant time.} constant time.}
var p,h,i,j,leni,lenj:word; var p,h,i,leni,lent:word;
t:Tsuperregister; t:Tsuperregister;
adji,adjj:Psuperregisterworklist; adji,adjt:Psuperregisterworklist;
begin begin
with simplifyworklist do with simplifyworklist do
@ -756,30 +793,25 @@ implementation
p:=2*p; p:=2*p;
while p<>0 do while p<>0 do
begin begin
for h:=0 to length-p-1 do for h:=p to length-1 do
begin begin
i:=h; i:=h;
t:=buf^[i];
adjt:=reginfo[buf^[i]].adjlist;
lent:=0;
if adjt<>nil then
lent:=adjt^.length;
repeat repeat
j:=i+p; adji:=reginfo[buf^[i-p]].adjlist;
adji:=reginfo[buf^[i]].adjlist; leni:=0;
adjj:=reginfo[buf^[j]].adjlist; if adji<>nil then
if adji=nil then
leni:=0
else
leni:=adji^.length; leni:=adji^.length;
if adjj=nil then if leni<=lent then
lenj:=0
else
lenj:=adjj^.length;
if lenj>=leni then
break;
t:=buf^[i];
buf^[i]:=buf^[j];
buf^[j]:=t;
if i<p then
break; break;
buf^[i]:=buf^[i-p];
dec(i,p) dec(i,p)
until false; until i<p;
buf^[i]:=t;
end; end;
p:=p shr 1; p:=p shr 1;
end; end;
@ -1009,12 +1041,9 @@ implementation
procedure trgobj.combine(u,v:Tsuperregister); procedure trgobj.combine(u,v:Tsuperregister);
var adj : Psuperregisterworklist; var adj : Psuperregisterworklist;
i : word; i,n,p,q:cardinal;
t : tsuperregister; t : tsuperregister;
n,o : cardinal; searched:Tlinkedlistitem;
decrement : boolean;
{ moves:Tsuperregisterset;}
vm:Pmovelist;
label l1; label l1;
@ -1028,50 +1057,53 @@ implementation
{Combine both movelists. Since the movelists are sets, only add {Combine both movelists. Since the movelists are sets, only add
elements that are not already present. The movelists cannot be elements that are not already present. The movelists cannot be
empty by definition; nodes are only coalesced if there is a move empty by definition; nodes are only coalesced if there is a move
between them.} between them. To prevent quadratic time blowup (movelists of
especially machine registers can get very large because of moves
generated during calls) we need to go into disgusting complexity.
{ Nice attempt; it didn't work. (See webtbs/tw2242 for an example that stresses this.)
supregset_reset(moves,false);
supregset_include(moves,u); We want to sort the movelist to be able to search logarithmically.
Unfortunately, sorting the movelist every time before searching
is counter-productive, since the movelist usually grows with a few
items at a time. Therefore, we split the movelist into a sorted
and an unsorted part and search through both. If the unsorted part
becomes too large, we sort.}
{We have to weigh the cost of sorting the list against the cost of
searching the unsorted part. I use a factor of 8 here; if the
number of items is less than 8 times the number of unsorted items,
we'll sort the list.}
with reginfo[u].movelist^ do with reginfo[u].movelist^ do
for n:=0 to count-1 do if count<8*(count-sorted_until) then
begin sort_movelist(reginfo[u].movelist);
if Tmoveins(data[n]).x=u then for n:=0 to reginfo[v].movelist^.count-1 do
supregset_include(moves,Tmoveins(data[n]).y) begin
else {Binary search the sorted part of the list.}
supregset_include(moves,Tmoveins(data[n]).x) searched:=reginfo[v].movelist^.data[n];
end; p:=0;
with reginfo[v].movelist^ do q:=reginfo[u].movelist^.sorted_until;
for n:=0 to count-1 do i:=0;
begin if q<>0 then
if Tmoveins(data[n]).x=v then repeat
begin i:=(p+q) shr 1;
if supregset_in(moves,Tmoveins(data[n]).y) then if ptrint(searched)>ptrint(reginfo[u].movelist^.data[i]) then
add_to_movelist(u,data[n]); p:=i+1
end else
else q:=i;
begin until p=q;
if supregset_in(moves,Tmoveins(data[n]).x) then with reginfo[u].movelist^ do
add_to_movelist(u,data[n]); if searched<>data[i] then
begin
{Linear search the unsorted part of the list.}
for i:=sorted_until+1 to count-1 do
if searched=data[i] then
goto l1;
{Not found -> add}
add_to_movelist(u,searched);
l1:
end; end;
end;} end;
{This loop is a performance bottleneck for large procedures and therefore
optimized by hand as much as possible. This is because machine registers
generally collect large movelists (for example around procedure calls data
is moved into machine registers). The loop below is unfortunately quadratic,
and guess what this means when a procedure has collected several thousand
moves.... Test webtbs/tw2242 is a good example to illustrate this.}
vm:=reginfo[v].movelist;
for n:=0 to vm^.count-1 do
with reginfo[u].movelist^ do
begin
for o:=0 to count-1 do
if data[o]=vm^.data[n] then
goto l1; {Continue outer loop.}
add_to_movelist(u,vm^.data[n]);
l1:
end;
enable_moves(v); enable_moves(v);
@ -1080,26 +1112,27 @@ implementation
for i:=1 to adj^.length do for i:=1 to adj^.length do
begin begin
t:=adj^.buf^[i-1]; t:=adj^.buf^[i-1];
if not(ri_coalesced in reginfo[t].flags) then with reginfo[t] do
begin if not(ri_coalesced in flags) then
{t has a connection to v. Since we are adding v to u, we begin
need to connect t to u. However, beware if t was already {t has a connection to v. Since we are adding v to u, we
connected to u...} need to connect t to u. However, beware if t was already
if (ibitmap[t,u]) and not (ri_selected in reginfo[t].flags) then connected to u...}
{... because in that case, we are actually removing an edge if (ibitmap[t,u]) and not (ri_selected in flags) then
and the degree of t decreases.} {... because in that case, we are actually removing an edge
decrement_degree(t) and the degree of t decreases.}
else decrement_degree(t)
begin else
add_edge(t,u); begin
{We have added an edge to t and u. So their degree increases. add_edge(t,u);
However, v is added to u. That means its neighbours will {We have added an edge to t and u. So their degree increases.
no longer point to v, but to u instead. Therefore, only the However, v is added to u. That means its neighbours will
degree of u increases.} no longer point to v, but to u instead. Therefore, only the
if (u>=first_imaginary) and not (ri_selected in reginfo[t].flags) then degree of u increases.}
inc(reginfo[u].degree); if (u>=first_imaginary) and not (ri_selected in flags) then
end; inc(reginfo[u].degree);
end; end;
end;
end; end;
if (reginfo[u].degree>=usable_registers_cnt) and freezeworklist.delete(u) then if (reginfo[u].degree>=usable_registers_cnt) and freezeworklist.delete(u) then
spillworklist.add(u); spillworklist.add(u);
@ -1968,7 +2001,10 @@ implementation
end. end.
{ {
$Log$ $Log$
Revision 1.118 2004-02-07 23:28:34 daniel Revision 1.119 2004-02-08 14:26:28 daniel
* Register allocator speed boost
Revision 1.118 2004/02/07 23:28:34 daniel
* Take advantage of our new with statement optimization * Take advantage of our new with statement optimization
Revision 1.117 2004/02/06 13:34:46 daniel Revision 1.117 2004/02/06 13:34:46 daniel