mirror of
https://gitlab.com/freepascal.org/fpc/source.git
synced 2025-08-26 00:49:55 +02:00
* Register allocator speed boost
This commit is contained in:
parent
f735bb4e9a
commit
9868155d53
@ -123,7 +123,7 @@ unit rgobj;
|
|||||||
end;
|
end;
|
||||||
|
|
||||||
Tmovelist=record
|
Tmovelist=record
|
||||||
count:cardinal;
|
count,sorted_until:cardinal;
|
||||||
data:array[0..$ffff] of Tlinkedlistitem;
|
data:array[0..$ffff] of Tlinkedlistitem;
|
||||||
end;
|
end;
|
||||||
Pmovelist=^Tmovelist;
|
Pmovelist=^Tmovelist;
|
||||||
@ -311,6 +311,42 @@ implementation
|
|||||||
globals,verbose,tgobj,procinfo;
|
globals,verbose,tgobj,procinfo;
|
||||||
|
|
||||||
|
|
||||||
|
procedure sort_movelist(ml:Pmovelist);

{Sorts ml^.data[0..count-1] in place into ascending order of the pointer
 values (compared as ptrint) with a shell sort, and afterwards records in
 ml^.sorted_until that the list is sorted up to and including its last item.
 Ok, sorting pointers is silly, but it does the job to make Trgobj.combine
 faster.}

{The indices are cardinals, just like ml^.count. With 16-bit words the gap
 computation below wraps around to 0 and never terminates once count exceeds
 65536, and the loop bound count-1 no longer fits in the loop variable.
 NOTE(review): data is declared as array[0..$ffff]; indexing beyond that
 relies on range checking being off for this unit - confirm.}
var h,i,p:cardinal;
    t:Tlinkedlistitem;

begin
  with ml^ do
    begin
      {Lists of 0 or 1 items are left untouched; sorted_until is not updated.}
      if count<2 then
        exit;
      {Initial gap: the largest power of two that is smaller than count.}
      p:=1;
      while 2*p<count do
        p:=2*p;
      {Halve the gap after every pass; the last pass runs with gap 1.}
      while p<>0 do
        begin
          {Insertion sort over the items that lie p positions apart.}
          for h:=p to count-1 do
            begin
              i:=h;
              t:=data[i];
              {Shift larger items up by p until the slot for t is found.
               i>=p holds on every entry, so data[i-p] is a valid index.}
              repeat
                if ptrint(data[i-p])<=ptrint(t) then
                  break;
                data[i]:=data[i-p];
                dec(i,p);
              until i<p;
              data[i]:=t;
            end;
          p:=p shr 1;
        end;
      {The whole list is sorted now.}
      sorted_until:=count-1;
    end;
end;
|
||||||
|
|
||||||
{******************************************************************************
|
{******************************************************************************
|
||||||
tinterferencebitmap
|
tinterferencebitmap
|
||||||
******************************************************************************}
|
******************************************************************************}
|
||||||
@ -664,11 +700,12 @@ implementation
|
|||||||
begin
|
begin
|
||||||
getmem(movelist,64);
|
getmem(movelist,64);
|
||||||
movelist^.count:=0;
|
movelist^.count:=0;
|
||||||
|
movelist^.sorted_until:=0;
|
||||||
end
|
end
|
||||||
else
|
else
|
||||||
begin
|
begin
|
||||||
cursize:=memsize(movelist);
|
cursize:=memsize(movelist);
|
||||||
if (4*(movelist^.count+1)=cursize) then
|
if (4*(movelist^.count+2)=cursize) then
|
||||||
reallocmem(movelist,cursize*2);
|
reallocmem(movelist,cursize*2);
|
||||||
end;
|
end;
|
||||||
movelist^.data[movelist^.count]:=data;
|
movelist^.data[movelist^.count]:=data;
|
||||||
@ -742,9 +779,9 @@ implementation
|
|||||||
registers in it cause. This allows simplify to execute in
|
registers in it cause. This allows simplify to execute in
|
||||||
constant time.}
|
constant time.}
|
||||||
|
|
||||||
var p,h,i,j,leni,lenj:word;
|
var p,h,i,leni,lent:word;
|
||||||
t:Tsuperregister;
|
t:Tsuperregister;
|
||||||
adji,adjj:Psuperregisterworklist;
|
adji,adjt:Psuperregisterworklist;
|
||||||
|
|
||||||
begin
|
begin
|
||||||
with simplifyworklist do
|
with simplifyworklist do
|
||||||
@ -756,30 +793,25 @@ implementation
|
|||||||
p:=2*p;
|
p:=2*p;
|
||||||
while p<>0 do
|
while p<>0 do
|
||||||
begin
|
begin
|
||||||
for h:=0 to length-p-1 do
|
for h:=p to length-1 do
|
||||||
begin
|
begin
|
||||||
i:=h;
|
i:=h;
|
||||||
|
t:=buf^[i];
|
||||||
|
adjt:=reginfo[buf^[i]].adjlist;
|
||||||
|
lent:=0;
|
||||||
|
if adjt<>nil then
|
||||||
|
lent:=adjt^.length;
|
||||||
repeat
|
repeat
|
||||||
j:=i+p;
|
adji:=reginfo[buf^[i-p]].adjlist;
|
||||||
adji:=reginfo[buf^[i]].adjlist;
|
leni:=0;
|
||||||
adjj:=reginfo[buf^[j]].adjlist;
|
if adji<>nil then
|
||||||
if adji=nil then
|
|
||||||
leni:=0
|
|
||||||
else
|
|
||||||
leni:=adji^.length;
|
leni:=adji^.length;
|
||||||
if adjj=nil then
|
if leni<=lent then
|
||||||
lenj:=0
|
|
||||||
else
|
|
||||||
lenj:=adjj^.length;
|
|
||||||
if lenj>=leni then
|
|
||||||
break;
|
|
||||||
t:=buf^[i];
|
|
||||||
buf^[i]:=buf^[j];
|
|
||||||
buf^[j]:=t;
|
|
||||||
if i<p then
|
|
||||||
break;
|
break;
|
||||||
|
buf^[i]:=buf^[i-p];
|
||||||
dec(i,p)
|
dec(i,p)
|
||||||
until false;
|
until i<p;
|
||||||
|
buf^[i]:=t;
|
||||||
end;
|
end;
|
||||||
p:=p shr 1;
|
p:=p shr 1;
|
||||||
end;
|
end;
|
||||||
@ -1009,12 +1041,9 @@ implementation
|
|||||||
procedure trgobj.combine(u,v:Tsuperregister);
|
procedure trgobj.combine(u,v:Tsuperregister);
|
||||||
|
|
||||||
var adj : Psuperregisterworklist;
|
var adj : Psuperregisterworklist;
|
||||||
i : word;
|
i,n,p,q:cardinal;
|
||||||
t : tsuperregister;
|
t : tsuperregister;
|
||||||
n,o : cardinal;
|
searched:Tlinkedlistitem;
|
||||||
decrement : boolean;
|
|
||||||
{ moves:Tsuperregisterset;}
|
|
||||||
vm:Pmovelist;
|
|
||||||
|
|
||||||
label l1;
|
label l1;
|
||||||
|
|
||||||
@ -1028,50 +1057,53 @@ implementation
|
|||||||
{Combine both movelists. Since the movelists are sets, only add
|
{Combine both movelists. Since the movelists are sets, only add
|
||||||
elements that are not already present. The movelists cannot be
|
elements that are not already present. The movelists cannot be
|
||||||
empty by definition; nodes are only coalesced if there is a move
|
empty by definition; nodes are only coalesced if there is a move
|
||||||
between them.}
|
between them. To prevent quadratic time blowup (movelists of
|
||||||
|
especially machine registers can get very large because of moves
|
||||||
|
generated during calls) we need to go into disgusting complexity.
|
||||||
|
|
||||||
{ Nice attempt; it didn't work.
|
(See webtbs/tw2242 for an example that stresses this.)
|
||||||
supregset_reset(moves,false);
|
|
||||||
supregset_include(moves,u);
|
We want to sort the movelist to be able to search logarithmically.
|
||||||
|
Unfortunately, sorting the movelist every time before searching
|
||||||
|
is counter-productive, since the movelist usually grows with a few
|
||||||
|
items at a time. Therefore, we split the movelist into a sorted
|
||||||
|
and an unsorted part and search through both. If the unsorted part
|
||||||
|
becomes too large, we sort.}
|
||||||
|
|
||||||
|
{We have to weigh the cost of sorting the list against searching
|
||||||
|
the unsorted part linearly. I use a factor of 8 here; if the
|
||||||
|
number of items is less than 8 times the number of unsorted items,
|
||||||
|
we'll sort the list.}
|
||||||
with reginfo[u].movelist^ do
|
with reginfo[u].movelist^ do
|
||||||
for n:=0 to count-1 do
|
if count<8*(count-sorted_until) then
|
||||||
begin
|
sort_movelist(reginfo[u].movelist);
|
||||||
if Tmoveins(data[n]).x=u then
|
for n:=0 to reginfo[v].movelist^.count-1 do
|
||||||
supregset_include(moves,Tmoveins(data[n]).y)
|
begin
|
||||||
else
|
{Binary search the sorted part of the list.}
|
||||||
supregset_include(moves,Tmoveins(data[n]).x)
|
searched:=reginfo[v].movelist^.data[n];
|
||||||
end;
|
p:=0;
|
||||||
with reginfo[v].movelist^ do
|
q:=reginfo[u].movelist^.sorted_until;
|
||||||
for n:=0 to count-1 do
|
i:=0;
|
||||||
begin
|
if q<>0 then
|
||||||
if Tmoveins(data[n]).x=v then
|
repeat
|
||||||
begin
|
i:=(p+q) shr 1;
|
||||||
if supregset_in(moves,Tmoveins(data[n]).y) then
|
if ptrint(searched)>ptrint(reginfo[u].movelist^.data[i]) then
|
||||||
add_to_movelist(u,data[n]);
|
p:=i+1
|
||||||
end
|
else
|
||||||
else
|
q:=i;
|
||||||
begin
|
until p=q;
|
||||||
if supregset_in(moves,Tmoveins(data[n]).x) then
|
with reginfo[u].movelist^ do
|
||||||
add_to_movelist(u,data[n]);
|
if searched<>data[i] then
|
||||||
|
begin
|
||||||
|
{Linear search the unsorted part of the list.}
|
||||||
|
for i:=sorted_until+1 to count-1 do
|
||||||
|
if searched=data[i] then
|
||||||
|
goto l1;
|
||||||
|
{Not found -> add}
|
||||||
|
add_to_movelist(u,searched);
|
||||||
|
l1:
|
||||||
end;
|
end;
|
||||||
end;}
|
end;
|
||||||
|
|
||||||
{This loop is a performance bottleneck for large procedures and therefore
|
|
||||||
optimized by hand as much as possible. This is because machine registers
|
|
||||||
generally collect large movelists (for example around procedure calls data
|
|
||||||
is moved into machine registers). The loop below is unfortunately quadratic,
|
|
||||||
and guess what this means when a procedure has collected several thousand
|
|
||||||
moves.... Test webtbs/tw2242 is a good example to illustrate this.}
|
|
||||||
vm:=reginfo[v].movelist;
|
|
||||||
for n:=0 to vm^.count-1 do
|
|
||||||
with reginfo[u].movelist^ do
|
|
||||||
begin
|
|
||||||
for o:=0 to count-1 do
|
|
||||||
if data[o]=vm^.data[n] then
|
|
||||||
goto l1; {Continue outer loop.}
|
|
||||||
add_to_movelist(u,vm^.data[n]);
|
|
||||||
l1:
|
|
||||||
end;
|
|
||||||
|
|
||||||
enable_moves(v);
|
enable_moves(v);
|
||||||
|
|
||||||
@ -1080,26 +1112,27 @@ implementation
|
|||||||
for i:=1 to adj^.length do
|
for i:=1 to adj^.length do
|
||||||
begin
|
begin
|
||||||
t:=adj^.buf^[i-1];
|
t:=adj^.buf^[i-1];
|
||||||
if not(ri_coalesced in reginfo[t].flags) then
|
with reginfo[t] do
|
||||||
begin
|
if not(ri_coalesced in flags) then
|
||||||
{t has a connection to v. Since we are adding v to u, we
|
begin
|
||||||
need to connect t to u. However, beware if t was already
|
{t has a connection to v. Since we are adding v to u, we
|
||||||
connected to u...}
|
need to connect t to u. However, beware if t was already
|
||||||
if (ibitmap[t,u]) and not (ri_selected in reginfo[t].flags) then
|
connected to u...}
|
||||||
{... because in that case, we are actually removing an edge
|
if (ibitmap[t,u]) and not (ri_selected in flags) then
|
||||||
and the degree of t decreases.}
|
{... because in that case, we are actually removing an edge
|
||||||
decrement_degree(t)
|
and the degree of t decreases.}
|
||||||
else
|
decrement_degree(t)
|
||||||
begin
|
else
|
||||||
add_edge(t,u);
|
begin
|
||||||
{We have added an edge to t and u. So their degree increases.
|
add_edge(t,u);
|
||||||
However, v is added to u. That means its neighbours will
|
{We have added an edge to t and u. So their degree increases.
|
||||||
no longer point to v, but to u instead. Therefore, only the
|
However, v is added to u. That means its neighbours will
|
||||||
degree of u increases.}
|
no longer point to v, but to u instead. Therefore, only the
|
||||||
if (u>=first_imaginary) and not (ri_selected in reginfo[t].flags) then
|
degree of u increases.}
|
||||||
inc(reginfo[u].degree);
|
if (u>=first_imaginary) and not (ri_selected in flags) then
|
||||||
end;
|
inc(reginfo[u].degree);
|
||||||
end;
|
end;
|
||||||
|
end;
|
||||||
end;
|
end;
|
||||||
if (reginfo[u].degree>=usable_registers_cnt) and freezeworklist.delete(u) then
|
if (reginfo[u].degree>=usable_registers_cnt) and freezeworklist.delete(u) then
|
||||||
spillworklist.add(u);
|
spillworklist.add(u);
|
||||||
@ -1968,7 +2001,10 @@ implementation
|
|||||||
end.
|
end.
|
||||||
{
|
{
|
||||||
$Log$
|
$Log$
|
||||||
Revision 1.118 2004-02-07 23:28:34 daniel
|
Revision 1.119 2004-02-08 14:26:28 daniel
|
||||||
|
* Register allocator speed boost
|
||||||
|
|
||||||
|
Revision 1.118 2004/02/07 23:28:34 daniel
|
||||||
* Take advantage of our new with statement optimization
|
* Take advantage of our new with statement optimization
|
||||||
|
|
||||||
Revision 1.117 2004/02/06 13:34:46 daniel
|
Revision 1.117 2004/02/06 13:34:46 daniel
|
||||||
|
Loading…
Reference in New Issue
Block a user