diff --git a/compiler/rgobj.pas b/compiler/rgobj.pas index b3b54757e3..d605b98e84 100644 --- a/compiler/rgobj.pas +++ b/compiler/rgobj.pas @@ -123,7 +123,7 @@ unit rgobj; end; Tmovelist=record - count:cardinal; + count,sorted_until:cardinal; data:array[0..$ffff] of Tlinkedlistitem; end; Pmovelist=^Tmovelist; @@ -311,6 +311,42 @@ implementation globals,verbose,tgobj,procinfo; + procedure sort_movelist(ml:Pmovelist); + + {Ok, sorting pointers is silly, but it does the job to make Trgobj.combine + faster.} + + var h,i,p:word; + t:Tlinkedlistitem; + + begin + with ml^ do + begin + if count<2 then + exit; + p:=1; + while 2*p0 do + begin + for h:=p to count-1 do + begin + i:=h; + t:=data[i]; + repeat + if ptrint(data[i-p])<=ptrint(t) then + break; + data[i]:=data[i-p]; + dec(i,p); + until i0 do begin - for h:=0 to length-p-1 do + for h:=p to length-1 do begin i:=h; + t:=buf^[i]; + adjt:=reginfo[buf^[i]].adjlist; + lent:=0; + if adjt<>nil then + lent:=adjt^.length; repeat - j:=i+p; - adji:=reginfo[buf^[i]].adjlist; - adjj:=reginfo[buf^[j]].adjlist; - if adji=nil then - leni:=0 - else + adji:=reginfo[buf^[i-p]].adjlist; + leni:=0; + if adji<>nil then leni:=adji^.length; - if adjj=nil then - lenj:=0 - else - lenj:=adjj^.length; - if lenj>=leni then - break; - t:=buf^[i]; - buf^[i]:=buf^[j]; - buf^[j]:=t; - if i

0 then + repeat + i:=(p+q) shr 1; + if ptrint(searched)>ptrint(reginfo[u].movelist^.data[i]) then + p:=i+1 + else + q:=i; + until p=q; + with reginfo[u].movelist^ do + if searched<>data[i] then + begin + {Linear search the unsorted part of the list.} + for i:=sorted_until+1 to count-1 do + if searched=data[i] then + goto l1; + {Not found -> add} + add_to_movelist(u,searched); + l1: end; - end;} - - {This loop is a performance bottleneck for large procedures and therefore - optimized by hand as much as possible. This is because machine registers - generally collect large movelists (for example around procedure calls data - is moved into machine registers). The loop below is unfortunately quadratic, - and guess what this means when a procedure has collected several thousand - moves.... Test webtbs/tw2242 is a good example to illustrate this.} - vm:=reginfo[v].movelist; - for n:=0 to vm^.count-1 do - with reginfo[u].movelist^ do - begin - for o:=0 to count-1 do - if data[o]=vm^.data[n] then - goto l1; {Continue outer loop.} - add_to_movelist(u,vm^.data[n]); - l1: - end; + end; enable_moves(v); @@ -1080,26 +1112,27 @@ implementation for i:=1 to adj^.length do begin t:=adj^.buf^[i-1]; - if not(ri_coalesced in reginfo[t].flags) then - begin - {t has a connection to v. Since we are adding v to u, we - need to connect t to u. However, beware if t was already - connected to u...} - if (ibitmap[t,u]) and not (ri_selected in reginfo[t].flags) then - {... because in that case, we are actually removing an edge - and the degree of t decreases.} - decrement_degree(t) - else - begin - add_edge(t,u); - {We have added an edge to t and u. So their degree increases. - However, v is added to u. That means its neighbours will - no longer point to v, but to u instead. Therefore, only the - degree of u increases.} - if (u>=first_imaginary) and not (ri_selected in reginfo[t].flags) then - inc(reginfo[u].degree); - end; - end; + with reginfo[t] do + if not(ri_coalesced in flags) then + begin + {t has a connection to v. Since we are adding v to u, we + need to connect t to u. However, beware if t was already + connected to u...} + if (ibitmap[t,u]) and not (ri_selected in flags) then + {... because in that case, we are actually removing an edge + and the degree of t decreases.} + decrement_degree(t) + else + begin + add_edge(t,u); + {We have added an edge to t and u. So their degree increases. + However, v is added to u. That means its neighbours will + no longer point to v, but to u instead. Therefore, only the + degree of u increases.} + if (u>=first_imaginary) and not (ri_selected in flags) then + inc(reginfo[u].degree); + end; + end; end; if (reginfo[u].degree>=usable_registers_cnt) and freezeworklist.delete(u) then spillworklist.add(u); @@ -1968,7 +2001,10 @@ implementation end. { $Log$ - Revision 1.118 2004-02-07 23:28:34 daniel + Revision 1.119 2004-02-08 14:26:28 daniel + * Register allocator speed boost + + Revision 1.118 2004/02/07 23:28:34 daniel * Take advantage of our new with statement optimization Revision 1.117 2004/02/06 13:34:46 daniel