* Register allocator speed boost

2025-08-26 00:49:55 +02:00 · 2004-02-08 14:26:28 +00:00 · 2004-02-08 14:26:28 +00:00 · 9868155d53
commit 9868155d53
parent f735bb4e9a
1 changed files with 125 additions and 89 deletions
--- a/compiler/rgobj.pas
+++ b/compiler/rgobj.pas
@ -123,7 +123,7 @@ unit rgobj;
      end;
      Tmovelist=record
-        count:cardinal;
+        count,sorted_until:cardinal;
        data:array[0..$ffff] of Tlinkedlistitem;
      end;
      Pmovelist=^Tmovelist;
@ -311,6 +311,42 @@ implementation
       globals,verbose,tgobj,procinfo;
    procedure sort_movelist(ml:Pmovelist);
    {Ok, sorting pointers is silly, but it does the job to make Trgobj.combine
     faster.}
    var h,i,p:word;
        t:Tlinkedlistitem;
    begin
      with ml^ do
        begin
          if count<2 then
            exit;
          p:=1;
          while 2*p<count do
            p:=2*p;
          while p<>0 do
            begin
              for h:=p to count-1 do
                begin
                  i:=h;
                  t:=data[i];
                  repeat
                    if ptrint(data[i-p])<=ptrint(t) then
                      break;
                    data[i]:=data[i-p];
                    dec(i,p);
                  until i<p;
                  data[i]:=t;
                end;
              p:=p shr 1;
            end;
          sorted_until:=count-1;
        end;
    end;
 {******************************************************************************
                              tinterferencebitmap
 ******************************************************************************}
@ -664,11 +700,12 @@ implementation
            begin
              getmem(movelist,64);
              movelist^.count:=0;
              movelist^.sorted_until:=0;
            end
          else
            begin
              cursize:=memsize(movelist);
-              if (4*(movelist^.count+1)=cursize) then
+              if (4*(movelist^.count+2)=cursize) then
                reallocmem(movelist,cursize*2);
            end;
          movelist^.data[movelist^.count]:=data;
@ -742,9 +779,9 @@ implementation
     registers in it cause. This allows simplify to execute in
     constant time.}
-    var p,h,i,j,leni,lenj:word;
+    var p,h,i,leni,lent:word;
        t:Tsuperregister;
-        adji,adjj:Psuperregisterworklist;
+        adji,adjt:Psuperregisterworklist;
    begin
      with simplifyworklist do
@ -756,30 +793,25 @@ implementation
            p:=2*p;
          while p<>0 do
            begin
-              for h:=0 to length-p-1 do
+              for h:=p to length-1 do
                begin
                  i:=h;
                  t:=buf^[i];
                  adjt:=reginfo[buf^[i]].adjlist;
                  lent:=0;
                  if adjt<>nil then
                    lent:=adjt^.length;
                  repeat
-                    j:=i+p;
+                    adji:=reginfo[buf^[i-p]].adjlist;
-                    adji:=reginfo[buf^[i]].adjlist;
+                    leni:=0;
-                    adjj:=reginfo[buf^[j]].adjlist;
+                    if adji<>nil then
                    if adji=nil then
                      leni:=0
                    else
                      leni:=adji^.length;
-                    if adjj=nil then
+                    if leni<=lent then
                      lenj:=0
                    else
                      lenj:=adjj^.length;
                    if lenj>=leni then
                      break;
                    t:=buf^[i];
                    buf^[i]:=buf^[j];
                    buf^[j]:=t;
                    if i<p then
                      break;
                    buf^[i]:=buf^[i-p];
                    dec(i,p)
-                  until false;
+                  until i<p;
                  buf^[i]:=t;
                end;
              p:=p shr 1;
            end;
@ -1009,12 +1041,9 @@ implementation
    procedure trgobj.combine(u,v:Tsuperregister);
    var adj : Psuperregisterworklist;
-        i : word;
+        i,n,p,q:cardinal;
        t : tsuperregister;
-        n,o : cardinal;
+        searched:Tlinkedlistitem;
        decrement : boolean;
 {	moves:Tsuperregisterset;}
        vm:Pmovelist;
    label l1;
@ -1028,50 +1057,53 @@ implementation
      {Combine both movelists. Since the movelists are sets, only add
       elements that are not already present. The movelists cannot be
       empty by definition; nodes are only coalesced if there is a move
-       between them.}
+       between them. To prevent quadratic time blowup (movelists of
       especially machine registers can get very large because of moves
       generated during calls) we need to go into disgusting complexity.
-{     Nice attempt; it didn't work.
+       (See webtbs/tw2242 for an example that stresses this.)
-      supregset_reset(moves,false);
+
-      supregset_include(moves,u);
+       We want to sort the movelist to be able to search logarithmically.
       Unfortunately, sorting the movelist every time before searching
       is counter-productive, since the movelist usually grows with a few
       items at a time. Therefore, we split the movelist into a sorted
       and an unsorted part and search through both. If the unsorted part
       becomes too large, we sort.}
      {We have to weigh the cost of sorting the list against searching
       the cost of the unsorted part. I use factor of 8 here; if the
       number of items is less than 8 times the numer of unsorted items,
       we'll sort the list.}
      with reginfo[u].movelist^ do
-        for n:=0 to count-1 do
+        if count<8*(count-sorted_until) then
-	  begin
+          sort_movelist(reginfo[u].movelist);
-	    if Tmoveins(data[n]).x=u then
+      for n:=0 to reginfo[v].movelist^.count-1 do
-              supregset_include(moves,Tmoveins(data[n]).y)
+        begin
-	    else
+          {Binary search the sorted part of the list.}
-              supregset_include(moves,Tmoveins(data[n]).x)
+          searched:=reginfo[v].movelist^.data[n];
-          end;
+          p:=0;
-      with reginfo[v].movelist^ do
+          q:=reginfo[u].movelist^.sorted_until;
-        for n:=0 to count-1 do
+          i:=0;
-	  begin
+          if q<>0 then 
-	    if Tmoveins(data[n]).x=v then
+            repeat
-	      begin
+              i:=(p+q) shr 1;
-	        if supregset_in(moves,Tmoveins(data[n]).y) then
+              if ptrint(searched)>ptrint(reginfo[u].movelist^.data[i]) then
-        	  add_to_movelist(u,data[n]);
+                p:=i+1
-              end
+              else
-	    else
+                q:=i;
-	      begin
+            until p=q;
-	        if supregset_in(moves,Tmoveins(data[n]).x) then
+          with reginfo[u].movelist^ do
-        	  add_to_movelist(u,data[n]);
+            if searched<>data[i] then
              begin
                {Linear search the unsorted part of the list.}
                for i:=sorted_until+1 to count-1 do
                  if searched=data[i] then
                    goto l1;
                {Not found -> add}
                add_to_movelist(u,searched);
              l1:
              end;
-	  end;}
+        end;
      {This loop is a performance bottleneck for large procedures and therefore
       optimized by hand as much as possible. This is because machine registers
       generally collect large movelists (for example around procedure calls data
       is moved into machine registers). The loop below is unfortunately quadratic,
       and guess what this means when a procedure has collected several thousand
       moves.... Test webtbs/tw2242 is a good example to illustrate this.}
      vm:=reginfo[v].movelist;
      for n:=0 to vm^.count-1 do
        with reginfo[u].movelist^ do
          begin
            for o:=0 to count-1 do
              if data[o]=vm^.data[n] then
                goto l1; {Continue outer loop.}
            add_to_movelist(u,vm^.data[n]);
          l1:
          end;
      enable_moves(v);
@ -1080,26 +1112,27 @@ implementation
        for i:=1 to adj^.length do
          begin
            t:=adj^.buf^[i-1];
-            if not(ri_coalesced in reginfo[t].flags) then
+            with reginfo[t] do
-              begin
+              if not(ri_coalesced in flags) then
-                {t has a connection to v. Since we are adding v to u, we
+                begin
-                 need to connect t to u. However, beware if t was already
+                  {t has a connection to v. Since we are adding v to u, we
-                 connected to u...}
+                   need to connect t to u. However, beware if t was already
-                if (ibitmap[t,u]) and not (ri_selected in reginfo[t].flags) then
+                   connected to u...}
-                  {... because in that case, we are actually removing an edge
+                  if (ibitmap[t,u]) and not (ri_selected in flags) then
-                   and the degree of t decreases.}
+                    {... because in that case, we are actually removing an edge
-                  decrement_degree(t)
+                     and the degree of t decreases.}
-                else
+                    decrement_degree(t)
-                  begin
+                  else
-                    add_edge(t,u);
+                    begin
-                    {We have added an edge to t and u. So their degree increases.
+                      add_edge(t,u);
-                     However, v is added to u. That means its neighbours will
+                      {We have added an edge to t and u. So their degree increases.
-                     no longer point to v, but to u instead. Therefore, only the
+                       However, v is added to u. That means its neighbours will
-                     degree of u increases.}
+                       no longer point to v, but to u instead. Therefore, only the
-                    if (u>=first_imaginary) and not (ri_selected in reginfo[t].flags) then
+                       degree of u increases.}
-                      inc(reginfo[u].degree);
+                      if (u>=first_imaginary) and not (ri_selected in flags) then
-                  end;
+                        inc(reginfo[u].degree);
-              end;
+                    end;
                end;
          end;
      if (reginfo[u].degree>=usable_registers_cnt) and freezeworklist.delete(u) then
        spillworklist.add(u);
@ -1968,7 +2001,10 @@ implementation
 end.
 {
  $Log$
-  Revision 1.118  2004-02-07 23:28:34  daniel
+  Revision 1.119  2004-02-08 14:26:28  daniel
    * Register allocator speed boost
  Revision 1.118  2004/02/07 23:28:34  daniel
    * Take advantage of our new with statement optimization
  Revision 1.117  2004/02/06 13:34:46  daniel