Replace TSuperRegisterWorkList with TSuperRegisterWork >Hash< List.

2025-04-05 20:08:52 +02:00 · 2022-05-04 01:13:08 +03:00 · 2022-05-04 01:13:08 +03:00 · b2e596ada2
commit b2e596ada2
parent daa825a5a3
3 changed files with 464 additions and 408 deletions
--- a/compiler/cgbase.pas
+++ b/compiler/cgbase.pas
@ -312,6 +312,7 @@ interface
      TSubRegisterSet = set of TSubRegister;

      TSuperRegister = type word;
+      PSuperRegister = ^TSuperRegister;

      {
        The new register coding:
@ -368,26 +369,36 @@ interface
        shuffles : array[1..1] of word;
      end;

-      Tsuperregisterarray=array of Tsuperregister;
+      Tsuperregistercomparefunc = function(a, b: TSuperRegister; param: pointer): boolean;

-      Tsuperregisterworklist=object
-        buflength,
-        buflengthinc,
-        length:word;
-        buf:tsuperregisterarray;
+      Tsuperregisterworkhashlist=object
        constructor init;
-        constructor copyfrom(const x:Tsuperregisterworklist);
+        constructor copyfrom(const x:Tsuperregisterworkhashlist);
        destructor  done;
        procedure clear;
        procedure add(s:tsuperregister);
        function addnodup(s:tsuperregister): boolean;
        { returns the last element and removes it from the list }
        function get:tsuperregister;
-        function readidx(i:word):tsuperregister;
-        procedure deleteidx(i:word);
+        procedure deleteidx(i:SizeInt);
        function delete(s:tsuperregister):boolean;
+        procedure sort(less:Tsuperregistercomparefunc; param:pointer);
+      private
+        { Either a region is allocated for FItems+h+next, or the table is empty with hmask=0, fake 'h' pointing to a single -1, and distinctive FItems=nil. }
+        FItems : Psuperregister;
+        h : PInt32; { Count of 'h's is always hmask+1 and is always a power of two. h[i]=-1 means empty, h[i]=hi>=0 references FItems[hi]. }
+        next : PInt32; { Chaining to allow duplicates and resolve collisions. }
+        FNItems : int32; { signed to allow subtracting one without surprises... }
+        hmask,minItems,maxItems : uint32;
+        procedure rehash(forItems:SizeUint);
+        class function allocateregion(
+          nh,amaxitems:uint32; out aitems:Psuperregister; out ah:PInt32; out anext:PInt32):SizeUint; static;
+        class procedure rebuildh(aitems:Psuperregister; ah,anext:PInt32; ahmask,anitems:int32); static;
+      public
+        property length: int32 read FNItems;
+        property buf: Psuperregister read FItems;
      end;
-      psuperregisterworklist=^tsuperregisterworklist;
+      Psuperregisterworkhashlist = ^Tsuperregisterworkhashlist;

    const
       { alias for easier understanding }
@ -540,119 +551,258 @@ implementation
      verbose,
      cutils;

+
 {******************************************************************************
-                             tsuperregisterworklist
+                          tsuperregisterworkhashlist
 ******************************************************************************}

-    constructor tsuperregisterworklist.init;
+    const
+      SuperRegisterWorkHashList_EmptyH: int32 = -1;

+
+    constructor Tsuperregisterworkhashlist.init;
    begin
-      length:=0;
-      buflength:=0;
-      buflengthinc:=16;
-      buf:=nil;
-    end;
-
-    constructor Tsuperregisterworklist.copyfrom(const x:Tsuperregisterworklist);
-
-    begin
-      // self.x cannot be used, we'd copy over the dyn array
-      buflength:=x.buflength;
-      buflengthinc:=x.buflengthinc;
-      length:=x.length;
-      if x.buf<>nil then
-        begin
-          setlength(buf,buflength);
-          move(x.buf[0],buf[0],length*sizeof(Tsuperregister));
-        end;
-    end;
-
-    destructor tsuperregisterworklist.done;
-
-    begin
-      buf:=nil;
+      h:=@SuperRegisterWorkHashList_EmptyH;
    end;


-    procedure tsuperregisterworklist.add(s:tsuperregister);
-
-    begin
-      inc(length);
-      { Need to increase buffer length? }
-      if length>=buflength then
-        begin
-          inc(buflength,buflengthinc);
-          buflengthinc:=buflengthinc*2;
-          if buflengthinc>256 then
-             buflengthinc:=256;
-          setlength(buf,buflength);
-        end;
-      buf[length-1]:=s;
-    end;
-
-
-    function tsuperregisterworklist.addnodup(s:tsuperregister): boolean;
-
-    begin
-      addnodup := false;
-      if (length=0) or (indexword(buf[0],length,s) = -1) then
-        begin
-          add(s);
-          addnodup := true;
-        end;
-    end;
-
-
-    procedure tsuperregisterworklist.clear;
-
-    begin
-      length:=0;
-    end;
-
-
-    procedure tsuperregisterworklist.deleteidx(i:word);
-
-    begin
-      if i>=length then
-        internalerror(200310144);
-      buf[i]:=buf[length-1];
-      dec(length);
-    end;
-
-
-    function tsuperregisterworklist.readidx(i:word):tsuperregister;
-      begin
-        if (i >= length) then
-          internalerror(2005010601);
-        result := buf[i];
-      end;
-
-
-    function tsuperregisterworklist.get:tsuperregister;
-
-    begin
-      if length=0 then
-        internalerror(200310142);
-      dec(length);
-      get:=buf[length];
-    end;
-
-
-    function tsuperregisterworklist.delete(s:tsuperregister):boolean;
-
+    constructor Tsuperregisterworkhashlist.copyfrom(const x:Tsuperregisterworkhashlist);
    var
-      i:longint;
-
+      datasize : SizeUint;
    begin
-      delete:=false;
-      if (system.length(buf)=0) then
-        exit;
-      { indexword in 1.0.x and 1.9.4 is broken }
-      i:=indexword(buf[0],length,s);
-      if i<>-1 then
+      if x.FNItems=0 then
+        h:=@SuperRegisterWorkHashList_EmptyH
+      else
        begin
-          deleteidx(i);
-          delete := true;
+          datasize:=allocateregion(1+x.hmask,x.maxItems,FItems,h,next);
+          Move(x.FItems^,FItems^,datasize);
+          FNItems:=x.FNItems;
+          hmask:=x.hmask;
+          minItems:=x.minItems;
+          maxItems:=x.maxItems;
+        end;
+    end;
+
+
+    destructor Tsuperregisterworkhashlist.done;
+    begin
+      FreeMem(FItems);
+      FItems:=nil;
+    end;
+
+
+    procedure Tsuperregisterworkhashlist.clear;
+    begin
+      if not Assigned(FItems) then
+        exit;
+      FreeMem(FItems);
+      FItems:=nil;
+      h:=@SuperRegisterWorkHashList_EmptyH;
+      FNItems:=0;
+      hmask:=0;
+      minItems:=0;
+      maxItems:=0;
+    end;
+
+
+    procedure Tsuperregisterworkhashlist.add(s:tsuperregister);
+    var
+      ii : SizeInt;
+      hp : PInt32;
+    begin
+      ii:=FNItems;
+      if uint32(ii)=maxItems then
+        rehash(ii+1);
+      FItems[ii]:=s;
+      hp:=h+s and hmask;
+      next[ii]:=hp^;
+      hp^:=ii;
+      FNItems:=ii+1;
+    end;
+
+
+    function Tsuperregisterworkhashlist.addnodup(s:tsuperregister):boolean;
+    var
+      ii : SizeInt;
+    begin
+      ii:=h[s and hmask];
+      while ii>=0 do
+        begin
+          if FItems[ii]=s then
+            exit(false);
+          ii:=next[ii];
+        end;
+      add(s);
+      result:=true;
+    end;
+
+
+    function Tsuperregisterworkhashlist.get:tsuperregister;
+    var
+      ii : SizeInt;
+    begin
+      ii:=length-1;
+      if ii<0 then
+        internalerror(202205030);
+      result:=FItems[ii];
+      deleteidx(ii);
+    end;
+
+
+    procedure Tsuperregisterworkhashlist.deleteidx(i: SizeInt);
+    var
+      ii,ilast : SizeInt;
+      nextp : PInt32;
+    begin
+      if (i<0) or (i>=length) then
+        internalerror(202205031);
+      { Remove #i reference from h/next. }
+      nextp:=h+FItems[i] and hmask;
+      repeat
+        ii:=nextp^;
+        if ii=i then
+          break;
+        nextp:=next+ii;
+      until false;
+      nextp^:=next[ii];
+
+      { Move item #length-1 = #ilast to #i and fix up its reference in h/next. }
+      ilast:=length-1;
+      if i<>ilast then
+        begin
+          nextp:=h+FItems[ilast] and hmask;
+          repeat
+            ii:=nextp^;
+            if ii=ilast then
+              break;
+            nextp:=next+ii;
+          until false;
+          nextp^:=i;
+          FItems[i]:=FItems[ilast];
+          next[i]:=next[ilast];
+        end;
+
+      FNItems:=ilast;
+      if ilast<SizeInt(minItems) then
+        rehash(ilast);
+    end;
+
+
+    function Tsuperregisterworkhashlist.delete(s: tsuperregister): boolean;
+    var
+      ii : SizeInt;
+    begin
+      ii:=h[s and hmask];
+      while ii>=0 do
+        begin
+          if FItems[ii]=s then
+            begin
+              deleteidx(ii);
+              exit(true);
+            end;
+          ii:=next[ii];
+        end;
+      result:=false;
+    end;
+
+
+    procedure Tsuperregisterworkhashlist.sort(less:Tsuperregistercomparefunc; param:pointer);
+    var
+      p,ih,i:SizeUint;
+      t:Tsuperregister;
+    begin
+      if length<2 then
+        exit;
+      p:=SizeUint(1) shl BsrDWord(length-1);
+      repeat
+        for ih:=p to length-1 do
+          begin
+            i:=ih;
+            t:=buf[i];
+            repeat
+              if not less(t,buf[i-p],param) then
+                break;
+              buf[i]:=buf[i-p];
+              dec(i,p)
+            until i<p;
+            buf[i]:=t;
+          end;
+        p:=p shr 1;
+      until p=0;
+      rebuildh(FItems,h,next,hmask,FNItems);
+    end;
+
+
+    procedure Tsuperregisterworkhashlist.rehash(forItems: SizeUint);
+    var
+      newHMask,newMinItems,newMaxItems : int32;
+      newItems : Psuperregister;
+      newH,newNext : PInt32;
+    begin
+      if forItems=0 then
+        begin
+          clear;
+          exit;
+        end;
+      newMaxItems:=4+forItems+forItems div 2;
+      newHMask:=1 shl (1+BsrDWord(newMaxItems div 8 or 1))-1; { UpToPow2(newMaxItems div 8)-1. Load factor = newMaxItems/newHMask = 400% to 800%. }
+      { Well, the whole hash thing is only to prevent the slowdown in extreme cases like webtbs/tw2242, saner loads aren't even desirable:
+        for example, load factors 50..100% increase max Tsuperregisterworkhashlist data size in webtbs/tw2242 to 1 Mb up from 500 Kb without any speedup. }
+      newMinItems:=SizeUint(forItems) div 8*4;
+      if newMinItems=0 then
+        newMinItems:=1; { force rehash(0) on emptying. Not necessarily a good idea... }
+
+      allocateregion(1+newHMask,newMaxItems,newItems,newH,newNext);
+
+      Move(FItems^,newItems^,FNItems*sizeof(Tsuperregister));
+      if hmask=newHMask then
+        begin
+          { Shortcut re-adding items if hash mask hasn't changed. }
+          Move(h^,newH^,(1+newHMask)*sizeof(h^));
+          Move(next^,newNext^,FNItems*sizeof(next^));
+        end
+      else
+        rebuildh(newItems,newH,newNext,newHMask,FNItems);
+
+      if Assigned(FItems) then
+        FreeMem(FItems);
+      FItems:=newItems;
+      h:=newH;
+      next:=newNext;
+      hmask:=newHMask;
+      minItems:=newMinItems;
+      maxItems:=newMaxItems;
+    end;
+
+
+    class function Tsuperregisterworkhashlist.allocateregion(
+      nh,amaxitems:uint32; out aitems:Psuperregister; out ah:PInt32; out anext:PInt32):SizeUint;
+    var
+      hOffset,nextOffset,ofs: SizeUint;
+    begin
+      ofs:=align(sizeof(Tsuperregister)*amaxitems,sizeof(int32)); { items + align to h }
+      hOffset:=ofs;
+      ofs:=align(ofs+nh*sizeof(int32),sizeof(int32)); { + h + align to next }
+      nextOffset:=ofs;
+      result:=ofs+amaxitems*sizeof(int32); { + next }
+      aitems:=GetMem(result);
+      ah:=pointer(aitems)+hOffset;
+      anext:=pointer(aitems)+nextOffset;
+    end;
+
+
+    class procedure Tsuperregisterworkhashlist.rebuildh(aitems:Psuperregister; ah,anext:PInt32; ahmask,anitems:int32);
+    var
+      ii : SizeInt;
+      hp : PInt32;
+    begin
+      FillDWord(ah^,1+ahmask,dword(-1));
+      FillDWord(anext^,anitems,dword(-1));
+      for ii:=0 to anitems-1 do
+        begin
+          hp:=ah+aitems[ii] and ahmask;
+          anext[ii]:=hp^;
+          hp^:=ii;
        end;
    end;

--- a/compiler/ncgutil.pas
+++ b/compiler/ncgutil.pas
@ -41,7 +41,7 @@ interface

      pusedregvars = ^tusedregvars;
      tusedregvars = record
-        intregvars, addrregvars, fpuregvars, mmregvars: Tsuperregisterworklist;
+        intregvars, addrregvars, fpuregvars, mmregvars: Tsuperregisterworkhashlist;
      end;

 {
@ -1203,16 +1203,16 @@ implementation

    procedure gen_sync_regvars(list:TAsmList; var rv: tusedregvars);
      var
-        count: longint;
+        i: longint;
      begin
-        for count := 1 to rv.intregvars.length do
-          cg.a_reg_sync(list,newreg(R_INTREGISTER,rv.intregvars.readidx(count-1),R_SUBWHOLE));
-        for count := 1 to rv.addrregvars.length do
-          cg.a_reg_sync(list,newreg(R_ADDRESSREGISTER,rv.addrregvars.readidx(count-1),R_SUBWHOLE));
-        for count := 1 to rv.fpuregvars.length do
-          cg.a_reg_sync(list,newreg(R_FPUREGISTER,rv.fpuregvars.readidx(count-1),R_SUBWHOLE));
-        for count := 1 to rv.mmregvars.length do
-          cg.a_reg_sync(list,newreg(R_MMREGISTER,rv.mmregvars.readidx(count-1),R_SUBWHOLE));
+        for i := 0 to rv.intregvars.length-1 do
+          cg.a_reg_sync(list,newreg(R_INTREGISTER,rv.intregvars.buf[i],R_SUBWHOLE));
+        for i := 0 to rv.addrregvars.length-1 do
+          cg.a_reg_sync(list,newreg(R_ADDRESSREGISTER,rv.addrregvars.buf[i],R_SUBWHOLE));
+        for i := 0 to rv.fpuregvars.length-1 do
+          cg.a_reg_sync(list,newreg(R_FPUREGISTER,rv.fpuregvars.buf[i],R_SUBWHOLE));
+        for i := 0 to rv.mmregvars.length-1 do
+          cg.a_reg_sync(list,newreg(R_MMREGISTER,rv.mmregvars.buf[i],R_SUBWHOLE));
      end;


--- a/compiler/rgobj.pas
+++ b/compiler/rgobj.pas
@ -112,7 +112,7 @@ unit rgobj;
        { The register allocator assigns each register a colour }
        colour   : Tsuperregister;
        movehlist : Tmovehashlist;
-        adjlist  : Psuperregisterworklist;
+        adjlist  : Tsuperregisterworkhashlist;
        degree   : TSuperregister;
        flags    : Treginfoflagset;
        weight   : longint;
@ -207,8 +207,8 @@ unit rgobj;
        regtype           : Tregistertype;
        { default subregister used }
        defaultsub        : tsubregister;
-        live_registers:Tsuperregisterworklist;
-        spillednodes: tsuperregisterworklist;
+        live_registers    : Tsuperregisterworkhashlist;
+        spillednodes      : tsuperregisterworkhashlist;

        { can be overridden to add cpu specific interferences }
        procedure add_cpu_interferences(p : tai);virtual;
@ -248,7 +248,7 @@ unit rgobj;
        freezeworklist,
        spillworklist,
        coalescednodes,
-        selectstack       : tsuperregisterworklist;
+        selectstack       : tsuperregisterworkhashlist;
        worklist_moves,
        active_moves,
        frozen_moves,
@ -275,8 +275,6 @@ unit rgobj;
        procedure colour_registers;
        procedure insert_regalloc_info(list:TAsmList;u:tsuperregister);
        procedure generate_interference_graph(list:TAsmList;headertai:tai);
-        { sort spilled nodes by increasing number of interferences }
-        procedure sort_spillednodes;
        { translates the registers in the given assembler list }
        procedure translate_registers(list:TAsmList);
        function  spill_registers(list:TAsmList;headertai:tai):boolean;virtual;
@ -285,7 +283,6 @@ unit rgobj;
        procedure add_to_movelist(u:Tsuperregister;ins:Tmoveins);
        function move_related(n:Tsuperregister):boolean;
        procedure make_work_list;
-        procedure sort_simplify_worklist;
        procedure enable_moves(n:Tsuperregister);
        procedure decrement_degree(m:Tsuperregister);
        procedure simplify;
@ -535,6 +532,7 @@ unit rgobj;
             reginfo[i].movehlist.init;
             reginfo[i].degree:=high(tsuperregister);
             reginfo[i].alias:=RS_INVALID;
+             reginfo[i].adjlist.init;
           end;
         { Usable registers }
         // default value set by constructor
@ -576,17 +574,15 @@ unit rgobj;

    procedure Trgobj.dispose_reginfo;
      var
-        i : cardinal;
+        i : sizeint;
      begin
        if reginfo<>nil then
          begin
            for i:=0 to maxreg-1 do
-              with reginfo[i] do
-                begin
-                  if adjlist<>nil then
-                    dispose(adjlist,done);
-                  movehlist.Done;
-                end;
+              begin
+                reginfo[i].adjlist.Done;
+                reginfo[i].movehlist.Done;
+              end;
            reginfo:=nil;
          end;
      end;
@ -615,7 +611,10 @@ unit rgobj;
              end;
            SetLength(reginfo,maxreginfo);
            for i:=oldmaxreginfo to maxreginfo-1 do
-              reginfo[i].movehlist.init;
+              begin
+                reginfo[i].movehlist.init;
+                reginfo[i].adjlist.init;
+              end;
          end;
        reginfo[result].subreg:=subreg;
      end;
@ -789,9 +788,7 @@ unit rgobj;
 {$endif}
        with reginfo[u] do
          begin
-            if adjlist=nil then
-              new(adjlist,init);
-            adjlist^.add(v);
+            adjlist.add(v);
            if (v<first_imaginary) and
               (v in usable_register_set) then
              inc(real_reg_interferences);
@ -814,13 +811,12 @@ unit rgobj;

    procedure trgobj.add_edges_used(u:Tsuperregister);

-    var i:cardinal;
+    var i:sizeint;

    begin
      with live_registers do
-        if length>0 then
-          for i:=0 to length-1 do
-            add_edge(u,get_alias(buf[i]));
+        for i:=0 to length-1 do
+          add_edge(u,get_alias(buf[i]));
    end;

 {$ifdef EXTDEBUG}
@ -1040,95 +1036,11 @@ unit rgobj;
          end;
    end;

-    procedure Trgobj.sort_simplify_worklist;
-
-    {Sorts the simplifyworklist by the number of interferences the
-     registers in it cause. This allows simplify to execute in
-     constant time.
-
-     Sort the list in the descending order, since items of simplifyworklist
-     are retrieved from end to start and then items are added to selectstack.
-     The selectstack list is also processed from end to start.
-
-     Such way nodes with most interferences will get their colors first.
-     Since degree of nodes in simplifyworklist before sorting is always
-     less than the number of usable registers this should not trigger spilling
-     and should lead to a better register allocation in some cases.
-    }
-
-    var p,h,i,leni,lent:longword;
-        t:Tsuperregister;
-        adji,adjt:Psuperregisterworklist;
-
-    begin
-      with simplifyworklist do
-        begin
-          if length<2 then
-            exit;
-          p:=longword(1) shl BsrDWord(length-1);
-          repeat
-            for h:=p to length-1 do
-              begin
-                i:=h;
-                t:=buf[i];
-                adjt:=reginfo[buf[i]].adjlist;
-                lent:=0;
-                if adjt<>nil then
-                  lent:=adjt^.length;
-                repeat
-                  adji:=reginfo[buf[i-p]].adjlist;
-                  leni:=0;
-                  if adji<>nil then
-                    leni:=adji^.length;
-                  if leni>=lent then
-                    break;
-                  buf[i]:=buf[i-p];
-                  dec(i,p)
-                until i<p;
-                buf[i]:=t;
-              end;
-            p:=p shr 1;
-          until p=0;
-        end;
-    end;
-
-
-    { sort spilled nodes by increasing number of interferences }
-    procedure Trgobj.sort_spillednodes;
+    function simplifyworklist_come_before(a,b:Tsuperregister;param:pointer):boolean;
      var
-        p,h,i,leni,lent:longword;
-        t:Tsuperregister;
-        adji,adjt:Psuperregisterworklist;
+        rg:TReginfoArray absolute param;
      begin
-        with spillednodes do
-          begin
-            if length<2 then
-              exit;
-            p:=longword(1) shl BsrDWord(length-1);
-            repeat
-              for h:=p to length-1 do
-                begin
-                  i:=h;
-                  t:=buf[i];
-                  adjt:=reginfo[buf[i]].adjlist;
-                  lent:=0;
-                  if adjt<>nil then
-                    lent:=adjt^.length;
-                  repeat
-                    adji:=reginfo[buf[i-p]].adjlist;
-                    leni:=0;
-                    if adji<>nil then
-                      leni:=adji^.length;
-                    if leni<=lent then
-                      break;
-                    buf[i]:=buf[i-p];
-                    dec(i,p)
-                  until i<p;
-                  buf[i]:=t;
-                end;
-              p:=p shr 1;
-            until p=0;
-          end;
+        result:=rg[a].adjlist.length>rg[b].adjlist.length;
      end;


@ -1142,10 +1054,7 @@ unit rgobj;
      for n:=first_imaginary to maxreg-1 do
        with reginfo[n] do
          begin
-            if adjlist=nil then
-              degree:=0
-            else
-              degree:=adjlist^.length;
+            degree:=adjlist.length;
            if degree>=usable_registers_cnt then
              spillworklist.add(n)
            else if move_related(n) then
@ -1153,7 +1062,21 @@ unit rgobj;
            else if not(ri_coalesced in flags) then
              simplifyworklist.add(n);
          end;
-      sort_simplify_worklist;
+
+      {Sort the simplifyworklist by the number of interferences the
+       registers in it cause. This allows simplify to execute in
+       constant time.
+
+       Sort the list in the descending order, since items of simplifyworklist
+       are retrieved from end to start and then items are added to selectstack.
+       The selectstack list is also processed from end to start.
+
+       Such way nodes with most interferences will get their colors first.
+       Since degree of nodes in simplifyworklist before sorting is always
+       less than the number of usable registers this should not trigger spilling
+       and should lead to a better register allocation in some cases.
+      }
+      simplifyworklist.sort(@simplifyworklist_come_before,pointer(reginfo));
    end;


@ -1190,9 +1113,9 @@ unit rgobj;

    procedure Trgobj.decrement_degree(m:Tsuperregister);

-    var adj : Psuperregisterworklist;
-        n : tsuperregister;
-        d,i : cardinal;
+    var n : tsuperregister;
+        d : cardinal;
+        i : sizeint;

    begin
      with reginfo[m] do
@ -1206,14 +1129,12 @@ unit rgobj;
              {Enable moves for m.}
              enable_moves(m);
              {Enable moves for adjacent.}
-              adj:=adjlist;
-              if adj<>nil then
-                for i:=1 to adj^.length do
-                  begin
-                    n:=adj^.buf[i-1];
-                    if reginfo[n].flags*[ri_selected,ri_coalesced]<>[] then
-                      enable_moves(n);
-                  end;
+              for i:=0 to adjlist.length-1 do
+                begin
+                  n:=adjlist.buf[i];
+                  if reginfo[n].flags*[ri_selected,ri_coalesced]<>[] then
+                    enable_moves(n);
+                end;
              {Remove the node from the spillworklist.}
              if not spillworklist.delete(m) then
                internalerror(200310145);
@ -1228,9 +1149,9 @@ unit rgobj;

    procedure trgobj.simplify;

-    var adj : Psuperregisterworklist;
+    var adj : Psuperregisterworkhashlist;
        m,n : Tsuperregister;
-        i : cardinal;
+        i : sizeint;
    begin
      {We take the element with the least interferences out of the
       simplifyworklist. Since the simplifyworklist is now sorted, we
@ -1242,16 +1163,15 @@ unit rgobj;
      with reginfo[m] do
        begin
          include(flags,ri_selected);
-          adj:=adjlist;
+          adj:=@adjlist;
+        end;
+      for i:=0 to adj^.length-1 do
+        begin
+          n:=adj^.buf[i];
+          if (n>=first_imaginary) and
+             (reginfo[n].flags*[ri_selected,ri_coalesced]=[]) then
+            decrement_degree(n);
        end;
-      if adj<>nil then
-        for i:=1 to adj^.length do
-          begin
-            n:=adj^.buf[i-1];
-            if (n>=first_imaginary) and
-               (reginfo[n].flags*[ri_selected,ri_coalesced]=[]) then
-              decrement_degree(n);
-          end;
    end;

    function trgobj.get_alias(n:Tsuperregister):Tsuperregister;
@ -1289,33 +1209,31 @@ unit rgobj;
            ibitmap[r,t];
      end;

-    var adj : Psuperregisterworklist;
-        i : cardinal;
+    var i : sizeint;
        n : tsuperregister;

    begin
      with reginfo[v] do
        begin
          adjacent_ok:=true;
-          adj:=adjlist;
-          if adj<>nil then
-            for i:=1 to adj^.length do
-              begin
-                n:=adj^.buf[i-1];
-                if (reginfo[n].flags*[ri_coalesced]=[]) and not ok(n,u) then
-                  begin
-                    adjacent_ok:=false;
-                    break;
-                  end;
-              end;
+          for i:=0 to adjlist.length-1 do
+            begin
+              n:=adjlist.buf[i];
+              if (reginfo[n].flags*[ri_coalesced]=[]) and not ok(n,u) then
+                begin
+                  adjacent_ok:=false;
+                  break;
+                end;
+            end;
        end;
    end;

    function trgobj.conservative(u,v:Tsuperregister):boolean;

-    var adj : Psuperregisterworklist;
+    var adj : Psuperregisterworkhashlist;
        done : Tsuperregisterset; {To prevent that we count nodes twice.}
-        i,k:cardinal;
+        i : sizeint;
+        k : cardinal;
        n : tsuperregister;

    begin
@ -1323,39 +1241,36 @@ unit rgobj;
      supregset_reset(done,false,maxreg);
      with reginfo[u] do
        begin
-          adj:=adjlist;
-          if adj<>nil then
-            for i:=1 to adj^.length do
-              begin
-                n:=adj^.buf[i-1];
-                if reginfo[n].flags*[ri_coalesced,ri_selected]=[] then
-                  begin
-                    supregset_include(done,n);
-                    if reginfo[n].degree>=usable_registers_cnt then
-                      inc(k);
-                  end;
-              end;
+          for i:=0 to adjlist.length-1 do
+            begin
+              n:=adjlist.buf[i];
+              if reginfo[n].flags*[ri_coalesced,ri_selected]=[] then
+                begin
+                  supregset_include(done,n);
+                  if reginfo[n].degree>=usable_registers_cnt then
+                    inc(k);
+                end;
+            end;
+        end;
+      adj:=@reginfo[v].adjlist;
+      for i:=0 to adj^.length-1 do
+        begin
+          n:=adj^.buf[i];
+          if (u<first_imaginary) and
+             (n>=first_imaginary) and
+             not ibitmap[u,n] and
+             (usable_registers_cnt-reginfo[n].real_reg_interferences<=1) then
+            begin
+              { Do not coalesce if 'u' is the last usable real register available
+                for imaginary register 'n'. }
+              conservative:=false;
+              exit;
+            end;
+          if not supregset_in(done,n) and
+             (reginfo[n].degree>=usable_registers_cnt) and
+             (reginfo[n].flags*[ri_coalesced,ri_selected]=[]) then
+            inc(k);
        end;
-      adj:=reginfo[v].adjlist;
-      if adj<>nil then
-        for i:=1 to adj^.length do
-          begin
-            n:=adj^.buf[i-1];
-            if (u<first_imaginary) and
-               (n>=first_imaginary) and
-               not ibitmap[u,n] and
-               (usable_registers_cnt-reginfo[n].real_reg_interferences<=1) then
-              begin
-                { Do not coalesce if 'u' is the last usable real register available
-                  for imaginary register 'n'. }
-                conservative:=false;
-                exit;
-              end;
-            if not supregset_in(done,n) and
-               (reginfo[n].degree>=usable_registers_cnt) and
-               (reginfo[n].flags*[ri_coalesced,ri_selected]=[]) then
-              inc(k);
-          end;
      conservative:=(k<usable_registers_cnt);
    end;

@ -1381,12 +1296,9 @@ unit rgobj;

    procedure trgobj.combine(u,v:Tsuperregister);

-    var adj : Psuperregisterworklist;
-        i,p,q:cardinal;
-        n:sizeint;
+    var adj : Psuperregisterworkhashlist;
+        i,n : sizeint;
        t : tsuperregister;
-        searched:Tmoveins;
-        found : boolean;
        vmovehlist : Pmovehashlist;

    begin
@ -1410,41 +1322,40 @@ unit rgobj;

      enable_moves(v);

-      adj:=reginfo[v].adjlist;
-      if adj<>nil then
-        for i:=1 to adj^.length do
-          begin
-            t:=adj^.buf[i-1];
-            with reginfo[t] do
-              if not(ri_coalesced in flags) then
-                begin
-                  {t has a connection to v. Since we are adding v to u, we
-                   need to connect t to u. However, beware if t was already
-                   connected to u...}
-                  if (ibitmap[t,u]) and not (ri_selected in flags) then
-                    begin
-                      {... because in that case, we are actually removing an edge
-                       and the degree of t decreases.}
-                      decrement_degree(t);
-                      { if v is combined with a real register, retry
-                        coalescing of interfering nodes since it may succeed now. }
-                      if (u<first_imaginary) and
-                         (adj^.length>=usable_registers_cnt) and
-                         (reginfo[t].degree>usable_registers_cnt) then
-                        enable_moves(t);
-                    end
-                  else
-                    begin
-                      add_edge(t,u);
-                      {We have added an edge to t and u. So their degree increases.
-                       However, v is added to u. That means its neighbours will
-                       no longer point to v, but to u instead. Therefore, only the
-                       degree of u increases.}
-                      if (u>=first_imaginary) and not (ri_selected in flags) then
-                        inc(reginfo[u].degree);
-                    end;
-                end;
-          end;
+      adj:=@reginfo[v].adjlist;
+      for i:=0 to adj^.length-1 do
+        begin
+          t:=adj^.buf[i];
+          with reginfo[t] do
+            if not(ri_coalesced in flags) then
+              begin
+                {t has a connection to v. Since we are adding v to u, we
+                 need to connect t to u. However, beware if t was already
+                 connected to u...}
+                if (ibitmap[t,u]) and not (ri_selected in flags) then
+                  begin
+                    {... because in that case, we are actually removing an edge
+                     and the degree of t decreases.}
+                    decrement_degree(t);
+                    { if v is combined with a real register, retry
+                      coalescing of interfering nodes since it may succeed now. }
+                    if (u<first_imaginary) and
+                       (adj^.length>=usable_registers_cnt) and
+                       (reginfo[t].degree>usable_registers_cnt) then
+                      enable_moves(t);
+                  end
+                else
+                  begin
+                    add_edge(t,u);
+                    {We have added an edge to t and u. So their degree increases.
+                     However, v is added to u. That means its neighbours will
+                     no longer point to v, but to u instead. Therefore, only the
+                     degree of u increases.}
+                    if (u>=first_imaginary) and not (ri_selected in flags) then
+                      inc(reginfo[u].degree);
+                  end;
+              end;
+        end;
      if (reginfo[u].degree>=usable_registers_cnt) and freezeworklist.delete(u) then
        spillworklist.add(u);
    end;
@ -1571,7 +1482,7 @@ unit rgobj;
    procedure trgobj.select_spill;
    var
      n : tsuperregister;
-      adj : psuperregisterworklist;
+      adj : Psuperregisterworkhashlist;
      maxlength,minlength,p,i :word;
      minweight: longint;
      {$ifdef SPILLING_NEW}
@ -1610,18 +1521,18 @@ unit rgobj;
          { Safe: This procedure is only called if length<>0 }
          for i:=0 to length-1 do
            begin
-              adj:=reginfo[buf^[i]].adjlist;
-              dist:=adj^.length-reginfo[buf^[i]].total_interferences/reginfo[buf^[i]].count_uses;
-              if assigned(adj) and
-                (reginfo[buf^[i]].weight<minweight) and
+              adj:=@reginfo[buf[i]].adjlist;
+              dist:=adj^.length-reginfo[buf[i]].total_interferences/reginfo[buf[i]].count_uses;
+              if (adj^.length<>0) and
+                (reginfo[buf[i]].weight<minweight) and
                (dist>=1) and
-                (reginfo[buf^[i]].weight>0) then
+                (reginfo[buf[i]].weight>0) then
                begin
                  p:=i;
-                  minweight:=reginfo[buf^[i]].weight;
+                  minweight:=reginfo[buf[i]].weight;
                end;
            end;
-          n:=buf^[p];
+          n:=buf[p];
          deleteidx(p);
        end;
 {$endif SPILLING_NEW}
@ -1648,9 +1559,8 @@ unit rgobj;
          for i:=0 to length-1 do
            if not(ri_spill_helper in reginfo[buf[i]].flags) then
              begin
-                adj:=reginfo[buf[i]].adjlist;
-                if assigned(adj) and
-                   (
+                adj:=@reginfo[buf[i]].adjlist;
+                if (
                    (adj^.length>maxlength) or
                    ((adj^.length=maxlength) and (reginfo[buf[i]].weight<minweight))
                   ) then
@ -1675,9 +1585,8 @@ unit rgobj;
              p:=0;
              for i:=0 to length-1 do
                begin
-                  adj:=reginfo[buf[i]].adjlist;
-                  if assigned(adj) and
-                     (
+                  adj:=@reginfo[buf[i]].adjlist;
+                  if (
                      (adj^.length<minlength) or
                      ((adj^.length=minlength) and (reginfo[buf[i]].weight<minweight))
                     ) then
@ -1719,8 +1628,9 @@ unit rgobj;

    function colour_register(n : Tsuperregister) : boolean;
      var
-        j,k : cardinal;
-        adj : Psuperregisterworklist;
+        j : sizeint;
+        k : cardinal;
+        adj : Psuperregisterworkhashlist;
        adj_colours:set of 0..255;
        a,c : Tsuperregister;
 {$if declared(RS_STACK_POINTER_REG) and (RS_STACK_POINTER_REG<>RS_INVALID)}
@ -1729,14 +1639,13 @@ unit rgobj;
      begin
        {Create a list of colours that we cannot assign to n.}
        adj_colours:=[];
-        adj:=reginfo[n].adjlist;
-        if adj<>nil then
-          for j:=0 to adj^.length-1 do
-            begin
-              a:=get_alias(adj^.buf[j]);
-              if supregset_in(colourednodes,a) and (reginfo[a].colour<=255) then
-                include(adj_colours,reginfo[a].colour);
-            end;
+        adj:=@reginfo[n].adjlist;
+        for j:=0 to adj^.length-1 do
+          begin
+            a:=get_alias(adj^.buf[j]);
+            if supregset_in(colourednodes,a) and (reginfo[a].colour<=255) then
+              include(adj_colours,reginfo[a].colour);
+          end;
        { e.g. AVR does not have a stack pointer register }
 {$if declared(RS_STACK_POINTER_REG) and (RS_STACK_POINTER_REG<>RS_INVALID)}
        { FIXME: temp variable r is needed here to avoid Internal error 20060521 }
@ -1764,16 +1673,17 @@ unit rgobj;
      end;

    var
-        i,k : cardinal;
+        i : sizeint;
+        k : cardinal;
        n : Tsuperregister;
        spill_loop : boolean;
    begin
      reset_colours;
      {Now colour the imaginary registers on the select-stack.}
      spill_loop:=false;
-      for i:=selectstack.length downto 1 do
+      for i:=selectstack.length-1 downto 0 do
        begin
-          n:=selectstack.buf[i-1];
+          n:=selectstack.buf[i];
          if not colour_register(n) and
            (ri_spill_helper in reginfo[n].flags) then
            begin
@ -1791,9 +1701,9 @@ unit rgobj;
            Trying to eliminte this by using a different colouring order. }
          reset_colours;
          { To prevent spilling of helper registers it is needed to assign colours to them first. }
-          for i:=selectstack.length downto 1 do
+          for i:=selectstack.length-1 downto 0 do
            begin
-              n:=selectstack.buf[i-1];
+              n:=selectstack.buf[i];
              if ri_spill_helper in reginfo[n].flags then
                if not colour_register(n) then
                  { Can't colour the spill helper register n.
@ -1802,18 +1712,18 @@ unit rgobj;
                  internalerror(2021091001);
            end;
          { Assign colours for the rest of the registers }
-          for i:=selectstack.length downto 1 do
+          for i:=selectstack.length-1 downto 0 do
            begin
-              n:=selectstack.buf[i-1];
+              n:=selectstack.buf[i];
              if not (ri_spill_helper in reginfo[n].flags) then
                colour_register(n);
            end;
        end;

      {Finally colour the nodes that were coalesced.}
-      for i:=1 to coalescednodes.length do
+      for i:=0 to coalescednodes.length-1 do
        begin
-          n:=coalescednodes.buf[i-1];
+          n:=coalescednodes.buf[i];
          k:=get_alias(n);
          reginfo[n].colour:=reginfo[k].colour;
        end;
@ -1867,36 +1777,23 @@ unit rgobj;
    {Remove node u from the interference graph and remove all collected
     move instructions it is associated with.}

-    var i : word;
+    var i : sizeint;
        v : Tsuperregister;
-        adj,adj2 : Psuperregisterworklist;
+        adj : Psuperregisterworkhashlist;

    begin
-      adj:=reginfo[u].adjlist;
-      if adj<>nil then
+      adj:=@reginfo[u].adjlist;
+      for i:=0 to adj^.length-1 do
        begin
-          for i:=1 to adj^.length do
-            begin
-              v:=adj^.buf[i-1];
-              {Remove (u,v) and (v,u) from bitmap.}
-              ibitmap[u,v]:=false;
-              ibitmap[v,u]:=false;
-              {Remove (v,u) from adjacency list.}
-              adj2:=reginfo[v].adjlist;
-              if adj2<>nil then
-                begin
-                  adj2^.delete(u);
-                  if adj2^.length=0 then
-                    begin
-                      dispose(adj2,done);
-                      reginfo[v].adjlist:=nil;
-                    end;
-                end;
-            end;
-          {Remove ( u,* ) from adjacency list.}
-          dispose(adj,done);
-          reginfo[u].adjlist:=nil;
+          v:=adj^.buf[i];
+          {Remove (u,v) and (v,u) from bitmap.}
+          ibitmap[u,v]:=false;
+          ibitmap[v,u]:=false;
+          {Remove (v,u) from adjacency list.}
+          reginfo[v].adjlist.delete(u);
        end;
+      {Remove ( u,* ) from adjacency list.}
+      adj^.clear;
    end;


@ -2139,7 +2036,7 @@ unit rgobj;
 {$ifdef DEBUG_REGISTERLIFE}
                              write(live_registers.length,'  ');
                              for i:=0 to live_registers.length-1 do
-                                write(std_regname(newreg(regtype,live_registers.buf^[i],defaultsub)),' ');
+                                write(std_regname(newreg(regtype,live_registers.buf[i],defaultsub)),' ');
                              writeln;
 {$endif DEBUG_REGISTERLIFE}
                              add_edges_used(supreg);
@ -2150,7 +2047,7 @@ unit rgobj;
 {$ifdef DEBUG_REGISTERLIFE}
                              write(live_registers.length,'  ');
                              for i:=0 to live_registers.length-1 do
-                                write(std_regname(newreg(regtype,live_registers.buf^[i],defaultsub)),' ');
+                                write(std_regname(newreg(regtype,live_registers.buf[i],defaultsub)),' ');
                              writeln;
 {$endif DEBUG_REGISTERLIFE}
                              add_edges_used(supreg);
@ -2416,6 +2313,14 @@ unit rgobj;
      end;


+    function spillednodes_come_before(a,b:Tsuperregister;param:pointer):boolean;
+      var
+        rg:TReginfoArray absolute param;
+      begin
+        result:=rg[a].adjlist.length<rg[b].adjlist.length;
+      end;
+
+
    function trgobj.spill_registers(list:TAsmList;headertai:tai):boolean;
    { Returns true if any help registers have been used }
      var
@ -2434,9 +2339,10 @@ unit rgobj;
      begin
        spill_registers:=false;
        live_registers.clear;
-        { spilling should start with the node with the highest number of interferences, so we can coalesce as
+        { sort spilled nodes by increasing number of interferences
+          spilling should start with the node with the highest number of interferences, so we can coalesce as
          much as possible spilled nodes (coalesce in case of spilled node means they share the same memory location) }
-        sort_spillednodes;
+        spillednodes.sort(@spillednodes_come_before,pointer(reginfo));
        for i:=first_imaginary to maxreg-1 do
          exclude(reginfo[i].flags,ri_selected);
        SetLength(spill_temps,maxreg);
@ -2799,7 +2705,7 @@ unit rgobj;
      var
        loadpos,
        storepos : tai;
-        oldlive_registers : tsuperregisterworklist;
+        oldlive_registers : Tsuperregisterworkhashlist;
      begin
        result := false;
        fillchar(spregs,sizeof(spregs),0);