From daa825a5a32c19fad8cf6de7a300108a4c7228e4 Mon Sep 17 00:00:00 2001 From: Rika Ichinose Date: Tue, 26 Apr 2022 02:23:14 +0300 Subject: [PATCH 1/3] Replace TMoveList with TMove >Hash< List. --- compiler/rgobj.pas | 434 +++++++++++++++++++++++---------------------- 1 file changed, 226 insertions(+), 208 deletions(-) diff --git a/compiler/rgobj.pas b/compiler/rgobj.pas index 35197c4ca2..eed73e9c00 100644 --- a/compiler/rgobj.pas +++ b/compiler/rgobj.pas @@ -81,21 +81,21 @@ unit rgobj; Tmoveins=class(Tlinkedlistitem) moveset:Tmoveset; x,y:Tsuperregister; - id:longint; + id:uint32; end; - Tmovelistheader=record - count, - maxcount, - sorted_until : cardinal; + PTmoveins=^Tmoveins; + Pmovehashlist=^Tmovehashlist; + Tmovehashlist=object + h : puint32; { region contains h + items; h[i]=0 means empty cell, h[i]=hi>0 references items[hi-1]. } + items : PTmoveins; + hmask,nitems,maxitems : uint32; + procedure init; + procedure done; + procedure add(ins:Tmoveins); { no-op if exists } + procedure rehash(foritems:sizeuint); end; - Tmovelist=record - header : Tmovelistheader; - data : array[tsuperregister] of Tmoveins; - end; - Pmovelist=^Tmovelist; - Treginfoflag=( ri_coalesced, { the register is coalesced with other register } ri_selected, { the register is put to selectstack } @@ -111,7 +111,7 @@ unit rgobj; alias : Tsuperregister; { The register allocator assigns each register a colour } colour : Tsuperregister; - movelist : Pmovelist; + movehlist : Tmovehashlist; adjlist : Psuperregisterworklist; degree : TSuperregister; flags : Treginfoflagset; @@ -152,6 +152,7 @@ unit rgobj; spilled : boolean; interferences : Tinterferencebitmap; end; + pspillinfo = ^tspillinfo; {#------------------------------------------------------------------ @@ -262,7 +263,7 @@ unit rgobj; has_usedmarks: boolean; has_directalloc: boolean; spillinfo : array of tspillinfo; - moveins_id_counter: longint; + moveins_id_counter: uint32; { Disposes of the reginfo array.} procedure dispose_reginfo; @@ -334,36 +335,6 @@ unit rgobj; globals, verbose,tgobj,procinfo,cgobj; - procedure sort_movelist(ml:Pmovelist); - - var h,i,p:longword; - t:Tmoveins; - - begin - with ml^ do - begin - if header.count<2 then - exit; - p:=longword(1) shl BsrDWord(header.count-1); - repeat - for h:=p to header.count-1 do - begin - i:=h; - t:=data[i]; - repeat - if data[i-p].id<=t.id then - break; - data[i]:=data[i-p]; - dec(i,p); - until imaxitems then + begin + h[ih]:=1+nitems; + items[nitems]:=ins; + nitems:=nitems+1; + exit; + end + else + begin + rehash(nitems+1); + add(ins); + exit; + end + else + if items[ii-1]=ins then + exit; +{$push} {$q-,r-} + ih:=(ih+hashRest+1) and hmask; + hashRest:=hashRest shr 5; +{$pop} + until false; + end; + + procedure Tmovehashlist.rehash(foritems:sizeuint); + var newh : puint32; + newitems : PTmoveins; + item : Tmoveins; + newhmask,newmaxitems,itemsOffset,iitem,ih : sizeuint; + hashRest : uint32; + begin + if foritems shr (bitsizeof(h^)-3)<>0 then + internalerror(202204251); { too big table } + newmaxitems:=4+foritems+foritems div 2; + newhmask:=sizeuint(1) shl (1+BsrDWord(newmaxitems-1))-1; { UpToPow2(newmaxitems)-1 } + if newmaxitems>newhmask div 2+newhmask div 16 then { 1/2+1/16 = 56.25% = max allowed load factor } + newhmask:=2*newhmask+1; + + itemsOffset:=Align(sizeof(h^)*(newhmask+1), {alignof(items^)} sizeof(pointer)); + newh:=GetMem(itemsOffset+newmaxitems*sizeof(items^)); + newitems:=pointer(newh)+itemsOffset; + + Move(items^,newitems^,sizeof(items^)*nitems); + + { Add newitems to newh. Shortcut by copying previous h if hmask hasn't changed. } + if hmask=newhmask then + Move(h^,newh^,sizeof(h^)*(newhmask+1)) + else + begin + FillChar(newh^,sizeof(h^)*(newhmask+1),0); + iitem:=0; + while iitemnil then dispose(adjlist,done); - if movelist<>nil then - dispose(movelist); + movehlist.Done; end; reginfo:=nil; end; @@ -523,6 +595,7 @@ unit rgobj; function trgobj.getnewreg(subreg:tsubregister):tsuperregister; var oldmaxreginfo : tsuperregister; + i : sizeint; begin result:=maxreg; inc(maxreg); @@ -541,6 +614,8 @@ unit rgobj; maxreginfoinc:=maxreginfoinc*2; end; SetLength(reginfo,maxreginfo); + for i:=oldmaxreginfo to maxreginfo-1 do + reginfo[i].movehlist.init; end; reginfo[result].subreg:=subreg; end; @@ -797,28 +872,7 @@ unit rgobj; if (u>=maxreginfo) then internalerror(2012101902); {$endif} - with reginfo[u] do - begin - if movelist=nil then - begin - { don't use sizeof(tmovelistheader), because that ignores alignment } - getmem(movelist,ptruint(@movelist^.data)-ptruint(movelist)+16*sizeof(pointer)); - movelist^.header.maxcount:=16; - movelist^.header.count:=0; - movelist^.header.sorted_until:=0; - end - else - begin - if movelist^.header.count>=movelist^.header.maxcount then - begin - movelist^.header.maxcount:=movelist^.header.maxcount*2; - { don't use sizeof(tmovelistheader), because that ignores alignment } - reallocmem(movelist,ptruint(@movelist^.data)-ptruint(movelist)+movelist^.header.maxcount*sizeof(pointer)); - end; - end; - movelist^.data[movelist^.header.count]:=ins; - inc(movelist^.header.count); - end; + reginfo[u].movehlist.add(ins); end; @@ -949,9 +1003,11 @@ unit rgobj; { How should we handle m68k move %d0,%a0? } if (getregtype(sreg)<>getregtype(dreg)) then exit; - if moveins_id_counter=high(moveins_id_counter) then - internalerror(2021112701); +{$push} {$q-,r-} inc(moveins_id_counter); +{$pop} + if moveins_id_counter=0 then + internalerror(2021112701); i:=Tmoveins.create; i.id:=moveins_id_counter; i.moveset:=ms_worklist_moves; @@ -970,19 +1026,18 @@ unit rgobj; function trgobj.move_related(n:Tsuperregister):boolean; - var i:cardinal; + var i : sizeint; + movehlist : Pmovehashlist; begin move_related:=false; - with reginfo[n] do - if movelist<>nil then - with movelist^ do - for i:=0 to header.count-1 do - if Tmoveins(data[i]).moveset in [ms_worklist_moves,ms_active_moves] then - begin - move_related:=true; - break; - end; + movehlist:=@reginfo[n].movehlist; + for i:=0 to sizeint(movehlist^.nitems)-1 do + if movehlist^.items[i].moveset in [ms_worklist_moves,ms_active_moves] then + begin + move_related:=true; + break; + end; end; procedure Trgobj.sort_simplify_worklist; @@ -1114,23 +1169,23 @@ unit rgobj; procedure trgobj.enable_moves(n:Tsuperregister); - var m:Tlinkedlistitem; - i:cardinal; + var i : sizeint; + movehlist : Pmovehashlist; + m : Tmoveins; begin - with reginfo[n] do - if movelist<>nil then - for i:=0 to movelist^.header.count-1 do + movehlist:=@reginfo[n].movehlist; + for i:=0 to sizeint(movehlist^.nitems)-1 do + begin + m:=movehlist^.items[i]; + if m.moveset=ms_active_moves then begin - m:=movelist^.data[i]; - if Tmoveins(m).moveset=ms_active_moves then - begin - {Move m from the set active_moves to the set worklist_moves.} - active_moves.remove(m); - Tmoveins(m).moveset:=ms_worklist_moves; - worklist_moves.concat(m); - end; - end; + {Move m from the set active_moves to the set worklist_moves.} + active_moves.remove(m); + m.moveset:=ms_worklist_moves; + worklist_moves.concat(m); + end; + end; end; procedure Trgobj.decrement_degree(m:Tsuperregister); @@ -1327,10 +1382,12 @@ unit rgobj; procedure trgobj.combine(u,v:Tsuperregister); var adj : Psuperregisterworklist; - original_u_count, i,n,p,q:cardinal; + i,p,q:cardinal; + n:sizeint; t : tsuperregister; searched:Tmoveins; found : boolean; + vmovehlist : Pmovehashlist; begin if not freezeworklist.delete(v) then @@ -1340,66 +1397,16 @@ unit rgobj; reginfo[v].alias:=u; {Combine both movelists. Since the movelists are sets, only add - elements that are not already present. The movelists cannot be - empty by definition; nodes are only coalesced if there is a move - between them. To prevent quadratic time blowup (movelists of - especially machine registers can get very large because of moves - generated during calls) we need to go into disgusting complexity. + elements that are not already present, but Tmovehashlist.add + already silently refuses duplicates, so just add. - (See webtbs/tw2242 for an example that stresses this.) + The movelists cannot be empty by definition; nodes are only coalesced + if there is a move between them. - We want to sort the movelist to be able to search logarithmically. - Unfortunately, sorting the movelist every time before searching - is counter-productive, since the movelist usually grows with a few - items at a time. Therefore, we split the movelist into a sorted - and an unsorted part and search through both. If the unsorted part - becomes too large, we sort.} - if assigned(reginfo[u].movelist) then - begin - {We have to weigh the cost of sorting the list against searching - the cost of the unsorted part. I use factor of 8 here; if the - number of items is less than 8 times the numer of unsorted items, - we'll sort the list.} - with reginfo[u].movelist^ do - if header.count<8*(header.count-header.sorted_until) then - sort_movelist(reginfo[u].movelist); - if assigned(reginfo[v].movelist) then - begin - original_u_count:=reginfo[u].movelist^.header.count; - for n:=0 to reginfo[v].movelist^.header.count-1 do - begin - {Binary search the sorted part of the list.} - searched:=reginfo[v].movelist^.data[n]; - p:=0; - q:=reginfo[u].movelist^.header.sorted_until; - i:=0; - if q<>0 then - repeat - i:=(p+q) shr 1; - if searched.id>reginfo[u].movelist^.data[i].id then - p:=i+1 - else - q:=i; - until p=q; - with reginfo[u].movelist^ do - if searched<>data[i] then - begin - {Linear search the unsorted part of the list.} - found:=false; - { no need to search the instructions we've already added - from v, we know we won't find a match there } - for i:=header.sorted_until+1 to original_u_count-1 do - if searched.id=data[i].id then - begin - found:=true; - break; - end; - if not found then - add_to_movelist(u,searched); - end; - end; - end; - end; + See webtbs/tw2242 for an example that stresses this. } + vmovehlist:=@reginfo[v].movehlist; + for n:=0 to sizeint(vmovehlist^.nitems)-1 do + add_to_movelist(u,vmovehlist^.items[n]); enable_moves(v); @@ -1502,39 +1509,40 @@ unit rgobj; procedure trgobj.freeze_moves(u:Tsuperregister); - var i:cardinal; - m:Tlinkedlistitem; - v,x,y:Tsuperregister; + var i:sizeint; + m:Tmoveins; + v,xalias,yalias:Tsuperregister; + umovehlist : Pmovehashlist; begin - if reginfo[u].movelist<>nil then - for i:=0 to reginfo[u].movelist^.header.count-1 do - begin - m:=reginfo[u].movelist^.data[i]; - if Tmoveins(m).moveset in [ms_worklist_moves,ms_active_moves] then - begin - x:=Tmoveins(m).x; - y:=Tmoveins(m).y; - if get_alias(y)=get_alias(u) then - v:=get_alias(x) - else - v:=get_alias(y); - {Move m from active_moves/worklist_moves to frozen_moves.} - if Tmoveins(m).moveset=ms_active_moves then - active_moves.remove(m) - else - worklist_moves.remove(m); - Tmoveins(m).moveset:=ms_frozen_moves; - frozen_moves.insert(m); + umovehlist:=@reginfo[u].movehlist; + for i:=0 to sizeint(umovehlist^.nitems)-1 do + begin + m:=umovehlist^.items[i]; + if m.moveset in [ms_worklist_moves,ms_active_moves] then + begin + xalias:=get_alias(m.x); + yalias:=get_alias(m.y); + if yalias=get_alias(u) then + v:=xalias + else + v:=yalias; + {Move m from active_moves/worklist_moves to frozen_moves.} + if m.moveset=ms_active_moves then + active_moves.remove(m) + else + worklist_moves.remove(m); + m.moveset:=ms_frozen_moves; + frozen_moves.insert(m); - if (v>=first_imaginary) and not(move_related(v)) and - (reginfo[v].degree=first_imaginary) and not(move_related(v)) and + (reginfo[v].degreemem moves } if (cs_opt_level3 in current_settings.optimizerswitches) and - getnewspillloc and - assigned(reginfo[t].movelist) then - for j:=0 to reginfo[t].movelist^.header.count-1 do - begin - x:=Tmoveins(reginfo[t].movelist^.data[j]).x; - y:=Tmoveins(reginfo[t].movelist^.data[j]).y; - if (x=t) and - (spillinfo[get_alias(y)].spilled) and - not(spillinfo[get_alias(y)].interferences[0,t]) then - begin - spill_temps[t]:=spillinfo[get_alias(y)].spilllocation; + getnewspillloc then + begin + tmovehlist:=@reginfo[t].movehlist; + for j:=0 to sizeint(tmovehlist^.nitems)-1 do + begin + m:=tmovehlist^.items[j]; + if (m.x=t) then + begin + yspill:=@spillinfo[get_alias(m.y)]; + if (yspill^.spilled) and + not(yspill^.interferences[0,t]) then + begin + spill_temps[t]:=yspill^.spilllocation; {$ifdef DEBUG_SPILLCOALESCE} - writeln('trgobj.spill_registers: Spill coalesce ',t,' to ',y); + writeln('trgobj.spill_registers: Spill coalesce ',t,' to ',m.y); {$endif DEBUG_SPILLCOALESCE} - getnewspillloc:=false; - break; - end - else if (y=t) and - (spillinfo[get_alias(x)].spilled) and - not(spillinfo[get_alias(x)].interferences[0,t]) then - begin + getnewspillloc:=false; + break; + end; + end; + if (m.y=t) then + begin + xspill:=@spillinfo[get_alias(m.x)]; + if (xspill^.spilled) and + not(xspill^.interferences[0,t]) then + begin {$ifdef DEBUG_SPILLCOALESCE} - writeln('trgobj.spill_registers: Spill coalesce ',t,' to ',x); + writeln('trgobj.spill_registers: Spill coalesce ',t,' to ',m.x); {$endif DEBUG_SPILLCOALESCE} - spill_temps[t]:=spillinfo[get_alias(x)].spilllocation; - getnewspillloc:=false; - break; - end; + spill_temps[t]:=xspill^.spilllocation; + getnewspillloc:=false; + break; + end; + end; + end; end; if getnewspillloc then From b2e596ada23003a6528ad0178241eaa79999a138 Mon Sep 17 00:00:00 2001 From: Rika Ichinose Date: Wed, 4 May 2022 01:13:08 +0300 Subject: [PATCH 2/3] Replace TSuperRegisterWorkList with TSuperRegisterWork >Hash< List. --- compiler/cgbase.pas | 374 +++++++++++++++++++++++---------- compiler/ncgutil.pas | 20 +- compiler/rgobj.pas | 478 +++++++++++++++++-------------------------- 3 files changed, 464 insertions(+), 408 deletions(-) diff --git a/compiler/cgbase.pas b/compiler/cgbase.pas index 4df245341d..a6784d9bef 100644 --- a/compiler/cgbase.pas +++ b/compiler/cgbase.pas @@ -312,6 +312,7 @@ interface TSubRegisterSet = set of TSubRegister; TSuperRegister = type word; + PSuperRegister = ^TSuperRegister; { The new register coding: @@ -368,26 +369,36 @@ interface shuffles : array[1..1] of word; end; - Tsuperregisterarray=array of Tsuperregister; + Tsuperregistercomparefunc = function(a, b: TSuperRegister; param: pointer): boolean; - Tsuperregisterworklist=object - buflength, - buflengthinc, - length:word; - buf:tsuperregisterarray; + Tsuperregisterworkhashlist=object constructor init; - constructor copyfrom(const x:Tsuperregisterworklist); + constructor copyfrom(const x:Tsuperregisterworkhashlist); destructor done; procedure clear; procedure add(s:tsuperregister); function addnodup(s:tsuperregister): boolean; { returns the last element and removes it from the list } function get:tsuperregister; - function readidx(i:word):tsuperregister; - procedure deleteidx(i:word); + procedure deleteidx(i:SizeInt); function delete(s:tsuperregister):boolean; + procedure sort(less:Tsuperregistercomparefunc; param:pointer); + private + { Either a region is allocated for FItems+h+next, or the table is empty with hmask=0, fake 'h' pointing to a single -1, and distinctive FItems=nil. } + FItems : Psuperregister; + h : PInt32; { Count of 'h's is always hmask+1 and is always a power of two. h[i]=-1 means empty, h[i]=hi>=0 references FItems[hi]. } + next : PInt32; { Chaining to allow duplicates and resolve collisions. } + FNItems : int32; { signed to allow subtracting one without surprises... } + hmask,minItems,maxItems : uint32; + procedure rehash(forItems:SizeUint); + class function allocateregion( + nh,amaxitems:uint32; out aitems:Psuperregister; out ah:PInt32; out anext:PInt32):SizeUint; static; + class procedure rebuildh(aitems:Psuperregister; ah,anext:PInt32; ahmask,anitems:int32); static; + public + property length: int32 read FNItems; + property buf: Psuperregister read FItems; end; - psuperregisterworklist=^tsuperregisterworklist; + Psuperregisterworkhashlist = ^Tsuperregisterworkhashlist; const { alias for easier understanding } @@ -540,119 +551,258 @@ implementation verbose, cutils; + {****************************************************************************** - tsuperregisterworklist + tsuperregisterworkhashlist ******************************************************************************} - constructor tsuperregisterworklist.init; + const + SuperRegisterWorkHashList_EmptyH: int32 = -1; + + constructor Tsuperregisterworkhashlist.init; begin - length:=0; - buflength:=0; - buflengthinc:=16; - buf:=nil; - end; - - constructor Tsuperregisterworklist.copyfrom(const x:Tsuperregisterworklist); - - begin - // self.x cannot be used, we'd copy over the dyn array - buflength:=x.buflength; - buflengthinc:=x.buflengthinc; - length:=x.length; - if x.buf<>nil then - begin - setlength(buf,buflength); - move(x.buf[0],buf[0],length*sizeof(Tsuperregister)); - end; - end; - - destructor tsuperregisterworklist.done; - - begin - buf:=nil; + h:=@SuperRegisterWorkHashList_EmptyH; end; - procedure tsuperregisterworklist.add(s:tsuperregister); - - begin - inc(length); - { Need to increase buffer length? } - if length>=buflength then - begin - inc(buflength,buflengthinc); - buflengthinc:=buflengthinc*2; - if buflengthinc>256 then - buflengthinc:=256; - setlength(buf,buflength); - end; - buf[length-1]:=s; - end; - - - function tsuperregisterworklist.addnodup(s:tsuperregister): boolean; - - begin - addnodup := false; - if (length=0) or (indexword(buf[0],length,s) = -1) then - begin - add(s); - addnodup := true; - end; - end; - - - procedure tsuperregisterworklist.clear; - - begin - length:=0; - end; - - - procedure tsuperregisterworklist.deleteidx(i:word); - - begin - if i>=length then - internalerror(200310144); - buf[i]:=buf[length-1]; - dec(length); - end; - - - function tsuperregisterworklist.readidx(i:word):tsuperregister; - begin - if (i >= length) then - internalerror(2005010601); - result := buf[i]; - end; - - - function tsuperregisterworklist.get:tsuperregister; - - begin - if length=0 then - internalerror(200310142); - dec(length); - get:=buf[length]; - end; - - - function tsuperregisterworklist.delete(s:tsuperregister):boolean; - + constructor Tsuperregisterworkhashlist.copyfrom(const x:Tsuperregisterworkhashlist); var - i:longint; - + datasize : SizeUint; begin - delete:=false; - if (system.length(buf)=0) then - exit; - { indexword in 1.0.x and 1.9.4 is broken } - i:=indexword(buf[0],length,s); - if i<>-1 then + if x.FNItems=0 then + h:=@SuperRegisterWorkHashList_EmptyH + else begin - deleteidx(i); - delete := true; + datasize:=allocateregion(1+x.hmask,x.maxItems,FItems,h,next); + Move(x.FItems^,FItems^,datasize); + FNItems:=x.FNItems; + hmask:=x.hmask; + minItems:=x.minItems; + maxItems:=x.maxItems; + end; + end; + + + destructor Tsuperregisterworkhashlist.done; + begin + FreeMem(FItems); + FItems:=nil; + end; + + + procedure Tsuperregisterworkhashlist.clear; + begin + if not Assigned(FItems) then + exit; + FreeMem(FItems); + FItems:=nil; + h:=@SuperRegisterWorkHashList_EmptyH; + FNItems:=0; + hmask:=0; + minItems:=0; + maxItems:=0; + end; + + + procedure Tsuperregisterworkhashlist.add(s:tsuperregister); + var + ii : SizeInt; + hp : PInt32; + begin + ii:=FNItems; + if uint32(ii)=maxItems then + rehash(ii+1); + FItems[ii]:=s; + hp:=h+s and hmask; + next[ii]:=hp^; + hp^:=ii; + FNItems:=ii+1; + end; + + + function Tsuperregisterworkhashlist.addnodup(s:tsuperregister):boolean; + var + ii : SizeInt; + begin + ii:=h[s and hmask]; + while ii>=0 do + begin + if FItems[ii]=s then + exit(false); + ii:=next[ii]; + end; + add(s); + result:=true; + end; + + + function Tsuperregisterworkhashlist.get:tsuperregister; + var + ii : SizeInt; + begin + ii:=length-1; + if ii<0 then + internalerror(202205030); + result:=FItems[ii]; + deleteidx(ii); + end; + + + procedure Tsuperregisterworkhashlist.deleteidx(i: SizeInt); + var + ii,ilast : SizeInt; + nextp : PInt32; + begin + if (i<0) or (i>=length) then + internalerror(202205031); + { Remove #i reference from h/next. } + nextp:=h+FItems[i] and hmask; + repeat + ii:=nextp^; + if ii=i then + break; + nextp:=next+ii; + until false; + nextp^:=next[ii]; + + { Move item #length-1 = #ilast to #i and fix up its reference in h/next. } + ilast:=length-1; + if i<>ilast then + begin + nextp:=h+FItems[ilast] and hmask; + repeat + ii:=nextp^; + if ii=ilast then + break; + nextp:=next+ii; + until false; + nextp^:=i; + FItems[i]:=FItems[ilast]; + next[i]:=next[ilast]; + end; + + FNItems:=ilast; + if ilast=0 do + begin + if FItems[ii]=s then + begin + deleteidx(ii); + exit(true); + end; + ii:=next[ii]; + end; + result:=false; + end; + + + procedure Tsuperregisterworkhashlist.sort(less:Tsuperregistercomparefunc; param:pointer); + var + p,ih,i:SizeUint; + t:Tsuperregister; + begin + if length<2 then + exit; + p:=SizeUint(1) shl BsrDWord(length-1); + repeat + for ih:=p to length-1 do + begin + i:=ih; + t:=buf[i]; + repeat + if not less(t,buf[i-p],param) then + break; + buf[i]:=buf[i-p]; + dec(i,p) + until inil then begin for i:=0 to maxreg-1 do - with reginfo[i] do - begin - if adjlist<>nil then - dispose(adjlist,done); - movehlist.Done; - end; + begin + reginfo[i].adjlist.Done; + reginfo[i].movehlist.Done; + end; reginfo:=nil; end; end; @@ -615,7 +611,10 @@ unit rgobj; end; SetLength(reginfo,maxreginfo); for i:=oldmaxreginfo to maxreginfo-1 do - reginfo[i].movehlist.init; + begin + reginfo[i].movehlist.init; + reginfo[i].adjlist.init; + end; end; reginfo[result].subreg:=subreg; end; @@ -789,9 +788,7 @@ unit rgobj; {$endif} with reginfo[u] do begin - if adjlist=nil then - new(adjlist,init); - adjlist^.add(v); + adjlist.add(v); if (v0 then - for i:=0 to length-1 do - add_edge(u,get_alias(buf[i])); + for i:=0 to length-1 do + add_edge(u,get_alias(buf[i])); end; {$ifdef EXTDEBUG} @@ -1040,95 +1036,11 @@ unit rgobj; end; end; - procedure Trgobj.sort_simplify_worklist; - - {Sorts the simplifyworklist by the number of interferences the - registers in it cause. This allows simplify to execute in - constant time. - - Sort the list in the descending order, since items of simplifyworklist - are retrieved from end to start and then items are added to selectstack. - The selectstack list is also processed from end to start. - - Such way nodes with most interferences will get their colors first. - Since degree of nodes in simplifyworklist before sorting is always - less than the number of usable registers this should not trigger spilling - and should lead to a better register allocation in some cases. - } - - var p,h,i,leni,lent:longword; - t:Tsuperregister; - adji,adjt:Psuperregisterworklist; - - begin - with simplifyworklist do - begin - if length<2 then - exit; - p:=longword(1) shl BsrDWord(length-1); - repeat - for h:=p to length-1 do - begin - i:=h; - t:=buf[i]; - adjt:=reginfo[buf[i]].adjlist; - lent:=0; - if adjt<>nil then - lent:=adjt^.length; - repeat - adji:=reginfo[buf[i-p]].adjlist; - leni:=0; - if adji<>nil then - leni:=adji^.length; - if leni>=lent then - break; - buf[i]:=buf[i-p]; - dec(i,p) - until inil then - lent:=adjt^.length; - repeat - adji:=reginfo[buf[i-p]].adjlist; - leni:=0; - if adji<>nil then - leni:=adji^.length; - if leni<=lent then - break; - buf[i]:=buf[i-p]; - dec(i,p) - until irg[b].adjlist.length; end; @@ -1142,10 +1054,7 @@ unit rgobj; for n:=first_imaginary to maxreg-1 do with reginfo[n] do begin - if adjlist=nil then - degree:=0 - else - degree:=adjlist^.length; + degree:=adjlist.length; if degree>=usable_registers_cnt then spillworklist.add(n) else if move_related(n) then @@ -1153,7 +1062,21 @@ unit rgobj; else if not(ri_coalesced in flags) then simplifyworklist.add(n); end; - sort_simplify_worklist; + + {Sort the simplifyworklist by the number of interferences the + registers in it cause. This allows simplify to execute in + constant time. + + Sort the list in the descending order, since items of simplifyworklist + are retrieved from end to start and then items are added to selectstack. + The selectstack list is also processed from end to start. + + Such way nodes with most interferences will get their colors first. + Since degree of nodes in simplifyworklist before sorting is always + less than the number of usable registers this should not trigger spilling + and should lead to a better register allocation in some cases. + } + simplifyworklist.sort(@simplifyworklist_come_before,pointer(reginfo)); end; @@ -1190,9 +1113,9 @@ unit rgobj; procedure Trgobj.decrement_degree(m:Tsuperregister); - var adj : Psuperregisterworklist; - n : tsuperregister; - d,i : cardinal; + var n : tsuperregister; + d : cardinal; + i : sizeint; begin with reginfo[m] do @@ -1206,14 +1129,12 @@ unit rgobj; {Enable moves for m.} enable_moves(m); {Enable moves for adjacent.} - adj:=adjlist; - if adj<>nil then - for i:=1 to adj^.length do - begin - n:=adj^.buf[i-1]; - if reginfo[n].flags*[ri_selected,ri_coalesced]<>[] then - enable_moves(n); - end; + for i:=0 to adjlist.length-1 do + begin + n:=adjlist.buf[i]; + if reginfo[n].flags*[ri_selected,ri_coalesced]<>[] then + enable_moves(n); + end; {Remove the node from the spillworklist.} if not spillworklist.delete(m) then internalerror(200310145); @@ -1228,9 +1149,9 @@ unit rgobj; procedure trgobj.simplify; - var adj : Psuperregisterworklist; + var adj : Psuperregisterworkhashlist; m,n : Tsuperregister; - i : cardinal; + i : sizeint; begin {We take the element with the least interferences out of the simplifyworklist. Since the simplifyworklist is now sorted, we @@ -1242,16 +1163,15 @@ unit rgobj; with reginfo[m] do begin include(flags,ri_selected); - adj:=adjlist; + adj:=@adjlist; + end; + for i:=0 to adj^.length-1 do + begin + n:=adj^.buf[i]; + if (n>=first_imaginary) and + (reginfo[n].flags*[ri_selected,ri_coalesced]=[]) then + decrement_degree(n); end; - if adj<>nil then - for i:=1 to adj^.length do - begin - n:=adj^.buf[i-1]; - if (n>=first_imaginary) and - (reginfo[n].flags*[ri_selected,ri_coalesced]=[]) then - decrement_degree(n); - end; end; function trgobj.get_alias(n:Tsuperregister):Tsuperregister; @@ -1289,33 +1209,31 @@ unit rgobj; ibitmap[r,t]; end; - var adj : Psuperregisterworklist; - i : cardinal; + var i : sizeint; n : tsuperregister; begin with reginfo[v] do begin adjacent_ok:=true; - adj:=adjlist; - if adj<>nil then - for i:=1 to adj^.length do - begin - n:=adj^.buf[i-1]; - if (reginfo[n].flags*[ri_coalesced]=[]) and not ok(n,u) then - begin - adjacent_ok:=false; - break; - end; - end; + for i:=0 to adjlist.length-1 do + begin + n:=adjlist.buf[i]; + if (reginfo[n].flags*[ri_coalesced]=[]) and not ok(n,u) then + begin + adjacent_ok:=false; + break; + end; + end; end; end; function trgobj.conservative(u,v:Tsuperregister):boolean; - var adj : Psuperregisterworklist; + var adj : Psuperregisterworkhashlist; done : Tsuperregisterset; {To prevent that we count nodes twice.} - i,k:cardinal; + i : sizeint; + k : cardinal; n : tsuperregister; begin @@ -1323,39 +1241,36 @@ unit rgobj; supregset_reset(done,false,maxreg); with reginfo[u] do begin - adj:=adjlist; - if adj<>nil then - for i:=1 to adj^.length do - begin - n:=adj^.buf[i-1]; - if reginfo[n].flags*[ri_coalesced,ri_selected]=[] then - begin - supregset_include(done,n); - if reginfo[n].degree>=usable_registers_cnt then - inc(k); - end; - end; + for i:=0 to adjlist.length-1 do + begin + n:=adjlist.buf[i]; + if reginfo[n].flags*[ri_coalesced,ri_selected]=[] then + begin + supregset_include(done,n); + if reginfo[n].degree>=usable_registers_cnt then + inc(k); + end; + end; + end; + adj:=@reginfo[v].adjlist; + for i:=0 to adj^.length-1 do + begin + n:=adj^.buf[i]; + if (u=first_imaginary) and + not ibitmap[u,n] and + (usable_registers_cnt-reginfo[n].real_reg_interferences<=1) then + begin + { Do not coalesce if 'u' is the last usable real register available + for imaginary register 'n'. } + conservative:=false; + exit; + end; + if not supregset_in(done,n) and + (reginfo[n].degree>=usable_registers_cnt) and + (reginfo[n].flags*[ri_coalesced,ri_selected]=[]) then + inc(k); end; - adj:=reginfo[v].adjlist; - if adj<>nil then - for i:=1 to adj^.length do - begin - n:=adj^.buf[i-1]; - if (u=first_imaginary) and - not ibitmap[u,n] and - (usable_registers_cnt-reginfo[n].real_reg_interferences<=1) then - begin - { Do not coalesce if 'u' is the last usable real register available - for imaginary register 'n'. } - conservative:=false; - exit; - end; - if not supregset_in(done,n) and - (reginfo[n].degree>=usable_registers_cnt) and - (reginfo[n].flags*[ri_coalesced,ri_selected]=[]) then - inc(k); - end; conservative:=(knil then - for i:=1 to adj^.length do - begin - t:=adj^.buf[i-1]; - with reginfo[t] do - if not(ri_coalesced in flags) then - begin - {t has a connection to v. Since we are adding v to u, we - need to connect t to u. However, beware if t was already - connected to u...} - if (ibitmap[t,u]) and not (ri_selected in flags) then - begin - {... because in that case, we are actually removing an edge - and the degree of t decreases.} - decrement_degree(t); - { if v is combined with a real register, retry - coalescing of interfering nodes since it may succeed now. } - if (u=usable_registers_cnt) and - (reginfo[t].degree>usable_registers_cnt) then - enable_moves(t); - end - else - begin - add_edge(t,u); - {We have added an edge to t and u. So their degree increases. - However, v is added to u. That means its neighbours will - no longer point to v, but to u instead. Therefore, only the - degree of u increases.} - if (u>=first_imaginary) and not (ri_selected in flags) then - inc(reginfo[u].degree); - end; - end; - end; + adj:=@reginfo[v].adjlist; + for i:=0 to adj^.length-1 do + begin + t:=adj^.buf[i]; + with reginfo[t] do + if not(ri_coalesced in flags) then + begin + {t has a connection to v. Since we are adding v to u, we + need to connect t to u. However, beware if t was already + connected to u...} + if (ibitmap[t,u]) and not (ri_selected in flags) then + begin + {... because in that case, we are actually removing an edge + and the degree of t decreases.} + decrement_degree(t); + { if v is combined with a real register, retry + coalescing of interfering nodes since it may succeed now. } + if (u=usable_registers_cnt) and + (reginfo[t].degree>usable_registers_cnt) then + enable_moves(t); + end + else + begin + add_edge(t,u); + {We have added an edge to t and u. So their degree increases. + However, v is added to u. That means its neighbours will + no longer point to v, but to u instead. Therefore, only the + degree of u increases.} + if (u>=first_imaginary) and not (ri_selected in flags) then + inc(reginfo[u].degree); + end; + end; + end; if (reginfo[u].degree>=usable_registers_cnt) and freezeworklist.delete(u) then spillworklist.add(u); end; @@ -1571,7 +1482,7 @@ unit rgobj; procedure trgobj.select_spill; var n : tsuperregister; - adj : psuperregisterworklist; + adj : Psuperregisterworkhashlist; maxlength,minlength,p,i :word; minweight: longint; {$ifdef SPILLING_NEW} @@ -1610,18 +1521,18 @@ unit rgobj; { Safe: This procedure is only called if length<>0 } for i:=0 to length-1 do begin - adj:=reginfo[buf^[i]].adjlist; - dist:=adj^.length-reginfo[buf^[i]].total_interferences/reginfo[buf^[i]].count_uses; - if assigned(adj) and - (reginfo[buf^[i]].weight0) and + (reginfo[buf[i]].weight=1) and - (reginfo[buf^[i]].weight>0) then + (reginfo[buf[i]].weight>0) then begin p:=i; - minweight:=reginfo[buf^[i]].weight; + minweight:=reginfo[buf[i]].weight; end; end; - n:=buf^[p]; + n:=buf[p]; deleteidx(p); end; {$endif SPILLING_NEW} @@ -1648,9 +1559,8 @@ unit rgobj; for i:=0 to length-1 do if not(ri_spill_helper in reginfo[buf[i]].flags) then begin - adj:=reginfo[buf[i]].adjlist; - if assigned(adj) and - ( + adj:=@reginfo[buf[i]].adjlist; + if ( (adj^.length>maxlength) or ((adj^.length=maxlength) and (reginfo[buf[i]].weightRS_INVALID)} @@ -1729,14 +1639,13 @@ unit rgobj; begin {Create a list of colours that we cannot assign to n.} adj_colours:=[]; - adj:=reginfo[n].adjlist; - if adj<>nil then - for j:=0 to adj^.length-1 do - begin - a:=get_alias(adj^.buf[j]); - if supregset_in(colourednodes,a) and (reginfo[a].colour<=255) then - include(adj_colours,reginfo[a].colour); - end; + adj:=@reginfo[n].adjlist; + for j:=0 to adj^.length-1 do + begin + a:=get_alias(adj^.buf[j]); + if supregset_in(colourednodes,a) and (reginfo[a].colour<=255) then + include(adj_colours,reginfo[a].colour); + end; { e.g. AVR does not have a stack pointer register } {$if declared(RS_STACK_POINTER_REG) and (RS_STACK_POINTER_REG<>RS_INVALID)} { FIXME: temp variable r is needed here to avoid Internal error 20060521 } @@ -1764,16 +1673,17 @@ unit rgobj; end; var - i,k : cardinal; + i : sizeint; + k : cardinal; n : Tsuperregister; spill_loop : boolean; begin reset_colours; {Now colour the imaginary registers on the select-stack.} spill_loop:=false; - for i:=selectstack.length downto 1 do + for i:=selectstack.length-1 downto 0 do begin - n:=selectstack.buf[i-1]; + n:=selectstack.buf[i]; if not colour_register(n) and (ri_spill_helper in reginfo[n].flags) then begin @@ -1791,9 +1701,9 @@ unit rgobj; Trying to eliminte this by using a different colouring order. } reset_colours; { To prevent spilling of helper registers it is needed to assign colours to them first. } - for i:=selectstack.length downto 1 do + for i:=selectstack.length-1 downto 0 do begin - n:=selectstack.buf[i-1]; + n:=selectstack.buf[i]; if ri_spill_helper in reginfo[n].flags then if not colour_register(n) then { Can't colour the spill helper register n. @@ -1802,18 +1712,18 @@ unit rgobj; internalerror(2021091001); end; { Assign colours for the rest of the registers } - for i:=selectstack.length downto 1 do + for i:=selectstack.length-1 downto 0 do begin - n:=selectstack.buf[i-1]; + n:=selectstack.buf[i]; if not (ri_spill_helper in reginfo[n].flags) then colour_register(n); end; end; {Finally colour the nodes that were coalesced.} - for i:=1 to coalescednodes.length do + for i:=0 to coalescednodes.length-1 do begin - n:=coalescednodes.buf[i-1]; + n:=coalescednodes.buf[i]; k:=get_alias(n); reginfo[n].colour:=reginfo[k].colour; end; @@ -1867,36 +1777,23 @@ unit rgobj; {Remove node u from the interference graph and remove all collected move instructions it is associated with.} - var i : word; + var i : sizeint; v : Tsuperregister; - adj,adj2 : Psuperregisterworklist; + adj : Psuperregisterworkhashlist; begin - adj:=reginfo[u].adjlist; - if adj<>nil then + adj:=@reginfo[u].adjlist; + for i:=0 to adj^.length-1 do begin - for i:=1 to adj^.length do - begin - v:=adj^.buf[i-1]; - {Remove (u,v) and (v,u) from bitmap.} - ibitmap[u,v]:=false; - ibitmap[v,u]:=false; - {Remove (v,u) from adjacency list.} - adj2:=reginfo[v].adjlist; - if adj2<>nil then - begin - adj2^.delete(u); - if adj2^.length=0 then - begin - dispose(adj2,done); - reginfo[v].adjlist:=nil; - end; - end; - end; - {Remove ( u,* ) from adjacency list.} - dispose(adj,done); - reginfo[u].adjlist:=nil; + v:=adj^.buf[i]; + {Remove (u,v) and (v,u) from bitmap.} + ibitmap[u,v]:=false; + ibitmap[v,u]:=false; + {Remove (v,u) from adjacency list.} + reginfo[v].adjlist.delete(u); end; + {Remove ( u,* ) from adjacency list.} + adj^.clear; end; @@ -2139,7 +2036,7 @@ unit rgobj; {$ifdef DEBUG_REGISTERLIFE} write(live_registers.length,' '); for i:=0 to live_registers.length-1 do - write(std_regname(newreg(regtype,live_registers.buf^[i],defaultsub)),' '); + write(std_regname(newreg(regtype,live_registers.buf[i],defaultsub)),' '); writeln; {$endif DEBUG_REGISTERLIFE} add_edges_used(supreg); @@ -2150,7 +2047,7 @@ unit rgobj; {$ifdef DEBUG_REGISTERLIFE} write(live_registers.length,' '); for i:=0 to live_registers.length-1 do - write(std_regname(newreg(regtype,live_registers.buf^[i],defaultsub)),' '); + write(std_regname(newreg(regtype,live_registers.buf[i],defaultsub)),' '); writeln; {$endif DEBUG_REGISTERLIFE} add_edges_used(supreg); @@ -2416,6 +2313,14 @@ unit rgobj; end; + function spillednodes_come_before(a,b:Tsuperregister;param:pointer):boolean; + var + rg:TReginfoArray absolute param; + begin + result:=rg[a].adjlist.length Date: Thu, 12 May 2022 10:43:20 +0300 Subject: [PATCH 3/3] Remove TMoveIns.id and use its address instead. --- compiler/rgobj.pas | 25 ++++++++++--------------- 1 file changed, 10 insertions(+), 15 deletions(-) diff --git a/compiler/rgobj.pas b/compiler/rgobj.pas index 95f5966cb4..72c35a8051 100644 --- a/compiler/rgobj.pas +++ b/compiler/rgobj.pas @@ -81,7 +81,6 @@ unit rgobj; Tmoveins=class(Tlinkedlistitem) moveset:Tmoveset; x,y:Tsuperregister; - id:uint32; end; PTmoveins=^Tmoveins; @@ -94,6 +93,7 @@ unit rgobj; procedure done; procedure add(ins:Tmoveins); { no-op if exists } procedure rehash(foritems:sizeuint); + class function hashins(m:Tmoveins):sizeuint; static; {$ifdef USEINLINE} inline; {$endif} end; Treginfoflag=( @@ -263,7 +263,6 @@ unit rgobj; has_usedmarks: boolean; has_directalloc: boolean; spillinfo : array of tspillinfo; - moveins_id_counter: uint32; { Disposes of the reginfo array.} procedure dispose_reginfo; @@ -408,10 +407,9 @@ unit rgobj; end; procedure Tmovehashlist.add(ins:Tmoveins); - var hashRest : uint32; - ih,ii : sizeuint; + var hashRest,ih,ii : sizeuint; begin - hashRest:=ins.id; + hashRest:=hashins(ins); ih:=hashRest and hmask; repeat ii:=h[ih]; @@ -443,8 +441,7 @@ unit rgobj; var newh : puint32; newitems : PTmoveins; item : Tmoveins; - newhmask,newmaxitems,itemsOffset,iitem,ih : sizeuint; - hashRest : uint32; + newhmask,newmaxitems,itemsOffset,iitem,ih,hashRest : sizeuint; begin if foritems shr (bitsizeof(h^)-3)<>0 then internalerror(202204251); { too big table } @@ -469,7 +466,7 @@ unit rgobj; while iitemgetregtype(dreg)) then exit; -{$push} {$q-,r-} - inc(moveins_id_counter); -{$pop} - if moveins_id_counter=0 then - internalerror(2021112701); i:=Tmoveins.create; - i.id:=moveins_id_counter; i.moveset:=ms_worklist_moves; worklist_moves.insert(i); ssupreg:=getsupreg(sreg);