{ Copyright (c) 1998-2012 by the Free Pascal team This unit implements the base class for the register allocator This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. **************************************************************************** } {$i fpcdefs.inc} { $define DEBUG_REGALLOC} { $define DEBUG_SPILLCOALESCE} { $define DEBUG_REGISTERLIFE} { Allow duplicate allocations, can be used to get the .s file written } { $define ALLOWDUPREG} {$ifdef DEBUG_REGALLOC} {$define EXTDEBUG} {$endif DEBUG_REGALLOC} unit rgobj; interface uses cutils, cpubase, aasmtai,aasmdata,aasmsym,aasmcpu, cclasses,globtype,cgbase,cgutils; type { The interference bitmap contains of 2 layers: layer 1 - 256*256 blocks with pointers to layer 2 blocks layer 2 - blocks of 32*256 (32 bytes = 256 bits) } Tinterferencebitmap2 = array[byte] of set of byte; Pinterferencebitmap2 = ^Tinterferencebitmap2; Tinterferencebitmap1 = array[byte] of Pinterferencebitmap2; pinterferencebitmap1 = ^tinterferencebitmap1; Tinterferencebitmap=class private maxx1, maxy1 : byte; fbitmap : pinterferencebitmap1; function getbitmap(x,y:tsuperregister):boolean; procedure setbitmap(x,y:tsuperregister;b:boolean); public constructor create; destructor destroy;override; property bitmap[x,y:tsuperregister]:boolean read getbitmap write setbitmap;default; end; {In the register allocator we keep track of move instructions. These instructions are moved between five linked lists. There is also a linked list per register to keep track about the moves it is associated with. Because we need to determine quickly in which of the five lists it is we add anu enumeradtion to each move instruction.} Tmoveset=(ms_coalesced_moves,ms_constrained_moves,ms_frozen_moves, ms_worklist_moves,ms_active_moves); Tmoveins=class(Tlinkedlistitem) moveset:Tmoveset; x,y:Tsuperregister; id:longint; end; Tmovelistheader=record count, maxcount, sorted_until : cardinal; end; Tmovelist=record header : Tmovelistheader; data : array[tsuperregister] of Tmoveins; end; Pmovelist=^Tmovelist; Treginfoflag=( ri_coalesced, { the register is coalesced with other register } ri_selected, { the register is put to selectstack } ri_spill_helper, { the register contains a value of a previously spilled register } ri_has_initial_loc { the register has the initial memory location (e.g. a parameter in the stack) } ); Treginfoflagset=set of Treginfoflag; Treginfo=record live_start, live_end : Tai; subreg : tsubregister; alias : Tsuperregister; { The register allocator assigns each register a colour } colour : Tsuperregister; movelist : Pmovelist; adjlist : Psuperregisterworklist; degree : TSuperregister; flags : Treginfoflagset; weight : longint; {$ifdef llvm} def : pointer; {$endif llvm} count_uses : longint; total_interferences : longint; real_reg_interferences: word; end; Preginfo=^TReginfo; tspillreginfo = record { a single register may appear more than once in an instruction, but with different subregister types -> store all subregister types that occur, so we can add the necessary constraints for the inline register that will have to replace it } spillregconstraints : set of TSubRegister; orgreg : tsuperregister; loadreg, storereg: tregister; regread, regwritten, mustbespilled: boolean; end; tspillregsinfo = record spillreginfocount: longint; spillreginfo: array[0..3] of tspillreginfo; end; Pspill_temp_list=^Tspill_temp_list; Tspill_temp_list=array[tsuperregister] of Treference; { used to store where a register is spilled and what interferences it has at the point of being spilled } tspillinfo = record spilllocation : treference; spilled : boolean; interferences : Tinterferencebitmap; end; {#------------------------------------------------------------------ This class implements the default register allocator. It is used by the code generator to allocate and free registers which might be valid across nodes. It also contains utility routines related to registers. Some of the methods in this class should be overridden by cpu-specific implementations. --------------------------------------------------------------------} trgobj=class preserved_by_proc : tcpuregisterset; used_in_proc : tcpuregisterset; { generate SSA code? } ssa_safe: boolean; constructor create(Aregtype:Tregistertype; Adefaultsub:Tsubregister; const Ausable:array of tsuperregister; Afirst_imaginary:Tsuperregister; Apreserved_by_proc:Tcpuregisterset); destructor destroy;override; { Allocate a register. An internalerror will be generated if there is no more free registers which can be allocated.} function getregister(list:TAsmList;subreg:Tsubregister):Tregister;virtual; { Get the register specified.} procedure getcpuregister(list:TAsmList;r:Tregister);virtual; procedure ungetcpuregister(list:TAsmList;r:Tregister);virtual; { Get multiple registers specified.} procedure alloccpuregisters(list:TAsmList;const r:Tcpuregisterset);virtual; { Free multiple registers specified.} procedure dealloccpuregisters(list:TAsmList;const r:Tcpuregisterset);virtual; function uses_registers:boolean;virtual; procedure add_reg_instruction(instr:Tai;r:tregister;aweight:longint); procedure add_move_instruction(instr:Taicpu); { Do the register allocation.} procedure do_register_allocation(list:TAsmList;headertai:tai);virtual; { Adds an interference edge. don't move this to the protected section, the arm cg requires to access this (FK) } procedure add_edge(u,v:Tsuperregister); { translates a single given imaginary register to it's real register } procedure translate_register(var reg : tregister); { sets the initial memory location of the register } procedure set_reg_initial_location(reg: tregister; const ref: treference); protected maxreginfo, maxreginfoinc, maxreg : Tsuperregister; regtype : Tregistertype; { default subregister used } defaultsub : tsubregister; live_registers:Tsuperregisterworklist; spillednodes: tsuperregisterworklist; { can be overridden to add cpu specific interferences } procedure add_cpu_interferences(p : tai);virtual; procedure add_constraints(reg:Tregister);virtual; function getregisterinline(list:TAsmList;const subregconstraints:Tsubregisterset):Tregister; procedure ungetregisterinline(list:TAsmList;r:Tregister); function get_spill_subreg(r : tregister) : tsubregister;virtual; function do_spill_replace(list:TAsmList;instr:tai_cpu_abstract_sym;orgreg:tsuperregister;const spilltemp:treference):boolean;virtual; { the orgrsupeg parameter is only here for the llvm target, so it can discover the def to use for the load } procedure do_spill_read(list:TAsmList;pos:tai;const spilltemp:treference;tempreg:tregister;orgsupreg:tsuperregister);virtual; procedure do_spill_written(list:TAsmList;pos:tai;const spilltemp:treference;tempreg:tregister;orgsupreg:tsuperregister);virtual; function addreginfo(var spregs: tspillregsinfo; const r: tsuperregisterset; reg: tregister; operation: topertype): boolean; function instr_get_oper_spilling_info(var spregs: tspillregsinfo; const r: tsuperregisterset; instr: tai_cpu_abstract_sym; opidx: longint): boolean; virtual; procedure substitute_spilled_registers(const spregs: tspillregsinfo; instr: tai_cpu_abstract_sym; opidx: longint); virtual; procedure try_replace_reg(const spregs: tspillregsinfo; var reg: tregister; useloadreg: boolean); function instr_spill_register(list:TAsmList; instr:tai_cpu_abstract_sym; const r:Tsuperregisterset; const spilltemplist:Tspill_temp_list): boolean;virtual; procedure insert_regalloc_info_all(list:TAsmList); procedure determine_spill_registers(list:TAsmList;headertail:tai); virtual; procedure get_spill_temp(list:TAsmlist;spill_temps: Pspill_temp_list; supreg: tsuperregister);virtual; strict protected { Highest register allocated until now.} reginfo : PReginfo; usable_registers_cnt : word; private int_live_range_direction: TRADirection; { First imaginary register.} first_imaginary : Tsuperregister; usable_registers : array[0..maxcpuregister] of tsuperregister; usable_register_set : tcpuregisterset; ibitmap : Tinterferencebitmap; simplifyworklist, freezeworklist, spillworklist, coalescednodes, selectstack : tsuperregisterworklist; worklist_moves, active_moves, frozen_moves, coalesced_moves, constrained_moves, { in this list we collect all moveins which should be disposed after register allocation finishes, we still need the moves for spill coalesce for the whole register allocation process, so they cannot be released as soon as they are frozen or whatever } move_garbage : Tlinkedlist; extended_backwards, backwards_was_first : tbitset; has_usedmarks: boolean; has_directalloc: boolean; spillinfo : array of tspillinfo; moveins_id_counter: longint; { Disposes of the reginfo array.} procedure dispose_reginfo; { Prepare the register colouring.} procedure prepare_colouring; { Clean up after register colouring.} procedure epilogue_colouring; { Colour the registers; that is do the register allocation.} procedure colour_registers; procedure insert_regalloc_info(list:TAsmList;u:tsuperregister); procedure generate_interference_graph(list:TAsmList;headertai:tai); { sort spilled nodes by increasing number of interferences } procedure sort_spillednodes; { translates the registers in the given assembler list } procedure translate_registers(list:TAsmList); function spill_registers(list:TAsmList;headertai:tai):boolean;virtual; function getnewreg(subreg:tsubregister):tsuperregister; procedure add_edges_used(u:Tsuperregister); procedure add_to_movelist(u:Tsuperregister;ins:Tmoveins); function move_related(n:Tsuperregister):boolean; procedure make_work_list; procedure sort_simplify_worklist; procedure enable_moves(n:Tsuperregister); procedure decrement_degree(m:Tsuperregister); procedure simplify; procedure add_worklist(u:Tsuperregister); function adjacent_ok(u,v:Tsuperregister):boolean; function conservative(u,v:Tsuperregister):boolean; procedure coalesce; procedure freeze_moves(u:Tsuperregister); procedure freeze; procedure select_spill; procedure assign_colours; procedure clear_interferences(u:Tsuperregister); procedure set_live_range_direction(dir: TRADirection); procedure set_live_start(reg : tsuperregister;t : tai); function get_live_start(reg : tsuperregister) : tai; procedure set_live_end(reg : tsuperregister;t : tai); function get_live_end(reg : tsuperregister) : tai; procedure alloc_spillinfo(max_reg: Tsuperregister); { Remove p from the list and set p to the next element in the list } procedure remove_ai(list:TAsmList; var p:Tai); {$ifdef DEBUG_SPILLCOALESCE} procedure write_spill_stats; {$endif DEBUG_SPILLCOALESCE} public {$ifdef EXTDEBUG} procedure writegraph(loopidx:longint); {$endif EXTDEBUG} procedure combine(u,v:Tsuperregister); { set v as an alias for u } procedure set_alias(u,v:Tsuperregister); function get_alias(n:Tsuperregister):Tsuperregister; property live_range_direction: TRADirection read int_live_range_direction write set_live_range_direction; property live_start[reg : tsuperregister]: tai read get_live_start write set_live_start; property live_end[reg : tsuperregister]: tai read get_live_end write set_live_end; end; const first_reg = 0; last_reg = high(tsuperregister)-1; maxspillingcounter = 20; implementation uses sysutils, globals, verbose,tgobj,procinfo,cgobj; procedure sort_movelist(ml:Pmovelist); var h,i,p:longword; t:Tmoveins; begin with ml^ do begin if header.count<2 then exit; p:=longword(1) shl BsrDWord(header.count-1); repeat for h:=p to header.count-1 do begin i:=h; t:=data[i]; repeat if data[i-p].id<=t.id then break; data[i]:=data[i-p]; dec(i,p); until i
maxx1) then
exit;
page:=fbitmap[x shr 8,y shr 8];
result:=assigned(page) and
((x and $ff) in page^[y and $ff]);
end;
procedure tinterferencebitmap.setbitmap(x,y:tsuperregister;b:boolean);
var
x1,y1 : byte;
begin
x1:=x shr 8;
y1:=y shr 8;
if x1>maxx1 then
begin
reallocmem(fbitmap,sizeof(tinterferencebitmap1)*(x1+1));
fillchar(fbitmap[maxx1+1],sizeof(tinterferencebitmap1)*(x1-maxx1),0);
maxx1:=x1;
end;
if not assigned(fbitmap[x1,y1]) then
begin
if y1>maxy1 then
maxy1:=y1;
new(fbitmap[x1,y1]);
fillchar(fbitmap[x1,y1]^,sizeof(tinterferencebitmap2),0);
end;
if b then
include(fbitmap[x1,y1]^[y and $ff],(x and $ff))
else
exclude(fbitmap[x1,y1]^[y and $ff],(x and $ff));
end;
{******************************************************************************
trgobj
******************************************************************************}
constructor trgobj.create(Aregtype:Tregistertype;
Adefaultsub:Tsubregister;
const Ausable:array of tsuperregister;
Afirst_imaginary:Tsuperregister;
Apreserved_by_proc:Tcpuregisterset);
var
i : cardinal;
begin
{ empty super register sets can cause very strange problems }
if high(Ausable)=-1 then
internalerror(200210181);
live_range_direction:=rad_forward;
first_imaginary:=Afirst_imaginary;
maxreg:=Afirst_imaginary;
regtype:=Aregtype;
defaultsub:=Adefaultsub;
preserved_by_proc:=Apreserved_by_proc;
// default values set by newinstance
// used_in_proc:=[];
// ssa_safe:=false;
live_registers.init;
{ Get reginfo for CPU registers }
maxreginfo:=first_imaginary;
maxreginfoinc:=16;
moveins_id_counter:=0;
worklist_moves:=Tlinkedlist.create;
move_garbage:=TLinkedList.Create;
reginfo:=allocmem(first_imaginary*sizeof(treginfo));
for i:=0 to first_imaginary-1 do
begin
reginfo[i].degree:=high(tsuperregister);
reginfo[i].alias:=RS_INVALID;
end;
{ Usable registers }
// default value set by constructor
// fillchar(usable_registers,sizeof(usable_registers),0);
for i:=low(Ausable) to high(Ausable) do
begin
usable_registers[i]:=Ausable[i];
include(usable_register_set,Ausable[i]);
end;
usable_registers_cnt:=high(Ausable)+1;
{ Initialize Worklists }
spillednodes.init;
simplifyworklist.init;
freezeworklist.init;
spillworklist.init;
coalescednodes.init;
selectstack.init;
end;
destructor trgobj.destroy;
begin
spillednodes.done;
simplifyworklist.done;
freezeworklist.done;
spillworklist.done;
coalescednodes.done;
selectstack.done;
live_registers.done;
move_garbage.free;
worklist_moves.free;
dispose_reginfo;
extended_backwards.free;
backwards_was_first.free;
end;
procedure Trgobj.dispose_reginfo;
var
i : cardinal;
begin
if reginfo<>nil then
begin
for i:=0 to maxreg-1 do
with reginfo[i] do
begin
if adjlist<>nil then
dispose(adjlist,done);
if movelist<>nil then
dispose(movelist);
end;
freemem(reginfo);
reginfo:=nil;
end;
end;
function trgobj.getnewreg(subreg:tsubregister):tsuperregister;
var
oldmaxreginfo : tsuperregister;
begin
result:=maxreg;
inc(maxreg);
if maxreg>=last_reg then
Message(parser_f_too_complex_proc);
if maxreg>=maxreginfo then
begin
oldmaxreginfo:=maxreginfo;
{ Prevent overflow }
if maxreginfoinc>last_reg-maxreginfo then
maxreginfo:=last_reg
else
begin
inc(maxreginfo,maxreginfoinc);
if maxreginfoinc<256 then
maxreginfoinc:=maxreginfoinc*2;
end;
reallocmem(reginfo,maxreginfo*sizeof(treginfo));
{ Do we really need it to clear it ? At least for 1.0.x (PFV) }
fillchar(reginfo[oldmaxreginfo],(maxreginfo-oldmaxreginfo)*sizeof(treginfo),0);
end;
reginfo[result].subreg:=subreg;
end;
function trgobj.getregister(list:TAsmList;subreg:Tsubregister):Tregister;
begin
{$ifdef EXTDEBUG}
if reginfo=nil then
InternalError(2004020901);
{$endif EXTDEBUG}
if defaultsub=R_SUBNONE then
result:=newreg(regtype,getnewreg(R_SUBNONE),R_SUBNONE)
else
result:=newreg(regtype,getnewreg(subreg),subreg);
end;
function trgobj.uses_registers:boolean;
begin
result:=(maxreg>first_imaginary) or has_usedmarks or has_directalloc;
end;
procedure trgobj.ungetcpuregister(list:TAsmList;r:Tregister);
begin
if (getsupreg(r)>=first_imaginary) then
InternalError(2004020902);
list.concat(Tai_regalloc.dealloc(r,nil));
end;
procedure trgobj.getcpuregister(list:TAsmList;r:Tregister);
var
supreg:Tsuperregister;
begin
supreg:=getsupreg(r);
if supreg>=first_imaginary then
internalerror(2003121503);
include(used_in_proc,supreg);
has_directalloc:=true;
list.concat(Tai_regalloc.alloc(r,nil));
end;
procedure trgobj.alloccpuregisters(list:TAsmList;const r:Tcpuregisterset);
var i:cardinal;
begin
for i:=0 to first_imaginary-1 do
if i in r then
getcpuregister(list,newreg(regtype,i,defaultsub));
end;
procedure trgobj.dealloccpuregisters(list:TAsmList;const r:Tcpuregisterset);
var i:cardinal;
begin
for i:=0 to first_imaginary-1 do
if i in r then
ungetcpuregister(list,newreg(regtype,i,defaultsub));
end;
const
rtindex : longint = 0;
procedure trgobj.do_register_allocation(list:TAsmList;headertai:tai);
var
spillingcounter:longint;
endspill:boolean;
i : Longint;
begin
{ Insert regalloc info for imaginary registers }
insert_regalloc_info_all(list);
ibitmap:=tinterferencebitmap.create;
generate_interference_graph(list,headertai);
{$ifdef DEBUG_SPILLCOALESCE}
if maxreg>first_imaginary then
writeln(current_procinfo.procdef.mangledname, ': register allocation [',regtype,']');
{$endif DEBUG_SPILLCOALESCE}
{$ifdef DEBUG_REGALLOC}
if maxreg>first_imaginary then
writegraph(rtindex);
{$endif DEBUG_REGALLOC}
inc(rtindex);
{ Don't do the real allocation when -sr is passed }
if (cs_no_regalloc in current_settings.globalswitches) then
exit;
{ Spill registers which interfere with all usable real registers.
It is pointless to keep them for further processing. Also it may
cause endless spilling.
This can happen when compiling for very constrained CPUs such as
i8086 where indexed memory access instructions allow only
few registers as arguments and additionally the calling convention
provides no general purpose volatile registers.
Also spill registers which have the initial memory location
and are used only once. This allows to access the memory location
directly, without preloading it to a register.
}
for i:=first_imaginary to maxreg-1 do
with reginfo[i] do
if (real_reg_interferences>=usable_registers_cnt) or
{ also spill registers which have the initial memory location
and are used only once }
((ri_has_initial_loc in flags) and (weight<=200)) then
spillednodes.add(i);
if spillednodes.length<>0 then
begin
spill_registers(list,headertai);
spillednodes.clear;
end;
{Do register allocation.}
spillingcounter:=0;
repeat
determine_spill_registers(list,headertai);
endspill:=true;
if spillednodes.length<>0 then
begin
inc(spillingcounter);
if spillingcounter>maxspillingcounter then
begin
{$ifdef EXTDEBUG}
{ Only exit here so the .s file is still generated. Assembling
the file will still trigger an error }
exit;
{$else}
internalerror(200309041);
{$endif}
end;
endspill:=not spill_registers(list,headertai);
end;
until endspill;
ibitmap.free;
translate_registers(list);
{$ifdef DEBUG_SPILLCOALESCE}
write_spill_stats;
{$endif DEBUG_SPILLCOALESCE}
{ we need the translation table for debugging info and verbose assembler output,
so not dispose them yet (FK)
}
for i:=0 to High(spillinfo) do
spillinfo[i].interferences.Free;
spillinfo:=nil;
end;
procedure trgobj.add_constraints(reg:Tregister);
begin
end;
procedure trgobj.add_edge(u,v:Tsuperregister);
{This procedure will add an edge to the virtual interference graph.}
procedure addadj(u,v:Tsuperregister);
begin
{$ifdef EXTDEBUG}
if (u>=maxreginfo) then
internalerror(2012101901);
{$endif}
with reginfo[u] do
begin
if adjlist=nil then
new(adjlist,init);
adjlist^.add(v);
if (v nil then
lent:=adjt^.length;
repeat
adji:=reginfo[buf^[i-p]].adjlist;
leni:=0;
if adji<>nil then
leni:=adji^.length;
if leni<=lent then
break;
buf^[i]:=buf^[i-p];
dec(i,p)
until i = 7, we cannot
assign it to any of the registers, thus it is significant.}
for n:=first_imaginary to maxreg-1 do
with reginfo[n] do
begin
if adjlist=nil then
degree:=0
else
degree:=adjlist^.length;
if degree>=usable_registers_cnt then
spillworklist.add(n)
else if move_related(n) then
freezeworklist.add(n)
else if not(ri_coalesced in flags) then
simplifyworklist.add(n);
end;
sort_simplify_worklist;
end;
procedure trgobj.prepare_colouring;
begin
make_work_list;
active_moves:=Tlinkedlist.create;
frozen_moves:=Tlinkedlist.create;
coalesced_moves:=Tlinkedlist.create;
constrained_moves:=Tlinkedlist.create;
selectstack.clear;
end;
procedure trgobj.enable_moves(n:Tsuperregister);
var m:Tlinkedlistitem;
i:cardinal;
begin
with reginfo[n] do
if movelist<>nil then
for i:=0 to movelist^.header.count-1 do
begin
m:=movelist^.data[i];
if Tmoveins(m).moveset=ms_active_moves then
begin
{Move m from the set active_moves to the set worklist_moves.}
active_moves.remove(m);
Tmoveins(m).moveset:=ms_worklist_moves;
worklist_moves.concat(m);
end;
end;
end;
procedure Trgobj.decrement_degree(m:Tsuperregister);
var adj : Psuperregisterworklist;
n : tsuperregister;
d,i : cardinal;
begin
with reginfo[m] do
begin
d:=degree;
if d=0 then
internalerror(200312151);
dec(degree);
if d=usable_registers_cnt then
begin
{Enable moves for m.}
enable_moves(m);
{Enable moves for adjacent.}
adj:=adjlist;
if adj<>nil then
for i:=1 to adj^.length do
begin
n:=adj^.buf^[i-1];
if reginfo[n].flags*[ri_selected,ri_coalesced]<>[] then
enable_moves(n);
end;
{Remove the node from the spillworklist.}
if not spillworklist.delete(m) then
internalerror(200310145);
if move_related(m) then
freezeworklist.add(m)
else
simplifyworklist.add(m);
end;
end;
end;
procedure trgobj.simplify;
var adj : Psuperregisterworklist;
m,n : Tsuperregister;
i : cardinal;
begin
{We take the element with the least interferences out of the
simplifyworklist. Since the simplifyworklist is now sorted, we
no longer need to search, but we can simply take the first element.}
m:=simplifyworklist.get;
{Push it on the selectstack.}
selectstack.add(m);
with reginfo[m] do
begin
include(flags,ri_selected);
adj:=adjlist;
end;
if adj<>nil then
for i:=1 to adj^.length do
begin
n:=adj^.buf^[i-1];
if (n>=first_imaginary) and
(reginfo[n].flags*[ri_selected,ri_coalesced]=[]) then
decrement_degree(n);
end;
end;
function trgobj.get_alias(n:Tsuperregister):Tsuperregister;
begin
if n>=maxreg then
internalerror(2021121201);
while ri_coalesced in reginfo[n].flags do
n:=reginfo[n].alias;
get_alias:=n;
end;
procedure trgobj.add_worklist(u:Tsuperregister);
begin
if (u>=first_imaginary) and
(not move_related(u)) and
(reginfo[u].degree