{ $Id$ Copyright (c) 1998-2002 by Florian Klaempfl This unit implements the base class for the register allocator This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. **************************************************************************** } {$i fpcdefs.inc} { Allow duplicate allocations, can be used to get the .s file written } { $define ALLOWDUPREG} {#****************************************************************************** @abstract(Abstract register allocator unit) Register allocator introduction. Free Pascal uses a Chaitin style register allocator. We use a variant similair to the one described in the book "Modern compiler implementation in C" by Andrew W. Appel., published by Cambridge University Press. The register allocator that is described by Appel uses a much improved way of register coalescing, called "iterated register coalescing". Instead of doing coalescing as a prepass to the register allocation, the coalescing is done inside the register allocator. This has the advantage that the register allocator can coalesce very aggresively without introducing spills. Reading this book is recommended for a complete understanding. Here is a small introduction. The code generator thinks it has an infinite amount of registers. Our processor has a limited amount of registers. Therefore we must reduce the amount of registers until there are less enough to fit into the processors registers. Registers can interfere or not interfere. If two imaginary registers interfere they cannot be placed into the same psysical register. Reduction of registers is done by: - "coalescing" Two registers that do not interfere are combined into one register. - "spilling" A register is changed into a memory location and the generated code is modified to use the memory location instead of the register. Register allocation is a graph colouring problem. Each register is a colour, and if two registers interfere there is a connection between them in the graph. In addition to the imaginary registers in the code generator, the psysical CPU registers are also present in this graph. This allows us to make interferences between imaginary registers and cpu registers. This is very usefull for describing architectural constraints, like for example that the div instruction modifies edx, so variables that are in use at that time cannot be stored into edx. This can be modelled by making edx interfere with those variables. Graph colouring is an NP complete problem. Therefore we use an approximation that pushes registers to colour on to a stack. This is done in the "simplify" procedure. The register allocator first checks which registers are a candidate for coalescing. *******************************************************************************} unit rgobj; interface uses cutils, cpubase, aasmbase,aasmtai,aasmcpu, cclasses,globtype,cgbase,node, {$ifdef delphi} dmisc, {$endif} cpuinfo ; type { regvarother_longintarray = array[tregisterindex] of longint; regvarother_booleanarray = array[tregisterindex] of boolean; regvarint_longintarray = array[first_int_supreg..last_int_supreg] of longint; regvarint_ptreearray = array[first_int_supreg..last_int_supreg] of tnode; } tsuperregisterworklist=object buflength, buflengthinc, length, head, tail : integer; buf : ^tsuperregister; constructor init; destructor done; procedure clear; procedure next(var i:integer); procedure add(s:tsuperregister); function get:tsuperregister; function getlast:tsuperregister; function getidx(i:integer):tsuperregister; procedure deleteidx(i:integer); function delete(s:tsuperregister):boolean; function find(s:tsuperregister):boolean; end; psuperregisterworklist=^tsuperregisterworklist; { The interference bitmap contains of 2 layers: layer 1 - 256*256 blocks with pointers to layer 2 blocks layer 2 - blocks of 32*256 (32 bytes = 256 bits) } Tinterferencebitmap2 = array[byte] of set of byte; Pinterferencebitmap2 = ^Tinterferencebitmap2; Tinterferencebitmap1 = array[byte] of Pinterferencebitmap2; pinterferencebitmap1 = ^tinterferencebitmap1; Tinterferencebitmap=class private maxx1, maxy1 : byte; fbitmap : pinterferencebitmap1; function getbitmap(x,y:tsuperregister):boolean; procedure setbitmap(x,y:tsuperregister;b:boolean); public constructor create; destructor destroy;override; property bitmap[x,y:tsuperregister]:boolean read getbitmap write setbitmap;default; end; Tmovelist=record count:cardinal; data:array[0..$ffff] of Tlinkedlistitem; end; Pmovelist=^Tmovelist; {In the register allocator we keep track of move instructions. These instructions are moved between five linked lists. There is also a linked list per register to keep track about the moves it is associated with. Because we need to determine quickly in which of the five lists it is we add anu enumeradtion to each move instruction.} Tmoveset=(ms_coalesced_moves,ms_constrained_moves,ms_frozen_moves, ms_worklist_moves,ms_active_moves); Tmoveins=class(Tlinkedlistitem) moveset:Tmoveset; { $ifdef ra_debug} x,y:Tsuperregister; { $endif} instruction:Taicpu; end; Treginfo=record alias : Tsuperregister; { The register allocator assigns each register a colour } colour : Tsuperregister; movelist : Pmovelist; adjlist : Psuperregisterworklist; degree : TSuperregister; end; Preginfo=^TReginfo; {#------------------------------------------------------------------ This class implements the abstract register allocator. It is used by the code generator to allocate and free registers which might be valid across nodes. It also contains utility routines related to registers. Some of the methods in this class should be overriden by cpu-specific implementations. --------------------------------------------------------------------} trgobj=class preserved_by_proc : tcpuregisterset; used_in_proc : tcpuregisterset; // is_reg_var : Tsuperregisterset; {old regvars} // reg_var_loaded:Tsuperregisterset; {old regvars} constructor create(Aregtype:Tregistertype; Adefaultsub:Tsubregister; const Ausable:array of tsuperregister; Afirst_imaginary:Tsuperregister; Apreserved_by_proc:Tcpuregisterset); destructor destroy;override; {# Allocate a register. An internalerror will be generated if there is no more free registers which can be allocated.} function getregister(list:Taasmoutput;subreg:Tsubregister):Tregister; procedure add_constraints(reg:Tregister);virtual; {# Get the register specified.} procedure getexplicitregister(list:Taasmoutput;r:Tregister); {# Get multiple registers specified.} procedure allocexplicitregisters(list:Taasmoutput;r:Tcpuregisterset); {# Free multiple registers specified.} procedure deallocexplicitregisters(list:Taasmoutput;r:Tcpuregisterset); function uses_registers:boolean; {# Deallocate any kind of register } procedure ungetregister(list:Taasmoutput;r:Tregister);virtual; {# Do the register allocation.} procedure do_register_allocation(list:Taasmoutput;headertai:tai); { procedure resetusableregisters;virtual;} { procedure makeregvar(reg:Tsuperregister);} {$ifdef EXTDEBUG} procedure writegraph(loopidx:longint); {$endif EXTDEBUG} procedure add_move_instruction(instr:Taicpu); {# Prepare the register colouring.} procedure prepare_colouring; {# Clean up after register colouring.} procedure epilogue_colouring; {# Colour the registers; that is do the register allocation.} procedure colour_registers; {# Spills certain registers in the specified assembler list.} function spill_registers(list:Taasmoutput;headertai:tai):boolean; procedure translate_registers(list:Taasmoutput); {# Adds an interference edge.} procedure add_edge(u,v:Tsuperregister); procedure check_unreleasedregs; unusedregs : Tsuperregisterset; protected regtype : Tregistertype; { default subregister used } defaultsub : tsubregister; {# First imaginary register.} first_imaginary : Tsuperregister; {# Highest register allocated until now.} reginfo : PReginfo; maxreginfo, maxreginfoinc, maxreg : Tsuperregister; usable_registers_cnt : integer; usable_registers : array[0..maxcpuregister-1] of tsuperregister; ibitmap : Tinterferencebitmap; spillednodes, simplifyworklist, freezeworklist, spillworklist, coalescednodes, selectstack : tsuperregisterworklist; worklist_moves, active_moves, frozen_moves, coalesced_moves, constrained_moves : Tlinkedlist; function getnewreg:tsuperregister; procedure getregisterinline(list:Taasmoutput;position:Tai;subreg:Tsubregister;var result:Tregister); procedure ungetregisterinline(list:Taasmoutput;position:Tai;r:Tregister); procedure add_edges_used(u:Tsuperregister); procedure add_to_movelist(u:Tsuperregister;data:Tlinkedlistitem); function move_related(n:Tsuperregister):boolean; procedure make_work_list; procedure enable_moves(n:Tsuperregister); procedure decrement_degree(m:Tsuperregister); procedure simplify; function get_alias(n:Tsuperregister):Tsuperregister; procedure add_worklist(u:Tsuperregister); function adjacent_ok(u,v:Tsuperregister):boolean; function conservative(u,v:Tsuperregister):boolean; procedure combine(u,v:Tsuperregister); procedure coalesce; procedure freeze_moves(u:Tsuperregister); procedure freeze; procedure select_spill; procedure assign_colours; procedure clear_interferences(u:Tsuperregister); end; const first_reg = 0; last_reg = high(tsuperregister)-1; maxspillingcounter = 20; implementation uses systems, globals,verbose,tgobj,procinfo; {****************************************************************************** tsuperregisterworklist ******************************************************************************} constructor tsuperregisterworklist.init; begin length:=0; buflength:=0; buflengthinc:=16; head:=0; tail:=0; buf:=nil; end; destructor tsuperregisterworklist.done; begin if assigned(buf) then freemem(buf); end; procedure tsuperregisterworklist.add(s:tsuperregister); var oldbuflength : integer; newbuf : ^tsuperregister; begin inc(length); { Need to increase buffer length? } if length>=buflength then begin oldbuflength:=buflength; inc(buflength,buflengthinc); buflengthinc:=buflengthinc*2; if buflengthinc>256 then buflengthinc:=256; { We need to allocate a new block and move data around when the tail is wrapped around } if tail
=buflength then tail:=0; end; procedure tsuperregisterworklist.clear; begin length:=0; tail:=0; head:=0; end; procedure tsuperregisterworklist.next(var i:integer); begin inc(i); if i>=buflength then i:=0; end; function tsuperregisterworklist.getidx(i:integer):tsuperregister; begin result:=buf[i]; end; procedure tsuperregisterworklist.deleteidx(i:integer); begin if length=0 then internalerror(200310144); buf[i]:=buf[head]; inc(head); if head>=buflength then head:=0; dec(length); end; function tsuperregisterworklist.get:tsuperregister; begin if length=0 then internalerror(200310142); result:=buf[head]; inc(head); if head>=buflength then head:=0; dec(length); end; function tsuperregisterworklist.getlast:tsuperregister; begin if length=0 then internalerror(200310143); dec(tail); if tail<0 then tail:=buflength-1; result:=buf[tail]; dec(length); end; function tsuperregisterworklist.delete(s:tsuperregister):boolean; var i : integer; begin result:=false; i:=head; while (i<>tail) do begin if buf[i]=s then begin deleteidx(i); result:=true; exit; end; inc(i); if i>=buflength then i:=0; end; end; function tsuperregisterworklist.find(s:tsuperregister):boolean; var i : integer; begin result:=false; i:=head; while (i<>tail) do begin if buf[i]=s then begin result:=true; exit; end; inc(i); if i>=buflength then i:=0; end; end; {****************************************************************************** tinterferencebitmap ******************************************************************************} constructor tinterferencebitmap.create; begin inherited create; maxx1:=1; getmem(fbitmap,sizeof(tinterferencebitmap1)*2); fillchar(fbitmap^,sizeof(tinterferencebitmap1)*2,0); end; destructor tinterferencebitmap.destroy; var i,j : byte; begin if assigned(fbitmap) then begin for i:=0 to maxx1 do for j:=0 to maxy1 do if assigned(fbitmap[i,j]) then dispose(fbitmap[i,j]); freemem(fbitmap); end; end; function tinterferencebitmap.getbitmap(x,y:tsuperregister):boolean; var page : pinterferencebitmap2; begin result:=false; if (x shr 8>maxx1) then exit; page:=fbitmap[x shr 8,y shr 8]; result:=assigned(page) and ((x and $ff) in page^[y and $ff]); end; procedure tinterferencebitmap.setbitmap(x,y:tsuperregister;b:boolean); var x1,y1 : byte; begin x1:=x shr 8; y1:=y shr 8; if x1>maxx1 then begin reallocmem(fbitmap,sizeof(tinterferencebitmap1)*(x1+1)); fillchar(fbitmap[maxx1+1],sizeof(tinterferencebitmap1)*(x1-maxx1),0); maxx1:=x1; end; if not assigned(fbitmap[x1,y1]) then begin if y1>maxy1 then maxy1:=y1; new(fbitmap[x1,y1]); fillchar(fbitmap[x1,y1]^,sizeof(tinterferencebitmap2),0); end; if b then include(fbitmap[x1,y1]^[y and $ff],(x and $ff)) else exclude(fbitmap[x1,y1]^[y and $ff],(x and $ff)); end; {****************************************************************************** trgobj ******************************************************************************} constructor trgobj.create(Aregtype:Tregistertype; Adefaultsub:Tsubregister; const Ausable:array of tsuperregister; Afirst_imaginary:Tsuperregister; Apreserved_by_proc:Tcpuregisterset); var i : Tsuperregister; begin { empty super register sets can cause very strange problems } if high(Ausable)=0 then internalerror(200210181); first_imaginary:=Afirst_imaginary; maxreg:=Afirst_imaginary; regtype:=Aregtype; defaultsub:=Adefaultsub; preserved_by_proc:=Apreserved_by_proc; used_in_proc:=[]; supregset_reset(unusedregs,true); { RS_INVALID can't be used } supregset_exclude(unusedregs,RS_INVALID); ibitmap:=tinterferencebitmap.create; { Get reginfo for CPU registers } reginfo:=allocmem(first_imaginary*sizeof(treginfo)); maxreginfo:=first_imaginary; maxreginfoinc:=16; for i:=0 to first_imaginary-1 do reginfo[i].degree:=high(tsuperregister); worklist_moves:=Tlinkedlist.create; { Usable registers } fillchar(usable_registers,sizeof(usable_registers),0); for i:=low(Ausable) to high(Ausable) do usable_registers[i]:=Ausable[i]; usable_registers_cnt:=high(Ausable)+1; { Initialize Worklists } spillednodes.init; simplifyworklist.init; freezeworklist.init; spillworklist.init; coalescednodes.init; selectstack.init; end; destructor trgobj.destroy; var i:Tsuperregister; begin spillednodes.done; simplifyworklist.done; freezeworklist.done; spillworklist.done; coalescednodes.done; selectstack.done; for i:=0 to maxreg-1 do begin if reginfo[i].adjlist<>nil then dispose(reginfo[i].adjlist,done); if reginfo[i].movelist<>nil then dispose(reginfo[i].movelist); end; freemem(reginfo); worklist_moves.free; ibitmap.free; end; function trgobj.getnewreg:tsuperregister; var oldmaxreginfo : tsuperregister; begin result:=maxreg; inc(maxreg); if maxreg>=last_reg then internalerror(200310146); if maxreg>=maxreginfo then begin oldmaxreginfo:=maxreginfo; inc(maxreginfo,maxreginfoinc); if maxreginfoinc<256 then maxreginfoinc:=maxreginfoinc*2; reallocmem(reginfo,maxreginfo*sizeof(treginfo)); { Do we really need it to clear it ? At least for 1.0.x (PFV) } fillchar(reginfo[oldmaxreginfo],(maxreginfo-oldmaxreginfo)*sizeof(treginfo),0); end; end; function trgobj.getregister(list:Taasmoutput;subreg:Tsubregister):Tregister; var p:Tsuperregister; r:Tregister; begin p:=getnewreg; supregset_exclude(unusedregs,p); r:=newreg(regtype,p,subreg); list.concat(Tai_regalloc.alloc(r)); add_edges_used(p); add_constraints(r); result:=r; end; function trgobj.uses_registers:boolean; begin result:=(maxreg>first_imaginary); end; procedure trgobj.ungetregister(list:Taasmoutput;r:Tregister); var supreg:Tsuperregister; begin supreg:=getsupreg(r); if not supregset_in(unusedregs,supreg) then begin supregset_include(unusedregs,supreg); list.concat(Tai_regalloc.dealloc(r)); add_edges_used(supreg); add_constraints(r); end; end; procedure trgobj.getexplicitregister(list:Taasmoutput;r:Tregister); var supreg:Tsuperregister; begin supreg:=getsupreg(r); if supregset_in(unusedregs,supreg) then begin supregset_exclude(unusedregs,supreg); if supreg