From 00cb8f57259d9f8858dc084e933d4c6755b42de2 Mon Sep 17 00:00:00 2001
From: Jonas Maebe
Date: Sat, 20 Aug 2011 07:38:11 +0000
Subject: [PATCH] * made maxregs related information protected instead of
 private in trgobj, as well as insert_regalloc_info_all()
 + "register" allocator for the jvm target, which simply replaces every
 virtual register with a temp. This is done for all register types in one
 pass, so that the temps can be easily reused without worrying about
 conflicts. Small optimisation: alloc/store/dealloc/load sequences for a
 single reg are removed (many of these are generated because most cg nodes
 return their value in a register which is then immediately consumed by the
 parent)
 * map addressregisters to integer registers with size R_SUBD, because they
 require one stackslot (the subregister type is used by rgcpu to determine
 the size of the temp it has to allocate)

git-svn-id: branches/jvmbackend@18316 -
---
 .gitattributes         |   1 +
 compiler/jvm/cgcpu.pas |  36 +++++-
 compiler/jvm/rgcpu.pas | 302 +++++++++++++++++++++++++++++++++++++++++
 compiler/rgobj.pas     |   9 +-
 4 files changed, 342 insertions(+), 6 deletions(-)
 create mode 100644 compiler/jvm/rgcpu.pas

diff --git a/.gitattributes b/.gitattributes
index 15d557cd3a..fe2f1d7537 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -218,6 +218,7 @@ compiler/jvm/cputarg.pas svneol=native#text/plain
 compiler/jvm/hlcgcpu.pas svneol=native#text/plain
 compiler/jvm/itcpujas.pas svneol=native#text/plain
 compiler/jvm/jvmreg.dat svneol=native#text/plain
+compiler/jvm/rgcpu.pas svneol=native#text/plain
 compiler/jvm/rjvmcon.inc svneol=native#text/plain
 compiler/jvm/rjvmnor.inc svneol=native#text/plain
 compiler/jvm/rjvmnum.inc svneol=native#text/plain
diff --git a/compiler/jvm/cgcpu.pas b/compiler/jvm/cgcpu.pas
index 86fa073a13..2e96c65133 100644
--- a/compiler/jvm/cgcpu.pas
+++ b/compiler/jvm/cgcpu.pas
@@ -38,7 +38,10 @@ interface
        public
         procedure init_register_allocators;override;
         procedure done_register_allocators;override;
-        function getfpuregister(list:TAsmList;size:Tcgsize):Tregister;override;
+        function getintregister(list:TAsmList;size:Tcgsize):Tregister;override;
+        function getfpuregister(list:TAsmList;size:Tcgsize):Tregister;override;
+        function getaddressregister(list:TAsmList):Tregister;override;
+        procedure do_register_allocation(list:TAsmList;headertai:tai);override;
       end;
 
     procedure create_codegen;
@@ -66,7 +69,7 @@ implementation
         rg[R_INTREGISTER]:=Trgcpu.create(R_INTREGISTER,R_SUBQ,
           [RS_R0],first_int_imreg,[]);
 {$endif not cpu64bitaddr}
-        rg[R_FPUREGISTER]:=trgcpu.create(R_FPUREGISTER,R_SUBFD,
+        rg[R_FPUREGISTER]:=trgcpu.create(R_FPUREGISTER,R_SUBFS,
           [RS_R0],first_fpu_imreg,[]);
         rg[R_MMREGISTER]:=trgcpu.create(R_MMREGISTER,R_SUBNONE,
           [RS_R0],first_mm_imreg,[]);
@@ -82,6 +85,17 @@ implementation
       end;
 
 
+    function tcgjvm.getintregister(list:TAsmList;size:Tcgsize):Tregister;
+      begin
+        { the subregister type determines the size of the temp that rgcpu
+          allocates for the register: 4 bytes for R_SUBD, 8 bytes otherwise }
+        if not(size in [OS_64,OS_S64]) then
+          result:=rg[R_INTREGISTER].getregister(list,R_SUBD)
+        else
+          result:=rg[R_INTREGISTER].getregister(list,R_SUBQ);
+      end;
+
+
     function tcgjvm.getfpuregister(list:TAsmList;size:Tcgsize):Tregister;
       begin
         if size=OS_F64 then
@@ -91,6 +105,24 @@ implementation
       end;
 
 
+    function tcgjvm.getaddressregister(list:TAsmList):Tregister;
+      begin
+        { avoid problems in the compiler where int and addr registers are
+          mixed for now; we currently don't have to differentiate between the
+          two as far as the jvm backend is concerned }
+        result:=rg[R_INTREGISTER].getregister(list,R_SUBD)
+      end;
+
+
+    procedure tcgjvm.do_register_allocation(list:TAsmList;headertai:tai);
+      begin
+        { We only run the "register allocation" once for an arbitrary allocator,
+          which will perform the register->temp mapping for all register types.
+          This allows us to easily reuse temps. }
+        trgcpu(rg[R_INTREGISTER]).do_all_register_allocation(list,headertai);
+      end;
+
+
     procedure create_codegen;
       begin
         cg:=tcgjvm.Create;
diff --git a/compiler/jvm/rgcpu.pas b/compiler/jvm/rgcpu.pas
new file mode 100644
index 0000000000..9c35659753
--- /dev/null
+++ b/compiler/jvm/rgcpu.pas
@@ -0,0 +1,302 @@
+{
+    Copyright (c) 2010 by Jonas Maebe
+
+    This unit implements the JVM specific class for the register
+    allocator
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ ****************************************************************************}
+unit rgcpu;
+
+{$i fpcdefs.inc}
+
+  interface
+
+    uses
+      aasmbase,aasmcpu,aasmtai,aasmdata,
+      cgbase,cgutils,
+      cpubase,
+      rgobj;
+
+    type
+      tspilltemps = array[tregistertype] of ^Tspill_temp_list;
+
+      { trgcpu }
+
+      trgcpu=class(trgobj)
+       protected
+        { replaces every register operand of instr by the temp recorded for it
+          in spilltemps; returns true if at least one operand was replaced }
+        class function do_spill_replace_all(list:TAsmList;instr:taicpu;const spilltemps: tspilltemps):boolean;
+        { removes alloc/store/dealloc/load sequences of a single register
+          from list, starting the scan at headertai }
+        class procedure remove_dummy_load_stores(list: TAsmList; headertai: tai);
+       public
+        { performs the register allocation for *all* register types }
+        class procedure do_all_register_allocation(list: TAsmList; headertai: tai);
+      end;
+
+
+implementation
+
+    uses
+      verbose,cutils,
+      globtype,globals,
+      cgobj,
+      tgobj;
+
+    { trgcpu }
+
+    class function trgcpu.do_spill_replace_all(list:TAsmList;instr:taicpu;const spilltemps: tspilltemps):boolean;
+      var
+        l: longint;
+        reg: tregister;
+      begin
+        { the result tells whether at least one operand was rewritten }
+        result:=false;
+        { jvm instructions never have more than one memory (virtual register)
+          operand, so there is no danger of superregister conflicts }
+        for l:=0 to instr.ops-1 do
+          if instr.oper[l]^.typ=top_reg then
+            begin
+              reg:=instr.oper[l]^.reg;
+              instr.loadref(l,spilltemps[getregtype(reg)]^[getsupreg(reg)]);
+              result:=true;
+            end;
+      end;
+
+
+    class procedure trgcpu.remove_dummy_load_stores(list: TAsmList; headertai: tai);
+
+      { true if p is an instruction with one of the plain store opcodes below
+        (the dstore/lstore ones only count if doubleprecisionok) and, unless
+        reg=NR_NO, its first operand is the register reg }
+      function issimpleregstore(p: tai; reg: tregister; doubleprecisionok: boolean): boolean;
+        const
+          simplestoressp = [a_astore,a_fstore,a_istore];
+          simplestoresdp = [a_dstore,a_lstore];
+        begin
+          result:=
+            assigned(p) and
+            (p.typ=ait_instruction) and
+            ((taicpu(p).opcode in simplestoressp) or
+             (doubleprecisionok and
+              (taicpu(p).opcode in simplestoresdp))) and
+            ((reg=NR_NO) or
+             (taicpu(p).oper[0]^.typ=top_reg) and
+             (taicpu(p).oper[0]^.reg=reg));
+        end;
+
+      { same test as issimpleregstore, but for the plain load opcodes }
+      function issimpleregload(p: tai; reg: tregister; doubleprecisionok: boolean): boolean;
+        const
+          simpleloadssp = [a_aload,a_fload,a_iload];
+          simpleloadsdp = [a_dload,a_lload];
+        begin
+          result:=
+            assigned(p) and
+            (p.typ=ait_instruction) and
+            ((taicpu(p).opcode in simpleloadssp) or
+             (doubleprecisionok and
+              (taicpu(p).opcode in simpleloadsdp))) and
+            ((reg=NR_NO) or
+             (taicpu(p).oper[0]^.typ=top_reg) and
+             (taicpu(p).oper[0]^.reg=reg));
+        end;
+
+
+      { p must be the allocation of reg: if it is directly followed by a store,
+        a deallocation and a load of reg, all four items are removed and freed
+        and p is advanced to the item after the load }
+      function try_remove_alloc_store_dealloc_load(var p: tai; reg: tregister): boolean;
+        var
+          q: tai;
+        begin
+          result:=false;
+          { check for:
+              alloc regx
+              store regx
+              dealloc regx
+              load regx
+            and remove. We don't have to check that the load/store
+            types match, because they have to for this to be
+            valid JVM code }
+          if issimpleregstore(tai(p.next),reg,true) and
+             assigned(p.next.next) and
+             (tai(p.next.next).typ=ait_regalloc) and
+             (tai_regalloc(p.next.next).ratype=ra_dealloc) and
+             (tai_regalloc(p.next.next).reg=reg) and
+             issimpleregload(tai(p.next.next.next),reg,true) then
+            begin
+              { remove the whole sequence: the allocation }
+              q:=Tai(p.next);
+              list.remove(p);
+              p.free;
+              p:=q;
+              { the store }
+              q:=Tai(p.next);
+              list.remove(p);
+              p.free;
+              p:=q;
+              { the dealloc }
+              q:=Tai(p.next);
+              list.remove(p);
+              p.free;
+              p:=q;
+              { the load }
+              q:=Tai(p.next);
+              list.remove(p);
+              p.free;
+              p:=q;
+              result:=true;
+            end;
+        end;
+
+
+      var
+        p: tai;
+        reg: tregister;
+        removedsomething: boolean;
+      begin
+        repeat
+          removedsomething:=false;
+          p:=headertai;
+          while assigned(p) do
+            begin
+              case p.typ of
+                ait_regalloc:
+                  begin
+                    if (tai_regalloc(p).ratype=ra_alloc) then
+                      begin
+                        reg:=tai_regalloc(p).reg;
+                        if try_remove_alloc_store_dealloc_load(p,reg) then
+                          begin
+                            removedsomething:=true;
+                            continue;
+                          end;
+                        { todo in peephole optimizer:
+                            alloc regx // not double precision
+                            store regx // not double precision
+                            load regy or memy
+                            dealloc regx
+                            load regx
+                          -> change into
+                            load regy or memy
+                            swap // can only handle single precision
+
+                          and then
+                            swap
+                            <commutative op>
+                          -> remove swap
+                        }
+                      end;
+                  end;
+              end;
+              p:=tai(p.next);
+            end;
+        until not removedsomething;
+      end;
+
+
+    class procedure trgcpu.do_all_register_allocation(list: TAsmList; headertai: tai);
+      var
+        spill_temps : tspilltemps;
+        templist : TAsmList;
+        intrg,
+        fprg : trgcpu;
+        p,q : tai;
+        size : longint;
+      begin
+        { Since there are no actual registers, we simply spill everything. We
+          use tt_regallocator temps, which are not used by the temp allocator
+          during code generation, so that we cannot accidentally overwrite
+          any temporary values }
+
+        { get references to all register allocators }
+        intrg:=trgcpu(cg.rg[R_INTREGISTER]);
+        fprg:=trgcpu(cg.rg[R_FPUREGISTER]);
+        { determine the live ranges of all registers }
+        intrg.insert_regalloc_info_all(list);
+        fprg.insert_regalloc_info_all(list);
+        { Don't do the actual allocation when -sr is passed }
+        if (cs_no_regalloc in current_settings.globalswitches) then
+          exit;
+        { remove some simple useless store/load sequences }
+        remove_dummy_load_stores(list,headertai);
+        { allocate room to store the virtual register -> temp mapping }
+        spill_temps[R_INTREGISTER]:=allocmem(sizeof(treference)*intrg.maxreg);
+        spill_temps[R_FPUREGISTER]:=allocmem(sizeof(treference)*fprg.maxreg);
+        { List to insert temp allocations into }
+        templist:=TAsmList.create;
+        { allocate/replace all registers }
+        p:=headertai;
+        while assigned(p) do
+          begin
+            case p.typ of
+              ait_regalloc:
+                with Tai_regalloc(p) do
+                  begin
+                    case getregtype(reg) of
+                      R_INTREGISTER:
+                        if getsubreg(reg)=R_SUBD then
+                          size:=4
+                        else
+                          size:=8;
+                      R_ADDRESSREGISTER:
+                        { tcgjvm.getaddressregister hands out integer registers,
+                          and no spill_temps entry exists for this type }
+                        size:=4;
+                      R_FPUREGISTER:
+                        if getsubreg(reg)=R_SUBFS then
+                          size:=4
+                        else
+                          size:=8;
+                      else
+                        internalerror(2010122912);
+                    end;
+                    case ratype of
+                      ra_alloc :
+                        tg.gettemp(templist,
+                                   size,1,
+                                   tt_regallocator,spill_temps[getregtype(reg)]^[getsupreg(reg)]);
+                      ra_dealloc :
+                        begin
+                          tg.ungettemp(templist,spill_temps[getregtype(reg)]^[getsupreg(reg)]);
+                          { don't invalidate the temp reference, may still be used one instruction
+                            later }
+                        end;
+                    end;
+                    { insert the tempallocation/free at the right place }
+                    list.insertlistbefore(p,templist);
+                    { remove the register allocation info for the register
+                      (p.previous is valid because we just inserted the temp
+                       allocation/free before p) }
+                    q:=Tai(p.previous);
+                    list.remove(p);
+                    p.free;
+                    p:=q;
+                  end;
+              ait_instruction:
+                do_spill_replace_all(list,taicpu(p),spill_temps);
+            end;
+            p:=Tai(p.next);
+          end;
+        freemem(spill_temps[R_INTREGISTER]);
+        freemem(spill_temps[R_FPUREGISTER]);
+        templist.free;
+      end;
+
+end.
diff --git a/compiler/rgobj.pas b/compiler/rgobj.pas
index c045efa87d..e8f7e8aa8a 100644
--- a/compiler/rgobj.pas
+++ b/compiler/rgobj.pas
@@ -160,6 +160,10 @@ unit rgobj;
         { translates a single given imaginary register to it's real register }
         procedure translate_register(var reg : tregister);
       protected
+        maxreginfo,
+        maxreginfoinc,
+        maxreg : Tsuperregister;
+
         regtype : Tregistertype;
         { default subregister used }
         defaultsub : tsubregister;
@@ -179,15 +183,13 @@ unit rgobj;
                                   instr:taicpu;
                                   const r:Tsuperregisterset;
                                   const spilltemplist:Tspill_temp_list): boolean;virtual;
+        procedure insert_regalloc_info_all(list:TAsmList);
       private
         int_live_range_direction: TRADirection;
         {# First imaginary register.}
         first_imaginary : Tsuperregister;
         {# Highest register allocated until now.}
         reginfo : PReginfo;
-        maxreginfo,
-        maxreginfoinc,
-        maxreg : Tsuperregister;
         usable_registers_cnt : word;
         usable_registers : array[0..maxcpuregister-1] of tsuperregister;
         ibitmap : Tinterferencebitmap;
@@ -217,7 +219,6 @@ unit rgobj;
         {# Colour the registers; that is do the register allocation.}
         procedure colour_registers;
         procedure insert_regalloc_info(list:TAsmList;u:tsuperregister);
-        procedure insert_regalloc_info_all(list:TAsmList);
         procedure generate_interference_graph(list:TAsmList;headertai:tai);
         { translates the registers in the given assembler list }
         procedure translate_registers(list:TAsmList);