From eedc447400fec2c6a8301077c8c731b19c2c5fa4 Mon Sep 17 00:00:00 2001 From: daniel Date: Sat, 8 Mar 2003 20:36:41 +0000 Subject: [PATCH] + Added newra version of Ti386shlshrnode + Added interference graph construction code --- compiler/i386/n386add.pas | 10 +- compiler/i386/n386mat.pas | 359 ++++++++++++++++++++++++++++---------- compiler/rgobj.pas | 150 ++++++++++++++-- 3 files changed, 405 insertions(+), 114 deletions(-) diff --git a/compiler/i386/n386add.pas b/compiler/i386/n386add.pas index c4798e7d4d..3b84724020 100644 --- a/compiler/i386/n386add.pas +++ b/compiler/i386/n386add.pas @@ -1029,9 +1029,13 @@ interface { right.location<>LOC_REGISTER } if (nodetype=subn) and (nf_swaped in flags) then begin +{$ifdef newra} + r:=rg.getregisterint(exprasmlist,OS_INT); +{$else} rg.getexplicitregisterint(exprasmlist,NR_EDI); r.enum:=R_INTREGISTER; r.number:=NR_EDI; +{$endif} cg64.a_load64low_loc_reg(exprasmlist,right.location,r); emit_reg_reg(op1,opsize,left.location.registerlow,r); emit_reg_reg(A_MOV,opsize,r,left.location.registerlow); @@ -1636,7 +1640,11 @@ begin end. 
{ $Log$ - Revision 1.57 2003-03-08 13:59:17 daniel + Revision 1.58 2003-03-08 20:36:41 daniel + + Added newra version of Ti386shlshrnode + + Added interference graph construction code + + Revision 1.57 2003/03/08 13:59:17 daniel * Work to handle new register notation in ag386nsm + Added newra version of Ti386moddivnode diff --git a/compiler/i386/n386mat.pas b/compiler/i386/n386mat.pas index e18c08b38d..987b0b6c7b 100644 --- a/compiler/i386/n386mat.pas +++ b/compiler/i386/n386mat.pas @@ -70,6 +70,7 @@ implementation var r,r2,hreg1,hreg2:Tregister; power:longint; hl:Tasmlabel; + op:Tasmop; pushedregs:Tmaybesave; begin @@ -81,111 +82,105 @@ implementation maybe_restore(exprasmlist,left.location,pushedregs); if codegenerror then exit; - location_copy(location,left.location); if is_64bitint(resulttype.def) then + { should be handled in pass_1 (JM) } + internalerror(200109052); + { put numerator in register } + location_reset(location,LOC_REGISTER,OS_INT); + location_force_reg(exprasmlist,left.location,OS_INT,false); + hreg1:=left.location.register; + + if (nodetype=divn) and (right.nodetype=ordconstn) and + ispowerof2(tordconstnode(right).value,power) then begin - { should be handled in pass_1 (JM) } - internalerror(200109052); + { for signed numbers, the numerator must be adjusted before the + shift instruction, but not wih unsigned numbers! Otherwise, + "Cardinal($ffffffff) div 16" overflows! 
(JM) } + if is_signed(left.resulttype.def) Then + begin + if (aktOptProcessor <> class386) and + not(cs_littlesize in aktglobalswitches) then + { use a sequence without jumps, saw this in + comp.compilers (JM) } + begin + { no jumps, but more operations } + hreg2:=rg.getregisterint(exprasmlist,OS_INT); + emit_reg_reg(A_MOV,S_L,hreg1,hreg2); + {If the left value is signed, hreg2=$ffffffff, otherwise 0.} + emit_const_reg(A_SAR,S_L,31,hreg2); + {If signed, hreg2=right value-1, otherwise 0.} + emit_const_reg(A_AND,S_L,tordconstnode(right).value-1,hreg2); + { add to the left value } + emit_reg_reg(A_ADD,S_L,hreg2,hreg1); + { release EDX if we used it } + rg.ungetregisterint(exprasmlist,hreg2); + { do the shift } + emit_const_reg(A_SAR,S_L,power,hreg1); + end + else + begin + { a jump, but less operations } + emit_reg_reg(A_TEST,S_L,hreg1,hreg1); + objectlibrary.getlabel(hl); + emitjmp(C_NS,hl); + if power=1 then + emit_reg(A_INC,S_L,hreg1) + else + emit_const_reg(A_ADD,S_L,tordconstnode(right).value-1,hreg1); + cg.a_label(exprasmlist,hl); + emit_const_reg(A_SAR,S_L,power,hreg1); + end + end + else + emit_const_reg(A_SHR,S_L,power,hreg1); + location.register:=hreg1; end else begin - { put numerator in register } - location_reset(location,LOC_REGISTER,OS_INT); - location_force_reg(exprasmlist,left.location,OS_INT,false); - hreg1:=left.location.register; + {Bring denominator to a register.} + rg.getexplicitregisterint(exprasmlist,NR_EAX); + r.enum:=R_INTREGISTER; + r.number:=NR_EAX; + r2.enum:=R_INTREGISTER; + r2.number:=NR_EDX; + emit_reg_reg(A_MOV,S_L,hreg1,r); + rg.ungetregisterint(exprasmlist,hreg1); + rg.getexplicitregisterint(exprasmlist,NR_EDX); + {Sign extension depends on the left type.} + if torddef(left.resulttype.def).typ=u32bit then + emit_reg_reg(A_XOR,S_L,r2,r2) + else + emit_none(A_CDQ,S_NO); - if (nodetype=divn) and (right.nodetype=ordconstn) and - ispowerof2(tordconstnode(right).value,power) then + {Division depends on the right type.} + if 
torddef(right.resulttype.def).typ=u32bit then + op:=A_DIV + else + op:=A_IDIV; + + if right.location.loc in [LOC_REFERENCE,LOC_CREFERENCE] then + emit_ref(op,S_L,right.location.reference) + else + emit_reg(op,S_L,right.location.register); + location_release(exprasmlist,right.location); + + {Copy the result into a new register. Release EAX & EDX.} + if nodetype=divn then begin - { for signed numbers, the numerator must be adjusted before the - shift instruction, but not wih unsigned numbers! Otherwise, - "Cardinal($ffffffff) div 16" overflows! (JM) } - if is_signed(left.resulttype.def) Then - begin - if (aktOptProcessor <> class386) and - not(CS_LittleSize in aktglobalswitches) then - { use a sequence without jumps, saw this in - comp.compilers (JM) } - begin - { no jumps, but more operations } - hreg2:=rg.getregisterint(exprasmlist,OS_INT); - emit_reg_reg(A_MOV,S_L,hreg1,hreg2); - { if the left value is signed, hreg2 := $ffffffff, - otherwise 0 } - emit_const_reg(A_SAR,S_L,31,hreg2); - { if signed, hreg2 := right value-1, otherwise 0 } - emit_const_reg(A_AND,S_L,tordconstnode(right).value-1,hreg2); - { add to the left value } - emit_reg_reg(A_ADD,S_L,hreg2,hreg1); - { release EDX if we used it } - { also releas EDI } - rg.ungetregisterint(exprasmlist,hreg2); - { do the shift } - emit_const_reg(A_SAR,S_L,power,hreg1); - end - else - begin - { a jump, but less operations } - emit_reg_reg(A_TEST,S_L,hreg1,hreg1); - objectlibrary.getlabel(hl); - emitjmp(C_NS,hl); - if power=1 then - emit_reg(A_INC,S_L,hreg1) - else - emit_const_reg(A_ADD,S_L,tordconstnode(right).value-1,hreg1); - cg.a_label(exprasmlist,hl); - emit_const_reg(A_SAR,S_L,power,hreg1); - end - end - else - emit_const_reg(A_SHR,S_L,power,hreg1); - location.register:=hreg1; + rg.ungetregisterint(exprasmlist,r2); + location.register:=rg.getregisterint(exprasmlist,OS_INT); + emit_reg_reg(A_MOV,S_L,r,location.register); + rg.ungetregisterint(exprasmlist,r); end else begin - {Bring denominator to a register.} - 
hreg2:=rg.getregisterint(exprasmlist,OS_INT); - if right.location.loc<>LOC_CREGISTER then - location_release(exprasmlist,right.location); - cg.a_load_loc_reg(exprasmlist,right.location,hreg2); - rg.getexplicitregisterint(exprasmlist,NR_EAX); - rg.getexplicitregisterint(exprasmlist,NR_EDX); - r.enum:=R_INTREGISTER; - r.number:=NR_EAX; - r2.enum:=R_INTREGISTER; - r2.number:=NR_EDX; - emit_reg_reg(A_MOV,S_L,hreg1,r); - rg.ungetregisterint(exprasmlist,hreg1); - {Sign extension depends on the left type.} - if torddef(left.resulttype.def).typ=u32bit then - emit_reg_reg(A_XOR,S_L,r2,r2) - else - emit_none(A_CDQ,S_NO); - - {Division depends on the right type.} - if torddef(right.resulttype.def).typ=u32bit then - emit_reg(A_DIV,S_L,hreg2) - else - emit_reg(A_IDIV,S_L,hreg2); - - rg.ungetregisterint(exprasmlist,hreg2); - if nodetype=divn then - begin - rg.ungetregisterint(exprasmlist,r2); - location.register:=rg.getregisterint(exprasmlist,OS_INT); - emit_reg_reg(A_MOV,S_L,r,location.register); - rg.ungetregisterint(exprasmlist,r); - end - else - begin - rg.ungetregisterint(exprasmlist,r); - location.register:=rg.getregisterint(exprasmlist,OS_INT); - emit_reg_reg(A_MOV,S_L,r2,location.register); - rg.ungetregisterint(exprasmlist,r2); - end; + rg.ungetregisterint(exprasmlist,r); + location.register:=rg.getregisterint(exprasmlist,OS_INT); + emit_reg_reg(A_MOV,S_L,r2,location.register); + rg.ungetregisterint(exprasmlist,r2); end; - end; + end; end; {$else} procedure ti386moddivnode.pass_2; @@ -414,10 +409,175 @@ implementation function ti386shlshrnode.first_shlshr64bitint: tnode; - begin - result := nil; - end; + begin + result := nil; + end; + +{$ifdef newra} + procedure ti386shlshrnode.pass_2; + + var hregister2,hregisterhigh,hregisterlow:Tregister; + r,r2:Tregister; + op:Tasmop; + l1,l2,l3:Tasmlabel; + pushedregs:Tmaybesave; + + begin + secondpass(left); + maybe_save(exprasmlist,right.registers32,left.location,pushedregs); + secondpass(right); + 
maybe_restore(exprasmlist,left.location,pushedregs); + + { determine operator } + if nodetype=shln then + op:=A_SHL + else + op:=A_SHR; + + if is_64bitint(left.resulttype.def) then + begin + location_reset(location,LOC_REGISTER,OS_64); + + { load left operator in a register } + location_force_reg(exprasmlist,left.location,OS_64,false); + hregisterhigh:=left.location.registerhigh; + hregisterlow:=left.location.registerlow; + if hregisterhigh.enum<>R_INTREGISTER then + internalerror(200302056); + if hregisterlow.enum<>R_INTREGISTER then + internalerror(200302056); + + { shifting by a constant directly coded: } + if (right.nodetype=ordconstn) then + begin + { shrd/shl works only for values <=31 !! } + if Tordconstnode(right).value>31 then + begin + if nodetype=shln then + begin + emit_reg_reg(A_XOR,S_L,hregisterhigh,hregisterhigh); + if ((tordconstnode(right).value and 31) <> 0) then + emit_const_reg(A_SHL,S_L,tordconstnode(right).value and 31, + hregisterlow); + end + else + begin + emit_reg_reg(A_XOR,S_L,hregisterlow,hregisterlow); + if ((tordconstnode(right).value and 31) <> 0) then + emit_const_reg(A_SHR,S_L,tordconstnode(right).value and 31, + hregisterhigh); + end; + location.registerhigh:=hregisterlow; + location.registerlow:=hregisterhigh; + end + else + begin + if nodetype=shln then + begin + emit_const_reg_reg(A_SHLD,S_L,tordconstnode(right).value and 31, + hregisterlow,hregisterhigh); + emit_const_reg(A_SHL,S_L,tordconstnode(right).value and 31, + hregisterlow); + end + else + begin + emit_const_reg_reg(A_SHRD,S_L,tordconstnode(right).value and 31, + hregisterhigh,hregisterlow); + emit_const_reg(A_SHR,S_L,tordconstnode(right).value and 31, + hregisterhigh); + end; + location.registerlow:=hregisterlow; + location.registerhigh:=hregisterhigh; + end; + end + else + begin + { load right operators in a register } + rg.getexplicitregisterint(exprasmlist,NR_ECX); + hregister2.enum:=R_INTREGISTER; + hregister2.number:=NR_ECX; + 
cg.a_load_loc_reg(exprasmlist,right.location,hregister2); + if right.location.loc<>LOC_CREGISTER then + location_release(exprasmlist,right.location); + + { left operator is already in a register } + { hence are both in a register } + { is it in the case ECX ? } + r.enum:=R_INTREGISTER; + r.number:=NR_ECX; + r2.enum:=R_INTREGISTER; + r2.number:=NR_CL; + + { the damned shift instructions work only til a count of 32 } + { so we've to do some tricks here } + objectlibrary.getlabel(l1); + objectlibrary.getlabel(l2); + objectlibrary.getlabel(l3); + emit_const_reg(A_CMP,S_L,64,hregister2); + emitjmp(C_L,l1); + emit_reg_reg(A_XOR,S_L,hregisterlow,hregisterlow); + emit_reg_reg(A_XOR,S_L,hregisterhigh,hregisterhigh); + cg.a_jmp_always(exprasmlist,l3); + cg.a_label(exprasmlist,l1); + emit_const_reg(A_CMP,S_L,32,hregister2); + emitjmp(C_L,l2); + emit_const_reg(A_SUB,S_L,32,hregister2); + if nodetype=shln then + begin + emit_reg_reg(A_SHL,S_L,r2,hregisterlow); + emit_reg_reg(A_MOV,S_L,hregisterlow,hregisterhigh); + emit_reg_reg(A_XOR,S_L,hregisterlow,hregisterlow); + cg.a_jmp_always(exprasmlist,l3); + cg.a_label(exprasmlist,l2); + emit_reg_reg_reg(A_SHLD,S_L,r2,hregisterlow,hregisterhigh); + emit_reg_reg(A_SHL,S_L,r2,hregisterlow); + end + else + begin + emit_reg_reg(A_SHR,S_L,r2,hregisterhigh); + emit_reg_reg(A_MOV,S_L,hregisterhigh,hregisterlow); + emit_reg_reg(A_XOR,S_L,hregisterhigh,hregisterhigh); + cg.a_jmp_always(exprasmlist,l3); + cg.a_label(exprasmlist,l2); + emit_reg_reg_reg(A_SHRD,S_L,r2,hregisterhigh,hregisterlow); + emit_reg_reg(A_SHR,S_L,r2,hregisterhigh); + end; + cg.a_label(exprasmlist,l3); + + rg.ungetregisterint(exprasmlist,hregister2); + location.registerlow:=hregisterlow; + location.registerhigh:=hregisterhigh; + end; + end + else + begin + { load left operators in a register } + location_copy(location,left.location); + location_force_reg(exprasmlist,location,OS_INT,false); + + r2.enum:=R_INTREGISTER; + r2.number:=NR_CL; + + { shifting by a constant directly 
coded: } + if (right.nodetype=ordconstn) then + { l shl 32 should 0 imho, but neither TP nor Delphi do it in this way (FK)} + emit_const_reg(op,S_L,tordconstnode(right).value and 31,location.register) + else + begin + { load right operators in a ECX } + if right.location.loc<>LOC_CREGISTER then + location_release(exprasmlist,right.location); + hregister2:=rg.getexplicitregisterint(exprasmlist,NR_ECX); + cg.a_load_loc_reg(exprasmlist,right.location,hregister2); + + { right operand is in ECX } + emit_reg_reg(op,S_L,r2,location.register); + rg.ungetregisterint(exprasmlist,hregister2); + end; + end; + end; +{$else} procedure ti386shlshrnode.pass_2; var hregister2,hregister3, @@ -692,6 +852,7 @@ implementation end; end; end; +{$endif} {***************************************************************************** @@ -1008,7 +1169,11 @@ begin end. { $Log$ - Revision 1.46 2003-03-08 13:59:17 daniel + Revision 1.47 2003-03-08 20:36:41 daniel + + Added newra version of Ti386shlshrnode + + Added interference graph construction code + + Revision 1.46 2003/03/08 13:59:17 daniel * Work to handle new register notation in ag386nsm + Added newra version of Ti386moddivnode diff --git a/compiler/rgobj.pas b/compiler/rgobj.pas index 6fb70bda31..2e7b45722a 100644 --- a/compiler/rgobj.pas +++ b/compiler/rgobj.pas @@ -28,6 +28,47 @@ references and registers which are used by the code generator. } + +{******************************************************************************* + +(applies to new register allocator) + +Register allocator introduction. + +Free Pascal uses a Chaitin style register allocator similair to the one +described in the book "Modern compiler implementation in C" by Andrew W. Appel., +published by Cambridge University Press. + +Reading this book is recommended for a complete understanding. Here is a small +introduction. + +The code generator thinks it has an infinite amount of registers. Our processor +has a limited amount of registers. 
Therefore we must reduce the amount of +registers until there are few enough to fit into the processor's registers. + +Registers can interfere or not interfere. If two imaginary registers interfere +they cannot be placed into the same physical register. Reduction of registers +is done by: + +- "coalescing" Two registers that do not interfere are combined + into one register. +- "spilling" A register is changed into a memory location and the generated + code is modified to use the memory location instead of the register. + +Register allocation is a graph colouring problem. Each physical register is a colour, and +if two registers interfere there is a connection between them in the graph. + +In addition to the imaginary registers in the code generator, the physical +CPU registers are also present in this graph. This allows us to make +interferences between imaginary registers and cpu registers. This is very +useful for describing architectural constraints, like for example that +the div instruction modifies edx, so variables that are in use at that time +cannot be stored into edx. This can be modelled by making edx interfere +with those variables. 
+ +*******************************************************************************} + + unit rgobj; interface @@ -59,6 +100,14 @@ unit rgobj; tpushedsaved = array[firstreg..lastreg] of tpushedsavedloc; Tpushedsavedint = array[first_supreg..last_supreg] of Tpushedsavedloc; + Tinterferencebitmap=array[Tsuperregister] of set of Tsuperregister; + Tinterferenceadjlist=array[Tsuperregister] of Pstring; + Tinterferencegraph=record + bitmap:Tinterferencebitmap; + adjlist:Tinterferenceadjlist; + end; + Pinterferencegraph=^Tinterferencegraph; + {# This class implements the abstract register allocator It is used by the code generator to allocate and free @@ -237,6 +286,9 @@ unit rgobj; procedure saveUnusedState(var state: pointer);virtual; procedure restoreUnusedState(var state: pointer);virtual; protected +{$ifdef newra} + igraph:Tinterferencegraph; +{$endif} { the following two contain the common (generic) code for all } { get- and ungetregisterxxx functions/procedures } function getregistergen(list: taasmoutput; const lowreg, highreg: Toldregister; @@ -259,6 +311,10 @@ unit rgobj; {$ifdef TEMPREGDEBUG} procedure testregisters; {$endif TEMPREGDEBUGx} +{$ifdef newra} + procedure add_edge(u,v:Tsuperregister); + procedure add_edges_used(u:Tsuperregister); +{$endif} end; const @@ -353,6 +409,9 @@ unit rgobj; fillchar(reg_user,sizeof(reg_user),0); fillchar(reg_releaser,sizeof(reg_releaser),0); {$endif TEMPREGDEBUG} +{$ifdef newra} + fillchar(igraph,sizeof(igraph),0); +{$endif} end; @@ -408,6 +467,9 @@ unit rgobj; list.concat(Tai_regalloc.alloc(r)); result:=r; lastintreg:=i; +{$ifdef newra} + add_edges_used(i); +{$endif} exit; end; until i=lastintreg; @@ -655,27 +717,37 @@ unit rgobj; end; - procedure trgobj.cleartempgen; + procedure Trgobj.cleartempgen; - begin - countunusedregsint:=countusableregsint; - countunusedregsfpu:=countusableregsfpu; - countunusedregsmm:=countusableregsmm; - {$ifdef newra} - unusedregsint:=[0..255]; - {$else} - unusedregsint:=usableregsint; - {$endif} 
- unusedregsfpu:=usableregsfpu; - unusedregsmm:=usableregsmm; - end; + var i:Tsuperregister; + + begin + countunusedregsint:=countusableregsint; + countunusedregsfpu:=countusableregsfpu; + countunusedregsmm:=countusableregsmm; + {$ifdef newra} + unusedregsint:=[0..255]; + {$else} + unusedregsint:=usableregsint; + {$endif} + unusedregsfpu:=usableregsfpu; + unusedregsmm:=usableregsmm; + {$ifdef newra} + for i:=low(Tsuperregister) to high(Tsuperregister) do + if igraph.adjlist[i]<>nil then + freemem(igraph.adjlist[i]); {lists are allocated with getmem/reallocmem in add_edge} + fillchar(igraph,sizeof(igraph),0); + {$endif} + end; procedure trgobj.ungetreference(list : taasmoutput; const ref : treference); begin - ungetregisterint(list,ref.base); - ungetregisterint(list,ref.index); + if ref.base.number<>NR_NO then + ungetregisterint(list,ref.base); + if ref.index.number<>NR_NO then + ungetregisterint(list,ref.index); end; @@ -1098,6 +1170,48 @@ unit rgobj; state := nil; end; +{$ifdef newra} + procedure Trgobj.add_edge(u,v:Tsuperregister); + + {Adds the undirected edge u-v to the interference graph: sets both bitmap bits and appends to the adjacency lists. Does nothing if u=v or the edge already exists.} + + procedure addadj(u,v:Tsuperregister); + + begin + if igraph.adjlist[u]=nil then + begin + getmem(igraph.adjlist[u],16); + igraph.adjlist[u]^:=''; + end + else if (length(igraph.adjlist[u]^) and 15)=15 then + reallocmem(igraph.adjlist[u],length(igraph.adjlist[u]^)+17); {1 length byte + current chars + 16 new slots} + igraph.adjlist[u]^:=igraph.adjlist[u]^+char(v); + end; + + begin + if (u<>v) and not(v in igraph.bitmap[u]) then + begin + include(igraph.bitmap[u],v); + include(igraph.bitmap[v],u); + {Precoloured nodes are not stored in the interference graph.} + if not(u in [first_supreg..last_supreg]) then + addadj(u,v); + if not(v in [first_supreg..last_supreg]) then + addadj(v,u); + end; + end; + + procedure Trgobj.add_edges_used(u:Tsuperregister); + + var i:Tsuperregister; + + begin + for i:=1 to 255 do + if not(i in unusedregsint) then + add_edge(u,i); + end; +{$endif} + {**************************************************************************** TReference 
@@ -1228,7 +1342,11 @@ end. { $Log$ - Revision 1.28 2003-03-08 13:59:16 daniel + Revision 1.29 2003-03-08 20:36:41 daniel + + Added newra version of Ti386shlshrnode + + Added interference graph construction code + + Revision 1.28 2003/03/08 13:59:16 daniel * Work to handle new register notation in ag386nsm + Added newra version of Ti386moddivnode