{
    $Id$
    Copyright (c) 1998-2002 by Florian Klaempfl

    This unit implements the x86-64 specific class for the register
    allocator

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

 ****************************************************************************
}

unit rgcpu;

{$i fpcdefs.inc}

  interface

    uses
      cpubase,
      cpuinfo,
      aasmbase,aasmtai,aasmcpu,
      cclasses,globtype,cgbase,cginfo,rgobj;

    type
       { Target specific register allocator. It derives from the generic
         trgobj and overrides the routines where this CPU needs its own
         handling of integer registers, the fpu stack and sub-registers. }
       trgcpu = class(trgobj)

          { to keep the same allocation order as with the old routines }
          function getregisterint(list:Taasmoutput;size:Tcgsize):Tregister;override;
{$ifndef newra}
          { these two overrides only exist when newra is not defined }
          procedure ungetregisterint(list:Taasmoutput;r:Tregister); override;
          function getexplicitregisterint(list:Taasmoutput;r:Tnewregister):Tregister;override;
{$endif newra}

          { the fpu "allocation" always yields R_ST and releasing is a no-op,
            see the implementations below }
          function getregisterfpu(list: taasmoutput) : tregister; override;
          procedure ungetregisterfpu(list: taasmoutput; r : tregister); override;

          { releases the base and the index register of ref }
          procedure ungetreference(list: taasmoutput; const ref : treference); override;

          {# Returns a subset register of the register r with the specified size.
             WARNING: There is no clearing of the upper parts of the register,
             if an 8-bit / 16-bit register is converted to a 32-bit register.
             It is up to the code generator to correctly zero fill the register
          }
          function makeregsize(reg: tregister; size: tcgsize): tregister; override;

          { calls the inherited reset and additionally sets fpuvaroffset to 0 }
          procedure resetusableregisters;override;

         { corrects the fpu stack register by ofs }
         function correct_fpuregister(r : tregister;ofs : byte) : tregister;

         { set to 0 by resetusableregisters; NOTE(review): presumably the
           current offset into the fpu register stack - confirm at the users }
         fpuvaroffset : byte;
       end;


  implementation

    uses
       systems,
       globals,verbose,
       tgobj;

{************************************************************************}
{                         routine helpers                                }
{************************************************************************}

  const
    { Lookup tables used by changeregsize: each one is indexed by an
      old-style register enum (firstreg..lastreg) and yields the register
      that should be used for one particular operand size. R_NO marks
      "no counterpart of this size"; changeregsize turns that into an
      internalerror.

      All four tables share the same layout: one leading R_NO, a group of
      17 entries, three groups of 16 entries, then only R_NO entries.
      NOTE(review): the groups presumably line up with the 64, 32, 16 and
      8 bit general purpose registers in toldregister, and the R_NO tail
      with the non general purpose registers - confirm against cpubase. }

    { register -> 64 bit variant; the 17th entry of the first group maps
      to R_RIP here, the other three tables have R_NO at that position }
    reg2reg64 : array[firstreg..lastreg] of toldregister = (R_NO,
      R_RAX,R_RCX,R_RDX,R_RBX,R_RSP,R_RBP,R_RSI,R_RDI,
      R_R8,R_R9,R_R10,R_R11,R_R12,R_R13,R_R14,R_R15,R_RIP,
      R_RAX,R_RCX,R_RDX,R_RBX,R_RSP,R_RBP,R_RSI,R_RDI,
      R_R8,R_R9,R_R10,R_R11,R_R12,R_R13,R_R14,R_R15,
      R_RAX,R_RCX,R_RDX,R_RBX,R_RSP,R_RBP,R_RSI,R_RDI,
      R_R8,R_R9,R_R10,R_R11,R_R12,R_R13,R_R14,R_R15,
      R_RAX,R_RCX,R_RDX,R_RBX,R_RSP,R_RBP,R_RSI,R_RDI,
      R_R8,R_R9,R_R10,R_R11,R_R12,R_R13,R_R14,R_R15,
      R_NO,R_NO,R_NO,R_NO,
      R_NO,R_NO,R_NO,R_NO,R_NO,R_NO,
      R_NO,R_NO,R_NO,R_NO,R_NO,R_NO,R_NO,R_NO,R_NO,
      R_NO,R_NO,R_NO,R_NO,R_NO,R_NO,
      R_NO,R_NO,R_NO,R_NO,
      R_NO,R_NO,R_NO,R_NO,R_NO,
      R_NO,R_NO,R_NO,R_NO,R_NO,R_NO,R_NO,R_NO,
      R_NO,R_NO,R_NO,R_NO,R_NO,R_NO,R_NO,R_NO,
      R_NO,R_NO,R_NO,R_NO,R_NO,R_NO,R_NO,R_NO
    );

    { register -> 32 bit variant }
    reg2reg32 : array[firstreg..lastreg] of toldregister = (R_NO,
      R_EAX,R_ECX,R_EDX,R_EBX,R_ESP,R_EBP,R_ESI,R_EDI,
      R_R8D,R_R9D,R_R10D,R_R11D,R_R12D,R_R13D,R_R14D,R_R15D,R_NO,
      R_EAX,R_ECX,R_EDX,R_EBX,R_ESP,R_EBP,R_ESI,R_EDI,
      R_R8D,R_R9D,R_R10D,R_R11D,R_R12D,R_R13D,R_R14D,R_R15D,
      R_EAX,R_ECX,R_EDX,R_EBX,R_ESP,R_EBP,R_ESI,R_EDI,
      R_R8D,R_R9D,R_R10D,R_R11D,R_R12D,R_R13D,R_R14D,R_R15D,
      R_EAX,R_ECX,R_EDX,R_EBX,R_ESP,R_EBP,R_ESI,R_EDI,
      R_R8D,R_R9D,R_R10D,R_R11D,R_R12D,R_R13D,R_R14D,R_R15D,
      R_NO,R_NO,R_NO,R_NO,
      R_NO,R_NO,R_NO,R_NO,R_NO,R_NO,
      R_NO,R_NO,R_NO,R_NO,R_NO,R_NO,R_NO,R_NO,R_NO,
      R_NO,R_NO,R_NO,R_NO,R_NO,R_NO,
      R_NO,R_NO,R_NO,R_NO,
      R_NO,R_NO,R_NO,R_NO,R_NO,
      R_NO,R_NO,R_NO,R_NO,R_NO,R_NO,R_NO,R_NO,
      R_NO,R_NO,R_NO,R_NO,R_NO,R_NO,R_NO,R_NO,
      R_NO,R_NO,R_NO,R_NO,R_NO,R_NO,R_NO,R_NO
    );

    { register -> 16 bit variant }
    reg2reg16 : array[firstreg..lastreg] of toldregister = (R_NO,
      R_AX,R_CX,R_DX,R_BX,R_SP,R_BP,R_SI,R_DI,
      R_R8W,R_R9W,R_R10W,R_R11W,R_R12W,R_R13W,R_R14W,R_R15W,R_NO,
      R_AX,R_CX,R_DX,R_BX,R_SP,R_BP,R_SI,R_DI,
      R_R8W,R_R9W,R_R10W,R_R11W,R_R12W,R_R13W,R_R14W,R_R15W,
      R_AX,R_CX,R_DX,R_BX,R_SP,R_BP,R_SI,R_DI,
      R_R8W,R_R9W,R_R10W,R_R11W,R_R12W,R_R13W,R_R14W,R_R15W,
      R_AX,R_CX,R_DX,R_BX,R_SP,R_BP,R_SI,R_DI,
      R_R8W,R_R9W,R_R10W,R_R11W,R_R12W,R_R13W,R_R14W,R_R15W,
      R_NO,R_NO,R_NO,R_NO,
      R_NO,R_NO,R_NO,R_NO,R_NO,R_NO,
      R_NO,R_NO,R_NO,R_NO,R_NO,R_NO,R_NO,R_NO,R_NO,
      R_NO,R_NO,R_NO,R_NO,R_NO,R_NO,
      R_NO,R_NO,R_NO,R_NO,
      R_NO,R_NO,R_NO,R_NO,R_NO,
      R_NO,R_NO,R_NO,R_NO,R_NO,R_NO,R_NO,R_NO,
      R_NO,R_NO,R_NO,R_NO,R_NO,R_NO,R_NO,R_NO,
      R_NO,R_NO,R_NO,R_NO,R_NO,R_NO,R_NO,R_NO
    );

    { register -> 8 bit (low byte) variant }
    reg2reg8 : array[firstreg..lastreg] of toldregister = (R_NO,
      R_AL,R_CL,R_DL,R_BL,R_SPL,R_BPL,R_SIL,R_DIL,
      R_R8B,R_R9B,R_R10B,R_R11B,R_R12B,R_R13B,R_R14B,R_R15B,R_NO,
      R_AL,R_CL,R_DL,R_BL,R_SPL,R_BPL,R_SIL,R_DIL,
      R_R8B,R_R9B,R_R10B,R_R11B,R_R12B,R_R13B,R_R14B,R_R15B,
      R_AL,R_CL,R_DL,R_BL,R_SPL,R_BPL,R_SIL,R_DIL,
      R_R8B,R_R9B,R_R10B,R_R11B,R_R12B,R_R13B,R_R14B,R_R15B,
      R_AL,R_CL,R_DL,R_BL,R_SPL,R_BPL,R_SIL,R_DIL,
      R_R8B,R_R9B,R_R10B,R_R11B,R_R12B,R_R13B,R_R14B,R_R15B,
      R_NO,R_NO,R_NO,R_NO,
      R_NO,R_NO,R_NO,R_NO,R_NO,R_NO,
      R_NO,R_NO,R_NO,R_NO,R_NO,R_NO,R_NO,R_NO,R_NO,
      R_NO,R_NO,R_NO,R_NO,R_NO,R_NO,
      R_NO,R_NO,R_NO,R_NO,
      R_NO,R_NO,R_NO,R_NO,R_NO,
      R_NO,R_NO,R_NO,R_NO,R_NO,R_NO,R_NO,R_NO,
      R_NO,R_NO,R_NO,R_NO,R_NO,R_NO,R_NO,R_NO,
      R_NO,R_NO,R_NO,R_NO,R_NO,R_NO,R_NO,R_NO
    );

    { Converts a register to the specified operand size.

      r    : register to convert, only its enum field is looked at
      size : wanted operand size (S_B, S_W, S_L or S_Q)

      The counterpart is taken from the reg2reg8/16/32/64 tables. Any other
      size raises internalerror 200204101, a register without a counterpart
      of that size (table entry R_NO) raises internalerror 200204102. Only
      the enum field of the returned register is filled in by this routine. }
    function changeregsize(r:tregister;size:topsize):tregister;
      var
        sized : tregister;
      begin
        case size of
          S_Q :
            sized.enum:=reg2reg64[r.enum];
          S_L :
            sized.enum:=reg2reg32[r.enum];
          S_W :
            sized.enum:=reg2reg16[r.enum];
          S_B :
            sized.enum:=reg2reg8[r.enum];
          else
            internalerror(200204101);
        end;
        { R_NO in the table means there is no register of that size }
        if sized.enum=R_NO then
          internalerror(200204102);
        result:=sized;
      end;


{************************************************************************}
{                               trgcpu                                   }
{************************************************************************}

    { Allocates a free integer register and returns it as a new-style
      register (enum R_INTREGISTER, number = superregister shl 8 or subreg).

      list : assembler list that receives the tai_regalloc marker
      size : wanted size, translated to a subregister via cgsize2subreg

      The candidates are tried in the fixed order RAX, RDX, RBX, RCX to keep
      the allocation order of the old routines. The chosen register is
      removed from unusedregsint, recorded in used_in_proc_int and
      countunusedregsint is decremented. If no register is counted as free,
      or none of the four candidates is free, internalerror(10) is raised. }
    function trgcpu.getregisterint(list: taasmoutput;size:Tcgsize): tregister;
    var subreg:Tsubregister;

    begin
      subreg:=cgsize2subreg(size);

      if countunusedregsint=0 then
        internalerror(10);
      result.enum:=R_INTREGISTER;
{$ifdef TEMPREGDEBUG}
      if curptree^.usableregsint-countunusedregsint>curptree^.registers32 then
        internalerror(10);
{$endif TEMPREGDEBUG}
{$ifdef EXTTEMPREGDEBUG}
      { NOTE(review): this debug-only code uses other identifiers
        (countunusedregistersint, usableregs, curptree^^) than the rest of
        this routine; it looks stale and should be checked before
        EXTTEMPREGDEBUG is enabled }
      if curptree^.usableregs-countunusedregistersint>curptree^^.reallyusedregs then
        curptree^.reallyusedregs:=curptree^^.usableregs-countunusedregistersint;
{$endif EXTTEMPREGDEBUG}
      if RS_RAX in unusedregsint then
        begin
          dec(countunusedregsint);
          exclude(unusedregsint,RS_RAX);
          include(used_in_proc_int,RS_RAX);
          result.number:=RS_RAX shl 8 or subreg;
{$ifdef TEMPREGDEBUG}
          reg_user[R_RAX]:=curptree^;
{$endif TEMPREGDEBUG}
          { emit the marker into the list of the caller, like the
            release routines do, instead of the global exprasmlist }
          list.concat(tai_regalloc.alloc(result));
        end
      else if RS_RDX in unusedregsint then
        begin
          dec(countunusedregsint);
          exclude(unusedregsint,RS_RDX);
          include(used_in_proc_int,RS_RDX);
          result.number:=RS_RDX shl 8 or subreg;
{$ifdef TEMPREGDEBUG}
          reg_user[R_RDX]:=curptree^;
{$endif TEMPREGDEBUG}
          list.concat(tai_regalloc.alloc(result));
        end
      else if RS_RBX in unusedregsint then
        begin
          dec(countunusedregsint);
          exclude(unusedregsint,RS_RBX);
          include(used_in_proc_int,RS_RBX);
          result.number:=RS_RBX shl 8 or subreg;
{$ifdef TEMPREGDEBUG}
          reg_user[R_RBX]:=curptree^;
{$endif TEMPREGDEBUG}
          list.concat(tai_regalloc.alloc(result));
        end
      else if RS_RCX in unusedregsint then
        begin
          dec(countunusedregsint);
          exclude(unusedregsint,RS_RCX);
          include(used_in_proc_int,RS_RCX);
          result.number:=RS_RCX shl 8 or subreg;
{$ifdef TEMPREGDEBUG}
          reg_user[R_RCX]:=curptree^;
{$endif TEMPREGDEBUG}
          list.concat(tai_regalloc.alloc(result));
        end
      else
        { registers are counted as free, but none of the four candidates is }
        internalerror(10);
{$ifdef TEMPREGDEBUG}
      testregisters;
{$endif TEMPREGDEBUG}
    end;


    { Releases the integer register r.

      list : assembler list that receives the deallocation marker
      r    : register to release; must be R_NO or a new-style register
             (enum R_INTREGISTER), anything else raises
             internalerror 200301234

      R_NO is ignored. For RDI only a tai_regalloc.DeAlloc marker is emitted
      and the inherited bookkeeping is skipped. RAX, RBX, RCX, RDX and RSI
      are handed to the inherited routine; every other superregister is
      silently ignored.

      The method is only declared in the class when newra is not defined,
      so the implementation is guarded by the same conditional. }
{$ifndef newra}
    procedure trgcpu.ungetregisterint(list: taasmoutput; r : tregister);
      var supreg:Tsuperregister;
      begin
         if r.enum=R_NO then
          exit;
         if r.enum<>R_INTREGISTER then
            internalerror(200301234);
         { the superregister is stored in the upper byte of the number }
         supreg:=r.number shr 8;
         if supreg=RS_RDI then
           begin
             list.concat(tai_regalloc.DeAlloc(r));
             exit;
           end;
         if not(supreg in [RS_RAX,RS_RBX,RS_RCX,RS_RDX,RS_RSI]) then
           exit;
         inherited ungetregisterint(list,r);
      end;
{$endif newra}


   { Allocates the explicitly requested integer register r.

     list : assembler list that receives the allocation marker
     r    : new-style register number (superregister in the upper byte)

     For RDI the register is returned directly after emitting a
     tai_regalloc.alloc marker, without the inherited bookkeeping; all
     other registers are delegated to the inherited routine.

     The method is only declared in the class when newra is not defined,
     so the implementation is guarded by the same conditional. }
{$ifndef newra}
   function trgcpu.getexplicitregisterint(list: taasmoutput; r : tnewregister) : tregister;

   var r2:Tregister;

    begin
      if (r shr 8) in [RS_RDI] then
        begin
          r2.enum:=R_INTREGISTER;
          r2.number:=r;
          list.concat(Tai_regalloc.alloc(r2));
          result:=r2;
          exit;
        end;
      result:=inherited getexplicitregisterint(list,r);
    end;
{$endif newra}


    { Returns the fpu register to use. No bookkeeping is done and list is
      not touched: the result is always R_ST (only the enum field is set). }
    function trgcpu.getregisterfpu(list: taasmoutput) : tregister;

      begin
        { R_ST0 must not be returned here, the reason is explained in the }
        { comments above the a_loadfpu_* methods in cgcpu (JM)            }
        getregisterfpu.enum:=R_ST;
      end;


    { Releasing an fpu register is intentionally a no-op on this target;
      neither list nor r is used. }
    procedure trgcpu.ungetregisterfpu(list : taasmoutput; r : tregister);

      begin
        { nothing to do, fpu stack management is handled by the load/ }
        { store operations in cgcpu (JM)                              }
      end;


    { Releases the registers used by the reference ref: first its base
      register, then its index register, both through ungetregisterint. }
    procedure trgcpu.ungetreference(list: taasmoutput; const ref : treference);

      var
        basereg,indexreg : tregister;
      begin
         basereg:=ref.base;
         indexreg:=ref.index;
         { keep the order base -> index }
         ungetregisterint(list,basereg);
         ungetregisterint(list,indexreg);
      end;

   { Resets the allocator state: runs the inherited reset first and then
     clears the target specific fpuvaroffset. }
   procedure trgcpu.resetusableregisters;

     begin
       inherited resetusableregisters;
       { target specific part of the state }
       self.fpuvaroffset:=0;
     end;


   { Corrects the fpu stack register r by ofs: the result is the register
     whose enum ordinal is ofs higher than the one of r. Only the enum
     field of the result is set. }
   function trgcpu.correct_fpuregister(r : tregister;ofs : byte) : tregister;

     var
       shifted : longint;
     begin
        shifted:=longint(r.enum)+ofs;
        result.enum:=toldregister(shifted);
     end;


    { Returns the sub- or super-register of reg that has the size "size".

      reg  : register to convert
      size : wanted size; OS_8/OS_S8, OS_16/OS_S16, OS_32/OS_S32 and
             OS_64/OS_S64 are supported, every other size raises
             internalerror 2001092312

      The conversion itself is done by changeregsize, which raises an
      internalerror when reg has no counterpart of the wanted size. The
      upper part of the register is not cleared when widening. }
    function trgcpu.makeregsize(reg: tregister; size: tcgsize): tregister;

      var
        opsize : topsize;
      begin
        case size of
          OS_8,OS_S8:
            opsize := S_B;
          OS_16,OS_S16:
            opsize := S_W;
          OS_32,OS_S32:
            opsize := S_L;
          { the full 64 bit registers; changeregsize already handles S_Q }
          OS_64,OS_S64:
            opsize := S_Q;
          else
            internalerror(2001092312);
        end;
        makeregsize := changeregsize(reg,opsize);
      end;


{ Installs the target specific register allocator as the global rg instance
  when the unit is initialized.
  NOTE(review): the meaning of the constructor argument 15 is defined by
  trgobj.create; presumably the number of usable integer registers -
  confirm against rgobj. }
initialization
  rg := trgcpu.create(15);
end.

{
  $Log$
  Revision 1.5  2003-06-13 21:19:33  peter
    * current_procdef removed, use current_procinfo.procdef instead

  Revision 1.4  2002/04/25 20:15:40  florian
    * block nodes within expressions shouldn't release the used registers,
      fixed using a flag till the new rg is ready

  Revision 1.3  2003/01/05 13:36:54  florian
    * x86-64 compiles
    + very basic support for float128 type (x86-64 only)

  Revision 1.2  2002/07/25 22:55:34  florian
    * several fixes, small test units can be compiled

  Revision 1.1  2002/07/24 22:38:15  florian
    + initial release of x86-64 target code

  Revision 1.8  2002/07/01 18:46:34  peter
    * internal linker
    * reorganized aasm layer

  Revision 1.7  2002/05/16 19:46:52  carl
  + defines.inc -> fpcdefs.inc to avoid conflicts if compiling by hand
  + try to fix temp allocation (still in ifdef)
  + generic constructor calls
  + start of tassembler / tmodulebase class cleanup

  Revision 1.6  2002/05/12 16:53:18  peter
    * moved entry and exitcode to ncgutil and cgobj
    * foreach gets extra argument for passing local data to the
      iterator function
    * -CR checks also class typecasts at runtime by changing them
      into as
    * fixed compiler to cycle with the -CR option
    * fixed stabs with elf writer, finally the global variables can
      be watched
    * removed a lot of routines from cga unit and replaced them by
      calls to cgobj
    * u32bit-s32bit updates for and,or,xor nodes. When one element is
      u32bit then the other is typecasted also to u32bit without giving
      a rangecheck warning/error.
    * fixed pascal calling method with reversing also the high tree in
      the parast, detected by tcalcst3 test

  Revision 1.5  2002/04/21 15:43:32  carl
  * changeregsize -> rg.makeregsize
  * changeregsize moved from cpubase to here

  Revision 1.4  2002/04/15 19:44:22  peter
    * fixed stackcheck that would be called recursively when a stack
      error was found
    * generic changeregsize(reg,size) for i386 register resizing
    * removed some more routines from cga unit
    * fixed returnvalue handling
    * fixed default stacksize of linux and go32v2, 8kb was a bit small :-)

  Revision 1.3  2002/04/04 19:06:13  peter
    * removed unused units
    * use tlocation.size in cg.a_*loc*() routines

  Revision 1.2  2002/04/02 17:11:39  peter
    * tlocation,treference update
    * LOC_CONSTANT added for better constant handling
    * secondadd splitted in multiple routines
    * location_force_reg added for loading a location to a register
      of a specified size
    * secondassignment parses now first the right and then the left node
      (this is compatible with Kylix). This saves a lot of push/pop especially
      with string operations
    * adapted some routines to use the new cg methods

  Revision 1.1  2002/03/31 20:26:40  jonas
    + a_loadfpu_* and a_loadmm_* methods in tcg
    * register allocation is now handled by a class and is mostly processor
      independent (+rgobj.pas and i386/rgcpu.pas)
    * temp allocation is now handled by a class (+tgobj.pas, -i386\tgcpu.pas)
    * some small improvements and fixes to the optimizer
    * some register allocation fixes
    * some fpuvaroffset fixes in the unary minus node
    * push/popusedregisters is now called rg.save/restoreusedregisters and
      (for i386) uses temps instead of push/pop's when using -Op3 (that code is
      also better optimizable)
    * fixed and optimized register saving/restoring for new/dispose nodes
    * LOC_FPU locations now also require their "register" field to be set to
      R_ST, not R_ST0 (the latter is used for LOC_CFPUREGISTER locations only)
    - list field removed of the tnode class because it's not used currently
      and can cause hard-to-find bugs

}