fpc/compiler/i386/n386mat.pas
2003-12-26 13:19:16 +00:00

684 lines
25 KiB
ObjectPascal

{
$Id$
Copyright (c) 1998-2002 by Florian Klaempfl
Generate i386 assembler for math nodes
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
****************************************************************************
}
unit n386mat;
{$i fpcdefs.inc}
interface
uses
node,nmat,ncgmat,nx86mat;
type
ti386moddivnode = class(tmoddivnode)
{ i386 version of the div/mod node: pass_2 emits the x86 instructions
for a 32 bit integer division or modulo }
procedure pass_2;override;
end;
ti386shlshrnode = class(tshlshrnode)
{ i386 version of the shl/shr node: pass_2 emits the code for both
32 bit and 64 bit operands }
procedure pass_2;override;
{ everything will be handled in pass_2, so this override returns nil }
function first_shlshr64bitint: tnode; override;
end;
{ i386 unary minus node: adds nothing to tx86unaryminusnode, it only
provides the class that is assigned to cunaryminusnode at unit
initialization }
ti386unaryminusnode = class(tx86unaryminusnode)
end;
ti386notnode = class(tcgnotnode)
{ i386 version of the not node }
{ boolean not: result is delivered as a jump or in the cpu flags }
procedure second_boolean;override;
{$ifdef SUPPORT_MMX}
{ not of an mmx operand, only compiled when SUPPORT_MMX is defined }
procedure second_mmx;override;
{$endif SUPPORT_MMX}
end;
implementation
uses
globtype,systems,
cutils,verbose,globals,
symconst,symdef,aasmbase,aasmtai,defutil,
cgbase,pass_1,pass_2,
ncon,
cpubase,cpuinfo,
cga,ncgutil,cgobj;
{*****************************************************************************
TI386MODDIVNODE
*****************************************************************************}
procedure ti386moddivnode.pass_2;
{ Generates the i386 code for a div or mod node.

Both operands are evaluated with secondpass first; the routine exits
silently when codegenerror is set. 64 bit results are rejected with
internalerror 200109052 because they must have been handled earlier.

Two strategies are used:
- "div" by a constant power of two becomes a shift (SHR for unsigned,
a rounding adjustment followed by SAR for signed numerators);
- everything else uses DIV/IDIV with the numerator in EDX:EAX; the
quotient (EAX) or the remainder (EDX) is copied into a fresh register.

The result is always returned in location.register (LOC_REGISTER). }
var hreg1,hreg2:Tregister;
power:longint;
hl:Tasmlabel;
op:Tasmop;
begin
secondpass(left);
if codegenerror then
exit;
secondpass(right);
if codegenerror then
exit;
if is_64bitint(resulttype.def) then
{ should be handled in pass_1 (JM) }
internalerror(200109052);
{ put numerator in register }
location_reset(location,LOC_REGISTER,OS_INT);
location_force_reg(exprasmlist,left.location,OS_INT,false);
hreg1:=left.location.register;
{ only "div" (not "mod") by a constant power of two is turned into a shift;
ispowerof2 returns the shift count in power }
if (nodetype=divn) and (right.nodetype=ordconstn) and
ispowerof2(tordconstnode(right).value,power) then
begin
{ for signed numbers, the numerator must be adjusted before the
shift instruction, but not with unsigned numbers! Otherwise,
"Cardinal($ffffffff) div 16" overflows! (JM) }
if is_signed(left.resulttype.def) Then
begin
if (aktOptProcessor <> class386) and
not(cs_littlesize in aktglobalswitches) then
{ use a sequence without jumps, saw this in
comp.compilers (JM) }
begin
{ no jumps, but more operations }
hreg2:=cg.getintregister(exprasmlist,OS_INT);
emit_reg_reg(A_MOV,S_L,hreg1,hreg2);
{ If the left value is negative, hreg2=$ffffffff, otherwise 0. }
emit_const_reg(A_SAR,S_L,31,hreg2);
{ If negative, hreg2=right value-1, otherwise 0. }
emit_const_reg(A_AND,S_L,tordconstnode(right).value-1,hreg2);
{ add to the left value }
emit_reg_reg(A_ADD,S_L,hreg2,hreg1);
{ release the temporary register again }
cg.ungetregister(exprasmlist,hreg2);
{ do the shift }
emit_const_reg(A_SAR,S_L,power,hreg1);
end
else
begin
{ a jump, but less operations: the adjustment is skipped via
label hl when the flag condition F_NS holds after the TEST }
emit_reg_reg(A_TEST,S_L,hreg1,hreg1);
objectlibrary.getlabel(hl);
cg.a_jmp_flags(exprasmlist,F_NS,hl);
{ for power=1 the adjustment (right value-1) is 1, so INC is enough }
if power=1 then
emit_reg(A_INC,S_L,hreg1)
else
emit_const_reg(A_ADD,S_L,tordconstnode(right).value-1,hreg1);
cg.a_label(exprasmlist,hl);
emit_const_reg(A_SAR,S_L,power,hreg1);
end
end
else
{ unsigned numerator: a plain logical shift right }
emit_const_reg(A_SHR,S_L,power,hreg1);
location.register:=hreg1;
end
else
begin
{ Move the numerator into EAX. Note that hreg1 is released before
the MOV that still reads it. }
cg.ungetregister(exprasmlist,hreg1);
cg.getexplicitregister(exprasmlist,NR_EAX);
emit_reg_reg(A_MOV,S_L,hreg1,NR_EAX);
cg.getexplicitregister(exprasmlist,NR_EDX);
{ Extension of EAX into EDX depends on the left type: zero EDX for
u32bit, sign extend with CDQ otherwise. }
if torddef(left.resulttype.def).typ=u32bit then
emit_reg_reg(A_XOR,S_L,NR_EDX,NR_EDX)
else
emit_none(A_CDQ,S_NO);
{ Division depends on the right type. }
if Torddef(right.resulttype.def).typ=u32bit then
op:=A_DIV
else
op:=A_IDIV;
{ the divisor can be used directly from memory or from a register;
any other location is first loaded into a temporary register }
if right.location.loc in [LOC_REFERENCE,LOC_CREFERENCE] then
emit_ref(op,S_L,right.location.reference)
else if right.location.loc in [LOC_REGISTER,LOC_CREGISTER] then
emit_reg(op,S_L,right.location.register)
else
begin
{ NOTE(review): the register is requested with right.location.size but
loaded as OS_32 and used with S_L - confirm the size is always
32 bit on this path }
hreg1:=cg.getintregister(exprasmlist,right.location.size);
cg.a_load_loc_reg(exprasmlist,OS_32,right.location,hreg1);
cg.ungetregister(exprasmlist,hreg1);
emit_reg(op,S_L,hreg1);
end;
location_release(exprasmlist,right.location);
{ Copy the result into a new register. Release EAX & EDX.
The register holding the result (EAX for div, EDX for mod) is
released last, directly before the new register is requested. }
if nodetype=divn then
begin
cg.ungetregister(exprasmlist,NR_EDX);
cg.ungetregister(exprasmlist,NR_EAX);
location.register:=cg.getintregister(exprasmlist,OS_INT);
emit_reg_reg(A_MOV,S_L,NR_EAX,location.register);
end
else
begin
cg.ungetregister(exprasmlist,NR_EAX);
cg.ungetregister(exprasmlist,NR_EDX);
location.register:=cg.getintregister(exprasmlist,OS_INT);
emit_reg_reg(A_MOV,S_L,NR_EDX,location.register);
end;
end;
end;
{*****************************************************************************
TI386SHLSHRNODE
*****************************************************************************}
function ti386shlshrnode.first_shlshr64bitint: tnode;
  { Always returns nil: this class provides no replacement node for
    64 bit shifts, they are generated by pass_2 instead. }
  begin
    first_shlshr64bitint:=nil;
  end;
procedure ti386shlshrnode.pass_2;
{ Generates the i386 code for a shl or shr node.

64 bit left operands are kept in a register pair (registerlow and
registerhigh) and shifted with SHLD/SHRD plus SHL/SHR, because a
single shift instruction only handles counts below 32. 32 bit
operands use one SHL/SHR, either with an immediate count or with
the count in CL.

The result is returned in location (LOC_REGISTER). }
var hregisterhigh,hregisterlow:Tregister;
op:Tasmop;
l1,l2,l3:Tasmlabel;
begin
secondpass(left);
secondpass(right);
{ determine operator }
if nodetype=shln then
op:=A_SHL
else
op:=A_SHR;
if is_64bitint(left.resulttype.def) then
begin
location_reset(location,LOC_REGISTER,OS_64);
{ load left operator in a register }
location_force_reg(exprasmlist,left.location,OS_64,false);
hregisterhigh:=left.location.registerhigh;
hregisterlow:=left.location.registerlow;
{ shifting by a constant directly coded: }
if (right.nodetype=ordconstn) then
begin
{ shrd/shl works only for values <=31 !! }
{ count > 63: every bit is shifted out, the result is 0 }
if Tordconstnode(right).value>63 then
begin
cg.a_load_const_reg(exprasmlist,OS_32,0,hregisterhigh);
cg.a_load_const_reg(exprasmlist,OS_32,0,hregisterlow);
location.registerlow:=hregisterlow;
location.registerhigh:=hregisterhigh;
end
{ count 32..63: one half becomes zero, the other half is shifted by
(count and 31) and changes its role, see the register swap below }
else if Tordconstnode(right).value>31 then
begin
if nodetype=shln then
begin
emit_reg_reg(A_XOR,S_L,hregisterhigh,hregisterhigh);
if ((tordconstnode(right).value and 31) <> 0) then
emit_const_reg(A_SHL,S_L,tordconstnode(right).value and 31,
hregisterlow);
end
else
begin
emit_reg_reg(A_XOR,S_L,hregisterlow,hregisterlow);
if ((tordconstnode(right).value and 31) <> 0) then
emit_const_reg(A_SHR,S_L,tordconstnode(right).value and 31,
hregisterhigh);
end;
{ no MOV is needed: the two registers simply exchange their roles
in the result location }
location.registerhigh:=hregisterlow;
location.registerlow:=hregisterhigh;
end
{ remaining counts: double precision shift moves the bits across
the halves, then the other half is shifted normally }
else
begin
if nodetype=shln then
begin
emit_const_reg_reg(A_SHLD,S_L,tordconstnode(right).value and 31,
hregisterlow,hregisterhigh);
emit_const_reg(A_SHL,S_L,tordconstnode(right).value and 31,
hregisterlow);
end
else
begin
emit_const_reg_reg(A_SHRD,S_L,tordconstnode(right).value and 31,
hregisterhigh,hregisterlow);
emit_const_reg(A_SHR,S_L,tordconstnode(right).value and 31,
hregisterhigh);
end;
location.registerlow:=hregisterlow;
location.registerhigh:=hregisterhigh;
end;
end
else
begin
{ load right operators in a register }
cg.getexplicitregister(exprasmlist,NR_ECX);
cg.a_load_loc_reg(exprasmlist,OS_32,right.location,NR_ECX);
if right.location.loc<>LOC_CREGISTER then
location_release(exprasmlist,right.location);
{ left operator is already in a register }
{ hence are both in a register }
{ is it in the case ECX ? }
{ the damned shift instructions work only til a count of 32 }
{ so we've to do some tricks here }
{ emitted code layout:
count >= 64 : clear both halves, jump to l3
l1: count >= 32 : subtract 32, shift one half, move it to the other
half, clear the source half, jump to l3
l2: count < 32 : SHLD/SHRD followed by SHL/SHR
l3: done
NOTE(review): the comparisons use F_L, presumably a signed "less";
a count with the top bit set would then take the < 32 path -
confirm this is intended }
objectlibrary.getlabel(l1);
objectlibrary.getlabel(l2);
objectlibrary.getlabel(l3);
emit_const_reg(A_CMP,S_L,64,NR_ECX);
cg.a_jmp_flags(exprasmlist,F_L,l1);
emit_reg_reg(A_XOR,S_L,hregisterlow,hregisterlow);
emit_reg_reg(A_XOR,S_L,hregisterhigh,hregisterhigh);
cg.a_jmp_always(exprasmlist,l3);
cg.a_label(exprasmlist,l1);
emit_const_reg(A_CMP,S_L,32,NR_ECX);
cg.a_jmp_flags(exprasmlist,F_L,l2);
emit_const_reg(A_SUB,S_L,32,NR_ECX);
if nodetype=shln then
begin
emit_reg_reg(A_SHL,S_L,NR_CL,hregisterlow);
emit_reg_reg(A_MOV,S_L,hregisterlow,hregisterhigh);
emit_reg_reg(A_XOR,S_L,hregisterlow,hregisterlow);
cg.a_jmp_always(exprasmlist,l3);
cg.a_label(exprasmlist,l2);
emit_reg_reg_reg(A_SHLD,S_L,NR_CL,hregisterlow,hregisterhigh);
emit_reg_reg(A_SHL,S_L,NR_CL,hregisterlow);
end
else
begin
emit_reg_reg(A_SHR,S_L,NR_CL,hregisterhigh);
emit_reg_reg(A_MOV,S_L,hregisterhigh,hregisterlow);
emit_reg_reg(A_XOR,S_L,hregisterhigh,hregisterhigh);
cg.a_jmp_always(exprasmlist,l3);
cg.a_label(exprasmlist,l2);
emit_reg_reg_reg(A_SHRD,S_L,NR_CL,hregisterhigh,hregisterlow);
emit_reg_reg(A_SHR,S_L,NR_CL,hregisterhigh);
end;
cg.a_label(exprasmlist,l3);
cg.ungetregister(exprasmlist,NR_ECX);
location.registerlow:=hregisterlow;
location.registerhigh:=hregisterhigh;
end;
end
else
begin
{ load left operators in a register }
location_copy(location,left.location);
location_force_reg(exprasmlist,location,OS_INT,false);
{ shifting by a constant directly coded: }
if (right.nodetype=ordconstn) then
{ l shl 32 should 0 imho, but neither TP nor Delphi do it in this way (FK)}
{ hence the constant count is masked with 31 }
emit_const_reg(op,S_L,tordconstnode(right).value and 31,location.register)
else
begin
{ load right operators in a ECX }
{ unlike the 64 bit path above, the right location is released
before ECX is requested here }
if right.location.loc<>LOC_CREGISTER then
location_release(exprasmlist,right.location);
cg.getexplicitregister(exprasmlist,NR_ECX);
cg.a_load_loc_reg(exprasmlist,OS_32,right.location,NR_ECX);
{ right operand is in ECX }
{ ECX is released before the shift instruction that reads CL }
cg.ungetregister(exprasmlist,NR_ECX);
emit_reg_reg(op,S_L,NR_CL,location.register);
end;
end;
end;
{*****************************************************************************
TI386NOTNODE
*****************************************************************************}
procedure ti386notnode.second_boolean;
  { Generates the code for a boolean "not".

    If the operand is expected as a jump, the true and false labels are
    exchanged while the operand is generated, so no extra instruction is
    needed. Otherwise the result is delivered in the cpu flags: either
    the inverted flags of the operand, or F_E after a TEST of the operand
    register with itself. Any other expected location raises
    internalerror 200203224. }

  { exchanges the current true and false jump labels }
  procedure swap_jump_labels;
    var
      saved : tasmlabel;
    begin
      saved:=truelabel;
      truelabel:=falselabel;
      falselabel:=saved;
    end;

  var
    testsize : topsize;
  begin
    testsize:=def_opsize(resulttype.def);

    if left.expectloc<>LOC_JUMP then
      begin
        { the second pass could change the location of left if it is a
          register variable, so it has to run before the location is
          inspected }
        secondpass(left);
        if left.expectloc=LOC_FLAGS then
          begin
            { reuse the flags of the operand, just inverted }
            location_release(exprasmlist,left.location);
            location_reset(location,LOC_FLAGS,OS_NO);
            location.resflags:=left.location.resflags;
            inverse_flags(location.resflags);
          end
        else if left.expectloc in [LOC_CONSTANT,LOC_REGISTER,LOC_CREGISTER,
                                   LOC_REFERENCE,LOC_CREFERENCE] then
          begin
            { test the operand against itself, the result is the F_E flag }
            location_force_reg(exprasmlist,left.location,def_cgsize(resulttype.def),true);
            location_release(exprasmlist,left.location);
            emit_reg_reg(A_TEST,testsize,left.location.register,left.location.register);
            location_reset(location,LOC_FLAGS,OS_NO);
            location.resflags:=F_E;
          end
        else
          internalerror(200203224);
      end
    else
      begin
        { generate the operand with exchanged jump targets and restore
          the original labels afterwards }
        location_reset(location,LOC_JUMP,OS_NO);
        swap_jump_labels;
        secondpass(left);
        maketojumpbool(exprasmlist,left,lr_load_regvars);
        swap_jump_labels;
      end;
  end;
{$ifdef SUPPORT_MMX}
procedure ti386notnode.second_mmx;
  { Generates the code for "not" on an mmx operand.

    The operand is brought into an mmx register (location.register) and
    xor-ed with an all-ones mask. The mask is built from the 32 bit
    constant $ffffffff: it is first applied as loaded with MOVD, then
    shifted left by 32 bits with PSLLQ and applied a second time.

    An operand location other than an mmx register or a memory reference
    raises an internalerror. Without this check location.register would
    be left unset and the PXOR instructions would be emitted with an
    undefined register. }
  var
    hreg,r : Tregister;
  begin
    secondpass(left);
    location_reset(location,LOC_MMXREGISTER,OS_NO);
    r:=cg.getintregister(exprasmlist,OS_INT);
    emit_const_reg(A_MOV,S_L,longint($ffffffff),r);
    { load operand }
    case left.location.loc of
      LOC_MMXREGISTER:
        { the operand register can be reused for the result }
        location_copy(location,left.location);
      LOC_CMMXREGISTER:
        begin
          { work on a copy in a newly allocated register }
          location.register:=cg.getmmxregister(exprasmlist,OS_M64);
          emit_reg_reg(A_MOVQ,S_NO,left.location.register,location.register);
        end;
      LOC_REFERENCE,
      LOC_CREFERENCE:
        begin
          location_release(exprasmlist,left.location);
          location.register:=cg.getmmxregister(exprasmlist,OS_M64);
          emit_ref_reg(A_MOVQ,S_NO,left.location.reference,location.register);
        end;
      else
        { no mmx register has been set up for any other location }
        internalerror(200312261);
    end;
    { load mask }
    hreg:=cg.getmmxregister(exprasmlist,OS_M64);
    emit_reg_reg(A_MOVD,S_NO,r,hreg);
    cg.ungetregister(exprasmlist,r);
    { lower 32 bit }
    emit_reg_reg(A_PXOR,S_D,hreg,location.register);
    { shift mask }
    emit_const_reg(A_PSLLQ,S_NO,32,hreg);
    { higher 32 bit; hreg is released before the last instruction that
      reads it, as in the original code }
    cg.ungetregister(exprasmlist,hreg);
    emit_reg_reg(A_PXOR,S_D,hreg,location.register);
  end;
{$endif SUPPORT_MMX}
begin
  { unit initialization: store the i386 specific node classes in the
    class reference variables, which are declared outside this unit }
  cnotnode:=ti386notnode;
  cshlshrnode:=ti386shlshrnode;
  cmoddivnode:=ti386moddivnode;
  cunaryminusnode:=ti386unaryminusnode;
end.
{
$Log$
Revision 1.68 2003-12-26 13:19:16 florian
* rtl and compiler compile with -Cfsse2
Revision 1.67 2003/12/25 01:07:09 florian
+ $fputype directive support
+ single data type operations with sse unit
* fixed more x86-64 stuff
Revision 1.66 2003/12/10 17:28:41 peter
* int64 shl/shr > 63 returns 0
Revision 1.65 2003/10/10 17:48:14 peter
* old trgobj moved to x86/rgcpu and renamed to trgx86fpu
* tregisteralloctor renamed to trgobj
* removed rgobj from a lot of units
* moved location_* and reference_* to cgobj
* first things for mmx register allocation
Revision 1.64 2003/10/09 21:31:37 daniel
* Register allocator splitted, ans abstract now
Revision 1.63 2003/10/01 20:34:49 peter
* procinfo unit contains tprocinfo
* cginfo renamed to cgbase
* moved cgmessage to verbose
* fixed ppc and sparc compiles
Revision 1.62 2003/09/29 20:58:56 peter
* optimized releasing of registers
Revision 1.61 2003/09/28 21:48:20 peter
* fix register leaks
Revision 1.60 2003/09/03 15:55:01 peter
* NEWRA branch merged
Revision 1.59.2.2 2003/08/31 13:50:16 daniel
* Remove sorting and use pregenerated indexes
* Some work on making things compile
Revision 1.59.2.1 2003/08/29 17:29:00 peter
* next batch of updates
Revision 1.59 2003/07/02 22:18:04 peter
* paraloc splitted in callerparaloc,calleeparaloc
* sparc calling convention updates
Revision 1.58 2003/06/13 21:19:31 peter
* current_procdef removed, use current_procinfo.procdef instead
Revision 1.57 2003/06/03 21:11:09 peter
* cg.a_load_* get a from and to size specifier
* makeregsize only accepts newregister
* i386 uses generic tcgnotnode,tcgunaryminus
Revision 1.56 2003/06/03 13:01:59 daniel
* Register allocator finished
Revision 1.55 2003/05/31 15:04:31 peter
* load_loc_reg update
Revision 1.54 2003/05/22 21:32:29 peter
* removed some unit dependencies
Revision 1.53 2003/04/22 23:50:23 peter
* firstpass uses expectloc
* checks if there are differences between the expectloc and
location.loc from secondpass in EXTDEBUG
Revision 1.52 2003/04/22 14:33:38 peter
* removed some notes/hints
Revision 1.51 2003/04/22 10:09:35 daniel
+ Implemented the actual register allocator
+ Scratch registers unavailable when new register allocator used
+ maybe_save/maybe_restore unavailable when new register allocator used
Revision 1.50 2003/04/21 19:15:26 peter
* when ecx is not available allocated another register
Revision 1.49 2003/04/17 10:02:48 daniel
* Tweaked register allocate/deallocate positition to less interferences
are generated.
Revision 1.48 2003/03/28 19:16:57 peter
* generic constructor working for i386
* remove fixed self register
* esi added as address register for i386
Revision 1.47 2003/03/08 20:36:41 daniel
+ Added newra version of Ti386shlshrnode
+ Added interference graph construction code
Revision 1.46 2003/03/08 13:59:17 daniel
* Work to handle new register notation in ag386nsm
+ Added newra version of Ti386moddivnode
Revision 1.45 2003/02/19 22:00:15 daniel
* Code generator converted to new register notation
- Horribily outdated todo.txt removed
Revision 1.44 2003/01/13 18:37:44 daniel
* Work on register conversion
Revision 1.43 2003/01/13 14:54:34 daniel
* Further work to convert codegenerator register convention;
internalerror bug fixed.
Revision 1.42 2003/01/08 18:43:57 daniel
* Tregister changed into a record
Revision 1.41 2002/11/25 17:43:26 peter
* splitted defbase in defutil,symutil,defcmp
* merged isconvertable and is_equal into compare_defs(_ext)
* made operator search faster by walking the list only once
Revision 1.40 2002/09/07 15:25:10 peter
* old logs removed and tabs fixed
Revision 1.39 2002/08/15 15:15:55 carl
* jmpbuf size allocation for exceptions is now cpu specific (as it should)
* more generic nodes for maths
* several fixes for better m68k support
Revision 1.38 2002/08/14 19:18:16 carl
* bugfix of unaryminus node with left LOC_CREGISTER
Revision 1.37 2002/08/12 15:08:42 carl
+ stab register indexes for powerpc (moved from gdb to cpubase)
+ tprocessor enumeration moved to cpuinfo
+ linker in target_info is now a class
* many many updates for m68k (will soon start to compile)
- removed some ifdef or correct them for correct cpu
Revision 1.36 2002/08/11 14:32:30 peter
* renamed current_library to objectlibrary
Revision 1.35 2002/08/11 13:24:17 peter
* saving of asmsymbols in ppu supported
* asmsymbollist global is removed and moved into a new class
tasmlibrarydata that will hold the info of a .a file which
corresponds with a single module. Added librarydata to tmodule
to keep the library info stored for the module. In the future the
objectfiles will also be stored to the tasmlibrarydata class
* all getlabel/newasmsymbol and friends are moved to the new class
Revision 1.34 2002/08/02 07:44:31 jonas
* made assigned() handling generic
* add nodes now can also evaluate constant expressions at compile time
that contain nil nodes
Revision 1.33 2002/07/20 11:58:02 florian
* types.pas renamed to defbase.pas because D6 contains a types
unit so this would conflicts if D6 programms are compiled
+ Willamette/SSE2 instructions to assembler added
Revision 1.32 2002/07/01 18:46:33 peter
* internal linker
* reorganized aasm layer
Revision 1.31 2002/05/18 13:34:25 peter
* readded missing revisions
Revision 1.30 2002/05/16 19:46:51 carl
+ defines.inc -> fpcdefs.inc to avoid conflicts if compiling by hand
+ try to fix temp allocation (still in ifdef)
+ generic constructor calls
+ start of tassembler / tmodulebase class cleanup
Revision 1.28 2002/05/13 19:54:38 peter
* removed n386ld and n386util units
* maybe_save/maybe_restore added instead of the old maybe_push
Revision 1.27 2002/05/12 16:53:17 peter
* moved entry and exitcode to ncgutil and cgobj
* foreach gets extra argument for passing local data to the
iterator function
* -CR checks also class typecasts at runtime by changing them
into as
* fixed compiler to cycle with the -CR option
* fixed stabs with elf writer, finally the global variables can
be watched
* removed a lot of routines from cga unit and replaced them by
calls to cgobj
* u32bit-s32bit updates for and,or,xor nodes. When one element is
u32bit then the other is typecasted also to u32bit without giving
a rangecheck warning/error.
* fixed pascal calling method with reversing also the high tree in
the parast, detected by tcalcst3 test
Revision 1.26 2002/04/04 19:06:12 peter
* removed unused units
* use tlocation.size in cg.a_*loc*() routines
Revision 1.25 2002/04/02 17:11:36 peter
* tlocation,treference update
* LOC_CONSTANT added for better constant handling
* secondadd splitted in multiple routines
* location_force_reg added for loading a location to a register
of a specified size
* secondassignment parses now first the right and then the left node
(this is compatible with Kylix). This saves a lot of push/pop especially
with string operations
* adapted some routines to use the new cg methods
Revision 1.24 2002/03/31 20:26:39 jonas
+ a_loadfpu_* and a_loadmm_* methods in tcg
* register allocation is now handled by a class and is mostly processor
independent (+rgobj.pas and i386/rgcpu.pas)
* temp allocation is now handled by a class (+tgobj.pas, -i386\tgcpu.pas)
* some small improvements and fixes to the optimizer
* some register allocation fixes
* some fpuvaroffset fixes in the unary minus node
* push/popusedregisters is now called rg.save/restoreusedregisters and
(for i386) uses temps instead of push/pop's when using -Op3 (that code is
also better optimizable)
* fixed and optimized register saving/restoring for new/dispose nodes
* LOC_FPU locations now also require their "register" field to be set to
R_ST, not R_ST0 (the latter is used for LOC_CFPUREGISTER locations only)
- list field removed of the tnode class because it's not used currently
and can cause hard-to-find bugs
Revision 1.23 2002/03/04 19:10:14 peter
* removed compiler warnings
}