{ $Id$ Copyright (c) 1998-2002 by Florian Klaempfl Generate i386 assembler for math nodes This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. **************************************************************************** } unit n386mat; {$i fpcdefs.inc} interface uses node,nmat,ncgmat,nx86mat; type ti386moddivnode = class(tmoddivnode) procedure pass_2;override; end; ti386shlshrnode = class(tshlshrnode) procedure pass_2;override; { everything will be handled in pass_2 } function first_shlshr64bitint: tnode; override; end; ti386unaryminusnode = class(tx86unaryminusnode) end; ti386notnode = class(tx86notnode) end; implementation uses globtype,systems, cutils,verbose,globals, symconst,symdef,aasmbase,aasmtai,defutil, cgbase,pass_1,pass_2, ncon, cpubase,cpuinfo, cga,ncgutil,cgobj; {***************************************************************************** TI386MODDIVNODE *****************************************************************************} procedure ti386moddivnode.pass_2; var hreg1,hreg2:Tregister; power:longint; hl:Tasmlabel; op:Tasmop; begin secondpass(left); if codegenerror then exit; secondpass(right); if codegenerror then exit; if is_64bitint(resulttype.def) then { should be handled in pass_1 (JM) } internalerror(200109052); { put numerator in register } location_reset(location,LOC_REGISTER,OS_INT); location_force_reg(exprasmlist,left.location,OS_INT,false); hreg1:=left.location.register; if (nodetype=divn) and (right.nodetype=ordconstn) and ispowerof2(tordconstnode(right).value,power) then begin { for signed numbers, the numerator must be adjusted before the shift instruction, but not wih unsigned numbers! Otherwise, "Cardinal($ffffffff) div 16" overflows! (JM) } if is_signed(left.resulttype.def) Then begin if (aktOptProcessor <> class386) and not(cs_littlesize in aktglobalswitches) then { use a sequence without jumps, saw this in comp.compilers (JM) } begin { no jumps, but more operations } hreg2:=cg.getintregister(exprasmlist,OS_INT); emit_reg_reg(A_MOV,S_L,hreg1,hreg2); {If the left value is signed, hreg2=$ffffffff, otherwise 0.} emit_const_reg(A_SAR,S_L,31,hreg2); {If signed, hreg2=right value-1, otherwise 0.} emit_const_reg(A_AND,S_L,tordconstnode(right).value-1,hreg2); { add to the left value } emit_reg_reg(A_ADD,S_L,hreg2,hreg1); { release EDX if we used it } cg.ungetregister(exprasmlist,hreg2); { do the shift } emit_const_reg(A_SAR,S_L,power,hreg1); end else begin { a jump, but less operations } emit_reg_reg(A_TEST,S_L,hreg1,hreg1); objectlibrary.getlabel(hl); cg.a_jmp_flags(exprasmlist,F_NS,hl); if power=1 then emit_reg(A_INC,S_L,hreg1) else emit_const_reg(A_ADD,S_L,tordconstnode(right).value-1,hreg1); cg.a_label(exprasmlist,hl); emit_const_reg(A_SAR,S_L,power,hreg1); end end else emit_const_reg(A_SHR,S_L,power,hreg1); location.register:=hreg1; end else begin {Bring denominator to a register.} cg.ungetregister(exprasmlist,hreg1); cg.getexplicitregister(exprasmlist,NR_EAX); emit_reg_reg(A_MOV,S_L,hreg1,NR_EAX); cg.getexplicitregister(exprasmlist,NR_EDX); {Sign extension depends on the left type.} if torddef(left.resulttype.def).typ=u32bit then emit_reg_reg(A_XOR,S_L,NR_EDX,NR_EDX) else emit_none(A_CDQ,S_NO); {Division depends on the right type.} if Torddef(right.resulttype.def).typ=u32bit then op:=A_DIV else op:=A_IDIV; if right.location.loc in [LOC_REFERENCE,LOC_CREFERENCE] then emit_ref(op,S_L,right.location.reference) else if right.location.loc in [LOC_REGISTER,LOC_CREGISTER] then emit_reg(op,S_L,right.location.register) else begin hreg1:=cg.getintregister(exprasmlist,right.location.size); cg.a_load_loc_reg(exprasmlist,OS_32,right.location,hreg1); cg.ungetregister(exprasmlist,hreg1); emit_reg(op,S_L,hreg1); end; location_release(exprasmlist,right.location); {Copy the result into a new register. Release EAX & EDX.} if nodetype=divn then begin cg.ungetregister(exprasmlist,NR_EDX); cg.ungetregister(exprasmlist,NR_EAX); location.register:=cg.getintregister(exprasmlist,OS_INT); emit_reg_reg(A_MOV,S_L,NR_EAX,location.register); end else begin cg.ungetregister(exprasmlist,NR_EAX); cg.ungetregister(exprasmlist,NR_EDX); location.register:=cg.getintregister(exprasmlist,OS_INT); emit_reg_reg(A_MOV,S_L,NR_EDX,location.register); end; end; end; {***************************************************************************** TI386SHLRSHRNODE *****************************************************************************} function ti386shlshrnode.first_shlshr64bitint: tnode; begin result := nil; end; procedure ti386shlshrnode.pass_2; var hregisterhigh,hregisterlow:Tregister; op:Tasmop; l1,l2,l3:Tasmlabel; begin secondpass(left); secondpass(right); { determine operator } if nodetype=shln then op:=A_SHL else op:=A_SHR; if is_64bitint(left.resulttype.def) then begin location_reset(location,LOC_REGISTER,OS_64); { load left operator in a register } location_force_reg(exprasmlist,left.location,OS_64,false); hregisterhigh:=left.location.registerhigh; hregisterlow:=left.location.registerlow; { shifting by a constant directly coded: } if (right.nodetype=ordconstn) then begin { shrd/shl works only for values <=31 !! } if Tordconstnode(right).value>63 then begin cg.a_load_const_reg(exprasmlist,OS_32,0,hregisterhigh); cg.a_load_const_reg(exprasmlist,OS_32,0,hregisterlow); location.registerlow:=hregisterlow; location.registerhigh:=hregisterhigh; end else if Tordconstnode(right).value>31 then begin if nodetype=shln then begin emit_reg_reg(A_XOR,S_L,hregisterhigh,hregisterhigh); if ((tordconstnode(right).value and 31) <> 0) then emit_const_reg(A_SHL,S_L,tordconstnode(right).value and 31, hregisterlow); end else begin emit_reg_reg(A_XOR,S_L,hregisterlow,hregisterlow); if ((tordconstnode(right).value and 31) <> 0) then emit_const_reg(A_SHR,S_L,tordconstnode(right).value and 31, hregisterhigh); end; location.registerhigh:=hregisterlow; location.registerlow:=hregisterhigh; end else begin if nodetype=shln then begin emit_const_reg_reg(A_SHLD,S_L,tordconstnode(right).value and 31, hregisterlow,hregisterhigh); emit_const_reg(A_SHL,S_L,tordconstnode(right).value and 31, hregisterlow); end else begin emit_const_reg_reg(A_SHRD,S_L,tordconstnode(right).value and 31, hregisterhigh,hregisterlow); emit_const_reg(A_SHR,S_L,tordconstnode(right).value and 31, hregisterhigh); end; location.registerlow:=hregisterlow; location.registerhigh:=hregisterhigh; end; end else begin { load right operators in a register } cg.getexplicitregister(exprasmlist,NR_ECX); cg.a_load_loc_reg(exprasmlist,OS_32,right.location,NR_ECX); if right.location.loc<>LOC_CREGISTER then location_release(exprasmlist,right.location); { left operator is already in a register } { hence are both in a register } { is it in the case ECX ? } { the damned shift instructions work only til a count of 32 } { so we've to do some tricks here } objectlibrary.getlabel(l1); objectlibrary.getlabel(l2); objectlibrary.getlabel(l3); emit_const_reg(A_CMP,S_L,64,NR_ECX); cg.a_jmp_flags(exprasmlist,F_L,l1); emit_reg_reg(A_XOR,S_L,hregisterlow,hregisterlow); emit_reg_reg(A_XOR,S_L,hregisterhigh,hregisterhigh); cg.a_jmp_always(exprasmlist,l3); cg.a_label(exprasmlist,l1); emit_const_reg(A_CMP,S_L,32,NR_ECX); cg.a_jmp_flags(exprasmlist,F_L,l2); emit_const_reg(A_SUB,S_L,32,NR_ECX); if nodetype=shln then begin emit_reg_reg(A_SHL,S_L,NR_CL,hregisterlow); emit_reg_reg(A_MOV,S_L,hregisterlow,hregisterhigh); emit_reg_reg(A_XOR,S_L,hregisterlow,hregisterlow); cg.a_jmp_always(exprasmlist,l3); cg.a_label(exprasmlist,l2); emit_reg_reg_reg(A_SHLD,S_L,NR_CL,hregisterlow,hregisterhigh); emit_reg_reg(A_SHL,S_L,NR_CL,hregisterlow); end else begin emit_reg_reg(A_SHR,S_L,NR_CL,hregisterhigh); emit_reg_reg(A_MOV,S_L,hregisterhigh,hregisterlow); emit_reg_reg(A_XOR,S_L,hregisterhigh,hregisterhigh); cg.a_jmp_always(exprasmlist,l3); cg.a_label(exprasmlist,l2); emit_reg_reg_reg(A_SHRD,S_L,NR_CL,hregisterhigh,hregisterlow); emit_reg_reg(A_SHR,S_L,NR_CL,hregisterhigh); end; cg.a_label(exprasmlist,l3); cg.ungetregister(exprasmlist,NR_ECX); location.registerlow:=hregisterlow; location.registerhigh:=hregisterhigh; end; end else begin { load left operators in a register } location_copy(location,left.location); location_force_reg(exprasmlist,location,OS_INT,false); { shifting by a constant directly coded: } if (right.nodetype=ordconstn) then { l shl 32 should 0 imho, but neither TP nor Delphi do it in this way (FK)} emit_const_reg(op,S_L,tordconstnode(right).value and 31,location.register) else begin { load right operators in a ECX } if right.location.loc<>LOC_CREGISTER then location_release(exprasmlist,right.location); cg.getexplicitregister(exprasmlist,NR_ECX); cg.a_load_loc_reg(exprasmlist,OS_32,right.location,NR_ECX); { right operand is in ECX } cg.ungetregister(exprasmlist,NR_ECX); emit_reg_reg(op,S_L,NR_CL,location.register); end; end; end; begin cunaryminusnode:=ti386unaryminusnode; cmoddivnode:=ti386moddivnode; cshlshrnode:=ti386shlshrnode; cnotnode:=ti386notnode; end. { $Log$ Revision 1.69 2004-01-20 12:59:37 florian * common addnode code for x86-64 and i386 Revision 1.68 2003/12/26 13:19:16 florian * rtl and compiler compile with -Cfsse2 Revision 1.67 2003/12/25 01:07:09 florian + $fputype directive support + single data type operations with sse unit * fixed more x86-64 stuff Revision 1.66 2003/12/10 17:28:41 peter * int64 shl/shr > 63 returns 0 Revision 1.65 2003/10/10 17:48:14 peter * old trgobj moved to x86/rgcpu and renamed to trgx86fpu * tregisteralloctor renamed to trgobj * removed rgobj from a lot of units * moved location_* and reference_* to cgobj * first things for mmx register allocation Revision 1.64 2003/10/09 21:31:37 daniel * Register allocator splitted, ans abstract now Revision 1.63 2003/10/01 20:34:49 peter * procinfo unit contains tprocinfo * cginfo renamed to cgbase * moved cgmessage to verbose * fixed ppc and sparc compiles Revision 1.62 2003/09/29 20:58:56 peter * optimized releasing of registers Revision 1.61 2003/09/28 21:48:20 peter * fix register leaks Revision 1.60 2003/09/03 15:55:01 peter * NEWRA branch merged Revision 1.59.2.2 2003/08/31 13:50:16 daniel * Remove sorting and use pregenerated indexes * Some work on making things compile Revision 1.59.2.1 2003/08/29 17:29:00 peter * next batch of updates Revision 1.59 2003/07/02 22:18:04 peter * paraloc splitted in callerparaloc,calleeparaloc * sparc calling convention updates Revision 1.58 2003/06/13 21:19:31 peter * current_procdef removed, use current_procinfo.procdef instead Revision 1.57 2003/06/03 21:11:09 peter * cg.a_load_* get a from and to size specifier * makeregsize only accepts newregister * i386 uses generic tcgnotnode,tcgunaryminus Revision 1.56 2003/06/03 13:01:59 daniel * Register allocator finished Revision 1.55 2003/05/31 15:04:31 peter * load_loc_reg update Revision 1.54 2003/05/22 21:32:29 peter * removed some unit dependencies Revision 1.53 2003/04/22 23:50:23 peter * firstpass uses expectloc * checks if there are differences between the expectloc and location.loc from secondpass in EXTDEBUG Revision 1.52 2003/04/22 14:33:38 peter * removed some notes/hints Revision 1.51 2003/04/22 10:09:35 daniel + Implemented the actual register allocator + Scratch registers unavailable when new register allocator used + maybe_save/maybe_restore unavailable when new register allocator used Revision 1.50 2003/04/21 19:15:26 peter * when ecx is not available allocated another register Revision 1.49 2003/04/17 10:02:48 daniel * Tweaked register allocate/deallocate positition to less interferences are generated. Revision 1.48 2003/03/28 19:16:57 peter * generic constructor working for i386 * remove fixed self register * esi added as address register for i386 Revision 1.47 2003/03/08 20:36:41 daniel + Added newra version of Ti386shlshrnode + Added interference graph construction code Revision 1.46 2003/03/08 13:59:17 daniel * Work to handle new register notation in ag386nsm + Added newra version of Ti386moddivnode Revision 1.45 2003/02/19 22:00:15 daniel * Code generator converted to new register notation - Horribily outdated todo.txt removed Revision 1.44 2003/01/13 18:37:44 daniel * Work on register conversion Revision 1.43 2003/01/13 14:54:34 daniel * Further work to convert codegenerator register convention; internalerror bug fixed. Revision 1.42 2003/01/08 18:43:57 daniel * Tregister changed into a record Revision 1.41 2002/11/25 17:43:26 peter * splitted defbase in defutil,symutil,defcmp * merged isconvertable and is_equal into compare_defs(_ext) * made operator search faster by walking the list only once Revision 1.40 2002/09/07 15:25:10 peter * old logs removed and tabs fixed Revision 1.39 2002/08/15 15:15:55 carl * jmpbuf size allocation for exceptions is now cpu specific (as it should) * more generic nodes for maths * several fixes for better m68k support Revision 1.38 2002/08/14 19:18:16 carl * bugfix of unaryminus node with left LOC_CREGISTER Revision 1.37 2002/08/12 15:08:42 carl + stab register indexes for powerpc (moved from gdb to cpubase) + tprocessor enumeration moved to cpuinfo + linker in target_info is now a class * many many updates for m68k (will soon start to compile) - removed some ifdef or correct them for correct cpu Revision 1.36 2002/08/11 14:32:30 peter * renamed current_library to objectlibrary Revision 1.35 2002/08/11 13:24:17 peter * saving of asmsymbols in ppu supported * asmsymbollist global is removed and moved into a new class tasmlibrarydata that will hold the info of a .a file which corresponds with a single module. Added librarydata to tmodule to keep the library info stored for the module. In the future the objectfiles will also be stored to the tasmlibrarydata class * all getlabel/newasmsymbol and friends are moved to the new class Revision 1.34 2002/08/02 07:44:31 jonas * made assigned() handling generic * add nodes now can also evaluate constant expressions at compile time that contain nil nodes Revision 1.33 2002/07/20 11:58:02 florian * types.pas renamed to defbase.pas because D6 contains a types unit so this would conflicts if D6 programms are compiled + Willamette/SSE2 instructions to assembler added Revision 1.32 2002/07/01 18:46:33 peter * internal linker * reorganized aasm layer Revision 1.31 2002/05/18 13:34:25 peter * readded missing revisions Revision 1.30 2002/05/16 19:46:51 carl + defines.inc -> fpcdefs.inc to avoid conflicts if compiling by hand + try to fix temp allocation (still in ifdef) + generic constructor calls + start of tassembler / tmodulebase class cleanup Revision 1.28 2002/05/13 19:54:38 peter * removed n386ld and n386util units * maybe_save/maybe_restore added instead of the old maybe_push Revision 1.27 2002/05/12 16:53:17 peter * moved entry and exitcode to ncgutil and cgobj * foreach gets extra argument for passing local data to the iterator function * -CR checks also class typecasts at runtime by changing them into as * fixed compiler to cycle with the -CR option * fixed stabs with elf writer, finally the global variables can be watched * removed a lot of routines from cga unit and replaced them by calls to cgobj * u32bit-s32bit updates for and,or,xor nodes. When one element is u32bit then the other is typecasted also to u32bit without giving a rangecheck warning/error. * fixed pascal calling method with reversing also the high tree in the parast, detected by tcalcst3 test Revision 1.26 2002/04/04 19:06:12 peter * removed unused units * use tlocation.size in cg.a_*loc*() routines Revision 1.25 2002/04/02 17:11:36 peter * tlocation,treference update * LOC_CONSTANT added for better constant handling * secondadd splitted in multiple routines * location_force_reg added for loading a location to a register of a specified size * secondassignment parses now first the right and then the left node (this is compatible with Kylix). This saves a lot of push/pop especially with string operations * adapted some routines to use the new cg methods Revision 1.24 2002/03/31 20:26:39 jonas + a_loadfpu_* and a_loadmm_* methods in tcg * register allocation is now handled by a class and is mostly processor independent (+rgobj.pas and i386/rgcpu.pas) * temp allocation is now handled by a class (+tgobj.pas, -i386\tgcpu.pas) * some small improvements and fixes to the optimizer * some register allocation fixes * some fpuvaroffset fixes in the unary minus node * push/popusedregisters is now called rg.save/restoreusedregisters and (for i386) uses temps instead of push/pop's when using -Op3 (that code is also better optimizable) * fixed and optimized register saving/restoring for new/dispose nodes * LOC_FPU locations now also require their "register" field to be set to R_ST, not R_ST0 (the latter is used for LOC_CFPUREGISTER locations only) - list field removed of the tnode class because it's not used currently and can cause hard-to-find bugs Revision 1.23 2002/03/04 19:10:14 peter * removed compiler warnings }