From 19cbff220e98658763b25e8e92986707662535e7 Mon Sep 17 00:00:00 2001 From: nickysn Date: Tue, 12 Mar 2013 14:30:26 +0000 Subject: [PATCH] + added unit n8086mat, based on n386mat; TODO: adapt to i8086 git-svn-id: branches/i8086@23805 - --- .gitattributes | 1 + compiler/i8086/cpunode.pas | 4 +- compiler/i8086/n8086mat.pas | 474 ++++++++++++++++++++++++++++++++++++ 3 files changed, 477 insertions(+), 2 deletions(-) create mode 100644 compiler/i8086/n8086mat.pas diff --git a/.gitattributes b/.gitattributes index e30391c908..9875f8c858 100644 --- a/.gitattributes +++ b/.gitattributes @@ -253,6 +253,7 @@ compiler/i8086/i386op.inc svneol=native#text/plain compiler/i8086/i386prop.inc svneol=native#text/plain compiler/i8086/i386tab.inc svneol=native#text/plain compiler/i8086/n8086add.pas svneol=native#text/plain +compiler/i8086/n8086mat.pas svneol=native#text/plain compiler/i8086/r386ari.inc svneol=native#text/plain compiler/i8086/r386att.inc svneol=native#text/plain compiler/i8086/r386con.inc svneol=native#text/plain diff --git a/compiler/i8086/cpunode.pas b/compiler/i8086/cpunode.pas index f3f081b6df..977f0c004f 100644 --- a/compiler/i8086/cpunode.pas +++ b/compiler/i8086/cpunode.pas @@ -53,8 +53,8 @@ unit cpunode; n386cal, n386mem, n386set, - n386inl, - n386mat} + n386inl}, + n8086mat ; end. diff --git a/compiler/i8086/n8086mat.pas b/compiler/i8086/n8086mat.pas new file mode 100644 index 0000000000..8d73ff4897 --- /dev/null +++ b/compiler/i8086/n8086mat.pas @@ -0,0 +1,474 @@ +{ + Copyright (c) 1998-2002 by Florian Klaempfl + + Generate i8086 assembler for math nodes + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + + **************************************************************************** +} +unit n8086mat; + +{$i fpcdefs.inc} + +interface + + uses + node,nmat,ncgmat,nx86mat; + + type + ti8086moddivnode = class(tmoddivnode) { div/mod node with its own code generation (pass_generate_code) } + procedure pass_generate_code;override; + end; + + ti8086shlshrnode = class(tcgshlshrnode) { shl/shr node overriding the 64-bit shift handling } + procedure second_64bit;override; + function first_shlshr64bitint: tnode; override; + end; + + ti8086unaryminusnode = class(tx86unaryminusnode) + end; + + ti8086notnode = class(tx86notnode) + end; + + +implementation + + uses + globtype,systems,constexp, + cutils,verbose,globals, + symconst,symdef,aasmbase,aasmtai,aasmdata,defutil, + cgbase,pass_2, + ncon, + cpubase,cpuinfo, + cga,ncgutil,cgobj,cgutils, + hlcgobj; + +{***************************************************************************** + ti8086moddivnode +*****************************************************************************} + + function log2(i : dword) : dword; { returns floor(log2(i)) for i>=1, and 0 for i=0 } + begin + result:=0; + i:=i shr 1; + while i<>0 do + begin + i:=i shr 1; + inc(result); + end; + end; + + + procedure ti8086moddivnode.pass_generate_code; { Generates code for div/mod: evaluates both operands, then emits shift sequences for div by a constant power of two, multiply-by-reciprocal sequences for div by any other constant, and DIV/IDIV for everything else (including mod). NOTE(review): every instruction emitted here uses 32-bit operand size (S_L) and EAX/EDX; the commit subject marks the adaptation to i8086 as TODO. } + var + hreg1,hreg2:Tregister; + power:longint; + hl:Tasmlabel; + op:Tasmop; + e : longint; + d,l,r,s,m,a,n,t : dword; + m_low,m_high,j,k : qword; + begin + secondpass(left); + if codegenerror then + exit; + secondpass(right); + if codegenerror then + exit; + + if is_64bitint(resultdef) then + { should be handled in pass_1 (JM) } + internalerror(200109052); + { put numerator in register } + location_reset(location,LOC_REGISTER,def_cgsize(resultdef)); + 
hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,resultdef,false); + hreg1:=left.location.register; + + if (nodetype=divn) and (right.nodetype=ordconstn) then + begin + if ispowerof2(tordconstnode(right).value.svalue,power) then + begin + { for signed numbers, the numerator must be adjusted before the + shift instruction, but not with unsigned numbers! Otherwise, + "Cardinal($ffffffff) div 16" overflows! (JM) } + if is_signed(left.resultdef) Then + begin + if (current_settings.optimizecputype <> cpu_386) and + not(cs_opt_size in current_settings.optimizerswitches) then + { use a sequence without jumps, saw this in + comp.compilers (JM) } + begin + { no jumps, but more operations } + hreg2:=cg.getintregister(current_asmdata.CurrAsmList,OS_INT); + emit_reg_reg(A_MOV,S_L,hreg1,hreg2); + {If the left value is negative, hreg2=$ffffffff, otherwise 0.} + emit_const_reg(A_SAR,S_L,31,hreg2); + {If negative, hreg2=right value-1, otherwise 0.} + emit_const_reg(A_AND,S_L,tordconstnode(right).value.svalue-1,hreg2); + { add to the left value } + emit_reg_reg(A_ADD,S_L,hreg2,hreg1); + { do the shift } + emit_const_reg(A_SAR,S_L,power,hreg1); + end + else + begin + { a jump, but less operations } + emit_reg_reg(A_TEST,S_L,hreg1,hreg1); + current_asmdata.getjumplabel(hl); + cg.a_jmp_flags(current_asmdata.CurrAsmList,F_NS,hl); + if power=1 then + emit_reg(A_INC,S_L,hreg1) + else + emit_const_reg(A_ADD,S_L,tordconstnode(right).value.svalue-1,hreg1); + cg.a_label(current_asmdata.CurrAsmList,hl); + emit_const_reg(A_SAR,S_L,power,hreg1); + end + end + else + emit_const_reg(A_SHR,S_L,power,hreg1); + location.register:=hreg1; + end + else + begin + if is_signed(left.resultdef) then + begin + e:=tordconstnode(right).value.svalue; + d:=abs(e); + { Determine algorithm (a), multiplier (m), and shift factor (s) for 32-bit + signed integer division. Based on: Granlund, T.; Montgomery, P.L.: + "Division by Invariant Integers using Multiplication". 
SIGPLAN Notices, + Vol. 29, June 1994, page 61. + } + + l:=log2(d); + j:=qword($80000000) mod qword(d); + k:=(qword(1) shl (32+l)) div (qword($80000000-j)); + m_low:=((qword(1)) shl (32+l)) div d; + m_high:=(((qword(1)) shl (32+l)) + k) div d; + while ((m_low shr 1) < (m_high shr 1)) and (l > 0) do + begin + m_low:=m_low shr 1; + m_high:=m_high shr 1; + dec(l); + end; + m:=dword(m_high); + s:=l; + if (m_high shr 31)<>0 then + a:=1 + else + a:=0; + cg.getcpuregister(current_asmdata.CurrAsmList,NR_EAX); + emit_const_reg(A_MOV,S_L,aint(m),NR_EAX); + cg.getcpuregister(current_asmdata.CurrAsmList,NR_EDX); + emit_reg(A_IMUL,S_L,hreg1); + emit_reg_reg(A_MOV,S_L,hreg1,NR_EAX); + if a<>0 then + begin + emit_reg_reg(A_ADD,S_L,NR_EAX,NR_EDX); + { + printf ("; dividend: memory location or register other than EAX or EDX\n"); + printf ("\n"); + printf ("MOV EAX, 0%08LXh\n", m); + printf ("IMUL dividend\n"); + printf ("MOV EAX, dividend\n"); + printf ("ADD EDX, EAX\n"); + if (s) printf ("SAR EDX, %d\n", s); + printf ("SHR EAX, 31\n"); + printf ("ADD EDX, EAX\n"); + if (e < 0) printf ("NEG EDX\n"); + printf ("\n"); + printf ("; quotient now in EDX\n"); + } + end; + { + printf ("; dividend: memory location or register other than EAX or EDX\n"); + printf ("\n"); + printf ("MOV EAX, 0%08LXh\n", m); + printf ("IMUL dividend\n"); + printf ("MOV EAX, dividend\n"); + if (s) printf ("SAR EDX, %d\n", s); + printf ("SHR EAX, 31\n"); + printf ("ADD EDX, EAX\n"); + if (e < 0) printf ("NEG EDX\n"); + printf ("\n"); + printf ("; quotient now in EDX\n"); + } + if s<>0 then + emit_const_reg(A_SAR,S_L,s,NR_EDX); + emit_const_reg(A_SHR,S_L,31,NR_EAX); + emit_reg_reg(A_ADD,S_L,NR_EAX,NR_EDX); + if e<0 then + emit_reg(A_NEG,S_L,NR_EDX); + cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_EDX); + cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_EAX); + location.register:=cg.getintregister(current_asmdata.CurrAsmList,OS_INT); + 
cg.a_load_reg_reg(current_asmdata.CurrAsmList,OS_INT,OS_INT,NR_EDX,location.register) + end + else + begin + d:=tordconstnode(right).value.svalue; + if d>=$80000000 then + begin { d >= 2^31, so the unsigned quotient can only be 0 or 1: CMP sets the carry flag when the dividend is below d, and 0 SBB -1 then yields 1-carry } + emit_const_reg(A_CMP,S_L,aint(d),hreg1); + location.register:=cg.getintregister(current_asmdata.CurrAsmList,OS_INT); + emit_const_reg(A_MOV,S_L,0,location.register); + emit_const_reg(A_SBB,S_L,-1,location.register); + end + else + begin + { Reduce divisor until it becomes odd } + n:=0; + t:=d; + while (t and 1)=0 do + begin + t:=t shr 1; + inc(n); + end; + { Generate m, s for algorithm 0. Based on: Granlund, T.; Montgomery, + P.L.: "Division by Invariant Integers using Multiplication". + SIGPLAN Notices, Vol. 29, June 1994, page 61. + } + l:=log2(t)+1; + j:=qword($ffffffff) mod qword(t); + k:=(qword(1) shl (32+l)) div (qword($ffffffff-j)); + m_low:=((qword(1)) shl (32+l)) div t; + m_high:=(((qword(1)) shl (32+l)) + k) div t; + while ((m_low shr 1) < (m_high shr 1)) and (l>0) do + begin + m_low:=m_low shr 1; + m_high:=m_high shr 1; + l:=l-1; + end; + if (m_high shr 32)=0 then + begin + m:=dword(m_high); + s:=l; + a:=0; + end + + { Generate m, s for algorithm 1. Based on: Magenheimer, D.J.; et al: + "Integer Multiplication and Division on the HP Precision Architecture". + IEEE Transactions on Computers, Vol 37, No. 8, August 1988, page 980. 
+ } + else + begin + s:=log2(t); + m_low:=(qword(1) shl (32+s)) div qword(t); + r:=dword(((qword(1)) shl (32+s)) mod qword(t)); + if (r < ((t>>1)+1)) then + m:=dword(m_low) + else + m:=dword(m_low)+1; + a:=1; + end; + { Reduce multiplier for either algorithm to smallest possible } + while (m and 1)=0 do + begin + m:=m shr 1; + dec(s); + end; + { Adjust multiplier for reduction of even divisors } + inc(s,n); + cg.getcpuregister(current_asmdata.CurrAsmList,NR_EAX); + emit_const_reg(A_MOV,S_L,aint(m),NR_EAX); + cg.getcpuregister(current_asmdata.CurrAsmList,NR_EDX); + emit_reg(A_MUL,S_L,hreg1); + if a<>0 then + begin + { + printf ("; dividend: register other than EAX or memory location\n"); + printf ("\n"); + printf ("MOV EAX, 0%08lXh\n", m); + printf ("MUL dividend\n"); + printf ("ADD EAX, 0%08lXh\n", m); + printf ("ADC EDX, 0\n"); + if (s) printf ("SHR EDX, %d\n", s); + printf ("\n"); + printf ("; quotient now in EDX\n"); + } + emit_const_reg(A_ADD,S_L,aint(m),NR_EAX); + emit_const_reg(A_ADC,S_L,0,NR_EDX); + end; + if s<>0 then + emit_const_reg(A_SHR,S_L,aint(s),NR_EDX); + cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_EDX); + cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_EAX); + location.register:=cg.getintregister(current_asmdata.CurrAsmList,OS_INT); + cg.a_load_reg_reg(current_asmdata.CurrAsmList,OS_INT,OS_INT,NR_EDX,location.register) + end; + end + end + end + else + begin + cg.getcpuregister(current_asmdata.CurrAsmList,NR_EAX); + emit_reg_reg(A_MOV,S_L,hreg1,NR_EAX); + cg.getcpuregister(current_asmdata.CurrAsmList,NR_EDX); + {Sign extension depends on the left type.} + if torddef(left.resultdef).ordtype=u32bit then + emit_reg_reg(A_XOR,S_L,NR_EDX,NR_EDX) + else + emit_none(A_CDQ,S_NO); + + {Division depends on the right type.} + if Torddef(right.resultdef).ordtype=u32bit then + op:=A_DIV + else + op:=A_IDIV; + + if right.location.loc in [LOC_REFERENCE,LOC_CREFERENCE] then + emit_ref(op,S_L,right.location.reference) + else if right.location.loc in 
[LOC_REGISTER,LOC_CREGISTER] then + emit_reg(op,S_L,right.location.register) + else + begin + hreg1:=cg.getintregister(current_asmdata.CurrAsmList,right.location.size); + hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,right.resultdef,u32inttype,right.location,hreg1); + emit_reg(op,S_L,hreg1); + end; + + {Copy the result into a new register. Release EAX & EDX.} + cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_EDX); + cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_EAX); + location.register:=cg.getintregister(current_asmdata.CurrAsmList,OS_INT); + if nodetype=divn then + cg.a_load_reg_reg(current_asmdata.CurrAsmList,OS_INT,OS_INT,NR_EAX,location.register) + else + cg.a_load_reg_reg(current_asmdata.CurrAsmList,OS_INT,OS_INT,NR_EDX,location.register); + end; + end; + + +{***************************************************************************** + ti8086shlshrnode +*****************************************************************************} + + + function ti8086shlshrnode.first_shlshr64bitint: tnode; { always returns nil; NOTE(review): presumably this keeps the generic first pass from replacing 64-bit shifts with another node, so that second_64bit handles them - confirm against the base class } + begin + result := nil; + end; + + procedure ti8086shlshrnode.second_64bit; { Emits code for a 64-bit shl/shr on a register pair (reglo/reghi): constant counts are coded directly, variable counts are loaded into ECX and dispatched at run time on the ranges below 32, 32..63, and 64 or more } + var + hreg64hi,hreg64lo:Tregister; + v : TConstExprInt; + l1,l2,l3:Tasmlabel; + begin + location_reset(location,LOC_REGISTER,def_cgsize(resultdef)); + + { load left operand in a register } + hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,resultdef,false); + hreg64hi:=left.location.register64.reghi; + hreg64lo:=left.location.register64.reglo; + + { shifting by a constant directly coded: } + if (right.nodetype=ordconstn) then + begin + v:=Tordconstnode(right).value and 63; + if v>31 then + begin + if nodetype=shln then + begin + emit_reg_reg(A_XOR,S_L,hreg64hi,hreg64hi); + if ((v and 31) <> 0) then + emit_const_reg(A_SHL,S_L,v.svalue and 31,hreg64lo); + end + else + begin + emit_reg_reg(A_XOR,S_L,hreg64lo,hreg64lo); + if ((v and 31) <> 0) then + emit_const_reg(A_SHR,S_L,v.svalue and 31,hreg64hi); + end; + { for counts above 31 the shifted half becomes the other half of the result and the cleared register takes its place, hence the swapped assignment } location.register64.reghi:=hreg64lo; + 
location.register64.reglo:=hreg64hi; + end + else + begin + if nodetype=shln then + begin + emit_const_reg_reg(A_SHLD,S_L,v.svalue and 31,hreg64lo,hreg64hi); + emit_const_reg(A_SHL,S_L,v.svalue and 31,hreg64lo); + end + else + begin + emit_const_reg_reg(A_SHRD,S_L,v.svalue and 31,hreg64hi,hreg64lo); + emit_const_reg(A_SHR,S_L,v.svalue and 31,hreg64hi); + end; + location.register64.reglo:=hreg64lo; + location.register64.reghi:=hreg64hi; + end; + end + else + begin + { load right operand (the shift count) into ECX } + cg.getcpuregister(current_asmdata.CurrAsmList,NR_ECX); + hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,right.resultdef,u32inttype,right.location,NR_ECX); + + { left operator is already in a register } + { hence are both in a register } + { is it in the case ECX ? } + + { the shift instructions work only up to a count of 32 } + { so we've to do some tricks here: counts of 64 or more clear both halves, counts of 32..63 shift one half by count-32 and move it into the other half } + current_asmdata.getjumplabel(l1); + current_asmdata.getjumplabel(l2); + current_asmdata.getjumplabel(l3); + emit_const_reg(A_CMP,S_L,64,NR_ECX); + cg.a_jmp_flags(current_asmdata.CurrAsmList,F_L,l1); + emit_reg_reg(A_XOR,S_L,hreg64lo,hreg64lo); + emit_reg_reg(A_XOR,S_L,hreg64hi,hreg64hi); + cg.a_jmp_always(current_asmdata.CurrAsmList,l3); + cg.a_label(current_asmdata.CurrAsmList,l1); + emit_const_reg(A_CMP,S_L,32,NR_ECX); + cg.a_jmp_flags(current_asmdata.CurrAsmList,F_L,l2); + emit_const_reg(A_SUB,S_L,32,NR_ECX); + if nodetype=shln then + begin + emit_reg_reg(A_SHL,S_L,NR_CL,hreg64lo); + emit_reg_reg(A_MOV,S_L,hreg64lo,hreg64hi); + emit_reg_reg(A_XOR,S_L,hreg64lo,hreg64lo); + cg.a_jmp_always(current_asmdata.CurrAsmList,l3); + cg.a_label(current_asmdata.CurrAsmList,l2); + emit_reg_reg_reg(A_SHLD,S_L,NR_CL,hreg64lo,hreg64hi); + emit_reg_reg(A_SHL,S_L,NR_CL,hreg64lo); + end + else + begin + emit_reg_reg(A_SHR,S_L,NR_CL,hreg64hi); + emit_reg_reg(A_MOV,S_L,hreg64hi,hreg64lo); + emit_reg_reg(A_XOR,S_L,hreg64hi,hreg64hi); + cg.a_jmp_always(current_asmdata.CurrAsmList,l3); + 
cg.a_label(current_asmdata.CurrAsmList,l2); + emit_reg_reg_reg(A_SHRD,S_L,NR_CL,hreg64hi,hreg64lo); + emit_reg_reg(A_SHR,S_L,NR_CL,hreg64hi); + end; + cg.a_label(current_asmdata.CurrAsmList,l3); + + cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_ECX); + location.register64.reglo:=hreg64lo; + location.register64.reghi:=hreg64hi; + end; + end; + + +begin { unit initialization: point the node class variables at the i8086 classes declared in this unit } + cunaryminusnode:=ti8086unaryminusnode; + cmoddivnode:=ti8086moddivnode; + cshlshrnode:=ti8086shlshrnode; + cnotnode:=ti8086notnode; +end.