fpc/compiler/x86/nx86mat.pas
Jonas Maebe a25ebbba3e + added volatility information to all memory references
o separate information for reading and writing, because e.g. in a
     try-block, only the writes to local variables and parameters are
     volatile (they have to be committed immediately in case the next
     instruction causes an exception)
   o for now, only references to absolute memory addresses are marked
     as volatile
   o the volatility information is (should be) properly maintained throughout
     all code generators for all architectures with this patch
   o no optimizers or other compiler infrastructure uses the volatility
     information yet
   o this functionality is not (yet) exposed at the language level, it
     is only for internal code generator use right now

git-svn-id: trunk@34996 -
2016-11-27 18:17:37 +00:00

563 lines
23 KiB
ObjectPascal

{
Copyright (c) 1998-2002 by Florian Klaempfl
Generate x86 code for math nodes
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
****************************************************************************
}
unit nx86mat;
{$i fpcdefs.inc}
interface
uses
node,nmat,ncgmat;
type
{ x86 unary minus node. Specialises tcgunaryminusnode for the float case,
  for the MMX case (only when SUPPORT_MMX is defined) and for the choice of
  the expected result location in pass_1. }
tx86unaryminusnode = class(tcgunaryminusnode)
{$ifdef SUPPORT_MMX}
{ negates an MMX operand by subtracting it from a zeroed MMX register }
procedure second_mmx;override;
{$endif SUPPORT_MMX}
{ negates a float: XOR with a sign mask when the result is expected in an
  MM register, FCHS on the x87 stack otherwise }
procedure second_float;override;
{ runs the first pass on the operand and sets expectloc }
function pass_1:tnode;override;
end;
{ x86 "not" node. Specialises tcgnotnode for boolean operands and, when
  SUPPORT_MMX is defined, for MMX operands. }
tx86notnode = class(tcgnotnode)
{ boolean not; the result is delivered in the CPU flags }
procedure second_boolean;override;
{$ifdef SUPPORT_MMX}
{ bitwise not of an MMX operand, done by XOR-ing it with an all-ones mask }
procedure second_mmx;override;
{$endif SUPPORT_MMX}
end;
{ x86 div/mod node for 32 and 64 bit operands. }
tx86moddivnode = class(tcgmoddivnode)
{ emits shifts, a multiplication by a magic constant, or DIV/IDIV }
procedure pass_generate_code;override;
end;
implementation
uses
globtype,
systems,constexp,
cutils,verbose,globals,
symconst,symdef,
aasmbase,aasmtai,aasmdata,defutil,
cgbase,pass_1,pass_2,
ncon,
cpubase,procinfo,
cga,ncgutil,cgobj,hlcgobj,cgx86,cgutils;
{*****************************************************************************
TX86UNARYMINUSNODE
*****************************************************************************}
{ First pass of the x86 unary minus node.

  Runs the first pass on the operand and then selects the location in which
  the result is expected:
    - float operand: LOC_MMREGISTER if use_vectorfpu() accepts the operand
      type, LOC_FPUREGISTER otherwise;
    - MMX-able array with the mmx local switch enabled (SUPPORT_MMX builds
      only): LOC_MMXREGISTER;
    - anything else: delegated to the inherited pass_1.

  Returns nil for the cases handled here. For the delegated case the result
  of the inherited method is passed on to the caller; it used to be thrown
  away, which would have dropped a replacement node created by an ancestor. }
function tx86unaryminusnode.pass_1 : tnode;
  begin
    result:=nil;
    firstpass(left);
    if codegenerror then
      exit;

    if (left.resultdef.typ=floatdef) then
      begin
        if use_vectorfpu(left.resultdef) then
          expectloc:=LOC_MMREGISTER
        else
          expectloc:=LOC_FPUREGISTER;
      end
{$ifdef SUPPORT_MMX}
    else
      if (cs_mmx in current_settings.localswitches) and
         is_mmx_able_array(left.resultdef) then
        begin
          expectloc:=LOC_MMXREGISTER;
        end
{$endif SUPPORT_MMX}
    else
      { do not lose a node returned by the ancestor }
      result:=inherited pass_1;
  end;
{$ifdef SUPPORT_MMX}
{ Unary minus on an MMX operand.

  A fresh MMX register is cleared, the operand is brought into
  location.register, the operand is subtracted from the cleared register
  with the packed subtract matching the element type (saturating variants
  when the mmx saturation switch is on), and the difference is finally moved
  back into location.register.

  Raises internalerror(200203225) for an unexpected operand location and
  internalerror(201408202) when no subtract opcode fits the element type. }
procedure tx86unaryminusnode.second_mmx;
  var
    subop : tasmop;
    zeroreg : tregister;
    saturate : boolean;
  begin
    subop:=A_NONE;
    secondpass(left);
    location_reset(location,LOC_MMXREGISTER,OS_NO);

    { zeroreg:=0, this is the minuend of the subtraction below }
    zeroreg:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
    emit_reg_reg(A_PXOR,S_NO,zeroreg,zeroreg);

    { bring the operand into location.register }
    if left.location.loc=LOC_MMXREGISTER then
      location.register:=left.location.register
    else if left.location.loc=LOC_CMMXREGISTER then
      begin
        location.register:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
        emit_reg_reg(A_MOVQ,S_NO,left.location.register,location.register);
      end
    else if left.location.loc in [LOC_REFERENCE,LOC_CREFERENCE] then
      begin
        location.register:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
        emit_ref_reg(A_MOVQ,S_NO,left.location.reference,location.register);
      end
    else
      internalerror(200203225);

    { pick the packed subtract; element types without an entry for the
      active mode leave subop at A_NONE }
    saturate:=cs_mmx_saturation in current_settings.localswitches;
    case mmx_type(resultdef) of
      mmxs8bit:
        if saturate then
          subop:=A_PSUBSB
        else
          subop:=A_PSUBB;
      mmxu8bit:
        if saturate then
          subop:=A_PSUBUSB
        else
          subop:=A_PSUBB;
      mmxs16bit,mmxfixed16:
        if saturate then
          subop:=A_PSUBSW
        else
          subop:=A_PSUBW;
      mmxu16bit:
        if saturate then
          subop:=A_PSUBUSW
        else
          subop:=A_PSUBW;
      mmxs32bit,mmxu32bit:
        if not saturate then
          subop:=A_PSUBD;
    end;
    if subop=A_NONE then
      internalerror(201408202);

    { zeroreg:=0-operand, then hand the difference back in location.register }
    emit_reg_reg(subop,S_NO,location.register,zeroreg);
    emit_reg_reg(A_MOVQ,S_NO,zeroreg,location.register);
  end;
{$endif SUPPORT_MMX}
{ Unary minus on a floating point operand.

  x87 path (expectloc is not LOC_MMREGISTER): the operand is loaded to ST
  and its sign is changed with FCHS.

  Vector FPU path (expectloc=LOC_MMREGISTER): a constant with only the sign
  bit set is emitted into the typed constants list and XOR-ed onto a copy of
  the operand. With AVX the three operand form is used directly; otherwise
  the mask is loaded into a scratch register first.

  Raises internalerror(200312241) for an unexpected operand location on the
  x87 path and internalerror(2004110215) for a result size other than
  OS_F32/OS_F64 on the vector path. }
procedure tx86unaryminusnode.second_float;
  var
    maskreg : tregister;
    maskref : treference;
    masklabel : tasmlabel;
    fsize : tcgsize;
  begin
    secondpass(left);

    if expectloc<>LOC_MMREGISTER then
      begin
        location_reset(location,LOC_FPUREGISTER,def_cgsize(resultdef));
        case left.location.loc of
          LOC_REFERENCE,
          LOC_CREFERENCE:
            begin
              location.register:=NR_ST;
              cg.a_loadfpu_ref_reg(current_asmdata.CurrAsmList,
                left.location.size,location.size,
                left.location.reference,location.register);
            end;
          LOC_FPUREGISTER,
          LOC_CFPUREGISTER:
            begin
              { "load st,st" is ignored by the code generator }
              cg.a_loadfpu_reg_reg(current_asmdata.CurrAsmList,left.location.size,location.size,left.location.register,NR_ST);
              location.register:=NR_ST;
            end;
          else
            internalerror(200312241);
        end;
        { the operand is on top of the stack now: flip its sign }
        emit_none(A_FCHS,S_NO);
        exit;
      end;

    fsize:=def_cgsize(resultdef);
    hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
    location_reset(location,LOC_MMREGISTER,fsize);

    { make life of register allocator easier }
    location.register:=cg.getmmregister(current_asmdata.CurrAsmList,fsize);

    { emit the sign mask as a labelled constant }
    current_asmdata.getglobaldatalabel(masklabel);
    new_section(current_asmdata.asmlists[al_typedconsts],sec_rodata_norel,masklabel.name,const_align(sizeof(pint)));
    current_asmdata.asmlists[al_typedconsts].concat(Tai_label.Create(masklabel));
    case fsize of
      OS_F32:
        current_asmdata.asmlists[al_typedconsts].concat(tai_const.create_32bit(longint(1 shl 31)));
      OS_F64:
        begin
          { low dword first, the sign bit lives in the high dword }
          current_asmdata.asmlists[al_typedconsts].concat(tai_const.create_32bit(0));
          current_asmdata.asmlists[al_typedconsts].concat(tai_const.create_32bit(-(1 shl 31)));
        end
      else
        internalerror(2004110215);
    end;
    reference_reset_symbol(maskref,masklabel,0,resultdef.alignment,[]);

    if not UseAVX then
      begin
        { two operand form: mask into a scratch register, operand into the
          result register, then XOR the mask onto it }
        maskreg:=cg.getmmregister(current_asmdata.CurrAsmList,fsize);
        cg.a_loadmm_ref_reg(current_asmdata.CurrAsmList,fsize,fsize,maskref,maskreg,mms_movescalar);
        cg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,fsize,fsize,left.location.register,location.register,mms_movescalar);
        cg.a_opmm_reg_reg(current_asmdata.CurrAsmList,OP_XOR,left.location.size,maskreg,location.register,nil);
      end
    else
      cg.a_opmm_ref_reg_reg(current_asmdata.CurrAsmList,OP_XOR,left.location.size,maskref,left.location.register,location.register,nil);
  end;
{*****************************************************************************
TX86NOTNODE
*****************************************************************************}
{ Boolean "not" for x86.

  handle_locjump is given the first chance; the code below only runs when it
  returns false. In that case the operand is evaluated and the result is
  always delivered as LOC_FLAGS:
    - operand already in the flags: the flags condition is inverted;
    - operand in memory, in a register, a constant or a subset location: the
      operand is tested against zero (CMP with 0, TEST, or by OR-ing its
      parts together when it is wider than the ALU) and the result is F_E,
      i.e. "not x" holds exactly when x is zero.

  On cpu16bitalu targets a 32 bit operand lives in two 16 bit registers, so
  the OR combining them is a word sized operation; it was emitted with a
  long sized suffix before, unlike all other 16 bit ORs in this routine.

  Raises internalerror(200203224) for an unexpected operand location. }
procedure tx86notnode.second_boolean;
  var
    opsize : tcgsize;
{$if defined(cpu32bitalu) or defined(cpu16bitalu)}
    hreg: tregister;
{$endif}
  begin
    opsize:=def_cgsize(resultdef);

    if not handle_locjump then
      begin
        { the second pass could change the location of left }
        { if it is a register variable, so we've to do      }
        { this before the case statement                    }
        secondpass(left);

        case left.location.loc of
          LOC_FLAGS :
            begin
              location_reset(location,LOC_FLAGS,OS_NO);
              location.resflags:=left.location.resflags;
              inverse_flags(location.resflags);
            end;
          LOC_CREFERENCE,
          LOC_REFERENCE:
            begin
{$if defined(cpu32bitalu)}
              if is_64bit(resultdef) then
                begin
                  { OR both dwords together, the zero flag then tells whether
                    the whole value is zero }
                  hreg:=cg.GetIntRegister(current_asmdata.CurrAsmList,OS_32);
                  tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,left.location.reference);
                  cg.a_load_ref_reg(current_asmdata.CurrAsmList,OS_32,OS_32,left.location.reference,hreg);
                  inc(left.location.reference.offset,4);
                  cg.a_op_ref_reg(current_asmdata.CurrAsmList,OP_OR,OS_32,left.location.reference,hreg);
                end
              else
{$elseif defined(cpu16bitalu)}
              if is_64bit(resultdef) then
                begin
                  { OR all four words together }
                  hreg:=cg.GetIntRegister(current_asmdata.CurrAsmList,OS_16);
                  tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,left.location.reference);
                  cg.a_load_ref_reg(current_asmdata.CurrAsmList,OS_16,OS_16,left.location.reference,hreg);
                  inc(left.location.reference.offset,2);
                  cg.a_op_ref_reg(current_asmdata.CurrAsmList,OP_OR,OS_16,left.location.reference,hreg);
                  inc(left.location.reference.offset,2);
                  cg.a_op_ref_reg(current_asmdata.CurrAsmList,OP_OR,OS_16,left.location.reference,hreg);
                  inc(left.location.reference.offset,2);
                  cg.a_op_ref_reg(current_asmdata.CurrAsmList,OP_OR,OS_16,left.location.reference,hreg);
                end
              else if is_32bit(resultdef) then
                begin
                  { OR both words together }
                  hreg:=cg.GetIntRegister(current_asmdata.CurrAsmList,OS_16);
                  tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,left.location.reference);
                  cg.a_load_ref_reg(current_asmdata.CurrAsmList,OS_16,OS_16,left.location.reference,hreg);
                  inc(left.location.reference.offset,2);
                  cg.a_op_ref_reg(current_asmdata.CurrAsmList,OP_OR,OS_16,left.location.reference,hreg);
                end
              else
{$endif}
                emit_const_ref(A_CMP, TCGSize2Opsize[opsize], 0, left.location.reference);
              location_reset(location,LOC_FLAGS,OS_NO);
              location.resflags:=F_E;
            end;
          LOC_CONSTANT,
          LOC_REGISTER,
          LOC_CREGISTER,
          LOC_SUBSETREG,
          LOC_CSUBSETREG,
          LOC_SUBSETREF,
          LOC_CSUBSETREF :
            begin
{$if defined(cpu32bitalu)}
              if is_64bit(resultdef) then
                begin
                  hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,resultdef,false);
                  emit_reg_reg(A_OR,S_L,left.location.register64.reghi,left.location.register64.reglo);
                end
              else
{$elseif defined(cpu16bitalu)}
              if is_64bit(resultdef) then
                begin
                  hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,resultdef,false);
                  emit_reg_reg(A_OR,S_W,GetNextReg(left.location.register64.reghi),left.location.register64.reghi);
                  emit_reg_reg(A_OR,S_W,GetNextReg(left.location.register64.reglo),left.location.register64.reglo);
                  emit_reg_reg(A_OR,S_W,left.location.register64.reghi,left.location.register64.reglo);
                end
              else if is_32bit(resultdef) then
                begin
                  hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,resultdef,false);
                  { both halves are 16 bit registers: word sized OR }
                  emit_reg_reg(A_OR,S_W,GetNextReg(left.location.register),left.location.register);
                end
              else
{$endif}
                begin
                  hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,resultdef,true);
                  emit_reg_reg(A_TEST,TCGSize2Opsize[opsize],left.location.register,left.location.register);
                end;
              location_reset(location,LOC_FLAGS,OS_NO);
              location.resflags:=F_E;
            end;
          else
            internalerror(200203224);
        end;
      end;
  end;
{$ifdef SUPPORT_MMX}
{ Bitwise "not" of an MMX operand.

  The operand is brought into location.register and XOR-ed with a mask in
  two steps: an integer register holding $ffffffff is moved into an MMX
  register with MOVD and XOR-ed onto the operand (lower 32 bit), then the
  mask is shifted left by 32 with PSLLQ and XOR-ed again (higher 32 bit).

  Raises internalerror(2016112701) for an operand location that is neither
  an MMX register nor a memory reference. Such a location used to fall
  through silently, leaving location.register unset for the XORs below; the
  other MMX routine of this unit already rejects it. }
procedure tx86notnode.second_mmx;
  var
    hreg,r : tregister;
  begin
    secondpass(left);
    location_reset(location,LOC_MMXREGISTER,OS_NO);
    r:=cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
    emit_const_reg(A_MOV,S_L,longint($ffffffff),r);
    { load operand }
    case left.location.loc of
      LOC_MMXREGISTER:
        location_copy(location,left.location);
      LOC_CMMXREGISTER:
        begin
          location.register:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
          emit_reg_reg(A_MOVQ,S_NO,left.location.register,location.register);
        end;
      LOC_REFERENCE,
      LOC_CREFERENCE:
        begin
          location.register:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
          emit_ref_reg(A_MOVQ,S_NO,left.location.reference,location.register);
        end;
      else
        { no valid result register exists for any other location }
        internalerror(2016112701);
    end;
    { load mask }
    hreg:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
    emit_reg_reg(A_MOVD,S_NO,r,hreg);
    { lower 32 bit }
    emit_reg_reg(A_PXOR,S_NO,hreg,location.register);
    { shift mask }
    emit_const_reg(A_PSLLQ,S_B,32,hreg);
    { higher 32 bit }
    emit_reg_reg(A_PXOR,S_NO,hreg,location.register);
  end;
{$endif SUPPORT_MMX}
{*****************************************************************************
TX86MODDIVNODE
*****************************************************************************}
{ Generates the code for an integer div or mod node.

  Both operands are evaluated first, the numerator is forced into a register
  (hreg1) and the result is always a LOC_REGISTER of the size of resultdef.
  Only 32 and 64 bit result sizes are accepted, anything else raises
  InternalError(2013102702).

  Three strategies are used:
    - div by a constant power of two: shifts (with a sign correction of the
      numerator for signed operands);
    - div by any other constant: multiplication by a magic constant obtained
      from calc_divconst_magic_signed/calc_divconst_magic_unsigned, of which
      only the high half (R/EDX) is used; an unsigned divisor with its top
      bit set is handled by a single compare instead;
    - everything else, including every mod: DIV/IDIV on R/EDX:R/EAX, the
      quotient is then taken from R/EAX and the remainder from R/EDX. }
procedure tx86moddivnode.pass_generate_code;
var
hreg1,hreg2,rega,regd:Tregister;
power:longint;
op:Tasmop;
cgsize:TCgSize;
opsize:topsize;
e, sm: aint;
d,m: aword;
m_add: boolean;
s: byte;
begin
secondpass(left);
if codegenerror then
exit;
secondpass(right);
if codegenerror then
exit;
{ put numerator in register }
cgsize:=def_cgsize(resultdef);
opsize:=TCGSize2OpSize[cgsize];
if not (cgsize in [OS_32,OS_S32,OS_64,OS_S64]) then
InternalError(2013102702);
{ rega/regd are the A and D registers in the width of the result }
rega:=newreg(R_INTREGISTER,RS_EAX,cgsize2subreg(R_INTREGISTER,cgsize));
regd:=newreg(R_INTREGISTER,RS_EDX,cgsize2subreg(R_INTREGISTER,cgsize));
location_reset(location,LOC_REGISTER,cgsize);
hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,resultdef,false);
hreg1:=left.location.register;
if (nodetype=divn) and (right.nodetype=ordconstn) then
begin
if ispowerof2(int64(tordconstnode(right).value),power) then
begin
{ for signed numbers, the numerator must be adjusted before the
shift instruction, but not with unsigned numbers! Otherwise,
"Cardinal($ffffffff) div 16" overflows! (JM) }
if is_signed(left.resultdef) Then
begin
{ use a sequence without jumps, saw this in
comp.compilers (JM) }
{ no jumps, but more operations }
hreg2:=cg.getintregister(current_asmdata.CurrAsmList,cgsize);
emit_reg_reg(A_MOV,opsize,hreg1,hreg2);
{ If the left value is negative, hreg2 has all bits set, otherwise it is 0. }
emit_const_reg(A_SAR,opsize,resultdef.size*8-1,hreg2);
{ If negative, hreg2=right value-1, otherwise 0. }
{ (don't use emit_const_reg, because if value>high(longint)
then it must first be loaded into a register) }
cg.a_op_const_reg(current_asmdata.CurrAsmList,OP_AND,cgsize,tordconstnode(right).value-1,hreg2);
{ add to the left value }
emit_reg_reg(A_ADD,opsize,hreg2,hreg1);
{ do the shift }
emit_const_reg(A_SAR,opsize,power,hreg1);
end
else
emit_const_reg(A_SHR,opsize,power,hreg1);
{ the shifted numerator register is the result }
location.register:=hreg1;
end
else
begin
if is_signed(left.resultdef) then
begin
{ signed division by a constant: multiply by the magic constant sm,
keep the high half of the product in regd and correct it below }
e:=tordconstnode(right).value.svalue;
calc_divconst_magic_signed(resultdef.size*8,e,sm,s);
cg.getcpuregister(current_asmdata.CurrAsmList,rega);
emit_const_reg(A_MOV,opsize,sm,rega);
cg.getcpuregister(current_asmdata.CurrAsmList,regd);
emit_reg(A_IMUL,opsize,hreg1);
{ only the high half of result is used }
cg.ungetcpuregister(current_asmdata.CurrAsmList,rega);
{ add or subtract dividend }
if (e>0) and (sm<0) then
emit_reg_reg(A_ADD,opsize,hreg1,regd)
else if (e<0) and (sm>0) then
emit_reg_reg(A_SUB,opsize,hreg1,regd);
{ shift if necessary }
if (s<>0) then
emit_const_reg(A_SAR,opsize,s,regd);
{ extract and add the sign bit }
if (e<0) then
emit_reg_reg(A_MOV,opsize,regd,hreg1);
{ if e>=0, hreg1 still contains dividend }
emit_const_reg(A_SHR,opsize,left.resultdef.size*8-1,hreg1);
emit_reg_reg(A_ADD,opsize,hreg1,regd);
{ regd is released, its value is copied to a fresh result register }
cg.ungetcpuregister(current_asmdata.CurrAsmList,regd);
location.register:=cg.getintregister(current_asmdata.CurrAsmList,cgsize);
cg.a_load_reg_reg(current_asmdata.CurrAsmList,cgsize,cgsize,regd,location.register)
end
else
begin
d:=tordconstnode(right).value.svalue;
{ divisor with its top bit set: the quotient can only be 0 or 1, so a
compare is enough. The result register is set to 0 and SBB -1 then
yields 0-(-1)-carry, i.e. 1 when the numerator is >= d and 0 otherwise }
if d>=aword(1) shl (left.resultdef.size*8-1) then
begin
{ the flags are marked as allocated from the compare up to the SBB
that consumes the carry }
cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
if (cgsize in [OS_64,OS_S64]) then
begin
{ for the 64 bit case the divisor is loaded into a register first }
hreg2:=cg.getintregister(current_asmdata.CurrAsmList,cgsize);
emit_const_reg(A_MOV,opsize,aint(d),hreg2);
emit_reg_reg(A_CMP,opsize,hreg2,hreg1);
end
else
emit_const_reg(A_CMP,opsize,aint(d),hreg1);
location.register:=cg.getintregister(current_asmdata.CurrAsmList,cgsize);
emit_const_reg(A_MOV,opsize,0,location.register);
emit_const_reg(A_SBB,opsize,-1,location.register);
cg.a_reg_dealloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
end
else
begin
{ unsigned division by a constant: multiply by the magic constant m
and keep the high half of the product in regd }
calc_divconst_magic_unsigned(resultdef.size*8,d,m,m_add,s);
cg.getcpuregister(current_asmdata.CurrAsmList,rega);
emit_const_reg(A_MOV,opsize,aint(m),rega);
cg.getcpuregister(current_asmdata.CurrAsmList,regd);
emit_reg(A_MUL,opsize,hreg1);
cg.ungetcpuregister(current_asmdata.CurrAsmList,rega);
if m_add then
begin
{ addition can overflow, shift first bit considering carry,
then shift remaining bits in regular way. }
emit_reg_reg(A_ADD,opsize,hreg1,regd);
emit_const_reg(A_RCR,opsize,1,regd);
dec(s);
end;
if s<>0 then
emit_const_reg(A_SHR,opsize,aint(s),regd);
cg.ungetcpuregister(current_asmdata.CurrAsmList,regd);
location.register:=cg.getintregister(current_asmdata.CurrAsmList,cgsize);
cg.a_load_reg_reg(current_asmdata.CurrAsmList,cgsize,cgsize,regd,location.register)
end;
end;
end;
end
else
begin
{ General case: move the numerator (hreg1) into R/EAX, which DIV/IDIV
use as the low half of the dividend. }
cg.getcpuregister(current_asmdata.CurrAsmList,rega);
emit_reg_reg(A_MOV,opsize,hreg1,rega);
cg.getcpuregister(current_asmdata.CurrAsmList,regd);
{ Extension of the numerator into R/EDX depends on the left type:
sign extension (CDQ/CQO) for signed operands, zero otherwise. }
if is_signed(left.resultdef) then
case left.resultdef.size of
{$ifdef x86_64}
8:
emit_none(A_CQO,S_NO);
{$endif x86_64}
4:
emit_none(A_CDQ,S_NO);
else
internalerror(2013102701);
end
else
emit_reg_reg(A_XOR,opsize,regd,regd);
{ Division depends on the result type }
if is_signed(resultdef) then
op:=A_IDIV
else
op:=A_DIV;
{ the denominator is used directly from memory or from its register;
any other location is loaded into a new register first }
if right.location.loc in [LOC_REFERENCE,LOC_CREFERENCE] then
emit_ref(op,opsize,right.location.reference)
else if right.location.loc in [LOC_REGISTER,LOC_CREGISTER] then
emit_reg(op,opsize,right.location.register)
else
begin
hreg1:=cg.getintregister(current_asmdata.CurrAsmList,right.location.size);
hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,right.resultdef,right.resultdef,right.location,hreg1);
emit_reg(op,opsize,hreg1);
end;
{ Copy the result into a new register. Release R/EAX & R/EDX.}
cg.ungetcpuregister(current_asmdata.CurrAsmList,regd);
cg.ungetcpuregister(current_asmdata.CurrAsmList,rega);
location.register:=cg.getintregister(current_asmdata.CurrAsmList,cgsize);
{ quotient is in R/EAX, remainder in R/EDX }
if nodetype=divn then
cg.a_load_reg_reg(current_asmdata.CurrAsmList,cgsize,cgsize,rega,location.register)
else
cg.a_load_reg_reg(current_asmdata.CurrAsmList,cgsize,cgsize,regd,location.register);
end;
end;
end.