mirror of https://gitlab.com/freepascal.org/fpc/source.git
synced 2025-04-06 10:27:55 +02:00
2064 lines · 84 KiB · ObjectPascal
{
    Copyright (c) 2000-2002 by Florian Klaempfl

    Common code generation for add nodes on the i386 and x86

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

 ****************************************************************************
}
|
|
unit nx86add;
|
|
|
|
{$i fpcdefs.inc}
|
|
|
|
interface
|
|
|
|
uses
|
|
symtype,
|
|
cgbase,
|
|
cpubase,
|
|
node,nadd,ncgadd;
|
|
|
|
type
  { x86-family (i8086/i386/x86_64) implementation of the add node, i.e.
    binary arithmetic, logical, set and comparison operators; extends the
    generic tcgaddnode with x87/SSE/AVX/MMX code generation. }
  tx86addnode = class(tcgaddnode)
  protected
    { Maps the comparison 'nodetype' to x86 condition flags, honouring
      nf_swapped; 'unsigned' selects unsigned (A/B) vs signed (G/L) codes. }
    function getresflags(unsigned : boolean) : tresflags;
    { Maps the comparison 'nodetype' to the condition flags used after a
      floating point compare, honouring nf_swapped. }
    function getfpuresflags : tresflags;
    { Forces left.location into LOC_REGISTER (possibly by swapping with
      right when noswap=false) and widens both operands to opsize. }
    procedure left_must_be_reg(opdef: tdef; opsize:TCGSize;noswap:boolean);
    { Loads both operands into x87 registers, toggling nf_swapped as
      dictated by the resulting FPU stack order. }
    procedure force_left_and_right_fpureg;
    { Prepares operands for a two-operand x87 instruction; an OS_F32/OS_F64
      memory operand may stay in memory and is then returned in refnode. }
    procedure prepare_x87_locations(out refnode: tnode);
    { Emits "op right,left" for whatever location right currently has;
      left must already be in a register. }
    procedure emit_op_right_left(op:TAsmOp;opsize:TCgSize);
    { Emits the instruction(s) for a generic two-operand integer operation,
      including the overflow check when mboverflow and checking are on. }
    procedure emit_generic_code(op:TAsmOp;opsize:TCgSize;unsigned,extra_not,mboverflow:boolean);

    procedure second_cmpfloatvector;

    { SSE/AVX scalar floating point add/sub/mul/div. }
    procedure second_addfloatsse;
    procedure second_addfloatavx;
  public
    function pass_1 : tnode;override;
    function simplify(forinline : boolean) : tnode; override;
    function use_fma : boolean;override;
    procedure second_addfloat;override;
{$ifndef i8086}
    procedure second_addsmallset;override;
{$endif not i8086}
    procedure second_add64bit;override;
    procedure second_cmpfloat;override;
    procedure second_cmpsmallset;override;
    procedure second_cmp64bit;override;

    procedure second_cmpordinal;override;
    procedure second_addordinal;override;
    procedure second_addboolean;override;
{$ifdef SUPPORT_MMX}
    procedure second_opmmx;override;
{$endif SUPPORT_MMX}
    procedure second_opvector;override;
  end;
|
|
|
|
|
|
implementation
|
|
|
|
uses
|
|
globtype,globals,
|
|
verbose,cutils,compinnr,
|
|
cpuinfo,
|
|
aasmbase,aasmdata,aasmcpu,
|
|
symconst,symdef,
|
|
cgobj,hlcgobj,cgx86,cga,cgutils,
|
|
tgobj,ncgutil,
|
|
ncon,nset,ninl,ncnv,ncal,nmat,
|
|
defutil,defcmp,constexp,
|
|
htypechk;
|
|
|
|
{ Range check must be disabled explicitly as the code serves
|
|
on three different architecture sizes }
|
|
{$R-}
|
|
|
|
{*****************************************************************************
|
|
Helpers
|
|
*****************************************************************************}
|
|
|
|
{ Emits the actual x86 instruction(s) for a generic two-operand integer
  operation.  On entry left.location must be LOC_REGISTER (ensured by
  left_must_be_reg); the result ends up in left.location.register.
    op         : instruction to emit (A_ADD/A_SUB/A_AND/A_OR/A_XOR/A_IMUL/A_CMP)
    opsize     : operand size
    unsigned   : selects the flag tested by the overflow check (carry vs. OF)
    extra_not  : invert the right operand first (used for set difference)
    mboverflow : the operation can overflow, so checking may be required }
procedure tx86addnode.emit_generic_code(op:TAsmOp;opsize:TCGSize;unsigned,extra_not,mboverflow:boolean);
  var
    power : longint;
    hl4 : tasmlabel;
    r : Tregister;
    href : treference;
    overflowcheck: boolean;
  begin
    overflowcheck:=needoverflowcheck;
    { at this point, left.location.loc should be LOC_REGISTER }
    if right.location.loc=LOC_REGISTER then
      begin
        { right.location is a LOC_REGISTER }
        { when swapped another result register }
        if (nodetype=subn) and (nf_swapped in flags) then
          begin
            if extra_not then
              emit_reg(A_NOT,TCGSize2Opsize[opsize],left.location.register);
            emit_reg_reg(op,TCGSize2Opsize[opsize],left.location.register,right.location.register);
            { newly swapped also set swapped flag }
            location_swap(left.location,right.location);
            toggleflag(nf_swapped);
          end
        else
          begin
            if extra_not then
              emit_reg(A_NOT,TCGSize2Opsize[opsize],right.location.register);
            { for commutative operations the result may just as well go into
              right's register; swapping here can save a move }
            if (op=A_ADD) or (op=A_OR) or (op=A_AND) or (op=A_XOR) or (op=A_IMUL) then
              location_swap(left.location,right.location);
            emit_reg_reg(op,TCGSize2Opsize[opsize],right.location.register,left.location.register);
          end;
      end
    else
      begin
        { right.location is not a LOC_REGISTER }
        if (nodetype=subn) and (nf_swapped in flags) then
          begin
            if extra_not then
              cg.a_op_reg_reg(current_asmdata.CurrAsmList,OP_NOT,opsize,left.location.register,left.location.register);
            r:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
            hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,right.resultdef,cgsize_orddef(opsize),right.location,r);
            emit_reg_reg(op,TCGSize2Opsize[opsize],left.location.register,r);
            cg.a_load_reg_reg(current_asmdata.CurrAsmList,opsize,opsize,r,left.location.register);
          end
        else
          begin
            { Optimizations when right.location is a constant value }
            if (op=A_CMP) and
               (nodetype in [equaln,unequaln]) and
               (right.location.loc=LOC_CONSTANT) and
               (right.location.value=0) then
              begin
                { 'test $-1,%reg' is transformable into 'test $-1,spilltemp' if %reg needs
                  spilling, while 'test %reg,%reg' still requires loading into register.
                  If spilling is not necessary, it is changed back into 'test %reg,%reg' by
                  peephole optimizer (this optimization is currently available only for i386). }
                cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
{$ifdef i386}
                emit_const_reg(A_TEST,TCGSize2Opsize[opsize],aint(-1),left.location.register)
{$else i386}
                emit_reg_reg(A_TEST,TCGSize2Opsize[opsize],left.location.register,left.location.register);
{$endif i386}
              end
            else
              if (op=A_ADD) and
                 (right.location.loc=LOC_CONSTANT) and
                 (right.location.value=1) and
                 not overflowcheck and
                 UseIncDec then
                begin
                  emit_reg(A_INC,TCGSize2Opsize[opsize],left.location.register);
                end
            else
              if (op=A_SUB) and
                 (right.location.loc=LOC_CONSTANT) and
                 (right.location.value=1) and
                 not overflowcheck and
                 UseIncDec then
                begin
                  emit_reg(A_DEC,TCGSize2Opsize[opsize],left.location.register);
                end
            else
              if (op=A_IMUL) and
                 (right.location.loc=LOC_CONSTANT) and
                 (ispowerof2(int64(right.location.value),power)) and
                 { fixed: SHL only sets OF reliably for 1-bit shifts, so the
                   multiply may only be strength-reduced to a shift when no
                   overflow check is requested (consistent with the INC/DEC
                   and LEA cases above/below, which also require it) }
                 not overflowcheck then
                begin
                  emit_const_reg(A_SHL,TCGSize2Opsize[opsize],power,left.location.register);
                end
            else if (op=A_IMUL) and
               (right.location.loc=LOC_CONSTANT) and
               (right.location.value>1) and (ispowerof2(int64(right.location.value)-1,power)) and
               (power in [1..3]) and
               not overflowcheck then
              begin
                { multiply by 3/5/9 via LEA reg+reg*2/4/8; LEA sets no flags }
                reference_reset_base(href,left.location.register,0,ctempposinvalid,0,[]);
                href.index:=left.location.register;
                href.scalefactor:=int64(right.location.value)-1;
                left.location.register:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
                current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(A_LEA,TCgSize2OpSize[opsize],href,left.location.register));
              end
            else
              begin
                if extra_not then
                  begin
                    r:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
                    hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,right.resultdef,cgsize_orddef(opsize),right.location,r);
                    emit_reg(A_NOT,TCGSize2Opsize[opsize],r);

                    if mboverflow and overflowcheck then
                      cg.a_reg_alloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS);

                    emit_reg_reg(A_AND,TCGSize2Opsize[opsize],r,left.location.register);
                  end
                else
                  begin
                    if mboverflow and overflowcheck then
                      cg.a_reg_alloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS);

                    emit_op_right_left(op,opsize);
                  end;
              end;
          end;
      end;

    { only in case of overflow operations }
    { produce overflow code }
    { we must put it here directly, because sign of operation }
    { is in unsigned VAR!! }
    if mboverflow then
      begin
        if overflowcheck then
          begin
            current_asmdata.getjumplabel(hl4);
            if unsigned then
              cg.a_jmp_flags(current_asmdata.CurrAsmList,F_AE,hl4)
            else
              cg.a_jmp_flags(current_asmdata.CurrAsmList,F_NO,hl4);
            cg.a_reg_dealloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS);
            cg.a_call_name(current_asmdata.CurrAsmList,'FPC_OVERFLOW',false);
            cg.a_label(current_asmdata.CurrAsmList,hl4);
          end;
      end;
  end;
|
|
|
|
|
|
{ Ensures left.location is LOC_REGISTER so that a reg,reg/reg,mem/reg,const
  form of the operation can be emitted.  When noswap=false the operands may
  be exchanged (toggling nf_swapped) if right is already in a register.
  Finally both operands are widened to opsize if their sizes differ. }
procedure tx86addnode.left_must_be_reg(opdef: tdef; opsize:TCGSize;noswap:boolean);
  begin
    { left location is not a register? }
    if (left.location.loc<>LOC_REGISTER) then
      begin
        { if right is register then we can swap the locations }
        if (not noswap) and
           (right.location.loc=LOC_REGISTER) then
          begin
            location_swap(left.location,right.location);
            toggleflag(nf_swapped);
          end
        else if (not noswap) and
           (right.location.loc=LOC_CREGISTER) then
          begin
            location_swap(left.location,right.location);
            toggleflag(nf_swapped);
            { maybe we can reuse a constant register when the
              operation is a comparison that doesn't change the
              value of the register }
            hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,opdef,(nodetype in [ltn,lten,gtn,gten,equaln,unequaln]));
            location:=left.location;
          end
        else
          begin
            { maybe we can reuse a constant register when the
              operation is a comparison that doesn't change the
              value of the register }
            hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,opdef,(nodetype in [ltn,lten,gtn,gten,equaln,unequaln]));
          end;
      end;
    { widen the operand that does not match opsize yet; constants need no
      explicit widening }
    if (right.location.loc<>LOC_CONSTANT) and
       (tcgsize2unsigned[right.location.size]<>tcgsize2unsigned[opsize]) then
      hlcg.location_force_reg(current_asmdata.CurrAsmList,right.location,right.resultdef,opdef,true);
    if (left.location.loc<>LOC_CONSTANT) and
       (tcgsize2unsigned[left.location.size]<>tcgsize2unsigned[opsize]) then
      hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,opdef,false);
  end;
|
|
|
|
|
|
{ Loads both operands onto the x87 FPU stack.  Because loading pushes onto
  the stack, the operand order may end up reversed; nf_swapped is toggled
  whenever that happens so later flag/operand selection stays correct. }
procedure tx86addnode.force_left_and_right_fpureg;
  begin
    if (right.location.loc<>LOC_FPUREGISTER) then
      begin
        hlcg.location_force_fpureg(current_asmdata.CurrAsmList,right.location,right.resultdef,false);
        if (left.location.loc<>LOC_FPUREGISTER) then
          hlcg.location_force_fpureg(current_asmdata.CurrAsmList,left.location,left.resultdef,false)
        else
          { left was on the stack => swap }
          toggleflag(nf_swapped);
      end
    { the nominator in st0 }
    else if (left.location.loc<>LOC_FPUREGISTER) then
      begin
        hlcg.location_force_fpureg(current_asmdata.CurrAsmList,left.location,left.resultdef,false)
      end
    else
      begin
        { fpu operands are always in the wrong order on the stack }
        toggleflag(nf_swapped);
      end;
  end;
|
|
|
|
|
|
{ Makes sides suitable for executing an x87 instruction:
  if either side is OS_F32/OS_F64-sized LOC_REFERENCE, it is returned in 'refnode'
  (x87 arithmetic can use such a memory operand directly),
  everything else is loaded to FPU stack.  nf_swapped is toggled whenever
  the effective operand order ends up reversed. }
procedure tx86addnode.prepare_x87_locations(out refnode: tnode);
  begin
    refnode:=nil;

    { later on, no mm registers are allowed, so transfer everything to memory here;
      below it is loaded into an fpu register if needed }
    if left.location.loc in [LOC_CMMREGISTER,LOC_MMREGISTER] then
      hlcg.location_force_mem(current_asmdata.CurrAsmList,left.location,left.resultdef);

    if right.location.loc in [LOC_CMMREGISTER,LOC_MMREGISTER] then
      hlcg.location_force_mem(current_asmdata.CurrAsmList,right.location,right.resultdef);

    { dispatch on how many operands are already on the x87 stack (0, 1 or 2) }
    case ord(left.location.loc=LOC_FPUREGISTER)+ord(right.location.loc=LOC_FPUREGISTER) of
      0:
        begin
          { neither on the stack: push right; left may stay in memory if it
            has a directly usable float size }
          hlcg.location_force_fpureg(current_asmdata.CurrAsmList,right.location,right.resultdef,false);
          if not(left.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
            InternalError(2013090803);
          if (left.location.size in [OS_F32,OS_F64]) then
            begin
              refnode:=left;
              toggleflag(nf_swapped);
            end
          else
            hlcg.location_force_fpureg(current_asmdata.CurrAsmList,left.location,left.resultdef,false);
        end;
      1:
        begin { if left is on the stack then swap. }
          if (left.location.loc=LOC_FPUREGISTER) then
            refnode:=right
          else
            refnode:=left;
          if not(refnode.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
            InternalError(2013090801);
          if not (refnode.location.size in [OS_F32,OS_F64]) then
            begin
              { size not directly usable as x87 memory operand: load it }
              hlcg.location_force_fpureg(current_asmdata.CurrAsmList,refnode.location,refnode.resultdef,false);
              if (refnode=right) then
                toggleflag(nf_swapped);
              refnode:=nil;
            end
          else
            begin
              if (refnode=left) then
                toggleflag(nf_swapped);
            end;
        end;
      2: { fpu operands are always in the wrong order on the stack }
        toggleflag(nf_swapped);
      else
        InternalError(2013090802);
    end;
  end;
|
|
|
|
|
|
{ Emits "op right,left" with left already in a register; right may be a
  register, memory reference or constant.  Subset locations are first
  flattened into a register. }
procedure tx86addnode.emit_op_right_left(op:TAsmOp;opsize:TCgsize);
{$ifdef x86_64}
  var
    tmpreg : tregister;
{$endif x86_64}
  begin
    if (right.location.loc in [LOC_CSUBSETREG,LOC_SUBSETREG,LOC_SUBSETREF,LOC_CSUBSETREF]) then
      hlcg.location_force_reg(current_asmdata.CurrAsmList,right.location,right.resultdef,right.resultdef,true);
    { left must be a register }
    case right.location.loc of
      LOC_REGISTER,
      LOC_CREGISTER :
        current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(op,TCGSize2Opsize[opsize],right.location.register,left.location.register));
      LOC_REFERENCE,
      LOC_CREFERENCE :
        begin
          { make_simple_ref ensures the reference is encodable (e.g. no
            RIP-incompatible addressing on x86_64) }
          tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
          current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(op,TCGSize2Opsize[opsize],right.location.reference,left.location.register));
        end;
      LOC_CONSTANT :
        begin
{$ifdef x86_64}
          { x86_64 only supports signed 32 bits constants directly }
          if (opsize in [OS_S64,OS_64]) and
             ((right.location.value<low(longint)) or (right.location.value>high(longint))) then
            begin
              tmpreg:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
              cg.a_load_const_reg(current_asmdata.CurrAsmList,opsize,right.location.value,tmpreg);
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(op,TCGSize2Opsize[opsize],tmpreg,left.location.register));
            end
          else
{$endif x86_64}
            current_asmdata.CurrAsmList.concat(taicpu.op_const_reg(op,TCGSize2Opsize[opsize],right.location.value,left.location.register));
        end;
      else
        internalerror(200203232);
    end;
  end;
|
|
|
|
|
|
{ Maps the comparison 'nodetype' to the x86 condition flags that must be
  tested after the compare instruction.  Equality tests are sign-agnostic;
  for ordering tests 'unsigned' picks the A/B family instead of G/L, and
  nf_swapped mirrors the condition when the operands were exchanged. }
function tx86addnode.getresflags(unsigned : boolean) : tresflags;
  begin
    case nodetype of
      equaln:
        result:=F_E;
      unequaln:
        result:=F_NE;
      else
        if unsigned then
          begin
            if nf_swapped in flags then
              case nodetype of
                ltn : result:=F_A;
                lten : result:=F_AE;
                gtn : result:=F_B;
                gten : result:=F_BE;
                else
                  internalerror(2013120107);
              end
            else
              case nodetype of
                ltn : result:=F_B;
                lten : result:=F_BE;
                gtn : result:=F_A;
                gten : result:=F_AE;
                else
                  internalerror(2013120108);
              end;
          end
        else
          begin
            if nf_swapped in flags then
              case nodetype of
                ltn : result:=F_G;
                lten : result:=F_GE;
                gtn : result:=F_L;
                gten : result:=F_LE;
                else
                  internalerror(2013120105);
              end
            else
              case nodetype of
                ltn : result:=F_L;
                lten : result:=F_LE;
                gtn : result:=F_G;
                gten : result:=F_GE;
                else
                  internalerror(2013120106);
              end;
          end;
    end;
  end;
|
|
|
|
|
|
{ Maps the comparison 'nodetype' to the condition flags tested after a
  floating point compare; nf_swapped mirrors the ordering conditions. }
function tx86addnode.getfpuresflags : tresflags;
  var
    swapped : boolean;
  begin
    swapped:=nf_swapped in flags;
    case nodetype of
      equaln:
        result:=F_FE;
      unequaln:
        result:=F_FNE;
      ltn:
        if swapped then
          result:=F_FA
        else
          result:=F_FB;
      lten:
        if swapped then
          result:=F_FAE
        else
          result:=F_FBE;
      gtn:
        if swapped then
          result:=F_FB
        else
          result:=F_FA;
      gten:
        if swapped then
          result:=F_FBE
        else
          result:=F_FAE;
      else
        { keep the original distinct error codes per operand order }
        if swapped then
          internalerror(2014031402)
        else
          internalerror(2014031403);
    end;
  end;
|
|
|
|
{*****************************************************************************
|
|
AddSmallSet
|
|
*****************************************************************************}
|
|
|
|
{$ifndef i8086}
{ Code generation for add/sub/mul/symdif and the logical operators on
  small sets (sets that fit in a machine word).  Element inclusion uses
  BTS; set difference becomes AND NOT (or BMI1 ANDN when available). }
procedure tx86addnode.second_addsmallset;
  var
    setbase : aint;
    opdef : tdef;
    opsize : TCGSize;
    op : TAsmOp;
    extra_not,
    noswap : boolean;
    all_member_optimization:boolean;

  begin
    pass_left_right;

    noswap:=false;
    extra_not:=false;
    all_member_optimization:=false;
    opdef:=resultdef;
    opsize:=int_cgsize(opdef.size);
    { the set operand (rather than a setelement) determines the base }
    if (left.resultdef.typ=setdef) then
      setbase:=tsetdef(left.resultdef).setbase
    else
      setbase:=tsetdef(right.resultdef).setbase;
    case nodetype of
      addn :
        begin
          { adding elements is not commutative }
          if (nf_swapped in flags) and (left.nodetype=setelementn) then
            swapleftright;
          { are we adding set elements ? }
          if right.nodetype=setelementn then
            begin
              { no range support for smallsets! }
              if assigned(tsetelementnode(right).right) then
                internalerror(43244);
              { btsb isn't supported }
              if opsize=OS_8 then
                begin
                  opsize:=OS_32;
                  opdef:=u32inttype;
                end;
              { bts requires both elements to be registers }
              hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,opdef,false);
              hlcg.location_force_reg(current_asmdata.CurrAsmList,right.location,right.resultdef,opdef,true);
              register_maybe_adjust_setbase(current_asmdata.CurrAsmList,opdef,right.location,setbase);
              op:=A_BTS;
              noswap:=true;
            end
          else
            op:=A_OR;
        end;
      symdifn :
        op:=A_XOR;
      muln :
        op:=A_AND;
      subn :
        begin
          { set difference: left AND NOT right }
          op:=A_AND;
          if (not(nf_swapped in flags) and (left.location.loc=LOC_CONSTANT) and (left.location.value=-1)) or
             ((nf_swapped in flags) and (right.location.loc=LOC_CONSTANT) and (right.location.value=-1)) then
            all_member_optimization:=true;

          { fold the NOT into a constant operand when possible, otherwise
            emit an explicit NOT later }
          if (not(nf_swapped in flags)) and
             (right.location.loc=LOC_CONSTANT) then
            right.location.value := not(right.location.value)
          else if (nf_swapped in flags) and
             (left.location.loc=LOC_CONSTANT) then
            left.location.value := not(left.location.value)
          else
            extra_not:=true;
        end;
      xorn :
        op:=A_XOR;
      orn :
        op:=A_OR;
      andn :
        op:=A_AND;
      else
        internalerror(2003042215);
    end;
    if all_member_optimization then
      begin
        {A set expression [0..31]-x can be implemented with a simple NOT.}
        if nf_swapped in flags then
          begin
            { newly swapped also set swapped flag }
            location_swap(left.location,right.location);
            toggleflag(nf_swapped);
          end;
        hlcg.location_force_reg(current_asmdata.currAsmList,right.location,right.resultdef,opdef,false);
        emit_reg(A_NOT,TCGSize2Opsize[opsize],right.location.register);
        location:=right.location;
      end
    else
      begin
        { can we use the BMI1 instruction andn? }
        if (op=A_AND) and extra_not and (CPUX86_HAS_BMI1 in cpu_capabilities[current_settings.cputype]) and
          (resultdef.size in [4{$ifdef x86_64},8{$endif x86_64}]) then
          begin
            location_reset(location,LOC_REGISTER,left.location.size);
            location.register:=cg.getintregister(current_asmdata.currAsmList,left.location.size);
            if nf_swapped in flags then
              begin
                location_swap(left.location,right.location);
                toggleflag(nf_swapped);
              end;
            { ANDN computes (NOT src1) AND src2, so right goes into a register
              and left may be register or memory }
            hlcg.location_force_reg(current_asmdata.currAsmList,right.location,right.resultdef,opdef,true);
            if not(left.location.loc in [LOC_CREGISTER,LOC_REGISTER,LOC_CREFERENCE,LOC_REFERENCE]) then
              hlcg.location_force_reg(current_asmdata.currAsmList,left.location,left.resultdef,opdef,true);
            case left.location.loc of
              LOC_CREGISTER,LOC_REGISTER:
                emit_reg_reg_reg(A_ANDN,TCGSize2Opsize[opsize],left.location.register,right.location.register,location.register);
              LOC_CREFERENCE,LOC_REFERENCE:
                emit_ref_reg_reg(A_ANDN,TCGSize2Opsize[opsize],left.location.reference,right.location.register,location.register);
              else
                Internalerror(2018040201);
            end;
          end
        else
          begin
            { left must be a register }
            left_must_be_reg(opdef,opsize,noswap);
            emit_generic_code(op,opsize,true,extra_not,false);
            location_freetemp(current_asmdata.CurrAsmList,right.location);

            { left is always a register and contains the result }
            location:=left.location;
          end;
      end;

    { fix the changed opsize we did above because of the missing btsb }
    if opsize<>int_cgsize(resultdef.size) then
      hlcg.location_force_reg(current_asmdata.CurrAsmList,location,opdef,cgsize_orddef(int_cgsize(resultdef.size)),false);
  end;
{$endif not i8086}
|
|
|
|
|
|
{ Code generation for comparisons of small sets (=, <>, <=, >=).
  Subset tests are rewritten as "left and right = right" by ANDing first
  and then comparing for equality; the result is a LOC_FLAGS location. }
procedure tx86addnode.second_cmpsmallset;
  var
    opdef : tdef;
    opsize : TCGSize;
    op : TAsmOp;
  begin
    pass_left_right;
    opdef:=left.resultdef;
    opsize:=int_cgsize(opdef.size);
    case nodetype of
      equaln,
      unequaln :
        op:=A_CMP;
      lten,gten:
        begin
          { normalize so that we always test "left <= right",
            i.e. left must be the (potential) subset }
          if (not(nf_swapped in flags) and (nodetype = lten)) or
             ((nf_swapped in flags) and (nodetype = gten)) then
            swapleftright;
          hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,opdef,false);
          emit_op_right_left(A_AND,opsize);
          op:=A_CMP;
          { warning: ugly hack, we need a JE so change the node to equaln }
          nodetype:=equaln;
        end;
      else
        internalerror(2003042204);
    end;
    { left must be a register }
    left_must_be_reg(opdef,opsize,false);
    emit_generic_code(op,opsize,true,false,false);
    location_freetemp(current_asmdata.CurrAsmList,right.location);
    location_freetemp(current_asmdata.CurrAsmList,left.location);

    location_reset(location,LOC_FLAGS,OS_NO);
    location.resflags:=getresflags(true);
  end;
|
|
|
|
|
|
{*****************************************************************************
|
|
AddMMX
|
|
*****************************************************************************}
|
|
|
|
{$ifdef SUPPORT_MMX}
{ Code generation for MMX vector operations (add/sub/mul and the bitwise
  operators).  Saturating instruction variants are selected when
  cs_mmx_saturation is active; the result ends up in an MMX register. }
procedure tx86addnode.second_opmmx;
  var
    op : TAsmOp;
    cmpop : boolean;
    mmxbase : tmmxtype;
    hreg,
    hregister : tregister;
  begin
    pass_left_right;

    cmpop:=false;
    op:=A_NOP;

    mmxbase:=mmx_type(left.resultdef);
    location_reset(location,LOC_MMXREGISTER,def_cgsize(resultdef));
    { select the MMX instruction matching operator, element type and
      (for add/sub) the saturation setting }
    case nodetype of
      addn :
        begin
          if (cs_mmx_saturation in current_settings.localswitches) then
            begin
              case mmxbase of
                mmxs8bit:
                  op:=A_PADDSB;
                mmxu8bit:
                  op:=A_PADDUSB;
                mmxs16bit,mmxfixed16:
                  op:=A_PADDSW;
                mmxu16bit:
                  op:=A_PADDUSW;
                else
                  ;
              end;
            end
          else
            begin
              case mmxbase of
                mmxs8bit,mmxu8bit:
                  op:=A_PADDB;
                mmxs16bit,mmxu16bit,mmxfixed16:
                  op:=A_PADDW;
                mmxs32bit,mmxu32bit:
                  op:=A_PADDD;
                else
                  ;
              end;
            end;
        end;
      muln :
        begin
          case mmxbase of
            mmxs16bit,mmxu16bit:
              op:=A_PMULLW;
            mmxfixed16:
              op:=A_PMULHW;
            else
              ;
          end;
        end;
      subn :
        begin
          if (cs_mmx_saturation in current_settings.localswitches) then
            begin
              case mmxbase of
                mmxs8bit:
                  op:=A_PSUBSB;
                mmxu8bit:
                  op:=A_PSUBUSB;
                mmxs16bit,mmxfixed16:
                  { fixed: was A_PSUBSB (byte operation); signed 16-bit
                    saturated subtraction requires PSUBSW, matching
                    A_PADDSW in the addn case above }
                  op:=A_PSUBSW;
                mmxu16bit:
                  op:=A_PSUBUSW;
                else
                  ;
              end;
            end
          else
            begin
              case mmxbase of
                mmxs8bit,mmxu8bit:
                  op:=A_PSUBB;
                mmxs16bit,mmxu16bit,mmxfixed16:
                  op:=A_PSUBW;
                mmxs32bit,mmxu32bit:
                  op:=A_PSUBD;
                else
                  ;
              end;
            end;
        end;
      xorn:
        op:=A_PXOR;
      orn:
        op:=A_POR;
      andn:
        op:=A_PAND;
      else
        internalerror(2003042214);
    end;

    if op = A_NOP then
      internalerror(201408201);

    { left and right no register? }
    { then one must be demanded }
    if (left.location.loc<>LOC_MMXREGISTER) then
      begin
        if (right.location.loc=LOC_MMXREGISTER) then
          begin
            location_swap(left.location,right.location);
            toggleflag(nf_swapped);
          end
        else
          begin
            { register variable ? }
            if (left.location.loc=LOC_CMMXREGISTER) then
              begin
                hregister:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
                emit_reg_reg(A_MOVQ,S_NO,left.location.register,hregister);
              end
            else
              begin
                if not(left.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
                  internalerror(200203245);

                hregister:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
                tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,left.location.reference);
                emit_ref_reg(A_MOVQ,S_NO,left.location.reference,hregister);
              end;

            location_reset(left.location,LOC_MMXREGISTER,OS_NO);
            left.location.register:=hregister;
          end;
      end;

    { at this point, left.location.loc should be LOC_MMXREGISTER }
    if right.location.loc<>LOC_MMXREGISTER then
      begin
        if (nodetype=subn) and (nf_swapped in flags) then
          begin
            { swapped subtraction: compute right-left into a fresh register }
            hreg:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
            if right.location.loc=LOC_CMMXREGISTER then
              begin
                emit_reg_reg(A_MOVQ,S_NO,right.location.register,hreg);
                emit_reg_reg(op,S_NO,left.location.register,hreg);
              end
            else
              begin
                { fixed: validate right (whose reference is dereferenced
                  below), not left }
                if not(right.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
                  internalerror(2002032412);
                tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
                emit_ref_reg(A_MOVQ,S_NO,right.location.reference,hreg);
                emit_reg_reg(op,S_NO,left.location.register,hreg);
              end;
            location.register:=hreg;
          end
        else
          begin
            if (right.location.loc=LOC_CMMXREGISTER) then
              emit_reg_reg(op,S_NO,right.location.register,left.location.register)
            else
              begin
                if not(right.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
                  internalerror(200203246);
                tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
                emit_ref_reg(op,S_NO,right.location.reference,left.location.register);
              end;
            location.register:=left.location.register;
          end;
      end
    else
      begin
        { right.location=LOC_MMXREGISTER }
        if (nodetype=subn) and (nf_swapped in flags) then
          begin
            emit_reg_reg(op,S_NO,left.location.register,right.location.register);
            location_swap(left.location,right.location);
            toggleflag(nf_swapped);
          end
        else
          begin
            emit_reg_reg(op,S_NO,right.location.register,left.location.register);
          end;
        location.register:=left.location.register;
      end;

    location_freetemp(current_asmdata.CurrAsmList,right.location);
    { cmpop is currently never set here, kept for symmetry with the other
      second_* routines }
    if cmpop then
      location_freetemp(current_asmdata.CurrAsmList,left.location);
  end;
{$endif SUPPORT_MMX}
|
|
|
|
|
|
{*****************************************************************************
|
|
AddFloat
|
|
*****************************************************************************}
|
|
|
|
{ Code generation for scalar floating point add/sub/mul/div using SSE.
  Recognizes sqr(x)+sqr(y) / sqr(x)-sqr(y) on SSE3+ and maps it onto a
  packed multiply followed by a horizontal add/sub. }
procedure tx86addnode.second_addfloatsse;
  var
    op : topcg;
    sqr_sum : boolean;
    tmp : tnode;
  begin
    sqr_sum:=false;
    if (current_settings.fputype>=fpu_sse3) and
       use_vectorfpu(resultdef) and
       (nodetype in [addn,subn]) and
       (left.nodetype=inlinen) and (tinlinenode(left).inlinenumber=in_sqr_real) and
       (right.nodetype=inlinen) and (tinlinenode(right).inlinenumber=in_sqr_real) then
      begin
        { strip the sqr() inline nodes; the squaring is done below with a
          single packed multiplication }
        sqr_sum:=true;
        tmp:=tinlinenode(left).left;
        tinlinenode(left).left:=nil;
        left.free;
        left:=tmp;

        tmp:=tinlinenode(right).left;
        tinlinenode(right).left:=nil;
        right.free;
        right:=tmp;
      end;

    pass_left_right;
    { fpu operands are always in reversed order on the stack }
    if (left.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER]) and (right.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER]) then
      toggleflag(nf_swapped);

    if (nf_swapped in flags) then
      { can't use swapleftright if both are on the fpu stack, since then }
      { both are "R_ST" -> nothing would change -> manually switch }
      if (left.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER]) and
         (right.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER]) then
        emit_none(A_FXCH,S_NO)
      else
        swapleftright;

    case nodetype of
      addn :
        op:=OP_ADD;
      muln :
        op:=OP_MUL;
      subn :
        op:=OP_SUB;
      slashn :
        op:=OP_DIV;
      else
        internalerror(200312231);
    end;

    location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));

    if sqr_sum then
      begin
        if nf_swapped in flags then
          swapleftright;

        hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,false);
        hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,right.location,right.resultdef,true);
        location:=left.location;
        if is_double(resultdef) then
          begin
            { pack x and y into one xmm register, square both at once, then
              reduce horizontally }
            current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(A_SHUFPD,S_NO,%00,right.location.register,location.register));
            current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_MULPD,S_NO,location.register,location.register));
            case nodetype of
              addn:
                current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_HADDPD,S_NO,location.register,location.register));
              subn:
                current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_HSUBPD,S_NO,location.register,location.register));
              else
                internalerror(201108162);
            end;
          end
        else
          begin
            current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_UNPCKLPS,S_NO,right.location.register,location.register));
            { ensure that bits 64..127 contain valid values }
            current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(A_SHUFPD,S_NO,%00,location.register,location.register));
            { the data is now in bits 0..32 and 64..95 }
            current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_MULPS,S_NO,location.register,location.register));
            case nodetype of
              addn:
                begin
                  current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_HADDPS,S_NO,location.register,location.register));
                end;
              subn:
                begin
                  current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_HSUBPS,S_NO,location.register,location.register));
                end;
              else
                internalerror(201108163);
            end;
          end
      end
    { we can use only right as left operand if the operation is commutative }
    else if (right.location.loc=LOC_MMREGISTER) and (op in [OP_ADD,OP_MUL]) then
      begin
        location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
        cg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,right.location.size,location.size,right.location.register,location.register,mms_movescalar);
        { force floating point reg. location to be written to memory,
          we don't force it to mm register because writing to memory
          allows probably shorter code because there is no direct fpu->mm register
          copy instruction
        }
        if left.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
          hlcg.location_force_mem(current_asmdata.CurrAsmList,left.location,left.resultdef);
        cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,op,location.size,left.location,location.register,mms_movescalar);

        if left.location.loc=LOC_REFERENCE then
          tg.ungetiftemp(current_asmdata.CurrAsmList,left.location.reference);
      end
    else
      begin
        if nf_swapped in flags then
          swapleftright;

        { force floating point reg. location to be written to memory,
          we don't force it to mm register because writing to memory
          allows probably shorter code because there is no direct fpu->mm register
          copy instruction
        }
        if left.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
          hlcg.location_force_mem(current_asmdata.CurrAsmList,left.location,left.resultdef);

        location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
        cg.a_loadmm_loc_reg(current_asmdata.CurrAsmList,location.size,left.location,location.register,mms_movescalar);

        if left.location.loc=LOC_REFERENCE then
          tg.ungetiftemp(current_asmdata.CurrAsmList,left.location.reference);

        { force floating point reg. location to be written to memory,
          we don't force it to mm register because writing to memory
          allows probably shorter code because there is no direct fpu->mm register
          copy instruction
        }
        if right.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
          hlcg.location_force_mem(current_asmdata.CurrAsmList,right.location,right.resultdef);

        cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,op,location.size,right.location,location.register,mms_movescalar);

        if right.location.loc=LOC_REFERENCE then
          tg.ungetiftemp(current_asmdata.CurrAsmList,right.location.reference);
      end;
  end;
|
|
|
|
|
|
{ Generates AVX code for scalar float +, -, * and / (addn/subn/muln/slashn).
  The result always ends up in an mm (xmm) register; x87 operands are
  spilled to memory first because there is no direct fpu->mm move.
  Contains a disabled (ifdef dummy) sqr(x)+sqr(y) fusion using horizontal
  add/sub instructions. }
procedure tx86addnode.second_addfloatavx;
  var
    op : topcg;
    sqr_sum : boolean;               { true when the sqr+sqr pattern was matched }
{$ifdef dummy}
    tmp : tnode;
{$endif dummy}
  begin
    sqr_sum:=false;
{$ifdef dummy}
    { Detect sqr(a)+sqr(b) / sqr(a)-sqr(b) and strip the in_sqr_real inline
      nodes so the operands can be squared pairwise with a packed multiply
      below. Disabled code path. }
    if (current_settings.fputype>=fpu_sse3) and
      use_vectorfpu(resultdef) and
      (nodetype in [addn,subn]) and
      (left.nodetype=inlinen) and (tinlinenode(left).inlinenumber=in_sqr_real) and
      (right.nodetype=inlinen) and (tinlinenode(right).inlinenumber=in_sqr_real) then
      begin
        sqr_sum:=true;
        { take ownership of the sqr arguments and free the inline wrappers }
        tmp:=tinlinenode(left).left;
        tinlinenode(left).left:=nil;
        left.free;
        left:=tmp;

        tmp:=tinlinenode(right).left;
        tinlinenode(right).left:=nil;
        right.free;
        right:=tmp;
      end;
{$endif dummy}

    pass_left_right;
    { fpu operands are always in reversed order on the stack }
    if (left.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER]) and (right.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER]) then
      toggleflag(nf_swapped);

    if (nf_swapped in flags) then
      { can't use swapleftright if both are on the fpu stack, since then }
      { both are "R_ST" -> nothing would change -> manually switch }
      if (left.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER]) and
        (right.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER]) then
        emit_none(A_FXCH,S_NO)
      else
        swapleftright;

    { map the node type onto the generic code-generator operation }
    case nodetype of
      addn :
        op:=OP_ADD;
      muln :
        op:=OP_MUL;
      subn :
        op:=OP_SUB;
      slashn :
        op:=OP_DIV;
      else
        internalerror(2003122303);
    end;

    location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));

    if sqr_sum then
      begin
        { addition/subtraction of the squares is commutative only after the
          packed multiply, so restore the original operand order first }
        if nf_swapped in flags then
          swapleftright;

        hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,false);
        hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,right.location,right.resultdef,true);
        location:=left.location;
        if is_double(resultdef) then
          begin
            { pack left/right into one register, square both lanes, then
              combine the lanes horizontally }
            current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(A_SHUFPD,S_NO,%00,right.location.register,location.register));
            current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_MULPD,S_NO,location.register,location.register));
            case nodetype of
              addn:
                current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_HADDPD,S_NO,location.register,location.register));
              subn:
                current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_HSUBPD,S_NO,location.register,location.register));
              else
                internalerror(2011081601);
            end;
          end
        else
          begin
            current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_UNPCKLPS,S_NO,right.location.register,location.register));
            { ensure that bits 64..127 contain valid values }
            current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(A_SHUFPD,S_NO,%00,location.register,location.register));
            { the data is now in bits 0..32 and 64..95 }
            current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_MULPS,S_NO,location.register,location.register));
            case nodetype of
              addn:
                begin
                  current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_HADDPS,S_NO,location.register,location.register));
                end;
              subn:
                begin
                  current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_HSUBPS,S_NO,location.register,location.register));
                end;
              else
                internalerror(2011081604);
            end;
          end
      end
    { left*2 ? }
    else if (nodetype=muln) and is_constrealnode(right) and is_number_float(trealconstnode(right).value_real) and (trealconstnode(right).value_real=2) then
      begin
        { strength-reduce x*2 to x+x (single 3-operand vaddss/vaddsd) }
        location.register:=cg.getmmregister(current_asmdata.CurrAsmList,left.location.size);
        hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
        cg.a_opmm_reg_reg_reg(current_asmdata.CurrAsmList,OP_ADD,location.size,
          left.location.register,
          left.location.register,
          location.register,
          mms_movescalar);
      end
    { right*2 ? }
    else if (nodetype=muln) and is_constrealnode(left) and is_number_float(trealconstnode(left).value_real) and (trealconstnode(left).value_real=2) then
      begin
        { strength-reduce 2*x to x+x }
        location.register:=cg.getmmregister(current_asmdata.CurrAsmList,right.location.size);
        hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,right.location,right.resultdef,true);
        cg.a_opmm_reg_reg_reg(current_asmdata.CurrAsmList,OP_ADD,location.size,
          right.location.register,
          right.location.register,
          location.register,
          mms_movescalar);
      end
    { we can use only right as left operand if the operation is commutative }
    else if (right.location.loc in [LOC_MMREGISTER,LOC_CMMREGISTER]) and (op in [OP_ADD,OP_MUL]) then
      begin
        location.register:=cg.getmmregister(current_asmdata.CurrAsmList,left.location.size);
        { force floating point reg. location to be written to memory,
          we don't force it to mm register because writing to memory
          allows probably shorter code because there is no direct fpu->mm register
          copy instruction
        }
        if left.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
          hlcg.location_force_mem(current_asmdata.CurrAsmList,left.location,left.resultdef);
        cg.a_opmm_loc_reg_reg(current_asmdata.CurrAsmList,op,location.size,
          left.location,
          right.location.register,
          location.register,
          mms_movescalar);
      end
    else
      begin
        { non-commutative (or right not in an mm register): establish the
          canonical operand order, then emit op with left in a register }
        if (nf_swapped in flags) then
          swapleftright;

        hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
        location.register:=cg.getmmregister(current_asmdata.CurrAsmList,left.location.size);
        { force floating point reg. location to be written to memory,
          we don't force it to mm register because writing to memory
          allows probably shorter code because there is no direct fpu->mm register
          copy instruction
        }
        if right.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
          hlcg.location_force_mem(current_asmdata.CurrAsmList,right.location,right.resultdef);
        cg.a_opmm_loc_reg_reg(current_asmdata.CurrAsmList,op,location.size,
          right.location,
          left.location.register,
          location.register,
          mms_movescalar);
      end;
  end;
|
|
|
|
|
|
{ First pass: decide register preferences for float operands and fix up
  the expected result location for vector-fpu results. }
function tx86addnode.pass_1: tnode;
  begin
    { When a float operation will not use the vector fpu anywhere (neither
      operand nor result), it goes through the x87; keeping the operands
      addressable in memory is then preferable to forcing them into
      (mm) registers. }
    if ((left.resultdef.typ=floatdef) or (right.resultdef.typ=floatdef)) and
       not(use_vectorfpu(left.resultdef) or use_vectorfpu(right.resultdef) or
           use_vectorfpu(resultdef)) then
      begin
        make_not_regable(left,[ra_addr_regable]);
        make_not_regable(right,[ra_addr_regable]);
      end;

    Result:=inherited pass_1;

    { Correct expectloc for vector-fpu results. It does not matter if Result
      is set: pass_1 runs again on the replacement node and fixes it there. }
    if use_vectorfpu(resultdef) then
      expectloc:=LOC_MMREGISTER;
  end;
|
|
|
|
|
|
{ Node-level simplification. On targets with a 16/32/64-bit ALU, rewrites
  "(x mod d) = 0" and "(x mod d) <> 0" for a constant, non-power-of-two
  divisor d >= 3 into a multiply-by-modular-inverse test (Hacker's Delight,
  2nd ed., pages 250-251), avoiding the division entirely. Falls back to
  the inherited simplification in every other case. }
function tx86addnode.simplify(forinline : boolean) : tnode;
  var
    t, m, ThisNode, ConstNode: TNode;    { t = mod node, m = zero-constant node }
    lt,rt, ThisType: TNodeType;
    ThisDef: TDef;
    DoOptimisation: Boolean;

    reciprocal, comparison, divisor: AWord;
    shift, N: Byte;                      { N = operand width in bits }
  begin
    { Load into local variables to reduce the number of pointer deallocations }
    rt:=right.nodetype;
    lt:=left.nodetype;

    DoOptimisation:=False;

{$if defined(cpu64bitalu) or defined(cpu32bitalu) or defined(cpu16bitalu)}
    if (cs_opt_level1 in current_settings.optimizerswitches) and
      { The presence of overflow checks tends to cause internal errors with the multiplication nodes }
      not (cs_check_overflow in current_settings.localswitches) and
      (nodetype in [equaln,unequaln]) then
      begin
        { match "(x mod d) = 0" with the mod on either side of the comparison }
        if (lt=modn) and (rt=ordconstn) and (TOrdConstNode(right).value.uvalue=0) then
          begin
            t:=left;
            m:=right;
          end
        else if (rt=modn) and (lt=ordconstn) and (TOrdConstNode(left).value.uvalue=0) then
          begin
            t:=right;
            m:=left;
          end
        else
          begin
            t:=nil;
            m:=nil;
          end;

        if Assigned(t) and (TModDivNode(t).right.nodetype=ordconstn) and
{$ifndef cpu64bitalu}
          { Converting Int64 and QWord division doesn't work under i386 }
{$ifndef cpu32bitalu}
          (TModDivNode(t).resultdef.size < 4) and
{$else cpu32bitalu}
          (TModDivNode(t).resultdef.size < 8) and
{$endif cpu32bitalu}
{$endif cpu64bitalu}
          (TOrdConstNode(TModDivNode(t).right).value>=3) then
          begin
            divisor:=TOrdConstNode(TModDivNode(t).right).value.uvalue;

            { Exclude powers of 2, as there are more efficient ways to handle those }
            if PopCnt(divisor)>1 then
              begin
                if is_signed(TModDivNode(t).left.resultdef) then
                  begin
                    { See pages 250-251 of Hacker's Delight, Second Edition
                      for an explanation and proof of the algorithm, but
                      essentially, we're doing the following:

                      - Convert the divisor d to the form k.2^b if it isn't
                        already odd (in which case, k = d and b = 0)
                      - Calculate r, the multiplicative inverse of k modulo 2^N
                      - Calculate c = floor(2^(N-1) / k) & -(2^b)
                      - Let q = ((n * r) + c) ror b (mod 2^N)
                      - Repurpose c to equal floor(2c / 2^b) = c shr (b - 1)
                        (some RISC platforms will benefit from doing this over
                        precalculating the modified constant.  For x86,
                        it's better with the constant precalculated for
                        32-bit and under, but for 64-bit, use SHR. )
                      - If q is below or equal to c, then (n mod d) = 0
                    }
                    { Peel redundant sign-preserving typeconvs off the
                      numerator to work at the smallest safe operand size;
                      exits early if the numerator folds to a constant. }
                    while True do
                      begin
                        ThisNode:=TModDivNode(t).left;
                        case ThisNode.nodetype of
                          typeconvn:
                            begin
                              ThisDef:=TTypeConvNode(ThisNode).left.resultdef;
                              { See if we can simplify things to a smaller ordinal to
                                reduce code size and increase speed }
                              if is_signed(ThisDef) and
                                is_integer(ThisDef) and
                                { Byte-sized multiplications can cause problems }
                                (ThisDef.size>=2) and
                                { Make sure the divisor is in range }
                                (divisor>=TOrdDef(ThisDef).low) and
                                (divisor<=TOrdDef(ThisDef).high) then
                                begin
                                  TOrdConstNode(TModDivNode(t).right).resultdef:=ThisDef;
                                  TOrdConstNode(m).resultdef:=ThisDef;
                                  TModDivNode(t).resultdef:=ThisDef;

                                  { Destroy the typeconv node }
                                  TModDivNode(t).left:=TTypeConvNode(ThisNode).left;
                                  TTypeConvNode(ThisNode).left:=nil;
                                  ThisNode.Free;
                                  Continue;
                                end;
                            end;
                          ordconstn:
                            begin
                              { Just simplify into a constant }
                              Result:=inherited simplify(forinline);
                              Exit;
                            end;
                          else
                            ;
                        end;

                        DoOptimisation:=True;
                        Break;
                      end;

                    if DoOptimisation then
                      begin
                        ThisDef:=TModDivNode(t).left.resultdef;

                        if nodetype = equaln then
                          ThisType:=lten
                        else
                          ThisType:=gtn;

                        N:=ThisDef.size*8;
                        calc_mul_inverse(N, TOrdConstNode(TModDivNode(t).right).value.uvalue, reciprocal, shift);

                        { Construct the following node tree for odd divisors:
                          <lten> (for equaln) or <gtn> (for notequaln)
                            <addn>
                              <muln>
                                <typeconv signed-to-unsigned>
                                  <numerator node (TModDivNode(t).left)>
                                <reciprocal constant>
                              <comparison constant (effectively a signed shift)>
                            <comparison constant * 2>

                          For even divisors, convert them to the form k.2^b, with
                          odd k, then construct the following:
                          <lten> (for equaln) or <gtn> (for notequaln)
                            <ror>
                              (b)
                              <addn>
                                <muln>
                                  <typeconv signed-to-unsigned>
                                    <numerator node (TModDivNode(t).left)>
                                  <reciprocal constant>
                                <comparison constant (effectively a signed shift)>
                            <comparison constant shr (b - 1)>
                        }

                        ThisNode:=ctypeconvnode.create_internal(TModDivNode(t).left, ThisDef);
                        TTypeConvNode(ThisNode).convtype:=tc_int_2_int;
                        ThisDef:=get_unsigned_inttype(ThisDef);
                        ThisNode.resultdef:=ThisDef;

                        TModDivNode(t).left:=nil;

                        ConstNode:=cordconstnode.create(reciprocal, ThisDef, False);
                        ConstNode.resultdef:=ThisDef;

                        ThisNode:=caddnode.create_internal(muln, ThisNode, ConstNode);
                        ThisNode.resultdef:=ThisDef;

{$push}
{$warnings off}
                        { c = floor(2^(N-1) / k) & -(2^b); the masked form is
                          only needed for even divisors (shift > 0) }
                        if shift>0 then
                          comparison:=((aWord(1) shl ((N-1) and (SizeOf(aWord)*8-1))) div (divisor shr shift)) and -(1 shl shift)
                        else
                          comparison:=(aWord(1) shl ((N-1) and (SizeOf(aWord)*8-1))) div divisor;
{$pop}
                        ConstNode:=cordconstnode.create(comparison, ThisDef, False);
                        ConstNode.resultdef:=ThisDef;

                        ThisNode:=caddnode.create_internal(addn, ThisNode, ConstNode);
                        ThisNode.resultdef:=ThisDef;

                        if shift>0 then
                          begin
                            { even divisor: rotate the low zero bits away }
                            ConstNode:=cordconstnode.create(shift, u8inttype, False);
                            ConstNode.resultdef:=u8inttype;
                            ThisNode:=cinlinenode.createintern(in_ror_x_y,false,
                              ccallparanode.create(ConstNode,
                              ccallparanode.create(ThisNode, nil)));

                            ThisNode.resultdef:=ThisDef;

                            ConstNode:=cordconstnode.create(comparison shr (shift - 1), ThisDef, False);
                          end
                        else
                          ConstNode:=cordconstnode.create(comparison*2, ThisDef, False);

                        ConstNode.resultdef:=ThisDef;

                        Result:=CAddNode.create_internal(ThisType, ThisNode, ConstNode);
                        Result.resultdef:=resultdef;
                        Exit;
                      end;
                  end
                else
                  begin
                    { For bit length N, convert "(x mod d) = 0" or "(x mod d) <> 0", where
                      d is an odd-numbered integer constant, to "(x * r) <= m", where
                      dr = 1 (mod 2^N) and m = floor(2^N / d).

                      If d is even, convert to the form k.2^b, where k is odd, then
                      convert to "(x * r) ror b <= m", where kr = 1 (mod 2^N) and
                      m = floor(2^N / d) = floor(2^(N-b) / k) }
                    { Same typeconv-peeling loop as the signed case, but for
                      unsigned numerators }
                    while True do
                      begin
                        ThisNode:=TModDivNode(t).left;
                        case ThisNode.nodetype of
                          typeconvn:
                            begin
                              ThisDef:=TTypeConvNode(ThisNode).left.resultdef;
                              { See if we can simplify things to a smaller ordinal to
                                reduce code size and increase speed }
                              if not is_signed(ThisDef) and
                                is_integer(ThisDef) and
                                { Byte-sized multiplications can cause problems }
                                (ThisDef.size>=2) and
                                { Make sure the divisor is in range }
                                (divisor>=TOrdDef(ThisDef).low) and
                                (divisor<=TOrdDef(ThisDef).high) then
                                begin
                                  TOrdConstNode(TModDivNode(t).right).resultdef:=ThisDef;
                                  TOrdConstNode(m).resultdef:=ThisDef;
                                  TModDivNode(t).resultdef:=ThisDef;

                                  { Destroy the typeconv node }
                                  TModDivNode(t).left:=TTypeConvNode(ThisNode).left;
                                  TTypeConvNode(ThisNode).left:=nil;
                                  ThisNode.Free;
                                  Continue;
                                end;
                            end;
                          ordconstn:
                            begin
                              { Just simplify into a constant }
                              Result:=inherited simplify(forinline);
                              Exit;
                            end;
                          else
                            ;
                        end;

                        DoOptimisation:=True;
                        Break;
                      end;

                    if DoOptimisation then
                      begin
                        ThisDef:=TModDivNode(t).left.resultdef;

                        { Construct the following node tree for odd divisors:
                          <lten> (for equaln) or <gtn> (for notequaln)
                            <muln>
                              <numerator node (TModDivNode(t).left)>
                              <reciprocal constant>
                            (2^N / divisor)

                          For even divisors, convert them to the form k.2^b, with
                          odd k, then construct the following:
                          <lten> (for equaln) or <gtn> (for notequaln)
                            <ror>
                              (b)
                              <muln>
                                <numerator node (TModDivNode(t).left)>
                                <reciprocal constant>
                            (2^N / divisor)
                        }

                        if nodetype=equaln then
                          ThisType:=lten
                        else
                          ThisType:=gtn;

                        N:=ThisDef.size*8;
                        calc_mul_inverse(N, TOrdConstNode(TModDivNode(t).right).value.uvalue, reciprocal, shift);

                        ConstNode:=cordconstnode.create(reciprocal, ThisDef, False);
                        ConstNode.resultdef:=ThisDef;

                        ThisNode:=caddnode.create_internal(muln, TModDivNode(t).left, ConstNode);
                        ThisNode.resultdef:=ThisDef;

                        TModDivNode(t).left:=nil;

                        if shift>0 then
                          begin
                            { even divisor: rotate the low zero bits away }
                            ConstNode:=cordconstnode.create(shift, u8inttype, False);
                            ConstNode.resultdef:=u8inttype;
                            ThisNode:=cinlinenode.createintern(in_ror_x_y,false,
                              ccallparanode.create(ConstNode,
                              ccallparanode.create(ThisNode, nil)));

                            ThisNode.resultdef:=ThisDef;

                            comparison:=(aWord(1) shl ((N-shift) and (SizeOf(aWord)*8-1))) div (divisor shr shift);
                          end
                        else
                          begin
{$push}
{$warnings off}
                            { Because 2^N and divisor are relatively prime,
                              floor(2^N / divisor) = floor((2^N - 1) / divisor) }
                            comparison:=(aWord(not 0) shr (((SizeOf(aWord)*8)-N) and (SizeOf(aWord)*8-1))) div divisor;
{$pop}
                          end;

                        ConstNode:=cordconstnode.create(comparison, ThisDef, False);
                        ConstNode.resultdef:=ThisDef;

                        Result:=CAddNode.create_internal(ThisType, ThisNode, ConstNode);
                        Result.resultdef:=resultdef;
                        Exit;
                      end;
                  end;
              end;
          end;
      end;
{$ifend defined(cpu64bitalu) or defined(cpu32bitalu) or defined(cpu16bitalu)}
    Result:=inherited simplify(forinline);
  end;
|
|
|
|
|
|
{ Returns true when a fused multiply-add instruction may be used for this
  node: the result must stay in an xmm register and the target fpu must
  provide FMA or FMA4. }
function tx86addnode.use_fma : boolean;
  begin
{$ifndef i8086}
    { fiddling with x87 registers and fma makes no sense: only use fma when
      the result stays in an xmm register }
    if use_vectorfpu(resultdef) then
      Result:=(fpu_capabilities[current_settings.fputype]*[FPUX86_HAS_FMA,FPUX86_HAS_FMA4])<>[]
    else
      Result:=false;
{$else i8086}
    Result:=inherited use_fma;
{$endif i8086}
  end;
|
|
|
|
|
|
{ Generates an SSE/AVX scalar float comparison ((v)comiss/(v)comisd) and
  leaves the result in the cpu flags (LOC_FLAGS). Keeps track of operand
  order via nf_swapped so getfpuresflags picks the correct condition. }
procedure tx86addnode.second_cmpfloatvector;
  var
    op : tasmop;
  const
    { indexed by UseAVX: legacy SSE opcode vs. VEX-encoded opcode }
    ops_single: array[boolean] of tasmop = (A_COMISS,A_VCOMISS);
    ops_double: array[boolean] of tasmop = (A_COMISD,A_VCOMISD);
  begin
    if is_single(left.resultdef) then
      op:=ops_single[UseAVX]
    else if is_double(left.resultdef) then
      op:=ops_double[UseAVX]
    else
      internalerror(200402222);
    pass_left_right;

    { fpu operands are always in reversed order on the stack }
    if (left.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER]) and (right.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER]) then
      toggleflag(nf_swapped);

    location_reset(location,LOC_FLAGS,OS_NO);

    { Direct move fpu->mm register is not possible, so force any fpu operands to
      memory (not to mm registers because one of the memory locations can be used
      directly in compare instruction, yielding shorter code) }
    if left.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
      hlcg.location_force_mem(current_asmdata.CurrAsmList,left.location,left.resultdef);
    if right.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
      hlcg.location_force_mem(current_asmdata.CurrAsmList,right.location,right.resultdef);

    if (right.location.loc in [LOC_MMREGISTER,LOC_CMMREGISTER]) then
      begin
        { right is already in a register: compare with right as the
          instruction destination... }
        case left.location.loc of
          LOC_REFERENCE,LOC_CREFERENCE:
            begin
              tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,left.location.reference);
              current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(op,S_NO,left.location.reference,right.location.register));
            end;
          LOC_MMREGISTER,LOC_CMMREGISTER:
            current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(op,S_NO,left.location.register,right.location.register));
          else
            internalerror(200402221);
        end;
        { ...which reverses the comparison direction, so flip the swap flag
          to compensate when the flags are interpreted below }
        toggleflag(nf_swapped);
      end
    else
      begin
        hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
        case right.location.loc of
          LOC_REFERENCE,LOC_CREFERENCE:
            begin
              tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
              current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(op,S_NO,right.location.reference,left.location.register));
            end;
          LOC_MMREGISTER,LOC_CMMREGISTER:
            current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(op,S_NO,right.location.register,left.location.register));
          else
            internalerror(200402223);
        end;
      end;
    location.resflags:=getfpuresflags;
    location_freetemp(current_asmdata.CurrAsmList,left.location);
    location_freetemp(current_asmdata.CurrAsmList,right.location);
  end;
|
|
|
|
|
|
{ Generates packed (vector) float +, -, * and / for operands that fit in an
  mm register; anything larger is not yet supported. }
procedure tx86addnode.second_opvector;
  var
    op : topcg;
  begin
    pass_left_right;
    if (nf_swapped in flags) then
      swapleftright;

    case nodetype of
      addn :
        op:=OP_ADD;
      muln :
        op:=OP_MUL;
      subn :
        op:=OP_SUB;
      slashn :
        op:=OP_DIV;
      else
        internalerror(200610071);
    end;

    if fits_in_mm_register(left.resultdef) then
      begin
        location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
        { we can use only right as left operand if the operation is commutative }
        if (right.location.loc=LOC_MMREGISTER) and (op in [OP_ADD,OP_MUL]) then
          begin
            if UseAVX then
              begin
                { AVX: three-operand form, result in a fresh register }
                location.register:=cg.getmmregister(current_asmdata.CurrAsmList,OS_VECTOR);
                { NOTE(review): this branch casts left.resultdef directly to
                  tfloatdef, while the branch below goes through
                  tarraydef(...).elementdef — confirm left can never be an
                  array type here }
                cg.a_opmm_loc_reg_reg(current_asmdata.CurrAsmList,op,tfloat2tcgsize[tfloatdef(left.resultdef).floattype],left.location,right.location.register,location.register,nil);
              end
            else
              begin
                { SSE: two-operand form, reuse right's register as result }
                location.register:=right.location.register;
                cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,op,tfloat2tcgsize[tfloatdef(left.resultdef).floattype],left.location,location.register,nil);
              end;
          end
        else
          begin
            location_force_mmreg(current_asmdata.CurrAsmList,left.location,false);
            if UseAVX then
              begin
                location.register:=cg.getmmregister(current_asmdata.CurrAsmList,OS_VECTOR);
                cg.a_opmm_loc_reg_reg(current_asmdata.CurrAsmList,op,
                  tfloat2tcgsize[tfloatdef(tarraydef(left.resultdef).elementdef).floattype],right.location,left.location.register,location.register,nil);
              end
            else
              begin
                location.register:=left.location.register;
                cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,op,
                  tfloat2tcgsize[tfloatdef(tarraydef(left.resultdef).elementdef).floattype],right.location,location.register,nil);
              end;
          end;
      end
    else
      begin
        { not yet supported }
        internalerror(200610072);
      end
  end;
|
|
|
|
|
|
{ Generates float +, -, * and /. Dispatches to the SSE/AVX implementations
  when the vector fpu is usable; otherwise emits x87 code, choosing between
  the register-register (FxxxP) and register-memory (Fxxx) instruction
  forms depending on whether one operand could stay in memory. }
procedure tx86addnode.second_addfloat;
  const
    { indexed by hasref: [false] = both operands on the fpu stack (popping
      form), [true] = one operand is a memory reference }
    ops_add: array[boolean] of TAsmOp = (A_FADDP,A_FADD);
    ops_mul: array[boolean] of TAsmOp = (A_FMULP,A_FMUL);
    ops_sub: array[boolean] of TAsmOp = (A_FSUBP,A_FSUB);
    ops_rsub: array[boolean] of TAsmOp = (A_FSUBRP,A_FSUBR);
    ops_div: array[boolean] of TAsmOp = (A_FDIVP,A_FDIV);
    ops_rdiv: array[boolean] of TAsmOp = (A_FDIVRP,A_FDIVR);
  var
    op : TAsmOp;
    refnode, hp: tnode;
    hasref : boolean;
  begin
    if use_vectorfpu(resultdef) then
      begin
        if UseAVX then
          second_addfloatavx
        else
          second_addfloatsse;
        exit;
      end;

    { can the operation do the conversion? }
    { the x87 loads single/double operands at full precision, so a widening
      typeconv on either operand is redundant and is stripped here }
    if (left.nodetype=typeconvn) and (is_double(ttypeconvnode(left).left.resultdef) or is_single(ttypeconvnode(left).left.resultdef)) then
      begin
        hp:=left;
        left:=ttypeconvnode(left).left;
        ttypeconvnode(hp).left:=nil;
        hp.Free;
      end;
    if (right.nodetype=typeconvn) and (is_double(ttypeconvnode(right).left.resultdef) or is_single(ttypeconvnode(right).left.resultdef)) then
      begin
        hp:=right;
        right:=ttypeconvnode(right).left;
        ttypeconvnode(hp).left:=nil;
        hp.Free;
      end;

    pass_left_right;
    { refnode is set when one operand remains a memory reference }
    prepare_x87_locations(refnode);
    hasref:=assigned(refnode);

    case nodetype of
      addn :
        op:=ops_add[hasref];
      muln :
        op:=ops_mul[hasref];
      subn :
        { subtraction is not commutative: use the reversed opcode when the
          operands were swapped }
        if (nf_swapped in flags) then
          op:=ops_rsub[hasref]
        else
          op:=ops_sub[hasref];
      slashn :
        if (nf_swapped in flags) then
          op:=ops_rdiv[hasref]
        else
          op:=ops_div[hasref];
      else
        internalerror(2003042203);
    end;

    if hasref then
      emit_ref(op,tcgsize2opsize[refnode.location.size],refnode.location.reference)
    else
      begin
        { both operands on the stack: combine st0/st1 and pop one entry }
        emit_reg_reg(op,S_NO,NR_ST,NR_ST1);
        tcgx86(cg).dec_fpu_stack;
      end;

    location_reset(location,LOC_FPUREGISTER,def_cgsize(resultdef));
    location.register:=NR_ST;
  end;
|
|
|
|
|
|
{ Generates a float comparison, leaving the result in the cpu flags.
  Uses the vector-fpu path when possible; otherwise x87: FCOMIP on
  Pentium 2 and newer, FCOMPP + status-word transfer on older cpus
  (with a memory round-trip on pre-286 targets that lack FNSTSW AX). }
procedure tx86addnode.second_cmpfloat;
{$ifdef i8086}
  var
    tmpref: treference;
{$endif i8086}
  begin
    if use_vectorfpu(left.resultdef) or use_vectorfpu(right.resultdef) then
      begin
        second_cmpfloatvector;
        exit;
      end;

    pass_left_right;
    force_left_and_right_fpureg;

{$ifndef x86_64}
    if current_settings.cputype<cpu_Pentium2 then
      begin
        { compare and pop both operands }
        emit_none(A_FCOMPP,S_NO);
        tcgx86(cg).dec_fpu_stack;
        tcgx86(cg).dec_fpu_stack;

        { load fpu flags }
{$ifdef i8086}
        if current_settings.cputype < cpu_286 then
          begin
            { 8087: FSTSW can only store to memory, so go through a temp
              and load the high status byte into AH for SAHF }
            tg.gettemp(current_asmdata.CurrAsmList,2,2,tt_normal,tmpref);
            emit_ref(A_FSTSW,S_NO,tmpref);
            cg.getcpuregister(current_asmdata.CurrAsmList,NR_AX);
            inc(tmpref.offset);
            emit_ref_reg(A_MOV,S_B,tmpref,NR_AH);
            dec(tmpref.offset);
            emit_none(A_SAHF,S_NO);
            cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_AX);
            tg.ungettemp(current_asmdata.CurrAsmList,tmpref);
          end
        else
{$endif i8086}
          begin
            { FNSTSW AX + SAHF copies the fpu condition bits into the
              cpu flags }
            cg.getcpuregister(current_asmdata.CurrAsmList,NR_AX);
            emit_reg(A_FNSTSW,S_NO,NR_AX);
            emit_none(A_SAHF,S_NO);
            cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_AX);
          end;
        if cs_fpu_fwait in current_settings.localswitches then
          current_asmdata.CurrAsmList.concat(Taicpu.Op_none(A_FWAIT,S_NO));
      end
    else
{$endif x86_64}
      begin
        current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_FCOMIP,S_NO,NR_ST1,NR_ST0));
        { fcomip pops only one fpu register }
        current_asmdata.CurrAsmList.concat(taicpu.op_reg(A_FSTP,S_NO,NR_ST0));
        tcgx86(cg).dec_fpu_stack;
        tcgx86(cg).dec_fpu_stack;
      end;

    location_reset(location,LOC_FLAGS,OS_NO);
    location.resflags:=getfpuresflags;
  end;
|
|
|
|
|
|
{*****************************************************************************
|
|
Add64bit
|
|
*****************************************************************************}
|
|
|
|
{ 64-bit addition: on a 64-bit ALU this is just an ordinal operation;
  32/16-bit targets implement it in a cpu-specific descendant, so reaching
  this method there is an internal error. }
procedure tx86addnode.second_add64bit;
  begin
{$ifdef cpu64bitalu}
    second_addordinal;
{$else cpu64bitalu}
    { must be implemented separate }
    internalerror(200402042);
{$endif cpu64bitalu}
  end;
|
|
|
|
|
|
{ 64-bit comparison: on a 64-bit ALU this is just an ordinal comparison;
  32/16-bit targets implement it in a cpu-specific descendant, so reaching
  this method there is an internal error. }
procedure tx86addnode.second_cmp64bit;
  begin
{$ifdef cpu64bitalu}
    second_cmpordinal;
{$else cpu64bitalu}
    { must be implemented separate }
    internalerror(200402043);
{$endif cpu64bitalu}
  end;
|
|
|
|
|
|
{*****************************************************************************
|
|
AddOrdinal
|
|
*****************************************************************************}
|
|
|
|
{ Generates integer +, -, *, and, or, xor. Prefers the three-operand
  instruction forms (lea/imul-with-const/BMI2 mulx) when no operand is
  already in a register; otherwise falls back to the classic two-operand
  forms, reusing left's register as the result. Emits an overflow check
  for add/sub/mul when the node requires one. }
procedure tx86addnode.second_addordinal;
  var
    opsize : tcgsize;
    unsigned : boolean;
    cgop : topcg;
    checkoverflow : Boolean;
    ovloc : tlocation;          { overflow location filled in by the hlcg helpers }
    tmpreg : TRegister;
  begin
    { determine if the comparison will be unsigned }
    unsigned:=not(is_signed(left.resultdef)) or
              not(is_signed(right.resultdef));

    { assume no overflow checking is required }
    checkoverflow := false;

    ovloc.loc:=LOC_VOID;

    { map the node type onto the generic operation; only add, sub and mul
      can overflow }
    case nodetype of
      addn:
        begin
          cgop:=OP_ADD;
          checkoverflow:=true;
        end;
      xorn :
        begin
          cgop:=OP_XOR;
        end;
      orn :
        begin
          cgop:=OP_OR;
        end;
      andn:
        begin
          cgop:=OP_AND;
        end;
      muln:
        begin
          checkoverflow:=true;
          if unsigned then
            cgop:=OP_MUL
          else
            cgop:=OP_IMUL;
        end;
      subn :
        begin
          checkoverflow:=true;
          cgop:=OP_SUB;
        end;
      else
        internalerror(2015022501);
    end;

    checkoverflow:=
      checkoverflow and
      needoverflowcheck;

    opsize:=def_cgsize(left.resultdef);

    pass_left_right;

    { do we have to allocate a register? If yes, then three opcode instructions are better, however for sub three op code instructions
      make no sense if right is a reference }
    if ((left.location.loc<>LOC_REGISTER) and (right.location.loc<>LOC_REGISTER) and
        ((nodetype<>subn) or not(right.location.loc in [LOC_REFERENCE,LOC_CREFERENCE])) and
        { 3 op mul makes only sense if a constant is involved }
        ((nodetype<>muln) or (left.location.loc=LOC_CONSTANT) or (right.location.loc=LOC_CONSTANT)
{$ifndef i8086}
         or ((CPUX86_HAS_BMI2 in cpu_capabilities[current_settings.cputype]) and (not(needoverflowcheck))
        )
{$endif i8086}
        ) and
        (not(nodetype in [orn,andn,xorn]))) or
       ((nodetype=addn) and (left.location.loc in [LOC_REGISTER,LOC_CREGISTER,LOC_CONSTANT]) and (right.location.loc in [LOC_REGISTER,LOC_CREGISTER,LOC_CONSTANT])) then
      begin
        { allocate registers }
        force_reg_left_right(false,true);
        set_result_location_reg;
        if nodetype<>subn then
          begin
            { flags must be allocated before the operation so the overflow
              check below can consume them }
            if checkoverflow then
              cg.a_reg_alloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS);

            if (right.location.loc<>LOC_CONSTANT) then
              hlcg.a_op_reg_reg_reg_checkoverflow(current_asmdata.CurrAsmList,cgop,resultdef,
                left.location.register,right.location.register,
                location.register,checkoverflow,ovloc)
            else
              hlcg.a_op_const_reg_reg_checkoverflow(current_asmdata.CurrAsmList,cgop,resultdef,
                right.location.value,left.location.register,
                location.register,checkoverflow,ovloc);
          end
        else { subtract is a special case since its not commutative }
          begin
            if (nf_swapped in flags) then
              swapleftright;
            if left.location.loc<>LOC_CONSTANT then
              begin
                if checkoverflow then
                  cg.a_reg_alloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS);

                if right.location.loc<>LOC_CONSTANT then
                  hlcg.a_op_reg_reg_reg_checkoverflow(current_asmdata.CurrAsmList,OP_SUB,resultdef,
                    right.location.register,left.location.register,
                    location.register,checkoverflow,ovloc)
                else
                  hlcg.a_op_const_reg_reg_checkoverflow(current_asmdata.CurrAsmList,OP_SUB,resultdef,
                    right.location.value,left.location.register,
                    location.register,checkoverflow,ovloc);
              end
            else
              begin
                { constant minus register: materialise the constant first }
                tmpreg:=hlcg.getintregister(current_asmdata.CurrAsmList,resultdef);
                hlcg.a_load_const_reg(current_asmdata.CurrAsmList,resultdef,
                  left.location.value,tmpreg);

                if checkoverflow then
                  cg.a_reg_alloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS);

                hlcg.a_op_reg_reg_reg_checkoverflow(current_asmdata.CurrAsmList,OP_SUB,resultdef,
                  right.location.register,tmpreg,location.register,checkoverflow,ovloc);
              end;
          end
      end
    else
      begin
        { at least one location should be a register, if yes, try to re-use it, so we can try two operand opcodes }
        if left.location.loc<>LOC_REGISTER then
          begin
            if right.location.loc<>LOC_REGISTER then
              hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,left.resultdef,false)
            else
              begin
                location_swap(left.location,right.location);
                toggleflag(nf_swapped);
              end;
          end;

        { at this point, left.location.loc should be LOC_REGISTER }
        if right.location.loc=LOC_REGISTER then
          begin
            if checkoverflow then
              cg.a_reg_alloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS);

            { when swapped another result register }
            if (nodetype=subn) and (nf_swapped in flags) then
              begin
                cg.a_op_reg_reg(current_asmdata.CurrAsmList,cgop,opsize,
                  left.location.register,right.location.register);
                location_swap(left.location,right.location);
                toggleflag(nf_swapped);
              end
            else
              cg.a_op_reg_reg(current_asmdata.CurrAsmList,cgop,opsize,
                right.location.register,left.location.register);
          end
        else
          begin
            { right.location<>LOC_REGISTER }
            if right.location.loc in [LOC_CSUBSETREF,LOC_CSUBSETREG,LOC_SUBSETREF,LOC_SUBSETREG] then
              hlcg.location_force_reg(current_asmdata.CurrAsmList,right.location,right.resultdef,left.resultdef,true);

            if (nodetype=subn) and (nf_swapped in flags) then
              begin
                { right - left: load right into a fresh register and
                  subtract the old left register from it }
                tmpreg:=left.location.register;
                left.location.register:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
                cg.a_load_loc_reg(current_asmdata.CurrAsmList,opsize,right.location,left.location.register);

                if checkoverflow then
                  cg.a_reg_alloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS);

                cg.a_op_reg_reg(current_asmdata.CurrAsmList,cgop,opsize,tmpreg,left.location.register);
              end
            else
              begin
                if checkoverflow then
                  cg.a_reg_alloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS);

                cg.a_op_loc_reg(current_asmdata.CurrAsmList,cgop,opsize,right.location,left.location.register);
              end;
            location_freetemp(current_asmdata.CurrAsmList,right.location);
          end;

        location_copy(location,left.location);
      end;

    { emit overflow check if required }
    if checkoverflow then
      cg.g_overflowcheck_loc(current_asmdata.CurrAsmList,Location,resultdef,ovloc);
  end;
|
|
|
|
|
|
{ Boolean and/or/xor: short-circuit evaluation and 64-bit booleans are
  handled by the generic (inherited) implementation; everything else is a
  plain ordinal bit operation. }
procedure tx86addnode.second_addboolean;
  begin
    if ((nodetype in [orn,andn]) and
        (not(cs_full_boolean_eval in current_settings.localswitches) or
         (nf_short_bool in flags))) or
       is_64bit(left.resultdef) then
      inherited second_addboolean
    else
      second_addordinal;
  end;
|
|
|
|
|
|
{ Generates an integer comparison, leaving the result in the cpu flags.
  Compares a constant directly against a memory operand when possible
  (avoiding a register load); otherwise forces left into a register and
  uses the generic cmp emitter. }
procedure tx86addnode.second_cmpordinal;
  var
    opdef : tdef;
    opsize : tcgsize;
    unsigned : boolean;
  begin
    { the comparison is unsigned as soon as one operand is unsigned }
    unsigned:=not(is_signed(left.resultdef)) or
              not(is_signed(right.resultdef));
    opdef:=left.resultdef;
    opsize:=def_cgsize(opdef);

    pass_left_right;

    if (right.location.loc=LOC_CONSTANT) and
       (left.location.loc in [LOC_REFERENCE, LOC_CREFERENCE])
{$ifdef x86_64}
       { 64-bit cmp only accepts a sign-extended 32-bit immediate }
       and ((not (opsize in [OS_64,OS_S64])) or (
         (right.location.value>=low(longint)) and (right.location.value<=high(longint))
       ))
{$endif x86_64}
       then
      begin
        emit_const_ref(A_CMP, TCGSize2Opsize[opsize], right.location.value, left.location.reference);
        location_freetemp(current_asmdata.CurrAsmList,left.location);
      end
    else
      begin
        left_must_be_reg(opdef,opsize,false);
        emit_generic_code(A_CMP,opsize,unsigned,false,false);
        location_freetemp(current_asmdata.CurrAsmList,right.location);
        location_freetemp(current_asmdata.CurrAsmList,left.location);
      end;
    location_reset(location,LOC_FLAGS,OS_NO);
    location.resflags:=getresflags(unsigned);
  end;
|
|
|
|
begin
  { register this class as the add-node implementation for x86 targets }
  caddnode:=tx86addnode;
end.
|