mirror of
https://gitlab.com/freepascal.org/fpc/source.git
synced 2025-04-05 05:58:02 +02:00
673 lines
26 KiB
ObjectPascal
673 lines
26 KiB
ObjectPascal
{
    Copyright (c) 2000-2002 by Florian Klaempfl

    Code generation for add nodes on the i386

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

 ****************************************************************************
}
unit n386add;

{$i fpcdefs.inc}

interface

  uses
    node,nadd,cpubase,nx86add;

  type
    { i386-specific code generator for add nodes (arithmetic, logical and
      comparison binary operators). Overrides the generic x86 implementation
      for 64-bit operations and for multiplication. }
    ti386addnode = class(tx86addnode)
      { Never use the generic 32->64 bit multiplication helper on i386. }
      function use_generic_mul32to64: boolean; override;
      { Decides whether 64-bit multiplication is delegated to the generic
        (RTL helper based) implementation. }
      function use_generic_mul64bit: boolean; override;
      procedure second_addordinal; override;
      procedure second_add64bit;override;
      procedure second_cmp64bit;override;
      { Emits inline MUL/IMUL/MULX based multiplication code. }
      procedure second_mul(unsigned: boolean);
      { Emits inline 64x64->64 bit multiplication code. }
      procedure second_mul64bit;
    protected
      { Moves the multiplication result out of EAX (or EDX:EAX) into freshly
        allocated virtual register(s) and sets up self.location. }
      procedure set_mul_result_location;
    end;
implementation
|
|
|
|
uses
|
|
globtype,systems,
|
|
cutils,verbose,globals,
|
|
symconst,symdef,paramgr,defutil,
|
|
aasmbase,aasmtai,aasmdata,aasmcpu,
|
|
cgbase,procinfo,
|
|
ncon,nset,cgutils,tgobj,
|
|
cpuinfo,
|
|
cga,ncgutil,cgobj,cg64f32,cgx86,
|
|
hlcgobj;
|
|
|
|
{*****************************************************************************
                            use_generic_mul32to64
*****************************************************************************}
function ti386addnode.use_generic_mul32to64: boolean;
  begin
    { the i386 always has a native 32x32->64 bit MUL/IMUL available }
    use_generic_mul32to64:=false;
  end;
function ti386addnode.use_generic_mul64bit: boolean;
  begin
    { delegate 64-bit multiplication to the generic helper when optimizing
      for size, or when overflow checking is required }
    use_generic_mul64bit:=(cs_opt_size in current_settings.optimizerswitches) or
      needoverflowcheck;
  end;
{ handles all unsigned multiplications, and 32->64 bit signed ones.
  32bit-only signed mul is handled by generic codegen }
procedure ti386addnode.second_addordinal;
  var
    treat_unsigned : boolean;
  begin
    { the multiplication is unsigned unless both operands are signed }
    treat_unsigned:=not(is_signed(left.resultdef) and
                        is_signed(right.resultdef));
    { use IMUL instead of MUL in case overflow checking is off and we're
      doing a 32->32-bit multiplication }
    if not(needoverflowcheck or is_64bit(resultdef)) then
      treat_unsigned:=false;
    if (nodetype=muln) and (treat_unsigned or is_64bit(resultdef)) then
      second_mul(treat_unsigned)
    else
      inherited second_addordinal;
  end;
{*****************************************************************************
                                Add64bit
*****************************************************************************}
{ Generates code for 64-bit add/sub/and/or/xor; muln is delegated to
  second_mul64bit. The result ends up in left.location's register pair,
  which is then copied to self.location. Overflow checking (add/sub only)
  requires the FLAGS register to stay allocated up to the FPC_OVERFLOW call. }
procedure ti386addnode.second_add64bit;
  var
    op : TOpCG;
    op1,op2 : TAsmOp;
    opsize : TOpSize;
    hregister,
    hregister2 : tregister;
    hl4 : tasmlabel;
    mboverflow,
    unsigned:boolean;
    r:Tregister;
  begin
    pass_left_right;

    op1:=A_NONE;
    op2:=A_NONE;
    mboverflow:=false;
    opsize:=S_L;
    { the operation is unsigned if either operand is an unsigned 64-bit ordinal }
    unsigned:=((left.resultdef.typ=orddef) and
               (torddef(left.resultdef).ordtype=u64bit)) or
              ((right.resultdef.typ=orddef) and
               (torddef(right.resultdef).ordtype=u64bit));
    case nodetype of
      addn :
        begin
          op:=OP_ADD;
          mboverflow:=true;
        end;
      subn :
        begin
          op:=OP_SUB;
          { explicit opcodes for the "swapped subtraction" path below }
          op1:=A_SUB;
          op2:=A_SBB;
          mboverflow:=true;
        end;
      xorn:
        op:=OP_XOR;
      orn:
        op:=OP_OR;
      andn:
        op:=OP_AND;
      muln:
        begin
          second_mul64bit;
          exit;
        end
      else
        begin
          { everything should be handled in pass_1 (JM) }
          internalerror(2001090505);
        end;
    end;

    { left and right no register? }
    { then one must be demanded   }
    if (left.location.loc<>LOC_REGISTER) then
      begin
        if (right.location.loc<>LOC_REGISTER) then
          begin
            { force left into a fresh 64-bit register pair }
            hregister:=cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
            hregister2:=cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
            cg64.a_load64_loc_reg(current_asmdata.CurrAsmList,left.location,joinreg64(hregister,hregister2));
            location_reset(left.location,LOC_REGISTER,left.location.size);
            left.location.register64.reglo:=hregister;
            left.location.register64.reghi:=hregister2;
          end
        else
          begin
            { right is already in a register: swap operands instead of loading }
            location_swap(left.location,right.location);
            toggleflag(nf_swapped);
          end;
      end;

    { at this point, left.location.loc should be LOC_REGISTER }
    if right.location.loc=LOC_REGISTER then
      begin
        if mboverflow and needoverflowcheck then
          cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);

        { when swapped another result register }
        if (nodetype=subn) and (nf_swapped in flags) then
          begin
            cg64.a_op64_reg_reg(current_asmdata.CurrAsmList,op,location.size,
              left.location.register64,
              right.location.register64);
            location_swap(left.location,right.location);
            toggleflag(nf_swapped);
          end
        else
          begin
            cg64.a_op64_reg_reg(current_asmdata.CurrAsmList,op,location.size,
              right.location.register64,
              left.location.register64);
          end;
      end
    else
      begin
        { right.location<>LOC_REGISTER }
        if (nodetype=subn) and (nf_swapped in flags) then
          begin
            { compute right-left while keeping the result in left's registers:
              load right into a scratch reg, subtract left from it, move back }
            r:=cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
            cg64.a_load64low_loc_reg(current_asmdata.CurrAsmList,right.location,r);
            cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
            emit_reg_reg(op1,opsize,left.location.register64.reglo,r);
            emit_reg_reg(A_MOV,opsize,r,left.location.register64.reglo);
            cg64.a_load64high_loc_reg(current_asmdata.CurrAsmList,right.location,r);
            { the carry flag is still ok }
            emit_reg_reg(op2,opsize,left.location.register64.reghi,r);

            { We need to keep the FLAGS register allocated for overflow checks }
            if not mboverflow or not needoverflowcheck then
              cg.a_reg_dealloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);

            emit_reg_reg(A_MOV,opsize,r,left.location.register64.reghi);
          end
        else
          begin
            if mboverflow and needoverflowcheck then
              cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);

            cg64.a_op64_loc_reg(current_asmdata.CurrAsmList,op,location.size,right.location,
              left.location.register64);
          end;
        location_freetemp(current_asmdata.CurrAsmList,right.location);
      end;

    { only in case of overflow operations                     }
    { produce overflow code                                   }
    { we must put it here directly, because sign of operation }
    { is in unsigned VAR!!                                    }
    if mboverflow then
      begin
        if needoverflowcheck then
          begin
            current_asmdata.getjumplabel(hl4);
            { unsigned overflow sets the carry flag, signed overflow sets OF }
            if unsigned then
              cg.a_jmp_flags(current_asmdata.CurrAsmList,F_AE,hl4)
            else
              cg.a_jmp_flags(current_asmdata.CurrAsmList,F_NO,hl4);

            cg.a_reg_dealloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS);
            cg.a_call_name(current_asmdata.CurrAsmList,'FPC_OVERFLOW',false);
            cg.a_label(current_asmdata.CurrAsmList,hl4);
          end;
      end;

    location_copy(location,left.location);
  end;
{ Generates code for 64-bit comparisons as a LOC_JUMP result: the high
  dwords are compared first (firstjmp64bitcmp), then the low dwords
  (secondjmp64bitcmp), which must always be compared unsigned. }
procedure ti386addnode.second_cmp64bit;
  var
    truelabel,
    falselabel,
    hlab : tasmlabel;
    href : treference;
    unsigned : boolean;

  { Emits the jumps that can already be decided from the high-dword compare.
    For lten/gten this temporarily rewrites nodetype to the strict ltn/gtn
    forms, so getresflags yields the strict-inequality condition codes. }
  procedure firstjmp64bitcmp;
    var
      oldnodetype : tnodetype;
    begin
      { the jump the sequence is a little bit hairy }
      case nodetype of
        ltn,gtn:
          begin
            { hlab<>nil means the low-dword compare can be skipped; suppress
              jumps that would target the label replaced by hlab }
            if (hlab<>location.truelabel) then
              cg.a_jmp_flags(current_asmdata.CurrAsmList,getresflags(unsigned),location.truelabel);
            { cheat a little bit for the negative test }
            toggleflag(nf_swapped);
            if (hlab<>location.falselabel) then
              cg.a_jmp_flags(current_asmdata.CurrAsmList,getresflags(unsigned),location.falselabel);
            toggleflag(nf_swapped);
          end;
        lten,gten:
          begin
            oldnodetype:=nodetype;
            if nodetype=lten then
              nodetype:=ltn
            else
              nodetype:=gtn;
            if (hlab<>location.truelabel) then
              cg.a_jmp_flags(current_asmdata.CurrAsmList,getresflags(unsigned),location.truelabel);
            { cheat for the negative test }
            if nodetype=ltn then
              nodetype:=gtn
            else
              nodetype:=ltn;
            if (hlab<>location.falselabel) then
              cg.a_jmp_flags(current_asmdata.CurrAsmList,getresflags(unsigned),location.falselabel);
            nodetype:=oldnodetype;
          end;
        equaln:
          { unequal high dwords decide equality immediately }
          cg.a_jmp_flags(current_asmdata.CurrAsmList,F_NE,location.falselabel);
        unequaln:
          cg.a_jmp_flags(current_asmdata.CurrAsmList,F_NE,location.truelabel);
        else
          internalerror(2019050905);
      end;
    end;

  { Emits the jumps after the low-dword compare; at this point the high
    dwords are known to be equal. }
  procedure secondjmp64bitcmp;
    begin
      { the jump the sequence is a little bit hairy }
      case nodetype of
        ltn,gtn,lten,gten:
          begin
            { the comparisaion of the low dword have to be }
            { always unsigned!                             }
            cg.a_jmp_flags(current_asmdata.CurrAsmList,getresflags(true),location.truelabel);
            cg.a_jmp_always(current_asmdata.CurrAsmList,location.falselabel);
          end;
        equaln:
          begin
            cg.a_jmp_flags(current_asmdata.CurrAsmList,F_NE,location.falselabel);
            cg.a_jmp_always(current_asmdata.CurrAsmList,location.truelabel);
          end;
        unequaln:
          begin
            cg.a_jmp_flags(current_asmdata.CurrAsmList,F_NE,location.truelabel);
            cg.a_jmp_always(current_asmdata.CurrAsmList,location.falselabel);
          end;
        else
          internalerror(2019050904);
      end;
    end;

  begin
    truelabel:=nil;
    falselabel:=nil;
    pass_left_right;

    { the comparison is unsigned if either operand is an unsigned 64-bit ordinal }
    unsigned:=((left.resultdef.typ=orddef) and
               (torddef(left.resultdef).ordtype=u64bit)) or
              ((right.resultdef.typ=orddef) and
               (torddef(right.resultdef).ordtype=u64bit));

    { we have LOC_JUMP as result }
    current_asmdata.getjumplabel(truelabel);
    current_asmdata.getjumplabel(falselabel);
    location_reset_jump(location,truelabel,falselabel);

    { Relational compares against constants having low dword=0 can omit the
      second compare based on the fact that any unsigned value is >=0 }
    hlab:=nil;
    if (right.location.loc=LOC_CONSTANT) and
       (lo(right.location.value64)=0) then
      begin
        case getresflags(true) of
          F_AE: hlab:=location.truelabel ;
          F_B: hlab:=location.falselabel;
          else
            ;
        end;
      end;

    { constant vs. memory: compare directly against the reference,
      no need to force left into registers }
    if (right.location.loc=LOC_CONSTANT) and
       (left.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
      begin
        tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,left.location.reference);
        href:=left.location.reference;
        inc(href.offset,4);
        cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
        emit_const_ref(A_CMP,S_L,aint(hi(right.location.value64)),href);
        firstjmp64bitcmp;
        cg.a_reg_dealloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
        if assigned(hlab) then
          cg.a_jmp_always(current_asmdata.CurrAsmList,hlab)
        else
          begin
            cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
            emit_const_ref(A_CMP,S_L,aint(lo(right.location.value64)),left.location.reference);
            secondjmp64bitcmp;
            cg.a_reg_dealloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
          end;
        location_freetemp(current_asmdata.CurrAsmList,left.location);
        exit;
      end;

    { left and right no register? }
    { then one must be demanded   }
    if not (left.location.loc in [LOC_REGISTER,LOC_CREGISTER]) then
      begin
        if not (right.location.loc in [LOC_REGISTER,LOC_CREGISTER]) then
          hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,left.resultdef,true)
        else
          begin
            location_swap(left.location,right.location);
            toggleflag(nf_swapped);
          end;
      end;

    { at this point, left.location.loc should be LOC_[C]REGISTER }
    case right.location.loc of
      LOC_REGISTER,
      LOC_CREGISTER :
        begin
          cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
          emit_reg_reg(A_CMP,S_L,right.location.register64.reghi,left.location.register64.reghi);
          firstjmp64bitcmp;
          emit_reg_reg(A_CMP,S_L,right.location.register64.reglo,left.location.register64.reglo);
          secondjmp64bitcmp;
          cg.a_reg_dealloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
        end;
      LOC_CREFERENCE,
      LOC_REFERENCE :
        begin
          tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
          { href addresses the high dword of the 64-bit value }
          href:=right.location.reference;
          inc(href.offset,4);
          cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
          emit_ref_reg(A_CMP,S_L,href,left.location.register64.reghi);
          firstjmp64bitcmp;
          emit_ref_reg(A_CMP,S_L,right.location.reference,left.location.register64.reglo);
          secondjmp64bitcmp;
          cg.a_reg_dealloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
          location_freetemp(current_asmdata.CurrAsmList,right.location);
        end;
      LOC_CONSTANT :
        begin
          cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
          current_asmdata.CurrAsmList.concat(taicpu.op_const_reg(A_CMP,S_L,aint(hi(right.location.value64)),left.location.register64.reghi));
          firstjmp64bitcmp;
          cg.a_reg_dealloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
          { hlab<>nil: the low-dword compare is statically decided (low=0) }
          if assigned(hlab) then
            cg.a_jmp_always(current_asmdata.CurrAsmList,hlab)
          else
            begin
              cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
              current_asmdata.CurrAsmList.concat(taicpu.op_const_reg(A_CMP,S_L,aint(lo(right.location.value64)),left.location.register64.reglo));
              secondjmp64bitcmp;
              cg.a_reg_dealloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
            end;
        end;
      else
        internalerror(2002032803);
    end;

  end;
{*****************************************************************************
                                x86 MUL
*****************************************************************************}
{ Moves the multiplication result out of the fixed CPU registers (EAX for a
  32-bit result, EDX:EAX for a 64-bit one) into freshly allocated virtual
  registers, sets up self.location accordingly and frees operand temps.
  NOTE(review): EDX is released before EAX is copied — presumably so the
  register allocator can reuse EDX for the reglo virtual register. }
procedure ti386addnode.set_mul_result_location;
  begin
    location_reset(location,LOC_REGISTER,def_cgsize(resultdef));
    {Free EAX,EDX}
    cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_EDX);
    if is_64bit(resultdef) then
      begin
        {Allocate a couple of registers and store EDX:EAX into it}
        location.register64.reghi := cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
        cg.a_load_reg_reg(current_asmdata.CurrAsmList, OS_INT, OS_INT, NR_EDX, location.register64.reghi);
        cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_EAX);
        location.register64.reglo := cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
        cg.a_load_reg_reg(current_asmdata.CurrAsmList, OS_INT, OS_INT, NR_EAX, location.register64.reglo);
      end
    else
      begin
        {Allocate a new register and store the result in EAX in it.}
        location.register:=cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
        cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_EAX);
        cg.a_load_reg_reg(current_asmdata.CurrAsmList,OS_INT,OS_INT,NR_EAX,location.register);
      end;
    location_freetemp(current_asmdata.CurrAsmList,left.location);
    location_freetemp(current_asmdata.CurrAsmList,right.location);
  end;
{ Emits inline multiplication code: either MULX (when BMI2 is available and
  no overflow check is needed) or the one-operand MUL/IMUL in EAX/EDX,
  followed by an FPC_OVERFLOW call for checked 32->32 bit multiplications.
  Handles all unsigned multiplications and signed 32->64 bit ones (see
  second_addordinal). }
procedure ti386addnode.second_mul(unsigned: boolean);

  var
    reg,reghi,reglo:Tregister;
    ref:Treference;
    use_ref:boolean;
    hl4 : tasmlabel;

  const
    { indexed by "unsigned": IMUL for signed, MUL for unsigned operands }
    asmops: array[boolean] of tasmop = (A_IMUL, A_MUL);

  begin
    pass_left_right;
    reg:=NR_NO;
    reference_reset(ref,sizeof(pint),[]);

    { Mul supports registers and references, so if not register/reference,
      load the location into a register.
      The variant of IMUL which is capable of doing 32->64 bits has the same restrictions. }
    use_ref:=false;
    if left.location.loc in [LOC_REGISTER,LOC_CREGISTER] then
      reg:=left.location.register
    else if left.location.loc in [LOC_REFERENCE,LOC_CREFERENCE] then
      begin
        tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,left.location.reference);
        ref:=left.location.reference;
        use_ref:=true;
      end
    else
      begin
        { LOC_CONSTANT for example.}
        reg:=cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
        hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,left.resultdef,osuinttype,left.location,reg);
      end;

    { MULX path: three-operand form with free choice of destination
      registers; it sets no flags, hence unusable for overflow checking }
    if (CPUX86_HAS_BMI2 in cpu_capabilities[current_settings.cputype]) and
      (not(needoverflowcheck) or
       { 32->64 bit cannot overflow }
       is_64bit(resultdef)) then
      begin
        { MULX takes its implicit source operand from EDX }
        cg.getcpuregister(current_asmdata.CurrAsmList,NR_EDX);
        hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,right.resultdef,osuinttype,right.location,NR_EDX);
        reglo:=cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
        reghi:=cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
        if use_ref then
          current_asmdata.CurrAsmList.concat(Taicpu.Op_ref_reg_reg(A_MULX,S_L,ref,reglo,reghi))
        else
          emit_reg_reg_reg(A_MULX,S_L,reg,reglo,reghi);
        cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_EDX);

        location_reset(location,LOC_REGISTER,def_cgsize(resultdef));
        location.register64.reglo:=reglo;

        { for a 32-bit result the high half is simply discarded }
        if is_64bit(resultdef) then
          location.register64.reghi:=reghi;

        location_freetemp(current_asmdata.CurrAsmList,left.location);
        location_freetemp(current_asmdata.CurrAsmList,right.location);
      end
    else
      begin
        { Allocate EAX. }
        cg.getcpuregister(current_asmdata.CurrAsmList,NR_EAX);
        { Load the right value. }
        hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,right.resultdef,osuinttype,right.location,NR_EAX);
        { Also allocate EDX, since it is also modified by a mul (JM). }
        cg.getcpuregister(current_asmdata.CurrAsmList,NR_EDX);

        if needoverflowcheck then
          cg.a_reg_alloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS);

        if use_ref then
          emit_ref(asmops[unsigned],S_L,ref)
        else
          emit_reg(asmops[unsigned],S_L,reg);

        if needoverflowcheck and
          { 32->64 bit cannot overflow }
          (not is_64bit(resultdef)) then
          begin
            { no overflow if the carry flag is clear after MUL }
            current_asmdata.getjumplabel(hl4);
            cg.a_jmp_flags(current_asmdata.CurrAsmList,F_AE,hl4);
            cg.a_reg_dealloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS);
            cg.a_call_name(current_asmdata.CurrAsmList,'FPC_OVERFLOW',false);
            cg.a_label(current_asmdata.CurrAsmList,hl4);
          end;
        set_mul_result_location;
      end;
  end;
{ Emits inline 64x64->64 bit multiplication:
    result = lo(l)*lo(r) + ((lo(l)*hi(r) + hi(l)*lo(r)) shl 32)
  The cross terms are computed with IMUL, the lo*lo term with the
  widening MUL in EDX:EAX, and the cross terms are added to EDX. }
procedure ti386addnode.second_mul64bit;
  var
    list: TAsmList;
    hreg1,hreg2: tregister;
  begin
    { 64x64 multiplication yields 128-bit result, but we're only
      interested in its lower 64 bits. This lower part is independent
      of operand signs, and so is the generated code. }
    { pass_left_right already called from second_add64bit }
    list:=current_asmdata.CurrAsmList;
    if left.location.loc in [LOC_REFERENCE,LOC_CREFERENCE] then
      tcgx86(cg).make_simple_ref(list,left.location.reference);
    if right.location.loc in [LOC_REFERENCE,LOC_CREFERENCE] then
      tcgx86(cg).make_simple_ref(list,right.location.reference);

    { calculate 32-bit terms lo(right)*hi(left) and hi(left)*lo(right) }
    if (right.location.loc=LOC_CONSTANT) then
      begin
        { if left has side effects, it could be that this code is called with right.location.value64=0,
          see also #40182 }
        if right.location.value64=0 then
          begin
            { result is constant zero; just load 0 into a fresh pair }
            location_reset(location,LOC_REGISTER,def_cgsize(resultdef));
            location.register64.reglo := cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
            emit_const_reg(A_MOV,S_L,0,location.register64.reglo);
            location.register64.reghi := cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
            emit_const_reg(A_MOV,S_L,0,location.register64.reghi);
            exit;
          end;

        { Omit zero terms, if any }
        hreg1:=NR_NO;
        hreg2:=NR_NO;
        if lo(right.location.value64)<>0 then
          hreg1:=cg.getintregister(list,OS_INT);
        if hi(right.location.value64)<>0 then
          hreg2:=cg.getintregister(list,OS_INT);

        { Take advantage of 3-operand form of IMUL }
        case left.location.loc of
          LOC_REGISTER,LOC_CREGISTER:
            begin
              { hreg1 = lo(right)*hi(left), hreg2 = hi(right)*lo(left) }
              if hreg1<>NR_NO then
                emit_const_reg_reg(A_IMUL,S_L,longint(lo(right.location.value64)),left.location.register64.reghi,hreg1);
              if hreg2<>NR_NO then
                emit_const_reg_reg(A_IMUL,S_L,longint(hi(right.location.value64)),left.location.register64.reglo,hreg2);
            end;
          LOC_REFERENCE,LOC_CREFERENCE:
            begin
              if hreg2<>NR_NO then
                list.concat(taicpu.op_const_ref_reg(A_IMUL,S_L,longint(hi(right.location.value64)),left.location.reference,hreg2));
              { offset+4 addresses the high dword of left }
              inc(left.location.reference.offset,4);
              if hreg1<>NR_NO then
                list.concat(taicpu.op_const_ref_reg(A_IMUL,S_L,longint(lo(right.location.value64)),left.location.reference,hreg1));
              dec(left.location.reference.offset,4);
            end;
          else
            InternalError(2014011602);
        end;
      end
    else
      begin
        hreg1:=cg.getintregister(list,OS_INT);
        hreg2:=cg.getintregister(list,OS_INT);
        cg64.a_load64low_loc_reg(list,left.location,hreg1);
        cg64.a_load64high_loc_reg(list,left.location,hreg2);
        case right.location.loc of
          LOC_REGISTER,LOC_CREGISTER:
            begin
              { hreg1 = lo(left)*hi(right), hreg2 = hi(left)*lo(right) }
              emit_reg_reg(A_IMUL,S_L,right.location.register64.reghi,hreg1);
              emit_reg_reg(A_IMUL,S_L,right.location.register64.reglo,hreg2);
            end;
          LOC_REFERENCE,LOC_CREFERENCE:
            begin
              emit_ref_reg(A_IMUL,S_L,right.location.reference,hreg2);
              inc(right.location.reference.offset,4);
              emit_ref_reg(A_IMUL,S_L,right.location.reference,hreg1);
              dec(right.location.reference.offset,4);
            end;
          else
            InternalError(2014011603);
        end;
      end;
    { add hi*lo and lo*hi terms together }
    if (hreg1<>NR_NO) and (hreg2<>NR_NO) then
      emit_reg_reg(A_ADD,S_L,hreg2,hreg1);

    { load lo(right) into EAX }
    cg.getcpuregister(list,NR_EAX);
    cg64.a_load64low_loc_reg(list,right.location,NR_EAX);

    { multiply EAX by lo(left), producing 64-bit value in EDX:EAX }
    cg.getcpuregister(list,NR_EDX);
    if (left.location.loc in [LOC_REGISTER,LOC_CREGISTER]) then
      emit_reg(A_MUL,S_L,left.location.register64.reglo)
    else if (left.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
      emit_ref(A_MUL,S_L,left.location.reference)
    else
      InternalError(2014011604);

    { add previously calculated terms to the high half }
    if (hreg1<>NR_NO) then
      emit_reg_reg(A_ADD,S_L,hreg1,NR_EDX)
    else if (hreg2<>NR_NO) then
      emit_reg_reg(A_ADD,S_L,hreg2,NR_EDX)
    else
      InternalError(2014011601);

    { Result is now in EDX:EAX. Copy it to virtual registers. }
    set_mul_result_location;
  end;
begin
  { register this class as the add-node implementation used by the compiler }
  caddnode:=ti386addnode;
end.