fpc/compiler/x86/nx86inl.pas

1836 lines
72 KiB
ObjectPascal

{
Copyright (c) 1998-2002 by Florian Klaempfl
Generate x86 inline nodes
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
****************************************************************************
}
unit nx86inl;
{$i fpcdefs.inc}
interface
uses
node,ninl,ncginl;
type
tx86inlinenode = class(tcginlinenode)
protected
procedure maybe_remove_round_trunc_typeconv; virtual;
public
function pass_typecheck_cpu:tnode;override;
{ first pass override
so that the code generator will actually generate
these nodes.
}
function first_cpu: tnode;override;
function first_pi: tnode ; override;
function first_arctan_real: tnode; override;
function first_abs_real: tnode; override;
function first_sqr_real: tnode; override;
function first_sqrt_real: tnode; override;
function first_ln_real: tnode; override;
function first_cos_real: tnode; override;
function first_sin_real: tnode; override;
function first_round_real: tnode; override;
function first_trunc_real: tnode; override;
function first_popcnt: tnode; override;
function first_fma: tnode; override;
function first_frac_real : tnode; override;
function first_int_real : tnode; override;
function first_minmax: tnode; override;
function simplify(forinline : boolean) : tnode; override;
{ second pass override to generate these nodes }
procedure pass_generate_code_cpu;override;
procedure second_IncludeExclude;override;
procedure second_AndOrXorShiftRot_assign;override;
procedure second_pi; override;
procedure second_arctan_real; override;
procedure second_abs_real; override;
procedure second_round_real; override;
procedure second_sqr_real; override;
procedure second_sqrt_real; override;
procedure second_ln_real; override;
procedure second_cos_real; override;
procedure second_sin_real; override;
procedure second_trunc_real; override;
procedure second_prefetch;override;
procedure second_abs_long;override;
procedure second_popcnt;override;
procedure second_fma;override;
procedure second_frac_real;override;
procedure second_int_real;override;
procedure second_high;override;
procedure second_minmax;override;
private
procedure load_fpu_location(lnode: tnode);
end;
implementation
uses
systems,
globtype,globals,
verbose,compinnr,fmodule,
defutil,
aasmbase,aasmdata,aasmcpu,
symconst,symtype,symdef,symcpu,
ncnv,
htypechk,
cgbase,pass_1,pass_2,
cpuinfo,cpubase,nutils,
ncal,ncgutil,nld,ncon,nadd,nmat,constexp,
tgobj,
cga,cgutils,cgx86,cgobj,hlcgobj,cutils;
{*****************************************************************************
TX86INLINENODE
*****************************************************************************}
procedure tx86inlinenode.maybe_remove_round_trunc_typeconv;
begin
{ only makes a difference for x86_64 }
end;
function tx86inlinenode.pass_typecheck_cpu: tnode;
begin
Result:=nil;
case inlinenumber of
in_x86_inportb:
begin
CheckParameters(1);
resultdef:=u8inttype;
end;
in_x86_inportw:
begin
CheckParameters(1);
resultdef:=u16inttype;
end;
in_x86_inportl:
begin
CheckParameters(1);
resultdef:=s32inttype;
end;
in_x86_outportb,
in_x86_outportw,
in_x86_outportl:
begin
CheckParameters(2);
resultdef:=voidtype;
end;
in_x86_cli,
in_x86_sti:
resultdef:=voidtype;
in_x86_get_cs,
in_x86_get_ss,
in_x86_get_ds,
in_x86_get_es,
in_x86_get_fs,
in_x86_get_gs:
{$ifdef i8086}
resultdef:=u16inttype;
{$else i8086}
resultdef:=s32inttype;
{$endif i8086}
{ include automatically generated code }
{$i x86mmtype.inc}
else
Result:=inherited pass_typecheck_cpu;
end;
end;
function tx86inlinenode.first_cpu: tnode;
begin
Result:=nil;
case inlinenumber of
in_x86_inportb,
in_x86_inportw,
in_x86_inportl,
in_x86_get_cs,
in_x86_get_ss,
in_x86_get_ds,
in_x86_get_es,
in_x86_get_fs,
in_x86_get_gs:
expectloc:=LOC_REGISTER;
in_x86_outportb,
in_x86_outportw,
in_x86_outportl,
in_x86_cli,
in_x86_sti:
expectloc:=LOC_VOID;
{ include automatically generated code }
{$i x86mmfirst.inc}
else
Result:=inherited first_cpu;
end;
end;
function tx86inlinenode.first_pi : tnode;
begin
if (tfloatdef(pbestrealtype^).floattype=s80real) then
begin
expectloc:=LOC_FPUREGISTER;
first_pi := nil;
end
else
result:=inherited;
end;
function tx86inlinenode.first_arctan_real : tnode;
begin
{$ifdef i8086}
{ FPATAN's range is limited to (0 <= value < 1) on the 8087 and 80287,
so we need to use the RTL helper on these FPUs }
if current_settings.cputype < cpu_386 then
begin
result := inherited;
exit;
end;
{$endif i8086}
if (tfloatdef(pbestrealtype^).floattype=s80real) then
begin
expectloc:=LOC_FPUREGISTER;
first_arctan_real := nil;
end
else
result:=inherited;
end;
function tx86inlinenode.first_abs_real : tnode;
begin
if use_vectorfpu(resultdef) then
expectloc:=LOC_MMREGISTER
else
expectloc:=LOC_FPUREGISTER;
first_abs_real := nil;
end;
function tx86inlinenode.first_sqr_real : tnode;
begin
if use_vectorfpu(resultdef) then
expectloc:=LOC_MMREGISTER
else
expectloc:=LOC_FPUREGISTER;
first_sqr_real := nil;
end;
function tx86inlinenode.first_sqrt_real : tnode;
begin
if use_vectorfpu(resultdef) then
expectloc:=LOC_MMREGISTER
else
expectloc:=LOC_FPUREGISTER;
first_sqrt_real := nil;
end;
function tx86inlinenode.first_ln_real : tnode;
begin
if (tfloatdef(pbestrealtype^).floattype=s80real) then
begin
expectloc:=LOC_FPUREGISTER;
first_ln_real := nil;
end
else
result:=inherited;
end;
function tx86inlinenode.first_cos_real : tnode;
begin
{$ifdef i8086}
{ FCOS is 387+ }
if current_settings.cputype < cpu_386 then
begin
result := inherited;
exit;
end;
{$endif i8086}
if (tfloatdef(pbestrealtype^).floattype=s80real) then
begin
expectloc:=LOC_FPUREGISTER;
result:=nil;
end
else
result:=inherited;
end;
function tx86inlinenode.first_sin_real : tnode;
begin
{$ifdef i8086}
{ FSIN is 387+ }
if current_settings.cputype < cpu_386 then
begin
result := inherited;
exit;
end;
{$endif i8086}
if (tfloatdef(pbestrealtype^).floattype=s80real) then
begin
expectloc:=LOC_FPUREGISTER;
result:=nil;
end
else
result:=inherited;
end;
function tx86inlinenode.first_round_real : tnode;
begin
maybe_remove_round_trunc_typeconv;
{$ifdef x86_64}
if use_vectorfpu(left.resultdef) then
expectloc:=LOC_REGISTER
else
{$endif x86_64}
expectloc:=LOC_REFERENCE;
result:=nil;
end;
function tx86inlinenode.first_trunc_real: tnode;
begin
maybe_remove_round_trunc_typeconv;
if (cs_opt_size in current_settings.optimizerswitches)
{$ifdef x86_64}
and not(use_vectorfpu(left.resultdef))
{$endif x86_64}
then
result:=inherited
else
begin
{$ifdef x86_64}
if use_vectorfpu(left.resultdef) then
expectloc:=LOC_REGISTER
else
{$endif x86_64}
expectloc:=LOC_REFERENCE;
result:=nil;
end;
end;
function tx86inlinenode.first_popcnt: tnode;
begin
Result:=nil;
{$ifndef i8086}
if (CPUX86_HAS_POPCNT in cpu_capabilities[current_settings.cputype])
{$ifdef i386}
and not is_64bit(left.resultdef)
{$endif i386}
then
expectloc:=LOC_REGISTER
else
{$endif not i8086}
Result:=inherited first_popcnt
end;
function tx86inlinenode.first_fma : tnode;
begin
{$ifndef i8086}
if ((fpu_capabilities[current_settings.fputype]*[FPUX86_HAS_FMA,FPUX86_HAS_FMA4])<>[]) and
((is_double(resultdef)) or (is_single(resultdef))) then
begin
expectloc:=LOC_MMREGISTER;
Result:=nil;
end
else
{$endif i8086}
Result:=inherited first_fma;
end;
function tx86inlinenode.first_frac_real : tnode;
begin
if (current_settings.fputype>=fpu_sse41) and
((is_double(resultdef)) or (is_single(resultdef))) then
begin
maybe_remove_round_trunc_typeconv;
expectloc:=LOC_MMREGISTER;
Result:=nil;
end
else
Result:=inherited first_frac_real;
end;
function tx86inlinenode.first_int_real : tnode;
begin
if (current_settings.fputype>=fpu_sse41) and
((is_double(resultdef)) or (is_single(resultdef))) then
begin
Result:=nil;
expectloc:=LOC_MMREGISTER;
end
else
Result:=inherited first_int_real;
end;
function tx86inlinenode.first_minmax: tnode;
begin
{$ifndef i8086}
if
{$ifdef i386}
((current_settings.fputype>=fpu_sse) and is_single(resultdef)) or
((current_settings.fputype>=fpu_sse2) and is_double(resultdef))
{$else i386}
((is_double(resultdef)) or (is_single(resultdef)))
{$endif i386}
then
begin
expectloc:=LOC_MMREGISTER;
Result:=nil;
end
else
{$endif i8086}
if
{$ifndef x86_64}
(CPUX86_HAS_CMOV in cpu_capabilities[current_settings.cputype]) and
{$endif x86_64}
(
{$ifdef x86_64}
is_64bitint(resultdef) or
{$endif x86_64}
is_32bitint(resultdef)
) then
begin
expectloc:=LOC_REGISTER;
Result:=nil;
end
else
Result:=inherited first_minmax;
end;
function tx86inlinenode.simplify(forinline : boolean) : tnode;
var
temp : tnode;
begin
if (current_settings.fputype>=fpu_sse41) and
(inlinenumber=in_int_real) and (left.nodetype=typeconvn) and
not(nf_explicit in left.flags) and
(ttypeconvnode(left).left.resultdef.typ=floatdef) and
((is_double(ttypeconvnode(left).left.resultdef)) or (is_single(ttypeconvnode(left).left.resultdef))) then
begin
{ get rid of the type conversion }
temp:=ttypeconvnode(left).left;
ttypeconvnode(left).left:=nil;
left.free;
left:=temp;
result:=self.getcopy;
tinlinenode(result).resultdef:=temp.resultdef;
typecheckpass(result);
end
else
Result:=inherited simplify(forinline);
end;
procedure tx86inlinenode.pass_generate_code_cpu;
var
paraarray : array[1..4] of tnode;
i : integer;
op: TAsmOp;
procedure inport(dreg:TRegister;dsize:topsize;dtype:tdef);
var
portnumber: tnode;
begin
portnumber:=left;
secondpass(portnumber);
if (portnumber.location.loc=LOC_CONSTANT) and
(portnumber.location.value>=0) and
(portnumber.location.value<=255) then
begin
hlcg.getcpuregister(current_asmdata.CurrAsmList,dreg);
current_asmdata.CurrAsmList.concat(taicpu.op_const_reg(A_IN,dsize,portnumber.location.value,dreg));
location_reset(location,LOC_REGISTER,def_cgsize(resultdef));
location.register:=hlcg.getintregister(current_asmdata.CurrAsmList,resultdef);
hlcg.ungetcpuregister(current_asmdata.CurrAsmList,dreg);
hlcg.a_load_reg_reg(current_asmdata.CurrAsmList,dtype,resultdef,dreg,location.register);
end
else
begin
hlcg.getcpuregister(current_asmdata.CurrAsmList,NR_DX);
hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,portnumber.resultdef,u16inttype,portnumber.location,NR_DX);
hlcg.getcpuregister(current_asmdata.CurrAsmList,dreg);
current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_IN,dsize,NR_DX,dreg));
hlcg.ungetcpuregister(current_asmdata.CurrAsmList,NR_DX);
location_reset(location,LOC_REGISTER,def_cgsize(resultdef));
location.register:=hlcg.getintregister(current_asmdata.CurrAsmList,resultdef);
hlcg.ungetcpuregister(current_asmdata.CurrAsmList,dreg);
hlcg.a_load_reg_reg(current_asmdata.CurrAsmList,dtype,resultdef,dreg,location.register);
end;
end;
procedure outport(dreg:TRegister;dsize:topsize;dtype:tdef);
var
portnumber, portdata: tnode;
begin
portnumber:=tcallparanode(tcallparanode(left).right).left;
portdata:=tcallparanode(left).left;
secondpass(portdata);
secondpass(portnumber);
hlcg.getcpuregister(current_asmdata.CurrAsmList,dreg);
hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,portdata.resultdef,dtype,portdata.location,dreg);
if (portnumber.location.loc=LOC_CONSTANT) and
(portnumber.location.value>=0) and
(portnumber.location.value<=255) then
current_asmdata.CurrAsmList.concat(taicpu.op_reg_const(A_OUT,dsize,dreg,portnumber.location.value))
else
begin
hlcg.getcpuregister(current_asmdata.CurrAsmList,NR_DX);
hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,portnumber.resultdef,u16inttype,portnumber.location,NR_DX);
current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_OUT,dsize,dreg,NR_DX));
hlcg.ungetcpuregister(current_asmdata.CurrAsmList,NR_DX);
end;
hlcg.ungetcpuregister(current_asmdata.CurrAsmList,dreg);
end;
procedure get_segreg(segreg:tregister);
begin
location_reset(location,LOC_REGISTER,def_cgsize(resultdef));
location.register:=hlcg.getintregister(current_asmdata.CurrAsmList,resultdef);
current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_MOV,TCGSize2OpSize[def_cgsize(resultdef)],segreg,location.register));
end;
function GetConstInt(n: tnode): longint;
begin
Result:=0;
if is_constintnode(n) then
result:=tordconstnode(n).value.svalue
else
Message(type_e_constant_expr_expected);
end;
procedure GetParameters(count: longint);
var
i: longint;
p: tnode;
begin
if (count=1) and
(not (left is tcallparanode)) then
paraarray[1]:=left
else
begin
p:=left;
for i := count downto 1 do
begin
paraarray[i]:=tcallparanode(p).paravalue;
p:=tcallparanode(p).nextpara;
end;
end;
end;
procedure location_force_mmxreg(list:TAsmList;var l: tlocation;maybeconst:boolean);
var
reg : tregister;
begin
if (l.loc<>LOC_MMXREGISTER) and
((l.loc<>LOC_CMMXREGISTER) or (not maybeconst)) then
begin
reg:=tcgx86(cg).getmmxregister(list);
cg.a_loadmm_loc_reg(list,OS_M64,l,reg,nil);
location_freetemp(list,l);
location_reset(l,LOC_MMXREGISTER,OS_M64);
l.register:=reg;
end;
end;
procedure location_make_ref(var loc: tlocation);
var
hloc: tlocation;
begin
case loc.loc of
LOC_CREGISTER,
LOC_REGISTER:
begin
location_reset_ref(hloc, LOC_REFERENCE, OS_32, 1, []);
hloc.reference.base:=loc.register;
loc:=hloc;
end;
LOC_CREFERENCE,
LOC_REFERENCE:
begin
end;
else
begin
hlcg.location_force_reg(current_asmdata.CurrAsmList,loc,u32inttype,u32inttype,false);
location_reset_ref(hloc, LOC_REFERENCE, OS_32, 1, []);
hloc.reference.base:=loc.register;
loc:=hloc;
end;
end;
end;
begin
FillChar(paraarray,sizeof(paraarray),0);
case inlinenumber of
in_x86_inportb:
inport(NR_AL,S_B,u8inttype);
in_x86_inportw:
inport(NR_AX,S_W,u16inttype);
in_x86_inportl:
inport(NR_EAX,S_L,s32inttype);
in_x86_outportb:
outport(NR_AL,S_B,u8inttype);
in_x86_outportw:
outport(NR_AX,S_W,u16inttype);
in_x86_outportl:
outport(NR_EAX,S_L,s32inttype);
in_x86_cli:
current_asmdata.CurrAsmList.concat(taicpu.op_none(A_CLI));
in_x86_sti:
current_asmdata.CurrAsmList.concat(taicpu.op_none(A_STI));
in_x86_get_cs:
get_segreg(NR_CS);
in_x86_get_ss:
get_segreg(NR_SS);
in_x86_get_ds:
get_segreg(NR_DS);
in_x86_get_es:
get_segreg(NR_ES);
in_x86_get_fs:
get_segreg(NR_FS);
in_x86_get_gs:
get_segreg(NR_GS);
{$i x86mmsecond.inc}
else
inherited pass_generate_code_cpu;
end;
end;
procedure tx86inlinenode.second_AndOrXorShiftRot_assign;
var
opsize : tcgsize;
valuenode, indexnode, loadnode: TNode;
DestReg: TRegister;
begin
{$ifndef i8086}
if (cs_opt_level2 in current_settings.optimizerswitches) then
begin
{ Saves on a lot of typecasting and potential coding mistakes }
valuenode := tcallparanode(left).left;
loadnode := tcallparanode(tcallparanode(left).right).left;
opsize := def_cgsize(loadnode.resultdef);
{ BMI2 optimisations }
if (CPUX86_HAS_BMI2 in cpu_capabilities[current_settings.cputype]) and (inlinenumber=in_and_assign_x_y) then
begin
{ If the second operand is "((1 shl y) - 1)", we can turn it
into a BZHI operator instead }
if (opsize in [OS_32, OS_S32{$ifdef x86_64}, OS_64, OS_S64{$endif x86_64}]) and
(valuenode.nodetype = subn) and
(taddnode(valuenode).right.nodetype = ordconstn) and
(tordconstnode(taddnode(valuenode).right).value = 1) and
(taddnode(valuenode).left.nodetype = shln) and
(tshlshrnode(taddnode(valuenode).left).left.nodetype = ordconstn) and
(tordconstnode(tshlshrnode(taddnode(valuenode).left).left).value = 1) then
begin
{ Skip the subtract and shift nodes completely }
{ Helps avoid all the awkward typecasts }
indexnode := tshlshrnode(taddnode(valuenode).left).right;
{$ifdef x86_64}
{ The code generator sometimes extends the shift result to 64-bit unnecessarily }
if (indexnode.nodetype = typeconvn) and (opsize in [OS_32, OS_S32]) and
(def_cgsize(TTypeConvNode(indexnode).resultdef) in [OS_64, OS_S64]) then
begin
{ Convert to the 32-bit type }
indexnode.resultdef:=loadnode.resultdef;
node_reset_flags(indexnode,[],[tnf_pass1_done]);
{ We should't be getting any new errors }
if do_firstpass(indexnode) then
InternalError(2022110202);
{ Keep things internally consistent in case indexnode changed }
tshlshrnode(taddnode(valuenode).left).right:=indexnode;
end;
{$endif x86_64}
secondpass(indexnode);
secondpass(loadnode);
{ allocate registers }
hlcg.location_force_reg(
current_asmdata.CurrAsmList,
indexnode.location,
indexnode.resultdef,
loadnode.resultdef,
false
);
case loadnode.location.loc of
LOC_REFERENCE,
LOC_CREFERENCE:
begin
{ BZHI can only write to a register }
DestReg := cg.getintregister(current_asmdata.CurrAsmList,opsize);
emit_reg_ref_reg(A_BZHI, TCGSize2OpSize[opsize], indexnode.location.register, loadnode.location.reference, DestReg);
emit_reg_ref(A_MOV, TCGSize2OpSize[opsize], DestReg, loadnode.location.reference);
end;
LOC_REGISTER,
LOC_CREGISTER:
emit_reg_reg_reg(A_BZHI, TCGSize2OpSize[opsize], indexnode.location.register, loadnode.location.register, loadnode.location.register);
else
InternalError(2022102120);
end;
Exit;
end;
end;
end;
{$endif not i8086}
inherited second_AndOrXorShiftRot_assign;
end;
procedure tx86inlinenode.second_pi;
begin
location_reset(location,LOC_FPUREGISTER,def_cgsize(resultdef));
emit_none(A_FLDPI,S_NO);
tcgx86(cg).inc_fpu_stack;
location.register:=NR_FPU_RESULT_REG;
end;
{ load the FPU into the an fpu register }
procedure tx86inlinenode.load_fpu_location(lnode: tnode);
begin
location_reset(location,LOC_FPUREGISTER,def_cgsize(resultdef));
location.register:=NR_FPU_RESULT_REG;
secondpass(lnode);
case lnode.location.loc of
LOC_FPUREGISTER:
;
LOC_CFPUREGISTER:
begin
cg.a_loadfpu_reg_reg(current_asmdata.CurrAsmList,lnode.location.size,
lnode.location.size,lnode.location.register,location.register);
end;
LOC_REFERENCE,LOC_CREFERENCE:
begin
cg.a_loadfpu_ref_reg(current_asmdata.CurrAsmList,
lnode.location.size,lnode.location.size,
lnode.location.reference,location.register);
end;
LOC_MMREGISTER,LOC_CMMREGISTER:
begin
location:=lnode.location;
hlcg.location_force_fpureg(current_asmdata.CurrAsmList,location,lnode.resultdef,false);
end;
else
internalerror(309991);
end;
end;
procedure tx86inlinenode.second_arctan_real;
begin
load_fpu_location(left);
emit_none(A_FLD1,S_NO);
emit_none(A_FPATAN,S_NO);
end;
procedure tx86inlinenode.second_abs_real;
function needs_indirect:boolean; inline;
begin
result:=(tf_supports_packages in target_info.flags) and
(target_info.system in systems_indirect_var_imports);
end;
var
href : treference;
sym : tasmsymbol;
begin
if use_vectorfpu(resultdef) then
begin
secondpass(left);
if left.location.loc<>LOC_MMREGISTER then
hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,UseAVX);
if UseAVX then
begin
location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
location.register:=cg.getmmregister(current_asmdata.CurrAsmList,def_cgsize(resultdef));
end
else
location:=left.location;
case tfloatdef(resultdef).floattype of
s32real:
begin
sym:=current_asmdata.RefAsmSymbol(target_info.cprefix+'FPC_ABSMASK_SINGLE',AT_DATA,needs_indirect);
reference_reset_symbol(href,sym,0,4,[]);
current_module.add_extern_asmsym(sym);
tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList, href);
if UseAVX then
current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg_reg(
A_VANDPS,S_XMM,href,left.location.register,location.register))
else
current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(A_ANDPS,S_XMM,href,location.register));
end;
s64real:
begin
sym:=current_asmdata.RefAsmSymbol(target_info.cprefix+'FPC_ABSMASK_DOUBLE',AT_DATA,needs_indirect);
reference_reset_symbol(href,sym,0,4,[]);
current_module.add_extern_asmsym(sym);
tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList, href);
if UseAVX then
current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg_reg(
A_VANDPD,S_XMM,href,left.location.register,location.register))
else
current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(A_ANDPD,S_XMM,href,location.register))
end;
else
internalerror(200506081);
end;
end
else
begin
load_fpu_location(left);
emit_none(A_FABS,S_NO);
end;
end;
procedure tx86inlinenode.second_round_real;
begin
{$ifdef x86_64}
if use_vectorfpu(left.resultdef) then
begin
secondpass(left);
hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
location_reset(location,LOC_REGISTER,OS_S64);
location.register:=cg.getintregister(current_asmdata.CurrAsmList,OS_S64);
if UseAVX then
case left.location.size of
OS_F32:
current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_VCVTSS2SI,S_NO,left.location.register,location.register));
OS_F64:
current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_VCVTSD2SI,S_NO,left.location.register,location.register));
else
internalerror(2007031402);
end
else
case left.location.size of
OS_F32:
current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CVTSS2SI,S_NO,left.location.register,location.register));
OS_F64:
current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CVTSD2SI,S_NO,left.location.register,location.register));
else
internalerror(2007031404);
end;
end
else
{$endif x86_64}
begin
load_fpu_location(left);
location_reset_ref(location,LOC_REFERENCE,OS_S64,0,[]);
tg.GetTemp(current_asmdata.CurrAsmList,resultdef.size,resultdef.alignment,tt_normal,location.reference);
emit_ref(A_FISTP,S_IQ,location.reference);
tcgx86(cg).dec_fpu_stack;
emit_none(A_FWAIT,S_NO);
end;
end;
procedure tx86inlinenode.second_trunc_real;
var
oldcw,newcw : treference;
begin
{$ifdef x86_64}
if use_vectorfpu(left.resultdef) and
not((left.location.loc=LOC_FPUREGISTER) and (current_settings.fputype>=fpu_sse3)) then
begin
secondpass(left);
hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
location_reset(location,LOC_REGISTER,OS_S64);
location.register:=cg.getintregister(current_asmdata.CurrAsmList,OS_S64);
if UseAVX then
case left.location.size of
OS_F32:
current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_VCVTTSS2SI,S_NO,left.location.register,location.register));
OS_F64:
current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_VCVTTSD2SI,S_NO,left.location.register,location.register));
else
internalerror(2007031401);
end
else
case left.location.size of
OS_F32:
current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CVTTSS2SI,S_NO,left.location.register,location.register));
OS_F64:
current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CVTTSD2SI,S_NO,left.location.register,location.register));
else
internalerror(2007031403);
end;
end
else
{$endif x86_64}
begin
if (current_settings.fputype>=fpu_sse3) then
begin
load_fpu_location(left);
location_reset_ref(location,LOC_REFERENCE,OS_S64,0,[]);
tg.GetTemp(current_asmdata.CurrAsmList,resultdef.size,resultdef.alignment,tt_normal,location.reference);
emit_ref(A_FISTTP,S_IQ,location.reference);
tcgx86(cg).dec_fpu_stack;
end
else
begin
tg.GetTemp(current_asmdata.CurrAsmList,2,2,tt_normal,oldcw);
tg.GetTemp(current_asmdata.CurrAsmList,2,2,tt_normal,newcw);
{$ifdef i8086}
if current_settings.cputype<=cpu_286 then
begin
emit_ref(A_FSTCW,S_NO,newcw);
emit_ref(A_FSTCW,S_NO,oldcw);
emit_none(A_FWAIT,S_NO);
end
else
{$endif i8086}
begin
emit_ref(A_FNSTCW,S_NO,newcw);
emit_ref(A_FNSTCW,S_NO,oldcw);
end;
emit_const_ref(A_OR,S_W,$0f00,newcw);
load_fpu_location(left);
emit_ref(A_FLDCW,S_NO,newcw);
location_reset_ref(location,LOC_REFERENCE,OS_S64,0,[]);
tg.GetTemp(current_asmdata.CurrAsmList,resultdef.size,resultdef.alignment,tt_normal,location.reference);
emit_ref(A_FISTP,S_IQ,location.reference);
tcgx86(cg).dec_fpu_stack;
emit_ref(A_FLDCW,S_NO,oldcw);
emit_none(A_FWAIT,S_NO);
tg.UnGetTemp(current_asmdata.CurrAsmList,oldcw);
tg.UnGetTemp(current_asmdata.CurrAsmList,newcw);
end;
end;
end;
procedure tx86inlinenode.second_sqr_real;
begin
if use_vectorfpu(resultdef) then
begin
secondpass(left);
location_reset(location,LOC_MMREGISTER,left.location.size);
location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
if UseAVX then
begin
hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
cg.a_opmm_reg_reg_reg(current_asmdata.CurrAsmList,OP_MUL,left.location.size,left.location.register,left.location.register,location.register,mms_movescalar);
end
else
begin
if left.location.loc in [LOC_CFPUREGISTER,LOC_FPUREGISTER] then
hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
cg.a_loadmm_loc_reg(current_asmdata.CurrAsmList,location.size,left.location,location.register,mms_movescalar);
cg.a_opmm_reg_reg(current_asmdata.CurrAsmList,OP_MUL,left.location.size,location.register,location.register,mms_movescalar);
end;
end
else
begin
load_fpu_location(left);
emit_reg_reg(A_FMUL,S_NO,NR_ST0,NR_ST0);
end;
end;
procedure tx86inlinenode.second_sqrt_real;
begin
if use_vectorfpu(resultdef) then
begin
secondpass(left);
hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
location_reset(location,LOC_MMREGISTER,left.location.size);
location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
if UseAVX then
case tfloatdef(resultdef).floattype of
s32real:
{ we use S_NO instead of S_XMM here, regardless of the register size, as the size of the memory location is 32/64 bit }
{ using left.location.register here as 2nd parameter is crucial to break dependency chains }
current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(A_VSQRTSS,S_NO,left.location.register,left.location.register,location.register));
s64real:
{ we use S_NO instead of S_XMM here, regardless of the register size, as the size of the memory location is 32/64 bit }
{ using left.location.register here as 2nd parameter is crucial to break dependency chains }
current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(A_VSQRTSD,S_NO,left.location.register,left.location.register,location.register));
else
internalerror(200510031);
end
else
case tfloatdef(resultdef).floattype of
s32real:
current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_SQRTSS,S_NO,left.location.register,location.register));
s64real:
current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_SQRTSD,S_NO,left.location.register,location.register));
else
internalerror(2005100303);
end;
end
else
begin
load_fpu_location(left);
if left.location.loc=LOC_REFERENCE then
tg.ungetiftemp(current_asmdata.CurrAsmList,left.location.reference);
emit_none(A_FSQRT,S_NO);
end;
end;
procedure tx86inlinenode.second_ln_real;
begin
load_fpu_location(left);
emit_none(A_FLDLN2,S_NO);
emit_none(A_FXCH,S_NO);
emit_none(A_FYL2X,S_NO);
end;
procedure tx86inlinenode.second_cos_real;
begin
{$ifdef i8086}
{ FCOS is 387+ }
if current_settings.cputype < cpu_386 then
begin
inherited;
exit;
end;
{$endif i8086}
load_fpu_location(left);
emit_none(A_FCOS,S_NO);
end;
procedure tx86inlinenode.second_sin_real;
begin
{$ifdef i8086}
{ FSIN is 387+ }
if current_settings.cputype < cpu_386 then
begin
inherited;
exit;
end;
{$endif i8086}
load_fpu_location(left);
emit_none(A_FSIN,S_NO)
end;
procedure tx86inlinenode.second_prefetch;
var
ref : treference;
r : tregister;
checkpointer_used : boolean;
begin
{$if defined(i386) or defined(i8086)}
if current_settings.cputype>=cpu_Pentium3 then
{$endif i386 or i8086}
begin
{ do not call Checkpointer for left node }
checkpointer_used:=(cs_checkpointer in current_settings.localswitches);
if checkpointer_used then
node_change_local_switch(left,cs_checkpointer,false);
secondpass(left);
if checkpointer_used then
node_change_local_switch(left,cs_checkpointer,false);
case left.location.loc of
LOC_CREFERENCE,
LOC_REFERENCE:
begin
r:=cg.getintregister(current_asmdata.CurrAsmList,OS_ADDR);
cg.a_loadaddr_ref_reg(current_asmdata.CurrAsmList,left.location.reference,r);
reference_reset_base(ref,r,0,left.location.reference.temppos,left.location.reference.alignment,left.location.reference.volatility);
current_asmdata.CurrAsmList.concat(taicpu.op_ref(A_PREFETCHNTA,S_NO,ref));
end;
else
{ nothing to prefetch };
end;
end;
end;
procedure tx86inlinenode.second_abs_long;
var
hregister : tregister;
opsize : tcgsize;
hp : taicpu;
hl: TAsmLabel;
begin
{$if defined(i8086) or defined(i386)}
if not(CPUX86_HAS_CMOV in cpu_capabilities[current_settings.cputype]) then
begin
opsize:=def_cgsize(left.resultdef);
secondpass(left);
hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,left.resultdef,false);
location:=left.location;
location.register:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
cg.a_load_reg_reg(current_asmdata.CurrAsmList,opsize,opsize,left.location.register,location.register);
cg.a_op_const_reg(current_asmdata.CurrAsmList,OP_SAR,opsize,tcgsize2size[opsize]*8-1,left.location.register);
cg.a_op_reg_reg(current_asmdata.CurrAsmList,OP_XOR,opsize,left.location.register,location.register);
cg.a_op_reg_reg(current_asmdata.CurrAsmList,OP_SUB,opsize,left.location.register,location.register);
if cs_check_overflow in current_settings.localswitches then
begin
current_asmdata.getjumplabel(hl);
cg.a_jmp_flags(current_asmdata.CurrAsmList,F_NO,hl);
cg.a_call_name(current_asmdata.CurrAsmList,'FPC_OVERFLOW',false);
cg.a_label(current_asmdata.CurrAsmList,hl);
end;
end
else
{$endif i8086 or i386}
begin
opsize:=def_cgsize(left.resultdef);
secondpass(left);
hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,left.resultdef,true);
hregister:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
location:=left.location;
location.register:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
cg.a_load_reg_reg(current_asmdata.CurrAsmList,opsize,opsize,left.location.register,hregister);
cg.a_load_reg_reg(current_asmdata.CurrAsmList,opsize,opsize,left.location.register,location.register);
cg.a_reg_alloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS);
emit_reg(A_NEG,tcgsize2opsize[opsize],hregister);
if cs_check_overflow in current_settings.localswitches then
begin
current_asmdata.getjumplabel(hl);
cg.a_jmp_flags(current_asmdata.CurrAsmList,F_NO,hl);
cg.a_call_name(current_asmdata.CurrAsmList,'FPC_OVERFLOW',false);
cg.a_label(current_asmdata.CurrAsmList,hl);
end;
hp:=taicpu.op_reg_reg(A_CMOVcc,tcgsize2opsize[opsize],hregister,location.register);
hp.condition:=C_NS;
cg.a_reg_dealloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS);
current_asmdata.CurrAsmList.concat(hp);
end;
end;
{*****************************************************************************
INCLUDE/EXCLUDE GENERIC HANDLING
*****************************************************************************}
procedure tx86inlinenode.second_IncludeExclude;
var
hregister,
hregister2: tregister;
setbase : aint;
bitsperop,l : longint;
cgop : topcg;
asmop : tasmop;
opdef : tdef;
opsize,
orgsize: tcgsize;
begin
{$ifdef i8086}
{ BTS and BTR are 386+ }
if current_settings.cputype < cpu_386 then
{$else i8086}
{ bts on memory locations is very slow, so even the default code is faster }
if not(cs_opt_size in current_settings.optimizerswitches) and (tcallparanode(tcallparanode(left).right).left.expectloc<>LOC_CONSTANT) and
(tcallparanode(left).left.expectloc=LOC_REFERENCE) then
{$endif i8086}
begin
inherited;
exit;
end;
if is_smallset(tcallparanode(left).resultdef) then
begin
opdef:=tcallparanode(left).resultdef;
opsize:=int_cgsize(opdef.size)
end
else
begin
opdef:=u32inttype;
opsize:=OS_32;
end;
bitsperop:=(8*tcgsize2size[opsize]);
secondpass(tcallparanode(left).left);
secondpass(tcallparanode(tcallparanode(left).right).left);
setbase:=tsetdef(tcallparanode(left).left.resultdef).setbase;
if tcallparanode(tcallparanode(left).right).left.location.loc=LOC_CONSTANT then
begin
{ calculate bit position }
l:=1 shl ((tcallparanode(tcallparanode(left).right).left.location.value-setbase) mod bitsperop);
{ determine operator }
if inlinenumber=in_include_x_y then
cgop:=OP_OR
else
begin
cgop:=OP_AND;
l:=not(l);
end;
case tcallparanode(left).left.location.loc of
LOC_REFERENCE :
begin
inc(tcallparanode(left).left.location.reference.offset,
((tcallparanode(tcallparanode(left).right).left.location.value-setbase) div bitsperop)*tcgsize2size[opsize]);
cg.a_op_const_ref(current_asmdata.CurrAsmList,cgop,opsize,l,tcallparanode(left).left.location.reference);
end;
LOC_CSUBSETREG,
LOC_CREGISTER :
hlcg.a_op_const_loc(current_asmdata.CurrAsmList,cgop,tcallparanode(left).left.resultdef,l,tcallparanode(left).left.location);
else
internalerror(200405022);
end;
end
else
begin
orgsize:=opsize;
if opsize in [OS_8,OS_S8] then
begin
opdef:=u32inttype;
opsize:=OS_32;
end;
{ determine asm operator }
if inlinenumber=in_include_x_y then
asmop:=A_BTS
else
asmop:=A_BTR;
hlcg.location_force_reg(current_asmdata.CurrAsmList,tcallparanode(tcallparanode(left).right).left.location,tcallparanode(tcallparanode(left).right).left.resultdef,opdef,true);
register_maybe_adjust_setbase(current_asmdata.CurrAsmList,tcallparanode(tcallparanode(left).right).left.resultdef,tcallparanode(tcallparanode(left).right).left.location,setbase);
hregister:=tcallparanode(tcallparanode(left).right).left.location.register;
if tcallparanode(left).left.location.loc=LOC_REFERENCE then
emit_reg_ref(asmop,tcgsize2opsize[opsize],hregister,tcallparanode(left).left.location.reference)
else
begin
{ second argument can't be an 8 bit register either }
hregister2:=tcallparanode(left).left.location.register;
if (orgsize in [OS_8,OS_S8]) then
hregister2:=cg.makeregsize(current_asmdata.CurrAsmList,hregister2,opsize);
emit_reg_reg(asmop,tcgsize2opsize[opsize],hregister,hregister2);
end;
end;
end;
procedure tx86inlinenode.second_popcnt;
var
opsize: tcgsize;
begin
secondpass(left);
opsize:=tcgsize2unsigned[left.location.size];
{ no 8 Bit popcont }
if opsize=OS_8 then
opsize:=OS_16;
if not(left.location.loc in [LOC_REGISTER,LOC_CREGISTER,LOC_REFERENCE,LOC_CREFERENCE]) or
(left.location.size<>opsize) then
hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,cgsize_orddef(opsize),true);
location_reset(location,LOC_REGISTER,opsize);
location.register:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
if left.location.loc in [LOC_REGISTER,LOC_CREGISTER] then
emit_reg_reg(A_POPCNT,TCGSize2OpSize[opsize],left.location.register,location.register)
else
emit_ref_reg(A_POPCNT,TCGSize2OpSize[opsize],left.location.reference,location.register);
if resultdef.size=1 then
begin
location.size:=OS_8;
location.register:=cg.makeregsize(current_asmdata.CurrAsmList,location.register,location.size);
end;
end;
procedure tx86inlinenode.second_fma;
{$ifndef i8086}
const
op : array[false..true,false..true,s32real..s64real,0..3] of TAsmOp =
(
{ positive product }
(
{ positive third operand }
((A_VFMADD231SS,A_VFMADD231SS,A_VFMADD231SS,A_VFMADD213SS),
(A_VFMADD231SD,A_VFMADD231SD,A_VFMADD231SD,A_VFMADD213SD)
),
{ negative third operand }
((A_VFMSUB231SS,A_VFMSUB231SS,A_VFMSUB231SS,A_VFMSUB213SS),
(A_VFMSUB231SD,A_VFMSUB231SD,A_VFMSUB231SD,A_VFMSUB213SD)
)
),
{ negative product }
(
{ positive third operand }
((A_VFNMADD231SS,A_VFNMADD231SS,A_VFNMADD231SS,A_VFNMADD213SS),
(A_VFNMADD231SD,A_VFNMADD231SD,A_VFNMADD231SD,A_VFNMADD213SD)
),
{ negative third operand }
((A_VFNMSUB231SS,A_VFNMSUB231SS,A_VFNMSUB231SS,A_VFNMSUB213SS),
(A_VFNMSUB231SD,A_VFNMSUB231SD,A_VFNMSUB231SD,A_VFNMSUB213SD)
)
)
);
var
paraarray : array[1..3] of tnode;
memop,
i : integer;
negop3,
negproduct,
gotmem : boolean;
{$endif i8086}
begin
{$ifndef i8086}
if (fpu_capabilities[current_settings.fputype]*[FPUX86_HAS_FMA,FPUX86_HAS_FMA4])<>[] then
begin
negop3:=false;
negproduct:=false;
paraarray[1]:=tcallparanode(tcallparanode(tcallparanode(parameters).nextpara).nextpara).paravalue;
paraarray[2]:=tcallparanode(tcallparanode(parameters).nextpara).paravalue;
paraarray[3]:=tcallparanode(parameters).paravalue;
{ check if a neg. node can be removed
this is possible because changing the sign of
a floating point number does not affect its absolute
value in any way
}
if paraarray[1].nodetype=unaryminusn then
begin
paraarray[1]:=tunarynode(paraarray[1]).left;
{ do not release the unused unary minus node, it is kept and release together with the other nodes,
only no code is generated for it }
negproduct:=not(negproduct);
end;
if paraarray[2].nodetype=unaryminusn then
begin
paraarray[2]:=tunarynode(paraarray[2]).left;
{ do not release the unused unary minus node, it is kept and release together with the other nodes,
only no code is generated for it }
negproduct:=not(negproduct);
end;
if paraarray[3].nodetype=unaryminusn then
begin
paraarray[3]:=tunarynode(paraarray[3]).left;
{ do not release the unused unary minus node, it is kept and release together with the other nodes,
only no code is generated for it }
negop3:=true;
end;
for i:=1 to 3 do
secondpass(paraarray[i]);
{ only one memory operand is allowed }
gotmem:=false;
memop:=0;
{ in case parameters come on the FPU stack, we have to pop them in reverse order as we
called secondpass }
for i:=3 downto 1 do
begin
if not(paraarray[i].location.loc in [LOC_MMREGISTER,LOC_CMMREGISTER]) then
begin
if (paraarray[i].location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) and not(gotmem) then
begin
memop:=i;
gotmem:=true;
end
else
hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,paraarray[i].location,paraarray[i].resultdef,true);
end;
end;
location_reset(location,LOC_MMREGISTER,paraarray[1].location.size);
location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
if gotmem then
begin
case memop of
1:
begin
hlcg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,paraarray[3].resultdef,resultdef,
paraarray[3].location.register,location.register,mms_movescalar);
emit_ref_reg_reg(op[negproduct,negop3,tfloatdef(resultdef).floattype,memop],S_NO,
paraarray[1].location.reference,paraarray[2].location.register,location.register);
end;
2:
begin
hlcg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,paraarray[3].resultdef,resultdef,
paraarray[3].location.register,location.register,mms_movescalar);
emit_ref_reg_reg(op[negproduct,negop3,tfloatdef(resultdef).floattype,memop],S_NO,
paraarray[2].location.reference,paraarray[1].location.register,location.register);
end;
3:
begin
hlcg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,paraarray[1].resultdef,resultdef,
paraarray[1].location.register,location.register,mms_movescalar);
emit_ref_reg_reg(op[negproduct,negop3,tfloatdef(resultdef).floattype,memop],S_NO,
paraarray[3].location.reference,paraarray[2].location.register,location.register);
end
else
internalerror(2014041301);
end;
end
else
begin
{ try to use the location which is already in a temp. mm register as destination,
so the compiler might be able to re-use the register }
if paraarray[1].location.loc=LOC_MMREGISTER then
begin
hlcg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,paraarray[1].resultdef,resultdef,
paraarray[1].location.register,location.register,mms_movescalar);
emit_reg_reg_reg(op[negproduct,negop3,tfloatdef(resultdef).floattype,3],S_NO,
paraarray[3].location.register,paraarray[2].location.register,location.register);
end
else if paraarray[2].location.loc=LOC_MMREGISTER then
begin
hlcg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,paraarray[2].resultdef,resultdef,
paraarray[2].location.register,location.register,mms_movescalar);
emit_reg_reg_reg(op[negproduct,negop3,tfloatdef(resultdef).floattype,3],S_NO,
paraarray[3].location.register,paraarray[1].location.register,location.register);
end
else
begin
hlcg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,paraarray[3].resultdef,resultdef,
paraarray[3].location.register,location.register,mms_movescalar);
emit_reg_reg_reg(op[negproduct,negop3,tfloatdef(resultdef).floattype,0],S_NO,
paraarray[1].location.register,paraarray[2].location.register,location.register);
end;
end;
end
else
{$endif i8086}
internalerror(2014032301);
end;
procedure tx86inlinenode.second_frac_real;
var
extrareg : TRegister;
begin
if use_vectorfpu(resultdef) then
begin
secondpass(left);
hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
if UseAVX then
case tfloatdef(left.resultdef).floattype of
s32real:
begin
{$ifndef i8086}
if UseAVX512 and (FPUX86_HAS_AVX512DQ in fpu_capabilities[current_settings.fputype]) then
current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg_reg(A_VREDUCESS,S_NO,3,left.location.register,left.location.register,location.register))
else
{$endif not i8086}
begin
{ using left.location.register here as 3rd parameter is crucial to break dependency chains }
current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg_reg(A_VROUNDSS,S_NO,3,left.location.register,left.location.register,location.register));
current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(A_VSUBSS,S_NO,location.register,left.location.register,location.register));
end;
end;
s64real:
begin
{$ifndef i8086}
if UseAVX512 and (FPUX86_HAS_AVX512DQ in fpu_capabilities[current_settings.fputype]) then
current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg_reg(A_VREDUCESD,S_NO,3,left.location.register,left.location.register,location.register))
else
{$endif not i8086}
begin
{ using left.location.register here as 3rd parameter is crucial to break dependency chains }
current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg_reg(A_VROUNDSD,S_NO,3,left.location.register,left.location.register,location.register));
current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(A_VSUBSD,S_NO,location.register,left.location.register,location.register));
end;
end;
else
internalerror(2017052102);
end
else
begin
extrareg:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
cg.a_loadmm_loc_reg(current_asmdata.CurrAsmList,location.size,left.location,location.register,mms_movescalar);
case tfloatdef(left.resultdef).floattype of
s32real:
begin
current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(A_ROUNDSS,S_NO,3,left.location.register,extrareg));
current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_SUBSS,S_NO,extrareg,location.register));
end;
s64real:
begin
current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(A_ROUNDSD,S_NO,3,left.location.register,extrareg));
current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_SUBSD,S_NO,extrareg,location.register));
end;
else
internalerror(2017052103);
end;
end;
if tfloatdef(left.resultdef).floattype<>tfloatdef(resultdef).floattype then
hlcg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,left.resultdef,resultdef,location.register,location.register,mms_movescalar);
end
else
internalerror(2017052101);
end;
procedure tx86inlinenode.second_int_real;
begin
if use_vectorfpu(resultdef) then
begin
secondpass(left);
hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
location_reset(location,LOC_MMREGISTER,left.location.size);
location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
if UseAVX then
case tfloatdef(resultdef).floattype of
s32real:
{ using left.location.register here as 3rd parameter is crucial to break dependency chains }
current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg_reg(A_VROUNDSS,S_NO,3,left.location.register,left.location.register,location.register));
s64real:
{ using left.location.register here as 3rd parameter is crucial to break dependency chains }
current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg_reg(A_VROUNDSD,S_NO,3,left.location.register,left.location.register,location.register));
else
internalerror(2017052105);
end
else
begin
case tfloatdef(resultdef).floattype of
s32real:
current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(A_ROUNDSS,S_NO,3,left.location.register,location.register));
s64real:
current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(A_ROUNDSD,S_NO,3,left.location.register,location.register));
else
internalerror(2017052106);
end;
end;
end
else
internalerror(2017052107);
end;
procedure tx86inlinenode.second_high;
var
donelab: tasmlabel;
hregister : tregister;
href : treference;
begin
secondpass(left);
if not(is_dynamic_array(left.resultdef)) then
Internalerror(2019122809);
{ length in dynamic arrays is at offset -sizeof(pint) }
hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,left.resultdef,false);
current_asmdata.getjumplabel(donelab);
{ by subtracting 1 here, we get the -1 into the register we need if the dyn. array is nil and the carry
flag is set in this case, so we can jump depending on it
when loading the actual high value, we have to take care later of the decreased value
do not use the cgs, as they might emit dec instead of a sub instruction, however with dec the trick
we are using is not working as dec does not touch the carry flag }
current_asmdata.CurrAsmList.concat(taicpu.op_const_reg(A_SUB,TCGSize2OpSize[def_cgsize(left.resultdef)],1,left.location.register));
{ volatility of the dyn. array refers to the volatility of the
string pointer, not of the string data }
cg.a_jmp_flags(current_asmdata.CurrAsmList,F_C,donelab);
hlcg.reference_reset_base(href,left.resultdef,left.location.register,-ossinttype.size+1,ctempposinvalid,ossinttype.alignment,[]);
{ if the string pointer is nil, the length is 0 -> reuse the register
that originally held the string pointer for the length, so that we
can keep the original nil/0 as length in that case }
hregister:=cg.makeregsize(current_asmdata.CurrAsmList,left.location.register,def_cgsize(resultdef));
hlcg.a_load_ref_reg(current_asmdata.CurrAsmList,ossinttype,resultdef,href,hregister);
cg.a_label(current_asmdata.CurrAsmList,donelab);
location_reset(location,LOC_REGISTER,def_cgsize(resultdef));
location.register:=hregister;
end;
procedure tx86inlinenode.second_minmax;
{$ifndef i8086}
const
oparray : array[false..true,false..true,s32real..s64real] of TAsmOp =
(
(
(A_MINSS,A_MINSD),
(A_VMINSS,A_VMINSD)
),
(
(A_MAXSS,A_MAXSD),
(A_VMAXSS,A_VMAXSD)
)
);
{$endif i8086}
var
{$ifndef i8086}
memop : integer;
gotmem : boolean;
op: TAsmOp;
{$endif i8086}
i : integer;
paraarray : array[1..2] of tnode;
instr: TAiCpu;
opsize: topsize;
finalval: TCgInt;
begin
{$ifndef i8086}
if
{$ifdef i386}
((current_settings.fputype>=fpu_sse) and is_single(resultdef)) or
((current_settings.fputype>=fpu_sse2) and is_double(resultdef))
{$else i386}
is_single(resultdef) or is_double(resultdef)
{$endif i386}
then
begin
paraarray[1]:=tcallparanode(tcallparanode(parameters).nextpara).paravalue;
paraarray[2]:=tcallparanode(parameters).paravalue;
for i:=low(paraarray) to high(paraarray) do
secondpass(paraarray[i]);
{ only one memory operand is allowed }
gotmem:=false;
memop:=0;
for i:=low(paraarray) to high(paraarray) do
begin
if not(paraarray[i].location.loc in [LOC_MMREGISTER,LOC_CMMREGISTER]) then
begin
if (paraarray[i].location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) and not(gotmem) then
begin
memop:=i;
gotmem:=true;
end
else
hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,paraarray[i].location,paraarray[i].resultdef,true);
end;
end;
{ due to min/max behaviour that it loads always the second operand (must be the else assignment) into destination if
one of the operands is a NaN, we cannot swap operands to omit a mova operation in case fastmath is off }
if not(cs_opt_fastmath in current_settings.optimizerswitches) and gotmem and (memop=1) then
begin
hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,paraarray[1].location,paraarray[1].resultdef,true);
gotmem:=false;
end;
op:=oparray[inlinenumber in [in_max_single,in_max_double],UseAVX,tfloatdef(resultdef).floattype];
location_reset(location,LOC_MMREGISTER,paraarray[1].location.size);
location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
if gotmem then
begin
if UseAVX then
case memop of
1:
emit_ref_reg_reg(op,S_NO,
paraarray[1].location.reference,paraarray[2].location.register,location.register);
2:
emit_ref_reg_reg(op,S_NO,
paraarray[2].location.reference,paraarray[1].location.register,location.register);
else
internalerror(2020120504);
end
else
case memop of
1:
begin
hlcg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,paraarray[2].resultdef,resultdef,
paraarray[2].location.register,location.register,mms_movescalar);
emit_ref_reg(op,S_NO,
paraarray[1].location.reference,location.register);
end;
2:
begin
hlcg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,paraarray[1].resultdef,resultdef,
paraarray[1].location.register,location.register,mms_movescalar);
emit_ref_reg(op,S_NO,
paraarray[2].location.reference,location.register);
end;
else
internalerror(2020120601);
end;
end
else
begin
if UseAVX then
emit_reg_reg_reg(op,S_NO,
paraarray[2].location.register,paraarray[1].location.register,location.register)
else
begin
hlcg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,paraarray[1].resultdef,resultdef,
paraarray[1].location.register,location.register,mms_movescalar);
emit_reg_reg(op,S_NO,
paraarray[2].location.register,location.register)
end;
end;
end
else
{$endif i8086}
if
{$ifndef x86_64}
(CPUX86_HAS_CMOV in cpu_capabilities[current_settings.cputype]) and
{$endif x86_64}
(
{$ifdef x86_64}
is_64bitint(resultdef) or
{$endif x86_64}
is_32bitint(resultdef)
) then
begin
{ paraarray[1] is the right-hand side }
paraarray[1]:=tcallparanode(tcallparanode(parameters).nextpara).paravalue;
paraarray[2]:=tcallparanode(parameters).paravalue;
for i:=low(paraarray) to high(paraarray) do
secondpass(paraarray[i]);
if paraarray[2].location.loc = LOC_CONSTANT then
begin
{ Swap the parameters so the constant is on the right }
paraarray[2]:=paraarray[1];
paraarray[1]:=tcallparanode(parameters).paravalue;
end;
location_reset(location,LOC_REGISTER,paraarray[1].location.size);
location.register:=cg.getintregister(current_asmdata.CurrAsmList,location.size);
hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,paraarray[1].resultdef,resultdef,paraarray[1].location,location.register);
cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
{$ifdef x86_64}
if is_64bitint(resultdef) then
opsize := S_Q
else
{$endif x86_64}
opsize := S_L;
{ Try to use references as is, unless they would trigger internal
error 200502052 }
if (cs_create_pic in current_settings.moduleswitches) and
Assigned(paraarray[2].location.reference.symbol) then
hlcg.location_force_reg(current_asmdata.CurrAsmList,paraarray[2].location,
paraarray[2].resultdef,paraarray[2].resultdef,true);
case paraarray[1].location.loc of
LOC_CONSTANT:
case paraarray[2].location.loc of
LOC_REFERENCE,LOC_CREFERENCE:
begin
current_asmdata.CurrAsmList.concat(taicpu.op_const_ref(A_CMP,opsize,
paraarray[1].location.value,paraarray[2].location.reference));
instr:=TAiCpu.op_ref_reg(A_CMOVcc,opsize,paraarray[2].location.reference,location.register);
end;
LOC_REGISTER,LOC_CREGISTER:
begin
current_asmdata.CurrAsmList.concat(taicpu.op_const_reg(A_CMP,opsize,
paraarray[1].location.value,paraarray[2].location.register));
instr:=TAiCpu.op_reg_reg(A_CMOVcc,opsize,paraarray[2].location.register,location.register);
end;
else
InternalError(2021121907);
end;
LOC_REFERENCE,LOC_CREFERENCE:
case paraarray[2].location.loc of
LOC_REFERENCE,LOC_CREFERENCE:
begin
{ The reference has already been stored at location.register, so use that }
current_asmdata.CurrAsmList.concat(taicpu.op_reg_ref(A_CMP,opsize,
location.register,paraarray[2].location.reference));
instr:=TAiCpu.op_ref_reg(A_CMOVcc,opsize,paraarray[2].location.reference,location.register);
end;
LOC_REGISTER,LOC_CREGISTER:
begin
current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(A_CMP,opsize,
paraarray[1].location.reference,paraarray[2].location.register));
instr:=TAiCpu.op_reg_reg(A_CMOVcc,opsize,paraarray[2].location.register,location.register);
end;
else
InternalError(2021121906);
end;
LOC_REGISTER,LOC_CREGISTER:
case paraarray[2].location.loc of
LOC_REFERENCE,LOC_CREFERENCE:
begin
current_asmdata.CurrAsmList.concat(taicpu.op_reg_ref(A_CMP,opsize,
paraarray[1].location.register,paraarray[2].location.reference));
instr:=TAiCpu.op_ref_reg(A_CMOVcc,opsize,paraarray[2].location.reference,location.register);
end;
LOC_REGISTER,LOC_CREGISTER:
begin
current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CMP,opsize,
paraarray[1].location.register,paraarray[2].location.register));
instr:=TAiCpu.op_reg_reg(A_CMOVcc,opsize,paraarray[2].location.register,location.register);
end;
else
InternalError(2021121905);
end;
else
InternalError(2021121904);
end;
case inlinenumber of
in_min_longint,
in_min_int64:
instr.condition := C_L;
in_min_dword,
in_min_qword:
instr.condition := C_B;
in_max_longint,
in_max_int64:
instr.condition := C_G;
in_max_dword,
in_max_qword:
instr.condition := C_A;
else
Internalerror(2021121903);
end;
current_asmdata.CurrAsmList.concat(instr);
cg.a_reg_dealloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
end
else
internalerror(2020120503);
end;
end.