+ make use of the BMI2 mulx instruction, if available

git-svn-id: trunk@43706 -
This commit is contained in:
florian 2019-12-21 16:20:10 +00:00
parent 30927039a6
commit 8465710329

View File

@ -50,6 +50,7 @@ interface
aasmbase,aasmtai,aasmdata,aasmcpu,
cgbase,procinfo,
ncon,nset,cgutils,tgobj,
cpuinfo,
cga,ncgutil,cgobj,cg64f32,cgx86,
hlcgobj;
@ -446,7 +447,7 @@ interface
procedure ti386addnode.second_mul(unsigned: boolean);
var reg:Tregister;
var reg,reghi,reglo:Tregister;
ref:Treference;
use_ref:boolean;
hl4 : tasmlabel;
@ -473,30 +474,58 @@ interface
end
else
begin
{LOC_CONSTANT for example.}
{ LOC_CONSTANT for example.}
reg:=cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,left.resultdef,osuinttype,left.location,reg);
end;
{Allocate EAX.}
cg.getcpuregister(current_asmdata.CurrAsmList,NR_EAX);
{Load the right value.}
hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,right.resultdef,osuinttype,right.location,NR_EAX);
{Also allocate EDX, since it is also modified by a mul (JM).}
cg.getcpuregister(current_asmdata.CurrAsmList,NR_EDX);
if use_ref then
emit_ref(asmops[unsigned],S_L,ref)
else
emit_reg(asmops[unsigned],S_L,reg);
if needoverflowcheck and
if (CPUX86_HAS_BMI2 in cpu_capabilities[current_settings.cputype]) and
(not(needoverflowcheck) or
{ 32->64 bit cannot overflow }
(not is_64bit(resultdef)) then
is_64bit(resultdef)) then
begin
current_asmdata.getjumplabel(hl4);
cg.a_jmp_flags(current_asmdata.CurrAsmList,F_AE,hl4);
cg.a_call_name(current_asmdata.CurrAsmList,'FPC_OVERFLOW',false);
cg.a_label(current_asmdata.CurrAsmList,hl4);
cg.getcpuregister(current_asmdata.CurrAsmList,NR_EDX);
hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,right.resultdef,osuinttype,right.location,NR_EDX);
cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_EDX);
reglo:=cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
reghi:=cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
if use_ref then
current_asmdata.CurrAsmList.concat(Taicpu.Op_ref_reg_reg(A_MULX,S_L,ref,reglo,reghi))
else
emit_reg_reg_reg(A_MULX,S_L,reg,reglo,reghi);
location_reset(location,LOC_REGISTER,def_cgsize(resultdef));
location.register64.reglo:=reglo;
if is_64bit(resultdef) then
location.register64.reghi:=reghi;
location_freetemp(current_asmdata.CurrAsmList,left.location);
location_freetemp(current_asmdata.CurrAsmList,right.location);
end
else
begin
{ Allocate EAX. }
cg.getcpuregister(current_asmdata.CurrAsmList,NR_EAX);
{ Load the right value. }
hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,right.resultdef,osuinttype,right.location,NR_EAX);
{ Also allocate EDX, since it is also modified by a mul (JM). }
cg.getcpuregister(current_asmdata.CurrAsmList,NR_EDX);
if use_ref then
emit_ref(asmops[unsigned],S_L,ref)
else
emit_reg(asmops[unsigned],S_L,reg);
if needoverflowcheck and
{ 32->64 bit cannot overflow }
(not is_64bit(resultdef)) then
begin
current_asmdata.getjumplabel(hl4);
cg.a_jmp_flags(current_asmdata.CurrAsmList,F_AE,hl4);
cg.a_call_name(current_asmdata.CurrAsmList,'FPC_OVERFLOW',false);
cg.a_label(current_asmdata.CurrAsmList,hl4);
end;
set_mul_result_location;
end;
set_mul_result_location;
end;