+ enable support for a processor specific frac function

* implemented inlined frac support for CPUs with SSE4.1+

git-svn-id: trunk@36281 -
This commit is contained in:
florian 2017-05-21 11:12:59 +00:00
parent 912e6d129a
commit 8eec29b139
2 changed files with 76 additions and 0 deletions

View File

@ -64,6 +64,7 @@ interface
procedure second_popcnt; virtual;
procedure second_seg; virtual; abstract;
procedure second_fma; virtual;
procedure second_frac_real; virtual;
end;
implementation
@ -134,6 +135,8 @@ implementation
second_ln_real;
in_cos_real:
second_cos_real;
in_frac_real:
second_frac_real;
in_prefetch_var:
second_prefetch;
in_assigned_x:
@ -659,6 +662,11 @@ implementation
begin
end;
procedure tcginlinenode.second_frac_real;
begin
internalerror(2017052104);
end;
procedure tcginlinenode.second_abs_long;
var
tempreg1, tempreg2: tregister;

View File

@ -49,6 +49,7 @@ interface
function first_trunc_real: tnode; override;
function first_popcnt: tnode; override;
function first_fma: tnode; override;
function first_frac_real : tnode; override;
{ second pass override to generate these nodes }
procedure second_IncludeExclude;override;
procedure second_pi; override;
@ -69,6 +70,7 @@ interface
{$endif not i8086}
procedure second_popcnt;override;
procedure second_fma;override;
procedure second_frac_real;override;
private
procedure load_fpu_location(lnode: tnode);
end;
@ -274,6 +276,19 @@ implementation
end;
function tx86inlinenode.first_frac_real : tnode;
begin
if (current_settings.fputype>=fpu_sse41) and
((is_double(resultdef)) or (is_single(resultdef))) then
begin
expectloc:=LOC_MMREGISTER;
Result:=nil;
end
else
Result:=inherited first_frac_real;
end;
procedure tx86inlinenode.second_pi;
begin
location_reset(location,LOC_FPUREGISTER,def_cgsize(resultdef));
@ -282,6 +297,7 @@ implementation
location.register:=NR_FPU_RESULT_REG;
end;
{ load the FPU into the an fpu register }
procedure tx86inlinenode.load_fpu_location(lnode: tnode);
begin
@ -935,4 +951,56 @@ implementation
internalerror(2014032301);
end;
procedure tx86inlinenode.second_frac_real;
var
extrareg : TRegister;
begin
if use_vectorfpu(resultdef) then
begin
secondpass(left);
hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
location_reset(location,LOC_MMREGISTER,left.location.size);
location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
if UseAVX then
case tfloatdef(resultdef).floattype of
s32real:
begin
{ using left.location.register here as 3rd parameter is crucial to break dependency chains }
current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg_reg(A_VROUNDSS,S_NO,3,left.location.register,left.location.register,location.register));
current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(A_VSUBSS,S_NO,location.register,left.location.register,location.register));
end;
s64real:
begin
{ using left.location.register here as 3rd parameter is crucial to break dependency chains }
current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg_reg(A_VROUNDSD,S_NO,3,left.location.register,left.location.register,location.register));
current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(A_VSUBSD,S_NO,location.register,left.location.register,location.register));
end;
else
internalerror(2017052102);
end
else
begin
extrareg:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
cg.a_loadmm_loc_reg(current_asmdata.CurrAsmList,location.size,left.location,location.register,mms_movescalar);
case tfloatdef(resultdef).floattype of
s32real:
begin
current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(A_ROUNDSS,S_NO,3,left.location.register,extrareg));
current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_SUBSS,S_NO,extrareg,location.register));
end;
s64real:
begin
current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(A_ROUNDSD,S_NO,3,left.location.register,extrareg));
current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_SUBSD,S_NO,extrareg,location.register));
end;
else
internalerror(2017052103);
end;
end;
end
else
internalerror(2017052101);
end;
end.