From b713c7380bbd705f224c1bc1ad1e26eb8f5694e4 Mon Sep 17 00:00:00 2001 From: florian Date: Wed, 23 Dec 2020 17:25:09 +0000 Subject: [PATCH] * implemented UseAVX512 properly + make use of VREDUCE* for frac(...) if AVX512DQ is available git-svn-id: trunk@47840 - --- compiler/x86/cpubase.pas | 3 +-- compiler/x86/nx86inl.pas | 22 ++++++++++++++++------ compiler/x86_64/cpuinfo.pas | 6 ++++-- 3 files changed, 21 insertions(+), 10 deletions(-) diff --git a/compiler/x86/cpubase.pas b/compiler/x86/cpubase.pas index b2d5e110d6..7f56482e87 100644 --- a/compiler/x86/cpubase.pas +++ b/compiler/x86/cpubase.pas @@ -960,8 +960,7 @@ implementation function UseAVX512: boolean; begin - // Result:=(current_settings.fputype in fpu_avx_instructionsets) {$ifndef i8086}or (CPUX86_HAS_AVXUNIT in cpu_capabilities[current_settings.cputype]){$endif i8086}; - Result:=false; + Result:={$ifdef i8086}false{$else i8086}UseAVX and (FPUX86_HAS_AVX512F in fpu_capabilities[current_settings.fputype]){$endif i8086}; end; diff --git a/compiler/x86/nx86inl.pas b/compiler/x86/nx86inl.pas index b179362538..ac4fce336b 100644 --- a/compiler/x86/nx86inl.pas +++ b/compiler/x86/nx86inl.pas @@ -1315,15 +1315,25 @@ implementation case tfloatdef(left.resultdef).floattype of s32real: begin - { using left.location.register here as 3rd parameter is crucial to break dependency chains } - current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg_reg(A_VROUNDSS,S_NO,3,left.location.register,left.location.register,location.register)); - current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(A_VSUBSS,S_NO,location.register,left.location.register,location.register)); + if UseAVX512 and (FPUX86_HAS_AVX512DQ in fpu_capabilities[current_settings.fputype]) then + current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg_reg(A_VREDUCESS,S_NO,3,left.location.register,left.location.register,location.register)) + else + begin + { using left.location.register here as 3rd parameter is crucial to break dependency chains } + 
current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg_reg(A_VROUNDSS,S_NO,3,left.location.register,left.location.register,location.register)); + current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(A_VSUBSS,S_NO,location.register,left.location.register,location.register)); + end; end; s64real: begin - { using left.location.register here as 3rd parameter is crucial to break dependency chains } - current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg_reg(A_VROUNDSD,S_NO,3,left.location.register,left.location.register,location.register)); - current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(A_VSUBSD,S_NO,location.register,left.location.register,location.register)); + if UseAVX512 and (FPUX86_HAS_AVX512DQ in fpu_capabilities[current_settings.fputype]) then + current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg_reg(A_VREDUCESD,S_NO,3,left.location.register,left.location.register,location.register)) + else + begin + { using left.location.register here as 3rd parameter is crucial to break dependency chains } + current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg_reg(A_VROUNDSD,S_NO,3,left.location.register,left.location.register,location.register)); + current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(A_VSUBSD,S_NO,location.register,left.location.register,location.register)); + end; end; else internalerror(2017052102); diff --git a/compiler/x86_64/cpuinfo.pas b/compiler/x86_64/cpuinfo.pas index 05d0537b72..a0e99a7610 100644 --- a/compiler/x86_64/cpuinfo.pas +++ b/compiler/x86_64/cpuinfo.pas @@ -182,7 +182,9 @@ type tfpuflags = (FPUX86_HAS_AVXUNIT, FPUX86_HAS_32MMREGS, - FPUX86_HAS_AVX512F + FPUX86_HAS_AVX512F, + FPUX86_HAS_AVX512VL, + FPUX86_HAS_AVX512DQ ); const @@ -203,7 +205,7 @@ type { fpu_sse42 } [], { fpu_avx } [FPUX86_HAS_AVXUNIT], { fpu_avx2 } [FPUX86_HAS_AVXUNIT], - { fpu_avx512 } [FPUX86_HAS_AVXUNIT,FPUX86_HAS_32MMREGS,FPUX86_HAS_AVX512F] + { fpu_avx512 } 
[FPUX86_HAS_AVXUNIT,FPUX86_HAS_32MMREGS,FPUX86_HAS_AVX512F,FPUX86_HAS_AVX512VL,FPUX86_HAS_AVX512DQ] ); Implementation