diff --git a/compiler/fpcdefs.inc b/compiler/fpcdefs.inc index 6d2fbd9387..64f69d0cd9 100644 --- a/compiler/fpcdefs.inc +++ b/compiler/fpcdefs.inc @@ -96,6 +96,7 @@ {$define SUPPORT_SAFECALL} {$define SUPPORT_GET_FRAME} {$define cpucapabilities} + {$define fpucapabilities} {$define cpucg64shiftsupport} {$endif i386} @@ -113,6 +114,7 @@ {$define SUPPORT_SAFECALL} {$define SUPPORT_GET_FRAME} {$define cpucapabilities} + {$define fpucapabilities} {$endif x86_64} {$ifdef sparc} diff --git a/compiler/i386/cpuinfo.pas b/compiler/i386/cpuinfo.pas index 691ecb5a74..f479f96db3 100644 --- a/compiler/i386/cpuinfo.pas +++ b/compiler/i386/cpuinfo.pas @@ -165,13 +165,17 @@ type CPUX86_HAS_BMI1, CPUX86_HAS_BMI2, CPUX86_HAS_POPCNT, - CPUX86_HAS_AVXUNIT, CPUX86_HAS_LZCNT, CPUX86_HAS_MOVBE, CPUX86_HAS_FMA, CPUX86_HAS_FMA4 ); + tfpuflags = + (FPUX86_HAS_AVXUNIT, + FPUX86_HAS_32MMREGS + ); + const cpu_capabilities : array[tcputype] of set of tcpuflags = ( { cpu_none } [], @@ -183,10 +187,22 @@ type { cpu_Pentium4 } [CPUX86_HAS_CMOV,CPUX86_HAS_SSEUNIT,CPUX86_HAS_SSE2], { cpu_PentiumM } [CPUX86_HAS_CMOV,CPUX86_HAS_SSEUNIT,CPUX86_HAS_SSE2], { cpu_core_i } [CPUX86_HAS_CMOV,CPUX86_HAS_SSEUNIT,CPUX86_HAS_SSE2,CPUX86_HAS_POPCNT], - { cpu_core_avx } [CPUX86_HAS_CMOV,CPUX86_HAS_SSEUNIT,CPUX86_HAS_SSE2,CPUX86_HAS_POPCNT,CPUX86_HAS_AVXUNIT], - { cpu_core_avx2 } [CPUX86_HAS_CMOV,CPUX86_HAS_SSEUNIT,CPUX86_HAS_SSE2,CPUX86_HAS_POPCNT,CPUX86_HAS_AVXUNIT,CPUX86_HAS_BMI1,CPUX86_HAS_BMI2,CPUX86_HAS_LZCNT,CPUX86_HAS_MOVBE,CPUX86_HAS_FMA] + { cpu_core_avx } [CPUX86_HAS_CMOV,CPUX86_HAS_SSEUNIT,CPUX86_HAS_SSE2,CPUX86_HAS_POPCNT], + { cpu_core_avx2 } [CPUX86_HAS_CMOV,CPUX86_HAS_SSEUNIT,CPUX86_HAS_SSE2,CPUX86_HAS_POPCNT,CPUX86_HAS_BMI1,CPUX86_HAS_BMI2,CPUX86_HAS_LZCNT,CPUX86_HAS_MOVBE,CPUX86_HAS_FMA] ); + fpu_capabilities : array[tfputype] of set of tfpuflags = ( + { fpu_none } [], + { fpu_x87 } [], + { fpu_sse } [], + { fpu_sse2 } [], + { fpu_sse3 } [], + { fpu_ssse3 } [], + { fpu_sse41 } [], + { fpu_sse42 } 
[], + { fpu_avx } [FPUX86_HAS_AVXUNIT], + { fpu_avx2 } [FPUX86_HAS_AVXUNIT] + ); Implementation diff --git a/compiler/x86/cgx86.pas b/compiler/x86/cgx86.pas index 8f2fba5f0c..f3080febe2 100644 --- a/compiler/x86/cgx86.pas +++ b/compiler/x86/cgx86.pas @@ -204,7 +204,7 @@ unit cgx86; function UseAVX: boolean; begin - Result:=(current_settings.fputype in fpu_avx_instructionsets) {$ifndef i8086}or (CPUX86_HAS_AVXUNIT in cpu_capabilities[current_settings.cputype]){$endif i8086}; + Result:={$ifdef i8086}false{$else i8086}(FPUX86_HAS_AVXUNIT in fpu_capabilities[current_settings.fputype]){$endif i8086}; end; @@ -2882,7 +2882,7 @@ unit cgx86; {$ifndef i8086} { avx helps only to reduce size, using it in general does at least not help on an i7-4770 (FK) } - if (CPUX86_HAS_AVXUNIT in cpu_capabilities[current_settings.cputype]) and + if (FPUX86_HAS_AVXUNIT in fpu_capabilities[current_settings.fputype]) and // (cs_opt_size in current_settings.optimizerswitches) and ({$ifdef i386}(len=8) or{$endif i386}(len=16) or (len=24) or (len=32) { or (len=40) or (len=48)}) then cm:=copy_avx diff --git a/compiler/x86/cpubase.pas b/compiler/x86/cpubase.pas index 17b1c3b62d..17ccf2ba5d 100644 --- a/compiler/x86/cpubase.pas +++ b/compiler/x86/cpubase.pas @@ -143,6 +143,22 @@ uses RS_XMM13 = $0d; RS_XMM14 = $0e; RS_XMM15 = $0f; + RS_XMM16 = $10; + RS_XMM17 = $11; + RS_XMM18 = $12; + RS_XMM19 = $13; + RS_XMM20 = $14; + RS_XMM21 = $15; + RS_XMM22 = $16; + RS_XMM23 = $17; + RS_XMM24 = $18; + RS_XMM25 = $19; + RS_XMM26 = $1a; + RS_XMM27 = $1b; + RS_XMM28 = $1c; + RS_XMM29 = $1d; + RS_XMM30 = $1e; + RS_XMM31 = $1f; {$if defined(x86_64)} RS_RFLAGS = $06; diff --git a/compiler/x86_64/cgcpu.pas b/compiler/x86_64/cgcpu.pas index 8919601623..085922e0ee 100644 --- a/compiler/x86_64/cgcpu.pas +++ b/compiler/x86_64/cgcpu.pas @@ -60,6 +60,7 @@ unit cgcpu; uses globtype,globals,verbose,systems,cutils,cclasses, + cpuinfo, symtable,paramgr,cpupi, rgcpu,ncgutil; @@ -86,8 +87,13 @@ unit cgcpu; 
rg[R_INTREGISTER]:=trgcpu.create(R_INTREGISTER,R_SUBWHOLE,[RS_RAX,RS_RDX,RS_RCX,RS_RSI,RS_RDI,RS_R8, RS_R9,RS_R10,RS_R11,RS_RBX,RS_R12,RS_R13,RS_R14,RS_R15],first_int_imreg,[]); - rg[R_MMREGISTER]:=trgcpu.create(R_MMREGISTER,R_SUBWHOLE,[RS_XMM0,RS_XMM1,RS_XMM2,RS_XMM3,RS_XMM4,RS_XMM5,RS_XMM6,RS_XMM7, - RS_XMM8,RS_XMM9,RS_XMM10,RS_XMM11,RS_XMM12,RS_XMM13,RS_XMM14,RS_XMM15],first_mm_imreg,[]); + if FPUX86_HAS_32MMREGS in fpu_capabilities[current_settings.fputype] then + rg[R_MMREGISTER]:=trgcpu.create(R_MMREGISTER,R_SUBWHOLE,[RS_XMM0,RS_XMM1,RS_XMM2,RS_XMM3,RS_XMM4,RS_XMM5,RS_XMM6,RS_XMM7, + RS_XMM8,RS_XMM9,RS_XMM10,RS_XMM11,RS_XMM12,RS_XMM13,RS_XMM14,RS_XMM15,RS_XMM16,RS_XMM17,RS_XMM18,RS_XMM19,RS_XMM20, + RS_XMM21,RS_XMM22,RS_XMM23,RS_XMM24,RS_XMM25,RS_XMM26,RS_XMM27,RS_XMM28,RS_XMM29,RS_XMM30,RS_XMM31],first_mm_imreg,[]) + else + rg[R_MMREGISTER]:=trgcpu.create(R_MMREGISTER,R_SUBWHOLE,[RS_XMM0,RS_XMM1,RS_XMM2,RS_XMM3,RS_XMM4,RS_XMM5,RS_XMM6,RS_XMM7, + RS_XMM8,RS_XMM9,RS_XMM10,RS_XMM11,RS_XMM12,RS_XMM13,RS_XMM14,RS_XMM15],first_mm_imreg,[]); rgfpu:=Trgx86fpu.create; end; diff --git a/compiler/x86_64/cpuinfo.pas b/compiler/x86_64/cpuinfo.pas index d9eb1a135e..33801e1dbe 100644 --- a/compiler/x86_64/cpuinfo.pas +++ b/compiler/x86_64/cpuinfo.pas @@ -62,7 +62,8 @@ Type fpu_sse41, fpu_sse42, fpu_avx, - fpu_avx2 + fpu_avx2, + fpu_avx512f ); tcontrollertype = @@ -119,7 +120,7 @@ Const 'COREAVX2' ); - fputypestr : array[tfputype] of string[6] = ('', + fputypestr : array[tfputype] of string[7] = ('', // 'SOFT', 'SSE64', 'SSE3', @@ -127,10 +128,11 @@ Const 'SSE41', 'SSE42', 'AVX', - 'AVX2' + 'AVX2', + 'AVX512F' ); - fputypestrllvm : array[tfputype] of string[6] = ('', + fputypestrllvm : array[tfputype] of string[7] = ('', // 'SOFT', '', 'sse3', @@ -138,13 +140,14 @@ Const 'sse4.1', 'sse4.2', 'avx', - 'avx2' + 'avx2', + 'avx512f' ); - sse_singlescalar = [fpu_sse64..fpu_avx2]; - sse_doublescalar = [fpu_sse64..fpu_avx2]; + sse_singlescalar = [fpu_sse64..fpu_avx512f]; + 
sse_doublescalar = [fpu_sse64..fpu_avx512f]; - fpu_avx_instructionsets = [fpu_avx,fpu_avx2]; + fpu_avx_instructionsets = [fpu_avx,fpu_avx2,fpu_avx512f]; { Supported optimizations, only used for information } supported_optimizerswitches = genericlevel1optimizerswitches+ @@ -169,20 +172,36 @@ type CPUX86_HAS_BMI1, CPUX86_HAS_BMI2, CPUX86_HAS_POPCNT, - CPUX86_HAS_AVXUNIT, CPUX86_HAS_LZCNT, CPUX86_HAS_MOVBE, CPUX86_HAS_FMA, CPUX86_HAS_FMA4 ); + tfpuflags = + (FPUX86_HAS_AVXUNIT, + FPUX86_HAS_32MMREGS + ); + const cpu_capabilities : array[tcputype] of set of tcpuflags = ( { cpu_none } [], { Athlon64 } [CPUX86_HAS_CMOV,CPUX86_HAS_SSEUNIT,CPUX86_HAS_SSE2], { cpu_core_i } [CPUX86_HAS_CMOV,CPUX86_HAS_SSEUNIT,CPUX86_HAS_SSE2,CPUX86_HAS_POPCNT], - { cpu_core_avx } [CPUX86_HAS_CMOV,CPUX86_HAS_SSEUNIT,CPUX86_HAS_SSE2,CPUX86_HAS_POPCNT,CPUX86_HAS_AVXUNIT], - { cpu_core_avx2 } [CPUX86_HAS_CMOV,CPUX86_HAS_SSEUNIT,CPUX86_HAS_SSE2,CPUX86_HAS_POPCNT,CPUX86_HAS_AVXUNIT,CPUX86_HAS_BMI1,CPUX86_HAS_BMI2,CPUX86_HAS_LZCNT,CPUX86_HAS_MOVBE,CPUX86_HAS_FMA] + { cpu_core_avx } [CPUX86_HAS_CMOV,CPUX86_HAS_SSEUNIT,CPUX86_HAS_SSE2,CPUX86_HAS_POPCNT], + { cpu_core_avx2 } [CPUX86_HAS_CMOV,CPUX86_HAS_SSEUNIT,CPUX86_HAS_SSE2,CPUX86_HAS_POPCNT,CPUX86_HAS_BMI1,CPUX86_HAS_BMI2,CPUX86_HAS_LZCNT,CPUX86_HAS_MOVBE,CPUX86_HAS_FMA] + ); + + fpu_capabilities : array[tfputype] of set of tfpuflags = ( + { fpu_none } [], + { fpu_sse64 } [], + { fpu_sse3 } [], + { fpu_ssse3 } [], + { fpu_sse41 } [], + { fpu_sse42 } [], + { fpu_avx } [FPUX86_HAS_AVXUNIT], + { fpu_avx2 } [FPUX86_HAS_AVXUNIT], + { fpu_avx512f } [FPUX86_HAS_AVXUNIT,FPUX86_HAS_32MMREGS] ); Implementation