+ x86 makes use of fpu_capabilities

* moved CPUX86_HAS_AVXUNIT to FPUX86_HAS_AVXUNIT
+ mm register allocator can be initialized with 32 mm registers of AVX512

git-svn-id: trunk@42707 -
This commit is contained in:
florian 2019-08-16 11:35:03 +00:00
parent 497c830603
commit ba203c0564
6 changed files with 77 additions and 18 deletions

View File

@ -96,6 +96,7 @@
{$define SUPPORT_SAFECALL}
{$define SUPPORT_GET_FRAME}
{$define cpucapabilities}
{$define fpucapabilities}
{$define cpucg64shiftsupport}
{$endif i386}
@ -113,6 +114,7 @@
{$define SUPPORT_SAFECALL}
{$define SUPPORT_GET_FRAME}
{$define cpucapabilities}
{$define fpucapabilities}
{$endif x86_64}
{$ifdef sparc}

View File

@ -165,13 +165,17 @@ type
CPUX86_HAS_BMI1,
CPUX86_HAS_BMI2,
CPUX86_HAS_POPCNT,
CPUX86_HAS_AVXUNIT,
CPUX86_HAS_LZCNT,
CPUX86_HAS_MOVBE,
CPUX86_HAS_FMA,
CPUX86_HAS_FMA4
);
{ Capability flags of an FPU type; fpu_capabilities stores one set of these
  per tfputype value. }
tfpuflags =
{ set for fpu_avx and fpu_avx2 in fpu_capabilities; tested by UseAVX }
(FPUX86_HAS_AVXUNIT,
{ not set by any entry of the fpu_capabilities table declared below }
FPUX86_HAS_32MMREGS
);
const
cpu_capabilities : array[tcputype] of set of tcpuflags = (
{ cpu_none } [],
@ -183,10 +187,22 @@ type
{ cpu_Pentium4 } [CPUX86_HAS_CMOV,CPUX86_HAS_SSEUNIT,CPUX86_HAS_SSE2],
{ cpu_PentiumM } [CPUX86_HAS_CMOV,CPUX86_HAS_SSEUNIT,CPUX86_HAS_SSE2],
{ cpu_core_i } [CPUX86_HAS_CMOV,CPUX86_HAS_SSEUNIT,CPUX86_HAS_SSE2,CPUX86_HAS_POPCNT],
{ cpu_core_avx } [CPUX86_HAS_CMOV,CPUX86_HAS_SSEUNIT,CPUX86_HAS_SSE2,CPUX86_HAS_POPCNT,CPUX86_HAS_AVXUNIT],
{ cpu_core_avx2 } [CPUX86_HAS_CMOV,CPUX86_HAS_SSEUNIT,CPUX86_HAS_SSE2,CPUX86_HAS_POPCNT,CPUX86_HAS_AVXUNIT,CPUX86_HAS_BMI1,CPUX86_HAS_BMI2,CPUX86_HAS_LZCNT,CPUX86_HAS_MOVBE,CPUX86_HAS_FMA]
{ cpu_core_avx } [CPUX86_HAS_CMOV,CPUX86_HAS_SSEUNIT,CPUX86_HAS_SSE2,CPUX86_HAS_POPCNT],
{ cpu_core_avx2 } [CPUX86_HAS_CMOV,CPUX86_HAS_SSEUNIT,CPUX86_HAS_SSE2,CPUX86_HAS_POPCNT,CPUX86_HAS_BMI1,CPUX86_HAS_BMI2,CPUX86_HAS_LZCNT,CPUX86_HAS_MOVBE,CPUX86_HAS_FMA]
);
{ Feature flags per FPU type, indexed by tfputype.  Only the two AVX entries
  carry FPUX86_HAS_AVXUNIT; no entry of this table sets FPUX86_HAS_32MMREGS. }
fpu_capabilities : array[tfputype] of set of tfpuflags = (
{ fpu_none } [],
{ fpu_x87 } [],
{ fpu_sse } [],
{ fpu_sse2 } [],
{ fpu_sse3 } [],
{ fpu_ssse3 } [],
{ fpu_sse41 } [],
{ fpu_sse42 } [],
{ fpu_avx } [FPUX86_HAS_AVXUNIT],
{ fpu_avx2 } [FPUX86_HAS_AVXUNIT]
);
Implementation

View File

@ -204,7 +204,7 @@ unit cgx86;
{ Returns whether the current settings select an FPU type that has an AVX unit. }
function UseAVX: boolean;
begin
{ NOTE(review): this listing shows diff hunks without their +/- markers; the
  assignment below appears to be the pre-change line and is overwritten by the
  one after it - confirm against the committed file. }
Result:=(current_settings.fputype in fpu_avx_instructionsets) {$ifndef i8086}or (CPUX86_HAS_AVXUNIT in cpu_capabilities[current_settings.cputype]){$endif i8086};
{ Effective result: always false when i8086 is defined; otherwise true exactly
  when fpu_capabilities lists FPUX86_HAS_AVXUNIT for the selected fputype.
  The selected cputype is no longer consulted here. }
Result:={$ifdef i8086}false{$else i8086}(FPUX86_HAS_AVXUNIT in fpu_capabilities[current_settings.fputype]){$endif i8086};
end;
@ -2882,7 +2882,7 @@ unit cgx86;
{$ifndef i8086}
{ avx helps only to reduce size, using it in general does at least not help on
an i7-4770 (FK) }
if (CPUX86_HAS_AVXUNIT in cpu_capabilities[current_settings.cputype]) and
if (FPUX86_HAS_AVXUNIT in fpu_capabilities[current_settings.fputype]) and
// (cs_opt_size in current_settings.optimizerswitches) and
({$ifdef i386}(len=8) or{$endif i386}(len=16) or (len=24) or (len=32) { or (len=40) or (len=48)}) then
cm:=copy_avx

View File

@ -143,6 +143,22 @@ uses
RS_XMM13 = $0d;
RS_XMM14 = $0e;
RS_XMM15 = $0f;
{ RS_ numbers for XMM16..XMM31 (values $10..$1f), continuing the sequence of
  RS_XMM0..RS_XMM15.  Unit cgcpu lists them in the register set it hands to
  the R_MMREGISTER allocator when FPUX86_HAS_32MMREGS is present in
  fpu_capabilities for the selected fputype. }
RS_XMM16 = $10;
RS_XMM17 = $11;
RS_XMM18 = $12;
RS_XMM19 = $13;
RS_XMM20 = $14;
RS_XMM21 = $15;
RS_XMM22 = $16;
RS_XMM23 = $17;
RS_XMM24 = $18;
RS_XMM25 = $19;
RS_XMM26 = $1a;
RS_XMM27 = $1b;
RS_XMM28 = $1c;
RS_XMM29 = $1d;
RS_XMM30 = $1e;
RS_XMM31 = $1f;
{$if defined(x86_64)}
RS_RFLAGS = $06;

View File

@ -60,6 +60,7 @@ unit cgcpu;
uses
globtype,globals,verbose,systems,cutils,cclasses,
cpuinfo,
symtable,paramgr,cpupi,
rgcpu,ncgutil;
@ -86,8 +87,13 @@ unit cgcpu;
rg[R_INTREGISTER]:=trgcpu.create(R_INTREGISTER,R_SUBWHOLE,[RS_RAX,RS_RDX,RS_RCX,RS_RSI,RS_RDI,RS_R8,
RS_R9,RS_R10,RS_R11,RS_RBX,RS_R12,RS_R13,RS_R14,RS_R15],first_int_imreg,[]);
rg[R_MMREGISTER]:=trgcpu.create(R_MMREGISTER,R_SUBWHOLE,[RS_XMM0,RS_XMM1,RS_XMM2,RS_XMM3,RS_XMM4,RS_XMM5,RS_XMM6,RS_XMM7,
RS_XMM8,RS_XMM9,RS_XMM10,RS_XMM11,RS_XMM12,RS_XMM13,RS_XMM14,RS_XMM15],first_mm_imreg,[]);
if FPUX86_HAS_32MMREGS in fpu_capabilities[current_settings.fputype] then
rg[R_MMREGISTER]:=trgcpu.create(R_MMREGISTER,R_SUBWHOLE,[RS_XMM0,RS_XMM1,RS_XMM2,RS_XMM3,RS_XMM4,RS_XMM5,RS_XMM6,RS_XMM7,
RS_XMM8,RS_XMM9,RS_XMM10,RS_XMM11,RS_XMM12,RS_XMM13,RS_XMM14,RS_XMM15,RS_XMM16,RS_XMM17,RS_XMM18,RS_XMM19,RS_XMM20,
RS_XMM21,RS_XMM22,RS_XMM23,RS_XMM24,RS_XMM25,RS_XMM26,RS_XMM27,RS_XMM28,RS_XMM29,RS_XMM30,RS_XMM31],first_mm_imreg,[])
else
rg[R_MMREGISTER]:=trgcpu.create(R_MMREGISTER,R_SUBWHOLE,[RS_XMM0,RS_XMM1,RS_XMM2,RS_XMM3,RS_XMM4,RS_XMM5,RS_XMM6,RS_XMM7,
RS_XMM8,RS_XMM9,RS_XMM10,RS_XMM11,RS_XMM12,RS_XMM13,RS_XMM14,RS_XMM15],first_mm_imreg,[]);
rgfpu:=Trgx86fpu.create;
end;

View File

@ -62,7 +62,8 @@ Type
fpu_sse41,
fpu_sse42,
fpu_avx,
fpu_avx2
fpu_avx2,
fpu_avx512f
);
tcontrollertype =
@ -119,7 +120,7 @@ Const
'COREAVX2'
);
fputypestr : array[tfputype] of string[6] = ('',
fputypestr : array[tfputype] of string[7] = ('',
// 'SOFT',
'SSE64',
'SSE3',
@ -127,10 +128,11 @@ Const
'SSE41',
'SSE42',
'AVX',
'AVX2'
'AVX2',
'AVX512F'
);
fputypestrllvm : array[tfputype] of string[6] = ('',
fputypestrllvm : array[tfputype] of string[7] = ('',
// 'SOFT',
'',
'sse3',
@ -138,13 +140,14 @@ Const
'sse4.1',
'sse4.2',
'avx',
'avx2'
'avx2',
'avx512f'
);
sse_singlescalar = [fpu_sse64..fpu_avx2];
sse_doublescalar = [fpu_sse64..fpu_avx2];
sse_singlescalar = [fpu_sse64..fpu_avx512f];
sse_doublescalar = [fpu_sse64..fpu_avx512f];
fpu_avx_instructionsets = [fpu_avx,fpu_avx2];
fpu_avx_instructionsets = [fpu_avx,fpu_avx2,fpu_avx512f];
{ Supported optimizations, only used for information }
supported_optimizerswitches = genericlevel1optimizerswitches+
@ -169,20 +172,36 @@ type
CPUX86_HAS_BMI1,
CPUX86_HAS_BMI2,
CPUX86_HAS_POPCNT,
CPUX86_HAS_AVXUNIT,
CPUX86_HAS_LZCNT,
CPUX86_HAS_MOVBE,
CPUX86_HAS_FMA,
CPUX86_HAS_FMA4
);
{ Capability flags of an FPU type; fpu_capabilities stores one set of these
  per tfputype value. }
tfpuflags =
{ set for fpu_avx, fpu_avx2 and fpu_avx512f in fpu_capabilities }
(FPUX86_HAS_AVXUNIT,
{ set only for fpu_avx512f; when present, unit cgcpu creates the R_MMREGISTER
  allocator with RS_XMM0..RS_XMM31 instead of RS_XMM0..RS_XMM15 }
FPUX86_HAS_32MMREGS
);
const
cpu_capabilities : array[tcputype] of set of tcpuflags = (
{ cpu_none } [],
{ Athlon64 } [CPUX86_HAS_CMOV,CPUX86_HAS_SSEUNIT,CPUX86_HAS_SSE2],
{ cpu_core_i } [CPUX86_HAS_CMOV,CPUX86_HAS_SSEUNIT,CPUX86_HAS_SSE2,CPUX86_HAS_POPCNT],
{ cpu_core_avx } [CPUX86_HAS_CMOV,CPUX86_HAS_SSEUNIT,CPUX86_HAS_SSE2,CPUX86_HAS_POPCNT,CPUX86_HAS_AVXUNIT],
{ cpu_core_avx2 } [CPUX86_HAS_CMOV,CPUX86_HAS_SSEUNIT,CPUX86_HAS_SSE2,CPUX86_HAS_POPCNT,CPUX86_HAS_AVXUNIT,CPUX86_HAS_BMI1,CPUX86_HAS_BMI2,CPUX86_HAS_LZCNT,CPUX86_HAS_MOVBE,CPUX86_HAS_FMA]
{ cpu_core_avx } [CPUX86_HAS_CMOV,CPUX86_HAS_SSEUNIT,CPUX86_HAS_SSE2,CPUX86_HAS_POPCNT],
{ cpu_core_avx2 } [CPUX86_HAS_CMOV,CPUX86_HAS_SSEUNIT,CPUX86_HAS_SSE2,CPUX86_HAS_POPCNT,CPUX86_HAS_BMI1,CPUX86_HAS_BMI2,CPUX86_HAS_LZCNT,CPUX86_HAS_MOVBE,CPUX86_HAS_FMA]
);
{ Feature flags per FPU type, indexed by tfputype.  The AVX entries carry
  FPUX86_HAS_AVXUNIT; the last entry (fpu_avx512f) additionally carries
  FPUX86_HAS_32MMREGS. }
fpu_capabilities : array[tfputype] of set of tfpuflags = (
{ fpu_none } [],
{ fpu_sse64 } [],
{ fpu_sse3 } [],
{ fpu_ssse3 } [],
{ fpu_sse41 } [],
{ fpu_sse42 } [],
{ fpu_avx } [FPUX86_HAS_AVXUNIT],
{ fpu_avx2 } [FPUX86_HAS_AVXUNIT],
{ fpu_avx512f } [FPUX86_HAS_AVXUNIT,FPUX86_HAS_32MMREGS]
);
Implementation