diff --git a/compiler/i386/i386tab.inc b/compiler/i386/i386tab.inc index 4c4d6af73d..bf39ba67f8 100644 --- a/compiler/i386/i386tab.inc +++ b/compiler/i386/i386tab.inc @@ -10542,6 +10542,13 @@ code : #220#232#233#234#248#1#230#72; flags : [if_avx512,if_bcst8,if_tfv] ), + ( + opcode : A_VCVTPD2DQ; + ops : 2; + optypes : (ot_ymmreg_mz,ot_zmmrm,ot_none,ot_none); + code : #220#232#233#234#248#1#230#72; + flags : [if_avx512,if_tfv] + ), ( opcode : A_VCVTPD2PS; ops : 2; @@ -21332,7 +21339,7 @@ ( opcode : A_VCVTPD2UDQ; ops : 2; - optypes : (ot_ymmreg_mz,ot_zmmrm,ot_none,ot_none); + optypes : (ot_ymmreg_mz,ot_zmmrm_er,ot_none,ot_none); code : #232#233#234#248#1#121#72; flags : [if_avx512,if_tfv] ), @@ -21343,13 +21350,6 @@ code : #232#233#234#248#1#121#72; flags : [if_avx512,if_bcst8,if_tfv] ), - ( - opcode : A_VCVTPD2UDQ; - ops : 2; - optypes : (ot_ymmreg_mz,ot_zmmreg_er,ot_none,ot_none); - code : #232#233#234#248#1#121#72; - flags : [if_avx512] - ), ( opcode : A_VCVTPD2UQQ; ops : 2; diff --git a/compiler/i8086/i8086tab.inc b/compiler/i8086/i8086tab.inc index 18b9d8fa35..baf6209d77 100644 --- a/compiler/i8086/i8086tab.inc +++ b/compiler/i8086/i8086tab.inc @@ -10570,6 +10570,13 @@ code : #220#232#233#234#248#1#230#72; flags : [if_avx512,if_bcst8,if_tfv] ), + ( + opcode : A_VCVTPD2DQ; + ops : 2; + optypes : (ot_ymmreg_mz,ot_zmmrm,ot_none,ot_none); + code : #220#232#233#234#248#1#230#72; + flags : [if_avx512,if_tfv] + ), ( opcode : A_VCVTPD2PS; ops : 2; @@ -21556,7 +21563,7 @@ ( opcode : A_VCVTPD2UDQ; ops : 2; - optypes : (ot_ymmreg_mz,ot_zmmrm,ot_none,ot_none); + optypes : (ot_ymmreg_mz,ot_zmmrm_er,ot_none,ot_none); code : #232#233#234#248#1#121#72; flags : [if_avx512,if_tfv] ), @@ -21567,13 +21574,6 @@ code : #232#233#234#248#1#121#72; flags : [if_avx512,if_bcst8,if_tfv] ), - ( - opcode : A_VCVTPD2UDQ; - ops : 2; - optypes : (ot_ymmreg_mz,ot_zmmreg_er,ot_none,ot_none); - code : #232#233#234#248#1#121#72; - flags : [if_avx512] - ), ( opcode : A_VCVTPD2UQQ; ops : 2; diff 
--git a/compiler/ppcx64.lpi b/compiler/ppcx64.lpi index 6fcb6c71e8..5819327b82 100644 --- a/compiler/ppcx64.lpi +++ b/compiler/ppcx64.lpi @@ -22,10 +22,18 @@ - - + + + + + + + + + + diff --git a/compiler/x86/aasmcpu.pas b/compiler/x86/aasmcpu.pas index 1f26e1ab6b..cb72767b93 100644 --- a/compiler/x86/aasmcpu.pas +++ b/compiler/x86/aasmcpu.pas @@ -376,6 +376,9 @@ interface ExistsSSEAVX : boolean; ConstSize : TConstSizeInfo; BCSTTypes : Set of TMemRefSizeInfoBCSTType; + RegXMMSizeMask : int64; + RegYMMSizeMask : int64; + RegZMMSizeMask : int64; end; @@ -5385,6 +5388,7 @@ implementation inc(insentry); end; + if InsTabMemRefSizeInfoCache^[AsmOp].ExistsSSEAVX then begin case RegBCSTSizeMask of @@ -5536,6 +5540,11 @@ implementation begin InsTabMemRefSizeInfoCache^[AsmOp].MemRefSize := msiNoMemRef; end; + + InsTabMemRefSizeInfoCache^[AsmOp].RegXMMSizeMask:=RegXMMSizeMask; + InsTabMemRefSizeInfoCache^[AsmOp].RegYMMSizeMask:=RegYMMSizeMask; + InsTabMemRefSizeInfoCache^[AsmOp].RegZMMSizeMask:=RegZMMSizeMask; + end; end; diff --git a/compiler/x86/rax86.pas b/compiler/x86/rax86.pas index 6024c35586..9a680e7403 100644 --- a/compiler/x86/rax86.pas +++ b/compiler/x86/rax86.pas @@ -456,6 +456,23 @@ var mmregs: Set of TSubregister; multiplicator: integer; bcst1,bcst2: string; + + { Returns the lowest set bit of aValue as a single-bit mask; 0 when no bit is set. } + { int64(1) keeps shift counts 32..63 from being evaluated in 32-bit arithmetic on 32-bit hosts. } + function ScanLowestActiveBit(aValue: int64): int64; + var + i: integer; + begin + result := 0; + + for i := 0 to 63 do + if aValue and (int64(1) shl i) <> 0 then + begin + result := int64(1) shl i; + break; + end; + end; + begin ExistsMemRefNoSize := false; ExistsMemRef := false; @@ -726,13 +743,57 @@ begin msiYMem64, msiZMem64: ; // ignore; gather/scatter opcodes haven a fixed element-size, not a fixed memory-size // the vector-register have indices with base of the memory-address in the memory-operand - msiMultipleMinSize8: memrefsize := 8; - msiMultipleMinSize16: memrefsize := 16; - msiMultipleMinSize32: memrefsize := 32; - msiMultipleMinSize64: memrefsize := 64; - msiMultipleMinSize128: memrefsize := 128; - 
msiMultipleMinSize256: memrefsize := 256; - msiMultipleMinSize512: memrefsize := 512; + // msiMultipleMinSize8: memrefsize := 8; + // msiMultipleMinSize16: memrefsize := 16; + // msiMultipleMinSize32: memrefsize := 32; + // msiMultipleMinSize64: memrefsize := 64; + //msiMultipleMinSize128: memrefsize := 128; + //msiMultipleMinSize256: memrefsize := 256; + //msiMultipleMinSize512: memrefsize := 512; + msiMultipleMinSize8, + msiMultipleMinSize16, + msiMultipleMinSize32, + msiMultipleMinSize64, + msiMultipleMinSize128, + msiMultipleMinSize256, + msiMultipleMinSize512: + begin + for j := 1 to ops do + begin + if operands[j].Opr.Typ = OPR_REGISTER then + begin + case getsubreg(operands[j].opr.reg) of + R_SUBMMX: begin + memrefsize := ScanLowestActiveBit(MemRefInfo(opcode).RegXMMSizeMask); + break; + end; + R_SUBMMY: begin + memrefsize := ScanLowestActiveBit(MemRefInfo(opcode).RegYMMSizeMask); + break; + end; + R_SUBMMZ: begin + memrefsize := ScanLowestActiveBit(MemRefInfo(opcode).RegZMMSizeMask); + break; + end; + else; + end; + end; + end; + + if memrefsize = -1 then + begin + case MemRefInfo(opcode).MemRefSize of + msiMultipleMinSize8: memrefsize := 8; + msiMultipleMinSize16: memrefsize := 16; + msiMultipleMinSize32: memrefsize := 32; + msiMultipleMinSize64: memrefsize := 64; + msiMultipleMinSize128: memrefsize := 128; + msiMultipleMinSize256: memrefsize := 256; + msiMultipleMinSize512: memrefsize := 512; + else; + end; + end; + end; msiNoSize, msiNoMemRef, msiUnknown, diff --git a/compiler/x86/x86ins.dat b/compiler/x86/x86ins.dat index 05f77170ce..4670ca5f17 100644 --- a/compiler/x86/x86ins.dat +++ b/compiler/x86/x86ins.dat @@ -4179,6 +4179,7 @@ xmmreg_mz,bmem64 \334\350\352\364\370\1\xE6\110 ymmreg_mz,zmmrm_er \334\350\351\352\370\1\xE6\110 AVX512,TFV ymmreg_mz,bmem64 \334\350\351\352\370\1\xE6\110 AVX512,BCST8,TFV ;ymmreg_mz,zmmreg_er \334\350\351\352\370\1\xE6\110 AVX512 +ymmreg_mz,zmmrm \334\350\351\352\370\1\xE6\110 AVX512,TFV ; VCVTPD2PS xmmreg_mz,mem256 must 
come first - map MemRefSize 256bits correct ; map all other MemrefSize (without broasdcast MemRef) to xmmreg, xmmrm diff --git a/compiler/x86_64/x8664tab.inc b/compiler/x86_64/x8664tab.inc index 59b8f5e22b..c705661a67 100644 --- a/compiler/x86_64/x8664tab.inc +++ b/compiler/x86_64/x8664tab.inc @@ -10843,6 +10843,13 @@ code : #220#232#233#234#248#1#230#72; flags : [if_avx512,if_bcst8,if_tfv] ), + ( + opcode : A_VCVTPD2DQ; + ops : 2; + optypes : (ot_ymmreg_mz,ot_zmmrm,ot_none,ot_none); + code : #220#232#233#234#248#1#230#72; + flags : [if_avx512,if_tfv] + ), ( opcode : A_VCVTPD2PS; ops : 2; @@ -21766,7 +21773,7 @@ ( opcode : A_VCVTPD2UDQ; ops : 2; - optypes : (ot_ymmreg_mz,ot_zmmrm,ot_none,ot_none); + optypes : (ot_ymmreg_mz,ot_zmmrm_er,ot_none,ot_none); code : #232#233#234#248#1#121#72; flags : [if_avx512,if_tfv] ), @@ -21777,13 +21784,6 @@ code : #232#233#234#248#1#121#72; flags : [if_avx512,if_bcst8,if_tfv] ), - ( - opcode : A_VCVTPD2UDQ; - ops : 2; - optypes : (ot_ymmreg_mz,ot_zmmreg_er,ot_none,ot_none); - code : #232#233#234#248#1#121#72; - flags : [if_avx512] - ), ( opcode : A_VCVTPD2UQQ; ops : 2;