diff --git a/compiler/i386/i386tab.inc b/compiler/i386/i386tab.inc
index 4c4d6af73d..bf39ba67f8 100644
--- a/compiler/i386/i386tab.inc
+++ b/compiler/i386/i386tab.inc
@@ -10542,6 +10542,13 @@
code : #220#232#233#234#248#1#230#72;
flags : [if_avx512,if_bcst8,if_tfv]
),
+ (
+ opcode : A_VCVTPD2DQ;
+ ops : 2;
+ optypes : (ot_ymmreg_mz,ot_zmmrm,ot_none,ot_none);
+ code : #220#232#233#234#248#1#230#72;
+ flags : [if_avx512,if_tfv]
+ ),
(
opcode : A_VCVTPD2PS;
ops : 2;
@@ -21332,7 +21339,7 @@
(
opcode : A_VCVTPD2UDQ;
ops : 2;
- optypes : (ot_ymmreg_mz,ot_zmmrm,ot_none,ot_none);
+ optypes : (ot_ymmreg_mz,ot_zmmrm_er,ot_none,ot_none);
code : #232#233#234#248#1#121#72;
flags : [if_avx512,if_tfv]
),
@@ -21343,13 +21350,6 @@
code : #232#233#234#248#1#121#72;
flags : [if_avx512,if_bcst8,if_tfv]
),
- (
- opcode : A_VCVTPD2UDQ;
- ops : 2;
- optypes : (ot_ymmreg_mz,ot_zmmreg_er,ot_none,ot_none);
- code : #232#233#234#248#1#121#72;
- flags : [if_avx512]
- ),
(
opcode : A_VCVTPD2UQQ;
ops : 2;
diff --git a/compiler/i8086/i8086tab.inc b/compiler/i8086/i8086tab.inc
index 18b9d8fa35..baf6209d77 100644
--- a/compiler/i8086/i8086tab.inc
+++ b/compiler/i8086/i8086tab.inc
@@ -10570,6 +10570,13 @@
code : #220#232#233#234#248#1#230#72;
flags : [if_avx512,if_bcst8,if_tfv]
),
+ (
+ opcode : A_VCVTPD2DQ;
+ ops : 2;
+ optypes : (ot_ymmreg_mz,ot_zmmrm,ot_none,ot_none);
+ code : #220#232#233#234#248#1#230#72;
+ flags : [if_avx512,if_tfv]
+ ),
(
opcode : A_VCVTPD2PS;
ops : 2;
@@ -21556,7 +21563,7 @@
(
opcode : A_VCVTPD2UDQ;
ops : 2;
- optypes : (ot_ymmreg_mz,ot_zmmrm,ot_none,ot_none);
+ optypes : (ot_ymmreg_mz,ot_zmmrm_er,ot_none,ot_none);
code : #232#233#234#248#1#121#72;
flags : [if_avx512,if_tfv]
),
@@ -21567,13 +21574,6 @@
code : #232#233#234#248#1#121#72;
flags : [if_avx512,if_bcst8,if_tfv]
),
- (
- opcode : A_VCVTPD2UDQ;
- ops : 2;
- optypes : (ot_ymmreg_mz,ot_zmmreg_er,ot_none,ot_none);
- code : #232#233#234#248#1#121#72;
- flags : [if_avx512]
- ),
(
opcode : A_VCVTPD2UQQ;
ops : 2;
diff --git a/compiler/ppcx64.lpi b/compiler/ppcx64.lpi
index 6fcb6c71e8..5819327b82 100644
--- a/compiler/ppcx64.lpi
+++ b/compiler/ppcx64.lpi
@@ -22,10 +22,18 @@
-
-
+
+
+
+
+
+
+
+
+
+
diff --git a/compiler/x86/aasmcpu.pas b/compiler/x86/aasmcpu.pas
index 1f26e1ab6b..cb72767b93 100644
--- a/compiler/x86/aasmcpu.pas
+++ b/compiler/x86/aasmcpu.pas
@@ -376,6 +376,9 @@ interface
ExistsSSEAVX : boolean;
ConstSize : TConstSizeInfo;
BCSTTypes : Set of TMemRefSizeInfoBCSTType;
+ RegXMMSizeMask : int64;
+ RegYMMSizeMask : int64;
+ RegZMMSizeMask : int64;
end;
@@ -5385,6 +5388,7 @@ implementation
inc(insentry);
end;
+
if InsTabMemRefSizeInfoCache^[AsmOp].ExistsSSEAVX then
begin
case RegBCSTSizeMask of
@@ -5536,6 +5540,11 @@ implementation
begin
InsTabMemRefSizeInfoCache^[AsmOp].MemRefSize := msiNoMemRef;
end;
+
+ InsTabMemRefSizeInfoCache^[AsmOp].RegXMMSizeMask:=RegXMMSizeMask;
+ InsTabMemRefSizeInfoCache^[AsmOp].RegYMMSizeMask:=RegYMMSizeMask;
+ InsTabMemRefSizeInfoCache^[AsmOp].RegZMMSizeMask:=RegZMMSizeMask;
+
end;
end;
diff --git a/compiler/x86/rax86.pas b/compiler/x86/rax86.pas
index 6024c35586..9a680e7403 100644
--- a/compiler/x86/rax86.pas
+++ b/compiler/x86/rax86.pas
@@ -456,6 +456,21 @@ var
mmregs: Set of TSubregister;
multiplicator: integer;
bcst1,bcst2: string;
+
+  { Returns the lowest set bit of aValue as a value (e.g. 24 -> 8), or 0 when no bit is set. }
+  function ScanLowestActiveBit(aValue: int64): int64;
+  var
+    i: integer;
+  begin
+    result := 0;
+    for i := 0 to 63 do
+      if aValue and (int64(1) shl i) <> 0 then  // int64(1): a bare "1 shl i" may be evaluated in 32 bits
+      begin
+        result := int64(1) shl i;
+        break;
+      end;
+  end;
+
begin
ExistsMemRefNoSize := false;
ExistsMemRef := false;
@@ -726,13 +741,57 @@ begin
msiYMem64,
msiZMem64: ; // ignore; gather/scatter opcodes haven a fixed element-size, not a fixed memory-size
// the vector-register have indices with base of the memory-address in the memory-operand
- msiMultipleMinSize8: memrefsize := 8;
- msiMultipleMinSize16: memrefsize := 16;
- msiMultipleMinSize32: memrefsize := 32;
- msiMultipleMinSize64: memrefsize := 64;
- msiMultipleMinSize128: memrefsize := 128;
- msiMultipleMinSize256: memrefsize := 256;
- msiMultipleMinSize512: memrefsize := 512;
+ // msiMultipleMinSize8: memrefsize := 8;
+ // msiMultipleMinSize16: memrefsize := 16;
+ // msiMultipleMinSize32: memrefsize := 32;
+ // msiMultipleMinSize64: memrefsize := 64;
+ //msiMultipleMinSize128: memrefsize := 128;
+ //msiMultipleMinSize256: memrefsize := 256;
+ //msiMultipleMinSize512: memrefsize := 512;
+ msiMultipleMinSize8,
+ msiMultipleMinSize16,
+ msiMultipleMinSize32,
+ msiMultipleMinSize64,
+ msiMultipleMinSize128,
+ msiMultipleMinSize256,
+ msiMultipleMinSize512:
+ begin
+ for j := 1 to ops do
+ begin
+ if operands[j].Opr.Typ = OPR_REGISTER then
+ begin
+ case getsubreg(operands[j].opr.reg) of
+ R_SUBMMX: begin
+ memrefsize := ScanLowestActiveBit(MemRefInfo(opcode).RegXMMSizeMask);
+ break;
+ end;
+ R_SUBMMY: begin
+ memrefsize := ScanLowestActiveBit(MemRefInfo(opcode).RegYMMSizeMask);
+ break;
+ end;
+ R_SUBMMZ: begin
+ memrefsize := ScanLowestActiveBit(MemRefInfo(opcode).RegZMMSizeMask);
+ break;
+ end;
+ else;
+ end;
+ end;
+ end;
+
+ if memrefsize = -1 then
+ begin
+ case MemRefInfo(opcode).MemRefSize of
+ msiMultipleMinSize8: memrefsize := 8;
+ msiMultipleMinSize16: memrefsize := 16;
+ msiMultipleMinSize32: memrefsize := 32;
+ msiMultipleMinSize64: memrefsize := 64;
+ msiMultipleMinSize128: memrefsize := 128;
+ msiMultipleMinSize256: memrefsize := 256;
+ msiMultipleMinSize512: memrefsize := 512;
+ else;
+ end;
+ end;
+ end;
msiNoSize,
msiNoMemRef,
msiUnknown,
diff --git a/compiler/x86/x86ins.dat b/compiler/x86/x86ins.dat
index 05f77170ce..4670ca5f17 100644
--- a/compiler/x86/x86ins.dat
+++ b/compiler/x86/x86ins.dat
@@ -4179,6 +4179,7 @@ xmmreg_mz,bmem64 \334\350\352\364\370\1\xE6\110
ymmreg_mz,zmmrm_er \334\350\351\352\370\1\xE6\110 AVX512,TFV
ymmreg_mz,bmem64 \334\350\351\352\370\1\xE6\110 AVX512,BCST8,TFV
;ymmreg_mz,zmmreg_er \334\350\351\352\370\1\xE6\110 AVX512
+ymmreg_mz,zmmrm \334\350\351\352\370\1\xE6\110 AVX512,TFV
; VCVTPD2PS xmmreg_mz,mem256 must come first - map MemRefSize 256bits correct
; map all other MemrefSize (without broasdcast MemRef) to xmmreg, xmmrm
diff --git a/compiler/x86_64/x8664tab.inc b/compiler/x86_64/x8664tab.inc
index 59b8f5e22b..c705661a67 100644
--- a/compiler/x86_64/x8664tab.inc
+++ b/compiler/x86_64/x8664tab.inc
@@ -10843,6 +10843,13 @@
code : #220#232#233#234#248#1#230#72;
flags : [if_avx512,if_bcst8,if_tfv]
),
+ (
+ opcode : A_VCVTPD2DQ;
+ ops : 2;
+ optypes : (ot_ymmreg_mz,ot_zmmrm,ot_none,ot_none);
+ code : #220#232#233#234#248#1#230#72;
+ flags : [if_avx512,if_tfv]
+ ),
(
opcode : A_VCVTPD2PS;
ops : 2;
@@ -21766,7 +21773,7 @@
(
opcode : A_VCVTPD2UDQ;
ops : 2;
- optypes : (ot_ymmreg_mz,ot_zmmrm,ot_none,ot_none);
+ optypes : (ot_ymmreg_mz,ot_zmmrm_er,ot_none,ot_none);
code : #232#233#234#248#1#121#72;
flags : [if_avx512,if_tfv]
),
@@ -21777,13 +21784,6 @@
code : #232#233#234#248#1#121#72;
flags : [if_avx512,if_bcst8,if_tfv]
),
- (
- opcode : A_VCVTPD2UDQ;
- ops : 2;
- optypes : (ot_ymmreg_mz,ot_zmmreg_er,ot_none,ot_none);
- code : #232#233#234#248#1#121#72;
- flags : [if_avx512]
- ),
(
opcode : A_VCVTPD2UQQ;
ops : 2;