diff --git a/compiler/x86/aasmcpu.pas b/compiler/x86/aasmcpu.pas index 3900b97b5c..a602526b29 100644 --- a/compiler/x86/aasmcpu.pas +++ b/compiler/x86/aasmcpu.pas @@ -65,6 +65,7 @@ interface OT_VECTOR_EXT = OT_VECTORMASK or OT_VECTORZERO or OT_VECTORBCST or OT_VECTORSAE or OT_VECTORER; + OT_BITSB16 = OT_BITS16 or OT_VECTORBCST; OT_BITSB32 = OT_BITS32 or OT_VECTORBCST; OT_BITSB64 = OT_BITS64 or OT_VECTORBCST; @@ -235,6 +236,7 @@ interface OT_MEM8 = OT_MEMORY or OT_BITS8; OT_MEM16 = OT_MEMORY or OT_BITS16; OT_MEM16_M = OT_MEM16 or OT_VECTORMASK; + OT_BMEM16 = OT_MEMORY or OT_BITS16 or OT_VECTORBCST; OT_MEM32 = OT_MEMORY or OT_BITS32; OT_MEM32_M = OT_MEMORY or OT_BITS32 or OT_VECTORMASK; OT_BMEM32 = OT_MEMORY or OT_BITS32 or OT_VECTORBCST; @@ -278,13 +280,14 @@ interface OTVE_VECTOR_BCST4 = 1 shl 4; OTVE_VECTOR_BCST8 = 1 shl 5; OTVE_VECTOR_BCST16 = 3 shl 4; + OTVE_VECTOR_BCST32 = 1 shl 13; OTVE_VECTOR_RNSAE = OTVE_VECTOR_ER or 0; OTVE_VECTOR_RDSAE = OTVE_VECTOR_ER or 1 shl 6; OTVE_VECTOR_RUSAE = OTVE_VECTOR_ER or 1 shl 7; OTVE_VECTOR_RZSAE = OTVE_VECTOR_ER or 3 shl 6; - OTVE_VECTOR_BCST_MASK = OTVE_VECTOR_BCST2 or OTVE_VECTOR_BCST4 or OTVE_VECTOR_BCST8 or OTVE_VECTOR_BCST16; + OTVE_VECTOR_BCST_MASK = OTVE_VECTOR_BCST2 or OTVE_VECTOR_BCST4 or OTVE_VECTOR_BCST8 or OTVE_VECTOR_BCST16 or OTVE_VECTOR_BCST32; OTVE_VECTOR_ER_MASK = OTVE_VECTOR_RNSAE or OTVE_VECTOR_RDSAE or OTVE_VECTOR_RUSAE or OTVE_VECTOR_RZSAE; OTVE_VECTOR_MASK = OTVE_VECTOR_SAE or OTVE_VECTOR_ER or OTVE_VECTOR_ZERO or OTVE_VECTOR_WRITEMASK or OTVE_VECTOR_BCST; @@ -364,13 +367,13 @@ interface msiMultipleMinSize64, msiMultipleMinSize128, msiMultipleminSize256, msiMultipleMinSize512, msiMemRegSize, msiMemRegx16y32, msiMemRegx16y32z64, msiMemRegx32y64, msiMemRegx32y64z128, msiMemRegx64y128, msiMemRegx64y128z256, msiMemRegx64y256, msiMemRegx64y256z512, - msiMem8, msiMem16, msiMem32, msiBMem32, msiMem64, msiBMem64, msiMem128, msiMem256, msiMem512, + msiMem8, msiMem16, msiBMem16, msiMem32, msiBMem32, msiMem64, msiBMem64, msiMem128, msiMem256, msiMem512, msiXMem32, msiXMem64, msiYMem32, msiYMem64, msiZMem32, msiZMem64, msiVMemMultiple, msiVMemRegSize, msiMemRegConst128,msiMemRegConst256,msiMemRegConst512); - TMemRefSizeInfoBCST = (msbUnknown, msbBCST32, msbBCST64, msbMultiple); - TMemRefSizeInfoBCSTType = (btUnknown, bt1to2, bt1to4, bt1to8, bt1to16); + TMemRefSizeInfoBCST = (msbUnknown, msbBCST16, msbBCST32, msbBCST64, msbMultiple); + TMemRefSizeInfoBCSTType = (btUnknown, bt1to2, bt1to4, bt1to8, bt1to16, bt1to32); TEVEXTupleState = (etsUnknown, etsIsTuple, etsNotTuple); TConstSizeInfo = (csiUnknown, csiMultiple, csiNoSize, csiMem8, csiMem16, csiMem32, csiMem64); @@ -515,6 +518,7 @@ interface IF_BCST4, IF_BCST8, IF_BCST16, + IF_BCST32, IF_T2, { disp8 - tuple - 2 } IF_T4, { disp8 - tuple - 4 } IF_T8, { disp8 - tuple - 8 } @@ -1389,6 +1393,7 @@ implementation OTVE_VECTOR_BCST4: s := s + ' {1to4}'; OTVE_VECTOR_BCST8: s := s + ' {1to8}'; OTVE_VECTOR_BCST16: s := s + ' {1to16}'; + OTVE_VECTOR_BCST32: s := s + ' {1to32}'; end; if vopext and OTVE_VECTOR_ER = OTVE_VECTOR_ER then @@ -1906,12 +1911,12 @@ implementation // e.g. vfpclasspd k1, [RAX] {1to8}, 0 // =>> check flags - - case oper[i]^.vopext and (OTVE_VECTOR_BCST2 or OTVE_VECTOR_BCST4 or OTVE_VECTOR_BCST8 or OTVE_VECTOR_BCST16) of + case oper[i]^.vopext and (OTVE_VECTOR_BCST2 or OTVE_VECTOR_BCST4 or OTVE_VECTOR_BCST8 or OTVE_VECTOR_BCST16 or OTVE_VECTOR_BCST32) of OTVE_VECTOR_BCST2: if not(IF_BCST2 in p^.flags) then exit; OTVE_VECTOR_BCST4: if not(IF_BCST4 in p^.flags) then exit; OTVE_VECTOR_BCST8: if not(IF_BCST8 in p^.flags) then exit; OTVE_VECTOR_BCST16: if not(IF_BCST16 in p^.flags) then exit; + OTVE_VECTOR_BCST32: if not(IF_BCST32 in p^.flags) then exit; else exit; end; end; @@ -2048,7 +2053,12 @@ implementation if IF_TFV in aInsEntry^.Flags then begin for i := 0 to aInsEntry^.ops - 1 do - if (aInsEntry^.optypes[i] and OT_BMEM32 = OT_BMEM32) then + if (aInsEntry^.optypes[i] and OT_BMEM16 = OT_BMEM16) then + begin + tuplesize := 2; + break; + end + else if (aInsEntry^.optypes[i] and OT_BMEM32 = OT_BMEM32) then begin tuplesize := 4; break; @@ -2108,7 +2118,11 @@ implementation begin if aInsEntry^.optypes[i] and (OT_REGNORM or OT_MEMORY) = OT_REGMEM then begin - case aInsEntry^.optypes[i] and (OT_BITS32 or OT_BITS64) of + case aInsEntry^.optypes[i] and (OT_BITS16 or OT_BITS32 or OT_BITS64) of + OT_BITS16: begin + memsize := 16; + break; + end; OT_BITS32: begin memsize := 32; break; @@ -2208,14 +2222,14 @@ implementation if aInput.typ = top_ref then begin if aInput.ref^.base <> NR_NO then - begin + begin if (aInput.ref^.offset <> 0) and ((aInput.ref^.offset mod tuplesize) = 0) and (abs(aInput.ref^.offset) div tuplesize <= 127) then begin aInput.ref^.offset := aInput.ref^.offset div tuplesize; EVEXTupleState := etsIsTuple; - end; + end; end; end; end; @@ -5301,6 +5315,7 @@ implementation 0: MRefInfo := msiNoSize; OT_BITS8: MRefInfo := msiMem8; OT_BITS16: MRefInfo := msiMem16; + OT_BITSB16: MRefInfo := msiBMem16; OT_BITS32: MRefInfo := msiMem32; OT_BITSB32: MRefInfo := msiBMem32; OT_BITS64: MRefInfo := msiMem64; @@ -5328,7 +5343,7 @@ implementation else begin // ignore broadcast-memory - if not(MRefInfo in [msiBMem32, msiBMem64]) then + if not(MRefInfo in [msiBMem16, msiBMem32, msiBMem64]) then begin if InsTabMemRefSizeInfoCache^[AsmOp].MemRefSize <> MRefInfo then begin @@ -5350,12 +5365,13 @@ implementation //if not(MRefInfo in [msiBMem32, msiBMem64]) and (actRegCount > 0) then if actRegCount > 0 then begin - if MRefInfo in [msiBMem32, msiBMem64] then + if MRefInfo in [msiBMem16, msiBMem32, msiBMem64] then begin if IF_BCST2 in insentry^.flags then InsTabMemRefSizeInfoCache^[AsmOp].BCSTTypes := InsTabMemRefSizeInfoCache^[AsmOp].BCSTTypes + [bt1to2]; if IF_BCST4 in insentry^.flags then InsTabMemRefSizeInfoCache^[AsmOp].BCSTTypes := InsTabMemRefSizeInfoCache^[AsmOp].BCSTTypes + [bt1to4]; if IF_BCST8 in insentry^.flags then InsTabMemRefSizeInfoCache^[AsmOp].BCSTTypes := InsTabMemRefSizeInfoCache^[AsmOp].BCSTTypes + [bt1to8]; if IF_BCST16 in insentry^.flags then InsTabMemRefSizeInfoCache^[AsmOp].BCSTTypes := InsTabMemRefSizeInfoCache^[AsmOp].BCSTTypes + [bt1to16]; + if IF_BCST32 in insentry^.flags then InsTabMemRefSizeInfoCache^[AsmOp].BCSTTypes := InsTabMemRefSizeInfoCache^[AsmOp].BCSTTypes + [bt1to32]; //InsTabMemRefSizeInfoCache^[AsmOp].BCSTTypes @@ -5412,6 +5428,10 @@ implementation begin case RegBCSTSizeMask of 0: ; // ignore; + OT_BITSB16: begin + InsTabMemRefSizeInfoCache^[AsmOp].MemRefSizeBCST := msbBCST16; + InsTabMemRefSizeInfoCache^[AsmOp].BCSTXMMMultiplicator := 8; + end; OT_BITSB32: begin InsTabMemRefSizeInfoCache^[AsmOp].MemRefSizeBCST := msbBCST32; InsTabMemRefSizeInfoCache^[AsmOp].BCSTXMMMultiplicator := 4; @@ -5572,13 +5592,13 @@ implementation if (AsmOp <> A_CVTSI2SD) and (AsmOp <> A_CVTSI2SS) then - begin + begin inc(iCntOpcodeValError); Str(gas_needsuffix[AsmOp],hs1); Str(InsTabMemRefSizeInfoCache^[AsmOp].MemRefSize,hs2); Message3(asmr_e_not_supported_combination_attsuffix_memrefsize_type, std_op2str[AsmOp],hs1,hs2); - end; + end; end; end; end; diff --git a/compiler/x86/agx86att.pas b/compiler/x86/agx86att.pas index d71e97a561..d238f44f8c 100644 --- a/compiler/x86/agx86att.pas +++ b/compiler/x86/agx86att.pas @@ -289,11 +289,12 @@ interface if o.vopext and OTVE_VECTOR_BCST = OTVE_VECTOR_BCST then begin - case o.vopext and (OTVE_VECTOR_BCST2 or OTVE_VECTOR_BCST4 or OTVE_VECTOR_BCST8 or OTVE_VECTOR_BCST16) of + case o.vopext and OTVE_VECTOR_BCST_MASK of OTVE_VECTOR_BCST2: owner.writer.AsmWrite('{1to2}'); OTVE_VECTOR_BCST4: owner.writer.AsmWrite('{1to4}'); OTVE_VECTOR_BCST8: owner.writer.AsmWrite('{1to8}'); OTVE_VECTOR_BCST16: owner.writer.AsmWrite('{1to16}'); + OTVE_VECTOR_BCST32: owner.writer.AsmWrite('{1to32}'); else ; //TG TODO errormsg end; end; diff --git a/compiler/x86/rax86.pas b/compiler/x86/rax86.pas index 988ba2b1e2..0a6156d0f2 100644 --- a/compiler/x86/rax86.pas +++ b/compiler/x86/rax86.pas @@ -534,6 +534,7 @@ begin if ExistsBCST then begin case MemRefInfo(opcode).MemRefSizeBCST of + msbBCST16: memrefsize := 16; msbBCST32: memrefsize := 32; msbBCST64: memrefsize := 64; else @@ -907,6 +908,10 @@ begin if ExistsBCST then begin case MemRefInfo(opcode).MemRefSizeBCST of + msbBCST16: begin + tx86operand(operands[i]).opsize := S_W; + tx86operand(operands[i]).size := OS_16; + end; msbBCST32: begin tx86operand(operands[i]).opsize := S_L; tx86operand(operands[i]).size := OS_32; @@ -1484,6 +1489,8 @@ begin Message2(asmr_e_mismatch_broadcasting_elements, '1to' + bcst1, '1to' + bcst2); 16: if not(bt1to16 in MemRefInfo(opcode).BCSTTypes) then Message2(asmr_e_mismatch_broadcasting_elements, '1to' + bcst1, '1to' + bcst2); + 32: if not(bt1to32 in MemRefInfo(opcode).BCSTTypes) then + Message2(asmr_e_mismatch_broadcasting_elements, '1to' + bcst1, '1to' + bcst2); end; end else if MemRefInfo(opcode).BCSTXMMMultiplicator * multiplicator <> vbcst then @@ -1549,7 +1556,7 @@ procedure Tx86Instruction.SetInstructionOpsize; end; end; - result := true; + result := true; end else if MemRefSize in MemRefMultiples - [msiVMemMultiple] then begin @@ -2265,6 +2272,7 @@ begin OTVE_VECTOR_BCST4: s := s + ' {1to4}'; OTVE_VECTOR_BCST8: s := s + ' {1to8}'; OTVE_VECTOR_BCST16: s := s + ' {1to16}'; + OTVE_VECTOR_BCST32: s := s + ' {1to32}'; end; if vopext and OTVE_VECTOR_ER = OTVE_VECTOR_ER then diff --git a/compiler/x86/rax86int.pas b/compiler/x86/rax86int.pas index b55acbc462..9072ffebd4 100644 --- a/compiler/x86/rax86int.pas +++ b/compiler/x86/rax86int.pas @@ -40,7 +40,7 @@ Unit Rax86int; AS_COMMA,AS_LBRACKET,AS_RBRACKET,AS_LPAREN, AS_RPAREN,AS_COLON,AS_DOT,AS_PLUS,AS_MINUS,AS_STAR, AS_SEPARATOR,AS_ID,AS_REGISTER,AS_OPCODE,AS_SLASH, - AS_LOPMASK,AS_VOPMASK,AS_LOPZEROMASK,AS_VOPZEROMASK,AS_LOPBCST,AS_OPBCST1TO2,AS_OPBCST1TO4,AS_OPBCST1TO8,AS_OPBCST1TO16,AS_LOPSAE,AS_OPSAE, + AS_LOPMASK,AS_VOPMASK,AS_LOPZEROMASK,AS_VOPZEROMASK,AS_LOPBCST,AS_OPBCST1TO2,AS_OPBCST1TO4,AS_OPBCST1TO8,AS_OPBCST1TO16,AS_OPBCST1TO32,AS_LOPSAE,AS_OPSAE, AS_LOPER,AS_OPRNSAE,AS_OPRDSAE,AS_OPRUSAE,AS_OPRZSAE, {------------------ Assembler directives --------------------} AS_ALIGN,AS_DB,AS_DW,AS_DD,AS_DQ,AS_PUBLIC,AS_END, @@ -167,7 +167,7 @@ Unit Rax86int; ',','[',']','(', ')',':','.','+','-','*', ';','identifier','register','opcode','/', - '','','','','','','','','','','', + '','','','','','','','','','','','', '','','','','', '','','','','','','END', '','','','','','','','','','','','','','', @@ -506,6 +506,7 @@ Unit Rax86int; else if (actasmpattern = '1TO4') then actasmtoken := AS_OPBCST1TO4 else if (actasmpattern = '1TO8') then actasmtoken := AS_OPBCST1TO8 else if (actasmpattern = '1TO16') then actasmtoken := AS_OPBCST1TO16 + else if (actasmpattern = '1TO32') then actasmtoken := AS_OPBCST1TO32 else actasmpattern := actasmpattern_origcase; end; AS_LOPSAE: @@ -816,7 +817,7 @@ Unit Rax86int; actasmpattern:=c; c:=current_scanner.asmgetchar; { Get the possible characters } - while c in ['1','2','4','6','8','t','T','o','O'] do + while c in ['1','2','3','4','6','8','t','T','o','O'] do begin actasmpattern:=actasmpattern + c; c:=current_scanner.asmgetchar; @@ -834,6 +835,7 @@ Unit Rax86int; else if (actasmpattern = '1TO4') then actasmtoken := AS_OPBCST1TO4 else if (actasmpattern = '1TO8') then actasmtoken := AS_OPBCST1TO8 else if (actasmpattern = '1TO16') then actasmtoken := AS_OPBCST1TO16 + else if (actasmpattern = '1TO32') then actasmtoken := AS_OPBCST1TO32 else actasmpattern := actasmpattern_origcase; c:=current_scanner.asmgetchar; end @@ -990,7 +992,7 @@ Unit Rax86int; kreg: tregister; begin Consume(actasmtoken, true); - if actasmtoken in [AS_VOPMASK, AS_VOPZEROMASK, AS_OPBCST1TO2, AS_OPBCST1TO4, AS_OPBCST1TO8, AS_OPBCST1TO16, + if actasmtoken in [AS_VOPMASK, AS_VOPZEROMASK, AS_OPBCST1TO2, AS_OPBCST1TO4, AS_OPBCST1TO8, AS_OPBCST1TO16, AS_OPBCST1TO32, AS_OPSAE,AS_OPRNSAE,AS_OPRDSAE,AS_OPRUSAE,AS_OPRZSAE] then begin case actasmtoken of @@ -1020,6 +1022,10 @@ Unit Rax86int; aop.vopext := aop.vopext or OTVE_VECTOR_BCST or OTVE_VECTOR_BCST16; aop.vbcst := 16; end; + AS_OPBCST1TO32: begin + aop.vopext := aop.vopext or OTVE_VECTOR_BCST or OTVE_VECTOR_BCST32; + aop.vbcst := 32; + end; AS_OPSAE: aop.vopext := aop.vopext or OTVE_VECTOR_SAE; AS_OPRNSAE: aop.vopext := aop.vopext or OTVE_VECTOR_RNSAE; AS_OPRDSAE: aop.vopext := aop.vopext or OTVE_VECTOR_RDSAE; @@ -2674,9 +2680,9 @@ Unit Rax86int; AS_QWORD : oper.typesize:=8; AS_DQWORD : oper.typesize:=16; AS_TBYTE : oper.typesize:=10; - AS_OWORD, - AS_XMMWORD: oper.typesize:=16; - AS_YWORD, + AS_OWORD, + AS_XMMWORD: oper.typesize:=16; + AS_YWORD, AS_YMMWORD: oper.typesize:=32; AS_ZWORD, AS_ZMMWORD: oper.typesize:=64;