avx512 disp8*N

git-svn-id: branches/tg74/avx512@39909 -
This commit is contained in:
tg74 2018-10-09 21:19:52 +00:00
parent 7deacdd036
commit 1ef9cc01e6
10 changed files with 8157 additions and 7854 deletions

View File

@ -1,2 +1,2 @@
{ don't edit, this file is generated from x86ins.dat }
4039;
4041;

File diff suppressed because it is too large Load Diff

View File

@ -1,2 +1,2 @@
{ don't edit, this file is generated from x86ins.dat }
4071;
4073;

File diff suppressed because it is too large Load Diff

View File

@ -368,6 +368,7 @@ interface
TMemRefSizeInfoBCST = (msbUnknown, msbBCST32, msbBCST64, msbMultiple);
TMemRefSizeInfoBCSTType = (btUnknown, bt1to2, bt1to4, bt1to8, bt1to16);
TEVEXTupleState = (etsUnknown, etsIsTuple, etsNotTuple);
TConstSizeInfo = (csiUnkown, csiMultiple, csiNoSize, csiMem8, csiMem16, csiMem32, csiMem64);
TInsTabMemRefSizeInfoRec = record
@ -379,6 +380,8 @@ interface
BCSTTypes : Set of TMemRefSizeInfoBCSTType;
end;
const
MemRefMultiples: set of TMemRefSizeInfo = [msiMultiple, msiMultiple8,
msiMultiple16, msiMultiple32,
@ -495,7 +498,21 @@ interface
IF_BCST2,
IF_BCST4,
IF_BCST8,
IF_BCST16
IF_BCST16,
IF_T2, { disp8 - tuple - 2 }
IF_T4, { disp8 - tuple - 4 }
IF_T8, { disp8 - tuple - 8 }
IF_T1S, { disp8 - tuple - 1 scalar }
IF_T1F32,
IF_T1F64,
IF_TMDDUP,
IF_TFV, { disp8 - tuple - full vector }
IF_TFVM, { disp8 - tuple - full vector memory }
IF_TQVM,
IF_TMEM128,
IF_THV,
IF_THVM,
IF_TOVM
);
tinsflags=set of tinsflag;
@ -504,6 +521,9 @@ interface
IF_ARMASK=[IF_AR0,IF_AR1,IF_AR2]; { mask for unsized argument spec }
IF_PLEVEL=[IF_8086..IF_NEC]; { mask for processor level }
IF_TUPLEMASK=[IF_T2..IF_TOVM]; { mask for AVX512 disp8-tuples }
type
tinsentry=packed record
opcode : tasmop;
@ -602,6 +622,7 @@ interface
insoffset : longint;
LastInsOffset : longint; { need to be public to be reset }
inssize : shortint;
EVEXTupleState: TEVEXTupleState; { AVX512 disp8*N }
{$ifdef x86_64}
rex : byte;
{$endif x86_64}
@ -617,6 +638,7 @@ interface
procedure Swapoperands;
function FindInsentry(objdata:TObjData):boolean;
function CheckUseEVEX: boolean;
procedure CheckEVEXTuple(const aInput:toper; aInsEntry: pInsentry; aIsVector128, aIsVector256, aIsVector512, aIsEVEXW1: boolean);
end;
function is_64_bit_ref(const ref:treference):boolean;
@ -978,6 +1000,7 @@ implementation
LastInsOffset:=-1;
InsOffset:=0;
InsSize:=0;
EVEXTupleState := etsUnknown;
end;
@ -1974,6 +1997,191 @@ implementation
end;
end;
procedure taicpu.CheckEVEXTuple(const aInput:toper; aInsEntry: pInsentry; aIsVector128, aIsVector256, aIsVector512, aIsEVEXW1: boolean);
var
i: integer;
tuplesize: integer;
memsize: integer;
begin
if EVEXTupleState = etsUnknown then
begin
EVEXTupleState := etsNotTuple;
if aInsEntry^.Flags * IF_TUPLEMASK <> [] then
begin
tuplesize := 0;
if IF_TFV in aInsEntry^.Flags then
begin
for i := 0 to aInsEntry^.ops - 1 do
if (aInsEntry^.optypes[i] and OT_BMEM32 = OT_BMEM32) then
begin
tuplesize := 4;
break;
end
else if (aInsEntry^.optypes[i] and OT_BMEM64 = OT_BMEM64) then
begin
tuplesize := 8;
break;
end
else if (aInsEntry^.optypes[i] and OT_MEMORY = OT_MEMORY) then
begin
if aIsVector512 then tuplesize := 64
else if aIsVector256 then tuplesize := 32
else tuplesize := 16;
break;
end
else if (aInsEntry^.optypes[i] and OT_REGNORM = OT_REGMEM) then
begin
if aIsVector512 then tuplesize := 64
else if aIsVector256 then tuplesize := 32
else tuplesize := 16;
break;
end;
end
else if IF_THV in aInsEntry^.Flags then
begin
for i := 0 to aInsEntry^.ops - 1 do
if (aInsEntry^.optypes[i] and OT_BMEM32 = OT_BMEM32) then
begin
tuplesize := 4;
break;
end
else if (aInsEntry^.optypes[i] and OT_REGNORM = OT_REGMEM) then
begin
if aIsVector512 then tuplesize := 32
else if aIsVector256 then tuplesize := 16
else tuplesize := 8;
break;
end
end
else if IF_TFVM in aInsEntry^.Flags then
begin
if aIsVector512 then tuplesize := 64
else if aIsVector256 then tuplesize := 32
else tuplesize := 16;
end
else
begin
memsize := 0;
for i := 0 to aInsEntry^.ops - 1 do
begin
if aInsEntry^.optypes[i] and (OT_REGNORM or OT_MEMORY) = OT_REGMEM then
begin
case aInsEntry^.optypes[i] and (OT_BITS32 or OT_BITS64) of
OT_BITS32: begin
memsize := 32;
break;
end;
OT_BITS64: begin
memsize := 64;
break;
end;
end;
end
else
case aInsEntry^.optypes[i] and (OT_MEM8 or OT_MEM16 or OT_MEM32 or OT_MEM64) of
OT_MEM8: begin
memsize := 8;
break;
end;
OT_MEM16: begin
memsize := 16;
break;
end;
OT_MEM32: begin
memsize := 32;
break;
end;
OT_MEM64: //if aIsEVEXW1 then
begin
memsize := 64;
break;
end;
end;
end;
if IF_T1S in aInsEntry^.Flags then
begin
case memsize of
8: tuplesize := 1;
16: tuplesize := 2;
else if aIsEVEXW1 then tuplesize := 8
else tuplesize := 4;
end;
end
else if IF_T1F32 in aInsEntry^.Flags then tuplesize := 4
else if IF_T1F64 in aInsEntry^.Flags then tuplesize := 8
else if IF_T2 in aInsEntry^.Flags then
begin
case aIsEVEXW1 of
false: tuplesize := 8;
else if aIsVector256 or aIsVector512 then tuplesize := 16;
end;
end
else if IF_T4 in aInsEntry^.Flags then
begin
case aIsEVEXW1 of
false: if aIsVector256 or aIsVector512 then tuplesize := 16;
else if aIsVector512 then tuplesize := 32;
end;
end
else if IF_T8 in aInsEntry^.Flags then
begin
case aIsEVEXW1 of
false: if aIsVector512 then tuplesize := 32;
end;
end
else if IF_THVM in aInsEntry^.Flags then
begin
tuplesize := 8; // default 128bit-vectorlength
if aIsVector256 then tuplesize := 16
else if aIsVector512 then tuplesize := 32;
end
else if IF_TQVM in aInsEntry^.Flags then
begin
tuplesize := 4; // default 128bit-vectorlength
if aIsVector256 then tuplesize := 8
else if aIsVector512 then tuplesize := 16;
end
else if IF_TOVM in aInsEntry^.Flags then
begin
tuplesize := 2; // default 128bit-vectorlength
if aIsVector256 then tuplesize := 4
else if aIsVector512 then tuplesize := 8;
end
else if IF_TMEM128 in aInsEntry^.Flags then tuplesize := 16
else if IF_TMDDUP in aInsEntry^.Flags then
begin
tuplesize := 8; // default 128bit-vectorlength
if aIsVector256 then tuplesize := 32
else if aIsVector512 then tuplesize := 64;
end;
end;;
if tuplesize > 0 then
begin
if aInput.typ = top_ref then
begin
if (aInput.ref^.offset <> 0) and
((aInput.ref^.offset mod tuplesize) = 0) and
(abs(aInput.ref^.offset) div tuplesize <= 127) then
begin
aInput.ref^.offset := aInput.ref^.offset div tuplesize;
EVEXTupleState := etsIsTuple;
end;
end;
end;
end;
end;
end;
function taicpu.Pass1(objdata:TObjData):longint;
@ -2869,6 +3077,7 @@ implementation
len : shortint;
len_ea_data: shortint;
len_ea_data_evex: shortint;
mref_offset: asizeint;
ea_data : ea;
exists_evex: boolean;
exists_vex: boolean;
@ -2876,6 +3085,14 @@ implementation
exists_prefix_66: boolean;
exists_prefix_F2: boolean;
exists_prefix_F3: boolean;
exists_l256: boolean;
exists_l512: boolean;
exists_EVEXW1: boolean;
pmref_operand: poper;
//i: integer;
//refsize: integer;
//tuplesize: integer;
//memsize: integer;
{$ifdef x86_64}
omit_rexw : boolean;
{$endif x86_64}
@ -2890,6 +3107,8 @@ implementation
len:=0;
len_ea_data := 0;
len_ea_data_evex:= 0;
mref_offset := 0;
pmref_operand := nil;
codes:=@p^.code[0];
exists_vex := false;
@ -2898,6 +3117,9 @@ implementation
exists_prefix_F2 := false;
exists_prefix_F3 := false;
exists_evex := false;
exists_l256 := false;
exists_l512 := false;
exists_EVEXW1 := false;
{$ifdef x86_64}
rex:=0;
omit_rexw:=false;
@ -3054,14 +3276,20 @@ implementation
end;
{$endif x86_64}
if process_ea(oper[(c shr 3) and 7]^, ea_data, 0, true) then
len_ea_data_evex := ea_data.size;
if process_ea(oper[(c shr 3) and 7]^, ea_data, 0, false) then
begin
len_ea_data := ea_data.size;
inc(len,ea_data.size);
end
else Message(asmw_e_invalid_effective_address);
if (oper[(c shr 3) and 7]^.typ = top_ref) and
(oper[(c shr 3) and 7]^.ref^.offset <> 0) then
begin
if (exists_vex and exists_evex and CheckUseEVEX) or
(not(exists_vex) and exists_evex) then
begin
CheckEVEXTuple(oper[(c shr 3) and 7]^, p, not(exists_l256 or exists_l512), exists_l256, exists_l512, exists_EVEXW1);
//const aInput:toper; aInsEntry: pInsentry; aIsVector128, aIsVector256, aIsVector512, aIsEVEXW1: boolean);
end;
end;
if process_ea(oper[(c shr 3) and 7]^, ea_data, 0, EVEXTupleState = etsNotTuple) then
inc(len,ea_data.size)
else Message(asmw_e_invalid_effective_address);
{$ifdef x86_64}
rex:=rex or ea_data.rex;
@ -3072,8 +3300,8 @@ implementation
begin
exists_evex := true;
end;
&351: ; // EVEX length bit 512
&352: ; // EVEX W1
&351: exists_l512 := true; // EVEX length bit 512
&352: exists_EVEXW1 := true; // EVEX W1
&362: // VEX prefix for AVX (length = 2 or 3 bytes, dependens on REX.XBW or opcode-prefix ($0F38 or $0F3A))
// =>> DEFAULT = 2 Bytes
begin
@ -3093,7 +3321,7 @@ implementation
exists_vex_extension := true;
end;
end;
&364: ; // VEX length bit 256
&364: exists_l256 := true; // VEX length bit 256
&366, // operand 2 (ymmreg) encoded immediate byte (bit 4-7)
&367: inc(len); // operand 3 (ymmreg) encoded immediate byte (bit 4-7)
@ -3143,9 +3371,6 @@ implementation
if CheckUseEVEX then
begin
inc(len, 4);
if len_ea_data <> len_ea_data_evex then
inc(len, len_ea_data_evex - len_ea_data);
end
else
begin
@ -3170,9 +3395,6 @@ implementation
if exists_prefix_66 then dec(len);
if exists_prefix_F2 then dec(len);
if exists_prefix_F3 then dec(len);
if len_ea_data <> len_ea_data_evex then
inc(len, len_ea_data_evex - len_ea_data);
end
else
begin
@ -4267,7 +4489,7 @@ implementation
rfield:=c and 7;
opidx:=(c shr 3) and 7;
if not process_ea(oper[opidx]^,ea_data,rfield, needed_EVEX) then
if not process_ea(oper[opidx]^,ea_data,rfield, EVEXTupleState = etsNotTuple) then
Message(asmw_e_invalid_effective_address);

File diff suppressed because it is too large Load Diff

View File

@ -1,2 +1,2 @@
{ don't edit, this file is generated from x86ins.dat }
4092;
4096;

File diff suppressed because it is too large Load Diff

View File

@ -201,7 +201,23 @@ type
IF_BCST2,
IF_BCST4,
IF_BCST8,
IF_BCST16
IF_BCST16,
IF_T2, { disp8 - tuple - 2 }
IF_T4, { disp8 - tuple - 4 }
IF_T8, { disp8 - tuple - 8 }
IF_T1S, { disp8 - tuple - 1 scalar }
IF_T1F32,
IF_T1F64,
IF_TMDDUP,
IF_TFV, { disp8 - tuple - full vector }
IF_TFVM, { disp8 - tuple - full vector memory }
IF_TQVM,
IF_TMEM128,
IF_THV,
IF_THVM,
IF_TOVM
);
tinsflags=set of tinsflag;
@ -1754,7 +1770,7 @@ begin
FReg32Base.Add('EBX');
FReg32Base.Add('ECX');
FReg32Base.Add('EDX');
FReg32Base.Add('ESP');
//FReg32Base.Add('ESP');
//FReg32Base.Add('EBP');
FReg32Base.Add('EDI');
FReg32Base.Add('ESI');
@ -1773,7 +1789,7 @@ begin
FReg64Base.Add('RBX');
FReg64Base.Add('RCX');
FReg64Base.Add('RDX');
FReg64Base.Add('RSP');
//FReg64Base.Add('RSP');
//FReg64Base.Add('RBP');
FReg64Base.Add('RDI');
FReg64Base.Add('RSI');
@ -1980,14 +1996,19 @@ begin
for il_Index := 0 to aSLIndexReg.Count - 1 do
begin
aRList.Add(format(aPrefix + '[%s + %s]%s', [aSLBaseReg[il_Base], aSLIndexReg[il_Index], aSuffix]));
aRList.Add(format(aPrefix + '[%s + %s + $10]%s', [aSLBaseReg[il_Base], aSLIndexReg[il_Index], aSuffix]));
aRList.Add(format(aPrefix + '[%s + %s + $40]%s', [aSLBaseReg[il_Base], aSLIndexReg[il_Index], aSuffix]));
aRList.Add(format(aPrefix + '[%s + %s - $10]%s', [aSLBaseReg[il_Base], aSLIndexReg[il_Index], aSuffix]));
aRList.Add(format(aPrefix + '[%s + %s - $40]%s', [aSLBaseReg[il_Base], aSLIndexReg[il_Index], aSuffix]));
aRList.Add(format(aPrefix + '[%s + %s * 2]%s', [aSLBaseReg[il_Base], aSLIndexReg[il_Index], aSuffix]));
aRList.Add(format(aPrefix + '[%s + %s * 4]%s', [aSLBaseReg[il_Base], aSLIndexReg[il_Index], aSuffix]));
aRList.Add(format(aPrefix + '[%s + %s * 8]%s', [aSLBaseReg[il_Base], aSLIndexReg[il_Index], aSuffix]));
//aRList.Add(format(aPrefix + '[%s + %s * 2 + 16]%s', [aSLBaseReg[il_Base], aSLIndexReg[il_Index], aSuffix]));
//aRList.Add(format(aPrefix + '[%s + %s * 4 + 32]%s', [aSLBaseReg[il_Base], aSLIndexReg[il_Index], aSuffix]));
//aRList.Add(format(aPrefix + '[%s + %s * 8 + 48]%s', [aSLBaseReg[il_Base], aSLIndexReg[il_Index], aSuffix]));
aRList.Add(format(aPrefix + '[%s + %s * 2 + 16]%s', [aSLBaseReg[il_Base], aSLIndexReg[il_Index], aSuffix]));
aRList.Add(format(aPrefix + '[%s + %s * 4 + 32]%s', [aSLBaseReg[il_Base], aSLIndexReg[il_Index], aSuffix]));
aRList.Add(format(aPrefix + '[%s + %s * 8 + 64]%s', [aSLBaseReg[il_Base], aSLIndexReg[il_Index], aSuffix]));
end;
end;
end;

View File

@ -1110,10 +1110,10 @@ begin
FOpCodeList.Add('vcvtsd2si,1,1,1,REG64,XMMREG_ER,,');
FOpCodeList.Add('vcvtsd2ss,1,1,1,XMMREG_MZ,XMMREG,MEM64,');
FOpCodeList.Add('vcvtsd2ss,1,1,1,XMMREG_MZ,XMMREG,XMMREG_ER,');
FOpCodeList.Add('vcvtsi2sd,1,1,1,XMMREG,XMMREG_ER,RM32,');
FOpCodeList.Add('vcvtsi2sd,0,1,1,XMMREG,XMMREG_ER,RM64,');
FOpCodeList.Add('vcvtsi2ss,1,1,1,XMMREG,XMMREG_ER,RM32,');
FOpCodeList.Add('vcvtsi2ss,0,1,1,XMMREG,XMMREG_ER,RM64,');
FOpCodeList.Add('vcvtsi2sd,1,1,1,XMMREG,XMMREG_ER,REG32,');
FOpCodeList.Add('vcvtsi2sd,0,1,1,XMMREG,XMMREG_ER,REG64,');
FOpCodeList.Add('vcvtsi2ss,1,1,1,XMMREG,XMMREG_ER,REG32,');
FOpCodeList.Add('vcvtsi2ss,0,1,1,XMMREG,XMMREG_ER,REG64,');
FOpCodeList.Add('vcvtss2sd,1,1,1,XMMREG_MZ,XMMREG,MEM32,');
FOpCodeList.Add('vcvtss2sd,1,1,1,XMMREG_MZ,XMMREG,XMMREG_SAE,');
FOpCodeList.Add('vcvtss2si,1,1,1,REG32,MEM32,,');