avx512 disp8*N

git-svn-id: branches/tg74/avx512@39909 -
This commit is contained in:
tg74 2018-10-09 21:19:52 +00:00
parent 7deacdd036
commit 1ef9cc01e6
10 changed files with 8157 additions and 7854 deletions

View File

@ -1,2 +1,2 @@
{ don't edit, this file is generated from x86ins.dat } { don't edit, this file is generated from x86ins.dat }
4039; 4041;

File diff suppressed because it is too large Load Diff

View File

@ -1,2 +1,2 @@
{ don't edit, this file is generated from x86ins.dat } { don't edit, this file is generated from x86ins.dat }
4071; 4073;

File diff suppressed because it is too large Load Diff

View File

@ -368,6 +368,7 @@ interface
TMemRefSizeInfoBCST = (msbUnknown, msbBCST32, msbBCST64, msbMultiple); TMemRefSizeInfoBCST = (msbUnknown, msbBCST32, msbBCST64, msbMultiple);
TMemRefSizeInfoBCSTType = (btUnknown, bt1to2, bt1to4, bt1to8, bt1to16); TMemRefSizeInfoBCSTType = (btUnknown, bt1to2, bt1to4, bt1to8, bt1to16);
TEVEXTupleState = (etsUnknown, etsIsTuple, etsNotTuple);
TConstSizeInfo = (csiUnkown, csiMultiple, csiNoSize, csiMem8, csiMem16, csiMem32, csiMem64); TConstSizeInfo = (csiUnkown, csiMultiple, csiNoSize, csiMem8, csiMem16, csiMem32, csiMem64);
TInsTabMemRefSizeInfoRec = record TInsTabMemRefSizeInfoRec = record
@ -379,6 +380,8 @@ interface
BCSTTypes : Set of TMemRefSizeInfoBCSTType; BCSTTypes : Set of TMemRefSizeInfoBCSTType;
end; end;
const const
MemRefMultiples: set of TMemRefSizeInfo = [msiMultiple, msiMultiple8, MemRefMultiples: set of TMemRefSizeInfo = [msiMultiple, msiMultiple8,
msiMultiple16, msiMultiple32, msiMultiple16, msiMultiple32,
@ -495,7 +498,21 @@ interface
IF_BCST2, IF_BCST2,
IF_BCST4, IF_BCST4,
IF_BCST8, IF_BCST8,
IF_BCST16 IF_BCST16,
IF_T2, { disp8 - tuple - 2 }
IF_T4, { disp8 - tuple - 4 }
IF_T8, { disp8 - tuple - 8 }
IF_T1S, { disp8 - tuple - 1 scalar }
IF_T1F32,
IF_T1F64,
IF_TMDDUP,
IF_TFV, { disp8 - tuple - full vector }
IF_TFVM, { disp8 - tuple - full vector memory }
IF_TQVM,
IF_TMEM128,
IF_THV,
IF_THVM,
IF_TOVM
); );
tinsflags=set of tinsflag; tinsflags=set of tinsflag;
@ -504,6 +521,9 @@ interface
IF_ARMASK=[IF_AR0,IF_AR1,IF_AR2]; { mask for unsized argument spec } IF_ARMASK=[IF_AR0,IF_AR1,IF_AR2]; { mask for unsized argument spec }
IF_PLEVEL=[IF_8086..IF_NEC]; { mask for processor level } IF_PLEVEL=[IF_8086..IF_NEC]; { mask for processor level }
IF_TUPLEMASK=[IF_T2..IF_TOVM]; { mask for AVX512 disp8-tuples }
type type
tinsentry=packed record tinsentry=packed record
opcode : tasmop; opcode : tasmop;
@ -602,6 +622,7 @@ interface
insoffset : longint; insoffset : longint;
LastInsOffset : longint; { need to be public to be reset } LastInsOffset : longint; { need to be public to be reset }
inssize : shortint; inssize : shortint;
EVEXTupleState: TEVEXTupleState; { AVX512 disp8*N }
{$ifdef x86_64} {$ifdef x86_64}
rex : byte; rex : byte;
{$endif x86_64} {$endif x86_64}
@ -617,6 +638,7 @@ interface
procedure Swapoperands; procedure Swapoperands;
function FindInsentry(objdata:TObjData):boolean; function FindInsentry(objdata:TObjData):boolean;
function CheckUseEVEX: boolean; function CheckUseEVEX: boolean;
procedure CheckEVEXTuple(const aInput:toper; aInsEntry: pInsentry; aIsVector128, aIsVector256, aIsVector512, aIsEVEXW1: boolean);
end; end;
function is_64_bit_ref(const ref:treference):boolean; function is_64_bit_ref(const ref:treference):boolean;
@ -978,6 +1000,7 @@ implementation
LastInsOffset:=-1; LastInsOffset:=-1;
InsOffset:=0; InsOffset:=0;
InsSize:=0; InsSize:=0;
EVEXTupleState := etsUnknown;
end; end;
@ -1974,6 +1997,191 @@ implementation
end; end;
end; end;
{ Decides whether the memory operand aInput of an EVEX-encoded instruction can
  use the AVX-512 compressed displacement form (disp8*N) and, if so, rewrites
  the operand's displacement to the scaled value.

  The check runs only while EVEXTupleState is etsUnknown; afterwards the state
  is either etsNotTuple (default) or etsIsTuple (displacement was scaled).

  Parameters:
    aInput        operand to inspect; only top_ref operands are considered.
                  Although passed as const, aInput.ref^.offset is modified
                  through the reference pointer when compression applies.
    aInsEntry     instruction table entry; its Flags select the tuple type and
                  its optypes select the element / memory size.
    aIsVector128  not read by this routine (128 bit is the fallback whenever
                  neither aIsVector256 nor aIsVector512 is set).
    aIsVector256  vector length is 256 bit.
    aIsVector512  vector length is 512 bit.
    aIsEVEXW1     EVEX.W is 1 (selects the 64 bit element variants).

  NOTE(review): the offset is divided in place. If EVEXTupleState is reset to
  etsUnknown and this routine runs again on the same operand, the already
  scaled offset would be scaled a second time - confirm that callers never
  re-run the check after a reset without restoring the original offset. }
procedure taicpu.CheckEVEXTuple(const aInput:toper; aInsEntry: pInsentry; aIsVector128, aIsVector256, aIsVector512, aIsEVEXW1: boolean);
var
  i: integer;
  tuplesize: integer;          // scaling factor N in bytes; 0 = no compression possible
  memsize: integer;            // operand size in bits taken from the instruction entry
  scaledoffset: asizeint;      // displacement divided by N
begin
  if EVEXTupleState = etsUnknown then
  begin
    // default: plain displacement, unless every test below succeeds
    EVEXTupleState := etsNotTuple;

    if aInsEntry^.Flags * IF_TUPLEMASK <> [] then
    begin
      tuplesize := 0;

      if IF_TFV in aInsEntry^.Flags then
      begin
        // full vector: broadcast operands scale by the element size,
        // everything else by the full vector width
        for i := 0 to aInsEntry^.ops - 1 do
          if (aInsEntry^.optypes[i] and OT_BMEM32 = OT_BMEM32) then
          begin
            tuplesize := 4;
            break;
          end
          else if (aInsEntry^.optypes[i] and OT_BMEM64 = OT_BMEM64) then
          begin
            tuplesize := 8;
            break;
          end
          else if (aInsEntry^.optypes[i] and OT_MEMORY = OT_MEMORY) or
                  (aInsEntry^.optypes[i] and OT_REGNORM = OT_REGMEM) then
          begin
            if aIsVector512 then tuplesize := 64
            else if aIsVector256 then tuplesize := 32
            else tuplesize := 16;
            break;
          end;
      end
      else if IF_THV in aInsEntry^.Flags then
      begin
        // half vector: 32 bit broadcast scales by 4, otherwise half the vector width
        for i := 0 to aInsEntry^.ops - 1 do
          if (aInsEntry^.optypes[i] and OT_BMEM32 = OT_BMEM32) then
          begin
            tuplesize := 4;
            break;
          end
          else if (aInsEntry^.optypes[i] and OT_REGNORM = OT_REGMEM) then
          begin
            if aIsVector512 then tuplesize := 32
            else if aIsVector256 then tuplesize := 16
            else tuplesize := 8;
            break;
          end;
      end
      else if IF_TFVM in aInsEntry^.Flags then
      begin
        // full vector memory: always the full vector width
        if aIsVector512 then tuplesize := 64
        else if aIsVector256 then tuplesize := 32
        else tuplesize := 16;
      end
      else
      begin
        // remaining tuple types: first determine the operand size (in bits)
        // from the first operand of the entry that carries one
        memsize := 0;

        for i := 0 to aInsEntry^.ops - 1 do
        begin
          if aInsEntry^.optypes[i] and (OT_REGNORM or OT_MEMORY) = OT_REGMEM then
          begin
            case aInsEntry^.optypes[i] and (OT_BITS32 or OT_BITS64) of
              OT_BITS32: begin
                           memsize := 32;
                           break;   // leaves the for loop
                         end;
              OT_BITS64: begin
                           memsize := 64;
                           break;
                         end;
            end;
          end
          else
            case aInsEntry^.optypes[i] and (OT_MEM8 or OT_MEM16 or OT_MEM32 or OT_MEM64) of
              OT_MEM8: begin
                         memsize := 8;
                         break;
                       end;
              OT_MEM16: begin
                          memsize := 16;
                          break;
                        end;
              OT_MEM32: begin
                          memsize := 32;
                          break;
                        end;
              OT_MEM64: //if aIsEVEXW1 then
                        begin
                          memsize := 64;
                          break;
                        end;
            end;
        end;

        if IF_T1S in aInsEntry^.Flags then
        begin
          // tuple1 scalar: 8/16 bit operands scale by their size,
          // otherwise EVEX.W selects 4 or 8 bytes
          case memsize of
             8: tuplesize := 1;
            16: tuplesize := 2;
            else if aIsEVEXW1 then tuplesize := 8
                 else tuplesize := 4;
          end;
        end
        else if IF_T1F32 in aInsEntry^.Flags then tuplesize := 4
        else if IF_T1F64 in aInsEntry^.Flags then tuplesize := 8
        else if IF_T2 in aInsEntry^.Flags then
        begin
          case aIsEVEXW1 of
            false: tuplesize := 8;
            else if aIsVector256 or aIsVector512 then tuplesize := 16;
          end;
        end
        else if IF_T4 in aInsEntry^.Flags then
        begin
          case aIsEVEXW1 of
            false: if aIsVector256 or aIsVector512 then tuplesize := 16;
            else if aIsVector512 then tuplesize := 32;
          end;
        end
        else if IF_T8 in aInsEntry^.Flags then
        begin
          case aIsEVEXW1 of
            false: if aIsVector512 then tuplesize := 32;
          end;
        end
        else if IF_THVM in aInsEntry^.Flags then
        begin
          tuplesize := 8; // default 128bit-vectorlength
          if aIsVector256 then tuplesize := 16
          else if aIsVector512 then tuplesize := 32;
        end
        else if IF_TQVM in aInsEntry^.Flags then
        begin
          tuplesize := 4; // default 128bit-vectorlength
          if aIsVector256 then tuplesize := 8
          else if aIsVector512 then tuplesize := 16;
        end
        else if IF_TOVM in aInsEntry^.Flags then
        begin
          tuplesize := 2; // default 128bit-vectorlength
          if aIsVector256 then tuplesize := 4
          else if aIsVector512 then tuplesize := 8;
        end
        else if IF_TMEM128 in aInsEntry^.Flags then tuplesize := 16
        else if IF_TMDDUP in aInsEntry^.Flags then
        begin
          tuplesize := 8; // default 128bit-vectorlength
          if aIsVector256 then tuplesize := 32
          else if aIsVector512 then tuplesize := 64;
        end;
      end;

      if (tuplesize > 0) and (aInput.typ = top_ref) then
      begin
        // compression needs a non-zero displacement that is an exact multiple of N
        if (aInput.ref^.offset <> 0) and
           ((aInput.ref^.offset mod tuplesize) = 0) then
        begin
          scaledoffset := aInput.ref^.offset div tuplesize;

          // the scaled value must fit a signed byte: -128..127
          // (the former abs()-based test wrongly excluded -128)
          if (scaledoffset >= -128) and (scaledoffset <= 127) then
          begin
            aInput.ref^.offset := scaledoffset;
            EVEXTupleState := etsIsTuple;
          end;
        end;
      end;
    end;
  end;
end;
function taicpu.Pass1(objdata:TObjData):longint; function taicpu.Pass1(objdata:TObjData):longint;
@ -2869,6 +3077,7 @@ implementation
len : shortint; len : shortint;
len_ea_data: shortint; len_ea_data: shortint;
len_ea_data_evex: shortint; len_ea_data_evex: shortint;
mref_offset: asizeint;
ea_data : ea; ea_data : ea;
exists_evex: boolean; exists_evex: boolean;
exists_vex: boolean; exists_vex: boolean;
@ -2876,6 +3085,14 @@ implementation
exists_prefix_66: boolean; exists_prefix_66: boolean;
exists_prefix_F2: boolean; exists_prefix_F2: boolean;
exists_prefix_F3: boolean; exists_prefix_F3: boolean;
exists_l256: boolean;
exists_l512: boolean;
exists_EVEXW1: boolean;
pmref_operand: poper;
//i: integer;
//refsize: integer;
//tuplesize: integer;
//memsize: integer;
{$ifdef x86_64} {$ifdef x86_64}
omit_rexw : boolean; omit_rexw : boolean;
{$endif x86_64} {$endif x86_64}
@ -2890,6 +3107,8 @@ implementation
len:=0; len:=0;
len_ea_data := 0; len_ea_data := 0;
len_ea_data_evex:= 0; len_ea_data_evex:= 0;
mref_offset := 0;
pmref_operand := nil;
codes:=@p^.code[0]; codes:=@p^.code[0];
exists_vex := false; exists_vex := false;
@ -2898,6 +3117,9 @@ implementation
exists_prefix_F2 := false; exists_prefix_F2 := false;
exists_prefix_F3 := false; exists_prefix_F3 := false;
exists_evex := false; exists_evex := false;
exists_l256 := false;
exists_l512 := false;
exists_EVEXW1 := false;
{$ifdef x86_64} {$ifdef x86_64}
rex:=0; rex:=0;
omit_rexw:=false; omit_rexw:=false;
@ -3054,14 +3276,20 @@ implementation
end; end;
{$endif x86_64} {$endif x86_64}
if process_ea(oper[(c shr 3) and 7]^, ea_data, 0, true) then if (oper[(c shr 3) and 7]^.typ = top_ref) and
len_ea_data_evex := ea_data.size; (oper[(c shr 3) and 7]^.ref^.offset <> 0) then
if process_ea(oper[(c shr 3) and 7]^, ea_data, 0, false) then begin
begin if (exists_vex and exists_evex and CheckUseEVEX) or
len_ea_data := ea_data.size; (not(exists_vex) and exists_evex) then
inc(len,ea_data.size); begin
end CheckEVEXTuple(oper[(c shr 3) and 7]^, p, not(exists_l256 or exists_l512), exists_l256, exists_l512, exists_EVEXW1);
else Message(asmw_e_invalid_effective_address); //const aInput:toper; aInsEntry: pInsentry; aIsVector128, aIsVector256, aIsVector512, aIsEVEXW1: boolean);
end;
end;
if process_ea(oper[(c shr 3) and 7]^, ea_data, 0, EVEXTupleState = etsNotTuple) then
inc(len,ea_data.size)
else Message(asmw_e_invalid_effective_address);
{$ifdef x86_64} {$ifdef x86_64}
rex:=rex or ea_data.rex; rex:=rex or ea_data.rex;
@ -3072,8 +3300,8 @@ implementation
begin begin
exists_evex := true; exists_evex := true;
end; end;
&351: ; // EVEX length bit 512 &351: exists_l512 := true; // EVEX length bit 512
&352: ; // EVEX W1 &352: exists_EVEXW1 := true; // EVEX W1
&362: // VEX prefix for AVX (length = 2 or 3 bytes, dependens on REX.XBW or opcode-prefix ($0F38 or $0F3A)) &362: // VEX prefix for AVX (length = 2 or 3 bytes, dependens on REX.XBW or opcode-prefix ($0F38 or $0F3A))
// =>> DEFAULT = 2 Bytes // =>> DEFAULT = 2 Bytes
begin begin
@ -3093,7 +3321,7 @@ implementation
exists_vex_extension := true; exists_vex_extension := true;
end; end;
end; end;
&364: ; // VEX length bit 256 &364: exists_l256 := true; // VEX length bit 256
&366, // operand 2 (ymmreg) encoded immediate byte (bit 4-7) &366, // operand 2 (ymmreg) encoded immediate byte (bit 4-7)
&367: inc(len); // operand 3 (ymmreg) encoded immediate byte (bit 4-7) &367: inc(len); // operand 3 (ymmreg) encoded immediate byte (bit 4-7)
@ -3143,9 +3371,6 @@ implementation
if CheckUseEVEX then if CheckUseEVEX then
begin begin
inc(len, 4); inc(len, 4);
if len_ea_data <> len_ea_data_evex then
inc(len, len_ea_data_evex - len_ea_data);
end end
else else
begin begin
@ -3170,9 +3395,6 @@ implementation
if exists_prefix_66 then dec(len); if exists_prefix_66 then dec(len);
if exists_prefix_F2 then dec(len); if exists_prefix_F2 then dec(len);
if exists_prefix_F3 then dec(len); if exists_prefix_F3 then dec(len);
if len_ea_data <> len_ea_data_evex then
inc(len, len_ea_data_evex - len_ea_data);
end end
else else
begin begin
@ -4267,7 +4489,7 @@ implementation
rfield:=c and 7; rfield:=c and 7;
opidx:=(c shr 3) and 7; opidx:=(c shr 3) and 7;
if not process_ea(oper[opidx]^,ea_data,rfield, needed_EVEX) then if not process_ea(oper[opidx]^,ea_data,rfield, EVEXTupleState = etsNotTuple) then
Message(asmw_e_invalid_effective_address); Message(asmw_e_invalid_effective_address);

File diff suppressed because it is too large Load Diff

View File

@ -1,2 +1,2 @@
{ don't edit, this file is generated from x86ins.dat } { don't edit, this file is generated from x86ins.dat }
4092; 4096;

File diff suppressed because it is too large Load Diff

View File

@ -201,7 +201,23 @@ type
IF_BCST2, IF_BCST2,
IF_BCST4, IF_BCST4,
IF_BCST8, IF_BCST8,
IF_BCST16 IF_BCST16,
IF_T2, { disp8 - tuple - 2 }
IF_T4, { disp8 - tuple - 4 }
IF_T8, { disp8 - tuple - 8 }
IF_T1S, { disp8 - tuple - 1 scalar }
IF_T1F32,
IF_T1F64,
IF_TMDDUP,
IF_TFV, { disp8 - tuple - full vector }
IF_TFVM, { disp8 - tuple - full vector memory }
IF_TQVM,
IF_TMEM128,
IF_THV,
IF_THVM,
IF_TOVM
); );
tinsflags=set of tinsflag; tinsflags=set of tinsflag;
@ -1754,7 +1770,7 @@ begin
FReg32Base.Add('EBX'); FReg32Base.Add('EBX');
FReg32Base.Add('ECX'); FReg32Base.Add('ECX');
FReg32Base.Add('EDX'); FReg32Base.Add('EDX');
FReg32Base.Add('ESP'); //FReg32Base.Add('ESP');
//FReg32Base.Add('EBP'); //FReg32Base.Add('EBP');
FReg32Base.Add('EDI'); FReg32Base.Add('EDI');
FReg32Base.Add('ESI'); FReg32Base.Add('ESI');
@ -1773,7 +1789,7 @@ begin
FReg64Base.Add('RBX'); FReg64Base.Add('RBX');
FReg64Base.Add('RCX'); FReg64Base.Add('RCX');
FReg64Base.Add('RDX'); FReg64Base.Add('RDX');
FReg64Base.Add('RSP'); //FReg64Base.Add('RSP');
//FReg64Base.Add('RBP'); //FReg64Base.Add('RBP');
FReg64Base.Add('RDI'); FReg64Base.Add('RDI');
FReg64Base.Add('RSI'); FReg64Base.Add('RSI');
@ -1980,14 +1996,19 @@ begin
for il_Index := 0 to aSLIndexReg.Count - 1 do for il_Index := 0 to aSLIndexReg.Count - 1 do
begin begin
aRList.Add(format(aPrefix + '[%s + %s]%s', [aSLBaseReg[il_Base], aSLIndexReg[il_Index], aSuffix])); aRList.Add(format(aPrefix + '[%s + %s]%s', [aSLBaseReg[il_Base], aSLIndexReg[il_Index], aSuffix]));
aRList.Add(format(aPrefix + '[%s + %s + $10]%s', [aSLBaseReg[il_Base], aSLIndexReg[il_Index], aSuffix]));
aRList.Add(format(aPrefix + '[%s + %s + $40]%s', [aSLBaseReg[il_Base], aSLIndexReg[il_Index], aSuffix]));
aRList.Add(format(aPrefix + '[%s + %s - $10]%s', [aSLBaseReg[il_Base], aSLIndexReg[il_Index], aSuffix]));
aRList.Add(format(aPrefix + '[%s + %s - $40]%s', [aSLBaseReg[il_Base], aSLIndexReg[il_Index], aSuffix]));
aRList.Add(format(aPrefix + '[%s + %s * 2]%s', [aSLBaseReg[il_Base], aSLIndexReg[il_Index], aSuffix])); aRList.Add(format(aPrefix + '[%s + %s * 2]%s', [aSLBaseReg[il_Base], aSLIndexReg[il_Index], aSuffix]));
aRList.Add(format(aPrefix + '[%s + %s * 4]%s', [aSLBaseReg[il_Base], aSLIndexReg[il_Index], aSuffix])); aRList.Add(format(aPrefix + '[%s + %s * 4]%s', [aSLBaseReg[il_Base], aSLIndexReg[il_Index], aSuffix]));
aRList.Add(format(aPrefix + '[%s + %s * 8]%s', [aSLBaseReg[il_Base], aSLIndexReg[il_Index], aSuffix])); aRList.Add(format(aPrefix + '[%s + %s * 8]%s', [aSLBaseReg[il_Base], aSLIndexReg[il_Index], aSuffix]));
//aRList.Add(format(aPrefix + '[%s + %s * 2 + 16]%s', [aSLBaseReg[il_Base], aSLIndexReg[il_Index], aSuffix])); aRList.Add(format(aPrefix + '[%s + %s * 2 + 16]%s', [aSLBaseReg[il_Base], aSLIndexReg[il_Index], aSuffix]));
//aRList.Add(format(aPrefix + '[%s + %s * 4 + 32]%s', [aSLBaseReg[il_Base], aSLIndexReg[il_Index], aSuffix])); aRList.Add(format(aPrefix + '[%s + %s * 4 + 32]%s', [aSLBaseReg[il_Base], aSLIndexReg[il_Index], aSuffix]));
//aRList.Add(format(aPrefix + '[%s + %s * 8 + 48]%s', [aSLBaseReg[il_Base], aSLIndexReg[il_Index], aSuffix])); aRList.Add(format(aPrefix + '[%s + %s * 8 + 64]%s', [aSLBaseReg[il_Base], aSLIndexReg[il_Index], aSuffix]));
end; end;
end; end;
end; end;

View File

@ -1110,10 +1110,10 @@ begin
FOpCodeList.Add('vcvtsd2si,1,1,1,REG64,XMMREG_ER,,'); FOpCodeList.Add('vcvtsd2si,1,1,1,REG64,XMMREG_ER,,');
FOpCodeList.Add('vcvtsd2ss,1,1,1,XMMREG_MZ,XMMREG,MEM64,'); FOpCodeList.Add('vcvtsd2ss,1,1,1,XMMREG_MZ,XMMREG,MEM64,');
FOpCodeList.Add('vcvtsd2ss,1,1,1,XMMREG_MZ,XMMREG,XMMREG_ER,'); FOpCodeList.Add('vcvtsd2ss,1,1,1,XMMREG_MZ,XMMREG,XMMREG_ER,');
FOpCodeList.Add('vcvtsi2sd,1,1,1,XMMREG,XMMREG_ER,RM32,'); FOpCodeList.Add('vcvtsi2sd,1,1,1,XMMREG,XMMREG_ER,REG32,');
FOpCodeList.Add('vcvtsi2sd,0,1,1,XMMREG,XMMREG_ER,RM64,'); FOpCodeList.Add('vcvtsi2sd,0,1,1,XMMREG,XMMREG_ER,REG64,');
FOpCodeList.Add('vcvtsi2ss,1,1,1,XMMREG,XMMREG_ER,RM32,'); FOpCodeList.Add('vcvtsi2ss,1,1,1,XMMREG,XMMREG_ER,REG32,');
FOpCodeList.Add('vcvtsi2ss,0,1,1,XMMREG,XMMREG_ER,RM64,'); FOpCodeList.Add('vcvtsi2ss,0,1,1,XMMREG,XMMREG_ER,REG64,');
FOpCodeList.Add('vcvtss2sd,1,1,1,XMMREG_MZ,XMMREG,MEM32,'); FOpCodeList.Add('vcvtss2sd,1,1,1,XMMREG_MZ,XMMREG,MEM32,');
FOpCodeList.Add('vcvtss2sd,1,1,1,XMMREG_MZ,XMMREG,XMMREG_SAE,'); FOpCodeList.Add('vcvtss2sd,1,1,1,XMMREG_MZ,XMMREG,XMMREG_SAE,');
FOpCodeList.Add('vcvtss2si,1,1,1,REG32,MEM32,,'); FOpCodeList.Add('vcvtss2si,1,1,1,REG32,MEM32,,');