mirror of
https://gitlab.com/freepascal.org/fpc/source.git
synced 2025-04-20 11:29:27 +02:00
* basic avx support for floating point operations (use -Cfavx to activate)
git-svn-id: trunk@24896 -
This commit is contained in:
parent
6a8e4f0381
commit
e81d2d1f3b
@ -52,8 +52,10 @@ unit cgobj;
|
||||
by Free Pascal. For 32-bit processors, the base class
|
||||
should be @link(tcg64f32) and not @var(tcg).
|
||||
}
|
||||
|
||||
{ tcg }
|
||||
|
||||
tcg = class
|
||||
public
|
||||
{ how many times is this current code executed }
|
||||
executionweight : longint;
|
||||
alignment : talignment;
|
||||
@ -271,6 +273,9 @@ unit cgobj;
|
||||
procedure a_opmm_ref_reg(list: TAsmList; Op: TOpCG; size : tcgsize;const ref: treference; reg: tregister;shuffle : pmmshuffle); virtual;
|
||||
procedure a_opmm_loc_reg(list: TAsmList; Op: TOpCG; size : tcgsize;const loc: tlocation; reg: tregister;shuffle : pmmshuffle); virtual;
|
||||
procedure a_opmm_reg_ref(list: TAsmList; Op: TOpCG; size : tcgsize;reg: tregister;const ref: treference; shuffle : pmmshuffle); virtual;
|
||||
procedure a_opmm_loc_reg_reg(list: TAsmList;Op : TOpCG;size : tcgsize;const loc : tlocation;src,dst : tregister;shuffle : pmmshuffle); virtual;
|
||||
procedure a_opmm_reg_reg_reg(list: TAsmList; Op: TOpCG; size : tcgsize;src1,src2,dst: tregister;shuffle : pmmshuffle); virtual;
|
||||
procedure a_opmm_ref_reg_reg(list: TAsmList; Op: TOpCG; size : tcgsize;const ref: treference; src,dst: tregister;shuffle : pmmshuffle); virtual;
|
||||
|
||||
procedure a_loadmm_intreg_reg(list: TAsmList; fromsize, tosize : tcgsize; intreg, mmreg: tregister; shuffle: pmmshuffle); virtual;
|
||||
procedure a_loadmm_reg_intreg(list: TAsmList; fromsize, tosize : tcgsize; mmreg, intreg: tregister; shuffle : pmmshuffle); virtual;
|
||||
@ -2061,6 +2066,33 @@ implementation
|
||||
end;
|
||||
|
||||
|
||||
{ Three-operand MM operation whose first operand is given as a location.
  Register locations are forwarded to a_opmm_reg_reg_reg, memory references
  to a_opmm_ref_reg_reg; every other kind of location is an internal error. }
procedure tcg.a_opmm_loc_reg_reg(list: TAsmList; Op: TOpCG; size : tcgsize;const loc: tlocation; src,dst: tregister;shuffle : pmmshuffle);
  begin
    if loc.loc in [LOC_CMMREGISTER,LOC_MMREGISTER] then
      a_opmm_reg_reg_reg(list,op,size,loc.register,src,dst,shuffle)
    else if loc.loc in [LOC_CREFERENCE,LOC_REFERENCE] then
      a_opmm_ref_reg_reg(list,op,size,loc.reference,src,dst,shuffle)
    else
      internalerror(200312232);
  end;
|
||||
|
||||
|
||||
{ Base-class stub of the three-register MM operation. The generic code
  generator provides no implementation and unconditionally raises an
  internal error. }
procedure tcg.a_opmm_reg_reg_reg(list : TAsmList;Op : TOpCG;size : tcgsize;src1,src2,dst : tregister;shuffle : pmmshuffle);
  begin
    internalerror(2013061102);
  end;
|
||||
|
||||
|
||||
{ Base-class stub of the MM operation taking a memory reference plus two
  registers. The generic code generator provides no implementation and
  unconditionally raises an internal error. }
procedure tcg.a_opmm_ref_reg_reg(list : TAsmList;Op : TOpCG;size : tcgsize;const ref : treference;src,dst : tregister;shuffle : pmmshuffle);
  begin
    internalerror(2013061101);
  end;
|
||||
|
||||
|
||||
procedure tcg.g_concatcopy_unaligned(list : TAsmList;const source,dest : treference;len : tcgint);
|
||||
begin
|
||||
g_concatcopy(list,source,dest,len);
|
||||
|
@ -59,7 +59,8 @@ Type
|
||||
fpu_ssse3,
|
||||
fpu_sse41,
|
||||
fpu_sse42,
|
||||
fpu_avx
|
||||
fpu_avx,
|
||||
fpu_avx2
|
||||
);
|
||||
|
||||
|
||||
@ -96,11 +97,14 @@ Const
|
||||
'SSSE3',
|
||||
'SSE41',
|
||||
'SSE42',
|
||||
'AVX'
|
||||
'AVX',
|
||||
'AVX2'
|
||||
);
|
||||
|
||||
sse_singlescalar : set of tfputype = [fpu_sse,fpu_sse2,fpu_sse3];
|
||||
sse_doublescalar : set of tfputype = [fpu_sse2,fpu_sse3];
|
||||
sse_singlescalar = [fpu_sse..fpu_avx2];
|
||||
sse_doublescalar = [fpu_sse2..fpu_avx2];
|
||||
|
||||
fpu_avx_instructionsets = [fpu_avx,fpu_avx2];
|
||||
|
||||
{ Supported optimizations, only used for information }
|
||||
supported_optimizerswitches = genericlevel1optimizerswitches+
|
||||
|
@ -685,6 +685,10 @@
|
||||
(Ch: (Ch_RRAX, Ch_WMemEDI, Ch_RWRDI)),
|
||||
(Ch: (Ch_WRAX, Ch_RWRSI, Ch_None)),
|
||||
(Ch: (Ch_All, Ch_None, Ch_None)),
|
||||
(Ch: (Ch_Wop3, Ch_Rop2, Ch_Rop1)),
|
||||
(Ch: (Ch_Wop3, Ch_Rop2, Ch_Rop1)),
|
||||
(Ch: (Ch_Wop3, Ch_Rop2, Ch_Rop1)),
|
||||
(Ch: (Ch_Wop3, Ch_Rop2, Ch_Rop1)),
|
||||
(Ch: (Ch_All, Ch_None, Ch_None)),
|
||||
(Ch: (Ch_All, Ch_None, Ch_None)),
|
||||
(Ch: (Ch_All, Ch_None, Ch_None)),
|
||||
@ -772,21 +776,17 @@
|
||||
(Ch: (Ch_All, Ch_None, Ch_None)),
|
||||
(Ch: (Ch_All, Ch_None, Ch_None)),
|
||||
(Ch: (Ch_All, Ch_None, Ch_None)),
|
||||
(Ch: (Ch_Wop3, Ch_Rop2, Ch_Rop1)),
|
||||
(Ch: (Ch_All, Ch_None, Ch_None)),
|
||||
(Ch: (Ch_All, Ch_None, Ch_None)),
|
||||
(Ch: (Ch_Wop3, Ch_Rop2, Ch_Rop1)),
|
||||
(Ch: (Ch_All, Ch_None, Ch_None)),
|
||||
(Ch: (Ch_All, Ch_None, Ch_None)),
|
||||
(Ch: (Ch_All, Ch_None, Ch_None)),
|
||||
(Ch: (Ch_All, Ch_None, Ch_None)),
|
||||
(Ch: (Ch_All, Ch_None, Ch_None)),
|
||||
(Ch: (Ch_All, Ch_None, Ch_None)),
|
||||
(Ch: (Ch_All, Ch_None, Ch_None)),
|
||||
(Ch: (Ch_All, Ch_None, Ch_None)),
|
||||
(Ch: (Ch_All, Ch_None, Ch_None)),
|
||||
(Ch: (Ch_All, Ch_None, Ch_None)),
|
||||
(Ch: (Ch_All, Ch_None, Ch_None)),
|
||||
(Ch: (Ch_All, Ch_None, Ch_None)),
|
||||
(Ch: (Ch_All, Ch_None, Ch_None)),
|
||||
(Ch: (Ch_Wop3, Ch_Rop2, Ch_Rop1)),
|
||||
(Ch: (Ch_Wop3, Ch_Rop2, Ch_Rop1)),
|
||||
(Ch: (Ch_Wop3, Ch_Rop2, Ch_Rop1)),
|
||||
(Ch: (Ch_Wop3, Ch_Rop2, Ch_Rop1)),
|
||||
(Ch: (Ch_All, Ch_None, Ch_None)),
|
||||
(Ch: (Ch_All, Ch_None, Ch_None)),
|
||||
(Ch: (Ch_All, Ch_None, Ch_None)),
|
||||
|
@ -296,7 +296,7 @@ interface
|
||||
constructor op_reg_reg_reg(op : tasmop;_size : topsize;_op1,_op2,_op3 : tregister);
|
||||
constructor op_const_reg_reg(op : tasmop;_size : topsize;_op1 : aint;_op2 : tregister;_op3 : tregister);
|
||||
constructor op_const_ref_reg(op : tasmop;_size : topsize;_op1 : aint;const _op2 : treference;_op3 : tregister);
|
||||
constructor op_reg_reg_ref(op : tasmop;_size : topsize;_op1,_op2 : tregister; const _op3 : treference);
|
||||
constructor op_ref_reg_reg(op : tasmop;_size : topsize;const _op1 : treference;_op2,_op3 : tregister);
|
||||
constructor op_const_reg_ref(op : tasmop;_size : topsize;_op1 : aint;_op2 : tregister;const _op3 : treference);
|
||||
|
||||
{ this is for Jmp instructions }
|
||||
@ -375,7 +375,8 @@ implementation
|
||||
systems,
|
||||
procinfo,
|
||||
itcpugas,
|
||||
symsym;
|
||||
symsym,
|
||||
cpuinfo;
|
||||
|
||||
{*****************************************************************************
|
||||
Instruction table
|
||||
@ -813,14 +814,14 @@ implementation
|
||||
end;
|
||||
|
||||
|
||||
constructor taicpu.op_reg_reg_ref(op : tasmop;_size : topsize;_op1,_op2 : tregister;const _op3 : treference);
|
||||
constructor taicpu.op_ref_reg_reg(op : tasmop;_size : topsize;const _op1 : treference;_op2,_op3 : tregister);
|
||||
begin
|
||||
inherited create(op);
|
||||
init(_size);
|
||||
ops:=3;
|
||||
loadreg(0,_op1);
|
||||
loadref(0,_op1);
|
||||
loadreg(1,_op2);
|
||||
loadref(2,_op3);
|
||||
loadreg(2,_op3);
|
||||
end;
|
||||
|
||||
|
||||
@ -2874,7 +2875,9 @@ implementation
|
||||
(oper[0]^.reg=oper[1]^.reg)
|
||||
) or
|
||||
(((opcode=A_MOVSS) or (opcode=A_MOVSD) or (opcode=A_MOVQ) or
|
||||
(opcode=A_MOVAPS) or (OPCODE=A_MOVAPD)) and
|
||||
(opcode=A_MOVAPS) or (OPCODE=A_MOVAPD) or
|
||||
(opcode=A_VMOVSS) or (opcode=A_VMOVSD) or (opcode=A_VMOVQ) or
|
||||
(opcode=A_VMOVAPS) or (OPCODE=A_VMOVAPD)) and
|
||||
(regtype = R_MMREGISTER) and
|
||||
(ops=2) and
|
||||
(oper[0]^.typ=top_reg) and
|
||||
@ -2929,8 +2932,11 @@ implementation
|
||||
begin
|
||||
{ the information in the instruction table is made for the string copy
|
||||
operation MOVSD so hack here (FK)
|
||||
|
||||
VMOVSS and VMOVSD have two and three operand flavours, this cannot be modelled by x86ins.dat
|
||||
so fix it here (FK)
|
||||
}
|
||||
if (opcode=A_MOVSD) and (ops=2) then
|
||||
if ((opcode=A_MOVSD) or (opcode=A_VMOVSS) or (opcode=A_VMOVSD)) and (ops=2) then
|
||||
begin
|
||||
case opnr of
|
||||
0:
|
||||
@ -2961,17 +2967,30 @@ implementation
|
||||
result:=taicpu.op_ref_reg(A_MOV,reg2opsize(r),tmpref,r);
|
||||
end;
|
||||
R_MMREGISTER :
|
||||
case getsubreg(r) of
|
||||
R_SUBMMD:
|
||||
result:=taicpu.op_ref_reg(A_MOVSD,reg2opsize(r),ref,r);
|
||||
R_SUBMMS:
|
||||
result:=taicpu.op_ref_reg(A_MOVSS,reg2opsize(r),ref,r);
|
||||
R_SUBQ,
|
||||
R_SUBMMWHOLE:
|
||||
result:=taicpu.op_ref_reg(A_MOVQ,S_NO,ref,r);
|
||||
else
|
||||
internalerror(200506043);
|
||||
end;
|
||||
if current_settings.fputype in fpu_avx_instructionsets then
|
||||
case getsubreg(r) of
|
||||
R_SUBMMD:
|
||||
result:=taicpu.op_ref_reg(A_VMOVSD,reg2opsize(r),ref,r);
|
||||
R_SUBMMS:
|
||||
result:=taicpu.op_ref_reg(A_VMOVSS,reg2opsize(r),ref,r);
|
||||
R_SUBQ,
|
||||
R_SUBMMWHOLE:
|
||||
result:=taicpu.op_ref_reg(A_VMOVQ,S_NO,ref,r);
|
||||
else
|
||||
internalerror(200506043);
|
||||
end
|
||||
else
|
||||
case getsubreg(r) of
|
||||
R_SUBMMD:
|
||||
result:=taicpu.op_ref_reg(A_MOVSD,reg2opsize(r),ref,r);
|
||||
R_SUBMMS:
|
||||
result:=taicpu.op_ref_reg(A_MOVSS,reg2opsize(r),ref,r);
|
||||
R_SUBQ,
|
||||
R_SUBMMWHOLE:
|
||||
result:=taicpu.op_ref_reg(A_MOVQ,S_NO,ref,r);
|
||||
else
|
||||
internalerror(200506043);
|
||||
end;
|
||||
else
|
||||
internalerror(200401041);
|
||||
end;
|
||||
@ -3002,17 +3021,30 @@ implementation
|
||||
result:=taicpu.op_reg_ref(A_MOV,size,r,tmpref);
|
||||
end;
|
||||
R_MMREGISTER :
|
||||
case getsubreg(r) of
|
||||
R_SUBMMD:
|
||||
result:=taicpu.op_reg_ref(A_MOVSD,reg2opsize(r),r,ref);
|
||||
R_SUBMMS:
|
||||
result:=taicpu.op_reg_ref(A_MOVSS,reg2opsize(r),r,ref);
|
||||
R_SUBQ,
|
||||
R_SUBMMWHOLE:
|
||||
result:=taicpu.op_reg_ref(A_MOVQ,S_NO,r,ref);
|
||||
else
|
||||
internalerror(200506042);
|
||||
end;
|
||||
if current_settings.fputype in fpu_avx_instructionsets then
|
||||
case getsubreg(r) of
|
||||
R_SUBMMD:
|
||||
result:=taicpu.op_reg_ref(A_VMOVSD,reg2opsize(r),r,ref);
|
||||
R_SUBMMS:
|
||||
result:=taicpu.op_reg_ref(A_VMOVSS,reg2opsize(r),r,ref);
|
||||
R_SUBQ,
|
||||
R_SUBMMWHOLE:
|
||||
result:=taicpu.op_reg_ref(A_VMOVQ,S_NO,r,ref);
|
||||
else
|
||||
internalerror(200506042);
|
||||
end
|
||||
else
|
||||
case getsubreg(r) of
|
||||
R_SUBMMD:
|
||||
result:=taicpu.op_reg_ref(A_MOVSD,reg2opsize(r),r,ref);
|
||||
R_SUBMMS:
|
||||
result:=taicpu.op_reg_ref(A_MOVSS,reg2opsize(r),r,ref);
|
||||
R_SUBQ,
|
||||
R_SUBMMWHOLE:
|
||||
result:=taicpu.op_reg_ref(A_MOVQ,S_NO,r,ref);
|
||||
else
|
||||
internalerror(200506042);
|
||||
end;
|
||||
else
|
||||
internalerror(200401041);
|
||||
end;
|
||||
|
@ -92,6 +92,8 @@ unit cgx86;
|
||||
procedure a_loadmm_reg_ref(list: TAsmList; fromsize, tosize : tcgsize;reg: tregister; const ref: treference;shuffle : pmmshuffle); override;
|
||||
procedure a_opmm_ref_reg(list: TAsmList; Op: TOpCG; size : tcgsize;const ref: treference; reg: tregister;shuffle : pmmshuffle); override;
|
||||
procedure a_opmm_reg_reg(list: TAsmList; Op: TOpCG; size : tcgsize;src,dst: tregister;shuffle : pmmshuffle);override;
|
||||
procedure a_opmm_ref_reg_reg(list : TAsmList;Op : TOpCG;size : tcgsize;const ref : treference;src,dst : tregister;shuffle : pmmshuffle);override;
|
||||
procedure a_opmm_reg_reg_reg(list : TAsmList;Op : TOpCG;size : tcgsize;src1,src2,dst : tregister;shuffle : pmmshuffle);override;
|
||||
|
||||
{ comparison operations }
|
||||
procedure a_cmp_const_reg_label(list : TAsmList;size : tcgsize;cmp_op : topcmp;a : tcgint;reg : tregister;
|
||||
@ -126,9 +128,9 @@ unit cgx86;
|
||||
procedure check_register_size(size:tcgsize;reg:tregister);
|
||||
|
||||
procedure opmm_loc_reg(list: TAsmList; Op: TOpCG; size : tcgsize;loc : tlocation;dst: tregister; shuffle : pmmshuffle);
|
||||
procedure opmm_loc_reg_reg(list : TAsmList;Op : TOpCG;size : tcgsize;loc : tlocation;src,dst : tregister;shuffle : pmmshuffle);
|
||||
|
||||
function get_darwin_call_stub(const s: string; weak: boolean): tasmsymbol;
|
||||
private
|
||||
procedure sizes2load(s1,s2 : tcgsize;var op: tasmop; var s3: topsize);
|
||||
|
||||
procedure floatload(list: TAsmList; t : tcgsize;const ref : treference);
|
||||
@ -175,7 +177,7 @@ unit cgx86;
|
||||
|
||||
{ Returns true when the currently selected FPU type belongs to
  fpu_avx_instructionsets.
  The earlier assignment testing only [fpu_avx] was a dead store: it was
  overwritten by this one before it could be read, so it has been removed. }
function UseAVX: boolean;
  begin
    Result:=current_settings.fputype in fpu_avx_instructionsets;
  end;
|
||||
|
||||
const
|
||||
@ -1144,12 +1146,18 @@ unit cgx86;
|
||||
|
||||
function get_scalar_mm_op(fromsize,tosize : tcgsize) : tasmop;
|
||||
const
|
||||
convertop : array[OS_F32..OS_F128,OS_F32..OS_F128] of tasmop = (
|
||||
convertopsse : array[OS_F32..OS_F128,OS_F32..OS_F128] of tasmop = (
|
||||
(A_MOVSS,A_CVTSS2SD,A_NONE,A_NONE,A_NONE),
|
||||
(A_CVTSD2SS,A_MOVSD,A_NONE,A_NONE,A_NONE),
|
||||
(A_NONE,A_NONE,A_NONE,A_NONE,A_NONE),
|
||||
(A_NONE,A_NONE,A_NONE,A_MOVQ,A_NONE),
|
||||
(A_NONE,A_NONE,A_NONE,A_NONE,A_NONE));
|
||||
convertopavx : array[OS_F32..OS_F128,OS_F32..OS_F128] of tasmop = (
|
||||
(A_VMOVSS,A_VCVTSS2SD,A_NONE,A_NONE,A_NONE),
|
||||
(A_VCVTSD2SS,A_VMOVSD,A_NONE,A_NONE,A_NONE),
|
||||
(A_NONE,A_NONE,A_NONE,A_NONE,A_NONE),
|
||||
(A_NONE,A_NONE,A_NONE,A_MOVQ,A_NONE),
|
||||
(A_NONE,A_NONE,A_NONE,A_NONE,A_NONE));
|
||||
begin
|
||||
{ we can have OS_F32/OS_F64 (record in function result/LOC_MMREGISTER) to
|
||||
OS_32/OS_64 (record in memory/LOC_REFERENCE) }
|
||||
@ -1161,14 +1169,24 @@ unit cgx86;
|
||||
OS_64:
|
||||
tosize:=OS_F64;
|
||||
end;
|
||||
if (fromsize in [low(convertop)..high(convertop)]) and
|
||||
(tosize in [low(convertop)..high(convertop)]) then
|
||||
result:=convertop[fromsize,tosize]
|
||||
if (fromsize in [low(convertopsse)..high(convertopsse)]) and
|
||||
(tosize in [low(convertopsse)..high(convertopsse)]) then
|
||||
begin
|
||||
if UseAVX then
|
||||
result:=convertopavx[fromsize,tosize]
|
||||
else
|
||||
result:=convertopsse[fromsize,tosize];
|
||||
end
|
||||
{ we can have OS_M64 (record in function result/LOC_MMREGISTER) to
|
||||
OS_64 (record in memory/LOC_REFERENCE) }
|
||||
else if (tcgsize2size[fromsize]=tcgsize2size[tosize]) and
|
||||
(fromsize=OS_M64) then
|
||||
result:=A_MOVQ
|
||||
begin
|
||||
if UseAVX then
|
||||
result:=A_VMOVQ
|
||||
else
|
||||
result:=A_MOVQ;
|
||||
end
|
||||
else
|
||||
internalerror(2010060104);
|
||||
if result=A_NONE then
|
||||
@ -1179,6 +1197,7 @@ unit cgx86;
|
||||
procedure tcgx86.a_loadmm_reg_reg(list: TAsmList; fromsize, tosize : tcgsize;reg1, reg2: tregister;shuffle : pmmshuffle);
|
||||
var
|
||||
instr : taicpu;
|
||||
op : TAsmOp;
|
||||
begin
|
||||
if shuffle=nil then
|
||||
begin
|
||||
@ -1200,8 +1219,26 @@ unit cgx86;
|
||||
end
|
||||
else if shufflescalar(shuffle) then
|
||||
begin
|
||||
instr:=taicpu.op_reg_reg(get_scalar_mm_op(fromsize,tosize),S_NO,reg1,reg2);
|
||||
op:=get_scalar_mm_op(fromsize,tosize);
|
||||
|
||||
{ VMOVSD/SS is not available with two register operands }
|
||||
if op=A_VMOVSD then
|
||||
op:=A_VMOVAPD
|
||||
else if op=A_VMOVSS then
|
||||
op:=A_VMOVAPS;
|
||||
|
||||
{ A_VCVTSD2SS and A_VCVTSS2SD require always three operands }
|
||||
if (op=A_VCVTSD2SS) or (op=A_VCVTSS2SD) then
|
||||
instr:=taicpu.op_reg_reg_reg(op,S_NO,reg1,reg2,reg2)
|
||||
else
|
||||
instr:=taicpu.op_reg_reg(op,S_NO,reg1,reg2);
|
||||
|
||||
case get_scalar_mm_op(fromsize,tosize) of
|
||||
A_VMOVAPD,
|
||||
A_VMOVAPS,
|
||||
A_VMOVSS,
|
||||
A_VMOVSD,
|
||||
A_VMOVQ,
|
||||
A_MOVSS,
|
||||
A_MOVSD,
|
||||
A_MOVQ:
|
||||
@ -1217,6 +1254,7 @@ unit cgx86;
|
||||
procedure tcgx86.a_loadmm_ref_reg(list: TAsmList; fromsize, tosize : tcgsize;const ref: treference; reg: tregister;shuffle : pmmshuffle);
|
||||
var
|
||||
tmpref : treference;
|
||||
op : tasmop;
|
||||
begin
|
||||
tmpref:=ref;
|
||||
make_simple_ref(list,tmpref);
|
||||
@ -1233,7 +1271,15 @@ unit cgx86;
|
||||
{$endif x86_64}
|
||||
end
|
||||
else if shufflescalar(shuffle) then
|
||||
list.concat(taicpu.op_ref_reg(get_scalar_mm_op(fromsize,tosize),S_NO,tmpref,reg))
|
||||
begin
|
||||
op:=get_scalar_mm_op(fromsize,tosize);
|
||||
|
||||
{ A_VCVTSD2SS and A_VCVTSS2SD require always three operands }
|
||||
if (op=A_VCVTSD2SS) or (op=A_VCVTSS2SD) then
|
||||
list.concat(taicpu.op_ref_reg_reg(op,S_NO,tmpref,reg,reg))
|
||||
else
|
||||
list.concat(taicpu.op_ref_reg(op,S_NO,tmpref,reg))
|
||||
end
|
||||
else
|
||||
internalerror(200312252);
|
||||
end;
|
||||
@ -1243,6 +1289,7 @@ unit cgx86;
|
||||
var
|
||||
hreg : tregister;
|
||||
tmpref : treference;
|
||||
op : tasmop;
|
||||
begin
|
||||
tmpref:=ref;
|
||||
make_simple_ref(list,tmpref);
|
||||
@ -1263,8 +1310,15 @@ unit cgx86;
|
||||
if tcgsize2size[tosize]<>tcgsize2size[fromsize] then
|
||||
begin
|
||||
hreg:=getmmregister(list,tosize);
|
||||
list.concat(taicpu.op_reg_reg(get_scalar_mm_op(fromsize,tosize),S_NO,reg,hreg));
|
||||
list.concat(taicpu.op_reg_ref(get_scalar_mm_op(tosize,tosize),S_NO,hreg,tmpref));
|
||||
op:=get_scalar_mm_op(fromsize,tosize);
|
||||
|
||||
{ A_VCVTSD2SS and A_VCVTSS2SD require always three operands }
|
||||
if (op=A_VCVTSD2SS) or (op=A_VCVTSS2SD) then
|
||||
list.concat(taicpu.op_reg_reg_reg(op,S_NO,reg,hreg,hreg))
|
||||
else
|
||||
list.concat(taicpu.op_reg_reg(op,S_NO,reg,hreg));
|
||||
|
||||
list.concat(taicpu.op_reg_ref(get_scalar_mm_op(tosize,tosize),S_NO,hreg,tmpref))
|
||||
end
|
||||
else
|
||||
list.concat(taicpu.op_reg_ref(get_scalar_mm_op(fromsize,tosize),S_NO,reg,tmpref));
|
||||
@ -1296,6 +1350,103 @@ unit cgx86;
|
||||
end;
|
||||
|
||||
|
||||
{ Emits one three-operand AVX instruction for the generic operation Op.

  list    : assembler list that receives the generated code
  Op      : generic operation, translated through opmm2asmop
  size    : operand size; must equal loc.size
  loc     : first instruction operand, either a memory reference or an MM register
  src     : second instruction operand (register)
  dst     : third instruction operand (register)
  shuffle : nil selects the packed table row, a scalar shuffle selects the
            scalar row; any other shuffle is not implemented

  Raises an internal error when loc.size differs from size, when the shuffle
  is not supported, when the table has no instruction for Op (A_NOP entry),
  or when loc is neither a reference nor an MM register. }
procedure tcgx86.opmm_loc_reg_reg(list: TAsmList; Op: TOpCG; size : tcgsize;loc : tlocation;src,dst: tregister; shuffle : pmmshuffle);
  const
    { first index: 0 = scalar, 1 = packed; A_NOP marks "no instruction available" }
    opmm2asmop : array[0..1,OS_F32..OS_F64,topcg] of tasmop = (
      ( { scalar }
        ( { OS_F32 }
          A_NOP,A_NOP,A_VADDSS,A_NOP,A_VDIVSS,A_NOP,A_NOP,A_VMULSS,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_VSUBSS,A_NOP,A_NOP,A_NOP
        ),
        ( { OS_F64 }
          A_NOP,A_NOP,A_VADDSD,A_NOP,A_VDIVSD,A_NOP,A_NOP,A_VMULSD,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_VSUBSD,A_NOP,A_NOP,A_NOP
        )
      ),
      ( { vectorized/packed }
        { because the logical packed single instructions have shorter op codes, we use always
          these
        }
        ( { OS_F32 }
          A_NOP,A_NOP,A_VADDPS,A_NOP,A_VDIVPS,A_NOP,A_NOP,A_VMULPS,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_VSUBPS,A_VXORPS,A_NOP,A_NOP
        ),
        ( { OS_F64 }
          A_NOP,A_NOP,A_VADDPD,A_NOP,A_VDIVPD,A_NOP,A_NOP,A_VMULPD,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_VSUBPD,A_VXORPD,A_NOP,A_NOP
        )
      )
    );

  var
    resultreg : tregister;
    asmop : tasmop;
  begin
    { this is an internally used procedure so the parameters have
      some constraints
    }
    if loc.size<>size then
      internalerror(2013061108);
    { NOTE(review): size indexes opmm2asmop directly, so it presumably has to
      be OS_F32 or OS_F64 - confirm that all callers guarantee this }
    resultreg:=dst;
    { deshuffle }
    //!!!
    if (shuffle<>nil) and not(shufflescalar(shuffle)) then
      begin
        { general shuffles are not implemented }
        internalerror(2013061107);
      end
    else if (shuffle=nil) then
      asmop:=opmm2asmop[1,size,op]
    else if shufflescalar(shuffle) then
      begin
        asmop:=opmm2asmop[0,size,op];
        { no scalar operation available? }
        if asmop=A_NOP then
          begin
            { do vectorized and shuffle finally }
            internalerror(2010060102);
          end;
      end
    else
      internalerror(2013061106);
    if asmop=A_NOP then
      internalerror(2013061105);
    case loc.loc of
      LOC_CREFERENCE,LOC_REFERENCE:
        begin
          { anything make_simple_ref emits to simplify the reference has to
            go to the same list as the instruction using it, so pass list and
            not current_asmdata.CurrAsmList; loc is a by-value copy, so
            rewriting loc.reference does not affect the caller }
          make_simple_ref(list,loc.reference);
          list.concat(taicpu.op_ref_reg_reg(asmop,S_NO,loc.reference,src,resultreg));
        end;
      LOC_CMMREGISTER,LOC_MMREGISTER:
        list.concat(taicpu.op_reg_reg_reg(asmop,S_NO,loc.register,src,resultreg));
      else
        internalerror(2013061104);
    end;
    { shuffle }
    { resultreg is never redirected at the moment, so this check cannot
      trigger yet }
    if resultreg<>dst then
      begin
        internalerror(2013061103);
      end;
  end;
|
||||
|
||||
|
||||
{ Three-register MM operation: describes src1 as an MM register location and
  hands the work over to opmm_loc_reg_reg. }
procedure tcgx86.a_opmm_reg_reg_reg(list: TAsmList; Op: TOpCG; size : tcgsize;src1,src2,dst: tregister;shuffle : pmmshuffle);
  var
    srcloc : tlocation;
  begin
    { only the kind, the register and the size of the location are filled in }
    srcloc.size:=size;
    srcloc.loc:=LOC_MMREGISTER;
    srcloc.register:=src1;
    opmm_loc_reg_reg(list,op,size,srcloc,src2,dst,shuffle);
  end;
|
||||
|
||||
|
||||
{ MM operation with a memory operand and two registers: describes ref as a
  reference location and hands the work over to opmm_loc_reg_reg. }
procedure tcgx86.a_opmm_ref_reg_reg(list: TAsmList; Op: TOpCG; size : tcgsize;const ref: treference; src,dst: tregister;shuffle : pmmshuffle);
  var
    srcloc : tlocation;
  begin
    { only the kind, the reference and the size of the location are filled in }
    srcloc.size:=size;
    srcloc.loc:=LOC_REFERENCE;
    srcloc.reference:=ref;
    opmm_loc_reg_reg(list,op,size,srcloc,src,dst,shuffle);
  end;
|
||||
|
||||
|
||||
procedure tcgx86.opmm_loc_reg(list: TAsmList; Op: TOpCG; size : tcgsize;loc : tlocation;dst: tregister; shuffle : pmmshuffle);
|
||||
const
|
||||
opmm2asmop : array[0..1,OS_F32..OS_F64,topcg] of tasmop = (
|
||||
@ -1319,7 +1470,6 @@ unit cgx86;
|
||||
)
|
||||
)
|
||||
);
|
||||
|
||||
var
|
||||
resultreg : tregister;
|
||||
asmop : tasmop;
|
||||
|
@ -41,7 +41,10 @@ unit nx86add;
|
||||
procedure emit_generic_code(op:TAsmOp;opsize:TCgSize;unsigned,extra_not,mboverflow:boolean);
|
||||
|
||||
procedure second_cmpfloatsse;
|
||||
procedure second_cmpfloatavx;
|
||||
|
||||
procedure second_addfloatsse;
|
||||
procedure second_addfloatavx;
|
||||
public
|
||||
procedure second_addfloat;override;
|
||||
{$ifndef i8086}
|
||||
@ -794,6 +797,141 @@ unit nx86add;
|
||||
end;
|
||||
end;
|
||||
|
||||
{ Code generation for floating point add/sub/mul/div through the
  three-operand cg.a_opmm_loc_reg_reg interface.

  The node type is mapped to a generic operation; any node type other than
  addn, muln, subn and slashn is an internal error. The sqr(a)+/-sqr(b)
  special case is only recognised when the symbol "dummy" is defined,
  otherwise sqr_sum stays false and that branch is never taken. }
procedure tx86addnode.second_addfloatavx;
  var
    cgop : topcg;
    sqr_sum : boolean;
    tmp : tnode;
  begin
    sqr_sum:=false;
{$ifdef dummy}
    if (current_settings.fputype>=fpu_sse3) and
       use_vectorfpu(resultdef) and
       (nodetype in [addn,subn]) and
       (left.nodetype=inlinen) and (tinlinenode(left).inlinenumber=in_sqr_real) and
       (right.nodetype=inlinen) and (tinlinenode(right).inlinenumber=in_sqr_real) then
      begin
        sqr_sum:=true;

        { replace each sqr() node by its argument }
        tmp:=tinlinenode(left).left;
        tinlinenode(left).left:=nil;
        left.free;
        left:=tmp;

        tmp:=tinlinenode(right).left;
        tinlinenode(right).left:=nil;
        right.free;
        right:=tmp;
      end;
{$endif dummy}

    pass_left_right;
    check_left_and_right_fpureg(false);

    if (nf_swapped in flags) then
      begin
        { swapleftright cannot be used if both operands are on the fpu stack:
          both are "R_ST", so nothing would change -> exchange them manually }
        if (left.location.loc = LOC_FPUREGISTER) and
           (right.location.loc = LOC_FPUREGISTER) then
          emit_none(A_FXCH,S_NO)
        else
          swapleftright;
      end;

    case nodetype of
      addn   : cgop:=OP_ADD;
      muln   : cgop:=OP_MUL;
      subn   : cgop:=OP_SUB;
      slashn : cgop:=OP_DIV;
      else
        internalerror(200312231);
    end;

    location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));

    if sqr_sum then
      begin
        if nf_swapped in flags then
          swapleftright;

        hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,false);
        hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,right.location,right.resultdef,true);
        location:=left.location;
        if is_double(resultdef) then
          begin
            current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(A_SHUFPD,S_NO,%00,right.location.register,location.register));
            current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_MULPD,S_NO,location.register,location.register));
            if nodetype=addn then
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_HADDPD,S_NO,location.register,location.register))
            else if nodetype=subn then
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_HSUBPD,S_NO,location.register,location.register))
            else
              internalerror(201108162);
          end
        else
          begin
            current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_UNPCKLPS,S_NO,right.location.register,location.register));
            { ensure that bits 64..127 contain valid values }
            current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(A_SHUFPD,S_NO,%00,location.register,location.register));
            { the data is now in bits 0..32 and 64..95 }
            current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_MULPS,S_NO,location.register,location.register));
            if nodetype=addn then
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_HADDPS,S_NO,location.register,location.register))
            else if nodetype=subn then
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_HSUBPS,S_NO,location.register,location.register))
            else
              internalerror(201108163);
          end
      end
    { right may only take the place of the left operand if the operation is commutative }
    else if (right.location.loc=LOC_MMREGISTER) and (cgop in [OP_ADD,OP_MUL]) then
      begin
        location.register:=cg.getmmregister(current_asmdata.CurrAsmList,left.location.size);
        { a value living in a floating point register is written to memory
          instead of being forced into an mm register: there is no direct
          fpu->mm register copy instruction, so going through memory
          probably gives shorter code
        }
        if left.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
          hlcg.location_force_mem(current_asmdata.CurrAsmList,left.location,left.resultdef);
        cg.a_opmm_loc_reg_reg(current_asmdata.CurrAsmList,cgop,location.size,
          left.location,
          right.location.register,
          location.register,
          mms_movescalar);
      end
    else
      begin
        if (nf_swapped in flags) then
          swapleftright;

        hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
        location.register:=cg.getmmregister(current_asmdata.CurrAsmList,left.location.size);
        { a value living in a floating point register is written to memory
          instead of being forced into an mm register: there is no direct
          fpu->mm register copy instruction, so going through memory
          probably gives shorter code
        }
        if right.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
          hlcg.location_force_mem(current_asmdata.CurrAsmList,right.location,right.resultdef);
        cg.a_opmm_loc_reg_reg(current_asmdata.CurrAsmList,cgop,location.size,
          right.location,
          left.location.register,
          location.register,
          mms_movescalar);
      end;
  end;
|
||||
|
||||
|
||||
procedure tx86addnode.second_cmpfloatsse;
|
||||
var
|
||||
@ -860,6 +998,72 @@ unit nx86add;
|
||||
end;
|
||||
|
||||
|
||||
|
||||
{ Code generation for a floating point comparison using VCOMISS (single) or
  VCOMISD (double); any other operand type is an internal error. The result
  location is LOC_FLAGS.

  When right already sits in LOC_MMREGISTER the instruction is emitted with
  left as its first operand and nf_swapped is toggled; otherwise left is
  forced into an mm register and right is used as the first operand. }
procedure tx86addnode.second_cmpfloatavx;
  var
    cmpop : tasmop;
  begin
    if is_single(left.resultdef) then
      cmpop:=A_VCOMISS
    else if is_double(left.resultdef) then
      cmpop:=A_VCOMISD
    else
      internalerror(200402222);
    pass_left_right;

    location_reset(location,LOC_FLAGS,def_cgsize(resultdef));
    if (right.location.loc<>LOC_MMREGISTER) then
      begin
        hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
        { a value living in a floating point register is written to memory
          instead of being forced into an mm register: there is no direct
          fpu->mm register copy instruction, so going through memory
          probably gives shorter code
        }
        if right.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
          hlcg.location_force_mem(current_asmdata.CurrAsmList,right.location,right.resultdef);
        case right.location.loc of
          LOC_REFERENCE,LOC_CREFERENCE:
            begin
              tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
              current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(cmpop,S_NO,right.location.reference,left.location.register));
            end;
          LOC_MMREGISTER,LOC_CMMREGISTER:
            current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(cmpop,S_NO,right.location.register,left.location.register));
          else
            internalerror(200402223);
        end;
      end
    else
      begin
        { right is already in an mm register: keep it there and use left as
          the first instruction operand }
        { a value living in a floating point register is written to memory
          instead of being forced into an mm register: there is no direct
          fpu->mm register copy instruction, so going through memory
          probably gives shorter code
        }
        if left.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
          hlcg.location_force_mem(current_asmdata.CurrAsmList,left.location,left.resultdef);
        case left.location.loc of
          LOC_REFERENCE,LOC_CREFERENCE:
            begin
              tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,left.location.reference);
              current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(cmpop,S_NO,left.location.reference,right.location.register));
            end;
          LOC_MMREGISTER,LOC_CMMREGISTER:
            current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(cmpop,S_NO,left.location.register,right.location.register));
          else
            internalerror(200402221);
        end;
        { the operands were used in exchanged roles: toggle nf_swapped }
        if not(nf_swapped in flags) then
          include(flags,nf_swapped)
        else
          exclude(flags,nf_swapped);
      end;
    location.resflags:=getresflags(true);
  end;
|
||||
|
||||
|
||||
procedure tx86addnode.second_opvector;
|
||||
var
|
||||
op : topcg;
|
||||
@ -912,7 +1116,10 @@ unit nx86add;
|
||||
begin
|
||||
if use_vectorfpu(resultdef) then
|
||||
begin
|
||||
second_addfloatsse;
|
||||
if UseAVX then
|
||||
second_addfloatavx
|
||||
else
|
||||
second_addfloatsse;
|
||||
exit;
|
||||
end;
|
||||
|
||||
@ -959,7 +1166,10 @@ unit nx86add;
|
||||
begin
|
||||
if use_vectorfpu(left.resultdef) or use_vectorfpu(right.resultdef) then
|
||||
begin
|
||||
second_cmpfloatsse;
|
||||
if UseAVX then
|
||||
second_cmpfloatavx
|
||||
else
|
||||
second_cmpfloatsse;
|
||||
exit;
|
||||
end;
|
||||
|
||||
|
@ -276,14 +276,25 @@ implementation
|
||||
begin
|
||||
location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
|
||||
location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
|
||||
case location.size of
|
||||
OS_F32:
|
||||
op:=A_CVTSI2SS;
|
||||
OS_F64:
|
||||
op:=A_CVTSI2SD;
|
||||
else
|
||||
internalerror(2007120902);
|
||||
end;
|
||||
if UseAVX then
|
||||
case location.size of
|
||||
OS_F32:
|
||||
op:=A_VCVTSI2SS;
|
||||
OS_F64:
|
||||
op:=A_VCVTSI2SD;
|
||||
else
|
||||
internalerror(2007120902);
|
||||
end
|
||||
else
|
||||
case location.size of
|
||||
OS_F32:
|
||||
op:=A_CVTSI2SS;
|
||||
OS_F64:
|
||||
op:=A_CVTSI2SD;
|
||||
else
|
||||
internalerror(2007120902);
|
||||
end;
|
||||
|
||||
{ don't use left.location.size, because that one may be OS_32/OS_64
|
||||
if the lower bound of the orddef >= 0
|
||||
}
|
||||
@ -301,11 +312,19 @@ implementation
|
||||
begin
|
||||
href:=left.location.reference;
|
||||
tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,href);
|
||||
current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(op,opsize,href,location.register));
|
||||
if UseAVX then
|
||||
{ VCVTSI2.. requires a second source operand to copy bits 64..127 }
|
||||
current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg_reg(op,opsize,href,location.register,location.register))
|
||||
else
|
||||
current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(op,opsize,href,location.register));
|
||||
end;
|
||||
LOC_REGISTER,
|
||||
LOC_CREGISTER:
|
||||
current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(op,opsize,left.location.register,location.register));
|
||||
if UseAVX then
|
||||
{ VCVTSI2.. requires a second source operand to copy bits 64..127 }
|
||||
current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(op,opsize,left.location.register,location.register,location.register))
|
||||
else
|
||||
current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(op,opsize,left.location.register,location.register));
|
||||
end;
|
||||
end
|
||||
else
|
||||
|
@ -289,14 +289,24 @@ implementation
|
||||
hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,false);
|
||||
location_reset(location,LOC_REGISTER,OS_S64);
|
||||
location.register:=cg.getintregister(current_asmdata.CurrAsmList,OS_S64);
|
||||
case left.location.size of
|
||||
OS_F32:
|
||||
current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CVTSS2SI,S_Q,left.location.register,location.register));
|
||||
OS_F64:
|
||||
current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CVTSD2SI,S_Q,left.location.register,location.register));
|
||||
else
|
||||
internalerror(2007031402);
|
||||
end;
|
||||
if UseAVX then
|
||||
case left.location.size of
|
||||
OS_F32:
|
||||
current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_VCVTSS2SI,S_Q,left.location.register,location.register));
|
||||
OS_F64:
|
||||
current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_VCVTSD2SI,S_Q,left.location.register,location.register));
|
||||
else
|
||||
internalerror(2007031402);
|
||||
end
|
||||
else
|
||||
case left.location.size of
|
||||
OS_F32:
|
||||
current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CVTSS2SI,S_Q,left.location.register,location.register));
|
||||
OS_F64:
|
||||
current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CVTSD2SI,S_Q,left.location.register,location.register));
|
||||
else
|
||||
internalerror(2007031402);
|
||||
end;
|
||||
end
|
||||
else
|
||||
{$endif x86_64}
|
||||
@ -323,14 +333,24 @@ implementation
|
||||
hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,false);
|
||||
location_reset(location,LOC_REGISTER,OS_S64);
|
||||
location.register:=cg.getintregister(current_asmdata.CurrAsmList,OS_S64);
|
||||
case left.location.size of
|
||||
OS_F32:
|
||||
current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CVTTSS2SI,S_Q,left.location.register,location.register));
|
||||
OS_F64:
|
||||
current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CVTTSD2SI,S_Q,left.location.register,location.register));
|
||||
else
|
||||
internalerror(2007031401);
|
||||
end;
|
||||
if UseAVX then
|
||||
case left.location.size of
|
||||
OS_F32:
|
||||
current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_VCVTTSS2SI,S_Q,left.location.register,location.register));
|
||||
OS_F64:
|
||||
current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_VCVTTSD2SI,S_Q,left.location.register,location.register));
|
||||
else
|
||||
internalerror(2007031401);
|
||||
end
|
||||
else
|
||||
case left.location.size of
|
||||
OS_F32:
|
||||
current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CVTTSS2SI,S_Q,left.location.register,location.register));
|
||||
OS_F64:
|
||||
current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CVTTSD2SI,S_Q,left.location.register,location.register));
|
||||
else
|
||||
internalerror(2007031401);
|
||||
end;
|
||||
end
|
||||
else
|
||||
{$endif x86_64}
|
||||
@ -371,9 +391,18 @@ implementation
|
||||
if use_vectorfpu(resultdef) then
|
||||
begin
|
||||
secondpass(left);
|
||||
hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,false);
|
||||
location:=left.location;
|
||||
cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,OP_MUL,left.location.size,left.location,left.location.register,mms_movescalar);
|
||||
location_reset(location,LOC_MMREGISTER,left.location.size);
|
||||
location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
|
||||
if UseAVX then
|
||||
begin
|
||||
hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
|
||||
cg.a_opmm_reg_reg_reg(current_asmdata.CurrAsmList,OP_MUL,left.location.size,left.location.register,left.location.register,location.register,mms_movescalar);
|
||||
end
|
||||
else
|
||||
begin
|
||||
cg.a_loadmm_loc_reg(current_asmdata.CurrAsmList,location.size,left.location,location.register,mms_movescalar);
|
||||
cg.a_opmm_reg_reg(current_asmdata.CurrAsmList,OP_MUL,left.location.size,location.register,location.register,mms_movescalar);
|
||||
end;
|
||||
end
|
||||
else
|
||||
begin
|
||||
@ -389,15 +418,26 @@ implementation
|
||||
begin
|
||||
secondpass(left);
|
||||
hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,false);
|
||||
location:=left.location;
|
||||
case tfloatdef(resultdef).floattype of
|
||||
s32real:
|
||||
current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_SQRTSS,S_XMM,location.register,location.register));
|
||||
s64real:
|
||||
current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_SQRTSD,S_XMM,location.register,location.register));
|
||||
else
|
||||
internalerror(200510031);
|
||||
end;
|
||||
location_reset(location,LOC_MMREGISTER,left.location.size);
|
||||
location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
|
||||
if UseAVX then
|
||||
case tfloatdef(resultdef).floattype of
|
||||
s32real:
|
||||
current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(A_VSQRTSS,S_XMM,left.location.register,location.register,location.register));
|
||||
s64real:
|
||||
current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(A_VSQRTSD,S_XMM,left.location.register,location.register,location.register));
|
||||
else
|
||||
internalerror(200510031);
|
||||
end
|
||||
else
|
||||
case tfloatdef(resultdef).floattype of
|
||||
s32real:
|
||||
current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_SQRTSS,S_XMM,left.location.register,location.register));
|
||||
s64real:
|
||||
current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_SQRTSD,S_XMM,left.location.register,location.register));
|
||||
else
|
||||
internalerror(200510031);
|
||||
end;
|
||||
end
|
||||
else
|
||||
begin
|
||||
|
@ -154,14 +154,11 @@ interface
|
||||
|
||||
if expectloc=LOC_MMREGISTER then
|
||||
begin
|
||||
hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,false);
|
||||
hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
|
||||
location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
|
||||
|
||||
{ make life of register allocator easier }
|
||||
location.register:=cg.getmmregister(current_asmdata.CurrAsmList,def_cgsize(resultdef));
|
||||
cg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,def_cgsize(resultdef),def_cgsize(resultdef),left.location.register,location.register,mms_movescalar);
|
||||
|
||||
reg:=cg.getmmregister(current_asmdata.CurrAsmList,def_cgsize(resultdef));
|
||||
|
||||
current_asmdata.getdatalabel(l1);
|
||||
new_section(current_asmdata.asmlists[al_typedconsts],sec_rodata_norel,l1.name,const_align(sizeof(pint)));
|
||||
@ -179,9 +176,16 @@ interface
|
||||
end;
|
||||
|
||||
reference_reset_symbol(href,l1,0,resultdef.alignment);
|
||||
reg:=cg.getmmregister(current_asmdata.CurrAsmList,def_cgsize(resultdef));
|
||||
cg.a_loadmm_ref_reg(current_asmdata.CurrAsmList,def_cgsize(resultdef),def_cgsize(resultdef),href,reg,mms_movescalar);
|
||||
|
||||
cg.a_opmm_reg_reg(current_asmdata.CurrAsmList,OP_XOR,left.location.size,reg,location.register,nil);
|
||||
if UseAVX then
|
||||
cg.a_opmm_reg_reg_reg(current_asmdata.CurrAsmList,OP_XOR,left.location.size,reg,left.location.register,location.register,nil)
|
||||
else
|
||||
begin
|
||||
cg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,def_cgsize(resultdef),def_cgsize(resultdef),left.location.register,location.register,mms_movescalar);
|
||||
cg.a_opmm_reg_reg(current_asmdata.CurrAsmList,OP_XOR,left.location.size,reg,location.register,nil);
|
||||
end;
|
||||
end
|
||||
else
|
||||
begin
|
||||
|
@ -134,143 +134,164 @@ implementation
|
||||
end;
|
||||
2,3 :
|
||||
begin
|
||||
{ We can handle opcodes with 2 and 3 operands the same way. The opcodes
|
||||
with 3 registers are shrd/shld, where the 3rd operand is const or CL,
|
||||
that doesn't need spilling.
|
||||
However, due to AT&T order inside the compiler, the 3rd operand is
|
||||
numbered 0, so look at operand no. 1 and 2 if we have 3 operands by
|
||||
adding a "n". }
|
||||
n:=0;
|
||||
if ops=3 then
|
||||
n:=1;
|
||||
if (oper[n+0]^.typ=top_reg) and
|
||||
(oper[n+1]^.typ=top_reg) and
|
||||
((getregtype(oper[n+0]^.reg)<>regtype) or
|
||||
(getregtype(oper[n+1]^.reg)<>regtype) or
|
||||
(get_alias(getsupreg(oper[n+0]^.reg))<>get_alias(getsupreg(oper[n+1]^.reg)))) then
|
||||
{ avx instruction?
|
||||
currently this rule is sufficient but it might be extended }
|
||||
if (ops=3) and (opcode<>A_SHRD) and (opcode<>A_SHLD) then
|
||||
begin
|
||||
if (getregtype(oper[n+0]^.reg)=regtype) and
|
||||
(get_alias(getsupreg(oper[n+0]^.reg))=orgreg) then
|
||||
replaceoper:=0+n
|
||||
else if (getregtype(oper[n+1]^.reg)=regtype) and
|
||||
(get_alias(getsupreg(oper[n+1]^.reg))=orgreg) then
|
||||
replaceoper:=1+n;
|
||||
{ avx instructions allow only the first operand (at&t counting) to be a register operand }
|
||||
{ all operands must be registers ... }
|
||||
if (oper[0]^.typ=top_reg) and
|
||||
(oper[1]^.typ=top_reg) and
|
||||
(oper[2]^.typ=top_reg) and
|
||||
{ but they must be different }
|
||||
((getregtype(oper[1]^.reg)<>regtype) or
|
||||
(get_alias(getsupreg(oper[0]^.reg))<>get_alias(getsupreg(oper[1]^.reg)))
|
||||
) and
|
||||
((getregtype(oper[2]^.reg)<>regtype) or
|
||||
(get_alias(getsupreg(oper[0]^.reg))<>get_alias(getsupreg(oper[2]^.reg)))
|
||||
) and
|
||||
(get_alias(getsupreg(oper[0]^.reg))=orgreg) then
|
||||
replaceoper:=0;
|
||||
end
|
||||
else if (oper[n+0]^.typ=top_reg) and
|
||||
(oper[n+1]^.typ=top_const) then
|
||||
else
|
||||
begin
|
||||
if (getregtype(oper[0+n]^.reg)=regtype) and
|
||||
(get_alias(getsupreg(oper[0+n]^.reg))=orgreg) then
|
||||
replaceoper:=0+n
|
||||
else
|
||||
internalerror(200704282);
|
||||
end
|
||||
else if (oper[n+0]^.typ=top_const) and
|
||||
(oper[n+1]^.typ=top_reg) then
|
||||
begin
|
||||
if (getregtype(oper[1+n]^.reg)=regtype) and
|
||||
(get_alias(getsupreg(oper[1+n]^.reg))=orgreg) then
|
||||
replaceoper:=1+n
|
||||
else
|
||||
internalerror(200704283);
|
||||
end;
|
||||
case replaceoper of
|
||||
0 :
|
||||
begin
|
||||
{ Some instructions don't allow memory references
|
||||
for source }
|
||||
case instr.opcode of
|
||||
A_BT,
|
||||
A_BTS,
|
||||
A_BTC,
|
||||
A_BTR,
|
||||
|
||||
{ shufp* would require 16 byte alignment for memory locations so we force the source
|
||||
operand into a register }
|
||||
A_SHUFPD,
|
||||
A_SHUFPS :
|
||||
replaceoper:=-1;
|
||||
{ We can handle opcodes with 2 and shrd/shld the same way, where the 3rd operand is const or CL,
|
||||
that doesn't need spilling.
|
||||
However, due to AT&T order inside the compiler, the 3rd operand is
|
||||
numbered 0, so look at operand no. 1 and 2 if we have 3 operands by
|
||||
adding a "n". }
|
||||
n:=0;
|
||||
if ops=3 then
|
||||
n:=1;
|
||||
if (oper[n+0]^.typ=top_reg) and
|
||||
(oper[n+1]^.typ=top_reg) and
|
||||
((getregtype(oper[n+0]^.reg)<>regtype) or
|
||||
(getregtype(oper[n+1]^.reg)<>regtype) or
|
||||
(get_alias(getsupreg(oper[n+0]^.reg))<>get_alias(getsupreg(oper[n+1]^.reg)))) then
|
||||
begin
|
||||
if (getregtype(oper[n+0]^.reg)=regtype) and
|
||||
(get_alias(getsupreg(oper[n+0]^.reg))=orgreg) then
|
||||
replaceoper:=0+n
|
||||
else if (getregtype(oper[n+1]^.reg)=regtype) and
|
||||
(get_alias(getsupreg(oper[n+1]^.reg))=orgreg) then
|
||||
replaceoper:=1+n;
|
||||
end
|
||||
else if (oper[n+0]^.typ=top_reg) and
|
||||
(oper[n+1]^.typ=top_const) then
|
||||
begin
|
||||
if (getregtype(oper[0+n]^.reg)=regtype) and
|
||||
(get_alias(getsupreg(oper[0+n]^.reg))=orgreg) then
|
||||
replaceoper:=0+n
|
||||
else
|
||||
internalerror(200704282);
|
||||
end
|
||||
else if (oper[n+0]^.typ=top_const) and
|
||||
(oper[n+1]^.typ=top_reg) then
|
||||
begin
|
||||
if (getregtype(oper[1+n]^.reg)=regtype) and
|
||||
(get_alias(getsupreg(oper[1+n]^.reg))=orgreg) then
|
||||
replaceoper:=1+n
|
||||
else
|
||||
internalerror(200704283);
|
||||
end;
|
||||
end;
|
||||
1 :
|
||||
begin
|
||||
{ Some instructions don't allow memory references
|
||||
for destination }
|
||||
case instr.opcode of
|
||||
A_CMOVcc,
|
||||
A_MOVZX,
|
||||
A_MOVSX,
|
||||
A_MOVSXD,
|
||||
A_MULSS,
|
||||
A_MULSD,
|
||||
A_SUBSS,
|
||||
A_SUBSD,
|
||||
A_ADDSD,
|
||||
A_ADDSS,
|
||||
A_DIVSD,
|
||||
A_DIVSS,
|
||||
A_SHLD,
|
||||
A_SHRD,
|
||||
A_COMISD,
|
||||
A_COMISS,
|
||||
A_CVTDQ2PD,
|
||||
A_CVTDQ2PS,
|
||||
A_CVTPD2DQ,
|
||||
A_CVTPD2PI,
|
||||
A_CVTPD2PS,
|
||||
A_CVTPI2PD,
|
||||
A_CVTPS2DQ,
|
||||
A_CVTPS2PD,
|
||||
A_CVTSD2SI,
|
||||
A_CVTSD2SS,
|
||||
A_CVTSI2SD,
|
||||
A_CVTSS2SD,
|
||||
A_CVTTPD2PI,
|
||||
A_CVTTPD2DQ,
|
||||
A_CVTTPS2DQ,
|
||||
A_CVTTSD2SI,
|
||||
A_CVTPI2PS,
|
||||
A_CVTPS2PI,
|
||||
A_CVTSI2SS,
|
||||
A_CVTSS2SI,
|
||||
A_CVTTPS2PI,
|
||||
A_CVTTSS2SI,
|
||||
A_IMUL,
|
||||
A_XORPD,
|
||||
A_XORPS,
|
||||
A_ORPD,
|
||||
A_ORPS,
|
||||
A_ANDPD,
|
||||
A_ANDPS,
|
||||
A_UNPCKLPS,
|
||||
A_UNPCKHPS,
|
||||
A_SHUFPD,
|
||||
A_SHUFPS:
|
||||
case replaceoper of
|
||||
0 :
|
||||
begin
|
||||
{ Some instructions don't allow memory references
|
||||
for source }
|
||||
case instr.opcode of
|
||||
A_BT,
|
||||
A_BTS,
|
||||
A_BTC,
|
||||
A_BTR,
|
||||
|
||||
replaceoper:=-1;
|
||||
{ shufp* would require 16 byte alignment for memory locations so we force the source
|
||||
operand into a register }
|
||||
A_SHUFPD,
|
||||
A_SHUFPS :
|
||||
replaceoper:=-1;
|
||||
end;
|
||||
end;
|
||||
1 :
|
||||
begin
|
||||
{ Some instructions don't allow memory references
|
||||
for destination }
|
||||
case instr.opcode of
|
||||
A_CMOVcc,
|
||||
A_MOVZX,
|
||||
A_MOVSX,
|
||||
A_MOVSXD,
|
||||
A_MULSS,
|
||||
A_MULSD,
|
||||
A_SUBSS,
|
||||
A_SUBSD,
|
||||
A_ADDSD,
|
||||
A_ADDSS,
|
||||
A_DIVSD,
|
||||
A_DIVSS,
|
||||
A_SHLD,
|
||||
A_SHRD,
|
||||
A_COMISD,
|
||||
A_COMISS,
|
||||
A_CVTDQ2PD,
|
||||
A_CVTDQ2PS,
|
||||
A_CVTPD2DQ,
|
||||
A_CVTPD2PI,
|
||||
A_CVTPD2PS,
|
||||
A_CVTPI2PD,
|
||||
A_CVTPS2DQ,
|
||||
A_CVTPS2PD,
|
||||
A_CVTSD2SI,
|
||||
A_CVTSD2SS,
|
||||
A_CVTSI2SD,
|
||||
A_CVTSS2SD,
|
||||
A_CVTTPD2PI,
|
||||
A_CVTTPD2DQ,
|
||||
A_CVTTPS2DQ,
|
||||
A_CVTTSD2SI,
|
||||
A_CVTPI2PS,
|
||||
A_CVTPS2PI,
|
||||
A_CVTSI2SS,
|
||||
A_CVTSS2SI,
|
||||
A_CVTTPS2PI,
|
||||
A_CVTTSS2SI,
|
||||
A_IMUL,
|
||||
A_XORPD,
|
||||
A_XORPS,
|
||||
A_ORPD,
|
||||
A_ORPS,
|
||||
A_ANDPD,
|
||||
A_ANDPS,
|
||||
A_UNPCKLPS,
|
||||
A_UNPCKHPS,
|
||||
A_SHUFPD,
|
||||
A_SHUFPS:
|
||||
|
||||
replaceoper:=-1;
|
||||
{$ifdef x86_64}
|
||||
A_MOV:
|
||||
{ 64 bit constants can only be moved into registers }
|
||||
if (oper[0]^.typ=top_const) and
|
||||
(oper[1]^.typ=top_reg) and
|
||||
((oper[0]^.val<low(longint)) or
|
||||
(oper[0]^.val>high(longint))) then
|
||||
replaceoper:=-1;
|
||||
A_MOV:
|
||||
{ 64 bit constants can only be moved into registers }
|
||||
if (oper[0]^.typ=top_const) and
|
||||
(oper[1]^.typ=top_reg) and
|
||||
((oper[0]^.val<low(longint)) or
|
||||
(oper[0]^.val>high(longint))) then
|
||||
replaceoper:=-1;
|
||||
{$endif x86_64}
|
||||
end;
|
||||
end;
|
||||
end;
|
||||
end;
|
||||
end;
|
||||
end;
|
||||
end;
|
||||
|
||||
{$ifdef x86_64}
|
||||
{$ifdef x86_64}
|
||||
{ 32 bit operations on 32 bit registers on x86_64 can result in
|
||||
zeroing the upper 32 bits of the register. This does not happen
|
||||
with memory operations, so we have to perform these calculations
|
||||
in registers. }
|
||||
if (instr.opsize=S_L) then
|
||||
replaceoper:=-1;
|
||||
{$endif x86_64}
|
||||
{$endif x86_64}
|
||||
|
||||
{ Replace register with spill reference }
|
||||
if replaceoper<>-1 then
|
||||
@ -287,6 +308,10 @@ implementation
|
||||
opcode:=A_MOVSS;
|
||||
A_MOVAPD:
|
||||
opcode:=A_MOVSD;
|
||||
A_VMOVAPS:
|
||||
opcode:=A_VMOVSS;
|
||||
A_VMOVAPD:
|
||||
opcode:=A_VMOVSD;
|
||||
end;
|
||||
result:=true;
|
||||
end;
|
||||
|
@ -3453,22 +3453,22 @@ void \326\1\xA7 X86_64
|
||||
|
||||
|
||||
[VADDPD]
|
||||
(Ch_All, Ch_None, Ch_None)
|
||||
(Ch_Wop3, Ch_Rop2, Ch_Rop1)
|
||||
xmmreg,xmmreg,xmmrm \361\362\370\1\x58\75\120 AVX,SANDYBRIDGE
|
||||
ymmreg,ymmreg,ymmrm \361\362\364\370\1\x58\75\120 AVX,SANDYBRIDGE
|
||||
|
||||
[VADDPS]
|
||||
(Ch_All, Ch_None, Ch_None)
|
||||
(Ch_Wop3, Ch_Rop2, Ch_Rop1)
|
||||
xmmreg,xmmreg,xmmrm \362\370\1\x58\75\120 AVX,SANDYBRIDGE
|
||||
ymmreg,ymmreg,ymmrm \362\364\370\1\x58\75\120 AVX,SANDYBRIDGE
|
||||
|
||||
[VADDSD]
|
||||
(Ch_All, Ch_None, Ch_None)
|
||||
(Ch_Wop3, Ch_Rop2, Ch_Rop1)
|
||||
xmmreg,xmmreg,mem64 \334\362\370\1\x58\75\120 AVX,SANDYBRIDGE
|
||||
xmmreg,xmmreg,xmmreg \334\362\370\1\x58\75\120 AVX,SANDYBRIDGE
|
||||
|
||||
[VADDSS]
|
||||
(Ch_All, Ch_None, Ch_None)
|
||||
(Ch_Wop3, Ch_Rop2, Ch_Rop1)
|
||||
xmmreg,xmmreg,mem32 \333\362\370\1\x58\75\120 AVX,SANDYBRIDGE
|
||||
xmmreg,xmmreg,xmmreg \333\362\370\1\x58\75\120 AVX,SANDYBRIDGE
|
||||
|
||||
@ -3919,7 +3919,7 @@ rm64,xmmreg \361\362\363\370\1\x7E\101 AVX,SA
|
||||
xmmreg,rm64 \361\362\363\370\1\x6E\110 AVX,SANDYBRIDGE
|
||||
|
||||
[VMOVSD]
|
||||
(Ch_All, Ch_None, Ch_None)
|
||||
(Ch_Wop3, Ch_Rop2, Ch_Rop1)
|
||||
xmmreg,xmmreg,xmmreg \334\362\370\1\x10\75\120 AVX,SANDYBRIDGE
|
||||
xmmreg,mem64 \334\362\370\1\x10\110 AVX,SANDYBRIDGE
|
||||
xmmreg,xmmreg,xmmreg \334\362\370\1\x11\75\102 AVX,SANDYBRIDGE
|
||||
@ -3936,7 +3936,7 @@ xmmreg,xmmrm \333\362\370\1\x12\110 AVX,SA
|
||||
ymmreg,ymmrm \333\362\364\370\1\x12\110 AVX,SANDYBRIDGE
|
||||
|
||||
[VMOVSS]
|
||||
(Ch_All, Ch_None, Ch_None)
|
||||
(Ch_Wop3, Ch_Rop2, Ch_Rop1)
|
||||
xmmreg,xmmreg,xmmreg \333\362\370\1\x10\75\120 AVX,SANDYBRIDGE
|
||||
xmmreg,mem64 \333\362\370\1\x10\110 AVX,SANDYBRIDGE
|
||||
xmmreg,xmmreg,xmmreg \333\362\370\1\x11\75\102 AVX,SANDYBRIDGE
|
||||
@ -3961,22 +3961,22 @@ ymmrm,ymmreg \362\364\370\1\x11\101 AVX,SA
|
||||
xmmreg,xmmreg,xmmrm,imm8 \361\362\372\1\x42\75\120\27 AVX,SANDYBRIDGE
|
||||
|
||||
[VMULPD]
|
||||
(Ch_All, Ch_None, Ch_None)
|
||||
(Ch_Wop3, Ch_Rop2, Ch_Rop1)
|
||||
xmmreg,xmmreg,xmmrm \361\362\370\1\x59\75\120 AVX,SANDYBRIDGE
|
||||
ymmreg,ymmreg,ymmrm \361\362\364\370\1\x59\75\120 AVX,SANDYBRIDGE
|
||||
|
||||
[VMULPS]
|
||||
(Ch_All, Ch_None, Ch_None)
|
||||
(Ch_Wop3, Ch_Rop2, Ch_Rop1)
|
||||
xmmreg,xmmreg,xmmrm \362\370\1\x59\75\120 AVX,SANDYBRIDGE
|
||||
ymmreg,ymmreg,ymmrm \362\364\370\1\x59\75\120 AVX,SANDYBRIDGE
|
||||
|
||||
[VMULSD]
|
||||
(Ch_All, Ch_None, Ch_None)
|
||||
(Ch_Wop3, Ch_Rop2, Ch_Rop1)
|
||||
xmmreg,xmmreg,mem64 \334\362\370\1\x59\75\120 AVX,SANDYBRIDGE
|
||||
xmmreg,xmmreg,xmmreg \334\362\370\1\x59\75\120 AVX,SANDYBRIDGE
|
||||
|
||||
[VMULSS]
|
||||
(Ch_All, Ch_None, Ch_None)
|
||||
(Ch_Wop3, Ch_Rop2, Ch_Rop1)
|
||||
xmmreg,xmmreg,mem32 \333\362\370\1\x59\75\120 AVX,SANDYBRIDGE
|
||||
xmmreg,xmmreg,xmmreg \333\362\370\1\x59\75\120 AVX,SANDYBRIDGE
|
||||
|
||||
|
@ -51,7 +51,8 @@ Type
|
||||
fpu_ssse3,
|
||||
fpu_sse41,
|
||||
fpu_sse42,
|
||||
fpu_avx
|
||||
fpu_avx,
|
||||
fpu_avx2
|
||||
);
|
||||
|
||||
Const
|
||||
@ -86,11 +87,14 @@ Const
|
||||
'SSSE3',
|
||||
'SSE41',
|
||||
'SSE42',
|
||||
'AVX'
|
||||
'AVX',
|
||||
'AVX2'
|
||||
);
|
||||
|
||||
sse_singlescalar : set of tfputype = [fpu_sse64,fpu_sse3];
|
||||
sse_doublescalar : set of tfputype = [fpu_sse64,fpu_sse3];
|
||||
sse_singlescalar = [fpu_sse64..fpu_avx2];
|
||||
sse_doublescalar = [fpu_sse64..fpu_avx2];
|
||||
|
||||
fpu_avx_instructionsets = [fpu_avx,fpu_avx2];
|
||||
|
||||
{ Supported optimizations, only used for information }
|
||||
supported_optimizerswitches = genericlevel1optimizerswitches+
|
||||
|
@ -685,6 +685,10 @@
|
||||
(Ch: (Ch_RRAX, Ch_WMemEDI, Ch_RWRDI)),
|
||||
(Ch: (Ch_WRAX, Ch_RWRSI, Ch_None)),
|
||||
(Ch: (Ch_All, Ch_None, Ch_None)),
|
||||
(Ch: (Ch_Wop3, Ch_Rop2, Ch_Rop1)),
|
||||
(Ch: (Ch_Wop3, Ch_Rop2, Ch_Rop1)),
|
||||
(Ch: (Ch_Wop3, Ch_Rop2, Ch_Rop1)),
|
||||
(Ch: (Ch_Wop3, Ch_Rop2, Ch_Rop1)),
|
||||
(Ch: (Ch_All, Ch_None, Ch_None)),
|
||||
(Ch: (Ch_All, Ch_None, Ch_None)),
|
||||
(Ch: (Ch_All, Ch_None, Ch_None)),
|
||||
@ -772,21 +776,17 @@
|
||||
(Ch: (Ch_All, Ch_None, Ch_None)),
|
||||
(Ch: (Ch_All, Ch_None, Ch_None)),
|
||||
(Ch: (Ch_All, Ch_None, Ch_None)),
|
||||
(Ch: (Ch_Wop3, Ch_Rop2, Ch_Rop1)),
|
||||
(Ch: (Ch_All, Ch_None, Ch_None)),
|
||||
(Ch: (Ch_All, Ch_None, Ch_None)),
|
||||
(Ch: (Ch_Wop3, Ch_Rop2, Ch_Rop1)),
|
||||
(Ch: (Ch_All, Ch_None, Ch_None)),
|
||||
(Ch: (Ch_All, Ch_None, Ch_None)),
|
||||
(Ch: (Ch_All, Ch_None, Ch_None)),
|
||||
(Ch: (Ch_All, Ch_None, Ch_None)),
|
||||
(Ch: (Ch_All, Ch_None, Ch_None)),
|
||||
(Ch: (Ch_All, Ch_None, Ch_None)),
|
||||
(Ch: (Ch_All, Ch_None, Ch_None)),
|
||||
(Ch: (Ch_All, Ch_None, Ch_None)),
|
||||
(Ch: (Ch_All, Ch_None, Ch_None)),
|
||||
(Ch: (Ch_All, Ch_None, Ch_None)),
|
||||
(Ch: (Ch_All, Ch_None, Ch_None)),
|
||||
(Ch: (Ch_All, Ch_None, Ch_None)),
|
||||
(Ch: (Ch_All, Ch_None, Ch_None)),
|
||||
(Ch: (Ch_Wop3, Ch_Rop2, Ch_Rop1)),
|
||||
(Ch: (Ch_Wop3, Ch_Rop2, Ch_Rop1)),
|
||||
(Ch: (Ch_Wop3, Ch_Rop2, Ch_Rop1)),
|
||||
(Ch: (Ch_Wop3, Ch_Rop2, Ch_Rop1)),
|
||||
(Ch: (Ch_All, Ch_None, Ch_None)),
|
||||
(Ch: (Ch_All, Ch_None, Ch_None)),
|
||||
(Ch: (Ch_All, Ch_None, Ch_None)),
|
||||
|
Loading…
Reference in New Issue
Block a user