; Mirror of https://gitlab.com/freepascal.org/fpc/source.git
; (synced 2025-04-23 16:09:43 +02:00; 546 lines, 19 KiB, plaintext)
; SSE
|
|
; Entry layout used throughout this file: mnemonic[variant](operands), where
; each operand is "name: type", optionally prefixed with out or var.
; NOTE(review): presumably out marks a write-only result and var a
; read-modify-write operand (as with Pascal parameter modes) - confirm against
; the tool that consumes this file.
movss(out r0: xmm; r1: ptr32)
|
|
movaps(out r0: xmm; r1: ptr128)
|
|
movups(out r0: xmm; r1: ptr128)
|
|
|
|
movss[to_mem](r0: ptr32; r1: xmm)
|
|
movaps[to_mem](r0: ptr128; r1: xmm)
|
|
movups[to_mem](r0: ptr128; r1: xmm)
|
|
|
|
movss[to_val](out r0: f32; r1: xmm)
|
|
movss[from_val](out r0: xmm; r1: f32)
|
|
|
|
movlps(var r0: xmm; r1: ptr64)
|
|
movhps(var r0: xmm; r1: ptr64)
|
|
movlps[to_mem](r0: ptr64; r1: xmm)
|
|
movhps[to_mem](r0: ptr64; r1: xmm)
|
|
|
|
movlhps(var r0: xmm; r1: xmm)
|
|
movhlps(var r0: xmm; r1: xmm)
|
|
|
|
addss(var r0: f32; r1: f32)
|
|
addss[from_mem](var r0: f32; r1: ptr32)
|
|
subss(var r0: f32; r1: f32)
|
|
subss[from_mem](var r0: f32; r1: ptr32)
|
|
mulss(var r0: f32; r1: f32)
|
|
mulss[from_mem](var r0: f32; r1: ptr32)
|
|
divss(var r0: f32; r1: f32)
|
|
divss[from_mem](var r0: f32; r1: ptr32)
|
|
rcpss(var r0: f32; r1: f32)
|
|
rcpss[from_mem](var r0: f32; r1: ptr32)
|
|
sqrtss(var r0: f32; r1: f32)
|
|
sqrtss[from_mem](var r0: f32; r1: ptr32)
|
|
maxss(var r0: f32; r1: f32)
|
|
maxss[from_mem](var r0: f32; r1: ptr32)
|
|
minss(var r0: f32; r1: f32)
|
|
minss[from_mem](var r0: f32; r1: ptr32)
|
|
rsqrtss(var r0: f32; r1: f32)
|
|
rsqrtss[from_mem](var r0: f32; r1: ptr32)
|
|
|
|
addps(var r0: xmm; r1: xmm)
|
|
addps[from_mem](var r0: xmm; r1: ptr128)
|
|
subps(var r0: xmm; r1: xmm)
|
|
subps[from_mem](var r0: xmm; r1: ptr128)
|
|
mulps(var r0: xmm; r1: xmm)
|
|
mulps[from_mem](var r0: xmm; r1: ptr128)
|
|
divps(var r0: xmm; r1: xmm)
|
|
divps[from_mem](var r0: xmm; r1: ptr128)
|
|
rcpps(var r0: xmm; r1: xmm)
|
|
rcpps[from_mem](var r0: xmm; r1: ptr128)
|
|
sqrtps(var r0: xmm; r1: xmm)
|
|
sqrtps[from_mem](var r0: xmm; r1: ptr128)
|
|
maxps(var r0: xmm; r1: xmm)
|
|
maxps[from_mem](var r0: xmm; r1: ptr128)
|
|
minps(var r0: xmm; r1: xmm)
|
|
minps[from_mem](var r0: xmm; r1: ptr128)
|
|
rsqrtps(var r0: xmm; r1: xmm)
|
|
rsqrtps[from_mem](var r0: xmm; r1: ptr128)
|
|
|
|
andps(var r0: xmm; r1: xmm)
|
|
andps[from_mem](var r0: xmm; r1: ptr128)
|
|
orps(var r0: xmm; r1: xmm)
|
|
orps[from_mem](var r0: xmm; r1: ptr128)
|
|
xorps(var r0: xmm; r1: xmm)
|
|
xorps[from_mem](var r0: xmm; r1: ptr128)
|
|
andnps(var r0: xmm; r1: xmm)
|
|
andnps[from_mem](var r0: xmm; r1: ptr128)
|
|
|
|
cmpss(var r0: f32; r1: f32; imm: i32) (imm in [0..7])
|
|
cmpss[from_mem](var r0: f32; r1: ptr32; imm: i32) (imm in [0..7])
|
|
cmpps(var r0: xmm; r1: xmm; imm: i32) (imm in [0..7])
|
|
cmpps[from_mem](var r0: xmm; r1: ptr128; imm: i32) (imm in [0..7])
|
|
|
|
shufps(var r0: xmm; r1: xmm; imm: i32) (imm in [0..$ff])
|
|
shufps[from_mem](var r0: xmm; r1: ptr128; imm: i32) (imm in [0..$ff])
|
|
unpckhps(var r0: xmm; r1: xmm)
|
|
unpckhps[from_mem](var r0: xmm; r1: ptr128)
|
|
unpcklps(var r0: xmm; r1: xmm)
|
|
unpcklps[from_mem](var r0: xmm; r1: ptr128)
|
|
|
|
cvtsi2ss(var r0: f32; r1: reg)
|
|
cvtsi2ss[from_mem](var r0: f32; r1: ptr32)
|
|
cvtss2si(out r0: reg; r1: f32)
|
|
cvtss2si[from_mem](out r0: reg; r1: ptr32)
|
|
cvttss2si(out r0: reg; r1: f32)
|
|
cvttss2si[from_mem](out r0: reg; r1: ptr32)
|
|
|
|
cvtpi2ps(var r0: xmm; r1: mm)
|
|
cvtpi2ps[from_mem](var r0: xmm; r1: ptr64)
|
|
cvtps2pi(out r0: mm; r1: xmm)
|
|
cvtps2pi[from_mem](out r0: mm; r1: ptr64)
|
|
cvttps2pi(out r0: mm; r1: xmm)
|
|
cvttps2pi[from_mem](out r0: mm; r1: ptr64)
|
|
|
|
pmulhuw[mmx](var r0: mm; r1: mm)
|
|
pmulhuw[mmx_from_mem](var r0: mm; r1: ptr64)
|
|
psadbw[mmx](var r0: mm; r1: mm)
|
|
psadbw[mmx_from_mem](var r0: mm; r1: ptr64)
|
|
pavgb[mmx](var r0: mm; r1: mm)
|
|
pavgb[mmx_from_mem](var r0: mm; r1: ptr64)
|
|
pavgw[mmx](var r0: mm; r1: mm)
|
|
pavgw[mmx_from_mem](var r0: mm; r1: ptr64)
|
|
pmaxub[mmx](var r0: mm; r1: mm)
|
|
pmaxub[mmx_from_mem](var r0: mm; r1: ptr64)
|
|
pminub[mmx](var r0: mm; r1: mm)
|
|
pminub[mmx_from_mem](var r0: mm; r1: ptr64)
|
|
pmaxsw[mmx](var r0: mm; r1: mm)
|
|
pmaxsw[mmx_from_mem](var r0: mm; r1: ptr64)
|
|
pminsw[mmx](var r0: mm; r1: mm)
|
|
pminsw[mmx_from_mem](var r0: mm; r1: ptr64)
|
|
pextrw[mmx](out r0: reg; r1: mm; imm: i32) (imm in [0..3])
|
|
pinsrw[mmx](var r0: mm; r1: reg; imm: i32) (imm in [0..3])
|
|
|
|
pmovmskb[mmx](out r0: reg; r1: mm)
|
|
pshufw(out r0: mm; r1: mm; imm: i32) (imm in [0..$ff])
|
|
pshufw[from_mem](out r0: mm; r1: ptr64; imm: i32) (imm in [0..$ff])
|
|
|
|
; SSE2 data movement instructions
|
|
; Memory operand widths follow the amount of data each instruction transfers:
; 128 bits for movapd/movupd/movntpd, 64 bits for movhpd/movlpd/movsd.
movapd(out r0: xmm; r1: ptr128)
|
|
movapd[to_mem](r0: ptr128; r1: xmm)
|
|
movntpd[to_mem](r0: ptr128; r1: xmm)
|
|
; The movhpd/movlpd loads replace only one half of r0 and keep the other half,
; so r0 is declared var (like movhps/movlps) rather than out.
movhpd(var r0: xmm; r1: ptr64)
|
|
movhpd[to_mem](r0: ptr64; r1: xmm)
|
|
movlpd(var r0: xmm; r1: ptr64)
|
|
movlpd[to_mem](r0: ptr64; r1: xmm)
|
|
movupd(out r0: xmm; r1: ptr128)
|
|
movupd[to_mem](r0: ptr128; r1: xmm)
|
|
movmskpd(out r0: r32; r1: xmm)
|
|
; The movsd memory forms move a single 64-bit double.
movsd[from_mem](out r0: xmm; r1: ptr64)
|
|
movsd[to_mem](r0: ptr64; r1: xmm)
|
|
|
|
movsd[to_val](out r0: f64; r1: xmm)
|
|
movsd[from_val](out r0: xmm; r1: f64)
|
|
|
|
; SSE2 packed arithmetic instructions
|
|
addpd(var r0: xmm; r1: xmm)
|
|
addpd[from_mem](var r0: xmm; r1: ptr128)
|
|
addsd(var r0: f64; r1: f64)
|
|
addsd[from_mem](var r0: f64; r1: ptr64)
|
|
divpd(var r0: xmm; r1: xmm)
|
|
divpd[from_mem](var r0: xmm; r1: ptr128)
|
|
divsd(var r0: f64; r1: f64)
|
|
divsd[from_mem](var r0: f64; r1: ptr64)
|
|
maxpd(var r0: xmm; r1: xmm)
|
|
maxpd[from_mem](var r0: xmm; r1: ptr128)
|
|
; NOTE(review): the operand types here differ from the sibling minsd entries
; (minsd uses f64/f64 and f64/ptr64) - confirm whether xmm is intended.
maxsd(var r0: f64; r1: xmm)
|
|
maxsd[from_mem](var r0: xmm; r1: ptr64)
|
|
minpd(var r0: xmm; r1: xmm)
|
|
minpd[from_mem](var r0: xmm; r1: ptr128)
|
|
minsd(var r0: f64; r1: f64)
|
|
minsd[from_mem](var r0: f64; r1: ptr64)
|
|
mulpd(var r0: xmm; r1: xmm)
|
|
mulpd[from_mem](var r0: xmm; r1: ptr128)
|
|
; NOTE(review): r1 is declared xmm while the other scalar-double arithmetic
; entries (addsd, subsd, divsd, minsd) declare r1 as f64 - confirm.
mulsd(var r0: f64; r1: xmm)
|
|
mulsd[from_mem](var r0: f64; r1: ptr64)
|
|
sqrtpd(out r0: xmm; r1: xmm)
|
|
sqrtpd[from_mem](out r0: xmm; r1: ptr128)
|
|
sqrtsd(out r0: f64; r1: f64)
|
|
sqrtsd[from_mem](out r0: f64; r1: ptr64)
|
|
subpd(var r0: xmm; r1: xmm)
|
|
subpd[from_mem](var r0: xmm; r1: ptr128)
|
|
subsd(var r0: f64; r1: f64)
|
|
subsd[from_mem](var r0: f64; r1: ptr64)
|
|
|
|
; SSE2 logical instructions
|
|
andpd(var r0: xmm; r1: xmm)
|
|
andpd[from_mem](var r0: xmm; r1: ptr128)
|
|
andnpd(var r0: xmm; r1: xmm)
|
|
andnpd[from_mem](var r0: xmm; r1: ptr128)
|
|
orpd(var r0: xmm; r1: xmm)
|
|
orpd[from_mem](var r0: xmm; r1: ptr128)
|
|
xorpd(var r0: xmm; r1: xmm)
|
|
xorpd[from_mem](var r0: xmm; r1: ptr128)
|
|
|
|
; SSE2 compare instructions
|
|
; NOTE(review): cmpss/cmpps carry an "(imm in [0..7])" range constraint; the
; double-precision compares below declare none - confirm whether that is
; intentional.
cmppd(var r0: xmm; r1: xmm; imm: i32)
|
|
cmppd[from_mem](var r0: xmm; r1: ptr128; imm: i32)
|
|
cmpsd(var r0: f64; r1: f64; imm: i32)
|
|
cmpsd[from_mem](var r0: f64; r1: ptr64; imm: i32)
|
|
comisd(var r0: f64; r1: f64)
|
|
comisd[from_mem](var r0: f64; r1: ptr64)
|
|
ucomisd(var r0: f64; r1: f64)
|
|
ucomisd[from_mem](var r0: f64; r1: ptr64)
|
|
|
|
; SSE2 shuffle and unpack instructions
|
|
shufpd(var r0: xmm; r1: xmm; imm: i32)
|
|
shufpd[from_mem](var r0: xmm; r1: ptr128; imm: i32)
|
|
unpckhpd(var r0: xmm; r1: xmm)
|
|
unpckhpd[from_mem](var r0: xmm; r1: ptr128)
|
|
unpcklpd(var r0: xmm; r1: xmm)
|
|
unpcklpd[from_mem](var r0: xmm; r1: ptr128)
|
|
|
|
; SSE2 conversion instructions
|
|
cvtdq2pd(var r0: xmm; r1: xmm)
|
|
cvtdq2pd[from_mem](var r0: xmm; r1: ptr64)
|
|
cvtdq2ps(var r0: xmm; r1: xmm)
|
|
cvtdq2ps[from_mem](var r0: xmm; r1: ptr128)
|
|
cvtpd2dq(var r0: xmm; r1: xmm)
|
|
cvtpd2dq[from_mem](var r0: xmm; r1: ptr128)
|
|
cvtpd2pi(var r0: mm; r1: xmm)
|
|
cvtpd2pi[from_mem](var r0: mm; r1: ptr128)
|
|
cvtpd2ps(var r0: xmm; r1: xmm)
|
|
cvtpd2ps[from_mem](var r0: xmm; r1: ptr128)
|
|
cvtpi2pd(var r0: xmm; r1: mm)
|
|
cvtpi2pd[from_mem](var r0: xmm; r1: ptr64)
|
|
cvtps2dq(var r0: xmm; r1: xmm)
|
|
cvtps2dq[from_mem](var r0: xmm; r1: ptr128)
|
|
cvtps2pd(var r0: xmm; r1: xmm)
|
|
cvtps2pd[from_mem](var r0: xmm; r1: ptr64)
|
|
cvtsd2si(var r0: sreg; r1: xmm)
|
|
cvtsd2si[from_mem](var r0: sreg; r1: ptr64)
|
|
cvtsd2ss(var r0: xmm; r1: xmm)
|
|
cvtsd2ss[from_mem](var r0: xmm; r1: ptr64)
|
|
cvtsi2sd(var r0: f64; r1: r32)
|
|
cvtsi2sd[from_mem](var r0: f64; r1: ptr32)
|
|
; NOTE(review): the mnemonic is a single -> double conversion, yet the source
; operand r1 is declared f64 rather than f32 (the [from_mem] form uses ptr32) -
; confirm.
cvtss2sd(var r0: f64; r1: f64)
|
|
cvtss2sd[from_mem](var r0: f64; r1: ptr32)
|
|
cvttpd2dq(var r0: xmm; r1: xmm)
|
|
cvttpd2dq[from_mem](var r0: xmm; r1: ptr128)
|
|
cvttpd2pi(var r0: mm; r1: xmm)
|
|
cvttpd2pi[from_mem](var r0: mm; r1: ptr128)
|
|
cvttps2dq(var r0: xmm; r1: xmm)
|
|
cvttps2dq[from_mem](var r0: xmm; r1: ptr128)
|
|
cvttsd2si(var r0: sreg; r1: xmm)
|
|
cvttsd2si[from_mem](var r0: sreg; r1: ptr64)
|
|
|
|
; SSE2 MMX-like instructions
|
|
movd[from_reg](out r0: xmm; r1: r32)
|
|
movd[from_mem](out r0: xmm; r1: ptr32)
|
|
movd[to_reg](out r0: r32; r1: xmm)
|
|
movd[to_mem](r0: ptr32; r1: xmm)
|
|
movq[from_mem](out r0: xmm; r1: ptr64)
|
|
movq[to_mem](r0: ptr64; r1: xmm)
|
|
; NOTE(review): r0 is declared var/r32 here whereas pmovmskb[mmx] declares
; out/reg - confirm whether the difference is intended.
pmovmskb(var r0: r32; r1: xmm)
|
|
pextrw[sse2](out r0: r16; r1: xmm; imm: i32)
|
|
pinsrw[sse2](var r0: xmm; r1: r32; imm: i32)
|
|
pinsrw[from_mem](var r0: xmm; r1: ptr16; imm: i32)
|
|
packssdw(var r0: xmm; r1: xmm)
|
|
packssdw[from_mem](var r0: xmm; r1: ptr128)
|
|
packsswb(var r0: xmm; r1: xmm)
|
|
packsswb[from_mem](var r0: xmm; r1: ptr128)
|
|
packuswb(var r0: xmm; r1: xmm)
|
|
packuswb[from_mem](var r0: xmm; r1: ptr128)
|
|
paddb(var r0: xmm; r1: xmm)
|
|
paddb[from_mem](var r0: xmm; r1: ptr128)
|
|
paddw(var r0: xmm; r1: xmm)
|
|
paddw[from_mem](var r0: xmm; r1: ptr128)
|
|
paddd(var r0: xmm; r1: xmm)
|
|
paddd[from_mem](var r0: xmm; r1: ptr128)
|
|
paddq(var r0: xmm; r1: xmm)
|
|
paddq[from_mem](var r0: xmm; r1: ptr128)
|
|
paddsb(var r0: xmm; r1: xmm)
|
|
paddsb[from_mem](var r0: xmm; r1: ptr128)
|
|
paddsw(var r0: xmm; r1: xmm)
|
|
paddsw[from_mem](var r0: xmm; r1: ptr128)
|
|
paddusb(var r0: xmm; r1: xmm)
|
|
paddusb[from_mem](var r0: xmm; r1: ptr128)
|
|
paddusw(var r0: xmm; r1: xmm)
|
|
paddusw[from_mem](var r0: xmm; r1: ptr128)
|
|
pand(var r0: xmm; r1: xmm)
|
|
pand[from_mem](var r0: xmm; r1: ptr128)
|
|
pandn(var r0: xmm; r1: xmm)
|
|
pandn[from_mem](var r0: xmm; r1: ptr128)
|
|
por(var r0: xmm; r1: xmm)
|
|
por[from_mem](var r0: xmm; r1: ptr128)
|
|
pxor(var r0: xmm; r1: xmm)
|
|
pxor[from_mem](var r0: xmm; r1: ptr128)
|
|
pcmpeqb(var r0: xmm; r1: xmm)
|
|
pcmpeqb[from_mem](var r0: xmm; r1: ptr128)
|
|
pcmpeqw(var r0: xmm; r1: xmm)
|
|
pcmpeqw[from_mem](var r0: xmm; r1: ptr128)
|
|
pcmpeqd(var r0: xmm; r1: xmm)
|
|
pcmpeqd[from_mem](var r0: xmm; r1: ptr128)
|
|
pcmpgtb(var r0: xmm; r1: xmm)
|
|
pcmpgtb[from_mem](var r0: xmm; r1: ptr128)
|
|
pcmpgtw(var r0: xmm; r1: xmm)
|
|
pcmpgtw[from_mem](var r0: xmm; r1: ptr128)
|
|
pcmpgtd(var r0: xmm; r1: xmm)
|
|
pcmpgtd[from_mem](var r0: xmm; r1: ptr128)
|
|
pmullw(var r0: xmm; r1: xmm)
|
|
pmullw[from_mem](var r0: xmm; r1: ptr128)
|
|
pmulhw(var r0: xmm; r1: xmm)
|
|
pmulhw[from_mem](var r0: xmm; r1: ptr128)
|
|
pmulhuw[sse2](var r0: xmm; r1: xmm)
|
|
pmulhuw[from_mem](var r0: xmm; r1: ptr128)
|
|
pmuludq(var r0: xmm; r1: xmm)
|
|
pmuludq[from_mem](var r0: xmm; r1: ptr128)
|
|
psllw[sse2](var r0: xmm; r1: xmm)
|
|
psllw[from_mem](var r0: xmm; r1: ptr128)
|
|
psllw[sse2_imm](var r0: xmm; imm: i32)
|
|
pslld[sse2](var r0: xmm; r1: xmm)
|
|
pslld[from_mem](var r0: xmm; r1: ptr128)
|
|
pslld[sse2_imm](var r0: xmm; imm: i32)
|
|
psllq[sse2](var r0: xmm; r1: xmm)
|
|
psllq[from_mem](var r0: xmm; r1: ptr128)
|
|
psllq[sse2_imm](var r0: xmm; imm: i32)
|
|
psrad[sse2](var r0: xmm; r1: xmm)
|
|
psrad[from_mem](var r0: xmm; r1: ptr128)
|
|
psrad[sse2_imm](var r0: xmm; imm: i32)
|
|
psraw[sse2](var r0: xmm; r1: xmm)
|
|
psraw[from_mem](var r0: xmm; r1: ptr128)
|
|
psraw[sse2_imm](var r0: xmm; imm: i32)
|
|
psrlw[sse2](var r0: xmm; r1: xmm)
|
|
psrlw[from_mem](var r0: xmm; r1: ptr128)
|
|
psrlw[sse2_imm](var r0: xmm; imm: i32)
|
|
psrld[sse2](var r0: xmm; r1: xmm)
|
|
psrld[from_mem](var r0: xmm; r1: ptr128)
|
|
psrld[sse2_imm](var r0: xmm; imm: i32)
|
|
psrlq[sse2](var r0: xmm; r1: xmm)
|
|
psrlq[from_mem](var r0: xmm; r1: ptr128)
|
|
psrlq[sse2_imm](var r0: xmm; imm: i32)
|
|
psubb(var r0: xmm; r1: xmm)
|
|
psubb[from_mem](var r0: xmm; r1: ptr128)
|
|
psubw(var r0: xmm; r1: xmm)
|
|
psubw[from_mem](var r0: xmm; r1: ptr128)
|
|
psubd(var r0: xmm; r1: xmm)
|
|
psubd[from_mem](var r0: xmm; r1: ptr128)
|
|
psubq(var r0: xmm; r1: xmm)
|
|
psubq[from_mem](var r0: xmm; r1: ptr128)
|
|
psubsb(var r0: xmm; r1: xmm)
|
|
psubsb[from_mem](var r0: xmm; r1: ptr128)
|
|
psubsw(var r0: xmm; r1: xmm)
|
|
psubsw[from_mem](var r0: xmm; r1: ptr128)
|
|
pmaddwd(var r0: xmm; r1: xmm)
|
|
pmaddwd[from_mem](var r0: xmm; r1: ptr128)
|
|
psubusb(var r0: xmm; r1: xmm)
|
|
psubusb[from_mem](var r0: xmm; r1: ptr128)
|
|
psubusw(var r0: xmm; r1: xmm)
|
|
psubusw[from_mem](var r0: xmm; r1: ptr128)
|
|
punpckhbw(var r0: xmm; r1: xmm)
|
|
punpckhbw[from_mem](var r0: xmm; r1: ptr128)
|
|
punpckhwd(var r0: xmm; r1: xmm)
|
|
punpckhwd[from_mem](var r0: xmm; r1: ptr128)
|
|
punpckhdq(var r0: xmm; r1: xmm)
|
|
punpckhdq[from_mem](var r0: xmm; r1: ptr128)
|
|
punpcklbw(var r0: xmm; r1: xmm)
|
|
punpcklbw[from_mem](var r0: xmm; r1: ptr128)
|
|
punpcklwd(var r0: xmm; r1: xmm)
|
|
punpcklwd[from_mem](var r0: xmm; r1: ptr128)
|
|
punpckldq(var r0: xmm; r1: xmm)
|
|
punpckldq[from_mem](var r0: xmm; r1: ptr128)
|
|
pavgb[sse2](var r0: xmm; r1: xmm)
|
|
pavgb[from_mem](var r0: xmm; r1: ptr128)
|
|
pavgw[sse2](var r0: xmm; r1: xmm)
|
|
pavgw[from_mem](var r0: xmm; r1: ptr128)
|
|
pminub[sse2](var r0: xmm; r1: xmm)
|
|
pminub[from_mem](var r0: xmm; r1: ptr128)
|
|
pminsw[sse2](var r0: xmm; r1: xmm)
|
|
pminsw[from_mem](var r0: xmm; r1: ptr128)
|
|
pmaxsw[sse2](var r0: xmm; r1: xmm)
|
|
pmaxsw[from_mem](var r0: xmm; r1: ptr128)
|
|
pmaxub[sse2](var r0: xmm; r1: xmm)
|
|
pmaxub[from_mem](var r0: xmm; r1: ptr128)
|
|
psadbw[sse2](var r0: xmm; r1: xmm)
|
|
psadbw[from_mem](var r0: xmm; r1: ptr128)
|
|
|
|
; SSE2 integer instructions
|
|
maskmovdqu(addr: edi_ptr; r0: xmm; r1: xmm)
|
|
movdq2q(out r0: mm; r1: xmm)
|
|
movdqa[from_mem](out r0: xmm; r1: ptr128)
|
|
movdqa(r0: ptr128; r1: xmm)
|
|
movdqu[from_mem](out r0: xmm; r1: ptr128)
|
|
movdqu(r0: ptr128; r1: xmm)
|
|
movq2dq(out r0: xmm; r1: mm)
|
|
movntdq(r0: ptr128; r1: xmm)
|
|
pshufhw(out r0: xmm; r1: xmm; imm: i32)
|
|
pshuflw(out r0: xmm; r1: xmm; imm: i32)
|
|
pshufd(out r0: xmm; r1: xmm; imm: i32)
|
|
pshufhw[from_mem](out r0: xmm; r1: ptr128; imm: i32)
|
|
pshuflw[from_mem](out r0: xmm; r1: ptr128; imm: i32)
|
|
pshufd[from_mem](out r0: xmm; r1: ptr128; imm: i32)
|
|
pslldq(var r0: xmm; imm: i32)
|
|
psrldq(var r0: xmm; imm: i32)
|
|
punpckhqdq(var r0: xmm; r1: xmm)
|
|
punpckhqdq[from_mem](var r0: xmm; r1: ptr128)
|
|
punpcklqdq(var r0: xmm; r1: xmm)
|
|
punpcklqdq[from_mem](var r0: xmm; r1: ptr128)
|
|
|
|
; SSE3 SIMD floating-point instructions
|
|
addsubps(var r0: xmm; r1: xmm)
|
|
addsubps[from_mem](var r0: xmm; r1: ptr128)
|
|
addsubpd(var r0: xmm; r1: xmm)
|
|
addsubpd[from_mem](var r0: xmm; r1: ptr128)
|
|
movddup(out r0: xmm; r1: xmm)
|
|
movddup[from_mem](out r0: xmm; r1: ptr64)
|
|
movsldup(out r0: xmm; r1: xmm)
|
|
movsldup[from_mem](out r0: xmm; r1: ptr128)
|
|
movshdup(out r0: xmm; r1: xmm)
|
|
movshdup[from_mem](out r0: xmm; r1: ptr128)
|
|
haddps(var r0: xmm; r1: xmm)
|
|
haddps[from_mem](var r0: xmm; r1: ptr128)
|
|
haddpd(var r0: xmm; r1: xmm)
|
|
haddpd[from_mem](var r0: xmm; r1: ptr128)
|
|
hsubps(var r0: xmm; r1: xmm)
|
|
hsubps[from_mem](var r0: xmm; r1: ptr128)
|
|
hsubpd(var r0: xmm; r1: xmm)
|
|
hsubpd[from_mem](var r0: xmm; r1: ptr128)
|
|
|
|
; SSE3 SIMD integer instructions
|
|
lddqu(out r0: xmm; r1: ptr128)
|
|
|
|
; SSSE3
|
|
psignb(var r0: xmm; r1: xmm)
|
|
psignb[from_mem](var r0: xmm; r1: ptr128)
|
|
psignw(var r0: xmm; r1: xmm)
|
|
psignw[from_mem](var r0: xmm; r1: ptr128)
|
|
psignd(var r0: xmm; r1: xmm)
|
|
psignd[from_mem](var r0: xmm; r1: ptr128)
|
|
pshufb(var r0: xmm; r1: xmm)
|
|
pshufb[from_mem](var r0: xmm; r1: ptr128)
|
|
pmulhrsw(var r0: xmm; r1: xmm)
|
|
pmulhrsw[from_mem](var r0: xmm; r1: ptr128)
|
|
pmaddubsw(var r0: xmm; r1: xmm)
|
|
pmaddubsw[from_mem](var r0: xmm; r1: ptr128)
|
|
phsubw(var r0: xmm; r1: xmm)
|
|
phsubw[from_mem](var r0: xmm; r1: ptr128)
|
|
phsubsw(var r0: xmm; r1: xmm)
|
|
phsubsw[from_mem](var r0: xmm; r1: ptr128)
|
|
phsubd(var r0: xmm; r1: xmm)
|
|
phsubd[from_mem](var r0: xmm; r1: ptr128)
|
|
phaddsw(var r0: xmm; r1: xmm)
|
|
phaddsw[from_mem](var r0: xmm; r1: ptr128)
|
|
phaddw(var r0: xmm; r1: xmm)
|
|
phaddw[from_mem](var r0: xmm; r1: ptr128)
|
|
phaddd(var r0: xmm; r1: xmm)
|
|
phaddd[from_mem](var r0: xmm; r1: ptr128)
|
|
palignr(var r0: xmm; r1: xmm; imm: i32)
|
|
palignr[from_mem](var r0: xmm; r1: ptr128; imm: i32)
|
|
pabsb(out r0: xmm; r1: xmm)
|
|
pabsb[from_mem](out r0: xmm; r1: ptr128)
|
|
pabsw(out r0: xmm; r1: xmm)
|
|
pabsw[from_mem](out r0: xmm; r1: ptr128)
|
|
pabsd(out r0: xmm; r1: xmm)
|
|
pabsd[from_mem](out r0: xmm; r1: ptr128)
|
|
|
|
; SSE4.1 SIMD floating-point instructions
|
|
dpps(var r0: xmm; r1: xmm; imm: i32)
|
|
dpps[from_mem](var r0: xmm; r1: ptr128; imm: i32)
|
|
dppd(var r0: xmm; r1: xmm; imm: i32)
|
|
dppd[from_mem](var r0: xmm; r1: ptr128; imm: i32)
|
|
blendps(var r0: xmm; r1: xmm; imm: i32)
|
|
blendps[from_mem](var r0: xmm; r1: ptr128; imm: i32)
|
|
blendvps(var r0: xmm; r1: xmm; mask: implicit_xmm0)
|
|
blendvps[from_mem](var r0: xmm; r1: ptr128; mask: implicit_xmm0)
|
|
blendpd(var r0: xmm; r1: xmm; imm: i32)
|
|
blendpd[from_mem](var r0: xmm; r1: ptr128; imm: i32)
|
|
blendvpd(var r0: xmm; r1: xmm; mask: implicit_xmm0)
|
|
blendvpd[from_mem](var r0: xmm; r1: ptr128; mask: implicit_xmm0)
|
|
roundps(out r0: xmm; r1: xmm; imm: i32)
|
|
roundps[from_mem](out r0: xmm; r1: ptr128; imm: i32)
|
|
roundss(out r0: xmm; r1: xmm; imm: i32)
|
|
roundss[from_mem](out r0: xmm; r1: ptr32; imm: i32)
|
|
roundpd(out r0: xmm; r1: xmm; imm: i32)
|
|
roundpd[from_mem](out r0: xmm; r1: ptr128; imm: i32)
|
|
roundsd(out r0: f64; r1: f64; imm: i32)
|
|
roundsd[from_mem](out r0: f64; r1: ptr64; imm: i32)
|
|
insertps(var r0: xmm; r1: xmm; imm: i32)
|
|
insertps[from_mem](var r0: xmm; r1: ptr32; imm: i32)
|
|
extractps(out r0: r32; r1: xmm; imm: i32)
|
|
; NOTE(review): r0 is the pointer operand and carries no out/var marker, which
; matches the [to_mem] entries elsewhere in this file, but this variant is
; tagged [from_mem] - confirm the tag.
extractps[from_mem](r0: ptr32; r1: xmm; imm: i32)
|
|
|
|
; SSE4.1 SIMD integer instructions
|
|
mpsadbw(var r0: xmm; r1: xmm; imm: i32)
|
|
mpsadbw[from_mem](var r0: xmm; r1: ptr128; imm: i32)
|
|
phminposuw(var r0: xmm; r1: xmm)
|
|
phminposuw[from_mem](var r0: xmm; r1: ptr128)
|
|
pmulld(var r0: xmm; r1: xmm)
|
|
pmulld[from_mem](var r0: xmm; r1: ptr128)
|
|
pmuldq(var r0: xmm; r1: xmm)
|
|
pmuldq[from_mem](var r0: xmm; r1: ptr128)
|
|
pblendvb(var r0: xmm; r1: xmm; mask: implicit_xmm0)
|
|
pblendvb[from_mem](var r0: xmm; r1: ptr128; mask: implicit_xmm0)
|
|
pblendw(var r0: xmm; r1: xmm; imm: i32)
|
|
pblendw[from_mem](var r0: xmm; r1: ptr128; imm: i32)
|
|
pminsb(var r0: xmm; r1: xmm)
|
|
pminsb[from_mem](var r0: xmm; r1: ptr128)
|
|
pminuw(var r0: xmm; r1: xmm)
|
|
pminuw[from_mem](var r0: xmm; r1: ptr128)
|
|
pminsd(var r0: xmm; r1: xmm)
|
|
pminsd[from_mem](var r0: xmm; r1: ptr128)
|
|
pminud(var r0: xmm; r1: xmm)
|
|
pminud[from_mem](var r0: xmm; r1: ptr128)
|
|
pmaxsb(var r0: xmm; r1: xmm)
|
|
pmaxsb[from_mem](var r0: xmm; r1: ptr128)
|
|
pmaxuw(var r0: xmm; r1: xmm)
|
|
pmaxuw[from_mem](var r0: xmm; r1: ptr128)
|
|
pmaxsd(var r0: xmm; r1: xmm)
|
|
pmaxsd[from_mem](var r0: xmm; r1: ptr128)
|
|
pmaxud(var r0: xmm; r1: xmm)
|
|
pmaxud[from_mem](var r0: xmm; r1: ptr128)
|
|
pinsrb(var r0: xmm; r1: r32; imm: i32)
|
|
pinsrb[from_mem](var r0: xmm; r1: ptr8; imm: i32)
|
|
pinsrd(var r0: xmm; r1: r32; imm: i32)
|
|
pinsrd[from_mem](var r0: xmm; r1: ptr32; imm: i32)
|
|
pinsrq(var r0: xmm; r1: reg; imm: i32) |X86_64
|
|
pinsrq[from_mem](var r0: xmm; r1: ptr64; imm: i32) |X86_64
|
|
pextrb(out r0: r8; r1: xmm; imm: i32)
|
|
pextrb[to_mem](r0: ptr8; r1: xmm; imm: i32)
|
|
pextrw[sse41_to_mem](r0: ptr16; r1: xmm; imm: i32)
|
|
pextrd(out r0: r32; r1: xmm; imm: i32)
|
|
pextrd[to_mem](r0: ptr32; r1: xmm; imm: i32)
|
|
; NOTE(review): the destination is declared r32 although this entry is
; X86_64-only and pinsrq takes its integer operand as reg - confirm the
; intended register width for the extracted quadword.
pextrq(out r0: r32; r1: xmm; imm: i32) |X86_64
|
|
pextrq[to_mem](r0: ptr64; r1: xmm; imm: i32) |X86_64
|
|
pmovsxbw(out r0: xmm; r1: xmm)
|
|
pmovsxbw[from_mem](out r0: xmm; r1: ptr64)
|
|
pmovzxbw(out r0: xmm; r1: xmm)
|
|
pmovzxbw[from_mem](out r0: xmm; r1: ptr64)
|
|
pmovsxbd(out r0: xmm; r1: xmm)
|
|
pmovsxbd[from_mem](out r0: xmm; r1: ptr32)
|
|
pmovzxbd(out r0: xmm; r1: xmm)
|
|
pmovzxbd[from_mem](out r0: xmm; r1: ptr32)
|
|
pmovsxbq(out r0: xmm; r1: xmm)
|
|
pmovsxbq[from_mem](out r0: xmm; r1: ptr16)
|
|
pmovzxbq(out r0: xmm; r1: xmm)
|
|
pmovzxbq[from_mem](out r0: xmm; r1: ptr16)
|
|
pmovsxwd(out r0: xmm; r1: xmm)
|
|
pmovsxwd[from_mem](out r0: xmm; r1: ptr64)
|
|
pmovzxwd(out r0: xmm; r1: xmm)
|
|
pmovzxwd[from_mem](out r0: xmm; r1: ptr64)
|
|
pmovsxwq(out r0: xmm; r1: xmm)
|
|
pmovsxwq[from_mem](out r0: xmm; r1: ptr32)
|
|
pmovzxwq(out r0: xmm; r1: xmm)
|
|
pmovzxwq[from_mem](out r0: xmm; r1: ptr32)
|
|
pmovsxdq(out r0: xmm; r1: xmm)
|
|
pmovsxdq[from_mem](out r0: xmm; r1: ptr64)
|
|
pmovzxdq(out r0: xmm; r1: xmm)
|
|
pmovzxdq[from_mem](out r0: xmm; r1: ptr64)
|
|
ptest(var r0: xmm; r1: xmm)
|
|
ptest[from_mem](var r0: xmm; r1: ptr128)
|
|
pcmpeqq(var r0: xmm; r1: xmm)
|
|
pcmpeqq[from_mem](var r0: xmm; r1: ptr128)
|
|
packusdw(var r0: xmm; r1: xmm)
|
|
packusdw[from_mem](var r0: xmm; r1: ptr128)
|
|
movntdqa(out r0: xmm; r1: ptr128)
|
|
|
|
; SSE4.2
|
|
pcmpestri(var r0: xmm; r1: xmm; imm: i32)
|
|
pcmpestri[from_mem](var r0: xmm; r1: ptr128; imm: i32)
|
|
pcmpestrm(var r0: xmm; r1: xmm; imm: i32)
|
|
pcmpestrm[from_mem](var r0: xmm; r1: ptr128; imm: i32)
|
|
pcmpistri(var r0: xmm; r1: xmm; imm: i32)
|
|
pcmpistri[from_mem](var r0: xmm; r1: ptr128; imm: i32)
|
|
pcmpistrm(var r0: xmm; r1: xmm; imm: i32)
|
|
pcmpistrm[from_mem](var r0: xmm; r1: ptr128; imm: i32)
|
|
pcmpgtq(var r0: xmm; r1: xmm)
|
|
pcmpgtq[from_mem](var r0: xmm; r1: ptr128)
|