mirror of
https://gitlab.com/freepascal.org/fpc/source.git
synced 2025-04-06 14:47:55 +02:00
+ implementation of the vectorcall calling convention by J. Gareth Moreton
+ tests git-svn-id: trunk@38206 -
This commit is contained in:
parent
c63c3e99c7
commit
31f78ea2b6
3
.gitattributes
vendored
3
.gitattributes
vendored
@ -11999,6 +11999,9 @@ tests/test/cg/ttryfin4.pp svneol=native#text/plain
|
||||
tests/test/cg/ttryfin5.pp svneol=native#text/plain
|
||||
tests/test/cg/tumin.pp svneol=native#text/plain
|
||||
tests/test/cg/tvec.pp svneol=native#text/plain
|
||||
tests/test/cg/tvectorcall1.pp svneol=native#text/pascal
|
||||
tests/test/cg/tvectorcall2.pp svneol=native#text/pascal
|
||||
tests/test/cg/tvectorcall3.pp svneol=native#text/pascal
|
||||
tests/test/cg/uandorxorassign.pp svneol=native#text/plain
|
||||
tests/test/cg/unegnotassign.pp svneol=native#text/plain
|
||||
tests/test/cg/uprintf3.pp svneol=native#text/plain
|
||||
|
@ -164,14 +164,18 @@ interface
|
||||
{ OS_NO is also used memory references with large data that can
|
||||
not be loaded in a register directly }
|
||||
TCgSize = (OS_NO,
|
||||
{ integer registers }
|
||||
OS_8,OS_16,OS_32,OS_64,OS_128,OS_S8,OS_S16,OS_S32,OS_S64,OS_S128,
|
||||
{ single,double,extended,comp,float128 }
|
||||
OS_F32,OS_F64,OS_F80,OS_C64,OS_F128,
|
||||
OS_8, OS_16, OS_32, OS_64, OS_128,
|
||||
OS_S8, OS_S16, OS_S32, OS_S64, OS_S128,
|
||||
{ single, double, extended, comp, float128 }
|
||||
OS_F32, OS_F64, OS_F80, OS_C64, OS_F128,
|
||||
{ multi-media sizes: split in byte, word, dword, ... }
|
||||
{ entities, then the signed counterparts }
|
||||
OS_M8,OS_M16,OS_M32,OS_M64,OS_M128,OS_M256,
|
||||
OS_MS8,OS_MS16,OS_MS32,OS_MS64,OS_MS128,OS_MS256 );
|
||||
OS_M8, OS_M16, OS_M32, OS_M64, OS_M128, OS_M256, OS_M512,
|
||||
OS_MS8, OS_MS16, OS_MS32, OS_MS64, OS_MS128, OS_MS256, OS_MS512,
|
||||
{ multi-media sizes: single-precision floating-point }
|
||||
OS_MF32, OS_MF128, OS_MF256, OS_MF512,
|
||||
{ multi-media sizes: double-precision floating-point }
|
||||
OS_MD64, OS_MD128, OS_MD256, OS_MD512);
|
||||
|
||||
{ Register types }
|
||||
TRegisterType = (
|
||||
@ -205,15 +209,16 @@ interface
|
||||
{ For Intel X86 AVX-Register }
|
||||
R_SUBMMX, { = 12; 128 BITS }
|
||||
R_SUBMMY, { = 13; 256 BITS }
|
||||
R_SUBMMZ, { = 14; 512 BITS }
|
||||
{ Subregisters for the flags register (x86) }
|
||||
R_SUBFLAGCARRY, { = 14; Carry flag }
|
||||
R_SUBFLAGPARITY, { = 15; Parity flag }
|
||||
R_SUBFLAGAUXILIARY, { = 16; Auxiliary flag }
|
||||
R_SUBFLAGZERO, { = 17; Zero flag }
|
||||
R_SUBFLAGSIGN, { = 18; Sign flag }
|
||||
R_SUBFLAGOVERFLOW, { = 19; Overflow flag }
|
||||
R_SUBFLAGINTERRUPT, { = 20; Interrupt enable flag }
|
||||
R_SUBFLAGDIRECTION { = 21; Direction flag }
|
||||
R_SUBFLAGCARRY, { = 15; Carry flag }
|
||||
R_SUBFLAGPARITY, { = 16; Parity flag }
|
||||
R_SUBFLAGAUXILIARY, { = 17; Auxiliary flag }
|
||||
R_SUBFLAGZERO, { = 18; Zero flag }
|
||||
R_SUBFLAGSIGN, { = 19; Sign flag }
|
||||
R_SUBFLAGOVERFLOW, { = 20; Overflow flag }
|
||||
R_SUBFLAGINTERRUPT, { = 21; Interrupt enable flag }
|
||||
R_SUBFLAGDIRECTION { = 22; Direction flag }
|
||||
);
|
||||
TSubRegisterSet = set of TSubRegister;
|
||||
|
||||
@ -307,12 +312,19 @@ interface
|
||||
NR_INVALID = tregister($fffffffff);
|
||||
|
||||
tcgsize2size : Array[tcgsize] of integer =
|
||||
(0,
|
||||
{ integer values }
|
||||
(0,1,2,4,8,16,1,2,4,8,16,
|
||||
1, 2, 4, 8, 16,
|
||||
1, 2, 4, 8, 16,
|
||||
{ floating point values }
|
||||
4,8,10,8,16,
|
||||
4, 8, 10, 8, 16,
|
||||
{ multimedia values }
|
||||
1,2,4,8,16,32,1,2,4,8,16,32);
|
||||
1, 2, 4, 8, 16, 32, 64,
|
||||
1, 2, 4, 8, 16, 32, 64,
|
||||
{ single-precision multimedia values }
|
||||
4, 16, 32, 64,
|
||||
{ double-precision multimedia values }
|
||||
8, 16, 32, 64);
|
||||
|
||||
tfloat2tcgsize: array[tfloattype] of tcgsize =
|
||||
(OS_F32,OS_F64,OS_F80,OS_F80,OS_C64,OS_C64,OS_F128);
|
||||
@ -348,16 +360,25 @@ interface
|
||||
{ Table to convert tcgsize variables to the correspondending
|
||||
unsigned types }
|
||||
tcgsize2unsigned : array[tcgsize] of tcgsize = (OS_NO,
|
||||
OS_8,OS_16,OS_32,OS_64,OS_128,OS_8,OS_16,OS_32,OS_64,OS_128,
|
||||
OS_F32,OS_F64,OS_F80,OS_C64,OS_F128,
|
||||
OS_M8,OS_M16,OS_M32,OS_M64,OS_M128,OS_M256,OS_M8,OS_M16,OS_M32,
|
||||
OS_M64,OS_M128,OS_M256);
|
||||
OS_8, OS_16, OS_32, OS_64, OS_128,
|
||||
OS_8, OS_16, OS_32, OS_64, OS_128,
|
||||
|
||||
OS_F32, OS_F64, OS_F80, OS_C64, OS_F128,
|
||||
OS_M8, OS_M16, OS_M32, OS_M64, OS_M128, OS_M256, OS_M512,
|
||||
OS_M8, OS_M16, OS_M32, OS_M64, OS_M128, OS_M256, OS_M512,
|
||||
OS_MF32, OS_MF128,OS_MF256,OS_MF512,
|
||||
OS_MD64, OS_MD128,OS_MD256,OS_MD512);
|
||||
|
||||
|
||||
tcgsize2signed : array[tcgsize] of tcgsize = (OS_NO,
|
||||
OS_S8,OS_S16,OS_S32,OS_S64,OS_S128,OS_S8,OS_S16,OS_S32,OS_S64,OS_S128,
|
||||
OS_F32,OS_F64,OS_F80,OS_C64,OS_F128,
|
||||
OS_M8,OS_M16,OS_M32,OS_M64,OS_M128,OS_M256,OS_M8,OS_M16,OS_M32,
|
||||
OS_M64,OS_M128,OS_M256);
|
||||
OS_S8, OS_S16, OS_S32, OS_S64, OS_S128,
|
||||
OS_S8, OS_S16, OS_S32, OS_S64, OS_S128,
|
||||
|
||||
OS_F32, OS_F64, OS_F80, OS_C64, OS_F128,
|
||||
OS_MS8, OS_MS16, OS_MS32, OS_MS64, OS_MS128,OS_MS256,OS_MS512,
|
||||
OS_MS8, OS_MS16, OS_MS32, OS_MS64, OS_MS128,OS_MS256,OS_MS512,
|
||||
OS_MF32, OS_MF128,OS_MF256,OS_MF512,
|
||||
OS_MD64, OS_MD128,OS_MD256,OS_MD512);
|
||||
|
||||
|
||||
tcgloc2str : array[TCGLoc] of string[12] = (
|
||||
@ -404,6 +425,8 @@ interface
|
||||
}
|
||||
function int_cgsize(const a: tcgint): tcgsize;{$ifdef USEINLINE}inline;{$endif}
|
||||
function int_float_cgsize(const a: tcgint): tcgsize;
|
||||
function float_array_cgsize(const a: tcgint): tcgsize;{$ifdef USEINLINE}inline;{$endif}
|
||||
function double_array_cgsize(const a: tcgint): tcgsize;{$ifdef USEINLINE}inline;{$endif}
|
||||
|
||||
function tcgsize2str(cgsize: tcgsize):string;
|
||||
|
||||
@ -660,6 +683,8 @@ implementation
|
||||
result:=result+'mx';
|
||||
R_SUBMMY:
|
||||
result:=result+'my';
|
||||
R_SUBMMZ:
|
||||
result:=result+'mz';
|
||||
else
|
||||
internalerror(200308252);
|
||||
end;
|
||||
@ -701,6 +726,39 @@ implementation
|
||||
end;
|
||||
|
||||
|
||||
function float_array_cgsize(const a: tcgint): tcgsize;{$ifdef USEINLINE}inline;{$endif}
|
||||
begin
|
||||
case a of
|
||||
4:
|
||||
result := OS_MF32;
|
||||
16:
|
||||
result := OS_MF128;
|
||||
32:
|
||||
result := OS_MF256;
|
||||
64:
|
||||
result := OS_MF512;
|
||||
else
|
||||
result := int_cgsize(a);
|
||||
end;
|
||||
end;
|
||||
|
||||
function double_array_cgsize(const a: tcgint): tcgsize;{$ifdef USEINLINE}inline;{$endif}
|
||||
begin
|
||||
case a of
|
||||
8:
|
||||
result := OS_MD64;
|
||||
16:
|
||||
result := OS_MD128;
|
||||
32:
|
||||
result := OS_MD256;
|
||||
64:
|
||||
result := OS_MD512;
|
||||
else
|
||||
result := int_cgsize(a);
|
||||
end;
|
||||
end;
|
||||
|
||||
|
||||
function tcgsize2str(cgsize: tcgsize):string;
|
||||
begin
|
||||
Str(cgsize, Result);
|
||||
|
@ -1338,7 +1338,24 @@ implementation
|
||||
arraydef :
|
||||
begin
|
||||
if is_dynamic_array(def) or not is_special_array(def) then
|
||||
result := int_cgsize(def.size)
|
||||
begin
|
||||
if (cs_support_vectors in current_settings.globalswitches) and is_vector(def) and ((TArrayDef(def).elementdef.typ = floatdef) and not (cs_fp_emulation in current_settings.moduleswitches)) then
|
||||
begin
|
||||
{ Determine if, based on the floating-point type and the size
|
||||
of the array, if it can be made into a vector }
|
||||
case TFloatDef(def).floattype of
|
||||
s32real:
|
||||
result := float_array_cgsize(def.size);
|
||||
s64real:
|
||||
result := double_array_cgsize(def.size);
|
||||
else
|
||||
{ If not, fall back }
|
||||
result := int_cgsize(def.size);
|
||||
end;
|
||||
end
|
||||
else
|
||||
result := int_cgsize(def.size);
|
||||
end
|
||||
else
|
||||
result := OS_NO;
|
||||
end;
|
||||
@ -1379,25 +1396,53 @@ implementation
|
||||
case def.typ of
|
||||
arraydef:
|
||||
begin
|
||||
if tarraydef(def).elementdef.typ in [orddef,floatdef] then
|
||||
begin
|
||||
{ this is not correct, OS_MX normally mean that the vector
|
||||
contains elements of size X. However, vectors themselves
|
||||
can also have different sizes (e.g. a vector of 2 singles on
|
||||
SSE) and the total size is currently more important }
|
||||
case def.size of
|
||||
1: result:=OS_M8;
|
||||
2: result:=OS_M16;
|
||||
4: result:=OS_M32;
|
||||
8: result:=OS_M64;
|
||||
16: result:=OS_M128;
|
||||
32: result:=OS_M256;
|
||||
else
|
||||
internalerror(2013060103);
|
||||
case tarraydef(def).elementdef.typ of
|
||||
orddef:
|
||||
begin
|
||||
{ this is not correct, OS_MX normally mean that the vector
|
||||
contains elements of size X. However, vectors themselves
|
||||
can also have different sizes (e.g. a vector of 2 singles on
|
||||
SSE) and the total size is currently more important }
|
||||
case def.size of
|
||||
1: result:=OS_M8;
|
||||
2: result:=OS_M16;
|
||||
4: result:=OS_M32;
|
||||
8: result:=OS_M64;
|
||||
16: result:=OS_M128;
|
||||
32: result:=OS_M256;
|
||||
64: result:=OS_M512;
|
||||
else
|
||||
internalerror(2013060103);
|
||||
end;
|
||||
end;
|
||||
end
|
||||
else
|
||||
result:=def_cgsize(def);
|
||||
floatdef:
|
||||
begin
|
||||
case TFloatDef(tarraydef(def).elementdef).floattype of
|
||||
s32real:
|
||||
case def.size of
|
||||
4: result:=OS_MF32;
|
||||
16: result:=OS_MF128;
|
||||
32: result:=OS_MF256;
|
||||
64: result:=OS_MF512;
|
||||
else
|
||||
internalerror(2017121400);
|
||||
end;
|
||||
s64real:
|
||||
case def.size of
|
||||
8: result:=OS_MD64;
|
||||
16: result:=OS_MD128;
|
||||
32: result:=OS_MD256;
|
||||
64: result:=OS_MD512;
|
||||
else
|
||||
internalerror(2017121401);
|
||||
end;
|
||||
else
|
||||
internalerror(2017121402);
|
||||
end;
|
||||
end;
|
||||
else
|
||||
result:=def_cgsize(def);
|
||||
end;
|
||||
end
|
||||
else
|
||||
result:=def_cgsize(def);
|
||||
|
@ -1112,7 +1112,8 @@ implementation
|
||||
'SYSV_ABI_DEFAULT',
|
||||
'SYSV_ABI_CDECL',
|
||||
'MS_ABI_DEFAULT',
|
||||
'MS_ABI_CDECL'
|
||||
'MS_ABI_CDECL',
|
||||
'VECTORCALL'
|
||||
);
|
||||
var
|
||||
t : tproccalloption;
|
||||
|
@ -539,7 +539,9 @@ interface
|
||||
pocall_sysv_abi_cdecl,
|
||||
{ for x86-64: forces Microsoft ABI (Pascal resp. C) }
|
||||
pocall_ms_abi_default,
|
||||
pocall_ms_abi_cdecl
|
||||
pocall_ms_abi_cdecl,
|
||||
{ for x86-64: Microsoft's "vectorcall" ABI }
|
||||
pocall_vectorcall
|
||||
);
|
||||
tproccalloptions = set of tproccalloption;
|
||||
|
||||
@ -560,9 +562,10 @@ interface
|
||||
'Interrupt',
|
||||
'HardFloat',
|
||||
'SysV_ABI_Default',
|
||||
'MS_ABI_CDecl',
|
||||
'MS_ABI_CDecl', { TODO: Is this correct? Shouldn't it be SysV_ABI_Default }
|
||||
'MS_ABI_Default',
|
||||
'MS_ABI_CDecl'
|
||||
'MS_ABI_CDecl',
|
||||
'VectorCall'
|
||||
);
|
||||
|
||||
{ Default calling convention }
|
||||
|
@ -1538,6 +1538,8 @@ implementation
|
||||
result:=OS_F32;
|
||||
OS_64:
|
||||
result:=OS_F64;
|
||||
OS_128:
|
||||
result:=OS_M128;
|
||||
end;
|
||||
end;
|
||||
end;
|
||||
|
@ -35,7 +35,8 @@
|
||||
S_NEAR,S_FAR,S_SHORT,
|
||||
S_T,
|
||||
S_XMM,
|
||||
S_YMM
|
||||
S_YMM,
|
||||
S_ZMM
|
||||
);
|
||||
|
||||
TOpSizes = set of topsize;
|
||||
|
@ -35,7 +35,8 @@
|
||||
S_NEAR,S_FAR,S_SHORT,
|
||||
S_T,
|
||||
S_XMM,
|
||||
S_YMM
|
||||
S_YMM,
|
||||
S_ZMM
|
||||
);
|
||||
|
||||
TOpSizes = set of topsize;
|
||||
|
@ -682,6 +682,7 @@ implementation
|
||||
|
||||
procedure tcgassignmentnode.pass_generate_code;
|
||||
var
|
||||
shuffle : pmmshuffle;
|
||||
hlabel : tasmlabel;
|
||||
href : treference;
|
||||
releaseright : boolean;
|
||||
@ -968,22 +969,21 @@ implementation
|
||||
LOC_MMREGISTER,
|
||||
LOC_CMMREGISTER:
|
||||
begin
|
||||
if left.resultdef.typ=arraydef then
|
||||
begin
|
||||
end
|
||||
if (is_vector(left.resultdef)) then
|
||||
shuffle := nil
|
||||
else
|
||||
begin
|
||||
case left.location.loc of
|
||||
LOC_CMMREGISTER,
|
||||
LOC_MMREGISTER:
|
||||
hlcg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,right.resultdef,left.resultdef,right.location.register,left.location.register,mms_movescalar);
|
||||
LOC_REFERENCE,
|
||||
LOC_CREFERENCE:
|
||||
hlcg.a_loadmm_reg_ref(current_asmdata.CurrAsmList,right.resultdef,left.resultdef,right.location.register,left.location.reference,mms_movescalar);
|
||||
else
|
||||
internalerror(2009112601);
|
||||
end;
|
||||
end;
|
||||
shuffle := mms_movescalar;
|
||||
|
||||
case left.location.loc of
|
||||
LOC_CMMREGISTER,
|
||||
LOC_MMREGISTER:
|
||||
hlcg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,right.resultdef,left.resultdef,right.location.register,left.location.register, shuffle);
|
||||
LOC_REFERENCE,
|
||||
LOC_CREFERENCE:
|
||||
hlcg.a_loadmm_reg_ref(current_asmdata.CurrAsmList,right.resultdef,left.resultdef,right.location.register,left.location.reference, shuffle);
|
||||
else
|
||||
internalerror(2009112601);
|
||||
end;
|
||||
end;
|
||||
LOC_REGISTER,
|
||||
LOC_CREGISTER :
|
||||
|
@ -335,7 +335,8 @@ implementation
|
||||
{ pocall_sysv_abi_default } 14,
|
||||
{ pocall_sysv_abi_cdecl } 15,
|
||||
{ pocall_ms_abi_default } 16,
|
||||
{ pocall_ms_abi_cdecl } 17
|
||||
{ pocall_ms_abi_cdecl } 17,
|
||||
{ pocall_vectorcall } 18
|
||||
);
|
||||
begin
|
||||
tcb.emit_ord_const(ProcCallOptionToCallConv[def.proccalloption],u8inttype);
|
||||
|
@ -2382,7 +2382,7 @@ type
|
||||
end;
|
||||
const
|
||||
{Should contain the number of procedure directives we support.}
|
||||
num_proc_directives=50;
|
||||
num_proc_directives=51;
|
||||
proc_direcdata:array[1..num_proc_directives] of proc_dir_rec=
|
||||
(
|
||||
(
|
||||
@ -2849,6 +2849,15 @@ const
|
||||
mutexclpocall : [];
|
||||
mutexclpotype : [potype_constructor,potype_destructor,potype_class_constructor,potype_class_destructor];
|
||||
mutexclpo : [po_interrupt]
|
||||
),(
|
||||
idtok:_VECTORCALL;
|
||||
pd_flags : [pd_interface,pd_implemen,pd_body,pd_procvar];
|
||||
handler : nil;
|
||||
pocall : pocall_vectorcall;
|
||||
pooption : [];
|
||||
mutexclpocall : [];
|
||||
mutexclpotype : [potype_constructor,potype_destructor,potype_class_constructor,potype_class_destructor];
|
||||
mutexclpo : [po_interrupt]
|
||||
)
|
||||
);
|
||||
|
||||
|
@ -82,6 +82,7 @@ interface
|
||||
function alignment:shortint;virtual;abstract;
|
||||
{ alignment when this type appears in a record/class/... }
|
||||
function structalignment:shortint;virtual;
|
||||
function aggregatealignment:shortint;virtual;
|
||||
function getvardef:longint;virtual;abstract;
|
||||
function getparentdef:tdef;virtual;
|
||||
function getsymtable(t:tgetsymtable):TSymtable;virtual;
|
||||
@ -379,6 +380,14 @@ implementation
|
||||
result:=alignment;
|
||||
end;
|
||||
|
||||
function tdef.aggregatealignment: shortint;
|
||||
begin
|
||||
if Assigned(Owner) and Assigned(Owner.defowner) and (Owner.defowner is TDef) and (Owner.defowner <> Self) then
|
||||
Result := max(structalignment, TDef(Owner.defowner).aggregatealignment)
|
||||
else
|
||||
Result := structalignment;
|
||||
end;
|
||||
|
||||
|
||||
procedure tdef.ChangeOwner(st:TSymtable);
|
||||
begin
|
||||
|
@ -289,6 +289,7 @@ type
|
||||
_OPENSTRING,
|
||||
_RIGHTSHIFT,
|
||||
_SPECIALIZE,
|
||||
_VECTORCALL,
|
||||
_CONSTRUCTOR,
|
||||
_GREATERTHAN,
|
||||
_INTERNCONST,
|
||||
@ -628,6 +629,7 @@ const
|
||||
(str:'OPENSTRING' ;special:false;keyword:[m_none];op:NOTOKEN),
|
||||
(str:'RIGHTSHIFT' ;special:false;keyword:[m_none];op:NOTOKEN), { delphi operator name }
|
||||
(str:'SPECIALIZE' ;special:false;keyword:[m_none];op:NOTOKEN),
|
||||
(str:'VECTORCALL' ;special:false;keyword:[m_none];op:NOTOKEN),
|
||||
(str:'CONSTRUCTOR' ;special:false;keyword:alllanguagemodes-[m_iso,m_extpas];op:NOTOKEN),
|
||||
(str:'GREATERTHAN' ;special:false;keyword:[m_none];op:NOTOKEN), { delphi operator name }
|
||||
(str:'INTERNCONST' ;special:false;keyword:[m_none];op:NOTOKEN),
|
||||
|
@ -52,6 +52,7 @@ interface
|
||||
OT_BITS64 = $00000008; { x86_64 and FPU }
|
||||
OT_BITS128 = $10000000; { 16 byte SSE }
|
||||
OT_BITS256 = $20000000; { 32 byte AVX }
|
||||
OT_BITS512 = $40000000; { 64 byte AVX512 }
|
||||
OT_BITS80 = $00000010; { FPU only }
|
||||
OT_FAR = $00000020; { this means 16:16 or 16:32, like in CALL/JMP }
|
||||
OT_NEAR = $00000040;
|
||||
@ -612,7 +613,8 @@ implementation
|
||||
OT_NEAR,OT_FAR,OT_SHORT,
|
||||
OT_NONE,
|
||||
OT_BITS128,
|
||||
OT_BITS256
|
||||
OT_BITS256,
|
||||
OT_BITS512
|
||||
),
|
||||
(OT_NONE,
|
||||
OT_BITS8,OT_BITS16,OT_BITS32,OT_BITS64,OT_BITS8,OT_BITS8,OT_BITS16,OT_BITS8,OT_BITS16,OT_BITS32,
|
||||
@ -622,7 +624,8 @@ implementation
|
||||
OT_NEAR,OT_FAR,OT_SHORT,
|
||||
OT_NONE,
|
||||
OT_BITS128,
|
||||
OT_BITS256
|
||||
OT_BITS256,
|
||||
OT_BITS512
|
||||
),
|
||||
(OT_NONE,
|
||||
OT_BITS8,OT_BITS16,OT_BITS32,OT_BITS64,OT_NONE,OT_NONE,OT_NONE,OT_NONE,OT_NONE,OT_NONE,
|
||||
@ -632,7 +635,8 @@ implementation
|
||||
OT_NEAR,OT_FAR,OT_SHORT,
|
||||
OT_NONE,
|
||||
OT_BITS128,
|
||||
OT_BITS256
|
||||
OT_BITS256,
|
||||
OT_BITS512
|
||||
)
|
||||
);
|
||||
|
||||
@ -650,7 +654,8 @@ implementation
|
||||
OT_NEAR,OT_FAR,OT_SHORT,
|
||||
OT_NONE,
|
||||
OT_BITS128,
|
||||
OT_BITS256
|
||||
OT_BITS256,
|
||||
OT_BITS512
|
||||
),
|
||||
(OT_NONE,
|
||||
OT_BITS8,OT_BITS16,OT_BITS32,OT_BITS64,OT_BITS8,OT_BITS8,OT_BITS16,
|
||||
@ -660,7 +665,8 @@ implementation
|
||||
OT_NEAR,OT_FAR,OT_SHORT,
|
||||
OT_NONE,
|
||||
OT_BITS128,
|
||||
OT_BITS256
|
||||
OT_BITS256,
|
||||
OT_BITS512
|
||||
),
|
||||
(OT_NONE,
|
||||
OT_BITS8,OT_BITS16,OT_BITS32,OT_BITS64,OT_NONE,OT_NONE,OT_NONE,
|
||||
@ -670,7 +676,8 @@ implementation
|
||||
OT_NEAR,OT_FAR,OT_SHORT,
|
||||
OT_NONE,
|
||||
OT_BITS128,
|
||||
OT_BITS256
|
||||
OT_BITS256,
|
||||
OT_BITS512
|
||||
)
|
||||
);
|
||||
|
||||
@ -688,7 +695,8 @@ implementation
|
||||
OT_NEAR,OT_FAR,OT_SHORT,
|
||||
OT_NONE,
|
||||
OT_BITS128,
|
||||
OT_BITS256
|
||||
OT_BITS256,
|
||||
OT_BITS512
|
||||
),
|
||||
(OT_NONE,
|
||||
OT_BITS8,OT_BITS16,OT_BITS32,OT_BITS64,OT_BITS8,OT_BITS8,OT_BITS16,
|
||||
@ -698,7 +706,8 @@ implementation
|
||||
OT_NEAR,OT_FAR,OT_SHORT,
|
||||
OT_NONE,
|
||||
OT_BITS128,
|
||||
OT_BITS256
|
||||
OT_BITS256,
|
||||
OT_BITS512
|
||||
),
|
||||
(OT_NONE,
|
||||
OT_BITS8,OT_BITS16,OT_BITS32,OT_BITS64,OT_NONE,OT_NONE,OT_NONE,
|
||||
@ -708,7 +717,8 @@ implementation
|
||||
OT_NEAR,OT_FAR,OT_SHORT,
|
||||
OT_NONE,
|
||||
OT_BITS128,
|
||||
OT_BITS256
|
||||
OT_BITS256,
|
||||
OT_BITS512
|
||||
)
|
||||
);
|
||||
|
||||
|
@ -158,20 +158,26 @@ unit cgx86;
|
||||
TCGSize2OpSize: Array[tcgsize] of topsize =
|
||||
(S_NO,S_B,S_W,S_L,S_Q,S_XMM,S_B,S_W,S_L,S_Q,S_XMM,
|
||||
S_FS,S_FL,S_FX,S_IQ,S_FXX,
|
||||
S_NO,S_NO,S_NO,S_MD,S_XMM,S_YMM,
|
||||
S_NO,S_NO,S_NO,S_NO,S_XMM,S_YMM);
|
||||
S_NO,S_NO,S_NO,S_MD,S_XMM,S_YMM,S_ZMM,
|
||||
S_NO,S_NO,S_NO,S_NO,S_XMM,S_YMM,S_ZMM,
|
||||
S_NO,S_XMM,S_YMM,S_ZMM,
|
||||
S_NO,S_XMM,S_YMM,S_ZMM);
|
||||
{$elseif defined(i386)}
|
||||
TCGSize2OpSize: Array[tcgsize] of topsize =
|
||||
(S_NO,S_B,S_W,S_L,S_L,S_T,S_B,S_W,S_L,S_L,S_L,
|
||||
S_FS,S_FL,S_FX,S_IQ,S_FXX,
|
||||
S_NO,S_NO,S_NO,S_MD,S_XMM,S_YMM,
|
||||
S_NO,S_NO,S_NO,S_NO,S_XMM,S_YMM);
|
||||
S_NO,S_NO,S_NO,S_MD,S_XMM,S_YMM,S_ZMM,
|
||||
S_NO,S_NO,S_NO,S_NO,S_XMM,S_YMM,S_ZMM,
|
||||
S_NO,S_XMM,S_YMM,S_ZMM,
|
||||
S_NO,S_XMM,S_YMM,S_ZMM);
|
||||
{$elseif defined(i8086)}
|
||||
TCGSize2OpSize: Array[tcgsize] of topsize =
|
||||
(S_NO,S_B,S_W,S_W,S_W,S_T,S_B,S_W,S_W,S_W,S_W,
|
||||
S_FS,S_FL,S_FX,S_IQ,S_FXX,
|
||||
S_NO,S_NO,S_NO,S_MD,S_XMM,S_YMM,
|
||||
S_NO,S_NO,S_NO,S_NO,S_XMM,S_YMM);
|
||||
S_NO,S_NO,S_NO,S_MD,S_XMM,S_YMM,S_ZMM,
|
||||
S_NO,S_NO,S_NO,S_NO,S_XMM,S_YMM,S_ZMM,
|
||||
S_NO,S_XMM,S_YMM,S_ZMM,
|
||||
S_NO,S_XMM,S_YMM,S_ZMM);
|
||||
{$endif}
|
||||
|
||||
{$ifndef NOTARGETWIN}
|
||||
@ -185,6 +191,9 @@ unit cgx86;
|
||||
{ returns true, if the compiler should use leave instead of mov/pop }
|
||||
function UseLeave: boolean;
|
||||
|
||||
{ Gets the byte alignment of a reference }
|
||||
function GetRefAlignment(ref: treference): Byte;
|
||||
|
||||
implementation
|
||||
|
||||
uses
|
||||
@ -225,6 +234,22 @@ unit cgx86;
|
||||
{$endif}
|
||||
end;
|
||||
|
||||
function GetRefAlignment(ref: treference): Byte; {$IFDEF USEINLINE}inline;{$ENDIF}
|
||||
begin
|
||||
{$ifdef x86_64}
|
||||
{ The stack pointer and base pointer will be aligned to 16-byte boundaries if the machine code is well-behaved }
|
||||
if (ref.base = NR_RSP) or (ref.base = NR_RBP) then
|
||||
begin
|
||||
if (ref.index = NR_NO) and ((ref.offset mod 16) = 0) then
|
||||
Result := 16
|
||||
else
|
||||
Result := ref.alignment;
|
||||
end
|
||||
else
|
||||
{$endif x86_64}
|
||||
Result := ref.alignment;
|
||||
end;
|
||||
|
||||
const
|
||||
TOpCG2AsmOp: Array[topcg] of TAsmOp = (A_NONE,A_MOV,A_ADD,A_AND,A_DIV,
|
||||
A_IDIV,A_IMUL,A_MUL,A_NEG,A_NOT,A_OR,
|
||||
@ -268,8 +293,19 @@ unit cgx86;
|
||||
result:=rg[R_MMREGISTER].getregister(list,R_SUBMMS);
|
||||
OS_M64:
|
||||
result:=rg[R_MMREGISTER].getregister(list,R_SUBQ);
|
||||
OS_M128:
|
||||
result:=rg[R_MMREGISTER].getregister(list,R_SUBMMWHOLE);
|
||||
OS_M128,
|
||||
OS_F128,
|
||||
OS_MF128,
|
||||
OS_MD128:
|
||||
result:=rg[R_MMREGISTER].getregister(list,R_SUBMMX); { R_SUBMMWHOLE seems a bit dangerous and ambiguous, so changed to R_SUBMMX. [Kit] }
|
||||
OS_M256,
|
||||
OS_MF256,
|
||||
OS_MD256:
|
||||
result:=rg[R_MMREGISTER].getregister(list,R_SUBMMY);
|
||||
OS_M512,
|
||||
OS_MF512,
|
||||
OS_MD512:
|
||||
result:=rg[R_MMREGISTER].getregister(list,R_SUBMMZ);
|
||||
else
|
||||
internalerror(200506041);
|
||||
end;
|
||||
@ -1260,13 +1296,13 @@ unit cgx86;
|
||||
(A_CVTSD2SS,A_MOVSD,A_NONE,A_NONE,A_NONE),
|
||||
(A_NONE,A_NONE,A_NONE,A_NONE,A_NONE),
|
||||
(A_NONE,A_NONE,A_NONE,A_MOVQ,A_NONE),
|
||||
(A_NONE,A_NONE,A_NONE,A_NONE,A_NONE));
|
||||
(A_NONE,A_NONE,A_NONE,A_NONE,A_MOVAPS));
|
||||
convertopavx : array[OS_F32..OS_F128,OS_F32..OS_F128] of tasmop = (
|
||||
(A_VMOVSS,A_VCVTSS2SD,A_NONE,A_NONE,A_NONE),
|
||||
(A_VCVTSD2SS,A_VMOVSD,A_NONE,A_NONE,A_NONE),
|
||||
(A_NONE,A_NONE,A_NONE,A_NONE,A_NONE),
|
||||
(A_NONE,A_NONE,A_NONE,A_MOVQ,A_NONE),
|
||||
(A_NONE,A_NONE,A_NONE,A_NONE,A_NONE));
|
||||
(A_NONE,A_NONE,A_NONE,A_NONE,A_VMOVAPS));
|
||||
begin
|
||||
{ we can have OS_F32/OS_F64 (record in function result/LOC_MMREGISTER) to
|
||||
OS_32/OS_64 (record in memory/LOC_REFERENCE) }
|
||||
@ -1288,13 +1324,33 @@ unit cgx86;
|
||||
end
|
||||
{ we can have OS_M64 (record in function result/LOC_MMREGISTER) to
|
||||
OS_64 (record in memory/LOC_REFERENCE) }
|
||||
else if (tcgsize2size[fromsize]=tcgsize2size[tosize]) and
|
||||
(fromsize=OS_M64) then
|
||||
else if (tcgsize2size[fromsize]=tcgsize2size[tosize]) then
|
||||
begin
|
||||
if UseAVX then
|
||||
result:=A_VMOVQ
|
||||
else
|
||||
result:=A_MOVQ;
|
||||
case fromsize of
|
||||
OS_M64:
|
||||
{ we can have OS_M64 (record in function result/LOC_MMREGISTER) to
|
||||
OS_64 (record in memory/LOC_REFERENCE) }
|
||||
if UseAVX then
|
||||
result:=A_VMOVQ
|
||||
else
|
||||
result:=A_MOVQ;
|
||||
OS_M128:
|
||||
{ 128-bit aligned vector }
|
||||
if UseAVX then
|
||||
result:=A_VMOVAPS
|
||||
else
|
||||
result:=A_MOVAPS;
|
||||
OS_M256,
|
||||
OS_M512:
|
||||
{ 256-bit aligned vector }
|
||||
if UseAVX then
|
||||
result:=A_VMOVAPS
|
||||
else
|
||||
{ SSE does not support 256-bit or 512-bit vectors }
|
||||
InternalError(2018012930);
|
||||
else
|
||||
InternalError(2018012920);
|
||||
end;
|
||||
end
|
||||
else
|
||||
internalerror(2010060104);
|
||||
@ -1313,12 +1369,14 @@ unit cgx86;
|
||||
if fromsize=tosize then
|
||||
{ needs correct size in case of spilling }
|
||||
case fromsize of
|
||||
OS_F32:
|
||||
OS_F32,
|
||||
OS_MF128:
|
||||
if UseAVX then
|
||||
instr:=taicpu.op_reg_reg(A_VMOVAPS,S_NO,reg1,reg2)
|
||||
else
|
||||
instr:=taicpu.op_reg_reg(A_MOVAPS,S_NO,reg1,reg2);
|
||||
OS_F64:
|
||||
OS_F64,
|
||||
OS_MD128:
|
||||
if UseAVX then
|
||||
instr:=taicpu.op_reg_reg(A_VMOVAPD,S_NO,reg1,reg2)
|
||||
else
|
||||
@ -1328,6 +1386,32 @@ unit cgx86;
|
||||
instr:=taicpu.op_reg_reg(A_VMOVQ,S_NO,reg1,reg2)
|
||||
else
|
||||
instr:=taicpu.op_reg_reg(A_MOVQ,S_NO,reg1,reg2);
|
||||
OS_M128, OS_MS128:
|
||||
if UseAVX then
|
||||
instr:=taicpu.op_reg_reg(A_VMOVDQA,S_NO,reg1,reg2)
|
||||
else
|
||||
instr:=taicpu.op_reg_reg(A_MOVDQA,S_NO,reg1,reg2);
|
||||
OS_MF256,
|
||||
OS_MF512:
|
||||
if UseAVX then
|
||||
instr:=taicpu.op_reg_reg(A_VMOVAPS,S_NO,reg1,reg2)
|
||||
else
|
||||
{ SSE doesn't support 512-bit vectors }
|
||||
InternalError(2018012931);
|
||||
OS_MD256,
|
||||
OS_MD512:
|
||||
if UseAVX then
|
||||
instr:=taicpu.op_reg_reg(A_VMOVAPD,S_NO,reg1,reg2)
|
||||
else
|
||||
{ SSE doesn't support 512-bit vectors }
|
||||
InternalError(2018012932);
|
||||
OS_M256, OS_MS256,
|
||||
OS_M512, OS_MS512:
|
||||
if UseAVX then
|
||||
instr:=taicpu.op_reg_reg(A_VMOVDQA,S_NO,reg1,reg2)
|
||||
else
|
||||
{ SSE doesn't support 512-bit vectors }
|
||||
InternalError(2018012933);
|
||||
else
|
||||
internalerror(2006091201);
|
||||
end
|
||||
@ -1385,15 +1469,152 @@ unit cgx86;
|
||||
make_simple_ref(list,tmpref);
|
||||
if shuffle=nil then
|
||||
begin
|
||||
if fromsize=OS_M64 then
|
||||
list.concat(taicpu.op_ref_reg(A_MOVQ,S_NO,tmpref,reg))
|
||||
else
|
||||
{$ifdef x86_64}
|
||||
{ x86-64 has always properly aligned data }
|
||||
list.concat(taicpu.op_ref_reg(A_MOVDQA,S_NO,tmpref,reg));
|
||||
{$else x86_64}
|
||||
list.concat(taicpu.op_ref_reg(A_MOVDQU,S_NO,tmpref,reg));
|
||||
{$endif x86_64}
|
||||
case fromsize of
|
||||
OS_F32:
|
||||
if UseAVX then
|
||||
op := A_VMOVSS
|
||||
else
|
||||
op := A_MOVSS;
|
||||
OS_F64:
|
||||
if UseAVX then
|
||||
op := A_VMOVSD
|
||||
else
|
||||
op := A_MOVSD;
|
||||
OS_M32, OS_32, OS_S32:
|
||||
if UseAVX then
|
||||
op := A_VMOVD
|
||||
else
|
||||
op := A_MOVD;
|
||||
OS_M64, OS_64, OS_S64:
|
||||
if UseAVX then
|
||||
op := A_VMOVQ
|
||||
else
|
||||
op := A_MOVQ;
|
||||
OS_MF128:
|
||||
{ Use XMM transfer of packed singles }
|
||||
if UseAVX then
|
||||
begin
|
||||
if GetRefAlignment(tmpref) = 16 then
|
||||
op := A_VMOVAPS
|
||||
else
|
||||
op := A_VMOVUPS
|
||||
end
|
||||
else
|
||||
begin
|
||||
if GetRefAlignment(tmpref) = 16 then
|
||||
op := A_MOVAPS
|
||||
else
|
||||
op := A_MOVUPS
|
||||
end;
|
||||
OS_MD128:
|
||||
{ Use XMM transfer of packed doubles }
|
||||
if UseAVX then
|
||||
begin
|
||||
if GetRefAlignment(tmpref) = 16 then
|
||||
op := A_VMOVAPD
|
||||
else
|
||||
op := A_VMOVUPD
|
||||
end
|
||||
else
|
||||
begin
|
||||
if GetRefAlignment(tmpref) = 16 then
|
||||
op := A_MOVAPD
|
||||
else
|
||||
op := A_MOVUPD
|
||||
end;
|
||||
OS_M128, OS_MS128:
|
||||
{ Use XMM integer transfer }
|
||||
if UseAVX then
|
||||
begin
|
||||
if GetRefAlignment(tmpref) = 16 then
|
||||
op := A_VMOVDQA
|
||||
else
|
||||
op := A_VMOVDQU
|
||||
end
|
||||
else
|
||||
begin
|
||||
if GetRefAlignment(tmpref) = 16 then
|
||||
op := A_MOVDQA
|
||||
else
|
||||
op := A_MOVDQU
|
||||
end;
|
||||
OS_MF256:
|
||||
{ Use YMM transfer of packed singles }
|
||||
if UseAVX then
|
||||
begin
|
||||
if GetRefAlignment(tmpref) = 32 then
|
||||
op := A_VMOVAPS
|
||||
else
|
||||
op := A_VMOVUPS
|
||||
end
|
||||
else
|
||||
{ SSE doesn't support 256-bit vectors }
|
||||
InternalError(2018012934);
|
||||
OS_MD256:
|
||||
{ Use YMM transfer of packed doubles }
|
||||
if UseAVX then
|
||||
begin
|
||||
if GetRefAlignment(tmpref) = 32 then
|
||||
op := A_VMOVAPD
|
||||
else
|
||||
op := A_VMOVUPD
|
||||
end
|
||||
else
|
||||
{ SSE doesn't support 256-bit vectors }
|
||||
InternalError(2018012935);
|
||||
OS_M256, OS_MS256:
|
||||
{ Use YMM integer transfer }
|
||||
if UseAVX then
|
||||
begin
|
||||
if GetRefAlignment(tmpref) = 32 then
|
||||
op := A_VMOVDQA
|
||||
else
|
||||
op := A_VMOVDQU
|
||||
end
|
||||
else
|
||||
{ SSE doesn't support 256-bit vectors }
|
||||
InternalError(2018012936);
|
||||
OS_MF512:
|
||||
{ Use ZMM transfer of packed singles }
|
||||
if UseAVX then
|
||||
begin
|
||||
if GetRefAlignment(tmpref) = 64 then
|
||||
op := A_VMOVAPS
|
||||
else
|
||||
op := A_VMOVUPS
|
||||
end
|
||||
else
|
||||
{ SSE doesn't support 512-bit vectors }
|
||||
InternalError(2018012937);
|
||||
OS_MD512:
|
||||
{ Use ZMM transfer of packed doubles }
|
||||
if UseAVX then
|
||||
begin
|
||||
if GetRefAlignment(tmpref) = 64 then
|
||||
op := A_VMOVAPD
|
||||
else
|
||||
op := A_VMOVUPD
|
||||
end
|
||||
else
|
||||
{ SSE doesn't support 512-bit vectors }
|
||||
InternalError(2018012938);
|
||||
OS_M512, OS_MS512:
|
||||
{ Use ZMM integer transfer }
|
||||
if UseAVX then
|
||||
begin
|
||||
if GetRefAlignment(tmpref) = 64 then
|
||||
op := A_VMOVDQA
|
||||
else
|
||||
op := A_VMOVDQU
|
||||
end
|
||||
else
|
||||
{ SSE doesn't support 512-bit vectors }
|
||||
InternalError(2018012939);
|
||||
else
|
||||
{ No valid transfer command available }
|
||||
internalerror(2017121410);
|
||||
end;
|
||||
list.concat(taicpu.op_ref_reg(op,S_NO,tmpref,reg));
|
||||
end
|
||||
else if shufflescalar(shuffle) then
|
||||
begin
|
||||
@ -1415,20 +1636,149 @@ unit cgx86;
|
||||
hreg : tregister;
|
||||
tmpref : treference;
|
||||
op : tasmop;
|
||||
|
||||
begin
|
||||
tmpref:=ref;
|
||||
make_simple_ref(list,tmpref);
|
||||
if shuffle=nil then
|
||||
begin
|
||||
if fromsize=OS_M64 then
|
||||
list.concat(taicpu.op_reg_ref(A_MOVQ,S_NO,reg,tmpref))
|
||||
else
|
||||
{$ifdef x86_64}
|
||||
{ x86-64 has always properly aligned data }
|
||||
list.concat(taicpu.op_reg_ref(A_MOVDQA,S_NO,reg,tmpref))
|
||||
{$else x86_64}
|
||||
list.concat(taicpu.op_reg_ref(A_MOVDQU,S_NO,reg,tmpref))
|
||||
{$endif x86_64}
|
||||
case fromsize of
|
||||
OS_F32:
|
||||
if UseAVX then
|
||||
op := A_VMOVSS
|
||||
else
|
||||
op := A_MOVSS;
|
||||
OS_F64:
|
||||
if UseAVX then
|
||||
op := A_VMOVSD
|
||||
else
|
||||
op := A_MOVSD;
|
||||
OS_M32, OS_32, OS_S32:
|
||||
if UseAVX then
|
||||
op := A_VMOVD
|
||||
else
|
||||
op := A_MOVD;
|
||||
OS_M64, OS_64, OS_S64:
|
||||
if UseAVX then
|
||||
op := A_VMOVQ
|
||||
else
|
||||
op := A_MOVQ;
|
||||
OS_MF128:
|
||||
{ Use XMM transfer of packed singles }
|
||||
if UseAVX then
|
||||
begin
|
||||
if GetRefAlignment(tmpref) = 16 then
|
||||
op := A_VMOVAPS
|
||||
else
|
||||
op := A_VMOVUPS
|
||||
end else
|
||||
begin
|
||||
if GetRefAlignment(tmpref) = 16 then
|
||||
op := A_MOVAPS
|
||||
else
|
||||
op := A_MOVUPS
|
||||
end;
|
||||
OS_MD128:
|
||||
{ Use XMM transfer of packed doubles }
|
||||
if UseAVX then
|
||||
begin
|
||||
if GetRefAlignment(tmpref) = 16 then
|
||||
op := A_VMOVAPD
|
||||
else
|
||||
op := A_VMOVUPD
|
||||
end else
|
||||
begin
|
||||
if GetRefAlignment(tmpref) = 16 then
|
||||
op := A_MOVAPD
|
||||
else
|
||||
op := A_MOVUPD
|
||||
end;
|
||||
OS_M128, OS_MS128:
|
||||
{ Use XMM integer transfer }
|
||||
if UseAVX then
|
||||
begin
|
||||
if GetRefAlignment(tmpref) = 16 then
|
||||
op := A_VMOVDQA
|
||||
else
|
||||
op := A_VMOVDQU
|
||||
end else
|
||||
begin
|
||||
if GetRefAlignment(tmpref) = 16 then
|
||||
op := A_MOVDQA
|
||||
else
|
||||
op := A_MOVDQU
|
||||
end;
|
||||
OS_MF256:
|
||||
{ Use XMM transfer of packed singles }
|
||||
if UseAVX then
|
||||
begin
|
||||
if GetRefAlignment(tmpref) = 32 then
|
||||
op := A_VMOVAPS
|
||||
else
|
||||
op := A_VMOVUPS
|
||||
end else
|
||||
{ SSE doesn't support 256-bit vectors }
|
||||
InternalError(2018012940);
|
||||
OS_MD256:
|
||||
{ Use XMM transfer of packed doubles }
|
||||
if UseAVX then
|
||||
begin
|
||||
if GetRefAlignment(tmpref) = 32 then
|
||||
op := A_VMOVAPD
|
||||
else
|
||||
op := A_VMOVUPD
|
||||
end else
|
||||
{ SSE doesn't support 256-bit vectors }
|
||||
InternalError(2018012941);
|
||||
OS_M256, OS_MS256:
|
||||
{ Use XMM integer transfer }
|
||||
if UseAVX then
|
||||
begin
|
||||
if GetRefAlignment(tmpref) = 32 then
|
||||
op := A_VMOVDQA
|
||||
else
|
||||
op := A_VMOVDQU
|
||||
end else
|
||||
{ SSE doesn't support 256-bit vectors }
|
||||
InternalError(2018012942);
|
||||
OS_MF512:
|
||||
{ Use XMM transfer of packed singles }
|
||||
if UseAVX then
|
||||
begin
|
||||
if GetRefAlignment(tmpref) = 64 then
|
||||
op := A_VMOVAPS
|
||||
else
|
||||
op := A_VMOVUPS
|
||||
end else
|
||||
{ SSE doesn't support 512-bit vectors }
|
||||
InternalError(2018012943);
|
||||
OS_MD512:
|
||||
{ Use XMM transfer of packed doubles }
|
||||
if UseAVX then
|
||||
begin
|
||||
if GetRefAlignment(tmpref) = 64 then
|
||||
op := A_VMOVAPD
|
||||
else
|
||||
op := A_VMOVUPD
|
||||
end else
|
||||
{ SSE doesn't support 512-bit vectors }
|
||||
InternalError(2018012944);
|
||||
OS_M512, OS_MS512:
|
||||
{ Use XMM integer transfer }
|
||||
if UseAVX then
|
||||
begin
|
||||
if GetRefAlignment(tmpref) = 64 then
|
||||
op := A_VMOVDQA
|
||||
else
|
||||
op := A_VMOVDQU
|
||||
end else
|
||||
{ SSE doesn't support 512-bit vectors }
|
||||
InternalError(2018012945);
|
||||
else
|
||||
{ No valid transfer command available }
|
||||
internalerror(2017121411);
|
||||
end;
|
||||
list.concat(taicpu.op_reg_ref(op,S_NO,reg,tmpref));
|
||||
end
|
||||
else if shufflescalar(shuffle) then
|
||||
begin
|
||||
|
@ -419,10 +419,12 @@ implementation
|
||||
else
|
||||
internalerror(2009071902);
|
||||
end;
|
||||
OS_M128,OS_MS128:
|
||||
OS_M128,OS_MS128,OS_MF128,OS_MD128:
|
||||
cgsize2subreg:=R_SUBMMX;
|
||||
OS_M256,OS_MS256:
|
||||
OS_M256,OS_MS256,OS_MF256,OS_MD256:
|
||||
cgsize2subreg:=R_SUBMMY;
|
||||
OS_M512,OS_MS512,OS_MF512,OS_MD512:
|
||||
cgsize2subreg:=R_SUBMMZ;
|
||||
OS_NO:
|
||||
{ error message should have been thrown already before, so avoid only
|
||||
an internal error }
|
||||
@ -435,7 +437,7 @@ implementation
|
||||
|
||||
function reg_cgsize(const reg: tregister): tcgsize;
|
||||
const subreg2cgsize:array[Tsubregister] of Tcgsize =
|
||||
(OS_NO,OS_8,OS_8,OS_16,OS_32,OS_64,OS_NO,OS_NO,OS_NO,OS_F32,OS_F64,OS_NO,OS_M128,OS_M256,OS_NO,OS_NO,OS_NO,OS_NO,OS_NO,OS_NO,OS_NO,OS_NO);
|
||||
(OS_NO,OS_8,OS_8,OS_16,OS_32,OS_64,OS_NO,OS_NO,OS_NO,OS_F32,OS_F64,OS_NO,OS_M128,OS_M256,OS_M512,OS_NO,OS_NO,OS_NO,OS_NO,OS_NO,OS_NO,OS_NO,OS_NO);
|
||||
begin
|
||||
case getregtype(reg) of
|
||||
R_INTREGISTER :
|
||||
@ -466,7 +468,7 @@ implementation
|
||||
function reg2opsize(r:Tregister):topsize;
|
||||
const
|
||||
subreg2opsize : array[tsubregister] of topsize =
|
||||
(S_NO,S_B,S_B,S_W,S_L,S_Q,S_NO,S_NO,S_NO,S_NO,S_NO,S_NO,S_NO,S_NO,S_NO,S_NO,S_NO,S_NO,S_NO,S_NO,S_NO,S_NO);
|
||||
(S_NO,S_B,S_B,S_W,S_L,S_Q,S_NO,S_NO,S_NO,S_NO,S_NO,S_NO,S_NO,S_NO,S_NO,S_NO,S_NO,S_NO,S_NO,S_NO,S_NO,S_NO,S_NO);
|
||||
begin
|
||||
reg2opsize:=S_L;
|
||||
case getregtype(r) of
|
||||
|
@ -52,27 +52,28 @@ interface
|
||||
'd',
|
||||
'','','',
|
||||
't',
|
||||
'x',
|
||||
'y'
|
||||
'x',
|
||||
'y',
|
||||
'z'
|
||||
);
|
||||
{ suffix-to-opsize conversion tables, used in asmreadrer }
|
||||
{ !! S_LQ excluded: movzlq does not exist, movslq is processed
|
||||
as a separate instruction w/o suffix (aka movsxd), and there are
|
||||
no more instructions needing it. }
|
||||
att_sizesuffixstr : array[0..13] of string[2] = (
|
||||
'','BW','BL','WL','BQ','WQ',{'LQ',}'B','W','L','S','Q','T','X','Y'
|
||||
att_sizesuffixstr : array[0..14] of string[2] = (
|
||||
'','BW','BL','WL','BQ','WQ',{'LQ',}'B','W','L','S','Q','T','X','Y','Z'
|
||||
);
|
||||
att_sizesuffix : array[0..13] of topsize = (
|
||||
S_NO,S_BW,S_BL,S_WL,S_BQ,S_WQ,{S_LQ,}S_B,S_W,S_L,S_NO,S_Q,S_NO,S_NO,S_NO
|
||||
att_sizesuffix : array[0..14] of topsize = (
|
||||
S_NO,S_BW,S_BL,S_WL,S_BQ,S_WQ,{S_LQ,}S_B,S_W,S_L,S_NO,S_Q,S_NO,S_NO,S_NO,S_NO
|
||||
);
|
||||
att_sizefpusuffix : array[0..13] of topsize = (
|
||||
S_NO,S_NO,S_NO,S_NO,S_NO,S_NO,{S_NO,}S_NO,S_NO,S_FL,S_FS,S_NO,S_FX,S_NO,S_NO
|
||||
att_sizefpusuffix : array[0..14] of topsize = (
|
||||
S_NO,S_NO,S_NO,S_NO,S_NO,S_NO,{S_NO,}S_NO,S_NO,S_FL,S_FS,S_NO,S_FX,S_NO,S_NO,S_NO
|
||||
);
|
||||
att_sizefpuintsuffix : array[0..13] of topsize = (
|
||||
S_NO,S_NO,S_NO,S_NO,S_NO,S_NO,{S_NO,}S_NO,S_NO,S_IL,S_IS,S_IQ,S_NO,S_NO,S_NO
|
||||
att_sizefpuintsuffix : array[0..14] of topsize = (
|
||||
S_NO,S_NO,S_NO,S_NO,S_NO,S_NO,{S_NO,}S_NO,S_NO,S_IL,S_IS,S_IQ,S_NO,S_NO,S_NO,S_NO
|
||||
);
|
||||
att_sizemmsuffix : array[0..13] of topsize = (
|
||||
S_NO,S_NO,S_NO,S_NO,S_NO,S_NO,{S_NO,}S_NO,S_NO,S_NO,S_NO,S_NO,S_NO,S_XMM,S_YMM
|
||||
att_sizemmsuffix : array[0..14] of topsize = (
|
||||
S_NO,S_NO,S_NO,S_NO,S_NO,S_NO,{S_NO,}S_NO,S_NO,S_NO,S_NO,S_NO,S_NO,S_XMM,S_YMM,S_ZMM
|
||||
);
|
||||
{$else x86_64}
|
||||
gas_opsize2str : array[topsize] of string[2] = ('',
|
||||
@ -82,24 +83,25 @@ interface
|
||||
'd',
|
||||
'','','',
|
||||
't',
|
||||
'x',
|
||||
'y'
|
||||
'x',
|
||||
'y',
|
||||
'z'
|
||||
);
|
||||
{ suffix-to-opsize conversion tables, used in asmreadrer }
|
||||
att_sizesuffixstr : array[0..11] of string[2] = (
|
||||
'','BW','BL','WL','B','W','L','S','Q','T','X','Y'
|
||||
att_sizesuffixstr : array[0..12] of string[2] = (
|
||||
'','BW','BL','WL','B','W','L','S','Q','T','X','Y','Z'
|
||||
);
|
||||
att_sizesuffix : array[0..11] of topsize = (
|
||||
S_NO,S_BW,S_BL,S_WL,S_B,S_W,S_L,S_NO,S_NO,S_NO,S_NO,S_NO
|
||||
att_sizesuffix : array[0..12] of topsize = (
|
||||
S_NO,S_BW,S_BL,S_WL,S_B,S_W,S_L,S_NO,S_NO,S_NO,S_NO,S_NO,S_NO
|
||||
);
|
||||
att_sizefpusuffix : array[0..11] of topsize = (
|
||||
S_NO,S_NO,S_NO,S_NO,S_NO,S_NO,S_FL,S_FS,S_NO,S_FX,S_NO,S_NO
|
||||
att_sizefpusuffix : array[0..12] of topsize = (
|
||||
S_NO,S_NO,S_NO,S_NO,S_NO,S_NO,S_FL,S_FS,S_NO,S_FX,S_NO,S_NO,S_NO
|
||||
);
|
||||
att_sizefpuintsuffix : array[0..11] of topsize = (
|
||||
S_NO,S_NO,S_NO,S_NO,S_NO,S_NO,S_IL,S_IS,S_IQ,S_NO,S_NO,S_NO
|
||||
att_sizefpuintsuffix : array[0..12] of topsize = (
|
||||
S_NO,S_NO,S_NO,S_NO,S_NO,S_NO,S_IL,S_IS,S_IQ,S_NO,S_NO,S_NO,S_NO
|
||||
);
|
||||
att_sizemmsuffix : array[0..11] of topsize = (
|
||||
S_NO,S_NO,S_NO,S_NO,S_NO,S_NO,S_NO,S_NO,S_NO,S_NO,S_XMM,S_YMM
|
||||
att_sizemmsuffix : array[0..12] of topsize = (
|
||||
S_NO,S_NO,S_NO,S_NO,S_NO,S_NO,S_NO,S_NO,S_NO,S_NO,S_XMM,S_YMM,S_ZMM
|
||||
);
|
||||
|
||||
{$endif x86_64}
|
||||
|
@ -343,7 +343,8 @@ const
|
||||
0,0,0,
|
||||
80,
|
||||
128,
|
||||
256
|
||||
256,
|
||||
512
|
||||
);
|
||||
{$else}
|
||||
topsize2memsize: array[topsize] of integer =
|
||||
@ -354,7 +355,8 @@ topsize2memsize: array[topsize] of integer =
|
||||
0,0,0,
|
||||
80,
|
||||
128,
|
||||
256
|
||||
256,
|
||||
512
|
||||
);
|
||||
{$endif}
|
||||
|
||||
|
@ -74,10 +74,14 @@ uses
|
||||
A_MOVZX:
|
||||
Result:=OptPass1Movx(p);
|
||||
A_VMOVAPS,
|
||||
A_VMOVAPD:
|
||||
A_VMOVAPD,
|
||||
A_VMOVUPS,
|
||||
A_VMOVUPD:
|
||||
result:=OptPass1VMOVAP(p);
|
||||
A_MOVAPD,
|
||||
A_MOVAPS:
|
||||
A_MOVAPS,
|
||||
A_MOVUPD,
|
||||
A_MOVUPS:
|
||||
result:=OptPass1MOVAP(p);
|
||||
A_VDIVSD,
|
||||
A_VDIVSS,
|
||||
|
@ -35,7 +35,8 @@ type
|
||||
S_NEAR,S_FAR,S_SHORT,
|
||||
S_T,
|
||||
S_XMM,
|
||||
S_YMM
|
||||
S_YMM,
|
||||
S_ZMM
|
||||
);
|
||||
|
||||
TOpSizes = set of topsize;
|
||||
|
@ -108,7 +108,8 @@ Const
|
||||
pocall_sysv_abi_default,
|
||||
pocall_sysv_abi_cdecl,
|
||||
pocall_ms_abi_default,
|
||||
pocall_ms_abi_cdecl
|
||||
pocall_ms_abi_cdecl,
|
||||
pocall_vectorcall
|
||||
];
|
||||
|
||||
cputypestr : array[tcputype] of string[10] = ('',
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -173,7 +173,7 @@ implementation
|
||||
result:=
|
||||
((target_info.system=system_x86_64_win64) and
|
||||
not(proccall in [pocall_sysv_abi_default,pocall_sysv_abi_cdecl])) or
|
||||
(proccall in [pocall_ms_abi_default,pocall_ms_abi_cdecl]);
|
||||
(proccall in [pocall_ms_abi_default,pocall_ms_abi_cdecl,pocall_vectorcall]);
|
||||
end;
|
||||
|
||||
|
||||
|
869
tests/test/cg/tvectorcall1.pp
Normal file
869
tests/test/cg/tvectorcall1.pp
Normal file
@ -0,0 +1,869 @@
|
||||
{ %CPU=x86_64 }
|
||||
program vectorcall_hva_test1;
|
||||
|
||||
{$IFNDEF CPUX86_64}
|
||||
{$FATAL This test program can only be compiled on Windows or Linux 64-bit with an Intel processor }
|
||||
{$ENDIF}
|
||||
|
||||
{$ASMMODE Intel}
|
||||
{$PUSH}
|
||||
{$CODEALIGN RECORDMIN=16}
|
||||
{$PACKRECORDS C}
|
||||
type
|
||||
TM128 = record
|
||||
case Byte of
|
||||
0: (M128_F32: array[0..3] of Single);
|
||||
1: (M128_F64: array[0..1] of Double);
|
||||
end;
|
||||
{$POP}
|
||||
|
||||
{ HFA test: field style. }
|
||||
|
||||
{ NOTE: if the record falls on a 16-byte boundary, the 4-component entries will
|
||||
turned into vectors rather than HFAs. }
|
||||
|
||||
THFA1_SF = packed record
|
||||
F1: Single;
|
||||
end;
|
||||
|
||||
{$IFDEF WIN64}
|
||||
THFA2_SF = packed record
|
||||
F1, F2: Single;
|
||||
end;
|
||||
|
||||
THFA3_SF = packed record
|
||||
F1, F2, F3: Single;
|
||||
end;
|
||||
|
||||
THFA4_SF = packed record
|
||||
F1, F2, F3, F4: Single;
|
||||
end;
|
||||
{$ENDIF}
|
||||
|
||||
THFA1_DF = packed record
|
||||
F1: Double;
|
||||
end;
|
||||
|
||||
{$IFDEF WIN64}
|
||||
THFA2_DF = packed record
|
||||
F1, F2: Double;
|
||||
end;
|
||||
|
||||
THFA3_DF = packed record
|
||||
F1, F2, F3: Double;
|
||||
end;
|
||||
|
||||
THFA4_DF = packed record
|
||||
F1, F2, F3, F4: Double;
|
||||
end;
|
||||
{$ENDIF}
|
||||
|
||||
{ HFA test - array style }
|
||||
|
||||
{ NOTE: if the record falls on a 16-byte boundary, the 4-component entries will
|
||||
turned into vectors rather than HFAs. }
|
||||
|
||||
THFA1_SA = packed record
|
||||
F: array[0..0] of Single;
|
||||
end;
|
||||
|
||||
{$IFDEF WIN64}
|
||||
THFA2_SA = packed record
|
||||
F: array[0..1] of Single;
|
||||
end;
|
||||
|
||||
THFA3_SA = packed record
|
||||
F: array[0..2] of Single;
|
||||
end;
|
||||
|
||||
THFA4_SA = packed record
|
||||
F: array[0..3] of Single;
|
||||
end;
|
||||
{$ENDIF}
|
||||
|
||||
THFA1_DA = packed record
|
||||
F: array[0..0] of Double;
|
||||
end;
|
||||
|
||||
{$IFDEF WIN64}
|
||||
THFA2_DA = packed record
|
||||
F: array[0..1] of Double;
|
||||
end;
|
||||
|
||||
THFA3_DA = packed record
|
||||
F: array[0..2] of Double;
|
||||
end;
|
||||
|
||||
THFA4_DA = packed record
|
||||
F: array[0..3] of Double;
|
||||
end;
|
||||
{$ENDIF}
|
||||
|
||||
{ Single-type vector }
|
||||
|
||||
function HorizontalAddSingle(V: TM128): Single; vectorcall;
|
||||
begin
|
||||
HorizontalAddSingle := V.M128_F32[0] + V.M128_F32[1] + V.M128_F32[2] + V.M128_F32[3];
|
||||
end;
|
||||
|
||||
function HorizontalAddSingle_ASM(V: TM128): Single; vectorcall; assembler; nostackframe;
|
||||
asm
|
||||
HADDPS XMM0, XMM0
|
||||
HADDPS XMM0, XMM0
|
||||
end;
|
||||
|
||||
{ Double-type vector }
|
||||
|
||||
function HorizontalAddDouble(V: TM128): Double; vectorcall;
|
||||
begin
|
||||
HorizontalAddDouble := V.M128_F64[0] + V.M128_F64[1];
|
||||
end;
|
||||
|
||||
function HorizontalAddDouble_ASM(V: TM128): Double; vectorcall; assembler; nostackframe;
|
||||
asm
|
||||
HADDPD XMM0, XMM0
|
||||
end;
|
||||
|
||||
{ 3-element aggregate }
|
||||
|
||||
function AddSingles1F(HFA: THFA1_SF): Single; vectorcall;
|
||||
begin
|
||||
AddSingles1F := HFA.F1;
|
||||
end;
|
||||
|
||||
function AddSingles1F_ASM(HFA: THFA1_SF): Single; vectorcall; assembler; nostackframe;
|
||||
asm
|
||||
{ Do absolutely nothing! }
|
||||
end;
|
||||
|
||||
function AddDoubles1F(HFA: THFA1_DF): Double; vectorcall;
|
||||
begin
|
||||
AddDoubles1F := HFA.F1;
|
||||
end;
|
||||
|
||||
function AddDoubles1F_ASM(HFA: THFA1_DF): Double; vectorcall; assembler; nostackframe;
|
||||
asm
|
||||
{ Do absolutely nothing! }
|
||||
end;
|
||||
|
||||
function AddSingles1A(HFA: THFA1_SA): Single; vectorcall;
|
||||
begin
|
||||
AddSingles1A := HFA.F[0];
|
||||
end;
|
||||
|
||||
function AddSingles1A_ASM(HFA: THFA1_SA): Single; vectorcall; assembler; nostackframe;
|
||||
asm
|
||||
{ Do absolutely nothing! }
|
||||
end;
|
||||
|
||||
function AddDoubles1A(HFA: THFA1_DA): Double; vectorcall;
|
||||
begin
|
||||
AddDoubles1A := HFA.F[0];
|
||||
end;
|
||||
|
||||
function AddDoubles1A_ASM(HFA: THFA1_DA): Double; vectorcall; assembler; nostackframe;
|
||||
asm
|
||||
{ Do absolutely nothing! }
|
||||
end;
|
||||
|
||||
{$IFDEF WIN64}
|
||||
{ 2-element aggregate }
|
||||
|
||||
function AddSingles2F(HFA: THFA2_SF): Single; vectorcall;
|
||||
begin
|
||||
AddSingles2F := HFA.F1 + HFA.F2;
|
||||
end;
|
||||
|
||||
function AddSingles2F_ASM(HFA: THFA2_SF): Single; vectorcall; assembler; nostackframe;
|
||||
asm
|
||||
ADDSS XMM0, XMM1
|
||||
end;
|
||||
|
||||
function AddDoubles2F(HFA: THFA2_DF): Double; vectorcall;
|
||||
begin
|
||||
AddDoubles2F := HFA.F1 + HFA.F2;
|
||||
end;
|
||||
|
||||
function AddDoubles2F_ASM(HFA: THFA2_DF): Double; vectorcall; assembler; nostackframe;
|
||||
asm
|
||||
ADDSD XMM0, XMM1
|
||||
end;
|
||||
|
||||
function AddSingles2A(HFA: THFA2_SA): Single; vectorcall;
|
||||
begin
|
||||
AddSingles2A := HFA.F[0] + HFA.F[1];
|
||||
end;
|
||||
|
||||
function AddSingles2A_ASM(HFA: THFA2_SA): Single; vectorcall; assembler; nostackframe;
|
||||
asm
|
||||
ADDSS XMM0, XMM1
|
||||
end;
|
||||
|
||||
function AddDoubles2A(HFA: THFA2_DA): Double; vectorcall;
|
||||
begin
|
||||
AddDoubles2A := HFA.F[0] + HFA.F[1];
|
||||
end;
|
||||
|
||||
function AddDoubles2A_ASM(HFA: THFA2_DA): Double; vectorcall; assembler; nostackframe;
|
||||
asm
|
||||
ADDSD XMM0, XMM1
|
||||
end;
|
||||
|
||||
{ 3-element aggregate }
|
||||
|
||||
function AddSingles3F(HFA: THFA3_SF): Single; vectorcall;
|
||||
begin
|
||||
AddSingles3F := HFA.F1 + HFA.F2 + HFA.F3;
|
||||
end;
|
||||
|
||||
function AddSingles3F_ASM(HFA: THFA3_SF): Single; vectorcall; assembler; nostackframe;
|
||||
asm
|
||||
ADDSS XMM0, XMM1
|
||||
ADDSS XMM0, XMM2
|
||||
end;
|
||||
|
||||
function AddDoubles3F(HFA: THFA3_DF): Double; vectorcall;
|
||||
begin
|
||||
AddDoubles3F := HFA.F1 + HFA.F2 + HFA.F3;
|
||||
end;
|
||||
|
||||
function AddDoubles3F_ASM(HFA: THFA3_DF): Double; vectorcall; assembler; nostackframe;
|
||||
asm
|
||||
ADDSD XMM0, XMM1
|
||||
ADDSD XMM0, XMM2
|
||||
end;
|
||||
|
||||
function AddSingles3A(HFA: THFA3_SA): Single; vectorcall;
|
||||
begin
|
||||
AddSingles3A := HFA.F[0] + HFA.F[1] + HFA.F[2];
|
||||
end;
|
||||
|
||||
function AddSingles3A_ASM(HFA: THFA3_SA): Single; vectorcall; assembler; nostackframe;
|
||||
asm
|
||||
ADDSS XMM0, XMM1
|
||||
ADDSS XMM0, XMM2
|
||||
end;
|
||||
|
||||
function AddDoubles3A(HFA: THFA3_DA): Double; vectorcall;
|
||||
begin
|
||||
AddDoubles3A := HFA.F[0] + HFA.F[1] + HFA.F[2];
|
||||
end;
|
||||
|
||||
function AddDoubles3A_ASM(HFA: THFA3_DA): Double; vectorcall; assembler; nostackframe;
|
||||
asm
|
||||
ADDSD XMM0, XMM1
|
||||
ADDSD XMM0, XMM2
|
||||
end;
|
||||
|
||||
{ 4-element aggregate }
|
||||
|
||||
function AddSingles4F(HFA: THFA4_SF): Single; vectorcall;
|
||||
begin
|
||||
AddSingles4F := HFA.F1 + HFA.F2 + HFA.F3 + HFA.F4;
|
||||
end;
|
||||
|
||||
function AddSingles4F_ASM(HFA: THFA4_SF): Single; vectorcall; assembler; nostackframe;
|
||||
asm
|
||||
ADDSS XMM0, XMM1
|
||||
ADDSS XMM0, XMM2
|
||||
ADDSS XMM0, XMM3
|
||||
end;
|
||||
|
||||
function AddDoubles4F(HFA: THFA4_DF): Double; vectorcall;
|
||||
begin
|
||||
AddDoubles4F := HFA.F1 + HFA.F2 + HFA.F3 + HFA.F4;
|
||||
end;
|
||||
|
||||
function AddDoubles4F_ASM(HFA: THFA4_DF): Double; vectorcall; assembler; nostackframe;
|
||||
asm
|
||||
ADDSD XMM0, XMM1
|
||||
ADDSD XMM0, XMM2
|
||||
ADDSD XMM0, XMM3
|
||||
end;
|
||||
|
||||
function AddSingles4A(HFA: THFA4_SA): Single; vectorcall;
|
||||
begin
|
||||
AddSingles4A := HFA.F[0] + HFA.F[1] + HFA.F[2] + HFA.F[3];
|
||||
end;
|
||||
|
||||
function AddSingles4A_ASM(HFA: THFA4_SA): Single; vectorcall; assembler; nostackframe;
|
||||
asm
|
||||
ADDSS XMM0, XMM1
|
||||
ADDSS XMM0, XMM2
|
||||
ADDSS XMM0, XMM3
|
||||
end;
|
||||
|
||||
function AddDoubles4A(HFA: THFA4_DA): Double; vectorcall;
|
||||
begin
|
||||
AddDoubles4A := HFA.F[0] + HFA.F[1] + HFA.F[2] + HFA.F[3];
|
||||
end;
|
||||
|
||||
function AddDoubles4A_ASM(HFA: THFA4_DA): Double; vectorcall; assembler; nostackframe;
|
||||
asm
|
||||
ADDSD XMM0, XMM1
|
||||
ADDSD XMM0, XMM2
|
||||
ADDSD XMM0, XMM3
|
||||
end;
|
||||
{$ENDIF}
|
||||
|
||||
var
|
||||
HVA: TM128;
|
||||
HFA1_SF: THFA1_SF;
|
||||
HFA1_DF: THFA1_DF;
|
||||
HFA1_SA: THFA1_SA;
|
||||
HFA1_DA: THFA1_DA;
|
||||
{$IFDEF WIN64}
|
||||
HFA2_SF: THFA2_SF;
|
||||
HFA2_DF: THFA2_DF;
|
||||
HFA2_SA: THFA2_SA;
|
||||
HFA2_DA: THFA2_DA;
|
||||
HFA3_SF: THFA3_SF;
|
||||
HFA3_DF: THFA3_DF;
|
||||
HFA3_SA: THFA3_SA;
|
||||
HFA3_DA: THFA3_DA;
|
||||
HFA4_SF: THFA4_SF;
|
||||
HFA4_DF: THFA4_DF;
|
||||
HFA4_SA: THFA4_SA;
|
||||
HFA4_DA: THFA4_DA;
|
||||
{$ENDIF}
|
||||
TestPointer: PtrUInt;
|
||||
I, J: Integer;
|
||||
ResS, ResSA: Single;
|
||||
ResD, ResDA: Double;
|
||||
Addresses: array[0..3] of Pointer;
|
||||
FieldAddresses: array[0..3, 0..3] of Pointer;
|
||||
const
|
||||
AddressNames1: array[0..3] of ShortString = ('HFA1_SF', 'HFA1_DF', 'HFA1_SA', 'HFA1_DA');
|
||||
{$IFDEF WIN64}
|
||||
AddressNames2: array[0..3] of ShortString = ('HFA2_SF', 'HFA2_DF', 'HFA2_SA', 'HFA2_DA');
|
||||
AddressNames3: array[0..3] of ShortString = ('HFA3_SF', 'HFA3_DF', 'HFA3_SA', 'HFA3_DA');
|
||||
AddressNames4: array[0..3] of ShortString = ('HFA4_SF', 'HFA4_DF', 'HFA4_SA', 'HFA4_DA');
|
||||
{$ENDIF}
|
||||
FieldAddressNames: array[0..3] of ShortString = ('F1', 'F2', 'F3', 'F4');
|
||||
|
||||
ExpS1: Single = 5.0;
|
||||
{$IFDEF WIN64}
|
||||
ExpS2: Single = -5.0;
|
||||
ExpS3: Single = 10.0;
|
||||
{$ENDIF}
|
||||
ExpS4: Single = -10.0;
|
||||
ExpD1: Double = 5.0;
|
||||
ExpD2: Double = -5.0;
|
||||
{$IFDEF WIN64}
|
||||
ExpD3: Double = 10.0;
|
||||
ExpD4: Double = -10.0;
|
||||
{$ENDIF}
|
||||
begin
|
||||
|
||||
if (PtrUInt(@HVA) and $F) <> 0 then
|
||||
begin
|
||||
WriteLn('FAIL: HVA is not correctly aligned.');
|
||||
Halt(1);
|
||||
end;
|
||||
|
||||
{ array of singles }
|
||||
WriteLn('- horizontal add (4 singles)');
|
||||
HVA.M128_F32[0] := 5.0;
|
||||
HVA.M128_F32[1] := -10.0;
|
||||
HVA.M128_F32[2] := 15.0;
|
||||
HVA.M128_F32[3] := -20.0;
|
||||
ResS := HorizontalAddSingle(HVA);
|
||||
ResSA := HorizontalAddSingle_ASM(HVA);
|
||||
if (ResS <> ResSA) then
|
||||
begin
|
||||
WriteLn('FAIL: HorizontalAddSingle(HVA) has the vector in the wrong register.');
|
||||
Halt(1);
|
||||
end else
|
||||
begin
|
||||
if ResS <> ExpS4 then
|
||||
begin
|
||||
WriteLn('FAIL: HorizontalAddSingle(HVA) returned ', ResS, ' instead of ', ExpS4);
|
||||
Halt(1);
|
||||
end;
|
||||
end;
|
||||
|
||||
{ array of doubles }
|
||||
WriteLn('- horizontal add (2 doubles)');
|
||||
HVA.M128_F64[0] := 5.0;
|
||||
HVA.M128_F64[1] := -10.0;
|
||||
ResD := HorizontalAddDouble(HVA);
|
||||
ResDA := HorizontalAddDouble_ASM(HVA);
|
||||
if (ResD <> ResDA) then
|
||||
begin
|
||||
WriteLn('FAIL: HorizontalAddDouble(HVA) has the vector in the wrong register.');
|
||||
Halt(1);
|
||||
end else
|
||||
begin
|
||||
if ResD <> ExpD2 then
|
||||
begin
|
||||
WriteLn('FAIL: HorizontalAddDouble(HVA) returned ', ResD, ' instead of ', ExpD2);
|
||||
Halt(1);
|
||||
end;
|
||||
end;
|
||||
|
||||
{ 1-field aggregates }
|
||||
WriteLn('- 1-field aggregates');
|
||||
|
||||
Addresses[0] := @HFA1_SF;
|
||||
Addresses[1] := @HFA1_SA;
|
||||
Addresses[2] := @HFA1_DF;
|
||||
Addresses[3] := @HFA1_DA;
|
||||
FieldAddresses[0][0] := @(HFA1_SF.F1);
|
||||
FieldAddresses[1][0] := @(HFA1_SA.F[0]);
|
||||
FieldAddresses[2][0] := @(HFA1_DF.F1);
|
||||
FieldAddresses[3][0] := @(HFA1_DA.F[0]);
|
||||
|
||||
{ Check alignment }
|
||||
for I := 0 to 1 do
|
||||
begin
|
||||
TestPointer := PtrUInt(Addresses[I]);
|
||||
if Pointer(TestPointer) <> FieldAddresses[I][0] then
|
||||
begin
|
||||
WriteLn('FAIL: ', AddressNames1[I], ' is not correctly packed; field F1 is not in the expected place.');
|
||||
Halt(1);
|
||||
end;
|
||||
end;
|
||||
|
||||
HFA1_SF.F1 := 5.0;
|
||||
ResS := AddSingles1F(HFA1_SF);
|
||||
ResSA := AddSingles1F_ASM(HFA1_SF);
|
||||
if (ResS <> ResSA) then
|
||||
begin
|
||||
WriteLn('FAIL: AddSingles1F(', AddressNames1[I], ') is not passing the aggregate correctly.');
|
||||
Halt(1);
|
||||
end else
|
||||
begin
|
||||
if ResS <> ExpS1 then
|
||||
begin
|
||||
WriteLn('FAIL: AddSingles1F(', AddressNames1[I], ') returned ', ResS, ' instead of ', ExpS1);
|
||||
Halt(1);
|
||||
end;
|
||||
end;
|
||||
|
||||
HFA1_DF.F1 := 5.0;
|
||||
ResD := AddDoubles1F(HFA1_DF);
|
||||
ResDA := AddDoubles1F_ASM(HFA1_DF);
|
||||
if (ResD <> ResDA) then
|
||||
begin
|
||||
WriteLn('FAIL: AddDoubles1F(', AddressNames1[I], ') is not passing the aggregate correctly.');
|
||||
Halt(1);
|
||||
end else
|
||||
begin
|
||||
if ResD <> ExpD1 then
|
||||
begin
|
||||
WriteLn('FAIL: AddDoubles1F(', AddressNames1[I], ') returned ', ResD, ' instead of ', ExpD1);
|
||||
Halt(1);
|
||||
end;
|
||||
end;
|
||||
|
||||
HFA1_SA.F[0] := 5.0;
|
||||
ResS := AddSingles1A(HFA1_SA);
|
||||
ResSA := AddSingles1A_ASM(HFA1_SA);
|
||||
if (ResS <> ResSA) then
|
||||
begin
|
||||
WriteLn('FAIL: AddSingles1A(', AddressNames1[I], ') is not passing the aggregate correctly.');
|
||||
Halt(1);
|
||||
end else
|
||||
begin
|
||||
if ResS <> ExpS1 then
|
||||
begin
|
||||
WriteLn('FAIL: AddSingles1A(', AddressNames1[I], ') returned ', ResS, ' instead of ', ExpS1);
|
||||
Halt(1);
|
||||
end;
|
||||
end;
|
||||
|
||||
HFA1_DA.F[0] := 5.0;
|
||||
ResD := AddDoubles1A(HFA1_DA);
|
||||
ResDA := AddDoubles1A_ASM(HFA1_DA);
|
||||
if (ResD <> ResDA) then
|
||||
begin
|
||||
WriteLn('FAIL: AddDoubles1A(', AddressNames1[I], ') is not passing the aggregate correctly.');
|
||||
Halt(1);
|
||||
end else
|
||||
begin
|
||||
if ResD <> ExpD1 then
|
||||
begin
|
||||
WriteLn('FAIL: AddDoubles1A(', AddressNames1[I], ') returned ', ResD, ' instead of ', ExpD1);
|
||||
Halt(1);
|
||||
end;
|
||||
end;
|
||||
|
||||
{$IFDEF WIN64}
|
||||
{ 2-field aggregates }
|
||||
WriteLn('- 2-field aggregates');
|
||||
|
||||
Addresses[0] := @HFA2_SF;
|
||||
Addresses[1] := @HFA2_SA;
|
||||
FieldAddresses[0][0] := @(HFA2_SF.F1);
|
||||
FieldAddresses[0][1] := @(HFA2_SF.F2);
|
||||
FieldAddresses[1][0] := @(HFA2_SA.F[0]);
|
||||
FieldAddresses[1][1] := @(HFA2_SA.F[1]);
|
||||
|
||||
{ Check alignment of Singles }
|
||||
for I := 0 to 1 do
|
||||
begin
|
||||
TestPointer := PtrUInt(Addresses[I]);
|
||||
for J := 0 to 1 do
|
||||
begin
|
||||
if Pointer(TestPointer) <> FieldAddresses[I][J] then
|
||||
begin
|
||||
WriteLn('FAIL: ', AddressNames2[I], ' is not correctly packed; field ', FieldAddressNames[J], ' is not in the expected place.');
|
||||
Halt(1);
|
||||
end;
|
||||
|
||||
Inc(TestPointer, $4);
|
||||
end;
|
||||
end;
|
||||
|
||||
Addresses[2] := @HFA2_DF;
|
||||
Addresses[3] := @HFA2_DA;
|
||||
FieldAddresses[2][0] := @(HFA2_DF.F1);
|
||||
FieldAddresses[2][1] := @(HFA2_DF.F2);
|
||||
FieldAddresses[3][0] := @(HFA2_DA.F[0]);
|
||||
FieldAddresses[3][1] := @(HFA2_DA.F[1]);
|
||||
|
||||
{ Check alignment of Doubles }
|
||||
for I := 2 to 3 do
|
||||
begin
|
||||
TestPointer := PtrUInt(Addresses[I]);
|
||||
for J := 0 to 1 do
|
||||
begin
|
||||
if Pointer(TestPointer) <> FieldAddresses[I][J] then
|
||||
begin
|
||||
WriteLn('FAIL: ', AddressNames2[I], ' is not correctly packed; field ', FieldAddressNames[J], ' is not in the expected place.');
|
||||
Halt(1);
|
||||
end;
|
||||
|
||||
Inc(TestPointer, $8);
|
||||
end;
|
||||
end;
|
||||
|
||||
HFA2_SF.F1 := 5.0;
|
||||
HFA2_SF.F2 := -10.0;
|
||||
ResS := AddSingles2F(HFA2_SF);
|
||||
ResSA := AddSingles2F_ASM(HFA2_SF);
|
||||
if (ResS <> ResSA) then
|
||||
begin
|
||||
WriteLn('FAIL: AddSingles2F(HFA2_SF) is not passing the aggregate correctly.');
|
||||
Halt(1);
|
||||
end else
|
||||
begin
|
||||
if ResS <> ExpS2 then
|
||||
begin
|
||||
WriteLn('FAIL: AddSingles2F(HFA2_SF) returned ', ResS, ' instead of ', ExpS2);
|
||||
Halt(1);
|
||||
end;
|
||||
end;
|
||||
|
||||
HFA2_DF.F1 := 5.0;
|
||||
HFA2_DF.F2 := -10.0;
|
||||
ResD := AddDoubles2F(HFA2_DF);
|
||||
ResDA := AddDoubles2F_ASM(HFA2_DF);
|
||||
if (ResD <> ResDA) then
|
||||
begin
|
||||
WriteLn('FAIL: AddDoubles2F(HFA2_DF) is not passing the aggregate correctly.');
|
||||
Halt(1);
|
||||
end else
|
||||
begin
|
||||
if ResD <> ExpD2 then
|
||||
begin
|
||||
WriteLn('FAIL: AddDoubles2F(HFA2_DF) returned ', ResD, ' instead of ', ExpD2);
|
||||
Halt(1);
|
||||
end;
|
||||
end;
|
||||
|
||||
HFA2_SA.F[0] := 5.0;
|
||||
HFA2_SA.F[1] := -10.0;
|
||||
ResS := AddSingles2A(HFA2_SA);
|
||||
ResSA := AddSingles2A_ASM(HFA2_SA);
|
||||
if (ResS <> ResSA) then
|
||||
begin
|
||||
WriteLn('FAIL: AddSingles2A(HFA2_SA) is not passing the aggregate correctly.');
|
||||
Halt(1);
|
||||
end else
|
||||
begin
|
||||
if ResS <> ExpS2 then
|
||||
begin
|
||||
WriteLn('FAIL: AddSingles2A(HFA2_SA) returned ', ResS, ' instead of ', ExpS2);
|
||||
Halt(1);
|
||||
end;
|
||||
end;
|
||||
|
||||
HFA2_DA.F[0] := 5.0;
|
||||
HFA2_DA.F[1] := -10.0;
|
||||
ResD := AddDoubles2A(HFA2_DA);
|
||||
ResDA := AddDoubles2A_ASM(HFA2_DA);
|
||||
if (ResD <> ResDA) then
|
||||
begin
|
||||
WriteLn('FAIL: AddDoubles2A(HFA2_DA) is not passing the aggregate correctly.');
|
||||
Halt(1);
|
||||
end else
|
||||
begin
|
||||
if ResD <> ExpD2 then
|
||||
begin
|
||||
WriteLn('FAIL: AddDoubles2A(HFA2_DA) returned ', ResD, ' instead of ', ExpD2);
|
||||
Halt(1);
|
||||
end;
|
||||
end;
|
||||
|
||||
{ 3-field aggregates }
|
||||
WriteLn('- 3-field aggregates');
|
||||
|
||||
Addresses[0] := @HFA3_SF;
|
||||
Addresses[1] := @HFA3_SA;
|
||||
FieldAddresses[0][0] := @(HFA3_SF.F1);
|
||||
FieldAddresses[0][1] := @(HFA3_SF.F2);
|
||||
FieldAddresses[0][2] := @(HFA3_SF.F3);
|
||||
FieldAddresses[1][0] := @(HFA3_SA.F[0]);
|
||||
FieldAddresses[1][1] := @(HFA3_SA.F[1]);
|
||||
FieldAddresses[1][2] := @(HFA3_SA.F[2]);
|
||||
|
||||
{ Check alignment of Singles }
|
||||
for I := 0 to 1 do
|
||||
begin
|
||||
TestPointer := PtrUInt(Addresses[I]);
|
||||
for J := 0 to 2 do
|
||||
begin
|
||||
if Pointer(TestPointer) <> FieldAddresses[I][J] then
|
||||
begin
|
||||
WriteLn('FAIL: ', AddressNames3[I], ' is not correctly packed; field ', FieldAddressNames[J], ' is not in the expected place.');
|
||||
Halt(1);
|
||||
end;
|
||||
|
||||
Inc(TestPointer, $4);
|
||||
end;
|
||||
end;
|
||||
|
||||
Addresses[2] := @HFA3_DF;
|
||||
Addresses[3] := @HFA3_DA;
|
||||
FieldAddresses[2][0] := @(HFA3_DF.F1);
|
||||
FieldAddresses[2][1] := @(HFA3_DF.F2);
|
||||
FieldAddresses[2][2] := @(HFA3_DF.F3);
|
||||
FieldAddresses[3][0] := @(HFA3_DA.F[0]);
|
||||
FieldAddresses[3][1] := @(HFA3_DA.F[1]);
|
||||
FieldAddresses[3][2] := @(HFA3_DA.F[2]);
|
||||
|
||||
{ Check alignment of Doubles }
|
||||
for I := 2 to 3 do
|
||||
begin
|
||||
TestPointer := PtrUInt(Addresses[I]);
|
||||
for J := 0 to 2 do
|
||||
begin
|
||||
if Pointer(TestPointer) <> FieldAddresses[I][J] then
|
||||
begin
|
||||
WriteLn('FAIL: ', AddressNames3[I], ' is not correctly packed; field ', FieldAddressNames[J], ' is not in the expected place.');
|
||||
Halt(1);
|
||||
end;
|
||||
|
||||
Inc(TestPointer, $8);
|
||||
end;
|
||||
end;
|
||||
|
||||
HFA3_SF.F1 := 5.0;
|
||||
HFA3_SF.F2 := -10.0;
|
||||
HFA3_SF.F3 := 15.0;
|
||||
ResS := AddSingles3F(HFA3_SF);
|
||||
ResSA := AddSingles3F_ASM(HFA3_SF);
|
||||
if (ResS <> ResSA) then
|
||||
begin
|
||||
WriteLn('FAIL: AddSingles3F(HFA3_SF) is not passing the aggregate correctly.');
|
||||
Halt(1);
|
||||
end else
|
||||
begin
|
||||
if ResS <> ExpS3 then
|
||||
begin
|
||||
WriteLn('FAIL: AddSingles3F(HFA3_SF) returned ', ResS, ' instead of ', ExpS3);
|
||||
Halt(1);
|
||||
end;
|
||||
end;
|
||||
|
||||
HFA3_DF.F1 := 5.0;
|
||||
HFA3_DF.F2 := -10.0;
|
||||
HFA3_DF.F3 := 15.0;
|
||||
ResD := AddDoubles3F(HFA3_DF);
|
||||
ResDA := AddDoubles3F_ASM(HFA3_DF);
|
||||
if (ResD <> ResDA) then
|
||||
begin
|
||||
WriteLn('FAIL: AddDoubles3F(HFA3_DF) is not passing the aggregate correctly.');
|
||||
Halt(1);
|
||||
end else
|
||||
begin
|
||||
if ResD <> ExpD3 then
|
||||
begin
|
||||
WriteLn('FAIL: AddDoubles3F(HFA3_DF) returned ', ResD, ' instead of ', ExpD3);
|
||||
Halt(1);
|
||||
end;
|
||||
end;
|
||||
|
||||
HFA3_SA.F[0] := 5.0;
|
||||
HFA3_SA.F[1] := -10.0;
|
||||
HFA3_SA.F[2] := 15.0;
|
||||
ResS := AddSingles3A(HFA3_SA);
|
||||
ResSA := AddSingles3A_ASM(HFA3_SA);
|
||||
if (ResS <> ResSA) then
|
||||
begin
|
||||
WriteLn('FAIL: AddSingles3A(HFA3_SA) is not passing the aggregate correctly.');
|
||||
Halt(1);
|
||||
end else
|
||||
begin
|
||||
if ResS <> ExpS3 then
|
||||
begin
|
||||
WriteLn('FAIL: AddSingles3A(HFA3_SA) returned ', ResS, ' instead of ', ExpS3);
|
||||
Halt(1);
|
||||
end;
|
||||
end;
|
||||
|
||||
HFA3_DA.F[0] := 5.0;
|
||||
HFA3_DA.F[1] := -10.0;
|
||||
HFA3_DA.F[2] := 15.0;
|
||||
ResD := AddDoubles3A(HFA3_DA);
|
||||
ResDA := AddDoubles3A_ASM(HFA3_DA);
|
||||
if (ResD <> ResDA) then
|
||||
begin
|
||||
WriteLn('FAIL: AddDoubles3A(HFA3_DA) is not passing the aggregate correctly.');
|
||||
Halt(1);
|
||||
end else
|
||||
begin
|
||||
if ResD <> ExpD3 then
|
||||
begin
|
||||
WriteLn('FAIL: AddDoubles3A(HFA3_DA) returned ', ResD, ' instead of ', ExpD3);
|
||||
Halt(1);
|
||||
end;
|
||||
end;
|
||||
|
||||
{ 4-field aggregates }
|
||||
WriteLn('- 4-field aggregates');
|
||||
|
||||
Addresses[0] := @HFA4_SF;
|
||||
Addresses[1] := @HFA4_SA;
|
||||
FieldAddresses[0][0] := @(HFA4_SF.F1);
|
||||
FieldAddresses[0][1] := @(HFA4_SF.F2);
|
||||
FieldAddresses[0][2] := @(HFA4_SF.F3);
|
||||
FieldAddresses[0][3] := @(HFA4_SF.F4);
|
||||
FieldAddresses[1][0] := @(HFA4_SA.F[0]);
|
||||
FieldAddresses[1][1] := @(HFA4_SA.F[1]);
|
||||
FieldAddresses[1][2] := @(HFA4_SA.F[2]);
|
||||
FieldAddresses[1][3] := @(HFA4_SA.F[3]);
|
||||
|
||||
{ Check alignment of Singles }
|
||||
for I := 0 to 1 do
|
||||
begin
|
||||
TestPointer := PtrUInt(Addresses[I]);
|
||||
for J := 0 to 3 do
|
||||
begin
|
||||
if Pointer(TestPointer) <> FieldAddresses[I][J] then
|
||||
begin
|
||||
WriteLn('FAIL: ', AddressNames4[I], ' is not correctly packed; field ', FieldAddressNames[J], ' is not in the expected place.');
|
||||
Halt(1);
|
||||
end;
|
||||
|
||||
Inc(TestPointer, $4);
|
||||
end;
|
||||
end;
|
||||
|
||||
Addresses[2] := @HFA4_DF;
|
||||
Addresses[3] := @HFA4_DA;
|
||||
FieldAddresses[2][0] := @(HFA4_DF.F1);
|
||||
FieldAddresses[2][1] := @(HFA4_DF.F2);
|
||||
FieldAddresses[2][2] := @(HFA4_DF.F3);
|
||||
FieldAddresses[2][3] := @(HFA4_DF.F4);
|
||||
FieldAddresses[3][0] := @(HFA4_DA.F[0]);
|
||||
FieldAddresses[3][1] := @(HFA4_DA.F[1]);
|
||||
FieldAddresses[3][2] := @(HFA4_DA.F[2]);
|
||||
FieldAddresses[3][3] := @(HFA4_DA.F[3]);
|
||||
|
||||
{ Check alignment of Doubles }
|
||||
for I := 2 to 3 do
|
||||
begin
|
||||
TestPointer := PtrUInt(Addresses[I]);
|
||||
for J := 0 to 3 do
|
||||
begin
|
||||
if Pointer(TestPointer) <> FieldAddresses[I][J] then
|
||||
begin
|
||||
WriteLn('FAIL: ', AddressNames4[I], ' is not correctly packed; field ', FieldAddressNames[J], ' is not in the expected place.');
|
||||
Halt(1);
|
||||
end;
|
||||
|
||||
Inc(TestPointer, $8);
|
||||
end;
|
||||
end;
|
||||
|
||||
HFA4_SF.F1 := 5.0;
|
||||
HFA4_SF.F2 := -10.0;
|
||||
HFA4_SF.F3 := 15.0;
|
||||
HFA4_SF.F4 := -20.0;
|
||||
ResS := AddSingles4F(HFA4_SF);
|
||||
ResSA := AddSingles4F_ASM(HFA4_SF);
|
||||
if (ResS <> ResSA) then
|
||||
begin
|
||||
WriteLn('FAIL: AddSingles4F(HFA4_SF) is not passing the aggregate correctly.');
|
||||
Halt(1);
|
||||
end else
|
||||
begin
|
||||
if ResS <> ExpS4 then
|
||||
begin
|
||||
WriteLn('FAIL: AddSingles4F(HFA4_SF) returned ', ResS, ' instead of ', ExpS4);
|
||||
Halt(1);
|
||||
end;
|
||||
end;
|
||||
|
||||
HFA4_DF.F1 := 5.0;
|
||||
HFA4_DF.F2 := -10.0;
|
||||
HFA4_DF.F3 := 15.0;
|
||||
HFA4_DF.F4 := -20.0;
|
||||
ResD := AddDoubles4F(HFA4_DF);
|
||||
ResDA := AddDoubles4F_ASM(HFA4_DF);
|
||||
if (ResD <> ResDA) then
|
||||
begin
|
||||
WriteLn('FAIL: AddDoubles4F(HFA4_DF) is not passing the aggregate correctly.');
|
||||
Halt(1);
|
||||
end else
|
||||
begin
|
||||
if ResD <> ExpD4 then
|
||||
begin
|
||||
WriteLn('FAIL: AddDoubles4F(HFA4_DF) returned ', ResD, ' instead of ', ExpD4);
|
||||
Halt(1);
|
||||
end;
|
||||
end;
|
||||
|
||||
HFA4_SA.F[0] := 5.0;
|
||||
HFA4_SA.F[1] := -10.0;
|
||||
HFA4_SA.F[2] := 15.0;
|
||||
HFA4_SA.F[3] := -20.0;
|
||||
ResS := AddSingles4A(HFA4_SA);
|
||||
ResSA := AddSingles4A_ASM(HFA4_SA);
|
||||
if (ResS <> ResSA) then
|
||||
begin
|
||||
WriteLn('FAIL: AddSingles4A(HFA4_SA) is not passing the aggregate correctly.');
|
||||
Halt(1);
|
||||
end else
|
||||
begin
|
||||
if ResS <> ExpS4 then
|
||||
begin
|
||||
WriteLn('FAIL: AddSingles4A(HFA4_SA) returned ', ResS, ' instead of ', ExpS4);
|
||||
Halt(1);
|
||||
end;
|
||||
end;
|
||||
|
||||
HFA4_DA.F[0] := 5.0;
|
||||
HFA4_DA.F[1] := -10.0;
|
||||
HFA4_DA.F[2] := 15.0;
|
||||
HFA4_DA.F[3] := -20.0;
|
||||
ResD := AddDoubles4A(HFA4_DA);
|
||||
ResDA := AddDoubles4A_ASM(HFA4_DA);
|
||||
if (ResD <> ResDA) then
|
||||
begin
|
||||
WriteLn('FAIL: AddDoubles4A(HFA4_DF) is not passing the aggregate correctly.');
|
||||
Halt(1);
|
||||
end else
|
||||
begin
|
||||
if ResD <> ExpD4 then
|
||||
begin
|
||||
WriteLn('FAIL: AddDoubles4A(HFA4_DF) returned ', ResD, ' instead of ', ExpD4);
|
||||
Halt(1);
|
||||
end;
|
||||
end;
|
||||
{$ENDIF}
|
||||
WriteLn('ok');
|
||||
end.
|
||||
|
162
tests/test/cg/tvectorcall2.pp
Normal file
162
tests/test/cg/tvectorcall2.pp
Normal file
@ -0,0 +1,162 @@
|
||||
{ %CPU=x86_64 }
|
||||
program vectorcall_hva_test2;
|
||||
|
||||
{$IFNDEF CPUX86_64}
|
||||
{$FATAL This test program can only be compiled on Windows or Linux 64-bit with an Intel processor }
|
||||
{$ENDIF}
|
||||
|
||||
{$push}
|
||||
{$CODEALIGN RECORDMIN=16}
|
||||
{$PACKRECORDS C}
|
||||
type
|
||||
TM128 = record
|
||||
case Byte of
|
||||
0: (M128_F32: array[0..3] of Single);
|
||||
1: (M128_F64: array[0..1] of Double);
|
||||
end;
|
||||
{$pop}
|
||||
|
||||
{ HVA test }
|
||||
THVA = record
|
||||
V1, V2, V3, V4: TM128;
|
||||
end;
|
||||
|
||||
operator +(X, Y: TM128)Z: TM128; vectorcall;
|
||||
var
|
||||
I: Integer;
|
||||
begin
|
||||
for I := 0 to 3 do
|
||||
Z.M128_F32[I] := X.M128_F32[I] + Y.M128_F32[I];
|
||||
end;
|
||||
|
||||
operator -(X, Y: TM128)Z: TM128; vectorcall;
|
||||
var
|
||||
I: Integer;
|
||||
begin
|
||||
for I := 0 to 3 do
|
||||
Z.M128_F32[I] := X.M128_F32[I] - Y.M128_F32[I];
|
||||
end;
|
||||
|
||||
{ - InputHVA goes on the stack because there are not enough free XMM registers to contain the entire argument
|
||||
- A4 does NOT go on the stack and goes into an XMM register.
|
||||
}
|
||||
function HVATest(A1, A2, A3: TM128; InputHVA: THVA; A4: TM128; Op: Integer): THVA; vectorcall;
|
||||
begin
|
||||
{ FIXME: There is an internal stack misalignment for A4, necessitating the
|
||||
use of (V)MOVDQU instead of (V)MOVDQA in the compiled code. }
|
||||
case Op of
|
||||
1:
|
||||
begin
|
||||
HVATest.V1 := InputHVA.V1 + A1;
|
||||
HVATest.V2 := InputHVA.V2 + A2;
|
||||
HVATest.V3 := InputHVA.V3 + A3;
|
||||
HVATest.V4 := InputHVA.V4 + A4;
|
||||
end;
|
||||
2:
|
||||
begin
|
||||
HVATest.V1 := InputHVA.V1 - A1;
|
||||
HVATest.V2 := InputHVA.V2 - A2;
|
||||
HVATest.V3 := InputHVA.V3 - A3;
|
||||
HVATest.V4 := InputHVA.V4 - A4;
|
||||
end;
|
||||
else
|
||||
begin
|
||||
HVATest.V1 := InputHVA.V1 + A1;
|
||||
HVATest.V2 := InputHVA.V2 - A2;
|
||||
HVATest.V3 := InputHVA.V3 + A3;
|
||||
HVATest.V4 := InputHVA.V4 - A4;
|
||||
end;
|
||||
end;
|
||||
end;
|
||||
|
||||
var
|
||||
B1, B2, B3, B4: TM128; HVA, AddRes, SubRes, MixRes, AddExp, SubExp, MixExp: THVA; I: Integer;
|
||||
begin
|
||||
B1.M128_F32[0] := 1.0; B1.M128_F32[1] := 2.0; B1.M128_F32[2] := 3.0; B1.M128_F32[3] := 4.0;
|
||||
B2.M128_F32[0] := 5.0; B2.M128_F32[1] := 6.0; B2.M128_F32[2] := 7.0; B2.M128_F32[3] := 8.0;
|
||||
B3.M128_F32[0] := 9.0; B3.M128_F32[1] := 10.0; B3.M128_F32[2] := 11.0; B3.M128_F32[3] := 12.0;
|
||||
B4.M128_F32[0] := 13.0; B4.M128_F32[1] := 14.0; B4.M128_F32[2] := 15.0; B4.M128_F32[3] := 16.0;
|
||||
|
||||
HVA.V1.M128_F32[0] := 10.0; HVA.V1.M128_F32[1] := 20.0; HVA.V1.M128_F32[2] := 30.0; HVA.V1.M128_F32[3] := 40.0;
|
||||
HVA.V2.M128_F32[0] := 50.0; HVA.V2.M128_F32[1] := 60.0; HVA.V2.M128_F32[2] := 70.0; HVA.V2.M128_F32[3] := 80.0;
|
||||
HVA.V3.M128_F32[0] := 90.0; HVA.V3.M128_F32[1] := 100.0; HVA.V3.M128_F32[2] := 110.0; HVA.V3.M128_F32[3] := 120.0;
|
||||
HVA.V4.M128_F32[0] := 130.0; HVA.V4.M128_F32[1] := 140.0; HVA.V4.M128_F32[2] := 150.0; HVA.V4.M128_F32[3] := 160.0;
|
||||
|
||||
AddExp.V1.M128_F32[0] := 11.0; AddExp.V1.M128_F32[1] := 22.0; AddExp.V1.M128_F32[2] := 33.0; AddExp.V1.M128_F32[3] := 44.0;
|
||||
AddExp.V2.M128_F32[0] := 55.0; AddExp.V2.M128_F32[1] := 66.0; AddExp.V2.M128_F32[2] := 77.0; AddExp.V2.M128_F32[3] := 88.0;
|
||||
AddExp.V3.M128_F32[0] := 99.0; AddExp.V3.M128_F32[1] := 110.0; AddExp.V3.M128_F32[2] := 121.0; AddExp.V3.M128_F32[3] := 132.0;
|
||||
AddExp.V4.M128_F32[0] := 143.0; AddExp.V4.M128_F32[1] := 154.0; AddExp.V4.M128_F32[2] := 165.0; AddExp.V4.M128_F32[3] := 176.0;
|
||||
|
||||
SubExp.V1.M128_F32[0] := 9.0; SubExp.V1.M128_F32[1] := 18.0; SubExp.V1.M128_F32[2] := 27.0; SubExp.V1.M128_F32[3] := 36.0;
|
||||
SubExp.V2.M128_F32[0] := 45.0; SubExp.V2.M128_F32[1] := 54.0; SubExp.V2.M128_F32[2] := 63.0; SubExp.V2.M128_F32[3] := 72.0;
|
||||
SubExp.V3.M128_F32[0] := 81.0; SubExp.V3.M128_F32[1] := 90.0; SubExp.V3.M128_F32[2] := 99.0; SubExp.V3.M128_F32[3] := 108.0;
|
||||
SubExp.V4.M128_F32[0] := 117.0; SubExp.V4.M128_F32[1] := 126.0; SubExp.V4.M128_F32[2] := 135.0; SubExp.V4.M128_F32[3] := 144.0;
|
||||
|
||||
MixExp.V1.M128_F32[0] := 11.0; MixExp.V1.M128_F32[1] := 22.0; MixExp.V1.M128_F32[2] := 33.0; MixExp.V1.M128_F32[3] := 44.0;
|
||||
MixExp.V2.M128_F32[0] := 45.0; MixExp.V2.M128_F32[1] := 54.0; MixExp.V2.M128_F32[2] := 63.0; MixExp.V2.M128_F32[3] := 72.0;
|
||||
MixExp.V3.M128_F32[0] := 99.0; MixExp.V3.M128_F32[1] := 110.0; MixExp.V3.M128_F32[2] := 121.0; MixExp.V3.M128_F32[3] := 132.0;
|
||||
MixExp.V4.M128_F32[0] := 117.0; MixExp.V4.M128_F32[1] := 126.0; MixExp.V4.M128_F32[2] := 135.0; MixExp.V4.M128_F32[3] := 144.0;
|
||||
|
||||
WriteLn(' B1: ', B1.M128_F32[0], ',', B1.M128_F32[1], ',', B1.M128_F32[2], ',', B1.M128_F32[3]);
|
||||
WriteLn(' B2: ', B2.M128_F32[0], ',', B2.M128_F32[1], ',', B2.M128_F32[2], ',', B2.M128_F32[3]);
|
||||
WriteLn(' B3: ', B3.M128_F32[0], ',', B3.M128_F32[1], ',', B3.M128_F32[2], ',', B3.M128_F32[3]);
|
||||
WriteLn(' B4: ', B4.M128_F32[0], ',', B4.M128_F32[1], ',', B4.M128_F32[2], ',', B4.M128_F32[3]);
|
||||
WriteLn('HVA.V1: ', HVA.V1.M128_F32[0], ',', HVA.V1.M128_F32[1], ',', HVA.V1.M128_F32[2], ',', HVA.V1.M128_F32[3]);
|
||||
WriteLn('HVA.V2: ', HVA.V2.M128_F32[0], ',', HVA.V2.M128_F32[1], ',', HVA.V2.M128_F32[2], ',', HVA.V2.M128_F32[3]);
|
||||
WriteLn('HVA.V3: ', HVA.V3.M128_F32[0], ',', HVA.V3.M128_F32[1], ',', HVA.V3.M128_F32[2], ',', HVA.V3.M128_F32[3]);
|
||||
WriteLn('HVA.V4: ', HVA.V4.M128_F32[0], ',', HVA.V4.M128_F32[1], ',', HVA.V4.M128_F32[2], ',', HVA.V4.M128_F32[3]);
|
||||
AddRes := HVATest(B1, B2, B3, HVA, B4, 1);
|
||||
SubRes := HVATest(B1, B2, B3, HVA, B4, 2);
|
||||
MixRes := HVATest(B1, B2, B3, HVA, B4, 0);
|
||||
WriteLn('----');
|
||||
WriteLn('AddRes.V1: ', AddRes.V1.M128_F32[0], ',', AddRes.V1.M128_F32[1], ',', AddRes.V1.M128_F32[2], ',', AddRes.V1.M128_F32[3]);
|
||||
WriteLn('AddRes.V2: ', AddRes.V2.M128_F32[0], ',', AddRes.V2.M128_F32[1], ',', AddRes.V2.M128_F32[2], ',', AddRes.V2.M128_F32[3]);
|
||||
WriteLn('AddRes.V3: ', AddRes.V3.M128_F32[0], ',', AddRes.V3.M128_F32[1], ',', AddRes.V3.M128_F32[2], ',', AddRes.V3.M128_F32[3]);
|
||||
WriteLn('AddRes.V4: ', AddRes.V4.M128_F32[0], ',', AddRes.V4.M128_F32[1], ',', AddRes.V4.M128_F32[2], ',', AddRes.V4.M128_F32[3]);
|
||||
WriteLn();
|
||||
WriteLn('AddExp.V1: ', AddExp.V1.M128_F32[0], ',', AddExp.V1.M128_F32[1], ',', AddExp.V1.M128_F32[2], ',', AddExp.V1.M128_F32[3]);
|
||||
WriteLn('AddExp.V2: ', AddExp.V2.M128_F32[0], ',', AddExp.V2.M128_F32[1], ',', AddExp.V2.M128_F32[2], ',', AddExp.V2.M128_F32[3]);
|
||||
WriteLn('AddExp.V3: ', AddExp.V3.M128_F32[0], ',', AddExp.V3.M128_F32[1], ',', AddExp.V3.M128_F32[2], ',', AddExp.V3.M128_F32[3]);
|
||||
WriteLn('AddExp.V4: ', AddExp.V4.M128_F32[0], ',', AddExp.V4.M128_F32[1], ',', AddExp.V4.M128_F32[2], ',', AddExp.V4.M128_F32[3]);
|
||||
WriteLn('----');
|
||||
WriteLn('SubRes.V1: ', SubRes.V1.M128_F32[0], ',', SubRes.V1.M128_F32[1], ',', SubRes.V1.M128_F32[2], ',', SubRes.V1.M128_F32[3]);
|
||||
WriteLn('SubRes.V2: ', SubRes.V2.M128_F32[0], ',', SubRes.V2.M128_F32[1], ',', SubRes.V2.M128_F32[2], ',', SubRes.V2.M128_F32[3]);
|
||||
WriteLn('SubRes.V3: ', SubRes.V3.M128_F32[0], ',', SubRes.V3.M128_F32[1], ',', SubRes.V3.M128_F32[2], ',', SubRes.V3.M128_F32[3]);
|
||||
WriteLn('SubRes.V4: ', SubRes.V4.M128_F32[0], ',', SubRes.V4.M128_F32[1], ',', SubRes.V4.M128_F32[2], ',', SubRes.V4.M128_F32[3]);
|
||||
WriteLn();
|
||||
WriteLn('SubExp.V1: ', SubExp.V1.M128_F32[0], ',', SubExp.V1.M128_F32[1], ',', SubExp.V1.M128_F32[2], ',', SubExp.V1.M128_F32[3]);
|
||||
WriteLn('SubExp.V2: ', SubExp.V2.M128_F32[0], ',', SubExp.V2.M128_F32[1], ',', SubExp.V2.M128_F32[2], ',', SubExp.V2.M128_F32[3]);
|
||||
WriteLn('SubExp.V3: ', SubExp.V3.M128_F32[0], ',', SubExp.V3.M128_F32[1], ',', SubExp.V3.M128_F32[2], ',', SubExp.V3.M128_F32[3]);
|
||||
WriteLn('SubExp.V4: ', SubExp.V4.M128_F32[0], ',', SubExp.V4.M128_F32[1], ',', SubExp.V4.M128_F32[2], ',', SubExp.V4.M128_F32[3]);
|
||||
WriteLn('----');
|
||||
WriteLn('MixRes.V1: ', MixRes.V1.M128_F32[0], ',', MixRes.V1.M128_F32[1], ',', MixRes.V1.M128_F32[2], ',', MixRes.V1.M128_F32[3]);
|
||||
WriteLn('MixRes.V2: ', MixRes.V2.M128_F32[0], ',', MixRes.V2.M128_F32[1], ',', MixRes.V2.M128_F32[2], ',', MixRes.V2.M128_F32[3]);
|
||||
WriteLn('MixRes.V3: ', MixRes.V3.M128_F32[0], ',', MixRes.V3.M128_F32[1], ',', MixRes.V3.M128_F32[2], ',', MixRes.V3.M128_F32[3]);
|
||||
WriteLn('MixRes.V4: ', MixRes.V4.M128_F32[0], ',', MixRes.V4.M128_F32[1], ',', MixRes.V4.M128_F32[2], ',', MixRes.V4.M128_F32[3]);
|
||||
WriteLn();
|
||||
WriteLn('MixExp.V1: ', MixExp.V1.M128_F32[0], ',', MixExp.V1.M128_F32[1], ',', MixExp.V1.M128_F32[2], ',', MixExp.V1.M128_F32[3]);
|
||||
WriteLn('MixExp.V2: ', MixExp.V2.M128_F32[0], ',', MixExp.V2.M128_F32[1], ',', MixExp.V2.M128_F32[2], ',', MixExp.V2.M128_F32[3]);
|
||||
WriteLn('MixExp.V3: ', MixExp.V3.M128_F32[0], ',', MixExp.V3.M128_F32[1], ',', MixExp.V3.M128_F32[2], ',', MixExp.V3.M128_F32[3]);
|
||||
WriteLn('MixExp.V4: ', MixExp.V4.M128_F32[0], ',', MixExp.V4.M128_F32[1], ',', MixExp.V4.M128_F32[2], ',', MixExp.V4.M128_F32[3]);
|
||||
|
||||
for I := 0 to 3 do
|
||||
begin
|
||||
if AddRes.V1.M128_F32[I] <> AddExp.V1.M128_F32[I] then
|
||||
begin
|
||||
WriteLn('FAILURE on AddRes.V1.M128_F32[', I, ']');
|
||||
Halt(1);
|
||||
end;
|
||||
if SubRes.V1.M128_F32[I] <> SubExp.V1.M128_F32[I] then
|
||||
begin
|
||||
WriteLn('FAILURE on SubRes.V1.M128_F32[', I, ']');
|
||||
Halt(1);
|
||||
end;
|
||||
if MixRes.V1.M128_F32[I] <> MixExp.V1.M128_F32[I] then
|
||||
begin
|
||||
WriteLn('FAILURE on MixRes.V1.M128_F32[', I, ']');
|
||||
Halt(1);
|
||||
end;
|
||||
end;
|
||||
|
||||
WriteLn('ok');
|
||||
end.
|
158
tests/test/cg/tvectorcall3.pp
Normal file
158
tests/test/cg/tvectorcall3.pp
Normal file
@ -0,0 +1,158 @@
|
||||
{ %CPU=x86_64 }
|
||||
program vectorcall_stack_test;
|
||||
|
||||
{$IFNDEF CPUX86_64}
|
||||
{$FATAL This test program can only be compiled on Windows or Linux 64-bit with an Intel processor }
|
||||
{$ENDIF}
|
||||
|
||||
{ This program can be compiled on Linux, and all the vectorcall
|
||||
routines should work the same, including the assembler routine.
|
||||
'vectorcall' should be ignored by the compiler on this platform. }
|
||||
|
||||
{$push}
|
||||
{$CODEALIGN RECORDMIN=16}
|
||||
{$PACKRECORDS C}
|
||||
type
|
||||
TM128 = record
|
||||
case Byte of
|
||||
0: (M128_F32: array[0..3] of Single);
|
||||
1: (M128_F64: array[0..1] of Double);
|
||||
end;
|
||||
|
||||
{$CODEALIGN RECORDMIN=32}
|
||||
{$PACKRECORDS C}
|
||||
type
|
||||
TM256 = record
|
||||
case Byte of
|
||||
0: (M256_F32: array[0..7] of Single);
|
||||
1: (M256_F64: array[0..3] of Double);
|
||||
2: (M256_M128: array[0..1] of TM128);
|
||||
end;
|
||||
{$pop}
|
||||
|
||||
TVector4f = packed record
|
||||
case Byte of
|
||||
0: (M128: TM128);
|
||||
1: (X, Y, Z, W: Single);
|
||||
end;
|
||||
|
||||
TVectorPair4f = packed record
|
||||
case Byte of
|
||||
0: (M256: TM256);
|
||||
1: (V: array[0..1] of TVector4f);
|
||||
2: (X1, Y1, Z1, W1, X2, Y2, Z2, W2: Single);
|
||||
end;
|
||||
|
||||
function TestFloat(TP: Single): Single; vectorcall; { vectorcall should have no effect on how this function behaves }
|
||||
begin
|
||||
TestFloat := TP * 1.5;
|
||||
end;
|
||||
|
||||
function AddVectors(V1, V2: TVector4f): TVector4f; vectorcall;
|
||||
begin
|
||||
AddVectors.X := V1.X + V2.X;
|
||||
AddVectors.Y := V1.Y + V2.Y;
|
||||
AddVectors.Z := V1.Z + V2.Z;
|
||||
AddVectors.W := V1.W + V2.W;
|
||||
end;
|
||||
|
||||
{$ASMMODE Intel}
|
||||
function AddVectorsAsm(V1, V2: TVector4f): TVector4f; vectorcall; assembler; nostackframe; inline; { The inline is for a future test }
|
||||
asm
|
||||
ADDPS XMM0, XMM1
|
||||
end;
|
||||
|
||||
{ Note: V1, V2 and the result will go on the stack until FPC fully supports 256-bit vectors }
|
||||
function AddVectors(V1, V2: TVectorPair4f): TVectorPair4f; vectorcall;
|
||||
var
|
||||
C: Integer;
|
||||
begin
|
||||
for C := 0 to 1 do
|
||||
begin
|
||||
AddVectors.V[C].X := V1.V[C].X + V2.V[C].X;
|
||||
AddVectors.V[C].Y := V1.V[C].Y + V2.V[C].Y;
|
||||
AddVectors.V[C].Z := V1.V[C].Z + V2.V[C].Z;
|
||||
AddVectors.V[C].W := V1.V[C].W + V2.V[C].W;
|
||||
end;
|
||||
end;
|
||||
|
||||
var
|
||||
Vecs: array[0..1] of TVector4f; Res, ResAsm, Exp: TVector4f;
|
||||
Pairs: array[0..1] of TVectorPair4f; ResPair, ExpPair: TVectorPair4f;
|
||||
I: Integer;
|
||||
begin
|
||||
FillDWord(Vecs[0], 0, 8);
|
||||
Vecs[0].X := TestFloat(2.0);
|
||||
Vecs[0].Y := 1.0;
|
||||
Vecs[0].Z := -4.0;
|
||||
Vecs[0].W := 1.0;
|
||||
|
||||
Vecs[1].X := 0.0;
|
||||
Vecs[1].Y := -2.0;
|
||||
Vecs[1].Z := TestFloat(4.0);
|
||||
Vecs[1].W := 0.0;
|
||||
|
||||
Exp.X := 3.0;
|
||||
Exp.Y := -1.0;
|
||||
Exp.Z := 2.0;
|
||||
Exp.W := 1.0;
|
||||
|
||||
Pairs[0].V[0].X := 1.0; Pairs[0].V[1].X := 5.0;
|
||||
Pairs[0].V[0].Y := 2.0; Pairs[0].V[1].Y := 6.0;
|
||||
Pairs[0].V[0].Z := 3.0; Pairs[0].V[1].Z := 7.0;
|
||||
Pairs[0].V[0].W := 4.0; Pairs[0].V[1].W := 8.0;
|
||||
|
||||
Pairs[1].V[0].X := 9.0; Pairs[1].V[1].X := 13.0;
|
||||
Pairs[1].V[0].Y := 10.0; Pairs[1].V[1].Y := 14.0;
|
||||
Pairs[1].V[0].Z := 11.0; Pairs[1].V[1].Z := 15.0;
|
||||
Pairs[1].V[0].W := 12.0; Pairs[1].V[1].W := 16.0;
|
||||
|
||||
ExpPair.V[0].X := 10.0; ExpPair.V[1].X := 18.0;
|
||||
ExpPair.V[0].Y := 12.0; ExpPair.V[1].Y := 20.0;
|
||||
ExpPair.V[0].Z := 14.0; ExpPair.V[1].Z := 22.0;
|
||||
ExpPair.V[0].W := 16.0; ExpPair.V[1].W := 24.0;
|
||||
|
||||
WriteLn('Vecs[0] = (', Vecs[0].X, ', ', Vecs[0].Y, ', ', Vecs[0].Z, ', ', Vecs[0].W, ')');
|
||||
WriteLn('Vecs[1] = (', Vecs[1].X, ', ', Vecs[1].Y, ', ', Vecs[1].Z, ', ', Vecs[1].W, ')');
|
||||
|
||||
Res := AddVectors(Vecs[0], Vecs[1]);
|
||||
ResAsm := AddVectorsAsm(Vecs[0], Vecs[1]);
|
||||
|
||||
WriteLn('Result = (', Res.X, ', ', Res.Y, ', ', Res.Z, ', ', Res.W, ')');
|
||||
WriteLn('ResAsm = (', ResAsm.X, ', ', ResAsm.Y, ', ', ResAsm.Z, ', ', ResAsm.W, ')');
|
||||
WriteLn('Expected = (', Exp.X, ', ', Exp.Y, ', ', Exp.Z, ', ', Exp.W, ')');
|
||||
|
||||
WriteLn('Pairs[0] = (', Pairs[0].V[0].X, ', ', Pairs[0].V[0].Y, ', ', Pairs[0].V[0].Z, ', ', Pairs[0].V[0].W, ', ', Pairs[0].V[1].X, ', ', Pairs[0].V[1].Y, ', ', Pairs[0].V[1].Z, ', ', Pairs[0].V[1].W, ')');
|
||||
WriteLn('Pairs[1] = (', Pairs[1].V[0].X, ', ', Pairs[1].V[0].Y, ', ', Pairs[1].V[0].Z, ', ', Pairs[1].V[0].W, ', ', Pairs[1].V[1].X, ', ', Pairs[1].V[1].Y, ', ', Pairs[1].V[1].Z, ', ', Pairs[1].V[1].W, ')');
|
||||
|
||||
ResPair := AddVectors(Pairs[0], Pairs[1]);
|
||||
|
||||
WriteLn('ResPair = (', ResPair.V[0].X, ', ', ResPair.V[0].Y, ', ', ResPair.V[0].Z, ', ', ResPair.V[0].W, ', ', ResPair.V[1].X, ', ', ResPair.V[1].Y, ', ', ResPair.V[1].Z, ', ', ResPair.V[1].W, ')');
|
||||
WriteLn('Expected = (', ExpPair.V[0].X, ', ', ExpPair.V[0].Y, ', ', ExpPair.V[0].Z, ', ', ExpPair.V[0].W, ', ', ExpPair.V[1].X, ', ', ExpPair.V[1].Y, ', ', ExpPair.V[1].Z, ', ', ExpPair.V[1].W, ')');
|
||||
|
||||
for I := 0 to 3 do
|
||||
begin
|
||||
if Res.M128.M128_F32[I] <> Exp.M128.M128_F32[I] then
|
||||
begin
|
||||
WriteLn('FAILURE on Res.M128.M128_F32[', I, ']');
|
||||
Halt(1);
|
||||
end;
|
||||
|
||||
if ResAsm.M128.M128_F32[I] <> Exp.M128.M128_F32[I] then
|
||||
begin
|
||||
WriteLn('FAILURE on ResAsm.M128.M128_F32[', I, ']');
|
||||
Halt(1);
|
||||
end;
|
||||
end;
|
||||
|
||||
for I := 0 to 7 do
|
||||
begin
|
||||
if ResPair.M256.M256_F32[I] <> ExpPair.M256.M256_F32[I] then
|
||||
begin
|
||||
WriteLn('FAILURE on ResPair.M256.M256_F32[', I, ']');
|
||||
Halt(1);
|
||||
end;
|
||||
end;
|
||||
|
||||
WriteLn('ok');
|
||||
end.
|
Loading…
Reference in New Issue
Block a user