* rtl and compiler compile with -Cfsse2

This commit is contained in:
florian 2003-12-26 13:19:16 +00:00
parent 3d8e1ab1fb
commit ace2d682b0
7 changed files with 107 additions and 145 deletions

View File

@ -1400,12 +1400,44 @@ implementation
procedure tcg.a_opmm_ref_reg(list: taasmoutput; Op: TOpCG; size : tcgsize;const ref: treference; reg: tregister;shuffle : pmmshuffle);
  { Generic fallback for an mm operation with a memory source operand:
    the value at ref is loaded into a temporary mm register, the operation
    is carried out register-to-register with reg as the second register
    operand, and the temporary register is released again.

    The caller's shuffle is applied only by the load. If realshuffle reports
    a real shuffle, the register-register operation gets a local copy of the
    shuffle that has been passed through removeshuffles; otherwise the
    original shuffle pointer is passed on unchanged. }
  var
    tmpreg : tregister;
    plainshuffle : tmmshuffle;
    opshuffle : pmmshuffle;
  begin
    tmpreg:=getmmregister(list,size);
    a_loadmm_ref_reg(list,size,size,ref,tmpreg,shuffle);
    { select the shuffle description used by the register-register operation }
    opshuffle:=shuffle;
    if realshuffle(shuffle) then
      begin
        plainshuffle:=shuffle^;
        removeshuffles(plainshuffle);
        opshuffle:=@plainshuffle;
      end;
    a_opmm_reg_reg(list,op,size,tmpreg,reg,opshuffle);
    ungetregister(list,tmpreg);
  end;
procedure tcg.a_opmm_reg_ref(list: taasmoutput; Op: TOpCG; size : tcgsize;reg: tregister; const ref: treference; shuffle : pmmshuffle);
  { Generic fallback for an mm operation with a memory destination operand:
    the value at ref is loaded into a temporary mm register, the operation
    is carried out register-to-register with reg as the first register
    operand and the temporary as the second, and the temporary is then
    stored back to ref and released.

    The caller's shuffle is applied only by the initial load. If realshuffle
    reports a real shuffle, both the operation and the store back use a local
    copy of the shuffle that has been passed through removeshuffles;
    otherwise they use the original shuffle pointer. }
  var
    tmpreg : tregister;
    plainshuffle : tmmshuffle;
    opshuffle : pmmshuffle;
  begin
    tmpreg:=getmmregister(list,size);
    a_loadmm_ref_reg(list,size,size,ref,tmpreg,shuffle);
    { select the shuffle description used by the operation and the store }
    opshuffle:=shuffle;
    if realshuffle(shuffle) then
      begin
        plainshuffle:=shuffle^;
        removeshuffles(plainshuffle);
        opshuffle:=@plainshuffle;
      end;
    a_opmm_reg_reg(list,op,size,reg,tmpreg,opshuffle);
    a_loadmm_reg_ref(list,size,size,tmpreg,ref,opshuffle);
    ungetregister(list,tmpreg);
  end;
@ -2000,7 +2032,10 @@ finalization
end. end.
{ {
$Log$ $Log$
Revision 1.144 2003-12-24 00:10:02 florian Revision 1.145 2003-12-26 13:19:16 florian
* rtl and compiler compile with -Cfsse2
Revision 1.144 2003/12/24 00:10:02 florian
- delete parameter in cg64 methods removed - delete parameter in cg64 methods removed
Revision 1.143 2003/12/23 14:38:07 florian Revision 1.143 2003/12/23 14:38:07 florian

View File

@ -27,7 +27,7 @@ unit n386mat;
interface interface
uses uses
node,nmat,ncgmat; node,nmat,ncgmat,nx86mat;
type type
ti386moddivnode = class(tmoddivnode) ti386moddivnode = class(tmoddivnode)
@ -40,12 +40,7 @@ interface
function first_shlshr64bitint: tnode; override; function first_shlshr64bitint: tnode; override;
end; end;
ti386unaryminusnode = class(tcgunaryminusnode) ti386unaryminusnode = class(tx86unaryminusnode)
{$ifdef SUPPORT_MMX}
procedure second_mmx;override;
{$endif SUPPORT_MMX}
procedure second_float;override;
function pass_1:tnode;override;
end; end;
ti386notnode = class(tcgnotnode) ti386notnode = class(tcgnotnode)
@ -355,133 +350,6 @@ implementation
end; end;
{*****************************************************************************
TI386UNARYMINUSNODE
*****************************************************************************}
function ti386unaryminusnode.pass_1 : tnode;
{ First pass of the i386 unary minus node: runs the first pass on the operand
  and then records register needs / the expected location depending on the
  operand type. The function result is always nil in this implementation. }
begin
result:=nil;
firstpass(left);
{ nothing more to do if an error has been reported }
if codegenerror then
exit;
if (left.resulttype.def.deftype=floatdef) then
begin
{ float operand: account for at least one fpu register and expect the
  result in an fpu register }
if (registersfpu < 1) then
registersfpu := 1;
expectloc:=LOC_FPUREGISTER;
end
{$ifdef SUPPORT_MMX}
else
if (cs_mmx in aktlocalswitches) and
is_mmx_able_array(left.resulttype.def) then
begin
{ mmx-able array operand with mmx enabled: take over the register counts
  of the operand }
{ NOTE(review): expectloc is not assigned in this branch - confirm that this
  is intended }
registers32:=left.registers32;
registersfpu:=left.registersfpu;
registersmmx:=left.registersmmx;
{ at least one mmx register is needed unless the operand is already located
  in an mmx register }
if (left.location.loc<>LOC_MMXREGISTER) and
(registersmmx<1) then
registersmmx:=1;
end
{$endif SUPPORT_MMX}
else
{ all other operand types are handled by the inherited implementation }
{ NOTE(review): the value returned by the inherited pass_1 is discarded, so
  result stays nil - confirm the ancestor never returns a replacement node }
inherited pass_1;
end;
{$ifdef SUPPORT_MMX}
procedure ti386unaryminusnode.second_mmx;
  { Generates code for the negation of an mmx value.

    A scratch mmx register is cleared with PXOR, the operand is made
    available in location.register (reused, copied from a constant mmx
    register, or loaded from memory), the packed subtract instruction that
    matches the element type is emitted with location.register and the
    scratch register as operands, and the scratch register is finally copied
    to location.register with MOVQ.

    Raises an internal error if the operand location is not supported or if
    no subtract instruction exists for the element type, instead of emitting
    an instruction with an undefined opcode. }
  var
    op : tasmop;
    hreg : tregister;
  begin
    secondpass(left);
    location_reset(location,LOC_MMXREGISTER,OS_NO);
    hreg:=cg.getmmxregister(exprasmlist,OS_M64);
    { hreg:=0 }
    emit_reg_reg(A_PXOR,S_NO,hreg,hreg);
    case left.location.loc of
      LOC_MMXREGISTER:
        begin
          location.register:=left.location.register;
        end;
      LOC_CMMXREGISTER:
        begin
          location.register:=cg.getmmxregister(exprasmlist,OS_M64);
          emit_reg_reg(A_MOVQ,S_NO,left.location.register,location.register);
        end;
      LOC_REFERENCE,
      LOC_CREFERENCE:
        begin
          reference_release(exprasmlist,left.location.reference);
          location.register:=cg.getmmxregister(exprasmlist,OS_M64);
          emit_ref_reg(A_MOVQ,S_NO,left.location.reference,location.register);
        end;
      else
        internalerror(200203225);
    end;
    { select the subtract instruction; every element type without a matching
      instruction ends in an internal error so that op is never used
      uninitialized }
    if cs_mmx_saturation in aktlocalswitches then
      case mmx_type(resulttype.def) of
        mmxs8bit:
          op:=A_PSUBSB;
        mmxu8bit:
          op:=A_PSUBUSB;
        mmxs16bit,mmxfixed16:
          op:=A_PSUBSW;
        mmxu16bit:
          op:=A_PSUBUSW;
        else
          { NOTE(review): newly introduced internal error number, check that
            it is unique }
          internalerror(200312261);
      end
    else
      case mmx_type(resulttype.def) of
        mmxs8bit,mmxu8bit:
          op:=A_PSUBB;
        mmxs16bit,mmxu16bit,mmxfixed16:
          op:=A_PSUBW;
        mmxs32bit,mmxu32bit:
          op:=A_PSUBD;
        else
          { NOTE(review): newly introduced internal error number, check that
            it is unique }
          internalerror(200312262);
      end;
    emit_reg_reg(op,S_NO,location.register,hreg);
    emit_reg_reg(A_MOVQ,S_NO,hreg,location.register);
    { release the scratch register only after its last use }
    cg.ungetregister(exprasmlist,hreg);
  end;
{$endif SUPPORT_MMX}
procedure ti386unaryminusnode.second_float;
  { Generates code for the negation of a floating point value on the fpu:
    the operand is loaded to NR_ST (from memory or from an fpu register)
    and FCHS is emitted. The result location is the fpu register NR_ST.
    Any other operand location triggers internal error 200312241;
    mm register operands are not handled here yet. }
  begin
    secondpass(left);
    location_reset(location,LOC_FPUREGISTER,def_cgsize(resulttype.def));
    if left.location.loc in [LOC_REFERENCE,LOC_CREFERENCE] then
      begin
        { operand in memory: load it to st }
        reference_release(exprasmlist,left.location.reference);
        location.register:=NR_ST;
        cg.a_loadfpu_ref_reg(exprasmlist,
          def_cgsize(left.resulttype.def),
          left.location.reference,NR_ST);
        emit_none(A_FCHS,S_NO);
      end
    else if left.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
      begin
        { "load st,st" is ignored by the code generator }
        cg.a_loadfpu_reg_reg(exprasmlist,left.location.size,left.location.register,NR_ST);
        location.register:=NR_ST;
        emit_none(A_FCHS,S_NO);
      end
    else
      internalerror(200312241);
  end;
{***************************************************************************** {*****************************************************************************
TI386NOTNODE TI386NOTNODE
*****************************************************************************} *****************************************************************************}
@ -580,14 +448,17 @@ implementation
{$endif SUPPORT_MMX} {$endif SUPPORT_MMX}
begin begin
cunaryminusnode:=ti386unaryminusnode;
cmoddivnode:=ti386moddivnode; cmoddivnode:=ti386moddivnode;
cshlshrnode:=ti386shlshrnode; cshlshrnode:=ti386shlshrnode;
cunaryminusnode:=ti386unaryminusnode;
cnotnode:=ti386notnode; cnotnode:=ti386notnode;
end. end.
{ {
$Log$ $Log$
Revision 1.67 2003-12-25 01:07:09 florian Revision 1.68 2003-12-26 13:19:16 florian
* rtl and compiler compile with -Cfsse2
Revision 1.67 2003/12/25 01:07:09 florian
+ $fputype directive support + $fputype directive support
+ single data type operations with sse unit + single data type operations with sse unit
* fixed more x86-64 stuff * fixed more x86-64 stuff

View File

@ -156,6 +156,20 @@ implementation
reference_reset_base(href,tempparaloc.reference.index,tempparaloc.reference.offset); reference_reset_base(href,tempparaloc.reference.index,tempparaloc.reference.offset);
cg.a_loadfpu_reg_ref(exprasmlist,def_cgsize(left.resulttype.def),left.location.register,href); cg.a_loadfpu_reg_ref(exprasmlist,def_cgsize(left.resulttype.def),left.location.register,href);
end; end;
LOC_MMREGISTER,
LOC_CMMREGISTER:
begin
size:=align(tfloatdef(left.resulttype.def).size,tempparaloc.alignment);
inc(tcgcallnode(aktcallnode).pushedparasize,size);
if tempparaloc.reference.index=NR_STACK_POINTER_REG then
begin
cg.g_stackpointer_alloc(exprasmlist,size);
reference_reset_base(href,NR_STACK_POINTER_REG,0);
end
else
reference_reset_base(href,tempparaloc.reference.index,tempparaloc.reference.offset);
cg.a_loadmm_reg_ref(exprasmlist,def_cgsize(left.resulttype.def),def_cgsize(left.resulttype.def),left.location.register,href,mms_movescalar);
end;
LOC_REFERENCE, LOC_REFERENCE,
LOC_CREFERENCE : LOC_CREFERENCE :
begin begin
@ -1131,7 +1145,10 @@ begin
end. end.
{ {
$Log$ $Log$
Revision 1.147 2003-12-21 19:42:42 florian Revision 1.148 2003-12-26 13:19:16 florian
* rtl and compiler compile with -Cfsse2
Revision 1.147 2003/12/21 19:42:42 florian
* fixed ppc inlining stuff * fixed ppc inlining stuff
* fixed wrong unit writing * fixed wrong unit writing
+ added some sse stuff + added some sse stuff

View File

@ -670,6 +670,15 @@ implementation
location_reset(l,LOC_REFERENCE,l.size); location_reset(l,LOC_REFERENCE,l.size);
l.reference:=r; l.reference:=r;
end; end;
LOC_MMREGISTER,
LOC_CMMREGISTER:
begin
tg.GetTemp(list,TCGSize2Size[l.size],tt_normal,r);
cg.a_loadmm_reg_ref(list,l.size,l.size,l.register,r,mms_movescalar);
location_release(list,l);
location_reset(l,LOC_REFERENCE,l.size);
l.reference:=r;
end;
LOC_CONSTANT, LOC_CONSTANT,
LOC_REGISTER, LOC_REGISTER,
LOC_CREGISTER : LOC_CREGISTER :
@ -2039,7 +2048,10 @@ implementation
end. end.
{ {
$Log$ $Log$
Revision 1.178 2003-12-26 00:32:21 florian Revision 1.179 2003-12-26 13:19:16 florian
* rtl and compiler compile with -Cfsse2
Revision 1.178 2003/12/26 00:32:21 florian
+ fpu<->mm register conversion + fpu<->mm register conversion
Revision 1.177 2003/12/24 00:10:02 florian Revision 1.177 2003/12/24 00:10:02 florian

View File

@ -830,11 +830,14 @@ unit cgx86;
) )
), ),
( { vectorized/packed } ( { vectorized/packed }
{ because the logical packed single instructions have shorter op codes, we always use
these
}
( { OS_F32 } ( { OS_F32 }
A_NOP,A_ADDPS,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP A_NOP,A_ADDPS,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_XORPS
), ),
( { OS_F64 } ( { OS_F64 }
A_NOP,A_ADDPD,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP A_NOP,A_ADDPD,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_NOP,A_XORPS
) )
) )
); );
@ -1918,7 +1921,10 @@ unit cgx86;
end. end.
{ {
$Log$ $Log$
Revision 1.98 2003-12-26 00:32:22 florian Revision 1.99 2003-12-26 13:19:16 florian
* rtl and compiler compile with -Cfsse2
Revision 1.98 2003/12/26 00:32:22 florian
+ fpu<->mm register conversion + fpu<->mm register conversion
Revision 1.97 2003/12/25 12:01:35 florian Revision 1.97 2003/12/25 12:01:35 florian

View File

@ -421,7 +421,8 @@ implementation
cgsize2subreg:=R_SUBQ; cgsize2subreg:=R_SUBQ;
OS_M64: OS_M64:
cgsize2subreg:=R_SUBNONE; cgsize2subreg:=R_SUBNONE;
OS_F32,OS_F64: OS_F32,OS_F64,
OS_M128,OS_MS128:
cgsize2subreg:=R_SUBWHOLE; cgsize2subreg:=R_SUBWHOLE;
else else
internalerror(200301231); internalerror(200301231);
@ -534,7 +535,10 @@ implementation
end. end.
{ {
$Log$ $Log$
Revision 1.33 2003-12-25 01:07:09 florian Revision 1.34 2003-12-26 13:19:16 florian
* rtl and compiler compile with -Cfsse2
Revision 1.33 2003/12/25 01:07:09 florian
+ $fputype directive support + $fputype directive support
+ single data type operations with sse unit + single data type operations with sse unit
* fixed more x86-64 stuff * fixed more x86-64 stuff

View File

@ -240,6 +240,13 @@ unit nx86add;
if (right.location.loc=LOC_MMREGISTER) and (op in [OP_ADD,OP_MUL]) then if (right.location.loc=LOC_MMREGISTER) and (op in [OP_ADD,OP_MUL]) then
begin begin
location.register:=right.location.register; location.register:=right.location.register;
{ force floating point reg. location to be written to memory,
we don't force it to mm register because writing to memory
probably allows shorter code because there is no direct fpu->mm register
copy instruction
}
if left.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
location_force_mem(exprasmlist,left.location);
cg.a_opmm_loc_reg(exprasmlist,op,location.size,left.location,location.register,mms_movescalar); cg.a_opmm_loc_reg(exprasmlist,op,location.size,left.location,location.register,mms_movescalar);
location_release(exprasmlist,left.location); location_release(exprasmlist,left.location);
end end
@ -247,6 +254,13 @@ unit nx86add;
begin begin
location_force_mmregscalar(exprasmlist,left.location,false); location_force_mmregscalar(exprasmlist,left.location,false);
location.register:=left.location.register; location.register:=left.location.register;
{ force floating point reg. location to be written to memory,
we don't force it to mm register because writing to memory
probably allows shorter code because there is no direct fpu->mm register
copy instruction
}
if right.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
location_force_mem(exprasmlist,right.location);
cg.a_opmm_loc_reg(exprasmlist,op,location.size,right.location,location.register,mms_movescalar); cg.a_opmm_loc_reg(exprasmlist,op,location.size,right.location,location.register,mms_movescalar);
location_release(exprasmlist,right.location); location_release(exprasmlist,right.location);
end; end;
@ -255,7 +269,10 @@ unit nx86add;
end. end.
{ {
$Log$ $Log$
Revision 1.4 2003-12-26 00:32:22 florian Revision 1.5 2003-12-26 13:19:16 florian
* rtl and compiler compile with -Cfsse2
Revision 1.4 2003/12/26 00:32:22 florian
+ fpu<->mm register conversion + fpu<->mm register conversion
Revision 1.3 2003/12/25 01:07:09 florian Revision 1.3 2003/12/25 01:07:09 florian