* Some work to restore the MMX capabilities

This commit is contained in:
daniel 2003-12-19 22:08:44 +00:00
parent e0303f16a7
commit d84b7d0743
6 changed files with 228 additions and 95 deletions

View File

@ -165,6 +165,10 @@ interface
end;
{$endif cpu64bit}
{ Packed group of four registers, reg0 through reg3.
  NOTE(review): presumably used to describe a value that is spread over
  several MMX registers - confirm against the users of this type. }
Tregistermmxset = packed record
  reg0 : Tregister;
  reg1 : Tregister;
  reg2 : Tregister;
  reg3 : Tregister;
end;
{ Set type definition for registers }
tcpuregisterset = set of byte;
tsuperregisterset = array[byte] of set of byte;
@ -574,7 +578,10 @@ finalization
end.
{
$Log$
Revision 1.79 2003-12-15 21:25:48 peter
Revision 1.80 2003-12-19 22:08:44 daniel
* Some work to restore the MMX capabilities
Revision 1.79 2003/12/15 21:25:48 peter
* reg allocations for imaginary register are now inserted just
before reg allocation
* tregister changed to enum to allow compile time check

View File

@ -61,16 +61,41 @@ unit cgcpu;
{ Returns the code generator size (tcgsize) that belongs to register reg.

  The result depends on the register type reported by getregtype:
    - integer registers: looked up from the subregister via subreg2cgsize
    - FPU registers: OS_F80
    - MMX and MM registers: OS_M64
    - special registers: OS_16 for CS, DS, ES, SS, FS and GS, OS_32 otherwise
  Any other register type raises internal error 200303181. }
class function tcg386.reg_cgsize(const reg: tregister): tcgsize;
{ NOTE(review): the bare "const" on the next line is directly followed by a
  second "const"; it looks like a leftover of the previous declaration -
  confirm against the actual unit. }
const
{ Lookup table indexed by Tsubregister giving the size of an integer register. }
const subreg2cgsize:array[Tsubregister] of Tcgsize =
(OS_NO,OS_8,OS_8,OS_16,OS_32,OS_64,OS_NO);
begin
case getregtype(reg) of
R_INTREGISTER :
{ size is determined by which part of the integer register is addressed }
reg_cgsize:=subreg2cgsize[getsubreg(reg)];
R_FPUREGISTER :
reg_cgsize:=OS_F80;
R_MMXREGISTER,
R_MMREGISTER :
{ both multimedia register types are reported as OS_M64 here }
reg_cgsize:=OS_M64;
R_SPECIALREGISTER :
case reg of
{ the six registers listed here are 16 bit wide }
NR_CS,NR_DS,NR_ES,NR_SS,NR_FS,NR_GS:
reg_cgsize:=OS_16
else
{ every other special register is treated as 32 bit }
reg_cgsize:=OS_32
end
else
{ unknown register type }
internalerror(200303181);
end;
end;
{ const
opsize_2_cgsize: array[topsize] of tcgsize = (OS_NO,
OS_8,OS_16,OS_32,OS_NO,OS_NO,OS_NO,
OS_32,OS_64,OS_64,
OS_F32,OS_F64,OS_F80,OS_F32,OS_F64,OS_NO,OS_NO,
OS_F32,OS_F64,OS_F80,OS_F32,OS_F64,OS_M64,OS_NO,
OS_NO,OS_NO,OS_NO
);
begin
result := opsize_2_cgsize[reg2opsize(reg)];
end;
end;}
{ ************* 64bit operations ************ }
@ -207,7 +232,10 @@ begin
end.
{
$Log$
Revision 1.40 2003-10-10 17:48:14 peter
Revision 1.41 2003-12-19 22:08:44 daniel
* Some work to restore the MMX capabilities
Revision 1.40 2003/10/10 17:48:14 peter
* old trgobj moved to x86/rgcpu and renamed to trgx86fpu
* tregisteralloctor renamed to trgobj
* removed rgobj from a lot of units

View File

@ -632,6 +632,8 @@ implementation
{ firstpass everything }
flowcontrol:=[];
do_firstpass(code);
if code.registersfpu>0 then
include(current_procinfo.flags,pi_uses_fpu);
{ only do secondpass if there are no errors }
if ErrorCount=0 then
@ -1334,7 +1336,10 @@ implementation
end.
{
$Log$
Revision 1.179 2003-12-16 22:36:19 florian
Revision 1.180 2003-12-19 22:08:44 daniel
* Some work to restore the MMX capabilities
Revision 1.179 2003/12/16 22:36:19 florian
* forgot a commit
Revision 1.178 2003/12/16 21:29:24 florian

View File

@ -329,7 +329,9 @@ type
{# procedure is declared as @var(assembler), don't optimize}
pi_is_assembler,
{# procedure contains data which needs to be finalized }
pi_needs_implicit_finally
pi_needs_implicit_finally,
{# procedure uses fpu}
pi_uses_fpu
);
tprocinfoflags=set of tprocinfoflag;
@ -393,7 +395,10 @@ initialization
end.
{
$Log$
Revision 1.72 2003-12-16 21:29:24 florian
Revision 1.73 2003-12-19 22:08:44 daniel
* Some work to restore the MMX capabilities
Revision 1.72 2003/12/16 21:29:24 florian
+ inlined procedures inherit procinfo flags
Revision 1.71 2003/11/23 17:05:16 peter

View File

@ -40,7 +40,9 @@ unit cgx86;
procedure init_register_allocators;override;
procedure done_register_allocators;override;
function getfpuregister(list:Taasmoutput;size:Tcgsize):Tregister;override;
function getfpuregister(list:Taasmoutput;size:Tcgsize):Tregister;override;
function getmmxregister(list:Taasmoutput):Tregister;
procedure getexplicitregister(list:Taasmoutput;r:Tregister);override;
procedure ungetregister(list:Taasmoutput;r:Tregister);override;
procedure allocexplicitregisters(list:Taasmoutput;rt:Tregistertype;r:Tcpuregisterset);override;
@ -172,6 +174,7 @@ unit cgx86;
rg[R_INTREGISTER]:=trgcpu.create(R_INTREGISTER,R_SUBWHOLE,[RS_EAX,RS_EDX,RS_ECX,RS_ESI,RS_EDI],first_int_imreg,[RS_EBP,RS_EBX])
else
rg[R_INTREGISTER]:=trgcpu.create(R_INTREGISTER,R_SUBWHOLE,[RS_EAX,RS_EDX,RS_ECX,RS_EBX,RS_ESI,RS_EDI],first_int_imreg,[RS_EBP]);
rg[R_MMXREGISTER]:=trgcpu.create(R_MMXREGISTER,R_SUBNONE,[RS_MM0,RS_MM1,RS_MM2,RS_MM3,RS_MM4,RS_MM5,RS_MM6,RS_MM7],first_sse_imreg,[]);
rg[R_MMREGISTER]:=trgcpu.create(R_MMREGISTER,R_SUBNONE,[RS_MM0,RS_MM1,RS_MM2,RS_MM3,RS_MM4,RS_MM5,RS_MM6,RS_MM7],first_sse_imreg,[]);
rgfpu:=Trgx86fpu.create;
end;
@ -183,6 +186,8 @@ unit cgx86;
rg[R_INTREGISTER]:=nil;
rg[R_MMREGISTER].free;
rg[R_MMREGISTER]:=nil;
rg[R_MMXREGISTER].free;
rg[R_MMXREGISTER]:=nil;
rgfpu.free;
end;
@ -192,6 +197,13 @@ unit cgx86;
result:=rgfpu.getregisterfpu(list);
end;
{ Allocates a register from the MMX register allocator rg[R_MMXREGISTER]
  and returns it. The allocation is recorded in list.
  Raises internal error 200312124 when no MMX register allocator exists. }
function Tcgx86.getmmxregister(list:Taasmoutput):Tregister;

  begin
    { the allocator must have been created before registers can be requested }
    if rg[R_MMXREGISTER]=nil then
      internalerror(200312124);
    getmmxregister:=rg[R_MMXREGISTER].getregister(list,R_SUBNONE);
  end;
procedure Tcgx86.getexplicitregister(list:Taasmoutput;r:Tregister);
begin
@ -1142,99 +1154,161 @@ unit cgx86;
procedure Tcgx86.g_concatcopy(list:Taasmoutput;const source,dest:Treference;
len:aword;delsource,loadref:boolean);
type copymode=(copy_move,copy_mmx,copy_string);
var srcref,dstref:Treference;
r:Tregister;
r,r0,r1,r2,r3:Tregister;
helpsize:aword;
copysize:byte;
cgsize:Tcgsize;
cm:copymode;
begin
cm:=copy_move;
helpsize:=12;
if cs_littlesize in aktglobalswitches then
helpsize:=8;
if not loadref and (len<=helpsize) then
begin
dstref:=dest;
srcref:=source;
copysize:=4;
cgsize:=OS_32;
while len<>0 do
begin
if len<2 then
begin
copysize:=1;
cgsize:=OS_8;
end
else if len<4 then
begin
copysize:=2;
cgsize:=OS_16;
end;
dec(len,copysize);
if (len=0) and delsource then
reference_release(list,source);
r:=getintregister(list,cgsize);
a_load_ref_reg(list,cgsize,cgsize,srcref,r);
ungetregister(list,r);
a_load_reg_ref(list,cgsize,cgsize,r,dstref);
inc(srcref.offset,copysize);
inc(dstref.offset,copysize);
end;
end
else
begin
getexplicitregister(list,NR_EDI);
a_loadaddr_ref_reg(list,dest,NR_EDI);
getexplicitregister(list,NR_ESI);
if loadref then
a_load_ref_reg(list,OS_ADDR,OS_ADDR,source,NR_ESI)
else
begin
a_loadaddr_ref_reg(list,source,NR_ESI);
if delsource then
begin
srcref:=source;
{ Don't release ESI register yet, it's needed
by the movsl }
if (srcref.base=NR_ESI) then
srcref.base:=NR_NO
else if (srcref.index=NR_ESI) then
srcref.index:=NR_NO;
reference_release(list,srcref);
end;
end;
getexplicitregister(list,NR_ECX);
list.concat(Taicpu.op_none(A_CLD,S_NO));
if cs_littlesize in aktglobalswitches then
begin
a_load_const_reg(list,OS_INT,len,NR_ECX);
list.concat(Taicpu.op_none(A_REP,S_NO));
list.concat(Taicpu.op_none(A_MOVSB,S_NO));
end
else
begin
helpsize:=len shr 2;
len:=len and 3;
if helpsize>1 then
begin
a_load_const_reg(list,OS_INT,helpsize,NR_ECX);
list.concat(Taicpu.op_none(A_REP,S_NO));
end;
if helpsize>0 then
list.concat(Taicpu.op_none(A_MOVSD,S_NO));
if len>1 then
begin
dec(len,2);
list.concat(Taicpu.op_none(A_MOVSW,S_NO));
end;
if len=1 then
list.concat(Taicpu.op_none(A_MOVSB,S_NO));
if (cs_mmx in aktlocalswitches) and
not(pi_uses_fpu in current_procinfo.flags) and
((len=8) or (len=16) or (len=24) or (len=32)) then
cm:=copy_mmx;
if (cs_littlesize in aktglobalswitches) and
(len>helpsize) and
not((len<=16) and (cm=copy_mmx)) then
cm:=copy_string;
if loadref then
cm:=copy_string;
case cm of
copy_move:
begin
dstref:=dest;
srcref:=source;
copysize:=4;
cgsize:=OS_32;
while len<>0 do
begin
if len<2 then
begin
copysize:=1;
cgsize:=OS_8;
end
else if len<4 then
begin
copysize:=2;
cgsize:=OS_16;
end;
dec(len,copysize);
if (len=0) and delsource then
reference_release(list,source);
r:=getintregister(list,cgsize);
a_load_ref_reg(list,cgsize,cgsize,srcref,r);
ungetregister(list,r);
a_load_reg_ref(list,cgsize,cgsize,r,dstref);
inc(srcref.offset,copysize);
inc(dstref.offset,copysize);
end;
ungetregister(list,NR_ECX);
ungetregister(list,NR_ESI);
ungetregister(list,NR_EDI);
end;
copy_mmx:
begin
dstref:=dest;
srcref:=source;
r0:=getmmxregister(list);
a_loadmm_ref_reg(list,OS_M64,OS_M64,srcref,r0,nil);
if len>=16 then
begin
inc(srcref.offset,8);
r1:=getmmxregister(list);
a_loadmm_ref_reg(list,OS_M64,OS_M64,srcref,r1,nil);
end;
if len>=24 then
begin
inc(srcref.offset,8);
r2:=getmmxregister(list);
a_loadmm_ref_reg(list,OS_M64,OS_M64,srcref,r2,nil);
end;
if len>=32 then
begin
inc(srcref.offset,8);
r3:=getmmxregister(list);
a_loadmm_ref_reg(list,OS_M64,OS_M64,srcref,r3,nil);
end;
a_loadmm_reg_ref(list,OS_M64,OS_M64,r0,dstref,nil);
ungetregister(list,r0);
if len>=16 then
begin
inc(dstref.offset,8);
a_loadmm_reg_ref(list,OS_M64,OS_M64,r1,dstref,nil);
ungetregister(list,r1);
end;
if len>=24 then
begin
inc(dstref.offset,8);
a_loadmm_reg_ref(list,OS_M64,OS_M64,r2,dstref,nil);
ungetregister(list,r2);
end;
if len>=32 then
begin
inc(dstref.offset,8);
a_loadmm_reg_ref(list,OS_M64,OS_M64,r3,dstref,nil);
ungetregister(list,r3);
end;
end
else {copy_string, should be a good fallback in case of unhandled}
begin
getexplicitregister(list,NR_EDI);
a_loadaddr_ref_reg(list,dest,NR_EDI);
getexplicitregister(list,NR_ESI);
if loadref then
a_load_ref_reg(list,OS_ADDR,OS_ADDR,source,NR_ESI)
else
begin
a_loadaddr_ref_reg(list,source,NR_ESI);
if delsource then
begin
srcref:=source;
{ Don't release ESI register yet, it's needed
by the movsl }
if (srcref.base=NR_ESI) then
srcref.base:=NR_NO
else if (srcref.index=NR_ESI) then
srcref.index:=NR_NO;
reference_release(list,srcref);
end;
end;
getexplicitregister(list,NR_ECX);
list.concat(Taicpu.op_none(A_CLD,S_NO));
if cs_littlesize in aktglobalswitches then
begin
a_load_const_reg(list,OS_INT,len,NR_ECX);
list.concat(Taicpu.op_none(A_REP,S_NO));
list.concat(Taicpu.op_none(A_MOVSB,S_NO));
end
else
begin
helpsize:=len shr 2;
len:=len and 3;
if helpsize>1 then
begin
a_load_const_reg(list,OS_INT,helpsize,NR_ECX);
list.concat(Taicpu.op_none(A_REP,S_NO));
end;
if helpsize>0 then
list.concat(Taicpu.op_none(A_MOVSD,S_NO));
if len>1 then
begin
dec(len,2);
list.concat(Taicpu.op_none(A_MOVSW,S_NO));
end;
if len=1 then
list.concat(Taicpu.op_none(A_MOVSB,S_NO));
end;
ungetregister(list,NR_ECX);
ungetregister(list,NR_ESI);
ungetregister(list,NR_EDI);
end;
end;
if delsource then
tg.ungetiftemp(list,source);
@ -1528,6 +1602,8 @@ unit cgx86;
list.concat(tai_regalloc.dealloc(NR_EBX));
list.concat(tai_regalloc.dealloc(NR_EBP));
list.concat(Taicpu.op_none(A_LEAVE,S_NO));
if assigned(rg[R_MMXREGISTER]) and (rg[R_MMXREGISTER].uses_registers) then
list.concat(Taicpu.op_none(A_EMMS,S_NO));
end;
@ -1682,7 +1758,10 @@ unit cgx86;
end.
{
$Log$
Revision 1.91 2003-12-15 21:25:49 peter
Revision 1.92 2003-12-19 22:08:44 daniel
* Some work to restore the MMX capabilities
Revision 1.91 2003/12/15 21:25:49 peter
* reg allocations for imaginary register are now inserted just
before reg allocation
* tregister changed to enum to allow compile time check

View File

@ -270,7 +270,11 @@ uses
4 : (register64 : tregister64);
);
{ it's only for better handling }
LOC_MMXREGISTER,LOC_CMMXREGISTER : (mmxreg : tregister);
LOC_MMXREGISTER,LOC_CMMXREGISTER : (
case longint of
0: (mmxreg : tregister);
1: (mmxregset : Tregistermmxset);
);
end;
tlocation = packed record
@ -415,6 +419,8 @@ implementation
cgsize2subreg:=R_SUBD;
OS_64,OS_S64:
cgsize2subreg:=R_SUBQ;
OS_M64:
cgsize2subreg:=R_SUBNONE;
else
internalerror(200301231);
end;
@ -526,7 +532,10 @@ implementation
end.
{
$Log$
Revision 1.31 2003-12-15 21:25:49 peter
Revision 1.32 2003-12-19 22:08:44 daniel
* Some work to restore the MMX capabilities
Revision 1.31 2003/12/15 21:25:49 peter
* reg allocations for imaginary register are now inserted just
before reg allocation
* tregister changed to enum to allow compile time check