* Some work to restore the MMX capabilities

This commit is contained in:
daniel 2003-12-19 22:08:44 +00:00
parent e0303f16a7
commit d84b7d0743
6 changed files with 228 additions and 95 deletions

View File

@ -165,6 +165,10 @@ interface
end; end;
{$endif cpu64bit} {$endif cpu64bit}
{ Group of four registers, reg0..reg3, stored as one packed record.
  The location record exposes it as the mmxregset variant of
  LOC_MMXREGISTER/LOC_CMMXREGISTER. }
Tregistermmxset = packed record
  reg0 : Tregister;
  reg1 : Tregister;
  reg2 : Tregister;
  reg3 : Tregister;
end;
{ Set type definition for registers } { Set type definition for registers }
tcpuregisterset = set of byte; tcpuregisterset = set of byte;
tsuperregisterset = array[byte] of set of byte; tsuperregisterset = array[byte] of set of byte;
@ -574,7 +578,10 @@ finalization
end. end.
{ {
$Log$ $Log$
Revision 1.79 2003-12-15 21:25:48 peter Revision 1.80 2003-12-19 22:08:44 daniel
* Some work to restore the MMX capabilities
Revision 1.79 2003/12/15 21:25:48 peter
* reg allocations for imaginary register are now inserted just * reg allocations for imaginary register are now inserted just
before reg allocation before reg allocation
* tregister changed to enum to allow compile time check * tregister changed to enum to allow compile time check

View File

@ -61,16 +61,41 @@ unit cgcpu;
class function tcg386.reg_cgsize(const reg: tregister): tcgsize; class function tcg386.reg_cgsize(const reg: tregister): tcgsize;
const
const subreg2cgsize:array[Tsubregister] of Tcgsize =
(OS_NO,OS_8,OS_8,OS_16,OS_32,OS_64,OS_NO);
begin
case getregtype(reg) of
R_INTREGISTER :
reg_cgsize:=subreg2cgsize[getsubreg(reg)];
R_FPUREGISTER :
reg_cgsize:=OS_F80;
R_MMXREGISTER,
R_MMREGISTER :
reg_cgsize:=OS_M64;
R_SPECIALREGISTER :
case reg of
NR_CS,NR_DS,NR_ES,NR_SS,NR_FS,NR_GS:
reg_cgsize:=OS_16
else
reg_cgsize:=OS_32
end
else
internalerror(200303181);
end;
end;
{ const
opsize_2_cgsize: array[topsize] of tcgsize = (OS_NO, opsize_2_cgsize: array[topsize] of tcgsize = (OS_NO,
OS_8,OS_16,OS_32,OS_NO,OS_NO,OS_NO, OS_8,OS_16,OS_32,OS_NO,OS_NO,OS_NO,
OS_32,OS_64,OS_64, OS_32,OS_64,OS_64,
OS_F32,OS_F64,OS_F80,OS_F32,OS_F64,OS_NO,OS_NO, OS_F32,OS_F64,OS_F80,OS_F32,OS_F64,OS_M64,OS_NO,
OS_NO,OS_NO,OS_NO OS_NO,OS_NO,OS_NO
); );
begin begin
result := opsize_2_cgsize[reg2opsize(reg)]; result := opsize_2_cgsize[reg2opsize(reg)];
end; end;}
{ ************* 64bit operations ************ } { ************* 64bit operations ************ }
@ -207,7 +232,10 @@ begin
end. end.
{ {
$Log$ $Log$
Revision 1.40 2003-10-10 17:48:14 peter Revision 1.41 2003-12-19 22:08:44 daniel
* Some work to restore the MMX capabilities
Revision 1.40 2003/10/10 17:48:14 peter
* old trgobj moved to x86/rgcpu and renamed to trgx86fpu * old trgobj moved to x86/rgcpu and renamed to trgx86fpu
* tregisteralloctor renamed to trgobj * tregisteralloctor renamed to trgobj
* removed rgobj from a lot of units * removed rgobj from a lot of units

View File

@ -632,6 +632,8 @@ implementation
{ firstpass everything } { firstpass everything }
flowcontrol:=[]; flowcontrol:=[];
do_firstpass(code); do_firstpass(code);
if code.registersfpu>0 then
include(current_procinfo.flags,pi_uses_fpu);
{ only do secondpass if there are no errors } { only do secondpass if there are no errors }
if ErrorCount=0 then if ErrorCount=0 then
@ -1334,7 +1336,10 @@ implementation
end. end.
{ {
$Log$ $Log$
Revision 1.179 2003-12-16 22:36:19 florian Revision 1.180 2003-12-19 22:08:44 daniel
* Some work to restore the MMX capabilities
Revision 1.179 2003/12/16 22:36:19 florian
* forgot a commit * forgot a commit
Revision 1.178 2003/12/16 21:29:24 florian Revision 1.178 2003/12/16 21:29:24 florian

View File

@ -329,7 +329,9 @@ type
{# procedure is declared as @var(assembler), don't optimize} {# procedure is declared as @var(assembler), don't optimize}
pi_is_assembler, pi_is_assembler,
{# procedure contains data which needs to be finalized } {# procedure contains data which needs to be finalized }
pi_needs_implicit_finally pi_needs_implicit_finally,
{# procedure uses fpu}
pi_uses_fpu
); );
tprocinfoflags=set of tprocinfoflag; tprocinfoflags=set of tprocinfoflag;
@ -393,7 +395,10 @@ initialization
end. end.
{ {
$Log$ $Log$
Revision 1.72 2003-12-16 21:29:24 florian Revision 1.73 2003-12-19 22:08:44 daniel
* Some work to restore the MMX capabilities
Revision 1.72 2003/12/16 21:29:24 florian
+ inlined procedures inherit procinfo flags + inlined procedures inherit procinfo flags
Revision 1.71 2003/11/23 17:05:16 peter Revision 1.71 2003/11/23 17:05:16 peter

View File

@ -40,7 +40,9 @@ unit cgx86;
procedure init_register_allocators;override; procedure init_register_allocators;override;
procedure done_register_allocators;override; procedure done_register_allocators;override;
function getfpuregister(list:Taasmoutput;size:Tcgsize):Tregister;override; function getfpuregister(list:Taasmoutput;size:Tcgsize):Tregister;override;
function getmmxregister(list:Taasmoutput):Tregister;
procedure getexplicitregister(list:Taasmoutput;r:Tregister);override; procedure getexplicitregister(list:Taasmoutput;r:Tregister);override;
procedure ungetregister(list:Taasmoutput;r:Tregister);override; procedure ungetregister(list:Taasmoutput;r:Tregister);override;
procedure allocexplicitregisters(list:Taasmoutput;rt:Tregistertype;r:Tcpuregisterset);override; procedure allocexplicitregisters(list:Taasmoutput;rt:Tregistertype;r:Tcpuregisterset);override;
@ -172,6 +174,7 @@ unit cgx86;
rg[R_INTREGISTER]:=trgcpu.create(R_INTREGISTER,R_SUBWHOLE,[RS_EAX,RS_EDX,RS_ECX,RS_ESI,RS_EDI],first_int_imreg,[RS_EBP,RS_EBX]) rg[R_INTREGISTER]:=trgcpu.create(R_INTREGISTER,R_SUBWHOLE,[RS_EAX,RS_EDX,RS_ECX,RS_ESI,RS_EDI],first_int_imreg,[RS_EBP,RS_EBX])
else else
rg[R_INTREGISTER]:=trgcpu.create(R_INTREGISTER,R_SUBWHOLE,[RS_EAX,RS_EDX,RS_ECX,RS_EBX,RS_ESI,RS_EDI],first_int_imreg,[RS_EBP]); rg[R_INTREGISTER]:=trgcpu.create(R_INTREGISTER,R_SUBWHOLE,[RS_EAX,RS_EDX,RS_ECX,RS_EBX,RS_ESI,RS_EDI],first_int_imreg,[RS_EBP]);
rg[R_MMXREGISTER]:=trgcpu.create(R_MMXREGISTER,R_SUBNONE,[RS_MM0,RS_MM1,RS_MM2,RS_MM3,RS_MM4,RS_MM5,RS_MM6,RS_MM7],first_sse_imreg,[]);
rg[R_MMREGISTER]:=trgcpu.create(R_MMREGISTER,R_SUBNONE,[RS_MM0,RS_MM1,RS_MM2,RS_MM3,RS_MM4,RS_MM5,RS_MM6,RS_MM7],first_sse_imreg,[]); rg[R_MMREGISTER]:=trgcpu.create(R_MMREGISTER,R_SUBNONE,[RS_MM0,RS_MM1,RS_MM2,RS_MM3,RS_MM4,RS_MM5,RS_MM6,RS_MM7],first_sse_imreg,[]);
rgfpu:=Trgx86fpu.create; rgfpu:=Trgx86fpu.create;
end; end;
@ -183,6 +186,8 @@ unit cgx86;
rg[R_INTREGISTER]:=nil; rg[R_INTREGISTER]:=nil;
rg[R_MMREGISTER].free; rg[R_MMREGISTER].free;
rg[R_MMREGISTER]:=nil; rg[R_MMREGISTER]:=nil;
rg[R_MMXREGISTER].free;
rg[R_MMXREGISTER]:=nil;
rgfpu.free; rgfpu.free;
end; end;
@ -192,6 +197,13 @@ unit cgx86;
result:=rgfpu.getregisterfpu(list); result:=rgfpu.getregisterfpu(list);
end; end;
{ Hands out a register from the R_MMXREGISTER register allocator,
  requesting no subregister (R_SUBNONE).
  list: assembler output list passed on to the allocator.
  Stops with internalerror 200312124 when the R_MMXREGISTER allocator
  is not assigned. }
function Tcgx86.getmmxregister(list:Taasmoutput):Tregister;
  begin
    if rg[R_MMXREGISTER]=nil then
      internalerror(200312124);
    getmmxregister:=rg[R_MMXREGISTER].getregister(list,R_SUBNONE);
  end;
procedure Tcgx86.getexplicitregister(list:Taasmoutput;r:Tregister); procedure Tcgx86.getexplicitregister(list:Taasmoutput;r:Tregister);
begin begin
@ -1142,99 +1154,161 @@ unit cgx86;
procedure Tcgx86.g_concatcopy(list:Taasmoutput;const source,dest:Treference; procedure Tcgx86.g_concatcopy(list:Taasmoutput;const source,dest:Treference;
len:aword;delsource,loadref:boolean); len:aword;delsource,loadref:boolean);
type copymode=(copy_move,copy_mmx,copy_string);
var srcref,dstref:Treference; var srcref,dstref:Treference;
r:Tregister; r,r0,r1,r2,r3:Tregister;
helpsize:aword; helpsize:aword;
copysize:byte; copysize:byte;
cgsize:Tcgsize; cgsize:Tcgsize;
cm:copymode;
begin begin
cm:=copy_move;
helpsize:=12; helpsize:=12;
if cs_littlesize in aktglobalswitches then if cs_littlesize in aktglobalswitches then
helpsize:=8; helpsize:=8;
if not loadref and (len<=helpsize) then if (cs_mmx in aktlocalswitches) and
begin not(pi_uses_fpu in current_procinfo.flags) and
dstref:=dest; ((len=8) or (len=16) or (len=24) or (len=32)) then
srcref:=source; cm:=copy_mmx;
copysize:=4; if (cs_littlesize in aktglobalswitches) and
cgsize:=OS_32; (len>helpsize) and
while len<>0 do not((len<=16) and (cm=copy_mmx)) then
begin cm:=copy_string;
if len<2 then if loadref then
begin cm:=copy_string;
copysize:=1; case cm of
cgsize:=OS_8; copy_move:
end begin
else if len<4 then dstref:=dest;
begin srcref:=source;
copysize:=2; copysize:=4;
cgsize:=OS_16; cgsize:=OS_32;
end; while len<>0 do
dec(len,copysize); begin
if (len=0) and delsource then if len<2 then
reference_release(list,source); begin
r:=getintregister(list,cgsize); copysize:=1;
a_load_ref_reg(list,cgsize,cgsize,srcref,r); cgsize:=OS_8;
ungetregister(list,r); end
a_load_reg_ref(list,cgsize,cgsize,r,dstref); else if len<4 then
inc(srcref.offset,copysize); begin
inc(dstref.offset,copysize); copysize:=2;
end; cgsize:=OS_16;
end end;
else dec(len,copysize);
begin if (len=0) and delsource then
getexplicitregister(list,NR_EDI); reference_release(list,source);
a_loadaddr_ref_reg(list,dest,NR_EDI); r:=getintregister(list,cgsize);
getexplicitregister(list,NR_ESI); a_load_ref_reg(list,cgsize,cgsize,srcref,r);
if loadref then ungetregister(list,r);
a_load_ref_reg(list,OS_ADDR,OS_ADDR,source,NR_ESI) a_load_reg_ref(list,cgsize,cgsize,r,dstref);
else inc(srcref.offset,copysize);
begin inc(dstref.offset,copysize);
a_loadaddr_ref_reg(list,source,NR_ESI);
if delsource then
begin
srcref:=source;
{ Don't release ESI register yet, it's needed
by the movsl }
if (srcref.base=NR_ESI) then
srcref.base:=NR_NO
else if (srcref.index=NR_ESI) then
srcref.index:=NR_NO;
reference_release(list,srcref);
end;
end;
getexplicitregister(list,NR_ECX);
list.concat(Taicpu.op_none(A_CLD,S_NO));
if cs_littlesize in aktglobalswitches then
begin
a_load_const_reg(list,OS_INT,len,NR_ECX);
list.concat(Taicpu.op_none(A_REP,S_NO));
list.concat(Taicpu.op_none(A_MOVSB,S_NO));
end
else
begin
helpsize:=len shr 2;
len:=len and 3;
if helpsize>1 then
begin
a_load_const_reg(list,OS_INT,helpsize,NR_ECX);
list.concat(Taicpu.op_none(A_REP,S_NO));
end;
if helpsize>0 then
list.concat(Taicpu.op_none(A_MOVSD,S_NO));
if len>1 then
begin
dec(len,2);
list.concat(Taicpu.op_none(A_MOVSW,S_NO));
end;
if len=1 then
list.concat(Taicpu.op_none(A_MOVSB,S_NO));
end; end;
ungetregister(list,NR_ECX); end;
ungetregister(list,NR_ESI); copy_mmx:
ungetregister(list,NR_EDI); begin
dstref:=dest;
srcref:=source;
r0:=getmmxregister(list);
a_loadmm_ref_reg(list,OS_M64,OS_M64,srcref,r0,nil);
if len>=16 then
begin
inc(srcref.offset,8);
r1:=getmmxregister(list);
a_loadmm_ref_reg(list,OS_M64,OS_M64,srcref,r1,nil);
end;
if len>=24 then
begin
inc(srcref.offset,8);
r2:=getmmxregister(list);
a_loadmm_ref_reg(list,OS_M64,OS_M64,srcref,r2,nil);
end;
if len>=32 then
begin
inc(srcref.offset,8);
r3:=getmmxregister(list);
a_loadmm_ref_reg(list,OS_M64,OS_M64,srcref,r3,nil);
end;
a_loadmm_reg_ref(list,OS_M64,OS_M64,r0,dstref,nil);
ungetregister(list,r0);
if len>=16 then
begin
inc(dstref.offset,8);
a_loadmm_reg_ref(list,OS_M64,OS_M64,r1,dstref,nil);
ungetregister(list,r1);
end;
if len>=24 then
begin
inc(dstref.offset,8);
a_loadmm_reg_ref(list,OS_M64,OS_M64,r2,dstref,nil);
ungetregister(list,r2);
end;
if len>=32 then
begin
inc(dstref.offset,8);
a_loadmm_reg_ref(list,OS_M64,OS_M64,r3,dstref,nil);
ungetregister(list,r3);
end;
end
else {copy_string, should be a good fallback in case of unhandled}
begin
getexplicitregister(list,NR_EDI);
a_loadaddr_ref_reg(list,dest,NR_EDI);
getexplicitregister(list,NR_ESI);
if loadref then
a_load_ref_reg(list,OS_ADDR,OS_ADDR,source,NR_ESI)
else
begin
a_loadaddr_ref_reg(list,source,NR_ESI);
if delsource then
begin
srcref:=source;
{ Don't release ESI register yet, it's needed
by the movsl }
if (srcref.base=NR_ESI) then
srcref.base:=NR_NO
else if (srcref.index=NR_ESI) then
srcref.index:=NR_NO;
reference_release(list,srcref);
end;
end;
getexplicitregister(list,NR_ECX);
list.concat(Taicpu.op_none(A_CLD,S_NO));
if cs_littlesize in aktglobalswitches then
begin
a_load_const_reg(list,OS_INT,len,NR_ECX);
list.concat(Taicpu.op_none(A_REP,S_NO));
list.concat(Taicpu.op_none(A_MOVSB,S_NO));
end
else
begin
helpsize:=len shr 2;
len:=len and 3;
if helpsize>1 then
begin
a_load_const_reg(list,OS_INT,helpsize,NR_ECX);
list.concat(Taicpu.op_none(A_REP,S_NO));
end;
if helpsize>0 then
list.concat(Taicpu.op_none(A_MOVSD,S_NO));
if len>1 then
begin
dec(len,2);
list.concat(Taicpu.op_none(A_MOVSW,S_NO));
end;
if len=1 then
list.concat(Taicpu.op_none(A_MOVSB,S_NO));
end;
ungetregister(list,NR_ECX);
ungetregister(list,NR_ESI);
ungetregister(list,NR_EDI);
end;
end; end;
if delsource then if delsource then
tg.ungetiftemp(list,source); tg.ungetiftemp(list,source);
@ -1528,6 +1602,8 @@ unit cgx86;
list.concat(tai_regalloc.dealloc(NR_EBX)); list.concat(tai_regalloc.dealloc(NR_EBX));
list.concat(tai_regalloc.dealloc(NR_EBP)); list.concat(tai_regalloc.dealloc(NR_EBP));
list.concat(Taicpu.op_none(A_LEAVE,S_NO)); list.concat(Taicpu.op_none(A_LEAVE,S_NO));
if assigned(rg[R_MMXREGISTER]) and (rg[R_MMXREGISTER].uses_registers) then
list.concat(Taicpu.op_none(A_EMMS,S_NO));
end; end;
@ -1682,7 +1758,10 @@ unit cgx86;
end. end.
{ {
$Log$ $Log$
Revision 1.91 2003-12-15 21:25:49 peter Revision 1.92 2003-12-19 22:08:44 daniel
* Some work to restore the MMX capabilities
Revision 1.91 2003/12/15 21:25:49 peter
* reg allocations for imaginary register are now inserted just * reg allocations for imaginary register are now inserted just
before reg allocation before reg allocation
* tregister changed to enum to allow compile time check * tregister changed to enum to allow compile time check

View File

@ -270,7 +270,11 @@ uses
4 : (register64 : tregister64); 4 : (register64 : tregister64);
); );
{ it's only for better handling } { it's only for better handling }
LOC_MMXREGISTER,LOC_CMMXREGISTER : (mmxreg : tregister); LOC_MMXREGISTER,LOC_CMMXREGISTER : (
case longint of
0: (mmxreg : tregister);
1: (mmxregset : Tregistermmxset);
);
end; end;
tlocation = packed record tlocation = packed record
@ -415,6 +419,8 @@ implementation
cgsize2subreg:=R_SUBD; cgsize2subreg:=R_SUBD;
OS_64,OS_S64: OS_64,OS_S64:
cgsize2subreg:=R_SUBQ; cgsize2subreg:=R_SUBQ;
OS_M64:
cgsize2subreg:=R_SUBNONE;
else else
internalerror(200301231); internalerror(200301231);
end; end;
@ -526,7 +532,10 @@ implementation
end. end.
{ {
$Log$ $Log$
Revision 1.31 2003-12-15 21:25:49 peter Revision 1.32 2003-12-19 22:08:44 daniel
* Some work to restore the MMX capabilities
Revision 1.31 2003/12/15 21:25:49 peter
* reg allocations for imaginary register are now inserted just * reg allocations for imaginary register are now inserted just
before reg allocation before reg allocation
* tregister changed to enum to allow compile time check * tregister changed to enum to allow compile time check