+ memory references are now replaced by register reads in "regular"

instructions (e.g. "addl ref1,%eax" will be replaced by "addl %ebx,%eax"
    if %ebx contains ref1). Previously only complete load sequences were
    optimized away, but not such small accesses in instructions other than
    mov/movzx/movsx.
This commit is contained in:
Jonas Maebe 2001-10-12 13:58:05 +00:00
parent 97c1152c6d
commit 82f7cf0957
2 changed files with 161 additions and 12 deletions

View File

@ -185,10 +185,7 @@ function isSimpleMemLoc(const ref: treference): boolean;
begin begin
isSimpleMemLoc := isSimpleMemLoc :=
(ref.index = R_NO) and (ref.index = R_NO) and
(not(ref.base in (usableregs+[R_EDI])) or not(ref.base in (usableregs+[R_EDI]));
(assigned(ref.symbol) and
(ref.base = R_NO) and
(ref.index = R_NO)));
end; end;
{checks whether the current instruction sequence (starting with p) and the {checks whether the current instruction sequence (starting with p) and the
@ -1314,6 +1311,48 @@ begin
(p.opcode = A_IDIV)); (p.opcode = A_IDIV));
end; end;
function memtoreg(const t: Taicpu; const ref: treference): tregister;
{ Searches for a register whose recorded contents were loaded from the     }
{ memory location "ref", so that the memory operand of instruction "t" can }
{ be replaced by that register.                                            }
{                                                                          }
{ t:   the instruction whose operand is a candidate for replacement; its   }
{      opsize selects which sub-register (8/16/32 bit) is returned         }
{ ref: the memory reference used by that operand                           }
{                                                                          }
{ Returns the (sub)register to use instead of "ref", or R_NO when no       }
{ suitable register was found.                                             }
var
  hp: tai;
  p: pTaiprop;
  regcounter: tregister;
begin
  { default result, so every exit path returns a defined value }
  memtoreg := R_NO;
  { the register contents are read from the optinfo of the instruction }
  { returned by getlastinstruction; without it nothing can be decided  }
  if not getlastinstruction(t,hp) then
    exit;
  p := pTaiprop(hp.optinfo);
  if isSimpleMemLoc(ref) then
    begin
      for regcounter := R_EAX to R_EDI do
        { only accept a register whose recorded content consists of one }
        { single mov/movzx/movsx that reads from exactly this reference }
        if (p^.regs[regcounter].typ in [CON_REF,CON_NOREMOVEREF]) and
           (p^.regs[regcounter].nrofmods = 1) and
           ((Taicpu(p^.regs[regcounter].startmod).opcode = A_MOV) or
            (Taicpu(p^.regs[regcounter].startmod).opcode = A_MOVZX) or
            (Taicpu(p^.regs[regcounter].startmod).opcode = A_MOVSX)) and
           (taicpu(p^.regs[regcounter].startmod).oper[0].typ = top_ref) and
           refsequal(ref,taicpu(p^.regs[regcounter].startmod).oper[0].ref^) then
          begin
            { S_B, S_BW and S_BL all end up in reg32toreg8() below, and   }
            { edi has no 8 bit sub-register, so edi must be rejected for  }
            { every one of those sizes (previously only S_B was checked). }
            { NOTE(review): if the range R_EAX..R_EDI also covers other   }
            { registers without an 8 bit form (e.g. esi), they need the   }
            { same treatment - confirm against the tregister declaration. }
            if (not(t.opsize in [S_B,S_BW,S_BL]) or
                (regcounter <> R_EDI)) and
               sizescompatible(Taicpu(p^.regs[regcounter].startmod).opsize,t.opsize) then
              begin
                { return the sub-register matching the size that "t" reads }
                case t.opsize of
                  S_B,S_BW,S_BL:
                    memtoreg := reg32toreg8(regcounter);
                  S_W,S_WL:
                    memtoreg := reg32toreg16(regcounter);
                  S_L:
                    memtoreg := regcounter;
                end;
                exit;
              end;
          end;
    end;
end;
procedure DoCSE(AsmL: TAAsmOutput; First, Last: Tai; findPrevSeqs, doSubOpts: boolean); procedure DoCSE(AsmL: TAAsmOutput; First, Last: Tai; findPrevSeqs, doSubOpts: boolean);
{marks the instructions that can be removed by RemoveInstructs. They're not {marks the instructions that can be removed by RemoveInstructs. They're not
removed immediately because sometimes an instruction needs to be checked in removed immediately because sometimes an instruction needs to be checked in
@ -1594,9 +1633,26 @@ Begin
pTaiProp(p.optInfo)^.regs[Taicpu(p).oper[0].reg].startMod,hp1); pTaiProp(p.optInfo)^.regs[Taicpu(p).oper[0].reg].startMod,hp1);
end end
else else
if (Taicpu(p).oper[1].typ = top_reg) and begin
not regInOp(Taicpu(p).oper[1].reg,Taicpu(p).oper[0]) then if (Taicpu(p).oper[1].typ = top_reg) and
removePrevNotUsedLoad(p,reg32(Taicpu(p).oper[1].reg),false); not regInOp(Taicpu(p).oper[1].reg,Taicpu(p).oper[0]) then
removePrevNotUsedLoad(p,reg32(Taicpu(p).oper[1].reg),false);
if doSubOpts and
(Taicpu(p).opcode <> A_LEA) and
(Taicpu(p).oper[0].typ = top_ref) then
begin
regcounter :=
memtoreg(taicpu(p),
Taicpu(p).oper[0].ref^);
if regcounter <> R_NO then
begin
Taicpu(p).loadreg(0,regcounter);
allocregbetween(asml,reg32(regcounter),
pTaiprop(p.optinfo)^.regs[reg32(regcounter)].startmod,
p);
end;
end;
end;
{ at first, only try optimizations of large blocks, because doing } { at first, only try optimizations of large blocks, because doing }
{ doing smaller ones may prevent bigger ones from completing in } { doing smaller ones may prevent bigger ones from completing in }
{ in the next pass } { in the next pass }
@ -1643,6 +1699,52 @@ Begin
A_STD: If GetLastInstruction(p, hp1) And A_STD: If GetLastInstruction(p, hp1) And
(PTaiProp(hp1.OptInfo)^.DirFlag = F_Set) Then (PTaiProp(hp1.OptInfo)^.DirFlag = F_Set) Then
PTaiProp(Tai(p).OptInfo)^.CanBeRemoved := True; PTaiProp(Tai(p).OptInfo)^.CanBeRemoved := True;
else
begin
for cnt := 1 to maxch do
begin
case InsProp[taicpu(p).opcode].Ch[cnt] of
Ch_ROp1:
if (taicpu(p).oper[0].typ = top_ref) and
((taicpu(p).opcode < A_F2XM1) or
((taicpu(p).opcode > A_IN) and
(taicpu(p).opcode < A_OUT)) or
(taicpu(p).opcode = A_PUSH) or
(taicpu(p).opcode = A_SUB) or
(taicpu(p).opcode = A_TEST) or
(taicpu(p).opcode = A_XOR))then
begin
regcounter :=
memtoreg(taicpu(p),
Taicpu(p).oper[0].ref^);
if regcounter <> R_NO then
begin
Taicpu(p).loadreg(0,regcounter);
allocregbetween(asml,reg32(regcounter),
pTaiprop(p.optinfo)^.regs[reg32(regcounter)].startmod,
p);
end;
end;
Ch_ROp2:
if ((taicpu(p).opcode = A_CMP) or
(taicpu(p).opcode = A_TEST)) and
(taicpu(p).oper[1].typ = top_ref) then
begin
regcounter :=
memtoreg(taicpu(p),
Taicpu(p).oper[1].ref^);
if regcounter <> R_NO then
begin
Taicpu(p).loadreg(1,regcounter);
allocregbetween(asml,reg32(regcounter),
pTaiprop(p.optinfo)^.regs[reg32(regcounter)].startmod,
p);
end;
end;
end;
end;
end;
End End
End; End;
End; End;
@ -1742,7 +1844,14 @@ End.
{ {
$Log$ $Log$
Revision 1.18 2001-09-04 14:01:03 jonas Revision 1.19 2001-10-12 13:58:05 jonas
+ memory references are now replaced by register reads in "regular"
instructions (e.g. "addl ref1,%eax" will be replaced by "addl %ebx,%eax"
if %ebx contains ref1). Previously only complete load sequences were
optimized away, but not such small accesses in other instructions than
mov/movzx/movsx
Revision 1.18 2001/09/04 14:01:03 jonas
* commented out some inactive code in csopt386 * commented out some inactive code in csopt386
+ small improvement: lea is now handled the same as mov/zx/sx + small improvement: lea is now handled the same as mov/zx/sx

View File

@ -191,6 +191,7 @@ function FindRegDealloc(reg: tregister; p: Tai): boolean;
Function RegsEquivalent(OldReg, NewReg: TRegister; Var RegInfo: TRegInfo; OpAct: TopAction): Boolean; Function RegsEquivalent(OldReg, NewReg: TRegister; Var RegInfo: TRegInfo; OpAct: TopAction): Boolean;
Function InstructionsEquivalent(p1, p2: Tai; Var RegInfo: TRegInfo): Boolean; Function InstructionsEquivalent(p1, p2: Tai; Var RegInfo: TRegInfo): Boolean;
function sizescompatible(loadsize,newsize: topsize): boolean;
Function OpsEqual(const o1,o2:toper): Boolean; Function OpsEqual(const o1,o2:toper): Boolean;
Function DFAPass1(AsmL: TAAsmOutput; BlockStart: Tai): Tai; Function DFAPass1(AsmL: TAAsmOutput; BlockStart: Tai): Tai;
@ -1164,10 +1165,13 @@ var
lastRemovedWasDealloc, firstRemovedWasAlloc, first: boolean; lastRemovedWasDealloc, firstRemovedWasAlloc, first: boolean;
Begin Begin
If not(reg in usableregs+[R_EDI,R_ESI]) or If not(reg in usableregs+[R_EDI,R_ESI]) or
not(assigned(p1)) Then not(assigned(p1)) then
{ this happens with registers which are loaded implicitely, outside the } { this happens with registers which are loaded implicitely, outside the }
{ current block (e.g. esi with self) } { current block (e.g. esi with self) }
exit; exit;
{ make sure we allocate it for this instruction }
if p1 = p2 then
getnextinstruction(p2,p2);
lastRemovedWasDealloc := false; lastRemovedWasDealloc := false;
firstRemovedWasAlloc := false; firstRemovedWasAlloc := false;
first := true; first := true;
@ -1433,6 +1437,34 @@ Begin {checks whether the two ops are equal}
End; End;
End; End;
function sizescompatible(loadsize,newsize: topsize): boolean;
{ Tells whether an operand of size "newsize" may be satisfied by a value }
{ that was loaded with an instruction of size "loadsize":                }
{   - byte loads (S_B/S_BW/S_BL): the same size, or a plain byte         }
{   - word loads (S_W/S_WL):      the same size, or a plain word         }
{   - anything else:              only a plain long (S_L)                }
begin
  if loadsize in [S_B,S_BW,S_BL] then
    sizescompatible := (newsize = S_B) or (newsize = loadsize)
  else if loadsize in [S_W,S_WL] then
    sizescompatible := (newsize = S_W) or (newsize = loadsize)
  else
    sizescompatible := (newsize = S_L);
end;
function opscompatible(p1,p2: Taicpu): boolean;
{ Checks whether the opcode/opsize combination of p2 is acceptable as a   }
{ counterpart of p1. For everything except movzx/movsx, opcode and opsize }
{ must be identical. If p1 is a movzx/movsx, p2 may be the same opcode or }
{ a plain mov, provided sizescompatible() accepts the two operand sizes.  }
begin
  { common case first: anything that is not an extending move }
  if (p1.opcode <> A_MOVZX) and (p1.opcode <> A_MOVSX) then
    begin
      opscompatible :=
        (p2.opcode = p1.opcode) and
        (p2.opsize = p1.opsize);
      exit;
    end;
  { p1 is movzx or movsx }
  if (p2.opcode = p1.opcode) or (p2.opcode = A_MOV) then
    opscompatible := sizescompatible(p1.opsize,p2.opsize)
  else
    opscompatible := false;
end;
Function InstructionsEquivalent(p1, p2: Tai; Var RegInfo: TRegInfo): Boolean; Function InstructionsEquivalent(p1, p2: Tai; Var RegInfo: TRegInfo): Boolean;
{$ifdef csdebug} {$ifdef csdebug}
var var
@ -1442,7 +1474,7 @@ Begin {checks whether two Taicpu instructions are equal}
If Assigned(p1) And Assigned(p2) And If Assigned(p1) And Assigned(p2) And
(Tai(p1).typ = ait_instruction) And (Tai(p1).typ = ait_instruction) And
(Tai(p1).typ = ait_instruction) And (Tai(p1).typ = ait_instruction) And
(Taicpu(p1).opcode = Taicpu(p2).opcode) And opscompatible(Taicpu(p1),Taicpu(p2)) and
(Taicpu(p1).oper[0].typ = Taicpu(p2).oper[0].typ) And (Taicpu(p1).oper[0].typ = Taicpu(p2).oper[0].typ) And
(Taicpu(p1).oper[1].typ = Taicpu(p2).oper[1].typ) And (Taicpu(p1).oper[1].typ = Taicpu(p2).oper[1].typ) And
(Taicpu(p1).oper[2].typ = Taicpu(p2).oper[2].typ) (Taicpu(p1).oper[2].typ = Taicpu(p2).oper[2].typ)
@ -1476,7 +1508,8 @@ Begin {checks whether two Taicpu instructions are equal}
AddOp2RegInfo(Taicpu(p1).oper[0], RegInfo); AddOp2RegInfo(Taicpu(p1).oper[0], RegInfo);
{the registers from .oper[1] have to be equivalent, but not necessarily equal} {the registers from .oper[1] have to be equivalent, but not necessarily equal}
InstructionsEquivalent := InstructionsEquivalent :=
RegsEquivalent(Taicpu(p1).oper[1].reg, Taicpu(p2).oper[1].reg, RegInfo, OpAct_Write); RegsEquivalent(reg32(Taicpu(p1).oper[1].reg),
reg32(Taicpu(p2).oper[1].reg), RegInfo, OpAct_Write);
End End
{the registers are loaded with values from different memory locations. If {the registers are loaded with values from different memory locations. If
this was allowed, the instructions "mov -4(esi),eax" and "mov -4(ebp),eax" this was allowed, the instructions "mov -4(esi),eax" and "mov -4(ebp),eax"
@ -2519,7 +2552,14 @@ End.
{ {
$Log$ $Log$
Revision 1.21 2001-09-04 14:01:04 jonas Revision 1.22 2001-10-12 13:58:05 jonas
+ memory references are now replaced by register reads in "regular"
instructions (e.g. "addl ref1,%eax" will be replaced by "addl %ebx,%eax"
if %ebx contains ref1). Previously only complete load sequences were
optimized away, but not such small accesses in other instructions than
mov/movzx/movsx
Revision 1.21 2001/09/04 14:01:04 jonas
* commented out some inactive code in csopt386 * commented out some inactive code in csopt386
+ small improvement: lea is now handled the same as mov/zx/sx + small improvement: lea is now handled the same as mov/zx/sx