mirror of
https://gitlab.com/freepascal.org/fpc/source.git
synced 2025-09-13 05:50:53 +02:00
+ memory references are now replaced by register reads in "regular"
instructions (e.g. "addl ref1,%eax" will be replaced by "addl %ebx,%eax" if %ebx contains ref1). Previously, only complete load sequences were optimized away, but not such small accesses in instructions other than mov/movzx/movsx.
This commit is contained in:
parent
97c1152c6d
commit
82f7cf0957
@ -185,10 +185,7 @@ function isSimpleMemLoc(const ref: treference): boolean;
|
|||||||
begin
|
begin
|
||||||
isSimpleMemLoc :=
|
isSimpleMemLoc :=
|
||||||
(ref.index = R_NO) and
|
(ref.index = R_NO) and
|
||||||
(not(ref.base in (usableregs+[R_EDI])) or
|
not(ref.base in (usableregs+[R_EDI]));
|
||||||
(assigned(ref.symbol) and
|
|
||||||
(ref.base = R_NO) and
|
|
||||||
(ref.index = R_NO)));
|
|
||||||
end;
|
end;
|
||||||
|
|
||||||
{checks whether the current instruction sequence (starting with p) and the
|
{checks whether the current instruction sequence (starting with p) and the
|
||||||
@ -1314,6 +1311,48 @@ begin
|
|||||||
(p.opcode = A_IDIV));
|
(p.opcode = A_IDIV));
|
||||||
end;
|
end;
|
||||||
|
|
||||||
|
function memtoreg(const t: Taicpu; const ref: treference): tregister;
{ Searches for a register that can be read instead of the memory location
  "ref" used by instruction "t" (e.g. so that "addl ref1,%eax" can become
  "addl %ebx,%eax" if %ebx contains ref1).

  The register contents are taken from the optimizer info (optinfo) of the
  instruction returned by getlastinstruction(t,...). A register qualifies if
    - its content type is CON_REF or CON_NOREMOVEREF and nrofmods = 1,
    - the instruction recorded as its startmod is a mov/movzx/movsx whose
      first operand is a reference equal to "ref",
    - the size of that load is compatible with the operand size of "t".

  Returns the matching register, narrowed to the 8/16 bit subregister when
  t.opsize requires it, or R_NO if getlastinstruction fails, "ref" is not a
  simple memory location or no register qualifies. }
var
  prev: tai;
  prevprops: pTaiprop;
  reg: tregister;
  loadinstr: Taicpu;
begin
  if getlastinstruction(t,prev) then
    begin
      prevprops := pTaiprop(prev.optinfo);
      if isSimpleMemLoc(ref) then
        for reg := R_EAX to R_EDI do
          if (prevprops^.regs[reg].typ in [CON_REF,CON_NOREMOVEREF]) and
             (prevprops^.regs[reg].nrofmods = 1) then
            begin
              { the instruction that loaded the current contents of reg }
              loadinstr := Taicpu(prevprops^.regs[reg].startmod);
              { NOTE(review): byte-sized accesses are only refused for EDI,
                presumably because it has no 8 bit subregister - confirm
                whether other registers in R_EAX..R_EDI need the same check }
              if ((loadinstr.opcode = A_MOV) or
                  (loadinstr.opcode = A_MOVZX) or
                  (loadinstr.opcode = A_MOVSX)) and
                 (loadinstr.oper[0].typ = top_ref) and
                 refsequal(ref,loadinstr.oper[0].ref^) and
                 not((t.opsize = S_B) and (reg = R_EDI)) and
                 sizescompatible(loadinstr.opsize,t.opsize) then
                begin
                  { hand back the subregister matching the source size of t }
                  case t.opsize of
                    S_B,S_BW,S_BL:
                      memtoreg := reg32toreg8(reg);
                    S_W,S_WL:
                      memtoreg := reg32toreg16(reg);
                    S_L:
                      memtoreg := reg;
                  end;
                  exit;
                end;
            end;
    end;
  { nothing usable found }
  memtoreg := R_NO;
end;
|
||||||
|
|
||||||
procedure DoCSE(AsmL: TAAsmOutput; First, Last: Tai; findPrevSeqs, doSubOpts: boolean);
|
procedure DoCSE(AsmL: TAAsmOutput; First, Last: Tai; findPrevSeqs, doSubOpts: boolean);
|
||||||
{marks the instructions that can be removed by RemoveInstructs. They're not
|
{marks the instructions that can be removed by RemoveInstructs. They're not
|
||||||
removed immediately because sometimes an instruction needs to be checked in
|
removed immediately because sometimes an instruction needs to be checked in
|
||||||
@ -1594,9 +1633,26 @@ Begin
|
|||||||
pTaiProp(p.optInfo)^.regs[Taicpu(p).oper[0].reg].startMod,hp1);
|
pTaiProp(p.optInfo)^.regs[Taicpu(p).oper[0].reg].startMod,hp1);
|
||||||
end
|
end
|
||||||
else
|
else
|
||||||
if (Taicpu(p).oper[1].typ = top_reg) and
|
begin
|
||||||
not regInOp(Taicpu(p).oper[1].reg,Taicpu(p).oper[0]) then
|
if (Taicpu(p).oper[1].typ = top_reg) and
|
||||||
removePrevNotUsedLoad(p,reg32(Taicpu(p).oper[1].reg),false);
|
not regInOp(Taicpu(p).oper[1].reg,Taicpu(p).oper[0]) then
|
||||||
|
removePrevNotUsedLoad(p,reg32(Taicpu(p).oper[1].reg),false);
|
||||||
|
if doSubOpts and
|
||||||
|
(Taicpu(p).opcode <> A_LEA) and
|
||||||
|
(Taicpu(p).oper[0].typ = top_ref) then
|
||||||
|
begin
|
||||||
|
regcounter :=
|
||||||
|
memtoreg(taicpu(p),
|
||||||
|
Taicpu(p).oper[0].ref^);
|
||||||
|
if regcounter <> R_NO then
|
||||||
|
begin
|
||||||
|
Taicpu(p).loadreg(0,regcounter);
|
||||||
|
allocregbetween(asml,reg32(regcounter),
|
||||||
|
pTaiprop(p.optinfo)^.regs[reg32(regcounter)].startmod,
|
||||||
|
p);
|
||||||
|
end;
|
||||||
|
end;
|
||||||
|
end;
|
||||||
{ at first, only try optimizations of large blocks, because doing }
|
{ at first, only try optimizations of large blocks, because doing }
|
||||||
{ smaller ones may prevent bigger ones from completing          }
|
{ smaller ones may prevent bigger ones from completing          }
|
||||||
{ in the next pass }
|
{ in the next pass }
|
||||||
@ -1643,6 +1699,52 @@ Begin
|
|||||||
A_STD: If GetLastInstruction(p, hp1) And
|
A_STD: If GetLastInstruction(p, hp1) And
|
||||||
(PTaiProp(hp1.OptInfo)^.DirFlag = F_Set) Then
|
(PTaiProp(hp1.OptInfo)^.DirFlag = F_Set) Then
|
||||||
PTaiProp(Tai(p).OptInfo)^.CanBeRemoved := True;
|
PTaiProp(Tai(p).OptInfo)^.CanBeRemoved := True;
|
||||||
|
else
|
||||||
|
begin
|
||||||
|
for cnt := 1 to maxch do
|
||||||
|
begin
|
||||||
|
case InsProp[taicpu(p).opcode].Ch[cnt] of
|
||||||
|
Ch_ROp1:
|
||||||
|
if (taicpu(p).oper[0].typ = top_ref) and
|
||||||
|
((taicpu(p).opcode < A_F2XM1) or
|
||||||
|
((taicpu(p).opcode > A_IN) and
|
||||||
|
(taicpu(p).opcode < A_OUT)) or
|
||||||
|
(taicpu(p).opcode = A_PUSH) or
|
||||||
|
(taicpu(p).opcode = A_SUB) or
|
||||||
|
(taicpu(p).opcode = A_TEST) or
|
||||||
|
(taicpu(p).opcode = A_XOR))then
|
||||||
|
begin
|
||||||
|
regcounter :=
|
||||||
|
memtoreg(taicpu(p),
|
||||||
|
Taicpu(p).oper[0].ref^);
|
||||||
|
if regcounter <> R_NO then
|
||||||
|
begin
|
||||||
|
Taicpu(p).loadreg(0,regcounter);
|
||||||
|
allocregbetween(asml,reg32(regcounter),
|
||||||
|
pTaiprop(p.optinfo)^.regs[reg32(regcounter)].startmod,
|
||||||
|
p);
|
||||||
|
end;
|
||||||
|
end;
|
||||||
|
|
||||||
|
Ch_ROp2:
|
||||||
|
if ((taicpu(p).opcode = A_CMP) or
|
||||||
|
(taicpu(p).opcode = A_TEST)) and
|
||||||
|
(taicpu(p).oper[1].typ = top_ref) then
|
||||||
|
begin
|
||||||
|
regcounter :=
|
||||||
|
memtoreg(taicpu(p),
|
||||||
|
Taicpu(p).oper[1].ref^);
|
||||||
|
if regcounter <> R_NO then
|
||||||
|
begin
|
||||||
|
Taicpu(p).loadreg(1,regcounter);
|
||||||
|
allocregbetween(asml,reg32(regcounter),
|
||||||
|
pTaiprop(p.optinfo)^.regs[reg32(regcounter)].startmod,
|
||||||
|
p);
|
||||||
|
end;
|
||||||
|
end;
|
||||||
|
end;
|
||||||
|
end;
|
||||||
|
end;
|
||||||
End
|
End
|
||||||
End;
|
End;
|
||||||
End;
|
End;
|
||||||
@ -1742,7 +1844,14 @@ End.
|
|||||||
|
|
||||||
{
|
{
|
||||||
$Log$
|
$Log$
|
||||||
Revision 1.18 2001-09-04 14:01:03 jonas
|
Revision 1.19 2001-10-12 13:58:05 jonas
|
||||||
|
+ memory references are now replaced by register reads in "regular"
|
||||||
|
instructions (e.g. "addl ref1,%eax" will be replaced by "addl %ebx,%eax"
|
||||||
|
if %ebx contains ref1). Previously only complete load sequences were
|
||||||
|
optimized away, but not such small accesses in other instructions than
|
||||||
|
mov/movzx/movsx
|
||||||
|
|
||||||
|
Revision 1.18 2001/09/04 14:01:03 jonas
|
||||||
* commented out some inactive code in csopt386
|
* commented out some inactive code in csopt386
|
||||||
+ small improvement: lea is now handled the same as mov/zx/sx
|
+ small improvement: lea is now handled the same as mov/zx/sx
|
||||||
|
|
||||||
|
@ -191,6 +191,7 @@ function FindRegDealloc(reg: tregister; p: Tai): boolean;
|
|||||||
|
|
||||||
Function RegsEquivalent(OldReg, NewReg: TRegister; Var RegInfo: TRegInfo; OpAct: TopAction): Boolean;
|
Function RegsEquivalent(OldReg, NewReg: TRegister; Var RegInfo: TRegInfo; OpAct: TopAction): Boolean;
|
||||||
Function InstructionsEquivalent(p1, p2: Tai; Var RegInfo: TRegInfo): Boolean;
|
Function InstructionsEquivalent(p1, p2: Tai; Var RegInfo: TRegInfo): Boolean;
|
||||||
|
function sizescompatible(loadsize,newsize: topsize): boolean;
|
||||||
Function OpsEqual(const o1,o2:toper): Boolean;
|
Function OpsEqual(const o1,o2:toper): Boolean;
|
||||||
|
|
||||||
Function DFAPass1(AsmL: TAAsmOutput; BlockStart: Tai): Tai;
|
Function DFAPass1(AsmL: TAAsmOutput; BlockStart: Tai): Tai;
|
||||||
@ -1164,10 +1165,13 @@ var
|
|||||||
lastRemovedWasDealloc, firstRemovedWasAlloc, first: boolean;
|
lastRemovedWasDealloc, firstRemovedWasAlloc, first: boolean;
|
||||||
Begin
|
Begin
|
||||||
If not(reg in usableregs+[R_EDI,R_ESI]) or
|
If not(reg in usableregs+[R_EDI,R_ESI]) or
|
||||||
not(assigned(p1)) Then
|
not(assigned(p1)) then
|
||||||
{ this happens with registers which are loaded implicitly, outside the  }
|
{ this happens with registers which are loaded implicitly, outside the  }
|
||||||
{ current block (e.g. esi with self) }
|
{ current block (e.g. esi with self) }
|
||||||
exit;
|
exit;
|
||||||
|
{ make sure we allocate it for this instruction }
|
||||||
|
if p1 = p2 then
|
||||||
|
getnextinstruction(p2,p2);
|
||||||
lastRemovedWasDealloc := false;
|
lastRemovedWasDealloc := false;
|
||||||
firstRemovedWasAlloc := false;
|
firstRemovedWasAlloc := false;
|
||||||
first := true;
|
first := true;
|
||||||
@ -1433,6 +1437,34 @@ Begin {checks whether the two ops are equal}
|
|||||||
End;
|
End;
|
||||||
End;
|
End;
|
||||||
|
|
||||||
|
|
||||||
|
function sizescompatible(loadsize,newsize: topsize): boolean;
{ Tells whether an access of size "newsize" is compatible with an earlier
  load of size "loadsize":
    - loadsize S_B/S_BW/S_BL: newsize must equal loadsize or be S_B
    - loadsize S_W/S_WL:      newsize must equal loadsize or be S_W
    - any other loadsize:     newsize must be S_L }
var
  plainsize: topsize;
begin
  if loadsize in [S_B,S_BW,S_BL] then
    plainsize := S_B
  else if loadsize in [S_W,S_WL] then
    plainsize := S_W
  else
    begin
      { all remaining load sizes are only compatible with S_L }
      sizescompatible := (newsize = S_L);
      exit;
    end;
  sizescompatible := (newsize = loadsize) or (newsize = plainsize);
end;
|
||||||
|
|
||||||
|
|
||||||
|
function opscompatible(p1,p2: Taicpu): boolean;
{ Checks whether the opcode/opsize combination of p2 is compatible with that
  of p1. If p1 is a movzx/movsx, p2 may have the same opcode or be a plain
  mov, provided sizescompatible(p1.opsize,p2.opsize) holds. For every other
  opcode of p1, both opcode and opsize have to be identical. }
begin
  if (p1.opcode = A_MOVZX) or (p1.opcode = A_MOVSX) then
    begin
      if (p2.opcode = p1.opcode) or (p2.opcode = A_MOV) then
        opscompatible := sizescompatible(p1.opsize,p2.opsize)
      else
        opscompatible := false;
    end
  else
    opscompatible := (p1.opcode = p2.opcode) and (p1.opsize = p2.opsize);
end;
|
||||||
|
|
||||||
Function InstructionsEquivalent(p1, p2: Tai; Var RegInfo: TRegInfo): Boolean;
|
Function InstructionsEquivalent(p1, p2: Tai; Var RegInfo: TRegInfo): Boolean;
|
||||||
{$ifdef csdebug}
|
{$ifdef csdebug}
|
||||||
var
|
var
|
||||||
@ -1442,7 +1474,7 @@ Begin {checks whether two Taicpu instructions are equal}
|
|||||||
If Assigned(p1) And Assigned(p2) And
|
If Assigned(p1) And Assigned(p2) And
|
||||||
(Tai(p1).typ = ait_instruction) And
|
(Tai(p1).typ = ait_instruction) And
|
||||||
(Tai(p1).typ = ait_instruction) And
|
(Tai(p1).typ = ait_instruction) And
|
||||||
(Taicpu(p1).opcode = Taicpu(p2).opcode) And
|
opscompatible(Taicpu(p1),Taicpu(p2)) and
|
||||||
(Taicpu(p1).oper[0].typ = Taicpu(p2).oper[0].typ) And
|
(Taicpu(p1).oper[0].typ = Taicpu(p2).oper[0].typ) And
|
||||||
(Taicpu(p1).oper[1].typ = Taicpu(p2).oper[1].typ) And
|
(Taicpu(p1).oper[1].typ = Taicpu(p2).oper[1].typ) And
|
||||||
(Taicpu(p1).oper[2].typ = Taicpu(p2).oper[2].typ)
|
(Taicpu(p1).oper[2].typ = Taicpu(p2).oper[2].typ)
|
||||||
@ -1476,7 +1508,8 @@ Begin {checks whether two Taicpu instructions are equal}
|
|||||||
AddOp2RegInfo(Taicpu(p1).oper[0], RegInfo);
|
AddOp2RegInfo(Taicpu(p1).oper[0], RegInfo);
|
||||||
{the registers from .oper[1] have to be equivalent, but not necessarily equal}
|
{the registers from .oper[1] have to be equivalent, but not necessarily equal}
|
||||||
InstructionsEquivalent :=
|
InstructionsEquivalent :=
|
||||||
RegsEquivalent(Taicpu(p1).oper[1].reg, Taicpu(p2).oper[1].reg, RegInfo, OpAct_Write);
|
RegsEquivalent(reg32(Taicpu(p1).oper[1].reg),
|
||||||
|
reg32(Taicpu(p2).oper[1].reg), RegInfo, OpAct_Write);
|
||||||
End
|
End
|
||||||
{the registers are loaded with values from different memory locations. If
|
{the registers are loaded with values from different memory locations. If
|
||||||
this was allowed, the instructions "mov -4(esi),eax" and "mov -4(ebp),eax"
|
this was allowed, the instructions "mov -4(esi),eax" and "mov -4(ebp),eax"
|
||||||
@ -2519,7 +2552,14 @@ End.
|
|||||||
|
|
||||||
{
|
{
|
||||||
$Log$
|
$Log$
|
||||||
Revision 1.21 2001-09-04 14:01:04 jonas
|
Revision 1.22 2001-10-12 13:58:05 jonas
|
||||||
|
+ memory references are now replaced by register reads in "regular"
|
||||||
|
instructions (e.g. "addl ref1,%eax" will be replaced by "addl %ebx,%eax"
|
||||||
|
if %ebx contains ref1). Previously only complete load sequences were
|
||||||
|
optimized away, but not such small accesses in other instructions than
|
||||||
|
mov/movzx/movsx
|
||||||
|
|
||||||
|
Revision 1.21 2001/09/04 14:01:04 jonas
|
||||||
* commented out some inactive code in csopt386
|
* commented out some inactive code in csopt386
|
||||||
+ small improvement: lea is now handled the same as mov/zx/sx
|
+ small improvement: lea is now handled the same as mov/zx/sx
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user