mirror of
https://gitlab.com/freepascal.org/fpc/source.git
synced 2025-09-08 09:59:12 +02:00
+ memory references are now replaced by register reads in "regular"
instructions (e.g. "addl ref1,%eax" will be replaced by "addl %ebx,%eax" if %ebx contains ref1). Previously only complete load sequences were optimized away, but not such individual memory accesses in instructions other than mov/movzx/movsx
This commit is contained in:
parent
97c1152c6d
commit
82f7cf0957
@ -185,10 +185,7 @@ function isSimpleMemLoc(const ref: treference): boolean;
|
||||
begin
|
||||
isSimpleMemLoc :=
|
||||
(ref.index = R_NO) and
|
||||
(not(ref.base in (usableregs+[R_EDI])) or
|
||||
(assigned(ref.symbol) and
|
||||
(ref.base = R_NO) and
|
||||
(ref.index = R_NO)));
|
||||
not(ref.base in (usableregs+[R_EDI]));
|
||||
end;
|
||||
|
||||
{checks whether the current instruction sequence (starting with p) and the
|
||||
@ -1314,6 +1311,48 @@ begin
|
||||
(p.opcode = A_IDIV));
|
||||
end;
|
||||
|
||||
{ Looks for a register that, right before instruction "t", is known to hold
  the value stored at memory location "ref", so that the memory operand of
  "t" can be replaced by a register read.

  t:   the instruction whose memory operand is a candidate for replacement
  ref: the memory reference used by that operand

  Returns the matching register, narrowed to the size of the memory operand
  of "t" (8 bit for S_B/S_BW/S_BL, 16 bit for S_W/S_WL, 32 bit for S_L).
  Returns R_NO if there is no previous instruction, if "ref" is not a simple
  memory location, or if no register qualifies. }
function memtoreg(const t: Taicpu; const ref: treference): tregister;
var
  hp: tai;
  p: pTaiprop;
  regcounter: tregister;
  loadinstr: Taicpu;
begin
  memtoreg := R_NO;
  if not getlastinstruction(t,hp) then
    exit;
  { the register contents are taken from the state after the previous }
  { instruction                                                       }
  p := pTaiprop(hp.optinfo);
  if not isSimpleMemLoc(ref) then
    exit;
  for regcounter := R_EAX to R_EDI do
    { only consider registers whose contents come from a single load }
    if (p^.regs[regcounter].typ in [CON_REF,CON_NOREMOVEREF]) and
       (p^.regs[regcounter].nrofmods = 1) then
      begin
        loadinstr := Taicpu(p^.regs[regcounter].startmod);
        if ((loadinstr.opcode = A_MOV) or
            (loadinstr.opcode = A_MOVZX) or
            (loadinstr.opcode = A_MOVSX)) and
           (loadinstr.oper[0].typ = top_ref) and
           refsequal(ref,loadinstr.oper[0].ref^) then
          begin
            { a byte sized memory operand (also the source of movzx/movsx, }
            { i.e. S_BW and S_BL) can only be replaced by a register which }
            { has an 8 bit subregister: eax, ecx, edx or ebx               }
            if (not(t.opsize in [S_B,S_BW,S_BL]) or
                (regcounter in [R_EAX,R_ECX,R_EDX,R_EBX])) and
               sizescompatible(loadinstr.opsize,t.opsize) then
              begin
                { sizescompatible only accepts the sizes handled below }
                case t.opsize of
                  S_B,S_BW,S_BL:
                    memtoreg := reg32toreg8(regcounter);
                  S_W,S_WL:
                    memtoreg := reg32toreg16(regcounter);
                  S_L:
                    memtoreg := regcounter;
                end;
                exit;
              end;
          end;
      end;
end;
|
||||
|
||||
procedure DoCSE(AsmL: TAAsmOutput; First, Last: Tai; findPrevSeqs, doSubOpts: boolean);
|
||||
{marks the instructions that can be removed by RemoveInstructs. They're not
|
||||
removed immediately because sometimes an instruction needs to be checked in
|
||||
@ -1594,9 +1633,26 @@ Begin
|
||||
pTaiProp(p.optInfo)^.regs[Taicpu(p).oper[0].reg].startMod,hp1);
|
||||
end
|
||||
else
|
||||
if (Taicpu(p).oper[1].typ = top_reg) and
|
||||
not regInOp(Taicpu(p).oper[1].reg,Taicpu(p).oper[0]) then
|
||||
removePrevNotUsedLoad(p,reg32(Taicpu(p).oper[1].reg),false);
|
||||
begin
|
||||
if (Taicpu(p).oper[1].typ = top_reg) and
|
||||
not regInOp(Taicpu(p).oper[1].reg,Taicpu(p).oper[0]) then
|
||||
removePrevNotUsedLoad(p,reg32(Taicpu(p).oper[1].reg),false);
|
||||
if doSubOpts and
|
||||
(Taicpu(p).opcode <> A_LEA) and
|
||||
(Taicpu(p).oper[0].typ = top_ref) then
|
||||
begin
|
||||
regcounter :=
|
||||
memtoreg(taicpu(p),
|
||||
Taicpu(p).oper[0].ref^);
|
||||
if regcounter <> R_NO then
|
||||
begin
|
||||
Taicpu(p).loadreg(0,regcounter);
|
||||
allocregbetween(asml,reg32(regcounter),
|
||||
pTaiprop(p.optinfo)^.regs[reg32(regcounter)].startmod,
|
||||
p);
|
||||
end;
|
||||
end;
|
||||
end;
|
||||
{ at first, only try optimizations of large blocks, because doing }
|
||||
{ smaller ones may prevent bigger ones from completing in }
|
||||
{ the next pass }
|
||||
@ -1643,6 +1699,52 @@ Begin
|
||||
A_STD: If GetLastInstruction(p, hp1) And
|
||||
(PTaiProp(hp1.OptInfo)^.DirFlag = F_Set) Then
|
||||
PTaiProp(Tai(p).OptInfo)^.CanBeRemoved := True;
|
||||
else
|
||||
begin
|
||||
for cnt := 1 to maxch do
|
||||
begin
|
||||
case InsProp[taicpu(p).opcode].Ch[cnt] of
|
||||
Ch_ROp1:
|
||||
if (taicpu(p).oper[0].typ = top_ref) and
|
||||
((taicpu(p).opcode < A_F2XM1) or
|
||||
((taicpu(p).opcode > A_IN) and
|
||||
(taicpu(p).opcode < A_OUT)) or
|
||||
(taicpu(p).opcode = A_PUSH) or
|
||||
(taicpu(p).opcode = A_SUB) or
|
||||
(taicpu(p).opcode = A_TEST) or
|
||||
(taicpu(p).opcode = A_XOR))then
|
||||
begin
|
||||
regcounter :=
|
||||
memtoreg(taicpu(p),
|
||||
Taicpu(p).oper[0].ref^);
|
||||
if regcounter <> R_NO then
|
||||
begin
|
||||
Taicpu(p).loadreg(0,regcounter);
|
||||
allocregbetween(asml,reg32(regcounter),
|
||||
pTaiprop(p.optinfo)^.regs[reg32(regcounter)].startmod,
|
||||
p);
|
||||
end;
|
||||
end;
|
||||
|
||||
Ch_ROp2:
|
||||
if ((taicpu(p).opcode = A_CMP) or
|
||||
(taicpu(p).opcode = A_TEST)) and
|
||||
(taicpu(p).oper[1].typ = top_ref) then
|
||||
begin
|
||||
regcounter :=
|
||||
memtoreg(taicpu(p),
|
||||
Taicpu(p).oper[1].ref^);
|
||||
if regcounter <> R_NO then
|
||||
begin
|
||||
Taicpu(p).loadreg(1,regcounter);
|
||||
allocregbetween(asml,reg32(regcounter),
|
||||
pTaiprop(p.optinfo)^.regs[reg32(regcounter)].startmod,
|
||||
p);
|
||||
end;
|
||||
end;
|
||||
end;
|
||||
end;
|
||||
end;
|
||||
End
|
||||
End;
|
||||
End;
|
||||
@ -1742,7 +1844,14 @@ End.
|
||||
|
||||
{
|
||||
$Log$
|
||||
Revision 1.18 2001-09-04 14:01:03 jonas
|
||||
Revision 1.19 2001-10-12 13:58:05 jonas
|
||||
+ memory references are now replaced by register reads in "regular"
|
||||
instructions (e.g. "addl ref1,%eax" will be replaced by "addl %ebx,%eax"
|
||||
if %ebx contains ref1). Previously only complete load sequences were
|
||||
optimized away, but not such small accesses in other instructions than
|
||||
mov/movzx/movsx
|
||||
|
||||
Revision 1.18 2001/09/04 14:01:03 jonas
|
||||
* commented out some inactive code in csopt386
|
||||
+ small improvement: lea is now handled the same as mov/zx/sx
|
||||
|
||||
|
@ -191,6 +191,7 @@ function FindRegDealloc(reg: tregister; p: Tai): boolean;
|
||||
|
||||
Function RegsEquivalent(OldReg, NewReg: TRegister; Var RegInfo: TRegInfo; OpAct: TopAction): Boolean;
|
||||
Function InstructionsEquivalent(p1, p2: Tai; Var RegInfo: TRegInfo): Boolean;
|
||||
function sizescompatible(loadsize,newsize: topsize): boolean;
|
||||
Function OpsEqual(const o1,o2:toper): Boolean;
|
||||
|
||||
Function DFAPass1(AsmL: TAAsmOutput; BlockStart: Tai): Tai;
|
||||
@ -1164,10 +1165,13 @@ var
|
||||
lastRemovedWasDealloc, firstRemovedWasAlloc, first: boolean;
|
||||
Begin
|
||||
If not(reg in usableregs+[R_EDI,R_ESI]) or
|
||||
not(assigned(p1)) Then
|
||||
not(assigned(p1)) then
|
||||
{ this happens with registers which are loaded implicitly, outside the }
|
||||
{ current block (e.g. esi with self) }
|
||||
exit;
|
||||
{ make sure we allocate it for this instruction }
|
||||
if p1 = p2 then
|
||||
getnextinstruction(p2,p2);
|
||||
lastRemovedWasDealloc := false;
|
||||
firstRemovedWasAlloc := false;
|
||||
first := true;
|
||||
@ -1433,6 +1437,34 @@ Begin {checks whether the two ops are equal}
|
||||
End;
|
||||
End;
|
||||
|
||||
|
||||
{ Checks whether an access with operand size "newsize" is compatible with a
  load that was done with operand size "loadsize".

  - byte based loads (S_B, S_BW, S_BL) accept the identical size or S_B
  - word based loads (S_W, S_WL) accept the identical size or S_W
  - every other load size only accepts S_L }
function sizescompatible(loadsize,newsize: topsize): boolean;
begin
  if loadsize in [S_B,S_BW,S_BL] then
    sizescompatible := (newsize = S_B) or (newsize = loadsize)
  else if loadsize in [S_W,S_WL] then
    sizescompatible := (newsize = S_W) or (newsize = loadsize)
  else
    sizescompatible := (newsize = S_L);
end;
|
||||
|
||||
|
||||
{ Checks whether the opcode and operand size of p2 are compatible with those
  of p1.

  If p1 is a movzx/movsx, p2 must either have the same opcode or be a plain
  mov, and its operand size must be compatible with that of p1 according to
  sizescompatible. For every other opcode, both opcode and operand size have
  to be identical. }
function opscompatible(p1,p2: Taicpu): boolean;
begin
  if (p1.opcode = A_MOVZX) or (p1.opcode = A_MOVSX) then
    opscompatible :=
      ((p2.opcode = A_MOV) or (p2.opcode = p1.opcode)) and
      sizescompatible(p1.opsize,p2.opsize)
  else
    opscompatible :=
      (p1.opsize = p2.opsize) and
      (p1.opcode = p2.opcode);
end;
|
||||
|
||||
Function InstructionsEquivalent(p1, p2: Tai; Var RegInfo: TRegInfo): Boolean;
|
||||
{$ifdef csdebug}
|
||||
var
|
||||
@ -1442,7 +1474,7 @@ Begin {checks whether two Taicpu instructions are equal}
|
||||
If Assigned(p1) And Assigned(p2) And
|
||||
(Tai(p1).typ = ait_instruction) And
|
||||
(Tai(p1).typ = ait_instruction) And
|
||||
(Taicpu(p1).opcode = Taicpu(p2).opcode) And
|
||||
opscompatible(Taicpu(p1),Taicpu(p2)) and
|
||||
(Taicpu(p1).oper[0].typ = Taicpu(p2).oper[0].typ) And
|
||||
(Taicpu(p1).oper[1].typ = Taicpu(p2).oper[1].typ) And
|
||||
(Taicpu(p1).oper[2].typ = Taicpu(p2).oper[2].typ)
|
||||
@ -1476,7 +1508,8 @@ Begin {checks whether two Taicpu instructions are equal}
|
||||
AddOp2RegInfo(Taicpu(p1).oper[0], RegInfo);
|
||||
{the registers from .oper[1] have to be equivalent, but not necessarily equal}
|
||||
InstructionsEquivalent :=
|
||||
RegsEquivalent(Taicpu(p1).oper[1].reg, Taicpu(p2).oper[1].reg, RegInfo, OpAct_Write);
|
||||
RegsEquivalent(reg32(Taicpu(p1).oper[1].reg),
|
||||
reg32(Taicpu(p2).oper[1].reg), RegInfo, OpAct_Write);
|
||||
End
|
||||
{the registers are loaded with values from different memory locations. If
|
||||
this was allowed, the instructions "mov -4(esi),eax" and "mov -4(ebp),eax"
|
||||
@ -2519,7 +2552,14 @@ End.
|
||||
|
||||
{
|
||||
$Log$
|
||||
Revision 1.21 2001-09-04 14:01:04 jonas
|
||||
Revision 1.22 2001-10-12 13:58:05 jonas
|
||||
+ memory references are now replaced by register reads in "regular"
|
||||
instructions (e.g. "addl ref1,%eax" will be replaced by "addl %ebx,%eax"
|
||||
if %ebx contains ref1). Previously only complete load sequences were
|
||||
optimized away, but not such small accesses in other instructions than
|
||||
mov/movzx/movsx
|
||||
|
||||
Revision 1.21 2001/09/04 14:01:04 jonas
|
||||
* commented out some inactive code in csopt386
|
||||
+ small improvement: lea is now handled the same as mov/zx/sx
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user