* fixed optimiser so it compiles again

* fixed several bugs which had already been in there for a long time, but
    which only popped up now :) -O2/-O3 will now optimise less than in
    the past (and correctly so), but -O2u/-O3u will optimise a bit more
* some more small improvements for -O3 are still possible
This commit is contained in:
Jonas Maebe 2003-11-22 00:40:19 +00:00
parent 52980d5488
commit aad37ce53c
5 changed files with 4022 additions and 4035 deletions

File diff suppressed because it is too large. (Load Diff)

File diff suppressed because it is too large. (Load Diff)

File diff suppressed because it is too large. (Load Diff)

View File

@ -21,23 +21,23 @@
****************************************************************************
}
Unit rrOpt386;
unit rropt386;
{$i fpcdefs.inc}
Interface
interface
Uses aasmbase,aasmtai,aasmcpu;
uses aasmbase,aasmtai,aasmcpu;
procedure doRenaming(asml: TAAsmoutput; first, last: Tai);
procedure doRenaming(asml: taasmoutput; first, last: tai);
Implementation
implementation
Uses
uses
{$ifdef replaceregdebug}cutils,{$endif}
verbose,globals,cpubase,daopt386,csopt386,cginfo,rgobj;
verbose,globals,cpubase,daopt386,csopt386,rgobj, cgbase, cgobj;
function canBeFirstSwitch(p: Taicpu; reg: tregister): boolean;
function canBeFirstSwitch(p: taicpu; supreg: tsuperregister): boolean;
{ checks whether an operation on reg can be switched to another reg without an }
{ additional mov, e.g. "addl $4,%reg1" can be changed to "leal 4(%reg1),%reg2" }
begin
@ -45,64 +45,56 @@ begin
case p.opcode of
A_MOV,A_MOVZX,A_MOVSX,A_LEA:
canBeFirstSwitch :=
(p.oper[1].typ = top_reg) and
(reg32(p.oper[1].reg).enum = reg.enum);
(p.oper[1]^.typ = top_reg) and
(getsupreg(p.oper[1]^.reg) = supreg);
A_IMUL:
canBeFirstSwitch :=
(p.ops >= 2) and
(reg32(p.oper[p.ops-1].reg).enum = reg.enum) and
(p.oper[0].typ <> top_ref) and
(getsupreg(p.oper[p.ops-1]^.reg) = supreg) and
(p.oper[0]^.typ <> top_ref) and
(not pTaiprop(p.optinfo)^.FlagsUsed);
A_INC,A_DEC,A_SUB,A_ADD:
canBeFirstSwitch :=
(p.oper[1].typ = top_reg) and
A_INC,A_DEC:
canBeFirstSwitch :=
(p.oper[0]^.typ = top_reg) and
(p.opsize = S_L) and
(reg32(p.oper[1].reg).enum = reg.enum) and
(p.oper[0].typ <> top_ref) and
(not pTaiprop(p.optinfo)^.FlagsUsed);
A_SUB,A_ADD:
canBeFirstSwitch :=
(p.oper[1]^.typ = top_reg) and
(p.opsize = S_L) and
(getsupreg(p.oper[1]^.reg) = supreg) and
(p.oper[0]^.typ <> top_ref) and
((p.opcode <> A_SUB) or
(p.oper[0].typ = top_const)) and
(p.oper[0]^.typ = top_const)) and
(not pTaiprop(p.optinfo)^.FlagsUsed);
A_SHL:
canBeFirstSwitch :=
(p.opsize = S_L) and
(p.oper[1].typ = top_reg) and
(p.oper[1].reg.enum = reg.enum) and
(p.oper[0].typ = top_const) and
(p.oper[0].val in [1,2,3]) and
(p.oper[1]^.typ = top_reg) and
(getsupreg(p.oper[1]^.reg) = supreg) and
(p.oper[0]^.typ = top_const) and
(p.oper[0]^.val in [1,2,3]) and
(not pTaiprop(p.optinfo)^.FlagsUsed);
end;
end;
procedure switchReg(var reg: tregister; reg1, reg2: tregister);
procedure switchReg(var reg: tregister; reg1, reg2: tsuperregister);
var
supreg: tsuperregister;
begin
if reg1.enum>lastreg then
internalerror(2003010801);
if reg2.enum>lastreg then
internalerror(2003010801);
if reg.enum>lastreg then
internalerror(2003010801);
if reg.enum = reg1.enum then
reg := reg2
else if reg.enum = reg2.enum then
reg := reg1
else if (reg.enum in regset8bit) then
begin
if (reg.enum = changeregsize(reg1,S_B).enum) then
reg := changeregsize(reg2,S_B)
else if reg.enum = changeregsize(reg2,S_B).enum then
reg := changeregsize(reg1,S_B);
end
else if (reg.enum in regset16bit) then
begin
if reg.enum = changeregsize(reg1,S_W).enum then
reg := changeregsize(reg2,S_W)
else if reg.enum = changeregsize(reg2,S_W).enum then
reg := changeregsize(reg1,S_W);
end;
if (reg = NR_NO) or
(getregtype(reg) <> R_INTREGISTER) then
exit;
supreg := getsupreg(reg);
if (supreg = reg1) then
setsupreg(reg,reg2)
else if (supreg = reg2) then
setsupreg(reg,reg1);
end;
procedure switchOp(var op: toper; reg1, reg2: tregister);
procedure switchOp(var op: toper; reg1, reg2: tsuperregister);
begin
case op.typ of
top_reg:
@ -115,35 +107,36 @@ begin
end;
end;
procedure doSwitchReg(hp: Taicpu; reg1,reg2: tregister);
procedure doSwitchReg(hp: taicpu; reg1,reg2: tsuperregister);
var
opCount: longint;
begin
for opCount := 0 to hp.ops-1 do
switchOp(hp.oper[opCount],reg1,reg2);
switchOp(hp.oper[opCount]^,reg1,reg2);
end;
procedure doFirstSwitch(p: Taicpu; reg1, reg2: tregister);
procedure doFirstSwitch(p: taicpu; reg1, reg2: tsuperregister);
var
tmpRef: treference;
begin
case p.opcode of
A_MOV,A_MOVZX,A_MOVSX,A_LEA:
begin
changeOp(p.oper[1],reg1,reg2);
changeOp(p.oper[0],reg2,reg1);
changeOp(p.oper[1]^,reg1,reg2);
changeOp(p.oper[0]^,reg2,reg1);
end;
A_IMUL:
begin
p.ops := 3;
p.loadreg(2,p.oper[1].reg);
changeOp(p.oper[2],reg1,reg2);
p.loadreg(2,p.oper[1]^.reg);
changeOp(p.oper[2]^,reg1,reg2);
end;
A_INC,A_DEC:
begin
reference_reset(tmpref);
tmpref.base := reg1;
tmpref.base := newreg(R_INTREGISTER,reg1,R_SUBWHOLE);
case p.opcode of
A_INC:
tmpref.offset := 1;
@ -152,51 +145,51 @@ begin
end;
p.ops := 2;
p.opcode := A_LEA;
p.loadreg(1,reg2);
p.loadreg(1,newreg(R_INTREGISTER,reg2,R_SUBWHOLE));
p.loadref(0,tmpref);
end;
A_SUB,A_ADD:
begin
reference_reset(tmpref);
tmpref.base := reg1;
case p.oper[0].typ of
tmpref.base := newreg(R_INTREGISTER,reg1,R_SUBWHOLE);
case p.oper[0]^.typ of
top_const:
begin
tmpref.offset := p.oper[0].val;
tmpref.offset := longint(p.oper[0]^.val);
if p.opcode = A_SUB then
tmpref.offset := - tmpRef.offset;
end;
top_symbol:
tmpref.symbol := p.oper[0].sym;
tmpref.symbol := p.oper[0]^.sym;
top_reg:
begin
tmpref.index := p.oper[0].reg;
tmpref.index := p.oper[0]^.reg;
tmpref.scalefactor := 1;
end;
else internalerror(200010031);
end;
p.opcode := A_LEA;
p.loadref(0,tmpref);
p.loadreg(1,reg2);
p.loadreg(1,newreg(R_INTREGISTER,reg2,R_SUBWHOLE));
end;
A_SHL:
begin
reference_reset(tmpref);
tmpref.index := reg1;
tmpref.scalefactor := 1 shl p.oper[0].val;
tmpref.base := newreg(R_INTREGISTER,reg1,R_SUBWHOLE);
tmpref.scalefactor := 1 shl p.oper[0]^.val;
p.opcode := A_LEA;
p.loadref(0,tmpref);
p.loadreg(1,reg2);
p.loadreg(1,newreg(R_INTREGISTER,reg2,R_SUBWHOLE));
end;
else internalerror(200010032);
end;
end;
function switchRegs(asml: TAAsmoutput; reg1, reg2: tregister; start: Tai): Boolean;
function switchRegs(asml: taasmoutput; reg1, reg2: tsuperregister; start: tai): Boolean;
{ change movl %reg1,%reg2 ... bla ... to ... bla with reg1 and reg2 switched }
var
endP, hp, lastreg1,lastreg2: Tai;
endP, hp, lastreg1,lastreg2: tai;
switchDone, switchLast, tmpResult, sequenceEnd, reg1Modified, reg2Modified: boolean;
reg1StillUsed, reg2StillUsed, isInstruction: boolean;
begin
@ -217,8 +210,8 @@ begin
{ "mov %oldReg,%newReg; <operations on %newReg>; mov %newReg, }
{ %oldReg" to "<operations on %oldReg>" }
switchLast := storeBack(endP,reg1,reg2);
reg1StillUsed := reg1.enum in pTaiprop(endp.optinfo)^.usedregs;
reg2StillUsed := reg2.enum in pTaiprop(endp.optinfo)^.usedregs;
reg1StillUsed := reg1 in pTaiprop(endp.optinfo)^.usedregs;
reg2StillUsed := reg2 in pTaiprop(endp.optinfo)^.usedregs;
isInstruction := endp.typ = ait_instruction;
sequenceEnd :=
switchLast or
@ -227,13 +220,13 @@ begin
(not reg1StillUsed and not reg2StillUsed) or
{ no support for (i)div, mul and imul with hardcoded operands }
(((not isInstruction) or
noHardCodedRegs(Taicpu(endP),reg1,reg2)) and
noHardCodedRegs(taicpu(endP),reg1,reg2)) and
(not reg1StillUsed or
(isInstruction and findRegDealloc(reg1,endP) and
regLoadedWithNewValue(reg1,false,Taicpu(endP)))) and
regLoadedWithNewValue(reg1,false,taicpu(endP)))) and
(not reg2StillUsed or
(isInstruction and findRegDealloc(reg2,endP) and
regLoadedWithNewValue(reg2,false,Taicpu(endP)))));
regLoadedWithNewValue(reg2,false,taicpu(endP)))));
{ we can't switch reg1 and reg2 in something like }
{ movl %reg1,%reg2 }
@ -249,7 +242,7 @@ begin
if not reg1Modified then
begin
reg1Modified := regModifiedByInstruction(reg1,endP);
if reg1Modified and not canBeFirstSwitch(Taicpu(endP),reg1) then
if reg1Modified and not canBeFirstSwitch(taicpu(endP),reg1) then
begin
tmpResult := false;
break;
@ -264,9 +257,9 @@ begin
tmpResult :=
(endp.typ <> ait_label) and
((not isInstruction) or
(NoHardCodedRegs(Taicpu(endP),reg1,reg2) and
RegSizesOk(reg1,reg2,Taicpu(endP)) and
(Taicpu(endp).opcode <> A_JMP)));
(NoHardCodedRegs(taicpu(endP),reg1,reg2) and
RegSizesOk(reg1,reg2,taicpu(endP)) and
(taicpu(endp).opcode <> A_JMP)));
end;
end;
@ -289,33 +282,35 @@ begin
reg1Modified := regModifiedByInstruction(reg1,hp);
if reg1Modified then
begin
doFirstSwitch(Taicpu(hp),reg1,reg2);
doFirstSwitch(taicpu(hp),reg1,reg2);
switchDone := true;
end;
end;
if not switchDone then
if reg1Modified then
doSwitchReg(Taicpu(hp),reg1,reg2)
doSwitchReg(taicpu(hp),reg1,reg2)
else
doReplaceReg(Taicpu(hp),reg2,reg1);
doReplaceReg(taicpu(hp),reg2,reg1);
end;
if regininstruction(reg1.enum,hp) then
if regininstruction(reg1,hp) then
lastreg1 := hp;
if regininstruction(reg2.enum,hp) then
if regininstruction(reg2,hp) then
lastreg2 := hp;
getNextInstruction(hp,hp);
end;
if switchLast then
doSwitchReg(Taicpu(hp),reg1,reg2)
else getLastInstruction(hp,hp);
allocRegBetween(asmL,reg1,start,lastreg1);
allocRegBetween(asmL,reg2,start,lastreg2);
doSwitchReg(taicpu(hp),reg1,reg2)
else
getLastInstruction(hp,hp);
allocRegBetween(asmL,newreg(R_INTREGISTER,reg1,R_SUBWHOLE),start,lastreg1);
allocRegBetween(asmL,newreg(R_INTREGISTER,reg2,R_SUBWHOLE),start,lastreg2);
end;
end;
procedure doRenaming(asml: TAAsmoutput; first, last: Tai);
procedure doRenaming(asml: taasmoutput; first, last: tai);
var
p: Tai;
p: tai;
begin
p := First;
SkipHead(p);
@ -324,25 +319,18 @@ begin
case p.typ of
ait_instruction:
begin
case Taicpu(p).opcode of
case taicpu(p).opcode of
A_MOV:
begin
if not(pTaiprop(p.optinfo)^.canBeRemoved) and
(Taicpu(p).oper[0].typ = top_reg) and
(Taicpu(p).oper[1].typ = top_reg) and
(Taicpu(p).opsize = S_L) and
{ (Taicpu(p).oper[0].reg.enum in (rg.usableregsint+[R_EDI])) and
(Taicpu(p).oper[1].reg.enum in (rg.usableregsint+[R_EDI])) then}
(Taicpu(p).oper[0].reg.enum in ([R_EDI])) and
(Taicpu(p).oper[1].reg.enum in ([R_EDI])) then
if switchRegs(asml,Taicpu(p).oper[0].reg,
Taicpu(p).oper[1].reg,p) then
(taicpu(p).oper[0]^.typ = top_reg) and
(taicpu(p).oper[1]^.typ = top_reg) and
(taicpu(p).opsize = S_L) and
(getsupreg(taicpu(p).oper[0]^.reg) in ([RS_EAX,RS_EBX,RS_ECX,RS_EDX,RS_ESI,RS_EDI])) and
(getsupreg(taicpu(p).oper[1]^.reg) in ([RS_EAX,RS_EBX,RS_ECX,RS_EDX,RS_ESI,RS_EDI])) then
if switchRegs(asml,getsupreg(taicpu(p).oper[0]^.reg),
getsupreg(taicpu(p).oper[1]^.reg),p) then
begin
{ getnextinstruction(p,hp);
asmL^.remove(p);
dispose(p,done);
p := hp;
continue }
pTaiprop(p.optinfo)^.canBeRemoved := true;
end;
end;
@ -358,7 +346,14 @@ End.
{
$Log$
Revision 1.22 2003-06-03 21:09:05 peter
Revision 1.23 2003-11-22 00:40:19 jonas
* fixed optimiser so it compiles again
* fixed several bugs which were in there already for a long time, but
which only popped up now :) -O2/-O3 will now optimise less than in
the past (and correctly so), but -O2u/-O3u will optimise a bit more
* some more small improvements for -O3 are still possible
Revision 1.22 2003/06/03 21:09:05 peter
* internal changeregsize for optimizer
* fix with a hack to not remove the first instruction of a block
which will leave blockstart pointing to invalid memory

View File

@ -24,7 +24,6 @@ unit psub;
{$i fpcdefs.inc}
{$define NOOPT}
interface
@ -752,14 +751,6 @@ implementation
if not(cs_no_regalloc in aktglobalswitches) then
begin
cg.do_register_allocation(aktproccode,headertai);
(*
{$ifndef NoOpt}
if (cs_optimize in aktglobalswitches) and
{ do not optimize pure assembler procedures }
not(pi_is_assembler in current_procinfo.flags) then
optimize(aktproccode);
{$endif NoOpt}
*)
end;
{ Add save and restore of used registers }
@ -776,6 +767,17 @@ implementation
aktfilepos:=exitpos;
gen_stackfree_code(templist,usesacc,usesacchi);
aktproccode.concatlist(templist);
{$ifndef NoOpt}
if not(cs_no_regalloc in aktglobalswitches) then
begin
if (cs_optimize in aktglobalswitches) and
{ do not optimize pure assembler procedures }
not(pi_is_assembler in current_procinfo.flags) then
optimize(aktproccode);
end;
{$endif NoOpt}
{ Add end symbol and debug info }
aktfilepos:=exitpos;
gen_proc_symbol_end(templist);
@ -1312,7 +1314,14 @@ implementation
end.
{
$Log$
Revision 1.171 2003-11-10 22:02:52 peter
Revision 1.172 2003-11-22 00:40:19 jonas
* fixed optimiser so it compiles again
* fixed several bugs which were in there already for a long time, but
which only popped up now :) -O2/-O3 will now optimise less than in
the past (and correctly so), but -O2u/-O3u will optimise a bit more
* some more small improvements for -O3 are still possible
Revision 1.171 2003/11/10 22:02:52 peter
* cross unit inlining fixed
Revision 1.170 2003/11/07 15:58:32 florian