+ register renaming ("fixes" bug1088)

* changed command line options meanings for optimizer:
      O2 now means peephole opts, CSE and register renaming in 1 pass
      O3 is the same, but repeated until no further optimizations are
        possible or until 5 passes have been done (to avoid endless loops)
  * changed aopt386 so it does this looping
  * added some procedures from csopt386 to the interface because they're
    used by rropt386 as well
  * some changes to csopt386 and daopt386 so that newly added instructions
    by the CSE get optimizer info (they were simply skipped previously),
    this fixes some bugs
This commit is contained in:
Jonas Maebe 2000-10-24 10:40:52 +00:00
parent 5aa1face10
commit a4fde73649
9 changed files with 1151 additions and 605 deletions

View File

@ -1870,7 +1870,7 @@ option_help_pages=11025_[
3*2Ou_enable uncertain optimizations (see docs)
3*2O1_level 1 optimizations (quick optimizations)
3*2O2_level 2 optimizations (-O1 + slower optimizations)
3*2O3_level 3 optimizations (same as -O2u)
3*2O3_level 3 optimizations (-O2 repeatedly, max 5 times)
3*2Op<x>_target processor:
3*3Op1_set target processor to 386/486
3*3Op2_set target processor to Pentium/PentiumMMX (tm)

View File

@ -43,15 +43,20 @@ Uses
Procedure Optimize(AsmL: PAasmOutput);
Var
count, max: longint;
BlockStart, BlockEnd, HP: Pai;
pass: longint;
slowopt, changed, lastLoop: boolean;
Begin
if (cs_slowoptimize in aktglobalswitches) then
{ Optimize twice }
max := 2
else max := 1;
for count := 1 to max do
begin
slowopt := (cs_slowoptimize in aktglobalswitches);
pass := 0;
changed := false;
repeat
lastLoop :=
not(slowopt) or
(not changed and (pass > 2)) or
{ prevent endless loops }
(pass = 4);
changed := false;
{ Setup labeltable, always necessary }
BlockStart := Pai(AsmL^.First);
BlockEnd := DFAPass1(AsmL, BlockStart);
@ -59,13 +64,15 @@ Begin
{ or nil }
While Assigned(BlockStart) Do
Begin
if pass = 0 then
PrePeepHoleOpts(AsmL, BlockStart, BlockEnd);
{ Peephole optimizations }
PeepHoleOptPass1(AsmL, BlockStart, BlockEnd);
{ Only perform them twice in the first pass }
if count = 1 then
if pass = 0 then
PeepHoleOptPass1(AsmL, BlockStart, BlockEnd);
{ Data flow analyzer }
If (cs_slowoptimize in aktglobalswitches) Then
If (cs_fastoptimize in aktglobalswitches) Then
Begin
If DFAPass2(
{$ifdef statedebug}
@ -73,10 +80,12 @@ Begin
{$endif statedebug}
BlockStart, BlockEnd) Then
{ common subexpression elimination }
CSE(AsmL, BlockStart, BlockEnd);
changed := CSE(asmL, blockStart, blockEnd, pass) or changed;
End;
{ More peephole optimizations }
PeepHoleOptPass2(AsmL, BlockStart, BlockEnd);
if lastLoop then
PostPeepHoleOpts(AsmL, BlockStart, BlockEnd);
{ Dispose labeltable }
ShutDownDFA;
{ Continue where we left off, BlockEnd is either the start of an }
@ -100,15 +109,29 @@ Begin
BlockEnd := DFAPass1(AsmL, BlockStart)
{ Otherwise, skip the next assembler block }
Else BlockStart := HP;
End
End;
end;
End;
End;
inc(pass);
until lastLoop;
End;
End.
{
$Log$
Revision 1.1 2000-10-15 09:47:42 peter
Revision 1.2 2000-10-24 10:40:53 jonas
+ register renaming ("fixes" bug1088)
* changed command line options meanings for optimizer:
O2 now means peephole opts, CSE and register renaming in 1 pass
O3 is the same, but repeated until no further optimizations are
possible or until 5 passes have been done (to avoid endless loops)
* changed aopt386 so it does this looping
* added some procedures from csopt386 to the interface because they're
used by rropt386 as well
* some changes to csopt386 and daopt386 so that newly added instructions
by the CSE get optimizer info (they were simply skipped previously),
this fixes some bugs
Revision 1.1 2000/10/15 09:47:42 peter
* moved to i386/
Revision 1.5 2000/09/24 15:06:11 peter

View File

@ -27,16 +27,21 @@ Unit CSOpt386;
Interface
Uses aasm;
Uses aasm, cpubase, cpuasm;
{Procedure CSOpt386(First, Last: Pai);}
Procedure CSE(AsmL: PAasmOutput; First, Last: Pai);
function CSE(asmL: paasmoutput; first, last: pai; pass: longint): boolean;
function doReplaceReg(hp: paicpu; newReg, orgReg: tregister): boolean;
function changeOp(var o: toper; newReg, orgReg: tregister): boolean;
function storeBack(p1: pai; orgReg, newReg: tregister): boolean;
function NoHardCodedRegs(p: paicpu; orgReg, newReg: TRegister): boolean;
function RegSizesOK(oldReg,newReg: TRegister; p: paicpu): boolean;
Implementation
Uses
{$ifdef replaceregdebug}cutils,{$endif}
verbose, hcodegen, globals,cpubase,cpuasm,DAOpt386, tgeni386;
globtype, verbose, hcodegen, globals, daopt386, tgeni386, rropt386;
{
Function PaiInSequence(P: Pai; Const Seq: TContent): Boolean;
@ -83,7 +88,7 @@ begin
end;
end
else
if is_reg_var[reg32(p^.oper[1].reg)] then
{ if is_reg_var[reg32(p^.oper[1].reg)] then }
for regCounter := R_EAX to R_EDI do
begin
if writeDestroysContents(p^.oper[1],regCounter,c[regCounter]) then
@ -130,9 +135,9 @@ begin
for opCount := 1 to MaxCh do
case InsProp[p^.opcode].Ch[opCount] of
Ch_MOp1,CH_WOp1,CH_RWOp1:
if (p^.oper[0].typ = top_ref) or
((p^.oper[0].typ = top_reg) and
is_reg_var[reg32(p^.oper[0].reg)]) then
{ if (p^.oper[0].typ = top_ref) or }
{ ((p^.oper[0].typ = top_reg) and }
{ is_reg_var[reg32(p^.oper[0].reg)]) then }
for regCounter := R_EAX to R_EDI do
if writeDestroysContents(p^.oper[0],regCounter,c[regCounter]) then
begin
@ -140,9 +145,9 @@ begin
modifiesConflictingMemLocation := not(reg in regsStillValid);
end;
Ch_MOp2,CH_WOp2,CH_RWOp2:
if (p^.oper[1].typ = top_ref) or
((p^.oper[1].typ = top_reg) and
is_reg_var[reg32(p^.oper[1].reg)]) then
{ if (p^.oper[1].typ = top_ref) or }
{ ((p^.oper[1].typ = top_reg) and }
{ is_reg_var[reg32(p^.oper[1].reg)]) then }
for regCounter := R_EAX to R_EDI do
if writeDestroysContents(p^.oper[1],regCounter,c[regCounter]) then
begin
@ -150,9 +155,9 @@ begin
modifiesConflictingMemLocation := not(reg in regsStillValid);
end;
Ch_MOp3,CH_WOp3,CH_RWOp3:
if (p^.oper[2].typ = top_ref) or
((p^.oper[2].typ = top_reg) and
is_reg_var[reg32(p^.oper[2].reg)]) then
{ if (p^.oper[2].typ = top_ref) or }
{ ((p^.oper[2].typ = top_reg) and }
{ is_reg_var[reg32(p^.oper[2].reg)]) then }
for regCounter := R_EAX to R_EDI do
if writeDestroysContents(p^.oper[2],regCounter,c[regCounter]) then
begin
@ -290,7 +295,7 @@ end;
Found holds the number of instructions between StartMod and EndMod and false
is returned}
Function CheckSequence(p: Pai; var prev: pai; Reg: TRegister; Var Found: Longint;
Var RegInfo: TRegInfo): Boolean;
Var RegInfo: TRegInfo; findPrevSeqs: boolean): Boolean;
const
checkingPrevSequences: boolean = false;
@ -310,7 +315,8 @@ var
in [con_ref,con_noRemoveRef]);
if currentReg > R_EDI then
begin
if isSimpleMemLoc(paicpu(p)^.oper[0].ref^) then
if (paicpu(p)^.oper[0].typ <> top_ref) or
isSimpleMemLoc(paicpu(p)^.oper[0].ref^) then
begin
checkingPrevSequences := true;
passedJump := false;
@ -321,15 +327,19 @@ var
else getNextRegToTest := currentReg;
end;
if checkingPrevSequences then
getNextRegToTest :=
getPrevSequence(p,reg,prev,prev,passedJump,regsNotRead,RegsStillValid);
if findPrevSeqs then
getNextRegToTest :=
getPrevSequence(p,reg,prev,prev,passedJump,regsNotRead,RegsStillValid)
else
getNextRegToTest := R_NO;
end;
Var hp2, hp3{, EndMod},highPrev, orgPrev: Pai;
{Cnt,} OldNrOfMods: Longint;
startRegInfo, OrgRegInfo, HighRegInfo: TRegInfo;
regModified: array[R_NO..R_EDI] of boolean;
HighFound, OrgRegFound: Byte;
RegCounter, regCounter2: TRegister;
RegCounter, regCounter2, tmpreg: TRegister;
OrgRegResult: Boolean;
TmpResult: Boolean;
{TmpState: Byte;}
@ -356,6 +366,7 @@ Begin {CheckSequence}
regCounter := getNextRegToTest(prev,R_NO);
While (RegCounter <> R_NO) Do
Begin
fillchar(regModified,sizeof(regModified),0);
regInfo := startRegInfo;
Found := 0;
hp2 := PPaiProp(prev^.OptInfo)^.Regs[RegCounter].StartMod;
@ -371,13 +382,34 @@ Begin {CheckSequence}
((paicpu(hp3)^.opcode = A_MOV) or
(paicpu(hp3)^.opcode = A_MOVZX) or
(paicpu(hp3)^.opcode = A_MOVSX)) and
(paicpu(hp3)^.oper[0].typ in
[top_const,top_ref,top_symbol]) and
(paicpu(hp3)^.oper[1].typ = top_reg) and
not(regInRef(reg32(paicpu(hp3)^.oper[1].reg),
paicpu(hp3)^.oper[0].ref^)) then
regInfo.lastReload
[reg32(paicpu(hp3)^.oper[1].reg)] := hp3;
not(regInOp(paicpu(hp3)^.oper[1].reg,
paicpu(hp3)^.oper[0])) then
begin
tmpreg := reg32(paicpu(hp3)^.oper[1].reg);
regInfo.lastReload[tmpreg] := hp3;
case paicpu(hp3)^.oper[0].typ of
top_ref:
begin
if regModified[reg32(paicpu(hp3)^.oper[0].ref^.base)] then
with ppaiprop(hp3^.optinfo)^.regs[tmpreg] do
if nrOfMods > (oldNrOfMods - found) then
oldNrOfMods := found + nrOfMods;
if regModified[reg32(paicpu(hp3)^.oper[0].ref^.index)] then
with ppaiprop(hp3^.optinfo)^.regs[tmpreg] do
if nrOfMods > (oldNrOfMods - found) then
oldNrOfMods := found + nrOfMods;
end;
top_reg:
if regModified[reg32(paicpu(hp3)^.oper[0].reg)] then
with ppaiprop(hp3^.optinfo)^.regs[tmpreg] do
if nrOfMods > (oldNrOfMods - found) then
oldNrOfMods := found + nrOfMods;
end;
end;
for regCounter2 := R_EAX to R_EDI do
regModified[regCounter2] := regModified[regCounter2] or
regModifiedByInstruction(regCounter2,hp3);
GetNextInstruction(hp2, hp2);
GetNextInstruction(hp3, hp3);
Inc(Found)
@ -674,40 +706,6 @@ begin
end;
function FindRegDealloc(reg: tregister; p: pai): boolean;
{ Scans backwards from p, over the run of directly preceding items that   }
{ may be skipped (skipinstr items, alignments and skippable labels),      }
{ looking for a deallocation marker of reg.                               }
{                                                                         }
{ - If the first regalloc marker found for reg is a deallocation, the     }
{   result is true.                                                       }
{ - If an allocation marker of reg was passed first, a deallocation found }
{   afterwards only counts when the instruction following that marker     }
{   satisfies regLoadedWithNewValue(reg,false,...).                       }
{ - If the run ends without finding a deallocation, the result is false.  }
{                                                                         }
{ assumes reg is a 32bit register }
var
  followingInstr: pai;
  noAllocSeenYet: boolean;
begin
  FindRegDealloc := false;
  noAllocSeenYet := true;
  while assigned(p^.previous) and
        ((pai(p^.previous)^.typ in (skipinstr+[ait_align])) or
         ((pai(p^.previous)^.typ = ait_label) and
          labelCanBeSkipped(pai_label(p^.previous)))) do
    begin
      p := pai(p^.previous);
      { only regalloc markers that concern reg are of interest }
      if (p^.typ <> ait_regalloc) or
         (pairegalloc(p)^.reg <> reg) then
        continue;
      if pairegalloc(p)^.allocation then
        begin
          { remember that an allocation was passed and keep looking }
          noAllocSeenYet := false;
          continue;
        end;
      { p is a deallocation of reg: decide and stop scanning }
      if noAllocSeenYet then
        FindRegDealloc := true
      else
        FindRegDealloc :=
          getNextInstruction(p,followingInstr) and
          regLoadedWithNewValue(reg,false,followingInstr);
      break;
    end;
end;
Procedure ClearRegContentsFrom(reg: TRegister; p, endP: pai);
{ first clears the contents of reg from p till endP. Then the contents are }
{ cleared until the first instruction that changes reg }
@ -753,7 +751,7 @@ begin
{$endif replaceregdebug}
end;
function NoHardCodedRegs(p: paicpu; orgReg, newReg: tRegister): boolean;
function NoHardCodedRegs(p: paicpu; orgReg, newReg: TRegister): boolean;
var chCount: byte;
begin
case p^.opcode of
@ -775,7 +773,7 @@ begin
end;
end;
function ChangeReg(var Reg: TRegister; orgReg, newReg: TRegister): boolean;
function ChangeReg(var Reg: TRegister; newReg, orgReg: TRegister): boolean;
begin
changeReg := true;
if reg = newReg then
@ -787,15 +785,15 @@ begin
else changeReg := false;
end;
function changeOp(var o: toper; orgReg, newReg: tregister): boolean;
function changeOp(var o: toper; newReg, orgReg: tregister): boolean;
begin
case o.typ of
top_reg: changeOp := changeReg(o.reg,orgReg,newReg);
top_reg: changeOp := changeReg(o.reg,newReg,orgReg);
top_ref:
begin
changeOp :=
changeReg(o.ref^.base,orgReg,newReg) or
changeReg(o.ref^.index,orgReg,newReg);
changeReg(o.ref^.base,newReg,orgReg) or
changeReg(o.ref^.index,newReg,orgReg);
end;
end;
end;
@ -829,14 +827,14 @@ begin
end;
end;
function doReplaceReg(orgReg,newReg: tregister; hp: paicpu): boolean;
function doReplaceReg(hp: paicpu; newReg, orgReg: tregister): boolean;
var
opCount: byte;
opCount: longint;
tmpResult: boolean;
begin
for opCount := 0 to 2 do
for opCount := 0 to hp^.ops-1 do
tmpResult :=
changeOp(hp^.oper[opCount],orgReg,newReg) or tmpResult;
changeOp(hp^.oper[opCount],newReg,orgReg) or tmpResult;
doReplaceReg := tmpResult;
end;
@ -858,7 +856,7 @@ begin
end;
end;
function doReplaceReadReg(orgReg,newReg: tregister; p: paicpu): boolean;
function doReplaceReadReg(p: paicpu; newReg,orgReg: tregister): boolean;
var opCount: byte;
begin
doReplaceReadReg := false;
@ -870,13 +868,13 @@ begin
1: internalerror(1301001);
2,3:
begin
if changeOp(p^.oper[0],orgReg,newReg) then
if changeOp(p^.oper[0],newReg,orgReg) then
begin
{ updateStates(orgReg,newReg,p,false);}
doReplaceReadReg := true;
end;
if p^.ops = 3 then
if changeOp(p^.oper[1],orgReg,newReg) then
if changeOp(p^.oper[1],newReg,orgReg) then
begin
{ updateStates(orgReg,newReg,p,false);}
doReplaceReadReg := true;
@ -889,7 +887,7 @@ begin
begin
for opCount := 0 to 2 do
if p^.oper[opCount].typ = top_ref then
if changeOp(p^.oper[opCount],orgReg,newReg) then
if changeOp(p^.oper[opCount],newReg,orgReg) then
begin
{ updateStates(orgReg,newReg,p,false);}
doReplaceReadReg := true;
@ -898,21 +896,21 @@ begin
case InsProp[p^.opcode].Ch[opCount] of
Ch_ROp1:
if p^.oper[0].typ = top_reg then
if changeReg(p^.oper[0].reg,orgReg,newReg) then
if changeReg(p^.oper[0].reg,newReg,orgReg) then
begin
{ updateStates(orgReg,newReg,p,false);}
doReplaceReadReg := true;
end;
Ch_ROp2:
if p^.oper[1].typ = top_reg then
if changeReg(p^.oper[1].reg,orgReg,newReg) then
if changeReg(p^.oper[1].reg,newReg,orgReg) then
begin
{ updateStates(orgReg,newReg,p,false);}
doReplaceReadReg := true;
end;
Ch_ROp3:
if p^.oper[2].typ = top_reg then
if changeReg(p^.oper[2].reg,orgReg,newReg) then
if changeReg(p^.oper[2].reg,newReg,orgReg) then
begin
{ updateStates(orgReg,newReg,p,false);}
doReplaceReadReg := true;
@ -998,6 +996,20 @@ begin
end;
function storeBack(p1: pai; orgReg, newReg: tregister): boolean;
{ Tells whether p1 is a register-to-register "mov" instruction that      }
{ stores the contents of newReg (operand 0) back to orgReg (operand 1).  }
{ The registers are compared exactly as passed in.                       }
var
  instr: paicpu;
begin
  storeBack := false;
  { anything that is not a mov instruction can never be a store-back }
  if p1^.typ <> ait_instruction then
    exit;
  instr := paicpu(p1);
  if instr^.opcode <> A_MOV then
    exit;
  { both operands must be registers before their reg fields are compared }
  if (instr^.oper[0].typ <> top_reg) or
     (instr^.oper[1].typ <> top_reg) then
    exit;
  storeBack :=
    (instr^.oper[0].reg = newReg) and
    (instr^.oper[1].reg = orgReg);
end;
function ReplaceReg(asmL: PaasmOutput; orgReg, newReg: TRegister; p: pai;
const c: TContent; orgRegCanBeModified: Boolean;
var returnEndP: pai): Boolean;
@ -1012,18 +1024,6 @@ var endP, hp: Pai;
removeLast, sequenceEnd, tmpResult, newRegModified, orgRegRead,
stateChanged, readStateChanged: Boolean;
function storeBack(p1: pai): boolean;
{ Tells whether p1 is a register-to-register "mov" instruction that      }
{ stores the contents of newReg (operand 0) back to orgReg (operand 1).  }
{ newReg and orgReg are taken from the enclosing routine.                }
var
  instr: paicpu;
begin
  storeBack := false;
  { anything that is not a mov instruction can never be a store-back }
  if p1^.typ <> ait_instruction then
    exit;
  instr := paicpu(p1);
  if instr^.opcode <> A_MOV then
    exit;
  { both operands must be registers before their reg fields are compared }
  if (instr^.oper[0].typ <> top_reg) or
     (instr^.oper[1].typ <> top_reg) then
    exit;
  storeBack :=
    (instr^.oper[0].reg = newReg) and
    (instr^.oper[1].reg = orgReg);
end;
begin
ReplaceReg := false;
@ -1055,7 +1055,7 @@ begin
{ if the newReg gets stored back to the oldReg, we can change }
{ "mov %oldReg,%newReg; <operations on %newReg>; mov %newReg, }
{ %oldReg" to "<operations on %oldReg>" }
removeLast := storeBack(endP);
removeLast := storeBack(endP, orgReg, newReg);
sequenceEnd :=
{ no support for (i)div, mul and imul with hardcoded operands }
(noHardCodedRegs(paicpu(endP),orgReg,newReg) and
@ -1144,14 +1144,14 @@ begin
if {not(PPaiProp(hp^.optInfo)^.canBeRemoved) and }
(hp^.typ = ait_instruction) then
stateChanged :=
doReplaceReg(orgReg,newReg,paicpu(hp)) or stateChanged;
doReplaceReg(paicpu(hp),newReg,orgReg) or stateChanged;
if stateChanged then
updateStates(orgReg,newReg,hp,true);
getNextInstruction(hp,hp)
end;
if assigned(endp) and (endp^.typ = ait_instruction) then
readStateChanged :=
DoReplaceReadReg(orgReg,newReg,paicpu(endP));
DoReplaceReadReg(paicpu(endP),newReg,orgReg);
if stateChanged or readStateChanged then
updateStates(orgReg,newReg,endP,stateChanged);
@ -1271,11 +1271,11 @@ begin
end;
Procedure DoCSE(AsmL: PAasmOutput; First, Last: Pai);
procedure DoCSE(AsmL: PAasmOutput; First, Last: Pai; findPrevSeqs, doSubOpts: boolean);
{marks the instructions that can be removed by RemoveInstructs. They're not
removed immediately because sometimes an instruction needs to be checked in
two different sequences}
var cnt, cnt2, cnt3: longint;
var cnt, cnt2, orgNrOfMods: longint;
p, hp1, hp2, prevSeq, prevSeq_next: Pai;
hp3, hp4: pai;
hp5 : pai;
@ -1284,7 +1284,6 @@ var cnt, cnt2, cnt3: longint;
Begin
p := First;
SkipHead(p);
First := p;
While (p <> Last) Do
Begin
Case p^.typ Of
@ -1302,11 +1301,19 @@ Begin
PPaiProp(Pai(p)^.OptInfo)^.CanBeRemoved := True;
A_MOV, A_MOVZX, A_MOVSX:
Begin
hp2 := p;
Case Paicpu(p)^.oper[0].typ Of
Top_Ref:
Begin {destination is always a register in this case}
top_ref, top_reg:
if (paicpu(p)^.oper[1].typ = top_reg) then
Begin
With PPaiProp(p^.OptInfo)^.Regs[Reg32(Paicpu(p)^.oper[1].reg)] Do
Begin
if assigned(startmod) and
(startmod = p)then
orgNrOfMods := ppaiprop(startmod^.optinfo)^.
regs[reg32(paicpu(p)^.oper[1].reg)].nrOfMods
else
orgNrOfMods := 0;
If (p = StartMod) And
GetLastInstruction (p, hp1) And
(hp1^.typ <> ait_marker) Then
@ -1317,7 +1324,7 @@ Begin
'cse checking '+att_reg2str[Reg32(Paicpu(p)^.oper[1].reg)])));
insertLLItem(asml,p,p^.next,hp5);
{$endif csdebug}
If CheckSequence(p,prevSeq,Paicpu(p)^.oper[1].reg, Cnt, RegInfo) And
If CheckSequence(p,prevSeq,Paicpu(p)^.oper[1].reg, Cnt, RegInfo, findPrevSeqs) And
(Cnt > 0) Then
Begin
hp1 := nil;
@ -1336,19 +1343,17 @@ Begin
{ movl 16(%ebp), %eax }
{ movl 8(%edx), %edx }
{ movl 4(%eax), eax }
hp2 := p;
Cnt2 := 1;
While Cnt2 <= Cnt Do
Begin
If Not(RegInInstruction(Paicpu(hp2)^.oper[1].reg, p)) then
(* If not(regInInstruction(Paicpu(hp2)^.oper[1].reg, p)) and
not(ppaiprop(p^.optinfo)^.canBeRemoved) then
begin
if ((p^.typ = ait_instruction) And
((paicpu(p)^.OpCode = A_MOV) or
(paicpu(p)^.opcode = A_MOVZX) or
(paicpu(p)^.opcode = A_MOVSX)) And
(paicpu(p)^.Oper[0].typ in
[top_const,top_ref,top_symbol])) and
(paicpu(p)^.oper[1].typ = top_reg) then
if (p^.typ = ait_instruction) And
((paicpu(p)^.OpCode = A_MOV) or
(paicpu(p)^.opcode = A_MOVZX) or
(paicpu(p)^.opcode = A_MOVSX)) And
(paicpu(p)^.oper[1].typ = top_reg) then
begin
regCounter := reg32(paicpu(p)^.oper[1].reg);
if (regCounter in reginfo.regsStillUsedAfterSeq) then
@ -1369,9 +1374,9 @@ Begin
end
{$endif noremove}
end
end
end *)
{$ifndef noremove}
else
{ else }
PPaiProp(p^.OptInfo)^.CanBeRemoved := True
{$endif noremove}
; Inc(Cnt2);
@ -1397,15 +1402,18 @@ Begin
(RegInfo.New2OldReg[RegCounter] <> R_NO) Then
Begin
AllocRegBetween(AsmL,RegInfo.New2OldReg[RegCounter],
PPaiProp(prevSeq^.OptInfo)^.Regs[RegInfo.New2OldReg[RegCounter]].StartMod,prevSeq_next);
PPaiProp(prevSeq^.OptInfo)^.Regs[RegInfo.New2OldReg[RegCounter]].StartMod,hp2);
if hp4 <> prevSeq then
begin
if assigned(reginfo.lastReload[regCounter]) then
getLastInstruction(reginfo.lastReload[regCounter],hp3)
else if assigned(reginfo.lastReload[regInfo.New2OldReg[regCounter]]) then
getLastInstruction(reginfo.lastReload[regInfo.new2OldReg[regCounter]],hp3)
else hp3 := hp4;
if prevSeq <> hp3 then
if prevSeq_next <> hp3 then
clearRegContentsFrom(regCounter,prevSeq_next,
hp3);
getnextInstruction(hp3,hp3);
allocRegBetween(asmL,regCounter,prevSeq,hp3);
end;
If Not(RegCounter In RegInfo.RegsLoadedForRef) And
@ -1421,20 +1429,23 @@ Begin
begin
hp3 := New(Pai_Marker,Init(NoPropInfoEnd));
InsertLLItem(AsmL, prevSeq, Pai(prevSeq^.next), hp3);
hp3 := New(Paicpu,Op_Reg_Reg(A_MOV, S_L,
hp5 := New(Paicpu,Op_Reg_Reg(A_MOV, S_L,
{old reg new reg}
RegInfo.New2OldReg[RegCounter], RegCounter));
InsertLLItem(AsmL, prevSeq, Pai(prevSeq^.next), hp3);
new(ppaiprop(hp5^.optinfo));
ppaiprop(hp5^.optinfo)^ := ppaiprop(prevSeq_next^.optinfo)^;
ppaiprop(hp5^.optinfo)^.canBeRemoved := false;
InsertLLItem(AsmL, prevSeq, Pai(prevSeq^.next), hp5);
hp3 := New(Pai_Marker,Init(NoPropInfoStart));
InsertLLItem(AsmL, prevSeq, Pai(prevSeq^.next), hp3);
{ adjusts states in previous instruction so that it will }
{ definitely be different from the previous or next state }
incstate(ppaiprop(prevSeq_next^.optinfo)^.
incstate(ppaiprop(hp5^.optinfo)^.
regs[RegInfo.New2OldReg[RegCounter]].rstate,20);
incstate(ppaiprop(prevSeq_next^.optinfo)^.
incstate(ppaiprop(hp5^.optinfo)^.
regs[regCounter].wstate,20);
updateState(RegInfo.New2OldReg[RegCounter],
prevSeq_next);
hp5);
end
End
Else
@ -1479,7 +1490,7 @@ Begin
(PPaiProp(p^.OptInfo)^.CanBeRemoved) Then
if (cnt > 0) then
begin
hp2 := p;
p := hp2;
Cnt2 := 1;
While Cnt2 <= Cnt Do
Begin
@ -1509,38 +1520,38 @@ Begin
end;
End;
End;
if not ppaiprop(p^.optinfo)^.canBeRemoved and
not regInRef(reg32(paicpu(p)^.oper[1].reg),
paicpu(p)^.oper[0].ref^) then
removePrevNotUsedLoad(p,reg32(paicpu(p)^.oper[1].reg),false);
End;
top_Reg:
{ try to replace the new reg with the old reg }
if not(PPaiProp(p^.optInfo)^.canBeRemoved) and
{ only remove if we're not storing something in a regvar }
(paicpu(p)^.oper[1].reg in (usableregs+[R_EDI])) and
(paicpu(p)^.opcode = A_MOV) and
getLastInstruction(p,hp4) then
begin
case paicpu(p)^.oper[1].typ of
top_Reg:
{ we only have to start replacing from the instruction after the mov, }
{ but replacereg only starts with getnextinstruction(p,p) }
if ReplaceReg(asmL,paicpu(p)^.oper[0].reg,
paicpu(p)^.oper[1].reg,p,
PPaiProp(hp4^.optInfo)^.Regs[paicpu(p)^.oper[1].reg],false,hp1) then
begin
PPaiProp(p^.optInfo)^.canBeRemoved := true;
allocRegBetween(asmL,paicpu(p)^.oper[0].reg,
PPaiProp(p^.optInfo)^.regs[paicpu(p)^.oper[0].reg].startMod,
hp1);
end
else
if reg32(paicpu(p)^.oper[0].reg) <> reg32(paicpu(p)^.oper[1].reg) then
removePrevNotUsedLoad(p,reg32(paicpu(p)^.oper[1].reg),false);
if not(PPaiProp(p^.optInfo)^.canBeRemoved) then
if (paicpu(p)^.oper[0].typ = top_reg) and
(paicpu(p)^.oper[1].typ = top_reg) and
{ only remove if we're not storing something in a regvar }
(paicpu(p)^.oper[1].reg in (usableregs+[R_EDI])) and
(paicpu(p)^.opcode = A_MOV) and
getLastInstruction(p,hp4) and
{ we only have to start replacing from the instruction after the mov, }
{ but replacereg only starts with getnextinstruction(p,p) }
replaceReg(asmL,paicpu(p)^.oper[0].reg,
paicpu(p)^.oper[1].reg,p,
ppaiprop(hp4^.optInfo)^.regs[paicpu(p)^.oper[1].reg],false,hp1) then
begin
ppaiprop(p^.optInfo)^.canBeRemoved := true;
allocRegBetween(asmL,paicpu(p)^.oper[0].reg,
ppaiProp(p^.optInfo)^.regs[paicpu(p)^.oper[0].reg].startMod,hp1);
end
end;
else
if (paicpu(p)^.oper[1].typ = top_reg) and
not regInOp(paicpu(p)^.oper[1].reg,paicpu(p)^.oper[0]) then
removePrevNotUsedLoad(p,reg32(paicpu(p)^.oper[1].reg),false);
{ at first, only try optimizations of large blocks, because doing }
{ smaller ones may prevent bigger ones from completing in the }
{ next pass }
if not doSubOpts and (orgNrOfMods <> 0) then
begin
p := hp2;
for cnt := 1 to pred(orgNrOfMods) do
getNextInstruction(p,p);
end;
End;
top_symbol,Top_Const:
Begin
Case Paicpu(p)^.oper[1].typ Of
@ -1584,21 +1595,48 @@ Begin
End;
End;
Procedure RemoveInstructs(AsmL: PAasmOutput; First, Last: Pai);
function removeInstructs(asmL: paasmoutput; first, last: pai): boolean;
{ Removes the marked instructions and disposes the PPaiProps of the other }
{ instructions }
Var p, hp1: Pai;
begin
removeInstructs := false;
p := First;
While (p <> Last) Do
Begin
If (p^.typ = ait_marker) and
(pai_marker(p)^.kind in [noPropInfoStart,noPropInfoEnd]) then
(pai_marker(p)^.kind = noPropInfoStart) then
begin
hp1 := pai(p^.next);
asmL^.remove(p);
dispose(p,done);
p := hp1
while not((hp1^.typ = ait_marker) and
(pai_marker(p)^.kind = noPropInfoEnd)) do
begin
p := pai(hp1^.next);
{$ifndef noinstremove}
{ allocregbetween can insert new ait_regalloc objects }
{ without optinfo }
if assigned(hp1^.optinfo) then
if ppaiprop(hp1^.optinfo)^.canBeRemoved then
begin
dispose(ppaiprop(hp1^.optinfo));
hp1^.optinfo := nil;
asmL^.remove(hp1);
dispose(hp1,done);
hp1 := p;
end
else
{$endif noinstremove}
begin
dispose(ppaiprop(hp1^.optinfo));
hp1^.optinfo := nil;
end;
hp1 := p;
end;
p := pai(hp1^.next);
asmL^.remove(hp1);
dispose(hp1,done);
end
else
{$ifndef noinstremove}
@ -1609,6 +1647,7 @@ begin
AsmL^.Remove(p);
Dispose(p, Done);
p := hp1;
removeInstructs := true;
End
Else
{$endif noinstremove}
@ -1617,20 +1656,37 @@ begin
p := pai(p^.next);;
End;
End;
FreeMem(PaiPropBlock, NrOfPaiObjs*(((SizeOf(TPaiProp)+3)div 4)*4))
FreeMem(PaiPropBlock, NrOfPaiObjs*SizeOf(TPaiProp))
End;
Procedure CSE(AsmL: PAasmOutput; First, Last: Pai);
function CSE(AsmL: PAasmOutput; First, Last: Pai; pass: longint): boolean;
Begin
DoCSE(AsmL, First, Last);
RemoveInstructs(AsmL, First, Last);
DoCSE(AsmL, First, Last, not(cs_slowoptimize in aktglobalswitches) or (pass >= 2),
not(cs_slowoptimize in aktglobalswitches) or (pass >= 1));
{ register renaming }
if not(cs_slowoptimize in aktglobalswitches) or (pass > 0) then
doRenaming(asmL, first, last);
cse := removeInstructs(asmL, first, last);
End;
End.
{
$Log$
Revision 1.1 2000-10-15 09:47:43 peter
Revision 1.2 2000-10-24 10:40:53 jonas
+ register renaming ("fixes" bug1088)
* changed command line options meanings for optimizer:
O2 now means peephole opts, CSE and register renaming in 1 pass
O3 is the same, but repeated until no further optimizations are
possible or until 5 passes have been done (to avoid endless loops)
* changed aopt386 so it does this looping
* added some procedures from csopt386 to the interface because they're
used by rropt386 as well
* some changes to csopt386 and daopt386 so that newly added instructions
by the CSE get optimizer info (they were simply skipped previously),
this fixes some bugs
Revision 1.1 2000/10/15 09:47:43 peter
* moved to i386/
Revision 1.14 2000/09/30 13:07:23 jonas

View File

@ -183,6 +183,7 @@ Function regLoadedWithNewValue(reg: tregister; canDependOnPrevValue: boolean;
hp: pai): boolean;
Procedure UpdateUsedRegs(Var UsedRegs: TRegSet; p: Pai);
Procedure AllocRegBetween(AsmL: PAasmOutput; Reg: TRegister; p1, p2: Pai);
function FindRegDealloc(reg: tregister; p: pai): boolean;
Function RegsEquivalent(OldReg, NewReg: TRegister; Var RegInfo: TRegInfo; OpAct: TopAction): Boolean;
Function InstructionsEquivalent(p1, p2: Pai; Var RegInfo: TRegInfo): Boolean;
@ -720,15 +721,14 @@ Begin
End
Else Regsequivalent := False
Else
If Not(Reg32(NewReg) in NewRegsEncountered) and
((OpAct = OpAct_Write) or
((newReg = oldReg) and
not(newReg in usableregs + [R_EDI]))) Then
Begin
AddReg2RegInfo(OldReg, NewReg, RegInfo);
RegsEquivalent := True
End
Else RegsEquivalent := False
If Not(Reg32(NewReg) in NewRegsEncountered) and
((OpAct = OpAct_Write) or
(newReg = oldReg)) Then
Begin
AddReg2RegInfo(OldReg, NewReg, RegInfo);
RegsEquivalent := True
End
Else RegsEquivalent := False
Else RegsEquivalent := False
Else RegsEquivalent := OldReg = NewReg
End;
@ -973,7 +973,7 @@ Begin
((current^.typ = ait_label) and
labelCanBeSkipped(pai_label(current)))) do
Current := Pai(Current^.Next);
If Assigned(Current) And
{ If Assigned(Current) And
(Current^.typ = ait_Marker) And
(Pai_Marker(Current)^.Kind = NoPropInfoStart) Then
Begin
@ -981,10 +981,10 @@ Begin
((Current^.typ <> ait_Marker) Or
(Pai_Marker(Current)^.Kind <> NoPropInfoEnd)) Do
Current := Pai(Current^.Next);
End;
End;}
Until Not(Assigned(Current)) Or
(Current^.typ <> ait_Marker) Or
(Pai_Marker(Current)^.Kind <> NoPropInfoEnd);
not(Pai_Marker(Current)^.Kind in [NoPropInfoStart,NoPropInfoEnd]);
Next := Current;
If Assigned(Current) And
Not((Current^.typ In SkipInstr) or
@ -1009,12 +1009,12 @@ Begin
Current := Pai(Current^.previous);
While Assigned(Current) And
(((Current^.typ = ait_Marker) And
Not(Pai_Marker(Current)^.Kind in [AsmBlockEnd,NoPropInfoEnd])) or
Not(Pai_Marker(Current)^.Kind in [AsmBlockEnd{,NoPropInfoEnd}])) or
(Current^.typ In SkipInstr) or
((Current^.typ = ait_label) And
labelCanBeSkipped(pai_label(current)))) Do
Current := Pai(Current^.previous);
If Assigned(Current) And
{ If Assigned(Current) And
(Current^.typ = ait_Marker) And
(Pai_Marker(Current)^.Kind = NoPropInfoEnd) Then
Begin
@ -1022,10 +1022,10 @@ Begin
((Current^.typ <> ait_Marker) Or
(Pai_Marker(Current)^.Kind <> NoPropInfoStart)) Do
Current := Pai(Current^.previous);
End;
End;}
Until Not(Assigned(Current)) Or
(Current^.typ <> ait_Marker) Or
(Pai_Marker(Current)^.Kind <> NoPropInfoStart);
not(Pai_Marker(Current)^.Kind in [NoPropInfoStart,NoPropInfoEnd]);
If Not(Assigned(Current)) or
(Current^.typ In SkipInstr) or
((Current^.typ = ait_label) And
@ -1177,6 +1177,41 @@ Begin
end;
End;
function FindRegDealloc(reg: tregister; p: pai): boolean;
{ Scans backwards from p, over the run of directly preceding items that   }
{ may be skipped (skipinstr items, alignments and skippable labels),      }
{ looking for a deallocation marker of reg.                               }
{                                                                         }
{ - If the first regalloc marker found for reg is a deallocation, the     }
{   result is true.                                                       }
{ - If an allocation marker of reg was passed first, a deallocation found }
{   afterwards only counts when the instruction following that marker     }
{   satisfies regLoadedWithNewValue(reg,false,...).                       }
{ - If the run ends without finding a deallocation, the result is false.  }
{                                                                         }
{ assumes reg is a 32bit register }
var
  followingInstr: pai;
  noAllocSeenYet: boolean;
begin
  FindRegDealloc := false;
  noAllocSeenYet := true;
  while assigned(p^.previous) and
        ((pai(p^.previous)^.typ in (skipinstr+[ait_align])) or
         ((pai(p^.previous)^.typ = ait_label) and
          labelCanBeSkipped(pai_label(p^.previous)))) do
    begin
      p := pai(p^.previous);
      { only regalloc markers that concern reg are of interest }
      if (p^.typ <> ait_regalloc) or
         (pairegalloc(p)^.reg <> reg) then
        continue;
      if pairegalloc(p)^.allocation then
        begin
          { remember that an allocation was passed and keep looking }
          noAllocSeenYet := false;
          continue;
        end;
      { p is a deallocation of reg: decide and stop scanning }
      if noAllocSeenYet then
        FindRegDealloc := true
      else
        FindRegDealloc :=
          getNextInstruction(p,followingInstr) and
          regLoadedWithNewValue(reg,false,followingInstr);
      break;
    end;
end;
Procedure IncState(Var S: Byte; amount: longint);
{Increases S by 1, wraps around at $ffff to 0 (so we won't get overflow
@ -1233,7 +1268,7 @@ Begin
sequenceDependsonReg := TmpResult
End;
procedure invalidateDepedingRegs(p1: ppaiProp; reg: tregister);
procedure invalidateDependingRegs(p1: ppaiProp; reg: tregister);
var
counter: tregister;
begin
@ -1277,7 +1312,7 @@ Begin
{ con_invalid and con_noRemoveRef = con_unknown }
else typ := con_unknown;
end;
invalidateDepedingRegs(p1,reg);
invalidateDependingRegs(p1,reg);
end;
End;
@ -1644,7 +1679,7 @@ function writeToRegDestroysContents(destReg: tregister; reg: tregister;
{ modified }
begin
writeToRegDestroysContents :=
(c.typ <> con_unknown) and
(c.typ in [con_ref,con_noRemoveRef,con_invalid]) and
sequenceDependsOnReg(c,reg,reg32(destReg));
end;
@ -2033,74 +2068,64 @@ Begin
A_MOV, A_MOVZX, A_MOVSX:
Begin
Case Paicpu(p)^.oper[0].typ Of
Top_Reg:
Case Paicpu(p)^.oper[1].typ Of
Top_Reg:
top_ref, top_reg:
case paicpu(p)^.oper[1].typ Of
top_reg:
Begin
{$ifdef statedebug}
hp := new(pai_asm_comment,init(strpnew('destroying '+
att_reg2str[Paicpu(p)^.oper[1].reg])));
insertllitem(asml,p,p^.next,hp);
{$endif statedebug}
DestroyReg(CurProp, Paicpu(p)^.oper[1].reg, true);
ReadReg(CurProp, Paicpu(p)^.oper[0].reg);
{ CurProp^.Regs[Paicpu(p)^.oper[1].reg] :=
CurProp^.Regs[Paicpu(p)^.oper[0].reg];
If (CurProp^.Regs[Paicpu(p)^.oper[1].reg].ModReg = R_NO) Then
CurProp^.Regs[Paicpu(p)^.oper[1].reg].ModReg :=
Paicpu(p)^.oper[0].reg;}
readOp(curprop, paicpu(p)^.oper[0]);
tmpreg := reg32(paicpu(p)^.oper[1].reg);
if regInOp(tmpreg, paicpu(p)^.oper[0]) and
(curProp^.regs[tmpReg].typ in [con_ref,con_noRemoveRef]) then
begin
with curprop^.regs[tmpreg] Do
begin
incState(wstate,1);
{ also store how many instructions are part of the sequence in the first }
{ instruction's PPaiProp, so it can be easily accessed from within }
{ CheckSequence }
inc(nrOfMods, nrOfInstrSinceLastMod[tmpreg]);
ppaiprop(startmod^.optinfo)^.regs[tmpreg].nrOfMods := nrOfMods;
nrOfInstrSinceLastMod[tmpreg] := 0;
{ Destroy the contents of the registers }
{ that depended on the previous value of }
{ this register }
invalidateDependingRegs(curprop,tmpreg);
end;
end
else
begin
{$ifdef statedebug}
hp := new(pai_asm_comment,init(strpnew('destroying & initing '+att_reg2str[tmpreg])));
insertllitem(asml,p,p^.next,hp);
{$endif statedebug}
destroyReg(curprop, tmpreg, true);
if not(reginop(tmpreg, paicpu(p)^.oper[0])) then
with curprop^.regs[tmpreg] Do
begin
typ := con_ref;
startmod := p;
nrOfMods := 1;
end
end;
{$ifdef StateDebug}
hp := new(pai_asm_comment,init(strpnew(att_reg2str[TmpReg]+': '+tostr(CurProp^.Regs[TmpReg].WState))));
InsertLLItem(AsmL, p, p^.next, hp);
{$endif StateDebug}
End;
Top_Ref:
{ can only be if oper[0] = top_reg }
Begin
ReadReg(CurProp, Paicpu(p)^.oper[0].reg);
ReadRef(CurProp, Paicpu(p)^.oper[1].ref);
DestroyRefs(p, Paicpu(p)^.oper[1].ref^, Paicpu(p)^.oper[0].reg);
End;
End;
Top_Ref:
Begin {destination is always a register in this case}
ReadRef(CurProp, Paicpu(p)^.oper[0].ref);
TmpReg := Reg32(Paicpu(p)^.oper[1].reg);
If RegInRef(TmpReg, Paicpu(p)^.oper[0].ref^) And
(curProp^.regs[tmpReg].typ in [con_ref,con_noRemoveRef])
Then
Begin
With CurProp^.Regs[TmpReg] Do
Begin
incState(wstate,1);
{also store how many instructions are part of the sequence in the first
instructions PPaiProp, so it can be easily accessed from within
CheckSequence}
Inc(NrOfMods, NrOfInstrSinceLastMod[TmpReg]);
PPaiProp(Pai(StartMod)^.OptInfo)^.Regs[TmpReg].NrOfMods := NrOfMods;
NrOfInstrSinceLastMod[TmpReg] := 0;
{ Destroy the contents of the registers }
{ that depended on the previous value of }
{ this register }
invalidateDepedingRegs(curProp,tmpReg);
End;
End
Else
Begin
{$ifdef statedebug}
hp := new(pai_asm_comment,init(strpnew('destroying & initing '+att_reg2str[tmpreg])));
insertllitem(asml,p,p^.next,hp);
{$endif statedebug}
DestroyReg(CurProp, TmpReg, true);
If Not(RegInRef(TmpReg, Paicpu(p)^.oper[0].ref^)) Then
With CurProp^.Regs[TmpReg] Do
Begin
Typ := Con_Ref;
StartMod := p;
NrOfMods := 1;
End
End;
{$ifdef StateDebug}
hp := new(pai_asm_comment,init(strpnew(att_reg2str[TmpReg]+': '+tostr(CurProp^.Regs[TmpReg].WState))));
InsertLLItem(AsmL, p, p^.next, hp);
{$endif StateDebug}
End;
top_symbol,Top_Const:
Begin
Case Paicpu(p)^.oper[1].typ Of
@ -2317,12 +2342,12 @@ Begin
GetNextInstruction(p, p);
End;
{Uncomment the next line to see how much memory the reloading optimizer needs}
{ Writeln((NrOfPaiObjs*(((SizeOf(TPaiProp)+3)div 4)*4)));}
{ Writeln(NrOfPaiObjs*SizeOf(TPaiProp));}
{no need to check mem/maxavail, we've got as much virtual memory as we want}
If NrOfPaiObjs <> 0 Then
Begin
InitDFAPass2 := True;
GetMem(PaiPropBlock, NrOfPaiObjs*(((SizeOf(TPaiProp)+3)div 4)*4));
GetMem(PaiPropBlock, NrOfPaiObjs*SizeOf(TPaiProp));
p := BlockStart;
SkipHead(p);
For Count := 1 To NrOfPaiObjs Do
@ -2362,7 +2387,20 @@ End.
{
$Log$
Revision 1.2 2000-10-19 15:59:40 jonas
Revision 1.3 2000-10-24 10:40:53 jonas
+ register renaming ("fixes" bug1088)
* changed command line options meanings for optimizer:
O2 now means peepholopts, CSE and register renaming in 1 pass
O3 is the same, but repeated until no further optimizations are
possible or until 5 passes have been done (to avoid endless loops)
* changed aopt386 so it does this looping
* added some procedures from csopt386 to the interface because they're
used by rropt386 as well
* some changes to csopt386 and daopt386 so that newly added instructions
by the CSE get optimizer info (they were simply skipped previously),
this fixes some bugs
Revision 1.2 2000/10/19 15:59:40 jonas
* fixed bug in allocregbetween (the register wasn't added to the
usedregs set of the last instruction of the chain) ("merged")

View File

@ -28,14 +28,16 @@ Interface
Uses Aasm;
Procedure PrePeepHoleOpts(AsmL: PAasmOutput; BlockStart, BlockEnd: Pai);
Procedure PeepHoleOptPass1(AsmL: PAasmOutput; BlockStart, BlockEnd: Pai);
Procedure PeepHoleOptPass2(AsmL: PAasmOutput; BlockStart, BlockEnd: Pai);
Procedure PostPeepHoleOpts(AsmL: PAasmOutput; BlockStart, BlockEnd: Pai);
Implementation
Uses
globtype,systems,
globals,verbose,hcodegen,
globals,hcodegen,
{$ifdef finaldestdebug}
cobjects,
{$endif finaldestdebug}
@ -97,6 +99,308 @@ begin
end;
end;
Procedure PrePeepHoleOpts(AsmL: PAasmOutput; BlockStart, BlockEnd: Pai);
{ Peephole optimizations that walk the list from BlockStart up to (not       }
{ including) BlockEnd and rewrite, in place:                                 }
{   - "imul $const, ..." with small constants into mov/lea/add sequences     }
{   - "shr/sar $c1, x; shl $c2, x" into shift/and combinations               }
{   - "xor %reg, %reg" into "mov $0, %reg" (see the A_XOR case)              }
{ AsmL is the list that owns the instructions; it is modified directly.      }
{ NOTE(review): lea/mov do not set the flags the way imul/xor do; only a     }
{ directly following jo/jno is checked for in the imul case - confirm no     }
{ other flag consumers can follow.                                           }
var
  p,hp1: pai;
  l: longint;
  tmpRef: treference;
  { true for "imul $c, reg1, reg2" with reg1 <> reg2, i.e. when the source }
  { register survives the first instruction of a two instruction sequence  }
  threeOp: boolean;
Begin
  P := BlockStart;
  While (P <> BlockEnd) Do
    Begin
      Case P^.Typ Of
        Ait_Instruction:
          Begin
            Case Paicpu(p)^.opcode Of
              A_IMUL:
                {changes certain "imul const, %reg"'s to lea sequences}
                Begin
                  If (Paicpu(p)^.oper[0].typ = Top_Const) And
                     (Paicpu(p)^.oper[1].typ = Top_Reg) And
                     (Paicpu(p)^.opsize = S_L) Then
                    If (Paicpu(p)^.oper[0].val = 1) Then
                      If (Paicpu(p)^.oper[2].typ = Top_None) Then
                        {remove "imul $1, reg"}
                        Begin
                          hp1 := Pai(p^.Next);
                          AsmL^.Remove(p);
                          Dispose(p, Done);
                          p := hp1;
                          { p already points to the next item, skip the }
                          { advance at the bottom of the loop           }
                          Continue;
                        End
                      Else
                        {change "imul $1, reg1, reg2" to "mov reg1, reg2"}
                        Begin
                          hp1 := New(Paicpu, Op_Reg_Reg(A_MOV, S_L, Paicpu(p)^.oper[1].reg,Paicpu(p)^.oper[2].reg));
                          { linking hp1 between p's neighbours replaces p }
                          InsertLLItem(AsmL, p^.previous, p^.next, hp1);
                          Dispose(p, Done);
                          p := hp1;
                        End
                    Else If
                     ((Paicpu(p)^.oper[2].typ = Top_Reg) or
                      (Paicpu(p)^.oper[2].typ = Top_None)) And
                     (aktoptprocessor < ClassP6) And
                     (Paicpu(p)^.oper[0].val <= 12) And
                     Not(CS_LittleSize in aktglobalswitches) And
                     (Not(GetNextInstruction(p, hp1)) Or
                       {GetNextInstruction(p, hp1) And}
                       Not((Pai(hp1)^.typ = ait_instruction) And
                           ((paicpu(hp1)^.opcode=A_Jcc) and
                            (paicpu(hp1)^.condition in [C_O,C_NO]))))
                    Then
                      Begin
                        Reset_reference(tmpref);
                        Case Paicpu(p)^.oper[0].val Of
                          3: Begin
                             {imul 3, reg1, reg2 to
                                lea (reg1,reg1,2), reg2
                              imul 3, reg1 to
                                lea (reg1,reg1,2), reg1}
                               TmpRef.base := Paicpu(p)^.oper[1].reg;
                               TmpRef.Index := Paicpu(p)^.oper[1].reg;
                               TmpRef.ScaleFactor := 2;
                               If (Paicpu(p)^.oper[2].typ = Top_None) Then
                                 hp1 := New(Paicpu, op_ref_reg(A_LEA, S_L, newReference(TmpRef), Paicpu(p)^.oper[1].reg))
                               Else
                                 hp1 := New(Paicpu, op_ref_reg(A_LEA, S_L, newReference(TmpRef), Paicpu(p)^.oper[2].reg));
                               InsertLLItem(AsmL,p^.previous, p^.next, hp1);
                               Dispose(p, Done);
                               p := hp1;
                             End;
                          5: Begin
                             {imul 5, reg1, reg2 to
                                lea (reg1,reg1,4), reg2
                              imul 5, reg1 to
                                lea (reg1,reg1,4), reg1}
                               TmpRef.base := Paicpu(p)^.oper[1].reg;
                               TmpRef.Index := Paicpu(p)^.oper[1].reg;
                               TmpRef.ScaleFactor := 4;
                               If (Paicpu(p)^.oper[2].typ = Top_None) Then
                                 hp1 := New(Paicpu, op_ref_reg(A_LEA, S_L, newReference(TmpRef), Paicpu(p)^.oper[1].reg))
                               Else
                                 hp1 := New(Paicpu, op_ref_reg(A_LEA, S_L, newReference(TmpRef), Paicpu(p)^.oper[2].reg));
                               InsertLLItem(AsmL,p^.previous, p^.next, hp1);
                               Dispose(p, Done);
                               p := hp1;
                             End;
                          6: Begin
                             {imul 6, reg1, reg2 (reg1 <> reg2) to
                                lea (,reg1,2), reg2
                                lea (reg2,reg1,4), reg2
                              imul 6, reg1 (or imul 6, reg1, reg1) to
                                lea (reg1,reg1,2), reg1
                                add reg1, reg1}
                               If (aktoptprocessor <= Class386)
                                 Then
                                   Begin
                                     { the two-lea form reads reg1 again after  }
                                     { writing reg2, so it is only valid if the }
                                     { registers differ                         }
                                     threeOp :=
                                       (Paicpu(p)^.oper[2].typ = Top_Reg) And
                                       (Paicpu(p)^.oper[2].reg <> Paicpu(p)^.oper[1].reg);
                                     { build the SECOND instruction first and }
                                     { link it after p                        }
                                     TmpRef.Index := Paicpu(p)^.oper[1].reg;
                                     If threeOp
                                       Then
                                         Begin
                                           TmpRef.base := Paicpu(p)^.oper[2].reg;
                                           TmpRef.ScaleFactor := 4;
                                           { the sum must end up in reg2 (was }
                                           { wrongly written to reg1)         }
                                           hp1 := New(Paicpu, op_ref_reg(A_LEA, S_L, newReference(TmpRef), Paicpu(p)^.oper[2].reg));
                                         End
                                       Else
                                         Begin
                                           hp1 := New(Paicpu, op_reg_reg(A_ADD, S_L,
                                             Paicpu(p)^.oper[1].reg,Paicpu(p)^.oper[1].reg));
                                         End;
                                     InsertLLItem(AsmL,p, p^.next, hp1);
                                     { now the FIRST instruction, which replaces p }
                                     Reset_reference(tmpref);
                                     TmpRef.Index := Paicpu(p)^.oper[1].reg;
                                     TmpRef.ScaleFactor := 2;
                                     If threeOp
                                       Then
                                         Begin
                                           TmpRef.base := R_NO;
                                           hp1 := New(Paicpu, op_ref_reg(A_LEA, S_L, newReference(TmpRef),
                                             Paicpu(p)^.oper[2].reg));
                                         End
                                       Else
                                         Begin
                                           TmpRef.base := Paicpu(p)^.oper[1].reg;
                                           hp1 := New(Paicpu, op_ref_reg(A_LEA, S_L, newReference(TmpRef), Paicpu(p)^.oper[1].reg));
                                         End;
                                     InsertLLItem(AsmL,p^.previous, p^.next, hp1);
                                     Dispose(p, Done);
                                     { continue after the second new instruction }
                                     p := Pai(hp1^.next);
                                   End
                             End;
                          9: Begin
                             {imul 9, reg1, reg2 to
                                lea (reg1,reg1,8), reg2
                              imul 9, reg1 to
                                lea (reg1,reg1,8), reg1}
                               TmpRef.base := Paicpu(p)^.oper[1].reg;
                               TmpRef.Index := Paicpu(p)^.oper[1].reg;
                               TmpRef.ScaleFactor := 8;
                               If (Paicpu(p)^.oper[2].typ = Top_None) Then
                                 hp1 := New(Paicpu, op_ref_reg(A_LEA, S_L, newReference(TmpRef), Paicpu(p)^.oper[1].reg))
                               Else
                                 hp1 := New(Paicpu, op_ref_reg(A_LEA, S_L, newReference(TmpRef), Paicpu(p)^.oper[2].reg));
                               InsertLLItem(AsmL,p^.previous, p^.next, hp1);
                               Dispose(p, Done);
                               p := hp1;
                             End;
                          10: Begin
                             {imul 10, reg1, reg2 to
                                lea (reg1,reg1,4), reg2
                                add reg2, reg2
                              imul 10, reg1 to
                                lea (reg1,reg1,4), reg1
                                add reg1, reg1}
                                If (aktoptprocessor <= Class386) Then
                                  Begin
                                    { second instruction (add), linked after p }
                                    If (Paicpu(p)^.oper[2].typ = Top_Reg) Then
                                      hp1 := New(Paicpu, op_reg_reg(A_ADD, S_L,
                                        Paicpu(p)^.oper[2].reg,Paicpu(p)^.oper[2].reg))
                                    Else
                                      hp1 := New(Paicpu, op_reg_reg(A_ADD, S_L,
                                        Paicpu(p)^.oper[1].reg,Paicpu(p)^.oper[1].reg));
                                    InsertLLItem(AsmL,p, p^.next, hp1);
                                    { first instruction (lea), replaces p }
                                    TmpRef.base := Paicpu(p)^.oper[1].reg;
                                    TmpRef.Index := Paicpu(p)^.oper[1].reg;
                                    TmpRef.ScaleFactor := 4;
                                    If (Paicpu(p)^.oper[2].typ = Top_Reg)
                                      Then
                                        hp1 := New(Paicpu, op_ref_reg(A_LEA, S_L, newReference(TmpRef), Paicpu(p)^.oper[2].reg))
                                      Else
                                        hp1 := New(Paicpu, op_ref_reg(A_LEA, S_L, newReference(TmpRef), Paicpu(p)^.oper[1].reg));
                                    InsertLLItem(AsmL,p^.previous, p^.next, hp1);
                                    Dispose(p, Done);
                                    p := Pai(hp1^.next);
                                  End
                              End;
                          12: Begin
                             {imul 12, reg1, reg2 (reg1 <> reg2) to
                                lea (,reg1,4), reg2
                                lea (reg2,reg1,8), reg2
                              imul 12, reg1 (or imul 12, reg1, reg1) to
                                lea (reg1,reg1,2), reg1
                                lea (,reg1,4), reg1}
                                If (aktoptprocessor <= Class386)
                                  Then
                                    Begin
                                      { see the remark at "imul 6" about reg1 = reg2 }
                                      threeOp :=
                                        (Paicpu(p)^.oper[2].typ = Top_Reg) And
                                        (Paicpu(p)^.oper[2].reg <> Paicpu(p)^.oper[1].reg);
                                      { second instruction, linked after p }
                                      TmpRef.Index := Paicpu(p)^.oper[1].reg;
                                      If threeOp Then
                                        Begin
                                          TmpRef.base := Paicpu(p)^.oper[2].reg;
                                          TmpRef.ScaleFactor := 8;
                                          hp1 := New(Paicpu, op_ref_reg(A_LEA, S_L, newReference(TmpRef), Paicpu(p)^.oper[2].reg));
                                        End
                                      Else
                                        Begin
                                          TmpRef.base := R_NO;
                                          TmpRef.ScaleFactor := 4;
                                          hp1 := New(Paicpu, op_ref_reg(A_LEA, S_L, newReference(TmpRef), Paicpu(p)^.oper[1].reg));
                                        End;
                                      InsertLLItem(AsmL,p, p^.next, hp1);
                                      { first instruction, replaces p }
                                      Reset_reference(tmpref);
                                      TmpRef.Index := Paicpu(p)^.oper[1].reg;
                                      If threeOp Then
                                        Begin
                                          TmpRef.base := R_NO;
                                          TmpRef.ScaleFactor := 4;
                                          hp1 := New(Paicpu, op_ref_reg(A_LEA, S_L, newReference(TmpRef), Paicpu(p)^.oper[2].reg));
                                        End
                                      Else
                                        Begin
                                          TmpRef.base := Paicpu(p)^.oper[1].reg;
                                          TmpRef.ScaleFactor := 2;
                                          hp1 := New(Paicpu, op_ref_reg(A_LEA, S_L, newReference(TmpRef), Paicpu(p)^.oper[1].reg));
                                        End;
                                      InsertLLItem(AsmL,p^.previous, p^.next, hp1);
                                      Dispose(p, Done);
                                      p := Pai(hp1^.next);
                                    End
                              End
                        End;
                      End;
                End;
              A_SAR, A_SHR:
                  {changes the code sequence
                   shr/sar const1, x
                   shl     const2, x
                   to either "sar/and", "shl/and" or just "and" depending on const1 and const2}
                Begin
                  If GetNextInstruction(p, hp1) And
                     (pai(hp1)^.typ = ait_instruction) and
                     (Paicpu(hp1)^.opcode = A_SHL) and
                     (Paicpu(p)^.oper[0].typ = top_const) and
                     (Paicpu(hp1)^.oper[0].typ = top_const) and
                     (Paicpu(hp1)^.opsize = Paicpu(p)^.opsize) And
                     (Paicpu(hp1)^.oper[1].typ = Paicpu(p)^.oper[1].typ) And
                     OpsEqual(Paicpu(hp1)^.oper[1], Paicpu(p)^.oper[1])
                    Then
                      If (Paicpu(p)^.oper[0].val > Paicpu(hp1)^.oper[0].val) And
                         Not(CS_LittleSize In aktglobalswitches)
                        Then
                          { shr/sar const1, %reg
                            shl     const2, %reg
                            with const1 > const2 }
                          Begin
                            { p keeps shifting right by the difference, hp1 }
                            { becomes an and that clears the low const2 bits }
                            Paicpu(p)^.LoadConst(0,Paicpu(p)^.oper[0].val-Paicpu(hp1)^.oper[0].val);
                            Paicpu(hp1)^.opcode := A_AND;
                            l := (1 shl (Paicpu(hp1)^.oper[0].val)) - 1;
                            Case Paicpu(p)^.opsize Of
                              S_L: Paicpu(hp1)^.LoadConst(0,l Xor longint(-1));
                              S_B: Paicpu(hp1)^.LoadConst(0,l Xor $ff);
                              S_W: Paicpu(hp1)^.LoadConst(0,l Xor $ffff);
                            End;
                          End
                        Else
                          If (Paicpu(p)^.oper[0].val<Paicpu(hp1)^.oper[0].val) And
                             Not(CS_LittleSize In aktglobalswitches)
                            Then
                              { shr/sar const1, %reg
                                shl     const2, %reg
                                with const1 < const2 }
                              Begin
                                { hp1 keeps shifting left by the difference, p }
                                { becomes an and that clears the low const1 bits }
                                Paicpu(hp1)^.LoadConst(0,Paicpu(hp1)^.oper[0].val-Paicpu(p)^.oper[0].val);
                                Paicpu(p)^.opcode := A_AND;
                                l := (1 shl (Paicpu(p)^.oper[0].val))-1;
                                Case Paicpu(p)^.opsize Of
                                  S_L: Paicpu(p)^.LoadConst(0,l Xor longint(-1));
                                  S_B: Paicpu(p)^.LoadConst(0,l Xor $ff);
                                  S_W: Paicpu(p)^.LoadConst(0,l Xor $ffff);
                                End;
                              End
                            Else
                              { shr/sar const1, %reg
                                shl     const2, %reg
                                with const1 = const2 }
                              if (Paicpu(p)^.oper[0].val = Paicpu(hp1)^.oper[0].val) then
                                Begin
                                  { a single and replaces both shifts }
                                  Paicpu(p)^.opcode := A_AND;
                                  l := (1 shl (Paicpu(p)^.oper[0].val))-1;
                                  Case Paicpu(p)^.opsize Of
                                    S_B: Paicpu(p)^.LoadConst(0,l Xor $ff);
                                    S_W: Paicpu(p)^.LoadConst(0,l Xor $ffff);
                                    S_L: Paicpu(p)^.LoadConst(0,l Xor longint(-1));
                                  End;
                                  AsmL^.remove(hp1);
                                  dispose(hp1, done);
                                End;
                End;
              A_XOR:
                If (Paicpu(p)^.oper[0].typ = top_reg) And
                   (Paicpu(p)^.oper[1].typ = top_reg) And
                   (Paicpu(p)^.oper[0].reg = Paicpu(p)^.oper[1].reg) then
                 { temporarily change this to 'mov reg,0' to make it easier }
                 { for the CSE. Will be changed back in pass 2              }
                  begin
                    paicpu(p)^.opcode := A_MOV;
                    paicpu(p)^.loadconst(0,0);
                  end;
            End;
          End;
      End;
      p := Pai(p^.next)
    End;
End;
Procedure PeepHoleOptPass1(Asml: PAasmOutput; BlockStart, BlockEnd: Pai);
{First pass of peepholeoptimizations}
@ -279,9 +583,7 @@ Begin
If (paicpu(p)^.opcode = A_JMP) Then
Begin
While GetNextInstruction(p, hp1) and
((hp1^.typ <> ait_label) or
{ skip unused labels, they're not referenced anywhere }
labelCanBeSkipped(pai_label(hp1))) Do
(hp1^.typ <> ait_label) do
If not(hp1^.typ in ([ait_label,ait_align]+skipinstr)) Then
Begin
AsmL^.Remove(hp1);
@ -289,6 +591,7 @@ Begin
End
else break;
End;
{ remove jumps to a label coming right after them }
If GetNextInstruction(p, hp1) then
Begin
if FindLabel(pasmlabel(paicpu(p)^.oper[0].sym), hp1) then
@ -528,212 +831,6 @@ Begin
A_FSTP,A_FISTP:
if doFpuLoadStoreOpt(asmL,p) then
continue;
A_IMUL:
{changes certain "imul const, %reg"'s to lea sequences}
Begin
If (Paicpu(p)^.oper[0].typ = Top_Const) And
(Paicpu(p)^.oper[1].typ = Top_Reg) And
(Paicpu(p)^.opsize = S_L) Then
If (Paicpu(p)^.oper[0].val = 1) Then
If (Paicpu(p)^.oper[2].typ = Top_None) Then
{remove "imul $1, reg"}
Begin
hp1 := Pai(p^.Next);
AsmL^.Remove(p);
Dispose(p, Done);
p := hp1;
Continue;
End
Else
{change "imul $1, reg1, reg2" to "mov reg1, reg2"}
Begin
hp1 := New(Paicpu, Op_Reg_Reg(A_MOV, S_L, Paicpu(p)^.oper[1].reg,Paicpu(p)^.oper[2].reg));
InsertLLItem(AsmL, p^.previous, p^.next, hp1);
Dispose(p, Done);
p := hp1;
End
Else If
((Paicpu(p)^.oper[2].typ = Top_Reg) or
(Paicpu(p)^.oper[2].typ = Top_None)) And
(aktoptprocessor < ClassP6) And
(Paicpu(p)^.oper[0].val <= 12) And
Not(CS_LittleSize in aktglobalswitches) And
(Not(GetNextInstruction(p, hp1)) Or
{GetNextInstruction(p, hp1) And}
Not((Pai(hp1)^.typ = ait_instruction) And
((paicpu(hp1)^.opcode=A_Jcc) and
(paicpu(hp1)^.condition in [C_O,C_NO]))))
Then
Begin
Reset_reference(tmpref);
Case Paicpu(p)^.oper[0].val Of
3: Begin
{imul 3, reg1, reg2 to
lea (reg1,reg1,2), reg2
imul 3, reg1 to
lea (reg1,reg1,2), reg1}
TmpRef.base := Paicpu(p)^.oper[1].reg;
TmpRef.Index := Paicpu(p)^.oper[1].reg;
TmpRef.ScaleFactor := 2;
If (Paicpu(p)^.oper[2].typ = Top_None) Then
hp1 := New(Paicpu, op_ref_reg(A_LEA, S_L, newReference(TmpRef), Paicpu(p)^.oper[1].reg))
Else
hp1 := New(Paicpu, op_ref_reg(A_LEA, S_L, newReference(TmpRef), Paicpu(p)^.oper[2].reg));
InsertLLItem(AsmL,p^.previous, p^.next, hp1);
Dispose(p, Done);
p := hp1;
End;
5: Begin
{imul 5, reg1, reg2 to
lea (reg1,reg1,4), reg2
imul 5, reg1 to
lea (reg1,reg1,4), reg1}
TmpRef.base := Paicpu(p)^.oper[1].reg;
TmpRef.Index := Paicpu(p)^.oper[1].reg;
TmpRef.ScaleFactor := 4;
If (Paicpu(p)^.oper[2].typ = Top_None) Then
hp1 := New(Paicpu, op_ref_reg(A_LEA, S_L, newReference(TmpRef), Paicpu(p)^.oper[1].reg))
Else
hp1 := New(Paicpu, op_ref_reg(A_LEA, S_L, newReference(TmpRef), Paicpu(p)^.oper[2].reg));
InsertLLItem(AsmL,p^.previous, p^.next, hp1);
Dispose(p, Done);
p := hp1;
End;
6: Begin
{imul 6, reg1, reg2 to
lea (,reg1,2), reg2
lea (reg2,reg1,4), reg2
imul 6, reg1 to
lea (reg1,reg1,2), reg1
add reg1, reg1}
If (aktoptprocessor <= Class386)
Then
Begin
TmpRef.Index := Paicpu(p)^.oper[1].reg;
If (Paicpu(p)^.oper[2].typ = Top_Reg)
Then
Begin
TmpRef.base := Paicpu(p)^.oper[2].reg;
TmpRef.ScaleFactor := 4;
hp1 := New(Paicpu, op_ref_reg(A_LEA, S_L, newReference(TmpRef), Paicpu(p)^.oper[1].reg));
End
Else
Begin
hp1 := New(Paicpu, op_reg_reg(A_ADD, S_L,
Paicpu(p)^.oper[1].reg,Paicpu(p)^.oper[1].reg));
End;
InsertLLItem(AsmL,p, p^.next, hp1);
Reset_reference(tmpref);
TmpRef.Index := Paicpu(p)^.oper[1].reg;
TmpRef.ScaleFactor := 2;
If (Paicpu(p)^.oper[2].typ = Top_Reg)
Then
Begin
TmpRef.base := R_NO;
hp1 := New(Paicpu, op_ref_reg(A_LEA, S_L, newReference(TmpRef),
Paicpu(p)^.oper[2].reg));
End
Else
Begin
TmpRef.base := Paicpu(p)^.oper[1].reg;
hp1 := New(Paicpu, op_ref_reg(A_LEA, S_L, newReference(TmpRef), Paicpu(p)^.oper[1].reg));
End;
InsertLLItem(AsmL,p^.previous, p^.next, hp1);
Dispose(p, Done);
p := Pai(hp1^.next);
End
End;
9: Begin
{imul 9, reg1, reg2 to
lea (reg1,reg1,8), reg2
imul 9, reg1 to
lea (reg1,reg1,8), reg1}
TmpRef.base := Paicpu(p)^.oper[1].reg;
TmpRef.Index := Paicpu(p)^.oper[1].reg;
TmpRef.ScaleFactor := 8;
If (Paicpu(p)^.oper[2].typ = Top_None) Then
hp1 := New(Paicpu, op_ref_reg(A_LEA, S_L, newReference(TmpRef), Paicpu(p)^.oper[1].reg))
Else
hp1 := New(Paicpu, op_ref_reg(A_LEA, S_L, newReference(TmpRef), Paicpu(p)^.oper[2].reg));
InsertLLItem(AsmL,p^.previous, p^.next, hp1);
Dispose(p, Done);
p := hp1;
End;
10: Begin
{imul 10, reg1, reg2 to
lea (reg1,reg1,4), reg2
add reg2, reg2
imul 10, reg1 to
lea (reg1,reg1,4), reg1
add reg1, reg1}
If (aktoptprocessor <= Class386) Then
Begin
If (Paicpu(p)^.oper[2].typ = Top_Reg) Then
hp1 := New(Paicpu, op_reg_reg(A_ADD, S_L,
Paicpu(p)^.oper[2].reg,Paicpu(p)^.oper[2].reg))
Else
hp1 := New(Paicpu, op_reg_reg(A_ADD, S_L,
Paicpu(p)^.oper[1].reg,Paicpu(p)^.oper[1].reg));
InsertLLItem(AsmL,p, p^.next, hp1);
TmpRef.base := Paicpu(p)^.oper[1].reg;
TmpRef.Index := Paicpu(p)^.oper[1].reg;
TmpRef.ScaleFactor := 4;
If (Paicpu(p)^.oper[2].typ = Top_Reg)
Then
hp1 := New(Paicpu, op_ref_reg(A_LEA, S_L, newReference(TmpRef), Paicpu(p)^.oper[2].reg))
Else
hp1 := New(Paicpu, op_ref_reg(A_LEA, S_L, newReference(TmpRef), Paicpu(p)^.oper[1].reg));
InsertLLItem(AsmL,p^.previous, p^.next, hp1);
Dispose(p, Done);
p := Pai(hp1^.next);
End
End;
12: Begin
{imul 12, reg1, reg2 to
lea (,reg1,4), reg2
lea (,reg1,8) reg2
imul 12, reg1 to
lea (reg1,reg1,2), reg1
lea (,reg1,4), reg1}
If (aktoptprocessor <= Class386)
Then
Begin
TmpRef.Index := Paicpu(p)^.oper[1].reg;
If (Paicpu(p)^.oper[2].typ = Top_Reg) Then
Begin
TmpRef.base := Paicpu(p)^.oper[2].reg;
TmpRef.ScaleFactor := 8;
hp1 := New(Paicpu, op_ref_reg(A_LEA, S_L, newReference(TmpRef), Paicpu(p)^.oper[2].reg));
End
Else
Begin
TmpRef.base := R_NO;
TmpRef.ScaleFactor := 4;
hp1 := New(Paicpu, op_ref_reg(A_LEA, S_L, newReference(TmpRef), Paicpu(p)^.oper[1].reg));
End;
InsertLLItem(AsmL,p, p^.next, hp1);
Reset_reference(tmpref);
TmpRef.Index := Paicpu(p)^.oper[1].reg;
If (Paicpu(p)^.oper[2].typ = Top_Reg) Then
Begin
TmpRef.base := R_NO;
TmpRef.ScaleFactor := 4;
hp1 := New(Paicpu, op_ref_reg(A_LEA, S_L, newReference(TmpRef), Paicpu(p)^.oper[2].reg));
End
Else
Begin
TmpRef.base := Paicpu(p)^.oper[1].reg;
TmpRef.ScaleFactor := 2;
hp1 := New(Paicpu, op_ref_reg(A_LEA, S_L, newReference(TmpRef), Paicpu(p)^.oper[1].reg));
End;
InsertLLItem(AsmL,p^.previous, p^.next, hp1);
Dispose(p, Done);
p := Pai(hp1^.next);
End
End
End;
End;
End;
A_LEA:
Begin
{removes seg register prefixes from LEA operations, as they
@ -784,7 +881,6 @@ Begin
end;
end;
end;
End;
A_MOV:
Begin
@ -1420,71 +1516,6 @@ Begin
p := hp1;
End
End;
A_SAR, A_SHR:
{changes the code sequence
shr/sar const1, x
shl const2, x
to either "sar/and", "shl/and" or just "and" depending on const1 and const2}
Begin
If GetNextInstruction(p, hp1) And
(pai(hp1)^.typ = ait_instruction) and
(Paicpu(hp1)^.opcode = A_SHL) and
(Paicpu(p)^.oper[0].typ = top_const) and
(Paicpu(hp1)^.oper[0].typ = top_const) and
(Paicpu(hp1)^.opsize = Paicpu(p)^.opsize) And
(Paicpu(hp1)^.oper[1].typ = Paicpu(p)^.oper[1].typ) And
OpsEqual(Paicpu(hp1)^.oper[1], Paicpu(p)^.oper[1])
Then
If (Paicpu(p)^.oper[0].val > Paicpu(hp1)^.oper[0].val) And
Not(CS_LittleSize In aktglobalswitches)
Then
{ shr/sar const1, %reg
shl const2, %reg
with const1 > const2 }
Begin
Paicpu(p)^.LoadConst(0,Paicpu(p)^.oper[0].val-Paicpu(hp1)^.oper[0].val);
Paicpu(hp1)^.opcode := A_AND;
l := (1 shl (Paicpu(hp1)^.oper[0].val)) - 1;
Case Paicpu(p)^.opsize Of
S_L: Paicpu(hp1)^.LoadConst(0,l Xor longint(-1));
S_B: Paicpu(hp1)^.LoadConst(0,l Xor $ff);
S_W: Paicpu(hp1)^.LoadConst(0,l Xor $ffff);
End;
End
Else
If (Paicpu(p)^.oper[0].val<Paicpu(hp1)^.oper[0].val) And
Not(CS_LittleSize In aktglobalswitches)
Then
{ shr/sar const1, %reg
shl const2, %reg
with const1 < const2 }
Begin
Paicpu(hp1)^.LoadConst(0,Paicpu(hp1)^.oper[0].val-Paicpu(p)^.oper[0].val);
Paicpu(p)^.opcode := A_AND;
l := (1 shl (Paicpu(p)^.oper[0].val))-1;
Case Paicpu(p)^.opsize Of
S_L: Paicpu(p)^.LoadConst(0,l Xor $ffffffff);
S_B: Paicpu(p)^.LoadConst(0,l Xor $ff);
S_W: Paicpu(p)^.LoadConst(0,l Xor $ffff);
End;
End
Else
{ shr/sar const1, %reg
shl const2, %reg
with const1 = const2 }
if (Paicpu(p)^.oper[0].val = Paicpu(hp1)^.oper[0].val) then
Begin
Paicpu(p)^.opcode := A_AND;
l := (1 shl (Paicpu(p)^.oper[0].val))-1;
Case Paicpu(p)^.opsize Of
S_B: Paicpu(p)^.LoadConst(0,l Xor $ff);
S_W: Paicpu(p)^.LoadConst(0,l Xor $ffff);
S_L: Paicpu(p)^.LoadConst(0,l Xor $ffffffff);
End;
AsmL^.remove(hp1);
dispose(hp1, done);
End;
End;
A_SETcc :
{ changes
setcc (funcres) setcc reg
@ -1604,6 +1635,7 @@ end;
Procedure PeepHoleOptPass2(AsmL: PAasmOutput; BlockStart, BlockEnd: Pai);
{$ifdef USECMOV}
function CanBeCMOV(p : pai) : boolean;
begin
@ -1613,6 +1645,7 @@ Procedure PeepHoleOptPass2(AsmL: PAasmOutput; BlockStart, BlockEnd: Pai);
(paicpu(p)^.oper[0].typ in [top_reg,top_ref]) and
(paicpu(p)^.oper[1].typ in [top_reg,top_ref]);
end;
{$endif USECMOV}
var
p,hp1,hp2: pai;
@ -1633,20 +1666,6 @@ Begin
Ait_Instruction:
Begin
Case Paicpu(p)^.opcode Of
A_CALL:
If (AktOptProcessor < ClassP6) And
GetNextInstruction(p, hp1) And
(hp1^.typ = ait_instruction) And
(paicpu(hp1)^.opcode = A_JMP) Then
Begin
Inc(paicpu(hp1)^.oper[0].sym^.refs);
hp2 := New(Paicpu,op_sym(A_PUSH,S_L,paicpu(hp1)^.oper[0].sym));
InsertLLItem(AsmL, p^.previous, p, hp2);
Paicpu(p)^.opcode := A_JMP;
AsmL^.Remove(hp1);
Dispose(hp1, Done)
End;
{$ifdef USECMOV}
A_Jcc:
if (aktspecificoptprocessor=ClassP6) then
@ -1833,59 +1852,6 @@ Begin
p := hp1
End;
End
else if (Paicpu(p)^.oper[0].typ = Top_Const) And
(Paicpu(p)^.oper[0].val = 0) And
(Paicpu(p)^.oper[1].typ = Top_Reg) Then
{ change "mov $0, %reg" into "xor %reg, %reg" }
Begin
Paicpu(p)^.opcode := A_XOR;
Paicpu(p)^.LoadReg(0,Paicpu(p)^.oper[1].reg);
End
End;
A_MOVZX:
Begin
If (Paicpu(p)^.oper[1].typ = top_reg) Then
If (Paicpu(p)^.oper[0].typ = top_reg)
Then
Case Paicpu(p)^.opsize of
S_BL:
Begin
If IsGP32Reg(Paicpu(p)^.oper[1].reg) And
Not(CS_LittleSize in aktglobalswitches) And
(aktoptprocessor = ClassP5)
Then
{Change "movzbl %reg1, %reg2" to
"xorl %reg2, %reg2; movb %reg1, %reg2" for Pentium and
PentiumMMX}
Begin
hp1 := New(Paicpu, op_reg_reg(A_XOR, S_L,
Paicpu(p)^.oper[1].reg, Paicpu(p)^.oper[1].reg));
InsertLLItem(AsmL,p^.previous, p, hp1);
Paicpu(p)^.opcode := A_MOV;
Paicpu(p)^.changeopsize(S_B);
Paicpu(p)^.LoadReg(1,Reg32ToReg8(Paicpu(p)^.oper[1].reg));
End;
End;
End
Else
If (Paicpu(p)^.oper[0].typ = top_ref) And
(Paicpu(p)^.oper[0].ref^.base <> Paicpu(p)^.oper[1].reg) And
(Paicpu(p)^.oper[0].ref^.index <> Paicpu(p)^.oper[1].reg) And
Not(CS_LittleSize in aktglobalswitches) And
IsGP32Reg(Paicpu(p)^.oper[1].reg) And
(aktoptprocessor = ClassP5) And
(Paicpu(p)^.opsize = S_BL)
Then
{changes "movzbl mem, %reg" to "xorl %reg, %reg; movb mem, %reg8" for
Pentium and PentiumMMX}
Begin
hp1 := New(Paicpu,op_reg_reg(A_XOR, S_L, Paicpu(p)^.oper[1].reg,
Paicpu(p)^.oper[1].reg));
Paicpu(p)^.opcode := A_MOV;
Paicpu(p)^.changeopsize(S_B);
Paicpu(p)^.LoadReg(1,Reg32ToReg8(Paicpu(p)^.oper[1].reg));
InsertLLItem(AsmL,p^.previous, p, hp1);
End;
End;
A_TEST, A_OR:
{removes the line marked with (x) from the sequence
@ -1942,11 +1908,111 @@ Begin
End;
End;
Procedure PostPeepHoleOpts(AsmL: PAasmOutput; BlockStart, BlockEnd: Pai);
{ Peephole optimizations that walk the list from BlockStart up to (not       }
{ including) BlockEnd and rewrite, in place:                                 }
{   - "call x; jmp lab"        -> "push lab; jmp x"   (processors < ClassP6) }
{   - "mov $0, %reg"           -> "xor %reg, %reg"                           }
{   - "movzbl src, %reg32"     -> "xorl %reg32,%reg32; movb src,%reg8"       }
{                                 (ClassP5 only, not when optimizing size)   }
{ NOTE(review): xor modifies the flags while mov/movzx do not; nothing here  }
{ checks whether the flags are still needed afterwards - confirm callers     }
{ guarantee that.                                                            }
var
  p,hp1,hp2: pai;
Begin
  P := BlockStart;
  While (P <> BlockEnd) Do
    Begin
      Case P^.Typ Of
        Ait_Instruction:
          Begin
            Case Paicpu(p)^.opcode Of
              A_CALL:
                { only a jmp to a symbol can be turned into a "push symbol"; }
                { for any other operand kind oper[0].sym is not valid        }
                If (AktOptProcessor < ClassP6) And
                   GetNextInstruction(p, hp1) And
                   (hp1^.typ = ait_instruction) And
                   (paicpu(hp1)^.opcode = A_JMP) And
                   (paicpu(hp1)^.oper[0].typ = top_symbol) Then
                  Begin
                    { the label gets an extra reference from the new push }
                    Inc(paicpu(hp1)^.oper[0].sym^.refs);
                    hp2 := New(Paicpu,op_sym(A_PUSH,S_L,paicpu(hp1)^.oper[0].sym));
                    InsertLLItem(AsmL, p^.previous, p, hp2);
                    { the call becomes a jmp to the same target, the }
                    { original jmp is dropped                        }
                    Paicpu(p)^.opcode := A_JMP;
                    AsmL^.Remove(hp1);
                    Dispose(hp1, Done)
                  End;
              A_MOV:
                if (Paicpu(p)^.oper[0].typ = Top_Const) And
                   (Paicpu(p)^.oper[0].val = 0) And
                   (Paicpu(p)^.oper[1].typ = Top_Reg) Then
                  { change "mov $0, %reg" into "xor %reg, %reg" }
                  Begin
                    Paicpu(p)^.opcode := A_XOR;
                    Paicpu(p)^.LoadReg(0,Paicpu(p)^.oper[1].reg);
                  End;
              A_MOVZX:
                Begin
                  If (Paicpu(p)^.oper[1].typ = top_reg) Then
                    If (Paicpu(p)^.oper[0].typ = top_reg)
                      Then
                        Case Paicpu(p)^.opsize of
                          S_BL:
                            Begin
                              { the xor is inserted BEFORE the byte move, so }
                              { the source may not be part of the destination }
                              { NOTE(review): only the low byte register as   }
                              { given by Reg32ToReg8 is excluded here; a high }
                              { byte source (%ah etc.) of the same register   }
                              { would need the same treatment - verify.       }
                              If IsGP32Reg(Paicpu(p)^.oper[1].reg) And
                                 (Paicpu(p)^.oper[0].reg <> Reg32ToReg8(Paicpu(p)^.oper[1].reg)) And
                                 Not(CS_LittleSize in aktglobalswitches) And
                                 (aktoptprocessor = ClassP5)
                                Then
                                  {Change "movzbl %reg1, %reg2" to
                                   "xorl %reg2, %reg2; movb %reg1, %reg2" for Pentium and
                                   PentiumMMX}
                                  Begin
                                    hp1 := New(Paicpu, op_reg_reg(A_XOR, S_L,
                                      Paicpu(p)^.oper[1].reg, Paicpu(p)^.oper[1].reg));
                                    InsertLLItem(AsmL,p^.previous, p, hp1);
                                    Paicpu(p)^.opcode := A_MOV;
                                    Paicpu(p)^.changeopsize(S_B);
                                    Paicpu(p)^.LoadReg(1,Reg32ToReg8(Paicpu(p)^.oper[1].reg));
                                  End;
                            End;
                        End
                      Else
                        { memory source: the destination may not be used to }
                        { address the source, since it is cleared first     }
                        If (Paicpu(p)^.oper[0].typ = top_ref) And
                           (Paicpu(p)^.oper[0].ref^.base <> Paicpu(p)^.oper[1].reg) And
                           (Paicpu(p)^.oper[0].ref^.index <> Paicpu(p)^.oper[1].reg) And
                           Not(CS_LittleSize in aktglobalswitches) And
                           IsGP32Reg(Paicpu(p)^.oper[1].reg) And
                           (aktoptprocessor = ClassP5) And
                           (Paicpu(p)^.opsize = S_BL)
                          Then
                            {changes "movzbl mem, %reg" to "xorl %reg, %reg; movb mem, %reg8" for
                             Pentium and PentiumMMX}
                            Begin
                              hp1 := New(Paicpu,op_reg_reg(A_XOR, S_L, Paicpu(p)^.oper[1].reg,
                                Paicpu(p)^.oper[1].reg));
                              Paicpu(p)^.opcode := A_MOV;
                              Paicpu(p)^.changeopsize(S_B);
                              Paicpu(p)^.LoadReg(1,Reg32ToReg8(Paicpu(p)^.oper[1].reg));
                              InsertLLItem(AsmL,p^.previous, p, hp1);
                            End;
                End;
            End;
          End;
      End;
      p := Pai(p^.next)
    End;
End;
End.
{
$Log$
Revision 1.1 2000-10-15 09:47:43 peter
Revision 1.2 2000-10-24 10:40:54 jonas
+ register renaming ("fixes" bug1088)
* changed command line options meanings for optimizer:
O2 now means peepholopts, CSE and register renaming in 1 pass
O3 is the same, but repeated until no further optimizations are
possible or until 5 passes have been done (to avoid endless loops)
* changed aopt386 so it does this looping
* added some procedures from csopt386 to the interface because they're
used by rropt386 as well
* some changes to csopt386 and daopt386 so that newly added instructions
by the CSE get optimizer info (they were simply skipped previously),
this fixes some bugs
Revision 1.1 2000/10/15 09:47:43 peter
* moved to i386/
Revision 1.13 2000/10/02 13:01:29 jonas

350
compiler/i386/rropt386.pas Normal file
View File

@ -0,0 +1,350 @@
{
$Id$
Copyright (c) 1998-2000 by Jonas Maebe, member of the Free Pascal
development team
This unit contains register renaming functionality
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
****************************************************************************
}
Unit rrOpt386;
{$i defines.inc}
Interface
Uses aasm;
procedure doRenaming(asml: paasmoutput; first, last: pai);
Implementation
Uses
{$ifdef replaceregdebug}cutils,{$endif}
verbose,globals,cpubase,cpuasm,daopt386,csopt386,tgeni386;
function canBeFirstSwitch(p: paicpu; reg: tregister): boolean;
{ checks whether an operation on reg can be switched to another reg without an }
{ additional mov, e.g. "addl $4,%reg1" can be changed to "leal 4(%reg1),%reg2" }
{ p is the instruction that modifies reg; reg is compared against the 32 bit  }
{ form of the destination operand. Returns false for every opcode that is not }
{ listed below. The accepted forms must match what doFirstSwitch can rewrite. }
begin
  canBeFirstSwitch := false;
  case p^.opcode of
    A_MOV,A_MOVZX,A_MOVSX,A_LEA:
      canBeFirstSwitch :=
        (p^.oper[1].typ = top_reg) and
        (reg32(p^.oper[1].reg) = reg);
    A_IMUL:
      { doFirstSwitch turns this into the three operand form, which only }
      { exists with an immediate multiplier: "imul %reg3,%reg1" can not  }
      { be expressed as "imul %reg3,%reg1,%reg2"                         }
      canBeFirstSwitch :=
        (p^.ops >= 2) and
        (p^.oper[0].typ = top_const) and
        (p^.oper[p^.ops-1].typ = top_reg) and
        (reg32(p^.oper[p^.ops-1].reg) = reg);
    A_INC,A_DEC,A_SUB,A_ADD:
      { NOTE(review): this tests oper[1]; if inc/dec are represented as one }
      { operand instructions (operand in oper[0]) this is never true for    }
      { them and the inc/dec branch of doFirstSwitch is unused - confirm    }
      { before relying on it. sub is only accepted with a constant, which   }
      { doFirstSwitch negates into the lea offset.                          }
      canBeFirstSwitch :=
        (p^.oper[1].typ = top_reg) and
        (p^.opsize = S_L) and
        (reg32(p^.oper[1].reg) = reg) and
        (p^.oper[0].typ <> top_ref) and
        ((p^.opcode <> A_SUB) or
         (p^.oper[0].typ = top_const));
    A_SHL:
      { shifts by 1..3 correspond to the lea scale factors 2, 4 and 8 }
      canBeFirstSwitch :=
        (p^.opsize = S_L) and
        (p^.oper[1].typ = top_reg) and
        (p^.oper[1].reg = reg) and
        (p^.oper[0].typ = top_const) and
        (p^.oper[0].val in [1,2,3]);
  end;
end;
procedure switchReg(var reg: tregister; reg1, reg2: tregister);
{ Exchanges reg1 and reg2 in reg: if reg equals one of them, or the result }
{ of regtoreg8/regtoreg16 applied to one of them, it is replaced by the    }
{ corresponding form of the other one. Any other value is left untouched.  }
{ The comparisons are tried in a fixed order and the first match wins.     }
begin
  { the registers as passed in }
  if reg = reg1 then
    begin
      reg := reg2;
      exit;
    end;
  if reg = reg2 then
    begin
      reg := reg1;
      exit;
    end;
  { their regtoreg8 counterparts }
  if reg = regtoreg8(reg1) then
    begin
      reg := regtoreg8(reg2);
      exit;
    end;
  if reg = regtoreg8(reg2) then
    begin
      reg := regtoreg8(reg1);
      exit;
    end;
  { their regtoreg16 counterparts }
  if reg = regtoreg16(reg1) then
    begin
      reg := regtoreg16(reg2);
      exit;
    end;
  if reg = regtoreg16(reg2) then
    reg := regtoreg16(reg1);
end;
procedure switchOp(var op: toper; reg1, reg2: tregister);
{ Applies switchReg to every register held by the operand: the register }
{ itself for a register operand, base and index for a memory reference. }
{ Operands of any other kind are not changed.                           }
begin
  if op.typ = top_reg then
    switchReg(op.reg,reg1,reg2)
  else if op.typ = top_ref then
    begin
      switchReg(op.ref^.base,reg1,reg2);
      switchReg(op.ref^.index,reg1,reg2);
    end;
end;
procedure doSwitchReg(hp: paicpu; reg1,reg2: tregister);
{ Exchanges reg1 and reg2 in all operands (0 .. ops-1) of instruction hp. }
var
  idx, lastOp: longint;
begin
  { the upper bound is taken once, before any operand is touched }
  lastOp := hp^.ops-1;
  idx := 0;
  while idx <= lastOp do
    begin
      switchOp(hp^.oper[idx],reg1,reg2);
      inc(idx);
    end;
end;
procedure doFirstSwitch(p: paicpu; reg1, reg2: tregister);
{ Rewrites p, the first instruction that modifies reg1, so that it reads   }
{ reg1 but writes its result to reg2 instead. Must only be called for      }
{ instructions accepted by canBeFirstSwitch; any other opcode triggers     }
{ internalerror(200010032).                                                }
{ Up to this instruction the caller has replaced reg2 by reg1, so a source }
{ operand that still names reg2 has to be redirected to reg1 here.         }
var
  tmpRef: treference;
begin
  case p^.opcode of
    A_MOV,A_MOVZX,A_MOVSX,A_LEA:
      begin
        { destination reg1 -> reg2, source reg2 -> reg1 }
        changeOp(p^.oper[1],reg1,reg2);
        changeOp(p^.oper[0],reg2,reg1);
      end;
    A_IMUL:
      begin
        if p^.ops = 2 then
          begin
            { "imul $c,%reg1" -> "imul $c,%reg1,%reg1", the destination }
            { is redirected below                                       }
            p^.ops := 3;
            p^.loadreg(2,p^.oper[1].reg);
          end
        else
          { already "imul $c,src,%reg1": keep the existing destination }
          { operand (it used to be overwritten with the source) and    }
          { only redirect a source that names reg2                     }
          changeOp(p^.oper[1],reg2,reg1);
        changeOp(p^.oper[2],reg1,reg2);
      end;
    A_INC,A_DEC:
      begin
        { inc/dec %reg1 -> leal 1(%reg1),%reg2 / leal -1(%reg1),%reg2 }
        reset_reference(tmpref);
        tmpref.base := reg1;
        case p^.opcode of
          A_INC:
            tmpref.offset := 1;
          A_DEC:
            tmpref.offset := -1;
        end;
        p^.ops := 2;
        p^.opcode := A_LEA;
        p^.loadreg(1,reg2);
        p^.loadref(0,newreference(tmpref));
      end;
    A_SUB,A_ADD:
      begin
        { add/sub x,%reg1 -> leal x(%reg1),%reg2 resp. leal (%reg1,x),%reg2 }
        reset_reference(tmpref);
        tmpref.base := reg1;
        case p^.oper[0].typ of
          top_const:
            begin
              tmpref.offset := p^.oper[0].val;
              if p^.opcode = A_SUB then
                tmpref.offset := - tmpRef.offset;
            end;
          top_symbol:
            tmpref.symbol := p^.oper[0].sym;
          top_reg:
            begin
              tmpref.index := p^.oper[0].reg;
              { same source redirection as in the mov/lea case above }
              if tmpref.index = reg2 then
                tmpref.index := reg1;
              tmpref.scalefactor := 1;
            end;
          else internalerror(200010031);
        end;
        p^.opcode := A_LEA;
        p^.loadref(0,newreference(tmpref));
        p^.loadreg(1,reg2);
      end;
    A_SHL:
      begin
        { shll $n,%reg1 -> leal (,%reg1,2^n),%reg2. The scale factor only }
        { applies to the index register, so reg1 must be the index and    }
        { not the base (with reg1 as base no shift happened at all)       }
        reset_reference(tmpref);
        tmpref.index := reg1;
        tmpref.scalefactor := 1 shl p^.oper[0].val;
        p^.opcode := A_LEA;
        p^.loadref(0,newreference(tmpref));
        p^.loadreg(1,reg2);
      end;
    else internalerror(200010032);
  end;
end;
function switchRegs(asml: paasmoutput; reg1, reg2: tregister; start: pai): Boolean;
{ change movl %reg1,%reg2 ... bla ... to ... bla with reg1 and reg2 switched }
{ start is the "movl %reg1,%reg2" itself. The function works in two phases: }
{   1) scan forward from start to find the end of a sequence in which the   }
{      registers can be exchanged (endP), giving up if that is not possible }
{   2) if such a sequence was found, rewrite all instructions between start }
{      and endP and extend the allocation info of both registers            }
{ Returns true if the code was changed; the caller is then expected to get  }
{ rid of the mov at start (this function does not touch it).                }
var
endP, hp: pai;
switchDone, switchLast, tmpResult, sequenceEnd, reg1Modified, reg2Modified: boolean;
reg1StillUsed, reg2StillUsed, isInstruction: boolean;
begin
switchRegs := false;
tmpResult := true;
sequenceEnd := false;
reg1Modified := false;
reg2Modified := false;
endP := start;
{ phase 1: look for the end of the sequence. The loop ends with            }
{ tmpResult = false when the switch is impossible (or the list ends) and   }
{ with sequenceEnd = true when endP is a valid end point                   }
while tmpResult and not sequenceEnd do
begin
tmpResult :=
getNextInstruction(endP,endP);
{ entries already flagged as removable are skipped }
If tmpResult and
not ppaiprop(endP^.optinfo)^.canBeRemoved then
begin
{ if the newReg gets stored back to the oldReg, we can change }
{ "mov %oldReg,%newReg; <operations on %newReg>; mov %newReg, }
{ %oldReg" to "<operations on %oldReg>" }
switchLast := storeBack(endP,reg1,reg2);
reg1StillUsed := reg1 in ppaiprop(endP^.optinfo)^.usedregs;
reg2StillUsed := reg2 in ppaiprop(endP^.optinfo)^.usedregs;
isInstruction := endP^.typ = ait_instruction;
sequenceEnd :=
switchLast or
{ if both registers are released right before an instruction }
{ that contains hardcoded regs, it's ok too }
(not reg1StillUsed and not reg2StillUsed) or
{ no support for (i)div, mul and imul with hardcoded operands }
(((not isInstruction) or
noHardCodedRegs(paicpu(endP),reg1,reg2)) and
(not reg1StillUsed or
(isInstruction and findRegDealloc(reg1,endP) and
regLoadedWithNewValue(reg1,false,paicpu(endP)))) and
(not reg2StillUsed or
(isInstruction and findRegDealloc(reg2,endP) and
regLoadedWithNewValue(reg2,false,paicpu(endP)))));
{ we can't switch reg1 and reg2 in something like }
{ movl %reg1,%reg2 }
{ movl (%reg2),%reg2 }
{ movl 4(%reg1),%reg1 }
if reg2Modified and not(reg1Modified) and
regReadByInstruction(reg1,endP) then
begin
tmpResult := false;
break
end;
{ the first instruction that modifies reg1 has to be rewritten by }
{ doFirstSwitch later on, so it must be of a form that supports it }
if not reg1Modified then
begin
reg1Modified := regModifiedByInstruction(reg1,endP);
if reg1Modified and not canBeFirstSwitch(paicpu(endP),reg1) then
begin
tmpResult := false;
break;
end;
end;
if not reg2Modified then
reg2Modified := regModifiedByInstruction(reg2,endP);
{ a valid end point was found, endP itself is handled after the }
{ rewrite loop below                                            }
if sequenceEnd then
break;
{ otherwise endP becomes part of the sequence: it may not be a label, }
{ and an instruction must pass NoHardCodedRegs and RegSizesOk         }
tmpResult :=
(endP^.typ <> ait_label) and
((not isInstruction) or
(NoHardCodedRegs(paicpu(endP),reg1,reg2) and
RegSizesOk(reg1,reg2,paicpu(endP))));
end;
end;
{ phase 2: perform the actual renaming on everything between start and endP }
if tmpResult and sequenceEnd then
begin
switchRegs := true;
reg1Modified := false;
reg2Modified := false;
getNextInstruction(start,hp);
while hp <> endP do
begin
if (not ppaiprop(hp^.optinfo)^.canberemoved) and
(hp^.typ = ait_instruction) then
begin
switchDone := false;
if not reg1Modified then
begin
reg1Modified := regModifiedByInstruction(reg1,hp);
if reg1Modified then
begin
{ first write to reg1: redirect its result to reg2 }
doFirstSwitch(paicpu(hp),reg1,reg2);
switchDone := true;
end;
end;
{ before the first write to reg1, reg2 is simply replaced by reg1; }
{ after it, both registers are exchanged                           }
if not switchDone then
if reg1Modified then
doSwitchReg(paicpu(hp),reg1,reg2)
else
doReplaceReg(paicpu(hp),reg2,reg1);
end;
getNextInstruction(hp,hp);
end;
{ here hp = endP. If it stores the value back it is switched as well, }
{ otherwise hp is moved back to the instruction in front of it        }
if switchLast then
doSwitchReg(paicpu(hp),reg1,reg2)
else getLastInstruction(hp,hp);
{ both registers are now marked as allocated from start up to hp }
allocRegBetween(asmL,reg1,start,hp);
allocRegBetween(asmL,reg2,start,hp);
end;
end;
procedure doRenaming(asml: paasmoutput; first, last: pai);
{ Walks the instructions from first (after SkipHead) up to last and tries   }
{ to eliminate 32 bit register to register moves via switchRegs. Only moves }
{ whose two registers are both in usableregs+[R_EDI] and which are not yet  }
{ flagged as removable are considered. A successfully handled mov is not    }
{ unlinked from the list here; it is only flagged as canBeRemoved.          }
var
  cur: pai;
  movInstr: paicpu;
begin
  cur := First;
  SkipHead(cur);
  while cur <> last do
    begin
      if cur^.typ = ait_instruction then
        if paicpu(cur)^.opcode = A_MOV then
          begin
            movInstr := paicpu(cur);
            if not(ppaiprop(cur^.optinfo)^.canBeRemoved) and
               (movInstr^.oper[0].typ = top_reg) and
               (movInstr^.oper[1].typ = top_reg) and
               (movInstr^.opsize = S_L) and
               (movInstr^.oper[0].reg in (usableregs+[R_EDI])) and
               (movInstr^.oper[1].reg in (usableregs+[R_EDI])) then
              { switchRegs is only attempted once all checks above passed }
              if switchRegs(asml,movInstr^.oper[0].reg,
                            movInstr^.oper[1].reg,cur) then
                ppaiprop(cur^.optinfo)^.canBeRemoved := true;
          end;
      getNextInstruction(cur,cur);
    end;
end;
End.
{
$Log$
Revision 1.1 2000-10-24 10:40:54 jonas
+ register renaming ("fixes" bug1088)
* changed command line options meanings for optimizer:
O2 now means peepholopts, CSE and register renaming in 1 pass
O3 is the same, but repeated until no further optimizations are
possible or until 5 passes have been done (to avoid endless loops)
* changed aopt386 so it does this looping
* added some procedures from csopt386 to the interface because they're
used by rropt386 as well
* some changes to csopt386 and daopt386 so that newly added instructions
by the CSE get optimizer info (they were simply skipped previously),
this fixes some bugs
}

View File

@ -557,7 +557,7 @@ const
option_info=11024;
option_help_pages=11025;
MsgTxtSize = 31210;
MsgTxtSize = 31225;
MsgIdxMax : array[1..20] of longint=(
17,58,165,34,41,41,86,14,35,40,

View File

@ -728,37 +728,37 @@ const msgtxt : array[0..000130,1..240] of char=(
'3*2Ou_enable uncertain optimizations (see docs)'#010+
'3*2O1_level 1 optimizat','ions (quick optimizations)'#010+
'3*2O2_level 2 optimizations (-O1 + slower optimizations)'#010+
'3*2O3_level 3 optimizations (same as -O2u)'#010+
'3*2O3_level 3 optimizations (-O2 repeatedly, max 5 times)'#010+
'3*2Op<x>_target processor:'#010+
'3*3Op1_set target processor to 386/486'#010+
'3*3Op2_set target processor to Pentium/PentiumM','MX (tm)'#010+
'3*3Op2_set target processor to P','entium/PentiumMMX (tm)'#010+
'3*3Op3_set target processor to PPro/PII/c6x86/K6 (tm)'#010+
'3*1T<x>_Target operating system:'#010+
'3*2TGO32V1_version 1 of DJ Delorie DOS extender'#010+
'3*2TGO32V2_version 2 of DJ Delorie DOS extender'#010+
'3*2TLINUX_Linux'#010+
'3*2Tnetware_Novell Netware Module',' (experimental)'#010+
'3*2Tnetware_Novell',' Netware Module (experimental)'#010+
'3*2TOS2_OS/2 2.x'#010+
'3*2TWin32_Windows 32 Bit'#010+
'3*1W<x>_Win32 target options'#010+
'3*2WB<x>_Set Image base to Hexadecimal <x> value'#010+
'3*2WC_Specify console type application'#010+
'3*2WD_Use DEFFILE to export functions of DLL or EXE'#010+
'3*2WG_Specify',' graphic type application'#010+
'3*2WD_Use DEFFILE to export functions of DLL or EX','E'#010+
'3*2WG_Specify graphic type application'#010+
'3*2WN_Do not generate relocation code (necessary for debugging)'#010+
'3*2WR_Generate relocation code'#010+
'6*1A<x>_output format'#010+
'6*2Aas_Unix o-file using GNU AS'#010+
'6*2Agas_GNU Motorola assembler'#010+
'6*2Amit_MIT Syntax (old GAS)'#010+
'6*2Am','ot_Standard Motorola assembler'#010+
'6*2Amit_MIT Syntax ','(old GAS)'#010+
'6*2Amot_Standard Motorola assembler'#010+
'6*1O_optimizations:'#010+
'6*2Oa_turn on the optimizer'#010+
'6*2Og_generate smaller code'#010+
'6*2OG_generate faster code (default)'#010+
'6*2Ox_optimize maximum (still BUGGY!!!)'#010+
'6*2O2_set target processor to a MC68020+'#010+
'6*1R<x>_assembl','er reading style:'#010+
'6*2O2_set target processor to a MC68020+'#010,
'6*1R<x>_assembler reading style:'#010+
'6*2RMOT_read motorola style assembler'#010+
'6*1T<x>_Target operating system:'#010+
'6*2TAMIGA_Commodore Amiga'#010+
@ -767,5 +767,5 @@ const msgtxt : array[0..000130,1..240] of char=(
'6*2TLINUX_Linux-68k'#010+
'**1*_'#010+
'**1?_shows this help'#010+
'**1h_shows this help withou','t waiting'#000
'**1h_shows t','his help without waiting'#000
);

View File

@ -58,10 +58,10 @@ begin
'g' : initglobalswitches:=initglobalswitches+[cs_littlesize];
'G' : initglobalswitches:=initglobalswitches-[cs_littlesize];
'r' : initglobalswitches:=initglobalswitches+[cs_regalloc];
'u' : initglobalswitches:=initglobalswitches+[cs_optimize,cs_uncertainopts];
'1' : initglobalswitches:=initglobalswitches-[cs_slowoptimize,cs_uncertainopts]+[cs_optimize,cs_fastoptimize];
'2' : initglobalswitches:=initglobalswitches-[cs_uncertainopts]+[cs_optimize,cs_fastoptimize,cs_slowoptimize];
'3' : initglobalswitches:=initglobalswitches+[cs_optimize,cs_fastoptimize,cs_slowoptimize,cs_uncertainopts];
'u' : initglobalswitches:=initglobalswitches+[cs_uncertainopts];
'1' : initglobalswitches:=initglobalswitches-[cs_fastoptimize,cs_slowoptimize]+[cs_optimize];
'2' : initglobalswitches:=initglobalswitches-[cs_slowoptimize]+[cs_optimize,cs_fastoptimize];
'3' : initglobalswitches:=initglobalswitches+[cs_optimize,cs_fastoptimize,cs_slowoptimize];
'p' :
Begin
If j < Length(Opt) Then
@ -115,7 +115,20 @@ end;
end.
{
$Log$
Revision 1.5 2000-09-24 15:06:20 peter
Revision 1.6 2000-10-24 10:40:53 jonas
+ register renaming ("fixes" bug1088)
* changed command line options meanings for optimizer:
O2 now means peepholopts, CSE and register renaming in 1 pass
O3 is the same, but repeated until no further optimizations are
possible or until 5 passes have been done (to avoid endless loops)
* changed aopt386 so it does this looping
* added some procedures from csopt386 to the interface because they're
used by rropt386 as well
* some changes to csopt386 and daopt386 so that newly added instructions
by the CSE get optimizer info (they were simply skipped previously),
this fixes some bugs
Revision 1.5 2000/09/24 15:06:20 peter
* use defines.inc
Revision 1.4 2000/08/27 16:11:51 peter