* i386 peephole assembler uses largely the common peephole optimizer infrastructure, the resulting code is besides a few improvements the same

git-svn-id: trunk@33542 -
This commit is contained in:
florian 2016-04-21 20:14:01 +00:00
parent 4ac3953c34
commit 3c2dab9878
11 changed files with 732 additions and 3172 deletions

6
.gitattributes vendored
View File

@ -188,7 +188,9 @@ compiler/hlcgobj.pas svneol=native#text/plain
compiler/html/i386/readme.txt svneol=native#text/plain compiler/html/i386/readme.txt svneol=native#text/plain
compiler/html/powerpc/readme.txt svneol=native#text/plain compiler/html/powerpc/readme.txt svneol=native#text/plain
compiler/htypechk.pas svneol=native#text/plain compiler/htypechk.pas svneol=native#text/plain
compiler/i386/aopt386.pas svneol=native#text/plain compiler/i386/aoptcpu.pas svneol=native#text/plain
compiler/i386/aoptcpub.pas svneol=native#text/plain
compiler/i386/aoptcpud.pas svneol=native#text/plain
compiler/i386/cgcpu.pas svneol=native#text/plain compiler/i386/cgcpu.pas svneol=native#text/plain
compiler/i386/cpubase.inc svneol=native#text/plain compiler/i386/cpubase.inc svneol=native#text/plain
compiler/i386/cpuelf.pas svneol=native#text/plain compiler/i386/cpuelf.pas svneol=native#text/plain
@ -197,7 +199,6 @@ compiler/i386/cpunode.pas svneol=native#text/plain
compiler/i386/cpupara.pas svneol=native#text/plain compiler/i386/cpupara.pas svneol=native#text/plain
compiler/i386/cpupi.pas svneol=native#text/plain compiler/i386/cpupi.pas svneol=native#text/plain
compiler/i386/cputarg.pas svneol=native#text/plain compiler/i386/cputarg.pas svneol=native#text/plain
compiler/i386/daopt386.pas svneol=native#text/plain
compiler/i386/hlcgcpu.pas svneol=native#text/plain compiler/i386/hlcgcpu.pas svneol=native#text/plain
compiler/i386/i386att.inc svneol=native#text/plain compiler/i386/i386att.inc svneol=native#text/plain
compiler/i386/i386atts.inc svneol=native#text/plain compiler/i386/i386atts.inc svneol=native#text/plain
@ -214,7 +215,6 @@ compiler/i386/n386ld.pas svneol=native#text/plain
compiler/i386/n386mat.pas svneol=native#text/plain compiler/i386/n386mat.pas svneol=native#text/plain
compiler/i386/n386mem.pas svneol=native#text/plain compiler/i386/n386mem.pas svneol=native#text/plain
compiler/i386/n386set.pas svneol=native#text/plain compiler/i386/n386set.pas svneol=native#text/plain
compiler/i386/popt386.pas svneol=native#text/plain
compiler/i386/r386ari.inc svneol=native#text/plain compiler/i386/r386ari.inc svneol=native#text/plain
compiler/i386/r386att.inc svneol=native#text/plain compiler/i386/r386att.inc svneol=native#text/plain
compiler/i386/r386con.inc svneol=native#text/plain compiler/i386/r386con.inc svneol=native#text/plain

View File

@ -40,7 +40,7 @@ Unit aopt;
Constructor create(_AsmL: TAsmList); virtual; reintroduce; Constructor create(_AsmL: TAsmList); virtual; reintroduce;
{ call the necessary optimizer procedures } { call the necessary optimizer procedures }
Procedure Optimize; Procedure Optimize;virtual;
Destructor destroy;override; Destructor destroy;override;
private private
@ -50,6 +50,7 @@ Unit aopt;
Also fixes some RegDeallocs like "# %eax released; push (%eax)" } Also fixes some RegDeallocs like "# %eax released; push (%eax)" }
Procedure BuildLabelTableAndFixRegAlloc; Procedure BuildLabelTableAndFixRegAlloc;
procedure clear; procedure clear;
protected
procedure pass_1; procedure pass_1;
End; End;
TAsmOptimizerClass = class of TAsmOptimizer; TAsmOptimizerClass = class of TAsmOptimizer;

View File

@ -339,10 +339,10 @@ Unit AoptObj;
procedure RemoveDelaySlot(hp1: tai); procedure RemoveDelaySlot(hp1: tai);
{ peephole optimizer } { peephole optimizer }
procedure PrePeepHoleOpts; procedure PrePeepHoleOpts; virtual;
procedure PeepHoleOptPass1; procedure PeepHoleOptPass1; virtual;
procedure PeepHoleOptPass2; virtual; procedure PeepHoleOptPass2; virtual;
procedure PostPeepHoleOpts; procedure PostPeepHoleOpts; virtual;
{ processor dependent methods } { processor dependent methods }
// if it returns true, perform a "continue" // if it returns true, perform a "continue"

View File

@ -1,118 +0,0 @@
{
Copyright (c) 1998-2002 by Jonas Maebe
This unit calls the optimization procedures to optimize the assembler
code for i386+
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
****************************************************************************
}
Unit aopt386;
{$i fpcdefs.inc}
Interface
Uses
aasmbase,aasmtai,aasmdata,aasmcpu;
Procedure Optimize(AsmL: TAsmList);
Implementation
Uses
globtype,
globals,
DAOpt386,POpt386;
Procedure Optimize(AsmL: TAsmList);
Var
BlockStart, BlockEnd, HP: Tai;
pass: longint;
slowopt, changed, lastLoop: boolean;
Begin
slowopt := (cs_opt_level3 in current_settings.optimizerswitches);
pass := 0;
changed := false;
dfa := TDFAObj.create(asml);
repeat
lastLoop :=
not(slowopt) or
(not changed and (pass > 2)) or
{ prevent endless loops }
(pass = 4);
changed := false;
{ Setup labeltable, always necessary }
blockstart := tai(asml.first);
blockend := dfa.pass_1(blockstart);
{ Blockend now either contains an ait_marker with Kind = mark_AsmBlockStart, }
{ or nil }
While Assigned(BlockStart) Do
Begin
if (cs_opt_peephole in current_settings.optimizerswitches) then
begin
if (pass = 0) then
PrePeepHoleOpts(AsmL, BlockStart, BlockEnd);
{ Peephole optimizations }
PeepHoleOptPass1(AsmL, BlockStart, BlockEnd);
{ Only perform them twice in the first pass }
if pass = 0 then
PeepHoleOptPass1(AsmL, BlockStart, BlockEnd);
end;
{ More peephole optimizations }
if (cs_opt_peephole in current_settings.optimizerswitches) then
begin
PeepHoleOptPass2(AsmL, BlockStart, BlockEnd);
if lastLoop then
PostPeepHoleOpts(AsmL, BlockStart, BlockEnd);
end;
{ Free memory }
dfa.clear;
{ Continue where we left off, BlockEnd is either the start of an }
{ assembler block or nil }
BlockStart := BlockEnd;
While Assigned(BlockStart) And
(BlockStart.typ = ait_Marker) And
(Tai_Marker(BlockStart).Kind = mark_AsmBlockStart) Do
Begin
{ We stopped at an assembler block, so skip it }
Repeat
BlockStart := Tai(BlockStart.Next);
Until (BlockStart.Typ = Ait_Marker) And
(Tai_Marker(Blockstart).Kind = mark_AsmBlockEnd);
{ Blockstart now contains a Tai_marker(mark_AsmBlockEnd) }
If GetNextInstruction(BlockStart, HP) And
((HP.typ <> ait_Marker) Or
(Tai_Marker(HP).Kind <> mark_AsmBlockStart)) Then
{ There is no assembler block anymore after the current one, so }
{ optimize the next block of "normal" instructions }
BlockEnd := dfa.pass_1(blockstart)
{ Otherwise, skip the next assembler block }
else
blockStart := hp;
End;
End;
inc(pass);
until lastLoop;
dfa.free;
End;
End.

View File

@ -1,7 +1,7 @@
{ {
Copyright (c) 1998-2002 by Florian Klaempfl and Jonas Maebe Copyright (c) 1998-2002 by Florian Klaempfl and Jonas Maebe
This unit contains the peephole optimizer. This unit contains the peephole optimizer for i386
This program is free software; you can redistribute it and/or modify This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
@ -19,37 +19,53 @@
**************************************************************************** ****************************************************************************
} }
unit popt386;
unit aoptcpu;
{$i fpcdefs.inc} {$i fpcdefs.inc}
{ $define DEBUG_AOPTCPU} { $define DEBUG_AOPTCPU}
interface Interface
uses Aasmbase,aasmtai,aasmdata,aasmcpu,verbose; uses
cgbase,
cpubase, aoptobj, aoptcpub, aopt, aoptx86,
Aasmbase,aasmtai,aasmdata;
procedure PrePeepHoleOpts(asml: TAsmList; BlockStart, BlockEnd: tai); Type
procedure PeepHoleOptPass1(asml: TAsmList; BlockStart, BlockEnd: tai); TCpuAsmOptimizer = class(TX86AsmOptimizer)
procedure PeepHoleOptPass2(asml: TAsmList; BlockStart, BlockEnd: tai); procedure Optimize; override;
procedure PostPeepHoleOpts(asml: TAsmList; BlockStart, BlockEnd: tai); procedure PrePeepHoleOpts; override;
procedure PeepHoleOptPass1; override;
procedure PeepHoleOptPass2; override;
procedure PostPeepHoleOpts; override;
function IsExitCode(p : tai) : boolean;
function DoFpuLoadStoreOpt(var p : tai) : boolean;
procedure RemoveLastDeallocForFuncRes(p : tai);
procedure AllocRegBetween(reg : tregister; p1,p2 : tai;var initialusedregs : TAllUsedRegs);
function RegReadByInstruction(reg : TRegister; hp : tai) : boolean;
function InstructionLoadsFromReg(const reg : TRegister;const hp : tai) : boolean;override;
end;
implementation Var
AsmOptimizer : TCpuAsmOptimizer;
uses Implementation
cutils,globtype,systems,
globals,cgbase,procinfo,
symsym,
{$ifdef finaldestdebug}
cobjects,
{$endif finaldestdebug}
cpuinfo,cpubase,cgutils,daopt386,
cgx86,
aoptx86;
uses
verbose,globtype,globals,
cutils,
aoptbase,
cpuinfo,
aasmcpu,
procinfo,
cgutils,cgx86,
{ units we should get rid off: }
symsym,symconst;
function isFoldableArithOp(hp1: taicpu; reg: tregister): boolean; function isFoldableArithOp(hp1: taicpu; reg: tregister): boolean;
begin begin
isFoldableArithOp := False; isFoldableArithOp := False;
case hp1.opcode of case hp1.opcode of
A_ADD,A_SUB,A_OR,A_XOR,A_AND,A_SHL,A_SHR,A_SAR: A_ADD,A_SUB,A_OR,A_XOR,A_AND,A_SHL,A_SHR,A_SAR:
@ -64,23 +80,10 @@ begin
(taicpu(hp1).oper[0]^.typ = top_reg) and (taicpu(hp1).oper[0]^.typ = top_reg) and
(taicpu(hp1).oper[0]^.reg = reg); (taicpu(hp1).oper[0]^.reg = reg);
end; end;
end; end;
function RegUsedAfterInstruction(reg: Tregister; p: tai; var UsedRegs: TRegSet): Boolean; function TCpuAsmOptimizer.IsExitCode(p : tai) : boolean;
var
supreg: tsuperregister;
begin
supreg := getsupreg(reg);
UpdateUsedRegs(UsedRegs, tai(p.Next));
RegUsedAfterInstruction :=
(supreg in UsedRegs) and
(not(getNextInstruction(p,p)) or
not(regLoadedWithNewValue(supreg,false,p)));
end;
function IsExitCode(p : tai) : boolean;
var var
hp2,hp3 : tai; hp2,hp3 : tai;
begin begin
@ -109,11 +112,52 @@ function IsExitCode(p : tai) : boolean;
end; end;
function doFpuLoadStoreOpt(asmL: TAsmList; var p: tai): boolean; procedure TCPUAsmOptimizer.RemoveLastDeallocForFuncRes(p: tai);
{ returns true if a "continue" should be done after this optimization }
var hp1, hp2: tai; procedure DoRemoveLastDeallocForFuncRes( supreg: tsuperregister);
begin var
doFpuLoadStoreOpt := false; hp2: tai;
begin
hp2 := p;
repeat
hp2 := tai(hp2.previous);
if assigned(hp2) and
(hp2.typ = ait_regalloc) and
(tai_regalloc(hp2).ratype=ra_dealloc) and
(getregtype(tai_regalloc(hp2).reg) = R_INTREGISTER) and
(getsupreg(tai_regalloc(hp2).reg) = supreg) then
begin
asml.remove(hp2);
hp2.free;
break;
end;
until not(assigned(hp2)) or regInInstruction(newreg(R_INTREGISTER,supreg,R_SUBWHOLE),hp2);
end;
begin
case current_procinfo.procdef.returndef.typ of
arraydef,recorddef,pointerdef,
stringdef,enumdef,procdef,objectdef,errordef,
filedef,setdef,procvardef,
classrefdef,forwarddef:
DoRemoveLastDeallocForFuncRes(RS_EAX);
orddef:
if current_procinfo.procdef.returndef.size <> 0 then
begin
DoRemoveLastDeallocForFuncRes(RS_EAX);
{ for int64/qword }
if current_procinfo.procdef.returndef.size = 8 then
DoRemoveLastDeallocForFuncRes(RS_EDX);
end;
end;
end;
function TCPUAsmoptimizer.DoFpuLoadStoreOpt(var p: tai): boolean;
{ returns true if a "continue" should be done after this optimization }
var hp1, hp2: tai;
begin
DoFpuLoadStoreOpt := false;
if (taicpu(p).oper[0]^.typ = top_ref) and if (taicpu(p).oper[0]^.typ = top_ref) and
getNextInstruction(p, hp1) and getNextInstruction(p, hp1) and
(hp1.typ = ait_instruction) and (hp1.typ = ait_instruction) and
@ -140,7 +184,7 @@ begin
p.free; p.free;
hp1.free; hp1.free;
p := hp2; p := hp2;
removeLastDeallocForFuncRes(asmL, p); removeLastDeallocForFuncRes(p);
doFPULoadStoreOpt := true; doFPULoadStoreOpt := true;
end end
(* can't be done because the store operation rounds (* can't be done because the store operation rounds
@ -157,7 +201,198 @@ begin
end end
*) *)
end; end;
end; end;
{ allocates register reg between (and including) instructions p1 and p2
the type of p1 and p2 must not be in SkipInstr
note that this routine is both called from the peephole optimizer
where optinfo is not yet initialised) and from the cse (where it is) }
procedure TCpuAsmOptimizer.AllocRegBetween(reg: tregister; p1, p2: tai; var initialusedregs: TAllUsedRegs);
var
hp, start: tai;
removedsomething,
firstRemovedWasAlloc,
lastRemovedWasDealloc: boolean;
begin
{$ifdef EXTDEBUG}
if assigned(p1.optinfo) and
(ptaiprop(p1.optinfo)^.usedregs <> initialusedregs) then
internalerror(2004101010);
{$endif EXTDEBUG}
start := p1;
if (reg = NR_ESP) or
(reg = current_procinfo.framepointer) or
not(assigned(p1)) then
{ this happens with registers which are loaded implicitely, outside the }
{ current block (e.g. esi with self) }
exit;
{ make sure we allocate it for this instruction }
getnextinstruction(p2,p2);
lastRemovedWasDealloc := false;
removedSomething := false;
firstRemovedWasAlloc := false;
{$ifdef allocregdebug}
hp := tai_comment.Create(strpnew('allocating '+std_regname(newreg(R_INTREGISTER,supreg,R_SUBWHOLE))+
' from here...'));
insertllitem(asml,p1.previous,p1,hp);
hp := tai_comment.Create(strpnew('allocated '+std_regname(newreg(R_INTREGISTER,supreg,R_SUBWHOLE))+
' till here...'));
insertllitem(asml,p2,p2.next,hp);
{$endif allocregdebug}
if not(RegInUsedRegs(reg,initialusedregs)) then
begin
hp := tai_regalloc.alloc(reg,nil);
insertllItem(p1.previous,p1,hp);
IncludeRegInUsedRegs(reg,initialusedregs);
end;
while assigned(p1) and
(p1 <> p2) do
begin
if assigned(p1.optinfo) then
internalerror(2014022301); // IncludeRegInUsedRegs(reg,ptaiprop(p1.optinfo)^.usedregs);
p1 := tai(p1.next);
repeat
while assigned(p1) and
(p1.typ in (SkipInstr-[ait_regalloc])) Do
p1 := tai(p1.next);
{ remove all allocation/deallocation info about the register in between }
if assigned(p1) and
(p1.typ = ait_regalloc) then
if tai_regalloc(p1).reg=reg then
begin
if not removedSomething then
begin
firstRemovedWasAlloc := tai_regalloc(p1).ratype=ra_alloc;
removedSomething := true;
end;
lastRemovedWasDealloc := (tai_regalloc(p1).ratype=ra_dealloc);
hp := tai(p1.Next);
asml.Remove(p1);
p1.free;
p1 := hp;
end
else p1 := tai(p1.next);
until not(assigned(p1)) or
not(p1.typ in SkipInstr);
end;
if assigned(p1) then
begin
if firstRemovedWasAlloc then
begin
hp := tai_regalloc.Alloc(reg,nil);
insertLLItem(start.previous,start,hp);
end;
if lastRemovedWasDealloc then
begin
hp := tai_regalloc.DeAlloc(reg,nil);
insertLLItem(p1.previous,p1,hp);
end;
end;
end;
{ converts a TChange variable to a TRegister }
function tch2reg(ch: tinschange): tsuperregister;
const
ch2reg: array[CH_REAX..CH_REDI] of tsuperregister = (RS_EAX,RS_ECX,RS_EDX,RS_EBX,RS_ESP,RS_EBP,RS_ESI,RS_EDI);
begin
if (ch <= CH_REDI) then
tch2reg := ch2reg[ch]
else if (ch <= CH_WEDI) then
tch2reg := ch2reg[tinschange(ord(ch) - ord(CH_REDI))]
else if (ch <= CH_RWEDI) then
tch2reg := ch2reg[tinschange(ord(ch) - ord(CH_WEDI))]
else if (ch <= CH_MEDI) then
tch2reg := ch2reg[tinschange(ord(ch) - ord(CH_RWEDI))]
else
InternalError(2016041901)
end;
{ Checks if the register is a 32 bit general purpose register }
function isgp32reg(reg: TRegister): boolean;
begin
{$push}{$warnings off}
isgp32reg:=(getregtype(reg)=R_INTREGISTER) and (getsupreg(reg)>=RS_EAX) and (getsupreg(reg)<=RS_EBX);
{$pop}
end;
function TCpuAsmOptimizer.InstructionLoadsFromReg(const reg: TRegister;const hp: tai): boolean;
begin
Result:=RegReadByInstruction(reg,hp);
end;
function TCpuAsmOptimizer.RegReadByInstruction(reg: TRegister; hp: tai): boolean;
var
p: taicpu;
opcount: longint;
begin
RegReadByInstruction := false;
if hp.typ <> ait_instruction then
exit;
p := taicpu(hp);
case p.opcode of
A_CALL:
regreadbyinstruction := true;
A_IMUL:
case p.ops of
1:
regReadByInstruction :=
(reg = NR_EAX) or RegInOp(reg,p.oper[0]^);
2,3:
regReadByInstruction :=
reginop(reg,p.oper[0]^) or
reginop(reg,p.oper[1]^);
end;
A_IDIV,A_DIV,A_MUL:
begin
regReadByInstruction :=
RegInOp(reg,p.oper[0]^) or (getsupreg(reg) in [RS_EAX,RS_EDX]);
end;
else
begin
for opcount := 0 to p.ops-1 do
if (p.oper[opCount]^.typ = top_ref) and
RegInRef(reg,p.oper[opcount]^.ref^) then
begin
RegReadByInstruction := true;
exit
end;
for opcount := 1 to maxinschanges do
case insprop[p.opcode].ch[opcount] of
CH_REAX..CH_REDI,CH_RWEAX..CH_MEDI:
if getsupreg(reg) = tch2reg(insprop[p.opcode].ch[opcount]) then
begin
RegReadByInstruction := true;
exit
end;
CH_RWOP1,CH_ROP1,CH_MOP1:
if reginop(reg,p.oper[0]^) then
begin
RegReadByInstruction := true;
exit
end;
Ch_RWOP2,Ch_ROP2,Ch_MOP2:
if reginop(reg,p.oper[1]^) then
begin
RegReadByInstruction := true;
exit
end;
Ch_RWOP3,Ch_ROP3,Ch_MOP3:
if reginop(reg,p.oper[2]^) then
begin
RegReadByInstruction := true;
exit
end;
end;
end;
end;
end;
{ returns true if p contains a memory operand with a segment set } { returns true if p contains a memory operand with a segment set }
@ -174,7 +409,26 @@ begin
end; end;
procedure PrePeepHoleOpts(asml: TAsmList; BlockStart, BlockEnd: tai); function InstrReadsFlags(p: tai): boolean;
var
l: longint;
begin
InstrReadsFlags := true;
case p.typ of
ait_instruction:
begin
for l := 1 to maxinschanges do
if InsProp[taicpu(p).opcode].Ch[l] in [Ch_RFlags,Ch_RWFlags,Ch_All] then
exit;
end;
ait_label:
exit;
end;
InstrReadsFlags := false;
end;
procedure TCPUAsmOptimizer.PrePeepHoleOpts;
var var
p,hp1: tai; p,hp1: tai;
l: aint; l: aint;
@ -212,7 +466,7 @@ begin
{change "imul $1, reg1, reg2" to "mov reg1, reg2"} {change "imul $1, reg1, reg2" to "mov reg1, reg2"}
begin begin
hp1 := taicpu.Op_Reg_Reg(A_MOV, S_L, taicpu(p).oper[1]^.reg,taicpu(p).oper[2]^.reg); hp1 := taicpu.Op_Reg_Reg(A_MOV, S_L, taicpu(p).oper[1]^.reg,taicpu(p).oper[2]^.reg);
InsertLLItem(asml, p.previous, p.next, hp1); InsertLLItem(p.previous, p.next, hp1);
p.free; p.free;
p := hp1; p := hp1;
end end
@ -241,7 +495,7 @@ begin
hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg) hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg)
else else
hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg); hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg);
InsertLLItem(asml,p.previous, p.next, hp1); InsertLLItem(p.previous, p.next, hp1);
p.free; p.free;
p := hp1; p := hp1;
end; end;
@ -257,7 +511,7 @@ begin
hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg) hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg)
else else
hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg); hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg);
InsertLLItem(asml,p.previous, p.next, hp1); InsertLLItem(p.previous, p.next, hp1);
p.free; p.free;
p := hp1; p := hp1;
end; end;
@ -282,7 +536,7 @@ begin
hp1 := taicpu.op_reg_reg(A_ADD, S_L, hp1 := taicpu.op_reg_reg(A_ADD, S_L,
taicpu(p).oper[1]^.reg,taicpu(p).oper[1]^.reg); taicpu(p).oper[1]^.reg,taicpu(p).oper[1]^.reg);
end; end;
InsertLLItem(asml,p, p.next, hp1); InsertLLItem(p, p.next, hp1);
reference_reset(tmpref,2); reference_reset(tmpref,2);
TmpRef.index := taicpu(p).oper[1]^.reg; TmpRef.index := taicpu(p).oper[1]^.reg;
TmpRef.ScaleFactor := 2; TmpRef.ScaleFactor := 2;
@ -297,7 +551,7 @@ begin
TmpRef.base := taicpu(p).oper[1]^.reg; TmpRef.base := taicpu(p).oper[1]^.reg;
hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg); hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg);
end; end;
InsertLLItem(asml,p.previous, p.next, hp1); InsertLLItem(p.previous, p.next, hp1);
p.free; p.free;
p := tai(hp1.next); p := tai(hp1.next);
end end
@ -314,7 +568,7 @@ begin
hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg) hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg)
else else
hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg); hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg);
InsertLLItem(asml,p.previous, p.next, hp1); InsertLLItem(p.previous, p.next, hp1);
p.free; p.free;
p := hp1; p := hp1;
end; end;
@ -333,7 +587,7 @@ begin
else else
hp1 := taicpu.op_reg_reg(A_ADD, S_L, hp1 := taicpu.op_reg_reg(A_ADD, S_L,
taicpu(p).oper[1]^.reg,taicpu(p).oper[1]^.reg); taicpu(p).oper[1]^.reg,taicpu(p).oper[1]^.reg);
InsertLLItem(asml,p, p.next, hp1); InsertLLItem(p, p.next, hp1);
TmpRef.base := taicpu(p).oper[1]^.reg; TmpRef.base := taicpu(p).oper[1]^.reg;
TmpRef.index := taicpu(p).oper[1]^.reg; TmpRef.index := taicpu(p).oper[1]^.reg;
TmpRef.ScaleFactor := 4; TmpRef.ScaleFactor := 4;
@ -341,7 +595,7 @@ begin
hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg) hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg)
else else
hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg); hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg);
InsertLLItem(asml,p.previous, p.next, hp1); InsertLLItem(p.previous, p.next, hp1);
p.free; p.free;
p := tai(hp1.next); p := tai(hp1.next);
end end
@ -369,7 +623,7 @@ begin
TmpRef.ScaleFactor := 4; TmpRef.ScaleFactor := 4;
hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg); hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg);
end; end;
InsertLLItem(asml,p, p.next, hp1); InsertLLItem(p, p.next, hp1);
reference_reset(tmpref,2); reference_reset(tmpref,2);
TmpRef.index := taicpu(p).oper[1]^.reg; TmpRef.index := taicpu(p).oper[1]^.reg;
if (taicpu(p).ops = 3) then if (taicpu(p).ops = 3) then
@ -384,7 +638,7 @@ begin
TmpRef.ScaleFactor := 2; TmpRef.ScaleFactor := 2;
hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg); hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg);
end; end;
InsertLLItem(asml,p.previous, p.next, hp1); InsertLLItem(p.previous, p.next, hp1);
p.free; p.free;
p := tai(hp1.next); p := tai(hp1.next);
end end
@ -490,7 +744,7 @@ function SkipLabels(hp: tai; var hp2: tai): boolean;
{ First pass of peephole optimizations } { First pass of peephole optimizations }
procedure PeepHoleOptPass1(Asml: TAsmList; BlockStart, BlockEnd: tai); procedure TCPUAsmOPtimizer.PeepHoleOptPass1;
{$ifdef DEBUG_AOPTCPU} {$ifdef DEBUG_AOPTCPU}
procedure DebugMsg(const s: string;p : tai); procedure DebugMsg(const s: string;p : tai);
@ -517,7 +771,7 @@ var
TmpRef: TReference; TmpRef: TReference;
UsedRegs, TmpUsedRegs: TRegSet; TmpUsedRegs: TAllUsedRegs;
TmpBool1, TmpBool2: Boolean; TmpBool1, TmpBool2: Boolean;
@ -555,7 +809,7 @@ var
GetfinalDestination := false; GetfinalDestination := false;
if level > 20 then if level > 20 then
exit; exit;
p1 := dfa.getlabelwithsym(tasmlabel(hp.oper[0]^.ref^.symbol)); p1 := getlabelwithsym(tasmlabel(hp.oper[0]^.ref^.symbol));
if assigned(p1) then if assigned(p1) then
begin begin
SkipLabels(p1,p1); SkipLabels(p1,p1);
@ -595,7 +849,7 @@ var
strpnew('previous label inserted')))); strpnew('previous label inserted'))));
{$endif finaldestdebug} {$endif finaldestdebug}
current_asmdata.getjumplabel(l); current_asmdata.getjumplabel(l);
insertllitem(asml,p1,p1.next,tai_label.Create(l)); insertllitem(p1,p1.next,tai_label.Create(l));
tasmlabel(taicpu(hp).oper[0]^.ref^.symbol).decrefs; tasmlabel(taicpu(hp).oper[0]^.ref^.symbol).decrefs;
hp.oper[0]^.ref^.symbol := l; hp.oper[0]^.ref^.symbol := l;
l.increfs; l.increfs;
@ -666,7 +920,7 @@ var
begin begin
p := BlockStart; p := BlockStart;
UsedRegs := []; ClearUsedRegs;
while (p <> BlockEnd) Do while (p <> BlockEnd) Do
begin begin
UpDateUsedRegs(UsedRegs, tai(p.next)); UpDateUsedRegs(UsedRegs, tai(p.next));
@ -793,7 +1047,7 @@ begin
(hp1.typ = ait_instruction) and (hp1.typ = ait_instruction) and
(taicpu(hp1).is_jmp) and (taicpu(hp1).is_jmp) and
(taicpu(hp1).opcode<>A_JMP) and (taicpu(hp1).opcode<>A_JMP) and
not(getsupreg(taicpu(p).oper[1]^.reg) in UsedRegs) then not(RegInUsedRegs(taicpu(p).oper[1]^.reg,UsedRegs)) then
taicpu(p).opcode := A_TEST; taicpu(p).opcode := A_TEST;
end; end;
A_CMP: A_CMP:
@ -816,7 +1070,7 @@ begin
(hp1.typ=ait_instruction) and (hp1.typ=ait_instruction) and
(taicpu(hp1).opcode=A_Jcc) and (taicpu(hp1).opcode=A_Jcc) and
(Taicpu(hp1).condition in [C_E,C_NE]) and (Taicpu(hp1).condition in [C_E,C_NE]) and
not(getsupreg(Taicpu(p).oper[1]^.reg) in usedregs) then not(RegInUsedRegs(Taicpu(p).oper[1]^.reg, UsedRegs)) then
begin begin
Taicpu(p).opcode:=A_NEG; Taicpu(p).opcode:=A_NEG;
Taicpu(p).loadoper(0,Taicpu(p).oper[1]^); Taicpu(p).loadoper(0,Taicpu(p).oper[1]^);
@ -976,7 +1230,7 @@ begin
end end
end; end;
A_FSTP,A_FISTP: A_FSTP,A_FISTP:
if doFpuLoadStoreOpt(asmL,p) then if doFpuLoadStoreOpt(p) then
continue; continue;
A_LEA: A_LEA:
begin begin
@ -994,7 +1248,7 @@ begin
begin begin
hp1 := taicpu.op_reg_reg(A_MOV, S_L,taicpu(p).oper[0]^.ref^.base, hp1 := taicpu.op_reg_reg(A_MOV, S_L,taicpu(p).oper[0]^.ref^.base,
taicpu(p).oper[1]^.reg); taicpu(p).oper[1]^.reg);
InsertLLItem(asml,p.previous,p.next, hp1); InsertLLItem(p.previous,p.next, hp1);
p.free; p.free;
p := hp1; p := hp1;
continue; continue;
@ -1074,7 +1328,6 @@ begin
end; end;
A_MOV: A_MOV:
begin begin
TmpUsedRegs := UsedRegs;
if (taicpu(p).oper[1]^.typ = top_reg) and if (taicpu(p).oper[1]^.typ = top_reg) and
(getsupreg(taicpu(p).oper[1]^.reg) in [RS_EAX, RS_EBX, RS_ECX, RS_EDX, RS_ESI, RS_EDI]) and (getsupreg(taicpu(p).oper[1]^.reg) in [RS_EAX, RS_EBX, RS_ECX, RS_EDX, RS_ESI, RS_EDI]) and
GetNextInstruction(p, hp1) and GetNextInstruction(p, hp1) and
@ -1083,8 +1336,9 @@ begin
(taicpu(hp1).oper[0]^.typ = top_reg) and (taicpu(hp1).oper[0]^.typ = top_reg) and
(taicpu(hp1).oper[0]^.reg = taicpu(p).oper[1]^.reg) then (taicpu(hp1).oper[0]^.reg = taicpu(p).oper[1]^.reg) then
begin begin
CopyUsedRegs(TmpUsedRegs);
{we have "mov x, %treg; mov %treg, y} {we have "mov x, %treg; mov %treg, y}
if not(RegInOp(getsupreg(taicpu(p).oper[1]^.reg),taicpu(hp1).oper[1]^)) and if not(RegInOp(taicpu(p).oper[1]^.reg,taicpu(hp1).oper[1]^)) and
not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs)) then not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs)) then
{we've got "mov x, %treg; mov %treg, y; with %treg is not used after } {we've got "mov x, %treg; mov %treg, y; with %treg is not used after }
case taicpu(p).oper[0]^.typ Of case taicpu(p).oper[0]^.typ Of
@ -1095,6 +1349,7 @@ begin
taicpu(p).loadOper(1,taicpu(hp1).oper[1]^); taicpu(p).loadOper(1,taicpu(hp1).oper[1]^);
asml.remove(hp1); asml.remove(hp1);
hp1.free; hp1.free;
ReleaseUsedRegs(TmpUsedRegs);
continue; continue;
end; end;
top_ref: top_ref:
@ -1105,9 +1360,11 @@ begin
taicpu(p).loadoper(1,taicpu(hp1).oper[1]^); taicpu(p).loadoper(1,taicpu(hp1).oper[1]^);
asml.remove(hp1); asml.remove(hp1);
hp1.free; hp1.free;
ReleaseUsedRegs(TmpUsedRegs);
continue; continue;
end; end;
end end;
ReleaseUsedRegs(TmpUsedRegs);
end end
else else
{Change "mov %reg1, %reg2; xxx %reg2, ???" to {Change "mov %reg1, %reg2; xxx %reg2, ???" to
@ -1128,10 +1385,10 @@ begin
(taicpu(hp1).oper[0]^.reg = taicpu(hp1).oper[1]^.reg) then (taicpu(hp1).oper[0]^.reg = taicpu(hp1).oper[1]^.reg) then
{we have "mov %reg1, %reg2; test/or %reg2, %reg2"} {we have "mov %reg1, %reg2; test/or %reg2, %reg2"}
begin begin
TmpUsedRegs := UsedRegs; CopyUsedRegs(TmpUsedRegs);
{ reg1 will be used after the first instruction, } { reg1 will be used after the first instruction, }
{ so update the allocation info } { so update the allocation info }
allocRegBetween(asmL,taicpu(p).oper[0]^.reg,p,hp1,usedregs); AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
if GetNextInstruction(hp1, hp2) and if GetNextInstruction(hp1, hp2) and
(hp2.typ = ait_instruction) and (hp2.typ = ait_instruction) and
taicpu(hp2).is_jmp and taicpu(hp2).is_jmp and
@ -1144,6 +1401,7 @@ begin
asml.remove(p); asml.remove(p);
p.free; p.free;
p := hp1; p := hp1;
ReleaseUsedRegs(TmpUsedRegs);
continue continue
end end
else else
@ -1153,6 +1411,7 @@ begin
taicpu(hp1).loadoper(0,taicpu(p).oper[0]^); taicpu(hp1).loadoper(0,taicpu(p).oper[0]^);
taicpu(hp1).loadoper(1,taicpu(p).oper[0]^); taicpu(hp1).loadoper(1,taicpu(p).oper[0]^);
end; end;
ReleaseUsedRegs(TmpUsedRegs);
end end
{ else { else
if (taicpu(p.next)^.opcode if (taicpu(p.next)^.opcode
@ -1178,7 +1437,7 @@ begin
asml.remove(p); asml.remove(p);
p.free; p.free;
p := hp1; p := hp1;
RemoveLastDeallocForFuncRes(asmL,p); RemoveLastDeallocForFuncRes(p);
end end
else else
if (taicpu(p).oper[0]^.typ = top_reg) and if (taicpu(p).oper[0]^.typ = top_reg) and
@ -1190,7 +1449,7 @@ begin
{change "mov reg1, mem1; cmp x, mem1" to "mov reg, mem1; cmp x, reg1"} {change "mov reg1, mem1; cmp x, mem1" to "mov reg, mem1; cmp x, reg1"}
begin begin
taicpu(hp1).loadreg(1,taicpu(p).oper[0]^.reg); taicpu(hp1).loadreg(1,taicpu(p).oper[0]^.reg);
allocRegBetween(asmL,taicpu(p).oper[0]^.reg,p,hp1,usedregs); AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
end; end;
{ Next instruction is also a MOV ? } { Next instruction is also a MOV ? }
if GetNextInstruction(p, hp1) and if GetNextInstruction(p, hp1) and
@ -1211,13 +1470,13 @@ begin
mov mem1/reg2, reg1 } mov mem1/reg2, reg1 }
begin begin
if (taicpu(p).oper[0]^.typ = top_reg) then if (taicpu(p).oper[0]^.typ = top_reg) then
AllocRegBetween(asmL,taicpu(p).oper[0]^.reg,p,hp1,usedregs); AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
asml.remove(hp1); asml.remove(hp1);
hp1.free; hp1.free;
end end
else else
begin begin
TmpUsedRegs := UsedRegs; CopyUsedRegs(TmpUsedRegs);
UpdateUsedRegs(TmpUsedRegs, tai(hp1.next)); UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
if (taicpu(p).oper[1]^.typ = top_ref) and if (taicpu(p).oper[1]^.typ = top_ref) and
{ mov reg1, mem1 { mov reg1, mem1
@ -1243,11 +1502,12 @@ begin
taicpu(hp1).loadref(1,taicpu(hp1).oper[0]^.ref^); taicpu(hp1).loadref(1,taicpu(hp1).oper[0]^.ref^);
taicpu(hp1).loadreg(0,taicpu(p).oper[0]^.reg); taicpu(hp1).loadreg(0,taicpu(p).oper[0]^.reg);
end; end;
ReleaseUsedRegs(TmpUsedRegs);
end; end;
end end
else else
begin begin
tmpUsedRegs := UsedRegs; CopyUsedRegs(TmpUsedRegs);
if GetNextInstruction(hp1, hp2) and if GetNextInstruction(hp1, hp2) and
(taicpu(p).oper[0]^.typ = top_ref) and (taicpu(p).oper[0]^.typ = top_ref) and
(taicpu(p).oper[1]^.typ = top_reg) and (taicpu(p).oper[1]^.typ = top_reg) and
@ -1260,7 +1520,7 @@ begin
(taicpu(hp2).oper[1]^.typ = top_reg) and (taicpu(hp2).oper[1]^.typ = top_reg) and
(taicpu(hp2).oper[0]^.typ = top_ref) and (taicpu(hp2).oper[0]^.typ = top_ref) and
RefsEqual(taicpu(hp2).oper[0]^.ref^, taicpu(hp1).oper[1]^.ref^) then RefsEqual(taicpu(hp2).oper[0]^.ref^, taicpu(hp1).oper[1]^.ref^) then
if not regInRef(getsupreg(taicpu(hp2).oper[1]^.reg),taicpu(hp2).oper[0]^.ref^) and if not RegInRef(taicpu(hp2).oper[1]^.reg,taicpu(hp2).oper[0]^.ref^) and
not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,tmpUsedRegs)) then not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,tmpUsedRegs)) then
{ mov mem1, %reg1 { mov mem1, %reg1
mov %reg1, mem2 mov %reg1, mem2
@ -1269,7 +1529,7 @@ begin
mov mem1, reg2 mov mem1, reg2
mov reg2, mem2} mov reg2, mem2}
begin begin
AllocRegBetween(asmL,taicpu(hp2).oper[1]^.reg,p,hp2,usedregs); AllocRegBetween(taicpu(hp2).oper[1]^.reg,p,hp2,usedregs);
taicpu(p).loadoper(1,taicpu(hp2).oper[1]^); taicpu(p).loadoper(1,taicpu(hp2).oper[1]^);
taicpu(hp1).loadoper(0,taicpu(hp2).oper[1]^); taicpu(hp1).loadoper(0,taicpu(hp2).oper[1]^);
asml.remove(hp2); asml.remove(hp2);
@ -1277,8 +1537,8 @@ begin
end end
else else
if (taicpu(p).oper[1]^.reg <> taicpu(hp2).oper[1]^.reg) and if (taicpu(p).oper[1]^.reg <> taicpu(hp2).oper[1]^.reg) and
not(RegInRef(getsupreg(taicpu(p).oper[1]^.reg),taicpu(p).oper[0]^.ref^)) and not(RegInRef(taicpu(p).oper[1]^.reg,taicpu(p).oper[0]^.ref^)) and
not(RegInRef(getsupreg(taicpu(hp2).oper[1]^.reg),taicpu(hp2).oper[0]^.ref^)) then not(RegInRef(taicpu(hp2).oper[1]^.reg,taicpu(hp2).oper[0]^.ref^)) then
{ mov mem1, reg1 mov mem1, reg1 { mov mem1, reg1 mov mem1, reg1
mov reg1, mem2 mov reg1, mem2 mov reg1, mem2 mov reg1, mem2
mov mem2, reg2 mov mem2, reg1 mov mem2, reg2 mov mem2, reg1
@ -1299,26 +1559,27 @@ begin
taicpu(hp1).loadReg(1,taicpu(hp2).oper[1]^.reg); taicpu(hp1).loadReg(1,taicpu(hp2).oper[1]^.reg);
taicpu(hp2).loadRef(1,taicpu(hp2).oper[0]^.ref^); taicpu(hp2).loadRef(1,taicpu(hp2).oper[0]^.ref^);
taicpu(hp2).loadReg(0,taicpu(p).oper[1]^.reg); taicpu(hp2).loadReg(0,taicpu(p).oper[1]^.reg);
allocRegBetween(asmL,taicpu(p).oper[1]^.reg,p,hp2,usedregs); AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp2,usedregs);
if (taicpu(p).oper[0]^.ref^.base <> NR_NO) and if (taicpu(p).oper[0]^.ref^.base <> NR_NO) and
(getsupreg(taicpu(p).oper[0]^.ref^.base) in [RS_EAX,RS_EBX,RS_ECX,RS_EDX,RS_ESI,RS_EDI]) then (getsupreg(taicpu(p).oper[0]^.ref^.base) in [RS_EAX,RS_EBX,RS_ECX,RS_EDX,RS_ESI,RS_EDI]) then
allocRegBetween(asmL,taicpu(p).oper[0]^.ref^.base,p,hp2,usedregs); AllocRegBetween(taicpu(p).oper[0]^.ref^.base,p,hp2,usedregs);
if (taicpu(p).oper[0]^.ref^.index <> NR_NO) and if (taicpu(p).oper[0]^.ref^.index <> NR_NO) and
(getsupreg(taicpu(p).oper[0]^.ref^.index) in [RS_EAX,RS_EBX,RS_ECX,RS_EDX,RS_ESI,RS_EDI]) then (getsupreg(taicpu(p).oper[0]^.ref^.index) in [RS_EAX,RS_EBX,RS_ECX,RS_EDX,RS_ESI,RS_EDI]) then
allocRegBetween(asmL,taicpu(p).oper[0]^.ref^.index,p,hp2,usedregs); AllocRegBetween(taicpu(p).oper[0]^.ref^.index,p,hp2,usedregs);
end end
else else
if (taicpu(hp1).Oper[0]^.reg <> taicpu(hp2).Oper[1]^.reg) then if (taicpu(hp1).Oper[0]^.reg <> taicpu(hp2).Oper[1]^.reg) then
begin begin
taicpu(hp2).loadReg(0,taicpu(hp1).Oper[0]^.reg); taicpu(hp2).loadReg(0,taicpu(hp1).Oper[0]^.reg);
allocRegBetween(asmL,taicpu(p).oper[1]^.reg,p,hp2,usedregs); AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp2,usedregs);
end end
else else
begin begin
asml.remove(hp2); asml.remove(hp2);
hp2.free; hp2.free;
end end;
end ReleaseUsedRegs(TmpUsedRegs);
end;
end end
else else
(* {movl [mem1],reg1 (* {movl [mem1],reg1
@ -1347,9 +1608,9 @@ begin
(taicpu(hp1).oper[1]^.typ = top_reg) and (taicpu(hp1).oper[1]^.typ = top_reg) and
(taicpu(p).opsize = taicpu(hp1).opsize) and (taicpu(p).opsize = taicpu(hp1).opsize) and
RefsEqual(taicpu(hp1).oper[0]^.ref^,taicpu(p).oper[1]^.ref^) and RefsEqual(taicpu(hp1).oper[0]^.ref^,taicpu(p).oper[1]^.ref^) and
not(reginref(getsupreg(taicpu(hp1).oper[1]^.reg),taicpu(hp1).oper[0]^.ref^)) then not(RegInRef(taicpu(hp1).oper[1]^.reg,taicpu(hp1).oper[0]^.ref^)) then
begin begin
allocregbetween(asml,taicpu(hp1).oper[1]^.reg,p,hp1,usedregs); allocregbetween(taicpu(hp1).oper[1]^.reg,p,hp1,usedregs);
taicpu(hp1).loadReg(0,taicpu(hp1).oper[1]^.reg); taicpu(hp1).loadReg(0,taicpu(hp1).oper[1]^.reg);
taicpu(hp1).loadRef(1,taicpu(p).oper[1]^.ref^); taicpu(hp1).loadRef(1,taicpu(p).oper[1]^.ref^);
taicpu(p).loadReg(1,taicpu(hp1).oper[0]^.reg); taicpu(p).loadReg(1,taicpu(hp1).oper[0]^.reg);
@ -1370,7 +1631,7 @@ begin
begin begin
Taicpu(hp2).opcode:=A_MOV; Taicpu(hp2).opcode:=A_MOV;
asml.remove(hp1); asml.remove(hp1);
insertllitem(asml,hp2,hp2.next,hp1); insertllitem(hp2,hp2.next,hp1);
asml.remove(p); asml.remove(p);
p.free; p.free;
p:=hp1; p:=hp1;
@ -1389,7 +1650,7 @@ begin
{mov reg1,ref {mov reg1,ref
lea reg2,[reg1,reg2] --> add reg2,ref} lea reg2,[reg1,reg2] --> add reg2,ref}
begin begin
TmpUsedRegs := UsedRegs; CopyUsedRegs(TmpUsedRegs);
{ reg1 may not be used afterwards } { reg1 may not be used afterwards }
if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs)) then if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs)) then
begin begin
@ -1400,6 +1661,7 @@ begin
p.free; p.free;
p:=hp1; p:=hp1;
end; end;
ReleaseUsedRegs(TmpUsedRegs);
end; end;
end; end;
@ -1419,11 +1681,7 @@ begin
(getsupreg(taicpu(hp2).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg))) or (getsupreg(taicpu(hp2).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg))) or
((taicpu(hp1).ops=1) and ((taicpu(hp1).ops=1) and
(getsupreg(taicpu(hp2).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[0]^.reg)))) and (getsupreg(taicpu(hp2).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[0]^.reg)))) and
{ reg2 must not be used after the sequence considered, so not(RegUsedAfterInstruction(taicpu(hp2).oper[0]^.reg,hp2,UsedRegs)) then
it must be either deallocated or loaded with a new value }
(GetNextInstruction(hp2,hp3) and
(FindRegDealloc(getsupreg(taicpu(hp2).oper[0]^.reg),tai(hp3)) or
RegLoadedWithNewValue(getsupreg(taicpu(hp2).oper[0]^.reg), false, hp3))) then
{ change movsX/movzX reg/ref, reg2 } { change movsX/movzX reg/ref, reg2 }
{ add/sub/or/... reg3/$const, reg2 } { add/sub/or/... reg3/$const, reg2 }
{ mov reg2 reg/ref } { mov reg2 reg/ref }
@ -1782,7 +2040,7 @@ begin
else else
hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef,
taicpu(p).oper[1]^.reg); taicpu(p).oper[1]^.reg);
InsertLLItem(asml,p.previous, p.next, hp1); InsertLLItem(p.previous, p.next, hp1);
p.free; p.free;
p := hp1; p := hp1;
end; end;
@ -1798,7 +2056,7 @@ begin
begin begin
hp1 := taicpu.Op_reg_reg(A_ADD,taicpu(p).opsize, hp1 := taicpu.Op_reg_reg(A_ADD,taicpu(p).opsize,
taicpu(p).oper[1]^.reg, taicpu(p).oper[1]^.reg); taicpu(p).oper[1]^.reg, taicpu(p).oper[1]^.reg);
InsertLLItem(asml,p.previous, p.next, hp1); InsertLLItem(p.previous, p.next, hp1);
p.free; p.free;
p := hp1; p := hp1;
end end
@ -1811,7 +2069,7 @@ begin
TmpRef.index := taicpu(p).oper[1]^.reg; TmpRef.index := taicpu(p).oper[1]^.reg;
TmpRef.scalefactor := 1 shl taicpu(p).oper[0]^.val; TmpRef.scalefactor := 1 shl taicpu(p).oper[0]^.val;
hp1 := taicpu.Op_ref_reg(A_LEA,S_L,TmpRef, taicpu(p).oper[1]^.reg); hp1 := taicpu.Op_ref_reg(A_LEA,S_L,TmpRef, taicpu(p).oper[1]^.reg);
InsertLLItem(asml,p.previous, p.next, hp1); InsertLLItem(p.previous, p.next, hp1);
p.free; p.free;
p := hp1; p := hp1;
end end
@ -1863,8 +2121,8 @@ begin
hp1 := tai(p.next); hp1 := tai(p.next);
while Assigned(hp1) and while Assigned(hp1) and
(tai(hp1).typ in [ait_instruction]+SkipInstr) and (tai(hp1).typ in [ait_instruction]+SkipInstr) and
not regReadByInstruction(RS_ESP,hp1) and not RegReadByInstruction(NR_ESP,hp1) and
not regModifiedByInstruction(RS_ESP,hp1) do not RegModifiedByInstruction(NR_ESP,hp1) do
hp1 := tai(hp1.next); hp1 := tai(hp1.next);
if Assigned(hp1) and if Assigned(hp1) and
(tai(hp1).typ = ait_instruction) and (tai(hp1).typ = ait_instruction) and
@ -1896,7 +2154,7 @@ begin
end; end;
procedure PeepHoleOptPass2(asml: TAsmList; BlockStart, BlockEnd: tai); procedure TCPUAsmOptimizer.PeepHoleOptPass2;
{$ifdef DEBUG_AOPTCPU} {$ifdef DEBUG_AOPTCPU}
procedure DebugMsg(const s: string;p : tai); procedure DebugMsg(const s: string;p : tai);
@ -1929,12 +2187,12 @@ var
p,hp1,hp2,hp3: tai; p,hp1,hp2,hp3: tai;
l : longint; l : longint;
condition : tasmcond; condition : tasmcond;
UsedRegs, TmpUsedRegs: TRegSet; TmpUsedRegs: TAllUsedRegs;
carryadd_opcode: Tasmop; carryadd_opcode: Tasmop;
begin begin
p := BlockStart; p := BlockStart;
UsedRegs := []; ClearUsedRegs;
while (p <> BlockEnd) Do while (p <> BlockEnd) Do
begin begin
UpdateUsedRegs(UsedRegs, tai(p.next)); UpdateUsedRegs(UsedRegs, tai(p.next));
@ -2126,7 +2384,7 @@ begin
end; end;
end; end;
A_FSTP,A_FISTP: A_FSTP,A_FISTP:
if doFpuLoadStoreOpt(asmL,p) then if DoFpuLoadStoreOpt(p) then
continue; continue;
A_IMUL: A_IMUL:
begin begin
@ -2164,7 +2422,7 @@ begin
} }
if (taicpu(p).oper[0]^.typ=top_ref) and (taicpu(p).oper[0]^.ref^.refaddr=addr_full) then if (taicpu(p).oper[0]^.typ=top_ref) and (taicpu(p).oper[0]^.ref^.refaddr=addr_full) then
begin begin
hp1:=dfa.getlabelwithsym(tasmlabel(taicpu(p).oper[0]^.ref^.symbol)); hp1:=getlabelwithsym(tasmlabel(taicpu(p).oper[0]^.ref^.symbol));
if assigned(hp1) and SkipLabels(hp1,hp1) and (hp1.typ=ait_instruction) and (taicpu(hp1).opcode=A_RET) and (taicpu(p).condition=C_None) then if assigned(hp1) and SkipLabels(hp1,hp1) and (hp1.typ=ait_instruction) and (taicpu(hp1).opcode=A_RET) and (taicpu(p).condition=C_None) then
begin begin
tasmlabel(taicpu(p).oper[0]^.ref^.symbol).decrefs; tasmlabel(taicpu(p).oper[0]^.ref^.symbol).decrefs;
@ -2226,7 +2484,7 @@ begin
MatchOperand(taicpu(p).oper[1]^,taicpu(hp2).oper[0]^) and MatchOperand(taicpu(p).oper[1]^,taicpu(hp2).oper[0]^) and
(taicpu(hp2).oper[1]^.typ = top_ref) then (taicpu(hp2).oper[1]^.typ = top_ref) then
begin begin
TmpUsedRegs := UsedRegs; CopyUsedRegs(TmpUsedRegs);
UpdateUsedRegs(TmpUsedRegs,tai(hp1.next)); UpdateUsedRegs(TmpUsedRegs,tai(hp1.next));
if (RefsEqual(taicpu(hp2).oper[1]^.ref^, taicpu(p).oper[0]^.ref^) and if (RefsEqual(taicpu(hp2).oper[1]^.ref^, taicpu(p).oper[0]^.ref^) and
not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,
@ -2259,6 +2517,7 @@ begin
hp2.free; hp2.free;
p := hp1 p := hp1
end; end;
ReleaseUsedRegs(TmpUsedRegs);
end end
end; end;
end; end;
@ -2269,7 +2528,7 @@ begin
end; end;
procedure PostPeepHoleOpts(asml: TAsmList; BlockStart, BlockEnd: tai); procedure TCPUAsmOptimizer.PostPeepHoleOpts;
var var
p,hp1,hp2: tai; p,hp1,hp2: tai;
IsTestConstX: boolean; IsTestConstX: boolean;
@ -2298,7 +2557,7 @@ begin
((taicpu(hp1).oper[0]^.typ=top_ref) and (taicpu(hp1).oper[0]^.ref^.refaddr=addr_full)) then ((taicpu(hp1).oper[0]^.typ=top_ref) and (taicpu(hp1).oper[0]^.ref^.refaddr=addr_full)) then
begin begin
hp2 := taicpu.Op_sym(A_PUSH,S_L,taicpu(hp1).oper[0]^.ref^.symbol); hp2 := taicpu.Op_sym(A_PUSH,S_L,taicpu(hp1).oper[0]^.ref^.symbol);
InsertLLItem(asml, p.previous, p, hp2); InsertLLItem(p.previous, p, hp2);
taicpu(p).opcode := A_JMP; taicpu(p).opcode := A_JMP;
taicpu(p).is_jmp := true; taicpu(p).is_jmp := true;
asml.remove(hp1); asml.remove(hp1);
@ -2366,7 +2625,7 @@ See test/tgadint64 in the test suite.
case taicpu(p).opsize of case taicpu(p).opsize of
S_BL: S_BL:
begin begin
if IsGP32Reg(getsupreg(taicpu(p).oper[1]^.reg)) and if IsGP32Reg(taicpu(p).oper[1]^.reg) and
not(cs_opt_size in current_settings.optimizerswitches) and not(cs_opt_size in current_settings.optimizerswitches) and
(current_settings.optimizecputype = cpu_Pentium) then (current_settings.optimizecputype = cpu_Pentium) then
{Change "movzbl %reg1, %reg2" to {Change "movzbl %reg1, %reg2" to
@ -2375,7 +2634,7 @@ See test/tgadint64 in the test suite.
begin begin
hp1 := taicpu.op_reg_reg(A_XOR, S_L, hp1 := taicpu.op_reg_reg(A_XOR, S_L,
taicpu(p).oper[1]^.reg, taicpu(p).oper[1]^.reg); taicpu(p).oper[1]^.reg, taicpu(p).oper[1]^.reg);
InsertLLItem(asml,p.previous, p, hp1); InsertLLItem(p.previous, p, hp1);
taicpu(p).opcode := A_MOV; taicpu(p).opcode := A_MOV;
taicpu(p).changeopsize(S_B); taicpu(p).changeopsize(S_B);
setsubreg(taicpu(p).oper[1]^.reg,R_SUBL); setsubreg(taicpu(p).oper[1]^.reg,R_SUBL);
@ -2386,7 +2645,7 @@ See test/tgadint64 in the test suite.
(taicpu(p).oper[0]^.ref^.base <> taicpu(p).oper[1]^.reg) and (taicpu(p).oper[0]^.ref^.base <> taicpu(p).oper[1]^.reg) and
(taicpu(p).oper[0]^.ref^.index <> taicpu(p).oper[1]^.reg) and (taicpu(p).oper[0]^.ref^.index <> taicpu(p).oper[1]^.reg) and
not(cs_opt_size in current_settings.optimizerswitches) and not(cs_opt_size in current_settings.optimizerswitches) and
IsGP32Reg(getsupreg(taicpu(p).oper[1]^.reg)) and IsGP32Reg(taicpu(p).oper[1]^.reg) and
(current_settings.optimizecputype = cpu_Pentium) and (current_settings.optimizecputype = cpu_Pentium) and
(taicpu(p).opsize = S_BL) then (taicpu(p).opsize = S_BL) then
{changes "movzbl mem, %reg" to "xorl %reg, %reg; movb mem, %reg8" for {changes "movzbl mem, %reg" to "xorl %reg, %reg; movb mem, %reg8" for
@ -2397,7 +2656,7 @@ See test/tgadint64 in the test suite.
taicpu(p).opcode := A_MOV; taicpu(p).opcode := A_MOV;
taicpu(p).changeopsize(S_B); taicpu(p).changeopsize(S_B);
setsubreg(taicpu(p).oper[1]^.reg,R_SUBL); setsubreg(taicpu(p).oper[1]^.reg,R_SUBL);
InsertLLItem(asml,p.previous, p, hp1); InsertLLItem(p.previous, p, hp1);
end; end;
end; end;
A_TEST, A_OR: A_TEST, A_OR:
@ -2496,5 +2755,79 @@ See test/tgadint64 in the test suite.
end; end;
Procedure TCpuAsmOptimizer.Optimize;
Var
HP: Tai;
pass: longint;
slowopt, changed, lastLoop: boolean;
Begin
slowopt := (cs_opt_level3 in current_settings.optimizerswitches);
pass := 0;
changed := false;
repeat
lastLoop :=
not(slowopt) or
(not changed and (pass > 2)) or
{ prevent endless loops }
(pass = 4);
changed := false;
{ Setup labeltable, always necessary }
blockstart := tai(asml.first);
pass_1;
{ Blockend now either contains an ait_marker with Kind = mark_AsmBlockStart, }
{ or nil }
While Assigned(BlockStart) Do
Begin
if (cs_opt_peephole in current_settings.optimizerswitches) then
begin
if (pass = 0) then
PrePeepHoleOpts;
{ Peephole optimizations }
PeepHoleOptPass1;
{ Only perform them twice in the first pass }
if pass = 0 then
PeepHoleOptPass1;
end;
{ More peephole optimizations }
if (cs_opt_peephole in current_settings.optimizerswitches) then
begin
PeepHoleOptPass2;
if lastLoop then
PostPeepHoleOpts;
end;
{ Continue where we left off, BlockEnd is either the start of an }
{ assembler block or nil }
BlockStart := BlockEnd;
While Assigned(BlockStart) And
(BlockStart.typ = ait_Marker) And
(Tai_Marker(BlockStart).Kind = mark_AsmBlockStart) Do
Begin
{ We stopped at an assembler block, so skip it }
Repeat
BlockStart := Tai(BlockStart.Next);
Until (BlockStart.Typ = Ait_Marker) And
(Tai_Marker(Blockstart).Kind = mark_AsmBlockEnd);
{ Blockstart now contains a Tai_marker(mark_AsmBlockEnd) }
If GetNextInstruction(BlockStart, HP) And
((HP.typ <> ait_Marker) Or
(Tai_Marker(HP).Kind <> mark_AsmBlockStart)) Then
{ There is no assembler block anymore after the current one, so }
{ optimize the next block of "normal" instructions }
pass_1
{ Otherwise, skip the next assembler block }
else
blockStart := hp;
End;
End;
inc(pass);
until lastLoop;
dfa.free;
End;
begin
casmoptimizer:=TCpuAsmOptimizer;
end. end.

113
compiler/i386/aoptcpub.pas Normal file
View File

@ -0,0 +1,113 @@
{
Copyright (c) 1998-2004 by Jonas Maebe, member of the Free Pascal
Development Team
This unit contains several types and constants necessary for the
optimizer to work on the sparc architecture
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
****************************************************************************
}
Unit aoptcpub; { Assembler OPTimizer CPU specific Base }
{$i fpcdefs.inc}
{ enable the following define if memory references can have a scaled index }
{ define RefsHaveScale}
{ enable the following define if memory references can have a segment }
{ override }
{ define RefsHaveSegment}
Interface
Uses
cpubase,aasmcpu,AOptBase;
Type
{ type of a normal instruction }
TInstr = Taicpu;
PInstr = ^TInstr;
{ ************************************************************************* }
{ **************************** TCondRegs ********************************** }
{ ************************************************************************* }
{ Info about the conditional registers }
TCondRegs = Object
Constructor Init;
Destructor Done;
End;
{ ************************************************************************* }
{ **************************** TAoptBaseCpu ******************************* }
{ ************************************************************************* }
TAoptBaseCpu = class(TAoptBase)
End;
{ ************************************************************************* }
{ ******************************* Constants ******************************* }
{ ************************************************************************* }
Const
{ the maximum number of things (registers, memory, ...) a single instruction }
{ changes }
MaxCh = 3;
{ the maximum number of operands an instruction has }
MaxOps = 3;
{Oper index of operand that contains the source (reference) with a load }
{instruction }
LoadSrc = 0;
{Oper index of operand that contains the destination (register) with a load }
{instruction }
LoadDst = 1;
{Oper index of operand that contains the source (register) with a store }
{instruction }
StoreSrc = 0;
{Oper index of operand that contains the destination (reference) with a load }
{instruction }
StoreDst = 1;
aopt_uncondjmp = A_JMP;
aopt_condjmp = A_Jcc;
Implementation
{ ************************************************************************* }
{ **************************** TCondRegs ********************************** }
{ ************************************************************************* }
Constructor TCondRegs.init;
Begin
End;
Destructor TCondRegs.Done; {$ifdef inl} inline; {$endif inl}
Begin
End;
End.

View File

@ -0,0 +1,36 @@
{
Copyright (c) 1998-2004 by Jonas Maebe, member of the Free Pascal
Development Team
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
****************************************************************************
}
Unit aoptcpud;
{$i fpcdefs.inc}
interface
uses
aoptda;
type
TAOptDFACpu = class(TAOptDFA)
end;
implementation
end.

File diff suppressed because it is too large Load Diff

View File

@ -133,11 +133,7 @@ implementation
,cpuinfo ,cpuinfo
{$endif arm} {$endif arm}
{$ifndef NOOPT} {$ifndef NOOPT}
{$ifdef i386}
,aopt386
{$else i386}
,aopt ,aopt
{$endif i386}
{$endif} {$endif}
; ;

View File

@ -31,7 +31,13 @@ unit aoptx86;
globtype, globtype,
cpubase, cpubase,
aasmtai, aasmtai,
cgbase,cgutils; cgbase,cgutils,
aopt;
type
TX86AsmOptimizer = class(TAsmOptimizer)
function RegLoadedWithNewValue(reg : tregister; hp : tai) : boolean; override;
end;
function MatchInstruction(const instr: tai; const op: TAsmOp; const opsize: topsizes): boolean; function MatchInstruction(const instr: tai; const op: TAsmOp; const opsize: topsizes): boolean;
function MatchInstruction(const instr: tai; const op1,op2: TAsmOp; const opsize: topsizes): boolean; function MatchInstruction(const instr: tai; const op1,op2: TAsmOp; const opsize: topsizes): boolean;
@ -147,5 +153,40 @@ unit aoptx86;
(taicpu(instr).oper[1]^.typ=ot1); (taicpu(instr).oper[1]^.typ=ot1);
end; end;
function TX86AsmOptimizer.RegLoadedWithNewValue(reg: tregister; hp: tai): boolean;
var
p: taicpu;
begin
if not assigned(hp) or
(hp.typ <> ait_instruction) then
begin
Result := false;
exit;
end;
p := taicpu(hp);
Result :=
(((p.opcode = A_MOV) or
(p.opcode = A_MOVZX) or
(p.opcode = A_MOVSX) or
(p.opcode = A_LEA) or
(p.opcode = A_VMOVSS) or
(p.opcode = A_VMOVSD) or
(p.opcode = A_VMOVQ) or
(p.opcode = A_MOVSS) or
(p.opcode = A_MOVSD) or
(p.opcode = A_MOVQ)) and
(p.oper[1]^.typ = top_reg) and
(getsupreg(p.oper[1]^.reg) = getsupreg(reg)) and
((p.oper[0]^.typ = top_const) or
((p.oper[0]^.typ = top_reg) and
(getsupreg(p.oper[0]^.reg) <> getsupreg(reg))) or
((p.oper[0]^.typ = top_ref) and
not RegInRef(reg,p.oper[0]^.ref^)))) or
((p.opcode = A_POP) and
(getsupreg(p.oper[0]^.reg) = getsupreg(reg)));
end;
end. end.

View File

@ -27,12 +27,11 @@ unit aoptcpu;
interface interface
uses cgbase, cpubase, aasmtai, aopt, aoptcpub; uses cgbase, cpubase, aasmtai, aopt, aoptx86, aoptcpub;
type type
TCpuAsmOptimizer = class(TAsmOptimizer) TCpuAsmOptimizer = class(TX86AsmOptimizer)
function PeepHoleOptPass1Cpu(var p: tai): boolean; override; function PeepHoleOptPass1Cpu(var p: tai): boolean; override;
function RegLoadedWithNewValue(reg : tregister; hp : tai) : boolean; override;
end; end;
implementation implementation
@ -44,7 +43,6 @@ uses
cgutils, cgutils,
aoptobj, aoptobj,
aasmbase, aasmdata, aasmcpu, aasmbase, aasmdata, aasmcpu,
aoptx86,
itcpugas; itcpugas;
function isFoldableArithOp(hp1: taicpu; reg: tregister): boolean; function isFoldableArithOp(hp1: taicpu; reg: tregister): boolean;
@ -66,40 +64,6 @@ begin
end; end;
function TCpuAsmOptimizer.RegLoadedWithNewValue(reg: tregister; hp: tai): boolean;
var
p: taicpu;
begin
if not assigned(hp) or
(hp.typ <> ait_instruction) then
begin
Result := false;
exit;
end;
p := taicpu(hp);
Result :=
(((p.opcode = A_MOV) or
(p.opcode = A_MOVZX) or
(p.opcode = A_MOVSX) or
(p.opcode = A_LEA) or
(p.opcode = A_VMOVSS) or
(p.opcode = A_VMOVSD) or
(p.opcode = A_VMOVQ) or
(p.opcode = A_MOVSS) or
(p.opcode = A_MOVSD) or
(p.opcode = A_MOVQ)) and
(p.oper[1]^.typ = top_reg) and
(getsupreg(p.oper[1]^.reg) = getsupreg(reg)) and
((p.oper[0]^.typ = top_const) or
((p.oper[0]^.typ = top_reg) and
(getsupreg(p.oper[0]^.reg) <> getsupreg(reg))) or
((p.oper[0]^.typ = top_ref) and
not RegInRef(reg,p.oper[0]^.ref^)))) or
((p.opcode = A_POP) and
(getsupreg(p.oper[0]^.reg) = getsupreg(reg)));
end;
function TCpuAsmOptimizer.PeepHoleOptPass1Cpu(var p: tai): boolean; function TCpuAsmOptimizer.PeepHoleOptPass1Cpu(var p: tai): boolean;
var var
hp1, hp2: tai; hp1, hp2: tai;