{
    Copyright (c) 1998-2002 by Florian Klaempfl and Jonas Maebe

    This unit contains the peephole optimizer.

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

 ****************************************************************************
}
unit popt386;

{$i fpcdefs.inc}

{ $define DEBUG_AOPTCPU}

interface

uses Aasmbase,aasmtai,aasmdata,aasmcpu,verbose;

procedure PrePeepHoleOpts(asml: TAsmList; BlockStart, BlockEnd: tai);
procedure PeepHoleOptPass1(asml: TAsmList; BlockStart, BlockEnd: tai);
procedure PeepHoleOptPass2(asml: TAsmList; BlockStart, BlockEnd: tai);
procedure PostPeepHoleOpts(asml: TAsmList; BlockStart, BlockEnd: tai);

implementation

uses
  cutils,globtype,systems,
  globals,cgbase,procinfo,
  symsym,
{$ifdef finaldestdebug}
  cobjects,
{$endif finaldestdebug}
  cpuinfo,cpubase,cgutils,daopt386,
  cgx86;


{ Tells whether hp1 is an arithmetic/logic instruction whose destination is
  "reg":
    - add/sub/or/xor/and/shl/shr/sar  $const|%other, %reg
      (a register source must differ from reg)
    - inc/dec  %reg
  Any other opcode yields false. }
function isFoldableArithOp(hp1: taicpu; reg: tregister): boolean;
  var
    sourceOk: boolean;
  begin
    result := false;
    case hp1.opcode of
      A_INC,A_DEC:
        result := (hp1.oper[0]^.typ = top_reg) and
                  (hp1.oper[0]^.reg = reg);
      A_ADD,A_SUB,A_OR,A_XOR,A_AND,A_SHL,A_SHR,A_SAR:
        begin
          { the source operand is inspected first, the destination only
            when the source qualifies }
          sourceOk := (hp1.oper[0]^.typ = top_const) or
                      ((hp1.oper[0]^.typ = top_reg) and
                       (hp1.oper[0]^.reg <> reg));
          if sourceOk then
            result := (hp1.oper[1]^.typ = top_reg) and
                      (hp1.oper[1]^.reg = reg);
        end;
    end;
  end;


{ Updates UsedRegs with the allocation info found at p.Next and reports
  whether reg is still needed after p: its super register must be in the
  updated set and the instruction following p (if there is one) must not
  load it with a new value. }
function RegUsedAfterInstruction(reg: Tregister; p: tai; var UsedRegs: TRegSet): Boolean;
  var
    superReg: tsuperregister;
  begin
    superReg := getsupreg(reg);
    UpdateUsedRegs(UsedRegs, tai(p.Next));
    result := false;
    if superReg in UsedRegs then
      begin
        { p is a value parameter, so advancing it here stays local }
        if not getNextInstruction(p,p) then
          result := true
        else
          result := not regLoadedWithNewValue(superReg,false,p);
      end;
  end;


{ Returns true if p starts one of the recognised procedure exit sequences:
    ret
    leave; ret
    mov %ebp,%esp; pop %ebp; ret }
function IsExitCode(p : tai) : boolean;
  var
    popIns,retIns : tai;
  begin
    result:=false;
    if p.typ<>ait_instruction then
      exit;
    case taicpu(p).opcode of
      A_RET:
        result:=true;
      A_LEAVE:
        result:=GetNextInstruction(p,retIns) and
          (retIns.typ=ait_instruction) and
          (taicpu(retIns).opcode=A_RET);
      A_MOV:
        begin
          { mov %ebp,%esp }
          if (taicpu(p).oper[0]^.typ<>top_reg) or
             (taicpu(p).oper[0]^.reg<>NR_EBP) then
            exit;
          if (taicpu(p).oper[1]^.typ<>top_reg) or
             (taicpu(p).oper[1]^.reg<>NR_ESP) then
            exit;
          { pop %ebp }
          if not GetNextInstruction(p,popIns) then
            exit;
          if (popIns.typ<>ait_instruction) or
             (taicpu(popIns).opcode<>A_POP) then
            exit;
          if (taicpu(popIns).oper[0]^.typ<>top_reg) or
             (taicpu(popIns).oper[0]^.reg<>NR_EBP) then
            exit;
          { ret }
          result:=GetNextInstruction(popIns,retIns) and
            (retIns.typ=ait_instruction) and
            (taicpu(retIns).opcode=A_RET);
        end;
    end;
  end;


{ Handles "fstp mem; fld mem" and "fistp mem; fild mem" pairs that use the
  same reference and operand size.

  Only one case is optimized: an extended (S_FX) store/reload that is
  directly followed by the exit code, where the reference is relative to the
  frame pointer without an index and its offset is not below the one of the
  function result symbol (when there is one). Both instructions are then
  removed, p is set to the first exit instruction and
  removeLastDeallocForFuncRes is called for it.

  Turning the pair into a plain fst/fist is deliberately not done: the store
  rounds, and fst cannot store an extended value.

  Returns true if the caller should do a "continue" after this
  optimization. }
function doFpuLoadStoreOpt(asmL: TAsmList; var p: tai): boolean;
  var
    reload, exitIns: tai;
    isStoreReloadPair: boolean;
  begin
    result := false;

    { p has to store to memory and be followed by the matching load }
    if taicpu(p).oper[0]^.typ <> top_ref then
      exit;
    if not getNextInstruction(p, reload) then
      exit;
    if reload.typ <> ait_instruction then
      exit;
    isStoreReloadPair :=
      ((taicpu(reload).opcode = A_FLD) and (taicpu(p).opcode = A_FSTP)) or
      ((taicpu(p).opcode = A_FISTP) and (taicpu(reload).opcode = A_FILD));
    if not isStoreReloadPair then
      exit;
    if (taicpu(reload).oper[0]^.typ <> top_ref) or
       (taicpu(reload).opsize <> taicpu(p).opsize) or
       not refsEqual(taicpu(p).oper[0]^.ref^, taicpu(reload).oper[0]^.ref^) then
      exit;

    { replacing fstp f;fld f by fst f is only valid for extended because
      of rounding }
    if taicpu(p).opsize <> S_FX then
      exit;
    if not getNextInstruction(reload, exitIns) then
      exit;
    if (exitIns.typ <> ait_instruction) or
       not IsExitCode(exitIns) then
      exit;
    if taicpu(p).oper[0]^.ref^.base <> current_procinfo.FramePointer then
      exit;
    if assigned(current_procinfo.procdef.funcretsym) and
       (taicpu(p).oper[0]^.ref^.offset < tabstractnormalvarsym(current_procinfo.procdef.funcretsym).localloc.reference.offset) then
      exit;
    if taicpu(p).oper[0]^.ref^.index <> NR_NO then
      exit;

    asml.remove(p);
    asml.remove(reload);
    p.free;
    reload.free;
    p := exitIns;
    removeLastDeallocForFuncRes(asmL, p);
    result := true;
  end;


{ returns true if p contains a memory operand with a segment set }
function InsContainsSegRef(p: taicpu): boolean;
  var
    idx: longint;
  begin
    result:=false;
    for idx:=0 to p.opercnt-1 do
      begin
        if p.oper[idx]^.typ<>top_ref then
          continue;
        if p.oper[idx]^.ref^.segment<>NR_NO then
          begin
            result:=true;
            break;
          end;
      end;
  end;


procedure PrePeepHoleOpts(asml: TAsmList; BlockStart, BlockEnd: tai);
var
  p,hp1: tai;
  l: aint;
  tmpRef: treference;
begin
  p := BlockStart;
  while (p <> BlockEnd) Do
    begin
      case p.Typ Of
        Ait_Instruction:
          begin
            if InsContainsSegRef(taicpu(p)) then
              begin
                p := tai(p.next);
                continue;
              end;
            case taicpu(p).opcode Of
              A_IMUL:
                {changes certain "imul const, %reg"'s to lea sequences}
                begin
                  if (taicpu(p).oper[0]^.typ = Top_Const) and
                     (taicpu(p).oper[1]^.typ = Top_Reg) and
                     (taicpu(p).opsize = S_L) then
                    if (taicpu(p).oper[0]^.val = 1) then
                      if (taicpu(p).ops = 2) then
                        {remove "imul $1, reg"}
                        begin
                          hp1 := tai(p.Next);
                          asml.remove(p);
                          p.free;
                          p := hp1;
                          continue;
                        end
                      else
                        {change "imul $1, reg1, reg2" to "mov reg1, reg2"}
                        begin
                          hp1 := taicpu.Op_Reg_Reg(A_MOV, S_L, taicpu(p).oper[1]^.reg,taicpu(p).oper[2]^.reg);
                          InsertLLItem(asml, p.previous, p.next, hp1);
                          p.free;
                          p := hp1;
                        end
                    else if
                     ((taicpu(p).ops <= 2) or
                      (taicpu(p).oper[2]^.typ = Top_Reg)) and
                     (taicpu(p).oper[0]^.val <= 12) and
                     not(cs_opt_size in current_settings.optimizerswitches) and
                     (not(GetNextInstruction(p, hp1)) or
                       {GetNextInstruction(p, hp1) and}
                       not((tai(hp1).typ = ait_instruction) and
                           ((taicpu(hp1).opcode=A_Jcc) and
                            (taicpu(hp1).condition in [C_O,C_NO])))) then
                      begin
{ Strength reduction of "imul $const, reg1[, reg2]" (32 bit operands) for the
  constants 3, 5, 6, 9, 10 and 12: the multiplication is replaced by one or
  two lea/add instructions. TmpRef is the scratch reference used to build
  the address operand of each lea.
  The single-instruction forms (3, 5, 9) are always applied; the
  two-instruction forms (6, 10, 12) only when optimizing for a cpu type
  <= cpu_386.
  For the two-instruction forms the SECOND instruction is created first and
  linked in behind p, afterwards p itself is replaced by the FIRST one. }
reference_reset(tmpref,1);
case taicpu(p).oper[0]^.val Of
  3: begin
     {imul 3, reg1, reg2 to
        lea (reg1,reg1,2), reg2
      imul 3, reg1 to
        lea (reg1,reg1,2), reg1}
       TmpRef.base := taicpu(p).oper[1]^.reg;
       TmpRef.index := taicpu(p).oper[1]^.reg;
       TmpRef.ScaleFactor := 2;
       if (taicpu(p).ops = 2) then
         hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg)
       else
         hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg);
       { replace the imul by the lea }
       InsertLLItem(asml,p.previous, p.next, hp1);
       p.free;
       p := hp1;
    end;
  5: begin
     {imul 5, reg1, reg2 to
        lea (reg1,reg1,4), reg2
      imul 5, reg1 to
        lea (reg1,reg1,4), reg1}
       TmpRef.base := taicpu(p).oper[1]^.reg;
       TmpRef.index := taicpu(p).oper[1]^.reg;
       TmpRef.ScaleFactor := 4;
       if (taicpu(p).ops = 2) then
         hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg)
       else
         hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg);
       InsertLLItem(asml,p.previous, p.next, hp1);
       p.free;
       p := hp1;
     end;
  6: begin
     {imul 6, reg1, reg2 to
        lea (,reg1,2), reg2
        lea (reg2,reg1,4), reg2
      imul 6, reg1 to
        lea (reg1,reg1,2), reg1
        add reg1, reg1}
       { NOTE(review): the three operand sequence reads reg1 again after
         reg2 has been written, so it presumably relies on reg1 <> reg2 -
         confirm that the code generator never emits the aliased form }
       if (current_settings.optimizecputype <= cpu_386) then
         begin
           { second instruction of the pair }
           TmpRef.index := taicpu(p).oper[1]^.reg;
           if (taicpu(p).ops = 3) then
             begin
               TmpRef.base := taicpu(p).oper[2]^.reg;
               TmpRef.ScaleFactor := 4;
               { the result has to end up in reg2 (oper[2]); writing it to
                 reg1 would leave 2*reg1 in reg2 and destroy reg1 }
               hp1 :=  taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg);
             end
           else
             begin
               hp1 :=  taicpu.op_reg_reg(A_ADD, S_L,
                 taicpu(p).oper[1]^.reg,taicpu(p).oper[1]^.reg);
             end;
           InsertLLItem(asml,p, p.next, hp1);
           { first instruction of the pair, takes the place of the imul }
           reference_reset(tmpref,2);
           TmpRef.index := taicpu(p).oper[1]^.reg;
           TmpRef.ScaleFactor := 2;
           if (taicpu(p).ops = 3) then
             begin
               TmpRef.base := NR_NO;
               hp1 :=  taicpu.op_ref_reg(A_LEA, S_L, TmpRef,
                 taicpu(p).oper[2]^.reg);
             end
           else
             begin
               TmpRef.base := taicpu(p).oper[1]^.reg;
               hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg);
             end;
           InsertLLItem(asml,p.previous, p.next, hp1);
           p.free;
           { continue behind the first new instruction }
           p := tai(hp1.next);
         end
     end;
  9: begin
     {imul 9, reg1, reg2 to
        lea (reg1,reg1,8), reg2
      imul 9, reg1 to
        lea (reg1,reg1,8), reg1}
       TmpRef.base := taicpu(p).oper[1]^.reg;
       TmpRef.index := taicpu(p).oper[1]^.reg;
       TmpRef.ScaleFactor := 8;
       if (taicpu(p).ops = 2) then
         hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg)
       else
         hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg);
       InsertLLItem(asml,p.previous, p.next, hp1);
       p.free;
       p := hp1;
     end;
  10: begin
     {imul 10, reg1, reg2 to
        lea (reg1,reg1,4), reg2
        add reg2, reg2
      imul 10, reg1 to
        lea (reg1,reg1,4), reg1
        add reg1, reg1}
       if (current_settings.optimizecputype <= cpu_386) then
         begin
           { second instruction of the pair: double the destination }
           if (taicpu(p).ops = 3) then
             hp1 :=  taicpu.op_reg_reg(A_ADD, S_L,
               taicpu(p).oper[2]^.reg,taicpu(p).oper[2]^.reg)
           else
             hp1 := taicpu.op_reg_reg(A_ADD, S_L,
               taicpu(p).oper[1]^.reg,taicpu(p).oper[1]^.reg);
           InsertLLItem(asml,p, p.next, hp1);
           { first instruction of the pair: destination := 5*reg1 }
           TmpRef.base := taicpu(p).oper[1]^.reg;
           TmpRef.index := taicpu(p).oper[1]^.reg;
           TmpRef.ScaleFactor := 4;
           if (taicpu(p).ops = 3) then
             hp1 :=  taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg)
           else
             hp1 :=  taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg);
           InsertLLItem(asml,p.previous, p.next, hp1);
           p.free;
           p := tai(hp1.next);
         end
     end;
  12: begin
     {imul 12, reg1, reg2 to
        lea (,reg1,4), reg2
        lea (reg2,reg1,8), reg2
      imul 12, reg1 to
        lea (reg1,reg1,2), reg1
        lea (,reg1,4), reg1}
       if (current_settings.optimizecputype <= cpu_386) then
         begin
           { second instruction of the pair }
           TmpRef.index := taicpu(p).oper[1]^.reg;
           if (taicpu(p).ops = 3) then
             begin
               TmpRef.base := taicpu(p).oper[2]^.reg;
               TmpRef.ScaleFactor := 8;
               hp1 :=  taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg);
             end
           else
             begin
               TmpRef.base := NR_NO;
               TmpRef.ScaleFactor := 4;
               hp1 :=  taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg);
             end;
           InsertLLItem(asml,p, p.next, hp1);
           { first instruction of the pair, takes the place of the imul }
           reference_reset(tmpref,2);
           TmpRef.index := taicpu(p).oper[1]^.reg;
           if (taicpu(p).ops = 3) then
             begin
               TmpRef.base := NR_NO;
               TmpRef.ScaleFactor := 4;
               hp1 :=  taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg);
             end
           else
             begin
               TmpRef.base := taicpu(p).oper[1]^.reg;
TmpRef.ScaleFactor := 2; hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg); end; InsertLLItem(asml,p.previous, p.next, hp1); p.free; p := tai(hp1.next); end end end; end; end; A_SAR, A_SHR: {changes the code sequence shr/sar const1, x shl const2, x to either "sar/and", "shl/and" or just "and" depending on const1 and const2} begin if GetNextInstruction(p, hp1) and (tai(hp1).typ = ait_instruction) and (taicpu(hp1).opcode = A_SHL) and (taicpu(p).oper[0]^.typ = top_const) and (taicpu(hp1).oper[0]^.typ = top_const) and (taicpu(hp1).opsize = taicpu(p).opsize) and (taicpu(hp1).oper[1]^.typ = taicpu(p).oper[1]^.typ) and OpsEqual(taicpu(hp1).oper[1]^, taicpu(p).oper[1]^) then if (taicpu(p).oper[0]^.val > taicpu(hp1).oper[0]^.val) and not(cs_opt_size in current_settings.optimizerswitches) then { shr/sar const1, %reg shl const2, %reg with const1 > const2 } begin taicpu(p).loadConst(0,taicpu(p).oper[0]^.val-taicpu(hp1).oper[0]^.val); taicpu(hp1).opcode := A_AND; l := (1 shl (taicpu(hp1).oper[0]^.val)) - 1; case taicpu(p).opsize Of S_L: taicpu(hp1).loadConst(0,l Xor aint($ffffffff)); S_B: taicpu(hp1).loadConst(0,l Xor $ff); S_W: taicpu(hp1).loadConst(0,l Xor $ffff); end; end else if (taicpu(p).oper[0]^.val l1: becomes l1: je l2 je l3 l2: l2: jmp l3 jmp l3 the level parameter denotes how deeep we have already followed the jump, to avoid endless loops with constructs such as "l5: ; jmp l5" } var p1, p2: tai; l: tasmlabel; function FindAnyLabel(hp: tai; var l: tasmlabel): Boolean; begin FindAnyLabel := false; while assigned(hp.next) and (tai(hp.next).typ in (SkipInstr+[ait_align])) Do hp := tai(hp.next); if assigned(hp.next) and (tai(hp.next).typ = ait_label) then begin FindAnyLabel := true; l := tai_label(hp.next).labsym; end end; begin GetfinalDestination := false; if level > 20 then exit; p1 := dfa.getlabelwithsym(tasmlabel(hp.oper[0]^.ref^.symbol)); if assigned(p1) then begin SkipLabels(p1,p1); if (tai(p1).typ = ait_instruction) and (taicpu(p1).is_jmp) then 
if { the next instruction after the label where the jump hp arrives} { is unconditional or of the same type as hp, so continue } (taicpu(p1).condition in [C_None,hp.condition]) or { the next instruction after the label where the jump hp arrives} { is the opposite of hp (so this one is never taken), but after } { that one there is a branch that will be taken, so perform a } { little hack: set p1 equal to this instruction (that's what the} { last SkipLabels is for, only works with short bool evaluation)} ((taicpu(p1).condition = inverse_cond(hp.condition)) and SkipLabels(p1,p2) and (p2.typ = ait_instruction) and (taicpu(p2).is_jmp) and (taicpu(p2).condition in [C_None,hp.condition]) and SkipLabels(p1,p1)) then begin { quick check for loops of the form "l5: ; jmp l5 } if (tasmlabel(taicpu(p1).oper[0]^.ref^.symbol).labelnr = tasmlabel(hp.oper[0]^.ref^.symbol).labelnr) then exit; if not GetFinalDestination(asml, taicpu(p1),succ(level)) then exit; tasmlabel(hp.oper[0]^.ref^.symbol).decrefs; hp.oper[0]^.ref^.symbol:=taicpu(p1).oper[0]^.ref^.symbol; tasmlabel(hp.oper[0]^.ref^.symbol).increfs; end else if (taicpu(p1).condition = inverse_cond(hp.condition)) then if not FindAnyLabel(p1,l) then begin {$ifdef finaldestdebug} insertllitem(asml,p1,p1.next,tai_comment.Create( strpnew('previous label inserted')))); {$endif finaldestdebug} current_asmdata.getjumplabel(l); insertllitem(asml,p1,p1.next,tai_label.Create(l)); tasmlabel(taicpu(hp).oper[0]^.ref^.symbol).decrefs; hp.oper[0]^.ref^.symbol := l; l.increfs; { this won't work, since the new label isn't in the labeltable } { so it will fail the rangecheck. 
Labeltable should become a } { hashtable to support this: } { GetFinalDestination(asml, hp); } end else begin {$ifdef finaldestdebug} insertllitem(asml,p1,p1.next,tai_comment.Create( strpnew('next label reused')))); {$endif finaldestdebug} l.increfs; hp.oper[0]^.ref^.symbol := l; if not GetFinalDestination(asml, hp,succ(level)) then exit; end; end; GetFinalDestination := true; end; function DoSubAddOpt(var p: tai): Boolean; begin DoSubAddOpt := False; if GetLastInstruction(p, hp1) and (hp1.typ = ait_instruction) and (taicpu(hp1).opsize = taicpu(p).opsize) then case taicpu(hp1).opcode Of A_DEC: if (taicpu(hp1).oper[0]^.typ = top_reg) and (taicpu(hp1).oper[0]^.reg = taicpu(p).oper[1]^.reg) then begin taicpu(p).loadConst(0,taicpu(p).oper[0]^.val+1); asml.remove(hp1); hp1.free; end; A_SUB: if (taicpu(hp1).oper[0]^.typ = top_const) and (taicpu(hp1).oper[1]^.typ = top_reg) and (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then begin taicpu(p).loadConst(0,taicpu(p).oper[0]^.val+taicpu(hp1).oper[0]^.val); asml.remove(hp1); hp1.free; end; A_ADD: if (taicpu(hp1).oper[0]^.typ = top_const) and (taicpu(hp1).oper[1]^.typ = top_reg) and (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then begin taicpu(p).loadConst(0,taicpu(p).oper[0]^.val-taicpu(hp1).oper[0]^.val); asml.remove(hp1); hp1.free; if (taicpu(p).oper[0]^.val = 0) then begin hp1 := tai(p.next); asml.remove(p); p.free; if not GetLastInstruction(hp1, p) then p := hp1; DoSubAddOpt := True; end end; end; end; begin p := BlockStart; UsedRegs := []; while (p <> BlockEnd) Do begin UpDateUsedRegs(UsedRegs, tai(p.next)); case p.Typ Of ait_instruction: begin current_filepos:=taicpu(p).fileinfo; if InsContainsSegRef(taicpu(p)) then begin p := tai(p.next); continue; end; { Handle Jmp Optimizations } if taicpu(p).is_jmp then begin {the following if-block removes all code between a jmp and the next label, because it can never be executed} if (taicpu(p).opcode = A_JMP) then begin hp2:=p; while GetNextInstruction(hp2, hp1) 
and (hp1.typ <> ait_label) do if not(hp1.typ in ([ait_label,ait_align]+skipinstr)) then begin { don't kill start/end of assembler block, no-line-info-start/end etc } if hp1.typ<>ait_marker then begin asml.remove(hp1); hp1.free; end else hp2:=hp1; end else break; end; { remove jumps to a label coming right after them } if GetNextInstruction(p, hp1) then begin if FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol), hp1) and { TODO: FIXME removing the first instruction fails} (p<>blockstart) then begin hp2:=tai(hp1.next); asml.remove(p); p.free; p:=hp2; continue; end else begin if hp1.typ = ait_label then SkipLabels(hp1,hp1); if (tai(hp1).typ=ait_instruction) and (taicpu(hp1).opcode=A_JMP) and GetNextInstruction(hp1, hp2) and FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol), hp2) then begin if taicpu(p).opcode=A_Jcc then begin taicpu(p).condition:=inverse_cond(taicpu(p).condition); tai_label(hp2).labsym.decrefs; taicpu(p).oper[0]^.ref^.symbol:=taicpu(hp1).oper[0]^.ref^.symbol; { when free'ing hp1, the ref. 
isn't decresed, so we don't increase it (FK) taicpu(p).oper[0]^.ref^.symbol.increfs; } asml.remove(hp1); hp1.free; GetFinalDestination(asml, taicpu(p),0); end else begin GetFinalDestination(asml, taicpu(p),0); p:=tai(p.next); continue; end; end else GetFinalDestination(asml, taicpu(p),0); end; end; end else { All other optimizes } begin for l := 0 to taicpu(p).ops-1 Do if (taicpu(p).oper[l]^.typ = top_ref) then With taicpu(p).oper[l]^.ref^ Do begin if (base = NR_NO) and (index <> NR_NO) and (scalefactor in [0,1]) then begin base := index; index := NR_NO end end; case taicpu(p).opcode Of A_AND: begin if (taicpu(p).oper[0]^.typ = top_const) and (taicpu(p).oper[1]^.typ = top_reg) and GetNextInstruction(p, hp1) and (tai(hp1).typ = ait_instruction) and (taicpu(hp1).opcode = A_AND) and (taicpu(hp1).oper[0]^.typ = top_const) and (taicpu(hp1).oper[1]^.typ = top_reg) and (getsupreg(taicpu(p).oper[1]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg)) and (getsubreg(taicpu(p).oper[1]^.reg)<=getsubreg(taicpu(hp1).oper[1]^.reg)) then {change "and const1, reg; and const2, reg" to "and (const1 and const2), reg"} begin taicpu(hp1).loadConst(0,taicpu(p).oper[0]^.val and taicpu(hp1).oper[0]^.val); asml.remove(p); p.free; p:=hp1; end else {change "and x, reg; jxx" to "test x, reg", if reg is deallocated before the jump, but only if it's a conditional jump (PFV) } if (taicpu(p).oper[1]^.typ = top_reg) and GetNextInstruction(p, hp1) and (hp1.typ = ait_instruction) and (taicpu(hp1).is_jmp) and (taicpu(hp1).opcode<>A_JMP) and not(getsupreg(taicpu(p).oper[1]^.reg) in UsedRegs) then taicpu(p).opcode := A_TEST; end; A_CMP: begin { cmp register,$8000 neg register je target --> jo target .... 
only if register is deallocated before jump.} case Taicpu(p).opsize of S_B: v:=$80; S_W: v:=$8000; S_L: v:=aint($80000000); else internalerror(2013112905); end; if (taicpu(p).oper[0]^.typ=Top_const) and (taicpu(p).oper[0]^.val=v) and (Taicpu(p).oper[1]^.typ=top_reg) and GetNextInstruction(p, hp1) and (hp1.typ=ait_instruction) and (taicpu(hp1).opcode=A_Jcc) and (Taicpu(hp1).condition in [C_E,C_NE]) and not(getsupreg(Taicpu(p).oper[1]^.reg) in usedregs) then begin Taicpu(p).opcode:=A_NEG; Taicpu(p).loadoper(0,Taicpu(p).oper[1]^); Taicpu(p).clearop(1); Taicpu(p).ops:=1; if Taicpu(hp1).condition=C_E then Taicpu(hp1).condition:=C_O else Taicpu(hp1).condition:=C_NO; continue; end; { @@2: @@2: .... .... cmp operand1,0 jle/jbe @@1 dec operand1 --> sub operand1,1 jmp @@2 jge/jae @@2 @@1: @@1: ... ....} if (taicpu(p).oper[0]^.typ = top_const) and (taicpu(p).oper[1]^.typ in [top_reg,top_ref]) and (taicpu(p).oper[0]^.val = 0) and GetNextInstruction(p, hp1) and (hp1.typ = ait_instruction) and (taicpu(hp1).is_jmp) and (taicpu(hp1).opcode=A_Jcc) and (taicpu(hp1).condition in [C_LE,C_BE]) and GetNextInstruction(hp1,hp2) and (hp2.typ = ait_instruction) and (taicpu(hp2).opcode = A_DEC) and OpsEqual(taicpu(hp2).oper[0]^,taicpu(p).oper[1]^) and GetNextInstruction(hp2, hp3) and (hp3.typ = ait_instruction) and (taicpu(hp3).is_jmp) and (taicpu(hp3).opcode = A_JMP) and GetNextInstruction(hp3, hp4) and FindLabel(tasmlabel(taicpu(hp1).oper[0]^.ref^.symbol),hp4) then begin taicpu(hp2).Opcode := A_SUB; taicpu(hp2).loadoper(1,taicpu(hp2).oper[0]^); taicpu(hp2).loadConst(0,1); taicpu(hp2).ops:=2; taicpu(hp3).Opcode := A_Jcc; case taicpu(hp1).condition of C_LE: taicpu(hp3).condition := C_GE; C_BE: taicpu(hp3).condition := C_AE; end; asml.remove(p); asml.remove(hp1); p.free; hp1.free; p := hp2; continue; end end; A_FLD: begin if (taicpu(p).oper[0]^.typ = top_reg) and GetNextInstruction(p, hp1) and (hp1.typ = Ait_Instruction) and (taicpu(hp1).oper[0]^.typ = top_reg) and (taicpu(hp1).oper[1]^.typ = 
top_reg) and (taicpu(hp1).oper[0]^.reg = NR_ST) and (taicpu(hp1).oper[1]^.reg = NR_ST1) then { change to fld reg fxxx reg,st fxxxp st, st1 (hp1) Remark: non commutative operations must be reversed! } begin case taicpu(hp1).opcode Of A_FMULP,A_FADDP, A_FSUBP,A_FDIVP,A_FSUBRP,A_FDIVRP: begin case taicpu(hp1).opcode Of A_FADDP: taicpu(hp1).opcode := A_FADD; A_FMULP: taicpu(hp1).opcode := A_FMUL; A_FSUBP: taicpu(hp1).opcode := A_FSUBR; A_FSUBRP: taicpu(hp1).opcode := A_FSUB; A_FDIVP: taicpu(hp1).opcode := A_FDIVR; A_FDIVRP: taicpu(hp1).opcode := A_FDIV; end; taicpu(hp1).oper[0]^.reg := taicpu(p).oper[0]^.reg; taicpu(hp1).oper[1]^.reg := NR_ST; asml.remove(p); p.free; p := hp1; continue; end; end; end else if (taicpu(p).oper[0]^.typ = top_ref) and GetNextInstruction(p, hp2) and (hp2.typ = Ait_Instruction) and (taicpu(hp2).ops = 2) and (taicpu(hp2).oper[0]^.typ = top_reg) and (taicpu(hp2).oper[1]^.typ = top_reg) and (taicpu(p).opsize in [S_FS, S_FL]) and (taicpu(hp2).oper[0]^.reg = NR_ST) and (taicpu(hp2).oper[1]^.reg = NR_ST1) then if GetLastInstruction(p, hp1) and (hp1.typ = Ait_Instruction) and ((taicpu(hp1).opcode = A_FLD) or (taicpu(hp1).opcode = A_FST)) and (taicpu(hp1).opsize = taicpu(p).opsize) and (taicpu(hp1).oper[0]^.typ = top_ref) and RefsEqual(taicpu(p).oper[0]^.ref^, taicpu(hp1).oper[0]^.ref^) then if ((taicpu(hp2).opcode = A_FMULP) or (taicpu(hp2).opcode = A_FADDP)) then { change to fld/fst mem1 (hp1) fld/fst mem1 fld mem1 (p) fadd/ faddp/ fmul st, st fmulp st, st1 (hp2) } begin asml.remove(p); p.free; p := hp1; if (taicpu(hp2).opcode = A_FADDP) then taicpu(hp2).opcode := A_FADD else taicpu(hp2).opcode := A_FMUL; taicpu(hp2).oper[1]^.reg := NR_ST; end else { change to fld/fst mem1 (hp1) fld/fst mem1 fld mem1 (p) fld st} begin taicpu(p).changeopsize(S_FL); taicpu(p).loadreg(0,NR_ST); end else begin case taicpu(hp2).opcode Of A_FMULP,A_FADDP,A_FSUBP,A_FDIVP,A_FSUBRP,A_FDIVRP: { change to fld/fst mem1 (hp1) fld/fst mem1 fld mem2 (p) fxxx mem2 fxxxp st, st1 
(hp2) } begin case taicpu(hp2).opcode Of A_FADDP: taicpu(p).opcode := A_FADD; A_FMULP: taicpu(p).opcode := A_FMUL; A_FSUBP: taicpu(p).opcode := A_FSUBR; A_FSUBRP: taicpu(p).opcode := A_FSUB; A_FDIVP: taicpu(p).opcode := A_FDIVR; A_FDIVRP: taicpu(p).opcode := A_FDIV; end; asml.remove(hp2); hp2.free; end end end end; A_FSTP,A_FISTP: if doFpuLoadStoreOpt(asmL,p) then continue; A_LEA: begin {removes seg register prefixes from LEA operations, as they don't do anything} taicpu(p).oper[0]^.ref^.Segment := NR_NO; {changes "lea (%reg1), %reg2" into "mov %reg1, %reg2"} if (taicpu(p).oper[0]^.ref^.base <> NR_NO) and (getsupreg(taicpu(p).oper[0]^.ref^.base) in [RS_EAX..RS_ESP]) and (taicpu(p).oper[0]^.ref^.index = NR_NO) and (not(Assigned(taicpu(p).oper[0]^.ref^.Symbol))) then begin if (taicpu(p).oper[0]^.ref^.base <> taicpu(p).oper[1]^.reg) and (taicpu(p).oper[0]^.ref^.offset = 0) then begin hp1 := taicpu.op_reg_reg(A_MOV, S_L,taicpu(p).oper[0]^.ref^.base, taicpu(p).oper[1]^.reg); InsertLLItem(asml,p.previous,p.next, hp1); p.free; p := hp1; continue; end else if (taicpu(p).oper[0]^.ref^.offset = 0) then begin hp1 := tai(p.Next); asml.remove(p); p.free; p := hp1; continue; end { continue to use lea to adjust the stack pointer, it is the recommended way, but only if not optimizing for size } else if (taicpu(p).oper[1]^.reg<>NR_STACK_POINTER_REG) or (cs_opt_size in current_settings.optimizerswitches) then with taicpu(p).oper[0]^.ref^ do if (base = taicpu(p).oper[1]^.reg) then begin l := offset; if (l=1) and UseIncDec then begin taicpu(p).opcode := A_INC; taicpu(p).loadreg(0,taicpu(p).oper[1]^.reg); taicpu(p).ops := 1 end else if (l=-1) and UseIncDec then begin taicpu(p).opcode := A_DEC; taicpu(p).loadreg(0,taicpu(p).oper[1]^.reg); taicpu(p).ops := 1; end else begin if (l<0) and (l<>-2147483648) then begin taicpu(p).opcode := A_SUB; taicpu(p).loadConst(0,-l); end else begin taicpu(p).opcode := A_ADD; taicpu(p).loadConst(0,l); end; end; end; end (* This is unsafe, lea doesn't 
modify the flags but "add" does. This breaks webtbs/tw15694.pp. The above transformations are also unsafe, but they don't seem to be triggered by code that FPC generators (or that at least does not occur in the tests...). This needs to be fixed by checking for the liveness of the flags register. else if MatchReference(taicpu(p).oper[0]^.ref^,taicpu(p).oper[1]^.reg,NR_INVALID) then begin hp1:=taicpu.op_reg_reg(A_ADD,S_L,taicpu(p).oper[0]^.ref^.index, taicpu(p).oper[0]^.ref^.base); InsertLLItem(asml,p.previous,p.next, hp1); DebugMsg('Peephole Lea2AddBase done',hp1); p.free; p:=hp1; continue; end else if MatchReference(taicpu(p).oper[0]^.ref^,NR_INVALID,taicpu(p).oper[1]^.reg) then begin hp1:=taicpu.op_reg_reg(A_ADD,S_L,taicpu(p).oper[0]^.ref^.base, taicpu(p).oper[0]^.ref^.index); InsertLLItem(asml,p.previous,p.next,hp1); DebugMsg('Peephole Lea2AddIndex done',hp1); p.free; p:=hp1; continue; end *) end; A_MOV: begin TmpUsedRegs := UsedRegs; if (taicpu(p).oper[1]^.typ = top_reg) and (getsupreg(taicpu(p).oper[1]^.reg) in [RS_EAX, RS_EBX, RS_ECX, RS_EDX, RS_ESI, RS_EDI]) and GetNextInstruction(p, hp1) and (tai(hp1).typ = ait_instruction) and (taicpu(hp1).opcode = A_MOV) and (taicpu(hp1).oper[0]^.typ = top_reg) and (taicpu(hp1).oper[0]^.reg = taicpu(p).oper[1]^.reg) then begin {we have "mov x, %treg; mov %treg, y} if not(RegInOp(getsupreg(taicpu(p).oper[1]^.reg),taicpu(hp1).oper[1]^)) and not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs)) then {we've got "mov x, %treg; mov %treg, y; with %treg is not used after } case taicpu(p).oper[0]^.typ Of top_reg: begin { change "mov %reg, %treg; mov %treg, y" to "mov %reg, y" } taicpu(p).loadOper(1,taicpu(hp1).oper[1]^); asml.remove(hp1); hp1.free; continue; end; top_ref: if (taicpu(hp1).oper[1]^.typ = top_reg) then begin { change "mov mem, %treg; mov %treg, %reg" to "mov mem, %reg" } taicpu(p).loadoper(1,taicpu(hp1).oper[1]^); asml.remove(hp1); hp1.free; continue; end; end end else {Change "mov %reg1, %reg2; xxx 
%reg2, ???" to "mov %reg1, %reg2; xxx %reg1, ???" to avoid a write/read penalty} if (taicpu(p).oper[0]^.typ = top_reg) and (taicpu(p).oper[1]^.typ = top_reg) and GetNextInstruction(p,hp1) and (tai(hp1).typ = ait_instruction) and (taicpu(hp1).ops >= 1) and (taicpu(hp1).oper[0]^.typ = top_reg) and (taicpu(hp1).oper[0]^.reg = taicpu(p).oper[1]^.reg) then {we have "mov %reg1, %reg2; XXX %reg2, ???"} begin if ((taicpu(hp1).opcode = A_OR) or (taicpu(hp1).opcode = A_TEST)) and (taicpu(hp1).oper[1]^.typ = top_reg) and (taicpu(hp1).oper[0]^.reg = taicpu(hp1).oper[1]^.reg) then {we have "mov %reg1, %reg2; test/or %reg2, %reg2"} begin TmpUsedRegs := UsedRegs; { reg1 will be used after the first instruction, } { so update the allocation info } allocRegBetween(asmL,taicpu(p).oper[0]^.reg,p,hp1,usedregs); if GetNextInstruction(hp1, hp2) and (hp2.typ = ait_instruction) and taicpu(hp2).is_jmp and not(RegUsedAfterInstruction(taicpu(hp1).oper[0]^.reg, hp1, TmpUsedRegs)) then { change "mov %reg1, %reg2; test/or %reg2, %reg2; jxx" to "test %reg1, %reg1; jxx" } begin taicpu(hp1).loadoper(0,taicpu(p).oper[0]^); taicpu(hp1).loadoper(1,taicpu(p).oper[0]^); asml.remove(p); p.free; p := hp1; continue end else {change "mov %reg1, %reg2; test/or %reg2, %reg2" to "mov %reg1, %reg2; test/or %reg1, %reg1"} begin taicpu(hp1).loadoper(0,taicpu(p).oper[0]^); taicpu(hp1).loadoper(1,taicpu(p).oper[0]^); end; end { else if (taicpu(p.next)^.opcode in [A_PUSH, A_OR, A_XOR, A_AND, A_TEST])} {change "mov %reg1, %reg2; push/or/xor/... %reg2, ???" to "mov %reg1, %reg2; push/or/xor/... 
%reg1, ???"} end else {leave out the mov from "mov reg, x(%frame_pointer); leave/ret" (with x >= RetOffset) as it doesn't do anything (it writes either to a parameter or to the temporary storage room for the function result)} if GetNextInstruction(p, hp1) and (tai(hp1).typ = ait_instruction) then if IsExitCode(hp1) and (taicpu(p).oper[1]^.typ = top_ref) and (taicpu(p).oper[1]^.ref^.base = current_procinfo.FramePointer) and not(assigned(current_procinfo.procdef.funcretsym) and (taicpu(p).oper[1]^.ref^.offset < tabstractnormalvarsym(current_procinfo.procdef.funcretsym).localloc.reference.offset)) and (taicpu(p).oper[1]^.ref^.index = NR_NO) and (taicpu(p).oper[0]^.typ = top_reg) then begin asml.remove(p); p.free; p := hp1; RemoveLastDeallocForFuncRes(asmL,p); end else if (taicpu(p).oper[0]^.typ = top_reg) and (taicpu(p).oper[1]^.typ = top_ref) and (taicpu(p).opsize = taicpu(hp1).opsize) and (taicpu(hp1).opcode = A_CMP) and (taicpu(hp1).oper[1]^.typ = top_ref) and RefsEqual(taicpu(p).oper[1]^.ref^, taicpu(hp1).oper[1]^.ref^) then {change "mov reg1, mem1; cmp x, mem1" to "mov reg, mem1; cmp x, reg1"} begin taicpu(hp1).loadreg(1,taicpu(p).oper[0]^.reg); allocRegBetween(asmL,taicpu(p).oper[0]^.reg,p,hp1,usedregs); end; { Next instruction is also a MOV ? 
} if GetNextInstruction(p, hp1) and MatchInstruction(hp1,A_MOV,[taicpu(p).opsize]) then begin if (taicpu(hp1).oper[0]^.typ = taicpu(p).oper[1]^.typ) and (taicpu(hp1).oper[1]^.typ = taicpu(p).oper[0]^.typ) then {mov reg1, mem1 or mov mem1, reg1 mov mem2, reg2 mov reg2, mem2} begin if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[0]^) then {mov reg1, mem1 or mov mem1, reg1 mov mem2, reg1 mov reg2, mem1} begin if OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) then { Removes the second statement from mov reg1, mem1/reg2 mov mem1/reg2, reg1 } begin if (taicpu(p).oper[0]^.typ = top_reg) then AllocRegBetween(asmL,taicpu(p).oper[0]^.reg,p,hp1,usedregs); asml.remove(hp1); hp1.free; end else begin TmpUsedRegs := UsedRegs; UpdateUsedRegs(TmpUsedRegs, tai(hp1.next)); if (taicpu(p).oper[1]^.typ = top_ref) and { mov reg1, mem1 mov mem2, reg1 } (taicpu(hp1).oper[0]^.ref^.refaddr = addr_no) and GetNextInstruction(hp1, hp2) and (hp2.typ = ait_instruction) and (taicpu(hp2).opcode = A_CMP) and (taicpu(hp2).opsize = taicpu(p).opsize) and (taicpu(hp2).oper[0]^.typ = TOp_Ref) and (taicpu(hp2).oper[1]^.typ = TOp_Reg) and RefsEqual(taicpu(hp2).oper[0]^.ref^, taicpu(p).oper[1]^.ref^) and (taicpu(hp2).oper[1]^.reg= taicpu(p).oper[0]^.reg) and not(RegUsedAfterInstruction(taicpu(p).oper[0]^.reg, hp2, TmpUsedRegs)) then { change to mov reg1, mem1 mov reg1, mem1 mov mem2, reg1 cmp reg1, mem2 cmp mem1, reg1 } begin asml.remove(hp2); hp2.free; taicpu(hp1).opcode := A_CMP; taicpu(hp1).loadref(1,taicpu(hp1).oper[0]^.ref^); taicpu(hp1).loadreg(0,taicpu(p).oper[0]^.reg); end; end; end else begin tmpUsedRegs := UsedRegs; if GetNextInstruction(hp1, hp2) and (taicpu(p).oper[0]^.typ = top_ref) and (taicpu(p).oper[1]^.typ = top_reg) and (taicpu(hp1).oper[0]^.typ = top_reg) and (taicpu(hp1).oper[0]^.reg = taicpu(p).oper[1]^.reg) and (taicpu(hp1).oper[1]^.typ = top_ref) and (tai(hp2).typ = ait_instruction) and (taicpu(hp2).opcode = A_MOV) and (taicpu(hp2).opsize = taicpu(p).opsize) and 
(taicpu(hp2).oper[1]^.typ = top_reg) and (taicpu(hp2).oper[0]^.typ = top_ref) and RefsEqual(taicpu(hp2).oper[0]^.ref^, taicpu(hp1).oper[1]^.ref^) then if not regInRef(getsupreg(taicpu(hp2).oper[1]^.reg),taicpu(hp2).oper[0]^.ref^) and not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,tmpUsedRegs)) then { mov mem1, %reg1 mov %reg1, mem2 mov mem2, reg2 to: mov mem1, reg2 mov reg2, mem2} begin AllocRegBetween(asmL,taicpu(hp2).oper[1]^.reg,p,hp2,usedregs); taicpu(p).loadoper(1,taicpu(hp2).oper[1]^); taicpu(hp1).loadoper(0,taicpu(hp2).oper[1]^); asml.remove(hp2); hp2.free; end else if (taicpu(p).oper[1]^.reg <> taicpu(hp2).oper[1]^.reg) and not(RegInRef(getsupreg(taicpu(p).oper[1]^.reg),taicpu(p).oper[0]^.ref^)) and not(RegInRef(getsupreg(taicpu(hp2).oper[1]^.reg),taicpu(hp2).oper[0]^.ref^)) then { mov mem1, reg1 mov mem1, reg1 mov reg1, mem2 mov reg1, mem2 mov mem2, reg2 mov mem2, reg1 to: to: mov mem1, reg1 mov mem1, reg1 mov mem1, reg2 mov reg1, mem2 mov reg1, mem2 or (if mem1 depends on reg1 and/or if mem2 depends on reg2) to: mov mem1, reg1 mov reg1, mem2 mov reg1, reg2 } begin taicpu(hp1).loadRef(0,taicpu(p).oper[0]^.ref^); taicpu(hp1).loadReg(1,taicpu(hp2).oper[1]^.reg); taicpu(hp2).loadRef(1,taicpu(hp2).oper[0]^.ref^); taicpu(hp2).loadReg(0,taicpu(p).oper[1]^.reg); allocRegBetween(asmL,taicpu(p).oper[1]^.reg,p,hp2,usedregs); if (taicpu(p).oper[0]^.ref^.base <> NR_NO) and (getsupreg(taicpu(p).oper[0]^.ref^.base) in [RS_EAX,RS_EBX,RS_ECX,RS_EDX,RS_ESI,RS_EDI]) then allocRegBetween(asmL,taicpu(p).oper[0]^.ref^.base,p,hp2,usedregs); if (taicpu(p).oper[0]^.ref^.index <> NR_NO) and (getsupreg(taicpu(p).oper[0]^.ref^.index) in [RS_EAX,RS_EBX,RS_ECX,RS_EDX,RS_ESI,RS_EDI]) then allocRegBetween(asmL,taicpu(p).oper[0]^.ref^.index,p,hp2,usedregs); end else if (taicpu(hp1).Oper[0]^.reg <> taicpu(hp2).Oper[1]^.reg) then begin taicpu(hp2).loadReg(0,taicpu(hp1).Oper[0]^.reg); allocRegBetween(asmL,taicpu(p).oper[1]^.reg,p,hp2,usedregs); end else begin asml.remove(hp2); 
hp2.free; end end end else (* {movl [mem1],reg1 movl [mem1],reg2 to: movl [mem1],reg1 movl reg1,reg2 } if (taicpu(p).oper[0]^.typ = top_ref) and (taicpu(p).oper[1]^.typ = top_reg) and (taicpu(hp1).oper[0]^.typ = top_ref) and (taicpu(hp1).oper[1]^.typ = top_reg) and (taicpu(p).opsize = taicpu(hp1).opsize) and RefsEqual(TReference(taicpu(p).oper[0]^^),taicpu(hp1).oper[0]^^.ref^) and (taicpu(p).oper[1]^.reg<>taicpu(hp1).oper[0]^^.ref^.base) and (taicpu(p).oper[1]^.reg<>taicpu(hp1).oper[0]^^.ref^.index) then taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg) else*) { movl const1,[mem1] movl [mem1],reg1 to: movl const1,reg1 movl reg1,[mem1] } if (taicpu(p).oper[0]^.typ = top_const) and (taicpu(p).oper[1]^.typ = top_ref) and (taicpu(hp1).oper[0]^.typ = top_ref) and (taicpu(hp1).oper[1]^.typ = top_reg) and (taicpu(p).opsize = taicpu(hp1).opsize) and RefsEqual(taicpu(hp1).oper[0]^.ref^,taicpu(p).oper[1]^.ref^) and not(reginref(getsupreg(taicpu(hp1).oper[1]^.reg),taicpu(hp1).oper[0]^.ref^)) then begin allocregbetween(asml,taicpu(hp1).oper[1]^.reg,p,hp1,usedregs); taicpu(hp1).loadReg(0,taicpu(hp1).oper[1]^.reg); taicpu(hp1).loadRef(1,taicpu(p).oper[1]^.ref^); taicpu(p).loadReg(1,taicpu(hp1).oper[0]^.reg); taicpu(hp1).fileinfo := taicpu(p).fileinfo; end end; if GetNextInstruction(p, hp1) and MatchInstruction(hp1,A_BTS,A_BTR,[Taicpu(p).opsize]) and GetNextInstruction(hp1, hp2) and MatchInstruction(hp2,A_OR,[Taicpu(p).opsize]) and MatchOperand(Taicpu(p).oper[0]^,0) and (Taicpu(p).oper[1]^.typ = top_reg) and MatchOperand(Taicpu(p).oper[1]^,Taicpu(hp1).oper[1]^) and MatchOperand(Taicpu(p).oper[1]^,Taicpu(hp2).oper[1]^) then {mov reg1,0 bts reg1,operand1 --> mov reg1,operand2 or reg1,operand2 bts reg1,operand1} begin Taicpu(hp2).opcode:=A_MOV; asml.remove(hp1); insertllitem(asml,hp2,hp2.next,hp1); asml.remove(p); p.free; p:=hp1; end; if GetNextInstruction(p, hp1) and MatchInstruction(hp1,A_LEA,[S_L]) and (Taicpu(p).oper[0]^.typ = top_ref) and (Taicpu(p).oper[1]^.typ = top_reg) and 
((MatchReference(Taicpu(hp1).oper[0]^.ref^,Taicpu(hp1).oper[1]^.reg,Taicpu(p).oper[1]^.reg) and (Taicpu(hp1).oper[0]^.ref^.base<>Taicpu(p).oper[1]^.reg) ) or (MatchReference(Taicpu(hp1).oper[0]^.ref^,Taicpu(p).oper[1]^.reg,Taicpu(hp1).oper[1]^.reg) and (Taicpu(hp1).oper[0]^.ref^.index<>Taicpu(p).oper[1]^.reg) ) ) then {mov reg1,ref lea reg2,[reg1,reg2] --> add reg2,ref} begin TmpUsedRegs := UsedRegs; { reg1 may not be used afterwards } if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs)) then begin Taicpu(hp1).opcode:=A_ADD; Taicpu(hp1).oper[0]^.ref^:=Taicpu(p).oper[0]^.ref^; DebugMsg('Peephole MovLea2Add done',hp1); asml.remove(p); p.free; p:=hp1; end; end; end; A_MOVSX, A_MOVZX : begin if (taicpu(p).oper[1]^.typ = top_reg) and GetNextInstruction(p,hp1) and (hp1.typ = ait_instruction) and IsFoldableArithOp(taicpu(hp1),taicpu(p).oper[1]^.reg) and (getsupreg(taicpu(hp1).oper[0]^.reg) in [RS_EAX, RS_EBX, RS_ECX, RS_EDX]) and GetNextInstruction(hp1,hp2) and MatchInstruction(hp2,A_MOV,[]) and (taicpu(hp2).oper[0]^.typ = top_reg) and OpsEqual(taicpu(hp2).oper[1]^,taicpu(p).oper[0]^) and (((taicpu(hp1).ops=2) and (getsupreg(taicpu(hp2).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg))) or ((taicpu(hp1).ops=1) and (getsupreg(taicpu(hp2).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[0]^.reg)))) and { reg2 must not be used after the sequence considered, so it must be either deallocated or loaded with a new value } (GetNextInstruction(hp2,hp3) and (FindRegDealloc(getsupreg(taicpu(hp2).oper[0]^.reg),tai(hp3)) or RegLoadedWithNewValue(getsupreg(taicpu(hp2).oper[0]^.reg), false, hp3))) then { change movsX/movzX reg/ref, reg2 } { add/sub/or/... reg3/$const, reg2 } { mov reg2 reg/ref } { to add/sub/or/... 
reg3/$const, reg/ref } begin { by example: movswl %si,%eax movswl %si,%eax p decl %eax addl %edx,%eax hp1 movw %ax,%si movw %ax,%si hp2 -> movswl %si,%eax movswl %si,%eax p decw %eax addw %edx,%eax hp1 movw %ax,%si movw %ax,%si hp2 } taicpu(hp1).changeopsize(taicpu(hp2).opsize); { -> movswl %si,%eax movswl %si,%eax p decw %si addw %dx,%si hp1 movw %ax,%si movw %ax,%si hp2 } case taicpu(hp1).ops of 1: taicpu(hp1).loadoper(0,taicpu(hp2).oper[1]^); 2: begin taicpu(hp1).loadoper(1,taicpu(hp2).oper[1]^); if (taicpu(hp1).oper[0]^.typ = top_reg) then setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg)); end; else internalerror(2008042701); end; { -> decw %si addw %dx,%si p } asml.remove(p); asml.remove(hp2); p.free; hp2.free; p := hp1 end { removes superfluous And's after movzx's } else if taicpu(p).opcode=A_MOVZX then begin if (taicpu(p).oper[1]^.typ = top_reg) and GetNextInstruction(p, hp1) and (tai(hp1).typ = ait_instruction) and (taicpu(hp1).opcode = A_AND) and (taicpu(hp1).oper[0]^.typ = top_const) and (taicpu(hp1).oper[1]^.typ = top_reg) and (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then case taicpu(p).opsize Of S_BL, S_BW: if (taicpu(hp1).oper[0]^.val = $ff) then begin asml.remove(hp1); hp1.free; end; S_WL: if (taicpu(hp1).oper[0]^.val = $ffff) then begin asml.remove(hp1); hp1.free; end; end; {changes some movzx constructs to faster synonims (all examples are given with eax/ax, but are also valid for other registers)} if (taicpu(p).oper[1]^.typ = top_reg) then if (taicpu(p).oper[0]^.typ = top_reg) then case taicpu(p).opsize of S_BW: begin if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and not(cs_opt_size in current_settings.optimizerswitches) then {Change "movzbw %al, %ax" to "andw $0x0ffh, %ax"} begin taicpu(p).opcode := A_AND; taicpu(p).changeopsize(S_W); taicpu(p).loadConst(0,$ff); end else if GetNextInstruction(p, hp1) and (tai(hp1).typ = ait_instruction) and (taicpu(hp1).opcode = A_AND) and 
(taicpu(hp1).oper[0]^.typ = top_const) and (taicpu(hp1).oper[1]^.typ = top_reg) and (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then {Change "movzbw %reg1, %reg2; andw $const, %reg2" to "movw %reg1, reg2; andw $(const1 and $ff), %reg2"} begin taicpu(p).opcode := A_MOV; taicpu(p).changeopsize(S_W); setsubreg(taicpu(p).oper[0]^.reg,R_SUBW); taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff); end; end; S_BL: begin if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and not(cs_opt_size in current_settings.optimizerswitches) then {Change "movzbl %al, %eax" to "andl $0x0ffh, %eax"} begin taicpu(p).opcode := A_AND; taicpu(p).changeopsize(S_L); taicpu(p).loadConst(0,$ff) end else if GetNextInstruction(p, hp1) and (tai(hp1).typ = ait_instruction) and (taicpu(hp1).opcode = A_AND) and (taicpu(hp1).oper[0]^.typ = top_const) and (taicpu(hp1).oper[1]^.typ = top_reg) and (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then {Change "movzbl %reg1, %reg2; andl $const, %reg2" to "movl %reg1, reg2; andl $(const1 and $ff), %reg2"} begin taicpu(p).opcode := A_MOV; taicpu(p).changeopsize(S_L); setsubreg(taicpu(p).oper[0]^.reg,R_SUBWHOLE); taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff); end end; S_WL: begin if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and not(cs_opt_size in current_settings.optimizerswitches) then {Change "movzwl %ax, %eax" to "andl $0x0ffffh, %eax"} begin taicpu(p).opcode := A_AND; taicpu(p).changeopsize(S_L); taicpu(p).loadConst(0,$ffff); end else if GetNextInstruction(p, hp1) and (tai(hp1).typ = ait_instruction) and (taicpu(hp1).opcode = A_AND) and (taicpu(hp1).oper[0]^.typ = top_const) and (taicpu(hp1).oper[1]^.typ = top_reg) and (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then {Change "movzwl %reg1, %reg2; andl $const, %reg2" to "movl %reg1, reg2; andl $(const1 and $ffff), %reg2"} begin taicpu(p).opcode := A_MOV; taicpu(p).changeopsize(S_L); 
setsubreg(taicpu(p).oper[0]^.reg,R_SUBWHOLE); taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ffff); end; end; end else if (taicpu(p).oper[0]^.typ = top_ref) then begin if GetNextInstruction(p, hp1) and (tai(hp1).typ = ait_instruction) and (taicpu(hp1).opcode = A_AND) and (taicpu(hp1).oper[0]^.typ = Top_Const) and (taicpu(hp1).oper[1]^.typ = Top_Reg) and (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then begin taicpu(p).opcode := A_MOV; case taicpu(p).opsize Of S_BL: begin taicpu(p).changeopsize(S_L); taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff); end; S_WL: begin taicpu(p).changeopsize(S_L); taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ffff); end; S_BW: begin taicpu(p).changeopsize(S_W); taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff); end; end; end; end; end; end; (* should not be generated anymore by the current code generator A_POP: begin if target_info.system=system_i386_go32v2 then begin { Transform a series of pop/pop/pop/push/push/push to } { 'movl x(%esp),%reg' for go32v2 (not for the rest, } { because I'm not sure whether they can cope with } { 'movl x(%esp),%reg' with x > 0, I believe we had } { such a problem when using esp as frame pointer (JM) } if (taicpu(p).oper[0]^.typ = top_reg) then begin hp1 := p; hp2 := p; l := 0; while getNextInstruction(hp1,hp1) and (hp1.typ = ait_instruction) and (taicpu(hp1).opcode = A_POP) and (taicpu(hp1).oper[0]^.typ = top_reg) do begin hp2 := hp1; inc(l,4); end; getLastInstruction(p,hp3); l1 := 0; while (hp2 <> hp3) and assigned(hp1) and (hp1.typ = ait_instruction) and (taicpu(hp1).opcode = A_PUSH) and (taicpu(hp1).oper[0]^.typ = top_reg) and (taicpu(hp1).oper[0]^.reg.enum = taicpu(hp2).oper[0]^.reg.enum) do begin { change it to a two op operation } taicpu(hp2).oper[1]^.typ:=top_none; taicpu(hp2).ops:=2; taicpu(hp2).opcode := A_MOV; taicpu(hp2).loadoper(1,taicpu(hp1).oper[0]^); reference_reset(tmpref); tmpRef.base.enum:=R_INTREGISTER; tmpRef.base.number:=NR_STACK_POINTER_REG; 
convert_register_to_enum(tmpref.base); tmpRef.offset := l; taicpu(hp2).loadRef(0,tmpRef); hp4 := hp1; getNextInstruction(hp1,hp1); asml.remove(hp4); hp4.free; getLastInstruction(hp2,hp2); dec(l,4); inc(l1); end; if l <> -4 then begin inc(l,4); for l1 := l1 downto 1 do begin getNextInstruction(hp2,hp2); dec(taicpu(hp2).oper[0]^.ref^.offset,l); end end end end else begin if (taicpu(p).oper[0]^.typ = top_reg) and GetNextInstruction(p, hp1) and (tai(hp1).typ=ait_instruction) and (taicpu(hp1).opcode=A_PUSH) and (taicpu(hp1).oper[0]^.typ = top_reg) and (taicpu(hp1).oper[0]^.reg.enum=taicpu(p).oper[0]^.reg.enum) then begin { change it to a two op operation } taicpu(p).oper[1]^.typ:=top_none; taicpu(p).ops:=2; taicpu(p).opcode := A_MOV; taicpu(p).loadoper(1,taicpu(p).oper[0]^); reference_reset(tmpref); TmpRef.base.enum := R_ESP; taicpu(p).loadRef(0,TmpRef); asml.remove(hp1); hp1.free; end; end; end; *) A_PUSH: begin if (taicpu(p).opsize = S_W) and (taicpu(p).oper[0]^.typ = Top_Const) and GetNextInstruction(p, hp1) and (tai(hp1).typ = ait_instruction) and (taicpu(hp1).opcode = A_PUSH) and (taicpu(hp1).oper[0]^.typ = Top_Const) and (taicpu(hp1).opsize = S_W) then begin taicpu(p).changeopsize(S_L); taicpu(p).loadConst(0,taicpu(p).oper[0]^.val shl 16 + word(taicpu(hp1).oper[0]^.val)); asml.remove(hp1); hp1.free; end; end; A_SHL, A_SAL: begin if (taicpu(p).oper[0]^.typ = Top_Const) and (taicpu(p).oper[1]^.typ = Top_Reg) and (taicpu(p).opsize = S_L) and (taicpu(p).oper[0]^.val <= 3) then {Changes "shl const, %reg32; add const/reg, %reg32" to one lea statement} begin TmpBool1 := True; {should we check the next instruction?} TmpBool2 := False; {have we found an add/sub which could be integrated in the lea?} reference_reset(tmpref,2); TmpRef.index := taicpu(p).oper[1]^.reg; TmpRef.scalefactor := 1 shl taicpu(p).oper[0]^.val; while TmpBool1 and GetNextInstruction(p, hp1) and (tai(hp1).typ = ait_instruction) and ((((taicpu(hp1).opcode = A_ADD) or (taicpu(hp1).opcode = A_SUB)) and 
(taicpu(hp1).oper[1]^.typ = Top_Reg) and (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg)) or (((taicpu(hp1).opcode = A_INC) or (taicpu(hp1).opcode = A_DEC)) and (taicpu(hp1).oper[0]^.typ = Top_Reg) and (taicpu(hp1).oper[0]^.reg = taicpu(p).oper[1]^.reg))) and (not GetNextInstruction(hp1,hp2) or not instrReadsFlags(hp2)) Do begin TmpBool1 := False; if (taicpu(hp1).oper[0]^.typ = Top_Const) then begin TmpBool1 := True; TmpBool2 := True; case taicpu(hp1).opcode of A_ADD: inc(TmpRef.offset, longint(taicpu(hp1).oper[0]^.val)); A_SUB: dec(TmpRef.offset, longint(taicpu(hp1).oper[0]^.val)); end; asml.remove(hp1); hp1.free; end else if (taicpu(hp1).oper[0]^.typ = Top_Reg) and (((taicpu(hp1).opcode = A_ADD) and (TmpRef.base = NR_NO)) or (taicpu(hp1).opcode = A_INC) or (taicpu(hp1).opcode = A_DEC)) then begin TmpBool1 := True; TmpBool2 := True; case taicpu(hp1).opcode of A_ADD: TmpRef.base := taicpu(hp1).oper[0]^.reg; A_INC: inc(TmpRef.offset); A_DEC: dec(TmpRef.offset); end; asml.remove(hp1); hp1.free; end; end; if TmpBool2 or ((current_settings.optimizecputype < cpu_Pentium2) and (taicpu(p).oper[0]^.val <= 3) and not(cs_opt_size in current_settings.optimizerswitches)) then begin if not(TmpBool2) and (taicpu(p).oper[0]^.val = 1) then begin hp1 := taicpu.Op_reg_reg(A_ADD,taicpu(p).opsize, taicpu(p).oper[1]^.reg, taicpu(p).oper[1]^.reg) end else hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg); InsertLLItem(asml,p.previous, p.next, hp1); p.free; p := hp1; end; end else if (current_settings.optimizecputype < cpu_Pentium2) and (taicpu(p).oper[0]^.typ = top_const) and (taicpu(p).oper[1]^.typ = top_reg) then if (taicpu(p).oper[0]^.val = 1) then {changes "shl $1, %reg" to "add %reg, %reg", which is the same on a 386, but faster on a 486, and Tairable in both U and V pipes on the Pentium (unlike shl, which is only Tairable in the U pipe)} begin hp1 := taicpu.Op_reg_reg(A_ADD,taicpu(p).opsize, taicpu(p).oper[1]^.reg, taicpu(p).oper[1]^.reg); 
InsertLLItem(asml,p.previous, p.next, hp1); p.free; p := hp1; end else if (taicpu(p).opsize = S_L) and (taicpu(p).oper[0]^.val<= 3) then {changes "shl $2, %reg" to "lea (,%reg,4), %reg" "shl $3, %reg" to "lea (,%reg,8), %reg} begin reference_reset(tmpref,2); TmpRef.index := taicpu(p).oper[1]^.reg; TmpRef.scalefactor := 1 shl taicpu(p).oper[0]^.val; hp1 := taicpu.Op_ref_reg(A_LEA,S_L,TmpRef, taicpu(p).oper[1]^.reg); InsertLLItem(asml,p.previous, p.next, hp1); p.free; p := hp1; end end; A_SETcc : { changes setcc (funcres) setcc reg movb (funcres), reg to leave/ret leave/ret } begin if (taicpu(p).oper[0]^.typ = top_ref) and GetNextInstruction(p, hp1) and GetNextInstruction(hp1, hp2) and IsExitCode(hp2) and (taicpu(p).oper[0]^.ref^.base = current_procinfo.FramePointer) and (taicpu(p).oper[0]^.ref^.index = NR_NO) and not(assigned(current_procinfo.procdef.funcretsym) and (taicpu(p).oper[0]^.ref^.offset < tabstractnormalvarsym(current_procinfo.procdef.funcretsym).localloc.reference.offset)) and (hp1.typ = ait_instruction) and (taicpu(hp1).opcode = A_MOV) and (taicpu(hp1).opsize = S_B) and (taicpu(hp1).oper[0]^.typ = top_ref) and RefsEqual(taicpu(hp1).oper[0]^.ref^, taicpu(p).oper[0]^.ref^) then begin taicpu(p).loadReg(0,taicpu(hp1).oper[1]^.reg); asml.remove(hp1); hp1.free; end end; A_SUB: { * change "subl $2, %esp; pushw x" to "pushl x"} { * change "sub/add const1, reg" or "dec reg" followed by "sub const2, reg" to one "sub ..., reg" } begin if (taicpu(p).oper[0]^.typ = top_const) and (taicpu(p).oper[1]^.typ = top_reg) then if (taicpu(p).oper[0]^.val = 2) and (taicpu(p).oper[1]^.reg = NR_ESP) and { Don't do the sub/push optimization if the sub } { comes from setting up the stack frame (JM) } (not getLastInstruction(p,hp1) or (hp1.typ <> ait_instruction) or (taicpu(hp1).opcode <> A_MOV) or (taicpu(hp1).oper[0]^.typ <> top_reg) or (taicpu(hp1).oper[0]^.reg <> NR_ESP) or (taicpu(hp1).oper[1]^.typ <> top_reg) or (taicpu(hp1).oper[1]^.reg <> NR_EBP)) then begin hp1 := 
tai(p.next);
                            while Assigned(hp1) and
                                  (tai(hp1).typ in [ait_instruction]+SkipInstr) and
                                  not regReadByInstruction(RS_ESP,hp1) and
                                  not regModifiedByInstruction(RS_ESP,hp1) do
                              hp1 := tai(hp1.next);
                            if Assigned(hp1) and
                               (tai(hp1).typ = ait_instruction) and
                               (taicpu(hp1).opcode = A_PUSH) and
                               (taicpu(hp1).opsize = S_W) then
                              begin
                                taicpu(hp1).changeopsize(S_L);
                                if taicpu(hp1).oper[0]^.typ=top_reg then
                                  setsubreg(taicpu(hp1).oper[0]^.reg,R_SUBWHOLE);
                                hp1 := tai(p.next);
                                asml.remove(p);
                                p.free;
                                p := hp1;
                                continue
                              end;
                            if DoSubAddOpt(p) then
                              continue;
                          end
                        else if DoSubAddOpt(p) then
                          continue
                    end;
                end;
            end; { if is_jmp }
          end;
      end;
      updateUsedRegs(UsedRegs,p);
      p:=tai(p.next);
    end;
end;


{ Second peephole pass over the instructions from BlockStart up to (not
  including) BlockEnd.  Instructions for which InsContainsSegRef returns true
  are skipped.  The pass performs, per opcode:

    Jcc          - jb/jnb over a single inc/dec becomes (cmc +) adc/sbb ...,0
                 - when the selected CPU has CMOV, short runs of reg->reg movs
                   guarded by a conditional jump are turned into CMOVcc
    FSTP/FISTP   - delegated to doFpuLoadStoreOpt
    IMUL         - "mov reg1,reg2; imul y,reg2" becomes "imul y,reg1,reg2"
    MOV          - "mov reg1,reg2; mov/zx/sx (reg2,..),reg2" drops the first mov
                 - "mov (ref),reg; arith x,reg; mov reg,(ref)" becomes
                   "arith x,(ref)" when reg is not used afterwards

  asml is the list the instructions are removed from / inserted into. }
procedure PeepHoleOptPass2(asml: TAsmList; BlockStart, BlockEnd: tai);

{$ifdef DEBUG_AOPTCPU}
  { Inserts the text s as an assembler comment in front of p. }
  procedure DebugMsg(const s: string;p : tai);
    begin
      asml.insertbefore(tai_comment.Create(strpnew(s)), p);
    end;
{$else DEBUG_AOPTCPU}
  { No-op when DEBUG_AOPTCPU is not defined. }
  procedure DebugMsg(const s: string;p : tai);inline;
    begin
    end;
{$endif DEBUG_AOPTCPU}

  { Returns true if p is a 16 or 32 bit "mov reg,reg" instruction, i.e. a mov
    this pass is willing to turn into a CMOVcc.  Returns false for nil and for
    anything that is not an instruction (labels included). }
  function CanBeCMOV(p : tai) : boolean;
    begin
       CanBeCMOV:=assigned(p) and (p.typ=ait_instruction) and
         (taicpu(p).opcode=A_MOV) and
         (taicpu(p).opsize in [S_L,S_W]) and
         ((taicpu(p).oper[0]^.typ = top_reg)
           { we can't use cmov ref,reg because
             ref could be nil and cmov still throws an exception
             if ref=nil but the mov isn't done (FK)
            or ((taicpu(p).oper[0]^.typ = top_ref) and
             (taicpu(p).oper[0]^.ref^.refaddr = addr_no))
           }
         ) and
         (taicpu(p).oper[1]^.typ in [top_reg]);
    end;

var
  p,hp1,hp2,hp3: tai;
  l : longint;
  condition : tasmcond;
  UsedRegs, TmpUsedRegs: TRegSet;
  carryadd_opcode: Tasmop;

begin
  p := BlockStart;
  UsedRegs := [];
  while (p <> BlockEnd) Do
    begin
      UpdateUsedRegs(UsedRegs, tai(p.next));
      case p.Typ Of
        Ait_Instruction:
          begin
            if InsContainsSegRef(taicpu(p)) then
              begin
                p := tai(p.next);
                continue;
              end;
            case taicpu(p).opcode Of
              A_Jcc:
                begin
                  { jb @@1                            cmc
                    inc/dec operand           -->     adc/sbb operand,0
                    @@1:

                    ... and ...

                    jnb @@1
                    inc/dec operand           -->     adc/sbb operand,0
                    @@1: }
                  if GetNextInstruction(p,hp1) and (hp1.typ=ait_instruction) and
                     GetNextInstruction(hp1,hp2) and (hp2.typ=ait_label) and
                     (Tasmlabel(Taicpu(p).oper[0]^.ref^.symbol)=Tai_label(hp2).labsym) then
                    begin
                      carryadd_opcode:=A_NONE;
                      if Taicpu(p).condition in [C_NAE,C_B] then
                        begin
                          if Taicpu(hp1).opcode=A_INC then
                            carryadd_opcode:=A_ADC;
                          if Taicpu(hp1).opcode=A_DEC then
                            carryadd_opcode:=A_SBB;
                          if carryadd_opcode<>A_NONE then
                            begin
                              { reuse the jump instruction itself as the cmc }
                              Taicpu(p).clearop(0);
                              Taicpu(p).ops:=0;
                              Taicpu(p).is_jmp:=false;
                              Taicpu(p).opcode:=A_CMC;
                              Taicpu(p).condition:=C_NONE;
                              Taicpu(hp1).ops:=2;
                              Taicpu(hp1).loadoper(1,Taicpu(hp1).oper[0]^);
                              Taicpu(hp1).loadconst(0,0);
                              Taicpu(hp1).opcode:=carryadd_opcode;
                              continue;
                            end;
                        end;
                      if Taicpu(p).condition in [C_AE,C_NB] then
                        begin
                          if Taicpu(hp1).opcode=A_INC then
                            carryadd_opcode:=A_ADC;
                          if Taicpu(hp1).opcode=A_DEC then
                            carryadd_opcode:=A_SBB;
                          if carryadd_opcode<>A_NONE then
                            begin
                              asml.remove(p);
                              p.free;
                              Taicpu(hp1).ops:=2;
                              Taicpu(hp1).loadoper(1,Taicpu(hp1).oper[0]^);
                              Taicpu(hp1).loadconst(0,0);
                              Taicpu(hp1).opcode:=carryadd_opcode;
                              p:=hp1;
                              continue;
                            end;
                        end;
                    end;
                  if CPUX86_HAS_CMOV in cpu_capabilities[current_settings.cputype] then
                    begin
                      { check for
                             jCC   xxx
                             several movs
                          xxx:
                      }
                      l:=0;
                      GetNextInstruction(p, hp1);
                      while assigned(hp1) and
                        CanBeCMOV(hp1) and
                        { stop on labels }
                        not(hp1.typ=ait_label) do
                        begin
                          inc(l);
                          GetNextInstruction(hp1,hp1);
                        end;
                      if assigned(hp1) then
                        begin
                          if FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol),hp1) then
                            begin
                              if (l<=4) and (l>0) then
                                begin
                                  condition:=inverse_cond(taicpu(p).condition);
                                  hp2:=p;
                                  GetNextInstruction(p,hp1);
                                  p:=hp1;
                                  repeat
                                    taicpu(hp1).opcode:=A_CMOVcc;
                                    taicpu(hp1).condition:=condition;
                                    GetNextInstruction(hp1,hp1);
                                  until not(assigned(hp1)) or
                                    not(CanBeCMOV(hp1));
                                  { wait with removing else GetNextInstruction could
                                    ignore the label if it was the only usage in the
                                    jump moved away }
                                  tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol).decrefs;
                                  asml.remove(hp2);
                                  hp2.free;
                                  continue;
                                end;
                            end
                          else
                            begin
                              { check further for
                                     jCC   xxx
                                     several movs 1
                                     jmp   yyy
                             xxx:
                                     several movs 2
                             yyy:
                              }
                              { hp2 points to jmp yyy }
                              hp2:=hp1;
                              { skip hp1 to xxx }
                              GetNextInstruction(hp1, hp1);
                              { l>0: the repeat loops below always rewrite at least
                                one instruction, so with an empty first run they
                                would turn the jmp itself into a CMOVcc }
                              if assigned(hp2) and
                                assigned(hp1) and
                                (l>0) and
                                (l<=3) and
                                (hp2.typ=ait_instruction) and
                                (taicpu(hp2).is_jmp) and
                                (taicpu(hp2).condition=C_None) and
                                { real label and jump, no further references to the
                                  label are allowed }
                                (tasmlabel(taicpu(p).oper[0]^.ref^.symbol).getrefs=2) and
                                FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol),hp1) then
                                 begin
                                   l:=0;
                                   { skip hp1 to several movs 2 }
                                   GetNextInstruction(hp1, hp1);
                                   while assigned(hp1) and
                                     CanBeCMOV(hp1) do
                                     begin
                                       inc(l);
                                       GetNextInstruction(hp1, hp1);
                                     end;
                                   { hp1 points to yyy: }
                                   { l>0: same reason as above, an empty second run
                                     would make the second repeat loop patch the
                                     yyy label node as if it were an instruction }
                                   if (l>0) and
                                     assigned(hp1) and
                                     FindLabel(tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol),hp1) then
                                     begin
                                        condition:=inverse_cond(taicpu(p).condition);
                                        GetNextInstruction(p,hp1);
                                        hp3:=p;
                                        p:=hp1;
                                        repeat
                                          taicpu(hp1).opcode:=A_CMOVcc;
                                          taicpu(hp1).condition:=condition;
                                          GetNextInstruction(hp1,hp1);
                                        until not(assigned(hp1)) or
                                          not(CanBeCMOV(hp1));
                                        { hp2 is still at jmp yyy }
                                        GetNextInstruction(hp2,hp1);
                                        { hp1 is now at xxx: }
                                        condition:=inverse_cond(condition);
                                        GetNextInstruction(hp1,hp1);
                                        { hp1 is now at several movs 2 }
                                        repeat
                                          taicpu(hp1).opcode:=A_CMOVcc;
                                          taicpu(hp1).condition:=condition;
                                          GetNextInstruction(hp1,hp1);
                                        until not(assigned(hp1)) or
                                          not(CanBeCMOV(hp1));
                                        {
                                        asml.remove(hp1.next)
                                        hp1.next.free;
                                        asml.remove(hp1);
                                        hp1.free;
                                        }
                                        { remove jCC }
                                        tasmlabel(taicpu(hp3).oper[0]^.ref^.symbol).decrefs;
                                        asml.remove(hp3);
                                        hp3.free;
                                        { remove jmp }
                                        tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol).decrefs;
                                        asml.remove(hp2);
                                        hp2.free;
                                        continue;
                                     end;
                                 end;
                            end;
                        end;
                    end;
                end;
              A_FSTP,A_FISTP:
                if doFpuLoadStoreOpt(asmL,p) then
                  continue;
              A_IMUL:
                begin
                  if (taicpu(p).ops >= 2) and
                     ((taicpu(p).oper[0]^.typ = top_const) or
                      ((taicpu(p).oper[0]^.typ = top_ref) and (taicpu(p).oper[0]^.ref^.refaddr=addr_full))) and
                     (taicpu(p).oper[1]^.typ = top_reg) and
                     ((taicpu(p).ops = 2) or
                      ((taicpu(p).oper[2]^.typ = top_reg) and
                       (taicpu(p).oper[2]^.reg = taicpu(p).oper[1]^.reg))) and
                     getLastInstruction(p,hp1) and
                     (hp1.typ = ait_instruction) and
                     (taicpu(hp1).opcode = A_MOV) and
                     (taicpu(hp1).oper[0]^.typ = top_reg) and
                     (taicpu(hp1).oper[1]^.typ = top_reg) and
                     (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
              { change "mov reg1,reg2; imul y,reg2" to "imul y,reg1,reg2" }
                    begin
                      taicpu(p).ops := 3;
                      taicpu(p).loadreg(1,taicpu(hp1).oper[0]^.reg);
                      taicpu(p).loadreg(2,taicpu(hp1).oper[1]^.reg);
                      asml.remove(hp1);
                      hp1.free;
                    end;
                end;
              A_MOV:
                begin
                  if (taicpu(p).oper[0]^.typ = top_reg) and
                     (taicpu(p).oper[1]^.typ = top_reg) and
                     GetNextInstruction(p, hp1) and
                     (hp1.typ = ait_Instruction) and
                     ((taicpu(hp1).opcode = A_MOV) or
                      (taicpu(hp1).opcode = A_MOVZX) or
                      (taicpu(hp1).opcode = A_MOVSX)) and
                     (taicpu(hp1).oper[0]^.typ = top_ref) and
                     (taicpu(hp1).oper[1]^.typ = top_reg) and
                     ((taicpu(hp1).oper[0]^.ref^.base = taicpu(p).oper[1]^.reg) or
                      (taicpu(hp1).oper[0]^.ref^.index = taicpu(p).oper[1]^.reg)) and
                     (getsupreg(taicpu(hp1).oper[1]^.reg) = getsupreg(taicpu(p).oper[1]^.reg)) then
              {mov reg1, reg2
               mov/zx/sx (reg2, ..), reg2      to   mov/zx/sx (reg1, ..), reg2}
                    begin
                      if (taicpu(hp1).oper[0]^.ref^.base = taicpu(p).oper[1]^.reg) then
                        taicpu(hp1).oper[0]^.ref^.base := taicpu(p).oper[0]^.reg;
                      if (taicpu(hp1).oper[0]^.ref^.index = taicpu(p).oper[1]^.reg) then
                        taicpu(hp1).oper[0]^.ref^.index := taicpu(p).oper[0]^.reg;
                      asml.remove(p);
                      p.free;
                      p := hp1;
                      continue;
                    end
                  else if (taicpu(p).oper[0]^.typ = top_ref) and
                    GetNextInstruction(p,hp1) and
                    (hp1.typ = ait_instruction) and
                    (IsFoldableArithOp(taicpu(hp1),taicpu(p).oper[1]^.reg) or
                     ((taicpu(hp1).opcode=A_LEA) and
                      (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) and
                      ((MatchReference(taicpu(hp1).oper[0]^.ref^,taicpu(p).oper[1]^.reg,NR_INVALID) and
                        (taicpu(hp1).oper[0]^.ref^.index<>taicpu(p).oper[1]^.reg)) or
                       (MatchReference(taicpu(hp1).oper[0]^.ref^,NR_INVALID,taicpu(p).oper[1]^.reg) and
                        (taicpu(hp1).oper[0]^.ref^.base<>taicpu(p).oper[1]^.reg))
                      )
                     )
                    ) and
                    GetNextInstruction(hp1,hp2) and
                    MatchInstruction(hp2,A_MOV,[]) and
                    MatchOperand(taicpu(p).oper[1]^,taicpu(hp2).oper[0]^) and
                    (taicpu(hp2).oper[1]^.typ = top_ref) then
                    begin
                      TmpUsedRegs := UsedRegs;
                      UpdateUsedRegs(TmpUsedRegs,tai(hp1.next));
                      if (RefsEqual(taicpu(hp2).oper[1]^.ref^, taicpu(p).oper[0]^.ref^) and
                        not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,
                             hp2, TmpUsedRegs))) then
                        { change   mov            (ref), reg            }
                        {          add/sub/or/... reg2/$const, reg      }
                        {          mov            reg, (ref)            }
                        {          # release reg                        }
                        { to       add/sub/or/... reg2/$const, (ref)    }
                        begin
                          case taicpu(hp1).opcode of
                            A_INC,A_DEC:
                              taicpu(hp1).loadRef(0,taicpu(p).oper[0]^.ref^);
                            A_LEA:
                              begin
                                { the lea adds reg to one other register: keep
                                  that other register as the add source }
                                taicpu(hp1).opcode:=A_ADD;
                                if taicpu(hp1).oper[0]^.ref^.index<>taicpu(p).oper[1]^.reg then
                                  taicpu(hp1).loadreg(0,taicpu(hp1).oper[0]^.ref^.index)
                                else
                                  taicpu(hp1).loadreg(0,taicpu(hp1).oper[0]^.ref^.base);
                                taicpu(hp1).loadRef(1,taicpu(p).oper[0]^.ref^);
                                DebugMsg('Peephole FoldLea done',hp1);
                              end
                            else
                              taicpu(hp1).loadRef(1,taicpu(p).oper[0]^.ref^);
                          end;
                          asml.remove(p);
                          asml.remove(hp2);
                          p.free;
                          hp2.free;
                          p := hp1
                        end;
                    end
                end;
            end;
          end;
      end;
      p := tai(p.next)
    end;
end;


{ Final peephole pass over the instructions from BlockStart up to (not
  including) BlockEnd.  Instructions for which InsContainsSegRef returns true
  are skipped.  The pass performs, per opcode:

    CALL     - "call x; jmp y" becomes "push y; jmp x" (pre-Pentium2, no PIC)
             - "call x; ret" becomes "jmp x" (optimization level 4, no PIC)
    CMP      - "cmp $0,%reg" becomes "test %reg,%reg"
    MOVZX    - byte->long movzx becomes "xor; movb" when optimizing for the
               Pentium, not for size, and regvar optimization is off
    TEST/OR  - removes a flag-setting "test/or %y,%y" (or "test $-1,%y") when
               the preceding instruction already set the needed flags

  asml is the list the instructions are removed from / inserted into. }
procedure PostPeepHoleOpts(asml: TAsmList; BlockStart, BlockEnd: tai);
var
  p,hp1,hp2: tai;
  IsTestConstX: boolean;
begin
  p := BlockStart;
  while (p <> BlockEnd) Do
    begin
      case p.Typ Of
        Ait_Instruction:
          begin
            if InsContainsSegRef(taicpu(p)) then
              begin
                p := tai(p.next);
                continue;
              end;
            case taicpu(p).opcode Of
              A_CALL:
                begin
                  { don't do this on modern CPUs, this really hurts them due to
                    broken call/ret pairing }
                  if (current_settings.optimizecputype < cpu_Pentium2) and
                     not(cs_create_pic in current_settings.moduleswitches) and
                     GetNextInstruction(p, hp1) and
                     (hp1.typ = ait_instruction) and
                     (taicpu(hp1).opcode = A_JMP) and
                     ((taicpu(hp1).oper[0]^.typ=top_ref) and (taicpu(hp1).oper[0]^.ref^.refaddr=addr_full)) then
                    begin
                      hp2 := taicpu.Op_sym(A_PUSH,S_L,taicpu(hp1).oper[0]^.ref^.symbol);
                      InsertLLItem(asml, p.previous, p, hp2);
                      taicpu(p).opcode := A_JMP;
                      taicpu(p).is_jmp := true;
                      asml.remove(hp1);
                      hp1.free;
                    end
                  { replace
                      call   procname
                      ret
                    by
                      jmp    procname

                    this should never hurt except when pic is used, not sure
                    how to handle it then

                    but do it only on level 4 because it destroys stack back traces
                  }
                  else if (cs_opt_level4 in current_settings.optimizerswitches) and
                    not(cs_create_pic in current_settings.moduleswitches) and
                    GetNextInstruction(p, hp1) and
                    (hp1.typ = ait_instruction) and
                    (taicpu(hp1).opcode = A_RET) and
                    (taicpu(hp1).ops=0) then
                    begin
                      taicpu(p).opcode := A_JMP;
                      taicpu(p).is_jmp := true;
                      asml.remove(hp1);
                      hp1.free;
                    end;
                end;
              A_CMP:
                begin
                  if (taicpu(p).oper[0]^.typ = top_const) and
                     (taicpu(p).oper[0]^.val = 0) and
                     (taicpu(p).oper[1]^.typ = top_reg) then
                   {change "cmp $0, %reg" to "test %reg, %reg"}
                    begin
                      taicpu(p).opcode := A_TEST;
                      taicpu(p).loadreg(0,taicpu(p).oper[1]^.reg);
                      { p is not advanced: the new test is examined again by
                        the A_TEST case on the next iteration }
                      continue;
                    end;
                end;
(*
Optimization is not safe; xor clears the carry flag.
See test/tgadint64 in the test suite.
              A_MOV:
                if (taicpu(p).oper[0]^.typ = Top_Const) and
                   (taicpu(p).oper[0]^.val = 0) and
                   (taicpu(p).oper[1]^.typ = Top_Reg) then
                  { change "mov $0, %reg" into "xor %reg, %reg" }
                  begin
                    taicpu(p).opcode := A_XOR;
                    taicpu(p).loadReg(0,taicpu(p).oper[1]^.reg);
                  end;
*)
              A_MOVZX:
                { if register vars are on, it's possible there is code like }
                {   "cmpl $3,%eax; movzbl 8(%ebp),%ebx; je .Lxxx"           }
                { so we can't safely replace the movzx then with xor/mov,   }
                { since that would change the flags (JM)                    }
                if not(cs_opt_regvar in current_settings.optimizerswitches) then
                 begin
                  if (taicpu(p).oper[1]^.typ = top_reg) then
                    if (taicpu(p).oper[0]^.typ = top_reg)
                      then
                        case taicpu(p).opsize of
                          S_BL:
                            begin
                              if IsGP32Reg(getsupreg(taicpu(p).oper[1]^.reg)) and
                                 { the source byte must not live in the destination
                                   register: the xor inserted below would clear it
                                   before the movb reads it }
                                 (getsupreg(taicpu(p).oper[0]^.reg) <> getsupreg(taicpu(p).oper[1]^.reg)) and
                                 not(cs_opt_size in current_settings.optimizerswitches) and
                                 (current_settings.optimizecputype = cpu_Pentium) then
                                  {Change "movzbl %reg1, %reg2" to
                                   "xorl %reg2, %reg2; movb %reg1, %reg2" for Pentium and
                                   PentiumMMX}
                                begin
                                  hp1 := taicpu.op_reg_reg(A_XOR, S_L,
                                              taicpu(p).oper[1]^.reg, taicpu(p).oper[1]^.reg);
                                  InsertLLItem(asml,p.previous, p, hp1);
                                  taicpu(p).opcode := A_MOV;
                                  taicpu(p).changeopsize(S_B);
                                  setsubreg(taicpu(p).oper[1]^.reg,R_SUBL);
                                end;
                            end;
                        end
                      else if (taicpu(p).oper[0]^.typ = top_ref) and
                          (taicpu(p).oper[0]^.ref^.base <> taicpu(p).oper[1]^.reg) and
                          (taicpu(p).oper[0]^.ref^.index <> taicpu(p).oper[1]^.reg) and
                          not(cs_opt_size in current_settings.optimizerswitches) and
                          IsGP32Reg(getsupreg(taicpu(p).oper[1]^.reg)) and
                          (current_settings.optimizecputype = cpu_Pentium) and
                          (taicpu(p).opsize = S_BL) then
                        {changes "movzbl mem, %reg" to "xorl %reg, %reg; movb mem, %reg8" for
                         Pentium and PentiumMMX}
                        begin
                          hp1 := taicpu.Op_reg_reg(A_XOR, S_L, taicpu(p).oper[1]^.reg,
                                      taicpu(p).oper[1]^.reg);
                          taicpu(p).opcode := A_MOV;
                          taicpu(p).changeopsize(S_B);
                          setsubreg(taicpu(p).oper[1]^.reg,R_SUBL);
                          InsertLLItem(asml,p.previous, p, hp1);
                        end;
                 end;
              A_TEST, A_OR:
                {removes the line marked with (x) from the sequence
                 and/or/xor/add/sub/... $x, %y
                 test/or %y, %y  | test $-1, %y    (x)
                 j(n)z _Label
                    as the first instruction already adjusts the ZF
                    %y operand may also be a reference }
                begin
                  IsTestConstX:=(taicpu(p).opcode=A_TEST) and
                    MatchOperand(taicpu(p).oper[0]^,-1);
                  if (OpsEqual(taicpu(p).oper[0]^,taicpu(p).oper[1]^) or IsTestConstX) and
                     GetLastInstruction(p, hp1) and
                     (tai(hp1).typ = ait_instruction) and
                     GetNextInstruction(p,hp2) and
                     MatchInstruction(hp2,A_SETcc,A_Jcc,A_CMOVcc,[]) then
                    case taicpu(hp1).opcode Of
                      A_ADD, A_SUB, A_OR, A_XOR, A_AND:
                        begin
                          if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[1]^) and
                            { does not work in case of overflow for G(E)/L(E)/C_O/C_NO }
                            { and in case of carry for A(E)/B(E)/C/NC                  }
                             ((taicpu(hp2).condition in [C_Z,C_NZ,C_E,C_NE]) or
                              ((taicpu(hp1).opcode <> A_ADD) and
                               (taicpu(hp1).opcode <> A_SUB))) then
                            begin
                              hp1 := tai(p.next);
                              asml.remove(p);
                              p.free;
                              p := tai(hp1);
                              continue
                            end;
                        end;
                      A_SHL, A_SAL, A_SHR, A_SAR:
                        begin
                          if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[1]^) and
                            { SHL/SAL/SHR/SAR with a value of 0 do not change the flags }
                            { therefore, it's only safe to do this optimization for     }
                            { shifts by a (nonzero) constant                            }
                             (taicpu(hp1).oper[0]^.typ = top_const) and
                             (taicpu(hp1).oper[0]^.val <> 0) and
                            { does not work in case of overflow for G(E)/L(E)/C_O/C_NO }
                            { and in case of carry for A(E)/B(E)/C/NC                  }
                             (taicpu(hp2).condition in [C_Z,C_NZ,C_E,C_NE]) then
                            begin
                              hp1 := tai(p.next);
                              asml.remove(p);
                              p.free;
                              p := tai(hp1);
                              continue
                            end;
                        end;
                      A_DEC, A_INC, A_NEG:
                        begin
                          if OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) and
                            { does not work in case of overflow for G(E)/L(E)/C_O/C_NO }
                            { and in case of carry for A(E)/B(E)/C/NC                  }
                            (taicpu(hp2).condition in [C_Z,C_NZ,C_E,C_NE]) then
                            begin
                              case taicpu(hp1).opcode Of
                                A_DEC, A_INC:
                                  {replace inc/dec with add/sub 1, because inc/dec doesn't set the carry flag}
                                  begin
                                    case taicpu(hp1).opcode Of
                                      A_DEC: taicpu(hp1).opcode := A_SUB;
                                      A_INC: taicpu(hp1).opcode := A_ADD;
                                    end;
                                    taicpu(hp1).loadoper(1,taicpu(hp1).oper[0]^);
                                    taicpu(hp1).loadConst(0,1);
                                    taicpu(hp1).ops:=2;
                                  end
                              end;
                              hp1 := tai(p.next);
                              asml.remove(p);
                              p.free;
                              p := tai(hp1);
                              continue
                            end;
                        end
                      else
                        { change "test  $-1,%reg" into "test %reg,%reg" }
                        if IsTestConstX and (taicpu(p).oper[1]^.typ=top_reg) then
                          taicpu(p).loadoper(0,taicpu(p).oper[1]^);
                    end { case }
                  else
                    { change "test  $-1,%reg" into "test %reg,%reg" }
                    if IsTestConstX and (taicpu(p).oper[1]^.typ=top_reg) then
                      taicpu(p).loadoper(0,taicpu(p).oper[1]^);
                end;
            end;
          end;
      end;
      p := tai(p.next)
    end;
end;

end.