From 05a8783b1e7b8589b77f90be0a3f8faa6c4318c9 Mon Sep 17 00:00:00 2001 From: florian Date: Thu, 17 May 2012 08:31:44 +0000 Subject: [PATCH] * patch by Nico Erfurth: Improve ARM-Peephole Optimizers 1.) Introduce a ARM-specific RegUsedAfterInstruction which analyzes instructions and reg allocation information to see if a register is really needed afterwards to decide if some special optimizations can be done. 2.) Introduce "RemoveSuperfluousMove" This tries to fold mov into a previous Data-Instruction (ADD, ORR, etc) or LDR-Instruction. 3.) Introduce new Optimizer "DataMov2Data" and modify LdrMov2Ldr to use RemoveSuperfluousMove 4.) Expand Ldr* and Str* Optimizers to also work on {Ldr,Str}{,b,h} git-svn-id: trunk@21314 - --- compiler/arm/aoptcpu.pas | 160 ++++++++++++++++++++++++++++++++------- 1 file changed, 133 insertions(+), 27 deletions(-) diff --git a/compiler/arm/aoptcpu.pas b/compiler/arm/aoptcpu.pas index 1077a6fcbf..c5b9ef2d02 100644 --- a/compiler/arm/aoptcpu.pas +++ b/compiler/arm/aoptcpu.pas @@ -39,6 +39,9 @@ Type function PeepHoleOptPass1Cpu(var p: tai): boolean; override; procedure PeepHoleOptPass2;override; Function RegInInstruction(Reg: TRegister; p1: tai): Boolean;override; + procedure RemoveSuperfluousMove(const p: tai; movp: tai; const optimizer: string); + function RegUsedAfterInstruction(reg: Tregister; p: tai; + var AllUsedRegs: TAllUsedRegs): Boolean; End; TCpuPreRegallocScheduler = class(TAsmOptimizer) @@ -106,7 +109,7 @@ Implementation result := (oper.typ = top_reg) and (oper.reg = reg); end; - procedure RemoveRedundantMove(const cmpp: tai; movp: tai; asml: TAsmList) ; + procedure RemoveRedundantMove(const cmpp: tai; movp: tai; asml: TAsmList); begin if (taicpu(movp).condition = C_EQ) and (taicpu(cmpp).oper[0]^.reg = taicpu(movp).oper[0]^.reg) and @@ -118,11 +121,91 @@ Implementation end; end; + function regLoadedWithNewValue(reg: tregister; hp: tai): boolean; + var + p: taicpu; + begin + p := taicpu(hp); + regLoadedWithNewValue := + (assigned(hp)) and + (hp.typ = ait_instruction) and + (not(p.opcode in [A_STR, A_STRB, A_STRH, A_CMP, A_CMN, A_TST, A_TEQ, + A_B, A_BL, A_BX, A_BLX])) and + (p.oper[0]^.typ = top_reg) and + (p.oper[0]^.reg = reg); + end; + + function instructionLoadsFromReg(const reg: TRegister; const hp: tai): boolean; + var + p: taicpu; + i: longint; + begin + instructionLoadsFromReg := false; + if not (assigned(hp) and (hp.typ = ait_instruction)) then + exit; + p:=taicpu(hp); + + i:=1; + {For these instructions we have to start on oper[0]} + if (p.opcode in [A_STR, A_STRB, A_STRH, A_CMP, A_CMN, A_TST, A_TEQ, + A_B, A_BL, A_BX, A_BLX]) then i:=0; + + while(itaicpu(hp1).oper[1]^.ref^.index) and (taicpu(p).oper[0]^.reg<>taicpu(hp1).oper[1]^.ref^.base) and @@ -221,8 +320,7 @@ Implementation taicpu(hp1).loadreg(1,taicpu(p).oper[0]^.reg); end; result := true; - end - else + end; { Remove superfluous mov after ldr changes ldr reg1, ref @@ -236,22 +334,8 @@ Implementation * ldr+mov have the same conditions * mov does not set flags } - if GetNextInstruction(p, hp1) and - MatchInstruction(hp1, A_MOV, [taicpu(p).condition], [PF_None]) and - (taicpu(hp1).ops=2) and {We can't optimize if there is a shiftop} - MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) then - begin - CopyUsedRegs(TmpUsedRegs); - UpdateUsedRegs(TmpUsedRegs, tai(p.next)); - If not(RegUsedAfterInstruction(taicpu(p).oper[0]^.reg,hp1,TmpUsedRegs)) then - begin - asml.insertbefore(tai_comment.Create(strpnew('Peephole LdrMov2Ldr removed superfluous mov')), hp1); - taicpu(p).loadreg(0,taicpu(hp1).oper[0]^.reg); - asml.remove(hp1); - hp1.free; - end; - ReleaseUsedRegs(TmpUsedRegs); - end; + if GetNextInstruction(p, hp1) then + RemoveSuperfluousMove(p, hp1, 'LdrMov2Ldr'); end; A_MOV: begin @@ -311,7 +395,8 @@ Implementation (taicpu(p).oper[1]^.typ = top_const) and GetNextInstruction(p,hp1) then begin - while MatchInstruction(hp1, A_STR, [], []) and + while (tai(p).typ = ait_instruction) and + (taicpu(p).opcode in [A_STR, A_STRH, A_STRB]) and MatchOperand(taicpu(hp1).oper[0]^, taicpu(p).oper[0]^) and GetNextInstruction(hp1, hp2) and MatchInstruction(hp2, A_MOV, [taicpu(p).condition], [taicpu(p).oppostfix]) and @@ -332,6 +417,8 @@ Implementation add r1, r1, #1 to add r1, r0, #1 + + Todo: Make it work for mov+cmp too } if (taicpu(p).ops = 2) and (taicpu(p).oper[1]^.typ = top_reg) and @@ -365,7 +452,16 @@ Implementation end; end; end; - A_AND: + A_ADD, + A_ADC, + A_RSB, + A_RSC, + A_SUB, + A_SBC, + A_AND, + A_BIC, + A_EOR, + A_ORR: begin { change @@ -374,7 +470,8 @@ Implementation to and reg2,reg1,(const1 and const2) } - if (taicpu(p).oper[1]^.typ = top_reg) and + if (taicpu(p).opcode = A_AND) and + (taicpu(p).oper[1]^.typ = top_reg) and (taicpu(p).oper[2]^.typ = top_const) and GetNextInstruction(p, hp1) and MatchInstruction(hp1, A_AND, [taicpu(p).condition], [PF_None]) and @@ -388,6 +485,15 @@ Implementation asml.remove(hp1); hp1.free; end; + { + change + add reg1, ... + mov reg2, reg1 + to + add reg2, ... + } + if GetNextInstruction(p, hp1) then + RemoveSuperfluousMove(p, hp1, 'DataMov2Data'); end; A_CMP: begin