From c644503daf3318089de2881dbe090ea382b0340f Mon Sep 17 00:00:00 2001 From: masta Date: Tue, 28 Jan 2014 13:20:35 +0000 Subject: [PATCH] Add MovLdr2Ldr peephole optimizer for ARM The existing LdrLdr2LdrMov optimizer will generate a lot of sequences like this: ldr regA, [...] mov regB, regA ldr regB, [regB, ...] This now gets changed to: ldr regA, [...] ldr regB, [regA, ...] This saves an instruction and might open up more possibilities for the load scheduler. git-svn-id: trunk@26603 - --- compiler/arm/aoptcpu.pas | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/compiler/arm/aoptcpu.pas b/compiler/arm/aoptcpu.pas index 98f707c540..1d0a13d05d 100644 --- a/compiler/arm/aoptcpu.pas +++ b/compiler/arm/aoptcpu.pas @@ -1165,6 +1165,41 @@ Implementation end; end; end; + { Fold the very common sequence + mov regA, regB + ldr* regA, [regA] + to + ldr* regA, [regB] + CAUTION! If this one is successful p might not be a mov instruction anymore! + } + if (taicpu(p).opcode = A_MOV) and + (taicpu(p).ops = 2) and + (taicpu(p).oper[1]^.typ = top_reg) and + (taicpu(p).oppostfix = PF_NONE) and + GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and + MatchInstruction(hp1, [A_LDR, A_STR], [taicpu(p).condition], []) and + { We can change the base register only when the instruction uses AM_OFFSET } + ((taicpu(hp1).oper[1]^.ref^.index = taicpu(p).oper[0]^.reg) or + ((taicpu(hp1).oper[1]^.ref^.addressmode = AM_OFFSET) and + (taicpu(hp1).oper[1]^.ref^.base = taicpu(p).oper[0]^.reg)) + ) and + not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) and + RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then + begin + DebugMsg('Peephole MovLdr2Ldr done', hp1); + if (taicpu(hp1).oper[1]^.ref^.addressmode = AM_OFFSET) and + (taicpu(hp1).oper[1]^.ref^.base = taicpu(p).oper[0]^.reg) then + taicpu(hp1).oper[1]^.ref^.base := taicpu(p).oper[1]^.reg; + + if taicpu(hp1).oper[1]^.ref^.index = taicpu(p).oper[0]^.reg then + taicpu(hp1).oper[1]^.ref^.index := 
taicpu(p).oper[1]^.reg; + + asml.remove(p); + p.free; + p:=hp1; + result:=true; + end; + { This folds shifterops into following instructions mov r0, r1, lsl #8 add r2, r3, r0