From ef16df72da2c246e152cc8f650eed60cc9434f54 Mon Sep 17 00:00:00 2001 From: Jonas Maebe Date: Thu, 13 Jul 2006 17:48:41 +0000 Subject: [PATCH] * some optimizations for code generated for "(x and power_of_two_const) <>/= 0)" git-svn-id: trunk@4159 - --- compiler/powerpc/aoptcpu.pas | 85 ++++++++++++++++++++++++++++-------- 1 file changed, 68 insertions(+), 17 deletions(-) diff --git a/compiler/powerpc/aoptcpu.pas b/compiler/powerpc/aoptcpu.pas index b8c8ab4009..b107007b4a 100644 --- a/compiler/powerpc/aoptcpu.pas +++ b/compiler/powerpc/aoptcpu.pas @@ -97,8 +97,8 @@ const function TCpuAsmOptimizer.cmpi_mfcr_opt(p, next1, next2: taicpu): boolean; var - next3: tai; - inverse: boolean; + next3, prev: tai; + inverse, prevrlwinm: boolean; begin result := true; inverse := @@ -106,7 +106,8 @@ const (next3.typ = ait_instruction) and (taicpu(next3).opcode = A_XORI) and (taicpu(next3).oper[0]^.reg = taicpu(next3).oper[1]^.reg) and - (taicpu(next3).oper[0]^.reg = taicpu(next2).oper[0]^.reg); + (taicpu(next3).oper[0]^.reg = taicpu(next2).oper[0]^.reg) and + (taicpu(next3).oper[2]^.val = 1); case taicpu(next2).oper[2]^.val of 1: begin @@ -136,18 +137,65 @@ const } 3: begin - // equal/not equal to zero (the xori remains in the latter case; - // there's a more optimal sequence without it, but needs extra - // register) - p.opcode := A_CNTLZW; - p.loadreg(1,p.oper[0]^.reg); - p.loadreg(0,next1.oper[0]^.reg); - next1.ops := 3; - next1.opcode := A_SRWI; - next1.loadreg(1,next1.oper[0]^.reg); - next1.loadconst(2,5); - asml.remove(next2); - next2.free; + prevrlwinm := + getlastinstruction(p,prev) and + (prev.typ = ait_instruction) and + ((taicpu(prev).opcode = A_RLWINM) or + (taicpu(prev).opcode = A_RLWINM_)) and + (taicpu(prev).oper[0]^.reg = p.oper[0]^.reg) and + (taicpu(prev).oper[3]^.val = taicpu(prev).oper[4]^.val); + + if (prevrlwinm) then + begin + // isolate the bit we need + if (taicpu(prev).oper[3]^.val <> 31) then + begin + p.opcode := A_RLWINM; + p.ops := 5; + p.loadreg(1,p.oper[0]^.reg); + p.loadreg(0,next1.oper[0]^.reg); + p.loadconst(2,taicpu(prev).oper[3]^.val + 1); + p.loadconst(3,31); + p.loadconst(4,31); + end + else { if (taicpu(prev).oper[0]^.reg <> next1.oper[0]^.reg) then } + begin + p.opcode := A_MR; + p.loadreg(1,p.oper[0]^.reg); + p.loadreg(0,next1.oper[0]^.reg); + end; + if not inverse then + begin + next1.ops := 3; + next1.opcode := A_XORI; + next1.loadreg(1,next1.oper[0]^.reg); + next1.loadconst(2,1); + end + else + begin + asml.remove(next1); + next1.free; + asml.remove(next3); + next3.free; + end; + asml.remove(next2); + next2.free; + end + else + begin + // equal/not equal to zero (the xori remains in the latter case; + // there's a more optimal sequence without it, but needs extra + // register) + p.opcode := A_CNTLZW; + p.loadreg(1,p.oper[0]^.reg); + p.loadreg(0,next1.oper[0]^.reg); + next1.ops := 3; + next1.opcode := A_SRWI; + next1.loadreg(1,next1.oper[0]^.reg); + next1.loadconst(2,5); + asml.remove(next2); + next2.free; + end; end; else result := false; @@ -217,7 +265,9 @@ const begin if getnextinstruction(p,next1) and (next1.typ = ait_instruction) and - (taicpu(next1).opcode = A_RLWINM) and + ((taicpu(next1).opcode = A_RLWINM) or + (taicpu(next1).opcode = A_SLWI) or + (taicpu(next1).opcode = A_SRWI)) and (taicpu(next1).oper[0]^.reg = taicpu(p).oper[0]^.reg) and (taicpu(next1).oper[1]^.reg = taicpu(p).oper[0]^.reg) then begin @@ -236,7 +286,8 @@ const if getnextinstruction(p,next1) and (next1.typ = ait_instruction) and ((taicpu(next1).opcode = A_SLWI) or - (taicpu(next1).opcode = A_RLWINM)) and + (taicpu(next1).opcode = A_RLWINM) or + (taicpu(next1).opcode = A_SRWI)) and (taicpu(next1).oper[0]^.reg = taicpu(p).oper[0]^.reg) and (taicpu(next1).oper[1]^.reg = taicpu(p).oper[0]^.reg) then case taicpu(next1).opcode of