From cd3f064a3318f9a23c0437234c3c1852c46ab364 Mon Sep 17 00:00:00 2001
From: Jonas Maebe <jonas@freepascal.org>
Date: Wed, 12 Oct 2005 19:47:21 +0000
Subject: [PATCH] + enabled postpeepholeopts phase

  + optimize "integer op" followed by comparison of target register with
    zero to a variant of that integer op which sets the flags (ppc)
  + change rlwinm. instructions which do nothing but an "and" operation
    into andi./andis., since the rlwinm. is cracked on the G5 while
    andi./andis. isn't

git-svn-id: trunk@1361 -
---
 compiler/aopt.pas            |   4 +-
 compiler/aoptobj.pas         |  24 +++++
 compiler/powerpc/aoptcpu.pas | 156 ++++++++++++++++++++++++++++++++++-
 3 files changed, 181 insertions(+), 3 deletions(-)

diff --git a/compiler/aopt.pas b/compiler/aopt.pas
index f0d0c6ea66..c3d9e6ec0a 100644
--- a/compiler/aopt.pas
+++ b/compiler/aopt.pas
@@ -219,7 +219,9 @@ Unit aopt;
            End;
          { more peephole optimizations }
        { PeepHoleOptPass2;}
-         { free memory�}
+         { if pass = last_pass then }
+         PostPeepHoleOpts;
+         { free memory }
          clear;
          { continue where we left off, BlockEnd is either the start of an }
          { assembler block or nil}
diff --git a/compiler/aoptobj.pas b/compiler/aoptobj.pas
index f5f5186f1d..49a1698a0e 100644
--- a/compiler/aoptobj.pas
+++ b/compiler/aoptobj.pas
@@ -298,6 +298,8 @@ Unit AoptObj;
         { processor dependent methods }
         // if it returns true, perform a "continue"
         function PeepHoleOptPass1Cpu(var p: tai): boolean; virtual;
+        // called by PostPeepHoleOpts; if it returns true, perform a "continue"
+        function PostPeepHoleOptsCpu(var p: tai): boolean; virtual;
       End;
 
        Function ArrayRefsEq(const r1, r2: TReference): Boolean;
@@ -1094,7 +1096,22 @@ Unit AoptObj;
 
 
     procedure TAOptObj.PostPeepHoleOpts;
+      { Walks all entries from BlockStart up to, but not including, BlockEnd and
+        passes each of them to PostPeepHoleOptsCpu. If that returns true, the
+        same p (a var parameter of the hook) is passed again without advancing. }
+      var
+        p: tai;
       begin
+        p := BlockStart;
+        //!!!! UsedRegs := [];
+        while (p <> BlockEnd) Do
+          begin
+            //!!!! UpDateUsedRegs(UsedRegs, tai(p.next));
+            if PostPeepHoleOptsCpu(p) then
+              continue;
+            //!!!!!!!! updateUsedRegs(UsedRegs,p);
+            p:=tai(p.next);
+          end;
       end;
 
 
@@ -1103,4 +1120,11 @@ Unit AoptObj;
         result := false;
       end;
 
+
+    { Default implementation: performs no cpu-specific optimization. }
+    function TAOptObj.PostPeepHoleOptsCpu(var p: tai): boolean;
+      begin
+        result := false;
+      end;
+
 End.
diff --git a/compiler/powerpc/aoptcpu.pas b/compiler/powerpc/aoptcpu.pas
index 98825b5a59..00b5463f60 100644
--- a/compiler/powerpc/aoptcpu.pas
+++ b/compiler/powerpc/aoptcpu.pas
@@ -34,12 +34,15 @@ Type
   TCpuAsmOptimizer = class(TAsmOptimizer)
     { uses the same constructor as TAopObj }
     function PeepHoleOptPass1Cpu(var p: tai): boolean; override;
+
+    function PostPeepHoleOptsCpu(var p: tai): boolean; override;
+
   End;
 
 Implementation
 
   uses
-    cutils, aasmcpu;
+    cutils, aasmcpu, cgbase;
 
   function TCpuAsmOptimizer.PeepHoleOptPass1Cpu(var p: tai): boolean;
     var
@@ -146,7 +149,156 @@ Implementation
       end;
   end;
 
+
+  const
+    { modifyflags[op] is the opcode which performs the same operation as op
+      and also sets the flags, or a_none if no such opcode exists. The table
+      is indexed by tasmop, so its entries must stay in the declaration order
+      of that type (NOTE: the tasmop declaration is not part of this patch). }
+    modifyflags: array[tasmop] of tasmop =
+      (a_none, a_add_, a_add_, a_addo_, a_addo_, a_addc_, a_addc_, a_addco_, a_addco_,
+        a_adde_, a_adde_, a_addeo_, a_addeo_, {a_addi could be addic_ if sure doesn't disturb carry} a_none, a_addic_, a_addic_, a_none,
+        a_addme_, a_addme_, a_addmeo_, a_addmeo_, a_addze_, a_addze_, a_addzeo_,
+        a_addzeo_, a_and_, a_and_, a_andc_, a_andc_, a_andi_, a_andis_, a_none,
+        a_none, a_none, a_none, a_none, a_none, a_none, a_none, a_none, a_none, a_none,
+        a_none, a_none, a_none, a_none, a_none, a_cntlzw_, a_cntlzw_, a_none,
+        a_none, a_none, a_none, a_none, a_none, a_none, a_none, a_none,
+        a_none, a_none, a_none, a_none, a_none, a_none, a_divw_, a_divw_, a_divwo_, a_divwo_,
+        a_divwu_, a_divwu_, a_divwuo_, a_divwuo_, a_none, a_none, a_none, a_eqv_,
+        a_eqv_, a_extsb_, a_extsb_, a_extsh_, a_extsh_, a_none, a_none, a_none,
+        a_none, a_none, a_none, a_none, a_none, a_none, a_none, a_none,
+        a_none, a_none, a_none, a_none, a_none, a_none, a_none, a_none,
+        a_none, a_none, a_none, a_none, a_none, a_none, a_none, a_none,
+        a_none, a_none, a_none, a_none, a_none, a_none, a_none,
+        a_none, a_none, a_none, a_none, a_none, a_none,
+        a_none, a_none, a_none, a_none, a_none, a_none, a_none,
+        a_none, a_none, a_none, a_none, a_none, a_none, a_none, a_none,
+        a_none, a_none, a_none, a_none, a_none, a_none, a_none, a_none,
+        a_none, a_none, a_none, a_none, a_none, a_none, a_none, a_none, a_none,
+        a_none, a_none, a_none, a_none, a_none, a_none, a_none, a_none, a_none,
+        a_none, a_none, a_none, a_none, a_none, a_none, a_none, a_none, a_none,
+        a_none, a_none, a_none, a_none, a_none, a_none, a_none, a_none,
+        a_none, a_none, a_none, a_none, a_none, a_none, a_none,
+        a_none, a_none, a_none, a_none, a_none, a_none, a_mulhw_,
+        a_mulhw_, a_mulhwu_, a_mulhwu_, a_none, a_mullw_, a_mullw_, a_mullwo_,
+        a_mullwo_, a_nand_, a_nand_, a_neg_, a_neg_, a_nego_, a_nego_, a_nor_, a_nor_,
+        a_or_, a_or_, a_orc_, a_orc_, a_none, a_none, a_none, a_rlwimi_, a_rlwimi_,
+        a_rlwinm_, a_rlwinm_, a_rlwnm_, a_rlwnm_, a_none, a_slw_, a_slw_, a_sraw_, a_sraw_,
+        a_srawi_, a_srawi_,a_srw_, a_srw_, a_none, a_none, a_none, a_none, a_none,
+        a_none, a_none, a_none, a_none, a_none, a_none, a_none, a_none,
+        a_none, a_none, a_none, a_none, a_none, a_none, a_none, a_none, a_none,
+        a_none, a_none, a_none, a_none, a_none, a_subf_, a_subf_, a_subfo_,
+        a_subfo_, a_subfc_, a_subfc_, a_subfco_, a_subfco_, a_subfe_, a_subfe_,
+        a_subfeo_, a_subfeo_, a_none, a_subfme_, a_subfme_, a_subfmeo_, a_subfmeo_,
+        a_subfze_, a_subfze_, a_subfzeo_, a_subfzeo_, a_none, a_none, a_none,
+        a_none, a_none, a_none, a_xor_, a_xor_, a_none, a_none,
+        { simplified mnemonics }
+        a_none, a_none, a_subic_, a_subic_, a_sub_, a_sub_, a_subo_, a_subo_,
+        a_subc_, a_subc_, a_subco_, a_subco_, a_none, a_none, a_none, a_none,
+        a_extlwi_, a_extlwi_, a_extrwi_, a_extrwi_, a_inslwi_, a_inslwi_, a_insrwi_,
+        a_insrwi_, a_rotlwi_, a_rotlwi_, a_rotlw_, a_rotlw_, a_slwi_, a_slwi_,
+        a_srwi_, a_srwi_, a_clrlwi_, a_clrlwi_, a_clrrwi_, a_clrrwi_, a_clrslwi_,
+        a_clrslwi_, a_none, a_none, a_none, a_none, a_none, a_none, a_none,
+        a_none, a_none {move to special purpose reg}, a_none {move from special purpose reg},
+        a_none, a_none, a_none, a_none, a_mr_, a_mr_, a_not_, a_not_, a_none, a_none, a_none,
+        a_none, a_none);
+
+  { Replaces the opcode of p by its flag-setting variant from modifyflags.
+    Returns false and leaves p untouched if modifyflags has no such variant. }
+  function changetomodifyflags(p: taicpu): boolean;
+    begin
+      result := false;
+      if (modifyflags[p.opcode] <> a_none) then
+        begin
+          p.opcode := modifyflags[p.opcode];
+          result := true;
+        end;
+    end;
+
+
+
+  { Post peephole optimizations for a single entry p:
+      - "rlwinm. rA,rS,0,MB,ME" whose mask lies entirely in the upper or the
+        lower halfword becomes andis./andi. with the equivalent 16 bit mask
+      - an instruction whose first two operands are registers and which is
+        followed by "cmpwi [cr0,]reg,0" on its first operand is replaced by
+        its flag-setting variant and the compare is removed
+    Returns true if the caller has to process p again. }
+  function TCpuAsmOptimizer.PostPeepHoleOptsCpu(var p: tai): boolean;
+    var
+      next1: tai;
+    begin
+      result := false;
+      case p.typ of
+        ait_instruction:
+          begin
+            case taicpu(p).opcode of
+              A_RLWINM_:
+                begin
+                  // rlwinm_ is cracked on the G5, andi_/andis_ aren't
+                  // only a mask that does not wrap around (MB <= ME) can be
+                  // expressed as a single 16 bit "and" immediate
+                  if (taicpu(p).oper[2]^.val = 0) and
+                     (taicpu(p).oper[3]^.val <= taicpu(p).oper[4]^.val) then
+                    if (taicpu(p).oper[3]^.val < 16) and
+                       (taicpu(p).oper[4]^.val < 16) then
+                      begin
+                        taicpu(p).opcode := A_ANDIS_;
+                        taicpu(p).oper[2]^.val :=
+                          ((1 shl (16-taicpu(p).oper[3]^.val)) - 1) and
+                          not((1 shl (15-taicpu(p).oper[4]^.val)) - 1);
+                        taicpu(p).clearop(3);
+                        taicpu(p).clearop(4);
+                        taicpu(p).ops := 3;
+                        taicpu(p).opercnt := 2;
+                      end
+                    else if (taicpu(p).oper[3]^.val >= 16) and
+                            (taicpu(p).oper[4]^.val >= 16) then
+                      begin
+                        taicpu(p).opcode := A_ANDI_;
+                        taicpu(p).oper[2]^.val :=
+                          ((1 shl (32-taicpu(p).oper[3]^.val)) - 1) and
+                          not((1 shl (31-taicpu(p).oper[4]^.val)) - 1);
+                        taicpu(p).clearop(3);
+                        taicpu(p).clearop(4);
+                        taicpu(p).ops := 3;
+                        taicpu(p).opercnt := 2;
+                      end;
+                end;
+            end;
+
+            // change "integer operation with destination reg" followed by a
+            // comparison to zero of that reg, with a variant of that integer
+            // operation which sets the flags (if it exists)
+            // only the signed cmpwi qualifies: the flag-setting variants compare
+            // the result as a signed value, so replacing the unsigned cmplwi
+            // would change the outcome of lt/gt tests for values >= $80000000
+            if not(result) and
+               (taicpu(p).ops >= 2) and
+               (taicpu(p).oper[0]^.typ = top_reg) and
+               (taicpu(p).oper[1]^.typ = top_reg) and
+               getnextinstruction(p,next1) and
+               (next1.typ = ait_instruction) and
+               (taicpu(next1).opcode = A_CMPWI) and
+               // make sure the result goes to cr0
+               (((taicpu(next1).ops = 2) and
+                 (taicpu(next1).oper[1]^.val = 0) and
+                 (taicpu(next1).oper[0]^.reg = taicpu(p).oper[0]^.reg)) or
+                ((taicpu(next1).ops = 3) and
+                 (taicpu(next1).oper[2]^.val = 0) and
+                 (taicpu(next1).oper[0]^.typ = top_reg) and
+                 (getsupreg(taicpu(next1).oper[0]^.reg) = RS_CR0) and
+                 (taicpu(next1).oper[1]^.reg = taicpu(p).oper[0]^.reg))) and
+               changetomodifyflags(taicpu(p)) then
+              begin
+                asml.remove(next1);
+                next1.free;
+                result := true;
+              end;
+          end;
+      end;
+    end;
+
 begin
   casmoptimizer:=TCpuAsmOptimizer;
 End.
-