From ab446386357ee1633c551be3522f0f57b8639e78 Mon Sep 17 00:00:00 2001
From: sergei
Date: Tue, 21 Jan 2014 14:22:59 +0000
Subject: [PATCH] + SPARC: initial peephole optimizer.

git-svn-id: trunk@26555 -
---
 compiler/sparc/aoptcpu.pas | 296 ++++++++++++++++++++++++++++++++++++-
 1 file changed, 295 insertions(+), 1 deletion(-)

diff --git a/compiler/sparc/aoptcpu.pas b/compiler/sparc/aoptcpu.pas
index cb656af36e..bcd85b4f7a 100644
--- a/compiler/sparc/aoptcpu.pas
+++ b/compiler/sparc/aoptcpu.pas
@@ -28,14 +28,308 @@ unit aoptcpu;
 Interface
 
 uses
-  cpubase, aoptobj, aoptcpub, aopt;
+  cgbase, cpubase, aoptobj, aoptcpub, aopt, aasmtai;
 
 Type
   TCpuAsmOptimizer = class(TAsmOptimizer)
+    function GetNextInstructionUsingReg(Current: tai;
+      var Next: tai; reg: TRegister): Boolean;
+    function TryRemoveMov(var p: tai; opcode: TAsmOp): boolean;
+    function PeepHoleOptPass1Cpu(var p: tai): boolean; override;
+    function RegUsedAfterInstruction(reg: Tregister; p: tai;
+      var AllUsedRegs: TAllUsedRegs): Boolean;
   End;
 
 Implementation
 
+  uses
+    globtype,globals,aasmcpu;
+  { Returns true when instr is an instruction node (ait_instruction) whose
+    opcode equals op. instr itself is dereferenced without a nil check. }
+  function MatchInstruction(const instr: tai; const op: TAsmOp): boolean;
+    begin
+      result :=
+        (instr.typ = ait_instruction) and
+        (taicpu(instr).opcode = op);
+    end;
+
+  { Returns true when oper is a register operand (top_reg) holding exactly reg. }
+  function MatchOperand(const oper: TOper; reg: TRegister): boolean;
+    begin
+      result:=(oper.typ=top_reg) and (oper.reg=reg);
+    end;
+
+  { Returns true when next has three operands, its operands 0 and 2 are both
+    registers and name the same register, and that register is also the
+    register of operand 2 of this.
+    NOTE(review): this.oper[2]^ is read as a register without checking its
+    typ or this.ops; callers presumably guarantee a 3-operand instruction
+    with a register destination - confirm. }
+  function IsSameReg(this,next: taicpu): boolean;
+    begin
+      result:=(next.ops=3) and
+        (next.oper[2]^.typ=top_reg) and
+        (next.oper[0]^.typ=top_reg) and
+        (next.oper[2]^.reg=next.oper[0]^.reg) and
+        (next.oper[2]^.reg=this.oper[2]^.reg);
+    end;
+
+  { Returns true when hp is an instruction whose last operand is the register
+    reg, i.e. the last operand is treated as the destination. Returns false
+    when hp is nil, is not an instruction, or has one of the opcodes listed
+    in the case statement below. }
+  function regLoadedWithNewValue(reg: tregister; hp: tai): boolean;
+    var
+      p: taicpu;
+    begin
+      p:=taicpu(hp); { cast happens before the nil/type check; p is only read after that check }
+      result:=false;
+      if not ((assigned(hp)) and (hp.typ=ait_instruction)) then
+        exit;
+
+      case p.opcode of
+        { These instructions do not write into a register at all }
+        A_NOP,
+        A_FCMPs,A_FCMPd,A_FCMPq,A_CMP,
+        A_BA,A_Bxx,A_FBA,A_FBxx,
+        A_STB,A_STH,A_ST,A_STF,A_STDF:
+          exit;
+      end;
+
+      result:=(p.ops>0) and (p.oper[p.ops-1]^.typ=top_reg) and
+        (p.oper[p.ops-1]^.reg=reg);
+    end;
+
+  { Only the opening of this function is intact in this copy of the patch:
+    it returns false when hp is nil or not an instruction, then starts a
+    loop over an index i. See the review note inside the body. }
+  function instructionLoadsFromReg(const reg: TRegister; const hp: tai): boolean;
+    var
+      p: taicpu;
+      i: longint;
+    begin
+      result:=false;
+      if not (assigned(hp) and (hp.typ=ait_instruction)) then
+        exit;
+      p:=taicpu(hp);
+
+      i:=0; { NOTE(review): the next line is corrupted. Text between "while(i"
+              and "ait_instruction)" appears to have been lost from this copy
+              (likely everything between a less-than and a greater-than sign).
+              The remainder of instructionLoadsFromReg and the header and start
+              of what looks like TCpuAsmOptimizer.GetNextInstructionUsingReg
+              (declared in the class above) are missing. Restore them from the
+              upstream commit before applying this patch. The surviving tail
+              below sets result to false and next to nil when the instruction
+              reached satisfies is_calljmp. }
+      while(iait_instruction) or (RegInInstruction(reg,Next)) or
+        (is_calljmp(taicpu(Next).opcode));
+      if is_calljmp(taicpu(next).opcode) then
+        begin
+          result:=false;
+          next:=nil;
+        end;
+    end;
+
+  { Reports whether reg is still needed after instruction p.
+    First updates the used-register set for reg's register type with p.Next,
+    then returns true only if that set reports reg as used, p itself does not
+    load reg with a new value, and one of the following holds: there is no
+    following instruction, the following instruction reads reg (per
+    instructionLoadsFromReg), or the following instruction does not overwrite
+    reg.
+    NOTE(review): GetNextInstruction(p,p) overwrites the by-value parameter p,
+    so the two calls after it presumably inspect the following instruction
+    while the first regLoadedWithNewValue call inspects the original p; this
+    relies on left-to-right short-circuit evaluation - confirm. }
+  function TCpuAsmOptimizer.RegUsedAfterInstruction(reg: Tregister; p: tai;
+    var AllUsedRegs: TAllUsedRegs): Boolean;
+    begin
+      AllUsedRegs[getregtype(reg)].Update(tai(p.Next),true);
+      RegUsedAfterInstruction :=
+        AllUsedRegs[getregtype(reg)].IsUsed(reg) and
+        not(regLoadedWithNewValue(reg,p)) and
+        (
+          not(GetNextInstruction(p,p)) or
+          instructionLoadsFromReg(reg,p) or
+          not(regLoadedWithNewValue(reg,p))
+        );
+    end;
+
+  { Tries to fold a register move that follows p into p itself by retargeting
+    operand 2 of p to the destination of the move and deleting the move; the
+    exact pattern is shown in the comment at the top of the body.
+    p      - a 3-operand instruction whose operand 2 is the temporary register
+    opcode - the move opcode to look for (callers in this unit pass A_MOV,
+             A_FMOVs or A_FMOVd)
+    NOTE(review): result is set to false on entry and never set to true in the
+    code visible here, so the return value does not tell whether a fold
+    happened - confirm this is intended. }
+  function TCpuAsmOptimizer.TryRemoveMov(var p: tai; opcode: TAsmOp): boolean;
+    var
+      next,hp1: tai;
+      alloc,dealloc: tai_regalloc;
+    begin
+      { Fold
+          op      ...,%reg1
+          ...
+          opcode  %reg1,%reg2
+          dealloc %reg1
+        into
+          op   ...,%reg2
+        opcode may be A_MOV, A_FMOVs, A_FMOVd, etc.
+      }
+      result:=false;
+      if (taicpu(p).ops=3) and
+        { don't mess with instructions using %g0 for destination }
+        (taicpu(p).oper[2]^.reg<>NR_G0) and
+        GetNextInstructionUsingReg(p,next,taicpu(p).oper[2]^.reg) and
+        MatchInstruction(next,opcode) and
+        MatchOperand(taicpu(next).oper[0]^,taicpu(p).oper[2]^.reg) and
+        { the destination register of mov cannot be used between p and next }
+        (not RegUsedBetween(taicpu(next).oper[1]^.reg,p,next)) and
+        { This is necessary so 'mov %reg1,%y' is not folded. Compiler should
+          probably generate A_WRY opcode for this, not A_MOV.
+        }
+        (getregtype(taicpu(next).oper[1]^.reg)<>R_SPECIALREGISTER) then
+        begin
+          { the fold is only performed when a dealloc marker for the temporary
+            register is found starting from the node after the move }
+          dealloc:=FindRegDealloc(taicpu(p).oper[2]^.reg,tai(next.Next));
+          if assigned(dealloc) then
+            begin
+              { taicpu(p).oper[2]^.reg is not used anymore, try to find its allocation
+                and remove it if possible }
+              GetLastInstruction(p,hp1); { NOTE(review): return value ignored; hp1 is
+                                           dereferenced below even if no previous
+                                           instruction was found - confirm this
+                                           cannot happen }
+
+              asml.Remove(dealloc);
+              alloc:=FindRegAlloc(taicpu(p).oper[2]^.reg,tai(hp1.Next));
+              if assigned(alloc) then
+                begin
+                  asml.Remove(alloc);
+                  alloc.free;
+                  dealloc.free;
+                end
+              else
+                asml.InsertAfter(dealloc,p); { no alloc marker found: keep the dealloc, moved to right after p }
+
+              { try to move the allocation of the target register }
+              GetLastInstruction(next,hp1);
+              alloc:=FindRegAlloc(taicpu(next).oper[1]^.reg,tai(hp1.Next));
+              if assigned(alloc) then
+                begin
+                  asml.Remove(alloc);
+                  asml.InsertBefore(alloc,p);
+                  { adjust used regs }
+                  IncludeRegInUsedRegs(taicpu(next).oper[1]^.reg,UsedRegs);
+                end;
+
+              { finally get rid of the mov }
+              taicpu(p).loadreg(2,taicpu(next).oper[1]^.reg);
+              asml.remove(next);
+              next.free;
+            end;
+        end;
+    end;
+
+  { CPU-specific pass-1 peephole hook. For instruction nodes it dispatches on
+    the opcode of p:
+      A_SLL  - drops a shift-left/shift-right pair by 16 that feeds an A_STH
+               store, otherwise tries TryRemoveMov with A_MOV
+      A_AND  - drops a following sll/sra pair by 24 when the and-constant is
+               at most $7f, or drops an "and 255" that feeds an A_STB store,
+               otherwise tries TryRemoveMov with A_MOV
+      other listed integer opcodes            - TryRemoveMov with A_MOV
+      listed single-precision float opcodes   - TryRemoveMov with A_FMOVs
+      listed double-precision float opcodes   - TryRemoveMov with A_FMOVd
+    When p itself is deleted, p is set to the store instruction that consumed
+    its result. }
+  function TCpuAsmOptimizer.PeepHoleOptPass1Cpu(var p: tai): boolean;
+    var
+      next,next2: tai;
+      TmpUsedRegs: TAllUsedRegs;
+    begin
+      result:=false; { NOTE(review): never set to true below, even on paths that
+                       remove instructions; TryRemoveMov results are discarded -
+                       confirm this is intended }
+      case p.typ of
+        ait_instruction:
+          begin
+            case taicpu(p).opcode of
+              A_SLL:
+                begin
+                  { if this is sign/zero extension... }
+                  if (taicpu(p).oper[1]^.typ=top_const) and
+                    GetNextInstruction(p,next) and
+                    (MatchInstruction(next,A_SRL) or MatchInstruction(next,A_SRA)) and
+                    IsSameReg(taicpu(p),taicpu(next)) and
+                    (taicpu(next).oper[1]^.typ=top_const) and
+                    (taicpu(next).oper[1]^.val=taicpu(p).oper[1]^.val) and
+                    (taicpu(next).oper[1]^.val=16) and
+                    { ...followed by 16-bit store (possibly with PIC simplification, etc.
+                      in between) }
+                    GetNextInstructionUsingReg(next,next2,taicpu(p).oper[2]^.reg) and
+                    MatchInstruction(next2,A_STH) and
+                    (taicpu(next2).oper[0]^.typ=top_reg) and
+                    (taicpu(next2).oper[0]^.reg=taicpu(p).oper[2]^.reg) and
+                    { the initial register may not be reused }
+                    (not RegUsedBetween(taicpu(p).oper[0]^.reg,next,next2)) then
+                    begin
+                      CopyUsedRegs(TmpUsedRegs);
+                      UpdateUsedRegs(TmpUsedRegs, tai(p.next));
+                      UpdateUsedRegs(TmpUsedRegs, tai(next.next));
+                      if not RegUsedAfterInstruction(taicpu(p).oper[2]^.reg,next2,TmpUsedRegs) then
+                        begin
+                          { store the unshifted source register instead and drop both shifts }
+                          taicpu(next2).loadreg(0,taicpu(p).oper[0]^.reg);
+                          asml.remove(p);
+                          asml.remove(next);
+                          p.free;
+                          next.free;
+                          p:=next2;
+                        end;
+                      ReleaseUsedRegs(TmpUsedRegs);
+                    end
+                  else
+                    TryRemoveMov(p,A_MOV);
+                end;
+
+              A_AND:
+                begin
+                  { Remove sign extension after 'and' if bit 7 of const operand is clear }
+                  if (taicpu(p).oper[1]^.typ=top_const) and
+                    GetNextInstruction(p,next) and
+                    MatchInstruction(next,A_SLL) and
+                    GetNextInstruction(next,next2) and
+                    MatchInstruction(next2,A_SRA) and
+                    IsSameReg(taicpu(p),taicpu(next)) and
+                    IsSameReg(taicpu(p),taicpu(next2)) and
+                    (taicpu(next).oper[1]^.typ=top_const) and
+                    (taicpu(next2).oper[1]^.typ=top_const) and
+                    (taicpu(next).oper[1]^.val=taicpu(next2).oper[1]^.val) and
+                    ({(
+                      (taicpu(p).oper[2]^.val<=$7fff) and
+                      (taicpu(next).oper[2]^.val=16)
+                    ) or }(
+                      (taicpu(p).oper[1]^.val<=$7f) and { NOTE(review): if val is a signed
+                                                          type (not visible here), a negative
+                                                          constant also passes this test
+                                                          although its bit 7 is set - confirm }
+                      (taicpu(next).oper[1]^.val=24)
+                    )) then
+                    begin
+                      asml.remove(next);
+                      asml.remove(next2);
+                      next.free;
+                      next2.free;
+                    end
+                  else if (taicpu(p).oper[1]^.typ=top_const) and
+                    (taicpu(p).oper[1]^.val=255) and
+                    GetNextInstruction(p,next) and
+                    MatchInstruction(next,A_STB) and
+                    (taicpu(next).oper[0]^.typ=top_reg) and
+                    (taicpu(next).oper[0]^.reg=taicpu(p).oper[2]^.reg) then
+                    begin
+                      CopyUsedRegs(TmpUsedRegs);
+                      UpdateUsedRegs(TmpUsedRegs, tai(p.next));
+                      if not RegUsedAfterInstruction(taicpu(p).oper[2]^.reg,next,TmpUsedRegs) then
+                        begin
+                          { store the unmasked source register instead and drop the 'and' }
+                          taicpu(next).loadreg(0,taicpu(p).oper[0]^.reg);
+                          asml.remove(p);
+                          p.free;
+                          p:=next;
+                        end;
+                      ReleaseUsedRegs(TmpUsedRegs);
+                    end
+                  else
+                    TryRemoveMov(p,A_MOV);
+                end;
+
+              A_ADD,A_ADDcc,A_ADDX,
+              A_SUB,A_SUBcc,A_SUBX,
+              A_SRA,
+              A_SRL,
+              A_ANDcc,A_OR,A_ORcc,A_XOR,A_XORcc:
+                TryRemoveMov(p,A_MOV);
+
+              A_FADDs, A_FSUBs, A_FMULs, A_FDIVs,
+              A_FABSs, A_FNEGs, A_FSQRTs,
+              A_FDTOs, A_FITOs, A_FQTOs:
+                TryRemoveMov(p,A_FMOVs);
+
+              A_FADDd, A_FSUBd, A_FMULd, A_FDIVd,
+              A_FABSd, A_FNEGd, A_FSQRTd,
+              A_FSTOd, A_FITOd, A_FQTOd:
+                TryRemoveMov(p,A_FMOVd);
+            end;
+          end;
+      end;
+    end;
+
 begin
   casmoptimizer:=TCpuAsmOptimizer;
 end.