{ Copyright (c) 1998-2002 by Florian Klaempfl and Jonas Maebe This unit contains the peephole optimizer for i386 This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. **************************************************************************** } unit aoptcpu; {$i fpcdefs.inc} {$ifdef EXTDEBUG} {$define DEBUG_AOPTCPU} {$endif EXTDEBUG} Interface uses cgbase, cpubase, aopt, aoptx86, Aasmbase,aasmtai,aasmdata; Type TCpuAsmOptimizer = class(TX86AsmOptimizer) function PrePeepHoleOptsCpu(var p: tai): boolean; override; function PeepHoleOptPass1Cpu(var p: tai): boolean; override; function PeepHoleOptPass2Cpu(var p: tai): boolean; override; function PostPeepHoleOptsCpu(var p : tai) : boolean; override; end; Var AsmOptimizer : TCpuAsmOptimizer; Implementation uses verbose,globtype,globals, cpuinfo, aasmcpu, aoptutils, aasmcfi, procinfo, cgutils, { units we should get rid off: } symsym,symconst; { Checks if the register is a 32 bit general purpose register } function isgp32reg(reg: TRegister): boolean; begin {$push}{$warnings off} isgp32reg:=(getregtype(reg)=R_INTREGISTER) and (getsupreg(reg)>=RS_EAX) and (getsupreg(reg)<=RS_EBX); {$pop} end; { returns true if p contains a memory operand with a segment set } function InsContainsSegRef(p: taicpu): boolean; var i: longint; begin result:=true; for i:=0 to p.opercnt-1 do if (p.oper[i]^.typ=top_ref) and (p.oper[i]^.ref^.segment<>NR_NO) then exit; result:=false; end; function TCPUAsmOPtimizer.PrePeepHoleOptsCpu(var p: tai): boolean; begin repeat Result:=False; case p.typ of ait_instruction: begin if InsContainsSegRef(taicpu(p)) then begin p := tai(p.next); { Nothing's actually changed, so no need to set Result to True, but try again to see if an instruction immediately follows } Continue; end; case taicpu(p).opcode Of A_IMUL: Result:=PrePeepholeOptIMUL(p); A_SAR,A_SHR: Result:=PrePeepholeOptSxx(p); A_AND: Result:=PrePeepholeOptAND(p); A_XOR: begin if (taicpu(p).oper[0]^.typ = top_reg) and (taicpu(p).oper[1]^.typ = top_reg) and (taicpu(p).oper[0]^.reg = taicpu(p).oper[1]^.reg) then { temporarily change this to 'mov reg,0' to make it easier } { for the CSE. Will be changed back in pass 2 } begin taicpu(p).opcode := A_MOV; taicpu(p).loadConst(0,0); Result:=true; end; end; else { Do nothing }; end; end; else { Do nothing }; end; Break; until False; { If this flag is set, something was optimised ahead of p, so move ahead by 1 instruction but treat as if Result was set to True } if aoc_ForceNewIteration in OptsToCheck then begin Exclude(OptsToCheck, aoc_ForceNewIteration); if not Result then begin if (p.typ in SkipInstr) then UpdateUsedRegs(p); p := tai(p.Next); Result := True; end; end; end; function TCPUAsmOPtimizer.PeepHoleOptPass1Cpu(var p: tai): boolean; var hp1 : tai; begin result:=False; case p.Typ Of ait_instruction: begin current_filepos:=taicpu(p).fileinfo; if InsContainsSegRef(taicpu(p)) then exit; case taicpu(p).opcode Of A_ADD: Result:=OptPass1ADD(p); A_AND: Result:=OptPass1And(p); A_CMOVcc: Result:=OptPass1CMOVcc(p); A_IMUL: Result:=OptPass1Imul(p); A_CMP: Result:=OptPass1Cmp(p); A_VPXORD, A_VPXORQ, A_VXORPS, A_VXORPD, A_VPXOR: Result:=OptPass1VPXor(p); A_XORPS, A_XORPD, A_PXOR: Result:=OptPass1PXor(p); A_FLD: Result:=OptPass1FLD(p); A_FSTP,A_FISTP: Result:=OptPass1FSTP(p); A_LEA: Result:=OptPass1LEA(p); A_MOV: Result:=OptPass1MOV(p); A_MOVSX, A_MOVZX : Result:=OptPass1Movx(p); A_TEST: Result:=OptPass1Test(p); A_PUSH: begin if (taicpu(p).opsize = S_W) and (taicpu(p).oper[0]^.typ = Top_Const) and GetNextInstruction(p, hp1) and (tai(hp1).typ = ait_instruction) and (taicpu(hp1).opcode = A_PUSH) and (taicpu(hp1).oper[0]^.typ = Top_Const) and (taicpu(hp1).opsize = S_W) then begin taicpu(p).changeopsize(S_L); taicpu(p).loadConst(0,taicpu(p).oper[0]^.val shl 16 + word(taicpu(hp1).oper[0]^.val)); asml.remove(hp1); hp1.free; Result:=true; end; end; A_SHL, A_SAL: Result:=OptPass1SHLSAL(p); A_SHR: Result:=OptPass1SHR(p); A_SUB: Result:=OptPass1Sub(p); A_Jcc: Result:=OptPass1Jcc(p); A_MOVDQA, A_MOVAPD, A_MOVAPS, A_MOVUPD, A_MOVUPS, A_VMOVAPS, A_VMOVAPD, A_VMOVUPS, A_VMOVUPD: Result:=OptPass1_V_MOVAP(p); A_VDIVSD, A_VDIVSS, A_VSUBSD, A_VSUBSS, A_VMULSD, A_VMULSS, A_VADDSD, A_VADDSS, A_VANDPD, A_VANDPS, A_VORPD, A_VORPS: Result:=OptPass1VOP(p); A_MULSD, A_MULSS, A_ADDSD, A_ADDSS: Result:=OptPass1OP(p); A_VMOVSD, A_VMOVSS, A_MOVSD, A_MOVSS: Result:=OptPass1MOVXX(p); A_SHRX, A_SHLX: Result:=OptPass1SHXX(p); A_VMOVDQA, A_VMOVDQU: Result:=OptPass1VMOVDQ(p); A_VCVTSS2SD, A_CVTSS2SD: Result:=OptPass1_V_Cvtss2sd(p); A_CLC, A_STC: Result:=OptPass1STCCLC(p); else ; end; end; else ; end; { If this flag is set, force another run of pass 1 even if p wasn't changed } if aoc_ForceNewIteration in OptsToCheck then begin Exclude(OptsToCheck, aoc_ForceNewIteration); if not Result then begin if (p.typ in SkipInstr) then begin UpdateUsedRegs(p); p := tai(p.Next); end else begin p := tai(p.Next); UpdateUsedRegs(p); end; Result := True; end; end; end; function TCPUAsmOptimizer.PeepHoleOptPass2Cpu(var p: tai): boolean; begin Result:=false; case p.Typ Of Ait_Instruction: begin if InsContainsSegRef(taicpu(p)) then exit; case taicpu(p).opcode Of A_ADD: Result:=OptPass2ADD(p); A_CMOVcc: Result:=OptPass2CMOVcc(p); A_CMP: Result:=OptPass2CMP(p); A_TEST: Result:=OptPass2TEST(p); A_Jcc: Result:=OptPass2Jcc(p); A_Lea: Result:=OptPass2Lea(p); A_FSTP,A_FISTP: Result:=OptPass1FSTP(p); A_IMUL: Result:=OptPass2Imul(p); A_JMP: Result:=OptPass2Jmp(p); A_MOV: Result:=OptPass2MOV(p); A_MOVZX: Result:=OptPass2Movx(p); A_SUB: Result:=OptPass2SUB(p); A_SETcc: Result:=OptPass2SETcc(p); A_CLC, A_STC: Result:=OptPass2STCCLC(p); else ; end; end; else ; end; { If this flag is set, force another run of pass 2 even if p wasn't changed (-O3 only), but otherwise move p ahead by 1 instruction and treat as if Result was set to True } if aoc_ForceNewIteration in OptsToCheck then begin Exclude(OptsToCheck, aoc_ForceNewIteration); if not Result then begin if (p.typ in SkipInstr) then begin UpdateUsedRegs(p); p := tai(p.Next); end else begin p := tai(p.Next); UpdateUsedRegs(p); end; Result := True; end; end; end; function TCPUAsmOptimizer.PostPeepHoleOptsCpu(var p : tai) : boolean; var hp1: tai; begin Result:=false; case p.Typ Of Ait_Instruction: begin if InsContainsSegRef(taicpu(p)) then Exit; case taicpu(p).opcode Of A_CALL: Result:=PostPeepHoleOptCall(p); A_LEA: Result:=PostPeepholeOptLea(p); A_CMP: Result:=PostPeepholeOptCmp(p); A_MOV: Result:=PostPeepholeOptMov(p); A_MOVZX: { if register vars are on, it's possible there is code like } { "cmpl $3,%eax; movzbl 8(%ebp),%ebx; je .Lxxx" } { so we can't safely replace the movzx then with xor/mov, } { since that would change the flags (JM) } if PostPeepholeOptMovzx(p) then Result := True else if not(cs_opt_regvar in current_settings.optimizerswitches) then begin if (taicpu(p).oper[1]^.typ = top_reg) then if (taicpu(p).oper[0]^.typ = top_reg) then case taicpu(p).opsize of S_BL: begin if IsGP32Reg(taicpu(p).oper[1]^.reg) and not(cs_opt_size in current_settings.optimizerswitches) and (current_settings.optimizecputype = cpu_Pentium) then {Change "movzbl %reg1, %reg2" to "xorl %reg2, %reg2; movb %reg1, %reg2" for Pentium and PentiumMMX} begin hp1 := taicpu.op_reg_reg(A_XOR, S_L, taicpu(p).oper[1]^.reg, taicpu(p).oper[1]^.reg); InsertLLItem(p.previous, p, hp1); taicpu(p).opcode := A_MOV; taicpu(p).changeopsize(S_B); setsubreg(taicpu(p).oper[1]^.reg,R_SUBL); Result := True; end; end; else ; end else if (taicpu(p).oper[0]^.typ = top_ref) and (taicpu(p).oper[0]^.ref^.base <> taicpu(p).oper[1]^.reg) and (taicpu(p).oper[0]^.ref^.index <> taicpu(p).oper[1]^.reg) and not(cs_opt_size in current_settings.optimizerswitches) and IsGP32Reg(taicpu(p).oper[1]^.reg) and (current_settings.optimizecputype = cpu_Pentium) and (taicpu(p).opsize = S_BL) then {changes "movzbl mem, %reg" to "xorl %reg, %reg; movb mem, %reg8" for Pentium and PentiumMMX} begin hp1 := taicpu.Op_reg_reg(A_XOR, S_L, taicpu(p).oper[1]^.reg, taicpu(p).oper[1]^.reg); taicpu(p).opcode := A_MOV; taicpu(p).changeopsize(S_B); setsubreg(taicpu(p).oper[1]^.reg,R_SUBL); InsertLLItem(p.previous, p, hp1); Result := True; end; end; A_TEST, A_OR: Result:=PostPeepholeOptTestOr(p); A_AND: Result:=PostPeepholeOptAnd(p); A_MOVSX: Result:=PostPeepholeOptMOVSX(p); A_SHR: Result:=PostPeepholeOptShr(p); A_ADD, A_SUB: Result:=PostPeepholeOptADDSUB(p); A_XOR: Result:=PostPeepholeOptXor(p); A_RET: Result:=PostPeepholeOptRET(p); A_VPXOR: Result:=PostPeepholeOptVPXOR(p); else ; end; { Optimise any reference-type operands (if Result is True, the instruction will be checked on the next iteration) } if not Result then OptimizeRefs(taicpu(p)); end; else ; end; { If this flag is set, something was optimised ahead of p, so move ahead by 1 instruction but treat as if Result was set to True } if aoc_ForceNewIteration in OptsToCheck then begin Exclude(OptsToCheck, aoc_ForceNewIteration); if not Result then begin if (p.typ in SkipInstr) then UpdateUsedRegs(p); p := tai(p.Next); Result := True; end; end; end; begin casmoptimizer:=TCpuAsmOptimizer; end.