diff --git a/compiler/x86/aoptx86.pas b/compiler/x86/aoptx86.pas
index 8463dfab72..add70a2be8 100644
--- a/compiler/x86/aoptx86.pas
+++ b/compiler/x86/aoptx86.pas
@@ -137,6 +137,8 @@ unit aoptx86;
 
         function DeepMOVOpt(const p_mov: taicpu; const hp: taicpu): Boolean;
 
+        function FuncMov2Func(var p: tai; const hp1: tai): Boolean;
+
         procedure DebugMsg(const s : string; p : tai);inline;
 
         class function IsExitCode(p : tai) : boolean; static;
@@ -2682,6 +2684,100 @@ unit aoptx86;
       end;
 
 
+    { Folds "func ...,%reg2; mov %reg2,%reg3" into "func ...,%reg3".
+
+      p    - the MOV under inspection; on success it is removed through
+             RemoveCurrentp(p, hp1).
+      hp1  - passed straight to RemoveCurrentp (presumably the instruction
+             that follows p - confirm against the callers).
+
+      Returns True if the MOV was folded away, False if nothing changed. }
+    function TX86AsmOptimizer.FuncMov2Func(var p: tai; const hp1: tai): Boolean;
+      var
+        hp2: tai;
+        p_SourceReg, p_TargetReg: TRegister;
+        TargetRegInFunc: Boolean;
+
+      begin
+        Result := False;
+        { Backward optimisation. If we have:
+            func. %reg1,%reg2
+            mov %reg2,%reg3
+            (dealloc %reg2)
+
+          Change to:
+            func. %reg1,%reg3 (see comment below for what a valid func. is)
+
+          Perform similar optimisations with 1, 3 and 4-operand instructions
+          that only have one output.
+        }
+        if MatchOpType(taicpu(p), top_reg, top_reg) then
+          begin
+            p_SourceReg := taicpu(p).oper[0]^.reg;
+            p_TargetReg := taicpu(p).oper[1]^.reg;
+            TransferUsedRegs(TmpUsedRegs);
+            if not RegUsedAfterInstruction(p_SourceReg, p, TmpUsedRegs) and
+              GetLastInstruction(p, hp2) and
+              (hp2.typ = ait_instruction) and
+              { Have to make sure it's an instruction that only reads from
+                the first operands and only writes (not reads or modifies) to
+                the last one; in essence, a pure function such as BSR, POPCNT
+                or ANDN }
+              (
+                (
+                  (taicpu(hp2).ops = 1) and
+                  (insprop[taicpu(hp2).opcode].Ch * [Ch_Wop1] = [Ch_Wop1])
+                ) or
+                (
+                  (taicpu(hp2).ops = 2) and
+                  (insprop[taicpu(hp2).opcode].Ch * [Ch_Rop1, Ch_Wop2] = [Ch_Rop1, Ch_Wop2])
+                ) or
+                (
+                  (taicpu(hp2).ops = 3) and
+                  (insprop[taicpu(hp2).opcode].Ch * [Ch_Rop1, Ch_Rop2, Ch_Wop3] = [Ch_Rop1, Ch_Rop2, Ch_Wop3])
+                ) or
+                (
+                  (taicpu(hp2).ops = 4) and
+                  (insprop[taicpu(hp2).opcode].Ch * [Ch_Rop1, Ch_Rop2, Ch_Rop3, Ch_Wop4] = [Ch_Rop1, Ch_Rop2, Ch_Rop3, Ch_Wop4])
+                )
+              ) and
+              (taicpu(hp2).oper[taicpu(hp2).ops-1]^.typ = top_reg) and
+              (taicpu(hp2).oper[taicpu(hp2).ops-1]^.reg = p_SourceReg) then
+              begin
+                case taicpu(hp2).opcode of
+                  A_FSTSW, A_FNSTSW,
+                  A_IN, A_INS, A_OUT, A_OUTS,
+                  A_CMPS, A_LODS, A_MOVS, A_SCAS, A_STOS:
+                    { These routines have explicit operands, but they are restricted in
+                      what they can be (e.g. IN and OUT can only read from AL, AX or
+                      EAX). }
+                    ;
+                  else
+                    begin
+                      DebugMsg(SPeepholeOptimization + 'Removed MOV and changed destination on previous instruction to optimise register usage (FuncMov2Func)', p);
+
+                      { The check must look at hp2 before its destination is
+                        rewritten; afterwards p_TargetReg is itself an operand of hp2 }
+                      TargetRegInFunc := RegInInstruction(p_TargetReg, hp2);
+                      taicpu(hp2).oper[taicpu(hp2).ops-1]^.reg := p_TargetReg;
+
+                      if not TargetRegInFunc then
+                        begin
+                          { Since we're allocating from an earlier point, we
+                            need to remove the register from the tracking }
+                          ExcludeRegFromUsedRegs(p_TargetReg, TmpUsedRegs);
+                          AllocRegBetween(p_TargetReg, hp2, p, TmpUsedRegs);
+                        end;
+                      RemoveCurrentp(p, hp1);
+                      Result := True;
+                      Exit;
+                    end;
+                end;
+              end;
+          end;
+      end;
+
+
     function TX86AsmOptimizer.OptPass1MOV(var p : tai) : boolean;
       var
         hp1, hp2, hp3: tai;
@@ -4810,76 +4906,11 @@ unit aoptx86;
               end;
           end;
 
-        { Backward optimisation. If we have:
-            func. %reg1,%reg2
-            mov %reg2,%reg3
-            (dealloc %reg2)
-
-          Change to:
-            func. %reg1,%reg3 (see comment below for what a valid func. is)
-
-          Perform similar optimisations with 1, 3 and 4-operand instructions
-          that only have one output.
-        }
-        if MatchOpType(taicpu(p), top_reg, top_reg) then
+        { Backward optimisation shared with OptPass2MOV }
+        if FuncMov2Func(p, hp1) then
           begin
-            p_SourceReg := taicpu(p).oper[0]^.reg;
-            { Remember that p_TargetReg contains taicpu(p).oper[1]^.reg }
-            TransferUsedRegs(TmpUsedRegs);
-            if not RegUsedAfterInstruction(p_SourceReg, p, TmpUsedRegs) and
-              GetLastInstruction(p, hp2) and
-              (hp2.typ = ait_instruction) and
-              { Have to make sure it's an instruction that only reads from
-                the first operands and only writes (not reads or modifies) to
-                the last one; in essence, a pure function such as BSR, POPCNT
-                or ANDN }
-              (
-                (
-                  (taicpu(hp2).ops = 1) and
-                  (insprop[taicpu(hp2).opcode].Ch * [Ch_Wop1] = [Ch_Wop1])
-                ) or
-                (
-                  (taicpu(hp2).ops = 2) and
-                  (insprop[taicpu(hp2).opcode].Ch * [Ch_Rop1, Ch_Wop2] = [Ch_Rop1, Ch_Wop2])
-                ) or
-                (
-                  (taicpu(hp2).ops = 3) and
-                  (insprop[taicpu(hp2).opcode].Ch * [Ch_Rop1, Ch_Rop2, Ch_Wop3] = [Ch_Rop1, Ch_Rop2, Ch_Wop3])
-                ) or
-                (
-                  (taicpu(hp2).ops = 4) and
-                  (insprop[taicpu(hp2).opcode].Ch * [Ch_Rop1, Ch_Rop2, Ch_Rop3, Ch_Wop4] = [Ch_Rop1, Ch_Rop2, Ch_Rop3, Ch_Wop4])
-                )
-              ) and
-              (taicpu(hp2).oper[taicpu(hp2).ops-1]^.typ = top_reg) and
-              (taicpu(hp2).oper[taicpu(hp2).ops-1]^.reg = p_SourceReg) then
-              begin
-                case taicpu(hp2).opcode of
-                  A_FSTSW, A_FNSTSW,
-                  A_IN, A_INS, A_OUT, A_OUTS,
-                  A_CMPS, A_LODS, A_MOVS, A_SCAS, A_STOS:
-                    { These routines have explicit operands, but they are restricted in
-                      what they can be (e.g. IN and OUT can only read from AL, AX or
-                      EAX. }
-                    ;
-                  else
-                    begin
-                      DebugMsg(SPeepholeOptimization + 'Removed MOV and changed destination on previous instruction to optimise register usage (FuncMov2Func)', p);
-                      taicpu(hp2).oper[taicpu(hp2).ops-1]^.reg := p_TargetReg;
-
-                      if not RegInInstruction(p_TargetReg, hp2) then
-                        begin
-                          { Since we're allocating from an earlier point, we
-                            need to remove the register from the tracking }
-                          ExcludeRegFromUsedRegs(p_TargetReg, TmpUsedRegs);
-                          AllocRegBetween(p_TargetReg, hp2, p, TmpUsedRegs);
-                        end;
-                      RemoveCurrentp(p, hp1);
-                      Result := True;
-                      Exit;
-                    end;
-                end;
-              end;
+            Result := True;
+            Exit;
           end;
       end;
 
@@ -9240,6 +9271,12 @@ unit aoptx86;
                 setsubreg(taicpu(hp3).oper[1]^.reg, R_SUBD);
 {$endif x86_64}
               end;
+
+            if FuncMov2Func(p, hp1) then
+              begin
+                Result := True;
+                Exit;
+              end;
           end;
 
 