{ Mirror of https://gitlab.com/freepascal.org/fpc/source.git
  (synced 2025-04-05 05:58:02 +02:00; 487 lines, 16 KiB, ObjectPascal) }
{
|
|
Copyright (c) 1998-2002 by Florian Klaempfl and Jonas Maebe
|
|
|
|
This unit contains the peephole optimizer for i386
|
|
|
|
This program is free software; you can redistribute it and/or modify
|
|
it under the terms of the GNU General Public License as published by
|
|
the Free Software Foundation; either version 2 of the License, or
|
|
(at your option) any later version.
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with this program; if not, write to the Free Software
|
|
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
|
|
|
****************************************************************************
|
|
}
|
|
|
|
unit aoptcpu;
|
|
|
|
{$i fpcdefs.inc}
|
|
|
|
{$ifdef EXTDEBUG}
|
|
{$define DEBUG_AOPTCPU}
|
|
{$endif EXTDEBUG}
|
|
|
|
Interface
|
|
|
|
uses
|
|
cgbase,
|
|
cpubase, aopt, aoptx86,
|
|
Aasmbase,aasmtai,aasmdata;
|
|
|
|
Type
|
|
TCpuAsmOptimizer = class(TX86AsmOptimizer)
    { i386 overrides of the four per-entry peephole hooks inherited from
      TX86AsmOptimizer.  Each hook receives the current assembler list
      entry in p (passed by reference, so the hook may advance or replace
      it) and returns a boolean; the implementations in this unit set it
      to True when they changed something or moved p on. }

    { Run before the main passes }
    function PrePeepHoleOptsCpu(var p: tai): boolean; override;
    { First main pass }
    function PeepHoleOptPass1Cpu(var p: tai): boolean; override;
    { Second main pass }
    function PeepHoleOptPass2Cpu(var p: tai): boolean; override;
    { Run after the main passes }
    function PostPeepHoleOptsCpu(var p : tai) : boolean; override;
  end;
|
|
|
|
Var
|
|
AsmOptimizer : TCpuAsmOptimizer;
|
|
|
|
Implementation
|
|
|
|
uses
|
|
verbose,globtype,globals,
|
|
cpuinfo,
|
|
aasmcpu,
|
|
aoptutils,
|
|
aasmcfi,
|
|
procinfo,
|
|
cgutils,
|
|
{ units we should get rid of: }
|
|
symsym,symconst;
|
|
|
|
|
|
{ Checks if the register is a 32 bit general purpose register.

  What is actually tested: the register type must be R_INTREGISTER and its
  super register must lie in the range RS_EAX..RS_EBX.  The sub-register
  (i.e. the access width) is not inspected here. }
function isgp32reg(reg: TRegister): boolean;
  begin
    isgp32reg:=false;
    if getregtype(reg)<>R_INTREGISTER then
      exit;
    { warnings are silenced around the range test - presumably because one of
      the bounds comparisons is trivially true for the super register type
      (TODO confirm) }
{$push}{$warnings off}
    if getsupreg(reg)<RS_EAX then
      exit;
    isgp32reg:=getsupreg(reg)<=RS_EBX;
{$pop}
  end;
|
|
|
|
|
|
{ Returns true if at least one operand of p is a memory reference whose
  segment register is set (i.e. differs from NR_NO); false otherwise,
  including when p has no operands at all. }
function InsContainsSegRef(p: taicpu): boolean;
  var
    opidx: longint;
  begin
    result:=false;
    for opidx:=0 to p.opercnt-1 do
      begin
        { only reference operands carry a segment field }
        if p.oper[opidx]^.typ<>top_ref then
          continue;
        if p.oper[opidx]^.ref^.segment<>NR_NO then
          begin
            result:=true;
            exit;
          end;
      end;
  end;
|
|
|
|
|
|
{ Pre-peephole hook for i386.

  p is the current assembler list entry; it may be advanced by this routine.
  Returns True if an instruction was changed, or if the
  aoc_ForceNewIteration flag was pending and p was stepped forward. }
function TCPUAsmOPtimizer.PrePeepHoleOptsCpu(var p: tai): boolean;
  begin
    Result:=False;

    { Instructions using a segment override are left alone: step over every
      consecutive one.  Nothing's actually changed by doing so, so Result
      stays False; we simply carry on with whatever follows. }
    while (p.typ=ait_instruction) and InsContainsSegRef(taicpu(p)) do
      p:=tai(p.next);

    if p.typ=ait_instruction then
      case taicpu(p).opcode of
        A_XOR:
          begin
            { temporarily change 'xor reg,reg' to 'mov reg,0' to make it
              easier for the CSE. Will be changed back in pass 2 }
            if (taicpu(p).oper[0]^.typ=top_reg) and
               (taicpu(p).oper[1]^.typ=top_reg) and
               (taicpu(p).oper[0]^.reg=taicpu(p).oper[1]^.reg) then
              begin
                taicpu(p).opcode:=A_MOV;
                taicpu(p).loadConst(0,0);
                Result:=True;
              end;
          end;
        A_AND:
          Result:=PrePeepholeOptAND(p);
        A_IMUL:
          Result:=PrePeepholeOptIMUL(p);
        A_SAR,A_SHR:
          Result:=PrePeepholeOptSxx(p);
        else
          { Do nothing };
      end;

    { If this flag is set, something was optimised ahead of p, so move
      ahead by 1 instruction but treat as if Result was set to True }
    if aoc_ForceNewIteration in OptsToCheck then
      begin
        Exclude(OptsToCheck,aoc_ForceNewIteration);
        if Result then
          exit;

        if p.typ in SkipInstr then
          UpdateUsedRegs(p);
        p:=tai(p.Next);
        Result:=True;
      end;
  end;
|
|
|
|
|
|
{ Pass 1 peephole hook for i386.

  Dispatches the instruction at p to the matching OptPass1* handler
  (only the merging of two 16-bit constant pushes is implemented inline).
  Instructions with a segment override are never touched.

  Returns True if a handler reported a change, or if the
  aoc_ForceNewIteration flag was pending and p was stepped forward. }
function TCPUAsmOPtimizer.PeepHoleOptPass1Cpu(var p: tai): boolean;
  var
    next_ins : tai;
    was_skip_entry : boolean;
  begin
    result:=False;

    if p.typ=ait_instruction then
      begin
        { keep the file position in sync with the instruction being examined }
        current_filepos:=taicpu(p).fileinfo;

        { leaves immediately; the aoc_ForceNewIteration handling at the end
          is not reached in this case }
        if InsContainsSegRef(taicpu(p)) then
          exit;

        case taicpu(p).opcode of
          { --- integer / general purpose --- }
          A_MOV:
            Result:=OptPass1MOV(p);
          A_MOVSX,
          A_MOVZX:
            Result:=OptPass1Movx(p);
          A_LEA:
            Result:=OptPass1LEA(p);
          A_ADD:
            Result:=OptPass1ADD(p);
          A_SUB:
            Result:=OptPass1Sub(p);
          A_AND:
            Result:=OptPass1And(p);
          A_IMUL:
            Result:=OptPass1Imul(p);
          A_CMP:
            Result:=OptPass1Cmp(p);
          A_TEST:
            Result:=OptPass1Test(p);
          A_SHL,
          A_SAL:
            Result:=OptPass1SHLSAL(p);
          A_SHR:
            Result:=OptPass1SHR(p);
          A_SHRX,
          A_SHLX:
            Result:=OptPass1SHXX(p);
          A_CMOVcc:
            Result:=OptPass1CMOVcc(p);
          A_Jcc:
            Result:=OptPass1Jcc(p);
          A_CLC,
          A_STC:
            Result:=OptPass1STCCLC(p);
          A_PUSH:
            begin
              { Two consecutive word-sized pushes of constants are merged
                into a single dword push; the constant pushed first becomes
                the upper 16 bits of the combined value }
              if (taicpu(p).opsize=S_W) and
                 (taicpu(p).oper[0]^.typ=top_const) and
                 GetNextInstruction(p,next_ins) and
                 (next_ins.typ=ait_instruction) and
                 (taicpu(next_ins).opcode=A_PUSH) and
                 (taicpu(next_ins).oper[0]^.typ=top_const) and
                 (taicpu(next_ins).opsize=S_W) then
                begin
                  taicpu(p).changeopsize(S_L);
                  taicpu(p).loadConst(0,
                    (taicpu(p).oper[0]^.val shl 16)+word(taicpu(next_ins).oper[0]^.val));
                  { the second push is now redundant }
                  asml.remove(next_ins);
                  next_ins.free;
                  Result:=true;
                end;
            end;

          { --- x87 --- }
          A_FLD:
            Result:=OptPass1FLD(p);
          A_FSTP,
          A_FISTP:
            Result:=OptPass1FSTP(p);

          { --- SSE / AVX --- }
          A_XORPS,
          A_XORPD,
          A_PXOR:
            Result:=OptPass1PXor(p);
          A_VPXORD,
          A_VPXORQ,
          A_VXORPS,
          A_VXORPD,
          A_VPXOR:
            Result:=OptPass1VPXor(p);
          A_MOVDQA,
          A_MOVAPD,
          A_MOVAPS,
          A_MOVUPD,
          A_MOVUPS,
          A_VMOVAPS,
          A_VMOVAPD,
          A_VMOVUPS,
          A_VMOVUPD:
            Result:=OptPass1_V_MOVAP(p);
          A_VMOVDQA,
          A_VMOVDQU:
            Result:=OptPass1VMOVDQ(p);
          A_VMOVSD,
          A_VMOVSS,
          A_MOVSD,
          A_MOVSS:
            Result:=OptPass1MOVXX(p);
          A_MULSD,
          A_MULSS,
          A_ADDSD,
          A_ADDSS:
            Result:=OptPass1OP(p);
          A_VDIVSD,
          A_VDIVSS,
          A_VSUBSD,
          A_VSUBSS,
          A_VMULSD,
          A_VMULSS,
          A_VADDSD,
          A_VADDSS,
          A_VANDPD,
          A_VANDPS,
          A_VORPD,
          A_VORPS:
            Result:=OptPass1VOP(p);
          A_VCVTSS2SD,
          A_CVTSS2SD:
            Result:=OptPass1_V_Cvtss2sd(p);
          else
            ;
        end;
      end;

    { If this flag is set, force another run of pass 1 even if p wasn't
      changed }
    if aoc_ForceNewIteration in OptsToCheck then
      begin
        Exclude(OptsToCheck,aoc_ForceNewIteration);
        if Result then
          exit;

        { For an entry in SkipInstr the used registers are updated with the
          entry itself before stepping; otherwise they are updated with the
          entry stepped to }
        was_skip_entry:=p.typ in SkipInstr;
        if was_skip_entry then
          UpdateUsedRegs(p);
        p:=tai(p.Next);
        if not was_skip_entry then
          UpdateUsedRegs(p);

        Result:=True;
      end;
  end;
|
|
|
|
|
|
{ Pass 2 peephole hook for i386.

  Dispatches the instruction at p to the matching handler.  Instructions
  with a segment override are never touched.

  Returns True if a handler reported a change, or if the
  aoc_ForceNewIteration flag was pending and p was stepped forward. }
function TCPUAsmOptimizer.PeepHoleOptPass2Cpu(var p: tai): boolean;
  var
    was_skip_entry : boolean;
  begin
    Result:=false;

    if p.typ=ait_instruction then
      begin
        { leaves immediately; the aoc_ForceNewIteration handling at the end
          is not reached in this case }
        if InsContainsSegRef(taicpu(p)) then
          exit;

        case taicpu(p).opcode of
          A_MOV:
            Result:=OptPass2MOV(p);
          A_MOVZX:
            Result:=OptPass2Movx(p);
          A_LEA:
            Result:=OptPass2Lea(p);
          A_ADD:
            Result:=OptPass2ADD(p);
          A_SUB:
            Result:=OptPass2SUB(p);
          A_IMUL:
            Result:=OptPass2Imul(p);
          A_CMP:
            Result:=OptPass2CMP(p);
          A_TEST:
            Result:=OptPass2TEST(p);
          A_Jcc:
            Result:=OptPass2Jcc(p);
          A_JMP:
            Result:=OptPass2Jmp(p);
          A_CMOVcc:
            Result:=OptPass2CMOVcc(p);
          A_SETcc:
            Result:=OptPass2SETcc(p);
          A_CLC,
          A_STC:
            Result:=OptPass2STCCLC(p);
          A_FSTP,
          A_FISTP:
            { NOTE(review): the pass 1 handler is called here as well -
              presumably on purpose; confirm }
            Result:=OptPass1FSTP(p);
          else
            ;
        end;
      end;

    { If this flag is set, force another run of pass 2 even if p wasn't
      changed (-O3 only), but otherwise move p ahead by 1 instruction
      and treat as if Result was set to True }
    if aoc_ForceNewIteration in OptsToCheck then
      begin
        Exclude(OptsToCheck,aoc_ForceNewIteration);
        if Result then
          exit;

        { For an entry in SkipInstr the used registers are updated with the
          entry itself before stepping; otherwise they are updated with the
          entry stepped to }
        was_skip_entry:=p.typ in SkipInstr;
        if was_skip_entry then
          UpdateUsedRegs(p);
        p:=tai(p.Next);
        if not was_skip_entry then
          UpdateUsedRegs(p);

        Result:=True;
      end;
  end;
|
|
|
|
|
|
{ Post-peephole hook for i386.

  Dispatches the instruction at p to the matching PostPeephole* handler;
  the Pentium specific splitting of "movzbl" is implemented inline.
  Instructions with a segment override are never touched.  If no handler
  changed the instruction, its reference operands are passed to
  OptimizeRefs.

  Returns True if something was changed, or if the aoc_ForceNewIteration
  flag was pending and p was stepped forward. }
function TCPUAsmOptimizer.PostPeepHoleOptsCpu(var p : tai) : boolean;

  { Turns the "movzbl src,%reg32" at p into "xorl %reg32,%reg32" (inserted
    in front of p) followed by "movb src,%reg8", where %reg8 is the low byte
    sub-register of %reg32.  The caller must have verified that src does
    not depend on %reg32. }
  procedure SplitMovzblIntoXorMov;
    var
      hp1: tai;
    begin
      { build the xor while oper[1] still names the full 32 bit register }
      hp1:=taicpu.op_reg_reg(A_XOR,S_L,
        taicpu(p).oper[1]^.reg,taicpu(p).oper[1]^.reg);
      InsertLLItem(p.previous,p,hp1);
      taicpu(p).opcode:=A_MOV;
      taicpu(p).changeopsize(S_B);
      setsubreg(taicpu(p).oper[1]^.reg,R_SUBL);
    end;

  var
    CanSplit: boolean;
  begin
    Result:=false;

    if p.typ=ait_instruction then
      begin
        { leaves immediately; the aoc_ForceNewIteration handling at the end
          is not reached in this case }
        if InsContainsSegRef(taicpu(p)) then
          Exit;

        case taicpu(p).opcode of
          A_CALL:
            Result:=PostPeepHoleOptCall(p);
          A_LEA:
            Result:=PostPeepholeOptLea(p);
          A_CMP:
            Result:=PostPeepholeOptCmp(p);
          A_MOV:
            Result:=PostPeepholeOptMov(p);
          A_MOVZX:
            begin
              { if register vars are on, it's possible there is code like }
              {   "cmpl $3,%eax; movzbl 8(%ebp),%ebx; je .Lxxx"           }
              { so we can't safely replace the movzx then with xor/mov,   }
              { since that would change the flags (JM)                    }
              if PostPeepholeOptMovzx(p) then
                Result:=True
              else if not(cs_opt_regvar in current_settings.optimizerswitches) and
                      not(cs_opt_size in current_settings.optimizerswitches) and
                      (current_settings.optimizecputype=cpu_Pentium) and
                      (taicpu(p).opsize=S_BL) and
                      (taicpu(p).oper[1]^.typ=top_reg) and
                      IsGP32Reg(taicpu(p).oper[1]^.reg) then
                begin
                  { Change "movzbl src,%reg2" to
                      "xorl %reg2,%reg2; movb src,%reg2"
                    when optimising for cpu_Pentium.  This is only valid if
                    src does not involve %reg2, because the xor clears %reg2
                    before src is read. }
                  CanSplit:=false;
                  case taicpu(p).oper[0]^.typ of
                    top_reg:
                      { e.g. "movzbl %al,%eax" must stay as it is: the source
                        byte lives inside the destination register }
                      CanSplit:=getsupreg(taicpu(p).oper[0]^.reg)<>
                                getsupreg(taicpu(p).oper[1]^.reg);
                    top_ref:
                      { the address must not be computed from the destination }
                      CanSplit:=(taicpu(p).oper[0]^.ref^.base<>taicpu(p).oper[1]^.reg) and
                                (taicpu(p).oper[0]^.ref^.index<>taicpu(p).oper[1]^.reg);
                    else
                      ;
                  end;

                  if CanSplit then
                    begin
                      SplitMovzblIntoXorMov;
                      Result:=True;
                    end;
                end;
            end;
          A_TEST,
          A_OR:
            Result:=PostPeepholeOptTestOr(p);
          A_AND:
            Result:=PostPeepholeOptAnd(p);
          A_MOVSX:
            Result:=PostPeepholeOptMOVSX(p);
          A_SHR:
            Result:=PostPeepholeOptShr(p);
          A_ADD,
          A_SUB:
            Result:=PostPeepholeOptADDSUB(p);
          A_XOR:
            Result:=PostPeepholeOptXor(p);
          A_RET:
            Result:=PostPeepholeOptRET(p);
          A_VPXOR:
            Result:=PostPeepholeOptVPXOR(p);
          else
            ;
        end;

        { Optimise any reference-type operands (if Result is True, the
          instruction will be checked on the next iteration) }
        if not Result then
          OptimizeRefs(taicpu(p));
      end;

    { If this flag is set, something was optimised ahead of p, so move
      ahead by 1 instruction but treat as if Result was set to True }
    if aoc_ForceNewIteration in OptsToCheck then
      begin
        Exclude(OptsToCheck,aoc_ForceNewIteration);

        if not Result then
          begin
            if p.typ in SkipInstr then
              UpdateUsedRegs(p);

            p:=tai(p.Next);
            Result:=True;
          end;
      end;
  end;
|
|
|
|
|
|
begin
  { Unit initialisation: store the i386 optimizer class in casmoptimizer
    (declared outside this unit - presumably the class reference the
    generic optimizer driver instantiates; confirm) }
  casmoptimizer:=TCpuAsmOptimizer;
end.
|
|
|