* Some streamlining in x86's OptPass1LEA routine now that the stack
pointer is properly tracked, which also permits some deeper
optimisations on -O1 and -O2 virtually for free.
This commit is contained in:
J. Gareth "Curious Kit" Moreton 2022-02-08 04:56:36 +00:00 committed by FPK
parent 8120fb8b77
commit 305e49c0b8

View File

@ -4911,7 +4911,6 @@ unit aoptx86;
hp1: tai;
ref: Integer;
saveref: treference;
TempReg: TRegister;
Multiple: TCGInt;
begin
Result:=false;
@ -4970,13 +4969,15 @@ unit aoptx86;
end;
end;
{ Don't optimise if the stack or frame pointer is the destination register }
if (taicpu(p).oper[1]^.reg=NR_STACK_POINTER_REG) or (taicpu(p).oper[1]^.reg=current_procinfo.framepointer) then
Exit;
if GetNextInstruction(p,hp1) and
(hp1.typ=ait_instruction) then
begin
if MatchInstruction(hp1,A_MOV,[taicpu(p).opsize]) and
MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) and
MatchOpType(Taicpu(hp1),top_reg,top_reg) and
(taicpu(p).oper[1]^.reg<>NR_STACK_POINTER_REG) then
MatchOpType(Taicpu(hp1),top_reg,top_reg) then
begin
TransferUsedRegs(TmpUsedRegs);
UpdateUsedRegs(TmpUsedRegs, tai(p.next));
@ -4995,335 +4996,332 @@ unit aoptx86;
<op> ...,<ref. with reg1>,...
to
<op> ...,<ref1>,... }
if (taicpu(p).oper[1]^.reg<>current_procinfo.framepointer) and
(taicpu(p).oper[1]^.reg<>NR_STACK_POINTER_REG) and
not(MatchInstruction(hp1,A_LEA,[])) then
{ find a reference which uses reg1 }
if (taicpu(hp1).ops>=1) and (taicpu(hp1).oper[0]^.typ=top_ref) and RegInOp(taicpu(p).oper[1]^.reg,taicpu(hp1).oper[0]^) then
ref:=0
else if (taicpu(hp1).ops>=2) and (taicpu(hp1).oper[1]^.typ=top_ref) and RegInOp(taicpu(p).oper[1]^.reg,taicpu(hp1).oper[1]^) then
ref:=1
else
ref:=-1;
if (ref<>-1) and
{ reg1 must be either the base or the index }
((taicpu(hp1).oper[ref]^.ref^.base=taicpu(p).oper[1]^.reg) xor (taicpu(hp1).oper[ref]^.ref^.index=taicpu(p).oper[1]^.reg)) then
begin
{ find a reference which uses reg1 }
if (taicpu(hp1).ops>=1) and (taicpu(hp1).oper[0]^.typ=top_ref) and RegInOp(taicpu(p).oper[1]^.reg,taicpu(hp1).oper[0]^) then
ref:=0
else if (taicpu(hp1).ops>=2) and (taicpu(hp1).oper[1]^.typ=top_ref) and RegInOp(taicpu(p).oper[1]^.reg,taicpu(hp1).oper[1]^) then
ref:=1
{ reg1 can be removed from the reference }
saveref:=taicpu(hp1).oper[ref]^.ref^;
if taicpu(hp1).oper[ref]^.ref^.base=taicpu(p).oper[1]^.reg then
taicpu(hp1).oper[ref]^.ref^.base:=NR_NO
else if taicpu(hp1).oper[ref]^.ref^.index=taicpu(p).oper[1]^.reg then
taicpu(hp1).oper[ref]^.ref^.index:=NR_NO
else
ref:=-1;
if (ref<>-1) and
{ reg1 must be either the base or the index }
((taicpu(hp1).oper[ref]^.ref^.base=taicpu(p).oper[1]^.reg) xor (taicpu(hp1).oper[ref]^.ref^.index=taicpu(p).oper[1]^.reg)) then
begin
{ reg1 can be removed from the reference }
saveref:=taicpu(hp1).oper[ref]^.ref^;
if taicpu(hp1).oper[ref]^.ref^.base=taicpu(p).oper[1]^.reg then
taicpu(hp1).oper[ref]^.ref^.base:=NR_NO
else if taicpu(hp1).oper[ref]^.ref^.index=taicpu(p).oper[1]^.reg then
taicpu(hp1).oper[ref]^.ref^.index:=NR_NO
else
Internalerror(2019111201);
{ check if we can insert all data of the lea into the second instruction }
if ((taicpu(hp1).oper[ref]^.ref^.base=taicpu(p).oper[1]^.reg) or (taicpu(hp1).oper[ref]^.ref^.scalefactor <= 1)) and
((taicpu(p).oper[0]^.ref^.base=NR_NO) or (taicpu(hp1).oper[ref]^.ref^.base=NR_NO)) and
((taicpu(p).oper[0]^.ref^.index=NR_NO) or (taicpu(hp1).oper[ref]^.ref^.index=NR_NO)) and
((taicpu(p).oper[0]^.ref^.symbol=nil) or (taicpu(hp1).oper[ref]^.ref^.symbol=nil)) and
((taicpu(p).oper[0]^.ref^.relsymbol=nil) or (taicpu(hp1).oper[ref]^.ref^.relsymbol=nil)) and
((taicpu(p).oper[0]^.ref^.scalefactor <= 1) or (taicpu(hp1).oper[ref]^.ref^.scalefactor <= 1)) and
{ Segment register of p.oper[0]^.ref will be NR_NO already }
(taicpu(hp1).oper[ref]^.ref^.segment=NR_NO)
Internalerror(2019111201);
{ check if we can insert all data of the lea into the second instruction }
if ((taicpu(hp1).oper[ref]^.ref^.base=taicpu(p).oper[1]^.reg) or (taicpu(hp1).oper[ref]^.ref^.scalefactor <= 1)) and
((taicpu(p).oper[0]^.ref^.base=NR_NO) or (taicpu(hp1).oper[ref]^.ref^.base=NR_NO)) and
((taicpu(p).oper[0]^.ref^.index=NR_NO) or (taicpu(hp1).oper[ref]^.ref^.index=NR_NO)) and
((taicpu(p).oper[0]^.ref^.symbol=nil) or (taicpu(hp1).oper[ref]^.ref^.symbol=nil)) and
((taicpu(p).oper[0]^.ref^.relsymbol=nil) or (taicpu(hp1).oper[ref]^.ref^.relsymbol=nil)) and
((taicpu(p).oper[0]^.ref^.scalefactor <= 1) or (taicpu(hp1).oper[ref]^.ref^.scalefactor <= 1)) and
(taicpu(p).oper[0]^.ref^.segment=NR_NO) and (taicpu(hp1).oper[ref]^.ref^.segment=NR_NO)
{$ifdef x86_64}
and (abs(taicpu(hp1).oper[ref]^.ref^.offset+taicpu(p).oper[0]^.ref^.offset)<=$7fffffff)
and (((taicpu(p).oper[0]^.ref^.base<>NR_RIP) and (taicpu(p).oper[0]^.ref^.index<>NR_RIP)) or
((taicpu(hp1).oper[ref]^.ref^.base=NR_NO) and (taicpu(hp1).oper[ref]^.ref^.index=NR_NO))
)
and (abs(taicpu(hp1).oper[ref]^.ref^.offset+taicpu(p).oper[0]^.ref^.offset)<=$7fffffff)
and (((taicpu(p).oper[0]^.ref^.base<>NR_RIP) and (taicpu(p).oper[0]^.ref^.index<>NR_RIP)) or
((taicpu(hp1).oper[ref]^.ref^.base=NR_NO) and (taicpu(hp1).oper[ref]^.ref^.index=NR_NO))
)
{$endif x86_64}
then
then
begin
{ reg1 might not be used by the second instruction after it is removed from the reference }
if not(RegInInstruction(taicpu(p).oper[1]^.reg,taicpu(hp1))) then
begin
{ reg1 might not be used by the second instruction after it is removed from the reference }
if not(RegInInstruction(taicpu(p).oper[1]^.reg,taicpu(hp1))) then
TransferUsedRegs(TmpUsedRegs);
UpdateUsedRegs(TmpUsedRegs, tai(p.next));
{ reg1 is not updated so it might not be used afterwards }
if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,TmpUsedRegs)) then
begin
TransferUsedRegs(TmpUsedRegs);
UpdateUsedRegs(TmpUsedRegs, tai(p.next));
{ reg1 is not updated so it might not be used afterwards }
if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,TmpUsedRegs)) then
begin
DebugMsg(SPeepholeOptimization + 'LeaOp2Op done',p);
if taicpu(p).oper[0]^.ref^.base<>NR_NO then
taicpu(hp1).oper[ref]^.ref^.base:=taicpu(p).oper[0]^.ref^.base;
if taicpu(p).oper[0]^.ref^.index<>NR_NO then
taicpu(hp1).oper[ref]^.ref^.index:=taicpu(p).oper[0]^.ref^.index;
if taicpu(p).oper[0]^.ref^.symbol<>nil then
taicpu(hp1).oper[ref]^.ref^.symbol:=taicpu(p).oper[0]^.ref^.symbol;
if taicpu(p).oper[0]^.ref^.relsymbol<>nil then
taicpu(hp1).oper[ref]^.ref^.relsymbol:=taicpu(p).oper[0]^.ref^.relsymbol;
if taicpu(p).oper[0]^.ref^.scalefactor > 1 then
taicpu(hp1).oper[ref]^.ref^.scalefactor:=taicpu(p).oper[0]^.ref^.scalefactor;
inc(taicpu(hp1).oper[ref]^.ref^.offset,taicpu(p).oper[0]^.ref^.offset);
RemoveCurrentP(p, hp1);
result:=true;
exit;
end
end;
DebugMsg(SPeepholeOptimization + 'LeaOp2Op done',p);
if taicpu(p).oper[0]^.ref^.base<>NR_NO then
taicpu(hp1).oper[ref]^.ref^.base:=taicpu(p).oper[0]^.ref^.base;
if taicpu(p).oper[0]^.ref^.index<>NR_NO then
taicpu(hp1).oper[ref]^.ref^.index:=taicpu(p).oper[0]^.ref^.index;
if taicpu(p).oper[0]^.ref^.symbol<>nil then
taicpu(hp1).oper[ref]^.ref^.symbol:=taicpu(p).oper[0]^.ref^.symbol;
if taicpu(p).oper[0]^.ref^.relsymbol<>nil then
taicpu(hp1).oper[ref]^.ref^.relsymbol:=taicpu(p).oper[0]^.ref^.relsymbol;
if taicpu(p).oper[0]^.ref^.scalefactor > 1 then
taicpu(hp1).oper[ref]^.ref^.scalefactor:=taicpu(p).oper[0]^.ref^.scalefactor;
inc(taicpu(hp1).oper[ref]^.ref^.offset,taicpu(p).oper[0]^.ref^.offset);
RemoveCurrentP(p, hp1);
result:=true;
exit;
end
end;
{ recover }
taicpu(hp1).oper[ref]^.ref^:=saveref;
end;
{ recover }
taicpu(hp1).oper[ref]^.ref^:=saveref;
end;
end;
{ for now, we do not mess with the stack pointer, though it might be useful to remove
unneeded lea sequences on the stack pointer; it needs to be tested in detail }
if (taicpu(p).oper[1]^.reg <> NR_STACK_POINTER_REG) and
GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[1]^.reg) then
begin
{ Check common LEA/LEA conditions }
if MatchInstruction(hp1,A_LEA,[taicpu(p).opsize]) and
(taicpu(p).oper[1]^.reg = taicpu(hp1).oper[1]^.reg) and
(taicpu(p).oper[0]^.ref^.relsymbol = nil) and
(taicpu(p).oper[0]^.ref^.symbol = nil) and
(taicpu(hp1).oper[0]^.ref^.relsymbol = nil) and
{ Since we're merging two LEA instructions, the segment registers don't matter }
(taicpu(hp1).oper[0]^.ref^.symbol = nil) and
(
(taicpu(p).oper[0]^.ref^.base = NR_NO) or { Don't call RegModifiedBetween unnecessarily }
not(RegModifiedBetween(taicpu(p).oper[0]^.ref^.base,p,hp1))
) and (
(taicpu(p).oper[0]^.ref^.index = taicpu(p).oper[0]^.ref^.base) or { Don't call RegModifiedBetween unnecessarily }
(taicpu(p).oper[0]^.ref^.index = NR_NO) or
not(RegModifiedBetween(taicpu(p).oper[0]^.ref^.index,p,hp1))
) then
if RegInInstruction(DestinationReg, hp1) or
{ Check further ahead (up to 2 instructions ahead for -O2) }
GetNextInstructionUsingReg(hp1,hp1,taicpu(p).oper[1]^.reg) then
begin
{ changes
lea (regX,scale), reg1
lea offset(reg1,reg1), reg1
to
lea offset(regX,scale*2), reg1
and
lea (regX,scale1), reg1
lea offset(reg1,scale2), reg1
to
lea offset(regX,scale1*scale2), reg1
... so long as the final scale does not exceed 8
(Similarly, allow the first instruction to be "lea (regX,regX),reg1")
}
if (taicpu(p).oper[0]^.ref^.offset = 0) and
(taicpu(hp1).oper[0]^.ref^.index = taicpu(p).oper[1]^.reg) and
{ Check common LEA/LEA conditions }
if MatchInstruction(hp1,A_LEA,[taicpu(p).opsize]) and
(taicpu(p).oper[1]^.reg = taicpu(hp1).oper[1]^.reg) and
(taicpu(p).oper[0]^.ref^.relsymbol = nil) and
(taicpu(p).oper[0]^.ref^.segment = NR_NO) and
(taicpu(p).oper[0]^.ref^.symbol = nil) and
(taicpu(hp1).oper[0]^.ref^.relsymbol = nil) and
(taicpu(hp1).oper[0]^.ref^.segment = NR_NO) and
(taicpu(hp1).oper[0]^.ref^.symbol = nil) and
(
(
(taicpu(p).oper[0]^.ref^.base = NR_NO)
) or (
(taicpu(p).oper[0]^.ref^.scalefactor <= 1) and
(
(taicpu(p).oper[0]^.ref^.base = taicpu(p).oper[0]^.ref^.index) and
not(RegUsedBetween(taicpu(p).oper[0]^.ref^.index, p, hp1))
)
)
) and (
(
{ lea (reg1,scale2), reg1 variant }
(taicpu(hp1).oper[0]^.ref^.base = NR_NO) and
(
(
(taicpu(p).oper[0]^.ref^.base = NR_NO) and
(taicpu(hp1).oper[0]^.ref^.scalefactor * taicpu(p).oper[0]^.ref^.scalefactor <= 8)
) or (
{ lea (regX,regX), reg1 variant }
(taicpu(p).oper[0]^.ref^.base <> NR_NO) and
(taicpu(hp1).oper[0]^.ref^.scalefactor <= 4)
)
)
) or (
{ lea (reg1,reg1), reg1 variant }
(taicpu(hp1).oper[0]^.ref^.base = taicpu(p).oper[1]^.reg) and
(taicpu(hp1).oper[0]^.ref^.scalefactor <= 1)
(taicpu(p).oper[0]^.ref^.base = NR_NO) or { Don't call RegModifiedBetween unnecessarily }
not(RegModifiedBetween(taicpu(p).oper[0]^.ref^.base,p,hp1))
) and (
(taicpu(p).oper[0]^.ref^.index = taicpu(p).oper[0]^.ref^.base) or { Don't call RegModifiedBetween unnecessarily }
(taicpu(p).oper[0]^.ref^.index = NR_NO) or
not(RegModifiedBetween(taicpu(p).oper[0]^.ref^.index,p,hp1))
)
) then
begin
DebugMsg(SPeepholeOptimization + 'LeaLea2Lea 2 done',p);
{ changes
lea (regX,scale), reg1
lea offset(reg1,reg1), reg1
to
lea offset(regX,scale*2), reg1
{ Make everything homogeneous to make calculations easier }
if (taicpu(p).oper[0]^.ref^.base <> NR_NO) then
begin
if taicpu(p).oper[0]^.ref^.index <> NR_NO then
{ Convert lea (regX,regX),reg1 to lea (regX,2),reg1 }
taicpu(p).oper[0]^.ref^.scalefactor := 2
else
taicpu(p).oper[0]^.ref^.index := taicpu(p).oper[0]^.ref^.base;
and
lea (regX,scale1), reg1
lea offset(reg1,scale2), reg1
to
lea offset(regX,scale1*scale2), reg1
taicpu(p).oper[0]^.ref^.base := NR_NO;
end;
... so long as the final scale does not exceed 8
if (taicpu(hp1).oper[0]^.ref^.base = NR_NO) then
begin
{ Just to prevent miscalculations }
if (taicpu(hp1).oper[0]^.ref^.scalefactor = 0) then
taicpu(hp1).oper[0]^.ref^.scalefactor := taicpu(p).oper[0]^.ref^.scalefactor
else
taicpu(hp1).oper[0]^.ref^.scalefactor := taicpu(hp1).oper[0]^.ref^.scalefactor * taicpu(p).oper[0]^.ref^.scalefactor;
end
else
begin
taicpu(hp1).oper[0]^.ref^.base := NR_NO;
taicpu(hp1).oper[0]^.ref^.scalefactor := taicpu(p).oper[0]^.ref^.scalefactor * 2;
end;
taicpu(hp1).oper[0]^.ref^.index := taicpu(p).oper[0]^.ref^.index;
RemoveCurrentP(p);
result:=true;
exit;
end
{ changes
lea offset1(regX), reg1
lea offset2(reg1), reg1
to
lea offset1+offset2(regX), reg1 }
else if
(
(taicpu(hp1).oper[0]^.ref^.index = taicpu(p).oper[1]^.reg) and
(taicpu(p).oper[0]^.ref^.index = NR_NO)
) or (
(taicpu(hp1).oper[0]^.ref^.base = taicpu(p).oper[1]^.reg) and
(taicpu(hp1).oper[0]^.ref^.scalefactor <= 1) and
(
(Similarly, allow the first instruction to be "lea (regX,regX),reg1")
}
if (taicpu(p).oper[0]^.ref^.base<>NR_STACK_POINTER_REG) and { lea (%rsp,scale),reg is not a valid encoding }
(taicpu(p).oper[0]^.ref^.offset = 0) and
(taicpu(hp1).oper[0]^.ref^.index = taicpu(p).oper[1]^.reg) and
(
(taicpu(p).oper[0]^.ref^.index = NR_NO) or
(taicpu(p).oper[0]^.ref^.base = NR_NO)
) or (
(taicpu(p).oper[0]^.ref^.scalefactor <= 1) and
(
(taicpu(p).oper[0]^.ref^.index = NR_NO) or
(taicpu(p).oper[0]^.ref^.base = NR_NO)
) or (
(taicpu(p).oper[0]^.ref^.scalefactor <= 1) and
(
(taicpu(p).oper[0]^.ref^.base = taicpu(p).oper[0]^.ref^.index) and
not(RegUsedBetween(taicpu(p).oper[0]^.ref^.index, p, hp1))
)
)
) and (
(
{ lea (reg1,scale2), reg1 variant }
(taicpu(hp1).oper[0]^.ref^.base = NR_NO) and
(
(taicpu(p).oper[0]^.ref^.index = taicpu(p).oper[0]^.ref^.base) and
(
(taicpu(hp1).oper[0]^.ref^.index = NR_NO) or
(taicpu(hp1).oper[0]^.ref^.base = NR_NO)
(taicpu(p).oper[0]^.ref^.base = NR_NO) and
(taicpu(hp1).oper[0]^.ref^.scalefactor * taicpu(p).oper[0]^.ref^.scalefactor <= 8)
) or (
{ lea (regX,regX), reg1 variant }
(taicpu(p).oper[0]^.ref^.base <> NR_NO) and
(taicpu(hp1).oper[0]^.ref^.scalefactor <= 4)
)
)
) or (
{ lea (reg1,reg1), reg1 variant }
(taicpu(hp1).oper[0]^.ref^.base = taicpu(p).oper[1]^.reg) and
(taicpu(hp1).oper[0]^.ref^.scalefactor <= 1)
)
) then
begin
DebugMsg(SPeepholeOptimization + 'LeaLea2Lea 2 done',p);
{ Make everything homogeneous to make calculations easier }
if (taicpu(p).oper[0]^.ref^.base <> NR_NO) then
begin
if taicpu(p).oper[0]^.ref^.index <> NR_NO then
{ Convert lea (regX,regX),reg1 to lea (regX,2),reg1 }
taicpu(p).oper[0]^.ref^.scalefactor := 2
else
taicpu(p).oper[0]^.ref^.index := taicpu(p).oper[0]^.ref^.base;
taicpu(p).oper[0]^.ref^.base := NR_NO;
end;
if (taicpu(hp1).oper[0]^.ref^.base = NR_NO) then
begin
{ Just to prevent miscalculations }
if (taicpu(hp1).oper[0]^.ref^.scalefactor = 0) then
taicpu(hp1).oper[0]^.ref^.scalefactor := taicpu(p).oper[0]^.ref^.scalefactor
else
taicpu(hp1).oper[0]^.ref^.scalefactor := taicpu(hp1).oper[0]^.ref^.scalefactor * taicpu(p).oper[0]^.ref^.scalefactor;
end
else
begin
taicpu(hp1).oper[0]^.ref^.base := NR_NO;
taicpu(hp1).oper[0]^.ref^.scalefactor := taicpu(p).oper[0]^.ref^.scalefactor * 2;
end;
taicpu(hp1).oper[0]^.ref^.index := taicpu(p).oper[0]^.ref^.index;
RemoveCurrentP(p);
result:=true;
exit;
end
{ changes
lea offset1(regX), reg1
lea offset2(reg1), reg1
to
lea offset1+offset2(regX), reg1 }
else if
(
(taicpu(hp1).oper[0]^.ref^.index = taicpu(p).oper[1]^.reg) and
(taicpu(p).oper[0]^.ref^.index = NR_NO)
) or (
(taicpu(hp1).oper[0]^.ref^.base = taicpu(p).oper[1]^.reg) and
(taicpu(hp1).oper[0]^.ref^.scalefactor <= 1) and
(
(
(taicpu(p).oper[0]^.ref^.index = NR_NO) or
(taicpu(p).oper[0]^.ref^.base = NR_NO)
) or (
(taicpu(p).oper[0]^.ref^.scalefactor <= 1) and
(
(taicpu(p).oper[0]^.ref^.index = NR_NO) or
(
(taicpu(p).oper[0]^.ref^.index = taicpu(p).oper[0]^.ref^.base) and
(
(taicpu(hp1).oper[0]^.ref^.index = NR_NO) or
(taicpu(hp1).oper[0]^.ref^.base = NR_NO)
)
)
)
)
)
)
)
) then
begin
DebugMsg(SPeepholeOptimization + 'LeaLea2Lea 1 done',p);
if taicpu(hp1).oper[0]^.ref^.index=taicpu(p).oper[1]^.reg then
) then
begin
taicpu(hp1).oper[0]^.ref^.index:=taicpu(p).oper[0]^.ref^.base;
inc(taicpu(hp1).oper[0]^.ref^.offset,taicpu(p).oper[0]^.ref^.offset*max(taicpu(hp1).oper[0]^.ref^.scalefactor,1));
{ if the register is used as index and base, we have to increase for base as well
and adapt base }
if taicpu(hp1).oper[0]^.ref^.base=taicpu(p).oper[1]^.reg then
DebugMsg(SPeepholeOptimization + 'LeaLea2Lea 1 done',p);
if taicpu(hp1).oper[0]^.ref^.index=taicpu(p).oper[1]^.reg then
begin
taicpu(hp1).oper[0]^.ref^.index:=taicpu(p).oper[0]^.ref^.base;
inc(taicpu(hp1).oper[0]^.ref^.offset,taicpu(p).oper[0]^.ref^.offset*max(taicpu(hp1).oper[0]^.ref^.scalefactor,1));
{ if the register is used as index and base, we have to increase for base as well
and adapt base }
if taicpu(hp1).oper[0]^.ref^.base=taicpu(p).oper[1]^.reg then
begin
taicpu(hp1).oper[0]^.ref^.base:=taicpu(p).oper[0]^.ref^.base;
inc(taicpu(hp1).oper[0]^.ref^.offset,taicpu(p).oper[0]^.ref^.offset);
end;
end
else
begin
taicpu(hp1).oper[0]^.ref^.base:=taicpu(p).oper[0]^.ref^.base;
inc(taicpu(hp1).oper[0]^.ref^.offset,taicpu(p).oper[0]^.ref^.offset);
taicpu(hp1).oper[0]^.ref^.base:=taicpu(p).oper[0]^.ref^.base;
end;
end
else
begin
inc(taicpu(hp1).oper[0]^.ref^.offset,taicpu(p).oper[0]^.ref^.offset);
taicpu(hp1).oper[0]^.ref^.base:=taicpu(p).oper[0]^.ref^.base;
if taicpu(p).oper[0]^.ref^.index<>NR_NO then
begin
taicpu(hp1).oper[0]^.ref^.base:=taicpu(hp1).oper[0]^.ref^.index;
taicpu(hp1).oper[0]^.ref^.index:=taicpu(p).oper[0]^.ref^.index;
taicpu(hp1).oper[0]^.ref^.scalefactor:=taicpu(p).oper[0]^.ref^.scalefactor;
end;
RemoveCurrentP(p);
result:=true;
exit;
end;
if taicpu(p).oper[0]^.ref^.index<>NR_NO then
begin
taicpu(hp1).oper[0]^.ref^.base:=taicpu(hp1).oper[0]^.ref^.index;
taicpu(hp1).oper[0]^.ref^.index:=taicpu(p).oper[0]^.ref^.index;
taicpu(hp1).oper[0]^.ref^.scalefactor:=taicpu(p).oper[0]^.ref^.scalefactor;
end;
RemoveCurrentP(p);
result:=true;
exit;
end;
end;
{ Change:
leal/q $x(%reg1),%reg2
...
shll/q $y,%reg2
To:
leal/q $(x*2^y)(%reg1,2^y),%reg2 (if y <= 3)
}
if MatchInstruction(hp1, A_SHL, [taicpu(p).opsize]) and
MatchOpType(taicpu(hp1), top_const, top_reg) and
(taicpu(hp1).oper[0]^.val <= 3) then
begin
Multiple := 1 shl taicpu(hp1).oper[0]^.val;
TransferUsedRegs(TmpUsedRegs);
UpdateUsedRegs(TmpUsedRegs, tai(hp1.Next));
TempReg := taicpu(hp1).oper[1]^.reg; { Store locally to reduce the number of dereferences }
if
{ This allows the optimisation in some circumstances even if the lea instruction already has a scale factor
(this works even if scalefactor is zero) }
((Multiple * taicpu(p).oper[0]^.ref^.scalefactor) <= 8) and
{ Ensure offset doesn't go out of bounds }
(abs(taicpu(p).oper[0]^.ref^.offset * Multiple) <= $7FFFFFFF) and
not (RegInUsedRegs(NR_DEFAULTFLAGS,TmpUsedRegs)) and
MatchOperand(taicpu(p).oper[1]^, TempReg) and
(
(
not SuperRegistersEqual(taicpu(p).oper[0]^.ref^.base, TempReg) and
(
(taicpu(p).oper[0]^.ref^.index = NR_NO) or
(taicpu(p).oper[0]^.ref^.index = NR_INVALID) or
(
{ Check for lea $x(%reg1,%reg1),%reg2 and treat as if it were lea $x(%reg1,2),%reg2 }
(taicpu(p).oper[0]^.ref^.index = taicpu(p).oper[0]^.ref^.base) and
(taicpu(p).oper[0]^.ref^.scalefactor <= 1)
)
)
) or (
(
(taicpu(p).oper[0]^.ref^.base = NR_NO) or
(taicpu(p).oper[0]^.ref^.base = NR_INVALID)
) and
not SuperRegistersEqual(taicpu(p).oper[0]^.ref^.index, TempReg)
)
) then
{ Change:
leal/q $x(%reg1),%reg2
...
shll/q $y,%reg2
To:
leal/q $(x*2^y)(%reg1,2^y),%reg2 (if y <= 3)
}
if (taicpu(p).oper[0]^.ref^.base<>NR_STACK_POINTER_REG) and { lea (%rsp,scale),reg is not a valid encoding }
MatchInstruction(hp1, A_SHL, [taicpu(p).opsize]) and
MatchOpType(taicpu(hp1), top_const, top_reg) and
(taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) and
(taicpu(hp1).oper[0]^.val <= 3) then
begin
repeat
with taicpu(p).oper[0]^.ref^ do
begin
{ Convert lea $x(%reg1,%reg1),%reg2 to lea $x(%reg1,2),%reg2 }
if index = base then
begin
if Multiple > 4 then
{ Optimisation will no longer work because resultant
scale factor will exceed 8 }
Break;
Multiple := 1 shl taicpu(hp1).oper[0]^.val;
TransferUsedRegs(TmpUsedRegs);
UpdateUsedRegs(TmpUsedRegs, tai(hp1.Next));
base := NR_NO;
scalefactor := 2;
DebugMsg(SPeepholeOptimization + 'lea $x(%reg1,%reg1),%reg2 -> lea $x(%reg1,2),%reg2 for following optimisation', p);
end
else if (base <> NR_NO) and (base <> NR_INVALID) then
if
{ This allows the optimisation in some circumstances even if the lea instruction already has a scale factor
(this works even if scalefactor is zero) }
((Multiple * taicpu(p).oper[0]^.ref^.scalefactor) <= 8) and
{ Ensure offset doesn't go out of bounds }
(abs(taicpu(p).oper[0]^.ref^.offset * Multiple) <= $7FFFFFFF) and
not (RegInUsedRegs(NR_DEFAULTFLAGS,TmpUsedRegs)) and
(
(
not SuperRegistersEqual(taicpu(p).oper[0]^.ref^.base, taicpu(p).oper[1]^.reg) and
(
(taicpu(p).oper[0]^.ref^.index = NR_NO) or
(taicpu(p).oper[0]^.ref^.index = NR_INVALID) or
(
{ Check for lea $x(%reg1,%reg1),%reg2 and treat as if it were lea $x(%reg1,2),%reg2 }
(taicpu(p).oper[0]^.ref^.index = taicpu(p).oper[0]^.ref^.base) and
(taicpu(p).oper[0]^.ref^.scalefactor <= 1)
)
)
) or (
(
(taicpu(p).oper[0]^.ref^.base = NR_NO) or
(taicpu(p).oper[0]^.ref^.base = NR_INVALID)
) and
not SuperRegistersEqual(taicpu(p).oper[0]^.ref^.index, taicpu(p).oper[1]^.reg)
)
) then
begin
repeat
with taicpu(p).oper[0]^.ref^ do
begin
{ Scale factor only works on the index register }
index := base;
base := NR_NO;
{ Convert lea $x(%reg1,%reg1),%reg2 to lea $x(%reg1,2),%reg2 }
if index = base then
begin
if Multiple > 4 then
{ Optimisation will no longer work because resultant
scale factor will exceed 8 }
Break;
base := NR_NO;
scalefactor := 2;
DebugMsg(SPeepholeOptimization + 'lea $x(%reg1,%reg1),%reg2 -> lea $x(%reg1,2),%reg2 for following optimisation', p);
end
else if (base <> NR_NO) and (base <> NR_INVALID) then
begin
{ Scale factor only works on the index register }
index := base;
base := NR_NO;
end;
{ For safety }
if scalefactor <= 1 then
begin
DebugMsg(SPeepholeOptimization + 'LeaShl2Lea 1', p);
scalefactor := Multiple;
end
else
begin
DebugMsg(SPeepholeOptimization + 'LeaShl2Lea 2', p);
scalefactor := scalefactor * Multiple;
end;
offset := offset * Multiple;
end;
{ For safety }
if scalefactor <= 1 then
begin
DebugMsg(SPeepholeOptimization + 'LeaShl2Lea 1', p);
scalefactor := Multiple;
end
else
begin
DebugMsg(SPeepholeOptimization + 'LeaShl2Lea 2', p);
scalefactor := scalefactor * Multiple;
end;
offset := offset * Multiple;
end;
RemoveInstruction(hp1);
Result := True;
Exit;
{ This repeat..until loop exists for the benefit of Break }
until True;
RemoveInstruction(hp1);
Result := True;
Exit;
{ This repeat..until loop exists for the benefit of Break }
until True;
end;
end;
end;
end;