mirror of
https://gitlab.com/freepascal.org/fpc/source.git
synced 2025-09-06 13:50:20 +02:00
* Some streamlining in x86's OptPass1LEA routine now that the stack
pointer is properly tracked, which also permits some deeper optimisations on -O1 and -O2 virtually for free.
This commit is contained in:
parent
8120fb8b77
commit
305e49c0b8
@ -4911,7 +4911,6 @@ unit aoptx86;
|
||||
hp1: tai;
|
||||
ref: Integer;
|
||||
saveref: treference;
|
||||
TempReg: TRegister;
|
||||
Multiple: TCGInt;
|
||||
begin
|
||||
Result:=false;
|
||||
@ -4970,13 +4969,15 @@ unit aoptx86;
|
||||
end;
|
||||
end;
|
||||
|
||||
{ Don't optimise if the stack or frame pointer is the destination register }
|
||||
if (taicpu(p).oper[1]^.reg=NR_STACK_POINTER_REG) or (taicpu(p).oper[1]^.reg=current_procinfo.framepointer) then
|
||||
Exit;
|
||||
if GetNextInstruction(p,hp1) and
|
||||
(hp1.typ=ait_instruction) then
|
||||
begin
|
||||
if MatchInstruction(hp1,A_MOV,[taicpu(p).opsize]) and
|
||||
MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) and
|
||||
MatchOpType(Taicpu(hp1),top_reg,top_reg) and
|
||||
(taicpu(p).oper[1]^.reg<>NR_STACK_POINTER_REG) then
|
||||
MatchOpType(Taicpu(hp1),top_reg,top_reg) then
|
||||
begin
|
||||
TransferUsedRegs(TmpUsedRegs);
|
||||
UpdateUsedRegs(TmpUsedRegs, tai(p.next));
|
||||
@ -4995,335 +4996,332 @@ unit aoptx86;
|
||||
<op> ...,<ref. with reg1>,...
|
||||
to
|
||||
<op> ...,<ref1>,... }
|
||||
if (taicpu(p).oper[1]^.reg<>current_procinfo.framepointer) and
|
||||
(taicpu(p).oper[1]^.reg<>NR_STACK_POINTER_REG) and
|
||||
not(MatchInstruction(hp1,A_LEA,[])) then
|
||||
|
||||
{ find a reference which uses reg1 }
|
||||
if (taicpu(hp1).ops>=1) and (taicpu(hp1).oper[0]^.typ=top_ref) and RegInOp(taicpu(p).oper[1]^.reg,taicpu(hp1).oper[0]^) then
|
||||
ref:=0
|
||||
else if (taicpu(hp1).ops>=2) and (taicpu(hp1).oper[1]^.typ=top_ref) and RegInOp(taicpu(p).oper[1]^.reg,taicpu(hp1).oper[1]^) then
|
||||
ref:=1
|
||||
else
|
||||
ref:=-1;
|
||||
if (ref<>-1) and
|
||||
{ reg1 must be either the base or the index }
|
||||
((taicpu(hp1).oper[ref]^.ref^.base=taicpu(p).oper[1]^.reg) xor (taicpu(hp1).oper[ref]^.ref^.index=taicpu(p).oper[1]^.reg)) then
|
||||
begin
|
||||
{ find a reference which uses reg1 }
|
||||
if (taicpu(hp1).ops>=1) and (taicpu(hp1).oper[0]^.typ=top_ref) and RegInOp(taicpu(p).oper[1]^.reg,taicpu(hp1).oper[0]^) then
|
||||
ref:=0
|
||||
else if (taicpu(hp1).ops>=2) and (taicpu(hp1).oper[1]^.typ=top_ref) and RegInOp(taicpu(p).oper[1]^.reg,taicpu(hp1).oper[1]^) then
|
||||
ref:=1
|
||||
{ reg1 can be removed from the reference }
|
||||
saveref:=taicpu(hp1).oper[ref]^.ref^;
|
||||
if taicpu(hp1).oper[ref]^.ref^.base=taicpu(p).oper[1]^.reg then
|
||||
taicpu(hp1).oper[ref]^.ref^.base:=NR_NO
|
||||
else if taicpu(hp1).oper[ref]^.ref^.index=taicpu(p).oper[1]^.reg then
|
||||
taicpu(hp1).oper[ref]^.ref^.index:=NR_NO
|
||||
else
|
||||
ref:=-1;
|
||||
if (ref<>-1) and
|
||||
{ reg1 must be either the base or the index }
|
||||
((taicpu(hp1).oper[ref]^.ref^.base=taicpu(p).oper[1]^.reg) xor (taicpu(hp1).oper[ref]^.ref^.index=taicpu(p).oper[1]^.reg)) then
|
||||
begin
|
||||
{ reg1 can be removed from the reference }
|
||||
saveref:=taicpu(hp1).oper[ref]^.ref^;
|
||||
if taicpu(hp1).oper[ref]^.ref^.base=taicpu(p).oper[1]^.reg then
|
||||
taicpu(hp1).oper[ref]^.ref^.base:=NR_NO
|
||||
else if taicpu(hp1).oper[ref]^.ref^.index=taicpu(p).oper[1]^.reg then
|
||||
taicpu(hp1).oper[ref]^.ref^.index:=NR_NO
|
||||
else
|
||||
Internalerror(2019111201);
|
||||
{ check if we can insert all data of the lea into the second instruction }
|
||||
if ((taicpu(hp1).oper[ref]^.ref^.base=taicpu(p).oper[1]^.reg) or (taicpu(hp1).oper[ref]^.ref^.scalefactor <= 1)) and
|
||||
((taicpu(p).oper[0]^.ref^.base=NR_NO) or (taicpu(hp1).oper[ref]^.ref^.base=NR_NO)) and
|
||||
((taicpu(p).oper[0]^.ref^.index=NR_NO) or (taicpu(hp1).oper[ref]^.ref^.index=NR_NO)) and
|
||||
((taicpu(p).oper[0]^.ref^.symbol=nil) or (taicpu(hp1).oper[ref]^.ref^.symbol=nil)) and
|
||||
((taicpu(p).oper[0]^.ref^.relsymbol=nil) or (taicpu(hp1).oper[ref]^.ref^.relsymbol=nil)) and
|
||||
((taicpu(p).oper[0]^.ref^.scalefactor <= 1) or (taicpu(hp1).oper[ref]^.ref^.scalefactor <= 1)) and
|
||||
{ Segment register of p.oper[0]^.ref will be NR_NO already }
|
||||
(taicpu(hp1).oper[ref]^.ref^.segment=NR_NO)
|
||||
Internalerror(2019111201);
|
||||
{ check if we can insert all data of the lea into the second instruction }
|
||||
if ((taicpu(hp1).oper[ref]^.ref^.base=taicpu(p).oper[1]^.reg) or (taicpu(hp1).oper[ref]^.ref^.scalefactor <= 1)) and
|
||||
((taicpu(p).oper[0]^.ref^.base=NR_NO) or (taicpu(hp1).oper[ref]^.ref^.base=NR_NO)) and
|
||||
((taicpu(p).oper[0]^.ref^.index=NR_NO) or (taicpu(hp1).oper[ref]^.ref^.index=NR_NO)) and
|
||||
((taicpu(p).oper[0]^.ref^.symbol=nil) or (taicpu(hp1).oper[ref]^.ref^.symbol=nil)) and
|
||||
((taicpu(p).oper[0]^.ref^.relsymbol=nil) or (taicpu(hp1).oper[ref]^.ref^.relsymbol=nil)) and
|
||||
((taicpu(p).oper[0]^.ref^.scalefactor <= 1) or (taicpu(hp1).oper[ref]^.ref^.scalefactor <= 1)) and
|
||||
(taicpu(p).oper[0]^.ref^.segment=NR_NO) and (taicpu(hp1).oper[ref]^.ref^.segment=NR_NO)
|
||||
{$ifdef x86_64}
|
||||
and (abs(taicpu(hp1).oper[ref]^.ref^.offset+taicpu(p).oper[0]^.ref^.offset)<=$7fffffff)
|
||||
and (((taicpu(p).oper[0]^.ref^.base<>NR_RIP) and (taicpu(p).oper[0]^.ref^.index<>NR_RIP)) or
|
||||
((taicpu(hp1).oper[ref]^.ref^.base=NR_NO) and (taicpu(hp1).oper[ref]^.ref^.index=NR_NO))
|
||||
)
|
||||
and (abs(taicpu(hp1).oper[ref]^.ref^.offset+taicpu(p).oper[0]^.ref^.offset)<=$7fffffff)
|
||||
and (((taicpu(p).oper[0]^.ref^.base<>NR_RIP) and (taicpu(p).oper[0]^.ref^.index<>NR_RIP)) or
|
||||
((taicpu(hp1).oper[ref]^.ref^.base=NR_NO) and (taicpu(hp1).oper[ref]^.ref^.index=NR_NO))
|
||||
)
|
||||
{$endif x86_64}
|
||||
then
|
||||
then
|
||||
begin
|
||||
{ reg1 might not be used by the second instruction after it is removed from the reference }
|
||||
if not(RegInInstruction(taicpu(p).oper[1]^.reg,taicpu(hp1))) then
|
||||
begin
|
||||
{ reg1 might not be used by the second instruction after it is removed from the reference }
|
||||
if not(RegInInstruction(taicpu(p).oper[1]^.reg,taicpu(hp1))) then
|
||||
TransferUsedRegs(TmpUsedRegs);
|
||||
UpdateUsedRegs(TmpUsedRegs, tai(p.next));
|
||||
{ reg1 is not updated so it might not be used afterwards }
|
||||
if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,TmpUsedRegs)) then
|
||||
begin
|
||||
TransferUsedRegs(TmpUsedRegs);
|
||||
UpdateUsedRegs(TmpUsedRegs, tai(p.next));
|
||||
{ reg1 is not updated so it might not be used afterwards }
|
||||
if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,TmpUsedRegs)) then
|
||||
begin
|
||||
DebugMsg(SPeepholeOptimization + 'LeaOp2Op done',p);
|
||||
if taicpu(p).oper[0]^.ref^.base<>NR_NO then
|
||||
taicpu(hp1).oper[ref]^.ref^.base:=taicpu(p).oper[0]^.ref^.base;
|
||||
if taicpu(p).oper[0]^.ref^.index<>NR_NO then
|
||||
taicpu(hp1).oper[ref]^.ref^.index:=taicpu(p).oper[0]^.ref^.index;
|
||||
if taicpu(p).oper[0]^.ref^.symbol<>nil then
|
||||
taicpu(hp1).oper[ref]^.ref^.symbol:=taicpu(p).oper[0]^.ref^.symbol;
|
||||
if taicpu(p).oper[0]^.ref^.relsymbol<>nil then
|
||||
taicpu(hp1).oper[ref]^.ref^.relsymbol:=taicpu(p).oper[0]^.ref^.relsymbol;
|
||||
if taicpu(p).oper[0]^.ref^.scalefactor > 1 then
|
||||
taicpu(hp1).oper[ref]^.ref^.scalefactor:=taicpu(p).oper[0]^.ref^.scalefactor;
|
||||
inc(taicpu(hp1).oper[ref]^.ref^.offset,taicpu(p).oper[0]^.ref^.offset);
|
||||
RemoveCurrentP(p, hp1);
|
||||
result:=true;
|
||||
exit;
|
||||
end
|
||||
end;
|
||||
DebugMsg(SPeepholeOptimization + 'LeaOp2Op done',p);
|
||||
if taicpu(p).oper[0]^.ref^.base<>NR_NO then
|
||||
taicpu(hp1).oper[ref]^.ref^.base:=taicpu(p).oper[0]^.ref^.base;
|
||||
if taicpu(p).oper[0]^.ref^.index<>NR_NO then
|
||||
taicpu(hp1).oper[ref]^.ref^.index:=taicpu(p).oper[0]^.ref^.index;
|
||||
if taicpu(p).oper[0]^.ref^.symbol<>nil then
|
||||
taicpu(hp1).oper[ref]^.ref^.symbol:=taicpu(p).oper[0]^.ref^.symbol;
|
||||
if taicpu(p).oper[0]^.ref^.relsymbol<>nil then
|
||||
taicpu(hp1).oper[ref]^.ref^.relsymbol:=taicpu(p).oper[0]^.ref^.relsymbol;
|
||||
if taicpu(p).oper[0]^.ref^.scalefactor > 1 then
|
||||
taicpu(hp1).oper[ref]^.ref^.scalefactor:=taicpu(p).oper[0]^.ref^.scalefactor;
|
||||
inc(taicpu(hp1).oper[ref]^.ref^.offset,taicpu(p).oper[0]^.ref^.offset);
|
||||
RemoveCurrentP(p, hp1);
|
||||
result:=true;
|
||||
exit;
|
||||
end
|
||||
end;
|
||||
{ recover }
|
||||
taicpu(hp1).oper[ref]^.ref^:=saveref;
|
||||
end;
|
||||
{ recover }
|
||||
taicpu(hp1).oper[ref]^.ref^:=saveref;
|
||||
end;
|
||||
|
||||
end;
|
||||
|
||||
{ for now, we do not mess with the stack pointer, though it might be useful to remove
|
||||
unneeded lea sequences on the stack pointer, it needs to be tested in detail }
|
||||
if (taicpu(p).oper[1]^.reg <> NR_STACK_POINTER_REG) and
|
||||
GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[1]^.reg) then
|
||||
begin
|
||||
{ Check common LEA/LEA conditions }
|
||||
if MatchInstruction(hp1,A_LEA,[taicpu(p).opsize]) and
|
||||
(taicpu(p).oper[1]^.reg = taicpu(hp1).oper[1]^.reg) and
|
||||
(taicpu(p).oper[0]^.ref^.relsymbol = nil) and
|
||||
(taicpu(p).oper[0]^.ref^.symbol = nil) and
|
||||
(taicpu(hp1).oper[0]^.ref^.relsymbol = nil) and
|
||||
{ Since we're merging two LEA instructions, the segment registers don't matter }
|
||||
(taicpu(hp1).oper[0]^.ref^.symbol = nil) and
|
||||
(
|
||||
(taicpu(p).oper[0]^.ref^.base = NR_NO) or { Don't call RegModifiedBetween unnecessarily }
|
||||
not(RegModifiedBetween(taicpu(p).oper[0]^.ref^.base,p,hp1))
|
||||
) and (
|
||||
(taicpu(p).oper[0]^.ref^.index = taicpu(p).oper[0]^.ref^.base) or { Don't call RegModifiedBetween unnecessarily }
|
||||
(taicpu(p).oper[0]^.ref^.index = NR_NO) or
|
||||
not(RegModifiedBetween(taicpu(p).oper[0]^.ref^.index,p,hp1))
|
||||
) then
|
||||
if RegInInstruction(DestinationReg, hp1) or
|
||||
{ Check further ahead (up to 2 instructions ahead for -O2) }
|
||||
GetNextInstructionUsingReg(hp1,hp1,taicpu(p).oper[1]^.reg) then
|
||||
begin
|
||||
{ changes
|
||||
lea (regX,scale), reg1
|
||||
lea offset(reg1,reg1), reg1
|
||||
to
|
||||
lea offset(regX,scale*2), reg1
|
||||
|
||||
and
|
||||
lea (regX,scale1), reg1
|
||||
lea offset(reg1,scale2), reg1
|
||||
to
|
||||
lea offset(regX,scale1*scale2), reg1
|
||||
|
||||
... so long as the final scale does not exceed 8
|
||||
|
||||
(Similarly, allow the first instruction to be "lea (regX,regX),reg1")
|
||||
}
|
||||
if (taicpu(p).oper[0]^.ref^.offset = 0) and
|
||||
(taicpu(hp1).oper[0]^.ref^.index = taicpu(p).oper[1]^.reg) and
|
||||
{ Check common LEA/LEA conditions }
|
||||
if MatchInstruction(hp1,A_LEA,[taicpu(p).opsize]) and
|
||||
(taicpu(p).oper[1]^.reg = taicpu(hp1).oper[1]^.reg) and
|
||||
(taicpu(p).oper[0]^.ref^.relsymbol = nil) and
|
||||
(taicpu(p).oper[0]^.ref^.segment = NR_NO) and
|
||||
(taicpu(p).oper[0]^.ref^.symbol = nil) and
|
||||
(taicpu(hp1).oper[0]^.ref^.relsymbol = nil) and
|
||||
(taicpu(hp1).oper[0]^.ref^.segment = NR_NO) and
|
||||
(taicpu(hp1).oper[0]^.ref^.symbol = nil) and
|
||||
(
|
||||
(
|
||||
(taicpu(p).oper[0]^.ref^.base = NR_NO)
|
||||
) or (
|
||||
(taicpu(p).oper[0]^.ref^.scalefactor <= 1) and
|
||||
(
|
||||
(taicpu(p).oper[0]^.ref^.base = taicpu(p).oper[0]^.ref^.index) and
|
||||
not(RegUsedBetween(taicpu(p).oper[0]^.ref^.index, p, hp1))
|
||||
)
|
||||
)
|
||||
) and (
|
||||
(
|
||||
{ lea (reg1,scale2), reg1 variant }
|
||||
(taicpu(hp1).oper[0]^.ref^.base = NR_NO) and
|
||||
(
|
||||
(
|
||||
(taicpu(p).oper[0]^.ref^.base = NR_NO) and
|
||||
(taicpu(hp1).oper[0]^.ref^.scalefactor * taicpu(p).oper[0]^.ref^.scalefactor <= 8)
|
||||
) or (
|
||||
{ lea (regX,regX), reg1 variant }
|
||||
(taicpu(p).oper[0]^.ref^.base <> NR_NO) and
|
||||
(taicpu(hp1).oper[0]^.ref^.scalefactor <= 4)
|
||||
)
|
||||
)
|
||||
) or (
|
||||
{ lea (reg1,reg1), reg1 variant }
|
||||
(taicpu(hp1).oper[0]^.ref^.base = taicpu(p).oper[1]^.reg) and
|
||||
(taicpu(hp1).oper[0]^.ref^.scalefactor <= 1)
|
||||
(taicpu(p).oper[0]^.ref^.base = NR_NO) or { Don't call RegModifiedBetween unnecessarily }
|
||||
not(RegModifiedBetween(taicpu(p).oper[0]^.ref^.base,p,hp1))
|
||||
) and (
|
||||
(taicpu(p).oper[0]^.ref^.index = taicpu(p).oper[0]^.ref^.base) or { Don't call RegModifiedBetween unnecessarily }
|
||||
(taicpu(p).oper[0]^.ref^.index = NR_NO) or
|
||||
not(RegModifiedBetween(taicpu(p).oper[0]^.ref^.index,p,hp1))
|
||||
)
|
||||
) then
|
||||
begin
|
||||
DebugMsg(SPeepholeOptimization + 'LeaLea2Lea 2 done',p);
|
||||
{ changes
|
||||
lea (regX,scale), reg1
|
||||
lea offset(reg1,reg1), reg1
|
||||
to
|
||||
lea offset(regX,scale*2), reg1
|
||||
|
||||
{ Make everything homogeneous to make calculations easier }
|
||||
if (taicpu(p).oper[0]^.ref^.base <> NR_NO) then
|
||||
begin
|
||||
if taicpu(p).oper[0]^.ref^.index <> NR_NO then
|
||||
{ Convert lea (regX,regX),reg1 to lea (regX,2),reg1 }
|
||||
taicpu(p).oper[0]^.ref^.scalefactor := 2
|
||||
else
|
||||
taicpu(p).oper[0]^.ref^.index := taicpu(p).oper[0]^.ref^.base;
|
||||
and
|
||||
lea (regX,scale1), reg1
|
||||
lea offset(reg1,scale2), reg1
|
||||
to
|
||||
lea offset(regX,scale1*scale2), reg1
|
||||
|
||||
taicpu(p).oper[0]^.ref^.base := NR_NO;
|
||||
end;
|
||||
... so long as the final scale does not exceed 8
|
||||
|
||||
if (taicpu(hp1).oper[0]^.ref^.base = NR_NO) then
|
||||
begin
|
||||
{ Just to prevent miscalculations }
|
||||
if (taicpu(hp1).oper[0]^.ref^.scalefactor = 0) then
|
||||
taicpu(hp1).oper[0]^.ref^.scalefactor := taicpu(p).oper[0]^.ref^.scalefactor
|
||||
else
|
||||
taicpu(hp1).oper[0]^.ref^.scalefactor := taicpu(hp1).oper[0]^.ref^.scalefactor * taicpu(p).oper[0]^.ref^.scalefactor;
|
||||
end
|
||||
else
|
||||
begin
|
||||
taicpu(hp1).oper[0]^.ref^.base := NR_NO;
|
||||
taicpu(hp1).oper[0]^.ref^.scalefactor := taicpu(p).oper[0]^.ref^.scalefactor * 2;
|
||||
end;
|
||||
|
||||
taicpu(hp1).oper[0]^.ref^.index := taicpu(p).oper[0]^.ref^.index;
|
||||
RemoveCurrentP(p);
|
||||
result:=true;
|
||||
exit;
|
||||
end
|
||||
|
||||
{ changes
|
||||
lea offset1(regX), reg1
|
||||
lea offset2(reg1), reg1
|
||||
to
|
||||
lea offset1+offset2(regX), reg1 }
|
||||
else if
|
||||
(
|
||||
(taicpu(hp1).oper[0]^.ref^.index = taicpu(p).oper[1]^.reg) and
|
||||
(taicpu(p).oper[0]^.ref^.index = NR_NO)
|
||||
) or (
|
||||
(taicpu(hp1).oper[0]^.ref^.base = taicpu(p).oper[1]^.reg) and
|
||||
(taicpu(hp1).oper[0]^.ref^.scalefactor <= 1) and
|
||||
(
|
||||
(Similarly, allow the first instruction to be "lea (regX,regX),reg1")
|
||||
}
|
||||
if (taicpu(p).oper[0]^.ref^.base<>NR_STACK_POINTER_REG) and { lea (%rsp,scale),reg is not a valid encoding }
|
||||
(taicpu(p).oper[0]^.ref^.offset = 0) and
|
||||
(taicpu(hp1).oper[0]^.ref^.index = taicpu(p).oper[1]^.reg) and
|
||||
(
|
||||
(taicpu(p).oper[0]^.ref^.index = NR_NO) or
|
||||
(taicpu(p).oper[0]^.ref^.base = NR_NO)
|
||||
) or (
|
||||
(taicpu(p).oper[0]^.ref^.scalefactor <= 1) and
|
||||
(
|
||||
(taicpu(p).oper[0]^.ref^.index = NR_NO) or
|
||||
(taicpu(p).oper[0]^.ref^.base = NR_NO)
|
||||
) or (
|
||||
(taicpu(p).oper[0]^.ref^.scalefactor <= 1) and
|
||||
(
|
||||
(taicpu(p).oper[0]^.ref^.base = taicpu(p).oper[0]^.ref^.index) and
|
||||
not(RegUsedBetween(taicpu(p).oper[0]^.ref^.index, p, hp1))
|
||||
)
|
||||
)
|
||||
) and (
|
||||
(
|
||||
{ lea (reg1,scale2), reg1 variant }
|
||||
(taicpu(hp1).oper[0]^.ref^.base = NR_NO) and
|
||||
(
|
||||
(taicpu(p).oper[0]^.ref^.index = taicpu(p).oper[0]^.ref^.base) and
|
||||
(
|
||||
(taicpu(hp1).oper[0]^.ref^.index = NR_NO) or
|
||||
(taicpu(hp1).oper[0]^.ref^.base = NR_NO)
|
||||
(taicpu(p).oper[0]^.ref^.base = NR_NO) and
|
||||
(taicpu(hp1).oper[0]^.ref^.scalefactor * taicpu(p).oper[0]^.ref^.scalefactor <= 8)
|
||||
) or (
|
||||
{ lea (regX,regX), reg1 variant }
|
||||
(taicpu(p).oper[0]^.ref^.base <> NR_NO) and
|
||||
(taicpu(hp1).oper[0]^.ref^.scalefactor <= 4)
|
||||
)
|
||||
)
|
||||
) or (
|
||||
{ lea (reg1,reg1), reg1 variant }
|
||||
(taicpu(hp1).oper[0]^.ref^.base = taicpu(p).oper[1]^.reg) and
|
||||
(taicpu(hp1).oper[0]^.ref^.scalefactor <= 1)
|
||||
)
|
||||
) then
|
||||
begin
|
||||
DebugMsg(SPeepholeOptimization + 'LeaLea2Lea 2 done',p);
|
||||
|
||||
{ Make everything homogeneous to make calculations easier }
|
||||
if (taicpu(p).oper[0]^.ref^.base <> NR_NO) then
|
||||
begin
|
||||
if taicpu(p).oper[0]^.ref^.index <> NR_NO then
|
||||
{ Convert lea (regX,regX),reg1 to lea (regX,2),reg1 }
|
||||
taicpu(p).oper[0]^.ref^.scalefactor := 2
|
||||
else
|
||||
taicpu(p).oper[0]^.ref^.index := taicpu(p).oper[0]^.ref^.base;
|
||||
|
||||
taicpu(p).oper[0]^.ref^.base := NR_NO;
|
||||
end;
|
||||
|
||||
if (taicpu(hp1).oper[0]^.ref^.base = NR_NO) then
|
||||
begin
|
||||
{ Just to prevent miscalculations }
|
||||
if (taicpu(hp1).oper[0]^.ref^.scalefactor = 0) then
|
||||
taicpu(hp1).oper[0]^.ref^.scalefactor := taicpu(p).oper[0]^.ref^.scalefactor
|
||||
else
|
||||
taicpu(hp1).oper[0]^.ref^.scalefactor := taicpu(hp1).oper[0]^.ref^.scalefactor * taicpu(p).oper[0]^.ref^.scalefactor;
|
||||
end
|
||||
else
|
||||
begin
|
||||
taicpu(hp1).oper[0]^.ref^.base := NR_NO;
|
||||
taicpu(hp1).oper[0]^.ref^.scalefactor := taicpu(p).oper[0]^.ref^.scalefactor * 2;
|
||||
end;
|
||||
|
||||
taicpu(hp1).oper[0]^.ref^.index := taicpu(p).oper[0]^.ref^.index;
|
||||
RemoveCurrentP(p);
|
||||
result:=true;
|
||||
exit;
|
||||
end
|
||||
|
||||
{ changes
|
||||
lea offset1(regX), reg1
|
||||
lea offset2(reg1), reg1
|
||||
to
|
||||
lea offset1+offset2(regX), reg1 }
|
||||
else if
|
||||
(
|
||||
(taicpu(hp1).oper[0]^.ref^.index = taicpu(p).oper[1]^.reg) and
|
||||
(taicpu(p).oper[0]^.ref^.index = NR_NO)
|
||||
) or (
|
||||
(taicpu(hp1).oper[0]^.ref^.base = taicpu(p).oper[1]^.reg) and
|
||||
(taicpu(hp1).oper[0]^.ref^.scalefactor <= 1) and
|
||||
(
|
||||
(
|
||||
(taicpu(p).oper[0]^.ref^.index = NR_NO) or
|
||||
(taicpu(p).oper[0]^.ref^.base = NR_NO)
|
||||
) or (
|
||||
(taicpu(p).oper[0]^.ref^.scalefactor <= 1) and
|
||||
(
|
||||
(taicpu(p).oper[0]^.ref^.index = NR_NO) or
|
||||
(
|
||||
(taicpu(p).oper[0]^.ref^.index = taicpu(p).oper[0]^.ref^.base) and
|
||||
(
|
||||
(taicpu(hp1).oper[0]^.ref^.index = NR_NO) or
|
||||
(taicpu(hp1).oper[0]^.ref^.base = NR_NO)
|
||||
)
|
||||
)
|
||||
)
|
||||
)
|
||||
)
|
||||
)
|
||||
)
|
||||
) then
|
||||
begin
|
||||
DebugMsg(SPeepholeOptimization + 'LeaLea2Lea 1 done',p);
|
||||
|
||||
if taicpu(hp1).oper[0]^.ref^.index=taicpu(p).oper[1]^.reg then
|
||||
) then
|
||||
begin
|
||||
taicpu(hp1).oper[0]^.ref^.index:=taicpu(p).oper[0]^.ref^.base;
|
||||
inc(taicpu(hp1).oper[0]^.ref^.offset,taicpu(p).oper[0]^.ref^.offset*max(taicpu(hp1).oper[0]^.ref^.scalefactor,1));
|
||||
{ if the register is used as index and base, we have to increase for base as well
|
||||
and adapt base }
|
||||
if taicpu(hp1).oper[0]^.ref^.base=taicpu(p).oper[1]^.reg then
|
||||
DebugMsg(SPeepholeOptimization + 'LeaLea2Lea 1 done',p);
|
||||
|
||||
if taicpu(hp1).oper[0]^.ref^.index=taicpu(p).oper[1]^.reg then
|
||||
begin
|
||||
taicpu(hp1).oper[0]^.ref^.index:=taicpu(p).oper[0]^.ref^.base;
|
||||
inc(taicpu(hp1).oper[0]^.ref^.offset,taicpu(p).oper[0]^.ref^.offset*max(taicpu(hp1).oper[0]^.ref^.scalefactor,1));
|
||||
{ if the register is used as index and base, we have to increase for base as well
|
||||
and adapt base }
|
||||
if taicpu(hp1).oper[0]^.ref^.base=taicpu(p).oper[1]^.reg then
|
||||
begin
|
||||
taicpu(hp1).oper[0]^.ref^.base:=taicpu(p).oper[0]^.ref^.base;
|
||||
inc(taicpu(hp1).oper[0]^.ref^.offset,taicpu(p).oper[0]^.ref^.offset);
|
||||
end;
|
||||
end
|
||||
else
|
||||
begin
|
||||
taicpu(hp1).oper[0]^.ref^.base:=taicpu(p).oper[0]^.ref^.base;
|
||||
inc(taicpu(hp1).oper[0]^.ref^.offset,taicpu(p).oper[0]^.ref^.offset);
|
||||
taicpu(hp1).oper[0]^.ref^.base:=taicpu(p).oper[0]^.ref^.base;
|
||||
end;
|
||||
end
|
||||
else
|
||||
begin
|
||||
inc(taicpu(hp1).oper[0]^.ref^.offset,taicpu(p).oper[0]^.ref^.offset);
|
||||
taicpu(hp1).oper[0]^.ref^.base:=taicpu(p).oper[0]^.ref^.base;
|
||||
if taicpu(p).oper[0]^.ref^.index<>NR_NO then
|
||||
begin
|
||||
taicpu(hp1).oper[0]^.ref^.base:=taicpu(hp1).oper[0]^.ref^.index;
|
||||
taicpu(hp1).oper[0]^.ref^.index:=taicpu(p).oper[0]^.ref^.index;
|
||||
taicpu(hp1).oper[0]^.ref^.scalefactor:=taicpu(p).oper[0]^.ref^.scalefactor;
|
||||
end;
|
||||
RemoveCurrentP(p);
|
||||
result:=true;
|
||||
exit;
|
||||
end;
|
||||
if taicpu(p).oper[0]^.ref^.index<>NR_NO then
|
||||
begin
|
||||
taicpu(hp1).oper[0]^.ref^.base:=taicpu(hp1).oper[0]^.ref^.index;
|
||||
taicpu(hp1).oper[0]^.ref^.index:=taicpu(p).oper[0]^.ref^.index;
|
||||
taicpu(hp1).oper[0]^.ref^.scalefactor:=taicpu(p).oper[0]^.ref^.scalefactor;
|
||||
end;
|
||||
RemoveCurrentP(p);
|
||||
result:=true;
|
||||
exit;
|
||||
end;
|
||||
end;
|
||||
|
||||
{ Change:
|
||||
leal/q $x(%reg1),%reg2
|
||||
...
|
||||
shll/q $y,%reg2
|
||||
To:
|
||||
leal/q $(x*2^y)(%reg1,2^y),%reg2 (if y <= 3)
|
||||
}
|
||||
if MatchInstruction(hp1, A_SHL, [taicpu(p).opsize]) and
|
||||
MatchOpType(taicpu(hp1), top_const, top_reg) and
|
||||
(taicpu(hp1).oper[0]^.val <= 3) then
|
||||
begin
|
||||
Multiple := 1 shl taicpu(hp1).oper[0]^.val;
|
||||
TransferUsedRegs(TmpUsedRegs);
|
||||
UpdateUsedRegs(TmpUsedRegs, tai(hp1.Next));
|
||||
|
||||
TempReg := taicpu(hp1).oper[1]^.reg; { Store locally to reduce the number of dereferences }
|
||||
if
|
||||
{ This allows the optimisation in some circumstances even if the lea instruction already has a scale factor
|
||||
(this works even if scalefactor is zero) }
|
||||
((Multiple * taicpu(p).oper[0]^.ref^.scalefactor) <= 8) and
|
||||
|
||||
{ Ensure offset doesn't go out of bounds }
|
||||
(abs(taicpu(p).oper[0]^.ref^.offset * Multiple) <= $7FFFFFFF) and
|
||||
|
||||
not (RegInUsedRegs(NR_DEFAULTFLAGS,TmpUsedRegs)) and
|
||||
MatchOperand(taicpu(p).oper[1]^, TempReg) and
|
||||
(
|
||||
(
|
||||
not SuperRegistersEqual(taicpu(p).oper[0]^.ref^.base, TempReg) and
|
||||
(
|
||||
(taicpu(p).oper[0]^.ref^.index = NR_NO) or
|
||||
(taicpu(p).oper[0]^.ref^.index = NR_INVALID) or
|
||||
(
|
||||
{ Check for lea $x(%reg1,%reg1),%reg2 and treat as if it were lea $x(%reg1,2),%reg2 }
|
||||
(taicpu(p).oper[0]^.ref^.index = taicpu(p).oper[0]^.ref^.base) and
|
||||
(taicpu(p).oper[0]^.ref^.scalefactor <= 1)
|
||||
)
|
||||
)
|
||||
) or (
|
||||
(
|
||||
(taicpu(p).oper[0]^.ref^.base = NR_NO) or
|
||||
(taicpu(p).oper[0]^.ref^.base = NR_INVALID)
|
||||
) and
|
||||
not SuperRegistersEqual(taicpu(p).oper[0]^.ref^.index, TempReg)
|
||||
)
|
||||
) then
|
||||
{ Change:
|
||||
leal/q $x(%reg1),%reg2
|
||||
...
|
||||
shll/q $y,%reg2
|
||||
To:
|
||||
leal/q $(x*2^y)(%reg1,2^y),%reg2 (if y <= 3)
|
||||
}
|
||||
if (taicpu(p).oper[0]^.ref^.base<>NR_STACK_POINTER_REG) and { lea (%rsp,scale),reg is not a valid encoding }
|
||||
MatchInstruction(hp1, A_SHL, [taicpu(p).opsize]) and
|
||||
MatchOpType(taicpu(hp1), top_const, top_reg) and
|
||||
(taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) and
|
||||
(taicpu(hp1).oper[0]^.val <= 3) then
|
||||
begin
|
||||
repeat
|
||||
with taicpu(p).oper[0]^.ref^ do
|
||||
begin
|
||||
{ Convert lea $x(%reg1,%reg1),%reg2 to lea $x(%reg1,2),%reg2 }
|
||||
if index = base then
|
||||
begin
|
||||
if Multiple > 4 then
|
||||
{ Optimisation will no longer work because resultant
|
||||
scale factor will exceed 8 }
|
||||
Break;
|
||||
Multiple := 1 shl taicpu(hp1).oper[0]^.val;
|
||||
TransferUsedRegs(TmpUsedRegs);
|
||||
UpdateUsedRegs(TmpUsedRegs, tai(hp1.Next));
|
||||
|
||||
base := NR_NO;
|
||||
scalefactor := 2;
|
||||
DebugMsg(SPeepholeOptimization + 'lea $x(%reg1,%reg1),%reg2 -> lea $x(%reg1,2),%reg2 for following optimisation', p);
|
||||
end
|
||||
else if (base <> NR_NO) and (base <> NR_INVALID) then
|
||||
if
|
||||
{ This allows the optimisation in some circumstances even if the lea instruction already has a scale factor
|
||||
(this works even if scalefactor is zero) }
|
||||
((Multiple * taicpu(p).oper[0]^.ref^.scalefactor) <= 8) and
|
||||
|
||||
{ Ensure offset doesn't go out of bounds }
|
||||
(abs(taicpu(p).oper[0]^.ref^.offset * Multiple) <= $7FFFFFFF) and
|
||||
|
||||
not (RegInUsedRegs(NR_DEFAULTFLAGS,TmpUsedRegs)) and
|
||||
(
|
||||
(
|
||||
not SuperRegistersEqual(taicpu(p).oper[0]^.ref^.base, taicpu(p).oper[1]^.reg) and
|
||||
(
|
||||
(taicpu(p).oper[0]^.ref^.index = NR_NO) or
|
||||
(taicpu(p).oper[0]^.ref^.index = NR_INVALID) or
|
||||
(
|
||||
{ Check for lea $x(%reg1,%reg1),%reg2 and treat as if it were lea $x(%reg1,2),%reg2 }
|
||||
(taicpu(p).oper[0]^.ref^.index = taicpu(p).oper[0]^.ref^.base) and
|
||||
(taicpu(p).oper[0]^.ref^.scalefactor <= 1)
|
||||
)
|
||||
)
|
||||
) or (
|
||||
(
|
||||
(taicpu(p).oper[0]^.ref^.base = NR_NO) or
|
||||
(taicpu(p).oper[0]^.ref^.base = NR_INVALID)
|
||||
) and
|
||||
not SuperRegistersEqual(taicpu(p).oper[0]^.ref^.index, taicpu(p).oper[1]^.reg)
|
||||
)
|
||||
) then
|
||||
begin
|
||||
repeat
|
||||
with taicpu(p).oper[0]^.ref^ do
|
||||
begin
|
||||
{ Scale factor only works on the index register }
|
||||
index := base;
|
||||
base := NR_NO;
|
||||
{ Convert lea $x(%reg1,%reg1),%reg2 to lea $x(%reg1,2),%reg2 }
|
||||
if index = base then
|
||||
begin
|
||||
if Multiple > 4 then
|
||||
{ Optimisation will no longer work because resultant
|
||||
scale factor will exceed 8 }
|
||||
Break;
|
||||
|
||||
base := NR_NO;
|
||||
scalefactor := 2;
|
||||
DebugMsg(SPeepholeOptimization + 'lea $x(%reg1,%reg1),%reg2 -> lea $x(%reg1,2),%reg2 for following optimisation', p);
|
||||
end
|
||||
else if (base <> NR_NO) and (base <> NR_INVALID) then
|
||||
begin
|
||||
{ Scale factor only works on the index register }
|
||||
index := base;
|
||||
base := NR_NO;
|
||||
end;
|
||||
|
||||
{ For safety }
|
||||
if scalefactor <= 1 then
|
||||
begin
|
||||
DebugMsg(SPeepholeOptimization + 'LeaShl2Lea 1', p);
|
||||
scalefactor := Multiple;
|
||||
end
|
||||
else
|
||||
begin
|
||||
DebugMsg(SPeepholeOptimization + 'LeaShl2Lea 2', p);
|
||||
scalefactor := scalefactor * Multiple;
|
||||
end;
|
||||
|
||||
offset := offset * Multiple;
|
||||
end;
|
||||
|
||||
{ For safety }
|
||||
if scalefactor <= 1 then
|
||||
begin
|
||||
DebugMsg(SPeepholeOptimization + 'LeaShl2Lea 1', p);
|
||||
scalefactor := Multiple;
|
||||
end
|
||||
else
|
||||
begin
|
||||
DebugMsg(SPeepholeOptimization + 'LeaShl2Lea 2', p);
|
||||
scalefactor := scalefactor * Multiple;
|
||||
end;
|
||||
|
||||
offset := offset * Multiple;
|
||||
end;
|
||||
RemoveInstruction(hp1);
|
||||
Result := True;
|
||||
Exit;
|
||||
{ This repeat..until loop exists for the benefit of Break }
|
||||
until True;
|
||||
RemoveInstruction(hp1);
|
||||
Result := True;
|
||||
Exit;
|
||||
{ This repeat..until loop exists for the benefit of Break }
|
||||
until True;
|
||||
end;
|
||||
end;
|
||||
end;
|
||||
end;
|
||||
|
Loading…
Reference in New Issue
Block a user