mirror of
https://gitlab.com/freepascal.org/fpc/source.git
synced 2025-04-08 20:28:13 +02:00
* x86: The LEA/LEA optimisations can now work with a different
destination register and the intermediate register still in use.
This commit is contained in:
parent
931bda5633
commit
77f53ebde3
@ -5643,13 +5643,13 @@ unit aoptx86;
|
||||
|
||||
function TX86AsmOptimizer.OptPass1LEA(var p : tai) : boolean;
|
||||
var
|
||||
hp1: tai;
|
||||
hp1, hp2: tai;
|
||||
ref: Integer;
|
||||
saveref: treference;
|
||||
offsetcalc: Int64;
|
||||
TempReg: TRegister;
|
||||
Multiple: TCGInt;
|
||||
Adjacent: Boolean;
|
||||
Adjacent, IntermediateRegDiscarded: Boolean;
|
||||
begin
|
||||
Result:=false;
|
||||
|
||||
@ -5804,7 +5804,6 @@ unit aoptx86;
|
||||
begin
|
||||
{ Check common LEA/LEA conditions }
|
||||
if MatchInstruction(hp1,A_LEA,[taicpu(p).opsize]) and
|
||||
(taicpu(p).oper[1]^.reg = taicpu(hp1).oper[1]^.reg) and
|
||||
(taicpu(p).oper[0]^.ref^.relsymbol = nil) and
|
||||
(taicpu(p).oper[0]^.ref^.segment = NR_NO) and
|
||||
(taicpu(p).oper[0]^.ref^.symbol = nil) and
|
||||
@ -5827,6 +5826,16 @@ unit aoptx86;
|
||||
)
|
||||
) then
|
||||
begin
|
||||
TransferUsedRegs(TmpUsedRegs);
|
||||
hp2 := p;
|
||||
repeat
|
||||
UpdateUsedRegs(TmpUsedRegs, tai(hp2.Next));
|
||||
until not GetNextInstruction(hp2, hp2) or (hp2 = hp1);
|
||||
|
||||
IntermediateRegDiscarded :=
|
||||
(taicpu(p).oper[1]^.reg = taicpu(hp1).oper[1]^.reg) or
|
||||
not RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs);
|
||||
|
||||
{ changes
|
||||
lea offset1(regX,scale), reg1
|
||||
lea offset2(reg1,reg1), reg2
|
||||
@ -5850,6 +5859,11 @@ unit aoptx86;
|
||||
(Similarly, allow the first instruction to be "lea (regX,regX),reg1")
|
||||
}
|
||||
if (taicpu(p).oper[0]^.ref^.base<>NR_STACK_POINTER_REG) and { lea (%rsp,scale),reg is not a valid encoding }
|
||||
(
|
||||
{ Don't optimise if size is a concern and the intermediate register remains in use }
|
||||
IntermediateRegDiscarded or
|
||||
not (cs_opt_size in current_settings.optimizerswitches)
|
||||
) and
|
||||
(taicpu(hp1).oper[0]^.ref^.index = taicpu(p).oper[1]^.reg) and
|
||||
(
|
||||
(taicpu(p).oper[0]^.ref^.base <> taicpu(p).oper[0]^.ref^.index) or
|
||||
@ -5897,8 +5911,6 @@ unit aoptx86;
|
||||
|
||||
if (offsetcalc <= $7FFFFFFF) and (offsetcalc >= -2147483648) then
|
||||
begin
|
||||
DebugMsg(SPeepholeOptimization + 'LeaLea2Lea 2 done',p);
|
||||
|
||||
if (taicpu(hp1).oper[0]^.ref^.base = taicpu(p).oper[1]^.reg) and
|
||||
(taicpu(hp1).oper[0]^.ref^.index <> taicpu(p).oper[1]^.reg) then
|
||||
begin
|
||||
@ -5925,7 +5937,16 @@ unit aoptx86;
|
||||
if (taicpu(p).oper[0]^.ref^.offset <> 0) then
|
||||
Inc(taicpu(hp1).oper[0]^.ref^.offset, taicpu(p).oper[0]^.ref^.offset * max(taicpu(p).oper[0]^.ref^.scalefactor, 1));
|
||||
taicpu(hp1).oper[0]^.ref^.index := taicpu(p).oper[0]^.ref^.index;
|
||||
RemoveCurrentP(p);
|
||||
|
||||
{ Only remove the first LEA if we don't need the intermediate register's value as is }
|
||||
if IntermediateRegDiscarded then
|
||||
begin
|
||||
DebugMsg(SPeepholeOptimization + 'LeaLea2Lea 2 done',p);
|
||||
RemoveCurrentP(p);
|
||||
end
|
||||
else
|
||||
DebugMsg(SPeepholeOptimization + 'LeaLea2LeaLea 2 done (intermediate register still in use)',p);
|
||||
|
||||
result:=true;
|
||||
exit;
|
||||
end;
|
||||
@ -5933,29 +5954,35 @@ unit aoptx86;
|
||||
|
||||
{ changes
|
||||
lea offset1(regX), reg1
|
||||
lea offset2(reg1), reg1
|
||||
lea offset2(reg1), reg2
|
||||
to
|
||||
lea offset1+offset2(regX), reg1 }
|
||||
if
|
||||
lea offset1+offset2(regX), reg2 }
|
||||
if (
|
||||
{ Don't optimise if size is a concern and the intermediate register remains in use }
|
||||
IntermediateRegDiscarded or
|
||||
not (cs_opt_size in current_settings.optimizerswitches)
|
||||
) and
|
||||
(
|
||||
(taicpu(hp1).oper[0]^.ref^.index = taicpu(p).oper[1]^.reg) and
|
||||
(taicpu(p).oper[0]^.ref^.index = NR_NO)
|
||||
) or (
|
||||
(taicpu(hp1).oper[0]^.ref^.base = taicpu(p).oper[1]^.reg) and
|
||||
(taicpu(hp1).oper[0]^.ref^.scalefactor <= 1) and
|
||||
(
|
||||
(taicpu(hp1).oper[0]^.ref^.index = taicpu(p).oper[1]^.reg) and
|
||||
(taicpu(p).oper[0]^.ref^.index = NR_NO)
|
||||
) or (
|
||||
(taicpu(hp1).oper[0]^.ref^.base = taicpu(p).oper[1]^.reg) and
|
||||
(taicpu(hp1).oper[0]^.ref^.scalefactor <= 1) and
|
||||
(
|
||||
(taicpu(p).oper[0]^.ref^.index = NR_NO) or
|
||||
(taicpu(p).oper[0]^.ref^.base = NR_NO)
|
||||
) or (
|
||||
(taicpu(p).oper[0]^.ref^.scalefactor <= 1) and
|
||||
(
|
||||
(taicpu(p).oper[0]^.ref^.index = NR_NO) or
|
||||
(taicpu(p).oper[0]^.ref^.base = NR_NO)
|
||||
) or (
|
||||
(taicpu(p).oper[0]^.ref^.scalefactor <= 1) and
|
||||
(
|
||||
(taicpu(p).oper[0]^.ref^.index = taicpu(p).oper[0]^.ref^.base) and
|
||||
(taicpu(p).oper[0]^.ref^.index = NR_NO) or
|
||||
(
|
||||
(taicpu(hp1).oper[0]^.ref^.index = NR_NO) or
|
||||
(taicpu(hp1).oper[0]^.ref^.base = NR_NO)
|
||||
(taicpu(p).oper[0]^.ref^.index = taicpu(p).oper[0]^.ref^.base) and
|
||||
(
|
||||
(taicpu(hp1).oper[0]^.ref^.index = NR_NO) or
|
||||
(taicpu(hp1).oper[0]^.ref^.base = NR_NO)
|
||||
)
|
||||
)
|
||||
)
|
||||
)
|
||||
@ -5963,34 +5990,49 @@ unit aoptx86;
|
||||
)
|
||||
) then
|
||||
begin
|
||||
DebugMsg(SPeepholeOptimization + 'LeaLea2Lea 1 done',p);
|
||||
{ Make sure the offset doesn't go out of range (use 64-bit arithmetic)}
|
||||
offsetcalc := taicpu(hp1).oper[0]^.ref^.offset;
|
||||
Inc(offsetcalc, Int64(taicpu(p).oper[0]^.ref^.offset) * max(taicpu(hp1).oper[0]^.ref^.scalefactor, 1));
|
||||
|
||||
if taicpu(hp1).oper[0]^.ref^.index=taicpu(p).oper[1]^.reg then
|
||||
if (offsetcalc <= $7FFFFFFF) and (offsetcalc >= -2147483648) then
|
||||
begin
|
||||
taicpu(hp1).oper[0]^.ref^.index:=taicpu(p).oper[0]^.ref^.base;
|
||||
inc(taicpu(hp1).oper[0]^.ref^.offset,taicpu(p).oper[0]^.ref^.offset*max(taicpu(hp1).oper[0]^.ref^.scalefactor,1));
|
||||
{ if the register is used as index and base, we have to increase for base as well
|
||||
and adapt base }
|
||||
if taicpu(hp1).oper[0]^.ref^.base=taicpu(p).oper[1]^.reg then
|
||||
if taicpu(hp1).oper[0]^.ref^.index=taicpu(p).oper[1]^.reg then
|
||||
begin
|
||||
taicpu(hp1).oper[0]^.ref^.index:=taicpu(p).oper[0]^.ref^.base;
|
||||
inc(taicpu(hp1).oper[0]^.ref^.offset,taicpu(p).oper[0]^.ref^.offset*max(taicpu(hp1).oper[0]^.ref^.scalefactor,1));
|
||||
{ if the register is used as index and base, we have to increase for base as well
|
||||
and adapt base }
|
||||
if taicpu(hp1).oper[0]^.ref^.base=taicpu(p).oper[1]^.reg then
|
||||
begin
|
||||
taicpu(hp1).oper[0]^.ref^.base:=taicpu(p).oper[0]^.ref^.base;
|
||||
inc(taicpu(hp1).oper[0]^.ref^.offset,taicpu(p).oper[0]^.ref^.offset);
|
||||
end;
|
||||
end
|
||||
else
|
||||
begin
|
||||
taicpu(hp1).oper[0]^.ref^.base:=taicpu(p).oper[0]^.ref^.base;
|
||||
inc(taicpu(hp1).oper[0]^.ref^.offset,taicpu(p).oper[0]^.ref^.offset);
|
||||
taicpu(hp1).oper[0]^.ref^.base:=taicpu(p).oper[0]^.ref^.base;
|
||||
end;
|
||||
end
|
||||
else
|
||||
begin
|
||||
inc(taicpu(hp1).oper[0]^.ref^.offset,taicpu(p).oper[0]^.ref^.offset);
|
||||
taicpu(hp1).oper[0]^.ref^.base:=taicpu(p).oper[0]^.ref^.base;
|
||||
if taicpu(p).oper[0]^.ref^.index<>NR_NO then
|
||||
begin
|
||||
taicpu(hp1).oper[0]^.ref^.base:=taicpu(hp1).oper[0]^.ref^.index;
|
||||
taicpu(hp1).oper[0]^.ref^.index:=taicpu(p).oper[0]^.ref^.index;
|
||||
taicpu(hp1).oper[0]^.ref^.scalefactor:=taicpu(p).oper[0]^.ref^.scalefactor;
|
||||
end;
|
||||
|
||||
{ Only remove the first LEA if we don't need the intermediate register's value as is }
|
||||
if IntermediateRegDiscarded then
|
||||
begin
|
||||
DebugMsg(SPeepholeOptimization + 'LeaLea2Lea 1 done',p);
|
||||
RemoveCurrentP(p);
|
||||
end
|
||||
else
|
||||
DebugMsg(SPeepholeOptimization + 'LeaLea2LeaLea 1 done (intermediate register still in use)',p);
|
||||
|
||||
|
||||
result:=true;
|
||||
exit;
|
||||
end;
|
||||
if taicpu(p).oper[0]^.ref^.index<>NR_NO then
|
||||
begin
|
||||
taicpu(hp1).oper[0]^.ref^.base:=taicpu(hp1).oper[0]^.ref^.index;
|
||||
taicpu(hp1).oper[0]^.ref^.index:=taicpu(p).oper[0]^.ref^.index;
|
||||
taicpu(hp1).oper[0]^.ref^.scalefactor:=taicpu(p).oper[0]^.ref^.scalefactor;
|
||||
end;
|
||||
RemoveCurrentP(p);
|
||||
result:=true;
|
||||
exit;
|
||||
end;
|
||||
end;
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user