mirror of
https://gitlab.com/freepascal.org/fpc/source.git
synced 2025-08-20 09:49:08 +02:00
* x86: Reversal of DoAddRefOpt optimisation if no pipeline saving is made
This commit is contained in:
parent
45883c2cc0
commit
1d86db3100
@ -157,6 +157,7 @@ unit aoptx86;
|
||||
|
||||
function DoArithCombineOpt(var p : tai) : Boolean;
|
||||
function DoAddRefOpt(var p, hp1: tai; Reg: TRegister; Value: TCGInt): Boolean;
|
||||
function DoReverseAddRefOpt(var p: tai; Value: TCGInt): Boolean;
|
||||
function DoMovCmpMemOpt(var p : tai; const hp1: tai) : Boolean;
|
||||
function DoSETccLblRETOpt(var p: tai; const hp_label: tai_label) : Boolean;
|
||||
|
||||
@ -6449,6 +6450,73 @@ unit aoptx86;
|
||||
end;
|
||||
|
||||
|
||||
function TX86AsmOptimizer.DoReverseAddRefOpt(var p: tai; Value: TCGInt): Boolean;
  var
    CurrentRef: PReference;
    TargetReg, OtherRegister: TRegister;
    AdjustedValue: TCGInt;
    X, Y: Integer;
    hp1, hp2: tai;
  begin
    { Attempts to turn

        hp2:  <instruction that modifies the other register of the reference>
        hp1:  <instruction using offset(%reg,%other,scale)>
        p:    ADD/SUB const,%reg

      into

        hp2
        p:    ADD/SUB const,%reg
        hp1:  <instruction using 0(%reg,%other,scale)>

      The rewrite is only made when the offset in the reference is exactly
      the (scaled) adjustment applied to %reg, so that the offset becomes
      zero once hp1 is placed after p.

      Parameters:
        p     - assumed to have been checked by the caller and confirmed to
                be of the form "ADD/SUB const,%reg"; it remains the current
                instruction on return
        Value - the signed amount p adds to %reg (the visible call sites
                pass the constant for ADD and its negation for SUB)

      Returns True if the instructions were rearranged, False otherwise.

      NOTE(review): nothing here checks whether hp1 reads the CPU flags, or
      writes flags that are still live after p; moving hp1 past an ADD/SUB
      changes which instruction last wrote the flags - confirm that callers
      or later passes make this safe. }
    Result := False;

    if not (GetLastInstruction(p, hp1) and (hp1.typ = ait_instruction)) then
      Exit;

    TargetReg := taicpu(p).oper[1]^.reg;

    { Make sure this instruction doesn't also modify the register used in
      the ADD/SUB instruction }
    if RegModifiedByInstruction(TargetReg, hp1) then
      Exit;

    { hp1 is going to be moved after the ADD/SUB, so it must not read the
      register through a plain register operand either (for example
      "mov %reg,x(%reg,%other)"), otherwise it would see the adjusted value
      instead of the original one.  Since the register is known not to be
      modified by hp1, any register operand naming it is treated as a read. }
    for Y := 0 to taicpu(hp1).ops - 1 do
      if (taicpu(hp1).oper[Y]^.typ = top_reg) and
        SuperRegistersEqual(taicpu(hp1).oper[Y]^.reg, TargetReg) then
        Exit;

    { Find the reference }
    for X := 0 to taicpu(hp1).ops - 1 do
      if (taicpu(hp1).oper[X]^.typ = top_ref) then
        begin
          { Locally store the pointer to the reference }
          CurrentRef := taicpu(hp1).oper[X]^.ref;

          if
            { Only references of the form x(%reg1,%reg2,scale) can be
              optimised here }
            (CurrentRef^.refaddr <> addr_no) or
            not RegInRef(TargetReg, CurrentRef^) then
            Exit;

          { Account for the scale factor on the value.  This is computed
            into a local so that Value itself is left untouched should the
            loop go on to inspect another operand. }
          AdjustedValue := Value;
          if SuperRegistersEqual(TargetReg, CurrentRef^.index) then
            begin
              OtherRegister := CurrentRef^.base;
              if SuperRegistersEqual(TargetReg, CurrentRef^.base) then
                { The register is both base and index: Value * (1 + scale) }
                Inc(AdjustedValue, Value * max(CurrentRef^.scalefactor, 1))
              else
                AdjustedValue := Value * max(CurrentRef^.scalefactor, 1);
            end
          else
            OtherRegister := CurrentRef^.index;

          if (OtherRegister <> NR_NO) and
            { If we can't set the offset to zero, this is wasted effort }
            (CurrentRef^.offset = AdjustedValue) and
            GetLastInstruction(hp1, hp2) and
            { Make sure there is a pipeline stall between hp2 and hp1,
              otherwise a saving won't be made }
            RegModifiedByInstruction(OtherRegister, hp2) then
            begin
              CurrentRef^.offset := 0;

              { Move hp1 so that it directly follows p }
              AsmL.Remove(hp1);
              AsmL.InsertAfter(hp1, p);

              { In case OtherRegister got deallocated right after the reference }
              AllocRegBetween(OtherRegister, hp2, hp1, UsedRegs);

              DebugMsg(SPeepholeOptimization + 'Rearranged MOV; (ref); ADD/SUB to MOV; ADD/SUB; (ref) to remove offset and minimise cache pollution', hp2);

              { Keep p as the current instruction.  hp1 has been relocated,
                so stop here rather than examine any further operands
                against the new instruction order. }
              Result := True;
              Exit;
            end;
        end;
  end;
|
||||
|
||||
|
||||
function TX86AsmOptimizer.DoArithCombineOpt(var p: tai): Boolean;
|
||||
var
|
||||
hp1 : tai;
|
||||
@ -15495,6 +15563,19 @@ unit aoptx86;
|
||||
begin
|
||||
Result := False;
|
||||
|
||||
if taicpu(p).oper[0]^.typ = top_const then
|
||||
begin
|
||||
{ Sometimes, DoAddRefOpt makes an optimisation that doesn't
|
||||
improve code speed and only increases cache pollution. If these
|
||||
aren't cleared by other optimisations, rectify it here }
|
||||
if taicpu(p).oper[1]^.typ = top_reg then
|
||||
begin
|
||||
if taicpu(p).opcode = A_ADD then
|
||||
Result := DoReverseAddRefOpt(p, taicpu(p).oper[0]^.val)
|
||||
else
|
||||
Result := DoReverseAddRefOpt(p, -taicpu(p).oper[0]^.val);
|
||||
end;
|
||||
|
||||
{ Change:
|
||||
add/sub 128,(dest)
|
||||
|
||||
@ -15504,7 +15585,7 @@ unit aoptx86;
|
||||
This generally takes fewer bytes to encode because -128 can be stored
|
||||
in a signed byte, whereas +128 cannot.
|
||||
}
|
||||
if (taicpu(p).opsize <> S_B) and MatchOperand(taicpu(p).oper[0]^, 128) then
|
||||
if (taicpu(p).opsize <> S_B) and (taicpu(p).oper[0]^.val = 128) then
|
||||
begin
|
||||
if taicpu(p).opcode = A_ADD then
|
||||
Opposite := A_SUB
|
||||
@ -15621,6 +15702,7 @@ unit aoptx86;
|
||||
Result := True;
|
||||
Exit;
|
||||
end;
|
||||
end;
|
||||
|
||||
{ Detect:
|
||||
add/sub %reg2,(dest)
|
||||
|
Loading…
Reference in New Issue
Block a user