mirror of
https://gitlab.com/freepascal.org/fpc/source.git
synced 2025-09-10 22:09:18 +02:00
* arm / a64: New optimisation for removing the number of necessary S/UXTB/H instructions
This commit is contained in:
parent
b1a9150160
commit
096d5f50b2
@ -60,6 +60,9 @@ Type
|
||||
|
||||
function OptPass2Bitwise(var p: tai): Boolean;
|
||||
function OptPass2TST(var p: tai): Boolean;
|
||||
|
||||
protected
|
||||
function DoXTArithOp(var p: tai; hp1: tai): Boolean;
|
||||
End;
|
||||
|
||||
function MatchInstruction(const instr: tai; const op: TCommonAsmOps; const cond: TAsmConds; const postfix: TOpPostfixes): boolean;
|
||||
@ -675,6 +678,123 @@ Implementation
|
||||
end;
|
||||
|
||||
|
||||
function TARMAsmOptimizer.DoXTArithOp(var p: tai; hp1: tai): Boolean;
|
||||
var
|
||||
hp2: tai;
|
||||
ConstLimit: TCGInt;
|
||||
ValidPostFixes: TOpPostFixes;
|
||||
FirstCode, SecondCode, ThirdCode, FourthCode: TAsmOp;
|
||||
begin
|
||||
Result := False;
|
||||
{ Change:
|
||||
uxtb/h reg1,reg1
|
||||
(operation on reg1 with immediate operand where the upper 24/56
|
||||
bits don't affect the state of the first 8 bits )
|
||||
uxtb/h reg1,reg1
|
||||
|
||||
Remove first uxtb/h
|
||||
}
|
||||
case taicpu(p).opcode of
|
||||
A_UXTB,
|
||||
A_SXTB:
|
||||
begin
|
||||
ConstLimit := $FF;
|
||||
ValidPostFixes := [PF_B];
|
||||
FirstCode := A_UXTB;
|
||||
SecondCode := A_SXTB;
|
||||
ThirdCode := A_UXTB; { Used to indicate no other valid codes }
|
||||
FourthCode := A_SXTB;
|
||||
end;
|
||||
A_UXTH,
|
||||
A_SXTH:
|
||||
begin
|
||||
ConstLimit := $FFFF;
|
||||
ValidPostFixes := [PF_B, PF_H];
|
||||
FirstCode := A_UXTH;
|
||||
SecondCode := A_SXTH;
|
||||
ThirdCode := A_UXTB;
|
||||
FourthCode := A_SXTB;
|
||||
end;
|
||||
else
|
||||
InternalError(2024051401);
|
||||
end;
|
||||
|
||||
{$ifndef AARCH64}
|
||||
{ Regular ARM doesn't have the multi-instruction MatchInstruction available }
|
||||
if (hp1.typ = ait_instruction) and (taicpu(hp1).oppostfix = PF_None) then
|
||||
case taicpu(hp1).opcode of
|
||||
A_ADD, A_SUB, A_MUL, A_LSL, A_AND, A_ORR, A_EOR, A_BIC, A_ORN:
|
||||
{$endif AARCH64}
|
||||
|
||||
if
|
||||
(taicpu(p).oper[1]^.reg = taicpu(p).oper[0]^.reg) and
|
||||
{$ifdef AARCH64}
|
||||
MatchInstruction(hp1, [A_ADD, A_SUB, A_MUL, A_LSL, A_AND, A_ORR, A_EOR, A_BIC, A_ORN, A_EON], [PF_None]) and
|
||||
{$endif AARCH64}
|
||||
(taicpu(hp1).condition = C_None) and
|
||||
(taicpu(hp1).ops = 3) and
|
||||
(taicpu(hp1).oper[0]^.reg = taicpu(p).oper[0]^.reg) and
|
||||
(taicpu(hp1).oper[1]^.reg = taicpu(p).oper[0]^.reg) and
|
||||
(taicpu(hp1).oper[2]^.typ = top_const) and
|
||||
(
|
||||
(
|
||||
{ If the AND immediate is 8-bit, then this essentially performs
|
||||
the functionality of the second UXTB and so its presence is
|
||||
not required }
|
||||
(taicpu(hp1).opcode = A_AND) and
|
||||
(taicpu(hp1).oper[2]^.val >= 0) and
|
||||
(taicpu(hp1).oper[2]^.val <= ConstLimit)
|
||||
) or
|
||||
(
|
||||
GetNextInstructionUsingReg(hp1,hp2,taicpu(p).oper[0]^.reg) and
|
||||
(hp2.typ = ait_instruction) and
|
||||
(taicpu(hp2).ops = 2) and
|
||||
(taicpu(hp2).condition = C_None) and
|
||||
(
|
||||
(
|
||||
(taicpu(hp2).opcode in [FirstCode, SecondCode, ThirdCode, FourthCode]) and
|
||||
(taicpu(hp2).oppostfix = PF_None) and
|
||||
(taicpu(hp2).oper[1]^.reg = taicpu(p).oper[0]^.reg)
|
||||
{ Destination is allowed to be different in this case, but
|
||||
only if the source is no longer in use (it being the same as
|
||||
the source is covered by RegEndOfLife as well) }
|
||||
) or
|
||||
(
|
||||
{ STRB essentially fills the same role as the second UXTB
|
||||
as long as the register is deallocated afterwards }
|
||||
MatchInstruction(hp2, A_STR, [C_None], ValidPostFixes) and
|
||||
(taicpu(hp2).oper[0]^.reg = taicpu(p).oper[0]^.reg) and
|
||||
not RegInOp(taicpu(p).oper[0]^.reg, taicpu(hp2).oper[1]^)
|
||||
)
|
||||
) and
|
||||
RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp2))
|
||||
)
|
||||
) then
|
||||
begin
|
||||
DebugMsg(SPeepholeOptimization + 'S/Uxtb/hArithUxtb/h2ArithS/Uxtb/h done', p);
|
||||
Result := RemoveCurrentP(p);
|
||||
|
||||
{ Simplify bitwise constants if able }
|
||||
{$ifdef AARCH64}
|
||||
if (taicpu(hp1).opcode in [A_AND, A_ORR, A_EOR, A_BIC, A_ORN, A_EON]) and
|
||||
is_shifter_const(taicpu(hp1).oper[2]^.val and ConstLimit, OS_32) then
|
||||
{$else AARCH64}
|
||||
if (
|
||||
(ConstLimit = $FF) or
|
||||
(taicpu(hp1).oper[2]^.val <= $100)
|
||||
) and
|
||||
(taicpu(hp1).opcode in [A_AND, A_ORR, A_EOR, A_BIC, A_ORN]) then
|
||||
{$endif AARCH64}
|
||||
taicpu(hp1).oper[2]^.val := taicpu(hp1).oper[2]^.val and ConstLimit;
|
||||
end;
|
||||
{$ifndef AARCH64}
|
||||
else
|
||||
;
|
||||
end;
|
||||
{$endif not AARCH64}
|
||||
end;
|
||||
|
||||
|
||||
function TARMAsmOptimizer.OptPass1UXTB(var p : tai) : Boolean;
|
||||
var
|
||||
hp1, hp2: tai;
|
||||
@ -773,6 +893,8 @@ Implementation
|
||||
taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
|
||||
result:=RemoveCurrentP(p);
|
||||
end
|
||||
else if DoXTArithOp(p, hp1) then
|
||||
Result:=true
|
||||
{$ifdef AARCH64}
|
||||
else if USxtOp2Op(p,hp1,SM_UXTB) then
|
||||
Result:=true
|
||||
@ -862,6 +984,8 @@ Implementation
|
||||
taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
|
||||
result:=RemoveCurrentP(p);
|
||||
end
|
||||
else if DoXTArithOp(p, hp1) then
|
||||
Result:=true
|
||||
{$ifdef AARCH64}
|
||||
else if USxtOp2Op(p,hp1,SM_UXTH) then
|
||||
Result:=true
|
||||
@ -974,6 +1098,8 @@ Implementation
|
||||
taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
|
||||
result:=RemoveCurrentP(p);
|
||||
end
|
||||
else if DoXTArithOp(p, hp1) then
|
||||
Result:=true
|
||||
{$ifdef AARCH64}
|
||||
else if USxtOp2Op(p,hp1,SM_SXTB) then
|
||||
Result:=true
|
||||
@ -1094,6 +1220,8 @@ Implementation
|
||||
taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
|
||||
result:=RemoveCurrentP(p);
|
||||
end
|
||||
else if DoXTArithOp(p, hp1) then
|
||||
Result:=true
|
||||
{$ifdef AARCH64}
|
||||
else if USxtOp2Op(p,hp1,SM_SXTH) then
|
||||
Result:=true
|
||||
|
Loading…
Reference in New Issue
Block a user