* arm / a64: New optimisation for reducing the number of necessary S/UXTB/H instructions

This commit is contained in:
J. Gareth "Curious Kit" Moreton 2024-05-14 02:22:00 +01:00 committed by FPK
parent b1a9150160
commit 096d5f50b2

View File

@ -60,6 +60,9 @@ Type
function OptPass2Bitwise(var p: tai): Boolean;
function OptPass2TST(var p: tai): Boolean;
protected
function DoXTArithOp(var p: tai; hp1: tai): Boolean;
End;
function MatchInstruction(const instr: tai; const op: TCommonAsmOps; const cond: TAsmConds; const postfix: TOpPostfixes): boolean;
@ -675,6 +678,123 @@ Implementation
end;
function TARMAsmOptimizer.DoXTArithOp(var p: tai; hp1: tai): Boolean;
  { Tries to remove a redundant leading byte/halfword extend instruction.

    Parameters:
      p   - the current instruction; its opcode must be one of UXTB, SXTB,
            UXTH or SXTH, otherwise InternalError(2024051401) is raised.
      hp1 - the instruction supplied by the caller that is examined as the
            arithmetic/bitwise operation following p.

    Returns the result of RemoveCurrentP(p) when the pattern matched and p
    was removed (p is updated by that call); returns False otherwise. }
  var
    hp2: tai;
    ConstLimit: TCGInt;
    ValidPostFixes: TOpPostFixes;
    FirstCode, SecondCode, ThirdCode, FourthCode: TAsmOp;
  begin
    Result := False;
    { Change:
        uxtb/h reg1,reg1
        (operation on reg1 with an immediate operand, where the bits above
         the extended width don't affect the low 8/16 bits of the result)
        uxtb/h reg1,reg1

      Remove first uxtb/h

      The first extend is also removed when the operation is an AND whose
      immediate already fits the extended width, or when the result is only
      consumed by a byte/halfword store after which reg1 is dead.
    }
    case taicpu(p).opcode of
      A_UXTB,
      A_SXTB:
        begin
          { Byte extend: only another byte extend or a byte store may follow }
          ConstLimit := $FF;
          ValidPostFixes := [PF_B];
          FirstCode := A_UXTB;
          SecondCode := A_SXTB;
          ThirdCode := A_UXTB; { Used to indicate no other valid codes }
          FourthCode := A_SXTB;
        end;
      A_UXTH,
      A_SXTH:
        begin
          { Halfword extend: a following byte or halfword extend/store only
            looks at bits that the first extend left untouched }
          ConstLimit := $FFFF;
          ValidPostFixes := [PF_B, PF_H];
          FirstCode := A_UXTH;
          SecondCode := A_SXTH;
          ThirdCode := A_UXTB;
          FourthCode := A_SXTB;
        end;
      else
        InternalError(2024051401);
    end;

{$ifndef AARCH64}
    { Regular ARM doesn't have the multi-instruction MatchInstruction available }
    if (hp1.typ = ait_instruction) and (taicpu(hp1).oppostfix = PF_None) then
      case taicpu(hp1).opcode of
        A_ADD, A_SUB, A_MUL, A_LSL, A_AND, A_ORR, A_EOR, A_BIC, A_ORN:
{$endif AARCH64}
          if
            { The extend must be the plain two-operand form.  A third operand
              (e.g. a rotation of the source) would change the low bits, so
              the instruction could not be dropped safely }
            (taicpu(p).ops = 2) and
            (taicpu(p).oper[1]^.reg = taicpu(p).oper[0]^.reg) and
{$ifdef AARCH64}
            MatchInstruction(hp1, [A_ADD, A_SUB, A_MUL, A_LSL, A_AND, A_ORR, A_EOR, A_BIC, A_ORN, A_EON], [PF_None]) and
{$endif AARCH64}
            (taicpu(hp1).condition = C_None) and
            (taicpu(hp1).ops = 3) and
            (taicpu(hp1).oper[0]^.reg = taicpu(p).oper[0]^.reg) and
            (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[0]^.reg) and
            (taicpu(hp1).oper[2]^.typ = top_const) and
            (
              (
                { If the AND immediate fits within the extended width, then
                  this essentially performs the functionality of the second
                  UXTB/H and so its presence is not required }
                (taicpu(hp1).opcode = A_AND) and
                (taicpu(hp1).oper[2]^.val >= 0) and
                (taicpu(hp1).oper[2]^.val <= ConstLimit)
              ) or
              (
                GetNextInstructionUsingReg(hp1,hp2,taicpu(p).oper[0]^.reg) and
                (hp2.typ = ait_instruction) and
                (taicpu(hp2).ops = 2) and
                (taicpu(hp2).condition = C_None) and
                (
                  (
                    (taicpu(hp2).opcode in [FirstCode, SecondCode, ThirdCode, FourthCode]) and
                    (taicpu(hp2).oppostfix = PF_None) and
                    (taicpu(hp2).oper[1]^.reg = taicpu(p).oper[0]^.reg)
                    { Destination is allowed to be different in this case, but
                      only if the source is no longer in use (it being the same as
                      the source is covered by RegEndOfLife as well) }
                  ) or
                  (
                    { STRB/STRH essentially fills the same role as the second
                      UXTB/H as long as the register is deallocated afterwards
                      and is not part of the store's address operand }
                    MatchInstruction(hp2, A_STR, [C_None], ValidPostFixes) and
                    (taicpu(hp2).oper[0]^.reg = taicpu(p).oper[0]^.reg) and
                    not RegInOp(taicpu(p).oper[0]^.reg, taicpu(hp2).oper[1]^)
                  )
                ) and
                RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp2))
              )
            ) then
            begin
              DebugMsg(SPeepholeOptimization + 'S/Uxtb/hArithUxtb/h2ArithS/Uxtb/h done', p);
              Result := RemoveCurrentP(p);

              { Simplify bitwise constants if able: bits above the extended
                width no longer matter, so they are masked off }
{$ifdef AARCH64}
              if (taicpu(hp1).opcode in [A_AND, A_ORR, A_EOR, A_BIC, A_ORN, A_EON]) and
                is_shifter_const(taicpu(hp1).oper[2]^.val and ConstLimit, OS_32) then
{$else AARCH64}
              { NOTE(review): the "<= $100" test is a signed comparison if
                TCGInt is signed on this target, so negative constants pass
                it too - confirm the masked value is always encodable }
              if (
                  (ConstLimit = $FF) or
                  (taicpu(hp1).oper[2]^.val <= $100)
                ) and
                (taicpu(hp1).opcode in [A_AND, A_ORR, A_EOR, A_BIC, A_ORN]) then
{$endif AARCH64}
                taicpu(hp1).oper[2]^.val := taicpu(hp1).oper[2]^.val and ConstLimit;
            end;
{$ifndef AARCH64}
        else
          ;
      end;
{$endif not AARCH64}
  end;
function TARMAsmOptimizer.OptPass1UXTB(var p : tai) : Boolean;
var
hp1, hp2: tai;
@ -773,6 +893,8 @@ Implementation
taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
result:=RemoveCurrentP(p);
end
else if DoXTArithOp(p, hp1) then
Result:=true
{$ifdef AARCH64}
else if USxtOp2Op(p,hp1,SM_UXTB) then
Result:=true
@ -862,6 +984,8 @@ Implementation
taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
result:=RemoveCurrentP(p);
end
else if DoXTArithOp(p, hp1) then
Result:=true
{$ifdef AARCH64}
else if USxtOp2Op(p,hp1,SM_UXTH) then
Result:=true
@ -974,6 +1098,8 @@ Implementation
taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
result:=RemoveCurrentP(p);
end
else if DoXTArithOp(p, hp1) then
Result:=true
{$ifdef AARCH64}
else if USxtOp2Op(p,hp1,SM_SXTB) then
Result:=true
@ -1094,6 +1220,8 @@ Implementation
taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
result:=RemoveCurrentP(p);
end
else if DoXTArithOp(p, hp1) then
Result:=true
{$ifdef AARCH64}
else if USxtOp2Op(p,hp1,SM_SXTH) then
Result:=true