mirror of
https://gitlab.com/freepascal.org/fpc/source.git
synced 2025-08-19 04:29:26 +02:00
* (modified) patch by J. Gareth Moreton: ARM/AArch64 Some short-range LDR/STR optimisations, last part of #38841
This commit is contained in:
parent
1e072aef31
commit
5762e687a3
@ -1115,7 +1115,7 @@ Implementation
|
|||||||
hp1: tai;
|
hp1: tai;
|
||||||
Reference: TReference;
|
Reference: TReference;
|
||||||
SizeMismatch: Boolean;
|
SizeMismatch: Boolean;
|
||||||
SrcReg: TRegister;
|
SrcReg, DstReg: TRegister;
|
||||||
NewOp: TAsmOp;
|
NewOp: TAsmOp;
|
||||||
begin
|
begin
|
||||||
Result := False;
|
Result := False;
|
||||||
@ -1130,16 +1130,14 @@ Implementation
|
|||||||
(hp1.typ = ait_instruction) and
|
(hp1.typ = ait_instruction) and
|
||||||
(taicpu(hp1).condition = C_None) and
|
(taicpu(hp1).condition = C_None) and
|
||||||
(taicpu(hp1).oppostfix = taicpu(p).oppostfix) then
|
(taicpu(hp1).oppostfix = taicpu(p).oppostfix) then
|
||||||
|
|
||||||
if GetNextInstruction(p, hp1) and
|
|
||||||
(hp1.typ = ait_instruction) and
|
|
||||||
(taicpu(hp1).condition = C_None) then
|
|
||||||
begin
|
begin
|
||||||
{ Saves constant dereferencing and makes it easier to change the size if necessary }
|
{ Saves constant dereferencing and makes it easier to change the size if necessary }
|
||||||
SrcReg := taicpu(p).oper[0]^.reg;
|
SrcReg := taicpu(p).oper[0]^.reg;
|
||||||
|
DstReg := taicpu(hp1).oper[0]^.reg;
|
||||||
|
|
||||||
if (taicpu(hp1).opcode = A_LDR) and
|
if (taicpu(hp1).opcode = A_LDR) and
|
||||||
RefsEqual(taicpu(hp1).oper[1]^.ref^, Reference) and
|
RefsEqual(taicpu(hp1).oper[1]^.ref^, Reference) and
|
||||||
|
(taicpu(hp1).oper[1]^.ref^.volatility=[]) and
|
||||||
(
|
(
|
||||||
(taicpu(hp1).oppostfix = taicpu(p).oppostfix) or
|
(taicpu(hp1).oppostfix = taicpu(p).oppostfix) or
|
||||||
((taicpu(p).oppostfix = PF_B) and (taicpu(hp1).oppostfix = PF_SB)) or
|
((taicpu(p).oppostfix = PF_B) and (taicpu(hp1).oppostfix = PF_SB)) or
|
||||||
@ -1157,7 +1155,7 @@ Implementation
|
|||||||
If reg1 <> reg2, replace ldr with "mov reg2,reg1"
|
If reg1 <> reg2, replace ldr with "mov reg2,reg1"
|
||||||
}
|
}
|
||||||
|
|
||||||
if (SrcReg = taicpu(hp1).oper[0]^.reg) and
|
if (SrcReg = DstReg) and
|
||||||
{ e.g. the ldrb in strb/ldrb is not a null operation as it clears the upper 24 bits }
|
{ e.g. the ldrb in strb/ldrb is not a null operation as it clears the upper 24 bits }
|
||||||
(taicpu(p).oppostfix=PF_None) then
|
(taicpu(p).oppostfix=PF_None) then
|
||||||
begin
|
begin
|
||||||
@ -1166,17 +1164,39 @@ Implementation
|
|||||||
Result := True;
|
Result := True;
|
||||||
Exit;
|
Exit;
|
||||||
end
|
end
|
||||||
else if (getregtype(taicpu(p).oper[0]^.reg) = R_INTREGISTER) and
|
else if (getregtype(SrcReg) = R_INTREGISTER) and
|
||||||
(getregtype(taicpu(hp1).oper[0]^.reg) = R_INTREGISTER) and
|
(getregtype(DstReg) = R_INTREGISTER) and
|
||||||
(getsubreg(taicpu(p).oper[0]^.reg) = getsubreg(taicpu(hp1).oper[0]^.reg)) then
|
(getsubreg(SrcReg) = getsubreg(DstReg)) then
|
||||||
begin
|
begin
|
||||||
NewOp:=A_NONE;
|
NewOp:=A_NONE;
|
||||||
if taicpu(hp1).oppostfix=PF_None then
|
if taicpu(hp1).oppostfix=PF_None then
|
||||||
NewOp:=A_MOV
|
NewOp:=A_MOV
|
||||||
else
|
else
|
||||||
{$ifndef AARCH64}
|
{$ifdef ARM}
|
||||||
if (current_settings.cputype >= cpu_armv6) then
|
if (current_settings.cputype < cpu_armv6) then
|
||||||
{$endif not AARCH64}
|
begin
|
||||||
|
{ The zero- and sign-extension operations were only
|
||||||
|
introduced under ARMv6 }
|
||||||
|
case taicpu(hp1).oppostfix of
|
||||||
|
PF_B:
|
||||||
|
begin
|
||||||
|
{ The if-block afterwards will set the middle operand to the correct register }
|
||||||
|
taicpu(hp1).allocate_oper(3);
|
||||||
|
taicpu(hp1).ops := 3;
|
||||||
|
taicpu(hp1).loadconst(2, $FF);
|
||||||
|
NewOp := A_AND;
|
||||||
|
end;
|
||||||
|
PF_H:
|
||||||
|
{ ARMv5 and under doesn't have a concise way of storing the immediate $FFFF, so leave alone };
|
||||||
|
PF_SB,
|
||||||
|
PF_SH:
|
||||||
|
{ Do nothing - can't easily encode sign-extensions };
|
||||||
|
else
|
||||||
|
InternalError(2021043002);
|
||||||
|
end;
|
||||||
|
end
|
||||||
|
else
|
||||||
|
{$endif ARM}
|
||||||
case taicpu(hp1).oppostfix of
|
case taicpu(hp1).oppostfix of
|
||||||
PF_B:
|
PF_B:
|
||||||
NewOp := A_UXTB;
|
NewOp := A_UXTB;
|
||||||
@ -1201,8 +1221,8 @@ Implementation
|
|||||||
|
|
||||||
taicpu(hp1).oppostfix := PF_None;
|
taicpu(hp1).oppostfix := PF_None;
|
||||||
taicpu(hp1).opcode := NewOp;
|
taicpu(hp1).opcode := NewOp;
|
||||||
taicpu(hp1).loadreg(1, taicpu(p).oper[0]^.reg);
|
taicpu(hp1).loadreg(1, SrcReg);
|
||||||
AllocRegBetween(taicpu(p).oper[0]^.reg, p, hp1, UsedRegs);
|
AllocRegBetween(SrcReg, p, hp1, UsedRegs);
|
||||||
Result := True;
|
Result := True;
|
||||||
Exit;
|
Exit;
|
||||||
end;
|
end;
|
||||||
@ -1218,7 +1238,7 @@ Implementation
|
|||||||
If reg1 <> reg2, delete the first str
|
If reg1 <> reg2, delete the first str
|
||||||
If reg1 = reg2, delete the second str
|
If reg1 = reg2, delete the second str
|
||||||
}
|
}
|
||||||
if SrcReg = taicpu(hp1).oper[0]^.reg then
|
if (SrcReg = DstReg) and (taicpu(hp1).oper[1]^.ref^.volatility=[]) then
|
||||||
begin
|
begin
|
||||||
DebugMsg(SPeepholeOptimization + 'Removed duplicate store instruction (store/store -> store/nop)', hp1);
|
DebugMsg(SPeepholeOptimization + 'Removed duplicate store instruction (store/store -> store/nop)', hp1);
|
||||||
RemoveInstruction(hp1);
|
RemoveInstruction(hp1);
|
||||||
@ -1227,7 +1247,8 @@ Implementation
|
|||||||
end
|
end
|
||||||
else if
|
else if
|
||||||
{ Registers same byte size? }
|
{ Registers same byte size? }
|
||||||
(tcgsize2size[reg_cgsize(taicpu(p).oper[0]^.reg)] = tcgsize2size[reg_cgsize(taicpu(hp1).oper[0]^.reg)]) then
|
(tcgsize2size[reg_cgsize(SrcReg)] = tcgsize2size[reg_cgsize(DstReg)]) and
|
||||||
|
(taicpu(p).oper[1]^.ref^.volatility=[]) then
|
||||||
begin
|
begin
|
||||||
DebugMsg(SPeepholeOptimization + 'Removed dominated store instruction (store/store -> nop/store)', p);
|
DebugMsg(SPeepholeOptimization + 'Removed dominated store instruction (store/store -> nop/store)', p);
|
||||||
RemoveCurrentP(p, hp1);
|
RemoveCurrentP(p, hp1);
|
||||||
|
Loading…
Reference in New Issue
Block a user