* (modified) patch by J. Gareth Moreton: ARM/AArch64: some short-range LDR/STR optimisations, last part of #38841

This commit is contained in:
florian 2021-09-12 19:36:27 +02:00
parent 1e072aef31
commit 5762e687a3

View File

@ -1115,7 +1115,7 @@ Implementation
hp1: tai; hp1: tai;
Reference: TReference; Reference: TReference;
SizeMismatch: Boolean; SizeMismatch: Boolean;
SrcReg: TRegister; SrcReg, DstReg: TRegister;
NewOp: TAsmOp; NewOp: TAsmOp;
begin begin
Result := False; Result := False;
@ -1130,16 +1130,14 @@ Implementation
(hp1.typ = ait_instruction) and (hp1.typ = ait_instruction) and
(taicpu(hp1).condition = C_None) and (taicpu(hp1).condition = C_None) and
(taicpu(hp1).oppostfix = taicpu(p).oppostfix) then (taicpu(hp1).oppostfix = taicpu(p).oppostfix) then
if GetNextInstruction(p, hp1) and
(hp1.typ = ait_instruction) and
(taicpu(hp1).condition = C_None) then
begin begin
{ Saves constant dereferencing and makes it easier to change the size if necessary } { Saves constant dereferencing and makes it easier to change the size if necessary }
SrcReg := taicpu(p).oper[0]^.reg; SrcReg := taicpu(p).oper[0]^.reg;
DstReg := taicpu(hp1).oper[0]^.reg;
if (taicpu(hp1).opcode = A_LDR) and if (taicpu(hp1).opcode = A_LDR) and
RefsEqual(taicpu(hp1).oper[1]^.ref^, Reference) and RefsEqual(taicpu(hp1).oper[1]^.ref^, Reference) and
(taicpu(hp1).oper[1]^.ref^.volatility=[]) and
( (
(taicpu(hp1).oppostfix = taicpu(p).oppostfix) or (taicpu(hp1).oppostfix = taicpu(p).oppostfix) or
((taicpu(p).oppostfix = PF_B) and (taicpu(hp1).oppostfix = PF_SB)) or ((taicpu(p).oppostfix = PF_B) and (taicpu(hp1).oppostfix = PF_SB)) or
@ -1157,7 +1155,7 @@ Implementation
If reg1 <> reg2, replace ldr with "mov reg2,reg1" If reg1 <> reg2, replace ldr with "mov reg2,reg1"
} }
if (SrcReg = taicpu(hp1).oper[0]^.reg) and if (SrcReg = DstReg) and
{ e.g. the ldrb in strb/ldrb is not a null operation as it clears the upper 24 bits } { e.g. the ldrb in strb/ldrb is not a null operation as it clears the upper 24 bits }
(taicpu(p).oppostfix=PF_None) then (taicpu(p).oppostfix=PF_None) then
begin begin
@ -1166,17 +1164,39 @@ Implementation
Result := True; Result := True;
Exit; Exit;
end end
else if (getregtype(taicpu(p).oper[0]^.reg) = R_INTREGISTER) and else if (getregtype(SrcReg) = R_INTREGISTER) and
(getregtype(taicpu(hp1).oper[0]^.reg) = R_INTREGISTER) and (getregtype(DstReg) = R_INTREGISTER) and
(getsubreg(taicpu(p).oper[0]^.reg) = getsubreg(taicpu(hp1).oper[0]^.reg)) then (getsubreg(SrcReg) = getsubreg(DstReg)) then
begin begin
NewOp:=A_NONE; NewOp:=A_NONE;
if taicpu(hp1).oppostfix=PF_None then if taicpu(hp1).oppostfix=PF_None then
NewOp:=A_MOV NewOp:=A_MOV
else else
{$ifndef AARCH64} {$ifdef ARM}
if (current_settings.cputype >= cpu_armv6) then if (current_settings.cputype < cpu_armv6) then
{$endif not AARCH64} begin
{ The zero- and sign-extension operations were only
introduced under ARMv6 }
case taicpu(hp1).oppostfix of
PF_B:
begin
{ The if-block afterwards will set the middle operand to the correct register }
taicpu(hp1).allocate_oper(3);
taicpu(hp1).ops := 3;
taicpu(hp1).loadconst(2, $FF);
NewOp := A_AND;
end;
PF_H:
{ ARMv5 and under doesn't have a concise way of storing the immediate $FFFF, so leave alone };
PF_SB,
PF_SH:
{ Do nothing - can't easily encode sign-extensions };
else
InternalError(2021043002);
end;
end
else
{$endif ARM}
case taicpu(hp1).oppostfix of case taicpu(hp1).oppostfix of
PF_B: PF_B:
NewOp := A_UXTB; NewOp := A_UXTB;
@ -1201,8 +1221,8 @@ Implementation
taicpu(hp1).oppostfix := PF_None; taicpu(hp1).oppostfix := PF_None;
taicpu(hp1).opcode := NewOp; taicpu(hp1).opcode := NewOp;
taicpu(hp1).loadreg(1, taicpu(p).oper[0]^.reg); taicpu(hp1).loadreg(1, SrcReg);
AllocRegBetween(taicpu(p).oper[0]^.reg, p, hp1, UsedRegs); AllocRegBetween(SrcReg, p, hp1, UsedRegs);
Result := True; Result := True;
Exit; Exit;
end; end;
@ -1218,7 +1238,7 @@ Implementation
If reg1 <> reg2, delete the first str If reg1 <> reg2, delete the first str
IF reg1 = reg2, delete the second str IF reg1 = reg2, delete the second str
} }
if SrcReg = taicpu(hp1).oper[0]^.reg then if (SrcReg = DstReg) and (taicpu(hp1).oper[1]^.ref^.volatility=[]) then
begin begin
DebugMsg(SPeepholeOptimization + 'Removed duplicate store instruction (store/store -> store/nop)', hp1); DebugMsg(SPeepholeOptimization + 'Removed duplicate store instruction (store/store -> store/nop)', hp1);
RemoveInstruction(hp1); RemoveInstruction(hp1);
@ -1227,7 +1247,8 @@ Implementation
end end
else if else if
{ Registers same byte size? } { Registers same byte size? }
(tcgsize2size[reg_cgsize(taicpu(p).oper[0]^.reg)] = tcgsize2size[reg_cgsize(taicpu(hp1).oper[0]^.reg)]) then (tcgsize2size[reg_cgsize(SrcReg)] = tcgsize2size[reg_cgsize(DstReg)]) and
(taicpu(p).oper[1]^.ref^.volatility=[]) then
begin begin
DebugMsg(SPeepholeOptimization + 'Removed dominated store instruction (store/store -> nop/store)', p); DebugMsg(SPeepholeOptimization + 'Removed dominated store instruction (store/store -> nop/store)', p);
RemoveCurrentP(p, hp1); RemoveCurrentP(p, hp1);