mirror of
https://gitlab.com/freepascal.org/fpc/source.git
synced 2025-04-19 07:19:26 +02:00
* patch by Nico Erfurth:
Reorder unaligned load sequence on ARM.

The old version produced code like this:

    ldrb rDEST, [rBASE]
    ldrb rTEMP, [rBASE, #1]
    orr  rDEST, rDEST, rTEMP, lsl #8    (2 stall cycles)
    ldrb rTEMP, [rBASE, #2]
    orr  rDEST, rDEST, rTEMP, lsl #16   (2 stall cycles)
    ldrb rTEMP, [rBASE, #3]
    orr  rDEST, rDEST, rTEMP, lsl #24   (2 stall cycles)

This creates a lot of stall cycles on ARM implementations with load delay slots, like Marvell Kirkwood or Intel XScale. With the usual up to 2 stall cycles per load-use pair, this code requires a total of 13 cycles (7 instructions + 6 stall cycles) in the best case.

The new code uses a second temp register to avoid the stall cycles:

    ldrb rDEST,  [rBASE]
    ldrb rTEMP1, [rBASE, #1]
    ldrb rTEMP2, [rBASE, #2]
    orr  rDEST, rDEST, rTEMP1, lsl #8
    ldrb rTEMP1, [rBASE, #3]
    orr  rDEST, rDEST, rTEMP2, lsl #16
    orr  rDEST, rDEST, rTEMP1, lsl #24  (1 stall cycle)

The rescheduling and the second register bring the total down to 8 cycles. If a later rescheduling pass moves the last orr, it can even go down to 7.

git-svn-id: trunk@21363 -
This commit is contained in:
parent
dc03282cb7
commit
c75486db89
@ -177,7 +177,7 @@ unit cgcpu;
|
||||
|
||||
|
||||
uses
|
||||
globals,verbose,systems,cutils,
|
||||
globals,verbose,systems,cutils,sysutils,
|
||||
aopt,aoptcpu,
|
||||
fmodule,
|
||||
symconst,symsym,
|
||||
@ -388,19 +388,26 @@ unit cgcpu;
|
||||
end
|
||||
else
|
||||
begin
|
||||
tmpreg2:=getintregister(list,OS_INT);
|
||||
if target_info.endian=endian_big then
|
||||
inc(usedtmpref.offset,3);
|
||||
a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,reg);
|
||||
|
||||
inc(usedtmpref.offset,dir);
|
||||
a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg);
|
||||
|
||||
inc(usedtmpref.offset,dir);
|
||||
a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg2);
|
||||
|
||||
so.shiftimm:=8;
|
||||
list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,tmpreg,so));
|
||||
|
||||
inc(usedtmpref.offset,dir);
|
||||
a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg);
|
||||
|
||||
so.shiftimm:=16;
|
||||
list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,tmpreg,so));
|
||||
inc(usedtmpref.offset,dir);
|
||||
a_internal_load_ref_reg(list,OS_8,OS_8,usedtmpref,tmpreg);
|
||||
list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,tmpreg2,so));
|
||||
|
||||
so.shiftimm:=24;
|
||||
list.concat(taicpu.op_reg_reg_reg_shifterop(A_ORR,reg,reg,tmpreg,so));
|
||||
end;
|
||||
@ -706,7 +713,7 @@ unit cgcpu;
|
||||
OP_SAR:
|
||||
begin
|
||||
if a>32 then
|
||||
internalerror(200308295);
|
||||
internalerror(200308298);
|
||||
if a<>0 then
|
||||
begin
|
||||
shifterop_reset(so);
|
||||
@ -1081,7 +1088,7 @@ unit cgcpu;
|
||||
OS_F32:
|
||||
oppostfix:=PF_None;
|
||||
else
|
||||
InternalError(200308295);
|
||||
InternalError(200308299);
|
||||
end;
|
||||
if (ref.alignment in [1,2]) and (ref.alignment<tcgsize2size[tosize]) then
|
||||
begin
|
||||
|
Loading…
Reference in New Issue
Block a user