mirror of
https://gitlab.com/freepascal.org/fpc/source.git
synced 2025-09-15 10:51:08 +02:00
* perform 4-byte instead of 8-byte copies at a time if source or dest is unaligned
* use integer instead of floating-point registers for 8-byte copies, because the integer unit has lower latency
git-svn-id: trunk@9347 -
This commit is contained in:
parent
bc545b077f
commit
96180029b2
@ -1738,18 +1738,14 @@ end;
|
|||||||
|
|
||||||
{ ************* concatcopy ************ }
|
{ ************* concatcopy ************ }
|
||||||
|
|
||||||
const
|
|
||||||
maxmoveunit = 8;
|
|
||||||
|
|
||||||
|
|
||||||
procedure tcgppc.g_concatcopy(list: TAsmList; const source, dest: treference;
|
procedure tcgppc.g_concatcopy(list: TAsmList; const source, dest: treference;
|
||||||
len: aint);
|
len: aint);
|
||||||
|
|
||||||
var
|
var
|
||||||
countreg, tempreg: TRegister;
|
countreg, tempreg:TRegister;
|
||||||
src, dst: TReference;
|
src, dst: TReference;
|
||||||
lab: tasmlabel;
|
lab: tasmlabel;
|
||||||
count, count2: longint;
|
count, count2, step: longint;
|
||||||
size: tcgsize;
|
size: tcgsize;
|
||||||
|
|
||||||
begin
|
begin
|
||||||
@ -1759,7 +1755,8 @@ begin
|
|||||||
list.concat(tai_comment.create(strpnew('g_concatcopy1 ' + inttostr(len) + ' bytes left ')));
|
list.concat(tai_comment.create(strpnew('g_concatcopy1 ' + inttostr(len) + ' bytes left ')));
|
||||||
{$ENDIF extdebug}
|
{$ENDIF extdebug}
|
||||||
{ if the references are equal, exit, there is no need to copy anything }
|
{ if the references are equal or there is nothing to copy (len=0), exit }
|
||||||
if (references_equal(source, dest)) then
|
if references_equal(source, dest) or
|
||||||
|
(len=0) then
|
||||||
exit;
|
exit;
|
||||||
|
|
||||||
{ make sure short loads are handled as optimally as possible;
|
{ make sure short loads are handled as optimally as possible;
|
||||||
@ -1768,7 +1765,7 @@ begin
|
|||||||
NOTE: maybe use some scratch registers to pair load/store instructions
|
NOTE: maybe use some scratch registers to pair load/store instructions
|
||||||
}
|
}
|
||||||
|
|
||||||
if (len <= maxmoveunit) then begin
|
if (len <= 8) then begin
|
||||||
src := source; dst := dest;
|
src := source; dst := dest;
|
||||||
{$IFDEF extdebug}
|
{$IFDEF extdebug}
|
||||||
list.concat(tai_comment.create(strpnew('g_concatcopy3 ' + inttostr(src.offset) + ' ' + inttostr(dst.offset))));
|
list.concat(tai_comment.create(strpnew('g_concatcopy3 ' + inttostr(src.offset) + ' ' + inttostr(dst.offset))));
|
||||||
@ -1798,16 +1795,29 @@ begin
|
|||||||
{$ENDIF extdebug}
|
{$ENDIF extdebug}
|
||||||
|
|
||||||
|
|
||||||
count := len div maxmoveunit;
|
if not(source.alignment in [1,2]) and
|
||||||
|
not(dest.alignment in [1,2]) then
|
||||||
|
begin
|
||||||
|
count:=len div 8;
|
||||||
|
step:=8;
|
||||||
|
size:=OS_64;
|
||||||
|
end
|
||||||
|
else
|
||||||
|
begin
|
||||||
|
count:=len div 4;
|
||||||
|
step:=4;
|
||||||
|
size:=OS_32;
|
||||||
|
end;
|
||||||
|
|
||||||
|
tempreg:=getintregister(list,size);
|
||||||
reference_reset(src);
|
reference_reset(src);
|
||||||
reference_reset(dst);
|
reference_reset(dst);
|
||||||
{ load the address of source into src.base }
|
{ load the address of source into src.base }
|
||||||
if (count > 4) or
|
if (count > 4) or
|
||||||
not issimpleref(source) or
|
not issimpleref(source) or
|
||||||
((source.index <> NR_NO) and
|
((source.index <> NR_NO) and
|
||||||
((source.offset + len) > high(smallint))) then begin
|
((source.offset + len) > high(smallint))) then begin
|
||||||
src.base := rg[R_INTREGISTER].getregister(list, R_SUBWHOLE);
|
src.base := getaddressregister(list);
|
||||||
a_loadaddr_ref_reg(list, source, src.base);
|
a_loadaddr_ref_reg(list, source, src.base);
|
||||||
end else begin
|
end else begin
|
||||||
src := source;
|
src := source;
|
||||||
@ -1817,7 +1827,7 @@ begin
|
|||||||
not issimpleref(dest) or
|
not issimpleref(dest) or
|
||||||
((dest.index <> NR_NO) and
|
((dest.index <> NR_NO) and
|
||||||
((dest.offset + len) > high(smallint))) then begin
|
((dest.offset + len) > high(smallint))) then begin
|
||||||
dst.base := rg[R_INTREGISTER].getregister(list, R_SUBWHOLE);
|
dst.base := getaddressregister(list);
|
||||||
a_loadaddr_ref_reg(list, dest, dst.base);
|
a_loadaddr_ref_reg(list, dest, dst.base);
|
||||||
end else begin
|
end else begin
|
||||||
dst := dest;
|
dst := dest;
|
||||||
@ -1826,64 +1836,63 @@ begin
|
|||||||
{ generate a loop }
|
{ generate a loop }
|
||||||
if count > 4 then begin
|
if count > 4 then begin
|
||||||
{ the offsets are zero after the a_loadaddr_ref_reg and just
|
{ the offsets are zero after the a_loadaddr_ref_reg and just
|
||||||
have to be set to 8. I put an Inc there so debugging may be
|
have to be set to step. I put an Inc there so debugging may be
|
||||||
easier (should offset be different from zero here, it will be
|
easier (should offset be different from zero here, it will be
|
||||||
easy to notice in the generated assembler) }
|
easy to notice in the generated assembler) }
|
||||||
inc(dst.offset, 8);
|
inc(dst.offset, step);
|
||||||
inc(src.offset, 8);
|
inc(src.offset, step);
|
||||||
list.concat(taicpu.op_reg_reg_const(A_SUBI, src.base, src.base, 8));
|
list.concat(taicpu.op_reg_reg_const(A_SUBI, src.base, src.base, step));
|
||||||
list.concat(taicpu.op_reg_reg_const(A_SUBI, dst.base, dst.base, 8));
|
list.concat(taicpu.op_reg_reg_const(A_SUBI, dst.base, dst.base, step));
|
||||||
countreg := rg[R_INTREGISTER].getregister(list, R_SUBWHOLE);
|
countreg := getintregister(list, OS_INT);
|
||||||
a_load_const_reg(list, OS_64, count, countreg);
|
a_load_const_reg(list, OS_INT, count, countreg);
|
||||||
{ explicitly allocate F0 since it can be used safely here
|
|
||||||
(for holding data that's being copied) }
|
|
||||||
a_reg_alloc(list, NR_F0);
|
|
||||||
current_asmdata.getjumplabel(lab);
|
current_asmdata.getjumplabel(lab);
|
||||||
a_label(list, lab);
|
a_label(list, lab);
|
||||||
list.concat(taicpu.op_reg_reg_const(A_SUBIC_, countreg, countreg, 1));
|
list.concat(taicpu.op_reg_reg_const(A_SUBIC_, countreg, countreg, 1));
|
||||||
list.concat(taicpu.op_reg_ref(A_LFDU, NR_F0, src));
|
if (size=OS_64) then
|
||||||
list.concat(taicpu.op_reg_ref(A_STFDU, NR_F0, dst));
|
begin
|
||||||
|
list.concat(taicpu.op_reg_ref(A_LDU, tempreg, src));
|
||||||
|
list.concat(taicpu.op_reg_ref(A_STDU, tempreg, dst));
|
||||||
|
end
|
||||||
|
else
|
||||||
|
begin
|
||||||
|
list.concat(taicpu.op_reg_ref(A_LWZU, tempreg, src));
|
||||||
|
list.concat(taicpu.op_reg_ref(A_STWU, tempreg, dst));
|
||||||
|
end;
|
||||||
a_jmp(list, A_BC, C_NE, 0, lab);
|
a_jmp(list, A_BC, C_NE, 0, lab);
|
||||||
a_reg_dealloc(list, NR_F0);
|
a_reg_sync(list,src.base);
|
||||||
len := len mod 8;
|
a_reg_sync(list,dst.base);
|
||||||
|
a_reg_sync(list,countreg);
|
||||||
|
len := len mod step;
|
||||||
|
count := 0;
|
||||||
end;
|
end;
|
||||||
|
|
||||||
count := len div 8;
|
|
||||||
{ unrolled loop }
|
{ unrolled loop }
|
||||||
if count > 0 then begin
|
if count > 0 then begin
|
||||||
a_reg_alloc(list, NR_F0);
|
|
||||||
for count2 := 1 to count do begin
|
for count2 := 1 to count do begin
|
||||||
a_loadfpu_ref_reg(list, OS_F64, OS_F64, src, NR_F0);
|
a_load_ref_reg(list, size, size, src, tempreg);
|
||||||
a_loadfpu_reg_ref(list, OS_F64, OS_F64, NR_F0, dst);
|
a_load_reg_ref(list, size, size, tempreg, dst);
|
||||||
inc(src.offset, 8);
|
inc(src.offset, step);
|
||||||
inc(dst.offset, 8);
|
inc(dst.offset, step);
|
||||||
end;
|
end;
|
||||||
a_reg_dealloc(list, NR_F0);
|
len := len mod step;
|
||||||
len := len mod 8;
|
|
||||||
end;
|
end;
|
||||||
|
|
||||||
if (len and 4) <> 0 then begin
|
if (len and 4) <> 0 then begin
|
||||||
a_reg_alloc(list, NR_R0);
|
a_load_ref_reg(list, OS_32, OS_32, src, tempreg);
|
||||||
a_load_ref_reg(list, OS_32, OS_32, src, NR_R0);
|
a_load_reg_ref(list, OS_32, OS_32, tempreg, dst);
|
||||||
a_load_reg_ref(list, OS_32, OS_32, NR_R0, dst);
|
|
||||||
inc(src.offset, 4);
|
inc(src.offset, 4);
|
||||||
inc(dst.offset, 4);
|
inc(dst.offset, 4);
|
||||||
a_reg_dealloc(list, NR_R0);
|
|
||||||
end;
|
end;
|
||||||
{ copy the leftovers }
|
{ copy the leftovers }
|
||||||
if (len and 2) <> 0 then begin
|
if (len and 2) <> 0 then begin
|
||||||
a_reg_alloc(list, NR_R0);
|
a_load_ref_reg(list, OS_16, OS_16, src, tempreg);
|
||||||
a_load_ref_reg(list, OS_16, OS_16, src, NR_R0);
|
a_load_reg_ref(list, OS_16, OS_16, tempreg, dst);
|
||||||
a_load_reg_ref(list, OS_16, OS_16, NR_R0, dst);
|
|
||||||
inc(src.offset, 2);
|
inc(src.offset, 2);
|
||||||
inc(dst.offset, 2);
|
inc(dst.offset, 2);
|
||||||
a_reg_dealloc(list, NR_R0);
|
|
||||||
end;
|
end;
|
||||||
if (len and 1) <> 0 then begin
|
if (len and 1) <> 0 then begin
|
||||||
a_reg_alloc(list, NR_R0);
|
a_load_ref_reg(list, OS_8, OS_8, src, tempreg);
|
||||||
a_load_ref_reg(list, OS_8, OS_8, src, NR_R0);
|
a_load_reg_ref(list, OS_8, OS_8, tempreg, dst);
|
||||||
a_load_reg_ref(list, OS_8, OS_8, NR_R0, dst);
|
|
||||||
a_reg_dealloc(list, NR_R0);
|
|
||||||
end;
|
end;
|
||||||
|
|
||||||
end;
|
end;
|
||||||
|
Loading…
Reference in New Issue
Block a user