+ Xtensa: tcgcpu.g_concatcopy

git-svn-id: trunk@44350 -
This commit is contained in:
florian 2020-03-23 20:54:57 +00:00
parent 46e8cb3e49
commit b1759d9e91

View File

@ -37,7 +37,8 @@ interface
type
tcgcpu=class(tcg)
private
procedure fixref(list : TAsmList; var ref : treference);
procedure fixref(list : TAsmList; var ref : treference);
procedure g_concatcopy_move(list : tasmlist; const Source,dest : treference; len : tcgint);
public
procedure init_register_allocators;override;
procedure done_register_allocators;override;
@ -65,7 +66,7 @@ interface
procedure a_cmp_reg_reg_label(list: TAsmList; size: tcgsize; cmp_op: topcmp; reg1, reg2: tregister; l: tasmlabel);override;
procedure a_jmp_always(list: TAsmList; l: TAsmLabel);override;
procedure g_concatcopy(list: TAsmList; const source, dest: treference; len: tcgint);override;
procedure g_concatcopy(list : TAsmList; const source,dest : treference; len : tcgint);override;
procedure maybeadjustresult(list: TAsmList; op: TOpCg; size: tcgsize; dst: tregister);
{$ifdef dummy}
@ -595,10 +596,143 @@ implementation
end;
procedure tcgcpu.g_concatcopy(list : TAsmList; const source,
dest : treference; len : tcgint);
procedure tcgcpu.g_concatcopy_move(list: tasmlist; const Source, dest: treference; len: tcgint);
var
paraloc1, paraloc2, paraloc3: TCGPara;
pd: tprocdef;
begin
list.Concat(taicpu.op_none(A_NOP));
pd:=search_system_proc('MOVE');
paraloc1.init;
paraloc2.init;
paraloc3.init;
paramanager.getcgtempparaloc(list, pd, 1, paraloc1);
paramanager.getcgtempparaloc(list, pd, 2, paraloc2);
paramanager.getcgtempparaloc(list, pd, 3, paraloc3);
a_load_const_cgpara(list, OS_SINT, len, paraloc3);
a_loadaddr_ref_cgpara(list, dest, paraloc2);
a_loadaddr_ref_cgpara(list, Source, paraloc1);
paramanager.freecgpara(list, paraloc3);
paramanager.freecgpara(list, paraloc2);
paramanager.freecgpara(list, paraloc1);
alloccpuregisters(list, R_INTREGISTER, paramanager.get_volatile_registers_int(pocall_default));
alloccpuregisters(list, R_FPUREGISTER, paramanager.get_volatile_registers_fpu(pocall_default));
a_call_name(list, 'FPC_MOVE', false);
dealloccpuregisters(list, R_FPUREGISTER, paramanager.get_volatile_registers_fpu(pocall_default));
dealloccpuregisters(list, R_INTREGISTER, paramanager.get_volatile_registers_int(pocall_default));
paraloc3.done;
paraloc2.done;
paraloc1.done;
end;
procedure tcgcpu.g_concatcopy(list : TAsmList;const source,dest : treference;len : tcgint);
var
tmpreg1, hreg, countreg: TRegister;
src, dst, src2, dst2: TReference;
lab: tasmlabel;
Count, count2: aint;
function reference_is_reusable(const ref: treference): boolean;
begin
result:=(ref.base<>NR_NO) and (ref.index=NR_NO) and
(ref.symbol=nil);
end;
begin
src2:=source;
fixref(list,src2);
dst2:=dest;
fixref(list,dst2);
if len > high(longint) then
internalerror(2002072704);
{ A call (to FPC_MOVE) requires the outgoing parameter area to be properly
allocated on stack. This can only be done before tmipsprocinfo.set_first_temp_offset,
i.e. before secondpass. Other internal procedures request correct stack frame
by setting pi_do_call during firstpass, but for this particular one it is impossible.
Therefore, if the current procedure is a leaf one, we have to leave it that way. }
{ anybody wants to determine a good value here :)? }
if (len > 100) and
assigned(current_procinfo) and
(pi_do_call in current_procinfo.flags) then
g_concatcopy_move(list, src2, dst2, len)
else
begin
Count := len div 4;
if (count<=4) and reference_is_reusable(src2) then
src:=src2
else
begin
reference_reset(src,sizeof(aint),[]);
{ load the address of src2 into src.base }
src.base := GetAddressRegister(list);
a_loadaddr_ref_reg(list, src2, src.base);
end;
if (count<=4) and reference_is_reusable(dst2) then
dst:=dst2
else
begin
reference_reset(dst,sizeof(aint),[]);
{ load the address of dst2 into dst.base }
dst.base := GetAddressRegister(list);
a_loadaddr_ref_reg(list, dst2, dst.base);
end;
{ generate a loop }
if Count > 4 then
begin
countreg := GetIntRegister(list, OS_INT);
tmpreg1 := GetIntRegister(list, OS_INT);
a_load_const_reg(list, OS_INT, Count, countreg);
current_asmdata.getjumplabel(lab);
a_label(list, lab);
list.concat(taicpu.op_reg_ref(A_L32I, tmpreg1, src));
list.concat(taicpu.op_reg_ref(A_S32I, tmpreg1, dst));
list.concat(taicpu.op_reg_reg_const(A_ADDI, src.base, src.base, 4));
list.concat(taicpu.op_reg_reg_const(A_ADDI, dst.base, dst.base, 4));
list.concat(taicpu.op_reg_reg_const(A_ADDI, countreg, countreg, -1));
a_cmp_const_reg_label(list,OS_INT,OC_GT,0,countreg,lab);
len := len mod 4;
end;
{ unrolled loop }
Count := len div 4;
if Count > 0 then
begin
tmpreg1 := GetIntRegister(list, OS_INT);
for count2 := 1 to Count do
begin
list.concat(taicpu.op_reg_ref(A_L32I, tmpreg1, src));
list.concat(taicpu.op_reg_ref(A_S32I, tmpreg1, dst));
Inc(src.offset, 4);
Inc(dst.offset, 4);
end;
len := len mod 4;
end;
if (len and 4) <> 0 then
begin
hreg := GetIntRegister(list, OS_INT);
a_load_ref_reg(list, OS_32, OS_32, src, hreg);
a_load_reg_ref(list, OS_32, OS_32, hreg, dst);
Inc(src.offset, 4);
Inc(dst.offset, 4);
end;
{ copy the leftovers }
if (len and 2) <> 0 then
begin
hreg := GetIntRegister(list, OS_INT);
a_load_ref_reg(list, OS_16, OS_16, src, hreg);
a_load_reg_ref(list, OS_16, OS_16, hreg, dst);
Inc(src.offset, 2);
Inc(dst.offset, 2);
end;
if (len and 1) <> 0 then
begin
hreg := GetIntRegister(list, OS_INT);
a_load_ref_reg(list, OS_8, OS_8, src, hreg);
a_load_reg_ref(list, OS_8, OS_8, hreg, dst);
end;
end;
end;