m68k: reworked cg.g_concatcopy; it now generates shorter and faster code in most cases.

git-svn-id: trunk@33575 -
This commit is contained in:
Károly Balogh 2016-04-30 00:15:34 +00:00
parent adbef4fc84
commit f48747adf5

View File

@@ -1485,108 +1485,104 @@ unit cgcpu;
procedure tcg68k.g_concatcopy(list : TAsmList;const source,dest : treference;len : tcgint);
  { Emits into 'list' the instructions that copy 'len' bytes from 'source'
    to 'dest'.

    Strategy, in the order it is tried below:
      - len = 0: nothing is emitted.
      - len in [1,2,4] on anything but the plain 68000: a single
        a_load_ref_ref of the matching size.
      - otherwise the addresses of 'source' and 'dest' are loaded into two
        address registers and
          * on non-68000 CPUs: large blocks get a long-word copy loop for the
            multiple-of-4 part, followed by an unrolled long/word/byte tail;
            small blocks are fully unrolled;
          * on the 68000: a byte-wise copy loop is used, which has no
            alignment problems. }
  const
    { maps a byte count of 1, 2 or 4 to the matching operand size;
      index 3 is never used (see the 'len in [1,2,4]' test below) }
    lentocgsize: array[1..4] of tcgsize = (OS_8,OS_16,OS_NO,OS_32);
  var
     helpsize : longint;
     i : byte;
     hregister : tregister;
     iregister : tregister;
     jregister : tregister;
     hl : tasmlabel;
     srcrefp,dstrefp : treference;
     srcref,dstref : treference;
  begin
     { Nothing to copy. Without this guard the 68000 byte loop below would
       load len-1 = -1 as its counter and execute the MOVE before the
       counter is ever tested. }
     if len=0 then
       exit;

     if (len in [1,2,4]) and (current_settings.cputype <> cpu_mc68000) then
       begin
         //list.concat(tai_comment.create(strpnew('g_concatcopy: small')));
         a_load_ref_ref(list,lentocgsize[len],lentocgsize[len],source,dest);
         exit;
       end;

     //list.concat(tai_comment.create(strpnew('g_concatcopy')));
     { loop counter, only used by the copy loops below }
     hregister := getintregister(list,OS_INT);

     { srcref/dstref  : plain (An) references
       srcrefp/dstrefp: the same references with direction dir_inc,
                        i.e. (An)+ }
     iregister:=getaddressregister(list);
     reference_reset_base(srcref,iregister,0,source.alignment);
     srcrefp:=srcref;
     srcrefp.direction := dir_inc;

     jregister:=getaddressregister(list);
     reference_reset_base(dstref,jregister,0,dest.alignment);
     dstrefp:=dstref;
     dstrefp.direction := dir_inc;

     { iregister = source }
     { jregister = destination }
     a_loadaddr_ref_reg(list,source,iregister);
     a_loadaddr_ref_reg(list,dest,jregister);

     if (current_settings.cputype <> cpu_mc68000) then
       begin
         { a loop is only emitted above 8 bytes when optimizing for size,
           above 16 bytes otherwise; smaller blocks are unrolled below }
         if not ((len<=8) or (not(cs_opt_size in current_settings.optimizerswitches) and (len<=16))) then
           begin
             //list.concat(tai_comment.create(strpnew('g_concatcopy tight copy loop 020+')));
             { helpsize = part of the block that is a multiple of 4,
               len      = the 0..3 bytes left for the unrolled tail }
             helpsize := len - len mod 4;
             len := len mod 4;
             a_load_const_reg(list,OS_INT,(helpsize div 4)-1,hregister);
             current_asmdata.getjumplabel(hl);
             a_label(list,hl);
             list.concat(taicpu.op_ref_ref(A_MOVE,S_L,srcrefp,dstrefp));
             if (current_settings.cputype in cpu_coldfire) or ((helpsize div 4)-1 > high(smallint)) then
               begin
                 { Coldfire does not support DBRA, also it is word only }
                 list.concat(taicpu.op_const_reg(A_SUBQ,S_L,1,hregister));
                 list.concat(taicpu.op_sym(A_BPL,S_NO,hl));
               end
             else
               list.concat(taicpu.op_reg_sym(A_DBRA,S_NO,hregister,hl));
           end;

         { Unrolled part. Every move except the very last one uses (An)+;
           the last one uses plain (An). }
         helpsize:=len div 4;
         { move a dword x times }
         for i:=1 to helpsize do
           begin
             dec(len,4);
             if (len > 0) then
               list.concat(taicpu.op_ref_ref(A_MOVE,S_L,srcrefp,dstrefp))
             else
               list.concat(taicpu.op_ref_ref(A_MOVE,S_L,srcref,dstref));
           end;
         { move a word }
         if len>1 then
           begin
             dec(len,2);
             if (len > 0) then
               list.concat(taicpu.op_ref_ref(A_MOVE,S_W,srcrefp,dstrefp))
             else
               list.concat(taicpu.op_ref_ref(A_MOVE,S_W,srcref,dstref));
           end;
         { move a single byte }
         if len>0 then
           list.concat(taicpu.op_ref_ref(A_MOVE,S_B,srcref,dstref));
       end
     else
       begin
         { Fast 68010 loop mode with no possible alignment problems }
         //list.concat(tai_comment.create(strpnew('g_concatcopy tight byte copy loop')));
         a_load_const_reg(list,OS_INT,len - 1,hregister);
         current_asmdata.getjumplabel(hl);
         a_label(list,hl);
         list.concat(taicpu.op_ref_ref(A_MOVE,S_B,srcrefp,dstrefp));
         if (len - 1) > high(smallint) then
           begin
             { counter does not fit the word-only DBRA, count down with a
               long-word SUBQ instead }
             list.concat(taicpu.op_const_reg(A_SUBQ,S_L,1,hregister));
             list.concat(taicpu.op_sym(A_BPL,S_NO,hl));
           end
         else
           { DBRA is word only, so no operand size is attached; this matches
             the long-word copy loop above }
           list.concat(taicpu.op_reg_sym(A_DBRA,S_NO,hregister,hl));
       end;
  end;
procedure tcg68k.g_overflowcheck(list: TAsmList; const l:tlocation; def:tdef); procedure tcg68k.g_overflowcheck(list: TAsmList; const l:tlocation; def:tdef);
var var