+ Support (as target-independent as possible) optimization of division by constants:

The code generator gets two new methods, a_mul_reg_reg_pair and g_div_const_reg_reg. The first one is basically a 32x32 to 64-bit multiplication (or any other size, with the result having twice the size of the arguments), which must be implemented for every target. The second one actually does the job; its default implementation is taken from powerpc64 and is sufficiently good for all three-address targets.

+ Enabled optimized division for the MIPS target; the target-specific changes are under 30 lines.

git-svn-id: trunk@27904 -
This commit is contained in:
sergei 2014-06-08 22:50:24 +00:00
parent 73d7f2aa18
commit cd27d64cd5
3 changed files with 103 additions and 9 deletions

View File

@ -249,6 +249,10 @@ unit cgobj;
{ bit scan instructions }
procedure a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; size: tcgsize; src, dst: TRegister); virtual; abstract;
{ Multiplication with doubling result size: multiplies src1 by src2 (both of
  the given size) and delivers the low half of the product in dstlo and the
  high half in dsthi.
  dstlo or dsthi may be NR_NO, in which case corresponding half of result is discarded.
  The base implementation only raises an internal error, so a target that
  wants to use this method has to override it. }
procedure a_mul_reg_reg_pair(list: TAsmList; size: tcgsize; src1,src2,dstlo,dsthi: TRegister);virtual;
{ fpu move instructions }
procedure a_loadfpu_reg_reg(list: TAsmList; fromsize, tosize:tcgsize; reg1, reg2: tregister); virtual; abstract;
procedure a_loadfpu_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; reg: tregister); virtual; abstract;
@ -458,6 +462,9 @@ unit cgobj;
{ Generate code to exit an unwind-protected region. The default implementation
produces a simple jump to destination label. }
procedure g_local_unwind(list: TAsmList; l: TAsmLabel);virtual;
{ Generate code for integer division by constant,
  generic version is suitable for 3-address CPUs.
  list - assembler list the instructions are appended to
  size - operand size; the generic version handles OS_S32/OS_S64 (signed)
         and OS_32/OS_64 (unsigned) and raises an internal error otherwise
  a    - the constant divisor
  src  - register holding the numerator
  dst  - register receiving the quotient }
procedure g_div_const_reg_reg(list:tasmlist; size: TCgSize; a: tcgint; src,dst: tregister); virtual;
protected
function g_indirect_sym_load(list:TAsmList;const symname: string; const flags: tindsymflags): tregister;virtual;
@ -2501,6 +2508,69 @@ implementation
internalerror(200807238);
end;
{ Default implementation of the widening multiplication (src1*src2 with the
  result split into dstlo/dsthi). It emits no code and unconditionally raises
  internal error 2014060801; targets that rely on this method must override it. }
procedure tcg.a_mul_reg_reg_pair(list: TAsmList; size: TCgSize; src1,src2,dstlo,dsthi: TRegister);
begin
internalerror(2014060801);
end;
{ Generic code generation for "dst := src div a" where a is a constant.
  The division is replaced by a widening multiplication with a precomputed
  magic constant (only the high half of the product is kept), followed by
  correction and shift steps.

  list - assembler list receiving the generated instructions
  size - OS_S32/OS_S64 select the signed sequence, OS_32/OS_64 the unsigned
         one; any other size raises internal error 2014060601
  a    - constant divisor
  src  - register holding the numerator (still read after dst has been
         written, so the sequence relies on src staying intact)
  dst  - register receiving the quotient }
procedure tcg.g_div_const_reg_reg(list:tasmlist; size: TCgSize; a: tcgint; src,dst: tregister);
  var
    tmpreg,
    signsrc: tregister;
    opbits: longint;
    s_magic: aInt;
    magicu: aWord;
    shiftcnt: byte;
    needadd: boolean;
  begin
    { scratch register: first holds the magic constant, later intermediate values }
    tmpreg:=getintregister(list,OS_INT);
    { operand width in bits }
    opbits:=tcgsize2size[size]*8;
    case size of
      OS_S32,OS_S64:
        begin
          calc_divconst_magic_signed(opbits,a,s_magic,shiftcnt);
          { tmpreg := magic constant }
          a_load_const_reg(list,OS_INT,s_magic,tmpreg);
          { dst := high half of src*magic, the low half is dropped }
          a_mul_reg_reg_pair(list,size,src,tmpreg,NR_NO,dst);
          { correct by the numerator when divisor and magic differ in sign }
          if (a>0) and (s_magic<0) then
            a_op_reg_reg_reg(list,OP_ADD,OS_INT,src,dst,dst)
          else if (a<0) and (s_magic>0) then
            a_op_reg_reg_reg(list,OP_SUB,OS_INT,src,dst,dst);
          { arithmetic shift right by the computed amount }
          a_op_const_reg_reg(list,OP_SAR,OS_INT,shiftcnt,dst,dst);
          { the sign bit is taken from the numerator for a non-negative divisor,
            from the intermediate quotient otherwise }
          if (a>=0) then
            signsrc:=src
          else
            signsrc:=dst;
          { tmpreg := sign bit (0 or 1), then add it to the quotient }
          a_op_const_reg_reg(list,OP_SHR,OS_INT,opbits-1,signsrc,tmpreg);
          a_op_reg_reg_reg(list,OP_ADD,OS_INT,dst,tmpreg,dst);
        end;
      OS_32,OS_64:
        begin
          calc_divconst_magic_unsigned(opbits,a,magicu,needadd,shiftcnt);
          { tmpreg := magic constant }
          a_load_const_reg(list,OS_INT,tcgint(magicu),tmpreg);
          { dst := high half of src*magic, the low half is dropped }
          a_mul_reg_reg_pair(list,size,src,tmpreg,NR_NO,dst);
          if not needadd then
            a_op_const_reg_reg(list,OP_SHR,OS_INT,shiftcnt,dst,dst)
          else
            begin
              { compute "(numerator+result) shr shiftcnt" without overflowing
                the intermediate sum }
              { tmpreg := numerator-result }
              a_op_reg_reg_reg(list,OP_SUB,OS_INT,dst,src,tmpreg);
              { tmpreg := (numerator-result)/2 }
              a_op_const_reg_reg(list,OP_SHR,OS_INT,1,tmpreg,tmpreg);
              { tmpreg := (numerator+result)/2 }
              a_op_reg_reg_reg(list,OP_ADD,OS_INT,tmpreg,dst,tmpreg);
              { one bit has been shifted out already, hence shiftcnt-1 }
              a_op_const_reg_reg(list,OP_SHR,OS_INT,shiftcnt-1,tmpreg,dst);
            end;
        end;
      else
        InternalError(2014060601);
    end;
  end;
{*****************************************************************************
TCG64
*****************************************************************************}

View File

@ -76,6 +76,7 @@ type
procedure g_flags2reg(list: tasmlist; size: TCgSize; const f: TResFlags; reg: tregister); override;
procedure a_jmp_always(List: tasmlist; l: TAsmLabel); override;
procedure a_jmp_name(list: tasmlist; const s: string); override;
procedure a_mul_reg_reg_pair(list: tasmlist; size: tcgsize; src1,src2,dstlo,dsthi: tregister); override;
procedure g_overflowCheck(List: tasmlist; const Loc: TLocation; def: TDef); override;
procedure g_overflowCheck_loc(List: tasmlist; const Loc: TLocation; def: TDef; ovloc: tlocation); override;
procedure g_proc_entry(list: tasmlist; localsize: longint; nostackframe: boolean); override;
@ -1154,6 +1155,24 @@ procedure TCGMIPS.g_flags2reg(list: tasmlist; size: tcgsize; const f: tresflags;
end;
{ MIPS implementation of the widening multiplication.
  size OS_32 selects the unsigned multiply (MULTU), OS_S32 the signed one
  (MULT); any other size raises internal error 2014060802.
  After the multiply, the low half of the product is fetched with MFLO into
  dstlo and the high half with MFHI into dsthi; a half whose destination is
  NR_NO is not fetched. }
procedure TCGMIPS.a_mul_reg_reg_pair(list: tasmlist; size: tcgsize; src1,src2,dstlo,dsthi: tregister);
  var
    mulop: tasmop;
  begin
    { choose the multiply instruction matching the signedness of size }
    if size=OS_32 then
      mulop:=A_MULTU
    else if size=OS_S32 then
      mulop:=A_MULT
    else
      InternalError(2014060802);
    { the multiply itself takes only the two source operands }
    list.concat(taicpu.op_reg_reg(mulop,src1,src2));
    { copy out only the halves the caller asked for }
    if (dstlo<>NR_NO) then
      list.concat(taicpu.op_reg(A_MFLO,dstlo));
    if (dsthi<>NR_NO) then
      list.concat(taicpu.op_reg(A_MFHI,dsthi));
  end;
procedure TCGMIPS.g_overflowCheck(List: tasmlist; const Loc: TLocation; def: TDef);
begin
// this is an empty procedure

View File

@ -86,16 +86,21 @@ begin
numerator := left.location.Register;
if (nodetype = divn) and
(right.nodetype = ordconstn) and
ispowerof2(tordconstnode(right).Value.svalue, power) then
(right.nodetype = ordconstn) then
begin
tmpreg := cg.GetIntRegister(current_asmdata.CurrAsmList, OS_INT);
cg.a_op_const_reg_reg(current_asmdata.CurrAsmList, OP_SAR, OS_INT, 31, numerator, tmpreg);
{ if signed, tmpreg=right value-1, otherwise 0 }
cg.a_op_const_reg(current_asmdata.CurrAsmList, OP_AND, OS_INT, tordconstnode(right).Value.svalue - 1, tmpreg);
{ add left value }
cg.a_op_reg_reg(current_asmdata.CurrAsmList, OP_ADD, OS_INT, numerator, tmpreg);
cg.a_op_const_reg_reg(current_asmdata.CurrAsmList, OP_SAR, OS_INT, aword(power), tmpreg, location.register);
if ispowerof2(tordconstnode(right).Value.svalue, power) then
begin
tmpreg := cg.GetIntRegister(current_asmdata.CurrAsmList, OS_INT);
cg.a_op_const_reg_reg(current_asmdata.CurrAsmList, OP_SAR, OS_INT, 31, numerator, tmpreg);
{ if signed, tmpreg=right value-1, otherwise 0 }
cg.a_op_const_reg(current_asmdata.CurrAsmList, OP_AND, OS_INT, tordconstnode(right).Value.svalue - 1, tmpreg);
{ add left value }
cg.a_op_reg_reg(current_asmdata.CurrAsmList, OP_ADD, OS_INT, numerator, tmpreg);
cg.a_op_const_reg_reg(current_asmdata.CurrAsmList, OP_SAR, OS_INT, aword(power), tmpreg, location.register);
end
else
cg.g_div_const_reg_reg(current_asmdata.CurrAsmList,def_cgsize(resultdef),
tordconstnode(right).value.svalue,numerator,location.register);
end
else
begin