* ppc64: moved division by constant optimization from nppcmat to cgcpu unit

* ppc64: cg now also replaces multiplications/divisions by negative powers of two by shifts (and a negate)
* ppc64: replacement of division by constants now properly checks the -O2 compiler switch

git-svn-id: trunk@1662 -
This commit is contained in:
tom_at_work 2005-11-05 17:59:14 +00:00
parent 375bc7bbb0
commit d088695868
5 changed files with 248 additions and 373 deletions

View File

@ -172,6 +172,136 @@ uses
symconst, symsym, fmodule,
rgobj, tgobj, cpupi, procinfo, paramgr;
{ Derives the constants needed to replace an unsigned division by the constant
  d with a multiplication followed by shifts. See the PowerPC compiler
  developer manual for the underlying algorithm.
    N           : operand width in bits
    d           : divisor, must be non-zero (asserted)
    magic_m     : receives the multiplier
    magic_add   : receives true once the d-quotient reaches the 2**(N-1)
                  threshold right before it is doubled
    magic_shift : receives the resulting shift count (exponent - N)
  NOTE(review): all intermediate values are plain aWord; in the visible code
  this is only called with N = sizeof(aWord)*8, narrower widths look
  unverified - confirm before using them. }
procedure getmagic_unsignedN(const N : byte; const d : aWord;
  out magic_m : aWord; out magic_add : boolean; out magic_shift : byte);
var
  exponent : aInt;
  limit, slack, quot_nc, rem_nc, quot_d, rem_d, half_range : aWord;
begin
  assert(d > 0);
  magic_add := false;
  { 2**(N-1), the starting power of two }
  half_range := aWord(1) shl (N-1);
  limit := - 1 - (-d) mod d;
  exponent := N-1;
  { quotient/remainder of 2**exponent by limit }
  quot_nc := half_range div limit;
  rem_nc := half_range - quot_nc*limit;
  { quotient/remainder of (2**exponent - 1) by d }
  quot_d := (half_range-1) div d;
  rem_d := (half_range-1) - quot_d*d;
  repeat
    inc(exponent);
    { double the limit-quotient, carrying into it when the remainder wraps }
    if (rem_nc < (limit - rem_nc)) then begin
      quot_nc := 2*quot_nc;
      rem_nc := 2*rem_nc;
    end else begin
      quot_nc := 2 * quot_nc + 1;
      rem_nc := 2*rem_nc - limit;
    end;
    { same doubling step for the d-quotient; remember when it gets too wide }
    if ((rem_d + 1) < (d - rem_d)) then begin
      if (quot_d >= half_range) then
        magic_add := true;
      quot_d := 2*quot_d;
      rem_d := 2*rem_d + 1;
    end else begin
      if (quot_d >= (half_range-1)) then
        magic_add := true;
      quot_d := 2*quot_d + 1;
      rem_d := 2*rem_d + 1 - d;
    end;
    slack := d - 1 - rem_d;
  until (exponent >= (2*N)) or
    not ((quot_nc < slack) or ((quot_nc = slack) and (rem_nc = 0)));
  magic_m := quot_d + 1;
  magic_shift := exponent - N;
end;
{ Derives the constants needed to replace a signed division by the constant d
  with a multiplication followed by shifts. See the PowerPC compiler developer
  manual for the underlying algorithm.
    N       : operand width in bits
    d       : divisor, must be < -1 or > 1 (asserted)
    magic_m : receives the multiplier, negated when d is negative
    magic_s : receives the resulting shift count (exponent - N)
  NOTE(review): all intermediate values are plain aWord; in the visible code
  this is only called with N = sizeof(aInt)*8, narrower widths look
  unverified - confirm before using them. }
procedure getmagic_signedN(const N : byte; const d : aInt;
  out magic_m : aInt; out magic_s : aInt);
var
  exponent : aInt;
  abs_d, abs_nc, slack, quot_nc, rem_nc, quot_d, rem_d, bias : aWord;
  half_range : aWord;
begin
  assert((d < -1) or (d > 1));
  { 2**(N-1), the starting power of two }
  half_range := aWord(1) shl (N-1);
  abs_d := abs(d);
  bias := half_range + (aWord(d) shr (N-1));
  { absolute value of nc }
  abs_nc := bias - 1 - bias mod abs_d;
  exponent := (N-1);
  { quotient/remainder of 2**exponent by abs(nc) }
  quot_nc := half_range div abs_nc;
  rem_nc := half_range - quot_nc*abs_nc;
  { quotient/remainder of 2**exponent by abs(d) }
  quot_d := half_range div abs_d;
  rem_d := half_range - quot_d*abs_d;
  repeat
    inc(exponent);
    { advance to the next power of two for the abs(nc) pair }
    quot_nc := 2*quot_nc;
    rem_nc := 2*rem_nc;
    if (rem_nc >= abs_nc) then begin { must be unsigned comparison }
      inc(quot_nc);
      dec(rem_nc, abs_nc);
    end;
    { advance to the next power of two for the abs(d) pair }
    quot_d := 2*quot_d;
    rem_d := 2*rem_d;
    if (rem_d >= abs_d) then begin { must be unsigned comparison }
      inc(quot_d);
      dec(rem_d, abs_d);
    end;
    slack := abs_d - rem_d;
  until (quot_nc >= slack) and ((quot_nc <> slack) or (rem_nc <> 0));
  { resulting magic number, carrying the sign of the divisor }
  magic_m := quot_d + 1;
  if (d < 0) then begin
    magic_m := -magic_m;
  end;
  { resulting shift }
  magic_s := exponent - N;
end;
{ Tests whether value is a positive or negative power of two.
    value  : number to test
    power  : receives the exponent when the function returns true, 0 otherwise
    neg    : receives true whenever value is negative, independent of the
             function result
  Returns true if abs(value) is a power of two. Zero is not a power of two and
  yields false. }
function ispowerof2(value : aInt; out power : byte; out neg : boolean) : boolean;
var
  i : longint;
  hl : aInt;
begin
  { give every exit path defined values; previously a value of 0 passed the
    bit test below, never matched in the loop and left result/power unset }
  result := false;
  power := 0;
  neg := false;
  { also try to find negative powers of two by negating if the
    value is negative. low(aInt) is special because it can not be
    negated. Simply return the appropriate values for it }
  if (value < 0) then begin
    neg := true;
    if (value = low(aInt)) then begin
      power := sizeof(aInt)*8-1;
      result := true;
      exit;
    end;
    value := -value;
  end;
  { zero has no bit set at all; anything with more than one bit set fails the
    classic value and (value-1) test }
  if (value = 0) or ((value and (value-1)) <> 0) then
    exit;
  { exactly one bit is set: locate it }
  hl := 1;
  for i := 0 to (sizeof(aInt)*8-1) do begin
    if (hl = value) then begin
      result := true;
      power := i;
      exit;
    end;
    hl := hl shl 1;
  end;
end;
procedure tcgppc.init_register_allocators;
begin
inherited init_register_allocators;
@ -438,7 +568,9 @@ procedure tcgppc.a_load_const_reg(list: taasmoutput; size: TCGSize; a: aint;
end;
{ R0-safe version of the above (ADDIS doesn't work the same way with R0 as base), without
the return value }
the return value. Unused until further testing shows that it is not really necessary;
loading the upper 32 bits of a value is now done using R12, which does not require
special treatment }
procedure load32bitconstantR0(list : taasmoutput; size : TCGSize; a : longint;
reg : TRegister);
begin
@ -707,9 +839,86 @@ var
else
list.concat(taicpu.op_reg_reg_const(A_ANDI_, dst, src, word(a)));
end;
{ Emits code into list which divides src by the constant a and leaves the
  quotient in dst, without using a divide instruction.
    list   : instruction list receiving the generated code
    size   : operand size (currently unused, all operations use OS_INT)
    a      : divisor; 0 raises an internal error
    src    : register holding the numerator
    dst    : register receiving the quotient
    signed : true for signed division, false for unsigned division
  Strategy: 1 and -1 are handled by move/negate, powers of two by shifts, all
  other divisors by multiplication with a precomputed magic number.
  NOTE(review): with signed = false a divisor that has its top bit set arrives
  here as a negative aint; the a = -1 and negative power-of-two shortcuts then
  look wrong for it - confirm callers never pass such constants. }
procedure do_constant_div(list : taasmoutput; size : TCgSize; a : aint; src, dst : TRegister;
  signed : boolean);
const
  { negate opcode, indexed by whether cs_check_overflow is active }
  negops : array[boolean] of tasmop = (A_NEG, A_NEGO);
var
  magic, shift : int64;
  u_magic : qword;
  u_shift : byte;
  u_add : boolean;
  power : byte;
  isNegPower : boolean;
  divreg : tregister;
begin
  { all code is emitted into the list passed by the caller, not into the
    global exprasmlist, so that it ends up where the caller expects it }
  if (a = 0) then begin
    internalerror(2005061701);
  end else if (a = 1) then begin
    cg.a_load_reg_reg(list, OS_INT, OS_INT, src, dst);
  end else if (a = -1) then begin
    { note: only in the signed case possible..., may overflow }
    list.concat(taicpu.op_reg_reg(negops[cs_check_overflow in aktlocalswitches], dst, src));
  end else if (ispowerof2(a, power, isNegPower)) then begin
    if (signed) then begin
      { From "The PowerPC Compiler Writer's Guide", pg. 52ff }
      cg.a_op_const_reg_reg(list, OP_SAR, OS_INT, power,
        src, dst);
      list.concat(taicpu.op_reg_reg(A_ADDZE, dst, dst));
      { division by -(2**power): negate the quotient of the positive power }
      if (isNegPower) then
        list.concat(taicpu.op_reg_reg(A_NEG, dst, dst));
    end else begin
      cg.a_op_const_reg_reg(list, OP_SHR, OS_INT, power, src, dst)
    end;
  end else begin
    { replace division by multiplication, both implementations }
    { from "The PowerPC Compiler Writer's Guide" pg. 53ff }
    divreg := cg.getintregister(list, OS_INT);
    if (signed) then begin
      getmagic_signedN(sizeof(aInt)*8, a, magic, shift);
      { load magic value }
      cg.a_load_const_reg(list, OS_INT, magic, divreg);
      { multiply }
      list.concat(taicpu.op_reg_reg_reg(A_MULHD, dst, src, divreg));
      { add/subtract numerator }
      if (a > 0) and (magic < 0) then begin
        cg.a_op_reg_reg_reg(list, OP_ADD, OS_INT, src, dst, dst);
      end else if (a < 0) and (magic > 0) then begin
        cg.a_op_reg_reg_reg(list, OP_SUB, OS_INT, src, dst, dst);
      end;
      { shift shift places to the right (arithmetic) }
      cg.a_op_const_reg_reg(list, OP_SAR, OS_INT, shift, dst, dst);
      { extract and add sign bit }
      if (a >= 0) then begin
        cg.a_op_const_reg_reg(list, OP_SHR, OS_INT, 63, src, divreg);
      end else begin
        cg.a_op_const_reg_reg(list, OP_SHR, OS_INT, 63, dst, divreg);
      end;
      cg.a_op_reg_reg_reg(list, OP_ADD, OS_INT, dst, divreg, dst);
    end else begin
      getmagic_unsignedN(sizeof(aWord)*8, a, u_magic, u_add, u_shift);
      { load magic in divreg }
      cg.a_load_const_reg(list, OS_INT, u_magic, divreg);
      list.concat(taicpu.op_reg_reg_reg(A_MULHDU, dst, src, divreg));
      if (u_add) then begin
        { fixup sequence used when getmagic_unsignedN reported magic_add }
        cg.a_op_reg_reg_reg(list, OP_SUB, OS_INT, dst, src, divreg);
        cg.a_op_const_reg_reg(list, OP_SHR, OS_INT, 1, divreg, divreg);
        cg.a_op_reg_reg_reg(list, OP_ADD, OS_INT, divreg, dst, divreg);
        cg.a_op_const_reg_reg(list, OP_SHR, OS_INT, u_shift-1, divreg, dst);
      end else begin
        cg.a_op_const_reg_reg(list, OP_SHR, OS_INT, u_shift, dst, dst);
      end;
    end;
  end;
end;
var
scratchreg: tregister;
shift, shiftmask : longint;
shift : byte;
shiftmask : longint;
isneg : boolean;
begin
{ subtraction is the same as addition with negative constant }
@ -725,13 +934,8 @@ begin
useReg := false;
case (op) of
OP_DIV, OP_IDIV:
{ actually, this method should be never called directly with OP_DIV or
OP_IDIV, so just provide basic support.
TODO: move division by constant stuff from nppcmat.pas here }
if (a = 0) then
internalerror(200208103)
else if (a = 1) then
a_load_reg_reg(list, size, size, src, dst)
if (cs_slowoptimize in aktglobalswitches) then
do_constant_div(list, size, a, src, dst, op = OP_IDIV)
else
usereg := true;
OP_IMUL, OP_MUL:
@ -743,9 +947,11 @@ begin
list.concat(taicpu.op_reg_reg(A_NEG, dst, dst))
else if (a = 1) then
a_load_reg_reg(list, OS_INT, OS_INT, src, dst)
else if ispowerof2(a, shift) then
list.concat(taicpu.op_reg_reg_const(A_SLDI, dst, src, shift))
else if (a >= low(smallint)) and (a <= high(smallint)) then
else if ispowerof2(a, shift, isneg) then begin
list.concat(taicpu.op_reg_reg_const(A_SLDI, dst, src, shift));
if (isneg) then
exprasmlist.concat(taicpu.op_reg_reg(A_NEG, dst, dst));
end else if (a >= low(smallint)) and (a <= high(smallint)) then
list.concat(taicpu.op_reg_reg_const(A_MULLI, dst, src,
smallint(a)))
else
@ -808,7 +1014,6 @@ end;
procedure tcgppc.a_op_reg_reg_reg(list: taasmoutput; op: TOpCg;
size: tcgsize; src1, src2, dst: tregister);
const
op_reg_reg_opcg2asmop32: array[TOpCG] of tasmop =
(A_NONE, A_ADD, A_AND, A_DIVWU, A_DIVW, A_MULLW, A_MULLW, A_NEG, A_NOT, A_OR,
@ -816,7 +1021,6 @@ const
op_reg_reg_opcg2asmop64: array[TOpCG] of tasmop =
(A_NONE, A_ADD, A_AND, A_DIVDU, A_DIVD, A_MULLD, A_MULLD, A_NEG, A_NOT, A_OR,
A_SRAD, A_SLD, A_SRD, A_SUB, A_XOR);
begin
case op of
OP_NEG, OP_NOT:
@ -1559,7 +1763,7 @@ begin
least four. If not, add the bytes which are "off" to the base register and
adjust the offset accordingly }
case op of
A_LD, A_LDU, A_STD, A_STDU, A_LWA, A_LWAU :
A_LD, A_LDU, A_STD, A_STDU, A_LWA :
if ((ref.offset mod 4) <> 0) then begin
tmpreg := rg[R_INTREGISTER].getregister(list, R_SUBWHOLE);
@ -1621,7 +1825,8 @@ begin
tmpref.base := ref.base;
tmpref.index := tmpreg2;
case op of
{ the code generator doesn't generate update instructions anyway }
{ the code generator doesn't generate update instructions anyway, so
error out on those instructions }
A_LBZ : op := A_LBZX;
A_LHZ : op := A_LHZX;
A_LWZ : op := A_LWZX;

View File

@ -96,7 +96,7 @@ type
A_RLDICL,
A_DIVDU, A_DIVDU_, A_DIVD, A_DIVD_, A_MULLD, A_MULLD_, A_MULHD, A_MULHD_, A_SRAD, A_SLD, A_SRD,
A_DIVDUO_, A_DIVDO_,
A_LWA, A_LWAU, A_LWAX, A_LWAUX,
A_LWA, A_LWAX, A_LWAUX,
A_FCFID,
A_LDARX, A_STDCX_, A_CNTLZD,
A_LVX, A_STVX,

View File

@ -86,7 +86,7 @@ const
'rldicl',
'divdu', 'divdu.', 'divd', 'divd.', 'mulld', 'mulld.', 'mulhd', 'mulhd.', 'srad', 'sld', 'srd',
'divduo.', 'divdo.',
'lwa', '<illegal lwau>', 'lwax', 'lwaux',
'lwa', 'lwax', 'lwaux',
'fcfid',
'ldarx', 'stdcx.', 'cntlzd',
'lvx', 'stvx',

View File

@ -36,7 +36,8 @@ type
}
function first_abs_real: tnode; override;
function first_sqr_real: tnode; override;
{ todo: inline trunc/round/frac?/int }
{ trunc/round/frac?/int can't be inlined? }
procedure second_abs_real; override;
procedure second_sqr_real; override;

View File

@ -59,176 +59,6 @@ uses
cpubase, cpuinfo,
ncgutil, cgcpu, rgobj;
{ helper functions }
{ Computes the constants used to replace an unsigned 32 bit division by the
  constant d with a multiplication.
    d           : divisor, must be non-zero (asserted)
    magic_m     : receives the multiplier (q2 + 1)
    magic_add   : receives true once q2 reaches the $7FFFFFFF / $80000000
                  threshold right before it is doubled
    magic_shift : receives the shift count (p - 32)
  In the comments below "2p" denotes 2 to the power of p. }
procedure getmagic_unsigned32(d : dword; out magic_m : dword; out magic_add : boolean; out magic_shift : dword);
var
p : longint;
nc, delta, q1, r1, q2, r2 : dword;
begin
assert(d > 0);
magic_add := false;
nc := - 1 - (-d) mod d;
p := 31; { initialize p }
q1 := $80000000 div nc; { initialize q1 = 2p/nc }
r1 := $80000000 - q1*nc; { initialize r1 = rem(2p,nc) }
q2 := $7FFFFFFF div d; { initialize q2 = (2p-1)/d }
r2 := $7FFFFFFF - q2*d; { initialize r2 = rem((2p-1),d) }
{ refine both quotient/remainder pairs one power of two at a time; the loop
  stops at p = 64 at the latest }
repeat
inc(p);
if (r1 >= (nc - r1)) then begin
q1 := 2 * q1 + 1; { update q1 }
r1 := 2*r1 - nc; { update r1 }
end else begin
q1 := 2*q1; { update q1 }
r1 := 2*r1; { update r1 }
end;
if ((r2 + 1) >= (d - r2)) then begin
if (q2 >= $7FFFFFFF) then
magic_add := true;
q2 := 2*q2 + 1; { update q2 }
r2 := 2*r2 + 1 - d; { update r2 }
end else begin
if (q2 >= $80000000) then
magic_add := true;
q2 := 2*q2; { update q2 }
r2 := 2*r2 + 1; { update r2 }
end;
delta := d - 1 - r2;
until not ((p < 64) and ((q1 < delta) or ((q1 = delta) and (r1 = 0))));
magic_m := q2 + 1; { resulting magic number }
magic_shift := p - 32; { resulting shift }
end;
{ Computes the constants used to replace a signed 32 bit division by the
  constant d with a multiplication.
    d       : divisor, must be < -1 or > 1 (asserted)
    magic_m : receives the multiplier (q2 + 1), negated when d is negative
    magic_s : receives the shift count (p - 32)
  In the comments below "2p" denotes 2 to the power of p. }
procedure getmagic_signed32(d : longint; out magic_m : longint; out magic_s : longint);
const
{ 2 to the power of 31 }
two_31 : DWord = high(longint)+1;
var
p : Longint;
ad, anc, delta, q1, r1, q2, r2, t : DWord;
begin
assert((d < -1) or (d > 1));
ad := abs(d);
{ t is 2**31 plus the sign bit of d }
t := two_31 + (DWord(d) shr 31);
anc := t - 1 - t mod ad; { absolute value of nc }
p := 31; { initialize p }
q1 := two_31 div anc; { initialize q1 = 2p/abs(nc) }
r1 := two_31 - q1*anc; { initialize r1 = rem(2p,abs(nc)) }
q2 := two_31 div ad; { initialize q2 = 2p/abs(d) }
r2 := two_31 - q2*ad; { initialize r2 = rem(2p,abs(d)) }
{ refine both quotient/remainder pairs one power of two at a time }
repeat
inc(p);
q1 := 2*q1; { update q1 = 2p/abs(nc) }
r1 := 2*r1; { update r1 = rem(2p/abs(nc)) }
if (r1 >= anc) then begin { must be unsigned comparison }
inc(q1);
dec(r1, anc);
end;
q2 := 2*q2; { update q2 = 2p/abs(d) }
r2 := 2*r2; { update r2 = rem(2p/abs(d)) }
if (r2 >= ad) then begin { must be unsigned comparison }
inc(q2);
dec(r2, ad);
end;
delta := ad - r2;
until not ((q1 < delta) or ((q1 = delta) and (r1 = 0)));
magic_m := q2 + 1;
if (d < 0) then begin
magic_m := -magic_m; { resulting magic number }
end;
magic_s := p - 32; { resulting shift }
end;
{ helper functions }
{ Computes the constants used to replace an unsigned 64 bit division by the
  constant d with a multiplication.
    d           : divisor, must be non-zero (asserted)
    magic_m     : receives the multiplier (q2 + 1)
    magic_add   : receives true once q2 reaches the two_63-1 / two_63
                  threshold right before it is doubled
    magic_shift : receives the shift count (p - 64)
  In the comments below "2p" denotes 2 to the power of p. }
procedure getmagic_unsigned64(d : qword; out magic_m : qword; out magic_add : boolean; out magic_shift : qword);
const
{ 2 to the power of 63 }
two_63 : QWord = $8000000000000000;
var
p : int64;
nc, delta, q1, r1, q2, r2 : qword;
begin
assert(d > 0);
magic_add := false;
nc := - 1 - (-d) mod d;
p := 63; { initialize p }
q1 := two_63 div nc; { initialize q1 = 2p/nc }
r1 := two_63 - q1*nc; { initialize r1 = rem(2p,nc) }
q2 := (two_63-1) div d; { initialize q2 = (2p-1)/d }
r2 := (two_63-1) - q2*d; { initialize r2 = rem((2p-1),d) }
{ refine both quotient/remainder pairs one power of two at a time; the loop
  stops at p = 128 at the latest }
repeat
inc(p);
if (r1 >= (nc - r1)) then begin
q1 := 2 * q1 + 1; { update q1 }
r1 := 2*r1 - nc; { update r1 }
end else begin
q1 := 2*q1; { update q1 }
r1 := 2*r1; { update r1 }
end;
if ((r2 + 1) >= (d - r2)) then begin
if (q2 >= (two_63-1)) then
magic_add := true;
q2 := 2*q2 + 1; { update q2 }
r2 := 2*r2 + 1 - d; { update r2 }
end else begin
if (q2 >= two_63) then
magic_add := true;
q2 := 2*q2; { update q2 }
r2 := 2*r2 + 1; { update r2 }
end;
delta := d - 1 - r2;
until not ((p < 128) and ((q1 < delta) or ((q1 = delta) and (r1 = 0))));
magic_m := q2 + 1; { resulting magic number }
magic_shift := p - 64; { resulting shift }
end;
{ Computes the constants used to replace a signed 64 bit division by the
  constant d with a multiplication.
    d       : divisor, must be < -1 or > 1 (asserted)
    magic_m : receives the multiplier (q2 + 1), negated when d is negative
    magic_s : receives the shift count (p - 64)
  In the comments below "2p" denotes 2 to the power of p. }
procedure getmagic_signed64(d : int64; out magic_m : int64; out magic_s : int64);
const
{ 2 to the power of 63 }
two_63 : QWord = $8000000000000000;
var
p : int64;
ad, anc, delta, q1, r1, q2, r2, t : QWord;
begin
assert((d < -1) or (d > 1));
ad := abs(d);
{ t is 2**63 plus the sign bit of d }
t := two_63 + (QWord(d) shr 63);
anc := t - 1 - t mod ad; { absolute value of nc }
p := 63; { initialize p }
q1 := two_63 div anc; { initialize q1 = 2p/abs(nc) }
r1 := two_63 - q1*anc; { initialize r1 = rem(2p,abs(nc)) }
q2 := two_63 div ad; { initialize q2 = 2p/abs(d) }
r2 := two_63 - q2*ad; { initialize r2 = rem(2p,abs(d)) }
{ refine both quotient/remainder pairs one power of two at a time }
repeat
inc(p);
q1 := 2*q1; { update q1 = 2p/abs(nc) }
r1 := 2*r1; { update r1 = rem(2p/abs(nc)) }
if (r1 >= anc) then begin { must be unsigned comparison }
inc(q1);
dec(r1, anc);
end;
q2 := 2*q2; { update q2 = 2p/abs(d) }
r2 := 2*r2; { update r2 = rem(2p/abs(d)) }
if (r2 >= ad) then begin { must be unsigned comparison }
inc(q2);
dec(r2, ad);
end;
delta := ad - r2;
until not ((q1 < delta) or ((q1 = delta) and (r1 = 0)));
magic_m := q2 + 1;
if (d < 0) then begin
magic_m := -magic_m; { resulting magic number }
end;
magic_s := p - 64; { resulting shift }
end;
{*****************************************************************************
TPPCMODDIVNODE
*****************************************************************************}
@ -243,8 +73,13 @@ end;
procedure tppcmoddivnode.pass_2;
const { signed overflow }
divops: array[boolean, boolean] of tasmop =
((A_DIVDU,A_DIVDU_),(A_DIVD,A_DIVDO_));
((A_DIVDU, A_DIVDU_),(A_DIVD, A_DIVDO_));
divcgops : array[boolean] of TOpCG = (OP_DIV, OP_IDIV);
zerocond: tasmcond = (dirhint: DH_Plus; simple: true; cond:C_NE; cr: RS_CR7);
tcgsize2native : array[OS_8..OS_S128] of tcgsize = (
OS_64, OS_64, OS_64, OS_64, OS_NO,
OS_S64, OS_S64, OS_S64, OS_S64, OS_NO
);
var
power : longint;
op : tasmop;
@ -254,78 +89,10 @@ var
hl : tasmlabel;
done: boolean;
{ Nested helper of tppcmoddivnode.pass_2: emits code dividing numerator by the
  ordinal constant held in the right node, leaving the quotient in resultreg.
  Reads right, numerator and resultreg from the enclosing routine, uses its
  power variable as scratch and sets done to true on completion.
  Divisors 1 and -1 become move/negate, powers of two become shifts, all other
  constants are replaced by a multiplication with a magic number. }
procedure genOrdConstNodeDiv;
const
{ negate opcode, indexed by whether cs_check_overflow is active }
negops : array[boolean] of tasmop = (A_NEG, A_NEGO);
var
magic, shift : int64;
u_magic, u_shift : qword;
u_add : boolean;
divreg : tregister;
begin
if (tordconstnode(right).value = 0) then begin
internalerror(2005061701);
end else if (tordconstnode(right).value = 1) then begin
cg.a_load_reg_reg(exprasmlist, OS_INT, OS_INT, numerator, resultreg);
end else if (tordconstnode(right).value = -1) then begin
{ note: only in the signed case possible..., may overflow }
exprasmlist.concat(taicpu.op_reg_reg(negops[cs_check_overflow in aktlocalswitches], resultreg, numerator));
end else if (ispowerof2(tordconstnode(right).value, power)) then begin
if (is_signed(right.resulttype.def)) then begin
{ From "The PowerPC Compiler Writer's Guide", pg. 52ff }
cg.a_op_const_reg_reg(exprasmlist, OP_SAR, OS_INT, power,
numerator, resultreg);
exprasmlist.concat(taicpu.op_reg_reg(A_ADDZE, resultreg, resultreg));
end else begin
cg.a_op_const_reg_reg(exprasmlist, OP_SHR, OS_INT, power, numerator, resultreg)
end;
end else begin
{ replace division by multiplication, both implementations }
{ from "The PowerPC Compiler Writer's Guide" pg. 53ff }
divreg := cg.getintregister(exprasmlist, OS_INT);
if (is_signed(right.resulttype.def)) then begin
getmagic_signed64(tordconstnode(right).value, magic, shift);
{ load magic value }
cg.a_load_const_reg(exprasmlist, OS_INT, magic, divreg);
{ multiply }
exprasmlist.concat(taicpu.op_reg_reg_reg(A_MULHD, resultreg, numerator, divreg));
{ add/subtract numerator }
if (tordconstnode(right).value > 0) and (magic < 0) then begin
cg.a_op_reg_reg_reg(exprasmlist, OP_ADD, OS_INT, numerator, resultreg, resultreg);
end else if (tordconstnode(right).value < 0) and (magic > 0) then begin
cg.a_op_reg_reg_reg(exprasmlist, OP_SUB, OS_INT, numerator, resultreg, resultreg);
end;
{ shift shift places to the right (arithmetic) }
cg.a_op_const_reg_reg(exprasmlist, OP_SAR, OS_INT, shift, resultreg, resultreg);
{ extract and add sign bit }
if (tordconstnode(right).value >= 0) then begin
cg.a_op_const_reg_reg(exprasmlist, OP_SHR, OS_INT, 63, numerator, divreg);
end else begin
cg.a_op_const_reg_reg(exprasmlist, OP_SHR, OS_INT, 63, resultreg, divreg);
end;
cg.a_op_reg_reg_reg(exprasmlist, OP_ADD, OS_INT, resultreg, divreg, resultreg);
end else begin
getmagic_unsigned64(tordconstnode(right).value, u_magic, u_add, u_shift);
{ load magic in divreg }
cg.a_load_const_reg(exprasmlist, OS_INT, u_magic, divreg);
exprasmlist.concat(taicpu.op_reg_reg_reg(A_MULHDU, resultreg, numerator, divreg));
{ fixup sequence used when getmagic_unsigned64 reported magic_add }
if (u_add) then begin
cg.a_op_reg_reg_reg(exprasmlist, OP_SUB, OS_INT, resultreg, numerator, divreg);
cg.a_op_const_reg_reg(exprasmlist, OP_SHR, OS_INT, 1, divreg, divreg);
cg.a_op_reg_reg_reg(exprasmlist, OP_ADD, OS_INT, divreg, resultreg, divreg);
cg.a_op_const_reg_reg(exprasmlist, OP_SHR, OS_INT, u_shift-1, divreg, resultreg);
end else begin
cg.a_op_const_reg_reg(exprasmlist, OP_SHR, OS_INT, u_shift, resultreg, resultreg);
end;
end;
end;
{ tell pass_2 that the generic divide sequence is not needed any more }
done := true;
end;
procedure genOrdConstNodeMod;
var
modreg, maskreg, tempreg : tregister;
isNegPower : boolean;
begin
if (tordconstnode(right).value = 0) then begin
internalerror(2005061702);
@ -348,11 +115,14 @@ var
cg.a_op_reg_reg_reg(exprasmlist, OP_AND, OS_INT, modreg, maskreg, maskreg);
cg.a_op_reg_reg_reg(exprasmlist, OP_OR, OS_INT, maskreg, tempreg, resultreg);
end else begin
cg.a_op_const_reg_reg(exprasmlist, OP_AND, OS_INT, tordconstnode(right).value-1, numerator, resultreg);
cg.a_op_const_reg_reg(exprasmlist, OP_AND, OS_INT, tordconstnode(right).value-1, numerator,
resultreg);
end;
end else begin
genOrdConstNodeDiv();
cg.a_op_const_reg_reg(exprasmlist, OP_MUL, OS_INT, tordconstnode(right).value, resultreg, resultreg);
cg.a_op_const_reg_reg(exprasmlist, divCgOps[is_signed(right.resulttype.def)], OS_INT,
tordconstnode(right).value, numerator, resultreg);
cg.a_op_const_reg_reg(exprasmlist, OP_MUL, OS_INT, tordconstnode(right).value, resultreg,
resultreg);
cg.a_op_reg_reg_reg(exprasmlist, OP_SUB, OS_INT, resultreg, numerator, resultreg);
end;
end;
@ -380,20 +150,19 @@ begin
resultreg := cg.getintregister(exprasmlist,size);
end;
done := false;
(*
if (right.nodetype = ordconstn) then begin
if (cs_slowoptimize in aktglobalswitches) and (right.nodetype = ordconstn) then begin
if (nodetype = divn) then
genOrdConstNodeDiv
else
cg.a_op_const_reg_reg(exprasmlist, divCgOps[is_signed(right.resulttype.def)],
size, tordconstnode(right).value, numerator, resultreg)
else
genOrdConstNodeMod;
done := true;
end;
*)
if (not done) then begin
{ load divider in a register if necessary }
location_force_reg(exprasmlist,right.location,
def_cgsize(right.resulttype.def),true);
location_force_reg(exprasmlist,right.location,def_cgsize(right.resulttype.def),true);
if (right.nodetype <> ordconstn) then
exprasmlist.concat(taicpu.op_reg_reg_const(A_CMPDI, NR_CR7,
right.location.register, 0))
@ -403,13 +172,14 @@ begin
end;
divider := right.location.register;
{ needs overflow checking, (-maxlongint-1) div (-1) overflows! }
op := divops[is_signed(right.resulttype.def),
cs_check_overflow in aktlocalswitches];
{ select the correct opcode according to the sign of the result, whether we need
overflow checking }
op := divops[is_signed(right.resulttype.def), cs_check_overflow in aktlocalswitches];
exprasmlist.concat(taicpu.op_reg_reg_reg(op, resultreg, numerator,
divider));
if (nodetype = modn) then begin
{ multiply with the divisor again, taking care of the correct size }
exprasmlist.concat(taicpu.op_reg_reg_reg(A_MULLD,resultreg,
divider,resultreg));
exprasmlist.concat(taicpu.op_reg_reg_reg(A_SUB,location.register,
@ -433,111 +203,10 @@ begin
cg.g_overflowcheck(exprasmlist,location,resulttype.def);
end;
(*
procedure tppcmoddivnode.pass_2;
const
// ts: todo, use 32 bit operations if possible (much faster!)
{ signed overflow }
divops: array[boolean, boolean] of tasmop =
((A_DIVDU, A_DIVDUO_), (A_DIVD, A_DIVDO_));
zerocond: tasmcond = (dirhint: DH_Plus; simple: true; cond: C_NE; cr: RS_CR1);
var
power: longint;
op: tasmop;
numerator,
divider,
resultreg: tregister;
size: Tcgsize;
hl: tasmlabel;
begin
secondpass(left);
secondpass(right);
location_copy(location, left.location);
{ put numerator in register }
size := def_cgsize(left.resulttype.def);
location_force_reg(exprasmlist, left.location,
size, true);
location_copy(location, left.location);
numerator := location.register;
resultreg := location.register;
if (location.loc = LOC_CREGISTER) then
begin
location.loc := LOC_REGISTER;
location.register := cg.getintregister(exprasmlist, size);
resultreg := location.register;
end;
if (nodetype = modn) then
begin
resultreg := cg.getintregister(exprasmlist, size);
end;
if (nodetype = divn) and
(right.nodetype = ordconstn) and
ispowerof2(tordconstnode(right).value, power) then
begin
if (is_signed(right.resulttype.def)) then begin
{ From "The PowerPC Compiler Writer's Guide": }
{ This code uses the fact that, in the PowerPC architecture, }
{ the shift right algebraic instructions set the Carry bit if }
{ the source register contains a negative number and one or }
{ more 1-bits are shifted out. Otherwise, the carry bit is }
{ cleared. The addze instruction corrects the quotient, if }
{ necessary, when the dividend is negative. For example, if }
{ n = -13, (0xFFFF_FFF3), and k = 2, after executing the srawi }
{ instruction, q = -4 (0xFFFF_FFFC) and CA = 1. After executing }
{ the addze instruction, q = -3, the correct quotient. }
cg.a_op_const_reg_reg(exprasmlist, OP_SAR, OS_64, power,
numerator, resultreg);
exprasmlist.concat(taicpu.op_reg_reg(A_ADDZE, resultreg, resultreg));
end else begin
cg.a_op_const_reg_reg(exprasmlist, OP_SHR, OS_INT, power, numerator, resultreg);
end;
end else begin
{ load divider in a register if necessary }
location_force_reg(exprasmlist, right.location,
def_cgsize(right.resulttype.def), true);
if (right.nodetype <> ordconstn) then
{$NOTE ts: testme}
exprasmlist.concat(taicpu.op_reg_reg_const(A_CMPDI, NR_CR1,
right.location.register, 0));
divider := right.location.register;
{ needs overflow checking, (-maxlongint-1) div (-1) overflows! }
{ And on PPC, the only way to catch a div-by-0 is by checking }
{ the overflow flag (JM) }
op := divops[is_signed(right.resulttype.def),
cs_check_overflow in aktlocalswitches];
exprasmlist.concat(taicpu.op_reg_reg_reg(op, resultreg, numerator,
divider));
if (nodetype = modn) then begin
{$NOTE ts:testme}
exprasmlist.concat(taicpu.op_reg_reg_reg(A_MULLD, resultreg,
divider, resultreg));
exprasmlist.concat(taicpu.op_reg_reg_reg(A_SUB, location.register,
numerator, resultreg));
resultreg := location.register;
end;
end;
{ set result location }
location.loc := LOC_REGISTER;
location.register := resultreg;
if (right.nodetype <> ordconstn) then begin
objectlibrary.getjumplabel(hl);
exprasmlist.concat(taicpu.op_cond_sym(A_BC, zerocond, hl));
cg.a_call_name(exprasmlist, 'FPC_DIVBYZERO');
cg.a_label(exprasmlist, hl);
end;
cg.g_overflowcheck(exprasmlist, location, resulttype.def);
end;
*)
{*****************************************************************************
TPPCSHLRSHRNODE
*****************************************************************************}
procedure tppcshlshrnode.pass_2;
var