mirror of
https://gitlab.com/freepascal.org/fpc/source.git
synced 2025-10-16 18:26:00 +02:00
* ppc64: moved division by constant optimization from nppcmat to cgcpu unit
* ppc64: cg now also replaces multiplications/divisions by negative powers of two by shifts (and a negate)
* ppc64: replacement of division by constants now properly checks the -O2 compiler switch
git-svn-id: trunk@1662 -
This commit is contained in:
parent
375bc7bbb0
commit
d088695868
@ -172,6 +172,136 @@ uses
|
||||
symconst, symsym, fmodule,
|
||||
rgobj, tgobj, cpupi, procinfo, paramgr;
|
||||
|
||||
{ helper which computes the "magic" values needed to replace an unsigned
  division by the constant d by a multiplication. See the PowerPC compiler
  developer manual for more information.

    N           : operand width in bits; 2^(N-1) is built in an aWord and all
                  arithmetic below is carried out at aWord width
    d           : the divisor, must be > 0 (asserted)
    magic_m     : receives the multiplier
    magic_add   : receives true if the quotient for d reached the 2^(N-1)
                  threshold before being doubled, false otherwise
    magic_shift : receives the final exponent minus N }
procedure getmagic_unsignedN(const N : byte; const d : aWord;
  out magic_m : aWord; out magic_add : boolean; out magic_shift : byte);
var
  exponent : aInt;
  nc, delta, quot_nc, rem_nc, quot_d, rem_d, half_range : aWord;
begin
  assert(d > 0);

  { half_range = 2^(N-1) }
  half_range := aWord(1) shl (N-1);

  magic_add := false;
  nc := - 1 - (-d) mod d;
  exponent := N-1;
  { quotient/remainder of 2^p by nc }
  quot_nc := half_range div nc;
  rem_nc := half_range - quot_nc*nc;
  { quotient/remainder of (2^p - 1) by d }
  quot_d := (half_range-1) div d;
  rem_d := (half_range-1) - quot_d*d;
  repeat
    inc(exponent);
    { advance quot_nc/rem_nc to the next exponent }
    if (rem_nc >= (nc - rem_nc)) then begin
      quot_nc := 2 * quot_nc + 1;
      rem_nc := 2*rem_nc - nc;
    end else begin
      quot_nc := 2*quot_nc;
      rem_nc := 2*rem_nc;
    end;
    { advance quot_d/rem_d to the next exponent, flagging magic_add when the
      quotient is already at the threshold before it gets doubled }
    if ((rem_d + 1) >= (d - rem_d)) then begin
      if (quot_d >= (half_range-1)) then
        magic_add := true;
      quot_d := 2*quot_d + 1;
      rem_d := 2*rem_d + 1 - d;
    end else begin
      if (quot_d >= half_range) then
        magic_add := true;
      quot_d := 2*quot_d;
      rem_d := 2*rem_d + 1;
    end;
    delta := d - 1 - rem_d;
  until (exponent >= (2*N)) or
    not ((quot_nc < delta) or ((quot_nc = delta) and (rem_nc = 0)));
  magic_m := quot_d + 1;        { resulting magic number }
  magic_shift := exponent - N;  { resulting shift }
end;
|
||||
|
||||
{ helper which computes the "magic" values needed to replace a signed
  division by the constant d by a multiplication. See the PowerPC compiler
  developer manual for more information.

    N       : operand width in bits; 2^(N-1) is built in an aWord and all
              arithmetic below is carried out at aWord width
    d       : the divisor, must be < -1 or > 1 (asserted)
    magic_m : receives the multiplier, negated if d is negative
    magic_s : receives the final exponent minus N }
procedure getmagic_signedN(const N : byte; const d : aInt;
  out magic_m : aInt; out magic_s : aInt);
var
  exponent : aInt;
  abs_d, abs_nc, delta, quot_nc, rem_nc, quot_d, rem_d, t : aWord;
  half_range : aWord;
begin
  assert((d < -1) or (d > 1));

  { half_range = 2^(N-1) }
  half_range := aWord(1) shl (N-1);

  abs_d := abs(d);
  { t = 2^(N-1), plus one if the sign bit of d is set }
  t := half_range + (aWord(d) shr (N-1));
  abs_nc := t - 1 - t mod abs_d;            { absolute value of nc }
  exponent := (N-1);
  { quotient/remainder of 2^p by abs(nc) }
  quot_nc := half_range div abs_nc;
  rem_nc := half_range - quot_nc*abs_nc;
  { quotient/remainder of 2^p by abs(d) }
  quot_d := half_range div abs_d;
  rem_d := half_range - quot_d*abs_d;
  repeat
    inc(exponent);
    { advance quot_nc/rem_nc to the next exponent }
    quot_nc := 2*quot_nc;
    rem_nc := 2*rem_nc;
    if (rem_nc >= abs_nc) then begin        { must be unsigned comparison }
      inc(quot_nc);
      dec(rem_nc, abs_nc);
    end;
    { advance quot_d/rem_d to the next exponent }
    quot_d := 2*quot_d;
    rem_d := 2*rem_d;
    if (rem_d >= abs_d) then begin          { must be unsigned comparison }
      inc(quot_d);
      dec(rem_d, abs_d);
    end;
    delta := abs_d - rem_d;
  until (quot_nc > delta) or ((quot_nc = delta) and (rem_nc <> 0));
  magic_m := quot_d + 1;
  if (d < 0) then begin
    magic_m := -magic_m;                    { resulting magic number }
  end;
  magic_s := exponent - N;                  { resulting shift }
end;
|
||||
|
||||
{ checks whether value is a positive or negative power of two.

    value : the number to test
    power : receives the exponent if the function returns true, 0 otherwise
    neg   : receives true if value is negative, regardless of the result

  returns true if abs(value) is a power of two; low(aInt) counts as a
  negative power of two. Zero is not a power of two and yields false. }
function ispowerof2(value : aInt; out power : byte; out neg : boolean) : boolean;
var
  i : longint;
  hl : aInt;
begin
  { give every out value a defined state up front so that no exit path
    leaves the result or power uninitialized }
  result := false;
  power := 0;
  neg := false;
  { also try to find negative powers of two by negating the value if it
    is negative. low(aInt) is special because it can not be negated.
    Simply return the appropriate values for it }
  if (value < 0) then begin
    neg := true;
    if (value = low(aInt)) then begin
      power := sizeof(aInt)*8-1;
      result := true;
      exit;
    end;
    value := -value;
  end;

  { zero passes the single-bit test (0 and -1 = 0) but is no power of two,
    so it has to be rejected explicitly }
  if (value = 0) or ((value and (value-1)) <> 0) then
    exit;

  { exactly one bit is set: find its position }
  hl := 1;
  for i := 0 to (sizeof(aInt)*8-1) do begin
    if (hl = value) then begin
      result := true;
      power := i;
      exit;
    end;
    hl := hl shl 1;
  end;
end;
|
||||
|
||||
|
||||
procedure tcgppc.init_register_allocators;
|
||||
begin
|
||||
inherited init_register_allocators;
|
||||
@ -438,7 +568,9 @@ procedure tcgppc.a_load_const_reg(list: taasmoutput; size: TCGSize; a: aint;
|
||||
end;
|
||||
|
||||
{ R0-safe version of the above (ADDIS doesn't work the same way with R0 as base), without
|
||||
the return value }
|
||||
the return value. Unused until further testing shows that it is not really necessary;
|
||||
loading the upper 32 bits of a value is now done using R12, which does not require
|
||||
special treatment }
|
||||
procedure load32bitconstantR0(list : taasmoutput; size : TCGSize; a : longint;
|
||||
reg : TRegister);
|
||||
begin
|
||||
@ -707,9 +839,86 @@ var
|
||||
else
|
||||
list.concat(taicpu.op_reg_reg_const(A_ANDI_, dst, src, word(a)));
|
||||
end;
|
||||
|
||||
{ emits code into list which divides src by the constant a and puts the
  quotient into dst without using a divide instruction.

    list   : instruction list that receives the generated code
    size   : currently unused, all operations are done with OS_INT
    a      : the divisor; for an unsigned division it is the bit pattern of
             the unsigned divisor, so it may look negative here
    src    : register holding the dividend
    dst    : register receiving the quotient
    signed : true for a signed division, false for an unsigned one

  a = 0 raises internalerror 2005061701.

  NOTE(review): several sequences read src again after dst has been written,
  which presumes that src and dst are different registers - confirm against
  the callers. }
procedure do_constant_div(list : taasmoutput; size : TCgSize; a : aint; src, dst : TRegister;
  signed : boolean);
const
  negops : array[boolean] of tasmop = (A_NEG, A_NEGO);
var
  magic, shift : int64;
  u_magic : qword;
  u_shift : byte;
  u_add : boolean;
  power : byte;
  isNegPower : boolean;

  divreg : tregister;
begin
  if (a = 0) then begin
    internalerror(2005061701);
  end else if (a = 1) then begin
    cg.a_load_reg_reg(list, OS_INT, OS_INT, src, dst);
  end else if (signed) and (a = -1) then begin
    { division by -1 is a negate, may overflow. For an unsigned division the
      same bit pattern is the largest unsigned value, which must not be
      negated; it is handled by the generic code below }
    list.concat(taicpu.op_reg_reg(negops[cs_check_overflow in aktlocalswitches], dst, src));
  end else if (ispowerof2(a, power, isNegPower)) and
    (signed or (not isNegPower) or (a = low(aInt))) then begin
    { a "negative" power of two is only meaningful for signed divisions. The
      one exception is low(aInt): as an unsigned value it is a real power of
      two, and ispowerof2 returns the matching power for it }
    if (signed) then begin
      { From "The PowerPC Compiler Writer's Guide", pg. 52ff }
      cg.a_op_const_reg_reg(list, OP_SAR, OS_INT, power,
        src, dst);
      list.concat(taicpu.op_reg_reg(A_ADDZE, dst, dst));
      { the sequence above divides by abs(a), so fix the sign afterwards }
      if (isNegPower) then
        list.concat(taicpu.op_reg_reg(A_NEG, dst, dst));
    end else begin
      cg.a_op_const_reg_reg(list, OP_SHR, OS_INT, power, src, dst)
    end;
  end else begin
    { replace division by multiplication, both implementations }
    { from "The PowerPC Compiler Writer's Guide" pg. 53ff      }
    divreg := cg.getintregister(list, OS_INT);
    if (signed) then begin
      getmagic_signedN(sizeof(aInt)*8, a, magic, shift);
      { load magic value }
      cg.a_load_const_reg(list, OS_INT, magic, divreg);
      { multiply }
      list.concat(taicpu.op_reg_reg_reg(A_MULHD, dst, src, divreg));
      { add/subtract numerator }
      if (a > 0) and (magic < 0) then begin
        cg.a_op_reg_reg_reg(list, OP_ADD, OS_INT, src, dst, dst);
      end else if (a < 0) and (magic > 0) then begin
        cg.a_op_reg_reg_reg(list, OP_SUB, OS_INT, src, dst, dst);
      end;
      { shift shift places to the right (arithmetic) }
      cg.a_op_const_reg_reg(list, OP_SAR, OS_INT, shift, dst, dst);
      { extract and add sign bit }
      if (a >= 0) then begin
        cg.a_op_const_reg_reg(list, OP_SHR, OS_INT, 63, src, divreg);
      end else begin
        cg.a_op_const_reg_reg(list, OP_SHR, OS_INT, 63, dst, divreg);
      end;
      cg.a_op_reg_reg_reg(list, OP_ADD, OS_INT, dst, divreg, dst);
    end else begin
      { a is reinterpreted as the unsigned divisor it stands for }
      getmagic_unsignedN(sizeof(aWord)*8, aWord(a), u_magic, u_add, u_shift);
      { load magic in divreg }
      cg.a_load_const_reg(list, OS_INT, u_magic, divreg);
      list.concat(taicpu.op_reg_reg_reg(A_MULHDU, dst, src, divreg));
      if (u_add) then begin
        cg.a_op_reg_reg_reg(list, OP_SUB, OS_INT, dst, src, divreg);
        cg.a_op_const_reg_reg(list, OP_SHR, OS_INT, 1, divreg, divreg);
        cg.a_op_reg_reg_reg(list, OP_ADD, OS_INT, divreg, dst, divreg);
        cg.a_op_const_reg_reg(list, OP_SHR, OS_INT, u_shift-1, divreg, dst);
      end else begin
        cg.a_op_const_reg_reg(list, OP_SHR, OS_INT, u_shift, dst, dst);
      end;
    end;
  end;
end;
|
||||
|
||||
var
|
||||
scratchreg: tregister;
|
||||
shift, shiftmask : longint;
|
||||
shift : byte;
|
||||
shiftmask : longint;
|
||||
isneg : boolean;
|
||||
|
||||
begin
|
||||
{ subtraction is the same as addition with negative constant }
|
||||
@ -725,13 +934,8 @@ begin
|
||||
useReg := false;
|
||||
case (op) of
|
||||
OP_DIV, OP_IDIV:
|
||||
{ actually, this method should be never called directly with OP_DIV or
|
||||
OP_IDIV, so just provide basic support.
|
||||
TODO: move division by constant stuff from nppcmat.pas here }
|
||||
if (a = 0) then
|
||||
internalerror(200208103)
|
||||
else if (a = 1) then
|
||||
a_load_reg_reg(list, size, size, src, dst)
|
||||
if (cs_slowoptimize in aktglobalswitches) then
|
||||
do_constant_div(list, size, a, src, dst, op = OP_IDIV)
|
||||
else
|
||||
usereg := true;
|
||||
OP_IMUL, OP_MUL:
|
||||
@ -743,9 +947,11 @@ begin
|
||||
list.concat(taicpu.op_reg_reg(A_NEG, dst, dst))
|
||||
else if (a = 1) then
|
||||
a_load_reg_reg(list, OS_INT, OS_INT, src, dst)
|
||||
else if ispowerof2(a, shift) then
|
||||
list.concat(taicpu.op_reg_reg_const(A_SLDI, dst, src, shift))
|
||||
else if (a >= low(smallint)) and (a <= high(smallint)) then
|
||||
else if ispowerof2(a, shift, isneg) then begin
|
||||
list.concat(taicpu.op_reg_reg_const(A_SLDI, dst, src, shift));
|
||||
if (isneg) then
|
||||
exprasmlist.concat(taicpu.op_reg_reg(A_NEG, dst, dst));
|
||||
end else if (a >= low(smallint)) and (a <= high(smallint)) then
|
||||
list.concat(taicpu.op_reg_reg_const(A_MULLI, dst, src,
|
||||
smallint(a)))
|
||||
else
|
||||
@ -808,7 +1014,6 @@ end;
|
||||
|
||||
procedure tcgppc.a_op_reg_reg_reg(list: taasmoutput; op: TOpCg;
|
||||
size: tcgsize; src1, src2, dst: tregister);
|
||||
|
||||
const
|
||||
op_reg_reg_opcg2asmop32: array[TOpCG] of tasmop =
|
||||
(A_NONE, A_ADD, A_AND, A_DIVWU, A_DIVW, A_MULLW, A_MULLW, A_NEG, A_NOT, A_OR,
|
||||
@ -816,7 +1021,6 @@ const
|
||||
op_reg_reg_opcg2asmop64: array[TOpCG] of tasmop =
|
||||
(A_NONE, A_ADD, A_AND, A_DIVDU, A_DIVD, A_MULLD, A_MULLD, A_NEG, A_NOT, A_OR,
|
||||
A_SRAD, A_SLD, A_SRD, A_SUB, A_XOR);
|
||||
|
||||
begin
|
||||
case op of
|
||||
OP_NEG, OP_NOT:
|
||||
@ -1559,7 +1763,7 @@ begin
|
||||
least four. If not, add the bytes which are "off" to the base register and
|
||||
adjust the offset accordingly }
|
||||
case op of
|
||||
A_LD, A_LDU, A_STD, A_STDU, A_LWA, A_LWAU :
|
||||
A_LD, A_LDU, A_STD, A_STDU, A_LWA :
|
||||
if ((ref.offset mod 4) <> 0) then begin
|
||||
tmpreg := rg[R_INTREGISTER].getregister(list, R_SUBWHOLE);
|
||||
|
||||
@ -1621,7 +1825,8 @@ begin
|
||||
tmpref.base := ref.base;
|
||||
tmpref.index := tmpreg2;
|
||||
case op of
|
||||
{ the code generator doesn't generate update instructions anyway }
|
||||
{ the code generator doesn't generate update instructions anyway, so
|
||||
error out on those instructions }
|
||||
A_LBZ : op := A_LBZX;
|
||||
A_LHZ : op := A_LHZX;
|
||||
A_LWZ : op := A_LWZX;
|
||||
|
@ -96,7 +96,7 @@ type
|
||||
A_RLDICL,
|
||||
A_DIVDU, A_DIVDU_, A_DIVD, A_DIVD_, A_MULLD, A_MULLD_, A_MULHD, A_MULHD_, A_SRAD, A_SLD, A_SRD,
|
||||
A_DIVDUO_, A_DIVDO_,
|
||||
A_LWA, A_LWAU, A_LWAX, A_LWAUX,
|
||||
A_LWA, A_LWAX, A_LWAUX,
|
||||
A_FCFID,
|
||||
A_LDARX, A_STDCX_, A_CNTLZD,
|
||||
A_LVX, A_STVX,
|
||||
|
@ -86,7 +86,7 @@ const
|
||||
'rldicl',
|
||||
'divdu', 'divdu.', 'divd', 'divd.', 'mulld', 'mulld.', 'mulhd', 'mulhd.', 'srad', 'sld', 'srd',
|
||||
'divduo.', 'divdo.',
|
||||
'lwa', '<illegal lwau>', 'lwax', 'lwaux',
|
||||
'lwa', 'lwax', 'lwaux',
|
||||
'fcfid',
|
||||
'ldarx', 'stdcx.', 'cntlzd',
|
||||
'lvx', 'stvx',
|
||||
|
@ -36,7 +36,8 @@ type
|
||||
}
|
||||
function first_abs_real: tnode; override;
|
||||
function first_sqr_real: tnode; override;
|
||||
{ todo: inline trunc/round/frac?/int }
|
||||
|
||||
{ trunc/round/frac?/int can't be inlined? }
|
||||
|
||||
procedure second_abs_real; override;
|
||||
procedure second_sqr_real; override;
|
||||
|
@ -59,176 +59,6 @@ uses
|
||||
cpubase, cpuinfo,
|
||||
ncgutil, cgcpu, rgobj;
|
||||
|
||||
{ helper functions }
|
||||
procedure getmagic_unsigned32(d : dword; out magic_m : dword; out magic_add : boolean; out magic_shift : dword);
|
||||
var
|
||||
p : longint;
|
||||
nc, delta, q1, r1, q2, r2 : dword;
|
||||
|
||||
begin
|
||||
assert(d > 0);
|
||||
|
||||
magic_add := false;
|
||||
nc := - 1 - (-d) mod d;
|
||||
p := 31; { initialize p }
|
||||
q1 := $80000000 div nc; { initialize q1 = 2p/nc }
|
||||
r1 := $80000000 - q1*nc; { initialize r1 = rem(2p,nc) }
|
||||
q2 := $7FFFFFFF div d; { initialize q2 = (2p-1)/d }
|
||||
r2 := $7FFFFFFF - q2*d; { initialize r2 = rem((2p-1),d) }
|
||||
repeat
|
||||
inc(p);
|
||||
if (r1 >= (nc - r1)) then begin
|
||||
q1 := 2 * q1 + 1; { update q1 }
|
||||
r1 := 2*r1 - nc; { update r1 }
|
||||
end else begin
|
||||
q1 := 2*q1; { update q1 }
|
||||
r1 := 2*r1; { update r1 }
|
||||
end;
|
||||
if ((r2 + 1) >= (d - r2)) then begin
|
||||
if (q2 >= $7FFFFFFF) then
|
||||
magic_add := true;
|
||||
q2 := 2*q2 + 1; { update q2 }
|
||||
r2 := 2*r2 + 1 - d; { update r2 }
|
||||
end else begin
|
||||
if (q2 >= $80000000) then
|
||||
magic_add := true;
|
||||
q2 := 2*q2; { update q2 }
|
||||
r2 := 2*r2 + 1; { update r2 }
|
||||
end;
|
||||
delta := d - 1 - r2;
|
||||
until not ((p < 64) and ((q1 < delta) or ((q1 = delta) and (r1 = 0))));
|
||||
magic_m := q2 + 1; { resulting magic number }
|
||||
magic_shift := p - 32; { resulting shift }
|
||||
end;
|
||||
|
||||
procedure getmagic_signed32(d : longint; out magic_m : longint; out magic_s : longint);
|
||||
const
|
||||
two_31 : DWord = high(longint)+1;
|
||||
var
|
||||
p : Longint;
|
||||
ad, anc, delta, q1, r1, q2, r2, t : DWord;
|
||||
|
||||
begin
|
||||
assert((d < -1) or (d > 1));
|
||||
|
||||
ad := abs(d);
|
||||
t := two_31 + (DWord(d) shr 31);
|
||||
anc := t - 1 - t mod ad; { absolute value of nc }
|
||||
p := 31; { initialize p }
|
||||
q1 := two_31 div anc; { initialize q1 = 2p/abs(nc) }
|
||||
r1 := two_31 - q1*anc; { initialize r1 = rem(2p,abs(nc)) }
|
||||
q2 := two_31 div ad; { initialize q2 = 2p/abs(d) }
|
||||
r2 := two_31 - q2*ad; { initialize r2 = rem(2p,abs(d)) }
|
||||
repeat
|
||||
inc(p);
|
||||
q1 := 2*q1; { update q1 = 2p/abs(nc) }
|
||||
r1 := 2*r1; { update r1 = rem(2p/abs(nc)) }
|
||||
if (r1 >= anc) then begin { must be unsigned comparison }
|
||||
inc(q1);
|
||||
dec(r1, anc);
|
||||
end;
|
||||
q2 := 2*q2; { update q2 = 2p/abs(d) }
|
||||
r2 := 2*r2; { update r2 = rem(2p/abs(d)) }
|
||||
if (r2 >= ad) then begin { must be unsigned comparison }
|
||||
inc(q2);
|
||||
dec(r2, ad);
|
||||
end;
|
||||
delta := ad - r2;
|
||||
until not ((q1 < delta) or ((q1 = delta) and (r1 = 0)));
|
||||
magic_m := q2 + 1;
|
||||
if (d < 0) then begin
|
||||
magic_m := -magic_m; { resulting magic number }
|
||||
end;
|
||||
magic_s := p - 32; { resulting shift }
|
||||
end;
|
||||
|
||||
{ helper functions }
|
||||
procedure getmagic_unsigned64(d : qword; out magic_m : qword; out magic_add : boolean; out magic_shift : qword);
|
||||
const
|
||||
two_63 : QWord = $8000000000000000;
|
||||
var
|
||||
p : int64;
|
||||
nc, delta, q1, r1, q2, r2 : qword;
|
||||
|
||||
begin
|
||||
assert(d > 0);
|
||||
|
||||
magic_add := false;
|
||||
nc := - 1 - (-d) mod d;
|
||||
p := 63; { initialize p }
|
||||
q1 := two_63 div nc; { initialize q1 = 2p/nc }
|
||||
r1 := two_63 - q1*nc; { initialize r1 = rem(2p,nc) }
|
||||
q2 := (two_63-1) div d; { initialize q2 = (2p-1)/d }
|
||||
r2 := (two_63-1) - q2*d; { initialize r2 = rem((2p-1),d) }
|
||||
repeat
|
||||
inc(p);
|
||||
if (r1 >= (nc - r1)) then begin
|
||||
q1 := 2 * q1 + 1; { update q1 }
|
||||
r1 := 2*r1 - nc; { update r1 }
|
||||
end else begin
|
||||
q1 := 2*q1; { update q1 }
|
||||
r1 := 2*r1; { update r1 }
|
||||
end;
|
||||
if ((r2 + 1) >= (d - r2)) then begin
|
||||
if (q2 >= (two_63-1)) then
|
||||
magic_add := true;
|
||||
q2 := 2*q2 + 1; { update q2 }
|
||||
r2 := 2*r2 + 1 - d; { update r2 }
|
||||
end else begin
|
||||
if (q2 >= two_63) then
|
||||
magic_add := true;
|
||||
q2 := 2*q2; { update q2 }
|
||||
r2 := 2*r2 + 1; { update r2 }
|
||||
end;
|
||||
delta := d - 1 - r2;
|
||||
until not ((p < 128) and ((q1 < delta) or ((q1 = delta) and (r1 = 0))));
|
||||
magic_m := q2 + 1; { resulting magic number }
|
||||
magic_shift := p - 64; { resulting shift }
|
||||
end;
|
||||
|
||||
procedure getmagic_signed64(d : int64; out magic_m : int64; out magic_s : int64);
|
||||
const
|
||||
two_63 : QWord = $8000000000000000;
|
||||
var
|
||||
p : int64;
|
||||
ad, anc, delta, q1, r1, q2, r2, t : QWord;
|
||||
|
||||
begin
|
||||
assert((d < -1) or (d > 1));
|
||||
|
||||
ad := abs(d);
|
||||
t := two_63 + (QWord(d) shr 63);
|
||||
anc := t - 1 - t mod ad; { absolute value of nc }
|
||||
p := 63; { initialize p }
|
||||
q1 := two_63 div anc; { initialize q1 = 2p/abs(nc) }
|
||||
r1 := two_63 - q1*anc; { initialize r1 = rem(2p,abs(nc)) }
|
||||
q2 := two_63 div ad; { initialize q2 = 2p/abs(d) }
|
||||
r2 := two_63 - q2*ad; { initialize r2 = rem(2p,abs(d)) }
|
||||
repeat
|
||||
inc(p);
|
||||
q1 := 2*q1; { update q1 = 2p/abs(nc) }
|
||||
r1 := 2*r1; { update r1 = rem(2p/abs(nc)) }
|
||||
if (r1 >= anc) then begin { must be unsigned comparison }
|
||||
inc(q1);
|
||||
dec(r1, anc);
|
||||
end;
|
||||
q2 := 2*q2; { update q2 = 2p/abs(d) }
|
||||
r2 := 2*r2; { update r2 = rem(2p/abs(d)) }
|
||||
if (r2 >= ad) then begin { must be unsigned comparison }
|
||||
inc(q2);
|
||||
dec(r2, ad);
|
||||
end;
|
||||
delta := ad - r2;
|
||||
until not ((q1 < delta) or ((q1 = delta) and (r1 = 0)));
|
||||
magic_m := q2 + 1;
|
||||
if (d < 0) then begin
|
||||
magic_m := -magic_m; { resulting magic number }
|
||||
end;
|
||||
magic_s := p - 64; { resulting shift }
|
||||
end;
|
||||
|
||||
|
||||
|
||||
{*****************************************************************************
|
||||
TPPCMODDIVNODE
|
||||
*****************************************************************************}
|
||||
@ -243,8 +73,13 @@ end;
|
||||
procedure tppcmoddivnode.pass_2;
|
||||
const { signed overflow }
|
||||
divops: array[boolean, boolean] of tasmop =
|
||||
((A_DIVDU,A_DIVDU_),(A_DIVD,A_DIVDO_));
|
||||
((A_DIVDU, A_DIVDU_),(A_DIVD, A_DIVDO_));
|
||||
divcgops : array[boolean] of TOpCG = (OP_DIV, OP_IDIV);
|
||||
zerocond: tasmcond = (dirhint: DH_Plus; simple: true; cond:C_NE; cr: RS_CR7);
|
||||
tcgsize2native : array[OS_8..OS_S128] of tcgsize = (
|
||||
OS_64, OS_64, OS_64, OS_64, OS_NO,
|
||||
OS_S64, OS_S64, OS_S64, OS_S64, OS_NO
|
||||
);
|
||||
var
|
||||
power : longint;
|
||||
op : tasmop;
|
||||
@ -254,78 +89,10 @@ var
|
||||
hl : tasmlabel;
|
||||
done: boolean;
|
||||
|
||||
procedure genOrdConstNodeDiv;
|
||||
const
|
||||
negops : array[boolean] of tasmop = (A_NEG, A_NEGO);
|
||||
var
|
||||
magic, shift : int64;
|
||||
u_magic, u_shift : qword;
|
||||
u_add : boolean;
|
||||
|
||||
divreg : tregister;
|
||||
begin
|
||||
if (tordconstnode(right).value = 0) then begin
|
||||
internalerror(2005061701);
|
||||
end else if (tordconstnode(right).value = 1) then begin
|
||||
cg.a_load_reg_reg(exprasmlist, OS_INT, OS_INT, numerator, resultreg);
|
||||
end else if (tordconstnode(right).value = -1) then begin
|
||||
{ note: only in the signed case possible..., may overflow }
|
||||
exprasmlist.concat(taicpu.op_reg_reg(negops[cs_check_overflow in aktlocalswitches], resultreg, numerator));
|
||||
end else if (ispowerof2(tordconstnode(right).value, power)) then begin
|
||||
if (is_signed(right.resulttype.def)) then begin
|
||||
{ From "The PowerPC Compiler Writer's Guide", pg. 52ff }
|
||||
cg.a_op_const_reg_reg(exprasmlist, OP_SAR, OS_INT, power,
|
||||
numerator, resultreg);
|
||||
exprasmlist.concat(taicpu.op_reg_reg(A_ADDZE, resultreg, resultreg));
|
||||
end else begin
|
||||
cg.a_op_const_reg_reg(exprasmlist, OP_SHR, OS_INT, power, numerator, resultreg)
|
||||
end;
|
||||
end else begin
|
||||
{ replace division by multiplication, both implementations }
|
||||
{ from "The PowerPC Compiler Writer's Guide" pg. 53ff }
|
||||
divreg := cg.getintregister(exprasmlist, OS_INT);
|
||||
if (is_signed(right.resulttype.def)) then begin
|
||||
getmagic_signed64(tordconstnode(right).value, magic, shift);
|
||||
{ load magic value }
|
||||
cg.a_load_const_reg(exprasmlist, OS_INT, magic, divreg);
|
||||
{ multiply }
|
||||
exprasmlist.concat(taicpu.op_reg_reg_reg(A_MULHD, resultreg, numerator, divreg));
|
||||
{ add/subtract numerator }
|
||||
if (tordconstnode(right).value > 0) and (magic < 0) then begin
|
||||
cg.a_op_reg_reg_reg(exprasmlist, OP_ADD, OS_INT, numerator, resultreg, resultreg);
|
||||
end else if (tordconstnode(right).value < 0) and (magic > 0) then begin
|
||||
cg.a_op_reg_reg_reg(exprasmlist, OP_SUB, OS_INT, numerator, resultreg, resultreg);
|
||||
end;
|
||||
{ shift shift places to the right (arithmetic) }
|
||||
cg.a_op_const_reg_reg(exprasmlist, OP_SAR, OS_INT, shift, resultreg, resultreg);
|
||||
{ extract and add sign bit }
|
||||
if (tordconstnode(right).value >= 0) then begin
|
||||
cg.a_op_const_reg_reg(exprasmlist, OP_SHR, OS_INT, 63, numerator, divreg);
|
||||
end else begin
|
||||
cg.a_op_const_reg_reg(exprasmlist, OP_SHR, OS_INT, 63, resultreg, divreg);
|
||||
end;
|
||||
cg.a_op_reg_reg_reg(exprasmlist, OP_ADD, OS_INT, resultreg, divreg, resultreg);
|
||||
end else begin
|
||||
getmagic_unsigned64(tordconstnode(right).value, u_magic, u_add, u_shift);
|
||||
{ load magic in divreg }
|
||||
cg.a_load_const_reg(exprasmlist, OS_INT, u_magic, divreg);
|
||||
exprasmlist.concat(taicpu.op_reg_reg_reg(A_MULHDU, resultreg, numerator, divreg));
|
||||
if (u_add) then begin
|
||||
cg.a_op_reg_reg_reg(exprasmlist, OP_SUB, OS_INT, resultreg, numerator, divreg);
|
||||
cg.a_op_const_reg_reg(exprasmlist, OP_SHR, OS_INT, 1, divreg, divreg);
|
||||
cg.a_op_reg_reg_reg(exprasmlist, OP_ADD, OS_INT, divreg, resultreg, divreg);
|
||||
cg.a_op_const_reg_reg(exprasmlist, OP_SHR, OS_INT, u_shift-1, divreg, resultreg);
|
||||
end else begin
|
||||
cg.a_op_const_reg_reg(exprasmlist, OP_SHR, OS_INT, u_shift, resultreg, resultreg);
|
||||
end;
|
||||
end;
|
||||
end;
|
||||
done := true;
|
||||
end;
|
||||
|
||||
procedure genOrdConstNodeMod;
|
||||
var
|
||||
modreg, maskreg, tempreg : tregister;
|
||||
isNegPower : boolean;
|
||||
begin
|
||||
if (tordconstnode(right).value = 0) then begin
|
||||
internalerror(2005061702);
|
||||
@ -348,11 +115,14 @@ var
|
||||
cg.a_op_reg_reg_reg(exprasmlist, OP_AND, OS_INT, modreg, maskreg, maskreg);
|
||||
cg.a_op_reg_reg_reg(exprasmlist, OP_OR, OS_INT, maskreg, tempreg, resultreg);
|
||||
end else begin
|
||||
cg.a_op_const_reg_reg(exprasmlist, OP_AND, OS_INT, tordconstnode(right).value-1, numerator, resultreg);
|
||||
cg.a_op_const_reg_reg(exprasmlist, OP_AND, OS_INT, tordconstnode(right).value-1, numerator,
|
||||
resultreg);
|
||||
end;
|
||||
end else begin
|
||||
genOrdConstNodeDiv();
|
||||
cg.a_op_const_reg_reg(exprasmlist, OP_MUL, OS_INT, tordconstnode(right).value, resultreg, resultreg);
|
||||
cg.a_op_const_reg_reg(exprasmlist, divCgOps[is_signed(right.resulttype.def)], OS_INT,
|
||||
tordconstnode(right).value, numerator, resultreg);
|
||||
cg.a_op_const_reg_reg(exprasmlist, OP_MUL, OS_INT, tordconstnode(right).value, resultreg,
|
||||
resultreg);
|
||||
cg.a_op_reg_reg_reg(exprasmlist, OP_SUB, OS_INT, resultreg, numerator, resultreg);
|
||||
end;
|
||||
end;
|
||||
@ -380,20 +150,19 @@ begin
|
||||
resultreg := cg.getintregister(exprasmlist,size);
|
||||
end;
|
||||
done := false;
|
||||
(*
|
||||
if (right.nodetype = ordconstn) then begin
|
||||
|
||||
if (cs_slowoptimize in aktglobalswitches) and (right.nodetype = ordconstn) then begin
|
||||
if (nodetype = divn) then
|
||||
genOrdConstNodeDiv
|
||||
else
|
||||
cg.a_op_const_reg_reg(exprasmlist, divCgOps[is_signed(right.resulttype.def)],
|
||||
size, tordconstnode(right).value, numerator, resultreg)
|
||||
else
|
||||
genOrdConstNodeMod;
|
||||
done := true;
|
||||
end;
|
||||
*)
|
||||
|
||||
if (not done) then begin
|
||||
{ load divider in a register if necessary }
|
||||
location_force_reg(exprasmlist,right.location,
|
||||
def_cgsize(right.resulttype.def),true);
|
||||
location_force_reg(exprasmlist,right.location,def_cgsize(right.resulttype.def),true);
|
||||
if (right.nodetype <> ordconstn) then
|
||||
exprasmlist.concat(taicpu.op_reg_reg_const(A_CMPDI, NR_CR7,
|
||||
right.location.register, 0))
|
||||
@ -403,13 +172,14 @@ begin
|
||||
end;
|
||||
divider := right.location.register;
|
||||
|
||||
{ needs overflow checking, (-maxlongint-1) div (-1) overflows! }
|
||||
op := divops[is_signed(right.resulttype.def),
|
||||
cs_check_overflow in aktlocalswitches];
|
||||
{ select the correct opcode according to the sign of the result and whether we need
|
||||
overflow checking }
|
||||
op := divops[is_signed(right.resulttype.def), cs_check_overflow in aktlocalswitches];
|
||||
exprasmlist.concat(taicpu.op_reg_reg_reg(op, resultreg, numerator,
|
||||
divider));
|
||||
|
||||
if (nodetype = modn) then begin
|
||||
{ multiply with the divisor again, taking care of the correct size }
|
||||
exprasmlist.concat(taicpu.op_reg_reg_reg(A_MULLD,resultreg,
|
||||
divider,resultreg));
|
||||
exprasmlist.concat(taicpu.op_reg_reg_reg(A_SUB,location.register,
|
||||
@ -433,111 +203,10 @@ begin
|
||||
cg.g_overflowcheck(exprasmlist,location,resulttype.def);
|
||||
end;
|
||||
|
||||
(*
|
||||
procedure tppcmoddivnode.pass_2;
|
||||
const
|
||||
// ts: todo, use 32 bit operations if possible (much faster!)
|
||||
{ signed overflow }
|
||||
divops: array[boolean, boolean] of tasmop =
|
||||
((A_DIVDU, A_DIVDUO_), (A_DIVD, A_DIVDO_));
|
||||
zerocond: tasmcond = (dirhint: DH_Plus; simple: true; cond: C_NE; cr: RS_CR1);
|
||||
var
|
||||
power: longint;
|
||||
op: tasmop;
|
||||
numerator,
|
||||
divider,
|
||||
resultreg: tregister;
|
||||
size: Tcgsize;
|
||||
hl: tasmlabel;
|
||||
|
||||
begin
|
||||
secondpass(left);
|
||||
secondpass(right);
|
||||
location_copy(location, left.location);
|
||||
|
||||
{ put numerator in register }
|
||||
size := def_cgsize(left.resulttype.def);
|
||||
location_force_reg(exprasmlist, left.location,
|
||||
size, true);
|
||||
location_copy(location, left.location);
|
||||
numerator := location.register;
|
||||
resultreg := location.register;
|
||||
if (location.loc = LOC_CREGISTER) then
|
||||
begin
|
||||
location.loc := LOC_REGISTER;
|
||||
location.register := cg.getintregister(exprasmlist, size);
|
||||
resultreg := location.register;
|
||||
end;
|
||||
if (nodetype = modn) then
|
||||
begin
|
||||
resultreg := cg.getintregister(exprasmlist, size);
|
||||
end;
|
||||
|
||||
if (nodetype = divn) and
|
||||
(right.nodetype = ordconstn) and
|
||||
ispowerof2(tordconstnode(right).value, power) then
|
||||
begin
|
||||
if (is_signed(right.resulttype.def)) then begin
|
||||
{ From "The PowerPC Compiler Writer's Guide": }
|
||||
{ This code uses the fact that, in the PowerPC architecture, }
|
||||
{ the shift right algebraic instructions set the Carry bit if }
|
||||
{ the source register contains a negative number and one or }
|
||||
{ more 1-bits are shifted out. Otherwise, the carry bit is }
|
||||
{ cleared. The addze instruction corrects the quotient, if }
|
||||
{ necessary, when the dividend is negative. For example, if }
|
||||
{ n = -13, (0xFFFF_FFF3), and k = 2, after executing the srawi }
|
||||
{ instruction, q = -4 (0xFFFF_FFFC) and CA = 1. After executing }
|
||||
{ the addze instruction, q = -3, the correct quotient. }
|
||||
cg.a_op_const_reg_reg(exprasmlist, OP_SAR, OS_64, power,
|
||||
numerator, resultreg);
|
||||
exprasmlist.concat(taicpu.op_reg_reg(A_ADDZE, resultreg, resultreg));
|
||||
end else begin
|
||||
cg.a_op_const_reg_reg(exprasmlist, OP_SHR, OS_INT, power, numerator, resultreg);
|
||||
end;
|
||||
end else begin
|
||||
{ load divider in a register if necessary }
|
||||
location_force_reg(exprasmlist, right.location,
|
||||
def_cgsize(right.resulttype.def), true);
|
||||
if (right.nodetype <> ordconstn) then
|
||||
{$NOTE ts: testme}
|
||||
exprasmlist.concat(taicpu.op_reg_reg_const(A_CMPDI, NR_CR1,
|
||||
right.location.register, 0));
|
||||
divider := right.location.register;
|
||||
|
||||
{ needs overflow checking, (-maxlongint-1) div (-1) overflows! }
|
||||
{ And on PPC, the only way to catch a div-by-0 is by checking }
|
||||
{ the overflow flag (JM) }
|
||||
op := divops[is_signed(right.resulttype.def),
|
||||
cs_check_overflow in aktlocalswitches];
|
||||
exprasmlist.concat(taicpu.op_reg_reg_reg(op, resultreg, numerator,
|
||||
divider));
|
||||
|
||||
if (nodetype = modn) then begin
|
||||
{$NOTE ts:testme}
|
||||
exprasmlist.concat(taicpu.op_reg_reg_reg(A_MULLD, resultreg,
|
||||
divider, resultreg));
|
||||
exprasmlist.concat(taicpu.op_reg_reg_reg(A_SUB, location.register,
|
||||
numerator, resultreg));
|
||||
resultreg := location.register;
|
||||
end;
|
||||
end;
|
||||
{ set result location }
|
||||
location.loc := LOC_REGISTER;
|
||||
location.register := resultreg;
|
||||
if (right.nodetype <> ordconstn) then begin
|
||||
objectlibrary.getjumplabel(hl);
|
||||
exprasmlist.concat(taicpu.op_cond_sym(A_BC, zerocond, hl));
|
||||
cg.a_call_name(exprasmlist, 'FPC_DIVBYZERO');
|
||||
cg.a_label(exprasmlist, hl);
|
||||
end;
|
||||
cg.g_overflowcheck(exprasmlist, location, resulttype.def);
|
||||
end;
|
||||
*)
|
||||
{*****************************************************************************
|
||||
TPPCSHLRSHRNODE
|
||||
*****************************************************************************}
|
||||
|
||||
|
||||
procedure tppcshlshrnode.pass_2;
|
||||
|
||||
var
|
||||
|
Loading…
Reference in New Issue
Block a user