From d088695868b8ef447da474832d2356cd5edc28f5 Mon Sep 17 00:00:00 2001 From: tom_at_work Date: Sat, 5 Nov 2005 17:59:14 +0000 Subject: [PATCH] * ppc64: moved division by constant optimization from nppcmat to cgcpu unit * ppc64: cg now also replaces multiplications/divisions by negative powers of two by shifts (and a negate) * ppc64: replacement of division by constants now properly checks the -O2 compiler switch git-svn-id: trunk@1662 - --- compiler/powerpc64/cgcpu.pas | 237 ++++++++++++++++++-- compiler/powerpc64/cpubase.pas | 2 +- compiler/powerpc64/itcpugas.pas | 2 +- compiler/powerpc64/nppcinl.pas | 3 +- compiler/powerpc64/nppcmat.pas | 377 ++------------------------------ 5 files changed, 248 insertions(+), 373 deletions(-) diff --git a/compiler/powerpc64/cgcpu.pas b/compiler/powerpc64/cgcpu.pas index 24d40df15d..c6c10243ea 100644 --- a/compiler/powerpc64/cgcpu.pas +++ b/compiler/powerpc64/cgcpu.pas @@ -172,6 +172,136 @@ uses symconst, symsym, fmodule, rgobj, tgobj, cpupi, procinfo, paramgr; +{ helper function which calculates "magic" values for replacement of unsigned + division by constant operation by multiplication. 
See the PowerPC compiler
+  developer manual for more information }
+procedure getmagic_unsignedN(const N : byte; const d : aWord; { N: operand width in bits; d: divisor }
+  out magic_m : aWord; out magic_add : boolean; out magic_shift : byte);
+var
+  p : aInt;
+  nc, delta, q1, r1, q2, r2, two_N_minus_1 : aWord;
+begin
+  assert(d > 0);
+  { despite its name, two_N_minus_1 holds 2^(N-1) (only bit N-1 set), not 2^N - 1 }
+  two_N_minus_1 := aWord(1) shl (N-1);
+  { NOTE(review): nc presumably relies on aWord wrap-around, i.e. on N = bit width of aWord - confirm }
+  magic_add := false;
+  nc := - 1 - (-d) mod d;
+  p := N-1; { initialize p }
+  q1 := two_N_minus_1 div nc; { initialize q1 = 2^p/nc }
+  r1 := two_N_minus_1 - q1*nc; { initialize r1 = rem(2^p,nc) }
+  q2 := (two_N_minus_1-1) div d; { initialize q2 = (2^p-1)/d }
+  r2 := (two_N_minus_1-1) - q2*d; { initialize r2 = rem((2^p-1),d) }
+  repeat
+    inc(p);
+    { double q1/r1 for the next power of two, keeping r1 < nc }
+    if (r1 >= (nc - r1)) then begin
+      q1 := 2 * q1 + 1; { update q1 }
+      r1 := 2*r1 - nc; { update r1 }
+    end else begin
+      q1 := 2*q1; { update q1 }
+      r1 := 2*r1; { update r1 }
+    end;
+    if ((r2 + 1) >= (d - r2)) then begin
+      if (q2 >= (two_N_minus_1-1)) then
+        magic_add := true; { after doubling, q2 + 1 would no longer fit into N bits }
+      q2 := 2*q2 + 1; { update q2 }
+      r2 := 2*r2 + 1 - d; { update r2 }
+    end else begin
+      if (q2 >= two_N_minus_1) then
+        magic_add := true; { after doubling, q2 would no longer fit into N bits }
+      q2 := 2*q2; { update q2 }
+      r2 := 2*r2 + 1; { update r2 }
+    end;
+    delta := d - 1 - r2;
+  { stop once p reaches 2*N or the q1/delta test no longer holds }
+  until not ((p < (2*N)) and ((q1 < delta) or ((q1 = delta) and (r1 = 0))));
+  magic_m := q2 + 1; { resulting magic number }
+  magic_shift := p - N; { resulting shift }
+end;
+
+{ helper function which calculates "magic" values for replacement of signed
+  division by constant operation by multiplication. 
See the PowerPC compiler
+  developer manual for more information }
+procedure getmagic_signedN(const N : byte; const d : aInt; { N: operand width in bits; d: divisor }
+  out magic_m : aInt; out magic_s : aInt);
+var
+  p : aInt;
+  ad, anc, delta, q1, r1, q2, r2, t : aWord;
+  two_N_minus_1 : aWord;
+
+begin
+  assert((d < -1) or (d > 1));
+  { despite its name, two_N_minus_1 holds 2^(N-1) (only bit N-1 set), not 2^N - 1 }
+  two_N_minus_1 := aWord(1) shl (N-1);
+  { NOTE(review): abs(d) has no aInt representation for d = low(aInt); presumably callers filter it - confirm }
+  ad := abs(d);
+  t := two_N_minus_1 + (aWord(d) shr (N-1));
+  anc := t - 1 - t mod ad; { absolute value of nc }
+  p := (N-1); { initialize p }
+  q1 := two_N_minus_1 div anc; { initialize q1 = 2^p/abs(nc) }
+  r1 := two_N_minus_1 - q1*anc; { initialize r1 = rem(2^p,abs(nc)) }
+  q2 := two_N_minus_1 div ad; { initialize q2 = 2^p/abs(d) }
+  r2 := two_N_minus_1 - q2*ad; { initialize r2 = rem(2^p,abs(d)) }
+  repeat
+    inc(p);
+    q1 := 2*q1; { update q1 = 2^p/abs(nc) }
+    r1 := 2*r1; { update r1 = rem(2^p/abs(nc)) }
+    if (r1 >= anc) then begin { must be unsigned comparison }
+      inc(q1);
+      dec(r1, anc);
+    end;
+    q2 := 2*q2; { update q2 = 2^p/abs(d) }
+    r2 := 2*r2; { update r2 = rem(2^p/abs(d)) }
+    if (r2 >= ad) then begin { must be unsigned comparison }
+      inc(q2);
+      dec(r2, ad);
+    end;
+    delta := ad - r2;
+  { unlike the unsigned variant there is no explicit upper bound on p here }
+  until not ((q1 < delta) or ((q1 = delta) and (r1 = 0)));
+  magic_m := q2 + 1;
+  if (d < 0) then begin
+    magic_m := -magic_m; { resulting magic number }
+  end;
+  magic_s := p - N; { resulting shift }
+end;
+
+{ finds positive and negative powers of two of the given value, returning the
+  power and whether it's a negative power or not in addition to the actual result
+  of the function }
+function ispowerof2(value : aInt; out power : byte; out neg : boolean) : boolean;
+var
+  i : longint;
+  hl : aInt;
+begin
+  neg := false;
+  { also try to find negative power of two's by negating if the
+    value is negative. low(aInt) is special because it can not be
+    negated. 
Simply return the appropriate values for it }
+  if (value < 0) then begin
+    neg := true;
+    if (value = low(aInt)) then begin
+      power := sizeof(aInt)*8-1;
+      result := true;
+      exit;
+    end;
+    value := -value;
+  end;
+  { zero has no bit set and is not a power of two; reject it so the result is always defined }
+  if (value = 0) or ((value and (value-1)) <> 0) then begin
+    result := false;
+    exit;
+  end;
+  hl := 1;
+  for i := 0 to (sizeof(aInt)*8-1) do begin
+    if (hl = value) then begin
+      result := true;
+      power := i;
+      exit;
+    end;
+    hl := hl shl 1;
+  end;
+end;
+
+
 procedure tcgppc.init_register_allocators;
 begin
   inherited init_register_allocators;
@@ -438,7 +568,9 @@ procedure tcgppc.a_load_const_reg(list: taasmoutput; size: TCGSize; a: aint;
 end;
 
 { R0-safe version of the above (ADDIS doesn't work the same way with R0 as base), without
-  the return value }
+  the return value. Currently unused; kept until further testing confirms that it is really
+  not necessary (loading the upper 32 bits of a value is now done using R12, which does not
+  require special treatment) }
 procedure load32bitconstantR0(list : taasmoutput; size : TCGSize; a : longint;
   reg : TRegister);
 begin
@@ -707,9 +839,93 @@ var
       else
         list.concat(taicpu.op_reg_reg_const(A_ANDI_, dst, src, word(a)));
   end;
+
+  { emits code for dst := src div a (a is a constant) without a divide instruction: shifts
+    for powers of two, multiplication by a "magic" reciprocal otherwise. All code is appended
+    to list; all operations are done at OS_INT width, size is currently not used }
+  procedure do_constant_div(list : taasmoutput; size : TCgSize; a : aint; src, dst : TRegister;
+    signed : boolean);
+  const
+    negops : array[boolean] of tasmop = (A_NEG, A_NEGO);
+  var
+    magic, shift : int64;
+    u_magic : qword;
+    u_shift : byte;
+    u_add : boolean;
+    power : byte;
+    isNegPower : boolean;
+    { divreg holds the magic constant and later scratch values; quotreg holds the
+      intermediate quotient so that src stays readable even when src = dst }
+    divreg, quotreg : tregister;
+  begin
+    if (a = 0) then begin
+      internalerror(2005061701);
+    end else if (a = 1) then begin
+      a_load_reg_reg(list, OS_INT, OS_INT, src, dst);
+    end else if (signed and (a = -1)) then begin
+      { signed only: for an unsigned divisor this bit pattern is the largest value; may overflow }
+      list.concat(taicpu.op_reg_reg(negops[cs_check_overflow in aktlocalswitches], dst, src));
+    end else if (ispowerof2(a, power, isNegPower) and
+        (signed or (not isNegPower) or (a = low(aInt)))) then begin
+      { for an unsigned divisor a "negative" power of two is just a large value; only
+        the bit pattern of low(aInt) is a real power of two there }
+      if (signed) then begin
+        { From "The PowerPC Compiler Writer's Guide", pg. 52ff }
+        a_op_const_reg_reg(list, OP_SAR, OS_INT, power, src, dst);
+        list.concat(taicpu.op_reg_reg(A_ADDZE, dst, dst));
+        if (isNegPower) then
+          list.concat(taicpu.op_reg_reg(A_NEG, dst, dst));
+      end else begin
+        a_op_const_reg_reg(list, OP_SHR, OS_INT, power, src, dst)
+      end;
+    end else begin
+      { replace division by multiplication, both implementations }
+      { from "The PowerPC Compiler Writer's Guide" pg. 53ff }
+      divreg := getintregister(list, OS_INT);
+      quotreg := getintregister(list, OS_INT);
+      if (signed) then begin
+        getmagic_signedN(sizeof(aInt)*8, a, magic, shift);
+        { load magic value }
+        a_load_const_reg(list, OS_INT, magic, divreg);
+        { multiply }
+        list.concat(taicpu.op_reg_reg_reg(A_MULHD, quotreg, src, divreg));
+        { add/subtract numerator }
+        if (a > 0) and (magic < 0) then begin
+          a_op_reg_reg_reg(list, OP_ADD, OS_INT, src, quotreg, quotreg);
+        end else if (a < 0) and (magic > 0) then begin
+          a_op_reg_reg_reg(list, OP_SUB, OS_INT, src, quotreg, quotreg);
+        end;
+        { shift shift places to the right (arithmetic) }
+        a_op_const_reg_reg(list, OP_SAR, OS_INT, shift, quotreg, quotreg);
+        { extract and add sign bit }
+        if (a >= 0) then begin
+          a_op_const_reg_reg(list, OP_SHR, OS_INT, 63, src, divreg);
+        end else begin
+          a_op_const_reg_reg(list, OP_SHR, OS_INT, 63, quotreg, divreg);
+        end;
+        a_op_reg_reg_reg(list, OP_ADD, OS_INT, quotreg, divreg, dst);
+      end else begin
+        getmagic_unsignedN(sizeof(aWord)*8, a, u_magic, u_add, u_shift);
+        { load magic in divreg }
+        a_load_const_reg(list, OS_INT, u_magic, divreg);
+        list.concat(taicpu.op_reg_reg_reg(A_MULHDU, quotreg, src, divreg));
+        if (u_add) then begin
+          a_op_reg_reg_reg(list, OP_SUB, OS_INT, quotreg, src, divreg);
+          a_op_const_reg_reg(list, OP_SHR, OS_INT, 1, divreg, divreg);
+          a_op_reg_reg_reg(list, OP_ADD, OS_INT, divreg, quotreg, divreg);
+          a_op_const_reg_reg(list, OP_SHR, OS_INT, u_shift-1, divreg, dst);
+        end else begin
+          a_op_const_reg_reg(list, OP_SHR, OS_INT, u_shift, quotreg, dst);
+        end;
+      end;
+    end;
+  end;
+
 var
   scratchreg: tregister;
-  shift, shiftmask : longint;
+  shift : byte;
+  shiftmask : longint;
+  isneg : boolean;
 
 begin
   { subtraction is the same as addition with negative constant }
@@ -725,13 +934,8 @@ begin
   useReg := false;
   case (op) of
     OP_DIV, OP_IDIV:
-      { actually, this method should be never called directly with OP_DIV or
-        OP_IDIV, so just provide basic support.
-        TODO: move division by constant stuff from nppcmat.pas here }
-      if (a = 0) then
-        internalerror(200208103)
-      else if (a = 1) then
-        a_load_reg_reg(list, size, size, src, dst)
+      if (cs_slowoptimize in aktglobalswitches) then
+        do_constant_div(list, size, a, src, dst, op = OP_IDIV)
       else
         usereg := true;
     OP_IMUL, OP_MUL:
@@ -743,9 +947,11 @@ begin
         list.concat(taicpu.op_reg_reg(A_NEG, dst, dst))
       else if (a = 1) then
         a_load_reg_reg(list, OS_INT, OS_INT, src, dst)
-      else if ispowerof2(a, shift) then
-        list.concat(taicpu.op_reg_reg_const(A_SLDI, dst, src, shift))
-      else if (a >= low(smallint)) and (a <= high(smallint)) then
+      else if ispowerof2(a, shift, isneg) then begin
+        list.concat(taicpu.op_reg_reg_const(A_SLDI, dst, src, shift));
+        if (isneg) then
+          list.concat(taicpu.op_reg_reg(A_NEG, dst, dst));
+      end else if (a >= low(smallint)) and (a <= high(smallint)) then
         list.concat(taicpu.op_reg_reg_const(A_MULLI, dst, src,
           smallint(a)))
       else
@@ -808,7 +1014,6 @@ end;
 
 procedure tcgppc.a_op_reg_reg_reg(list: taasmoutput; op: TOpCg;
   size: tcgsize; src1, src2, dst: tregister);
-
 const
   op_reg_reg_opcg2asmop32: array[TOpCG] of tasmop =
   (A_NONE, A_ADD, A_AND, A_DIVWU, A_DIVW, A_MULLW, A_MULLW, A_NEG, A_NOT, A_OR,
@@ -816,7 +1021,6 @@ const
   op_reg_reg_opcg2asmop64: array[TOpCG] of tasmop =
   (A_NONE, A_ADD, A_AND, A_DIVDU, A_DIVD, A_MULLD, A_MULLD, A_NEG, A_NOT, A_OR,
     A_SRAD, A_SLD, A_SRD, A_SUB, A_XOR);
-
 begin
   case op of
     OP_NEG, OP_NOT:
@@ -1559,7 +1763,7 @@ begin
     least four. 
If not, add the bytes which are "off" to the base register and adjust the offset accordingly } case op of - A_LD, A_LDU, A_STD, A_STDU, A_LWA, A_LWAU : + A_LD, A_LDU, A_STD, A_STDU, A_LWA : if ((ref.offset mod 4) <> 0) then begin tmpreg := rg[R_INTREGISTER].getregister(list, R_SUBWHOLE); @@ -1621,7 +1825,8 @@ begin tmpref.base := ref.base; tmpref.index := tmpreg2; case op of - { the code generator doesn't generate update instructions anyway } + { the code generator doesn't generate update instructions anyway, so + error out on those instructions } A_LBZ : op := A_LBZX; A_LHZ : op := A_LHZX; A_LWZ : op := A_LWZX; diff --git a/compiler/powerpc64/cpubase.pas b/compiler/powerpc64/cpubase.pas index 839c2e6931..0fe3932355 100644 --- a/compiler/powerpc64/cpubase.pas +++ b/compiler/powerpc64/cpubase.pas @@ -96,7 +96,7 @@ type A_RLDICL, A_DIVDU, A_DIVDU_, A_DIVD, A_DIVD_, A_MULLD, A_MULLD_, A_MULHD, A_MULHD_, A_SRAD, A_SLD, A_SRD, A_DIVDUO_, A_DIVDO_, - A_LWA, A_LWAU, A_LWAX, A_LWAUX, + A_LWA, A_LWAX, A_LWAUX, A_FCFID, A_LDARX, A_STDCX_, A_CNTLZD, A_LVX, A_STVX, diff --git a/compiler/powerpc64/itcpugas.pas b/compiler/powerpc64/itcpugas.pas index 810224d845..73dc51bccd 100644 --- a/compiler/powerpc64/itcpugas.pas +++ b/compiler/powerpc64/itcpugas.pas @@ -86,7 +86,7 @@ const 'rldicl', 'divdu', 'divdu.', 'divd', 'divd.', 'mulld', 'mulld.', 'mulhd', 'mulhd.', 'srad', 'sld', 'srd', 'divduo.', 'divdo.', - 'lwa', '', 'lwax', 'lwaux', + 'lwa', 'lwax', 'lwaux', 'fcfid', 'ldarx', 'stdcx.', 'cntlzd', 'lvx', 'stvx', diff --git a/compiler/powerpc64/nppcinl.pas b/compiler/powerpc64/nppcinl.pas index 938d58cfa1..1f2809141b 100644 --- a/compiler/powerpc64/nppcinl.pas +++ b/compiler/powerpc64/nppcinl.pas @@ -36,7 +36,8 @@ type } function first_abs_real: tnode; override; function first_sqr_real: tnode; override; - { todo: inline trunc/round/frac?/int } + + { trunc/round/frac?/int can't be inlined? 
} procedure second_abs_real; override; procedure second_sqr_real; override; diff --git a/compiler/powerpc64/nppcmat.pas b/compiler/powerpc64/nppcmat.pas index b7c372ef01..6da508c230 100644 --- a/compiler/powerpc64/nppcmat.pas +++ b/compiler/powerpc64/nppcmat.pas @@ -59,176 +59,6 @@ uses cpubase, cpuinfo, ncgutil, cgcpu, rgobj; -{ helper functions } -procedure getmagic_unsigned32(d : dword; out magic_m : dword; out magic_add : boolean; out magic_shift : dword); -var - p : longint; - nc, delta, q1, r1, q2, r2 : dword; - -begin - assert(d > 0); - - magic_add := false; - nc := - 1 - (-d) mod d; - p := 31; { initialize p } - q1 := $80000000 div nc; { initialize q1 = 2p/nc } - r1 := $80000000 - q1*nc; { initialize r1 = rem(2p,nc) } - q2 := $7FFFFFFF div d; { initialize q2 = (2p-1)/d } - r2 := $7FFFFFFF - q2*d; { initialize r2 = rem((2p-1),d) } - repeat - inc(p); - if (r1 >= (nc - r1)) then begin - q1 := 2 * q1 + 1; { update q1 } - r1 := 2*r1 - nc; { update r1 } - end else begin - q1 := 2*q1; { update q1 } - r1 := 2*r1; { update r1 } - end; - if ((r2 + 1) >= (d - r2)) then begin - if (q2 >= $7FFFFFFF) then - magic_add := true; - q2 := 2*q2 + 1; { update q2 } - r2 := 2*r2 + 1 - d; { update r2 } - end else begin - if (q2 >= $80000000) then - magic_add := true; - q2 := 2*q2; { update q2 } - r2 := 2*r2 + 1; { update r2 } - end; - delta := d - 1 - r2; - until not ((p < 64) and ((q1 < delta) or ((q1 = delta) and (r1 = 0)))); - magic_m := q2 + 1; { resulting magic number } - magic_shift := p - 32; { resulting shift } -end; - -procedure getmagic_signed32(d : longint; out magic_m : longint; out magic_s : longint); -const - two_31 : DWord = high(longint)+1; -var - p : Longint; - ad, anc, delta, q1, r1, q2, r2, t : DWord; - -begin - assert((d < -1) or (d > 1)); - - ad := abs(d); - t := two_31 + (DWord(d) shr 31); - anc := t - 1 - t mod ad; { absolute value of nc } - p := 31; { initialize p } - q1 := two_31 div anc; { initialize q1 = 2p/abs(nc) } - r1 := two_31 - q1*anc; { initialize 
r1 = rem(2p,abs(nc)) } - q2 := two_31 div ad; { initialize q2 = 2p/abs(d) } - r2 := two_31 - q2*ad; { initialize r2 = rem(2p,abs(d)) } - repeat - inc(p); - q1 := 2*q1; { update q1 = 2p/abs(nc) } - r1 := 2*r1; { update r1 = rem(2p/abs(nc)) } - if (r1 >= anc) then begin { must be unsigned comparison } - inc(q1); - dec(r1, anc); - end; - q2 := 2*q2; { update q2 = 2p/abs(d) } - r2 := 2*r2; { update r2 = rem(2p/abs(d)) } - if (r2 >= ad) then begin { must be unsigned comparison } - inc(q2); - dec(r2, ad); - end; - delta := ad - r2; - until not ((q1 < delta) or ((q1 = delta) and (r1 = 0))); - magic_m := q2 + 1; - if (d < 0) then begin - magic_m := -magic_m; { resulting magic number } - end; - magic_s := p - 32; { resulting shift } -end; - -{ helper functions } -procedure getmagic_unsigned64(d : qword; out magic_m : qword; out magic_add : boolean; out magic_shift : qword); -const - two_63 : QWord = $8000000000000000; -var - p : int64; - nc, delta, q1, r1, q2, r2 : qword; - -begin - assert(d > 0); - - magic_add := false; - nc := - 1 - (-d) mod d; - p := 63; { initialize p } - q1 := two_63 div nc; { initialize q1 = 2p/nc } - r1 := two_63 - q1*nc; { initialize r1 = rem(2p,nc) } - q2 := (two_63-1) div d; { initialize q2 = (2p-1)/d } - r2 := (two_63-1) - q2*d; { initialize r2 = rem((2p-1),d) } - repeat - inc(p); - if (r1 >= (nc - r1)) then begin - q1 := 2 * q1 + 1; { update q1 } - r1 := 2*r1 - nc; { update r1 } - end else begin - q1 := 2*q1; { update q1 } - r1 := 2*r1; { update r1 } - end; - if ((r2 + 1) >= (d - r2)) then begin - if (q2 >= (two_63-1)) then - magic_add := true; - q2 := 2*q2 + 1; { update q2 } - r2 := 2*r2 + 1 - d; { update r2 } - end else begin - if (q2 >= two_63) then - magic_add := true; - q2 := 2*q2; { update q2 } - r2 := 2*r2 + 1; { update r2 } - end; - delta := d - 1 - r2; - until not ((p < 128) and ((q1 < delta) or ((q1 = delta) and (r1 = 0)))); - magic_m := q2 + 1; { resulting magic number } - magic_shift := p - 64; { resulting shift } -end; - -procedure 
getmagic_signed64(d : int64; out magic_m : int64; out magic_s : int64); -const - two_63 : QWord = $8000000000000000; -var - p : int64; - ad, anc, delta, q1, r1, q2, r2, t : QWord; - -begin - assert((d < -1) or (d > 1)); - - ad := abs(d); - t := two_63 + (QWord(d) shr 63); - anc := t - 1 - t mod ad; { absolute value of nc } - p := 63; { initialize p } - q1 := two_63 div anc; { initialize q1 = 2p/abs(nc) } - r1 := two_63 - q1*anc; { initialize r1 = rem(2p,abs(nc)) } - q2 := two_63 div ad; { initialize q2 = 2p/abs(d) } - r2 := two_63 - q2*ad; { initialize r2 = rem(2p,abs(d)) } - repeat - inc(p); - q1 := 2*q1; { update q1 = 2p/abs(nc) } - r1 := 2*r1; { update r1 = rem(2p/abs(nc)) } - if (r1 >= anc) then begin { must be unsigned comparison } - inc(q1); - dec(r1, anc); - end; - q2 := 2*q2; { update q2 = 2p/abs(d) } - r2 := 2*r2; { update r2 = rem(2p/abs(d)) } - if (r2 >= ad) then begin { must be unsigned comparison } - inc(q2); - dec(r2, ad); - end; - delta := ad - r2; - until not ((q1 < delta) or ((q1 = delta) and (r1 = 0))); - magic_m := q2 + 1; - if (d < 0) then begin - magic_m := -magic_m; { resulting magic number } - end; - magic_s := p - 64; { resulting shift } -end; - - - {***************************************************************************** TPPCMODDIVNODE *****************************************************************************} @@ -243,8 +73,13 @@ end; procedure tppcmoddivnode.pass_2; const { signed overflow } divops: array[boolean, boolean] of tasmop = - ((A_DIVDU,A_DIVDU_),(A_DIVD,A_DIVDO_)); + ((A_DIVDU, A_DIVDU_),(A_DIVD, A_DIVDO_)); + divcgops : array[boolean] of TOpCG = (OP_DIV, OP_IDIV); zerocond: tasmcond = (dirhint: DH_Plus; simple: true; cond:C_NE; cr: RS_CR7); + tcgsize2native : array[OS_8..OS_S128] of tcgsize = ( + OS_64, OS_64, OS_64, OS_64, OS_NO, + OS_S64, OS_S64, OS_S64, OS_S64, OS_NO + ); var power : longint; op : tasmop; @@ -254,78 +89,10 @@ var hl : tasmlabel; done: boolean; - procedure genOrdConstNodeDiv; - const - negops : 
array[boolean] of tasmop = (A_NEG, A_NEGO); - var - magic, shift : int64; - u_magic, u_shift : qword; - u_add : boolean; - - divreg : tregister; - begin - if (tordconstnode(right).value = 0) then begin - internalerror(2005061701); - end else if (tordconstnode(right).value = 1) then begin - cg.a_load_reg_reg(exprasmlist, OS_INT, OS_INT, numerator, resultreg); - end else if (tordconstnode(right).value = -1) then begin - { note: only in the signed case possible..., may overflow } - exprasmlist.concat(taicpu.op_reg_reg(negops[cs_check_overflow in aktlocalswitches], resultreg, numerator)); - end else if (ispowerof2(tordconstnode(right).value, power)) then begin - if (is_signed(right.resulttype.def)) then begin - { From "The PowerPC Compiler Writer's Guide", pg. 52ff } - cg.a_op_const_reg_reg(exprasmlist, OP_SAR, OS_INT, power, - numerator, resultreg); - exprasmlist.concat(taicpu.op_reg_reg(A_ADDZE, resultreg, resultreg)); - end else begin - cg.a_op_const_reg_reg(exprasmlist, OP_SHR, OS_INT, power, numerator, resultreg) - end; - end else begin - { replace division by multiplication, both implementations } - { from "The PowerPC Compiler Writer's Guide" pg. 
53ff } - divreg := cg.getintregister(exprasmlist, OS_INT); - if (is_signed(right.resulttype.def)) then begin - getmagic_signed64(tordconstnode(right).value, magic, shift); - { load magic value } - cg.a_load_const_reg(exprasmlist, OS_INT, magic, divreg); - { multiply } - exprasmlist.concat(taicpu.op_reg_reg_reg(A_MULHD, resultreg, numerator, divreg)); - { add/subtract numerator } - if (tordconstnode(right).value > 0) and (magic < 0) then begin - cg.a_op_reg_reg_reg(exprasmlist, OP_ADD, OS_INT, numerator, resultreg, resultreg); - end else if (tordconstnode(right).value < 0) and (magic > 0) then begin - cg.a_op_reg_reg_reg(exprasmlist, OP_SUB, OS_INT, numerator, resultreg, resultreg); - end; - { shift shift places to the right (arithmetic) } - cg.a_op_const_reg_reg(exprasmlist, OP_SAR, OS_INT, shift, resultreg, resultreg); - { extract and add sign bit } - if (tordconstnode(right).value >= 0) then begin - cg.a_op_const_reg_reg(exprasmlist, OP_SHR, OS_INT, 63, numerator, divreg); - end else begin - cg.a_op_const_reg_reg(exprasmlist, OP_SHR, OS_INT, 63, resultreg, divreg); - end; - cg.a_op_reg_reg_reg(exprasmlist, OP_ADD, OS_INT, resultreg, divreg, resultreg); - end else begin - getmagic_unsigned64(tordconstnode(right).value, u_magic, u_add, u_shift); - { load magic in divreg } - cg.a_load_const_reg(exprasmlist, OS_INT, u_magic, divreg); - exprasmlist.concat(taicpu.op_reg_reg_reg(A_MULHDU, resultreg, numerator, divreg)); - if (u_add) then begin - cg.a_op_reg_reg_reg(exprasmlist, OP_SUB, OS_INT, resultreg, numerator, divreg); - cg.a_op_const_reg_reg(exprasmlist, OP_SHR, OS_INT, 1, divreg, divreg); - cg.a_op_reg_reg_reg(exprasmlist, OP_ADD, OS_INT, divreg, resultreg, divreg); - cg.a_op_const_reg_reg(exprasmlist, OP_SHR, OS_INT, u_shift-1, divreg, resultreg); - end else begin - cg.a_op_const_reg_reg(exprasmlist, OP_SHR, OS_INT, u_shift, resultreg, resultreg); - end; - end; - end; - done := true; - end; - procedure genOrdConstNodeMod; var modreg, maskreg, tempreg : 
tregister; + isNegPower : boolean; begin if (tordconstnode(right).value = 0) then begin internalerror(2005061702); @@ -348,11 +115,14 @@ var cg.a_op_reg_reg_reg(exprasmlist, OP_AND, OS_INT, modreg, maskreg, maskreg); cg.a_op_reg_reg_reg(exprasmlist, OP_OR, OS_INT, maskreg, tempreg, resultreg); end else begin - cg.a_op_const_reg_reg(exprasmlist, OP_AND, OS_INT, tordconstnode(right).value-1, numerator, resultreg); + cg.a_op_const_reg_reg(exprasmlist, OP_AND, OS_INT, tordconstnode(right).value-1, numerator, + resultreg); end; end else begin - genOrdConstNodeDiv(); - cg.a_op_const_reg_reg(exprasmlist, OP_MUL, OS_INT, tordconstnode(right).value, resultreg, resultreg); + cg.a_op_const_reg_reg(exprasmlist, divCgOps[is_signed(right.resulttype.def)], OS_INT, + tordconstnode(right).value, numerator, resultreg); + cg.a_op_const_reg_reg(exprasmlist, OP_MUL, OS_INT, tordconstnode(right).value, resultreg, + resultreg); cg.a_op_reg_reg_reg(exprasmlist, OP_SUB, OS_INT, resultreg, numerator, resultreg); end; end; @@ -380,20 +150,19 @@ begin resultreg := cg.getintregister(exprasmlist,size); end; done := false; -(* - if (right.nodetype = ordconstn) then begin + + if (cs_slowoptimize in aktglobalswitches) and (right.nodetype = ordconstn) then begin if (nodetype = divn) then - genOrdConstNodeDiv - else + cg.a_op_const_reg_reg(exprasmlist, divCgOps[is_signed(right.resulttype.def)], + size, tordconstnode(right).value, numerator, resultreg) + else genOrdConstNodeMod; done := true; end; -*) if (not done) then begin { load divider in a register if necessary } - location_force_reg(exprasmlist,right.location, - def_cgsize(right.resulttype.def),true); + location_force_reg(exprasmlist,right.location,def_cgsize(right.resulttype.def),true); if (right.nodetype <> ordconstn) then exprasmlist.concat(taicpu.op_reg_reg_const(A_CMPDI, NR_CR7, right.location.register, 0)) @@ -403,13 +172,14 @@ begin end; divider := right.location.register; - { needs overflow checking, (-maxlongint-1) div (-1) overflows! 
} - op := divops[is_signed(right.resulttype.def), - cs_check_overflow in aktlocalswitches]; + { select the correct opcode according to the sign of the result, whether we need + overflow checking } + op := divops[is_signed(right.resulttype.def), cs_check_overflow in aktlocalswitches]; exprasmlist.concat(taicpu.op_reg_reg_reg(op, resultreg, numerator, divider)); if (nodetype = modn) then begin + { multiply with the divisor again, taking care of the correct size } exprasmlist.concat(taicpu.op_reg_reg_reg(A_MULLD,resultreg, divider,resultreg)); exprasmlist.concat(taicpu.op_reg_reg_reg(A_SUB,location.register, @@ -433,111 +203,10 @@ begin cg.g_overflowcheck(exprasmlist,location,resulttype.def); end; -(* -procedure tppcmoddivnode.pass_2; -const - // ts: todo, use 32 bit operations if possible (much faster!) - { signed overflow } - divops: array[boolean, boolean] of tasmop = - ((A_DIVDU, A_DIVDUO_), (A_DIVD, A_DIVDO_)); - zerocond: tasmcond = (dirhint: DH_Plus; simple: true; cond: C_NE; cr: RS_CR1); -var - power: longint; - op: tasmop; - numerator, - divider, - resultreg: tregister; - size: Tcgsize; - hl: tasmlabel; - -begin - secondpass(left); - secondpass(right); - location_copy(location, left.location); - - { put numerator in register } - size := def_cgsize(left.resulttype.def); - location_force_reg(exprasmlist, left.location, - size, true); - location_copy(location, left.location); - numerator := location.register; - resultreg := location.register; - if (location.loc = LOC_CREGISTER) then - begin - location.loc := LOC_REGISTER; - location.register := cg.getintregister(exprasmlist, size); - resultreg := location.register; - end; - if (nodetype = modn) then - begin - resultreg := cg.getintregister(exprasmlist, size); - end; - - if (nodetype = divn) and - (right.nodetype = ordconstn) and - ispowerof2(tordconstnode(right).value, power) then - begin - if (is_signed(right.resulttype.def)) then begin - { From "The PowerPC Compiler Writer's Guide": } - { This code uses the 
fact that, in the PowerPC architecture, } - { the shift right algebraic instructions set the Carry bit if } - { the source register contains a negative number and one or } - { more 1-bits are shifted out. Otherwise, the carry bit is } - { cleared. The addze instruction corrects the quotient, if } - { necessary, when the dividend is negative. For example, if } - { n = -13, (0xFFFF_FFF3), and k = 2, after executing the srawi } - { instruction, q = -4 (0xFFFF_FFFC) and CA = 1. After executing } - { the addze instruction, q = -3, the correct quotient. } - cg.a_op_const_reg_reg(exprasmlist, OP_SAR, OS_64, power, - numerator, resultreg); - exprasmlist.concat(taicpu.op_reg_reg(A_ADDZE, resultreg, resultreg)); - end else begin - cg.a_op_const_reg_reg(exprasmlist, OP_SHR, OS_INT, power, numerator, resultreg); - end; - end else begin - { load divider in a register if necessary } - location_force_reg(exprasmlist, right.location, - def_cgsize(right.resulttype.def), true); - if (right.nodetype <> ordconstn) then -{$NOTE ts: testme} - exprasmlist.concat(taicpu.op_reg_reg_const(A_CMPDI, NR_CR1, - right.location.register, 0)); - divider := right.location.register; - - { needs overflow checking, (-maxlongint-1) div (-1) overflows! 
} - { And on PPC, the only way to catch a div-by-0 is by checking } - { the overflow flag (JM) } - op := divops[is_signed(right.resulttype.def), - cs_check_overflow in aktlocalswitches]; - exprasmlist.concat(taicpu.op_reg_reg_reg(op, resultreg, numerator, - divider)); - - if (nodetype = modn) then begin -{$NOTE ts:testme} - exprasmlist.concat(taicpu.op_reg_reg_reg(A_MULLD, resultreg, - divider, resultreg)); - exprasmlist.concat(taicpu.op_reg_reg_reg(A_SUB, location.register, - numerator, resultreg)); - resultreg := location.register; - end; - end; - { set result location } - location.loc := LOC_REGISTER; - location.register := resultreg; - if (right.nodetype <> ordconstn) then begin - objectlibrary.getjumplabel(hl); - exprasmlist.concat(taicpu.op_cond_sym(A_BC, zerocond, hl)); - cg.a_call_name(exprasmlist, 'FPC_DIVBYZERO'); - cg.a_label(exprasmlist, hl); - end; - cg.g_overflowcheck(exprasmlist, location, resulttype.def); -end; -*) {***************************************************************************** TPPCSHLRSHRNODE *****************************************************************************} - procedure tppcshlshrnode.pass_2; var