mirror of
https://gitlab.com/freepascal.org/fpc/source.git
synced 2025-04-12 21:29:43 +02:00
* a_op_const_reg_reg optimizations
* added some more 64 bit PPC opcodes * removed last mwpascal references * added replacement of division/modulo by constant by multiplications and shifts for 64 bit * general cleanup git-svn-id: trunk@1648 -
This commit is contained in:
parent
4bd32a686d
commit
e4a61f4af1
@ -157,16 +157,9 @@ type
|
||||
end;
|
||||
|
||||
const
|
||||
TOpCG2AsmOpConstLo: array[topcg] of TAsmOp = (A_NONE, A_ADDI, A_ANDI_,
|
||||
A_DIVWU,
|
||||
A_DIVW, A_MULLW, A_MULLW, A_NONE, A_NONE, A_ORI,
|
||||
A_SRAWI, A_SLWI, A_SRWI, A_SUBI, A_XORI);
|
||||
TOpCG2AsmOpConstHi: array[topcg] of TAsmOp = (A_NONE, A_ADDIS, A_ANDIS_,
|
||||
A_DIVWU, A_DIVW, A_MULLW, A_MULLW, A_NONE, A_NONE,
|
||||
A_ORIS, A_NONE, A_NONE, A_NONE, A_SUBIS, A_XORIS);
|
||||
|
||||
TShiftOpCG2AsmOpConst32 : array[OP_SAR..OP_SHR] of TAsmOp = (A_SRAWI, A_SLWI, A_SRWI);
|
||||
TShiftOpCG2AsmOpConst64 : array[OP_SAR..OP_SHR] of TAsmOp = (A_SRADI, A_SLDI, A_SRDI);
|
||||
TShiftOpCG2AsmOpConst : array[boolean, OP_SAR..OP_SHR] of TAsmOp = (
|
||||
(A_SRAWI, A_SLWI, A_SRWI), (A_SRADI, A_SLDI, A_SRDI)
|
||||
);
|
||||
|
||||
TOpCmp2AsmCond: array[topcmp] of TAsmCondFlag = (C_NONE, C_EQ, C_GT,
|
||||
C_LT, C_GE, C_LE, C_NE, C_LE, C_LT, C_GE, C_GT);
|
||||
@ -248,10 +241,13 @@ begin
|
||||
location^.register)
|
||||
else
|
||||
{ load non-integral sized memory location into register. This
|
||||
memory location be 1-sizeleft byte sized.
|
||||
Always assume that this memory area is properly aligned, eg. start
|
||||
loading the larger quantities for "odd" quantities first }
|
||||
memory location be 1-sizeleft byte sized.
|
||||
Always assume that this memory area is properly aligned, eg. start
|
||||
loading the larger quantities for "odd" quantities first }
|
||||
case sizeleft of
|
||||
1,2,4,8 :
|
||||
a_load_ref_reg(list, int_cgsize(sizeleft), location^.size, tmpref,
|
||||
location^.register);
|
||||
3 : begin
|
||||
a_reg_alloc(list, NR_R12);
|
||||
a_load_ref_reg(list, OS_16, location^.size, tmpref,
|
||||
@ -259,7 +255,7 @@ begin
|
||||
inc(tmpref.offset, tcgsize2size[OS_16]);
|
||||
a_load_ref_reg(list, OS_8, location^.size, tmpref,
|
||||
location^.register);
|
||||
list.concat(taicpu.op_reg_reg_const_const(A_RLDIMI, location^.register, NR_R12, 8, 40));
|
||||
list.concat(taicpu.op_reg_reg_const_const(A_RLDIMI, location^.register, NR_R12, 8, 40));
|
||||
a_reg_dealloc(list, NR_R12);
|
||||
end;
|
||||
5 : begin
|
||||
@ -267,8 +263,8 @@ begin
|
||||
a_load_ref_reg(list, OS_32, location^.size, tmpref, NR_R12);
|
||||
inc(tmpref.offset, tcgsize2size[OS_32]);
|
||||
a_load_ref_reg(list, OS_8, location^.size, tmpref, location^.register);
|
||||
list.concat(taicpu.op_reg_reg_const_const(A_RLDIMI, location^.register, NR_R12, 8, 24));
|
||||
a_reg_dealloc(list, NR_R12);
|
||||
list.concat(taicpu.op_reg_reg_const_const(A_RLDIMI, location^.register, NR_R12, 8, 24));
|
||||
a_reg_dealloc(list, NR_R12);
|
||||
end;
|
||||
6 : begin
|
||||
a_reg_alloc(list, NR_R12);
|
||||
@ -286,20 +282,16 @@ begin
|
||||
a_load_ref_reg(list, OS_16, location^.size, tmpref, NR_R0);
|
||||
inc(tmpref.offset, tcgsize2size[OS_16]);
|
||||
a_load_ref_reg(list, OS_8, location^.size, tmpref, location^.register);
|
||||
list.concat(taicpu.op_reg_reg_const_const(A_RLDIMI, NR_R0, NR_R12, 16, 16));
|
||||
list.concat(taicpu.op_reg_reg_const_const(A_RLDIMI, location^.register, NR_R0, 8, 8));
|
||||
list.concat(taicpu.op_reg_reg_const_const(A_RLDIMI, NR_R0, NR_R12, 16, 16));
|
||||
list.concat(taicpu.op_reg_reg_const_const(A_RLDIMI, location^.register, NR_R0, 8, 8));
|
||||
a_reg_dealloc(list, NR_R0);
|
||||
a_reg_dealloc(list, NR_R12);
|
||||
end;
|
||||
1,2,4,8 :
|
||||
a_load_ref_reg(list, int_cgsize(sizeleft), location^.size, tmpref,
|
||||
location^.register);
|
||||
else
|
||||
else
|
||||
{ still > 8 bytes to load, so load data single register now }
|
||||
a_load_ref_reg(list, location^.size, location^.size, tmpref,
|
||||
location^.register);
|
||||
end;
|
||||
// a_load_ref_reg(list, location^.size, location^.size, tmpref,
|
||||
// location^.register);
|
||||
end;
|
||||
LOC_REFERENCE:
|
||||
begin
|
||||
@ -368,12 +360,8 @@ begin
|
||||
AT_FUNCTION)));
|
||||
if (addNOP) then
|
||||
list.concat(taicpu.op_none(A_NOP));
|
||||
{
|
||||
the compiler does not properly set this flag anymore in pass 1, and
|
||||
for now we only need it after pass 2 (I hope) (JM)
|
||||
if not(pi_do_call in current_procinfo.flags) then
|
||||
internalerror(2003060703);
|
||||
}
|
||||
{ the compiler does not properly set this flag anymore in pass 1, and
|
||||
for now we only need it after pass 2 (I hope) (JM) }
|
||||
include(current_procinfo.flags, pi_do_call);
|
||||
end;
|
||||
|
||||
@ -503,9 +491,9 @@ begin
|
||||
32 bits should contain -1
|
||||
- loading the lower 32 bits resulted in 0 in the upper 32 bits, and the upper
|
||||
32 bits should contain 0 }
|
||||
load32bitconstantR0(list, size, hi(a), NR_R0);
|
||||
load32bitconstant(list, size, hi(a), NR_R12);
|
||||
{ combine both registers }
|
||||
list.concat(taicpu.op_reg_reg_const_const(A_RLDIMI, reg, NR_R0, 32, 0));
|
||||
list.concat(taicpu.op_reg_reg_const_const(A_RLDIMI, reg, NR_R12, 32, 0));
|
||||
end;
|
||||
end;
|
||||
end;
|
||||
@ -550,7 +538,7 @@ const
|
||||
((A_LBZ, A_LBZU), (A_LBZX, A_LBZUX)),
|
||||
((A_LHA, A_LHAU), (A_LHAX, A_LHAUX)),
|
||||
{ there's no load-word-arithmetic-indexed with update, simulate it in code :( }
|
||||
((A_LWA, A_LWAU), (A_LWAX, A_LWAUX)),
|
||||
((A_LWA, A_NOP), (A_LWAX, A_LWAUX)),
|
||||
((A_LD, A_LDU), (A_LDX, A_LDUX))
|
||||
);
|
||||
var
|
||||
@ -563,12 +551,12 @@ begin
|
||||
ref2 := ref;
|
||||
fixref(list, ref2, tosize);
|
||||
{ the caller is expected to have adjusted the reference already
|
||||
in this case }
|
||||
in this case }
|
||||
if (TCGSize2Size[fromsize] >= TCGSize2Size[tosize]) then
|
||||
fromsize := tosize;
|
||||
op := loadinstr[fromsize, ref2.index <> NR_NO, false];
|
||||
{ there is no LWAU instruction, simulate using ADDI and LWA }
|
||||
if (op = A_LWAU) then begin
|
||||
if (op = A_NOP) then begin
|
||||
list.concat(taicpu.op_reg_reg_const(A_ADDI, reg, reg, ref2.offset));
|
||||
ref2.offset := 0;
|
||||
op := A_LWA;
|
||||
@ -605,8 +593,8 @@ var
|
||||
begin
|
||||
op := movemap[fromsize, tosize];
|
||||
case op of
|
||||
A_MR, A_EXTSB, A_EXTSH, A_EXTSW : instr := taicpu.op_reg_reg(op, reg2, reg1);
|
||||
A_RLDICL : instr := taicpu.op_reg_reg_const_const(A_RLDICL, reg2, reg1, 0, (8-tcgsize2size[fromsize])*8);
|
||||
A_MR, A_EXTSB, A_EXTSH, A_EXTSW : instr := taicpu.op_reg_reg(op, reg2, reg1);
|
||||
A_RLDICL : instr := taicpu.op_reg_reg_const_const(A_RLDICL, reg2, reg1, 0, (8-tcgsize2size[fromsize])*8);
|
||||
else
|
||||
internalerror(2002090901);
|
||||
end;
|
||||
@ -614,8 +602,8 @@ begin
|
||||
rg[R_INTREGISTER].add_move_instruction(instr);
|
||||
end;
|
||||
|
||||
procedure tcgppc.a_loadfpu_reg_reg(list: taasmoutput; size: tcgsize; reg1, reg2:
|
||||
tregister);
|
||||
procedure tcgppc.a_loadfpu_reg_reg(list: taasmoutput; size: tcgsize;
|
||||
reg1, reg2: tregister);
|
||||
var
|
||||
instr: taicpu;
|
||||
begin
|
||||
@ -624,8 +612,8 @@ begin
|
||||
rg[R_FPUREGISTER].add_move_instruction(instr);
|
||||
end;
|
||||
|
||||
procedure tcgppc.a_loadfpu_ref_reg(list: taasmoutput; size: tcgsize; const ref:
|
||||
treference; reg: tregister);
|
||||
procedure tcgppc.a_loadfpu_ref_reg(list: taasmoutput; size: tcgsize;
|
||||
const ref: treference; reg: tregister);
|
||||
const
|
||||
FpuLoadInstr: array[OS_F32..OS_F64, boolean, boolean] of TAsmOp =
|
||||
{ indexed? updating?}
|
||||
@ -654,7 +642,6 @@ end;
|
||||
|
||||
procedure tcgppc.a_loadfpu_reg_ref(list: taasmoutput; size: tcgsize; reg:
|
||||
tregister; const ref: treference);
|
||||
|
||||
const
|
||||
FpuStoreInstr: array[OS_F32..OS_F64, boolean, boolean] of TAsmOp =
|
||||
{ indexed? updating? }
|
||||
@ -688,139 +675,131 @@ end;
|
||||
procedure tcgppc.a_op_const_reg_reg(list: taasmoutput; op: TOpCg;
|
||||
size: tcgsize; a: aint; src, dst: tregister);
|
||||
var
|
||||
l1, l2: longint;
|
||||
oplo, ophi: tasmop;
|
||||
scratchreg: tregister;
|
||||
useReg : boolean;
|
||||
shiftmask : longint;
|
||||
|
||||
procedure do_lo_hi;
|
||||
procedure do_lo_hi(loOp, hiOp : TAsmOp);
|
||||
begin
|
||||
{ Optimization for logical ops (excluding AND), trying to do this as efficiently
|
||||
as possible by only generating code for the affected halfwords. Note that all
|
||||
the instructions handled here must have "X op 0 = X" for every halfword. }
|
||||
usereg := false;
|
||||
if (size in [OS_64, OS_S64]) then begin
|
||||
{ ts: use register method for 64 bit consts. Sloooooow }
|
||||
if (aword(a) > high(dword)) then begin
|
||||
usereg := true;
|
||||
end else if (size in [OS_32, OS_S32]) then begin
|
||||
list.concat(taicpu.op_reg_reg_const(oplo, dst, src, word(a)));
|
||||
list.concat(taicpu.op_reg_reg_const(ophi, dst, dst, word(a shr 16)));
|
||||
end else begin
|
||||
list.concat(taicpu.op_reg_reg_const(oplo, dst, src, word(a)));
|
||||
if (word(a) <> 0) then begin
|
||||
list.concat(taicpu.op_reg_reg_const(loOp, dst, src, word(a)));
|
||||
if (word(a shr 16) <> 0) then
|
||||
list.concat(taicpu.op_reg_reg_const(hiOp, dst, dst, word(a shr 16)));
|
||||
end else if (word(a shr 16) <> 0) then
|
||||
list.concat(taicpu.op_reg_reg_const(hiOp, dst, src, word(a shr 16)));
|
||||
end;
|
||||
end;
|
||||
|
||||
procedure do_lo_hi_and;
|
||||
begin
|
||||
{ optimization logical and with immediate: only use "andi." for 16 bit
|
||||
ands, otherwise use register method. Doing this for 32 bit constants
|
||||
would not give any advantage to the register method (via useReg := true),
|
||||
requiring a scratch register and three instructions. }
|
||||
usereg := false;
|
||||
if (aword(a) > high(word)) then
|
||||
usereg := true
|
||||
else
|
||||
list.concat(taicpu.op_reg_reg_const(A_ANDI_, dst, src, word(a)));
|
||||
end;
|
||||
var
|
||||
scratchreg: tregister;
|
||||
shift, shiftmask : longint;
|
||||
|
||||
begin
|
||||
{ subtraction is the same as addition with negative constant }
|
||||
if op = OP_SUB then begin
|
||||
a_op_const_reg_reg(list, OP_ADD, size, -a, src, dst);
|
||||
exit;
|
||||
end;
|
||||
ophi := TOpCG2AsmOpConstHi[op];
|
||||
oplo := TOpCG2AsmOpConstLo[op];
|
||||
{ peephole optimizations for AND, OR, XOR - can't this be done at
|
||||
some higher level, independent of architecture? }
|
||||
if (op in [OP_AND, OP_OR, OP_XOR]) then begin
|
||||
if (a = 0) then begin
|
||||
if op = OP_AND then
|
||||
list.concat(taicpu.op_reg_const(A_LI, dst, 0))
|
||||
else
|
||||
a_load_reg_reg(list, size, size, src, dst);
|
||||
exit;
|
||||
end else if (a = -1) then begin
|
||||
case op of
|
||||
OP_OR:
|
||||
list.concat(taicpu.op_reg_const(A_LI, dst, -1));
|
||||
OP_XOR:
|
||||
list.concat(taicpu.op_reg_reg(A_NOT, dst, src));
|
||||
OP_AND:
|
||||
a_load_reg_reg(list, size, size, src, dst);
|
||||
end;
|
||||
exit;
|
||||
end;
|
||||
{ optimization for add }
|
||||
end else if (op = OP_ADD) then
|
||||
if a = 0 then begin
|
||||
a_load_reg_reg(list, size, size, src, dst);
|
||||
exit;
|
||||
end else if (a >= low(smallint)) and (a <= high(smallint)) then begin
|
||||
list.concat(taicpu.op_reg_reg_const(A_ADDI, dst, src, smallint(a)));
|
||||
exit;
|
||||
end;
|
||||
{ This case includes some peephole optimizations for the various operations,
|
||||
(e.g. AND, OR, XOR, ..) - can't this be done at some higher level,
|
||||
independent of architecture? }
|
||||
|
||||
{ otherwise, the instructions we can generate depend on the operation }
|
||||
{ assume that we do not need a scratch register for the operation }
|
||||
useReg := false;
|
||||
case op of
|
||||
case (op) of
|
||||
OP_DIV, OP_IDIV:
|
||||
{ actually, this method should be never called directly with OP_DIV or
|
||||
OP_IDIV, so just provide basic support.
|
||||
TODO: move division by constant stuff from nppcmat.pas here }
|
||||
if (a = 0) then
|
||||
internalerror(200208103)
|
||||
else if (a = 1) then begin
|
||||
a_load_reg_reg(list, OS_INT, OS_INT, src, dst);
|
||||
exit
|
||||
end else if false {and ispowerof2(a, l1)} then begin
|
||||
internalerror(200208103);
|
||||
case op of
|
||||
OP_DIV: begin
|
||||
list.concat(taicpu.op_reg_reg_const(A_SRDI, dst, src, l1));
|
||||
end;
|
||||
OP_IDIV:
|
||||
begin
|
||||
list.concat(taicpu.op_reg_reg_const(A_SRADI, dst, src, l1));
|
||||
list.concat(taicpu.op_reg_reg(A_ADDZE, dst, dst));
|
||||
end;
|
||||
end;
|
||||
exit;
|
||||
end else
|
||||
usereg := true;
|
||||
else if (a = 1) then
|
||||
a_load_reg_reg(list, size, size, src, dst)
|
||||
else
|
||||
usereg := true;
|
||||
OP_IMUL, OP_MUL:
|
||||
if (a = 0) then begin
|
||||
list.concat(taicpu.op_reg_const(A_LI, dst, 0));
|
||||
exit
|
||||
end else if (a = -1) then begin
|
||||
list.concat(taicpu.op_reg_reg(A_NEG, dst, dst));
|
||||
end else if (a = 1) then begin
|
||||
a_load_reg_reg(list, OS_INT, OS_INT, src, dst);
|
||||
exit
|
||||
end else if ispowerof2(a, l1) then
|
||||
list.concat(taicpu.op_reg_reg_const(A_SLDI, dst, src, l1))
|
||||
{ idea: factorize constant multiplicands and use adds/shifts with few factors;
|
||||
however, even a 64 bit multiply is already quite fast on PPC64 }
|
||||
if (a = 0) then
|
||||
a_load_const_reg(list, size, 0, dst)
|
||||
else if (a = -1) then
|
||||
list.concat(taicpu.op_reg_reg(A_NEG, dst, dst))
|
||||
else if (a = 1) then
|
||||
a_load_reg_reg(list, OS_INT, OS_INT, src, dst)
|
||||
else if ispowerof2(a, shift) then
|
||||
list.concat(taicpu.op_reg_reg_const(A_SLDI, dst, src, shift))
|
||||
else if (a >= low(smallint)) and (a <= high(smallint)) then
|
||||
list.concat(taicpu.op_reg_reg_const(A_MULLI, dst, src,
|
||||
smallint(a)))
|
||||
else
|
||||
usereg := true;
|
||||
OP_ADD:
|
||||
{$todo ts:optimize}
|
||||
useReg := true;
|
||||
if (a = 0) then
|
||||
a_load_reg_reg(list, size, size, src, dst)
|
||||
else if (a >= low(smallint)) and (a <= high(smallint)) then
|
||||
list.concat(taicpu.op_reg_reg_const(A_ADDI, dst, src, smallint(a)))
|
||||
else
|
||||
useReg := true;
|
||||
OP_OR:
|
||||
do_lo_hi;
|
||||
if (a = 0) then
|
||||
a_load_reg_reg(list, size, size, src, dst)
|
||||
else if (a = -1) then
|
||||
a_load_const_reg(list, size, -1, dst)
|
||||
else
|
||||
do_lo_hi(A_ORI, A_ORIS);
|
||||
OP_AND:
|
||||
useReg := true;
|
||||
if (a = 0) then
|
||||
a_load_const_reg(list, size, 0, dst)
|
||||
else if (a = -1) then
|
||||
a_load_reg_reg(list, size, size, src, dst)
|
||||
else
|
||||
do_lo_hi_and;
|
||||
OP_XOR:
|
||||
do_lo_hi;
|
||||
if (a = 0) then
|
||||
a_load_reg_reg(list, size, size, src, dst)
|
||||
else if (a = -1) then
|
||||
list.concat(taicpu.op_reg_reg(A_NOT, dst, src))
|
||||
else
|
||||
do_lo_hi(A_XORI, A_XORIS);
|
||||
OP_SHL, OP_SHR, OP_SAR:
|
||||
begin
|
||||
{$note ts: cleanup todo, fix remaining bugs}
|
||||
if (size in [OS_64, OS_S64]) then begin
|
||||
if (a and 63) <> 0 then
|
||||
list.concat(taicpu.op_reg_reg_const(
|
||||
TShiftOpCG2AsmOpConst64[Op], dst, src, a and 63))
|
||||
else
|
||||
a_load_reg_reg(list, size, size, src, dst);
|
||||
if (a shr 6) <> 0 then
|
||||
internalError(68991);
|
||||
end else begin
|
||||
if (a and 31) <> 0 then
|
||||
list.concat(taicpu.op_reg_reg_const(
|
||||
TShiftOpCG2AsmOpConst32[Op], dst, src, a and 31))
|
||||
else
|
||||
a_load_reg_reg(list, size, size, src, dst);
|
||||
if (a shr 5) <> 0 then
|
||||
internalError(68991);
|
||||
end;
|
||||
if (size in [OS_64, OS_S64]) then
|
||||
shift := 6
|
||||
else
|
||||
shift := 5;
|
||||
|
||||
shiftmask := (1 shl shift)-1;
|
||||
if (a and shiftmask) <> 0 then
|
||||
list.concat(taicpu.op_reg_reg_const(
|
||||
TShiftOpCG2AsmOpConst[size in [OS_64, OS_S64], op], dst, src, a and shiftmask))
|
||||
else
|
||||
a_load_reg_reg(list, size, size, src, dst);
|
||||
if ((a shr shift) <> 0) then
|
||||
internalError(68991);
|
||||
end
|
||||
else
|
||||
internalerror(200109091);
|
||||
else
|
||||
internalerror(200109091);
|
||||
end;
|
||||
{ if all else failed, load the constant in a register and then }
|
||||
{ perform the operation }
|
||||
if useReg then begin
|
||||
{ if all else failed, load the constant in a register and then
|
||||
perform the operation }
|
||||
if (useReg) then begin
|
||||
scratchreg := rg[R_INTREGISTER].getregister(list, R_SUBWHOLE);
|
||||
a_load_const_reg(list, size, a, scratchreg);
|
||||
a_op_reg_reg_reg(list, op, size, scratchreg, src, dst);
|
||||
@ -843,35 +822,29 @@ begin
|
||||
OP_NEG, OP_NOT:
|
||||
begin
|
||||
list.concat(taicpu.op_reg_reg(op_reg_reg_opcg2asmop64[op], dst, src1));
|
||||
if (op = OP_NOT) and
|
||||
not (size in [OS_64, OS_S64]) then
|
||||
if (op = OP_NOT) and not (size in [OS_64, OS_S64]) then
|
||||
{ zero/sign extend result again, fromsize is not important here }
|
||||
a_load_reg_reg(list, OS_S64, size, dst, dst)
|
||||
end;
|
||||
else
|
||||
{$NOTE ts:testme}
|
||||
if (size in [OS_64, OS_S64]) then begin
|
||||
list.concat(taicpu.op_reg_reg_reg(op_reg_reg_opcg2asmop64[op], dst, src2,
|
||||
src1));
|
||||
end else begin
|
||||
list.concat(taicpu.op_reg_reg_reg(op_reg_reg_opcg2asmop32[op], dst, src2,
|
||||
src1));
|
||||
end;
|
||||
else
|
||||
if (size in [OS_64, OS_S64]) then begin
|
||||
list.concat(taicpu.op_reg_reg_reg(op_reg_reg_opcg2asmop64[op], dst, src2,
|
||||
src1));
|
||||
end else begin
|
||||
list.concat(taicpu.op_reg_reg_reg(op_reg_reg_opcg2asmop32[op], dst, src2,
|
||||
src1));
|
||||
end;
|
||||
end;
|
||||
end;
|
||||
|
||||
{*************** compare instructructions ****************}
|
||||
|
||||
procedure tcgppc.a_cmp_const_reg_label(list: taasmoutput; size: tcgsize; cmp_op:
|
||||
topcmp; a: aint; reg: tregister;
|
||||
l: tasmlabel);
|
||||
|
||||
procedure tcgppc.a_cmp_const_reg_label(list: taasmoutput; size: tcgsize;
|
||||
cmp_op: topcmp; a: aint; reg: tregister; l: tasmlabel);
|
||||
var
|
||||
scratch_register: TRegister;
|
||||
signed: boolean;
|
||||
|
||||
begin
|
||||
{ todo: use 32 bit compares? }
|
||||
signed := cmp_op in [OC_GT, OC_LT, OC_GTE, OC_LTE];
|
||||
{ in the following case, we generate more efficient code when }
|
||||
{ signed is true }
|
||||
@ -897,13 +870,10 @@ begin
|
||||
a_jmp(list, A_BC, TOpCmp2AsmCond[cmp_op], 0, l);
|
||||
end;
|
||||
|
||||
procedure tcgppc.a_cmp_reg_reg_label(list: taasmoutput; size: tcgsize; cmp_op:
|
||||
topcmp;
|
||||
reg1, reg2: tregister; l: tasmlabel);
|
||||
|
||||
procedure tcgppc.a_cmp_reg_reg_label(list: taasmoutput; size: tcgsize;
|
||||
cmp_op: topcmp; reg1, reg2: tregister; l: tasmlabel);
|
||||
var
|
||||
op: tasmop;
|
||||
|
||||
begin
|
||||
if cmp_op in [OC_GT, OC_LT, OC_GTE, OC_LTE] then
|
||||
if (size in [OS_64, OS_S64]) then
|
||||
@ -953,11 +923,9 @@ end;
|
||||
|
||||
procedure tcgppc.g_flags2reg(list: taasmoutput; size: TCgSize; const f:
|
||||
TResFlags; reg: TRegister);
|
||||
|
||||
var
|
||||
testbit: byte;
|
||||
bitvalue: boolean;
|
||||
|
||||
begin
|
||||
{ get the bit to extract from the conditional register + its requested value (0 or 1) }
|
||||
testbit := ((f.cr - RS_CR0) * 4);
|
||||
@ -1375,7 +1343,7 @@ begin
|
||||
list.concat(taicpu.op_reg_reg_const(A_SUBI, dst.base, dst.base, 8));
|
||||
countreg := rg[R_INTREGISTER].getregister(list, R_SUBWHOLE);
|
||||
a_load_const_reg(list, OS_32, count, countreg);
|
||||
{ explicitely allocate R_0 since it can be used safely here
|
||||
{ explicitely allocate F0 since it can be used safely here
|
||||
(for holding date that's being copied) }
|
||||
a_reg_alloc(list, NR_F0);
|
||||
objectlibrary.getjumplabel(lab);
|
||||
|
@ -94,7 +94,7 @@ type
|
||||
A_SRDI, A_SRADI,
|
||||
A_SLDI,
|
||||
A_RLDICL,
|
||||
A_DIVDU, A_DIVD, A_MULLD, A_SRAD, A_SLD, A_SRD,
|
||||
A_DIVDU, A_DIVDU_, A_DIVD, A_DIVD_, A_MULLD, A_MULLD_, A_MULHD, A_MULHD_, A_SRAD, A_SLD, A_SRD,
|
||||
A_DIVDUO_, A_DIVDO_,
|
||||
A_LWA, A_LWAU, A_LWAX, A_LWAUX,
|
||||
A_FCFID,
|
||||
|
@ -49,9 +49,7 @@ const
|
||||
{ the difference to stdcall is only the name mangling }
|
||||
pocall_cdecl,
|
||||
{ the difference to stdcall is only the name mangling }
|
||||
pocall_cppdecl,
|
||||
{ pass all const records by reference }
|
||||
pocall_mwpascal
|
||||
pocall_cppdecl
|
||||
];
|
||||
|
||||
processorsstr: array[tprocessors] of string[10] = ('',
|
||||
|
@ -408,10 +408,11 @@ begin
|
||||
end;
|
||||
end;
|
||||
end;
|
||||
|
||||
curintreg := nextintreg;
|
||||
curfloatreg := nextfloatreg;
|
||||
curmmreg := nextmmreg;
|
||||
cur_stack_offset := stack_offset;
|
||||
cur_stack_offset := stack_offset;
|
||||
result := stack_offset;
|
||||
end;
|
||||
|
||||
|
@ -66,7 +66,7 @@ var
|
||||
begin
|
||||
if not (po_assembler in procdef.procoptions) then begin
|
||||
{ align the stack properly }
|
||||
ofs := align(maxpushedparasize + LinkageAreaSizeELF, ELF_STACK_ALIGN);
|
||||
ofs := align(maxpushedparasize + LinkageAreaSizeELF, 8);
|
||||
|
||||
{ the ABI specification says that it is required to always allocate space for 8 * 8 bytes
|
||||
for registers R3-R10 and stack header if there's a stack frame, but GCC doesn't do that,
|
||||
@ -74,7 +74,6 @@ begin
|
||||
// if (ofs < 112) then begin
|
||||
// ofs := 112;
|
||||
// end;
|
||||
|
||||
tg.setfirsttemp(ofs);
|
||||
end else begin
|
||||
locals := 0;
|
||||
|
@ -84,7 +84,7 @@ const
|
||||
'srdi', 'sradi',
|
||||
'sldi',
|
||||
'rldicl',
|
||||
'divdu', 'divd', 'mulld', 'srad', 'sld', 'srd',
|
||||
'divdu', 'divdu.', 'divd', 'divd.', 'mulld', 'mulld.', 'mulhd', 'mulhd.', 'srad', 'sld', 'srd',
|
||||
'divduo.', 'divdo.',
|
||||
'lwa', '<illegal lwau>', 'lwax', 'lwaux',
|
||||
'fcfid',
|
||||
|
@ -163,7 +163,6 @@ begin
|
||||
end
|
||||
end;
|
||||
|
||||
// Todo: ts: allow emiting word compares...
|
||||
procedure tppcaddnode.emit_compare(unsigned: boolean);
|
||||
var
|
||||
op: tasmop;
|
||||
@ -175,8 +174,7 @@ begin
|
||||
swapleftright;
|
||||
// can we use an immediate, or do we have to load the
|
||||
// constant in a register first?
|
||||
if (right.location.loc = LOC_CONSTANT) then
|
||||
begin
|
||||
if (right.location.loc = LOC_CONSTANT) then begin
|
||||
if (nodetype in [equaln, unequaln]) then
|
||||
if (unsigned and
|
||||
(aword(right.location.value) > high(word))) or
|
||||
@ -193,15 +191,13 @@ begin
|
||||
(aint(right.location.value) >= low(smallint)) and
|
||||
(aint(right.location.value) <= high(smallint))) then
|
||||
useconst := true
|
||||
else
|
||||
begin
|
||||
else begin
|
||||
useconst := false;
|
||||
tmpreg := cg.getintregister(exprasmlist, OS_INT);
|
||||
cg.a_load_const_reg(exprasmlist, OS_INT,
|
||||
right.location.value, tmpreg);
|
||||
end
|
||||
end
|
||||
else
|
||||
end else
|
||||
useconst := false;
|
||||
location.loc := LOC_FLAGS;
|
||||
location.resflags := getresflags;
|
||||
@ -215,15 +211,13 @@ begin
|
||||
else
|
||||
op := A_CMPLD;
|
||||
|
||||
if (right.location.loc = LOC_CONSTANT) then
|
||||
begin
|
||||
if (right.location.loc = LOC_CONSTANT) then begin
|
||||
if useconst then
|
||||
exprasmlist.concat(taicpu.op_reg_const(op, left.location.register,
|
||||
longint(right.location.value)))
|
||||
else
|
||||
exprasmlist.concat(taicpu.op_reg_reg(op, left.location.register, tmpreg));
|
||||
end
|
||||
else
|
||||
end else
|
||||
exprasmlist.concat(taicpu.op_reg_reg(op,
|
||||
left.location.register, right.location.register));
|
||||
end;
|
||||
@ -237,7 +231,7 @@ var
|
||||
cgop: TOpCg;
|
||||
cgsize: TCgSize;
|
||||
cmpop,
|
||||
isjump: boolean;
|
||||
isjump: boolean;
|
||||
otl, ofl: tasmlabel;
|
||||
begin
|
||||
{ calculate the operator which is more difficult }
|
||||
@ -525,7 +519,6 @@ begin
|
||||
cg.a_op_reg_reg(exprasmlist, OP_SHL, OS_64,
|
||||
right.location.register, tmpreg);
|
||||
if left.location.loc <> LOC_CONSTANT then begin
|
||||
|
||||
cg.a_op_reg_reg_reg(exprasmlist, OP_OR, OS_64, tmpreg,
|
||||
left.location.register, location.register)
|
||||
end else begin
|
||||
|
@ -36,6 +36,8 @@ type
|
||||
}
|
||||
function first_abs_real: tnode; override;
|
||||
function first_sqr_real: tnode; override;
|
||||
{ todo: inline trunc/round/frac?/int }
|
||||
|
||||
procedure second_abs_real; override;
|
||||
procedure second_sqr_real; override;
|
||||
procedure second_prefetch; override;
|
||||
|
@ -59,6 +59,176 @@ uses
|
||||
cpubase, cpuinfo,
|
||||
ncgutil, cgcpu, rgobj;
|
||||
|
||||
{ helper functions }
|
||||
procedure getmagic_unsigned32(d : dword; out magic_m : dword; out magic_add : boolean; out magic_shift : dword);
|
||||
var
|
||||
p : longint;
|
||||
nc, delta, q1, r1, q2, r2 : dword;
|
||||
|
||||
begin
|
||||
assert(d > 0);
|
||||
|
||||
magic_add := false;
|
||||
nc := - 1 - (-d) mod d;
|
||||
p := 31; { initialize p }
|
||||
q1 := $80000000 div nc; { initialize q1 = 2p/nc }
|
||||
r1 := $80000000 - q1*nc; { initialize r1 = rem(2p,nc) }
|
||||
q2 := $7FFFFFFF div d; { initialize q2 = (2p-1)/d }
|
||||
r2 := $7FFFFFFF - q2*d; { initialize r2 = rem((2p-1),d) }
|
||||
repeat
|
||||
inc(p);
|
||||
if (r1 >= (nc - r1)) then begin
|
||||
q1 := 2 * q1 + 1; { update q1 }
|
||||
r1 := 2*r1 - nc; { update r1 }
|
||||
end else begin
|
||||
q1 := 2*q1; { update q1 }
|
||||
r1 := 2*r1; { update r1 }
|
||||
end;
|
||||
if ((r2 + 1) >= (d - r2)) then begin
|
||||
if (q2 >= $7FFFFFFF) then
|
||||
magic_add := true;
|
||||
q2 := 2*q2 + 1; { update q2 }
|
||||
r2 := 2*r2 + 1 - d; { update r2 }
|
||||
end else begin
|
||||
if (q2 >= $80000000) then
|
||||
magic_add := true;
|
||||
q2 := 2*q2; { update q2 }
|
||||
r2 := 2*r2 + 1; { update r2 }
|
||||
end;
|
||||
delta := d - 1 - r2;
|
||||
until not ((p < 64) and ((q1 < delta) or ((q1 = delta) and (r1 = 0))));
|
||||
magic_m := q2 + 1; { resulting magic number }
|
||||
magic_shift := p - 32; { resulting shift }
|
||||
end;
|
||||
|
||||
procedure getmagic_signed32(d : longint; out magic_m : longint; out magic_s : longint);
|
||||
const
|
||||
two_31 : DWord = high(longint)+1;
|
||||
var
|
||||
p : Longint;
|
||||
ad, anc, delta, q1, r1, q2, r2, t : DWord;
|
||||
|
||||
begin
|
||||
assert((d < -1) or (d > 1));
|
||||
|
||||
ad := abs(d);
|
||||
t := two_31 + (DWord(d) shr 31);
|
||||
anc := t - 1 - t mod ad; { absolute value of nc }
|
||||
p := 31; { initialize p }
|
||||
q1 := two_31 div anc; { initialize q1 = 2p/abs(nc) }
|
||||
r1 := two_31 - q1*anc; { initialize r1 = rem(2p,abs(nc)) }
|
||||
q2 := two_31 div ad; { initialize q2 = 2p/abs(d) }
|
||||
r2 := two_31 - q2*ad; { initialize r2 = rem(2p,abs(d)) }
|
||||
repeat
|
||||
inc(p);
|
||||
q1 := 2*q1; { update q1 = 2p/abs(nc) }
|
||||
r1 := 2*r1; { update r1 = rem(2p/abs(nc)) }
|
||||
if (r1 >= anc) then begin { must be unsigned comparison }
|
||||
inc(q1);
|
||||
dec(r1, anc);
|
||||
end;
|
||||
q2 := 2*q2; { update q2 = 2p/abs(d) }
|
||||
r2 := 2*r2; { update r2 = rem(2p/abs(d)) }
|
||||
if (r2 >= ad) then begin { must be unsigned comparison }
|
||||
inc(q2);
|
||||
dec(r2, ad);
|
||||
end;
|
||||
delta := ad - r2;
|
||||
until not ((q1 < delta) or ((q1 = delta) and (r1 = 0)));
|
||||
magic_m := q2 + 1;
|
||||
if (d < 0) then begin
|
||||
magic_m := -magic_m; { resulting magic number }
|
||||
end;
|
||||
magic_s := p - 32; { resulting shift }
|
||||
end;
|
||||
|
||||
{ helper functions }
|
||||
procedure getmagic_unsigned64(d : qword; out magic_m : qword; out magic_add : boolean; out magic_shift : qword);
|
||||
const
|
||||
two_63 : QWord = $8000000000000000;
|
||||
var
|
||||
p : int64;
|
||||
nc, delta, q1, r1, q2, r2 : qword;
|
||||
|
||||
begin
|
||||
assert(d > 0);
|
||||
|
||||
magic_add := false;
|
||||
nc := - 1 - (-d) mod d;
|
||||
p := 63; { initialize p }
|
||||
q1 := two_63 div nc; { initialize q1 = 2p/nc }
|
||||
r1 := two_63 - q1*nc; { initialize r1 = rem(2p,nc) }
|
||||
q2 := (two_63-1) div d; { initialize q2 = (2p-1)/d }
|
||||
r2 := (two_63-1) - q2*d; { initialize r2 = rem((2p-1),d) }
|
||||
repeat
|
||||
inc(p);
|
||||
if (r1 >= (nc - r1)) then begin
|
||||
q1 := 2 * q1 + 1; { update q1 }
|
||||
r1 := 2*r1 - nc; { update r1 }
|
||||
end else begin
|
||||
q1 := 2*q1; { update q1 }
|
||||
r1 := 2*r1; { update r1 }
|
||||
end;
|
||||
if ((r2 + 1) >= (d - r2)) then begin
|
||||
if (q2 >= (two_63-1)) then
|
||||
magic_add := true;
|
||||
q2 := 2*q2 + 1; { update q2 }
|
||||
r2 := 2*r2 + 1 - d; { update r2 }
|
||||
end else begin
|
||||
if (q2 >= two_63) then
|
||||
magic_add := true;
|
||||
q2 := 2*q2; { update q2 }
|
||||
r2 := 2*r2 + 1; { update r2 }
|
||||
end;
|
||||
delta := d - 1 - r2;
|
||||
until not ((p < 128) and ((q1 < delta) or ((q1 = delta) and (r1 = 0))));
|
||||
magic_m := q2 + 1; { resulting magic number }
|
||||
magic_shift := p - 64; { resulting shift }
|
||||
end;
|
||||
|
||||
procedure getmagic_signed64(d : int64; out magic_m : int64; out magic_s : int64);
|
||||
const
|
||||
two_63 : QWord = $8000000000000000;
|
||||
var
|
||||
p : int64;
|
||||
ad, anc, delta, q1, r1, q2, r2, t : QWord;
|
||||
|
||||
begin
|
||||
assert((d < -1) or (d > 1));
|
||||
|
||||
ad := abs(d);
|
||||
t := two_63 + (QWord(d) shr 63);
|
||||
anc := t - 1 - t mod ad; { absolute value of nc }
|
||||
p := 63; { initialize p }
|
||||
q1 := two_63 div anc; { initialize q1 = 2p/abs(nc) }
|
||||
r1 := two_63 - q1*anc; { initialize r1 = rem(2p,abs(nc)) }
|
||||
q2 := two_63 div ad; { initialize q2 = 2p/abs(d) }
|
||||
r2 := two_63 - q2*ad; { initialize r2 = rem(2p,abs(d)) }
|
||||
repeat
|
||||
inc(p);
|
||||
q1 := 2*q1; { update q1 = 2p/abs(nc) }
|
||||
r1 := 2*r1; { update r1 = rem(2p/abs(nc)) }
|
||||
if (r1 >= anc) then begin { must be unsigned comparison }
|
||||
inc(q1);
|
||||
dec(r1, anc);
|
||||
end;
|
||||
q2 := 2*q2; { update q2 = 2p/abs(d) }
|
||||
r2 := 2*r2; { update r2 = rem(2p/abs(d)) }
|
||||
if (r2 >= ad) then begin { must be unsigned comparison }
|
||||
inc(q2);
|
||||
dec(r2, ad);
|
||||
end;
|
||||
delta := ad - r2;
|
||||
until not ((q1 < delta) or ((q1 = delta) and (r1 = 0)));
|
||||
magic_m := q2 + 1;
|
||||
if (d < 0) then begin
|
||||
magic_m := -magic_m; { resulting magic number }
|
||||
end;
|
||||
magic_s := p - 64; { resulting shift }
|
||||
end;
|
||||
|
||||
|
||||
|
||||
{*****************************************************************************
|
||||
TPPCMODDIVNODE
|
||||
*****************************************************************************}
|
||||
@ -70,6 +240,200 @@ begin
|
||||
include(current_procinfo.flags, pi_do_call);
|
||||
end;
|
||||
|
||||
procedure tppcmoddivnode.pass_2;
|
||||
const { signed overflow }
|
||||
divops: array[boolean, boolean] of tasmop =
|
||||
((A_DIVDU,A_DIVDU_),(A_DIVD,A_DIVDO_));
|
||||
zerocond: tasmcond = (dirhint: DH_Plus; simple: true; cond:C_NE; cr: RS_CR7);
|
||||
var
|
||||
power : longint;
|
||||
op : tasmop;
|
||||
numerator, divider,
|
||||
resultreg : tregister;
|
||||
size : TCgSize;
|
||||
hl : tasmlabel;
|
||||
done: boolean;
|
||||
|
||||
procedure genOrdConstNodeDiv;
|
||||
const
|
||||
negops : array[boolean] of tasmop = (A_NEG, A_NEGO);
|
||||
var
|
||||
magic, shift : int64;
|
||||
u_magic, u_shift : qword;
|
||||
u_add : boolean;
|
||||
|
||||
divreg : tregister;
|
||||
begin
|
||||
if (tordconstnode(right).value = 0) then begin
|
||||
internalerror(2005061701);
|
||||
end else if (tordconstnode(right).value = 1) then begin
|
||||
cg.a_load_reg_reg(exprasmlist, OS_INT, OS_INT, numerator, resultreg);
|
||||
end else if (tordconstnode(right).value = -1) then begin
|
||||
{ note: only in the signed case possible..., may overflow }
|
||||
exprasmlist.concat(taicpu.op_reg_reg(negops[cs_check_overflow in aktlocalswitches], resultreg, numerator));
|
||||
end else if (ispowerof2(tordconstnode(right).value, power)) then begin
|
||||
if (is_signed(right.resulttype.def)) then begin
|
||||
{ From "The PowerPC Compiler Writer's Guide", pg. 52ff }
|
||||
cg.a_op_const_reg_reg(exprasmlist, OP_SAR, OS_INT, power,
|
||||
numerator, resultreg);
|
||||
exprasmlist.concat(taicpu.op_reg_reg(A_ADDZE, resultreg, resultreg));
|
||||
end else begin
|
||||
cg.a_op_const_reg_reg(exprasmlist, OP_SHR, OS_INT, power, numerator, resultreg)
|
||||
end;
|
||||
end else begin
|
||||
{ replace division by multiplication, both implementations }
|
||||
{ from "The PowerPC Compiler Writer's Guide" pg. 53ff }
|
||||
divreg := cg.getintregister(exprasmlist, OS_INT);
|
||||
if (is_signed(right.resulttype.def)) then begin
|
||||
getmagic_signed64(tordconstnode(right).value, magic, shift);
|
||||
{ load magic value }
|
||||
cg.a_load_const_reg(exprasmlist, OS_INT, magic, divreg);
|
||||
{ multiply }
|
||||
exprasmlist.concat(taicpu.op_reg_reg_reg(A_MULHD, resultreg, numerator, divreg));
|
||||
{ add/subtract numerator }
|
||||
if (tordconstnode(right).value > 0) and (magic < 0) then begin
|
||||
cg.a_op_reg_reg_reg(exprasmlist, OP_ADD, OS_INT, numerator, resultreg, resultreg);
|
||||
end else if (tordconstnode(right).value < 0) and (magic > 0) then begin
|
||||
cg.a_op_reg_reg_reg(exprasmlist, OP_SUB, OS_INT, numerator, resultreg, resultreg);
|
||||
end;
|
||||
{ shift shift places to the right (arithmetic) }
|
||||
cg.a_op_const_reg_reg(exprasmlist, OP_SAR, OS_INT, shift, resultreg, resultreg);
|
||||
{ extract and add sign bit }
|
||||
if (tordconstnode(right).value >= 0) then begin
|
||||
cg.a_op_const_reg_reg(exprasmlist, OP_SHR, OS_INT, 63, numerator, divreg);
|
||||
end else begin
|
||||
cg.a_op_const_reg_reg(exprasmlist, OP_SHR, OS_INT, 63, resultreg, divreg);
|
||||
end;
|
||||
cg.a_op_reg_reg_reg(exprasmlist, OP_ADD, OS_INT, resultreg, divreg, resultreg);
|
||||
end else begin
|
||||
getmagic_unsigned64(tordconstnode(right).value, u_magic, u_add, u_shift);
|
||||
{ load magic in divreg }
|
||||
cg.a_load_const_reg(exprasmlist, OS_INT, u_magic, divreg);
|
||||
exprasmlist.concat(taicpu.op_reg_reg_reg(A_MULHDU, resultreg, numerator, divreg));
|
||||
if (u_add) then begin
|
||||
cg.a_op_reg_reg_reg(exprasmlist, OP_SUB, OS_INT, resultreg, numerator, divreg);
|
||||
cg.a_op_const_reg_reg(exprasmlist, OP_SHR, OS_INT, 1, divreg, divreg);
|
||||
cg.a_op_reg_reg_reg(exprasmlist, OP_ADD, OS_INT, divreg, resultreg, divreg);
|
||||
cg.a_op_const_reg_reg(exprasmlist, OP_SHR, OS_INT, u_shift-1, divreg, resultreg);
|
||||
end else begin
|
||||
cg.a_op_const_reg_reg(exprasmlist, OP_SHR, OS_INT, u_shift, resultreg, resultreg);
|
||||
end;
|
||||
end;
|
||||
end;
|
||||
done := true;
|
||||
end;
|
||||
|
||||
procedure genOrdConstNodeMod;
|
||||
var
|
||||
modreg, maskreg, tempreg : tregister;
|
||||
begin
|
||||
if (tordconstnode(right).value = 0) then begin
|
||||
internalerror(2005061702);
|
||||
end else if (abs(tordconstnode(right).value) = 1) then begin
|
||||
{ x mod +/-1 is always zero }
|
||||
cg.a_load_const_reg(exprasmlist, OS_INT, 0, resultreg);
|
||||
end else if (ispowerof2(tordconstnode(right).value, power)) then begin
|
||||
if (is_signed(right.resulttype.def)) then begin
|
||||
tempreg := cg.getintregister(exprasmlist, OS_INT);
|
||||
maskreg := cg.getintregister(exprasmlist, OS_INT);
|
||||
modreg := cg.getintregister(exprasmlist, OS_INT);
|
||||
|
||||
cg.a_load_const_reg(exprasmlist, OS_INT, abs(tordconstnode(right).value)-1, modreg);
|
||||
cg.a_op_const_reg_reg(exprasmlist, OP_SAR, OS_INT, 63, numerator, maskreg);
|
||||
cg.a_op_reg_reg_reg(exprasmlist, OP_AND, OS_INT, numerator, modreg, tempreg);
|
||||
|
||||
exprasmlist.concat(taicpu.op_reg_reg_reg(A_ANDC, maskreg, maskreg, modreg));
|
||||
exprasmlist.concat(taicpu.op_reg_reg_const(A_SUBFIC, modreg, tempreg, 0));
|
||||
exprasmlist.concat(taicpu.op_reg_reg_reg(A_SUBFE, modreg, modreg, modreg));
|
||||
cg.a_op_reg_reg_reg(exprasmlist, OP_AND, OS_INT, modreg, maskreg, maskreg);
|
||||
cg.a_op_reg_reg_reg(exprasmlist, OP_OR, OS_INT, maskreg, tempreg, resultreg);
|
||||
end else begin
|
||||
cg.a_op_const_reg_reg(exprasmlist, OP_AND, OS_INT, tordconstnode(right).value-1, numerator, resultreg);
|
||||
end;
|
||||
end else begin
|
||||
genOrdConstNodeDiv();
|
||||
cg.a_op_const_reg_reg(exprasmlist, OP_MUL, OS_INT, tordconstnode(right).value, resultreg, resultreg);
|
||||
cg.a_op_reg_reg_reg(exprasmlist, OP_SUB, OS_INT, resultreg, numerator, resultreg);
|
||||
end;
|
||||
end;
|
||||
|
||||
|
||||
begin
|
||||
secondpass(left);
|
||||
secondpass(right);
|
||||
location_copy(location,left.location);
|
||||
|
||||
{ put numerator in register }
|
||||
size:=def_cgsize(left.resulttype.def);
|
||||
location_force_reg(exprasmlist,left.location,
|
||||
size,true);
|
||||
location_copy(location,left.location);
|
||||
numerator := location.register;
|
||||
resultreg := location.register;
|
||||
if (location.loc = LOC_CREGISTER) then begin
|
||||
location.loc := LOC_REGISTER;
|
||||
location.register := cg.getintregister(exprasmlist,size);
|
||||
resultreg := location.register;
|
||||
end else if (nodetype = modn) or (right.nodetype = ordconstn) then begin
|
||||
{ for a modulus op, and for const nodes we need the result register
|
||||
to be an extra register }
|
||||
resultreg := cg.getintregister(exprasmlist,size);
|
||||
end;
|
||||
done := false;
|
||||
(*
|
||||
if (right.nodetype = ordconstn) then begin
|
||||
if (nodetype = divn) then
|
||||
genOrdConstNodeDiv
|
||||
else
|
||||
genOrdConstNodeMod;
|
||||
done := true;
|
||||
end;
|
||||
*)
|
||||
|
||||
if (not done) then begin
|
||||
{ load divider in a register if necessary }
|
||||
location_force_reg(exprasmlist,right.location,
|
||||
def_cgsize(right.resulttype.def),true);
|
||||
if (right.nodetype <> ordconstn) then
|
||||
exprasmlist.concat(taicpu.op_reg_reg_const(A_CMPDI, NR_CR7,
|
||||
right.location.register, 0))
|
||||
else begin
|
||||
if (tordconstnode(right).value = 0) then
|
||||
internalerror(2005100301);
|
||||
end;
|
||||
divider := right.location.register;
|
||||
|
||||
{ needs overflow checking, (-maxlongint-1) div (-1) overflows! }
|
||||
op := divops[is_signed(right.resulttype.def),
|
||||
cs_check_overflow in aktlocalswitches];
|
||||
exprasmlist.concat(taicpu.op_reg_reg_reg(op, resultreg, numerator,
|
||||
divider));
|
||||
|
||||
if (nodetype = modn) then begin
|
||||
exprasmlist.concat(taicpu.op_reg_reg_reg(A_MULLD,resultreg,
|
||||
divider,resultreg));
|
||||
exprasmlist.concat(taicpu.op_reg_reg_reg(A_SUB,location.register,
|
||||
numerator,resultreg));
|
||||
resultreg := location.register;
|
||||
end;
|
||||
end;
|
||||
{ set result location }
|
||||
location.loc:=LOC_REGISTER;
|
||||
location.register:=resultreg;
|
||||
if right.nodetype <> ordconstn then begin
|
||||
objectlibrary.getjumplabel(hl);
|
||||
exprasmlist.concat(taicpu.op_cond_sym(A_BC,zerocond,hl));
|
||||
cg.a_call_name(exprasmlist,'FPC_DIVBYZERO');
|
||||
cg.a_label(exprasmlist,hl);
|
||||
end;
|
||||
{ unsigned division/module can only overflow in case of division by zero
|
||||
(but checking this overflow flag is more convoluted than performing a
|
||||
simple comparison with 0) }
|
||||
if is_signed(right.resulttype.def) then
|
||||
cg.g_overflowcheck(exprasmlist,location,resulttype.def);
|
||||
end;
|
||||
|
||||
(*
|
||||
procedure tppcmoddivnode.pass_2;
|
||||
const
|
||||
// ts: todo, use 32 bit operations if possible (much faster!)
|
||||
@ -130,9 +494,7 @@ begin
|
||||
end else begin
|
||||
cg.a_op_const_reg_reg(exprasmlist, OP_SHR, OS_INT, power, numerator, resultreg);
|
||||
end;
|
||||
end
|
||||
else
|
||||
begin
|
||||
end else begin
|
||||
{ load divider in a register if necessary }
|
||||
location_force_reg(exprasmlist, right.location,
|
||||
def_cgsize(right.resulttype.def), true);
|
||||
@ -150,8 +512,7 @@ begin
|
||||
exprasmlist.concat(taicpu.op_reg_reg_reg(op, resultreg, numerator,
|
||||
divider));
|
||||
|
||||
if (nodetype = modn) then
|
||||
begin
|
||||
if (nodetype = modn) then begin
|
||||
{$NOTE ts:testme}
|
||||
exprasmlist.concat(taicpu.op_reg_reg_reg(A_MULLD, resultreg,
|
||||
divider, resultreg));
|
||||
@ -163,8 +524,7 @@ begin
|
||||
{ set result location }
|
||||
location.loc := LOC_REGISTER;
|
||||
location.register := resultreg;
|
||||
if right.nodetype <> ordconstn then
|
||||
begin
|
||||
if (right.nodetype <> ordconstn) then begin
|
||||
objectlibrary.getjumplabel(hl);
|
||||
exprasmlist.concat(taicpu.op_cond_sym(A_BC, zerocond, hl));
|
||||
cg.a_call_name(exprasmlist, 'FPC_DIVBYZERO');
|
||||
@ -172,7 +532,7 @@ begin
|
||||
end;
|
||||
cg.g_overflowcheck(exprasmlist, location, resulttype.def);
|
||||
end;
|
||||
|
||||
*)
|
||||
{*****************************************************************************
|
||||
TPPCSHLRSHRNODE
|
||||
*****************************************************************************}
|
||||
@ -181,8 +541,8 @@ end;
|
||||
procedure tppcshlshrnode.pass_2;
|
||||
|
||||
var
|
||||
resultreg, hregister1, hregister2,
|
||||
hreg64hi, hreg64lo: tregister;
|
||||
resultreg, hregister1, hregister2 : tregister;
|
||||
|
||||
op: topcg;
|
||||
asmop1, asmop2: tasmop;
|
||||
shiftval: aint;
|
||||
@ -199,7 +559,7 @@ begin
|
||||
hregister1 := location.register;
|
||||
if (location.loc = LOC_CREGISTER) then begin
|
||||
location.loc := LOC_REGISTER;
|
||||
resultreg := cg.getintregister(exprasmlist, OS_64);
|
||||
resultreg := cg.getintregister(exprasmlist, OS_INT);
|
||||
location.register := resultreg;
|
||||
end;
|
||||
|
||||
@ -257,17 +617,14 @@ begin
|
||||
end;
|
||||
LOC_REFERENCE, LOC_CREFERENCE:
|
||||
begin
|
||||
if (left.resulttype.def.deftype = floatdef) then
|
||||
begin
|
||||
if (left.resulttype.def.deftype = floatdef) then begin
|
||||
src1 := cg.getfpuregister(exprasmlist,
|
||||
def_cgsize(left.resulttype.def));
|
||||
location.register := src1;
|
||||
cg.a_loadfpu_ref_reg(exprasmlist,
|
||||
def_cgsize(left.resulttype.def),
|
||||
left.location.reference, src1);
|
||||
end
|
||||
else
|
||||
begin
|
||||
end else begin
|
||||
src1 := cg.getintregister(exprasmlist, OS_64);
|
||||
location.register := src1;
|
||||
cg.a_load_ref_reg(exprasmlist, OS_64, OS_64,
|
||||
@ -276,28 +633,19 @@ begin
|
||||
end;
|
||||
end;
|
||||
{ choose appropriate operand }
|
||||
if left.resulttype.def.deftype <> floatdef then
|
||||
begin
|
||||
if left.resulttype.def.deftype <> floatdef then begin
|
||||
if not (cs_check_overflow in aktlocalswitches) then
|
||||
op := A_NEG
|
||||
else
|
||||
op := A_NEGO_;
|
||||
location.loc := LOC_REGISTER;
|
||||
end
|
||||
else
|
||||
begin
|
||||
end else begin
|
||||
op := A_FNEG;
|
||||
location.loc := LOC_FPUREGISTER;
|
||||
end;
|
||||
{ emit operation }
|
||||
exprasmlist.concat(taicpu.op_reg_reg(op, location.register, src1));
|
||||
end;
|
||||
{ Here was a problem... }
|
||||
{ Operand to be negated always }
|
||||
{ seems to be converted to signed }
|
||||
{ 32-bit before doing neg!! }
|
||||
{ So this is useless... }
|
||||
{ that's not true: -2^31 gives an overflow error if it is negated (FK) }
|
||||
cg.g_overflowcheck(exprasmlist, location, resulttype.def);
|
||||
end;
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user