* a_op_const_reg_reg optimizations

* added some more 64 bit PPC opcodes
* removed last mwpascal references
* added replacement of division/modulo by constant by multiplications and shifts for 64 bit
* general cleanup

git-svn-id: trunk@1648 -
This commit is contained in:
tom_at_work 2005-11-04 22:49:05 +00:00
parent 4bd32a686d
commit e4a61f4af1
9 changed files with 524 additions and 215 deletions

View File

@ -157,16 +157,9 @@ type
end; end;
const const
TOpCG2AsmOpConstLo: array[topcg] of TAsmOp = (A_NONE, A_ADDI, A_ANDI_, TShiftOpCG2AsmOpConst : array[boolean, OP_SAR..OP_SHR] of TAsmOp = (
A_DIVWU, (A_SRAWI, A_SLWI, A_SRWI), (A_SRADI, A_SLDI, A_SRDI)
A_DIVW, A_MULLW, A_MULLW, A_NONE, A_NONE, A_ORI, );
A_SRAWI, A_SLWI, A_SRWI, A_SUBI, A_XORI);
TOpCG2AsmOpConstHi: array[topcg] of TAsmOp = (A_NONE, A_ADDIS, A_ANDIS_,
A_DIVWU, A_DIVW, A_MULLW, A_MULLW, A_NONE, A_NONE,
A_ORIS, A_NONE, A_NONE, A_NONE, A_SUBIS, A_XORIS);
TShiftOpCG2AsmOpConst32 : array[OP_SAR..OP_SHR] of TAsmOp = (A_SRAWI, A_SLWI, A_SRWI);
TShiftOpCG2AsmOpConst64 : array[OP_SAR..OP_SHR] of TAsmOp = (A_SRADI, A_SLDI, A_SRDI);
TOpCmp2AsmCond: array[topcmp] of TAsmCondFlag = (C_NONE, C_EQ, C_GT, TOpCmp2AsmCond: array[topcmp] of TAsmCondFlag = (C_NONE, C_EQ, C_GT,
C_LT, C_GE, C_LE, C_NE, C_LE, C_LT, C_GE, C_GT); C_LT, C_GE, C_LE, C_NE, C_LE, C_LT, C_GE, C_GT);
@ -248,10 +241,13 @@ begin
location^.register) location^.register)
else else
{ load non-integral sized memory location into register. This { load non-integral sized memory location into register. This
memory location be 1-sizeleft byte sized. memory location be 1-sizeleft byte sized.
Always assume that this memory area is properly aligned, eg. start Always assume that this memory area is properly aligned, eg. start
loading the larger quantities for "odd" quantities first } loading the larger quantities for "odd" quantities first }
case sizeleft of case sizeleft of
1,2,4,8 :
a_load_ref_reg(list, int_cgsize(sizeleft), location^.size, tmpref,
location^.register);
3 : begin 3 : begin
a_reg_alloc(list, NR_R12); a_reg_alloc(list, NR_R12);
a_load_ref_reg(list, OS_16, location^.size, tmpref, a_load_ref_reg(list, OS_16, location^.size, tmpref,
@ -259,7 +255,7 @@ begin
inc(tmpref.offset, tcgsize2size[OS_16]); inc(tmpref.offset, tcgsize2size[OS_16]);
a_load_ref_reg(list, OS_8, location^.size, tmpref, a_load_ref_reg(list, OS_8, location^.size, tmpref,
location^.register); location^.register);
list.concat(taicpu.op_reg_reg_const_const(A_RLDIMI, location^.register, NR_R12, 8, 40)); list.concat(taicpu.op_reg_reg_const_const(A_RLDIMI, location^.register, NR_R12, 8, 40));
a_reg_dealloc(list, NR_R12); a_reg_dealloc(list, NR_R12);
end; end;
5 : begin 5 : begin
@ -267,8 +263,8 @@ begin
a_load_ref_reg(list, OS_32, location^.size, tmpref, NR_R12); a_load_ref_reg(list, OS_32, location^.size, tmpref, NR_R12);
inc(tmpref.offset, tcgsize2size[OS_32]); inc(tmpref.offset, tcgsize2size[OS_32]);
a_load_ref_reg(list, OS_8, location^.size, tmpref, location^.register); a_load_ref_reg(list, OS_8, location^.size, tmpref, location^.register);
list.concat(taicpu.op_reg_reg_const_const(A_RLDIMI, location^.register, NR_R12, 8, 24)); list.concat(taicpu.op_reg_reg_const_const(A_RLDIMI, location^.register, NR_R12, 8, 24));
a_reg_dealloc(list, NR_R12); a_reg_dealloc(list, NR_R12);
end; end;
6 : begin 6 : begin
a_reg_alloc(list, NR_R12); a_reg_alloc(list, NR_R12);
@ -286,20 +282,16 @@ begin
a_load_ref_reg(list, OS_16, location^.size, tmpref, NR_R0); a_load_ref_reg(list, OS_16, location^.size, tmpref, NR_R0);
inc(tmpref.offset, tcgsize2size[OS_16]); inc(tmpref.offset, tcgsize2size[OS_16]);
a_load_ref_reg(list, OS_8, location^.size, tmpref, location^.register); a_load_ref_reg(list, OS_8, location^.size, tmpref, location^.register);
list.concat(taicpu.op_reg_reg_const_const(A_RLDIMI, NR_R0, NR_R12, 16, 16)); list.concat(taicpu.op_reg_reg_const_const(A_RLDIMI, NR_R0, NR_R12, 16, 16));
list.concat(taicpu.op_reg_reg_const_const(A_RLDIMI, location^.register, NR_R0, 8, 8)); list.concat(taicpu.op_reg_reg_const_const(A_RLDIMI, location^.register, NR_R0, 8, 8));
a_reg_dealloc(list, NR_R0); a_reg_dealloc(list, NR_R0);
a_reg_dealloc(list, NR_R12); a_reg_dealloc(list, NR_R12);
end; end;
1,2,4,8 : else
a_load_ref_reg(list, int_cgsize(sizeleft), location^.size, tmpref, { still > 8 bytes to load, so load data single register now }
location^.register);
else
a_load_ref_reg(list, location^.size, location^.size, tmpref, a_load_ref_reg(list, location^.size, location^.size, tmpref,
location^.register); location^.register);
end; end;
// a_load_ref_reg(list, location^.size, location^.size, tmpref,
// location^.register);
end; end;
LOC_REFERENCE: LOC_REFERENCE:
begin begin
@ -368,12 +360,8 @@ begin
AT_FUNCTION))); AT_FUNCTION)));
if (addNOP) then if (addNOP) then
list.concat(taicpu.op_none(A_NOP)); list.concat(taicpu.op_none(A_NOP));
{ { the compiler does not properly set this flag anymore in pass 1, and
the compiler does not properly set this flag anymore in pass 1, and for now we only need it after pass 2 (I hope) (JM) }
for now we only need it after pass 2 (I hope) (JM)
if not(pi_do_call in current_procinfo.flags) then
internalerror(2003060703);
}
include(current_procinfo.flags, pi_do_call); include(current_procinfo.flags, pi_do_call);
end; end;
@ -503,9 +491,9 @@ begin
32 bits should contain -1 32 bits should contain -1
- loading the lower 32 bits resulted in 0 in the upper 32 bits, and the upper - loading the lower 32 bits resulted in 0 in the upper 32 bits, and the upper
32 bits should contain 0 } 32 bits should contain 0 }
load32bitconstantR0(list, size, hi(a), NR_R0); load32bitconstant(list, size, hi(a), NR_R12);
{ combine both registers } { combine both registers }
list.concat(taicpu.op_reg_reg_const_const(A_RLDIMI, reg, NR_R0, 32, 0)); list.concat(taicpu.op_reg_reg_const_const(A_RLDIMI, reg, NR_R12, 32, 0));
end; end;
end; end;
end; end;
@ -550,7 +538,7 @@ const
((A_LBZ, A_LBZU), (A_LBZX, A_LBZUX)), ((A_LBZ, A_LBZU), (A_LBZX, A_LBZUX)),
((A_LHA, A_LHAU), (A_LHAX, A_LHAUX)), ((A_LHA, A_LHAU), (A_LHAX, A_LHAUX)),
{ there's no load-word-arithmetic-indexed with update, simulate it in code :( } { there's no load-word-arithmetic-indexed with update, simulate it in code :( }
((A_LWA, A_LWAU), (A_LWAX, A_LWAUX)), ((A_LWA, A_NOP), (A_LWAX, A_LWAUX)),
((A_LD, A_LDU), (A_LDX, A_LDUX)) ((A_LD, A_LDU), (A_LDX, A_LDUX))
); );
var var
@ -563,12 +551,12 @@ begin
ref2 := ref; ref2 := ref;
fixref(list, ref2, tosize); fixref(list, ref2, tosize);
{ the caller is expected to have adjusted the reference already { the caller is expected to have adjusted the reference already
in this case } in this case }
if (TCGSize2Size[fromsize] >= TCGSize2Size[tosize]) then if (TCGSize2Size[fromsize] >= TCGSize2Size[tosize]) then
fromsize := tosize; fromsize := tosize;
op := loadinstr[fromsize, ref2.index <> NR_NO, false]; op := loadinstr[fromsize, ref2.index <> NR_NO, false];
{ there is no LWAU instruction, simulate using ADDI and LWA } { there is no LWAU instruction, simulate using ADDI and LWA }
if (op = A_LWAU) then begin if (op = A_NOP) then begin
list.concat(taicpu.op_reg_reg_const(A_ADDI, reg, reg, ref2.offset)); list.concat(taicpu.op_reg_reg_const(A_ADDI, reg, reg, ref2.offset));
ref2.offset := 0; ref2.offset := 0;
op := A_LWA; op := A_LWA;
@ -605,8 +593,8 @@ var
begin begin
op := movemap[fromsize, tosize]; op := movemap[fromsize, tosize];
case op of case op of
A_MR, A_EXTSB, A_EXTSH, A_EXTSW : instr := taicpu.op_reg_reg(op, reg2, reg1); A_MR, A_EXTSB, A_EXTSH, A_EXTSW : instr := taicpu.op_reg_reg(op, reg2, reg1);
A_RLDICL : instr := taicpu.op_reg_reg_const_const(A_RLDICL, reg2, reg1, 0, (8-tcgsize2size[fromsize])*8); A_RLDICL : instr := taicpu.op_reg_reg_const_const(A_RLDICL, reg2, reg1, 0, (8-tcgsize2size[fromsize])*8);
else else
internalerror(2002090901); internalerror(2002090901);
end; end;
@ -614,8 +602,8 @@ begin
rg[R_INTREGISTER].add_move_instruction(instr); rg[R_INTREGISTER].add_move_instruction(instr);
end; end;
procedure tcgppc.a_loadfpu_reg_reg(list: taasmoutput; size: tcgsize; reg1, reg2: procedure tcgppc.a_loadfpu_reg_reg(list: taasmoutput; size: tcgsize;
tregister); reg1, reg2: tregister);
var var
instr: taicpu; instr: taicpu;
begin begin
@ -624,8 +612,8 @@ begin
rg[R_FPUREGISTER].add_move_instruction(instr); rg[R_FPUREGISTER].add_move_instruction(instr);
end; end;
procedure tcgppc.a_loadfpu_ref_reg(list: taasmoutput; size: tcgsize; const ref: procedure tcgppc.a_loadfpu_ref_reg(list: taasmoutput; size: tcgsize;
treference; reg: tregister); const ref: treference; reg: tregister);
const const
FpuLoadInstr: array[OS_F32..OS_F64, boolean, boolean] of TAsmOp = FpuLoadInstr: array[OS_F32..OS_F64, boolean, boolean] of TAsmOp =
{ indexed? updating?} { indexed? updating?}
@ -654,7 +642,6 @@ end;
procedure tcgppc.a_loadfpu_reg_ref(list: taasmoutput; size: tcgsize; reg: procedure tcgppc.a_loadfpu_reg_ref(list: taasmoutput; size: tcgsize; reg:
tregister; const ref: treference); tregister; const ref: treference);
const const
FpuStoreInstr: array[OS_F32..OS_F64, boolean, boolean] of TAsmOp = FpuStoreInstr: array[OS_F32..OS_F64, boolean, boolean] of TAsmOp =
{ indexed? updating? } { indexed? updating? }
@ -688,139 +675,131 @@ end;
procedure tcgppc.a_op_const_reg_reg(list: taasmoutput; op: TOpCg; procedure tcgppc.a_op_const_reg_reg(list: taasmoutput; op: TOpCg;
size: tcgsize; a: aint; src, dst: tregister); size: tcgsize; a: aint; src, dst: tregister);
var var
l1, l2: longint;
oplo, ophi: tasmop;
scratchreg: tregister;
useReg : boolean; useReg : boolean;
shiftmask : longint;
procedure do_lo_hi; procedure do_lo_hi(loOp, hiOp : TAsmOp);
begin begin
{ Optimization for logical ops (excluding AND), trying to do this as efficiently
as possible by only generating code for the affected halfwords. Note that all
the instructions handled here must have "X op 0 = X" for every halfword. }
usereg := false; usereg := false;
if (size in [OS_64, OS_S64]) then begin if (aword(a) > high(dword)) then begin
{ ts: use register method for 64 bit consts. Sloooooow }
usereg := true; usereg := true;
end else if (size in [OS_32, OS_S32]) then begin
list.concat(taicpu.op_reg_reg_const(oplo, dst, src, word(a)));
list.concat(taicpu.op_reg_reg_const(ophi, dst, dst, word(a shr 16)));
end else begin end else begin
list.concat(taicpu.op_reg_reg_const(oplo, dst, src, word(a))); if (word(a) <> 0) then begin
list.concat(taicpu.op_reg_reg_const(loOp, dst, src, word(a)));
if (word(a shr 16) <> 0) then
list.concat(taicpu.op_reg_reg_const(hiOp, dst, dst, word(a shr 16)));
end else if (word(a shr 16) <> 0) then
list.concat(taicpu.op_reg_reg_const(hiOp, dst, src, word(a shr 16)));
end; end;
end; end;
procedure do_lo_hi_and;
begin
{ optimization logical and with immediate: only use "andi." for 16 bit
ands, otherwise use register method. Doing this for 32 bit constants
would not give any advantage to the register method (via useReg := true),
requiring a scratch register and three instructions. }
usereg := false;
if (aword(a) > high(word)) then
usereg := true
else
list.concat(taicpu.op_reg_reg_const(A_ANDI_, dst, src, word(a)));
end;
var
scratchreg: tregister;
shift, shiftmask : longint;
begin begin
{ subtraction is the same as addition with negative constant }
if op = OP_SUB then begin if op = OP_SUB then begin
a_op_const_reg_reg(list, OP_ADD, size, -a, src, dst); a_op_const_reg_reg(list, OP_ADD, size, -a, src, dst);
exit; exit;
end; end;
ophi := TOpCG2AsmOpConstHi[op]; { This case includes some peephole optimizations for the various operations,
oplo := TOpCG2AsmOpConstLo[op]; (e.g. AND, OR, XOR, ..) - can't this be done at some higher level,
{ peephole optimizations for AND, OR, XOR - can't this be done at independent of architecture? }
some higher level, independent of architecture? }
if (op in [OP_AND, OP_OR, OP_XOR]) then begin
if (a = 0) then begin
if op = OP_AND then
list.concat(taicpu.op_reg_const(A_LI, dst, 0))
else
a_load_reg_reg(list, size, size, src, dst);
exit;
end else if (a = -1) then begin
case op of
OP_OR:
list.concat(taicpu.op_reg_const(A_LI, dst, -1));
OP_XOR:
list.concat(taicpu.op_reg_reg(A_NOT, dst, src));
OP_AND:
a_load_reg_reg(list, size, size, src, dst);
end;
exit;
end;
{ optimization for add }
end else if (op = OP_ADD) then
if a = 0 then begin
a_load_reg_reg(list, size, size, src, dst);
exit;
end else if (a >= low(smallint)) and (a <= high(smallint)) then begin
list.concat(taicpu.op_reg_reg_const(A_ADDI, dst, src, smallint(a)));
exit;
end;
{ otherwise, the instructions we can generate depend on the operation } { assume that we do not need a scratch register for the operation }
useReg := false; useReg := false;
case op of case (op) of
OP_DIV, OP_IDIV: OP_DIV, OP_IDIV:
{ actually, this method should be never called directly with OP_DIV or
OP_IDIV, so just provide basic support.
TODO: move division by constant stuff from nppcmat.pas here }
if (a = 0) then if (a = 0) then
internalerror(200208103) internalerror(200208103)
else if (a = 1) then begin else if (a = 1) then
a_load_reg_reg(list, OS_INT, OS_INT, src, dst); a_load_reg_reg(list, size, size, src, dst)
exit else
end else if false {and ispowerof2(a, l1)} then begin usereg := true;
internalerror(200208103);
case op of
OP_DIV: begin
list.concat(taicpu.op_reg_reg_const(A_SRDI, dst, src, l1));
end;
OP_IDIV:
begin
list.concat(taicpu.op_reg_reg_const(A_SRADI, dst, src, l1));
list.concat(taicpu.op_reg_reg(A_ADDZE, dst, dst));
end;
end;
exit;
end else
usereg := true;
OP_IMUL, OP_MUL: OP_IMUL, OP_MUL:
if (a = 0) then begin { idea: factorize constant multiplicands and use adds/shifts with few factors;
list.concat(taicpu.op_reg_const(A_LI, dst, 0)); however, even a 64 bit multiply is already quite fast on PPC64 }
exit if (a = 0) then
end else if (a = -1) then begin a_load_const_reg(list, size, 0, dst)
list.concat(taicpu.op_reg_reg(A_NEG, dst, dst)); else if (a = -1) then
end else if (a = 1) then begin list.concat(taicpu.op_reg_reg(A_NEG, dst, dst))
a_load_reg_reg(list, OS_INT, OS_INT, src, dst); else if (a = 1) then
exit a_load_reg_reg(list, OS_INT, OS_INT, src, dst)
end else if ispowerof2(a, l1) then else if ispowerof2(a, shift) then
list.concat(taicpu.op_reg_reg_const(A_SLDI, dst, src, l1)) list.concat(taicpu.op_reg_reg_const(A_SLDI, dst, src, shift))
else if (a >= low(smallint)) and (a <= high(smallint)) then else if (a >= low(smallint)) and (a <= high(smallint)) then
list.concat(taicpu.op_reg_reg_const(A_MULLI, dst, src, list.concat(taicpu.op_reg_reg_const(A_MULLI, dst, src,
smallint(a))) smallint(a)))
else else
usereg := true; usereg := true;
OP_ADD: OP_ADD:
{$todo ts:optimize} if (a = 0) then
useReg := true; a_load_reg_reg(list, size, size, src, dst)
else if (a >= low(smallint)) and (a <= high(smallint)) then
list.concat(taicpu.op_reg_reg_const(A_ADDI, dst, src, smallint(a)))
else
useReg := true;
OP_OR: OP_OR:
do_lo_hi; if (a = 0) then
a_load_reg_reg(list, size, size, src, dst)
else if (a = -1) then
a_load_const_reg(list, size, -1, dst)
else
do_lo_hi(A_ORI, A_ORIS);
OP_AND: OP_AND:
useReg := true; if (a = 0) then
a_load_const_reg(list, size, 0, dst)
else if (a = -1) then
a_load_reg_reg(list, size, size, src, dst)
else
do_lo_hi_and;
OP_XOR: OP_XOR:
do_lo_hi; if (a = 0) then
a_load_reg_reg(list, size, size, src, dst)
else if (a = -1) then
list.concat(taicpu.op_reg_reg(A_NOT, dst, src))
else
do_lo_hi(A_XORI, A_XORIS);
OP_SHL, OP_SHR, OP_SAR: OP_SHL, OP_SHR, OP_SAR:
begin begin
{$note ts: cleanup todo, fix remaining bugs} if (size in [OS_64, OS_S64]) then
if (size in [OS_64, OS_S64]) then begin shift := 6
if (a and 63) <> 0 then else
list.concat(taicpu.op_reg_reg_const( shift := 5;
TShiftOpCG2AsmOpConst64[Op], dst, src, a and 63))
else shiftmask := (1 shl shift)-1;
a_load_reg_reg(list, size, size, src, dst); if (a and shiftmask) <> 0 then
if (a shr 6) <> 0 then list.concat(taicpu.op_reg_reg_const(
internalError(68991); TShiftOpCG2AsmOpConst[size in [OS_64, OS_S64], op], dst, src, a and shiftmask))
end else begin else
if (a and 31) <> 0 then a_load_reg_reg(list, size, size, src, dst);
list.concat(taicpu.op_reg_reg_const( if ((a shr shift) <> 0) then
TShiftOpCG2AsmOpConst32[Op], dst, src, a and 31)) internalError(68991);
else
a_load_reg_reg(list, size, size, src, dst);
if (a shr 5) <> 0 then
internalError(68991);
end;
end end
else else
internalerror(200109091); internalerror(200109091);
end; end;
{ if all else failed, load the constant in a register and then } { if all else failed, load the constant in a register and then
{ perform the operation } perform the operation }
if useReg then begin if (useReg) then begin
scratchreg := rg[R_INTREGISTER].getregister(list, R_SUBWHOLE); scratchreg := rg[R_INTREGISTER].getregister(list, R_SUBWHOLE);
a_load_const_reg(list, size, a, scratchreg); a_load_const_reg(list, size, a, scratchreg);
a_op_reg_reg_reg(list, op, size, scratchreg, src, dst); a_op_reg_reg_reg(list, op, size, scratchreg, src, dst);
@ -843,35 +822,29 @@ begin
OP_NEG, OP_NOT: OP_NEG, OP_NOT:
begin begin
list.concat(taicpu.op_reg_reg(op_reg_reg_opcg2asmop64[op], dst, src1)); list.concat(taicpu.op_reg_reg(op_reg_reg_opcg2asmop64[op], dst, src1));
if (op = OP_NOT) and if (op = OP_NOT) and not (size in [OS_64, OS_S64]) then
not (size in [OS_64, OS_S64]) then
{ zero/sign extend result again, fromsize is not important here } { zero/sign extend result again, fromsize is not important here }
a_load_reg_reg(list, OS_S64, size, dst, dst) a_load_reg_reg(list, OS_S64, size, dst, dst)
end; end;
else else
{$NOTE ts:testme} if (size in [OS_64, OS_S64]) then begin
if (size in [OS_64, OS_S64]) then begin list.concat(taicpu.op_reg_reg_reg(op_reg_reg_opcg2asmop64[op], dst, src2,
list.concat(taicpu.op_reg_reg_reg(op_reg_reg_opcg2asmop64[op], dst, src2, src1));
src1)); end else begin
end else begin list.concat(taicpu.op_reg_reg_reg(op_reg_reg_opcg2asmop32[op], dst, src2,
list.concat(taicpu.op_reg_reg_reg(op_reg_reg_opcg2asmop32[op], dst, src2, src1));
src1)); end;
end;
end; end;
end; end;
{*************** compare instructructions ****************} {*************** compare instructructions ****************}
procedure tcgppc.a_cmp_const_reg_label(list: taasmoutput; size: tcgsize; cmp_op: procedure tcgppc.a_cmp_const_reg_label(list: taasmoutput; size: tcgsize;
topcmp; a: aint; reg: tregister; cmp_op: topcmp; a: aint; reg: tregister; l: tasmlabel);
l: tasmlabel);
var var
scratch_register: TRegister; scratch_register: TRegister;
signed: boolean; signed: boolean;
begin begin
{ todo: use 32 bit compares? }
signed := cmp_op in [OC_GT, OC_LT, OC_GTE, OC_LTE]; signed := cmp_op in [OC_GT, OC_LT, OC_GTE, OC_LTE];
{ in the following case, we generate more efficient code when } { in the following case, we generate more efficient code when }
{ signed is true } { signed is true }
@ -897,13 +870,10 @@ begin
a_jmp(list, A_BC, TOpCmp2AsmCond[cmp_op], 0, l); a_jmp(list, A_BC, TOpCmp2AsmCond[cmp_op], 0, l);
end; end;
procedure tcgppc.a_cmp_reg_reg_label(list: taasmoutput; size: tcgsize; cmp_op: procedure tcgppc.a_cmp_reg_reg_label(list: taasmoutput; size: tcgsize;
topcmp; cmp_op: topcmp; reg1, reg2: tregister; l: tasmlabel);
reg1, reg2: tregister; l: tasmlabel);
var var
op: tasmop; op: tasmop;
begin begin
if cmp_op in [OC_GT, OC_LT, OC_GTE, OC_LTE] then if cmp_op in [OC_GT, OC_LT, OC_GTE, OC_LTE] then
if (size in [OS_64, OS_S64]) then if (size in [OS_64, OS_S64]) then
@ -953,11 +923,9 @@ end;
procedure tcgppc.g_flags2reg(list: taasmoutput; size: TCgSize; const f: procedure tcgppc.g_flags2reg(list: taasmoutput; size: TCgSize; const f:
TResFlags; reg: TRegister); TResFlags; reg: TRegister);
var var
testbit: byte; testbit: byte;
bitvalue: boolean; bitvalue: boolean;
begin begin
{ get the bit to extract from the conditional register + its requested value (0 or 1) } { get the bit to extract from the conditional register + its requested value (0 or 1) }
testbit := ((f.cr - RS_CR0) * 4); testbit := ((f.cr - RS_CR0) * 4);
@ -1375,7 +1343,7 @@ begin
list.concat(taicpu.op_reg_reg_const(A_SUBI, dst.base, dst.base, 8)); list.concat(taicpu.op_reg_reg_const(A_SUBI, dst.base, dst.base, 8));
countreg := rg[R_INTREGISTER].getregister(list, R_SUBWHOLE); countreg := rg[R_INTREGISTER].getregister(list, R_SUBWHOLE);
a_load_const_reg(list, OS_32, count, countreg); a_load_const_reg(list, OS_32, count, countreg);
{ explicitly allocate R_0 since it can be used safely here { explicitly allocate F0 since it can be used safely here
(for holding data that's being copied) } (for holding data that's being copied) }
a_reg_alloc(list, NR_F0); a_reg_alloc(list, NR_F0);
objectlibrary.getjumplabel(lab); objectlibrary.getjumplabel(lab);

View File

@ -94,7 +94,7 @@ type
A_SRDI, A_SRADI, A_SRDI, A_SRADI,
A_SLDI, A_SLDI,
A_RLDICL, A_RLDICL,
A_DIVDU, A_DIVD, A_MULLD, A_SRAD, A_SLD, A_SRD, A_DIVDU, A_DIVDU_, A_DIVD, A_DIVD_, A_MULLD, A_MULLD_, A_MULHD, A_MULHD_, A_SRAD, A_SLD, A_SRD,
A_DIVDUO_, A_DIVDO_, A_DIVDUO_, A_DIVDO_,
A_LWA, A_LWAU, A_LWAX, A_LWAUX, A_LWA, A_LWAU, A_LWAX, A_LWAUX,
A_FCFID, A_FCFID,

View File

@ -49,9 +49,7 @@ const
{ the difference to stdcall is only the name mangling } { the difference to stdcall is only the name mangling }
pocall_cdecl, pocall_cdecl,
{ the difference to stdcall is only the name mangling } { the difference to stdcall is only the name mangling }
pocall_cppdecl, pocall_cppdecl
{ pass all const records by reference }
pocall_mwpascal
]; ];
processorsstr: array[tprocessors] of string[10] = ('', processorsstr: array[tprocessors] of string[10] = ('',

View File

@ -408,10 +408,11 @@ begin
end; end;
end; end;
end; end;
curintreg := nextintreg; curintreg := nextintreg;
curfloatreg := nextfloatreg; curfloatreg := nextfloatreg;
curmmreg := nextmmreg; curmmreg := nextmmreg;
cur_stack_offset := stack_offset; cur_stack_offset := stack_offset;
result := stack_offset; result := stack_offset;
end; end;

View File

@ -66,7 +66,7 @@ var
begin begin
if not (po_assembler in procdef.procoptions) then begin if not (po_assembler in procdef.procoptions) then begin
{ align the stack properly } { align the stack properly }
ofs := align(maxpushedparasize + LinkageAreaSizeELF, ELF_STACK_ALIGN); ofs := align(maxpushedparasize + LinkageAreaSizeELF, 8);
{ the ABI specification says that it is required to always allocate space for 8 * 8 bytes { the ABI specification says that it is required to always allocate space for 8 * 8 bytes
for registers R3-R10 and stack header if there's a stack frame, but GCC doesn't do that, for registers R3-R10 and stack header if there's a stack frame, but GCC doesn't do that,
@ -74,7 +74,6 @@ begin
// if (ofs < 112) then begin // if (ofs < 112) then begin
// ofs := 112; // ofs := 112;
// end; // end;
tg.setfirsttemp(ofs); tg.setfirsttemp(ofs);
end else begin end else begin
locals := 0; locals := 0;

View File

@ -84,7 +84,7 @@ const
'srdi', 'sradi', 'srdi', 'sradi',
'sldi', 'sldi',
'rldicl', 'rldicl',
'divdu', 'divd', 'mulld', 'srad', 'sld', 'srd', 'divdu', 'divdu.', 'divd', 'divd.', 'mulld', 'mulld.', 'mulhd', 'mulhd.', 'srad', 'sld', 'srd',
'divduo.', 'divdo.', 'divduo.', 'divdo.',
'lwa', '<illegal lwau>', 'lwax', 'lwaux', 'lwa', '<illegal lwau>', 'lwax', 'lwaux',
'fcfid', 'fcfid',

View File

@ -163,7 +163,6 @@ begin
end end
end; end;
// Todo: ts: allow emiting word compares...
procedure tppcaddnode.emit_compare(unsigned: boolean); procedure tppcaddnode.emit_compare(unsigned: boolean);
var var
op: tasmop; op: tasmop;
@ -175,8 +174,7 @@ begin
swapleftright; swapleftright;
// can we use an immediate, or do we have to load the // can we use an immediate, or do we have to load the
// constant in a register first? // constant in a register first?
if (right.location.loc = LOC_CONSTANT) then if (right.location.loc = LOC_CONSTANT) then begin
begin
if (nodetype in [equaln, unequaln]) then if (nodetype in [equaln, unequaln]) then
if (unsigned and if (unsigned and
(aword(right.location.value) > high(word))) or (aword(right.location.value) > high(word))) or
@ -193,15 +191,13 @@ begin
(aint(right.location.value) >= low(smallint)) and (aint(right.location.value) >= low(smallint)) and
(aint(right.location.value) <= high(smallint))) then (aint(right.location.value) <= high(smallint))) then
useconst := true useconst := true
else else begin
begin
useconst := false; useconst := false;
tmpreg := cg.getintregister(exprasmlist, OS_INT); tmpreg := cg.getintregister(exprasmlist, OS_INT);
cg.a_load_const_reg(exprasmlist, OS_INT, cg.a_load_const_reg(exprasmlist, OS_INT,
right.location.value, tmpreg); right.location.value, tmpreg);
end end
end end else
else
useconst := false; useconst := false;
location.loc := LOC_FLAGS; location.loc := LOC_FLAGS;
location.resflags := getresflags; location.resflags := getresflags;
@ -215,15 +211,13 @@ begin
else else
op := A_CMPLD; op := A_CMPLD;
if (right.location.loc = LOC_CONSTANT) then if (right.location.loc = LOC_CONSTANT) then begin
begin
if useconst then if useconst then
exprasmlist.concat(taicpu.op_reg_const(op, left.location.register, exprasmlist.concat(taicpu.op_reg_const(op, left.location.register,
longint(right.location.value))) longint(right.location.value)))
else else
exprasmlist.concat(taicpu.op_reg_reg(op, left.location.register, tmpreg)); exprasmlist.concat(taicpu.op_reg_reg(op, left.location.register, tmpreg));
end end else
else
exprasmlist.concat(taicpu.op_reg_reg(op, exprasmlist.concat(taicpu.op_reg_reg(op,
left.location.register, right.location.register)); left.location.register, right.location.register));
end; end;
@ -237,7 +231,7 @@ var
cgop: TOpCg; cgop: TOpCg;
cgsize: TCgSize; cgsize: TCgSize;
cmpop, cmpop,
isjump: boolean; isjump: boolean;
otl, ofl: tasmlabel; otl, ofl: tasmlabel;
begin begin
{ calculate the operator which is more difficult } { calculate the operator which is more difficult }
@ -525,7 +519,6 @@ begin
cg.a_op_reg_reg(exprasmlist, OP_SHL, OS_64, cg.a_op_reg_reg(exprasmlist, OP_SHL, OS_64,
right.location.register, tmpreg); right.location.register, tmpreg);
if left.location.loc <> LOC_CONSTANT then begin if left.location.loc <> LOC_CONSTANT then begin
cg.a_op_reg_reg_reg(exprasmlist, OP_OR, OS_64, tmpreg, cg.a_op_reg_reg_reg(exprasmlist, OP_OR, OS_64, tmpreg,
left.location.register, location.register) left.location.register, location.register)
end else begin end else begin

View File

@ -36,6 +36,8 @@ type
} }
function first_abs_real: tnode; override; function first_abs_real: tnode; override;
function first_sqr_real: tnode; override; function first_sqr_real: tnode; override;
{ todo: inline trunc/round/frac?/int }
procedure second_abs_real; override; procedure second_abs_real; override;
procedure second_sqr_real; override; procedure second_sqr_real; override;
procedure second_prefetch; override; procedure second_prefetch; override;

View File

@ -59,6 +59,176 @@ uses
cpubase, cpuinfo, cpubase, cpuinfo,
ncgutil, cgcpu, rgobj; ncgutil, cgcpu, rgobj;
{ helper functions: magic number computation for 32 bit division by constants }
{ Computes the constants needed to replace an unsigned 32 bit division by the
  constant d with a multiplication and shifts.

  d           : the divisor, must be > 0
  magic_m     : receives the low 32 bits of the multiplier (q2 + 1)
  magic_add   : receives true if the doubling of q2 did not fit into 32 bits
                any more, i.e. the full multiplier needs one extra bit
  magic_shift : receives the shift amount (p - 32)

  NOTE(review): how the caller combines these three values is not visible in
  this procedure; presumably the usual "multiply high, optionally add, shift"
  sequence - confirm against the code generator that consumes them.

  The doubling steps of q1 and q2 are meant to be performed modulo 2^32; the
  assignments below rely on the result being truncated to dword. }
procedure getmagic_unsigned32(d : dword; out magic_m : dword; out magic_add : boolean; out magic_shift : dword);
var
  p : longint;
  nc, delta, q1, r1, q2, r2 : dword;
begin
  assert(d > 0);

  magic_add := false;

  { nc = 2^32 - 1 - ((2^32 - d) mod d).
    This used to be written as "- 1 - (-d) mod d", whose value depends on how
    the compiler widens the negation of an unsigned operand: evaluated in a
    signed 64 bit type, (-d) mod d is always 0 and nc would become 2^32 - 1
    for every d. The form below only uses non-negative intermediate values
    (high(dword) - d + 1 = 2^32 - d >= 1 because d >= 1), so it yields the
    same result regardless of the width the expression is evaluated in. }
  nc := high(dword) - (high(dword) - d + 1) mod d;

  p := 31;                       { initialize p }
  q1 := $80000000 div nc;        { initialize q1 = 2^p/nc }
  r1 := $80000000 - q1*nc;       { initialize r1 = rem(2^p,nc) }
  q2 := $7FFFFFFF div d;         { initialize q2 = (2^p-1)/d }
  r2 := $7FFFFFFF - q2*d;        { initialize r2 = rem((2^p-1),d) }
  repeat
    inc(p);
    { double q1/r1, keeping r1 reduced modulo nc }
    if (r1 >= (nc - r1)) then begin
      q1 := 2 * q1 + 1;          { update q1 }
      r1 := 2*r1 - nc;           { update r1 }
    end else begin
      q1 := 2*q1;                { update q1 }
      r1 := 2*r1;                { update r1 }
    end;
    { double q2/r2, keeping r2 reduced modulo d; remember if q2 is about to
      lose its top bit }
    if ((r2 + 1) >= (d - r2)) then begin
      if (q2 >= $7FFFFFFF) then
        magic_add := true;
      q2 := 2*q2 + 1;            { update q2 }
      r2 := 2*r2 + 1 - d;        { update r2 }
    end else begin
      if (q2 >= $80000000) then
        magic_add := true;
      q2 := 2*q2;                { update q2 }
      r2 := 2*r2 + 1;            { update r2 }
    end;
    delta := d - 1 - r2;
  until not ((p < 64) and ((q1 < delta) or ((q1 = delta) and (r1 = 0))));
  magic_m := q2 + 1;             { resulting magic number }
  magic_shift := p - 32;         { resulting shift }
end;
{ Computes the constants needed to replace a signed 32 bit division by the
  constant d with a multiplication and shifts.

  d       : the divisor, must be < -1 or > 1
  magic_m : receives the multiplier; it is the 32 bit pattern of q2 + 1
            reinterpreted as a signed value, negated if d is negative
  magic_s : receives the shift amount (p - 32)

  NOTE(review): the instruction sequence that consumes these values is not
  part of this procedure - confirm against the caller. }
procedure getmagic_signed32(d : longint; out magic_m : longint; out magic_s : longint);
const
  two_31 : DWord = high(longint)+1;
var
  p : Longint;
  ad, anc, delta, q1, r1, q2, r2, t : DWord;
begin
  assert((d < -1) or (d > 1));

  { absolute value of d as an unsigned number. abs(d) cannot represent
    -low(longint) in a longint, so negate in a wider type instead of relying
    on wrap-around. }
  if (d < 0) then
    ad := DWord(-int64(d))
  else
    ad := DWord(d);

  t := two_31 + (DWord(d) shr 31);   { 2^31 for d > 0, 2^31 + 1 for d < 0 }
  anc := t - 1 - t mod ad;           { absolute value of nc }
  p := 31;                           { initialize p }
  q1 := two_31 div anc;              { initialize q1 = 2^p/abs(nc) }
  r1 := two_31 - q1*anc;             { initialize r1 = rem(2^p,abs(nc)) }
  q2 := two_31 div ad;               { initialize q2 = 2^p/abs(d) }
  r2 := two_31 - q2*ad;              { initialize r2 = rem(2^p,abs(d)) }
  repeat
    inc(p);
    q1 := 2*q1;                      { update q1 = 2^p/abs(nc) }
    r1 := 2*r1;                      { update r1 = rem(2^p/abs(nc)) }
    if (r1 >= anc) then begin        { must be unsigned comparison }
      inc(q1);
      dec(r1, anc);
    end;
    q2 := 2*q2;                      { update q2 = 2^p/abs(d) }
    r2 := 2*r2;                      { update r2 = rem(2^p/abs(d)) }
    if (r2 >= ad) then begin         { must be unsigned comparison }
      inc(q2);
      dec(r2, ad);
    end;
    delta := ad - r2;
  until not ((q1 < delta) or ((q1 = delta) and (r1 = 0)));

  { q2 + 1 may have bit 31 set (e.g. for d = 7); the multiplier is that bit
    pattern read as a signed number, so reinterpret it explicitly instead of
    depending on an unchecked implicit conversion. }
  magic_m := longint(DWord(q2 + 1));
  if (d < 0) then begin
    magic_m := -magic_m;             { resulting magic number }
  end;
  magic_s := p - 32;                 { resulting shift }
end;
{ helper functions: magic number computation for 64 bit division by constants }
{ Computes the constants needed to replace an unsigned 64 bit division by the
  constant d with a multiplication and shifts.

  d           : the divisor, must be > 0
  magic_m     : receives the low 64 bits of the multiplier (q2 + 1)
  magic_add   : receives true if the doubling of q2 did not fit into 64 bits
                any more, i.e. the full multiplier needs one extra bit
  magic_shift : receives the shift amount (p - 64)

  NOTE(review): how the caller combines these three values is not visible in
  this procedure - confirm against the code generator that consumes them.

  The doubling steps of q1 and q2 are meant to be performed modulo 2^64 and
  therefore rely on unsigned wrap-around of qword arithmetic. }
procedure getmagic_unsigned64(d : qword; out magic_m : qword; out magic_add : boolean; out magic_shift : qword);
const
  two_63 : QWord = $8000000000000000;
var
  p : int64;
  nc, delta, q1, r1, q2, r2 : qword;
begin
  assert(d > 0);

  magic_add := false;

  { nc = 2^64 - 1 - ((2^64 - d) mod d).
    This used to be written as "- 1 - (-d) mod d", which depends on the
    negation of a qword being evaluated as an unsigned wrap-around; if it is
    evaluated as a signed value instead, (-d) mod d is 0 and nc becomes
    2^64 - 1 for every d. The form below stays within qword range
    (high(qword) - d + 1 = 2^64 - d, and d >= 1) and needs no negation. }
  nc := high(qword) - (high(qword) - d + qword(1)) mod d;

  p := 63;                       { initialize p }
  q1 := two_63 div nc;           { initialize q1 = 2^p/nc }
  r1 := two_63 - q1*nc;          { initialize r1 = rem(2^p,nc) }
  q2 := (two_63-1) div d;        { initialize q2 = (2^p-1)/d }
  r2 := (two_63-1) - q2*d;       { initialize r2 = rem((2^p-1),d) }
  repeat
    inc(p);
    { double q1/r1, keeping r1 reduced modulo nc }
    if (r1 >= (nc - r1)) then begin
      q1 := 2 * q1 + 1;          { update q1 }
      r1 := 2*r1 - nc;           { update r1 }
    end else begin
      q1 := 2*q1;                { update q1 }
      r1 := 2*r1;                { update r1 }
    end;
    { double q2/r2, keeping r2 reduced modulo d; remember if q2 is about to
      lose its top bit }
    if ((r2 + 1) >= (d - r2)) then begin
      if (q2 >= (two_63-1)) then
        magic_add := true;
      q2 := 2*q2 + 1;            { update q2 }
      r2 := 2*r2 + 1 - d;        { update r2 }
    end else begin
      if (q2 >= two_63) then
        magic_add := true;
      q2 := 2*q2;                { update q2 }
      r2 := 2*r2 + 1;            { update r2 }
    end;
    delta := d - 1 - r2;
  until not ((p < 128) and ((q1 < delta) or ((q1 = delta) and (r1 = 0))));
  magic_m := q2 + 1;             { resulting magic number }
  magic_shift := p - 64;         { resulting shift }
end;
{ Computes the constants needed to replace a signed 64 bit division by the
  constant d with a multiplication and shifts.

  d       : the divisor, must be < -1 or > 1
  magic_m : receives the multiplier; it is the 64 bit pattern of q2 + 1
            reinterpreted as a signed value, negated if d is negative
  magic_s : receives the shift amount (p - 64)

  NOTE(review): the instruction sequence that consumes these values is not
  part of this procedure - confirm against the caller. }
procedure getmagic_signed64(d : int64; out magic_m : int64; out magic_s : int64);
const
  two_63 : QWord = $8000000000000000;
var
  p : int64;
  ad, anc, delta, q1, r1, q2, r2, t : QWord;
begin
  assert((d < -1) or (d > 1));

  { absolute value of d as an unsigned number. abs(d) cannot represent
    -low(int64) in an int64; for negative d, "not d" equals -d - 1 which is
    always representable, so add the 1 back after switching to qword. }
  if (d < 0) then
    ad := QWord(not d) + 1
  else
    ad := QWord(d);

  t := two_63 + (QWord(d) shr 63);   { 2^63 for d > 0, 2^63 + 1 for d < 0 }
  anc := t - 1 - t mod ad;           { absolute value of nc }
  p := 63;                           { initialize p }
  q1 := two_63 div anc;              { initialize q1 = 2^p/abs(nc) }
  r1 := two_63 - q1*anc;             { initialize r1 = rem(2^p,abs(nc)) }
  q2 := two_63 div ad;               { initialize q2 = 2^p/abs(d) }
  r2 := two_63 - q2*ad;              { initialize r2 = rem(2^p,abs(d)) }
  repeat
    inc(p);
    q1 := 2*q1;                      { update q1 = 2^p/abs(nc) }
    r1 := 2*r1;                      { update r1 = rem(2^p/abs(nc)) }
    if (r1 >= anc) then begin        { must be unsigned comparison }
      inc(q1);
      dec(r1, anc);
    end;
    q2 := 2*q2;                      { update q2 = 2^p/abs(d) }
    r2 := 2*r2;                      { update r2 = rem(2^p/abs(d)) }
    if (r2 >= ad) then begin         { must be unsigned comparison }
      inc(q2);
      dec(r2, ad);
    end;
    delta := ad - r2;
  until not ((q1 < delta) or ((q1 = delta) and (r1 = 0)));

  { q2 + 1 may have bit 63 set; the multiplier is that bit pattern read as a
    signed number, so reinterpret it explicitly instead of depending on an
    unchecked implicit conversion. }
  magic_m := int64(q2 + 1);
  if (d < 0) then begin
    magic_m := -magic_m;             { resulting magic number }
  end;
  magic_s := p - 64;                 { resulting shift }
end;
{***************************************************************************** {*****************************************************************************
TPPCMODDIVNODE TPPCMODDIVNODE
*****************************************************************************} *****************************************************************************}
@ -70,6 +240,200 @@ begin
include(current_procinfo.flags, pi_do_call); include(current_procinfo.flags, pi_do_call);
end; end;
{ Second pass (code generation) for a div/mod node.

  Evaluates both operands, forces the numerator into a register and emits a
  64 bit divide instruction; for mod nodes the remainder is derived from the
  quotient by a multiply and a subtract. For divisors that are not ordinal
  constants a compare against zero is emitted and FPC_DIVBYZERO is called when
  the divisor is zero. For signed divisors an overflow check is generated
  afterwards.

  The nested helpers genOrdConstNodeDiv/genOrdConstNodeMod generate
  division-free code for constant divisors, but their only call site in the
  main body is currently commented out, so "done" is always false there and
  the generic divide path is always taken. }
procedure tppcmoddivnode.pass_2;
const { signed overflow }
{ divide opcode table, indexed as [divisor type is signed, overflow checking enabled] }
divops: array[boolean, boolean] of tasmop =
((A_DIVDU,A_DIVDU_),(A_DIVD,A_DIVDO_));
{ condition used by the conditional branch that skips the FPC_DIVBYZERO call:
  "not equal" in CR7, which is the field written by the compare of the divisor
  with zero in the main body }
zerocond: tasmcond = (dirhint: DH_Plus; simple: true; cond:C_NE; cr: RS_CR7);
var
power : longint;
op : tasmop;
numerator, divider,
resultreg : tregister;
size : TCgSize;
hl : tasmlabel;
done: boolean;
{ Emits code that computes "numerator div <constant right operand>" into
  resultreg without using a divide instruction, and sets done to true.
  Uses numerator, resultreg and power of the enclosing procedure. }
procedure genOrdConstNodeDiv;
const
{ negate opcode, indexed by "overflow checking enabled" }
negops : array[boolean] of tasmop = (A_NEG, A_NEGO);
var
magic, shift : int64;
u_magic, u_shift : qword;
u_add : boolean;
divreg : tregister;
begin
if (tordconstnode(right).value = 0) then begin
{ a constant zero divisor is not expected to reach code generation }
internalerror(2005061701);
end else if (tordconstnode(right).value = 1) then begin
{ x div 1 = x: plain register copy }
cg.a_load_reg_reg(exprasmlist, OS_INT, OS_INT, numerator, resultreg);
end else if (tordconstnode(right).value = -1) then begin
{ note: only in the signed case possible..., may overflow }
exprasmlist.concat(taicpu.op_reg_reg(negops[cs_check_overflow in aktlocalswitches], resultreg, numerator));
end else if (ispowerof2(tordconstnode(right).value, power)) then begin
if (is_signed(right.resulttype.def)) then begin
{ From "The PowerPC Compiler Writer's Guide", pg. 52ff }
{ arithmetic shift right by "power", followed by ADDZE on the result }
cg.a_op_const_reg_reg(exprasmlist, OP_SAR, OS_INT, power,
numerator, resultreg);
exprasmlist.concat(taicpu.op_reg_reg(A_ADDZE, resultreg, resultreg));
end else begin
{ unsigned: a logical shift right is sufficient }
cg.a_op_const_reg_reg(exprasmlist, OP_SHR, OS_INT, power, numerator, resultreg)
end;
end else begin
{ replace division by multiplication, both implementations }
{ from "The PowerPC Compiler Writer's Guide" pg. 53ff }
{ divreg is a scratch register: it first holds the magic constant and is
  afterwards reused for intermediate values }
divreg := cg.getintregister(exprasmlist, OS_INT);
if (is_signed(right.resulttype.def)) then begin
getmagic_signed64(tordconstnode(right).value, magic, shift);
{ load magic value }
cg.a_load_const_reg(exprasmlist, OS_INT, magic, divreg);
{ multiply }
exprasmlist.concat(taicpu.op_reg_reg_reg(A_MULHD, resultreg, numerator, divreg));
{ add/subtract numerator }
{ only needed when the sign of the magic value differs from the sign of the divisor }
if (tordconstnode(right).value > 0) and (magic < 0) then begin
cg.a_op_reg_reg_reg(exprasmlist, OP_ADD, OS_INT, numerator, resultreg, resultreg);
end else if (tordconstnode(right).value < 0) and (magic > 0) then begin
cg.a_op_reg_reg_reg(exprasmlist, OP_SUB, OS_INT, numerator, resultreg, resultreg);
end;
{ shift shift places to the right (arithmetic) }
cg.a_op_const_reg_reg(exprasmlist, OP_SAR, OS_INT, shift, resultreg, resultreg);
{ extract and add sign bit }
{ the sign bit is taken from the numerator for non-negative divisors and
  from the intermediate result for negative divisors }
if (tordconstnode(right).value >= 0) then begin
cg.a_op_const_reg_reg(exprasmlist, OP_SHR, OS_INT, 63, numerator, divreg);
end else begin
cg.a_op_const_reg_reg(exprasmlist, OP_SHR, OS_INT, 63, resultreg, divreg);
end;
cg.a_op_reg_reg_reg(exprasmlist, OP_ADD, OS_INT, resultreg, divreg, resultreg);
end else begin
getmagic_unsigned64(tordconstnode(right).value, u_magic, u_add, u_shift);
{ load magic in divreg }
cg.a_load_const_reg(exprasmlist, OS_INT, u_magic, divreg);
{ resultreg receives the result of the unsigned high multiply of numerator and magic }
exprasmlist.concat(taicpu.op_reg_reg_reg(A_MULHDU, resultreg, numerator, divreg));
if (u_add) then begin
{ "add" variant: four-step fixup emitted as subtract, shift right by 1,
  add, shift right by u_shift-1.
  NOTE(review): presumably this is ((numerator - hi) shr 1 + hi) shr (u_shift-1);
  the operand order of OP_SUB is not visible here - confirm }
cg.a_op_reg_reg_reg(exprasmlist, OP_SUB, OS_INT, resultreg, numerator, divreg);
cg.a_op_const_reg_reg(exprasmlist, OP_SHR, OS_INT, 1, divreg, divreg);
cg.a_op_reg_reg_reg(exprasmlist, OP_ADD, OS_INT, divreg, resultreg, divreg);
cg.a_op_const_reg_reg(exprasmlist, OP_SHR, OS_INT, u_shift-1, divreg, resultreg);
end else begin
{ no fixup needed: just shift the high product right by u_shift }
cg.a_op_const_reg_reg(exprasmlist, OP_SHR, OS_INT, u_shift, resultreg, resultreg);
end;
end;
end;
{ tell the main body that the result has already been computed }
done := true;
end;
{ Emits code that computes "numerator mod <constant right operand>" into
  resultreg. Only the general (non power of two) case sets done, indirectly
  through genOrdConstNodeDiv. }
procedure genOrdConstNodeMod;
var
modreg, maskreg, tempreg : tregister;
begin
if (tordconstnode(right).value = 0) then begin
{ a constant zero divisor is not expected to reach code generation }
internalerror(2005061702);
end else if (abs(tordconstnode(right).value) = 1) then begin
{ x mod +/-1 is always zero }
cg.a_load_const_reg(exprasmlist, OS_INT, 0, resultreg);
end else if (ispowerof2(tordconstnode(right).value, power)) then begin
if (is_signed(right.resulttype.def)) then begin
{ signed power of two: sequence without branches.
  modreg is loaded with abs(divisor)-1, maskreg with the numerator shifted
  right arithmetically by 63 and tempreg with numerator AND modreg; the
  ANDC/SUBFIC/SUBFE/AND/OR instructions then combine these into resultreg.
  NOTE(review): the exact semantics of this sequence were not verified here }
tempreg := cg.getintregister(exprasmlist, OS_INT);
maskreg := cg.getintregister(exprasmlist, OS_INT);
modreg := cg.getintregister(exprasmlist, OS_INT);
cg.a_load_const_reg(exprasmlist, OS_INT, abs(tordconstnode(right).value)-1, modreg);
cg.a_op_const_reg_reg(exprasmlist, OP_SAR, OS_INT, 63, numerator, maskreg);
cg.a_op_reg_reg_reg(exprasmlist, OP_AND, OS_INT, numerator, modreg, tempreg);
exprasmlist.concat(taicpu.op_reg_reg_reg(A_ANDC, maskreg, maskreg, modreg));
exprasmlist.concat(taicpu.op_reg_reg_const(A_SUBFIC, modreg, tempreg, 0));
exprasmlist.concat(taicpu.op_reg_reg_reg(A_SUBFE, modreg, modreg, modreg));
cg.a_op_reg_reg_reg(exprasmlist, OP_AND, OS_INT, modreg, maskreg, maskreg);
cg.a_op_reg_reg_reg(exprasmlist, OP_OR, OS_INT, maskreg, tempreg, resultreg);
end else begin
{ unsigned power of two: mask with divisor-1 }
cg.a_op_const_reg_reg(exprasmlist, OP_AND, OS_INT, tordconstnode(right).value-1, numerator, resultreg);
end;
end else begin
{ general case: compute the quotient first, multiply it by the divisor and
  subtract; intended result is numerator - quotient*divisor }
genOrdConstNodeDiv();
cg.a_op_const_reg_reg(exprasmlist, OP_MUL, OS_INT, tordconstnode(right).value, resultreg, resultreg);
cg.a_op_reg_reg_reg(exprasmlist, OP_SUB, OS_INT, resultreg, numerator, resultreg);
end;
end;
begin
{ generate code for both operands }
secondpass(left);
secondpass(right);
{ this copy is repeated below, after the numerator has been forced into a register }
location_copy(location,left.location);
{ put numerator in register }
size:=def_cgsize(left.resulttype.def);
location_force_reg(exprasmlist,left.location,
size,true);
location_copy(location,left.location);
{ by default the result is produced in the numerator's register }
numerator := location.register;
resultreg := location.register;
if (location.loc = LOC_CREGISTER) then begin
{ for LOC_CREGISTER a newly allocated register is used for the result
  instead of the numerator's register }
location.loc := LOC_REGISTER;
location.register := cg.getintregister(exprasmlist,size);
resultreg := location.register;
end else if (nodetype = modn) or (right.nodetype = ordconstn) then begin
{ for a modulus op, and for const nodes we need the result register
to be an extra register }
resultreg := cg.getintregister(exprasmlist,size);
end;
done := false;
{ the constant-divisor optimization below is disabled (commented out), so
  done is still false when the generic path is reached }
(*
if (right.nodetype = ordconstn) then begin
if (nodetype = divn) then
genOrdConstNodeDiv
else
genOrdConstNodeMod;
done := true;
end;
*)
if (not done) then begin
{ load divider in a register if necessary }
location_force_reg(exprasmlist,right.location,
def_cgsize(right.resulttype.def),true);
{ non-constant divisor: compare it with zero into CR7 for the check emitted
  further below; a constant divisor is only checked here at compile time }
if (right.nodetype <> ordconstn) then
exprasmlist.concat(taicpu.op_reg_reg_const(A_CMPDI, NR_CR7,
right.location.register, 0))
else begin
if (tordconstnode(right).value = 0) then
internalerror(2005100301);
end;
divider := right.location.register;
{ needs overflow checking, (-maxlongint-1) div (-1) overflows! }
op := divops[is_signed(right.resulttype.def),
cs_check_overflow in aktlocalswitches];
exprasmlist.concat(taicpu.op_reg_reg_reg(op, resultreg, numerator,
divider));
if (nodetype = modn) then begin
{ remainder: multiply the quotient by the divider and subtract; the final
  value is written to location.register }
exprasmlist.concat(taicpu.op_reg_reg_reg(A_MULLD,resultreg,
divider,resultreg));
exprasmlist.concat(taicpu.op_reg_reg_reg(A_SUB,location.register,
numerator,resultreg));
resultreg := location.register;
end;
end;
{ set result location }
location.loc:=LOC_REGISTER;
location.register:=resultreg;
if right.nodetype <> ordconstn then begin
{ runtime division by zero check: branch over the call to FPC_DIVBYZERO
  when zerocond (CR7 "not equal") holds }
objectlibrary.getjumplabel(hl);
exprasmlist.concat(taicpu.op_cond_sym(A_BC,zerocond,hl));
cg.a_call_name(exprasmlist,'FPC_DIVBYZERO');
cg.a_label(exprasmlist,hl);
end;
{ unsigned division/modulo can only overflow in case of division by zero
(but checking this overflow flag is more convoluted than performing a
simple comparison with 0) }
if is_signed(right.resulttype.def) then
cg.g_overflowcheck(exprasmlist,location,resulttype.def);
end;
(*
procedure tppcmoddivnode.pass_2; procedure tppcmoddivnode.pass_2;
const const
// ts: todo, use 32 bit operations if possible (much faster!) // ts: todo, use 32 bit operations if possible (much faster!)
@ -130,9 +494,7 @@ begin
end else begin end else begin
cg.a_op_const_reg_reg(exprasmlist, OP_SHR, OS_INT, power, numerator, resultreg); cg.a_op_const_reg_reg(exprasmlist, OP_SHR, OS_INT, power, numerator, resultreg);
end; end;
end end else begin
else
begin
{ load divider in a register if necessary } { load divider in a register if necessary }
location_force_reg(exprasmlist, right.location, location_force_reg(exprasmlist, right.location,
def_cgsize(right.resulttype.def), true); def_cgsize(right.resulttype.def), true);
@ -150,8 +512,7 @@ begin
exprasmlist.concat(taicpu.op_reg_reg_reg(op, resultreg, numerator, exprasmlist.concat(taicpu.op_reg_reg_reg(op, resultreg, numerator,
divider)); divider));
if (nodetype = modn) then if (nodetype = modn) then begin
begin
{$NOTE ts:testme} {$NOTE ts:testme}
exprasmlist.concat(taicpu.op_reg_reg_reg(A_MULLD, resultreg, exprasmlist.concat(taicpu.op_reg_reg_reg(A_MULLD, resultreg,
divider, resultreg)); divider, resultreg));
@ -163,8 +524,7 @@ begin
{ set result location } { set result location }
location.loc := LOC_REGISTER; location.loc := LOC_REGISTER;
location.register := resultreg; location.register := resultreg;
if right.nodetype <> ordconstn then if (right.nodetype <> ordconstn) then begin
begin
objectlibrary.getjumplabel(hl); objectlibrary.getjumplabel(hl);
exprasmlist.concat(taicpu.op_cond_sym(A_BC, zerocond, hl)); exprasmlist.concat(taicpu.op_cond_sym(A_BC, zerocond, hl));
cg.a_call_name(exprasmlist, 'FPC_DIVBYZERO'); cg.a_call_name(exprasmlist, 'FPC_DIVBYZERO');
@ -172,7 +532,7 @@ begin
end; end;
cg.g_overflowcheck(exprasmlist, location, resulttype.def); cg.g_overflowcheck(exprasmlist, location, resulttype.def);
end; end;
*)
{***************************************************************************** {*****************************************************************************
TPPCSHLRSHRNODE TPPCSHLRSHRNODE
*****************************************************************************} *****************************************************************************}
@ -181,8 +541,8 @@ end;
procedure tppcshlshrnode.pass_2; procedure tppcshlshrnode.pass_2;
var var
resultreg, hregister1, hregister2, resultreg, hregister1, hregister2 : tregister;
hreg64hi, hreg64lo: tregister;
op: topcg; op: topcg;
asmop1, asmop2: tasmop; asmop1, asmop2: tasmop;
shiftval: aint; shiftval: aint;
@ -199,7 +559,7 @@ begin
hregister1 := location.register; hregister1 := location.register;
if (location.loc = LOC_CREGISTER) then begin if (location.loc = LOC_CREGISTER) then begin
location.loc := LOC_REGISTER; location.loc := LOC_REGISTER;
resultreg := cg.getintregister(exprasmlist, OS_64); resultreg := cg.getintregister(exprasmlist, OS_INT);
location.register := resultreg; location.register := resultreg;
end; end;
@ -257,17 +617,14 @@ begin
end; end;
LOC_REFERENCE, LOC_CREFERENCE: LOC_REFERENCE, LOC_CREFERENCE:
begin begin
if (left.resulttype.def.deftype = floatdef) then if (left.resulttype.def.deftype = floatdef) then begin
begin
src1 := cg.getfpuregister(exprasmlist, src1 := cg.getfpuregister(exprasmlist,
def_cgsize(left.resulttype.def)); def_cgsize(left.resulttype.def));
location.register := src1; location.register := src1;
cg.a_loadfpu_ref_reg(exprasmlist, cg.a_loadfpu_ref_reg(exprasmlist,
def_cgsize(left.resulttype.def), def_cgsize(left.resulttype.def),
left.location.reference, src1); left.location.reference, src1);
end end else begin
else
begin
src1 := cg.getintregister(exprasmlist, OS_64); src1 := cg.getintregister(exprasmlist, OS_64);
location.register := src1; location.register := src1;
cg.a_load_ref_reg(exprasmlist, OS_64, OS_64, cg.a_load_ref_reg(exprasmlist, OS_64, OS_64,
@ -276,28 +633,19 @@ begin
end; end;
end; end;
{ choose appropriate operand } { choose appropriate operand }
if left.resulttype.def.deftype <> floatdef then if left.resulttype.def.deftype <> floatdef then begin
begin
if not (cs_check_overflow in aktlocalswitches) then if not (cs_check_overflow in aktlocalswitches) then
op := A_NEG op := A_NEG
else else
op := A_NEGO_; op := A_NEGO_;
location.loc := LOC_REGISTER; location.loc := LOC_REGISTER;
end end else begin
else
begin
op := A_FNEG; op := A_FNEG;
location.loc := LOC_FPUREGISTER; location.loc := LOC_FPUREGISTER;
end; end;
{ emit operation } { emit operation }
exprasmlist.concat(taicpu.op_reg_reg(op, location.register, src1)); exprasmlist.concat(taicpu.op_reg_reg(op, location.register, src1));
end; end;
{ Here was a problem... }
{ Operand to be negated always }
{ seems to be converted to signed }
{ 32-bit before doing neg!! }
{ So this is useless... }
{ that's not true: -2^31 gives an overflow error if it is negated (FK) }
cg.g_overflowcheck(exprasmlist, location, resulttype.def); cg.g_overflowcheck(exprasmlist, location, resulttype.def);
end; end;