* call by register now also takes care of the TOC and environment pointer

* first "optimize for size"-optimization

git-svn-id: trunk@1593 -
This commit is contained in:
tom_at_work 2005-10-25 20:12:21 +00:00
parent 58582a44a9
commit 5520442fc4
2 changed files with 137 additions and 116 deletions

View File

@ -50,8 +50,6 @@ type
paraloc: tcgpara); override;
procedure a_call_name(list: taasmoutput; const s: string); override;
procedure a_call_name_direct(list: taasmoutput; s: string; prependDot : boolean);
procedure a_call_reg(list: taasmoutput; reg: tregister); override;
procedure a_op_const_reg(list: taasmoutput; Op: TOpCG; size: TCGSize; a:
@ -67,8 +65,11 @@ type
{ move instructions }
procedure a_load_const_reg(list: taasmoutput; size: tcgsize; a: aint; reg:
tregister); override;
{ stores the contents of register reg to the memory location described by
ref }
procedure a_load_reg_ref(list: taasmoutput; fromsize, tosize: tcgsize; reg:
tregister; const ref: treference); override;
{ loads the memory pointed to by ref into register reg }
procedure a_load_ref_reg(list: taasmoutput; fromsize, tosize: tcgsize; const
Ref: treference; reg: tregister); override;
procedure a_load_reg_reg(list: taasmoutput; fromsize, tosize: tcgsize; reg1,
@ -77,6 +78,7 @@ type
{ fpu move instructions }
procedure a_loadfpu_reg_reg(list: taasmoutput; size: tcgsize; reg1, reg2:
tregister); override;
procedure a_loadfpu_ref_reg(list: taasmoutput; size: tcgsize; const ref:
treference; reg: tregister); override;
procedure a_loadfpu_reg_ref(list: taasmoutput; size: tcgsize; reg:
@ -150,6 +152,8 @@ type
{ returns true if the offset of the given reference can not be represented by a 16 bit
immediate as required by some PowerPC instructions }
function hasLargeOffset(const ref : TReference) : Boolean; inline;
procedure a_call_name_direct(list: taasmoutput; s: string; prependDot : boolean; addNOP : boolean);
end;
const
@ -258,10 +262,9 @@ begin
else
internalerror(2002072801);
end;
LOC_VOID:
begin
// nothing to do
end;
LOC_VOID:
{ nothing to do }
;
else
internalerror(2002081103);
end;
@ -300,17 +303,17 @@ end;
procedure tcgppc.a_call_name(list: taasmoutput; const s: string);
begin
a_call_name_direct(list, s, true);
a_call_name_direct(list, s, true, true);
end;
procedure tcgppc.a_call_name_direct(list: taasmoutput; s: string; prependDot : boolean);
procedure tcgppc.a_call_name_direct(list: taasmoutput; s: string; prependDot : boolean; addNOP : boolean);
begin
if (prependDot) then begin
s := '.' + s;
end;
if (prependDot) then
s := '.' + s;
list.concat(taicpu.op_sym(A_BL, objectlibrary.newasmsymbol(s, AB_EXTERNAL,
AT_FUNCTION)));
list.concat(taicpu.op_none(A_NOP));
if (addNOP) then
list.concat(taicpu.op_none(A_NOP));
{
the compiler does not properly set this flag anymore in pass 1, and
for now we only need it after pass 2 (I hope) (JM)
@ -324,35 +327,43 @@ end;
{ calling a procedure by address }
procedure tcgppc.a_call_reg(list: taasmoutput; reg: tregister);
var
tmpreg: tregister;
tmpref: treference;
gotref : treference;
begin
tmpreg := rg[R_INTREGISTER].getregister(list, R_SUBWHOLE);
if (not (cs_littlesize in aktglobalswitches)) then begin
{ load actual function entry (reg contains the reference to the function descriptor)
into R0 }
reference_reset_base(tmpref, reg, 0);
a_load_ref_reg(list, OS_ADDR, OS_ADDR, tmpref, NR_R0);
reference_reset(tmpref);
tmpref.offset := 0;
tmpref.base := reg;
list.concat(taicpu.op_reg_ref(A_LD, tmpreg, tmpref));
{ save TOC pointer in stackframe }
reference_reset_base(tmpref, NR_STACK_POINTER_REG, LA_RTOC_ELF);
a_load_reg_ref(list, OS_ADDR, OS_ADDR, NR_RTOC, tmpref);
// TODO: GOT change
{ move actual function pointer to CTR register }
list.concat(taicpu.op_reg(A_MTCTR, NR_R0));
// reference_reset(gotref);
// tmpref.offset := 40;
// tmpref.base := rg[R_INTREGISTER].getregister(list, NR_STACK_POINTER_REG);
{ load new TOC pointer from function descriptor into RTOC register }
reference_reset_base(tmpref, reg, tcgsize2size[OS_ADDR]);
a_load_ref_reg(list, OS_ADDR, OS_ADDR, tmpref, NR_RTOC);
// taicpu.op_load_reg_ref(list, OS_INT, OS_INT,
list.concat(taicpu.op_reg(A_MTCTR, tmpreg));
{ load new environment pointer from function descriptor into R11 register }
reference_reset_base(tmpref, reg, 2*tcgsize2size[OS_ADDR]);
a_load_ref_reg(list, OS_ADDR, OS_ADDR, tmpref, NR_R11);
{ call function }
list.concat(taicpu.op_none(A_BCTRL));
end else begin
{ call ptrgl helper routine which expects the pointer to the function descriptor
in R11 }
a_load_reg_reg(list, OS_ADDR, OS_ADDR, reg, NR_R11);
a_call_name_direct(list, 'ptrgl', true, false);
end;
{ we need to load the old RTOC from stackframe because we changed it}
reference_reset_base(tmpref, NR_STACK_POINTER_REG, LA_RTOC_ELF);
a_load_ref_reg(list, OS_ADDR, OS_ADDR, tmpref, NR_RTOC);
list.concat(taicpu.op_none(A_BCTRL));
//if target_info.system=system_powerpc_macos then
// //NOP is not needed here.
// list.concat(taicpu.op_none(A_NOP));
include(current_procinfo.flags, pi_do_call);
end;
@ -389,7 +400,7 @@ procedure tcgppc.a_load_const_reg(list: taasmoutput; size: TCGSize; a: aint;
procedure load32bitconstantR0(list : taasmoutput; size : TCGSize; a : longint;
reg : TRegister);
begin
// only 16 bit constant? (-2^15 <= a <= +2^15-1)
{ only 16 bit constant? (-2^15 <= a <= +2^15-1) }
if (a >= low(smallint)) and (a <= high(smallint)) then begin
list.concat(taicpu.op_reg_const(A_LI, reg, smallint(a)));
end else begin
@ -474,7 +485,7 @@ procedure tcgppc.a_load_ref_reg(list: taasmoutput; fromsize, tosize: tcgsize;
const
LoadInstr: array[OS_8..OS_S64, boolean, boolean] of TAsmOp =
{ indexed? updating?}
{ indexed? updating? }
(((A_LBZ, A_LBZU), (A_LBZX, A_LBZUX)),
((A_LHZ, A_LHZU), (A_LHZX, A_LHZUX)),
((A_LWZ, A_LWZU), (A_LWZX, A_LWZUX)),
@ -493,26 +504,24 @@ var
ref2: treference;
begin
{ TODO: optimize/take into consideration fromsize/tosize. Will }
{ probably only matter for OS_S8 loads though }
if not (fromsize in [OS_8, OS_S8, OS_16, OS_S16, OS_32, OS_S32, OS_64, OS_S64]) then
internalerror(2002090902);
ref2 := ref;
fixref(list, ref2, tosize);
{ the caller is expected to have adjusted the reference already }
{ in this case }
{ the caller is expected to have adjusted the reference already
in this case }
if (TCGSize2Size[fromsize] >= TCGSize2Size[tosize]) then
fromsize := tosize;
op := loadinstr[fromsize, ref2.index <> NR_NO, false];
// there is no LWAU instruction, simulate using ADDI and LWA
{ there is no LWAU instruction, simulate using ADDI and LWA }
if (op = A_LWAU) then begin
list.concat(taicpu.op_reg_reg_const(A_ADDI, reg, reg, ref2.offset));
ref2.offset := 0;
op := A_LWA;
end;
a_load_store(list, op, reg, ref2);
// sign extend shortint if necessary, since there is no
// load instruction that does that automatically (JM)
{ sign extend shortint if necessary, since there is no
load instruction that does that automatically (JM) }
if fromsize = OS_S8 then
list.concat(taicpu.op_reg_reg(A_EXTSB, reg, reg));
end;
@ -573,8 +582,8 @@ var
ref2: treference;
begin
{ several functions call this procedure with OS_32 or OS_64 }
{ so this makes life easier (FK) }
{ several functions call this procedure with OS_32 or OS_64
so this makes life easier (FK) }
case size of
OS_32, OS_F32:
size := OS_F32;
@ -594,7 +603,7 @@ procedure tcgppc.a_loadfpu_reg_ref(list: taasmoutput; size: tcgsize; reg:
const
FpuStoreInstr: array[OS_F32..OS_F64, boolean, boolean] of TAsmOp =
{ indexed? updating?}
{ indexed? updating? }
(((A_STFS, A_STFSU), (A_STFSX, A_STFSUX)),
((A_STFD, A_STFDU), (A_STFDX, A_STFDUX)));
var
@ -635,7 +644,7 @@ var
begin
usereg := false;
if (size in [OS_64, OS_S64]) then begin
// ts: use register method for 64 bit consts. Sloooooow
{ ts: use register method for 64 bit consts. Sloooooow }
usereg := true;
end else if (size in [OS_32, OS_S32]) then begin
list.concat(taicpu.op_reg_reg_const(oplo, dst, src, word(a)));
@ -652,8 +661,8 @@ begin
end;
ophi := TOpCG2AsmOpConstHi[op];
oplo := TOpCG2AsmOpConstLo[op];
// peephole optimizations for AND, OR, XOR - can't this be done at
// some higher level, independent of architecture?
{ peephole optimizations for AND, OR, XOR - can't this be done at
some higher level, independent of architecture? }
if (op in [OP_AND, OP_OR, OP_XOR]) then begin
if (a = 0) then begin
if op = OP_AND then
@ -682,8 +691,7 @@ begin
exit;
end;
{ otherwise, the instructions we can generate depend on the }
{ operation }
{ otherwise, the instructions we can generate depend on the operation }
useReg := false;
case op of
OP_DIV, OP_IDIV:
@ -897,8 +905,7 @@ var
bitvalue: boolean;
begin
{ get the bit to extract from the conditional register + its }
{ requested value (0 or 1) }
{ get the bit to extract from the conditional register + its requested value (0 or 1) }
testbit := ((f.cr - RS_CR0) * 4);
case f.flag of
F_EQ, F_NE:
@ -920,8 +927,7 @@ begin
end;
{ load the conditional register in the destination reg }
list.concat(taicpu.op_reg(A_MFCR, reg));
{ we will move the bit that has to be tested to bit 0 by rotating }
{ left }
{ we will move the bit that has to be tested to bit 0 by rotating left }
testbit := (testbit + 1) and 31;
{ extract bit }
list.concat(taicpu.op_reg_reg_const_const_const(
@ -980,13 +986,13 @@ end;
procedure tcgppc.g_proc_entry(list: taasmoutput; localsize: longint;
nostackframe: boolean);
{ generated the entry code of a procedure/function. Note: localsize is the }
{ sum of the size necessary for local variables and the maximum possible }
{ combined size of ALL the parameters of a procedure called by the current }
{ one. }
{ This procedure may be called before, as well as after g_return_from_proc }
{ is called. NOTE registers are not to be allocated through the register }
{ allocator here, because the register colouring has already occured !! }
{ generated the entry code of a procedure/function. Note: localsize is the
sum of the size necessary for local variables and the maximum possible
combined size of ALL the parameters of a procedure called by the current
one.
This procedure may be called before, as well as after g_return_from_proc
is called. NOTE registers are not to be allocated through the register
allocator here, because the register colouring has already occured !! }
var
firstregfpu, firstreggpr: TSuperRegister;
href: treference;
@ -996,34 +1002,34 @@ var
fprcount, gprcount : aint;
begin
{ CR and LR only have to be saved in case they are modified by the current }
{ procedure, but currently this isn't checked, so save them always }
{ following is the entry code as described in "Altivec Programming }
{ Interface Manual", bar the saving of AltiVec registers }
{ CR and LR only have to be saved in case they are modified by the current
procedure, but currently this isn't checked, so save them always
following is the entry code as described in "Altivec Programming
Interface Manual", bar the saving of AltiVec registers }
a_reg_alloc(list, NR_STACK_POINTER_REG);
a_reg_alloc(list, NR_R0);
calcFirstUsedFPR(firstregfpu, fprcount);
calcFirstUsedGPR(firstreggpr, gprcount);
// calculate real stack frame size
{ calculate real stack frame size }
localsize := tppcprocinfo(current_procinfo).calc_stackframe_size(
gprcount, fprcount);
// determine whether we need to save the link register
{ determine whether we need to save the link register }
needslinkreg := ((not (po_assembler in current_procinfo.procdef.procoptions)) and
(pi_do_call in current_procinfo.flags));
// move link register to r0
{ move link register to r0 }
if (needslinkreg) then begin
list.concat(taicpu.op_reg(A_MFLR, NR_R0));
end;
// save old stack frame pointer
{ save old stack frame pointer }
if (localsize > 0) then begin
a_reg_alloc(list, NR_R12);
list.concat(taicpu.op_reg_reg(A_MR, NR_R12, NR_STACK_POINTER_REG));
end;
// save registers, FPU first, then GPR
{ save registers, FPU first, then GPR }
reference_reset_base(href, NR_STACK_POINTER_REG, -8);
if (fprcount > 0) then begin
for regcount := RS_F31 downto firstregfpu do begin
@ -1040,15 +1046,15 @@ begin
end;
end;
// VMX registers not supported by FPC atm
{ VMX registers not supported by FPC atm }
// we may need to store R0 (=LR) ourselves
{ we may need to store R0 (=LR) ourselves }
if (needslinkreg) then begin
reference_reset_base(href, NR_STACK_POINTER_REG, LA_LR_ELF);
list.concat(taicpu.op_reg_ref(A_STD, NR_R0, href));
end;
// create stack frame
{ create stack frame }
if (not nostackframe) and (localsize > 0) then begin
if (localsize <= high(smallint)) then begin
reference_reset_base(href, NR_STACK_POINTER_REG, -localsize);
@ -1056,10 +1062,10 @@ begin
end else begin
reference_reset_base(href, NR_NO, -localsize);
// use R0 for loading the constant (which is definitely > 32k when entering
// this branch)
// inlined because it must not use temp registers because register allocations
// have already been done :(
{ use R0 for loading the constant (which is definitely > 32k when entering
this branch)
Inlined at this position because it must not use temp registers because
register allocations have already been done :( }
{ Code template:
lis r0,ofs@highest
ori r0,r0,ofs@higher
@ -1076,9 +1082,9 @@ begin
list.concat(taicpu.op_reg_reg_reg(A_STDUX, NR_R1, NR_R1, NR_R0));
end;
end;
// CR register not used by FPC atm
{ CR register not used by FPC atm }
// keep R1 allocated???
{ keep R1 allocated??? }
a_reg_dealloc(list, NR_R0);
end;
@ -1097,26 +1103,26 @@ begin
calcFirstUsedFPR(firstregfpu, fprcount);
calcFirstUsedGPR(firstreggpr, gprcount);
// determine whether we need to restore the link register
{ determine whether we need to restore the link register }
needslinkreg := ((not (po_assembler in current_procinfo.procdef.procoptions)) and
(pi_do_call in current_procinfo.flags));
// calculate stack frame
{ calculate stack frame }
localsize := tppcprocinfo(current_procinfo).calc_stackframe_size(
gprcount, fprcount);
// CR register not supported
{ CR register not supported }
// restore stack pointer
{ restore stack pointer }
if (not nostackframe) and (localsize > 0) then begin
if (localsize <= high(smallint)) then begin
list.concat(taicpu.op_reg_reg_const(A_ADDI, NR_STACK_POINTER_REG, NR_STACK_POINTER_REG, localsize));
end else begin
reference_reset_base(href, NR_NO, localsize);
// use R0 for loading the constant (which is definitely > 32k when entering
// this branch)
// inlined because it must not use temp registers because register allocations
// have already been done :(
{ use R0 for loading the constant (which is definitely > 32k when entering
this branch)
Inlined because it must not use temp registers because register allocations
have already been done :( }
{ Code template:
lis r0,ofs@highest
ori r0,ofs@higher
@ -1134,7 +1140,7 @@ begin
end;
end;
// load registers, FPR first, then GPR
{ load registers, FPR first, then GPR }
{$note ts:todo change order of loading}
reference_reset_base(href, NR_STACK_POINTER_REG, -tcgsize2size[OS_FLOAT]);
if (fprcount > 0) then begin
@ -1152,16 +1158,16 @@ begin
end;
end;
// VMX not supported...
{ VMX not supported... }
// restore LR (if needed)
{ restore LR (if needed) }
if (needslinkreg) then begin
reference_reset_base(href, NR_STACK_POINTER_REG, LA_LR_ELF);
list.concat(taicpu.op_reg_ref(A_LD, NR_R0, href));
list.concat(taicpu.op_reg(A_MTLR, NR_R0));
end;
// generate return instruction
{ generate return instruction }
list.concat(taicpu.op_none(A_BLR));
end;
@ -1171,7 +1177,7 @@ procedure tcgppc.a_loadaddr_ref_reg(list: taasmoutput; const ref: treference; r:
var
ref2, tmpref: treference;
// register used to construct address
{ register used to construct address }
tempreg : TRegister;
begin
@ -1185,17 +1191,17 @@ begin
tmpref.offset := ref2.offset;
tmpref.symbol := ref2.symbol;
tmpref.relsymbol := ref2.relsymbol;
// load 64 bit reference into r. If the reference already has a base register,
// first load the 64 bit value into a temp register, then add it to the result
// register rD
{ load 64 bit reference into r. If the reference already has a base register,
first load the 64 bit value into a temp register, then add it to the result
register rD }
if (ref2.base <> NR_NO) then begin
// already have a base register, so allocate a new one
{ already have a base register, so allocate a new one }
tempreg := rg[R_INTREGISTER].getregister(list, R_SUBWHOLE);
end else begin
tempreg := r;
end;
// code for loading a reference from a symbol into a register rD.
{ code for loading a reference from a symbol into a register rD }
(*
lis rX,SYM@highest
ori rX,SYM@higher
@ -1213,8 +1219,8 @@ begin
tmpref.refaddr := addr_low;
list.concat(taicpu.op_reg_reg_ref(A_ORI, tempreg, tempreg, tmpref));
// if there's already a base register, add the temp register contents to
// the base register
{ if there's already a base register, add the temp register contents to
the base register }
if (ref2.base <> NR_NO) then begin
list.concat(taicpu.op_reg_reg_reg(A_ADD, r, tempreg, ref2.base));
end;
@ -1222,8 +1228,8 @@ begin
{ no symbol, but offset <> 0 }
if ref2.base <> NR_NO then begin
a_op_const_reg_reg(list, OP_ADD, OS_64, ref2.offset, ref2.base, r)
{ FixRef makes sure that "(ref.index <> R_NO) and (ref.offset <> 0)" never}
{ occurs, so now only ref.offset has to be loaded }
{ FixRef makes sure that "(ref.index <> R_NO) and (ref.offset <> 0)" never
occurs, so now only ref.offset has to be loaded }
end else begin
a_load_const_reg(list, OS_64, ref2.offset, r)
end;
@ -1305,18 +1311,18 @@ begin
{ generate a loop }
if count > 4 then begin
{ the offsets are zero after the a_loadaddress_ref_reg and just }
{ have to be set to 8. I put an Inc there so debugging may be }
{ easier (should offset be different from zero here, it will be }
{ easy to notice in the generated assembler }
{ the offsets are zero after the a_loadaddress_ref_reg and just
have to be set to 8. I put an Inc there so debugging may be
easier (should offset be different from zero here, it will be
easy to notice in the generated assembler }
inc(dst.offset, 8);
inc(src.offset, 8);
list.concat(taicpu.op_reg_reg_const(A_SUBI, src.base, src.base, 8));
list.concat(taicpu.op_reg_reg_const(A_SUBI, dst.base, dst.base, 8));
countreg := rg[R_INTREGISTER].getregister(list, R_SUBWHOLE);
a_load_const_reg(list, OS_32, count, countreg);
{ explicitely allocate R_0 since it can be used safely here }
{ (for holding date that's being copied) }
{ explicitely allocate R_0 since it can be used safely here
(for holding date that's being copied) }
a_reg_alloc(list, NR_F0);
objectlibrary.getjumplabel(lab);
a_label(list, lab);
@ -1382,10 +1388,10 @@ begin
(torddef(def).typ in [u64bit, u16bit, u32bit, u8bit, uchar,
bool8bit, bool16bit, bool32bit]))) then
begin
// ... instruction setting overflow flag ...
// mfxerf R0
// mtcrf 128, R0
// ble cr0, label
{ ... instructions setting overflow flag ...
mfxerf R0
mtcrf 128, R0
ble cr0, label }
list.concat(taicpu.op_reg(A_MFXER, NR_R0));
list.concat(taicpu.op_const_reg(A_MTCRF, 128, NR_R0));
flags.cr := RS_CR0;
@ -1426,15 +1432,15 @@ procedure tcgppc.g_intf_wrapper(list: TAAsmoutput; procdef: tprocdef; const
{ add support for offsets > 16 bit }
internalerror(200510201);
list.concat(taicpu.op_reg_ref(A_LD, NR_R11, href));
// the loaded reference is a function descriptor reference, so deref again
// (at ofs 0 there's the real pointer)
{ the loaded reference is a function descriptor reference, so deref again
(at ofs 0 there's the real pointer) }
{$warning ts:TODO: update GOT reference}
reference_reset_base(href, NR_R11, 0);
list.concat(taicpu.op_reg_ref(A_LD, NR_R11, href));
list.concat(taicpu.op_reg(A_MTCTR, NR_R11));
list.concat(taicpu.op_none(A_BCTR));
// NOP needed for the linker...?
{ NOP needed for the linker...? }
list.concat(taicpu.op_none(A_NOP));
end;
@ -1464,11 +1470,10 @@ begin
{ set param1 interface to self }
g_adjust_self_value(list, procdef, ioffset);
{ case 4 }
if po_virtualmethod in procdef.procoptions then begin
loadvmttor11;
op_onr11methodaddr;
end { case 0 } else
end else
{$note ts:todo add GOT change?? - think not needed :) }
list.concat(taicpu.op_sym(A_B,
objectlibrary.newasmsymbol('.' + procdef.mangledname, AB_EXTERNAL,

View File

@ -60,6 +60,22 @@
li 3,1
sc
.endm
.section ".text"
.align 3
.globl .ptrgl
.ptrgl:
ld 0, 0(11)
std 2, 40(1)
mtctr 0
ld 2, 8(11)
ld 11, 8(11)
bctr
.long 0
.byte 0, 12, 0, 0, 0, 0, 0, 0
.type .ptrgl, @function
.size .ptrgl, . - .ptrgl
/*
Main Pascal entry point label (function)
*/
@ -69,7 +85,7 @@ FUNCTION_PROLOG _start
/* Set up an initial stack frame, and clear the LR */
clrrdi 1, 1, 5 /* align r1 */
li 0, 0
stdu 1,-48(1)
stdu 1,-128(1)
mtlr 0
std 0, 0(1) /* r1 = pointer to NULL value */