* jumptable support

* fixed cg for loading from addresses with symbol and large offset, but no base register

git-svn-id: trunk@1547 -
This commit is contained in:
tom_at_work 2005-10-20 17:33:33 +00:00
parent 202236c6e0
commit 9531293b89
2 changed files with 119 additions and 48 deletions

View File

@ -146,6 +146,10 @@ type
{ returns the lowest numbered GP register in use, and the number of used GP registers { returns the lowest numbered GP register in use, and the number of used GP registers
for the current procedure } for the current procedure }
procedure calcFirstUsedGPR(out firstgpr : TSuperRegister; out gprcount : aint); procedure calcFirstUsedGPR(out firstgpr : TSuperRegister; out gprcount : aint);
{ returns true if the offset of the given reference can not be represented by a 16 bit
immediate as required by some PowerPC instructions }
function hasLargeOffset(const ref : TReference) : Boolean; inline;
end; end;
const const
@ -1072,7 +1076,6 @@ begin
list.concat(taicpu.op_reg_reg_reg(A_STDUX, NR_R1, NR_R1, NR_R0)); list.concat(taicpu.op_reg_reg_reg(A_STDUX, NR_R1, NR_R1, NR_R0));
end; end;
end; end;
// CR register not used by FPC atm // CR register not used by FPC atm
// keep R1 allocated??? // keep R1 allocated???
@ -1084,8 +1087,6 @@ procedure tcgppc.g_proc_exit(list: taasmoutput; parasize: longint; nostackframe:
{ This procedure may be called before, as well as after g_stackframe_entry } { This procedure may be called before, as well as after g_stackframe_entry }
{ is called. NOTE registers are not to be allocated through the register } { is called. NOTE registers are not to be allocated through the register }
{ allocator here, because the register colouring has already occurred !! } { allocator here, because the register colouring has already occurred !! }
var var
regcount, firstregfpu, firstreggpr: TSuperRegister; regcount, firstregfpu, firstreggpr: TSuperRegister;
href: treference; href: treference;
@ -1177,7 +1178,7 @@ begin
ref2 := ref; ref2 := ref;
fixref(list, ref2, OS_64); fixref(list, ref2, OS_64);
{ load a symbol } { load a symbol }
if assigned(ref2.symbol) or (ref2.offset < low(smallint)) or (ref2.offset > high(smallint)) then begin if assigned(ref2.symbol) or (hasLargeOffset(ref2)) then begin
{ add the symbol's value to the base of the reference, and if the } { add the symbol's value to the base of the reference, and if the }
{ reference doesn't have a base, create one } { reference doesn't have a base, create one }
reference_reset(tmpref); reference_reset(tmpref);
@ -1416,14 +1417,14 @@ procedure tcgppc.g_intf_wrapper(list: TAAsmoutput; procdef: tprocdef; const
{ call/jmp vmtoffs(%eax) ; method offs } { call/jmp vmtoffs(%eax) ; method offs }
reference_reset_base(href, NR_R11, reference_reset_base(href, NR_R11,
procdef._class.vmtmethodoffset(procdef.extnumber)); procdef._class.vmtmethodoffset(procdef.extnumber));
if not ((aint(href.offset) >= low(smallint)) and if not (hasLargeOffset(href)) then begin
(aint(href.offset) <= high(smallint))) then begin
{$warning ts:adapt me for offsets > 16 bit }
list.concat(taicpu.op_reg_reg_const(A_ADDIS, NR_R11, NR_R11, list.concat(taicpu.op_reg_reg_const(A_ADDIS, NR_R11, NR_R11,
smallint((href.offset shr 16) + ord(smallint(href.offset and $FFFF) < smallint((href.offset shr 16) + ord(smallint(href.offset and $FFFF) <
0)))); 0))));
href.offset := smallint(href.offset and $FFFF); href.offset := smallint(href.offset and $FFFF);
end; end else
{ add support for offsets > 16 bit }
internalerror(200510201);
list.concat(taicpu.op_reg_ref(A_LD, NR_R11, href)); list.concat(taicpu.op_reg_ref(A_LD, NR_R11, href));
// the loaded reference is a function descriptor reference, so deref again // the loaded reference is a function descriptor reference, so deref again
// (at ofs 0 there's the real pointer) // (at ofs 0 there's the real pointer)
@ -1494,7 +1495,6 @@ begin
end; end;
function tcgppc.fixref(list: taasmoutput; var ref: treference; const size : TCgsize): boolean; function tcgppc.fixref(list: taasmoutput; var ref: treference; const size : TCgsize): boolean;
var var
tmpreg: tregister; tmpreg: tregister;
needsAlign : boolean; needsAlign : boolean;
@ -1523,14 +1523,14 @@ var
tmpref: treference; tmpref: treference;
largeOffset: Boolean; largeOffset: Boolean;
begin begin
// at this point there must not be a combination of values in the ref treference { at this point there must not be a combination of values in the ref treference
// which is not possible to directly map to instructions of the PowerPC architecture which is not possible to directly map to instructions of the PowerPC architecture }
if (ref.index <> NR_NO) and ((ref.offset <> 0) or (assigned(ref.symbol))) then if (ref.index <> NR_NO) and ((ref.offset <> 0) or (assigned(ref.symbol))) then
internalerror(200310131); internalerror(200310131);
// for some instructions we need to check that the offset is divisible by at { for some instructions we need to check that the offset is divisible by at
// least four. If not, add the bytes which are "off" to the base register and least four. If not, add the bytes which are "off" to the base register and
// adjust the offset accordingly adjust the offset accordingly }
case op of case op of
A_LD, A_LDU, A_STD, A_STDU, A_LWA, A_LWAU : A_LD, A_LDU, A_STD, A_STDU, A_LWA, A_LWAU :
if ((ref.offset mod 4) <> 0) then begin if ((ref.offset mod 4) <> 0) then begin
@ -1547,26 +1547,31 @@ begin
end; end;
end; end;
// if we have to load/store from a symbol or large addresses, use a temporary register { if we have to load/store from a symbol or large addresses, use a temporary register
// containing the address containing the address }
if assigned(ref.symbol) or (ref.offset < low(smallint)) or (ref.offset > high(smallint)) then begin if assigned(ref.symbol) or (hasLargeOffset(ref)) then begin
tmpreg := rg[R_INTREGISTER].getregister(list, R_SUBWHOLE); tmpreg := rg[R_INTREGISTER].getregister(list, R_SUBWHOLE);
if (hasLargeOffset(ref) and (ref.base = NR_NO)) then begin
ref.base := rg[R_INTREGISTER].getregister(list, R_SUBWHOLE);
a_load_const_reg(list, OS_ADDR, ref.offset, ref.base);
ref.offset := 0;
end;
reference_reset(tmpref); reference_reset(tmpref);
tmpref.symbol := ref.symbol; tmpref.symbol := ref.symbol;
tmpref.relsymbol := ref.relsymbol; tmpref.relsymbol := ref.relsymbol;
tmpref.offset := ref.offset; tmpref.offset := ref.offset;
if (ref.base <> NR_NO) then begin if (ref.base <> NR_NO) then begin
{ { As long as the TOC isn't working we try to achieve highest speed (in this
As long as the TOC isn't working we try to achieve highest speed (in this case by allowing instructions execute in parallel) as possible at the cost
case by allowing instructions execute in parallel) as possible, at the cost
of using another temporary register. So the code template when there is of using another temporary register. So the code template when there is
a base register and an offset is the following: a base register and an offset is the following:
lis rT1, SYM+offs@highest lis rT1, SYM+offs@highest
ori rT1, rT1, SYM+offs@higher ori rT1, rT1, SYM+offs@higher
lis rT2, SYM+offs@high lis rT2, SYM+offs@hi
ori rT2, SYM+offs@low ori rT2, SYM+offs@lo
rldimi rT2, rT1, 32 rldimi rT2, rT1, 32
<op>X reg, base, rT2 <op>X reg, base, rT2
@ -1589,7 +1594,7 @@ begin
tmpref.base := ref.base; tmpref.base := ref.base;
tmpref.index := tmpreg2; tmpref.index := tmpreg2;
case op of case op of
// the code generator doesn't generate update instructions anyway { the code generator doesn't generate update instructions anyway }
A_LBZ : op := A_LBZX; A_LBZ : op := A_LBZX;
A_LHZ : op := A_LHZX; A_LHZ : op := A_LHZX;
A_LWZ : op := A_LWZX; A_LWZ : op := A_LWZX;
@ -1607,7 +1612,7 @@ begin
A_STFS : op := A_STFSX; A_STFS : op := A_STFSX;
A_STFD : op := A_STFDX; A_STFD : op := A_STFDX;
else else
// unknown load/store opcode { unknown load/store opcode }
internalerror(2005101302); internalerror(2005101302);
end; end;
list.concat(taicpu.op_reg_ref(op, reg, tmpref)); list.concat(taicpu.op_reg_ref(op, reg, tmpref));
@ -1652,6 +1657,12 @@ begin
list.concat(p) list.concat(p)
end; end;
{ Returns true if ref.offset lies outside low(smallint)..high(smallint), i.e.
  it does not fit into a signed 16 bit immediate. Only ref.offset is inspected;
  symbol, base and index of the reference are ignored. }
function tcgppc.hasLargeOffset(const ref : TReference) : Boolean;
begin
{ Single-comparison range check: subtracting low(smallint) moves the valid range
  to 0..high(smallint)-low(smallint); after the cast to the unsigned aword every
  offset outside the valid range compares greater than that upper bound.
  The original author notes this form is needed because offsets of TReferences
  are unsigned (the declaration of TReference is not visible here). }
result := aword(ref.offset-low(smallint)) > high(smallint)-low(smallint);
end;
begin begin
cg := tcgppc.create; cg := tcgppc.create;
end. end.

View File

@ -26,33 +26,103 @@ unit nppcset;
interface interface
uses uses
node, nset, ncgset, cpubase, cgbase, cgobj, aasmbase, aasmtai; node, nset, ncgset, cpubase, cgbase, cgobj, aasmbase, aasmtai, globtype;
type type
tppccasenode = class(tcgcasenode) tppccasenode = class(tcgcasenode)
protected protected
procedure optimizevalues(var max_linear_list : aint; var max_dist : aword); override;
function has_jumptable : boolean; override;
procedure genjumptable(hp: pcaselabel; min_, max_ : aint); override;
procedure genlinearlist(hp: pcaselabel); override; procedure genlinearlist(hp: pcaselabel); override;
end; end;
implementation implementation
uses uses
globtype, systems, systems,
verbose, globals, verbose, globals,
symconst, symdef, defutil, symconst, symdef, defutil,
paramgr, paramgr,
cpuinfo, cpuinfo,
pass_2, cgcpu, pass_2, cgcpu,
ncon, ncon,
tgobj, ncgutil, regvars, rgobj, aasmcpu; tgobj, ncgutil, regvars, rgobj, aasmcpu,
procinfo, cgutils;
{***************************************************************************** {*****************************************************************************
TCGCASENODE TCGCASENODE
*****************************************************************************} *****************************************************************************}
procedure tppccasenode.genlinearlist(hp: pcaselabel); procedure tppccasenode.optimizevalues(var max_linear_list : aint; var max_dist : aword);
begin
max_linear_list := 10;
end;
{ Override of the inherited has_jumptable; unconditionally returns true.
  NOTE(review): presumably the generic case node consults this to decide whether
  genjumptable may be used for this target - confirm against tcgcasenode. }
function tppccasenode.has_jumptable : boolean;
begin
has_jumptable := true;
end;
{ Emits a table based dispatch for a case statement.

  hp         root of the case label tree
  min_, max_ smallest and largest case value covered by the table

  The selector in hregister is (unless jumptable_no_range is set) compared
  against min_ and max_, scaled by the size of an address, used to load the
  target address from the table and branched to through the count register.
  The table itself is appended to the local data of the current procedure. }
procedure tppccasenode.genjumptable(hp : pcaselabel; min_, max_ : aint);
var
  tablelabel : tasmlabel;
  prevhigh : TConstExprInt;
  idxreg : tregister;
  tableref : treference;

  { Appends the table entries for the subtree rooted at lab to list, visiting
    the less subtree first, then the label itself, then the greater subtree.
    prevhigh (owned by the enclosing procedure) carries the highest value that
    has already been emitted. }
  procedure emitentries(list : taasmoutput; lab : pcaselabel);
  var
    caseval : aint;
  begin
    if assigned(lab^.less) then
      emitentries(list, lab^.less);

    { values between the previous label and this one belong to no block, so
      their entries point to the else label }
    for caseval := prevhigh + 1 to lab^._low - 1 do
      list.concat(Tai_const.Create_sym(elselabel));

    { one entry per value in the range of this label }
    for caseval := lab^._low to lab^._high do
      list.concat(Tai_const.Create_sym(blocklabel(lab^.blockid)));

    prevhigh := lab^._high;

    if assigned(lab^.greater) then
      emitentries(list, lab^.greater);
  end;

begin
  { This is exactly the same code as for 32 bit PowerPC processors. It might be
    useful to change this later (e.g. with TOC support) into a method which uses
    relative values in the jump table to save space and memory bandwidth. At the
    moment this is not a good idea, since these methods involve loading one or
    more 64 bit integer addresses, which is slow. }
  if not(jumptable_no_range) then
    begin
      { selector below min_ => else label }
      cg.a_cmp_const_reg_label(exprasmlist, opsize, jmp_lt, aint(min_), hregister, elselabel);
      { selector above max_ => else label }
      cg.a_cmp_const_reg_label(exprasmlist, opsize, jmp_gt, aint(max_), hregister, elselabel);
    end;

  objectlibrary.getjumplabel(tablelabel);

  { obtain the index register as an OS_INT sized view of hregister and load
    the selector into it }
  idxreg := cg.makeregsize(exprasmlist, hregister, OS_INT);
  cg.a_load_reg_reg(exprasmlist, opsize, OS_INT, hregister, idxreg);

  { idxreg := idxreg * size of an address; the reference is biased by
    -min_ entries so that the value min_ selects the first table entry }
  cg.a_op_const_reg(exprasmlist, OP_MUL, OS_INT, tcgsize2size[OS_ADDR], idxreg);
  reference_reset_symbol(tableref, tablelabel, (-aint(min_)) * tcgsize2size[OS_ADDR]);
  tableref.index := idxreg;

  { fetch the target address and branch to it via the count register }
  cg.a_load_ref_reg(exprasmlist, OS_INT, OS_INT, tableref, idxreg);
  exprasmlist.concat(taicpu.op_reg(A_MTCTR, idxreg));
  exprasmlist.concat(taicpu.op_none(A_BCTR));

  { generate the jump table in the local data of the current procedure }
  new_section(current_procinfo.aktlocaldata, sec_data, current_procinfo.procdef.mangledname, sizeof(aint));
  current_procinfo.aktlocaldata.concat(Tai_label.Create(tablelabel));
  prevhigh := min_;
  emitentries(current_procinfo.aktlocaldata, hp);
end;
procedure tppccasenode.genlinearlist(hp: pcaselabel);
var var
first, lastrange: boolean; first, lastrange: boolean;
last: TConstExprInt; last: TConstExprInt;
@ -81,13 +151,11 @@ var
if assigned(t^.less) then if assigned(t^.less) then
genitem(t^.less); genitem(t^.less);
{ do we need to test the first value? } { do we need to test the first value? }
if first and (t^._low > get_min_value(left.resulttype.def)) then if first and (t^._low > get_min_value(left.resulttype.def)) then begin
begin
cg.a_cmp_const_reg_label(exprasmlist, OS_INT, jmp_lt, aword(t^._low), cg.a_cmp_const_reg_label(exprasmlist, OS_INT, jmp_lt, aword(t^._low),
hregister, elselabel); hregister, elselabel);
end; end;
if t^._low = t^._high then if t^._low = t^._high then begin
begin
if t^._low - last = 0 then if t^._low - last = 0 then
cg.a_cmp_const_reg_label(exprasmlist, opsize, OC_EQ, 0, hregister, cg.a_cmp_const_reg_label(exprasmlist, opsize, OC_EQ, 0, hregister,
blocklabel(t^.blockid)) blocklabel(t^.blockid))
@ -96,26 +164,20 @@ var
tcgppc(cg).a_jmp_cond(exprasmlist, OC_EQ, blocklabel(t^.blockid)); tcgppc(cg).a_jmp_cond(exprasmlist, OC_EQ, blocklabel(t^.blockid));
last := t^._low; last := t^._low;
lastrange := false; lastrange := false;
end end else begin
else
begin
{ it begins with the smallest label, if the value } { it begins with the smallest label, if the value }
{ is even smaller then jump immediately to the } { is even smaller then jump immediately to the }
{ ELSE-label } { ELSE-label }
if first then if first then begin
begin
{ do we have to adjust the first value? } { do we have to adjust the first value? }
if (t^._low > get_min_value(left.resulttype.def)) then if (t^._low > get_min_value(left.resulttype.def)) then
gensub(aint(t^._low)); gensub(aint(t^._low));
end end else begin
else
begin
{ if there is no unused label between the last and the } { if there is no unused label between the last and the }
{ present label then the lower limit can be checked } { present label then the lower limit can be checked }
{ immediately. else check the range in between: } { immediately. else check the range in between: }
gensub(aint(t^._low - last)); gensub(aint(t^._low - last));
if ((t^._low - last) <> 1) or if ((t^._low - last) <> 1) or (not lastrange) then
(not lastrange) then
tcgppc(cg).a_jmp_cond(exprasmlist, jmp_lt, elselabel); tcgppc(cg).a_jmp_cond(exprasmlist, jmp_lt, elselabel);
end; end;
gensub(aint(t^._high - t^._low)); gensub(aint(t^._high - t^._low));
@ -130,11 +192,9 @@ var
begin begin
{ do we need to generate cmps? } { do we need to generate cmps? }
if (with_sign and (min_label < 0)) or if (with_sign and (min_label < 0)) or (opsize = OS_32) then
(opsize = OS_32) then
genlinearcmplist(hp) genlinearcmplist(hp)
else else begin
begin
last := 0; last := 0;
lastrange := false; lastrange := false;
first := true; first := true;