mirror of
https://gitlab.com/freepascal.org/fpc/source.git
synced 2025-04-14 14:59:32 +02:00
* jumptable support
* fixed cg for loading from addresses with symbol and large offset, but no base register git-svn-id: trunk@1547 -
This commit is contained in:
parent
202236c6e0
commit
9531293b89
@ -146,6 +146,10 @@ type
|
||||
{ returns the lowest numbered GP register in use, and the number of used GP registers
|
||||
for the current procedure }
|
||||
procedure calcFirstUsedGPR(out firstgpr : TSuperRegister; out gprcount : aint);
|
||||
|
||||
{ returns true if the offset of the given reference can not be represented by a 16 bit
|
||||
immediate as required by some PowerPC instructions }
|
||||
function hasLargeOffset(const ref : TReference) : Boolean; inline;
|
||||
end;
|
||||
|
||||
const
|
||||
@ -438,7 +442,7 @@ begin
|
||||
{ combine both registers }
|
||||
list.concat(taicpu.op_reg_reg_const_const(A_RLDIMI, reg, NR_R0, 32, 0));
|
||||
end;
|
||||
end;
|
||||
end;
|
||||
end;
|
||||
|
||||
procedure tcgppc.a_load_reg_ref(list: taasmoutput; fromsize, tosize: TCGSize;
|
||||
@ -1072,7 +1076,6 @@ begin
|
||||
list.concat(taicpu.op_reg_reg_reg(A_STDUX, NR_R1, NR_R1, NR_R0));
|
||||
end;
|
||||
end;
|
||||
|
||||
// CR register not used by FPC atm
|
||||
|
||||
// keep R1 allocated???
|
||||
@ -1084,8 +1087,6 @@ procedure tcgppc.g_proc_exit(list: taasmoutput; parasize: longint; nostackframe:
|
||||
{ This procedure may be called before, as well as after g_stackframe_entry }
|
||||
{ is called. NOTE registers are not to be allocated through the register }
|
||||
{ allocator here, because the register colouring has already occurred !! }
|
||||
|
||||
|
||||
var
|
||||
regcount, firstregfpu, firstreggpr: TSuperRegister;
|
||||
href: treference;
|
||||
@ -1177,7 +1178,7 @@ begin
|
||||
ref2 := ref;
|
||||
fixref(list, ref2, OS_64);
|
||||
{ load a symbol }
|
||||
if assigned(ref2.symbol) or (ref2.offset < low(smallint)) or (ref2.offset > high(smallint)) then begin
|
||||
if assigned(ref2.symbol) or (hasLargeOffset(ref2)) then begin
|
||||
{ add the symbol's value to the base of the reference, and if the }
|
||||
{ reference doesn't have a base, create one }
|
||||
reference_reset(tmpref);
|
||||
@ -1416,14 +1417,14 @@ procedure tcgppc.g_intf_wrapper(list: TAAsmoutput; procdef: tprocdef; const
|
||||
{ call/jmp vmtoffs(%eax) ; method offs }
|
||||
reference_reset_base(href, NR_R11,
|
||||
procdef._class.vmtmethodoffset(procdef.extnumber));
|
||||
if not ((aint(href.offset) >= low(smallint)) and
|
||||
(aint(href.offset) <= high(smallint))) then begin
|
||||
{$warning ts:adapt me for offsets > 16 bit }
|
||||
if not (hasLargeOffset(href)) then begin
|
||||
list.concat(taicpu.op_reg_reg_const(A_ADDIS, NR_R11, NR_R11,
|
||||
smallint((href.offset shr 16) + ord(smallint(href.offset and $FFFF) <
|
||||
0))));
|
||||
href.offset := smallint(href.offset and $FFFF);
|
||||
end;
|
||||
end else
|
||||
{ add support for offsets > 16 bit }
|
||||
internalerror(200510201);
|
||||
list.concat(taicpu.op_reg_ref(A_LD, NR_R11, href));
|
||||
// the loaded reference is a function descriptor reference, so deref again
|
||||
// (at ofs 0 there's the real pointer)
|
||||
@ -1494,7 +1495,6 @@ begin
|
||||
end;
|
||||
|
||||
function tcgppc.fixref(list: taasmoutput; var ref: treference; const size : TCgsize): boolean;
|
||||
|
||||
var
|
||||
tmpreg: tregister;
|
||||
needsAlign : boolean;
|
||||
@ -1523,14 +1523,14 @@ var
|
||||
tmpref: treference;
|
||||
largeOffset: Boolean;
|
||||
begin
|
||||
// at this point there must not be a combination of values in the ref treference
|
||||
// which is not possible to directly map to instructions of the PowerPC architecture
|
||||
{ at this point there must not be a combination of values in the ref treference
|
||||
which is not possible to directly map to instructions of the PowerPC architecture }
|
||||
if (ref.index <> NR_NO) and ((ref.offset <> 0) or (assigned(ref.symbol))) then
|
||||
internalerror(200310131);
|
||||
|
||||
// for some instructions we need to check that the offset is divisible by at
|
||||
// least four. If not, add the bytes which are "off" to the base register and
|
||||
// adjust the offset accordingly
|
||||
|
||||
{ for some instructions we need to check that the offset is divisible by at
|
||||
least four. If not, add the bytes which are "off" to the base register and
|
||||
adjust the offset accordingly }
|
||||
case op of
|
||||
A_LD, A_LDU, A_STD, A_STDU, A_LWA, A_LWAU :
|
||||
if ((ref.offset mod 4) <> 0) then begin
|
||||
@ -1547,26 +1547,31 @@ begin
|
||||
end;
|
||||
end;
|
||||
|
||||
// if we have to load/store from a symbol or large addresses, use a temporary register
|
||||
// containing the address
|
||||
if assigned(ref.symbol) or (ref.offset < low(smallint)) or (ref.offset > high(smallint)) then begin
|
||||
{ if we have to load/store from a symbol or large addresses, use a temporary register
|
||||
containing the address }
|
||||
if assigned(ref.symbol) or (hasLargeOffset(ref)) then begin
|
||||
tmpreg := rg[R_INTREGISTER].getregister(list, R_SUBWHOLE);
|
||||
|
||||
if (hasLargeOffset(ref) and (ref.base = NR_NO)) then begin
|
||||
ref.base := rg[R_INTREGISTER].getregister(list, R_SUBWHOLE);
|
||||
a_load_const_reg(list, OS_ADDR, ref.offset, ref.base);
|
||||
ref.offset := 0;
|
||||
end;
|
||||
|
||||
reference_reset(tmpref);
|
||||
tmpref.symbol := ref.symbol;
|
||||
tmpref.relsymbol := ref.relsymbol;
|
||||
tmpref.offset := ref.offset;
|
||||
if (ref.base <> NR_NO) then begin
|
||||
{
|
||||
As long as the TOC isn't working we try to achieve highest speed (in this
|
||||
case by allowing instructions execute in parallel) as possible, at the cost
|
||||
{ As long as the TOC isn't working we try to achieve highest speed (in this
|
||||
case by allowing instructions execute in parallel) as possible at the cost
|
||||
of using another temporary register. So the code template when there is
|
||||
a base register and an offset is the following:
|
||||
|
||||
lis rT1, SYM+offs@highest
|
||||
ori rT1, rT1, SYM+offs@higher
|
||||
lis rT2, SYM+offs@high
|
||||
ori rT2, SYM+offs@low
|
||||
lis rT2, SYM+offs@hi
|
||||
ori rT2, SYM+offs@lo
|
||||
rldimi rT2, rT1, 32
|
||||
|
||||
<op>X reg, base, rT2
|
||||
@ -1589,7 +1594,7 @@ begin
|
||||
tmpref.base := ref.base;
|
||||
tmpref.index := tmpreg2;
|
||||
case op of
|
||||
// the code generator doesn't generate update instructions anyway
|
||||
{ the code generator doesn't generate update instructions anyway }
|
||||
A_LBZ : op := A_LBZX;
|
||||
A_LHZ : op := A_LHZX;
|
||||
A_LWZ : op := A_LWZX;
|
||||
@ -1607,7 +1612,7 @@ begin
|
||||
A_STFS : op := A_STFSX;
|
||||
A_STFD : op := A_STFDX;
|
||||
else
|
||||
// unknown load/store opcode
|
||||
{ unknown load/store opcode }
|
||||
internalerror(2005101302);
|
||||
end;
|
||||
list.concat(taicpu.op_reg_ref(op, reg, tmpref));
|
||||
@ -1652,6 +1657,12 @@ begin
|
||||
list.concat(p)
|
||||
end;
|
||||
|
||||
{ Returns true if ref.offset lies outside the range low(smallint)..high(smallint),
  i.e. it does not fit into a signed 16 bit immediate field.
  ref: the reference whose offset is tested; it is not modified. }
function tcgppc.hasLargeOffset(const ref : TReference) : Boolean;
|
||||
begin
|
||||
{ this rather strange calculation is required because offsets of TReferences are unsigned }
{ subtracting low(smallint) shifts the valid range so that it starts at zero; after the
  cast to aword a single unsigned comparison against the width of the smallint range
  rejects values on either side of it }
|
||||
result := aword(ref.offset-low(smallint)) > high(smallint)-low(smallint);
|
||||
end;
|
||||
|
||||
begin
|
||||
cg := tcgppc.create;
|
||||
end.
|
||||
|
@ -26,33 +26,103 @@ unit nppcset;
|
||||
interface
|
||||
|
||||
uses
|
||||
node, nset, ncgset, cpubase, cgbase, cgobj, aasmbase, aasmtai;
|
||||
node, nset, ncgset, cpubase, cgbase, cgobj, aasmbase, aasmtai, globtype;
|
||||
|
||||
type
|
||||
|
||||
{ Case statement node for the PowerPC code generator. Derives from tcgcasenode and
  overrides the hooks that tune the case strategy (optimizevalues), announce jump table
  support (has_jumptable) and emit the jump table (genjumptable) or the linear
  compare list (genlinearlist). }
tppccasenode = class(tcgcasenode)
|
||||
protected
|
||||
procedure optimizevalues(var max_linear_list : aint; var max_dist : aword); override;
|
||||
|
||||
function has_jumptable : boolean; override;
|
||||
procedure genjumptable(hp: pcaselabel; min_, max_ : aint); override;
|
||||
procedure genlinearlist(hp: pcaselabel); override;
|
||||
end;
|
||||
|
||||
implementation
|
||||
|
||||
uses
|
||||
globtype, systems,
|
||||
systems,
|
||||
verbose, globals,
|
||||
symconst, symdef, defutil,
|
||||
paramgr,
|
||||
cpuinfo,
|
||||
pass_2, cgcpu,
|
||||
ncon,
|
||||
tgobj, ncgutil, regvars, rgobj, aasmcpu;
|
||||
tgobj, ncgutil, regvars, rgobj, aasmcpu,
|
||||
procinfo, cgutils;
|
||||
|
||||
{*****************************************************************************
|
||||
TCGCASENODE
|
||||
*****************************************************************************}
|
||||
|
||||
procedure tppccasenode.genlinearlist(hp: pcaselabel);
|
||||
{ Adjusts the tuning values used when choosing how a case statement is compiled.
  max_linear_list: set to 10 here.
  max_dist: left unchanged.
  NOTE(review): max_linear_list is presumably the largest number of labels for which the
  inherited code still emits a linear compare list - confirm against tcgcasenode. }
procedure tppccasenode.optimizevalues(var max_linear_list : aint; var max_dist : aword);
|
||||
begin
|
||||
max_linear_list := 10;
|
||||
end;
|
||||
|
||||
{ Always returns true: this node type provides its own genjumptable implementation. }
function tppccasenode.has_jumptable : boolean;
|
||||
begin
|
||||
has_jumptable := true;
|
||||
end;
|
||||
|
||||
{ Emits the code and data for a case statement implemented through a jump table.
  hp:   root of the case label tree (walked through its less/greater links).
  min_: smallest label value; the table entry at index 0 corresponds to it.
  max_: largest label value; only used for the upper range check.
  The generated code optionally range-checks the case expression held in hregister,
  scales it to an offset into the table, loads the target address from the table and
  branches to it through the CTR register. The table itself is appended to the
  local data of the current procedure. }
procedure tppccasenode.genjumptable(hp : pcaselabel; min_, max_ : aint);
|
||||
var
|
||||
table : tasmlabel;
|
||||
last : TConstExprInt;
|
||||
indexreg : tregister;
|
||||
href : treference;
|
||||
|
||||
{ Appends the table entries for the subtree rooted at t to list, visiting the
  labels in ascending order (less subtree, the node itself, greater subtree).
  Reads and updates the enclosing variable last, which holds the highest value
  for which an entry has been emitted so far. }
procedure genitem(list:taasmoutput;t : pcaselabel);
|
||||
var
|
||||
i : aint;
|
||||
begin
|
||||
if assigned(t^.less) then
|
||||
genitem(list,t^.less);
|
||||
{ fill possible hole }
{ values between the previous label and this one have no block of their own, so their
  entries point to elselabel }
|
||||
for i:=last+1 to t^._low-1 do
|
||||
list.concat(Tai_const.Create_sym(elselabel));
|
||||
{ one entry per value in the range _low.._high, all pointing to the same block }
for i:=t^._low to t^._high do
|
||||
list.concat(Tai_const.Create_sym(blocklabel(t^.blockid)));
|
||||
last:=t^._high;
|
||||
if assigned(t^.greater) then
|
||||
genitem(list,t^.greater);
|
||||
end;
|
||||
|
||||
begin
|
||||
{ this is exactly the same code as for 32 bit PowerPC processors. It might be useful to change this
|
||||
later (with e.g. TOC support) into a method which uses relative values in the jumptable to save space
|
||||
and memory bandwidth. At the moment this is not a good idea, since these methods involve loading of
|
||||
one or more 64 bit integer addresses which is slow }
|
||||
if not(jumptable_no_range) then begin
|
||||
{ case expr less than min_ => goto elselabel }
|
||||
cg.a_cmp_const_reg_label(exprasmlist,opsize,jmp_lt,aint(min_),hregister,elselabel);
|
||||
{ case expr greater than max_ => goto elselabel }
|
||||
cg.a_cmp_const_reg_label(exprasmlist,opsize,jmp_gt,aint(max_),hregister,elselabel);
|
||||
end;
|
||||
{ label that marks the start of the jump table }
objectlibrary.getjumplabel(table);
|
||||
{ allocate the index register }
|
||||
indexreg:= cg.makeregsize(exprasmlist, hregister, OS_INT);
|
||||
{ indexreg := hregister; }
|
||||
cg.a_load_reg_reg(exprasmlist, opsize, OS_INT, hregister, indexreg);
|
||||
{ create reference, indexreg := indexreg * sizeof(OS_ADDR) }
|
||||
cg.a_op_const_reg(exprasmlist, OP_MUL, OS_INT, tcgsize2size[OS_ADDR], indexreg);
|
||||
{ the constant offset -min_ * entry size makes the value min_ select the first table entry }
reference_reset_symbol(href, table, (-aint(min_)) * tcgsize2size[OS_ADDR]);
|
||||
href.index := indexreg;
|
||||
|
||||
{ load the table entry (the address of the target block) into indexreg }
cg.a_load_ref_reg(exprasmlist, OS_INT, OS_INT, href, indexreg);
|
||||
|
||||
{ branch to the loaded address through the CTR register }
exprasmlist.concat(taicpu.op_reg(A_MTCTR, indexreg));
|
||||
exprasmlist.concat(taicpu.op_none(A_BCTR));
|
||||
|
||||
{ generate jump table }
|
||||
new_section(current_procinfo.aktlocaldata,sec_data,current_procinfo.procdef.mangledname,sizeof(aint));
|
||||
current_procinfo.aktlocaldata.concat(Tai_label.Create(table));
|
||||
{ genitem fills holes starting at last+1; the smallest label equals min_, so no hole
  is emitted in front of the first entry }
last:=min_;
|
||||
genitem(current_procinfo.aktlocaldata,hp);
|
||||
end;
|
||||
|
||||
procedure tppccasenode.genlinearlist(hp: pcaselabel);
|
||||
var
|
||||
first, lastrange: boolean;
|
||||
last: TConstExprInt;
|
||||
@ -81,13 +151,11 @@ var
|
||||
if assigned(t^.less) then
|
||||
genitem(t^.less);
|
||||
{ do we need to test the first value? }
|
||||
if first and (t^._low > get_min_value(left.resulttype.def)) then
|
||||
begin
|
||||
if first and (t^._low > get_min_value(left.resulttype.def)) then begin
|
||||
cg.a_cmp_const_reg_label(exprasmlist, OS_INT, jmp_lt, aword(t^._low),
|
||||
hregister, elselabel);
|
||||
end;
|
||||
if t^._low = t^._high then
|
||||
begin
|
||||
if t^._low = t^._high then begin
|
||||
if t^._low - last = 0 then
|
||||
cg.a_cmp_const_reg_label(exprasmlist, opsize, OC_EQ, 0, hregister,
|
||||
blocklabel(t^.blockid))
|
||||
@ -96,26 +164,20 @@ var
|
||||
tcgppc(cg).a_jmp_cond(exprasmlist, OC_EQ, blocklabel(t^.blockid));
|
||||
last := t^._low;
|
||||
lastrange := false;
|
||||
end
|
||||
else
|
||||
begin
|
||||
end else begin
|
||||
{ it begins with the smallest label, if the value }
|
||||
{ is even smaller then jump immediately to the }
|
||||
{ ELSE-label }
|
||||
if first then
|
||||
begin
|
||||
if first then begin
|
||||
{ do we have to adjust the first value? }
|
||||
if (t^._low > get_min_value(left.resulttype.def)) then
|
||||
gensub(aint(t^._low));
|
||||
end
|
||||
else
|
||||
begin
|
||||
end else begin
|
||||
{ if there is no unused label between the last and the }
|
||||
{ present label then the lower limit can be checked }
|
||||
{ immediately. else check the range in between: }
|
||||
gensub(aint(t^._low - last));
|
||||
if ((t^._low - last) <> 1) or
|
||||
(not lastrange) then
|
||||
if ((t^._low - last) <> 1) or (not lastrange) then
|
||||
tcgppc(cg).a_jmp_cond(exprasmlist, jmp_lt, elselabel);
|
||||
end;
|
||||
gensub(aint(t^._high - t^._low));
|
||||
@ -130,11 +192,9 @@ var
|
||||
|
||||
begin
|
||||
{ do we need to generate cmps? }
|
||||
if (with_sign and (min_label < 0)) or
|
||||
(opsize = OS_32) then
|
||||
if (with_sign and (min_label < 0)) or (opsize = OS_32) then
|
||||
genlinearcmplist(hp)
|
||||
else
|
||||
begin
|
||||
else begin
|
||||
last := 0;
|
||||
lastrange := false;
|
||||
first := true;
|
||||
|
Loading…
Reference in New Issue
Block a user