mirror of
https://gitlab.com/freepascal.org/fpc/source.git
synced 2025-04-05 11:18:18 +02:00
324 lines
13 KiB
ObjectPascal
324 lines
13 KiB
ObjectPascal
{
|
|
Copyright (c) 2015 by Jonas Maebe
|
|
|
|
Generate AArch64 assembler for in set/case nodes
|
|
|
|
This program is free software; you can redistribute it and/or modify
|
|
it under the terms of the GNU General Public License as published by
|
|
the Free Software Foundation; either version 2 of the License, or
|
|
(at your option) any later version.
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with this program; if not, write to the Free Software
|
|
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
|
|
|
****************************************************************************
|
|
}
|
|
unit ncpuset;
|
|
|
|
{$i fpcdefs.inc}
|
|
|
|
interface
|
|
|
|
uses
|
|
node,nset,ncgset,cpubase,cgbase,cgobj,aasmbase,aasmtai,aasmdata,globtype;
|
|
|
|
type
|
|
taarch64casenode = class(tcgcasenode)
|
|
protected
|
|
procedure optimizevalues(var max_linear_list: int64; var max_dist: qword);override;
|
|
function has_jumptable: boolean;override;
|
|
procedure genjumptable(hp: pcaselabel ;min_, max_: int64);override;
|
|
procedure genlinearlist(hp: pcaselabel);override;
|
|
end;
|
|
|
|
|
|
implementation
|
|
|
|
uses
|
|
systems,
|
|
verbose,globals,constexp,cutils,
|
|
symconst,symdef,defutil,
|
|
paramgr,
|
|
cpuinfo,
|
|
pass_2,cgcpu,
|
|
ncon,
|
|
tgobj,ncgutil,rgobj,aasmcpu,
|
|
procinfo,
|
|
cgutils;
|
|
|
|
{*****************************************************************************
|
|
TCGCASENODE
|
|
*****************************************************************************}
|
|
|
|
|
|
procedure taarch64casenode.optimizevalues(var max_linear_list: int64; var max_dist: qword);
|
|
begin
|
|
max_linear_list:=10;
|
|
end;
|
|
|
|
|
|
function taarch64casenode.has_jumptable: boolean;
|
|
begin
|
|
has_jumptable:=true;
|
|
end;
|
|
|
|
|
|
procedure taarch64casenode.genlinearlist(hp : pcaselabel);
|
|
var
|
|
first : boolean;
|
|
lastrange : boolean;
|
|
last : TConstExprInt;
|
|
cond_lt,cond_le : tresflags;
|
|
opcgsize, unsigned_opcgsize: tcgsize;
|
|
|
|
procedure genitem(t : pcaselabel);
|
|
var
|
|
ovloc: tlocation;
|
|
begin
|
|
if assigned(t^.less) then
|
|
genitem(t^.less);
|
|
{ need we to test the first value }
|
|
if first and (t^._low>get_min_value(left.resultdef)) then
|
|
begin
|
|
cg.a_cmp_const_reg_label(current_asmdata.CurrAsmList,opcgsize,jmp_lt,aint(t^._low.svalue),hregister,elselabel);
|
|
end;
|
|
if t^._low=t^._high then
|
|
begin
|
|
if t^._low-last=0 then
|
|
cg.a_cmp_const_reg_label(current_asmdata.CurrAsmList, opcgsize, OC_EQ,0,hregister,blocklabel(t^.blockid))
|
|
else
|
|
begin
|
|
{ use unsigned_opcgsize here to avoid uncessary sign extensions, at this place hregister will never be negative, because
|
|
then genlinearlist wouldn't be used }
|
|
cg.a_op_const_reg_reg_checkoverflow(current_asmdata.CurrAsmList, OP_SUB, unsigned_opcgsize, aint(t^._low.svalue-last.svalue), hregister, hregister,
|
|
true,ovloc);
|
|
cg.a_jmp_flags(current_asmdata.CurrAsmList,F_EQ,blocklabel(t^.blockid));
|
|
end;
|
|
last:=t^._low;
|
|
lastrange:=false;
|
|
end
|
|
else
|
|
begin
|
|
{ it begins with the smallest label, if the value }
|
|
{ is even smaller then jump immediately to the }
|
|
{ ELSE-label }
|
|
if first then
|
|
begin
|
|
{ have we to ajust the first value ? }
|
|
if (t^._low>get_min_value(left.resultdef)) or (get_min_value(left.resultdef)<>0) then
|
|
begin
|
|
{ use unsigned_opcgsize here to avoid uncessary sign extensions, at this place hregister will never be negative, because
|
|
then genlinearlist wouldn't be use }
|
|
cg.a_op_const_reg_reg_checkoverflow(current_asmdata.CurrAsmList, OP_SUB, unsigned_opcgsize, aint(t^._low.svalue), hregister, hregister,
|
|
true,ovloc);
|
|
end;
|
|
end
|
|
else
|
|
begin
|
|
{ if there is no unused label between the last and the }
|
|
{ present label then the lower limit can be checked }
|
|
{ immediately. else check the range in between: }
|
|
|
|
{ use unsigned_opcgsize here to avoid uncessary sign extensions, at this place hregister will never be negative, because
|
|
then genlinearlist wouldn't be use }
|
|
cg.a_op_const_reg_reg_checkoverflow(current_asmdata.CurrAsmList, OP_SUB, unsigned_opcgsize, aint(t^._low.svalue - last.svalue), hregister, hregister,
|
|
true,ovloc);
|
|
{ no jump necessary here if the new range starts at }
|
|
{ at the value following the previous one }
|
|
if (aint(t^._low.svalue - last.svalue) <> 1) or
|
|
(not lastrange) then
|
|
cg.a_jmp_flags(current_asmdata.CurrAsmList,cond_lt,elselabel);
|
|
end;
|
|
{ use unsigned_opcgsize here to avoid uncessary sign extensions, at this place hregister will never be negative, because
|
|
then genlinearlist wouldn't be use }
|
|
cg.a_op_const_reg_reg_checkoverflow(current_asmdata.CurrAsmList,OP_SUB,unsigned_opcgsize,aint(t^._high.svalue - t^._low.svalue), hregister, hregister,
|
|
true,ovloc);
|
|
cg.a_jmp_flags(current_asmdata.CurrAsmList,cond_le,blocklabel(t^.blockid));
|
|
|
|
last:=t^._high;
|
|
lastrange:=true;
|
|
end;
|
|
first:=false;
|
|
if assigned(t^.greater) then
|
|
genitem(t^.greater);
|
|
end;
|
|
|
|
begin
|
|
opcgsize:=def_cgsize(opsize);
|
|
case opcgsize of
|
|
OS_8,OS_16,OS_32,OS_S8,OS_S16,OS_S32:
|
|
unsigned_opcgsize:=OS_32;
|
|
OS_64,OS_S64:
|
|
unsigned_opcgsize:=OS_64;
|
|
else
|
|
Internalerror(2019090902);
|
|
end;
|
|
if with_sign then
|
|
begin
|
|
cond_lt:=F_LT;
|
|
cond_le:=F_LE;
|
|
end
|
|
else
|
|
begin
|
|
cond_lt:=F_CC;
|
|
cond_le:=F_LS;
|
|
end;
|
|
{ do we need to generate cmps? }
|
|
if (with_sign and (min_label<0)) then
|
|
genlinearcmplist(hp)
|
|
else
|
|
begin
|
|
last:=0;
|
|
lastrange:=false;
|
|
first:=true;
|
|
genitem(hp);
|
|
cg.a_jmp_always(current_asmdata.CurrAsmList,elselabel);
|
|
end;
|
|
end;
|
|
|
|
|
|
procedure taarch64casenode.genjumptable(hp: pcaselabel; min_, max_: int64);
|
|
var
|
|
last: TConstExprInt;
|
|
tablelabel: TAsmLabel;
|
|
basereg,indexreg,jumpreg: TRegister;
|
|
href: TReference;
|
|
opcgsize: tcgsize;
|
|
sectype: TAsmSectiontype;
|
|
jtitemconsttype: taiconst_type;
|
|
|
|
procedure genitem(list:TAsmList;t : pcaselabel);
|
|
var
|
|
i : int64;
|
|
begin
|
|
if assigned(t^.less) then
|
|
genitem(list,t^.less);
|
|
{ fill possible hole }
|
|
i:=last.svalue+1;
|
|
while i<=t^._low.svalue-1 do
|
|
begin
|
|
if target_info.system=system_aarch64_win64 then
|
|
list.concat(Tai_const.Create_sym(elselabel))
|
|
else
|
|
list.concat(Tai_const.Create_rel_sym(jtitemconsttype,tablelabel,elselabel));
|
|
inc(i);
|
|
end;
|
|
i:=t^._low.svalue;
|
|
while i<=t^._high.svalue do
|
|
begin
|
|
if target_info.system=system_aarch64_win64 then
|
|
list.concat(Tai_const.Create_sym(blocklabel(t^.blockid)))
|
|
else
|
|
list.concat(Tai_const.Create_rel_sym(jtitemconsttype,tablelabel,blocklabel(t^.blockid)));
|
|
inc(i);
|
|
end;
|
|
last:=t^._high;
|
|
if assigned(t^.greater) then
|
|
genitem(list,t^.greater);
|
|
end;
|
|
|
|
begin
|
|
if not(target_info.system in systems_darwin) then
|
|
jtitemconsttype:=aitconst_32bit
|
|
else
|
|
{ see https://gmplib.org/list-archives/gmp-bugs/2012-December/002836.html }
|
|
jtitemconsttype:=aitconst_darwin_dwarf_delta32;
|
|
|
|
last:=min_;
|
|
opcgsize:=def_cgsize(opsize);
|
|
{ a <= x <= b <-> unsigned(x-a) <= (b-a) }
|
|
cg.a_op_const_reg(current_asmdata.CurrAsmList,OP_SUB,opcgsize,aint(min_),hregister);
|
|
if not(jumptable_no_range) then
|
|
begin
|
|
{ case expr greater than max_ => goto elselabel }
|
|
cg.a_cmp_const_reg_label(current_asmdata.CurrAsmList,opcgsize,OC_A,aint(max_)-aint(min_),hregister,elselabel);
|
|
min_:=0;
|
|
end;
|
|
if target_info.system=system_aarch64_win64 then
|
|
current_asmdata.getstaticdatalabel(tablelabel)
|
|
else
|
|
{ local label in order to avoid using GOT }
|
|
current_asmdata.getlabel(tablelabel,alt_data);
|
|
indexreg:=cg.makeregsize(current_asmdata.CurrAsmList,hregister,OS_ADDR);
|
|
cg.a_load_reg_reg(current_asmdata.CurrAsmList,opcgsize,OS_ADDR,hregister,indexreg);
|
|
{ load table address }
|
|
reference_reset_symbol(href,tablelabel,0,4,[]);
|
|
basereg:=cg.getaddressregister(current_asmdata.CurrAsmList);
|
|
cg.a_loadaddr_ref_reg(current_asmdata.CurrAsmList,href,basereg);
|
|
{ load the slot }
|
|
jumpreg:=cg.getaddressregister(current_asmdata.CurrAsmList);
|
|
if target_info.system=system_aarch64_win64 then
|
|
reference_reset_base(href,basereg,0,href.temppos,sizeof(aint),[])
|
|
else
|
|
reference_reset_base(href,basereg,0,href.temppos,4,[]);
|
|
href.index:=indexreg;
|
|
href.shiftmode:=SM_LSL;
|
|
if target_info.system=system_aarch64_win64 then
|
|
begin
|
|
{ Use a 64-bit absolute table under aarch64-win64 }
|
|
href.shiftimm:=3;
|
|
cg.a_load_ref_reg(current_asmdata.CurrAsmList,OS_ADDR,OS_ADDR,href,jumpreg);
|
|
end
|
|
else
|
|
begin
|
|
{ load table slot, 32-bit sign extended }
|
|
href.shiftimm:=2;
|
|
cg.a_load_ref_reg(current_asmdata.CurrAsmList,OS_S32,OS_ADDR,href,jumpreg);
|
|
{ add table address }
|
|
cg.a_op_reg_reg(current_asmdata.CurrAsmList,OP_ADD,OS_ADDR,basereg,jumpreg);
|
|
end;
|
|
{ and finally jump }
|
|
current_asmdata.CurrAsmList.concat(taicpu.op_reg(A_BR,jumpreg));
|
|
{ generate jump table }
|
|
if target_info.system=system_aarch64_win64 then
|
|
begin
|
|
{ For windows, it has to be in a data section otherwise an access violation
|
|
will occur, but also full 64-bit references to avoid problems with
|
|
relative references }
|
|
sectype:=sec_rodata;
|
|
new_section(current_procinfo.aktlocaldata,sectype,lower(current_procinfo.procdef.mangledname),sizeof(aint));
|
|
end
|
|
else
|
|
begin
|
|
if not(target_info.system in systems_darwin) then
|
|
sectype:=sec_rodata
|
|
else
|
|
begin
|
|
{ on Mac OS X, dead code stripping ("smart linking") happens based on
|
|
global symbols: every global/static symbol (symbols that do not
|
|
start with "L") marks the start of a new "subsection" that is
|
|
discarded by the linker if there are no references to this symbol.
|
|
This means that if you put the jump table in the rodata section, it
|
|
will become part of the block of data associated with the previous
|
|
non-L-label in the rodata section and stay or be thrown away
|
|
depending on whether that block of data is referenced. Therefore,
|
|
jump tables must be added in the code section and since aktlocaldata
|
|
is inserted right after the routine, it will become part of the
|
|
same subsection that contains the routine's code }
|
|
sectype:=sec_code;
|
|
end;
|
|
new_section(current_procinfo.aktlocaldata,sectype,current_procinfo.procdef.mangledname,4);
|
|
end;
|
|
if target_info.system in systems_darwin then
|
|
begin
|
|
{ additionally, these tables are now marked via ".data_region jt32"
|
|
and ".end_data_region" }
|
|
current_procinfo.aktlocaldata.concat(tai_directive.Create(asd_data_region,'jt32'));
|
|
end;
|
|
current_procinfo.aktlocaldata.concat(Tai_label.Create(tablelabel));
|
|
genitem(current_procinfo.aktlocaldata,hp);
|
|
if target_info.system in systems_darwin then
|
|
current_procinfo.aktlocaldata.concat(tai_directive.Create(asd_end_data_region,''));
|
|
end;
|
|
|
|
|
|
begin
|
|
ccasenode:=taarch64casenode;
|
|
end.
|