{ Copyright (c) 2015 by Jonas Maebe Generate AArch64 assembler for in set/case nodes This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. **************************************************************************** } unit ncpuset; {$i fpcdefs.inc} interface uses node,nset,ncgset,cpubase,cgbase,cgobj,aasmbase,aasmtai,aasmdata,globtype; type taarch64casenode = class(tcgcasenode) protected procedure optimizevalues(var max_linear_list: int64; var max_dist: qword);override; function has_jumptable: boolean;override; procedure genjumptable(hp: pcaselabel ;min_, max_: int64);override; procedure genlinearlist(hp: pcaselabel);override; end; implementation uses systems, verbose,globals,constexp,cutils, symconst,symdef,defutil, paramgr, cpuinfo, pass_2,cgcpu, ncon, tgobj,ncgutil,rgobj,aasmcpu, procinfo, cgutils; {***************************************************************************** TCGCASENODE *****************************************************************************} procedure taarch64casenode.optimizevalues(var max_linear_list: int64; var max_dist: qword); begin max_linear_list:=10; end; function taarch64casenode.has_jumptable: boolean; begin has_jumptable:=true; end; procedure taarch64casenode.genlinearlist(hp : pcaselabel); var first : boolean; lastrange : boolean; last : TConstExprInt; cond_lt,cond_le : tresflags; opcgsize, unsigned_opcgsize: tcgsize; procedure genitem(t : pcaselabel); var ovloc: tlocation; begin if assigned(t^.less) then genitem(t^.less); { need we to test the first value } if first and (t^._low>get_min_value(left.resultdef)) then begin cg.a_cmp_const_reg_label(current_asmdata.CurrAsmList,opcgsize,jmp_lt,aint(t^._low.svalue),hregister,elselabel); end; if t^._low=t^._high then begin if t^._low-last=0 then cg.a_cmp_const_reg_label(current_asmdata.CurrAsmList, opcgsize, OC_EQ,0,hregister,blocklabel(t^.blockid)) else begin { use unsigned_opcgsize here to avoid uncessary sign extensions, at this place hregister will never be negative, because then genlinearlist wouldn't be used } cg.a_op_const_reg_reg_checkoverflow(current_asmdata.CurrAsmList, OP_SUB, unsigned_opcgsize, aint(t^._low.svalue-last.svalue), hregister, hregister, true,ovloc); cg.a_jmp_flags(current_asmdata.CurrAsmList,F_EQ,blocklabel(t^.blockid)); end; last:=t^._low; lastrange:=false; end else begin { it begins with the smallest label, if the value } { is even smaller then jump immediately to the } { ELSE-label } if first then begin { have we to ajust the first value ? } if (t^._low>get_min_value(left.resultdef)) or (get_min_value(left.resultdef)<>0) then begin { use unsigned_opcgsize here to avoid uncessary sign extensions, at this place hregister will never be negative, because then genlinearlist wouldn't be use } cg.a_op_const_reg_reg_checkoverflow(current_asmdata.CurrAsmList, OP_SUB, unsigned_opcgsize, aint(t^._low.svalue), hregister, hregister, true,ovloc); end; end else begin { if there is no unused label between the last and the } { present label then the lower limit can be checked } { immediately. else check the range in between: } { use unsigned_opcgsize here to avoid uncessary sign extensions, at this place hregister will never be negative, because then genlinearlist wouldn't be use } cg.a_op_const_reg_reg_checkoverflow(current_asmdata.CurrAsmList, OP_SUB, unsigned_opcgsize, aint(t^._low.svalue - last.svalue), hregister, hregister, true,ovloc); { no jump necessary here if the new range starts at } { at the value following the previous one } if (aint(t^._low.svalue - last.svalue) <> 1) or (not lastrange) then cg.a_jmp_flags(current_asmdata.CurrAsmList,cond_lt,elselabel); end; { use unsigned_opcgsize here to avoid uncessary sign extensions, at this place hregister will never be negative, because then genlinearlist wouldn't be use } cg.a_op_const_reg_reg_checkoverflow(current_asmdata.CurrAsmList,OP_SUB,unsigned_opcgsize,aint(t^._high.svalue - t^._low.svalue), hregister, hregister, true,ovloc); cg.a_jmp_flags(current_asmdata.CurrAsmList,cond_le,blocklabel(t^.blockid)); last:=t^._high; lastrange:=true; end; first:=false; if assigned(t^.greater) then genitem(t^.greater); end; begin opcgsize:=def_cgsize(opsize); case opcgsize of OS_8,OS_16,OS_32,OS_S8,OS_S16,OS_S32: unsigned_opcgsize:=OS_32; OS_64,OS_S64: unsigned_opcgsize:=OS_64; else Internalerror(2019090902); end; if with_sign then begin cond_lt:=F_LT; cond_le:=F_LE; end else begin cond_lt:=F_CC; cond_le:=F_LS; end; { do we need to generate cmps? } if (with_sign and (min_label<0)) then genlinearcmplist(hp) else begin last:=0; lastrange:=false; first:=true; genitem(hp); cg.a_jmp_always(current_asmdata.CurrAsmList,elselabel); end; end; procedure taarch64casenode.genjumptable(hp: pcaselabel; min_, max_: int64); var last: TConstExprInt; tablelabel: TAsmLabel; basereg,indexreg,jumpreg: TRegister; href: TReference; opcgsize: tcgsize; sectype: TAsmSectiontype; jtitemconsttype: taiconst_type; procedure genitem(list:TAsmList;t : pcaselabel); var i : int64; begin if assigned(t^.less) then genitem(list,t^.less); { fill possible hole } i:=last.svalue+1; while i<=t^._low.svalue-1 do begin if target_info.system=system_aarch64_win64 then list.concat(Tai_const.Create_sym(elselabel)) else list.concat(Tai_const.Create_rel_sym(jtitemconsttype,tablelabel,elselabel)); inc(i); end; i:=t^._low.svalue; while i<=t^._high.svalue do begin if target_info.system=system_aarch64_win64 then list.concat(Tai_const.Create_sym(blocklabel(t^.blockid))) else list.concat(Tai_const.Create_rel_sym(jtitemconsttype,tablelabel,blocklabel(t^.blockid))); inc(i); end; last:=t^._high; if assigned(t^.greater) then genitem(list,t^.greater); end; begin if not(target_info.system in systems_darwin) then jtitemconsttype:=aitconst_32bit else { see https://gmplib.org/list-archives/gmp-bugs/2012-December/002836.html } jtitemconsttype:=aitconst_darwin_dwarf_delta32; last:=min_; opcgsize:=def_cgsize(opsize); { a <= x <= b <-> unsigned(x-a) <= (b-a) } cg.a_op_const_reg(current_asmdata.CurrAsmList,OP_SUB,opcgsize,aint(min_),hregister); if not(jumptable_no_range) then begin { case expr greater than max_ => goto elselabel } cg.a_cmp_const_reg_label(current_asmdata.CurrAsmList,opcgsize,OC_A,aint(max_)-aint(min_),hregister,elselabel); min_:=0; end; if target_info.system=system_aarch64_win64 then current_asmdata.getstaticdatalabel(tablelabel) else { local label in order to avoid using GOT } current_asmdata.getlabel(tablelabel,alt_data); indexreg:=cg.makeregsize(current_asmdata.CurrAsmList,hregister,OS_ADDR); cg.a_load_reg_reg(current_asmdata.CurrAsmList,opcgsize,OS_ADDR,hregister,indexreg); { load table address } reference_reset_symbol(href,tablelabel,0,4,[]); basereg:=cg.getaddressregister(current_asmdata.CurrAsmList); cg.a_loadaddr_ref_reg(current_asmdata.CurrAsmList,href,basereg); { load the slot } jumpreg:=cg.getaddressregister(current_asmdata.CurrAsmList); if target_info.system=system_aarch64_win64 then reference_reset_base(href,basereg,0,href.temppos,sizeof(aint),[]) else reference_reset_base(href,basereg,0,href.temppos,4,[]); href.index:=indexreg; href.shiftmode:=SM_LSL; if target_info.system=system_aarch64_win64 then begin { Use a 64-bit absolute table under aarch64-win64 } href.shiftimm:=3; cg.a_load_ref_reg(current_asmdata.CurrAsmList,OS_ADDR,OS_ADDR,href,jumpreg); end else begin { load table slot, 32-bit sign extended } href.shiftimm:=2; cg.a_load_ref_reg(current_asmdata.CurrAsmList,OS_S32,OS_ADDR,href,jumpreg); { add table address } cg.a_op_reg_reg(current_asmdata.CurrAsmList,OP_ADD,OS_ADDR,basereg,jumpreg); end; { and finally jump } current_asmdata.CurrAsmList.concat(taicpu.op_reg(A_BR,jumpreg)); { generate jump table } if target_info.system=system_aarch64_win64 then begin { For windows, it has to be in a data section otherwise an access violation will occur, but also full 64-bit references to avoid problems with relative references } sectype:=sec_rodata; new_section(current_procinfo.aktlocaldata,sectype,lower(current_procinfo.procdef.mangledname),sizeof(aint)); end else begin if not(target_info.system in systems_darwin) then sectype:=sec_rodata else begin { on Mac OS X, dead code stripping ("smart linking") happens based on global symbols: every global/static symbol (symbols that do not start with "L") marks the start of a new "subsection" that is discarded by the linker if there are no references to this symbol. This means that if you put the jump table in the rodata section, it will become part of the block of data associated with the previous non-L-label in the rodata section and stay or be thrown away depending on whether that block of data is referenced. Therefore, jump tables must be added in the code section and since aktlocaldata is inserted right after the routine, it will become part of the same subsection that contains the routine's code } sectype:=sec_code; end; new_section(current_procinfo.aktlocaldata,sectype,current_procinfo.procdef.mangledname,4); end; if target_info.system in systems_darwin then begin { additionally, these tables are now marked via ".data_region jt32" and ".end_data_region" } current_procinfo.aktlocaldata.concat(tai_directive.Create(asd_data_region,'jt32')); end; current_procinfo.aktlocaldata.concat(Tai_label.Create(tablelabel)); genitem(current_procinfo.aktlocaldata,hp); if target_info.system in systems_darwin then current_procinfo.aktlocaldata.concat(tai_directive.Create(asd_end_data_region,'')); end; begin ccasenode:=taarch64casenode; end.