From 9f16c34329410b03b01ae0f66c67e2f90c07b864 Mon Sep 17 00:00:00 2001 From: florian Date: Wed, 7 Nov 2018 22:02:58 +0000 Subject: [PATCH] + initial work for tls-based threadvar support on arm-linux git-svn-id: trunk@40267 - --- .gitattributes | 1 + compiler/aasmtai.pas | 8 ++- compiler/aggas.pas | 15 ++++- compiler/arm/aoptcpu.pas | 13 +++- compiler/arm/cgcpu.pas | 24 ++++++-- compiler/arm/cpunode.pas | 1 + compiler/arm/cpupi.pas | 8 +++ compiler/arm/narmld.pas | 97 ++++++++++++++++++++++++++++++ compiler/cgbase.pas | 9 ++- compiler/cgobj.pas | 8 +++ compiler/globtype.pas | 4 +- compiler/ncgld.pas | 4 +- compiler/nld.pas | 5 +- compiler/procinfo.pas | 13 ++++ compiler/psub.pas | 15 ++++- compiler/systems/i_linux.pas | 5 +- compiler/utils/ppuutils/ppudump.pp | 4 +- rtl/arm/arm.inc | 9 +++ 18 files changed, 223 insertions(+), 20 deletions(-) create mode 100644 compiler/arm/narmld.pas diff --git a/.gitattributes b/.gitattributes index 988f7cf721..389e515032 100644 --- a/.gitattributes +++ b/.gitattributes @@ -85,6 +85,7 @@ compiler/arm/narmcal.pas svneol=native#text/plain compiler/arm/narmcnv.pas svneol=native#text/plain compiler/arm/narmcon.pas svneol=native#text/plain compiler/arm/narminl.pas svneol=native#text/plain +compiler/arm/narmld.pas svneol=native#text/pascal compiler/arm/narmmat.pas svneol=native#text/plain compiler/arm/narmmem.pas svneol=native#text/plain compiler/arm/narmset.pas svneol=native#text/plain diff --git a/compiler/aasmtai.pas b/compiler/aasmtai.pas index 22a5b2f30a..a1c2de53e4 100644 --- a/compiler/aasmtai.pas +++ b/compiler/aasmtai.pas @@ -145,7 +145,11 @@ interface { offset of symbol's GOT slot in GOT } aitconst_got, { offset of symbol itself from GOT } - aitconst_gotoff_symbol + aitconst_gotoff_symbol, + { ARM TLS code } + aitconst_gottpoff, + aitconst_tpoff + ); tairealconsttype = ( @@ -1759,7 +1763,7 @@ implementation end; - constructor tai_const.Create_rel_sym_offset(_typ: taiconst_type; _sym,_endsym: tasmsymbol; _ofs: int64); + 
constructor tai_const.Create_rel_sym_offset(_typ: taiconst_type; _sym, _endsym: tasmsymbol; _ofs: int64); begin self.create_sym_offset(_sym,_ofs); consttype:=_typ; diff --git a/compiler/aggas.pas b/compiler/aggas.pas index add53a957d..c3b23b4dfb 100644 --- a/compiler/aggas.pas +++ b/compiler/aggas.pas @@ -347,9 +347,13 @@ implementation exit; end; - if (atype=sec_threadvar) and - (target_info.system in (systems_windows+systems_wince)) then - secname:='.tls'; + if atype=sec_threadvar then + begin + if (target_info.system in (systems_windows+systems_wince)) then + secname:='.tls' + else if (target_info.system in systems_linux) then + secname:='.tbss'; + end; { go32v2 stub only loads .text and .data sections, and allocates space for .bss. Thus, data which normally goes into .rodata and .rodata_norel sections must @@ -943,6 +947,11 @@ implementation WriteAixIntConst(tai_const(hp)); writer.AsmLn; end; + aitconst_gottpoff: + begin + writer.AsmWrite(#9'.word'#9+tai_const(hp).sym.name+'(gottpoff)+(.-'+tai_const(hp).endsym.name+tostr_with_plus(tai_const(hp).symofs)+')'); + writer.Asmln; + end; {$endif cpu64bitaddr} aitconst_got: begin diff --git a/compiler/arm/aoptcpu.pas b/compiler/arm/aoptcpu.pas index 20d6ed0284..879972c670 100644 --- a/compiler/arm/aoptcpu.pas +++ b/compiler/arm/aoptcpu.pas @@ -2516,6 +2516,15 @@ Implementation { TODO : schedule also forward } { TODO : schedule distance > 1 } + + { returns true if p might be a load of a pc relative tls offset } + function PossibleTLSLoad(const p: tai) : boolean; + begin + Result:=(p.typ=ait_instruction) and (taicpu(p).opcode=A_LDR) and (taicpu(p).oper[1]^.typ=top_ref) and (((taicpu(p).oper[1]^.ref^.base=NR_PC) and + (taicpu(p).oper[1]^.ref^.index<>NR_NO)) or ((taicpu(p).oper[1]^.ref^.base<>NR_NO) and + (taicpu(p).oper[1]^.ref^.index=NR_PC))); + end; + var hp1,hp2,hp3,hp4,hp5,insertpos : tai; list : TAsmList; @@ -2572,7 +2581,9 @@ Implementation ) and { if we modify the basereg AND the first instruction used that reg, we 
can not schedule } ((taicpu(hp1).oper[1]^.ref^.addressmode = AM_OFFSET) or - not(instructionLoadsFromReg(taicpu(hp1).oper[1]^.ref^.base,p))) then + not(instructionLoadsFromReg(taicpu(hp1).oper[1]^.ref^.base,p))) and + not(PossibleTLSLoad(p)) and + not(PossibleTLSLoad(hp1)) then begin hp3:=tai(p.Previous); hp5:=tai(p.next); diff --git a/compiler/arm/cgcpu.pas b/compiler/arm/cgcpu.pas index 545c7085d3..677630003f 100644 --- a/compiler/arm/cgcpu.pas +++ b/compiler/arm/cgcpu.pas @@ -107,13 +107,15 @@ unit cgcpu; { try to generate optimized 32 Bit multiplication, returns true if successful generated } function try_optimized_mul32_const_reg_reg(list: TAsmList; a: tcgint; src, dst: tregister) : boolean; - { clear out potential overflow bits from 8 or 16 bit operations } - { the upper 24/16 bits of a register after an operation } + { clear out potential overflow bits from 8 or 16 bit operations + the upper 24/16 bits of a register after an operation } procedure maybeadjustresult(list: TAsmList; op: TOpCg; size: tcgsize; dst: tregister); { mla for thumb requires that none of the registers is equal to r13/r15, this method ensures this } procedure safe_mla(list: TAsmList;op1,op2,op3,op4 : TRegister); + + procedure g_maybe_tls_init(list : TAsmList); override; end; { tcgarm is shared between normal arm and thumb-2 } @@ -2114,7 +2116,7 @@ unit cgcpu; end; end; end; - end; + end; end; @@ -2476,6 +2478,8 @@ unit cgcpu; a_op_const_reg(list,OP_ADD,OS_ADDR,ref.offset,tmpreg); indirection_done:=true; end + else if ref.refaddr=addr_gottpoff then + current_procinfo.aktlocaldata.concat(tai_const.Create_rel_sym_offset(aitconst_gottpoff,ref.symbol,ref.relsymbol,ref.offset)) else if (cs_create_pic in current_settings.moduleswitches) then if (tf_pic_uses_got in target_info.flags) then current_procinfo.aktlocaldata.concat(tai_const.Create_type_sym(aitconst_got,ref.symbol)) @@ -3271,6 +3275,15 @@ unit cgcpu; end; + procedure tbasecgarm.g_maybe_tls_init(list : TAsmList); + begin + 
list.concat(tai_regalloc.alloc(NR_R0,nil)); + a_call_name(list,'fpc_read_tp',false); + a_load_reg_reg(list,OS_ADDR,OS_ADDR,NR_R0,current_procinfo.tlsoffset); + list.concat(tai_regalloc.dealloc(NR_R0,nil)); + end; + + procedure tcg64farm.a_op64_reg_reg(list : TAsmList;op:TOpCG;size : tcgsize;regsrc,regdst : tregister64); begin case op of @@ -5026,7 +5039,10 @@ unit cgcpu; cg.a_label(current_procinfo.aktlocaldata,l); tmpref.symboldata:=current_procinfo.aktlocaldata.last; - current_procinfo.aktlocaldata.concat(tai_const.create_sym_offset(ref.symbol,ref.offset)); + if ref.refaddr=addr_gottpoff then + current_procinfo.aktlocaldata.concat(tai_const.Create_rel_sym_offset(aitconst_gottpoff,ref.symbol,ref.relsymbol,ref.offset)) + else + current_procinfo.aktlocaldata.concat(tai_const.create_sym_offset(ref.symbol,ref.offset)); { load consts entry } tmpref.symbol:=l; diff --git a/compiler/arm/cpunode.pas b/compiler/arm/cpunode.pas index 3f3125575d..3995ce992d 100644 --- a/compiler/arm/cpunode.pas +++ b/compiler/arm/cpunode.pas @@ -38,6 +38,7 @@ unit cpunode; narmcal, narmmat, narminl, + narmld, narmcnv, narmcon, narmset, diff --git a/compiler/arm/cpupi.pas b/compiler/arm/cpupi.pas index aab6d3de24..a786d67eba 100644 --- a/compiler/arm/cpupi.pas +++ b/compiler/arm/cpupi.pas @@ -49,6 +49,8 @@ unit cpupi; procedure generate_parameter_info;override; procedure allocate_got_register(list : TAsmList);override; procedure postprocess_code;override; + + procedure allocate_tls_register(list : TAsmList);override; end; @@ -276,6 +278,12 @@ unit cpupi; finalizearmcode(aktproccode,aktlocaldata); end; + + procedure tcpuprocinfo.allocate_tls_register(list: TAsmList); + begin + current_procinfo.tlsoffset:=cg.getaddressregister(list); + end; + begin cprocinfo:=tcpuprocinfo; end. 
diff --git a/compiler/arm/narmld.pas b/compiler/arm/narmld.pas new file mode 100644 index 0000000000..532fc4c752 --- /dev/null +++ b/compiler/arm/narmld.pas @@ -0,0 +1,97 @@ +{ + Copyright (c) 1998-2018 by Florian Klaempfl + + Generate arm assembler for load nodes + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + + **************************************************************************** +} +unit narmld; + +{$i fpcdefs.inc} + +interface + + uses + globtype, + symsym, + node,ncgld,pass_1,aasmbase; + + type + tarmloadnode = class(tcgloadnode) + procedure generate_threadvar_access(gvs : tstaticvarsym); override; + end; + + +implementation + + uses + globals,verbose, + cgbase,cgobj,cgutils, + aasmdata, + systems, + symcpu,symdef, + nld, + cpubase, + parabase, + procinfo; + +{***************************************************************************** + TARMLOADNODE +*****************************************************************************} + + procedure tarmloadnode.generate_threadvar_access(gvs: tstaticvarsym); + var + paraloc1 : tcgpara; + pd: tprocdef; + href: treference; + hregister : tregister; + handled: boolean; + l : TAsmLabel; + begin + handled:=false; + if tf_section_threadvars in target_info.flags then + begin + if target_info.system in [system_arm_linux] then + begin + if not(pi_uses_threadvar in current_procinfo.flags) 
then + internalerror(2012012101); + current_asmdata.getjumplabel(l); + reference_reset_symbol(href,current_asmdata.RefAsmSymbol(gvs.mangledname,AT_DATA),-8,sizeof(AInt),[]); + href.refaddr:=addr_gottpoff; + href.relsymbol:=l; + hregister:=cg.getaddressregister(current_asmdata.CurrAsmList); + cg.a_loadaddr_ref_reg(current_asmdata.CurrAsmList,href,hregister); + cg.a_label(current_asmdata.CurrAsmList,l); + reference_reset(href,0,[]); + href.base:=NR_PC; + href.index:=hregister; + hregister:=cg.getaddressregister(current_asmdata.CurrAsmList); + cg.a_load_ref_reg(current_asmdata.CurrAsmList,OS_ADDR,OS_ADDR,href,hregister); + location.reference.base:=current_procinfo.tlsoffset; + location.reference.index:=hregister; + handled:=true; + end; + end; + + if not handled then + inherited; + end; + + +begin + cloadnode:=tarmloadnode; +end. diff --git a/compiler/cgbase.pas b/compiler/cgbase.pas index e7abf39926..d6b27a15c4 100644 --- a/compiler/cgbase.pas +++ b/compiler/cgbase.pas @@ -63,8 +63,6 @@ interface TCGNonRefLoc=low(TCGLoc)..pred(LOC_CREFERENCE); TCGRefLoc=LOC_CREFERENCE..LOC_REFERENCE; - { since we have only 16bit offsets, we need to be able to specify the high - and lower 16 bits of the address of a symbol of up to 64 bit } trefaddr = ( addr_no, addr_full, @@ -72,6 +70,8 @@ interface addr_pic_no_got {$IF defined(POWERPC) or defined(POWERPC64) or defined(SPARC) or defined(MIPS) or defined(SPARC64)} , + { since we have only 16bit offsets, we need to be able to specify the high + and lower 16 bits of the address of a symbol of up to 64 bit } addr_low, // bits 48-63 addr_high, // bits 32-47 {$IF defined(POWERPC64)} @@ -122,6 +122,11 @@ interface ,addr_gdop_hix22 ,addr_gdop_lox22 {$endif SPARC64} + {$IFDEF ARM} + ,addr_gottpoff + ,addr_tpoff + {$ENDIF} + ); diff --git a/compiler/cgobj.pas b/compiler/cgobj.pas index bd70437aea..de7957b07f 100644 --- a/compiler/cgobj.pas +++ b/compiler/cgobj.pas @@ -437,6 +437,8 @@ unit cgobj; { initialize the pic/got register } procedure 
g_maybe_got_init(list: TAsmList); virtual; + { initialize the tls register if needed } + procedure g_maybe_tls_init(list : TAsmList); virtual; { allocallcpuregisters, a_call_name, deallocallcpuregisters sequence } procedure g_call(list: TAsmList; const s: string); { Generate code to exit an unwind-protected region. The default implementation @@ -2781,6 +2783,12 @@ implementation begin end; + + procedure tcg.g_maybe_tls_init(list: TAsmList); + begin + end; + + procedure tcg.g_call(list: TAsmList;const s: string); begin allocallcpuregisters(list); diff --git a/compiler/globtype.pas b/compiler/globtype.pas index d6f874d975..2ef524b8c3 100644 --- a/compiler/globtype.pas +++ b/compiler/globtype.pas @@ -700,7 +700,9 @@ interface for i8086 cpu huge memory model, as this changes SP register it requires special handling to restore DS segment register } - pi_has_open_array_parameter + pi_has_open_array_parameter, + { subroutine uses threadvars } + pi_uses_threadvar ); tprocinfoflags=set of tprocinfoflag; diff --git a/compiler/ncgld.pas b/compiler/ncgld.pas index c1058887d7..a152b2ab14 100644 --- a/compiler/ncgld.pas +++ b/compiler/ncgld.pas @@ -493,8 +493,8 @@ implementation reference_reset_symbol(location.reference,current_asmdata.WeakRefAsmSymbol(gvs.mangledname,AT_DATA),0,location.reference.alignment,[]) end else - location:=gvs.localloc; - end; + location:=gvs.localloc; + end; { make const a LOC_CREFERENCE } if (gvs.varspez=vs_const) and diff --git a/compiler/nld.pas b/compiler/nld.pas index 262271ae51..006f831aa0 100644 --- a/compiler/nld.pas +++ b/compiler/nld.pas @@ -429,7 +429,10 @@ implementation include(current_procinfo.flags,pi_needs_got); { call to get address of threadvar } if (vo_is_thread_var in tabstractvarsym(symtableentry).varoptions) then - include(current_procinfo.flags,pi_do_call); + begin + include(current_procinfo.flags,pi_do_call); + include(current_procinfo.flags,pi_uses_threadvar); + end; end; procsym : begin diff --git a/compiler/procinfo.pas 
b/compiler/procinfo.pas index 97cd4a33f4..7e2fe97f25 100644 --- a/compiler/procinfo.pas +++ b/compiler/procinfo.pas @@ -95,6 +95,11 @@ unit procinfo; got : tregister; CurrGOTLabel : tasmlabel; + { register containing the tlsoffset } + tlsoffset : tregister; + { reference label for tls addresses } + tlslabel : tasmlabel; + { Holds the reference used to store all saved registers. } save_regs_ref : treference; @@ -150,6 +155,9 @@ unit procinfo; { Allocate got register } procedure allocate_got_register(list: TAsmList);virtual; + { Allocate tls register } + procedure allocate_tls_register(list: TAsmList);virtual; + { get frame pointer } procedure init_framepointer; virtual; @@ -288,6 +296,11 @@ implementation { most os/cpu combo's don't use this yet, so not yet abstract } end; + procedure tprocinfo.allocate_tls_register(list : TAsmList); + begin + end; + + procedure tprocinfo.init_framepointer; begin { most targets use a constant, but some have a typed constant that must diff --git a/compiler/psub.pas b/compiler/psub.pas index d167d6650c..97810ab588 100644 --- a/compiler/psub.pas +++ b/compiler/psub.pas @@ -1451,6 +1451,9 @@ implementation { allocate got register if needed } allocate_got_register(aktproccode); + if pi_uses_threadvar in flags then + allocate_tls_register(aktproccode); + { Allocate space in temp/registers for parast and localst } current_filepos:=entrypos; gen_alloc_symtable(aktproccode,procdef,procdef.parast); @@ -1561,6 +1564,10 @@ implementation (got<>NR_NO) then cg.a_reg_sync(aktproccode,got); + if (pi_uses_threadvar in flags) and + (tlsoffset<>NR_NO) then + cg.a_reg_sync(aktproccode,tlsoffset); + gen_free_symtable(aktproccode,procdef.localst); gen_free_symtable(aktproccode,procdef.parast); @@ -1579,7 +1586,7 @@ implementation begin current_filepos:=entrypos; hlcg.gen_stack_check_call(templist); - aktproccode.insertlistafter(stackcheck_asmnode.currenttai,templist) + aktproccode.insertlistafter(stackcheck_asmnode.currenttai,templist); end; { this code 
(got loading) comes before everything which has } @@ -1599,9 +1606,13 @@ implementation current_filepos:=entrypos; { load got if necessary } cg.g_maybe_got_init(templist); - aktproccode.insertlistafter(headertai,templist); + if pi_uses_threadvar in flags then + cg.g_maybe_tls_init(templist); + aktproccode.insertlistafter(stackcheck_asmnode.currenttai,templist); + + { re-enable if more code at the end is ever generated here cg.set_regalloc_live_range_direction(rad_forward); } diff --git a/compiler/systems/i_linux.pas b/compiler/systems/i_linux.pas index dbd2804936..14dc6016fb 100644 --- a/compiler/systems/i_linux.pas +++ b/compiler/systems/i_linux.pas @@ -589,7 +589,7 @@ unit i_linux; name : 'Linux for ARMHF'; shortname : 'Linux'; flags : [tf_needs_symbol_size,tf_needs_symbol_type,tf_files_case_sensitive, - tf_requires_proper_alignment, + tf_requires_proper_alignment,tf_section_threadvars, tf_smartlink_sections,tf_pic_uses_got, tf_has_winlike_resources]; cpu : cpu_arm; @@ -660,6 +660,9 @@ unit i_linux; shortname : 'Linux'; flags : [tf_needs_symbol_size,tf_needs_symbol_type,tf_files_case_sensitive, tf_requires_proper_alignment, +{$ifdef tls_threadvars} + tf_section_threadvars, +{$endif tls_threadvars} tf_smartlink_sections,tf_pic_uses_got, tf_has_winlike_resources]; cpu : cpu_arm; diff --git a/compiler/utils/ppuutils/ppudump.pp b/compiler/utils/ppuutils/ppudump.pp index 4efacf519f..882e0f3b6a 100644 --- a/compiler/utils/ppuutils/ppudump.pp +++ b/compiler/utils/ppuutils/ppudump.pp @@ -1354,7 +1354,9 @@ const (mask:pi_calls_c_varargs; str:' calls function with C-style varargs '), (mask:pi_has_open_array_parameter; - str:' has open array parameter ') + str:' has open array parameter '), + (mask:pi_uses_threadvar; + str:' uses threadvars ') ); var procinfooptions : tprocinfoflags; diff --git a/rtl/arm/arm.inc b/rtl/arm/arm.inc index 3ff4e744ec..672968d449 100644 --- a/rtl/arm/arm.inc +++ b/rtl/arm/arm.inc @@ -96,6 +96,15 @@ begin end; {$endif wince} +{$ifdef linux} 
+function fpc_read_tp : pointer; [public, alias: 'fpc_read_tp'];assembler; nostackframe; +asm + // Helper is located at 0xffff0fe0 + mvn r0,#0x0000f000 // mov r0, #0xffff0fff + sub pc,r0,#0x1f // Jump to helper +end; +{$endif linux} + {**************************************************************************** stack frame related stuff ****************************************************************************}