From 597a23d278f2c870dbf0a5d7fadb83908b365ce0 Mon Sep 17 00:00:00 2001 From: florian Date: Sun, 27 Jan 2019 09:37:25 +0000 Subject: [PATCH] + tls support for x86_64-linux (not yet enabled by default) git-svn-id: trunk@41081 - --- compiler/cgbase.pas | 4 + compiler/systems/i_linux.pas | 3 + compiler/x86/agx86att.pas | 11 +- compiler/x86/cgx86.pas | 50 +++++ compiler/x86/nx86ld.pas | 39 ++++ compiler/x86_64/cpunode.pas | 1 + rtl/linux/si_impl.inc | 5 + rtl/linux/system.pp | 341 +++++++++++++++++++++-------------- rtl/linux/x86_64/si_prc.inc | 13 +- 9 files changed, 331 insertions(+), 136 deletions(-) diff --git a/compiler/cgbase.pas b/compiler/cgbase.pas index 43d2b5fe38..fe763b881d 100644 --- a/compiler/cgbase.pas +++ b/compiler/cgbase.pas @@ -130,6 +130,10 @@ interface ,addr_ntpoff ,addr_tlsgd {$ENDIF} +{$ifdef x86_64} + ,addr_tpoff + ,addr_tlsgd +{$endif x86_64} ); diff --git a/compiler/systems/i_linux.pas b/compiler/systems/i_linux.pas index 3da6cb7813..eb363158a7 100644 --- a/compiler/systems/i_linux.pas +++ b/compiler/systems/i_linux.pas @@ -380,6 +380,9 @@ unit i_linux; name : 'Linux for x86-64'; shortname : 'Linux'; flags : [tf_smartlink_sections,tf_needs_symbol_size,tf_needs_dwarf_cfi, +{$ifdef tls_threadvars} + tf_section_threadvars, +{$endif tls_threadvars} tf_library_needs_pic,tf_needs_symbol_type,tf_files_case_sensitive, tf_has_winlike_resources,tf_safecall_exceptions,tf_safecall_clearstack]; cpu : cpu_x86_64; diff --git a/compiler/x86/agx86att.pas b/compiler/x86/agx86att.pas index 9cb98ecbd5..3d5bd5a788 100644 --- a/compiler/x86/agx86att.pas +++ b/compiler/x86/agx86att.pas @@ -185,6 +185,12 @@ interface addr_tlsgd: owner.writer.AsmWrite('@tlsgd'); {$endif i386} +{$ifdef x86_64} + addr_tpoff: + owner.writer.AsmWrite('@tpoff'); + addr_tlsgd: + owner.writer.AsmWrite('@tlsgd'); +{$endif x86_64} end; if offset<0 then @@ -231,7 +237,10 @@ interface else owner.writer.AsmWrite(gas_regname(o.reg)); top_ref : - if o.ref^.refaddr in [addr_no,addr_pic,addr_pic_no_got{$ifdef i386},addr_ntpoff,addr_tlsgd{$endif i386}] then + if o.ref^.refaddr in [addr_no,addr_pic,addr_pic_no_got + {$ifdef i386},addr_ntpoff,addr_tlsgd{$endif i386} + {$ifdef x86_64},addr_tpoff,addr_tlsgd{$endif x86_64} + ] then WriteReference(o.ref^) else begin diff --git a/compiler/x86/cgx86.pas b/compiler/x86/cgx86.pas index fc6a488d0f..00036e86dc 100644 --- a/compiler/x86/cgx86.pas +++ b/compiler/x86/cgx86.pas @@ -1140,6 +1140,38 @@ unit cgx86; end; end; {$endif i386} +{$ifdef x86_64} + if refaddr=addr_tpoff then + begin + { Convert thread local address to a process global addres + as we cannot handle far pointers.} + case target_info.system of + system_x86_64_linux: + if segment=NR_FS then + begin + reference_reset(tmpref,1,[]); + tmpref.segment:=NR_FS; + tmpreg:=getaddressregister(list); + a_load_ref_reg(list,OS_ADDR,OS_ADDR,tmpref,tmpreg); + reference_reset(tmpref,1,[]); + tmpref.symbol:=symbol; + tmpref.refaddr:=refaddr; + tmpref.base:=tmpreg; + if base<>NR_NO then + tmpref.index:=base; + list.concat(Taicpu.op_ref_reg(A_LEA,tcgsize2opsize[OS_ADDR],tmpref,tmpreg)); + segment:=NR_NO; + base:=tmpreg; + symbol:=nil; + refaddr:=addr_no; + end + else + Internalerror(2019012003); + else + Internalerror(2019012004); + end; + end; +{$endif x86_64} if (base=NR_NO) and (index=NR_NO) then begin if assigned(dirref.symbol) then @@ -2814,6 +2846,24 @@ unit cgx86; dstref.base:=r; end; {$endif i386} +{$ifdef x86_64} + { we could handle "far" pointers here, but reloading es/ds is probably much slower + than just resolving the tls segment } + if (srcref.refaddr=addr_tpoff) and (srcref.segment=NR_FS) then + begin + r:=getaddressregister(list); + a_loadaddr_ref_reg(list,srcref,r); + reference_reset(srcref,srcref.alignment,srcref.volatility); + srcref.base:=r; + end; + if (dstref.refaddr=addr_tpoff) and (dstref.segment=NR_FS) then + begin + r:=getaddressregister(list); + a_loadaddr_ref_reg(list,dstref,r); + reference_reset(dstref,dstref.alignment,dstref.volatility); + dstref.base:=r; + end; +{$endif x86_64} cm:=copy_move; helpsize:=3*sizeof(aword); if cs_opt_size in current_settings.optimizerswitches then diff --git a/compiler/x86/nx86ld.pas b/compiler/x86/nx86ld.pas index faa14b697b..5e8d5dad5b 100644 --- a/compiler/x86/nx86ld.pas +++ b/compiler/x86/nx86ld.pas @@ -121,6 +121,45 @@ implementation end; end; {$endif i386} +{$ifdef x86_64} + case target_info.system of + system_x86_64_linux: + begin + case current_settings.tlsmodel of + tlsm_local: + begin + location.reference.segment:=NR_FS; + location.reference.refaddr:=addr_tpoff; + end; + tlsm_general: + begin + if not(cs_create_pic in current_settings.moduleswitches) then + Internalerror(2019012001); + + current_asmdata.CurrAsmList.concat(tai_const.Create_8bit($66)); + reference_reset(href,0,[]); + location.reference.base:=NR_RIP; + location.reference.scalefactor:=1; + location.reference.refaddr:=addr_tlsgd; + cg.getcpuregister(current_asmdata.CurrAsmList,NR_RDI); + current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(A_LEA,S_Q,location.reference,NR_RDI)); + current_asmdata.CurrAsmList.concat(tai_const.Create_8bit($66)); + current_asmdata.CurrAsmList.concat(tai_const.Create_8bit($66)); + current_asmdata.CurrAsmList.concat(tai_const.Create_8bit($48)); + cg.g_call(current_asmdata.CurrAsmList,'__tls_get_addr'); + cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_RDI); + cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_EAX); + hregister:=cg.getaddressregister(current_asmdata.CurrAsmList); + cg.a_load_reg_reg(current_asmdata.CurrAsmList,OS_ADDR,OS_ADDR,NR_RAX,hregister); + reference_reset(location.reference,location.reference.alignment,location.reference.volatility); + location.reference.base:=hregister; + end; + else + Internalerror(2019012002); + end; + end; + end; +{$endif x86_64} end; end; diff --git a/compiler/x86_64/cpunode.pas b/compiler/x86_64/cpunode.pas index 4db49a7f67..eb9b228aaf 100644 --- a/compiler/x86_64/cpunode.pas +++ b/compiler/x86_64/cpunode.pas @@ -53,6 +53,7 @@ unit cpunode; nx86con, nx86mem, nx64add, + nx86ld, nx64cal, nx64cnv, nx64mat, diff --git a/rtl/linux/si_impl.inc b/rtl/linux/si_impl.inc index 9b0911cd5e..a727310551 100644 --- a/rtl/linux/si_impl.inc +++ b/rtl/linux/si_impl.inc @@ -16,6 +16,7 @@ procedure PascalMain; external name 'PASCALMAIN'; {$ifdef FPC_HAS_INDIRECT_ENTRY_INFORMATION} procedure SysEntry(constref info: TEntryInformation); external name 'FPC_SysEntry'; +procedure SysEntry_InitTLS(constref info: TEntryInformation); external name 'FPC_SysEntry_InitTLS'; var InitFinalTable : record end; external name 'INITFINAL'; @@ -30,7 +31,11 @@ var const SysInitEntryInformation : TEntryInformation = ( InitFinalTable : @InitFinalTable; +{$ifdef FPC_SECTION_THREADVARS} + ThreadvarTablesTable : nil; +{$else FPC_SECTION_THREADVARS} ThreadvarTablesTable : @ThreadvarTablesTable; +{$endif FPC_SECTION_THREADVARS} ResourceStringTables : @ResourceStringTables; {$ifdef FPC_HAS_RESSTRINITS} ResStrInitTables : @ResStrInitTables; diff --git a/rtl/linux/system.pp b/rtl/linux/system.pp index 8fa11ffc7c..a0654d82dc 100644 --- a/rtl/linux/system.pp +++ b/rtl/linux/system.pp @@ -116,6 +116,180 @@ procedure OsSetupEntryInformation(constref info: TEntryInformation); forward; {$I sysandroid.inc} {$endif android} +{***************************************************************************** + TLS handling +*****************************************************************************} + +{$if defined(CPUARM)} +{$define INITTLS} +Function fpset_tls(p : pointer;size : SizeUInt):cint; +begin + Result:=do_syscall(syscall_nr___ARM_NR_set_tls,TSysParam(p)); +end; +{$endif defined(CPUARM)} + +{$if defined(CPUI386)} +{$define INITTLS} +Function fpset_tls(p : pointer;size : SizeUInt):cint; +var + desc : record + entry_number : dword; + base_addr : dword; + limit : dword; + flags : dword; + end; + selector : word; +begin + // get descriptor from the kernel + desc.entry_number:=$ffffffff; + // TLS is accessed by negative offsets, only the TCB pointer is at offset 0 + desc.base_addr:=dword(p)+size-SizeOf(Pointer); + // 4 GB, limit is given in pages + desc.limit:=$fffff; + // seg_32bit:1, contents:0, read_exec_only:0, limit_in_pages:1, seg_not_present:0, useable:1 + desc.flags:=%1010001; + Result:=do_syscall(syscall_nr_set_thread_area,TSysParam(@desc)); + if Result=0 then + begin + selector:=desc.entry_number*8+3; + asm + movw selector,%gs + movl desc.base_addr,%eax + movl %eax,%gs:0 + end; + end; +end; +{$endif defined(CPUI386)} + +{$if defined(CPUX86_64)} +{$define INITTLS} +const + ARCH_SET_FS = $1002; + +Function fpset_tls(p : pointer;size : SizeUInt):cint; +begin + p:=pointer(qword(p)+size-SizeOf(Pointer)); + Result:=do_syscall(syscall_nr_arch_prctl,TSysParam(ARCH_SET_FS),TSysParam(p)); + if Result=0 then + begin + asm + movq p,%rax + movq %rax,%fs:0 + end; + end; +end; +{$endif defined(CPUX86_64)} + + +{$ifdef INITTLS} +{ This code initialized the TLS segment for single threaded and static programs. + + In case of multithreaded and/or dynamically linked programs it is assumed that they + dependent anyways on glibc which initializes tls properly. + + As soon as a purely FPC dynamic loading or multithreading is implemented, the code + needs to be extended to handle these cases as well. +} + +procedure InitTLS; [public,alias:'FPC_INITTLS']; + const + PT_TLS = 7; + PT_DYNAMIC = 2; + + type +{$ifdef CPU64} + tphdr = record + p_type, + p_flags : dword; + p_offset, + p_vaddr, + p_paddr, + p_filesz, + p_memsz, + p_align : qword; + end; +{$else CPU64} + tphdr = record + p_type, + p_offset, + p_vaddr, + p_paddr, + p_filesz, + p_memsz, + p_flags, + p_align : dword; + end; +{$endif CPU64} + pphdr = ^tphdr; + + var + phdr : pphdr; + phnum : dword; + i : integer; + tls : pointer; + auxp : ppointer; + found : boolean; + size : SizeUInt; + begin + auxp:=ppointer(envp); + { skip environment } + while assigned(auxp^) do + inc(auxp); + inc(auxp); + phdr:=nil; + phnum:=0; + { now we are at the auxillary vector } + while assigned(auxp^) do + begin + case plongint(auxp)^ of + 3: + phdr:=pphdr(ppointer(auxp+1)^); + 5: + phnum:=pdword(auxp+1)^; + end; + inc(auxp,2); + end; + found:=false; + size:=0; + for i:=1 to phnum do + begin + case phdr^.p_type of + PT_TLS: + begin + found:=true; + inc(size,phdr^.p_memsz); + size:=Align(size,phdr^.p_align); + end; + PT_DYNAMIC: + { if the program header contains a dynamic section, the program + is linked dynamically so the dynamic linker takes care of the + allocation of the TLS segment. + + We cannot allocate it by ourself anyways as PT_TLS provides only + the size of TLS data of the executable itself + } + exit; + end; + inc(phdr); + end; + if found then + begin +{$ifdef CPUI386} + { threadvars should start at a page boundary, + add extra space for the TCB } + size:=Align(size,4096)+sizeof(Pointer); +{$endif CPUI386} +{$ifdef CPUX86_64} + { threadvars should start at a page boundary, + add extra space for the TCB } + size:=Align(size,4096)+sizeof(Pointer); +{$endif CPUX86_64} + tls:=Fpmmap(nil,size,3,MAP_PRIVATE+MAP_ANONYMOUS,-1,0); + fpset_tls(tls,size); + end; + end; +{$endif INITTLS} + {***************************************************************************** Indirect Entry Point *****************************************************************************} @@ -133,6 +307,8 @@ begin initialstklen := info.OS.stklen; end; +{ we need two variants here because TLS must be initialized by FPC only if no libc is linked however, + InitTLS cannot be called from the start up files because when they are run, envp is not setup yet.} procedure SysEntry(constref info: TEntryInformation);[public,alias:'FPC_SysEntry']; begin SetupEntryInformation(info); @@ -142,6 +318,18 @@ begin info.PascalMain(); end; +procedure SysEntry_InitTLS(constref info: TEntryInformation);[public,alias:'FPC_SysEntry_InitTLS']; +begin + SetupEntryInformation(info); +{$ifdef INITTLS} + InitTLS; +{$endif INITTLS} +{$ifdef cpui386} + Set8087CW(Default8087CW); +{$endif cpui386} + info.PascalMain(); +end; + {$else} var {$ifndef FPC_BOOTSTRAP_INDIRECT_ENTRY} @@ -153,6 +341,8 @@ var operatingsystem_parameter_argv : Pointer; public name 'operatingsystem_parameter_argv'; +{ we need two variants here because TLS must be initialized by FPC only if no libc is linked however, + InitTLS cannot be called from the start up files because when they are run, envp is not setup yet.} procedure SysEntry(constref info: TEntryInformation);[public,alias:'FPC_SysEntry']; begin initialstkptr := info.OS.stkptr; @@ -164,6 +354,23 @@ begin {$endif cpui386} info.PascalMain(); end; + + +procedure SysEntry_InitTLS(constref info: TEntryInformation);[public,alias:'FPC_SysEntry_InitTLS']; +begin + initialstkptr := info.OS.stkptr; + operatingsystem_parameter_envp := info.OS.envp; + operatingsystem_parameter_argc := info.OS.argc; + operatingsystem_parameter_argv := info.OS.argv; +{$ifdef INITTLS} + InitTLS; +{$endif INITTLS} +{$ifdef cpui386} + Set8087CW(Default8087CW); +{$endif cpui386} + info.PascalMain(); +end; + {$endif FPC_BOOTSTRAP_INDIRECT_ENTRY} {$if defined(CPUARM) and defined(FPC_ABI_EABI)} @@ -441,140 +648,6 @@ begin result := stklen; end; - -{$if defined(CPUARM)} -{$define INITTLS} -Function fpset_tls(p : pointer;size : SizeUInt):cint; -begin - Result:=do_syscall(syscall_nr___ARM_NR_set_tls,TSysParam(p)); -end; -{$endif defined(CPUARM)} - -{$if defined(CPUI386)} -{$define INITTLS} -Function fpset_tls(p : pointer;size : SizeUInt):cint; -var - desc : record - entry_number : dword; - base_addr : dword; - limit : dword; - flags : dword; - end; - selector : word; -begin - // get descriptor from the kernel - desc.entry_number:=$ffffffff; - // TLS is accessed by negative offsets, only the TCB pointer is at offset 0 - desc.base_addr:=dword(p)+size-SizeOf(Pointer); - // 4 GB, limit is given in pages - desc.limit:=$fffff; - // seg_32bit:1, contents:0, read_exec_only:0, limit_in_pages:1, seg_not_present:0, useable:1 - desc.flags:=%1010001; - Result:=do_syscall(syscall_nr_set_thread_area,TSysParam(@desc)); - if Result=0 then - begin - selector:=desc.entry_number*8+3; - asm - movw selector,%gs - movl desc.base_addr,%eax - movl %eax,%gs:0 - end; - end; -end; -{$endif defined(CPUI386)} - -{$ifdef INITTLS} -{ This code initialized the TLS segment for single threaded and static programs. - - In case of multithreaded and/or dynamically linked programs it is assumed that they - dependent anyways on glibc which initializes tls properly. - - As soon as a purely FPC dynamic loading or multithreading is implemented, the code - needs to be extended to handle these cases as well. -} - -procedure InitTLS; [public,alias:'FPC_INITTLS']; - const - PT_TLS = 7; - PT_DYNAMIC = 2; - - type - tphdr = record - p_type, - p_offset, - p_vaddr, - p_paddr, - p_filesz, - p_memsz, - p_flags, - p_align : dword; - end; - pphdr = ^tphdr; - - var - phdr : pphdr; - phnum : dword; - i : integer; - tls : pointer; - auxp : ppointer; - found : boolean; - size : SizeUInt; - begin - auxp:=ppointer(envp); - { skip environment } - while assigned(auxp^) do - inc(auxp); - inc(auxp); - phdr:=nil; - phnum:=0; - { now we are at the auxillary vector } - while assigned(auxp^) do - begin - case plongint(auxp)^ of - 3: - phdr:=pphdr(ppointer(auxp+1)^); - 5: - phnum:=pdword(auxp+1)^; - end; - inc(auxp,2); - end; - found:=false; - size:=0; - for i:=1 to phnum do - begin - case phdr^.p_type of - PT_TLS: - begin - found:=true; - inc(size,phdr^.p_memsz); - size:=Align(size,phdr^.p_align); - end; - PT_DYNAMIC: - { if the program header contains a dynamic section, the program - is linked dynamically so the dynamic linker takes care of the - allocation of the TLS segment. - - We cannot allocate it by ourself anyways as PT_TLS provides only - the size of TLS data of the executable itself - } - exit; - end; - inc(phdr); - end; - if found then - begin -{$ifdef CPUI386} - { threadvars should start at a page boundary, - add extra space for the TCB } - size:=Align(size,4096)+sizeof(Pointer); -{$endif CPUI386} - tls:=Fpmmap(nil,size,3,MAP_PRIVATE+MAP_ANONYMOUS,-1,0); - fpset_tls(tls,size); - end; - end; -{$endif CPUARM} - - {$if FPC_FULLVERSION>30200} {$if defined(CPUI386) or defined(CPUARM)} {$I abitag.inc} diff --git a/rtl/linux/x86_64/si_prc.inc b/rtl/linux/x86_64/si_prc.inc index 7e4ec9d3f7..67a15f8139 100644 --- a/rtl/linux/x86_64/si_prc.inc +++ b/rtl/linux/x86_64/si_prc.inc @@ -35,6 +35,13 @@ {$L abitag.o} +procedure InitTLS; [external name 'FPC_INITTLS']; + +{ so far, I found no case where this is actually called, so it is a dummy so far (FK) } +function __tls_get_addr(p : pointer) : pointer; public name '__tls_get_addr'; + begin + end; + {****************************************************************************** Process start/halt ******************************************************************************} @@ -71,7 +78,7 @@ procedure _FPC_proc_start; assembler; nostackframe; public name '_start'; movq %r10,%rdi xorq %rbp, %rbp - call SysEntry + call SysEntry_InitTLS {$else FPC_HAS_INDIRECT_ENTRY_INFORMATION} popq %rsi { Pop the argument count. } movq operatingsystem_parameter_argc@GOTPCREL(%rip),%rax @@ -88,6 +95,10 @@ procedure _FPC_proc_start; assembler; nostackframe; public name '_start'; movq initialstkptr@GOTPCREL(%rip),%rax movq %rsp,(%rax) +{$if FPC_FULLVERSION>30200} + call InitTLS +{$endif FPC_FULLVERSION>30200} + xorq %rbp, %rbp call PASCALMAIN {$endif FPC_HAS_INDIRECT_ENTRY_INFORMATION}