From 534661b91c98492995274c364c8177c45efc63db Mon Sep 17 00:00:00 2001 From: Austin Foxley Date: Sat, 19 Sep 2009 10:04:05 -0700 Subject: ldso/: tls support for dynamic linker Signed-off-by: Austin Foxley --- ldso/include/dl-hash.h | 61 ++- ldso/include/ldso.h | 6 + ldso/include/ldsodefs.h | 147 ++++++ ldso/ldso/Makefile.in | 11 + ldso/ldso/arm/aeabi_read_tp.S | 64 +++ ldso/ldso/arm/dl-debug.h | 4 +- ldso/ldso/arm/dl-sysdep.h | 21 +- ldso/ldso/arm/elfinterp.c | 52 +- ldso/ldso/arm/resolve.S | 4 + ldso/ldso/arm/thumb_atomics.S | 79 ++++ ldso/ldso/dl-elf.c | 67 +++ ldso/ldso/dl-hash.c | 87 ++-- ldso/ldso/dl-startup.c | 14 + ldso/ldso/dl-tls.c | 1048 +++++++++++++++++++++++++++++++++++++++++ ldso/ldso/i386/dl-sysdep.h | 7 +- ldso/ldso/i386/elfinterp.c | 32 +- ldso/ldso/ldso.c | 158 ++++++- ldso/ldso/mips/elfinterp.c | 71 ++- ldso/ldso/sh/dl-debug.h | 2 + ldso/ldso/sh/dl-sysdep.h | 9 + ldso/ldso/sh/elfinterp.c | 39 +- ldso/ldso/sparc/dl-sysdep.h | 4 +- ldso/ldso/sparc/elfinterp.c | 75 +-- ldso/libdl/libdl.c | 325 ++++++++++++- 24 files changed, 2239 insertions(+), 148 deletions(-) create mode 100644 ldso/include/ldsodefs.h create mode 100644 ldso/ldso/arm/aeabi_read_tp.S create mode 100644 ldso/ldso/arm/thumb_atomics.S create mode 100644 ldso/ldso/dl-tls.c diff --git a/ldso/include/dl-hash.h b/ldso/include/dl-hash.h index e7ca4aba8..1b28a34b6 100644 --- a/ldso/include/dl-hash.h +++ b/ldso/include/dl-hash.h @@ -34,7 +34,32 @@ struct elf_resolve { struct elf_resolve * next; struct elf_resolve * prev; /* Nothing after this address is used by gdb. */ - ElfW(Addr) mapaddr; /* Address at which ELF segments (either main app and DSO) are mapped into */ + +#if USE_TLS + /* Thread-local storage related info. */ + + /* Start of the initialization image. */ + void *l_tls_initimage; + /* Size of the initialization image. */ + size_t l_tls_initimage_size; + /* Size of the TLS block. */ + size_t l_tls_blocksize; + /* Alignment requirement of the TLS block. 
*/ + size_t l_tls_align; + /* Offset of first byte module alignment. */ + size_t l_tls_firstbyte_offset; +# ifndef NO_TLS_OFFSET +# define NO_TLS_OFFSET 0 +# endif + /* For objects present at startup time: offset in the static TLS block. */ + ptrdiff_t l_tls_offset; + /* Index of the module in the dtv array. */ + size_t l_tls_modid; + /* Nonzero if _dl_init_static_tls should be called for this module */ + unsigned int l_need_tls_init:1; +#endif + + ElfW(Addr) mapaddr; enum {elf_lib, elf_executable,program_interpreter, loaded_file} libtype; struct dyn_elf * symbol_scope; unsigned short usage_count; @@ -106,26 +131,31 @@ struct elf_resolve { extern struct dyn_elf * _dl_symbol_tables; extern struct elf_resolve * _dl_loaded_modules; -extern struct dyn_elf * _dl_handles; +extern struct dyn_elf * _dl_handles; extern struct elf_resolve * _dl_add_elf_hash_table(const char * libname, DL_LOADADDR_TYPE loadaddr, unsigned long * dynamic_info, unsigned long dynamic_addr, unsigned long dynamic_size); -extern char * _dl_lookup_hash(const char * name, struct dyn_elf * rpnt, - struct elf_resolve *mytpnt, int type_class -#ifdef __FDPIC__ - , struct elf_resolve **tpntp +#if USE_TLS || defined __FDPIC__ +#define _DL_LOOKUP_HASH_NEEDS_EXTRA_TPNT +#define _DL_LOOKUP_HASH_EXTRA_TPNT ,struct elf_resolve **tpntp +#else +#undef _DL_LOOKUP_HASH_NEEDS_EXTRA_TPNT +#define _DL_LOOKUP_HASH_EXTRA_TPNT #endif - ); +extern char * _dl_lookup_hash(const char * name, struct dyn_elf * rpnt, + struct elf_resolve *mytpnt, int type_class _DL_LOOKUP_HASH_EXTRA_TPNT); + static __always_inline char *_dl_find_hash(const char *name, struct dyn_elf *rpnt, - struct elf_resolve *mytpnt, int type_class) + struct elf_resolve *mytpnt, int type_class, + struct elf_resolve **tpntp) { -#ifdef __FDPIC__ - return _dl_lookup_hash(name, rpnt, mytpnt, type_class, NULL); +#ifdef _DL_LOOKUP_HASH_NEEDS_EXTRA_TPNT + return _dl_lookup_hash(name, rpnt, mytpnt, type_class, tpntp); #else - return _dl_lookup_hash(name, rpnt, mytpnt, 
type_class); + return _dl_lookup_hash(name, rpnt, mytpnt, type_class); #endif } @@ -148,8 +178,11 @@ static __inline__ int _dl_symbol(char * name) #define LD_ERROR_NOTDYN 5 #define LD_ERROR_MMAP_FAILED 6 #define LD_ERROR_NODYNAMIC 7 -#define LD_WRONG_RELOCS 8 -#define LD_BAD_HANDLE 9 -#define LD_NO_SYMBOL 10 +#define LD_ERROR_TLS_FAILED 8 +#define LD_WRONG_RELOCS 9 +#define LD_BAD_HANDLE 10 +#define LD_NO_SYMBOL 11 + + #endif /* _LD_HASH_H_ */ diff --git a/ldso/include/ldso.h b/ldso/include/ldso.h index dc4d92db6..1dd35febc 100644 --- a/ldso/include/ldso.h +++ b/ldso/include/ldso.h @@ -38,6 +38,10 @@ #include /* Now the ldso specific headers */ #include +#ifdef __UCLIBC_HAS_TLS__ +/* Defines USE_TLS */ +#include +#endif #include /* common align masks, if not specified by sysdep headers */ @@ -113,6 +117,8 @@ extern int _dl_debug_file; #endif extern void *_dl_malloc(size_t size); +extern void * _dl_calloc(size_t __nmemb, size_t __size); +extern void * _dl_realloc(void * __ptr, size_t __size); extern void _dl_free(void *); extern char *_dl_getenv(const char *symbol, char **envp); extern void _dl_unsetenv(const char *symbol, char **envp); diff --git a/ldso/include/ldsodefs.h b/ldso/include/ldsodefs.h new file mode 100644 index 000000000..432c7b848 --- /dev/null +++ b/ldso/include/ldsodefs.h @@ -0,0 +1,147 @@ +#ifndef _LDSODEFS_H +#define _LDSODEFS_H 1 + +#include + +#include +#include + +#ifdef __mips__ +/* The MIPS ABI specifies that the dynamic section has to be read-only. */ + +#define DL_RO_DYN_SECTION 1 + +/* TODO: Import in 64-bit relocations from glibc. */ +#endif + +#ifndef SHARED +# define EXTERN extern +#else +# ifdef IS_IN_rtld +# define EXTERN +# else +# define EXTERN extern +# endif +#endif + +/* Non-shared code has no support for multiple namespaces. */ +#ifdef SHARED +# define DL_NNS 16 +#else +# define DL_NNS 1 +#endif + +#define GL(x) _##x +#define GLRO(x) _##x + +/* Variable pointing to the end of the stack (or close to it). 
This value + must be constant over the runtime of the application. Some programs + might use the variable which results in copy relocations on some + platforms. But this does not matter, ld.so can always use the local + copy. */ +extern void *__libc_stack_end; + +/* Determine next available module ID. */ +extern size_t _dl_next_tls_modid (void) internal_function attribute_hidden; + +/* Calculate offset of the TLS blocks in the static TLS block. */ +extern void _dl_determine_tlsoffset (void) internal_function attribute_hidden; + +/* Set up the data structures for TLS, when they were not set up at startup. + Returns nonzero on malloc failure. + This is called from _dl_map_object_from_fd or by libpthread. */ +extern int _dl_tls_setup (void) internal_function; +rtld_hidden_proto (_dl_tls_setup) + +/* Allocate memory for static TLS block (unless MEM is nonzero) and dtv. */ +extern void *_dl_allocate_tls (void *mem) internal_function; + +/* Get size and alignment requirements of the static TLS block. */ +extern void _dl_get_tls_static_info (size_t *sizep, size_t *alignp) + internal_function; + +extern void _dl_allocate_static_tls (struct link_map *map) + internal_function attribute_hidden; + +/* Taken from glibc/elf/dl-reloc.c */ +#define CHECK_STATIC_TLS(sym_map) \ + do { \ + if (__builtin_expect ((sym_map)->l_tls_offset == NO_TLS_OFFSET, 0)) \ + _dl_allocate_static_tls (sym_map); \ + } while (0) + +/* These are internal entry points to the two halves of _dl_allocate_tls, + only used within rtld.c itself at startup time. */ +extern void *_dl_allocate_tls_storage (void) + internal_function attribute_hidden; +extern void *_dl_allocate_tls_init (void *) internal_function; + +/* Deallocate memory allocated with _dl_allocate_tls. */ +extern void _dl_deallocate_tls (void *tcb, bool dealloc_tcb) internal_function; + +extern void _dl_nothread_init_static_tls (struct link_map *) internal_function attribute_hidden; + +/* Highest dtv index currently needed. 
*/ +EXTERN size_t _dl_tls_max_dtv_idx; +/* Flag signalling whether there are gaps in the module ID allocation. */ +EXTERN bool _dl_tls_dtv_gaps; +/* Information about the dtv slots. */ +EXTERN struct dtv_slotinfo_list +{ + size_t len; + struct dtv_slotinfo_list *next; + struct dtv_slotinfo + { + size_t gen; + bool is_static; + struct link_map *map; + } slotinfo[0]; +} *_dl_tls_dtv_slotinfo_list; +/* Number of modules in the static TLS block. */ +EXTERN size_t _dl_tls_static_nelem; +/* Size of the static TLS block. */ +EXTERN size_t _dl_tls_static_size; +/* Size actually allocated in the static TLS block. */ +EXTERN size_t _dl_tls_static_used; +/* Alignment requirement of the static TLS block. */ +EXTERN size_t _dl_tls_static_align; +/* Function pointer for catching TLS errors. */ +EXTERN void **(*_dl_error_catch_tsd) (void) __attribute__ ((const)); + +/* Number of additional entries in the slotinfo array of each slotinfo + list element. A large number makes it almost certain that we never + have to iterate beyond the first element in the slotinfo list. */ +# define TLS_SLOTINFO_SURPLUS (62) + +/* Number of additional slots in the dtv allocated. */ +# define DTV_SURPLUS (14) + + /* Initial dtv of the main thread, not allocated with normal malloc. */ + EXTERN void *_dl_initial_dtv; + /* Generation counter for the dtv. */ + EXTERN size_t _dl_tls_generation; + + EXTERN void (*_dl_init_static_tls) (struct link_map *); + +/* We have the auxiliary vector. */ +#define HAVE_AUX_VECTOR + +/* We can assume that the kernel always provides the AT_UID, AT_EUID, + AT_GID, and AT_EGID values in the auxiliary vector from 2.4.0 or so on. */ +#if __ASSUME_AT_XID +# define HAVE_AUX_XID +#endif + +/* We can assume that the kernel always provides the AT_SECURE value + in the auxiliary vector from 2.5.74 or so on. */ +#if __ASSUME_AT_SECURE +# define HAVE_AUX_SECURE +#endif + +/* Starting with one of the 2.4.0 pre-releases the Linux kernel passes + up the page size information. 
*/ +#if __ASSUME_AT_PAGESIZE +# define HAVE_AUX_PAGESIZE +#endif + +#endif diff --git a/ldso/ldso/Makefile.in b/ldso/ldso/Makefile.in index a74c36e5e..350cc8108 100644 --- a/ldso/ldso/Makefile.in +++ b/ldso/ldso/Makefile.in @@ -15,6 +15,17 @@ CFLAGS-ldso += -fno-omit-frame-pointer CFLAGS-ldso += -I$(top_srcdir)ldso/ldso/$(TARGET_ARCH) -I$(top_srcdir)ldso/include -I$(top_srcdir)ldso/ldso CFLAGS-ldso += -DUCLIBC_RUNTIME_PREFIX=\"$(RUNTIME_PREFIX)\" -DUCLIBC_LDSO=\"$(UCLIBC_LDSO)\" +ifeq ($(DODEBUG),y) +# Not really much point in including debugging info, since gdb +# can't really debug ldso, since gdb requires help from ldso to +# debug things.... +# On arm, gcc-4.3.x onwards -Os emits calls to libgcc, which calls _div0, +# which tries to call raise(). And raise comes from libc so a catch 22. +# Using -O2 instead. We could have use -fno-early-inlining with -Os too. + +CFLAGS-ldso += -O2 -g +endif + CFLAGS-ldso/ldso/$(TARGET_ARCH)/ := $(CFLAGS-ldso) CFLAGS-ldso.c := -DLDSO_ELFINTERP=\"$(TARGET_ARCH)/elfinterp.c\" $(CFLAGS-ldso) diff --git a/ldso/ldso/arm/aeabi_read_tp.S b/ldso/ldso/arm/aeabi_read_tp.S new file mode 100644 index 000000000..f81bae676 --- /dev/null +++ b/ldso/ldso/arm/aeabi_read_tp.S @@ -0,0 +1,64 @@ +/* Copyright (C) 2005 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + In addition to the permissions in the GNU Lesser General Public + License, the Free Software Foundation gives you unlimited + permission to link the compiled version of this file with other + programs, and to distribute those programs without any restriction + coming from the use of this file. 
(The GNU Lesser General Public + License restrictions do apply in other respects; for example, they + cover modification of the file, and distribution when not linked + into another program.) + + Note that people who make modified versions of this file are not + obligated to grant this special exception for their modified + versions; it is their choice whether to do so. The GNU Lesser + General Public License gives permission to release a modified + version without this exception; this exception also makes it + possible to release a modified version which carries forward this + exception. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#include + +#ifdef __UCLIBC_HAS_THREADS_NATIVE__ + +#include +#include + +/* GCC will emit calls to this routine under -mtp=soft. Linux has an + equivalent helper function (which clobbers fewer registers than + a normal function call) in a high page of memory; tail call to the + helper. + + This function is exported from libc for use by user code. libpthread, librt, + and the dynamic linker get their own private copies, for + performance (and in the case of ld.so, out of necessity); those are + all hidden. 
*/ + +#ifndef NOT_IN_libc + .global __aeabi_read_tp +#else + .hidden __aeabi_read_tp +#endif +ENTRY (__aeabi_read_tp) + mov r0, #0xffff0fff + sub pc, r0, #31 +END (__aeabi_read_tp) + +#endif /* __UCLIBC_HAS_THREADS_NATIVE__ */ + diff --git a/ldso/ldso/arm/dl-debug.h b/ldso/ldso/arm/dl-debug.h index d5103202c..1bca6ff36 100644 --- a/ldso/ldso/arm/dl-debug.h +++ b/ldso/ldso/arm/dl-debug.h @@ -33,12 +33,14 @@ static const char *_dl_reltypes_tab[] = [4] "R_ARM_PC13", "R_ARM_ABS16", "R_ARM_ABS12", "R_ARM_THM_ABS5", [8] "R_ARM_ABS8", "R_ARM_SBREL32","R_ARM_THM_PC22", "R_ARM_THM_PC8", [12] "R_ARM_AMP_VCALL9", "R_ARM_SWI24", "R_ARM_THM_SWI8", "R_ARM_XPC25", - [16] "R_ARM_THM_XPC22", + [16] "R_ARM_THM_XPC22", "R_ARM_TLS_DTPMOD32", "R_ARM_TLS_DTPOFF32", "R_ARM_TLS_TPOFF32", [20] "R_ARM_COPY", "R_ARM_GLOB_DAT","R_ARM_JUMP_SLOT", "R_ARM_RELATIVE", [24] "R_ARM_GOTOFF", "R_ARM_GOTPC", "R_ARM_GOT32", "R_ARM_PLT32", [32] "R_ARM_ALU_PCREL_7_0","R_ARM_ALU_PCREL_15_8","R_ARM_ALU_PCREL_23_15","R_ARM_LDR_SBREL_11_0", [36] "R_ARM_ALU_SBREL_19_12","R_ARM_ALU_SBREL_27_20", [100] "R_ARM_GNU_VTENTRY","R_ARM_GNU_VTINHERIT","R_ARM_THM_PC11","R_ARM_THM_PC9", + [104] "R_ARM_TLS_GD32","R_ARM_TLS_LDM32","R_ARM_TLS_LDO32","R_ARM_TLS_IE32", + [108] "R_ARM_TLS_LE32","R_ARM_TLS_LDO12","R_ARM_TLS_LE12","R_ARM_TLS_IE12GP", [249] "R_ARM_RXPC25", "R_ARM_RSBREL32", "R_ARM_THM_RPC22", "R_ARM_RREL32", [253] "R_ARM_RABS22", "R_ARM_RPC24", "R_ARM_RBASE", }; diff --git a/ldso/ldso/arm/dl-sysdep.h b/ldso/ldso/arm/dl-sysdep.h index 75c58b0ec..5a2912ab5 100644 --- a/ldso/ldso/arm/dl-sysdep.h +++ b/ldso/ldso/arm/dl-sysdep.h @@ -5,6 +5,9 @@ * Copyright (C) 2000-2004 by Erik Andersen */ +#ifndef _ARCH_DL_SYSDEP +#define _ARCH_DL_SYSDEP + /* Define this if the system uses RELOCA. 
*/ #undef ELF_USES_RELOCA #include @@ -55,12 +58,21 @@ static __always_inline unsigned long arm_modulus(unsigned long m, unsigned long struct elf_resolve; unsigned long _dl_linux_resolver(struct elf_resolve * tpnt, int reloc_entry); -/* ELF_RTYPE_CLASS_PLT iff TYPE describes relocation of a PLT entry, so - PLT entries should not be allowed to define the value. +/* 4096 bytes alignment */ +#define PAGE_ALIGN 0xfffff000 +#define ADDR_ALIGN 0xfff +#define OFFS_ALIGN 0x7ffff000 + +/* ELF_RTYPE_CLASS_PLT iff TYPE describes relocation of a PLT entry or + TLS variable, so undefined references should not be allowed to + define the value. + ELF_RTYPE_CLASS_NOCOPY iff TYPE should not be allowed to resolve to one of the main executable's symbols, as for a COPY reloc. */ -#define elf_machine_type_class(type) \ - ((((type) == R_ARM_JUMP_SLOT) * ELF_RTYPE_CLASS_PLT) \ +#define elf_machine_type_class(type) \ + ((((type) == R_ARM_JUMP_SLOT || (type) == R_ARM_TLS_DTPMOD32 \ + || (type) == R_ARM_TLS_DTPOFF32 || (type) == R_ARM_TLS_TPOFF32) \ + * ELF_RTYPE_CLASS_PLT) \ | (((type) == R_ARM_COPY) * ELF_RTYPE_CLASS_COPY)) /* Return the link-time address of _DYNAMIC. 
Conveniently, this is the @@ -136,6 +148,7 @@ elf_machine_relative (Elf32_Addr load_off, const Elf32_Addr rel_addr, *reloc_addr += load_off; } while (--relative_count); } +#endif /* !_ARCH_DL_SYSDEP */ #ifdef __ARM_EABI__ #define DL_MALLOC_ALIGN 8 /* EABI needs 8 byte alignment for STRD LDRD */ diff --git a/ldso/ldso/arm/elfinterp.c b/ldso/ldso/arm/elfinterp.c index 197975e4a..1469df016 100644 --- a/ldso/ldso/arm/elfinterp.c +++ b/ldso/ldso/arm/elfinterp.c @@ -50,7 +50,7 @@ unsigned long _dl_linux_resolver(struct elf_resolve *tpnt, int reloc_entry) Elf32_Sym *symtab; ELF_RELOC *rel_addr; int symtab_index; - char *new_addr; + unsigned long new_addr; char **got_addr; unsigned long instr_addr; @@ -70,7 +70,7 @@ unsigned long _dl_linux_resolver(struct elf_resolve *tpnt, int reloc_entry) /* Get the address of the GOT entry */ new_addr = _dl_find_hash(symname, tpnt->symbol_scope, - tpnt, ELF_RTYPE_CLASS_PLT); + tpnt, ELF_RTYPE_CLASS_PLT, NULL); if (unlikely(!new_addr)) { _dl_dprintf(2, "%s: can't resolve symbol '%s'\n", _dl_progname, symname); @@ -89,13 +89,13 @@ unsigned long _dl_linux_resolver(struct elf_resolve *tpnt, int reloc_entry) } } if (!_dl_debug_nofixups) { - *got_addr = new_addr; + *got_addr = (char*)new_addr; } #else - *got_addr = new_addr; + *got_addr = (char*)new_addr; #endif - return (unsigned long) new_addr; + return new_addr; } static int @@ -188,28 +188,40 @@ _dl_do_reloc (struct elf_resolve *tpnt,struct dyn_elf *scope, int symtab_index; unsigned long *reloc_addr; unsigned long symbol_addr; + const Elf32_Sym *def = 0; + struct elf_resolve *def_mod = 0; int goof = 0; - reloc_addr = (unsigned long *) (tpnt->loadaddr + (unsigned long) rpnt->r_offset); + reloc_addr = (unsigned long *) (tpnt->loadaddr + + (unsigned long) rpnt->r_offset); + reloc_type = ELF32_R_TYPE(rpnt->r_info); symtab_index = ELF32_R_SYM(rpnt->r_info); symbol_addr = 0; if (symtab_index) { - - symbol_addr = (unsigned long) _dl_find_hash(strtab + symtab[symtab_index].st_name, - scope, tpnt, 
elf_machine_type_class(reloc_type)); + symbol_addr = _dl_find_hash(strtab + symtab[symtab_index].st_name, + scope, tpnt, + elf_machine_type_class(reloc_type), + &def_mod); /* * We want to allow undefined references to weak symbols - this might * have been intentional. We should not be linking local symbols * here, so all bases should be covered. */ - if (!symbol_addr && ELF32_ST_BIND(symtab[symtab_index].st_info) != STB_WEAK) { - _dl_dprintf (2, "%s: can't resolve symbol '%s'\n", - _dl_progname, strtab + symtab[symtab_index].st_name); - _dl_exit (1); + if (!symbol_addr && (ELF_ST_TYPE(symtab[symtab_index].st_info) != STT_TLS) + && (ELF32_ST_BIND(symtab[symtab_index].st_info) != STB_WEAK)) { + /* This may be non-fatal if called from dlopen. */ + return 1; + } + } else { + /* Relocs against STN_UNDEF are usually treated as using a + symbol value of zero, and using the module containing the + reloc itself. */ + symbol_addr = symtab[symtab_index].st_value; + def_mod = tpnt; } #if defined (__SUPPORT_LD_DEBUG__) @@ -265,6 +277,20 @@ _dl_do_reloc (struct elf_resolve *tpnt,struct dyn_elf *scope, _dl_memcpy((void *) reloc_addr, (void *) symbol_addr, symtab[symtab_index].st_size); break; +#if USE_TLS + case R_ARM_TLS_DTPMOD32: + *reloc_addr = def_mod->l_tls_modid; + break; + + case R_ARM_TLS_DTPOFF32: + *reloc_addr += symbol_addr; + break; + + case R_ARM_TLS_TPOFF32: + CHECK_STATIC_TLS ((struct link_map *) def_mod); + *reloc_addr += (symbol_addr + def_mod->l_tls_offset); + break; +#endif default: return -1; /*call _dl_exit(1) */ } diff --git a/ldso/ldso/arm/resolve.S b/ldso/ldso/arm/resolve.S index b422c334d..08889d06e 100644 --- a/ldso/ldso/arm/resolve.S +++ b/ldso/ldso/arm/resolve.S @@ -95,6 +95,10 @@ #include +#define sl r10 +#define fp r11 +#define ip r12 + .text .align 4 @ 16 byte boundary and there are 32 bytes below (arm case) #if !defined(__thumb__) || defined(__thumb2__) diff --git a/ldso/ldso/arm/thumb_atomics.S b/ldso/ldso/arm/thumb_atomics.S new file mode 100644 
index 000000000..f6ae3db3c --- /dev/null +++ b/ldso/ldso/arm/thumb_atomics.S @@ -0,0 +1,79 @@ +/* Copyright (C) 2006 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + In addition to the permissions in the GNU Lesser General Public + License, the Free Software Foundation gives you unlimited + permission to link the compiled version of this file with other + programs, and to distribute those programs without any restriction + coming from the use of this file. (The GNU Lesser General Public + License restrictions do apply in other respects; for example, they + cover modification of the file, and distribution when not linked + into another program.) + + Note that people who make modified versions of this file are not + obligated to grant this special exception for their modified + versions; it is their choice whether to do so. The GNU Lesser + General Public License gives permission to release a modified + version without this exception; this exception also makes it + possible to release a modified version which carries forward this + exception. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#include + +#ifdef __UCLIBC_HAS_THREADS_NATIVE__ + +#include + +#if defined __thumb__ + +/* Out-of-line atomic operations that we can't do in Thumb mode. 
+ This ends up in various libraries where it is needed (and + a few .a archives where it isn't). */ + + .hidden __thumb_swpb +ENTRY (__thumb_swpb) + swpb r0, r0, [r1] + bx lr +END (__thumb_swpb) + + .hidden __thumb_swp +ENTRY (__thumb_swp) + swp r0, r0, [r1] + bx lr +END (__thumb_swp) + + .hidden __thumb_cmpxchg +ENTRY (__thumb_cmpxchg) + stmdb sp!, {r4, lr} + mov r4, r0 +0: ldr r3, [r2] + cmp r3, r4 + bne 1f + mov r0, r4 + mov r3, #0xffff0fff + mov lr, pc + add pc, r3, #(0xffff0fc0 - 0xffff0fff) + bcc 0b + mov r3, r4 +1: mov r0, r3 + ldmia sp!, {r4, pc} +END (__thumb_cmpxchg) + +#endif /* __thumb__ */ +#endif /* __UCLIBC_HAS_THREADS_NATIVE__ */ + diff --git a/ldso/ldso/dl-elf.c b/ldso/ldso/dl-elf.c index 89708497d..75e8f7186 100644 --- a/ldso/ldso/dl-elf.c +++ b/ldso/ldso/dl-elf.c @@ -329,6 +329,9 @@ struct elf_resolve *_dl_load_elf_shared_library(int secure, ElfW(Dyn) *dpnt; struct elf_resolve *tpnt; ElfW(Phdr) *ppnt; +#if USE_TLS + ElfW(Phdr) *tlsppnt = NULL; +#endif char *status, *header; unsigned long dynamic_info[DYNAMIC_SIZE]; unsigned long *lpnt; @@ -433,6 +436,29 @@ struct elf_resolve *_dl_load_elf_shared_library(int secure, maxvma = ppnt->p_vaddr + ppnt->p_memsz; } } + if (ppnt->p_type == PT_TLS) + { +#if USE_TLS + if (ppnt->p_memsz == 0) + /* Nothing to do for an empty segment. */ + continue; + else + /* Save for after 'tpnt' is actually allocated. */ + tlsppnt = ppnt; +#else + /* + * Yup, the user was an idiot and tried to sneak in a library with + * TLS in it and we don't support it. Let's fall on our own sword + * and scream at the luser while we die. 
+ */ + _dl_dprintf(2, "%s: '%s' library contains unsupported TLS\n", + _dl_progname, libname); + _dl_internal_error_number = LD_ERROR_TLS_FAILED; + _dl_close(infile); + _dl_munmap(header, _dl_pagesize); + return NULL; +#endif + } ppnt++; } @@ -708,6 +734,37 @@ struct elf_resolve *_dl_load_elf_shared_library(int secure, tpnt->ppnt = (ElfW(Phdr) *) DL_RELOC_ADDR(tpnt->loadaddr, epnt->e_phoff); tpnt->n_phent = epnt->e_phnum; +#if USE_TLS + if (tlsppnt) + { + _dl_debug_early("Found TLS header for %s\n", libname); +#if NO_TLS_OFFSET != 0 + tpnt->l_tls_offset = NO_TLS_OFFSET; +#endif + tpnt->l_tls_blocksize = tlsppnt->p_memsz; + tpnt->l_tls_align = tlsppnt->p_align; + if (tlsppnt->p_align == 0) + tpnt->l_tls_firstbyte_offset = 0; + else + tpnt->l_tls_firstbyte_offset = tlsppnt->p_vaddr & + (tlsppnt->p_align - 1); + tpnt->l_tls_initimage_size = tlsppnt->p_filesz; + tpnt->l_tls_initimage = (void *) tlsppnt->p_vaddr; + + /* Assign the next available module ID. */ + tpnt->l_tls_modid = _dl_next_tls_modid (); + + /* We know the load address, so add it to the offset. */ + if (tpnt->l_tls_initimage != NULL) + { + unsigned int tmp = (unsigned int) tpnt->l_tls_initimage; + tpnt->l_tls_initimage = (char *) tlsppnt->p_vaddr + tpnt->loadaddr; + _dl_debug_early("Relocated TLS initial image from %x to %x (size = %x)\n", tmp, tpnt->l_tls_initimage, tpnt->l_tls_initimage_size); + tmp = 0; + } + } +#endif + /* * Add this object into the symbol chain */ @@ -816,6 +873,16 @@ int _dl_fixup(struct dyn_elf *rpnt, int now_flag) } tpnt->init_flag |= JMP_RELOCS_DONE; } + +#if 0 +/* _dl_add_to_slotinfo is called by init_tls() for initial DSO + or by dlopen() for dynamically loaded DSO. */ +#if USE_TLS + /* Add object to slot information data if necessary. 
*/ + if (tpnt->l_tls_blocksize != 0 && tls_init_tp_called) + _dl_add_to_slotinfo ((struct link_map *) tpnt); +#endif +#endif return goof; } diff --git a/ldso/ldso/dl-hash.c b/ldso/ldso/dl-hash.c index 4809c4348..3103d9f0b 100644 --- a/ldso/ldso/dl-hash.c +++ b/ldso/ldso/dl-hash.c @@ -157,18 +157,29 @@ struct elf_resolve *_dl_add_elf_hash_table(const char *libname, static __attribute_noinline__ const ElfW(Sym) * check_match (const ElfW(Sym) *sym, char *strtab, const char* undef_name, int type_class) { - if (type_class & (sym->st_shndx == SHN_UNDEF)) - /* undefined symbol itself */ - return NULL; -#ifdef __mips__ - if (sym->st_shndx == SHN_UNDEF && !(sym->st_other & STO_MIPS_PLT)) - return NULL; -#endif - - if (sym->st_value == 0) - /* No value */ - return NULL; +#if USE_TLS + if((sym->st_value == 0 && (ELF_ST_TYPE(sym->st_info) != STT_TLS)) + || (type_class & (sym->st_shndx == SHN_UNDEF))) + /* No value or undefined symbol itself */ + return NULL; + + if(ELF_ST_TYPE(sym->st_info) > STT_FUNC + && ELF_ST_TYPE(sym->st_info) != STT_COMMON + && ELF_ST_TYPE(sym->st_info) != STT_TLS) + /* Ignore all but STT_NOTYPE, STT_OBJECT, STT_FUNC and STT_COMMON + * entries (and STT_TLS if TLS is supported) since these + * are no code/data definitions. + */ + return NULL; +#else + if (type_class & (sym->st_shndx == SHN_UNDEF)) + /* undefined symbol itself */ + return NULL; + + if (sym->st_value == 0) + /* No value */ + return NULL; if (ELF_ST_TYPE(sym->st_info) > STT_FUNC && ELF_ST_TYPE(sym->st_info) != STT_COMMON) @@ -177,7 +188,7 @@ check_match (const ElfW(Sym) *sym, char *strtab, const char* undef_name, int typ * code/data definitions */ return NULL; - +#endif if (_dl_strcmp(strtab + sym->st_name, undef_name) != 0) return NULL; @@ -257,12 +268,11 @@ _dl_lookup_sysv_hash(struct elf_resolve *tpnt, ElfW(Sym) *symtab, unsigned long * This function resolves externals, and this is either called when we process * relocations or when we call an entry in the PLT table for the first time. 
*/ -char *_dl_lookup_hash(const char *name, struct dyn_elf *rpnt, - struct elf_resolve *mytpnt, int type_class -#ifdef __FDPIC__ - , struct elf_resolve **tpntp -#endif - ) +char *_dl_lookup_hash(const char *name, struct dyn_elf *rpnt, struct elf_resolve *mytpnt, int type_class +#if USE_TLS +,struct elf_resolve **tls_tpnt +#endif +) { struct elf_resolve *tpnt = NULL; ElfW(Sym) *symtab; @@ -270,8 +280,7 @@ char *_dl_lookup_hash(const char *name, struct dyn_elf *rpnt, unsigned long elf_hash_number = 0xffffffff; const ElfW(Sym) *sym = NULL; - const ElfW(Sym) *weak_sym = 0; - struct elf_resolve *weak_tpnt = 0; + char *weak_result = NULL; #ifdef __LDSO_GNU_HASH_SUPPORT__ unsigned long gnu_hash_number = _dl_gnu_hash((const unsigned char *)name); @@ -329,37 +338,29 @@ char *_dl_lookup_hash(const char *name, struct dyn_elf *rpnt, if (sym) { /* At this point we have found the requested symbol, do binding */ +#if USE_TLS + if(ELF_ST_TYPE(sym->st_info) == STT_TLS) { + _dl_assert((tls_tpnt != NULL)); + *tls_tpnt = tpnt; + + return (char*)sym->st_value; + } +#endif + switch (ELF_ST_BIND(sym->st_info)) { case STB_WEAK: #if 0 -/* Perhaps we should support old style weak symbol handling - * per what glibc does when you export LD_DYNAMIC_WEAK */ - if (!weak_sym) { - weak_tpnt = tpnt; - weak_sym = sym; - } + /* Perhaps we should support old style weak symbol handling + * per what glibc does when you export LD_DYNAMIC_WEAK */ + if (!weak_result) + weak_result = (char *)tpnt->loadaddr + sym->st_value; break; #endif case STB_GLOBAL: -#ifdef __FDPIC__ - if (tpntp) - *tpntp = tpnt; -#endif - return (char *) DL_FIND_HASH_VALUE (tpnt, type_class, sym); + return (char*)tpnt->loadaddr + sym->st_value; default: /* Local symbols not handled here */ break; } } - if (weak_sym) { -#ifdef __FDPIC__ - if (tpntp) - *tpntp = weak_tpnt; -#endif - return (char *) DL_FIND_HASH_VALUE (weak_tpnt, type_class, weak_sym); - } -#ifdef __FDPIC__ - if (tpntp) - *tpntp = NULL; -#endif - return NULL; + return 
weak_result; } diff --git a/ldso/ldso/dl-startup.c b/ldso/ldso/dl-startup.c index de9c8bc4e..6f07b960a 100644 --- a/ldso/ldso/dl-startup.c +++ b/ldso/ldso/dl-startup.c @@ -209,6 +209,20 @@ DL_START(unsigned long args) _dl_parse_dynamic_info(dpnt, tpnt->dynamic_info, NULL, load_addr); #endif + /* + * BIG ASSUMPTION: We assume that the dynamic loader does not + * have any TLS data itself. If this ever occurs + * more work than what is done below for the + * loader will have to happen. + */ +#if USE_TLS + /* This was done by _dl_memset above. */ + /* tpnt->l_tls_modid = 0; */ +# if NO_TLS_OFFSET != 0 + tpnt->l_tls_offset = NO_TLS_OFFSET; +# endif +#endif + SEND_EARLY_STDERR_DEBUG("Done scanning DYNAMIC section\n"); #if defined(PERFORM_BOOTSTRAP_GOT) diff --git a/ldso/ldso/dl-tls.c b/ldso/ldso/dl-tls.c new file mode 100644 index 000000000..e718373cd --- /dev/null +++ b/ldso/ldso/dl-tls.c @@ -0,0 +1,1048 @@ +/* vi: set sw=4 ts=4: */ +/* + * Thread-local storage handling in the ELF dynamic linker. + * + * Copyright (C) 2005 by Steven J. Hill + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. The name of the above contributors may not be + * used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +#include +#include + +void *(*_dl_calloc_function) (size_t __nmemb, size_t __size) = NULL; +void *(*_dl_realloc_function) (void *__ptr, size_t __size) = NULL; +void *(*_dl_memalign_function) (size_t __boundary, size_t __size) = NULL; + +void (*_dl_free_function) (void *__ptr); +void *_dl_memalign (size_t __boundary, size_t __size); +struct link_map *_dl_update_slotinfo (unsigned long int req_modid); + +/* Round up N to the nearest multiple of P, where P is a power of 2 + --- without using libgcc division routines. */ +#define roundup_pow2(n, p) (((n) + (p) - 1) & ~((p) - 1)) + +void * +_dl_calloc (size_t __nmemb, size_t __size) +{ + void *result; + size_t size = (__size * __nmemb); + + if (_dl_calloc_function) + return (*_dl_calloc_function) (__nmemb, __size); + + if ((result = _dl_malloc(size)) != NULL) { + _dl_memset(result, 0, size); + } + + return result; +} + +void * +_dl_realloc (void * __ptr, size_t __size) +{ + if (_dl_realloc_function) + return (*_dl_realloc_function) (__ptr, __size); + + _dl_debug_early("NOT IMPLEMENTED PROPERLY!!!\n"); + return NULL; +} + +void +_dl_free (void *__ptr) +{ + if (_dl_free_function) + (*_dl_free_function) (__ptr); + +#if 0 + _dl_debug_early("NOT IMPLEMENTED PROPERLY!!!\n"); +#endif +} + + +/* The __tls_get_addr function has two basic forms which differ in the + arguments. The IA-64 form takes two parameters, the module ID and + offset. 
The form used, among others, on IA-32 takes a reference to + a special structure which contain the same information. The second + form seems to be more often used (in the moment) so we default to + it. Users of the IA-64 form have to provide adequate definitions + of the following macros. */ +#ifndef GET_ADDR_ARGS +# define GET_ADDR_ARGS tls_index *ti +#endif +#ifndef GET_ADDR_MODULE +# define GET_ADDR_MODULE ti->ti_module +#endif +#ifndef GET_ADDR_OFFSET +# define GET_ADDR_OFFSET ti->ti_offset +#endif + +/* + * Amount of excess space to allocate in the static TLS area + * to allow dynamic loading of modules defining IE-model TLS data. + */ +#define TLS_STATIC_SURPLUS 64 + DL_NNS * 100 + +/* Value used for dtv entries for which the allocation is delayed. */ +#define TLS_DTV_UNALLOCATED ((void *) -1l) + +/* + * We are trying to perform a static TLS relocation in MAP, but it was + * dynamically loaded. This can only work if there is enough surplus in + * the static TLS area already allocated for each running thread. If this + * object's TLS segment is too big to fit, we fail. If it fits, + * we set MAP->l_tls_offset and return. + * This function intentionally does not return any value but signals error + * directly, as static TLS should be rare and code handling it should + * not be inlined as much as possible. + */ +void +internal_function __attribute_noinline__ +_dl_allocate_static_tls (struct link_map *map) +{ + /* If the alignment requirements are too high fail. 
*/ + if (map->l_tls_align > _dl_tls_static_align) + { +fail: + _dl_dprintf(2, "cannot allocate memory in static TLS block"); + _dl_exit(30); + } + +# ifdef TLS_TCB_AT_TP + size_t freebytes; + size_t n; + size_t blsize; + + freebytes = _dl_tls_static_size - _dl_tls_static_used - TLS_TCB_SIZE; + + blsize = map->l_tls_blocksize + map->l_tls_firstbyte_offset; + if (freebytes < blsize) + goto fail; + + n = (freebytes - blsize) & ~(map->l_tls_align - 1); + + size_t offset = _dl_tls_static_used + (freebytes - n + - map->l_tls_firstbyte_offset); + + map->l_tls_offset = _dl_tls_static_used = offset; +# elif defined(TLS_DTV_AT_TP) + size_t used; + size_t check; + + size_t offset = roundup_pow2 (_dl_tls_static_used, map->l_tls_align); + used = offset + map->l_tls_blocksize; + check = used; + + /* dl_tls_static_used includes the TCB at the beginning. */ + if (check > _dl_tls_static_size) + goto fail; + + map->l_tls_offset = offset; + _dl_tls_static_used = used; +# else +# error "Either TLS_TCB_AT_TP or TLS_DTV_AT_TP must be defined" +# endif + + /* + * If the object is not yet relocated we cannot initialize the + * static TLS region. Delay it. + */ + if (((struct elf_resolve *) map)->init_flag & RELOCS_DONE) + { +#ifdef SHARED + /* + * Update the slot information data for at least the generation of + * the DSO we are allocating data for. + */ + if (__builtin_expect (THREAD_DTV()[0].counter != _dl_tls_generation, 0)) + (void) _dl_update_slotinfo (map->l_tls_modid); +#endif + _dl_init_static_tls (map); + } + else + map->l_need_tls_init = 1; +} + +#ifdef SHARED +/* Initialize static TLS area and DTV for current (only) thread. + libpthread implementations should provide their own hook + to handle all threads. 
*/ +void +internal_function __attribute_noinline__ +_dl_nothread_init_static_tls (struct link_map *map) +{ +# ifdef TLS_TCB_AT_TP + void *dest = (char *) THREAD_SELF - map->l_tls_offset; +# elif defined(TLS_DTV_AT_TP) + void *dest = (char *) THREAD_SELF + map->l_tls_offset + TLS_PRE_TCB_SIZE; +# else +# error "Either TLS_TCB_AT_TP or TLS_DTV_AT_TP must be defined" +# endif + + /* Fill in the DTV slot so that a later LD/GD access will find it. */ + dtv_t *dtv = THREAD_DTV (); + if (!(map->l_tls_modid <= dtv[-1].counter)) { + _dl_dprintf(2, "map->l_tls_modid <= dtv[-1].counter FAILED!\n"); + _dl_exit(30); + } + dtv[map->l_tls_modid].pointer.val = dest; + dtv[map->l_tls_modid].pointer.is_static = true; + + /* Initialize the memory. */ + _dl_memcpy(dest, map->l_tls_initimage, map->l_tls_initimage_size); + _dl_memset((dest + map->l_tls_initimage_size), '\0', + map->l_tls_blocksize - map->l_tls_initimage_size); +} +#endif + +/* Taken from glibc/sysdeps/generic/dl-tls.c */ +static void +oom (void) +{ + _dl_debug_early("cannot allocate thread-local memory: ABORT\n"); + _dl_exit(30); +} + +size_t +internal_function +_dl_next_tls_modid (void) +{ + size_t result; + + if (__builtin_expect (_dl_tls_dtv_gaps, false)) + { + size_t disp = 0; + struct dtv_slotinfo_list *runp = _dl_tls_dtv_slotinfo_list; + + /* Note that this branch will never be executed during program + start since there are no gaps at that time. Therefore it + does not matter that the dl_tls_dtv_slotinfo is not allocated + yet when the function is called for the first times. + + NB: the offset +1 is due to the fact that DTV[0] is used + for something else. 
*/ + result = _dl_tls_static_nelem + 1; + if (result <= _dl_tls_max_dtv_idx) + do + { + while (result - disp < runp->len) + { + if (runp->slotinfo[result - disp].map == NULL) + break; + + ++result; + _dl_assert (result <= _dl_tls_max_dtv_idx + 1); + } + + if (result - disp < runp->len) + break; + + disp += runp->len; + } + while ((runp = runp->next) != NULL); + + if (result > _dl_tls_max_dtv_idx) + { + /* The new index must indeed be exactly one higher than the + previous high. */ + _dl_assert (result == _dl_tls_max_dtv_idx + 1); + /* There is no gap anymore. */ + _dl_tls_dtv_gaps = false; + + goto nogaps; + } + } + else + { + /* No gaps, allocate a new entry. */ + nogaps: + + result = ++_dl_tls_max_dtv_idx; + } + + return result; +} + +void +internal_function +_dl_determine_tlsoffset (void) +{ + size_t max_align = TLS_TCB_ALIGN; + size_t freetop = 0; + size_t freebottom = 0; + + /* The first element of the dtv slot info list is allocated. */ + _dl_assert (_dl_tls_dtv_slotinfo_list != NULL); + /* There is at this point only one element in the + dl_tls_dtv_slotinfo_list list. */ + _dl_assert (_dl_tls_dtv_slotinfo_list->next == NULL); + + struct dtv_slotinfo *slotinfo = _dl_tls_dtv_slotinfo_list->slotinfo; + + /* Determining the offset of the various parts of the static TLS + block has several dependencies. In addition we have to work + around bugs in some toolchains. + + Each TLS block from the objects available at link time has a size + and an alignment requirement. The GNU ld computes the alignment + requirements for the data at the positions *in the file*, though. + I.e, it is not simply possible to allocate a block with the size + of the TLS program header entry. The data is layed out assuming + that the first byte of the TLS block fulfills + + p_vaddr mod p_align == &TLS_BLOCK mod p_align + + This means we have to add artificial padding at the beginning of + the TLS block. 
These bytes are never used for the TLS data in + this module but the first byte allocated must be aligned + according to mod p_align == 0 so that the first byte of the TLS + block is aligned according to p_vaddr mod p_align. This is ugly + and the linker can help by computing the offsets in the TLS block + assuming the first byte of the TLS block is aligned according to + p_align. + + The extra space which might be allocated before the first byte of + the TLS block need not go unused. The code below tries to use + that memory for the next TLS block. This can work if the total + memory requirement for the next TLS block is smaller than the + gap. */ + +# ifdef TLS_TCB_AT_TP + /* We simply start with zero. */ + size_t offset = 0; + + for (size_t cnt = 1; slotinfo[cnt].map != NULL; ++cnt) + { + _dl_assert (cnt < _dl_tls_dtv_slotinfo_list->len); + + size_t firstbyte = (-slotinfo[cnt].map->l_tls_firstbyte_offset + & (slotinfo[cnt].map->l_tls_align - 1)); + size_t off; + max_align = MAX (max_align, slotinfo[cnt].map->l_tls_align); + + if (freebottom - freetop >= slotinfo[cnt].map->l_tls_blocksize) + { + off = roundup_pow2 (freetop + slotinfo[cnt].map->l_tls_blocksize + - firstbyte, slotinfo[cnt].map->l_tls_align) + + firstbyte; + if (off <= freebottom) + { + freetop = off; + + /* XXX For some architectures we perhaps should store the + negative offset. */ + slotinfo[cnt].map->l_tls_offset = off; + continue; + } + } + + off = roundup_pow2 (offset + slotinfo[cnt].map->l_tls_blocksize + - firstbyte, slotinfo[cnt].map->l_tls_align) + + firstbyte; + if (off > offset + slotinfo[cnt].map->l_tls_blocksize + + (freebottom - freetop)) + { + freetop = offset; + freebottom = off - slotinfo[cnt].map->l_tls_blocksize; + } + offset = off; + + /* XXX For some architectures we perhaps should store the + negative offset. 
*/ + slotinfo[cnt].map->l_tls_offset = off; + } + + _dl_tls_static_used = offset; + _dl_tls_static_size = (roundup_pow2 (offset + TLS_STATIC_SURPLUS, max_align) + + TLS_TCB_SIZE); +# elif defined(TLS_DTV_AT_TP) + /* The TLS blocks start right after the TCB. */ + size_t offset = TLS_TCB_SIZE; + size_t cnt; + + for (cnt = 1; slotinfo[cnt].map != NULL; ++cnt) + { + _dl_assert (cnt < _dl_tls_dtv_slotinfo_list->len); + + size_t firstbyte = (-slotinfo[cnt].map->l_tls_firstbyte_offset + & (slotinfo[cnt].map->l_tls_align - 1)); + size_t off; + max_align = MAX (max_align, slotinfo[cnt].map->l_tls_align); + + if (slotinfo[cnt].map->l_tls_blocksize <= freetop - freebottom) + { + off = roundup_pow2 (freebottom, slotinfo[cnt].map->l_tls_align); + if (off - freebottom < firstbyte) + off += slotinfo[cnt].map->l_tls_align; + if (off + slotinfo[cnt].map->l_tls_blocksize - firstbyte <= freetop) + { + slotinfo[cnt].map->l_tls_offset = off - firstbyte; + freebottom = (off + slotinfo[cnt].map->l_tls_blocksize + - firstbyte); + continue; + } + } + + off = roundup_pow2 (offset, slotinfo[cnt].map->l_tls_align); + if (off - offset < firstbyte) + off += slotinfo[cnt].map->l_tls_align; + + slotinfo[cnt].map->l_tls_offset = off - firstbyte; + if (off - firstbyte - offset > freetop - freebottom) + { + freebottom = offset; + freetop = off - firstbyte; + } + + offset = off + slotinfo[cnt].map->l_tls_blocksize - firstbyte; + } + + _dl_tls_static_used = offset; + _dl_tls_static_size = roundup_pow2 (offset + TLS_STATIC_SURPLUS, + TLS_TCB_ALIGN); +# else +# error "Either TLS_TCB_AT_TP or TLS_DTV_AT_TP must be defined" +# endif + + /* The alignment requirement for the static TLS block. */ + _dl_tls_static_align = max_align; +} + +/* This is called only when the data structure setup was skipped at startup, + when there was no need for it then. Now we have dynamically loaded + something needing TLS, or libpthread needs it. 
*/ +rtld_hidden_proto(_dl_tls_setup) +int +internal_function +_dl_tls_setup (void) +{ + _dl_assert (_dl_tls_dtv_slotinfo_list == NULL); + _dl_assert (_dl_tls_max_dtv_idx == 0); + + const size_t nelem = 2 + TLS_SLOTINFO_SURPLUS; + + _dl_tls_dtv_slotinfo_list + = _dl_calloc (1, (sizeof (struct dtv_slotinfo_list) + + nelem * sizeof (struct dtv_slotinfo))); + if (_dl_tls_dtv_slotinfo_list == NULL) + return -1; + + _dl_tls_dtv_slotinfo_list->len = nelem; + + /* Number of elements in the static TLS block. It can't be zero + because of various assumptions. The one element is null. */ + _dl_tls_static_nelem = _dl_tls_max_dtv_idx = 1; + + /* This initializes more variables for us. */ + _dl_determine_tlsoffset (); + + return 0; +} +rtld_hidden_def (_dl_tls_setup) + +static void * +internal_function +allocate_dtv (void *result) +{ + dtv_t *dtv; + size_t dtv_length; + + /* We allocate a few more elements in the dtv than are needed for the + initial set of modules. This should avoid in most cases expansions + of the dtv. */ + dtv_length = _dl_tls_max_dtv_idx + DTV_SURPLUS; + dtv = _dl_calloc (dtv_length + 2, sizeof (dtv_t)); + if (dtv != NULL) + { + /* This is the initial length of the dtv. */ + dtv[0].counter = dtv_length; + + /* The rest of the dtv (including the generation counter) is + Initialize with zero to indicate nothing there. */ + + /* Add the dtv to the thread data structures. */ + INSTALL_DTV (result, dtv); + } + else + result = NULL; + + return result; +} + +/* Get size and alignment requirements of the static TLS block. */ +void +internal_function +_dl_get_tls_static_info (size_t *sizep, size_t *alignp) +{ + *sizep = _dl_tls_static_size; + *alignp = _dl_tls_static_align; +} + +void * +internal_function +_dl_allocate_tls_storage (void) +{ + void *result; + size_t size = _dl_tls_static_size; + +# if defined(TLS_DTV_AT_TP) + /* Memory layout is: + [ TLS_PRE_TCB_SIZE ] [ TLS_TCB_SIZE ] [ TLS blocks ] + ^ This should be returned. 
*/ + size += (TLS_PRE_TCB_SIZE + _dl_tls_static_align - 1) + & ~(_dl_tls_static_align - 1); +# endif + + /* Allocate a correctly aligned chunk of memory. */ + result = _dl_memalign (_dl_tls_static_align, size); + if (__builtin_expect (result != NULL, 1)) + { + /* Allocate the DTV. */ + void *allocated = result; + +# ifdef TLS_TCB_AT_TP + /* The TCB follows the TLS blocks. */ + result = (char *) result + size - TLS_TCB_SIZE; + + /* Clear the TCB data structure. We can't ask the caller (i.e. + libpthread) to do it, because we will initialize the DTV et al. */ + _dl_memset (result, '\0', TLS_TCB_SIZE); +# elif defined(TLS_DTV_AT_TP) + result = (char *) result + size - _dl_tls_static_size; + + /* Clear the TCB data structure and TLS_PRE_TCB_SIZE bytes before it. + We can't ask the caller (i.e. libpthread) to do it, because we will + initialize the DTV et al. */ + _dl_memset ((char *) result - TLS_PRE_TCB_SIZE, '\0', + TLS_PRE_TCB_SIZE + TLS_TCB_SIZE); +# endif + + result = allocate_dtv (result); + if (result == NULL) + _dl_free (allocated); + } + + return result; +} + +void * +internal_function +_dl_allocate_tls_init (void *result) +{ + if (result == NULL) + /* The memory allocation failed. */ + return NULL; + + dtv_t *dtv = GET_DTV (result); + struct dtv_slotinfo_list *listp; + size_t total = 0; + size_t maxgen = 0; + + /* We have to prepare the dtv for all currently loaded modules using + TLS. For those which are dynamically loaded we add the values + indicating deferred allocation. */ + listp = _dl_tls_dtv_slotinfo_list; + while (1) + { + size_t cnt; + + for (cnt = total == 0 ? 1 : 0; cnt < listp->len; ++cnt) + { + struct link_map *map; + void *dest; + + /* Check for the total number of used slots. */ + if (total + cnt > _dl_tls_max_dtv_idx) + break; + + map = listp->slotinfo[cnt].map; + if (map == NULL) + /* Unused entry. */ + continue; + + /* Keep track of the maximum generation number. This might + not be the generation counter. 
*/ + maxgen = MAX (maxgen, listp->slotinfo[cnt].gen); + + if (map->l_tls_offset == NO_TLS_OFFSET) + { + /* For dynamically loaded modules we simply store + the value indicating deferred allocation. */ + dtv[map->l_tls_modid].pointer.val = TLS_DTV_UNALLOCATED; + dtv[map->l_tls_modid].pointer.is_static = false; + continue; + } + + _dl_assert (map->l_tls_modid == cnt); + _dl_assert (map->l_tls_blocksize >= map->l_tls_initimage_size); +# ifdef TLS_TCB_AT_TP + _dl_assert ((size_t) map->l_tls_offset >= map->l_tls_blocksize); + dest = (char *) result - map->l_tls_offset; +# elif defined(TLS_DTV_AT_TP) + dest = (char *) result + map->l_tls_offset; +# else +# error "Either TLS_TCB_AT_TP or TLS_DTV_AT_TP must be defined" +# endif + + /* Copy the initialization image and clear the BSS part. */ + dtv[map->l_tls_modid].pointer.val = dest; + dtv[map->l_tls_modid].pointer.is_static = true; + _dl_memcpy(dest, map->l_tls_initimage, map->l_tls_initimage_size); + _dl_memset((dest + map->l_tls_initimage_size), '\0', + map->l_tls_blocksize - map->l_tls_initimage_size); + + } + + total += cnt; + if (total >= _dl_tls_max_dtv_idx) + break; + + listp = listp->next; + _dl_assert (listp != NULL); + } + + /* The DTV version is up-to-date now. */ + dtv[0].counter = maxgen; + + return result; +} + +void * +internal_function +_dl_allocate_tls (void *mem) +{ + return _dl_allocate_tls_init (mem == NULL + ? _dl_allocate_tls_storage () + : allocate_dtv (mem)); +} + +void +internal_function +_dl_deallocate_tls (void *tcb, bool dealloc_tcb) +{ + dtv_t *dtv = GET_DTV (tcb); + size_t cnt; + + /* We need to free the memory allocated for non-static TLS. */ + for (cnt = 0; cnt < dtv[-1].counter; ++cnt) + if (! dtv[1 + cnt].pointer.is_static + && dtv[1 + cnt].pointer.val != TLS_DTV_UNALLOCATED) + _dl_free (dtv[1 + cnt].pointer.val); + + /* The array starts with dtv[-1]. */ + if (dtv != _dl_initial_dtv) + _dl_free (dtv - 1); + + if (dealloc_tcb) + { +# ifdef TLS_TCB_AT_TP + /* The TCB follows the TLS blocks. 
Back up to free the whole block. */ + tcb -= _dl_tls_static_size - TLS_TCB_SIZE; +# elif defined(TLS_DTV_AT_TP) + /* Back up the TLS_PRE_TCB_SIZE bytes. */ + tcb -= (TLS_PRE_TCB_SIZE + _dl_tls_static_align - 1) + & ~(_dl_tls_static_align - 1); +# endif + _dl_free (tcb); + } +} + +static void * +allocate_and_init (struct link_map *map) +{ + void *newp; + + newp = _dl_memalign (map->l_tls_align, map->l_tls_blocksize); + if (newp == NULL) + { + _dl_dprintf(2, "%s:%d: Out of memory!!!\n", __FUNCTION__, __LINE__); + _dl_exit(1); + } + + /* Initialize the memory. */ + _dl_memcpy (newp, map->l_tls_initimage, map->l_tls_initimage_size); + _dl_memset ((newp + map->l_tls_initimage_size), '\0', + map->l_tls_blocksize - map->l_tls_initimage_size); + + return newp; +} + +struct link_map * +_dl_update_slotinfo (unsigned long int req_modid) +{ + struct link_map *the_map = NULL; + dtv_t *dtv = THREAD_DTV (); + + /* The global dl_tls_dtv_slotinfo array contains for each module + index the generation counter current when the entry was created. + This array never shrinks so that all module indices which were + valid at some time can be used to access it. Before the first + use of a new module index in this function the array was extended + appropriately. Access also does not have to be guarded against + modifications of the array. It is assumed that pointer-size + values can be read atomically even in SMP environments. It is + possible that other threads at the same time dynamically load + code and therefore add to the slotinfo list. This is a problem + since we must not pick up any information about incomplete work. + The solution to this is to ignore all dtv slots which were + created after the one we are currently interested. We know that + dynamic loading for this module is completed and this is the last + load operation we know finished. 
*/ + unsigned long int idx = req_modid; + struct dtv_slotinfo_list *listp = _dl_tls_dtv_slotinfo_list; + + _dl_debug_early ("Updating slotinfo for module %d\n", req_modid); + + while (idx >= listp->len) + { + idx -= listp->len; + listp = listp->next; + } + + if (dtv[0].counter < listp->slotinfo[idx].gen) + { + /* The generation counter for the slot is higher than what the + current dtv implements. We have to update the whole dtv but + only those entries with a generation counter <= the one for + the entry we need. */ + size_t new_gen = listp->slotinfo[idx].gen; + size_t total = 0; + + /* We have to look through the entire dtv slotinfo list. */ + listp = _dl_tls_dtv_slotinfo_list; + do + { + size_t cnt; + + for (cnt = total == 0 ? 1 : 0; cnt < listp->len; ++cnt) + { + size_t gen = listp->slotinfo[cnt].gen; + + if (gen > new_gen) + /* This is a slot for a generation younger than the + one we are handling now. It might be incompletely + set up so ignore it. */ + continue; + + /* If the entry is older than the current dtv layout we + know we don't have to handle it. */ + if (gen <= dtv[0].counter) + continue; + + /* If there is no map this means the entry is empty. */ + struct link_map *map = listp->slotinfo[cnt].map; + if (map == NULL) + { + /* If this modid was used at some point the memory + might still be allocated. */ + if (! dtv[total + cnt].pointer.is_static + && dtv[total + cnt].pointer.val != TLS_DTV_UNALLOCATED) + { + _dl_free (dtv[total + cnt].pointer.val); + dtv[total + cnt].pointer.val = TLS_DTV_UNALLOCATED; + } + + continue; + } + + /* Check whether the current dtv array is large enough. */ + size_t modid = map->l_tls_modid; + _dl_assert (total + cnt == modid); + if (dtv[-1].counter < modid) + { + /* Reallocate the dtv. 
*/ + dtv_t *newp; + size_t newsize = _dl_tls_max_dtv_idx + DTV_SURPLUS; + size_t oldsize = dtv[-1].counter; + + _dl_assert (map->l_tls_modid <= newsize); + + if (dtv == _dl_initial_dtv) + { + /* This is the initial dtv that was allocated + during rtld startup using the dl-minimal.c + malloc instead of the real malloc. We can't + free it, we have to abandon the old storage. */ + + newp = _dl_malloc ((2 + newsize) * sizeof (dtv_t)); + if (newp == NULL) + oom (); + _dl_memcpy (newp, &dtv[-1], oldsize * sizeof (dtv_t)); + } + else + { + newp = _dl_realloc (&dtv[-1], + (2 + newsize) * sizeof (dtv_t)); + if (newp == NULL) + oom (); + } + + newp[0].counter = newsize; + + /* Clear the newly allocated part. */ + _dl_memset (newp + 2 + oldsize, '\0', + (newsize - oldsize) * sizeof (dtv_t)); + + /* Point dtv to the generation counter. */ + dtv = &newp[1]; + + /* Install this new dtv in the thread data + structures. */ + INSTALL_NEW_DTV (dtv); + } + + /* If there is currently memory allocate for this + dtv entry free it. */ + /* XXX Ideally we will at some point create a memory + pool. */ + if (! dtv[modid].pointer.is_static + && dtv[modid].pointer.val != TLS_DTV_UNALLOCATED) + /* Note that free is called for NULL is well. We + deallocate even if it is this dtv entry we are + supposed to load. The reason is that we call + memalign and not malloc. */ + _dl_free (dtv[modid].pointer.val); + + /* This module is loaded dynamically- We defer memory + allocation. */ + dtv[modid].pointer.is_static = false; + dtv[modid].pointer.val = TLS_DTV_UNALLOCATED; + + if (modid == req_modid) + the_map = map; + } + + total += listp->len; + } + while ((listp = listp->next) != NULL); + + /* This will be the new maximum generation counter. */ + dtv[0].counter = new_gen; + } + + return the_map; +} + + +/* The generic dynamic and local dynamic model cannot be used in + statically linked applications. 
*/ +void * +__tls_get_addr (GET_ADDR_ARGS) +{ + dtv_t *dtv = THREAD_DTV (); + struct link_map *the_map = NULL; + void *p; + + if (__builtin_expect (dtv[0].counter != _dl_tls_generation, 0)) + the_map = _dl_update_slotinfo (GET_ADDR_MODULE); + + p = dtv[GET_ADDR_MODULE].pointer.val; + + if (__builtin_expect (p == TLS_DTV_UNALLOCATED, 0)) + { + /* The allocation was deferred. Do it now. */ + if (the_map == NULL) + { + /* Find the link map for this module. */ + size_t idx = GET_ADDR_MODULE; + struct dtv_slotinfo_list *listp = _dl_tls_dtv_slotinfo_list; + + while (idx >= listp->len) + { + idx -= listp->len; + listp = listp->next; + } + + the_map = listp->slotinfo[idx].map; + } + + p = dtv[GET_ADDR_MODULE].pointer.val = allocate_and_init (the_map); + dtv[GET_ADDR_MODULE].pointer.is_static = false; + } + + return (char *) p + GET_ADDR_OFFSET; +} + +void +_dl_add_to_slotinfo (struct link_map *l) +{ + /* Now that we know the object is loaded successfully add + modules containing TLS data to the dtv info table. We + might have to increase its size. */ + struct dtv_slotinfo_list *listp; + struct dtv_slotinfo_list *prevp; + size_t idx = l->l_tls_modid; + + _dl_debug_early("Adding to slotinfo for %s\n", l->l_name); + + /* Find the place in the dtv slotinfo list. */ + listp = _dl_tls_dtv_slotinfo_list; + prevp = NULL; /* Needed to shut up gcc. */ + do + { + /* Does it fit in the array of this list element? */ + if (idx < listp->len) + break; + idx -= listp->len; + prevp = listp; + listp = listp->next; + } + while (listp != NULL); + + if (listp == NULL) + { + /* When we come here it means we have to add a new element + to the slotinfo list. And the new module must be in + the first slot. */ + _dl_assert (idx == 0); + + listp = prevp->next = (struct dtv_slotinfo_list *) + _dl_malloc (sizeof (struct dtv_slotinfo_list) + + TLS_SLOTINFO_SURPLUS * sizeof (struct dtv_slotinfo)); + if (listp == NULL) + { + /* We ran out of memory. 
We will simply fail this + call but don't undo anything we did so far. The + application will crash or be terminated anyway very + soon. */ + + /* We have to do this since some entries in the dtv + slotinfo array might already point to this + generation. */ + ++_dl_tls_generation; + + _dl_dprintf (_dl_debug_file, + "cannot create TLS data structures: ABORT\n"); + _dl_exit (127); + } + + listp->len = TLS_SLOTINFO_SURPLUS; + listp->next = NULL; + _dl_memset (listp->slotinfo, '\0', + TLS_SLOTINFO_SURPLUS * sizeof (struct dtv_slotinfo)); + } + + /* Add the information into the slotinfo data structure. */ + listp->slotinfo[idx].map = l; + listp->slotinfo[idx].gen = _dl_tls_generation + 1; + /* ??? ideally this would be done once per call to dlopen. However there's + no easy way to indicate whether a library used TLS, so do it here + instead. */ + /* Bump the TLS generation number. */ + _dl_tls_generation++; +} + +/* Taken from glibc/elf/rtld.c */ +static bool tls_init_tp_called; + +/* _dl_error_catch_tsd points to this for the single-threaded case. + It's reset by the thread library for multithreaded programs. */ +void ** __attribute__ ((const)) +_dl_initial_error_catch_tsd (void) +{ + static void *data; + return &data; +} + +#ifdef SHARED +void* +internal_function +init_tls (void); + +rtld_hidden_proto(init_tls) +void * +internal_function +init_tls (void) +{ + /* Number of elements in the static TLS block. */ + _dl_tls_static_nelem = _dl_tls_max_dtv_idx; + + /* Do not do this twice. The audit interface might have required + the DTV interfaces to be set up early. */ + if (_dl_initial_dtv != NULL) + return NULL; + + /* Allocate the array which contains the information about the + dtv slots. We allocate a few entries more than needed to + avoid the need for reallocation. */ + size_t nelem = _dl_tls_max_dtv_idx + 1 + TLS_SLOTINFO_SURPLUS; + + /* Allocate. 
*/ + _dl_assert (_dl_tls_dtv_slotinfo_list == NULL); + _dl_tls_dtv_slotinfo_list = (struct dtv_slotinfo_list *) + _dl_calloc (sizeof (struct dtv_slotinfo_list) + + nelem * sizeof (struct dtv_slotinfo), 1); + /* No need to check the return value. If memory allocation failed + the program would have been terminated. */ + + struct dtv_slotinfo *slotinfo = _dl_tls_dtv_slotinfo_list->slotinfo; + _dl_tls_dtv_slotinfo_list->len = nelem; + _dl_tls_dtv_slotinfo_list->next = NULL; + + /* Fill in the information from the loaded modules. No namespace + but the base one can be filled at this time. */ + int i = 0; + struct link_map *l; + for (l = (struct link_map *) _dl_loaded_modules; l != NULL; l = l->l_next) + if (l->l_tls_blocksize != 0) + { + /* This is a module with TLS data. Store the map reference. + The generation counter is zero. */ + + /* Skeep slot[0]: it will be never used */ + slotinfo[++i].map = l; + } + _dl_assert (i == _dl_tls_max_dtv_idx); + + /* Compute the TLS offsets for the various blocks. */ + _dl_determine_tlsoffset (); + + /* Construct the static TLS block and the dtv for the initial + thread. For some platforms this will include allocating memory + for the thread descriptor. The memory for the TLS block will + never be freed. It should be allocated accordingly. The dtv + array can be changed if dynamic loading requires it. */ + void *tcbp = _dl_allocate_tls_storage (); + if (tcbp == NULL) { + _dl_debug_early("\ncannot allocate TLS data structures for initial thread"); + _dl_exit(30); + } + + /* Store for detection of the special case by __tls_get_addr + so it knows not to pass this dtv to the normal realloc. */ + _dl_initial_dtv = GET_DTV (tcbp); + + /* And finally install it for the main thread. If ld.so itself uses + TLS we know the thread pointer was initialized earlier. 
*/ + const char *lossage = TLS_INIT_TP (tcbp, USE___THREAD); + if(__builtin_expect (lossage != NULL, 0)) { + _dl_debug_early("cannot set up thread-local storage: %s\n", lossage); + _dl_exit(30); + } + tls_init_tp_called = true; + + return tcbp; +} +rtld_hidden_def (init_tls) +#endif + diff --git a/ldso/ldso/i386/dl-sysdep.h b/ldso/ldso/i386/dl-sysdep.h index 6e84861e4..a66c80212 100644 --- a/ldso/ldso/i386/dl-sysdep.h +++ b/ldso/ldso/i386/dl-sysdep.h @@ -31,7 +31,9 @@ extern unsigned long _dl_linux_resolver(struct elf_resolve * tpnt, int reloc_ent ELF_RTYPE_CLASS_NOCOPY iff TYPE should not be allowed to resolve to one of the main executable's symbols, as for a COPY reloc. */ #define elf_machine_type_class(type) \ - ((((type) == R_386_JMP_SLOT) * ELF_RTYPE_CLASS_PLT) \ + ((((type) == R_386_JMP_SLOT || (type) == R_386_TLS_DTPMOD32 \ + || (type) == R_386_TLS_DTPOFF32 || (type) == R_386_TLS_TPOFF32 \ + || (type) == R_386_TLS_TPOFF) * ELF_RTYPE_CLASS_PLT) \ | (((type) == R_386_COPY) * ELF_RTYPE_CLASS_COPY)) /* Return the link-time address of _DYNAMIC. Conveniently, this is the @@ -55,9 +57,10 @@ elf_machine_load_address (void) it to assembly. We need a dummy reference to some global variable via the GOT to make sure the compiler initialized %ebx in time. */ Elf32_Addr addr; + int tmp; __asm__ ("leal _dl_start@GOTOFF(%%ebx), %0\n" "subl _dl_start@GOT(%%ebx), %0" - : "=r" (addr) : "m" (_dl_errno) : "cc"); + : "=r" (addr) : "m" (tmp) : "cc"); return addr; } diff --git a/ldso/ldso/i386/elfinterp.c b/ldso/ldso/i386/elfinterp.c index 94f7405e1..af0b397d0 100644 --- a/ldso/ldso/i386/elfinterp.c +++ b/ldso/ldso/i386/elfinterp.c @@ -71,7 +71,7 @@ _dl_linux_resolver(struct elf_resolve *tpnt, int reloc_entry) got_addr = (char **)instr_addr; /* Get the address of the GOT entry. 
*/ - new_addr = _dl_find_hash(symname, tpnt->symbol_scope, tpnt, ELF_RTYPE_CLASS_PLT); + new_addr = _dl_find_hash(symname, tpnt->symbol_scope, tpnt, ELF_RTYPE_CLASS_PLT, NULL); if (unlikely(!new_addr)) { _dl_dprintf(2, "%s: can't resolve symbol '%s' in lib '%s'.\n", _dl_progname, symname, tpnt->libname); _dl_exit(1); @@ -162,6 +162,7 @@ _dl_do_reloc(struct elf_resolve *tpnt, struct dyn_elf *scope, int reloc_type; int symtab_index; char *symname; + struct elf_resolve *tls_tpnt = 0; unsigned long *reloc_addr; unsigned long symbol_addr; #if defined (__SUPPORT_LD_DEBUG__) @@ -176,16 +177,21 @@ _dl_do_reloc(struct elf_resolve *tpnt, struct dyn_elf *scope, if (symtab_index) { symbol_addr = (unsigned long)_dl_find_hash(symname, scope, tpnt, - elf_machine_type_class(reloc_type)); + elf_machine_type_class(reloc_type), &tls_tpnt); /* * We want to allow undefined references to weak symbols - this * might have been intentional. We should not be linking local * symbols here, so all bases should be covered. */ - if (unlikely(!symbol_addr && ELF32_ST_BIND(symtab[symtab_index].st_info) != STB_WEAK)) + if (unlikely(!symbol_addr && (ELF_ST_TYPE(symtab[symtab_index].st_info) != STT_TLS) + && ELF32_ST_BIND(symtab[symtab_index].st_info) != STB_WEAK)) return 1; + } else { + symbol_addr = symtab[symtab_index].st_value; + tls_tpnt = tpnt; } + #if defined (__SUPPORT_LD_DEBUG__) old_val = *reloc_addr; @@ -222,6 +228,26 @@ _dl_do_reloc(struct elf_resolve *tpnt, struct dyn_elf *scope, symtab[symtab_index].st_size); } break; +#if USE_TLS + case R_386_TLS_DTPMOD32: + *reloc_addr = tls_tpnt->l_tls_modid; + break; + case R_386_TLS_DTPOFF32: + /* During relocation all TLS symbols are defined and used. + * Therefore the offset is already correct. */ + *reloc_addr = symbol_addr; + break; + case R_386_TLS_TPOFF32: + /* The offset is positive, backward from the thread pointer. 
*/ + CHECK_STATIC_TLS((struct link_map*) tls_tpnt); + *reloc_addr += tls_tpnt->l_tls_offset - symbol_addr; + break; + case R_386_TLS_TPOFF: + /* The offset is negative, forward from the thread pointer. */ + CHECK_STATIC_TLS((struct link_map*) tls_tpnt); + *reloc_addr += symbol_addr - tls_tpnt->l_tls_offset; + break; +#endif default: return -1; } diff --git a/ldso/ldso/ldso.c b/ldso/ldso/ldso.c index 786775a44..485204298 100644 --- a/ldso/ldso/ldso.c +++ b/ldso/ldso/ldso.c @@ -38,6 +38,10 @@ #define ALLOW_ZERO_PLTGOT +#if USE_TLS +#include "dl-tls.c" +#endif + /* Pull in the value of _dl_progname */ #include LDSO_ELFINTERP @@ -96,13 +100,15 @@ extern void _start(void); #ifdef __UCLIBC_HAS_SSP__ # include -static uintptr_t stack_chk_guard; +uintptr_t stack_chk_guard; # ifndef THREAD_SET_STACK_GUARD /* Only exported for architectures that don't store the stack guard canary * in local thread area. */ uintptr_t __stack_chk_guard attribute_relro; -# endif -# ifdef __UCLIBC_HAS_SSP_COMPAT__ +# ifdef __UCLIBC_HAS_SSP_COMPAT__ +strong_alias(__stack_chk_guard,__guard) +# endif +# elif __UCLIBC_HAS_SSP_COMPAT__ uintptr_t __guard attribute_relro; # endif #endif @@ -213,11 +219,31 @@ static void *_dl_zalloc(size_t size) return p; } -void _dl_free (void *p) +#if USE_TLS +void * _dl_memalign (size_t __boundary, size_t __size) { - if (_dl_free_function) - (*_dl_free_function) (p); + void *result; + int i = 0; + size_t delta; + size_t rounded = 0; + + if (_dl_memalign_function) + return (*_dl_memalign_function) (__boundary, __size); + + while (rounded < __boundary) { + rounded = (1 << i++); + } + + delta = (((size_t) _dl_malloc_addr + __size) & (rounded - 1)); + + if ((result = _dl_malloc(rounded - delta)) == NULL) + return result; + + result = _dl_malloc(__size); + + return result; } +#endif static void __attribute__ ((destructor)) __attribute_used__ _dl_fini(void) { @@ -262,6 +288,10 @@ void _dl_get_ready_to_run(struct elf_resolve *tpnt, DL_LOADADDR_TYPE load_addr, ElfW(Addr) 
relro_addr = 0; size_t relro_size = 0; struct stat st; +#if USE_TLS + void *tcbp = NULL; +#endif + /* Wahoo!!! We managed to make a function call! Get malloc * setup so we can use _dl_dprintf() to print debug noise @@ -336,18 +366,22 @@ void _dl_get_ready_to_run(struct elf_resolve *tpnt, DL_LOADADDR_TYPE load_addr, unlazy = RTLD_NOW; } - /* sjhill: your TLS init should go before this */ +#if USE_TLS + _dl_error_catch_tsd = &_dl_initial_error_catch_tsd; + _dl_init_static_tls = &_dl_nothread_init_static_tls; +#endif + #ifdef __UCLIBC_HAS_SSP__ /* Set up the stack checker's canary. */ stack_chk_guard = _dl_setup_stack_chk_guard (); # ifdef THREAD_SET_STACK_GUARD THREAD_SET_STACK_GUARD (stack_chk_guard); +# ifdef __UCLIBC_HAS_SSP_COMPAT__ + __guard = stack_chk_guard; +# endif # else __stack_chk_guard = stack_chk_guard; # endif -# ifdef __UCLIBC_HAS_SSP_COMPAT__ - __guard = stack_chk_guard; -# endif #endif /* At this point we are now free to examine the user application, @@ -461,10 +495,53 @@ void _dl_get_ready_to_run(struct elf_resolve *tpnt, DL_LOADADDR_TYPE load_addr, _dl_debug_early("Lib Loader: (%x) %s\n", (unsigned) DL_LOADADDR_BASE(tpnt->loadaddr), tpnt->libname); } + + /* Discover any TLS sections if the target supports them. */ + if (ppnt->p_type == PT_TLS) { +#if USE_TLS + if (ppnt->p_memsz > 0) { + app_tpnt->l_tls_blocksize = ppnt->p_memsz; + app_tpnt->l_tls_align = ppnt->p_align; + if (ppnt->p_align == 0) + app_tpnt->l_tls_firstbyte_offset = 0; + else + app_tpnt->l_tls_firstbyte_offset = + (ppnt->p_vaddr & (ppnt->p_align - 1)); + app_tpnt->l_tls_initimage_size = ppnt->p_filesz; + app_tpnt->l_tls_initimage = (void *) ppnt->p_vaddr; + + /* This image gets the ID one. 
*/ + _dl_tls_max_dtv_idx = app_tpnt->l_tls_modid = 1; + + } + _dl_debug_early("Found TLS header for appplication program\n"); + break; +#else + _dl_dprintf(_dl_debug_file, "Program uses unsupported TLS data!\n"); + _dl_exit(1); +#endif + } } app_tpnt->relro_addr = relro_addr; app_tpnt->relro_size = relro_size; +#if USE_TLS + /* + * Adjust the address of the TLS initialization image in + * case the executable is actually an ET_DYN object. + */ + if (app_tpnt->l_tls_initimage != NULL) + { +#ifdef __SUPPORT_LD_DEBUG_EARLY__ + unsigned int tmp = (unsigned int) app_tpnt->l_tls_initimage; +#endif + app_tpnt->l_tls_initimage = + (char *) app_tpnt->l_tls_initimage + app_tpnt->loadaddr; + _dl_debug_early("Relocated TLS initial image from %x to %x (size = %x)\n", tmp, app_tpnt->l_tls_initimage, app_tpnt->l_tls_initimage_size); + + } +#endif + #ifdef __SUPPORT_LD_DEBUG__ _dl_debug = _dl_getenv("LD_DEBUG", envp); if (_dl_debug) { @@ -603,6 +680,7 @@ void _dl_get_ready_to_run(struct elf_resolve *tpnt, DL_LOADADDR_TYPE load_addr, #ifdef __LDSO_PRELOAD_FILE_SUPPORT__ do { + struct stat st; char *preload; int fd; char c, *cp, *cp2; @@ -850,6 +928,22 @@ void _dl_get_ready_to_run(struct elf_resolve *tpnt, DL_LOADADDR_TYPE load_addr, } #endif +#if USE_TLS + /* We do not initialize any of the TLS functionality unless any of the + * initial modules uses TLS. This makes dynamic loading of modules with + * TLS impossible, but to support it requires either eagerly doing setup + * now or lazily doing it later. Doing it now makes us incompatible with + * an old kernel that can't perform TLS_INIT_TP, even if no TLS is ever + * used. Trying to do it lazily is too hairy to try when there could be + * multiple threads (from a non-TLS-using libpthread). 
*/ + bool was_tls_init_tp_called = tls_init_tp_called; + if (tcbp == NULL) + { + _dl_debug_early("Calling init_tls()!\n"); + tcbp = init_tls (); + } +#endif + _dl_debug_early("Beginning relocation fixups\n"); #ifdef __mips__ @@ -875,6 +969,30 @@ void _dl_get_ready_to_run(struct elf_resolve *tpnt, DL_LOADADDR_TYPE load_addr, _dl_protect_relro (tpnt); } +#if USE_TLS + if (!was_tls_init_tp_called && _dl_tls_max_dtv_idx > 0) + ++_dl_tls_generation; + + _dl_debug_early("Calling _dl_allocate_tls_init()!\n"); + + /* Now that we have completed relocation, the initializer data + for the TLS blocks has its final values and we can copy them + into the main thread's TLS area, which we allocated above. */ + _dl_allocate_tls_init (tcbp); + + /* And finally install it for the main thread. If ld.so itself uses + TLS we know the thread pointer was initialized earlier. */ + if (! tls_init_tp_called) + { + const char *lossage = (char *) TLS_INIT_TP (tcbp, USE___THREAD); + if (__builtin_expect (lossage != NULL, 0)) + { + _dl_debug_early("cannot set up thread-local storage: %s\n", lossage); + _dl_exit(30); + } + } +#endif /* USE_TLS */ + /* OK, at this point things are pretty much ready to run. Now we need * to touch up a few items that are required, and then we can let the * user application have at it. Note that the dynamic linker itself @@ -882,7 +1000,7 @@ void _dl_get_ready_to_run(struct elf_resolve *tpnt, DL_LOADADDR_TYPE load_addr, * ld.so.1, so we have to look up each symbol individually. 
*/ - _dl_envp = (unsigned long *) (intptr_t) _dl_find_hash(__C_SYMBOL_PREFIX__ "__environ", _dl_symbol_tables, NULL, 0); + _dl_envp = (unsigned long *) (intptr_t) _dl_find_hash(__C_SYMBOL_PREFIX__ "__environ", _dl_symbol_tables, NULL, 0, NULL); if (_dl_envp) *_dl_envp = (unsigned long) envp; @@ -938,7 +1056,23 @@ void _dl_get_ready_to_run(struct elf_resolve *tpnt, DL_LOADADDR_TYPE load_addr, /* Find the real malloc function and make ldso functions use that from now on */ _dl_malloc_function = (void* (*)(size_t)) (intptr_t) _dl_find_hash(__C_SYMBOL_PREFIX__ "malloc", - _dl_symbol_tables, NULL, ELF_RTYPE_CLASS_PLT); + _dl_symbol_tables, NULL, ELF_RTYPE_CLASS_PLT, NULL); + +#if USE_TLS + /* Find the real functions and make ldso functions use them from now on */ + _dl_calloc_function = (void* (*)(size_t, size_t)) (intptr_t) + _dl_find_hash(__C_SYMBOL_PREFIX__ "calloc", _dl_symbol_tables, NULL, ELF_RTYPE_CLASS_PLT, NULL); + + _dl_realloc_function = (void* (*)(void *, size_t)) (intptr_t) + _dl_find_hash(__C_SYMBOL_PREFIX__ "realloc", _dl_symbol_tables, NULL, ELF_RTYPE_CLASS_PLT, NULL); + + _dl_free_function = (void (*)(void *)) (intptr_t) + _dl_find_hash(__C_SYMBOL_PREFIX__ "free", _dl_symbol_tables, NULL, ELF_RTYPE_CLASS_PLT, NULL); + + _dl_memalign_function = (void* (*)(size_t, size_t)) (intptr_t) + _dl_find_hash(__C_SYMBOL_PREFIX__ "memalign", _dl_symbol_tables, NULL, ELF_RTYPE_CLASS_PLT, NULL); + +#endif /* Notify the debugger that all objects are now mapped in. 
*/ _dl_debug_addr->r_state = RT_CONSISTENT; diff --git a/ldso/ldso/mips/elfinterp.c b/ldso/ldso/mips/elfinterp.c index 8d9b7c413..97a86a137 100644 --- a/ldso/ldso/mips/elfinterp.c +++ b/ldso/ldso/mips/elfinterp.c @@ -56,7 +56,7 @@ unsigned long __dl_runtime_resolve(unsigned long sym_index, symname = strtab + sym->st_name; new_addr = (unsigned long) _dl_find_hash(symname, - tpnt->symbol_scope, tpnt, ELF_RTYPE_CLASS_PLT); + tpnt->symbol_scope, tpnt, ELF_RTYPE_CLASS_PLT, NULL); if (unlikely(!new_addr)) { _dl_dprintf (2, "%s: can't resolve symbol '%s'\n", _dl_progname, symname); @@ -111,7 +111,7 @@ __dl_runtime_pltresolve(struct elf_resolve *tpnt, int reloc_entry) got_addr = (char **)instr_addr; /* Get the address of the GOT entry. */ - new_addr = _dl_find_hash(symname, tpnt->symbol_scope, tpnt, ELF_RTYPE_CLASS_PLT); + new_addr = _dl_find_hash(symname, tpnt->symbol_scope, tpnt, ELF_RTYPE_CLASS_PLT, NULL); if (unlikely(!new_addr)) { _dl_dprintf(2, "%s: can't resolve symbol '%s' in lib '%s'.\n", _dl_progname, symname, tpnt->libname); _dl_exit(1); @@ -188,13 +188,66 @@ int _dl_parse_relocation_information(struct dyn_elf *xpnt, symbol_addr = (unsigned long)_dl_find_hash(symname, tpnt->symbol_scope, tpnt, - elf_machine_type_class(reloc_type)); + elf_machine_type_class(reloc_type), NULL); if (unlikely(!symbol_addr && ELF32_ST_BIND(symtab[symtab_index].st_info) != STB_WEAK)) return 1; } switch (reloc_type) { -#if _MIPS_SIM == _MIPS_SIM_ABI64 +#if USE_TLS +# if _MIPS_SIM == _MIPS_SIM_ABI64 + case R_MIPS_TLS_DTPMOD64: + case R_MIPS_TLS_DTPREL64: + case R_MIPS_TLS_TPREL64: +# else + case R_MIPS_TLS_DTPMOD32: + case R_MIPS_TLS_DTPREL32: + case R_MIPS_TLS_TPREL32: +# endif + { + ElfW(Sym) *sym_tls = &symtab[symtab_index]; + struct elf_resolve *tpnt_tls = tpnt; + + if (ELF32_ST_BIND(symtab[symtab_index].st_info) != STB_LOCAL) { + _dl_find_hash((strtab + symtab[symtab_index].st_name), + _dl_symbol_tables, tpnt_tls, 1, &sym_tls); + } + + switch (reloc_type) + { + case 
R_MIPS_TLS_DTPMOD64:
+			case R_MIPS_TLS_DTPMOD32:
+				if (tpnt_tls)
+					*(ElfW(Word) *)reloc_addr = tpnt_tls->l_tls_modid;
+#if defined (__SUPPORT_LD_DEBUG__)
+_dl_dprintf(2, "TLS_DTPMOD : %s, %d, %d\n", (strtab + symtab[symtab_index].st_name), old_val, *((unsigned int *)reloc_addr));
+#endif
+				break;
+
+			case R_MIPS_TLS_DTPREL64:
+			case R_MIPS_TLS_DTPREL32:
+				*(ElfW(Word) *)reloc_addr +=
+					TLS_DTPREL_VALUE (sym_tls);
+#if defined (__SUPPORT_LD_DEBUG__)
+_dl_dprintf(2, "TLS_DTPREL : %s, %x, %x\n", (strtab + symtab[symtab_index].st_name), old_val, *((unsigned int *)reloc_addr));
+#endif
+				break;
+
+			case R_MIPS_TLS_TPREL32:
+			case R_MIPS_TLS_TPREL64:
+				CHECK_STATIC_TLS((struct link_map *)tpnt_tls);
+				*(ElfW(Word) *)reloc_addr +=
+					TLS_TPREL_VALUE (tpnt_tls, sym_tls);
+#if defined (__SUPPORT_LD_DEBUG__)
+_dl_dprintf(2, "TLS_TPREL : %s, %x, %x\n", (strtab + symtab[symtab_index].st_name), old_val, *((unsigned int *)reloc_addr));
+#endif
+				break;
+			}
+
+			break;
+		}
+#endif /* USE_TLS */
+#if _MIPS_SIM == _MIPS_SIM_ABI64
 		case (R_MIPS_64 << 8) | R_MIPS_REL32:
 #else	/* O32 || N32 */
 		case R_MIPS_REL32:
@@ -241,9 +294,9 @@ int _dl_parse_relocation_information(struct dyn_elf *xpnt,
 				_dl_dprintf(2, "symbol '%s': ", strtab + symtab[symtab_index].st_name);
 
 #if defined (__SUPPORT_LD_DEBUG__)
-			_dl_dprintf(2, "can't handle reloc type %s\n ", _dl_reltypes(reloc_type));
+			_dl_dprintf(2, "can't handle reloc type '%s' in lib '%s'\n", _dl_reltypes(reloc_type), tpnt->libname);
 #else
-			_dl_dprintf(2, "can't handle reloc type %x\n", reloc_type);
+			_dl_dprintf(2, "can't handle reloc type %x in lib '%s'\n", reloc_type, tpnt->libname);
 #endif
 			_dl_exit(1);
 		}
@@ -292,12 +345,12 @@ void _dl_perform_mips_global_got_relocations(struct elf_resolve *tpnt, int lazy)
 				}
 				else {
 					*got_entry = (unsigned long) _dl_find_hash(strtab +
-						sym->st_name, tpnt->symbol_scope, tpnt, ELF_RTYPE_CLASS_PLT);
+						sym->st_name, tpnt->symbol_scope, tpnt, ELF_RTYPE_CLASS_PLT, NULL);
 				}
 			} else if (sym->st_shndx == SHN_COMMON) {
*got_entry = (unsigned long) _dl_find_hash(strtab + - sym->st_name, tpnt->symbol_scope, tpnt, ELF_RTYPE_CLASS_PLT); + sym->st_name, tpnt->symbol_scope, tpnt, ELF_RTYPE_CLASS_PLT, NULL); } else if (ELF_ST_TYPE(sym->st_info) == STT_FUNC && *got_entry != sym->st_value && tmp_lazy) { @@ -309,7 +362,7 @@ void _dl_perform_mips_global_got_relocations(struct elf_resolve *tpnt, int lazy) } else { *got_entry = (unsigned long) _dl_find_hash(strtab + - sym->st_name, tpnt->symbol_scope, tpnt, ELF_RTYPE_CLASS_PLT); + sym->st_name, tpnt->symbol_scope, tpnt, ELF_RTYPE_CLASS_PLT, NULL); } got_entry++; diff --git a/ldso/ldso/sh/dl-debug.h b/ldso/ldso/sh/dl-debug.h index e862da1ee..e2e74f8e4 100644 --- a/ldso/ldso/sh/dl-debug.h +++ b/ldso/ldso/sh/dl-debug.h @@ -36,6 +36,8 @@ static const char *_dl_reltypes_tab[] = [25] "R_SH_SWITCH16","R_SH_SWITCH32","R_SH_USES", [28] "R_SH_COUNT", "R_SH_ALIGN", "R_SH_CODE", "R_SH_DATA", [32] "R_SH_LABEL", "R_SH_SWITCH8", "R_SH_GNU_VTINHERIT","R_SH_GNU_VTENTRY", +[144] "R_SH_TLS_GD_32","R_SH_TLS_LD_32", "R_SH_TLS_LDO_32", "R_SH_TLS_IE_32", +[148] "R_SH_TLS_LE_32","R_SH_TLS_DTPMOD32", "R_SH_TLS_DTPOFF32", "R_SH_TLS_TPOFF32", [160] "R_SH_GOT32", "R_SH_PLT32", "R_SH_COPY", "R_SH_GLOB_DAT", [164] "R_SH_JMP_SLOT","R_SH_RELATIVE","R_SH_GOTOFF", "R_SH_GOTPC", }; diff --git a/ldso/ldso/sh/dl-sysdep.h b/ldso/ldso/sh/dl-sysdep.h index d4fc78402..7937ceb08 100644 --- a/ldso/ldso/sh/dl-sysdep.h +++ b/ldso/ldso/sh/dl-sysdep.h @@ -6,6 +6,7 @@ /* Define this if the system uses RELOCA. */ #define ELF_USES_RELOCA #include +#include /* * Initialization sequence for a GOT. */ @@ -88,9 +89,17 @@ _dl_urem(unsigned int n, unsigned int base) define the value. ELF_RTYPE_CLASS_NOCOPY iff TYPE should not be allowed to resolve to one of the main executable's symbols, as for a COPY reloc. 
*/ +#if defined USE_TLS +# define elf_machine_type_class(type) \ + ((((type) == R_SH_JMP_SLOT || (type) == R_SH_TLS_DTPMOD32 \ + || (type) == R_SH_TLS_DTPOFF32 || (type) == R_SH_TLS_TPOFF32) \ + * ELF_RTYPE_CLASS_PLT) \ + | (((type) == R_SH_COPY) * ELF_RTYPE_CLASS_COPY)) +#else #define elf_machine_type_class(type) \ ((((type) == R_SH_JMP_SLOT) * ELF_RTYPE_CLASS_PLT) \ | (((type) == R_SH_COPY) * ELF_RTYPE_CLASS_COPY)) +#endif /* Return the link-time address of _DYNAMIC. Conveniently, this is the first element of the GOT. This must be inlined in a function which diff --git a/ldso/ldso/sh/elfinterp.c b/ldso/ldso/sh/elfinterp.c index 964b2ea37..5f2db417f 100644 --- a/ldso/ldso/sh/elfinterp.c +++ b/ldso/ldso/sh/elfinterp.c @@ -69,7 +69,8 @@ unsigned long _dl_linux_resolver(struct elf_resolve *tpnt, int reloc_entry) got_addr = (char **) instr_addr; /* Get the address of the GOT entry */ - new_addr = _dl_find_hash(symname, tpnt->symbol_scope, tpnt, ELF_RTYPE_CLASS_PLT); + new_addr = _dl_find_hash(symname, tpnt->symbol_scope, tpnt, ELF_RTYPE_CLASS_PLT, NULL); + if (unlikely(!new_addr)) { _dl_dprintf(2, "%s: can't resolve symbol '%s'\n", _dl_progname, symname); _dl_exit(1); @@ -159,6 +160,9 @@ _dl_do_reloc (struct elf_resolve *tpnt,struct dyn_elf *scope, unsigned long old_val; #endif +struct elf_resolve *tls_tpnt = NULL; + + reloc_addr = (unsigned long *)(intptr_t) (tpnt->loadaddr + (unsigned long) rpnt->r_offset); reloc_type = ELF32_R_TYPE(rpnt->r_info); symtab_index = ELF32_R_SYM(rpnt->r_info); @@ -167,21 +171,18 @@ _dl_do_reloc (struct elf_resolve *tpnt,struct dyn_elf *scope, if (symtab_index) { symbol_addr = (unsigned long) _dl_find_hash(symname, scope, tpnt, - elf_machine_type_class(reloc_type)); - + elf_machine_type_class(reloc_type), &tls_tpnt); /* * We want to allow undefined references to weak symbols - this might * have been intentional. We should not be linking local symbols * here, so all bases should be covered. 
*/
-		if (!symbol_addr && ELF32_ST_BIND(symtab[symtab_index].st_info) != STB_WEAK) {
+
+		if (!symbol_addr && (ELF_ST_TYPE(symtab[symtab_index].st_info) != STT_TLS) &&(ELF32_ST_BIND(symtab[symtab_index].st_info) != STB_WEAK)) {
 			_dl_dprintf(2, "%s: can't resolve symbol '%s'\n",
 				_dl_progname, strtab + symtab[symtab_index].st_name);
 
-			/*
-			 * The caller should handle the error: undefined reference to weak symbols
-			 * are not fatal.
-			 */
+			/* Let the caller handle the error: it may be non fatal if called from dlopen */
 			return 1;
 		}
 	}
@@ -189,6 +190,14 @@ _dl_do_reloc (struct elf_resolve *tpnt,struct dyn_elf *scope,
 #if defined (__SUPPORT_LD_DEBUG__)
 	old_val = *reloc_addr;
 #endif
+
+#if USE_TLS
+	/* In case of a TLS reloc, tls_tpnt NULL means we have an 'anonymous' symbol.
+	   This is the case of a static tls variable, so the lookup module is just
+	   the one that is referencing the tls variable. */
+	if(!tls_tpnt)
+		tls_tpnt = tpnt;
+#endif
 	switch (reloc_type) {
 		case R_SH_NONE:
 			break;
@@ -215,6 +224,20 @@ _dl_do_reloc (struct elf_resolve *tpnt,struct dyn_elf *scope,
 		case R_SH_RELATIVE:
 			*reloc_addr = (unsigned long) tpnt->loadaddr + rpnt->r_addend;
 			break;
+#if USE_TLS
+		case R_SH_TLS_DTPMOD32:
+			*reloc_addr = tls_tpnt->l_tls_modid;
+			break;
+
+		case R_SH_TLS_DTPOFF32:
+			*reloc_addr = symbol_addr;
+			break;
+
+		case R_SH_TLS_TPOFF32:
+			CHECK_STATIC_TLS ((struct link_map *) tls_tpnt);
+			*reloc_addr = tls_tpnt->l_tls_offset + symbol_addr + rpnt->r_addend;
+			break;
+#endif
 
 		default:
 			return -1;
diff --git a/ldso/ldso/sparc/dl-sysdep.h b/ldso/ldso/sparc/dl-sysdep.h
index fc42de86b..d35a39147 100644
--- a/ldso/ldso/sparc/dl-sysdep.h
+++ b/ldso/ldso/sparc/dl-sysdep.h
@@ -97,7 +97,9 @@ sparc_mod(unsigned long m, unsigned long p)
    ELF_RTYPE_CLASS_NOCOPY iff TYPE should not be allowed to resolve to one
    of the main executable's symbols, as for a COPY reloc.
*/ #define elf_machine_type_class(type) \ - ((((type) == R_SPARC_JMP_SLOT) * ELF_RTYPE_CLASS_PLT) \ + ((((type) == R_SPARC_JMP_SLOT || (type) == R_SPARC_TLS_DTPMOD32 \ + || (type) == R_SPARC_TLS_DTPOFF32 || (type) == R_SPARC_TLS_TPOFF32) \ + * ELF_RTYPE_CLASS_PLT) \ | (((type) == R_SPARC_COPY) * ELF_RTYPE_CLASS_COPY)) /* The SPARC overlaps DT_RELA and DT_PLTREL. */ diff --git a/ldso/ldso/sparc/elfinterp.c b/ldso/ldso/sparc/elfinterp.c index ce3991f90..5f3617bbe 100644 --- a/ldso/ldso/sparc/elfinterp.c +++ b/ldso/ldso/sparc/elfinterp.c @@ -80,7 +80,7 @@ _dl_linux_resolver(struct elf_resolve *tpnt, int reloc_entry) got_addr = (char **)instr_addr; /* Get the address of the GOT entry */ - new_addr = _dl_find_hash(symname, tpnt->symbol_scope, tpnt, ELF_RTYPE_CLASS_PLT); + new_addr = _dl_find_hash(symname, tpnt->symbol_scope, tpnt, ELF_RTYPE_CLASS_PLT, NULL); if (unlikely(!new_addr)) { _dl_dprintf(2, "%s: Can't resolve symbol '%s'\n", _dl_progname, symname); _dl_exit(1); @@ -99,8 +99,8 @@ _dl_linux_resolver(struct elf_resolve *tpnt, int reloc_entry) if (!_dl_debug_nofixups) #endif { - got_addr[1] = (char *) (0x03000000 | (((unsigned int) new_addr >> 10) & 0x3fffff)); - got_addr[2] = (char *) (0x81c06000 | ((unsigned int) new_addr & 0x3ff)); + got_addr[1] = (char *) (OPCODE_SETHI_G1 | (((unsigned int) new_addr >> 10) & 0x3fffff)); + got_addr[2] = (char *) (OPCODE_JMP_G1 | ((unsigned int) new_addr & 0x3ff)); } return (unsigned long)new_addr; @@ -170,6 +170,7 @@ _dl_do_reloc(struct elf_resolve *tpnt, struct dyn_elf *scope, int reloc_type; int symtab_index; char *symname; + struct elf_resolve *tls_tpnt = 0; ElfW(Sym) *sym; ElfW(Addr) *reloc_addr; ElfW(Addr) symbol_addr; @@ -186,17 +187,25 @@ _dl_do_reloc(struct elf_resolve *tpnt, struct dyn_elf *scope, if (symtab_index) { symbol_addr = (ElfW(Addr))_dl_find_hash(symname, scope, tpnt, - elf_machine_type_class(reloc_type)); + elf_machine_type_class(reloc_type), &tls_tpnt); /* * We want to allow undefined references to weak 
symbols - this
 	 * might have been intentional. We should not be linking local
 	 * symbols here, so all bases should be covered.
 	 */
-		if (unlikely(!symbol_addr && ELF_ST_BIND(sym->st_info) != STB_WEAK)) {
-			_dl_dprintf(2, "%s: can't resolve symbol '%s'\n", _dl_progname, symname);
-			_dl_exit(1);
+		if (unlikely(!symbol_addr && (ELF_ST_TYPE(sym->st_info) != STT_TLS)
+					&& (ELF_ST_BIND(sym->st_info) != STB_WEAK))) {
+			/* This may be non-fatal if called from dlopen. */
+			return 1;
+
-		}
+		} else {
+			/* Relocs against STN_UNDEF are usually treated as using a
+			 * symbol value of zero, and using the module containing the
+			 * reloc itself. */
+			symbol_addr = sym->st_value;
+			tls_tpnt = tpnt;
+		}
 
 #if defined (__SUPPORT_LD_DEBUG__)
 	old_val = *reloc_addr;
@@ -208,21 +217,6 @@ _dl_do_reloc(struct elf_resolve *tpnt, struct dyn_elf *scope,
 		case R_SPARC_NONE:
 			break;
 
-#if 0 /* these dont really seem to be useful */
-		case R_SPARC_8:
-			*(char *) reloc_addr = symbol_addr;
-			break;
-		case R_SPARC_16:
-			*(short *) reloc_addr = symbol_addr;
-			break;
-		case R_SPARC_DISP8:
-			*(char *) reloc_addr = (symbol_addr) - (Elf32_Addr) reloc_addr;
-			break;
-		case R_SPARC_DISP16:
-			*(short *) reloc_addr = (symbol_addr) - (Elf32_Addr) reloc_addr;
-			break;
-#endif
-
 		case R_SPARC_DISP32:
 			*reloc_addr = symbol_addr - (unsigned int) reloc_addr;
 			break;
@@ -232,7 +226,7 @@ _dl_do_reloc(struct elf_resolve *tpnt, struct dyn_elf *scope,
 				symbol_addr = tpnt->loadaddr + rpnt->r_addend;
 			else
 				symbol_addr += rpnt->r_addend;
-			*reloc_addr = (*reloc_addr & ~0x3ff)|(symbol_addr & 0x3ff);
+			*reloc_addr = (*reloc_addr & ~0x3ff) | (symbol_addr & 0x3ff);
 			break;
 
 		case R_SPARC_GLOB_DAT:
@@ -241,17 +235,8 @@ _dl_do_reloc(struct elf_resolve *tpnt, struct dyn_elf *scope,
 			break;
 
 		case R_SPARC_JMP_SLOT:
-/*
-value = symbol_addr;
-value += reloc->r_addend;
-disp = value - reloc_addr;
-reloc_addr[1] = OPCODE_JMP_G1 | (value & 0x3ff);
-reloc_addr[0] = OPCODE_SETHI_G1 | (value >> 10);
-			reloc_addr[1] = OPCODE_JMP_G1 |
((symbol_addr-(Elf32_Addr)reloc_addr) & 0x3ff); - reloc_addr[0] = OPCODE_SETHI_G1 | ((symbol_addr-(Elf32_Addr)reloc_addr) >> 10); -*/ - reloc_addr[1] = 0x03000000 | ((symbol_addr >> 10) & 0x3fffff); - reloc_addr[2] = 0x81c06000 | (symbol_addr & 0x3ff); + reloc_addr[1] = OPCODE_SETHI_G1 | (( symbol_addr >> 10 ) & 0x3fffff); + reloc_addr[2] = OPCODE_JMP_G1 | ( symbol_addr & 0x3ff ); break; case R_SPARC_RELATIVE: @@ -287,6 +272,26 @@ reloc_addr[0] = OPCODE_SETHI_G1 | (value >> 10); } else _dl_dprintf(_dl_debug_file, "no symbol_addr to copy !?\n"); break; +#if USE_TLS + case R_SPARC_TLS_DTPMOD32: + *reloc_addr = tls_tpnt->l_tls_modid; + break; + + case R_SPARC_TLS_DTPOFF32: + /* During relocation all TLS symbols are defined and used. + * Therefore the offset is already correct. */ + *reloc_addr = sym->st_value + rpnt->r_addend; + break; + + case R_SPARC_TLS_TPOFF32: + /* The offset is negative, forward from the thread pointer. + * We know the offset of object the symbol is contained in. + * It is a negative value which will be added to the + * thread pointer. */ + CHECK_STATIC_TLS ((struct link_map *) tls_tpnt); + *reloc_addr = sym->st_value - tls_tpnt->l_tls_offset + rpnt->r_addend; + break; +#endif default: return -1; /* Calls _dl_exit(1). */ } diff --git a/ldso/libdl/libdl.c b/ldso/libdl/libdl.c index 8646a74d8..7f7f70bd9 100644 --- a/ldso/libdl/libdl.c +++ b/ldso/libdl/libdl.c @@ -35,13 +35,25 @@ #include /* Needed for 'strstr' prototype' */ #include +#ifdef __UCLIBC_HAS_TLS__ +#include +#endif + +#if USE_TLS +#include +extern void (*_dl_init_static_tls) (struct link_map *); +extern void _dl_add_to_slotinfo(struct link_map *l); +#endif #ifdef SHARED +# if USE_TLS +# include +extern struct link_map *_dl_update_slotinfo(unsigned long int req_modid); +# endif /* When libdl is loaded as a shared library, we need to load in * and use a pile of symbols from ldso... 
*/ -extern char *_dl_find_hash(const char *, struct dyn_elf *, struct elf_resolve *, int); extern struct elf_resolve * _dl_load_shared_library(int, struct dyn_elf **, struct elf_resolve *, char *, int); extern int _dl_fixup(struct dyn_elf *rpnt, int lazy); @@ -50,6 +62,7 @@ extern int _dl_errno; extern struct dyn_elf *_dl_symbol_tables; extern struct dyn_elf *_dl_handles; extern struct elf_resolve *_dl_loaded_modules; +extern void _dl_free (void *__ptr); extern struct r_debug *_dl_debug_addr; extern unsigned long _dl_error_number; extern void *(*_dl_malloc_function)(size_t); @@ -83,7 +96,7 @@ char *_dl_debug_reloc = NULL; char *_dl_debug_detail = NULL; char *_dl_debug_nofixups = NULL; char *_dl_debug_bindings = NULL; -int _dl_debug_file = NULL; +int _dl_debug_file = 2; #endif const char *_dl_progname = ""; /* Program name */ void *(*_dl_malloc_function)(size_t); @@ -97,6 +110,15 @@ struct r_debug *_dl_debug_addr = NULL; #include "../ldso/dl-array.c" #include "../ldso/dl-debug.c" + + +# if USE_TLS +/* + * Giving this initialized value preallocates some surplus bytes in the + * static TLS area, see __libc_setup_tls (libc-tls.c). + */ +size_t _dl_tls_static_size = 2048; +# endif #include LDSO_ELFINTERP #include "../ldso/dl-hash.c" #define _dl_trace_loaded_objects 0 @@ -133,6 +155,7 @@ static const char *const dl_error_names[] = { "Not an ELF shared library", "Unable to mmap file", "No dynamic section", + "Library contains unsupported TLS", #ifdef ELF_USES_RELOCA "Unable to process REL relocs", #else @@ -142,6 +165,111 @@ static const char *const dl_error_names[] = { "Unable to resolve symbol" }; + +#if USE_TLS +#ifdef SHARED +/* + * Systems which do not have tls_index also probably have to define + * DONT_USE_TLS_INDEX. + */ + +# ifndef __TLS_GET_ADDR +# define __TLS_GET_ADDR __tls_get_addr +# endif + +/* + * Return the symbol address given the map of the module it is in and + * the symbol record. This is used in dl-sym.c. 
+ */ +static void * +internal_function +_dl_tls_symaddr(struct link_map *map, const Elf32_Addr st_value) +{ +# ifndef DONT_USE_TLS_INDEX + tls_index tmp = + { + .ti_module = map->l_tls_modid, + .ti_offset = st_value + }; + + return __TLS_GET_ADDR (&tmp); +# else + return __TLS_GET_ADDR (map->l_tls_modid, st_value); +# endif +} +#endif + +/* Returns true we an non-empty was found. */ +static bool +remove_slotinfo(size_t idx, struct dtv_slotinfo_list *listp, size_t disp, + bool should_be_there) +{ + if(idx - disp >= listp->len) + { + if(listp->next == NULL) + { + /* + * The index is not actually valid in the slotinfo list, + * because this object was closed before it was fully set + * up due to some error. + */ + _dl_assert(!should_be_there); + } + else + { + if(remove_slotinfo(idx, listp->next, disp + listp->len, + should_be_there)) + return true; + + /* + * No non-empty entry. Search from the end of this element's + * slotinfo array. + */ + idx = disp + listp->len; + } + } + else + { + struct link_map *old_map = listp->slotinfo[idx - disp].map; + + /* + * The entry might still be in its unused state if we are + * closing an object that wasn't fully set up. + */ + if(__builtin_expect(old_map != NULL, 1)) + { + _dl_assert(old_map->l_tls_modid == idx); + + /* Mark the entry as unused. */ + listp->slotinfo[idx - disp].gen = _dl_tls_generation + 1; + listp->slotinfo[idx - disp].map = NULL; + } + + /* + * If this is not the last currently used entry no need to + * look further. + */ + if (idx != _dl_tls_max_dtv_idx) + return true; + } + + while(idx - disp > (disp == 0 ? 1 + _dl_tls_static_nelem : 0)) + { + --idx; + + if(listp->slotinfo[idx - disp].map != NULL) + { + /* Found a new last used index. */ + _dl_tls_max_dtv_idx = idx; + return true; + } + } + + /* No non-entry in this list element. 
*/ + return false; +} +#endif + void dl_cleanup(void) __attribute__ ((destructor)); void dl_cleanup(void) { @@ -165,6 +293,9 @@ void *dlopen(const char *libname, int flag) unsigned int nlist, i; struct elf_resolve **init_fini_list; static bool _dl_init; +#if USE_TLS + bool any_tls = false; +#endif /* A bit of sanity checking... */ if (!(flag & (RTLD_LAZY|RTLD_NOW))) { @@ -396,6 +527,52 @@ void *dlopen(const char *libname, int flag) } /* TODO: Should we set the protections of all pages back to R/O now ? */ + +#if USE_TLS + + for (i=0; i < nlist; i++) { + struct elf_resolve *tmp_tpnt = init_fini_list[i]; + /* Only add TLS memory if this object is loaded now and + therefore is not yet initialized. */ + + if (!(tmp_tpnt->init_flag & INIT_FUNCS_CALLED) + /* Only if the module defines thread local data. */ + && __builtin_expect (tmp_tpnt->l_tls_blocksize > 0, 0)) { + + /* Now that we know the object is loaded successfully add + modules containing TLS data to the slot info table. We + might have to increase its size. */ + _dl_add_to_slotinfo ((struct link_map*)tmp_tpnt); + + /* It is the case in which we couldn't perform TLS static + initialization at relocation time, and we delayed it until + the relocation has been completed. */ + + if (tmp_tpnt->l_need_tls_init) { + tmp_tpnt->l_need_tls_init = 0; +# ifdef SHARED + /* Update the slot information data for at least the + generation of the DSO we are allocating data for. */ + _dl_update_slotinfo (tmp_tpnt->l_tls_modid); +# endif + + _dl_init_static_tls((struct link_map*)tmp_tpnt); + _dl_assert (tmp_tpnt->l_need_tls_init == 0); + } + + /* We have to bump the generation counter. */ + any_tls = true; + } + } + + /* Bump the generation number if necessary. */ + if (any_tls && __builtin_expect (++_dl_tls_generation == 0, 0)) { + _dl_debug_early("TLS generation counter wrapped! Please report this."); + _dl_exit(30); + } + +#endif + /* Notify the debugger we have added some objects. 
*/ if (_dl_debug_addr) { dl_brk = (void (*)(void)) _dl_debug_addr->r_brk; @@ -445,6 +622,7 @@ void *dlsym(void *vhandle, const char *name) ElfW(Addr) from; struct dyn_elf *rpnt; void *ret; + struct elf_resolve *tls_tpnt = NULL; /* Nastiness to support underscore prefixes. */ #ifdef __UCLIBC_UNDERSCORES__ char tmp_buf[80]; @@ -499,7 +677,15 @@ void *dlsym(void *vhandle, const char *name) tpnt = NULL; if (handle == _dl_symbol_tables) tpnt = handle->dyn; /* Only search RTLD_GLOBAL objs if global object */ - ret = _dl_find_hash(name2, handle, tpnt, ELF_RTYPE_CLASS_DLSYM); + ret = _dl_find_hash(name2, handle, NULL, 0, &tls_tpnt); + +#if defined USE_TLS && defined SHARED + if(tls_tpnt) { + /* The found symbol is a thread-local storage variable. + Return the address for to the current thread. */ + ret = _dl_tls_symaddr ((struct link_map *)tls_tpnt, (Elf32_Addr)ret); + } +#endif /* * Nothing found. @@ -532,6 +718,12 @@ static int do_dlclose(void *vhandle, int need_fini) struct dyn_elf *handle; unsigned int end; unsigned int i, j; +#if USE_TLS + bool any_tls = false; + size_t tls_free_start = NO_TLS_OFFSET; + size_t tls_free_end = NO_TLS_OFFSET; + struct link_map *tls_lmap; +#endif handle = (struct dyn_elf *) vhandle; if (handle == _dl_symbol_tables) @@ -587,6 +779,118 @@ static int do_dlclose(void *vhandle, int need_fini) if (end < ppnt->p_vaddr + ppnt->p_memsz) end = ppnt->p_vaddr + ppnt->p_memsz; } + +#if USE_TLS + /* Do the cast to make things easy. */ + tls_lmap = (struct link_map *) tpnt; + + /* Remove the object from the dtv slotinfo array if it uses TLS. */ + if (__builtin_expect (tls_lmap->l_tls_blocksize > 0, 0)) + { + any_tls = true; + + if (_dl_tls_dtv_slotinfo_list != NULL + && ! remove_slotinfo (tls_lmap->l_tls_modid, + _dl_tls_dtv_slotinfo_list, 0, + (tpnt->init_flag & INIT_FUNCS_CALLED))) + /* All dynamically loaded modules with TLS are unloaded. 
*/ + _dl_tls_max_dtv_idx = _dl_tls_static_nelem; + + if (tls_lmap->l_tls_offset != NO_TLS_OFFSET) + { + /* + * Collect a contiguous chunk built from the objects in + * this search list, going in either direction. When the + * whole chunk is at the end of the used area then we can + * reclaim it. + */ +# if defined(TLS_TCB_AT_TP) + if (tls_free_start == NO_TLS_OFFSET + || (size_t) tls_lmap->l_tls_offset == tls_free_start) + { + /* Extend the contiguous chunk being reclaimed. */ + tls_free_start + = tls_lmap->l_tls_offset - + tls_lmap->l_tls_blocksize; + + if (tls_free_end == NO_TLS_OFFSET) + tls_free_end = tls_lmap->l_tls_offset; + } + else if (tls_lmap->l_tls_offset - tls_lmap->l_tls_blocksize + == tls_free_end) + /* Extend the chunk backwards. */ + tls_free_end = tls_lmap->l_tls_offset; + else + { + /* + * This isn't contiguous with the last chunk freed. + * One of them will be leaked unless we can free + * one block right away. + */ + if (tls_free_end == _dl_tls_static_used) + { + _dl_tls_static_used = tls_free_start; + tls_free_end = tls_lmap->l_tls_offset; + tls_free_start + = tls_free_end - tls_lmap->l_tls_blocksize; + } + else if ((size_t) tls_lmap->l_tls_offset + == _dl_tls_static_used) + _dl_tls_static_used = tls_lmap->l_tls_offset - + tls_lmap->l_tls_blocksize; + else if (tls_free_end < (size_t) tls_lmap->l_tls_offset) + { + /* + * We pick the later block. It has a chance + * to be freed. + */ + tls_free_end = tls_lmap->l_tls_offset; + tls_free_start = tls_free_end - + tls_lmap->l_tls_blocksize; + } + } +# elif defined(TLS_DTV_AT_TP) + if ((size_t) tls_lmap->l_tls_offset == tls_free_end) + /* Extend the contiguous chunk being reclaimed. */ + tls_free_end -= tls_lmap->l_tls_blocksize; + else if (tls_lmap->l_tls_offset + tls_lmap->l_tls_blocksize + == tls_free_start) + /* Extend the chunk backwards. */ + tls_free_start = tls_lmap->l_tls_offset; + else + { + /* + * This isn't contiguous with the last chunk + * freed. One of them will be leaked. 
+ */ + if (tls_free_end == _dl_tls_static_used) + _dl_tls_static_used = tls_free_start; + tls_free_start = tls_lmap->l_tls_offset; + tls_free_end = tls_free_start + + tls_lmap->l_tls_blocksize; + } +# else +# error "Either TLS_TCB_AT_TP or TLS_DTV_AT_TP must be defined" +# endif + } else { + +#define TLS_DTV_UNALLOCATED ((void *) -1l) + + dtv_t *dtv = THREAD_DTV (); + + _dl_assert(!(dtv[tls_lmap->l_tls_modid].pointer.is_static)); + if(dtv[tls_lmap->l_tls_modid].pointer.val != TLS_DTV_UNALLOCATED) { + /* Note that free is called for NULL is well. We + deallocate even if it is this dtv entry we are + supposed to load. The reason is that we call + memalign and not malloc. */ + _dl_free (dtv[tls_lmap->l_tls_modid].pointer.val); + dtv[tls_lmap->l_tls_modid].pointer.val = TLS_DTV_UNALLOCATED; + } + } + } +#endif + DL_LIB_UNMAP (tpnt, end); /* Free elements in RTLD_LOCAL scope list */ for (runp = tpnt->rtld_local; runp; runp = tmp) { @@ -638,6 +942,21 @@ static int do_dlclose(void *vhandle, int need_fini) free(handle->init_fini.init_fini); free(handle); +#if USE_TLS + /* If we removed any object which uses TLS bump the generation counter. */ + if (any_tls) + { + if (__builtin_expect (++_dl_tls_generation == 0, 0)) + { + _dl_debug_early ("TLS generation counter wrapped! Please report to the uClibc mailing list.\n"); + _dl_exit(30); + } + + if (tls_free_end == _dl_tls_static_used) + _dl_tls_static_used = tls_free_start; + } +#endif + if (_dl_debug_addr) { dl_brk = (void (*)(void)) _dl_debug_addr->r_brk; if (dl_brk != NULL) { -- cgit v1.2.3