diff options
author | Austin Foxley <austinf@cetoncorp.com> | 2009-10-17 13:05:04 -0700 |
---|---|---|
committer | Austin Foxley <austinf@cetoncorp.com> | 2009-10-17 13:05:04 -0700 |
commit | 843d561eba0c4856fc4465215d56c81b868dec14 (patch) | |
tree | 87fad6df4f60f2ea710ccaacb5cf645731172331 | |
parent | 4a689f0b907d4a98582a9c4b7f6be811924db8ee (diff) |
sparc specific bits needed for nptl
* a tweaked clone.S with RESET_PID support
* atomic.h with cmpxchg that works for v8 and beyond
Signed-off-by: Austin Foxley <austinf@cetoncorp.com>
-rw-r--r-- | libc/sysdeps/linux/sparc/Makefile.arch | 11 | ||||
-rw-r--r-- | libc/sysdeps/linux/sparc/bits/atomic.h | 329 | ||||
-rw-r--r-- | libc/sysdeps/linux/sparc/bits/uClibc_arch_features.h | 2 | ||||
-rw-r--r-- | libc/sysdeps/linux/sparc/clone.S | 99 | ||||
-rw-r--r-- | libc/sysdeps/linux/sparc/sparcv9/clone.S | 102 |
5 files changed, 511 insertions, 32 deletions
diff --git a/libc/sysdeps/linux/sparc/Makefile.arch b/libc/sysdeps/linux/sparc/Makefile.arch index ffae27bc7..8a624205a 100644 --- a/libc/sysdeps/linux/sparc/Makefile.arch +++ b/libc/sysdeps/linux/sparc/Makefile.arch @@ -5,8 +5,15 @@ # Licensed under the LGPL v2.1, see the file COPYING.LIB in this tarball. # -CSRC := brk.c __syscall_error.c qp_ops.c sigaction.c +CSRC := brk.c __syscall_error.c qp_ops.c SSRC := \ - __longjmp.S fork.S vfork.S clone.S setjmp.S bsd-setjmp.S bsd-_setjmp.S \ + __longjmp.S setjmp.S bsd-setjmp.S bsd-_setjmp.S \ syscall.S urem.S udiv.S umul.S sdiv.S rem.S pipe.S + +ifneq ($(UCLIBC_HAS_THREADS_NATIVE),y) +CSRC += sigaction.c +SSRC += fork.S vfork.S +endif + + diff --git a/libc/sysdeps/linux/sparc/bits/atomic.h b/libc/sysdeps/linux/sparc/bits/atomic.h new file mode 100644 index 000000000..f625eb92a --- /dev/null +++ b/libc/sysdeps/linux/sparc/bits/atomic.h @@ -0,0 +1,329 @@ +/* Atomic operations. sparc32 version. + Copyright (C) 2003, 2004, 2006 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Jakub Jelinek <jakub@redhat.com>, 2003. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#ifndef _BITS_ATOMIC_H +#define _BITS_ATOMIC_H 1 + +#include <stdint.h> + +typedef int8_t atomic8_t; +typedef uint8_t uatomic8_t; +typedef int_fast8_t atomic_fast8_t; +typedef uint_fast8_t uatomic_fast8_t; + +typedef int16_t atomic16_t; +typedef uint16_t uatomic16_t; +typedef int_fast16_t atomic_fast16_t; +typedef uint_fast16_t uatomic_fast16_t; + +typedef int32_t atomic32_t; +typedef uint32_t uatomic32_t; +typedef int_fast32_t atomic_fast32_t; +typedef uint_fast32_t uatomic_fast32_t; + +typedef int64_t atomic64_t; +typedef uint64_t uatomic64_t; +typedef int_fast64_t atomic_fast64_t; +typedef uint_fast64_t uatomic_fast64_t; + +typedef intptr_t atomicptr_t; +typedef uintptr_t uatomicptr_t; +typedef intmax_t atomic_max_t; +typedef uintmax_t uatomic_max_t; + + +/* We have no compare and swap, just test and set. + The following implementation contends on 64 global locks + per library and assumes no variable will be accessed using atomic.h + macros from two different libraries. */ + +__make_section_unallocated + (".gnu.linkonce.b.__sparc32_atomic_locks, \"aw\", %nobits"); + +volatile unsigned char __sparc32_atomic_locks[64] + __attribute__ ((nocommon, section (".gnu.linkonce.b.__sparc32_atomic_locks" + __sec_comment), + visibility ("hidden"))); + +#define __sparc32_atomic_do_lock(addr) \ + do \ + { \ + unsigned int __old_lock; \ + unsigned int __idx = (((long) addr >> 2) ^ ((long) addr >> 12)) \ + & 63; \ + do \ + __asm __volatile ("ldstub %1, %0" \ + : "=r" (__old_lock), \ + "=m" (__sparc32_atomic_locks[__idx]) \ + : "m" (__sparc32_atomic_locks[__idx]) \ + : "memory"); \ + while (__old_lock); \ + } \ + while (0) + +#define __sparc32_atomic_do_unlock(addr) \ + do \ + { \ + __sparc32_atomic_locks[(((long) addr >> 2) \ + ^ ((long) addr >> 12)) & 63] = 0; \ + __asm __volatile ("" ::: "memory"); \ + } \ + while (0) + +#define __sparc32_atomic_do_lock24(addr) \ + do \ + { \ + unsigned int __old_lock; \ + do \ + __asm __volatile ("ldstub %1, %0" \ + : "=r" (__old_lock), "=m" (*(addr)) \ + : "m" (*(addr)) \ + : "memory"); \ + while (__old_lock); \ + } \ + while (0) + +#define __sparc32_atomic_do_unlock24(addr) \ + do \ + { \ + *(char *) (addr) = 0; \ + __asm __volatile ("" ::: "memory"); \ + } \ + while (0) + + +#ifndef SHARED +# define __v9_compare_and_exchange_val_32_acq(mem, newval, oldval) \ +({ \ + register __typeof (*(mem)) __acev_tmp __asm ("%g6"); \ + register __typeof (mem) __acev_mem __asm ("%g1") = (mem); \ + register __typeof (*(mem)) __acev_oldval __asm ("%g5"); \ + __acev_tmp = (newval); \ + __acev_oldval = (oldval); \ + /* .word 0xcde05005 is cas [%g1], %g5, %g6. Can't use cas here though, \ + because as will then mark the object file as V8+ arch. */ \ + __asm __volatile (".word 0xcde05005" \ + : "+r" (__acev_tmp), "=m" (*__acev_mem) \ + : "r" (__acev_oldval), "m" (*__acev_mem), \ + "r" (__acev_mem) : "memory"); \ + __acev_tmp; }) +#endif + +/* The only basic operation needed is compare and exchange. */ +#define __v7_compare_and_exchange_val_acq(mem, newval, oldval) \ + ({ __typeof (mem) __acev_memp = (mem); \ + __typeof (*mem) __acev_ret; \ + __typeof (*mem) __acev_newval = (newval); \ + \ + __sparc32_atomic_do_lock (__acev_memp); \ + __acev_ret = *__acev_memp; \ + if (__acev_ret == (oldval)) \ + *__acev_memp = __acev_newval; \ + __sparc32_atomic_do_unlock (__acev_memp); \ + __acev_ret; }) + +#define __v7_compare_and_exchange_bool_acq(mem, newval, oldval) \ + ({ __typeof (mem) __aceb_memp = (mem); \ + int __aceb_ret; \ + __typeof (*mem) __aceb_newval = (newval); \ + \ + __sparc32_atomic_do_lock (__aceb_memp); \ + __aceb_ret = 0; \ + if (*__aceb_memp == (oldval)) \ + *__aceb_memp = __aceb_newval; \ + else \ + __aceb_ret = 1; \ + __sparc32_atomic_do_unlock (__aceb_memp); \ + __aceb_ret; }) + +#define __v7_exchange_acq(mem, newval) \ + ({ __typeof (mem) __acev_memp = (mem); \ + __typeof (*mem) __acev_ret; \ + __typeof (*mem) __acev_newval = (newval); \ + \ + __sparc32_atomic_do_lock (__acev_memp); \ + __acev_ret = *__acev_memp; \ + *__acev_memp = __acev_newval; \ + __sparc32_atomic_do_unlock (__acev_memp); \ + __acev_ret; }) + +#define __v7_exchange_and_add(mem, value) \ + ({ __typeof (mem) __acev_memp = (mem); \ + __typeof (*mem) __acev_ret; \ + \ + __sparc32_atomic_do_lock (__acev_memp); \ + __acev_ret = *__acev_memp; \ + *__acev_memp = __acev_ret + (value); \ + __sparc32_atomic_do_unlock (__acev_memp); \ + __acev_ret; }) + +/* Special versions, which guarantee that top 8 bits of all values + are cleared and use those bits as the ldstub lock. */ +#define __v7_compare_and_exchange_val_24_acq(mem, newval, oldval) \ + ({ __typeof (mem) __acev_memp = (mem); \ + __typeof (*mem) __acev_ret; \ + __typeof (*mem) __acev_newval = (newval); \ + \ + __sparc32_atomic_do_lock24 (__acev_memp); \ + __acev_ret = *__acev_memp & 0xffffff; \ + if (__acev_ret == (oldval)) \ + *__acev_memp = __acev_newval; \ + else \ + __sparc32_atomic_do_unlock24 (__acev_memp); \ + __asm __volatile ("" ::: "memory"); \ + __acev_ret; }) + +#define __v7_exchange_24_rel(mem, newval) \ + ({ __typeof (mem) __acev_memp = (mem); \ + __typeof (*mem) __acev_ret; \ + __typeof (*mem) __acev_newval = (newval); \ + \ + __sparc32_atomic_do_lock24 (__acev_memp); \ + __acev_ret = *__acev_memp & 0xffffff; \ + *__acev_memp = __acev_newval; \ + __asm __volatile ("" ::: "memory"); \ + __acev_ret; }) + +#ifdef SHARED + +/* When dynamically linked, we assume pre-v9 libraries are only ever + used on pre-v9 CPU. */ +# define __atomic_is_v9 0 + +# define atomic_compare_and_exchange_val_acq(mem, newval, oldval) \ + __v7_compare_and_exchange_val_acq (mem, newval, oldval) + +# define atomic_compare_and_exchange_bool_acq(mem, newval, oldval) \ + __v7_compare_and_exchange_bool_acq (mem, newval, oldval) + +# define atomic_exchange_acq(mem, newval) \ + __v7_exchange_acq (mem, newval) + +# define atomic_exchange_and_add(mem, value) \ + __v7_exchange_and_add (mem, value) + +# define atomic_compare_and_exchange_val_24_acq(mem, newval, oldval) \ + ({ \ + if (sizeof (*mem) != 4) \ + abort (); \ + __v7_compare_and_exchange_val_24_acq (mem, newval, oldval); }) + +# define atomic_exchange_24_rel(mem, newval) \ + ({ \ + if (sizeof (*mem) != 4) \ + abort (); \ + __v7_exchange_24_rel (mem, newval); }) + +#else + + + +/* + Here's what we'd like to do: + + In libc.a/libpthread.a etc. we don't know if we'll be run on + pre-v9 or v9 CPU. To be interoperable with dynamically linked + apps on v9 CPUs e.g. with process shared primitives, use cas insn + on v9 CPUs and ldstub on pre-v9. + + However, we have no good way to test at run time that I know of, + so resort to the lowest common denominator (v7 ops) -austinf + */ +#define __atomic_is_v9 0 + +# define atomic_compare_and_exchange_val_acq(mem, newval, oldval) \ + ({ \ + __typeof (*mem) __acev_wret; \ + if (sizeof (*mem) != 4) \ + abort (); \ + if (__atomic_is_v9) \ + __acev_wret \ + = __v9_compare_and_exchange_val_32_acq (mem, newval, oldval);\ + else \ + __acev_wret \ + = __v7_compare_and_exchange_val_acq (mem, newval, oldval); \ + __acev_wret; }) + +# define atomic_compare_and_exchange_bool_acq(mem, newval, oldval) \ + ({ \ + int __acev_wret; \ + if (sizeof (*mem) != 4) \ + abort (); \ + if (__atomic_is_v9) \ + { \ + __typeof (oldval) __acev_woldval = (oldval); \ + __acev_wret \ + = __v9_compare_and_exchange_val_32_acq (mem, newval, \ + __acev_woldval) \ + != __acev_woldval; \ + } \ + else \ + __acev_wret \ + = __v7_compare_and_exchange_bool_acq (mem, newval, oldval); \ + __acev_wret; }) + +# define atomic_exchange_rel(mem, newval) \ + ({ \ + __typeof (*mem) __acev_wret; \ + if (sizeof (*mem) != 4) \ + abort (); \ + if (__atomic_is_v9) \ + { \ + __typeof (mem) __acev_wmemp = (mem); \ + __typeof (*(mem)) __acev_wval = (newval); \ + do \ + __acev_wret = *__acev_wmemp; \ + while (__builtin_expect \ + (__v9_compare_and_exchange_val_32_acq (__acev_wmemp,\ + __acev_wval, \ + __acev_wret) \ + != __acev_wret, 0)); \ + } \ + else \ + __acev_wret = __v7_exchange_acq (mem, newval); \ + __acev_wret; }) + +# define atomic_compare_and_exchange_val_24_acq(mem, newval, oldval) \ + ({ \ + __typeof (*mem) __acev_wret; \ + if (sizeof (*mem) != 4) \ + abort (); \ + if (__atomic_is_v9) \ + __acev_wret \ + = __v9_compare_and_exchange_val_32_acq (mem, newval, oldval);\ + else \ + __acev_wret \ + = __v7_compare_and_exchange_val_24_acq (mem, newval, oldval);\ + __acev_wret; }) + +# define atomic_exchange_24_rel(mem, newval) \ + ({ \ + __typeof (*mem) __acev_w24ret; \ + if (sizeof (*mem) != 4) \ + abort (); \ + if (__atomic_is_v9) \ + __acev_w24ret = atomic_exchange_rel (mem, newval); \ + else \ + __acev_w24ret = __v7_exchange_24_rel (mem, newval); \ + __acev_w24ret; }) + +#endif + +#endif /* bits/atomic.h */ diff --git a/libc/sysdeps/linux/sparc/bits/uClibc_arch_features.h b/libc/sysdeps/linux/sparc/bits/uClibc_arch_features.h index 1dbfa2b55..2d8cdd78b 100644 --- a/libc/sysdeps/linux/sparc/bits/uClibc_arch_features.h +++ b/libc/sysdeps/linux/sparc/bits/uClibc_arch_features.h @@ -36,7 +36,7 @@ #undef __UCLIBC_HAVE_ASM_GLOBAL_DOT_NAME__ /* define if target supports CFI pseudo ops */ -#undef __UCLIBC_HAVE_ASM_CFI_DIRECTIVES__ +#define __UCLIBC_HAVE_ASM_CFI_DIRECTIVES__ /* define if target supports IEEE signed zero floats */ #define __UCLIBC_HAVE_SIGNED_ZERO__ diff --git a/libc/sysdeps/linux/sparc/clone.S b/libc/sysdeps/linux/sparc/clone.S index 0e41ee0cb..2b6609531 100644 --- a/libc/sysdeps/linux/sparc/clone.S +++ b/libc/sysdeps/linux/sparc/clone.S @@ -1,4 +1,5 @@ -/* Copyright (C) 1996, 1997, 1998, 2000 Free Software Foundation, Inc. +/* Copyright (C) 1996, 1997, 1998, 2000, 2003, 2004, 2007 + Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Richard Henderson (rth@tamu.edu). @@ -20,47 +21,87 @@ /* clone() is even more special than fork() as it mucks with stacks and invokes a function in the right context after its all over. */ -#include <features.h> +#include <asm/errno.h> #include <asm/unistd.h> +#include <tcb-offsets.h> +#include <sysdep.h> -/* int clone(int (*fn)(void *arg), void *child_stack, int flags, void *arg); */ +#define CLONE_VM 0x00000100 +#define CLONE_THREAD 0x00010000 -.text -.global clone -.type clone,%function -.align 4 +/* int clone(int (*fn)(void *arg), void *child_stack, int flags, void *arg, + pid_t *ptid, void *tls, pid_t *ctid); */ -clone: + .text +ENTRY (__clone) save %sp,-96,%sp + cfi_def_cfa_register(%fp) + cfi_window_save + cfi_register(%o7, %i7) /* sanity check arguments */ - tst %i0 - be __error - orcc %i1,%g0,%o1 - be __error - mov %i2,%o0 + orcc %i0,%g0,%g2 + be .Leinval + orcc %i1,%g0,%o1 + be .Leinval + mov %i2,%o0 + + /* The child_stack is the top of the stack, allocate one + whole stack frame from that as this is what the kernel + expects. */ + sub %o1, 96, %o1 + mov %i3, %g3 + mov %i2, %g4 + + /* ptid */ + mov %i4,%o2 + /* tls */ + mov %i5,%o3 + /* ctid */ + ld [%fp+92],%o4 /* Do the system call */ set __NR_clone,%g1 ta 0x10 - bcs __error - tst %o1 + bcs .Lerror + tst %o1 bne __thread_start - nop - ret - restore %o0,%g0,%o0 - -__error: - jmp __syscall_error + nop + jmpl %i7 + 8, %g0 + restore %o0,%g0,%o0 -.size clone,.-clone - -.type __thread_start,%function +.Leinval: + mov EINVAL, %o0 +.Lerror: + call __errno_location + mov %o0, %i0 + st %i0,[%o0] + jmpl %i7 + 8, %g0 + restore %g0,-1,%o0 +END(__clone) + .type __thread_start,@function __thread_start: - call %i0 - mov %i3,%o0 - call HIDDEN_JUMPTARGET(_exit),0 - nop +#ifdef RESET_PID + sethi %hi(CLONE_THREAD), %l0 + andcc %g4, %l0, %g0 + bne 1f + andcc %g4, CLONE_VM, %g0 + bne,a 2f + mov -1,%o0 + set __NR_getpid,%g1 + ta 0x10 +2: + st %o0,[%g7 + PID] + st %o0,[%g7 + TID] +1: +#endif + mov %g0, %fp /* terminate backtrace */ + call %g2 + mov %g3,%o0 + call exit,0 + nop + + .size __thread_start, .-__thread_start -.size __thread_start,.-__thread_start +weak_alias (__clone, clone) diff --git a/libc/sysdeps/linux/sparc/sparcv9/clone.S b/libc/sysdeps/linux/sparc/sparcv9/clone.S new file mode 100644 index 000000000..9d101e239 --- /dev/null +++ b/libc/sysdeps/linux/sparc/sparcv9/clone.S @@ -0,0 +1,102 @@ +/* Copyright (C) 1997, 2000, 2007 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Richard Henderson (rth@tamu.edu). + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +/* clone() is even more special than fork() as it mucks with stacks + and invokes a function in the right context after its all over. */ + +#include <asm/errno.h> +#include <asm/unistd.h> +#include <tcb-offsets.h> +#include <sysdep.h> + +#define CLONE_VM 0x00000100 +#define CLONE_THREAD 0x00010000 + +/* int clone(int (*fn)(void *arg), void *child_stack, int flags, void *arg, + pid_t *ptid, void *tls, pid_t *ctid); */ + + .register %g2,#scratch + .register %g3,#scratch + + .text + +ENTRY (__clone) + save %sp, -192, %sp + cfi_def_cfa_register(%fp) + cfi_window_save + cfi_register(%o7, %i7) + + /* sanity check arguments */ + brz,pn %i0, 99f /* fn non-NULL? */ + mov %i0, %g2 + brz,pn %i1, 99f /* child_stack non-NULL? */ + mov %i2, %o0 /* clone flags */ + + /* The child_stack is the top of the stack, allocate one + whole stack frame from that as this is what the kernel + expects. Also, subtract STACK_BIAS. */ + sub %i1, 192 + 0x7ff, %o1 + mov %i3, %g3 + mov %i2, %g4 + + mov %i4,%o2 /* PTID */ + mov %i5,%o3 /* TLS */ + ldx [%fp+0x7ff+176],%o4 /* CTID */ + + /* Do the system call */ + set __NR_clone, %g1 + ta 0x6d + bcs,pn %xcc, 98f + nop + brnz,pn %o1, __thread_start + nop + jmpl %i7 + 8, %g0 + restore %o0, %g0, %o0 +99: mov EINVAL, %o0 +98: call HIDDEN_JUMPTARGET(__errno_location) + mov %o0, %i0 + st %i0, [%o0] + jmpl %i7 + 8, %g0 + restore %g0,-1,%o0 +END(__clone) + + .type __thread_start,@function +__thread_start: +#ifdef RESET_PID + sethi %hi(CLONE_THREAD), %l0 + andcc %g4, %l0, %g0 + bne,pt %icc, 1f + andcc %g4, CLONE_VM, %g0 + bne,a,pn %icc, 2f + mov -1,%o0 + set __NR_getpid,%g1 + ta 0x6d +2: st %o0,[%g7 + PID] + st %o0,[%g7 + TID] +1: +#endif + mov %g0, %fp /* terminate backtrace */ + call %g2 + mov %g3,%o0 + call HIDDEN_JUMPTARGET(_exit),0 + nop + + .size __thread_start, .-__thread_start + +weak_alias (__clone, clone) |