diff options
Diffstat (limited to 'libc/sysdeps/linux/sh/bits')
-rw-r--r-- | libc/sysdeps/linux/sh/bits/atomic.h | 516 |
1 files changed, 195 insertions, 321 deletions
diff --git a/libc/sysdeps/linux/sh/bits/atomic.h b/libc/sysdeps/linux/sh/bits/atomic.h index 6bb7255c5..dd6e5f97d 100644 --- a/libc/sysdeps/linux/sh/bits/atomic.h +++ b/libc/sysdeps/linux/sh/bits/atomic.h @@ -54,6 +54,10 @@ typedef uintmax_t uatomic_max_t; Japan. http://lc.linux.or.jp/lc2002/papers/niibe0919h.pdf (in Japanese). + Niibe Yutaka, "gUSA: User Space Atomicity with Little Kernel + Modification", LinuxTag 2003, Rome. + http://www.semmel.ch/Linuxtag-DVD/talks/170/paper.html (in English). + B.N. Bershad, D. Redell, and J. Ellis, "Fast Mutual Exclusion for Uniprocessors", Proceedings of the Fifth Architectural Support for Programming Languages and Operating Systems (ASPLOS), pp. 223-233, @@ -65,56 +69,44 @@ typedef uintmax_t uatomic_max_t; r1: saved stack pointer */ -#define __arch_compare_and_exchange_val_8_acq(mem, newval, oldval) \ - ({ __typeof (*(mem)) __result; \ - __asm__ __volatile__ ("\ +/* Avoid having lots of different versions of compare and exchange, + by having this one complicated version. Parameters: + bwl: b, w or l for 8, 16 and 32 bit versions. + version: val or bool, depending on whether the result is the + previous value or a bool indicating whether the transfer + did happen (note this needs inverting before being + returned in atomic_compare_and_exchange_bool). +*/ + +#define __arch_compare_and_exchange_n(mem, newval, oldval, bwl, version) \ + ({ signed long __result; \ + __asm __volatile ("\ .align 2\n\ mova 1f,r0\n\ nop\n\ mov r15,r1\n\ mov #-8,r15\n\ - 0: mov.b @%1,%0\n\ + 0: mov." #bwl " @%1,%0\n\ cmp/eq %0,%3\n\ bf 1f\n\ - mov.b %2,@%1\n\ - 1: mov r1,r15"\ - : "=&r" (__result) : "r" (mem), "r" (newval), "r" (oldval) \ - : "r0", "r1", "t", "memory"); \ + mov." #bwl " %2,@%1\n\ + 1: mov r1,r15\n\ + .ifeqs \"bool\",\"" #version "\"\n\ + movt %0\n\ + .endif\n" \ + : "=&r" (__result) \ + : "r" (mem), "r" (newval), "r" (oldval) \ + : "r0", "r1", "t", "memory"); \ __result; }) +#define __arch_compare_and_exchange_val_8_acq(mem, newval, oldval) \ + __arch_compare_and_exchange_n(mem, newval, (int8_t)(oldval), b, val) + #define __arch_compare_and_exchange_val_16_acq(mem, newval, oldval) \ - ({ __typeof (*(mem)) __result; \ - __asm__ __volatile__ ("\ - .align 2\n\ - mova 1f,r0\n\ - nop\n\ - mov r15,r1\n\ - mov #-8,r15\n\ - 0: mov.w @%1,%0\n\ - cmp/eq %0,%3\n\ - bf 1f\n\ - mov.w %2,@%1\n\ - 1: mov r1,r15"\ - : "=&r" (__result) : "r" (mem), "r" (newval), "r" (oldval) \ - : "r0", "r1", "t", "memory"); \ - __result; }) + __arch_compare_and_exchange_n(mem, newval, (int16_t)(oldval), w, val) #define __arch_compare_and_exchange_val_32_acq(mem, newval, oldval) \ - ({ __typeof (*(mem)) __result; \ - __asm__ __volatile__ ("\ - .align 2\n\ - mova 1f,r0\n\ - nop\n\ - mov r15,r1\n\ - mov #-8,r15\n\ - 0: mov.l @%1,%0\n\ - cmp/eq %0,%3\n\ - bf 1f\n\ - mov.l %2,@%1\n\ - 1: mov r1,r15"\ - : "=&r" (__result) : "r" (mem), "r" (newval), "r" (oldval) \ - : "r0", "r1", "t", "memory"); \ - __result; }) + __arch_compare_and_exchange_n(mem, newval, (int32_t)(oldval), l, val) /* XXX We do not really need 64-bit compare-and-exchange. At least not in the moment. Using it would mean causing portability @@ -122,298 +114,180 @@ typedef uintmax_t uatomic_max_t; such an operation. So don't define any code for now. */ # define __arch_compare_and_exchange_val_64_acq(mem, newval, oldval) \ - (abort (), (__typeof (*mem)) 0) + (abort (), 0) + +/* For "bool" routines, return if the exchange did NOT occur */ + +#define __arch_compare_and_exchange_bool_8_acq(mem, newval, oldval) \ + (! __arch_compare_and_exchange_n(mem, newval, (int8_t)(oldval), b, bool)) + +#define __arch_compare_and_exchange_bool_16_acq(mem, newval, oldval) \ + (! __arch_compare_and_exchange_n(mem, newval, (int16_t)(oldval), w, bool)) + +#define __arch_compare_and_exchange_bool_32_acq(mem, newval, oldval) \ + (! __arch_compare_and_exchange_n(mem, newval, (int32_t)(oldval), l, bool)) + +# define __arch_compare_and_exchange_bool_64_acq(mem, newval, oldval) \ + (abort (), 0) + +/* Similar to the above, have one template which can be used in a + number of places. This version returns both the old and the new + values of the location. Parameters: + bwl: b, w or l for 8, 16 and 32 bit versions. + oper: The instruction to perform on the old value. + Note old is not sign extended, so should be an unsigned long. +*/ + +#define __arch_operate_old_new_n(mem, value, old, new, bwl, oper) \ + (void) ({ __asm __volatile ("\ + .align 2\n\ + mova 1f,r0\n\ + mov r15,r1\n\ + nop\n\ + mov #-8,r15\n\ + 0: mov." #bwl " @%2,%0\n\ + mov %0,%1\n\ + " #oper " %3,%1\n\ + mov." #bwl " %1,@%2\n\ + 1: mov r1,r15" \ + : "=&r" (old), "=&r"(new) \ + : "r" (mem), "r" (value) \ + : "r0", "r1", "memory"); \ + }) + +#define __arch_exchange_and_add_8_int(mem, value) \ + ({ int32_t __value = (value), __new, __old; \ + __arch_operate_old_new_n((mem), __value, __old, __new, b, add); \ + __old; }) + +#define __arch_exchange_and_add_16_int(mem, value) \ + ({ int32_t __value = (value), __new, __old; \ + __arch_operate_old_new_n((mem), __value, __old, __new, w, add); \ + __old; }) + +#define __arch_exchange_and_add_32_int(mem, value) \ + ({ int32_t __value = (value), __new, __old; \ + __arch_operate_old_new_n((mem), __value, __old, __new, l, add); \ + __old; }) + +#define __arch_exchange_and_add_64_int(mem, value) \ + (abort (), 0) #define atomic_exchange_and_add(mem, value) \ - ({ __typeof (*(mem)) __result, __tmp, __value = (value); \ - if (sizeof (*(mem)) == 1) \ - __asm__ __volatile__ ("\ - .align 2\n\ - mova 1f,r0\n\ - mov r15,r1\n\ - mov #-6,r15\n\ - 0: mov.b @%2,%0\n\ - add %0,%1\n\ - mov.b %1,@%2\n\ - 1: mov r1,r15"\ - : "=&r" (__result), "=&r" (__tmp) : "r" (mem), "1" (__value) \ - : "r0", "r1", "memory"); \ - else if (sizeof (*(mem)) == 2) \ - __asm__ __volatile__ ("\ - .align 2\n\ - mova 1f,r0\n\ - mov r15,r1\n\ - mov #-6,r15\n\ - 0: mov.w @%2,%0\n\ - add %0,%1\n\ - mov.w %1,@%2\n\ - 1: mov r1,r15"\ - : "=&r" (__result), "=&r" (__tmp) : "r" (mem), "1" (__value) \ - : "r0", "r1", "memory"); \ - else if (sizeof (*(mem)) == 4) \ - __asm__ __volatile__ ("\ - .align 2\n\ - mova 1f,r0\n\ - mov r15,r1\n\ - mov #-6,r15\n\ - 0: mov.l @%2,%0\n\ - add %0,%1\n\ - mov.l %1,@%2\n\ - 1: mov r1,r15"\ - : "=&r" (__result), "=&r" (__tmp) : "r" (mem), "1" (__value) \ - : "r0", "r1", "memory"); \ - else \ - { \ - __typeof (mem) memp = (mem); \ - do \ - __result = *memp; \ - while (__arch_compare_and_exchange_val_64_acq \ - (memp, __result + __value, __result) == __result); \ - (void) __value; \ - } \ - __result; }) + __atomic_val_bysize (__arch_exchange_and_add, int, mem, value) + + +/* Again, another template. We get a slight optimisation when the old value + does not need to be returned. Parameters: + bwl: b, w or l for 8, 16 and 32 bit versions. + oper: The instruction to perform on the old value. +*/ + +#define __arch_operate_new_n(mem, value, bwl, oper) \ + ({ int32_t __value = (value), __new; \ + __asm __volatile ("\ + .align 2\n\ + mova 1f,r0\n\ + mov r15,r1\n\ + mov #-6,r15\n\ + 0: mov." #bwl " @%1,%0\n\ + " #oper " %2,%0\n\ + mov." #bwl " %0,@%1\n\ + 1: mov r1,r15" \ + : "=&r" (__new) \ + : "r" (mem), "r" (__value) \ + : "r0", "r1", "memory"); \ + __new; \ + }) + +#define __arch_add_8_int(mem, value) \ + __arch_operate_new_n(mem, value, b, add) + +#define __arch_add_16_int(mem, value) \ + __arch_operate_new_n(mem, value, w, add) + +#define __arch_add_32_int(mem, value) \ + __arch_operate_new_n(mem, value, l, add) + +#define __arch_add_64_int(mem, value) \ + (abort (), 0) #define atomic_add(mem, value) \ - (void) ({ __typeof (*(mem)) __tmp, __value = (value); \ - if (sizeof (*(mem)) == 1) \ - __asm__ __volatile__ ("\ - .align 2\n\ - mova 1f,r0\n\ - mov r15,r1\n\ - mov #-6,r15\n\ - 0: mov.b @%1,r2\n\ - add r2,%0\n\ - mov.b %0,@%1\n\ - 1: mov r1,r15"\ - : "=&r" (__tmp) : "r" (mem), "0" (__value) \ - : "r0", "r1", "r2", "memory"); \ - else if (sizeof (*(mem)) == 2) \ - __asm__ __volatile__ ("\ - .align 2\n\ - mova 1f,r0\n\ - mov r15,r1\n\ - mov #-6,r15\n\ - 0: mov.w @%1,r2\n\ - add r2,%0\n\ - mov.w %0,@%1\n\ - 1: mov r1,r15"\ - : "=&r" (__tmp) : "r" (mem), "0" (__value) \ - : "r0", "r1", "r2", "memory"); \ - else if (sizeof (*(mem)) == 4) \ - __asm__ __volatile__ ("\ - .align 2\n\ - mova 1f,r0\n\ - mov r15,r1\n\ - mov #-6,r15\n\ - 0: mov.l @%1,r2\n\ - add r2,%0\n\ - mov.l %0,@%1\n\ - 1: mov r1,r15"\ - : "=&r" (__tmp) : "r" (mem), "0" (__value) \ - : "r0", "r1", "r2", "memory"); \ - else \ - { \ - __typeof (*(mem)) oldval; \ - __typeof (mem) memp = (mem); \ - do \ - oldval = *memp; \ - while (__arch_compare_and_exchange_val_64_acq \ - (memp, oldval + __value, oldval) == oldval); \ - (void) __value; \ - } \ - }) + ((void) __atomic_val_bysize (__arch_add, int, mem, value)) + + +#define __arch_add_negative_8_int(mem, value) \ + (__arch_operate_new_n(mem, value, b, add) < 0) + +#define __arch_add_negative_16_int(mem, value) \ + (__arch_operate_new_n(mem, value, w, add) < 0) + +#define __arch_add_negative_32_int(mem, value) \ + (__arch_operate_new_n(mem, value, l, add) < 0) + +#define __arch_add_negative_64_int(mem, value) \ + (abort (), 0) #define atomic_add_negative(mem, value) \ - ({ unsigned char __result; \ - __typeof (*(mem)) __tmp, __value = (value); \ - if (sizeof (*(mem)) == 1) \ - __asm__ __volatile__ ("\ - .align 2\n\ - mova 1f,r0\n\ - mov r15,r1\n\ - mov #-6,r15\n\ - 0: mov.b @%2,r2\n\ - add r2,%1\n\ - mov.b %1,@%2\n\ - 1: mov r1,r15\n\ - shal %1\n\ - movt %0"\ - : "=r" (__result), "=&r" (__tmp) : "r" (mem), "1" (__value) \ - : "r0", "r1", "r2", "t", "memory"); \ - else if (sizeof (*(mem)) == 2) \ - __asm__ __volatile__ ("\ - .align 2\n\ - mova 1f,r0\n\ - mov r15,r1\n\ - mov #-6,r15\n\ - 0: mov.w @%2,r2\n\ - add r2,%1\n\ - mov.w %1,@%2\n\ - 1: mov r1,r15\n\ - shal %1\n\ - movt %0"\ - : "=r" (__result), "=&r" (__tmp) : "r" (mem), "1" (__value) \ - : "r0", "r1", "r2", "t", "memory"); \ - else if (sizeof (*(mem)) == 4) \ - __asm__ __volatile__ ("\ - .align 2\n\ - mova 1f,r0\n\ - mov r15,r1\n\ - mov #-6,r15\n\ - 0: mov.l @%2,r2\n\ - add r2,%1\n\ - mov.l %1,@%2\n\ - 1: mov r1,r15\n\ - shal %1\n\ - movt %0"\ - : "=r" (__result), "=&r" (__tmp) : "r" (mem), "1" (__value) \ - : "r0", "r1", "r2", "t", "memory"); \ - else \ - abort (); \ - __result; }) + __atomic_bool_bysize (__arch_add_negative, int, mem, value) + + +#define __arch_add_zero_8_int(mem, value) \ + (__arch_operate_new_n(mem, value, b, add) == 0) + +#define __arch_add_zero_16_int(mem, value) \ + (__arch_operate_new_n(mem, value, w, add) == 0) + +#define __arch_add_zero_32_int(mem, value) \ + (__arch_operate_new_n(mem, value, l, add) == 0) + +#define __arch_add_zero_64_int(mem, value) \ + (abort (), 0) #define atomic_add_zero(mem, value) \ - ({ unsigned char __result; \ - __typeof (*(mem)) __tmp, __value = (value); \ - if (sizeof (*(mem)) == 1) \ - __asm__ __volatile__ ("\ - .align 2\n\ - mova 1f,r0\n\ - mov r15,r1\n\ - mov #-6,r15\n\ - 0: mov.b @%2,r2\n\ - add r2,%1\n\ - mov.b %1,@%2\n\ - 1: mov r1,r15\n\ - tst %1,%1\n\ - movt %0"\ - : "=r" (__result), "=&r" (__tmp) : "r" (mem), "1" (__value) \ - : "r0", "r1", "r2", "t", "memory"); \ - else if (sizeof (*(mem)) == 2) \ - __asm__ __volatile__ ("\ - .align 2\n\ - mova 1f,r0\n\ - mov r15,r1\n\ - mov #-6,r15\n\ - 0: mov.w @%2,r2\n\ - add r2,%1\n\ - mov.w %1,@%2\n\ - 1: mov r1,r15\n\ - tst %1,%1\n\ - movt %0"\ - : "=r" (__result), "=&r" (__tmp) : "r" (mem), "1" (__value) \ - : "r0", "r1", "r2", "t", "memory"); \ - else if (sizeof (*(mem)) == 4) \ - __asm__ __volatile__ ("\ - .align 2\n\ - mova 1f,r0\n\ - mov r15,r1\n\ - mov #-6,r15\n\ - 0: mov.l @%2,r2\n\ - add r2,%1\n\ - mov.l %1,@%2\n\ - 1: mov r1,r15\n\ - tst %1,%1\n\ - movt %0"\ - : "=r" (__result), "=&r" (__tmp) : "r" (mem), "1" (__value) \ - : "r0", "r1", "r2", "t", "memory"); \ - else \ - abort (); \ - __result; }) + __atomic_bool_bysize (__arch_add_zero, int, mem, value) + #define atomic_increment_and_test(mem) atomic_add_zero((mem), 1) #define atomic_decrement_and_test(mem) atomic_add_zero((mem), -1) -#define atomic_bit_set(mem, bit) \ - (void) ({ unsigned int __mask = 1 << (bit); \ - if (sizeof (*(mem)) == 1) \ - __asm__ __volatile__ ("\ - .align 2\n\ - mova 1f,r0\n\ - mov r15,r1\n\ - mov #-6,r15\n\ - 0: mov.b @%0,r2\n\ - or %1,r2\n\ - mov.b r2,@%0\n\ - 1: mov r1,r15"\ - : : "r" (mem), "r" (__mask) \ - : "r0", "r1", "r2", "memory"); \ - else if (sizeof (*(mem)) == 2) \ - __asm__ __volatile__ ("\ - .align 2\n\ - mova 1f,r0\n\ - mov r15,r1\n\ - mov #-6,r15\n\ - 0: mov.w @%0,r2\n\ - or %1,r2\n\ - mov.w r2,@%0\n\ - 1: mov r1,r15"\ - : : "r" (mem), "r" (__mask) \ - : "r0", "r1", "r2", "memory"); \ - else if (sizeof (*(mem)) == 4) \ - __asm__ __volatile__ ("\ - .align 2\n\ - mova 1f,r0\n\ - mov r15,r1\n\ - mov #-6,r15\n\ - 0: mov.l @%0,r2\n\ - or %1,r2\n\ - mov.l r2,@%0\n\ - 1: mov r1,r15"\ - : : "r" (mem), "r" (__mask) \ - : "r0", "r1", "r2", "memory"); \ - else \ - abort (); \ - }) - -#define atomic_bit_test_set(mem, bit) \ - ({ unsigned int __mask = 1 << (bit); \ - unsigned int __result = __mask; \ - if (sizeof (*(mem)) == 1) \ - __asm__ __volatile__ ("\ - .align 2\n\ - mova 1f,r0\n\ - nop\n\ - mov r15,r1\n\ - mov #-8,r15\n\ - 0: mov.b @%2,r2\n\ - or r2,%1\n\ - and r2,%0\n\ - mov.b %1,@%2\n\ - 1: mov r1,r15"\ - : "=&r" (__result), "=&r" (__mask) \ - : "r" (mem), "0" (__result), "1" (__mask) \ - : "r0", "r1", "r2", "memory"); \ - else if (sizeof (*(mem)) == 2) \ - __asm__ __volatile__ ("\ - .align 2\n\ - mova 1f,r0\n\ - nop\n\ - mov r15,r1\n\ - mov #-8,r15\n\ - 0: mov.w @%2,r2\n\ - or r2,%1\n\ - and r2,%0\n\ - mov.w %1,@%2\n\ - 1: mov r1,r15"\ - : "=&r" (__result), "=&r" (__mask) \ - : "r" (mem), "0" (__result), "1" (__mask) \ - : "r0", "r1", "r2", "memory"); \ - else if (sizeof (*(mem)) == 4) \ - __asm__ __volatile__ ("\ - .align 2\n\ - mova 1f,r0\n\ - nop\n\ - mov r15,r1\n\ - mov #-8,r15\n\ - 0: mov.l @%2,r2\n\ - or r2,%1\n\ - and r2,%0\n\ - mov.l %1,@%2\n\ - 1: mov r1,r15"\ - : "=&r" (__result), "=&r" (__mask) \ - : "r" (mem), "0" (__result), "1" (__mask) \ - : "r0", "r1", "r2", "memory"); \ - else \ - abort (); \ - __result; }) + +#define __arch_bit_set_8_int(mem, value) \ + __arch_operate_new_n(mem, 1<<(value), b, or) + +#define __arch_bit_set_16_int(mem, value) \ + __arch_operate_new_n(mem, 1<<(value), w, or) + +#define __arch_bit_set_32_int(mem, value) \ + __arch_operate_new_n(mem, 1<<(value), l, or) + +#define __arch_bit_set_64_int(mem, value) \ + (abort (), 0) + +#define __arch_add_64_int(mem, value) \ + (abort (), 0) + +#define atomic_bit_set(mem, value) \ + ((void) __atomic_val_bysize (__arch_bit_set, int, mem, value)) + + +#define __arch_bit_test_set_8_int(mem, value) \ + ({ int32_t __value = 1<<(value), __new, __old; \ + __arch_operate_old_new_n((mem), __value, __old, __new, b, or); \ + __old & __value; }) + +#define __arch_bit_test_set_16_int(mem, value) \ + ({ int32_t __value = 1<<(value), __new, __old; \ + __arch_operate_old_new_n((mem), __value, __old, __new, w, or); \ + __old & __value; }) + +#define __arch_bit_test_set_32_int(mem, value) \ + ({ int32_t __value = 1<<(value), __new, __old; \ + __arch_operate_old_new_n((mem), __value, __old, __new, l, or); \ + __old & __value; }) + +#define __arch_bit_test_set_64_int(mem, value) \ + (abort (), 0) + +#define atomic_bit_test_set(mem, value) \ + __atomic_val_bysize (__arch_bit_test_set, int, mem, value) |