summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorVladimir Murzin <vladimir.murzin@arm.com>2022-07-18 12:57:23 +0100
committerWaldemar Brodkorb <wbx@openadk.org>2022-07-20 11:47:17 +0200
commita13b4d8aabe9b525fa280f4a240e89d991d7f106 (patch)
treed99f43894898d04c5f7cbc66a2c90c9ace62be41
parent03fbd941e943976bb92cb392882c2ff7ec218704 (diff)
linuxthreads/arm: fix ldrex/strex loop when built with O0
O0 build result in the following codegen 00000000 <ldrex>: 0: b480 push {r7} 2: b085 sub sp, #20 4: af00 add r7, sp, #0 6: 6078 str r0, [r7, #4] 8: 687b ldr r3, [r7, #4] a: e853 3f00 ldrex r3, [r3] e: 60fb str r3, [r7, #12] 10: 68fb ldr r3, [r7, #12] 12: 4618 mov r0, r3 14: 3714 adds r7, #20 16: 46bd mov sp, r7 18: f85d 7b04 ldr.w r7, [sp], #4 1c: 4770 bx lr 0000001e <strex>: 1e: b480 push {r7} 20: b085 sub sp, #20 22: af00 add r7, sp, #0 24: 6078 str r0, [r7, #4] 26: 6039 str r1, [r7, #0] 28: 687b ldr r3, [r7, #4] 2a: 683a ldr r2, [r7, #0] 2c: e842 3300 strex r3, r3, [r2] 30: 60fb str r3, [r7, #12] 32: 68fb ldr r3, [r7, #12] 34: 4618 mov r0, r3 36: 3714 adds r7, #20 38: 46bd mov sp, r7 3a: f85d 7b04 ldr.w r7, [sp], #4 3e: 4770 bx lr 00000040 <testandset>: 40: b590 push {r4, r7, lr} 42: b083 sub sp, #12 44: af00 add r7, sp, #0 46: 6078 str r0, [r7, #4] 48: 6878 ldr r0, [r7, #4] 4a: f7ff fffe bl 0 <ldrex> 4e: 4603 mov r3, r0 50: 461c mov r4, r3 52: 6879 ldr r1, [r7, #4] 54: 2001 movs r0, #1 56: f7ff fffe bl 1e <strex> 5a: 4603 mov r3, r0 5c: 2b00 cmp r3, #0 5e: d1f3 bne.n 48 <testandset+0x8> 60: 4623 mov r3, r4 62: 4618 mov r0, r3 64: 370c adds r7, #12 66: 46bd mov sp, r7 68: bd90 pop {r4, r7, pc} ARM ARM suggests that LoadExcl/StoreExcl loops are guaranteed to make forward progress only if, for any LoadExcl/StoreExcl loop within a single thread of execution, the software meets all of the following conditions: 1 Between the Load-Exclusive and the Store-Exclusive, there are no explicit memory accesses, preloads, direct or indirect System register writes, address translation instructions, cache or TLB maintenance instructions, exception generating instructions, exception returns, or indirect branches. ... Obviously condition is not met for O0 builds. O2 build (which is highly likely the most common setting) able to do the right thing resulting in 00000000 <ldrex>: 0: e850 0f00 ldrex r0, [r0] 4: 4770 bx lr 6: bf00 nop 00000008 <strex>: 8: e841 0000 strex r0, r0, [r1] c: 4770 bx lr e: bf00 nop 00000010 <testandset>: 10: 2101 movs r1, #1 12: 4603 mov r3, r0 14: e853 0f00 ldrex r0, [r3] 18: e843 1200 strex r2, r1, [r3] 1c: 2a00 cmp r2, #0 1e: d1f9 bne.n 14 <testandset+0x4> 20: 4770 bx lr 22: bf00 nop Rather than depending on level of optimisation implement whole ldrex/strex loop in inline assembly. Signed-off-by: Vladimir Murzin <vladimir.murzin@arm.com>
-rw-r--r--libpthread/linuxthreads/sysdeps/arm/pt-machine.h34
1 files changed, 9 insertions, 25 deletions
diff --git a/libpthread/linuxthreads/sysdeps/arm/pt-machine.h b/libpthread/linuxthreads/sysdeps/arm/pt-machine.h
index fc17e9bc7..b00b10495 100644
--- a/libpthread/linuxthreads/sysdeps/arm/pt-machine.h
+++ b/libpthread/linuxthreads/sysdeps/arm/pt-machine.h
@@ -29,35 +29,19 @@
#endif
#if defined(__thumb2__)
-PT_EI long int ldrex(int *spinlock)
-{
- long int ret;
- __asm__ __volatile__(
- "ldrex %0, [%1]\n"
- : "=r"(ret)
- : "r"(spinlock) : "memory");
- return ret;
-}
-
-PT_EI long int strex(int val, int *spinlock)
-{
- long int ret;
- __asm__ __volatile__(
- "strex %0, %1, [%2]\n"
- : "=r"(ret)
- : "r" (val), "r"(spinlock) : "memory");
- return ret;
-}
-
/* Spinlock implementation; required. */
PT_EI long int
testandset (int *spinlock)
{
- register unsigned int ret;
-
- do {
- ret = ldrex(spinlock);
- } while (strex(1, spinlock));
+ unsigned int ret, tmp, val = 1;
+
+ __asm__ __volatile__ (
+"0: ldrex %0, [%2] \n"
+" strex %1, %3, [%2] \n"
+" cmp %1, #0 \n"
+" bne 0b"
+ : "=&r" (ret), "=&r" (tmp)
+ : "r" (spinlock), "r" (val) : "memory", "cc");
return ret;
}