author     Bernhard Reutner-Fischer <rep.dot.nop@gmail.com>  2008-03-26 13:40:36 +0000
committer  Bernhard Reutner-Fischer <rep.dot.nop@gmail.com>  2008-03-26 13:40:36 +0000
commit     efce79f09ae6daa77cd322df0d532beec3f445f5 (patch)
tree       ae936850c5671b8bea0abf0d33bf2196f7abc796 /libc
parent     17e961d9c708ab202760ce830f8efe73e91bb129 (diff)
Paul Brook writes:
The attached patch adds support for compiling ARM uClibc as pure Thumb code. This is needed because some recent ARM cores do not implement the traditional ARM mode. Specifically:

* Cortex-M1 - An extremely minimal FPGA-based core that implements only Thumb-1 (aka ARMv6-M).
* Cortex-M3 - A Thumb-2-only ARMv7-M core.

Most of uClibc already builds in Thumb mode; all that remains is a handful of assembly bits. Tested on arm-uclinuxeabi.
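For readers skimming the diff below, the heart of the change is the IT() macro introduced in the new bits/arm_asm.h: a conditionally executed instruction needs an explicit IT block in Thumb-2 but nothing extra in ARM mode, so the macro expands to an it/itt/ittt instruction under __thumb2__ and to nothing otherwise. A minimal sketch of the pattern as it appears throughout the patch:

	#include <bits/arm_asm.h>	/* defines IT(), POP_RET, THUMB1_ONLY */

	cmp	r2, #0		@ set the flags
	IT(t, eq)		@ Thumb-2: assembles as "it eq"; ARM: empty
	moveq	r0, #0		@ conditional in both modes

Cores that are Thumb-1 only (ARMv6-M) instead get separate THUMB1_ONLY code paths, since Thumb-1 has no conditional execution at all.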
Diffstat (limited to 'libc')
-rw-r--r--  libc/string/arm/_memcpy.S              | 182
-rw-r--r--  libc/string/arm/bcopy.S                |  12
-rw-r--r--  libc/string/arm/bzero.S                |  12
-rw-r--r--  libc/string/arm/memcmp.S               |  28
-rw-r--r--  libc/string/arm/memcpy.S               |  11
-rw-r--r--  libc/string/arm/memmove.S              |  11
-rw-r--r--  libc/string/arm/memset.S               |  62
-rw-r--r--  libc/string/arm/strcmp.S               |  19
-rw-r--r--  libc/string/arm/strlen.S               |  25
-rw-r--r--  libc/string/arm/strncmp.S              |  33
-rw-r--r--  libc/sysdeps/linux/arm/__longjmp.S     |  33
-rw-r--r--  libc/sysdeps/linux/arm/bits/arm_asm.h  |  28
-rw-r--r--  libc/sysdeps/linux/arm/bsd-_setjmp.S   |  28
-rw-r--r--  libc/sysdeps/linux/arm/bsd-setjmp.S    |  26
-rw-r--r--  libc/sysdeps/linux/arm/clone.S         |  53
-rw-r--r--  libc/sysdeps/linux/arm/crt1.S          |  69
-rw-r--r--  libc/sysdeps/linux/arm/crti.S          |   1
-rw-r--r--  libc/sysdeps/linux/arm/crtn.S          |   1
-rw-r--r--  libc/sysdeps/linux/arm/mmap64.S        |  45
-rw-r--r--  libc/sysdeps/linux/arm/setjmp.S        |  27
-rw-r--r--  libc/sysdeps/linux/arm/sigrestorer.S   |   7
-rw-r--r--  libc/sysdeps/linux/arm/syscall-eabi.S  |  26
-rw-r--r--  libc/sysdeps/linux/arm/vfork.S         |  40
23 files changed, 757 insertions(+), 22 deletions(-)
diff --git a/libc/string/arm/_memcpy.S b/libc/string/arm/_memcpy.S
index 3704f96b5..5ef63c45a 100644
--- a/libc/string/arm/_memcpy.S
+++ b/libc/string/arm/_memcpy.S
@@ -39,7 +39,9 @@
#include <features.h>
#include <endian.h>
+#include <bits/arm_asm.h>
+#if !defined(THUMB1_ONLY)
/*
* This is one fun bit of code ...
* Some easy listening music is suggested while trying to understand this
@@ -77,11 +79,36 @@
.type _memcpy,%function
.align 4
+/* XXX: The Thumb-2 conditionals can be removed if/when we require an
+ assembler that supports unified syntax. */
+.macro copy regs
+#if defined(__thumb2__)
+ ittt ge
+ ldmiage r1!, \regs
+ stmiage r0!, \regs
+#else
+ ldmgeia r1!, \regs
+ stmgeia r0!, \regs
+#endif
+.endm
+
+.macro copydb regs
+#if defined(__thumb2__)
+ ittt ge
+ ldmdbge r1!, \regs
+ stmdbge r0!, \regs
+#else
+ ldmgedb r1!, \regs
+ stmgedb r0!, \regs
+#endif
+.endm
+
_memcpy:
/* Determine copy direction */
cmp r1, r0
bcc .Lmemcpy_backwards
+ IT(tt, eq)
moveq r0, #0 /* Quick abort for len=0 */
#if defined(__USE_BX__)
bxeq lr
@@ -102,7 +129,7 @@ _memcpy:
blt .Lmemcpy_fl12 /* less than 12 bytes (4 from above) */
subs r2, r2, #0x14
blt .Lmemcpy_fl32 /* less than 32 bytes (12 from above) */
- stmdb sp!, {r4} /* borrow r4 */
+ str r4, [sp, #-4]! /* borrow r4 */
/* blat 32 bytes at a time */
/* XXX for really big copies perhaps we should use more registers */
@@ -115,19 +142,22 @@ _memcpy:
bge .Lmemcpy_floop32
cmn r2, #0x10
- ldmgeia r1!, {r3, r4, r12, lr} /* blat a remaining 16 bytes */
- stmgeia r0!, {r3, r4, r12, lr}
+ /* blat a remaining 16 bytes */
+ copy "{r3, r4, r12, lr}"
subge r2, r2, #0x10
- ldmia sp!, {r4} /* return r4 */
+ ldr r4, [sp], #4 /* restore r4 */
.Lmemcpy_fl32:
adds r2, r2, #0x14
/* blat 12 bytes at a time */
.Lmemcpy_floop12:
- ldmgeia r1!, {r3, r12, lr}
- stmgeia r0!, {r3, r12, lr}
+ copy "{r3, r12, lr}"
+#if defined(__thumb2__)
+ subsge r2, r2, #0x0c
+#else
subges r2, r2, #0x0c
+#endif
bge .Lmemcpy_floop12
.Lmemcpy_fl12:
@@ -135,26 +165,48 @@ _memcpy:
blt .Lmemcpy_fl4
subs r2, r2, #4
+ IT(tt, lt)
ldrlt r3, [r1], #4
strlt r3, [r0], #4
- ldmgeia r1!, {r3, r12}
- stmgeia r0!, {r3, r12}
+ copy "{r3, r12}"
subge r2, r2, #4
.Lmemcpy_fl4:
/* less than 4 bytes to go */
adds r2, r2, #4
+#if defined(__thumb2__)
+ it eq
+ popeq {r0, pc} /* done */
+#elif defined(__ARM_ARCH_4T__)
+ ldmeqia sp!, {r0, r3} /* done */
+ bxeq r3
+#else
ldmeqia sp!, {r0, pc} /* done */
+#endif
/* copy the crud byte at a time */
cmp r2, #2
ldrb r3, [r1], #1
strb r3, [r0], #1
+#if defined(__thumb2__)
+ itt ge
+ ldrbge r3, [r1], #1
+ strbge r3, [r0], #1
+ itt gt
+ ldrbgt r3, [r1], #1
+ strbgt r3, [r0], #1
+#else
ldrgeb r3, [r1], #1
strgeb r3, [r0], #1
ldrgtb r3, [r1], #1
strgtb r3, [r0], #1
+#endif
+#if defined(__ARM_ARCH_4T__)
+ ldmia sp!, {r0, r3}
+ bx r3
+#else
ldmia sp!, {r0, pc}
+#endif
/* erg - unaligned destination */
.Lmemcpy_fdestul:
@@ -164,10 +216,19 @@ _memcpy:
/* align destination with byte copies */
ldrb r3, [r1], #1
strb r3, [r0], #1
+#if defined(__thumb2__)
+ itt ge
+ ldrbge r3, [r1], #1
+ strbge r3, [r0], #1
+ itt gt
+ ldrbgt r3, [r1], #1
+ strbgt r3, [r0], #1
+#else
ldrgeb r3, [r1], #1
strgeb r3, [r0], #1
ldrgtb r3, [r1], #1
strgtb r3, [r0], #1
+#endif
subs r2, r2, r12
blt .Lmemcpy_fl4 /* less than 4 bytes */
@@ -370,12 +431,12 @@ _memcpy:
.Lmemcpy_bl32:
cmn r2, #0x10
- ldmgedb r1!, {r3, r4, r12, lr} /* blat a remaining 16 bytes */
- stmgedb r0!, {r3, r4, r12, lr}
+ /* blat a remaining 16 bytes */
+ copydb "{r3, r4, r12, lr}"
subge r2, r2, #0x10
adds r2, r2, #0x14
- ldmgedb r1!, {r3, r12, lr} /* blat a remaining 12 bytes */
- stmgedb r0!, {r3, r12, lr}
+ /* blat a remaining 12 bytes */
+ copydb "{r3, r12, lr}"
subge r2, r2, #0x0c
ldmia sp!, {r4, lr}
@@ -383,15 +444,16 @@ _memcpy:
adds r2, r2, #8
blt .Lmemcpy_bl4
subs r2, r2, #4
+ IT(tt, lt)
ldrlt r3, [r1, #-4]!
strlt r3, [r0, #-4]!
- ldmgedb r1!, {r3, r12}
- stmgedb r0!, {r3, r12}
+ copydb "{r3, r12}"
subge r2, r2, #4
.Lmemcpy_bl4:
/* less than 4 bytes to go */
adds r2, r2, #4
+ IT(t, eq)
#if defined(__USE_BX__)
bxeq lr
#else
@@ -401,10 +463,19 @@ _memcpy:
cmp r2, #2
ldrb r3, [r1, #-1]!
strb r3, [r0, #-1]!
+#ifdef __thumb2__
+ itt ge
+ ldrbge r3, [r1, #-1]!
+ strbge r3, [r0, #-1]!
+ itt gt
+ ldrbgt r3, [r1, #-1]!
+ strbgt r3, [r0, #-1]!
+#else
ldrgeb r3, [r1, #-1]!
strgeb r3, [r0, #-1]!
ldrgtb r3, [r1, #-1]!
strgtb r3, [r0, #-1]!
+#endif
#if defined(__USE_BX__)
bx lr
#else
@@ -417,10 +488,19 @@ _memcpy:
/* align destination with byte copies */
ldrb r3, [r1, #-1]!
strb r3, [r0, #-1]!
+#ifdef __thumb2__
+ itt ge
+ ldrbge r3, [r1, #-1]!
+ strbge r3, [r0, #-1]!
+ itt gt
+ ldrbgt r3, [r1, #-1]!
+ strbgt r3, [r0, #-1]!
+#else
ldrgeb r3, [r1, #-1]!
strgeb r3, [r0, #-1]!
ldrgtb r3, [r1, #-1]!
strgtb r3, [r0, #-1]!
+#endif
subs r2, r2, r12
blt .Lmemcpy_bl4 /* less than 4 bytes to go */
ands r12, r1, #3
@@ -591,3 +671,77 @@ _memcpy:
.Lmemcpy_bsrcul1l4:
add r1, r1, #1
b .Lmemcpy_bl4
+
+#else /* THUMB1_ONLY */
+
+/* This is a fairly dumb implementation for when we can't use the 32-bit code
+ above. */
+.text
+.global _memcpy
+.hidden _memcpy
+.type _memcpy,%function
+.align 4
+.thumb
+_memcpy:
+ push {r0, r4}
+ cmp r2, #0
+ beq .Lmemcpy_exit
+ @ See if we have overlapping regions, and need to reverse the
+ @ direction of the copy
+ cmp r0, r1
+ bls .Lmemcpy_forwards
+ add r4, r1, r2
+ cmp r0, r4
+ bcc .Lmemcpy_backwards
+.Lmemcpy_forwards:
+ /* Forwards. */
+ mov r3, r0
+ eor r3, r1
+ mov r4, #3
+ tst r3, r4
+ bne .Lmemcpy_funaligned
+ cmp r2, #8
+ bcc .Lmemcpy_funaligned
+1: @ copy up to the first word boundary.
+ tst r0, r4
+ beq 1f
+ ldrb r3, [r1]
+ add r1, r1, #1
+ strb r3, [r0]
+ add r0, r0, #1
+ sub r2, r2, #1
+ b 1b
+1: @ Copy aligned words
+ ldr r3, [r1]
+ add r1, r1, #4
+ str r3, [r0]
+ add r0, r0, #4
+ sub r2, r2, #4
+ cmp r2, #4
+ bcs 1b
+ cmp r2, #0
+ beq .Lmemcpy_exit
+.Lmemcpy_funaligned:
+1:
+ ldrb r3, [r1]
+ add r1, r1, #1
+ strb r3, [r0]
+ add r0, r0, #1
+ sub r2, r2, #1
+ bne 1b
+.Lmemcpy_exit:
+ pop {r0, r4}
+ bx lr
+
+.Lmemcpy_backwards:
+ add r0, r0, r2
+ add r1, r1, r2
+1:
+ sub r0, r0, #1
+ sub r1, r1, #1
+ ldrb r3, [r1]
+ strb r3, [r0]
+ sub r2, r2, #1
+ bne 1b
+ b .Lmemcpy_exit
+#endif
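
A subtlety in the copy/copydb macros above: the Thumb-2 branch emits ittt (three condition slots) for only two instructions in the macro body, deliberately leaving the third slot to cover the conditional sub that follows every call site in this file. Expanded, a call site looks like this (a sketch of the __thumb2__ case):

	cmn	r2, #0x10
	ittt	ge			@ next three instructions conditional
	ldmiage	r1!, {r3, r4, r12, lr}	@ macro body
	stmiage	r0!, {r3, r4, r12, lr}	@ macro body
	subge	r2, r2, #0x10		@ the instruction after the macro call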
diff --git a/libc/string/arm/bcopy.S b/libc/string/arm/bcopy.S
index db3c9e6c1..2d6e90d13 100644
--- a/libc/string/arm/bcopy.S
+++ b/libc/string/arm/bcopy.S
@@ -40,6 +40,7 @@
/* bcopy = memcpy/memmove with arguments reversed. */
#include <features.h>
+#include <bits/arm_asm.h>
#ifdef __UCLIBC_SUSV3_LEGACY__
@@ -48,12 +49,23 @@
.type bcopy,%function
.align 4
+#if defined(__thumb__) && !defined(__thumb2__)
+.thumb_func
+bcopy:
+ push {r2, lr}
+ mov ip, r0
+ mov r0, r1
+ mov r1, ip
+ bl _memcpy
+ POP_RET
+#else
bcopy:
/* switch the source and destination registers */
eor r0, r1, r0
eor r1, r0, r1
eor r0, r1, r0
b _memcpy /* (PLT) */
+#endif
.size bcopy,.-bcopy
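
The three-eor sequence in the ARM-mode bcopy is the classic XOR swap, exchanging the src and dest arguments without a scratch register before tail-calling _memcpy. Traced with a = r0 and b = r1 on entry:

	eor	r0, r1, r0	@ r0 = b ^ a
	eor	r1, r0, r1	@ r1 = (b ^ a) ^ b = a
	eor	r0, r1, r0	@ r0 = a ^ (b ^ a) = b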
diff --git a/libc/string/arm/bzero.S b/libc/string/arm/bzero.S
index ee49cf560..e576a12e9 100644
--- a/libc/string/arm/bzero.S
+++ b/libc/string/arm/bzero.S
@@ -38,6 +38,7 @@
*/
#include <features.h>
+#include <bits/arm_asm.h>
#ifdef __UCLIBC_SUSV3_LEGACY__
@@ -46,10 +47,21 @@
.type bzero,%function
.align 4
+#if defined(__thumb__) && !defined(__thumb2__)
+.thumb_func
+bzero:
+ push {r2, lr}
+ mov r2, r1
+ mov r1, #0
+ bl HIDDEN_JUMPTARGET(memset)
+ POP_RET
+#else
+
bzero:
mov r2, r1
mov r1, #0
b HIDDEN_JUMPTARGET(memset)
+#endif
.size bzero,.-bzero
diff --git a/libc/string/arm/memcmp.S b/libc/string/arm/memcmp.S
index 4f78b5128..65409f43a 100644
--- a/libc/string/arm/memcmp.S
+++ b/libc/string/arm/memcmp.S
@@ -30,15 +30,41 @@
*/
#include <features.h>
+#include <bits/arm_asm.h>
.text
.global memcmp
.type memcmp,%function
.align 4
+#if defined(THUMB1_ONLY)
+.thumb_func
+memcmp:
+ cmp r2, #0
+ bne 1f
+ mov r0, #0
+ bx lr
+1:
+ push {r4}
+ add r4, r0, r2
+2:
+ ldrb r2, [r0]
+ add r0, r0, #1
+ ldrb r3, [r1]
+ add r1, r1, #1
+ cmp r4, r0
+ beq 3f
+ cmp r2, r3
+ beq 2b
+3:
+ sub r0, r2, r3
+ pop {r4}
+ bx lr
+#else
memcmp:
/* if ((len - 1) < 0) return 0 */
subs r2, r2, #1
+ IT(tt, mi)
movmi r0, #0
#if defined(__USE_BX__)
bxmi lr
@@ -51,6 +77,7 @@ memcmp:
ldrb r2, [r0], #1
ldrb r3, [r1], #1
cmp ip, r0
+ IT(t, cs)
cmpcs r2, r3
beq 1b
sub r0, r2, r3
@@ -59,6 +86,7 @@ memcmp:
#else
mov pc, lr
#endif
+#endif
.size memcmp,.-memcmp
diff --git a/libc/string/arm/memcpy.S b/libc/string/arm/memcpy.S
index 7a5b6ab76..d2013d211 100644
--- a/libc/string/arm/memcpy.S
+++ b/libc/string/arm/memcpy.S
@@ -38,16 +38,23 @@
*/
#include <features.h>
+#include <bits/arm_asm.h>
.text
.global memcpy
.type memcpy,%function
.align 4
+#if defined(__thumb__) && !defined(__thumb2__)
+.thumb_func
memcpy:
- stmfd sp!, {r0, lr}
+ push {r0, lr}
bl _memcpy
- ldmfd sp!, {r0, pc}
+ POP_RET
+#else
+memcpy:
+ b _memcpy
+#endif
.size memcpy,.-memcpy
diff --git a/libc/string/arm/memmove.S b/libc/string/arm/memmove.S
index 45cd9b4d4..c11b98dd4 100644
--- a/libc/string/arm/memmove.S
+++ b/libc/string/arm/memmove.S
@@ -38,16 +38,23 @@
*/
#include <features.h>
+#include <bits/arm_asm.h>
.text
.global memmove
.type memmove,%function
.align 4
+#if defined(__thumb__) && !defined(__thumb2__)
+.thumb_func
memmove:
- stmfd sp!, {r0, lr}
+ push {r2, lr}
bl _memcpy
- ldmfd sp!, {r0, pc}
+ POP_RET
+#else
+memmove:
+ b _memcpy
+#endif
.size memmove,.-memmove
diff --git a/libc/string/arm/memset.S b/libc/string/arm/memset.S
index 16bfe0dc5..66aa6039c 100644
--- a/libc/string/arm/memset.S
+++ b/libc/string/arm/memset.S
@@ -19,12 +19,52 @@
#include <features.h>
#include <sys/syscall.h>
+#include <bits/arm_asm.h>
.text
.global memset
.type memset,%function
.align 4
+#if defined(THUMB1_ONLY)
+.thumb_func
+memset:
+ mov ip, r0
+ cmp r2, #8 @ at least 8 bytes to do?
+ bcc 2f
+
+ lsl r3, r1, #8
+ orr r1, r3
+ lsl r3, r1, #16
+ orr r1, r3
+
+ mov r3, #3
+1: @ Fill up to the first word boundary
+ tst r0, r3
+ beq 1f
+ strb r1, [r0]
+ add r0, r0, #1
+ sub r2, r2, #1
+ b 1b
+1: @ Fill aligned words
+ str r1, [r0]
+ add r0, r0, #4
+ sub r2, r2, #4
+ cmp r2, #4
+ bcs 1b
+
+2: @ Fill the remaining bytes
+ cmp r2, #0
+ beq 2f
+1:
+ strb r1, [r0]
+ add r0, r0, #1
+ sub r2, r2, #1
+ bne 1b
+2:
+ mov r0, ip
+ bx lr
+#else
memset:
mov a4, a1
cmp a3, $8 @ at least 8 bytes to do?
@@ -33,8 +73,14 @@ memset:
orr a2, a2, a2, lsl $16
1:
tst a4, $3 @ aligned yet?
+#if defined(__thumb2__)
+ itt ne
+ strbne a2, [a4], $1
+ subne a3, a3, $1
+#else
strneb a2, [a4], $1
subne a3, a3, $1
+#endif
bne 1b
mov ip, a2
1:
@@ -51,16 +97,30 @@ memset:
stmia a4!, {a2, ip}
sub a3, a3, $8
cmp a3, $8 @ 8 bytes still to do?
+#if defined(__thumb2__)
+ itt ge
+ stmiage a4!, {a2, ip}
+ subge a3, a3, $8
+#else
stmgeia a4!, {a2, ip}
subge a3, a3, $8
+#endif
bge 1b
2:
movs a3, a3 @ anything left?
+ IT(t, eq)
#if defined(__USE_BX__)
bxeq lr
#else
moveq pc, lr @ nope
#endif
+#if defined (__thumb2__)
+1:
+ strb a2, [a4], #1
+ subs a3, a3, #1
+ bne 1b
+ bx lr
+#else
rsb a3, a3, $7
add pc, pc, a3, lsl $2
mov r0, r0
@@ -76,6 +136,8 @@ memset:
#else
mov pc, lr
#endif
+#endif
+#endif
.size memset,.-memset
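
Before its word loop, the Thumb-1 memset broadcasts the fill byte across a 32-bit word with two shift/or steps. Traced with an assumed fill value of 0xbb in r1 (memset's int argument, reduced to an unsigned char):

	@ r1 = 0x000000bb on entry to this sequence
	lsl	r3, r1, #8	@ r3 = 0x0000bb00
	orr	r1, r3		@ r1 = 0x0000bbbb
	lsl	r3, r1, #16	@ r3 = 0xbbbb0000
	orr	r1, r3		@ r1 = 0xbbbbbbbb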
diff --git a/libc/string/arm/strcmp.S b/libc/string/arm/strcmp.S
index 89aa38874..97363c1c2 100644
--- a/libc/string/arm/strcmp.S
+++ b/libc/string/arm/strcmp.S
@@ -30,17 +30,35 @@
*/
#include <features.h>
+#include <bits/arm_asm.h>
.text
.global strcmp
.type strcmp,%function
.align 4
+#if defined(__thumb__) && !defined(__thumb2__)
+.thumb_func
+strcmp:
+1:
+ ldrb r2, [r0]
+ add r0, r0, #1
+ ldrb r3, [r1]
+ add r1, r1, #1
+ cmp r2, #0
+ beq 2f
+ cmp r2, r3
+ beq 1b
+2:
+ sub r0, r2, r3
+ bx lr
+#else
strcmp:
1:
ldrb r2, [r0], #1
ldrb r3, [r1], #1
cmp r2, #1
+ IT(t, cs)
cmpcs r2, r3
beq 1b
sub r0, r2, r3
@@ -49,6 +67,7 @@ strcmp:
#else
mov pc, lr
#endif
+#endif
.size strcmp,.-strcmp
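
The ARM-mode strcmp loop folds two exit conditions into one conditional compare: cmp r2, #1 leaves the carry flag set exactly when the byte loaded from the first string is non-zero (r2 >= 1), so the second compare runs only before the terminator is reached. Annotated:

	cmp	r2, #1		@ C := (r2 >= 1), i.e. byte is not NUL
	IT(t, cs)
	cmpcs	r2, r3		@ if not NUL, compare with the other string's byte
	beq	1b		@ loop while the bytes are equal and non-NUL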
diff --git a/libc/string/arm/strlen.S b/libc/string/arm/strlen.S
index 5b4b02e17..949e918f4 100644
--- a/libc/string/arm/strlen.S
+++ b/libc/string/arm/strlen.S
@@ -20,6 +20,7 @@
#include <features.h>
#include <endian.h>
#include <sys/syscall.h>
+#include <bits/arm_asm.h>
/* size_t strlen(const char *S)
* entry: r0 -> string
@@ -31,6 +32,19 @@
.type strlen,%function
.align 4
+#if defined(THUMB1_ONLY)
+/* A simple implementation for when the ARM implementation can't be used. */
+.thumb_func
+strlen:
+ mov r2, #0
+1:
+ ldrb r1, [r0, r2]
+ add r2, r2, #1
+ cmp r1, #0
+ bne 1b
+ sub r0, r2, #1
+ bx lr
+#else
strlen:
bic r1, r0, $3 @ addr of word containing first byte
ldr r2, [r1], $4 @ get the first word
@@ -41,38 +55,48 @@ strlen:
#if __BYTE_ORDER == __BIG_ENDIAN
orr r2, r2, $0xff000000 @ set this byte to non-zero
subs r3, r3, $1 @ any more to do?
+ IT(t, gt)
orrgt r2, r2, $0x00ff0000 @ if so, set this byte
subs r3, r3, $1 @ more?
+ IT(t, gt)
orrgt r2, r2, $0x0000ff00 @ then set.
#else
orr r2, r2, $0x000000ff @ set this byte to non-zero
subs r3, r3, $1 @ any more to do?
+ IT(t, gt)
orrgt r2, r2, $0x0000ff00 @ if so, set this byte
subs r3, r3, $1 @ more?
+ IT(t, gt)
orrgt r2, r2, $0x00ff0000 @ then set.
#endif
Laligned: @ here, we have a word in r2. Does it
tst r2, $0x000000ff @ contain any zeroes?
+ IT(tttt, ne)
tstne r2, $0x0000ff00 @
tstne r2, $0x00ff0000 @
tstne r2, $0xff000000 @
addne r0, r0, $4 @ if not, the string is 4 bytes longer
+ IT(t, ne)
ldrne r2, [r1], $4 @ and we continue to the next word
bne Laligned @
Llastword: @ drop through to here once we find a
#if __BYTE_ORDER == __BIG_ENDIAN
tst r2, $0xff000000 @ word that has a zero byte in it
+ IT(tttt, ne)
addne r0, r0, $1 @
tstne r2, $0x00ff0000 @ and add up to 3 bytes on to it
addne r0, r0, $1 @
tstne r2, $0x0000ff00 @ (if first three all non-zero, 4th
+ IT(t, ne)
addne r0, r0, $1 @ must be zero)
#else
tst r2, $0x000000ff @
+ IT(tttt, ne)
addne r0, r0, $1 @
tstne r2, $0x0000ff00 @ and add up to 3 bytes on to it
addne r0, r0, $1 @
tstne r2, $0x00ff0000 @ (if first three all non-zero, 4th
+ IT(t, ne)
addne r0, r0, $1 @ must be zero)
#endif
#if defined(__USE_BX__)
@@ -80,6 +104,7 @@ Llastword: @ drop through to here once we find a
#else
mov pc,lr
#endif
+#endif
.size strlen,.-strlen
diff --git a/libc/string/arm/strncmp.S b/libc/string/arm/strncmp.S
index eaf0620b4..8487639c8 100644
--- a/libc/string/arm/strncmp.S
+++ b/libc/string/arm/strncmp.S
@@ -30,15 +30,46 @@
*/
#include <features.h>
+#include <bits/arm_asm.h>
.text
.global strncmp
.type strncmp,%function
.align 4
+#if defined(THUMB1_ONLY)
+.thumb_func
strncmp:
/* if (len == 0) return 0 */
cmp r2, #0
+ bne 1f
+ mov r0, #0
+ bx lr
+1:
+ push {r4}
+
+ /* r4 == last src address to compare */
+ add r4, r0, r2
+2:
+ cmp r4, r0
+ beq 3f
+ ldrb r2, [r0]
+ add r0, r0, #1
+ ldrb r3, [r1]
+ add r1, r1, #1
+ cmp r2, #0
+ beq 3f
+ cmp r2, r3
+ beq 2b
+3:
+ sub r0, r2, r3
+ pop {r4}
+ bx lr
+#else
+strncmp:
+ /* if (len == 0) return 0 */
+ cmp r2, #0
+ IT(tt, eq)
moveq r0, #0
#if defined(__USE_BX__)
bxeq lr
@@ -53,6 +84,7 @@ strncmp:
ldrb r2, [r0], #1
ldrb r3, [r1], #1
cmp ip, r0
+ IT(tt, cs)
cmpcs r2, #1
cmpcs r2, r3
beq 1b
@@ -62,6 +94,7 @@ strncmp:
#else
mov pc, lr
#endif
+#endif
.size strncmp,.-strncmp
diff --git a/libc/sysdeps/linux/arm/__longjmp.S b/libc/sysdeps/linux/arm/__longjmp.S
index 4261797f8..5faf4ece9 100644
--- a/libc/sysdeps/linux/arm/__longjmp.S
+++ b/libc/sysdeps/linux/arm/__longjmp.S
@@ -18,6 +18,7 @@
02111-1307 USA. */
#include <features.h>
+#include <bits/arm_asm.h>
#define _SETJMP_H
#define _ASM
#include <bits/setjmp.h>
@@ -26,13 +27,44 @@
.global __longjmp
.type __longjmp,%function
.align 2
+#if defined(THUMB1_ONLY)
+.thumb_func
+__longjmp:
+ mov r2, r0
+ movs r0, r1
+ /* can't let setjmp() return zero! */
+ bne 1f
+ mov r0, #1
+1:
+ mov r1, r2
+ /* Restore registers, shuffling them through low regs. */
+ add r2, #(4 * 4)
+ ldmia r2!, {r4, r5, r6, r7}
+ mov r8, r4
+ mov r9, r5
+ mov sl, r6
+ mov fp, r7
+ ldmia r2!, {r4, r5}
+ mov sp, r4
+ mov lr, r5
+ ldmia r1!, {r4, r5, r6, r7}
+ bx lr
+#else
__longjmp:
mov ip, r0 /* save jmp_buf pointer */
movs r0, r1 /* get the return value in place */
+ IT(t, eq)
moveq r0, #1 /* can't let setjmp() return zero! */
+#if defined(__thumb2__)
+ /* Thumb-2 does not allow loading sp with ldm. */
+ ldmia ip!, {v1-v6, sl, fp}
+ ldr sp, [ip], #4
+ ldr lr, [ip], #4
+#else
ldmia ip!, {v1-v6, sl, fp, sp, lr}
+#endif
#if defined __UCLIBC_HAS_FLOATS__ && ! defined __UCLIBC_HAS_SOFT_FLOAT__
#ifdef __VFP_FP__
@@ -76,6 +108,7 @@ __longjmp:
#else
mov pc, lr
#endif
+#endif
.size __longjmp,.-__longjmp
libc_hidden_def(__longjmp)
diff --git a/libc/sysdeps/linux/arm/bits/arm_asm.h b/libc/sysdeps/linux/arm/bits/arm_asm.h
new file mode 100644
index 000000000..1d87df6eb
--- /dev/null
+++ b/libc/sysdeps/linux/arm/bits/arm_asm.h
@@ -0,0 +1,28 @@
+/* Various definitions used by the ARM uClibc assembly code. */
+#ifndef _ARM_ASM_H
+#define _ARM_ASM_H
+
+#ifdef __thumb2__
+.thumb
+.syntax unified
+#define IT(t, cond) i##t cond
+#else
+/* XXX: This can be removed if/when we require an assembler that supports
+ unified assembly syntax. */
+#define IT(t, cond)
+/* Code to return from a thumb function stub. */
+#ifdef __ARM_ARCH_4T__
+#define POP_RET pop {r2, r3}; bx r3
+#else
+#define POP_RET pop {r2, pc}
+#endif
+#endif
+
+#if defined(__ARM_ARCH_6M__)
+/* M profile cores are Thumb-only; undefine IT so any stray use of the
+   32-bit code paths fails to assemble. */
+#undef IT
+#define THUMB1_ONLY 1
+#endif
+
+#endif /* _ARM_ASM_H */
+
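
POP_RET needs the two forms because ARMv4T cannot interwork through a load into pc: in Thumb state on v4T, pop {pc} ignores bit 0 of the loaded address and stays in Thumb, so a stub that may return to ARM-state code must pop into a scratch register and bx through it (the 32-bit _memcpy above mirrors this with its ldmeqia/bxeq pair under __ARM_ARCH_4T__). From ARMv5T on, pop {pc} interworks and the short form is safe. A typical stub, the shape bcopy and bzero take above (illustrative names):

	.thumb_func
stub:
	push	{r2, lr}	@ dummy slot keeps the stack 8-byte aligned
	bl	worker		@ call the real implementation
	POP_RET			@ v4T: pop {r2, r3}; bx r3; later: pop {r2, pc}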
diff --git a/libc/sysdeps/linux/arm/bsd-_setjmp.S b/libc/sysdeps/linux/arm/bsd-_setjmp.S
index f70073266..a05570df7 100644
--- a/libc/sysdeps/linux/arm/bsd-_setjmp.S
+++ b/libc/sysdeps/linux/arm/bsd-_setjmp.S
@@ -17,13 +17,38 @@
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
02111-1307 USA. */
-/* This just does a tail-call to `__sigsetjmp (ARG, 1)'.
+#include <bits/arm_asm.h>
+
+/* This just does a tail-call to `__sigsetjmp (ARG, 0)'.
We cannot do it in C because it must be a tail-call, so frame-unwinding
in setjmp doesn't clobber the state restored by longjmp. */
.global _setjmp
.type _setjmp,%function
.align 2
+#if defined(THUMB1_ONLY)
+.thumb_func
+_setjmp:
+ mov r1, #0
+#ifdef __PIC__
+ ldr r3, .L_GOT
+ adr r2, .L_GOT
+ add r3, r2, r3
+
+ ldr r2, .L_GOT+4 /* __sigsetjmp */
+ ldr r2, [r2, r3]
+ bx r2
+
+ .align 2
+.L_GOT:
+ .word _GLOBAL_OFFSET_TABLE_-.L_GOT
+ .word __sigsetjmp(GOT)
+#else
+ ldr r2, =__sigsetjmp
+ bx r2
+.pool
+#endif
+#else
_setjmp:
mov r1, #0
#ifdef __PIC__
@@ -31,5 +56,6 @@ _setjmp:
#else
b __sigsetjmp
#endif
+#endif
.size _setjmp,.-_setjmp
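
The PIC path of the Thumb-1 _setjmp above (repeated in setjmp and crt1 below) is the standard GOT-indirection pattern: a pc-relative adr of a local anchor plus the stored link-time distance to _GLOBAL_OFFSET_TABLE_ gives the GOT base at run time, and the sym(GOT) word is then the offset of the slot holding the symbol's real address. Annotated:

	ldr	r3, .L_GOT	@ r3 = &_GLOBAL_OFFSET_TABLE_ - &.L_GOT
	adr	r2, .L_GOT	@ r2 = run-time address of .L_GOT
	add	r3, r2, r3	@ r3 = GOT base
	ldr	r2, .L_GOT+4	@ r2 = offset of __sigsetjmp's GOT slot
	ldr	r2, [r2, r3]	@ r2 = address of __sigsetjmp
	bx	r2		@ tail-call (bx also covers interworking)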
diff --git a/libc/sysdeps/linux/arm/bsd-setjmp.S b/libc/sysdeps/linux/arm/bsd-setjmp.S
index 6253c6675..d7ca72ad5 100644
--- a/libc/sysdeps/linux/arm/bsd-setjmp.S
+++ b/libc/sysdeps/linux/arm/bsd-setjmp.S
@@ -17,6 +17,8 @@
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
02111-1307 USA. */
+#include <bits/arm_asm.h>
+
/* This just does a tail-call to `__sigsetjmp (ARG, 1)'.
We cannot do it in C because it must be a tail-call, so frame-unwinding
in setjmp doesn't clobber the state restored by longjmp. */
@@ -24,6 +26,29 @@
.global setjmp
.type setjmp,%function
.align 2
+#if defined(THUMB1_ONLY)
+.thumb_func
+setjmp:
+ mov r1, #1
+#ifdef __PIC__
+ ldr r3, .L_GOT
+ adr r2, .L_GOT
+ add r3, r2, r3
+
+ ldr r2, .L_GOT+4 /* __sigsetjmp */
+ ldr r2, [r2, r3]
+ bx r2
+
+ .align 2
+.L_GOT:
+ .word _GLOBAL_OFFSET_TABLE_-.L_GOT
+ .word __sigsetjmp(GOT)
+#else
+ ldr r2, =__sigsetjmp
+ bx r2
+.pool
+#endif
+#else
setjmp:
mov r1, #1
#ifdef __PIC__
@@ -31,5 +56,6 @@ setjmp:
#else
b __sigsetjmp
#endif
+#endif
.size setjmp,.-setjmp
diff --git a/libc/sysdeps/linux/arm/clone.S b/libc/sysdeps/linux/arm/clone.S
index a5a847d1e..d9483735d 100644
--- a/libc/sysdeps/linux/arm/clone.S
+++ b/libc/sysdeps/linux/arm/clone.S
@@ -24,17 +24,66 @@
#include <features.h>
#include <bits/errno.h>
#include <sys/syscall.h>
+#include <bits/arm_asm.h>
-#ifdef __NR_clone
+#if defined(__NR_clone)
/* int clone(int (*fn)(void *arg), void *child_stack, int flags, void *arg); */
.text
.global clone
.type clone,%function
.align 2
+#if defined(THUMB1_ONLY)
+.thumb_func
clone:
@ sanity check args
cmp r0, #0
+ beq __einval
+ cmp r1, #0
+ beq __einval
+
+ @ insert the args onto the new stack
+ sub r1, r1, #8
+ str r3, [r1, #4]
+ @ save the function pointer as the 0th element
+ str r0, [r1]
+
+ @ do the system call
+ @ get flags
+ mov r0, r2
+ @ new sp is already in r1
+ DO_CALL (clone)
+ movs a1, a1
+ blt __error
+ beq 1f
+ bx lr
+1:
+
+ @ pick the function arg and call address off the stack and execute
+ ldr r0, [sp, #4]
+ ldr r1, [sp]
+ bl 2f @ blx r1
+
+ @ and we are done, passing the return value through r0
+ bl HIDDEN_JUMPTARGET(_exit)
+ @ Should never return
+ b .
+
+2:
+ bx r1
+
+__einval:
+ ldr r0, =-EINVAL
+__error:
+ push {r3, lr}
+ bl __syscall_error
+ POP_RET
+.pool
+#else
+clone:
+ @ sanity check args
+ cmp r0, #0
+ IT(te, ne)
cmpne r1, #0
moveq r0, #-EINVAL
beq __error
@@ -52,6 +101,7 @@ clone:
DO_CALL (clone)
movs a1, a1
blt __error
+ IT(t, ne)
#if defined(__USE_BX__)
bxne lr
#else
@@ -68,6 +118,7 @@ clone:
__error:
b __syscall_error
+#endif
.size clone,.-clone
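
The odd-looking bl 2f in the Thumb-1 clone is a simulated blx for cores without the ARMv5T BLX-register instruction: bl sets lr to the address of the instruction after it, the branch target immediately does bx r1, and the child's function later returns through that lr, landing on the _exit call. Schematically:

	bl	2f		@ lr := next instruction; jump to 2:
	@ the function in r1 eventually returns here via lr
	bl	HIDDEN_JUMPTARGET(_exit)
	...
2:	bx	r1		@ jump to the function pointer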
diff --git a/libc/sysdeps/linux/arm/crt1.S b/libc/sysdeps/linux/arm/crt1.S
index 8d4d230a7..082348e39 100644
--- a/libc/sysdeps/linux/arm/crt1.S
+++ b/libc/sysdeps/linux/arm/crt1.S
@@ -94,6 +94,7 @@ ARM register quick reference:
*/
#include <features.h>
+#include <bits/arm_asm.h>
.text
.globl _start
@@ -105,6 +106,73 @@ ARM register quick reference:
.weak _fini
#endif
+#if defined(THUMB1_ONLY)
+.thumb_func
+_start:
+ /* Clear the frame pointer since this is the outermost frame. */
+ mov r3, #0
+ mov fp, r3
+
+#ifdef __ARCH_USE_MMU__
+ /* Pop argc off the stack and save a pointer to argv */
+ pop {a2}
+ mov a3, sp
+#else
+ /*
+ * uClinux/arm stacks look a little different from normal
+ * MMU-full Linux/arm stacks (for no good reason)
+ */
+ /* pull argc and argv off the stack. We are going to push 3
+ * arguments, so pop one here to maintain doubleword alignment. */
+ pop {a2}
+ ldr a3, [sp]
+#endif
+
+ /* Push stack limit and rtld_fini */
+ push {a1, a3}
+
+#ifdef __PIC__
+ ldr r4, .L_GOT
+.L_GOT_OFF:
+ adr r5, .L_GOT
+ add r4, r5, r4
+
+ ldr r5, .L_GOT+4 /* _fini */
+ ldr a1, [r4, r5]
+ push {a1} /* Push _fini */
+
+ ldr r5, .L_GOT+8 /* _init */
+ ldr a4, [r4, r5]
+
+ ldr r5, .L_GOT+12 /* main */
+ ldr a1, [r4, r5]
+
+#else
+ /* Fetch address of fini */
+ ldr r4, =_fini
+ /* Push fini */
+ push {r4}
+
+ /* Set up the other arguments in registers */
+ ldr a1, =main
+ ldr a4, =_init
+#endif
+ /* __uClibc_main (main, argc, argv, init, fini, rtld_fini, stack_end) */
+ /* Let the libc call main and exit with its return code. */
+ bl __uClibc_main
+
+ /* should never get here....*/
+ bl abort
+.pool
+
+#ifdef __PIC__
+.L_GOT:
+ .word _GLOBAL_OFFSET_TABLE_-.L_GOT
+ .word _fini(GOT)
+ .word _init(GOT)
+ .word main(GOT)
+#endif
+#else /* !THUMB1_ONLY */
_start:
/* Clear the frame pointer and link register since this is the outermost frame. */
mov fp, #0
@@ -175,6 +243,7 @@ _start:
.word _init(GOT)
.word main(GOT)
#endif
+#endif
/* Define a symbol for the first piece of initialized data. */
.data
diff --git a/libc/sysdeps/linux/arm/crti.S b/libc/sysdeps/linux/arm/crti.S
index 4835b8331..e335b7140 100644
--- a/libc/sysdeps/linux/arm/crti.S
+++ b/libc/sysdeps/linux/arm/crti.S
@@ -1,5 +1,6 @@
.file "initfini.c"
+#include <bits/arm_asm.h>
.section .init
.global _init
.type _init, %function
diff --git a/libc/sysdeps/linux/arm/crtn.S b/libc/sysdeps/linux/arm/crtn.S
index 7a1ca1ab1..de01b38dc 100644
--- a/libc/sysdeps/linux/arm/crtn.S
+++ b/libc/sysdeps/linux/arm/crtn.S
@@ -1,5 +1,6 @@
.file "initfini.c"
+#include <bits/arm_asm.h>
.section .init
.global _init
.type _init, %function
diff --git a/libc/sysdeps/linux/arm/mmap64.S b/libc/sysdeps/linux/arm/mmap64.S
index ba8cb2fca..73d6b51ce 100644
--- a/libc/sysdeps/linux/arm/mmap64.S
+++ b/libc/sysdeps/linux/arm/mmap64.S
@@ -20,6 +20,7 @@
#define _ERRNO_H
#include <bits/errno.h>
#include <sys/syscall.h>
+#include <bits/arm_asm.h>
#if defined __UCLIBC_HAS_LFS__ && defined __NR_mmap2
@@ -28,9 +29,46 @@
.global mmap64
.type mmap64,%function
.align 2
-mmap64:
#ifdef __ARM_EABI__
+#if defined(THUMB1_ONLY)
+.thumb_func
+mmap64:
+#ifdef __ARMEB__
+/* Offsets are after pushing 3 words. */
+# define LOW_OFFSET 12 + 8 + 4
+# define HIGH_OFFSET 12 + 8 + 0
+#else
+# define LOW_OFFSET 12 + 8 + 0
+# define HIGH_OFFSET 12 + 8 + 4
+#endif
+ push {r4, r5, r6}
+ ldr r6, [sp, $LOW_OFFSET]
+ ldr r5, [sp, $HIGH_OFFSET]
+ lsl r4, r6, #20 @ check that offset is page-aligned
+ bne .Linval
+ lsr r4, r5, #12 @ check for overflow
+ bne .Linval
+ @ compose page offset
+ lsr r6, r6, #12
+ lsl r5, r5, #20
+ orr r5, r5, r6
+ ldr r4, [sp, #8] @ load fd
+ DO_CALL (mmap2)
+ pop {r4, r5, r6} @ restore callee-saved regs on both success and error paths
+ ldr r1, =0xfffff000
+ cmp r0, r1
+ bcs .Lerror
+ bx lr
+.Linval:
+ ldr r0, =-EINVAL
+ pop {r4, r5, r6}
+.Lerror:
+ push {r3, lr}
+ bl __syscall_error
+ POP_RET
+.pool
+#else /* !THUMB1_ONLY */
+mmap64:
#ifdef __ARMEB__
# define LOW_OFFSET 8 + 4
/* The initial + 4 is for the stack postdecrement. */
@@ -45,6 +83,7 @@ mmap64:
str r4, [sp, #-4]!
movs r4, ip, lsl $20 @ check that offset is page-aligned
mov ip, ip, lsr $12
+ IT(t, eq)
moveqs r4, r5, lsr $12 @ check for overflow
bne .Linval
ldr r4, [sp, $8] @ load fd
@@ -52,6 +91,7 @@ mmap64:
DO_CALL (mmap2)
cmn r0, $4096
ldmfd sp!, {r4, r5}
+ IT(t, cc)
#if defined(__USE_BX__)
bxcc lr
#else
@@ -62,7 +102,8 @@ mmap64:
mov r0, $-EINVAL
ldmfd sp!, {r4, r5}
b __syscall_error
-#else
+#endif
+#else /* !__ARM_EABI__ */
stmfd sp!, {r4, r5, lr}
ldr r5, [sp, $16]
ldr r4, [sp, $12]
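
The shift sequence in the Thumb-1 mmap64 implements the mmap2 convention, which takes the file offset in 4096-byte pages. With the 64-bit offset split into 32-bit words HIGH:LOW, pgoff = off64 >> 12 = (HIGH << 20) | (LOW >> 12); this fits in 32 bits only when HIGH >> 12 == 0 and is exact only when the low 12 bits of LOW are zero, which is precisely what the two bne .Linval checks reject. In outline (r5 = HIGH, r6 = LOW, as loaded above):

	lsl	r4, r6, #20	@ Z clear iff LOW & 0xfff != 0 (not page-aligned)
	bne	.Linval
	lsr	r4, r5, #12	@ Z clear iff HIGH >= 4096 (pgoff would overflow)
	bne	.Linval
	lsr	r6, r6, #12	@ LOW >> 12
	lsl	r5, r5, #20	@ HIGH << 20
	orr	r5, r5, r6	@ r5 = pgoff = off64 / 4096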
diff --git a/libc/sysdeps/linux/arm/setjmp.S b/libc/sysdeps/linux/arm/setjmp.S
index 8d15b8324..2df7d551a 100644
--- a/libc/sysdeps/linux/arm/setjmp.S
+++ b/libc/sysdeps/linux/arm/setjmp.S
@@ -18,15 +18,41 @@
02111-1307 USA. */
#include <features.h>
+#include <bits/arm_asm.h>
.global __sigsetjmp
.type __sigsetjmp,%function
.align 2
+#if defined(THUMB1_ONLY)
+.thumb_func
__sigsetjmp:
+ push {r3, r4, r5, r6, r7, lr}
mov ip, r0
+ stmia r0!, {r4, r5, r6, r7}
+ mov r2, r8
+ mov r3, r9
+ mov r4, sl
+ mov r5, fp
+ add r6, sp, #(6 * 4)
+ mov r7, lr
+ stmia r0!, {r2, r3, r4, r5, r6, r7}
+ mov r0, ip
+ bl __sigjmp_save
+ pop {r3, r4, r5, r6, r7, pc}
+
+#else
+__sigsetjmp:
+ /* Save registers */
+ mov ip, r0
+#if defined(__thumb2__)
+ stmia ip!, {v1-v6, sl, fp}
+ movs r2, sp
+ stmia ip!, {r2, lr}
+#else
/* Save registers */
stmia ip!, {v1-v6, sl, fp, sp, lr}
+#endif
#if defined __UCLIBC_HAS_FLOATS__ && ! defined __UCLIBC_HAS_SOFT_FLOAT__
# ifdef __VFP_FP__
/* Store the VFP registers. */
@@ -70,5 +96,6 @@ __sigsetjmp:
#else
B __sigjmp_save
#endif
+#endif
.size __sigsetjmp,.-__sigsetjmp
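
For orientation, both __sigsetjmp variants write (and __longjmp above reads back) the same ten-word register buffer; any FP state follows it. As a sketch:

	@ jmp_buf core layout, one word per slot:
	@ [0..3]  v1-v4 (r4-r7), stored directly by the Thumb-1 stmia
	@ [4..7]  v5, v6, sl, fp (r8-r11), shuffled through low registers
	@ [8]     sp as it was on entry; hence add r6, sp, #(6 * 4) to undo
	@         the six-register push at the top of the Thumb-1 version
	@ [9]     lr, the address __longjmp resumes at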
diff --git a/libc/sysdeps/linux/arm/sigrestorer.S b/libc/sysdeps/linux/arm/sigrestorer.S
index 194228a38..79728fd40 100644
--- a/libc/sysdeps/linux/arm/sigrestorer.S
+++ b/libc/sysdeps/linux/arm/sigrestorer.S
@@ -16,6 +16,7 @@
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
02111-1307 USA. */
+#include <bits/arm_asm.h>
#include <sys/syscall.h>
#include <linux/version.h>
@@ -38,6 +39,9 @@
.type __default_sa_restorer,%function
.align 2
#ifdef __ARM_EABI__
+#ifdef __thumb__
+.thumb_func
+#endif
.fnstart
.save {r0-r15}
#if LINUX_VERSION_CODE >= 0x020612
@@ -62,6 +66,9 @@ __default_sa_restorer:
.type __default_rt_sa_restorer,%function
.align 2
#ifdef __ARM_EABI__
+#ifdef __thumb__
+.thumb_func
+#endif
.fnstart
.save {r0-r15}
#if LINUX_VERSION_CODE >= 0x020612
diff --git a/libc/sysdeps/linux/arm/syscall-eabi.S b/libc/sysdeps/linux/arm/syscall-eabi.S
index efc30690c..b9318821b 100644
--- a/libc/sysdeps/linux/arm/syscall-eabi.S
+++ b/libc/sysdeps/linux/arm/syscall-eabi.S
@@ -17,6 +17,7 @@
02111-1307 USA. */
#include <sys/syscall.h>
+#include <bits/arm_asm.h>
/* In the EABI syscall interface, we don't need a special syscall to
implement syscall(). It won't work reliably with 64-bit arguments
@@ -26,6 +27,29 @@
.global syscall
.type syscall,%function
.align 4
+#if defined(THUMB1_ONLY)
+.thumb_func
+syscall:
+ push {r4, r5, r6, r7}
+ mov ip, r0
+ mov r0, r1
+ mov r1, r2
+ mov r2, r3
+ add r7, sp, #(4 * 4)
+ ldmia r7!, {r3, r4, r5, r6}
+ mov r7, ip
+ swi 0x0
+ pop {r4, r5, r6, r7}
+ ldr r1, =0xfffff000
+ cmp r0, r1
+ bcs 1f
+ bx lr
+1:
+ push {r3, lr}
+ bl __syscall_error
+ POP_RET
+.pool
+#else
syscall:
mov ip, sp
stmfd sp!, {r4, r5, r6, r7}
@@ -37,11 +61,13 @@ syscall:
swi 0x0
ldmfd sp!, {r4, r5, r6, r7}
cmn r0, #4096
+ IT(t, cc)
#if defined(__USE_BX__)
bxcc lr
#else
movcc pc, lr
#endif
b __syscall_error
+#endif
.size syscall,.-syscall
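
Both versions implement the same ARM EABI kernel convention: syscall number in r7, up to six arguments in r0-r5, result in r0, with failures returned as negative errno values. Any result in the unsigned range 0xfffff000-0xffffffff is therefore an error, which is what the recurring cmn r0, #4096 and ldr r1, =0xfffff000 checks test before branching to __syscall_error. A bare-bones sketch of the convention, not part of the patch (syscall number shown for illustration only):

	mov	r7, #20		@ __NR_getpid on ARM
	swi	0x0		@ pid comes back in r0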
diff --git a/libc/sysdeps/linux/arm/vfork.S b/libc/sysdeps/linux/arm/vfork.S
index e9f63d46e..42595b026 100644
--- a/libc/sysdeps/linux/arm/vfork.S
+++ b/libc/sysdeps/linux/arm/vfork.S
@@ -6,6 +6,7 @@
*/
#include <features.h>
+#include <bits/arm_asm.h>
#define _ERRNO_H
#include <bits/errno.h>
@@ -18,11 +19,47 @@
.type __vfork,%function
.align 4
+#if defined(__thumb__) && !defined(__thumb2__)
+.thumb_func
+__vfork:
+#ifdef __NR_vfork
+ DO_CALL (vfork)
+ ldr r1, =0xfffff000
+ cmp r0, r1
+ bcs 1f
+ bx lr
+1:
+
+ /* Check if vfork even exists. */
+ ldr r1, =-ENOSYS
+ cmp r0, r1
+ bne __error
+
+ /* If we don't have vfork, use fork. */
+ DO_CALL (fork)
+ ldr r1, =0xfffff000
+ cmp r0, r1
+
+ /* Syscall worked. Return to child/parent */
+ bcs 1f
+ bx lr
+1:
+
+__error:
+ push {r3, lr}
+ bl __syscall_error
+ POP_RET
+.pool
+
+#endif
+
+#else
__vfork:
#ifdef __NR_vfork
DO_CALL (vfork)
cmn r0, #4096
+ IT(t, cc)
#if defined(__USE_BX__)
bxcc lr
#else
@@ -40,6 +77,7 @@ __vfork:
cmn r0, #4096
/* Syscall worked. Return to child/parent */
+ IT(t, cc)
#if defined(__USE_BX__)
bxcc lr
#else
@@ -48,8 +86,10 @@ __vfork:
__error:
b __syscall_error
+#endif
.size __vfork,.-__vfork
+
weak_alias(__vfork,vfork)
libc_hidden_weak(vfork)
#endif