From efce79f09ae6daa77cd322df0d532beec3f445f5 Mon Sep 17 00:00:00 2001
From: Bernhard Reutner-Fischer
Date: Wed, 26 Mar 2008 13:40:36 +0000
Subject: Paul Brook writes:

The attached patch adds support for compiling arm uClibc as pure Thumb code.
This is needed because some recent ARM cores do not implement traditional
ARM mode. Specifically:

* Cortex-M1 - An extremely minimal FPGA based core that only implements
  Thumb-1 (aka ARMv6-M).
* Cortex-M3 - A Thumb-2 only ARMv7-M core.

Most of uClibc already builds in Thumb mode; all that is left is a handful
of assembly bits.

Tested on arm-uclinuxeabi.
---
 Rules.mak                             |   2 +
 extra/Configs/Config.arm              |  10 ++
 ldso/ldso/arm/resolve.S               |   3 +-
 libc/string/arm/_memcpy.S             | 182 +++++++++++++++++++++++++++++---
 libc/string/arm/bcopy.S               |  12 +++
 libc/string/arm/bzero.S               |  12 +++
 libc/string/arm/memcmp.S              |  28 ++++++
 libc/string/arm/memcpy.S              |  11 +-
 libc/string/arm/memmove.S             |  11 +-
 libc/string/arm/memset.S              |  62 ++++++++++++
 libc/string/arm/strcmp.S              |  19 ++++
 libc/string/arm/strlen.S              |  25 +++++
 libc/string/arm/strncmp.S             |  33 ++++++
 libc/sysdeps/linux/arm/__longjmp.S    |  33 ++++++
 libc/sysdeps/linux/arm/bits/arm_asm.h |  28 ++++++
 libc/sysdeps/linux/arm/bsd-_setjmp.S  |  28 +++++-
 libc/sysdeps/linux/arm/bsd-setjmp.S   |  26 +++++
 libc/sysdeps/linux/arm/clone.S        |  53 +++++++++-
 libc/sysdeps/linux/arm/crt1.S         |  69 +++++++++++++
 libc/sysdeps/linux/arm/crti.S         |   1 +
 libc/sysdeps/linux/arm/crtn.S         |   1 +
 libc/sysdeps/linux/arm/mmap64.S       |  45 ++++++++-
 libc/sysdeps/linux/arm/setjmp.S       |  27 +++++
 libc/sysdeps/linux/arm/sigrestorer.S  |   7 ++
 libc/sysdeps/linux/arm/syscall-eabi.S |  26 +++++
 libc/sysdeps/linux/arm/vfork.S        |  40 ++++++++
 26 files changed, 771 insertions(+), 23 deletions(-)
 create mode 100644 libc/sysdeps/linux/arm/bits/arm_asm.h

diff --git a/Rules.mak b/Rules.mak
index a6c0c3735..2535b0907 100644
--- a/Rules.mak
+++ b/Rules.mak
@@ -219,6 +219,8 @@ ifeq ($(TARGET_ARCH),arm)
 	CPU_CFLAGS-$(CONFIG_ARM_XSCALE)+=$(call check_gcc,-mtune=xscale,-mtune=strongarm110)
 	CPU_CFLAGS-$(CONFIG_ARM_XSCALE)+=-march=armv5te -Wa,-mcpu=xscale
 	CPU_CFLAGS-$(CONFIG_ARM_IWMMXT)+=-march=iwmmxt -Wa,-mcpu=iwmmxt -mabi=iwmmxt
+	CPU_CFLAGS-$(CONFIG_ARM_CORTEX_M3)+=-mcpu=cortex-m3 -mthumb
+	CPU_CFLAGS-$(CONFIG_ARM_CORTEX_M1)+=-mcpu=cortex-m1 -mthumb
 endif
 
 ifeq ($(TARGET_ARCH),mips)
diff --git a/extra/Configs/Config.arm b/extra/Configs/Config.arm
index 7aec08ce2..26e1f3da4 100644
--- a/extra/Configs/Config.arm
+++ b/extra/Configs/Config.arm
@@ -107,6 +107,16 @@ config CONFIG_ARM1176JZF_S
 	bool "Arm 1176JZF-S"
 	select ARCH_HAS_MMU
 
+config CONFIG_ARM_CORTEX_M3
+	bool "Arm Cortex-M3"
+	select ARCH_HAS_NO_MMU
+	select USE_BX
+
+config CONFIG_ARM_CORTEX_M1
+	bool "Arm Cortex-M1"
+	select ARCH_HAS_NO_MMU
+	select USE_BX
+
 config CONFIG_ARM_SA110
 	bool "Intel StrongArm SA-110"
 	select ARCH_HAS_MMU
diff --git a/ldso/ldso/arm/resolve.S b/ldso/ldso/arm/resolve.S
index cbeb2232d..b422c334d 100644
--- a/ldso/ldso/arm/resolve.S
+++ b/ldso/ldso/arm/resolve.S
@@ -91,12 +91,13 @@
  */
 
 #include <features.h>
+#include <bits/arm_asm.h>
 #include <sys/syscall.h>
 
 .text
 .align 4		@ 16 byte boundary and there are 32 bytes below (arm case)
- #if !defined(__thumb__)
+ #if !defined(__thumb__) || defined(__thumb2__)
 .arm
 .globl _dl_linux_resolve
 .type _dl_linux_resolve,%function
diff --git a/libc/string/arm/_memcpy.S b/libc/string/arm/_memcpy.S
index 3704f96b5..5ef63c45a 100644
--- a/libc/string/arm/_memcpy.S
+++ b/libc/string/arm/_memcpy.S
@@ -39,7 +39,9 @@
 
 #include <features.h>
 #include <endian.h>
+#include <bits/arm_asm.h>
 
+#if !defined(THUMB1_ONLY)
 /*
  * This is one fun bit of code ...
  * Some easy listening music is suggested while trying to understand this
@@ -77,11 +79,36 @@
 .type _memcpy,%function
 .align 4
 
+/* XXX: The Thumb-2 conditionals can be removed if/when we require an
+   assembler that supports unified syntax. */
+.macro copy regs
+#if defined(__thumb2__)
+	ittt	ge
+	ldmiage	r1!, \regs
+	stmiage	r0!, \regs
+#else
+	ldmgeia	r1!, \regs
+	stmgeia	r0!, \regs
+#endif
+.endm
+
+.macro copydb regs
+#if defined(__thumb2__)
+	ittt	ge
+	ldmdbge	r1!, \regs
+	stmdbge	r0!, \regs
+#else
+	ldmgedb	r1!, \regs
+	stmgedb	r0!, \regs
+#endif
+.endm
+
 _memcpy:
 	/* Determine copy direction */
 	cmp	r1, r0
 	bcc	.Lmemcpy_backwards
 
+	IT(tt, eq)
 	moveq	r0, #0			/* Quick abort for len=0 */
 #if defined(__USE_BX__)
 	bxeq	lr
@@ -102,7 +129,7 @@ _memcpy:
 	blt	.Lmemcpy_fl12		/* less than 12 bytes (4 from above) */
 	subs	r2, r2, #0x14
 	blt	.Lmemcpy_fl32		/* less than 32 bytes (12 from above) */
-	stmdb	sp!, {r4}		/* borrow r4 */
+	str	r4, [sp, #-4]!		/* borrow r4 */
 
 	/* blat 32 bytes at a time */
 	/* XXX for really big copies perhaps we should use more registers */
@@ -115,19 +142,22 @@ _memcpy:
 	bge	.Lmemcpy_floop32
 
 	cmn	r2, #0x10
-	ldmgeia	r1!, {r3, r4, r12, lr}	/* blat a remaining 16 bytes */
-	stmgeia	r0!, {r3, r4, r12, lr}
+	/* blat a remaining 16 bytes */
+	copy	"{r3, r4, r12, lr}"
 	subge	r2, r2, #0x10
-	ldmia	sp!, {r4}		/* return r4 */
+	ldr	r4, [sp], #4		/* restore r4 */
 
 .Lmemcpy_fl32:
 	adds	r2, r2, #0x14
 
 	/* blat 12 bytes at a time */
 .Lmemcpy_floop12:
-	ldmgeia	r1!, {r3, r12, lr}
-	stmgeia	r0!, {r3, r12, lr}
+	copy	"{r3, r12, lr}"
+#if defined(__thumb2__)
+	subsge	r2, r2, #0x0c
+#else
 	subges	r2, r2, #0x0c
+#endif
 	bge	.Lmemcpy_floop12
 
 .Lmemcpy_fl12:
@@ -135,26 +165,48 @@ _memcpy:
 	blt	.Lmemcpy_fl4
 
 	subs	r2, r2, #4
+	IT(tt, lt)
 	ldrlt	r3, [r1], #4
 	strlt	r3, [r0], #4
-	ldmgeia	r1!, {r3, r12}
-	stmgeia	r0!, {r3, r12}
+	copy	"{r3, r12}"
 	subge	r2, r2, #4
 
 .Lmemcpy_fl4:
 	/* less than 4 bytes to go */
 	adds	r2, r2, #4
+#if defined(__thumb2__)
+	it	eq
+	popeq	{r0, pc}		/* done */
+#elif defined(__ARM_ARCH_4T__)
+	ldmeqia	sp!, {r0, r3}		/* done */
+	bxeq	r3
+#else
 	ldmeqia	sp!, {r0, pc}		/* done */
+#endif
 
 	/* copy the crud byte at a time */
 	cmp	r2, #2
 	ldrb	r3, [r1], #1
 	strb	r3, [r0], #1
+#if defined(__thumb2__)
+	itt	ge
+	ldrbge	r3, [r1], #1
+	strbge	r3, [r0], #1
+	itt	gt
+	ldrbgt	r3, [r1], #1
+	strbgt	r3, [r0], #1
+#else
 	ldrgeb	r3, [r1], #1
 	strgeb	r3, [r0], #1
 	ldrgtb	r3, [r1], #1
 	strgtb	r3, [r0], #1
+#endif
+#if defined(__ARM_ARCH_4T__)
+	ldmia	sp!, {r0, r3}
+	bx	r3
+#else
 	ldmia	sp!, {r0, pc}
+#endif
 
 	/* erg - unaligned destination */
 .Lmemcpy_fdestul:
@@ -164,10 +216,19 @@ _memcpy:
 	/* align destination with byte copies */
 	ldrb	r3, [r1], #1
 	strb	r3, [r0], #1
+#if defined(__thumb2__)
+	itt	ge
+	ldrbge	r3, [r1], #1
+	strbge	r3, [r0], #1
+	itt	gt
+	ldrbgt	r3, [r1], #1
+	strbgt	r3, [r0], #1
+#else
 	ldrgeb	r3, [r1], #1
 	strgeb	r3, [r0], #1
 	ldrgtb	r3, [r1], #1
 	strgtb	r3, [r0], #1
+#endif
 	subs	r2, r2, r12
 	blt	.Lmemcpy_fl4		/* less the 4 bytes */
@@ -370,12 +431,12 @@ _memcpy:
 
 .Lmemcpy_bl32:
 	cmn	r2, #0x10
-	ldmgedb	r1!, {r3, r4, r12, lr}	/* blat a remaining 16 bytes */
-	stmgedb	r0!, {r3, r4, r12, lr}
+	/* blat a remaining 16 bytes */
+	copydb	"{r3, r4, r12, lr}"
 	subge	r2, r2, #0x10
 	adds	r2, r2, #0x14
-	ldmgedb	r1!, {r3, r12, lr}	/* blat a remaining 12 bytes */
-	stmgedb	r0!, {r3, r12, lr}
+	/* blat a remaining 12 bytes */
+	copydb	"{r3, r12, lr}"
 	subge	r2, r2, #0x0c
 	ldmia	sp!, {r4, lr}
@@ -383,15 +444,16 @@ _memcpy:
 	adds	r2, r2, #8
 	blt	.Lmemcpy_bl4
 
 	subs	r2, r2, #4
+	IT(tt, lt)
 	ldrlt	r3, [r1, #-4]!
 	strlt	r3, [r0, #-4]!
-	ldmgedb	r1!, {r3, r12}
-	stmgedb	r0!, {r3, r12}
+	copydb	"{r3, r12}"
 	subge	r2, r2, #4
 
 .Lmemcpy_bl4:
 	/* less than 4 bytes to go */
 	adds	r2, r2, #4
+	IT(t, eq)
#if defined(__USE_BX__)
 	bxeq	lr
 #else
@@ -401,10 +463,19 @@ _memcpy:
 	cmp	r2, #2
 	ldrb	r3, [r1, #-1]!
 	strb	r3, [r0, #-1]!
+#ifdef __thumb2__
+	itt	ge
+	ldrbge	r3, [r1, #-1]!
+	strbge	r3, [r0, #-1]!
+	itt	gt
+	ldrbgt	r3, [r1, #-1]!
+	strbgt	r3, [r0, #-1]!
+#else
 	ldrgeb	r3, [r1, #-1]!
 	strgeb	r3, [r0, #-1]!
 	ldrgtb	r3, [r1, #-1]!
 	strgtb	r3, [r0, #-1]!
+#endif
 #if defined(__USE_BX__)
 	bx	lr
 #else
@@ -417,10 +488,19 @@ _memcpy:
 	/* align destination with byte copies */
 	ldrb	r3, [r1, #-1]!
 	strb	r3, [r0, #-1]!
+#ifdef __thumb2__
+	itt	ge
+	ldrbge	r3, [r1, #-1]!
+	strbge	r3, [r0, #-1]!
+	itt	gt
+	ldrbgt	r3, [r1, #-1]!
+	strbgt	r3, [r0, #-1]!
+#else
 	ldrgeb	r3, [r1, #-1]!
 	strgeb	r3, [r0, #-1]!
 	ldrgtb	r3, [r1, #-1]!
 	strgtb	r3, [r0, #-1]!
+#endif
 	subs	r2, r2, r12
 	blt	.Lmemcpy_bl4		/* less than 4 bytes to go */
 	ands	r12, r1, #3
@@ -591,3 +671,77 @@ _memcpy:
 .Lmemcpy_bsrcul1l4:
 	add	r1, r1, #1
 	b	.Lmemcpy_bl4
+
+#else /* THUMB1_ONLY */
+
+/* This is a fairly dumb implementation for when we can't use the 32-bit code
+   above. */
+.text
+.global _memcpy
+.hidden _memcpy
+.type _memcpy,%function
+.align 4
+.thumb
+_memcpy:
+	push	{r0, r4}
+	cmp	r2, #0
+	beq	.Lmemcpy_exit
+	@ See if we have overlapping regions, and need to reverse the
+	@ direction of the copy
+	cmp	r0, r1
+	bls	.Lmemcpy_forwards
+	add	r4, r1, r2
+	cmp	r0, r4
+	bcc	.Lmemcpy_backwards
+.Lmemcpy_forwards:
+	/* Forwards. */
+	mov	r3, r0
+	eor	r3, r1
+	mov	r4, #3
+	tst	r3, r4
+	bne	.Lmemcpy_funaligned
+	cmp	r2, #8
+	bcc	.Lmemcpy_funaligned
+1:	@ copy up to the first word boundary.
+	tst	r0, r4
+	beq	1f
+	ldrb	r3, [r1]
+	add	r1, r1, #1
+	strb	r3, [r0]
+	add	r0, r0, #1
+	sub	r2, r2, #1
+	b	1b
+1:	@ Copy aligned words
+	ldr	r3, [r1]
+	add	r1, r1, #4
+	str	r3, [r0]
+	add	r0, r0, #4
+	sub	r2, r2, #4
+	cmp	r2, #4
+	bcs	1b
+	cmp	r2, #0
+	beq	.Lmemcpy_exit
+.Lmemcpy_funaligned:
+1:
+	ldrb	r3, [r1]
+	add	r1, r1, #1
+	strb	r3, [r0]
+	add	r0, r0, #1
+	sub	r2, r2, #1
+	bne	1b
.Lmemcpy_exit:
+	pop	{r0, r4}
+	bx	lr
+
+.Lmemcpy_backwards:
+	add	r0, r0, r2
+	add	r1, r1, r2
+1:
+	sub	r0, r0, #1
+	sub	r1, r1, #1
+	ldrb	r3, [r1]
+	strb	r3, [r0]
+	sub	r2, r2, #1
+	bne	1b
+	b	.Lmemcpy_exit
+#endif
diff --git a/libc/string/arm/bcopy.S b/libc/string/arm/bcopy.S
index db3c9e6c1..2d6e90d13 100644
--- a/libc/string/arm/bcopy.S
+++ b/libc/string/arm/bcopy.S
@@ -40,6 +40,7 @@
 /* bcopy = memcpy/memmove with arguments reversed.
  */
 
 #include <features.h>
+#include <bits/arm_asm.h>
 
 #ifdef __UCLIBC_SUSV3_LEGACY__
@@ -48,12 +49,23 @@ .text
 .global bcopy
 .type bcopy,%function
 .align 4
 
+#if defined(__thumb__) && !defined(__thumb2__)
+.thumb_func
+bcopy:
+	push	{r2, lr}
+	mov	ip, r0
+	mov	r0, r1
+	mov	r1, ip
+	bl	_memcpy
+	POP_RET
+#else
 bcopy:
 	/* switch the source and destination registers */
 	eor	r0, r1, r0
 	eor	r1, r0, r1
 	eor	r0, r1, r0
 	b	_memcpy	/* (PLT) */
+#endif
 
 .size bcopy,.-bcopy
diff --git a/libc/string/arm/bzero.S b/libc/string/arm/bzero.S
index ee49cf560..e576a12e9 100644
--- a/libc/string/arm/bzero.S
+++ b/libc/string/arm/bzero.S
@@ -38,6 +38,7 @@
  */
 
 #include <features.h>
+#include <bits/arm_asm.h>
 
 #ifdef __UCLIBC_SUSV3_LEGACY__
@@ -46,10 +47,21 @@ .text
 .global bzero
 .type bzero,%function
 .align 4
 
+#if defined(__thumb__) && !defined(__thumb2__)
+.thumb_func
+bzero:
+	push	{r2, lr}
+	mov	r2, r1
+	mov	r1, #0
+	bl	HIDDEN_JUMPTARGET(memset)
+	POP_RET
+#else
+
 bzero:
 	mov	r2, r1
 	mov	r1, #0
 	b	HIDDEN_JUMPTARGET(memset)
+#endif
 
 .size bzero,.-bzero
diff --git a/libc/string/arm/memcmp.S b/libc/string/arm/memcmp.S
index 4f78b5128..65409f43a 100644
--- a/libc/string/arm/memcmp.S
+++ b/libc/string/arm/memcmp.S
@@ -30,15 +30,41 @@
  */
 
 #include <features.h>
+#include <bits/arm_asm.h>
 
 .text
 .global memcmp
 .type memcmp,%function
 .align 4
 
+#if defined(THUMB1_ONLY)
+.thumb_func
+memcmp:
+	cmp	r2, #0
+	bne	1f
+	mov	r0, #0
+	bx	lr
+1:
+	push	{r4}
+	add	r4, r0, r2
+2:
+	ldrb	r2, [r0]
+	add	r0, r0, #1
+	ldrb	r3, [r1]
+	add	r1, r1, #1
+	cmp	r4, r0
+	beq	3f
+	cmp	r2, r3
+	beq	2b
+3:
+	sub	r0, r2, r3
+	pop	{r4}
+	bx	lr
+#else
 memcmp:
 	/* if ((len - 1) < 0) return 0 */
 	subs	r2, r2, #1
+	IT(tt, mi)
 	movmi	r0, #0
 #if defined(__USE_BX__)
 	bxmi	lr
@@ -51,6 +77,7 @@ memcmp:
 	ldrb	r2, [r0], #1
 	ldrb	r3, [r1], #1
 	cmp	ip, r0
+	IT(t, cs)
 	cmpcs	r2, r3
 	beq	1b
 	sub	r0, r2, r3
@@ -59,6 +86,7 @@ memcmp:
 #else
 	mov	pc, lr
 #endif
+#endif
 
 .size memcmp,.-memcmp
diff --git a/libc/string/arm/memcpy.S b/libc/string/arm/memcpy.S
index 7a5b6ab76..d2013d211 100644
--- a/libc/string/arm/memcpy.S
+++ b/libc/string/arm/memcpy.S
@@ -38,16 +38,23 @@
  */
 
 #include <features.h>
+#include <bits/arm_asm.h>
 
 .text
 .global memcpy
 .type memcpy,%function
 .align 4
 
+#if defined(__thumb__) && !defined(__thumb2__)
+.thumb_func
 memcpy:
-	stmfd	sp!, {r0, lr}
+	push	{r0, lr}
 	bl	_memcpy
-	ldmfd	sp!, {r0, pc}
+	POP_RET
+#else
+memcpy:
+	b	_memcpy
+#endif
 
 .size memcpy,.-memcpy
diff --git a/libc/string/arm/memmove.S b/libc/string/arm/memmove.S
index 45cd9b4d4..c11b98dd4 100644
--- a/libc/string/arm/memmove.S
+++ b/libc/string/arm/memmove.S
@@ -38,16 +38,23 @@
  */
 
 #include <features.h>
+#include <bits/arm_asm.h>
 
 .text
 .global memmove
 .type memmove,%function
 .align 4
 
+#if defined(__thumb__) && !defined(__thumb2__)
+.thumb_func
 memmove:
-	stmfd	sp!, {r0, lr}
+	push	{r2, lr}
 	bl	_memcpy
-	ldmfd	sp!, {r0, pc}
+	POP_RET
+#else
+memmove:
+	b	_memcpy
+#endif
 
 .size memmove,.-memmove
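The memcpy and memmove wrappers above both funnel into the THUMB1_ONLY
_memcpy added earlier in this patch, which replaces the conditional ldm/stm
pipeline with a plain align-then-copy loop. For reference, the same control
flow expressed in C — a sketch of that logic under the same assumptions (the
helper name is invented, and only the forward direction is shown):

#include <stddef.h>
#include <stdint.h>

/* Mirrors the Thumb-1 forward path: byte-copy until the destination is
   word-aligned, move whole words while at least 4 bytes remain, then mop
   up the tail a byte at a time.  The word path is only entered when src
   and dst share alignment and n >= 8, exactly like the
   "eor r3, r1 / tst r3, r4 / cmp r2, #8" prologue in the assembly. */
static void *thumb1_memcpy_sketch(void *dst, const void *src, size_t n)
{
	unsigned char *d = dst;
	const unsigned char *s = src;

	if ((((uintptr_t)d ^ (uintptr_t)s) & 3) == 0 && n >= 8) {
		while ((uintptr_t)d & 3) {	/* align destination */
			*d++ = *s++;
			n--;
		}
		do {				/* copy aligned words */
			*(uint32_t *)(void *)d = *(const uint32_t *)(const void *)s;
			d += 4;
			s += 4;
			n -= 4;
		} while (n >= 4);
	}
	while (n--)				/* trailing bytes */
		*d++ = *s++;
	return dst;
}
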
diff --git a/libc/string/arm/memset.S b/libc/string/arm/memset.S
index 16bfe0dc5..66aa6039c 100644
--- a/libc/string/arm/memset.S
+++ b/libc/string/arm/memset.S
@@ -19,12 +19,52 @@
 
 #include <features.h>
 #include <sys/syscall.h>
+#include <bits/arm_asm.h>
 
 .text
 .global memset
 .type memset,%function
 .align 4
 
+#if defined(THUMB1_ONLY)
+.thumb_func
+memset:
+	mov	ip, r0
+	cmp	r2, #8		@ at least 8 bytes to do?
+	bcc	2f
+
+	lsl	r3, r1, #8
+	orr	r1, r3
+	lsl	r3, r1, #16
+	orr	r1, r3
+
+	mov	r3, #3
+1:	@ Fill up to the first word boundary
+	tst	r0, r3
+	beq	1f
+	strb	r1, [r0]
+	add	r0, r0, #1
+	sub	r2, r2, #1
+	b	1b
+1:	@ Fill aligned words
+	str	r1, [r0]
+	add	r0, r0, #4
+	sub	r2, r2, #4
+	cmp	r2, #4
+	bcs	1b
+
+2:	@ Fill the remaining bytes
+	cmp	r2, #0
+	beq	2f
+1:
+	strb	r1, [r0]
+	add	r0, r0, #1
+	sub	r2, r2, #1
+	bne	1b
+2:
+	mov	r0, ip
+	bx	lr
+#else
 memset:
 	mov	a4, a1
 	cmp	a3, $8		@ at least 8 bytes to do?
@@ -33,8 +73,14 @@ memset:
 	orr	a2, a2, a2, lsl $16
 1:
 	tst	a4, $3		@ aligned yet?
+#if defined(__thumb2__)
+	itt	ne
+	strbne	a2, [a4], $1
+	subne	a3, a3, $1
+#else
 	strneb	a2, [a4], $1
 	subne	a3, a3, $1
+#endif
 	bne	1b
 	mov	ip, a2
 1:
@@ -51,16 +97,30 @@ memset:
 	stmia	a4!, {a2, ip}
 	sub	a3, a3, $8
 	cmp	a3, $8		@ 8 bytes still to do?
+#if defined(__thumb2__)
+	itt	ge
+	stmiage	a4!, {a2, ip}
+	subge	a3, a3, $8
+#else
 	stmgeia	a4!, {a2, ip}
 	subge	a3, a3, $8
+#endif
 	bge	1b
 2:
 	movs	a3, a3		@ anything left?
+	IT(t, eq)
 #if defined(__USE_BX__)
 	bxeq	lr
 #else
 	moveq	pc, lr		@ nope
 #endif
+#if defined (__thumb2__)
+1:
+	strb	a2, [a4], #1
+	subs	a3, a3, #1
+	bne	1b
+	bx	lr
+#else
 	rsb	a3, a3, $7
 	add	pc, pc, a3, lsl $2
 	mov	r0, r0
@@ -76,6 +136,8 @@ memset:
 #else
 	mov	pc, lr
 #endif
+#endif
+#endif
 
 .size memset,.-memset
diff --git a/libc/string/arm/strcmp.S b/libc/string/arm/strcmp.S
index 89aa38874..97363c1c2 100644
--- a/libc/string/arm/strcmp.S
+++ b/libc/string/arm/strcmp.S
@@ -30,17 +30,35 @@
  */
 
 #include <features.h>
+#include <bits/arm_asm.h>
 
 .text
 .global strcmp
 .type strcmp,%function
 .align 4
 
+#if defined(__thumb__) && !defined(__thumb2__)
+.thumb_func
+strcmp:
+1:
+	ldrb	r2, [r0]
+	add	r0, r0, #1
+	ldrb	r3, [r1]
+	add	r1, r1, #1
+	cmp	r2, #0
+	beq	2f
+	cmp	r2, r3
+	beq	1b
+2:
+	sub	r0, r2, r3
+	bx	lr
+#else
 strcmp:
 1:
 	ldrb	r2, [r0], #1
 	ldrb	r3, [r1], #1
 	cmp	r2, #1
+	IT(t, cs)
 	cmpcs	r2, r3
 	beq	1b
 	sub	r0, r2, r3
@@ -49,6 +67,7 @@ strcmp:
 #else
 	mov	pc, lr
 #endif
+#endif
 
 .size strcmp,.-strcmp
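The Thumb-1 memset above widens the fill byte into a full word with two
shift/or steps (the lsl/orr pairs) before entering the word-store loop. The
same replication trick in C — an illustration, not code from the library:

#include <stdint.h>

/* 0x000000cc -> 0x0000cccc -> 0xcccccccc, as the lsl/orr pairs do. */
static uint32_t replicate_byte(uint8_t c)
{
	uint32_t v = c;
	v |= v << 8;
	v |= v << 16;
	return v;
}

Once the pattern fills a word, the aligned part of the buffer can be stored
four (or, with the stmia pair in the ARM path, eight) bytes per iteration.
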
diff --git a/libc/string/arm/strlen.S b/libc/string/arm/strlen.S
index 5b4b02e17..949e918f4 100644
--- a/libc/string/arm/strlen.S
+++ b/libc/string/arm/strlen.S
@@ -20,6 +20,7 @@
 #include <features.h>
 #include <endian.h>
 #include <sys/syscall.h>
+#include <bits/arm_asm.h>
 
 /* size_t strlen(const char *S)
  * entry: r0 -> string
@@ -31,6 +32,19 @@
 .type strlen,%function
 .align 4
 
+#if defined(THUMB1_ONLY)
+/* A simple implementation for when the ARM implementation can't be used.  */
+.thumb_func
+strlen:
+	mov	r2, #0
+1:
+	ldrb	r1, [r0, r2]
+	add	r2, r2, #1
+	cmp	r1, #0
+	bne	1b
+	sub	r0, r2, #1
+	bx	lr
+#else
 strlen:
 	bic	r1, r0, $3	@ addr of word containing first byte
 	ldr	r2, [r1], $4	@ get the first word
@@ -41,38 +55,48 @@ strlen:
 #if __BYTE_ORDER == __BIG_ENDIAN
 	orr	r2, r2, $0xff000000	@ set this byte to non-zero
 	subs	r3, r3, $1		@ any more to do?
+	IT(t, gt)
 	orrgt	r2, r2, $0x00ff0000	@ if so, set this byte
 	subs	r3, r3, $1		@ more?
+	IT(t, gt)
 	orrgt	r2, r2, $0x0000ff00	@ then set.
 #else
 	orr	r2, r2, $0x000000ff	@ set this byte to non-zero
 	subs	r3, r3, $1		@ any more to do?
+	IT(t, gt)
 	orrgt	r2, r2, $0x0000ff00	@ if so, set this byte
 	subs	r3, r3, $1		@ more?
+	IT(t, gt)
 	orrgt	r2, r2, $0x00ff0000	@ then set.
 #endif
 Laligned:				@ here, we have a word in r2. Does it
 	tst	r2, $0x000000ff		@ contain any zeroes?
+	IT(tttt, ne)
 	tstne	r2, $0x0000ff00		@
 	tstne	r2, $0x00ff0000		@
 	tstne	r2, $0xff000000		@
 	addne	r0, r0, $4		@ if not, the string is 4 bytes longer
+	IT(t, ne)
 	ldrne	r2, [r1], $4		@ and we continue to the next word
 	bne	Laligned		@
 Llastword:				@ drop through to here once we find a
 #if __BYTE_ORDER == __BIG_ENDIAN
 	tst	r2, $0xff000000		@ word that has a zero byte in it
+	IT(tttt, ne)
 	addne	r0, r0, $1		@
 	tstne	r2, $0x00ff0000		@ and add up to 3 bytes on to it
 	addne	r0, r0, $1		@
 	tstne	r2, $0x0000ff00		@ (if first three all non-zero, 4th
+	IT(t, ne)
 	addne	r0, r0, $1		@  must be zero)
 #else
 	tst	r2, $0x000000ff		@
+	IT(tttt, ne)
 	addne	r0, r0, $1		@
 	tstne	r2, $0x0000ff00		@ and add up to 3 bytes on to it
 	addne	r0, r0, $1		@
 	tstne	r2, $0x00ff0000		@ (if first three all non-zero, 4th
+	IT(t, ne)
 	addne	r0, r0, $1		@  must be zero)
 #endif
 #if defined(__USE_BX__)
 	bx	lr
@@ -80,6 +104,7 @@ Llastword:				@ drop through to here once we find a
 #else
 	mov	pc,lr
 #endif
+#endif
 
 .size strlen,.-strlen
diff --git a/libc/string/arm/strncmp.S b/libc/string/arm/strncmp.S
index eaf0620b4..8487639c8 100644
--- a/libc/string/arm/strncmp.S
+++ b/libc/string/arm/strncmp.S
@@ -30,15 +30,46 @@
  */
 
 #include <features.h>
+#include <bits/arm_asm.h>
 
 .text
 .global strncmp
 .type strncmp,%function
 .align 4
 
+#if defined(THUMB1_ONLY)
+.thumb_func
 strncmp:
 	/* if (len == 0) return 0 */
 	cmp	r2, #0
+	bne	1f
+	mov	r0, #0
+	bx	lr
+1:
+	push	{r4}
+
+	/* r4 == last src address to compare */
+	add	r4, r0, r2
+2:
+	cmp	r4, r0
+	beq	3f
+	ldrb	r2, [r0]
+	add	r0, r0, #1
+	ldrb	r3, [r1]
+	add	r1, r1, #1
+	cmp	r2, #0
+	beq	3f
+	cmp	r2, r3
+	beq	2b
+3:
+	sub	r0, r2, r3
+	pop	{r4}
+	bx	lr
+#else
+strncmp:
+	/* if (len == 0) return 0 */
+	cmp	r2, #0
+	IT(tt, eq)
 	moveq	r0, #0
 #if defined(__USE_BX__)
 	bxeq	lr
@@ -53,6 +84,7 @@ strncmp:
 	ldrb	r2, [r0], #1
 	ldrb	r3, [r1], #1
 	cmp	ip, r0
+	IT(tt, cs)
 	cmpcs	r2, #1
 	cmpcs	r2, r3
 	beq	1b
@@ -62,6 +94,7 @@ strncmp:
 #else
 	mov	pc, lr
 #endif
+#endif
 
 .size strncmp,.-strncmp
diff --git a/libc/sysdeps/linux/arm/__longjmp.S b/libc/sysdeps/linux/arm/__longjmp.S
index 4261797f8..5faf4ece9 100644
--- a/libc/sysdeps/linux/arm/__longjmp.S
+++ b/libc/sysdeps/linux/arm/__longjmp.S
@@ -18,6 +18,7 @@
    02111-1307 USA.  */
 
 #include <features.h>
+#include <bits/arm_asm.h>
 #define _SETJMP_H
 #define _ASM
 #include <bits/setjmp.h>
@@ -26,13 +27,44 @@
 .global __longjmp
 .type __longjmp,%function
 .align 2
+#if defined(THUMB1_ONLY)
+.thumb_func
+__longjmp:
+	mov	r2, r0
+	movs	r0, r1
+	/* can't let setjmp() return zero! */
+	bne	1f
+	mov	r0, #1
+1:
+	mov	r1, r2
+	/* Restore registers, shuffling them through low regs. */
+	add	r2, #(4 * 4)
+	ldmia	r2!, {r4, r5, r6, r7}
+	mov	r8, r4
+	mov	r9, r5
+	mov	sl, r6
+	mov	fp, r7
+	ldmia	r2!, {r4, r5}
+	mov	sp, r4
+	mov	lr, r5
+	ldmia	r1!, {r4, r5, r6, r7}
+	bx	lr
+#else
 __longjmp:
 	mov	ip, r0		/* save jmp_buf pointer */
 
 	movs	r0, r1		/* get the return value in place */
+	IT(t, eq)
 	moveq	r0, #1		/* can't let setjmp() return zero! */
 
+#if defined(__thumb2__)
+	/* Thumb-2 does not allow loading sp with ldm.  */
+	ldmia	ip!, {v1-v6, sl, fp}
+	ldr	sp, [ip], #4
+	ldr	lr, [ip], #4
+#else
 	ldmia	ip!, {v1-v6, sl, fp, sp, lr}
+#endif
 
 #if defined __UCLIBC_HAS_FLOATS__ && ! defined __UCLIBC_HAS_SOFT_FLOAT__
 #ifdef __VFP_FP__
@@ -76,6 +108,7 @@ __longjmp:
 #else
 	mov	pc, lr
 #endif
+#endif
 
 .size __longjmp,.-__longjmp
 libc_hidden_def(__longjmp)
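The ARM strlen above starts r0 at minus the number of unaligned leading bytes
(rsb r0, r3, $0), forces those bytes non-zero with the orr fixups, and then
tests four bytes per loop with the tst chains. A little-endian C rendering of
the same idea — a sketch only (it deliberately reads whole aligned words past
the terminator, which is safe for the same reason it is in the assembly):

#include <stddef.h>
#include <stdint.h>

static size_t strlen_wordwise(const char *s)
{
	const uint32_t *w = (const uint32_t *)((uintptr_t)s & ~(uintptr_t)3);
	unsigned skip = (uintptr_t)s & 3;	/* leading bytes not in the string */
	uint32_t word = *w++;
	size_t n = 0;

	if (skip)	/* force leading bytes non-zero, like the orr fixups */
		word |= (1u << (8 * skip)) - 1;

	while ((word & 0x000000ffu) && (word & 0x0000ff00u) &&
	       (word & 0x00ff0000u) && (word & 0xff000000u)) {
		n += 4;			/* no zero byte in this word */
		word = *w++;
	}
	while (word & 0xffu) {		/* count the last word's bytes */
		n++;
		word >>= 8;
	}
	return n - skip;
}
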
diff --git a/libc/sysdeps/linux/arm/bits/arm_asm.h b/libc/sysdeps/linux/arm/bits/arm_asm.h
new file mode 100644
index 000000000..1d87df6eb
--- /dev/null
+++ b/libc/sysdeps/linux/arm/bits/arm_asm.h
@@ -0,0 +1,28 @@
+/* Various definitions used by the ARM uClibc assembly code.
+ */
+#ifndef _ARM_ASM_H
+#define _ARM_ASM_H
+
+#ifdef __thumb2__
+.thumb
+.syntax unified
+#define IT(t, cond) i##t cond
+#else
+/* XXX: This can be removed if/when we require an assembler that supports
+   unified assembly syntax. */
+#define IT(t, cond)
+/* Code to return from a thumb function stub. */
+#ifdef __ARM_ARCH_4T__
+#define POP_RET pop	{r2, pc}
+#else
+#define POP_RET pop	{r2, r3}; bx r3
+#endif
+#endif
+
+#if defined(__ARM_ARCH_6M__)
+/* M profile cores are Thumb-only; let any use of IT() error out. */
+#undef IT
+#define THUMB1_ONLY 1
+#endif
+
+#endif /* _ARM_ASM_H */
+
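The IT() helper above simply token-pastes its arguments: under Thumb-2 it
emits the unified-syntax "if-then" prefix that the following conditional
instructions require, and otherwise it expands to nothing so the classic
ARM conditional suffixes stand alone. A stand-alone preprocessor
illustration (not part of the patch):

/* Token-pasting behaviour of IT() from bits/arm_asm.h. */
#define IT(t, cond) i##t cond

IT(t, eq)	/* expands to: it eq   - one conditional instruction  */
IT(tt, cs)	/* expands to: itt cs  - two conditional instructions  */
IT(ttt, ge)	/* expands to: ittt ge - three, as the copy macro needs */

Each 't' in the first argument must be matched by exactly one conditional
instruction after the IT, which is why the callers in this patch pick
IT(t, ...), IT(tt, ...) or ittt to fit the instruction group that follows.
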
diff --git a/libc/sysdeps/linux/arm/bsd-_setjmp.S b/libc/sysdeps/linux/arm/bsd-_setjmp.S
index f70073266..a05570df7 100644
--- a/libc/sysdeps/linux/arm/bsd-_setjmp.S
+++ b/libc/sysdeps/linux/arm/bsd-_setjmp.S
@@ -17,13 +17,38 @@
    Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
    02111-1307 USA.  */
 
-/* This just does a tail-call to `__sigsetjmp (ARG, 1)'.
+#include <bits/arm_asm.h>
+
+/* This just does a tail-call to `__sigsetjmp (ARG, 0)'.
    We cannot do it in C because it must be a tail-call, so
    frame-unwinding in setjmp doesn't clobber the state restored
    by longjmp.  */
 
 .global _setjmp
 .type _setjmp,%function
 .align 2
+#if defined(THUMB1_ONLY)
+.thumb_func
+_setjmp:
+	mov	r1, #0
+#ifdef __PIC__
+	ldr	r3, .L_GOT
+	adr	r2, .L_GOT
+	add	r3, r2, r3
+
+	ldr	r2, .L_GOT+4	/* __sigsetjmp */
+	ldr	r2, [r2, r3]
+	bx	r2
+
+	.align 2
+.L_GOT:
+	.word	_GLOBAL_OFFSET_TABLE_-.L_GOT
+	.word	__sigsetjmp(GOT)
+#else
+	ldr	r2, =__sigsetjmp
+	bx	r2
+.pool
+#endif
+#else
 _setjmp:
 	mov	r1, #0
 #ifdef __PIC__
@@ -31,5 +56,6 @@ _setjmp:
 #else
 	b	__sigsetjmp
 #endif
+#endif
 
 .size _setjmp,.-_setjmp
diff --git a/libc/sysdeps/linux/arm/bsd-setjmp.S b/libc/sysdeps/linux/arm/bsd-setjmp.S
index 6253c6675..d7ca72ad5 100644
--- a/libc/sysdeps/linux/arm/bsd-setjmp.S
+++ b/libc/sysdeps/linux/arm/bsd-setjmp.S
@@ -17,6 +17,8 @@
    Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
    02111-1307 USA.  */
 
+#include <bits/arm_asm.h>
+
 /* This just does a tail-call to `__sigsetjmp (ARG, 1)'.
    We cannot do it in C because it must be a tail-call, so
    frame-unwinding in setjmp doesn't clobber the state restored
    by longjmp.  */
@@ -24,6 +26,29 @@
 .global setjmp
 .type setjmp,%function
 .align 2
+#if defined(THUMB1_ONLY)
+.thumb_func
+setjmp:
+	mov	r1, #1
+#ifdef __PIC__
+	ldr	r3, .L_GOT
+	adr	r2, .L_GOT
+	add	r3, r2, r3
+
+	ldr	r2, .L_GOT+4	/* __sigsetjmp */
+	ldr	r2, [r2, r3]
+	bx	r2
+
+	.align 2
+.L_GOT:
+	.word	_GLOBAL_OFFSET_TABLE_-.L_GOT
+	.word	__sigsetjmp(GOT)
+#else
+	ldr	r2, =__sigsetjmp
+	bx	r2
+.pool
+#endif
+#else
 setjmp:
 	mov	r1, #1
 #ifdef __PIC__
@@ -31,5 +56,6 @@ setjmp:
 #else
 	b	__sigsetjmp
 #endif
+#endif
 
 .size setjmp,.-setjmp
diff --git a/libc/sysdeps/linux/arm/clone.S b/libc/sysdeps/linux/arm/clone.S
index a5a847d1e..d9483735d 100644
--- a/libc/sysdeps/linux/arm/clone.S
+++ b/libc/sysdeps/linux/arm/clone.S
@@ -24,17 +24,66 @@
 #include <features.h>
 #include <errno.h>
 #include <sys/syscall.h>
+#include <bits/arm_asm.h>
 
-#ifdef __NR_clone
+#if defined(__NR_clone)
 /* int clone(int (*fn)(void *arg), void *child_stack, int flags, void *arg); */
 
 .text
 .global clone
 .type clone,%function
 .align 2
+#if defined(THUMB1_ONLY)
+.thumb_func
 clone:
 	@ sanity check args
 	cmp	r0, #0
+	beq	__einval
+	cmp	r1, #0
+	beq	__einval
+
+	@ insert the args onto the new stack
+	sub	r1, r1, #8
+	str	r3, [r1, #4]
+	@ save the function pointer as the 0th element
+	str	r0, [r1]
+
+	@ do the system call
+	@ get flags
+	mov	r0, r2
+	@ new sp is already in r1
+	DO_CALL (clone)
+	movs	a1, a1
+	blt	__error
+	beq	1f
+	bx	lr
+1:
+
+	@ pick the function arg and call address off the stack and execute
+	ldr	r0, [sp, #4]
+	ldr	r1, [sp]
+	bl	2f	@ blx r1
+
+	@ and we are done, passing the return value through r0
+	bl	HIDDEN_JUMPTARGET(_exit)
+	@ Should never return
+	b	.
+
+2:
+	bx	r1
+
+__einval:
+	ldr	r0, =-EINVAL
+__error:
+	push	{r3, lr}
+	bl	__syscall_error
+	POP_RET
+.pool
+#else
+clone:
+	@ sanity check args
+	cmp	r0, #0
+	IT(te, ne)
 	cmpne	r1, #0
 	moveq	r0, #-EINVAL
 	beq	__error
@@ -52,6 +101,7 @@
 	DO_CALL (clone)
 	movs	a1, a1
 	blt	__error
+	IT(t, ne)
 #if defined(__USE_BX__)
 	bxne	lr
 #else
@@ -68,6 +118,7 @@
 
 __error:
 	b	__syscall_error
+#endif
 
 .size clone,.-clone
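In the THUMB1_ONLY clone above, the function pointer and its argument are
parked at the base of the child's new stack before the syscall; the child
(the zero return) pops them back, calls the function through the bx stub at
label 2, and hands the return value straight to _exit. The child's half,
expressed as a C sketch (the names here are illustrative, not from uClibc):

#include <unistd.h>

typedef int (*clone_fn)(void *);

/* What the child side of the clone stub does once the kernel has
   switched it onto the new stack: run the callback, then terminate
   with its result.  The parent path just returns the new pid. */
static void child_trampoline(clone_fn fn, void *arg)
{
	_exit(fn(arg));
}
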
diff --git a/libc/sysdeps/linux/arm/crt1.S b/libc/sysdeps/linux/arm/crt1.S
index 8d4d230a7..082348e39 100644
--- a/libc/sysdeps/linux/arm/crt1.S
+++ b/libc/sysdeps/linux/arm/crt1.S
@@ -94,6 +94,7 @@ ARM register quick reference:
 */
 
 #include <features.h>
+#include <bits/arm_asm.h>
 
 .text
 .globl _start
@@ -105,6 +106,73 @@ ARM register quick reference:
 .weak _fini
 #endif
 
+#if defined(THUMB1_ONLY)
+.thumb_func
+_start:
+	/* Clear the frame pointer since this is the outermost frame. */
+	mov	r3, #0
+	mov	fp, r3
+
+#ifdef __ARCH_USE_MMU__
+	/* Pop argc off the stack and save a pointer to argv */
+	pop	{a2}
+	mov	a3, sp
+#else
+	/*
+	 * uClinux/arm stacks look a little different from normal
+	 * MMU-full Linux/arm stacks (for no good reason)
+	 */
+	/* pull argc and argv off the stack. We are going to push 3
+	 * arguments, so pop one here to maintain doubleword alignment. */
+	pop	{a2}
+	ldr	a3, [sp]
+#endif
+
+	/* Push stack limit and rtld_fini */
+	push	{a1, a3}
+
+#ifdef __PIC__
+	ldr	r4, .L_GOT
+.L_GOT_OFF:
+	adr	r5, .L_GOT
+	add	r4, r5, r4
+
+	ldr	r5, .L_GOT+4	/* _fini */
+	ldr	a1, [r4, r5]
+	push	{a1}		/* Push _fini */
+
+	ldr	r5, .L_GOT+8	/* _init */
+	ldr	a4, [r4, r5]
+
+	ldr	r5, .L_GOT+12	/* main */
+	ldr	a1, [r4, r5]
+
+#else
+	/* Fetch address of fini */
+	ldr	r4, =_fini
+	/* Push fini */
+	push	{r4}
+
+	/* Set up the other arguments in registers */
+	ldr	a1, =main
+	ldr	a4, =_init
+#endif
+	/* __uClibc_main (main, argc, argv, init, fini, rtld_fini, stack_end) */
+	/* Let the libc call main and exit with its return code.
+	 */
+	bl	__uClibc_main
+
+	/* should never get here....*/
+	bl	abort
+.pool
+
+#ifdef __PIC__
+.L_GOT:
+	.word	_GLOBAL_OFFSET_TABLE_-.L_GOT
+	.word	_fini(GOT)
+	.word	_init(GOT)
+	.word	main(GOT)
+#endif
+#else /* !THUMB1_ONLY */
 _start:
 	/* Clear the frame pointer and link register since this is the outermost frame. */
 	mov	fp, #0
@@ -175,6 +243,7 @@ _start:
 	.word	_init(GOT)
 	.word	main(GOT)
 #endif
+#endif
 
 /* Define a symbol for the first piece of initialized data. */
 .data
diff --git a/libc/sysdeps/linux/arm/crti.S b/libc/sysdeps/linux/arm/crti.S
index 4835b8331..e335b7140 100644
--- a/libc/sysdeps/linux/arm/crti.S
+++ b/libc/sysdeps/linux/arm/crti.S
@@ -1,5 +1,6 @@
 .file "initfini.c"
 
+#include <bits/arm_asm.h>
 .section .init
 .global _init
 .type _init, %function
diff --git a/libc/sysdeps/linux/arm/crtn.S b/libc/sysdeps/linux/arm/crtn.S
index 7a1ca1ab1..de01b38dc 100644
--- a/libc/sysdeps/linux/arm/crtn.S
+++ b/libc/sysdeps/linux/arm/crtn.S
@@ -1,5 +1,6 @@
 .file "initfini.c"
 
+#include <bits/arm_asm.h>
 .section .init
 .global _init
 .type _init, %function
diff --git a/libc/sysdeps/linux/arm/mmap64.S b/libc/sysdeps/linux/arm/mmap64.S
index ba8cb2fca..73d6b51ce 100644
--- a/libc/sysdeps/linux/arm/mmap64.S
+++ b/libc/sysdeps/linux/arm/mmap64.S
@@ -20,6 +20,7 @@
 #define _ERRNO_H
 #include <bits/errno.h>
 #include <sys/syscall.h>
+#include <bits/arm_asm.h>
 
 #if defined __UCLIBC_HAS_LFS__ && defined __NR_mmap2
 
@@ -28,9 +29,46 @@
 .global mmap64
 .type mmap64,%function
 .align 2
-mmap64:
 #ifdef __ARM_EABI__
+#if defined(THUMB1_ONLY)
+.thumb_func
+mmap64:
+#ifdef __ARMEB__
+/* Offsets are after pushing 3 words.  */
+# define LOW_OFFSET	12 + 8 + 4
+# define HIGH_OFFSET	12 + 8 + 0
+#else
+# define LOW_OFFSET	12 + 8 + 0
+# define HIGH_OFFSET	12 + 8 + 4
+#endif
+	push	{r4, r5, r6}
+	ldr	r6, [sp, $LOW_OFFSET]
+	ldr	r5, [sp, $HIGH_OFFSET]
+	lsl	r4, r6, #20	@ check that offset is page-aligned
+	bne	.Linval
+	lsr	r4, r5, #12	@ check for overflow
+	bne	.Linval
+	@ compose page offset
+	lsr	r6, r6, #12
+	lsl	r5, r5, #20
+	orr	r5, r5, r6
+	ldr	r4, [sp, #12]	@ load fd (5th arg, above the 3 pushed words)
+	DO_CALL (mmap2)
+	pop	{r4, r5, r6}	@ restore call-saved registers
+	ldr	r1, =0xfffff000
+	cmp	r0, r1
+	bcs	.Lerror
+	bx	lr
+.Linval:
+	ldr	r0, =-EINVAL
+	pop	{r4, r5, r6}
+.Lerror:
+	push	{r3, lr}
+	bl	__syscall_error
+	POP_RET
+.pool
+#else /* !THUMB1_ONLY */
+mmap64:
 #ifdef __ARMEB__
 # define LOW_OFFSET	8 + 4
 /* The initial + 4 is for the stack postdecrement.  */
@@ -45,6 +83,7 @@ mmap64:
 	str	r4, [sp, #-4]!
 	movs	r4, ip, lsl $20	@ check that offset is page-aligned
 	mov	ip, ip, lsr $12
+	IT(t, eq)
 	moveqs	r4, r5, lsr $12	@ check for overflow
 	bne	.Linval
 	ldr	r4, [sp, $8]	@ load fd
@@ -52,6 +91,7 @@ mmap64:
 	DO_CALL (mmap2)
 	cmn	r0, $4096
 	ldmfd	sp!, {r4, r5}
+	IT(t, cc)
 #if defined(__USE_BX__)
 	bxcc	lr
 #else
@@ -62,7 +102,8 @@ mmap64:
 	mov	r0, $-EINVAL
 	ldmfd	sp!, {r4, r5}
 	b	__syscall_error
-#else
+#endif
+#else /* !__ARM_EABI__ */
 	stmfd	sp!, {r4, r5, lr}
 	ldr	r5, [sp, $16]
 	ldr	r4, [sp, $12]
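mmap2 takes its file offset in 4096-byte pages rather than bytes, which is
what the shifting above implements: reject offsets that are not page-aligned,
reject page numbers that overflow 32 bits, then splice the low and high words
of the 64-bit byte offset into a 32-bit page count. The same checks in C — a
sketch of the logic only (page_offset is not a real uClibc function):

#include <stdint.h>

#define MMAP2_PAGE_SHIFT 12	/* mmap2 offsets are in 4 KiB pages */

/* Returns 0 and stores the page offset, or -1 for the two EINVAL
   cases that the assembly tests with its lsl/lsr pairs. */
static int page_offset(uint64_t byte_off, uint32_t *pgoff)
{
	if (byte_off & 0xfff)				/* not page-aligned */
		return -1;
	if (byte_off >> (MMAP2_PAGE_SHIFT + 32))	/* pgoff > 32 bits */
		return -1;
	*pgoff = (uint32_t)(byte_off >> MMAP2_PAGE_SHIFT);
	return 0;
}
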
diff --git a/libc/sysdeps/linux/arm/setjmp.S b/libc/sysdeps/linux/arm/setjmp.S
index 8d15b8324..2df7d551a 100644
--- a/libc/sysdeps/linux/arm/setjmp.S
+++ b/libc/sysdeps/linux/arm/setjmp.S
@@ -18,15 +18,41 @@
    02111-1307 USA.  */
 
 #include <features.h>
+#include <bits/arm_asm.h>
 
 .global __sigsetjmp
 .type __sigsetjmp,%function
 .align 2
+#if defined(THUMB1_ONLY)
+.thumb_func
 __sigsetjmp:
+	push	{r3, r4, r5, r6, r7, lr}
 	mov	ip, r0
+	stmia	r0!, {r4, r5, r6, r7}
+	mov	r2, r8
+	mov	r3, r9
+	mov	r4, sl
+	mov	r5, fp
+	add	r6, sp, #(6 * 4)
+	mov	r7, lr
+	stmia	r0!, {r2, r3, r4, r5, r6, r7}
+	mov	r0, ip
+	bl	__sigjmp_save
+	pop	{r3, r4, r5, r6, r7, pc}
+
+#else
+__sigsetjmp:
+	/* Save registers */
+	mov	ip, r0
+#if defined(__thumb2__)
+	stmia	ip!, {v1-v6, sl, fp}
+	movs	r2, sp
+	stmia	ip!, {r2, lr}
+#else
 	/* Save registers */
 	stmia	ip!, {v1-v6, sl, fp, sp, lr}
+#endif
 #if defined __UCLIBC_HAS_FLOATS__ && ! defined __UCLIBC_HAS_SOFT_FLOAT__
 # ifdef __VFP_FP__
 	/* Store the VFP registers.  */
@@ -70,5 +96,6 @@ __sigsetjmp:
 #else
 	B	__sigjmp_save
 #endif
+#endif
 
 .size __sigsetjmp,.-__sigsetjmp
diff --git a/libc/sysdeps/linux/arm/sigrestorer.S b/libc/sysdeps/linux/arm/sigrestorer.S
index 194228a38..79728fd40 100644
--- a/libc/sysdeps/linux/arm/sigrestorer.S
+++ b/libc/sysdeps/linux/arm/sigrestorer.S
@@ -16,6 +16,7 @@
    Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
    02111-1307 USA.  */
 
+#include <bits/arm_asm.h>
 #include <sys/syscall.h>
 #include <linux/version.h>
 
@@ -38,6 +39,9 @@
 .type __default_sa_restorer,%function
 .align 2
 #ifdef __ARM_EABI__
+#ifdef __thumb__
+.thumb_func
+#endif
 .fnstart
 .save {r0-r15}
 #if LINUX_VERSION_CODE >= 0x020612
@@ -62,6 +66,9 @@ __default_sa_restorer:
 .type __default_rt_sa_restorer,%function
 .align 2
 #ifdef __ARM_EABI__
+#ifdef __thumb__
+.thumb_func
+#endif
 .fnstart
 .save {r0-r15}
 #if LINUX_VERSION_CODE >= 0x020612
diff --git a/libc/sysdeps/linux/arm/syscall-eabi.S b/libc/sysdeps/linux/arm/syscall-eabi.S
index efc30690c..b9318821b 100644
--- a/libc/sysdeps/linux/arm/syscall-eabi.S
+++ b/libc/sysdeps/linux/arm/syscall-eabi.S
@@ -17,6 +17,7 @@
    02111-1307 USA.  */
 
 #include <sys/syscall.h>
+#include <bits/arm_asm.h>
 
 /* In the EABI syscall interface, we don't need a special syscall to
    implement syscall().  It won't work reliably with 64-bit arguments
@@ -26,6 +27,29 @@
 .global syscall
 .type syscall,%function
 .align 4
+#if defined(THUMB1_ONLY)
+.thumb_func
+syscall:
+	push	{r4, r5, r6, r7}
+	mov	ip, r0
+	mov	r0, r1
+	mov	r1, r2
+	mov	r2, r3
+	add	r7, sp, #(4 * 4)
+	ldmia	r7!, {r3, r4, r5, r6}
+	mov	r7, ip
+	swi	0x0
+	pop	{r4, r5, r6, r7}
+	ldr	r1, =0xfffff000
+	cmp	r0, r1
+	bcs	1f
+	bx	lr
+1:
+	push	{r3, lr}
+	bl	__syscall_error
+	POP_RET
+.pool
+#else
 syscall:
 	mov	ip, sp
 	stmfd	sp!, {r4, r5, r6, r7}
@@ -37,11 +61,13 @@ syscall:
 	swi	0x0
 	ldmfd	sp!, {r4, r5, r6, r7}
 	cmn	r0, #4096
+	IT(t, cc)
 #if defined(__USE_BX__)
 	bxcc	lr
 #else
 	movcc	pc, lr
 #endif
 	b	__syscall_error
+#endif
 
 .size syscall,.-syscall
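The Thumb-1 sequences above (ldr r1, =0xfffff000 / cmp / bcs) and the ARM
sequences (cmn r0, #4096 / bxcc) implement the same kernel convention: a raw
syscall return value in the range [-4096, -1], i.e. at or above 0xfffff000
when viewed unsigned, is a negated errno rather than a result. In C (a
sketch of the convention, not library code):

/* The ARM/Linux syscall return-value check used throughout this patch. */
static inline int syscall_failed(unsigned long rv)
{
	return rv >= (unsigned long)-4096;	/* rv >= 0xfffff000 on 32-bit */
}

When this predicate is true, the stubs branch to __syscall_error, which
negates the value, stores it in errno and returns -1.
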
diff --git a/libc/sysdeps/linux/arm/vfork.S b/libc/sysdeps/linux/arm/vfork.S
index e9f63d46e..42595b026 100644
--- a/libc/sysdeps/linux/arm/vfork.S
+++ b/libc/sysdeps/linux/arm/vfork.S
@@ -6,6 +6,7 @@
  */
 
 #include <sys/syscall.h>
+#include <bits/arm_asm.h>
 #define _ERRNO_H
 #include <bits/errno.h>
 
@@ -18,11 +19,47 @@
 .type __vfork,%function
 .align 4
 
+#if defined(__thumb__) && !defined(__thumb2__)
+.thumb_func
+__vfork:
+#ifdef __NR_vfork
+	DO_CALL (vfork)
+	ldr	r1, =0xfffff000
+	cmp	r0, r1
+	bcs	1f
+	bx	lr
+1:
+
+	/* Check if vfork even exists. */
+	ldr	r1, =-ENOSYS
+	cmp	r0, r1
+	bne	__error
+
+	/* If we don't have vfork, use fork. */
+	DO_CALL (fork)
+	ldr	r1, =0xfffff000
+	cmp	r0, r1
+
+	/* Syscall worked.  Return to child/parent */
+	bcs	1f
+	bx	lr
+1:
+
+__error:
+	push	{r3, lr}
+	bl	__syscall_error
+	POP_RET
+.pool
+
+#endif
+
+#else
 __vfork:
 #ifdef __NR_vfork
 	DO_CALL (vfork)
 	cmn	r0, #4096
+	IT(t, cc)
 #if defined(__USE_BX__)
 	bxcc	lr
 #else
@@ -40,6 +77,7 @@ __vfork:
 	cmn	r0, #4096
 
 	/* Syscall worked.  Return to child/parent */
+	IT(t, cc)
 #if defined(__USE_BX__)
 	bxcc	lr
 #else
@@ -48,8 +86,10 @@ __vfork:
 
 __error:
 	b	__syscall_error
+#endif
 
 .size __vfork,.-__vfork
+
 weak_alias(__vfork,vfork)
 libc_hidden_weak(vfork)
 #endif
-- 
cgit v1.2.3
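Since the patch touches every one of the string primitives, a quick hosted
smoke test of the rewritten routines is easy to run on an arm-uclinuxeabi
target. The program below is not part of the patch — just a sketch that
exercises the forward, overlapping and comparison paths changed above:

#include <assert.h>
#include <string.h>

int main(void)
{
	char a[64], b[64];

	memset(a, 'x', sizeof(a));
	assert(a[0] == 'x' && a[63] == 'x');

	memcpy(b, a, sizeof(a));
	assert(memcmp(a, b, sizeof(a)) == 0);

	memcpy(a, "abcdef", 7);
	memmove(a + 1, a, 7);	/* overlap forces the backwards path */
	assert(strcmp(a, "aabcdef") == 0);

	assert(strlen("thumb") == 5);
	assert(strncmp("thumb1", "thumb2", 5) == 0);
	return 0;
}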