From dba942c80dc2cfa5768a856fff98e22a755fdd27 Mon Sep 17 00:00:00 2001
From: Waldemar Brodkorb <wbx@uclibc-ng.org>
Date: Tue, 4 Oct 2016 06:51:35 +0200
Subject: add experimental aarch64 support

Ported over from GNU C Library and runtime tested in Qemu.
---
 Rules.mak                                          |   6 +
 extra/Configs/Config.aarch64                       |  32 ++
 extra/Configs/Config.in                            |  11 +-
 extra/Configs/Config.in.arch                       |   2 +-
 include/atomic.h                                   |  10 +-
 include/elf.h                                      | 166 +++++++++++
 ldso/include/dl-syscall.h                          |  10 +
 ldso/ldso/aarch64/dl-startup.h                     |  98 ++++++
 ldso/ldso/aarch64/dl-syscalls.h                    |   1 +
 ldso/ldso/aarch64/dl-sysdep.h                      | 107 +++++++
 ldso/ldso/aarch64/dl-tlsdesc.S                     | 207 +++++++++++++
 ldso/ldso/aarch64/elfinterp.c                      | 306 +++++++++++++++++++
 ldso/ldso/aarch64/resolve.S                        |  97 ++++++
 libc/string/aarch64/Makefile                       |  13 +
 libc/string/aarch64/memcpy.S                       | 230 ++++++++++++++
 libc/string/aarch64/memset.S                       | 189 ++++++++++++
 libc/sysdeps/linux/aarch64/Makefile                |   9 +
 libc/sysdeps/linux/aarch64/Makefile.arch           |   5 +
 libc/sysdeps/linux/aarch64/__longjmp.S             | 107 +++++++
 libc/sysdeps/linux/aarch64/__syscall_error.c       |  18 ++
 libc/sysdeps/linux/aarch64/bits/atomic.h           | 172 +++++++++++
 libc/sysdeps/linux/aarch64/bits/endian.h           |  30 ++
 libc/sysdeps/linux/aarch64/bits/fcntl.h            | 329 +++++++++++++++++++++
 libc/sysdeps/linux/aarch64/bits/kernel_types.h     |  42 +++
 libc/sysdeps/linux/aarch64/bits/setjmp.h           |  33 +++
 libc/sysdeps/linux/aarch64/bits/stackinfo.h        |  33 +++
 libc/sysdeps/linux/aarch64/bits/syscalls.h         | 106 +++++++
 .../linux/aarch64/bits/uClibc_arch_features.h      |  38 +++
 libc/sysdeps/linux/aarch64/bits/uClibc_page.h      |  25 ++
 libc/sysdeps/linux/aarch64/bits/wordsize.h         |  18 ++
 libc/sysdeps/linux/aarch64/bsd-_setjmp.S           |   1 +
 libc/sysdeps/linux/aarch64/bsd-setjmp.S            |   1 +
 libc/sysdeps/linux/aarch64/clone.S                 |  85 ++++++
 libc/sysdeps/linux/aarch64/crt1.S                  |  89 ++++++
 libc/sysdeps/linux/aarch64/crti.S                  |  59 ++++
 libc/sysdeps/linux/aarch64/crtn.S                  |  46 +++
 libc/sysdeps/linux/aarch64/jmpbuf-offsets.h        |  57 ++++
 libc/sysdeps/linux/aarch64/jmpbuf-unwind.h         |  34 +++
 libc/sysdeps/linux/aarch64/setjmp.S                |  59 ++++
 libc/sysdeps/linux/aarch64/sys/procfs.h            | 123 ++++++++
 libc/sysdeps/linux/aarch64/sys/ucontext.h          |  53 ++++
 libc/sysdeps/linux/aarch64/sys/user.h              |  37 +++
 libc/sysdeps/linux/aarch64/syscall.S               |  42 +++
 libc/sysdeps/linux/aarch64/sysdep.h                | 150 ++++++++++
 libc/sysdeps/linux/aarch64/vfork.S                 |  37 +++
 libc/sysdeps/linux/common-generic/bits/stat.h      |  49 ---
 libc/sysdeps/linux/common/fstat.c                  |  28 +-
 libc/sysdeps/linux/common/fstatat.c                |   9 -
 libc/sysdeps/linux/common/lstat.c                  |   9 +
 libc/sysdeps/linux/common/lstat64.c                |  14 +-
 libc/sysdeps/linux/common/stat.c                   |   8 +
 libpthread/nptl/sysdeps/aarch64/Makefile.arch      |  32 ++
 libpthread/nptl/sysdeps/aarch64/dl-tls.h           |  55 ++++
 libpthread/nptl/sysdeps/aarch64/libc-dl-tlsdesc.S  |   1 +
 libpthread/nptl/sysdeps/aarch64/libc-tls.c         |  35 +++
 .../nptl/sysdeps/aarch64/pthread_spin_lock.c       |  65 ++++
 .../nptl/sysdeps/aarch64/pthread_spin_trylock.c    |  26 ++
 libpthread/nptl/sysdeps/aarch64/pthreaddef.h       |  34 +++
 libpthread/nptl/sysdeps/aarch64/tcb-offsets.sym    |   6 +
 libpthread/nptl/sysdeps/aarch64/tls.h              | 149 ++++++++++
 libpthread/nptl/sysdeps/aarch64/tlsdesc.sym        |  17 ++
 .../nptl/sysdeps/unix/sysv/linux/aarch64/Makefile  |  13 +
 .../sysdeps/unix/sysv/linux/aarch64/Makefile.arch  |  14 +
 .../unix/sysv/linux/aarch64/bits/pthreadtypes.h    | 174 +++++++++++
 .../unix/sysv/linux/aarch64/bits/semaphore.h       |  30 ++
 .../sysdeps/unix/sysv/linux/aarch64/createthread.c |  21 ++
 .../nptl/sysdeps/unix/sysv/linux/aarch64/fork.c    |  11 +
 .../sysdeps/unix/sysv/linux/aarch64/lowlevellock.h | 323 ++++++++++++++++++++
 .../sysdeps/unix/sysv/linux/aarch64/pt-raise.c     |  18 ++
 .../sysdeps/unix/sysv/linux/aarch64/pthread_once.c |  90 ++++++
 .../unix/sysv/linux/aarch64/sysdep-cancel.h        | 132 +++++++++
 utils/ldd.c                                        |   5 +
 72 files changed, 4606 insertions(+), 85 deletions(-)
 create mode 100644 extra/Configs/Config.aarch64
 create mode 100644 ldso/ldso/aarch64/dl-startup.h
 create mode 100644 ldso/ldso/aarch64/dl-syscalls.h
 create mode 100644 ldso/ldso/aarch64/dl-sysdep.h
 create mode 100644 ldso/ldso/aarch64/dl-tlsdesc.S
 create mode 100644 ldso/ldso/aarch64/elfinterp.c
 create mode 100644 ldso/ldso/aarch64/resolve.S
 create mode 100644 libc/string/aarch64/Makefile
 create mode 100644 libc/string/aarch64/memcpy.S
 create mode 100644 libc/string/aarch64/memset.S
 create mode 100644 libc/sysdeps/linux/aarch64/Makefile
 create mode 100644 libc/sysdeps/linux/aarch64/Makefile.arch
 create mode 100644 libc/sysdeps/linux/aarch64/__longjmp.S
 create mode 100644 libc/sysdeps/linux/aarch64/__syscall_error.c
 create mode 100644 libc/sysdeps/linux/aarch64/bits/atomic.h
 create mode 100644 libc/sysdeps/linux/aarch64/bits/endian.h
 create mode 100644 libc/sysdeps/linux/aarch64/bits/fcntl.h
 create mode 100644 libc/sysdeps/linux/aarch64/bits/kernel_types.h
 create mode 100644 libc/sysdeps/linux/aarch64/bits/setjmp.h
 create mode 100644 libc/sysdeps/linux/aarch64/bits/stackinfo.h
 create mode 100644 libc/sysdeps/linux/aarch64/bits/syscalls.h
 create mode 100644 libc/sysdeps/linux/aarch64/bits/uClibc_arch_features.h
 create mode 100755 libc/sysdeps/linux/aarch64/bits/uClibc_page.h
 create mode 100644 libc/sysdeps/linux/aarch64/bits/wordsize.h
 create mode 100644 libc/sysdeps/linux/aarch64/bsd-_setjmp.S
 create mode 100644 libc/sysdeps/linux/aarch64/bsd-setjmp.S
 create mode 100644 libc/sysdeps/linux/aarch64/clone.S
 create mode 100644 libc/sysdeps/linux/aarch64/crt1.S
 create mode 100644 libc/sysdeps/linux/aarch64/crti.S
 create mode 100644 libc/sysdeps/linux/aarch64/crtn.S
 create mode 100644 libc/sysdeps/linux/aarch64/jmpbuf-offsets.h
 create mode 100644 libc/sysdeps/linux/aarch64/jmpbuf-unwind.h
 create mode 100644 libc/sysdeps/linux/aarch64/setjmp.S
 create mode 100644 libc/sysdeps/linux/aarch64/sys/procfs.h
 create mode 100644 libc/sysdeps/linux/aarch64/sys/ucontext.h
 create mode 100644 libc/sysdeps/linux/aarch64/sys/user.h
 create mode 100644 libc/sysdeps/linux/aarch64/syscall.S
 create mode 100644 libc/sysdeps/linux/aarch64/sysdep.h
 create mode 100644 libc/sysdeps/linux/aarch64/vfork.S
 create mode 100644 libpthread/nptl/sysdeps/aarch64/Makefile.arch
 create mode 100644 libpthread/nptl/sysdeps/aarch64/dl-tls.h
 create mode 100644 libpthread/nptl/sysdeps/aarch64/libc-dl-tlsdesc.S
 create mode 100644 libpthread/nptl/sysdeps/aarch64/libc-tls.c
 create mode 100644 libpthread/nptl/sysdeps/aarch64/pthread_spin_lock.c
 create mode 100644 libpthread/nptl/sysdeps/aarch64/pthread_spin_trylock.c
 create mode 100644 libpthread/nptl/sysdeps/aarch64/pthreaddef.h
 create mode 100644 libpthread/nptl/sysdeps/aarch64/tcb-offsets.sym
 create mode 100644 libpthread/nptl/sysdeps/aarch64/tls.h
 create mode 100644 libpthread/nptl/sysdeps/aarch64/tlsdesc.sym
 create mode 100644 libpthread/nptl/sysdeps/unix/sysv/linux/aarch64/Makefile
 create mode 100644 libpthread/nptl/sysdeps/unix/sysv/linux/aarch64/Makefile.arch
 create mode 100644 libpthread/nptl/sysdeps/unix/sysv/linux/aarch64/bits/pthreadtypes.h
 create mode 100644 libpthread/nptl/sysdeps/unix/sysv/linux/aarch64/bits/semaphore.h
 create mode 100644 libpthread/nptl/sysdeps/unix/sysv/linux/aarch64/createthread.c
 create mode 100644 libpthread/nptl/sysdeps/unix/sysv/linux/aarch64/fork.c
 create mode 100644 libpthread/nptl/sysdeps/unix/sysv/linux/aarch64/lowlevellock.h
 create mode 100644 libpthread/nptl/sysdeps/unix/sysv/linux/aarch64/pt-raise.c
 create mode 100644 libpthread/nptl/sysdeps/unix/sysv/linux/aarch64/pthread_once.c
 create mode 100644 libpthread/nptl/sysdeps/unix/sysv/linux/aarch64/sysdep-cancel.h

diff --git a/Rules.mak b/Rules.mak
index 8bd286354..f53c5817a 100644
--- a/Rules.mak
+++ b/Rules.mak
@@ -300,6 +300,7 @@ ifneq ($(TARGET_ARCH),sh)
 ifneq ($(TARGET_ARCH),c6x)
 ifneq ($(TARGET_ARCH),h8300)
 ifneq ($(TARGET_ARCH),arc)
+ifneq ($(TARGET_ARCH),aarch64)
 CPU_CFLAGS-y += -msoft-float
 endif
 endif
@@ -310,6 +311,11 @@ endif
 endif
 endif
 endif
+endif
+
+ifeq ($(TARGET_ARCH),aarch64)
+CPU_CFLAGS-y += -ftls-model=initial-exec
+endif
 
 $(eval $(call check-gcc-var,-std=gnu99))
 CPU_CFLAGS-y += $(CFLAG_-std=gnu99)
diff --git a/extra/Configs/Config.aarch64 b/extra/Configs/Config.aarch64
new file mode 100644
index 000000000..cf1ce3b79
--- /dev/null
+++ b/extra/Configs/Config.aarch64
@@ -0,0 +1,32 @@
+#
+# For a description of the syntax of this configuration file,
+# see extra/config/Kconfig-language.txt
+#
+
+config TARGET_ARCH
+	string
+	default "aarch64"
+
+config FORCE_OPTIONS_FOR_ARCH
+	bool
+	default y
+	select ARCH_ANY_ENDIAN
+	select ARCH_HAS_MMU
+	select UCLIBC_HAS_FPU
+
+choice
+	prompt "MMU Page Size"
+	default CONFIG_AARCH64_PAGE_SIZE_4K
+
+config CONFIG_AARCH64_PAGE_SIZE_4K
+	bool "4KB"
+	help
+	  Choose between 4k(default), 16k or 64k
+
+config CONFIG_AARCH64_PAGE_SIZE_16K
+	bool "16KB"
+
+config CONFIG_AARCH64_PAGE_SIZE_64K
+	bool "64KB"
+
+endchoice
diff --git a/extra/Configs/Config.in b/extra/Configs/Config.in
index b2cf977b7..850bd7d13 100644
--- a/extra/Configs/Config.in
+++ b/extra/Configs/Config.in
@@ -15,6 +15,7 @@ config VERSION
 
 choice
 	prompt "Target Architecture"
+	default TARGET_aarch64 if DESIRED_TARGET_ARCH = "aarch64"
 	default TARGET_alpha if DESIRED_TARGET_ARCH = "alpha"
 	default TARGET_arc if DESIRED_TARGET_ARCH = "arc"
 	default TARGET_arm if DESIRED_TARGET_ARCH = "arm"
@@ -42,6 +43,9 @@ choice
 	help
 	  The architecture of your target.
 
+config TARGET_aarch64
+	bool "aarch64"
+
 config TARGET_alpha
 	bool "alpha"
 
@@ -124,6 +128,10 @@ endchoice
 
 menu "Target Architecture Features and Options"
 
+if TARGET_aarch64
+source "extra/Configs/Config.aarch64"
+endif
+
 if TARGET_alpha
 source "extra/Configs/Config.alpha"
 endif
@@ -500,7 +508,8 @@ config UCLIBC_HAS_LINUXTHREADS
 	bool "Linuxthreads"
 	# linuxthreads need nanosleep()
 	select UCLIBC_HAS_REALTIME
-	depends on !TARGET_metag
+	depends on !TARGET_aarch64 && \
+		   !TARGET_metag
 	help
 	  If you want to compile uClibc with Linuxthreads support, then answer Y.
 
diff --git a/extra/Configs/Config.in.arch b/extra/Configs/Config.in.arch
index 37dd8bd74..a1a2a5f99 100644
--- a/extra/Configs/Config.in.arch
+++ b/extra/Configs/Config.in.arch
@@ -191,7 +191,7 @@ config UCLIBC_HAS_FENV
 config UCLIBC_HAS_LONG_DOUBLE_MATH
 	bool "Enable long double support"
 	depends on DO_C99_MATH
-	depends on TARGET_aarch64 || TARGET_alpha || TARGET_i386 || TARGET_ia64 || TARGET_m68k || TARGET_powerpc || TARGET_s390 || TARGET_sparc || TARGET_tile || TARGET_x86_64
+	depends on TARGET_alpha || TARGET_i386 || TARGET_ia64 || TARGET_m68k || TARGET_powerpc || TARGET_s390 || TARGET_sparc || TARGET_tile || TARGET_x86_64
 	default y
 	help
 	  If you want the uClibc math library to contain the full set of C99
diff --git a/include/atomic.h b/include/atomic.h
index 3680d8714..267aff5d5 100644
--- a/include/atomic.h
+++ b/include/atomic.h
@@ -542,24 +542,18 @@
   ({ __typeof (x) __x; __asm__ ("" : "=r" (__x) : "0" (x)); __x; })
 #endif
 
-/* This is equal to 1 iff the architecture supports 64b atomic operations.  */
-#define __HAVE_64B_ATOMICS 0 /* TODO: not yet used - Add these to arch bits! */
-#ifndef __HAVE_64B_ATOMICS
-#error Unable to determine if 64-bit atomics are present.
-#endif
-
 /* The following functions are a subset of the atomic operations provided by
    C11.  Usually, a function named atomic_OP_MO(args) is equivalent to C11's
    atomic_OP_explicit(args, memory_order_MO); exceptions noted below.  */
 
 /* Each arch can request to use compiler built-ins for C11 atomics.  If it
    does, all atomics will be based on these.  */
-#if 0 /* not yet used USE_ATOMIC_COMPILER_BUILTINS */
+#if defined USE_ATOMIC_COMPILER_BUILTINS
 
 /* We require 32b atomic operations; some archs also support 64b atomic
    operations.  */
 void __atomic_link_error (void);
-# if __HAVE_64B_ATOMICS == 1
+# if defined(__HAVE_64B_ATOMICS) && __HAVE_64B_ATOMICS
 #  define __atomic_check_size(mem) \
    if ((sizeof (*mem) != 4) && (sizeof (*mem) != 8))			      \
      __atomic_link_error ();
diff --git a/include/elf.h b/include/elf.h
index 0f188e792..5312be97c 100644
--- a/include/elf.h
+++ b/include/elf.h
@@ -269,6 +269,7 @@ typedef struct
 #define EM_TI_C6000	140
 #define EM_NDS32	167		/* Andes Tech NDS32 */
 #define EM_METAG	174		/* Imagination Technologies Meta */
+#define EM_AARCH64	183		/* ARM AARCH64 */
 #define EM_MICROBLAZE	189		/* Xilinx Microblaze */
 #define EM_ARCV2	195		/* ARCv2 Cores */
 
@@ -725,6 +726,31 @@ typedef struct
 #define NT_LWPSTATUS	16		/* Contains copy of lwpstatus struct */
 #define NT_LWPSINFO	17		/* Contains copy of lwpinfo struct */
 #define NT_PRFPXREG	20		/* Contains copy of fprxregset struct*/
+#define NT_SIGINFO	0x53494749	/* Contains copy of siginfo_t,
+					   size might increase */
+#define NT_FILE		0x46494c45	/* Contains information about mapped
+					   files */
+#define NT_PRXFPREG	0x46e62b7f	/* Contains copy of user_fxsr_struct */
+#define NT_PPC_VMX	0x100		/* PowerPC Altivec/VMX registers */
+#define NT_PPC_SPE	0x101		/* PowerPC SPE/EVR registers */
+#define NT_PPC_VSX	0x102		/* PowerPC VSX registers */
+#define NT_386_TLS	0x200		/* i386 TLS slots (struct user_desc) */
+#define NT_386_IOPERM	0x201		/* x86 io permission bitmap (1=deny) */
+#define NT_X86_XSTATE	0x202		/* x86 extended state using xsave */
+#define NT_S390_HIGH_GPRS	0x300	/* s390 upper register halves */
+#define NT_S390_TIMER	0x301		/* s390 timer register */
+#define NT_S390_TODCMP	0x302		/* s390 TOD clock comparator register */
+#define NT_S390_TODPREG	0x303		/* s390 TOD programmable register */
+#define NT_S390_CTRS	0x304		/* s390 control registers */
+#define NT_S390_PREFIX	0x305		/* s390 prefix register */
+#define NT_S390_LAST_BREAK	0x306	/* s390 breaking event address */
+#define NT_S390_SYSTEM_CALL	0x307	/* s390 system call restart data */
+#define NT_S390_TDB	0x308		/* s390 transaction diagnostic block */
+#define NT_ARM_VFP	0x400		/* ARM VFP/NEON registers */
+#define NT_ARM_TLS	0x401		/* ARM TLS register */
+#define NT_ARM_HW_BREAK	0x402		/* ARM hardware breakpoint registers */
+#define NT_ARM_HW_WATCH	0x403		/* ARM hardware watchpoint registers */
+#define NT_ARM_SYSTEM_CALL	0x404	/* ARM system call number */
 
 /* Legal values for the note segment descriptor types for object files.  */
 
@@ -2358,6 +2384,146 @@ typedef Elf32_Addr Elf32_Conflict;
 /* Processor specific values for the Phdr p_type field.  */
 #define PT_ARM_EXIDX	0x70000001	/* .ARM.exidx segment */
 
+/* AArch64 relocs.  */
+
+#define R_AARCH64_NONE            0	/* No relocation.  */
+
+/* ILP32 AArch64 relocs.  */
+#define R_AARCH64_P32_ABS32		  1	/* Direct 32 bit.  */
+#define R_AARCH64_P32_COPY		180	/* Copy symbol at runtime.  */
+#define R_AARCH64_P32_GLOB_DAT		181	/* Create GOT entry.  */
+#define R_AARCH64_P32_JUMP_SLOT		182	/* Create PLT entry.  */
+#define R_AARCH64_P32_RELATIVE		183	/* Adjust by program base.  */
+#define R_AARCH64_P32_TLS_DTPMOD	184	/* Module number, 32 bit.  */
+#define R_AARCH64_P32_TLS_DTPREL	185	/* Module-relative offset, 32 bit.  */
+#define R_AARCH64_P32_TLS_TPREL		186	/* TP-relative offset, 32 bit.  */
+#define R_AARCH64_P32_TLSDESC		187	/* TLS Descriptor.  */
+#define R_AARCH64_P32_IRELATIVE		188	/* STT_GNU_IFUNC relocation. */
+
+/* LP64 AArch64 relocs.  */
+#define R_AARCH64_ABS64         257	/* Direct 64 bit. */
+#define R_AARCH64_ABS32         258	/* Direct 32 bit.  */
+#define R_AARCH64_ABS16		259	/* Direct 16-bit.  */
+#define R_AARCH64_PREL64	260	/* PC-relative 64-bit.	*/
+#define R_AARCH64_PREL32	261	/* PC-relative 32-bit.	*/
+#define R_AARCH64_PREL16	262	/* PC-relative 16-bit.	*/
+#define R_AARCH64_MOVW_UABS_G0	263	/* Dir. MOVZ imm. from bits 15:0.  */
+#define R_AARCH64_MOVW_UABS_G0_NC 264	/* Likewise for MOVK; no check.  */
+#define R_AARCH64_MOVW_UABS_G1	265	/* Dir. MOVZ imm. from bits 31:16.  */
+#define R_AARCH64_MOVW_UABS_G1_NC 266	/* Likewise for MOVK; no check.  */
+#define R_AARCH64_MOVW_UABS_G2	267	/* Dir. MOVZ imm. from bits 47:32.  */
+#define R_AARCH64_MOVW_UABS_G2_NC 268	/* Likewise for MOVK; no check.  */
+#define R_AARCH64_MOVW_UABS_G3	269	/* Dir. MOV{K,Z} imm. from 63:48.  */
+#define R_AARCH64_MOVW_SABS_G0	270	/* Dir. MOV{N,Z} imm. from 15:0.  */
+#define R_AARCH64_MOVW_SABS_G1	271	/* Dir. MOV{N,Z} imm. from 31:16.  */
+#define R_AARCH64_MOVW_SABS_G2	272	/* Dir. MOV{N,Z} imm. from 47:32.  */
+#define R_AARCH64_LD_PREL_LO19	273	/* PC-rel. LD imm. from bits 20:2.  */
+#define R_AARCH64_ADR_PREL_LO21	274	/* PC-rel. ADR imm. from bits 20:0.  */
+#define R_AARCH64_ADR_PREL_PG_HI21 275	/* Page-rel. ADRP imm. from 32:12.  */
+#define R_AARCH64_ADR_PREL_PG_HI21_NC 276 /* Likewise; no overflow check.  */
+#define R_AARCH64_ADD_ABS_LO12_NC 277	/* Dir. ADD imm. from bits 11:0.  */
+#define R_AARCH64_LDST8_ABS_LO12_NC 278	/* Likewise for LD/ST; no check. */
+#define R_AARCH64_TSTBR14	279	/* PC-rel. TBZ/TBNZ imm. from 15:2.  */
+#define R_AARCH64_CONDBR19	280	/* PC-rel. cond. br. imm. from 20:2. */
+#define R_AARCH64_JUMP26	282	/* PC-rel. B imm. from bits 27:2.  */
+#define R_AARCH64_CALL26	283	/* Likewise for CALL.  */
+#define R_AARCH64_LDST16_ABS_LO12_NC 284 /* Dir. ADD imm. from bits 11:1.  */
+#define R_AARCH64_LDST32_ABS_LO12_NC 285 /* Likewise for bits 11:2.  */
+#define R_AARCH64_LDST64_ABS_LO12_NC 286 /* Likewise for bits 11:3.  */
+#define R_AARCH64_MOVW_PREL_G0	287	/* PC-rel. MOV{N,Z} imm. from 15:0.  */
+#define R_AARCH64_MOVW_PREL_G0_NC 288	/* Likewise for MOVK; no check.  */
+#define R_AARCH64_MOVW_PREL_G1	289	/* PC-rel. MOV{N,Z} imm. from 31:16. */
+#define R_AARCH64_MOVW_PREL_G1_NC 290	/* Likewise for MOVK; no check.  */
+#define R_AARCH64_MOVW_PREL_G2	291	/* PC-rel. MOV{N,Z} imm. from 47:32. */
+#define R_AARCH64_MOVW_PREL_G2_NC 292	/* Likewise for MOVK; no check.  */
+#define R_AARCH64_MOVW_PREL_G3	293	/* PC-rel. MOV{N,Z} imm. from 63:48. */
+#define R_AARCH64_LDST128_ABS_LO12_NC 299 /* Dir. ADD imm. from bits 11:4.  */
+#define R_AARCH64_MOVW_GOTOFF_G0 300	/* GOT-rel. off. MOV{N,Z} imm. 15:0. */
+#define R_AARCH64_MOVW_GOTOFF_G0_NC 301	/* Likewise for MOVK; no check.  */
+#define R_AARCH64_MOVW_GOTOFF_G1 302	/* GOT-rel. o. MOV{N,Z} imm. 31:16.  */
+#define R_AARCH64_MOVW_GOTOFF_G1_NC 303	/* Likewise for MOVK; no check.  */
+#define R_AARCH64_MOVW_GOTOFF_G2 304	/* GOT-rel. o. MOV{N,Z} imm. 47:32.  */
+#define R_AARCH64_MOVW_GOTOFF_G2_NC 305	/* Likewise for MOVK; no check.  */
+#define R_AARCH64_MOVW_GOTOFF_G3 306	/* GOT-rel. o. MOV{N,Z} imm. 63:48.  */
+#define R_AARCH64_GOTREL64	307	/* GOT-relative 64-bit.  */
+#define R_AARCH64_GOTREL32	308	/* GOT-relative 32-bit.  */
+#define R_AARCH64_GOT_LD_PREL19	309	/* PC-rel. GOT off. load imm. 20:2.  */
+#define R_AARCH64_LD64_GOTOFF_LO15 310	/* GOT-rel. off. LD/ST imm. 14:3.  */
+#define R_AARCH64_ADR_GOT_PAGE	311	/* P-page-rel. GOT off. ADRP 32:12.  */
+#define R_AARCH64_LD64_GOT_LO12_NC 312	/* Dir. GOT off. LD/ST imm. 11:3.  */
+#define R_AARCH64_LD64_GOTPAGE_LO15 313	/* GOT-page-rel. GOT off. LD/ST 14:3 */
+#define R_AARCH64_TLSGD_ADR_PREL21 512	/* PC-relative ADR imm. 20:0.  */
+#define R_AARCH64_TLSGD_ADR_PAGE21 513	/* page-rel. ADRP imm. 32:12.  */
+#define R_AARCH64_TLSGD_ADD_LO12_NC 514	/* direct ADD imm. from 11:0.  */
+#define R_AARCH64_TLSGD_MOVW_G1	515	/* GOT-rel. MOV{N,Z} 31:16.  */
+#define R_AARCH64_TLSGD_MOVW_G0_NC 516	/* GOT-rel. MOVK imm. 15:0.  */
+#define R_AARCH64_TLSLD_ADR_PREL21 517	/* Like 512; local dynamic model.  */
+#define R_AARCH64_TLSLD_ADR_PAGE21 518	/* Like 513; local dynamic model.  */
+#define R_AARCH64_TLSLD_ADD_LO12_NC 519	/* Like 514; local dynamic model.  */
+#define R_AARCH64_TLSLD_MOVW_G1	520	/* Like 515; local dynamic model.  */
+#define R_AARCH64_TLSLD_MOVW_G0_NC 521	/* Like 516; local dynamic model.  */
+#define R_AARCH64_TLSLD_LD_PREL19 522	/* TLS PC-rel. load imm. 20:2.  */
+#define R_AARCH64_TLSLD_MOVW_DTPREL_G2 523 /* TLS DTP-rel. MOV{N,Z} 47:32.  */
+#define R_AARCH64_TLSLD_MOVW_DTPREL_G1 524 /* TLS DTP-rel. MOV{N,Z} 31:16.  */
+#define R_AARCH64_TLSLD_MOVW_DTPREL_G1_NC 525 /* Likewise; MOVK; no check.  */
+#define R_AARCH64_TLSLD_MOVW_DTPREL_G0 526 /* TLS DTP-rel. MOV{N,Z} 15:0.  */
+#define R_AARCH64_TLSLD_MOVW_DTPREL_G0_NC 527 /* Likewise; MOVK; no check.  */
+#define R_AARCH64_TLSLD_ADD_DTPREL_HI12 528 /* DTP-rel. ADD imm. from 23:12. */
+#define R_AARCH64_TLSLD_ADD_DTPREL_LO12 529 /* DTP-rel. ADD imm. from 11:0.  */
+#define R_AARCH64_TLSLD_ADD_DTPREL_LO12_NC 530 /* Likewise; no ovfl. check.  */
+#define R_AARCH64_TLSLD_LDST8_DTPREL_LO12 531 /* DTP-rel. LD/ST imm. 11:0.  */
+#define R_AARCH64_TLSLD_LDST8_DTPREL_LO12_NC 532 /* Likewise; no check.  */
+#define R_AARCH64_TLSLD_LDST16_DTPREL_LO12 533 /* DTP-rel. LD/ST imm. 11:1.  */
+#define R_AARCH64_TLSLD_LDST16_DTPREL_LO12_NC 534 /* Likewise; no check.  */
+#define R_AARCH64_TLSLD_LDST32_DTPREL_LO12 535 /* DTP-rel. LD/ST imm. 11:2.  */
+#define R_AARCH64_TLSLD_LDST32_DTPREL_LO12_NC 536 /* Likewise; no check.  */
+#define R_AARCH64_TLSLD_LDST64_DTPREL_LO12 537 /* DTP-rel. LD/ST imm. 11:3.  */
+#define R_AARCH64_TLSLD_LDST64_DTPREL_LO12_NC 538 /* Likewise; no check.  */
+#define R_AARCH64_TLSIE_MOVW_GOTTPREL_G1 539 /* GOT-rel. MOV{N,Z} 31:16.  */
+#define R_AARCH64_TLSIE_MOVW_GOTTPREL_G0_NC 540 /* GOT-rel. MOVK 15:0.  */
+#define R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21 541 /* Page-rel. ADRP 32:12.  */
+#define R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC 542 /* Direct LD off. 11:3.  */
+#define R_AARCH64_TLSIE_LD_GOTTPREL_PREL19 543 /* PC-rel. load imm. 20:2.  */
+#define R_AARCH64_TLSLE_MOVW_TPREL_G2 544 /* TLS TP-rel. MOV{N,Z} 47:32.  */
+#define R_AARCH64_TLSLE_MOVW_TPREL_G1 545 /* TLS TP-rel. MOV{N,Z} 31:16.  */
+#define R_AARCH64_TLSLE_MOVW_TPREL_G1_NC 546 /* Likewise; MOVK; no check.  */
+#define R_AARCH64_TLSLE_MOVW_TPREL_G0 547 /* TLS TP-rel. MOV{N,Z} 15:0.  */
+#define R_AARCH64_TLSLE_MOVW_TPREL_G0_NC 548 /* Likewise; MOVK; no check.  */
+#define R_AARCH64_TLSLE_ADD_TPREL_HI12 549 /* TP-rel. ADD imm. 23:12.  */
+#define R_AARCH64_TLSLE_ADD_TPREL_LO12 550 /* TP-rel. ADD imm. 11:0.  */
+#define R_AARCH64_TLSLE_ADD_TPREL_LO12_NC 551 /* Likewise; no ovfl. check.  */
+#define R_AARCH64_TLSLE_LDST8_TPREL_LO12 552 /* TP-rel. LD/ST off. 11:0.  */
+#define R_AARCH64_TLSLE_LDST8_TPREL_LO12_NC 553 /* Likewise; no ovfl. check. */
+#define R_AARCH64_TLSLE_LDST16_TPREL_LO12 554 /* TP-rel. LD/ST off. 11:1.  */
+#define R_AARCH64_TLSLE_LDST16_TPREL_LO12_NC 555 /* Likewise; no check.  */
+#define R_AARCH64_TLSLE_LDST32_TPREL_LO12 556 /* TP-rel. LD/ST off. 11:2.  */
+#define R_AARCH64_TLSLE_LDST32_TPREL_LO12_NC 557 /* Likewise; no check.  */
+#define R_AARCH64_TLSLE_LDST64_TPREL_LO12 558 /* TP-rel. LD/ST off. 11:3.  */
+#define R_AARCH64_TLSLE_LDST64_TPREL_LO12_NC 559 /* Likewise; no check.  */
+#define R_AARCH64_TLSDESC_LD_PREL19 560	/* PC-rel. load immediate 20:2.  */
+#define R_AARCH64_TLSDESC_ADR_PREL21 561 /* PC-rel. ADR immediate 20:0.  */
+#define R_AARCH64_TLSDESC_ADR_PAGE21 562 /* Page-rel. ADRP imm. 32:12.  */
+#define R_AARCH64_TLSDESC_LD64_LO12 563	/* Direct LD off. from 11:3.  */
+#define R_AARCH64_TLSDESC_ADD_LO12 564	/* Direct ADD imm. from 11:0.  */
+#define R_AARCH64_TLSDESC_OFF_G1 565	/* GOT-rel. MOV{N,Z} imm. 31:16.  */
+#define R_AARCH64_TLSDESC_OFF_G0_NC 566	/* GOT-rel. MOVK imm. 15:0; no ck.  */
+#define R_AARCH64_TLSDESC_LDR	567	/* Relax LDR.  */
+#define R_AARCH64_TLSDESC_ADD	568	/* Relax ADD.  */
+#define R_AARCH64_TLSDESC_CALL	569	/* Relax BLR.  */
+#define R_AARCH64_TLSLE_LDST128_TPREL_LO12 570 /* TP-rel. LD/ST off. 11:4.  */
+#define R_AARCH64_TLSLE_LDST128_TPREL_LO12_NC 571 /* Likewise; no check.  */
+#define R_AARCH64_TLSLD_LDST128_DTPREL_LO12 572 /* DTP-rel. LD/ST imm. 11:4. */
+#define R_AARCH64_TLSLD_LDST128_DTPREL_LO12_NC 573 /* Likewise; no check.  */
+#define R_AARCH64_COPY         1024	/* Copy symbol at runtime.  */
+#define R_AARCH64_GLOB_DAT     1025	/* Create GOT entry.  */
+#define R_AARCH64_JUMP_SLOT    1026	/* Create PLT entry.  */
+#define R_AARCH64_RELATIVE     1027	/* Adjust by program base.  */
+#define R_AARCH64_TLS_DTPMOD   1028	/* Module number, 64 bit.  */
+#define R_AARCH64_TLS_DTPREL   1029	/* Module-relative offset, 64 bit.  */
+#define R_AARCH64_TLS_TPREL    1030	/* TP-relative offset, 64 bit.  */
+#define R_AARCH64_TLSDESC      1031	/* TLS Descriptor.  */
+#define R_AARCH64_IRELATIVE	1032	/* STT_GNU_IFUNC relocation.  */
+
 /* ARM relocs.  */
 
 #define R_ARM_NONE		0	/* No reloc */
diff --git a/ldso/include/dl-syscall.h b/ldso/include/dl-syscall.h
index 5528ba6a0..3baddd323 100644
--- a/ldso/include/dl-syscall.h
+++ b/ldso/include/dl-syscall.h
@@ -96,6 +96,16 @@ static __always_inline int _dl_stat(const char *file_name,
 {
 	return _dl_fstatat64(AT_FDCWD, file_name, buf, 0);
 }
+#elif defined __NR_newfstatat && !defined __NR_stat
+# define __NR__dl_newfstatat __NR_newfstatat
+static __always_inline _syscall4(int, _dl_newfstatat, int, fd, const char *,
+				 fn, struct stat *, stat, int, flags)
+
+static __always_inline int _dl_stat(const char *file_name,
+                        struct stat *buf)
+{
+	return _dl_newfstatat(AT_FDCWD, file_name, buf, 0);
+}
 #elif defined __NR_stat
 # define __NR__dl_stat __NR_stat
 static __always_inline _syscall2(int, _dl_stat, const char *, file_name,
diff --git a/ldso/ldso/aarch64/dl-startup.h b/ldso/ldso/aarch64/dl-startup.h
new file mode 100644
index 000000000..1fac5ec35
--- /dev/null
+++ b/ldso/ldso/aarch64/dl-startup.h
@@ -0,0 +1,98 @@
+/*
+ * Architecture specific code used by dl-startup.c
+ * Copyright (C) 2016 Waldemar Brodkorb <wbx@uclibc-ng.org>
+ * Ported from GNU libc
+ * Licensed under the LGPL v2.1, see the file COPYING.LIB in this tarball.
+ */
+
+/* Copyright (C) 1995-2016 Free Software Foundation, Inc.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public License as
+   published by the Free Software Foundation; either version 2.1 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <features.h>
+
+__asm__("\
+.text									\n\
+.globl _start								\n\
+.type _start, %function							\n\
+.globl _dl_start_user							\n\
+.type _dl_start_user, %function						\n\
+_start:									\n\
+	mov	x0, sp							\n\
+	bl	_dl_start						\n\
+	// returns user entry point in x0				\n\
+	mov	x21, x0							\n\
+_dl_start_user:								\n\
+	// get the original arg count					\n\
+	ldr	x1, [sp]						\n\
+	// get the argv address						\n\
+	add	x2, sp, #(1<<3)						\n\
+	// get _dl_skip_args to see if we were				\n\
+	// invoked as an executable					\n\
+	adrp	x4, _dl_skip_args					\n\
+        ldr	w4, [x4, #:lo12:_dl_skip_args]				\n\
+	// do we need to adjust argc/argv				\n\
+        cmp	w4, 0							\n\
+	beq	.L_done_stack_adjust					\n\
+	// subtract _dl_skip_args from original arg count		\n\
+	sub	x1, x1, x4						\n\
+	// store adjusted argc back to stack				\n\
+	str	x1, [sp]						\n\
+	// find the first unskipped argument				\n\
+	mov	x3, x2							\n\
+	add	x4, x2, x4, lsl #3					\n\
+	// shuffle envp down						\n\
+1:	ldr	x5, [x4], #(1<<3)					\n\
+	str	x5, [x3], #(1<<3)					\n\
+	cmp	x5, #0							\n\
+	bne	1b							\n\
+	// shuffle auxv down						\n\
+1:	ldp	x0, x5, [x4, #(2<<3)]!					\n\
+	stp	x0, x5, [x3], #(2<<3)					\n\
+	cmp	x0, #0							\n\
+	bne	1b							\n\
+.L_done_stack_adjust:							\n\
+	// compute envp							\n\
+	add	x3, x2, x1, lsl #3					\n\
+	add	x3, x3, #(1<<3)						\n\
+	// load the finalizer function					\n\
+	adrp	x0, _dl_fini						\n\
+	add	x0, x0, #:lo12:_dl_fini					\n\
+	// jump to the user_s entry point				\n\
+	br      x21							\n\
+");
+
+/* Get a pointer to the argv array.  On many platforms this can be just
+ * the address of the first argument, on other platforms we need to
+ * do something a little more subtle here.  */
+#define GET_ARGV(ARGVP, ARGS) ARGVP = (((unsigned long*)ARGS)+1)
+
+/* Handle relocation of the symbols in the dynamic loader. */
+static __always_inline
+void PERFORM_BOOTSTRAP_RELOC(ELF_RELOC *rpnt, ElfW(Addr) *reloc_addr,
+	ElfW(Addr) symbol_addr, ElfW(Addr) load_addr, ElfW(Addr) *sym)
+{
+	switch (ELF_R_TYPE(rpnt->r_info)) {
+		case R_AARCH64_NONE:
+			break;
+		case R_AARCH64_ABS64:
+		case R_AARCH64_GLOB_DAT:
+		case R_AARCH64_JUMP_SLOT:
+			*reloc_addr = symbol_addr + rpnt->r_addend;
+			break;
+		default:
+			_dl_exit(1);
+	}
+}
diff --git a/ldso/ldso/aarch64/dl-syscalls.h b/ldso/ldso/aarch64/dl-syscalls.h
new file mode 100644
index 000000000..f40c4fd31
--- /dev/null
+++ b/ldso/ldso/aarch64/dl-syscalls.h
@@ -0,0 +1 @@
+/* stub for arch-specific syscall issues */
diff --git a/ldso/ldso/aarch64/dl-sysdep.h b/ldso/ldso/aarch64/dl-sysdep.h
new file mode 100644
index 000000000..4e8cdd906
--- /dev/null
+++ b/ldso/ldso/aarch64/dl-sysdep.h
@@ -0,0 +1,107 @@
+/*
+ * Various assembly language/system dependent hacks that are required
+ * so that we can minimize the amount of platform specific code.
+ * Copyright (C) 2000-2004 by Erik Andersen <andersen@codepoet.org>
+ * Copyright (C) 2017 by Waldemar Brodkorb <wbx@uclibc-ng.org>
+ * Ported from GNU C Library
+ * Licensed under the LGPL v2.1, see the file COPYING.LIB in this tarball.
+ */
+
+/* Copyright (C) 1995-2016 Free Software Foundation, Inc.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public License as
+   published by the Free Software Foundation; either version 2.1 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* Define this if the system uses RELOCA.  */
+#define ELF_USES_RELOCA
+
+#include <elf.h>
+#include <link.h>
+
+/* Initialization sequence for the GOT.  */
+#define INIT_GOT(GOT_BASE,MODULE) \
+{				\
+  GOT_BASE[2] = (unsigned long) _dl_linux_resolve; \
+  GOT_BASE[1] = (unsigned long) MODULE; \
+}
+
+/* Here we define the magic numbers that this dynamic loader should accept */
+#define MAGIC1 EM_AARCH64
+#undef  MAGIC2
+
+/* Used for error messages */
+#define ELF_TARGET "aarch64"
+
+struct elf_resolve;
+unsigned long _dl_linux_resolver(struct elf_resolve * tpnt, int reloc_entry);
+
+#define elf_machine_type_class(type)					\
+  ((((type) == R_AARCH64_JUMP_SLOT 					\
+     || (type) == R_AARCH64_TLS_DTPMOD					\
+     || (type) == R_AARCH64_TLS_DTPREL 					\
+     || (type) == R_AARCH64_TLS_TPREL					\
+     || (type) == R_AARCH64_TLSDESC) * ELF_RTYPE_CLASS_PLT)		\
+   | (((type) == R_AARCH64_COPY) * ELF_RTYPE_CLASS_COPY))
+
+/* Return the link-time address of _DYNAMIC.  Conveniently, this is the
+   first element of the GOT. */
+extern const ElfW(Addr) _GLOBAL_OFFSET_TABLE_[] attribute_hidden;
+static __always_inline ElfW(Addr) __attribute__ ((unused))
+elf_machine_dynamic (void)
+{
+  return _GLOBAL_OFFSET_TABLE_[0];
+}
+
+/* Return the run-time load address of the shared object.  */
+
+static __always_inline ElfW(Addr) __attribute__ ((unused))
+elf_machine_load_address (void)
+{
+  /* To figure out the load address we use the definition that for any symbol:
+     dynamic_addr(symbol) = static_addr(symbol) + load_addr
+
+     The choice of symbol is arbitrary. The static address we obtain
+     by constructing a non GOT reference to the symbol, the dynamic
+     address of the symbol we compute using adrp/add to compute the
+     symbol's address relative to the PC.
+     This depends on 32/16bit relocations being resolved at link time
+     and that the static address fits in the 32/16 bits.  */
+
+  ElfW(Addr) static_addr;
+  ElfW(Addr) dynamic_addr;
+
+  __asm__("					\n"
+"	adrp	%1, _dl_start;			\n"
+"	add	%1, %1, #:lo12:_dl_start	\n"
+"	ldr	%w0, 1f				\n"
+"	b	2f				\n"
+"1:						\n"
+"	.word	_dl_start			\n"
+"2:						\n"
+    : "=r" (static_addr),  "=r" (dynamic_addr));
+  return dynamic_addr - static_addr;
+}
+
+static __always_inline void
+elf_machine_relative(Elf64_Addr load_off, const Elf64_Addr rel_addr,
+                     Elf64_Word relative_count)
+{
+	Elf64_Rela *rpnt = (Elf64_Rela*)rel_addr;
+	--rpnt;
+	do {
+		Elf64_Addr *const reloc_addr = (Elf64_Addr*)(load_off + (++rpnt)->r_offset);
+
+		*reloc_addr = load_off + rpnt->r_addend;
+	} while (--relative_count);
+}
diff --git a/ldso/ldso/aarch64/dl-tlsdesc.S b/ldso/ldso/aarch64/dl-tlsdesc.S
new file mode 100644
index 000000000..4520da69b
--- /dev/null
+++ b/ldso/ldso/aarch64/dl-tlsdesc.S
@@ -0,0 +1,207 @@
+/*
+ * Copyright (C) 2017 Waldemar Brodkorb <wbx@uclibc-ng.org>
+ * Ported from GNU C Library
+ * Licensed under the LGPL v2.1, see the file COPYING.LIB in this tarball.
+ */
+
+/* Thread-local storage handling in the ELF dynamic linker.
+   AArch64 version.
+   Copyright (C) 2011-2017 Free Software Foundation, Inc.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+
+#if defined __UCLIBC_HAS_TLS__
+
+#include <tls.h>
+#include "tlsdesc.h"
+
+#define PTR_REG(n)              x##n
+#define PTR_LOG_SIZE            3
+#define PTR_SIZE        (1<<PTR_LOG_SIZE)
+
+#define NSAVEDQREGPAIRS	16
+#define SAVE_Q_REGISTERS				\
+	stp	q0, q1,	[sp, #-32*NSAVEDQREGPAIRS]!;	\
+	cfi_adjust_cfa_offset (32*NSAVEDQREGPAIRS);	\
+	stp	 q2,  q3, [sp, #32*1];			\
+	stp	 q4,  q5, [sp, #32*2];			\
+	stp	 q6,  q7, [sp, #32*3];			\
+	stp	 q8,  q9, [sp, #32*4];			\
+	stp	q10, q11, [sp, #32*5];			\
+	stp	q12, q13, [sp, #32*6];			\
+	stp	q14, q15, [sp, #32*7];			\
+	stp	q16, q17, [sp, #32*8];			\
+	stp	q18, q19, [sp, #32*9];			\
+	stp	q20, q21, [sp, #32*10];			\
+	stp	q22, q23, [sp, #32*11];			\
+	stp	q24, q25, [sp, #32*12];			\
+	stp	q26, q27, [sp, #32*13];			\
+	stp	q28, q29, [sp, #32*14];			\
+	stp	q30, q31, [sp, #32*15];
+
+#define RESTORE_Q_REGISTERS				\
+	ldp	 q2,  q3, [sp, #32*1];			\
+	ldp	 q4,  q5, [sp, #32*2];			\
+	ldp	 q6,  q7, [sp, #32*3];			\
+	ldp	 q8,  q9, [sp, #32*4];			\
+	ldp	q10, q11, [sp, #32*5];			\
+	ldp	q12, q13, [sp, #32*6];			\
+	ldp	q14, q15, [sp, #32*7];			\
+	ldp	q16, q17, [sp, #32*8];			\
+	ldp	q18, q19, [sp, #32*9];			\
+	ldp	q20, q21, [sp, #32*10];			\
+	ldp	q22, q23, [sp, #32*11];			\
+	ldp	q24, q25, [sp, #32*12];			\
+	ldp	q26, q27, [sp, #32*13];			\
+	ldp	q28, q29, [sp, #32*14];			\
+	ldp	q30, q31, [sp, #32*15];			\
+	ldp	 q0,  q1, [sp], #32*NSAVEDQREGPAIRS;	\
+	cfi_adjust_cfa_offset (-32*NSAVEDQREGPAIRS);
+
+	.text
+
+	/* Compute the thread pointer offset for symbols in the static
+	   TLS block. The offset is the same for all threads.
+	   Prototype:
+	   _dl_tlsdesc_return (tlsdesc *) ;
+	 */
+	.hidden _dl_tlsdesc_return
+	.global	_dl_tlsdesc_return
+	.type	_dl_tlsdesc_return,%function
+	.align 2
+_dl_tlsdesc_return:
+	ldr x0,[x0,#8]
+	ret
+	.size	_dl_tlsdesc_return, .-_dl_tlsdesc_return
+
+#ifdef SHARED
+	/* Handler for dynamic TLS symbols.
+	   Prototype:
+	   _dl_tlsdesc_dynamic (tlsdesc *) ;
+
+	   The second word of the descriptor points to a
+	   tlsdesc_dynamic_arg structure.
+
+	   Returns the offset between the thread pointer and the
+	   object referenced by the argument.
+
+	   ptrdiff_t
+	   __attribute__ ((__regparm__ (1)))
+	   _dl_tlsdesc_dynamic (struct tlsdesc *tdp)
+	   {
+	     struct tlsdesc_dynamic_arg *td = tdp->arg;
+	     dtv_t *dtv = *(dtv_t **)((char *)__thread_pointer + TCBHEAD_DTV);
+	     if (__builtin_expect (td->gen_count <= dtv[0].counter
+		&& (dtv[td->tlsinfo.ti_module].pointer.val
+		    != TLS_DTV_UNALLOCATED),
+		1))
+	       return dtv[td->tlsinfo.ti_module].pointer.val
+		+ td->tlsinfo.ti_offset
+		- __thread_pointer;
+
+	     return ___tls_get_addr (&td->tlsinfo) - __thread_pointer;
+	   }
+	 */
+
+	.hidden _dl_tlsdesc_dynamic
+	.global	_dl_tlsdesc_dynamic
+	.type	_dl_tlsdesc_dynamic,%function
+	cfi_startproc
+	.align 2
+_dl_tlsdesc_dynamic:
+# define NSAVEXREGPAIRS 2
+	stp	x29, x30, [sp,#-(32+16*NSAVEXREGPAIRS)]!
+	cfi_adjust_cfa_offset (32+16*NSAVEXREGPAIRS)
+	mov	x29, sp
+
+	/* Save just enough registers to support fast path, if we fall
+	   into slow path we will save additional registers.  */
+
+	stp	x1,  x2, [sp, #32+16*0]
+	stp	x3,  x4, [sp, #32+16*1]
+
+	mrs	x4, tpidr_el0
+	/* The ldar here happens after the load from [x0] at the call site
+	   (that is generated by the compiler as part of the TLS access ABI),
+	   so it reads the same value (this function is the final value of
+	   td->entry) and thus it synchronizes with the release store to
+	   td->entry in _dl_tlsdesc_resolve_rela_fixup ensuring that the load
+	   from [x0,#PTR_SIZE] here happens after the initialization of td->arg.  */
+	ldar	PTR_REG (zr), [x0]
+	ldr	PTR_REG (1), [x0,#TLSDESC_ARG]
+	ldr	PTR_REG (0), [x4,#TCBHEAD_DTV]
+	ldr	PTR_REG (3), [x1,#TLSDESC_GEN_COUNT]
+	ldr	PTR_REG (2), [x0,#DTV_COUNTER]
+	cmp	PTR_REG (3), PTR_REG (2)
+	b.hi	2f
+	ldr	PTR_REG (2), [x1,#TLSDESC_MODID]
+	add	PTR_REG (0), PTR_REG (0), PTR_REG (2), lsl #(PTR_LOG_SIZE + 1)
+	ldr	PTR_REG (0), [x0] /* Load val member of DTV entry.  */
+	cmp	x0, #TLS_DTV_UNALLOCATED
+	b.eq	2f
+	ldr	PTR_REG (1), [x1,#TLSDESC_MODOFF]
+	add	PTR_REG (0), PTR_REG (0), PTR_REG (1)
+	sub	PTR_REG (0), PTR_REG (0), PTR_REG (4)
+1:
+	ldp	 x1,  x2, [sp, #32+16*0]
+	ldp	 x3,  x4, [sp, #32+16*1]
+
+	ldp	x29, x30, [sp], #(32+16*NSAVEXREGPAIRS)
+	cfi_adjust_cfa_offset (-32-16*NSAVEXREGPAIRS)
+# undef NSAVEXREGPAIRS
+	ret
+2:
+	/* This is the slow path. We need to call __tls_get_addr() which
+	   means we need to save and restore all the register that the
+	   callee will trash.  */
+
+	/* Save the remaining registers that we must treat as caller save.  */
+# define NSAVEXREGPAIRS 7
+	stp	 x5,  x6, [sp, #-16*NSAVEXREGPAIRS]!
+	cfi_adjust_cfa_offset (16*NSAVEXREGPAIRS)
+	stp	 x7,  x8, [sp, #16*1]
+	stp	 x9, x10, [sp, #16*2]
+	stp	x11, x12, [sp, #16*3]
+	stp	x13, x14, [sp, #16*4]
+	stp	x15, x16, [sp, #16*5]
+	stp	x17, x18, [sp, #16*6]
+
+	SAVE_Q_REGISTERS
+
+	mov	x0, x1
+	bl	__tls_get_addr
+
+	mrs	x1, tpidr_el0
+	sub	PTR_REG (0), PTR_REG (0), PTR_REG (1)
+
+	RESTORE_Q_REGISTERS
+
+	ldp	 x7,  x8, [sp, #16*1]
+	ldp	 x9, x10, [sp, #16*2]
+	ldp	x11, x12, [sp, #16*3]
+	ldp	x13, x14, [sp, #16*4]
+	ldp	x15, x16, [sp, #16*5]
+	ldp	x17, x18, [sp, #16*6]
+	ldp	 x5,  x6, [sp], #16*NSAVEXREGPAIRS
+	cfi_adjust_cfa_offset (-16*NSAVEXREGPAIRS)
+	b	1b
+	cfi_endproc
+	.size	_dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic
+# undef NSAVEXREGPAIRS
+
+#endif // SHARED
+#endif // __UCLIBC_HAS_TLS__
diff --git a/ldso/ldso/aarch64/elfinterp.c b/ldso/ldso/aarch64/elfinterp.c
new file mode 100644
index 000000000..879484e16
--- /dev/null
+++ b/ldso/ldso/aarch64/elfinterp.c
@@ -0,0 +1,306 @@
+/* AARCH64 ELF shared library loader suppport
+ *
+ * Copyright (C) 2001-2004 Erik Andersen
+ * Copyright (C) 2016-2017 Waldemar Brodkorb <wbx@uclibc-ng.org>
+ *
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. The name of the above contributors may not be
+ *    used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/* Program to load an ELF binary on a linux system, and run it.
+   References to symbols in sharable libraries can be resolved by either
+   an ELF sharable library or a linux style of shared library. */
+
+#include "ldso.h"
+
+#if defined(USE_TLS) && USE_TLS
+#include "dl-tls.h"
+#include "tlsdeschtab.h"
+#endif
+
+extern int _dl_linux_resolve(void);
+
+unsigned long _dl_linux_resolver(struct elf_resolve *tpnt, int reloc_entry)
+{
+	ELF_RELOC *this_reloc;
+	char *strtab;
+	ElfW(Sym) *symtab;
+	int symtab_index;
+	char *rel_addr;
+	char *new_addr;
+	char **got_addr;
+	ElfW(Addr) instr_addr;
+	char *symname;
+
+	rel_addr = (char *)tpnt->dynamic_info[DT_JMPREL];
+	this_reloc = (ELF_RELOC *)(rel_addr + reloc_entry);
+	symtab_index = ELF_R_SYM(this_reloc->r_info);
+
+	symtab = (ElfW(Sym) *)tpnt->dynamic_info[DT_SYMTAB];
+	strtab = (char *)tpnt->dynamic_info[DT_STRTAB];
+	symname = strtab + symtab[symtab_index].st_name;
+
+	/* Address of jump instruction to fix up */
+	instr_addr = (this_reloc->r_offset + tpnt->loadaddr);
+	got_addr = (char **)instr_addr;
+
+	/* Get the address of the GOT entry */
+	new_addr = _dl_find_hash(symname, &_dl_loaded_modules->symbol_scope, tpnt, ELF_RTYPE_CLASS_PLT, NULL);
+	if (unlikely(!new_addr)) {
+		_dl_dprintf(2, "%s: can't resolve symbol '%s'\n", _dl_progname, symname);
+		_dl_exit(1);
+	}
+#if defined (__SUPPORT_LD_DEBUG__)
+	if (_dl_debug_bindings) {
+		_dl_dprintf(_dl_debug_file, "\nresolve function: %s", symname);
+		if (_dl_debug_detail) _dl_dprintf(_dl_debug_file,
+				"\tpatched %x ==> %x @ %x", *got_addr, new_addr, got_addr);
+	}
+	if (!_dl_debug_nofixups) {
+		*got_addr = new_addr;
+	}
+#else
+	*got_addr = new_addr;
+#endif
+	return (unsigned long)new_addr;
+}
+
+static int
+_dl_parse(struct elf_resolve *tpnt, struct r_scope_elem *scope,
+	  unsigned long rel_addr, unsigned long rel_size,
+	  int (*reloc_fnc) (struct elf_resolve *tpnt, struct r_scope_elem *scope,
+			    ELF_RELOC *rpnt, ElfW(Sym) *symtab, char *strtab))
+{
+	unsigned int i;
+	char *strtab;
+	ElfW(Sym) *symtab;
+	ELF_RELOC *rpnt;
+	int symtab_index;
+
+	/* Parse the relocation information */
+	rpnt = (ELF_RELOC *)rel_addr;
+	rel_size = rel_size / sizeof(ELF_RELOC);
+
+	symtab = (ElfW(Sym) *)tpnt->dynamic_info[DT_SYMTAB];
+	strtab = (char *)tpnt->dynamic_info[DT_STRTAB];
+
+	for (i = 0; i < rel_size; i++, rpnt++) {
+		int res;
+
+		symtab_index = ELF_R_SYM(rpnt->r_info);
+
+		debug_sym(symtab, strtab, symtab_index);
+		debug_reloc(symtab, strtab, rpnt);
+
+		res = reloc_fnc(tpnt, scope, rpnt, symtab, strtab);
+
+		if (res==0) 
+			continue;
+
+		_dl_dprintf(2, "\n%s: ", _dl_progname);
+
+		if (symtab_index)
+			_dl_dprintf(2, "symbol '%s': ", 
+				strtab + symtab[symtab_index].st_name);
+
+		if (unlikely(res < 0)) {
+		        int reloc_type = ELF_R_TYPE(rpnt->r_info);
+			_dl_dprintf(2, "can't handle reloc type %x\n", reloc_type);
+			_dl_exit(-res);
+		} else if (unlikely(res > 0)) {
+			_dl_dprintf(2, "can't resolve symbol\n");
+			return res;
+		}
+	  }
+
+	  return 0;
+}
+
+static int
+_dl_do_reloc (struct elf_resolve *tpnt, struct r_scope_elem *scope,
+	      ELF_RELOC *rpnt, ElfW(Sym) *symtab, char *strtab)
+{
+	int reloc_type;
+	int symtab_index;
+	char *symname;
+#if defined USE_TLS && USE_TLS
+	struct elf_resolve *tls_tpnt = NULL;
+#endif
+	struct symbol_ref sym_ref;
+	ElfW(Addr) *reloc_addr;
+	ElfW(Addr) symbol_addr;
+#if defined (__SUPPORT_LD_DEBUG__)
+	ElfW(Addr) old_val;
+#endif
+
+	reloc_addr = (ElfW(Addr)*)(tpnt->loadaddr + (unsigned long)rpnt->r_offset);
+	reloc_type = ELF_R_TYPE(rpnt->r_info);
+	symtab_index = ELF_R_SYM(rpnt->r_info);
+	sym_ref.sym = &symtab[symtab_index];
+	sym_ref.tpnt = NULL;
+	symbol_addr = 0;
+	symname = strtab + sym_ref.sym->st_name;
+
+	if (symtab_index) {
+		symbol_addr = (ElfW(Addr))_dl_find_hash(symname, scope, tpnt,
+				elf_machine_type_class(reloc_type), &sym_ref);
+
+		/*
+		 * We want to allow undefined references to weak symbols - this might
+		 * have been intentional.  We should not be linking local symbols
+		 * here, so all bases should be covered.
+		 */
+		if (unlikely (!symbol_addr && 
+			(ELF_ST_TYPE(symtab[symtab_index].st_info) != STT_TLS) &&
+			(ELF_ST_BIND(symtab[symtab_index].st_info) != STB_WEAK))) {
+			return 1;
+		}
+		if (_dl_trace_prelink) {
+			_dl_debug_lookup (symname, tpnt, &symtab[symtab_index],
+						&sym_ref, elf_machine_type_class(reloc_type));
+		}
+#if defined USE_TLS && USE_TLS
+		tls_tpnt = sym_ref.tpnt;
+#endif
+	} else {
+		/*
+		 * Relocs against STN_UNDEF are usually treated as using a
+		 * symbol value of zero, and using the module containing the
+		 * reloc itself.
+		 */
+		symbol_addr = sym_ref.sym->st_value;
+#if defined USE_TLS && USE_TLS
+		tls_tpnt = tpnt;
+#endif
+	}
+
+#if defined (__SUPPORT_LD_DEBUG__)
+	old_val = *reloc_addr;
+#endif
+
+	switch (reloc_type) {
+		case R_AARCH64_NONE:
+			break;
+		case R_AARCH64_ABS64: 		/* REL_SYMBOLIC */
+		case R_AARCH64_GLOB_DAT:	/* REL_GOT */
+		case R_AARCH64_JUMP_SLOT:	/* REL_PLT */
+			*reloc_addr = symbol_addr + rpnt->r_addend;
+			break;
+		case R_AARCH64_RELATIVE:
+			*reloc_addr += tpnt->loadaddr + rpnt->r_addend;
+			break;
+		case R_AARCH64_COPY:
+			_dl_memcpy((void *) reloc_addr,
+				   (void *) symbol_addr, sym_ref.sym->st_size);
+			break;
+#if defined USE_TLS && USE_TLS
+		case R_AARCH64_TLS_TPREL:
+			CHECK_STATIC_TLS ((struct link_map *) tls_tpnt);
+			*reloc_addr = (symbol_addr + tls_tpnt->l_tls_offset);
+			break;
+		case R_AARCH64_TLSDESC:
+				{
+					struct tlsdesc volatile *td =
+							(struct tlsdesc volatile *)reloc_addr;
+#ifndef SHARED
+					CHECK_STATIC_TLS((struct link_map *) tls_tpnt);
+#else
+					if (!TRY_STATIC_TLS ((struct link_map *) tls_tpnt))
+					{
+					        td->arg = _dl_make_tlsdesc_dynamic((struct link_map *) tls_tpnt, symbol_addr);
+					        td->entry = _dl_tlsdesc_dynamic;
+					}
+					else
+#endif
+					{
+					        td->arg = symbol_addr + tls_tpnt->l_tls_offset;
+					        td->entry = _dl_tlsdesc_return;
+					}
+				}
+			break;
+#endif
+		default:
+			return -1; /*call _dl_exit(1) */
+	}
+
+#if defined (__SUPPORT_LD_DEBUG__)
+	if (_dl_debug_reloc && _dl_debug_detail) {
+		_dl_dprintf(_dl_debug_file, "\tpatched: %x ==> %x @ %x\n", 
+				old_val, *reloc_addr, reloc_addr);
+	}
+#endif
+
+	return 0;
+}
+
+static int
+_dl_do_lazy_reloc (struct elf_resolve *tpnt, struct r_scope_elem *scope,
+		   ELF_RELOC *rpnt, ElfW(Sym) *symtab, char *strtab)
+{
+	int reloc_type;
+	ElfW(Addr) *reloc_addr;
+#if defined (__SUPPORT_LD_DEBUG__)
+	ElfW(Addr) old_val;
+#endif
+
+	(void)scope;
+	(void)strtab;
+
+	reloc_addr = (ElfW(Addr)*)(tpnt->loadaddr + rpnt->r_offset);
+	reloc_type = ELF_R_TYPE(rpnt->r_info);
+
+#if defined (__SUPPORT_LD_DEBUG__)
+	old_val = *reloc_addr;
+#endif
+
+	switch (reloc_type) {
+		case R_AARCH64_NONE:
+			break;
+		case R_AARCH64_JUMP_SLOT:
+			*reloc_addr += tpnt->loadaddr;
+			break;
+		default:
+			return -1; /*call _dl_exit(1) */
+	}
+
+#if defined (__SUPPORT_LD_DEBUG__)
+	if (_dl_debug_reloc && _dl_debug_detail) {
+		_dl_dprintf(_dl_debug_file, "\tpatched_lazy: %x ==> %x @ %x\n",
+			    old_val, *reloc_addr, reloc_addr);
+	}
+#endif
+
+	return 0;
+}
+
+void _dl_parse_lazy_relocation_information(struct dyn_elf *rpnt,
+	unsigned long rel_addr, unsigned long rel_size)
+{
+	(void)_dl_parse(rpnt->dyn, NULL, rel_addr, rel_size, _dl_do_lazy_reloc);
+}
+
+int _dl_parse_relocation_information(struct dyn_elf *rpnt,
+	struct r_scope_elem *scope, unsigned long rel_addr, unsigned long rel_size)
+{
+	return _dl_parse(rpnt->dyn, scope, rel_addr, rel_size, _dl_do_reloc);
+}
diff --git a/ldso/ldso/aarch64/resolve.S b/ldso/ldso/aarch64/resolve.S
new file mode 100644
index 000000000..3b907c46c
--- /dev/null
+++ b/ldso/ldso/aarch64/resolve.S
@@ -0,0 +1,97 @@
+/*
+ * Copyright (C) 2016 by Waldemar Brodkorb <wbx@uclibc-ng.org>
+ * Licensed under the LGPL v2.1, see the file COPYING.LIB in this tarball.
+ * ported from GNU libc
+ */
+
+/* Copyright (C) 2005-2016 Free Software Foundation, Inc.
+
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public License as
+   published by the Free Software Foundation; either version 2.1 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <features.h>
+
+#define PTR_REG(n)		x##n
+#define PTR_LOG_SIZE		3
+#define PTR_SIZE	(1<<PTR_LOG_SIZE)
+
+#define ip0 x16
+#define ip0l PTR_REG (16)
+#define ip1 x17
+#define lr  x30
+
+/* RELA relocatons are 3 pointers */
+#define RELA_SIZE (PTR_SIZE * 3)
+
+ .text
+ .globl _dl_linux_resolve
+ .type _dl_linux_resolve, %function
+ .align 2
+
+_dl_linux_resolve:
+	/* AArch64 we get called with:
+	   ip0		&PLTGOT[2]
+	   ip1		temp(dl resolver entry point)
+	   [sp, #8]	lr
+	   [sp, #0]	&PLTGOT[n]
+	 */
+
+	/* Save arguments.  */
+	stp	x8, x9, [sp, #-(80+8*16)]!
+	stp	x6, x7, [sp,  #16]
+	stp	x4, x5, [sp,  #32]
+	stp	x2, x3, [sp,  #48]
+	stp	x0, x1, [sp,  #64]
+	stp	q0, q1, [sp, #(80+0*16)]
+	stp	q2, q3, [sp, #(80+2*16)]
+	stp	q4, q5, [sp, #(80+4*16)]
+	stp	q6, q7, [sp, #(80+6*16)]
+
+	/* Get pointer to linker struct.  */
+	ldr	PTR_REG (0), [ip0, #-PTR_SIZE]
+
+	/* Prepare to call _dl_linux_resolver().  */
+	ldr	x1, [sp, 80+8*16]	/* Recover &PLTGOT[n] */
+
+	sub     x1, x1, ip0
+	add     x1, x1, x1, lsl #1
+	lsl     x1, x1, #3
+	sub     x1, x1, #(RELA_SIZE<<3)
+	lsr     x1, x1, #3
+
+	/* Call resolver routine.  */
+	bl	_dl_linux_resolver
+
+	/* Save the return.  */
+	mov	ip0, x0
+
+	/* Get arguments and return address back.  */
+	ldp	q0, q1, [sp, #(80+0*16)]
+	ldp	q2, q3, [sp, #(80+2*16)]
+	ldp	q4, q5, [sp, #(80+4*16)]
+	ldp	q6, q7, [sp, #(80+6*16)]
+	ldp	x0, x1, [sp, #64]
+	ldp	x2, x3, [sp, #48]
+	ldp	x4, x5, [sp, #32]
+	ldp	x6, x7, [sp, #16]
+	ldp	x8, x9, [sp], #(80+8*16)
+
+	ldp	ip1, lr, [sp], #16
+
+	/* Jump to the newly found address.  */
+	br	ip0
+
+.size _dl_linux_resolve, .-_dl_linux_resolve
diff --git a/libc/string/aarch64/Makefile b/libc/string/aarch64/Makefile
new file mode 100644
index 000000000..0a95346fd
--- /dev/null
+++ b/libc/string/aarch64/Makefile
@@ -0,0 +1,13 @@
+# Makefile for uClibc
+#
+# Copyright (C) 2000-2005 Erik Andersen <andersen@uclibc.org>
+#
+# Licensed under the LGPL v2.1, see the file COPYING.LIB in this tarball.
+#
+
+top_srcdir:=../../../
+top_builddir:=../../../
+all: objs
+include $(top_builddir)Rules.mak
+include ../Makefile.in
+include $(top_srcdir)Makerules
diff --git a/libc/string/aarch64/memcpy.S b/libc/string/aarch64/memcpy.S
new file mode 100644
index 000000000..87b2552a2
--- /dev/null
+++ b/libc/string/aarch64/memcpy.S
@@ -0,0 +1,230 @@
+/* Copyright (C) 2012-2017 Free Software Foundation, Inc.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+
+/* Assumptions:
+ *
+ * ARMv8-a, AArch64, unaligned accesses.
+ *
+ */
+
+#define dstin	x0
+#define src	x1
+#define count	x2
+#define dst	x3
+#define srcend	x4
+#define dstend	x5
+#define A_l	x6
+#define A_lw	w6
+#define A_h	x7
+#define A_hw	w7
+#define B_l	x8
+#define B_lw	w8
+#define B_h	x9
+#define C_l	x10
+#define C_h	x11
+#define D_l	x12
+#define D_h	x13
+#define E_l	src
+#define E_h	count
+#define F_l	srcend
+#define F_h	dst
+#define G_l	count
+#define G_h	dst
+#define tmp1	x14
+
+ENTRY (memcpy)
+
+	prfm	PLDL1KEEP, [src]
+	add	srcend, src, count
+	add	dstend, dstin, count
+	cmp	count, 16
+	b.ls	L(copy16)
+	cmp	count, 96
+	b.hi	L(copy_long)
+
+	/* Medium copies: 17..96 bytes.  */
+	sub	tmp1, count, 1
+	ldp	A_l, A_h, [src]
+	tbnz	tmp1, 6, L(copy96)
+	ldp	D_l, D_h, [srcend, -16]
+	tbz	tmp1, 5, 1f
+	ldp	B_l, B_h, [src, 16]
+	ldp	C_l, C_h, [srcend, -32]
+	stp	B_l, B_h, [dstin, 16]
+	stp	C_l, C_h, [dstend, -32]
+1:
+	stp	A_l, A_h, [dstin]
+	stp	D_l, D_h, [dstend, -16]
+	ret
+
+	.p2align 4
+	/* Small copies: 0..16 bytes.  */
+L(copy16):
+	cmp	count, 8
+	b.lo	1f
+	ldr	A_l, [src]
+	ldr	A_h, [srcend, -8]
+	str	A_l, [dstin]
+	str	A_h, [dstend, -8]
+	ret
+	.p2align 4
+1:
+	tbz	count, 2, 1f
+	ldr	A_lw, [src]
+	ldr	A_hw, [srcend, -4]
+	str	A_lw, [dstin]
+	str	A_hw, [dstend, -4]
+	ret
+
+	/* Copy 0..3 bytes.  Use a branchless sequence that copies the same
+	   byte 3 times if count==1, or the 2nd byte twice if count==2.  */
+1:
+	cbz	count, 2f
+	lsr	tmp1, count, 1
+	ldrb	A_lw, [src]
+	ldrb	A_hw, [srcend, -1]
+	ldrb	B_lw, [src, tmp1]
+	strb	A_lw, [dstin]
+	strb	B_lw, [dstin, tmp1]
+	strb	A_hw, [dstend, -1]
+2:	ret
+
+	.p2align 4
+	/* Copy 64..96 bytes.  Copy 64 bytes from the start and
+	   32 bytes from the end.  */
+L(copy96):
+	ldp	B_l, B_h, [src, 16]
+	ldp	C_l, C_h, [src, 32]
+	ldp	D_l, D_h, [src, 48]
+	ldp	E_l, E_h, [srcend, -32]
+	ldp	F_l, F_h, [srcend, -16]
+	stp	A_l, A_h, [dstin]
+	stp	B_l, B_h, [dstin, 16]
+	stp	C_l, C_h, [dstin, 32]
+	stp	D_l, D_h, [dstin, 48]
+	stp	E_l, E_h, [dstend, -32]
+	stp	F_l, F_h, [dstend, -16]
+	ret
+
+	/* Align DST to 16 byte alignment so that we don't cross cache line
+	   boundaries on both loads and stores.  There are at least 96 bytes
+	   to copy, so copy 16 bytes unaligned and then align.  The loop
+	   copies 64 bytes per iteration and prefetches one iteration ahead.  */
+
+	.p2align 4
+L(copy_long):
+	and	tmp1, dstin, 15
+	bic	dst, dstin, 15
+	ldp	D_l, D_h, [src]
+	sub	src, src, tmp1
+	add	count, count, tmp1	/* Count is now 16 too large.  */
+	ldp	A_l, A_h, [src, 16]
+	stp	D_l, D_h, [dstin]
+	ldp	B_l, B_h, [src, 32]
+	ldp	C_l, C_h, [src, 48]
+	ldp	D_l, D_h, [src, 64]!
+	subs	count, count, 128 + 16	/* Test and readjust count.  */
+	b.ls	2f
+1:
+	stp	A_l, A_h, [dst, 16]
+	ldp	A_l, A_h, [src, 16]
+	stp	B_l, B_h, [dst, 32]
+	ldp	B_l, B_h, [src, 32]
+	stp	C_l, C_h, [dst, 48]
+	ldp	C_l, C_h, [src, 48]
+	stp	D_l, D_h, [dst, 64]!
+	ldp	D_l, D_h, [src, 64]!
+	subs	count, count, 64
+	b.hi	1b
+
+	/* Write the last full set of 64 bytes.  The remainder is at most 64
+	   bytes, so it is safe to always copy 64 bytes from the end even if
+	   there is just 1 byte left.  */
+2:
+	ldp	E_l, E_h, [srcend, -64]
+	stp	A_l, A_h, [dst, 16]
+	ldp	A_l, A_h, [srcend, -48]
+	stp	B_l, B_h, [dst, 32]
+	ldp	B_l, B_h, [srcend, -32]
+	stp	C_l, C_h, [dst, 48]
+	ldp	C_l, C_h, [srcend, -16]
+	stp	D_l, D_h, [dst, 64]
+	stp	E_l, E_h, [dstend, -64]
+	stp	A_l, A_h, [dstend, -48]
+	stp	B_l, B_h, [dstend, -32]
+	stp	C_l, C_h, [dstend, -16]
+	ret
+
+	.p2align 4
+L(move_long):
+	cbz	tmp1, 3f
+
+	add	srcend, src, count
+	add	dstend, dstin, count
+
+	/* Align dstend to 16 byte alignment so that we don't cross cache line
+	   boundaries on both loads and stores.  There are at least 96 bytes
+	   to copy, so copy 16 bytes unaligned and then align.  The loop
+	   copies 64 bytes per iteration and prefetches one iteration ahead.  */
+
+	and	tmp1, dstend, 15
+	ldp	D_l, D_h, [srcend, -16]
+	sub	srcend, srcend, tmp1
+	sub	count, count, tmp1
+	ldp	A_l, A_h, [srcend, -16]
+	stp	D_l, D_h, [dstend, -16]
+	ldp	B_l, B_h, [srcend, -32]
+	ldp	C_l, C_h, [srcend, -48]
+	ldp	D_l, D_h, [srcend, -64]!
+	sub	dstend, dstend, tmp1
+	subs	count, count, 128
+	b.ls	2f
+
+	nop
+1:
+	stp	A_l, A_h, [dstend, -16]
+	ldp	A_l, A_h, [srcend, -16]
+	stp	B_l, B_h, [dstend, -32]
+	ldp	B_l, B_h, [srcend, -32]
+	stp	C_l, C_h, [dstend, -48]
+	ldp	C_l, C_h, [srcend, -48]
+	stp	D_l, D_h, [dstend, -64]!
+	ldp	D_l, D_h, [srcend, -64]!
+	subs	count, count, 64
+	b.hi	1b
+
+	/* Write the last full set of 64 bytes.  The remainder is at most 64
+	   bytes, so it is safe to always copy 64 bytes from the start even if
+	   there is just 1 byte left.  */
+2:
+	ldp	G_l, G_h, [src, 48]
+	stp	A_l, A_h, [dstend, -16]
+	ldp	A_l, A_h, [src, 32]
+	stp	B_l, B_h, [dstend, -32]
+	ldp	B_l, B_h, [src, 16]
+	stp	C_l, C_h, [dstend, -48]
+	ldp	C_l, C_h, [src]
+	stp	D_l, D_h, [dstend, -64]
+	stp	G_l, G_h, [dstin, 48]
+	stp	A_l, A_h, [dstin, 32]
+	stp	B_l, B_h, [dstin, 16]
+	stp	C_l, C_h, [dstin]
+3:	ret
+
+END (memcpy)
+libc_hidden_def (memcpy)
diff --git a/libc/string/aarch64/memset.S b/libc/string/aarch64/memset.S
new file mode 100644
index 000000000..d6686bedc
--- /dev/null
+++ b/libc/string/aarch64/memset.S
@@ -0,0 +1,189 @@
+/* Copyright (C) 2012-2017 Free Software Foundation, Inc.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+
+/* Assumptions:
+ *
+ * ARMv8-a, AArch64, unaligned accesses
+ *
+ */
+
+#define dstin	x0
+#define val	x1
+#define valw	w1
+#define count	x2
+#define dst	x3
+#define dstend	x4
+#define tmp1	x5
+#define tmp1w	w5
+#define tmp2	x6
+#define tmp2w	w6
+#define zva_len x7
+#define zva_lenw w7
+
+ENTRY_ALIGN (memset, 6)
+
+	dup	v0.16B, valw
+	add	dstend, dstin, count
+
+	cmp	count, 96
+	b.hi	L(set_long)
+	cmp	count, 16
+	b.hs	L(set_medium)
+	mov	val, v0.D[0]
+
+	/* Set 0..15 bytes.  */
+	tbz	count, 3, 1f
+	str	val, [dstin]
+	str	val, [dstend, -8]
+	ret
+	nop
+1:	tbz	count, 2, 2f
+	str	valw, [dstin]
+	str	valw, [dstend, -4]
+	ret
+2:	cbz	count, 3f
+	strb	valw, [dstin]
+	tbz	count, 1, 3f
+	strh	valw, [dstend, -2]
+3:	ret
+
+	/* Set 17..96 bytes.  */
+L(set_medium):
+	str	q0, [dstin]
+	tbnz	count, 6, L(set96)
+	str	q0, [dstend, -16]
+	tbz	count, 5, 1f
+	str	q0, [dstin, 16]
+	str	q0, [dstend, -32]
+1:	ret
+
+	.p2align 4
+	/* Set 64..96 bytes.  Write 64 bytes from the start and
+	   32 bytes from the end.  */
+L(set96):
+	str	q0, [dstin, 16]
+	stp	q0, q0, [dstin, 32]
+	stp	q0, q0, [dstend, -32]
+	ret
+
+	.p2align 3
+	nop
+L(set_long):
+	and	valw, valw, 255
+	bic	dst, dstin, 15
+	str	q0, [dstin]
+	cmp	count, 256
+	ccmp	valw, 0, 0, cs
+	b.eq	L(try_zva)
+L(no_zva):
+	sub	count, dstend, dst	/* Count is 16 too large.  */
+	add	dst, dst, 16
+	sub	count, count, 64 + 16	/* Adjust count and bias for loop.  */
+1:	stp	q0, q0, [dst], 64
+	stp	q0, q0, [dst, -32]
+L(tail64):
+	subs	count, count, 64
+	b.hi	1b
+2:	stp	q0, q0, [dstend, -64]
+	stp	q0, q0, [dstend, -32]
+	ret
+
+	.p2align 3
+L(try_zva):
+	mrs	tmp1, dczid_el0
+	tbnz	tmp1w, 4, L(no_zva)
+	and	tmp1w, tmp1w, 15
+	cmp	tmp1w, 4	/* ZVA size is 64 bytes.  */
+	b.ne	 L(zva_128)
+
+	/* Write the first and last 64 byte aligned block using stp rather
+	   than using DC ZVA.  This is faster on some cores.
+	 */
+L(zva_64):
+	str	q0, [dst, 16]
+	stp	q0, q0, [dst, 32]
+	bic	dst, dst, 63
+	stp	q0, q0, [dst, 64]
+	stp	q0, q0, [dst, 96]
+	sub	count, dstend, dst	/* Count is now 128 too large.	*/
+	sub	count, count, 128+64+64	/* Adjust count and bias for loop.  */
+	add	dst, dst, 128
+	nop
+1:	dc	zva, dst
+	add	dst, dst, 64
+	subs	count, count, 64
+	b.hi	1b
+	stp	q0, q0, [dst, 0]
+	stp	q0, q0, [dst, 32]
+	stp	q0, q0, [dstend, -64]
+	stp	q0, q0, [dstend, -32]
+	ret
+
+	.p2align 3
+L(zva_128):
+	cmp	tmp1w, 5	/* ZVA size is 128 bytes.  */
+	b.ne	L(zva_other)
+
+	str	q0, [dst, 16]
+	stp	q0, q0, [dst, 32]
+	stp	q0, q0, [dst, 64]
+	stp	q0, q0, [dst, 96]
+	bic	dst, dst, 127
+	sub	count, dstend, dst	/* Count is now 128 too large.	*/
+	sub	count, count, 128+128	/* Adjust count and bias for loop.  */
+	add	dst, dst, 128
+1:	dc	zva, dst
+	add	dst, dst, 128
+	subs	count, count, 128
+	b.hi	1b
+	stp	q0, q0, [dstend, -128]
+	stp	q0, q0, [dstend, -96]
+	stp	q0, q0, [dstend, -64]
+	stp	q0, q0, [dstend, -32]
+	ret
+
+L(zva_other):
+	mov	tmp2w, 4
+	lsl	zva_lenw, tmp2w, tmp1w
+	add	tmp1, zva_len, 64	/* Max alignment bytes written.	 */
+	cmp	count, tmp1
+	blo	L(no_zva)
+
+	sub	tmp2, zva_len, 1
+	add	tmp1, dst, zva_len
+	add	dst, dst, 16
+	subs	count, tmp1, dst	/* Actual alignment bytes to write.  */
+	bic	tmp1, tmp1, tmp2	/* Aligned dc zva start address.  */
+	beq	2f
+1:	stp	q0, q0, [dst], 64
+	stp	q0, q0, [dst, -32]
+	subs	count, count, 64
+	b.hi	1b
+2:	mov	dst, tmp1
+	sub	count, dstend, tmp1	/* Remaining bytes to write.  */
+	subs	count, count, zva_len
+	b.lo	4f
+3:	dc	zva, dst
+	add	dst, dst, zva_len
+	subs	count, count, zva_len
+	b.hs	3b
+4:	add	count, count, zva_len
+	b	L(tail64)
+
+END (memset)
+libc_hidden_def (memset)
diff --git a/libc/sysdeps/linux/aarch64/Makefile b/libc/sysdeps/linux/aarch64/Makefile
new file mode 100644
index 000000000..86a32a613
--- /dev/null
+++ b/libc/sysdeps/linux/aarch64/Makefile
@@ -0,0 +1,9 @@
+# Makefile for uClibc-ng
+# Licensed under the LGPL v2.1, see the file COPYING.LIB in this tarball.
+
+top_srcdir=../../../../
+top_builddir=../../../../
+all: objs
+include $(top_builddir)Rules.mak
+include Makefile.arch
+include $(top_srcdir)Makerules
diff --git a/libc/sysdeps/linux/aarch64/Makefile.arch b/libc/sysdeps/linux/aarch64/Makefile.arch
new file mode 100644
index 000000000..b38f760a8
--- /dev/null
+++ b/libc/sysdeps/linux/aarch64/Makefile.arch
@@ -0,0 +1,5 @@
+# Makefile for uClibc-ng
+# Licensed under the LGPL v2.1, see the file COPYING.LIB in this tarball.
+
+CSRC-y := __syscall_error.c
+SSRC-y := clone.S setjmp.S syscall.S __longjmp.S bsd-setjmp.S bsd-_setjmp.S vfork.S
diff --git a/libc/sysdeps/linux/aarch64/__longjmp.S b/libc/sysdeps/linux/aarch64/__longjmp.S
new file mode 100644
index 000000000..e4d11b165
--- /dev/null
+++ b/libc/sysdeps/linux/aarch64/__longjmp.S
@@ -0,0 +1,107 @@
+/* Copyright (C) 1997-2016 Free Software Foundation, Inc.
+
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public License as
+   published