summaryrefslogtreecommitdiff
path: root/libc/string/csky/cskyv2/abiv2_memcpy.S
diff options
context:
space:
mode:
author: Guo Ren <ren_guo@c-sky.com> 2017-10-15 20:59:34 +0800
committer: Waldemar Brodkorb <wbx@uclibc-ng.org> 2017-11-19 09:20:11 +0100
commit: 2fcffe26e815b7125a357c83b59617ab93c16b41 (patch)
tree: fe5a973dc4bbf38bce8468a4497f5f656f082a9f /libc/string/csky/cskyv2/abiv2_memcpy.S
parent: 9e38e0aa45cca21d5023d0af94377f0e1e41d2f4 (diff)
csky: port to uclibc-ng
Follow these steps to build a C-SKY uClibc-ng Linux system: 1. git clone https://github.com/c-sky/buildroot.git 2. cd buildroot 3. make qemu_csky_ck810_uclibc_defconfig 4. make. Then follow buildroot/board/qemu/csky/readme.txt to run it. The buildroot toolchain is pre-built, but you can rebuild the C-SKY uclibc-ng on its own and install it into the buildroot sysroot manually. We'll try our best to improve uclibc-ng continuously. Signed-off-by: Guo Ren <ren_guo@c-sky.com>
Diffstat (limited to 'libc/string/csky/cskyv2/abiv2_memcpy.S')
-rw-r--r-- libc/string/csky/cskyv2/abiv2_memcpy.S 184
1 files changed, 184 insertions, 0 deletions
diff --git a/libc/string/csky/cskyv2/abiv2_memcpy.S b/libc/string/csky/cskyv2/abiv2_memcpy.S
new file mode 100644
index 000000000..c112ec01b
--- /dev/null
+++ b/libc/string/csky/cskyv2/abiv2_memcpy.S
@@ -0,0 +1,184 @@
+.macro GET_FRONT_BITS rx ry /* shift rx in place by ry bits; the direction depends on byte order */
+#ifndef __cskyLE__
+ lsl \rx, \ry /* not a little-endian build: shift left */
+#else
+ lsr \rx, \ry /* little-endian build: shift right */
+#endif /* __cskyLE__ */
+.endm
+
+.macro GET_AFTER_BITS rx ry /* shift rx in place by ry bits, opposite direction to GET_FRONT_BITS */
+#ifndef __cskyLE__
+ lsr \rx, \ry /* not a little-endian build: shift right */
+#else
+ lsl \rx, \ry /* little-endian build: shift left */
+#endif /* __cskyLE__ */
+.endm
+
+
+#ifndef WANT_WIDE /* the exported symbol name is picked by the preprocessor */
+# define Wmemcpy memcpy
+#else /* NOTE(review): the routine counts r2 in single bytes; confirm that is intended when the wide name is selected */
+# define Wmemcpy wmemcpy
+#endif /* WANT_WIDE */
+
+/* void *memcpy(void *dest, const void *src, size_t n); */
+/* As used below: r0 = dest (never written), r1 = src, r2 = bytes left, r3 = store pointer; scratch: r12, r13, r18-r25. */
+ .text
+ .align 2
+ .global Wmemcpy
+ .type Wmemcpy, @function
+Wmemcpy:
+ mov r3, r0 /* work on a copy of dest so r0 keeps its entry value */
+ cmplti r2, 4 /* If len less than 4 bytes */
+ jbt .L_copy_by_byte
+/* len is not below 4 here: test the alignment of dest first */
+ mov r12, r0
+ andi r12, 3 /* r12 = dest & 3 */
+ bnez r12, .L_dest_not_aligned /* If dest is not 4 bytes aligned */
+.L0: /* also re-entered from the .L1 loop once dest has been aligned */
+ mov r12, r1
+ andi r12, 3 /* r12 = src & 3; the unaligned-src path uses it as the misalignment in bytes */
+ bnez r12, .L_dest_aligned_but_src_not_aligned /* If dest is aligned, but src is not aligned */
+/* both pointers are 4-byte aligned from here on */
+ cmplti r2, 16 /* dest and src are all aligned */
+ jbt .L_aligned_and_len_less_16bytes /* If len less than 16 bytes */
+
+.L_aligned_and_len_larger_16bytes: /* src and dst are all aligned, and len >= 16 bytes */
+ ldw r18, (r1, 0) /* load all four words before storing any of them */
+ ldw r19, (r1, 4)
+ ldw r20, (r1, 8)
+ ldw r21, (r1, 12)
+ stw r18, (r3, 0)
+ stw r19, (r3, 4)
+ stw r20, (r3, 8)
+ stw r21, (r3, 12)
+ subi r2, 16 /* 16 bytes done: update the count and both pointers */
+ addi r1, 16
+ addi r3, 16
+ cmplti r2, 16
+ jbf .L_aligned_and_len_larger_16bytes /* repeat while at least 16 bytes remain, else fall through */
+
+.L_aligned_and_len_less_16bytes: /* both pointers aligned, fewer than 16 bytes left: one word per pass */
+ cmplti r2, 4
+ jbt .L_copy_by_byte /* fewer than 4 bytes left: finish byte by byte */
+ ldw r18, (r1, 0)
+ stw r18, (r3, 0)
+ subi r2, 4
+ addi r1, 4
+ addi r3, 4
+ jbr .L_aligned_and_len_less_16bytes
+
+.L_copy_by_byte: /* copy the remaining r2 bytes one at a time; used for short tails and as the fallback of the overlap checks */
+ cmpnei r2, 0
+ jbf .L_return /* r2 == 0: nothing left to copy */
+ ldb r18, (r1, 0)
+ stb r18, (r3, 0)
+ subi r2, 1
+ addi r1, 1
+ addi r3, 1
+ jbr .L_copy_by_byte
+
+.L_return:
+ rts /* r0 was never written, so it still holds the dest value passed in */
+
+/* If dest is not aligned, copy single bytes until it is.
+ After that, check whether the src is aligned. */
+
+.L_dest_not_aligned:
+ rsub r13, r1, r3 /* consider overlapped case */
+ abs r13, r13 /* r13 = distance between the src and dest pointers */
+ cmplt r13, r2 /* distance smaller than len? */
+ jbt .L_copy_by_byte /* yes: do the whole copy byte by byte */
+
+.L1: /* r12 enters as dest & 3 and counts up to 4, so 4 - (dest & 3) bytes are copied */
+ ldb r18, (r1, 0) /* makes the dest align. */
+ stb r18, (r3, 0)
+ addi r12, 1
+ subi r2, 1
+ addi r1, 1
+ addi r3, 1
+ cmpnei r12, 4
+ jbt .L1
+ cmplti r2, 4
+ jbt .L_copy_by_byte /* fewer than 4 bytes left after aligning dest */
+ jbf .L0 /* judge whether the src is aligned. Always taken here, since the jbt above fell through */
+
+.L_dest_aligned_but_src_not_aligned:
+ rsub r13, r1, r3 /* consider overlapped case */
+ abs r13, r13 /* r13 = distance between the src and dest pointers */
+ cmplt r13, r2 /* distance smaller than len? */
+ jbt .L_copy_by_byte /* yes: do the whole copy byte by byte */
+/* round src down to a word boundary and preload the word that holds the first bytes to copy */
+ bclri r1, 0
+ bclri r1, 1
+ ldw r18, (r1, 0) /* r18 = aligned word containing the first src bytes */
+ addi r1, 4 /* r1 = next aligned src word */
+/* derive the two shift counts from the byte misalignment in r12 (src & 3) */
+ movi r13, 8
+ mult r13, r12
+ mov r24, r13 /* r24 = src misalignment in bits (8 * r12; r12 holds it in bytes) */
+ rsubi r13, 32
+ mov r25, r13 /* r25 = 32 - r24, the complementary shift count */
+
+ cmplti r2, 16
+ jbt .L_not_aligned_and_len_less_16bytes /* fewer than 16 bytes: skip the unrolled loop */
+
+.L_not_aligned_and_len_larger_16bytes: /* in: r18 = last src word loaded, r1 = next aligned src word, r24/r25 = shift counts, r3 aligned */
+ ldw r20, (r1, 0)
+ ldw r21, (r1, 4)
+ ldw r22, (r1, 8)
+ ldw r23, (r1, 12)
+/* each stored word = GET_FRONT_BITS(previous src word, r24) OR GET_AFTER_BITS(next src word, r25) */
+ GET_FRONT_BITS r18 r24 /* little or big endian? */
+ mov r19, r20 /* keep an unshifted copy; it is the previous word of the next step */
+ GET_AFTER_BITS r20 r25
+ or r20, r18 /* r20 = 1st output word */
+
+ GET_FRONT_BITS r19 r24
+ mov r18, r21
+ GET_AFTER_BITS r21 r25 /* shift count comes from r25 in every step */
+ or r21, r19 /* r21 = 2nd output word */
+
+ GET_FRONT_BITS r18 r24
+ mov r19, r22
+ GET_AFTER_BITS r22 r25
+ or r22, r18 /* r22 = 3rd output word */
+
+ GET_FRONT_BITS r19 r24
+ mov r18, r23 /* r18 carries the last loaded word into the next pass or the tail code */
+ GET_AFTER_BITS r23 r25
+ or r23, r19 /* r23 = 4th output word */
+
+ stw r20, (r3, 0)
+ stw r21, (r3, 4)
+ stw r22, (r3, 8)
+ stw r23, (r3, 12)
+ subi r2, 16 /* 16 bytes done: update the count and both pointers */
+ addi r1, 16
+ addi r3, 16
+ cmplti r2, 16
+ jbf .L_not_aligned_and_len_larger_16bytes /* repeat while at least 16 bytes remain, else fall through */
+
+.L_not_aligned_and_len_less_16bytes: /* src unaligned, fewer than 16 bytes left; r18 = last src word loaded */
+ cmplti r2, 4
+ jbf .L2 /* at least 4 bytes left: merge one more word */
+ rsubi r12, 4 /* r12 = 4 - (src misalignment in bytes) */
+ subu r1, r12 /* move r1 back from the next aligned word to the first src byte not yet copied */
+ jbr .L_copy_by_byte
+.L2:
+ ldw r21, (r1, 0) /* next aligned src word */
+ GET_FRONT_BITS r18 r24
+ mov r19, r18 /* r19 = contribution of the previous word */
+ mov r18, r21 /* keep the new word unshifted for the following pass */
+ GET_AFTER_BITS r21 r25
+ or r21, r19 /* r21 = merged output word */
+ stw r21, (r3, 0)
+ subi r2, 4
+ addi r1, 4
+ addi r3, 4
+ jbr .L_not_aligned_and_len_less_16bytes
+
+.size Wmemcpy, .-Wmemcpy /* symbol size = distance from the Wmemcpy label to this point */
+
+libc_hidden_def(Wmemcpy) /* NOTE(review): this macro is not defined in this file; presumably provided by whatever includes it - confirm */
+.weak Wmemcpy /* declare the symbol weak */