diff options
author | Alexey Brodkin <Alexey.Brodkin@synopsys.com> | 2017-08-18 01:25:25 +0300 |
---|---|---|
committer | Waldemar Brodkorb <wbx@uclibc-ng.org> | 2017-08-20 12:53:17 +0200 |
commit | 26cc89d99cc9d783859eb9d38e067fad5d6bbb60 (patch) | |
tree | bb2f9a637c0f492eeb66465e975aa26f6207eb3c /libc/string/arc/arcv2/memcpy.S | |
parent | d9f7022736fd429c6c62c93441804dc87900ed6f (diff) |
arc: Merge ARCv2 string routines in generic ARC .S files
In commit cde74b83f9b2 ("ARC: remove special CFLAGS/LDFLAGS handling") we
got rid of CONFIG_ARC_CPU_HS, which was used to select the ARCv2-specific
implementations of the optimized string routines. As a result, the ARCv2-tuned
memset/memcpy/strcmp are no longer used; instead, the ARC700 versions are used for
both ARC700 and ARCHS.
Without a uClibc config option, the only way to tell which CPU type we're
targeting is through GCC's built-in defines, i.e. conditional file
inclusion in Makefiles is no longer possible. That leaves us only one option: merge both
implementations into one file and use ifdefs.
Signed-off-by: Alexey Brodkin <abrodkin@synopsys.com>
Diffstat (limited to 'libc/string/arc/arcv2/memcpy.S')
-rw-r--r-- | libc/string/arc/arcv2/memcpy.S | 236 |
1 files changed, 0 insertions, 236 deletions
diff --git a/libc/string/arc/arcv2/memcpy.S b/libc/string/arc/arcv2/memcpy.S deleted file mode 100644 index ba29e8790..000000000 --- a/libc/string/arc/arcv2/memcpy.S +++ /dev/null @@ -1,236 +0,0 @@ -/* - * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com) - * - * Licensed under the LGPL v2.1 or later, see the file COPYING.LIB in this tarball. - */ - -#include <features.h> -#include <sysdep.h> - -#ifdef __LITTLE_ENDIAN__ -# define SHIFT_1(RX,RY,IMM) asl RX, RY, IMM ; << -# define SHIFT_2(RX,RY,IMM) lsr RX, RY, IMM ; >> -# define MERGE_1(RX,RY,IMM) asl RX, RY, IMM -# define MERGE_2(RX,RY,IMM) -# define EXTRACT_1(RX,RY,IMM) and RX, RY, 0xFFFF -# define EXTRACT_2(RX,RY,IMM) lsr RX, RY, IMM -#else -# define SHIFT_1(RX,RY,IMM) lsr RX, RY, IMM ; >> -# define SHIFT_2(RX,RY,IMM) asl RX, RY, IMM ; << -# define MERGE_1(RX,RY,IMM) asl RX, RY, IMM ; << -# define MERGE_2(RX,RY,IMM) asl RX, RY, IMM ; << -# define EXTRACT_1(RX,RY,IMM) lsr RX, RY, IMM -# define EXTRACT_2(RX,RY,IMM) lsr RX, RY, 0x08 -#endif - -#if defined(__LL64__) || defined(__ARC_LL64__) -# define PREFETCH_READ(RX) prefetch [RX, 56] -# define PREFETCH_WRITE(RX) prefetchw [RX, 64] -# define LOADX(DST,RX) ldd.ab DST, [RX, 8] -# define STOREX(SRC,RX) std.ab SRC, [RX, 8] -# define ZOLSHFT 5 -# define ZOLAND 0x1F -#else -# define PREFETCH_READ(RX) prefetch [RX, 28] -# define PREFETCH_WRITE(RX) prefetchw [RX, 32] -# define LOADX(DST,RX) ld.ab DST, [RX, 4] -# define STOREX(SRC,RX) st.ab SRC, [RX, 4] -# define ZOLSHFT 4 -# define ZOLAND 0xF -#endif - -ENTRY(memcpy) - prefetch [r1] ; Prefetch the read location - prefetchw [r0] ; Prefetch the write location - mov.f 0, r2 -;;; if size is zero - jz.d [blink] - mov r3, r0 ; don't clobber ret val - -;;; if size <= 8 - cmp r2, 8 - bls.d @.Lsmallchunk - mov.f lp_count, r2 - - and.f r4, r0, 0x03 - rsub lp_count, r4, 4 - lpnz @.Laligndestination - ;; LOOP BEGIN - ldb.ab r5, [r1,1] - sub r2, r2, 1 - stb.ab r5, [r3,1] -.Laligndestination: - -;;; Check the alignment of the 
source - and.f r4, r1, 0x03 - bnz.d @.Lsourceunaligned - -;;; CASE 0: Both source and destination are 32bit aligned -;;; Convert len to Dwords, unfold x4 - lsr.f lp_count, r2, ZOLSHFT - lpnz @.Lcopy32_64bytes - ;; LOOP START - LOADX (r6, r1) - PREFETCH_READ (r1) - PREFETCH_WRITE (r3) - LOADX (r8, r1) - LOADX (r10, r1) - LOADX (r4, r1) - STOREX (r6, r3) - STOREX (r8, r3) - STOREX (r10, r3) - STOREX (r4, r3) -.Lcopy32_64bytes: - - and.f lp_count, r2, ZOLAND ;Last remaining 31 bytes -.Lsmallchunk: - lpnz @.Lcopyremainingbytes - ;; LOOP START - ldb.ab r5, [r1,1] - stb.ab r5, [r3,1] -.Lcopyremainingbytes: - - j [blink] -;;; END CASE 0 - -.Lsourceunaligned: - cmp r4, 2 - beq.d @.LunalignedOffby2 - sub r2, r2, 1 - - bhi.d @.LunalignedOffby3 - ldb.ab r5, [r1, 1] - -;;; CASE 1: The source is unaligned, off by 1 - ;; Hence I need to read 1 byte for a 16bit alignment - ;; and 2bytes to reach 32bit alignment - ldh.ab r6, [r1, 2] - sub r2, r2, 2 - ;; Convert to words, unfold x2 - lsr.f lp_count, r2, 3 - MERGE_1 (r6, r6, 8) - MERGE_2 (r5, r5, 24) - or r5, r5, r6 - - ;; Both src and dst are aligned - lpnz @.Lcopy8bytes_1 - ;; LOOP START - ld.ab r6, [r1, 4] - prefetch [r1, 28] ;Prefetch the next read location - ld.ab r8, [r1,4] - prefetchw [r3, 32] ;Prefetch the next write location - - SHIFT_1 (r7, r6, 24) - or r7, r7, r5 - SHIFT_2 (r5, r6, 8) - - SHIFT_1 (r9, r8, 24) - or r9, r9, r5 - SHIFT_2 (r5, r8, 8) - - st.ab r7, [r3, 4] - st.ab r9, [r3, 4] -.Lcopy8bytes_1: - - ;; Write back the remaining 16bits - EXTRACT_1 (r6, r5, 16) - sth.ab r6, [r3, 2] - ;; Write back the remaining 8bits - EXTRACT_2 (r5, r5, 16) - stb.ab r5, [r3, 1] - - and.f lp_count, r2, 0x07 ;Last 8bytes - lpnz @.Lcopybytewise_1 - ;; LOOP START - ldb.ab r6, [r1,1] - stb.ab r6, [r3,1] -.Lcopybytewise_1: - j [blink] - -.LunalignedOffby2: -;;; CASE 2: The source is unaligned, off by 2 - ldh.ab r5, [r1, 2] - sub r2, r2, 1 - - ;; Both src and dst are aligned - ;; Convert to words, unfold x2 - lsr.f lp_count, r2, 3 -#ifdef 
__BIG_ENDIAN__ - asl.nz r5, r5, 16 -#endif - lpnz @.Lcopy8bytes_2 - ;; LOOP START - ld.ab r6, [r1, 4] - prefetch [r1, 28] ;Prefetch the next read location - ld.ab r8, [r1,4] - prefetchw [r3, 32] ;Prefetch the next write location - - SHIFT_1 (r7, r6, 16) - or r7, r7, r5 - SHIFT_2 (r5, r6, 16) - - SHIFT_1 (r9, r8, 16) - or r9, r9, r5 - SHIFT_2 (r5, r8, 16) - - st.ab r7, [r3, 4] - st.ab r9, [r3, 4] -.Lcopy8bytes_2: - -#ifdef __BIG_ENDIAN__ - lsr.nz r5, r5, 16 -#endif - sth.ab r5, [r3, 2] - - and.f lp_count, r2, 0x07 ;Last 8bytes - lpnz @.Lcopybytewise_2 - ;; LOOP START - ldb.ab r6, [r1,1] - stb.ab r6, [r3,1] -.Lcopybytewise_2: - j [blink] - -.LunalignedOffby3: -;;; CASE 3: The source is unaligned, off by 3 -;;; Hence, I need to read 1byte for achieve the 32bit alignment - - ;; Both src and dst are aligned - ;; Convert to words, unfold x2 - lsr.f lp_count, r2, 3 -#ifdef __BIG_ENDIAN__ - asl.ne r5, r5, 24 -#endif - lpnz @.Lcopy8bytes_3 - ;; LOOP START - ld.ab r6, [r1, 4] - prefetch [r1, 28] ;Prefetch the next read location - ld.ab r8, [r1,4] - prefetchw [r3, 32] ;Prefetch the next write location - - SHIFT_1 (r7, r6, 8) - or r7, r7, r5 - SHIFT_2 (r5, r6, 24) - - SHIFT_1 (r9, r8, 8) - or r9, r9, r5 - SHIFT_2 (r5, r8, 24) - - st.ab r7, [r3, 4] - st.ab r9, [r3, 4] -.Lcopy8bytes_3: - -#ifdef __BIG_ENDIAN__ - lsr.nz r5, r5, 24 -#endif - stb.ab r5, [r3, 1] - - and.f lp_count, r2, 0x07 ;Last 8bytes - lpnz @.Lcopybytewise_3 - ;; LOOP START - ldb.ab r6, [r1,1] - stb.ab r6, [r3,1] -.Lcopybytewise_3: - j [blink] - -END(memcpy) -libc_hidden_def(memcpy) |