summaryrefslogtreecommitdiff
path: root/libc/string/arc/memcmp.S
diff options
context:
space:
mode:
authorPavel Kozlov <pavel.kozlov@synopsys.com>2022-10-07 13:43:45 +0400
committerWaldemar Brodkorb <wbx@openadk.org>2022-10-14 09:47:02 +0200
commit663b8a0497c40a20668258bd69db13924c569c41 (patch)
treec494a97dedbfa9ae8aa72b3c7f25b05490ec8130 /libc/string/arc/memcmp.S
parentde6be7bc60f190a0d746945a3a5a143bc93a1a65 (diff)
arc: add optimized string functions for ARCv3
Add the ability to use optimized versions of string functions for ARCv3 32-bit CPUs with the UCLIBC_HAS_STRING_ARCH_OPT option. Add optimized memcpy/memset/memcmp code for ARCv3 CPUs based on the code from newlib, and adapt the existing optimized strchr/strcmp/strcpy/strlen for ARCv3. Link to the Synopsys newlib repo with code for ARCv3 on GitHub: https://github.com/foss-for-synopsys-dwc-arc-processors/newlib Signed-off-by: Pavel Kozlov <pavel.kozlov@synopsys.com>
Diffstat (limited to 'libc/string/arc/memcmp.S')
-rw-r--r--libc/string/arc/memcmp.S94
1 files changed, 93 insertions, 1 deletions
diff --git a/libc/string/arc/memcmp.S b/libc/string/arc/memcmp.S
index a60757e7a..20122a296 100644
--- a/libc/string/arc/memcmp.S
+++ b/libc/string/arc/memcmp.S
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2013 Synopsys, Inc. (www.synopsys.com)
+ * Copyright (C) 2013, 2022 Synopsys, Inc. (www.synopsys.com)
* Copyright (C) 2007 ARC International (UK) LTD
*
* Licensed under the LGPL v2.1 or later, see the file COPYING.LIB in this tarball.
@@ -17,6 +17,8 @@
#endif
ENTRY(memcmp)
+
+#if defined(__ARC700__) || defined(__ARCHS__)
or r12,r0,r1
asl_s r12,r12,30
sub r3,r2,1
@@ -149,6 +151,96 @@ ENTRY(memcmp)
.Lnil:
j_s.d [blink]
mov r0,0
+
+#elif (__ARC64_ARCH32__)
+	;; ARCv3 (32-bit) memcmp, based on Synopsys code from newlib's arc64/memcmp.S.
+	;; In: r0 = lhs, r1 = rhs, r2 = byte count.  Out: r0 (0 if equal).  Clobbers r1-r13.
+	cmp	r2, 32
+	bls.d	@.L_compare_1_bytes	; count <= 32: byte loop only
+	mov	r3, r0			; (delay slot) r3 walks lhs, r0 holds the result
+
+	lsr	r12, r2, 4		; counter for 16-byte chunks
+	xor	r13, r13, r13		; mask: bit i set <=> i-th word pair differs
+
+.L_compare_16_bytes:
+	ld.ab	r4, [r3, +4]
+	ld.ab	r5, [r1, +4]
+	ld.ab	r6, [r3, +4]
+	ld.ab	r7, [r1, +4]
+	ld.ab	r8, [r3, +4]
+	ld.ab	r9, [r1, +4]
+	ld.ab	r10, [r3, +4]
+	ld.ab	r11, [r1, +4]
+	xor.f	0, r4, r5		; only the flags are wanted
+	xor.ne	r13, r13, 0b0001	; r13 is 0 on loop entry, so xor acts as "set bit"
+	xor.f	0, r6, r7
+	xor.ne	r13, r13, 0b0010
+	xor.f	0, r8, r9
+	xor.ne	r13, r13, 0b0100
+	xor.f	0, r10, r11
+	xor.ne	r13, r13, 0b1000
+	brne	r13, 0, @.L_unequal_find
+	dbnz	r12, @.L_compare_16_bytes
+
+	;; ld.ab post-increments, so r3 and r1 already point just past the last
+	;; chunk compared; they must not be rewound before the tail loop.
+	bmsk_s	r2, r2, 3		; remaining bytes: count % 16
+
+.L_compare_1_bytes:
+	cmp	r2, 0
+	jeq.d	[blink]			; nothing left to compare: return 0
+	xor_s	r0, r0, r0		; (delay slot) r0 = 0
+
+2:
+	ldb.ab	r4, [r3, +1]
+	ldb.ab	r5, [r1, +1]
+	sub.f	r0, r4, r5		; r0 = lhs byte - rhs byte
+	jne	[blink]			; mismatch: r0 is the result
+	dbnz	r2, @2b
+	j_s	[blink]			; all equal: r0 is 0 from the last sub.f
+
+	;; At this point, we want to find the _first_ comparison that marked the
+	;; inequality of "lhs" and "rhs"
+.L_unequal_find:
+	ffs	r13, r13		; index (0..3) of the lowest set mask bit
+	asl	r13, r13, 2		; four instructions per table entry below
+	bi	[r13]			; NOTE(review): assumes bi scales by 4 bytes and entries are 16 bytes
+.L_unequal_r4r5:
+	mov	r1, r4
+	b.d	@.L_diff_byte_in_regs
+	mov	r2, r5			; (delay slot)
+	nop				; pads the entry to four instructions
+.L_unequal_r6r7:
+	mov	r1, r6
+	b.d	@.L_diff_byte_in_regs
+	mov	r2, r7			; (delay slot)
+	nop				; pads the entry to four instructions
+.L_unequal_r8r9:
+	mov	r1, r8
+	b.d	@.L_diff_byte_in_regs
+	mov	r2, r9			; (delay slot)
+	nop				; pads the entry to four instructions
+.L_unequal_r10r11:
+	mov	r1, r10
+	mov	r2, r11
+
+	;; fall-through
+	;; r1/r2 = unequal lhs/rhs words.  NOTE(review): assumes little-endian loads.
+.L_diff_byte_in_regs:
+	xor	r0, r1, r2		; set bits mark differing positions
+	ffs	r0, r0			; lowest differing bit
+	and	r0, r0, 0x18		; round down to its byte: shift of 0, 8, 16 or 24
+	lsr	r1, r1, r0
+	lsr	r2, r2, r0
+	bmsk_s	r1, r1, 7		; keep only that byte of lhs
+	bmsk_s	r2, r2, 7		; ... and of rhs
+	j_s.d	[blink]
+	sub	r0, r1, r2		; (delay slot) result = lhs byte - rhs byte
+
+#else
+#error "Unsupported ARC CPU type"
+#endif
+
END(memcmp)
libc_hidden_def(memcmp)