summaryrefslogtreecommitdiff
path: root/libc/string/arc/strcmp.S
diff options
context:
space:
mode:
Diffstat (limited to 'libc/string/arc/strcmp.S')
-rw-r--r--libc/string/arc/strcmp.S81
1 files changed, 79 insertions, 2 deletions
diff --git a/libc/string/arc/strcmp.S b/libc/string/arc/strcmp.S
index 5a0e56045..ad38d9e00 100644
--- a/libc/string/arc/strcmp.S
+++ b/libc/string/arc/strcmp.S
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2013 Synopsys, Inc. (www.synopsys.com)
+ * Copyright (C) 2013, 2014-2015, 2017 Synopsys, Inc. (www.synopsys.com)
* Copyright (C) 2007 ARC International (UK) LTD
*
* Licensed under the LGPL v2.1 or later, see the file COPYING.LIB in this tarball.
@@ -8,6 +8,13 @@
#include <features.h>
#include <sysdep.h>
+#if !defined(__ARC700__) && !defined(__ARCHS__)
+#error "Neither ARC700 nor ARCHS is defined!"
+#endif
+
+ENTRY(strcmp)
+
+#ifdef __ARC700__
/* This is optimized primarily for the ARC700.
It would be possible to speed up the loops by one cycle / word
respective one cycle / byte by forcing double source 1 alignment, unrolling
@@ -15,7 +22,6 @@
source 1; however, that would increase the overhead for loop setup / finish,
and strcmp might often terminate early. */
-ENTRY(strcmp)
or r2,r0,r1
bmsk_s r2,r2,1
brne r2,0,.Lcharloop
@@ -93,6 +99,77 @@ ENTRY(strcmp)
.Lcmpend:
j_s.d [blink]
sub r0,r2,r3
+#endif /* __ARC700__ */
+
+#ifdef __ARCHS__
+ or r2, r0, r1
+ bmsk_s r2, r2, 1
+ brne r2, 0, @.Lcharloop
+
+;;; s1 and s2 are word aligned
+ ld.ab r2, [r0, 4]
+
+ mov_s r12, 0x01010101
+ ror r11, r12
+ .align 4
+.LwordLoop:
+ ld.ab r3, [r1, 4]
+ ;; Detect NULL char in str1
+ sub r4, r2, r12
+ ld.ab r5, [r0, 4]
+ bic r4, r4, r2
+ and r4, r4, r11
+ brne.d.nt r4, 0, .LfoundNULL
+ ;; Check if the read locations are the same
+ cmp r2, r3
+ beq.d .LwordLoop
+ mov.eq r2, r5
+
+ ;; A match is found, spot it out
+#ifdef __LITTLE_ENDIAN__
+ swape r3, r3
+ mov_s r0, 1
+ swape r2, r2
+#else
+ mov_s r0, 1
+#endif
+ cmp_s r2, r3
+ j_s.d [blink]
+ bset.lo r0, r0, 31
+
+ .align 4
+.LfoundNULL:
+#ifdef __BIG_ENDIAN__
+ swape r4, r4
+ swape r2, r2
+ swape r3, r3
+#endif
+ ;; Find null byte
+ ffs r0, r4
+ bmsk r2, r2, r0
+ bmsk r3, r3, r0
+ swape r2, r2
+ swape r3, r3
+ ;; make the return value
+ sub.f r0, r2, r3
+ mov.hi r0, 1
+ j_s.d [blink]
+ bset.lo r0, r0, 31
+
+ .align 4
+.Lcharloop:
+ ldb.ab r2, [r0, 1]
+ ldb.ab r3, [r1, 1]
+ nop
+ breq r2, 0, .Lcmpend
+ breq r2, r3, .Lcharloop
+
+ .align 4
+.Lcmpend:
+ j_s.d [blink]
+ sub r0, r2, r3
+#endif /* __ARCHS__ */
+
END(strcmp)
libc_hidden_def(strcmp)