From 1ce639990b6340a8b9f18d076c427fcf5a93695e Mon Sep 17 00:00:00 2001 From: Joern Rennecke Date: Fri, 13 Dec 2013 14:49:24 +0530 Subject: string: Add ARC support Signed-off-by: Joern Rennecke Signed-off-by: Vineet Gupta Signed-off-by: Bernhard Reutner-Fischer --- libc/string/arc/strcpy.S | 71 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 71 insertions(+) create mode 100644 libc/string/arc/strcpy.S (limited to 'libc/string/arc/strcpy.S') diff --git a/libc/string/arc/strcpy.S b/libc/string/arc/strcpy.S new file mode 100644 index 000000000..241bf3ee6 --- /dev/null +++ b/libc/string/arc/strcpy.S @@ -0,0 +1,71 @@ +/* + * Copyright (C) 2013 Synopsys, Inc. (www.synopsys.com) + * Copyright (C) 2007 ARC International (UK) LTD + * + * Licensed under the LGPL v2.1 or later, see the file COPYING.LIB in this tarball. + */ + + +#include + +/* If dst and src are 4 byte aligned, copy 8 bytes at a time. + If the src is 4, but not 8 byte aligned, we first read 4 bytes to get + it 8 byte aligned. Thus, we can do a little read-ahead, without + dereferencing a cache line that we should not touch. + Note that short and long instructions have been scheduled to avoid + branch stalls. + The beq_s to r3z could be made unaligned & long to avoid a stall + there, but the it is not likely to be taken often, and it + would also be likey to cost an unaligned mispredict at the next call. */ + +ENTRY(strcpy) + or r2,r0,r1 + bmsk_s r2,r2,1 + brne.d r2,0,charloop + mov_s r10,r0 + ld_s r3,[r1,0] + mov r8,0x01010101 + bbit0.d r1,2,loop_start + ror r12,r8 + sub r2,r3,r8 + bic_s r2,r2,r3 + tst_s r2,r12 + bne r3z + mov_s r4,r3 + .balign 4 +loop: + ld.a r3,[r1,4] + st.ab r4,[r10,4] +loop_start: + ld.a r4,[r1,4] + sub r2,r3,r8 + bic_s r2,r2,r3 + tst_s r2,r12 + bne_s r3z + st.ab r3,[r10,4] + sub r2,r4,r8 + bic r2,r2,r4 + tst r2,r12 + beq loop + mov_s r3,r4 +#ifdef __LITTLE_ENDIAN__ +r3z: bmsk.f r1,r3,7 + lsr_s r3,r3,8 +#else +r3z: lsr.f r1,r3,24 + asl_s r3,r3,8 +#endif + bne.d r3z + stb.ab r1,[r10,1] + j_s [blink] + + .balign 4 +charloop: + ldb.ab r3,[r1,1] + + + brne.d r3,0,charloop + stb.ab r3,[r10,1] + j [blink] +END(strcpy) +libc_hidden_def(strcpy) -- cgit v1.2.3