summary | refs | log | tree | commit | diff
path: root/libc/string/generic
diff options
context:
space:
mode:
author Carmelo Amoroso <carmelo.amoroso@st.com> 2008-09-09 16:55:27 +0000
committer Carmelo Amoroso <carmelo.amoroso@st.com> 2008-09-09 16:55:27 +0000
commit e4f55f33f69fce85099dd5936cc74856aa1b453d (patch)
tree f35d6680197aa43098c99291e909150bf52f409a /libc/string/generic
parent a79016198c859a3388584ac7782d760f349e2d67 (diff)
Add optimized memcpy implementation for sh4 (from Stuart Menefy @STMicroelectronics).
This implementation is based on 'backward copying'. Signed-off-by: Carmelo Amoroso <carmelo.amoroso@st.com>
Diffstat (limited to 'libc/string/generic')
-rw-r--r-- libc/string/generic/_memcpy_fwd.c 185
-rw-r--r-- libc/string/generic/memcopy.h 3
-rw-r--r-- libc/string/generic/memcpy.c 186
-rw-r--r-- libc/string/generic/memmove.c 17
4 files changed, 201 insertions, 190 deletions
diff --git a/libc/string/generic/_memcpy_fwd.c b/libc/string/generic/_memcpy_fwd.c
new file mode 100644
index 000000000..470165a57
--- /dev/null
+++ b/libc/string/generic/_memcpy_fwd.c
@@ -0,0 +1,185 @@
+/* _wordcopy_fwd_aligned -- Copy block beginning at SRCP to
+ block beginning at DSTP with LEN `op_t' words (not LEN bytes!).
+ Both SRCP and DSTP should be aligned for memory operations on `op_t's. */
+
+static void _wordcopy_fwd_aligned (long int dstp, long int srcp, size_t len)
+{
+ op_t a0, a1;
+
+ switch (len % 8) /* Choose the entry label into the 8-word unrolled loop below. */
+ {
+ case 2:
+ a0 = ((op_t *) srcp)[0];
+ srcp -= 6 * OPSIZ; /* Bias so that index [7] at do1 is the next source word. */
+ dstp -= 7 * OPSIZ; /* Bias so that index [7] at do1 is the first destination word. */
+ len += 6;
+ goto do1;
+ case 3:
+ a1 = ((op_t *) srcp)[0];
+ srcp -= 5 * OPSIZ;
+ dstp -= 6 * OPSIZ;
+ len += 5;
+ goto do2;
+ case 4:
+ a0 = ((op_t *) srcp)[0];
+ srcp -= 4 * OPSIZ;
+ dstp -= 5 * OPSIZ;
+ len += 4;
+ goto do3;
+ case 5:
+ a1 = ((op_t *) srcp)[0];
+ srcp -= 3 * OPSIZ;
+ dstp -= 4 * OPSIZ;
+ len += 3;
+ goto do4;
+ case 6:
+ a0 = ((op_t *) srcp)[0];
+ srcp -= 2 * OPSIZ;
+ dstp -= 3 * OPSIZ;
+ len += 2;
+ goto do5;
+ case 7:
+ a1 = ((op_t *) srcp)[0];
+ srcp -= 1 * OPSIZ;
+ dstp -= 2 * OPSIZ;
+ len += 1;
+ goto do6;
+
+ case 0:
+ if (OP_T_THRES <= 3 * OPSIZ && len == 0)
+ return;
+ a0 = ((op_t *) srcp)[0];
+ srcp -= 0 * OPSIZ;
+ dstp -= 1 * OPSIZ;
+ goto do7;
+ case 1:
+ a1 = ((op_t *) srcp)[0];
+ srcp -=-1 * OPSIZ;
+ dstp -= 0 * OPSIZ;
+ len -= 1;
+ if (OP_T_THRES <= 3 * OPSIZ && len == 0)
+ goto do0;
+ goto do8; /* No-op: do8 is the first label of the loop that follows. */
+ }
+
+ do
+ {
+ do8:
+ a0 = ((op_t *) srcp)[0];
+ ((op_t *) dstp)[0] = a1;
+ do7:
+ a1 = ((op_t *) srcp)[1];
+ ((op_t *) dstp)[1] = a0;
+ do6:
+ a0 = ((op_t *) srcp)[2];
+ ((op_t *) dstp)[2] = a1;
+ do5:
+ a1 = ((op_t *) srcp)[3];
+ ((op_t *) dstp)[3] = a0;
+ do4:
+ a0 = ((op_t *) srcp)[4];
+ ((op_t *) dstp)[4] = a1;
+ do3:
+ a1 = ((op_t *) srcp)[5];
+ ((op_t *) dstp)[5] = a0;
+ do2:
+ a0 = ((op_t *) srcp)[6];
+ ((op_t *) dstp)[6] = a1;
+ do1:
+ a1 = ((op_t *) srcp)[7];
+ ((op_t *) dstp)[7] = a0;
+
+ srcp += 8 * OPSIZ;
+ dstp += 8 * OPSIZ;
+ len -= 8;
+ }
+ while (len != 0);
+
+ /* do0 must stay after the loop: it stores the last word loaded (a1)
+ once the loop has finished. Please don't move it into the loop. */
+ do0:
+ ((op_t *) dstp)[0] = a1;
+}
+
+/* _wordcopy_fwd_dest_aligned -- Copy block beginning at SRCP to
+ block beginning at DSTP with LEN `op_t' words (not LEN bytes!).
+ DSTP should be aligned for memory operations on `op_t's, but SRCP must
+ *not* be aligned. */
+
+static void _wordcopy_fwd_dest_aligned (long int dstp, long int srcp, size_t len)
+{
+ op_t a0, a1, a2, a3;
+ int sh_1, sh_2;
+
+ /* Calculate how to shift a word read at the memory operation
+ aligned srcp to make it aligned for copy. */
+
+ sh_1 = 8 * (srcp % OPSIZ);
+ sh_2 = 8 * OPSIZ - sh_1;
+
+ /* Make SRCP aligned by rounding it down to the beginning of the `op_t'
+ it points in the middle of. */
+ srcp &= -OPSIZ;
+
+ switch (len % 4) /* Choose the entry label into the 4-word unrolled loop below. */
+ {
+ case 2:
+ a1 = ((op_t *) srcp)[0];
+ a2 = ((op_t *) srcp)[1];
+ srcp -= 1 * OPSIZ; /* Bias so that index [3] at do1 is the next source word. */
+ dstp -= 3 * OPSIZ; /* Bias so that index [3] at do1 is the first destination word. */
+ len += 2;
+ goto do1;
+ case 3:
+ a0 = ((op_t *) srcp)[0];
+ a1 = ((op_t *) srcp)[1];
+ srcp -= 0 * OPSIZ;
+ dstp -= 2 * OPSIZ;
+ len += 1;
+ goto do2;
+ case 0:
+ if (OP_T_THRES <= 3 * OPSIZ && len == 0)
+ return;
+ a3 = ((op_t *) srcp)[0];
+ a0 = ((op_t *) srcp)[1];
+ srcp -=-1 * OPSIZ;
+ dstp -= 1 * OPSIZ;
+ len += 0;
+ goto do3;
+ case 1:
+ a2 = ((op_t *) srcp)[0];
+ a3 = ((op_t *) srcp)[1];
+ srcp -=-2 * OPSIZ;
+ dstp -= 0 * OPSIZ;
+ len -= 1;
+ if (OP_T_THRES <= 3 * OPSIZ && len == 0)
+ goto do0;
+ goto do4; /* No-op: do4 is the first label of the loop that follows. */
+ }
+
+ do
+ {
+ do4:
+ a0 = ((op_t *) srcp)[0];
+ ((op_t *) dstp)[0] = MERGE (a2, sh_1, a3, sh_2);
+ do3:
+ a1 = ((op_t *) srcp)[1];
+ ((op_t *) dstp)[1] = MERGE (a3, sh_1, a0, sh_2);
+ do2:
+ a2 = ((op_t *) srcp)[2];
+ ((op_t *) dstp)[2] = MERGE (a0, sh_1, a1, sh_2);
+ do1:
+ a3 = ((op_t *) srcp)[3];
+ ((op_t *) dstp)[3] = MERGE (a1, sh_1, a2, sh_2);
+
+ srcp += 4 * OPSIZ;
+ dstp += 4 * OPSIZ;
+ len -= 4;
+ }
+ while (len != 0);
+
+ /* do0 must stay after the loop: it stores the final word, merged from
+ a2 and a3, once the loop has finished. Don't move it into the loop. */
+ do0:
+ ((op_t *) dstp)[0] = MERGE (a2, sh_1, a3, sh_2);
+}
diff --git a/libc/string/generic/memcopy.h b/libc/string/generic/memcopy.h
index df1ba9a97..fab4da764 100644
--- a/libc/string/generic/memcopy.h
+++ b/libc/string/generic/memcopy.h
@@ -107,6 +107,7 @@ typedef unsigned char byte;
} \
} while (0)
+#ifdef __ARCH_HAS_BWD_MEMCPY__
/* Copy *up to* NBYTES bytes from SRC_BP to DST_BP, with
the assumption that DST_BP is aligned on an OPSIZ multiple. If
not all bytes could be easily copied, store remaining number of bytes
@@ -125,6 +126,8 @@ typedef unsigned char byte;
(nbytes_left) = (nbytes) % OPSIZ; \
} while (0)
+#endif
+
/* Copy *up to* NBYTES_TO_COPY bytes from SRC_END_PTR to DST_END_PTR,
beginning at the words (of type op_t) right before the pointers and
continuing towards smaller addresses. May take advantage of that
diff --git a/libc/string/generic/memcpy.c b/libc/string/generic/memcpy.c
index fa6606ceb..4284f2fe5 100644
--- a/libc/string/generic/memcpy.c
+++ b/libc/string/generic/memcpy.c
@@ -25,192 +25,6 @@
/* Experimentally off - libc_hidden_proto(memcpy) */
-/* _wordcopy_fwd_aligned -- Copy block beginning at SRCP to
- block beginning at DSTP with LEN `op_t' words (not LEN bytes!).
- Both SRCP and DSTP should be aligned for memory operations on `op_t's. */
-
-static void _wordcopy_fwd_aligned (long int dstp, long int srcp, size_t len)
-{
- op_t a0, a1;
-
- switch (len % 8)
- {
- case 2:
- a0 = ((op_t *) srcp)[0];
- srcp -= 6 * OPSIZ;
- dstp -= 7 * OPSIZ;
- len += 6;
- goto do1;
- case 3:
- a1 = ((op_t *) srcp)[0];
- srcp -= 5 * OPSIZ;
- dstp -= 6 * OPSIZ;
- len += 5;
- goto do2;
- case 4:
- a0 = ((op_t *) srcp)[0];
- srcp -= 4 * OPSIZ;
- dstp -= 5 * OPSIZ;
- len += 4;
- goto do3;
- case 5:
- a1 = ((op_t *) srcp)[0];
- srcp -= 3 * OPSIZ;
- dstp -= 4 * OPSIZ;
- len += 3;
- goto do4;
- case 6:
- a0 = ((op_t *) srcp)[0];
- srcp -= 2 * OPSIZ;
- dstp -= 3 * OPSIZ;
- len += 2;
- goto do5;
- case 7:
- a1 = ((op_t *) srcp)[0];
- srcp -= 1 * OPSIZ;
- dstp -= 2 * OPSIZ;
- len += 1;
- goto do6;
-
- case 0:
- if (OP_T_THRES <= 3 * OPSIZ && len == 0)
- return;
- a0 = ((op_t *) srcp)[0];
- srcp -= 0 * OPSIZ;
- dstp -= 1 * OPSIZ;
- goto do7;
- case 1:
- a1 = ((op_t *) srcp)[0];
- srcp -=-1 * OPSIZ;
- dstp -= 0 * OPSIZ;
- len -= 1;
- if (OP_T_THRES <= 3 * OPSIZ && len == 0)
- goto do0;
- goto do8; /* No-op. */
- }
-
- do
- {
- do8:
- a0 = ((op_t *) srcp)[0];
- ((op_t *) dstp)[0] = a1;
- do7:
- a1 = ((op_t *) srcp)[1];
- ((op_t *) dstp)[1] = a0;
- do6:
- a0 = ((op_t *) srcp)[2];
- ((op_t *) dstp)[2] = a1;
- do5:
- a1 = ((op_t *) srcp)[3];
- ((op_t *) dstp)[3] = a0;
- do4:
- a0 = ((op_t *) srcp)[4];
- ((op_t *) dstp)[4] = a1;
- do3:
- a1 = ((op_t *) srcp)[5];
- ((op_t *) dstp)[5] = a0;
- do2:
- a0 = ((op_t *) srcp)[6];
- ((op_t *) dstp)[6] = a1;
- do1:
- a1 = ((op_t *) srcp)[7];
- ((op_t *) dstp)[7] = a0;
-
- srcp += 8 * OPSIZ;
- dstp += 8 * OPSIZ;
- len -= 8;
- }
- while (len != 0);
-
- /* This is the right position for do0. Please don't move
- it into the loop. */
- do0:
- ((op_t *) dstp)[0] = a1;
-}
-
-/* _wordcopy_fwd_dest_aligned -- Copy block beginning at SRCP to
- block beginning at DSTP with LEN `op_t' words (not LEN bytes!).
- DSTP should be aligned for memory operations on `op_t's, but SRCP must
- *not* be aligned. */
-
-static void _wordcopy_fwd_dest_aligned (long int dstp, long int srcp, size_t len)
-{
- op_t a0, a1, a2, a3;
- int sh_1, sh_2;
-
- /* Calculate how to shift a word read at the memory operation
- aligned srcp to make it aligned for copy. */
-
- sh_1 = 8 * (srcp % OPSIZ);
- sh_2 = 8 * OPSIZ - sh_1;
-
- /* Make SRCP aligned by rounding it down to the beginning of the `op_t'
- it points in the middle of. */
- srcp &= -OPSIZ;
-
- switch (len % 4)
- {
- case 2:
- a1 = ((op_t *) srcp)[0];
- a2 = ((op_t *) srcp)[1];
- srcp -= 1 * OPSIZ;
- dstp -= 3 * OPSIZ;
- len += 2;
- goto do1;
- case 3:
- a0 = ((op_t *) srcp)[0];
- a1 = ((op_t *) srcp)[1];
- srcp -= 0 * OPSIZ;
- dstp -= 2 * OPSIZ;
- len += 1;
- goto do2;
- case 0:
- if (OP_T_THRES <= 3 * OPSIZ && len == 0)
- return;
- a3 = ((op_t *) srcp)[0];
- a0 = ((op_t *) srcp)[1];
- srcp -=-1 * OPSIZ;
- dstp -= 1 * OPSIZ;
- len += 0;
- goto do3;
- case 1:
- a2 = ((op_t *) srcp)[0];
- a3 = ((op_t *) srcp)[1];
- srcp -=-2 * OPSIZ;
- dstp -= 0 * OPSIZ;
- len -= 1;
- if (OP_T_THRES <= 3 * OPSIZ && len == 0)
- goto do0;
- goto do4; /* No-op. */
- }
-
- do
- {
- do4:
- a0 = ((op_t *) srcp)[0];
- ((op_t *) dstp)[0] = MERGE (a2, sh_1, a3, sh_2);
- do3:
- a1 = ((op_t *) srcp)[1];
- ((op_t *) dstp)[1] = MERGE (a3, sh_1, a0, sh_2);
- do2:
- a2 = ((op_t *) srcp)[2];
- ((op_t *) dstp)[2] = MERGE (a0, sh_1, a1, sh_2);
- do1:
- a3 = ((op_t *) srcp)[3];
- ((op_t *) dstp)[3] = MERGE (a1, sh_1, a2, sh_2);
-
- srcp += 4 * OPSIZ;
- dstp += 4 * OPSIZ;
- len -= 4;
- }
- while (len != 0);
-
- /* This is the right position for do0. Please don't move
- it into the loop. */
- do0:
- ((op_t *) dstp)[0] = MERGE (a2, sh_1, a3, sh_2);
-}
-
void *memcpy (void *dstpp, const void *srcpp, size_t len)
{
unsigned long int dstp = (long int) dstpp;
diff --git a/libc/string/generic/memmove.c b/libc/string/generic/memmove.c
index b2a017b16..7f945b150 100644
--- a/libc/string/generic/memmove.c
+++ b/libc/string/generic/memmove.c
@@ -24,12 +24,18 @@
#include "memcopy.h"
#include "pagecopy.h"
+#ifdef __ARCH_HAS_BWD_MEMCPY__
+/* generic-opt memmove assumes memcpy does forward copying! */
+#include "_memcpy_fwd.c"
+#endif
+
/* Experimentally off - libc_hidden_proto(memmove) */
/* Experimentally off - libc_hidden_proto(memcpy) */
static void _wordcopy_bwd_aligned (long int dstp, long int srcp, size_t len)
{
- op_t a0, a1;
+ op_t a0 = 0;
+ op_t a1 = 0;
switch (len % 8)
{
@@ -133,7 +139,10 @@ static void _wordcopy_bwd_aligned (long int dstp, long int srcp, size_t len)
static void _wordcopy_bwd_dest_aligned (long int dstp, long int srcp, size_t len)
{
- op_t a0, a1, a2, a3;
+ op_t a0 = 0;
+ op_t a1 = 0;
+ op_t a2 = 0;
+ op_t a3 = 0;
int sh_1, sh_2;
/* Calculate how to shift a word read at the memory operation
@@ -218,8 +227,8 @@ void *memmove (void *dest, const void *src, size_t len)
Reduces the working set. */
if (dstp - srcp >= len) /* *Unsigned* compare! */
{
-#if 1
-#warning REMINDER: generic-opt memmove assumes memcpy does forward copying!
+#ifndef __ARCH_HAS_BWD_MEMCPY__
+ /* No backward-copying memcpy on this arch: memcpy copies forward, so it can be used here */
memcpy(dest, src, len);
#else
/* Copy from the beginning to the end. */