From e34f25503ad8f9d5b7e8d1801cbe345ce3daa949 Mon Sep 17 00:00:00 2001
From: Denis Vlasenko <vda.linux@googlemail.com>
Date: Tue, 9 Dec 2008 13:23:59 +0000
Subject: smaller memcpy

    text           data     bss     dec     hex filename
-     39              0       0      39      27 libc/string/i386/memcpy.os
+     35              0       0      35      23 libc/string/i386/memcpy.os
---
 libc/string/i386/memcpy.c | 29 +++++++++++++++--------------
 1 file changed, 15 insertions(+), 14 deletions(-)

(limited to 'libc')

diff --git a/libc/string/i386/memcpy.c b/libc/string/i386/memcpy.c
index 285583f3b..216ddfd1a 100644
--- a/libc/string/i386/memcpy.c
+++ b/libc/string/i386/memcpy.c
@@ -35,19 +35,20 @@
 /* Experimentally off - libc_hidden_proto(memcpy) */
 void *memcpy(void * to, const void * from, size_t n)
 {
-    int d0, d1, d2;
-    __asm__ __volatile__(
-	    "rep ; movsl\n\t"
-	    "testb $2,%b4\n\t"
-	    "je 1f\n\t"
-	    "movsw\n"
-	    "1:\ttestb $1,%b4\n\t"
-	    "je 2f\n\t"
-	    "movsb\n"
-	    "2:"
-	    : "=&c" (d0), "=&D" (d1), "=&S" (d2)
-	    :"0" (n/4), "q" (n),"1" ((long) to),"2" ((long) from)
-	    : "memory");
-    return (to);
+	int d0, d1, d2;	/* dummy outputs: the asm modifies ecx, edi and esi */
+	__asm__ __volatile__(
+		"	rep; movsl\n"		/* copy n / 4 dwords from esi to edi */
+		"	movl %4,%%ecx\n"	/* operand 4 is n: ecx = n */
+		"	andl $3,%%ecx\n"	/* ecx = n % 4 leftover bytes; jz below tests this result */
+		/* jz is optional: it skips "rep; movsb" when ecx == 0. This adds
+		 * a branch, but is currently (2008) the faster choice */
+		"	jz 1f\n"
+		"	rep; movsb\n"		/* copy the remaining 1..3 bytes */
+		"1:\n"
+		: "=&c" (d0), "=&D" (d1), "=&S" (d2)
+		: "0" (n / 4), "g" (n), "1" ((long)to), "2" ((long)from)
+		: "memory"	/* the copy touches memory that is not listed as an operand */
+	);
+	return to;
 }
 libc_hidden_def(memcpy)
-- 
cgit v1.2.3