From 2baa0cccce48cc0c2fffcb0a6191dc383aceec72 Mon Sep 17 00:00:00 2001
From: Eric Andersen <andersen@codepoet.org>
Date: Fri, 14 May 2004 10:29:45 +0000
Subject: Alexandre Oliva writes:

This patch introduces optimized versions of memcpy and memset for
frv.
---
 libc/string/frv/memcpy.S | 124 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 124 insertions(+)
 create mode 100644 libc/string/frv/memcpy.S

(limited to 'libc/string/frv/memcpy.S')

diff --git a/libc/string/frv/memcpy.S b/libc/string/frv/memcpy.S
new file mode 100644
index 000000000..63cc523a9
--- /dev/null
+++ b/libc/string/frv/memcpy.S
@@ -0,0 +1,124 @@
+/* memcpy.S: optimised assembly memcpy
+ *
+ * Copyright (C) 2003, 2004 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ *  This library is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU Library General Public
+ *  License as published by the Free Software Foundation; either
+ *  version 2 of the License, or (at your option) any later version.
+ *
+ *  This library is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *  Library General Public License for more details.
+ *
+ *  You should have received a copy of the GNU Library General Public
+ *  License along with this library; if not, write to the Free
+ *  Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+
+        .text
+        .p2align	4
+
+###############################################################################
+#
+# void *memcpy(void *to, const void *from, size_t count)
+#
+# - NOTE: must not use any stack. exception detection performs function return
+#         to caller's fixup routine, aborting the remainder of the copy
+#
+###############################################################################
+        .globl		memcpy
+        .type		memcpy,@function
+memcpy:	/* in: gr8=to gr9=from gr10=count; gr8 is never written, so it still holds to on return; writes gr3-gr7, gr9-gr15, icc0, icc1, icc3 */
+	or.p		gr8,gr9,gr4	/* gr4 = to | from */
+	orcc		gr10,gr0,gr0,icc3	/* icc3 = flags of count; the OR result is thrown away into gr0 */
+	or.p		gr10,gr4,gr4	/* gr4 = to | from | count */
+	beqlr		icc3,#0	/* count == 0: return at once (#0 is the branch hint, presumably "unlikely" - confirm) */
+
+	# pick the widest loop for which to, from & count are all multiples of the chunk size (low bits of gr4 clear)
+	andicc.p	gr4,#0x1f,gr0,icc0	/* icc0: is everything a multiple of 32? */
+	setlos		#8,gr11	/* gr11 = 8: address step shared by the 8/16/32-byte loops */
+	andicc.p	gr4,#0x0f,gr0,icc1	/* icc1: multiple of 16? (icc0/icc1 alternate so each test pairs with the previous branch) */
+	beq		icc0,#0,memcpy_32
+	andicc.p	gr4,#0x07,gr0,icc0	/* icc0: multiple of 8? */
+	beq		icc1,#0,memcpy_16
+	andicc.p	gr4,#0x03,gr0,icc1	/* icc1: multiple of 4? */
+	beq		icc0,#0,memcpy_8
+	andicc.p	gr4,#0x01,gr0,icc0	/* icc0: multiple of 2? */
+	beq		icc1,#0,memcpy_4
+	setlos.p	#1,gr11	/* gr11 = 1 for the byte loop below (memcpy_2 reloads it with 2) */
+	beq		icc0,#0,memcpy_2
+
+	# do byte by byte copy (fallback when not even 2-byte alignment is common to all three)
+	sub.p		gr8,gr11,gr3	/* gr3 = to - 1: bias, because the update-form store adds gr11 before each access */
+	sub		gr9,gr11,gr9	/* gr9 = from - 1: same bias for the update-form load */
+0:	ldubu.p		@(gr9,gr11),gr4	/* gr9 += 1; gr4 = byte at new gr9 */
+	subicc		gr10,#1,gr10,icc0	/* count -= 1; icc0 records whether it reached zero */
+	stbu.p		gr4,@(gr3,gr11)	/* gr3 += 1; store byte at new gr3 */
+	bne		icc0,#2,0b	/* loop while count != 0 (#2 is the branch hint, presumably "likely" - confirm) */
+	bralr	/* return to caller through LR */
+
+	# do halfword by halfword copy (to, from & count all multiples of 2)
+memcpy_2:
+	setlos		#2,gr11	/* gr11 = 2: address step */
+	sub.p		gr8,gr11,gr3	/* gr3 = to - 2 (pre-update bias) */
+	sub		gr9,gr11,gr9	/* gr9 = from - 2 */
+0:	lduhu.p		@(gr9,gr11),gr4	/* gr9 += 2; gr4 = halfword at new gr9 */
+	subicc		gr10,#2,gr10,icc0	/* count -= 2 */
+	sthu.p		gr4,@(gr3,gr11)	/* gr3 += 2; store halfword */
+	bne		icc0,#2,0b	/* loop while count != 0 */
+	bralr	/* return to caller */
+
+	# do word by word copy (to, from & count all multiples of 4)
+memcpy_4:
+	setlos		#4,gr11	/* gr11 = 4: address step */
+	sub.p		gr8,gr11,gr3	/* gr3 = to - 4 (pre-update bias) */
+	sub		gr9,gr11,gr9	/* gr9 = from - 4 */
+0:	ldu.p		@(gr9,gr11),gr4	/* gr9 += 4; gr4 = word at new gr9 */
+	subicc		gr10,#4,gr10,icc0	/* count -= 4 */
+	stu.p		gr4,@(gr3,gr11)	/* gr3 += 4; store word */
+	bne		icc0,#2,0b	/* loop while count != 0 */
+	bralr	/* return to caller */
+
+	# do double-word by double-word copy (multiples of 8; gr11 is still 8 from the dispatch code)
+memcpy_8:
+	sub.p		gr8,gr11,gr3	/* gr3 = to - 8 (pre-update bias) */
+	sub		gr9,gr11,gr9	/* gr9 = from - 8 */
+0:	lddu.p		@(gr9,gr11),gr4	/* gr9 += 8; load 8 bytes into the pair gr4/gr5 */
+	subicc		gr10,#8,gr10,icc0	/* count -= 8 */
+	stdu.p		gr4,@(gr3,gr11)	/* gr3 += 8; store gr4/gr5 */
+	bne		icc0,#2,0b	/* loop while count != 0 */
+	bralr	/* return to caller */
+
+	# do quad-word by quad-word copy (multiples of 16, moved as two 8-byte transfers per iteration)
+memcpy_16:
+	sub.p		gr8,gr11,gr3	/* gr3 = to - 8: the step stays 8, so the bias is 8 rather than 16 */
+	sub		gr9,gr11,gr9	/* gr9 = from - 8 */
+0:	lddu		@(gr9,gr11),gr4	/* bytes 0-7 of the chunk into gr4/gr5 */
+	lddu.p		@(gr9,gr11),gr6	/* bytes 8-15 into gr6/gr7 */
+	subicc		gr10,#16,gr10,icc0	/* count -= 16 */
+	stdu		gr4,@(gr3,gr11)	/* store bytes 0-7 */
+	stdu.p		gr6,@(gr3,gr11)	/* store bytes 8-15 */
+	bne		icc0,#2,0b	/* loop while count != 0 */
+	bralr	/* return to caller */
+
+	# do eight-word by eight-word copy (multiples of 32, moved as four 8-byte transfers per iteration)
+memcpy_32:
+	sub.p		gr8,gr11,gr3	/* gr3 = to - 8: the step stays 8 */
+	sub		gr9,gr11,gr9	/* gr9 = from - 8 */
+0:	lddu		@(gr9,gr11),gr4	/* bytes 0-7 into gr4/gr5 */
+	lddu		@(gr9,gr11),gr6	/* bytes 8-15 into gr6/gr7 */
+	lddu		@(gr9,gr11),gr12	/* bytes 16-23 into gr12/gr13 */
+	lddu.p		@(gr9,gr11),gr14	/* bytes 24-31 into gr14/gr15 */
+	subicc		gr10,#32,gr10,icc0	/* count -= 32 */
+	stdu		gr4,@(gr3,gr11)	/* store bytes 0-7 */
+	stdu		gr6,@(gr3,gr11)	/* store bytes 8-15 */
+	stdu		gr12,@(gr3,gr11)	/* store bytes 16-23 */
+	stdu.p		gr14,@(gr3,gr11)	/* store bytes 24-31 */
+	bne		icc0,#2,0b	/* loop while count != 0 */
+	bralr	/* return to caller */
+
+	.size		memcpy, .-memcpy	/* symbol size = bytes from the memcpy label to here */
-- 
cgit v1.2.3