/*
 * Copyright (C) 2004-2007 Atmel Corporation
 *
 * This file is subject to the terms and conditions of the GNU Lesser General
 * Public License.  See the file "COPYING.LIB" in the main directory of this
 * archive for more details.
 */

#define dst r12
#define src r11
#define len r10

/*
 * void *memmove(void *dst, const void *src, size_t len)
 *
 * Copy len bytes from src to dst; the two regions may overlap.
 *
 * In:       r12 = dst, r11 = src, r10 = len
 * Out:      r12 = dst as passed in. On the backward-copy paths below,
 *           dst is first advanced by len and then pre-decremented once
 *           per byte copied, so it ends up back at its entry value.
 * Scratch:  r8 everywhere; r0-r7 on the bulk paths, where they are
 *           saved (together with lr) by pushm and restored by popm.
 *
 * Strategy:
 *   - If src >= dst (compared as unsigned addresses), branch to memcpy.
 *     NOTE(review): memcpy is not visible in this file; this assumes
 *     it copies from low to high addresses, which is the safe
 *     direction for this case.
 *   - Otherwise copy from the end of the buffers towards the start.
 *
 * NOTE: len is tested with signed conditions (brge/brlt) throughout,
 * so lengths of 2^31 bytes or more are not handled by this routine.
 */
	.text
	.global memmove
	.type	memmove, @function
memmove:
	/*
	 * Pointers are unsigned quantities, so pick the direction with
	 * an unsigned condition: a signed test would take the memcpy
	 * path for src < 0x80000000 <= dst even though src < dst.
	 */
	cp.w	src, dst
	brhs	HIDDEN_JUMPTARGET(memcpy)

	/* src < dst: point both one past the last byte and walk down */
	add	dst, len
	add	src, len
	pref	src[-1]

	/*
	 * The rest is basically the same as in memcpy.S except that
	 * the direction is reversed.
	 */
	cp.w	len, 32
	brge	.Lmore_than_31

	/*
	 * Fewer than 32 bytes: plain byte loop. len is biased by -1 so
	 * that the sign of the result doubles as the loop condition;
	 * len == 0 returns immediately.
	 */
	sub	len, 1
	retlt	r12
1:	ld.ub	r8, --src
	st.b	--dst, r8
	sub	len, 1
	brge	1b
	retal	r12

.Lmore_than_31:
	/* r0-r7 are used as the copy buffer from here on */
	pushm	r0-r7, lr

	/*
	 * Check alignment: the (end-of-buffer) src pointer must be
	 * 32-byte aligned and dst word-aligned for the ldm/stm path.
	 */
	mov	r8, src
	andl	r8, 31, COH
	brne	.Lunaligned_src
	mov	r8, r12
	andl	r8, 3, COH
	brne	.Lunaligned_dst

.Laligned_copy:
	/* Bias len by -32: len >= 0 means another full chunk is left */
	sub	len, 32
	brlt	.Lless_than_32

	/*
	 * The whole chunk is loaded before any of it is stored, and
	 * chunks are processed from high to low addresses. The brge
	 * tests the flags from "sub len, 32"; this relies on stm not
	 * modifying the flags.
	 */
1:	/* Copy 32 bytes at a time */
	sub	src, 32
	ldm	src, r0-r7
	sub	dst, 32
	sub	len, 32
	stm	dst, r0-r7
	brge	1b

.Lless_than_32:
	/* Copy 16 more bytes if possible */
	/* len is (remaining - 32) here; adding 16 gives (remaining - 16) */
	sub	len, -16
	brlt	.Lless_than_16
	sub	src, 16
	ldm	src, r0-r3
	sub	dst, 16
	sub	len, 16
	stm	dst, r0-r3

.Lless_than_16:
	/* Do the remaining as byte copies */
	/* Undo the -16 bias: len is now the true remaining count (0..15) */
	sub	len, -16
	breq	2f
1:	ld.ub	r0, --src
	st.b	--dst, r0
	sub	len, 1
	brne	1b

	/* Restore r0-r7 and return by loading the saved lr into pc */
2:	popm	r0-r7, pc

.Lunaligned_src:
	/* Make src cacheline-aligned. r8 = (src & 31) */
	/*
	 * Copy r8 (1..31) bytes one at a time. len >= 32 on entry to
	 * this path, so at least one byte is left afterwards.
	 */
	sub	len, r8
1:	ld.ub	r0, --src
	st.b	--dst, r0
	sub	r8, 1
	brne	1b

	/* If dst is word-aligned, we're ready to go */
	pref	src[-4]
	mov	r8, 3
	tst	dst, r8
	breq	.Laligned_copy

.Lunaligned_dst:
	/* src is aligned, but dst is not. Expect bad performance */
	/* Word-at-a-time copy, with len biased by -4 as the loop test */
	sub	len, 4
	brlt	2f
1:	ld.w	r0, --src
	st.w	--dst, r0
	sub	len, 4
	brge	1b

	/*
	 * len is now (remaining - 4), i.e. -4..-1, so after the neg it
	 * is 1..4 with (4 - len) bytes still to copy. The computed jump
	 * skips the unneeded leading copies of the three unrolled
	 * ld.ub/st.b pairs below.
	 * NOTE(review): this depends on each pair assembling to exactly
	 * 4 bytes and on the value pc reads as in the add; confirm
	 * against the ISA manual before touching the .rept body.
	 */
2:	neg	len
	add	pc, pc, len << 2
	.rept	3
	ld.ub	r0, --src
	st.b	--dst, r0
	.endr

	popm	r0-r7, pc
	.size	memmove, . - memmove

libc_hidden_def(memmove)