diff options
Diffstat (limited to 'libc/string/avr32/memcpy.S')
-rw-r--r-- | libc/string/avr32/memcpy.S | 111 |
1 files changed, 111 insertions, 0 deletions
diff --git a/libc/string/avr32/memcpy.S b/libc/string/avr32/memcpy.S new file mode 100644 index 000000000..f95aabd13 --- /dev/null +++ b/libc/string/avr32/memcpy.S @@ -0,0 +1,111 @@ +/* + * Copyright (C) 2004-2007 Atmel Corporation + * + * This file is subject to the terms and conditions of the GNU Lesser General + * Public License. See the file "COPYING.LIB" in the main directory of this + * archive for more details. + */ + +/* Don't use r12 as dst since we must return it unmodified */ +#define dst r9 +#define src r11 +#define len r10 + + .text + .global memcpy + .type memcpy, @function +memcpy: + pref src[0] + mov dst, r12 + + /* If we have less than 32 bytes, don't do anything fancy */ + cp.w len, 32 + brge .Lmore_than_31 + + sub len, 1 + retlt r12 +1: ld.ub r8, src++ + st.b dst++, r8 + sub len, 1 + brge 1b + retal r12 + +.Lmore_than_31: + pushm r0-r7, lr + + /* Check alignment */ + mov r8, src + andl r8, 31, COH + brne .Lunaligned_src + mov r8, dst + andl r8, 3, COH + brne .Lunaligned_dst + +.Laligned_copy: + sub len, 32 + brlt .Lless_than_32 + +1: /* Copy 32 bytes at a time */ + ldm src, r0-r7 + sub src, -32 + stm dst, r0-r7 + sub dst, -32 + sub len, 32 + brge 1b + +.Lless_than_32: + /* Copy 16 more bytes if possible */ + sub len, -16 + brlt .Lless_than_16 + ldm src, r0-r3 + sub src, -16 + sub len, 16 + stm dst, r0-r3 + sub dst, -16 + +.Lless_than_16: + /* Do the remaining as byte copies */ + neg len + add pc, pc, len << 2 + .rept 15 + ld.ub r0, src++ + st.b dst++, r0 + .endr + + popm r0-r7, pc + +.Lunaligned_src: + /* Make src cacheline-aligned. r8 = (src & 31) */ + rsub r8, r8, 32 + sub len, r8 +1: ld.ub r0, src++ + st.b dst++, r0 + sub r8, 1 + brne 1b + + /* If dst is word-aligned, we're ready to go */ + pref src[0] + mov r8, 3 + tst dst, r8 + breq .Laligned_copy + +.Lunaligned_dst: + /* src is aligned, but dst is not. Expect bad performance */ + sub len, 4 + brlt 2f +1: ld.w r0, src++ + st.w dst++, r0 + sub len, 4 + brge 1b + +2: neg len + add pc, pc, len << 2 + .rept 3 + ld.ub r0, src++ + st.b dst++, r0 + .endr + + popm r0-r7, pc + .size memcpy, . - memcpy + +libc_hidden_def(memcpy) |