path: root/libc/string/avr32/memmove.S
blob: 8ca4da54dbfc38b81b8985037fbd0d4a9525c81a
/*
 * Copyright (C) 2004-2007 Atmel Corporation
 *
 * This file is subject to the terms and conditions of the GNU Lesser General
 * Public License.  See the file "COPYING.LIB" in the main directory of this
 * archive for more details.
 */

#define dst r12
#define src r11
#define len r10

       .text
       .global memmove
       .type   memmove, @function
memmove:
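       /*
        * If src is at or above dst, copying front to back is safe even
        * when the areas overlap, so hand off to memcpy.  Otherwise copy
        * backwards: point src and dst one past the end of their buffers
        * and work down, prefetching the last source cacheline first.
        */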
       cp.w    src, dst
       brge    HIDDEN_JUMPTARGET(memcpy)

       add     dst, len
       add     src, len
       pref    src[-1]

       /*
        * The rest is basically the same as in memcpy.S except that
        * the direction is reversed.
        */
       cp.w    len, 32
       brge    .Lmore_than_31

       /*
        * Fewer than 32 bytes: simple byte loop.  retlt returns at once
        * for len == 0; otherwise the loop walks dst back down so that
        * r12 again holds the original dst, which is the return value.
        */
       sub     len, 1
       retlt   r12
1:     ld.ub   r8, --src
       st.b    --dst, r8
       sub     len, 1
       brge    1b
       retal   r12

.Lmore_than_31:
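       /*
        * 32 bytes or more: use block moves.  r0-r7 must be preserved
        * for the caller, so save them together with lr; the final popm
        * then returns through pc.
        */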
       pushm   r0-r7, lr

       /*
        * Check alignment; andl ..., COH clears the upper halfword, so
        * r8 becomes src & 31 (and, below, dst & 3).
        */
       mov     r8, src
       andl    r8, 31, COH
       brne    .Lunaligned_src
       mov     r8, r12
       andl    r8, 3, COH
       brne    .Lunaligned_dst

.Laligned_copy:
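       /*
        * src is cacheline-aligned and dst is word-aligned: move 32
        * bytes per iteration with ldm/stm, working downwards.
        */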
       sub     len, 32
       brlt    .Lless_than_32

1:     /* Copy 32 bytes at a time */
       sub     src, 32
       ldm     src, r0-r7
       sub     dst, 32
       sub     len, 32
       stm     dst, r0-r7
       brge    1b

.Lless_than_32:
       /* Copy 16 more bytes if possible; sub with -16 adds 16 back */
       sub     len, -16
       brlt    .Lless_than_16
       sub     src, 16
       ldm     src, r0-r3
       sub     dst, 16
       sub     len, 16
       stm     dst, r0-r3

.Lless_than_16:
       /*
        * Do the remaining 0..15 bytes as byte copies; adding the 16
        * back makes len exact again.
        */
       sub     len, -16
       breq    2f
1:     ld.ub   r0, --src
       st.b    --dst, r0
       sub     len, 1
       brne    1b

2:     popm    r0-r7, pc

.Lunaligned_src:
       /* Make src cacheline-aligned. r8 = (src & 31) */
       sub     len, r8
1:     ld.ub   r0, --src
       st.b    --dst, r0
       sub     r8, 1
       brne    1b

       /* If dst is word-aligned, we're ready to go */
       pref    src[-4]
       mov     r8, 3
       tst     dst, r8
       breq    .Laligned_copy

.Lunaligned_dst:
       /* src is aligned, but dst is not. Expect bad performance */
       sub     len, 4
       brlt    2f
1:     ld.w    r0, --src
       st.w    --dst, r0
       sub     len, 4
       brge    1b

       /*
        * 0..3 bytes remain.  neg makes len = 4 - remainder; adding
        * len << 2 to pc then skips just enough of the ld.ub/st.b pairs
        * below so that exactly the remaining bytes are copied.
        */
2:     neg     len
       add     pc, pc, len << 2
       .rept   3
       ld.ub   r0, --src
       st.b    --dst, r0
       .endr

       popm    r0-r7, pc
       .size   memmove, . - memmove

libc_hidden_def(memmove)