blob: 22794ec33bd31a249b3d21a44ef7633141389d29 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
|
/*
* Copyright (C) 2004 Joakim Tjernlund
* Copyright (C) 2000-2005 Erik Andersen <andersen@uclibc.org>
*
* Licensed under the LGPL v2.1, see the file COPYING.LIB in this tarball.
*/
/* These are carefully optimized mem*() functions for PPC written in C.
* Don't muck around with these functions without checking the generated
* assembler code.
* It is possible to optimize these significantly more by using specific
* data cache instructions (mainly dcbz). However, that requires knowledge
* about the CPU's cache line size.
*
* BUG ALERT!
* The cache instructions on MPC8xx CPUs are buggy (they don't update
* the DAR register when causing a DTLB Miss/Error) and cannot be
* used on 8xx CPUs without a kernel patch to work around this
* problem.
*/
#include <stdint.h>
#include <string.h>
/* PPC can do pre increment and load/store, but not post increment and
load/store. Therefore use *++ptr instead of *ptr++. */
/*
 * Copy len bytes from the object at `from` to the object at `to`.
 *
 * Returns `to`.  The two regions are expected not to overlap; nothing
 * here handles overlapping copies.
 *
 * Strategy:
 *   - If at least 8 bytes are requested and the destination is not
 *     4-byte aligned, copy 1-3 single bytes until it is.
 *   - Move the bulk as pairs of 32-bit words (8 bytes per iteration).
 *   - Finish with at most one more word and at most 3 single bytes.
 * Only the destination is aligned; source words may be read unaligned.
 *
 * Both cursors are kept 4 bytes behind the next byte to copy, so every
 * access has the "advance the address, then access" shape.
 *
 * Words are accessed as uint32_t rather than unsigned long: the stride
 * arithmetic below (+4 / +8) hard-codes 4-byte words, and unsigned long
 * is 8 bytes on LP64 targets, where the wider accesses would write past
 * the end of the destination.  The word type is marked may_alias because
 * it is used to access objects of arbitrary type.
 */
void *memcpy(void *to, const void *from, size_t len)
{
	/* Exactly-4-byte word that is allowed to alias any object type. */
	typedef uint32_t __attribute__((__may_alias__)) word_t;

	size_t rem, chunks;
	uint32_t tmp1, tmp2;
	unsigned char *tmp_to;
	const unsigned char *tmp_from = (const unsigned char *)from;

	chunks = len / 8;
	/* Bias both cursors by one word (see header comment). */
	tmp_from -= 4;
	tmp_to = (unsigned char *)to - 4;
	if (!chunks)
		goto lessthan8;
	/* to and to - 4 have the same alignment modulo 4. */
	rem = (uintptr_t)tmp_to % 4;
	if (rem)
		goto align;
copy_chunks:
	do {
		/* make gcc to load all data, then store it */
		tmp1 = *(const word_t *)(tmp_from + 4);
		tmp_from += 8;
		tmp2 = *(const word_t *)tmp_from;
		*(word_t *)(tmp_to + 4) = tmp1;
		tmp_to += 8;
		*(word_t *)tmp_to = tmp2;
	} while (--chunks);
lessthan8:
	/* 0..7 bytes remain: at most one word, then at most 3 bytes. */
	len = len % 8;
	if (len >= 4) {
		tmp_from += 4;
		tmp_to += 4;
		*(word_t *)tmp_to = *(const word_t *)tmp_from;
		len -= 4;
	}
	if (!len)
		return to;
	/* Re-bias from "4 behind" to "1 behind" for the byte-wise tail. */
	tmp_from += 3;
	tmp_to += 3;
	do {
		*++tmp_to = *++tmp_from;
	} while (--len);
	return to;
align:
	/* Only reached with len >= 8, so len -= rem cannot wrap. */
	/* ???: Do we really need to generate the carry flag here? If not, then:
	   rem -= 4; */
	rem = 4 - rem;
	len -= rem;
	do {
		*(tmp_to + 4) = *(tmp_from + 4);
		++tmp_from;
		++tmp_to;
	} while (--rem);
	chunks = len / 8;
	if (chunks)
		goto copy_chunks;
	goto lessthan8;
}
/* NOTE(review): libc_hidden_def is not defined in this file; presumably it
 * publishes the hidden, libc-internal alias for memcpy -- confirm against
 * the project's symbol-visibility header. */
libc_hidden_def(memcpy)
|