From 2e6cfb475a941a7b6429c71b245f5452909437dd Mon Sep 17 00:00:00 2001 From: Eric Andersen Date: Tue, 27 Jan 2004 07:36:19 +0000 Subject: Joakim Tjernlund writes: Hi Erik I have had some fun trying to optimize memcpy, memset and memmove for PPC. There are only boot tested, but I don't expect any problems :) Read the comments in powerpc/string.c for more info. Patch is relative to libc/string Jocke --- libc/string/powerpc/Makefile | 43 +++++++++ libc/string/powerpc/string.c | 201 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 244 insertions(+) create mode 100644 libc/string/powerpc/Makefile create mode 100644 libc/string/powerpc/string.c (limited to 'libc/string/powerpc') diff --git a/libc/string/powerpc/Makefile b/libc/string/powerpc/Makefile new file mode 100644 index 000000000..d99798694 --- /dev/null +++ b/libc/string/powerpc/Makefile @@ -0,0 +1,43 @@ +# Makefile for uClibc +# +# Copyright (C) 2000-2003 Erik Andersen +# +# This program is free software; you can redistribute it and/or modify it under +# the terms of the GNU Library General Public License as published by the Free +# Software Foundation; either version 2 of the License, or (at your option) any +# later version. +# +# This program is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU Library General Public License for more +# details. +# +# You should have received a copy of the GNU Library General Public License +# along with this program; if not, write to the Free Software Foundation, Inc., +# 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +TOPDIR=../../../ +include $(TOPDIR)Rules.mak + +MSRC= string.c +MOBJ= memcpy.o memmove.o memset.o bzero.o +OBJS=$(MOBJ) + +all: $(OBJS) $(LIBC) + +$(LIBC): ar-target + +ar-target: $(OBJS) + $(AR) $(ARFLAGS) $(LIBC) $(OBJS) + +$(MOBJ): $(MSRC) + $(CC) $(CFLAGS) -DL_$* $< -c -o $*.o + $(STRIPTOOL) -x -R .note -R .comment $*.o + +$(COBJS): %.o : %.c + $(CC) $(CFLAGS) -c $< -o $@ + $(STRIPTOOL) -x -R .note -R .comment $*.o + +clean: + $(RM) *.[oa] *~ core + diff --git a/libc/string/powerpc/string.c b/libc/string/powerpc/string.c new file mode 100644 index 000000000..32485670d --- /dev/null +++ b/libc/string/powerpc/string.c @@ -0,0 +1,201 @@ +/* + * This program is free software; you can redistribute it and/or modify it under + * the terms of the GNU Library General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) any + * later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU Library General Public License for more + * details. + */ + +/* These are carefully optimized mem*() functions for PPC written in C. + * Don't muck around with these function without checking the generated + * assmbler code. + * It is possible to optimize these significantly more by using specific + * data cache instructions(mainly dcbz). However that requires knownledge + * about the CPU's cache line size. + * + * BUG ALERT! + * The cache instructions on MPC8xx CPU's are buggy(they don't update + * the DAR register when causing a DTLB Miss/Error) and cannot be + * used on 8xx CPU's without a kernel patch to work around this + * problem. + * + * Copyright (C) 2004 Joakim Tjernlund + */ + +#define _STDIO_UTILITY +#define _GNU_SOURCE +#include +#include /* for __LOCALE_C_ONLY */ + +#ifdef L_memcpy +void *memcpy(void *to, const void *from, size_t n) +/* PPC can do pre increment and load/store, but not post increment and load/store. + Therefore use *++ptr instead of *ptr++. */ +{ + unsigned long rem, chunks, tmp1, tmp2; + void *tmp_to; + + chunks = n / 8; + from -= 4; + tmp_to = to - 4; + if (!chunks) + goto lessthan8; + rem = (unsigned long )tmp_to % 4; + if (rem) + goto align; + copy_chunks: + do { + /* make gcc to load all data, then store it */ + tmp1 = *(unsigned long *)(from+4); + from += 8; + tmp2 = *(unsigned long *)from; + *(unsigned long *)(tmp_to+4) = tmp1; + tmp_to += 8; + *(unsigned long *)tmp_to = tmp2; + } while (--chunks); + lessthan8: + n = n % 8; + if (n >= 4) { + *++(unsigned long *)tmp_to = *++(unsigned long *)from; + n = n-4; + } + if (!n ) return to; + from += 3; + tmp_to += 3; + do { + *++(unsigned char *)tmp_to = *++(unsigned char *)from; + } while (--n); + + return to; + align: + rem = 4 - rem; + n = n-rem; + do { + *(unsigned char *)(tmp_to+4) = *(unsigned char *)(from+4); + ++from; + ++tmp_to; + } while (--rem); + chunks = n / 8; + if (chunks) + goto copy_chunks; + goto lessthan8; +} +#endif + +#ifdef L_memmove +void *memmove(void *to, const void *from, size_t n) +{ + unsigned long rem, chunks, tmp1, tmp2; + void *tmp_to; + + if (from >= to) + return memcpy(to, from, n); + chunks = n / 8; + from += n; + tmp_to = to + n; + if (!chunks) + goto lessthan8; + rem = (unsigned long )tmp_to % 4; + if (rem) + goto align; + copy_chunks: + do { + /* make gcc to load all data, then store it */ + tmp1 = *(unsigned long *)(from-4); + from -= 8; + tmp2 = *(unsigned long *)from; + *(unsigned long *)(tmp_to-4) = tmp1; + tmp_to -= 8; + *(unsigned long *)tmp_to = tmp2; + } while (--chunks); + lessthan8: + n = n % 8; + if (n >= 4) { + *--(unsigned long *)tmp_to = *--(unsigned long *)from; + n = n-4; + } + if (!n ) return to; + do { + *--(unsigned char *)tmp_to = *--(unsigned char *)from; + } while (--n); + + return to; + align: + rem = 4 - rem; + n = n-rem; + do { + *--(unsigned char *)tmp_to = *--(unsigned char *)from; + } while (--rem); + chunks = n / 8; + if (chunks) + goto copy_chunks; + goto lessthan8; +} +#endif + +#ifdef L_memset +static inline int expand_byte_word(int c){ + /* this does: + c = c << 8 | c; + c = c << 16 | c ; + */ + asm("rlwimi %0,%0,8,16,23\n" + "\trlwimi %0,%0,16,0,15\n" + : "=r" (c) : "0" (c)); + return c; +} +void *memset(void *to, int c, size_t n) +{ + unsigned long rem, chunks; + void *tmp_to; + + chunks = n / 8; + tmp_to = to - 4; + c = expand_byte_word(c); + if (!chunks) + goto lessthan8; + rem = (unsigned long )tmp_to % 4; + if (rem) + goto align; + copy_chunks: + do { + *++(unsigned long *)tmp_to = c; + *++(unsigned long *)tmp_to = c; + } while (--chunks); + lessthan8: + n = n % 8; + if (n >= 4) { + *++(unsigned long *)tmp_to = c; + n = n-4; + } + if (!n ) return to; + tmp_to += 3; + do { + *++(unsigned char *)tmp_to = c; + } while (--n); + + return to; + align: + rem = 4 - rem; + n = n-rem; + do { + *(unsigned char *)(tmp_to+4) = c; + ++tmp_to; + } while (--rem); + chunks = n / 8; + if (chunks) + goto copy_chunks; + goto lessthan8; +} +#endif + +#ifdef L_bzero +void bzero(void *s, size_t n) +{ + (void)memset(s, 0, n); +} +#endif -- cgit v1.2.3