From 4fcc031a7085a47b9a027a20a919574f8aab0768 Mon Sep 17 00:00:00 2001
From: Mike Frysinger
Date: Tue, 30 May 2006 09:13:53 +0000
Subject: import some optimized functions from blackfin cvs

---
 libc/string/bfin/Makefile  |  13 +++++
 libc/string/bfin/memchr.S  |  54 ++++++++++++++++++++
 libc/string/bfin/memcmp.S  | 101 +++++++++++++++++++++++++++++++++++++
 libc/string/bfin/memcpy.S  |  74 +++++++++++++++++++++++++++
 libc/string/bfin/memmove.S |  95 +++++++++++++++++++++++++++++++++++
 libc/string/bfin/memset.S  |  86 ++++++++++++++++++++++++++++++++
 libc/string/bfin/strcmp.S  | 121 +++++++++++++++++++++++++++++++++++++++++++++
 7 files changed, 544 insertions(+)
 create mode 100644 libc/string/bfin/Makefile
 create mode 100644 libc/string/bfin/memchr.S
 create mode 100644 libc/string/bfin/memcmp.S
 create mode 100644 libc/string/bfin/memcpy.S
 create mode 100644 libc/string/bfin/memmove.S
 create mode 100644 libc/string/bfin/memset.S
 create mode 100644 libc/string/bfin/strcmp.S

(limited to 'libc/string/bfin')

diff --git a/libc/string/bfin/Makefile b/libc/string/bfin/Makefile
new file mode 100644
index 000000000..0a95346fd
--- /dev/null
+++ b/libc/string/bfin/Makefile
@@ -0,0 +1,13 @@
+# Makefile for uClibc
+#
+# Copyright (C) 2000-2005 Erik Andersen
+#
+# Licensed under the LGPL v2.1, see the file COPYING.LIB in this tarball.
+#
+
+top_srcdir:=../../../
+top_builddir:=../../../
+all: objs
+include $(top_builddir)Rules.mak
+include ../Makefile.in
+include $(top_srcdir)Makerules
diff --git a/libc/string/bfin/memchr.S b/libc/string/bfin/memchr.S
new file mode 100644
index 000000000..6ecaf37a1
--- /dev/null
+++ b/libc/string/bfin/memchr.S
@@ -0,0 +1,54 @@
+/* memchr.S
+ * Copyright (C) 2003, 2005, 2006 Analog Devices Inc., All Rights Reserved.
+ *
+ * This file is subject to the terms and conditions of the GNU Library General
+ * Public License. See the file "COPYING.LIB" in the main directory of this
+ * archive for more details.
+ *
+ * Non-LGPL License also available as part of VisualDSP++
+ * http://www.analog.com/processors/resources/crosscore/visualDspDevSoftware.html
+ */
+
+/* void *memchr(const void *s, int c, size_t n);
+ * R0 = address (s)
+ * R1 = sought byte (c)
+ * R2 = count (n)
+ *
+ * Returns pointer to located character.
+ */
+
+.text
+
+.align 2
+
+.global _memchr
+.type _memchr, STT_FUNC
+_memchr:
+	P0 = R0;	// P0 = address
+	P2 = R2;	// P2 = count
+	R1 = R1.B(Z);	// compare against the low byte of c only
+	CC = R2 == 0;	// n == 0: nothing to search
+	IF CC JUMP failed;
+
+bytes:
+	LSETUP (byte_loop_s , byte_loop_e) LC0=P2;
+
+byte_loop_s:
+	R3 = B[P0++](Z);
+	CC = R3 == R1;
+	IF CC JUMP found;
+byte_loop_e:
+	NOP;		// loop bottom; keeps the failed: code outside the hw loop
+
+failed:
+	R0=0;
+	RTS;
+
+found:
+	R0 = P0;
+	R0 += -1;	// P0 was post-incremented past the match
+	RTS;
+
+.size _memchr,.-_memchr
+
+libc_hidden_def (memchr)
diff --git a/libc/string/bfin/memcmp.S b/libc/string/bfin/memcmp.S
new file mode 100644
index 000000000..f2679d5ae
--- /dev/null
+++ b/libc/string/bfin/memcmp.S
@@ -0,0 +1,101 @@
+/* memcmp.S
+ * Copyright (C) 2003, 2005, 2006 Analog Devices Inc., All Rights Reserved.
+ *
+ * This file is subject to the terms and conditions of the GNU Library General
+ * Public License. See the file "COPYING.LIB" in the main directory of this
+ * archive for more details.
+ *
+ * Non-LGPL License also available as part of VisualDSP++
+ * http://www.analog.com/processors/resources/crosscore/visualDspDevSoftware.html
+ */
+
+/* int memcmp(const void *s1, const void *s2, size_t n);
+ * R0 = First Address (s1)
+ * R1 = Second Address (s2)
+ * R2 = count (n)
+ *
+ * Favours word aligned data.
+ */
+
+.text
+
+.align 2
+
+.global _memcmp
+.type _memcmp, STT_FUNC
+_memcmp:
+	I1 = P3;	// preserve P3 in I1 (restored before each RTS)
+	P0 = R0;	// P0 = s1 address
+	P3 = R1;	// P3 = s2 Address
+	P2 = R2 ;	// P2 = count
+	CC = R2 <= 7(IU);
+	IF CC JUMP too_small;
+	I0 = R1;	// s2
+	R1 = R1 | R0;	// OR addresses together
+	R1 <<= 30;	// check bottom two bits
+	CC = AZ;	// AZ set if zero.
+	IF !CC JUMP bytes ;	// Jump if addrs not aligned.
+
+	P1 = P2 >> 2;	// count = n/4
+	R3 = 3;
+	R2 = R2 & R3;	// remainder
+	P2 = R2;	// set remainder
+
+	LSETUP (quad_loop_s , quad_loop_e) LC0=P1;
+quad_loop_s:
+#if !defined(__WORKAROUND_AVOID_DAG1)
+	MNOP || R0 = [P0++] || R1 = [I0++];
+#else
+	R0 = [P0++];
+	R1 = [I0++];
+#endif
+	CC = R0 == R1;
+	IF !CC JUMP quad_different;
+quad_loop_e:
+	NOP;
+
+	P3 = I0;	// s2
+too_small:
+	CC = P2 == 0;	//Check zero count
+	IF CC JUMP finished;	// very unlikely
+
+bytes:
+	LSETUP (byte_loop_s , byte_loop_e) LC0=P2;
+byte_loop_s:
+	R1 = B[P3++](Z);	// *s2
+	R0 = B[P0++](Z);	// *s1
+	CC = R0 == R1;
+	IF !CC JUMP different;
+byte_loop_e:
+	NOP;
+
+different:
+	R0 = R0 - R1;
+	P3 = I1;
+	RTS;
+
+quad_different:
+	// We've read two quads which don't match.
+	// Can't just compare them, because we're
+	// a little-endian machine, so the MSBs of
+	// the regs occur at later addresses in the
+	// string.
+	// Arrange to re-read those two quads again,
+	// byte-by-byte.
+	P0 += -4;	// back up to the start of the
+	P3 = I0;	// quads, and increase the
+	P2 += 4;	// remainder count
+	P3 += -4;
+	JUMP bytes;
+
+finished:
+	R0 = 0;
+	P3 = I1;
+	RTS;
+.size _memcmp,.-_memcmp
+
+libc_hidden_def (memcmp)
+
+#ifdef __UCLIBC_SUSV3_LEGACY__
+strong_alias (memcmp,bcmp)
+#endif
diff --git a/libc/string/bfin/memcpy.S b/libc/string/bfin/memcpy.S
new file mode 100644
index 000000000..e7ba7048e
--- /dev/null
+++ b/libc/string/bfin/memcpy.S
@@ -0,0 +1,74 @@
+/* memcpy.S
+ * Copyright (C) 2003, 2005, 2006 Analog Devices Inc., All Rights Reserved.
+ *
+ * This file is subject to the terms and conditions of the GNU Library General
+ * Public License. See the file "COPYING.LIB" in the main directory of this
+ * archive for more details.
+ *
+ * Non-LGPL License also available as part of VisualDSP++
+ * http://www.analog.com/processors/resources/crosscore/visualDspDevSoftware.html
+ */
+
+/* void *memcpy(void *dest, const void *src, size_t n);
+ * R0 = To Address (dest) (leave unchanged to form result)
+ * R1 = From Address (src)
+ * R2 = count
+ *
+ * Note: Favours word alignment
+ */
+
+.text
+
+.align 2
+
+.global _memcpy
+.type _memcpy, STT_FUNC
+_memcpy:
+	[--SP] = P3;	// preserve P3 on the stack (popped before each RTS)
+	P0 = R0;	// P0 = To address
+	P3 = R1;	// P3 = From Address
+	P2 = R2 ;	// P2 = count
+	CC = R2 <= 7(IU);
+	IF CC JUMP too_small;
+	I0 = R1;
+	R3 = R1 | R0;	// OR addresses together
+	R3 <<= 30;	// check bottom two bits
+	CC = AZ;	// AZ set if zero.
+	IF !CC JUMP bytes ;	// Jump if addrs not aligned.
+	P1 = P2 >> 2;	// count = n/4
+	P1 += -1;	// one load and one store happen outside the loop
+	R3 = 3;
+	R2 = R2 & R3;	// remainder
+	P2 = R2;	// set remainder
+	R1 = [I0++];
+#if !defined(__WORKAROUND_AVOID_DAG1)
+	LSETUP (quad_loop , quad_loop) LC0=P1;
+quad_loop:	MNOP || [P0++] = R1 || R1 = [I0++];
+#else
+	LSETUP (quad_loop_s , quad_loop_e) LC0=P1;
+quad_loop_s:	[P0++] = R1;
+quad_loop_e:	R1 = [I0++];
+#endif
+	[P0++] = R1;
+
+	CC = P2 == 0;	// any remaining bytes?
+	P3 = I0;	// Amend P3 for remaining copy
+	IF !CC JUMP bytes;
+	P3 = [SP++];
+	RTS;
+
+too_small:
+	CC = P2 == 0;	//Check zero count
+	IF CC JUMP finished;	// very unlikely
+
+bytes:
+	LSETUP (byte_loop_s , byte_loop_e) LC0=P2;
+byte_loop_s:	R1 = B[P3++](Z);
+byte_loop_e:	B[P0++] = R1;
+
+finished:
+	P3 = [SP++];
+	RTS;
+.size _memcpy,.-_memcpy
+
+libc_hidden_def (memcpy)
diff --git a/libc/string/bfin/memmove.S b/libc/string/bfin/memmove.S
new file mode 100644
index 000000000..3d446f326
--- /dev/null
+++ b/libc/string/bfin/memmove.S
@@ -0,0 +1,95 @@
+/* memmove.S
+ * Copyright (C) 2003, 2005, 2006 Analog Devices Inc., All Rights Reserved.
+ *
+ * This file is subject to the terms and conditions of the GNU Library General
+ * Public License. See the file "COPYING.LIB" in the main directory of this
+ * archive for more details.
+ *
+ * Non-LGPL License also available as part of VisualDSP++
+ * http://www.analog.com/processors/resources/crosscore/visualDspDevSoftware.html
+ */
+
+/* void *memmove(void *dest, const void *src, size_t n);
+ * R0 = To Address (dest) (leave unchanged to form result)
+ * R1 = From Address (src)
+ * R2 = count (n)
+ *
+ * Note: Data may overlap
+ */
+
+.text
+
+.align 2
+
+.global _memmove
+.type _memmove, STT_FUNC
+_memmove:
+	I1 = P3;	// preserve P3 in I1 (restored before each RTS)
+	P0 = R0;	// P0 = To address
+	P3 = R1;	// P3 = From Address
+	P2 = R2 ;	// P2 = count
+	CC = P2 == 0;	//Check zero count
+	IF CC JUMP finished;	// very unlikely
+
+	CC = R1 < R0 (IU);	// From < To
+	IF !CC JUMP no_overlap;
+	R3 = R1 + R2;
+	CC = R0 <= R3 (IU);	// (From+len) >= To
+	IF CC JUMP overlap;
+no_overlap:
+	R3 = 11;
+	CC = R2 <= R3;
+	IF CC JUMP bytes;
+	R3 = R1 | R0;	// OR addresses together
+	R3 <<= 30;	// check bottom two bits
+	CC = AZ;	// AZ set if zero.
+	IF !CC JUMP bytes ;	// Jump if addrs not aligned.
+
+	I0 = P3;
+	P1 = P2 >> 2;	// count = n/4
+	P1 += -1;	// one load and one store happen outside the loop
+	R3 = 3;
+	R2 = R2 & R3;	// remainder
+	P2 = R2;	// set remainder
+	R1 = [I0++];
+
+#if !defined(__WORKAROUND_AVOID_DAG1)
+	LSETUP (quad_loop , quad_loop) LC0=P1;
+quad_loop:	MNOP || [P0++] = R1 || R1 = [I0++];
+#else
+	LSETUP (quad_loop_s, quad_loop_e) LC0=P1;
+quad_loop_s:	[P0++] = R1;
+quad_loop_e:	R1 = [I0++];
+#endif
+	[P0++] = R1;
+
+	CC = P2 == 0;	// any remaining bytes?
+	P3 = I0;	// Amend P3 to updated ptr.
+	IF !CC JUMP bytes;
+	P3 = I1;
+	RTS;
+
+bytes:	LSETUP (byte2_s , byte2_e) LC0=P2;
+byte2_s:	R1 = B[P3++](Z);
+byte2_e:	B[P0++] = R1;
+
+finished:
+	P3 = I1;
+	RTS;
+
+overlap:
+	P2 += -1;	// n - 1 = offset of the last byte
+	P0 = P0 + P2;	// copy backwards, starting at the last byte
+	P3 = P3 + P2;
+	R1 = B[P3--] (Z);
+	CC = P2 == 0;
+	IF CC JUMP no_loop;
+	LSETUP (ol_s, ol_e) LC0 = P2;
+ol_s:	B[P0--] = R1;
+ol_e:	R1 = B[P3--] (Z);
+no_loop:	B[P0] = R1;
+	P3 = I1;
+	RTS;
+.size _memmove,.-_memmove
+
+libc_hidden_def (memmove)
diff --git a/libc/string/bfin/memset.S b/libc/string/bfin/memset.S
new file mode 100644
index 000000000..bd8eb4b6a
--- /dev/null
+++ b/libc/string/bfin/memset.S
@@ -0,0 +1,86 @@
+/* memset.S
+ * Copyright (C) 2003, 2005, 2006 Analog Devices Inc., All Rights Reserved.
+ *
+ * This file is subject to the terms and conditions of the GNU Library General
+ * Public License. See the file "COPYING.LIB" in the main directory of this
+ * archive for more details.
+ *
+ * Non-LGPL License also available as part of VisualDSP++
+ * http://www.analog.com/processors/resources/crosscore/visualDspDevSoftware.html
+ */
+
+/* void *memset(void *s, int c, size_t n);
+ * R0 = address (s) (leave unchanged to form result)
+ * R1 = filler byte (c)
+ * R2 = count (n)
+ *
+ * Note: Favours word aligned data.
+ */
+
+.text
+
+.align 2
+
+.global _memset
+.type _memset, STT_FUNC
+_memset:
+	P0 = R0 ;	// P0 = address
+	P2 = R2 ;	// P2 = count
+	R3 = R0 + R2;	// end
+	CC = R2 <= 7(IU);
+	IF CC JUMP too_small;
+	R1 = R1.B (Z);	// R1 = fill char
+	R2 = 3;
+	R2 = R0 & R2;	// addr bottom two bits
+	CC = R2 == 0;	// CC set if aligned.
+	IF !CC JUMP force_align ;	// Jump if addr not aligned.
+
+aligned:
+	P1 = P2 >> 2;	// count = n/4
+	R2 = R1 << 8;	// create quad filler
+	R2.L = R2.L + R1.L(NS);
+	R2.H = R2.L + R1.H(NS);
+	P2 = R3;
+
+	LSETUP (quad_loop , quad_loop) LC0=P1;
+quad_loop:
+	[P0++] = R2;
+
+	CC = P0 == P2;
+	IF !CC JUMP bytes_left;
+	RTS;
+
+bytes_left:
+	R2 = R3;	// end point
+	R3 = P0;	// current position
+	R2 = R2 - R3;	// bytes left
+	P2 = R2;
+
+too_small:
+	CC = P2 == 0;	//Check zero count
+	IF CC JUMP finished;	// Unusual
+
+bytes:	LSETUP (byte_loop , byte_loop) LC0=P2;
+byte_loop:	B[P0++] = R1;
+
+finished:
+	RTS;
+
+force_align:
+	CC = BITTST (R0, 0 );	// odd byte
+	R0 = 4;
+	R0 = R0 - R2;
+	P1 = R0;
+	R0 = P0;	// Recover return address
+	IF !CC JUMP skip1;
+	B[P0++] = R1;
+skip1:
+	CC = R2 <= 2;	// 2 bytes
+	P2 -= P1;	// reduce count
+	IF !CC JUMP aligned;
+	B[P0++] = R1;
+	B[P0++] = R1;
+	JUMP aligned;
+.size _memset,.-_memset
+
+libc_hidden_def (memset)
diff --git a/libc/string/bfin/strcmp.S b/libc/string/bfin/strcmp.S
new file mode 100644
index 000000000..6365024ec
--- /dev/null
+++ b/libc/string/bfin/strcmp.S
@@ -0,0 +1,121 @@
+/* strcmp.S
+ * Copyright (C) 2003, 2005, 2006 Analog Devices Inc., All Rights Reserved.
+ *
+ * This file is subject to the terms and conditions of the GNU Library General
+ * Public License. See the file "COPYING.LIB" in the main directory of this
+ * archive for more details.
+ *
+ * Non-LGPL License also available as part of VisualDSP++
+ * http://www.analog.com/processors/resources/crosscore/visualDspDevSoftware.html
+ */
+
+/* Fast strcmp() for Blackfin.
+ * When both strings are aligned, this processes four characters at
+ * a time. Uses a hw loop with "very big" count to loop "forever",
+ * until difference or a terminating zero is found.
+ * Once the end-case word has been identified, breaks out of the
+ * loop to check more carefully (same as the unaligned case).
+ */
+
+.text
+
+.align 2
+
+.global _strcmp
+.type _strcmp, STT_FUNC
+_strcmp:
+	[--sp] = (R7:4);
+	p1 = r0;
+	p2 = r1;
+
+	p0 = -1;	// (need for loop counter init)
+
+	// check if word aligned
+	r0 = r0 | r1;	// check both pointers at same time
+	r0 <<= 30;	// dump all but last 2 bits
+	cc = az;	// are they zero?
+	if !cc jump unaligned;	// no; use unaligned code.
+	// fall-thru for aligned case..
+
+	// note that r0 is zero from the previous...
+	// p0 set to -1
+
+	lsetup (beginloop, endloop) lc0=p0;
+	// pick up first words
+	r1 = [p1++];
+	r2 = [p2++];
+	// make up mask: 0x00FF00FF
+	r7 = 0xFF;
+	r7.h = 0xFF;
+	// loop : 9 cycles to check 4 characters
+	cc = r1 == r2;
+beginloop:
+	if !cc jump notequal4;	// compare failure, exit loop
+
+	// starting with 44332211
+	// see if char 3 or char 1 is 0
+	r3 = r1 & r7;	// form 00330011
+	// add to zero, and (r2 is free, reload)
+	r6 = r3 +|+ r0 || r2 = [p2++] || nop;
+	cc = az;	// true if either is zero
+	r3 = r1 ^ r3;	// form 44002200 (4321^0301 => 4020)
+	// (trick, saves having another mask)
+	// add to zero, and (r1 is free, reload)
+	r6 = r3 +|+ r0 || r1 = [p1++] || nop;
+	cc |= az;	// true if either is zero
+	if cc jump zero4;	// leave if a zero somewhere
+endloop:
+	cc = r1 == r2;
+
+	// loop exits
+notequal4:	// compare failure on 4-char compare
+	// address pointers are one word ahead;
+	// faster to use zero4 exit code
+	p1 += 4;
+	p2 += 4;
+
+zero4:	// one of the bytes in word 1 is zero
+	// but we've already fetched the next word; so
+	// backup two to look at failing word again
+	p1 += -8;
+	p2 += -8;
+
+
+
+	// here when pointers are unaligned: checks one
+	// character at a time. Also use at the end of
+	// the word-check algorithm to figure out what happened
+unaligned:
+	// R0 is non-zero from before.
+	// p0 set to -1
+
+	r0 = 0 (Z);
+	r1 = B[p1++] (Z);
+	r2 = B[p2++] (Z);
+	lsetup (beginloop1, endloop1) lc0=p0;
+
+beginloop1:
+	cc = r1;	// first char must be non-zero
+	// chars must be the same
+	r3 = r2 - r1 (NS) || r1 = B[p1++] (Z) || nop;
+	cc &= az;
+	r3 = r0 - r2;	// second char must be non-zero
+	cc &= an;
+	if !cc jump exitloop1;
+endloop1:
+	r2 = B[p2++] (Z);
+
+exitloop1:	// here means we found a zero or a difference.
+	// we have r2(N), p2(N), r1(N+1), p1(N+2)
+	r1=B[p1+ -2] (Z);
+	r0 = r1 - r2;
+	(r7:4) = [sp++];
+	rts;
+.size _strcmp,.-_strcmp
+
+libc_hidden_def (strcmp)
+
+#ifndef __UCLIBC_HAS_LOCALE__
+strong_alias (strcmp,strcoll)
+libc_hidden_def (strcoll)
+#endif
-- 
cgit v1.2.3