From 4fcc031a7085a47b9a027a20a919574f8aab0768 Mon Sep 17 00:00:00 2001
From: Mike Frysinger
Date: Tue, 30 May 2006 09:13:53 +0000
Subject: import some optimized functions from blackfin cvs

---
 libc/string/bfin/strcmp.S | 121 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 121 insertions(+)
 create mode 100644 libc/string/bfin/strcmp.S

(limited to 'libc/string/bfin/strcmp.S')

diff --git a/libc/string/bfin/strcmp.S b/libc/string/bfin/strcmp.S
new file mode 100644
index 000000000..6365024ec
--- /dev/null
+++ b/libc/string/bfin/strcmp.S
@@ -0,0 +1,121 @@
+/* strcmp.S
+ * Copyright (C) 2003, 2005, 2006 Analog Devices Inc., All Rights Reserved.
+ *
+ * This file is subject to the terms and conditions of the GNU Library General
+ * Public License. See the file "COPYING.LIB" in the main directory of this
+ * archive for more details.
+ *
+ * Non-LGPL License also available as part of VisualDSP++
+ * http://www.analog.com/processors/resources/crosscore/visualDspDevSoftware.html
+ */
+
+/* Fast strcmp() for Blackfin.
+ * When both strings are 4-byte aligned, this processes four characters
+ * at a time.  Uses a hw loop with "very big" count to loop "forever",
+ * until a difference or a terminating zero is found.
+ * Once the end-case word has been identified, breaks out of the
+ * loop to check more carefully (same as the unaligned case).
+ */
+
+.text
+
+.align 2
+
+.global _strcmp
+.type _strcmp, STT_FUNC
+_strcmp:		// entry: R0, R1 = the two string pointers; result is left in R0
+	[--sp] = (R7:4);	// push R7..R4 (R6 and R7 are overwritten below)
+	p1 = r0;	// p1 walks the first string
+	p2 = r1;	// p2 walks the second string
+
+	p0 = -1;	// (need for loop counter init)
+
+	  // check if both pointers are 4-byte aligned
+	r0 = r0 | r1;	// check both pointers at same time
+	r0 <<= 30;	// dump all but last 2 bits
+	cc = az;	// are they zero?
+	if !cc jump unaligned;	// no; use unaligned code.
+			// fall-thru for aligned case..
+
+	  // note that r0 is zero from the shift above (it is the zero addend below)
+	  //           p0 set to -1
+
+	lsetup (beginloop, endloop) lc0=p0;	// hw loop 0, count taken from p0
+	  // pick up first words
+	r1 = [p1++];
+	r2 = [p2++];
+	  // make up mask:  0x00FF00FF
+	r7 = 0xFF;
+	r7.h = 0xFF;
+		// loop : 9 cycles to check 4 characters
+	cc = r1 == r2;	// prime CC for the first pass through the loop
+beginloop:
+	if !cc jump notequal4;	// compare failure, exit loop
+
+	  // starting with r1 = 44332211 (one hex pair per character)
+	  // see if char 3 or char 1 is 0
+	r3 = r1 & r7;		// form 00330011
+	  // add to zero to set AZ (r6 is never read), and (r2 is free, reload)
+	r6 = r3 +|+ r0 || r2 = [p2++] || nop;
+	cc = az;	// true if either is zero
+	r3 = r1 ^ r3;	        // form 44002200 (4321^0301 => 4020)
+				// (trick, saves having another mask)
+	  // add to zero to set AZ (r6 is never read), and (r1 is free, reload)
+	r6 = r3 +|+ r0 || r1 = [p1++] || nop;
+	cc |= az;	// true if either is zero
+	if cc jump zero4;	// leave if a zero somewhere
+endloop:
+	cc = r1 == r2;	// compare the freshly loaded words (last insn of the hw loop)
+
+ // loop exits
+notequal4:		// compare failure on 4-char compare
+			// address pointers are one word ahead;
+			// faster to use zero4 exit code (falls through into it)
+	p1 += 4;
+	p2 += 4;
+
+zero4:			// one of the bytes in the word just tested is zero
+			// but we've already fetched the next word; so
+			// back up two words to look at failing word again
+	p1 += -8;
+	p2 += -8;
+
+
+
+		// here when pointers are unaligned: checks one
+		// character at a time.  Also used at the end of
+		// the word-check algorithm to figure out what happened
+unaligned:
+	  //	R0 may be non-zero when jumped to from above; it is cleared below.
+	  //           p0 still set to -1
+
+	r0 = 0 (Z);
+	r1 = B[p1++] (Z);
+	r2 = B[p2++] (Z);
+	lsetup (beginloop1, endloop1) lc0=p0;	// hw loop 0 again, count from p0
+
+beginloop1:
+	cc = r1;	// first char must be non-zero
+	  // chars must be the same (the next char from p1 is loaded in parallel)
+	r3 = r2 - r1 (NS) || r1 = B[p1++] (Z) || nop;
+	cc &= az;
+	r3 = r0 - r2;	// second char must be non-zero (0 - r2 is negative iff r2 != 0)
+	cc &= an;
+	if !cc jump exitloop1;
+endloop1:
+	r2 = B[p2++] (Z);	// next char from p2 (last insn of the hw loop)
+
+exitloop1: // here means we found a zero or a difference.
+	   // we have r2 = char N, p2 -> N+1, r1 = char N+1, p1 -> N+2
+	r1=B[p1+ -2] (Z);	// re-read char N of the first string
+	r0 = r1 - r2;	// result: difference of the two zero-extended chars
+	(r7:4) = [sp++];	// pop R4..R7
+	rts;
+.size _strcmp,.-_strcmp
+
+libc_hidden_def (strcmp)
+
+#ifndef __UCLIBC_HAS_LOCALE__
+strong_alias (strcmp,strcoll)
+libc_hidden_def (strcoll)
+#endif
-- 
cgit v1.2.3