From 22686a1383c4a4a319eaaa6b16b1a9540114bd66 Mon Sep 17 00:00:00 2001
From: Markos Chandras
Date: Tue, 5 Feb 2008 14:51:48 +0000
Subject: Add support for the Meta architecture

Meta cores are 32-bit, hardware-multithreaded, general-purpose embedded
processors which also feature a DSP instruction set, and can be found in
many digital radios. They are capable of running different operating
systems on different hardware threads; for example, a digital radio
might run RTOSes for DAB decoding and audio decoding on 3 hardware
threads, and run Linux on the 4th hardware thread to manage the user
interface, networking, etc. HTPs are also capable of running SMP Linux
on multiple hardware threads.

Signed-off-by: Markos Chandras
Signed-off-by: Bernhard Reutner-Fischer
---
 libc/string/metag/Makefile  |  13 ++
 libc/string/metag/memchr.S  | 156 ++++++++++++++++++++
 libc/string/metag/memcpy.S  | 189 ++++++++++++++++++++++++
 libc/string/metag/memmove.S | 350 ++++++++++++++++++++++++++++++++++++++++++++
 libc/string/metag/memset.S  |  90 ++++++++++++
 libc/string/metag/strchr.S  | 167 +++++++++++++++++++++
 libc/string/metag/strcmp.S  |  65 ++++++++
 libc/string/metag/strcpy.S  |  94 ++++++++++++
 8 files changed, 1124 insertions(+)
 create mode 100644 libc/string/metag/Makefile
 create mode 100644 libc/string/metag/memchr.S
 create mode 100644 libc/string/metag/memcpy.S
 create mode 100644 libc/string/metag/memmove.S
 create mode 100644 libc/string/metag/memset.S
 create mode 100644 libc/string/metag/strchr.S
 create mode 100644 libc/string/metag/strcmp.S
 create mode 100644 libc/string/metag/strcpy.S

diff --git a/libc/string/metag/Makefile b/libc/string/metag/Makefile
new file mode 100644
index 000000000..523cf6842
--- /dev/null
+++ b/libc/string/metag/Makefile
@@ -0,0 +1,13 @@
+# Makefile for uClibc
+#
+# Copyright (C) 2000-2005 Erik Andersen
+#
+# Licensed under the LGPL v2.1 or later, see the file COPYING.LIB in this tarball.
+#
+
+top_srcdir:=../../../
+top_builddir:=../../../
+all: objs
+include $(top_builddir)Rules.mak
+include ../Makefile.in
+include $(top_srcdir)Makerules
diff --git a/libc/string/metag/memchr.S b/libc/string/metag/memchr.S
new file mode 100644
index 000000000..8b48d863c
--- /dev/null
+++ b/libc/string/metag/memchr.S
@@ -0,0 +1,156 @@
+! Copyright (C) 2013 Imagination Technologies Ltd.
+!
+! Licensed under LGPL v2.1 or later, see the file COPYING.LIB in this tarball.
+
+	.text
+	.global	_memchr
+	.type	_memchr,function
+! D0Ar6 src
+! D0Ar2 c
+! D1Ar3 n
+_memchr:
+	CMP	D1Ar3, #0
+	BEQ	$Lexit_fail
+	!! convert c to unsigned char
+	AND	D0Ar2,D0Ar2,#0xff
+	MOV	D0Ar6, D1Ar1
+	MOV	D1Ar5, D0Ar6
+	!! test alignment
+	AND	D1Ar5, D1Ar5, #7
+	CMP	D1Ar5, #0
+	BNZ	$Lunaligned_loop
+	!! length must be greater than or equal to 8 for the aligned loop
+	CMP	D1Ar3, #8
+	BGE	$Laligned_setup
+$Lunaligned_loop:
+	!! get 1 char from s
+	GETB	D0Re0, [D0Ar6++]
+	!! increase alignment counter
+	ADD	D1Ar5, D1Ar5, #1
+	!! decrement n
+	SUB	D1Ar3, D1Ar3, #1
+	!! exit if we have a match
+	CMP	D0Re0, D0Ar2
+	BZ	$Lexit_success1
+	!! exit if we have hit the end of the string
+	CMP	D1Ar3, #0
+	BZ	$Lexit_fail
+	!! fall through if the buffer is aligned now
+	CMP	D1Ar5, #8
+	BNE	$Lunaligned_loop
+	!! fall through if there are at least 8 bytes left
+	CMP	D1Ar3, #8
+	BLT	$Lunaligned_loop
+$Laligned_setup:
+	!! replicate c across all 4 bytes of a word
+	MOV	D0Ar4, D0Ar2
+	LSL	D0Ar4, D0Ar4, #8
+	ADD	D0Ar4, D0Ar4, D0Ar2
+	LSL	D0Ar4, D0Ar4, #8
+	ADD	D0Ar4, D0Ar4, D0Ar2
+	LSL	D0Ar4, D0Ar4, #8
+	ADD	D0Ar4, D0Ar4, D0Ar2
+	!! divide n by 8
+	MOV	D1Ar5, D1Ar3
+	LSR	D1Ar5, D1Ar5, #3
+$Laligned_loop:
+	!! get 8 chars from s
+	GETL	D0Re0, D1Re0, [D0Ar6++]
+	!! decrement loop counter
+	SUB	D1Ar5, D1Ar5, #1
+	!! test first 4 chars
+	XOR	D0Re0, D0Re0, D0Ar4
+	!! test second 4 chars
+	MOV	D0Ar2, D1Re0
+	XOR	D1Re0, D0Ar2, D0Ar4
+	!! check for matches in the first 4 chars
+	MOV	D0Ar2, D0Re0
+	ADDT	D0Re0, D0Re0, #HI(0xfefefeff)
+	ADD	D0Re0, D0Re0, #LO(0xfefefeff)
+	XOR	D0Ar2, D0Ar2, #-1
+	AND	D0Re0, D0Re0, D0Ar2
+	ANDMT	D0Re0, D0Re0, #HI(0x80808080)
+	ANDMB	D0Re0, D0Re0, #LO(0x80808080)
+	CMP	D0Re0, #0
+	BNZ	$Lmatch_word1
+	!! check for matches in the second 4 chars
+	MOV	D1Ar1, D1Re0
+	ADDT	D1Re0, D1Re0, #HI(0xfefefeff)
+	ADD	D1Re0, D1Re0, #LO(0xfefefeff)
+	XOR	D1Ar1, D1Ar1, #-1
+	AND	D1Re0, D1Re0, D1Ar1
+	ANDMT	D1Re0, D1Re0, #HI(0x80808080)
+	ANDMB	D1Re0, D1Re0, #LO(0x80808080)
+	CMP	D1Re0, #0
+	BNZ	$Lmatch_word2
+	!! check if we have reached the end of the buffer
+	CMP	D1Ar5, #0
+	BNE	$Laligned_loop
+	!! exit if there are no chars left to check
+	AND	D1Ar3, D1Ar3, #7
+	CMP	D1Ar3, #0
+	BZ	$Lexit_fail
+	!! recover c
+	AND	D0Ar2, D0Ar4, #0xff
+$Lbyte_loop:
+	!! get 1 char from s
+	GETB	D0Re0, [D0Ar6++]
+	!! decrement n
+	SUB	D1Ar3, D1Ar3, #1
+	!! exit if we have a match
+	CMP	D0Re0, D0Ar2
+	BZ	$Lexit_success1
+	!! fall through if we have run out of chars
+	CMP	D1Ar3, #0
+	BNE	$Lbyte_loop
+
+$Lexit_fail:
+	MOV	D0Re0, #0
+	B	$Lend
+
+$Lmatch_word1:
+	!! move the match word into D1Re0
+	MOV	D1Re0, D0Re0
+	!! roll back the buffer pointer by 4 chars
+	SUB	D0Ar6, D0Ar6, #4
+$Lmatch_word2:
+	!! roll back the buffer pointer by 4 chars
+	SUB	D0Ar6, D0Ar6, #4
+	!! exit if the lowest byte flags a match
+	MOV	D1Ar1, D1Re0
+	AND	D1Ar1, D1Ar1, #0xff
+	CMP	D1Ar1, #0
+	BNE	$Lexit_success2
+	!! advance buffer pointer to the next char
+	ADD	D0Ar6, D0Ar6, #1
+	!! shift in the next lowest byte
+	LSR	D1Re0, D1Re0, #8
+	!! exit if the lowest byte flags a match
+	MOV	D1Ar1, D1Re0
+	AND	D1Ar1, D1Ar1, #0xff
+	CMP	D1Ar1, #0
+	BNE	$Lexit_success2
+	!! advance buffer pointer to the next char
+	ADD	D0Ar6, D0Ar6, #1
+	!! shift in the next lowest byte
+	LSR	D1Re0, D1Re0, #8
+	!! exit if the lowest byte flags a match
+	MOV	D1Ar1, D1Re0
+	AND	D1Ar1, D1Ar1, #0xff
+	CMP	D1Ar1, #0
+	BNE	$Lexit_success2
+	!! the match must be in the last byte, exit
+	ADD	D0Ar6, D0Ar6, #1
+	B	$Lexit_success2
+
+$Lexit_success1:
+	SUB	D0Ar6, D0Ar6, #1
+$Lexit_success2:
+	!! return the buffer pointer
+	MOV	D0Re0, D0Ar6
+$Lend:
+	MOV	PC, D1RtP
+
+	.size _memchr,.-_memchr
+
+libc_hidden_def(memchr)
diff --git a/libc/string/metag/memcpy.S b/libc/string/metag/memcpy.S
new file mode 100644
index 000000000..f96c9d131
--- /dev/null
+++ b/libc/string/metag/memcpy.S
@@ -0,0 +1,189 @@
+! Copyright (C) 2013 Imagination Technologies Ltd.
+
+! Licensed under LGPL v2.1 or later, see the file COPYING.LIB in this tarball.
+
+	.text
+	.global	_memcpy
+	.type	_memcpy,function
+! D1Ar1 dst
+! D0Ar2 src
+! D1Ar3 cnt
+! D0Re0 dst
+_memcpy:
+	CMP	D1Ar3, #16
+	MOV	A1.2, D0Ar2		! source pointer
+	MOV	A0.2, D1Ar1		! destination pointer
+	MOV	A0.3, D1Ar1		! for return value
+! If there are fewer than 16 bytes to copy use the byte copy loop
+	BGE	$Llong_copy
+
+$Lbyte_copy:
+! Simply copy a byte at a time
+	SUBS	TXRPT, D1Ar3, #1
+	BLT	$Lend
+$Lloop_byte:
+	GETB	D1Re0, [A1.2++]
+	SETB	[A0.2++], D1Re0
+	BR	$Lloop_byte
+
+$Lend:
+! Finally set return value and return
+	MOV	D0Re0, A0.3
+	MOV	PC, D1RtP
+
+$Llong_copy:
+	ANDS	D1Ar5, D1Ar1, #7	! test destination alignment
+	BZ	$Laligned_dst
+
+! The destination address is not 8 byte aligned. We will copy bytes from
+! the source to the destination until the remaining data has an 8 byte
+! destination address alignment (i.e. we should never copy more than 7
+! bytes here).
+$Lalign_dst:
+	GETB	D0Re0, [A1.2++]
+	ADD	D1Ar5, D1Ar5, #1	! dest is aligned when D1Ar5 reaches #8
+	SUB	D1Ar3, D1Ar3, #1	! decrement count of remaining bytes
+	SETB	[A0.2++], D0Re0
+	CMP	D1Ar5, #8
+	BNE	$Lalign_dst
+
+! We have at least (16 - 7) = 9 bytes to copy - calculate the number of 8 byte
+! blocks, then jump to the unaligned copy loop or fall through to the aligned
+! copy loop as appropriate.
+$Laligned_dst:
+	MOV	D0Ar4, A1.2
+	LSR	D1Ar5, D1Ar3, #3	! D1Ar5 = number of 8 byte blocks
+	ANDS	D0Ar4, D0Ar4, #7	! test source alignment
+	BNZ	$Lunaligned_copy	! if unaligned, use unaligned copy loop
+
+! Both source and destination are 8 byte aligned - the easy case.
+$Laligned_copy:
+	LSRS	D1Ar5, D1Ar3, #5	! D1Ar5 = number of 32 byte blocks
+	BZ	$Lbyte_copy
+	SUB	TXRPT, D1Ar5, #1
+
+$Laligned_32:
+	GETL	D0Re0, D1Re0, [A1.2++]
+	GETL	D0Ar6, D1Ar5, [A1.2++]
+	SETL	[A0.2++], D0Re0, D1Re0
+	SETL	[A0.2++], D0Ar6, D1Ar5
+	GETL	D0Re0, D1Re0, [A1.2++]
+	GETL	D0Ar6, D1Ar5, [A1.2++]
+	SETL	[A0.2++], D0Re0, D1Re0
+	SETL	[A0.2++], D0Ar6, D1Ar5
+	BR	$Laligned_32
+
+! If there are any remaining bytes use the byte copy loop, otherwise we are done
+	ANDS	D1Ar3, D1Ar3, #0x1f
+	BNZ	$Lbyte_copy
+	B	$Lend
+
+! The destination is 8 byte aligned but the source is not, and there are 8
+! or more bytes to be copied.
+$Lunaligned_copy:
+! Adjust the source pointer (A1.2) to the 8 byte boundary before its
+! current value
+	MOV	D0Ar4, A1.2
+	MOV	D0Ar6, A1.2
+	ANDMB	D0Ar4, D0Ar4, #0xfff8
+	MOV	A1.2, D0Ar4
+! Save the number of bytes of mis-alignment in D0Ar4 for use later
+	SUBS	D0Ar6, D0Ar6, D0Ar4
+	MOV	D0Ar4, D0Ar6
+! If there is no mis-alignment after all, use the aligned copy loop
+	BZ	$Laligned_copy
+
+! prefetch 8 bytes
+	GETL	D0Re0, D1Re0, [A1.2]
+
+	SUB	TXRPT, D1Ar5, #1
+
+! There are 3 mis-alignment cases to be considered: less than 4 bytes,
+! exactly 4 bytes, and more than 4 bytes.
+	CMP	D0Ar6, #4
+	BLT	$Lunaligned_1_2_3	! use 1-3 byte mis-alignment loop
+	BZ	$Lunaligned_4		! use 4 byte mis-alignment loop
+
+! The mis-alignment is more than 4 bytes
+$Lunaligned_5_6_7:
+	SUB	D0Ar6, D0Ar6, #4
+! Calculate the bit offsets required for the shift operations necessary
+! to align the data.
+! D0Ar6 = bit offset, D1Ar5 = (32 - bit offset)
+	MULW	D0Ar6, D0Ar6, #8
+	MOV	D1Ar5, #32
+	SUB	D1Ar5, D1Ar5, D0Ar6
+! Move data 4 bytes before we enter the main loop
+	MOV	D0Re0, D1Re0
+
+$Lloop_5_6_7:
+	GETL	D0Ar2, D1Ar1, [++A1.2]
+! form 64-bit data in D0Re0, D1Re0
+	LSR	D0Re0, D0Re0, D0Ar6
+	MOV	D1Re0, D0Ar2
+	LSL	D1Re0, D1Re0, D1Ar5
+	ADD	D0Re0, D0Re0, D1Re0
+
+	LSR	D0Ar2, D0Ar2, D0Ar6
+	LSL	D1Re0, D1Ar1, D1Ar5
+	ADD	D1Re0, D1Re0, D0Ar2
+
+	SETL	[A0.2++], D0Re0, D1Re0
+	MOV	D0Re0, D1Ar1
+	BR	$Lloop_5_6_7
+
+	B	$Lunaligned_end
+
+$Lunaligned_1_2_3:
+! Calculate the bit offsets required for the shift operations necessary
+! to align the data.
+! D0Ar6 = bit offset, D1Ar5 = (32 - bit offset)
+	MULW	D0Ar6, D0Ar6, #8
+	MOV	D1Ar5, #32
+	SUB	D1Ar5, D1Ar5, D0Ar6
+
+$Lloop_1_2_3:
+! form 64-bit data in D0Re0, D1Re0
+	LSR	D0Re0, D0Re0, D0Ar6
+	LSL	D1Ar1, D1Re0, D1Ar5
+	ADD	D0Re0, D0Re0, D1Ar1
+	MOV	D0Ar2, D1Re0
+	LSR	D0FrT, D0Ar2, D0Ar6
+	GETL	D0Ar2, D1Ar1, [++A1.2]
+
+	MOV	D1Re0, D0Ar2
+	LSL	D1Re0, D1Re0, D1Ar5
+	ADD	D1Re0, D1Re0, D0FrT
+
+	SETL	[A0.2++], D0Re0, D1Re0
+	MOV	D0Re0, D0Ar2
+	MOV	D1Re0, D1Ar1
+	BR	$Lloop_1_2_3
+
+	B	$Lunaligned_end
+
+! The 4 byte mis-alignment case - this does not require any shifting, just a
+! shuffling of registers.
+$Lunaligned_4:
+	MOV	D0Re0, D1Re0
+$Lloop_4:
+	GETL	D0Ar2, D1Ar1, [++A1.2]
+	MOV	D1Re0, D0Ar2
+	SETL	[A0.2++], D0Re0, D1Re0
+	MOV	D0Re0, D1Ar1
+	BR	$Lloop_4
+
+$Lunaligned_end:
+! If there are no remaining bytes to copy, we are done.
+	ANDS	D1Ar3, D1Ar3, #7
+	BZ	$Lend
+! Re-adjust the source pointer (A1.2) back to the actual (unaligned) byte
+! address of the remaining bytes, and fall through to the byte copy loop.
+	MOV	D0Ar6, A1.2
+	ADD	D1Ar5, D0Ar4, D0Ar6
+	MOV	A1.2, D1Ar5
+	B	$Lbyte_copy
+
+	.size _memcpy,.-_memcpy
+
+libc_hidden_def(memcpy)
diff --git a/libc/string/metag/memmove.S b/libc/string/metag/memmove.S
new file mode 100644
index 000000000..3416fd558
--- /dev/null
+++ b/libc/string/metag/memmove.S
@@ -0,0 +1,350 @@
+! Copyright (C) 2013 Imagination Technologies Ltd.
+
+! Licensed under LGPL v2.1 or later, see the file COPYING.LIB in this tarball.
+
+
+	.text
+	.global	_memmove
+	.type	_memmove,function
+! D1Ar1 dst
+! D0Ar2 src
+! D1Ar3 cnt
+! D0Re0 dst
+_memmove:
+	CMP	D1Ar3, #0
+	MOV	D0Re0, D1Ar1
+	BZ	$LEND2
+	MSETL	[A0StP], D0.5, D0.6, D0.7
+	MOV	D1Ar5, D0Ar2
+	CMP	D1Ar1, D1Ar5
+	BLT	$Lforwards_copy
+	SUB	D0Ar4, D1Ar1, D1Ar3
+	ADD	D0Ar4, D0Ar4, #1
+	CMP	D0Ar2, D0Ar4
+	BLT	$Lforwards_copy
+	! should copy backwards
+	MOV	D1Re0, D0Ar2
+	! adjust pointer to the end of mem
+	ADD	D0Ar2, D1Re0, D1Ar3
+	ADD	D1Ar1, D1Ar1, D1Ar3
+
+	MOV	A1.2, D0Ar2
+	MOV	A0.2, D1Ar1
+	CMP	D1Ar3, #8
+	BLT	$Lbbyte_loop
+
+	MOV	D0Ar4, D0Ar2
+	MOV	D1Ar5, D1Ar1
+
+	! test 8 byte alignment
+	ANDS	D1Ar5, D1Ar5, #7
+	BNE	$Lbdest_unaligned
+
+	ANDS	D0Ar4, D0Ar4, #7
+	BNE	$Lbsrc_unaligned
+
+	LSR	D1Ar5, D1Ar3, #3
+
+$Lbaligned_loop:
+	GETL	D0Re0, D1Re0, [--A1.2]
+	SETL	[--A0.2], D0Re0, D1Re0
+	SUBS	D1Ar5, D1Ar5, #1
+	BNE	$Lbaligned_loop
+
+	ANDS	D1Ar3, D1Ar3, #7
+	BZ	$Lbbyte_loop_exit
+$Lbbyte_loop:
+	GETB	D1Re0, [--A1.2]
+	SETB	[--A0.2], D1Re0
+	SUBS	D1Ar3, D1Ar3, #1
+	BNE	$Lbbyte_loop
+$Lbbyte_loop_exit:
+	MOV	D0Re0, A0.2
+$LEND:
+	SUB	A0.2, A0StP, #24
+	MGETL	D0.5, D0.6, D0.7, [A0.2]
+	SUB	A0StP, A0StP, #24
+$LEND2:
+	MOV	PC, D1RtP
+
+$Lbdest_unaligned:
+	GETB	D0Re0, [--A1.2]
+	SETB	[--A0.2], D0Re0
+	SUBS	D1Ar5, D1Ar5, #1
+	SUB	D1Ar3, D1Ar3, #1
+	BNE	$Lbdest_unaligned
+	CMP	D1Ar3, #8
+	BLT	$Lbbyte_loop
+$Lbsrc_unaligned:
+	LSR	D1Ar5, D1Ar3, #3
+	! adjust A1.2
+	MOV	D0Ar4, A1.2
+	! save original address
+	MOV	D0Ar6, A1.2
+
+	ADD	D0Ar4, D0Ar4, #7
+	ANDMB	D0Ar4, D0Ar4, #0xfff8
+	! new address is the 8-byte aligned one above the original
+	MOV	A1.2, D0Ar4
+
+	! A0.2 dst 64-bit is aligned
+	! measure the gap size
+	SUB	D0Ar6, D0Ar4, D0Ar6
+	MOVS	D0Ar4, D0Ar6
+	! keep this information for the later adjustment
+	! both aligned
+	BZ	$Lbaligned_loop
+
+	! prefetch
+	GETL	D0Re0, D1Re0, [--A1.2]
+
+	CMP	D0Ar6, #4
+	BLT	$Lbunaligned_1_2_3
+	! 32-bit aligned
+	BZ	$Lbaligned_4
+
+	SUB	D0Ar6, D0Ar6, #4
+	! D1.6 stores the gap size in bits
+	MULW	D1.6, D0Ar6, #8
+	MOV	D0.6, #32
+	! D0.6 stores the complement of the gap size
+	SUB	D0.6, D0.6, D1.6
+
+$Lbunaligned_5_6_7:
+	GETL	D0.7, D1.7, [--A1.2]
+	! form 64-bit data in D0Re0, D1Re0
+	MOV	D1Re0, D0Re0
+	! D1Re0 << gap-size
+	LSL	D1Re0, D1Re0, D1.6
+	MOV	D0Re0, D1.7
+	! D0Re0 >> complement
+	LSR	D0Re0, D0Re0, D0.6
+	MOV	D1.5, D0Re0
+	! combine the two
+	ADD	D1Re0, D1Re0, D1.5
+
+	MOV	D1.5, D1.7
+	LSL	D1.5, D1.5, D1.6
+	MOV	D0Re0, D0.7
+	LSR	D0Re0, D0Re0, D0.6
+	MOV	D0.5, D1.5
+	ADD	D0Re0, D0Re0, D0.5
+
+	SETL	[--A0.2], D0Re0, D1Re0
+	MOV	D0Re0, D0.7
+	MOV	D1Re0, D1.7
+	SUBS	D1Ar5, D1Ar5, #1
+	BNE	$Lbunaligned_5_6_7
+
+	ANDS	D1Ar3, D1Ar3, #7
+	BZ	$Lbbyte_loop_exit
+	! Adjust A1.2
+	! A1.2 <- A1.2 + 8 - gap size
+	ADD	A1.2, A1.2, #8
+	SUB	A1.2, A1.2, D0Ar4
+	B	$Lbbyte_loop
+
+$Lbunaligned_1_2_3:
+	MULW	D1.6, D0Ar6, #8
+	MOV	D0.6, #32
+	SUB	D0.6, D0.6, D1.6
+
+$Lbunaligned_1_2_3_loop:
+	GETL	D0.7, D1.7, [--A1.2]
+	! form 64-bit data in D0Re0, D1Re0
+	LSL	D1Re0, D1Re0, D1.6
+	! save D0Re0 for later use
+	MOV	D0.5, D0Re0
+	LSR	D0Re0, D0Re0, D0.6
+	MOV	D1.5, D0Re0
+	ADD	D1Re0, D1Re0, D1.5
+
+	! original data in D0Re0
+	MOV	D1.5, D0.5
+	LSL	D1.5, D1.5, D1.6
+	MOV	D0Re0, D1.7
+	LSR	D0Re0, D0Re0, D0.6
+	MOV	D0.5, D1.5
+	ADD	D0Re0, D0Re0, D0.5
+
+	SETL	[--A0.2], D0Re0, D1Re0
+	MOV	D0Re0, D0.7
+	MOV	D1Re0, D1.7
+	SUBS	D1Ar5, D1Ar5, #1
+	BNE	$Lbunaligned_1_2_3_loop
+
+	ANDS	D1Ar3, D1Ar3, #7
+	BZ	$Lbbyte_loop_exit
+	! Adjust A1.2
+	ADD	A1.2, A1.2, #8
+	SUB	A1.2, A1.2, D0Ar4
+	B	$Lbbyte_loop
+
+$Lbaligned_4:
+	GETL	D0.7, D1.7, [--A1.2]
+	MOV	D1Re0, D0Re0
+	MOV	D0Re0, D1.7
+	SETL	[--A0.2], D0Re0, D1Re0
+	MOV	D0Re0, D0.7
+	MOV	D1Re0, D1.7
+	SUBS	D1Ar5, D1Ar5, #1
+	BNE	$Lbaligned_4
+	ANDS	D1Ar3, D1Ar3, #7
+	BZ	$Lbbyte_loop_exit
+	! Adjust A1.2
+	ADD	A1.2, A1.2, #8
+	SUB	A1.2, A1.2, D0Ar4
+	B	$Lbbyte_loop
+
+$Lforwards_copy:
+	MOV	A1.2, D0Ar2
+	MOV	A0.2, D1Ar1
+	CMP	D1Ar3, #8
+	BLT	$Lfbyte_loop
+
+	MOV	D0Ar4, D0Ar2
+	MOV	D1Ar5, D1Ar1
+
+	ANDS	D1Ar5, D1Ar5, #7
+	BNE	$Lfdest_unaligned
+
+	ANDS	D0Ar4, D0Ar4, #7
+	BNE	$Lfsrc_unaligned
+
+	LSR	D1Ar5, D1Ar3, #3
+
+$Lfaligned_loop:
+	GETL	D0Re0, D1Re0, [A1.2++]
+	SUBS	D1Ar5, D1Ar5, #1
+	SETL	[A0.2++], D0Re0, D1Re0
+	BNE	$Lfaligned_loop
+
+	ANDS	D1Ar3, D1Ar3, #7
+	BZ	$Lfbyte_loop_exit
+$Lfbyte_loop:
+	GETB	D1Re0, [A1.2++]
+	SETB	[A0.2++], D1Re0
+	SUBS	D1Ar3, D1Ar3, #1
+	BNE	$Lfbyte_loop
+$Lfbyte_loop_exit:
+	MOV	D0Re0, D1Ar1
+	B	$LEND
+
+$Lfdest_unaligned:
+	GETB	D0Re0, [A1.2++]
+	ADD	D1Ar5, D1Ar5, #1
+	SUB	D1Ar3, D1Ar3, #1
+	SETB	[A0.2++], D0Re0
+	CMP	D1Ar5, #8
+	BNE	$Lfdest_unaligned
+	CMP	D1Ar3, #8
+	BLT	$Lfbyte_loop
+$Lfsrc_unaligned:
+	! adjust A1.2
+	LSR	D1Ar5, D1Ar3, #3
+
+	MOV	D0Ar4, A1.2
+	MOV	D0Ar6, A1.2
+	ANDMB	D0Ar4, D0Ar4, #0xfff8
+	MOV	A1.2, D0Ar4
+
+	! A0.2 dst 64-bit is aligned
+	SUB	D0Ar6, D0Ar6, D0Ar4
+	! keep the information for the later adjustment
+	MOVS	D0Ar4, D0Ar6
+
+	! both aligned
+	BZ	$Lfaligned_loop
+
+	! prefetch
+	GETL	D0Re0, D1Re0, [A1.2]
+
+	CMP	D0Ar6, #4
+	BLT	$Lfunaligned_1_2_3
+	BZ	$Lfaligned_4
+
+	SUB	D0Ar6, D0Ar6, #4
+	MULW	D0.6, D0Ar6, #8
+	MOV	D1.6, #32
+	SUB	D1.6, D1.6, D0.6
+
+$Lfunaligned_5_6_7:
+	GETL	D0.7, D1.7, [++A1.2]
+	! form 64-bit data in D0Re0, D1Re0
+	MOV	D0Re0, D1Re0
+	LSR	D0Re0, D0Re0, D0.6
+	MOV	D1Re0, D0.7
+	LSL	D1Re0, D1Re0, D1.6
+	MOV	D0.5, D1Re0
+	ADD	D0Re0, D0Re0, D0.5
+
+	MOV	D0.5, D0.7
+	LSR	D0.5, D0.5, D0.6
+	MOV	D1Re0, D1.7
+	LSL	D1Re0, D1Re0, D1.6
+	MOV	D1.5, D0.5
+	ADD	D1Re0, D1Re0, D1.5
+
+	SETL	[A0.2++], D0Re0, D1Re0
+	MOV	D0Re0, D0.7
+	MOV	D1Re0, D1.7
+	SUBS	D1Ar5, D1Ar5, #1
+	BNE	$Lfunaligned_5_6_7
+
+	ANDS	D1Ar3, D1Ar3, #7
+	BZ	$Lfbyte_loop_exit
+	! Adjust A1.2
+	ADD	A1.2, A1.2, D0Ar4
+	B	$Lfbyte_loop
+
+$Lfunaligned_1_2_3:
+	MULW	D0.6, D0Ar6, #8
+	MOV	D1.6, #32
+	SUB	D1.6, D1.6, D0.6
+
+$Lfunaligned_1_2_3_loop:
+	GETL	D0.7, D1.7, [++A1.2]
+	! form 64-bit data in D0Re0, D1Re0
+	LSR	D0Re0, D0Re0, D0.6
+	MOV	D1.5, D1Re0
+	LSL	D1Re0, D1Re0, D1.6
+	MOV	D0.5, D1Re0
+	ADD	D0Re0, D0Re0, D0.5
+
+	MOV	D0.5, D1.5
+	LSR	D0.5, D0.5, D0.6
+	MOV	D1Re0, D0.7
+	LSL	D1Re0, D1Re0, D1.6
+	MOV	D1.5, D0.5
+	ADD	D1Re0, D1Re0, D1.5
+
+	SETL	[A0.2++], D0Re0, D1Re0
+	MOV	D0Re0, D0.7
+	MOV	D1Re0, D1.7
+	SUBS	D1Ar5, D1Ar5, #1
+	BNE	$Lfunaligned_1_2_3_loop
+
+	ANDS	D1Ar3, D1Ar3, #7
+	BZ	$Lfbyte_loop_exit
+	! Adjust A1.2
+	ADD	A1.2, A1.2, D0Ar4
+	B	$Lfbyte_loop
+
+$Lfaligned_4:
+	GETL	D0.7, D1.7, [++A1.2]
+	MOV	D0Re0, D1Re0
+	MOV	D1Re0, D0.7
+	SETL	[A0.2++], D0Re0, D1Re0
+	MOV	D0Re0, D0.7
+	MOV	D1Re0, D1.7
+	SUBS	D1Ar5, D1Ar5, #1
+	BNE	$Lfaligned_4
+	ANDS	D1Ar3, D1Ar3, #7
+	BZ	$Lfbyte_loop_exit
+	! Adjust A1.2
+	ADD	A1.2, A1.2, D0Ar4
+	B	$Lfbyte_loop
+
+	.size _memmove,.-_memmove
+
+libc_hidden_def(memmove)
diff --git a/libc/string/metag/memset.S b/libc/string/metag/memset.S
new file mode 100644
index 000000000..8d4e9a158
--- /dev/null
+++ b/libc/string/metag/memset.S
@@ -0,0 +1,90 @@
+! Copyright (C) 2013 Imagination Technologies Ltd.
+
+! Licensed under LGPL v2.1 or later, see the file COPYING.LIB in this tarball.
+
+
+	.text
+	.global	_memset
+	.type	_memset,function
+! D1Ar1 dst
+! D0Ar2 c
+! D1Ar3 cnt
+! D0Re0 dst
+_memset:
+	AND	D0Ar2,D0Ar2,#0xFF	! Ensure a byte input value
+	MULW	D0Ar2,D0Ar2,#0x0101	! Duplicate byte value into bits 0-15
+	ANDS	D0Ar4,D1Ar1,#7		! Extract bottom 3 bits of dst
+	LSL	D0Re0,D0Ar2,#16		! Duplicate byte value into bits 16-31
+	ADD	A0.2,D0Ar2,D0Re0	! Duplicate byte value into all 4 bytes (A0.2)
+	MOV	D0Re0,D1Ar1		! Return dst
+	BZ	$LLongStub		! if start address is aligned
+	! start address is not aligned on an 8 byte boundary, so we
+	! need the number of bytes up to the next 8 byte address
+	! boundary, or the length of the string if less than 8, in D1Ar5
+	MOV	D0Ar2,#8		! Need 8 - N in D1Ar5 ...
+	SUB	D1Ar5,D0Ar2,D0Ar4	! ... subtract N
+	CMP	D1Ar3,D1Ar5
+	MOVMI	D1Ar5,D1Ar3
+	B	$LByteStub		! dst is mis-aligned, do $LByteStub
+
+!
+! Preamble to LongLoop which generates 4*8 bytes per iteration (5 cycles)
+!
+$LLongStub:
+	LSRS	D0Ar2,D1Ar3,#5
+	AND	D1Ar3,D1Ar3,#0x1F
+	MOV	A1.2,A0.2
+	BEQ	$LLongishStub
+	SUB	TXRPT,D0Ar2,#1
+	CMP	D1Ar3,#0
+$LLongLoop:
+	SETL	[D1Ar1++],A0.2,A1.2
+	SETL	[D1Ar1++],A0.2,A1.2
+	SETL	[D1Ar1++],A0.2,A1.2
+	SETL	[D1Ar1++],A0.2,A1.2
+	BR	$LLongLoop
+	BZ	$Lexit
+!
+! Preamble to LongishLoop which generates 1*8 bytes per iteration (2 cycles)
+!
+$LLongishStub:
+	LSRS	D0Ar2,D1Ar3,#3
+	AND	D1Ar3,D1Ar3,#0x7
+	MOV	D1Ar5,D1Ar3
+	BEQ	$LByteStub
+	SUB	TXRPT,D0Ar2,#1
+	CMP	D1Ar3,#0
+$LLongishLoop:
+	SETL	[D1Ar1++],A0.2,A1.2
+	BR	$LLongishLoop
+	BZ	$Lexit
+!
+! This does a byte structured burst of up to 7 bytes
+!
+! D1Ar1 should point to the location required
+! D1Ar3 should be the remaining total byte count
+! D1Ar5 should be burst size (<= D1Ar3)
+!
+$LByteStub:
+	SUBS	D1Ar3,D1Ar3,D1Ar5	! Reduce count
+	ADD	D1Ar1,D1Ar1,D1Ar5	! Advance pointer to end of area
+	MULW	D1Ar5,D1Ar5,#4		! Scale to (1*4), (2*4), (3*4)
+	SUB	D1Ar5,D1Ar5,#(8*4)	! Rebase to -(7*4), -(6*4), -(5*4), ...
+	MOV	A1.2,D1Ar5
+	SUB	PC,CPC1,A1.2		! Jump into table below
+	SETB	[D1Ar1+#(-7)],A0.2
+	SETB	[D1Ar1+#(-6)],A0.2
+	SETB	[D1Ar1+#(-5)],A0.2
+	SETB	[D1Ar1+#(-4)],A0.2
+	SETB	[D1Ar1+#(-3)],A0.2
+	SETB	[D1Ar1+#(-2)],A0.2
+	SETB	[D1Ar1+#(-1)],A0.2
+!
+! Return if all data has been output, otherwise do $LLongStub
+!
+	BNZ	$LLongStub
+$Lexit:
+	MOV	PC,D1RtP
+	.size _memset,.-_memset
+
+libc_hidden_def(memset)
diff --git a/libc/string/metag/strchr.S b/libc/string/metag/strchr.S
new file mode 100644
index 000000000..6b0f2ea43
--- /dev/null
+++ b/libc/string/metag/strchr.S
@@ -0,0 +1,167 @@
+! Copyright (C) 2013 Imagination Technologies Ltd.
+
+! Licensed under LGPL v2.1 or later, see the file COPYING.LIB in this tarball.
+
+
+#include <features.h>
+
+	.text
+	.global	_strchr
+	.type	_strchr, function
+! D1Ar1 src
+! D0Ar2 c
+_strchr:
+	AND	D0Ar2,D0Ar2,#0xff	! Drop all but 8 bits of c
+	MOV	D1Ar5, D1Ar1		! Copy src to D1Ar5
+	AND	D1Ar5, D1Ar5, #7	! Check 64 bit alignment
+	CMP	D1Ar5, #0
+	BZ	$Laligned64bit		! Jump to 64 bit aligned strchr
+$Lalign64bit:
+	GETB	D0Re0, [D1Ar1++]	! Get the next character
+	ADD	D1Ar5, D1Ar5, #1	! Increment alignment counter
+	CMP	D0Re0, D0Ar2		! Is the char c
+	BZ	$Lcharatprevious	! If so exit returning position
+	CMP	D0Re0, #0		! End of string?
+	BZ	$Lnotfound		! If so exit
+	CMP	D1Ar5, #8		! Are we aligned 64bit yet?
+	BNZ	$Lalign64bit		! If not keep aligning
+$Laligned64bit:				! src is 64bit aligned
+	MOV	D0Ar4, D0Ar2		! put c into D0Ar4
+	LSL	D0Ar4, D0Ar4, #8	! Shift it up
+	ADD	D0Ar4, D0Ar4, D0Ar2	! another c
+	LSL	D0Ar4, D0Ar4, #8	! shift
+	ADD	D0Ar4, D0Ar4, D0Ar2	! another c
+	LSL	D0Ar4, D0Ar4, #8	! shift
+	ADD	D0Ar4, D0Ar4, D0Ar2	! 4 copies of c
+$Lcheck8bytes:
+	GETL	D0Re0, D1Re0, [D1Ar1++]	! grab 8 bytes
+	MOV	A0.3, D0Re0		! save for later use
+	! first word
+	! check for \0
+	MOV	D0Ar2, D0Re0		! D0Ar2 is a scratch now
+	ADDT	D0Re0, D0Re0, #HI(0xfefefeff)	! Do 4 1-byte compares
+	ADD	D0Re0, D0Re0, #LO(0xfefefeff)
+	XOR	D0Ar2, D0Ar2, #-1
+	AND	D0Re0, D0Re0, D0Ar2
+	ANDMT	D0Re0, D0Re0, #HI(0x80808080)
+	ANDMB	D0Re0, D0Re0, #LO(0x80808080)
+	CMP	D0Re0, #0
+	BNZ	$Lnullinword1		! found \0 (or c if c==\0)
+
+	! Check for c
+	MOV	D0Re0, A0.3		! restore the first word
+	XOR	D0Re0, D0Re0, D0Ar4
+	MOV	D0Ar2, D0Re0		! Do 4 1-byte compares
+	ADDT	D0Re0, D0Re0, #HI(0xfefefeff)
+	ADD	D0Re0, D0Re0, #LO(0xfefefeff)
+	XOR	D0Ar2, D0Ar2, #-1
+	AND	D0Re0, D0Re0, D0Ar2
+	ANDMT	D0Re0, D0Re0, #HI(0x80808080)
+	ANDMB	D0Re0, D0Re0, #LO(0x80808080)
+	CMP	D0Re0, #0
+	BNZ	$Lcharinword1		! found c
+
+	! second word
+	! check for \0
+	MOV	A0.3, D1Re0		! save for later use
+	MOV	D1Ar3, D1Re0
+	ADDT	D1Re0, D1Re0, #HI(0xfefefeff)	! Do 4 1-byte compares
+	ADD	D1Re0, D1Re0, #LO(0xfefefeff)
+	XOR	D1Ar3, D1Ar3, #-1
+	AND	D1Re0, D1Re0, D1Ar3
+	ANDMT	D1Re0, D1Re0, #HI(0x80808080)
+	ANDMB	D1Re0, D1Re0, #LO(0x80808080)
+	CMP	D1Re0, #0
+	BNZ	$Lnullinword2		! Found \0 (or c if c==\0)
+
+	MOV	D0.4, A0.3		! restore the second word
+	XOR	D1Re0, D0.4, D0Ar4	! test c
+
+	MOV	D1Ar3, D1Re0
+	ADDT	D1Re0, D1Re0, #HI(0xfefefeff)	! Do 4 1-byte compares
+	ADD	D1Re0, D1Re0, #LO(0xfefefeff)
+	XOR	D1Ar3, D1Ar3, #-1
+	AND	D1Re0, D1Re0, D1Ar3
+	ANDMT	D1Re0, D1Re0, #HI(0x80808080)
+	ANDMB	D1Re0, D1Re0, #LO(0x80808080)
+	CMP	D1Re0, #0
+	BNZ	$Lcharinword2		! found c
+
+	B	$Lcheck8bytes		! Keep checking
+
+$Lnullinword1:				! found \0 somewhere, check for c too
+	SUB	D1Ar1, D1Ar1, #4
+$Lnullinword2:
+	SUB	D1Ar1, D1Ar1, #4
+	AND	D0Ar2, D0Ar4, #0xff	! restore c
+	MOV	D0Re0, A0.3		! restore the word
+	MOV	D0.4, D0Re0		! for shifting later
+	AND	D0Re0, D0Re0, #0xff	! take first byte of word
+	CMP	D0Re0, D0Ar2
+	BZ	$Lcharatcurrent		! found c
+	CMP	D0Re0, #0
+	BZ	$Lnotfound		! found \0
+
+	ADD	D1Ar1, D1Ar1, #1
+	LSR	D0.4, D0.4, #8
+	MOV	D0Re0, D0.4
+	AND	D0Re0, D0Re0, #0xff	! take second byte of word
+	CMP	D0Re0, D0Ar2
+	BZ	$Lcharatcurrent		! found c
+	CMP	D0Re0, #0
+	BZ	$Lnotfound		! found \0
+
+	ADD	D1Ar1, D1Ar1, #1
+	LSR	D0.4, D0.4, #8
+	MOV	D0Re0, D0.4
+	AND	D0Re0, D0Re0, #0xff	! take third byte of word
+	CMP	D0Re0, D0Ar2
+	BZ	$Lcharatcurrent		! found c
+	CMP	D0Re0, #0
+	BZ	$Lnotfound		! found \0
+
+	ADD	D1Ar1, D1Ar1, #1	! move to 4th byte
+	CMP	D0Ar2, #0		! If c was \0
+	BZ	$Lcharatcurrent		! c has been found!
+
+$Lnotfound:
+	MOV	D0Re0, #0		! End of string, c not found
+	B	$Lend
+
+$Lcharinword1:				! found c in first word
+	MOV	D1Re0, D0Re0
+	SUB	D1Ar1, D1Ar1, #4
+$Lcharinword2:				! found c in second word
+	SUB	D1Ar1, D1Ar1, #4
+
+	AND	D0Re0, D1Re0, #0xff	! First byte
+	CMP	D0Re0, #0		! Test c (a non-zero flag marks c,
+					! from the 4 1-byte compare code)
+	BNE	$Lcharatcurrent
+	ADD	D1Ar1, D1Ar1, #1
+
+	LSR	D1Re0, D1Re0, #8
+	AND	D0Re0, D1Re0, #0xff	! Second byte
+	CMP	D0Re0, #0		! Test c (marked by a non-zero flag)
+	BNE	$Lcharatcurrent
+	ADD	D1Ar1, D1Ar1, #1
+
+	LSR	D1Re0, D1Re0, #8
+	AND	D0Re0, D1Re0, #0xff	! Third byte
+	CMP	D0Re0, #0		! Test c (marked by a non-zero flag)
+	BNE	$Lcharatcurrent
+	ADD	D1Ar1, D1Ar1, #1	! Must be the fourth byte
+	B	$Lcharatcurrent
+
+$Lcharatprevious:
+	SUB	D1Ar1, D1Ar1, #1	! Fix-up pointer
+$Lcharatcurrent:
+	MOV	D0Re0, D1Ar1		! Return the string pointer
+$Lend:
+	MOV	PC, D1RtP
+	.size _strchr,.-_strchr
+
+libc_hidden_def(strchr)
+#ifdef __UCLIBC_SUSV3_LEGACY__
+strong_alias(strchr,index)
+#endif
diff --git a/libc/string/metag/strcmp.S b/libc/string/metag/strcmp.S
new file mode 100644
index 000000000..3278ffaa5
--- /dev/null
+++ b/libc/string/metag/strcmp.S
@@ -0,0 +1,65 @@
+! Copyright (C) 2013 Imagination Technologies Ltd.
+
+! Licensed under LGPL v2.1 or later, see the file COPYING.LIB in this tarball.
+
+
+#include <features.h>
+
+	.text
+	.global	_strcmp
+	.type	_strcmp,function
+! D1Ar1 s1
+! D0Ar2 s2
+_strcmp:
+	TST	D1Ar1,#3
+	TSTZ	D0Ar2,#3
+	MOVT	D1Re0,#0x0101
+	ADD	D1Re0,D1Re0,#0x0101
+	BNZ	$Lstrcmp_slow
+	GETD	D1Ar3,[D1Ar1+#4++]	! Load 32-bits from s1
+	GETD	D1Ar5,[D0Ar2+#4++]	! Load 32-bits from s2
+	LSL	D0FrT,D1Re0,#7		! D0FrT = 0x80808080
+$Lstrcmp4_loop:
+	SUB	D0Re0,D1Ar3,D1Re0	! D1Re0 = 0x01010101
+	MOV	D0Ar6,D1Ar3
+	SUBS	D0Ar4,D1Ar3,D1Ar5	! Calculate difference
+	XOR	D0Ar6,D0Ar6,#-1
+	GETD	D1Ar3,[D1Ar1+#4++]	! Load 32-bits from s1
+	AND	D0Re0,D0Re0,D0Ar6
+	ANDSZ	D0Ar6,D0Re0,D0FrT	! D0FrT = 0x80808080
+	GETD	D1Ar5,[D0Ar2+#4++]	! Load 32-bits from s2
+	BZ	$Lstrcmp4_loop
+	AND	D0Ar6, D0Re0, D0FrT	! D0FrT = 0x80808080
+!
+! Either they are different or they both contain a NULL + junk
+!
+$Lstrcmp4_end:
+	LSLS	D0Re0,D0Ar4,#24		! Was Byte[0] the same?
+	LSLSZ	D0Ar2,D0Ar6,#24		! Yes: AND they were not zero?
+	LSLSZ	D0Re0,D0Ar4,#16		! Yes: Was Byte[1] the same?
+	LSLSZ	D0Ar2,D0Ar6,#16		! Yes: AND they were not zero?
+	LSLSZ	D0Re0,D0Ar4,#8		! Yes: Was Byte[2] the same?
+	LSLSZ	D0Ar2,D0Ar6,#8		! Yes: AND they were not zero?
+	MOVZ	D0Re0,D0Ar4		! Yes: Must be Byte[3]; that's the result
+	ASR	D0Re0,D0Re0,#24		! Sign extend result to integer
+	MOV	PC,D1RtP
+!
+! Misaligned case, byte at a time
+!
+$Lstrcmp_slow:
+	GETB	D1Ar3,[D1Ar1++]		! Load char from s1
+	GETB	D1Ar5,[D0Ar2++]		! Load char from s2
+	CMP	D1Ar3,#1		! Null -> C and NZ, rest -> NC (\1->Z)
+	CMPNC	D1Ar3,D1Ar5		! NOT Null: Same -> Z, else -> NZ
+	BZ	$Lstrcmp_slow		! NOT Null and Same: Loop
+	SUB	D0Re0,D1Ar3,D1Ar5	! Generate result
+	MOV	PC,D1RtP
+
+	.size _strcmp,.-_strcmp
+
+
+libc_hidden_def(strcmp)
+#ifndef __UCLIBC_HAS_LOCALE__
+strong_alias(strcmp,strcoll)
+libc_hidden_def(strcoll)
+#endif
diff --git a/libc/string/metag/strcpy.S b/libc/string/metag/strcpy.S
new file mode 100644
index 000000000..529ac9279
--- /dev/null
+++ b/libc/string/metag/strcpy.S
@@ -0,0 +1,94 @@
+! Copyright (C) 2013 Imagination Technologies Ltd.
+
+! Licensed under LGPL v2.1 or later, see the file COPYING.LIB in this tarball.
+
+
+	.text
+	.global	_strcpy
+	.type	_strcpy,function
+! D1Ar1 dst
+! D0Ar2 src
+
+_strcpy:
+	MOV	A1.2, D1Ar1
+
+	! test 4 byte alignment of src
+	ANDS	D0Ar4, D0Ar2, #3
+	BNZ	$Lbyteloop
+
+	! test 4 byte alignment of dest
+	ANDS	D1Ar5, D1Ar1, #3
+	BNZ	$Lbyteloop
+
+	! load mask values for aligned loops
+	MOVT	D1Ar3, #HI(0xfefefeff)
+	ADD	D1Ar3, D1Ar3, #LO(0xfefefeff)
+	MOVT	D0FrT, #HI(0x80808080)
+	ADD	D0FrT, D0FrT, #LO(0x80808080)
+
+	! test 8 byte alignment of src
+	ANDS	D0Ar4, D0Ar2, #7
+	BNZ	$Lwordloop
+
+	! test 8 byte alignment of dest
+	ANDS	D1Ar5, D1Ar1, #7
+	BNZ	$Lwordloop
+
+$L8byteloop:
+	GETL	D1Ar5, D0Ar6, [D0Ar2++]
+	MOV	D1Re0, D1Ar5
+	MOV	D0Re0, D1Ar5
+	ADD	D1Re0, D1Re0, D1Ar3
+	XOR	D0Re0, D0Re0, #-1
+	AND	D1Re0, D1Re0, D0Re0
+	ANDS	D1Re0, D1Re0, D0FrT
+	BNZ	$Lnullfound		! NULL in first word
+
+	MOV	D1Re0, D0Ar6
+	MOV	D0Re0, D0Ar6
+	ADD	D1Re0, D1Re0, D1Ar3
+	XOR	D0Re0, D0Re0, #-1
+	AND	D1Re0, D1Re0, D0Re0
+	ANDS	D1Re0, D1Re0, D0FrT
+	BNZ	$Lnullfound2		! NULL in the second word
+
+	SETL	[A1.2++], D1Ar5, D0Ar6
+	B	$L8byteloop
+
+$Lwordloop:
+	GETD	D0Ar6, [D0Ar2++]
+	MOV	D1Re0, D0Ar6
+	MOV	D0Re0, D0Ar6
+	ADD	D1Re0, D1Re0, D1Ar3
+	XOR	D0Re0, D0Re0, #-1
+	AND	D1Re0, D1Re0, D0Re0
+	ANDS	D1Re0, D1Re0, D0FrT
+	MOV	D1Ar5, D0Ar6
+	BNZ	$Lnullfound
+	SETD	[A1.2++], D0Ar6
+	B	$Lwordloop
+
+$Lnullfound2:
+	SETD	[A1.2++], D1Ar5
+	MOV	D1Ar5, D0Ar6
+
+$Lnullfound:
+	SETB	[A1.2++], D1Ar5
+	ANDS	D0Ar6, D1Ar5, #0xff
+	LSR	D1Ar5, D1Ar5, #8
+	BNZ	$Lnullfound
+	B	$Lend
+
+$Lbyteloop:
+	GETB	D0Ar6, [D0Ar2++]
+	SETB	[A1.2++], D0Ar6
+	CMP	D0Ar6, #0
+	BNZ	$Lbyteloop
+
+$Lend:
+	MOV	D0Re0, D1Ar1
+	MOV	PC, D1RtP
+
+	.size _strcpy,.-_strcpy
+
+libc_hidden_def(strcpy)
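
Note: all four byte-scanning routines above (memchr.S, strchr.S, strcmp.S,
strcpy.S) use the same word-at-a-time zero-byte test: adding 0xfefefeff
(i.e. subtracting 0x01010101) and masking with the complement of the word
and with 0x80808080 sets the top bit of exactly those bytes that were zero.
A minimal C sketch of the idea (illustration only, not part of the patch;
the helper names are made up):

    #include <stdint.h>

    /* True iff some byte of w is 0.  0xfefefeff == -0x01010101 mod 2^32,
       so "w + 0xfefefeff" is what the ADDT/ADD pairs above compute. */
    static int word_has_zero_byte(uint32_t w)
    {
        return ((w - 0x01010101u) & ~w & 0x80808080u) != 0;
    }

    /* Searching for a byte c instead of '\0': XOR the word with c
       replicated into every byte (what $Laligned_setup builds in D0Ar4),
       then run the same zero test. */
    static int word_has_byte(uint32_t w, unsigned char c)
    {
        return word_has_zero_byte(w ^ (0x01010101u * c));
    }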
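Note: the $Lunaligned_* loops in memcpy.S (and the $Lbunaligned_*/
$Lfunaligned_* loops in memmove.S) realign a misaligned source by rounding
it down to an aligned boundary, loading whole aligned words, and splicing
each output word together from two adjacent loads with a pair of shifts.
Roughly, in C (a sketch under the assumptions stated in the comments, not
the actual implementation; the function name is made up):

    #include <stddef.h>
    #include <stdint.h>

    /* Copy nwords 32-bit words to an aligned dst from a src whose
       misalignment is 1..3 bytes; little-endian byte order assumed, as on
       Meta.  The real code does the same with 64-bit GETL/SETL pairs. */
    static void copy_unaligned_src(uint32_t *dst, const uint8_t *src,
                                   size_t nwords)
    {
        size_t off = (uintptr_t)src & 3;             /* 1..3 assumed */
        const uint32_t *s = (const uint32_t *)(src - off);
        uint32_t lo = *s++;                          /* "prefetch" word */
        unsigned shr = 8 * off, shl = 32 - shr;

        while (nwords--) {
            uint32_t hi = *s++;
            *dst++ = (lo >> shr) | (hi << shl);      /* splice two loads */
            lo = hi;
        }
    }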
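Note: the overlap test at the top of _memmove copies backwards only when
the destination starts inside the source region (src < dst < src + cnt);
every other case falls through to the forward loops.  The equivalent logic
in C (illustrative sketch only; the function name is made up):

    #include <stddef.h>

    void *memmove_sketch(void *dst, const void *src, size_t cnt)
    {
        char *d = dst;
        const char *s = src;

        if (d > s && d < s + cnt) {     /* tail overlap: copy backwards */
            while (cnt--)
                d[cnt] = s[cnt];
        } else {                        /* disjoint or dst <= src: forwards */
            for (size_t i = 0; i < cnt; i++)
                d[i] = s[i];
        }
        return dst;
    }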
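Note: $LByteStub in memset.S finishes the last 1..7 bytes by computing a
branch offset into the run of SETB instructions (SUB PC,CPC1,A1.2), with
the pointer already advanced past the end of the area so the stores use
negative offsets.  The C analogue is a fall-through switch, Duff's-device
style (sketch only; the helper name is made up):

    /* p points one byte past the end of the area; n is 1..7. */
    static void set_tail(unsigned char *p, unsigned char c, unsigned n)
    {
        switch (n) {
        case 7: p[-7] = c; /* fall through */
        case 6: p[-6] = c; /* fall through */
        case 5: p[-5] = c; /* fall through */
        case 4: p[-4] = c; /* fall through */
        case 3: p[-3] = c; /* fall through */
        case 2: p[-2] = c; /* fall through */
        case 1: p[-1] = c;
        }
    }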