diff options
| -rw-r--r-- | libc/string/mips/memcpy.S | 119 | ||||
| -rw-r--r-- | libc/string/mips/memset.S | 71 | 
2 files changed, 186 insertions, 4 deletions
| diff --git a/libc/string/mips/memcpy.S b/libc/string/mips/memcpy.S index 155bc1128..9b05ee6da 100644 --- a/libc/string/mips/memcpy.S +++ b/libc/string/mips/memcpy.S @@ -22,11 +22,124 @@  #include <endian.h>  #include "sysdep.h" +/* void *memcpy(void *s1, const void *s2, size_t n);  */ +  #ifdef __mips64 -#error mips32 code being compiled for mips64! + +#include <sys/asm.h> + +#if __BYTE_ORDER == __BIG_ENDIAN +#  define LDHI	ldl		/* high part is left in big-endian	*/ +#  define SDHI	sdl		/* high part is left in big-endian	*/ +#  define LDLO	ldr		/* low part is right in big-endian	*/ +#  define SDLO	sdr		/* low part is right in big-endian	*/ +#else +#  define LDHI	ldr		/* high part is right in little-endian	*/ +#  define SDHI	sdr		/* high part is right in little-endian	*/ +#  define LDLO	ldl		/* low part is left in little-endian	*/ +#  define SDLO	sdl		/* low part is left in little-endian	*/  #endif -/* void *memcpy(void *s1, const void *s2, size_t n);  */ +ENTRY (memcpy) +	.set	noreorder + +	slti	t0, a2, 16		# Less than 16 bytes? +	bne	t0, zero, L(last16) +	move	v0, a0			# Setup exit value before too late + +	xor	t0, a1, a0		# Find a0/a1 displacement +	andi	t0, 0x7 +	bne	t0, zero, L(shift)	# Go handle the unaligned case +	PTR_SUBU t1, zero, a1 +	andi	t1, 0x7			# a0/a1 are mutually aligned, but are we +	beq	t1, zero, L(chk8w)	#  starting in the middle of a dword? +	PTR_SUBU a2, t1 +	LDHI	t0, 0(a1)		# Yes we are... take care of that +	PTR_ADDU a1, t1 +	SDHI	t0, 0(a0) +	PTR_ADDU a0, t1 + +L(chk8w): +	andi	t0, a2, 0x3f		# 64 or more bytes left? 
+	beq	t0, a2, L(chk1w) +	PTR_SUBU a3, a2, t0		# Yes +	PTR_ADDU a3, a1			# a3 = end address of loop +	move	a2, t0			# a2 = what will be left after loop +L(lop8w):	 +	ld	t0,  0(a1)		# Loop taking 8 dwords (64 bytes) at a time +	ld	t1,  8(a1) +	ld	t2, 16(a1) +	ld	t3, 24(a1) +	ld	ta0, 32(a1) +	ld	ta1, 40(a1) +	ld	ta2, 48(a1) +	ld	ta3, 56(a1) +	PTR_ADDIU a0, 64 +	PTR_ADDIU a1, 64 +	sd	t0, -64(a0) +	sd	t1, -56(a0) +	sd	t2, -48(a0) +	sd	t3, -40(a0) +	sd	ta0, -32(a0) +	sd	ta1, -24(a0) +	sd	ta2, -16(a0) +	bne	a1, a3, L(lop8w) +	sd	ta3,  -8(a0) + +L(chk1w): +	andi	t0, a2, 0x7		# 8 or more bytes left? +	beq	t0, a2, L(last16) +	PTR_SUBU a3, a2, t0		# Yes, handle them one dword at a time +	PTR_ADDU a3, a1			# a3 again end address +	move	a2, t0 +L(lop1w): +	ld	t0, 0(a1) +	PTR_ADDIU a0, 8 +	PTR_ADDIU a1, 8 +	bne	a1, a3, L(lop1w) +	sd	t0, -8(a0) + +L(last16): +	blez	a2, L(lst16e)		# Handle remaining bytes (< 16), one at a time +	PTR_ADDU a3, a2, a1 +L(lst16l): +	lb	t0, 0(a1) +	PTR_ADDIU a0, 1 +	PTR_ADDIU a1, 1 +	bne	a1, a3, L(lst16l) +	sb	t0, -1(a0) +L(lst16e): +	jr	ra			# Bye, bye +	nop + +L(shift): +	PTR_SUBU a3, zero, a0		# Src and Dest differ in alignment  +	andi	a3, 0x7			#  (unoptimized case...) 
+	beq	a3, zero, L(shft1) +	PTR_SUBU a2, a3			# a2 = bytes left +	LDHI	t0, 0(a1)		# Take care of first odd part +	LDLO	t0, 7(a1) +	PTR_ADDU a1, a3 +	SDHI	t0, 0(a0) +	PTR_ADDU a0, a3 +L(shft1): +	andi	t0, a2, 0x7 +	PTR_SUBU a3, a2, t0 +	PTR_ADDU a3, a1 +L(shfth): +	LDHI	t1, 0(a1)		# Limp through, dword by dword +	LDLO	t1, 7(a1) +	PTR_ADDIU a0, 8 +	PTR_ADDIU a1, 8 +	bne	a1, a3, L(shfth) +	sd	t1, -8(a0) +	b	L(last16)		# Handle anything which may be left +	move	a2, t0 + +	.set	reorder +END (memcpy) + +#else /* !__mips64 */  #if __BYTE_ORDER == __BIG_ENDIAN  #  define LWHI	lwl		/* high part is left in big-endian	*/ @@ -139,4 +252,6 @@ L(shfth):  	.set	reorder  END (memcpy) +#endif /* !__mips64 */ +  libc_hidden_def(memcpy) diff --git a/libc/string/mips/memset.S b/libc/string/mips/memset.S index 9169ad58a..ff0554ff9 100644 --- a/libc/string/mips/memset.S +++ b/libc/string/mips/memset.S @@ -22,11 +22,76 @@  #include <endian.h>  #include "sysdep.h" +/* void *memset(void *s, int c, size_t n).  */ +  #ifdef __mips64 -#error mips32 code being compiled for mips64! + +#include <sys/asm.h> + +#if __BYTE_ORDER == __BIG_ENDIAN +# define SDHI	sdl		/* high part is left in big-endian	*/ +#else +# define SDHI	sdr		/* high part is right in little-endian	*/  #endif -/* void *memset(void *s, int c, size_t n).  */ +ENTRY (memset) +	.set	noreorder + +	slti	ta1, a2, 16		# Less than 16 bytes? +	bne	ta1, zero, L(last16) +	move	v0, a0			# Setup exit value before too late + +	beq	a1, zero, L(ueven)	# If zero pattern, no need to extend +	andi	a1, 0xff		# Avoid problems with bogus arguments +	dsll	ta0, a1, 8 +	or	a1, ta0 +	dsll	ta0, a1, 16 +	or	a1, ta0			# a1 is now pattern in full (32-bit) word +	dsll	ta0, a1, 32 +	or	a1, ta0			# a1 is now pattern in double word + +L(ueven): +	PTR_SUBU ta0, zero, a0		# Unaligned address? 
+	andi	ta0, 0x7 +	beq	ta0, zero, L(chkw) +	PTR_SUBU a2, ta0 +	SDHI	a1, 0(a0)		# Yes, handle first unaligned part +	PTR_ADDU a0, ta0		# Now both a0 and a2 are updated + +L(chkw): +	andi	ta0, a2, 0xf		# Enough left for one loop iteration? +	beq	ta0, a2, L(chkl) +	PTR_SUBU a3, a2, ta0 +	PTR_ADDU a3, a0			# a3 is last loop address +1 +	move	a2, ta0			# a2 is now # of bytes left after loop +L(loopw): +	PTR_ADDIU a0, 16		# Handle 2 dwords per iteration +	sd	a1, -16(a0) +	bne	a0, a3, L(loopw) +	sd	a1,  -8(a0) + +L(chkl): +	andi	ta0, a2, 0x8		# Check if there is at least a double +	beq	ta0, zero, L(last16)	#  word remaining after the loop +	PTR_SUBU a2, ta0 +	sd	a1, 0(a0)		# Yes... +	PTR_ADDIU a0, 8 + +L(last16): +	blez	a2, L(exit)		# Handle remaining bytes, < 16 (if cnt>0) +	PTR_ADDU a3, a2, a0		# a3 is last address +1 +L(lst16l): +	PTR_ADDIU a0, 1 +	bne	a0, a3, L(lst16l) +	sb	a1, -1(a0) +L(exit): +	j	ra			# Bye, bye +	nop + +	.set	reorder +END (memset) + +#else /* !__mips64 */  #if __BYTE_ORDER == __BIG_ENDIAN  # define SWHI	swl		/* high part is left in big-endian	*/ @@ -89,4 +154,6 @@ L(exit):  	.set	reorder  END (memset) +#endif /* !__mips64 */ +  libc_hidden_def(memset) | 
