summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorEric Andersen <andersen@codepoet.org>2002-02-04 10:07:51 +0000
committerEric Andersen <andersen@codepoet.org>2002-02-04 10:07:51 +0000
commitfea1b23bcf0b9ef29af0968f5adf1e822ca874ae (patch)
tree76c4a02bf12795a3f5b6b4698a8174e8d84d0485
parent59c9d20af6744d27285c9a505577308bd1f82781 (diff)
Adjust sparc port do it now actually works.
-Erik
-rw-r--r--libc/sysdeps/linux/sparc/Makefile11
-rw-r--r--libc/sysdeps/linux/sparc/__longjmp.S50
-rw-r--r--libc/sysdeps/linux/sparc/brk.c49
-rw-r--r--libc/sysdeps/linux/sparc/bsd-_setjmp.S1
-rw-r--r--libc/sysdeps/linux/sparc/bsd-setjmp.S1
-rw-r--r--libc/sysdeps/linux/sparc/clone.S72
-rw-r--r--libc/sysdeps/linux/sparc/crt0.S90
-rw-r--r--libc/sysdeps/linux/sparc/crt0.c (renamed from libc/sysdeps/linux/sparc/fork.c)34
-rw-r--r--libc/sysdeps/linux/sparc/fork.S46
-rw-r--r--libc/sysdeps/linux/sparc/rem.S367
-rw-r--r--libc/sysdeps/linux/sparc/sdiv.S366
-rw-r--r--libc/sysdeps/linux/sparc/setjmp.S23
-rw-r--r--libc/sysdeps/linux/sparc/udiv.S348
-rw-r--r--libc/sysdeps/linux/sparc/umul.S160
-rw-r--r--libc/sysdeps/linux/sparc/urem.S350
15 files changed, 1820 insertions, 148 deletions
diff --git a/libc/sysdeps/linux/sparc/Makefile b/libc/sysdeps/linux/sparc/Makefile
index 9e9ffb66d..2a53b8e17 100644
--- a/libc/sysdeps/linux/sparc/Makefile
+++ b/libc/sysdeps/linux/sparc/Makefile
@@ -25,13 +25,14 @@ TOPDIR=../../../../
include $(TOPDIR)Rules.mak
ASFLAGS=$(CFLAGS)
-CRT0=crt0.S
-CRT0_OBJ=$(patsubst %.S,%.o, $(CRT0))
+CRT0=crt0.c
+CRT0_OBJ=$(patsubst %.c,%.o, $(CRT0))
-SSRC=__longjmp.S setjmp.S vfork.S
+SSRC=__longjmp.S fork.S vfork.S clone.S setjmp.S bsd-setjmp.S bsd-_setjmp.S \
+ urem.S udiv.S umul.S sdiv.S rem.S
SOBJS=$(patsubst %.S,%.o, $(SSRC))
-CSRC=fork.c
+CSRC=brk.c
COBJS=$(patsubst %.c,%.o, $(CSRC))
OBJS=$(SOBJS) $(MOBJ) $(COBJS)
@@ -46,7 +47,7 @@ ar-target: $(OBJS) $(CRT0_OBJ)
$(AR) $(ARFLAGS) $(LIBC) $(OBJS)
cp $(CRT0_OBJ) $(TOPDIR)libc/$(CRT0_OBJ)
-$(CRT0_OBJ): %.o : %.S
+$(CRT0_OBJ): %.o : %.c
$(CC) $(CFLAGS) -c $< -o $@
$(STRIPTOOL) -x -R .note -R .comment $*.o
diff --git a/libc/sysdeps/linux/sparc/__longjmp.S b/libc/sysdeps/linux/sparc/__longjmp.S
index 54af3c27a..2d568d381 100644
--- a/libc/sysdeps/linux/sparc/__longjmp.S
+++ b/libc/sysdeps/linux/sparc/__longjmp.S
@@ -2,21 +2,19 @@
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Library General Public License as
- published by the Free Software Foundation; either version 2 of the
- License, or (at your option) any later version.
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Library General Public License for more details.
+ Lesser General Public License for more details.
- You should have received a copy of the GNU Library General Public
- License along with the GNU C Library; see the file COPYING.LIB. If not,
- write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- Boston, MA 02111-1307, USA. */
-
-/* Code taken from glibc/sysdeps/sparc/sparc32/ (glibc 2.2.2) */
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
#include <sysdep.h>
@@ -27,7 +25,11 @@
#define ST_FLUSH_WINDOWS 3
#define RW_FP [%fp + 0x48]
-ENTRY(__longjmp)
+.global __longjmp;
+.align 4;
+__longjmp: ;
+.type __longjmp ,@function;
+
/* Store our arguments in global registers so we can still
use them while unwinding frames and their register windows. */
@@ -40,27 +42,21 @@ ENTRY(__longjmp)
xor %fp, %g3, %o0
add %fp, 512, %o1
andncc %o0, 4095, %o0
- bne thread
- //bne LOC(thread)
+ bne .Lthread
cmp %o1, %g3
- bl thread
- //bl LOC(thread)
+ bl .Lthread
/* Now we will loop, unwinding the register windows up the stack
until the restored %fp value matches the target value in %g3. */
-//LOC(loop):
-loop:
+.Lloop:
cmp %fp, %g3 /* Have we reached the target frame? */
- bl,a loop /* Loop while current fp is below target. */
- //bl,a LOC(loop) /* Loop while current fp is below target. */
+ bl,a .Lloop /* Loop while current fp is below target. */
restore /* Unwind register window in delay slot. */
- be,a found /* Better have hit it exactly. */
- //be,a LOC(found) /* Better have hit it exactly. */
+ be,a .Lfound /* Better have hit it exactly. */
ld ENV(g1,JB_SP), %o0 /* Delay slot: extract target SP. */
-thread:
-//LOC(thread):
+.Lthread:
/*
* Do a "flush register windows trap". The trap handler in the
* kernel writes all the register windows to their stack slots, and
@@ -77,15 +73,13 @@ thread:
retl
restore %g2, 0, %o0 /* Restore values from above register frame. */
-found:
-//LOC(found):
+.Lfound:
/* We have unwound register windows so %fp matches the target. */
mov %o0, %sp /* OK, install new SP. */
-//LOC(sp_ok):
-sp_ok:
+.Lsp_ok:
ld ENV(g1,JB_PC), %o0 /* Extract target return PC. */
jmp %o0 + 8 /* Return there. */
mov %g2, %o0 /* Delay slot: set return value. */
-END(__longjmp)
+.size __longjmp , . - __longjmp
diff --git a/libc/sysdeps/linux/sparc/brk.c b/libc/sysdeps/linux/sparc/brk.c
new file mode 100644
index 000000000..16a5e3942
--- /dev/null
+++ b/libc/sysdeps/linux/sparc/brk.c
@@ -0,0 +1,49 @@
+/* brk system call for Linux/i386.
+ Copyright (C) 1995, 1996, 2000 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <errno.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+
+
+/* This must be initialized data because commons can't have aliases. */
+void *___brk_addr = 0;
+
+
+int brk (void *addr)
+{
+ void *newbrk;
+
+ {
+ register void *o0 __asm__("%o0") = addr;
+ register int g1 __asm__("%g1") = 17 ;
+ __asm ("t 0x10" : "=r"(o0) : "r"(g1), "0"(o0) : "cc");
+ newbrk = o0;
+ }
+
+ ___brk_addr = newbrk;
+
+ if (newbrk < addr)
+ {
+ __set_errno (ENOMEM);
+ return -1;
+ }
+
+ return 0;
+}
diff --git a/libc/sysdeps/linux/sparc/bsd-_setjmp.S b/libc/sysdeps/linux/sparc/bsd-_setjmp.S
new file mode 100644
index 000000000..4e6a2da56
--- /dev/null
+++ b/libc/sysdeps/linux/sparc/bsd-_setjmp.S
@@ -0,0 +1 @@
+/* _setjmp is in setjmp.S */
diff --git a/libc/sysdeps/linux/sparc/bsd-setjmp.S b/libc/sysdeps/linux/sparc/bsd-setjmp.S
new file mode 100644
index 000000000..1da848d2f
--- /dev/null
+++ b/libc/sysdeps/linux/sparc/bsd-setjmp.S
@@ -0,0 +1 @@
+/* setjmp is in setjmp.S */
diff --git a/libc/sysdeps/linux/sparc/clone.S b/libc/sysdeps/linux/sparc/clone.S
new file mode 100644
index 000000000..9af88688c
--- /dev/null
+++ b/libc/sysdeps/linux/sparc/clone.S
@@ -0,0 +1,72 @@
+/* Copyright (C) 1996, 1997, 1998, 2000 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Richard Henderson (rth@tamu.edu).
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+/* clone() is even more special than fork() as it mucks with stacks
+ and invokes a function in the right context after its all over. */
+
+#include <asm/errno.h>
+#include <asm/unistd.h>
+
+/* int clone(int (*fn)(void *arg), void *child_stack, int flags, void *arg); */
+
+ .text
+ .align 4
+ .globl __clone
+ .type __clone,@function
+
+__clone:
+ save %sp,-96,%sp
+
+ /* sanity check arguments */
+ tst %i0
+ be .Lerror
+ orcc %i1,%g0,%o1
+ be .Lerror
+ mov %i2,%o0
+
+ /* Do the system call */
+ set __NR_clone,%g1
+ ta 0x10
+ bcs .Lerror
+ tst %o1
+ bne __thread_start
+ nop
+ ret
+ restore %o0,%g0,%o0
+
+.Lerror:
+ call __errno_location
+ or %g0,EINVAL,%i0
+ st %i0,[%o0]
+ ret
+ restore %g0,-1,%o0
+
+ .size __clone, .-__clone
+
+ .type __thread_start,@function
+
+__thread_start:
+ call %i0
+ mov %i3,%o0
+ call _exit,0
+ nop
+
+ .size __thread_start, .-__thread_start
+
+.weak clone ; clone = __clone
diff --git a/libc/sysdeps/linux/sparc/crt0.S b/libc/sysdeps/linux/sparc/crt0.S
deleted file mode 100644
index 7910131db..000000000
--- a/libc/sysdeps/linux/sparc/crt0.S
+++ /dev/null
@@ -1,90 +0,0 @@
-/*
- * xrt0.s for ERC32.
- *
- * This program is free software; you can redistribute it and/or modify it under
- * the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option)
- * any later version.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 675
- * Mass Ave, Cambridge, MA 02139, USA.
- *
- */
-
-/* code taken from leonccs 1.0 leon/src/libio/crt0.S
- I don't know if this is available anymore, now that LECCS
- is out. And I'm not sure if this source is in the LECCS distro :(
-*/
-
- .text
-! Original :
-! .global __start, _main
-! uC-libc version :
- .global _start
- .global __uClibc_main
-
-! Start the real-time clock with a tick of 14 clocks
-!
-
-_start:
-
- save %sp, -64, %sp
-
- /* clear the bss */
-
- sethi %hi(edata),%g2
- or %g2,%lo(edata),%g2 ! g2 = start of bss
- sethi %hi(_end),%g3
- or %g3,%lo(_end),%g3 ! g3 = end of bss
- mov %g0,%g1 ! so std has two zeros
-zerobss:
- std %g0,[%g2]
- add %g2,8,%g2
- cmp %g2,%g3
- bleu,a zerobss
- nop
-
- /* move data segment to proper location */
-
-relocd:
- set (_endtext),%g2 ! g2 = start of data in aout file
- set (_environ),%g4 ! g4 = start of where data should go
- set (_edata),%g3 ! g3 = end of where data should go
- subcc %g3, %g4, %g5 ! g5 = length of data
-
- subcc %g4, %g2, %g0 ! need to relocate data ?
- ble initok
- ld [%g4], %g6
-! subcc %g6, 1, %g0
-! be initok
-mvdata:
- subcc %g5, 8, %g5
- ldd [%g2 + %g5], %g6
- bg mvdata
- std %g6, [%g4 + %g5]
-
-initok:
-
-! call _main
- call __uClibc_main
- nop
-! Should not return from uClibc main()
-! ret
-! nop
-
- .seg "data"
- .global .bdata
-.bdata:
- .align 8
- .global _environ ! first symbol in sdata
-_environ:
- .word 1
-
-
-
diff --git a/libc/sysdeps/linux/sparc/fork.c b/libc/sysdeps/linux/sparc/crt0.c
index 6b3a34f98..02805fdde 100644
--- a/libc/sysdeps/linux/sparc/fork.c
+++ b/libc/sysdeps/linux/sparc/crt0.c
@@ -1,9 +1,8 @@
/* vi: set sw=4 ts=4: */
-/* fork for uClibc
+/* uClibc/sysdeps/linux/i386/crt0.S
+ * Pull stuff off the stack and get uClibc moving.
*
- * Copyright (C) 2000 by Lineo, inc. and Erik Andersen
* Copyright (C) 2000,2001 by Erik Andersen <andersen@uclibc.org>
- * Written by Erik Andersen <andersen@uclibc.org>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU Library General Public License as published by
@@ -20,15 +19,26 @@
* Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
-#include <errno.h>
-#include <features.h>
-#include <sys/types.h>
-#include <sys/syscall.h>
+extern void __uClibc_main(int argc,void *argv,void *envp);
-#ifndef __HAS_NO_MMU__
+/* a little bit of stuff to support C++ */
+__asm__(".section .ctors,\"aw\"\n.align 4\n.global __CTOR_LIST__\n"
+ "__CTOR_LIST__:\n.long -1\n");
-//#define __NR_fork 2
-#include <unistd.h>
-_syscall0(pid_t, fork);
+__asm__(".section .dtors,\"aw\"\n.align 4\n.global __DTOR_LIST__\n"
+ "__DTOR_LIST__:\n.long -1\n");
+
+void _start(unsigned int first_arg)
+{
+ unsigned int argc;
+ char **argv, **envp;
+ unsigned long *stack;
+
+ stack = (unsigned long*) &first_arg;
+ argc = *(stack - 1);
+ argv = (char **) stack;
+ envp = (char **)stack + argc + 1;
+
+ __uClibc_main(argc, argv, envp);
+}
-#endif
diff --git a/libc/sysdeps/linux/sparc/fork.S b/libc/sysdeps/linux/sparc/fork.S
new file mode 100644
index 000000000..6bab58c81
--- /dev/null
+++ b/libc/sysdeps/linux/sparc/fork.S
@@ -0,0 +1,46 @@
+/* Copyright (C) 1991, 92, 94, 95, 97, 99 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <sysdep.h>
+
+.text;
+.global fork;
+.align 4;
+fork: ;
+.type fork ,@function; ;
+
+ mov 2, %g1 ;
+ ta 0x10;
+ bcc,a 9000f;
+ nop;
+ save %sp,-96,%sp;
+ call __errno_location;
+ nop;
+ st %i0,[%o0];
+ jmpl %i7+8,%g0;
+ restore %g0,-1,%o0; ;
+ 9000:;
+
+ /* %o1 is now 0 for the parent and 1 for the child. Decrement it to
+ make it -1 (all bits set) for the parent, and 0 (no bits set)
+ for the child. Then AND it with %o0, so the parent gets
+ %o0&-1==pid, and the child gets %o0&0==0. */
+ sub %o1, 1, %o1
+ retl
+ and %o0, %o1, %o0
+
diff --git a/libc/sysdeps/linux/sparc/rem.S b/libc/sysdeps/linux/sparc/rem.S
new file mode 100644
index 000000000..52bf167a4
--- /dev/null
+++ b/libc/sysdeps/linux/sparc/rem.S
@@ -0,0 +1,367 @@
+ /* This file is generated from divrem.m4; DO NOT EDIT! */
+/*
+ * Division and remainder, from Appendix E of the Sparc Version 8
+ * Architecture Manual, with fixes from Gordon Irlam.
+ */
+
+/*
+ * Input: dividend and divisor in %o0 and %o1 respectively.
+ *
+ * m4 parameters:
+ * .rem name of function to generate
+ * rem rem=div => %o0 / %o1; rem=rem => %o0 % %o1
+ * true true=true => signed; true=false => unsigned
+ *
+ * Algorithm parameters:
+ * N how many bits per iteration we try to get (4)
+ * WORDSIZE total number of bits (32)
+ *
+ * Derived constants:
+ * TOPBITS number of bits in the top decade of a number
+ *
+ * Important variables:
+ * Q the partial quotient under development (initially 0)
+ * R the remainder so far, initially the dividend
+ * ITER number of main division loop iterations required;
+ * equal to ceil(log2(quotient) / N). Note that this
+ * is the log base (2^N) of the quotient.
+ * V the current comparand, initially divisor*2^(ITER*N-1)
+ *
+ * Cost:
+ * Current estimate for non-large dividend is
+ * ceil(log2(quotient) / N) * (10 + 7N/2) + C
+ * A large dividend is one greater than 2^(31-TOPBITS) and takes a
+ * different path, as the upper bits of the quotient must be developed
+ * one bit at a time.
+ */
+
+
+
+#include <sysdep.h>
+
+
+.global .rem;
+.align 4;
+.type .rem ,@function;
+
+.rem:
+ ! compute sign of result; if neither is negative, no problem
+ orcc %o1, %o0, %g0 ! either negative?
+ bge 2f ! no, go do the divide
+ mov %o0, %g3 ! sign of remainder matches %o0
+ tst %o1
+ bge 1f
+ tst %o0
+ ! %o1 is definitely negative; %o0 might also be negative
+ bge 2f ! if %o0 not negative...
+ sub %g0, %o1, %o1 ! in any case, make %o1 nonneg
+1: ! %o0 is negative, %o1 is nonnegative
+ sub %g0, %o0, %o0 ! make %o0 nonnegative
+2:
+
+ ! Ready to divide. Compute size of quotient; scale comparand.
+ orcc %o1, %g0, %o5
+ bne 1f
+ mov %o0, %o3
+
+ ! Divide by zero trap. If it returns, return 0 (about as
+ ! wrong as possible, but that is what SunOS does...).
+ ta 0x02
+ retl
+ clr %o0
+
+1:
+ cmp %o3, %o5 ! if %o1 exceeds %o0, done
+ blu .Lgot_result ! (and algorithm fails otherwise)
+ clr %o2
+ sethi %hi(1 << (32 - 4 - 1)), %g1
+ cmp %o3, %g1
+ blu .Lnot_really_big
+ clr %o4
+
+ ! Here the dividend is >= 2**(31-N) or so. We must be careful here,
+ ! as our usual N-at-a-shot divide step will cause overflow and havoc.
+ ! The number of bits in the result here is N*ITER+SC, where SC <= N.
+ ! Compute ITER in an unorthodox manner: know we need to shift V into
+ ! the top decade: so do not even bother to compare to R.
+ 1:
+ cmp %o5, %g1
+ bgeu 3f
+ mov 1, %g2
+ sll %o5, 4, %o5
+ b 1b
+ add %o4, 1, %o4
+
+ ! Now compute %g2.
+ 2: addcc %o5, %o5, %o5
+ bcc .Lnot_too_big
+ add %g2, 1, %g2
+
+ ! We get here if the %o1 overflowed while shifting.
+ ! This means that %o3 has the high-order bit set.
+ ! Restore %o5 and subtract from %o3.
+ sll %g1, 4, %g1 ! high order bit
+ srl %o5, 1, %o5 ! rest of %o5
+ add %o5, %g1, %o5
+ b .Ldo_single_div
+ sub %g2, 1, %g2
+
+ .Lnot_too_big:
+ 3: cmp %o5, %o3
+ blu 2b
+ nop
+ be .Ldo_single_div
+ nop
+ /* NB: these are commented out in the V8-Sparc manual as well */
+ /* (I do not understand this) */
+ ! %o5 > %o3: went too far: back up 1 step
+ ! srl %o5, 1, %o5
+ ! dec %g2
+ ! do single-bit divide steps
+ !
+ ! We have to be careful here. We know that %o3 >= %o5, so we can do the
+ ! first divide step without thinking. BUT, the others are conditional,
+ ! and are only done if %o3 >= 0. Because both %o3 and %o5 may have the high-
+ ! order bit set in the first step, just falling into the regular
+ ! division loop will mess up the first time around.
+ ! So we unroll slightly...
+ .Ldo_single_div:
+ subcc %g2, 1, %g2
+ bl .Lend_regular_divide
+ nop
+ sub %o3, %o5, %o3
+ mov 1, %o2
+ b .Lend_single_divloop
+ nop
+ .Lsingle_divloop:
+ sll %o2, 1, %o2
+ bl 1f
+ srl %o5, 1, %o5
+ ! %o3 >= 0
+ sub %o3, %o5, %o3
+ b 2f
+ add %o2, 1, %o2
+ 1: ! %o3 < 0
+ add %o3, %o5, %o3
+ sub %o2, 1, %o2
+ 2:
+ .Lend_single_divloop:
+ subcc %g2, 1, %g2
+ bge .Lsingle_divloop
+ tst %o3
+ b,a .Lend_regular_divide
+
+.Lnot_really_big:
+1:
+ sll %o5, 4, %o5
+ cmp %o5, %o3
+ bleu 1b
+ addcc %o4, 1, %o4
+ be .Lgot_result
+ sub %o4, 1, %o4
+
+ tst %o3 ! set up for initial iteration
+.Ldivloop:
+ sll %o2, 4, %o2
+ ! depth 1, accumulated bits 0
+ bl .L1.16
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ ! depth 2, accumulated bits 1
+ bl .L2.17
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ ! depth 3, accumulated bits 3
+ bl .L3.19
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ ! depth 4, accumulated bits 7
+ bl .L4.23
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (7*2+1), %o2
+
+.L4.23:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (7*2-1), %o2
+
+
+.L3.19:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ ! depth 4, accumulated bits 5
+ bl .L4.21
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (5*2+1), %o2
+
+.L4.21:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (5*2-1), %o2
+
+
+
+.L2.17:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ ! depth 3, accumulated bits 1
+ bl .L3.17
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ ! depth 4, accumulated bits 3
+ bl .L4.19
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (3*2+1), %o2
+
+.L4.19:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (3*2-1), %o2
+
+
+.L3.17:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ ! depth 4, accumulated bits 1
+ bl .L4.17
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (1*2+1), %o2
+
+.L4.17:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (1*2-1), %o2
+
+
+
+
+.L1.16:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ ! depth 2, accumulated bits -1
+ bl .L2.15
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ ! depth 3, accumulated bits -1
+ bl .L3.15
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ ! depth 4, accumulated bits -1
+ bl .L4.15
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-1*2+1), %o2
+
+.L4.15:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-1*2-1), %o2
+
+
+.L3.15:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ ! depth 4, accumulated bits -3
+ bl .L4.13
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-3*2+1), %o2
+
+.L4.13:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-3*2-1), %o2
+
+
+
+.L2.15:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ ! depth 3, accumulated bits -3
+ bl .L3.13
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ ! depth 4, accumulated bits -5
+ bl .L4.11
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-5*2+1), %o2
+
+.L4.11:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-5*2-1), %o2
+
+
+.L3.13:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ ! depth 4, accumulated bits -7
+ bl .L4.9
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-7*2+1), %o2
+
+.L4.9:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-7*2-1), %o2
+
+
+
+
+ 9:
+.Lend_regular_divide:
+ subcc %o4, 1, %o4
+ bge .Ldivloop
+ tst %o3
+ bl,a .Lgot_result
+ ! non-restoring fixup here (one instruction only!)
+ add %o3, %o1, %o3
+
+
+.Lgot_result:
+ ! check to see if answer should be < 0
+ tst %g3
+ bl,a 1f
+ sub %g0, %o3, %o3
+1:
+ retl
+ mov %o3, %o0
+
+END(.rem)
diff --git a/libc/sysdeps/linux/sparc/sdiv.S b/libc/sysdeps/linux/sparc/sdiv.S
new file mode 100644
index 000000000..678a19413
--- /dev/null
+++ b/libc/sysdeps/linux/sparc/sdiv.S
@@ -0,0 +1,366 @@
+ /* This file is generated from divrem.m4; DO NOT EDIT! */
+/*
+ * Division and remainder, from Appendix E of the Sparc Version 8
+ * Architecture Manual, with fixes from Gordon Irlam.
+ */
+
+/*
+ * Input: dividend and divisor in %o0 and %o1 respectively.
+ *
+ * m4 parameters:
+ * .div name of function to generate
+ * div div=div => %o0 / %o1; div=rem => %o0 % %o1
+ * true true=true => signed; true=false => unsigned
+ *
+ * Algorithm parameters:
+ * N how many bits per iteration we try to get (4)
+ * WORDSIZE total number of bits (32)
+ *
+ * Derived constants:
+ * TOPBITS number of bits in the top decade of a number
+ *
+ * Important variables:
+ * Q the partial quotient under development (initially 0)
+ * R the remainder so far, initially the dividend
+ * ITER number of main division loop iterations required;
+ * equal to ceil(log2(quotient) / N). Note that this
+ * is the log base (2^N) of the quotient.
+ * V the current comparand, initially divisor*2^(ITER*N-1)
+ *
+ * Cost:
+ * Current estimate for non-large dividend is
+ * ceil(log2(quotient) / N) * (10 + 7N/2) + C
+ * A large dividend is one greater than 2^(31-TOPBITS) and takes a
+ * different path, as the upper bits of the quotient must be developed
+ * one bit at a time.
+ */
+
+
+
+#include <sysdep.h>
+
+.global .div;
+.align 4;
+.type .div ,@function;
+
+.div:
+ ! compute sign of result; if neither is negative, no problem
+ orcc %o1, %o0, %g0 ! either negative?
+ bge 2f ! no, go do the divide
+ xor %o1, %o0, %g3 ! compute sign in any case
+ tst %o1
+ bge 1f
+ tst %o0
+ ! %o1 is definitely negative; %o0 might also be negative
+ bge 2f ! if %o0 not negative...
+ sub %g0, %o1, %o1 ! in any case, make %o1 nonneg
+1: ! %o0 is negative, %o1 is nonnegative
+ sub %g0, %o0, %o0 ! make %o0 nonnegative
+2:
+
+ ! Ready to divide. Compute size of quotient; scale comparand.
+ orcc %o1, %g0, %o5
+ bne 1f
+ mov %o0, %o3
+
+ ! Divide by zero trap. If it returns, return 0 (about as
+ ! wrong as possible, but that is what SunOS does...).
+ ta 0x02
+ retl
+ clr %o0
+
+1:
+ cmp %o3, %o5 ! if %o1 exceeds %o0, done
+ blu .Lgot_result ! (and algorithm fails otherwise)
+ clr %o2
+ sethi %hi(1 << (32 - 4 - 1)), %g1
+ cmp %o3, %g1
+ blu .Lnot_really_big
+ clr %o4
+
+ ! Here the dividend is >= 2**(31-N) or so. We must be careful here,
+ ! as our usual N-at-a-shot divide step will cause overflow and havoc.
+ ! The number of bits in the result here is N*ITER+SC, where SC <= N.
+ ! Compute ITER in an unorthodox manner: know we need to shift V into
+ ! the top decade: so do not even bother to compare to R.
+ 1:
+ cmp %o5, %g1
+ bgeu 3f
+ mov 1, %g2
+ sll %o5, 4, %o5
+ b 1b
+ add %o4, 1, %o4
+
+ ! Now compute %g2.
+ 2: addcc %o5, %o5, %o5
+ bcc .Lnot_too_big
+ add %g2, 1, %g2
+
+ ! We get here if the %o1 overflowed while shifting.
+ ! This means that %o3 has the high-order bit set.
+ ! Restore %o5 and subtract from %o3.
+ sll %g1, 4, %g1 ! high order bit
+ srl %o5, 1, %o5 ! rest of %o5
+ add %o5, %g1, %o5
+ b .Ldo_single_div
+ sub %g2, 1, %g2
+
+ .Lnot_too_big:
+ 3: cmp %o5, %o3
+ blu 2b
+ nop
+ be .Ldo_single_div
+ nop
+ /* NB: these are commented out in the V8-Sparc manual as well */
+ /* (I do not understand this) */
+ ! %o5 > %o3: went too far: back up 1 step
+ ! srl %o5, 1, %o5
+ ! dec %g2
+ ! do single-bit divide steps
+ !
+ ! We have to be careful here. We know that %o3 >= %o5, so we can do the
+ ! first divide step without thinking. BUT, the others are conditional,
+ ! and are only done if %o3 >= 0. Because both %o3 and %o5 may have the high-
+ ! order bit set in the first step, just falling into the regular
+ ! division loop will mess up the first time around.
+ ! So we unroll slightly...
+ .Ldo_single_div:
+ subcc %g2, 1, %g2
+ bl .Lend_regular_divide
+ nop
+ sub %o3, %o5, %o3
+ mov 1, %o2
+ b .Lend_single_divloop
+ nop
+ .Lsingle_divloop:
+ sll %o2, 1, %o2
+ bl 1f
+ srl %o5, 1, %o5
+ ! %o3 >= 0
+ sub %o3, %o5, %o3
+ b 2f
+ add %o2, 1, %o2
+ 1: ! %o3 < 0
+ add %o3, %o5, %o3
+ sub %o2, 1, %o2
+ 2:
+ .Lend_single_divloop:
+ subcc %g2, 1, %g2
+ bge .Lsingle_divloop
+ tst %o3
+ b,a .Lend_regular_divide
+
+.Lnot_really_big:
+1:
+ sll %o5, 4, %o5
+ cmp %o5, %o3
+ bleu 1b
+ addcc %o4, 1, %o4
+ be .Lgot_result
+ sub %o4, 1, %o4
+
+ tst %o3 ! set up for initial iteration
+.Ldivloop:
+ sll %o2, 4, %o2
+ ! depth 1, accumulated bits 0
+ bl .L1.16
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ ! depth 2, accumulated bits 1
+ bl .L2.17
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ ! depth 3, accumulated bits 3
+ bl .L3.19
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ ! depth 4, accumulated bits 7
+ bl .L4.23
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (7*2+1), %o2
+
+.L4.23:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (7*2-1), %o2
+
+
+.L3.19:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ ! depth 4, accumulated bits 5
+ bl .L4.21
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (5*2+1), %o2
+
+.L4.21:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (5*2-1), %o2
+
+
+
+.L2.17:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ ! depth 3, accumulated bits 1
+ bl .L3.17
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ ! depth 4, accumulated bits 3
+ bl .L4.19
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (3*2+1), %o2
+
+.L4.19:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (3*2-1), %o2
+
+
+.L3.17:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ ! depth 4, accumulated bits 1
+ bl .L4.17
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (1*2+1), %o2
+
+.L4.17:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (1*2-1), %o2
+
+
+
+
+.L1.16:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ ! depth 2, accumulated bits -1
+ bl .L2.15
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ ! depth 3, accumulated bits -1
+ bl .L3.15
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ ! depth 4, accumulated bits -1
+ bl .L4.15
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-1*2+1), %o2
+
+.L4.15:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-1*2-1), %o2
+
+
+.L3.15:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ ! depth 4, accumulated bits -3
+ bl .L4.13
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-3*2+1), %o2
+
+.L4.13:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-3*2-1), %o2
+
+
+
+.L2.15:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ ! depth 3, accumulated bits -3
+ bl .L3.13
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ ! depth 4, accumulated bits -5
+ bl .L4.11
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-5*2+1), %o2
+
+.L4.11:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-5*2-1), %o2
+
+
+.L3.13:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ ! depth 4, accumulated bits -7
+ bl .L4.9
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-7*2+1), %o2
+
+.L4.9:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-7*2-1), %o2
+
+
+
+
+ 9:
+.Lend_regular_divide:
+ subcc %o4, 1, %o4
+ bge .Ldivloop
+ tst %o3
+ bl,a .Lgot_result
+ ! non-restoring fixup here (one instruction only!)
+ sub %o2, 1, %o2
+
+
+.Lgot_result:
+ ! check to see if answer should be < 0
+ tst %g3
+ bl,a 1f
+ sub %g0, %o2, %o2
+1:
+ retl
+ mov %o2, %o0
+
+END(.div)
diff --git a/libc/sysdeps/linux/sparc/setjmp.S b/libc/sysdeps/linux/sparc/setjmp.S
index 70fa68697..30051418c 100644
--- a/libc/sysdeps/linux/sparc/setjmp.S
+++ b/libc/sysdeps/linux/sparc/setjmp.S
@@ -2,24 +2,21 @@
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Library General Public License as
- published by the Free Software Foundation; either version 2 of the
- License, or (at your option) any later version.
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Library General Public License for more details.
+ Lesser General Public License for more details.
- You should have received a copy of the GNU Library General Public
- License along with the GNU C Library; see the file COPYING.LIB. If not,
- write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- Boston, MA 02111-1307, USA. */
-
-/* Code taken from glibc2.2.2/sysdeps/sparc/sparc32/setjmp.S */
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
#include <sysdep.h>
-#include "sysdep.h"
#define _ASM 1
#define _SETJMP_H
@@ -49,5 +46,5 @@ ENTRY (__sigsetjmp)
mov %g1, %o7
END(__sigsetjmp)
-//weak_extern(_setjmp)
-//weak_extern(setjmp)
+.weak _setjmp
+.weak setjmp
diff --git a/libc/sysdeps/linux/sparc/udiv.S b/libc/sysdeps/linux/sparc/udiv.S
new file mode 100644
index 000000000..e6f143118
--- /dev/null
+++ b/libc/sysdeps/linux/sparc/udiv.S
@@ -0,0 +1,348 @@
+ /* This file is generated from divrem.m4; DO NOT EDIT! */
+/*
+ * Division and remainder, from Appendix E of the Sparc Version 8
+ * Architecture Manual, with fixes from Gordon Irlam.
+ */
+
+/*
+ * Input: dividend and divisor in %o0 and %o1 respectively.
+ *
+ * m4 parameters:
+ * .udiv name of function to generate
+ * div div=div => %o0 / %o1; div=rem => %o0 % %o1
+ * false false=true => signed; false=false => unsigned
+ *
+ * Algorithm parameters:
+ * N how many bits per iteration we try to get (4)
+ * WORDSIZE total number of bits (32)
+ *
+ * Derived constants:
+ * TOPBITS number of bits in the top decade of a number
+ *
+ * Important variables:
+ * Q the partial quotient under development (initially 0)
+ * R the remainder so far, initially the dividend
+ * ITER number of main division loop iterations required;
+ * equal to ceil(log2(quotient) / N). Note that this
+ * is the log base (2^N) of the quotient.
+ * V the current comparand, initially divisor*2^(ITER*N-1)
+ *
+ * Cost:
+ * Current estimate for non-large dividend is
+ * ceil(log2(quotient) / N) * (10 + 7N/2) + C
+ * A large dividend is one greater than 2^(31-TOPBITS) and takes a
+ * different path, as the upper bits of the quotient must be developed
+ * one bit at a time.
+ */
+
+
+
+#include <sysdep.h>
+
+.global .udiv;
+.align 4;
+.type .udiv ,@function;
+
+.udiv:
+ ! Ready to divide. Compute size of quotient; scale comparand.
+ orcc %o1, %g0, %o5
+ bne 1f
+ mov %o0, %o3
+
+ ! Divide by zero trap. If it returns, return 0 (about as
+ ! wrong as possible, but that is what SunOS does...).
+ ta 0x02
+ retl
+ clr %o0
+
+1:
+ cmp %o3, %o5 ! if %o1 exceeds %o0, done
+ blu .Lgot_result ! (and algorithm fails otherwise)
+ clr %o2
+ sethi %hi(1 << (32 - 4 - 1)), %g1
+ cmp %o3, %g1
+ blu .Lnot_really_big
+ clr %o4
+
+ ! Here the dividend is >= 2**(31-N) or so. We must be careful here,
+ ! as our usual N-at-a-shot divide step will cause overflow and havoc.
+ ! The number of bits in the result here is N*ITER+SC, where SC <= N.
+ ! Compute ITER in an unorthodox manner: know we need to shift V into
+ ! the top decade: so do not even bother to compare to R.
+ 1:
+ cmp %o5, %g1
+ bgeu 3f
+ mov 1, %g2
+ sll %o5, 4, %o5
+ b 1b
+ add %o4, 1, %o4
+
+ ! Now compute %g2.
+ 2: addcc %o5, %o5, %o5
+ bcc .Lnot_too_big
+ add %g2, 1, %g2
+
+ ! We get here if the %o1 overflowed while shifting.
+ ! This means that %o3 has the high-order bit set.
+ ! Restore %o5 and subtract from %o3.
+ sll %g1, 4, %g1 ! high order bit
+ srl %o5, 1, %o5 ! rest of %o5
+ add %o5, %g1, %o5
+ b .Ldo_single_div
+ sub %g2, 1, %g2
+
+ .Lnot_too_big:
+ 3: cmp %o5, %o3
+ blu 2b
+ nop
+ be .Ldo_single_div
+ nop
+ /* NB: these are commented out in the V8-Sparc manual as well */
+ /* (I do not understand this) */
+ ! %o5 > %o3: went too far: back up 1 step
+ ! srl %o5, 1, %o5
+ ! dec %g2
+ ! do single-bit divide steps
+ !
+ ! We have to be careful here. We know that %o3 >= %o5, so we can do the
+ ! first divide step without thinking. BUT, the others are conditional,
+ ! and are only done if %o3 >= 0. Because both %o3 and %o5 may have the high-
+ ! order bit set in the first step, just falling into the regular
+ ! division loop will mess up the first time around.
+ ! So we unroll slightly...
+ .Ldo_single_div:
+ subcc %g2, 1, %g2
+ bl .Lend_regular_divide
+ nop
+ sub %o3, %o5, %o3
+ mov 1, %o2
+ b .Lend_single_divloop
+ nop
+ .Lsingle_divloop:
+ sll %o2, 1, %o2
+ bl 1f
+ srl %o5, 1, %o5
+ ! %o3 >= 0
+ sub %o3, %o5, %o3
+ b 2f
+ add %o2, 1, %o2
+ 1: ! %o3 < 0
+ add %o3, %o5, %o3
+ sub %o2, 1, %o2
+ 2:
+ .Lend_single_divloop:
+ subcc %g2, 1, %g2
+ bge .Lsingle_divloop
+ tst %o3
+ b,a .Lend_regular_divide
+
+.Lnot_really_big:
+1:
+ sll %o5, 4, %o5
+ cmp %o5, %o3
+ bleu 1b
+ addcc %o4, 1, %o4
+ be .Lgot_result
+ sub %o4, 1, %o4
+
+ tst %o3 ! set up for initial iteration
+.Ldivloop:
+ sll %o2, 4, %o2
+ ! depth 1, accumulated bits 0
+ bl .L1.16
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ ! depth 2, accumulated bits 1
+ bl .L2.17
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ ! depth 3, accumulated bits 3
+ bl .L3.19
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ ! depth 4, accumulated bits 7
+ bl .L4.23
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (7*2+1), %o2
+
+.L4.23:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (7*2-1), %o2
+
+
+.L3.19:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ ! depth 4, accumulated bits 5
+ bl .L4.21
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (5*2+1), %o2
+
+.L4.21:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (5*2-1), %o2
+
+
+
+.L2.17:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ ! depth 3, accumulated bits 1
+ bl .L3.17
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ ! depth 4, accumulated bits 3
+ bl .L4.19
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (3*2+1), %o2
+
+.L4.19:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (3*2-1), %o2
+
+
+.L3.17:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ ! depth 4, accumulated bits 1
+ bl .L4.17
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (1*2+1), %o2
+
+.L4.17:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (1*2-1), %o2
+
+
+
+
+.L1.16:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ ! depth 2, accumulated bits -1
+ bl .L2.15
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ ! depth 3, accumulated bits -1
+ bl .L3.15
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ ! depth 4, accumulated bits -1
+ bl .L4.15
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-1*2+1), %o2
+
+.L4.15:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-1*2-1), %o2
+
+
+.L3.15:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ ! depth 4, accumulated bits -3
+ bl .L4.13
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-3*2+1), %o2
+
+.L4.13:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-3*2-1), %o2
+
+
+
+.L2.15:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ ! depth 3, accumulated bits -3
+ bl .L3.13
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ ! depth 4, accumulated bits -5
+ bl .L4.11
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-5*2+1), %o2
+
+.L4.11:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-5*2-1), %o2
+
+
+.L3.13:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ ! depth 4, accumulated bits -7
+ bl .L4.9
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-7*2+1), %o2
+
+.L4.9:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-7*2-1), %o2
+
+
+
+
+ 9:
+.Lend_regular_divide:
+ subcc %o4, 1, %o4
+ bge .Ldivloop
+ tst %o3
+ bl,a .Lgot_result
+ ! non-restoring fixup here (one instruction only!)
+ sub %o2, 1, %o2
+
+
+.Lgot_result:
+
+ retl
+ mov %o2, %o0
+
+END(.udiv)
diff --git a/libc/sysdeps/linux/sparc/umul.S b/libc/sysdeps/linux/sparc/umul.S
new file mode 100644
index 000000000..d26ae4382
--- /dev/null
+++ b/libc/sysdeps/linux/sparc/umul.S
@@ -0,0 +1,160 @@
+/*
+ * Unsigned multiply. Returns %o0 * %o1 in %o1%o0 (i.e., %o1 holds the
+ * upper 32 bits of the 64-bit product).
+ *
+ * This code optimizes short (less than 13-bit) multiplies. Short
+ * multiplies require 25 instruction cycles, and long ones require
+ * 45 instruction cycles.
+ *
+ * On return, overflow has occurred (%o1 is not zero) if and only if
+ * the Z condition code is clear, allowing, e.g., the following:
+ *
+ * call .umul
+ * nop
+ * bnz overflow (or tnz)
+ */
+
+#include <sysdep.h>
+
+
+.global .umul;
+.align 4;
+.type .umul ,@function;
+
+.umul:
+ or %o0, %o1, %o4
+ mov %o0, %y ! multiplier -> Y
+ andncc %o4, 0xfff, %g0 ! test bits 12..31 of *both* args
+ be .Lmul_shortway ! if zero, can do it the short way
+ andcc %g0, %g0, %o4 ! zero the partial product; clear N & V
+
+ /*
+ * Long multiply. 32 steps, followed by a final shift step.
+ */
+ mulscc %o4, %o1, %o4 ! 1
+ mulscc %o4, %o1, %o4 ! 2
+ mulscc %o4, %o1, %o4 ! 3
+ mulscc %o4, %o1, %o4 ! 4
+ mulscc %o4, %o1, %o4 ! 5
+ mulscc %o4, %o1, %o4 ! 6
+ mulscc %o4, %o1, %o4 ! 7
+ mulscc %o4, %o1, %o4 ! 8
+ mulscc %o4, %o1, %o4 ! 9
+ mulscc %o4, %o1, %o4 ! 10
+ mulscc %o4, %o1, %o4 ! 11
+ mulscc %o4, %o1, %o4 ! 12
+ mulscc %o4, %o1, %o4 ! 13
+ mulscc %o4, %o1, %o4 ! 14
+ mulscc %o4, %o1, %o4 ! 15
+ mulscc %o4, %o1, %o4 ! 16
+ mulscc %o4, %o1, %o4 ! 17
+ mulscc %o4, %o1, %o4 ! 18
+ mulscc %o4, %o1, %o4 ! 19
+ mulscc %o4, %o1, %o4 ! 20
+ mulscc %o4, %o1, %o4 ! 21
+ mulscc %o4, %o1, %o4 ! 22
+ mulscc %o4, %o1, %o4 ! 23
+ mulscc %o4, %o1, %o4 ! 24
+ mulscc %o4, %o1, %o4 ! 25
+ mulscc %o4, %o1, %o4 ! 26
+ mulscc %o4, %o1, %o4 ! 27
+ mulscc %o4, %o1, %o4 ! 28
+ mulscc %o4, %o1, %o4 ! 29
+ mulscc %o4, %o1, %o4 ! 30
+ mulscc %o4, %o1, %o4 ! 31
+ mulscc %o4, %o1, %o4 ! 32
+ mulscc %o4, %g0, %o4 ! final shift
+
+ /*
+ * Normally, with the shift-and-add approach, if both numbers are
+ * positive you get the correct result. With 32-bit two's-complement
+ * numbers, -x is represented as
+ *
+ * x 32
+ * ( 2 - ------ ) mod 2 * 2
+ * 32
+ * 2
+ *
+ * (the `mod 2' subtracts 1 from 1.bbbb). To avoid lots of 2^32s,
+ * we can treat this as if the radix point were just to the left
+ * of the sign bit (multiply by 2^32), and get
+ *
+ * -x = (2 - x) mod 2
+ *
+ * Then, ignoring the `mod 2's for convenience:
+ *
+ * x * y = xy
+ * -x * y = 2y - xy
+ * x * -y = 2x - xy
+ * -x * -y = 4 - 2x - 2y + xy
+ *
+ * For signed multiplies, we subtract (x << 32) from the partial
+ * product to fix this problem for negative multipliers (see mul.s).
+ * Because of the way the shift into the partial product is calculated
+ * (N xor V), this term is automatically removed for the multiplicand,
+ * so we don't have to adjust.
+ *
+ * But for unsigned multiplies, the high order bit wasn't a sign bit,
+ * and the correction is wrong. So for unsigned multiplies where the
+ * high order bit is one, we end up with xy - (y << 32). To fix it
+ * we add y << 32.
+ */
+#if 0
+ tst %o1
+ bl,a 1f ! if %o1 < 0 (high order bit = 1),
+ add %o4, %o0, %o4 ! %o4 += %o0 (add y to upper half)
+1: rd %y, %o0 ! get lower half of product
+ retl
+ addcc %o4, %g0, %o1 ! put upper half in place and set Z for %o1==0
+#else
+ /* Faster code from tege@sics.se. */
+ sra %o1, 31, %o2 ! make mask from sign bit
+ and %o0, %o2, %o2 ! %o2 = 0 or %o0, depending on sign of %o1
+ rd %y, %o0 ! get lower half of product
+ retl
+ addcc %o4, %o2, %o1 ! add compensation and put upper half in place
+#endif
+
+.Lmul_shortway:
+ /*
+ * Short multiply. 12 steps, followed by a final shift step.
+ * The resulting bits are off by 12 and (32-12) = 20 bit positions,
+ * but there is no problem with %o0 being negative (unlike above),
+ * and overflow is impossible (the answer is at most 24 bits long).
+ */
+ mulscc %o4, %o1, %o4 ! 1
+ mulscc %o4, %o1, %o4 ! 2
+ mulscc %o4, %o1, %o4 ! 3
+ mulscc %o4, %o1, %o4 ! 4
+ mulscc %o4, %o1, %o4 ! 5
+ mulscc %o4, %o1, %o4 ! 6
+ mulscc %o4, %o1, %o4 ! 7
+ mulscc %o4, %o1, %o4 ! 8
+ mulscc %o4, %o1, %o4 ! 9
+ mulscc %o4, %o1, %o4 ! 10
+ mulscc %o4, %o1, %o4 ! 11
+ mulscc %o4, %o1, %o4 ! 12
+ mulscc %o4, %g0, %o4 ! final shift
+
+ /*
+ * %o4 has 20 of the bits that should be in the result; %y has
+ * the bottom 12 (as %y's top 12). That is:
+ *
+ * %o4 %y
+ * +----------------+----------------+
+ * | -12- | -20- | -12- | -20- |
+ * +------(---------+------)---------+
+ * -----result-----
+ *
+ * The 12 bits of %o4 left of the `result' area are all zero;
+ * in fact, all top 20 bits of %o4 are zero.
+ */
+
+ rd %y, %o5
+ sll %o4, 12, %o0 ! shift middle bits left 12
+ srl %o5, 20, %o5 ! shift low bits right 20
+ or %o5, %o0, %o0
+ retl
+ addcc %g0, %g0, %o1 ! %o1 = zero, and set Z
+
+.size .umul , . -.umul
diff --git a/libc/sysdeps/linux/sparc/urem.S b/libc/sysdeps/linux/sparc/urem.S
new file mode 100644
index 000000000..2e236b151
--- /dev/null
+++ b/libc/sysdeps/linux/sparc/urem.S
@@ -0,0 +1,350 @@
+ /* This file is generated from divrem.m4; DO NOT EDIT! */
+/*
+ * Division and remainder, from Appendix E of the Sparc Version 8
+ * Architecture Manual, with fixes from Gordon Irlam.
+ */
+
+/*
+ * Input: dividend and divisor in %o0 and %o1 respectively.
+ *
+ * m4 parameters:
+ * .urem name of function to generate
+ * rem rem=div => %o0 / %o1; rem=rem => %o0 % %o1
+ * false false=true => signed; false=false => unsigned
+ *
+ * Algorithm parameters:
+ * N how many bits per iteration we try to get (4)
+ * WORDSIZE total number of bits (32)
+ *
+ * Derived constants:
+ * TOPBITS number of bits in the top decade of a number
+ *
+ * Important variables:
+ * Q the partial quotient under development (initially 0)
+ * R the remainder so far, initially the dividend
+ * ITER number of main division loop iterations required;
+ * equal to ceil(log2(quotient) / N). Note that this
+ * is the log base (2^N) of the quotient.
+ * V the current comparand, initially divisor*2^(ITER*N-1)
+ *
+ * Cost:
+ * Current estimate for non-large dividend is
+ * ceil(log2(quotient) / N) * (10 + 7N/2) + C
+ * A large dividend is one greater than 2^(31-TOPBITS) and takes a
+ * different path, as the upper bits of the quotient must be developed
+ * one bit at a time.
+ */
+
+
+
+#include <sysdep.h>
+
+
+.global .urem;
+.align 4;
+.type .urem ,@function;
+
+.urem:
+
+ ! Ready to divide. Compute size of quotient; scale comparand.
+ orcc %o1, %g0, %o5
+ bne 1f
+ mov %o0, %o3
+
+ ! Divide by zero trap. If it returns, return 0 (about as
+ ! wrong as possible, but that is what SunOS does...).
+ ta 0x02
+ retl
+ clr %o0
+
+1:
+ cmp %o3, %o5 ! if %o1 exceeds %o0, done
+ blu .Lgot_result ! (and algorithm fails otherwise)
+ clr %o2
+ sethi %hi(1 << (32 - 4 - 1)), %g1
+ cmp %o3, %g1
+ blu .Lnot_really_big
+ clr %o4
+
+ ! Here the dividend is >= 2**(31-N) or so. We must be careful here,
+ ! as our usual N-at-a-shot divide step will cause overflow and havoc.
+ ! The number of bits in the result here is N*ITER+SC, where SC <= N.
+ ! Compute ITER in an unorthodox manner: know we need to shift V into
+ ! the top decade: so do not even bother to compare to R.
+ 1:
+ cmp %o5, %g1
+ bgeu 3f
+ mov 1, %g2
+ sll %o5, 4, %o5
+ b 1b
+ add %o4, 1, %o4
+
+ ! Now compute %g2.
+ 2: addcc %o5, %o5, %o5
+ bcc .Lnot_too_big
+ add %g2, 1, %g2
+
+ ! We get here if the %o1 overflowed while shifting.
+ ! This means that %o3 has the high-order bit set.
+ ! Restore %o5 and subtract from %o3.
+ sll %g1, 4, %g1 ! high order bit
+ srl %o5, 1, %o5 ! rest of %o5
+ add %o5, %g1, %o5
+ b .Ldo_single_div
+ sub %g2, 1, %g2
+
+ .Lnot_too_big:
+ 3: cmp %o5, %o3
+ blu 2b
+ nop
+ be .Ldo_single_div
+ nop
+ /* NB: these are commented out in the V8-Sparc manual as well */
+ /* (I do not understand this) */
+ ! %o5 > %o3: went too far: back up 1 step
+ ! srl %o5, 1, %o5
+ ! dec %g2
+ ! do single-bit divide steps
+ !
+ ! We have to be careful here. We know that %o3 >= %o5, so we can do the
+ ! first divide step without thinking. BUT, the others are conditional,
+ ! and are only done if %o3 >= 0. Because both %o3 and %o5 may have the high-
+ ! order bit set in the first step, just falling into the regular
+ ! division loop will mess up the first time around.
+ ! So we unroll slightly...
+ .Ldo_single_div:
+ subcc %g2, 1, %g2
+ bl .Lend_regular_divide
+ nop
+ sub %o3, %o5, %o3
+ mov 1, %o2
+ b .Lend_single_divloop
+ nop
+ .Lsingle_divloop:
+ sll %o2, 1, %o2
+ bl 1f
+ srl %o5, 1, %o5
+ ! %o3 >= 0
+ sub %o3, %o5, %o3
+ b 2f
+ add %o2, 1, %o2
+ 1: ! %o3 < 0
+ add %o3, %o5, %o3
+ sub %o2, 1, %o2
+ 2:
+ .Lend_single_divloop:
+ subcc %g2, 1, %g2
+ bge .Lsingle_divloop
+ tst %o3
+ b,a .Lend_regular_divide
+
+.Lnot_really_big:
+1:
+ sll %o5, 4, %o5
+ cmp %o5, %o3
+ bleu 1b
+ addcc %o4, 1, %o4
+ be .Lgot_result
+ sub %o4, 1, %o4
+
+ tst %o3 ! set up for initial iteration
+.Ldivloop:
+ sll %o2, 4, %o2
+ ! depth 1, accumulated bits 0
+ bl .L1.16
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ ! depth 2, accumulated bits 1
+ bl .L2.17
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ ! depth 3, accumulated bits 3
+ bl .L3.19
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ ! depth 4, accumulated bits 7
+ bl .L4.23
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (7*2+1), %o2
+
+.L4.23:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (7*2-1), %o2
+
+
+.L3.19:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ ! depth 4, accumulated bits 5
+ bl .L4.21
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (5*2+1), %o2
+
+.L4.21:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (5*2-1), %o2
+
+
+
+.L2.17:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ ! depth 3, accumulated bits 1
+ bl .L3.17
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ ! depth 4, accumulated bits 3
+ bl .L4.19
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (3*2+1), %o2
+
+.L4.19:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (3*2-1), %o2
+
+
+.L3.17:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ ! depth 4, accumulated bits 1
+ bl .L4.17
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (1*2+1), %o2
+
+.L4.17:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (1*2-1), %o2
+
+
+
+
+.L1.16:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ ! depth 2, accumulated bits -1
+ bl .L2.15
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ ! depth 3, accumulated bits -1
+ bl .L3.15
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ ! depth 4, accumulated bits -1
+ bl .L4.15
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-1*2+1), %o2
+
+.L4.15:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-1*2-1), %o2
+
+
+.L3.15:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ ! depth 4, accumulated bits -3
+ bl .L4.13
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-3*2+1), %o2
+
+.L4.13:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-3*2-1), %o2
+
+
+
+.L2.15:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ ! depth 3, accumulated bits -3
+ bl .L3.13
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ ! depth 4, accumulated bits -5
+ bl .L4.11
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-5*2+1), %o2
+
+.L4.11:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-5*2-1), %o2
+
+
+.L3.13:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ ! depth 4, accumulated bits -7
+ bl .L4.9
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-7*2+1), %o2
+
+.L4.9:
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-7*2-1), %o2
+
+
+
+
+ 9:
+.Lend_regular_divide:
+ subcc %o4, 1, %o4
+ bge .Ldivloop
+ tst %o3
+ bl,a .Lgot_result
+ ! non-restoring fixup here (one instruction only!)
+ add %o3, %o1, %o3
+
+
+.Lgot_result:
+
+ retl
+ mov %o3, %o0
+
+END(.urem)