diff options
-rw-r--r-- | ldso/ldso/Makefile | 3 | ||||
-rw-r--r-- | ldso/ldso/arm/dl-sysdep.h | 29 | ||||
-rw-r--r-- | ldso/ldso/arm/ld_sysdep.h | 29 | ||||
-rw-r--r-- | ldso/ldso/sh/dl-sysdep.h | 62 | ||||
-rw-r--r-- | ldso/ldso/sh/ld_sysdep.h | 62 | ||||
-rw-r--r-- | ldso/ldso/sparc/DEFS.h | 5 | ||||
-rw-r--r-- | ldso/ldso/sparc/dl-sysdep.h | 19 | ||||
-rw-r--r-- | ldso/ldso/sparc/ld_sysdep.h | 19 | ||||
-rw-r--r-- | ldso/ldso/sparc/sdiv.S | 361 | ||||
-rw-r--r-- | ldso/ldso/sparc/udiv.S | 343 | ||||
-rw-r--r-- | ldso/ldso/sparc/umul.S | 153 | ||||
-rw-r--r-- | ldso/ldso/sparc/urem.S | 344 |
12 files changed, 8 insertions, 1421 deletions
diff --git a/ldso/ldso/Makefile b/ldso/ldso/Makefile index ff1d9ea48..e647bc972 100644 --- a/ldso/ldso/Makefile +++ b/ldso/ldso/Makefile @@ -26,6 +26,7 @@ TOPDIR=../../ DOPIC=true include $(TOPDIR)Rules.mak LDSO_FULLNAME=ld-uClibc-$(MAJOR_VERSION).$(MINOR_VERSION).$(SUBLEVEL).so +LIBGCC:=$(shell $(CC) -print-libgcc-file-name) # Enable this to enable all the code needed to support traditional ldd # (i.e. where the shared library loader does all the heavy lifting) @@ -80,7 +81,7 @@ all: lib lib:: ldso.h $(OBJS) $(DLINK_OBJS) $(LD) $(LDFLAGS) -e _dl_boot -soname=$(UCLIBC_LDSO) \ - -o $(LDSO_FULLNAME) $(OBJS); + -o $(LDSO_FULLNAME) $(OBJS) $(LIBGCC); install -d $(TOPDIR)lib install -m 755 $(LDSO_FULLNAME) $(TOPDIR)lib (cd $(TOPDIR)lib && ln -sf $(LDSO_FULLNAME) $(UCLIBC_LDSO)) diff --git a/ldso/ldso/arm/dl-sysdep.h b/ldso/ldso/arm/dl-sysdep.h index ef994caad..74abb305e 100644 --- a/ldso/ldso/arm/dl-sysdep.h +++ b/ldso/ldso/arm/dl-sysdep.h @@ -89,34 +89,7 @@ struct elf_resolve; unsigned long _dl_linux_resolver(struct elf_resolve * tpnt, int reloc_entry); -static inline unsigned long arm_modulus(unsigned long m, unsigned long p) { - unsigned long i,t,inc; - i=p; t=0; - while(!(i&(1<<31))) { - i<<=1; - t++; - } - t--; - for(inc=t;inc>2;inc--) { - i=p<<inc; - if(i&(1<<31)) - break; - while(m>=i) { - m-=i; - i<<=1; - if(i&(1<<31)) - break; - if(i<p) - break; - } - } - while(m>=p) { - m-=p; - } - return m; -} - -#define do_rem(result, n, base) result=arm_modulus(n,base); +#define do_rem(result, n, base) result = (n % base) /* 4096 bytes alignment */ #define PAGE_ALIGN 0xfffff000 diff --git a/ldso/ldso/arm/ld_sysdep.h b/ldso/ldso/arm/ld_sysdep.h index ef994caad..74abb305e 100644 --- a/ldso/ldso/arm/ld_sysdep.h +++ b/ldso/ldso/arm/ld_sysdep.h @@ -89,34 +89,7 @@ struct elf_resolve; unsigned long _dl_linux_resolver(struct elf_resolve * tpnt, int reloc_entry); -static inline unsigned long arm_modulus(unsigned long m, unsigned long p) { - unsigned long i,t,inc; - i=p; t=0; - while(!(i&(1<<31))) { - i<<=1; - t++; - } - t--; - for(inc=t;inc>2;inc--) { - i=p<<inc; - if(i&(1<<31)) - break; - while(m>=i) { - m-=i; - i<<=1; - if(i&(1<<31)) - break; - if(i<p) - break; - } - } - while(m>=p) { - m-=p; - } - return m; -} - -#define do_rem(result, n, base) result=arm_modulus(n,base); +#define do_rem(result, n, base) result = (n % base) /* 4096 bytes alignment */ #define PAGE_ALIGN 0xfffff000 diff --git a/ldso/ldso/sh/dl-sysdep.h b/ldso/ldso/sh/dl-sysdep.h index dc1b895b9..00938b655 100644 --- a/ldso/ldso/sh/dl-sysdep.h +++ b/ldso/ldso/sh/dl-sysdep.h @@ -77,67 +77,7 @@ struct elf_resolve; extern unsigned long _dl_linux_resolver(struct elf_resolve * tpnt, int reloc_entry); -static __inline__ unsigned int -_dl_urem(unsigned int n, unsigned int base) -{ -register unsigned int __r0 __asm__ ("r0"); -register unsigned int __r4 __asm__ ("r4") = n; -register unsigned int __r5 __asm__ ("r5") = base; - - __asm__ (" - mov #0, r0 - div0u - - ! get one bit from the msb of the numerator into the T - ! bit and divide it by whats in %2. Put the answer bit - ! into the T bit so it can come out again at the bottom - - rotcl r4 ; div1 r5, r0 - rotcl r4 ; div1 r5, r0 - rotcl r4 ; div1 r5, r0 - rotcl r4 ; div1 r5, r0 - rotcl r4 ; div1 r5, r0 - rotcl r4 ; div1 r5, r0 - rotcl r4 ; div1 r5, r0 - rotcl r4 ; div1 r5, r0 - - rotcl r4 ; div1 r5, r0 - rotcl r4 ; div1 r5, r0 - rotcl r4 ; div1 r5, r0 - rotcl r4 ; div1 r5, r0 - rotcl r4 ; div1 r5, r0 - rotcl r4 ; div1 r5, r0 - rotcl r4 ; div1 r5, r0 - rotcl r4 ; div1 r5, r0 - - rotcl r4 ; div1 r5, r0 - rotcl r4 ; div1 r5, r0 - rotcl r4 ; div1 r5, r0 - rotcl r4 ; div1 r5, r0 - rotcl r4 ; div1 r5, r0 - rotcl r4 ; div1 r5, r0 - rotcl r4 ; div1 r5, r0 - rotcl r4 ; div1 r5, r0 - - rotcl r4 ; div1 r5, r0 - rotcl r4 ; div1 r5, r0 - rotcl r4 ; div1 r5, r0 - rotcl r4 ; div1 r5, r0 - rotcl r4 ; div1 r5, r0 - rotcl r4 ; div1 r5, r0 - rotcl r4 ; div1 r5, r0 - rotcl r4 ; div1 r5, r0 - rotcl r4 - mov r4, r0 -" - : "=r" (__r0) - : "r" (__r4), "r" (__r5) - : "r4", "cc"); - - return n - (base * __r0); -} - -#define do_rem(result, n, base) ((result) = _dl_urem((n), (base))) +#define do_rem(result, n, base) result = (n % base) /* 4096 bytes alignment */ #define PAGE_ALIGN 0xfffff000 diff --git a/ldso/ldso/sh/ld_sysdep.h b/ldso/ldso/sh/ld_sysdep.h index dc1b895b9..00938b655 100644 --- a/ldso/ldso/sh/ld_sysdep.h +++ b/ldso/ldso/sh/ld_sysdep.h @@ -77,67 +77,7 @@ struct elf_resolve; extern unsigned long _dl_linux_resolver(struct elf_resolve * tpnt, int reloc_entry); -static __inline__ unsigned int -_dl_urem(unsigned int n, unsigned int base) -{ -register unsigned int __r0 __asm__ ("r0"); -register unsigned int __r4 __asm__ ("r4") = n; -register unsigned int __r5 __asm__ ("r5") = base; - - __asm__ (" - mov #0, r0 - div0u - - ! get one bit from the msb of the numerator into the T - ! bit and divide it by whats in %2. Put the answer bit - ! into the T bit so it can come out again at the bottom - - rotcl r4 ; div1 r5, r0 - rotcl r4 ; div1 r5, r0 - rotcl r4 ; div1 r5, r0 - rotcl r4 ; div1 r5, r0 - rotcl r4 ; div1 r5, r0 - rotcl r4 ; div1 r5, r0 - rotcl r4 ; div1 r5, r0 - rotcl r4 ; div1 r5, r0 - - rotcl r4 ; div1 r5, r0 - rotcl r4 ; div1 r5, r0 - rotcl r4 ; div1 r5, r0 - rotcl r4 ; div1 r5, r0 - rotcl r4 ; div1 r5, r0 - rotcl r4 ; div1 r5, r0 - rotcl r4 ; div1 r5, r0 - rotcl r4 ; div1 r5, r0 - - rotcl r4 ; div1 r5, r0 - rotcl r4 ; div1 r5, r0 - rotcl r4 ; div1 r5, r0 - rotcl r4 ; div1 r5, r0 - rotcl r4 ; div1 r5, r0 - rotcl r4 ; div1 r5, r0 - rotcl r4 ; div1 r5, r0 - rotcl r4 ; div1 r5, r0 - - rotcl r4 ; div1 r5, r0 - rotcl r4 ; div1 r5, r0 - rotcl r4 ; div1 r5, r0 - rotcl r4 ; div1 r5, r0 - rotcl r4 ; div1 r5, r0 - rotcl r4 ; div1 r5, r0 - rotcl r4 ; div1 r5, r0 - rotcl r4 ; div1 r5, r0 - rotcl r4 - mov r4, r0 -" - : "=r" (__r0) - : "r" (__r4), "r" (__r5) - : "r4", "cc"); - - return n - (base * __r0); -} - -#define do_rem(result, n, base) ((result) = _dl_urem((n), (base))) +#define do_rem(result, n, base) result = (n % base) /* 4096 bytes alignment */ #define PAGE_ALIGN 0xfffff000 diff --git a/ldso/ldso/sparc/DEFS.h b/ldso/ldso/sparc/DEFS.h deleted file mode 100644 index 4b9abccfd..000000000 --- a/ldso/ldso/sparc/DEFS.h +++ /dev/null @@ -1,5 +0,0 @@ -#define FUNC(name) \ - .global name; \ - .type name,@function; \ - .align 4; \ - name: diff --git a/ldso/ldso/sparc/dl-sysdep.h b/ldso/ldso/sparc/dl-sysdep.h index 98660e03b..a5773111b 100644 --- a/ldso/ldso/sparc/dl-sysdep.h +++ b/ldso/ldso/sparc/dl-sysdep.h @@ -97,25 +97,8 @@ extern unsigned int _dl_linux_resolver(unsigned int reloc_entry, */ #define SOLARIS_COMPATIBLE -/* - * Define this because we do not want to call .udiv in the library. - * Change on the plans -miguel: - * We just statically link against .udiv. This is required - * if we want to be able to run on Sun4c machines. - */ +#define do_rem(result, n, base) result = (n % base) -/* We now link .urem against this one */ -#ifdef USE_V8 -#define do_rem(result,n,base) ({ \ -volatile int __res; \ -__asm__("mov %%g0,%%Y\n\t" \ - "sdiv %2,%3,%%l6\n\t" \ - "smul %%l6,%3,%%l6\n\t" \ - "sub %2,%%l6,%0\n\t" \ - :"=r" (result),"=r" (__res):"r" (n),"r"(base) : "l6" ); __res; }) -#else -#define do_rem(a,b,c) a = _dl_urem (b,c); -#endif /* * dbx wants the binder to have a specific name. Mustn't disappoint it. */ diff --git a/ldso/ldso/sparc/ld_sysdep.h b/ldso/ldso/sparc/ld_sysdep.h index 98660e03b..a5773111b 100644 --- a/ldso/ldso/sparc/ld_sysdep.h +++ b/ldso/ldso/sparc/ld_sysdep.h @@ -97,25 +97,8 @@ extern unsigned int _dl_linux_resolver(unsigned int reloc_entry, */ #define SOLARIS_COMPATIBLE -/* - * Define this because we do not want to call .udiv in the library. - * Change on the plans -miguel: - * We just statically link against .udiv. This is required - * if we want to be able to run on Sun4c machines. - */ +#define do_rem(result, n, base) result = (n % base) -/* We now link .urem against this one */ -#ifdef USE_V8 -#define do_rem(result,n,base) ({ \ -volatile int __res; \ -__asm__("mov %%g0,%%Y\n\t" \ - "sdiv %2,%3,%%l6\n\t" \ - "smul %%l6,%3,%%l6\n\t" \ - "sub %2,%%l6,%0\n\t" \ - :"=r" (result),"=r" (__res):"r" (n),"r"(base) : "l6" ); __res; }) -#else -#define do_rem(a,b,c) a = _dl_urem (b,c); -#endif /* * dbx wants the binder to have a specific name. Mustn't disappoint it. */ diff --git a/ldso/ldso/sparc/sdiv.S b/ldso/ldso/sparc/sdiv.S deleted file mode 100644 index 549cfa02c..000000000 --- a/ldso/ldso/sparc/sdiv.S +++ /dev/null @@ -1,361 +0,0 @@ - /* This file is generated from divrem.m4; DO NOT EDIT! */ -/* - * Division and remainder, from Appendix E of the Sparc Version 8 - * Architecture Manual, with fixes from Gordon Irlam. - */ - -/* - * Input: dividend and divisor in %o0 and %o1 respectively. - * - * m4 parameters: - * .div name of function to generate - * div div=div => %o0 / %o1; div=rem => %o0 % %o1 - * true true=true => signed; true=false => unsigned - * - * Algorithm parameters: - * N how many bits per iteration we try to get (4) - * WORDSIZE total number of bits (32) - * - * Derived constants: - * TOPBITS number of bits in the top decade of a number - * - * Important variables: - * Q the partial quotient under development (initially 0) - * R the remainder so far, initially the dividend - * ITER number of main division loop iterations required; - * equal to ceil(log2(quotient) / N). Note that this - * is the log base (2^N) of the quotient. - * V the current comparand, initially divisor*2^(ITER*N-1) - * - * Cost: - * Current estimate for non-large dividend is - * ceil(log2(quotient) / N) * (10 + 7N/2) + C - * A large dividend is one greater than 2^(31-TOPBITS) and takes a - * different path, as the upper bits of the quotient must be developed - * one bit at a time. - */ - - - -#include "DEFS.h" -#include <asm/traps.h> - -FUNC(_dl_div) - ! compute sign of result; if neither is negative, no problem - orcc %o1, %o0, %g0 ! either negative? - bge 2f ! no, go do the divide - xor %o1, %o0, %g6 ! compute sign in any case - tst %o1 - bge 1f - tst %o0 - ! %o1 is definitely negative; %o0 might also be negative - bge 2f ! if %o0 not negative... - sub %g0, %o1, %o1 ! in any case, make %o1 nonneg -1: ! %o0 is negative, %o1 is nonnegative - sub %g0, %o0, %o0 ! make %o0 nonnegative -2: - - ! Ready to divide. Compute size of quotient; scale comparand. - orcc %o1, %g0, %o5 - bne 1f - mov %o0, %o3 - - ! Divide by zero trap. If it returns, return 0 (about as - ! wrong as possible, but that is what SunOS does...). - ta ST_DIV0 - retl - clr %o0 - -1: - cmp %o3, %o5 ! if %o1 exceeds %o0, done - blu Lgot_result ! (and algorithm fails otherwise) - clr %o2 - sethi %hi(1 << (32 - 4 - 1)), %g1 - cmp %o3, %g1 - blu Lnot_really_big - clr %o4 - - ! Here the dividend is >= 2**(31-N) or so. We must be careful here, - ! as our usual N-at-a-shot divide step will cause overflow and havoc. - ! The number of bits in the result here is N*ITER+SC, where SC <= N. - ! Compute ITER in an unorthodox manner: know we need to shift V into - ! the top decade: so do not even bother to compare to R. - 1: - cmp %o5, %g1 - bgeu 3f - mov 1, %g7 - sll %o5, 4, %o5 - b 1b - add %o4, 1, %o4 - - ! Now compute %g7. - 2: addcc %o5, %o5, %o5 - bcc Lnot_too_big - add %g7, 1, %g7 - - ! We get here if the %o1 overflowed while shifting. - ! This means that %o3 has the high-order bit set. - ! Restore %o5 and subtract from %o3. - sll %g1, 4, %g1 ! high order bit - srl %o5, 1, %o5 ! rest of %o5 - add %o5, %g1, %o5 - b Ldo_single_div - sub %g7, 1, %g7 - - Lnot_too_big: - 3: cmp %o5, %o3 - blu 2b - nop - be Ldo_single_div - nop - /* NB: these are commented out in the V8-Sparc manual as well */ - /* (I do not understand this) */ - ! %o5 > %o3: went too far: back up 1 step - ! srl %o5, 1, %o5 - ! dec %g7 - ! do single-bit divide steps - ! - ! We have to be careful here. We know that %o3 >= %o5, so we can do the - ! first divide step without thinking. BUT, the others are conditional, - ! and are only done if %o3 >= 0. Because both %o3 and %o5 may have the high- - ! order bit set in the first step, just falling into the regular - ! division loop will mess up the first time around. - ! So we unroll slightly... - Ldo_single_div: - subcc %g7, 1, %g7 - bl Lend_regular_divide - nop - sub %o3, %o5, %o3 - mov 1, %o2 - b Lend_single_divloop - nop - Lsingle_divloop: - sll %o2, 1, %o2 - bl 1f - srl %o5, 1, %o5 - ! %o3 >= 0 - sub %o3, %o5, %o3 - b 2f - add %o2, 1, %o2 - 1: ! %o3 < 0 - add %o3, %o5, %o3 - sub %o2, 1, %o2 - 2: - Lend_single_divloop: - subcc %g7, 1, %g7 - bge Lsingle_divloop - tst %o3 - b,a Lend_regular_divide - -Lnot_really_big: -1: - sll %o5, 4, %o5 - cmp %o5, %o3 - bleu 1b - addcc %o4, 1, %o4 - be Lgot_result - sub %o4, 1, %o4 - - tst %o3 ! set up for initial iteration -Ldivloop: - sll %o2, 4, %o2 - ! depth 1, accumulated bits 0 - bl L.1.16 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - ! depth 2, accumulated bits 1 - bl L.2.17 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - ! depth 3, accumulated bits 3 - bl L.3.19 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - ! depth 4, accumulated bits 7 - bl L.4.23 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (7*2+1), %o2 - -L.4.23: - ! remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (7*2-1), %o2 - - -L.3.19: - ! remainder is negative - addcc %o3,%o5,%o3 - ! depth 4, accumulated bits 5 - bl L.4.21 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (5*2+1), %o2 - -L.4.21: - ! remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (5*2-1), %o2 - - - -L.2.17: - ! remainder is negative - addcc %o3,%o5,%o3 - ! depth 3, accumulated bits 1 - bl L.3.17 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - ! depth 4, accumulated bits 3 - bl L.4.19 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (3*2+1), %o2 - -L.4.19: - ! remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (3*2-1), %o2 - - -L.3.17: - ! remainder is negative - addcc %o3,%o5,%o3 - ! depth 4, accumulated bits 1 - bl L.4.17 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (1*2+1), %o2 - -L.4.17: - ! remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (1*2-1), %o2 - - - - -L.1.16: - ! remainder is negative - addcc %o3,%o5,%o3 - ! depth 2, accumulated bits -1 - bl L.2.15 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - ! depth 3, accumulated bits -1 - bl L.3.15 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - ! depth 4, accumulated bits -1 - bl L.4.15 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (-1*2+1), %o2 - -L.4.15: - ! remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (-1*2-1), %o2 - - -L.3.15: - ! remainder is negative - addcc %o3,%o5,%o3 - ! depth 4, accumulated bits -3 - bl L.4.13 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (-3*2+1), %o2 - -L.4.13: - ! remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (-3*2-1), %o2 - - - -L.2.15: - ! remainder is negative - addcc %o3,%o5,%o3 - ! depth 3, accumulated bits -3 - bl L.3.13 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - ! depth 4, accumulated bits -5 - bl L.4.11 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (-5*2+1), %o2 - -L.4.11: - ! remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (-5*2-1), %o2 - - -L.3.13: - ! remainder is negative - addcc %o3,%o5,%o3 - ! depth 4, accumulated bits -7 - bl L.4.9 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (-7*2+1), %o2 - -L.4.9: - ! remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (-7*2-1), %o2 - - - - - 9: -Lend_regular_divide: - subcc %o4, 1, %o4 - bge Ldivloop - tst %o3 - bl,a Lgot_result - ! non-restoring fixup here (one instruction only!) - sub %o2, 1, %o2 - - -Lgot_result: - ! check to see if answer should be < 0 - tst %g6 - bl,a 1f - sub %g0, %o2, %o2 -1: - retl - mov %o2, %o0 diff --git a/ldso/ldso/sparc/udiv.S b/ldso/ldso/sparc/udiv.S deleted file mode 100644 index bfa2fcb8f..000000000 --- a/ldso/ldso/sparc/udiv.S +++ /dev/null @@ -1,343 +0,0 @@ - /* This file is generated from divrem.m4; DO NOT EDIT! */ -/* - * Division and remainder, from Appendix E of the Sparc Version 8 - * Architecture Manual, with fixes from Gordon Irlam. - */ - -/* - * Input: dividend and divisor in %o0 and %o1 respectively. - * - * m4 parameters: - * .udiv name of function to generate - * div div=div => %o0 / %o1; div=rem => %o0 % %o1 - * false false=true => signed; false=false => unsigned - * - * Algorithm parameters: - * N how many bits per iteration we try to get (4) - * WORDSIZE total number of bits (32) - * - * Derived constants: - * TOPBITS number of bits in the top decade of a number - * - * Important variables: - * Q the partial quotient under development (initially 0) - * R the remainder so far, initially the dividend - * ITER number of main division loop iterations required; - * equal to ceil(log2(quotient) / N). Note that this - * is the log base (2^N) of the quotient. - * V the current comparand, initially divisor*2^(ITER*N-1) - * - * Cost: - * Current estimate for non-large dividend is - * ceil(log2(quotient) / N) * (10 + 7N/2) + C - * A large dividend is one greater than 2^(31-TOPBITS) and takes a - * different path, as the upper bits of the quotient must be developed - * one bit at a time. - */ - - -#include "DEFS.h" -#include <asm/traps.h> - -FUNC(_dl_udiv) - - ! Ready to divide. Compute size of quotient; scale comparand. - orcc %o1, %g0, %o5 - bne 1f - mov %o0, %o3 - - ! Divide by zero trap. If it returns, return 0 (about as - ! wrong as possible, but that is what SunOS does...). - ta ST_DIV0 - retl - clr %o0 - -1: - cmp %o3, %o5 ! if %o1 exceeds %o0, done - blu Lgot_result ! (and algorithm fails otherwise) - clr %o2 - sethi %hi(1 << (32 - 4 - 1)), %g1 - cmp %o3, %g1 - blu Lnot_really_big - clr %o4 - - ! Here the dividend is >= 2**(31-N) or so. We must be careful here, - ! as our usual N-at-a-shot divide step will cause overflow and havoc. - ! The number of bits in the result here is N*ITER+SC, where SC <= N. - ! Compute ITER in an unorthodox manner: know we need to shift V into - ! the top decade: so do not even bother to compare to R. - 1: - cmp %o5, %g1 - bgeu 3f - mov 1, %g7 - sll %o5, 4, %o5 - b 1b - add %o4, 1, %o4 - - ! Now compute %g7. - 2: addcc %o5, %o5, %o5 - bcc Lnot_too_big - add %g7, 1, %g7 - - ! We get here if the %o1 overflowed while shifting. - ! This means that %o3 has the high-order bit set. - ! Restore %o5 and subtract from %o3. - sll %g1, 4, %g1 ! high order bit - srl %o5, 1, %o5 ! rest of %o5 - add %o5, %g1, %o5 - b Ldo_single_div - sub %g7, 1, %g7 - - Lnot_too_big: - 3: cmp %o5, %o3 - blu 2b - nop - be Ldo_single_div - nop - /* NB: these are commented out in the V8-Sparc manual as well */ - /* (I do not understand this) */ - ! %o5 > %o3: went too far: back up 1 step - ! srl %o5, 1, %o5 - ! dec %g7 - ! do single-bit divide steps - ! - ! We have to be careful here. We know that %o3 >= %o5, so we can do the - ! first divide step without thinking. BUT, the others are conditional, - ! and are only done if %o3 >= 0. Because both %o3 and %o5 may have the high- - ! order bit set in the first step, just falling into the regular - ! division loop will mess up the first time around. - ! So we unroll slightly... - Ldo_single_div: - subcc %g7, 1, %g7 - bl Lend_regular_divide - nop - sub %o3, %o5, %o3 - mov 1, %o2 - b Lend_single_divloop - nop - Lsingle_divloop: - sll %o2, 1, %o2 - bl 1f - srl %o5, 1, %o5 - ! %o3 >= 0 - sub %o3, %o5, %o3 - b 2f - add %o2, 1, %o2 - 1: ! %o3 < 0 - add %o3, %o5, %o3 - sub %o2, 1, %o2 - 2: - Lend_single_divloop: - subcc %g7, 1, %g7 - bge Lsingle_divloop - tst %o3 - b,a Lend_regular_divide - -Lnot_really_big: -1: - sll %o5, 4, %o5 - cmp %o5, %o3 - bleu 1b - addcc %o4, 1, %o4 - be Lgot_result - sub %o4, 1, %o4 - - tst %o3 ! set up for initial iteration -Ldivloop: - sll %o2, 4, %o2 - ! depth 1, accumulated bits 0 - bl L.1.16 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - ! depth 2, accumulated bits 1 - bl L.2.17 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - ! depth 3, accumulated bits 3 - bl L.3.19 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - ! depth 4, accumulated bits 7 - bl L.4.23 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (7*2+1), %o2 - -L.4.23: - ! remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (7*2-1), %o2 - - -L.3.19: - ! remainder is negative - addcc %o3,%o5,%o3 - ! depth 4, accumulated bits 5 - bl L.4.21 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (5*2+1), %o2 - -L.4.21: - ! remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (5*2-1), %o2 - - - -L.2.17: - ! remainder is negative - addcc %o3,%o5,%o3 - ! depth 3, accumulated bits 1 - bl L.3.17 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - ! depth 4, accumulated bits 3 - bl L.4.19 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (3*2+1), %o2 - -L.4.19: - ! remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (3*2-1), %o2 - - -L.3.17: - ! remainder is negative - addcc %o3,%o5,%o3 - ! depth 4, accumulated bits 1 - bl L.4.17 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (1*2+1), %o2 - -L.4.17: - ! remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (1*2-1), %o2 - - - - -L.1.16: - ! remainder is negative - addcc %o3,%o5,%o3 - ! depth 2, accumulated bits -1 - bl L.2.15 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - ! depth 3, accumulated bits -1 - bl L.3.15 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - ! depth 4, accumulated bits -1 - bl L.4.15 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (-1*2+1), %o2 - -L.4.15: - ! remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (-1*2-1), %o2 - - -L.3.15: - ! remainder is negative - addcc %o3,%o5,%o3 - ! depth 4, accumulated bits -3 - bl L.4.13 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (-3*2+1), %o2 - -L.4.13: - ! remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (-3*2-1), %o2 - - - -L.2.15: - ! remainder is negative - addcc %o3,%o5,%o3 - ! depth 3, accumulated bits -3 - bl L.3.13 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - ! depth 4, accumulated bits -5 - bl L.4.11 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (-5*2+1), %o2 - -L.4.11: - ! remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (-5*2-1), %o2 - - -L.3.13: - ! remainder is negative - addcc %o3,%o5,%o3 - ! depth 4, accumulated bits -7 - bl L.4.9 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (-7*2+1), %o2 - -L.4.9: - ! remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (-7*2-1), %o2 - - - - - 9: -Lend_regular_divide: - subcc %o4, 1, %o4 - bge Ldivloop - tst %o3 - bl,a Lgot_result - ! non-restoring fixup here (one instruction only!) - sub %o2, 1, %o2 - - -Lgot_result: - - retl - mov %o2, %o0 diff --git a/ldso/ldso/sparc/umul.S b/ldso/ldso/sparc/umul.S deleted file mode 100644 index 7a26c295c..000000000 --- a/ldso/ldso/sparc/umul.S +++ /dev/null @@ -1,153 +0,0 @@ -/* - * Unsigned multiply. Returns %o0 * %o1 in %o1%o0 (i.e., %o1 holds the - * upper 32 bits of the 64-bit product). - * - * This code optimizes short (less than 13-bit) multiplies. Short - * multiplies require 25 instruction cycles, and long ones require - * 45 instruction cycles. - * - * On return, overflow has occurred (%o1 is not zero) if and only if - * the Z condition code is clear, allowing, e.g., the following: - * - * call .umul - * nop - * bnz overflow (or tnz) - */ - -#include "DEFS.h" -FUNC(.umul) - or %o0, %o1, %o4 - mov %o0, %y ! multiplier -> Y - andncc %o4, 0xfff, %g0 ! test bits 12..31 of *both* args - be Lmul_shortway ! if zero, can do it the short way - andcc %g0, %g0, %o4 ! zero the partial product and clear N and V - - /* - * Long multiply. 32 steps, followed by a final shift step. - */ - mulscc %o4, %o1, %o4 ! 1 - mulscc %o4, %o1, %o4 ! 2 - mulscc %o4, %o1, %o4 ! 3 - mulscc %o4, %o1, %o4 ! 4 - mulscc %o4, %o1, %o4 ! 5 - mulscc %o4, %o1, %o4 ! 6 - mulscc %o4, %o1, %o4 ! 7 - mulscc %o4, %o1, %o4 ! 8 - mulscc %o4, %o1, %o4 ! 9 - mulscc %o4, %o1, %o4 ! 10 - mulscc %o4, %o1, %o4 ! 11 - mulscc %o4, %o1, %o4 ! 12 - mulscc %o4, %o1, %o4 ! 13 - mulscc %o4, %o1, %o4 ! 14 - mulscc %o4, %o1, %o4 ! 15 - mulscc %o4, %o1, %o4 ! 16 - mulscc %o4, %o1, %o4 ! 17 - mulscc %o4, %o1, %o4 ! 18 - mulscc %o4, %o1, %o4 ! 19 - mulscc %o4, %o1, %o4 ! 20 - mulscc %o4, %o1, %o4 ! 21 - mulscc %o4, %o1, %o4 ! 22 - mulscc %o4, %o1, %o4 ! 23 - mulscc %o4, %o1, %o4 ! 24 - mulscc %o4, %o1, %o4 ! 25 - mulscc %o4, %o1, %o4 ! 26 - mulscc %o4, %o1, %o4 ! 27 - mulscc %o4, %o1, %o4 ! 28 - mulscc %o4, %o1, %o4 ! 29 - mulscc %o4, %o1, %o4 ! 30 - mulscc %o4, %o1, %o4 ! 31 - mulscc %o4, %o1, %o4 ! 32 - mulscc %o4, %g0, %o4 ! final shift - - - /* - * Normally, with the shift-and-add approach, if both numbers are - * positive you get the correct result. With 32-bit two's-complement - * numbers, -x is represented as - * - * x 32 - * ( 2 - ------ ) mod 2 * 2 - * 32 - * 2 - * - * (the `mod 2' subtracts 1 from 1.bbbb). To avoid lots of 2^32s, - * we can treat this as if the radix point were just to the left - * of the sign bit (multiply by 2^32), and get - * - * -x = (2 - x) mod 2 - * - * Then, ignoring the `mod 2's for convenience: - * - * x * y = xy - * -x * y = 2y - xy - * x * -y = 2x - xy - * -x * -y = 4 - 2x - 2y + xy - * - * For signed multiplies, we subtract (x << 32) from the partial - * product to fix this problem for negative multipliers (see mul.s). - * Because of the way the shift into the partial product is calculated - * (N xor V), this term is automatically removed for the multiplicand, - * so we don't have to adjust. - * - * But for unsigned multiplies, the high order bit wasn't a sign bit, - * and the correction is wrong. So for unsigned multiplies where the - * high order bit is one, we end up with xy - (y << 32). To fix it - * we add y << 32. - */ -#if 0 - tst %o1 - bl,a 1f ! if %o1 < 0 (high order bit = 1), - add %o4, %o0, %o4 ! %o4 += %o0 (add y to upper half) -1: rd %y, %o0 ! get lower half of product - retl - addcc %o4, %g0, %o1 ! put upper half in place and set Z for %o1==0 -#else - /* Faster code from tege@sics.se. */ - sra %o1, 31, %o2 ! make mask from sign bit - and %o0, %o2, %o2 ! %o2 = 0 or %o0, depending on sign of %o1 - rd %y, %o0 ! get lower half of product - retl - addcc %o4, %o2, %o1 ! add compensation and put upper half in place -#endif - -Lmul_shortway: - /* - * Short multiply. 12 steps, followed by a final shift step. - * The resulting bits are off by 12 and (32-12) = 20 bit positions, - * but there is no problem with %o0 being negative (unlike above), - * and overflow is impossible (the answer is at most 24 bits long). - */ - mulscc %o4, %o1, %o4 ! 1 - mulscc %o4, %o1, %o4 ! 2 - mulscc %o4, %o1, %o4 ! 3 - mulscc %o4, %o1, %o4 ! 4 - mulscc %o4, %o1, %o4 ! 5 - mulscc %o4, %o1, %o4 ! 6 - mulscc %o4, %o1, %o4 ! 7 - mulscc %o4, %o1, %o4 ! 8 - mulscc %o4, %o1, %o4 ! 9 - mulscc %o4, %o1, %o4 ! 10 - mulscc %o4, %o1, %o4 ! 11 - mulscc %o4, %o1, %o4 ! 12 - mulscc %o4, %g0, %o4 ! final shift - - /* - * %o4 has 20 of the bits that should be in the result; %y has - * the bottom 12 (as %y's top 12). That is: - * - * %o4 %y - * +----------------+----------------+ - * | -12- | -20- | -12- | -20- | - * +------(---------+------)---------+ - * -----result----- - * - * The 12 bits of %o4 left of the `result' area are all zero; - * in fact, all top 20 bits of %o4 are zero. - */ - - rd %y, %o5 - sll %o4, 12, %o0 ! shift middle bits left 12 - srl %o5, 20, %o5 ! shift low bits right 20 - or %o5, %o0, %o0 - retl - addcc %g0, %g0, %o1 ! %o1 = zero, and set Z diff --git a/ldso/ldso/sparc/urem.S b/ldso/ldso/sparc/urem.S deleted file mode 100644 index 072e6a8c9..000000000 --- a/ldso/ldso/sparc/urem.S +++ /dev/null @@ -1,344 +0,0 @@ - /* This file is generated from divrem.m4; DO NOT EDIT! */ -/* - * Division and remainder, from Appendix E of the Sparc Version 8 - * Architecture Manual, with fixes from Gordon Irlam. - */ - -/* - * Input: dividend and divisor in %o0 and %o1 respectively. - * - * m4 parameters: - * .urem name of function to generate - * rem rem=div => %o0 / %o1; rem=rem => %o0 % %o1 - * false false=true => signed; false=false => unsigned - * - * Algorithm parameters: - * N how many bits per iteration we try to get (4) - * WORDSIZE total number of bits (32) - * - * Derived constants: - * TOPBITS number of bits in the top decade of a number - * - * Important variables: - * Q the partial quotient under development (initially 0) - * R the remainder so far, initially the dividend - * ITER number of main division loop iterations required; - * equal to ceil(log2(quotient) / N). Note that this - * is the log base (2^N) of the quotient. - * V the current comparand, initially divisor*2^(ITER*N-1) - * - * Cost: - * Current estimate for non-large dividend is - * ceil(log2(quotient) / N) * (10 + 7N/2) + C - * A large dividend is one greater than 2^(31-TOPBITS) and takes a - * different path, as the upper bits of the quotient must be developed - * one bit at a time. - */ - - - -#include "DEFS.h" -#include <asm/traps.h> - -FUNC(_dl_urem) - - ! Ready to divide. Compute size of quotient; scale comparand. - orcc %o1, %g0, %o5 - bne 1f - mov %o0, %o3 - - ! Divide by zero trap. If it returns, return 0 (about as - ! wrong as possible, but that is what SunOS does...). - ta ST_DIV0 - retl - clr %o0 - -1: - cmp %o3, %o5 ! if %o1 exceeds %o0, done - blu Lgot_result ! (and algorithm fails otherwise) - clr %o2 - sethi %hi(1 << (32 - 4 - 1)), %g1 - cmp %o3, %g1 - blu Lnot_really_big - clr %o4 - - ! Here the dividend is >= 2**(31-N) or so. We must be careful here, - ! as our usual N-at-a-shot divide step will cause overflow and havoc. - ! The number of bits in the result here is N*ITER+SC, where SC <= N. - ! Compute ITER in an unorthodox manner: know we need to shift V into - ! the top decade: so do not even bother to compare to R. - 1: - cmp %o5, %g1 - bgeu 3f - mov 1, %g7 - sll %o5, 4, %o5 - b 1b - add %o4, 1, %o4 - - ! Now compute %g7. - 2: addcc %o5, %o5, %o5 - bcc Lnot_too_big - add %g7, 1, %g7 - - ! We get here if the %o1 overflowed while shifting. - ! This means that %o3 has the high-order bit set. - ! Restore %o5 and subtract from %o3. - sll %g1, 4, %g1 ! high order bit - srl %o5, 1, %o5 ! rest of %o5 - add %o5, %g1, %o5 - b Ldo_single_div - sub %g7, 1, %g7 - - Lnot_too_big: - 3: cmp %o5, %o3 - blu 2b - nop - be Ldo_single_div - nop - /* NB: these are commented out in the V8-Sparc manual as well */ - /* (I do not understand this) */ - ! %o5 > %o3: went too far: back up 1 step - ! srl %o5, 1, %o5 - ! dec %g7 - ! do single-bit divide steps - ! - ! We have to be careful here. We know that %o3 >= %o5, so we can do the - ! first divide step without thinking. BUT, the others are conditional, - ! and are only done if %o3 >= 0. Because both %o3 and %o5 may have the high- - ! order bit set in the first step, just falling into the regular - ! division loop will mess up the first time around. - ! So we unroll slightly... - Ldo_single_div: - subcc %g7, 1, %g7 - bl Lend_regular_divide - nop - sub %o3, %o5, %o3 - mov 1, %o2 - b Lend_single_divloop - nop - Lsingle_divloop: - sll %o2, 1, %o2 - bl 1f - srl %o5, 1, %o5 - ! %o3 >= 0 - sub %o3, %o5, %o3 - b 2f - add %o2, 1, %o2 - 1: ! %o3 < 0 - add %o3, %o5, %o3 - sub %o2, 1, %o2 - 2: - Lend_single_divloop: - subcc %g7, 1, %g7 - bge Lsingle_divloop - tst %o3 - b,a Lend_regular_divide - -Lnot_really_big: -1: - sll %o5, 4, %o5 - cmp %o5, %o3 - bleu 1b - addcc %o4, 1, %o4 - be Lgot_result - sub %o4, 1, %o4 - - tst %o3 ! set up for initial iteration -Ldivloop: - sll %o2, 4, %o2 - ! depth 1, accumulated bits 0 - bl L.1.16 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - ! depth 2, accumulated bits 1 - bl L.2.17 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - ! depth 3, accumulated bits 3 - bl L.3.19 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - ! depth 4, accumulated bits 7 - bl L.4.23 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (7*2+1), %o2 - -L.4.23: - ! remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (7*2-1), %o2 - - -L.3.19: - ! remainder is negative - addcc %o3,%o5,%o3 - ! depth 4, accumulated bits 5 - bl L.4.21 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (5*2+1), %o2 - -L.4.21: - ! remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (5*2-1), %o2 - - - -L.2.17: - ! remainder is negative - addcc %o3,%o5,%o3 - ! depth 3, accumulated bits 1 - bl L.3.17 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - ! depth 4, accumulated bits 3 - bl L.4.19 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (3*2+1), %o2 - -L.4.19: - ! remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (3*2-1), %o2 - - -L.3.17: - ! remainder is negative - addcc %o3,%o5,%o3 - ! depth 4, accumulated bits 1 - bl L.4.17 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (1*2+1), %o2 - -L.4.17: - ! remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (1*2-1), %o2 - - - - -L.1.16: - ! remainder is negative - addcc %o3,%o5,%o3 - ! depth 2, accumulated bits -1 - bl L.2.15 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - ! depth 3, accumulated bits -1 - bl L.3.15 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - ! depth 4, accumulated bits -1 - bl L.4.15 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (-1*2+1), %o2 - -L.4.15: - ! remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (-1*2-1), %o2 - - -L.3.15: - ! remainder is negative - addcc %o3,%o5,%o3 - ! depth 4, accumulated bits -3 - bl L.4.13 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (-3*2+1), %o2 - -L.4.13: - ! remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (-3*2-1), %o2 - - - -L.2.15: - ! remainder is negative - addcc %o3,%o5,%o3 - ! depth 3, accumulated bits -3 - bl L.3.13 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - ! depth 4, accumulated bits -5 - bl L.4.11 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (-5*2+1), %o2 - -L.4.11: - ! remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (-5*2-1), %o2 - - -L.3.13: - ! remainder is negative - addcc %o3,%o5,%o3 - ! depth 4, accumulated bits -7 - bl L.4.9 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (-7*2+1), %o2 - -L.4.9: - ! remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (-7*2-1), %o2 - - - - - 9: -Lend_regular_divide: - subcc %o4, 1, %o4 - bge Ldivloop - tst %o3 - bl,a Lgot_result - ! non-restoring fixup here (one instruction only!) - add %o3, %o1, %o3 - - -Lgot_result: - - retl - mov %o3, %o0 |