summaryrefslogtreecommitdiffstats
path: root/mdk-stage1/dietlibc/sparc
diff options
context:
space:
mode:
author Guillaume Cottenceau <gc@mandriva.com> 2001-01-04 20:04:45 +0000
committer Guillaume Cottenceau <gc@mandriva.com> 2001-01-04 20:04:45 +0000
commit 02fec4701cee79f875c1d02b8b4aee09380dbcb8 (patch)
tree f4f291aedbb2e60ee58351481858a8cd3ec80b6b /mdk-stage1/dietlibc/sparc
parent 9887fe04751edf39e8389f2c3ec3f020b5e1c17d (diff)
download drakx-02fec4701cee79f875c1d02b8b4aee09380dbcb8.tar
drakx-02fec4701cee79f875c1d02b8b4aee09380dbcb8.tar.gz
drakx-02fec4701cee79f875c1d02b8b4aee09380dbcb8.tar.bz2
drakx-02fec4701cee79f875c1d02b8b4aee09380dbcb8.tar.xz
drakx-02fec4701cee79f875c1d02b8b4aee09380dbcb8.zip
integrate dietlibc/stdio per default for cdrom and disk only installs
Diffstat (limited to 'mdk-stage1/dietlibc/sparc')
-rw-r--r-- mdk-stage1/dietlibc/sparc/Makefile.add 3
-rw-r--r-- mdk-stage1/dietlibc/sparc/__longjmp.S 66
-rw-r--r-- mdk-stage1/dietlibc/sparc/fork.S 19
-rw-r--r-- mdk-stage1/dietlibc/sparc/mmap.c 43
-rw-r--r-- mdk-stage1/dietlibc/sparc/pipe.S 20
-rw-r--r-- mdk-stage1/dietlibc/sparc/setjmp.S 35
-rw-r--r-- mdk-stage1/dietlibc/sparc/start.S 46
-rw-r--r-- mdk-stage1/dietlibc/sparc/udiv.S 363
-rw-r--r-- mdk-stage1/dietlibc/sparc/umul.S 170
-rw-r--r-- mdk-stage1/dietlibc/sparc/unified.S 28
-rw-r--r-- mdk-stage1/dietlibc/sparc/urem.S 362
11 files changed, 1155 insertions, 0 deletions
diff --git a/mdk-stage1/dietlibc/sparc/Makefile.add b/mdk-stage1/dietlibc/sparc/Makefile.add
new file mode 100644
index 000000000..a12a0446a
--- /dev/null
+++ b/mdk-stage1/dietlibc/sparc/Makefile.add
@@ -0,0 +1,3 @@
+# SPARC build settings: append -mcpu=supersparc -Os to CFLAGS and force VPATH to sparc, syscalls.s and lib.
+CFLAGS+=-mcpu=supersparc -Os
+override VPATH=sparc:syscalls.s:lib
diff --git a/mdk-stage1/dietlibc/sparc/__longjmp.S b/mdk-stage1/dietlibc/sparc/__longjmp.S
new file mode 100644
index 000000000..81dd24af2
--- /dev/null
+++ b/mdk-stage1/dietlibc/sparc/__longjmp.S
@@ -0,0 +1,66 @@
+#define _ASM
+#define _SETJMP_H
+#include <bits/setjmp.h>
+
+
+#define ENV(base,reg) [%base + (reg * 4)] /* memory operand for 32-bit slot number `reg` of the jump buffer held in register `base` */
+#define ST_FLUSH_WINDOWS 3 /* software trap number used with `ta` below to flush the register windows */
+#define RW_FP [%fp + 0x48] /* NOTE(review): used below as the saved-FP slot; a 16-word window save area would put %i6 at +0x38 - confirm this offset */
+/* __longjmp: %o0 = jump buffer (ENV), %o1 = value (VAL); resumes at the saved PC/SP/FP, delivering VAL (or 1 when VAL is 0) in %o0. */
+.text
+.global __longjmp
+__longjmp:
+ /* Store our arguments in global registers so we can still
+ use them while unwinding frames and their register windows. */
+
+ ld ENV(o0,JB_FP), %g3 /* Cache target FP in register %g3. */
+ mov %o0, %g1 /* ENV in %g1 */
+ orcc %o1, %g0, %g2 /* VAL in %g2 */
+ be,a 0f /* Branch if zero; else skip delay slot. */
+ mov 1, %g2 /* Delay slot only hit if zero: VAL = 1. */
+0:
+ xor %fp, %g3, %o0 /* bits in which the current FP and the target FP differ */
+ add %fp, 512, %o1 /* current FP + 512, compared against the target below */
+ andncc %o0, 4095, %o0 /* ignore the low 12 bits: non-zero means the two FPs differ above bit 11 */
+ bne .Lthread /* they differ: take the flush-windows path instead of unwinding */
+ cmp %o1, %g3 /* delay slot: compare FP + 512 with the target FP */
+ bl .Lthread /* FP + 512 below the target (signed): flush path; delay slot is the cmp at .Lloop */
+
+ /* Now we will loop, unwinding the register windows up the stack
+ until the restored %fp value matches the target value in %g3. */
+
+.Lloop:
+ cmp %fp, %g3 /* Have we reached the target frame? */
+ bl,a .Lloop /* Loop while current fp is below target. */
+ restore /* Unwind register window in delay slot. */
+ be,a .Lfound /* Better have hit it exactly. */
+ ld ENV(g1,JB_SP), %o0 /* Delay slot: extract target SP. */
+
+.Lthread:
+ /*
+ * Do a "flush register windows trap". The trap handler in the
+ * kernel writes all the register windows to their stack slots, and
+ * marks them all as invalid (needing to be sucked up from the
+ * stack when used). This ensures that all information needed to
+ * unwind to these callers is in memory, not in the register
+ * windows.
+ */
+ ta ST_FLUSH_WINDOWS
+ ld ENV(g1,JB_PC), %o7 /* Set return PC. */
+ ld ENV(g1,JB_SP), %fp /* Set saved SP on restore below. */
+ sub %fp, 64, %sp /* Allocate a register frame. */
+ st %g3, RW_FP /* Set saved FP on restore below. */
+ retl
+ restore %g2, 0, %o0 /* Restore values from above register frame. */
+
+.Lfound:
+ /* We have unwound register windows so %fp matches the target. */
+ mov %o0, %sp /* OK, install new SP. */
+
+.Lsp_ok: /* not referenced by any branch in this file; reached by fall-through from .Lfound */
+ ld ENV(g1,JB_PC), %o0 /* Extract target return PC. */
+ jmp %o0 + 8 /* Return there. */
+ mov %g2, %o0 /* Delay slot: set return value. */
+
+.size __longjmp, . - __longjmp
+
diff --git a/mdk-stage1/dietlibc/sparc/fork.S b/mdk-stage1/dietlibc/sparc/fork.S
new file mode 100644
index 000000000..150839971
--- /dev/null
+++ b/mdk-stage1/dietlibc/sparc/fork.S
@@ -0,0 +1,19 @@
+#include "syscalls.h"
+/* fork: issue system call number 2 through trap 0x10. On failure store the kernel error code in errno and return -1. */
+.text
+.global fork
+fork:
+ mov 2, %g1 /* system call number goes in %g1 */
+ ta 0x10 /* trap into the kernel */
+ bcc,a 1f /* carry clear: the call succeeded */
+ nop
+
+ sethi %hi(errno), %o3
+ or %o3, %lo(errno), %o3 /* %o3 = address of errno */
+ st %o0, [%o3] /* leaf routine (no save): the error code is in %o0; %i0 belongs to the caller */
+
+ retl
+ mov -1, %o0 /* delay slot: failure return value */
+1: dec %o1 /* %o1 - 1: 0 if %o1 was 1, all ones if %o1 was 0 */
+ retl
+ and %o0, %o1, %o0 /* delay slot: 0 when %o1 was 1, else %o0 unchanged; presumably the kernel sets %o1 = 1 in the child - confirm */
diff --git a/mdk-stage1/dietlibc/sparc/mmap.c b/mdk-stage1/dietlibc/sparc/mmap.c
new file mode 100644
index 000000000..25ebdc24e
--- /dev/null
+++ b/mdk-stage1/dietlibc/sparc/mmap.c
@@ -0,0 +1,43 @@
+#include <linux/types.h>
+#include <linux/unistd.h>
+
+#define __SYSCALL_STRING /* asm template: trap, then on carry set run an out-of-line errno handler */ \
+ "ta 0x10;" /* enter the kernel */ \
+ "bcs 2f;" /* carry set: go to the error handler at 2: */ \
+ " nop;" /* branch delay slot */ \
+ "1:" /* resume point for both the success and the error path */ \
+ ".subsection 2;" /* emit the handler out of line */ \
+ "2:" \
+ "save %%sp, -192, %%sp;" /* new register window so the call below cannot clobber the caller */ \
+ "call __errno_location;" \
+ " nop;" \
+ "st %%i0,[%%o0];" /* store the pre-save %o0 (now %i0) through the returned pointer */ \
+ "ba 1b;" \
+ " restore %%g0, -1, %%o0;" /* delay slot: drop the window and leave -1 in %o0 */ \
+ ".previous;"
+
+#define __SYSCALL_CLOBBERS "g2", "g3", "g4", "g5", "g7", /* clobber list for the asm statement: these globals, */ \
+ "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", /* all 32 floating-point registers, */ \
+ "f8", "f9", "f10", "f11", "f12", "f13", "f14", "f15", \
+ "f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23", \
+ "f24", "f25", "f26", "f27", "f28", "f29", "f30", "f31", \
+ "cc", "memory" /* plus the condition codes and memory */
+
+#define inline_syscall6(name,arg1,arg2,arg3,arg4,arg5,arg6) /* six-argument system call; evaluates to the value left in %o0 */ \
+({ \
+ register long __o0 __asm__ ("o0") = (long)(arg1); /* arguments 1..6 are pinned to %o0..%o5 */ \
+ register long __o1 __asm__ ("o1") = (long)(arg2); \
+ register long __o2 __asm__ ("o2") = (long)(arg3); \
+ register long __o3 __asm__ ("o3") = (long)(arg4); \
+ register long __o4 __asm__ ("o4") = (long)(arg5); \
+ register long __o5 __asm__ ("o5") = (long)(arg6); \
+ register long __g1 __asm__ ("g1") = __NR_##name; /* system call number is pinned to %g1 */ \
+ __asm__ (__SYSCALL_STRING : "=r" (__g1), "=r" (__o0) : /* %g1 and %o0 are both input and output */ \
+ "0" (__g1), "1" (__o0), "r" (__o1), "r" (__o2), \
+ "r" (__o3), "r" (__o4), "r" (__o5) : \
+ __SYSCALL_CLOBBERS); \
+ __o0; /* value of the statement expression */ \
+})
+int mmap(void*start,size_t length,int prot,int flags,int fd,off_t offset) { /* passes the six arguments straight to system call __NR_mmap */
+ return inline_syscall6(mmap,start,length,prot,flags,fd,offset); /* NOTE(review): the long result is returned as int, not void* - confirm callers expect this prototype */
+}
diff --git a/mdk-stage1/dietlibc/sparc/pipe.S b/mdk-stage1/dietlibc/sparc/pipe.S
new file mode 100644
index 000000000..b8ad1d251
--- /dev/null
+++ b/mdk-stage1/dietlibc/sparc/pipe.S
@@ -0,0 +1,20 @@
+#include "syscalls.h"
+.text
+.global pipe
+pipe: /* pipe: %o0 = address of a two-word array; returns 0 with both descriptors stored there, or -1 with the error code stored through __errno_location */
+ mov %o0, %o2 /* keep the array pointer: the trap overwrites %o0 (and %o1) with its results */
+ mov __NR_pipe, %g1 /* system call number goes in %g1 */
+ ta 0x10 /* trap into the kernel */
+ bcc,a .Lnoerror /* carry clear: the call succeeded */
+ nop
+ save %sp, -96, %sp /* new register window so the call below has its own %o registers */
+ call __errno_location
+ nop
+ st %i0, [ %o0 ] /* error code (pre-save %o0) stored through the returned pointer */
+ ret
+ restore %g0, -1, %o0 /* delay slot: return -1 in the caller's %o0 */
+.Lnoerror:
+ st %o0, [ %o2 ] /* first returned descriptor -> array[0] */
+ st %o1, [ %o2 + 4 ] /* second returned descriptor -> array[1] */
+ retl
+ mov %g0, %o0 /* delay slot: return 0 */
diff --git a/mdk-stage1/dietlibc/sparc/setjmp.S b/mdk-stage1/dietlibc/sparc/setjmp.S
new file mode 100644
index 000000000..be77af3c4
--- /dev/null
+++ b/mdk-stage1/dietlibc/sparc/setjmp.S
@@ -0,0 +1,35 @@
+#define _ASM
+#define _SETJMP_H
+#include <bits/setjmp.h>
+
+#define ST_FLUSH_WINDOWS 0x03 /* software trap number used with `ta` below */
+/* Three entry points share one body: __setjmp enters with %o1 = 0, setjmp with %o1 = 1, __sigsetjmp with the caller's %o1. %o0 is the jump buffer. */
+.section .rodata
+.text /* switches straight back to .text; nothing is emitted into .rodata here */
+.globl __setjmp
+__setjmp:
+ b 1f /* jump to the shared body */
+ set 0, %o1 /* delay slot: second argument = 0 */
+.size __setjmp,.-__setjmp
+
+.globl setjmp
+setjmp:
+ set 1, %o1 /* second argument = 1, then fall through into __sigsetjmp */
+.size setjmp,.-setjmp
+
+.globl __sigsetjmp
+__sigsetjmp:
+1:
+ /* Save our PC, SP and FP. Save the signal mask if requested with
+ a tail-call for simplicity; it always returns zero. */
+ ta ST_FLUSH_WINDOWS
+
+ st %o7, [%o0 + (JB_PC * 4)] /* return address of our caller */
+ st %sp, [%o0 + (JB_SP * 4)]
+ st %fp, [%o0 + (JB_FP * 4)]
+
+ mov %o7, %g1 /* keep our return address: call overwrites %o7 */
+ call __sigjmp_save
+ mov %g1, %o7 /* delay slot: put it back so __sigjmp_save returns directly to our caller */
+.size __sigsetjmp,.-__sigsetjmp
+
diff --git a/mdk-stage1/dietlibc/sparc/start.S b/mdk-stage1/dietlibc/sparc/start.S
new file mode 100644
index 000000000..e948aaddc
--- /dev/null
+++ b/mdk-stage1/dietlibc/sparc/start.S
@@ -0,0 +1,46 @@
+#include "start.h"
+
+#ifdef __sparc__
+ .section ".text"
+ .align 4
+ .global _start
+ .type _start,@function
+_start: /* program entry: computes argc/argv/envp from the initial stack, sets environ, calls main, then branches to exit */
+
+ /* Terminate the stack frame, and reserve space for functions to
+ drop their arguments. */
+ mov %g0, %fp
+ sub %sp, 6*4, %sp
+
+ /* Extract the arguments and environment as encoded on the stack. The
+ argument info starts after one register window (16 words) past the SP. */
+ ld [%sp+22*4], %o0 /* argc: 16 window words plus the 6 words reserved above */
+ add %sp, 23*4, %o1 /* argv: the word after argc */
+ add %o1, %o0, %o2 /* four additions of argc: %o2 = argv + 4*argc */
+ add %o2, %o0, %o2
+ add %o2, %o0, %o2
+ add %o2, %o0, %o2
+ add %o2, 4, %o2 /* skip one more word: %o2 = envp */
+
+ sethi %hi(environ), %o3
+ or %o3, %lo(environ), %o3
+ st %o2, [%o3] /* environ = envp */
+
+ /* When starting a binary via the dynamic linker, %g1 contains the
+ address of the shared library termination function, which will be
+ registered with atexit(). If we are statically linked, this will
+ be NULL. */
+
+ /* Let libc do the rest of the initialization, and call main. */
+ call main /* main(%o0 = argc, %o1 = argv, %o2 = envp) */
+ mov %g1, %o5 /* delay slot: %g1 is also handed over in %o5 */
+
+ b exit /* branch, not call: %o0 still holds the value main returned */
+ mov %o0, %i0 /* delay slot: copies that value to %i0; %o0 is left untouched */
+
+ /* Die very horribly if exit returns. */
+ unimp
+
+ .size _start, .-_start
+#endif
+
diff --git a/mdk-stage1/dietlibc/sparc/udiv.S b/mdk-stage1/dietlibc/sparc/udiv.S
new file mode 100644
index 000000000..87479e7bd
--- /dev/null
+++ b/mdk-stage1/dietlibc/sparc/udiv.S
@@ -0,0 +1,363 @@
+#ifdef __sparc__
+ /* This file is generated from divrem.m4; DO NOT EDIT! */
+/*
+ * Division and remainder, from Appendix E of the Sparc Version 8
+ * Architecture Manual, with fixes from Gordon Irlam.
+ */
+
+/*
+ * Input: dividend and divisor in %o0 and %o1 respectively.
+ *
+ * m4 parameters:
+ * .udiv name of function to generate
+ * div div=div => %o0 / %o1; div=rem => %o0 % %o1
+ * false false=true => signed; false=false => unsigned
+ *
+ * Algorithm parameters:
+ * N how many bits per iteration we try to get (4)
+ * WORDSIZE total number of bits (32)
+ *
+ * Derived constants:
+ * TOPBITS number of bits in the top decade of a number
+ *
+ * Important variables:
+ * Q the partial quotient under development (initially 0)
+ * R the remainder so far, initially the dividend
+ * ITER number of main division loop iterations required;
+ * equal to ceil(log2(quotient) / N). Note that this
+ * is the log base (2^N) of the quotient.
+ * V the current comparand, initially divisor*2^(ITER*N-1)
+ *
+ * Cost:
+ * Current estimate for non-large dividend is
+ * ceil(log2(quotient) / N) * (10 + 7N/2) + C
+ * A large dividend is one greater than 2^(31-TOPBITS) and takes a
+ * different path, as the upper bits of the quotient must be developed
+ * one bit at a time.
+ */
+
+
+
+#define C_LABEL(name) name: /* emit `name` as a label definition */
+
+#define C_SYMBOL_NAME(name) name /* assembler-level spelling of a symbol: unchanged */
+/* ENTRY(name): export `name`, align to 4 bytes, define the label and mark it as a function. */
+#define ENTRY(name) \
+ .global C_SYMBOL_NAME(name); \
+ .align 4;\
+ C_LABEL(name);\
+ .type name,@function;
+
+#define LOC(name) . ## L ## name /* pastes together `.L` + name to form a label */
+/* END(name): record the size of `name` as the distance from its label to here. */
+#define END(name) \
+ .size name, . - name
+
+#define ST_DIV0 0x02 /* software trap number raised on division by zero */
+
+ENTRY(.udiv)
+/* .udiv: unsigned 32-bit division; dividend in %o0, divisor in %o1, quotient returned in %o0. */
+ ! Ready to divide. Compute size of quotient; scale comparand.
+ orcc %o1, %g0, %o5 /* %o5 = divisor (the comparand); sets Z when the divisor is zero */
+ bne 1f
+ mov %o0, %o3 /* delay slot: %o3 = dividend, used as the running remainder */
+
+ ! Divide by zero trap. If it returns, return 0 (about as
+ ! wrong as possible, but that is what SunOS does...).
+ ta ST_DIV0
+ retl
+ clr %o0
+
+1:
+ cmp %o3, %o5 ! if %o1 exceeds %o0, done
+ blu LOC(got_result) ! (and algorithm fails otherwise)
+ clr %o2 /* delay slot: quotient %o2 = 0 */
+ sethi %hi(1 << (32 - 4 - 1)), %g1 /* %g1 = 1 << 27, the "large dividend" threshold */
+ cmp %o3, %g1
+ blu LOC(not_really_big)
+ clr %o4 /* delay slot: 4-bit iteration counter %o4 = 0 */
+
+ ! Here the dividend is >= 2**(31-N) or so. We must be careful here,
+ ! as our usual N-at-a-shot divide step will cause overflow and havoc.
+ ! The number of bits in the result here is N*ITER+SC, where SC <= N.
+ ! Compute ITER in an unorthodox manner: know we need to shift V into
+ ! the top decade: so do not even bother to compare to R.
+ 1:
+ cmp %o5, %g1
+ bgeu 3f
+ mov 1, %g2
+ sll %o5, 4, %o5
+ b 1b
+ add %o4, 1, %o4
+
+ ! Now compute %g2.
+ 2: addcc %o5, %o5, %o5
+ bcc LOC(not_too_big)
+ add %g2, 1, %g2
+
+ ! We get here if the %o1 overflowed while shifting.
+ ! This means that %o3 has the high-order bit set.
+ ! Restore %o5 and subtract from %o3.
+ sll %g1, 4, %g1 ! high order bit
+ srl %o5, 1, %o5 ! rest of %o5
+ add %o5, %g1, %o5
+ b LOC(do_single_div)
+ sub %g2, 1, %g2
+
+ LOC(not_too_big):
+ 3: cmp %o5, %o3
+ blu 2b
+ nop
+ be LOC(do_single_div)
+ nop
+ /* NB: these are commented out in the V8-Sparc manual as well */
+ /* (I do not understand this) */
+ ! %o5 > %o3: went too far: back up 1 step
+ ! srl %o5, 1, %o5
+ ! dec %g2
+ ! do single-bit divide steps
+ !
+ ! We have to be careful here. We know that %o3 >= %o5, so we can do the
+ ! first divide step without thinking. BUT, the others are conditional,
+ ! and are only done if %o3 >= 0. Because both %o3 and %o5 may have the high-
+ ! order bit set in the first step, just falling into the regular
+ ! division loop will mess up the first time around.
+ ! So we unroll slightly...
+ LOC(do_single_div):
+ subcc %g2, 1, %g2
+ bl LOC(end_regular_divide)
+ nop
+ sub %o3, %o5, %o3
+ mov 1, %o2
+ b LOC(end_single_divloop)
+ nop
+ LOC(single_divloop):
+ sll %o2, 1, %o2
+ bl 1f
+ srl %o5, 1, %o5
+ ! %o3 >= 0
+ sub %o3, %o5, %o3
+ b 2f
+ add %o2, 1, %o2
+ 1: ! %o3 < 0
+ add %o3, %o5, %o3
+ sub %o2, 1, %o2
+ 2:
+ LOC(end_single_divloop):
+ subcc %g2, 1, %g2
+ bge LOC(single_divloop)
+ tst %o3
+ b,a LOC(end_regular_divide)
+
+LOC(not_really_big):
+1:
+ sll %o5, 4, %o5
+ cmp %o5, %o3
+ bleu 1b
+ addcc %o4, 1, %o4
+ be LOC(got_result)
+ sub %o4, 1, %o4
+
+ tst %o3 ! set up for initial iteration
+LOC(divloop):
+ sll %o2, 4, %o2
+ ! depth 1, accumulated bits 0
+ bl LOC(1.16)
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ ! depth 2, accumulated bits 1
+ bl LOC(2.17)
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ ! depth 3, accumulated bits 3
+ bl LOC(3.19)
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ ! depth 4, accumulated bits 7
+ bl LOC(4.23)
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (7*2+1), %o2
+
+LOC(4.23):
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (7*2-1), %o2
+
+
+LOC(3.19):
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ ! depth 4, accumulated bits 5
+ bl LOC(4.21)
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (5*2+1), %o2
+
+LOC(4.21):
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (5*2-1), %o2
+
+
+
+LOC(2.17):
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ ! depth 3, accumulated bits 1
+ bl LOC(3.17)
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ ! depth 4, accumulated bits 3
+ bl LOC(4.19)
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (3*2+1), %o2
+
+LOC(4.19):
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (3*2-1), %o2
+
+
+LOC(3.17):
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ ! depth 4, accumulated bits 1
+ bl LOC(4.17)
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (1*2+1), %o2
+
+LOC(4.17):
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (1*2-1), %o2
+
+
+
+
+LOC(1.16):
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ ! depth 2, accumulated bits -1
+ bl LOC(2.15)
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ ! depth 3, accumulated bits -1
+ bl LOC(3.15)
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ ! depth 4, accumulated bits -1
+ bl LOC(4.15)
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-1*2+1), %o2
+
+LOC(4.15):
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-1*2-1), %o2
+
+
+LOC(3.15):
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ ! depth 4, accumulated bits -3
+ bl LOC(4.13)
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-3*2+1), %o2
+
+LOC(4.13):
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-3*2-1), %o2
+
+
+
+LOC(2.15):
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ ! depth 3, accumulated bits -3
+ bl LOC(3.13)
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ ! depth 4, accumulated bits -5
+ bl LOC(4.11)
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-5*2+1), %o2
+
+LOC(4.11):
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-5*2-1), %o2
+
+
+LOC(3.13):
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ ! depth 4, accumulated bits -7
+ bl LOC(4.9)
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-7*2+1), %o2
+
+LOC(4.9):
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-7*2-1), %o2
+
+
+
+
+ 9:
+LOC(end_regular_divide):
+ subcc %o4, 1, %o4
+ bge LOC(divloop)
+ tst %o3
+ bl,a LOC(got_result)
+ ! non-restoring fixup here (one instruction only!)
+ sub %o2, 1, %o2
+
+
+LOC(got_result):
+
+ retl
+ mov %o2, %o0 /* delay slot: return the quotient */
+
+END(.udiv)
+
+#endif
diff --git a/mdk-stage1/dietlibc/sparc/umul.S b/mdk-stage1/dietlibc/sparc/umul.S
new file mode 100644
index 000000000..15038ab2a
--- /dev/null
+++ b/mdk-stage1/dietlibc/sparc/umul.S
@@ -0,0 +1,170 @@
+#ifdef __sparc__
+/*
+ * Unsigned multiply. Returns %o0 * %o1 in %o1%o0 (i.e., %o1 holds the
+ * upper 32 bits of the 64-bit product).
+ *
+ * This code optimizes short (less than 13-bit) multiplies. Short
+ * multiplies require 25 instruction cycles, and long ones require
+ * 45 instruction cycles.
+ *
+ * On return, overflow has occurred (%o1 is not zero) if and only if
+ * the Z condition code is clear, allowing, e.g., the following:
+ *
+ * call .umul
+ * nop
+ * bnz overflow (or tnz)
+ */
+
+#define C_LABEL(name) name: /* emit `name` as a label definition */
+
+#define C_SYMBOL_NAME(name) name /* assembler-level spelling of a symbol: unchanged */
+/* ENTRY(name): export `name`, align to 4 bytes, define the label and mark it as a function. */
+#define ENTRY(name) \
+ .global C_SYMBOL_NAME(name); \
+ .align 4;\
+ C_LABEL(name);\
+ .type name,@function;
+
+#define LOC(name) . ## L ## name /* pastes together `.L` + name to form a label */
+/* END(name): record the size of `name` as the distance from its label to here. */
+#define END(name) \
+ .size name, . - name
+
+ENTRY(.umul) /* unsigned multiply: %o0 * %o1, low word returned in %o0, high word in %o1 */
+ or %o0, %o1, %o4 /* merge both operands so one test covers the high bits of each */
+ mov %o0, %y ! multiplier -> Y
+ andncc %o4, 0xfff, %g0 ! test bits 12..31 of *both* args
+ be LOC(mul_shortway) ! if zero, can do it the short way
+ andcc %g0, %g0, %o4 ! zero the partial product; clear N & V
+
+ /*
+ * Long multiply. 32 steps, followed by a final shift step.
+ */
+ mulscc %o4, %o1, %o4 ! 1
+ mulscc %o4, %o1, %o4 ! 2
+ mulscc %o4, %o1, %o4 ! 3
+ mulscc %o4, %o1, %o4 ! 4
+ mulscc %o4, %o1, %o4 ! 5
+ mulscc %o4, %o1, %o4 ! 6
+ mulscc %o4, %o1, %o4 ! 7
+ mulscc %o4, %o1, %o4 ! 8
+ mulscc %o4, %o1, %o4 ! 9
+ mulscc %o4, %o1, %o4 ! 10
+ mulscc %o4, %o1, %o4 ! 11
+ mulscc %o4, %o1, %o4 ! 12
+ mulscc %o4, %o1, %o4 ! 13
+ mulscc %o4, %o1, %o4 ! 14
+ mulscc %o4, %o1, %o4 ! 15
+ mulscc %o4, %o1, %o4 ! 16
+ mulscc %o4, %o1, %o4 ! 17
+ mulscc %o4, %o1, %o4 ! 18
+ mulscc %o4, %o1, %o4 ! 19
+ mulscc %o4, %o1, %o4 ! 20
+ mulscc %o4, %o1, %o4 ! 21
+ mulscc %o4, %o1, %o4 ! 22
+ mulscc %o4, %o1, %o4 ! 23
+ mulscc %o4, %o1, %o4 ! 24
+ mulscc %o4, %o1, %o4 ! 25
+ mulscc %o4, %o1, %o4 ! 26
+ mulscc %o4, %o1, %o4 ! 27
+ mulscc %o4, %o1, %o4 ! 28
+ mulscc %o4, %o1, %o4 ! 29
+ mulscc %o4, %o1, %o4 ! 30
+ mulscc %o4, %o1, %o4 ! 31
+ mulscc %o4, %o1, %o4 ! 32
+ mulscc %o4, %g0, %o4 ! final shift
+
+ /*
+ * Normally, with the shift-and-add approach, if both numbers are
+ * positive you get the correct result. With 32-bit two's-complement
+ * numbers, -x is represented as
+ *
+ * x 32
+ * ( 2 - ------ ) mod 2 * 2
+ * 32
+ * 2
+ *
+ * (the `mod 2' subtracts 1 from 1.bbbb). To avoid lots of 2^32s,
+ * we can treat this as if the radix point were just to the left
+ * of the sign bit (multiply by 2^32), and get
+ *
+ * -x = (2 - x) mod 2
+ *
+ * Then, ignoring the `mod 2's for convenience:
+ *
+ * x * y = xy
+ * -x * y = 2y - xy
+ * x * -y = 2x - xy
+ * -x * -y = 4 - 2x - 2y + xy
+ *
+ * For signed multiplies, we subtract (x << 32) from the partial
+ * product to fix this problem for negative multipliers (see mul.s).
+ * Because of the way the shift into the partial product is calculated
+ * (N xor V), this term is automatically removed for the multiplicand,
+ * so we don't have to adjust.
+ *
+ * But for unsigned multiplies, the high order bit wasn't a sign bit,
+ * and the correction is wrong. So for unsigned multiplies where the
+ * high order bit is one, we end up with xy - (y << 32). To fix it
+ * we add y << 32.
+ */
+#if 0
+ tst %o1
+ bl,a 1f ! if %o1 < 0 (high order bit = 1),
+ add %o4, %o0, %o4 ! %o4 += %o0 (add y to upper half)
+1: rd %y, %o0 ! get lower half of product
+ retl
+ addcc %o4, %g0, %o1 ! put upper half in place and set Z for %o1==0
+#else
+ /* Faster code from tege@sics.se. */
+ sra %o1, 31, %o2 ! make mask from sign bit
+ and %o0, %o2, %o2 ! %o2 = 0 or %o0, depending on sign of %o1
+ rd %y, %o0 ! get lower half of product
+ retl
+ addcc %o4, %o2, %o1 ! add compensation and put upper half in place
+#endif
+
+LOC(mul_shortway):
+ /*
+ * Short multiply. 12 steps, followed by a final shift step.
+ * The resulting bits are off by 12 and (32-12) = 20 bit positions,
+ * but there is no problem with %o0 being negative (unlike above),
+ * and overflow is impossible (the answer is at most 24 bits long).
+ */
+ mulscc %o4, %o1, %o4 ! 1
+ mulscc %o4, %o1, %o4 ! 2
+ mulscc %o4, %o1, %o4 ! 3
+ mulscc %o4, %o1, %o4 ! 4
+ mulscc %o4, %o1, %o4 ! 5
+ mulscc %o4, %o1, %o4 ! 6
+ mulscc %o4, %o1, %o4 ! 7
+ mulscc %o4, %o1, %o4 ! 8
+ mulscc %o4, %o1, %o4 ! 9
+ mulscc %o4, %o1, %o4 ! 10
+ mulscc %o4, %o1, %o4 ! 11
+ mulscc %o4, %o1, %o4 ! 12
+ mulscc %o4, %g0, %o4 ! final shift
+
+ /*
+ * %o4 has 20 of the bits that should be in the result; %y has
+ * the bottom 12 (as %y's top 12). That is:
+ *
+ * %o4 %y
+ * +----------------+----------------+
+ * | -12- | -20- | -12- | -20- |
+ * +------(---------+------)---------+
+ * -----result-----
+ *
+ * The 12 bits of %o4 left of the `result' area are all zero;
+ * in fact, all top 20 bits of %o4 are zero.
+ */
+
+ rd %y, %o5 /* %o5 = %y, whose top 12 bits are the bottom 12 bits of the result */
+ sll %o4, 12, %o0 ! shift middle bits left 12
+ srl %o5, 20, %o5 ! shift low bits right 20
+ or %o5, %o0, %o0 /* combine both parts into the 32-bit result */
+ retl
+ addcc %g0, %g0, %o1 ! %o1 = zero, and set Z
+
+END(.umul)
+#endif
diff --git a/mdk-stage1/dietlibc/sparc/unified.S b/mdk-stage1/dietlibc/sparc/unified.S
new file mode 100644
index 000000000..53b1612a9
--- /dev/null
+++ b/mdk-stage1/dietlibc/sparc/unified.S
@@ -0,0 +1,28 @@
+#include <dietfeatures.h>
+/* __unified_syscall: trap into the kernel, then turn an error result into errno plus a -1 return. Presumably entered with the system call number already in %g1 - confirm against the callers. */
+.text
+.global __unified_syscall
+__unified_syscall:
+ ta 0x10
+
+ bcc 1f /* carry clear: skip the negation below */
+ save %sp, -104, %sp /* delay slot, always executed: the kernel result is now visible as %i0 */
+
+ neg %i0, %i0 /* carry set: negate the result */
+1:
+ add %i0, 0xff, %l2 /* %l2 = result + 255 */
+ cmp %l2, 0xfe
+ bgu 2f /* unsigned > 254: result is outside -255..-1, return it unchanged */
+ neg %i0, %l3 /* delay slot: %l3 = positive error number */
+#ifdef WANT_THREAD_SAVE /* NOTE(review): confirm dietfeatures.h spells this macro WANT_THREAD_SAVE */
+ call __errno_location
+ nop
+#else
+ sethi %hi(errno), %o0
+ or %o0, %lo(errno), %o0
+#endif
+ st %l3, [ %o0 ] /* store the error number through the errno address */
+ mov -1, %i0 /* the restore below hands back %i0 (the caller's %o0), so -1 must be written there, not to %o0 */
+2:
+ ret
+ restore /* delay slot: pop the window; the caller sees %i0 as its %o0 */
diff --git a/mdk-stage1/dietlibc/sparc/urem.S b/mdk-stage1/dietlibc/sparc/urem.S
new file mode 100644
index 000000000..943cb7873
--- /dev/null
+++ b/mdk-stage1/dietlibc/sparc/urem.S
@@ -0,0 +1,362 @@
+#ifdef __sparc__
+ /* This file is generated from divrem.m4; DO NOT EDIT! */
+/*
+ * Division and remainder, from Appendix E of the Sparc Version 8
+ * Architecture Manual, with fixes from Gordon Irlam.
+ */
+
+/*
+ * Input: dividend and divisor in %o0 and %o1 respectively.
+ *
+ * m4 parameters:
+ * .urem name of function to generate
+ * rem rem=div => %o0 / %o1; rem=rem => %o0 % %o1
+ * false false=true => signed; false=false => unsigned
+ *
+ * Algorithm parameters:
+ * N how many bits per iteration we try to get (4)
+ * WORDSIZE total number of bits (32)
+ *
+ * Derived constants:
+ * TOPBITS number of bits in the top decade of a number
+ *
+ * Important variables:
+ * Q the partial quotient under development (initially 0)
+ * R the remainder so far, initially the dividend
+ * ITER number of main division loop iterations required;
+ * equal to ceil(log2(quotient) / N). Note that this
+ * is the log base (2^N) of the quotient.
+ * V the current comparand, initially divisor*2^(ITER*N-1)
+ *
+ * Cost:
+ * Current estimate for non-large dividend is
+ * ceil(log2(quotient) / N) * (10 + 7N/2) + C
+ * A large dividend is one greater than 2^(31-TOPBITS) and takes a
+ * different path, as the upper bits of the quotient must be developed
+ * one bit at a time.
+ */
+
+
+
+#define C_LABEL(name) name: /* emit `name` as a label definition */
+
+#define C_SYMBOL_NAME(name) name /* assembler-level spelling of a symbol: unchanged */
+/* ENTRY(name): export `name`, align to 4 bytes, define the label and mark it as a function. */
+#define ENTRY(name) \
+ .global C_SYMBOL_NAME(name); \
+ .align 4;\
+ C_LABEL(name);\
+ .type name,@function;
+
+#define LOC(name) . ## L ## name /* pastes together `.L` + name to form a label */
+/* END(name): record the size of `name` as the distance from its label to here. */
+#define END(name) \
+ .size name, . - name
+
+#define ST_DIV0 0x02 /* software trap number raised on division by zero */
+
+ENTRY(.urem)
+/* .urem: unsigned 32-bit remainder; dividend in %o0, divisor in %o1, remainder returned in %o0. */
+ ! Ready to divide. Compute size of quotient; scale comparand.
+ orcc %o1, %g0, %o5 /* %o5 = divisor (the comparand); sets Z when the divisor is zero */
+ bne 1f
+ mov %o0, %o3 /* delay slot: %o3 = dividend, used as the running remainder */
+
+ ! Divide by zero trap. If it returns, return 0 (about as
+ ! wrong as possible, but that is what SunOS does...).
+ ta ST_DIV0
+ retl
+ clr %o0
+
+1:
+ cmp %o3, %o5 ! if %o1 exceeds %o0, done
+ blu LOC(got_result) ! (and algorithm fails otherwise)
+ clr %o2 /* delay slot: quotient %o2 = 0 */
+ sethi %hi(1 << (32 - 4 - 1)), %g1 /* %g1 = 1 << 27, the "large dividend" threshold */
+ cmp %o3, %g1
+ blu LOC(not_really_big)
+ clr %o4 /* delay slot: 4-bit iteration counter %o4 = 0 */
+
+ ! Here the dividend is >= 2**(31-N) or so. We must be careful here,
+ ! as our usual N-at-a-shot divide step will cause overflow and havoc.
+ ! The number of bits in the result here is N*ITER+SC, where SC <= N.
+ ! Compute ITER in an unorthodox manner: know we need to shift V into
+ ! the top decade: so do not even bother to compare to R.
+ 1:
+ cmp %o5, %g1
+ bgeu 3f
+ mov 1, %g2
+ sll %o5, 4, %o5
+ b 1b
+ add %o4, 1, %o4
+
+ ! Now compute %g2.
+ 2: addcc %o5, %o5, %o5
+ bcc LOC(not_too_big)
+ add %g2, 1, %g2
+
+ ! We get here if the %o1 overflowed while shifting.
+ ! This means that %o3 has the high-order bit set.
+ ! Restore %o5 and subtract from %o3.
+ sll %g1, 4, %g1 ! high order bit
+ srl %o5, 1, %o5 ! rest of %o5
+ add %o5, %g1, %o5
+ b LOC(do_single_div)
+ sub %g2, 1, %g2
+
+ LOC(not_too_big):
+ 3: cmp %o5, %o3
+ blu 2b
+ nop
+ be LOC(do_single_div)
+ nop
+ /* NB: these are commented out in the V8-Sparc manual as well */
+ /* (I do not understand this) */
+ ! %o5 > %o3: went too far: back up 1 step
+ ! srl %o5, 1, %o5
+ ! dec %g2
+ ! do single-bit divide steps
+ !
+ ! We have to be careful here. We know that %o3 >= %o5, so we can do the
+ ! first divide step without thinking. BUT, the others are conditional,
+ ! and are only done if %o3 >= 0. Because both %o3 and %o5 may have the high-
+ ! order bit set in the first step, just falling into the regular
+ ! division loop will mess up the first time around.
+ ! So we unroll slightly...
+ LOC(do_single_div):
+ subcc %g2, 1, %g2
+ bl LOC(end_regular_divide)
+ nop
+ sub %o3, %o5, %o3
+ mov 1, %o2
+ b LOC(end_single_divloop)
+ nop
+ LOC(single_divloop):
+ sll %o2, 1, %o2
+ bl 1f
+ srl %o5, 1, %o5
+ ! %o3 >= 0
+ sub %o3, %o5, %o3
+ b 2f
+ add %o2, 1, %o2
+ 1: ! %o3 < 0
+ add %o3, %o5, %o3
+ sub %o2, 1, %o2
+ 2:
+ LOC(end_single_divloop):
+ subcc %g2, 1, %g2
+ bge LOC(single_divloop)
+ tst %o3
+ b,a LOC(end_regular_divide)
+
+LOC(not_really_big):
+1:
+ sll %o5, 4, %o5
+ cmp %o5, %o3
+ bleu 1b
+ addcc %o4, 1, %o4
+ be LOC(got_result)
+ sub %o4, 1, %o4
+
+ tst %o3 ! set up for initial iteration
+LOC(divloop):
+ sll %o2, 4, %o2
+ ! depth 1, accumulated bits 0
+ bl LOC(1.16)
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ ! depth 2, accumulated bits 1
+ bl LOC(2.17)
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ ! depth 3, accumulated bits 3
+ bl LOC(3.19)
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ ! depth 4, accumulated bits 7
+ bl LOC(4.23)
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (7*2+1), %o2
+
+LOC(4.23):
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (7*2-1), %o2
+
+
+LOC(3.19):
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ ! depth 4, accumulated bits 5
+ bl LOC(4.21)
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (5*2+1), %o2
+
+LOC(4.21):
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (5*2-1), %o2
+
+
+
+LOC(2.17):
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ ! depth 3, accumulated bits 1
+ bl LOC(3.17)
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ ! depth 4, accumulated bits 3
+ bl LOC(4.19)
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (3*2+1), %o2
+
+LOC(4.19):
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (3*2-1), %o2
+
+
+LOC(3.17):
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ ! depth 4, accumulated bits 1
+ bl LOC(4.17)
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (1*2+1), %o2
+
+LOC(4.17):
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (1*2-1), %o2
+
+
+
+
+LOC(1.16):
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ ! depth 2, accumulated bits -1
+ bl LOC(2.15)
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ ! depth 3, accumulated bits -1
+ bl LOC(3.15)
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ ! depth 4, accumulated bits -1
+ bl LOC(4.15)
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-1*2+1), %o2
+
+LOC(4.15):
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-1*2-1), %o2
+
+
+LOC(3.15):
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ ! depth 4, accumulated bits -3
+ bl LOC(4.13)
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-3*2+1), %o2
+
+LOC(4.13):
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-3*2-1), %o2
+
+
+
+LOC(2.15):
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ ! depth 3, accumulated bits -3
+ bl LOC(3.13)
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ ! depth 4, accumulated bits -5
+ bl LOC(4.11)
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-5*2+1), %o2
+
+LOC(4.11):
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-5*2-1), %o2
+
+
+LOC(3.13):
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ ! depth 4, accumulated bits -7
+ bl LOC(4.9)
+ srl %o5,1,%o5
+ ! remainder is positive
+ subcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-7*2+1), %o2
+
+LOC(4.9):
+ ! remainder is negative
+ addcc %o3,%o5,%o3
+ b 9f
+ add %o2, (-7*2-1), %o2
+
+
+
+
+ 9:
+LOC(end_regular_divide):
+ subcc %o4, 1, %o4
+ bge LOC(divloop)
+ tst %o3
+ bl,a LOC(got_result)
+ ! non-restoring fixup here (one instruction only!)
+ add %o3, %o1, %o3
+
+
+LOC(got_result):
+
+ retl
+ mov %o3, %o0 /* delay slot: return the remainder */
+
+END(.urem)
+#endif