! SPARC __mpn_add_n -- Add two limb vectors of the same length > 0 and store
! sum in a third limb vector.
-
-! Copyright (C) 1995, 1996 Free Software Foundation, Inc.
-
+!
+! Copyright (C) 1995, 1996, 1997 Free Software Foundation, Inc.
+!
! This file is part of the GNU MP Library.
-
+!
! The GNU MP Library is free software; you can redistribute it and/or modify
! it under the terms of the GNU Library General Public License as published by
! the Free Software Foundation; either version 2 of the License, or (at your
! option) any later version.
-
+!
! The GNU MP Library is distributed in the hope that it will be useful, but
! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
! License for more details.
-
+!
! You should have received a copy of the GNU Library General Public License
! along with the GNU MP Library; see the file COPYING.LIB. If not, write to
! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
! INPUT PARAMETERS
-#define res_ptr %o0
-#define s1_ptr %o1
-#define s2_ptr %o2
-#define size %o3
+#define RES_PTR %o0
+#define S1_PTR %o1
+#define S2_PTR %o2
+#define SIZE %o3
-#include "sysdep.h"
+#include <sysdep.h>
- .text
- .align 4
- .global C_SYMBOL_NAME(__mpn_add_n)
-C_SYMBOL_NAME(__mpn_add_n):
- xor s2_ptr,res_ptr,%g1
+ENTRY(__mpn_add_n)
+ xor S2_PTR,RES_PTR,%g1
andcc %g1,4,%g0
- bne L1 ! branch if alignment differs
- nop
+ bne LOC(1) ! branch if alignment differs
+ nop
! ** V1a **
-L0: andcc res_ptr,4,%g0 ! res_ptr unaligned? Side effect: cy=0
- be L_v1 ! if no, branch
- nop
-/* Add least significant limb separately to align res_ptr and s2_ptr */
- ld [s1_ptr],%g4
- add s1_ptr,4,s1_ptr
- ld [s2_ptr],%g2
- add s2_ptr,4,s2_ptr
- add size,-1,size
+LOC(0): andcc RES_PTR,4,%g0 ! RES_PTR unaligned? Side effect: cy=0
+ be LOC(v1) ! if no, branch
+ nop
+/* Add least significant limb separately to align RES_PTR and S2_PTR */
+ ld [S1_PTR],%g4
+ add S1_PTR,4,S1_PTR
+ ld [S2_PTR],%g2
+ add S2_PTR,4,S2_PTR
+ add SIZE,-1,SIZE
addcc %g4,%g2,%o4
- st %o4,[res_ptr]
- add res_ptr,4,res_ptr
-L_v1: addx %g0,%g0,%o4 ! save cy in register
- cmp size,2 ! if size < 2 ...
- bl Lend2 ! ... branch to tail code
+ st %o4,[RES_PTR]
+ add RES_PTR,4,RES_PTR
+LOC(v1):
+ addx %g0,%g0,%o4 ! save cy in register
+ cmp SIZE,2 ! if SIZE < 2 ...
+ bl LOC(end2) ! ... branch to tail code
subcc %g0,%o4,%g0 ! restore cy
- ld [s1_ptr+0],%g4
- addcc size,-10,size
- ld [s1_ptr+4],%g1
- ldd [s2_ptr+0],%g2
- blt Lfin1
+ ld [S1_PTR+0],%g4
+ addcc SIZE,-10,SIZE
+ ld [S1_PTR+4],%g1
+ ldd [S2_PTR+0],%g2
+ blt LOC(fin1)
subcc %g0,%o4,%g0 ! restore cy
/* Add blocks of 8 limbs until less than 8 limbs remain */
-Loop1: addxcc %g4,%g2,%o4
- ld [s1_ptr+8],%g4
+LOC(loop1):
+ addxcc %g4,%g2,%o4
+ ld [S1_PTR+8],%g4
addxcc %g1,%g3,%o5
- ld [s1_ptr+12],%g1
- ldd [s2_ptr+8],%g2
- std %o4,[res_ptr+0]
+ ld [S1_PTR+12],%g1
+ ldd [S2_PTR+8],%g2
+ std %o4,[RES_PTR+0]
addxcc %g4,%g2,%o4
- ld [s1_ptr+16],%g4
+ ld [S1_PTR+16],%g4
addxcc %g1,%g3,%o5
- ld [s1_ptr+20],%g1
- ldd [s2_ptr+16],%g2
- std %o4,[res_ptr+8]
+ ld [S1_PTR+20],%g1
+ ldd [S2_PTR+16],%g2
+ std %o4,[RES_PTR+8]
addxcc %g4,%g2,%o4
- ld [s1_ptr+24],%g4
+ ld [S1_PTR+24],%g4
addxcc %g1,%g3,%o5
- ld [s1_ptr+28],%g1
- ldd [s2_ptr+24],%g2
- std %o4,[res_ptr+16]
+ ld [S1_PTR+28],%g1
+ ldd [S2_PTR+24],%g2
+ std %o4,[RES_PTR+16]
addxcc %g4,%g2,%o4
- ld [s1_ptr+32],%g4
+ ld [S1_PTR+32],%g4
addxcc %g1,%g3,%o5
- ld [s1_ptr+36],%g1
- ldd [s2_ptr+32],%g2
- std %o4,[res_ptr+24]
+ ld [S1_PTR+36],%g1
+ ldd [S2_PTR+32],%g2
+ std %o4,[RES_PTR+24]
addx %g0,%g0,%o4 ! save cy in register
- addcc size,-8,size
- add s1_ptr,32,s1_ptr
- add s2_ptr,32,s2_ptr
- add res_ptr,32,res_ptr
- bge Loop1
+ addcc SIZE,-8,SIZE
+ add S1_PTR,32,S1_PTR
+ add S2_PTR,32,S2_PTR
+ add RES_PTR,32,RES_PTR
+ bge LOC(loop1)
subcc %g0,%o4,%g0 ! restore cy
-Lfin1: addcc size,8-2,size
- blt Lend1
+LOC(fin1):
+ addcc SIZE,8-2,SIZE
+ blt LOC(end1)
subcc %g0,%o4,%g0 ! restore cy
/* Add blocks of 2 limbs until less than 2 limbs remain */
-Loope1: addxcc %g4,%g2,%o4
- ld [s1_ptr+8],%g4
+LOC(loope1):
+ addxcc %g4,%g2,%o4
+ ld [S1_PTR+8],%g4
addxcc %g1,%g3,%o5
- ld [s1_ptr+12],%g1
- ldd [s2_ptr+8],%g2
- std %o4,[res_ptr+0]
+ ld [S1_PTR+12],%g1
+ ldd [S2_PTR+8],%g2
+ std %o4,[RES_PTR+0]
addx %g0,%g0,%o4 ! save cy in register
- addcc size,-2,size
- add s1_ptr,8,s1_ptr
- add s2_ptr,8,s2_ptr
- add res_ptr,8,res_ptr
- bge Loope1
+ addcc SIZE,-2,SIZE
+ add S1_PTR,8,S1_PTR
+ add S2_PTR,8,S2_PTR
+ add RES_PTR,8,RES_PTR
+ bge LOC(loope1)
subcc %g0,%o4,%g0 ! restore cy
-Lend1: addxcc %g4,%g2,%o4
+LOC(end1):
+ addxcc %g4,%g2,%o4
addxcc %g1,%g3,%o5
- std %o4,[res_ptr+0]
+ std %o4,[RES_PTR+0]
addx %g0,%g0,%o4 ! save cy in register
- andcc size,1,%g0
- be Lret1
+ andcc SIZE,1,%g0
+ be LOC(ret1)
subcc %g0,%o4,%g0 ! restore cy
/* Add last limb */
- ld [s1_ptr+8],%g4
- ld [s2_ptr+8],%g2
+ ld [S1_PTR+8],%g4
+ ld [S2_PTR+8],%g2
addxcc %g4,%g2,%o4
- st %o4,[res_ptr+8]
+ st %o4,[RES_PTR+8]
-Lret1: retl
+LOC(ret1):
+ retl
addx %g0,%g0,%o0 ! return carry-out from most sign. limb
-L1: xor s1_ptr,res_ptr,%g1
+LOC(1): xor S1_PTR,RES_PTR,%g1
andcc %g1,4,%g0
- bne L2
+ bne LOC(2)
nop
! ** V1b **
- mov s2_ptr,%g1
- mov s1_ptr,s2_ptr
- b L0
- mov %g1,s1_ptr
+ mov S2_PTR,%g1
+ mov S1_PTR,S2_PTR
+ b LOC(0)
+ mov %g1,S1_PTR
! ** V2 **
-/* If we come here, the alignment of s1_ptr and res_ptr as well as the
- alignment of s2_ptr and res_ptr differ. Since there are only two ways
+/* If we come here, the alignment of S1_PTR and RES_PTR as well as the
+ alignment of S2_PTR and RES_PTR differ. Since there are only two ways
things can be aligned (that we care about) we now know that the alignment
- of s1_ptr and s2_ptr are the same. */
+   of S1_PTR and S2_PTR is the same. */
-L2: cmp size,1
- be Ljone
+LOC(2): cmp SIZE,1
+ be LOC(jone)
nop
- andcc s1_ptr,4,%g0 ! s1_ptr unaligned? Side effect: cy=0
- be L_v2 ! if no, branch
+ andcc S1_PTR,4,%g0 ! S1_PTR unaligned? Side effect: cy=0
+ be LOC(v2) ! if no, branch
nop
-/* Add least significant limb separately to align s1_ptr and s2_ptr */
- ld [s1_ptr],%g4
- add s1_ptr,4,s1_ptr
- ld [s2_ptr],%g2
- add s2_ptr,4,s2_ptr
- add size,-1,size
+/* Add least significant limb separately to align S1_PTR and S2_PTR */
+ ld [S1_PTR],%g4
+ add S1_PTR,4,S1_PTR
+ ld [S2_PTR],%g2
+ add S2_PTR,4,S2_PTR
+ add SIZE,-1,SIZE
addcc %g4,%g2,%o4
- st %o4,[res_ptr]
- add res_ptr,4,res_ptr
+ st %o4,[RES_PTR]
+ add RES_PTR,4,RES_PTR
-L_v2: addx %g0,%g0,%o4 ! save cy in register
- addcc size,-8,size
- blt Lfin2
+LOC(v2):
+ addx %g0,%g0,%o4 ! save cy in register
+ addcc SIZE,-8,SIZE
+ blt LOC(fin2)
subcc %g0,%o4,%g0 ! restore cy
/* Add blocks of 8 limbs until less than 8 limbs remain */
-Loop2: ldd [s1_ptr+0],%g2
- ldd [s2_ptr+0],%o4
+LOC(loop2):
+ ldd [S1_PTR+0],%g2
+ ldd [S2_PTR+0],%o4
addxcc %g2,%o4,%g2
- st %g2,[res_ptr+0]
+ st %g2,[RES_PTR+0]
addxcc %g3,%o5,%g3
- st %g3,[res_ptr+4]
- ldd [s1_ptr+8],%g2
- ldd [s2_ptr+8],%o4
+ st %g3,[RES_PTR+4]
+ ldd [S1_PTR+8],%g2
+ ldd [S2_PTR+8],%o4
addxcc %g2,%o4,%g2
- st %g2,[res_ptr+8]
+ st %g2,[RES_PTR+8]
addxcc %g3,%o5,%g3
- st %g3,[res_ptr+12]
- ldd [s1_ptr+16],%g2
- ldd [s2_ptr+16],%o4
+ st %g3,[RES_PTR+12]
+ ldd [S1_PTR+16],%g2
+ ldd [S2_PTR+16],%o4
addxcc %g2,%o4,%g2
- st %g2,[res_ptr+16]
+ st %g2,[RES_PTR+16]
addxcc %g3,%o5,%g3
- st %g3,[res_ptr+20]
- ldd [s1_ptr+24],%g2
- ldd [s2_ptr+24],%o4
+ st %g3,[RES_PTR+20]
+ ldd [S1_PTR+24],%g2
+ ldd [S2_PTR+24],%o4
addxcc %g2,%o4,%g2
- st %g2,[res_ptr+24]
+ st %g2,[RES_PTR+24]
addxcc %g3,%o5,%g3
- st %g3,[res_ptr+28]
+ st %g3,[RES_PTR+28]
addx %g0,%g0,%o4 ! save cy in register
- addcc size,-8,size
- add s1_ptr,32,s1_ptr
- add s2_ptr,32,s2_ptr
- add res_ptr,32,res_ptr
- bge Loop2
+ addcc SIZE,-8,SIZE
+ add S1_PTR,32,S1_PTR
+ add S2_PTR,32,S2_PTR
+ add RES_PTR,32,RES_PTR
+ bge LOC(loop2)
subcc %g0,%o4,%g0 ! restore cy
-Lfin2: addcc size,8-2,size
- blt Lend2
+LOC(fin2):
+ addcc SIZE,8-2,SIZE
+ blt LOC(end2)
subcc %g0,%o4,%g0 ! restore cy
-Loope2: ldd [s1_ptr+0],%g2
- ldd [s2_ptr+0],%o4
+LOC(loope2):
+ ldd [S1_PTR+0],%g2
+ ldd [S2_PTR+0],%o4
addxcc %g2,%o4,%g2
- st %g2,[res_ptr+0]
+ st %g2,[RES_PTR+0]
addxcc %g3,%o5,%g3
- st %g3,[res_ptr+4]
+ st %g3,[RES_PTR+4]
addx %g0,%g0,%o4 ! save cy in register
- addcc size,-2,size
- add s1_ptr,8,s1_ptr
- add s2_ptr,8,s2_ptr
- add res_ptr,8,res_ptr
- bge Loope2
+ addcc SIZE,-2,SIZE
+ add S1_PTR,8,S1_PTR
+ add S2_PTR,8,S2_PTR
+ add RES_PTR,8,RES_PTR
+ bge LOC(loope2)
subcc %g0,%o4,%g0 ! restore cy
-Lend2: andcc size,1,%g0
- be Lret2
+LOC(end2):
+ andcc SIZE,1,%g0
+ be LOC(ret2)
subcc %g0,%o4,%g0 ! restore cy
/* Add last limb */
-Ljone: ld [s1_ptr],%g4
- ld [s2_ptr],%g2
+LOC(jone):
+ ld [S1_PTR],%g4
+ ld [S2_PTR],%g2
addxcc %g4,%g2,%o4
- st %o4,[res_ptr]
+ st %o4,[RES_PTR]
-Lret2: retl
+LOC(ret2):
+ retl
addx %g0,%g0,%o0 ! return carry-out from most sign. limb
+
+END(__mpn_add_n)
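
In C terms the routine above computes the following (a reference sketch with a hypothetical name, not the shipped code; mp_limb_t stands for the 32-bit SPARC limb):

    typedef unsigned int mp_limb_t;     /* 32-bit limb */

    /* res[0..size-1] = s1[] + s2[], size > 0; returns the carry out of
       the most significant limb, which is what ends up in %o0.  */
    mp_limb_t
    mpn_add_n_ref (mp_limb_t *res, const mp_limb_t *s1,
                   const mp_limb_t *s2, long size)
    {
      mp_limb_t cy = 0;
      long i;
      for (i = 0; i < size; i++)
        {
          mp_limb_t a = s1[i];
          mp_limb_t s = a + s2[i];
          mp_limb_t c = s < a;          /* carry from the limb add */
          s += cy;
          c += s < cy;                  /* carry from adding the old cy */
          res[i] = s;
          cy = c;
        }
      return cy;
    }

The assembly keeps this carry chain alive across the loop bookkeeping by parking it in %o4 (addx %g0,%g0,%o4) and reloading it into the condition codes (subcc %g0,%o4,%g0), since the addcc on the counters would otherwise clobber the carry flag.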
! SPARC __mpn_addmul_1 -- Multiply a limb vector with a limb and add
! the result to a second limb vector.
-
-! Copyright (C) 1992, 1993, 1994 Free Software Foundation, Inc.
-
+!
+! Copyright (C) 1992, 1993, 1994, 1997 Free Software Foundation, Inc.
+!
! This file is part of the GNU MP Library.
-
+!
! The GNU MP Library is free software; you can redistribute it and/or modify
! it under the terms of the GNU Library General Public License as published by
! the Free Software Foundation; either version 2 of the License, or (at your
! option) any later version.
-
+!
! The GNU MP Library is distributed in the hope that it will be useful, but
! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
! License for more details.
-
+!
! You should have received a copy of the GNU Library General Public License
! along with the GNU MP Library; see the file COPYING.LIB. If not, write to
! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
! INPUT PARAMETERS
-! res_ptr o0
-! s1_ptr o1
-! size o2
-! s2_limb o3
+! RES_PTR o0
+! S1_PTR o1
+! SIZE o2
+! S2_LIMB o3
-#include "sysdep.h"
+#include <sysdep.h>
-.text
- .align 4
- .global C_SYMBOL_NAME(__mpn_addmul_1)
-C_SYMBOL_NAME(__mpn_addmul_1):
+ENTRY(__mpn_addmul_1)
! Make S1_PTR and RES_PTR point at the end of their blocks
! and put (- 4 x SIZE) in index/loop counter.
sll %o2,2,%o2
sub %g0,%o2,%o2
cmp %o3,0xfff
- bgu Large
+ bgu LOC(large)
nop
ld [%o1+%o2],%o5
mov 0,%o0
- b L0
+ b LOC(0)
add %o4,-4,%o4
-Loop0:
+LOC(loop0):
addcc %o5,%g1,%g1
ld [%o1+%o2],%o5
addx %o0,%g0,%o0
st %g1,[%o4+%o2]
-L0: wr %g0,%o3,%y
+LOC(0): wr %g0,%o3,%y
sra %o5,31,%g2
and %o3,%g2,%g2
andcc %g1,0,%g1
addcc %g1,%o0,%g1
addx %g2,%g4,%o0 ! add sign-compensation and cy to hi limb
addcc %o2,4,%o2 ! loop counter
- bne Loop0
+ bne LOC(loop0)
ld [%o4+%o2],%o5
addcc %o5,%g1,%g1
st %g1,[%o4+%o2]
-Large: ld [%o1+%o2],%o5
+LOC(large):
+ ld [%o1+%o2],%o5
mov 0,%o0
sra %o3,31,%g4 ! g4 = mask of ones iff S2_LIMB < 0
- b L1
+ b LOC(1)
add %o4,-4,%o4
-Loop:
+LOC(loop):
addcc %o5,%g3,%g3
ld [%o1+%o2],%o5
addx %o0,%g0,%o0
st %g3,[%o4+%o2]
-L1: wr %g0,%o5,%y
+LOC(1): wr %g0,%o5,%y
and %o5,%g4,%g2
andcc %g0,%g0,%g1
mulscc %g1,%o3,%g1
addcc %g3,%o0,%g3
addx %g2,%g1,%o0
addcc %o2,4,%o2
- bne Loop
+ bne LOC(loop)
ld [%o4+%o2],%o5
addcc %o5,%g3,%g3
addx %o0,%g0,%o0
retl
st %g3,[%o4+%o2]
+
+END(__mpn_addmul_1)
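
Reference semantics for the routine above (a C sketch, hypothetical name). The mulscc-based LOC(large) path reaches the same result with a signed multiply step, which is why it adds a sign-compensation term to the high limb (the "add sign-compensation and cy to hi limb" lines):

    typedef unsigned int mp_limb_t;     /* 32-bit limb */

    /* res[0..size-1] += s1[0..size-1] * s2_limb; returns the carry
       limb that propagates past the most significant limb.  */
    mp_limb_t
    mpn_addmul_1_ref (mp_limb_t *res, const mp_limb_t *s1,
                      long size, mp_limb_t s2_limb)
    {
      mp_limb_t cy = 0;
      long i;
      for (i = 0; i < size; i++)
        {
          unsigned long long p =
            (unsigned long long) s1[i] * s2_limb + res[i] + cy;
          res[i] = (mp_limb_t) p;       /* low product limb */
          cy = (mp_limb_t) (p >> 32);   /* high product limb */
        }
      return cy;
    }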
write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA. */
-#include "sysdep.h"
+#include <sysdep.h>
/* Code produced by Sun's C compiler calls this function with two extra
arguments which it makes relocatable symbols but which seem always to be
sub %sp, %o0, %sp /* Push some stack space. */
retl /* Return; the returned buffer leaves 96 */
add %sp, 96, %o0 /* bytes of register save area at the top. */
+END (__builtin_alloca)
dnl
dnl m4 reminder: ifelse(a,b,c,d) => if a is b, then c, else d
define(T, `%g1')dnl
-define(SC, `%g7')dnl
-ifelse(S, `true', `define(SIGN, `%g6')')dnl
+define(SC, `%g2')dnl
+ifelse(S, `true', `define(SIGN, `%g3')')dnl
dnl
dnl This is the recursive definition for developing quotient digits.
dnl
define(DEVELOP_QUOTIENT_BITS,
` ! depth $1, accumulated bits $2
- bl L.$1.eval(2**N+$2)
+ bl LOC($1.eval(2**N+$2))
srl V,1,V
! remainder is positive
subcc R,V,R
` b 9f
add Q, ($2*2+1), Q
', ` DEVELOP_QUOTIENT_BITS(incr($1), `eval(2*$2+1)')')
-L.$1.eval(2**N+$2):
+LOC($1.eval(2**N+$2)):
! remainder is negative
addcc R,V,R
ifelse($1, N,
', ` DEVELOP_QUOTIENT_BITS(incr($1), `eval(2*$2-1)')')
ifelse($1, 1, `9:')')dnl
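Expanded by the recursion above, each pass of the division loop develops N quotient bits non-restoringly: an oversubtraction is never undone; instead the next step adds the shifted divisor back and the accumulated digit steps by -1 rather than +1. A C model of one developed digit (a sketch of the technique, not the generated code):

    /* Develop NBITS quotient bits from partial remainder *r against the
       shifted divisor *v, as DEVELOP_QUOTIENT_BITS does after the
       `sll Q,N,Q' at the top of the loop.  The resulting digit is odd
       and lies in [-(2^NBITS - 1), 2^NBITS - 1].  */
    static void
    develop_quotient_bits (long *r, unsigned long *v,
                           unsigned long *q, int nbits)
    {
      int digit = 0;
      int depth;
      for (depth = 0; depth < nbits; depth++)
        {
          *v >>= 1;
          if (*r >= 0)
            { *r -= (long) *v; digit = 2 * digit + 1; }
          else
            { *r += (long) *v; digit = 2 * digit - 1; }
        }
      *q = (*q << nbits) + digit;       /* digit may be negative; this
                                           wraps like the assembly add */
    }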
-#include "sysdep.h"
-#ifdef __linux__
-#include <asm/traps.h>
-#else
-#ifdef __svr4__
+#include <sysdep.h>
#include <sys/trap.h>
-#else
-#include <machine/trap.h>
-#endif
-#endif
-FUNC(NAME)
+ENTRY(NAME)
ifelse(S, `true',
` ! compute sign of result; if neither is negative, no problem
orcc divisor, dividend, %g0 ! either negative?
1:
cmp R, V ! if divisor exceeds dividend, done
- blu Lgot_result ! (and algorithm fails otherwise)
+ blu LOC(got_result) ! (and algorithm fails otherwise)
clr Q
sethi %hi(1 << (WORDSIZE - TOPBITS - 1)), T
cmp R, T
- blu Lnot_really_big
+ blu LOC(not_really_big)
clr ITER
! `Here the dividend is >= 2**(31-N) or so. We must be careful here,
! Now compute SC.
2: addcc V, V, V
- bcc Lnot_too_big
+ bcc LOC(not_too_big)
add SC, 1, SC
! We get here if the divisor overflowed while shifting.
sll T, TOPBITS, T ! high order bit
srl V, 1, V ! rest of V
add V, T, V
- b Ldo_single_div
+ b LOC(do_single_div)
sub SC, 1, SC
- Lnot_too_big:
+ LOC(not_too_big):
3: cmp V, R
blu 2b
nop
- be Ldo_single_div
+ be LOC(do_single_div)
nop
/* NB: these are commented out in the V8-Sparc manual as well */
/* (I do not understand this) */
! order bit set in the first step, just falling into the regular
! division loop will mess up the first time around.
! So we unroll slightly...
- Ldo_single_div:
+ LOC(do_single_div):
subcc SC, 1, SC
- bl Lend_regular_divide
+ bl LOC(end_regular_divide)
nop
sub R, V, R
mov 1, Q
- b Lend_single_divloop
+ b LOC(end_single_divloop)
nop
- Lsingle_divloop:
+ LOC(single_divloop):
sll Q, 1, Q
bl 1f
srl V, 1, V
add R, V, R
sub Q, 1, Q
2:
- Lend_single_divloop:
+ LOC(end_single_divloop):
subcc SC, 1, SC
- bge Lsingle_divloop
+ bge LOC(single_divloop)
tst R
- b,a Lend_regular_divide
+ b,a LOC(end_regular_divide)
-Lnot_really_big:
+LOC(not_really_big):
1:
sll V, N, V
cmp V, R
bleu 1b
addcc ITER, 1, ITER
- be Lgot_result
+ be LOC(got_result)
sub ITER, 1, ITER
tst R ! set up for initial iteration
-Ldivloop:
+LOC(divloop):
sll Q, N, Q
DEVELOP_QUOTIENT_BITS(1, 0)
-Lend_regular_divide:
+LOC(end_regular_divide):
subcc ITER, 1, ITER
- bge Ldivloop
+ bge LOC(divloop)
tst R
- bl,a Lgot_result
+ bl,a LOC(got_result)
! non-restoring fixup here (one instruction only!)
ifelse(OP, `div',
` sub Q, 1, Q
', ` add R, divisor, R
')
-Lgot_result:
+LOC(got_result):
ifelse(S, `true',
` ! check to see if answer should be < 0
tst SIGN
1:')
retl
ifelse(OP, `div', `mov Q, %o0', `mov R, %o0')
+
+END(NAME)
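
The sign handling the template wraps around the magnitude division matches C semantics: the quotient is negative iff the operand signs differ, and the remainder takes the dividend's sign. A C model of the results (a sketch; the built-in / and % stand in for the shift/subtract loop):

    int
    sdiv_model (int dividend, int divisor)
    {
      unsigned int a = dividend < 0 ? -(unsigned int) dividend
                                    : (unsigned int) dividend;
      unsigned int b = divisor < 0 ? -(unsigned int) divisor
                                   : (unsigned int) divisor;
      unsigned int q = a / b;
      return (dividend ^ divisor) < 0 ? -(int) q : (int) q;
    }

    int
    srem_model (int dividend, int divisor)
    {
      unsigned int a = dividend < 0 ? -(unsigned int) dividend
                                    : (unsigned int) dividend;
      unsigned int b = divisor < 0 ? -(unsigned int) divisor
                                   : (unsigned int) divisor;
      unsigned int r = a % b;
      return dividend < 0 ? -(int) r : (int) r;
    }

This is what the expansions later in the patch record before their loops: `xor %o1, %o0, %g3' in .div and `mov %o0, %g3' in .rem.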
* This code optimizes short (less than 13-bit) multiplies.
*/
-#include "sysdep.h"
+#include <sysdep.h>
+
+
ENTRY(.mul)
mov %o0, %y ! multiplier -> Y
andncc %o0, 0xfff, %g0 ! test bits 12..31
- be Lmul_shortway ! if zero, can do it the short way
+ be LOC(mul_shortway) ! if zero, can do it the short way
andcc %g0, %g0, %o4 ! zero the partial product and clear N and V
/*
! and put upper half in place
#endif
-Lmul_shortway:
+LOC(mul_shortway):
/*
* Short multiply. 12 steps, followed by a final shift step.
* The resulting bits are off by 12 and (32-12) = 20 bit positions,
or %o5, %o0, %o0 ! construct low part of result
retl
sra %o4, 20, %o1 ! ... and extract high part of result
+
+END(.mul)
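
.mul returns the full 64-bit signed product, low word in %o0 and high word in %o1. The 13-bit threshold exists because mulscc develops one product bit per step, so a multiplier with bits 12..31 clear needs only the 12 steps plus the final shift noted above. A C statement of the contract (a sketch):

    /* .mul: 32 x 32 -> 64 signed multiply; callers receive the low
       word in %o0 and the high word in %o1.  */
    long long
    dot_mul_model (int a, int b)
    {
      return (long long) a * b;
    }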
-! sparc __mpn_lshift --
-
-! Copyright (C) 1995, 1996 Free Software Foundation, Inc.
-
+! Sparc __mpn_lshift --
+!
+! Copyright (C) 1995, 1996, 1997 Free Software Foundation, Inc.
+!
! This file is part of the GNU MP Library.
-
+!
! The GNU MP Library is free software; you can redistribute it and/or modify
! it under the terms of the GNU Library General Public License as published by
! the Free Software Foundation; either version 2 of the License, or (at your
! option) any later version.
-
+!
! The GNU MP Library is distributed in the hope that it will be useful, but
! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
! License for more details.
-
+!
! You should have received a copy of the GNU Library General Public License
! along with the GNU MP Library; see the file COPYING.LIB. If not, write to
! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
! INPUT PARAMETERS
-! res_ptr %o0
-! src_ptr %o1
-! size %o2
-! cnt %o3
+! RES_PTR %o0
+! SRC_PTR %o1
+! SIZE %o2
+! CNT %o3
-#include "sysdep.h"
+#include <sysdep.h>
- .text
- .align 4
- .global C_SYMBOL_NAME(__mpn_lshift)
-C_SYMBOL_NAME(__mpn_lshift):
+ENTRY(__mpn_lshift)
sll %o2,2,%g1
add %o1,%g1,%o1 ! make %o1 point at end of src
ld [%o1-4],%g2 ! load first limb
add %o2,-1,%o2
andcc %o2,4-1,%g4 ! number of limbs in first loop
srl %g2,%o5,%g1 ! compute function result
- be L0 ! if multiple of 4 limbs, skip first loop
+ be LOC(0) ! if multiple of 4 limbs, skip first loop
st %g1,[%sp+80]
sub %o2,%g4,%o2 ! adjust count for main loop
-Loop0: ld [%o1-8],%g3
+LOC(loop0):
+ ld [%o1-8],%g3
add %o0,-4,%o0
add %o1,-4,%o1
addcc %g4,-1,%g4
srl %g3,%o5,%g1
mov %g3,%g2
or %o4,%g1,%o4
- bne Loop0
+ bne LOC(loop0)
st %o4,[%o0+0]
-L0: tst %o2
- be Lend
+LOC(0): tst %o2
+ be LOC(end)
nop
-Loop: ld [%o1-8],%g3
+LOC(loop):
+ ld [%o1-8],%g3
add %o0,-16,%o0
addcc %o2,-4,%o2
sll %g2,%o3,%o4
add %o1,-16,%o1
or %g4,%g1,%g4
- bne Loop
+ bne LOC(loop)
st %g4,[%o0+0]
-Lend: sll %g2,%o3,%g2
+LOC(end):
+ sll %g2,%o3,%g2
st %g2,[%o0-4]
retl
ld [%sp+80],%o0
+
+END(__mpn_lshift)
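
Reference semantics for the routine above (a C sketch, hypothetical name). It walks from the most significant limb downwards and returns the bits shifted out of the top limb; %o5 holds the negated count, which acts as 32-cnt because SPARC shift instructions use only the low five bits of the count:

    typedef unsigned int mp_limb_t;     /* 32-bit limb */

    /* res[] = src[] << cnt, 0 < cnt < 32, size > 0; returns the bits
       shifted out of the most significant limb.  */
    mp_limb_t
    mpn_lshift_ref (mp_limb_t *res, const mp_limb_t *src,
                    long size, unsigned int cnt)
    {
      unsigned int tnc = 32 - cnt;
      mp_limb_t high = src[size - 1];
      mp_limb_t retval = high >> tnc;
      long i;
      for (i = size - 1; i > 0; i--)
        {
          mp_limb_t low = src[i - 1];
          res[i] = (high << cnt) | (low >> tnc);
          high = low;
        }
      res[0] = high << cnt;
      return retval;
    }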
! SPARC __mpn_mul_1 -- Multiply a limb vector with a limb and store
! the result in a second limb vector.
-
-! Copyright (C) 1992, 1993, 1994 Free Software Foundation, Inc.
-
+!
+! Copyright (C) 1992, 1993, 1994, 1997 Free Software Foundation, Inc.
+!
! This file is part of the GNU MP Library.
-
+!
! The GNU MP Library is free software; you can redistribute it and/or modify
! it under the terms of the GNU Library General Public License as published by
! the Free Software Foundation; either version 2 of the License, or (at your
! option) any later version.
-
+!
! The GNU MP Library is distributed in the hope that it will be useful, but
! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
! License for more details.
-
+!
! You should have received a copy of the GNU Library General Public License
! along with the GNU MP Library; see the file COPYING.LIB. If not, write to
! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
! INPUT PARAMETERS
-! res_ptr o0
-! s1_ptr o1
-! size o2
-! s2_limb o3
+! RES_PTR o0
+! S1_PTR o1
+! SIZE o2
+! S2_LIMB o3
! ADD CODE FOR SMALL MULTIPLIERS!
!1: ld
! sll a,29,y2
! st x,
-#include "sysdep.h"
+#include <sysdep.h>
-.text
- .align 4
- .global C_SYMBOL_NAME(__mpn_mul_1)
-C_SYMBOL_NAME(__mpn_mul_1):
+ENTRY(__mpn_mul_1)
! Make S1_PTR and RES_PTR point at the end of their blocks
! and put (- 4 x SIZE) in index/loop counter.
sll %o2,2,%o2
sub %g0,%o2,%o2
cmp %o3,0xfff
- bgu Large
+ bgu LOC(large)
nop
ld [%o1+%o2],%o5
mov 0,%o0
- b L0
+ b LOC(0)
add %o4,-4,%o4
-Loop0:
+LOC(loop0):
st %g1,[%o4+%o2]
-L0: wr %g0,%o3,%y
+LOC(0): wr %g0,%o3,%y
sra %o5,31,%g2
and %o3,%g2,%g2
andcc %g1,0,%g1
addcc %g1,%o0,%g1
addx %g2,%g4,%o0 ! add sign-compensation and cy to hi limb
addcc %o2,4,%o2 ! loop counter
- bne,a Loop0
+ bne,a LOC(loop0)
ld [%o1+%o2],%o5
retl
st %g1,[%o4+%o2]
-Large: ld [%o1+%o2],%o5
+LOC(large):
+ ld [%o1+%o2],%o5
mov 0,%o0
sra %o3,31,%g4 ! g4 = mask of ones iff S2_LIMB < 0
- b L1
+ b LOC(1)
add %o4,-4,%o4
-Loop:
+LOC(loop):
st %g3,[%o4+%o2]
-L1: wr %g0,%o5,%y
+LOC(1): wr %g0,%o5,%y
and %o5,%g4,%g2 ! g2 = S1_LIMB iff S2_LIMB < 0, else 0
andcc %g0,%g0,%g1
mulscc %g1,%o3,%g1
addcc %g3,%o0,%g3
addx %g2,%g1,%o0 ! add sign-compensation and cy to hi limb
addcc %o2,4,%o2 ! loop counter
- bne,a Loop
+ bne,a LOC(loop)
ld [%o1+%o2],%o5
retl
st %g3,[%o4+%o2]
+
+END(__mpn_mul_1)
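
Reference semantics for the routine above (a C sketch, hypothetical name); it is addmul_1 without the read-modify-write of the destination:

    typedef unsigned int mp_limb_t;     /* 32-bit limb */

    /* res[0..size-1] = s1[0..size-1] * s2_limb; returns the most
       significant limb of the product.  */
    mp_limb_t
    mpn_mul_1_ref (mp_limb_t *res, const mp_limb_t *s1,
                   long size, mp_limb_t s2_limb)
    {
      mp_limb_t cy = 0;
      long i;
      for (i = 0; i < size; i++)
        {
          unsigned long long p =
            (unsigned long long) s1[i] * s2_limb + cy;
          res[i] = (mp_limb_t) p;
          cy = (mp_limb_t) (p >> 32);
        }
      return cy;
    }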
-#include "sysdep.h"
-#ifdef __linux__
-#include <asm/traps.h>
-#else
-#ifdef __svr4__
+#include <sysdep.h>
#include <sys/trap.h>
-#else
-#include <machine/trap.h>
-#endif
-#endif
ENTRY(.rem)
! compute sign of result; if neither is negative, no problem
orcc %o1, %o0, %g0 ! either negative?
bge 2f ! no, go do the divide
- mov %o0, %g6 ! sign of remainder matches %o0
+ mov %o0, %g3 ! sign of remainder matches %o0
tst %o1
bge 1f
tst %o0
1:
cmp %o3, %o5 ! if %o1 exceeds %o0, done
- blu Lgot_result ! (and algorithm fails otherwise)
+ blu LOC(got_result) ! (and algorithm fails otherwise)
clr %o2
sethi %hi(1 << (32 - 4 - 1)), %g1
cmp %o3, %g1
- blu Lnot_really_big
+ blu LOC(not_really_big)
clr %o4
! Here the dividend is >= 2**(31-N) or so. We must be careful here,
1:
cmp %o5, %g1
bgeu 3f
- mov 1, %g7
+ mov 1, %g2
sll %o5, 4, %o5
b 1b
add %o4, 1, %o4
- ! Now compute %g7.
+ ! Now compute %g2.
2: addcc %o5, %o5, %o5
- bcc Lnot_too_big
- add %g7, 1, %g7
+ bcc LOC(not_too_big)
+ add %g2, 1, %g2
! We get here if the %o1 overflowed while shifting.
! This means that %o3 has the high-order bit set.
sll %g1, 4, %g1 ! high order bit
srl %o5, 1, %o5 ! rest of %o5
add %o5, %g1, %o5
- b Ldo_single_div
- sub %g7, 1, %g7
+ b LOC(do_single_div)
+ sub %g2, 1, %g2
- Lnot_too_big:
+ LOC(not_too_big):
3: cmp %o5, %o3
blu 2b
nop
- be Ldo_single_div
+ be LOC(do_single_div)
nop
/* NB: these are commented out in the V8-Sparc manual as well */
/* (I do not understand this) */
! %o5 > %o3: went too far: back up 1 step
! srl %o5, 1, %o5
- ! dec %g7
+ ! dec %g2
! do single-bit divide steps
!
! We have to be careful here. We know that %o3 >= %o5, so we can do the
! order bit set in the first step, just falling into the regular
! division loop will mess up the first time around.
! So we unroll slightly...
- Ldo_single_div:
- subcc %g7, 1, %g7
- bl Lend_regular_divide
+ LOC(do_single_div):
+ subcc %g2, 1, %g2
+ bl LOC(end_regular_divide)
nop
sub %o3, %o5, %o3
mov 1, %o2
- b Lend_single_divloop
+ b LOC(end_single_divloop)
nop
- Lsingle_divloop:
+ LOC(single_divloop):
sll %o2, 1, %o2
bl 1f
srl %o5, 1, %o5
add %o3, %o5, %o3
sub %o2, 1, %o2
2:
- Lend_single_divloop:
- subcc %g7, 1, %g7
- bge Lsingle_divloop
+ LOC(end_single_divloop):
+ subcc %g2, 1, %g2
+ bge LOC(single_divloop)
tst %o3
- b,a Lend_regular_divide
+ b,a LOC(end_regular_divide)
-Lnot_really_big:
+LOC(not_really_big):
1:
sll %o5, 4, %o5
cmp %o5, %o3
bleu 1b
addcc %o4, 1, %o4
- be Lgot_result
+ be LOC(got_result)
sub %o4, 1, %o4
tst %o3 ! set up for initial iteration
-Ldivloop:
+LOC(divloop):
sll %o2, 4, %o2
! depth 1, accumulated bits 0
- bl L.1.16
+ bl LOC(1.16)
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
! depth 2, accumulated bits 1
- bl L.2.17
+ bl LOC(2.17)
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
! depth 3, accumulated bits 3
- bl L.3.19
+ bl LOC(3.19)
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
! depth 4, accumulated bits 7
- bl L.4.23
+ bl LOC(4.23)
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
b 9f
add %o2, (7*2+1), %o2
-
-L.4.23:
+
+LOC(4.23):
! remainder is negative
addcc %o3,%o5,%o3
b 9f
add %o2, (7*2-1), %o2
-
-
-L.3.19:
+
+
+LOC(3.19):
! remainder is negative
addcc %o3,%o5,%o3
! depth 4, accumulated bits 5
- bl L.4.21
+ bl LOC(4.21)
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
b 9f
add %o2, (5*2+1), %o2
-
-L.4.21:
+
+LOC(4.21):
! remainder is negative
addcc %o3,%o5,%o3
b 9f
add %o2, (5*2-1), %o2
-
-
-
-L.2.17:
+
+
+
+LOC(2.17):
! remainder is negative
addcc %o3,%o5,%o3
! depth 3, accumulated bits 1
- bl L.3.17
+ bl LOC(3.17)
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
! depth 4, accumulated bits 3
- bl L.4.19
+ bl LOC(4.19)
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
b 9f
add %o2, (3*2+1), %o2
-
-L.4.19:
+
+LOC(4.19):
! remainder is negative
addcc %o3,%o5,%o3
b 9f
add %o2, (3*2-1), %o2
-
-
-L.3.17:
+
+
+LOC(3.17):
! remainder is negative
addcc %o3,%o5,%o3
! depth 4, accumulated bits 1
- bl L.4.17
+ bl LOC(4.17)
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
b 9f
add %o2, (1*2+1), %o2
-
-L.4.17:
+
+LOC(4.17):
! remainder is negative
addcc %o3,%o5,%o3
b 9f
add %o2, (1*2-1), %o2
-
-
-
-
-L.1.16:
+
+
+
+
+LOC(1.16):
! remainder is negative
addcc %o3,%o5,%o3
! depth 2, accumulated bits -1
- bl L.2.15
+ bl LOC(2.15)
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
! depth 3, accumulated bits -1
- bl L.3.15
+ bl LOC(3.15)
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
! depth 4, accumulated bits -1
- bl L.4.15
+ bl LOC(4.15)
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
b 9f
add %o2, (-1*2+1), %o2
-
-L.4.15:
+
+LOC(4.15):
! remainder is negative
addcc %o3,%o5,%o3
b 9f
add %o2, (-1*2-1), %o2
-
-
-L.3.15:
+
+
+LOC(3.15):
! remainder is negative
addcc %o3,%o5,%o3
! depth 4, accumulated bits -3
- bl L.4.13
+ bl LOC(4.13)
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
b 9f
add %o2, (-3*2+1), %o2
-
-L.4.13:
+
+LOC(4.13):
! remainder is negative
addcc %o3,%o5,%o3
b 9f
add %o2, (-3*2-1), %o2
-
-
-
-L.2.15:
+
+
+
+LOC(2.15):
! remainder is negative
addcc %o3,%o5,%o3
! depth 3, accumulated bits -3
- bl L.3.13
+ bl LOC(3.13)
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
! depth 4, accumulated bits -5
- bl L.4.11
+ bl LOC(4.11)
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
b 9f
add %o2, (-5*2+1), %o2
-
-L.4.11:
+
+LOC(4.11):
! remainder is negative
addcc %o3,%o5,%o3
b 9f
add %o2, (-5*2-1), %o2
-
-
-L.3.13:
+
+
+LOC(3.13):
! remainder is negative
addcc %o3,%o5,%o3
! depth 4, accumulated bits -7
- bl L.4.9
+ bl LOC(4.9)
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
b 9f
add %o2, (-7*2+1), %o2
-
-L.4.9:
+
+LOC(4.9):
! remainder is negative
addcc %o3,%o5,%o3
b 9f
add %o2, (-7*2-1), %o2
-
-
-
-
+
+
+
+
9:
-Lend_regular_divide:
+LOC(end_regular_divide):
subcc %o4, 1, %o4
- bge Ldivloop
+ bge LOC(divloop)
tst %o3
- bl,a Lgot_result
+ bl,a LOC(got_result)
! non-restoring fixup here (one instruction only!)
add %o3, %o1, %o3
-Lgot_result:
+LOC(got_result):
! check to see if answer should be < 0
- tst %g6
+ tst %g3
bl,a 1f
sub %g0, %o3, %o3
1:
retl
mov %o3, %o0
+
+END(.rem)
! sparc __mpn_rshift --
-
-! Copyright (C) 1995, 1996 Free Software Foundation, Inc.
-
+!
+! Copyright (C) 1995, 1996, 1997 Free Software Foundation, Inc.
+!
! This file is part of the GNU MP Library.
-
+!
! The GNU MP Library is free software; you can redistribute it and/or modify
! it under the terms of the GNU Library General Public License as published by
! the Free Software Foundation; either version 2 of the License, or (at your
! option) any later version.
-
+!
! The GNU MP Library is distributed in the hope that it will be useful, but
! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
! License for more details.
-
+!
! You should have received a copy of the GNU Library General Public License
! along with the GNU MP Library; see the file COPYING.LIB. If not, write to
! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
! INPUT PARAMETERS
-! res_ptr %o0
-! src_ptr %o1
-! size %o2
-! cnt %o3
+! RES_PTR %o0
+! SRC_PTR %o1
+! SIZE %o2
+! CNT %o3
-#include "sysdep.h"
+#include <sysdep.h>
- .text
- .align 4
- .global C_SYMBOL_NAME(__mpn_rshift)
-C_SYMBOL_NAME(__mpn_rshift):
+ENTRY(__mpn_rshift)
ld [%o1],%g2 ! load first limb
sub %g0,%o3,%o5 ! negate shift count
add %o2,-1,%o2
andcc %o2,4-1,%g4 ! number of limbs in first loop
sll %g2,%o5,%g1 ! compute function result
- be L0 ! if multiple of 4 limbs, skip first loop
+ be LOC(0) ! if multiple of 4 limbs, skip first loop
st %g1,[%sp+80]
sub %o2,%g4,%o2 ! adjust count for main loop
-Loop0: ld [%o1+4],%g3
+LOC(loop0):
+ ld [%o1+4],%g3
add %o0,4,%o0
add %o1,4,%o1
addcc %g4,-1,%g4
sll %g3,%o5,%g1
mov %g3,%g2
or %o4,%g1,%o4
- bne Loop0
+ bne LOC(loop0)
st %o4,[%o0-4]
-L0: tst %o2
- be Lend
+LOC(0): tst %o2
+ be LOC(end)
nop
-Loop: ld [%o1+4],%g3
+LOC(loop):
+ ld [%o1+4],%g3
add %o0,16,%o0
addcc %o2,-4,%o2
srl %g2,%o3,%o4
add %o1,16,%o1
or %g4,%g1,%g4
- bne Loop
+ bne LOC(loop)
st %g4,[%o0-4]
-Lend: srl %g2,%o3,%g2
+LOC(end):
+ srl %g2,%o3,%g2
st %g2,[%o0-0]
retl
ld [%sp+80],%o0
+
+END(__mpn_rshift)
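
Reference semantics (a C sketch, hypothetical name): the mirror image of __mpn_lshift, running from the least significant limb upwards and returning the shifted-out low bits left-justified. The `sub %g0,%o3,%o5' negation again yields 32-cnt because SPARC shifts use only the low five bits of the count:

    typedef unsigned int mp_limb_t;     /* 32-bit limb */

    /* res[] = src[] >> cnt, 0 < cnt < 32, size > 0; returns the bits
       shifted out of the least significant limb.  */
    mp_limb_t
    mpn_rshift_ref (mp_limb_t *res, const mp_limb_t *src,
                    long size, unsigned int cnt)
    {
      unsigned int tnc = 32 - cnt;
      mp_limb_t low = src[0];
      mp_limb_t retval = low << tnc;
      long i;
      for (i = 0; i < size - 1; i++)
        {
          mp_limb_t high = src[i + 1];
          res[i] = (low >> cnt) | (high << tnc);
          low = high;
        }
      res[size - 1] = low >> cnt;
      return retval;
    }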
-#include "sysdep.h"
-#ifdef __linux__
-#include <asm/traps.h>
-#else
-#ifdef __svr4__
+#include <sysdep.h>
#include <sys/trap.h>
-#else
-#include <machine/trap.h>
-#endif
-#endif
ENTRY(.div)
! compute sign of result; if neither is negative, no problem
orcc %o1, %o0, %g0 ! either negative?
bge 2f ! no, go do the divide
- xor %o1, %o0, %g6 ! compute sign in any case
+ xor %o1, %o0, %g3 ! compute sign in any case
tst %o1
bge 1f
tst %o0
1:
cmp %o3, %o5 ! if %o1 exceeds %o0, done
- blu Lgot_result ! (and algorithm fails otherwise)
+ blu LOC(got_result) ! (and algorithm fails otherwise)
clr %o2
sethi %hi(1 << (32 - 4 - 1)), %g1
cmp %o3, %g1
- blu Lnot_really_big
+ blu LOC(not_really_big)
clr %o4
! Here the dividend is >= 2**(31-N) or so. We must be careful here,
1:
cmp %o5, %g1
bgeu 3f
- mov 1, %g7
+ mov 1, %g2
sll %o5, 4, %o5
b 1b
add %o4, 1, %o4
- ! Now compute %g7.
+ ! Now compute %g2.
2: addcc %o5, %o5, %o5
- bcc Lnot_too_big
- add %g7, 1, %g7
+ bcc LOC(not_too_big)
+ add %g2, 1, %g2
! We get here if the %o1 overflowed while shifting.
! This means that %o3 has the high-order bit set.
sll %g1, 4, %g1 ! high order bit
srl %o5, 1, %o5 ! rest of %o5
add %o5, %g1, %o5
- b Ldo_single_div
- sub %g7, 1, %g7
+ b LOC(do_single_div)
+ sub %g2, 1, %g2
- Lnot_too_big:
+ LOC(not_too_big):
3: cmp %o5, %o3
blu 2b
nop
- be Ldo_single_div
+ be LOC(do_single_div)
nop
/* NB: these are commented out in the V8-Sparc manual as well */
/* (I do not understand this) */
! %o5 > %o3: went too far: back up 1 step
! srl %o5, 1, %o5
- ! dec %g7
+ ! dec %g2
! do single-bit divide steps
!
! We have to be careful here. We know that %o3 >= %o5, so we can do the
! order bit set in the first step, just falling into the regular
! division loop will mess up the first time around.
! So we unroll slightly...
- Ldo_single_div:
- subcc %g7, 1, %g7
- bl Lend_regular_divide
+ LOC(do_single_div):
+ subcc %g2, 1, %g2
+ bl LOC(end_regular_divide)
nop
sub %o3, %o5, %o3
mov 1, %o2
- b Lend_single_divloop
+ b LOC(end_single_divloop)
nop
- Lsingle_divloop:
+ LOC(single_divloop):
sll %o2, 1, %o2
bl 1f
srl %o5, 1, %o5
add %o3, %o5, %o3
sub %o2, 1, %o2
2:
- Lend_single_divloop:
- subcc %g7, 1, %g7
- bge Lsingle_divloop
+ LOC(end_single_divloop):
+ subcc %g2, 1, %g2
+ bge LOC(single_divloop)
tst %o3
- b,a Lend_regular_divide
+ b,a LOC(end_regular_divide)
-Lnot_really_big:
+LOC(not_really_big):
1:
sll %o5, 4, %o5
cmp %o5, %o3
bleu 1b
addcc %o4, 1, %o4
- be Lgot_result
+ be LOC(got_result)
sub %o4, 1, %o4
tst %o3 ! set up for initial iteration
-Ldivloop:
+LOC(divloop):
sll %o2, 4, %o2
! depth 1, accumulated bits 0
- bl L.1.16
+ bl LOC(1.16)
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
! depth 2, accumulated bits 1
- bl L.2.17
+ bl LOC(2.17)
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
! depth 3, accumulated bits 3
- bl L.3.19
+ bl LOC(3.19)
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
! depth 4, accumulated bits 7
- bl L.4.23
+ bl LOC(4.23)
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
b 9f
add %o2, (7*2+1), %o2
-
-L.4.23:
+
+LOC(4.23):
! remainder is negative
addcc %o3,%o5,%o3
b 9f
add %o2, (7*2-1), %o2
-
-
-L.3.19:
+
+
+LOC(3.19):
! remainder is negative
addcc %o3,%o5,%o3
! depth 4, accumulated bits 5
- bl L.4.21
+ bl LOC(4.21)
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
b 9f
add %o2, (5*2+1), %o2
-
-L.4.21:
+
+LOC(4.21):
! remainder is negative
addcc %o3,%o5,%o3
b 9f
add %o2, (5*2-1), %o2
-
-
-
-L.2.17:
+
+
+
+LOC(2.17):
! remainder is negative
addcc %o3,%o5,%o3
! depth 3, accumulated bits 1
- bl L.3.17
+ bl LOC(3.17)
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
! depth 4, accumulated bits 3
- bl L.4.19
+ bl LOC(4.19)
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
b 9f
add %o2, (3*2+1), %o2
-
-L.4.19:
+
+LOC(4.19):
! remainder is negative
addcc %o3,%o5,%o3
b 9f
add %o2, (3*2-1), %o2
-
-
-L.3.17:
+
+
+LOC(3.17):
! remainder is negative
addcc %o3,%o5,%o3
! depth 4, accumulated bits 1
- bl L.4.17
+ bl LOC(4.17)
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
b 9f
add %o2, (1*2+1), %o2
-
-L.4.17:
+
+LOC(4.17):
! remainder is negative
addcc %o3,%o5,%o3
b 9f
add %o2, (1*2-1), %o2
-
-
-
-
-L.1.16:
+
+
+
+
+LOC(1.16):
! remainder is negative
addcc %o3,%o5,%o3
! depth 2, accumulated bits -1
- bl L.2.15
+ bl LOC(2.15)
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
! depth 3, accumulated bits -1
- bl L.3.15
+ bl LOC(3.15)
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
! depth 4, accumulated bits -1
- bl L.4.15
+ bl LOC(4.15)
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
b 9f
add %o2, (-1*2+1), %o2
-
-L.4.15:
+
+LOC(4.15):
! remainder is negative
addcc %o3,%o5,%o3
b 9f
add %o2, (-1*2-1), %o2
-
-
-L.3.15:
+
+
+LOC(3.15):
! remainder is negative
addcc %o3,%o5,%o3
! depth 4, accumulated bits -3
- bl L.4.13
+ bl LOC(4.13)
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
b 9f
add %o2, (-3*2+1), %o2
-
-L.4.13:
+
+LOC(4.13):
! remainder is negative
addcc %o3,%o5,%o3
b 9f
add %o2, (-3*2-1), %o2
-
-
-
-L.2.15:
+
+
+
+LOC(2.15):
! remainder is negative
addcc %o3,%o5,%o3
! depth 3, accumulated bits -3
- bl L.3.13
+ bl LOC(3.13)
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
! depth 4, accumulated bits -5
- bl L.4.11
+ bl LOC(4.11)
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
b 9f
add %o2, (-5*2+1), %o2
-
-L.4.11:
+
+LOC(4.11):
! remainder is negative
addcc %o3,%o5,%o3
b 9f
add %o2, (-5*2-1), %o2
-
-
-L.3.13:
+
+
+LOC(3.13):
! remainder is negative
addcc %o3,%o5,%o3
! depth 4, accumulated bits -7
- bl L.4.9
+ bl LOC(4.9)
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
b 9f
add %o2, (-7*2+1), %o2
-
-L.4.9:
+
+LOC(4.9):
! remainder is negative
addcc %o3,%o5,%o3
b 9f
add %o2, (-7*2-1), %o2
-
-
-
-
+
+
+
+
9:
-Lend_regular_divide:
+LOC(end_regular_divide):
subcc %o4, 1, %o4
- bge Ldivloop
+ bge LOC(divloop)
tst %o3
- bl,a Lgot_result
+ bl,a LOC(got_result)
! non-restoring fixup here (one instruction only!)
sub %o2, 1, %o2
-Lgot_result:
+LOC(got_result):
! check to see if answer should be < 0
- tst %g6
+ tst %g3
bl,a 1f
sub %g0, %o2, %o2
1:
retl
mov %o2, %o0
+
+END(.div)
! size o2
! s2_limb o3
-#include "sysdep.h"
+#include <sysdep.h>
-.text
- .align 4
- .global C_SYMBOL_NAME(__mpn_addmul_1)
-C_SYMBOL_NAME(__mpn_addmul_1):
- orcc %g0,%g0,%g2
+ENTRY(__mpn_addmul_1)
ld [%o1+0],%o4 ! 1
-
sll %o2,4,%g1
- and %g1,(4-1)<<4,%g1
-#if PIC
+ orcc %g0,%g0,%g2
mov %o7,%g4 ! Save return address register
- call 1f
- add %o7,LL-1f,%g3
-1: mov %g4,%o7 ! Restore return address register
-#else
- sethi %hi(LL),%g3
- or %g3,%lo(LL),%g3
-#endif
- jmp %g3+%g1
- nop
-LL:
-LL00: add %o0,-4,%o0
- b Loop00 /* 4, 8, 12, ... */
- add %o1,-4,%o1
- nop
-LL01: b Loop01 /* 1, 5, 9, ... */
+ and %g1,(4-1)<<4,%g1
+1: call 2f
+ add %o7,3f-1b,%g3
+2: jmp %g3+%g1
+ mov %g4,%o7 ! Restore return address register
+
+ .align 4
+3:
+LOC(00):
+ add %o0,-4,%o0
+ b LOC(loop00) /* 4, 8, 12, ... */
+ add %o1,-4,%o1
nop
+LOC(01):
+ b LOC(loop01) /* 1, 5, 9, ... */
+ nop
nop
nop
-LL10: add %o0,-12,%o0 /* 2, 6, 10, ... */
- b Loop10
- add %o1,4,%o1
+LOC(10):
+ add %o0,-12,%o0 /* 2, 6, 10, ... */
+ b LOC(loop10)
+ add %o1,4,%o1
nop
-LL11: add %o0,-8,%o0 /* 3, 7, 11, ... */
- b Loop11
- add %o1,-8,%o1
+LOC(11):
+ add %o0,-8,%o0 /* 3, 7, 11, ... */
+ b LOC(loop11)
+ add %o1,-8,%o1
nop
-1: addcc %g3,%g2,%g3 ! 1
+LOC(loop):
+ addcc %g3,%g2,%g3 ! 1
ld [%o1+4],%o4 ! 2
rd %y,%g2 ! 1
addx %g0,%g2,%g2
ld [%o0+0],%g1 ! 2
addcc %g1,%g3,%g3
st %g3,[%o0+0] ! 1
-Loop00: umul %o4,%o3,%g3 ! 2
+LOC(loop00):
+ umul %o4,%o3,%g3 ! 2
ld [%o0+4],%g1 ! 2
addxcc %g3,%g2,%g3 ! 2
ld [%o1+8],%o4 ! 3
nop
addcc %g1,%g3,%g3
st %g3,[%o0+4] ! 2
-Loop11: umul %o4,%o3,%g3 ! 3
+LOC(loop11):
+ umul %o4,%o3,%g3 ! 3
addxcc %g3,%g2,%g3 ! 3
ld [%o1+12],%o4 ! 4
rd %y,%g2 ! 3
ld [%o0+8],%g1 ! 2
addcc %g1,%g3,%g3
st %g3,[%o0+8] ! 3
-Loop10: umul %o4,%o3,%g3 ! 4
+LOC(loop10):
+ umul %o4,%o3,%g3 ! 4
addxcc %g3,%g2,%g3 ! 4
ld [%o1+0],%o4 ! 1
rd %y,%g2 ! 4
st %g3,[%o0+12] ! 4
add %o0,16,%o0
addx %g0,%g2,%g2
-Loop01: addcc %o2,-4,%o2
- bg 1b
- umul %o4,%o3,%g3 ! 1
+LOC(loop01):
+ addcc %o2,-4,%o2
+ bg LOC(loop)
+ umul %o4,%o3,%g3 ! 1
addcc %g3,%g2,%g3 ! 4
rd %y,%g2 ! 4
ld [%o0+0],%g1 ! 2
addcc %g1,%g3,%g3
st %g3,[%o0+0] ! 4
- addx %g0,%g2,%o0
-
retl
- nop
-
-
-! umul, ld, addxcc, rd, st
-
-! umul, ld, addxcc, rd, ld, addcc, st, addx
+ addx %g0,%g2,%o0
+END(__mpn_addmul_1)
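
The computed `jmp %g3+%g1' above enters the four-way unrolled umul loop at the point matching SIZE mod 4: %g1 is (SIZE mod 4) * 16, sixteen bytes (four instructions) per dispatch stub, and each LOC(00)/LOC(01)/LOC(10)/LOC(11) stub pre-biases the pointers for its entry point. In C the same dispatch is a Duff's-device switch (a sketch with hypothetical names):

    typedef unsigned int mp_limb_t;     /* 32-bit limb */

    mp_limb_t
    mpn_addmul_1_duff (mp_limb_t *res, const mp_limb_t *s1,
                       long size, mp_limb_t limb)   /* size > 0 assumed */
    {
      mp_limb_t cy = 0;
      long i = 0;
      long n = (size + 3) / 4;          /* rounds of four steps */
    #define STEP                                                \
      do {                                                      \
        unsigned long long p =                                  \
          (unsigned long long) s1[i] * limb + res[i] + cy;      \
        res[i] = (mp_limb_t) p;                                 \
        cy = (mp_limb_t) (p >> 32);                             \
        i++;                                                    \
      } while (0)
      switch (size % 4)
        {
        case 0: do { STEP;              /* enter mid-loop ... */
        case 3:      STEP;
        case 2:      STEP;
        case 1:      STEP;
                } while (--n > 0);      /* ... then loop whole rounds */
        }
    #undef STEP
      return cy;
    }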
! size o2
! s2_limb o3
-#include "sysdep.h"
+#include <sysdep.h>
-.text
- .align 8
- .global C_SYMBOL_NAME(__mpn_mul_1)
-C_SYMBOL_NAME(__mpn_mul_1):
+ENTRY(__mpn_mul_1)
sll %o2,4,%g1
- and %g1,(4-1)<<4,%g1
-#if PIC
mov %o7,%g4 ! Save return address register
- call 1f
- add %o7,LL-1f,%g3
-1: mov %g4,%o7 ! Restore return address register
-#else
- sethi %hi(LL),%g3
- or %g3,%lo(LL),%g3
-#endif
+ and %g1,(4-1)<<4,%g1
+1: call 2f
+ add %o7,3f-1b,%g3
+2: mov %g4,%o7 ! Restore return address register
jmp %g3+%g1
- ld [%o1+0],%o4 ! 1
-LL:
-LL00: add %o0,-4,%o0
+ ld [%o1+0],%o4 ! 1
+
+ .align 4
+3:
+LOC(00):
+ add %o0,-4,%o0
add %o1,-4,%o1
- b Loop00 /* 4, 8, 12, ... */
- orcc %g0,%g0,%g2
-LL01: b Loop01 /* 1, 5, 9, ... */
- orcc %g0,%g0,%g2
+ b LOC(loop00) /* 4, 8, 12, ... */
+ orcc %g0,%g0,%g2
+LOC(01):
+ b LOC(loop01) /* 1, 5, 9, ... */
+ orcc %g0,%g0,%g2
nop
nop
-LL10: add %o0,-12,%o0 /* 2, 6, 10, ... */
+LOC(10):
+ add %o0,-12,%o0 /* 2, 6, 10, ... */
add %o1,4,%o1
- b Loop10
- orcc %g0,%g0,%g2
+ b LOC(loop10)
+ orcc %g0,%g0,%g2
nop
-LL11: add %o0,-8,%o0 /* 3, 7, 11, ... */
+LOC(11):
+ add %o0,-8,%o0 /* 3, 7, 11, ... */
add %o1,-8,%o1
- b Loop11
- orcc %g0,%g0,%g2
+ b LOC(loop11)
+ orcc %g0,%g0,%g2
-Loop: addcc %g3,%g2,%g3 ! 1
+LOC(loop):
+ addcc %g3,%g2,%g3 ! 1
ld [%o1+4],%o4 ! 2
st %g3,[%o0+0] ! 1
rd %y,%g2 ! 1
-Loop00: umul %o4,%o3,%g3 ! 2
+LOC(loop00):
+ umul %o4,%o3,%g3 ! 2
addxcc %g3,%g2,%g3 ! 2
ld [%o1+8],%o4 ! 3
st %g3,[%o0+4] ! 2
rd %y,%g2 ! 2
-Loop11: umul %o4,%o3,%g3 ! 3
+LOC(loop11):
+ umul %o4,%o3,%g3 ! 3
addxcc %g3,%g2,%g3 ! 3
ld [%o1+12],%o4 ! 4
add %o1,16,%o1
st %g3,[%o0+8] ! 3
rd %y,%g2 ! 3
-Loop10: umul %o4,%o3,%g3 ! 4
+LOC(loop10):
+ umul %o4,%o3,%g3 ! 4
addxcc %g3,%g2,%g3 ! 4
ld [%o1+0],%o4 ! 1
st %g3,[%o0+12] ! 4
add %o0,16,%o0
rd %y,%g2 ! 4
addx %g0,%g2,%g2
-Loop01: addcc %o2,-4,%o2
- bg Loop
- umul %o4,%o3,%g3 ! 1
+LOC(loop01):
+ addcc %o2,-4,%o2
+ bg LOC(loop)
+ umul %o4,%o3,%g3 ! 1
addcc %g3,%g2,%g3 ! 4
st %g3,[%o0+0] ! 4
rd %y,%g2 ! 4
-
retl
- addx %g0,%g2,%o0
+ addx %g0,%g2,%o0
+
+END(__mpn_mul_1)
! size o2
! s2_limb o3
-#include "sysdep.h"
+#include <sysdep.h>
-.text
- .align 4
- .global C_SYMBOL_NAME(__mpn_submul_1)
-C_SYMBOL_NAME(__mpn_submul_1):
+ENTRY(__mpn_submul_1)
sub %g0,%o2,%o2 ! negate ...
sll %o2,2,%o2 ! ... and scale size
sub %o1,%o2,%o1 ! o1 is offset s1_ptr
mov 0,%o0 ! clear cy_limb
-Loop: ld [%o1+%o2],%o4
+LOC(loop):
+ ld [%o1+%o2],%o4
ld [%g1+%o2],%g2
umul %o4,%o3,%o5
rd %y,%g3
st %g2,[%g1+%o2]
addcc %o2,4,%o2
- bne Loop
+ bne LOC(loop)
nop
retl
nop
+
+END(__mpn_submul_1)
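
Reference semantics for submul_1, shared by this umul version and the mulscc version later in the patch (a C sketch, hypothetical name):

    typedef unsigned int mp_limb_t;     /* 32-bit limb */

    /* res[0..size-1] -= s1[0..size-1] * s2_limb; returns the borrow
       that propagates past the most significant limb.  */
    mp_limb_t
    mpn_submul_1_ref (mp_limb_t *res, const mp_limb_t *s1,
                      long size, mp_limb_t s2_limb)
    {
      mp_limb_t cy = 0;
      long i;
      for (i = 0; i < size; i++)
        {
          unsigned long long p =
            (unsigned long long) s1[i] * s2_limb + cy;
          mp_limb_t lo = (mp_limb_t) p;
          mp_limb_t r = res[i];
          res[i] = r - lo;
          cy = (mp_limb_t) (p >> 32) + (r < lo);   /* high limb + borrow */
        }
      return cy;
    }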
#include "sysdep.h"
- .text
- .align 4
- .global C_SYMBOL_NAME(__udiv_qrnnd)
-C_SYMBOL_NAME(__udiv_qrnnd):
+ENTRY(__udiv_qrnnd)
tst %o3
- bneg Largedivisor
+ bneg LOC(largedivisor)
mov 8,%g1
- b Lp1
+ b LOC(p1)
addxcc %o2,%o2,%o2
-Lplop: bcc Ln1
+LOC(plop):
+ bcc LOC(n1)
addxcc %o2,%o2,%o2
-Lp1: addx %o1,%o1,%o1
+LOC(p1):
+ addx %o1,%o1,%o1
subcc %o1,%o3,%o4
- bcc Ln2
+ bcc LOC(n2)
addxcc %o2,%o2,%o2
-Lp2: addx %o1,%o1,%o1
+LOC(p2):
+ addx %o1,%o1,%o1
subcc %o1,%o3,%o4
- bcc Ln3
+ bcc LOC(n3)
addxcc %o2,%o2,%o2
-Lp3: addx %o1,%o1,%o1
+LOC(p3):
+ addx %o1,%o1,%o1
subcc %o1,%o3,%o4
- bcc Ln4
+ bcc LOC(n4)
addxcc %o2,%o2,%o2
-Lp4: addx %o1,%o1,%o1
+LOC(p4):
+ addx %o1,%o1,%o1
addcc %g1,-1,%g1
- bne Lplop
+ bne LOC(plop)
subcc %o1,%o3,%o4
- bcc Ln5
+ bcc LOC(n5)
addxcc %o2,%o2,%o2
-Lp5: st %o1,[%o0]
+LOC(p5):
+ st %o1,[%o0]
retl
xnor %g0,%o2,%o0
-Lnlop: bcc Lp1
+LOC(nlop):
+ bcc LOC(p1)
addxcc %o2,%o2,%o2
-Ln1: addx %o4,%o4,%o4
+LOC(n1):
+ addx %o4,%o4,%o4
subcc %o4,%o3,%o1
- bcc Lp2
+ bcc LOC(p2)
addxcc %o2,%o2,%o2
-Ln2: addx %o4,%o4,%o4
+LOC(n2):
+ addx %o4,%o4,%o4
subcc %o4,%o3,%o1
- bcc Lp3
+ bcc LOC(p3)
addxcc %o2,%o2,%o2
-Ln3: addx %o4,%o4,%o4
+LOC(n3):
+ addx %o4,%o4,%o4
subcc %o4,%o3,%o1
- bcc Lp4
+ bcc LOC(p4)
addxcc %o2,%o2,%o2
-Ln4: addx %o4,%o4,%o4
+LOC(n4):
+ addx %o4,%o4,%o4
addcc %g1,-1,%g1
- bne Lnlop
+ bne LOC(nlop)
subcc %o4,%o3,%o1
- bcc Lp5
+ bcc LOC(p5)
addxcc %o2,%o2,%o2
-Ln5: st %o4,[%o0]
+LOC(n5):
+ st %o4,[%o0]
retl
xnor %g0,%o2,%o0
-Largedivisor:
+LOC(largedivisor):
and %o2,1,%o5 ! %o5 = n0 & 1
srl %o2,1,%o2
srl %o3,1,%g3 ! %g3 = floor(d / 2)
add %g3,%g2,%g3 ! %g3 = ceil(d / 2)
- b LLp1
+ b LOC(Lp1)
addxcc %o2,%o2,%o2
-LLplop: bcc LLn1
+LOC(Lplop):
+ bcc LOC(Ln1)
addxcc %o2,%o2,%o2
-LLp1: addx %o1,%o1,%o1
+LOC(Lp1):
+ addx %o1,%o1,%o1
subcc %o1,%g3,%o4
- bcc LLn2
+ bcc LOC(Ln2)
addxcc %o2,%o2,%o2
-LLp2: addx %o1,%o1,%o1
+LOC(Lp2):
+ addx %o1,%o1,%o1
subcc %o1,%g3,%o4
- bcc LLn3
+ bcc LOC(Ln3)
addxcc %o2,%o2,%o2
-LLp3: addx %o1,%o1,%o1
+LOC(Lp3):
+ addx %o1,%o1,%o1
subcc %o1,%g3,%o4
- bcc LLn4
+ bcc LOC(Ln4)
addxcc %o2,%o2,%o2
-LLp4: addx %o1,%o1,%o1
+LOC(Lp4):
+ addx %o1,%o1,%o1
addcc %g1,-1,%g1
- bne LLplop
+ bne LOC(Lplop)
subcc %o1,%g3,%o4
- bcc LLn5
+ bcc LOC(Ln5)
addxcc %o2,%o2,%o2
-LLp5: add %o1,%o1,%o1 ! << 1
+LOC(Lp5):
+ add %o1,%o1,%o1 ! << 1
tst %g2
- bne Oddp
+ bne LOC(Oddp)
add %o5,%o1,%o1
st %o1,[%o0]
retl
xnor %g0,%o2,%o0
-LLnlop: bcc LLp1
+LOC(Lnlop):
+ bcc LOC(Lp1)
addxcc %o2,%o2,%o2
-LLn1: addx %o4,%o4,%o4
+LOC(Ln1):
+ addx %o4,%o4,%o4
subcc %o4,%g3,%o1
- bcc LLp2
+ bcc LOC(Lp2)
addxcc %o2,%o2,%o2
-LLn2: addx %o4,%o4,%o4
+LOC(Ln2):
+ addx %o4,%o4,%o4
subcc %o4,%g3,%o1
- bcc LLp3
+ bcc LOC(Lp3)
addxcc %o2,%o2,%o2
-LLn3: addx %o4,%o4,%o4
+LOC(Ln3):
+ addx %o4,%o4,%o4
subcc %o4,%g3,%o1
- bcc LLp4
+ bcc LOC(Lp4)
addxcc %o2,%o2,%o2
-LLn4: addx %o4,%o4,%o4
+LOC(Ln4):
+ addx %o4,%o4,%o4
addcc %g1,-1,%g1
- bne LLnlop
+ bne LOC(Lnlop)
subcc %o4,%g3,%o1
- bcc LLp5
+ bcc LOC(Lp5)
addxcc %o2,%o2,%o2
-LLn5: add %o4,%o4,%o4 ! << 1
+LOC(Ln5):
+ add %o4,%o4,%o4 ! << 1
tst %g2
- bne Oddn
+ bne LOC(Oddn)
add %o5,%o4,%o4
st %o4,[%o0]
retl
xnor %g0,%o2,%o0
-Oddp: xnor %g0,%o2,%o2
+LOC(Oddp):
+ xnor %g0,%o2,%o2
! q' in %o2. r' in %o1
addcc %o1,%o2,%o1
- bcc LLp6
+ bcc LOC(Lp6)
addx %o2,0,%o2
sub %o1,%o3,%o1
-LLp6: subcc %o1,%o3,%g0
- bcs LLp7
+LOC(Lp6):
+ subcc %o1,%o3,%g0
+ bcs LOC(Lp7)
subx %o2,-1,%o2
sub %o1,%o3,%o1
-LLp7: st %o1,[%o0]
+LOC(Lp7):
+ st %o1,[%o0]
retl
mov %o2,%o0
-Oddn: xnor %g0,%o2,%o2
+LOC(Oddn):
+ xnor %g0,%o2,%o2
! q' in %o2. r' in %o4
addcc %o4,%o2,%o4
- bcc LLn6
+ bcc LOC(Ln6)
addx %o2,0,%o2
sub %o4,%o3,%o4
-LLn6: subcc %o4,%o3,%g0
- bcs LLn7
+LOC(Ln6):
+ subcc %o4,%o3,%g0
+ bcs LOC(Ln7)
subx %o2,-1,%o2
sub %o4,%o3,%o4
-LLn7: st %o4,[%o0]
+LOC(Ln7):
+ st %o4,[%o0]
retl
mov %o2,%o0
+
+END(__udiv_qrnnd)
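
__udiv_qrnnd divides the two-limb number {n1, n0} by d (with n1 < d, as this helper conventionally requires so that the quotient fits in one limb), stores the remainder through the pointer in %o0 and returns the quotient. The quotient bits are accumulated complemented, hence the final `xnor %g0,%o2,%o0', and LOC(largedivisor) runs the loop against ceil(d/2) when d has its high bit set, patching odd divisors up in LOC(Oddp)/LOC(Oddn). A C statement of the contract (a sketch, hypothetical name):

    typedef unsigned int mp_limb_t;     /* 32-bit limb */

    /* Quotient of {n1,n0} / d, remainder stored through rem_ptr;
       requires n1 < d.  */
    mp_limb_t
    udiv_qrnnd_ref (mp_limb_t *rem_ptr, mp_limb_t n1, mp_limb_t n0,
                    mp_limb_t d)
    {
      unsigned long long n = ((unsigned long long) n1 << 32) | n0;
      *rem_ptr = (mp_limb_t) (n % d);
      return (mp_limb_t) (n / d);
    }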
! SPARC __mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
! store difference in a third limb vector.
-
+!
! Copyright (C) 1995, 1996 Free Software Foundation, Inc.
-
+!
! This file is part of the GNU MP Library.
-
+!
! The GNU MP Library is free software; you can redistribute it and/or modify
! it under the terms of the GNU Library General Public License as published by
! the Free Software Foundation; either version 2 of the License, or (at your
! option) any later version.
-
+!
! The GNU MP Library is distributed in the hope that it will be useful, but
! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
! License for more details.
-
+!
! You should have received a copy of the GNU Library General Public License
! along with the GNU MP Library; see the file COPYING.LIB. If not, write to
! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
! INPUT PARAMETERS
-#define res_ptr %o0
-#define s1_ptr %o1
-#define s2_ptr %o2
-#define size %o3
+#define RES_PTR %o0
+#define S1_PTR %o1
+#define S2_PTR %o2
+#define SIZE %o3
-#include "sysdep.h"
+#include <sysdep.h>
- .text
- .align 4
- .global C_SYMBOL_NAME(__mpn_sub_n)
-C_SYMBOL_NAME(__mpn_sub_n):
- xor s2_ptr,res_ptr,%g1
+ENTRY(__mpn_sub_n)
+ xor S2_PTR,RES_PTR,%g1
andcc %g1,4,%g0
- bne L1 ! branch if alignment differs
+ bne LOC(1) ! branch if alignment differs
nop
! ** V1a **
- andcc res_ptr,4,%g0 ! res_ptr unaligned? Side effect: cy=0
- be L_v1 ! if no, branch
+ andcc RES_PTR,4,%g0 ! RES_PTR unaligned? Side effect: cy=0
+ be LOC(v1) ! if no, branch
nop
-/* Add least significant limb separately to align res_ptr and s2_ptr */
- ld [s1_ptr],%g4
- add s1_ptr,4,s1_ptr
- ld [s2_ptr],%g2
- add s2_ptr,4,s2_ptr
- add size,-1,size
+/* Subtract least significant limb separately to align RES_PTR and S2_PTR */
+ ld [S1_PTR],%g4
+ add S1_PTR,4,S1_PTR
+ ld [S2_PTR],%g2
+ add S2_PTR,4,S2_PTR
+ add SIZE,-1,SIZE
subcc %g4,%g2,%o4
- st %o4,[res_ptr]
- add res_ptr,4,res_ptr
-L_v1: addx %g0,%g0,%o4 ! save cy in register
- cmp size,2 ! if size < 2 ...
- bl Lend2 ! ... branch to tail code
+ st %o4,[RES_PTR]
+ add RES_PTR,4,RES_PTR
+LOC(v1):
+ addx %g0,%g0,%o4 ! save cy in register
+ cmp SIZE,2 ! if SIZE < 2 ...
+ bl LOC(end2) ! ... branch to tail code
subcc %g0,%o4,%g0 ! restore cy
- ld [s1_ptr+0],%g4
- addcc size,-10,size
- ld [s1_ptr+4],%g1
- ldd [s2_ptr+0],%g2
- blt Lfin1
+ ld [S1_PTR+0],%g4
+ addcc SIZE,-10,SIZE
+ ld [S1_PTR+4],%g1
+ ldd [S2_PTR+0],%g2
+ blt LOC(fin1)
subcc %g0,%o4,%g0 ! restore cy
/* Add blocks of 8 limbs until less than 8 limbs remain */
-Loop1: subxcc %g4,%g2,%o4
- ld [s1_ptr+8],%g4
+LOC(loop1):
+ subxcc %g4,%g2,%o4
+ ld [S1_PTR+8],%g4
subxcc %g1,%g3,%o5
- ld [s1_ptr+12],%g1
- ldd [s2_ptr+8],%g2
- std %o4,[res_ptr+0]
+ ld [S1_PTR+12],%g1
+ ldd [S2_PTR+8],%g2
+ std %o4,[RES_PTR+0]
subxcc %g4,%g2,%o4
- ld [s1_ptr+16],%g4
+ ld [S1_PTR+16],%g4
subxcc %g1,%g3,%o5
- ld [s1_ptr+20],%g1
- ldd [s2_ptr+16],%g2
- std %o4,[res_ptr+8]
+ ld [S1_PTR+20],%g1
+ ldd [S2_PTR+16],%g2
+ std %o4,[RES_PTR+8]
subxcc %g4,%g2,%o4
- ld [s1_ptr+24],%g4
+ ld [S1_PTR+24],%g4
subxcc %g1,%g3,%o5
- ld [s1_ptr+28],%g1
- ldd [s2_ptr+24],%g2
- std %o4,[res_ptr+16]
+ ld [S1_PTR+28],%g1
+ ldd [S2_PTR+24],%g2
+ std %o4,[RES_PTR+16]
subxcc %g4,%g2,%o4
- ld [s1_ptr+32],%g4
+ ld [S1_PTR+32],%g4
subxcc %g1,%g3,%o5
- ld [s1_ptr+36],%g1
- ldd [s2_ptr+32],%g2
- std %o4,[res_ptr+24]
+ ld [S1_PTR+36],%g1
+ ldd [S2_PTR+32],%g2
+ std %o4,[RES_PTR+24]
addx %g0,%g0,%o4 ! save cy in register
- addcc size,-8,size
- add s1_ptr,32,s1_ptr
- add s2_ptr,32,s2_ptr
- add res_ptr,32,res_ptr
- bge Loop1
+ addcc SIZE,-8,SIZE
+ add S1_PTR,32,S1_PTR
+ add S2_PTR,32,S2_PTR
+ add RES_PTR,32,RES_PTR
+ bge LOC(loop1)
subcc %g0,%o4,%g0 ! restore cy
-Lfin1: addcc size,8-2,size
- blt Lend1
+LOC(fin1):
+ addcc SIZE,8-2,SIZE
+ blt LOC(end1)
subcc %g0,%o4,%g0 ! restore cy
/* Add blocks of 2 limbs until less than 2 limbs remain */
-Loope1: subxcc %g4,%g2,%o4
- ld [s1_ptr+8],%g4
+LOC(loope1):
+ subxcc %g4,%g2,%o4
+ ld [S1_PTR+8],%g4
subxcc %g1,%g3,%o5
- ld [s1_ptr+12],%g1
- ldd [s2_ptr+8],%g2
- std %o4,[res_ptr+0]
+ ld [S1_PTR+12],%g1
+ ldd [S2_PTR+8],%g2
+ std %o4,[RES_PTR+0]
addx %g0,%g0,%o4 ! save cy in register
- addcc size,-2,size
- add s1_ptr,8,s1_ptr
- add s2_ptr,8,s2_ptr
- add res_ptr,8,res_ptr
- bge Loope1
+ addcc SIZE,-2,SIZE
+ add S1_PTR,8,S1_PTR
+ add S2_PTR,8,S2_PTR
+ add RES_PTR,8,RES_PTR
+ bge LOC(loope1)
subcc %g0,%o4,%g0 ! restore cy
-Lend1: subxcc %g4,%g2,%o4
+LOC(end1):
+ subxcc %g4,%g2,%o4
subxcc %g1,%g3,%o5
- std %o4,[res_ptr+0]
+ std %o4,[RES_PTR+0]
addx %g0,%g0,%o4 ! save cy in register
- andcc size,1,%g0
- be Lret1
+ andcc SIZE,1,%g0
+ be LOC(ret1)
subcc %g0,%o4,%g0 ! restore cy
/* Add last limb */
- ld [s1_ptr+8],%g4
- ld [s2_ptr+8],%g2
+ ld [S1_PTR+8],%g4
+ ld [S2_PTR+8],%g2
subxcc %g4,%g2,%o4
- st %o4,[res_ptr+8]
+ st %o4,[RES_PTR+8]
-Lret1: retl
+LOC(ret1):
+ retl
addx %g0,%g0,%o0 ! return carry-out from most sign. limb
-L1: xor s1_ptr,res_ptr,%g1
+LOC(1): xor S1_PTR,RES_PTR,%g1
andcc %g1,4,%g0
- bne L2
+ bne LOC(2)
nop
! ** V1b **
- andcc res_ptr,4,%g0 ! res_ptr unaligned? Side effect: cy=0
- be L_v1b ! if no, branch
+ andcc RES_PTR,4,%g0 ! RES_PTR unaligned? Side effect: cy=0
+ be LOC(v1b) ! if no, branch
nop
-/* Add least significant limb separately to align res_ptr and s1_ptr */
- ld [s2_ptr],%g4
- add s2_ptr,4,s2_ptr
- ld [s1_ptr],%g2
- add s1_ptr,4,s1_ptr
- add size,-1,size
+/* Subtract least significant limb separately to align RES_PTR and S1_PTR */
+ ld [S2_PTR],%g4
+ add S2_PTR,4,S2_PTR
+ ld [S1_PTR],%g2
+ add S1_PTR,4,S1_PTR
+ add SIZE,-1,SIZE
subcc %g2,%g4,%o4
- st %o4,[res_ptr]
- add res_ptr,4,res_ptr
-L_v1b: addx %g0,%g0,%o4 ! save cy in register
- cmp size,2 ! if size < 2 ...
- bl Lend2 ! ... branch to tail code
+ st %o4,[RES_PTR]
+ add RES_PTR,4,RES_PTR
+LOC(v1b):
+ addx %g0,%g0,%o4 ! save cy in register
+ cmp SIZE,2 ! if SIZE < 2 ...
+ bl LOC(end2) ! ... branch to tail code
subcc %g0,%o4,%g0 ! restore cy
- ld [s2_ptr+0],%g4
- addcc size,-10,size
- ld [s2_ptr+4],%g1
- ldd [s1_ptr+0],%g2
- blt Lfin1b
+ ld [S2_PTR+0],%g4
+ addcc SIZE,-10,SIZE
+ ld [S2_PTR+4],%g1
+ ldd [S1_PTR+0],%g2
+ blt LOC(fin1b)
subcc %g0,%o4,%g0 ! restore cy
/* Add blocks of 8 limbs until less than 8 limbs remain */
-Loop1b: subxcc %g2,%g4,%o4
- ld [s2_ptr+8],%g4
+LOC(loop1b):
+ subxcc %g2,%g4,%o4
+ ld [S2_PTR+8],%g4
subxcc %g3,%g1,%o5
- ld [s2_ptr+12],%g1
- ldd [s1_ptr+8],%g2
- std %o4,[res_ptr+0]
+ ld [S2_PTR+12],%g1
+ ldd [S1_PTR+8],%g2
+ std %o4,[RES_PTR+0]
subxcc %g2,%g4,%o4
- ld [s2_ptr+16],%g4
+ ld [S2_PTR+16],%g4
subxcc %g3,%g1,%o5
- ld [s2_ptr+20],%g1
- ldd [s1_ptr+16],%g2
- std %o4,[res_ptr+8]
+ ld [S2_PTR+20],%g1
+ ldd [S1_PTR+16],%g2
+ std %o4,[RES_PTR+8]
subxcc %g2,%g4,%o4
- ld [s2_ptr+24],%g4
+ ld [S2_PTR+24],%g4
subxcc %g3,%g1,%o5
- ld [s2_ptr+28],%g1
- ldd [s1_ptr+24],%g2
- std %o4,[res_ptr+16]
+ ld [S2_PTR+28],%g1
+ ldd [S1_PTR+24],%g2
+ std %o4,[RES_PTR+16]
subxcc %g2,%g4,%o4
- ld [s2_ptr+32],%g4
+ ld [S2_PTR+32],%g4
subxcc %g3,%g1,%o5
- ld [s2_ptr+36],%g1
- ldd [s1_ptr+32],%g2
- std %o4,[res_ptr+24]
+ ld [S2_PTR+36],%g1
+ ldd [S1_PTR+32],%g2
+ std %o4,[RES_PTR+24]
addx %g0,%g0,%o4 ! save cy in register
- addcc size,-8,size
- add s1_ptr,32,s1_ptr
- add s2_ptr,32,s2_ptr
- add res_ptr,32,res_ptr
- bge Loop1b
+ addcc SIZE,-8,SIZE
+ add S1_PTR,32,S1_PTR
+ add S2_PTR,32,S2_PTR
+ add RES_PTR,32,RES_PTR
+ bge LOC(loop1b)
subcc %g0,%o4,%g0 ! restore cy
-Lfin1b: addcc size,8-2,size
- blt Lend1b
+LOC(fin1b):
+ addcc SIZE,8-2,SIZE
+ blt LOC(end1b)
subcc %g0,%o4,%g0 ! restore cy
/* Add blocks of 2 limbs until less than 2 limbs remain */
-Loope1b:subxcc %g2,%g4,%o4
- ld [s2_ptr+8],%g4
+LOC(loope1b):
+ subxcc %g2,%g4,%o4
+ ld [S2_PTR+8],%g4
subxcc %g3,%g1,%o5
- ld [s2_ptr+12],%g1
- ldd [s1_ptr+8],%g2
- std %o4,[res_ptr+0]
+ ld [S2_PTR+12],%g1
+ ldd [S1_PTR+8],%g2
+ std %o4,[RES_PTR+0]
addx %g0,%g0,%o4 ! save cy in register
- addcc size,-2,size
- add s1_ptr,8,s1_ptr
- add s2_ptr,8,s2_ptr
- add res_ptr,8,res_ptr
- bge Loope1b
+ addcc SIZE,-2,SIZE
+ add S1_PTR,8,S1_PTR
+ add S2_PTR,8,S2_PTR
+ add RES_PTR,8,RES_PTR
+ bge LOC(loope1b)
subcc %g0,%o4,%g0 ! restore cy
-Lend1b: subxcc %g2,%g4,%o4
+LOC(end1b):
+ subxcc %g2,%g4,%o4
subxcc %g3,%g1,%o5
- std %o4,[res_ptr+0]
+ std %o4,[RES_PTR+0]
addx %g0,%g0,%o4 ! save cy in register
- andcc size,1,%g0
- be Lret1b
+ andcc SIZE,1,%g0
+ be LOC(ret1b)
subcc %g0,%o4,%g0 ! restore cy
/* Add last limb */
- ld [s2_ptr+8],%g4
- ld [s1_ptr+8],%g2
+ ld [S2_PTR+8],%g4
+ ld [S1_PTR+8],%g2
subxcc %g2,%g4,%o4
- st %o4,[res_ptr+8]
+ st %o4,[RES_PTR+8]
-Lret1b: retl
+LOC(ret1b):
+ retl
addx %g0,%g0,%o0 ! return carry-out from most sign. limb
! ** V2 **
-/* If we come here, the alignment of s1_ptr and res_ptr as well as the
- alignment of s2_ptr and res_ptr differ. Since there are only two ways
+/* If we come here, the alignment of S1_PTR and RES_PTR as well as the
+ alignment of S2_PTR and RES_PTR differ. Since there are only two ways
things can be aligned (that we care about) we now know that the alignment
- of s1_ptr and s2_ptr are the same. */
+   of S1_PTR and S2_PTR is the same. */
-L2: cmp size,1
- be Ljone
+LOC(2): cmp SIZE,1
+ be LOC(jone)
nop
- andcc s1_ptr,4,%g0 ! s1_ptr unaligned? Side effect: cy=0
- be L_v2 ! if no, branch
+ andcc S1_PTR,4,%g0 ! S1_PTR unaligned? Side effect: cy=0
+ be LOC(v2) ! if no, branch
nop
-/* Add least significant limb separately to align s1_ptr and s2_ptr */
- ld [s1_ptr],%g4
- add s1_ptr,4,s1_ptr
- ld [s2_ptr],%g2
- add s2_ptr,4,s2_ptr
- add size,-1,size
+/* Subtract least significant limb separately to align S1_PTR and S2_PTR */
+ ld [S1_PTR],%g4
+ add S1_PTR,4,S1_PTR
+ ld [S2_PTR],%g2
+ add S2_PTR,4,S2_PTR
+ add SIZE,-1,SIZE
subcc %g4,%g2,%o4
- st %o4,[res_ptr]
- add res_ptr,4,res_ptr
+ st %o4,[RES_PTR]
+ add RES_PTR,4,RES_PTR
-L_v2: addx %g0,%g0,%o4 ! save cy in register
- addcc size,-8,size
- blt Lfin2
+LOC(v2):
+ addx %g0,%g0,%o4 ! save cy in register
+ addcc SIZE,-8,SIZE
+ blt LOC(fin2)
subcc %g0,%o4,%g0 ! restore cy
/* Add blocks of 8 limbs until less than 8 limbs remain */
-Loop2: ldd [s1_ptr+0],%g2
- ldd [s2_ptr+0],%o4
+LOC(loop2):
+ ldd [S1_PTR+0],%g2
+ ldd [S2_PTR+0],%o4
subxcc %g2,%o4,%g2
- st %g2,[res_ptr+0]
+ st %g2,[RES_PTR+0]
subxcc %g3,%o5,%g3
- st %g3,[res_ptr+4]
- ldd [s1_ptr+8],%g2
- ldd [s2_ptr+8],%o4
+ st %g3,[RES_PTR+4]
+ ldd [S1_PTR+8],%g2
+ ldd [S2_PTR+8],%o4
subxcc %g2,%o4,%g2
- st %g2,[res_ptr+8]
+ st %g2,[RES_PTR+8]
subxcc %g3,%o5,%g3
- st %g3,[res_ptr+12]
- ldd [s1_ptr+16],%g2
- ldd [s2_ptr+16],%o4
+ st %g3,[RES_PTR+12]
+ ldd [S1_PTR+16],%g2
+ ldd [S2_PTR+16],%o4
subxcc %g2,%o4,%g2
- st %g2,[res_ptr+16]
+ st %g2,[RES_PTR+16]
subxcc %g3,%o5,%g3
- st %g3,[res_ptr+20]
- ldd [s1_ptr+24],%g2
- ldd [s2_ptr+24],%o4
+ st %g3,[RES_PTR+20]
+ ldd [S1_PTR+24],%g2
+ ldd [S2_PTR+24],%o4
subxcc %g2,%o4,%g2
- st %g2,[res_ptr+24]
+ st %g2,[RES_PTR+24]
subxcc %g3,%o5,%g3
- st %g3,[res_ptr+28]
+ st %g3,[RES_PTR+28]
addx %g0,%g0,%o4 ! save cy in register
- addcc size,-8,size
- add s1_ptr,32,s1_ptr
- add s2_ptr,32,s2_ptr
- add res_ptr,32,res_ptr
- bge Loop2
+ addcc SIZE,-8,SIZE
+ add S1_PTR,32,S1_PTR
+ add S2_PTR,32,S2_PTR
+ add RES_PTR,32,RES_PTR
+ bge LOC(loop2)
subcc %g0,%o4,%g0 ! restore cy
-Lfin2: addcc size,8-2,size
- blt Lend2
+LOC(fin2):
+ addcc SIZE,8-2,SIZE
+ blt LOC(end2)
subcc %g0,%o4,%g0 ! restore cy
-Loope2: ldd [s1_ptr+0],%g2
- ldd [s2_ptr+0],%o4
+LOC(loope2):
+ ldd [S1_PTR+0],%g2
+ ldd [S2_PTR+0],%o4
subxcc %g2,%o4,%g2
- st %g2,[res_ptr+0]
+ st %g2,[RES_PTR+0]
subxcc %g3,%o5,%g3
- st %g3,[res_ptr+4]
+ st %g3,[RES_PTR+4]
addx %g0,%g0,%o4 ! save cy in register
- addcc size,-2,size
- add s1_ptr,8,s1_ptr
- add s2_ptr,8,s2_ptr
- add res_ptr,8,res_ptr
- bge Loope2
+ addcc SIZE,-2,SIZE
+ add S1_PTR,8,S1_PTR
+ add S2_PTR,8,S2_PTR
+ add RES_PTR,8,RES_PTR
+ bge LOC(loope2)
subcc %g0,%o4,%g0 ! restore cy
-Lend2: andcc size,1,%g0
- be Lret2
+LOC(end2):
+ andcc SIZE,1,%g0
+ be LOC(ret2)
subcc %g0,%o4,%g0 ! restore cy
/* Add last limb */
-Ljone: ld [s1_ptr],%g4
- ld [s2_ptr],%g2
+LOC(jone):
+ ld [S1_PTR],%g4
+ ld [S2_PTR],%g2
subxcc %g4,%g2,%o4
- st %o4,[res_ptr]
+ st %o4,[RES_PTR]
-Lret2: retl
+LOC(ret2):
+ retl
addx %g0,%g0,%o0 ! return carry-out from most sign. limb
+
+END(__mpn_sub_n)
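
Reference semantics for the routine above (a C sketch, hypothetical name), the subtracting twin of __mpn_add_n:

    typedef unsigned int mp_limb_t;     /* 32-bit limb */

    /* res[0..size-1] = s1[] - s2[], size > 0; returns the borrow out
       of the most significant limb.  */
    mp_limb_t
    mpn_sub_n_ref (mp_limb_t *res, const mp_limb_t *s1,
                   const mp_limb_t *s2, long size)
    {
      mp_limb_t cy = 0;
      long i;
      for (i = 0; i < size; i++)
        {
          mp_limb_t a = s1[i];
          mp_limb_t t = a - s2[i];
          mp_limb_t c = t > a;          /* borrow from the limb subtract */
          mp_limb_t u = t - cy;
          c += u > t;                   /* borrow from the old cy */
          res[i] = u;
          cy = c;
        }
      return cy;
    }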
! SPARC __mpn_submul_1 -- Multiply a limb vector with a limb and subtract
! the result from a second limb vector.
-
-! Copyright (C) 1992, 1993, 1994 Free Software Foundation, Inc.
-
+!
+! Copyright (C) 1992, 1993, 1994, 1997 Free Software Foundation, Inc.
+!
! This file is part of the GNU MP Library.
-
+!
! The GNU MP Library is free software; you can redistribute it and/or modify
! it under the terms of the GNU Library General Public License as published by
! the Free Software Foundation; either version 2 of the License, or (at your
! option) any later version.
-
+!
! The GNU MP Library is distributed in the hope that it will be useful, but
! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
! License for more details.
-
+!
! You should have received a copy of the GNU Library General Public License
! along with the GNU MP Library; see the file COPYING.LIB. If not, write to
! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
! INPUT PARAMETERS
-! res_ptr o0
-! s1_ptr o1
-! size o2
-! s2_limb o3
+! RES_PTR o0
+! S1_PTR o1
+! SIZE o2
+! S2_LIMB o3
-#include "sysdep.h"
+#include <sysdep.h>
-.text
- .align 4
- .global C_SYMBOL_NAME(__mpn_submul_1)
-C_SYMBOL_NAME(__mpn_submul_1):
+ENTRY(__mpn_submul_1)
! Make S1_PTR and RES_PTR point at the end of their blocks
! and put (- 4 x SIZE) in index/loop counter.
sll %o2,2,%o2
sub %g0,%o2,%o2
cmp %o3,0xfff
- bgu Large
+ bgu LOC(large)
nop
ld [%o1+%o2],%o5
mov 0,%o0
- b L0
+ b LOC(0)
add %o4,-4,%o4
-Loop0:
+LOC(loop0):
subcc %o5,%g1,%g1
ld [%o1+%o2],%o5
addx %o0,%g0,%o0
st %g1,[%o4+%o2]
-L0: wr %g0,%o3,%y
+LOC(0): wr %g0,%o3,%y
sra %o5,31,%g2
and %o3,%g2,%g2
andcc %g1,0,%g1
addcc %g1,%o0,%g1
addx %g2,%g4,%o0 ! add sign-compensation and cy to hi limb
addcc %o2,4,%o2 ! loop counter
- bne Loop0
+ bne LOC(loop0)
ld [%o4+%o2],%o5
subcc %o5,%g1,%g1
st %g1,[%o4+%o2]
-Large: ld [%o1+%o2],%o5
+LOC(large):
+ ld [%o1+%o2],%o5
mov 0,%o0
sra %o3,31,%g4 ! g4 = mask of ones iff S2_LIMB < 0
- b L1
+ b LOC(1)
add %o4,-4,%o4
-Loop:
+LOC(loop):
subcc %o5,%g3,%g3
ld [%o1+%o2],%o5
addx %o0,%g0,%o0
st %g3,[%o4+%o2]
-L1: wr %g0,%o5,%y
+LOC(1): wr %g0,%o5,%y
and %o5,%g4,%g2
andcc %g0,%g0,%g1
mulscc %g1,%o3,%g1
addcc %g3,%o0,%g3
addx %g2,%g1,%o0
addcc %o2,4,%o2
- bne Loop
+ bne LOC(loop)
ld [%o4+%o2],%o5
subcc %o5,%g3,%g3
addx %o0,%g0,%o0
retl
st %g3,[%o4+%o2]
+
+END(__mpn_submul_1)
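
__mpn_submul_1 subtracts s1[]*S2_LIMB from RES and returns the carry word out of the top limb. The sra/and pair builds a sign-compensation mask because mulscc forms a product that treats S2_LIMB as signed; when its top bit is set, the unsigned high word is the signed high word plus the other operand, which is what the masked addx supplies. A C sketch of the contract (illustrative names, 32-bit limbs assumed):

    #include <stdint.h>

    /* res[i] -= s1[i] * limb across size limbs; returns the final
       carry word.  */
    static uint32_t
    mpn_submul_1_ref (uint32_t *res, const uint32_t *s1, long size,
                      uint32_t limb)
    {
      uint32_t cy = 0;
      for (long i = 0; i < size; i++)
        {
          /* prod + cy cannot overflow 64 bits: prod <= (2**32-1)^2.  */
          uint64_t prod = (uint64_t) s1[i] * limb + cy;
          uint32_t lo = (uint32_t) prod;
          uint32_t r = res[i];
          res[i] = r - lo;
          /* high product word plus the borrow from the low word;
             this sum always fits in 32 bits.  */
          cy = (uint32_t) (prod >> 32) + (r < lo);
        }
      return cy;
    }
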
-#include "sysdep.h"
-#ifdef __linux__
-#include <asm/traps.h>
-#else
-#ifdef __svr4__
+#include <sysdep.h>
#include <sys/trap.h>
-#else
-#include <machine/trap.h>
-#endif
-#endif
ENTRY(.udiv)
1:
	cmp	%o3, %o5		! if the divisor (%o5) exceeds the dividend (%o3), done
- blu Lgot_result ! (and algorithm fails otherwise)
+ blu LOC(got_result) ! (and algorithm fails otherwise)
clr %o2
sethi %hi(1 << (32 - 4 - 1)), %g1
cmp %o3, %g1
- blu Lnot_really_big
+ blu LOC(not_really_big)
clr %o4
! Here the dividend is >= 2**(31-N) or so. We must be careful here,
1:
cmp %o5, %g1
bgeu 3f
- mov 1, %g7
+ mov 1, %g2
sll %o5, 4, %o5
b 1b
add %o4, 1, %o4
- ! Now compute %g7.
+ ! Now compute %g2.
2: addcc %o5, %o5, %o5
- bcc Lnot_too_big
- add %g7, 1, %g7
+ bcc LOC(not_too_big)
+ add %g2, 1, %g2
	! We get here if %o5 overflowed while shifting.
! This means that %o3 has the high-order bit set.
sll %g1, 4, %g1 ! high order bit
srl %o5, 1, %o5 ! rest of %o5
add %o5, %g1, %o5
- b Ldo_single_div
- sub %g7, 1, %g7
+ b LOC(do_single_div)
+ sub %g2, 1, %g2
- Lnot_too_big:
+ LOC(not_too_big):
3: cmp %o5, %o3
blu 2b
nop
- be Ldo_single_div
+ be LOC(do_single_div)
nop
/* NB: these are commented out in the V8-Sparc manual as well */
/* (I do not understand this) */
! %o5 > %o3: went too far: back up 1 step
! srl %o5, 1, %o5
- ! dec %g7
+ ! dec %g2
! do single-bit divide steps
!
! We have to be careful here. We know that %o3 >= %o5, so we can do the
! order bit set in the first step, just falling into the regular
! division loop will mess up the first time around.
! So we unroll slightly...
- Ldo_single_div:
- subcc %g7, 1, %g7
- bl Lend_regular_divide
+ LOC(do_single_div):
+ subcc %g2, 1, %g2
+ bl LOC(end_regular_divide)
nop
sub %o3, %o5, %o3
mov 1, %o2
- b Lend_single_divloop
+ b LOC(end_single_divloop)
nop
- Lsingle_divloop:
+ LOC(single_divloop):
sll %o2, 1, %o2
bl 1f
srl %o5, 1, %o5
add %o3, %o5, %o3
sub %o2, 1, %o2
2:
- Lend_single_divloop:
- subcc %g7, 1, %g7
- bge Lsingle_divloop
+ LOC(end_single_divloop):
+ subcc %g2, 1, %g2
+ bge LOC(single_divloop)
tst %o3
- b,a Lend_regular_divide
+ b,a LOC(end_regular_divide)
-Lnot_really_big:
+LOC(not_really_big):
1:
sll %o5, 4, %o5
cmp %o5, %o3
bleu 1b
addcc %o4, 1, %o4
- be Lgot_result
+ be LOC(got_result)
sub %o4, 1, %o4
tst %o3 ! set up for initial iteration
-Ldivloop:
+LOC(divloop):
sll %o2, 4, %o2
! depth 1, accumulated bits 0
- bl L.1.16
+ bl LOC(1.16)
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
! depth 2, accumulated bits 1
- bl L.2.17
+ bl LOC(2.17)
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
! depth 3, accumulated bits 3
- bl L.3.19
+ bl LOC(3.19)
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
! depth 4, accumulated bits 7
- bl L.4.23
+ bl LOC(4.23)
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
b 9f
add %o2, (7*2+1), %o2
-
-L.4.23:
+
+LOC(4.23):
! remainder is negative
addcc %o3,%o5,%o3
b 9f
add %o2, (7*2-1), %o2
-
-
-L.3.19:
+
+
+LOC(3.19):
! remainder is negative
addcc %o3,%o5,%o3
! depth 4, accumulated bits 5
- bl L.4.21
+ bl LOC(4.21)
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
b 9f
add %o2, (5*2+1), %o2
-
-L.4.21:
+
+LOC(4.21):
! remainder is negative
addcc %o3,%o5,%o3
b 9f
add %o2, (5*2-1), %o2
-
-
-
-L.2.17:
+
+
+
+LOC(2.17):
! remainder is negative
addcc %o3,%o5,%o3
! depth 3, accumulated bits 1
- bl L.3.17
+ bl LOC(3.17)
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
! depth 4, accumulated bits 3
- bl L.4.19
+ bl LOC(4.19)
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
b 9f
add %o2, (3*2+1), %o2
-
-L.4.19:
+
+LOC(4.19):
! remainder is negative
addcc %o3,%o5,%o3
b 9f
add %o2, (3*2-1), %o2
-
-
-L.3.17:
+
+
+LOC(3.17):
! remainder is negative
addcc %o3,%o5,%o3
! depth 4, accumulated bits 1
- bl L.4.17
+ bl LOC(4.17)
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
b 9f
add %o2, (1*2+1), %o2
-
-L.4.17:
+
+LOC(4.17):
! remainder is negative
addcc %o3,%o5,%o3
b 9f
add %o2, (1*2-1), %o2
-
-
-
-
-L.1.16:
+
+
+
+
+LOC(1.16):
! remainder is negative
addcc %o3,%o5,%o3
! depth 2, accumulated bits -1
- bl L.2.15
+ bl LOC(2.15)
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
! depth 3, accumulated bits -1
- bl L.3.15
+ bl LOC(3.15)
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
! depth 4, accumulated bits -1
- bl L.4.15
+ bl LOC(4.15)
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
b 9f
add %o2, (-1*2+1), %o2
-
-L.4.15:
+
+LOC(4.15):
! remainder is negative
addcc %o3,%o5,%o3
b 9f
add %o2, (-1*2-1), %o2
-
-
-L.3.15:
+
+
+LOC(3.15):
! remainder is negative
addcc %o3,%o5,%o3
! depth 4, accumulated bits -3
- bl L.4.13
+ bl LOC(4.13)
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
b 9f
add %o2, (-3*2+1), %o2
-
-L.4.13:
+
+LOC(4.13):
! remainder is negative
addcc %o3,%o5,%o3
b 9f
add %o2, (-3*2-1), %o2
-
-
-
-L.2.15:
+
+
+
+LOC(2.15):
! remainder is negative
addcc %o3,%o5,%o3
! depth 3, accumulated bits -3
- bl L.3.13
+ bl LOC(3.13)
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
! depth 4, accumulated bits -5
- bl L.4.11
+ bl LOC(4.11)
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
b 9f
add %o2, (-5*2+1), %o2
-
-L.4.11:
+
+LOC(4.11):
! remainder is negative
addcc %o3,%o5,%o3
b 9f
add %o2, (-5*2-1), %o2
-
-
-L.3.13:
+
+
+LOC(3.13):
! remainder is negative
addcc %o3,%o5,%o3
! depth 4, accumulated bits -7
- bl L.4.9
+ bl LOC(4.9)
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
b 9f
add %o2, (-7*2+1), %o2
-
-L.4.9:
+
+LOC(4.9):
! remainder is negative
addcc %o3,%o5,%o3
b 9f
add %o2, (-7*2-1), %o2
-
-
-
-
+
+
+
+
9:
-Lend_regular_divide:
+LOC(end_regular_divide):
subcc %o4, 1, %o4
- bge Ldivloop
+ bge LOC(divloop)
tst %o3
- bl,a Lgot_result
+ bl,a LOC(got_result)
! non-restoring fixup here (one instruction only!)
sub %o2, 1, %o2
-Lgot_result:
+LOC(got_result):
retl
mov %o2, %o0
+
+END(.udiv)
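
.udiv develops the quotient by non-restoring division, four bits per pass through LOC(divloop): a non-negative running remainder takes a subtract and a +1 quotient digit, a negative one takes an add-back and a -1 digit, and a negative final remainder costs the single quotient decrement before LOC(got_result). A bit-at-a-time C sketch of the same scheme (the function name and the steps parameter are inventions for the example; steps = 32 covers full 32-bit operands):

    #include <stdint.h>

    /* Non-restoring unsigned division, one quotient bit per step;
       the assembly above unrolls this four bits deep per pass.  */
    static uint32_t
    udiv_nonrestoring (uint32_t n, uint32_t d, int steps)
    {
      int64_t rem = n;   /* running remainder, may go negative */
      int64_t q = 0;
      for (int i = steps - 1; i >= 0; i--)
        {
          if (rem >= 0)
            {
              rem -= (int64_t) d << i;   /* quotient digit +1 */
              q += (int64_t) 1 << i;
            }
          else
            {
              rem += (int64_t) d << i;   /* quotient digit -1 */
              q -= (int64_t) 1 << i;
            }
        }
      if (rem < 0)
        q -= 1;   /* the non-restoring fixup (sub %o2, 1, %o2) */
      return (uint32_t) q;
    }

For instance, udiv_nonrestoring (7, 2, 32) returns 3. The routine itself first scales the divisor by 16 until it covers the dividend, so the digit loop only runs over the bits that matter.
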
! SPARC __udiv_qrnnd division support, used from longlong.h.
-
+!
! Copyright (C) 1993, 1994, 1997 Free Software Foundation, Inc.
-
+!
! This file is part of the GNU MP Library.
-
+!
! The GNU MP Library is free software; you can redistribute it and/or modify
! it under the terms of the GNU Library General Public License as published by
! the Free Software Foundation; either version 2 of the License, or (at your
! option) any later version.
-
+!
! The GNU MP Library is distributed in the hope that it will be useful, but
! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
! License for more details.
-
+!
! You should have received a copy of the GNU Library General Public License
! along with the GNU MP Library; see the file COPYING.LIB. If not, write to
! the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-
+!
! Added PIC support - May/96, Miguel de Icaza
-
+!
! INPUT PARAMETERS
! rem_ptr i0
! n1 i1
! n0 i2
! d i3
-#include "sysdep.h"
+#include <sysdep.h>
#undef ret /* Kludge for glibc */
+#ifdef PIC
.text
+#else
+ .section .rodata,#alloc
+#endif
.align 8
.type two_to_32,@object
+ .size two_to_32,8
two_to_32:
.double 0r4294967296
- .size two_to_32,8
.type two_to_31,@object
+ .size two_to_31,8
two_to_31:
.double 0r2147483648
- .size two_to_31,8
- .align 4
- .global __udiv_qrnnd
- .type __udiv_qrnnd,@function
+ .text
ENTRY(__udiv_qrnnd)
!#PROLOGUE# 0
save %sp,-104,%sp
st %i1,[%fp-8]
ld [%fp-8],%f10
#ifdef PIC
-.Lbase: call 1f
+LOC(base):
+ call 1f
fitod %f10,%f4
-1: ldd [%o7-(.Lbase-two_to_32)],%f8
+1: ldd [%o7-(LOC(base)-two_to_32)],%f8
#else
sethi %hi(two_to_32),%o7
fitod %f10,%f4
ldd [%o7+%lo(two_to_32)],%f8
#endif
cmp %i1,0
- bge L248
+ bge LOC(248)
mov %i0,%i5
faddd %f4,%f8,%f4
-.L248:
+LOC(248):
st %i2,[%fp-8]
ld [%fp-8],%f10
fmuld %f4,%f8,%f6
cmp %i2,0
- bge L249
+ bge LOC(249)
fitod %f10,%f2
faddd %f2,%f8,%f2
-.L249:
+LOC(249):
st %i3,[%fp-8]
faddd %f6,%f2,%f2
ld [%fp-8],%f10
cmp %i3,0
- bge L250
+ bge LOC(250)
fitod %f10,%f4
faddd %f4,%f8,%f4
-.L250:
+LOC(250):
fdivd %f2,%f4,%f2
#ifdef PIC
- ldd [%o7-(.Lbase-two_to_31)],%f4
+ ldd [%o7-(LOC(base)-two_to_31)],%f4
#else
sethi %hi(two_to_31),%o7
ldd [%o7+%lo(two_to_31)],%f4
#endif
fcmped %f2,%f4
nop
- fbge,a L251
+ fbge,a LOC(251)
fsubd %f2,%f4,%f2
fdtoi %f2,%f2
st %f2,[%fp-8]
- b L252
+ b LOC(252)
ld [%fp-8],%i4
-.L251:
+LOC(251):
fdtoi %f2,%f2
st %f2,[%fp-8]
ld [%fp-8],%i4
sethi %hi(-2147483648),%g2
xor %i4,%g2,%i4
-.L252:
+LOC(252):
wr %g0,%i4,%y
sra %i3,31,%g2
and %i4,%g2,%g2
rd %y,%g3
subcc %i2,%g3,%o7
subxcc %i1,%i0,%g0
- be L253
+ be LOC(253)
cmp %o7,%i3
add %i4,-1,%i0
st %o7,[%i5]
ret
restore
-.L253:
- blu L246
+LOC(253):
+ blu LOC(246)
mov %i4,%i0
add %i4,1,%i0
sub %o7,%i3,%o7
-.L246:
+LOC(246):
st %o7,[%i5]
ret
restore
- .size __udiv_qrnnd, .-__udiv_qrnnd
+END(__udiv_qrnnd)
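
__udiv_qrnnd estimates the quotient of the two-limb value n1*2**32 + n0 over d on the FPU: fitod converts each word as a signed 32-bit integer, so two_to_32 is added back whenever a word has its top bit set, two_to_31 plays the analogous role for quotients at or above 2**31, and the integer tail recomputes the remainder exactly and moves the estimate by at most one. A C sketch of the idea (illustrative; assumes the usual precondition n1 < d):

    #include <stdint.h>

    /* Floating-point quotient estimate plus exact integer correction,
       in the spirit of __udiv_qrnnd.  */
    static uint32_t
    udiv_qrnnd_ref (uint32_t *rem, uint32_t n1, uint32_t n0, uint32_t d)
    {
      double nn = (double) n1 * 4294967296.0 + (double) n0;
      /* A 53-bit double cannot hold all 64 dividend bits, so the
         truncated quotient may be off by one either way; clamp the
         rare rounding overshoot past 2**32 - 1.  */
      uint64_t qe = (uint64_t) (nn / (double) d);
      uint32_t q = qe > 0xffffffff ? 0xffffffff : (uint32_t) qe;
      uint64_t n = ((uint64_t) n1 << 32) | n0;
      uint64_t r = n - (uint64_t) q * d;
      if ((int64_t) r < 0)        /* estimate one too high */
        {
          q--;
          r += d;
        }
      else if (r >= d)            /* estimate one too low */
        {
          q++;
          r -= d;
        }
      *rem = (uint32_t) r;
      return q;
    }
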
* bnz overflow (or tnz)
*/
-#include "DEFS.h"
-FUNC(.umul)
+#include <sysdep.h>
+
+ENTRY(.umul)
or %o0, %o1, %o4
- mov %o0, %y ! multiplier -> Y
- andncc %o4, 0xfff, %g0 ! test bits 12..31 of *both* args
- be Lmul_shortway ! if zero, can do it the short way
- andcc %g0, %g0, %o4 ! zero the partial product and clear N and V
+ mov %o0, %y ! multiplier -> Y
+ andncc %o4, 0xfff, %g0 ! test bits 12..31 of *both* args
+ be LOC(mul_shortway) ! if zero, can do it the short way
+ andcc %g0, %g0, %o4 ! zero the partial product; clear N & V
/*
* Long multiply. 32 steps, followed by a final shift step.
mulscc %o4, %o1, %o4 ! 32
mulscc %o4, %g0, %o4 ! final shift
-
/*
* Normally, with the shift-and-add approach, if both numbers are
* positive you get the correct result. With 32-bit two's-complement
#if 0
tst %o1
bl,a 1f ! if %o1 < 0 (high order bit = 1),
- add %o4, %o0, %o4 ! %o4 += %o0 (add y to upper half)
+ add %o4, %o0, %o4 ! %o4 += %o0 (add y to upper half)
1: rd %y, %o0 ! get lower half of product
retl
- addcc %o4, %g0, %o1 ! put upper half in place and set Z for %o1==0
+ addcc %o4, %g0, %o1 ! put upper half in place and set Z for %o1==0
#else
/* Faster code from tege@sics.se. */
sra %o1, 31, %o2 ! make mask from sign bit
and %o0, %o2, %o2 ! %o2 = 0 or %o0, depending on sign of %o1
rd %y, %o0 ! get lower half of product
retl
- addcc %o4, %o2, %o1 ! add compensation and put upper half in place
+ addcc %o4, %o2, %o1 ! add compensation and put upper half in place
#endif
-Lmul_shortway:
+LOC(mul_shortway):
/*
* Short multiply. 12 steps, followed by a final shift step.
* The resulting bits are off by 12 and (32-12) = 20 bit positions,
srl %o5, 20, %o5 ! shift low bits right 20
or %o5, %o0, %o0
retl
- addcc %g0, %g0, %o1 ! %o1 = zero, and set Z
+ addcc %g0, %g0, %o1 ! %o1 = zero, and set Z
+
+END(.umul)
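
The compensation at the end of .umul rests on a small identity: the mulscc sequence forms a 32x32 product with the multiplicand treated as signed, and the unsigned product's high word differs from the signed one by exactly the multiplier whenever the multiplicand's sign bit is set. A C demonstration (purely illustrative; assumes arithmetic right shift, matching the sra above):

    #include <stdint.h>

    /* Rebuild the unsigned 64-bit product from the signed-multiplicand
       product that the mulscc loop produces.  */
    static uint64_t
    umul_ref (uint32_t a, uint32_t b)
    {
      int64_t sprod = (int64_t) a * (int32_t) b;  /* a * (b as signed) */
      uint32_t hi = (uint32_t) ((uint64_t) sprod >> 32);
      uint32_t lo = (uint32_t) sprod;
      uint32_t mask = (uint32_t) ((int32_t) b >> 31);  /* sra */
      hi += a & mask;     /* add compensation iff bit 31 of b is set */
      return ((uint64_t) hi << 32) | lo;
    }

For any a and b, umul_ref (a, b) equals (uint64_t) a * b; the branch-free mask form is the "faster code from tege@sics.se", while the conditional version sits under #if 0.
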
-#include "sysdep.h"
-#ifdef __linux__
-#include <asm/traps.h>
-#else
-#ifdef __svr4__
+#include <sysdep.h>
#include <sys/trap.h>
-#else
-#include <machine/trap.h>
-#endif
-#endif
ENTRY(.urem)
1:
	cmp	%o3, %o5		! if the divisor (%o5) exceeds the dividend (%o3), done
- blu Lgot_result ! (and algorithm fails otherwise)
+ blu LOC(got_result) ! (and algorithm fails otherwise)
clr %o2
sethi %hi(1 << (32 - 4 - 1)), %g1
cmp %o3, %g1
- blu Lnot_really_big
+ blu LOC(not_really_big)
clr %o4
! Here the dividend is >= 2**(31-N) or so. We must be careful here,
1:
cmp %o5, %g1
bgeu 3f
- mov 1, %g7
+ mov 1, %g2
sll %o5, 4, %o5
b 1b
add %o4, 1, %o4
- ! Now compute %g7.
+ ! Now compute %g2.
2: addcc %o5, %o5, %o5
- bcc Lnot_too_big
- add %g7, 1, %g7
+ bcc LOC(not_too_big)
+ add %g2, 1, %g2
	! We get here if %o5 overflowed while shifting.
! This means that %o3 has the high-order bit set.
sll %g1, 4, %g1 ! high order bit
srl %o5, 1, %o5 ! rest of %o5
add %o5, %g1, %o5
- b Ldo_single_div
- sub %g7, 1, %g7
+ b LOC(do_single_div)
+ sub %g2, 1, %g2
- Lnot_too_big:
+ LOC(not_too_big):
3: cmp %o5, %o3
blu 2b
nop
- be Ldo_single_div
+ be LOC(do_single_div)
nop
/* NB: these are commented out in the V8-Sparc manual as well */
/* (I do not understand this) */
! %o5 > %o3: went too far: back up 1 step
! srl %o5, 1, %o5
- ! dec %g7
+ ! dec %g2
! do single-bit divide steps
!
! We have to be careful here. We know that %o3 >= %o5, so we can do the
! order bit set in the first step, just falling into the regular
! division loop will mess up the first time around.
! So we unroll slightly...
- Ldo_single_div:
- subcc %g7, 1, %g7
- bl Lend_regular_divide
+ LOC(do_single_div):
+ subcc %g2, 1, %g2
+ bl LOC(end_regular_divide)
nop
sub %o3, %o5, %o3
mov 1, %o2
- b Lend_single_divloop
+ b LOC(end_single_divloop)
nop
- Lsingle_divloop:
+ LOC(single_divloop):
sll %o2, 1, %o2
bl 1f
srl %o5, 1, %o5
add %o3, %o5, %o3
sub %o2, 1, %o2
2:
- Lend_single_divloop:
- subcc %g7, 1, %g7
- bge Lsingle_divloop
+ LOC(end_single_divloop):
+ subcc %g2, 1, %g2
+ bge LOC(single_divloop)
tst %o3
- b,a Lend_regular_divide
+ b,a LOC(end_regular_divide)
-Lnot_really_big:
+LOC(not_really_big):
1:
sll %o5, 4, %o5
cmp %o5, %o3
bleu 1b
addcc %o4, 1, %o4
- be Lgot_result
+ be LOC(got_result)
sub %o4, 1, %o4
tst %o3 ! set up for initial iteration
-Ldivloop:
+LOC(divloop):
sll %o2, 4, %o2
! depth 1, accumulated bits 0
- bl L.1.16
+ bl LOC(1.16)
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
! depth 2, accumulated bits 1
- bl L.2.17
+ bl LOC(2.17)
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
! depth 3, accumulated bits 3
- bl L.3.19
+ bl LOC(3.19)
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
! depth 4, accumulated bits 7
- bl L.4.23
+ bl LOC(4.23)
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
b 9f
add %o2, (7*2+1), %o2
-
-L.4.23:
+
+LOC(4.23):
! remainder is negative
addcc %o3,%o5,%o3
b 9f
add %o2, (7*2-1), %o2
-
-
-L.3.19:
+
+
+LOC(3.19):
! remainder is negative
addcc %o3,%o5,%o3
! depth 4, accumulated bits 5
- bl L.4.21
+ bl LOC(4.21)
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
b 9f
add %o2, (5*2+1), %o2
-
-L.4.21:
+
+LOC(4.21):
! remainder is negative
addcc %o3,%o5,%o3
b 9f
add %o2, (5*2-1), %o2
-
-
-
-L.2.17:
+
+
+
+LOC(2.17):
! remainder is negative
addcc %o3,%o5,%o3
! depth 3, accumulated bits 1
- bl L.3.17
+ bl LOC(3.17)
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
! depth 4, accumulated bits 3
- bl L.4.19
+ bl LOC(4.19)
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
b 9f
add %o2, (3*2+1), %o2
-
-L.4.19:
+
+LOC(4.19):
! remainder is negative
addcc %o3,%o5,%o3
b 9f
add %o2, (3*2-1), %o2
-
-
-L.3.17:
+
+
+LOC(3.17):
! remainder is negative
addcc %o3,%o5,%o3
! depth 4, accumulated bits 1
- bl L.4.17
+ bl LOC(4.17)
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
b 9f
add %o2, (1*2+1), %o2
-
-L.4.17:
+
+LOC(4.17):
! remainder is negative
addcc %o3,%o5,%o3
b 9f
add %o2, (1*2-1), %o2
-
-
-
-
-L.1.16:
+
+
+
+
+LOC(1.16):
! remainder is negative
addcc %o3,%o5,%o3
! depth 2, accumulated bits -1
- bl L.2.15
+ bl LOC(2.15)
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
! depth 3, accumulated bits -1
- bl L.3.15
+ bl LOC(3.15)
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
! depth 4, accumulated bits -1
- bl L.4.15
+ bl LOC(4.15)
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
b 9f
add %o2, (-1*2+1), %o2
-
-L.4.15:
+
+LOC(4.15):
! remainder is negative
addcc %o3,%o5,%o3
b 9f
add %o2, (-1*2-1), %o2
-
-
-L.3.15:
+
+
+LOC(3.15):
! remainder is negative
addcc %o3,%o5,%o3
! depth 4, accumulated bits -3
- bl L.4.13
+ bl LOC(4.13)
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
b 9f
add %o2, (-3*2+1), %o2
-
-L.4.13:
+
+LOC(4.13):
! remainder is negative
addcc %o3,%o5,%o3
b 9f
add %o2, (-3*2-1), %o2
-
-
-
-L.2.15:
+
+
+
+LOC(2.15):
! remainder is negative
addcc %o3,%o5,%o3
! depth 3, accumulated bits -3
- bl L.3.13
+ bl LOC(3.13)
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
! depth 4, accumulated bits -5
- bl L.4.11
+ bl LOC(4.11)
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
b 9f
add %o2, (-5*2+1), %o2
-
-L.4.11:
+
+LOC(4.11):
! remainder is negative
addcc %o3,%o5,%o3
b 9f
add %o2, (-5*2-1), %o2
-
-
-L.3.13:
+
+
+LOC(3.13):
! remainder is negative
addcc %o3,%o5,%o3
! depth 4, accumulated bits -7
- bl L.4.9
+ bl LOC(4.9)
srl %o5,1,%o5
! remainder is positive
subcc %o3,%o5,%o3
b 9f
add %o2, (-7*2+1), %o2
-
-L.4.9:
+
+LOC(4.9):
! remainder is negative
addcc %o3,%o5,%o3
b 9f
add %o2, (-7*2-1), %o2
-
-
-
-
+
+
+
+
9:
-Lend_regular_divide:
+LOC(end_regular_divide):
subcc %o4, 1, %o4
- bge Ldivloop
+ bge LOC(divloop)
tst %o3
- bl,a Lgot_result
+ bl,a LOC(got_result)
! non-restoring fixup here (one instruction only!)
add %o3, %o1, %o3
-Lgot_result:
+LOC(got_result):
retl
mov %o3, %o0
+
+END(.urem)
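
.urem shares the generated division loop with .udiv line for line; only the tail differs. Where .udiv decrements the quotient on a negative final remainder, .urem adds the divisor back (add %o3, %o1, %o3) and returns the remainder in %o3 rather than the quotient in %o2. The remainder-only counterpart of the earlier sketch (same illustrative assumptions; steps = 32 for full 32-bit operands):

    #include <stdint.h>

    /* Same non-restoring loop as udiv_nonrestoring above, keeping
       only the remainder; the fixup restores it instead of touching
       the quotient.  */
    static uint32_t
    urem_nonrestoring (uint32_t n, uint32_t d, int steps)
    {
      int64_t rem = n;
      for (int i = steps - 1; i >= 0; i--)
        rem += rem >= 0 ? -((int64_t) d << i) : (int64_t) d << i;
      if (rem < 0)
        rem += d;   /* the one-instruction fixup */
      return (uint32_t) rem;
    }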