__mpn_add_n:
.frame $30,0,$26,0
- ldq $3,0($17)
- ldq $4,0($18)
-
- subq $19,1,$19
- and $19,4-1,$2 # number of limbs in first loop
- bis $31,$31,$0
- beq $2,.L0 # if multiple of 4 limbs, skip first loop
-
- subq $19,$2,$19
-
-.Loop0: subq $2,1,$2
+ or $31,$31,$25 # clear cy
+ subq $19,4,$19 # decr loop cnt
+ blt $19,.Lend2 # if less than 4 limbs, goto 2nd loop
+ # Start software pipeline for 1st loop
+ ldq $0,0($18)
+ ldq $1,8($18)
+ ldq $4,0($17)
ldq $5,8($17)
- addq $4,$0,$4
- ldq $6,8($18)
- cmpult $4,$0,$1
- addq $3,$4,$4
- cmpult $4,$3,$0
- stq $4,0($16)
- or $0,$1,$0
-
- addq $17,8,$17
- addq $18,8,$18
- bis $5,$5,$3
- bis $6,$6,$4
- addq $16,8,$16
- bne $2,.Loop0
-
-.L0: beq $19,.Lend
-
+ addq $17,32,$17 # update s1_ptr
+ ldq $2,16($18)
+ addq $0,$4,$20 # 1st main add
+ ldq $3,24($18)
+ subq $19,4,$19 # decr loop cnt
+ ldq $6,-16($17)
+ cmpult $20,$0,$25 # compute cy from last add
+ ldq $7,-8($17)
+ addq $1,$25,$28 # cy add
+ addq $18,32,$18 # update s2_ptr
+ addq $5,$28,$21 # 2nd main add
+ cmpult $28,$25,$8 # compute cy from last add
+ blt $19,.Lend1 # if less than 4 limbs remain, jump
+ # 1st loop handles groups of 4 limbs in a software pipeline
.align 4
-.Loop: subq $19,4,$19
- unop
-
- ldq $6,8($18)
- addq $4,$0,$0
+.Loop: cmpult $21,$28,$25 # compute cy from last add
+ ldq $0,0($18)
+ or $8,$25,$25 # combine cy from the two adds
+ ldq $1,8($18)
+ addq $2,$25,$28 # cy add
+ ldq $4,0($17)
+ addq $28,$6,$22 # 3rd main add
ldq $5,8($17)
- cmpult $0,$4,$1
- ldq $4,16($18)
- addq $3,$0,$20
- cmpult $20,$3,$0
- ldq $3,16($17)
- or $0,$1,$0
- addq $6,$0,$0
- cmpult $0,$6,$1
- ldq $6,24($18)
- addq $5,$0,$21
- cmpult $21,$5,$0
- ldq $5,24($17)
- or $0,$1,$0
- addq $4,$0,$0
- cmpult $0,$4,$1
- ldq $4,32($18)
- addq $3,$0,$22
- cmpult $22,$3,$0
- ldq $3,32($17)
- or $0,$1,$0
- addq $6,$0,$0
- cmpult $0,$6,$1
- addq $5,$0,$23
- cmpult $23,$5,$0
- or $0,$1,$0
-
+ cmpult $28,$25,$8 # compute cy from last add
+ cmpult $22,$28,$25 # compute cy from last add
stq $20,0($16)
+ or $8,$25,$25 # combine cy from the two adds
stq $21,8($16)
- stq $22,16($16)
- stq $23,24($16)
-
- addq $17,32,$17
- addq $18,32,$18
- addq $16,32,$16
- bne $19,.Loop
+ addq $3,$25,$28 # cy add
+ addq $28,$7,$23 # 4th main add
+ cmpult $28,$25,$8 # compute cy from last add
+ cmpult $23,$28,$25 # compute cy from last add
+ addq $17,32,$17 # update s1_ptr
+ or $8,$25,$25 # combine cy from the two adds
+ addq $16,32,$16 # update res_ptr
+ addq $0,$25,$28 # cy add
+ ldq $2,16($18)
+ addq $4,$28,$20 # 1st main add
+ ldq $3,24($18)
+ cmpult $28,$25,$8 # compute cy from last add
+ ldq $6,-16($17)
+ cmpult $20,$28,$25 # compute cy from last add
+ ldq $7,-8($17)
+ or $8,$25,$25 # combine cy from the two adds
+ subq $19,4,$19 # decr loop cnt
+ stq $22,-16($16)
+ addq $1,$25,$28 # cy add
+ stq $23,-8($16)
+ addq $5,$28,$21 # 2nd main add
+ addq $18,32,$18 # update s2_ptr
+ cmpult $28,$25,$8 # compute cy from last add
+ bge $19,.Loop
+ # Finish software pipeline for 1st loop
+.Lend1: cmpult $21,$28,$25 # compute cy from last add
+ or $8,$25,$25 # combine cy from the two adds
+ addq $2,$25,$28 # cy add
+ addq $28,$6,$22 # 3rd main add
+ cmpult $28,$25,$8 # compute cy from last add
+ cmpult $22,$28,$25 # compute cy from last add
+ stq $20,0($16)
+ or $8,$25,$25 # combine cy from the two adds
+ stq $21,8($16)
+ addq $3,$25,$28 # cy add
+ addq $28,$7,$23 # 4th main add
+ cmpult $28,$25,$8 # compute cy from last add
+ cmpult $23,$28,$25 # compute cy from last add
+ or $8,$25,$25 # combine cy from the two adds
+ addq $16,32,$16 # update res_ptr
+ stq $22,-16($16)
+ stq $23,-8($16)
+.Lend2: addq $19,4,$19 # restore loop cnt
+ beq $19,.Lret
+ # Start software pipeline for 2nd loop
+ ldq $0,0($18)
+ ldq $4,0($17)
+ subq $19,1,$19
+ beq $19,.Lend0
+ # 2nd loop handles remaining 1-3 limbs
+ .align 4
+.Loop0: addq $0,$25,$28 # cy add
+ ldq $0,8($18)
+ addq $4,$28,$20 # main add
+ ldq $4,8($17)
+ addq $18,8,$18
+ cmpult $28,$25,$8 # compute cy from last add
+ addq $17,8,$17
+ stq $20,0($16)
+ cmpult $20,$28,$25 # compute cy from last add
+ subq $19,1,$19 # decr loop cnt
+ or $8,$25,$25 # combine cy from the two adds
+ addq $16,8,$16
+ bne $19,.Loop0
+.Lend0: addq $0,$25,$28 # cy add
+ addq $4,$28,$20 # main add
+ cmpult $28,$25,$8 # compute cy from last add
+ cmpult $20,$28,$25 # compute cy from last add
+ stq $20,0($16)
+ or $8,$25,$25 # combine cy from the two adds
-.Lend: addq $4,$0,$4
- cmpult $4,$0,$1
- addq $3,$4,$4
- cmpult $4,$3,$0
- stq $4,0($16)
- or $0,$1,$0
+.Lret: or $25,$31,$0 # return cy
ret $31,($26),1
-
.end __mpn_add_n
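#
# The carry handling interleaved through the pipeline above is the usual
# two-compare scheme: each limb addition is split into a carry-in add and a
# main add, each of which can carry out at most once, and the two carry
# bits are OR-combined. A plain C model of the loop (names illustrative,
# not part of this patch; mp_limb stands for the 64-bit Alpha limb):
#
#   typedef unsigned long mp_limb;
#
#   static mp_limb
#   add_n_model (mp_limb *res, const mp_limb *s1, const mp_limb *s2, long n)
#   {
#     mp_limb cy = 0;              /* $25 */
#     for (long i = 0; i < n; i++)
#       {
#         mp_limb t = s2[i] + cy;  /* cy add:   addq $0,$25,$28 */
#         mp_limb c1 = t < cy;     /* cmpult $28,$25,$8         */
#         mp_limb r = s1[i] + t;   /* main add: addq $4,$28,$20 */
#         mp_limb c2 = r < t;      /* cmpult $20,$28,$25        */
#         res[i] = r;
#         cy = c1 | c2;            /* or $8,$25,$25; at most one is set */
#       }
#     return cy;
#   }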
# size r18
# cnt r19
- # This code runs at 4.25 cycles/limb on the EV5.
+ # This code runs at 3.25 cycles/limb on the EV5.
.set noreorder
.set noat
and $18,4-1,$28 # number of limbs in first loop
srl $4,$20,$0 # compute function result
- beq $28,L0
+ beq $28,.L0
subq $18,$28,$18
.align 3
-Loop0: ldq $3,-16($17)
+.Loop0: ldq $3,-16($17)
subq $16,8,$16
sll $4,$19,$5
subq $17,8,$17
or $3,$3,$4
or $5,$6,$8
stq $8,0($16)
- bne $28,Loop0
+ bne $28,.Loop0
-L0: sll $4,$19,$24
- beq $18,Lend
+.L0: sll $4,$19,$24
+ beq $18,.Lend
# warm up phase 1
ldq $1,-16($17)
subq $18,4,$18
ldq $2,-24($17)
ldq $3,-32($17)
ldq $4,-40($17)
- beq $18,Lcool1
+ beq $18,.Lend1
# warm up phase 2
srl $1,$20,$7
sll $1,$19,$21
sll $4,$19,$24
ldq $4,-72($17)
subq $18,4,$18
- beq $18,Lcool1
+ beq $18,.Lend2
.align 4
# main loop
-Loop: stq $7,-8($16)
+.Loop: stq $7,-8($16)
or $5,$22,$5
stq $8,-16($16)
or $6,$23,$6
subq $16,32,$16
srl $4,$20,$6
- ldq $3,-96($17
+ ldq $3,-96($17)
sll $4,$19,$24
ldq $4,-104($17)
subq $17,32,$17
- bne $18,Loop
- unop
- unop
+ bne $18,.Loop
# cool down phase 2/1
-Lcool1: stq $7,-8($16)
+.Lend2: stq $7,-8($16)
or $5,$22,$5
stq $8,-16($16)
or $6,$23,$6
ret $31,($26),1
# cool down phase 1/1
-Lcool1: srl $1,$20,$7
+.Lend1: srl $1,$20,$7
sll $1,$19,$21
srl $2,$20,$8
sll $2,$19,$22
stq $24,-40($16)
ret $31,($26),1
-Lend stq $24,-8($16)
+.Lend: stq $24,-8($16)
ret $31,($26),1
.end __mpn_lshift
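#
# The label fixes above do not change what the routine computes: shift the
# usize-limb operand left by cnt bits (0 < cnt < 64) and return the bits
# shifted out of the top limb (the "function result" computed with srl).
# A plain C model, with illustrative names only:
#
#   typedef unsigned long mp_limb;
#
#   static mp_limb
#   lshift_model (mp_limb *wp, const mp_limb *up, long usize, unsigned cnt)
#   {
#     unsigned tnc = 64 - cnt;         /* $20 */
#     mp_limb high = up[usize - 1];
#     mp_limb retval = high >> tnc;    /* srl $4,$20,$0 */
#     for (long i = usize - 1; i > 0; i--)
#       {
#         mp_limb low = up[i - 1];
#         wp[i] = (high << cnt) | (low >> tnc);
#         high = low;
#       }
#     wp[0] = high << cnt;             /* the $24 store at .Lend */
#     return retval;
#   }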
# size r18
# cnt r19
- # This code runs at 4.25 cycles/limb on the EV5.
+ # This code runs at 3.25 cycles/limb on the EV5.
.set noreorder
.set noat
and $18,4-1,$28 # number of limbs in first loop
sll $4,$20,$0 # compute function result
- beq $28,L0
+ beq $28,.L0
subq $18,$28,$18
.align 3
-Loop0: ldq $3,8($17)
+.Loop0: ldq $3,8($17)
addq $16,8,$16
srl $4,$19,$5
addq $17,8,$17
or $3,$3,$4
or $5,$6,$8
stq $8,-8($16)
- bne $28,Loop0
+ bne $28,.Loop0
-L0: srl $4,$19,$24
- beq $18,Lend
+.L0: srl $4,$19,$24
+ beq $18,.Lend
# warm up phase 1
ldq $1,8($17)
subq $18,4,$18
ldq $2,16($17)
ldq $3,24($17)
ldq $4,32($17)
- beq $18,Lcool1
+ beq $18,.Lend1
# warm up phase 2
sll $1,$20,$7
srl $1,$19,$21
srl $4,$19,$24
ldq $4,64($17)
subq $18,4,$18
- beq $18,Lcool2
+ beq $18,.Lend2
.align 4
# main loop
-Loop: stq $7,0($16)
+.Loop: stq $7,0($16)
or $5,$22,$5
stq $8,8($16)
or $6,$23,$6
ldq $4,96($17)
addq $17,32,$17
- bne $18,Loop
- unop
- unop
+ bne $18,.Loop
# cool down phase 2/1
-Lcool2: stq $7,0($16)
+.Lend2: stq $7,0($16)
or $5,$22,$5
stq $8,8($16)
or $6,$23,$6
ret $31,($26),1
# cool down phase 1/1
-Lcool1: sll $1,$20,$7
+.Lend1: sll $1,$20,$7
srl $1,$19,$21
sll $2,$20,$8
srl $2,$19,$22
stq $24,32($16)
ret $31,($26),1
-Lend: stq $24,0($16)
+.Lend: stq $24,0($16)
ret $31,($26),1
.end __mpn_rshift
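#
# __mpn_rshift mirrors the lshift model above with the directions swapped:
# it walks upward from the low limb and returns the out-shifted low bits,
# left-justified. Sketch under the same illustrative naming:
#
#   typedef unsigned long mp_limb;
#
#   static mp_limb
#   rshift_model (mp_limb *wp, const mp_limb *up, long usize, unsigned cnt)
#   {
#     unsigned tnc = 64 - cnt;
#     mp_limb low = up[0];
#     mp_limb retval = low << tnc;     /* sll $4,$20,$0 */
#     for (long i = 0; i < usize - 1; i++)
#       {
#         mp_limb high = up[i + 1];
#         wp[i] = (low >> cnt) | (high << tnc);
#         low = high;
#       }
#     wp[usize - 1] = low >> cnt;      /* the $24 store at .Lend */
#     return retval;
#   }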
--- /dev/null
+ # Alpha __mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
+ # store difference in a third limb vector.
+
+ # Copyright (C) 1995 Free Software Foundation, Inc.
+
+ # This file is part of the GNU MP Library.
+
+ # The GNU MP Library is free software; you can redistribute it and/or modify
+ # it under the terms of the GNU Library General Public License as published by
+ # the Free Software Foundation; either version 2 of the License, or (at your
+ # option) any later version.
+
+ # The GNU MP Library is distributed in the hope that it will be useful, but
+ # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+ # License for more details.
+
+ # You should have received a copy of the GNU Library General Public License
+ # along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+ # the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+
+ # INPUT PARAMETERS
+ # res_ptr $16
+ # s1_ptr $17
+ # s2_ptr $18
+ # size $19
+
+ .set noreorder
+ .set noat
+.text
+ .align 3
+ .globl __mpn_sub_n
+ .ent __mpn_sub_n
+__mpn_sub_n:
+ .frame $30,0,$26,0
+
+ or $31,$31,$25 # clear cy
+ subq $19,4,$19 # decr loop cnt
+ blt $19,.Lend2 # if less than 4 limbs, goto 2nd loop
+ # Start software pipeline for 1st loop
+ ldq $0,0($18)
+ ldq $1,8($18)
+ ldq $4,0($17)
+ ldq $5,8($17)
+ addq $17,32,$17 # update s1_ptr
+ ldq $2,16($18)
+ subq $4,$0,$20 # 1st main sub
+ ldq $3,24($18)
+ subq $19,4,$19 # decr loop cnt
+ ldq $6,-16($17)
+ cmpult $4,$20,$25 # compute cy from last sub
+ ldq $7,-8($17)
+ addq $1,$25,$28 # cy add
+ addq $18,32,$18 # update s2_ptr
+ subq $5,$28,$21 # 2nd main sub
+ cmpult $28,$25,$8 # compute cy from last add
+ blt $19,.Lend1 # if less than 4 limbs remain, jump
+ # 1st loop handles groups of 4 limbs in a software pipeline
+ .align 4
+.Loop: cmpult $5,$21,$25 # compute cy from last sub
+ ldq $0,0($18)
+ or $8,$25,$25 # combine cy from the add and the sub
+ ldq $1,8($18)
+ addq $2,$25,$28 # cy add
+ ldq $4,0($17)
+ subq $6,$28,$22 # 3rd main sub
+ ldq $5,8($17)
+ cmpult $28,$25,$8 # compute cy from last add
+ cmpult $6,$22,$25 # compute cy from last sub
+ stq $20,0($16)
+ or $8,$25,$25 # combine cy from the add and the sub
+ stq $21,8($16)
+ addq $3,$25,$28 # cy add
+ subq $7,$28,$23 # 4th main sub
+ cmpult $28,$25,$8 # compute cy from last add
+ cmpult $7,$23,$25 # compute cy from last sub
+ addq $17,32,$17 # update s1_ptr
+ or $8,$25,$25 # combine cy from the add and the sub
+ addq $16,32,$16 # update res_ptr
+ addq $0,$25,$28 # cy add
+ ldq $2,16($18)
+ subq $4,$28,$20 # 1st main sub
+ ldq $3,24($18)
+ cmpult $28,$25,$8 # compute cy from last add
+ ldq $6,-16($17)
+ cmpult $4,$20,$25 # compute cy from last sub
+ ldq $7,-8($17)
+ or $8,$25,$25 # combine cy from the add and the sub
+ subq $19,4,$19 # decr loop cnt
+ stq $22,-16($16)
+ addq $1,$25,$28 # cy add
+ stq $23,-8($16)
+ subq $5,$28,$21 # 2nd main sub
+ addq $18,32,$18 # update s2_ptr
+ cmpult $28,$25,$8 # compute cy from last add
+ bge $19,.Loop
+ # Finish software pipeline for 1st loop
+.Lend1: cmpult $5,$21,$25 # compute cy from last sub
+ or $8,$25,$25 # combine cy from the add and the sub
+ addq $2,$25,$28 # cy add
+ subq $6,$28,$22 # 3rd main sub
+ cmpult $28,$25,$8 # compute cy from last add
+ cmpult $6,$22,$25 # compute cy from last sub
+ stq $20,0($16)
+ or $8,$25,$25 # combine cy from the add and the sub
+ stq $21,8($16)
+ addq $3,$25,$28 # cy add
+ subq $7,$28,$23 # 4th main sub
+ cmpult $28,$25,$8 # compute cy from last add
+ cmpult $7,$23,$25 # compute cy from last sub
+ or $8,$25,$25 # combine cy from the add and the sub
+ addq $16,32,$16 # update res_ptr
+ stq $22,-16($16)
+ stq $23,-8($16)
+.Lend2: addq $19,4,$19 # restore loop cnt
+ beq $19,.Lret
+ # Start software pipeline for 2nd loop
+ ldq $0,0($18)
+ ldq $4,0($17)
+ subq $19,1,$19
+ beq $19,.Lend0
+ # 2nd loop handles remaining 1-3 limbs
+ .align 4
+.Loop0: addq $0,$25,$28 # cy add
+ ldq $0,8($18)
+ subq $4,$28,$20 # main sub
+ ldq $1,8($17)
+ addq $18,8,$18
+ cmpult $28,$25,$8 # compute cy from last add
+ addq $17,8,$17
+ stq $20,0($16)
+ cmpult $4,$20,$25 # compute cy from last sub
+ subq $19,1,$19 # decr loop cnt
+ or $8,$25,$25 # combine cy from the add and the sub
+ addq $16,8,$16
+ or $1,$31,$4 # move s1 limb into $4 for next iteration
+ bne $19,.Loop0
+.Lend0: addq $0,$25,$28 # cy add
+ subq $4,$28,$20 # main sub
+ cmpult $28,$25,$8 # compute cy from last add
+ cmpult $4,$20,$25 # compute cy from last sub
+ stq $20,0($16)
+ or $8,$25,$25 # combine cy from the add and the sub
+
+.Lret: or $25,$31,$0 # return cy
+ ret $31,($26),1
+ .end __mpn_sub_n
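#
# The borrow propagation mirrors __mpn_add_n above: a carry-in add followed
# by a main subtract, with the borrow detected by comparing the minuend
# against the difference (the cmpult $4,$20 pattern). C model of the loop,
# names illustrative and not part of this patch:
#
#   typedef unsigned long mp_limb;
#
#   static mp_limb
#   sub_n_model (mp_limb *res, const mp_limb *s1, const mp_limb *s2, long n)
#   {
#     mp_limb cy = 0;
#     for (long i = 0; i < n; i++)
#       {
#         mp_limb t = s2[i] + cy;   /* cy add:   addq $0,$25,$28 */
#         mp_limb c1 = t < cy;      /* cmpult $28,$25,$8         */
#         mp_limb r = s1[i] - t;    /* main sub: subq $4,$28,$20 */
#         mp_limb c2 = s1[i] < r;   /* cmpult $4,$20,$25         */
#         res[i] = r;
#         cy = c1 | c2;             /* the two can never both be set */
#       }
#     return cy;
#   }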
-/* __mpn_add_n -- Add two limb vectors of equal, non-zero length.
+/* mpn_add_n -- Add two limb vectors of equal, non-zero length.
-Copyright (C) 1992, 1993, 1994 Free Software Foundation, Inc.
+Copyright (C) 1992, 1993, 1994, 1996 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
mp_limb
#if __STDC__
-__mpn_add_n (mp_ptr res_ptr, mp_srcptr s1_ptr, mp_srcptr s2_ptr, mp_size_t size)
+mpn_add_n (mp_ptr res_ptr, mp_srcptr s1_ptr, mp_srcptr s2_ptr, mp_size_t size)
#else
-__mpn_add_n (res_ptr, s1_ptr, s2_ptr, size)
+mpn_add_n (res_ptr, s1_ptr, s2_ptr, size)
register mp_ptr res_ptr;
register mp_srcptr s1_ptr;
register mp_srcptr s2_ptr;
-/* __mpn_addmul_1 -- multiply the S1_SIZE long limb vector pointed to by S1_PTR
+/* mpn_addmul_1 -- multiply the S1_SIZE long limb vector pointed to by S1_PTR
by S2_LIMB, add the S1_SIZE least significant limbs of the product to the
limb vector pointed to by RES_PTR. Return the most significant limb of
the product, adjusted for carry-out from the addition.
-Copyright (C) 1992, 1993, 1994 Free Software Foundation, Inc.
+Copyright (C) 1992, 1993, 1994, 1996 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
#include "longlong.h"
mp_limb
-__mpn_addmul_1 (res_ptr, s1_ptr, s1_size, s2_limb)
+mpn_addmul_1 (res_ptr, s1_ptr, s1_size, s2_limb)
register mp_ptr res_ptr;
register mp_srcptr s1_ptr;
mp_size_t s1_size;
-/* __mpn_cmp -- Compare two low-level natural-number integers.
+/* mpn_cmp -- Compare two low-level natural-number integers.
-Copyright (C) 1991, 1993, 1994 Free Software Foundation, Inc.
+Copyright (C) 1991, 1993, 1994, 1996 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
int
#if __STDC__
-__mpn_cmp (mp_srcptr op1_ptr, mp_srcptr op2_ptr, mp_size_t size)
+mpn_cmp (mp_srcptr op1_ptr, mp_srcptr op2_ptr, mp_size_t size)
#else
-__mpn_cmp (op1_ptr, op2_ptr, size)
+mpn_cmp (op1_ptr, op2_ptr, size)
mp_srcptr op1_ptr;
mp_srcptr op2_ptr;
mp_size_t size;
-/* __mpn_divmod_1(quot_ptr, dividend_ptr, dividend_size, divisor_limb) --
+/* mpn_divmod_1(quot_ptr, dividend_ptr, dividend_size, divisor_limb) --
Divide (DIVIDEND_PTR,,DIVIDEND_SIZE) by DIVISOR_LIMB.
Write DIVIDEND_SIZE limbs of quotient at QUOT_PTR.
Return the single-limb remainder.
QUOT_PTR and DIVIDEND_PTR might point to the same limb.
-Copyright (C) 1991, 1993, 1994 Free Software Foundation, Inc.
+Copyright (C) 1991, 1993, 1994, 1996 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
mp_limb
#if __STDC__
-__mpn_divmod_1 (mp_ptr quot_ptr,
+mpn_divmod_1 (mp_ptr quot_ptr,
mp_srcptr dividend_ptr, mp_size_t dividend_size,
mp_limb divisor_limb)
#else
-__mpn_divmod_1 (quot_ptr, dividend_ptr, dividend_size, divisor_limb)
+mpn_divmod_1 (quot_ptr, dividend_ptr, dividend_size, divisor_limb)
mp_ptr quot_ptr;
mp_srcptr dividend_ptr;
mp_size_t dividend_size;
-/* __mpn_lshift -- Shift left low level.
+/* mpn_lshift -- Shift left low level.
-Copyright (C) 1991, 1993, 1994 Free Software Foundation, Inc.
+Copyright (C) 1991, 1993, 1994, 1996 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
mp_limb
#if __STDC__
-__mpn_lshift (register mp_ptr wp,
+mpn_lshift (register mp_ptr wp,
register mp_srcptr up, mp_size_t usize,
register unsigned int cnt)
#else
-__mpn_lshift (wp, up, usize, cnt)
+mpn_lshift (wp, up, usize, cnt)
register mp_ptr wp;
register mp_srcptr up;
mp_size_t usize;
-/* __mpn_mod_1(dividend_ptr, dividend_size, divisor_limb) --
+/* mpn_mod_1(dividend_ptr, dividend_size, divisor_limb) --
Divide (DIVIDEND_PTR,,DIVIDEND_SIZE) by DIVISOR_LIMB.
Return the single-limb remainder.
There are no constraints on the value of the divisor.
mp_limb
#if __STDC__
-__mpn_mod_1 (mp_srcptr dividend_ptr, mp_size_t dividend_size,
- mp_limb divisor_limb)
+mpn_mod_1 (mp_srcptr dividend_ptr, mp_size_t dividend_size,
+ mp_limb divisor_limb)
#else
-__mpn_mod_1 (dividend_ptr, dividend_size, divisor_limb)
+mpn_mod_1 (dividend_ptr, dividend_size, divisor_limb)
mp_srcptr dividend_ptr;
mp_size_t dividend_size;
mp_limb divisor_limb;
-/* __mpn_mul -- Multiply two natural numbers.
+/* mpn_mul -- Multiply two natural numbers.
-Copyright (C) 1991, 1993, 1994 Free Software Foundation, Inc.
+Copyright (C) 1991, 1993, 1994, 1996 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
mp_limb
#if __STDC__
-__mpn_mul (mp_ptr prodp,
- mp_srcptr up, mp_size_t usize,
- mp_srcptr vp, mp_size_t vsize)
+mpn_mul (mp_ptr prodp,
+ mp_srcptr up, mp_size_t usize,
+ mp_srcptr vp, mp_size_t vsize)
#else
-__mpn_mul (prodp, up, usize, vp, vsize)
+mpn_mul (prodp, up, usize, vp, vsize)
mp_ptr prodp;
mp_srcptr up;
mp_size_t usize;
mp_ptr prod_endp = prodp + usize + vsize - 1;
mp_limb cy;
mp_ptr tspace;
+ TMP_DECL (marker);
if (vsize < KARATSUBA_THRESHOLD)
{
cy_limb = 0;
}
else
- cy_limb = __mpn_mul_1 (prodp, up, usize, v_limb);
+ cy_limb = mpn_mul_1 (prodp, up, usize, v_limb);
prodp[usize] = cy_limb;
prodp++;
{
cy_limb = 0;
if (v_limb == 1)
- cy_limb = __mpn_add_n (prodp, prodp, up, usize);
+ cy_limb = mpn_add_n (prodp, prodp, up, usize);
}
else
- cy_limb = __mpn_addmul_1 (prodp, up, usize, v_limb);
+ cy_limb = mpn_addmul_1 (prodp, up, usize, v_limb);
prodp[usize] = cy_limb;
prodp++;
return cy_limb;
}
- tspace = (mp_ptr) alloca (2 * vsize * BYTES_PER_MP_LIMB);
+ TMP_MARK (marker);
+
+ tspace = (mp_ptr) TMP_ALLOC (2 * vsize * BYTES_PER_MP_LIMB);
MPN_MUL_N_RECURSE (prodp, up, vp, vsize, tspace);
prodp += vsize;
usize -= vsize;
if (usize >= vsize)
{
- mp_ptr tp = (mp_ptr) alloca (2 * vsize * BYTES_PER_MP_LIMB);
+ mp_ptr tp = (mp_ptr) TMP_ALLOC (2 * vsize * BYTES_PER_MP_LIMB);
do
{
MPN_MUL_N_RECURSE (tp, up, vp, vsize, tspace);
- cy = __mpn_add_n (prodp, prodp, tp, vsize);
- __mpn_add_1 (prodp + vsize, tp + vsize, vsize, cy);
+ cy = mpn_add_n (prodp, prodp, tp, vsize);
+ mpn_add_1 (prodp + vsize, tp + vsize, vsize, cy);
prodp += vsize;
up += vsize;
usize -= vsize;
if (usize != 0)
{
- __mpn_mul (tspace, vp, vsize, up, usize);
- cy = __mpn_add_n (prodp, prodp, tspace, vsize);
- __mpn_add_1 (prodp + vsize, tspace + vsize, usize, cy);
+ mpn_mul (tspace, vp, vsize, up, usize);
+ cy = mpn_add_n (prodp, prodp, tspace, vsize);
+ mpn_add_1 (prodp + vsize, tspace + vsize, usize, cy);
}
+ TMP_FREE (marker);
return *prod_endp;
}
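/* The alloca -> TMP_ALLOC conversion above routes temporary limb space
   through GMP's TMP_* macros so that configurations without a usable
   alloca can fall back to another allocator.  As a rough sketch only,
   the alloca-backed variant of the pattern amounts to (the real
   definitions live in gmp-impl.h and vary by configuration):

     #define TMP_DECL(m)
     #define TMP_MARK(m)
     #define TMP_ALLOC(n) alloca (n)
     #define TMP_FREE(m)

   so in this configuration the mark is unused and the space is released
   automatically on function return.  */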
-/* __mpn_mul_1 -- Multiply a limb vector with a single limb and
+/* mpn_mul_1 -- Multiply a limb vector with a single limb and
store the product in a second limb vector.
-Copyright (C) 1991, 1992, 1993, 1994 Free Software Foundation, Inc.
+Copyright (C) 1991, 1992, 1993, 1994, 1996 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
#include "longlong.h"
mp_limb
-__mpn_mul_1 (res_ptr, s1_ptr, s1_size, s2_limb)
+mpn_mul_1 (res_ptr, s1_ptr, s1_size, s2_limb)
register mp_ptr res_ptr;
register mp_srcptr s1_ptr;
mp_size_t s1_size;
-/* __mpn_mul_n -- Multiply two natural numbers of length n.
+/* mpn_mul_n -- Multiply two natural numbers of length n.
-Copyright (C) 1991, 1992, 1993, 1994 Free Software Foundation, Inc.
+Copyright (C) 1991, 1992, 1993, 1994, 1996 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
#define KARATSUBA_THRESHOLD 2
#endif
-void
-#if __STDC__
-____mpn_mul_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_ptr);
-#else
-____mpn_mul_n ();
-#endif
-
/* Handle simple cases with traditional multiplication.
This is the most critical code of multiplication. All multiplies rely
void
#if __STDC__
-____mpn_mul_n_basecase (mp_ptr prodp, mp_srcptr up, mp_srcptr vp, mp_size_t size)
+impn_mul_n_basecase (mp_ptr prodp, mp_srcptr up, mp_srcptr vp, mp_size_t size)
#else
-____mpn_mul_n_basecase (prodp, up, vp, size)
+impn_mul_n_basecase (prodp, up, vp, size)
mp_ptr prodp;
mp_srcptr up;
mp_srcptr vp;
cy_limb = 0;
}
else
- cy_limb = __mpn_mul_1 (prodp, up, size, v_limb);
+ cy_limb = mpn_mul_1 (prodp, up, size, v_limb);
prodp[size] = cy_limb;
prodp++;
{
cy_limb = 0;
if (v_limb == 1)
- cy_limb = __mpn_add_n (prodp, prodp, up, size);
+ cy_limb = mpn_add_n (prodp, prodp, up, size);
}
else
- cy_limb = __mpn_addmul_1 (prodp, up, size, v_limb);
+ cy_limb = mpn_addmul_1 (prodp, up, size, v_limb);
prodp[size] = cy_limb;
prodp++;
void
#if __STDC__
-____mpn_mul_n (mp_ptr prodp,
+impn_mul_n (mp_ptr prodp,
mp_srcptr up, mp_srcptr vp, mp_size_t size, mp_ptr tspace)
#else
-____mpn_mul_n (prodp, up, vp, size, tspace)
+impn_mul_n (prodp, up, vp, size, tspace)
mp_ptr prodp;
mp_srcptr up;
mp_srcptr vp;
mp_limb cy_limb;
MPN_MUL_N_RECURSE (prodp, up, vp, esize, tspace);
- cy_limb = __mpn_addmul_1 (prodp + esize, up, esize, vp[esize]);
+ cy_limb = mpn_addmul_1 (prodp + esize, up, esize, vp[esize]);
prodp[esize + esize] = cy_limb;
- cy_limb = __mpn_addmul_1 (prodp + esize, vp, size, up[esize]);
+ cy_limb = mpn_addmul_1 (prodp + esize, vp, size, up[esize]);
prodp[esize + size] = cy_limb;
}
/*** Product M. ________________
|_(U1-U0)(V0-V1)_| */
- if (__mpn_cmp (up + hsize, up, hsize) >= 0)
+ if (mpn_cmp (up + hsize, up, hsize) >= 0)
{
- __mpn_sub_n (prodp, up + hsize, up, hsize);
+ mpn_sub_n (prodp, up + hsize, up, hsize);
negflg = 0;
}
else
{
- __mpn_sub_n (prodp, up, up + hsize, hsize);
+ mpn_sub_n (prodp, up, up + hsize, hsize);
negflg = 1;
}
- if (__mpn_cmp (vp + hsize, vp, hsize) >= 0)
+ if (mpn_cmp (vp + hsize, vp, hsize) >= 0)
{
- __mpn_sub_n (prodp + hsize, vp + hsize, vp, hsize);
+ mpn_sub_n (prodp + hsize, vp + hsize, vp, hsize);
negflg ^= 1;
}
else
{
- __mpn_sub_n (prodp + hsize, vp, vp + hsize, hsize);
+ mpn_sub_n (prodp + hsize, vp, vp + hsize, hsize);
/* No change of NEGFLG. */
}
/* Read temporary operands from low part of PROD.
/*** Add/copy product H. */
MPN_COPY (prodp + hsize, prodp + size, hsize);
- cy = __mpn_add_n (prodp + size, prodp + size, prodp + size + hsize, hsize);
+ cy = mpn_add_n (prodp + size, prodp + size, prodp + size + hsize, hsize);
/*** Add product M (if NEGFLG M is a negative number). */
if (negflg)
- cy -= __mpn_sub_n (prodp + hsize, prodp + hsize, tspace, size);
+ cy -= mpn_sub_n (prodp + hsize, prodp + hsize, tspace, size);
else
- cy += __mpn_add_n (prodp + hsize, prodp + hsize, tspace, size);
+ cy += mpn_add_n (prodp + hsize, prodp + hsize, tspace, size);
/*** Product L. ________________ ________________
|________________||____U0 x V0_____| */
/*** Add/copy Product L (twice). */
- cy += __mpn_add_n (prodp + hsize, prodp + hsize, tspace, size);
+ cy += mpn_add_n (prodp + hsize, prodp + hsize, tspace, size);
if (cy)
- __mpn_add_1 (prodp + hsize + size, prodp + hsize + size, hsize, cy);
+ mpn_add_1 (prodp + hsize + size, prodp + hsize + size, hsize, cy);
MPN_COPY (prodp, tspace, hsize);
- cy = __mpn_add_n (prodp + hsize, prodp + hsize, tspace + hsize, hsize);
+ cy = mpn_add_n (prodp + hsize, prodp + hsize, tspace + hsize, hsize);
if (cy)
- __mpn_add_1 (prodp + size, prodp + size, size, 1);
+ mpn_add_1 (prodp + size, prodp + size, size, 1);
}
}
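/* The product H/M/L bookkeeping above implements the Karatsuba identity.
   With hsize = size/2 and B = 2^(hsize * BITS_PER_MP_LIMB), write
   U = U1*B + U0 and V = V1*B + V0.  Then

     U*V = (B^2 + B)*U1*V1 + B*(U1 - U0)*(V0 - V1) + (B + 1)*U0*V0

   so three half-size products suffice: H = U1*V1, M = (U1 - U0)*(V0 - V1)
   and L = U0*V0.  The mpn_cmp/mpn_sub_n pairs compute |U1 - U0| and
   |V0 - V1|, and negflg records the sign of M, selecting mpn_add_n or
   mpn_sub_n when M is folded into the result.  */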
void
#if __STDC__
-____mpn_sqr_n_basecase (mp_ptr prodp, mp_srcptr up, mp_size_t size)
+impn_sqr_n_basecase (mp_ptr prodp, mp_srcptr up, mp_size_t size)
#else
-____mpn_sqr_n_basecase (prodp, up, size)
+impn_sqr_n_basecase (prodp, up, size)
mp_ptr prodp;
mp_srcptr up;
mp_size_t size;
cy_limb = 0;
}
else
- cy_limb = __mpn_mul_1 (prodp, up, size, v_limb);
+ cy_limb = mpn_mul_1 (prodp, up, size, v_limb);
prodp[size] = cy_limb;
prodp++;
{
cy_limb = 0;
if (v_limb == 1)
- cy_limb = __mpn_add_n (prodp, prodp, up, size);
+ cy_limb = mpn_add_n (prodp, prodp, up, size);
}
else
- cy_limb = __mpn_addmul_1 (prodp, up, size, v_limb);
+ cy_limb = mpn_addmul_1 (prodp, up, size, v_limb);
prodp[size] = cy_limb;
prodp++;
void
#if __STDC__
-____mpn_sqr_n (mp_ptr prodp,
+impn_sqr_n (mp_ptr prodp,
mp_srcptr up, mp_size_t size, mp_ptr tspace)
#else
-____mpn_sqr_n (prodp, up, size, tspace)
+impn_sqr_n (prodp, up, size, tspace)
mp_ptr prodp;
mp_srcptr up;
mp_size_t size;
mp_limb cy_limb;
MPN_SQR_N_RECURSE (prodp, up, esize, tspace);
- cy_limb = __mpn_addmul_1 (prodp + esize, up, esize, up[esize]);
+ cy_limb = mpn_addmul_1 (prodp + esize, up, esize, up[esize]);
prodp[esize + esize] = cy_limb;
- cy_limb = __mpn_addmul_1 (prodp + esize, up, size, up[esize]);
+ cy_limb = mpn_addmul_1 (prodp + esize, up, size, up[esize]);
prodp[esize + size] = cy_limb;
}
/*** Product M. ________________
|_(U1-U0)(U0-U1)_| */
- if (__mpn_cmp (up + hsize, up, hsize) >= 0)
+ if (mpn_cmp (up + hsize, up, hsize) >= 0)
{
- __mpn_sub_n (prodp, up + hsize, up, hsize);
+ mpn_sub_n (prodp, up + hsize, up, hsize);
}
else
{
- __mpn_sub_n (prodp, up, up + hsize, hsize);
+ mpn_sub_n (prodp, up, up + hsize, hsize);
}
/* Read temporary operands from low part of PROD.
/*** Add/copy product H. */
MPN_COPY (prodp + hsize, prodp + size, hsize);
- cy = __mpn_add_n (prodp + size, prodp + size, prodp + size + hsize, hsize);
+ cy = mpn_add_n (prodp + size, prodp + size, prodp + size + hsize, hsize);
/*** Subtract product M (it is never positive when squaring). */
- cy -= __mpn_sub_n (prodp + hsize, prodp + hsize, tspace, size);
+ cy -= mpn_sub_n (prodp + hsize, prodp + hsize, tspace, size);
/*** Product L. ________________ ________________
|________________||____U0 x U0_____| */
/*** Add/copy Product L (twice). */
- cy += __mpn_add_n (prodp + hsize, prodp + hsize, tspace, size);
+ cy += mpn_add_n (prodp + hsize, prodp + hsize, tspace, size);
if (cy)
- __mpn_add_1 (prodp + hsize + size, prodp + hsize + size, hsize, cy);
+ mpn_add_1 (prodp + hsize + size, prodp + hsize + size, hsize, cy);
MPN_COPY (prodp, tspace, hsize);
- cy = __mpn_add_n (prodp + hsize, prodp + hsize, tspace + hsize, hsize);
+ cy = mpn_add_n (prodp + hsize, prodp + hsize, tspace + hsize, hsize);
if (cy)
- __mpn_add_1 (prodp + size, prodp + size, size, 1);
+ mpn_add_1 (prodp + size, prodp + size, size, 1);
}
}
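/* Squaring uses the same identity with V = U, so the middle product is
   M = (U1 - U0)*(U0 - U1) = -(U1 - U0)^2, never positive; that is why the
   squaring path keeps no negflg and always subtracts product M.  */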
/* This should be made into an inline function in gmp.h. */
inline void
#if __STDC__
-__mpn_mul_n (mp_ptr prodp, mp_srcptr up, mp_srcptr vp, mp_size_t size)
+mpn_mul_n (mp_ptr prodp, mp_srcptr up, mp_srcptr vp, mp_size_t size)
#else
-__mpn_mul_n (prodp, up, vp, size)
+mpn_mul_n (prodp, up, vp, size)
mp_ptr prodp;
mp_srcptr up;
mp_srcptr vp;
mp_size_t size;
#endif
{
+ TMP_DECL (marker);
+ TMP_MARK (marker);
if (up == vp)
{
if (size < KARATSUBA_THRESHOLD)
{
- ____mpn_sqr_n_basecase (prodp, up, size);
+ impn_sqr_n_basecase (prodp, up, size);
}
else
{
mp_ptr tspace;
- tspace = (mp_ptr) alloca (2 * size * BYTES_PER_MP_LIMB);
- ____mpn_sqr_n (prodp, up, size, tspace);
+ tspace = (mp_ptr) TMP_ALLOC (2 * size * BYTES_PER_MP_LIMB);
+ impn_sqr_n (prodp, up, size, tspace);
}
}
else
{
if (size < KARATSUBA_THRESHOLD)
{
- ____mpn_mul_n_basecase (prodp, up, vp, size);
+ impn_mul_n_basecase (prodp, up, vp, size);
}
else
{
mp_ptr tspace;
- tspace = (mp_ptr) alloca (2 * size * BYTES_PER_MP_LIMB);
- ____mpn_mul_n (prodp, up, vp, size, tspace);
+ tspace = (mp_ptr) TMP_ALLOC (2 * size * BYTES_PER_MP_LIMB);
+ impn_mul_n (prodp, up, vp, size, tspace);
}
}
+ TMP_FREE (marker);
}
-/* __mpn_rshift -- Shift right a low-level natural-number integer.
+/* mpn_rshift -- Shift right a low-level natural-number integer.
-Copyright (C) 1991, 1993, 1994 Free Software Foundation, Inc.
+Copyright (C) 1991, 1993, 1994, 1996 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
mp_limb
#if __STDC__
-__mpn_rshift (register mp_ptr wp,
+mpn_rshift (register mp_ptr wp,
register mp_srcptr up, mp_size_t usize,
register unsigned int cnt)
#else
-__mpn_rshift (wp, up, usize, cnt)
+mpn_rshift (wp, up, usize, cnt)
register mp_ptr wp;
register mp_srcptr up;
mp_size_t usize;
# MIPS __mpn_addmul_1 -- Multiply a limb vector with a single limb and
# add the product to a second limb vector.
- # Copyright (C) 1992, 1994 Free Software Foundation, Inc.
+ # Copyright (C) 1992, 1994, 1996 Free Software Foundation, Inc.
# This file is part of the GNU MP Library.
addu $2,$2,$10
sw $3,0($4)
addiu $4,$4,4
- bne $6,$0,Loop # should be "bnel"
+ bne $6,$0,Loop
addu $2,$9,$2 # add high product limb and carry from addition
# cool down phase 1
daddu $2,$2,$10
sd $3,0($4)
daddiu $4,$4,8
- bne $6,$0,Loop # should be "bnel"
+ bne $6,$0,Loop
daddu $2,$9,$2 # add high product limb and carry from addition
# cool down phase 1
sltu $2,$10,$2 # carry from previous addition -> $2
sd $10,0($4)
daddiu $4,$4,8
- bne $6,$0,Loop # should be "bnel"
+ bne $6,$0,Loop
daddu $2,$9,$2 # add high product limb and carry from addition
# cool down phase 1
daddu $2,$2,$10
sd $3,0($4)
daddiu $4,$4,8
- bne $6,$0,Loop # should be "bnel"
+ bne $6,$0,Loop
daddu $2,$9,$2 # add high product limb and carry from addition
# cool down phase 1
# MIPS __mpn_mul_1 -- Multiply a limb vector with a single limb and
# store the product in a second limb vector.
- # Copyright (C) 1992, 1994 Free Software Foundation, Inc.
+ # Copyright (C) 1992, 1994, 1996 Free Software Foundation, Inc.
# This file is part of the GNU MP Library.
sltu $2,$10,$2 # carry from previous addition -> $2
sw $10,0($4)
addiu $4,$4,4
- bne $6,$0,Loop # should be "bnel"
+ bne $6,$0,Loop
addu $2,$9,$2 # add high product limb and carry from addition
# cool down phase 1
# MIPS __mpn_submul_1 -- Multiply a limb vector with a single limb and
# subtract the product from a second limb vector.
- # Copyright (C) 1992, 1994 Free Software Foundation, Inc.
+ # Copyright (C) 1992, 1994, 1996 Free Software Foundation, Inc.
# This file is part of the GNU MP Library.
addu $2,$2,$10
sw $3,0($4)
addiu $4,$4,4
- bne $6,$0,Loop # should be "bnel"
+ bne $6,$0,Loop
addu $2,$9,$2 # add high product limb and carry from addition
# cool down phase 1
# IBM POWER __mpn_add_n -- Add two limb vectors of equal, non-zero length.
-# Copyright (C) 1992, 1994 Free Software Foundation, Inc.
+# Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc.
# This file is part of the GNU MP Library.
# IBM POWER __mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
# store difference in a third limb vector.
-# Copyright (C) 1992, 1994 Free Software Foundation, Inc.
+# Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc.
# This file is part of the GNU MP Library.