1/* mpn_mod_34lsub1 -- remainder modulo 2^(GMP_NUMB_BITS*3/4)-1.
2
3   THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY.  THEY'RE ALMOST
4   CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
5   FUTURE GNU MP RELEASES.
6
7Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
8
9This file is part of the GNU MP Library.
10
11The GNU MP Library is free software; you can redistribute it and/or modify
12it under the terms of the GNU Lesser General Public License as published by
13the Free Software Foundation; either version 3 of the License, or (at your
14option) any later version.
15
16The GNU MP Library is distributed in the hope that it will be useful, but
17WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
18or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
19License for more details.
20
21You should have received a copy of the GNU Lesser General Public License
22along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
23
24
25#include "gmp.h"
26#include "gmp-impl.h"
27
28
29/* Calculate a remainder from {p,n} divided by 2^(GMP_NUMB_BITS*3/4)-1.
30   The remainder is not fully reduced, it's any limb value congruent to
31   {p,n} modulo that divisor.
32
33   This implementation is only correct when GMP_NUMB_BITS is a multiple of
34   4.
35
36   FIXME: If GMP_NAIL_BITS is some silly big value during development then
37   it's possible the carry accumulators c0,c1,c2 could overflow.
38
39   General notes:
40
41   The basic idea is to use a set of N accumulators (N=3 in this case) to
42   effectively get a remainder mod 2^(GMP_NUMB_BITS*N)-1 followed at the end
43   by a reduction to GMP_NUMB_BITS*N/M bits (M=4 in this case) for a
44   remainder mod 2^(GMP_NUMB_BITS*N/M)-1.  N and M are chosen to give a good
45   set of small prime factors in 2^(GMP_NUMB_BITS*N/M)-1.
46
47   N=3 M=4 suits GMP_NUMB_BITS==32 and GMP_NUMB_BITS==64 quite well, giving
48   a few more primes than a single accumulator N=1 does, and for no extra
49   cost (assuming the processor has a decent number of registers).
50
51   For strange nailified values of GMP_NUMB_BITS the idea would be to look
52   for what N and M give good primes.  With GMP_NUMB_BITS not a power of 2
53   the choices for M may be opened up a bit.  But such things are probably
54   best done in separate code, not grafted on here.  */
55
56#if GMP_NUMB_BITS % 4 == 0
57
58#define B1  (GMP_NUMB_BITS / 4)
59#define B2  (B1 * 2)
60#define B3  (B1 * 3)
61
62#define M1  ((CNST_LIMB(1) << B1) - 1)
63#define M2  ((CNST_LIMB(1) << B2) - 1)
64#define M3  ((CNST_LIMB(1) << B3) - 1)
65
66#define LOW0(n)      ((n) & M3)
67#define HIGH0(n)     ((n) >> B3)
68
69#define LOW1(n)      (((n) & M2) << B1)
70#define HIGH1(n)     ((n) >> B2)
71
72#define LOW2(n)      (((n) & M1) << B2)
73#define HIGH2(n)     ((n) >> B1)
74
75#define PARTS0(n)    (LOW0(n) + HIGH0(n))
76#define PARTS1(n)    (LOW1(n) + HIGH1(n))
77#define PARTS2(n)    (LOW2(n) + HIGH2(n))
78
79#define ADD(c,a,val)                    \
80  do {                                  \
81    mp_limb_t  new_c;                   \
82    ADDC_LIMB (new_c, a, a, val);       \
83    (c) += new_c;                       \
84  } while (0)
85
86mp_limb_t
87mpn_mod_34lsub1 (mp_srcptr p, mp_size_t n)
88{
89  mp_limb_t  c0 = 0;
90  mp_limb_t  c1 = 0;
91  mp_limb_t  c2 = 0;
92  mp_limb_t  a0, a1, a2;
93
94  ASSERT (n >= 1);
95  ASSERT (n/3 < GMP_NUMB_MAX);
96
97  a0 = a1 = a2 = 0;
98  c0 = c1 = c2 = 0;
99
100  while ((n -= 3) >= 0)
101    {
102      ADD (c0, a0, p[0]);
103      ADD (c1, a1, p[1]);
104      ADD (c2, a2, p[2]);
105      p += 3;
106    }
107
108  if (n != -3)
109    {
110      ADD (c0, a0, p[0]);
111      if (n != -2)
112	ADD (c1, a1, p[1]);
113    }
114
115  return
116    PARTS0 (a0) + PARTS1 (a1) + PARTS2 (a2)
117    + PARTS1 (c0) + PARTS2 (c1) + PARTS0 (c2);
118}
119
120#endif
121