1/* primesieve (BIT_ARRAY, N) -- Fills the BIT_ARRAY with a mask for primes up to N.
2
3Contributed to the GNU project by Marco Bodrato.
4
5THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.
6IT IS ONLY SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.
7IN FACT, IT IS ALMOST GUARANTEED THAT IT WILL CHANGE OR
8DISAPPEAR IN A FUTURE GNU MP RELEASE.
9
10Copyright 2010-2012, 2015, 2016 Free Software Foundation, Inc.
11
12This file is part of the GNU MP Library.
13
14The GNU MP Library is free software; you can redistribute it and/or modify
15it under the terms of either:
16
17  * the GNU Lesser General Public License as published by the Free
18    Software Foundation; either version 3 of the License, or (at your
19    option) any later version.
20
21or
22
23  * the GNU General Public License as published by the Free Software
24    Foundation; either version 2 of the License, or (at your option) any
25    later version.
26
27or both in parallel, as here.
28
29The GNU MP Library is distributed in the hope that it will be useful, but
30WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
31or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
32for more details.
33
34You should have received copies of the GNU General Public License and the
35GNU Lesser General Public License along with the GNU MP Library.  If not,
36see https://www.gnu.org/licenses/.  */
37
38#include "gmp-impl.h"
39
40#if 0
41static mp_limb_t
42bit_to_n (mp_limb_t bit) { return (bit*3+4)|1; }
43#endif
44
45/* id_to_n (x) = bit_to_n (x-1) = (id*3+1)|1*/
46static mp_limb_t
47id_to_n  (mp_limb_t id)  { return id*3+1+(id&1); }
48
49/* n_to_bit (n) = ((n-1)&(-CNST_LIMB(2)))/3U-1 */
50static mp_limb_t
51n_to_bit (mp_limb_t n) { return ((n-5)|1)/3U; }
52
53#if 0
54static mp_size_t
55primesieve_size (mp_limb_t n) { return n_to_bit(n) / GMP_LIMB_BITS + 1; }
56#endif
57
58#if GMP_LIMB_BITS > 61
59#define SIEVE_SEED CNST_LIMB(0x3294C9E069128480)
60#if GMP_LIMB_BITS == 64
61/* 110bits pre-sieved mask for primes 5, 11*/
62#define SIEVE_MASK1 CNST_LIMB(0x81214a1204892058)
63#define SIEVE_MASKT CNST_LIMB(0xc8130681244)
64/* 182bits pre-sieved mask for primes 7, 13*/
65#define SIEVE_2MSK1 CNST_LIMB(0x9402180c40230184)
66#define SIEVE_2MSK2 CNST_LIMB(0x0285021088402120)
67#define SIEVE_2MSKT CNST_LIMB(0xa41210084421)
68#define SEED_LIMIT 210
69#else
70#define SEED_LIMIT 202
71#endif
72#else
73#if GMP_LIMB_BITS > 30
74#define SIEVE_SEED CNST_LIMB(0x69128480)
75#if GMP_LIMB_BITS == 32
76/* 70bits pre-sieved mask for primes 5, 7*/
77#define SIEVE_MASK1 CNST_LIMB(0x12148960)
78#define SIEVE_MASK2 CNST_LIMB(0x44a120cc)
79#define SIEVE_MASKT CNST_LIMB(0x1a)
80#define SEED_LIMIT 120
81#else
82#define SEED_LIMIT 114
83#endif
84#else
85#if GMP_LIMB_BITS > 15
86#define SIEVE_SEED CNST_LIMB(0x8480)
87#define SEED_LIMIT 54
88#else
89#if GMP_LIMB_BITS > 7
90#define SIEVE_SEED CNST_LIMB(0x80)
91#define SEED_LIMIT 34
92#else
93#define SIEVE_SEED CNST_LIMB(0x0)
94#define SEED_LIMIT 24
95#endif /* 7 */
96#endif /* 15 */
97#endif /* 30 */
98#endif /* 61 */
99
100#define SET_OFF1(m1, m2, M1, M2, off, BITS)		\
101  if (off) {						\
102    if (off < GMP_LIMB_BITS) {				\
103      m1 = (M1 >> off) | (M2 << (GMP_LIMB_BITS - off));	\
104      if (off <= BITS - GMP_LIMB_BITS) {		\
105	m2 = M1 << (BITS - GMP_LIMB_BITS - off)		\
106	  | M2 >> off;					\
107      } else {						\
108	m1 |= M1 << (BITS - off);			\
109	m2 = M1 >> (off + GMP_LIMB_BITS - BITS);	\
110      }							\
111    } else {						\
112      m1 = M1 << (BITS - off)				\
113	| M2 >> (off - GMP_LIMB_BITS);			\
114      m2 = M2 << (BITS - off)				\
115	| M1 >> (off + GMP_LIMB_BITS - BITS);		\
116    }							\
117  } else {						\
118    m1 = M1; m2 = M2;					\
119  }
120
121#define SET_OFF2(m1, m2, m3, M1, M2, M3, off, BITS)	\
122  if (off) {						\
123    if (off <= GMP_LIMB_BITS) {				\
124      m1 = M2 << (GMP_LIMB_BITS - off);			\
125      m2 = M3 << (GMP_LIMB_BITS - off);			\
126      if (off != GMP_LIMB_BITS) {			\
127	m1 |= (M1 >> off);				\
128	m2 |= (M2 >> off);				\
129      }							\
130      if (off <= BITS - 2 * GMP_LIMB_BITS) {		\
131	m3 = M1 << (BITS - 2 * GMP_LIMB_BITS - off)	\
132	  | M3 >> off;					\
133      } else {						\
134	m2 |= M1 << (BITS - GMP_LIMB_BITS - off);	\
135	m3 = M1 >> (off + 2 * GMP_LIMB_BITS - BITS);	\
136      }							\
137    } else if (off < 2 *GMP_LIMB_BITS) {		\
138      m1 = M2 >> (off - GMP_LIMB_BITS)			\
139	| M3 << (2 * GMP_LIMB_BITS - off);		\
140      if (off <= BITS - GMP_LIMB_BITS) {		\
141	m2 = M3 >> (off - GMP_LIMB_BITS)		\
142	  | M1 << (BITS - GMP_LIMB_BITS - off);		\
143	m3 = M2 << (BITS - GMP_LIMB_BITS - off);	\
144	if (off != BITS - GMP_LIMB_BITS) {		\
145	  m3 |= M1 >> (off + 2 * GMP_LIMB_BITS - BITS);	\
146	}						\
147      } else {						\
148	m1 |= M1 << (BITS - off);			\
149	m2 = M2 << (BITS - off)				\
150	  | M1 >> (GMP_LIMB_BITS - BITS + off);		\
151	m3 = M2 >> (GMP_LIMB_BITS - BITS + off);	\
152      }							\
153    } else {						\
154      m1 = M1 << (BITS - off)				\
155	| M3 >> (off - 2 * GMP_LIMB_BITS);		\
156      m2 = M2 << (BITS - off)				\
157	| M1 >> (off + GMP_LIMB_BITS - BITS);		\
158      m3 = M3 << (BITS - off)				\
159	| M2 >> (off + GMP_LIMB_BITS - BITS);		\
160    }							\
161  } else {						\
162    m1 = M1; m2 = M2; m3 = M3;				\
163  }
164
165#define ROTATE1(m1, m2, BITS)			\
166  do {						\
167    mp_limb_t __tmp;				\
168    __tmp = m1 >> (2 * GMP_LIMB_BITS - BITS);	\
169    m1 = (m1 << (BITS - GMP_LIMB_BITS)) | m2;	\
170    m2 = __tmp;					\
171  } while (0)
172
173#define ROTATE2(m1, m2, m3, BITS)		\
174  do {						\
175    mp_limb_t __tmp;				\
176    __tmp = m2 >> (3 * GMP_LIMB_BITS - BITS);	\
177    m2 = m2 << (BITS - GMP_LIMB_BITS * 2)	\
178      | m1 >> (3 * GMP_LIMB_BITS - BITS);	\
179    m1 = m1 << (BITS - GMP_LIMB_BITS * 2) | m3;	\
180    m3 = __tmp;					\
181  } while (0)
182
183static mp_limb_t
184fill_bitpattern (mp_ptr bit_array, mp_size_t limbs, mp_limb_t offset)
185{
186#ifdef SIEVE_2MSK2
187  mp_limb_t m11, m12, m21, m22, m23;
188
189  if (offset == 0) { /* This branch is not needed. */
190    m11 = SIEVE_MASK1;
191    m12 = SIEVE_MASKT;
192    m21 = SIEVE_2MSK1;
193    m22 = SIEVE_2MSK2;
194    m23 = SIEVE_2MSKT;
195  } else { /* correctly handle offset == 0... */
196    m21 = offset % 110;
197    SET_OFF1 (m11, m12, SIEVE_MASK1, SIEVE_MASKT, m21, 110);
198    offset %= 182;
199    SET_OFF2 (m21, m22, m23, SIEVE_2MSK1, SIEVE_2MSK2, SIEVE_2MSKT, offset, 182);
200  }
201  /* THINK: Consider handling odd values of 'limbs' outside the loop,
202     to have a single exit condition. */
203  do {
204    bit_array[0] = m11 | m21;
205    if (--limbs == 0)
206      break;
207    ROTATE1 (m11, m12, 110);
208    bit_array[1] = m11 | m22;
209    bit_array += 2;
210    ROTATE1 (m11, m12, 110);
211    ROTATE2 (m21, m22, m23, 182);
212  } while (--limbs != 0);
213  return 4;
214#else
215#ifdef SIEVE_MASK2
216  mp_limb_t mask, mask2, tail;
217
218  if (offset == 0) { /* This branch is not needed. */
219    mask = SIEVE_MASK1;
220    mask2 = SIEVE_MASK2;
221    tail = SIEVE_MASKT;
222  } else { /* correctly handle offset == 0... */
223    offset %= 70;
224    SET_OFF2 (mask, mask2, tail, SIEVE_MASK1, SIEVE_MASK2, SIEVE_MASKT, offset, 70);
225  }
226  /* THINK: Consider handling odd values of 'limbs' outside the loop,
227     to have a single exit condition. */
228  do {
229    bit_array[0] = mask;
230    if (--limbs == 0)
231      break;
232    bit_array[1] = mask2;
233    bit_array += 2;
234    ROTATE2 (mask, mask2, tail, 70);
235  } while (--limbs != 0);
236  return 2;
237#else
238  MPN_FILL (bit_array, limbs, CNST_LIMB(0));
239  return 0;
240#endif
241#endif
242}
243
244static void
245first_block_primesieve (mp_ptr bit_array, mp_limb_t n)
246{
247  mp_size_t bits, limbs;
248  mp_limb_t i;
249
250  ASSERT (n > 4);
251
252  bits  = n_to_bit(n);
253  limbs = bits / GMP_LIMB_BITS;
254
255  if (limbs != 0)
256    i = fill_bitpattern (bit_array + 1, limbs, 0);
257  bit_array[0] = SIEVE_SEED;
258
259  if ((bits + 1) % GMP_LIMB_BITS != 0)
260    bit_array[limbs] |= MP_LIMB_T_MAX << ((bits + 1) % GMP_LIMB_BITS);
261
262  if (n > SEED_LIMIT) {
263    mp_limb_t mask, index;
264
265    ASSERT (i < GMP_LIMB_BITS);
266
267    if (n_to_bit (SEED_LIMIT + 1) < GMP_LIMB_BITS)
268      i = 0;
269    mask = CNST_LIMB(1) << i;
270    index = 0;
271    do {
272      ++i;
273      if ((bit_array[index] & mask) == 0)
274	{
275	  mp_size_t step, lindex;
276	  mp_limb_t lmask;
277	  unsigned  maskrot;
278
279	  step = id_to_n(i);
280/*	  lindex = n_to_bit(id_to_n(i)*id_to_n(i)); */
281	  lindex = i*(step+1)-1+(-(i&1)&(i+1));
282/*	  lindex = i*(step+1+(i&1))-1+(i&1); */
283	  if (lindex > bits)
284	    break;
285
286	  step <<= 1;
287	  maskrot = step % GMP_LIMB_BITS;
288
289	  lmask = CNST_LIMB(1) << (lindex % GMP_LIMB_BITS);
290	  do {
291	    bit_array[lindex / GMP_LIMB_BITS] |= lmask;
292	    lmask = lmask << maskrot | lmask >> (GMP_LIMB_BITS - maskrot);
293	    lindex += step;
294	  } while (lindex <= bits);
295
296/*	  lindex = n_to_bit(id_to_n(i)*bit_to_n(i)); */
297	  lindex = i*(i*3+6)+(i&1);
298
299	  lmask = CNST_LIMB(1) << (lindex % GMP_LIMB_BITS);
300	  for ( ; lindex <= bits; lindex += step) {
301	    bit_array[lindex / GMP_LIMB_BITS] |= lmask;
302	    lmask = lmask << maskrot | lmask >> (GMP_LIMB_BITS - maskrot);
303	  };
304	}
305      mask = mask << 1 | mask >> (GMP_LIMB_BITS-1);
306      index += mask & 1;
307    } while (1);
308  }
309}
310
311static void
312block_resieve (mp_ptr bit_array, mp_size_t limbs, mp_limb_t offset,
313	       mp_srcptr sieve)
314{
315  mp_size_t bits, off = offset;
316  mp_limb_t mask, index, i;
317
318  ASSERT (limbs > 0);
319  ASSERT (offset >= GMP_LIMB_BITS);
320
321  bits = limbs * GMP_LIMB_BITS - 1;
322
323  i = fill_bitpattern (bit_array, limbs, offset - GMP_LIMB_BITS);
324
325  ASSERT (i < GMP_LIMB_BITS);
326
327  mask = CNST_LIMB(1) << i;
328  index = 0;
329  do {
330    ++i;
331    if ((sieve[index] & mask) == 0)
332      {
333	mp_size_t step, lindex;
334	mp_limb_t lmask;
335	unsigned  maskrot;
336
337	step = id_to_n(i);
338
339/*	lindex = n_to_bit(id_to_n(i)*id_to_n(i)); */
340	lindex = i*(step+1)-1+(-(i&1)&(i+1));
341/*	lindex = i*(step+1+(i&1))-1+(i&1); */
342	if (lindex > bits + off)
343	  break;
344
345	step <<= 1;
346	maskrot = step % GMP_LIMB_BITS;
347
348	if (lindex < off)
349	  lindex += step * ((off - lindex - 1) / step + 1);
350
351	lindex -= off;
352
353	lmask = CNST_LIMB(1) << (lindex % GMP_LIMB_BITS);
354	for ( ; lindex <= bits; lindex += step) {
355	  bit_array[lindex / GMP_LIMB_BITS] |= lmask;
356	  lmask = lmask << maskrot | lmask >> (GMP_LIMB_BITS - maskrot);
357	};
358
359/*	lindex = n_to_bit(id_to_n(i)*bit_to_n(i)); */
360	lindex = i*(i*3+6)+(i&1);
361
362	if (lindex < off)
363	  lindex += step * ((off - lindex - 1) / step + 1);
364
365	lindex -= off;
366
367	lmask = CNST_LIMB(1) << (lindex % GMP_LIMB_BITS);
368	for ( ; lindex <= bits; lindex += step) {
369	  bit_array[lindex / GMP_LIMB_BITS] |= lmask;
370	  lmask = lmask << maskrot | lmask >> (GMP_LIMB_BITS - maskrot);
371	};
372      }
373      mask = mask << 1 | mask >> (GMP_LIMB_BITS-1);
374      index += mask & 1;
375  } while (1);
376}
377
378#define BLOCK_SIZE 2048
379
380/* Fills bit_array with the characteristic function of composite
381   numbers up to the parameter n. I.e. a bit set to "1" represent a
382   composite, a "0" represent a prime.
383
384   The primesieve_size(n) limbs pointed to by bit_array are
385   overwritten. The returned value counts prime integers in the
386   interval [4, n]. Note that n > 4.
387
388   Even numbers and multiples of 3 are excluded "a priori", only
389   numbers equivalent to +/- 1 mod 6 have their bit in the array.
390
391   Once sieved, if the bit b is ZERO it represent a prime, the
392   represented prime is bit_to_n(b), if the LSbit is bit 0, or
393   id_to_n(b), if you call "1" the first bit.
394 */
395
396mp_limb_t
397gmp_primesieve (mp_ptr bit_array, mp_limb_t n)
398{
399  mp_size_t size;
400  mp_limb_t bits;
401
402  ASSERT (n > 4);
403
404  bits = n_to_bit(n);
405  size = bits / GMP_LIMB_BITS + 1;
406
407  if (size > BLOCK_SIZE * 2) {
408    mp_size_t off;
409    off = BLOCK_SIZE + (size % BLOCK_SIZE);
410    first_block_primesieve (bit_array, id_to_n (off * GMP_LIMB_BITS));
411    do {
412      block_resieve (bit_array + off, BLOCK_SIZE, off * GMP_LIMB_BITS, bit_array);
413    } while ((off += BLOCK_SIZE) < size);
414  } else {
415    first_block_primesieve (bit_array, n);
416  }
417
418  if ((bits + 1) % GMP_LIMB_BITS != 0)
419    bit_array[size-1] |= MP_LIMB_T_MAX << ((bits + 1) % GMP_LIMB_BITS);
420
421  return size * GMP_LIMB_BITS - mpn_popcount (bit_array, size);
422}
423
424#undef BLOCK_SIZE
425#undef SEED_LIMIT
426#undef SIEVE_SEED
427#undef SIEVE_MASK1
428#undef SIEVE_MASK2
429#undef SIEVE_MASKT
430#undef SIEVE_2MSK1
431#undef SIEVE_2MSK2
432#undef SIEVE_2MSKT
433#undef SET_OFF1
434#undef SET_OFF2
435#undef ROTATE1
436#undef ROTATE2
437