1/* List and count primes.
2   Written by tege while on holiday in Rodupp, August 2001.
3   Between 10 and 500 times faster than previous program.
4
5Copyright 2001, 2002, 2006 Free Software Foundation, Inc.
6
7This program is free software; you can redistribute it and/or modify it under
8the terms of the GNU General Public License as published by the Free Software
9Foundation; either version 3 of the License, or (at your option) any later
10version.
11
12This program is distributed in the hope that it will be useful, but WITHOUT ANY
13WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
14PARTICULAR PURPOSE.  See the GNU General Public License for more details.
15
16You should have received a copy of the GNU General Public License along with
17this program.  If not, see http://www.gnu.org/licenses/.  */
18
19#include <stdlib.h>
20#include <stdio.h>
21#include <string.h>
22#include <math.h>
23#include <assert.h>
24
25/* IDEAS:
26 * Do not fill primes[] with real primes when the range [fr,to] is small,
27   when fr,to are relatively large.  Fill primes[] with odd numbers instead.
28   [Probably a bad idea, since the primes[] array would become very large.]
29 * Separate small primes and large primes when sieving.  Either the Montgomery
30   way (i.e., having a large array a multiple of L1 cache size), or just
31   separate loops for primes <= S and primes > S.  The latter primes do not
32   require an inner loop, since they will touch the sieving array at most once.
33 * Pre-fill sieving array with an appropriately aligned ...00100100... pattern,
34   then omit 3 from primes array.  (May require similar special handling of 3
35   as we now have for 2.)
36 * A large SIEVE_LIMIT currently implies very large memory usage, mainly due
37   to the sieving array in make_primelist, but also because of the primes[]
38   array.  We might want to stage the program, using sieve_region/find_primes
39   to build primes[].  Make report() a function pointer, as part of achieving
40   this.
41 * Store primes[] as two arrays, one array with primes represented as delta
42   values using just 8 bits (if gaps are too big, store bogus primes!)
43   and one array with "rem" values.  The latter needs 32-bit values.
44 * A new entry point, mpz_probab_prime_likely_p, would be useful.
45 * Improve command line syntax and versatility.  "primes -f FROM -t TO",
46   allow either to be omitted for open interval.  (But disallow
47   "primes -c -f FROM" since that would be infinity.)  Allow printing a
48   limited *number* of primes using syntax like "primes -f FROM -n NUMBER".
49 * When looking for maxgaps, we should not perform any primality testing until
50   we find possible record gaps.  Should speed up the searches tremendously.
51 */
52
53#include "gmp.h"
54
55struct primes
56{
57  unsigned int prime;
58  int rem;
59};
60
61struct primes *primes;
62unsigned long n_primes;
63
64void find_primes __GMP_PROTO ((unsigned char *, mpz_t, unsigned long, mpz_t));
65void sieve_region __GMP_PROTO ((unsigned char *, mpz_t, unsigned long));
66void make_primelist __GMP_PROTO ((unsigned long));
67
68int flag_print = 1;
69int flag_count = 0;
70int flag_maxgap = 0;
71unsigned long maxgap = 0;
72unsigned long total_primes = 0;
73
74void
75report (mpz_t prime)
76{
77  total_primes += 1;
78  if (flag_print)
79    {
80      mpz_out_str (stdout, 10, prime);
81      printf ("\n");
82    }
83  if (flag_maxgap)
84    {
85      static unsigned long prev_prime_low = 0;
86      unsigned long gap;
87      if (prev_prime_low != 0)
88	{
89	  gap = mpz_get_ui (prime) - prev_prime_low;
90	  if (maxgap < gap)
91	    maxgap = gap;
92	}
93      prev_prime_low = mpz_get_ui (prime);
94    }
95}
96
97int
98main (int argc, char *argv[])
99{
100  char *progname = argv[0];
101  mpz_t fr, to;
102  mpz_t fr2, to2;
103  unsigned long sieve_lim;
104  unsigned long est_n_primes;
105  unsigned char *s;
106  mpz_t tmp;
107  mpz_t siev_sqr_lim;
108
109  while (argc != 1)
110    {
111      if (strcmp (argv[1], "-c") == 0)
112	{
113	  flag_count = 1;
114	  argv++;
115	  argc--;
116	}
117      else if (strcmp (argv[1], "-p") == 0)
118	{
119	  flag_print = 2;
120	  argv++;
121	  argc--;
122	}
123      else if (strcmp (argv[1], "-g") == 0)
124	{
125	  flag_maxgap = 1;
126	  argv++;
127	  argc--;
128	}
129      else
130	break;
131    }
132
133  if (flag_count || flag_maxgap)
134    flag_print--;		/* clear unless an explicit -p  */
135
136  mpz_init (fr);
137  mpz_init (to);
138  mpz_init (fr2);
139  mpz_init (to2);
140
141  if (argc == 3)
142    {
143      mpz_set_str (fr, argv[1], 0);
144      if (argv[2][0] == '+')
145	{
146	  mpz_set_str (to, argv[2] + 1, 0);
147	  mpz_add (to, to, fr);
148	}
149      else
150	mpz_set_str (to, argv[2], 0);
151    }
152  else if (argc == 2)
153    {
154      mpz_set_ui (fr, 0);
155      mpz_set_str (to, argv[1], 0);
156    }
157  else
158    {
159      fprintf (stderr, "usage: %s [-c] [-p] [-g] [from [+]]to\n", progname);
160      exit (1);
161    }
162
163  mpz_set (fr2, fr);
164  if (mpz_cmp_ui (fr2, 3) < 0)
165    {
166      mpz_set_ui (fr2, 2);
167      report (fr2);
168      mpz_set_ui (fr2, 3);
169    }
170  mpz_setbit (fr2, 0);				/* make odd */
171  mpz_sub_ui (to2, to, 1);
172  mpz_setbit (to2, 0);				/* make odd */
173
174  mpz_init (tmp);
175  mpz_init (siev_sqr_lim);
176
177  mpz_sqrt (tmp, to2);
178#define SIEVE_LIMIT 10000000
179  if (mpz_cmp_ui (tmp, SIEVE_LIMIT) < 0)
180    {
181      sieve_lim = mpz_get_ui (tmp);
182    }
183  else
184    {
185      sieve_lim = SIEVE_LIMIT;
186      mpz_sub (tmp, to2, fr2);
187      if (mpz_cmp_ui (tmp, sieve_lim) < 0)
188	sieve_lim = mpz_get_ui (tmp);	/* limit sieving for small ranges */
189    }
190  mpz_set_ui (siev_sqr_lim, sieve_lim + 1);
191  mpz_mul_ui (siev_sqr_lim, siev_sqr_lim, sieve_lim + 1);
192
193  est_n_primes = (size_t) (sieve_lim / log((double) sieve_lim) * 1.13) + 10;
194  primes = malloc (est_n_primes * sizeof primes[0]);
195  make_primelist (sieve_lim);
196  assert (est_n_primes >= n_primes);
197
198#if DEBUG
199  printf ("sieve_lim = %lu\n", sieve_lim);
200  printf ("n_primes = %lu (3..%u)\n",
201	  n_primes, primes[n_primes - 1].prime);
202#endif
203
204#define S (1 << 15)		/* FIXME: Figure out L1 cache size */
205  s = malloc (S/2);
206  while (mpz_cmp (fr2, to2) <= 0)
207    {
208      unsigned long rsize;
209      rsize = S;
210      mpz_add_ui (tmp, fr2, rsize);
211      if (mpz_cmp (tmp, to2) > 0)
212	{
213	  mpz_sub (tmp, to2, fr2);
214	  rsize = mpz_get_ui (tmp) + 2;
215	}
216#if DEBUG
217      printf ("Sieving region ["); mpz_out_str (stdout, 10, fr2);
218      printf (","); mpz_add_ui (tmp, fr2, rsize - 2);
219      mpz_out_str (stdout, 10, tmp); printf ("]\n");
220#endif
221      sieve_region (s, fr2, rsize);
222      find_primes (s, fr2, rsize / 2, siev_sqr_lim);
223
224      mpz_add_ui (fr2, fr2, S);
225    }
226  free (s);
227
228  if (flag_count)
229    printf ("Pi(interval) = %lu\n", total_primes);
230
231  if (flag_maxgap)
232    printf ("max gap: %lu\n", maxgap);
233
234  return 0;
235}
236
237/* Find primes in region [fr,fr+rsize).  Requires that fr is odd and that
238   rsize is even.  The sieving array s should be aligned for "long int" and
239   have rsize/2 entries, rounded up to the nearest multiple of "long int".  */
240void
241sieve_region (unsigned char *s, mpz_t fr, unsigned long rsize)
242{
243  unsigned long ssize = rsize / 2;
244  unsigned long start, start2, prime;
245  unsigned long i;
246  mpz_t tmp;
247
248  mpz_init (tmp);
249
250#if 0
251  /* initialize sieving array */
252  for (ii = 0; ii < (ssize + sizeof (long) - 1) / sizeof (long); ii++)
253    ((long *) s) [ii] = ~0L;
254#else
255  {
256    long k;
257    long *se = (long *) (s + ((ssize + sizeof (long) - 1) & -sizeof (long)));
258    for (k = -((ssize + sizeof (long) - 1) / sizeof (long)); k < 0; k++)
259      se[k] = ~0L;
260  }
261#endif
262
263  for (i = 0; i < n_primes; i++)
264    {
265      prime = primes[i].prime;
266
267      if (primes[i].rem >= 0)
268	{
269	  start2 = primes[i].rem;
270	}
271      else
272	{
273	  mpz_set_ui (tmp, prime);
274	  mpz_mul_ui (tmp, tmp, prime);
275	  if (mpz_cmp (fr, tmp) <= 0)
276	    {
277	      mpz_sub (tmp, tmp, fr);
278	      if (mpz_cmp_ui (tmp, 2 * ssize) > 0)
279		break;		/* avoid overflow at next line, also speedup */
280	      start = mpz_get_ui (tmp);
281	    }
282	  else
283	    {
284	      start = (prime - mpz_tdiv_ui (fr, prime)) % prime;
285	      if (start % 2 != 0)
286		start += prime;		/* adjust if even divisible */
287	    }
288	  start2 = start / 2;
289	}
290
291#if 0
292      for (ii = start2; ii < ssize; ii += prime)
293	s[ii] = 0;
294      primes[i].rem = ii - ssize;
295#else
296      {
297	long k;
298	unsigned char *se = s + ssize; /* point just beyond sieving range */
299	for (k = start2 - ssize; k < 0; k += prime)
300	  se[k] = 0;
301	primes[i].rem = k;
302      }
303#endif
304    }
305  mpz_clear (tmp);
306}
307
308/* Find primes in region [fr,fr+rsize), using the previously sieved s[].  */
309void
310find_primes (unsigned char *s, mpz_t  fr, unsigned long ssize,
311	     mpz_t siev_sqr_lim)
312{
313  unsigned long j, ij;
314  mpz_t tmp;
315
316  mpz_init (tmp);
317  for (j = 0; j < (ssize + sizeof (long) - 1) / sizeof (long); j++)
318    {
319      if (((long *) s) [j] != 0)
320	{
321	  for (ij = 0; ij < sizeof (long); ij++)
322	    {
323	      if (s[j * sizeof (long) + ij] != 0)
324		{
325		  if (j * sizeof (long) + ij >= ssize)
326		    goto out;
327		  mpz_add_ui (tmp, fr, (j * sizeof (long) + ij) * 2);
328		  if (mpz_cmp (tmp, siev_sqr_lim) < 0 ||
329		      mpz_probab_prime_p (tmp, 10))
330		    report (tmp);
331		}
332	    }
333	}
334    }
335 out:
336  mpz_clear (tmp);
337}
338
339/* Generate a list of primes and store in the global array primes[].  */
340void
341make_primelist (unsigned long maxprime)
342{
343#if 1
344  unsigned char *s;
345  unsigned long ssize = maxprime / 2;
346  unsigned long i, ii, j;
347
348  s = malloc (ssize);
349  memset (s, ~0, ssize);
350  for (i = 3; ; i += 2)
351    {
352      unsigned long isqr = i * i;
353      if (isqr >= maxprime)
354	break;
355      if (s[i * i / 2 - 1] == 0)
356	continue;				/* only sieve with primes */
357      for (ii = i * i / 2 - 1; ii < ssize; ii += i)
358	s[ii] = 0;
359    }
360  n_primes = 0;
361  for (j = 0; j < ssize; j++)
362    {
363      if (s[j] != 0)
364	{
365	  primes[n_primes].prime = j * 2 + 3;
366	  primes[n_primes].rem = -1;
367	  n_primes++;
368	}
369    }
370  /* FIXME: This should not be needed if fencepost errors were fixed... */
371  if (primes[n_primes - 1].prime > maxprime)
372    n_primes--;
373  free (s);
374#else
375  unsigned long i;
376  n_primes = 0;
377  for (i = 3; i <= maxprime; i += 2)
378    {
379      if (i < 7 || (i % 3 != 0 && i % 5 != 0 && i % 7 != 0))
380	{
381	  primes[n_primes].prime = i;
382	  primes[n_primes].rem = -1;
383	  n_primes++;
384	}
385    }
386#endif
387}
388