1/* stat.c -- statistical tests of random number sequences. */
2
3/*
4Copyright 1999, 2000 Free Software Foundation, Inc.
5
6This file is part of the GNU MP Library test suite.
7
8The GNU MP Library test suite is free software; you can redistribute it
9and/or modify it under the terms of the GNU General Public License as
10published by the Free Software Foundation; either version 3 of the License,
11or (at your option) any later version.
12
13The GNU MP Library test suite is distributed in the hope that it will be
14useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
15MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
16Public License for more details.
17
18You should have received a copy of the GNU General Public License along with
19the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
20
21/* Examples:
22
23  $ gen 1000 | stat
24Test 1000 real numbers.
25
26  $ gen 30000 | stat -2 1000
27Test 1000 real numbers 30 times and then test the 30 results in a
28``second level''.
29
30  $ gen -f mpz_urandomb 1000 | stat -i 0xffffffff
31Test 1000 integers 0 <= X <= 2^32-1.
32
33  $ gen -f mpz_urandomb -z 34 1000 | stat -i 0x3ffffffff
34Test 1000 integers 0 <= X <= 2^34-1.
35
36*/
37
38#include <stdio.h>
39#include <stdlib.h>
40#include <unistd.h>
41#include <math.h>
42#include "gmpstat.h"
43
44#if !HAVE_DECL_OPTARG
45extern char *optarg;
46extern int optind, opterr;
47#endif
48
49#define FVECSIZ (100000L)
50
51int g_debug = 0;
52
53static void
54print_ks_results (mpf_t f_p, mpf_t f_p_prob,
55		  mpf_t f_m, mpf_t f_m_prob,
56		  FILE *fp)
57{
58  double p, pp, m, mp;
59
60  p = mpf_get_d (f_p);
61  m = mpf_get_d (f_m);
62  pp = mpf_get_d (f_p_prob);
63  mp = mpf_get_d (f_m_prob);
64
65  fprintf (fp, "%.4f (%.0f%%)\t", p, pp * 100.0);
66  fprintf (fp, "%.4f (%.0f%%)\n", m, mp * 100.0);
67}
68
69static void
70print_x2_table (unsigned int v, FILE *fp)
71{
72  double t[7];
73  int f;
74
75
76  fprintf (fp, "Chi-square table for v=%u\n", v);
77  fprintf (fp, "1%%\t5%%\t25%%\t50%%\t75%%\t95%%\t99%%\n");
78  x2_table (t, v);
79  for (f = 0; f < 7; f++)
80    fprintf (fp, "%.2f\t", t[f]);
81  fputs ("\n", fp);
82}
83
84
85
86/* Pks () -- Distribution function for KS results with a big n (like 1000
87   or so):  F(x) = 1 - pow(e, -2*x^2) [Knuth, vol 2, p.51]. */
88/* gnuplot: plot [0:1] Pks(x), Pks(x) = 1-exp(-2*x**2)  */
89
90static void
91Pks (mpf_t p, mpf_t x)
92{
93  double dt;			/* temp double */
94
95  mpf_set (p, x);
96  mpf_mul (p, p, p);		/* p = x^2 */
97  mpf_mul_ui (p, p, 2);		/* p = 2*x^2 */
98  mpf_neg (p, p);		/* p = -2*x^2 */
99  /* No pow() in gmp.  Use doubles. */
100  /* FIXME: Use exp()? */
101  dt = pow (M_E, mpf_get_d (p));
102  mpf_set_d (p, dt);
103  mpf_ui_sub (p, 1, p);
104}
105
106/* f_freq() -- frequency test on real numbers 0<=f<1*/
107static void
108f_freq (const unsigned l1runs, const unsigned l2runs,
109	mpf_t fvec[], const unsigned long n)
110{
111  unsigned f;
112  mpf_t f_p, f_p_prob;
113  mpf_t f_m, f_m_prob;
114  mpf_t *l1res;			/* level 1 result array */
115
116  mpf_init (f_p);  mpf_init (f_m);
117  mpf_init (f_p_prob);  mpf_init (f_m_prob);
118
119
120  /* Allocate space for 1st level results. */
121  l1res = (mpf_t *) malloc (l2runs * 2 * sizeof (mpf_t));
122  if (NULL == l1res)
123    {
124      fprintf (stderr, "stat: malloc failure\n");
125      exit (1);
126    }
127
128  printf ("\nEquidistribution/Frequency test on real numbers (0<=X<1):\n");
129  printf ("\tKp\t\tKm\n");
130
131  for (f = 0; f < l2runs; f++)
132    {
133      /*  f_printvec (fvec, n); */
134      mpf_freqt (f_p, f_m, fvec + f * n, n);
135
136      /* what's the probability of getting these results? */
137      ks_table (f_p_prob, f_p, n);
138      ks_table (f_m_prob, f_m, n);
139
140      if (l1runs == 0)
141	{
142	  /*printf ("%u:\t", f + 1);*/
143	  print_ks_results (f_p, f_p_prob, f_m, f_m_prob, stdout);
144	}
145      else
146	{
147	  /* save result */
148	  mpf_init_set (l1res[f], f_p);
149	  mpf_init_set (l1res[f + l2runs], f_m);
150	}
151    }
152
153  /* Now, apply the KS test on the results from the 1st level rounds
154     with the distribution
155     F(x) = 1 - pow(e, -2*x^2)	[Knuth, vol 2, p.51] */
156
157  if (l1runs != 0)
158    {
159      /*printf ("-------------------------------------\n");*/
160
161      /* The Kp's. */
162      ks (f_p, f_m, l1res, Pks, l2runs);
163      ks_table (f_p_prob, f_p, l2runs);
164      ks_table (f_m_prob, f_m, l2runs);
165      printf ("Kp:\t");
166      print_ks_results (f_p, f_p_prob, f_m, f_m_prob, stdout);
167
168      /* The Km's. */
169      ks (f_p, f_m, l1res + l2runs, Pks, l2runs);
170      ks_table (f_p_prob, f_p, l2runs);
171      ks_table (f_m_prob, f_m, l2runs);
172      printf ("Km:\t");
173      print_ks_results (f_p, f_p_prob, f_m, f_m_prob, stdout);
174    }
175
176  mpf_clear (f_p);  mpf_clear (f_m);
177  mpf_clear (f_p_prob);  mpf_clear (f_m_prob);
178  free (l1res);
179}
180
181/* z_freq(l1runs, l2runs, zvec, n, max) -- frequency test on integers
182   0<=z<=MAX */
183static void
184z_freq (const unsigned l1runs,
185	const unsigned l2runs,
186	mpz_t zvec[],
187	const unsigned long n,
188	unsigned int max)
189{
190  mpf_t V;			/* result */
191  double d_V;			/* result as a double */
192
193  mpf_init (V);
194
195
196  printf ("\nEquidistribution/Frequency test on integers (0<=X<=%u):\n", max);
197  print_x2_table (max, stdout);
198
199  mpz_freqt (V, zvec, max, n);
200
201  d_V = mpf_get_d (V);
202  printf ("V = %.2f (n = %lu)\n", d_V, n);
203
204  mpf_clear (V);
205}
206
207unsigned int stat_debug = 0;
208
209int
210main (argc, argv)
211     int argc;
212     char *argv[];
213{
214  const char usage[] =
215    "usage: stat [-d] [-2 runs] [-i max | -r max] [file]\n" \
216    "       file     filename\n" \
217    "       -2 runs  perform 2-level test with RUNS runs on 1st level\n" \
218    "       -d       increase debugging level\n" \
219    "       -i max   input is integers 0 <= Z <= MAX\n" \
220    "       -r max   input is real numbers 0 <= R < 1 and use MAX as\n" \
221    "                maximum value when converting real numbers to integers\n" \
222    "";
223
224  mpf_t fvec[FVECSIZ];
225  mpz_t zvec[FVECSIZ];
226  unsigned long int f, n, vecentries;
227  char *filen;
228  FILE *fp;
229  int c;
230  int omitoutput = 0;
231  int realinput = -1;		/* 1: input is real numbers 0<=R<1;
232				   0: input is integers 0 <= Z <= MAX. */
233  long l1runs = 0,		/* 1st level runs */
234    l2runs = 1;			/* 2nd level runs */
235  mpf_t f_temp;
236  mpz_t z_imax;			/* max value when converting between
237				   real number and integer. */
238  mpf_t f_imax_plus1;		/* f_imax + 1 stored in an mpf_t for
239				   convenience */
240  mpf_t f_imax_minus1;		/* f_imax - 1 stored in an mpf_t for
241				   convenience */
242
243
244  mpf_init (f_temp);
245  mpz_init_set_ui (z_imax, 0x7fffffff);
246  mpf_init (f_imax_plus1);
247  mpf_init (f_imax_minus1);
248
249  while ((c = getopt (argc, argv, "d2:i:r:")) != -1)
250    switch (c)
251      {
252      case '2':
253	l1runs = atol (optarg);
254	l2runs = -1;		/* set later on */
255	break;
256      case 'd':			/* increase debug level */
257	stat_debug++;
258	break;
259      case 'i':
260	if (1 == realinput)
261	  {
262	    fputs ("stat: options -i and -r are mutually exclusive\n", stderr);
263	    exit (1);
264	  }
265	if (mpz_set_str (z_imax, optarg, 0))
266	  {
267	    fprintf (stderr, "stat: bad max value %s\n", optarg);
268	    exit (1);
269	  }
270	realinput = 0;
271	break;
272      case 'r':
273	if (0 == realinput)
274	  {
275	    fputs ("stat: options -i and -r are mutually exclusive\n", stderr);
276	    exit (1);
277	  }
278	if (mpz_set_str (z_imax, optarg, 0))
279	  {
280	    fprintf (stderr, "stat: bad max value %s\n", optarg);
281	    exit (1);
282	  }
283	realinput = 1;
284	break;
285      case 'o':
286	omitoutput = atoi (optarg);
287	break;
288      case '?':
289      default:
290	fputs (usage, stderr);
291	exit (1);
292      }
293  argc -= optind;
294  argv += optind;
295
296  if (argc < 1)
297    fp = stdin;
298  else
299    filen = argv[0];
300
301  if (fp != stdin)
302    if (NULL == (fp = fopen (filen, "r")))
303      {
304	perror (filen);
305	exit (1);
306      }
307
308  if (-1 == realinput)
309    realinput = 1;		/* default is real numbers */
310
311  /* read file and fill appropriate vec */
312  if (1 == realinput)		/* real input */
313    {
314      for (f = 0; f < FVECSIZ ; f++)
315	{
316	  mpf_init (fvec[f]);
317	  if (!mpf_inp_str (fvec[f], fp, 10))
318	    break;
319	}
320    }
321  else				/* integer input */
322    {
323      for (f = 0; f < FVECSIZ ; f++)
324	{
325	  mpz_init (zvec[f]);
326	  if (!mpz_inp_str (zvec[f], fp, 10))
327	    break;
328	}
329    }
330  vecentries = n = f;		/* number of entries read */
331  fclose (fp);
332
333  if (FVECSIZ == f)
334    fprintf (stderr, "stat: warning: discarding input due to lazy allocation "\
335	     "of only %ld entries.  sorry.\n", FVECSIZ);
336
337  printf ("Got %lu numbers.\n", n);
338
339  /* convert and fill the other vec */
340  /* since fvec[] contains 0<=f<1 and we want ivec[] to contain
341     0<=z<=imax and we are truncating all fractions when
342     converting float to int, we have to add 1 to imax.*/
343  mpf_set_z (f_imax_plus1, z_imax);
344  mpf_add_ui (f_imax_plus1, f_imax_plus1, 1);
345  if (1 == realinput)		/* fill zvec[] */
346    {
347      for (f = 0; f < n; f++)
348	{
349	  mpf_mul (f_temp, fvec[f], f_imax_plus1);
350	  mpz_init (zvec[f]);
351	  mpz_set_f (zvec[f], f_temp); /* truncating fraction */
352	  if (stat_debug > 1)
353	    {
354	      mpz_out_str (stderr, 10, zvec[f]);
355	      fputs ("\n", stderr);
356	    }
357	}
358    }
359  else				/* integer input; fill fvec[] */
360    {
361      /*    mpf_set_z (f_imax_minus1, z_imax);
362	    mpf_sub_ui (f_imax_minus1, f_imax_minus1, 1);*/
363      for (f = 0; f < n; f++)
364	{
365	  mpf_init (fvec[f]);
366	  mpf_set_z (fvec[f], zvec[f]);
367	  mpf_div (fvec[f], fvec[f], f_imax_plus1);
368	  if (stat_debug > 1)
369	    {
370	      mpf_out_str (stderr, 10, 0, fvec[f]);
371	      fputs ("\n", stderr);
372	    }
373	}
374    }
375
376  /* 2 levels? */
377  if (1 != l2runs)
378    {
379      l2runs = n / l1runs;
380      printf ("Doing %ld second level rounds "\
381	      "with %ld entries in each round", l2runs, l1runs);
382      if (n % l1runs)
383	printf (" (discarding %ld entr%s)", n % l1runs,
384		n % l1runs == 1 ? "y" : "ies");
385      puts (".");
386      n = l1runs;
387    }
388
389#ifndef DONT_FFREQ
390  f_freq (l1runs, l2runs, fvec, n);
391#endif
392#ifdef DO_ZFREQ
393  z_freq (l1runs, l2runs, zvec, n, mpz_get_ui (z_imax));
394#endif
395
396  mpf_clear (f_temp); mpz_clear (z_imax);
397  mpf_clear (f_imax_plus1);
398  mpf_clear (f_imax_minus1);
399  for (f = 0; f < vecentries; f++)
400    {
401      mpf_clear (fvec[f]);
402      mpz_clear (zvec[f]);
403    }
404
405  return 0;
406}
407