1/* Speed measuring program.
2
3Copyright 1999, 2000, 2001, 2002, 2003, 2005, 2006, 2008, 2009, 2010 Free
4Software Foundation, Inc.
5
6This file is part of the GNU MP Library.
7
8The GNU MP Library is free software; you can redistribute it and/or modify
9it under the terms of the GNU Lesser General Public License as published by
10the Free Software Foundation; either version 3 of the License, or (at your
11option) any later version.
12
13The GNU MP Library is distributed in the hope that it will be useful, but
14WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
16License for more details.
17
18You should have received a copy of the GNU Lesser General Public License
19along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
20
21/* Usage message is in the code below, run with no arguments to print it.
22   See README for interesting applications.
23
24   To add a new routine foo(), create a speed_foo() function in the style of
25   the existing ones and add an entry in the routine[] array.  Put FLAG_R if
26   speed_foo() wants an "r" parameter.
27
28   The routines don't have help messages or descriptions, but most have
29   suggestive names.  See the source code for full details.
30
31*/
32
33#include "config.h"
34
35#include <limits.h>
36#include <stdio.h>
37#include <stdlib.h>
38#include <string.h>
39
40#if HAVE_UNISTD_H
41#include <unistd.h>  /* for getpid, R_OK */
42#endif
43
44#if TIME_WITH_SYS_TIME
45# include <sys/time.h>  /* for struct timeval */
46# include <time.h>
47#else
48# if HAVE_SYS_TIME_H
49#  include <sys/time.h>
50# else
51#  include <time.h>
52# endif
53#endif
54
55#if HAVE_SYS_RESOURCE_H
56#include <sys/resource.h>  /* for getrusage() */
57#endif
58
59
60#include "gmp.h"
61#include "gmp-impl.h"
62#include "longlong.h"  /* for the benefit of speed-many.c */
63#include "tests.h"
64#include "speed.h"
65
66
/* getopt's globals, declared here when the system headers don't.  */
#if !HAVE_DECL_OPTARG
extern char *optarg;
extern int optind, opterr;
#endif

/* Fallback for systems without strtoul: parse with strtol and convert the
   result.  The arguments and the whole expansion are parenthesized so the
   macro can be used anywhere a function call could.  */
#if !HAVE_STRTOUL
#define strtoul(p,e,b)  ((unsigned long) strtol ((p), (e), (b)))
#endif

/* Optional extra declarations, expanded here when the build defines these
   macros.  */
#ifdef SPEED_EXTRA_PROTOS
SPEED_EXTRA_PROTOS
#endif
#ifdef SPEED_EXTRA_PROTOS2
SPEED_EXTRA_PROTOS2
#endif
82
83
/* Store the limb value n into each of the size limbs starting at ptr.
   Each macro argument is evaluated exactly once, so arguments with side
   effects behave as they would for a function call.  size must be
   non-negative.  */
#define MPN_FILL(ptr, size, n)                                  \
  do {                                                          \
    mp_ptr     __fill_ptr = (ptr);                              \
    mp_size_t  __fill_size = (size);                            \
    mp_limb_t  __fill_limb = (n);                               \
    mp_size_t  __fill_i;                                        \
    ASSERT (__fill_size >= 0);                                  \
    for (__fill_i = 0; __fill_i < __fill_size; __fill_i++)      \
      __fill_ptr[__fill_i] = __fill_limb;                       \
  } while (0)
91
92
/* A limb with the alternating bit pattern 1010...10, restricted to the numb
   bits.  For any even limb width MP_LIMB_T_MAX/3 is 0101...01, and doubling
   that moves the pattern up one bit without overflow, so no per-width
   constant is needed (0xAAAAAAAA for 32 bits, 0xAAAAAAAAAAAAAAAA for 64).  */
#define GMP_NUMB_0xAA  ((MP_LIMB_T_MAX / 3 * 2) & GMP_NUMB_MASK)
99
100
101#define CMP_ABSOLUTE     1
102#define CMP_RATIO        2
103#define CMP_DIFFERENCE   3
104#define CMP_DIFFPREV     4
105int  option_cmp = CMP_ABSOLUTE;
106
107#define UNIT_SECONDS        1
108#define UNIT_CYCLES         2
109#define UNIT_CYCLESPERLIMB  3
110int  option_unit = UNIT_SECONDS;
111
112#define DATA_RANDOM   1
113#define DATA_RANDOM2  2
114#define DATA_ZEROS    3
115#define DATA_AAS      4
116#define DATA_FFS      5
117#define DATA_2FD      6
118int  option_data = DATA_RANDOM;
119
120int        option_square = 0;
121double     option_factor = 0.0;
122mp_size_t  option_step = 1;
123int        option_gnuplot = 0;
124char      *option_gnuplot_basename;
125struct size_array_t {
126  mp_size_t start, end;
127} *size_array = NULL;
128mp_size_t  size_num = 0;
129mp_size_t  size_allocnum = 0;
130int        option_resource_usage = 0;
131long       option_seed = 123456789;
132
133struct speed_params  sp;
134
135#define COLUMN_WIDTH  13  /* for the free-form output */
136
/* Bits for the "flag" field of a routine[] entry.  */
#define FLAG_R            (1<<0)  /* require ".r" */
#define FLAG_R_OPTIONAL   (1<<1)  /* optional ".r" */
#define FLAG_RSIZE        (1<<2)  /* not referenced in this part of the
                                     file -- TODO confirm its use */
#define FLAG_NODATA       (1<<3)  /* don't alloc xp, yp */

/* Table of the routines that can be named on the command line.
   routine_find matches a choice against "name", run_one passes "fun" to
   speed_measure, and "flag" holds FLAG_ bits.  Entries that omit the flag
   initializer get flag 0, meaning no ".r" parameter is accepted and operand
   data is allocated.  */
const struct routine_t {
  /* constants */
  const char        *name;   /* name as given on the command line */
  speed_function_t  fun;     /* measuring function */
  int               flag;    /* FLAG_ bits, or 0 */
} routine[] = {

  { "noop",              speed_noop                 },
  { "noop_wxs",          speed_noop_wxs             },
  { "noop_wxys",         speed_noop_wxys            },

  { "mpn_add_n",         speed_mpn_add_n,     FLAG_R_OPTIONAL },
  { "mpn_sub_n",         speed_mpn_sub_n,     FLAG_R_OPTIONAL },

#if HAVE_NATIVE_mpn_add_n_sub_n
  { "mpn_add_n_sub_n",      speed_mpn_add_n_sub_n,     FLAG_R_OPTIONAL },
#endif

  { "mpn_addmul_1",      speed_mpn_addmul_1,  FLAG_R },
  { "mpn_submul_1",      speed_mpn_submul_1,  FLAG_R },
#if HAVE_NATIVE_mpn_addmul_2
  { "mpn_addmul_2",      speed_mpn_addmul_2,  FLAG_R_OPTIONAL },
#endif
#if HAVE_NATIVE_mpn_addmul_3
  { "mpn_addmul_3",      speed_mpn_addmul_3,  FLAG_R_OPTIONAL },
#endif
#if HAVE_NATIVE_mpn_addmul_4
  { "mpn_addmul_4",      speed_mpn_addmul_4,  FLAG_R_OPTIONAL },
#endif
#if HAVE_NATIVE_mpn_addmul_5
  { "mpn_addmul_5",      speed_mpn_addmul_5,  FLAG_R_OPTIONAL },
#endif
#if HAVE_NATIVE_mpn_addmul_6
  { "mpn_addmul_6",      speed_mpn_addmul_6,  FLAG_R_OPTIONAL },
#endif
#if HAVE_NATIVE_mpn_addmul_7
  { "mpn_addmul_7",      speed_mpn_addmul_7,  FLAG_R_OPTIONAL },
#endif
#if HAVE_NATIVE_mpn_addmul_8
  { "mpn_addmul_8",      speed_mpn_addmul_8,  FLAG_R_OPTIONAL },
#endif
  { "mpn_mul_1",         speed_mpn_mul_1,     FLAG_R },
  { "mpn_mul_1_inplace", speed_mpn_mul_1_inplace, FLAG_R },
#if HAVE_NATIVE_mpn_mul_2
  { "mpn_mul_2",         speed_mpn_mul_2,     FLAG_R_OPTIONAL },
#endif
#if HAVE_NATIVE_mpn_mul_3
  { "mpn_mul_3",         speed_mpn_mul_3,     FLAG_R_OPTIONAL },
#endif
#if HAVE_NATIVE_mpn_mul_4
  { "mpn_mul_4",         speed_mpn_mul_4,     FLAG_R_OPTIONAL },
#endif

  { "mpn_divrem_1",      speed_mpn_divrem_1,  FLAG_R },
  { "mpn_divrem_1f",     speed_mpn_divrem_1f, FLAG_R },
#if HAVE_NATIVE_mpn_divrem_1c
  { "mpn_divrem_1c",     speed_mpn_divrem_1c, FLAG_R },
  { "mpn_divrem_1cf",    speed_mpn_divrem_1cf,FLAG_R },
#endif
  { "mpn_mod_1",         speed_mpn_mod_1,     FLAG_R_OPTIONAL },
#if HAVE_NATIVE_mpn_mod_1c
  { "mpn_mod_1c",        speed_mpn_mod_1c,    FLAG_R_OPTIONAL },
#endif
  { "mpn_preinv_divrem_1",  speed_mpn_preinv_divrem_1,  FLAG_R },
  { "mpn_preinv_divrem_1f", speed_mpn_preinv_divrem_1f, FLAG_R },
  { "mpn_preinv_mod_1",  speed_mpn_preinv_mod_1, FLAG_R },

  { "mpn_mod_1_1",       speed_mpn_mod_1_1,       FLAG_R_OPTIONAL },
  { "mpn_mod_1s_2",      speed_mpn_mod_1_2,       FLAG_R_OPTIONAL },
  { "mpn_mod_1s_3",      speed_mpn_mod_1_3,       FLAG_R_OPTIONAL },
  { "mpn_mod_1s_4",      speed_mpn_mod_1_4,       FLAG_R_OPTIONAL },

  { "mpn_divrem_1_div",  speed_mpn_divrem_1_div,  FLAG_R },
  { "mpn_divrem_1_inv",  speed_mpn_divrem_1_inv,  FLAG_R },
  { "mpn_divrem_1f_div", speed_mpn_divrem_1f_div, FLAG_R },
  { "mpn_divrem_1f_inv", speed_mpn_divrem_1f_inv, FLAG_R },
  { "mpn_mod_1_div",     speed_mpn_mod_1_div,     FLAG_R },
  { "mpn_mod_1_inv",     speed_mpn_mod_1_inv,     FLAG_R },

  { "mpn_divrem_2",      speed_mpn_divrem_2,        },
  { "mpn_divrem_2_div",  speed_mpn_divrem_2_div,    },
  { "mpn_divrem_2_inv",  speed_mpn_divrem_2_inv,    },

  { "mpn_divexact_1",    speed_mpn_divexact_1,    FLAG_R },
  { "mpn_divexact_by3",  speed_mpn_divexact_by3          },

  { "mpn_bdiv_q_1",      speed_mpn_bdiv_q_1,      FLAG_R_OPTIONAL },
  { "mpn_pi1_bdiv_q_1",  speed_mpn_pi1_bdiv_q_1,  FLAG_R_OPTIONAL },
  { "mpn_bdiv_dbm1c",    speed_mpn_bdiv_dbm1c,    FLAG_R_OPTIONAL },

#if HAVE_NATIVE_mpn_modexact_1_odd
  { "mpn_modexact_1_odd",  speed_mpn_modexact_1_odd,  FLAG_R },
#endif
  { "mpn_modexact_1c_odd", speed_mpn_modexact_1c_odd, FLAG_R },

#if GMP_NUMB_BITS % 4 == 0
  { "mpn_mod_34lsub1",   speed_mpn_mod_34lsub1 },
#endif

  { "mpn_lshift",        speed_mpn_lshift, FLAG_R   },
  { "mpn_lshiftc",       speed_mpn_lshiftc, FLAG_R   },
  { "mpn_rshift",        speed_mpn_rshift, FLAG_R   },

  { "mpn_and_n",         speed_mpn_and_n,  FLAG_R_OPTIONAL },
  { "mpn_andn_n",        speed_mpn_andn_n, FLAG_R_OPTIONAL },
  { "mpn_nand_n",        speed_mpn_nand_n, FLAG_R_OPTIONAL },
  { "mpn_ior_n",         speed_mpn_ior_n,  FLAG_R_OPTIONAL },
  { "mpn_iorn_n",        speed_mpn_iorn_n, FLAG_R_OPTIONAL },
  { "mpn_nior_n",        speed_mpn_nior_n, FLAG_R_OPTIONAL },
  { "mpn_xor_n",         speed_mpn_xor_n,  FLAG_R_OPTIONAL },
  { "mpn_xnor_n",        speed_mpn_xnor_n, FLAG_R_OPTIONAL },
  { "mpn_com",           speed_mpn_com              },

  { "mpn_popcount",      speed_mpn_popcount         },
  { "mpn_hamdist",       speed_mpn_hamdist          },

  { "mpn_matrix22_mul",  speed_mpn_matrix22_mul     },

  { "mpn_hgcd",          speed_mpn_hgcd             },
  { "mpn_hgcd_lehmer",   speed_mpn_hgcd_lehmer      },

  { "mpn_gcd_1",         speed_mpn_gcd_1,  FLAG_R_OPTIONAL },
  { "mpn_gcd_1N",        speed_mpn_gcd_1N, FLAG_R_OPTIONAL },

  { "mpn_gcd",           speed_mpn_gcd                    },
#if 0
  { "mpn_gcd_binary",    speed_mpn_gcd_binary             },
  { "mpn_gcd_accel",     speed_mpn_gcd_accel              },
  { "find_a",            speed_find_a,        FLAG_NODATA },
#endif

  { "mpn_gcdext",            speed_mpn_gcdext            },
  { "mpn_gcdext_single",     speed_mpn_gcdext_single     },
  { "mpn_gcdext_double",     speed_mpn_gcdext_double     },
  { "mpn_gcdext_one_single", speed_mpn_gcdext_one_single },
  { "mpn_gcdext_one_double", speed_mpn_gcdext_one_double },
#if 0
  { "mpn_gcdext_lehmer",     speed_mpn_gcdext_lehmer     },
#endif
  { "mpz_jacobi",        speed_mpz_jacobi           },
  { "mpn_jacobi_base",   speed_mpn_jacobi_base      },
  { "mpn_jacobi_base_1", speed_mpn_jacobi_base_1    },
  { "mpn_jacobi_base_2", speed_mpn_jacobi_base_2    },
  { "mpn_jacobi_base_3", speed_mpn_jacobi_base_3    },

  { "mpn_mul",           speed_mpn_mul,         FLAG_R_OPTIONAL },
  { "mpn_mul_basecase",  speed_mpn_mul_basecase,FLAG_R_OPTIONAL },
  { "mpn_sqr_basecase",  speed_mpn_sqr_basecase     },
#if HAVE_NATIVE_mpn_sqr_diagonal
  { "mpn_sqr_diagonal",  speed_mpn_sqr_diagonal     },
#endif

  { "mpn_mul_n",         speed_mpn_mul_n            },
  { "mpn_sqr",           speed_mpn_sqr              },

  { "mpn_toom2_sqr",     speed_mpn_toom2_sqr        },
  { "mpn_toom3_sqr",     speed_mpn_toom3_sqr        },
  { "mpn_toom4_sqr",     speed_mpn_toom4_sqr        },
  { "mpn_toom6_sqr",     speed_mpn_toom6_sqr        },
  { "mpn_toom8_sqr",     speed_mpn_toom8_sqr        },
  { "mpn_toom22_mul",    speed_mpn_toom22_mul       },
  { "mpn_toom33_mul",    speed_mpn_toom33_mul       },
  { "mpn_toom44_mul",    speed_mpn_toom44_mul       },
  { "mpn_toom6h_mul",    speed_mpn_toom6h_mul       },
  { "mpn_toom8h_mul",    speed_mpn_toom8h_mul       },
  { "mpn_toom32_mul",    speed_mpn_toom32_mul       },
  { "mpn_toom42_mul",    speed_mpn_toom42_mul       },
  { "mpn_toom43_mul",    speed_mpn_toom43_mul       },
  { "mpn_toom63_mul",    speed_mpn_toom63_mul       },
  { "mpn_nussbaumer_mul",    speed_mpn_nussbaumer_mul    },
  { "mpn_nussbaumer_mul_sqr",speed_mpn_nussbaumer_mul_sqr},
#if WANT_OLD_FFT_FULL
  { "mpn_mul_fft_full",      speed_mpn_mul_fft_full      },
  { "mpn_mul_fft_full_sqr",  speed_mpn_mul_fft_full_sqr  },
#endif
  { "mpn_mul_fft",       speed_mpn_mul_fft,     FLAG_R_OPTIONAL },
  { "mpn_mul_fft_sqr",   speed_mpn_mul_fft_sqr, FLAG_R_OPTIONAL },

  { "mpn_mullo_n",        speed_mpn_mullo_n         },
  { "mpn_mullo_basecase", speed_mpn_mullo_basecase  },

  { "mpn_bc_mulmod_bnm1",      speed_mpn_bc_mulmod_bnm1      },
  { "mpn_mulmod_bnm1",         speed_mpn_mulmod_bnm1         },
  { "mpn_mulmod_bnm1_rounded", speed_mpn_mulmod_bnm1_rounded },
  { "mpn_sqrmod_bnm1",         speed_mpn_sqrmod_bnm1         },

  { "mpn_invert",              speed_mpn_invert              },
  { "mpn_invertappr",          speed_mpn_invertappr          },
  { "mpn_ni_invertappr",       speed_mpn_ni_invertappr       },
  { "mpn_binvert",             speed_mpn_binvert             },

  { "mpn_sbpi1_div_qr",        speed_mpn_sbpi1_div_qr,    FLAG_R_OPTIONAL},
  { "mpn_dcpi1_div_qr",        speed_mpn_dcpi1_div_qr,    FLAG_R_OPTIONAL},
  { "mpn_mu_div_qr",           speed_mpn_mu_div_qr,       FLAG_R_OPTIONAL},
  { "mpn_mupi_div_qr",         speed_mpn_mupi_div_qr,     FLAG_R_OPTIONAL},
  { "mpn_sbpi1_divappr_q",     speed_mpn_sbpi1_divappr_q, FLAG_R_OPTIONAL},
  { "mpn_dcpi1_divappr_q",     speed_mpn_dcpi1_divappr_q, FLAG_R_OPTIONAL},

  { "mpn_sbpi1_bdiv_qr",       speed_mpn_sbpi1_bdiv_qr       },
  { "mpn_dcpi1_bdiv_qr",       speed_mpn_dcpi1_bdiv_qr       },
  { "mpn_sbpi1_bdiv_q",        speed_mpn_sbpi1_bdiv_q        },
  { "mpn_dcpi1_bdiv_q",        speed_mpn_dcpi1_bdiv_q        },

  { "mpn_get_str",          speed_mpn_get_str,     FLAG_R_OPTIONAL },
  { "mpn_set_str",          speed_mpn_set_str,     FLAG_R_OPTIONAL },
  { "mpn_set_str_basecase", speed_mpn_bc_set_str,  FLAG_R_OPTIONAL },

  { "mpn_sqrtrem",       speed_mpn_sqrtrem          },
  { "mpn_rootrem",       speed_mpn_rootrem, FLAG_R  },

  { "mpn_fib2_ui",       speed_mpn_fib2_ui,    FLAG_NODATA },
  { "mpz_fib_ui",        speed_mpz_fib_ui,     FLAG_NODATA },
  { "mpz_fib2_ui",       speed_mpz_fib2_ui,    FLAG_NODATA },
  { "mpz_lucnum_ui",     speed_mpz_lucnum_ui,  FLAG_NODATA },
  { "mpz_lucnum2_ui",    speed_mpz_lucnum2_ui, FLAG_NODATA },

  { "mpz_add",           speed_mpz_add              },
  { "mpz_bin_uiui",      speed_mpz_bin_uiui, FLAG_NODATA | FLAG_R_OPTIONAL },
  { "mpz_fac_ui",        speed_mpz_fac_ui,   FLAG_NODATA   },
  { "mpz_powm",          speed_mpz_powm             },
  { "mpz_powm_mod",      speed_mpz_powm_mod         },
  { "mpz_powm_redc",     speed_mpz_powm_redc        },
  { "mpz_powm_ui",       speed_mpz_powm_ui,  FLAG_R_OPTIONAL },

  { "mpz_mod",           speed_mpz_mod              },
  { "mpn_redc_1",        speed_mpn_redc_1           },
  { "mpn_redc_2",        speed_mpn_redc_2           },
  { "mpn_redc_n",        speed_mpn_redc_n           },

  { "MPN_COPY",          speed_MPN_COPY             },
  { "MPN_COPY_INCR",     speed_MPN_COPY_INCR        },
  { "MPN_COPY_DECR",     speed_MPN_COPY_DECR        },
  { "memcpy",            speed_memcpy               },
#if HAVE_NATIVE_mpn_copyi
  { "mpn_copyi",         speed_mpn_copyi            },
#endif
#if HAVE_NATIVE_mpn_copyd
  { "mpn_copyd",         speed_mpn_copyd            },
#endif
#if HAVE_NATIVE_mpn_addlsh1_n
  { "mpn_addlsh1_n",     speed_mpn_addlsh1_n        },
#endif
#if HAVE_NATIVE_mpn_sublsh1_n
  { "mpn_sublsh1_n",     speed_mpn_sublsh1_n        },
#endif
#if HAVE_NATIVE_mpn_rsblsh1_n
  { "mpn_rsblsh1_n",     speed_mpn_rsblsh1_n        },
#endif
#if HAVE_NATIVE_mpn_addlsh2_n
  { "mpn_addlsh2_n",     speed_mpn_addlsh2_n        },
#endif
#if HAVE_NATIVE_mpn_sublsh2_n
  { "mpn_sublsh2_n",     speed_mpn_sublsh2_n        },
#endif
#if HAVE_NATIVE_mpn_rsblsh2_n
  { "mpn_rsblsh2_n",     speed_mpn_rsblsh2_n        },
#endif
#if HAVE_NATIVE_mpn_rsh1add_n
  { "mpn_rsh1add_n",     speed_mpn_rsh1add_n        },
#endif
#if HAVE_NATIVE_mpn_rsh1sub_n
  { "mpn_rsh1sub_n",     speed_mpn_rsh1sub_n        },
#endif

  { "MPN_ZERO",          speed_MPN_ZERO             },

  { "binvert_limb",       speed_binvert_limb,       FLAG_NODATA },
  { "binvert_limb_mul1",  speed_binvert_limb_mul1,  FLAG_NODATA },
  { "binvert_limb_loop",  speed_binvert_limb_loop,  FLAG_NODATA },
  { "binvert_limb_cond",  speed_binvert_limb_cond,  FLAG_NODATA },
  { "binvert_limb_arith", speed_binvert_limb_arith, FLAG_NODATA },

  { "malloc_free",                  speed_malloc_free                  },
  { "malloc_realloc_free",          speed_malloc_realloc_free          },
  { "gmp_allocate_free",            speed_gmp_allocate_free            },
  { "gmp_allocate_reallocate_free", speed_gmp_allocate_reallocate_free },
  { "mpz_init_clear",               speed_mpz_init_clear               },
  { "mpq_init_clear",               speed_mpq_init_clear               },
  { "mpf_init_clear",               speed_mpf_init_clear               },
  { "mpz_init_realloc_clear",       speed_mpz_init_realloc_clear       },

  { "umul_ppmm",         speed_umul_ppmm,     FLAG_R_OPTIONAL },
#if HAVE_NATIVE_mpn_umul_ppmm
  { "mpn_umul_ppmm",     speed_mpn_umul_ppmm, FLAG_R_OPTIONAL },
#endif
#if HAVE_NATIVE_mpn_umul_ppmm_r
  { "mpn_umul_ppmm_r",   speed_mpn_umul_ppmm_r, FLAG_R_OPTIONAL },
#endif

  { "count_leading_zeros",  speed_count_leading_zeros,  FLAG_NODATA | FLAG_R_OPTIONAL },
  { "count_trailing_zeros", speed_count_trailing_zeros, FLAG_NODATA | FLAG_R_OPTIONAL },

  { "udiv_qrnnd",             speed_udiv_qrnnd,             FLAG_R_OPTIONAL },
  { "udiv_qrnnd_preinv1",     speed_udiv_qrnnd_preinv1,     FLAG_R_OPTIONAL },
  { "udiv_qrnnd_preinv2",     speed_udiv_qrnnd_preinv2,     FLAG_R_OPTIONAL },
  { "udiv_qrnnd_c",           speed_udiv_qrnnd_c,           FLAG_R_OPTIONAL },
#if HAVE_NATIVE_mpn_udiv_qrnnd
  { "mpn_udiv_qrnnd",         speed_mpn_udiv_qrnnd,         FLAG_R_OPTIONAL },
#endif
#if HAVE_NATIVE_mpn_udiv_qrnnd_r
  { "mpn_udiv_qrnnd_r",       speed_mpn_udiv_qrnnd_r,       FLAG_R_OPTIONAL },
#endif
  { "invert_limb",            speed_invert_limb,            FLAG_R_OPTIONAL },

  { "operator_div",           speed_operator_div,           FLAG_R_OPTIONAL },
  { "operator_mod",           speed_operator_mod,           FLAG_R_OPTIONAL },

  { "gmp_randseed",    speed_gmp_randseed,    FLAG_R_OPTIONAL               },
  { "gmp_randseed_ui", speed_gmp_randseed_ui, FLAG_R_OPTIONAL | FLAG_NODATA },
  { "mpz_urandomb",    speed_mpz_urandomb,    FLAG_R_OPTIONAL | FLAG_NODATA },

  /* Extra table entries, expanded here when the build defines these
     macros.  */
#ifdef SPEED_EXTRA_ROUTINES
  SPEED_EXTRA_ROUTINES
#endif
#ifdef SPEED_EXTRA_ROUTINES2
  SPEED_EXTRA_ROUTINES2
#endif
};
460
461
/* One routine selected for measurement, as set up by routine_find and
   updated by run_one at each size.  */
struct choice_t {
  const struct routine_t  *p;   /* the matching routine[] entry */
  mp_limb_t               r;    /* value of the ".r" parameter, 0 if none */
  double                  scale;  /* multiplier from a "scale*name" choice,
                                     1.0 if none given */
  double                  time;   /* latest result, after scaling and the
                                     selected comparison/unit conversions */
  int                     no_time;  /* non-zero when no result is available
                                       to show, printed as "n/a" */
  double                  prev_time;  /* scaled time from the previous call
                                         of run_one, before any differences;
                                         -1.0 if that measurement failed */
  const char              *name;  /* the choice string as given by the user */
};
/* The selected routines; num_choices entries are in use.  */
struct choice_t  *choice;
int  num_choices = 0;
473
474
475void
476data_fill (mp_ptr ptr, mp_size_t size)
477{
478  switch (option_data) {
479  case DATA_RANDOM:
480    mpn_random (ptr, size);
481    break;
482  case DATA_RANDOM2:
483    mpn_random2 (ptr, size);
484    break;
485  case DATA_ZEROS:
486    MPN_ZERO (ptr, size);
487    break;
488  case DATA_AAS:
489    MPN_FILL (ptr, size, GMP_NUMB_0xAA);
490    break;
491  case DATA_FFS:
492    MPN_FILL (ptr, size, GMP_NUMB_MAX);
493    break;
494  case DATA_2FD:
495    MPN_FILL (ptr, size, GMP_NUMB_MAX);
496    ptr[0] -= 2;
497    break;
498  default:
499    abort();
500    /*NOTREACHED*/
501  }
502}
503
504/* The code here handling the various combinations of output options isn't
505   too attractive, but it works and is fairly clean.  */
506
/* The amount of work a size-n measurement is divided by for the "per limb"
   unit: n*n when option_square is 1, n*(n+1)/2 when it's 2, otherwise n.
   The result is a double, computed in floating point so the products can't
   overflow an integer type; it's only ever used to divide a time.  */
#define SIZE_TO_DIVISOR(n)                                              \
  (option_square == 1 ? (double) (n) * (double) (n)                     \
   : option_square == 2 ? (double) (n) * ((double) (n) + 1.0) / 2.0     \
   : (double) (n))
511
512void
513run_one (FILE *fp, struct speed_params *s, mp_size_t prev_size)
514{
515  const char  *first_open_fastest, *first_open_notfastest, *first_close;
516  int         i, fastest, want_data;
517  double      fastest_time;
518  TMP_DECL;
519
520  TMP_MARK;
521
522  /* allocate data, unless all routines are NODATA */
523  want_data = 0;
524  for (i = 0; i < num_choices; i++)
525    want_data |= ((choice[i].p->flag & FLAG_NODATA) == 0);
526
527  if (want_data)
528    {
529      SPEED_TMP_ALLOC_LIMBS (sp.xp, s->size, s->align_xp);
530      SPEED_TMP_ALLOC_LIMBS (sp.yp, s->size, s->align_yp);
531
532      data_fill (s->xp, s->size);
533      data_fill (s->yp, s->size);
534    }
535  else
536    {
537      sp.xp = NULL;
538      sp.yp = NULL;
539    }
540
541  if (prev_size == -1 && option_cmp == CMP_DIFFPREV)
542    {
543      first_open_fastest = "(#";
544      first_open_notfastest = " (";
545      first_close = ")";
546    }
547  else
548    {
549      first_open_fastest = "#";
550      first_open_notfastest = " ";
551      first_close = "";
552    }
553
554  fastest = -1;
555  fastest_time = -1.0;
556  for (i = 0; i < num_choices; i++)
557    {
558      s->r = choice[i].r;
559      choice[i].time = speed_measure (choice[i].p->fun, s);
560      choice[i].no_time = (choice[i].time == -1.0);
561      if (! choice[i].no_time)
562        choice[i].time *= choice[i].scale;
563
564      /* Apply the effect of CMP_DIFFPREV, but the new choice[i].prev_time
565         is before any differences.  */
566      {
567        double     t;
568        t = choice[i].time;
569        if (t != -1.0 && option_cmp == CMP_DIFFPREV && prev_size != -1)
570          {
571            if (choice[i].prev_time == -1.0)
572              choice[i].no_time = 1;
573            else
574              choice[i].time = choice[i].time - choice[i].prev_time;
575          }
576        choice[i].prev_time = t;
577      }
578
579      if (choice[i].no_time)
580        continue;
581
582      /* Look for the fastest after CMP_DIFFPREV has been applied, but
583         before CMP_RATIO or CMP_DIFFERENCE.  There's only a fastest shown
584         if there's more than one routine.  */
585      if (num_choices > 1 && (fastest == -1 || choice[i].time < fastest_time))
586        {
587          fastest = i;
588          fastest_time = choice[i].time;
589        }
590
591      if (option_cmp == CMP_DIFFPREV)
592        {
593          /* Conversion for UNIT_CYCLESPERLIMB differs in CMP_DIFFPREV. */
594          if (option_unit == UNIT_CYCLES)
595            choice[i].time /= speed_cycletime;
596          else if (option_unit == UNIT_CYCLESPERLIMB)
597            {
598              if (prev_size == -1)
599                choice[i].time /= speed_cycletime;
600              else
601                choice[i].time /=  (speed_cycletime
602                                    * (SIZE_TO_DIVISOR(s->size)
603                                       - SIZE_TO_DIVISOR(prev_size)));
604            }
605        }
606      else
607        {
608          if (option_unit == UNIT_CYCLES)
609            choice[i].time /= speed_cycletime;
610          else if (option_unit == UNIT_CYCLESPERLIMB)
611            choice[i].time /= (speed_cycletime * SIZE_TO_DIVISOR(s->size));
612
613          if (option_cmp == CMP_RATIO && i > 0)
614            {
615              /* A ratio isn't affected by the units chosen. */
616              if (choice[0].no_time || choice[0].time == 0.0)
617                choice[i].no_time = 1;
618              else
619                choice[i].time /= choice[0].time;
620            }
621          else if (option_cmp == CMP_DIFFERENCE && i > 0)
622            {
623              if (choice[0].no_time)
624                {
625                  choice[i].no_time = 1;
626                  continue;
627                }
628              choice[i].time -= choice[0].time;
629            }
630        }
631    }
632
633  if (option_gnuplot)
634    {
635      /* In CMP_DIFFPREV, don't print anything for the first size, start
636         with the second where an actual difference is available.
637
638         In CMP_RATIO, print the first column as 1.0.
639
640         The 9 decimals printed is much more than the expected precision of
641         the measurements actually. */
642
643      if (! (option_cmp == CMP_DIFFPREV && prev_size == -1))
644        {
645          fprintf (fp, "%-6ld ", s->size);
646          for (i = 0; i < num_choices; i++)
647            fprintf (fp, "  %.9e",
648                     choice[i].no_time ? 0.0
649                     : (option_cmp == CMP_RATIO && i == 0) ? 1.0
650                     : choice[i].time);
651          fprintf (fp, "\n");
652        }
653    }
654  else
655    {
656      fprintf (fp, "%-6ld ", s->size);
657      for (i = 0; i < num_choices; i++)
658        {
659          char  buf[128];
660          int   decimals;
661
662          if (choice[i].no_time)
663            {
664              fprintf (fp, " %*s", COLUMN_WIDTH, "n/a");
665            }
666          else
667            {if (option_unit == UNIT_CYCLESPERLIMB
668                 || (option_cmp == CMP_RATIO && i > 0))
669                decimals = 4;
670              else if (option_unit == UNIT_CYCLES)
671                decimals = 2;
672              else
673                decimals = 9;
674
675              sprintf (buf, "%s%.*f%s",
676                       i == fastest ? first_open_fastest : first_open_notfastest,
677                       decimals, choice[i].time, first_close);
678              fprintf (fp, " %*s", COLUMN_WIDTH, buf);
679            }
680        }
681      fprintf (fp, "\n");
682    }
683
684  TMP_FREE;
685}
686
687void
688run_all (FILE *fp)
689{
690  mp_size_t  prev_size;
691  int        i;
692  TMP_DECL;
693
694  TMP_MARK;
695  SPEED_TMP_ALLOC_LIMBS (sp.xp_block, SPEED_BLOCK_SIZE, sp.align_xp);
696  SPEED_TMP_ALLOC_LIMBS (sp.yp_block, SPEED_BLOCK_SIZE, sp.align_yp);
697
698  data_fill (sp.xp_block, SPEED_BLOCK_SIZE);
699  data_fill (sp.yp_block, SPEED_BLOCK_SIZE);
700
701  for (i = 0; i < size_num; i++)
702    {
703      sp.size = size_array[i].start;
704      prev_size = -1;
705      for (;;)
706        {
707          mp_size_t  step;
708
709          if (option_data == DATA_2FD && sp.size >= 2)
710            sp.xp[sp.size-1] = 2;
711
712          run_one (fp, &sp, prev_size);
713          prev_size = sp.size;
714
715          if (option_data == DATA_2FD && sp.size >= 2)
716            sp.xp[sp.size-1] = MP_LIMB_T_MAX;
717
718          if (option_factor != 0.0)
719            {
720              step = (mp_size_t) (sp.size * option_factor - sp.size);
721              if (step < 1)
722                step = 1;
723            }
724          else
725            step = 1;
726          if (step < option_step)
727            step = option_step;
728
729          sp.size += step;
730          if (sp.size > size_array[i].end)
731            break;
732        }
733    }
734
735  TMP_FREE;
736}
737
738
/* Open filename for writing, replacing any existing contents.  Never
   returns NULL: on failure a message goes to stderr and the program exits
   with status 1.  */
FILE *
fopen_for_write (const char *filename)
{
  FILE  *stream = fopen (filename, "w");

  if (stream == NULL)
    {
      fprintf (stderr, "Cannot create %s\n", filename);
      exit(1);
    }

  return stream;
}
750
/* Close fp, a stream that was written to under the name filename.  If the
   stream has its error indicator set or the close itself fails, a message
   goes to stderr and the program exits with status 1.  The stream is closed
   in either case.  */
void
fclose_written (FILE *fp, const char *filename)
{
  int  write_failed = ferror (fp);
  int  close_failed = fclose (fp);

  if (write_failed || close_failed)
    {
      fprintf (stderr, "Error writing %s\n", filename);
      exit(1);
    }
}
765
766
767void
768run_gnuplot (int argc, char *argv[])
769{
770  char  *plot_filename;
771  char  *data_filename;
772  FILE  *fp;
773  int   i;
774
775  plot_filename = (char *) (*__gmp_allocate_func)
776    (strlen (option_gnuplot_basename) + 20);
777  data_filename = (char *) (*__gmp_allocate_func)
778    (strlen (option_gnuplot_basename) + 20);
779
780  sprintf (plot_filename, "%s.gnuplot", option_gnuplot_basename);
781  sprintf (data_filename, "%s.data",    option_gnuplot_basename);
782
783  fp = fopen_for_write (plot_filename);
784
785  fprintf (fp, "# Generated with:\n");
786  fprintf (fp, "#");
787  for (i = 0; i < argc; i++)
788    fprintf (fp, " %s", argv[i]);
789  fprintf (fp, "\n");
790  fprintf (fp, "\n");
791
792  fprintf (fp, "reset\n");
793
794  /* Putting the key at the top left is usually good, and you can change it
795     interactively if it's not. */
796  fprintf (fp, "set key left\n");
797
798  /* designed to make it possible to see crossovers easily */
799  fprintf (fp, "set data style lines\n");
800
801  fprintf (fp, "plot ");
802  for (i = 0; i < num_choices; i++)
803    {
804      fprintf (fp, " \"%s\" using 1:%d", data_filename, i+2);
805      fprintf (fp, " title \"%s\"", choice[i].name);
806
807      if (i != num_choices-1)
808        fprintf (fp, ", \\");
809      fprintf (fp, "\n");
810    }
811
812  fprintf (fp, "load \"-\"\n");
813  fclose_written (fp, plot_filename);
814
815  fp = fopen_for_write (data_filename);
816
817  /* Unbuffered so you can see where the program was up to if it crashes or
818     you kill it. */
819  setbuf (fp, NULL);
820
821  run_all (fp);
822  fclose_written (fp, data_filename);
823}
824
825
/* LIMB_ONES(n) is a limb whose n least significant bits are ones and whose
   remaining bits are zeros.  n == 0 and n == GMP_LIMB_BITS are picked out
   explicitly; other values go through a shift by n, so callers must keep n
   within 0 to GMP_LIMB_BITS.  */

#define LIMB_ONES(n)                            \
  ((n) == 0 ? CNST_LIMB(0)                      \
   : (n) == GMP_LIMB_BITS ? MP_LIMB_T_MAX       \
   : (CNST_LIMB(1) << (n)) - 1)
832
833mp_limb_t
834r_string (const char *s)
835{
836  const char  *s_orig = s;
837  long        n;
838
839  if (strcmp (s, "aas") == 0)
840    return GMP_NUMB_0xAA;
841
842  {
843    mpz_t      z;
844    mp_limb_t  l;
845    int        set, siz;
846
847    mpz_init (z);
848    set = mpz_set_str (z, s, 0);
849    siz = SIZ(z);
850    l = (siz == 0 ? 0 : siz > 0 ? PTR(z)[0] : -PTR(z)[0]);
851    mpz_clear (z);
852    if (set == 0)
853      {
854        if (siz > 1 || siz < -1)
855          printf ("Warning, r parameter %s truncated to %d bits\n",
856                  s_orig, GMP_LIMB_BITS);
857        return l;
858      }
859  }
860
861  if (s[0] == '0' && (s[1] == 'x' || s[1] == 'X'))
862    n = strtoul (s+2, (char **) &s, 16);
863  else
864    n = strtol (s, (char **) &s, 10);
865
866  if (strcmp (s, "bits") == 0)
867    {
868      mp_limb_t  l;
869      if (n > GMP_LIMB_BITS)
870        {
871          fprintf (stderr, "%ld bit parameter invalid (max %d bits)\n",
872                   n, GMP_LIMB_BITS);
873          exit (1);
874        }
875      mpn_random (&l, 1);
876      return (l | (CNST_LIMB(1) << (n-1))) & LIMB_ONES(n);
877    }
878  else  if (strcmp (s, "ones") == 0)
879    {
880      if (n > GMP_LIMB_BITS)
881        {
882          fprintf (stderr, "%ld bit parameter invalid (max %d bits)\n",
883                   n, GMP_LIMB_BITS);
884          exit (1);
885        }
886      return LIMB_ONES (n);
887    }
888  else if (*s != '\0')
889    {
890      fprintf (stderr, "invalid r parameter: %s\n", s_orig);
891      exit (1);
892    }
893
894  return n;
895}
896
897
898void
899routine_find (struct choice_t *c, const char *s_orig)
900{
901  const char  *s;
902  int     i;
903  size_t  nlen;
904
905  c->name = s_orig;
906  s = strchr (s_orig, '*');
907  if (s != NULL)
908    {
909      c->scale = atof(s_orig);
910      s++;
911    }
912  else
913    {
914      c->scale = 1.0;
915      s = s_orig;
916    }
917
918  for (i = 0; i < numberof (routine); i++)
919    {
920      nlen = strlen (routine[i].name);
921      if (memcmp (s, routine[i].name, nlen) != 0)
922        continue;
923
924      if (s[nlen] == '.')
925        {
926          /* match, with a .r parameter */
927
928          if (! (routine[i].flag & (FLAG_R|FLAG_R_OPTIONAL)))
929            {
930              fprintf (stderr,
931                       "Choice %s bad: doesn't take a \".<r>\" parameter\n",
932                       s_orig);
933              exit (1);
934            }
935
936          c->p = &routine[i];
937          c->r = r_string (s + nlen + 1);
938          return;
939        }
940
941      if (s[nlen] == '\0')
942        {
943          /* match, with no parameter */
944
945          if (routine[i].flag & FLAG_R)
946            {
947              fprintf (stderr,
948                       "Choice %s bad: needs a \".<r>\" parameter\n",
949                       s_orig);
950              exit (1);
951            }
952
953          c->p = &routine[i];
954          c->r = 0;
955          return;
956        }
957    }
958
959  fprintf (stderr, "Choice %s unrecognised\n", s_orig);
960  exit (1);
961}
962
963
964void
965usage (void)
966{
967  int  i;
968
969  speed_time_init ();
970
971  printf ("Usage: speed [-options] -s size <routine>...\n");
972  printf ("Measure the speed of some routines.\n");
973  printf ("Times are in seconds, accuracy is shown.\n");
974  printf ("\n");
975  printf ("   -p num     set precision as number of time units each routine must run\n");
976  printf ("   -s size[-end][,size[-end]]...   sizes to measure\n");
977  printf ("              single sizes or ranges, sep with comma or use multiple -s\n");
978  printf ("   -t step    step through sizes by given amount\n");
979  printf ("   -f factor  step through sizes by given factor (eg. 1.05)\n");
980  printf ("   -r         show times as ratios of the first routine\n");
981  printf ("   -d         show times as difference from the first routine\n");
982  printf ("   -D         show times as difference from previous size shown\n");
983  printf ("   -c         show times in CPU cycles\n");
984  printf ("   -C         show times in cycles per limb\n");
985  printf ("   -u         print resource usage (memory) at end\n");
986  printf ("   -P name    output plot files \"name.gnuplot\" and \"name.data\"\n");
987  printf ("   -a <type>  use given data: random(default), random2, zeros, aas, ffs, 2fd\n");
988  printf ("   -x, -y, -w, -W <align>  specify data alignments, sources and dests\n");
989  printf ("   -o addrs   print addresses of data blocks\n");
990  printf ("\n");
991  printf ("If both -t and -f are used, it means step by the factor or the step, whichever\n");
992  printf ("is greater.\n");
993  printf ("If both -C and -D are used, it means cycles per however many limbs between a\n");
994  printf ("size and the previous size.\n");
995  printf ("\n");
996  printf ("After running with -P, plots can be viewed with Gnuplot or Quickplot.\n");
997  printf ("\"gnuplot name.gnuplot\" (use \"set logscale xy; replot\" at the prompt for\n");
998  printf ("a log/log plot).\n");
999  printf ("\"quickplot -s name.data\" (has interactive zooming, and note -s is important\n");
1000  printf ("when viewing more than one routine, it means same axis scales for all data).\n");
1001  printf ("\n");
1002  printf ("The available routines are as follows.\n");
1003  printf ("\n");
1004
1005  for (i = 0; i < numberof (routine); i++)
1006    {
1007      if (routine[i].flag & FLAG_R)
1008        printf ("\t%s.r\n", routine[i].name);
1009      else if (routine[i].flag & FLAG_R_OPTIONAL)
1010        printf ("\t%s (optional .r)\n", routine[i].name);
1011      else
1012        printf ("\t%s\n", routine[i].name);
1013    }
1014  printf ("\n");
1015  printf ("Routines with a \".r\" need an extra parameter, for example mpn_lshift.6\n");
1016  printf ("r should be in decimal, or use 0xN for hexadecimal.\n");
1017  printf ("\n");
1018  printf ("Special forms for r are \"<N>bits\" for a random N bit number, \"<N>ones\" for\n");
1019  printf ("N one bits, or \"aas\" for 0xAA..AA.\n");
1020  printf ("\n");
1021  printf ("Times for sizes out of the range accepted by a routine are shown as 0.\n");
1022  printf ("The fastest routine at each size is marked with a # (free form output only).\n");
1023  printf ("\n");
1024  printf ("%s", speed_time_string);
1025  printf ("\n");
1026  printf ("Gnuplot home page http://www.gnuplot.info/\n");
1027  printf ("Quickplot home page http://quickplot.sourceforge.net/\n");
1028}
1029
1030void
1031check_align_option (const char *name, mp_size_t align)
1032{
1033  if (align < 0 || align > SPEED_TMP_ALLOC_ADJUST_MASK)
1034    {
1035      fprintf (stderr, "Alignment request out of range: %s %ld\n",
1036               name, (long) align);
1037      fprintf (stderr, "  should be 0 to %d (limbs), inclusive\n",
1038               SPEED_TMP_ALLOC_ADJUST_MASK);
1039      exit (1);
1040    }
1041}
1042
1043int
1044main (int argc, char *argv[])
1045{
1046  int  i;
1047  int  opt;
1048
1049  /* Unbuffered so output goes straight out when directed to a pipe or file
1050     and isn't lost on killing the program half way.  */
1051  setbuf (stdout, NULL);
1052
1053  for (;;)
1054    {
1055      opt = getopt(argc, argv, "a:CcDdEFf:o:p:P:rRs:t:ux:y:w:W:z");
1056      if (opt == EOF)
1057        break;
1058
1059      switch (opt) {
1060      case 'a':
1061        if (strcmp (optarg, "random") == 0)       option_data = DATA_RANDOM;
1062        else if (strcmp (optarg, "random2") == 0) option_data = DATA_RANDOM2;
1063        else if (strcmp (optarg, "zeros") == 0)   option_data = DATA_ZEROS;
1064        else if (strcmp (optarg, "aas") == 0)     option_data = DATA_AAS;
1065        else if (strcmp (optarg, "ffs") == 0)     option_data = DATA_FFS;
1066        else if (strcmp (optarg, "2fd") == 0)     option_data = DATA_2FD;
1067        else
1068          {
1069            fprintf (stderr, "unrecognised data option: %s\n", optarg);
1070            exit (1);
1071          }
1072        break;
1073      case 'C':
1074        if (option_unit  != UNIT_SECONDS) goto bad_unit;
1075        option_unit = UNIT_CYCLESPERLIMB;
1076        break;
1077      case 'c':
1078        if (option_unit != UNIT_SECONDS)
1079          {
1080          bad_unit:
1081            fprintf (stderr, "cannot use more than one of -c, -C\n");
1082            exit (1);
1083          }
1084        option_unit = UNIT_CYCLES;
1085        break;
1086      case 'D':
1087        if (option_cmp != CMP_ABSOLUTE) goto bad_cmp;
1088        option_cmp = CMP_DIFFPREV;
1089        break;
1090      case 'd':
1091        if (option_cmp != CMP_ABSOLUTE)
1092          {
1093          bad_cmp:
1094            fprintf (stderr, "cannot use more than one of -d, -D, -r\n");
1095            exit (1);
1096          }
1097        option_cmp = CMP_DIFFERENCE;
1098        break;
1099      case 'E':
1100        option_square = 1;
1101        break;
1102      case 'F':
1103        option_square = 2;
1104        break;
1105      case 'f':
1106        option_factor = atof (optarg);
1107        if (option_factor <= 1.0)
1108          {
1109            fprintf (stderr, "-f factor must be > 1.0\n");
1110            exit (1);
1111          }
1112        break;
1113      case 'o':
1114        speed_option_set (optarg);
1115        break;
1116      case 'P':
1117        option_gnuplot = 1;
1118        option_gnuplot_basename = optarg;
1119        break;
1120      case 'p':
1121        speed_precision = atoi (optarg);
1122        break;
1123      case 'R':
1124        option_seed = time (NULL);
1125        break;
1126      case 'r':
1127        if (option_cmp != CMP_ABSOLUTE)
1128          goto bad_cmp;
1129        option_cmp = CMP_RATIO;
1130        break;
1131      case 's':
1132        {
1133          char  *s;
1134          for (s = strtok (optarg, ","); s != NULL; s = strtok (NULL, ","))
1135            {
1136              if (size_num == size_allocnum)
1137                {
1138                  size_array = (struct size_array_t *)
1139                    __gmp_allocate_or_reallocate
1140                    (size_array,
1141                     size_allocnum * sizeof(size_array[0]),
1142                     (size_allocnum+10) * sizeof(size_array[0]));
1143                  size_allocnum += 10;
1144                }
1145              if (sscanf (s, "%ld-%ld",
1146                          &size_array[size_num].start,
1147                          &size_array[size_num].end) != 2)
1148                {
1149                  size_array[size_num].start = size_array[size_num].end
1150                    = atol (s);
1151                }
1152
1153              if (size_array[size_num].start < 0
1154                  || size_array[size_num].end < 0
1155                  || size_array[size_num].start > size_array[size_num].end)
1156                {
1157                  fprintf (stderr, "invalid size parameter: %s\n", s);
1158                  exit (1);
1159                }
1160
1161              size_num++;
1162            }
1163        }
1164        break;
1165      case 't':
1166        option_step = atol (optarg);
1167        if (option_step < 1)
1168          {
1169            fprintf (stderr, "-t step must be >= 1\n");
1170            exit (1);
1171          }
1172        break;
1173      case 'u':
1174        option_resource_usage = 1;
1175        break;
1176      case 'z':
1177        sp.cache = 1;
1178        break;
1179      case 'x':
1180        sp.align_xp = atol (optarg);
1181        check_align_option ("-x", sp.align_xp);
1182        break;
1183      case 'y':
1184        sp.align_yp = atol (optarg);
1185        check_align_option ("-y", sp.align_yp);
1186        break;
1187      case 'w':
1188        sp.align_wp = atol (optarg);
1189        check_align_option ("-w", sp.align_wp);
1190        break;
1191      case 'W':
1192        sp.align_wp2 = atol (optarg);
1193        check_align_option ("-W", sp.align_wp2);
1194        break;
1195      case '?':
1196        exit(1);
1197      }
1198    }
1199
1200  if (optind >= argc)
1201    {
1202      usage ();
1203      exit (1);
1204    }
1205
1206  if (size_num == 0)
1207    {
1208      fprintf (stderr, "-s <size> must be specified\n");
1209      exit (1);
1210    }
1211
1212  gmp_randinit_default (__gmp_rands);
1213  __gmp_rands_initialized = 1;
1214  gmp_randseed_ui (__gmp_rands, option_seed);
1215
1216  choice = (struct choice_t *) (*__gmp_allocate_func)
1217    ((argc - optind) * sizeof(choice[0]));
1218  for ( ; optind < argc; optind++)
1219    {
1220      struct choice_t  c;
1221      routine_find (&c, argv[optind]);
1222      choice[num_choices] = c;
1223      num_choices++;
1224    }
1225
1226  if ((option_cmp == CMP_RATIO || option_cmp == CMP_DIFFERENCE) &&
1227      num_choices < 2)
1228    {
1229      fprintf (stderr, "WARNING, -d or -r does nothing when only one routine requested\n");
1230    }
1231
1232  speed_time_init ();
1233  if (option_unit == UNIT_CYCLES || option_unit == UNIT_CYCLESPERLIMB)
1234    speed_cycletime_need_cycles ();
1235  else
1236    speed_cycletime_need_seconds ();
1237
1238  if (option_gnuplot)
1239    {
1240      run_gnuplot (argc, argv);
1241    }
1242  else
1243    {
1244      if (option_unit == UNIT_SECONDS)
1245        printf ("overhead %.9f secs", speed_measure (speed_noop, NULL));
1246      else
1247        printf ("overhead %.2f cycles",
1248                speed_measure (speed_noop, NULL) / speed_cycletime);
1249      printf (", precision %d units of %.2e secs",
1250              speed_precision, speed_unittime);
1251
1252      if (speed_cycletime == 1.0 || speed_cycletime == 0.0)
1253        printf (", CPU freq unknown\n");
1254      else
1255        printf (", CPU freq %.2f MHz\n", 1e-6/speed_cycletime);
1256
1257      printf ("       ");
1258      for (i = 0; i < num_choices; i++)
1259        printf (" %*s", COLUMN_WIDTH, choice[i].name);
1260      printf ("\n");
1261
1262      run_all (stdout);
1263    }
1264
1265  if (option_resource_usage)
1266    {
1267#if HAVE_GETRUSAGE
1268      {
1269        /* This doesn't give data sizes on linux 2.0.x, only utime. */
1270        struct rusage  r;
1271        if (getrusage (RUSAGE_SELF, &r) != 0)
1272          perror ("getrusage");
1273        else
1274          printf ("getrusage(): utime %ld.%06ld data %ld stack %ld maxresident %ld\n",
1275                  r.ru_utime.tv_sec, r.ru_utime.tv_usec,
1276                  r.ru_idrss, r.ru_isrss, r.ru_ixrss);
1277      }
1278#else
1279      printf ("getrusage() not available\n");
1280#endif
1281
1282      /* Linux kernel. */
1283      {
1284        char  buf[128];
1285        sprintf (buf, "/proc/%d/status", getpid());
1286        if (access (buf, R_OK) == 0)
1287          {
1288            sprintf (buf, "cat /proc/%d/status", getpid());
1289            system (buf);
1290          }
1291
1292      }
1293    }
1294
1295  return 0;
1296}
1297