1/* Speed measuring program. 2 3Copyright 1999, 2000, 2001, 2002, 2003, 2005, 2006, 2008, 2009, 2010 Free 4Software Foundation, Inc. 5 6This file is part of the GNU MP Library. 7 8The GNU MP Library is free software; you can redistribute it and/or modify 9it under the terms of the GNU Lesser General Public License as published by 10the Free Software Foundation; either version 3 of the License, or (at your 11option) any later version. 12 13The GNU MP Library is distributed in the hope that it will be useful, but 14WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public 16License for more details. 17 18You should have received a copy of the GNU Lesser General Public License 19along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ 20 21/* Usage message is in the code below, run with no arguments to print it. 22 See README for interesting applications. 23 24 To add a new routine foo(), create a speed_foo() function in the style of 25 the existing ones and add an entry in the routine[] array. Put FLAG_R if 26 speed_foo() wants an "r" parameter. 27 28 The routines don't have help messages or descriptions, but most have 29 suggestive names. See the source code for full details. 30 31*/ 32 33#include "config.h" 34 35#include <limits.h> 36#include <stdio.h> 37#include <stdlib.h> 38#include <string.h> 39 40#if HAVE_UNISTD_H 41#include <unistd.h> /* for getpid, R_OK */ 42#endif 43 44#if TIME_WITH_SYS_TIME 45# include <sys/time.h> /* for struct timeval */ 46# include <time.h> 47#else 48# if HAVE_SYS_TIME_H 49# include <sys/time.h> 50# else 51# include <time.h> 52# endif 53#endif 54 55#if HAVE_SYS_RESOURCE_H 56#include <sys/resource.h> /* for getrusage() */ 57#endif 58 59 60#include "gmp.h" 61#include "gmp-impl.h" 62#include "longlong.h" /* for the benefit of speed-many.c */ 63#include "tests.h" 64#include "speed.h" 65 66 67#if !HAVE_DECL_OPTARG 68extern char *optarg; 69extern int optind, opterr; 70#endif 71 72#if !HAVE_STRTOUL 73#define strtoul(p,e,b) (unsigned long) strtol(p,e,b) 74#endif 75 76#ifdef SPEED_EXTRA_PROTOS 77SPEED_EXTRA_PROTOS 78#endif 79#ifdef SPEED_EXTRA_PROTOS2 80SPEED_EXTRA_PROTOS2 81#endif 82 83 84#define MPN_FILL(ptr, size, n) \ 85 do { \ 86 mp_size_t __i; \ 87 ASSERT ((size) >= 0); \ 88 for (__i = 0; __i < (size); __i++) \ 89 (ptr)[__i] = (n); \ 90 } while (0) 91 92 93#if GMP_LIMB_BITS == 32 94#define GMP_NUMB_0xAA (CNST_LIMB(0xAAAAAAAA) & GMP_NUMB_MASK) 95#endif 96#if GMP_LIMB_BITS == 64 97#define GMP_NUMB_0xAA (CNST_LIMB(0xAAAAAAAAAAAAAAAA) & GMP_NUMB_MASK) 98#endif 99 100 101#define CMP_ABSOLUTE 1 102#define CMP_RATIO 2 103#define CMP_DIFFERENCE 3 104#define CMP_DIFFPREV 4 105int option_cmp = CMP_ABSOLUTE; 106 107#define UNIT_SECONDS 1 108#define UNIT_CYCLES 2 109#define UNIT_CYCLESPERLIMB 3 110int option_unit = UNIT_SECONDS; 111 112#define DATA_RANDOM 1 113#define DATA_RANDOM2 2 114#define DATA_ZEROS 3 115#define DATA_AAS 4 116#define DATA_FFS 5 117#define DATA_2FD 6 118int option_data = DATA_RANDOM; 119 120int option_square = 0; 121double option_factor = 0.0; 122mp_size_t option_step = 1; 123int option_gnuplot = 0; 124char *option_gnuplot_basename; 125struct size_array_t { 126 mp_size_t start, end; 127} *size_array = NULL; 128mp_size_t size_num = 0; 129mp_size_t size_allocnum = 0; 130int option_resource_usage = 0; 131long option_seed = 123456789; 132 133struct speed_params sp; 134 135#define COLUMN_WIDTH 13 /* for the free-form output */ 136 137#define FLAG_R (1<<0) /* require ".r" */ 138#define FLAG_R_OPTIONAL (1<<1) /* optional ".r" */ 139#define FLAG_RSIZE (1<<2) 140#define FLAG_NODATA (1<<3) /* don't alloc xp, yp */ 141 142const struct routine_t { 143 /* constants */ 144 const char *name; 145 speed_function_t fun; 146 int flag; 147} routine[] = { 148 149 { "noop", speed_noop }, 150 { "noop_wxs", speed_noop_wxs }, 151 { "noop_wxys", speed_noop_wxys }, 152 153 { "mpn_add_n", speed_mpn_add_n, FLAG_R_OPTIONAL }, 154 { "mpn_sub_n", speed_mpn_sub_n, FLAG_R_OPTIONAL }, 155 156#if HAVE_NATIVE_mpn_add_n_sub_n 157 { "mpn_add_n_sub_n", speed_mpn_add_n_sub_n, FLAG_R_OPTIONAL }, 158#endif 159 160 { "mpn_addmul_1", speed_mpn_addmul_1, FLAG_R }, 161 { "mpn_submul_1", speed_mpn_submul_1, FLAG_R }, 162#if HAVE_NATIVE_mpn_addmul_2 163 { "mpn_addmul_2", speed_mpn_addmul_2, FLAG_R_OPTIONAL }, 164#endif 165#if HAVE_NATIVE_mpn_addmul_3 166 { "mpn_addmul_3", speed_mpn_addmul_3, FLAG_R_OPTIONAL }, 167#endif 168#if HAVE_NATIVE_mpn_addmul_4 169 { "mpn_addmul_4", speed_mpn_addmul_4, FLAG_R_OPTIONAL }, 170#endif 171#if HAVE_NATIVE_mpn_addmul_5 172 { "mpn_addmul_5", speed_mpn_addmul_5, FLAG_R_OPTIONAL }, 173#endif 174#if HAVE_NATIVE_mpn_addmul_6 175 { "mpn_addmul_6", speed_mpn_addmul_6, FLAG_R_OPTIONAL }, 176#endif 177#if HAVE_NATIVE_mpn_addmul_7 178 { "mpn_addmul_7", speed_mpn_addmul_7, FLAG_R_OPTIONAL }, 179#endif 180#if HAVE_NATIVE_mpn_addmul_8 181 { "mpn_addmul_8", speed_mpn_addmul_8, FLAG_R_OPTIONAL }, 182#endif 183 { "mpn_mul_1", speed_mpn_mul_1, FLAG_R }, 184 { "mpn_mul_1_inplace", speed_mpn_mul_1_inplace, FLAG_R }, 185#if HAVE_NATIVE_mpn_mul_2 186 { "mpn_mul_2", speed_mpn_mul_2, FLAG_R_OPTIONAL }, 187#endif 188#if HAVE_NATIVE_mpn_mul_3 189 { "mpn_mul_3", speed_mpn_mul_3, FLAG_R_OPTIONAL }, 190#endif 191#if HAVE_NATIVE_mpn_mul_4 192 { "mpn_mul_4", speed_mpn_mul_4, FLAG_R_OPTIONAL }, 193#endif 194 195 { "mpn_divrem_1", speed_mpn_divrem_1, FLAG_R }, 196 { "mpn_divrem_1f", speed_mpn_divrem_1f, FLAG_R }, 197#if HAVE_NATIVE_mpn_divrem_1c 198 { "mpn_divrem_1c", speed_mpn_divrem_1c, FLAG_R }, 199 { "mpn_divrem_1cf", speed_mpn_divrem_1cf,FLAG_R }, 200#endif 201 { "mpn_mod_1", speed_mpn_mod_1, FLAG_R_OPTIONAL }, 202#if HAVE_NATIVE_mpn_mod_1c 203 { "mpn_mod_1c", speed_mpn_mod_1c, FLAG_R_OPTIONAL }, 204#endif 205 { "mpn_preinv_divrem_1", speed_mpn_preinv_divrem_1, FLAG_R }, 206 { "mpn_preinv_divrem_1f", speed_mpn_preinv_divrem_1f, FLAG_R }, 207 { "mpn_preinv_mod_1", speed_mpn_preinv_mod_1, FLAG_R }, 208 209 { "mpn_mod_1_1", speed_mpn_mod_1_1, FLAG_R_OPTIONAL }, 210 { "mpn_mod_1s_2", speed_mpn_mod_1_2, FLAG_R_OPTIONAL }, 211 { "mpn_mod_1s_3", speed_mpn_mod_1_3, FLAG_R_OPTIONAL }, 212 { "mpn_mod_1s_4", speed_mpn_mod_1_4, FLAG_R_OPTIONAL }, 213 214 { "mpn_divrem_1_div", speed_mpn_divrem_1_div, FLAG_R }, 215 { "mpn_divrem_1_inv", speed_mpn_divrem_1_inv, FLAG_R }, 216 { "mpn_divrem_1f_div", speed_mpn_divrem_1f_div, FLAG_R }, 217 { "mpn_divrem_1f_inv", speed_mpn_divrem_1f_inv, FLAG_R }, 218 { "mpn_mod_1_div", speed_mpn_mod_1_div, FLAG_R }, 219 { "mpn_mod_1_inv", speed_mpn_mod_1_inv, FLAG_R }, 220 221 { "mpn_divrem_2", speed_mpn_divrem_2, }, 222 { "mpn_divrem_2_div", speed_mpn_divrem_2_div, }, 223 { "mpn_divrem_2_inv", speed_mpn_divrem_2_inv, }, 224 225 { "mpn_divexact_1", speed_mpn_divexact_1, FLAG_R }, 226 { "mpn_divexact_by3", speed_mpn_divexact_by3 }, 227 228 { "mpn_bdiv_q_1", speed_mpn_bdiv_q_1, FLAG_R_OPTIONAL }, 229 { "mpn_pi1_bdiv_q_1", speed_mpn_pi1_bdiv_q_1, FLAG_R_OPTIONAL }, 230 { "mpn_bdiv_dbm1c", speed_mpn_bdiv_dbm1c, FLAG_R_OPTIONAL }, 231 232#if HAVE_NATIVE_mpn_modexact_1_odd 233 { "mpn_modexact_1_odd", speed_mpn_modexact_1_odd, FLAG_R }, 234#endif 235 { "mpn_modexact_1c_odd", speed_mpn_modexact_1c_odd, FLAG_R }, 236 237#if GMP_NUMB_BITS % 4 == 0 238 { "mpn_mod_34lsub1", speed_mpn_mod_34lsub1 }, 239#endif 240 241 { "mpn_lshift", speed_mpn_lshift, FLAG_R }, 242 { "mpn_lshiftc", speed_mpn_lshiftc, FLAG_R }, 243 { "mpn_rshift", speed_mpn_rshift, FLAG_R }, 244 245 { "mpn_and_n", speed_mpn_and_n, FLAG_R_OPTIONAL }, 246 { "mpn_andn_n", speed_mpn_andn_n, FLAG_R_OPTIONAL }, 247 { "mpn_nand_n", speed_mpn_nand_n, FLAG_R_OPTIONAL }, 248 { "mpn_ior_n", speed_mpn_ior_n, FLAG_R_OPTIONAL }, 249 { "mpn_iorn_n", speed_mpn_iorn_n, FLAG_R_OPTIONAL }, 250 { "mpn_nior_n", speed_mpn_nior_n, FLAG_R_OPTIONAL }, 251 { "mpn_xor_n", speed_mpn_xor_n, FLAG_R_OPTIONAL }, 252 { "mpn_xnor_n", speed_mpn_xnor_n, FLAG_R_OPTIONAL }, 253 { "mpn_com", speed_mpn_com }, 254 255 { "mpn_popcount", speed_mpn_popcount }, 256 { "mpn_hamdist", speed_mpn_hamdist }, 257 258 { "mpn_matrix22_mul", speed_mpn_matrix22_mul }, 259 260 { "mpn_hgcd", speed_mpn_hgcd }, 261 { "mpn_hgcd_lehmer", speed_mpn_hgcd_lehmer }, 262 263 { "mpn_gcd_1", speed_mpn_gcd_1, FLAG_R_OPTIONAL }, 264 { "mpn_gcd_1N", speed_mpn_gcd_1N, FLAG_R_OPTIONAL }, 265 266 { "mpn_gcd", speed_mpn_gcd }, 267#if 0 268 { "mpn_gcd_binary", speed_mpn_gcd_binary }, 269 { "mpn_gcd_accel", speed_mpn_gcd_accel }, 270 { "find_a", speed_find_a, FLAG_NODATA }, 271#endif 272 273 { "mpn_gcdext", speed_mpn_gcdext }, 274 { "mpn_gcdext_single", speed_mpn_gcdext_single }, 275 { "mpn_gcdext_double", speed_mpn_gcdext_double }, 276 { "mpn_gcdext_one_single", speed_mpn_gcdext_one_single }, 277 { "mpn_gcdext_one_double", speed_mpn_gcdext_one_double }, 278#if 0 279 { "mpn_gcdext_lehmer", speed_mpn_gcdext_lehmer }, 280#endif 281 { "mpz_jacobi", speed_mpz_jacobi }, 282 { "mpn_jacobi_base", speed_mpn_jacobi_base }, 283 { "mpn_jacobi_base_1", speed_mpn_jacobi_base_1 }, 284 { "mpn_jacobi_base_2", speed_mpn_jacobi_base_2 }, 285 { "mpn_jacobi_base_3", speed_mpn_jacobi_base_3 }, 286 287 { "mpn_mul", speed_mpn_mul, FLAG_R_OPTIONAL }, 288 { "mpn_mul_basecase", speed_mpn_mul_basecase,FLAG_R_OPTIONAL }, 289 { "mpn_sqr_basecase", speed_mpn_sqr_basecase }, 290#if HAVE_NATIVE_mpn_sqr_diagonal 291 { "mpn_sqr_diagonal", speed_mpn_sqr_diagonal }, 292#endif 293 294 { "mpn_mul_n", speed_mpn_mul_n }, 295 { "mpn_sqr", speed_mpn_sqr }, 296 297 { "mpn_toom2_sqr", speed_mpn_toom2_sqr }, 298 { "mpn_toom3_sqr", speed_mpn_toom3_sqr }, 299 { "mpn_toom4_sqr", speed_mpn_toom4_sqr }, 300 { "mpn_toom6_sqr", speed_mpn_toom6_sqr }, 301 { "mpn_toom8_sqr", speed_mpn_toom8_sqr }, 302 { "mpn_toom22_mul", speed_mpn_toom22_mul }, 303 { "mpn_toom33_mul", speed_mpn_toom33_mul }, 304 { "mpn_toom44_mul", speed_mpn_toom44_mul }, 305 { "mpn_toom6h_mul", speed_mpn_toom6h_mul }, 306 { "mpn_toom8h_mul", speed_mpn_toom8h_mul }, 307 { "mpn_toom32_mul", speed_mpn_toom32_mul }, 308 { "mpn_toom42_mul", speed_mpn_toom42_mul }, 309 { "mpn_toom43_mul", speed_mpn_toom43_mul }, 310 { "mpn_toom63_mul", speed_mpn_toom63_mul }, 311 { "mpn_nussbaumer_mul", speed_mpn_nussbaumer_mul }, 312 { "mpn_nussbaumer_mul_sqr",speed_mpn_nussbaumer_mul_sqr}, 313#if WANT_OLD_FFT_FULL 314 { "mpn_mul_fft_full", speed_mpn_mul_fft_full }, 315 { "mpn_mul_fft_full_sqr", speed_mpn_mul_fft_full_sqr }, 316#endif 317 { "mpn_mul_fft", speed_mpn_mul_fft, FLAG_R_OPTIONAL }, 318 { "mpn_mul_fft_sqr", speed_mpn_mul_fft_sqr, FLAG_R_OPTIONAL }, 319 320 { "mpn_mullo_n", speed_mpn_mullo_n }, 321 { "mpn_mullo_basecase", speed_mpn_mullo_basecase }, 322 323 { "mpn_bc_mulmod_bnm1", speed_mpn_bc_mulmod_bnm1 }, 324 { "mpn_mulmod_bnm1", speed_mpn_mulmod_bnm1 }, 325 { "mpn_mulmod_bnm1_rounded", speed_mpn_mulmod_bnm1_rounded }, 326 { "mpn_sqrmod_bnm1", speed_mpn_sqrmod_bnm1 }, 327 328 { "mpn_invert", speed_mpn_invert }, 329 { "mpn_invertappr", speed_mpn_invertappr }, 330 { "mpn_ni_invertappr", speed_mpn_ni_invertappr }, 331 { "mpn_binvert", speed_mpn_binvert }, 332 333 { "mpn_sbpi1_div_qr", speed_mpn_sbpi1_div_qr, FLAG_R_OPTIONAL}, 334 { "mpn_dcpi1_div_qr", speed_mpn_dcpi1_div_qr, FLAG_R_OPTIONAL}, 335 { "mpn_mu_div_qr", speed_mpn_mu_div_qr, FLAG_R_OPTIONAL}, 336 { "mpn_mupi_div_qr", speed_mpn_mupi_div_qr, FLAG_R_OPTIONAL}, 337 { "mpn_sbpi1_divappr_q", speed_mpn_sbpi1_divappr_q, FLAG_R_OPTIONAL}, 338 { "mpn_dcpi1_divappr_q", speed_mpn_dcpi1_divappr_q, FLAG_R_OPTIONAL}, 339 340 { "mpn_sbpi1_bdiv_qr", speed_mpn_sbpi1_bdiv_qr }, 341 { "mpn_dcpi1_bdiv_qr", speed_mpn_dcpi1_bdiv_qr }, 342 { "mpn_sbpi1_bdiv_q", speed_mpn_sbpi1_bdiv_q }, 343 { "mpn_dcpi1_bdiv_q", speed_mpn_dcpi1_bdiv_q }, 344 345 { "mpn_get_str", speed_mpn_get_str, FLAG_R_OPTIONAL }, 346 { "mpn_set_str", speed_mpn_set_str, FLAG_R_OPTIONAL }, 347 { "mpn_set_str_basecase", speed_mpn_bc_set_str, FLAG_R_OPTIONAL }, 348 349 { "mpn_sqrtrem", speed_mpn_sqrtrem }, 350 { "mpn_rootrem", speed_mpn_rootrem, FLAG_R }, 351 352 { "mpn_fib2_ui", speed_mpn_fib2_ui, FLAG_NODATA }, 353 { "mpz_fib_ui", speed_mpz_fib_ui, FLAG_NODATA }, 354 { "mpz_fib2_ui", speed_mpz_fib2_ui, FLAG_NODATA }, 355 { "mpz_lucnum_ui", speed_mpz_lucnum_ui, FLAG_NODATA }, 356 { "mpz_lucnum2_ui", speed_mpz_lucnum2_ui, FLAG_NODATA }, 357 358 { "mpz_add", speed_mpz_add }, 359 { "mpz_bin_uiui", speed_mpz_bin_uiui, FLAG_NODATA | FLAG_R_OPTIONAL }, 360 { "mpz_fac_ui", speed_mpz_fac_ui, FLAG_NODATA }, 361 { "mpz_powm", speed_mpz_powm }, 362 { "mpz_powm_mod", speed_mpz_powm_mod }, 363 { "mpz_powm_redc", speed_mpz_powm_redc }, 364 { "mpz_powm_ui", speed_mpz_powm_ui, FLAG_R_OPTIONAL }, 365 366 { "mpz_mod", speed_mpz_mod }, 367 { "mpn_redc_1", speed_mpn_redc_1 }, 368 { "mpn_redc_2", speed_mpn_redc_2 }, 369 { "mpn_redc_n", speed_mpn_redc_n }, 370 371 { "MPN_COPY", speed_MPN_COPY }, 372 { "MPN_COPY_INCR", speed_MPN_COPY_INCR }, 373 { "MPN_COPY_DECR", speed_MPN_COPY_DECR }, 374 { "memcpy", speed_memcpy }, 375#if HAVE_NATIVE_mpn_copyi 376 { "mpn_copyi", speed_mpn_copyi }, 377#endif 378#if HAVE_NATIVE_mpn_copyd 379 { "mpn_copyd", speed_mpn_copyd }, 380#endif 381#if HAVE_NATIVE_mpn_addlsh1_n 382 { "mpn_addlsh1_n", speed_mpn_addlsh1_n }, 383#endif 384#if HAVE_NATIVE_mpn_sublsh1_n 385 { "mpn_sublsh1_n", speed_mpn_sublsh1_n }, 386#endif 387#if HAVE_NATIVE_mpn_rsblsh1_n 388 { "mpn_rsblsh1_n", speed_mpn_rsblsh1_n }, 389#endif 390#if HAVE_NATIVE_mpn_addlsh2_n 391 { "mpn_addlsh2_n", speed_mpn_addlsh2_n }, 392#endif 393#if HAVE_NATIVE_mpn_sublsh2_n 394 { "mpn_sublsh2_n", speed_mpn_sublsh2_n }, 395#endif 396#if HAVE_NATIVE_mpn_rsblsh2_n 397 { "mpn_rsblsh2_n", speed_mpn_rsblsh2_n }, 398#endif 399#if HAVE_NATIVE_mpn_rsh1add_n 400 { "mpn_rsh1add_n", speed_mpn_rsh1add_n }, 401#endif 402#if HAVE_NATIVE_mpn_rsh1sub_n 403 { "mpn_rsh1sub_n", speed_mpn_rsh1sub_n }, 404#endif 405 406 { "MPN_ZERO", speed_MPN_ZERO }, 407 408 { "binvert_limb", speed_binvert_limb, FLAG_NODATA }, 409 { "binvert_limb_mul1", speed_binvert_limb_mul1, FLAG_NODATA }, 410 { "binvert_limb_loop", speed_binvert_limb_loop, FLAG_NODATA }, 411 { "binvert_limb_cond", speed_binvert_limb_cond, FLAG_NODATA }, 412 { "binvert_limb_arith", speed_binvert_limb_arith, FLAG_NODATA }, 413 414 { "malloc_free", speed_malloc_free }, 415 { "malloc_realloc_free", speed_malloc_realloc_free }, 416 { "gmp_allocate_free", speed_gmp_allocate_free }, 417 { "gmp_allocate_reallocate_free", speed_gmp_allocate_reallocate_free }, 418 { "mpz_init_clear", speed_mpz_init_clear }, 419 { "mpq_init_clear", speed_mpq_init_clear }, 420 { "mpf_init_clear", speed_mpf_init_clear }, 421 { "mpz_init_realloc_clear", speed_mpz_init_realloc_clear }, 422 423 { "umul_ppmm", speed_umul_ppmm, FLAG_R_OPTIONAL }, 424#if HAVE_NATIVE_mpn_umul_ppmm 425 { "mpn_umul_ppmm", speed_mpn_umul_ppmm, FLAG_R_OPTIONAL }, 426#endif 427#if HAVE_NATIVE_mpn_umul_ppmm_r 428 { "mpn_umul_ppmm_r", speed_mpn_umul_ppmm_r, FLAG_R_OPTIONAL }, 429#endif 430 431 { "count_leading_zeros", speed_count_leading_zeros, FLAG_NODATA | FLAG_R_OPTIONAL }, 432 { "count_trailing_zeros", speed_count_trailing_zeros, FLAG_NODATA | FLAG_R_OPTIONAL }, 433 434 { "udiv_qrnnd", speed_udiv_qrnnd, FLAG_R_OPTIONAL }, 435 { "udiv_qrnnd_preinv1", speed_udiv_qrnnd_preinv1, FLAG_R_OPTIONAL }, 436 { "udiv_qrnnd_preinv2", speed_udiv_qrnnd_preinv2, FLAG_R_OPTIONAL }, 437 { "udiv_qrnnd_c", speed_udiv_qrnnd_c, FLAG_R_OPTIONAL }, 438#if HAVE_NATIVE_mpn_udiv_qrnnd 439 { "mpn_udiv_qrnnd", speed_mpn_udiv_qrnnd, FLAG_R_OPTIONAL }, 440#endif 441#if HAVE_NATIVE_mpn_udiv_qrnnd_r 442 { "mpn_udiv_qrnnd_r", speed_mpn_udiv_qrnnd_r, FLAG_R_OPTIONAL }, 443#endif 444 { "invert_limb", speed_invert_limb, FLAG_R_OPTIONAL }, 445 446 { "operator_div", speed_operator_div, FLAG_R_OPTIONAL }, 447 { "operator_mod", speed_operator_mod, FLAG_R_OPTIONAL }, 448 449 { "gmp_randseed", speed_gmp_randseed, FLAG_R_OPTIONAL }, 450 { "gmp_randseed_ui", speed_gmp_randseed_ui, FLAG_R_OPTIONAL | FLAG_NODATA }, 451 { "mpz_urandomb", speed_mpz_urandomb, FLAG_R_OPTIONAL | FLAG_NODATA }, 452 453#ifdef SPEED_EXTRA_ROUTINES 454 SPEED_EXTRA_ROUTINES 455#endif 456#ifdef SPEED_EXTRA_ROUTINES2 457 SPEED_EXTRA_ROUTINES2 458#endif 459}; 460 461 462struct choice_t { 463 const struct routine_t *p; 464 mp_limb_t r; 465 double scale; 466 double time; 467 int no_time; 468 double prev_time; 469 const char *name; 470}; 471struct choice_t *choice; 472int num_choices = 0; 473 474 475void 476data_fill (mp_ptr ptr, mp_size_t size) 477{ 478 switch (option_data) { 479 case DATA_RANDOM: 480 mpn_random (ptr, size); 481 break; 482 case DATA_RANDOM2: 483 mpn_random2 (ptr, size); 484 break; 485 case DATA_ZEROS: 486 MPN_ZERO (ptr, size); 487 break; 488 case DATA_AAS: 489 MPN_FILL (ptr, size, GMP_NUMB_0xAA); 490 break; 491 case DATA_FFS: 492 MPN_FILL (ptr, size, GMP_NUMB_MAX); 493 break; 494 case DATA_2FD: 495 MPN_FILL (ptr, size, GMP_NUMB_MAX); 496 ptr[0] -= 2; 497 break; 498 default: 499 abort(); 500 /*NOTREACHED*/ 501 } 502} 503 504/* The code here handling the various combinations of output options isn't 505 too attractive, but it works and is fairly clean. */ 506 507#define SIZE_TO_DIVISOR(n) \ 508 (option_square == 1 ? (n)*(n) \ 509 : option_square == 2 ? (n)*((n)+1)/2 \ 510 : (n)) 511 512void 513run_one (FILE *fp, struct speed_params *s, mp_size_t prev_size) 514{ 515 const char *first_open_fastest, *first_open_notfastest, *first_close; 516 int i, fastest, want_data; 517 double fastest_time; 518 TMP_DECL; 519 520 TMP_MARK; 521 522 /* allocate data, unless all routines are NODATA */ 523 want_data = 0; 524 for (i = 0; i < num_choices; i++) 525 want_data |= ((choice[i].p->flag & FLAG_NODATA) == 0); 526 527 if (want_data) 528 { 529 SPEED_TMP_ALLOC_LIMBS (sp.xp, s->size, s->align_xp); 530 SPEED_TMP_ALLOC_LIMBS (sp.yp, s->size, s->align_yp); 531 532 data_fill (s->xp, s->size); 533 data_fill (s->yp, s->size); 534 } 535 else 536 { 537 sp.xp = NULL; 538 sp.yp = NULL; 539 } 540 541 if (prev_size == -1 && option_cmp == CMP_DIFFPREV) 542 { 543 first_open_fastest = "(#"; 544 first_open_notfastest = " ("; 545 first_close = ")"; 546 } 547 else 548 { 549 first_open_fastest = "#"; 550 first_open_notfastest = " "; 551 first_close = ""; 552 } 553 554 fastest = -1; 555 fastest_time = -1.0; 556 for (i = 0; i < num_choices; i++) 557 { 558 s->r = choice[i].r; 559 choice[i].time = speed_measure (choice[i].p->fun, s); 560 choice[i].no_time = (choice[i].time == -1.0); 561 if (! choice[i].no_time) 562 choice[i].time *= choice[i].scale; 563 564 /* Apply the effect of CMP_DIFFPREV, but the new choice[i].prev_time 565 is before any differences. */ 566 { 567 double t; 568 t = choice[i].time; 569 if (t != -1.0 && option_cmp == CMP_DIFFPREV && prev_size != -1) 570 { 571 if (choice[i].prev_time == -1.0) 572 choice[i].no_time = 1; 573 else 574 choice[i].time = choice[i].time - choice[i].prev_time; 575 } 576 choice[i].prev_time = t; 577 } 578 579 if (choice[i].no_time) 580 continue; 581 582 /* Look for the fastest after CMP_DIFFPREV has been applied, but 583 before CMP_RATIO or CMP_DIFFERENCE. There's only a fastest shown 584 if there's more than one routine. */ 585 if (num_choices > 1 && (fastest == -1 || choice[i].time < fastest_time)) 586 { 587 fastest = i; 588 fastest_time = choice[i].time; 589 } 590 591 if (option_cmp == CMP_DIFFPREV) 592 { 593 /* Conversion for UNIT_CYCLESPERLIMB differs in CMP_DIFFPREV. */ 594 if (option_unit == UNIT_CYCLES) 595 choice[i].time /= speed_cycletime; 596 else if (option_unit == UNIT_CYCLESPERLIMB) 597 { 598 if (prev_size == -1) 599 choice[i].time /= speed_cycletime; 600 else 601 choice[i].time /= (speed_cycletime 602 * (SIZE_TO_DIVISOR(s->size) 603 - SIZE_TO_DIVISOR(prev_size))); 604 } 605 } 606 else 607 { 608 if (option_unit == UNIT_CYCLES) 609 choice[i].time /= speed_cycletime; 610 else if (option_unit == UNIT_CYCLESPERLIMB) 611 choice[i].time /= (speed_cycletime * SIZE_TO_DIVISOR(s->size)); 612 613 if (option_cmp == CMP_RATIO && i > 0) 614 { 615 /* A ratio isn't affected by the units chosen. */ 616 if (choice[0].no_time || choice[0].time == 0.0) 617 choice[i].no_time = 1; 618 else 619 choice[i].time /= choice[0].time; 620 } 621 else if (option_cmp == CMP_DIFFERENCE && i > 0) 622 { 623 if (choice[0].no_time) 624 { 625 choice[i].no_time = 1; 626 continue; 627 } 628 choice[i].time -= choice[0].time; 629 } 630 } 631 } 632 633 if (option_gnuplot) 634 { 635 /* In CMP_DIFFPREV, don't print anything for the first size, start 636 with the second where an actual difference is available. 637 638 In CMP_RATIO, print the first column as 1.0. 639 640 The 9 decimals printed is much more than the expected precision of 641 the measurements actually. */ 642 643 if (! (option_cmp == CMP_DIFFPREV && prev_size == -1)) 644 { 645 fprintf (fp, "%-6ld ", s->size); 646 for (i = 0; i < num_choices; i++) 647 fprintf (fp, " %.9e", 648 choice[i].no_time ? 0.0 649 : (option_cmp == CMP_RATIO && i == 0) ? 1.0 650 : choice[i].time); 651 fprintf (fp, "\n"); 652 } 653 } 654 else 655 { 656 fprintf (fp, "%-6ld ", s->size); 657 for (i = 0; i < num_choices; i++) 658 { 659 char buf[128]; 660 int decimals; 661 662 if (choice[i].no_time) 663 { 664 fprintf (fp, " %*s", COLUMN_WIDTH, "n/a"); 665 } 666 else 667 {if (option_unit == UNIT_CYCLESPERLIMB 668 || (option_cmp == CMP_RATIO && i > 0)) 669 decimals = 4; 670 else if (option_unit == UNIT_CYCLES) 671 decimals = 2; 672 else 673 decimals = 9; 674 675 sprintf (buf, "%s%.*f%s", 676 i == fastest ? first_open_fastest : first_open_notfastest, 677 decimals, choice[i].time, first_close); 678 fprintf (fp, " %*s", COLUMN_WIDTH, buf); 679 } 680 } 681 fprintf (fp, "\n"); 682 } 683 684 TMP_FREE; 685} 686 687void 688run_all (FILE *fp) 689{ 690 mp_size_t prev_size; 691 int i; 692 TMP_DECL; 693 694 TMP_MARK; 695 SPEED_TMP_ALLOC_LIMBS (sp.xp_block, SPEED_BLOCK_SIZE, sp.align_xp); 696 SPEED_TMP_ALLOC_LIMBS (sp.yp_block, SPEED_BLOCK_SIZE, sp.align_yp); 697 698 data_fill (sp.xp_block, SPEED_BLOCK_SIZE); 699 data_fill (sp.yp_block, SPEED_BLOCK_SIZE); 700 701 for (i = 0; i < size_num; i++) 702 { 703 sp.size = size_array[i].start; 704 prev_size = -1; 705 for (;;) 706 { 707 mp_size_t step; 708 709 if (option_data == DATA_2FD && sp.size >= 2) 710 sp.xp[sp.size-1] = 2; 711 712 run_one (fp, &sp, prev_size); 713 prev_size = sp.size; 714 715 if (option_data == DATA_2FD && sp.size >= 2) 716 sp.xp[sp.size-1] = MP_LIMB_T_MAX; 717 718 if (option_factor != 0.0) 719 { 720 step = (mp_size_t) (sp.size * option_factor - sp.size); 721 if (step < 1) 722 step = 1; 723 } 724 else 725 step = 1; 726 if (step < option_step) 727 step = option_step; 728 729 sp.size += step; 730 if (sp.size > size_array[i].end) 731 break; 732 } 733 } 734 735 TMP_FREE; 736} 737 738 739FILE * 740fopen_for_write (const char *filename) 741{ 742 FILE *fp; 743 if ((fp = fopen (filename, "w")) == NULL) 744 { 745 fprintf (stderr, "Cannot create %s\n", filename); 746 exit(1); 747 } 748 return fp; 749} 750 751void 752fclose_written (FILE *fp, const char *filename) 753{ 754 int err; 755 756 err = ferror (fp); 757 err |= fclose (fp); 758 759 if (err) 760 { 761 fprintf (stderr, "Error writing %s\n", filename); 762 exit(1); 763 } 764} 765 766 767void 768run_gnuplot (int argc, char *argv[]) 769{ 770 char *plot_filename; 771 char *data_filename; 772 FILE *fp; 773 int i; 774 775 plot_filename = (char *) (*__gmp_allocate_func) 776 (strlen (option_gnuplot_basename) + 20); 777 data_filename = (char *) (*__gmp_allocate_func) 778 (strlen (option_gnuplot_basename) + 20); 779 780 sprintf (plot_filename, "%s.gnuplot", option_gnuplot_basename); 781 sprintf (data_filename, "%s.data", option_gnuplot_basename); 782 783 fp = fopen_for_write (plot_filename); 784 785 fprintf (fp, "# Generated with:\n"); 786 fprintf (fp, "#"); 787 for (i = 0; i < argc; i++) 788 fprintf (fp, " %s", argv[i]); 789 fprintf (fp, "\n"); 790 fprintf (fp, "\n"); 791 792 fprintf (fp, "reset\n"); 793 794 /* Putting the key at the top left is usually good, and you can change it 795 interactively if it's not. */ 796 fprintf (fp, "set key left\n"); 797 798 /* designed to make it possible to see crossovers easily */ 799 fprintf (fp, "set data style lines\n"); 800 801 fprintf (fp, "plot "); 802 for (i = 0; i < num_choices; i++) 803 { 804 fprintf (fp, " \"%s\" using 1:%d", data_filename, i+2); 805 fprintf (fp, " title \"%s\"", choice[i].name); 806 807 if (i != num_choices-1) 808 fprintf (fp, ", \\"); 809 fprintf (fp, "\n"); 810 } 811 812 fprintf (fp, "load \"-\"\n"); 813 fclose_written (fp, plot_filename); 814 815 fp = fopen_for_write (data_filename); 816 817 /* Unbuffered so you can see where the program was up to if it crashes or 818 you kill it. */ 819 setbuf (fp, NULL); 820 821 run_all (fp); 822 fclose_written (fp, data_filename); 823} 824 825 826/* Return a limb with n many one bits (starting from the least significant) */ 827 828#define LIMB_ONES(n) \ 829 ((n) == GMP_LIMB_BITS ? MP_LIMB_T_MAX \ 830 : (n) == 0 ? CNST_LIMB(0) \ 831 : (CNST_LIMB(1) << (n)) - 1) 832 833mp_limb_t 834r_string (const char *s) 835{ 836 const char *s_orig = s; 837 long n; 838 839 if (strcmp (s, "aas") == 0) 840 return GMP_NUMB_0xAA; 841 842 { 843 mpz_t z; 844 mp_limb_t l; 845 int set, siz; 846 847 mpz_init (z); 848 set = mpz_set_str (z, s, 0); 849 siz = SIZ(z); 850 l = (siz == 0 ? 0 : siz > 0 ? PTR(z)[0] : -PTR(z)[0]); 851 mpz_clear (z); 852 if (set == 0) 853 { 854 if (siz > 1 || siz < -1) 855 printf ("Warning, r parameter %s truncated to %d bits\n", 856 s_orig, GMP_LIMB_BITS); 857 return l; 858 } 859 } 860 861 if (s[0] == '0' && (s[1] == 'x' || s[1] == 'X')) 862 n = strtoul (s+2, (char **) &s, 16); 863 else 864 n = strtol (s, (char **) &s, 10); 865 866 if (strcmp (s, "bits") == 0) 867 { 868 mp_limb_t l; 869 if (n > GMP_LIMB_BITS) 870 { 871 fprintf (stderr, "%ld bit parameter invalid (max %d bits)\n", 872 n, GMP_LIMB_BITS); 873 exit (1); 874 } 875 mpn_random (&l, 1); 876 return (l | (CNST_LIMB(1) << (n-1))) & LIMB_ONES(n); 877 } 878 else if (strcmp (s, "ones") == 0) 879 { 880 if (n > GMP_LIMB_BITS) 881 { 882 fprintf (stderr, "%ld bit parameter invalid (max %d bits)\n", 883 n, GMP_LIMB_BITS); 884 exit (1); 885 } 886 return LIMB_ONES (n); 887 } 888 else if (*s != '\0') 889 { 890 fprintf (stderr, "invalid r parameter: %s\n", s_orig); 891 exit (1); 892 } 893 894 return n; 895} 896 897 898void 899routine_find (struct choice_t *c, const char *s_orig) 900{ 901 const char *s; 902 int i; 903 size_t nlen; 904 905 c->name = s_orig; 906 s = strchr (s_orig, '*'); 907 if (s != NULL) 908 { 909 c->scale = atof(s_orig); 910 s++; 911 } 912 else 913 { 914 c->scale = 1.0; 915 s = s_orig; 916 } 917 918 for (i = 0; i < numberof (routine); i++) 919 { 920 nlen = strlen (routine[i].name); 921 if (memcmp (s, routine[i].name, nlen) != 0) 922 continue; 923 924 if (s[nlen] == '.') 925 { 926 /* match, with a .r parameter */ 927 928 if (! (routine[i].flag & (FLAG_R|FLAG_R_OPTIONAL))) 929 { 930 fprintf (stderr, 931 "Choice %s bad: doesn't take a \".<r>\" parameter\n", 932 s_orig); 933 exit (1); 934 } 935 936 c->p = &routine[i]; 937 c->r = r_string (s + nlen + 1); 938 return; 939 } 940 941 if (s[nlen] == '\0') 942 { 943 /* match, with no parameter */ 944 945 if (routine[i].flag & FLAG_R) 946 { 947 fprintf (stderr, 948 "Choice %s bad: needs a \".<r>\" parameter\n", 949 s_orig); 950 exit (1); 951 } 952 953 c->p = &routine[i]; 954 c->r = 0; 955 return; 956 } 957 } 958 959 fprintf (stderr, "Choice %s unrecognised\n", s_orig); 960 exit (1); 961} 962 963 964void 965usage (void) 966{ 967 int i; 968 969 speed_time_init (); 970 971 printf ("Usage: speed [-options] -s size <routine>...\n"); 972 printf ("Measure the speed of some routines.\n"); 973 printf ("Times are in seconds, accuracy is shown.\n"); 974 printf ("\n"); 975 printf (" -p num set precision as number of time units each routine must run\n"); 976 printf (" -s size[-end][,size[-end]]... sizes to measure\n"); 977 printf (" single sizes or ranges, sep with comma or use multiple -s\n"); 978 printf (" -t step step through sizes by given amount\n"); 979 printf (" -f factor step through sizes by given factor (eg. 1.05)\n"); 980 printf (" -r show times as ratios of the first routine\n"); 981 printf (" -d show times as difference from the first routine\n"); 982 printf (" -D show times as difference from previous size shown\n"); 983 printf (" -c show times in CPU cycles\n"); 984 printf (" -C show times in cycles per limb\n"); 985 printf (" -u print resource usage (memory) at end\n"); 986 printf (" -P name output plot files \"name.gnuplot\" and \"name.data\"\n"); 987 printf (" -a <type> use given data: random(default), random2, zeros, aas, ffs, 2fd\n"); 988 printf (" -x, -y, -w, -W <align> specify data alignments, sources and dests\n"); 989 printf (" -o addrs print addresses of data blocks\n"); 990 printf ("\n"); 991 printf ("If both -t and -f are used, it means step by the factor or the step, whichever\n"); 992 printf ("is greater.\n"); 993 printf ("If both -C and -D are used, it means cycles per however many limbs between a\n"); 994 printf ("size and the previous size.\n"); 995 printf ("\n"); 996 printf ("After running with -P, plots can be viewed with Gnuplot or Quickplot.\n"); 997 printf ("\"gnuplot name.gnuplot\" (use \"set logscale xy; replot\" at the prompt for\n"); 998 printf ("a log/log plot).\n"); 999 printf ("\"quickplot -s name.data\" (has interactive zooming, and note -s is important\n"); 1000 printf ("when viewing more than one routine, it means same axis scales for all data).\n"); 1001 printf ("\n"); 1002 printf ("The available routines are as follows.\n"); 1003 printf ("\n"); 1004 1005 for (i = 0; i < numberof (routine); i++) 1006 { 1007 if (routine[i].flag & FLAG_R) 1008 printf ("\t%s.r\n", routine[i].name); 1009 else if (routine[i].flag & FLAG_R_OPTIONAL) 1010 printf ("\t%s (optional .r)\n", routine[i].name); 1011 else 1012 printf ("\t%s\n", routine[i].name); 1013 } 1014 printf ("\n"); 1015 printf ("Routines with a \".r\" need an extra parameter, for example mpn_lshift.6\n"); 1016 printf ("r should be in decimal, or use 0xN for hexadecimal.\n"); 1017 printf ("\n"); 1018 printf ("Special forms for r are \"<N>bits\" for a random N bit number, \"<N>ones\" for\n"); 1019 printf ("N one bits, or \"aas\" for 0xAA..AA.\n"); 1020 printf ("\n"); 1021 printf ("Times for sizes out of the range accepted by a routine are shown as 0.\n"); 1022 printf ("The fastest routine at each size is marked with a # (free form output only).\n"); 1023 printf ("\n"); 1024 printf ("%s", speed_time_string); 1025 printf ("\n"); 1026 printf ("Gnuplot home page http://www.gnuplot.info/\n"); 1027 printf ("Quickplot home page http://quickplot.sourceforge.net/\n"); 1028} 1029 1030void 1031check_align_option (const char *name, mp_size_t align) 1032{ 1033 if (align < 0 || align > SPEED_TMP_ALLOC_ADJUST_MASK) 1034 { 1035 fprintf (stderr, "Alignment request out of range: %s %ld\n", 1036 name, (long) align); 1037 fprintf (stderr, " should be 0 to %d (limbs), inclusive\n", 1038 SPEED_TMP_ALLOC_ADJUST_MASK); 1039 exit (1); 1040 } 1041} 1042 1043int 1044main (int argc, char *argv[]) 1045{ 1046 int i; 1047 int opt; 1048 1049 /* Unbuffered so output goes straight out when directed to a pipe or file 1050 and isn't lost on killing the program half way. */ 1051 setbuf (stdout, NULL); 1052 1053 for (;;) 1054 { 1055 opt = getopt(argc, argv, "a:CcDdEFf:o:p:P:rRs:t:ux:y:w:W:z"); 1056 if (opt == EOF) 1057 break; 1058 1059 switch (opt) { 1060 case 'a': 1061 if (strcmp (optarg, "random") == 0) option_data = DATA_RANDOM; 1062 else if (strcmp (optarg, "random2") == 0) option_data = DATA_RANDOM2; 1063 else if (strcmp (optarg, "zeros") == 0) option_data = DATA_ZEROS; 1064 else if (strcmp (optarg, "aas") == 0) option_data = DATA_AAS; 1065 else if (strcmp (optarg, "ffs") == 0) option_data = DATA_FFS; 1066 else if (strcmp (optarg, "2fd") == 0) option_data = DATA_2FD; 1067 else 1068 { 1069 fprintf (stderr, "unrecognised data option: %s\n", optarg); 1070 exit (1); 1071 } 1072 break; 1073 case 'C': 1074 if (option_unit != UNIT_SECONDS) goto bad_unit; 1075 option_unit = UNIT_CYCLESPERLIMB; 1076 break; 1077 case 'c': 1078 if (option_unit != UNIT_SECONDS) 1079 { 1080 bad_unit: 1081 fprintf (stderr, "cannot use more than one of -c, -C\n"); 1082 exit (1); 1083 } 1084 option_unit = UNIT_CYCLES; 1085 break; 1086 case 'D': 1087 if (option_cmp != CMP_ABSOLUTE) goto bad_cmp; 1088 option_cmp = CMP_DIFFPREV; 1089 break; 1090 case 'd': 1091 if (option_cmp != CMP_ABSOLUTE) 1092 { 1093 bad_cmp: 1094 fprintf (stderr, "cannot use more than one of -d, -D, -r\n"); 1095 exit (1); 1096 } 1097 option_cmp = CMP_DIFFERENCE; 1098 break; 1099 case 'E': 1100 option_square = 1; 1101 break; 1102 case 'F': 1103 option_square = 2; 1104 break; 1105 case 'f': 1106 option_factor = atof (optarg); 1107 if (option_factor <= 1.0) 1108 { 1109 fprintf (stderr, "-f factor must be > 1.0\n"); 1110 exit (1); 1111 } 1112 break; 1113 case 'o': 1114 speed_option_set (optarg); 1115 break; 1116 case 'P': 1117 option_gnuplot = 1; 1118 option_gnuplot_basename = optarg; 1119 break; 1120 case 'p': 1121 speed_precision = atoi (optarg); 1122 break; 1123 case 'R': 1124 option_seed = time (NULL); 1125 break; 1126 case 'r': 1127 if (option_cmp != CMP_ABSOLUTE) 1128 goto bad_cmp; 1129 option_cmp = CMP_RATIO; 1130 break; 1131 case 's': 1132 { 1133 char *s; 1134 for (s = strtok (optarg, ","); s != NULL; s = strtok (NULL, ",")) 1135 { 1136 if (size_num == size_allocnum) 1137 { 1138 size_array = (struct size_array_t *) 1139 __gmp_allocate_or_reallocate 1140 (size_array, 1141 size_allocnum * sizeof(size_array[0]), 1142 (size_allocnum+10) * sizeof(size_array[0])); 1143 size_allocnum += 10; 1144 } 1145 if (sscanf (s, "%ld-%ld", 1146 &size_array[size_num].start, 1147 &size_array[size_num].end) != 2) 1148 { 1149 size_array[size_num].start = size_array[size_num].end 1150 = atol (s); 1151 } 1152 1153 if (size_array[size_num].start < 0 1154 || size_array[size_num].end < 0 1155 || size_array[size_num].start > size_array[size_num].end) 1156 { 1157 fprintf (stderr, "invalid size parameter: %s\n", s); 1158 exit (1); 1159 } 1160 1161 size_num++; 1162 } 1163 } 1164 break; 1165 case 't': 1166 option_step = atol (optarg); 1167 if (option_step < 1) 1168 { 1169 fprintf (stderr, "-t step must be >= 1\n"); 1170 exit (1); 1171 } 1172 break; 1173 case 'u': 1174 option_resource_usage = 1; 1175 break; 1176 case 'z': 1177 sp.cache = 1; 1178 break; 1179 case 'x': 1180 sp.align_xp = atol (optarg); 1181 check_align_option ("-x", sp.align_xp); 1182 break; 1183 case 'y': 1184 sp.align_yp = atol (optarg); 1185 check_align_option ("-y", sp.align_yp); 1186 break; 1187 case 'w': 1188 sp.align_wp = atol (optarg); 1189 check_align_option ("-w", sp.align_wp); 1190 break; 1191 case 'W': 1192 sp.align_wp2 = atol (optarg); 1193 check_align_option ("-W", sp.align_wp2); 1194 break; 1195 case '?': 1196 exit(1); 1197 } 1198 } 1199 1200 if (optind >= argc) 1201 { 1202 usage (); 1203 exit (1); 1204 } 1205 1206 if (size_num == 0) 1207 { 1208 fprintf (stderr, "-s <size> must be specified\n"); 1209 exit (1); 1210 } 1211 1212 gmp_randinit_default (__gmp_rands); 1213 __gmp_rands_initialized = 1; 1214 gmp_randseed_ui (__gmp_rands, option_seed); 1215 1216 choice = (struct choice_t *) (*__gmp_allocate_func) 1217 ((argc - optind) * sizeof(choice[0])); 1218 for ( ; optind < argc; optind++) 1219 { 1220 struct choice_t c; 1221 routine_find (&c, argv[optind]); 1222 choice[num_choices] = c; 1223 num_choices++; 1224 } 1225 1226 if ((option_cmp == CMP_RATIO || option_cmp == CMP_DIFFERENCE) && 1227 num_choices < 2) 1228 { 1229 fprintf (stderr, "WARNING, -d or -r does nothing when only one routine requested\n"); 1230 } 1231 1232 speed_time_init (); 1233 if (option_unit == UNIT_CYCLES || option_unit == UNIT_CYCLESPERLIMB) 1234 speed_cycletime_need_cycles (); 1235 else 1236 speed_cycletime_need_seconds (); 1237 1238 if (option_gnuplot) 1239 { 1240 run_gnuplot (argc, argv); 1241 } 1242 else 1243 { 1244 if (option_unit == UNIT_SECONDS) 1245 printf ("overhead %.9f secs", speed_measure (speed_noop, NULL)); 1246 else 1247 printf ("overhead %.2f cycles", 1248 speed_measure (speed_noop, NULL) / speed_cycletime); 1249 printf (", precision %d units of %.2e secs", 1250 speed_precision, speed_unittime); 1251 1252 if (speed_cycletime == 1.0 || speed_cycletime == 0.0) 1253 printf (", CPU freq unknown\n"); 1254 else 1255 printf (", CPU freq %.2f MHz\n", 1e-6/speed_cycletime); 1256 1257 printf (" "); 1258 for (i = 0; i < num_choices; i++) 1259 printf (" %*s", COLUMN_WIDTH, choice[i].name); 1260 printf ("\n"); 1261 1262 run_all (stdout); 1263 } 1264 1265 if (option_resource_usage) 1266 { 1267#if HAVE_GETRUSAGE 1268 { 1269 /* This doesn't give data sizes on linux 2.0.x, only utime. */ 1270 struct rusage r; 1271 if (getrusage (RUSAGE_SELF, &r) != 0) 1272 perror ("getrusage"); 1273 else 1274 printf ("getrusage(): utime %ld.%06ld data %ld stack %ld maxresident %ld\n", 1275 r.ru_utime.tv_sec, r.ru_utime.tv_usec, 1276 r.ru_idrss, r.ru_isrss, r.ru_ixrss); 1277 } 1278#else 1279 printf ("getrusage() not available\n"); 1280#endif 1281 1282 /* Linux kernel. */ 1283 { 1284 char buf[128]; 1285 sprintf (buf, "/proc/%d/status", getpid()); 1286 if (access (buf, R_OK) == 0) 1287 { 1288 sprintf (buf, "cat /proc/%d/status", getpid()); 1289 system (buf); 1290 } 1291 1292 } 1293 } 1294 1295 return 0; 1296} 1297