1/* Speed measuring program. 2 3Copyright 1999-2003, 2005, 2006, 2008-2019 Free Software Foundation, Inc. 4 5This file is part of the GNU MP Library. 6 7The GNU MP Library is free software; you can redistribute it and/or modify 8it under the terms of either: 9 10 * the GNU Lesser General Public License as published by the Free 11 Software Foundation; either version 3 of the License, or (at your 12 option) any later version. 13 14or 15 16 * the GNU General Public License as published by the Free Software 17 Foundation; either version 2 of the License, or (at your option) any 18 later version. 19 20or both in parallel, as here. 21 22The GNU MP Library is distributed in the hope that it will be useful, but 23WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 24or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 25for more details. 26 27You should have received copies of the GNU General Public License and the 28GNU Lesser General Public License along with the GNU MP Library. If not, 29see https://www.gnu.org/licenses/. */ 30 31/* Usage message is in the code below, run with no arguments to print it. 32 See README for interesting applications. 33 34 To add a new routine foo(), create a speed_foo() function in the style of 35 the existing ones and add an entry in the routine[] array. Put FLAG_R if 36 speed_foo() wants an "r" parameter. 37 38 The routines don't have help messages or descriptions, but most have 39 suggestive names. See the source code for full details. */ 40 41#include "config.h" 42 43#include <limits.h> 44#include <stdio.h> 45#include <stdlib.h> 46#include <string.h> 47 48#if HAVE_UNISTD_H 49#include <unistd.h> /* for getpid, R_OK */ 50#endif 51 52#if TIME_WITH_SYS_TIME 53# include <sys/time.h> /* for struct timeval */ 54# include <time.h> 55#else 56# if HAVE_SYS_TIME_H 57# include <sys/time.h> 58# else 59# include <time.h> 60# endif 61#endif 62 63#if HAVE_SYS_RESOURCE_H 64#include <sys/resource.h> /* for getrusage() */ 65#endif 66 67 68#include "gmp-impl.h" 69#include "longlong.h" /* for the benefit of speed-many.c */ 70#include "tests.h" 71#include "speed.h" 72 73 74#if !HAVE_DECL_OPTARG 75extern char *optarg; 76extern int optind, opterr; 77#endif 78 79#if !HAVE_STRTOUL 80#define strtoul(p,e,b) (unsigned long) strtol(p,e,b) 81#endif 82 83#ifdef SPEED_EXTRA_PROTOS 84SPEED_EXTRA_PROTOS 85#endif 86#ifdef SPEED_EXTRA_PROTOS2 87SPEED_EXTRA_PROTOS2 88#endif 89 90 91#if GMP_LIMB_BITS == 32 92#define GMP_NUMB_0xAA (CNST_LIMB(0xAAAAAAAA) & GMP_NUMB_MASK) 93#endif 94#if GMP_LIMB_BITS == 64 95#define GMP_NUMB_0xAA (CNST_LIMB(0xAAAAAAAAAAAAAAAA) & GMP_NUMB_MASK) 96#endif 97 98 99#define CMP_ABSOLUTE 1 100#define CMP_RATIO 2 101#define CMP_DIFFERENCE 3 102#define CMP_DIFFPREV 4 103int option_cmp = CMP_ABSOLUTE; 104 105#define UNIT_SECONDS 1 106#define UNIT_CYCLES 2 107#define UNIT_CYCLESPERLIMB 3 108int option_unit = UNIT_SECONDS; 109 110#define DATA_RANDOM 1 111#define DATA_RANDOM2 2 112#define DATA_ZEROS 3 113#define DATA_AAS 4 114#define DATA_FFS 5 115#define DATA_2FD 6 116int option_data = DATA_RANDOM; 117 118int option_square = 0; 119double option_factor = 0.0; 120mp_size_t option_step = 1; 121int option_gnuplot = 0; 122char *option_gnuplot_basename; 123struct size_array_t { 124 mp_size_t start, end; 125} *size_array = NULL; 126mp_size_t size_num = 0; 127mp_size_t size_allocnum = 0; 128int option_resource_usage = 0; 129long option_seed = 123456789; 130 131struct speed_params sp; 132 133#define COLUMN_WIDTH 13 /* for the free-form output */ 134 135#define FLAG_R (1<<0) /* require ".r" */ 136#define FLAG_R_OPTIONAL (1<<1) /* optional ".r" */ 137#define FLAG_RSIZE (1<<2) 138#define FLAG_NODATA (1<<3) /* don't alloc xp, yp */ 139 140const struct routine_t { 141 /* constants */ 142 const char *name; 143 speed_function_t fun; 144 int flag; 145} routine[] = { 146 147 { "noop", speed_noop }, 148 { "noop_wxs", speed_noop_wxs }, 149 { "noop_wxys", speed_noop_wxys }, 150 151 { "mpn_add_n", speed_mpn_add_n, FLAG_R_OPTIONAL }, 152 { "mpn_sub_n", speed_mpn_sub_n, FLAG_R_OPTIONAL }, 153 { "mpn_add_1", speed_mpn_add_1, FLAG_R }, 154 { "mpn_add_1_inplace", speed_mpn_add_1_inplace, FLAG_R }, 155 { "mpn_sub_1", speed_mpn_sub_1, FLAG_R }, 156 { "mpn_sub_1_inplace", speed_mpn_sub_1_inplace, FLAG_R }, 157 158 { "mpn_add_err1_n", speed_mpn_add_err1_n }, 159 { "mpn_add_err2_n", speed_mpn_add_err2_n }, 160 { "mpn_add_err3_n", speed_mpn_add_err3_n }, 161 { "mpn_sub_err1_n", speed_mpn_sub_err1_n }, 162 { "mpn_sub_err2_n", speed_mpn_sub_err2_n }, 163 { "mpn_sub_err3_n", speed_mpn_sub_err3_n }, 164 165#if HAVE_NATIVE_mpn_add_n_sub_n 166 { "mpn_add_n_sub_n", speed_mpn_add_n_sub_n, FLAG_R_OPTIONAL }, 167#endif 168 169 { "mpn_addmul_1", speed_mpn_addmul_1, FLAG_R }, 170 { "mpn_submul_1", speed_mpn_submul_1, FLAG_R }, 171#if HAVE_NATIVE_mpn_addmul_2 172 { "mpn_addmul_2", speed_mpn_addmul_2, FLAG_R_OPTIONAL }, 173#endif 174#if HAVE_NATIVE_mpn_addmul_3 175 { "mpn_addmul_3", speed_mpn_addmul_3, FLAG_R_OPTIONAL }, 176#endif 177#if HAVE_NATIVE_mpn_addmul_4 178 { "mpn_addmul_4", speed_mpn_addmul_4, FLAG_R_OPTIONAL }, 179#endif 180#if HAVE_NATIVE_mpn_addmul_5 181 { "mpn_addmul_5", speed_mpn_addmul_5, FLAG_R_OPTIONAL }, 182#endif 183#if HAVE_NATIVE_mpn_addmul_6 184 { "mpn_addmul_6", speed_mpn_addmul_6, FLAG_R_OPTIONAL }, 185#endif 186#if HAVE_NATIVE_mpn_addmul_7 187 { "mpn_addmul_7", speed_mpn_addmul_7, FLAG_R_OPTIONAL }, 188#endif 189#if HAVE_NATIVE_mpn_addmul_8 190 { "mpn_addmul_8", speed_mpn_addmul_8, FLAG_R_OPTIONAL }, 191#endif 192 { "mpn_mul_1", speed_mpn_mul_1, FLAG_R }, 193 { "mpn_mul_1_inplace", speed_mpn_mul_1_inplace, FLAG_R }, 194#if HAVE_NATIVE_mpn_mul_2 195 { "mpn_mul_2", speed_mpn_mul_2, FLAG_R_OPTIONAL }, 196#endif 197#if HAVE_NATIVE_mpn_mul_3 198 { "mpn_mul_3", speed_mpn_mul_3, FLAG_R_OPTIONAL }, 199#endif 200#if HAVE_NATIVE_mpn_mul_4 201 { "mpn_mul_4", speed_mpn_mul_4, FLAG_R_OPTIONAL }, 202#endif 203#if HAVE_NATIVE_mpn_mul_5 204 { "mpn_mul_5", speed_mpn_mul_5, FLAG_R_OPTIONAL }, 205#endif 206#if HAVE_NATIVE_mpn_mul_6 207 { "mpn_mul_6", speed_mpn_mul_6, FLAG_R_OPTIONAL }, 208#endif 209 210 { "mpn_divrem_1", speed_mpn_divrem_1, FLAG_R }, 211 { "mpn_divrem_1f", speed_mpn_divrem_1f, FLAG_R }, 212#if HAVE_NATIVE_mpn_divrem_1c 213 { "mpn_divrem_1c", speed_mpn_divrem_1c, FLAG_R }, 214 { "mpn_divrem_1cf", speed_mpn_divrem_1cf,FLAG_R }, 215#endif 216 { "mpn_mod_1", speed_mpn_mod_1, FLAG_R }, 217#if HAVE_NATIVE_mpn_mod_1c 218 { "mpn_mod_1c", speed_mpn_mod_1c, FLAG_R }, 219#endif 220 { "mpn_preinv_divrem_1", speed_mpn_preinv_divrem_1, FLAG_R }, 221 { "mpn_preinv_divrem_1f", speed_mpn_preinv_divrem_1f, FLAG_R }, 222 { "mpn_preinv_mod_1", speed_mpn_preinv_mod_1, FLAG_R }, 223 224 { "mpn_mod_1_1", speed_mpn_mod_1_1, FLAG_R }, 225 { "mpn_mod_1_1_1", speed_mpn_mod_1_1_1, FLAG_R }, 226 { "mpn_mod_1_1_2", speed_mpn_mod_1_1_2, FLAG_R }, 227 { "mpn_mod_1s_2", speed_mpn_mod_1_2, FLAG_R }, 228 { "mpn_mod_1s_3", speed_mpn_mod_1_3, FLAG_R }, 229 { "mpn_mod_1s_4", speed_mpn_mod_1_4, FLAG_R }, 230 231 { "mpn_divrem_1_div", speed_mpn_divrem_1_div, FLAG_R }, 232 { "mpn_divrem_1_inv", speed_mpn_divrem_1_inv, FLAG_R }, 233 { "mpn_divrem_1f_div", speed_mpn_divrem_1f_div, FLAG_R }, 234 { "mpn_divrem_1f_inv", speed_mpn_divrem_1f_inv, FLAG_R }, 235 { "mpn_mod_1_div", speed_mpn_mod_1_div, FLAG_R }, 236 { "mpn_mod_1_inv", speed_mpn_mod_1_inv, FLAG_R }, 237 238 { "mpn_divrem_2", speed_mpn_divrem_2, }, 239 { "mpn_divrem_2_div", speed_mpn_divrem_2_div, }, 240 { "mpn_divrem_2_inv", speed_mpn_divrem_2_inv, }, 241 242 { "mpn_div_qr_1n_pi1", speed_mpn_div_qr_1n_pi1, FLAG_R }, 243 { "mpn_div_qr_1n_pi1_1",speed_mpn_div_qr_1n_pi1_1, FLAG_R }, 244 { "mpn_div_qr_1n_pi1_2",speed_mpn_div_qr_1n_pi1_2, FLAG_R }, 245 { "mpn_div_qr_1", speed_mpn_div_qr_1, FLAG_R }, 246 247 { "mpn_div_qr_2n", speed_mpn_div_qr_2n, }, 248 { "mpn_div_qr_2u", speed_mpn_div_qr_2u, }, 249 250 { "mpn_divexact_1", speed_mpn_divexact_1, FLAG_R }, 251 { "mpn_divexact_by3", speed_mpn_divexact_by3 }, 252 253 { "mpn_bdiv_q_1", speed_mpn_bdiv_q_1, FLAG_R }, 254 { "mpn_pi1_bdiv_q_1", speed_mpn_pi1_bdiv_q_1, FLAG_R_OPTIONAL }, 255 { "mpn_bdiv_dbm1c", speed_mpn_bdiv_dbm1c, FLAG_R_OPTIONAL }, 256 257#if HAVE_NATIVE_mpn_modexact_1_odd 258 { "mpn_modexact_1_odd", speed_mpn_modexact_1_odd, FLAG_R }, 259#endif 260 { "mpn_modexact_1c_odd", speed_mpn_modexact_1c_odd, FLAG_R }, 261 262#if GMP_NUMB_BITS % 4 == 0 263 { "mpn_mod_34lsub1", speed_mpn_mod_34lsub1 }, 264#endif 265 266 { "mpn_lshift", speed_mpn_lshift, FLAG_R }, 267 { "mpn_lshiftc", speed_mpn_lshiftc, FLAG_R }, 268 { "mpn_rshift", speed_mpn_rshift, FLAG_R }, 269 270 { "mpn_and_n", speed_mpn_and_n, FLAG_R_OPTIONAL }, 271 { "mpn_andn_n", speed_mpn_andn_n, FLAG_R_OPTIONAL }, 272 { "mpn_nand_n", speed_mpn_nand_n, FLAG_R_OPTIONAL }, 273 { "mpn_ior_n", speed_mpn_ior_n, FLAG_R_OPTIONAL }, 274 { "mpn_iorn_n", speed_mpn_iorn_n, FLAG_R_OPTIONAL }, 275 { "mpn_nior_n", speed_mpn_nior_n, FLAG_R_OPTIONAL }, 276 { "mpn_xor_n", speed_mpn_xor_n, FLAG_R_OPTIONAL }, 277 { "mpn_xnor_n", speed_mpn_xnor_n, FLAG_R_OPTIONAL }, 278 { "mpn_com", speed_mpn_com }, 279 { "mpn_neg", speed_mpn_neg }, 280 281 { "mpn_popcount", speed_mpn_popcount }, 282 { "mpn_hamdist", speed_mpn_hamdist }, 283 284 { "mpn_matrix22_mul", speed_mpn_matrix22_mul }, 285 286 { "mpn_hgcd2", speed_mpn_hgcd2, FLAG_NODATA }, 287 { "mpn_hgcd2_1", speed_mpn_hgcd2_1, FLAG_NODATA }, 288 { "mpn_hgcd2_2", speed_mpn_hgcd2_2, FLAG_NODATA }, 289 { "mpn_hgcd2_3", speed_mpn_hgcd2_3, FLAG_NODATA }, 290 { "mpn_hgcd2_4", speed_mpn_hgcd2_4, FLAG_NODATA }, 291 { "mpn_hgcd2_5", speed_mpn_hgcd2_5, FLAG_NODATA }, 292 { "mpn_hgcd", speed_mpn_hgcd }, 293 { "mpn_hgcd_lehmer", speed_mpn_hgcd_lehmer }, 294 { "mpn_hgcd_appr", speed_mpn_hgcd_appr }, 295 { "mpn_hgcd_appr_lehmer", speed_mpn_hgcd_appr_lehmer }, 296 297 { "mpn_hgcd_reduce", speed_mpn_hgcd_reduce }, 298 { "mpn_hgcd_reduce_1", speed_mpn_hgcd_reduce_1 }, 299 { "mpn_hgcd_reduce_2", speed_mpn_hgcd_reduce_2 }, 300 301 { "mpn_gcd_1", speed_mpn_gcd_1, FLAG_R_OPTIONAL }, 302 { "mpn_gcd_11", speed_mpn_gcd_11, FLAG_R_OPTIONAL }, 303 { "mpn_gcd_1N", speed_mpn_gcd_1N, FLAG_R_OPTIONAL }, 304 { "mpn_gcd_22", speed_mpn_gcd_22, FLAG_R_OPTIONAL }, 305 306 { "mpn_gcd", speed_mpn_gcd }, 307 308 { "mpn_gcdext", speed_mpn_gcdext }, 309 { "mpn_gcdext_single", speed_mpn_gcdext_single }, 310 { "mpn_gcdext_double", speed_mpn_gcdext_double }, 311 { "mpn_gcdext_one_single", speed_mpn_gcdext_one_single }, 312 { "mpn_gcdext_one_double", speed_mpn_gcdext_one_double }, 313#if 0 314 { "mpn_gcdext_lehmer", speed_mpn_gcdext_lehmer }, 315#endif 316 317 { "mpz_nextprime", speed_mpz_nextprime }, 318 319 { "mpz_jacobi", speed_mpz_jacobi }, 320 { "mpn_jacobi_base", speed_mpn_jacobi_base }, 321 { "mpn_jacobi_base_1", speed_mpn_jacobi_base_1 }, 322 { "mpn_jacobi_base_2", speed_mpn_jacobi_base_2 }, 323 { "mpn_jacobi_base_3", speed_mpn_jacobi_base_3 }, 324 { "mpn_jacobi_base_4", speed_mpn_jacobi_base_4 }, 325 326 { "mpn_mul", speed_mpn_mul, FLAG_R_OPTIONAL }, 327 { "mpn_mul_basecase", speed_mpn_mul_basecase,FLAG_R_OPTIONAL }, 328 { "mpn_sqr_basecase", speed_mpn_sqr_basecase }, 329#if HAVE_NATIVE_mpn_sqr_diagonal 330 { "mpn_sqr_diagonal", speed_mpn_sqr_diagonal }, 331#endif 332#if HAVE_NATIVE_mpn_sqr_diag_addlsh1 333 { "mpn_sqr_diag_addlsh1", speed_mpn_sqr_diag_addlsh1 }, 334#endif 335 336 { "mpn_mul_n", speed_mpn_mul_n }, 337 { "mpn_sqr", speed_mpn_sqr }, 338 339 { "mpn_toom2_sqr", speed_mpn_toom2_sqr }, 340 { "mpn_toom3_sqr", speed_mpn_toom3_sqr }, 341 { "mpn_toom4_sqr", speed_mpn_toom4_sqr }, 342 { "mpn_toom6_sqr", speed_mpn_toom6_sqr }, 343 { "mpn_toom8_sqr", speed_mpn_toom8_sqr }, 344 { "mpn_toom22_mul", speed_mpn_toom22_mul }, 345 { "mpn_toom33_mul", speed_mpn_toom33_mul }, 346 { "mpn_toom44_mul", speed_mpn_toom44_mul }, 347 { "mpn_toom6h_mul", speed_mpn_toom6h_mul }, 348 { "mpn_toom8h_mul", speed_mpn_toom8h_mul }, 349 { "mpn_toom32_mul", speed_mpn_toom32_mul }, 350 { "mpn_toom42_mul", speed_mpn_toom42_mul }, 351 { "mpn_toom43_mul", speed_mpn_toom43_mul }, 352 { "mpn_toom63_mul", speed_mpn_toom63_mul }, 353 { "mpn_nussbaumer_mul", speed_mpn_nussbaumer_mul }, 354 { "mpn_nussbaumer_mul_sqr",speed_mpn_nussbaumer_mul_sqr}, 355#if WANT_OLD_FFT_FULL 356 { "mpn_mul_fft_full", speed_mpn_mul_fft_full }, 357 { "mpn_mul_fft_full_sqr", speed_mpn_mul_fft_full_sqr }, 358#endif 359 { "mpn_mul_fft", speed_mpn_mul_fft, FLAG_R_OPTIONAL }, 360 { "mpn_mul_fft_sqr", speed_mpn_mul_fft_sqr, FLAG_R_OPTIONAL }, 361 362 { "mpn_sqrlo", speed_mpn_sqrlo }, 363 { "mpn_sqrlo_basecase", speed_mpn_sqrlo_basecase }, 364 { "mpn_mullo_n", speed_mpn_mullo_n }, 365 { "mpn_mullo_basecase", speed_mpn_mullo_basecase }, 366 367 { "mpn_mulmid_basecase", speed_mpn_mulmid_basecase, FLAG_R_OPTIONAL }, 368 { "mpn_toom42_mulmid", speed_mpn_toom42_mulmid }, 369 { "mpn_mulmid_n", speed_mpn_mulmid_n }, 370 { "mpn_mulmid", speed_mpn_mulmid, FLAG_R_OPTIONAL }, 371 372 { "mpn_bc_mulmod_bnm1", speed_mpn_bc_mulmod_bnm1 }, 373 { "mpn_mulmod_bnm1", speed_mpn_mulmod_bnm1 }, 374 { "mpn_mulmod_bnm1_rounded", speed_mpn_mulmod_bnm1_rounded }, 375 { "mpn_sqrmod_bnm1", speed_mpn_sqrmod_bnm1 }, 376 377 { "mpn_invert", speed_mpn_invert }, 378 { "mpn_invertappr", speed_mpn_invertappr }, 379 { "mpn_ni_invertappr", speed_mpn_ni_invertappr }, 380 { "mpn_binvert", speed_mpn_binvert }, 381 { "mpn_sec_invert", speed_mpn_sec_invert }, 382 383 { "mpn_sbpi1_div_qr", speed_mpn_sbpi1_div_qr, FLAG_R_OPTIONAL}, 384 { "mpn_dcpi1_div_qr", speed_mpn_dcpi1_div_qr, FLAG_R_OPTIONAL}, 385 { "mpn_mu_div_qr", speed_mpn_mu_div_qr, FLAG_R_OPTIONAL}, 386 { "mpn_mupi_div_qr", speed_mpn_mupi_div_qr, FLAG_R_OPTIONAL}, 387 { "mpn_sbpi1_divappr_q", speed_mpn_sbpi1_divappr_q, FLAG_R_OPTIONAL}, 388 { "mpn_dcpi1_divappr_q", speed_mpn_dcpi1_divappr_q, FLAG_R_OPTIONAL}, 389 390 { "mpn_sbpi1_bdiv_qr", speed_mpn_sbpi1_bdiv_qr }, 391 { "mpn_dcpi1_bdiv_qr", speed_mpn_dcpi1_bdiv_qr }, 392 { "mpn_sbpi1_bdiv_q", speed_mpn_sbpi1_bdiv_q }, 393 { "mpn_dcpi1_bdiv_q", speed_mpn_dcpi1_bdiv_q }, 394 { "mpn_sbpi1_bdiv_r", speed_mpn_sbpi1_bdiv_r }, 395 396 { "mpn_broot", speed_mpn_broot, FLAG_R }, 397 { "mpn_broot_invm1", speed_mpn_broot_invm1, FLAG_R }, 398 { "mpn_brootinv", speed_mpn_brootinv, FLAG_R }, 399 400 { "mpn_get_str", speed_mpn_get_str, FLAG_R_OPTIONAL }, 401 { "mpn_set_str", speed_mpn_set_str, FLAG_R_OPTIONAL }, 402 { "mpn_set_str_basecase", speed_mpn_bc_set_str, FLAG_R_OPTIONAL }, 403 404 { "mpn_sqrtrem", speed_mpn_sqrtrem }, 405 { "mpn_rootrem", speed_mpn_rootrem, FLAG_R }, 406 { "mpn_sqrt", speed_mpn_sqrt }, 407 { "mpn_root", speed_mpn_root, FLAG_R }, 408 409 { "mpn_perfect_power_p", speed_mpn_perfect_power_p, }, 410 { "mpn_perfect_square_p", speed_mpn_perfect_square_p, }, 411 412 { "mpn_fib2_ui", speed_mpn_fib2_ui, FLAG_NODATA }, 413 { "mpz_fib_ui", speed_mpz_fib_ui, FLAG_NODATA }, 414 { "mpz_fib2_ui", speed_mpz_fib2_ui, FLAG_NODATA }, 415 { "mpz_lucnum_ui", speed_mpz_lucnum_ui, FLAG_NODATA }, 416 { "mpz_lucnum2_ui", speed_mpz_lucnum2_ui, FLAG_NODATA }, 417 418 { "mpz_add", speed_mpz_add }, 419 { "mpz_invert", speed_mpz_invert, FLAG_R_OPTIONAL }, 420 { "mpz_bin_uiui", speed_mpz_bin_uiui, FLAG_NODATA | FLAG_R_OPTIONAL }, 421 { "mpz_bin_ui", speed_mpz_bin_ui, FLAG_NODATA | FLAG_R_OPTIONAL }, 422 { "mpz_fac_ui", speed_mpz_fac_ui, FLAG_NODATA }, 423 { "mpz_2fac_ui", speed_mpz_2fac_ui, FLAG_NODATA }, 424 { "mpz_mfac_uiui", speed_mpz_mfac_uiui, FLAG_NODATA | FLAG_R_OPTIONAL }, 425 { "mpz_primorial_ui", speed_mpz_primorial_ui, FLAG_NODATA }, 426 { "mpz_powm", speed_mpz_powm, FLAG_R_OPTIONAL }, 427 { "mpz_powm_mod", speed_mpz_powm_mod }, 428 { "mpz_powm_redc", speed_mpz_powm_redc }, 429 { "mpz_powm_sec", speed_mpz_powm_sec }, 430 { "mpz_powm_ui", speed_mpz_powm_ui, FLAG_R_OPTIONAL }, 431 432 { "mpz_mod", speed_mpz_mod }, 433 { "mpn_redc_1", speed_mpn_redc_1 }, 434 { "mpn_redc_2", speed_mpn_redc_2 }, 435 { "mpn_redc_n", speed_mpn_redc_n }, 436 437 { "MPN_COPY", speed_MPN_COPY }, 438 { "MPN_COPY_INCR", speed_MPN_COPY_INCR }, 439 { "MPN_COPY_DECR", speed_MPN_COPY_DECR }, 440 { "memcpy", speed_memcpy }, 441#if HAVE_NATIVE_mpn_copyi 442 { "mpn_copyi", speed_mpn_copyi }, 443#endif 444#if HAVE_NATIVE_mpn_copyd 445 { "mpn_copyd", speed_mpn_copyd }, 446#endif 447 { "mpn_sec_tabselect", speed_mpn_sec_tabselect, FLAG_R_OPTIONAL }, 448#if HAVE_NATIVE_mpn_addlsh1_n == 1 449 { "mpn_addlsh1_n", speed_mpn_addlsh1_n, FLAG_R_OPTIONAL }, 450#endif 451#if HAVE_NATIVE_mpn_sublsh1_n == 1 452 { "mpn_sublsh1_n", speed_mpn_sublsh1_n, FLAG_R_OPTIONAL }, 453#endif 454#if HAVE_NATIVE_mpn_addlsh1_n_ip1 455 { "mpn_addlsh1_n_ip1", speed_mpn_addlsh1_n_ip1 }, 456#endif 457#if HAVE_NATIVE_mpn_addlsh1_n_ip2 458 { "mpn_addlsh1_n_ip2", speed_mpn_addlsh1_n_ip2 }, 459#endif 460#if HAVE_NATIVE_mpn_sublsh1_n_ip1 461 { "mpn_sublsh1_n_ip1", speed_mpn_sublsh1_n_ip1 }, 462#endif 463#if HAVE_NATIVE_mpn_rsblsh1_n == 1 464 { "mpn_rsblsh1_n", speed_mpn_rsblsh1_n, FLAG_R_OPTIONAL }, 465#endif 466#if HAVE_NATIVE_mpn_addlsh2_n == 1 467 { "mpn_addlsh2_n", speed_mpn_addlsh2_n, FLAG_R_OPTIONAL }, 468#endif 469#if HAVE_NATIVE_mpn_sublsh2_n == 1 470 { "mpn_sublsh2_n", speed_mpn_sublsh2_n, FLAG_R_OPTIONAL }, 471#endif 472#if HAVE_NATIVE_mpn_addlsh2_n_ip1 473 { "mpn_addlsh2_n_ip1", speed_mpn_addlsh2_n_ip1 }, 474#endif 475#if HAVE_NATIVE_mpn_addlsh2_n_ip2 476 { "mpn_addlsh2_n_ip2", speed_mpn_addlsh2_n_ip2 }, 477#endif 478#if HAVE_NATIVE_mpn_sublsh2_n_ip1 479 { "mpn_sublsh2_n_ip1", speed_mpn_sublsh2_n_ip1 }, 480#endif 481#if HAVE_NATIVE_mpn_rsblsh2_n == 1 482 { "mpn_rsblsh2_n", speed_mpn_rsblsh2_n, FLAG_R_OPTIONAL }, 483#endif 484#if HAVE_NATIVE_mpn_addlsh_n 485 { "mpn_addlsh_n", speed_mpn_addlsh_n, FLAG_R_OPTIONAL }, 486#endif 487#if HAVE_NATIVE_mpn_sublsh_n 488 { "mpn_sublsh_n", speed_mpn_sublsh_n, FLAG_R_OPTIONAL }, 489#endif 490#if HAVE_NATIVE_mpn_addlsh_n_ip1 491 { "mpn_addlsh_n_ip1", speed_mpn_addlsh_n_ip1 }, 492#endif 493#if HAVE_NATIVE_mpn_addlsh_n_ip2 494 { "mpn_addlsh_n_ip2", speed_mpn_addlsh_n_ip2 }, 495#endif 496#if HAVE_NATIVE_mpn_sublsh_n_ip1 497 { "mpn_sublsh_n_ip1", speed_mpn_sublsh_n_ip1 }, 498#endif 499#if HAVE_NATIVE_mpn_rsblsh_n 500 { "mpn_rsblsh_n", speed_mpn_rsblsh_n, FLAG_R_OPTIONAL }, 501#endif 502#if HAVE_NATIVE_mpn_rsh1add_n 503 { "mpn_rsh1add_n", speed_mpn_rsh1add_n, FLAG_R_OPTIONAL }, 504#endif 505#if HAVE_NATIVE_mpn_rsh1sub_n 506 { "mpn_rsh1sub_n", speed_mpn_rsh1sub_n, FLAG_R_OPTIONAL }, 507#endif 508 509 { "mpn_cnd_add_n", speed_mpn_cnd_add_n, FLAG_R_OPTIONAL }, 510 { "mpn_cnd_sub_n", speed_mpn_cnd_sub_n, FLAG_R_OPTIONAL }, 511 512 { "MPN_ZERO", speed_MPN_ZERO }, 513 514 { "binvert_limb", speed_binvert_limb, FLAG_NODATA }, 515 { "binvert_limb_mul1", speed_binvert_limb_mul1, FLAG_NODATA }, 516 { "binvert_limb_loop", speed_binvert_limb_loop, FLAG_NODATA }, 517 { "binvert_limb_cond", speed_binvert_limb_cond, FLAG_NODATA }, 518 { "binvert_limb_arith", speed_binvert_limb_arith, FLAG_NODATA }, 519 520 { "malloc_free", speed_malloc_free }, 521 { "malloc_realloc_free", speed_malloc_realloc_free }, 522 { "gmp_allocate_free", speed_gmp_allocate_free }, 523 { "gmp_allocate_reallocate_free", speed_gmp_allocate_reallocate_free }, 524 { "mpz_init_clear", speed_mpz_init_clear }, 525 { "mpq_init_clear", speed_mpq_init_clear }, 526 { "mpf_init_clear", speed_mpf_init_clear }, 527 { "mpz_init_realloc_clear", speed_mpz_init_realloc_clear }, 528 529 { "umul_ppmm", speed_umul_ppmm, FLAG_R_OPTIONAL }, 530#if HAVE_NATIVE_mpn_umul_ppmm 531 { "mpn_umul_ppmm", speed_mpn_umul_ppmm, FLAG_R_OPTIONAL }, 532#endif 533#if HAVE_NATIVE_mpn_umul_ppmm_r 534 { "mpn_umul_ppmm_r", speed_mpn_umul_ppmm_r, FLAG_R_OPTIONAL }, 535#endif 536 537 { "count_leading_zeros", speed_count_leading_zeros, FLAG_NODATA | FLAG_R_OPTIONAL }, 538 { "count_trailing_zeros", speed_count_trailing_zeros, FLAG_NODATA | FLAG_R_OPTIONAL }, 539 540 { "udiv_qrnnd", speed_udiv_qrnnd, FLAG_R_OPTIONAL }, 541 { "udiv_qrnnd_c", speed_udiv_qrnnd_c, FLAG_R_OPTIONAL }, 542#if HAVE_NATIVE_mpn_udiv_qrnnd 543 { "mpn_udiv_qrnnd", speed_mpn_udiv_qrnnd, FLAG_R_OPTIONAL }, 544#endif 545#if HAVE_NATIVE_mpn_udiv_qrnnd_r 546 { "mpn_udiv_qrnnd_r", speed_mpn_udiv_qrnnd_r, FLAG_R_OPTIONAL }, 547#endif 548 { "invert_limb", speed_invert_limb, FLAG_R_OPTIONAL }, 549 550 { "operator_div", speed_operator_div, FLAG_R_OPTIONAL }, 551 { "operator_mod", speed_operator_mod, FLAG_R_OPTIONAL }, 552 553 { "gmp_randseed", speed_gmp_randseed, FLAG_R_OPTIONAL }, 554 { "gmp_randseed_ui", speed_gmp_randseed_ui, FLAG_R_OPTIONAL | FLAG_NODATA }, 555 { "mpz_urandomb", speed_mpz_urandomb, FLAG_R_OPTIONAL | FLAG_NODATA }, 556 557#ifdef SPEED_EXTRA_ROUTINES 558 SPEED_EXTRA_ROUTINES 559#endif 560#ifdef SPEED_EXTRA_ROUTINES2 561 SPEED_EXTRA_ROUTINES2 562#endif 563}; 564 565 566struct choice_t { 567 const struct routine_t *p; 568 mp_limb_t r; 569 double scale; 570 double time; 571 int no_time; 572 double prev_time; 573 const char *name; 574}; 575struct choice_t *choice; 576int num_choices = 0; 577 578 579void 580data_fill (mp_ptr ptr, mp_size_t size) 581{ 582 switch (option_data) { 583 case DATA_RANDOM: 584 mpn_random (ptr, size); 585 break; 586 case DATA_RANDOM2: 587 mpn_random2 (ptr, size); 588 break; 589 case DATA_ZEROS: 590 MPN_ZERO (ptr, size); 591 break; 592 case DATA_AAS: 593 MPN_FILL (ptr, size, GMP_NUMB_0xAA); 594 break; 595 case DATA_FFS: 596 MPN_FILL (ptr, size, GMP_NUMB_MAX); 597 break; 598 case DATA_2FD: 599 MPN_FILL (ptr, size, GMP_NUMB_MAX); 600 ptr[0] -= 2; 601 break; 602 default: 603 abort(); 604 /*NOTREACHED*/ 605 } 606} 607 608/* The code here handling the various combinations of output options isn't 609 too attractive, but it works and is fairly clean. */ 610 611#define SIZE_TO_DIVISOR(n) \ 612 (option_square == 1 ? (n)*(n) \ 613 : option_square == 2 ? (n)*((n)+1)/2 \ 614 : (n)) 615 616void 617run_one (FILE *fp, struct speed_params *s, mp_size_t prev_size) 618{ 619 const char *first_open_fastest, *first_open_notfastest, *first_close; 620 int i, fastest, want_data; 621 double fastest_time; 622 TMP_DECL; 623 624 TMP_MARK; 625 626 /* allocate data, unless all routines are NODATA */ 627 want_data = 0; 628 for (i = 0; i < num_choices; i++) 629 want_data |= ((choice[i].p->flag & FLAG_NODATA) == 0); 630 631 if (want_data) 632 { 633 SPEED_TMP_ALLOC_LIMBS (sp.xp, s->size, s->align_xp); 634 SPEED_TMP_ALLOC_LIMBS (sp.yp, s->size, s->align_yp); 635 636 data_fill (s->xp, s->size); 637 data_fill (s->yp, s->size); 638 } 639 else 640 { 641 sp.xp = NULL; 642 sp.yp = NULL; 643 } 644 645 if (prev_size == -1 && option_cmp == CMP_DIFFPREV) 646 { 647 first_open_fastest = "(#"; 648 first_open_notfastest = " ("; 649 first_close = ")"; 650 } 651 else 652 { 653 first_open_fastest = "#"; 654 first_open_notfastest = " "; 655 first_close = ""; 656 } 657 658 fastest = -1; 659 fastest_time = -1.0; 660 for (i = 0; i < num_choices; i++) 661 { 662 s->r = choice[i].r; 663 choice[i].time = speed_measure (choice[i].p->fun, s); 664 choice[i].no_time = (choice[i].time == -1.0); 665 if (! choice[i].no_time) 666 choice[i].time *= choice[i].scale; 667 668 /* Apply the effect of CMP_DIFFPREV, but the new choice[i].prev_time 669 is before any differences. */ 670 { 671 double t; 672 t = choice[i].time; 673 if (t != -1.0 && option_cmp == CMP_DIFFPREV && prev_size != -1) 674 { 675 if (choice[i].prev_time == -1.0) 676 choice[i].no_time = 1; 677 else 678 choice[i].time = choice[i].time - choice[i].prev_time; 679 } 680 choice[i].prev_time = t; 681 } 682 683 if (choice[i].no_time) 684 continue; 685 686 /* Look for the fastest after CMP_DIFFPREV has been applied, but 687 before CMP_RATIO or CMP_DIFFERENCE. There's only a fastest shown 688 if there's more than one routine. */ 689 if (num_choices > 1 && (fastest == -1 || choice[i].time < fastest_time)) 690 { 691 fastest = i; 692 fastest_time = choice[i].time; 693 } 694 695 if (option_cmp == CMP_DIFFPREV) 696 { 697 /* Conversion for UNIT_CYCLESPERLIMB differs in CMP_DIFFPREV. */ 698 if (option_unit == UNIT_CYCLES) 699 choice[i].time /= speed_cycletime; 700 else if (option_unit == UNIT_CYCLESPERLIMB) 701 { 702 if (prev_size == -1) 703 choice[i].time /= speed_cycletime; 704 else 705 choice[i].time /= (speed_cycletime 706 * (SIZE_TO_DIVISOR(s->size) 707 - SIZE_TO_DIVISOR(prev_size))); 708 } 709 } 710 else 711 { 712 if (option_unit == UNIT_CYCLES) 713 choice[i].time /= speed_cycletime; 714 else if (option_unit == UNIT_CYCLESPERLIMB) 715 choice[i].time /= (speed_cycletime * SIZE_TO_DIVISOR(s->size)); 716 717 if (option_cmp == CMP_RATIO && i > 0) 718 { 719 /* A ratio isn't affected by the units chosen. */ 720 if (choice[0].no_time || choice[0].time == 0.0) 721 choice[i].no_time = 1; 722 else 723 choice[i].time /= choice[0].time; 724 } 725 else if (option_cmp == CMP_DIFFERENCE && i > 0) 726 { 727 if (choice[0].no_time) 728 { 729 choice[i].no_time = 1; 730 continue; 731 } 732 choice[i].time -= choice[0].time; 733 } 734 } 735 } 736 737 if (option_gnuplot) 738 { 739 /* In CMP_DIFFPREV, don't print anything for the first size, start 740 with the second where an actual difference is available. 741 742 In CMP_RATIO, print the first column as 1.0. 743 744 The 9 decimals printed is much more than the expected precision of 745 the measurements actually. */ 746 747 if (! (option_cmp == CMP_DIFFPREV && prev_size == -1)) 748 { 749 fprintf (fp, "%-6ld ", s->size); 750 for (i = 0; i < num_choices; i++) 751 fprintf (fp, " %.9e", 752 choice[i].no_time ? 0.0 753 : (option_cmp == CMP_RATIO && i == 0) ? 1.0 754 : choice[i].time); 755 fprintf (fp, "\n"); 756 } 757 } 758 else 759 { 760 fprintf (fp, "%-6ld ", s->size); 761 for (i = 0; i < num_choices; i++) 762 { 763 char buf[128]; 764 int decimals; 765 766 if (choice[i].no_time) 767 { 768 fprintf (fp, " %*s", COLUMN_WIDTH, "n/a"); 769 } 770 else 771 {if (option_unit == UNIT_CYCLESPERLIMB 772 || (option_cmp == CMP_RATIO && i > 0)) 773 decimals = 4; 774 else if (option_unit == UNIT_CYCLES) 775 decimals = 2; 776 else 777 decimals = 9; 778 779 sprintf (buf, "%s%.*f%s", 780 i == fastest ? first_open_fastest : first_open_notfastest, 781 decimals, choice[i].time, first_close); 782 fprintf (fp, " %*s", COLUMN_WIDTH, buf); 783 } 784 } 785 fprintf (fp, "\n"); 786 } 787 788 TMP_FREE; 789} 790 791void 792run_all (FILE *fp) 793{ 794 mp_size_t prev_size; 795 int i; 796 TMP_DECL; 797 798 TMP_MARK; 799 SPEED_TMP_ALLOC_LIMBS (sp.xp_block, SPEED_BLOCK_SIZE, sp.align_xp); 800 SPEED_TMP_ALLOC_LIMBS (sp.yp_block, SPEED_BLOCK_SIZE, sp.align_yp); 801 802 data_fill (sp.xp_block, SPEED_BLOCK_SIZE); 803 data_fill (sp.yp_block, SPEED_BLOCK_SIZE); 804 805 for (i = 0; i < size_num; i++) 806 { 807 sp.size = size_array[i].start; 808 prev_size = -1; 809 for (;;) 810 { 811 mp_size_t step; 812 813 if (option_data == DATA_2FD && sp.size >= 2) 814 sp.xp[sp.size-1] = 2; 815 816 run_one (fp, &sp, prev_size); 817 prev_size = sp.size; 818 819 if (option_data == DATA_2FD && sp.size >= 2) 820 sp.xp[sp.size-1] = MP_LIMB_T_MAX; 821 822 if (option_factor != 0.0) 823 { 824 step = (mp_size_t) (sp.size * option_factor - sp.size); 825 if (step < 1) 826 step = 1; 827 } 828 else 829 step = 1; 830 if (step < option_step) 831 step = option_step; 832 833 sp.size += step; 834 if (sp.size > size_array[i].end) 835 break; 836 } 837 } 838 839 TMP_FREE; 840} 841 842 843FILE * 844fopen_for_write (const char *filename) 845{ 846 FILE *fp; 847 if ((fp = fopen (filename, "w")) == NULL) 848 { 849 fprintf (stderr, "Cannot create %s\n", filename); 850 exit(1); 851 } 852 return fp; 853} 854 855void 856fclose_written (FILE *fp, const char *filename) 857{ 858 int err; 859 860 err = ferror (fp); 861 err |= fclose (fp); 862 863 if (err) 864 { 865 fprintf (stderr, "Error writing %s\n", filename); 866 exit(1); 867 } 868} 869 870 871void 872run_gnuplot (int argc, char *argv[]) 873{ 874 char *plot_filename; 875 char *data_filename; 876 FILE *fp; 877 int i; 878 879 plot_filename = (char *) (*__gmp_allocate_func) 880 (strlen (option_gnuplot_basename) + 20); 881 data_filename = (char *) (*__gmp_allocate_func) 882 (strlen (option_gnuplot_basename) + 20); 883 884 sprintf (plot_filename, "%s.gnuplot", option_gnuplot_basename); 885 sprintf (data_filename, "%s.data", option_gnuplot_basename); 886 887 fp = fopen_for_write (plot_filename); 888 889 fprintf (fp, "# Generated with:\n"); 890 fprintf (fp, "#"); 891 for (i = 0; i < argc; i++) 892 fprintf (fp, " %s", argv[i]); 893 fprintf (fp, "\n"); 894 fprintf (fp, "\n"); 895 896 fprintf (fp, "reset\n"); 897 898 /* Putting the key at the top left is usually good, and you can change it 899 interactively if it's not. */ 900 fprintf (fp, "set key left\n"); 901 902 /* write underscores, not subscripts */ 903 fprintf (fp, "set termoption noenhanced\n"); 904 905 /* designed to make it possible to see crossovers easily */ 906 fprintf (fp, "set style data lines\n"); 907 908 fprintf (fp, "plot "); 909 for (i = 0; i < num_choices; i++) 910 { 911 fprintf (fp, " \"%s\" using 1:%d", data_filename, i+2); 912 fprintf (fp, " title \"%s\"", choice[i].name); 913 914 if (i != num_choices-1) 915 fprintf (fp, ", \\"); 916 fprintf (fp, "\n"); 917 } 918 919 fprintf (fp, "load \"-\"\n"); 920 fclose_written (fp, plot_filename); 921 922 fp = fopen_for_write (data_filename); 923 924 /* Unbuffered so you can see where the program was up to if it crashes or 925 you kill it. */ 926 setbuf (fp, NULL); 927 928 run_all (fp); 929 fclose_written (fp, data_filename); 930} 931 932 933/* Return a limb with n many one bits (starting from the least significant) */ 934 935#define LIMB_ONES(n) \ 936 ((n) == GMP_LIMB_BITS ? MP_LIMB_T_MAX \ 937 : (n) == 0 ? CNST_LIMB(0) \ 938 : (CNST_LIMB(1) << (n)) - 1) 939 940mp_limb_t 941r_string (const char *s) 942{ 943 const char *s_orig = s; 944 long n; 945 946 if (strcmp (s, "aas") == 0) 947 return GMP_NUMB_0xAA; 948 949 { 950 mpz_t z; 951 mp_limb_t l; 952 int set, siz; 953 954 mpz_init (z); 955 set = mpz_set_str (z, s, 0); 956 siz = SIZ(z); 957 l = (siz == 0 ? 0 : siz > 0 ? PTR(z)[0] : -PTR(z)[0]); 958 mpz_clear (z); 959 if (set == 0) 960 { 961 if (siz > 1 || siz < -1) 962 printf ("Warning, r parameter %s truncated to %d bits\n", 963 s_orig, GMP_LIMB_BITS); 964 return l; 965 } 966 } 967 968 if (s[0] == '0' && (s[1] == 'x' || s[1] == 'X')) 969 n = strtoul (s+2, (char **) &s, 16); 970 else 971 n = strtol (s, (char **) &s, 10); 972 973 if (strcmp (s, "bits") == 0) 974 { 975 mp_limb_t l; 976 if (n > GMP_LIMB_BITS) 977 { 978 fprintf (stderr, "%ld bit parameter invalid (max %d bits)\n", 979 n, GMP_LIMB_BITS); 980 exit (1); 981 } 982 mpn_random (&l, 1); 983 return (l | (CNST_LIMB(1) << (n-1))) & LIMB_ONES(n); 984 } 985 else if (strcmp (s, "ones") == 0) 986 { 987 if (n > GMP_LIMB_BITS) 988 { 989 fprintf (stderr, "%ld bit parameter invalid (max %d bits)\n", 990 n, GMP_LIMB_BITS); 991 exit (1); 992 } 993 return LIMB_ONES (n); 994 } 995 else if (*s != '\0') 996 { 997 fprintf (stderr, "invalid r parameter: %s\n", s_orig); 998 exit (1); 999 } 1000 1001 return n; 1002} 1003 1004 1005void 1006routine_find (struct choice_t *c, const char *s_orig) 1007{ 1008 const char *s; 1009 int i; 1010 size_t nlen; 1011 1012 c->name = s_orig; 1013 s = strchr (s_orig, '*'); 1014 if (s != NULL) 1015 { 1016 c->scale = atof(s_orig); 1017 s++; 1018 } 1019 else 1020 { 1021 c->scale = 1.0; 1022 s = s_orig; 1023 } 1024 1025 for (i = 0; i < numberof (routine); i++) 1026 { 1027 nlen = strlen (routine[i].name); 1028 if (memcmp (s, routine[i].name, nlen) != 0) 1029 continue; 1030 1031 if (s[nlen] == '.') 1032 { 1033 /* match, with a .r parameter */ 1034 1035 if (! (routine[i].flag & (FLAG_R|FLAG_R_OPTIONAL))) 1036 { 1037 fprintf (stderr, 1038 "Choice %s bad: doesn't take a \".<r>\" parameter\n", 1039 s_orig); 1040 exit (1); 1041 } 1042 1043 c->p = &routine[i]; 1044 c->r = r_string (s + nlen + 1); 1045 return; 1046 } 1047 1048 if (s[nlen] == '\0') 1049 { 1050 /* match, with no parameter */ 1051 1052 if (routine[i].flag & FLAG_R) 1053 { 1054 fprintf (stderr, 1055 "Choice %s bad: needs a \".<r>\" parameter\n", 1056 s_orig); 1057 exit (1); 1058 } 1059 1060 c->p = &routine[i]; 1061 c->r = 0; 1062 return; 1063 } 1064 } 1065 1066 fprintf (stderr, "Choice %s unrecognised\n", s_orig); 1067 exit (1); 1068} 1069 1070 1071void 1072usage (void) 1073{ 1074 int i; 1075 1076 speed_time_init (); 1077 1078 printf ("Usage: speed [-options] -s size <routine>...\n"); 1079 printf ("Measure the speed of some routines.\n"); 1080 printf ("Times are in seconds, accuracy is shown.\n"); 1081 printf ("\n"); 1082 printf (" -p num set precision as number of time units each routine must run\n"); 1083 printf (" -s size[-end][,size[-end]]... sizes to measure\n"); 1084 printf (" single sizes or ranges, sep with comma or use multiple -s\n"); 1085 printf (" -t step step through sizes by given amount\n"); 1086 printf (" -f factor step through sizes by given factor (eg. 1.05)\n"); 1087 printf (" -r show times as ratios of the first routine\n"); 1088 printf (" -d show times as difference from the first routine\n"); 1089 printf (" -D show times as difference from previous size shown\n"); 1090 printf (" -c show times in CPU cycles\n"); 1091 printf (" -C show times in cycles per limb\n"); 1092 printf (" -u print resource usage (memory) at end\n"); 1093 printf (" -P name output plot files \"name.gnuplot\" and \"name.data\"\n"); 1094 printf (" -a <type> use given data: random(default), random2, zeros, aas, ffs, 2fd\n"); 1095 printf (" -x, -y, -w, -W <align> specify data alignments, sources and dests\n"); 1096 printf (" -o addrs print addresses of data blocks\n"); 1097 printf ("\n"); 1098 printf ("If both -t and -f are used, it means step by the factor or the step, whichever\n"); 1099 printf ("is greater.\n"); 1100 printf ("If both -C and -D are used, it means cycles per however many limbs between a\n"); 1101 printf ("size and the previous size.\n"); 1102 printf ("\n"); 1103 printf ("After running with -P, plots can be viewed with Gnuplot or Quickplot.\n"); 1104 printf ("\"gnuplot name.gnuplot\" (use \"set logscale xy; replot\" at the prompt for\n"); 1105 printf ("a log/log plot).\n"); 1106 printf ("\"quickplot -s name.data\" (has interactive zooming, and note -s is important\n"); 1107 printf ("when viewing more than one routine, it means same axis scales for all data).\n"); 1108 printf ("\n"); 1109 printf ("The available routines are as follows.\n"); 1110 printf ("\n"); 1111 1112 for (i = 0; i < numberof (routine); i++) 1113 { 1114 if (routine[i].flag & FLAG_R) 1115 printf ("\t%s.r\n", routine[i].name); 1116 else if (routine[i].flag & FLAG_R_OPTIONAL) 1117 printf ("\t%s (optional .r)\n", routine[i].name); 1118 else 1119 printf ("\t%s\n", routine[i].name); 1120 } 1121 printf ("\n"); 1122 printf ("Routines with a \".r\" need an extra parameter, for example mpn_lshift.6\n"); 1123 printf ("r should be in decimal, or use 0xN for hexadecimal.\n"); 1124 printf ("\n"); 1125 printf ("Special forms for r are \"<N>bits\" for a random N bit number, \"<N>ones\" for\n"); 1126 printf ("N one bits, or \"aas\" for 0xAA..AA.\n"); 1127 printf ("\n"); 1128 printf ("Times for sizes out of the range accepted by a routine are shown as 0.\n"); 1129 printf ("The fastest routine at each size is marked with a # (free form output only).\n"); 1130 printf ("\n"); 1131 printf ("%s", speed_time_string); 1132 printf ("\n"); 1133 printf ("Gnuplot home page http://www.gnuplot.info/\n"); 1134 printf ("Quickplot home page http://quickplot.sourceforge.net/\n"); 1135} 1136 1137void 1138check_align_option (const char *name, mp_size_t align) 1139{ 1140 if (align < 0 || align > SPEED_TMP_ALLOC_ADJUST_MASK) 1141 { 1142 fprintf (stderr, "Alignment request out of range: %s %ld\n", 1143 name, (long) align); 1144 fprintf (stderr, " should be 0 to %d (limbs), inclusive\n", 1145 SPEED_TMP_ALLOC_ADJUST_MASK); 1146 exit (1); 1147 } 1148} 1149 1150int 1151main (int argc, char *argv[]) 1152{ 1153 int i; 1154 int opt; 1155 1156 /* Unbuffered so output goes straight out when directed to a pipe or file 1157 and isn't lost on killing the program half way. */ 1158 setbuf (stdout, NULL); 1159 1160 for (;;) 1161 { 1162 opt = getopt(argc, argv, "a:CcDdEFf:o:p:P:rRs:t:ux:y:w:W:z"); 1163 if (opt == EOF) 1164 break; 1165 1166 switch (opt) { 1167 case 'a': 1168 if (strcmp (optarg, "random") == 0) option_data = DATA_RANDOM; 1169 else if (strcmp (optarg, "random2") == 0) option_data = DATA_RANDOM2; 1170 else if (strcmp (optarg, "zeros") == 0) option_data = DATA_ZEROS; 1171 else if (strcmp (optarg, "aas") == 0) option_data = DATA_AAS; 1172 else if (strcmp (optarg, "ffs") == 0) option_data = DATA_FFS; 1173 else if (strcmp (optarg, "2fd") == 0) option_data = DATA_2FD; 1174 else 1175 { 1176 fprintf (stderr, "unrecognised data option: %s\n", optarg); 1177 exit (1); 1178 } 1179 break; 1180 case 'C': 1181 if (option_unit != UNIT_SECONDS) goto bad_unit; 1182 option_unit = UNIT_CYCLESPERLIMB; 1183 break; 1184 case 'c': 1185 if (option_unit != UNIT_SECONDS) 1186 { 1187 bad_unit: 1188 fprintf (stderr, "cannot use more than one of -c, -C\n"); 1189 exit (1); 1190 } 1191 option_unit = UNIT_CYCLES; 1192 break; 1193 case 'D': 1194 if (option_cmp != CMP_ABSOLUTE) goto bad_cmp; 1195 option_cmp = CMP_DIFFPREV; 1196 break; 1197 case 'd': 1198 if (option_cmp != CMP_ABSOLUTE) 1199 { 1200 bad_cmp: 1201 fprintf (stderr, "cannot use more than one of -d, -D, -r\n"); 1202 exit (1); 1203 } 1204 option_cmp = CMP_DIFFERENCE; 1205 break; 1206 case 'E': 1207 option_square = 1; 1208 break; 1209 case 'F': 1210 option_square = 2; 1211 break; 1212 case 'f': 1213 option_factor = atof (optarg); 1214 if (option_factor <= 1.0) 1215 { 1216 fprintf (stderr, "-f factor must be > 1.0\n"); 1217 exit (1); 1218 } 1219 break; 1220 case 'o': 1221 speed_option_set (optarg); 1222 break; 1223 case 'P': 1224 option_gnuplot = 1; 1225 option_gnuplot_basename = optarg; 1226 break; 1227 case 'p': 1228 speed_precision = atoi (optarg); 1229 break; 1230 case 'R': 1231 option_seed = time (NULL); 1232 break; 1233 case 'r': 1234 if (option_cmp != CMP_ABSOLUTE) 1235 goto bad_cmp; 1236 option_cmp = CMP_RATIO; 1237 break; 1238 case 's': 1239 { 1240 char *s; 1241 for (s = strtok (optarg, ","); s != NULL; s = strtok (NULL, ",")) 1242 { 1243 if (size_num == size_allocnum) 1244 { 1245 size_array = (struct size_array_t *) 1246 __gmp_allocate_or_reallocate 1247 (size_array, 1248 size_allocnum * sizeof(size_array[0]), 1249 (size_allocnum+10) * sizeof(size_array[0])); 1250 size_allocnum += 10; 1251 } 1252 if (sscanf (s, "%ld-%ld", 1253 &size_array[size_num].start, 1254 &size_array[size_num].end) != 2) 1255 { 1256 size_array[size_num].start = size_array[size_num].end 1257 = atol (s); 1258 } 1259 1260 if (size_array[size_num].start < 0 1261 || size_array[size_num].end < 0 1262 || size_array[size_num].start > size_array[size_num].end) 1263 { 1264 fprintf (stderr, "invalid size parameter: %s\n", s); 1265 exit (1); 1266 } 1267 1268 size_num++; 1269 } 1270 } 1271 break; 1272 case 't': 1273 option_step = atol (optarg); 1274 if (option_step < 1) 1275 { 1276 fprintf (stderr, "-t step must be >= 1\n"); 1277 exit (1); 1278 } 1279 break; 1280 case 'u': 1281 option_resource_usage = 1; 1282 break; 1283 case 'z': 1284 sp.cache = 1; 1285 break; 1286 case 'x': 1287 sp.align_xp = atol (optarg); 1288 check_align_option ("-x", sp.align_xp); 1289 break; 1290 case 'y': 1291 sp.align_yp = atol (optarg); 1292 check_align_option ("-y", sp.align_yp); 1293 break; 1294 case 'w': 1295 sp.align_wp = atol (optarg); 1296 check_align_option ("-w", sp.align_wp); 1297 break; 1298 case 'W': 1299 sp.align_wp2 = atol (optarg); 1300 check_align_option ("-W", sp.align_wp2); 1301 break; 1302 case '?': 1303 exit(1); 1304 } 1305 } 1306 1307 if (optind >= argc) 1308 { 1309 usage (); 1310 exit (1); 1311 } 1312 1313 if (size_num == 0) 1314 { 1315 fprintf (stderr, "-s <size> must be specified\n"); 1316 exit (1); 1317 } 1318 1319 gmp_randinit_default (__gmp_rands); 1320 __gmp_rands_initialized = 1; 1321 gmp_randseed_ui (__gmp_rands, option_seed); 1322 1323 choice = (struct choice_t *) (*__gmp_allocate_func) 1324 ((argc - optind) * sizeof(choice[0])); 1325 for ( ; optind < argc; optind++) 1326 { 1327 struct choice_t c; 1328 routine_find (&c, argv[optind]); 1329 choice[num_choices] = c; 1330 num_choices++; 1331 } 1332 1333 if ((option_cmp == CMP_RATIO || option_cmp == CMP_DIFFERENCE) && 1334 num_choices < 2) 1335 { 1336 fprintf (stderr, "WARNING, -d or -r does nothing when only one routine requested\n"); 1337 } 1338 1339 speed_time_init (); 1340 if (option_unit == UNIT_CYCLES || option_unit == UNIT_CYCLESPERLIMB) 1341 speed_cycletime_need_cycles (); 1342 else 1343 speed_cycletime_need_seconds (); 1344 1345 if (option_gnuplot) 1346 { 1347 run_gnuplot (argc, argv); 1348 } 1349 else 1350 { 1351 if (option_unit == UNIT_SECONDS) 1352 printf ("overhead %.9f secs", speed_measure (speed_noop, NULL)); 1353 else 1354 printf ("overhead %.2f cycles", 1355 speed_measure (speed_noop, NULL) / speed_cycletime); 1356 printf (", precision %d units of %.2e secs", 1357 speed_precision, speed_unittime); 1358 1359 if (speed_cycletime == 1.0 || speed_cycletime == 0.0) 1360 printf (", CPU freq unknown\n"); 1361 else 1362 printf (", CPU freq %.2f MHz\n", 1e-6/speed_cycletime); 1363 1364 printf (" "); 1365 for (i = 0; i < num_choices; i++) 1366 printf (" %*s", COLUMN_WIDTH, choice[i].name); 1367 printf ("\n"); 1368 1369 run_all (stdout); 1370 } 1371 1372 if (option_resource_usage) 1373 { 1374#if HAVE_GETRUSAGE 1375 { 1376 /* This doesn't give data sizes on linux 2.0.x, only utime. */ 1377 struct rusage r; 1378 if (getrusage (RUSAGE_SELF, &r) != 0) 1379 perror ("getrusage"); 1380 else 1381 printf ("getrusage(): utime %ld.%06ld data %ld stack %ld maxresident %ld\n", 1382 (long) r.ru_utime.tv_sec, (long) r.ru_utime.tv_usec, 1383 r.ru_idrss, r.ru_isrss, r.ru_ixrss); 1384 } 1385#else 1386 printf ("getrusage() not available\n"); 1387#endif 1388 1389 /* Linux kernel. */ 1390 { 1391 char buf[128]; 1392 sprintf (buf, "/proc/%d/status", getpid()); 1393 if (access (buf, R_OK) == 0) 1394 { 1395 sprintf (buf, "cat /proc/%d/status", getpid()); 1396 system (buf); 1397 } 1398 1399 } 1400 } 1401 1402 return 0; 1403} 1404