1/* [Description] 2 3Copyright 2010-2023 Free Software Foundation, Inc. 4Contributed by the AriC and Caramba projects, INRIA. 5 6This file is part of the GNU MPFR Library. 7 8The GNU MPFR Library is free software; you can redistribute it and/or modify 9it under the terms of the GNU Lesser General Public License as published by 10the Free Software Foundation; either version 3 of the License, or (at your 11option) any later version. 12 13The GNU MPFR Library is distributed in the hope that it will be useful, but 14WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public 16License for more details. 17 18You should have received a copy of the GNU Lesser General Public License 19along with the GNU MPFR Library; see the file COPYING.LESSER. If not, see 20https://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc., 2151 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */ 22 23#include <stdlib.h> 24#include <time.h> 25 26#define MPFR_NEED_LONGLONG_H 27#include "mpfr-impl.h" 28 29#undef _PROTO 30#define _PROTO __GMP_PROTO 31#include "speed.h" 32 33/* Let f be a function for which we have several implementations f1, f2... */ 34/* We wish to have a quick overview of which implementation is the best */ 35/* in function of the point x where f(x) is computed and of the precision */ 36/* prec requested by the user. */ 37/* This is performed by drawing a 2D graphic with color indicating which */ 38/* method is the best. */ 39/* For building this graphic, the following structur is used (see the core */ 40/* of generate_2D_sample for an explanation of each field. */ 41struct speed_params2D 42{ 43 /* x-window: [min_x, max_x] or [2^min_x, 2^max_x] */ 44 /* or [-2^(max_x), -2^(min_x)] U [2^min_x, 2^max_x] */ 45 /* depending on the value of logarithmic_scale_x */ 46 double min_x; 47 double max_x; 48 49 /* prec-window: [min_prec, max_prec] */ 50 mpfr_prec_t min_prec; 51 mpfr_prec_t max_prec; 52 53 /* number of sample points for the x-axis and the prec-axis */ 54 int nb_points_x; 55 int nb_points_prec; 56 57 /* should the sample points be logarithmically scaled or not */ 58 int logarithmic_scale_x; 59 int logarithmic_scale_prec; 60 61 /* list of functions g1, g2... measuring the execution time of f1, f2... */ 62 /* when given a point x and a precision prec stored in s. */ 63 /* We use s->xp to store the significant of x, s->r to store its exponent */ 64 /* s->align_xp to store its sign, and s->size to store prec. */ 65 double (**speed_funcs) (struct speed_params *s); 66}; 67 68/* Given an array t of nb_functions double indicating the timings of several */ 69/* implementations, return i, such that t[i] is the best timing. */ 70int 71find_best (double *t, int nb_functions) 72{ 73 int i, ibest; 74 double best; 75 76 if (nb_functions<=0) 77 { 78 fprintf (stderr, "There is no function\n"); 79 abort (); 80 } 81 82 ibest = 0; 83 best = t[0]; 84 for (i=1; i<nb_functions; i++) 85 { 86 if (t[i]<best) 87 { 88 best = t[i]; 89 ibest = i; 90 } 91 } 92 93 return ibest; 94} 95 96void generate_2D_sample (FILE *output, struct speed_params2D param) 97{ 98 mpfr_t temp; 99 double incr_prec; 100 mpfr_t incr_x; 101 mpfr_t x, x2; 102 double prec; 103 struct speed_params s; 104 int i; 105 int test; 106 int nb_functions; 107 double *t; /* store the timing of each implementation */ 108 109 /* We first determine how many implementations we have */ 110 nb_functions = 0; 111 while (param.speed_funcs[nb_functions] != NULL) 112 nb_functions++; 113 114 t = malloc (nb_functions * sizeof (double)); 115 if (t == NULL) 116 { 117 fprintf (stderr, "Can't allocate memory.\n"); 118 abort (); 119 } 120 121 122 mpfr_init2 (temp, MPFR_SMALL_PRECISION); 123 124 /* The precision is sampled from min_prec to max_prec with */ 125 /* approximately nb_points_prec points. If logarithmic_scale_prec */ 126 /* is true, the precision is multiplied by incr_prec at each */ 127 /* step. Otherwise, incr_prec is added at each step. */ 128 if (param.logarithmic_scale_prec) 129 { 130 mpfr_set_ui (temp, (unsigned long int)param.max_prec, MPFR_RNDU); 131 mpfr_div_ui (temp, temp, (unsigned long int)param.min_prec, MPFR_RNDU); 132 mpfr_root (temp, temp, 133 (unsigned long int)param.nb_points_prec, MPFR_RNDU); 134 incr_prec = mpfr_get_d (temp, MPFR_RNDU); 135 } 136 else 137 { 138 incr_prec = (double)param.max_prec - (double)param.min_prec; 139 incr_prec = incr_prec/((double)param.nb_points_prec); 140 } 141 142 /* The points x are sampled according to the following rule: */ 143 /* If logarithmic_scale_x = 0: */ 144 /* nb_points_x points are equally distributed between min_x and max_x */ 145 /* If logarithmic_scale_x = 1: */ 146 /* nb_points_x points are sampled from 2^(min_x) to 2^(max_x). At */ 147 /* each step, the current point is multiplied by incr_x. */ 148 /* If logarithmic_scale_x = -1: */ 149 /* nb_points_x/2 points are sampled from -2^(max_x) to -2^(min_x) */ 150 /* (at each step, the current point is divided by incr_x); and */ 151 /* nb_points_x/2 points are sampled from 2^(min_x) to 2^(max_x) */ 152 /* (at each step, the current point is multiplied by incr_x). */ 153 mpfr_init2 (incr_x, param.max_prec); 154 if (param.logarithmic_scale_x == 0) 155 { 156 mpfr_set_d (incr_x, 157 (param.max_x - param.min_x)/(double)param.nb_points_x, 158 MPFR_RNDU); 159 } 160 else if (param.logarithmic_scale_x == -1) 161 { 162 mpfr_set_d (incr_x, 163 2.*(param.max_x - param.min_x)/(double)param.nb_points_x, 164 MPFR_RNDU); 165 mpfr_exp2 (incr_x, incr_x, MPFR_RNDU); 166 } 167 else 168 { /* other values of param.logarithmic_scale_x are considered as 1 */ 169 mpfr_set_d (incr_x, 170 (param.max_x - param.min_x)/(double)param.nb_points_x, 171 MPFR_RNDU); 172 mpfr_exp2 (incr_x, incr_x, MPFR_RNDU); 173 } 174 175 /* Main loop */ 176 mpfr_init2 (x, param.max_prec); 177 mpfr_init2 (x2, param.max_prec); 178 prec = (double)param.min_prec; 179 while (prec <= param.max_prec) 180 { 181 printf ("prec = %d\n", (int)prec); 182 if (param.logarithmic_scale_x == 0) 183 mpfr_set_d (temp, param.min_x, MPFR_RNDU); 184 else if (param.logarithmic_scale_x == -1) 185 { 186 mpfr_set_d (temp, param.max_x, MPFR_RNDD); 187 mpfr_exp2 (temp, temp, MPFR_RNDD); 188 mpfr_neg (temp, temp, MPFR_RNDU); 189 } 190 else 191 { 192 mpfr_set_d (temp, param.min_x, MPFR_RNDD); 193 mpfr_exp2 (temp, temp, MPFR_RNDD); 194 } 195 196 /* We perturb x a little bit, in order to avoid trailing zeros that */ 197 /* might change the behavior of algorithms. */ 198 mpfr_const_pi (x, MPFR_RNDN); 199 mpfr_div_2ui (x, x, 7, MPFR_RNDN); 200 mpfr_add_ui (x, x, 1, MPFR_RNDN); 201 mpfr_mul (x, x, temp, MPFR_RNDN); 202 203 test = 1; 204 while (test) 205 { 206 mpfr_fprintf (output, "%e\t", mpfr_get_d (x, MPFR_RNDN)); 207 mpfr_fprintf (output, "%Pu\t", (mpfr_prec_t)prec); 208 209 s.r = (mp_limb_t)mpfr_get_exp (x); 210 s.size = (mpfr_prec_t)prec; 211 s.align_xp = (mpfr_sgn (x) > 0)?1:2; 212 mpfr_set_prec (x2, (mpfr_prec_t)prec); 213 mpfr_set (x2, x, MPFR_RNDU); 214 s.xp = x2->_mpfr_d; 215 216 for (i=0; i<nb_functions; i++) 217 { 218 t[i] = speed_measure (param.speed_funcs[i], &s); 219 mpfr_fprintf (output, "%e\t", t[i]); 220 } 221 fprintf (output, "%d\n", 1 + find_best (t, nb_functions)); 222 223 if (param.logarithmic_scale_x == 0) 224 { 225 mpfr_add (x, x, incr_x, MPFR_RNDU); 226 if (mpfr_cmp_d (x, param.max_x) > 0) 227 test=0; 228 } 229 else 230 { 231 if (mpfr_sgn (x) < 0 ) 232 { /* if x<0, it means that logarithmic_scale_x=-1 */ 233 mpfr_div (x, x, incr_x, MPFR_RNDU); 234 mpfr_abs (temp, x, MPFR_RNDD); 235 mpfr_log2 (temp, temp, MPFR_RNDD); 236 if (mpfr_cmp_d (temp, param.min_x) < 0) 237 mpfr_neg (x, x, MPFR_RNDN); 238 } 239 else 240 { 241 mpfr_mul (x, x, incr_x, MPFR_RNDU); 242 mpfr_set (temp, x, MPFR_RNDD); 243 mpfr_log2 (temp, temp, MPFR_RNDD); 244 if (mpfr_cmp_d (temp, param.max_x) > 0) 245 test=0; 246 } 247 } 248 } 249 250 prec = ( (param.logarithmic_scale_prec) ? (prec * incr_prec) 251 : (prec + incr_prec) ); 252 fprintf (output, "\n"); 253 } 254 255 free (t); 256 mpfr_clear (incr_x); 257 mpfr_clear (x); 258 mpfr_clear (x2); 259 mpfr_clear (temp); 260 261 return; 262} 263 264#define SPEED_MPFR_FUNC_2D(mean_func) \ 265 do \ 266 { \ 267 double t; \ 268 unsigned i; \ 269 mpfr_t w, x; \ 270 mp_size_t size; \ 271 \ 272 SPEED_RESTRICT_COND (s->size >= MPFR_PREC_MIN); \ 273 SPEED_RESTRICT_COND (s->size <= MPFR_PREC_MAX); \ 274 \ 275 size = (s->size-1)/GMP_NUMB_BITS+1; \ 276 s->xp[size-1] |= MPFR_LIMB_HIGHBIT; \ 277 MPFR_TMP_INIT1 (s->xp, x, s->size); \ 278 MPFR_SET_EXP (x, (mpfr_exp_t) s->r); \ 279 if (s->align_xp == 2) MPFR_SET_NEG (x); \ 280 \ 281 mpfr_init2 (w, s->size); \ 282 speed_starttime (); \ 283 i = s->reps; \ 284 \ 285 do \ 286 mean_func (w, x, MPFR_RNDN); \ 287 while (--i != 0); \ 288 t = speed_endtime (); \ 289 \ 290 mpfr_clear (w); \ 291 return t; \ 292 } \ 293 while (0) 294 295mpfr_prec_t mpfr_exp_2_threshold; 296mpfr_prec_t old_threshold = MPFR_EXP_2_THRESHOLD; 297#undef MPFR_EXP_2_THRESHOLD 298#define MPFR_EXP_2_THRESHOLD mpfr_exp_2_threshold 299#include "exp_2.c" 300 301double 302timing_exp1 (struct speed_params *s) 303{ 304 mpfr_exp_2_threshold = s->size+1; 305 SPEED_MPFR_FUNC_2D (mpfr_exp_2); 306} 307 308double 309timing_exp2 (struct speed_params *s) 310{ 311 mpfr_exp_2_threshold = s->size-1; 312 SPEED_MPFR_FUNC_2D (mpfr_exp_2); 313} 314 315double 316timing_exp3 (struct speed_params *s) 317{ 318 SPEED_MPFR_FUNC_2D (mpfr_exp_3); 319} 320 321 322#include "ai.c" 323double 324timing_ai1 (struct speed_params *s) 325{ 326 SPEED_MPFR_FUNC_2D (mpfr_ai1); 327} 328 329double 330timing_ai2 (struct speed_params *s) 331{ 332 SPEED_MPFR_FUNC_2D (mpfr_ai2); 333} 334 335/* These functions are for testing purpose only */ 336/* They are used to draw which method is actually used */ 337double 338virtual_timing_ai1 (struct speed_params *s) 339{ 340 double t; 341 unsigned i; 342 mpfr_t w, x; 343 mp_size_t size; 344 mpfr_t temp1, temp2; 345 346 SPEED_RESTRICT_COND (s->size >= MPFR_PREC_MIN); 347 SPEED_RESTRICT_COND (s->size <= MPFR_PREC_MAX); 348 349 size = (s->size-1)/GMP_NUMB_BITS+1; 350 s->xp[size-1] |= MPFR_LIMB_HIGHBIT; 351 MPFR_TMP_INIT1 (s->xp, x, s->size); 352 MPFR_SET_EXP (x, (mpfr_exp_t) s->r); 353 if (s->align_xp == 2) MPFR_SET_NEG (x); 354 355 mpfr_init2 (w, s->size); 356 speed_starttime (); 357 i = s->reps; 358 359 mpfr_init2 (temp1, MPFR_SMALL_PRECISION); 360 mpfr_init2 (temp2, MPFR_SMALL_PRECISION); 361 362 mpfr_set (temp1, x, MPFR_SMALL_PRECISION); 363 mpfr_set_si (temp2, MPFR_AI_THRESHOLD2, MPFR_RNDN); 364 mpfr_mul_ui (temp2, temp2, (unsigned int)MPFR_PREC (w), MPFR_RNDN); 365 366 if (MPFR_IS_NEG (x)) 367 mpfr_mul_si (temp1, temp1, MPFR_AI_THRESHOLD1, MPFR_RNDN); 368 else 369 mpfr_mul_si (temp1, temp1, MPFR_AI_THRESHOLD3, MPFR_RNDN); 370 371 mpfr_add (temp1, temp1, temp2, MPFR_RNDN); 372 373 if (mpfr_cmp_si (temp1, MPFR_AI_SCALE) > 0) 374 t = 1000.; 375 else 376 t = 1.; 377 378 mpfr_clear (temp1); 379 mpfr_clear (temp2); 380 381 return t; 382} 383 384double 385virtual_timing_ai2 (struct speed_params *s) 386{ 387 double t; 388 unsigned i; 389 mpfr_t w, x; 390 mp_size_t size; 391 mpfr_t temp1, temp2; 392 393 SPEED_RESTRICT_COND (s->size >= MPFR_PREC_MIN); 394 SPEED_RESTRICT_COND (s->size <= MPFR_PREC_MAX); 395 396 size = (s->size-1)/GMP_NUMB_BITS+1; 397 s->xp[size-1] |= MPFR_LIMB_HIGHBIT; 398 MPFR_TMP_INIT1 (s->xp, x, s->size); 399 MPFR_SET_EXP (x, (mpfr_exp_t) s->r); 400 if (s->align_xp == 2) MPFR_SET_NEG (x); 401 402 mpfr_init2 (w, s->size); 403 speed_starttime (); 404 i = s->reps; 405 406 mpfr_init2 (temp1, MPFR_SMALL_PRECISION); 407 mpfr_init2 (temp2, MPFR_SMALL_PRECISION); 408 409 mpfr_set (temp1, x, MPFR_SMALL_PRECISION); 410 mpfr_set_si (temp2, MPFR_AI_THRESHOLD2, MPFR_RNDN); 411 mpfr_mul_ui (temp2, temp2, (unsigned int)MPFR_PREC (w), MPFR_RNDN); 412 413 if (MPFR_IS_NEG (x)) 414 mpfr_mul_si (temp1, temp1, MPFR_AI_THRESHOLD1, MPFR_RNDN); 415 else 416 mpfr_mul_si (temp1, temp1, MPFR_AI_THRESHOLD3, MPFR_RNDN); 417 418 mpfr_add (temp1, temp1, temp2, MPFR_RNDN); 419 420 if (mpfr_cmp_si (temp1, MPFR_AI_SCALE) > 0) 421 t = 1.; 422 else 423 t = 1000.; 424 425 mpfr_clear (temp1); 426 mpfr_clear (temp2); 427 428 return t; 429} 430 431int 432main (void) 433{ 434 FILE *output; 435 struct speed_params2D param; 436 double (*speed_funcs[3]) (struct speed_params *s); 437 438 /* char filename[256] = "virtual_timing_ai.dat"; */ 439 /* speed_funcs[0] = virtual_timing_ai1; */ 440 /* speed_funcs[1] = virtual_timing_ai2; */ 441 442 char filename[256] = "airy.dat"; 443 speed_funcs[0] = timing_ai1; 444 speed_funcs[1] = timing_ai2; 445 446 speed_funcs[2] = NULL; 447 output = fopen (filename, "w"); 448 if (output == NULL) 449 { 450 fprintf (stderr, "Can't open file '%s' for writing.\n", filename); 451 abort (); 452 } 453 param.min_x = -80; 454 param.max_x = 60; 455 param.min_prec = 50; 456 param.max_prec = 1500; 457 param.nb_points_x = 200; 458 param.nb_points_prec = 200; 459 param.logarithmic_scale_x = 0; 460 param.logarithmic_scale_prec = 0; 461 param.speed_funcs = speed_funcs; 462 463 generate_2D_sample (output, param); 464 465 fclose (output); 466 mpfr_free_cache (); 467 return 0; 468} 469