tuneup.c revision 1.1.1.4
1/* Tune various threshold of MPFR 2 3Copyright 2005-2020 Free Software Foundation, Inc. 4Contributed by the AriC and Caramba projects, INRIA. 5 6This file is part of the GNU MPFR Library. 7 8The GNU MPFR Library is free software; you can redistribute it and/or modify 9it under the terms of the GNU Lesser General Public License as published by 10the Free Software Foundation; either version 3 of the License, or (at your 11option) any later version. 12 13The GNU MPFR Library is distributed in the hope that it will be useful, but 14WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public 16License for more details. 17 18You should have received a copy of the GNU Lesser General Public License 19along with the GNU MPFR Library; see the file COPYING.LESSER. If not, see 20https://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc., 2151 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */ 22 23#include <stdlib.h> 24#include <time.h> 25 26#define MPFR_NEED_LONGLONG_H 27#include "mpfr-impl.h" 28 29#undef _PROTO 30#define _PROTO __GMP_PROTO 31#include "speed.h" 32 33/* Undefine static assertion system */ 34#undef MPFR_STAT_STATIC_ASSERT 35#define MPFR_STAT_STATIC_ASSERT(a) MPFR_ASSERTN(a) 36 37int verbose; 38 39/* template for an unary function */ 40/* s->size: precision of both input and output 41 s->xp : Mantissa of first input 42 s->yp : mantissa of second input */ 43#define SPEED_MPFR_FUNC(mean_fun) \ 44 do \ 45 { \ 46 unsigned i; \ 47 mpfr_limb_ptr wp; \ 48 double t; \ 49 mpfr_t w, x; \ 50 mp_size_t size; \ 51 MPFR_TMP_DECL (marker); \ 52 \ 53 SPEED_RESTRICT_COND (s->size >= MPFR_PREC_MIN); \ 54 SPEED_RESTRICT_COND (s->size <= MPFR_PREC_MAX); \ 55 MPFR_TMP_MARK (marker); \ 56 \ 57 size = (s->size-1)/GMP_NUMB_BITS+1; \ 58 s->xp[size-1] |= MPFR_LIMB_HIGHBIT; \ 59 MPFR_TMP_INIT1 (s->xp, x, s->size); \ 60 MPFR_SET_EXP (x, 0); \ 61 \ 62 MPFR_TMP_INIT (wp, w, s->size, size); \ 63 \ 64 speed_operand_src (s, s->xp, size); \ 65 speed_operand_dst (s, wp, size); \ 66 speed_cache_fill (s); \ 67 \ 68 speed_starttime (); \ 69 i = s->reps; \ 70 do \ 71 mean_fun (w, x, MPFR_RNDN); \ 72 while (--i != 0); \ 73 t = speed_endtime (); \ 74 \ 75 MPFR_TMP_FREE (marker); \ 76 return t; \ 77 } \ 78 while (0) 79 80/* same as SPEED_MPFR_FUNC, but for say mpfr_sin_cos (y, z, x, r) */ 81#define SPEED_MPFR_FUNC2(mean_fun) \ 82 do \ 83 { \ 84 unsigned i; \ 85 mpfr_limb_ptr vp, wp; \ 86 double t; \ 87 mpfr_t v, w, x; \ 88 mp_size_t size; \ 89 MPFR_TMP_DECL (marker); \ 90 \ 91 SPEED_RESTRICT_COND (s->size >= MPFR_PREC_MIN); \ 92 SPEED_RESTRICT_COND (s->size <= MPFR_PREC_MAX); \ 93 MPFR_TMP_MARK (marker); \ 94 \ 95 size = (s->size-1)/GMP_NUMB_BITS+1; \ 96 s->xp[size-1] |= MPFR_LIMB_HIGHBIT; \ 97 MPFR_TMP_INIT1 (s->xp, x, s->size); \ 98 MPFR_SET_EXP (x, 0); \ 99 \ 100 MPFR_TMP_INIT (vp, v, s->size, size); \ 101 MPFR_TMP_INIT (wp, w, s->size, size); \ 102 \ 103 speed_operand_src (s, s->xp, size); \ 104 speed_operand_dst (s, vp, size); \ 105 speed_operand_dst (s, wp, size); \ 106 speed_cache_fill (s); \ 107 \ 108 speed_starttime (); \ 109 i = s->reps; \ 110 do \ 111 mean_fun (v, w, x, MPFR_RNDN); \ 112 while (--i != 0); \ 113 t = speed_endtime (); \ 114 \ 115 MPFR_TMP_FREE (marker); \ 116 return t; \ 117 } \ 118 while (0) 119 120/* template for a function like mpfr_mul */ 121#define SPEED_MPFR_OP(mean_fun) \ 122 do \ 123 { \ 124 unsigned i; \ 125 mpfr_limb_ptr wp; \ 126 double t; \ 127 mpfr_t w, x, y; \ 128 mp_size_t size; \ 129 MPFR_TMP_DECL (marker); \ 130 \ 131 SPEED_RESTRICT_COND (s->size >= MPFR_PREC_MIN); \ 132 SPEED_RESTRICT_COND (s->size <= MPFR_PREC_MAX); \ 133 MPFR_TMP_MARK (marker); \ 134 \ 135 size = (s->size-1)/GMP_NUMB_BITS+1; \ 136 s->xp[size-1] |= MPFR_LIMB_HIGHBIT; \ 137 MPFR_TMP_INIT1 (s->xp, x, s->size); \ 138 MPFR_SET_EXP (x, 0); \ 139 s->yp[size-1] |= MPFR_LIMB_HIGHBIT; \ 140 MPFR_TMP_INIT1 (s->yp, y, s->size); \ 141 MPFR_SET_EXP (y, 0); \ 142 \ 143 MPFR_TMP_INIT (wp, w, s->size, size); \ 144 \ 145 speed_operand_src (s, s->xp, size); \ 146 speed_operand_src (s, s->yp, size); \ 147 speed_operand_dst (s, wp, size); \ 148 speed_cache_fill (s); \ 149 \ 150 speed_starttime (); \ 151 i = s->reps; \ 152 do \ 153 mean_fun (w, x, y, MPFR_RNDN); \ 154 while (--i != 0); \ 155 t = speed_endtime (); \ 156 \ 157 MPFR_TMP_FREE (marker); \ 158 return t; \ 159 } \ 160 while (0) 161 162/* special template for mpfr_mul(a,b,b) */ 163#define SPEED_MPFR_SQR(mean_fun) \ 164 do \ 165 { \ 166 unsigned i; \ 167 mpfr_limb_ptr wp; \ 168 double t; \ 169 mpfr_t w, x; \ 170 mp_size_t size; \ 171 MPFR_TMP_DECL (marker); \ 172 \ 173 SPEED_RESTRICT_COND (s->size >= MPFR_PREC_MIN); \ 174 SPEED_RESTRICT_COND (s->size <= MPFR_PREC_MAX); \ 175 MPFR_TMP_MARK (marker); \ 176 \ 177 size = (s->size-1)/GMP_NUMB_BITS+1; \ 178 s->xp[size-1] |= MPFR_LIMB_HIGHBIT; \ 179 MPFR_TMP_INIT1 (s->xp, x, s->size); \ 180 MPFR_SET_EXP (x, 0); \ 181 \ 182 MPFR_TMP_INIT (wp, w, s->size, size); \ 183 \ 184 speed_operand_src (s, s->xp, size); \ 185 speed_operand_dst (s, wp, size); \ 186 speed_cache_fill (s); \ 187 \ 188 speed_starttime (); \ 189 i = s->reps; \ 190 do \ 191 mean_fun (w, x, x, MPFR_RNDN); \ 192 while (--i != 0); \ 193 t = speed_endtime (); \ 194 \ 195 MPFR_TMP_FREE (marker); \ 196 return t; \ 197 } \ 198 while (0) 199 200/* s->size: precision of both input and output 201 s->xp : Mantissa of first input 202 s->r : exponent 203 s->align_xp : sign (1 means positive, 2 means negative) 204*/ 205#define SPEED_MPFR_FUNC_WITH_EXPONENT(mean_fun) \ 206 do \ 207 { \ 208 unsigned i; \ 209 mpfr_limb_ptr wp; \ 210 double t; \ 211 mpfr_t w, x; \ 212 mp_size_t size; \ 213 MPFR_TMP_DECL (marker); \ 214 \ 215 SPEED_RESTRICT_COND (s->size >= MPFR_PREC_MIN); \ 216 SPEED_RESTRICT_COND (s->size <= MPFR_PREC_MAX); \ 217 MPFR_TMP_MARK (marker); \ 218 \ 219 size = (s->size-1)/GMP_NUMB_BITS+1; \ 220 s->xp[size-1] |= MPFR_LIMB_HIGHBIT; \ 221 MPFR_TMP_INIT1 (s->xp, x, s->size); \ 222 MPFR_SET_EXP (x, s->r); \ 223 if (s->align_xp == 2) MPFR_SET_NEG (x); \ 224 \ 225 MPFR_TMP_INIT (wp, w, s->size, size); \ 226 \ 227 speed_operand_src (s, s->xp, size); \ 228 speed_operand_dst (s, wp, size); \ 229 speed_cache_fill (s); \ 230 \ 231 speed_starttime (); \ 232 i = s->reps; \ 233 do \ 234 mean_fun (w, x, MPFR_RNDN); \ 235 while (--i != 0); \ 236 t = speed_endtime (); \ 237 \ 238 MPFR_TMP_FREE (marker); \ 239 return t; \ 240 } \ 241 while (0) 242 243/* First we include all the functions we want to tune inside this program. 244 We can't use the GNU MPFR library since the thresholds are fixed macros. */ 245 246/* Setup mpfr_exp_2 */ 247mpfr_prec_t mpfr_exp_2_threshold; 248#undef MPFR_EXP_2_THRESHOLD 249#define MPFR_EXP_2_THRESHOLD mpfr_exp_2_threshold 250#include "exp_2.c" 251static double 252speed_mpfr_exp_2 (struct speed_params *s) 253{ 254 SPEED_MPFR_FUNC (mpfr_exp_2); 255} 256 257/* Setup mpfr_exp */ 258mpfr_prec_t mpfr_exp_threshold; 259#undef MPFR_EXP_THRESHOLD 260#define MPFR_EXP_THRESHOLD mpfr_exp_threshold 261#include "exp.c" 262static double 263speed_mpfr_exp (struct speed_params *s) 264{ 265 SPEED_MPFR_FUNC (mpfr_exp); 266} 267 268/* Setup mpfr_sin_cos */ 269mpfr_prec_t mpfr_sincos_threshold; 270#undef MPFR_SINCOS_THRESHOLD 271#define MPFR_SINCOS_THRESHOLD mpfr_sincos_threshold 272#include "sin_cos.c" 273#include "cos.c" 274static double 275speed_mpfr_sincos (struct speed_params *s) 276{ 277 SPEED_MPFR_FUNC2 (mpfr_sin_cos); 278} 279 280/* Setup mpfr_mul, mpfr_sqr and mpfr_div */ 281/* Since mpfr_mul() deals with both mul and sqr, and contains an assert that 282 the thresholds are >= 1, we initialize both values to 1 to avoid a failed 283 assertion (let's recall that static assertions are replaced by normal 284 dynamic assertions here). */ 285mpfr_prec_t mpfr_mul_threshold = 1; 286mpfr_prec_t mpfr_sqr_threshold = 1; 287mpfr_prec_t mpfr_div_threshold; 288#undef MPFR_MUL_THRESHOLD 289#define MPFR_MUL_THRESHOLD mpfr_mul_threshold 290#undef MPFR_SQR_THRESHOLD 291#define MPFR_SQR_THRESHOLD mpfr_sqr_threshold 292#undef MPFR_DIV_THRESHOLD 293#define MPFR_DIV_THRESHOLD mpfr_div_threshold 294#include "mul.c" 295#include "div.c" 296static double 297speed_mpfr_mul (struct speed_params *s) 298{ 299 SPEED_MPFR_OP (mpfr_mul); 300} 301static double 302speed_mpfr_sqr (struct speed_params *s) 303{ 304 SPEED_MPFR_SQR (mpfr_mul); 305} 306static double 307speed_mpfr_div (struct speed_params *s) 308{ 309 SPEED_MPFR_OP (mpfr_div); 310} 311 312/************************************************ 313 * Common functions (inspired by GMP function) * 314 ************************************************/ 315static int 316analyze_data (double *dat, int ndat) 317{ 318 double x, min_x; 319 int j, min_j; 320 321 x = 0.0; 322 for (j = 0; j < ndat; j++) 323 if (dat[j] > 0.0) 324 x += dat[j]; 325 326 min_x = x; 327 min_j = 0; 328 329 for (j = 0; j < ndat; x -= dat[j], j++) 330 { 331 if (x < min_x) 332 { 333 min_x = x; 334 min_j = j; 335 } 336 } 337 return min_j; 338} 339 340static double 341mpfr_speed_measure (speed_function_t fun, struct speed_params *s, char *m) 342{ 343 double t = -1.0; 344 int i; 345 int number_of_iterations = 30; 346 for (i = 1; i <= number_of_iterations && t == -1.0; i++) 347 { 348 t = speed_measure (fun, s); 349 if ( (t == -1.0) && (i+1 <= number_of_iterations) ) 350 printf("speed_measure failed for size %lu. Trying again... (%d/%d)\n", 351 s->size, i+1, number_of_iterations); 352 } 353 if (t == -1.0) 354 { 355 fprintf (stderr, "Failed to measure %s!\n", m); 356 fprintf (stderr, "If CPU frequency scaling is enabled, please disable it:\n"); 357 fprintf (stderr, " under Linux: cpufreq-selector -g performance\n"); 358 fprintf (stderr, "On a multi-core processor, you might also try to load all the cores\n"); 359 abort (); 360 } 361 return t; 362} 363 364#define THRESHOLD_WINDOW 16 365#define THRESHOLD_FINAL_WINDOW 128 366static double 367domeasure (mpfr_prec_t *threshold, 368 double (*func) (struct speed_params *), 369 mpfr_prec_t p) 370{ 371 struct speed_params s; 372 mp_size_t size; 373 double t1, t2, d; 374 375 s.align_xp = s.align_yp = s.align_wp = 64; 376 s.size = p; 377 size = (p - 1)/GMP_NUMB_BITS+1; 378 s.xp = malloc (2*size*sizeof (mp_limb_t)); 379 if (s.xp == NULL) 380 { 381 fprintf (stderr, "Can't allocate memory.\n"); 382 abort (); 383 } 384 mpn_random (s.xp, size); 385 s.yp = s.xp + size; 386 mpn_random (s.yp, size); 387 *threshold = MPFR_PREC_MAX; 388 t1 = mpfr_speed_measure (func, &s, "function 1"); 389 *threshold = 1; 390 t2 = mpfr_speed_measure (func, &s, "function 2"); 391 free (s.xp); 392 /* t1 is the time of the first algo (used for low prec) */ 393 if (t2 >= t1) 394 d = (t2 - t1) / t2; 395 else 396 d = (t2 - t1) / t1; 397 /* d > 0 if we have to use algo 1. 398 d < 0 if we have to use algo 2 */ 399 return d; 400} 401 402/* Performs measures when both the precision and the point of evaluation 403 shall vary. s.yp is ignored and not initialized. 404 It assumes that func depends on three thresholds with a boundary of the 405 form threshold1*x + threshold2*p = some scaling factor, if x<0, 406 and threshold3*x + threshold2*p = some scaling factor, if x>=0. 407*/ 408static double 409domeasure2 (long int *threshold1, long int *threshold2, long int *threshold3, 410 double (*func) (struct speed_params *), 411 mpfr_prec_t p, 412 mpfr_t x) 413{ 414 struct speed_params s; 415 mp_size_t size; 416 double t1, t2, d; 417 mpfr_t xtmp; 418 419 if (MPFR_IS_SINGULAR (x)) 420 { 421 mpfr_fprintf (stderr, "x=%RNf is not a regular number.\n"); 422 abort (); 423 } 424 if (MPFR_IS_NEG (x)) 425 s.align_xp = 2; 426 else 427 s.align_xp = 1; 428 429 s.align_yp = s.align_wp = 64; 430 s.size = p; 431 size = (p - 1)/GMP_NUMB_BITS+1; 432 433 mpfr_init2 (xtmp, p); 434 mpn_random (xtmp->_mpfr_d, size); 435 xtmp->_mpfr_d[size-1] |= MPFR_LIMB_HIGHBIT; 436 MPFR_SET_EXP (xtmp, -53); 437 mpfr_add_ui (xtmp, xtmp, 1, MPFR_RNDN); 438 mpfr_mul (xtmp, xtmp, x, MPFR_RNDN); /* xtmp = x*(1+perturb) */ 439 /* where perturb ~ 2^(-53) is */ 440 /* randomly chosen. */ 441 s.xp = xtmp->_mpfr_d; 442 s.r = MPFR_GET_EXP (xtmp); 443 444 *threshold1 = 0; 445 *threshold2 = 0; 446 *threshold3 = 0; 447 t1 = mpfr_speed_measure (func, &s, "function 1"); 448 449 if (MPFR_IS_NEG (x)) 450 *threshold1 = INT_MIN; 451 else 452 *threshold3 = INT_MAX; 453 *threshold2 = INT_MAX; 454 t2 = mpfr_speed_measure (func, &s, "function 2"); 455 456 /* t1 is the time of the first algo (used for low prec) */ 457 if (t2 >= t1) 458 d = (t2 - t1) / t2; 459 else 460 d = (t2 - t1) / t1; 461 /* d > 0 if we have to use algo 1. 462 d < 0 if we have to use algo 2 */ 463 mpfr_clear (xtmp); 464 return d; 465} 466 467/* Tune a function with a simple THRESHOLD 468 The function doesn't depend on another threshold. 469 It assumes that it uses algo1 if p < THRESHOLD 470 and algo2 otherwise. 471 if algo2 is better for low prec, and algo1 better for high prec, 472 the behaviour of this function is undefined. */ 473static void 474tune_simple_func (mpfr_prec_t *threshold, 475 double (*func) (struct speed_params *), 476 mpfr_prec_t pstart) 477{ 478 double measure[THRESHOLD_FINAL_WINDOW+1]; 479 double d = -1.0; 480 mpfr_prec_t pstep; 481 int i, numpos, numneg, try; 482 mpfr_prec_t pmin, pmax, p; 483 484 /* first look for a lower bound within 10% */ 485 pmin = p = pstart; 486 for (i = 0; i < 10 && d < 0.0; i++) 487 d = domeasure (threshold, func, pmin); 488 if (d < 0.0) 489 { 490 if (verbose) 491 printf ("Oops: even for precision %lu, algo 2 seems to be faster!\n", 492 (unsigned long) pmin); 493 *threshold = pmin; 494 return; 495 } 496 if (d >= 1.00) 497 for (;;) 498 { 499 d = domeasure (threshold, func, pmin); 500 if (d < 1.00) 501 break; 502 p = pmin; 503 pmin += pmin/2; 504 } 505 pmin = p; 506 for (;;) 507 { 508 d = domeasure (threshold, func, pmin); 509 if (d < 0.10) 510 break; 511 pmin += GMP_NUMB_BITS; 512 } 513 514 /* then look for an upper bound within 20% */ 515 pmax = pmin * 2; 516 for (;;) 517 { 518 d = domeasure (threshold, func, pmax); 519 if (d < -0.20) 520 break; 521 pmax += pmin / 2; /* don't increase too rapidly */ 522 } 523 524 /* The threshold is between pmin and pmax. Affine them */ 525 try = 0; 526 while ((pmax-pmin) >= THRESHOLD_FINAL_WINDOW) 527 { 528 pstep = MAX(MIN(GMP_NUMB_BITS/2,(pmax-pmin)/(2*THRESHOLD_WINDOW)),1); 529 if (verbose) 530 printf ("Pmin = %8lu Pmax = %8lu Pstep=%lu\n", pmin, pmax, pstep); 531 p = (pmin + pmax) / 2; 532 for (i = numpos = numneg = 0 ; i < THRESHOLD_WINDOW + 1 ; i++) 533 { 534 measure[i] = domeasure (threshold, func, 535 p+(i-THRESHOLD_WINDOW/2)*pstep); 536 if (measure[i] > 0) 537 numpos ++; 538 else if (measure[i] < 0) 539 numneg ++; 540 } 541 if (numpos > numneg) 542 /* We use more often algo 1 than algo 2 */ 543 pmin = p - THRESHOLD_WINDOW/2*pstep; 544 else if (numpos < numneg) 545 pmax = p + THRESHOLD_WINDOW/2*pstep; 546 else 547 /* numpos == numneg ... */ 548 if (++ try > 2) 549 { 550 *threshold = p; 551 if (verbose) 552 printf ("Quick find: %lu\n", *threshold); 553 return ; 554 } 555 } 556 557 /* Final tune... */ 558 if (verbose) 559 printf ("Finalizing in [%lu, %lu]... ", pmin, pmax); 560 for (i = 0 ; i < THRESHOLD_FINAL_WINDOW+1 ; i++) 561 measure[i] = domeasure (threshold, func, pmin+i); 562 i = analyze_data (measure, THRESHOLD_FINAL_WINDOW+1); 563 *threshold = pmin + i; 564 if (verbose) 565 printf ("%lu\n", *threshold); 566 return; 567} 568 569/* Tune a function which behavior depends on both p and x, 570 in a given direction. 571 It assumes that for (x,p) close to zero, algo1 is used 572 and algo2 is used when (x,p) is far from zero. 573 If algo2 is better for low prec, and algo1 better for high prec, 574 the behavior of this function is undefined. 575 This tuning function tries couples (x,p) of the form (ell*dirx, ell*dirp) 576 until it finds a point on the boundary. It returns ell. 577 */ 578static void 579tune_simple_func_in_some_direction (long int *threshold1, 580 long int *threshold2, 581 long int *threshold3, 582 double (*func) (struct speed_params *), 583 mpfr_prec_t pstart, 584 int dirx, int dirp, 585 mpfr_t xres, mpfr_prec_t *pres) 586{ 587 double measure[THRESHOLD_FINAL_WINDOW+1]; 588 double d; 589 mpfr_prec_t pstep; 590 int i, numpos, numneg, try; 591 mpfr_prec_t pmin, pmax, p; 592 mpfr_t xmin, xmax, x; 593 mpfr_t ratio; 594 595 mpfr_init2 (ratio, MPFR_SMALL_PRECISION); 596 mpfr_set_si (ratio, dirx, MPFR_RNDN); 597 mpfr_div_si (ratio, ratio, dirp, MPFR_RNDN); 598 599 mpfr_init2 (xmin, MPFR_SMALL_PRECISION); 600 mpfr_init2 (xmax, MPFR_SMALL_PRECISION); 601 mpfr_init2 (x, MPFR_SMALL_PRECISION); 602 603 /* first look for a lower bound within 10% */ 604 pmin = p = pstart; 605 mpfr_mul_ui (xmin, ratio, (unsigned int)pmin, MPFR_RNDN); 606 mpfr_set (x, xmin, MPFR_RNDN); 607 608 d = domeasure2 (threshold1, threshold2, threshold3, func, pmin, xmin); 609 if (d < 0.0) 610 { 611 if (verbose) 612 printf ("Oops: even for %lu, algo 2 seems to be faster!\n", 613 (unsigned long) pmin); 614 *pres = MPFR_PREC_MIN; 615 mpfr_mul_ui (xres, ratio, (unsigned int)*pres, MPFR_RNDN); 616 mpfr_clear (ratio); mpfr_clear (x); mpfr_clear (xmin); mpfr_clear (xmax); 617 return; 618 } 619 if (d >= 1.00) 620 for (;;) 621 { 622 d = domeasure2 (threshold1, threshold2, threshold3, func, pmin, xmin); 623 if (d < 1.00) 624 break; 625 p = pmin; 626 mpfr_set (x, xmin, MPFR_RNDN); 627 pmin += pmin/2; 628 mpfr_mul_ui (xmin, ratio, (unsigned int)pmin, MPFR_RNDN); 629 } 630 pmin = p; 631 mpfr_set (xmin, x, MPFR_RNDN); 632 for (;;) 633 { 634 d = domeasure2 (threshold1, threshold2, threshold3, func, pmin, xmin); 635 if (d < 0.10) 636 break; 637 pmin += GMP_NUMB_BITS; 638 mpfr_mul_ui (xmin, ratio, (unsigned int)pmin, MPFR_RNDN); 639 } 640 641 /* then look for an upper bound within 20% */ 642 pmax = pmin * 2; 643 mpfr_mul_ui (xmax, ratio, (unsigned int)pmax, MPFR_RNDN); 644 for (;;) 645 { 646 d = domeasure2 (threshold1, threshold2, threshold3, func, pmax, xmax); 647 if (d < -0.20) 648 break; 649 pmax += pmin / 2; /* don't increase too rapidly */ 650 mpfr_mul_ui (xmax, ratio, (unsigned int)pmax, MPFR_RNDN); 651 } 652 653 /* The threshold is between pmin and pmax. Affine them */ 654 try = 0; 655 while ((pmax-pmin) >= THRESHOLD_FINAL_WINDOW) 656 { 657 pstep = MAX(MIN(GMP_NUMB_BITS/2,(pmax-pmin)/(2*THRESHOLD_WINDOW)),1); 658 if (verbose) 659 printf ("Pmin = %8lu Pmax = %8lu Pstep=%lu\n", pmin, pmax, pstep); 660 p = (pmin + pmax) / 2; 661 mpfr_mul_ui (x, ratio, (unsigned int)p, MPFR_RNDN); 662 for (i = numpos = numneg = 0 ; i < THRESHOLD_WINDOW + 1 ; i++) 663 { 664 *pres = p+(i-THRESHOLD_WINDOW/2)*pstep; 665 mpfr_mul_ui (xres, ratio, (unsigned int)*pres, MPFR_RNDN); 666 measure[i] = domeasure2 (threshold1, threshold2, threshold3, 667 func, *pres, xres); 668 if (measure[i] > 0) 669 numpos ++; 670 else if (measure[i] < 0) 671 numneg ++; 672 } 673 if (numpos > numneg) 674 { 675 /* We use more often algo 1 than algo 2 */ 676 pmin = p - THRESHOLD_WINDOW/2*pstep; 677 mpfr_mul_ui (xmin, ratio, (unsigned int)pmin, MPFR_RNDN); 678 } 679 else if (numpos < numneg) 680 { 681 pmax = p + THRESHOLD_WINDOW/2*pstep; 682 mpfr_mul_ui (xmax, ratio, (unsigned int)pmax, MPFR_RNDN); 683 } 684 else 685 /* numpos == numneg ... */ 686 if (++ try > 2) 687 { 688 *pres = p; 689 mpfr_mul_ui (xres, ratio, (unsigned int)*pres, MPFR_RNDN); 690 if (verbose) 691 printf ("Quick find: %lu\n", *pres); 692 mpfr_clear (ratio); 693 mpfr_clear (x); mpfr_clear (xmin); mpfr_clear (xmax); 694 return ; 695 } 696 } 697 698 /* Final tune... */ 699 if (verbose) 700 printf ("Finalizing in [%lu, %lu]... ", pmin, pmax); 701 for (i = 0 ; i < THRESHOLD_FINAL_WINDOW+1 ; i++) 702 { 703 *pres = pmin+i; 704 mpfr_mul_ui (xres, ratio, (unsigned int)*pres, MPFR_RNDN); 705 measure[i] = domeasure2 (threshold1, threshold2, threshold3, 706 func, *pres, xres); 707 } 708 i = analyze_data (measure, THRESHOLD_FINAL_WINDOW+1); 709 *pres = pmin + i; 710 mpfr_mul_ui (xres, ratio, (unsigned int)*pres, MPFR_RNDN); 711 if (verbose) 712 printf ("%lu\n", *pres); 713 mpfr_clear (ratio); mpfr_clear (x); mpfr_clear (xmin); mpfr_clear (xmax); 714 return; 715} 716 717/************************************ 718 * Tune Mulders' mulhigh function * 719 ************************************/ 720#define TOLERANCE 1.00 721#define MULDERS_TABLE_SIZE 1024 722#ifndef MPFR_MULHIGH_SIZE 723# define MPFR_MULHIGH_SIZE MULDERS_TABLE_SIZE 724#endif 725#ifndef MPFR_SQRHIGH_SIZE 726# define MPFR_SQRHIGH_SIZE MULDERS_TABLE_SIZE 727#endif 728#ifndef MPFR_DIVHIGH_SIZE 729# define MPFR_DIVHIGH_SIZE MULDERS_TABLE_SIZE 730#endif 731#define MPFR_MULHIGH_TAB_SIZE MPFR_MULHIGH_SIZE 732#define MPFR_SQRHIGH_TAB_SIZE MPFR_SQRHIGH_SIZE 733#define MPFR_DIVHIGH_TAB_SIZE MPFR_DIVHIGH_SIZE 734#include "mulders.c" 735 736static double 737speed_mpfr_mulhigh (struct speed_params *s) 738{ 739 SPEED_ROUTINE_MPN_MUL_N (mpfr_mulhigh_n); 740} 741 742static double 743speed_mpfr_sqrhigh (struct speed_params *s) 744{ 745 SPEED_ROUTINE_MPN_SQR (mpfr_sqrhigh_n); 746} 747 748static double 749speed_mpfr_divhigh (struct speed_params *s) 750{ 751 SPEED_ROUTINE_MPN_DC_DIVREM_CALL (mpfr_divhigh_n (q, a, d, s->size)); 752} 753 754#define MAX_STEPS 513 /* maximum number of values of k tried for a given n */ 755 756/* Tune mpfr_mulhigh_n for size n */ 757static mp_size_t 758tune_mul_mulders_upto (mp_size_t n) 759{ 760 struct speed_params s; 761 mp_size_t k, kbest, step; 762 double t, tbest; 763 MPFR_TMP_DECL (marker); 764 765 if (n == 0) 766 return -1; 767 768 MPFR_TMP_MARK (marker); 769 s.align_xp = s.align_yp = s.align_wp = 64; 770 s.size = n; 771 s.xp = MPFR_TMP_ALLOC (n * sizeof (mp_limb_t)); 772 s.yp = MPFR_TMP_ALLOC (n * sizeof (mp_limb_t)); 773 mpn_random (s.xp, n); 774 mpn_random (s.yp, n); 775 776 /* Check k == -1, mpn_mul_basecase */ 777 mulhigh_ktab[n] = -1; 778 kbest = -1; 779 tbest = mpfr_speed_measure (speed_mpfr_mulhigh, &s, "mpfr_mulhigh"); 780 781 /* Check k == 0, mpn_mulhigh_n_basecase */ 782 mulhigh_ktab[n] = 0; 783 t = mpfr_speed_measure (speed_mpfr_mulhigh, &s, "mpfr_mulhigh"); 784 if (t * TOLERANCE < tbest) 785 kbest = 0, tbest = t; 786 787 /* Check Mulders with cutoff point k */ 788 step = 1 + n / (2 * MAX_STEPS); 789 /* we need k >= (n+3)/2, which translates into k >= (n+4)/2 in C */ 790 for (k = (n + 4) / 2 ; k < n ; k += step) 791 { 792 mulhigh_ktab[n] = k; 793 t = mpfr_speed_measure (speed_mpfr_mulhigh, &s, "mpfr_mulhigh"); 794 if (t * TOLERANCE < tbest) 795 kbest = k, tbest = t; 796 } 797 798 mulhigh_ktab[n] = kbest; 799 800 MPFR_TMP_FREE (marker); 801 return kbest; 802} 803 804/* Tune mpfr_sqrhigh_n for size n */ 805static mp_size_t 806tune_sqr_mulders_upto (mp_size_t n) 807{ 808 struct speed_params s; 809 mp_size_t k, kbest, step; 810 double t, tbest; 811 MPFR_TMP_DECL (marker); 812 813 if (n == 0) 814 return -1; 815 816 MPFR_TMP_MARK (marker); 817 s.align_xp = s.align_wp = 64; 818 s.size = n; 819 s.xp = MPFR_TMP_ALLOC (n * sizeof (mp_limb_t)); 820 mpn_random (s.xp, n); 821 822 /* Check k == -1, mpn_sqr_basecase */ 823 sqrhigh_ktab[n] = -1; 824 kbest = -1; 825 tbest = mpfr_speed_measure (speed_mpfr_sqrhigh, &s, "mpfr_sqrhigh"); 826 827 /* Check k == 0, mpfr_mulhigh_n_basecase */ 828 sqrhigh_ktab[n] = 0; 829 t = mpfr_speed_measure (speed_mpfr_sqrhigh, &s, "mpfr_sqrhigh"); 830 if (t * TOLERANCE < tbest) 831 kbest = 0, tbest = t; 832 833 /* Check Mulders */ 834 step = 1 + n / (2 * MAX_STEPS); 835 /* we need k >= (n+3)/2, which translates into k >= (n+4)/2 in C */ 836 for (k = (n + 4) / 2 ; k < n ; k += step) 837 { 838 sqrhigh_ktab[n] = k; 839 t = mpfr_speed_measure (speed_mpfr_sqrhigh, &s, "mpfr_sqrhigh"); 840 if (t * TOLERANCE < tbest) 841 kbest = k, tbest = t; 842 } 843 844 sqrhigh_ktab[n] = kbest; 845 846 MPFR_TMP_FREE (marker); 847 return kbest; 848} 849 850/* Tune mpfr_divhigh_n for size n. 851 Ensure divhigh_ktab[n] < n for n > 0. */ 852static mp_size_t 853tune_div_mulders_upto (mp_size_t n) 854{ 855 struct speed_params s; 856 mp_size_t k, kbest, step; 857 double t, tbest; 858 MPFR_TMP_DECL (marker); 859 860 /* we require n > 2 in mpfr_divhigh */ 861 if (n <= 2) 862 { 863 divhigh_ktab[n] = 0; 864 return 0; 865 } 866 867 MPFR_TMP_MARK (marker); 868 s.align_xp = s.align_yp = s.align_wp = s.align_wp2 = 64; 869 s.size = n; 870 s.xp = MPFR_TMP_ALLOC (n * sizeof (mp_limb_t)); 871 s.yp = MPFR_TMP_ALLOC (n * sizeof (mp_limb_t)); 872 mpn_random (s.xp, n); 873 mpn_random (s.yp, n); 874 875 /* Check k == 0, i.e., mpfr_divhigh_n_basecase */ 876 kbest = 0; 877 tbest = mpfr_speed_measure (speed_mpfr_divhigh, &s, "mpfr_divhigh"); 878 879 /* Check Mulders */ 880 step = 1 + n / (2 * MAX_STEPS); 881 /* we should have (n+3)/2 <= k < n-1, which translates into 882 (n+4)/2 <= k < n-1 in C */ 883 for (k = (n + 4) / 2 ; k < n - 1; k += step) 884 { 885 divhigh_ktab[n] = k; 886 t = mpfr_speed_measure (speed_mpfr_divhigh, &s, "mpfr_divhigh"); 887 if (t * TOLERANCE < tbest) 888 kbest = k, tbest = t; 889 } 890 891 MPFR_ASSERTN(kbest < n); 892 divhigh_ktab[n] = kbest; 893 894 MPFR_TMP_FREE (marker); 895 896 return kbest; 897} 898 899static void 900tune_mul_mulders (FILE *f) 901{ 902 mp_size_t k; 903 904 if (verbose) 905 printf ("Tuning mpfr_mulhigh_n[%d]", (int) MPFR_MULHIGH_TAB_SIZE); 906 fprintf (f, "#define MPFR_MULHIGH_TAB \\\n "); 907 for (k = 0 ; k < MPFR_MULHIGH_TAB_SIZE ; k++) 908 { 909 fprintf (f, "%d", (int) tune_mul_mulders_upto (k)); 910 if (k != MPFR_MULHIGH_TAB_SIZE-1) 911 fputc (',', f); 912 if ((k+1) % 16 == 0) 913 fprintf (f, " \\\n "); 914 if (verbose) 915 putchar ('.'); 916 } 917 fprintf (f, " \n"); 918 if (verbose) 919 putchar ('\n'); 920} 921 922static void 923tune_sqr_mulders (FILE *f) 924{ 925 mp_size_t k; 926 927 if (verbose) 928 printf ("Tuning mpfr_sqrhigh_n[%d]", (int) MPFR_SQRHIGH_TAB_SIZE); 929 fprintf (f, "#define MPFR_SQRHIGH_TAB \\\n "); 930 for (k = 0 ; k < MPFR_SQRHIGH_TAB_SIZE ; k++) 931 { 932 fprintf (f, "%d", (int) tune_sqr_mulders_upto (k)); 933 if (k != MPFR_SQRHIGH_TAB_SIZE-1) 934 fputc (',', f); 935 if ((k+1) % 16 == 0) 936 fprintf (f, " \\\n "); 937 if (verbose) 938 putchar ('.'); 939 } 940 fprintf (f, " \n"); 941 if (verbose) 942 putchar ('\n'); 943} 944 945static void 946tune_div_mulders (FILE *f) 947{ 948 mp_size_t k; 949 950 if (verbose) 951 printf ("Tuning mpfr_divhigh_n[%d]", (int) MPFR_DIVHIGH_TAB_SIZE); 952 fprintf (f, "#define MPFR_DIVHIGH_TAB \\\n "); 953 for (k = 0 ; k < MPFR_DIVHIGH_TAB_SIZE ; k++) 954 { 955 fprintf (f, "%d", (int) tune_div_mulders_upto (k)); 956 if (k != MPFR_DIVHIGH_TAB_SIZE - 1) 957 fputc (',', f); 958 if ((k+1) % 16 == 0) 959 fprintf (f, " /*%zu-%zu*/ \\\n ", (size_t) k - 15, (size_t) k); 960 if (verbose) 961 putchar ('.'); 962 } 963 fprintf (f, " \n"); 964 if (verbose) 965 putchar ('\n'); 966} 967 968/******************************************************* 969 * Tuning functions for mpfr_ai * 970 *******************************************************/ 971 972long int mpfr_ai_threshold1; 973long int mpfr_ai_threshold2; 974long int mpfr_ai_threshold3; 975#undef MPFR_AI_THRESHOLD1 976#define MPFR_AI_THRESHOLD1 mpfr_ai_threshold1 977#undef MPFR_AI_THRESHOLD2 978#define MPFR_AI_THRESHOLD2 mpfr_ai_threshold2 979#undef MPFR_AI_THRESHOLD3 980#define MPFR_AI_THRESHOLD3 mpfr_ai_threshold3 981 982#include "ai.c" 983 984static double 985speed_mpfr_ai (struct speed_params *s) 986{ 987 SPEED_MPFR_FUNC_WITH_EXPONENT (mpfr_ai); 988} 989 990 991/******************************************************* 992 * Tune all the threshold of MPFR * 993 * Warning: tune the function in their dependent order!* 994 *******************************************************/ 995static void 996all (const char *filename) 997{ 998 FILE *f; 999 time_t start_time, end_time; 1000 struct tm *tp; 1001 mpfr_t x1, x2, x3, tmp1, tmp2; 1002 mpfr_prec_t p1, p2, p3; 1003 1004 f = fopen (filename, "w"); 1005 if (f == NULL) 1006 { 1007 fprintf (stderr, "Can't open file '%s' for writing.\n", filename); 1008 abort (); 1009 } 1010 1011 speed_time_init (); 1012 if (verbose) 1013 { 1014 printf ("Using: %s\n", speed_time_string); 1015 printf ("speed_precision %d", speed_precision); 1016 if (speed_unittime == 1.0) 1017 printf (", speed_unittime 1 cycle"); 1018 else 1019 printf (", speed_unittime %.2e secs", speed_unittime); 1020 if (speed_cycletime == 1.0 || speed_cycletime == 0.0) 1021 printf (", CPU freq unknown\n"); 1022 else 1023 printf (", CPU freq %.2f MHz\n\n", 1e-6/speed_cycletime); 1024 } 1025 1026 time (&start_time); 1027 tp = localtime (&start_time); 1028 fprintf (f, "/* Generated by MPFR's tuneup.c, %d-%02d-%02d, ", 1029 tp->tm_year+1900, tp->tm_mon+1, tp->tm_mday); 1030 1031#ifdef __ICC 1032 fprintf (f, "icc %d.%d.%d */\n", __ICC / 100, __ICC / 10 % 10, __ICC % 10); 1033#elif defined(__GNUC__) 1034#ifdef __GNUC_PATCHLEVEL__ 1035 fprintf (f, "gcc %d.%d.%d */\n", __GNUC__, __GNUC_MINOR__, 1036 __GNUC_PATCHLEVEL__); 1037#else 1038 fprintf (f, "gcc %d.%d */\n", __GNUC__, __GNUC_MINOR__); 1039#endif 1040#elif defined (__SUNPRO_C) 1041 fprintf (f, "Sun C %d.%d */\n", __SUNPRO_C / 0x100, __SUNPRO_C % 0x100); 1042#elif defined (__sgi) && defined (_COMPILER_VERSION) 1043 fprintf (f, "MIPSpro C %d.%d.%d */\n", 1044 _COMPILER_VERSION / 100, 1045 _COMPILER_VERSION / 10 % 10, 1046 _COMPILER_VERSION % 10); 1047#elif defined (__DECC) && defined (__DECC_VER) 1048 fprintf (f, "DEC C %d */\n", __DECC_VER); 1049#else 1050 fprintf (f, "system compiler */\n"); 1051#endif 1052 fprintf (f, "\n\n"); 1053 fprintf (f, "#ifndef MPFR_TUNE_CASE\n"); 1054 fprintf (f, "#define MPFR_TUNE_CASE \"src/mparam.h\"\n"); 1055 fprintf (f, "#endif\n\n"); 1056 1057 /* Tune mulhigh */ 1058 tune_mul_mulders (f); 1059 1060 /* Tune sqrhigh */ 1061 tune_sqr_mulders (f); 1062 1063 /* Tune divhigh */ 1064 tune_div_mulders (f); 1065 fflush (f); 1066 1067 /* Tune mpfr_mul (threshold is in limbs, but it doesn't matter too much) */ 1068 if (verbose) 1069 printf ("Tuning mpfr_mul...\n"); 1070 tune_simple_func (&mpfr_mul_threshold, speed_mpfr_mul, 1071 2*GMP_NUMB_BITS+1); 1072 fprintf (f, "#define MPFR_MUL_THRESHOLD %lu /* limbs */\n", 1073 (unsigned long) (mpfr_mul_threshold - 1) / GMP_NUMB_BITS + 1); 1074 1075 /* Tune mpfr_sqr (threshold is in limbs, but it doesn't matter too much) */ 1076 if (verbose) 1077 printf ("Tuning mpfr_sqr...\n"); 1078 tune_simple_func (&mpfr_sqr_threshold, speed_mpfr_sqr, 1079 2*GMP_NUMB_BITS+1); 1080 fprintf (f, "#define MPFR_SQR_THRESHOLD %lu /* limbs */\n", 1081 (unsigned long) (mpfr_sqr_threshold - 1) / GMP_NUMB_BITS + 1); 1082 1083 /* Tune mpfr_div (threshold is in limbs, but it doesn't matter too much) */ 1084 if (verbose) 1085 printf ("Tuning mpfr_div...\n"); 1086 tune_simple_func (&mpfr_div_threshold, speed_mpfr_div, 1087 2*GMP_NUMB_BITS+1); 1088 fprintf (f, "#define MPFR_DIV_THRESHOLD %lu /* limbs */\n", 1089 (unsigned long) (mpfr_div_threshold - 1) / GMP_NUMB_BITS + 1); 1090 1091 /* Tune mpfr_exp_2 */ 1092 if (verbose) 1093 printf ("Tuning mpfr_exp_2...\n"); 1094 tune_simple_func (&mpfr_exp_2_threshold, speed_mpfr_exp_2, GMP_NUMB_BITS); 1095 fprintf (f, "#define MPFR_EXP_2_THRESHOLD %lu /* bits */\n", 1096 (unsigned long) mpfr_exp_2_threshold); 1097 1098 /* Tune mpfr_exp */ 1099 if (verbose) 1100 printf ("Tuning mpfr_exp...\n"); 1101 tune_simple_func (&mpfr_exp_threshold, speed_mpfr_exp, 1102 MPFR_PREC_MIN+3*GMP_NUMB_BITS); 1103 fprintf (f, "#define MPFR_EXP_THRESHOLD %lu /* bits */\n", 1104 (unsigned long) mpfr_exp_threshold); 1105 1106 /* Tune mpfr_sin_cos */ 1107 if (verbose) 1108 printf ("Tuning mpfr_sin_cos...\n"); 1109 tune_simple_func (&mpfr_sincos_threshold, speed_mpfr_sincos, 1110 MPFR_PREC_MIN+3*GMP_NUMB_BITS); 1111 fprintf (f, "#define MPFR_SINCOS_THRESHOLD %lu /* bits */\n", 1112 (unsigned long) mpfr_sincos_threshold); 1113 1114 /* Tune mpfr_ai */ 1115 if (verbose) 1116 printf ("Tuning mpfr_ai...\n"); 1117 mpfr_init2 (x1, MPFR_SMALL_PRECISION); 1118 mpfr_init2 (x2, MPFR_SMALL_PRECISION); 1119 mpfr_init2 (x3, MPFR_SMALL_PRECISION); 1120 mpfr_init2 (tmp1, MPFR_SMALL_PRECISION); 1121 mpfr_init2 (tmp2, MPFR_SMALL_PRECISION); 1122 1123 tune_simple_func_in_some_direction (&mpfr_ai_threshold1, &mpfr_ai_threshold2, 1124 &mpfr_ai_threshold3, speed_mpfr_ai, 1125 MPFR_PREC_MIN+GMP_NUMB_BITS, 1126 -60, 200, x1, &p1); 1127 tune_simple_func_in_some_direction (&mpfr_ai_threshold1, &mpfr_ai_threshold2, 1128 &mpfr_ai_threshold3, speed_mpfr_ai, 1129 MPFR_PREC_MIN+GMP_NUMB_BITS, 1130 -20, 500, x2, &p2); 1131 tune_simple_func_in_some_direction (&mpfr_ai_threshold1, &mpfr_ai_threshold2, 1132 &mpfr_ai_threshold3, speed_mpfr_ai, 1133 MPFR_PREC_MIN+GMP_NUMB_BITS, 1134 40, 200, x3, &p3); 1135 1136 mpfr_mul_ui (tmp1, x2, (unsigned long)p1, MPFR_RNDN); 1137 mpfr_mul_ui (tmp2, x1, (unsigned long)p2, MPFR_RNDN); 1138 mpfr_sub (tmp1, tmp1, tmp2, MPFR_RNDN); 1139 mpfr_div_ui (tmp1, tmp1, MPFR_AI_SCALE, MPFR_RNDN); 1140 1141 mpfr_set_ui (tmp2, (unsigned long)p1, MPFR_RNDN); 1142 mpfr_sub_ui (tmp2, tmp2, (unsigned long)p2, MPFR_RNDN); 1143 mpfr_div (tmp2, tmp2, tmp1, MPFR_RNDN); 1144 mpfr_ai_threshold1 = mpfr_get_si (tmp2, MPFR_RNDN); 1145 1146 mpfr_sub (tmp2, x2, x1, MPFR_RNDN); 1147 mpfr_div (tmp2, tmp2, tmp1, MPFR_RNDN); 1148 mpfr_ai_threshold2 = mpfr_get_si (tmp2, MPFR_RNDN); 1149 1150 mpfr_set_ui (tmp1, (unsigned long)p3, MPFR_RNDN); 1151 mpfr_mul_si (tmp1, tmp1, mpfr_ai_threshold2, MPFR_RNDN); 1152 mpfr_ui_sub (tmp1, MPFR_AI_SCALE, tmp1, MPFR_RNDN); 1153 mpfr_div (tmp1, tmp1, x3, MPFR_RNDN); 1154 mpfr_ai_threshold3 = mpfr_get_si (tmp1, MPFR_RNDN); 1155 1156 fprintf (f, "#define MPFR_AI_THRESHOLD1 %ld /* threshold for negative input of mpfr_ai */\n", mpfr_ai_threshold1); 1157 fprintf (f, "#define MPFR_AI_THRESHOLD2 %ld\n", mpfr_ai_threshold2); 1158 fprintf (f, "#define MPFR_AI_THRESHOLD3 %ld\n", mpfr_ai_threshold3); 1159 1160 mpfr_clear (x1); mpfr_clear (x2); mpfr_clear (x3); 1161 mpfr_clear (tmp1); mpfr_clear (tmp2); 1162 1163 /* End of tuning */ 1164 time (&end_time); 1165 fprintf (f, "/* Tuneup completed successfully, took %ld seconds */\n", 1166 (long) (end_time - start_time)); 1167 if (verbose) 1168 printf ("Complete (took %ld seconds).\n", (long) (end_time - start_time)); 1169 1170 fclose (f); 1171} 1172 1173 1174/* Main function */ 1175int main (int argc, char *argv[]) 1176{ 1177 /* Unbuffered so if output is redirected to a file it isn't lost if the 1178 program is killed part way through. */ 1179 setbuf (stdout, NULL); 1180 setbuf (stderr, NULL); 1181 1182 verbose = argc > 1; 1183 1184 if (verbose) 1185 printf ("Tuning MPFR (Coffee time?)...\n"); 1186 1187 all ("mparam.h"); 1188 1189 return 0; 1190} 1191