1/* Run some tests on various mpn routines. 2 3 THIS IS A TEST PROGRAM USED ONLY FOR DEVELOPMENT. IT'S ALMOST CERTAIN TO 4 BE SUBJECT TO INCOMPATIBLE CHANGES IN FUTURE VERSIONS OF GMP. 5 6Copyright 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2008, 2009 Free Software 7Foundation, Inc. 8 9This file is part of the GNU MP Library. 10 11The GNU MP Library is free software; you can redistribute it and/or modify 12it under the terms of the GNU Lesser General Public License as published by 13the Free Software Foundation; either version 3 of the License, or (at your 14option) any later version. 15 16The GNU MP Library is distributed in the hope that it will be useful, but 17WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 18or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public 19License for more details. 20 21You should have received a copy of the GNU Lesser General Public License 22along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ 23 24 25/* Usage: try [options] <function>... 26 27 For example, "./try mpn_add_n" to run tests of that function. 28 29 Combinations of alignments and overlaps are tested, with redzones above 30 or below the destinations, and with the sources write-protected. 31 32 The number of tests performed becomes ridiculously large with all the 33 combinations, and for that reason this can't be a part of a "make check", 34 it's meant only for development. The code isn't very pretty either. 35 36 During development it can help to disable the redzones, since seeing the 37 rest of the destination written can show where the wrong part is, or if 38 the dst pointers are off by 1 or whatever. The magic DEADVAL initial 39 fill (see below) will show locations never written. 40 41 The -s option can be used to test only certain size operands, which is 42 useful if some new code doesn't yet support say sizes less than the 43 unrolling, or whatever. 
44 45 When a problem occurs it'll of course be necessary to run the program 46 under gdb to find out quite where, how and why it's going wrong. Disable 47 the spinner with the -W option when doing this, or single stepping won't 48 work. Using the "-1" option to run with simple data can be useful. 49 50 New functions to test can be added in try_array[]. If a new TYPE is 51 required then add it to the existing constants, set up its parameters in 52 param_init(), and add it to the call() function. Extra parameter fields 53 can be added if necessary, or further interpretations given to existing 54 fields. 55 56 57 Portability: 58 59 This program is not designed for use on Cray vector systems under Unicos, 60 it will fail to compile due to missing _SC_PAGE_SIZE. Those systems 61 don't really have pages or mprotect. We could arrange to run the tests 62 without the redzones, but we haven't bothered currently. 63 64 65 Enhancements: 66 67 umul_ppmm support is not very good, lots of source data is generated 68 whereas only two limbs are needed. 69 70 Make a little scheme for interpreting the "SIZE" selections uniformly. 71 72 Make tr->size==SIZE_2 work, for the benefit of find_a which wants just 2 73 source limbs. Possibly increase the default repetitions in that case. 74 75 Automatically detect gdb and disable the spinner (use -W for now). 76 77 Make a way to re-run a failing case in the debugger. Have an option to 78 snapshot each test case before it's run so the data is available if a 79 segv occurs. (This should be more reliable than the current print_all() 80 in the signal handler.) 81 82 When alignment means a dst isn't hard against the redzone, check the 83 space in between remains unchanged. 84 85 When a source overlaps a destination, don't run both s[i].high 0 and 1, 86 as s[i].high has no effect. Maybe encode s[i].high into overlap->s[i]. 87 88 When partial overlaps aren't done, don't loop over source alignments 89 during overlaps. 
90 91 Try to make the looping code a bit less horrible. Right now it's pretty 92 hard to see what iterations are actually done. 93 94 Perhaps specific setups and loops for each style of function under test 95 would be clearer than a parameterized general loop. There's lots of 96 stuff common to all functions, but the exceptions get messy. 97 98 When there's no overlap, run with both src>dst and src<dst. A subtle 99 calling-conventions violation occurred in a P6 copy which depended on the 100 relative location of src and dst. 101 102 multiplier_N is more or less a third source region for the addmul_N 103 routines, and could be done with the redzoned region scheme. 104 105*/ 106 107 108/* always do assertion checking */ 109#define WANT_ASSERT 1 110 111#include "config.h" 112 113#include <errno.h> 114#include <limits.h> 115#include <signal.h> 116#include <stdio.h> 117#include <stdlib.h> 118#include <string.h> 119#include <time.h> 120 121#if HAVE_UNISTD_H 122#include <unistd.h> 123#endif 124 125#if HAVE_SYS_MMAN_H 126#include <sys/mman.h> 127#endif 128 129#include "gmp.h" 130#include "gmp-impl.h" 131#include "longlong.h" 132#include "tests.h" 133 134 135#if !HAVE_DECL_OPTARG 136extern char *optarg; 137extern int optind, opterr; 138#endif 139 140#if ! HAVE_DECL_SYS_NERR 141extern int sys_nerr; 142#endif 143 144#if ! HAVE_DECL_SYS_ERRLIST 145extern char *sys_errlist[]; 146#endif 147 148#if ! HAVE_STRERROR 149char * 150strerror (int n) 151{ 152 if (n < 0 || n >= sys_nerr) 153 return "errno out of range"; 154 else 155 return sys_errlist[n]; 156} 157#endif 158 159/* Rumour has it some systems lack a define of PROT_NONE. */ 160#ifndef PROT_NONE 161#define PROT_NONE 0 162#endif 163 164/* Dummy defines for when mprotect doesn't exist. */ 165#ifndef PROT_READ 166#define PROT_READ 0 167#endif 168#ifndef PROT_WRITE 169#define PROT_WRITE 0 170#endif 171 172/* _SC_PAGESIZE is standard, but hpux 9 and possibly other systems have 173 _SC_PAGE_SIZE instead. 
*/ 174#if defined (_SC_PAGE_SIZE) && ! defined (_SC_PAGESIZE) 175#define _SC_PAGESIZE _SC_PAGE_SIZE 176#endif 177 178 179#ifdef EXTRA_PROTOS 180EXTRA_PROTOS 181#endif 182#ifdef EXTRA_PROTOS2 183EXTRA_PROTOS2 184#endif 185 186 187#define DEFAULT_REPETITIONS 10 188 189int option_repetitions = DEFAULT_REPETITIONS; 190int option_spinner = 1; 191int option_redzones = 1; 192int option_firstsize = 0; 193int option_lastsize = 500; 194int option_firstsize2 = 0; 195 196#define ALIGNMENTS 4 197#define OVERLAPS 4 198#define CARRY_RANDOMS 5 199#define MULTIPLIER_RANDOMS 5 200#define DIVISOR_RANDOMS 5 201#define FRACTION_COUNT 4 202 203int option_print = 0; 204 205#define DATA_TRAND 0 206#define DATA_ZEROS 1 207#define DATA_SEQ 2 208#define DATA_FFS 3 209#define DATA_2FD 4 210int option_data = DATA_TRAND; 211 212 213mp_size_t pagesize; 214#define PAGESIZE_LIMBS (pagesize / BYTES_PER_MP_LIMB) 215 216/* must be a multiple of the page size */ 217#define REDZONE_BYTES (pagesize * 16) 218#define REDZONE_LIMBS (REDZONE_BYTES / BYTES_PER_MP_LIMB) 219 220 221#define MAX3(x,y,z) (MAX (x, MAX (y, z))) 222 223#if GMP_LIMB_BITS == 32 224#define DEADVAL CNST_LIMB(0xDEADBEEF) 225#else 226#define DEADVAL CNST_LIMB(0xDEADBEEFBADDCAFE) 227#endif 228 229 230struct region_t { 231 mp_ptr ptr; 232 mp_size_t size; 233}; 234 235 236#define TRAP_NOWHERE 0 237#define TRAP_REF 1 238#define TRAP_FUN 2 239#define TRAP_SETUPS 3 240int trap_location = TRAP_NOWHERE; 241 242 243#define NUM_SOURCES 2 244#define NUM_DESTS 2 245 246struct source_t { 247 struct region_t region; 248 int high; 249 mp_size_t align; 250 mp_ptr p; 251}; 252 253struct source_t s[NUM_SOURCES]; 254 255struct dest_t { 256 int high; 257 mp_size_t align; 258 mp_size_t size; 259}; 260 261struct dest_t d[NUM_DESTS]; 262 263struct source_each_t { 264 mp_ptr p; 265}; 266 267struct dest_each_t { 268 struct region_t region; 269 mp_ptr p; 270}; 271 272mp_size_t size; 273mp_size_t size2; 274unsigned long shift; 275mp_limb_t carry; 276mp_limb_t 
divisor; 277mp_limb_t multiplier; 278mp_limb_t multiplier_N[8]; 279 280struct each_t { 281 const char *name; 282 struct dest_each_t d[NUM_DESTS]; 283 struct source_each_t s[NUM_SOURCES]; 284 mp_limb_t retval; 285}; 286 287struct each_t ref = { "Ref" }; 288struct each_t fun = { "Fun" }; 289 290#define SRC_SIZE(n) ((n) == 1 && tr->size2 ? size2 : size) 291 292void validate_fail __GMP_PROTO ((void)); 293 294 295#if HAVE_TRY_NEW_C 296#include "try-new.c" 297#endif 298 299 300typedef mp_limb_t (*tryfun_t) __GMP_PROTO ((ANYARGS)); 301 302struct try_t { 303 char retval; 304 305 char src[2]; 306 char dst[2]; 307 308#define SIZE_YES 1 309#define SIZE_ALLOW_ZERO 2 310#define SIZE_1 3 /* 1 limb */ 311#define SIZE_2 4 /* 2 limbs */ 312#define SIZE_3 5 /* 3 limbs */ 313#define SIZE_FRACTION 6 /* size2 is fraction for divrem etc */ 314#define SIZE_SIZE2 7 315#define SIZE_PLUS_1 8 316#define SIZE_SUM 9 317#define SIZE_DIFF 10 318#define SIZE_DIFF_PLUS_1 11 319#define SIZE_RETVAL 12 320#define SIZE_CEIL_HALF 13 321#define SIZE_GET_STR 14 322#define SIZE_PLUS_MSIZE_SUB_1 15 /* size+msize-1 */ 323 char size; 324 char size2; 325 char dst_size[2]; 326 327 /* multiplier_N size in limbs */ 328 mp_size_t msize; 329 330 char dst_bytes[2]; 331 332 char dst0_from_src1; 333 334#define CARRY_BIT 1 /* single bit 0 or 1 */ 335#define CARRY_3 2 /* 0, 1, 2 */ 336#define CARRY_4 3 /* 0 to 3 */ 337#define CARRY_LIMB 4 /* any limb value */ 338#define CARRY_DIVISOR 5 /* carry<divisor */ 339 char carry; 340 341 /* a fudge to tell the output when to print negatives */ 342 char carry_sign; 343 344 char multiplier; 345 char shift; 346 347#define DIVISOR_LIMB 1 348#define DIVISOR_NORM 2 349#define DIVISOR_ODD 3 350 char divisor; 351 352#define DATA_NON_ZERO 1 353#define DATA_GCD 2 354#define DATA_SRC0_ODD 3 355#define DATA_SRC0_HIGHBIT 4 356#define DATA_SRC1_ODD 5 357#define DATA_SRC1_HIGHBIT 6 358#define DATA_MULTIPLE_DIVISOR 7 359#define DATA_UDIV_QRNND 8 360 char data; 361 362/* Default is allow full 
overlap. */ 363#define OVERLAP_NONE 1 364#define OVERLAP_LOW_TO_HIGH 2 365#define OVERLAP_HIGH_TO_LOW 3 366#define OVERLAP_NOT_SRCS 4 367#define OVERLAP_NOT_SRC2 8 368 char overlap; 369 370 tryfun_t reference; 371 const char *reference_name; 372 373 void (*validate) __GMP_PROTO ((void)); 374 const char *validate_name; 375}; 376 377struct try_t *tr; 378 379 380void 381validate_mod_34lsub1 (void) 382{ 383#define CNST_34LSUB1 ((CNST_LIMB(1) << (3 * (GMP_NUMB_BITS / 4))) - 1) 384 385 mp_srcptr ptr = s[0].p; 386 int error = 0; 387 mp_limb_t got, got_mod, want, want_mod; 388 389 ASSERT (size >= 1); 390 391 got = fun.retval; 392 got_mod = got % CNST_34LSUB1; 393 394 want = refmpn_mod_34lsub1 (ptr, size); 395 want_mod = want % CNST_34LSUB1; 396 397 if (got_mod != want_mod) 398 { 399 gmp_printf ("got 0x%MX reduced from 0x%MX\n", got_mod, got); 400 gmp_printf ("want 0x%MX reduced from 0x%MX\n", want_mod, want); 401 error = 1; 402 } 403 404 if (error) 405 validate_fail (); 406} 407 408void 409validate_divexact_1 (void) 410{ 411 mp_srcptr src = s[0].p; 412 mp_srcptr dst = fun.d[0].p; 413 int error = 0; 414 415 ASSERT (size >= 1); 416 417 { 418 mp_ptr tp = refmpn_malloc_limbs (size); 419 mp_limb_t rem; 420 421 rem = refmpn_divrem_1 (tp, 0, src, size, divisor); 422 if (rem != 0) 423 { 424 gmp_printf ("Remainder a%%d == 0x%MX, mpn_divexact_1 undefined\n", rem); 425 error = 1; 426 } 427 if (! refmpn_equal_anynail (tp, dst, size)) 428 { 429 printf ("Quotient a/d wrong\n"); 430 mpn_trace ("fun ", dst, size); 431 mpn_trace ("want", tp, size); 432 error = 1; 433 } 434 free (tp); 435 } 436 437 if (error) 438 validate_fail (); 439} 440 441 442void 443validate_modexact_1c_odd (void) 444{ 445 mp_srcptr ptr = s[0].p; 446 mp_limb_t r = fun.retval; 447 int error = 0; 448 449 ASSERT (size >= 1); 450 ASSERT (divisor & 1); 451 452 if ((r & GMP_NAIL_MASK) != 0) 453 printf ("r has non-zero nail\n"); 454 455 if (carry < divisor) 456 { 457 if (! 
(r < divisor)) 458 { 459 printf ("Don't have r < divisor\n"); 460 error = 1; 461 } 462 } 463 else /* carry >= divisor */ 464 { 465 if (! (r <= divisor)) 466 { 467 printf ("Don't have r <= divisor\n"); 468 error = 1; 469 } 470 } 471 472 { 473 mp_limb_t c = carry % divisor; 474 mp_ptr tp = refmpn_malloc_limbs (size+1); 475 mp_size_t k; 476 477 for (k = size-1; k <= size; k++) 478 { 479 /* set {tp,size+1} to r*b^k + a - c */ 480 refmpn_copyi (tp, ptr, size); 481 tp[size] = 0; 482 ASSERT_NOCARRY (refmpn_add_1 (tp+k, tp+k, size+1-k, r)); 483 if (refmpn_sub_1 (tp, tp, size+1, c)) 484 ASSERT_CARRY (mpn_add_1 (tp, tp, size+1, divisor)); 485 486 if (refmpn_mod_1 (tp, size+1, divisor) == 0) 487 goto good_remainder; 488 } 489 printf ("Remainder matches neither r*b^(size-1) nor r*b^size\n"); 490 error = 1; 491 492 good_remainder: 493 free (tp); 494 } 495 496 if (error) 497 validate_fail (); 498} 499 500void 501validate_modexact_1_odd (void) 502{ 503 carry = 0; 504 validate_modexact_1c_odd (); 505} 506 507 508void 509validate_sqrtrem (void) 510{ 511 mp_srcptr orig_ptr = s[0].p; 512 mp_size_t orig_size = size; 513 mp_size_t root_size = (size+1)/2; 514 mp_srcptr root_ptr = fun.d[0].p; 515 mp_size_t rem_size = fun.retval; 516 mp_srcptr rem_ptr = fun.d[1].p; 517 mp_size_t prod_size = 2*root_size; 518 mp_ptr p; 519 int error = 0; 520 521 if (rem_size < 0 || rem_size > size) 522 { 523 printf ("Bad remainder size retval %ld\n", (long) rem_size); 524 validate_fail (); 525 } 526 527 p = refmpn_malloc_limbs (prod_size); 528 529 p[root_size] = refmpn_lshift (p, root_ptr, root_size, 1); 530 if (refmpn_cmp_twosizes (p,root_size+1, rem_ptr,rem_size) < 0) 531 { 532 printf ("Remainder bigger than 2*root\n"); 533 error = 1; 534 } 535 536 refmpn_sqr (p, root_ptr, root_size); 537 if (rem_size != 0) 538 refmpn_add (p, p, prod_size, rem_ptr, rem_size); 539 if (refmpn_cmp_twosizes (p,prod_size, orig_ptr,orig_size) != 0) 540 { 541 printf ("root^2+rem != original\n"); 542 mpn_trace ("prod", p, 
prod_size); 543 error = 1; 544 } 545 free (p); 546 547 if (error) 548 validate_fail (); 549} 550 551 552/* These types are indexes into the param[] array and are arbitrary so long 553 as they're all distinct and within the size of param[]. Renumber 554 whenever necessary or desired. */ 555 556#define TYPE_ADD 1 557#define TYPE_ADD_N 2 558#define TYPE_ADD_NC 3 559#define TYPE_SUB 4 560#define TYPE_SUB_N 5 561#define TYPE_SUB_NC 6 562 563#define TYPE_MUL_1 7 564#define TYPE_MUL_1C 8 565 566#define TYPE_MUL_2 9 567#define TYPE_MUL_3 92 568#define TYPE_MUL_4 93 569 570#define TYPE_ADDMUL_1 10 571#define TYPE_ADDMUL_1C 11 572#define TYPE_SUBMUL_1 12 573#define TYPE_SUBMUL_1C 13 574 575#define TYPE_ADDMUL_2 14 576#define TYPE_ADDMUL_3 15 577#define TYPE_ADDMUL_4 16 578#define TYPE_ADDMUL_5 17 579#define TYPE_ADDMUL_6 18 580#define TYPE_ADDMUL_7 19 581#define TYPE_ADDMUL_8 20 582 583#define TYPE_ADDSUB_N 21 584#define TYPE_ADDSUB_NC 22 585 586#define TYPE_RSHIFT 23 587#define TYPE_LSHIFT 24 588#define TYPE_LSHIFTC 25 589 590#define TYPE_COPY 26 591#define TYPE_COPYI 27 592#define TYPE_COPYD 28 593#define TYPE_COM 29 594 595#define TYPE_ADDLSH1_N 30 596#define TYPE_ADDLSH2_N 48 597#define TYPE_ADDLSH_N 49 598#define TYPE_SUBLSH1_N 31 599#define TYPE_SUBLSH_N 130 600#define TYPE_RSBLSH1_N 34 601#define TYPE_RSBLSH2_N 46 602#define TYPE_RSBLSH_N 47 603#define TYPE_RSH1ADD_N 32 604#define TYPE_RSH1SUB_N 33 605 606#define TYPE_MOD_1 35 607#define TYPE_MOD_1C 36 608#define TYPE_DIVMOD_1 37 609#define TYPE_DIVMOD_1C 38 610#define TYPE_DIVREM_1 39 611#define TYPE_DIVREM_1C 40 612#define TYPE_PREINV_DIVREM_1 41 613#define TYPE_PREINV_MOD_1 42 614#define TYPE_MOD_34LSUB1 43 615#define TYPE_UDIV_QRNND 44 616#define TYPE_UDIV_QRNND_R 45 617 618#define TYPE_DIVEXACT_1 50 619#define TYPE_DIVEXACT_BY3 51 620#define TYPE_DIVEXACT_BY3C 52 621#define TYPE_MODEXACT_1_ODD 53 622#define TYPE_MODEXACT_1C_ODD 54 623 624#define TYPE_INVERT 55 625#define TYPE_BINVERT 56 626 627#define TYPE_GCD 60 
628#define TYPE_GCD_1 61 629#define TYPE_GCD_FINDA 62 630#define TYPE_MPZ_JACOBI 63 631#define TYPE_MPZ_KRONECKER 64 632#define TYPE_MPZ_KRONECKER_UI 65 633#define TYPE_MPZ_KRONECKER_SI 66 634#define TYPE_MPZ_UI_KRONECKER 67 635#define TYPE_MPZ_SI_KRONECKER 68 636 637#define TYPE_AND_N 70 638#define TYPE_NAND_N 71 639#define TYPE_ANDN_N 72 640#define TYPE_IOR_N 73 641#define TYPE_IORN_N 74 642#define TYPE_NIOR_N 75 643#define TYPE_XOR_N 76 644#define TYPE_XNOR_N 77 645 646#define TYPE_MUL_MN 80 647#define TYPE_MUL_N 81 648#define TYPE_SQR 82 649#define TYPE_UMUL_PPMM 83 650#define TYPE_UMUL_PPMM_R 84 651#define TYPE_MULLO_N 85 652 653#define TYPE_SBPI1_DIV_QR 90 654#define TYPE_TDIV_QR 91 655 656#define TYPE_SQRTREM 100 657#define TYPE_ZERO 101 658#define TYPE_GET_STR 102 659#define TYPE_POPCOUNT 103 660#define TYPE_HAMDIST 104 661 662#define TYPE_EXTRA 110 663 664struct try_t param[150]; 665 666 667void 668param_init (void) 669{ 670 struct try_t *p; 671 672#define COPY(index) memcpy (p, ¶m[index], sizeof (*p)) 673 674#if HAVE_STRINGIZE 675#define REFERENCE(fun) \ 676 p->reference = (tryfun_t) fun; \ 677 p->reference_name = #fun 678#define VALIDATE(fun) \ 679 p->validate = fun; \ 680 p->validate_name = #fun 681#else 682#define REFERENCE(fun) \ 683 p->reference = (tryfun_t) fun; \ 684 p->reference_name = "fun" 685#define VALIDATE(fun) \ 686 p->validate = fun; \ 687 p->validate_name = "fun" 688#endif 689 690 691 p = ¶m[TYPE_ADD_N]; 692 p->retval = 1; 693 p->dst[0] = 1; 694 p->src[0] = 1; 695 p->src[1] = 1; 696 REFERENCE (refmpn_add_n); 697 698 p = ¶m[TYPE_ADD_NC]; 699 COPY (TYPE_ADD_N); 700 p->carry = CARRY_BIT; 701 REFERENCE (refmpn_add_nc); 702 703 p = ¶m[TYPE_SUB_N]; 704 COPY (TYPE_ADD_N); 705 REFERENCE (refmpn_sub_n); 706 707 p = ¶m[TYPE_SUB_NC]; 708 COPY (TYPE_ADD_NC); 709 REFERENCE (refmpn_sub_nc); 710 711 p = ¶m[TYPE_ADD]; 712 COPY (TYPE_ADD_N); 713 p->size = SIZE_ALLOW_ZERO; 714 p->size2 = 1; 715 REFERENCE (refmpn_add); 716 717 p = ¶m[TYPE_SUB]; 718 COPY 
(TYPE_ADD); 719 REFERENCE (refmpn_sub); 720 721 722 p = ¶m[TYPE_MUL_1]; 723 p->retval = 1; 724 p->dst[0] = 1; 725 p->src[0] = 1; 726 p->multiplier = 1; 727 p->overlap = OVERLAP_LOW_TO_HIGH; 728 REFERENCE (refmpn_mul_1); 729 730 p = ¶m[TYPE_MUL_1C]; 731 COPY (TYPE_MUL_1); 732 p->carry = CARRY_LIMB; 733 REFERENCE (refmpn_mul_1c); 734 735 736 p = ¶m[TYPE_MUL_2]; 737 p->retval = 1; 738 p->dst[0] = 1; 739 p->dst_size[0] = SIZE_PLUS_MSIZE_SUB_1; 740 p->src[0] = 1; 741 p->src[1] = 1; 742 p->msize = 2; 743 p->overlap = OVERLAP_NOT_SRC2; 744 REFERENCE (refmpn_mul_2); 745 746 p = ¶m[TYPE_MUL_3]; 747 COPY (TYPE_MUL_2); 748 p->msize = 3; 749 REFERENCE (refmpn_mul_3); 750 751 p = ¶m[TYPE_MUL_4]; 752 COPY (TYPE_MUL_2); 753 p->msize = 4; 754 REFERENCE (refmpn_mul_4); 755 756 757 p = ¶m[TYPE_ADDMUL_1]; 758 p->retval = 1; 759 p->dst[0] = 1; 760 p->src[0] = 1; 761 p->multiplier = 1; 762 p->dst0_from_src1 = 1; 763 REFERENCE (refmpn_addmul_1); 764 765 p = ¶m[TYPE_ADDMUL_1C]; 766 COPY (TYPE_ADDMUL_1); 767 p->carry = CARRY_LIMB; 768 REFERENCE (refmpn_addmul_1c); 769 770 p = ¶m[TYPE_SUBMUL_1]; 771 COPY (TYPE_ADDMUL_1); 772 REFERENCE (refmpn_submul_1); 773 774 p = ¶m[TYPE_SUBMUL_1C]; 775 COPY (TYPE_ADDMUL_1C); 776 REFERENCE (refmpn_submul_1c); 777 778 779 p = ¶m[TYPE_ADDMUL_2]; 780 p->retval = 1; 781 p->dst[0] = 1; 782 p->dst_size[0] = SIZE_PLUS_MSIZE_SUB_1; 783 p->src[0] = 1; 784 p->src[1] = 1; 785 p->msize = 2; 786 p->dst0_from_src1 = 1; 787 p->overlap = OVERLAP_NOT_SRC2; 788 REFERENCE (refmpn_addmul_2); 789 790 p = ¶m[TYPE_ADDMUL_3]; 791 COPY (TYPE_ADDMUL_2); 792 p->msize = 3; 793 REFERENCE (refmpn_addmul_3); 794 795 p = ¶m[TYPE_ADDMUL_4]; 796 COPY (TYPE_ADDMUL_2); 797 p->msize = 4; 798 REFERENCE (refmpn_addmul_4); 799 800 p = ¶m[TYPE_ADDMUL_5]; 801 COPY (TYPE_ADDMUL_2); 802 p->msize = 5; 803 REFERENCE (refmpn_addmul_5); 804 805 p = ¶m[TYPE_ADDMUL_6]; 806 COPY (TYPE_ADDMUL_2); 807 p->msize = 6; 808 REFERENCE (refmpn_addmul_6); 809 810 p = ¶m[TYPE_ADDMUL_7]; 811 COPY (TYPE_ADDMUL_2); 
812 p->msize = 7; 813 REFERENCE (refmpn_addmul_7); 814 815 p = ¶m[TYPE_ADDMUL_8]; 816 COPY (TYPE_ADDMUL_2); 817 p->msize = 8; 818 REFERENCE (refmpn_addmul_8); 819 820 821 p = ¶m[TYPE_AND_N]; 822 p->dst[0] = 1; 823 p->src[0] = 1; 824 p->src[1] = 1; 825 REFERENCE (refmpn_and_n); 826 827 p = ¶m[TYPE_ANDN_N]; 828 COPY (TYPE_AND_N); 829 REFERENCE (refmpn_andn_n); 830 831 p = ¶m[TYPE_NAND_N]; 832 COPY (TYPE_AND_N); 833 REFERENCE (refmpn_nand_n); 834 835 p = ¶m[TYPE_IOR_N]; 836 COPY (TYPE_AND_N); 837 REFERENCE (refmpn_ior_n); 838 839 p = ¶m[TYPE_IORN_N]; 840 COPY (TYPE_AND_N); 841 REFERENCE (refmpn_iorn_n); 842 843 p = ¶m[TYPE_NIOR_N]; 844 COPY (TYPE_AND_N); 845 REFERENCE (refmpn_nior_n); 846 847 p = ¶m[TYPE_XOR_N]; 848 COPY (TYPE_AND_N); 849 REFERENCE (refmpn_xor_n); 850 851 p = ¶m[TYPE_XNOR_N]; 852 COPY (TYPE_AND_N); 853 REFERENCE (refmpn_xnor_n); 854 855 856 p = ¶m[TYPE_ADDSUB_N]; 857 p->retval = 1; 858 p->dst[0] = 1; 859 p->dst[1] = 1; 860 p->src[0] = 1; 861 p->src[1] = 1; 862 REFERENCE (refmpn_add_n_sub_n); 863 864 p = ¶m[TYPE_ADDSUB_NC]; 865 COPY (TYPE_ADDSUB_N); 866 p->carry = CARRY_4; 867 REFERENCE (refmpn_add_n_sub_nc); 868 869 870 p = ¶m[TYPE_COPY]; 871 p->dst[0] = 1; 872 p->src[0] = 1; 873 p->overlap = OVERLAP_NONE; 874 p->size = SIZE_ALLOW_ZERO; 875 REFERENCE (refmpn_copy); 876 877 p = ¶m[TYPE_COPYI]; 878 p->dst[0] = 1; 879 p->src[0] = 1; 880 p->overlap = OVERLAP_LOW_TO_HIGH; 881 p->size = SIZE_ALLOW_ZERO; 882 REFERENCE (refmpn_copyi); 883 884 p = ¶m[TYPE_COPYD]; 885 p->dst[0] = 1; 886 p->src[0] = 1; 887 p->overlap = OVERLAP_HIGH_TO_LOW; 888 p->size = SIZE_ALLOW_ZERO; 889 REFERENCE (refmpn_copyd); 890 891 p = ¶m[TYPE_COM]; 892 p->dst[0] = 1; 893 p->src[0] = 1; 894 REFERENCE (refmpn_com); 895 896 897 p = ¶m[TYPE_ADDLSH1_N]; 898 COPY (TYPE_ADD_N); 899 REFERENCE (refmpn_addlsh1_n); 900 901 p = ¶m[TYPE_ADDLSH2_N]; 902 COPY (TYPE_ADD_N); 903 REFERENCE (refmpn_addlsh2_n); 904 905 p = ¶m[TYPE_ADDLSH_N]; 906 COPY (TYPE_ADD_N); 907 p->shift = 1; 908 REFERENCE 
(refmpn_addlsh_n); 909 910 p = ¶m[TYPE_SUBLSH1_N]; 911 COPY (TYPE_ADD_N); 912 REFERENCE (refmpn_sublsh1_n); 913 914 p = ¶m[TYPE_SUBLSH_N]; 915 COPY (TYPE_ADDLSH_N); 916 REFERENCE (refmpn_sublsh_n); 917 918 p = ¶m[TYPE_RSBLSH1_N]; 919 COPY (TYPE_ADD_N); 920 REFERENCE (refmpn_rsblsh1_n); 921 922 p = ¶m[TYPE_RSBLSH2_N]; 923 COPY (TYPE_ADD_N); 924 REFERENCE (refmpn_rsblsh2_n); 925 926 p = ¶m[TYPE_RSBLSH_N]; 927 COPY (TYPE_ADDLSH_N); 928 REFERENCE (refmpn_rsblsh_n); 929 930 p = ¶m[TYPE_RSH1ADD_N]; 931 COPY (TYPE_ADD_N); 932 REFERENCE (refmpn_rsh1add_n); 933 934 p = ¶m[TYPE_RSH1SUB_N]; 935 COPY (TYPE_ADD_N); 936 REFERENCE (refmpn_rsh1sub_n); 937 938 939 p = ¶m[TYPE_MOD_1]; 940 p->retval = 1; 941 p->src[0] = 1; 942 p->size = SIZE_ALLOW_ZERO; 943 p->divisor = DIVISOR_LIMB; 944 REFERENCE (refmpn_mod_1); 945 946 p = ¶m[TYPE_MOD_1C]; 947 COPY (TYPE_MOD_1); 948 p->carry = CARRY_DIVISOR; 949 REFERENCE (refmpn_mod_1c); 950 951 p = ¶m[TYPE_DIVMOD_1]; 952 COPY (TYPE_MOD_1); 953 p->dst[0] = 1; 954 REFERENCE (refmpn_divmod_1); 955 956 p = ¶m[TYPE_DIVMOD_1C]; 957 COPY (TYPE_DIVMOD_1); 958 p->carry = CARRY_DIVISOR; 959 REFERENCE (refmpn_divmod_1c); 960 961 p = ¶m[TYPE_DIVREM_1]; 962 COPY (TYPE_DIVMOD_1); 963 p->size2 = SIZE_FRACTION; 964 p->dst_size[0] = SIZE_SUM; 965 REFERENCE (refmpn_divrem_1); 966 967 p = ¶m[TYPE_DIVREM_1C]; 968 COPY (TYPE_DIVREM_1); 969 p->carry = CARRY_DIVISOR; 970 REFERENCE (refmpn_divrem_1c); 971 972 p = ¶m[TYPE_PREINV_DIVREM_1]; 973 COPY (TYPE_DIVREM_1); 974 p->size = SIZE_YES; /* ie. no size==0 */ 975 REFERENCE (refmpn_preinv_divrem_1); 976 977 p = ¶m[TYPE_PREINV_MOD_1]; 978 p->retval = 1; 979 p->src[0] = 1; 980 p->divisor = DIVISOR_NORM; 981 REFERENCE (refmpn_preinv_mod_1); 982 983 p = ¶m[TYPE_MOD_34LSUB1]; 984 p->retval = 1; 985 p->src[0] = 1; 986 VALIDATE (validate_mod_34lsub1); 987 988 p = ¶m[TYPE_UDIV_QRNND]; 989 p->retval = 1; 990 p->src[0] = 1; 991 p->dst[0] = 1; 992 p->dst_size[0] = SIZE_1; 993 p->divisor = UDIV_NEEDS_NORMALIZATION ? 
DIVISOR_NORM : DIVISOR_LIMB; 994 p->data = DATA_UDIV_QRNND; 995 p->overlap = OVERLAP_NONE; 996 REFERENCE (refmpn_udiv_qrnnd); 997 998 p = ¶m[TYPE_UDIV_QRNND_R]; 999 COPY (TYPE_UDIV_QRNND); 1000 REFERENCE (refmpn_udiv_qrnnd_r); 1001 1002 1003 p = ¶m[TYPE_DIVEXACT_1]; 1004 p->dst[0] = 1; 1005 p->src[0] = 1; 1006 p->divisor = DIVISOR_LIMB; 1007 p->data = DATA_MULTIPLE_DIVISOR; 1008 VALIDATE (validate_divexact_1); 1009 REFERENCE (refmpn_divmod_1); 1010 1011 1012 p = ¶m[TYPE_DIVEXACT_BY3]; 1013 p->retval = 1; 1014 p->dst[0] = 1; 1015 p->src[0] = 1; 1016 REFERENCE (refmpn_divexact_by3); 1017 1018 p = ¶m[TYPE_DIVEXACT_BY3C]; 1019 COPY (TYPE_DIVEXACT_BY3); 1020 p->carry = CARRY_3; 1021 REFERENCE (refmpn_divexact_by3c); 1022 1023 1024 p = ¶m[TYPE_MODEXACT_1_ODD]; 1025 p->retval = 1; 1026 p->src[0] = 1; 1027 p->divisor = DIVISOR_ODD; 1028 VALIDATE (validate_modexact_1_odd); 1029 1030 p = ¶m[TYPE_MODEXACT_1C_ODD]; 1031 COPY (TYPE_MODEXACT_1_ODD); 1032 p->carry = CARRY_LIMB; 1033 VALIDATE (validate_modexact_1c_odd); 1034 1035 1036 p = ¶m[TYPE_GCD_1]; 1037 p->retval = 1; 1038 p->src[0] = 1; 1039 p->data = DATA_NON_ZERO; 1040 p->divisor = DIVISOR_LIMB; 1041 REFERENCE (refmpn_gcd_1); 1042 1043 p = ¶m[TYPE_GCD]; 1044 p->retval = 1; 1045 p->dst[0] = 1; 1046 p->src[0] = 1; 1047 p->src[1] = 1; 1048 p->size2 = 1; 1049 p->dst_size[0] = SIZE_RETVAL; 1050 p->overlap = OVERLAP_NOT_SRCS; 1051 p->data = DATA_GCD; 1052 REFERENCE (refmpn_gcd); 1053 1054 1055 p = ¶m[TYPE_MPZ_JACOBI]; 1056 p->retval = 1; 1057 p->src[0] = 1; 1058 p->size = SIZE_ALLOW_ZERO; 1059 p->src[1] = 1; 1060 p->data = DATA_SRC1_ODD; 1061 p->size2 = 1; 1062 p->carry = CARRY_4; 1063 p->carry_sign = 1; 1064 REFERENCE (refmpz_jacobi); 1065 1066 p = ¶m[TYPE_MPZ_KRONECKER]; 1067 COPY (TYPE_MPZ_JACOBI); 1068 p->data = 0; /* clear inherited DATA_SRC1_ODD */ 1069 REFERENCE (refmpz_kronecker); 1070 1071 1072 p = ¶m[TYPE_MPZ_KRONECKER_UI]; 1073 p->retval = 1; 1074 p->src[0] = 1; 1075 p->size = SIZE_ALLOW_ZERO; 1076 p->multiplier = 1; 
1077 p->carry = CARRY_BIT; 1078 REFERENCE (refmpz_kronecker_ui); 1079 1080 p = ¶m[TYPE_MPZ_KRONECKER_SI]; 1081 COPY (TYPE_MPZ_KRONECKER_UI); 1082 REFERENCE (refmpz_kronecker_si); 1083 1084 p = ¶m[TYPE_MPZ_UI_KRONECKER]; 1085 COPY (TYPE_MPZ_KRONECKER_UI); 1086 REFERENCE (refmpz_ui_kronecker); 1087 1088 p = ¶m[TYPE_MPZ_SI_KRONECKER]; 1089 COPY (TYPE_MPZ_KRONECKER_UI); 1090 REFERENCE (refmpz_si_kronecker); 1091 1092 1093 p = ¶m[TYPE_SQR]; 1094 p->dst[0] = 1; 1095 p->src[0] = 1; 1096 p->dst_size[0] = SIZE_SUM; 1097 p->overlap = OVERLAP_NONE; 1098 REFERENCE (refmpn_sqr); 1099 1100 p = ¶m[TYPE_MUL_N]; 1101 COPY (TYPE_SQR); 1102 p->src[1] = 1; 1103 REFERENCE (refmpn_mul_n); 1104 1105 p = ¶m[TYPE_MULLO_N]; 1106 COPY (TYPE_MUL_N); 1107 p->dst_size[0] = 0; 1108 REFERENCE (refmpn_mullo_n); 1109 1110 p = ¶m[TYPE_MUL_MN]; 1111 COPY (TYPE_MUL_N); 1112 p->size2 = 1; 1113 REFERENCE (refmpn_mul_basecase); 1114 1115 p = ¶m[TYPE_UMUL_PPMM]; 1116 p->retval = 1; 1117 p->src[0] = 1; 1118 p->dst[0] = 1; 1119 p->dst_size[0] = SIZE_1; 1120 p->overlap = OVERLAP_NONE; 1121 REFERENCE (refmpn_umul_ppmm); 1122 1123 p = ¶m[TYPE_UMUL_PPMM_R]; 1124 COPY (TYPE_UMUL_PPMM); 1125 REFERENCE (refmpn_umul_ppmm_r); 1126 1127 1128 p = ¶m[TYPE_RSHIFT]; 1129 p->retval = 1; 1130 p->dst[0] = 1; 1131 p->src[0] = 1; 1132 p->shift = 1; 1133 p->overlap = OVERLAP_LOW_TO_HIGH; 1134 REFERENCE (refmpn_rshift); 1135 1136 p = ¶m[TYPE_LSHIFT]; 1137 COPY (TYPE_RSHIFT); 1138 p->overlap = OVERLAP_HIGH_TO_LOW; 1139 REFERENCE (refmpn_lshift); 1140 1141 p = ¶m[TYPE_LSHIFTC]; 1142 COPY (TYPE_RSHIFT); 1143 p->overlap = OVERLAP_HIGH_TO_LOW; 1144 REFERENCE (refmpn_lshiftc); 1145 1146 1147 p = ¶m[TYPE_POPCOUNT]; 1148 p->retval = 1; 1149 p->src[0] = 1; 1150 REFERENCE (refmpn_popcount); 1151 1152 p = ¶m[TYPE_HAMDIST]; 1153 COPY (TYPE_POPCOUNT); 1154 p->src[1] = 1; 1155 REFERENCE (refmpn_hamdist); 1156 1157 1158 p = ¶m[TYPE_SBPI1_DIV_QR]; 1159 p->retval = 1; 1160 p->dst[0] = 1; 1161 p->dst[1] = 1; 1162 p->src[0] = 1; 1163 p->src[1] = 
1; 1164 p->data = DATA_SRC1_HIGHBIT; 1165 p->size2 = 1; 1166 p->dst_size[0] = SIZE_DIFF; 1167 p->overlap = OVERLAP_NONE; 1168 REFERENCE (refmpn_sb_div_qr); 1169 1170 p = ¶m[TYPE_TDIV_QR]; 1171 p->dst[0] = 1; 1172 p->dst[1] = 1; 1173 p->src[0] = 1; 1174 p->src[1] = 1; 1175 p->size2 = 1; 1176 p->dst_size[0] = SIZE_DIFF_PLUS_1; 1177 p->dst_size[1] = SIZE_SIZE2; 1178 p->overlap = OVERLAP_NONE; 1179 REFERENCE (refmpn_tdiv_qr); 1180 1181 p = ¶m[TYPE_SQRTREM]; 1182 p->retval = 1; 1183 p->dst[0] = 1; 1184 p->dst[1] = 1; 1185 p->src[0] = 1; 1186 p->dst_size[0] = SIZE_CEIL_HALF; 1187 p->dst_size[1] = SIZE_RETVAL; 1188 p->overlap = OVERLAP_NONE; 1189 VALIDATE (validate_sqrtrem); 1190 REFERENCE (refmpn_sqrtrem); 1191 1192 p = ¶m[TYPE_ZERO]; 1193 p->dst[0] = 1; 1194 p->size = SIZE_ALLOW_ZERO; 1195 REFERENCE (refmpn_zero); 1196 1197 p = ¶m[TYPE_GET_STR]; 1198 p->retval = 1; 1199 p->src[0] = 1; 1200 p->size = SIZE_ALLOW_ZERO; 1201 p->dst[0] = 1; 1202 p->dst[1] = 1; 1203 p->dst_size[0] = SIZE_GET_STR; 1204 p->dst_bytes[0] = 1; 1205 p->overlap = OVERLAP_NONE; 1206 REFERENCE (refmpn_get_str); 1207 1208 p = ¶m[TYPE_BINVERT]; 1209 p->dst[0] = 1; 1210 p->src[0] = 1; 1211 p->data = DATA_SRC0_ODD; 1212 p->overlap = OVERLAP_NONE; 1213 REFERENCE (refmpn_binvert); 1214 1215 p = ¶m[TYPE_INVERT]; 1216 p->dst[0] = 1; 1217 p->src[0] = 1; 1218 p->data = DATA_SRC0_HIGHBIT; 1219 p->overlap = OVERLAP_NONE; 1220 REFERENCE (refmpn_invert); 1221 1222#ifdef EXTRA_PARAM_INIT 1223 EXTRA_PARAM_INIT 1224#endif 1225} 1226 1227 1228/* The following are macros if there's no native versions, so wrap them in 1229 functions that can be in try_array[]. 
*/ 1230 1231void 1232MPN_COPY_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size) 1233{ MPN_COPY (rp, sp, size); } 1234 1235void 1236MPN_COPY_INCR_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size) 1237{ MPN_COPY_INCR (rp, sp, size); } 1238 1239void 1240MPN_COPY_DECR_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size) 1241{ MPN_COPY_DECR (rp, sp, size); } 1242 1243void 1244__GMPN_COPY_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size) 1245{ __GMPN_COPY (rp, sp, size); } 1246 1247#ifdef __GMPN_COPY_INCR 1248void 1249__GMPN_COPY_INCR_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size) 1250{ __GMPN_COPY_INCR (rp, sp, size); } 1251#endif 1252 1253void 1254mpn_com_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size) 1255{ mpn_com (rp, sp, size); } 1256 1257void 1258mpn_and_n_fun (mp_ptr rp, mp_srcptr s1, mp_srcptr s2, mp_size_t size) 1259{ mpn_and_n (rp, s1, s2, size); } 1260 1261void 1262mpn_andn_n_fun (mp_ptr rp, mp_srcptr s1, mp_srcptr s2, mp_size_t size) 1263{ mpn_andn_n (rp, s1, s2, size); } 1264 1265void 1266mpn_nand_n_fun (mp_ptr rp, mp_srcptr s1, mp_srcptr s2, mp_size_t size) 1267{ mpn_nand_n (rp, s1, s2, size); } 1268 1269void 1270mpn_ior_n_fun (mp_ptr rp, mp_srcptr s1, mp_srcptr s2, mp_size_t size) 1271{ mpn_ior_n (rp, s1, s2, size); } 1272 1273void 1274mpn_iorn_n_fun (mp_ptr rp, mp_srcptr s1, mp_srcptr s2, mp_size_t size) 1275{ mpn_iorn_n (rp, s1, s2, size); } 1276 1277void 1278mpn_nior_n_fun (mp_ptr rp, mp_srcptr s1, mp_srcptr s2, mp_size_t size) 1279{ mpn_nior_n (rp, s1, s2, size); } 1280 1281void 1282mpn_xor_n_fun (mp_ptr rp, mp_srcptr s1, mp_srcptr s2, mp_size_t size) 1283{ mpn_xor_n (rp, s1, s2, size); } 1284 1285void 1286mpn_xnor_n_fun (mp_ptr rp, mp_srcptr s1, mp_srcptr s2, mp_size_t size) 1287{ mpn_xnor_n (rp, s1, s2, size); } 1288 1289mp_limb_t 1290udiv_qrnnd_fun (mp_limb_t *remptr, mp_limb_t n1, mp_limb_t n0, mp_limb_t d) 1291{ 1292 mp_limb_t q; 1293 udiv_qrnnd (q, *remptr, n1, n0, d); 1294 return q; 1295} 1296 1297mp_limb_t 1298mpn_divexact_by3_fun (mp_ptr rp, mp_srcptr sp, 
mp_size_t size) 1299{ 1300 return mpn_divexact_by3 (rp, sp, size); 1301} 1302 1303mp_limb_t 1304mpn_modexact_1_odd_fun (mp_srcptr ptr, mp_size_t size, mp_limb_t divisor) 1305{ 1306 return mpn_modexact_1_odd (ptr, size, divisor); 1307} 1308 1309void 1310mpn_toom22_mul_fun (mp_ptr dst, mp_srcptr src1, mp_srcptr src2, mp_size_t size) 1311{ 1312 mp_ptr tspace; 1313 TMP_DECL; 1314 TMP_MARK; 1315 tspace = TMP_ALLOC_LIMBS (mpn_toom22_mul_itch (size, size)); 1316 mpn_toom22_mul (dst, src1, size, src2, size, tspace); 1317 TMP_FREE; 1318} 1319void 1320mpn_toom2_sqr_fun (mp_ptr dst, mp_srcptr src, mp_size_t size) 1321{ 1322 mp_ptr tspace; 1323 TMP_DECL; 1324 TMP_MARK; 1325 tspace = TMP_ALLOC_LIMBS (mpn_toom2_sqr_itch (size)); 1326 mpn_toom2_sqr (dst, src, size, tspace); 1327 TMP_FREE; 1328} 1329void 1330mpn_toom33_mul_fun (mp_ptr dst, mp_srcptr src1, mp_srcptr src2, mp_size_t size) 1331{ 1332 mp_ptr tspace; 1333 TMP_DECL; 1334 TMP_MARK; 1335 tspace = TMP_ALLOC_LIMBS (mpn_toom33_mul_itch (size, size)); 1336 mpn_toom33_mul (dst, src1, size, src2, size, tspace); 1337 TMP_FREE; 1338} 1339void 1340mpn_toom3_sqr_fun (mp_ptr dst, mp_srcptr src, mp_size_t size) 1341{ 1342 mp_ptr tspace; 1343 TMP_DECL; 1344 TMP_MARK; 1345 tspace = TMP_ALLOC_LIMBS (mpn_toom3_sqr_itch (size)); 1346 mpn_toom3_sqr (dst, src, size, tspace); 1347 TMP_FREE; 1348} 1349void 1350mpn_toom44_mul_fun (mp_ptr dst, mp_srcptr src1, mp_srcptr src2, mp_size_t size) 1351{ 1352 mp_ptr tspace; 1353 TMP_DECL; 1354 TMP_MARK; 1355 tspace = TMP_ALLOC_LIMBS (mpn_toom44_mul_itch (size, size)); 1356 mpn_toom44_mul (dst, src1, size, src2, size, tspace); 1357 TMP_FREE; 1358} 1359void 1360mpn_toom4_sqr_fun (mp_ptr dst, mp_srcptr src, mp_size_t size) 1361{ 1362 mp_ptr tspace; 1363 TMP_DECL; 1364 TMP_MARK; 1365 tspace = TMP_ALLOC_LIMBS (mpn_toom4_sqr_itch (size)); 1366 mpn_toom4_sqr (dst, src, size, tspace); 1367 TMP_FREE; 1368} 1369 1370mp_limb_t 1371umul_ppmm_fun (mp_limb_t *lowptr, mp_limb_t m1, mp_limb_t m2) 1372{ 1373 mp_limb_t 
high;
  /* umul_ppmm is a macro taking lvalues; this wrapper gives it a function
     form: low limb of m1*m2 stored through lowptr, high limb returned.  */
  umul_ppmm (high, *lowptr, m1, m2);
  return high;
}

/* Function form of the MPN_ZERO macro, for use via TRY_FUNFUN.  */
void
MPN_ZERO_fun (mp_ptr ptr, mp_size_t size)
{ MPN_ZERO (ptr, size); }


/* One testable routine.  Fields omitted from an initializer below are
   zero, as usual for C aggregates.  */
struct choice_t {
  const char  *name;      /* routine name as a string (from TRY/TRY_FUNFUN) */
  tryfun_t    function;   /* routine to test, cast to the generic type */
  int         type;       /* TYPE_xxx; call() switches on this */
  mp_size_t   minsize;    /* lower bound on "size", see SIZE_ITERATION */
};

/* TRY(fun) expands to the first two initializers of a choice_t: the name
   string and the function pointer.  TRY_FUNFUN(fun) uses the name "fun" but
   points at the wrapper fun_fun.  The #else branch is for preprocessors
   without # and ##; NOTE(review): "fun" inside a string literal is only
   substituted by pre-ANSI preprocessors, presumably that is the intent.  */
#if HAVE_STRINGIZE
#define TRY(fun)        #fun, (tryfun_t) fun
#define TRY_FUNFUN(fun) #fun, (tryfun_t) fun##_fun
#else
#define TRY(fun)        "fun", (tryfun_t) fun
#define TRY_FUNFUN(fun) "fun", (tryfun_t) fun/**/_fun
#endif

/* Table of all routines that can be selected for testing.  Entries guarded
   by HAVE_NATIVE_xxx (and similar) are only present when that symbol is
   true at compile time.  A third initializer, where given, is minsize.  */
const struct choice_t choice_array[] = {
  { TRY(mpn_add),       TYPE_ADD    },
  { TRY(mpn_sub),       TYPE_SUB    },

  { TRY(mpn_add_n),     TYPE_ADD_N  },
  { TRY(mpn_sub_n),     TYPE_SUB_N  },

#if HAVE_NATIVE_mpn_add_nc
  { TRY(mpn_add_nc),    TYPE_ADD_NC },
#endif
#if HAVE_NATIVE_mpn_sub_nc
  { TRY(mpn_sub_nc),    TYPE_SUB_NC },
#endif

#if HAVE_NATIVE_mpn_add_n_sub_n
  { TRY(mpn_add_n_sub_n),  TYPE_ADDSUB_N  },
#endif
#if HAVE_NATIVE_mpn_add_n_sub_nc
  { TRY(mpn_add_n_sub_nc), TYPE_ADDSUB_NC },
#endif

  { TRY(mpn_addmul_1),  TYPE_ADDMUL_1  },
  { TRY(mpn_submul_1),  TYPE_SUBMUL_1  },
#if HAVE_NATIVE_mpn_addmul_1c
  { TRY(mpn_addmul_1c), TYPE_ADDMUL_1C },
#endif
#if HAVE_NATIVE_mpn_submul_1c
  { TRY(mpn_submul_1c), TYPE_SUBMUL_1C },
#endif

#if HAVE_NATIVE_mpn_addmul_2
  { TRY(mpn_addmul_2), TYPE_ADDMUL_2, 2 },
#endif
#if HAVE_NATIVE_mpn_addmul_3
  { TRY(mpn_addmul_3), TYPE_ADDMUL_3, 3 },
#endif
#if HAVE_NATIVE_mpn_addmul_4
  { TRY(mpn_addmul_4), TYPE_ADDMUL_4, 4 },
#endif
#if HAVE_NATIVE_mpn_addmul_5
  { TRY(mpn_addmul_5), TYPE_ADDMUL_5, 5 },
#endif
#if HAVE_NATIVE_mpn_addmul_6
  { TRY(mpn_addmul_6), TYPE_ADDMUL_6, 6 },
#endif
#if HAVE_NATIVE_mpn_addmul_7
  { TRY(mpn_addmul_7), TYPE_ADDMUL_7, 7 },
#endif
#if HAVE_NATIVE_mpn_addmul_8
  { TRY(mpn_addmul_8), TYPE_ADDMUL_8, 8 },
#endif

  { TRY_FUNFUN(mpn_com), TYPE_COM },

  { TRY_FUNFUN(MPN_COPY), TYPE_COPY },
  { TRY_FUNFUN(MPN_COPY_INCR), TYPE_COPYI },
  { TRY_FUNFUN(MPN_COPY_DECR), TYPE_COPYD },

  { TRY_FUNFUN(__GMPN_COPY), TYPE_COPY },
#ifdef __GMPN_COPY_INCR
  { TRY_FUNFUN(__GMPN_COPY_INCR), TYPE_COPYI },
#endif

#if HAVE_NATIVE_mpn_copyi
  { TRY(mpn_copyi), TYPE_COPYI },
#endif
#if HAVE_NATIVE_mpn_copyd
  { TRY(mpn_copyd), TYPE_COPYD },
#endif

#if HAVE_NATIVE_mpn_addlsh1_n
  { TRY(mpn_addlsh1_n), TYPE_ADDLSH1_N },
#endif
#if HAVE_NATIVE_mpn_addlsh2_n
  { TRY(mpn_addlsh2_n), TYPE_ADDLSH2_N },
#endif
#if HAVE_NATIVE_mpn_addlsh_n
  { TRY(mpn_addlsh_n), TYPE_ADDLSH_N },
#endif
#if HAVE_NATIVE_mpn_sublsh1_n
  { TRY(mpn_sublsh1_n), TYPE_SUBLSH1_N },
#endif
#if HAVE_NATIVE_mpn_sublsh_n
  { TRY(mpn_sublsh_n), TYPE_SUBLSH_N },
#endif
#if HAVE_NATIVE_mpn_rsblsh1_n
  { TRY(mpn_rsblsh1_n), TYPE_RSBLSH1_N },
#endif
#if HAVE_NATIVE_mpn_rsblsh2_n
  { TRY(mpn_rsblsh2_n), TYPE_RSBLSH2_N },
#endif
#if HAVE_NATIVE_mpn_rsblsh_n
  { TRY(mpn_rsblsh_n), TYPE_RSBLSH_N },
#endif
#if HAVE_NATIVE_mpn_rsh1add_n
  { TRY(mpn_rsh1add_n), TYPE_RSH1ADD_N },
#endif
#if HAVE_NATIVE_mpn_rsh1sub_n
  { TRY(mpn_rsh1sub_n), TYPE_RSH1SUB_N },
#endif

  { TRY_FUNFUN(mpn_and_n),  TYPE_AND_N  },
  { TRY_FUNFUN(mpn_andn_n), TYPE_ANDN_N },
  { TRY_FUNFUN(mpn_nand_n), TYPE_NAND_N },
  { TRY_FUNFUN(mpn_ior_n),  TYPE_IOR_N  },
  { TRY_FUNFUN(mpn_iorn_n), TYPE_IORN_N },
  { TRY_FUNFUN(mpn_nior_n), TYPE_NIOR_N },
  { TRY_FUNFUN(mpn_xor_n),  TYPE_XOR_N  },
  { TRY_FUNFUN(mpn_xnor_n), TYPE_XNOR_N },

  { TRY(mpn_divrem_1),     TYPE_DIVREM_1 },
#if USE_PREINV_DIVREM_1
  { TRY(mpn_preinv_divrem_1), TYPE_PREINV_DIVREM_1 },
#endif
  { TRY(mpn_mod_1),        TYPE_MOD_1 },
#if USE_PREINV_MOD_1
  { TRY(mpn_preinv_mod_1), TYPE_PREINV_MOD_1 },
#endif
#if HAVE_NATIVE_mpn_divrem_1c
  { TRY(mpn_divrem_1c),    TYPE_DIVREM_1C },
#endif
#if HAVE_NATIVE_mpn_mod_1c
  { TRY(mpn_mod_1c),       TYPE_MOD_1C },
#endif
#if GMP_NUMB_BITS % 4 == 0
  { TRY(mpn_mod_34lsub1),  TYPE_MOD_34LSUB1 },
#endif

  { TRY_FUNFUN(udiv_qrnnd), TYPE_UDIV_QRNND, 2 },
#if HAVE_NATIVE_mpn_udiv_qrnnd
  { TRY(mpn_udiv_qrnnd),    TYPE_UDIV_QRNND, 2 },
#endif
#if HAVE_NATIVE_mpn_udiv_qrnnd_r
  { TRY(mpn_udiv_qrnnd_r),  TYPE_UDIV_QRNND_R, 2 },
#endif

  { TRY(mpn_divexact_1),          TYPE_DIVEXACT_1 },
  { TRY_FUNFUN(mpn_divexact_by3), TYPE_DIVEXACT_BY3 },
  { TRY(mpn_divexact_by3c),       TYPE_DIVEXACT_BY3C },

  { TRY_FUNFUN(mpn_modexact_1_odd), TYPE_MODEXACT_1_ODD },
  { TRY(mpn_modexact_1c_odd),       TYPE_MODEXACT_1C_ODD },


  { TRY(mpn_sbpi1_div_qr), TYPE_SBPI1_DIV_QR, 3},
  { TRY(mpn_tdiv_qr),      TYPE_TDIV_QR },

  { TRY(mpn_mul_1),      TYPE_MUL_1 },
#if HAVE_NATIVE_mpn_mul_1c
  { TRY(mpn_mul_1c),     TYPE_MUL_1C },
#endif
#if HAVE_NATIVE_mpn_mul_2
  { TRY(mpn_mul_2),      TYPE_MUL_2, 2 },
#endif
#if HAVE_NATIVE_mpn_mul_3
  { TRY(mpn_mul_3),      TYPE_MUL_3, 3 },
#endif
#if HAVE_NATIVE_mpn_mul_4
  { TRY(mpn_mul_4),      TYPE_MUL_4, 4 },
#endif

  { TRY(mpn_rshift),     TYPE_RSHIFT },
  { TRY(mpn_lshift),     TYPE_LSHIFT },
  { TRY(mpn_lshiftc),    TYPE_LSHIFTC },


  { TRY(mpn_mul_basecase), TYPE_MUL_MN },
  { TRY(mpn_mullo_basecase), TYPE_MULLO_N },
#if SQR_TOOM2_THRESHOLD > 0
  { TRY(mpn_sqr_basecase), TYPE_SQR },
#endif

  { TRY(mpn_mul),    TYPE_MUL_MN },
  { TRY(mpn_mul_n),  TYPE_MUL_N },
  { TRY(mpn_sqr),    TYPE_SQR },

  { TRY_FUNFUN(umul_ppmm), TYPE_UMUL_PPMM, 2 },
#if HAVE_NATIVE_mpn_umul_ppmm
  { TRY(mpn_umul_ppmm),    TYPE_UMUL_PPMM, 2 },
#endif
#if HAVE_NATIVE_mpn_umul_ppmm_r
  { TRY(mpn_umul_ppmm_r),  TYPE_UMUL_PPMM_R, 2 },
#endif

  {
TRY_FUNFUN(mpn_toom22_mul), TYPE_MUL_N, MPN_TOOM22_MUL_MINSIZE }, 1583 { TRY_FUNFUN(mpn_toom2_sqr), TYPE_SQR, MPN_TOOM2_SQR_MINSIZE }, 1584 { TRY_FUNFUN(mpn_toom33_mul), TYPE_MUL_N, MPN_TOOM33_MUL_MINSIZE }, 1585 { TRY_FUNFUN(mpn_toom3_sqr), TYPE_SQR, MPN_TOOM3_SQR_MINSIZE }, 1586 { TRY_FUNFUN(mpn_toom44_mul), TYPE_MUL_N, MPN_TOOM44_MUL_MINSIZE }, 1587 { TRY_FUNFUN(mpn_toom4_sqr), TYPE_SQR, MPN_TOOM4_SQR_MINSIZE }, 1588 1589 { TRY(mpn_gcd_1), TYPE_GCD_1 }, 1590 { TRY(mpn_gcd), TYPE_GCD }, 1591 { TRY(mpz_jacobi), TYPE_MPZ_JACOBI }, 1592 { TRY(mpz_kronecker_ui), TYPE_MPZ_KRONECKER_UI }, 1593 { TRY(mpz_kronecker_si), TYPE_MPZ_KRONECKER_SI }, 1594 { TRY(mpz_ui_kronecker), TYPE_MPZ_UI_KRONECKER }, 1595 { TRY(mpz_si_kronecker), TYPE_MPZ_SI_KRONECKER }, 1596 1597 { TRY(mpn_popcount), TYPE_POPCOUNT }, 1598 { TRY(mpn_hamdist), TYPE_HAMDIST }, 1599 1600 { TRY(mpn_sqrtrem), TYPE_SQRTREM }, 1601 1602 { TRY_FUNFUN(MPN_ZERO), TYPE_ZERO }, 1603 1604 { TRY(mpn_get_str), TYPE_GET_STR }, 1605 1606 { TRY(mpn_binvert), TYPE_BINVERT }, 1607 { TRY(mpn_invert), TYPE_INVERT }, 1608 1609#ifdef EXTRA_ROUTINES 1610 EXTRA_ROUTINES 1611#endif 1612}; 1613 1614const struct choice_t *choice = NULL; 1615 1616 1617void 1618mprotect_maybe (void *addr, size_t len, int prot) 1619{ 1620 if (!option_redzones) 1621 return; 1622 1623#if HAVE_MPROTECT 1624 if (mprotect (addr, len, prot) != 0) 1625 { 1626 fprintf (stderr, "Cannot mprotect %p 0x%X 0x%X: %s\n", 1627 addr, (unsigned) len, prot, strerror (errno)); 1628 exit (1); 1629 } 1630#else 1631 { 1632 static int warned = 0; 1633 if (!warned) 1634 { 1635 fprintf (stderr, 1636 "mprotect not available, bounds testing not performed\n"); 1637 warned = 1; 1638 } 1639 } 1640#endif 1641} 1642 1643/* round "a" up to a multiple of "m" */ 1644size_t 1645round_up_multiple (size_t a, size_t m) 1646{ 1647 unsigned long r; 1648 1649 r = a % m; 1650 if (r == 0) 1651 return a; 1652 else 1653 return a + (m - r); 1654} 1655 1656 1657/* On some systems it seems that only an 
mmap'ed region can be mprotect'ed, 1658 for instance HP-UX 10. 1659 1660 mmap will almost certainly return a pointer already aligned to a page 1661 boundary, but it's easy enough to share the alignment handling with the 1662 malloc case. */ 1663 1664void 1665malloc_region (struct region_t *r, mp_size_t n) 1666{ 1667 mp_ptr p; 1668 size_t nbytes; 1669 1670 ASSERT ((pagesize % BYTES_PER_MP_LIMB) == 0); 1671 1672 n = round_up_multiple (n, PAGESIZE_LIMBS); 1673 r->size = n; 1674 1675 nbytes = n*BYTES_PER_MP_LIMB + 2*REDZONE_BYTES + pagesize; 1676 1677#if defined (MAP_ANONYMOUS) && ! defined (MAP_ANON) 1678#define MAP_ANON MAP_ANONYMOUS 1679#endif 1680 1681#if HAVE_MMAP && defined (MAP_ANON) 1682 /* note must pass fd=-1 for MAP_ANON on BSD */ 1683 p = mmap (NULL, nbytes, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANON, -1, 0); 1684 if (p == (void *) -1) 1685 { 1686 fprintf (stderr, "Cannot mmap %#x anon bytes: %s\n", 1687 (unsigned) nbytes, strerror (errno)); 1688 exit (1); 1689 } 1690#else 1691 p = (mp_ptr) malloc (nbytes); 1692 ASSERT_ALWAYS (p != NULL); 1693#endif 1694 1695 p = align_pointer (p, pagesize); 1696 1697 mprotect_maybe (p, REDZONE_BYTES, PROT_NONE); 1698 p += REDZONE_LIMBS; 1699 r->ptr = p; 1700 1701 mprotect_maybe (p + n, REDZONE_BYTES, PROT_NONE); 1702} 1703 1704void 1705mprotect_region (const struct region_t *r, int prot) 1706{ 1707 mprotect_maybe (r->ptr, r->size, prot); 1708} 1709 1710 1711/* First four entries must be 0,1,2,3 for the benefit of CARRY_BIT, CARRY_3, 1712 and CARRY_4 */ 1713mp_limb_t carry_array[] = { 1714 0, 1, 2, 3, 1715 4, 1716 CNST_LIMB(1) << 8, 1717 CNST_LIMB(1) << 16, 1718 GMP_NUMB_MAX 1719}; 1720int carry_index; 1721 1722#define CARRY_COUNT \ 1723 ((tr->carry == CARRY_BIT) ? 2 \ 1724 : tr->carry == CARRY_3 ? 3 \ 1725 : tr->carry == CARRY_4 ? 4 \ 1726 : (tr->carry == CARRY_LIMB || tr->carry == CARRY_DIVISOR) \ 1727 ? 
numberof(carry_array) + CARRY_RANDOMS                         \
   : 1)

/* Fill "size" limbs at "dst" with random data, using refmpn_random for odd
   "index" and refmpn_random2 for even "index".  */
#define MPN_RANDOM_ALT(index,dst,size) \
  (((index) & 1) ? refmpn_random (dst, size) : refmpn_random2 (dst, size))

/* Loop header stepping "carry" through carry_array[] and then through
   random values, CARRY_COUNT iterations in all.  When tr->carry is
   CARRY_DIVISOR each value is reduced modulo "divisor".  The middle clause
   is a comma expression whose last operand is the loop test, so "carry" is
   (re)assigned before every test, including the final failing one.

   The dummy value after MPN_RANDOM_ALT ensures both sides of the ":" have
   the same type */
#define CARRY_ITERATION                                                 \
  for (carry_index = 0;                                                 \
       (carry_index < numberof (carry_array)                            \
        ? (carry = carry_array[carry_index])                            \
        : (MPN_RANDOM_ALT (carry_index, &carry, 1), (mp_limb_t) 0)),    \
         (tr->carry == CARRY_DIVISOR ? carry %= divisor : 0),           \
         carry_index < CARRY_COUNT;                                     \
       carry_index++)


/* Fixed multiplier values tried before the random ones.  */
mp_limb_t  multiplier_array[] = {
  0, 1, 2, 3,
  CNST_LIMB(1) << 8,
  CNST_LIMB(1) << 16,
  GMP_NUMB_MAX - 2,
  GMP_NUMB_MAX - 1,
  GMP_NUMB_MAX
};
int        multiplier_index;

/* Fixed divisor values tried before the random ones; note there is no 0.  */
mp_limb_t  divisor_array[] = {
  1, 2, 3,
  CNST_LIMB(1) << 8,
  CNST_LIMB(1) << 16,
  CNST_LIMB(1) << (GMP_NUMB_BITS/2 - 1),
  GMP_NUMB_MAX >> (GMP_NUMB_BITS/2),
  GMP_NUMB_HIGHBIT,
  GMP_NUMB_HIGHBIT + 1,
  GMP_NUMB_MAX - 2,
  GMP_NUMB_MAX - 1,
  GMP_NUMB_MAX
};

int        divisor_index;

/* Generic form of the loop header above: "var" takes each element of
   "array" and then random values, while "index" runs from 0 up to but not
   including "limit".  The "randoms" and "cond" parameters are accepted but
   not used in the expansion.

   The dummy value after MPN_RANDOM_ALT ensures both sides of the ":" have
   the same type */
#define ARRAY_ITERATION(var, index, limit, array, randoms, cond)        \
  for (index = 0;                                                       \
       (index < numberof (array)                                        \
        ? (var = array[index])                                          \
        : (MPN_RANDOM_ALT (index, &var, 1), (mp_limb_t) 0)),            \
         index < limit;                                                 \
       index++)

/* Number of multiplier values to try: just one pass when the routine takes
   no multiplier (tr->multiplier zero).  */
#define MULTIPLIER_COUNT                                \
  (tr->multiplier                                       \
   ? numberof (multiplier_array) + MULTIPLIER_RANDOMS   \
   : 1)

#define MULTIPLIER_ITERATION                                            \
  ARRAY_ITERATION(multiplier, multiplier_index, MULTIPLIER_COUNT,       \
                  multiplier_array, MULTIPLIER_RANDOMS, TRY_MULTIPLIER)

/* Number of divisor values to try: just one pass when the routine takes no
   divisor (tr->divisor zero).  */
#define DIVISOR_COUNT                           \
  (tr->divisor                                  \
   ? numberof (divisor_array) + DIVISOR_RANDOMS \
   : 1)

#define DIVISOR_ITERATION                                               \
  ARRAY_ITERATION(divisor, divisor_index, DIVISOR_COUNT, divisor_array, \
                  DIVISOR_RANDOMS, TRY_DIVISOR)


/* overlap_array[].s[i] is where s[i] should be, 0 or 1 means overlapping
   d[0] or d[1] respectively, -1 means a separate (write-protected)
   location.

   The order of the entries matters: OVERLAP_COUNT selects a leading prefix
   of this array.  */

struct overlap_t {
  int  s[NUM_SOURCES];
} overlap_array[] = {
  { { -1, -1 } },
  { {  0, -1 } },
  { { -1,  0 } },
  { {  0,  0 } },
  { {  1, -1 } },
  { { -1,  1 } },
  { {  1,  1 } },
  { {  0,  1 } },
  { {  1,  0 } },
};

/* Current overlap_array[] entry, and one past the last entry to try.  */
struct overlap_t  *overlap, *overlap_limit;

/* How many leading overlap_array[] entries to run, from the tr->overlap
   flags and from which destinations and sources the routine has.  */
#define OVERLAP_COUNT                   \
  (tr->overlap & OVERLAP_NONE       ? 1 \
   : tr->overlap & OVERLAP_NOT_SRCS ? 3 \
   : tr->overlap & OVERLAP_NOT_SRC2 ? 2 \
   : tr->dst[1]                     ? 9 \
   : tr->src[1]                     ? 4 \
   : tr->dst[0]                     ? 2 \
   : 1)

#define OVERLAP_ITERATION                               \
  for (overlap = &overlap_array[0],                     \
         overlap_limit = &overlap_array[OVERLAP_COUNT]; \
       overlap < overlap_limit;                         \
       overlap++)


/* Radix passed to MPN_SIZEINBASE and to the TYPE_GET_STR routine.  */
int  base = 10;

/* t_rand selects the generator used by t_random under DATA_TRAND: 0 for
   refmpn_random, 1 for refmpn_random2.  */
#define T_RAND_COUNT  2
int  t_rand;

/* Fill ptr[0..n-1] with test data of the kind selected by option_data.
   Does nothing when n is 0; aborts on an unknown option_data or t_rand.  */
void
t_random (mp_ptr ptr, mp_size_t n)
{
  if (n == 0)
    return;

  switch (option_data) {
  case DATA_TRAND:
    switch (t_rand) {
    case 0: refmpn_random (ptr, n); break;
    case 1: refmpn_random2 (ptr, n); break;
    default: abort();
    }
    break;
  case DATA_SEQ:
    {
      /* counter is static, so the sequence continues across calls */
      static mp_limb_t  counter = 0;
      mp_size_t  i;
      for (i = 0; i < n; i++)
        ptr[i] = ++counter;
    }
    break;
  case DATA_ZEROS:
    refmpn_zero (ptr, n);
    break;
  case DATA_FFS:
    refmpn_fill (ptr, n, GMP_NUMB_MAX);
    break;
  case DATA_2FD:
    /* Special value 0x2FFF...FFFD, which divided by 3 gives 0xFFF...FFF,
       inducing the q1_ff special case in the mul-by-inverse part of some
       versions of divrem_1 and mod_1.
*/
    /* NOTE(review): when n is 1 the two stores below hit the same limb and
       leave it 0 rather than the pattern described -- confirm whether that
       case can occur.  */
    refmpn_fill (ptr, n, (mp_limb_t) -1);
    ptr[n-1] = 2;
    ptr[0] -= 2;
    break;

  default:
    abort();
  }
}
#define T_RAND_ITERATION \
  for (t_rand = 0; t_rand < T_RAND_COUNT; t_rand++)


/* Print the results held in "e" (either &ref or &fun): a heading with the
   routine name, the return value if the routine has one, each destination
   in use with its address, and the address of each source in use.  */
void
print_each (const struct each_t *e)
{
  int  i;

  printf ("%s %s\n", e->name, e == &ref ? tr->reference_name : choice->name);
  if (tr->retval)
    mpn_trace (" retval", &e->retval, 1);

  for (i = 0; i < NUM_DESTS; i++)
    {
      if (tr->dst[i])
        {
          /* d[i].size counts bytes for a dst_bytes destination, limbs
             otherwise */
          if (tr->dst_bytes[i])
            byte_tracen (" d[%d]", i, e->d[i].p, d[i].size);
          else
            mpn_tracen (" d[%d]", i, e->d[i].p, d[i].size);
          printf (" located %p\n", (void *) (e->d[i].p));
        }
    }

  for (i = 0; i < NUM_SOURCES; i++)
    if (tr->src[i])
      printf (" s[%d] located %p\n", i, (void *) (e->s[i].p));
}


/* Print the complete state of the current test case: sizes, the scalar
   parameters the routine uses, placement of destinations and sources, the
   source data, and then the reference (if any) and tested results.  */
void
print_all (void)
{
  int  i;

  printf ("\n");
  printf ("size %ld\n", (long) size);
  if (tr->size2)
    printf ("size2 %ld\n", (long) size2);

  /* only destinations whose size differs from "size" are mentioned */
  for (i = 0; i < NUM_DESTS; i++)
    if (d[i].size != size)
      printf ("d[%d].size %ld\n", i, (long) d[i].size);

  if (tr->multiplier)
    mpn_trace (" multiplier", &multiplier, 1);
  if (tr->divisor)
    mpn_trace (" divisor", &divisor, 1);
  /* NOTE(review): %lu assumes "shift" is an unsigned long; its declaration
     is not in this part of the file -- confirm.  */
  if (tr->shift)
    printf (" shift %lu\n", shift);
  if (tr->carry)
    mpn_trace (" carry", &carry, 1);
  if (tr->msize)
    mpn_trace (" multiplier_N", multiplier_N, tr->msize);

  for (i = 0; i < NUM_DESTS; i++)
    if (tr->dst[i])
      printf (" d[%d] %s, align %ld, size %ld\n",
              i, d[i].high ? "high" : "low",
              (long) d[i].align, (long) d[i].size);

  for (i = 0; i < NUM_SOURCES; i++)
    {
      if (tr->src[i])
        {
          printf (" s[%d] %s, align %ld, ",
                  i, s[i].high ? "high" : "low", (long) s[i].align);
          switch (overlap->s[i]) {
          case -1:
            printf ("no overlap\n");
            break;
          default:
            /* "+a" / "-a" mark a source offset from the destination by its
               alignment, matching pointer_setup() */
            printf ("==d[%d]%s\n",
                    overlap->s[i],
                    tr->overlap == OVERLAP_LOW_TO_HIGH ? "+a"
                    : tr->overlap == OVERLAP_HIGH_TO_LOW ? "-a"
                    : "");
            break;
          }
          printf (" s[%d]=", i);
          /* with carry_sign, bit i of "carry" is the sign of source i */
          if (tr->carry_sign && (carry & (1 << i)))
            printf ("-");
          mpn_trace (NULL, s[i].p, SRC_SIZE(i));
        }
    }

  if (tr->dst0_from_src1)
    mpn_trace (" d[0]", s[1].region.ptr, size);

  if (tr->reference)
    print_each (&ref);
  print_each (&fun);
}

/* Compare the reference results in "ref" with the tested results in "fun":
   the return value (if the routine has one) and every destination in use.
   On any difference a message is printed, then print_all() is called and
   the program aborts.  */
void
compare (void)
{
  int  error = 0;
  int  i;

  if (tr->retval && ref.retval != fun.retval)
    {
      gmp_printf ("Different return values (%Mu, %Mu)\n",
                  ref.retval, fun.retval);
      error = 1;
    }

  /* destinations whose size is only known from the return value are
     narrowed to the reference's return value before comparing */
  for (i = 0; i < NUM_DESTS; i++)
    {
      switch (tr->dst_size[i]) {
      case SIZE_RETVAL:
      case SIZE_GET_STR:
        d[i].size = ref.retval;
        break;
      }
    }

  for (i = 0; i < NUM_DESTS; i++)
    {
      if (! tr->dst[i])
        continue;

      if (tr->dst_bytes[i])
        {
          if (memcmp (ref.d[i].p, fun.d[i].p, d[i].size) != 0)
            {
              printf ("Different d[%d] data results, low diff at %ld, high diff at %ld\n",
                      i,
                      (long) byte_diff_lowest (ref.d[i].p, fun.d[i].p, d[i].size),
                      (long) byte_diff_highest (ref.d[i].p, fun.d[i].p, d[i].size));
              error = 1;
            }
        }
      else
        {
          if (d[i].size != 0
              && !
refmpn_equal_anynail (ref.d[i].p, fun.d[i].p, d[i].size))
            {
              printf ("Different d[%d] data results, low diff at %ld, high diff at %ld\n",
                      i,
                      (long) mpn_diff_lowest (ref.d[i].p, fun.d[i].p, d[i].size),
                      (long) mpn_diff_highest (ref.d[i].p, fun.d[i].p, d[i].size));
              error = 1;
            }
        }
    }

  if (error)
    {
      print_all();
      abort();
    }
}


/* Invoke "function" with the argument list belonging to choice->type,
   taking operands from "e" and from the current global test parameters
   (size, size2, carry, multiplier, divisor, shift, ...).  The return value,
   for routines that have one, is stored in e->retval.  An unknown type is
   reported and the program aborts.

   The functions are cast if the return value should be a long rather than
   the default mp_limb_t.  This is necessary under _LONG_LONG_LIMB.  This
   might not be enough if some actual calling conventions checking is
   implemented on a long long limb system.  */

void
call (struct each_t *e, tryfun_t function)
{
  switch (choice->type) {
  case TYPE_ADD:
  case TYPE_SUB:
    e->retval = CALLING_CONVENTIONS (function)
      (e->d[0].p, e->s[0].p, size, e->s[1].p, size2);
    break;

  case TYPE_ADD_N:
  case TYPE_SUB_N:
  case TYPE_ADDLSH1_N:
  case TYPE_ADDLSH2_N:
  case TYPE_SUBLSH1_N:
  case TYPE_RSBLSH1_N:
  case TYPE_RSBLSH2_N:
  case TYPE_RSH1ADD_N:
  case TYPE_RSH1SUB_N:
    e->retval = CALLING_CONVENTIONS (function)
      (e->d[0].p, e->s[0].p, e->s[1].p, size);
    break;
  case TYPE_ADDLSH_N:
  case TYPE_SUBLSH_N:
  case TYPE_RSBLSH_N:
    e->retval = CALLING_CONVENTIONS (function)
      (e->d[0].p, e->s[0].p, e->s[1].p, size, shift);
    break;
  case TYPE_ADD_NC:
  case TYPE_SUB_NC:
    e->retval = CALLING_CONVENTIONS (function)
      (e->d[0].p, e->s[0].p, e->s[1].p, size, carry);
    break;

  case TYPE_MUL_1:
  case TYPE_ADDMUL_1:
  case TYPE_SUBMUL_1:
    e->retval = CALLING_CONVENTIONS (function)
      (e->d[0].p, e->s[0].p, size, multiplier);
    break;
  case TYPE_MUL_1C:
  case TYPE_ADDMUL_1C:
  case TYPE_SUBMUL_1C:
    e->retval = CALLING_CONVENTIONS (function)
      (e->d[0].p, e->s[0].p, size, multiplier, carry);
    break;

  case TYPE_MUL_2:
  case TYPE_MUL_3:
  case TYPE_MUL_4:
    /* these routines are never expected to be run at size 1 */
    if (size == 1)
      abort ();
    e->retval = CALLING_CONVENTIONS (function)
      (e->d[0].p, e->s[0].p, size, multiplier_N);
    break;

  case TYPE_ADDMUL_2:
  case TYPE_ADDMUL_3:
  case TYPE_ADDMUL_4:
  case TYPE_ADDMUL_5:
  case TYPE_ADDMUL_6:
  case TYPE_ADDMUL_7:
  case TYPE_ADDMUL_8:
    if (size == 1)
      abort ();
    e->retval = CALLING_CONVENTIONS (function)
      (e->d[0].p, e->s[0].p, size, multiplier_N);
    break;

  case TYPE_AND_N:
  case TYPE_ANDN_N:
  case TYPE_NAND_N:
  case TYPE_IOR_N:
  case TYPE_IORN_N:
  case TYPE_NIOR_N:
  case TYPE_XOR_N:
  case TYPE_XNOR_N:
    CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, e->s[1].p, size);
    break;

  case TYPE_ADDSUB_N:
    e->retval = CALLING_CONVENTIONS (function)
      (e->d[0].p, e->d[1].p, e->s[0].p, e->s[1].p, size);
    break;
  case TYPE_ADDSUB_NC:
    e->retval = CALLING_CONVENTIONS (function)
      (e->d[0].p, e->d[1].p, e->s[0].p, e->s[1].p, size, carry);
    break;

  case TYPE_COPY:
  case TYPE_COPYI:
  case TYPE_COPYD:
  case TYPE_COM:
    CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, size);
    break;


  case TYPE_DIVEXACT_BY3:
    e->retval = CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, size);
    break;
  case TYPE_DIVEXACT_BY3C:
    e->retval = CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, size,
                                                carry);
    break;


  case TYPE_DIVMOD_1:
  case TYPE_DIVEXACT_1:
    e->retval = CALLING_CONVENTIONS (function)
      (e->d[0].p, e->s[0].p, size, divisor);
    break;
  case TYPE_DIVMOD_1C:
    e->retval = CALLING_CONVENTIONS (function)
      (e->d[0].p, e->s[0].p, size, divisor, carry);
    break;
  case TYPE_DIVREM_1:
    e->retval = CALLING_CONVENTIONS (function)
      (e->d[0].p, size2, e->s[0].p, size, divisor);
    break;
  case TYPE_DIVREM_1C:
    e->retval = CALLING_CONVENTIONS (function)
      (e->d[0].p, size2, e->s[0].p, size, divisor, carry);
    break;
  case TYPE_PREINV_DIVREM_1:
    {
      mp_limb_t  dinv;
      /* this local "shift" is the normalization count for the divisor; it
         is a different object from the "shift" passed by the shift cases */
      unsigned   shift;
      shift = refmpn_count_leading_zeros (divisor);
      dinv = refmpn_invert_limb (divisor << shift);
      e->retval = CALLING_CONVENTIONS (function)
        (e->d[0].p, size2, e->s[0].p, size, divisor, dinv, shift);
    }
    break;
  case TYPE_MOD_1:
  case TYPE_MODEXACT_1_ODD:
    e->retval = CALLING_CONVENTIONS (function)
      (e->s[0].p, size, divisor);
    break;
  case TYPE_MOD_1C:
  case TYPE_MODEXACT_1C_ODD:
    e->retval = CALLING_CONVENTIONS (function)
      (e->s[0].p, size, divisor, carry);
    break;
  case TYPE_PREINV_MOD_1:
    e->retval = CALLING_CONVENTIONS (function)
      (e->s[0].p, size, divisor, refmpn_invert_limb (divisor));
    break;
  case TYPE_MOD_34LSUB1:
    e->retval = CALLING_CONVENTIONS (function) (e->s[0].p, size);
    break;

  case TYPE_UDIV_QRNND:
    /* the two dividend limbs are passed by value, high limb first */
    e->retval = CALLING_CONVENTIONS (function)
      (e->d[0].p, e->s[0].p[1], e->s[0].p[0], divisor);
    break;
  case TYPE_UDIV_QRNND_R:
    e->retval = CALLING_CONVENTIONS (function)
      (e->s[0].p[1], e->s[0].p[0], divisor, e->d[0].p);
    break;

  case TYPE_SBPI1_DIV_QR:
    {
      gmp_pi1_t dinv;
      invert_pi1 (dinv, e->s[1].p[size2-1], e->s[1].p[size2-2]); /* FIXME: use refinvert_pi1 */
      refmpn_copyi (e->d[1].p, e->s[0].p, size);        /* dividend */
      refmpn_fill (e->d[0].p, size-size2, 0x98765432);  /* quotient */
      e->retval = CALLING_CONVENTIONS (function)
        (e->d[0].p, e->d[1].p, size, e->s[1].p, size2, dinv.inv32);
      refmpn_zero (e->d[1].p+size2, size-size2);    /* excess over remainder */
    }
    break;

  case TYPE_TDIV_QR:
    CALLING_CONVENTIONS (function) (e->d[0].p, e->d[1].p, 0,
                                    e->s[0].p, size, e->s[1].p, size2);
    break;

  case TYPE_GCD_1:
    /* Must have a non-zero src, but this probably isn't the best way to do
       it. */
    if (refmpn_zero_p (e->s[0].p, size))
      e->retval = 0;
    else
      e->retval = CALLING_CONVENTIONS (function) (e->s[0].p, size, divisor);
    break;

  case TYPE_GCD:
    /* Sources are destroyed, so they're saved and replaced, but a general
       approach to this might be better.  Note that it's still e->s[0].p and
       e->s[1].p that are passed, to get the desired alignments.

       NOTE(review): the source regions are made writable here and are not
       set back to PROT_READ in this case; the next write-protect visible in
       this file is the one in try_one().  */
    {
      mp_ptr  s0 = refmpn_malloc_limbs (size);
      mp_ptr  s1 = refmpn_malloc_limbs (size2);
      refmpn_copyi (s0, e->s[0].p, size);
      refmpn_copyi (s1, e->s[1].p, size2);

      mprotect_region (&s[0].region, PROT_READ|PROT_WRITE);
      mprotect_region (&s[1].region, PROT_READ|PROT_WRITE);
      e->retval = CALLING_CONVENTIONS (function) (e->d[0].p,
                                                  e->s[0].p, size,
                                                  e->s[1].p, size2);
      refmpn_copyi (e->s[0].p, s0, size);
      refmpn_copyi (e->s[1].p, s1, size2);
      free (s0);
      free (s1);
    }
    break;

  case TYPE_GCD_FINDA:
    {
      /* FIXME: do this with a flag */
      /* Both limbs are forced non-zero.  NOTE(review): c[1] is loaded from
         p[0], the same limb as c[0], not from p[1] -- confirm whether that
         is intended.  */
      mp_limb_t  c[2];
      c[0] = e->s[0].p[0];
      c[0] += (c[0] == 0);
      c[1] = e->s[0].p[0];
      c[1] += (c[1] == 0);
      e->retval = CALLING_CONVENTIONS (function) (c);
    }
    break;

  case TYPE_MPZ_JACOBI:
  case TYPE_MPZ_KRONECKER:
    {
      /* mpz_t operands are built directly on the source limbs; bit 0 of
         "carry" gives the sign of a, bit 1 the sign of b */
      mpz_t  a, b;
      PTR(a) = e->s[0].p; SIZ(a) = ((carry&1)==0 ? size : -size);
      PTR(b) = e->s[1].p; SIZ(b) = ((carry&2)==0 ? size2 : -size2);
      e->retval = CALLING_CONVENTIONS (function) (a, b);
    }
    break;
  case TYPE_MPZ_KRONECKER_UI:
    {
      mpz_t  a;
      PTR(a) = e->s[0].p; SIZ(a) = (carry==0 ? size : -size);
      e->retval = CALLING_CONVENTIONS(function) (a, (unsigned long)multiplier);
    }
    break;
  case TYPE_MPZ_KRONECKER_SI:
    {
      mpz_t  a;
      PTR(a) = e->s[0].p; SIZ(a) = (carry==0 ? size : -size);
      e->retval = CALLING_CONVENTIONS (function) (a, (long) multiplier);
    }
    break;
  case TYPE_MPZ_UI_KRONECKER:
    {
      mpz_t  b;
      PTR(b) = e->s[0].p; SIZ(b) = (carry==0 ? size : -size);
      e->retval = CALLING_CONVENTIONS(function) ((unsigned long)multiplier, b);
    }
    break;
  case TYPE_MPZ_SI_KRONECKER:
    {
      mpz_t  b;
      PTR(b) = e->s[0].p; SIZ(b) = (carry==0 ? size : -size);
      e->retval = CALLING_CONVENTIONS (function) ((long) multiplier, b);
    }
    break;

  case TYPE_MUL_MN:
    CALLING_CONVENTIONS (function)
      (e->d[0].p, e->s[0].p, size, e->s[1].p, size2);
    break;
  case TYPE_MUL_N:
  case TYPE_MULLO_N:
    CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, e->s[1].p, size);
    break;
  case TYPE_SQR:
    CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, size);
    break;

  case TYPE_UMUL_PPMM:
    e->retval = CALLING_CONVENTIONS (function)
      (e->d[0].p, e->s[0].p[0], e->s[0].p[1]);
    break;
  case TYPE_UMUL_PPMM_R:
    e->retval = CALLING_CONVENTIONS (function)
      (e->s[0].p[0], e->s[0].p[1], e->d[0].p);
    break;

  case TYPE_LSHIFT:
  case TYPE_LSHIFTC:
  case TYPE_RSHIFT:
    e->retval = CALLING_CONVENTIONS (function)
      (e->d[0].p, e->s[0].p, size, shift);
    break;

  case TYPE_POPCOUNT:
    /* called through an unsigned long-returning pointer type, see the
       comment before this function */
    e->retval = (* (unsigned long (*)(ANYARGS))
                 CALLING_CONVENTIONS (function)) (e->s[0].p, size);
    break;
  case TYPE_HAMDIST:
    e->retval = (* (unsigned long (*)(ANYARGS))
                 CALLING_CONVENTIONS (function)) (e->s[0].p, e->s[1].p, size);
    break;

  case TYPE_SQRTREM:
    e->retval = (* (long (*)(ANYARGS)) CALLING_CONVENTIONS (function))
      (e->d[0].p, e->d[1].p, e->s[0].p, size);
    break;

  case TYPE_ZERO:
    CALLING_CONVENTIONS (function) (e->d[0].p, size);
    break;

  case TYPE_GET_STR:
    {
      /* d[0] is a byte destination of d[0].size bytes.  The part of it not
         needed for this operand (fill bytes) is set to 0xBA, and the string
         is written hard against the high or low end as d[0].high says.  */
      size_t  sizeinbase, fill;
      char    *dst;
      MPN_SIZEINBASE (sizeinbase, e->s[0].p, size, base);
      ASSERT_ALWAYS (sizeinbase <= d[0].size);
      fill = d[0].size - sizeinbase;
      if (d[0].high)
        {
          memset (e->d[0].p, 0xBA, fill);
          dst = (char *) e->d[0].p + fill;
        }
      else
        {
          dst = (char *) e->d[0].p;
          memset (dst + sizeinbase, 0xBA, fill);
        }
      if (POW2_P (base))
        {
          e->retval = CALLING_CONVENTIONS (function) (dst, base,
                                                      e->s[0].p, size);
        }
      else
        {
          /* for a non power-of-2 base the routine is given a copy of the
             source in d[1] rather than the source itself */
          refmpn_copy (e->d[1].p, e->s[0].p, size);
          e->retval = CALLING_CONVENTIONS (function) (dst, base,
                                                      e->d[1].p, size);
        }
      refmpn_zero (e->d[1].p, size); /* clobbered or unused */
    }
    break;

  case TYPE_INVERT:
    {
      mp_ptr scratch;
      TMP_DECL;
      TMP_MARK;
      scratch = TMP_ALLOC_LIMBS (mpn_invert_itch (size));
      CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, size, scratch);
      TMP_FREE;
    }
    break;
  case TYPE_BINVERT:
    {
      mp_ptr scratch;
      TMP_DECL;
      TMP_MARK;
      scratch = TMP_ALLOC_LIMBS (mpn_binvert_itch (size));
      CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, size, scratch);
      TMP_FREE;
    }
    break;

#ifdef EXTRA_CALL
    EXTRA_CALL
#endif

  default:
    printf ("Unknown routine type %d\n", choice->type);
    abort ();
    break;
  }
}


/* Work out the destination sizes d[].size for the current test case, then
   set the operand pointers e->d[].p and e->s[].p for "e" from the current
   alignments, high/low placement and overlap selection.  Aborts on an
   unrecognised tr->dst_size[] value.  */
void
pointer_setup (struct each_t *e)
{
  int  i, j;

  for (i = 0; i < NUM_DESTS; i++)
    {
      switch (tr->dst_size[i]) {
      case 0:
      case SIZE_RETVAL: /* will be adjusted later */
        d[i].size = size;
        break;

      case SIZE_1:
        d[i].size = 1;
        break;
      case SIZE_2:
        d[i].size = 2;
        break;
      case SIZE_3:
        d[i].size = 3;
        break;

      case SIZE_PLUS_1:
        d[i].size = size+1;
        break;
      case SIZE_PLUS_MSIZE_SUB_1:
        d[i].size = size + tr->msize - 1;
        break;

      case SIZE_SUM:
        if (tr->size2)
          d[i].size = size + size2;
else
          d[i].size = 2*size;
        break;

      case SIZE_SIZE2:
        d[i].size = size2;
        break;

      case SIZE_DIFF:
        d[i].size = size - size2;
        break;

      case SIZE_DIFF_PLUS_1:
        d[i].size = size - size2 + 1;
        break;

      case SIZE_CEIL_HALF:
        d[i].size = (size+1)/2;
        break;

      case SIZE_GET_STR:
        {
          /* Only the address of "ff" offset back by size-1 is handed to
             MPN_SIZEINBASE, so that "ff" sits where the top limb would be.
             NOTE(review): this relies on MPN_SIZEINBASE reading nothing but
             the top limb -- the pointer is otherwise outside any object.  */
          mp_limb_t ff = GMP_NUMB_MAX;
          MPN_SIZEINBASE (d[i].size, &ff - (size-1), size, base);
        }
        break;

      default:
        printf ("Unrecognised dst_size type %d\n", tr->dst_size[i]);
        abort ();
      }
    }

  /* establish e->d[].p destinations */
  for (i = 0; i < NUM_DESTS; i++)
    {
      mp_size_t  offset = 0;

      /* possible room for overlapping sources */
      for (j = 0; j < numberof (overlap->s); j++)
        if (overlap->s[j] == i)
          offset = MAX (offset, s[j].align);

      if (d[i].high)
        {
          /* hard against the top of the region, less the alignment; for a
             byte destination size and align are applied as byte counts */
          if (tr->dst_bytes[i])
            {
              e->d[i].p = (mp_ptr)
                ((char *) (e->d[i].region.ptr + e->d[i].region.size)
                 - d[i].size - d[i].align);
            }
          else
            {
              e->d[i].p = e->d[i].region.ptr + e->d[i].region.size
                - d[i].size - d[i].align;
              if (tr->overlap == OVERLAP_LOW_TO_HIGH)
                e->d[i].p -= offset;
            }
        }
      else
        {
          if (tr->dst_bytes[i])
            {
              e->d[i].p = (mp_ptr) ((char *) e->d[i].region.ptr + d[i].align);
            }
          else
            {
              e->d[i].p = e->d[i].region.ptr + d[i].align;
              if (tr->overlap == OVERLAP_HIGH_TO_LOW)
                e->d[i].p += offset;
            }
        }
    }

  /* establish e->s[].p sources */
  for (i = 0; i < NUM_SOURCES; i++)
    {
      int  o = overlap->s[i];
      switch (o) {
      case -1:
        /* no overlap */
        e->s[i].p = s[i].p;
        break;
      case 0:
      case 1:
        /* overlap with d[o] */
        if (tr->overlap == OVERLAP_HIGH_TO_LOW)
          e->s[i].p = e->d[o].p - s[i].align;
        else if (tr->overlap == OVERLAP_LOW_TO_HIGH)
          e->s[i].p = e->d[o].p + s[i].align;
        else if (tr->size2 == SIZE_FRACTION)
          e->s[i].p = e->d[o].p + size2;
        else
          e->s[i].p = e->d[o].p;
        break;
      default:
        abort();
        break;
      }
    }
}


/* Report a failed validation: run the reference routine first, if there is
   one, so that its results are available, then print everything and abort.
   Does not return.  */
void
validate_fail (void)
{
  if (tr->reference)
    {
      trap_location = TRAP_REF;
      call (&ref, tr->reference);
      trap_location = TRAP_NOWHERE;
    }

  print_all();
  abort();
}


/* Run one test case with the currently selected size, alignments, overlap
   and scalar parameters: place and fill the operands, call the routine
   under test (and the reference routine unless a validate function is
   used), and check the outcome.  Any failure aborts the program.  */
void
try_one (void)
{
  int  i;

  if (option_spinner)
    spinner();
  spinner_count++;

  /* trap_location records which phase is running */
  trap_location = TRAP_SETUPS;

  if (tr->divisor == DIVISOR_NORM)
    divisor |= GMP_NUMB_HIGHBIT;
  if (tr->divisor == DIVISOR_ODD)
    divisor |= 1;

  /* place each source at the high or low end of its own region */
  for (i = 0; i < NUM_SOURCES; i++)
    {
      if (s[i].high)
        s[i].p = s[i].region.ptr + s[i].region.size - SRC_SIZE(i) - s[i].align;
      else
        s[i].p = s[i].region.ptr + s[i].align;
    }

  pointer_setup (&ref);
  pointer_setup (&fun);

  /* distinct initial values, so a routine that fails to set its return
     value shows up as a mismatch */
  ref.retval = 0x04152637;
  fun.retval = 0x8C9DAEBF;

  t_random (multiplier_N, tr->msize);

  /* generate the source data, then adjust it to meet the preconditions
     named by tr->data; the region is made writable only while this is
     done */
  for (i = 0; i < NUM_SOURCES; i++)
    {
      if (! tr->src[i])
        continue;

      mprotect_region (&s[i].region, PROT_READ|PROT_WRITE);
      t_random (s[i].p, SRC_SIZE(i));

      switch (tr->data) {
      case DATA_NON_ZERO:
        if (refmpn_zero_p (s[i].p, SRC_SIZE(i)))
          s[i].p[0] = 1;
        break;

      case DATA_MULTIPLE_DIVISOR:
        /* same number of low zero bits as divisor */
        s[i].p[0] &= ~ LOW_ZEROS_MASK (divisor);
        refmpn_sub_1 (s[i].p, s[i].p, size,
                      refmpn_mod_1 (s[i].p, size, divisor));
        break;

      case DATA_GCD:
        /* s[1] no more bits than s[0] */
        if (i == 1 && size2 == size)
          s[1].p[size-1] &= refmpn_msbone_mask (s[0].p[size-1]);

        /* high limb non-zero */
        s[i].p[SRC_SIZE(i)-1] += (s[i].p[SRC_SIZE(i)-1] == 0);

        /* odd */
        s[i].p[0] |= 1;
        break;

      case DATA_SRC0_ODD:
        if (i == 0)
          s[i].p[0] |= 1;
        break;

      case DATA_SRC1_ODD:
        if (i == 1)
          s[i].p[0] |= 1;
        break;

      case DATA_SRC1_HIGHBIT:
        if (i == 1)
          {
            if (tr->size2)
              s[i].p[size2-1] |= GMP_NUMB_HIGHBIT;
            else
              s[i].p[size-1] |= GMP_NUMB_HIGHBIT;
          }
        break;

      case DATA_SRC0_HIGHBIT:
        if (i == 0)
          {
            s[i].p[size-1] |= GMP_NUMB_HIGHBIT;
          }
        break;

      case DATA_UDIV_QRNND:
        /* high dividend limb less than the divisor */
        s[i].p[1] %= divisor;
        break;
      }

      mprotect_region (&s[i].region, PROT_READ);
    }

  /* initialize the destinations: a copy of source 1 when the routine takes
     its initial d[0] from there, otherwise a recognisable fill (0xBA bytes
     or DEADVAL limbs) */
  for (i = 0; i < NUM_DESTS; i++)
    {
      if (! tr->dst[i])
        continue;

      if (tr->dst0_from_src1 && i==0)
        {
          mp_size_t  copy = MIN (d[0].size, SRC_SIZE(1));
          mp_size_t  fill = MAX (0, d[0].size - copy);
          MPN_COPY (fun.d[0].p, s[1].region.ptr, copy);
          MPN_COPY (ref.d[0].p, s[1].region.ptr, copy);
          refmpn_fill (fun.d[0].p + copy, fill, DEADVAL);
          refmpn_fill (ref.d[0].p + copy, fill, DEADVAL);
        }
      else if (tr->dst_bytes[i])
        {
          memset (ref.d[i].p, 0xBA, d[i].size);
          memset (fun.d[i].p, 0xBA, d[i].size);
        }
      else
        {
          refmpn_fill (ref.d[i].p, d[i].size, DEADVAL);
          refmpn_fill (fun.d[i].p, d[i].size, DEADVAL);
        }
    }

  /* a source that overlaps a destination lives in the destination's space,
     so its data is copied there after the destination fill above */
  for (i = 0; i < NUM_SOURCES; i++)
    {
      if (! tr->src[i])
        continue;

      if (ref.s[i].p != s[i].p)
        {
          refmpn_copyi (ref.s[i].p, s[i].p, SRC_SIZE(i));
          refmpn_copyi (fun.s[i].p, s[i].p, SRC_SIZE(i));
        }
    }

  if (option_print)
    print_all();

  if (tr->validate != NULL)
    {
      /* results are checked by the validate function; the reference
         routine is not run here */
      trap_location = TRAP_FUN;
      call (&fun, choice->function);
      trap_location = TRAP_NOWHERE;

      if (! CALLING_CONVENTIONS_CHECK ())
        {
          print_all();
          abort();
        }

      (*tr->validate) ();
    }
  else
    {
      trap_location = TRAP_REF;
      call (&ref, tr->reference);
      trap_location = TRAP_FUN;
      call (&fun, choice->function);
      trap_location = TRAP_NOWHERE;

      if (! CALLING_CONVENTIONS_CHECK ())
        {
          print_all();
          abort();
        }

      compare ();
    }
}


/* Loop header running "size" from the largest of option_firstsize, the
   routine's minsize and 1 (or 0 when tr->size is SIZE_ALLOW_ZERO), up to
   and including option_lastsize.  */
#define SIZE_ITERATION                                          \
  for (size = MAX3 (option_firstsize,                           \
                    choice->minsize,                            \
                    (tr->size == SIZE_ALLOW_ZERO) ? 0 : 1);     \
       size <= option_lastsize;                                 \
       size++)

#define SIZE2_FIRST                                     \
  (tr->size2 == SIZE_2 ? 2                              \
   : tr->size2 == SIZE_FRACTION ? option_firstsize2     \
   : tr->size2 ?                                        \
   MAX (choice->minsize, (option_firstsize2 != 0        \
                          ? \
option_firstsize2 : 1)) \ 2749 : 0) 2750 2751#define SIZE2_LAST \ 2752 (tr->size2 == SIZE_2 ? 2 \ 2753 : tr->size2 == SIZE_FRACTION ? FRACTION_COUNT-1 \ 2754 : tr->size2 ? size \ 2755 : 0) 2756 2757#define SIZE2_ITERATION \ 2758 for (size2 = SIZE2_FIRST; size2 <= SIZE2_LAST; size2++) 2759 2760#define ALIGN_COUNT(cond) ((cond) ? ALIGNMENTS : 1) 2761#define ALIGN_ITERATION(w,n,cond) \ 2762 for (w[n].align = 0; w[n].align < ALIGN_COUNT(cond); w[n].align++) 2763 2764#define HIGH_LIMIT(cond) ((cond) != 0) 2765#define HIGH_COUNT(cond) (HIGH_LIMIT (cond) + 1) 2766#define HIGH_ITERATION(w,n,cond) \ 2767 for (w[n].high = 0; w[n].high <= HIGH_LIMIT(cond); w[n].high++) 2768 2769#define SHIFT_LIMIT \ 2770 ((unsigned long) (tr->shift ? GMP_NUMB_BITS -1 : 1)) 2771 2772#define SHIFT_ITERATION \ 2773 for (shift = 1; shift <= SHIFT_LIMIT; shift++) 2774 2775 2776void 2777try_many (void) 2778{ 2779 int i; 2780 2781 { 2782 unsigned long total = 1; 2783 2784 total *= option_repetitions; 2785 total *= option_lastsize; 2786 if (tr->size2 == SIZE_FRACTION) total *= FRACTION_COUNT; 2787 else if (tr->size2) total *= (option_lastsize+1)/2; 2788 2789 total *= SHIFT_LIMIT; 2790 total *= MULTIPLIER_COUNT; 2791 total *= DIVISOR_COUNT; 2792 total *= CARRY_COUNT; 2793 total *= T_RAND_COUNT; 2794 2795 total *= HIGH_COUNT (tr->dst[0]); 2796 total *= HIGH_COUNT (tr->dst[1]); 2797 total *= HIGH_COUNT (tr->src[0]); 2798 total *= HIGH_COUNT (tr->src[1]); 2799 2800 total *= ALIGN_COUNT (tr->dst[0]); 2801 total *= ALIGN_COUNT (tr->dst[1]); 2802 total *= ALIGN_COUNT (tr->src[0]); 2803 total *= ALIGN_COUNT (tr->src[1]); 2804 2805 total *= OVERLAP_COUNT; 2806 2807 printf ("%s %lu\n", choice->name, total); 2808 } 2809 2810 spinner_count = 0; 2811 2812 for (i = 0; i < option_repetitions; i++) 2813 SIZE_ITERATION 2814 SIZE2_ITERATION 2815 2816 SHIFT_ITERATION 2817 MULTIPLIER_ITERATION 2818 DIVISOR_ITERATION 2819 CARRY_ITERATION /* must be after divisor */ 2820 T_RAND_ITERATION 2821 2822 HIGH_ITERATION(d,0, 
tr->dst[0]) 2823 HIGH_ITERATION(d,1, tr->dst[1]) 2824 HIGH_ITERATION(s,0, tr->src[0]) 2825 HIGH_ITERATION(s,1, tr->src[1]) 2826 2827 ALIGN_ITERATION(d,0, tr->dst[0]) 2828 ALIGN_ITERATION(d,1, tr->dst[1]) 2829 ALIGN_ITERATION(s,0, tr->src[0]) 2830 ALIGN_ITERATION(s,1, tr->src[1]) 2831 2832 OVERLAP_ITERATION 2833 try_one(); 2834 2835 printf("\n"); 2836} 2837 2838 2839/* Usually print_all() doesn't show much, but it might give a hint as to 2840 where the function was up to when it died. */ 2841void 2842trap (int sig) 2843{ 2844 const char *name = "noname"; 2845 2846 switch (sig) { 2847 case SIGILL: name = "SIGILL"; break; 2848#ifdef SIGBUS 2849 case SIGBUS: name = "SIGBUS"; break; 2850#endif 2851 case SIGSEGV: name = "SIGSEGV"; break; 2852 case SIGFPE: name = "SIGFPE"; break; 2853 } 2854 2855 printf ("\n\nSIGNAL TRAP: %s\n", name); 2856 2857 switch (trap_location) { 2858 case TRAP_REF: 2859 printf (" in reference function: %s\n", tr->reference_name); 2860 break; 2861 case TRAP_FUN: 2862 printf (" in test function: %s\n", choice->name); 2863 print_all (); 2864 break; 2865 case TRAP_SETUPS: 2866 printf (" in parameter setups\n"); 2867 print_all (); 2868 break; 2869 default: 2870 printf (" somewhere unknown\n"); 2871 break; 2872 } 2873 exit (1); 2874} 2875 2876 2877void 2878try_init (void) 2879{ 2880#if HAVE_GETPAGESIZE 2881 /* Prefer getpagesize() over sysconf(), since on SunOS 4 sysconf() doesn't 2882 know _SC_PAGESIZE. 
*/ 2883 pagesize = getpagesize (); 2884#else 2885#if HAVE_SYSCONF 2886 if ((pagesize = sysconf (_SC_PAGESIZE)) == -1) 2887 { 2888 /* According to the linux man page, sysconf doesn't set errno */ 2889 fprintf (stderr, "Cannot get sysconf _SC_PAGESIZE\n"); 2890 exit (1); 2891 } 2892#else 2893Error, error, cannot get page size 2894#endif 2895#endif 2896 2897 printf ("pagesize is 0x%lX bytes\n", pagesize); 2898 2899 signal (SIGILL, trap); 2900#ifdef SIGBUS 2901 signal (SIGBUS, trap); 2902#endif 2903 signal (SIGSEGV, trap); 2904 signal (SIGFPE, trap); 2905 2906 { 2907 int i; 2908 2909 for (i = 0; i < NUM_SOURCES; i++) 2910 { 2911 malloc_region (&s[i].region, 2*option_lastsize+ALIGNMENTS-1); 2912 printf ("s[%d] %p to %p (0x%lX bytes)\n", 2913 i, (void *) (s[i].region.ptr), 2914 (void *) (s[i].region.ptr + s[i].region.size), 2915 (long) s[i].region.size * BYTES_PER_MP_LIMB); 2916 } 2917 2918#define INIT_EACH(e,es) \ 2919 for (i = 0; i < NUM_DESTS; i++) \ 2920 { \ 2921 malloc_region (&e.d[i].region, 2*option_lastsize+ALIGNMENTS-1); \ 2922 printf ("%s d[%d] %p to %p (0x%lX bytes)\n", \ 2923 es, i, (void *) (e.d[i].region.ptr), \ 2924 (void *) (e.d[i].region.ptr + e.d[i].region.size), \ 2925 (long) e.d[i].region.size * BYTES_PER_MP_LIMB); \ 2926 } 2927 2928 INIT_EACH(ref, "ref"); 2929 INIT_EACH(fun, "fun"); 2930 } 2931} 2932 2933int 2934strmatch_wild (const char *pattern, const char *str) 2935{ 2936 size_t plen, slen; 2937 2938 /* wildcard at start */ 2939 if (pattern[0] == '*') 2940 { 2941 pattern++; 2942 plen = strlen (pattern); 2943 slen = strlen (str); 2944 return (plen == 0 2945 || (slen >= plen && memcmp (pattern, str+slen-plen, plen) == 0)); 2946 } 2947 2948 /* wildcard at end */ 2949 plen = strlen (pattern); 2950 if (plen >= 1 && pattern[plen-1] == '*') 2951 return (memcmp (pattern, str, plen-1) == 0); 2952 2953 /* no wildcards */ 2954 return (strcmp (pattern, str) == 0); 2955} 2956 2957void 2958try_name (const char *name) 2959{ 2960 int found = 0; 2961 int i; 2962 
2963 for (i = 0; i < numberof (choice_array); i++) 2964 { 2965 if (strmatch_wild (name, choice_array[i].name)) 2966 { 2967 choice = &choice_array[i]; 2968 tr = ¶m[choice->type]; 2969 try_many (); 2970 found = 1; 2971 } 2972 } 2973 2974 if (!found) 2975 { 2976 printf ("%s unknown\n", name); 2977 /* exit (1); */ 2978 } 2979} 2980 2981 2982void 2983usage (const char *prog) 2984{ 2985 int col = 0; 2986 int i; 2987 2988 printf ("Usage: %s [options] function...\n", prog); 2989 printf (" -1 use limb data 1,2,3,etc\n"); 2990 printf (" -9 use limb data all 0xFF..FFs\n"); 2991 printf (" -a zeros use limb data all zeros\n"); 2992 printf (" -a ffs use limb data all 0xFF..FFs (same as -9)\n"); 2993 printf (" -a 2fd use data 0x2FFF...FFFD\n"); 2994 printf (" -p print each case tried (try this if seg faulting)\n"); 2995 printf (" -R seed random numbers from time()\n"); 2996 printf (" -r reps set repetitions (default %d)\n", DEFAULT_REPETITIONS); 2997 printf (" -s size starting size to test\n"); 2998 printf (" -S size2 starting size2 to test\n"); 2999 printf (" -s s1-s2 range of sizes to test\n"); 3000 printf (" -W don't show the spinner (use this in gdb)\n"); 3001 printf (" -z disable mprotect() redzones\n"); 3002 printf ("Default data is refmpn_random() and refmpn_random2().\n"); 3003 printf ("\n"); 3004 printf ("Functions that can be tested:\n"); 3005 3006 for (i = 0; i < numberof (choice_array); i++) 3007 { 3008 if (col + 1 + strlen (choice_array[i].name) > 79) 3009 { 3010 printf ("\n"); 3011 col = 0; 3012 } 3013 printf (" %s", choice_array[i].name); 3014 col += 1 + strlen (choice_array[i].name); 3015 } 3016 printf ("\n"); 3017 3018 exit(1); 3019} 3020 3021 3022int 3023main (int argc, char *argv[]) 3024{ 3025 int i; 3026 3027 /* unbuffered output */ 3028 setbuf (stdout, NULL); 3029 setbuf (stderr, NULL); 3030 3031 /* default trace in hex, and in upper-case so can paste into bc */ 3032 mp_trace_base = -16; 3033 3034 param_init (); 3035 3036 { 3037 unsigned long seed = 123; 3038 
int opt; 3039 3040 while ((opt = getopt(argc, argv, "19a:b:E:pRr:S:s:Wz")) != EOF) 3041 { 3042 switch (opt) { 3043 case '1': 3044 /* use limb data values 1, 2, 3, ... etc */ 3045 option_data = DATA_SEQ; 3046 break; 3047 case '9': 3048 /* use limb data values 0xFFF...FFF always */ 3049 option_data = DATA_FFS; 3050 break; 3051 case 'a': 3052 if (strcmp (optarg, "zeros") == 0) option_data = DATA_ZEROS; 3053 else if (strcmp (optarg, "seq") == 0) option_data = DATA_SEQ; 3054 else if (strcmp (optarg, "ffs") == 0) option_data = DATA_FFS; 3055 else if (strcmp (optarg, "2fd") == 0) option_data = DATA_2FD; 3056 else 3057 { 3058 fprintf (stderr, "unrecognised data option: %s\n", optarg); 3059 exit (1); 3060 } 3061 break; 3062 case 'b': 3063 mp_trace_base = atoi (optarg); 3064 break; 3065 case 'E': 3066 /* re-seed */ 3067 sscanf (optarg, "%lu", &seed); 3068 printf ("Re-seeding with %lu\n", seed); 3069 break; 3070 case 'p': 3071 option_print = 1; 3072 break; 3073 case 'R': 3074 /* randomize */ 3075 seed = time (NULL); 3076 printf ("Seeding with %lu, re-run using \"-E %lu\"\n", seed, seed); 3077 break; 3078 case 'r': 3079 option_repetitions = atoi (optarg); 3080 break; 3081 case 's': 3082 { 3083 char *p; 3084 option_firstsize = strtol (optarg, 0, 0); 3085 if ((p = strchr (optarg, '-')) != NULL) 3086 option_lastsize = strtol (p+1, 0, 0); 3087 } 3088 break; 3089 case 'S': 3090 /* -S <size> sets the starting size for the second of a two size 3091 routine (like mpn_mul_basecase) */ 3092 option_firstsize2 = strtol (optarg, 0, 0); 3093 break; 3094 case 'W': 3095 /* use this when running in the debugger */ 3096 option_spinner = 0; 3097 break; 3098 case 'z': 3099 /* disable redzones */ 3100 option_redzones = 0; 3101 break; 3102 case '?': 3103 usage (argv[0]); 3104 break; 3105 } 3106 } 3107 3108 gmp_randinit_default (__gmp_rands); 3109 __gmp_rands_initialized = 1; 3110 gmp_randseed_ui (__gmp_rands, seed); 3111 } 3112 3113 try_init(); 3114 3115 if (argc <= optind) 3116 usage (argv[0]); 
3117 3118 for (i = optind; i < argc; i++) 3119 try_name (argv[i]); 3120 3121 return 0; 3122} 3123