1/* Run some tests on various mpn routines.
2
3   THIS IS A TEST PROGRAM USED ONLY FOR DEVELOPMENT.  IT'S ALMOST CERTAIN TO
4   BE SUBJECT TO INCOMPATIBLE CHANGES IN FUTURE VERSIONS OF GMP.
5
6Copyright 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2008, 2009 Free Software
7Foundation, Inc.
8
9This file is part of the GNU MP Library.
10
11The GNU MP Library is free software; you can redistribute it and/or modify
12it under the terms of the GNU Lesser General Public License as published by
13the Free Software Foundation; either version 3 of the License, or (at your
14option) any later version.
15
16The GNU MP Library is distributed in the hope that it will be useful, but
17WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
18or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
19License for more details.
20
21You should have received a copy of the GNU Lesser General Public License
22along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
23
24
25/* Usage: try [options] <function>...
26
27   For example, "./try mpn_add_n" to run tests of that function.
28
29   Combinations of alignments and overlaps are tested, with redzones above
30   or below the destinations, and with the sources write-protected.
31
32   The number of tests performed becomes ridiculously large with all the
33   combinations, and for that reason this can't be a part of a "make check",
34   it's meant only for development.  The code isn't very pretty either.
35
36   During development it can help to disable the redzones, since seeing the
37   rest of the destination written can show where the wrong part is, or if
38   the dst pointers are off by 1 or whatever.  The magic DEADVAL initial
39   fill (see below) will show locations never written.
40
41   The -s option can be used to test only certain size operands, which is
42   useful if some new code doesn't yet support say sizes less than the
43   unrolling, or whatever.
44
45   When a problem occurs it'll of course be necessary to run the program
46   under gdb to find out quite where, how and why it's going wrong.  Disable
47   the spinner with the -W option when doing this, or single stepping won't
48   work.  Using the "-1" option to run with simple data can be useful.
49
50   New functions to test can be added in try_array[].  If a new TYPE is
51   required then add it to the existing constants, set up its parameters in
52   param_init(), and add it to the call() function.  Extra parameter fields
53   can be added if necessary, or further interpretations given to existing
54   fields.
55
56
57   Portability:
58
59   This program is not designed for use on Cray vector systems under Unicos,
60   it will fail to compile due to missing _SC_PAGE_SIZE.  Those systems
61   don't really have pages or mprotect.  We could arrange to run the tests
62   without the redzones, but we haven't bothered currently.
63
64
65   Enhancements:
66
67   umul_ppmm support is not very good, lots of source data is generated
68   whereas only two limbs are needed.
69
70   Make a little scheme for interpreting the "SIZE" selections uniformly.
71
72   Make tr->size==SIZE_2 work, for the benefit of find_a which wants just 2
73   source limbs.  Possibly increase the default repetitions in that case.
74
75   Automatically detect gdb and disable the spinner (use -W for now).
76
77   Make a way to re-run a failing case in the debugger.  Have an option to
78   snapshot each test case before it's run so the data is available if a
79   segv occurs.  (This should be more reliable than the current print_all()
80   in the signal handler.)
81
82   When alignment means a dst isn't hard against the redzone, check the
83   space in between remains unchanged.
84
85   When a source overlaps a destination, don't run both s[i].high 0 and 1,
86   as s[i].high has no effect.  Maybe encode s[i].high into overlap->s[i].
87
88   When partial overlaps aren't done, don't loop over source alignments
89   during overlaps.
90
91   Try to make the looping code a bit less horrible.  Right now it's pretty
92   hard to see what iterations are actually done.
93
94   Perhaps specific setups and loops for each style of function under test
95   would be clearer than a parameterized general loop.  There's lots of
96   stuff common to all functions, but the exceptions get messy.
97
98   When there's no overlap, run with both src>dst and src<dst.  A subtle
99   calling-conventions violation occurred in a P6 copy which depended on the
100   relative location of src and dst.
101
102   multiplier_N is more or less a third source region for the addmul_N
103   routines, and could be done with the redzoned region scheme.
104
105*/
106
107
/* Always do assertion checking.  This is defined ahead of the #include
   lines below -- presumably so that the ASSERT macros pulled in from
   gmp-impl.h are the active kind; confirm against that header.  */
#define WANT_ASSERT 1
110
111#include "config.h"
112
113#include <errno.h>
114#include <limits.h>
115#include <signal.h>
116#include <stdio.h>
117#include <stdlib.h>
118#include <string.h>
119#include <time.h>
120
121#if HAVE_UNISTD_H
122#include <unistd.h>
123#endif
124
125#if HAVE_SYS_MMAN_H
126#include <sys/mman.h>
127#endif
128
129#include "gmp.h"
130#include "gmp-impl.h"
131#include "longlong.h"
132#include "tests.h"
133
134
/* Fallback declarations of the getopt variables, used when the
   HAVE_DECL_OPTARG test macro is zero or undefined (presumably a configure
   result delivered through config.h).  */
#if !HAVE_DECL_OPTARG
extern char *optarg;
extern int optind, opterr;
#endif

/* Fallback declaration of sys_nerr, which the replacement strerror()
   further down uses as its upper bound.  */
#if ! HAVE_DECL_SYS_NERR
extern int sys_nerr;
#endif

/* Fallback declaration of sys_errlist, the table the replacement
   strerror() further down indexes.  */
#if ! HAVE_DECL_SYS_ERRLIST
extern char *sys_errlist[];
#endif
147
148#if ! HAVE_STRERROR
149char *
150strerror (int n)
151{
152  if (n < 0 || n >= sys_nerr)
153    return "errno out of range";
154  else
155    return sys_errlist[n];
156}
157#endif
158
/* Rumour has it some systems lack a define of PROT_NONE.  Supply 0 so code
   naming it still compiles.  */
#ifndef PROT_NONE
#define PROT_NONE   0
#endif

/* Dummy defines for when mprotect doesn't exist.  They only keep
   references to the names compiling.  */
#ifndef PROT_READ
#define PROT_READ   0
#endif
#ifndef PROT_WRITE
#define PROT_WRITE  0
#endif

/* _SC_PAGESIZE is standard, but hpux 9 and possibly other systems have
   _SC_PAGE_SIZE instead.  Alias the standard name to it so only
   _SC_PAGESIZE needs to be written elsewhere.  */
#if defined (_SC_PAGE_SIZE) && ! defined (_SC_PAGESIZE)
#define _SC_PAGESIZE  _SC_PAGE_SIZE
#endif
177
178
/* Optional hooks: when EXTRA_PROTOS and/or EXTRA_PROTOS2 are defined, their
   expansions are dropped in here at file scope.
   NOTE(review): presumably these carry prototypes for extra functions
   supplied by the build; where they get defined is not visible here.  */
#ifdef EXTRA_PROTOS
EXTRA_PROTOS
#endif
#ifdef EXTRA_PROTOS2
EXTRA_PROTOS2
#endif
185
186
/* Initial value of option_repetitions.  */
#define DEFAULT_REPETITIONS  10

/* Run-time options, shown here with their defaults.
   NOTE(review): presumably overridden by the command line options described
   in the usage comment at the top of the file (-W for the spinner, -s for
   sizes, ...); the option parsing is not in this part of the file.  */
int  option_repetitions = DEFAULT_REPETITIONS;
int  option_spinner = 1;        /* non-zero: spinner enabled */
int  option_redzones = 1;       /* non-zero: redzones enabled */
int  option_firstsize = 0;
int  option_lastsize = 500;
int  option_firstsize2 = 0;

/* Counts used by the test iteration.
   NOTE(review): judging by the names these bound the alignment, overlap,
   carry, multiplier, divisor and fraction loops; the loops themselves are
   outside this part of the file -- confirm there.  */
#define ALIGNMENTS          4
#define OVERLAPS            4
#define CARRY_RANDOMS       5
#define MULTIPLIER_RANDOMS  5
#define DIVISOR_RANDOMS     5
#define FRACTION_COUNT      4

int  option_print = 0;

/* Values for option_data, selecting the style of source data.  DATA_TRAND
   is the default.
   NOTE(review): the exact fill each value produces is decided by code
   outside this part of the file.  */
#define DATA_TRAND  0
#define DATA_ZEROS  1
#define DATA_SEQ    2
#define DATA_FFS    3
#define DATA_2FD    4
int  option_data = DATA_TRAND;
211
212
/* Page size in bytes (PAGESIZE_LIMBS converts it to limbs).  It is only
   declared here; it gets its value elsewhere in the file.  */
mp_size_t  pagesize;
#define PAGESIZE_LIMBS  (pagesize / BYTES_PER_MP_LIMB)

/* Size of a redzone: 16 pages, which keeps it a multiple of the page size
   as required.  REDZONE_LIMBS is the same amount counted in limbs.  */
/* must be a multiple of the page size */
#define REDZONE_BYTES   (pagesize * 16)
#define REDZONE_LIMBS   (REDZONE_BYTES / BYTES_PER_MP_LIMB)


/* Largest of three values, built from two uses of MAX.  */
#define MAX3(x,y,z)   (MAX (x, MAX (y, z)))

/* The magic initial fill value mentioned in the usage comment at the top of
   the file: locations never written still show it.  A 32-bit constant is
   used when limbs are 32 bits, otherwise a 64-bit one.  */
#if GMP_LIMB_BITS == 32
#define DEADVAL  CNST_LIMB(0xDEADBEEF)
#else
#define DEADVAL  CNST_LIMB(0xDEADBEEFBADDCAFE)
#endif
228
229
/* A block of limbs: where it starts and how big it is.
   NOTE(review): size is presumably a limb count, and the region presumably
   the redzoned allocation an operand lives in -- confirm against the
   allocation code elsewhere in the file.  */
struct region_t {
  mp_ptr     ptr;
  mp_size_t  size;
};
234
235
/* Values for trap_location, which starts out as TRAP_NOWHERE.
   NOTE(review): presumably this records which phase (reference function,
   function under test, or setups) is running so a signal handler can
   report where a fault happened; the code setting and reading it is not in
   this part of the file.  */
#define TRAP_NOWHERE 0
#define TRAP_REF     1
#define TRAP_FUN     2
#define TRAP_SETUPS  3
int trap_location = TRAP_NOWHERE;
241
242
/* At most this many source and destination operands per function.  */
#define NUM_SOURCES  2
#define NUM_DESTS    2

/* Settings for one source operand, shared by the reference function and
   the function under test.  The validate functions read the operand data
   through p.
   NOTE(review): high and align presumably select the operand's placement
   within region (at the high or low end, and an alignment offset) as
   described in the usage comment -- confirm in the setup code.  */
struct source_t {
  struct region_t  region;
  int        high;
  mp_size_t  align;
  mp_ptr     p;
};

struct source_t  s[NUM_SOURCES];

/* Settings for one destination operand.  Unlike a source there is no
   region or pointer here; those are per-function, in struct dest_each_t.
   NOTE(review): high and align presumably have the same meaning as in
   struct source_t -- confirm in the setup code.  */
struct dest_t {
  int        high;
  mp_size_t  align;
  mp_size_t  size;
};

struct dest_t  d[NUM_DESTS];
262
/* Per-function (ref or fun) pointer to a source operand.  */
struct source_each_t {
  mp_ptr     p;
};

/* Per-function (ref or fun) destination: its own region plus a pointer to
   the operand.  The validate functions read results through p.  */
struct dest_each_t {
  struct region_t  region;
  mp_ptr     p;
};

/* Parameters of the test case currently being run.  The validate functions
   read size, carry and divisor from here.
   NOTE(review): which of these are in use for a given function is
   presumably governed by the matching struct try_t fields.  */
mp_size_t       size;
mp_size_t       size2;
unsigned long   shift;
mp_limb_t       carry;
mp_limb_t       divisor;
mp_limb_t       multiplier;
mp_limb_t       multiplier_N[8];

/* Everything kept separately for each of the two functions run on a test
   case: a display name, the destinations, the sources, and the value
   returned.  */
struct each_t {
  const char  *name;
  struct dest_each_t    d[NUM_DESTS];
  struct source_each_t  s[NUM_SOURCES];
  mp_limb_t  retval;
};

/* One instance named "Ref" and one named "Fun"; only the name is given an
   initializer, the remaining members start out zero.  */
struct each_t  ref = { "Ref" };
struct each_t  fun = { "Fun" };
289
/* Size of source operand N: size2 when N is 1 and the current function's
   parameters (tr) have a non-zero size2 field, otherwise size.  */
#define SRC_SIZE(n)  ((n) == 1 && tr->size2 ? size2 : size)

/* Called by the validate functions when a check fails.  Defined later in
   the file.  */
void validate_fail __GMP_PROTO ((void));
293
294
295#if HAVE_TRY_NEW_C
296#include "try-new.c"
297#endif
298
299
/* Generic function pointer type that reference functions are cast to (see
   the REFERENCE macro in param_init); the real argument lists vary.  */
typedef mp_limb_t (*tryfun_t) __GMP_PROTO ((ANYARGS));
301
/* Parameters describing one TYPE_xxx style of function: which operands it
   takes, how they are sized, what extra scalar arguments it has, and how
   its results are checked.  param_init() fills in one of these per type.
   All fields default to zero since param[] is a file-scope array.

   NOTE(review): the per-field remarks below are drawn from the field
   names, the constants defined next to them and the way param_init() sets
   them; the code acting on the fields is outside this part of the file.  */
struct try_t {
  /* set for functions whose return value is to be checked (presumably) */
  char  retval;

  /* non-zero for each source / destination operand the function uses */
  char  src[2];
  char  dst[2];

  /* SIZE_xxx values, used in the size, size2 and dst_size[] fields */
#define SIZE_YES          1
#define SIZE_ALLOW_ZERO   2
#define SIZE_1            3  /* 1 limb  */
#define SIZE_2            4  /* 2 limbs */
#define SIZE_3            5  /* 3 limbs */
#define SIZE_FRACTION     6  /* size2 is fraction for divrem etc */
#define SIZE_SIZE2        7
#define SIZE_PLUS_1       8
#define SIZE_SUM          9
#define SIZE_DIFF        10
#define SIZE_DIFF_PLUS_1 11
#define SIZE_RETVAL      12
#define SIZE_CEIL_HALF   13
#define SIZE_GET_STR     14
#define SIZE_PLUS_MSIZE_SUB_1 15  /* size+msize-1 */
  char  size;
  /* non-zero makes SRC_SIZE(1) use the global size2 instead of size */
  char  size2;
  char  dst_size[2];

  /* multiplier_N size in limbs */
  mp_size_t  msize;

  /* set only for get_str; presumably the destination is bytes, not limbs */
  char  dst_bytes[2];

  /* set for the addmul/submul types; presumably the initial contents of
     dst[0] are taken from source 1 */
  char  dst0_from_src1;

  /* CARRY_xxx values for the carry field; zero means no carry argument */
#define CARRY_BIT     1  /* single bit 0 or 1 */
#define CARRY_3       2  /* 0, 1, 2 */
#define CARRY_4       3  /* 0 to 3 */
#define CARRY_LIMB    4  /* any limb value */
#define CARRY_DIVISOR 5  /* carry<divisor */
  char  carry;

  /* a fudge to tell the output when to print negatives */
  char  carry_sign;

  /* non-zero when the function takes a multiplier / a shift argument */
  char  multiplier;
  char  shift;

  /* DIVISOR_xxx values for the divisor field; zero means no divisor */
#define DIVISOR_LIMB  1
#define DIVISOR_NORM  2
#define DIVISOR_ODD   3
  char  divisor;

  /* DATA_xxx values for the data field, constraining the source data */
#define DATA_NON_ZERO         1
#define DATA_GCD              2
#define DATA_SRC0_ODD         3
#define DATA_SRC0_HIGHBIT     4
#define DATA_SRC1_ODD         5
#define DATA_SRC1_HIGHBIT     6
#define DATA_MULTIPLE_DIVISOR 7
#define DATA_UDIV_QRNND       8
  char  data;

/* Default is allow full overlap. */
#define OVERLAP_NONE         1
#define OVERLAP_LOW_TO_HIGH  2
#define OVERLAP_HIGH_TO_LOW  3
#define OVERLAP_NOT_SRCS     4
#define OVERLAP_NOT_SRC2     8
  char  overlap;

  /* reference implementation to compare against, and its name as text */
  tryfun_t    reference;
  const char  *reference_name;

  /* function checking the result by other means, and its name as text */
  void        (*validate) __GMP_PROTO ((void));
  const char  *validate_name;
};

/* Parameters of the function currently being tested (read by SRC_SIZE).  */
struct try_t  *tr;
378
379
380void
381validate_mod_34lsub1 (void)
382{
383#define CNST_34LSUB1   ((CNST_LIMB(1) << (3 * (GMP_NUMB_BITS / 4))) - 1)
384
385  mp_srcptr  ptr = s[0].p;
386  int        error = 0;
387  mp_limb_t  got, got_mod, want, want_mod;
388
389  ASSERT (size >= 1);
390
391  got = fun.retval;
392  got_mod = got % CNST_34LSUB1;
393
394  want = refmpn_mod_34lsub1 (ptr, size);
395  want_mod = want % CNST_34LSUB1;
396
397  if (got_mod != want_mod)
398    {
399      gmp_printf ("got   0x%MX reduced from 0x%MX\n", got_mod, got);
400      gmp_printf ("want  0x%MX reduced from 0x%MX\n", want_mod, want);
401      error = 1;
402    }
403
404  if (error)
405    validate_fail ();
406}
407
408void
409validate_divexact_1 (void)
410{
411  mp_srcptr  src = s[0].p;
412  mp_srcptr  dst = fun.d[0].p;
413  int  error = 0;
414
415  ASSERT (size >= 1);
416
417  {
418    mp_ptr     tp = refmpn_malloc_limbs (size);
419    mp_limb_t  rem;
420
421    rem = refmpn_divrem_1 (tp, 0, src, size, divisor);
422    if (rem != 0)
423      {
424	gmp_printf ("Remainder a%%d == 0x%MX, mpn_divexact_1 undefined\n", rem);
425	error = 1;
426      }
427    if (! refmpn_equal_anynail (tp, dst, size))
428      {
429	printf ("Quotient a/d wrong\n");
430	mpn_trace ("fun ", dst, size);
431	mpn_trace ("want", tp, size);
432	error = 1;
433      }
434    free (tp);
435  }
436
437  if (error)
438    validate_fail ();
439}
440
441
442void
443validate_modexact_1c_odd (void)
444{
445  mp_srcptr  ptr = s[0].p;
446  mp_limb_t  r = fun.retval;
447  int  error = 0;
448
449  ASSERT (size >= 1);
450  ASSERT (divisor & 1);
451
452  if ((r & GMP_NAIL_MASK) != 0)
453    printf ("r has non-zero nail\n");
454
455  if (carry < divisor)
456    {
457      if (! (r < divisor))
458	{
459	  printf ("Don't have r < divisor\n");
460	  error = 1;
461	}
462    }
463  else /* carry >= divisor */
464    {
465      if (! (r <= divisor))
466	{
467	  printf ("Don't have r <= divisor\n");
468	  error = 1;
469	}
470    }
471
472  {
473    mp_limb_t  c = carry % divisor;
474    mp_ptr     tp = refmpn_malloc_limbs (size+1);
475    mp_size_t  k;
476
477    for (k = size-1; k <= size; k++)
478      {
479	/* set {tp,size+1} to r*b^k + a - c */
480	refmpn_copyi (tp, ptr, size);
481	tp[size] = 0;
482	ASSERT_NOCARRY (refmpn_add_1 (tp+k, tp+k, size+1-k, r));
483	if (refmpn_sub_1 (tp, tp, size+1, c))
484	  ASSERT_CARRY (mpn_add_1 (tp, tp, size+1, divisor));
485
486	if (refmpn_mod_1 (tp, size+1, divisor) == 0)
487	  goto good_remainder;
488      }
489    printf ("Remainder matches neither r*b^(size-1) nor r*b^size\n");
490    error = 1;
491
492  good_remainder:
493    free (tp);
494  }
495
496  if (error)
497    validate_fail ();
498}
499
500void
501validate_modexact_1_odd (void)
502{
503  carry = 0;
504  validate_modexact_1c_odd ();
505}
506
507
508void
509validate_sqrtrem (void)
510{
511  mp_srcptr  orig_ptr = s[0].p;
512  mp_size_t  orig_size = size;
513  mp_size_t  root_size = (size+1)/2;
514  mp_srcptr  root_ptr = fun.d[0].p;
515  mp_size_t  rem_size = fun.retval;
516  mp_srcptr  rem_ptr = fun.d[1].p;
517  mp_size_t  prod_size = 2*root_size;
518  mp_ptr     p;
519  int  error = 0;
520
521  if (rem_size < 0 || rem_size > size)
522    {
523      printf ("Bad remainder size retval %ld\n", (long) rem_size);
524      validate_fail ();
525    }
526
527  p = refmpn_malloc_limbs (prod_size);
528
529  p[root_size] = refmpn_lshift (p, root_ptr, root_size, 1);
530  if (refmpn_cmp_twosizes (p,root_size+1, rem_ptr,rem_size) < 0)
531    {
532      printf ("Remainder bigger than 2*root\n");
533      error = 1;
534    }
535
536  refmpn_sqr (p, root_ptr, root_size);
537  if (rem_size != 0)
538    refmpn_add (p, p, prod_size, rem_ptr, rem_size);
539  if (refmpn_cmp_twosizes (p,prod_size, orig_ptr,orig_size) != 0)
540    {
541      printf ("root^2+rem != original\n");
542      mpn_trace ("prod", p, prod_size);
543      error = 1;
544    }
545  free (p);
546
547  if (error)
548    validate_fail ();
549}
550
551
/* These types are indexes into the param[] array and are arbitrary so long
   as they're all distinct and within the size of param[].  Renumber
   whenever necessary or desired.

   The values are not in numerical order within the groups below (for
   instance TYPE_MUL_3 is 92 and TYPE_SUBLSH_N is 130), so check the whole
   list for clashes when adding a new one.  The largest value in use is
   130, and param[] has 150 entries.  */

/* add and subtract */
#define TYPE_ADD               1
#define TYPE_ADD_N             2
#define TYPE_ADD_NC            3
#define TYPE_SUB               4
#define TYPE_SUB_N             5
#define TYPE_SUB_NC            6

/* multiply by one limb */
#define TYPE_MUL_1             7
#define TYPE_MUL_1C            8

/* multiply by several limbs */
#define TYPE_MUL_2             9
#define TYPE_MUL_3             92
#define TYPE_MUL_4             93

/* multiply by one limb and accumulate */
#define TYPE_ADDMUL_1         10
#define TYPE_ADDMUL_1C        11
#define TYPE_SUBMUL_1         12
#define TYPE_SUBMUL_1C        13

/* multiply by several limbs and accumulate */
#define TYPE_ADDMUL_2         14
#define TYPE_ADDMUL_3         15
#define TYPE_ADDMUL_4         16
#define TYPE_ADDMUL_5         17
#define TYPE_ADDMUL_6         18
#define TYPE_ADDMUL_7         19
#define TYPE_ADDMUL_8         20

/* combined add and subtract */
#define TYPE_ADDSUB_N         21
#define TYPE_ADDSUB_NC        22

/* shifts */
#define TYPE_RSHIFT           23
#define TYPE_LSHIFT           24
#define TYPE_LSHIFTC          25

/* copies and complement */
#define TYPE_COPY             26
#define TYPE_COPYI            27
#define TYPE_COPYD            28
#define TYPE_COM              29

/* add/subtract combined with a shift */
#define TYPE_ADDLSH1_N        30
#define TYPE_ADDLSH2_N        48
#define TYPE_ADDLSH_N         49
#define TYPE_SUBLSH1_N        31
#define TYPE_SUBLSH_N        130
#define TYPE_RSBLSH1_N        34
#define TYPE_RSBLSH2_N        46
#define TYPE_RSBLSH_N         47
#define TYPE_RSH1ADD_N        32
#define TYPE_RSH1SUB_N        33

/* division and remainder by one limb */
#define TYPE_MOD_1            35
#define TYPE_MOD_1C           36
#define TYPE_DIVMOD_1         37
#define TYPE_DIVMOD_1C        38
#define TYPE_DIVREM_1         39
#define TYPE_DIVREM_1C        40
#define TYPE_PREINV_DIVREM_1  41
#define TYPE_PREINV_MOD_1     42
#define TYPE_MOD_34LSUB1      43
#define TYPE_UDIV_QRNND       44
#define TYPE_UDIV_QRNND_R     45

/* exact division and exact-style remainder */
#define TYPE_DIVEXACT_1       50
#define TYPE_DIVEXACT_BY3     51
#define TYPE_DIVEXACT_BY3C    52
#define TYPE_MODEXACT_1_ODD   53
#define TYPE_MODEXACT_1C_ODD  54

/* inverses */
#define TYPE_INVERT           55
#define TYPE_BINVERT          56

/* gcd, jacobi and kronecker */
#define TYPE_GCD              60
#define TYPE_GCD_1            61
#define TYPE_GCD_FINDA        62
#define TYPE_MPZ_JACOBI       63
#define TYPE_MPZ_KRONECKER    64
#define TYPE_MPZ_KRONECKER_UI 65
#define TYPE_MPZ_KRONECKER_SI 66
#define TYPE_MPZ_UI_KRONECKER 67
#define TYPE_MPZ_SI_KRONECKER 68

/* logical operations */
#define TYPE_AND_N            70
#define TYPE_NAND_N           71
#define TYPE_ANDN_N           72
#define TYPE_IOR_N            73
#define TYPE_IORN_N           74
#define TYPE_NIOR_N           75
#define TYPE_XOR_N            76
#define TYPE_XNOR_N           77

/* full multiplication and squaring */
#define TYPE_MUL_MN           80
#define TYPE_MUL_N            81
#define TYPE_SQR              82
#define TYPE_UMUL_PPMM        83
#define TYPE_UMUL_PPMM_R      84
#define TYPE_MULLO_N          85

/* multi-limb division */
#define TYPE_SBPI1_DIV_QR     90
#define TYPE_TDIV_QR          91

/* miscellaneous */
#define TYPE_SQRTREM          100
#define TYPE_ZERO             101
#define TYPE_GET_STR          102
#define TYPE_POPCOUNT         103
#define TYPE_HAMDIST          104

#define TYPE_EXTRA            110

/* Parameters for each type, indexed by the TYPE_xxx values above and
   filled in by param_init().  Being at file scope, every field of every
   entry starts out zero.  */
struct try_t  param[150];
665
666
/* Fill in param[] with the parameters of every TYPE_xxx.

   Each entry is set up either from scratch, relying on the fields of the
   file-scope param[] array starting out zero, or by copying an entry set
   up earlier and then adjusting it.  Because of the copying the order of
   the sections below matters: an entry must be complete before COPY is
   applied to it.

   Helper macros, all acting on the local pointer p:

     COPY(index)     copy the whole of param[index] into *p
     REFERENCE(fun)  record fun as the reference function, and its name
     VALIDATE(fun)   record fun as the validate function, and its name

   REFERENCE and VALIDATE each expand to two statements, so they must only
   be used as complete statements, never as the unbraced body of an if or
   a loop.  When HAVE_STRINGIZE is not set the recorded name is just the
   literal text "fun".  */
void
param_init (void)
{
  struct try_t  *p;

#define COPY(index)  memcpy (p, &param[index], sizeof (*p))

#if HAVE_STRINGIZE
#define REFERENCE(fun)                  \
  p->reference = (tryfun_t) fun;        \
  p->reference_name = #fun
#define VALIDATE(fun)           \
  p->validate = fun;            \
  p->validate_name = #fun
#else
#define REFERENCE(fun)                  \
  p->reference = (tryfun_t) fun;        \
  p->reference_name = "fun"
#define VALIDATE(fun)           \
  p->validate = fun;            \
  p->validate_name = "fun"
#endif


  /* add and subtract; TYPE_ADD_N is the template for many entries below */
  p = &param[TYPE_ADD_N];
  p->retval = 1;
  p->dst[0] = 1;
  p->src[0] = 1;
  p->src[1] = 1;
  REFERENCE (refmpn_add_n);

  p = &param[TYPE_ADD_NC];
  COPY (TYPE_ADD_N);
  p->carry = CARRY_BIT;
  REFERENCE (refmpn_add_nc);

  p = &param[TYPE_SUB_N];
  COPY (TYPE_ADD_N);
  REFERENCE (refmpn_sub_n);

  p = &param[TYPE_SUB_NC];
  COPY (TYPE_ADD_NC);
  REFERENCE (refmpn_sub_nc);

  /* two-size add and subtract */
  p = &param[TYPE_ADD];
  COPY (TYPE_ADD_N);
  p->size = SIZE_ALLOW_ZERO;
  p->size2 = 1;
  REFERENCE (refmpn_add);

  p = &param[TYPE_SUB];
  COPY (TYPE_ADD);
  REFERENCE (refmpn_sub);


  /* multiply by a single limb */
  p = &param[TYPE_MUL_1];
  p->retval = 1;
  p->dst[0] = 1;
  p->src[0] = 1;
  p->multiplier = 1;
  p->overlap = OVERLAP_LOW_TO_HIGH;
  REFERENCE (refmpn_mul_1);

  p = &param[TYPE_MUL_1C];
  COPY (TYPE_MUL_1);
  p->carry = CARRY_LIMB;
  REFERENCE (refmpn_mul_1c);


  /* multiply by msize limbs; the destination is size+msize-1 limbs */
  p = &param[TYPE_MUL_2];
  p->retval = 1;
  p->dst[0] = 1;
  p->dst_size[0] = SIZE_PLUS_MSIZE_SUB_1;
  p->src[0] = 1;
  p->src[1] = 1;
  p->msize = 2;
  p->overlap = OVERLAP_NOT_SRC2;
  REFERENCE (refmpn_mul_2);

  p = &param[TYPE_MUL_3];
  COPY (TYPE_MUL_2);
  p->msize = 3;
  REFERENCE (refmpn_mul_3);

  p = &param[TYPE_MUL_4];
  COPY (TYPE_MUL_2);
  p->msize = 4;
  REFERENCE (refmpn_mul_4);


  /* multiply by a single limb and add to / subtract from the destination */
  p = &param[TYPE_ADDMUL_1];
  p->retval = 1;
  p->dst[0] = 1;
  p->src[0] = 1;
  p->multiplier = 1;
  p->dst0_from_src1 = 1;
  REFERENCE (refmpn_addmul_1);

  p = &param[TYPE_ADDMUL_1C];
  COPY (TYPE_ADDMUL_1);
  p->carry = CARRY_LIMB;
  REFERENCE (refmpn_addmul_1c);

  p = &param[TYPE_SUBMUL_1];
  COPY (TYPE_ADDMUL_1);
  REFERENCE (refmpn_submul_1);

  p = &param[TYPE_SUBMUL_1C];
  COPY (TYPE_ADDMUL_1C);
  REFERENCE (refmpn_submul_1c);


  /* multiply by msize limbs and add to the destination */
  p = &param[TYPE_ADDMUL_2];
  p->retval = 1;
  p->dst[0] = 1;
  p->dst_size[0] = SIZE_PLUS_MSIZE_SUB_1;
  p->src[0] = 1;
  p->src[1] = 1;
  p->msize = 2;
  p->dst0_from_src1 = 1;
  p->overlap = OVERLAP_NOT_SRC2;
  REFERENCE (refmpn_addmul_2);

  p = &param[TYPE_ADDMUL_3];
  COPY (TYPE_ADDMUL_2);
  p->msize = 3;
  REFERENCE (refmpn_addmul_3);

  p = &param[TYPE_ADDMUL_4];
  COPY (TYPE_ADDMUL_2);
  p->msize = 4;
  REFERENCE (refmpn_addmul_4);

  p = &param[TYPE_ADDMUL_5];
  COPY (TYPE_ADDMUL_2);
  p->msize = 5;
  REFERENCE (refmpn_addmul_5);

  p = &param[TYPE_ADDMUL_6];
  COPY (TYPE_ADDMUL_2);
  p->msize = 6;
  REFERENCE (refmpn_addmul_6);

  p = &param[TYPE_ADDMUL_7];
  COPY (TYPE_ADDMUL_2);
  p->msize = 7;
  REFERENCE (refmpn_addmul_7);

  p = &param[TYPE_ADDMUL_8];
  COPY (TYPE_ADDMUL_2);
  p->msize = 8;
  REFERENCE (refmpn_addmul_8);


  /* logical operations: two sources, one destination, no return value */
  p = &param[TYPE_AND_N];
  p->dst[0] = 1;
  p->src[0] = 1;
  p->src[1] = 1;
  REFERENCE (refmpn_and_n);

  p = &param[TYPE_ANDN_N];
  COPY (TYPE_AND_N);
  REFERENCE (refmpn_andn_n);

  p = &param[TYPE_NAND_N];
  COPY (TYPE_AND_N);
  REFERENCE (refmpn_nand_n);

  p = &param[TYPE_IOR_N];
  COPY (TYPE_AND_N);
  REFERENCE (refmpn_ior_n);

  p = &param[TYPE_IORN_N];
  COPY (TYPE_AND_N);
  REFERENCE (refmpn_iorn_n);

  p = &param[TYPE_NIOR_N];
  COPY (TYPE_AND_N);
  REFERENCE (refmpn_nior_n);

  p = &param[TYPE_XOR_N];
  COPY (TYPE_AND_N);
  REFERENCE (refmpn_xor_n);

  p = &param[TYPE_XNOR_N];
  COPY (TYPE_AND_N);
  REFERENCE (refmpn_xnor_n);


  /* combined add and subtract: two sources, two destinations */
  p = &param[TYPE_ADDSUB_N];
  p->retval = 1;
  p->dst[0] = 1;
  p->dst[1] = 1;
  p->src[0] = 1;
  p->src[1] = 1;
  REFERENCE (refmpn_add_n_sub_n);

  p = &param[TYPE_ADDSUB_NC];
  COPY (TYPE_ADDSUB_N);
  p->carry = CARRY_4;
  REFERENCE (refmpn_add_n_sub_nc);


  /* copies, differing in the overlap permitted */
  p = &param[TYPE_COPY];
  p->dst[0] = 1;
  p->src[0] = 1;
  p->overlap = OVERLAP_NONE;
  p->size = SIZE_ALLOW_ZERO;
  REFERENCE (refmpn_copy);

  p = &param[TYPE_COPYI];
  p->dst[0] = 1;
  p->src[0] = 1;
  p->overlap = OVERLAP_LOW_TO_HIGH;
  p->size = SIZE_ALLOW_ZERO;
  REFERENCE (refmpn_copyi);

  p = &param[TYPE_COPYD];
  p->dst[0] = 1;
  p->src[0] = 1;
  p->overlap = OVERLAP_HIGH_TO_LOW;
  p->size = SIZE_ALLOW_ZERO;
  REFERENCE (refmpn_copyd);

  p = &param[TYPE_COM];
  p->dst[0] = 1;
  p->src[0] = 1;
  REFERENCE (refmpn_com);


  /* add/subtract combined with a shift; the variants taking a shift count
     are copied from TYPE_ADDLSH_N, which has the shift field set */
  p = &param[TYPE_ADDLSH1_N];
  COPY (TYPE_ADD_N);
  REFERENCE (refmpn_addlsh1_n);

  p = &param[TYPE_ADDLSH2_N];
  COPY (TYPE_ADD_N);
  REFERENCE (refmpn_addlsh2_n);

  p = &param[TYPE_ADDLSH_N];
  COPY (TYPE_ADD_N);
  p->shift = 1;
  REFERENCE (refmpn_addlsh_n);

  p = &param[TYPE_SUBLSH1_N];
  COPY (TYPE_ADD_N);
  REFERENCE (refmpn_sublsh1_n);

  p = &param[TYPE_SUBLSH_N];
  COPY (TYPE_ADDLSH_N);
  REFERENCE (refmpn_sublsh_n);

  p = &param[TYPE_RSBLSH1_N];
  COPY (TYPE_ADD_N);
  REFERENCE (refmpn_rsblsh1_n);

  p = &param[TYPE_RSBLSH2_N];
  COPY (TYPE_ADD_N);
  REFERENCE (refmpn_rsblsh2_n);

  p = &param[TYPE_RSBLSH_N];
  COPY (TYPE_ADDLSH_N);
  REFERENCE (refmpn_rsblsh_n);

  p = &param[TYPE_RSH1ADD_N];
  COPY (TYPE_ADD_N);
  REFERENCE (refmpn_rsh1add_n);

  p = &param[TYPE_RSH1SUB_N];
  COPY (TYPE_ADD_N);
  REFERENCE (refmpn_rsh1sub_n);


  /* remainder and division by a single limb */
  p = &param[TYPE_MOD_1];
  p->retval = 1;
  p->src[0] = 1;
  p->size = SIZE_ALLOW_ZERO;
  p->divisor = DIVISOR_LIMB;
  REFERENCE (refmpn_mod_1);

  p = &param[TYPE_MOD_1C];
  COPY (TYPE_MOD_1);
  p->carry = CARRY_DIVISOR;
  REFERENCE (refmpn_mod_1c);

  p = &param[TYPE_DIVMOD_1];
  COPY (TYPE_MOD_1);
  p->dst[0] = 1;
  REFERENCE (refmpn_divmod_1);

  p = &param[TYPE_DIVMOD_1C];
  COPY (TYPE_DIVMOD_1);
  p->carry = CARRY_DIVISOR;
  REFERENCE (refmpn_divmod_1c);

  p = &param[TYPE_DIVREM_1];
  COPY (TYPE_DIVMOD_1);
  p->size2 = SIZE_FRACTION;
  p->dst_size[0] = SIZE_SUM;
  REFERENCE (refmpn_divrem_1);

  p = &param[TYPE_DIVREM_1C];
  COPY (TYPE_DIVREM_1);
  p->carry = CARRY_DIVISOR;
  REFERENCE (refmpn_divrem_1c);

  p = &param[TYPE_PREINV_DIVREM_1];
  COPY (TYPE_DIVREM_1);
  p->size = SIZE_YES; /* ie. no size==0 */
  REFERENCE (refmpn_preinv_divrem_1);

  p = &param[TYPE_PREINV_MOD_1];
  p->retval = 1;
  p->src[0] = 1;
  p->divisor = DIVISOR_NORM;
  REFERENCE (refmpn_preinv_mod_1);

  /* checked by a validate function only, there's no reference function */
  p = &param[TYPE_MOD_34LSUB1];
  p->retval = 1;
  p->src[0] = 1;
  VALIDATE (validate_mod_34lsub1);

  /* the divisor must be normalized if udiv_qrnnd requires that */
  p = &param[TYPE_UDIV_QRNND];
  p->retval = 1;
  p->src[0] = 1;
  p->dst[0] = 1;
  p->dst_size[0] = SIZE_1;
  p->divisor = UDIV_NEEDS_NORMALIZATION ? DIVISOR_NORM : DIVISOR_LIMB;
  p->data = DATA_UDIV_QRNND;
  p->overlap = OVERLAP_NONE;
  REFERENCE (refmpn_udiv_qrnnd);

  p = &param[TYPE_UDIV_QRNND_R];
  COPY (TYPE_UDIV_QRNND);
  REFERENCE (refmpn_udiv_qrnnd_r);


  /* exact division; this entry has both a validate function and a
     reference function, and no retval */
  p = &param[TYPE_DIVEXACT_1];
  p->dst[0] = 1;
  p->src[0] = 1;
  p->divisor = DIVISOR_LIMB;
  p->data = DATA_MULTIPLE_DIVISOR;
  VALIDATE (validate_divexact_1);
  REFERENCE (refmpn_divmod_1);


  p = &param[TYPE_DIVEXACT_BY3];
  p->retval = 1;
  p->dst[0] = 1;
  p->src[0] = 1;
  REFERENCE (refmpn_divexact_by3);

  p = &param[TYPE_DIVEXACT_BY3C];
  COPY (TYPE_DIVEXACT_BY3);
  p->carry = CARRY_3;
  REFERENCE (refmpn_divexact_by3c);


  /* exact-style remainders, checked by validate functions only */
  p = &param[TYPE_MODEXACT_1_ODD];
  p->retval = 1;
  p->src[0] = 1;
  p->divisor = DIVISOR_ODD;
  VALIDATE (validate_modexact_1_odd);

  /* the VALIDATE here replaces the one inherited through COPY */
  p = &param[TYPE_MODEXACT_1C_ODD];
  COPY (TYPE_MODEXACT_1_ODD);
  p->carry = CARRY_LIMB;
  VALIDATE (validate_modexact_1c_odd);


  /* gcd */
  p = &param[TYPE_GCD_1];
  p->retval = 1;
  p->src[0] = 1;
  p->data = DATA_NON_ZERO;
  p->divisor = DIVISOR_LIMB;
  REFERENCE (refmpn_gcd_1);

  p = &param[TYPE_GCD];
  p->retval = 1;
  p->dst[0] = 1;
  p->src[0] = 1;
  p->src[1] = 1;
  p->size2 = 1;
  p->dst_size[0] = SIZE_RETVAL;
  p->overlap = OVERLAP_NOT_SRCS;
  p->data = DATA_GCD;
  REFERENCE (refmpn_gcd);


  /* jacobi and kronecker symbols */
  p = &param[TYPE_MPZ_JACOBI];
  p->retval = 1;
  p->src[0] = 1;
  p->size = SIZE_ALLOW_ZERO;
  p->src[1] = 1;
  p->data = DATA_SRC1_ODD;
  p->size2 = 1;
  p->carry = CARRY_4;
  p->carry_sign = 1;
  REFERENCE (refmpz_jacobi);

  p = &param[TYPE_MPZ_KRONECKER];
  COPY (TYPE_MPZ_JACOBI);
  p->data = 0;			/* clear inherited DATA_SRC1_ODD */
  REFERENCE (refmpz_kronecker);


  p = &param[TYPE_MPZ_KRONECKER_UI];
  p->retval = 1;
  p->src[0] = 1;
  p->size = SIZE_ALLOW_ZERO;
  p->multiplier = 1;
  p->carry = CARRY_BIT;
  REFERENCE (refmpz_kronecker_ui);

  p = &param[TYPE_MPZ_KRONECKER_SI];
  COPY (TYPE_MPZ_KRONECKER_UI);
  REFERENCE (refmpz_kronecker_si);

  p = &param[TYPE_MPZ_UI_KRONECKER];
  COPY (TYPE_MPZ_KRONECKER_UI);
  REFERENCE (refmpz_ui_kronecker);

  p = &param[TYPE_MPZ_SI_KRONECKER];
  COPY (TYPE_MPZ_KRONECKER_UI);
  REFERENCE (refmpz_si_kronecker);


  /* squaring and full multiplication, with no overlap permitted */
  p = &param[TYPE_SQR];
  p->dst[0] = 1;
  p->src[0] = 1;
  p->dst_size[0] = SIZE_SUM;
  p->overlap = OVERLAP_NONE;
  REFERENCE (refmpn_sqr);

  p = &param[TYPE_MUL_N];
  COPY (TYPE_SQR);
  p->src[1] = 1;
  REFERENCE (refmpn_mul_n);

  /* clear the SIZE_SUM inherited from TYPE_MUL_N */
  p = &param[TYPE_MULLO_N];
  COPY (TYPE_MUL_N);
  p->dst_size[0] = 0;
  REFERENCE (refmpn_mullo_n);

  p = &param[TYPE_MUL_MN];
  COPY (TYPE_MUL_N);
  p->size2 = 1;
  REFERENCE (refmpn_mul_basecase);

  p = &param[TYPE_UMUL_PPMM];
  p->retval = 1;
  p->src[0] = 1;
  p->dst[0] = 1;
  p->dst_size[0] = SIZE_1;
  p->overlap = OVERLAP_NONE;
  REFERENCE (refmpn_umul_ppmm);

  p = &param[TYPE_UMUL_PPMM_R];
  COPY (TYPE_UMUL_PPMM);
  REFERENCE (refmpn_umul_ppmm_r);


  /* shifts; the left shifts are copied from TYPE_RSHIFT with the overlap
     direction switched */
  p = &param[TYPE_RSHIFT];
  p->retval = 1;
  p->dst[0] = 1;
  p->src[0] = 1;
  p->shift = 1;
  p->overlap = OVERLAP_LOW_TO_HIGH;
  REFERENCE (refmpn_rshift);

  p = &param[TYPE_LSHIFT];
  COPY (TYPE_RSHIFT);
  p->overlap = OVERLAP_HIGH_TO_LOW;
  REFERENCE (refmpn_lshift);

  p = &param[TYPE_LSHIFTC];
  COPY (TYPE_RSHIFT);
  p->overlap = OVERLAP_HIGH_TO_LOW;
  REFERENCE (refmpn_lshiftc);


  /* bit counting: sources and a return value, no destination */
  p = &param[TYPE_POPCOUNT];
  p->retval = 1;
  p->src[0] = 1;
  REFERENCE (refmpn_popcount);

  p = &param[TYPE_HAMDIST];
  COPY (TYPE_POPCOUNT);
  p->src[1] = 1;
  REFERENCE (refmpn_hamdist);


  /* multi-limb division: two sources and two destinations */
  p = &param[TYPE_SBPI1_DIV_QR];
  p->retval = 1;
  p->dst[0] = 1;
  p->dst[1] = 1;
  p->src[0] = 1;
  p->src[1] = 1;
  p->data = DATA_SRC1_HIGHBIT;
  p->size2 = 1;
  p->dst_size[0] = SIZE_DIFF;
  p->overlap = OVERLAP_NONE;
  REFERENCE (refmpn_sb_div_qr);

  p = &param[TYPE_TDIV_QR];
  p->dst[0] = 1;
  p->dst[1] = 1;
  p->src[0] = 1;
  p->src[1] = 1;
  p->size2 = 1;
  p->dst_size[0] = SIZE_DIFF_PLUS_1;
  p->dst_size[1] = SIZE_SIZE2;
  p->overlap = OVERLAP_NONE;
  REFERENCE (refmpn_tdiv_qr);

  /* square root; this entry has both a validate function and a reference
     function */
  p = &param[TYPE_SQRTREM];
  p->retval = 1;
  p->dst[0] = 1;
  p->dst[1] = 1;
  p->src[0] = 1;
  p->dst_size[0] = SIZE_CEIL_HALF;
  p->dst_size[1] = SIZE_RETVAL;
  p->overlap = OVERLAP_NONE;
  VALIDATE (validate_sqrtrem);
  REFERENCE (refmpn_sqrtrem);

  /* a destination only, no sources */
  p = &param[TYPE_ZERO];
  p->dst[0] = 1;
  p->size = SIZE_ALLOW_ZERO;
  REFERENCE (refmpn_zero);

  /* the only entry setting dst_bytes */
  p = &param[TYPE_GET_STR];
  p->retval = 1;
  p->src[0] = 1;
  p->size = SIZE_ALLOW_ZERO;
  p->dst[0] = 1;
  p->dst[1] = 1;
  p->dst_size[0] = SIZE_GET_STR;
  p->dst_bytes[0] = 1;
  p->overlap = OVERLAP_NONE;
  REFERENCE (refmpn_get_str);

  /* inverses, each with a constraint on the source data */
  p = &param[TYPE_BINVERT];
  p->dst[0] = 1;
  p->src[0] = 1;
  p->data = DATA_SRC0_ODD;
  p->overlap = OVERLAP_NONE;
  REFERENCE (refmpn_binvert);

  p = &param[TYPE_INVERT];
  p->dst[0] = 1;
  p->src[0] = 1;
  p->data = DATA_SRC0_HIGHBIT;
  p->overlap = OVERLAP_NONE;
  REFERENCE (refmpn_invert);

  /* optional hook: extra initialization statements supplied by the build */
#ifdef EXTRA_PARAM_INIT
  EXTRA_PARAM_INIT
#endif
}
1226
1227
1228/* The following are macros if there's no native versions, so wrap them in
1229   functions that can be in try_array[]. */
1230
1231void
1232MPN_COPY_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size)
1233{ MPN_COPY (rp, sp, size); }
1234
1235void
1236MPN_COPY_INCR_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size)
1237{ MPN_COPY_INCR (rp, sp, size); }
1238
1239void
1240MPN_COPY_DECR_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size)
1241{ MPN_COPY_DECR (rp, sp, size); }
1242
1243void
1244__GMPN_COPY_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size)
1245{ __GMPN_COPY (rp, sp, size); }
1246
#ifdef __GMPN_COPY_INCR
/* Out-of-line wrapper that just invokes __GMPN_COPY_INCR (rp, sp, size).
   Only built when the __GMPN_COPY_INCR macro is defined. */
void
__GMPN_COPY_INCR_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size)
{
  __GMPN_COPY_INCR (rp, sp, size);
}
#endif
1252
1253void
1254mpn_com_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size)
1255{ mpn_com (rp, sp, size); }
1256
1257void
1258mpn_and_n_fun (mp_ptr rp, mp_srcptr s1, mp_srcptr s2, mp_size_t size)
1259{ mpn_and_n (rp, s1, s2, size); }
1260
1261void
1262mpn_andn_n_fun (mp_ptr rp, mp_srcptr s1, mp_srcptr s2, mp_size_t size)
1263{ mpn_andn_n (rp, s1, s2, size); }
1264
1265void
1266mpn_nand_n_fun (mp_ptr rp, mp_srcptr s1, mp_srcptr s2, mp_size_t size)
1267{ mpn_nand_n (rp, s1, s2, size); }
1268
1269void
1270mpn_ior_n_fun (mp_ptr rp, mp_srcptr s1, mp_srcptr s2, mp_size_t size)
1271{ mpn_ior_n (rp, s1, s2, size); }
1272
1273void
1274mpn_iorn_n_fun (mp_ptr rp, mp_srcptr s1, mp_srcptr s2, mp_size_t size)
1275{ mpn_iorn_n (rp, s1, s2, size); }
1276
1277void
1278mpn_nior_n_fun (mp_ptr rp, mp_srcptr s1, mp_srcptr s2, mp_size_t size)
1279{ mpn_nior_n (rp, s1, s2, size); }
1280
1281void
1282mpn_xor_n_fun (mp_ptr rp, mp_srcptr s1, mp_srcptr s2, mp_size_t size)
1283{ mpn_xor_n (rp, s1, s2, size); }
1284
1285void
1286mpn_xnor_n_fun (mp_ptr rp, mp_srcptr s1, mp_srcptr s2, mp_size_t size)
1287{ mpn_xnor_n (rp, s1, s2, size); }
1288
1289mp_limb_t
1290udiv_qrnnd_fun (mp_limb_t *remptr, mp_limb_t n1, mp_limb_t n0, mp_limb_t d)
1291{
1292  mp_limb_t  q;
1293  udiv_qrnnd (q, *remptr, n1, n0, d);
1294  return q;
1295}
1296
1297mp_limb_t
1298mpn_divexact_by3_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size)
1299{
1300  return mpn_divexact_by3 (rp, sp, size);
1301}
1302
1303mp_limb_t
1304mpn_modexact_1_odd_fun (mp_srcptr ptr, mp_size_t size, mp_limb_t divisor)
1305{
1306  return mpn_modexact_1_odd (ptr, size, divisor);
1307}
1308
1309void
1310mpn_toom22_mul_fun (mp_ptr dst, mp_srcptr src1, mp_srcptr src2, mp_size_t size)
1311{
1312  mp_ptr  tspace;
1313  TMP_DECL;
1314  TMP_MARK;
1315  tspace = TMP_ALLOC_LIMBS (mpn_toom22_mul_itch (size, size));
1316  mpn_toom22_mul (dst, src1, size, src2, size, tspace);
1317  TMP_FREE;
1318}
1319void
1320mpn_toom2_sqr_fun (mp_ptr dst, mp_srcptr src, mp_size_t size)
1321{
1322  mp_ptr tspace;
1323  TMP_DECL;
1324  TMP_MARK;
1325  tspace = TMP_ALLOC_LIMBS (mpn_toom2_sqr_itch (size));
1326  mpn_toom2_sqr (dst, src, size, tspace);
1327  TMP_FREE;
1328}
1329void
1330mpn_toom33_mul_fun (mp_ptr dst, mp_srcptr src1, mp_srcptr src2, mp_size_t size)
1331{
1332  mp_ptr  tspace;
1333  TMP_DECL;
1334  TMP_MARK;
1335  tspace = TMP_ALLOC_LIMBS (mpn_toom33_mul_itch (size, size));
1336  mpn_toom33_mul (dst, src1, size, src2, size, tspace);
1337  TMP_FREE;
1338}
1339void
1340mpn_toom3_sqr_fun (mp_ptr dst, mp_srcptr src, mp_size_t size)
1341{
1342  mp_ptr tspace;
1343  TMP_DECL;
1344  TMP_MARK;
1345  tspace = TMP_ALLOC_LIMBS (mpn_toom3_sqr_itch (size));
1346  mpn_toom3_sqr (dst, src, size, tspace);
1347  TMP_FREE;
1348}
1349void
1350mpn_toom44_mul_fun (mp_ptr dst, mp_srcptr src1, mp_srcptr src2, mp_size_t size)
1351{
1352  mp_ptr  tspace;
1353  TMP_DECL;
1354  TMP_MARK;
1355  tspace = TMP_ALLOC_LIMBS (mpn_toom44_mul_itch (size, size));
1356  mpn_toom44_mul (dst, src1, size, src2, size, tspace);
1357  TMP_FREE;
1358}
1359void
1360mpn_toom4_sqr_fun (mp_ptr dst, mp_srcptr src, mp_size_t size)
1361{
1362  mp_ptr tspace;
1363  TMP_DECL;
1364  TMP_MARK;
1365  tspace = TMP_ALLOC_LIMBS (mpn_toom4_sqr_itch (size));
1366  mpn_toom4_sqr (dst, src, size, tspace);
1367  TMP_FREE;
1368}
1369
1370mp_limb_t
1371umul_ppmm_fun (mp_limb_t *lowptr, mp_limb_t m1, mp_limb_t m2)
1372{
1373  mp_limb_t  high;
1374  umul_ppmm (high, *lowptr, m1, m2);
1375  return high;
1376}
1377
1378void
1379MPN_ZERO_fun (mp_ptr ptr, mp_size_t size)
1380{ MPN_ZERO (ptr, size); }
1381
1382
/* One routine that can be selected for testing; see choice_array[]. */
struct choice_t {
  const char  *name;      /* printed by print_each() for the tested routine */
  tryfun_t    function;   /* the routine, cast to the generic tryfun_t */
  int         type;       /* TYPE_xxx code, call() switches on this */
  mp_size_t   minsize;    /* optional, 0 when an entry omits it; presumably
                             the smallest operand size to run -- its use
                             isn't visible in this part of the file, TODO
                             confirm */
};
1389
/* TRY(fun) and TRY_FUNFUN(fun) each expand to the first two initializers of
   a struct choice_t: a name string, then a function cast to tryfun_t.  TRY
   gives fun itself, TRY_FUNFUN gives the wrapper named fun_fun (while still
   using plain "fun" for the name string).

   The #else forms are for when HAVE_STRINGIZE is false.  NOTE(review): they
   appear to rely on a traditional preprocessor substituting macro
   parameters inside string literals and pasting tokens across an empty
   comment -- confirm before relying on that branch. */
#if HAVE_STRINGIZE
#define TRY(fun)        #fun, (tryfun_t) fun
#define TRY_FUNFUN(fun) #fun, (tryfun_t) fun##_fun
#else
#define TRY(fun)        "fun", (tryfun_t) fun
#define TRY_FUNFUN(fun) "fun", (tryfun_t) fun/**/_fun
#endif
1397
/* Table of the routines that can be tested.  Each entry gives the name and
   function (via TRY or TRY_FUNFUN), the TYPE_xxx code and, for some
   entries, a minsize (left as 0 when not given).  Entries inside #if or
   #ifdef are only present when that condition holds.  Several entries may
   share one TYPE_xxx, in which case call() invokes them with the same
   argument list.  EXTRA_ROUTINES, if defined, supplies further entries. */
const struct choice_t choice_array[] = {
  { TRY(mpn_add),       TYPE_ADD    },
  { TRY(mpn_sub),       TYPE_SUB    },

  { TRY(mpn_add_n),     TYPE_ADD_N  },
  { TRY(mpn_sub_n),     TYPE_SUB_N  },

#if HAVE_NATIVE_mpn_add_nc
  { TRY(mpn_add_nc),    TYPE_ADD_NC },
#endif
#if HAVE_NATIVE_mpn_sub_nc
  { TRY(mpn_sub_nc),    TYPE_SUB_NC },
#endif

#if HAVE_NATIVE_mpn_add_n_sub_n
  { TRY(mpn_add_n_sub_n),  TYPE_ADDSUB_N  },
#endif
#if HAVE_NATIVE_mpn_add_n_sub_nc
  { TRY(mpn_add_n_sub_nc), TYPE_ADDSUB_NC },
#endif

  { TRY(mpn_addmul_1),  TYPE_ADDMUL_1  },
  { TRY(mpn_submul_1),  TYPE_SUBMUL_1  },
#if HAVE_NATIVE_mpn_addmul_1c
  { TRY(mpn_addmul_1c), TYPE_ADDMUL_1C },
#endif
#if HAVE_NATIVE_mpn_submul_1c
  { TRY(mpn_submul_1c), TYPE_SUBMUL_1C },
#endif

#if HAVE_NATIVE_mpn_addmul_2
  { TRY(mpn_addmul_2), TYPE_ADDMUL_2, 2 },
#endif
#if HAVE_NATIVE_mpn_addmul_3
  { TRY(mpn_addmul_3), TYPE_ADDMUL_3, 3 },
#endif
#if HAVE_NATIVE_mpn_addmul_4
  { TRY(mpn_addmul_4), TYPE_ADDMUL_4, 4 },
#endif
#if HAVE_NATIVE_mpn_addmul_5
  { TRY(mpn_addmul_5), TYPE_ADDMUL_5, 5 },
#endif
#if HAVE_NATIVE_mpn_addmul_6
  { TRY(mpn_addmul_6), TYPE_ADDMUL_6, 6 },
#endif
#if HAVE_NATIVE_mpn_addmul_7
  { TRY(mpn_addmul_7), TYPE_ADDMUL_7, 7 },
#endif
#if HAVE_NATIVE_mpn_addmul_8
  { TRY(mpn_addmul_8), TYPE_ADDMUL_8, 8 },
#endif

  { TRY_FUNFUN(mpn_com),  TYPE_COM },

  { TRY_FUNFUN(MPN_COPY),      TYPE_COPY },
  { TRY_FUNFUN(MPN_COPY_INCR), TYPE_COPYI },
  { TRY_FUNFUN(MPN_COPY_DECR), TYPE_COPYD },

  { TRY_FUNFUN(__GMPN_COPY),      TYPE_COPY },
#ifdef __GMPN_COPY_INCR
  { TRY_FUNFUN(__GMPN_COPY_INCR), TYPE_COPYI },
#endif

#if HAVE_NATIVE_mpn_copyi
  { TRY(mpn_copyi), TYPE_COPYI },
#endif
#if HAVE_NATIVE_mpn_copyd
  { TRY(mpn_copyd), TYPE_COPYD },
#endif

#if HAVE_NATIVE_mpn_addlsh1_n
  { TRY(mpn_addlsh1_n), TYPE_ADDLSH1_N },
#endif
#if HAVE_NATIVE_mpn_addlsh2_n
  { TRY(mpn_addlsh2_n), TYPE_ADDLSH2_N },
#endif
#if HAVE_NATIVE_mpn_addlsh_n
  { TRY(mpn_addlsh_n), TYPE_ADDLSH_N },
#endif
#if HAVE_NATIVE_mpn_sublsh1_n
  { TRY(mpn_sublsh1_n), TYPE_SUBLSH1_N },
#endif
#if HAVE_NATIVE_mpn_sublsh_n
  { TRY(mpn_sublsh_n), TYPE_SUBLSH_N },
#endif
#if HAVE_NATIVE_mpn_rsblsh1_n
  { TRY(mpn_rsblsh1_n), TYPE_RSBLSH1_N },
#endif
#if HAVE_NATIVE_mpn_rsblsh2_n
  { TRY(mpn_rsblsh2_n), TYPE_RSBLSH2_N },
#endif
#if HAVE_NATIVE_mpn_rsblsh_n
  { TRY(mpn_rsblsh_n), TYPE_RSBLSH_N },
#endif
#if HAVE_NATIVE_mpn_rsh1add_n
  { TRY(mpn_rsh1add_n), TYPE_RSH1ADD_N },
#endif
#if HAVE_NATIVE_mpn_rsh1sub_n
  { TRY(mpn_rsh1sub_n), TYPE_RSH1SUB_N },
#endif

  { TRY_FUNFUN(mpn_and_n),  TYPE_AND_N  },
  { TRY_FUNFUN(mpn_andn_n), TYPE_ANDN_N },
  { TRY_FUNFUN(mpn_nand_n), TYPE_NAND_N },
  { TRY_FUNFUN(mpn_ior_n),  TYPE_IOR_N  },
  { TRY_FUNFUN(mpn_iorn_n), TYPE_IORN_N },
  { TRY_FUNFUN(mpn_nior_n), TYPE_NIOR_N },
  { TRY_FUNFUN(mpn_xor_n),  TYPE_XOR_N  },
  { TRY_FUNFUN(mpn_xnor_n), TYPE_XNOR_N },

  { TRY(mpn_divrem_1),     TYPE_DIVREM_1 },
#if USE_PREINV_DIVREM_1
  { TRY(mpn_preinv_divrem_1), TYPE_PREINV_DIVREM_1 },
#endif
  { TRY(mpn_mod_1),        TYPE_MOD_1 },
#if USE_PREINV_MOD_1
  { TRY(mpn_preinv_mod_1), TYPE_PREINV_MOD_1 },
#endif
#if HAVE_NATIVE_mpn_divrem_1c
  { TRY(mpn_divrem_1c),    TYPE_DIVREM_1C },
#endif
#if HAVE_NATIVE_mpn_mod_1c
  { TRY(mpn_mod_1c),       TYPE_MOD_1C },
#endif
#if GMP_NUMB_BITS % 4 == 0
  { TRY(mpn_mod_34lsub1),  TYPE_MOD_34LSUB1 },
#endif

  { TRY_FUNFUN(udiv_qrnnd), TYPE_UDIV_QRNND, 2 },
#if HAVE_NATIVE_mpn_udiv_qrnnd
  { TRY(mpn_udiv_qrnnd),    TYPE_UDIV_QRNND, 2 },
#endif
#if HAVE_NATIVE_mpn_udiv_qrnnd_r
  { TRY(mpn_udiv_qrnnd_r),  TYPE_UDIV_QRNND_R, 2 },
#endif

  { TRY(mpn_divexact_1),          TYPE_DIVEXACT_1 },
  { TRY_FUNFUN(mpn_divexact_by3), TYPE_DIVEXACT_BY3 },
  { TRY(mpn_divexact_by3c),       TYPE_DIVEXACT_BY3C },

  { TRY_FUNFUN(mpn_modexact_1_odd), TYPE_MODEXACT_1_ODD },
  { TRY(mpn_modexact_1c_odd),       TYPE_MODEXACT_1C_ODD },


  { TRY(mpn_sbpi1_div_qr), TYPE_SBPI1_DIV_QR, 3},
  { TRY(mpn_tdiv_qr),      TYPE_TDIV_QR },

  { TRY(mpn_mul_1),      TYPE_MUL_1 },
#if HAVE_NATIVE_mpn_mul_1c
  { TRY(mpn_mul_1c),     TYPE_MUL_1C },
#endif
#if HAVE_NATIVE_mpn_mul_2
  { TRY(mpn_mul_2),      TYPE_MUL_2, 2 },
#endif
#if HAVE_NATIVE_mpn_mul_3
  { TRY(mpn_mul_3),      TYPE_MUL_3, 3 },
#endif
#if HAVE_NATIVE_mpn_mul_4
  { TRY(mpn_mul_4),      TYPE_MUL_4, 4 },
#endif

  { TRY(mpn_rshift),     TYPE_RSHIFT },
  { TRY(mpn_lshift),     TYPE_LSHIFT },
  { TRY(mpn_lshiftc),    TYPE_LSHIFTC },


  { TRY(mpn_mul_basecase), TYPE_MUL_MN },
  { TRY(mpn_mullo_basecase), TYPE_MULLO_N },
#if SQR_TOOM2_THRESHOLD > 0
  { TRY(mpn_sqr_basecase), TYPE_SQR },
#endif

  { TRY(mpn_mul),    TYPE_MUL_MN },
  { TRY(mpn_mul_n),  TYPE_MUL_N },
  { TRY(mpn_sqr),    TYPE_SQR },

  { TRY_FUNFUN(umul_ppmm), TYPE_UMUL_PPMM, 2 },
#if HAVE_NATIVE_mpn_umul_ppmm
  { TRY(mpn_umul_ppmm),    TYPE_UMUL_PPMM, 2 },
#endif
#if HAVE_NATIVE_mpn_umul_ppmm_r
  { TRY(mpn_umul_ppmm_r),  TYPE_UMUL_PPMM_R, 2 },
#endif

  { TRY_FUNFUN(mpn_toom22_mul),  TYPE_MUL_N,  MPN_TOOM22_MUL_MINSIZE },
  { TRY_FUNFUN(mpn_toom2_sqr),   TYPE_SQR,    MPN_TOOM2_SQR_MINSIZE },
  { TRY_FUNFUN(mpn_toom33_mul),  TYPE_MUL_N,  MPN_TOOM33_MUL_MINSIZE },
  { TRY_FUNFUN(mpn_toom3_sqr),   TYPE_SQR,    MPN_TOOM3_SQR_MINSIZE },
  { TRY_FUNFUN(mpn_toom44_mul),  TYPE_MUL_N,  MPN_TOOM44_MUL_MINSIZE },
  { TRY_FUNFUN(mpn_toom4_sqr),   TYPE_SQR,    MPN_TOOM4_SQR_MINSIZE },

  { TRY(mpn_gcd_1),        TYPE_GCD_1            },
  { TRY(mpn_gcd),          TYPE_GCD              },
  { TRY(mpz_jacobi),       TYPE_MPZ_JACOBI       },
  { TRY(mpz_kronecker_ui), TYPE_MPZ_KRONECKER_UI },
  { TRY(mpz_kronecker_si), TYPE_MPZ_KRONECKER_SI },
  { TRY(mpz_ui_kronecker), TYPE_MPZ_UI_KRONECKER },
  { TRY(mpz_si_kronecker), TYPE_MPZ_SI_KRONECKER },

  { TRY(mpn_popcount),   TYPE_POPCOUNT },
  { TRY(mpn_hamdist),    TYPE_HAMDIST },

  { TRY(mpn_sqrtrem),    TYPE_SQRTREM },

  { TRY_FUNFUN(MPN_ZERO), TYPE_ZERO },

  { TRY(mpn_get_str),    TYPE_GET_STR },

  { TRY(mpn_binvert),    TYPE_BINVERT },
  { TRY(mpn_invert),     TYPE_INVERT  },

#ifdef EXTRA_ROUTINES
  EXTRA_ROUTINES
#endif
};

/* The entry in use: print_each() prints its name and call() switches on its
   type.  It's assigned outside this part of the file. */
const struct choice_t *choice = NULL;
1615
1616
1617void
1618mprotect_maybe (void *addr, size_t len, int prot)
1619{
1620  if (!option_redzones)
1621    return;
1622
1623#if HAVE_MPROTECT
1624  if (mprotect (addr, len, prot) != 0)
1625    {
1626      fprintf (stderr, "Cannot mprotect %p 0x%X 0x%X: %s\n",
1627	       addr, (unsigned) len, prot, strerror (errno));
1628      exit (1);
1629    }
1630#else
1631  {
1632    static int  warned = 0;
1633    if (!warned)
1634      {
1635	fprintf (stderr,
1636		 "mprotect not available, bounds testing not performed\n");
1637	warned = 1;
1638      }
1639  }
1640#endif
1641}
1642
/* Round "a" up to a multiple of "m".

   Returns "a" itself when it's already a multiple, otherwise the next
   multiple above it.  "m" must be non-zero.  The remainder is held in a
   size_t, the same type as the operands, so it can't be truncated on
   systems where unsigned long is narrower than size_t. */
size_t
round_up_multiple (size_t a, size_t m)
{
  size_t  r = a % m;

  return (r == 0) ? a : a + (m - r);
}
1655
1656
1657/* On some systems it seems that only an mmap'ed region can be mprotect'ed,
1658   for instance HP-UX 10.
1659
1660   mmap will almost certainly return a pointer already aligned to a page
1661   boundary, but it's easy enough to share the alignment handling with the
1662   malloc case. */
1663
1664void
1665malloc_region (struct region_t *r, mp_size_t n)
1666{
1667  mp_ptr  p;
1668  size_t  nbytes;
1669
1670  ASSERT ((pagesize % BYTES_PER_MP_LIMB) == 0);
1671
1672  n = round_up_multiple (n, PAGESIZE_LIMBS);
1673  r->size = n;
1674
1675  nbytes = n*BYTES_PER_MP_LIMB + 2*REDZONE_BYTES + pagesize;
1676
1677#if defined (MAP_ANONYMOUS) && ! defined (MAP_ANON)
1678#define MAP_ANON  MAP_ANONYMOUS
1679#endif
1680
1681#if HAVE_MMAP && defined (MAP_ANON)
1682  /* note must pass fd=-1 for MAP_ANON on BSD */
1683  p = mmap (NULL, nbytes, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANON, -1, 0);
1684  if (p == (void *) -1)
1685    {
1686      fprintf (stderr, "Cannot mmap %#x anon bytes: %s\n",
1687	       (unsigned) nbytes, strerror (errno));
1688      exit (1);
1689    }
1690#else
1691  p = (mp_ptr) malloc (nbytes);
1692  ASSERT_ALWAYS (p != NULL);
1693#endif
1694
1695  p = align_pointer (p, pagesize);
1696
1697  mprotect_maybe (p, REDZONE_BYTES, PROT_NONE);
1698  p += REDZONE_LIMBS;
1699  r->ptr = p;
1700
1701  mprotect_maybe (p + n, REDZONE_BYTES, PROT_NONE);
1702}
1703
1704void
1705mprotect_region (const struct region_t *r, int prot)
1706{
1707  mprotect_maybe (r->ptr, r->size, prot);
1708}
1709
1710
/* Fixed carry values tried by CARRY_ITERATION.

   First four entries must be 0,1,2,3 for the benefit of CARRY_BIT, CARRY_3,
   and CARRY_4, since CARRY_COUNT restricts those cases to the first 2, 3 or
   4 entries respectively. */
mp_limb_t  carry_array[] = {
  0, 1, 2, 3,
  4,
  CNST_LIMB(1) << 8,
  CNST_LIMB(1) << 16,
  GMP_NUMB_MAX
};
/* Loop counter of CARRY_ITERATION.  It can run past the end of
   carry_array[], in which case random carries are generated instead. */
int        carry_index;
1721
/* Number of carry values CARRY_ITERATION steps through for the current
   tr->carry: 2, 3 or 4 for CARRY_BIT, CARRY_3 and CARRY_4; all of
   carry_array[] plus CARRY_RANDOMS more for CARRY_LIMB and CARRY_DIVISOR;
   and 1 for anything else. */
#define CARRY_COUNT                                             \
  ((tr->carry == CARRY_BIT) ? 2                                 \
   : tr->carry == CARRY_3   ? 3                                 \
   : tr->carry == CARRY_4   ? 4                                 \
   : (tr->carry == CARRY_LIMB || tr->carry == CARRY_DIVISOR)    \
     ? numberof(carry_array) + CARRY_RANDOMS                    \
   : 1)
1729
/* Call refmpn_random (dst, size) when index is odd, or refmpn_random2 (dst,
   size) when it's even, so successive indexes alternate between the two
   generators. */
#define MPN_RANDOM_ALT(index,dst,size) \
  (((index) & 1) ? refmpn_random (dst, size) : refmpn_random2 (dst, size))
1732
/* Loop header running carry_index from 0 up to CARRY_COUNT, setting the
   global "carry" for each pass.  While carry_index is within carry_array[]
   the value comes from there, beyond that it's one random limb from
   MPN_RANDOM_ALT.  When tr->carry is CARRY_DIVISOR the value is then
   reduced modulo "divisor".

   The value is set up inside the loop condition, ahead of the carry_index <
   CARRY_COUNT test, so it's also computed once more on the final pass that
   ends the loop.

   The dummy value after MPN_RANDOM_ALT ensures both sides of the ":" have
   the same type */
#define CARRY_ITERATION                                                 \
  for (carry_index = 0;                                                 \
       (carry_index < numberof (carry_array)                            \
	? (carry = carry_array[carry_index])                            \
	: (MPN_RANDOM_ALT (carry_index, &carry, 1), (mp_limb_t) 0)),    \
	 (tr->carry == CARRY_DIVISOR ? carry %= divisor : 0),           \
	 carry_index < CARRY_COUNT;                                     \
       carry_index++)
1743
1744
/* Fixed multiplier values tried by MULTIPLIER_ITERATION before it moves on
   to random ones: some small values, two single-bit values, and the three
   largest values. */
mp_limb_t  multiplier_array[] = {
  0, 1, 2, 3,
  CNST_LIMB(1) << 8,
  CNST_LIMB(1) << 16,
  GMP_NUMB_MAX - 2,
  GMP_NUMB_MAX - 1,
  GMP_NUMB_MAX
};
/* Loop counter of MULTIPLIER_ITERATION. */
int        multiplier_index;
1754
/* Fixed divisor values tried by DIVISOR_ITERATION before it moves on to
   random ones.  Zero is not included.  The values are some small ones,
   single-bit values, values around half a limb, values at and just above
   the high bit, and the three largest values. */
mp_limb_t  divisor_array[] = {
  1, 2, 3,
  CNST_LIMB(1) << 8,
  CNST_LIMB(1) << 16,
  CNST_LIMB(1) << (GMP_NUMB_BITS/2 - 1),
  GMP_NUMB_MAX >> (GMP_NUMB_BITS/2),
  GMP_NUMB_HIGHBIT,
  GMP_NUMB_HIGHBIT + 1,
  GMP_NUMB_MAX - 2,
  GMP_NUMB_MAX - 1,
  GMP_NUMB_MAX
};

/* Loop counter of DIVISOR_ITERATION. */
int        divisor_index;
1769
/* Loop header running "index" from 0 up to "limit", setting "var" for each
   pass.  While index is within "array" the value comes from there, beyond
   that it's one random limb from MPN_RANDOM_ALT.  As the value is set up
   in the loop condition, ahead of the index < limit test, it's also
   computed once more on the final pass that ends the loop.

   The "randoms" and "cond" parameters are accepted but not used in the
   expansion.

   The dummy value after MPN_RANDOM_ALT ensures both sides of the ":" have
   the same type */
#define ARRAY_ITERATION(var, index, limit, array, randoms, cond)        \
  for (index = 0;                                                       \
       (index < numberof (array)                                        \
	? (var = array[index])                                          \
	: (MPN_RANDOM_ALT (index, &var, 1), (mp_limb_t) 0)),            \
       index < limit;                                                   \
       index++)
1779
/* Number of multiplier values to try: all of multiplier_array[] plus
   MULTIPLIER_RANDOMS more when tr->multiplier is set, otherwise 1. */
#define MULTIPLIER_COUNT                                \
  (tr->multiplier                                       \
    ? numberof (multiplier_array) + MULTIPLIER_RANDOMS  \
    : 1)

/* Loop header stepping the global "multiplier" through MULTIPLIER_COUNT
   values, using multiplier_index as the counter. */
#define MULTIPLIER_ITERATION                                            \
  ARRAY_ITERATION(multiplier, multiplier_index, MULTIPLIER_COUNT,       \
		  multiplier_array, MULTIPLIER_RANDOMS, TRY_MULTIPLIER)
1788
/* Number of divisor values to try: all of divisor_array[] plus
   DIVISOR_RANDOMS more when tr->divisor is set, otherwise 1. */
#define DIVISOR_COUNT                           \
  (tr->divisor                                  \
   ? numberof (divisor_array) + DIVISOR_RANDOMS \
   : 1)

/* Loop header stepping the global "divisor" through DIVISOR_COUNT values,
   using divisor_index as the counter. */
#define DIVISOR_ITERATION                                               \
  ARRAY_ITERATION(divisor, divisor_index, DIVISOR_COUNT, divisor_array, \
		  DIVISOR_RANDOMS, TRY_DIVISOR)
1797
1798
/* overlap_array[].s[i] is where s[i] should be, 0 or 1 means overlapping
   d[0] or d[1] respectively, -1 means a separate (write-protected)
   location.

   The order of the entries matters, since OVERLAP_COUNT selects a number of
   leading entries to use. */

struct overlap_t {
  int  s[NUM_SOURCES];
} overlap_array[] = {
  { { -1, -1 } },
  { {  0, -1 } },
  { { -1,  0 } },
  { {  0,  0 } },
  { {  1, -1 } },
  { { -1,  1 } },
  { {  1,  1 } },
  { {  0,  1 } },
  { {  1,  0 } },
};

/* Current entry and end marker for OVERLAP_ITERATION; print_all() reads
   "overlap" to describe where each source is. */
struct overlap_t  *overlap, *overlap_limit;
1818
/* Number of leading overlap_array[] entries to use.  The tr->overlap flag
   bits are tested first, in the order shown, and if none of those is set
   the count depends on which of d[1], s[1] and d[0] the routine has. */
#define OVERLAP_COUNT                   \
  (tr->overlap & OVERLAP_NONE       ? 1 \
   : tr->overlap & OVERLAP_NOT_SRCS ? 3 \
   : tr->overlap & OVERLAP_NOT_SRC2 ? 2 \
   : tr->dst[1]                     ? 9 \
   : tr->src[1]                     ? 4 \
   : tr->dst[0]                     ? 2 \
   : 1)

/* Loop header stepping the global "overlap" pointer through the first
   OVERLAP_COUNT entries of overlap_array[].  OVERLAP_COUNT is evaluated
   once, when the loop starts, to set overlap_limit. */
#define OVERLAP_ITERATION                               \
  for (overlap = &overlap_array[0],                     \
    overlap_limit = &overlap_array[OVERLAP_COUNT];      \
    overlap < overlap_limit;                            \
    overlap++)
1833
1834
/* Base used for the TYPE_GET_STR case in call(). */
int  base = 10;

/* t_rand selects the random generator used by t_random() for DATA_TRAND,
   and T_RAND_ITERATION runs it over 0 to T_RAND_COUNT-1. */
#define T_RAND_COUNT  2
int  t_rand;
1839
1840void
1841t_random (mp_ptr ptr, mp_size_t n)
1842{
1843  if (n == 0)
1844    return;
1845
1846  switch (option_data) {
1847  case DATA_TRAND:
1848    switch (t_rand) {
1849    case 0: refmpn_random (ptr, n); break;
1850    case 1: refmpn_random2 (ptr, n); break;
1851    default: abort();
1852    }
1853    break;
1854  case DATA_SEQ:
1855    {
1856      static mp_limb_t  counter = 0;
1857      mp_size_t  i;
1858      for (i = 0; i < n; i++)
1859	ptr[i] = ++counter;
1860    }
1861    break;
1862  case DATA_ZEROS:
1863    refmpn_zero (ptr, n);
1864    break;
1865  case DATA_FFS:
1866    refmpn_fill (ptr, n, GMP_NUMB_MAX);
1867    break;
1868  case DATA_2FD:
1869    /* Special value 0x2FFF...FFFD, which divided by 3 gives 0xFFF...FFF,
1870       inducing the q1_ff special case in the mul-by-inverse part of some
1871       versions of divrem_1 and mod_1. */
1872    refmpn_fill (ptr, n, (mp_limb_t) -1);
1873    ptr[n-1] = 2;
1874    ptr[0] -= 2;
1875    break;
1876
1877  default:
1878    abort();
1879  }
1880}
/* Loop header running the global t_rand over 0 to T_RAND_COUNT-1. */
#define T_RAND_ITERATION \
  for (t_rand = 0; t_rand < T_RAND_COUNT; t_rand++)
1883
1884
1885void
1886print_each (const struct each_t *e)
1887{
1888  int  i;
1889
1890  printf ("%s %s\n", e->name, e == &ref ? tr->reference_name : choice->name);
1891  if (tr->retval)
1892    mpn_trace ("   retval", &e->retval, 1);
1893
1894  for (i = 0; i < NUM_DESTS; i++)
1895    {
1896      if (tr->dst[i])
1897	{
1898	  if (tr->dst_bytes[i])
1899	    byte_tracen ("   d[%d]", i, e->d[i].p, d[i].size);
1900	  else
1901	    mpn_tracen ("   d[%d]", i, e->d[i].p, d[i].size);
1902	  printf ("        located %p\n", (void *) (e->d[i].p));
1903	}
1904    }
1905
1906  for (i = 0; i < NUM_SOURCES; i++)
1907    if (tr->src[i])
1908      printf ("   s[%d] located %p\n", i, (void *)  (e->s[i].p));
1909}
1910
1911
1912void
1913print_all (void)
1914{
1915  int  i;
1916
1917  printf ("\n");
1918  printf ("size  %ld\n", (long) size);
1919  if (tr->size2)
1920    printf ("size2 %ld\n", (long) size2);
1921
1922  for (i = 0; i < NUM_DESTS; i++)
1923    if (d[i].size != size)
1924      printf ("d[%d].size %ld\n", i, (long) d[i].size);
1925
1926  if (tr->multiplier)
1927    mpn_trace ("   multiplier", &multiplier, 1);
1928  if (tr->divisor)
1929    mpn_trace ("   divisor", &divisor, 1);
1930  if (tr->shift)
1931    printf ("   shift %lu\n", shift);
1932  if (tr->carry)
1933    mpn_trace ("   carry", &carry, 1);
1934  if (tr->msize)
1935    mpn_trace ("   multiplier_N", multiplier_N, tr->msize);
1936
1937  for (i = 0; i < NUM_DESTS; i++)
1938    if (tr->dst[i])
1939      printf ("   d[%d] %s, align %ld, size %ld\n",
1940	      i, d[i].high ? "high" : "low",
1941	      (long) d[i].align, (long) d[i].size);
1942
1943  for (i = 0; i < NUM_SOURCES; i++)
1944    {
1945      if (tr->src[i])
1946	{
1947	  printf ("   s[%d] %s, align %ld, ",
1948		  i, s[i].high ? "high" : "low", (long) s[i].align);
1949	  switch (overlap->s[i]) {
1950	  case -1:
1951	    printf ("no overlap\n");
1952	    break;
1953	  default:
1954	    printf ("==d[%d]%s\n",
1955		    overlap->s[i],
1956		    tr->overlap == OVERLAP_LOW_TO_HIGH ? "+a"
1957		    : tr->overlap == OVERLAP_HIGH_TO_LOW ? "-a"
1958		    : "");
1959	    break;
1960	  }
1961	  printf ("   s[%d]=", i);
1962	  if (tr->carry_sign && (carry & (1 << i)))
1963	    printf ("-");
1964	  mpn_trace (NULL, s[i].p, SRC_SIZE(i));
1965	}
1966    }
1967
1968  if (tr->dst0_from_src1)
1969    mpn_trace ("   d[0]", s[1].region.ptr, size);
1970
1971  if (tr->reference)
1972    print_each (&ref);
1973  print_each (&fun);
1974}
1975
1976void
1977compare (void)
1978{
1979  int  error = 0;
1980  int  i;
1981
1982  if (tr->retval && ref.retval != fun.retval)
1983    {
1984      gmp_printf ("Different return values (%Mu, %Mu)\n",
1985		  ref.retval, fun.retval);
1986      error = 1;
1987    }
1988
1989  for (i = 0; i < NUM_DESTS; i++)
1990    {
1991      switch (tr->dst_size[i]) {
1992      case SIZE_RETVAL:
1993      case SIZE_GET_STR:
1994	d[i].size = ref.retval;
1995	break;
1996      }
1997    }
1998
1999  for (i = 0; i < NUM_DESTS; i++)
2000    {
2001      if (! tr->dst[i])
2002	continue;
2003
2004      if (tr->dst_bytes[i])
2005	{
2006	  if (memcmp (ref.d[i].p, fun.d[i].p, d[i].size) != 0)
2007	    {
2008	      printf ("Different d[%d] data results, low diff at %ld, high diff at %ld\n",
2009		      i,
2010		      (long) byte_diff_lowest (ref.d[i].p, fun.d[i].p, d[i].size),
2011		      (long) byte_diff_highest (ref.d[i].p, fun.d[i].p, d[i].size));
2012	      error = 1;
2013	    }
2014	}
2015      else
2016	{
2017	  if (d[i].size != 0
2018	      && ! refmpn_equal_anynail (ref.d[i].p, fun.d[i].p, d[i].size))
2019	    {
2020	      printf ("Different d[%d] data results, low diff at %ld, high diff at %ld\n",
2021		      i,
2022		      (long) mpn_diff_lowest (ref.d[i].p, fun.d[i].p, d[i].size),
2023		      (long) mpn_diff_highest (ref.d[i].p, fun.d[i].p, d[i].size));
2024	      error = 1;
2025	    }
2026	}
2027    }
2028
2029  if (error)
2030    {
2031      print_all();
2032      abort();
2033    }
2034}
2035
2036
2037/* The functions are cast if the return value should be a long rather than
2038   the default mp_limb_t.  This is necessary under _LONG_LONG_LIMB.  This
2039   might not be enough if some actual calling conventions checking is
2040   implemented on a long long limb system.  */
2041
2042void
2043call (struct each_t *e, tryfun_t function)
2044{
2045  switch (choice->type) {
2046  case TYPE_ADD:
2047  case TYPE_SUB:
2048    e->retval = CALLING_CONVENTIONS (function)
2049      (e->d[0].p, e->s[0].p, size, e->s[1].p, size2);
2050    break;
2051
2052  case TYPE_ADD_N:
2053  case TYPE_SUB_N:
2054  case TYPE_ADDLSH1_N:
2055  case TYPE_ADDLSH2_N:
2056  case TYPE_SUBLSH1_N:
2057  case TYPE_RSBLSH1_N:
2058  case TYPE_RSBLSH2_N:
2059  case TYPE_RSH1ADD_N:
2060  case TYPE_RSH1SUB_N:
2061    e->retval = CALLING_CONVENTIONS (function)
2062      (e->d[0].p, e->s[0].p, e->s[1].p, size);
2063    break;
2064  case TYPE_ADDLSH_N:
2065  case TYPE_SUBLSH_N:
2066  case TYPE_RSBLSH_N:
2067    e->retval = CALLING_CONVENTIONS (function)
2068      (e->d[0].p, e->s[0].p, e->s[1].p, size, shift);
2069    break;
2070  case TYPE_ADD_NC:
2071  case TYPE_SUB_NC:
2072    e->retval = CALLING_CONVENTIONS (function)
2073      (e->d[0].p, e->s[0].p, e->s[1].p, size, carry);
2074    break;
2075
2076  case TYPE_MUL_1:
2077  case TYPE_ADDMUL_1:
2078  case TYPE_SUBMUL_1:
2079    e->retval = CALLING_CONVENTIONS (function)
2080      (e->d[0].p, e->s[0].p, size, multiplier);
2081    break;
2082  case TYPE_MUL_1C:
2083  case TYPE_ADDMUL_1C:
2084  case TYPE_SUBMUL_1C:
2085    e->retval = CALLING_CONVENTIONS (function)
2086      (e->d[0].p, e->s[0].p, size, multiplier, carry);
2087    break;
2088
2089  case TYPE_MUL_2:
2090  case TYPE_MUL_3:
2091  case TYPE_MUL_4:
2092    if (size == 1)
2093      abort ();
2094    e->retval = CALLING_CONVENTIONS (function)
2095      (e->d[0].p, e->s[0].p, size, multiplier_N);
2096    break;
2097
2098  case TYPE_ADDMUL_2:
2099  case TYPE_ADDMUL_3:
2100  case TYPE_ADDMUL_4:
2101  case TYPE_ADDMUL_5:
2102  case TYPE_ADDMUL_6:
2103  case TYPE_ADDMUL_7:
2104  case TYPE_ADDMUL_8:
2105    if (size == 1)
2106      abort ();
2107    e->retval = CALLING_CONVENTIONS (function)
2108      (e->d[0].p, e->s[0].p, size, multiplier_N);
2109    break;
2110
2111  case TYPE_AND_N:
2112  case TYPE_ANDN_N:
2113  case TYPE_NAND_N:
2114  case TYPE_IOR_N:
2115  case TYPE_IORN_N:
2116  case TYPE_NIOR_N:
2117  case TYPE_XOR_N:
2118  case TYPE_XNOR_N:
2119    CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, e->s[1].p, size);
2120    break;
2121
2122  case TYPE_ADDSUB_N:
2123    e->retval = CALLING_CONVENTIONS (function)
2124      (e->d[0].p, e->d[1].p, e->s[0].p, e->s[1].p, size);
2125    break;
2126  case TYPE_ADDSUB_NC:
2127    e->retval = CALLING_CONVENTIONS (function)
2128      (e->d[0].p, e->d[1].p, e->s[0].p, e->s[1].p, size, carry);
2129    break;
2130
2131  case TYPE_COPY:
2132  case TYPE_COPYI:
2133  case TYPE_COPYD:
2134  case TYPE_COM:
2135    CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, size);
2136    break;
2137
2138
2139  case TYPE_DIVEXACT_BY3:
2140    e->retval = CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, size);
2141    break;
2142  case TYPE_DIVEXACT_BY3C:
2143    e->retval = CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, size,
2144						carry);
2145    break;
2146
2147
2148  case TYPE_DIVMOD_1:
2149  case TYPE_DIVEXACT_1:
2150    e->retval = CALLING_CONVENTIONS (function)
2151      (e->d[0].p, e->s[0].p, size, divisor);
2152    break;
2153  case TYPE_DIVMOD_1C:
2154    e->retval = CALLING_CONVENTIONS (function)
2155      (e->d[0].p, e->s[0].p, size, divisor, carry);
2156    break;
2157  case TYPE_DIVREM_1:
2158    e->retval = CALLING_CONVENTIONS (function)
2159      (e->d[0].p, size2, e->s[0].p, size, divisor);
2160    break;
2161  case TYPE_DIVREM_1C:
2162    e->retval = CALLING_CONVENTIONS (function)
2163      (e->d[0].p, size2, e->s[0].p, size, divisor, carry);
2164    break;
2165  case TYPE_PREINV_DIVREM_1:
2166    {
2167      mp_limb_t  dinv;
2168      unsigned   shift;
2169      shift = refmpn_count_leading_zeros (divisor);
2170      dinv = refmpn_invert_limb (divisor << shift);
2171      e->retval = CALLING_CONVENTIONS (function)
2172	(e->d[0].p, size2, e->s[0].p, size, divisor, dinv, shift);
2173    }
2174    break;
2175  case TYPE_MOD_1:
2176  case TYPE_MODEXACT_1_ODD:
2177    e->retval = CALLING_CONVENTIONS (function)
2178      (e->s[0].p, size, divisor);
2179    break;
2180  case TYPE_MOD_1C:
2181  case TYPE_MODEXACT_1C_ODD:
2182    e->retval = CALLING_CONVENTIONS (function)
2183      (e->s[0].p, size, divisor, carry);
2184    break;
2185  case TYPE_PREINV_MOD_1:
2186    e->retval = CALLING_CONVENTIONS (function)
2187      (e->s[0].p, size, divisor, refmpn_invert_limb (divisor));
2188    break;
2189  case TYPE_MOD_34LSUB1:
2190    e->retval = CALLING_CONVENTIONS (function) (e->s[0].p, size);
2191    break;
2192
2193  case TYPE_UDIV_QRNND:
2194    e->retval = CALLING_CONVENTIONS (function)
2195      (e->d[0].p, e->s[0].p[1], e->s[0].p[0], divisor);
2196    break;
2197  case TYPE_UDIV_QRNND_R:
2198    e->retval = CALLING_CONVENTIONS (function)
2199      (e->s[0].p[1], e->s[0].p[0], divisor, e->d[0].p);
2200    break;
2201
2202  case TYPE_SBPI1_DIV_QR:
2203    {
2204      gmp_pi1_t dinv;
2205      invert_pi1 (dinv, e->s[1].p[size2-1], e->s[1].p[size2-2]); /* FIXME: use refinvert_pi1 */
2206      refmpn_copyi (e->d[1].p, e->s[0].p, size);        /* dividend */
2207      refmpn_fill (e->d[0].p, size-size2, 0x98765432);  /* quotient */
2208      e->retval = CALLING_CONVENTIONS (function)
2209	(e->d[0].p, e->d[1].p, size, e->s[1].p, size2, dinv.inv32);
2210      refmpn_zero (e->d[1].p+size2, size-size2);    /* excess over remainder */
2211    }
2212    break;
2213
2214  case TYPE_TDIV_QR:
2215    CALLING_CONVENTIONS (function) (e->d[0].p, e->d[1].p, 0,
2216				    e->s[0].p, size, e->s[1].p, size2);
2217    break;
2218
2219  case TYPE_GCD_1:
2220    /* Must have a non-zero src, but this probably isn't the best way to do
2221       it. */
2222    if (refmpn_zero_p (e->s[0].p, size))
2223      e->retval = 0;
2224    else
2225      e->retval = CALLING_CONVENTIONS (function) (e->s[0].p, size, divisor);
2226    break;
2227
2228  case TYPE_GCD:
2229    /* Sources are destroyed, so they're saved and replaced, but a general
2230       approach to this might be better.  Note that it's still e->s[0].p and
2231       e->s[1].p that are passed, to get the desired alignments. */
2232    {
2233      mp_ptr  s0 = refmpn_malloc_limbs (size);
2234      mp_ptr  s1 = refmpn_malloc_limbs (size2);
2235      refmpn_copyi (s0, e->s[0].p, size);
2236      refmpn_copyi (s1, e->s[1].p, size2);
2237
2238      mprotect_region (&s[0].region, PROT_READ|PROT_WRITE);
2239      mprotect_region (&s[1].region, PROT_READ|PROT_WRITE);
2240      e->retval = CALLING_CONVENTIONS (function) (e->d[0].p,
2241						  e->s[0].p, size,
2242						  e->s[1].p, size2);
2243      refmpn_copyi (e->s[0].p, s0, size);
2244      refmpn_copyi (e->s[1].p, s1, size2);
2245      free (s0);
2246      free (s1);
2247    }
2248    break;
2249
2250  case TYPE_GCD_FINDA:
2251    {
2252      /* FIXME: do this with a flag */
2253      mp_limb_t  c[2];
2254      c[0] = e->s[0].p[0];
2255      c[0] += (c[0] == 0);
2256      c[1] = e->s[0].p[0];
2257      c[1] += (c[1] == 0);
2258      e->retval = CALLING_CONVENTIONS (function) (c);
2259    }
2260    break;
2261
2262  case TYPE_MPZ_JACOBI:
2263  case TYPE_MPZ_KRONECKER:
2264    {
2265      mpz_t  a, b;
2266      PTR(a) = e->s[0].p; SIZ(a) = ((carry&1)==0 ? size : -size);
2267      PTR(b) = e->s[1].p; SIZ(b) = ((carry&2)==0 ? size2 : -size2);
2268      e->retval = CALLING_CONVENTIONS (function) (a, b);
2269    }
2270    break;
2271  case TYPE_MPZ_KRONECKER_UI:
2272    {
2273      mpz_t  a;
2274      PTR(a) = e->s[0].p; SIZ(a) = (carry==0 ? size : -size);
2275      e->retval = CALLING_CONVENTIONS(function) (a, (unsigned long)multiplier);
2276    }
2277    break;
2278  case TYPE_MPZ_KRONECKER_SI:
2279    {
2280      mpz_t  a;
2281      PTR(a) = e->s[0].p; SIZ(a) = (carry==0 ? size : -size);
2282      e->retval = CALLING_CONVENTIONS (function) (a, (long) multiplier);
2283    }
2284    break;
2285  case TYPE_MPZ_UI_KRONECKER:
2286    {
2287      mpz_t  b;
2288      PTR(b) = e->s[0].p; SIZ(b) = (carry==0 ? size : -size);
2289      e->retval = CALLING_CONVENTIONS(function) ((unsigned long)multiplier, b);
2290    }
2291    break;
2292  case TYPE_MPZ_SI_KRONECKER:
2293    {
2294      mpz_t  b;
2295      PTR(b) = e->s[0].p; SIZ(b) = (carry==0 ? size : -size);
2296      e->retval = CALLING_CONVENTIONS (function) ((long) multiplier, b);
2297    }
2298    break;
2299
2300  case TYPE_MUL_MN:
2301    CALLING_CONVENTIONS (function)
2302      (e->d[0].p, e->s[0].p, size, e->s[1].p, size2);
2303    break;
2304  case TYPE_MUL_N:
2305  case TYPE_MULLO_N:
2306    CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, e->s[1].p, size);
2307    break;
2308  case TYPE_SQR:
2309    CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, size);
2310    break;
2311
2312  case TYPE_UMUL_PPMM:
2313    e->retval = CALLING_CONVENTIONS (function)
2314      (e->d[0].p, e->s[0].p[0], e->s[0].p[1]);
2315    break;
2316  case TYPE_UMUL_PPMM_R:
2317    e->retval = CALLING_CONVENTIONS (function)
2318      (e->s[0].p[0], e->s[0].p[1], e->d[0].p);
2319    break;
2320
2321  case TYPE_LSHIFT:
2322  case TYPE_LSHIFTC:
2323  case TYPE_RSHIFT:
2324    e->retval = CALLING_CONVENTIONS (function)
2325      (e->d[0].p, e->s[0].p, size, shift);
2326    break;
2327
2328  case TYPE_POPCOUNT:
2329    e->retval = (* (unsigned long (*)(ANYARGS))
2330		 CALLING_CONVENTIONS (function)) (e->s[0].p, size);
2331    break;
2332  case TYPE_HAMDIST:
2333    e->retval = (* (unsigned long (*)(ANYARGS))
2334		 CALLING_CONVENTIONS (function)) (e->s[0].p, e->s[1].p, size);
2335    break;
2336
2337  case TYPE_SQRTREM:
2338    e->retval = (* (long (*)(ANYARGS)) CALLING_CONVENTIONS (function))
2339      (e->d[0].p, e->d[1].p, e->s[0].p, size);
2340    break;
2341
2342  case TYPE_ZERO:
2343    CALLING_CONVENTIONS (function) (e->d[0].p, size);
2344    break;
2345
2346  case TYPE_GET_STR:
2347    {
2348      size_t  sizeinbase, fill;
2349      char    *dst;
2350      MPN_SIZEINBASE (sizeinbase, e->s[0].p, size, base);
2351      ASSERT_ALWAYS (sizeinbase <= d[0].size);
2352      fill = d[0].size - sizeinbase;
2353      if (d[0].high)
2354	{
2355	  memset (e->d[0].p, 0xBA, fill);
2356	  dst = (char *) e->d[0].p + fill;
2357	}
2358      else
2359	{
2360	  dst = (char *) e->d[0].p;
2361	  memset (dst + sizeinbase, 0xBA, fill);
2362	}
2363      if (POW2_P (base))
2364	{
2365	  e->retval = CALLING_CONVENTIONS (function) (dst, base,
2366						      e->s[0].p, size);
2367	}
2368      else
2369	{
2370	  refmpn_copy (e->d[1].p, e->s[0].p, size);
2371	  e->retval = CALLING_CONVENTIONS (function) (dst, base,
2372						      e->d[1].p, size);
2373	}
2374      refmpn_zero (e->d[1].p, size);  /* clobbered or unused */
2375    }
2376    break;
2377
2378 case TYPE_INVERT:
2379    {
2380      mp_ptr scratch;
2381      TMP_DECL;
2382      TMP_MARK;
2383      scratch = TMP_ALLOC_LIMBS (mpn_invert_itch (size));
2384      CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, size, scratch);
2385      TMP_FREE;
2386    }
2387    break;
2388  case TYPE_BINVERT:
2389    {
2390      mp_ptr scratch;
2391      TMP_DECL;
2392      TMP_MARK;
2393      scratch = TMP_ALLOC_LIMBS (mpn_binvert_itch (size));
2394      CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, size, scratch);
2395      TMP_FREE;
2396    }
2397    break;
2398
2399#ifdef EXTRA_CALL
2400    EXTRA_CALL
2401#endif
2402
2403  default:
2404    printf ("Unknown routine type %d\n", choice->type);
2405    abort ();
2406    break;
2407  }
2408}
2409
2410
2411void
2412pointer_setup (struct each_t *e)
2413{
2414  int  i, j;
2415
2416  for (i = 0; i < NUM_DESTS; i++)
2417    {
2418      switch (tr->dst_size[i]) {
2419      case 0:
2420      case SIZE_RETVAL: /* will be adjusted later */
2421	d[i].size = size;
2422	break;
2423
2424      case SIZE_1:
2425	d[i].size = 1;
2426	break;
2427      case SIZE_2:
2428	d[i].size = 2;
2429	break;
2430      case SIZE_3:
2431	d[i].size = 3;
2432	break;
2433
2434      case SIZE_PLUS_1:
2435	d[i].size = size+1;
2436	break;
2437      case SIZE_PLUS_MSIZE_SUB_1:
2438	d[i].size = size + tr->msize - 1;
2439	break;
2440
2441      case SIZE_SUM:
2442	if (tr->size2)
2443	  d[i].size = size + size2;
2444	else
2445	  d[i].size = 2*size;
2446	break;
2447
2448      case SIZE_SIZE2:
2449	d[i].size = size2;
2450	break;
2451
2452      case SIZE_DIFF:
2453	d[i].size = size - size2;
2454	break;
2455
2456      case SIZE_DIFF_PLUS_1:
2457	d[i].size = size - size2 + 1;
2458	break;
2459
2460      case SIZE_CEIL_HALF:
2461	d[i].size = (size+1)/2;
2462	break;
2463
2464      case SIZE_GET_STR:
2465	{
2466	  mp_limb_t ff = GMP_NUMB_MAX;
2467	  MPN_SIZEINBASE (d[i].size, &ff - (size-1), size, base);
2468	}
2469	break;
2470
2471      default:
2472	printf ("Unrecognised dst_size type %d\n", tr->dst_size[i]);
2473	abort ();
2474      }
2475    }
2476
2477  /* establish e->d[].p destinations */
2478  for (i = 0; i < NUM_DESTS; i++)
2479    {
2480      mp_size_t  offset = 0;
2481
2482      /* possible room for overlapping sources */
2483      for (j = 0; j < numberof (overlap->s); j++)
2484	if (overlap->s[j] == i)
2485	  offset = MAX (offset, s[j].align);
2486
2487      if (d[i].high)
2488	{
2489	  if (tr->dst_bytes[i])
2490	    {
2491	      e->d[i].p = (mp_ptr)
2492		((char *) (e->d[i].region.ptr + e->d[i].region.size)
2493		 - d[i].size - d[i].align);
2494	    }
2495	  else
2496	    {
2497	      e->d[i].p = e->d[i].region.ptr + e->d[i].region.size
2498		- d[i].size - d[i].align;
2499	      if (tr->overlap == OVERLAP_LOW_TO_HIGH)
2500		e->d[i].p -= offset;
2501	    }
2502	}
2503      else
2504	{
2505	  if (tr->dst_bytes[i])
2506	    {
2507	      e->d[i].p = (mp_ptr) ((char *) e->d[i].region.ptr + d[i].align);
2508	    }
2509	  else
2510	    {
2511	      e->d[i].p = e->d[i].region.ptr + d[i].align;
2512	      if (tr->overlap == OVERLAP_HIGH_TO_LOW)
2513		e->d[i].p += offset;
2514	    }
2515	}
2516    }
2517
2518  /* establish e->s[].p sources */
2519  for (i = 0; i < NUM_SOURCES; i++)
2520    {
2521      int  o = overlap->s[i];
2522      switch (o) {
2523      case -1:
2524	/* no overlap */
2525	e->s[i].p = s[i].p;
2526	break;
2527      case 0:
2528      case 1:
2529	/* overlap with d[o] */
2530	if (tr->overlap == OVERLAP_HIGH_TO_LOW)
2531	  e->s[i].p = e->d[o].p - s[i].align;
2532	else if (tr->overlap == OVERLAP_LOW_TO_HIGH)
2533	  e->s[i].p = e->d[o].p + s[i].align;
2534	else if (tr->size2 == SIZE_FRACTION)
2535	  e->s[i].p = e->d[o].p + size2;
2536	else
2537	  e->s[i].p = e->d[o].p;
2538	break;
2539      default:
2540	abort();
2541	break;
2542      }
2543    }
2544}
2545
2546
2547void
2548validate_fail (void)
2549{
2550  if (tr->reference)
2551    {
2552      trap_location = TRAP_REF;
2553      call (&ref, tr->reference);
2554      trap_location = TRAP_NOWHERE;
2555    }
2556
2557  print_all();
2558  abort();
2559}
2560
2561
2562void
2563try_one (void)
2564{
2565  int  i;
2566
2567  if (option_spinner)
2568    spinner();
2569  spinner_count++;
2570
2571  trap_location = TRAP_SETUPS;
2572
2573  if (tr->divisor == DIVISOR_NORM)
2574    divisor |= GMP_NUMB_HIGHBIT;
2575  if (tr->divisor == DIVISOR_ODD)
2576    divisor |= 1;
2577
2578  for (i = 0; i < NUM_SOURCES; i++)
2579    {
2580      if (s[i].high)
2581	s[i].p = s[i].region.ptr + s[i].region.size - SRC_SIZE(i) - s[i].align;
2582      else
2583	s[i].p = s[i].region.ptr + s[i].align;
2584    }
2585
2586  pointer_setup (&ref);
2587  pointer_setup (&fun);
2588
2589  ref.retval = 0x04152637;
2590  fun.retval = 0x8C9DAEBF;
2591
2592  t_random (multiplier_N, tr->msize);
2593
2594  for (i = 0; i < NUM_SOURCES; i++)
2595    {
2596      if (! tr->src[i])
2597	continue;
2598
2599      mprotect_region (&s[i].region, PROT_READ|PROT_WRITE);
2600      t_random (s[i].p, SRC_SIZE(i));
2601
2602      switch (tr->data) {
2603      case DATA_NON_ZERO:
2604	if (refmpn_zero_p (s[i].p, SRC_SIZE(i)))
2605	  s[i].p[0] = 1;
2606	break;
2607
2608      case DATA_MULTIPLE_DIVISOR:
2609	/* same number of low zero bits as divisor */
2610	s[i].p[0] &= ~ LOW_ZEROS_MASK (divisor);
2611	refmpn_sub_1 (s[i].p, s[i].p, size,
2612		      refmpn_mod_1 (s[i].p, size, divisor));
2613	break;
2614
2615      case DATA_GCD:
2616	/* s[1] no more bits than s[0] */
2617	if (i == 1 && size2 == size)
2618	  s[1].p[size-1] &= refmpn_msbone_mask (s[0].p[size-1]);
2619
2620	/* high limb non-zero */
2621	s[i].p[SRC_SIZE(i)-1] += (s[i].p[SRC_SIZE(i)-1] == 0);
2622
2623	/* odd */
2624	s[i].p[0] |= 1;
2625	break;
2626
2627      case DATA_SRC0_ODD:
2628	if (i == 0)
2629	  s[i].p[0] |= 1;
2630	break;
2631
2632      case DATA_SRC1_ODD:
2633	if (i == 1)
2634	  s[i].p[0] |= 1;
2635	break;
2636
2637      case DATA_SRC1_HIGHBIT:
2638	if (i == 1)
2639	  {
2640	    if (tr->size2)
2641	      s[i].p[size2-1] |= GMP_NUMB_HIGHBIT;
2642	    else
2643	      s[i].p[size-1] |= GMP_NUMB_HIGHBIT;
2644	  }
2645	break;
2646
2647      case DATA_SRC0_HIGHBIT:
2648       if (i == 0)
2649         {
2650           s[i].p[size-1] |= GMP_NUMB_HIGHBIT;
2651         }
2652       break;
2653
2654      case DATA_UDIV_QRNND:
2655	s[i].p[1] %= divisor;
2656	break;
2657      }
2658
2659      mprotect_region (&s[i].region, PROT_READ);
2660    }
2661
2662  for (i = 0; i < NUM_DESTS; i++)
2663    {
2664      if (! tr->dst[i])
2665	continue;
2666
2667      if (tr->dst0_from_src1 && i==0)
2668	{
2669	  mp_size_t  copy = MIN (d[0].size, SRC_SIZE(1));
2670	  mp_size_t  fill = MAX (0, d[0].size - copy);
2671	  MPN_COPY (fun.d[0].p, s[1].region.ptr, copy);
2672	  MPN_COPY (ref.d[0].p, s[1].region.ptr, copy);
2673	  refmpn_fill (fun.d[0].p + copy, fill, DEADVAL);
2674	  refmpn_fill (ref.d[0].p + copy, fill, DEADVAL);
2675	}
2676      else if (tr->dst_bytes[i])
2677	{
2678	  memset (ref.d[i].p, 0xBA, d[i].size);
2679	  memset (fun.d[i].p, 0xBA, d[i].size);
2680	}
2681      else
2682	{
2683	  refmpn_fill (ref.d[i].p, d[i].size, DEADVAL);
2684	  refmpn_fill (fun.d[i].p, d[i].size, DEADVAL);
2685	}
2686    }
2687
2688  for (i = 0; i < NUM_SOURCES; i++)
2689    {
2690      if (! tr->src[i])
2691	continue;
2692
2693      if (ref.s[i].p != s[i].p)
2694	{
2695	  refmpn_copyi (ref.s[i].p, s[i].p, SRC_SIZE(i));
2696	  refmpn_copyi (fun.s[i].p, s[i].p, SRC_SIZE(i));
2697	}
2698    }
2699
2700  if (option_print)
2701    print_all();
2702
2703  if (tr->validate != NULL)
2704    {
2705      trap_location = TRAP_FUN;
2706      call (&fun, choice->function);
2707      trap_location = TRAP_NOWHERE;
2708
2709      if (! CALLING_CONVENTIONS_CHECK ())
2710	{
2711	  print_all();
2712	  abort();
2713	}
2714
2715      (*tr->validate) ();
2716    }
2717  else
2718    {
2719      trap_location = TRAP_REF;
2720      call (&ref, tr->reference);
2721      trap_location = TRAP_FUN;
2722      call (&fun, choice->function);
2723      trap_location = TRAP_NOWHERE;
2724
2725      if (! CALLING_CONVENTIONS_CHECK ())
2726	{
2727	  print_all();
2728	  abort();
2729	}
2730
2731      compare ();
2732    }
2733}
2734
2735
/* Loop headers for try_many().  Each one expands to a bare "for (...)" with
   no body, so they can be stacked one after another in front of a single
   statement.  */

/* size starts at the largest of option_firstsize, the routine's minsize
   and 0 or 1 (0 only under SIZE_ALLOW_ZERO), and runs to option_lastsize */
#define SIZE_ITERATION                                                  \
  for (size = MAX3 (option_firstsize,                                   \
                    choice->minsize,                                    \
                    (tr->size == SIZE_ALLOW_ZERO) ? 0 : 1);             \
       size <= option_lastsize;                                         \
       size++)

/* first size2: fixed at 2 for SIZE_2, option_firstsize2 for
   SIZE_FRACTION, otherwise at least minsize when a second size is in use,
   and 0 when it isn't */
#define SIZE2_FIRST                                                     \
  (tr->size2 == SIZE_2 ? 2                                              \
   : tr->size2 == SIZE_FRACTION ? option_firstsize2                     \
   : tr->size2 ? MAX (choice->minsize,                                  \
                      (option_firstsize2 != 0 ? option_firstsize2 : 1)) \
   : 0)

/* last size2: never beyond the current size for an ordinary second size */
#define SIZE2_LAST                                                      \
  (tr->size2 == SIZE_2 ? 2                                              \
   : tr->size2 == SIZE_FRACTION ? FRACTION_COUNT-1                      \
   : tr->size2 ? size                                                   \
   : 0)

#define SIZE2_ITERATION                                                 \
  for (size2 = SIZE2_FIRST; size2 <= SIZE2_LAST; size2++)

/* alignments 0 to ALIGNMENTS-1 when cond is true, otherwise just 0 */
#define ALIGN_COUNT(cond)  ((cond) ? ALIGNMENTS : 1)
#define ALIGN_ITERATION(w,n,cond)                                       \
  for (w[n].align = 0; w[n].align < ALIGN_COUNT(cond); w[n].align++)

/* high is 0 and then 1 when cond is true, otherwise just 0 */
#define HIGH_LIMIT(cond)  ((cond) != 0)
#define HIGH_COUNT(cond)  (HIGH_LIMIT (cond) + 1)
#define HIGH_ITERATION(w,n,cond)                                        \
  for (w[n].high = 0; w[n].high <= HIGH_LIMIT(cond); w[n].high++)

/* shifts 1 to GMP_NUMB_BITS-1 when tr->shift is set, otherwise just 1 */
#define SHIFT_LIMIT                                                     \
  ((unsigned long) (tr->shift ? GMP_NUMB_BITS -1 : 1))

#define SHIFT_ITERATION                                                 \
  for (shift = 1; shift <= SHIFT_LIMIT; shift++)
2774
2775
2776void
2777try_many (void)
2778{
2779  int   i;
2780
2781  {
2782    unsigned long  total = 1;
2783
2784    total *= option_repetitions;
2785    total *= option_lastsize;
2786    if (tr->size2 == SIZE_FRACTION) total *= FRACTION_COUNT;
2787    else if (tr->size2)             total *= (option_lastsize+1)/2;
2788
2789    total *= SHIFT_LIMIT;
2790    total *= MULTIPLIER_COUNT;
2791    total *= DIVISOR_COUNT;
2792    total *= CARRY_COUNT;
2793    total *= T_RAND_COUNT;
2794
2795    total *= HIGH_COUNT (tr->dst[0]);
2796    total *= HIGH_COUNT (tr->dst[1]);
2797    total *= HIGH_COUNT (tr->src[0]);
2798    total *= HIGH_COUNT (tr->src[1]);
2799
2800    total *= ALIGN_COUNT (tr->dst[0]);
2801    total *= ALIGN_COUNT (tr->dst[1]);
2802    total *= ALIGN_COUNT (tr->src[0]);
2803    total *= ALIGN_COUNT (tr->src[1]);
2804
2805    total *= OVERLAP_COUNT;
2806
2807    printf ("%s %lu\n", choice->name, total);
2808  }
2809
2810  spinner_count = 0;
2811
2812  for (i = 0; i < option_repetitions; i++)
2813    SIZE_ITERATION
2814      SIZE2_ITERATION
2815
2816      SHIFT_ITERATION
2817      MULTIPLIER_ITERATION
2818      DIVISOR_ITERATION
2819      CARRY_ITERATION /* must be after divisor */
2820      T_RAND_ITERATION
2821
2822      HIGH_ITERATION(d,0, tr->dst[0])
2823      HIGH_ITERATION(d,1, tr->dst[1])
2824      HIGH_ITERATION(s,0, tr->src[0])
2825      HIGH_ITERATION(s,1, tr->src[1])
2826
2827      ALIGN_ITERATION(d,0, tr->dst[0])
2828      ALIGN_ITERATION(d,1, tr->dst[1])
2829      ALIGN_ITERATION(s,0, tr->src[0])
2830      ALIGN_ITERATION(s,1, tr->src[1])
2831
2832      OVERLAP_ITERATION
2833      try_one();
2834
2835  printf("\n");
2836}
2837
2838
2839/* Usually print_all() doesn't show much, but it might give a hint as to
2840   where the function was up to when it died. */
2841void
2842trap (int sig)
2843{
2844  const char *name = "noname";
2845
2846  switch (sig) {
2847  case SIGILL:  name = "SIGILL";  break;
2848#ifdef SIGBUS
2849  case SIGBUS:  name = "SIGBUS";  break;
2850#endif
2851  case SIGSEGV: name = "SIGSEGV"; break;
2852  case SIGFPE:  name = "SIGFPE";  break;
2853  }
2854
2855  printf ("\n\nSIGNAL TRAP: %s\n", name);
2856
2857  switch (trap_location) {
2858  case TRAP_REF:
2859    printf ("  in reference function: %s\n", tr->reference_name);
2860    break;
2861  case TRAP_FUN:
2862    printf ("  in test function: %s\n", choice->name);
2863    print_all ();
2864    break;
2865  case TRAP_SETUPS:
2866    printf ("  in parameter setups\n");
2867    print_all ();
2868    break;
2869  default:
2870    printf ("  somewhere unknown\n");
2871    break;
2872  }
2873  exit (1);
2874}
2875
2876
2877void
2878try_init (void)
2879{
2880#if HAVE_GETPAGESIZE
2881  /* Prefer getpagesize() over sysconf(), since on SunOS 4 sysconf() doesn't
2882     know _SC_PAGESIZE. */
2883  pagesize = getpagesize ();
2884#else
2885#if HAVE_SYSCONF
2886  if ((pagesize = sysconf (_SC_PAGESIZE)) == -1)
2887    {
2888      /* According to the linux man page, sysconf doesn't set errno */
2889      fprintf (stderr, "Cannot get sysconf _SC_PAGESIZE\n");
2890      exit (1);
2891    }
2892#else
2893Error, error, cannot get page size
2894#endif
2895#endif
2896
2897  printf ("pagesize is 0x%lX bytes\n", pagesize);
2898
2899  signal (SIGILL,  trap);
2900#ifdef SIGBUS
2901  signal (SIGBUS,  trap);
2902#endif
2903  signal (SIGSEGV, trap);
2904  signal (SIGFPE,  trap);
2905
2906  {
2907    int  i;
2908
2909    for (i = 0; i < NUM_SOURCES; i++)
2910      {
2911	malloc_region (&s[i].region, 2*option_lastsize+ALIGNMENTS-1);
2912	printf ("s[%d] %p to %p (0x%lX bytes)\n",
2913		i, (void *) (s[i].region.ptr),
2914		(void *) (s[i].region.ptr + s[i].region.size),
2915		(long) s[i].region.size * BYTES_PER_MP_LIMB);
2916      }
2917
2918#define INIT_EACH(e,es)                                                 \
2919    for (i = 0; i < NUM_DESTS; i++)                                     \
2920      {                                                                 \
2921	malloc_region (&e.d[i].region, 2*option_lastsize+ALIGNMENTS-1); \
2922	printf ("%s d[%d] %p to %p (0x%lX bytes)\n",                    \
2923		es, i, (void *) (e.d[i].region.ptr),			\
2924		(void *)  (e.d[i].region.ptr + e.d[i].region.size),	\
2925		(long) e.d[i].region.size * BYTES_PER_MP_LIMB);         \
2926      }
2927
2928    INIT_EACH(ref, "ref");
2929    INIT_EACH(fun, "fun");
2930  }
2931}
2932
/* Match STR against PATTERN, where PATTERN may carry a single "*" wildcard
   at its start or at its end.

   A leading "*" makes the rest of PATTERN a required suffix of STR (so "*"
   alone matches anything).  Otherwise a trailing "*" makes the text before
   it a required prefix of STR.  With no wildcard the two strings must be
   equal.  A "*" anywhere else is taken literally, and when PATTERN has a
   leading "*" a trailing one is literal too.

   Returns 1 on a match and 0 otherwise.  */
int
strmatch_wild (const char *pattern, const char *str)
{
  size_t  plen, slen;

  /* wildcard at start */
  if (pattern[0] == '*')
    {
      pattern++;
      plen = strlen (pattern);
      slen = strlen (str);
      return (plen == 0
              || (slen >= plen
                  && memcmp (pattern, str+slen-plen, plen) == 0));
    }

  /* wildcard at end; strncmp stops at the terminator of a STR shorter than
     the prefix, where memcmp would read past the end of it */
  plen = strlen (pattern);
  if (plen >= 1 && pattern[plen-1] == '*')
    return (strncmp (pattern, str, plen-1) == 0);

  /* no wildcards */
  return (strcmp (pattern, str) == 0);
}
2956
2957void
2958try_name (const char *name)
2959{
2960  int  found = 0;
2961  int  i;
2962
2963  for (i = 0; i < numberof (choice_array); i++)
2964    {
2965      if (strmatch_wild (name, choice_array[i].name))
2966	{
2967	  choice = &choice_array[i];
2968	  tr = &param[choice->type];
2969	  try_many ();
2970	  found = 1;
2971	}
2972    }
2973
2974  if (!found)
2975    {
2976      printf ("%s unknown\n", name);
2977      /* exit (1); */
2978    }
2979}
2980
2981
2982void
2983usage (const char *prog)
2984{
2985  int  col = 0;
2986  int  i;
2987
2988  printf ("Usage: %s [options] function...\n", prog);
2989  printf ("    -1        use limb data 1,2,3,etc\n");
2990  printf ("    -9        use limb data all 0xFF..FFs\n");
2991  printf ("    -a zeros  use limb data all zeros\n");
2992  printf ("    -a ffs    use limb data all 0xFF..FFs (same as -9)\n");
2993  printf ("    -a 2fd    use data 0x2FFF...FFFD\n");
2994  printf ("    -p        print each case tried (try this if seg faulting)\n");
2995  printf ("    -R        seed random numbers from time()\n");
2996  printf ("    -r reps   set repetitions (default %d)\n", DEFAULT_REPETITIONS);
2997  printf ("    -s size   starting size to test\n");
2998  printf ("    -S size2  starting size2 to test\n");
2999  printf ("    -s s1-s2  range of sizes to test\n");
3000  printf ("    -W        don't show the spinner (use this in gdb)\n");
3001  printf ("    -z        disable mprotect() redzones\n");
3002  printf ("Default data is refmpn_random() and refmpn_random2().\n");
3003  printf ("\n");
3004  printf ("Functions that can be tested:\n");
3005
3006  for (i = 0; i < numberof (choice_array); i++)
3007    {
3008      if (col + 1 + strlen (choice_array[i].name) > 79)
3009	{
3010	  printf ("\n");
3011	  col = 0;
3012	}
3013      printf (" %s", choice_array[i].name);
3014      col += 1 + strlen (choice_array[i].name);
3015    }
3016  printf ("\n");
3017
3018  exit(1);
3019}
3020
3021
3022int
3023main (int argc, char *argv[])
3024{
3025  int  i;
3026
3027  /* unbuffered output */
3028  setbuf (stdout, NULL);
3029  setbuf (stderr, NULL);
3030
3031  /* default trace in hex, and in upper-case so can paste into bc */
3032  mp_trace_base = -16;
3033
3034  param_init ();
3035
3036  {
3037    unsigned long  seed = 123;
3038    int   opt;
3039
3040    while ((opt = getopt(argc, argv, "19a:b:E:pRr:S:s:Wz")) != EOF)
3041      {
3042	switch (opt) {
3043	case '1':
3044	  /* use limb data values 1, 2, 3, ... etc */
3045	  option_data = DATA_SEQ;
3046	  break;
3047	case '9':
3048	  /* use limb data values 0xFFF...FFF always */
3049	  option_data = DATA_FFS;
3050	  break;
3051	case 'a':
3052	  if (strcmp (optarg, "zeros") == 0)     option_data = DATA_ZEROS;
3053	  else if (strcmp (optarg, "seq") == 0)  option_data = DATA_SEQ;
3054	  else if (strcmp (optarg, "ffs") == 0)  option_data = DATA_FFS;
3055	  else if (strcmp (optarg, "2fd") == 0)  option_data = DATA_2FD;
3056	  else
3057	    {
3058	      fprintf (stderr, "unrecognised data option: %s\n", optarg);
3059	      exit (1);
3060	    }
3061	  break;
3062	case 'b':
3063	  mp_trace_base = atoi (optarg);
3064	  break;
3065	case 'E':
3066	  /* re-seed */
3067	  sscanf (optarg, "%lu", &seed);
3068	  printf ("Re-seeding with %lu\n", seed);
3069	  break;
3070	case 'p':
3071	  option_print = 1;
3072	  break;
3073	case 'R':
3074	  /* randomize */
3075	  seed = time (NULL);
3076	  printf ("Seeding with %lu, re-run using \"-E %lu\"\n", seed, seed);
3077	  break;
3078	case 'r':
3079	  option_repetitions = atoi (optarg);
3080	  break;
3081	case 's':
3082	  {
3083	    char  *p;
3084	    option_firstsize = strtol (optarg, 0, 0);
3085	    if ((p = strchr (optarg, '-')) != NULL)
3086	      option_lastsize = strtol (p+1, 0, 0);
3087	  }
3088	  break;
3089	case 'S':
3090	  /* -S <size> sets the starting size for the second of a two size
3091	     routine (like mpn_mul_basecase) */
3092	  option_firstsize2 = strtol (optarg, 0, 0);
3093	  break;
3094	case 'W':
3095	  /* use this when running in the debugger */
3096	  option_spinner = 0;
3097	  break;
3098	case 'z':
3099	  /* disable redzones */
3100	  option_redzones = 0;
3101	  break;
3102	case '?':
3103	  usage (argv[0]);
3104	  break;
3105	}
3106      }
3107
3108    gmp_randinit_default (__gmp_rands);
3109    __gmp_rands_initialized = 1;
3110    gmp_randseed_ui (__gmp_rands, seed);
3111  }
3112
3113  try_init();
3114
3115  if (argc <= optind)
3116    usage (argv[0]);
3117
3118  for (i = optind; i < argc; i++)
3119    try_name (argv[i]);
3120
3121  return 0;
3122}
3123