/* Include file for internal GNU MP types and definitions.

   THE CONTENTS OF THIS FILE ARE FOR INTERNAL USE AND ARE ALMOST CERTAIN TO
   BE SUBJECT TO INCOMPATIBLE CHANGES IN FUTURE GNU MP RELEASES.

Copyright 1991-2018 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of either:

  * the GNU Lesser General Public License as published by the Free
    Software Foundation; either version 3 of the License, or (at your
    option) any later version.

or

  * the GNU General Public License as published by the Free Software
    Foundation; either version 2 of the License, or (at your option) any
    later version.

or both in parallel, as here.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received copies of the GNU General Public License and the
GNU Lesser General Public License along with the GNU MP Library.  If not,
see https://www.gnu.org/licenses/.  */


/* __GMP_DECLSPEC must be given on any global data that will be accessed
   from outside libgmp, meaning from the test or development programs, or
   from libgmpxx.  Failing to do this will result in an incorrect address
   being used for the accesses.  On functions __GMP_DECLSPEC makes calls
   from outside libgmp more efficient, but they'll still work fine without
   it.  */


#ifndef __GMP_IMPL_H__
#define __GMP_IMPL_H__

#if defined _CRAY
#include <intrinsics.h>  /* for _popcnt */
#endif

/* For INT_MAX, etc. We used to avoid it because of a bug (on solaris,
   gcc 2.95 under -mcpu=ultrasparc in ABI=32 ends up getting wrong
   values (the ABI=64 values)), but it should be safe now.

   On Cray vector systems, however, we need the system limits.h since sizes
   of signed and unsigned types can differ there, depending on compiler
   options (eg. -hnofastmd), making our SHRT_MAX etc expressions fail.  For
   reference, int can be 46 or 64 bits, whereas uint is always 64 bits; and
   short can be 24, 32, 46 or 64 bits, and different for ushort.  */

#include <limits.h>

/* For fat.h and other fat binary stuff.
   No need for __GMP_ATTRIBUTE_PURE or __GMP_NOTHROW, since functions
   declared this way are only used to set function pointers in __gmpn_cpuvec,
   they're not called directly.  */
#define DECL_add_n(name) \
  __GMP_DECLSPEC mp_limb_t name (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)
#define DECL_addlsh1_n(name) \
  DECL_add_n (name)
#define DECL_addlsh2_n(name) \
  DECL_add_n (name)
#define DECL_addmul_1(name) \
  __GMP_DECLSPEC mp_limb_t name (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t)
#define DECL_addmul_2(name) \
  __GMP_DECLSPEC mp_limb_t name (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr)
#define DECL_bdiv_dbm1c(name) \
  __GMP_DECLSPEC mp_limb_t name (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t)
#define DECL_cnd_add_n(name) \
  __GMP_DECLSPEC mp_limb_t name (mp_limb_t, mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)
#define DECL_cnd_sub_n(name) \
  __GMP_DECLSPEC mp_limb_t name (mp_limb_t, mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)
#define DECL_com(name) \
  __GMP_DECLSPEC void name (mp_ptr, mp_srcptr, mp_size_t)
#define DECL_copyd(name) \
  __GMP_DECLSPEC void name (mp_ptr, mp_srcptr, mp_size_t)
#define DECL_copyi(name) \
  DECL_copyd (name)
#define DECL_divexact_1(name) \
  __GMP_DECLSPEC void name (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t)
#define DECL_divexact_by3c(name) \
  __GMP_DECLSPEC mp_limb_t name (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t)
#define DECL_divrem_1(name) \
  __GMP_DECLSPEC mp_limb_t name (mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t)
#define DECL_gcd_11(name) \
  __GMP_DECLSPEC mp_limb_t name (mp_limb_t, mp_limb_t)
#define DECL_lshift(name) \
  __GMP_DECLSPEC mp_limb_t name (mp_ptr, mp_srcptr, mp_size_t, unsigned)
#define DECL_lshiftc(name) \
  DECL_lshift (name)
#define DECL_mod_1(name) \
  __GMP_DECLSPEC mp_limb_t name (mp_srcptr, mp_size_t, mp_limb_t)
#define DECL_mod_1_1p(name) \
  __GMP_DECLSPEC mp_limb_t name (mp_srcptr, mp_size_t, mp_limb_t, const mp_limb_t [])
#define DECL_mod_1_1p_cps(name) \
  __GMP_DECLSPEC void name (mp_limb_t cps[], mp_limb_t b)
#define DECL_mod_1s_2p(name) \
  DECL_mod_1_1p (name)
#define DECL_mod_1s_2p_cps(name) \
  DECL_mod_1_1p_cps (name)
#define DECL_mod_1s_4p(name) \
  DECL_mod_1_1p (name)
#define DECL_mod_1s_4p_cps(name) \
  DECL_mod_1_1p_cps (name)
#define DECL_mod_34lsub1(name) \
  __GMP_DECLSPEC mp_limb_t name (mp_srcptr, mp_size_t)
#define DECL_modexact_1c_odd(name) \
  __GMP_DECLSPEC mp_limb_t name (mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t)
#define DECL_mul_1(name) \
  DECL_addmul_1 (name)
#define DECL_mul_basecase(name) \
  __GMP_DECLSPEC void name (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t)
#define DECL_mullo_basecase(name) \
  __GMP_DECLSPEC void name (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)
#define DECL_preinv_divrem_1(name) \
  __GMP_DECLSPEC mp_limb_t name (mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t, int)
#define DECL_preinv_mod_1(name) \
  __GMP_DECLSPEC mp_limb_t name (mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t)
#define DECL_redc_1(name) \
  __GMP_DECLSPEC mp_limb_t name (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t)
#define DECL_redc_2(name) \
  __GMP_DECLSPEC mp_limb_t name (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr)
#define DECL_rshift(name) \
  DECL_lshift (name)
#define DECL_sqr_basecase(name) \
  __GMP_DECLSPEC void name (mp_ptr, mp_srcptr, mp_size_t)
#define DECL_sub_n(name) \
  DECL_add_n (name)
#define DECL_sublsh1_n(name) \
  DECL_add_n (name)
#define DECL_submul_1(name) \
  DECL_addmul_1 (name)

#if ! defined (__GMP_WITHIN_CONFIGURE)
#include "config.h"
#include "gmp.h"
#include "gmp-mparam.h"
#include "fib_table.h"
#include "fac_table.h"
#include "mp_bases.h"
#if WANT_FAT_BINARY
#include "fat.h"
#endif
#endif

#if HAVE_INTTYPES_H      /* for uint_least32_t */
# include <inttypes.h>
#else
# if HAVE_STDINT_H
#  include <stdint.h>
# endif
#endif

#ifdef __cplusplus
#include <cstring>  /* for strlen */
#include <string>   /* for std::string */
#endif


#ifndef WANT_TMP_DEBUG  /* for TMP_ALLOC_LIMBS_2 and others */
#define WANT_TMP_DEBUG 0
#endif

/* The following tries to get a good version of alloca.  The tests are
   adapted from autoconf AC_FUNC_ALLOCA, with a couple of additions.
   Whether this succeeds is tested by GMP_FUNC_ALLOCA, and HAVE_ALLOCA will
   be set up appropriately.

   ifndef alloca - a cpp define might already exist.
       glibc <stdlib.h> includes <alloca.h> which uses GCC __builtin_alloca.
       HP cc +Olibcalls adds a #define of alloca to __builtin_alloca.

   GCC __builtin_alloca - preferred whenever available.

   _AIX pragma - IBM compilers need a #pragma in "each module that needs to
       use alloca".  Pragma indented to protect pre-ANSI cpp's.  _IBMR2 was
       used in past versions of GMP, retained still in case it matters.

       The autoconf manual says this pragma needs to be at the start of a C
       file, apart from comments and preprocessor directives.  Is that true?
       xlc on aix 4.xxx doesn't seem to mind it being after prototypes etc
       from gmp.h.
*/

#ifndef alloca
# ifdef __GNUC__
#  define alloca __builtin_alloca
# else
#  ifdef __DECC
#   define alloca(x) __ALLOCA(x)
#  else
#   ifdef _MSC_VER
#    include <malloc.h>
#    define alloca _alloca
#   else
#    if HAVE_ALLOCA_H
#     include <alloca.h>
#    else
#     if defined (_AIX) || defined (_IBMR2)
 #pragma alloca
#     else
#      if !defined (__NetBSD__)
        char *alloca ();
#      else
#       include <stdlib.h>
#      endif
#     endif
#    endif
#   endif
#  endif
# endif
#endif


/* if not provided by gmp-mparam.h */
#ifndef GMP_LIMB_BYTES
#define GMP_LIMB_BYTES  SIZEOF_MP_LIMB_T
#endif
#ifndef GMP_LIMB_BITS
#define GMP_LIMB_BITS  (8 * SIZEOF_MP_LIMB_T)
#endif

#define BITS_PER_ULONG  (8 * SIZEOF_UNSIGNED_LONG)


/* gmp_uint_least32_t is an unsigned integer type with at least 32 bits. */
#if HAVE_UINT_LEAST32_T
typedef uint_least32_t      gmp_uint_least32_t;
#else
#if SIZEOF_UNSIGNED_SHORT >= 4
typedef unsigned short      gmp_uint_least32_t;
#else
#if SIZEOF_UNSIGNED >= 4
typedef unsigned            gmp_uint_least32_t;
#else
typedef unsigned long       gmp_uint_least32_t;
#endif
#endif
#endif


/* gmp_intptr_t, for pointer to integer casts */
#if HAVE_INTPTR_T
typedef intptr_t            gmp_intptr_t;
#else /* fallback */
typedef size_t              gmp_intptr_t;
#endif


/* pre-inverse types for truncating division and modulo */
typedef struct {mp_limb_t inv32;} gmp_pi1_t;
typedef struct {mp_limb_t inv21, inv32, inv53;} gmp_pi2_t;


/* "const" basically means a function does nothing but examine its arguments
   and give a return value, it doesn't read or write any memory (neither
   global nor pointed to by arguments), and has no other side-effects.  This
   is more restrictive than "pure".  See info node "(gcc)Function
   Attributes".  __GMP_NO_ATTRIBUTE_CONST_PURE lets tune/common.c etc turn
   this off when trying to write timing loops.  */
#if HAVE_ATTRIBUTE_CONST && ! defined (__GMP_NO_ATTRIBUTE_CONST_PURE)
#define ATTRIBUTE_CONST  __attribute__ ((const))
#else
#define ATTRIBUTE_CONST
#endif

#if HAVE_ATTRIBUTE_NORETURN
#define ATTRIBUTE_NORETURN  __attribute__ ((noreturn))
#else
#define ATTRIBUTE_NORETURN
#endif

/* "malloc" means a function behaves like malloc in that the pointer it
   returns doesn't alias anything.  */
#if HAVE_ATTRIBUTE_MALLOC
#define ATTRIBUTE_MALLOC  __attribute__ ((malloc))
#else
#define ATTRIBUTE_MALLOC
#endif


#if ! HAVE_STRCHR
#define strchr(s,c)  index(s,c)
#endif

#if ! HAVE_MEMSET
#define memset(p, c, n)						\
  do {								\
    unsigned char *__memset__p = (unsigned char *) (p);	\
    int	 __i;							\
    ASSERT ((n) >= 0);						\
    for (__i = 0; __i < (n); __i++)				\
      __memset__p[__i] = (c);					\
  } while (0)
#endif

/* va_copy is standard in C99, and gcc provides __va_copy when in strict C89
   mode.  Falling back to a memcpy will give maximum portability, since it
   works no matter whether va_list is a pointer, struct or array.  */
#if ! defined (va_copy) && defined (__va_copy)
#define va_copy(dst,src)  __va_copy(dst,src)
#endif
#if ! defined (va_copy)
#define va_copy(dst,src) \
  do { memcpy (&(dst), &(src), sizeof (va_list)); } while (0)
#endif


/* HAVE_HOST_CPU_alpha_CIX is 1 on an alpha with the CIX instructions
   (ie. ctlz, ctpop, cttz).  */
#if HAVE_HOST_CPU_alphaev67 || HAVE_HOST_CPU_alphaev68  \
  || HAVE_HOST_CPU_alphaev7
#define HAVE_HOST_CPU_alpha_CIX 1
#endif


#if defined (__cplusplus)
extern "C" {
#endif


/* Usage: TMP_DECL;
	  TMP_MARK;
	  ptr = TMP_ALLOC (bytes);
	  TMP_FREE;

   Small allocations should use TMP_SALLOC, big allocations should use
   TMP_BALLOC.  Allocations that might be small or big should use TMP_ALLOC.

   Functions that use just TMP_SALLOC should use TMP_SDECL, TMP_SMARK, and
   TMP_SFREE.

   TMP_DECL just declares a variable, but might be empty and so must be last
   in a list of variables.  TMP_MARK must be done before any TMP_ALLOC.
   TMP_ALLOC(0) is not allowed.  TMP_FREE doesn't need to be done if a
   TMP_MARK was made, but then no TMP_ALLOCs.  */
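/* A minimal sketch of that calling sequence, with a hypothetical function
   and size purely for illustration (TMP_ALLOC_LIMBS is defined further
   down in this file):

       static void
       example (mp_size_t n)
       {
         mp_ptr  scratch;
         TMP_DECL;

         TMP_MARK;
         scratch = TMP_ALLOC_LIMBS (n);
         ...use n limbs at scratch...
         TMP_FREE;
       }
*/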

/* The alignment in bytes, used for TMP_ALLOCed blocks, when alloca or
   __gmp_allocate_func doesn't already determine it.  */
union tmp_align_t {
  mp_limb_t  l;
  double     d;
  char       *p;
};
#define __TMP_ALIGN  sizeof (union tmp_align_t)

/* Return "a" rounded upwards to a multiple of "m", if it isn't already.
   "a" must be an unsigned type.
   This is designed for use with a compile-time constant "m".
   The POW2 case is expected to be usual, and gcc 3.0 and up recognises
   "(-(8*n))%8" or the like is always zero, which means the rounding up in
   the WANT_TMP_NOTREENTRANT version of TMP_ALLOC below will be a noop.  */
#define ROUND_UP_MULTIPLE(a,m)          \
  (POW2_P(m) ? (a) + (-(a))%(m)         \
   : (a)+(m)-1 - (((a)+(m)-1) % (m)))
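/* For instance, working from the definition above, ROUND_UP_MULTIPLE (13, 8)
   is 16 and ROUND_UP_MULTIPLE (16, 8) stays 16 via the POW2 branch, while a
   non-power-of-2 modulus such as ROUND_UP_MULTIPLE (13, 6) takes the second
   branch and gives 18.  (Values worked out from the macro, for illustration
   only.)  */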

#if defined (WANT_TMP_ALLOCA) || defined (WANT_TMP_REENTRANT)
struct tmp_reentrant_t {
  struct tmp_reentrant_t  *next;
  size_t		  size;	  /* bytes, including header */
};
__GMP_DECLSPEC void *__gmp_tmp_reentrant_alloc (struct tmp_reentrant_t **, size_t) ATTRIBUTE_MALLOC;
__GMP_DECLSPEC void  __gmp_tmp_reentrant_free (struct tmp_reentrant_t *);
#endif

#if WANT_TMP_ALLOCA
#define TMP_SDECL
#define TMP_DECL		struct tmp_reentrant_t *__tmp_marker
#define TMP_SMARK
#define TMP_MARK		__tmp_marker = 0
#define TMP_SALLOC(n)		alloca(n)
#define TMP_BALLOC(n)		__gmp_tmp_reentrant_alloc (&__tmp_marker, n)
/* The peculiar stack allocation limit here is chosen for efficient asm.  */
#define TMP_ALLOC(n)							\
  (LIKELY ((n) <= 0x7f00) ? TMP_SALLOC(n) : TMP_BALLOC(n))
#define TMP_SFREE
#define TMP_FREE							\
  do {									\
    if (UNLIKELY (__tmp_marker != 0))					\
      __gmp_tmp_reentrant_free (__tmp_marker);				\
  } while (0)
#endif

#if WANT_TMP_REENTRANT
#define TMP_SDECL		TMP_DECL
#define TMP_DECL		struct tmp_reentrant_t *__tmp_marker
#define TMP_SMARK		TMP_MARK
#define TMP_MARK		__tmp_marker = 0
#define TMP_SALLOC(n)		TMP_ALLOC(n)
#define TMP_BALLOC(n)		TMP_ALLOC(n)
#define TMP_ALLOC(n)		__gmp_tmp_reentrant_alloc (&__tmp_marker, n)
#define TMP_SFREE		TMP_FREE
#define TMP_FREE		__gmp_tmp_reentrant_free (__tmp_marker)
#endif

#if WANT_TMP_NOTREENTRANT
struct tmp_marker
{
  struct tmp_stack *which_chunk;
  void *alloc_point;
};
__GMP_DECLSPEC void *__gmp_tmp_alloc (unsigned long) ATTRIBUTE_MALLOC;
__GMP_DECLSPEC void __gmp_tmp_mark (struct tmp_marker *);
__GMP_DECLSPEC void __gmp_tmp_free (struct tmp_marker *);
#define TMP_SDECL		TMP_DECL
#define TMP_DECL		struct tmp_marker __tmp_marker
#define TMP_SMARK		TMP_MARK
#define TMP_MARK		__gmp_tmp_mark (&__tmp_marker)
#define TMP_SALLOC(n)		TMP_ALLOC(n)
#define TMP_BALLOC(n)		TMP_ALLOC(n)
#define TMP_ALLOC(n)							\
  __gmp_tmp_alloc (ROUND_UP_MULTIPLE ((unsigned long) (n), __TMP_ALIGN))
#define TMP_SFREE		TMP_FREE
#define TMP_FREE		__gmp_tmp_free (&__tmp_marker)
#endif

#if WANT_TMP_DEBUG
/* See tal-debug.c for some comments. */
struct tmp_debug_t {
  struct tmp_debug_entry_t  *list;
  const char                *file;
  int                       line;
};
struct tmp_debug_entry_t {
  struct tmp_debug_entry_t  *next;
  void                      *block;
  size_t                    size;
};
__GMP_DECLSPEC void  __gmp_tmp_debug_mark (const char *, int, struct tmp_debug_t **,
					   struct tmp_debug_t *,
					   const char *, const char *);
__GMP_DECLSPEC void *__gmp_tmp_debug_alloc (const char *, int, int,
					    struct tmp_debug_t **, const char *,
					    size_t) ATTRIBUTE_MALLOC;
__GMP_DECLSPEC void  __gmp_tmp_debug_free (const char *, int, int,
					   struct tmp_debug_t **,
					   const char *, const char *);
#define TMP_SDECL TMP_DECL_NAME(__tmp_xmarker, "__tmp_marker")
#define TMP_DECL TMP_DECL_NAME(__tmp_xmarker, "__tmp_marker")
#define TMP_SMARK TMP_MARK_NAME(__tmp_xmarker, "__tmp_marker")
#define TMP_MARK TMP_MARK_NAME(__tmp_xmarker, "__tmp_marker")
#define TMP_SFREE TMP_FREE_NAME(__tmp_xmarker, "__tmp_marker")
#define TMP_FREE TMP_FREE_NAME(__tmp_xmarker, "__tmp_marker")
/* The marker variable is designed to provoke an uninitialized variable
   warning from the compiler if TMP_FREE is used without a TMP_MARK.
   __tmp_marker_inscope does the same for TMP_ALLOC.  Runtime tests pick
   these things up too.  */
#define TMP_DECL_NAME(marker, marker_name)				\
  int marker;								\
  int __tmp_marker_inscope;						\
  const char *__tmp_marker_name = marker_name;				\
  struct tmp_debug_t  __tmp_marker_struct;				\
  /* don't demand NULL, just cast a zero */				\
  struct tmp_debug_t  *__tmp_marker = (struct tmp_debug_t *) 0
#define TMP_MARK_NAME(marker, marker_name)				\
  do {									\
    marker = 1;								\
    __tmp_marker_inscope = 1;						\
    __gmp_tmp_debug_mark  (ASSERT_FILE, ASSERT_LINE,			\
			   &__tmp_marker, &__tmp_marker_struct,		\
			   __tmp_marker_name, marker_name);		\
  } while (0)
#define TMP_SALLOC(n)		TMP_ALLOC(n)
#define TMP_BALLOC(n)		TMP_ALLOC(n)
#define TMP_ALLOC(size)							\
  __gmp_tmp_debug_alloc (ASSERT_FILE, ASSERT_LINE,			\
			 __tmp_marker_inscope,				\
			 &__tmp_marker, __tmp_marker_name, size)
#define TMP_FREE_NAME(marker, marker_name)				\
  do {									\
    __gmp_tmp_debug_free  (ASSERT_FILE, ASSERT_LINE,			\
			   marker, &__tmp_marker,			\
			   __tmp_marker_name, marker_name);		\
  } while (0)
#endif /* WANT_TMP_DEBUG */


/* Allocating various types. */
#define TMP_ALLOC_TYPE(n,type)  ((type *) TMP_ALLOC ((n) * sizeof (type)))
#define TMP_SALLOC_TYPE(n,type) ((type *) TMP_SALLOC ((n) * sizeof (type)))
#define TMP_BALLOC_TYPE(n,type) ((type *) TMP_BALLOC ((n) * sizeof (type)))
#define TMP_ALLOC_LIMBS(n)      TMP_ALLOC_TYPE(n,mp_limb_t)
#define TMP_SALLOC_LIMBS(n)     TMP_SALLOC_TYPE(n,mp_limb_t)
#define TMP_BALLOC_LIMBS(n)     TMP_BALLOC_TYPE(n,mp_limb_t)
#define TMP_ALLOC_MP_PTRS(n)    TMP_ALLOC_TYPE(n,mp_ptr)
#define TMP_SALLOC_MP_PTRS(n)   TMP_SALLOC_TYPE(n,mp_ptr)
#define TMP_BALLOC_MP_PTRS(n)   TMP_BALLOC_TYPE(n,mp_ptr)

/* It's more efficient to allocate one block than many.  This is certainly
   true of the malloc methods, but it can even be true of alloca if that
   involves copying a chunk of stack (various RISCs), or a call to a stack
   bounds check (mingw).  In any case, when debugging keep separate blocks
   so a redzoning malloc debugger can protect each individually.  */
#define TMP_ALLOC_LIMBS_2(xp,xsize, yp,ysize)				\
  do {									\
    if (WANT_TMP_DEBUG)							\
      {									\
	(xp) = TMP_ALLOC_LIMBS (xsize);					\
	(yp) = TMP_ALLOC_LIMBS (ysize);					\
      }									\
    else								\
      {									\
	(xp) = TMP_ALLOC_LIMBS ((xsize) + (ysize));			\
	(yp) = (xp) + (xsize);						\
      }									\
  } while (0)
#define TMP_ALLOC_LIMBS_3(xp,xsize, yp,ysize, zp,zsize)			\
  do {									\
    if (WANT_TMP_DEBUG)							\
      {									\
	(xp) = TMP_ALLOC_LIMBS (xsize);					\
	(yp) = TMP_ALLOC_LIMBS (ysize);					\
	(zp) = TMP_ALLOC_LIMBS (zsize);					\
      }									\
    else								\
      {									\
	(xp) = TMP_ALLOC_LIMBS ((xsize) + (ysize) + (zsize));		\
	(yp) = (xp) + (xsize);						\
	(zp) = (yp) + (ysize);						\
      }									\
  } while (0)
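/* A typical combined use, under the same TMP_DECL/TMP_MARK/TMP_FREE
   protocol as above (the sizes here are hypothetical):

       mp_ptr  t0, t1;
       TMP_ALLOC_LIMBS_2 (t0, n + 1, t1, 2 * n);

   In a normal build this is a single allocation with t1 placed right after
   t0's n + 1 limbs; with WANT_TMP_DEBUG the two blocks are allocated
   separately, as per the comment above.  */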

/* From gmp.h, nicer names for internal use. */
#define CRAY_Pragma(str)               __GMP_CRAY_Pragma(str)
#define MPN_CMP(result, xp, yp, size)  __GMPN_CMP(result, xp, yp, size)
#define LIKELY(cond)                   __GMP_LIKELY(cond)
#define UNLIKELY(cond)                 __GMP_UNLIKELY(cond)

#define ABS(x) ((x) >= 0 ? (x) : -(x))
#define NEG_CAST(T,x) (- (__GMP_CAST (T, (x) + 1) - 1))
#define ABS_CAST(T,x) ((x) >= 0 ? __GMP_CAST (T, x) : NEG_CAST (T,x))
#undef MIN
#define MIN(l,o) ((l) < (o) ? (l) : (o))
#undef MAX
#define MAX(h,i) ((h) > (i) ? (h) : (i))
#define numberof(x)  (sizeof (x) / sizeof ((x)[0]))

/* Field access macros.  */
#define SIZ(x) ((x)->_mp_size)
#define ABSIZ(x) ABS (SIZ (x))
#define PTR(x) ((x)->_mp_d)
#define EXP(x) ((x)->_mp_exp)
#define PREC(x) ((x)->_mp_prec)
#define ALLOC(x) ((x)->_mp_alloc)
#define NUM(x) mpq_numref(x)
#define DEN(x) mpq_denref(x)

/* n-1 inverts any low zeros and the lowest one bit.  If n&(n-1) leaves zero
   then that lowest one bit must have been the only bit set.  n==0 will
   return true though, so avoid that.  */
#define POW2_P(n)  (((n) & ((n) - 1)) == 0)
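/* So for example POW2_P(4) and POW2_P(8) are non-zero and POW2_P(12) is
   zero; note POW2_P(0) is also non-zero, per the caveat above.  */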

/* This is intended for constant THRESHOLDs only, where the compiler
   can completely fold the result.  */
#define LOG2C(n) \
 (((n) >=    0x1) + ((n) >=    0x2) + ((n) >=    0x4) + ((n) >=    0x8) + \
  ((n) >=   0x10) + ((n) >=   0x20) + ((n) >=   0x40) + ((n) >=   0x80) + \
  ((n) >=  0x100) + ((n) >=  0x200) + ((n) >=  0x400) + ((n) >=  0x800) + \
  ((n) >= 0x1000) + ((n) >= 0x2000) + ((n) >= 0x4000) + ((n) >= 0x8000))
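/* Worked from the definition above: LOG2C(1) is 1, LOG2C(7) is 3 and
   LOG2C(8) is 4, i.e. floor(log2(n)) + 1 for 1 <= n < 0x10000, saturating
   at 16 beyond that.  */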

#define MP_LIMB_T_MAX      (~ (mp_limb_t) 0)

/* Must cast ULONG_MAX etc to unsigned long etc, since they might not be
   unsigned on a K&R compiler.  In particular the HP-UX 10 bundled K&R cc
   treats the plain decimal values in <limits.h> as signed.  */
#define ULONG_HIGHBIT      (ULONG_MAX ^ ((unsigned long) ULONG_MAX >> 1))
#define UINT_HIGHBIT       (UINT_MAX ^ ((unsigned) UINT_MAX >> 1))
#define USHRT_HIGHBIT      (USHRT_MAX ^ ((unsigned short) USHRT_MAX >> 1))
#define GMP_LIMB_HIGHBIT  (MP_LIMB_T_MAX ^ (MP_LIMB_T_MAX >> 1))

#if __GMP_MP_SIZE_T_INT
#define MP_SIZE_T_MAX      INT_MAX
#define MP_SIZE_T_MIN      INT_MIN
#else
#define MP_SIZE_T_MAX      LONG_MAX
#define MP_SIZE_T_MIN      LONG_MIN
#endif

/* mp_exp_t is the same as mp_size_t */
#define MP_EXP_T_MAX   MP_SIZE_T_MAX
#define MP_EXP_T_MIN   MP_SIZE_T_MIN

#define LONG_HIGHBIT       LONG_MIN
#define INT_HIGHBIT        INT_MIN
#define SHRT_HIGHBIT       SHRT_MIN


#define GMP_NUMB_HIGHBIT  (CNST_LIMB(1) << (GMP_NUMB_BITS-1))

#if GMP_NAIL_BITS == 0
#define GMP_NAIL_LOWBIT   CNST_LIMB(0)
#else
#define GMP_NAIL_LOWBIT   (CNST_LIMB(1) << GMP_NUMB_BITS)
#endif

#if GMP_NAIL_BITS != 0
/* Set various *_THRESHOLD values to be used for nails.  Thus we avoid using
   code that has not yet been qualified.  */

#undef  DC_DIV_QR_THRESHOLD
#define DC_DIV_QR_THRESHOLD              50

#undef DIVREM_1_NORM_THRESHOLD
#undef DIVREM_1_UNNORM_THRESHOLD
#undef MOD_1_NORM_THRESHOLD
#undef MOD_1_UNNORM_THRESHOLD
#undef USE_PREINV_DIVREM_1
#undef DIVREM_2_THRESHOLD
#undef DIVEXACT_1_THRESHOLD
#define DIVREM_1_NORM_THRESHOLD           MP_SIZE_T_MAX  /* no preinv */
#define DIVREM_1_UNNORM_THRESHOLD         MP_SIZE_T_MAX  /* no preinv */
#define MOD_1_NORM_THRESHOLD              MP_SIZE_T_MAX  /* no preinv */
#define MOD_1_UNNORM_THRESHOLD            MP_SIZE_T_MAX  /* no preinv */
#define USE_PREINV_DIVREM_1               0  /* no preinv */
#define DIVREM_2_THRESHOLD                MP_SIZE_T_MAX  /* no preinv */

/* mpn/generic/mul_fft.c is not nails-capable. */
#undef  MUL_FFT_THRESHOLD
#undef  SQR_FFT_THRESHOLD
#define MUL_FFT_THRESHOLD                MP_SIZE_T_MAX
#define SQR_FFT_THRESHOLD                MP_SIZE_T_MAX
#endif

/* Swap macros. */

#define MP_LIMB_T_SWAP(x, y)						\
  do {									\
    mp_limb_t __mp_limb_t_swap__tmp = (x);				\
    (x) = (y);								\
    (y) = __mp_limb_t_swap__tmp;					\
  } while (0)
#define MP_SIZE_T_SWAP(x, y)						\
  do {									\
    mp_size_t __mp_size_t_swap__tmp = (x);				\
    (x) = (y);								\
    (y) = __mp_size_t_swap__tmp;					\
  } while (0)

#define MP_PTR_SWAP(x, y)						\
  do {									\
    mp_ptr __mp_ptr_swap__tmp = (x);					\
    (x) = (y);								\
    (y) = __mp_ptr_swap__tmp;						\
  } while (0)
#define MP_SRCPTR_SWAP(x, y)						\
  do {									\
    mp_srcptr __mp_srcptr_swap__tmp = (x);				\
    (x) = (y);								\
    (y) = __mp_srcptr_swap__tmp;					\
  } while (0)

#define MPN_PTR_SWAP(xp,xs, yp,ys)					\
  do {									\
    MP_PTR_SWAP (xp, yp);						\
    MP_SIZE_T_SWAP (xs, ys);						\
  } while(0)
#define MPN_SRCPTR_SWAP(xp,xs, yp,ys)					\
  do {									\
    MP_SRCPTR_SWAP (xp, yp);						\
    MP_SIZE_T_SWAP (xs, ys);						\
  } while(0)

#define MPZ_PTR_SWAP(x, y)						\
  do {									\
    mpz_ptr __mpz_ptr_swap__tmp = (x);					\
    (x) = (y);								\
    (y) = __mpz_ptr_swap__tmp;						\
  } while (0)
#define MPZ_SRCPTR_SWAP(x, y)						\
  do {									\
    mpz_srcptr __mpz_srcptr_swap__tmp = (x);				\
    (x) = (y);								\
    (y) = __mpz_srcptr_swap__tmp;					\
  } while (0)

#define MPQ_PTR_SWAP(x, y)						\
  do {                                                                  \
    mpq_ptr __mpq_ptr_swap__tmp = (x);					\
    (x) = (y);                                                          \
    (y) = __mpq_ptr_swap__tmp;						\
  } while (0)
#define MPQ_SRCPTR_SWAP(x, y)                                           \
  do {                                                                  \
    mpq_srcptr __mpq_srcptr_swap__tmp = (x);                            \
    (x) = (y);                                                          \
    (y) = __mpq_srcptr_swap__tmp;                                       \
  } while (0)


/* Enhancement: __gmp_allocate_func could have "__attribute__ ((malloc))",
   but current gcc (3.0) doesn't seem to support that.  */
__GMP_DECLSPEC extern void * (*__gmp_allocate_func) (size_t);
__GMP_DECLSPEC extern void * (*__gmp_reallocate_func) (void *, size_t, size_t);
__GMP_DECLSPEC extern void   (*__gmp_free_func) (void *, size_t);

__GMP_DECLSPEC void *__gmp_default_allocate (size_t);
__GMP_DECLSPEC void *__gmp_default_reallocate (void *, size_t, size_t);
__GMP_DECLSPEC void __gmp_default_free (void *, size_t);

#define __GMP_ALLOCATE_FUNC_TYPE(n,type) \
  ((type *) (*__gmp_allocate_func) ((n) * sizeof (type)))
#define __GMP_ALLOCATE_FUNC_LIMBS(n)   __GMP_ALLOCATE_FUNC_TYPE (n, mp_limb_t)

#define __GMP_REALLOCATE_FUNC_TYPE(p, old_size, new_size, type)		\
  ((type *) (*__gmp_reallocate_func)					\
   (p, (old_size) * sizeof (type), (new_size) * sizeof (type)))
#define __GMP_REALLOCATE_FUNC_LIMBS(p, old_size, new_size)		\
  __GMP_REALLOCATE_FUNC_TYPE(p, old_size, new_size, mp_limb_t)

#define __GMP_FREE_FUNC_TYPE(p,n,type) (*__gmp_free_func) (p, (n) * sizeof (type))
#define __GMP_FREE_FUNC_LIMBS(p,n)     __GMP_FREE_FUNC_TYPE (p, n, mp_limb_t)

#define __GMP_REALLOCATE_FUNC_MAYBE(ptr, oldsize, newsize)		\
  do {									\
    if ((oldsize) != (newsize))						\
      (ptr) = (*__gmp_reallocate_func) (ptr, oldsize, newsize);		\
  } while (0)

#define __GMP_REALLOCATE_FUNC_MAYBE_TYPE(ptr, oldsize, newsize, type)	\
  do {									\
    if ((oldsize) != (newsize))						\
      (ptr) = (type *) (*__gmp_reallocate_func)				\
	(ptr, (oldsize) * sizeof (type), (newsize) * sizeof (type));	\
  } while (0)


/* Dummy for non-gcc, code involving it will go dead. */
#if ! defined (__GNUC__) || __GNUC__ < 2
#define __builtin_constant_p(x)   0
#endif


/* In gcc 2.96 and up on i386, tail calls are optimized to jumps if the
   stack usage is compatible.  __attribute__ ((regparm (N))) helps by
   putting leading parameters in registers, avoiding extra stack.

   regparm cannot be used with calls going through the PLT, because the
   binding code there may clobber the registers (%eax, %edx, %ecx) used for
   the regparm parameters.  Calls to local (ie. static) functions could
   still use this, if we cared to differentiate locals and globals.

   On athlon-unknown-freebsd4.9 with gcc 3.3.3, regparm cannot be used with
   -p or -pg profiling, since that version of gcc doesn't realize the
   .mcount calls will clobber the parameter registers.  Other systems are
   ok, like debian with glibc 2.3.2 (mcount doesn't clobber), but we don't
   bother to try to detect this.  regparm is only an optimization so we just
   disable it when profiling (profiling being a slowdown anyway).  */

#if HAVE_HOST_CPU_FAMILY_x86 && __GMP_GNUC_PREREQ (2,96) && ! defined (PIC) \
  && ! WANT_PROFILING_PROF && ! WANT_PROFILING_GPROF
#define USE_LEADING_REGPARM 1
#else
#define USE_LEADING_REGPARM 0
#endif

/* Macros for altering parameter order according to regparm usage. */
#if USE_LEADING_REGPARM
#define REGPARM_2_1(a,b,x)    x,a,b
#define REGPARM_3_1(a,b,c,x)  x,a,b,c
#define REGPARM_ATTR(n) __attribute__ ((regparm (n)))
#else
#define REGPARM_2_1(a,b,x)    a,b,x
#define REGPARM_3_1(a,b,c,x)  a,b,c,x
#define REGPARM_ATTR(n)
#endif


/* ASM_L gives a local label for a gcc asm block, for use when temporary
   local labels like "1:" might not be available, which is the case for
   instance on the x86s (the SCO assembler doesn't support them).

   The label generated is made unique by including "%=" which is a unique
   number for each insn.  This ensures the same name can be used in multiple
   asm blocks, perhaps via a macro.  Since jumps between asm blocks are not
   allowed there's no need for a label to be usable outside a single
   block.  */

#define ASM_L(name)  LSYM_PREFIX "asm_%=_" #name


#if defined (__GNUC__) && HAVE_HOST_CPU_FAMILY_x86
#if 0
/* FIXME: Check that these actually improve things.
   FIXME: Need a cld after each std.
   FIXME: Can't have inputs in clobbered registers, must describe them as
   dummy outputs, and add volatile. */
#define MPN_COPY_INCR(DST, SRC, N)					\
  __asm__ ("cld\n\trep\n\tmovsl" : :					\
	   "D" (DST), "S" (SRC), "c" (N) :				\
	   "cx", "di", "si", "memory")
#define MPN_COPY_DECR(DST, SRC, N)					\
  __asm__ ("std\n\trep\n\tmovsl" : :					\
	   "D" ((DST) + (N) - 1), "S" ((SRC) + (N) - 1), "c" (N) :	\
	   "cx", "di", "si", "memory")
#endif
#endif


__GMP_DECLSPEC void __gmpz_aorsmul_1 (REGPARM_3_1 (mpz_ptr, mpz_srcptr, mp_limb_t, mp_size_t)) REGPARM_ATTR(1);
#define mpz_aorsmul_1(w,u,v,sub)  __gmpz_aorsmul_1 (REGPARM_3_1 (w, u, v, sub))

#define mpz_n_pow_ui __gmpz_n_pow_ui
__GMP_DECLSPEC void    mpz_n_pow_ui (mpz_ptr, mp_srcptr, mp_size_t, unsigned long);


#define mpn_addmul_1c __MPN(addmul_1c)
__GMP_DECLSPEC mp_limb_t mpn_addmul_1c (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t);

#ifndef mpn_addmul_2  /* if not done with cpuvec in a fat binary */
#define mpn_addmul_2 __MPN(addmul_2)
__GMP_DECLSPEC mp_limb_t mpn_addmul_2 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);
#endif

#define mpn_addmul_3 __MPN(addmul_3)
__GMP_DECLSPEC mp_limb_t mpn_addmul_3 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);

#define mpn_addmul_4 __MPN(addmul_4)
__GMP_DECLSPEC mp_limb_t mpn_addmul_4 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);

#define mpn_addmul_5 __MPN(addmul_5)
__GMP_DECLSPEC mp_limb_t mpn_addmul_5 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);

#define mpn_addmul_6 __MPN(addmul_6)
__GMP_DECLSPEC mp_limb_t mpn_addmul_6 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);

#define mpn_addmul_7 __MPN(addmul_7)
__GMP_DECLSPEC mp_limb_t mpn_addmul_7 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);

#define mpn_addmul_8 __MPN(addmul_8)
__GMP_DECLSPEC mp_limb_t mpn_addmul_8 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);

/* Alternative entry point in mpn_addmul_2 for the benefit of mpn_sqr_basecase.  */
#define mpn_addmul_2s __MPN(addmul_2s)
__GMP_DECLSPEC mp_limb_t mpn_addmul_2s (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);

/* Override mpn_addlsh1_n, mpn_addlsh2_n, mpn_sublsh1_n, etc with mpn_addlsh_n,
   etc, when the former have no native version (HAVE_NATIVE_... unset) but the
   latter do.  Similarly, override foo_ip1 functions with foo.  We then lie and
   say these macros represent native functions, but leave a trace by using the
   value 2 rather than 1.  */

#if HAVE_NATIVE_mpn_addlsh_n && ! HAVE_NATIVE_mpn_addlsh1_n
#define mpn_addlsh1_n(a,b,c,d)          mpn_addlsh_n(a,b,c,d,1)
#define HAVE_NATIVE_mpn_addlsh1_n       2
#endif

#if HAVE_NATIVE_mpn_addlsh_nc && ! HAVE_NATIVE_mpn_addlsh1_nc
#define mpn_addlsh1_nc(a,b,c,d,x)       mpn_addlsh_nc(a,b,c,d,1,x)
#define HAVE_NATIVE_mpn_addlsh1_nc      2
#endif

#if HAVE_NATIVE_mpn_addlsh1_n && ! HAVE_NATIVE_mpn_addlsh1_n_ip1
#define mpn_addlsh1_n_ip1(a,b,n)        mpn_addlsh1_n(a,a,b,n)
#define HAVE_NATIVE_mpn_addlsh1_n_ip1   2
#endif

#if HAVE_NATIVE_mpn_addlsh1_nc && ! HAVE_NATIVE_mpn_addlsh1_nc_ip1
#define mpn_addlsh1_nc_ip1(a,b,n,c)     mpn_addlsh1_nc(a,a,b,n,c)
#define HAVE_NATIVE_mpn_addlsh1_nc_ip1  2
#endif

#if HAVE_NATIVE_mpn_addlsh_n && ! HAVE_NATIVE_mpn_addlsh2_n
#define mpn_addlsh2_n(a,b,c,d)          mpn_addlsh_n(a,b,c,d,2)
#define HAVE_NATIVE_mpn_addlsh2_n       2
#endif

#if HAVE_NATIVE_mpn_addlsh_nc && ! HAVE_NATIVE_mpn_addlsh2_nc
#define mpn_addlsh2_nc(a,b,c,d,x)       mpn_addlsh_nc(a,b,c,d,2,x)
#define HAVE_NATIVE_mpn_addlsh2_nc      2
#endif

#if HAVE_NATIVE_mpn_addlsh2_n && ! HAVE_NATIVE_mpn_addlsh2_n_ip1
#define mpn_addlsh2_n_ip1(a,b,n)        mpn_addlsh2_n(a,a,b,n)
#define HAVE_NATIVE_mpn_addlsh2_n_ip1   2
#endif

#if HAVE_NATIVE_mpn_addlsh2_nc && ! HAVE_NATIVE_mpn_addlsh2_nc_ip1
#define mpn_addlsh2_nc_ip1(a,b,n,c)     mpn_addlsh2_nc(a,a,b,n,c)
#define HAVE_NATIVE_mpn_addlsh2_nc_ip1  2
#endif

#if HAVE_NATIVE_mpn_sublsh_n && ! HAVE_NATIVE_mpn_sublsh1_n
#define mpn_sublsh1_n(a,b,c,d)          mpn_sublsh_n(a,b,c,d,1)
#define HAVE_NATIVE_mpn_sublsh1_n       2
#endif

#if HAVE_NATIVE_mpn_sublsh_nc && ! HAVE_NATIVE_mpn_sublsh1_nc
#define mpn_sublsh1_nc(a,b,c,d,x)       mpn_sublsh_nc(a,b,c,d,1,x)
#define HAVE_NATIVE_mpn_sublsh1_nc      2
#endif

#if HAVE_NATIVE_mpn_sublsh1_n && ! HAVE_NATIVE_mpn_sublsh1_n_ip1
#define mpn_sublsh1_n_ip1(a,b,n)        mpn_sublsh1_n(a,a,b,n)
#define HAVE_NATIVE_mpn_sublsh1_n_ip1   2
#endif

#if HAVE_NATIVE_mpn_sublsh1_nc && ! HAVE_NATIVE_mpn_sublsh1_nc_ip1
#define mpn_sublsh1_nc_ip1(a,b,n,c)     mpn_sublsh1_nc(a,a,b,n,c)
#define HAVE_NATIVE_mpn_sublsh1_nc_ip1  2
#endif

#if HAVE_NATIVE_mpn_sublsh_n && ! HAVE_NATIVE_mpn_sublsh2_n
#define mpn_sublsh2_n(a,b,c,d)          mpn_sublsh_n(a,b,c,d,2)
#define HAVE_NATIVE_mpn_sublsh2_n       2
#endif

#if HAVE_NATIVE_mpn_sublsh_nc && ! HAVE_NATIVE_mpn_sublsh2_nc
#define mpn_sublsh2_nc(a,b,c,d,x)       mpn_sublsh_nc(a,b,c,d,2,x)
#define HAVE_NATIVE_mpn_sublsh2_nc      2
#endif

#if HAVE_NATIVE_mpn_sublsh2_n && ! HAVE_NATIVE_mpn_sublsh2_n_ip1
#define mpn_sublsh2_n_ip1(a,b,n)        mpn_sublsh2_n(a,a,b,n)
#define HAVE_NATIVE_mpn_sublsh2_n_ip1   2
#endif

#if HAVE_NATIVE_mpn_sublsh2_nc && ! HAVE_NATIVE_mpn_sublsh2_nc_ip1
#define mpn_sublsh2_nc_ip1(a,b,n,c)     mpn_sublsh2_nc(a,a,b,n,c)
#define HAVE_NATIVE_mpn_sublsh2_nc_ip1  2
#endif

#if HAVE_NATIVE_mpn_rsblsh_n && ! HAVE_NATIVE_mpn_rsblsh1_n
#define mpn_rsblsh1_n(a,b,c,d)          mpn_rsblsh_n(a,b,c,d,1)
#define HAVE_NATIVE_mpn_rsblsh1_n       2
#endif

#if HAVE_NATIVE_mpn_rsblsh_nc && ! HAVE_NATIVE_mpn_rsblsh1_nc
#define mpn_rsblsh1_nc(a,b,c,d,x)       mpn_rsblsh_nc(a,b,c,d,1,x)
#define HAVE_NATIVE_mpn_rsblsh1_nc      2
#endif

#if HAVE_NATIVE_mpn_rsblsh1_n && ! HAVE_NATIVE_mpn_rsblsh1_n_ip1
#define mpn_rsblsh1_n_ip1(a,b,n)        mpn_rsblsh1_n(a,a,b,n)
#define HAVE_NATIVE_mpn_rsblsh1_n_ip1   2
#endif

#if HAVE_NATIVE_mpn_rsblsh1_nc && ! HAVE_NATIVE_mpn_rsblsh1_nc_ip1
#define mpn_rsblsh1_nc_ip1(a,b,n,c)     mpn_rsblsh1_nc(a,a,b,n,c)
#define HAVE_NATIVE_mpn_rsblsh1_nc_ip1  2
#endif

#if HAVE_NATIVE_mpn_rsblsh_n && ! HAVE_NATIVE_mpn_rsblsh2_n
#define mpn_rsblsh2_n(a,b,c,d)          mpn_rsblsh_n(a,b,c,d,2)
#define HAVE_NATIVE_mpn_rsblsh2_n       2
#endif

#if HAVE_NATIVE_mpn_rsblsh_nc && ! HAVE_NATIVE_mpn_rsblsh2_nc
#define mpn_rsblsh2_nc(a,b,c,d,x)       mpn_rsblsh_nc(a,b,c,d,2,x)
#define HAVE_NATIVE_mpn_rsblsh2_nc      2
#endif

#if HAVE_NATIVE_mpn_rsblsh2_n && ! HAVE_NATIVE_mpn_rsblsh2_n_ip1
#define mpn_rsblsh2_n_ip1(a,b,n)        mpn_rsblsh2_n(a,a,b,n)
#define HAVE_NATIVE_mpn_rsblsh2_n_ip1   2
#endif

#if HAVE_NATIVE_mpn_rsblsh2_nc && ! HAVE_NATIVE_mpn_rsblsh2_nc_ip1
#define mpn_rsblsh2_nc_ip1(a,b,n,c)     mpn_rsblsh2_nc(a,a,b,n,c)
#define HAVE_NATIVE_mpn_rsblsh2_nc_ip1  2
#endif


#ifndef mpn_addlsh1_n
#define mpn_addlsh1_n __MPN(addlsh1_n)
__GMP_DECLSPEC mp_limb_t mpn_addlsh1_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
#endif
#ifndef mpn_addlsh1_nc
#define mpn_addlsh1_nc __MPN(addlsh1_nc)
__GMP_DECLSPEC mp_limb_t mpn_addlsh1_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
#endif
#ifndef mpn_addlsh1_n_ip1
#define mpn_addlsh1_n_ip1 __MPN(addlsh1_n_ip1)
__GMP_DECLSPEC mp_limb_t mpn_addlsh1_n_ip1 (mp_ptr, mp_srcptr, mp_size_t);
#endif
#ifndef mpn_addlsh1_nc_ip1
#define mpn_addlsh1_nc_ip1 __MPN(addlsh1_nc_ip1)
__GMP_DECLSPEC mp_limb_t mpn_addlsh1_nc_ip1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
#endif

#ifndef mpn_addlsh2_n
#define mpn_addlsh2_n __MPN(addlsh2_n)
__GMP_DECLSPEC mp_limb_t mpn_addlsh2_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
#endif
#ifndef mpn_addlsh2_nc
#define mpn_addlsh2_nc __MPN(addlsh2_nc)
__GMP_DECLSPEC mp_limb_t mpn_addlsh2_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
#endif
#ifndef mpn_addlsh2_n_ip1
#define mpn_addlsh2_n_ip1 __MPN(addlsh2_n_ip1)
__GMP_DECLSPEC mp_limb_t mpn_addlsh2_n_ip1 (mp_ptr, mp_srcptr, mp_size_t);
#endif
#ifndef mpn_addlsh2_nc_ip1
#define mpn_addlsh2_nc_ip1 __MPN(addlsh2_nc_ip1)
__GMP_DECLSPEC mp_limb_t mpn_addlsh2_nc_ip1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
#endif

#ifndef mpn_addlsh_n
#define mpn_addlsh_n __MPN(addlsh_n)
__GMP_DECLSPEC mp_limb_t mpn_addlsh_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, unsigned int);
#endif
#ifndef mpn_addlsh_nc
#define mpn_addlsh_nc __MPN(addlsh_nc)
__GMP_DECLSPEC mp_limb_t mpn_addlsh_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, unsigned int, mp_limb_t);
#endif
#ifndef mpn_addlsh_n_ip1
#define mpn_addlsh_n_ip1 __MPN(addlsh_n_ip1)
__GMP_DECLSPEC mp_limb_t mpn_addlsh_n_ip1 (mp_ptr, mp_srcptr, mp_size_t, unsigned int);
#endif
#ifndef mpn_addlsh_nc_ip1
#define mpn_addlsh_nc_ip1 __MPN(addlsh_nc_ip1)
__GMP_DECLSPEC mp_limb_t mpn_addlsh_nc_ip1 (mp_ptr, mp_srcptr, mp_size_t, unsigned int, mp_limb_t);
#endif

#ifndef mpn_sublsh1_n
#define mpn_sublsh1_n __MPN(sublsh1_n)
__GMP_DECLSPEC mp_limb_t mpn_sublsh1_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
#endif
#ifndef mpn_sublsh1_nc
#define mpn_sublsh1_nc __MPN(sublsh1_nc)
__GMP_DECLSPEC mp_limb_t mpn_sublsh1_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
#endif
#ifndef mpn_sublsh1_n_ip1
#define mpn_sublsh1_n_ip1 __MPN(sublsh1_n_ip1)
__GMP_DECLSPEC mp_limb_t mpn_sublsh1_n_ip1 (mp_ptr, mp_srcptr, mp_size_t);
#endif
#ifndef mpn_sublsh1_nc_ip1
#define mpn_sublsh1_nc_ip1 __MPN(sublsh1_nc_ip1)
__GMP_DECLSPEC mp_limb_t mpn_sublsh1_nc_ip1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
#endif

#ifndef mpn_sublsh2_n
#define mpn_sublsh2_n __MPN(sublsh2_n)
__GMP_DECLSPEC mp_limb_t mpn_sublsh2_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
#endif
#ifndef mpn_sublsh2_nc
#define mpn_sublsh2_nc __MPN(sublsh2_nc)
__GMP_DECLSPEC mp_limb_t mpn_sublsh2_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
#endif
#ifndef mpn_sublsh2_n_ip1
#define mpn_sublsh2_n_ip1 __MPN(sublsh2_n_ip1)
__GMP_DECLSPEC mp_limb_t mpn_sublsh2_n_ip1 (mp_ptr, mp_srcptr, mp_size_t);
#endif
#ifndef mpn_sublsh2_nc_ip1
#define mpn_sublsh2_nc_ip1 __MPN(sublsh2_nc_ip1)
__GMP_DECLSPEC mp_limb_t mpn_sublsh2_nc_ip1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
#endif

#ifndef mpn_sublsh_n
#define mpn_sublsh_n __MPN(sublsh_n)
__GMP_DECLSPEC mp_limb_t mpn_sublsh_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, unsigned int);
#endif
#ifndef mpn_sublsh_nc
#define mpn_sublsh_nc __MPN(sublsh_nc)
__GMP_DECLSPEC mp_limb_t mpn_sublsh_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, unsigned int, mp_limb_t);
#endif
#ifndef mpn_sublsh_n_ip1
#define mpn_sublsh_n_ip1 __MPN(sublsh_n_ip1)
__GMP_DECLSPEC mp_limb_t mpn_sublsh_n_ip1 (mp_ptr, mp_srcptr, mp_size_t, unsigned int);
#endif
#ifndef mpn_sublsh_nc_ip1
#define mpn_sublsh_nc_ip1 __MPN(sublsh_nc_ip1)
__GMP_DECLSPEC mp_limb_t mpn_sublsh_nc_ip1 (mp_ptr, mp_srcptr, mp_size_t, unsigned int, mp_limb_t);
#endif

#define mpn_rsblsh1_n __MPN(rsblsh1_n)
__GMP_DECLSPEC mp_limb_signed_t mpn_rsblsh1_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
#define mpn_rsblsh1_nc __MPN(rsblsh1_nc)
__GMP_DECLSPEC mp_limb_signed_t mpn_rsblsh1_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);

#define mpn_rsblsh2_n __MPN(rsblsh2_n)
__GMP_DECLSPEC mp_limb_signed_t mpn_rsblsh2_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
#define mpn_rsblsh2_nc __MPN(rsblsh2_nc)
__GMP_DECLSPEC mp_limb_signed_t mpn_rsblsh2_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);

#define mpn_rsblsh_n __MPN(rsblsh_n)
__GMP_DECLSPEC mp_limb_signed_t mpn_rsblsh_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, unsigned int);
#define mpn_rsblsh_nc __MPN(rsblsh_nc)
__GMP_DECLSPEC mp_limb_signed_t mpn_rsblsh_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, unsigned int, mp_limb_t);

#define mpn_rsh1add_n __MPN(rsh1add_n)
__GMP_DECLSPEC mp_limb_t mpn_rsh1add_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
#define mpn_rsh1add_nc __MPN(rsh1add_nc)
__GMP_DECLSPEC mp_limb_t mpn_rsh1add_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);

#define mpn_rsh1sub_n __MPN(rsh1sub_n)
__GMP_DECLSPEC mp_limb_t mpn_rsh1sub_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
#define mpn_rsh1sub_nc __MPN(rsh1sub_nc)
__GMP_DECLSPEC mp_limb_t mpn_rsh1sub_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);

#ifndef mpn_lshiftc  /* if not done with cpuvec in a fat binary */
#define mpn_lshiftc __MPN(lshiftc)
__GMP_DECLSPEC mp_limb_t mpn_lshiftc (mp_ptr, mp_srcptr, mp_size_t, unsigned int);
#endif

#define mpn_add_err1_n  __MPN(add_err1_n)
__GMP_DECLSPEC mp_limb_t mpn_add_err1_n (mp_ptr, mp_srcptr, mp_srcptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);

#define mpn_add_err2_n  __MPN(add_err2_n)
__GMP_DECLSPEC mp_limb_t mpn_add_err2_n (mp_ptr, mp_srcptr, mp_srcptr, mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);

#define mpn_add_err3_n  __MPN(add_err3_n)
__GMP_DECLSPEC mp_limb_t mpn_add_err3_n (mp_ptr, mp_srcptr, mp_srcptr, mp_ptr, mp_srcptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);

#define mpn_sub_err1_n  __MPN(sub_err1_n)
__GMP_DECLSPEC mp_limb_t mpn_sub_err1_n (mp_ptr, mp_srcptr, mp_srcptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);

#define mpn_sub_err2_n  __MPN(sub_err2_n)
__GMP_DECLSPEC mp_limb_t mpn_sub_err2_n (mp_ptr, mp_srcptr, mp_srcptr, mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);

#define mpn_sub_err3_n  __MPN(sub_err3_n)
__GMP_DECLSPEC mp_limb_t mpn_sub_err3_n (mp_ptr, mp_srcptr, mp_srcptr, mp_ptr, mp_srcptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);

#define mpn_add_n_sub_n __MPN(add_n_sub_n)
__GMP_DECLSPEC mp_limb_t mpn_add_n_sub_n (mp_ptr, mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);

#define mpn_add_n_sub_nc __MPN(add_n_sub_nc)
__GMP_DECLSPEC mp_limb_t mpn_add_n_sub_nc (mp_ptr, mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);

#define mpn_addaddmul_1msb0 __MPN(addaddmul_1msb0)
__GMP_DECLSPEC mp_limb_t mpn_addaddmul_1msb0 (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t);

#define mpn_divrem_1c __MPN(divrem_1c)
__GMP_DECLSPEC mp_limb_t mpn_divrem_1c (mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t);

#define mpn_dump __MPN(dump)
__GMP_DECLSPEC void mpn_dump (mp_srcptr, mp_size_t);

#define mpn_fib2_ui __MPN(fib2_ui)
__GMP_DECLSPEC mp_size_t mpn_fib2_ui (mp_ptr, mp_ptr, unsigned long);

#define mpn_fib2m __MPN(fib2m)
__GMP_DECLSPEC int mpn_fib2m (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t);

#define mpn_strongfibo __MPN(strongfibo)
__GMP_DECLSPEC int mpn_strongfibo (mp_srcptr, mp_size_t, mp_ptr);

/* Remap names of internal mpn functions.  */
#define __clz_tab               __MPN(clz_tab)
#define mpn_udiv_w_sdiv		__MPN(udiv_w_sdiv)

#define mpn_jacobi_base __MPN(jacobi_base)
__GMP_DECLSPEC int mpn_jacobi_base (mp_limb_t, mp_limb_t, int) ATTRIBUTE_CONST;

#define mpn_jacobi_2 __MPN(jacobi_2)
__GMP_DECLSPEC int mpn_jacobi_2 (mp_srcptr, mp_srcptr, unsigned);

#define mpn_jacobi_n __MPN(jacobi_n)
__GMP_DECLSPEC int mpn_jacobi_n (mp_ptr, mp_ptr, mp_size_t, unsigned);

#define mpn_mod_1c __MPN(mod_1c)
__GMP_DECLSPEC mp_limb_t mpn_mod_1c (mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t) __GMP_ATTRIBUTE_PURE;

#define mpn_mul_1c __MPN(mul_1c)
__GMP_DECLSPEC mp_limb_t mpn_mul_1c (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t);

#define mpn_mul_2 __MPN(mul_2)
__GMP_DECLSPEC mp_limb_t mpn_mul_2 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);

#define mpn_mul_3 __MPN(mul_3)
__GMP_DECLSPEC mp_limb_t mpn_mul_3 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);

#define mpn_mul_4 __MPN(mul_4)
__GMP_DECLSPEC mp_limb_t mpn_mul_4 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);

#define mpn_mul_5 __MPN(mul_5)
__GMP_DECLSPEC mp_limb_t mpn_mul_5 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);

#define mpn_mul_6 __MPN(mul_6)
__GMP_DECLSPEC mp_limb_t mpn_mul_6 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);

#ifndef mpn_mul_basecase  /* if not done with cpuvec in a fat binary */
#define mpn_mul_basecase __MPN(mul_basecase)
__GMP_DECLSPEC void mpn_mul_basecase (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t);
#endif

#define mpn_mullo_n __MPN(mullo_n)
__GMP_DECLSPEC void mpn_mullo_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);

#ifndef mpn_mullo_basecase  /* if not done with cpuvec in a fat binary */
#define mpn_mullo_basecase __MPN(mullo_basecase)
__GMP_DECLSPEC void mpn_mullo_basecase (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
#endif

#ifndef mpn_sqr_basecase  /* if not done with cpuvec in a fat binary */
#define mpn_sqr_basecase __MPN(sqr_basecase)
__GMP_DECLSPEC void mpn_sqr_basecase (mp_ptr, mp_srcptr, mp_size_t);
#endif

#define mpn_sqrlo __MPN(sqrlo)
__GMP_DECLSPEC void mpn_sqrlo (mp_ptr, mp_srcptr, mp_size_t);

#define mpn_sqrlo_basecase __MPN(sqrlo_basecase)
__GMP_DECLSPEC void mpn_sqrlo_basecase (mp_ptr, mp_srcptr, mp_size_t);

#define mpn_mulmid_basecase __MPN(mulmid_basecase)
__GMP_DECLSPEC void mpn_mulmid_basecase (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t);

#define mpn_mulmid_n __MPN(mulmid_n)
__GMP_DECLSPEC void mpn_mulmid_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);

#define mpn_mulmid __MPN(mulmid)
__GMP_DECLSPEC void mpn_mulmid (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t);

#define mpn_submul_1c __MPN(submul_1c)
__GMP_DECLSPEC mp_limb_t mpn_submul_1c (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t);

#ifndef mpn_redc_1  /* if not done with cpuvec in a fat binary */
#define mpn_redc_1 __MPN(redc_1)
__GMP_DECLSPEC mp_limb_t mpn_redc_1 (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
#endif

#ifndef mpn_redc_2  /* if not done with cpuvec in a fat binary */
#define mpn_redc_2 __MPN(redc_2)
__GMP_DECLSPEC mp_limb_t mpn_redc_2 (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);
#endif

#define mpn_redc_n __MPN(redc_n)
__GMP_DECLSPEC void mpn_redc_n (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);


#ifndef mpn_mod_1_1p_cps  /* if not done with cpuvec in a fat binary */
#define mpn_mod_1_1p_cps __MPN(mod_1_1p_cps)
__GMP_DECLSPEC void mpn_mod_1_1p_cps (mp_limb_t [4], mp_limb_t);
#endif
#ifndef mpn_mod_1_1p  /* if not done with cpuvec in a fat binary */
#define mpn_mod_1_1p __MPN(mod_1_1p)
__GMP_DECLSPEC mp_limb_t mpn_mod_1_1p (mp_srcptr, mp_size_t, mp_limb_t, const mp_limb_t [4]) __GMP_ATTRIBUTE_PURE;
#endif

#ifndef mpn_mod_1s_2p_cps  /* if not done with cpuvec in a fat binary */
#define mpn_mod_1s_2p_cps __MPN(mod_1s_2p_cps)
__GMP_DECLSPEC void mpn_mod_1s_2p_cps (mp_limb_t [5], mp_limb_t);
#endif
#ifndef mpn_mod_1s_2p  /* if not done with cpuvec in a fat binary */
#define mpn_mod_1s_2p __MPN(mod_1s_2p)
__GMP_DECLSPEC mp_limb_t mpn_mod_1s_2p (mp_srcptr, mp_size_t, mp_limb_t, const mp_limb_t [5]) __GMP_ATTRIBUTE_PURE;
#endif

#ifndef mpn_mod_1s_3p_cps  /* if not done with cpuvec in a fat binary */
#define mpn_mod_1s_3p_cps __MPN(mod_1s_3p_cps)
__GMP_DECLSPEC void mpn_mod_1s_3p_cps (mp_limb_t [6], mp_limb_t);
#endif
#ifndef mpn_mod_1s_3p  /* if not done with cpuvec in a fat binary */
#define mpn_mod_1s_3p __MPN(mod_1s_3p)
__GMP_DECLSPEC mp_limb_t mpn_mod_1s_3p (mp_srcptr, mp_size_t, mp_limb_t, const mp_limb_t [6]) __GMP_ATTRIBUTE_PURE;
#endif

#ifndef mpn_mod_1s_4p_cps  /* if not done with cpuvec in a fat binary */
#define mpn_mod_1s_4p_cps __MPN(mod_1s_4p_cps)
__GMP_DECLSPEC void mpn_mod_1s_4p_cps (mp_limb_t [7], mp_limb_t);
#endif
#ifndef mpn_mod_1s_4p  /* if not done with cpuvec in a fat binary */
#define mpn_mod_1s_4p __MPN(mod_1s_4p)
__GMP_DECLSPEC mp_limb_t mpn_mod_1s_4p (mp_srcptr, mp_size_t, mp_limb_t, const mp_limb_t [7]) __GMP_ATTRIBUTE_PURE;
#endif

#define mpn_bc_mulmod_bnm1 __MPN(bc_mulmod_bnm1)
__GMP_DECLSPEC void mpn_bc_mulmod_bnm1 (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_ptr);
#define mpn_mulmod_bnm1 __MPN(mulmod_bnm1)
__GMP_DECLSPEC void mpn_mulmod_bnm1 (mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
#define mpn_mulmod_bnm1_next_size __MPN(mulmod_bnm1_next_size)
__GMP_DECLSPEC mp_size_t mpn_mulmod_bnm1_next_size (mp_size_t) ATTRIBUTE_CONST;
static inline mp_size_t
mpn_mulmod_bnm1_itch (mp_size_t rn, mp_size_t an, mp_size_t bn) {
  mp_size_t n, itch;
  n = rn >> 1;
  itch = rn + 4 +
    (an > n ? (bn > n ? rn : n) : 0);
  return itch;
}

#define mpn_sqrmod_bnm1 __MPN(sqrmod_bnm1)
__GMP_DECLSPEC void mpn_sqrmod_bnm1 (mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
#define mpn_sqrmod_bnm1_next_size __MPN(sqrmod_bnm1_next_size)
__GMP_DECLSPEC mp_size_t mpn_sqrmod_bnm1_next_size (mp_size_t) ATTRIBUTE_CONST;
static inline mp_size_t
mpn_sqrmod_bnm1_itch (mp_size_t rn, mp_size_t an) {
  mp_size_t n, itch;
  n = rn >> 1;
  itch = rn + 3 +
    (an > n ? an : 0);
  return itch;
}

typedef __gmp_randstate_struct *gmp_randstate_ptr;
typedef const __gmp_randstate_struct *gmp_randstate_srcptr;

/* Pseudo-random number generator function pointers structure.  */
typedef struct {
  void (*randseed_fn) (gmp_randstate_t, mpz_srcptr);
  void (*randget_fn) (gmp_randstate_t, mp_ptr, unsigned long int);
  void (*randclear_fn) (gmp_randstate_t);
  void (*randiset_fn) (gmp_randstate_ptr, gmp_randstate_srcptr);
} gmp_randfnptr_t;

/* Macro to obtain a void pointer to the function pointers structure.  */
#define RNG_FNPTR(rstate) ((rstate)->_mp_algdata._mp_lc)

/* Macro to obtain a pointer to the generator's state.
   When used as an lvalue the rvalue needs to be cast to mp_ptr.  */
#define RNG_STATE(rstate) ((rstate)->_mp_seed->_mp_d)

/* Write a given number of random bits to rp.  */
#define _gmp_rand(rp, state, bits)					\
  do {									\
    gmp_randstate_ptr  __rstate = (state);				\
    (*((gmp_randfnptr_t *) RNG_FNPTR (__rstate))->randget_fn)		\
      (__rstate, rp, bits);						\
  } while (0)

__GMP_DECLSPEC void __gmp_randinit_mt_noseed (gmp_randstate_t);


/* __gmp_rands is the global state for the old-style random functions, and
   is also used in the test programs (hence the __GMP_DECLSPEC).

   There's no seeding here, so mpz_random etc will generate the same
   sequence every time.  This is not unlike the C library random functions
   if you don't seed them, so perhaps it's acceptable.  Digging up a seed
   from /dev/random or the like would work on many systems, but might
   encourage a false confidence, since it'd be pretty much impossible to do
   something that would work reliably everywhere.  In any case the new style
   functions are recommended to applications which care about randomness, so
   the old functions aren't too important.  */

__GMP_DECLSPEC extern char             __gmp_rands_initialized;
__GMP_DECLSPEC extern gmp_randstate_t  __gmp_rands;

#define RANDS								\
  ((__gmp_rands_initialized ? 0						\
    : (__gmp_rands_initialized = 1,					\
       __gmp_randinit_mt_noseed (__gmp_rands), 0)),			\
   __gmp_rands)

/* this is used by the test programs, to free memory */
#define RANDS_CLEAR()							\
  do {									\
    if (__gmp_rands_initialized)					\
      {									\
	__gmp_rands_initialized = 0;					\
	gmp_randclear (__gmp_rands);					\
      }									\
  } while (0)


/* For a threshold between algorithms A and B, size>=thresh is where B
   should be used.  Special value MP_SIZE_T_MAX means only ever use A, or
   value 0 means only ever use B.  The tests for these special values will
   be compile-time constants, so the compiler should be able to eliminate
   the code for the unwanted algorithm.  */

#if ! defined (__GNUC__) || __GNUC__ < 2
#define ABOVE_THRESHOLD(size,thresh)					\
  ((thresh) == 0							\
   || ((thresh) != MP_SIZE_T_MAX					\
       && (size) >= (thresh)))
#else
#define ABOVE_THRESHOLD(size,thresh)					\
  ((__builtin_constant_p (thresh) && (thresh) == 0)			\
   || (!(__builtin_constant_p (thresh) && (thresh) == MP_SIZE_T_MAX)	\
       && (size) >= (thresh)))
#endif
#define BELOW_THRESHOLD(size,thresh)  (! ABOVE_THRESHOLD (size, thresh))
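/* For example, with a tuned threshold T (from gmp-mparam.h, or one of the
   nails overrides above), ABOVE_THRESHOLD (size, T) selects algorithm B
   once size >= T; a value of 0 makes the test always true and
   MP_SIZE_T_MAX makes it always false, folding to compile-time constants
   as described.  */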

/* The minimal supported value for Toom22 depends also on Toom32 and
   Toom42 implementations. */
#define MPN_TOOM22_MUL_MINSIZE    6
#define MPN_TOOM2_SQR_MINSIZE     4

#define MPN_TOOM33_MUL_MINSIZE   17
#define MPN_TOOM3_SQR_MINSIZE    17

#define MPN_TOOM44_MUL_MINSIZE   30
#define MPN_TOOM4_SQR_MINSIZE    30

#define MPN_TOOM6H_MUL_MINSIZE   46
#define MPN_TOOM6_SQR_MINSIZE    46

#define MPN_TOOM8H_MUL_MINSIZE   86
#define MPN_TOOM8_SQR_MINSIZE    86

#define MPN_TOOM32_MUL_MINSIZE   10
#define MPN_TOOM42_MUL_MINSIZE   10
#define MPN_TOOM43_MUL_MINSIZE   25
#define MPN_TOOM53_MUL_MINSIZE   17
#define MPN_TOOM54_MUL_MINSIZE   31
#define MPN_TOOM63_MUL_MINSIZE   49

#define MPN_TOOM42_MULMID_MINSIZE    4

#define   mpn_sqr_diagonal __MPN(sqr_diagonal)
__GMP_DECLSPEC void      mpn_sqr_diagonal (mp_ptr, mp_srcptr, mp_size_t);

#define mpn_sqr_diag_addlsh1 __MPN(sqr_diag_addlsh1)
__GMP_DECLSPEC void      mpn_sqr_diag_addlsh1 (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);

#define   mpn_toom_interpolate_5pts __MPN(toom_interpolate_5pts)
__GMP_DECLSPEC void      mpn_toom_interpolate_5pts (mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_size_t, int, mp_limb_t);

enum toom6_flags {toom6_all_pos = 0, toom6_vm1_neg = 1, toom6_vm2_neg = 2};
#define   mpn_toom_interpolate_6pts __MPN(toom_interpolate_6pts)
__GMP_DECLSPEC void      mpn_toom_interpolate_6pts (mp_ptr, mp_size_t, enum toom6_flags, mp_ptr, mp_ptr, mp_ptr, mp_size_t);

enum toom7_flags { toom7_w1_neg = 1, toom7_w3_neg = 2 };
#define   mpn_toom_interpolate_7pts __MPN(toom_interpolate_7pts)
__GMP_DECLSPEC void      mpn_toom_interpolate_7pts (mp_ptr, mp_size_t, enum toom7_flags, mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_ptr);

#define mpn_toom_interpolate_8pts __MPN(toom_interpolate_8pts)
__GMP_DECLSPEC void      mpn_toom_interpolate_8pts (mp_ptr, mp_size_t, mp_ptr, mp_ptr, mp_size_t, mp_ptr);

#define mpn_toom_interpolate_12pts __MPN(toom_interpolate_12pts)
__GMP_DECLSPEC void      mpn_toom_interpolate_12pts (mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_size_t, int, mp_ptr);

#define mpn_toom_interpolate_16pts __MPN(toom_interpolate_16pts)
__GMP_DECLSPEC void      mpn_toom_interpolate_16pts (mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_size_t, int, mp_ptr);

#define   mpn_toom_couple_handling __MPN(toom_couple_handling)
__GMP_DECLSPEC void mpn_toom_couple_handling (mp_ptr, mp_size_t, mp_ptr, int, mp_size_t, int, int);

#define   mpn_toom_eval_dgr3_pm1 __MPN(toom_eval_dgr3_pm1)
__GMP_DECLSPEC int mpn_toom_eval_dgr3_pm1 (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_size_t, mp_ptr);

#define   mpn_toom_eval_dgr3_pm2 __MPN(toom_eval_dgr3_pm2)
__GMP_DECLSPEC int mpn_toom_eval_dgr3_pm2 (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_size_t, mp_ptr);

#define   mpn_toom_eval_pm1 __MPN(toom_eval_pm1)
__GMP_DECLSPEC int mpn_toom_eval_pm1 (mp_ptr, mp_ptr, unsigned, mp_srcptr, mp_size_t, mp_size_t, mp_ptr);

#define   mpn_toom_eval_pm2 __MPN(toom_eval_pm2)
__GMP_DECLSPEC int mpn_toom_eval_pm2 (mp_ptr, mp_ptr, unsigned, mp_srcptr, mp_size_t, mp_size_t, mp_ptr);

#define   mpn_toom_eval_pm2exp __MPN(toom_eval_pm2exp)
__GMP_DECLSPEC int mpn_toom_eval_pm2exp (mp_ptr, mp_ptr, unsigned, mp_srcptr, mp_size_t, mp_size_t, unsigned, mp_ptr);

#define   mpn_toom_eval_pm2rexp __MPN(toom_eval_pm2rexp)
__GMP_DECLSPEC int mpn_toom_eval_pm2rexp (mp_ptr, mp_ptr, unsigned, mp_srcptr, mp_size_t, mp_size_t, unsigned, mp_ptr);

#define   mpn_toom22_mul __MPN(toom22_mul)
__GMP_DECLSPEC void      mpn_toom22_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);

#define   mpn_toom32_mul __MPN(toom32_mul)
__GMP_DECLSPEC void      mpn_toom32_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);

#define   mpn_toom42_mul __MPN(toom42_mul)
__GMP_DECLSPEC void      mpn_toom42_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);

#define   mpn_toom52_mul __MPN(toom52_mul)
__GMP_DECLSPEC void      mpn_toom52_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);

#define   mpn_toom62_mul __MPN(toom62_mul)
__GMP_DECLSPEC void      mpn_toom62_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);

#define   mpn_toom2_sqr __MPN(toom2_sqr)
__GMP_DECLSPEC void      mpn_toom2_sqr (mp_ptr, mp_srcptr, mp_size_t, mp_ptr);
1464
1465#define   mpn_toom33_mul __MPN(toom33_mul)
1466__GMP_DECLSPEC void      mpn_toom33_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
1467
1468#define   mpn_toom43_mul __MPN(toom43_mul)
1469__GMP_DECLSPEC void      mpn_toom43_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
1470
1471#define   mpn_toom53_mul __MPN(toom53_mul)
1472__GMP_DECLSPEC void      mpn_toom53_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
1473
1474#define   mpn_toom54_mul __MPN(toom54_mul)
1475__GMP_DECLSPEC void      mpn_toom54_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
1476
1477#define   mpn_toom63_mul __MPN(toom63_mul)
1478__GMP_DECLSPEC void      mpn_toom63_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
1479
1480#define   mpn_toom3_sqr __MPN(toom3_sqr)
1481__GMP_DECLSPEC void      mpn_toom3_sqr (mp_ptr, mp_srcptr, mp_size_t, mp_ptr);
1482
1483#define   mpn_toom44_mul __MPN(toom44_mul)
1484__GMP_DECLSPEC void      mpn_toom44_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
1485
1486#define   mpn_toom4_sqr __MPN(toom4_sqr)
1487__GMP_DECLSPEC void      mpn_toom4_sqr (mp_ptr, mp_srcptr, mp_size_t, mp_ptr);
1488
1489#define   mpn_toom6h_mul __MPN(toom6h_mul)
1490__GMP_DECLSPEC void      mpn_toom6h_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
1491
1492#define   mpn_toom6_sqr __MPN(toom6_sqr)
1493__GMP_DECLSPEC void      mpn_toom6_sqr (mp_ptr, mp_srcptr, mp_size_t, mp_ptr);
1494
1495#define   mpn_toom8h_mul __MPN(toom8h_mul)
1496__GMP_DECLSPEC void      mpn_toom8h_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
1497
1498#define   mpn_toom8_sqr __MPN(toom8_sqr)
1499__GMP_DECLSPEC void      mpn_toom8_sqr (mp_ptr, mp_srcptr, mp_size_t, mp_ptr);
1500
1501#define   mpn_toom42_mulmid __MPN(toom42_mulmid)
1502__GMP_DECLSPEC void      mpn_toom42_mulmid (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_ptr);
1503
1504#define   mpn_fft_best_k __MPN(fft_best_k)
1505__GMP_DECLSPEC int       mpn_fft_best_k (mp_size_t, int) ATTRIBUTE_CONST;
1506
1507#define   mpn_mul_fft __MPN(mul_fft)
1508__GMP_DECLSPEC mp_limb_t mpn_mul_fft (mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, int);
1509
1510#define   mpn_mul_fft_full __MPN(mul_fft_full)
1511__GMP_DECLSPEC void      mpn_mul_fft_full (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t);
1512
1513#define   mpn_nussbaumer_mul __MPN(nussbaumer_mul)
1514__GMP_DECLSPEC void      mpn_nussbaumer_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t);
1515
1516#define   mpn_fft_next_size __MPN(fft_next_size)
1517__GMP_DECLSPEC mp_size_t mpn_fft_next_size (mp_size_t, int) ATTRIBUTE_CONST;
1518
1519#define   mpn_div_qr_1n_pi1 __MPN(div_qr_1n_pi1)
1520  __GMP_DECLSPEC mp_limb_t mpn_div_qr_1n_pi1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t, mp_limb_t);
1521
1522#define   mpn_div_qr_2n_pi1 __MPN(div_qr_2n_pi1)
1523  __GMP_DECLSPEC mp_limb_t mpn_div_qr_2n_pi1 (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t, mp_limb_t);
1524
1525#define   mpn_div_qr_2u_pi1 __MPN(div_qr_2u_pi1)
1526  __GMP_DECLSPEC mp_limb_t mpn_div_qr_2u_pi1 (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t, int, mp_limb_t);
1527
1528#define   mpn_sbpi1_div_qr __MPN(sbpi1_div_qr)
1529__GMP_DECLSPEC mp_limb_t mpn_sbpi1_div_qr (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t);
1530
1531#define   mpn_sbpi1_div_q __MPN(sbpi1_div_q)
1532__GMP_DECLSPEC mp_limb_t mpn_sbpi1_div_q (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t);
1533
1534#define   mpn_sbpi1_divappr_q __MPN(sbpi1_divappr_q)
1535__GMP_DECLSPEC mp_limb_t mpn_sbpi1_divappr_q (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t);
1536
1537#define   mpn_dcpi1_div_qr __MPN(dcpi1_div_qr)
1538__GMP_DECLSPEC mp_limb_t mpn_dcpi1_div_qr (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, gmp_pi1_t *);
1539#define   mpn_dcpi1_div_qr_n __MPN(dcpi1_div_qr_n)
1540__GMP_DECLSPEC mp_limb_t mpn_dcpi1_div_qr_n (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, gmp_pi1_t *, mp_ptr);
1541
1542#define   mpn_dcpi1_div_q __MPN(dcpi1_div_q)
1543__GMP_DECLSPEC mp_limb_t mpn_dcpi1_div_q (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, gmp_pi1_t *);
1544
1545#define   mpn_dcpi1_divappr_q __MPN(dcpi1_divappr_q)
1546__GMP_DECLSPEC mp_limb_t mpn_dcpi1_divappr_q (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, gmp_pi1_t *);
1547
1548#define   mpn_mu_div_qr __MPN(mu_div_qr)
1549__GMP_DECLSPEC mp_limb_t mpn_mu_div_qr (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
1550#define   mpn_mu_div_qr_itch __MPN(mu_div_qr_itch)
1551__GMP_DECLSPEC mp_size_t mpn_mu_div_qr_itch (mp_size_t, mp_size_t, int) ATTRIBUTE_CONST;
1552
1553#define   mpn_preinv_mu_div_qr __MPN(preinv_mu_div_qr)
1554__GMP_DECLSPEC mp_limb_t mpn_preinv_mu_div_qr (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
1555#define   mpn_preinv_mu_div_qr_itch __MPN(preinv_mu_div_qr_itch)
1556__GMP_DECLSPEC mp_size_t mpn_preinv_mu_div_qr_itch (mp_size_t, mp_size_t, mp_size_t) ATTRIBUTE_CONST;
1557
1558#define   mpn_mu_divappr_q __MPN(mu_divappr_q)
1559__GMP_DECLSPEC mp_limb_t mpn_mu_divappr_q (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
1560#define   mpn_mu_divappr_q_itch __MPN(mu_divappr_q_itch)
1561__GMP_DECLSPEC mp_size_t mpn_mu_divappr_q_itch (mp_size_t, mp_size_t, int) ATTRIBUTE_CONST;
1562
1563#define   mpn_mu_div_q __MPN(mu_div_q)
1564__GMP_DECLSPEC mp_limb_t mpn_mu_div_q (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
1565#define   mpn_mu_div_q_itch __MPN(mu_div_q_itch)
1566__GMP_DECLSPEC mp_size_t mpn_mu_div_q_itch (mp_size_t, mp_size_t, int) ATTRIBUTE_CONST;
1567
1568#define  mpn_div_q __MPN(div_q)
1569__GMP_DECLSPEC void mpn_div_q (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
1570
1571#define   mpn_invert __MPN(invert)
1572__GMP_DECLSPEC void      mpn_invert (mp_ptr, mp_srcptr, mp_size_t, mp_ptr);
1573#define mpn_invert_itch(n)  mpn_invertappr_itch(n)
1574
1575#define   mpn_ni_invertappr __MPN(ni_invertappr)
1576__GMP_DECLSPEC mp_limb_t mpn_ni_invertappr (mp_ptr, mp_srcptr, mp_size_t, mp_ptr);
1577#define   mpn_invertappr __MPN(invertappr)
1578__GMP_DECLSPEC mp_limb_t mpn_invertappr (mp_ptr, mp_srcptr, mp_size_t, mp_ptr);
1579#define mpn_invertappr_itch(n)  (2 * (n))
1580
1581#define   mpn_binvert __MPN(binvert)
1582__GMP_DECLSPEC void      mpn_binvert (mp_ptr, mp_srcptr, mp_size_t, mp_ptr);
1583#define   mpn_binvert_itch __MPN(binvert_itch)
1584__GMP_DECLSPEC mp_size_t mpn_binvert_itch (mp_size_t) ATTRIBUTE_CONST;
1585
1586#define mpn_bdiv_q_1 __MPN(bdiv_q_1)
1587__GMP_DECLSPEC mp_limb_t mpn_bdiv_q_1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
1588
1589#define mpn_pi1_bdiv_q_1 __MPN(pi1_bdiv_q_1)
1590__GMP_DECLSPEC mp_limb_t mpn_pi1_bdiv_q_1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t, int);
1591
1592#define   mpn_sbpi1_bdiv_qr __MPN(sbpi1_bdiv_qr)
1593__GMP_DECLSPEC mp_limb_t mpn_sbpi1_bdiv_qr (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t);
1594
1595#define   mpn_sbpi1_bdiv_q __MPN(sbpi1_bdiv_q)
1596__GMP_DECLSPEC void      mpn_sbpi1_bdiv_q (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t);
1597
1598#define   mpn_sbpi1_bdiv_r __MPN(sbpi1_bdiv_r)
1599__GMP_DECLSPEC mp_limb_t mpn_sbpi1_bdiv_r (mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t);
1600
1601#define   mpn_dcpi1_bdiv_qr __MPN(dcpi1_bdiv_qr)
1602__GMP_DECLSPEC mp_limb_t mpn_dcpi1_bdiv_qr (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t);
1603#define   mpn_dcpi1_bdiv_qr_n_itch __MPN(dcpi1_bdiv_qr_n_itch)
1604__GMP_DECLSPEC mp_size_t mpn_dcpi1_bdiv_qr_n_itch (mp_size_t) ATTRIBUTE_CONST;
1605
1606#define   mpn_dcpi1_bdiv_qr_n __MPN(dcpi1_bdiv_qr_n)
1607__GMP_DECLSPEC mp_limb_t mpn_dcpi1_bdiv_qr_n (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_ptr);
1608#define   mpn_dcpi1_bdiv_q __MPN(dcpi1_bdiv_q)
1609__GMP_DECLSPEC void      mpn_dcpi1_bdiv_q (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t);
1610
1611#define   mpn_mu_bdiv_qr __MPN(mu_bdiv_qr)
1612__GMP_DECLSPEC mp_limb_t mpn_mu_bdiv_qr (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
1613#define   mpn_mu_bdiv_qr_itch __MPN(mu_bdiv_qr_itch)
1614__GMP_DECLSPEC mp_size_t mpn_mu_bdiv_qr_itch (mp_size_t, mp_size_t) ATTRIBUTE_CONST;
1615
1616#define   mpn_mu_bdiv_q __MPN(mu_bdiv_q)
1617__GMP_DECLSPEC void      mpn_mu_bdiv_q (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
1618#define   mpn_mu_bdiv_q_itch __MPN(mu_bdiv_q_itch)
1619__GMP_DECLSPEC mp_size_t mpn_mu_bdiv_q_itch (mp_size_t, mp_size_t) ATTRIBUTE_CONST;
1620
1621#define   mpn_bdiv_qr __MPN(bdiv_qr)
1622__GMP_DECLSPEC mp_limb_t mpn_bdiv_qr (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
1623#define   mpn_bdiv_qr_itch __MPN(bdiv_qr_itch)
1624__GMP_DECLSPEC mp_size_t mpn_bdiv_qr_itch (mp_size_t, mp_size_t) ATTRIBUTE_CONST;
1625
1626#define   mpn_bdiv_q __MPN(bdiv_q)
1627__GMP_DECLSPEC void      mpn_bdiv_q (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
1628#define   mpn_bdiv_q_itch __MPN(bdiv_q_itch)
1629__GMP_DECLSPEC mp_size_t mpn_bdiv_q_itch (mp_size_t, mp_size_t) ATTRIBUTE_CONST;
1630
1631#define   mpn_divexact __MPN(divexact)
1632__GMP_DECLSPEC void      mpn_divexact (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t);
1633#define   mpn_divexact_itch __MPN(divexact_itch)
1634__GMP_DECLSPEC mp_size_t mpn_divexact_itch (mp_size_t, mp_size_t) ATTRIBUTE_CONST;
1635
1636#ifndef mpn_bdiv_dbm1c  /* if not done with cpuvec in a fat binary */
1637#define   mpn_bdiv_dbm1c __MPN(bdiv_dbm1c)
1638__GMP_DECLSPEC mp_limb_t mpn_bdiv_dbm1c (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t);
1639#endif
1640
1641#define   mpn_bdiv_dbm1(dst, src, size, divisor) \
1642  mpn_bdiv_dbm1c (dst, src, size, divisor, __GMP_CAST (mp_limb_t, 0))
1643
1644#define   mpn_powm __MPN(powm)
1645__GMP_DECLSPEC void      mpn_powm (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
1646#define   mpn_powlo __MPN(powlo)
1647__GMP_DECLSPEC void      mpn_powlo (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_size_t, mp_ptr);
1648
1649#define mpn_sec_pi1_div_qr __MPN(sec_pi1_div_qr)
1650__GMP_DECLSPEC mp_limb_t mpn_sec_pi1_div_qr (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t, mp_ptr);
1651#define mpn_sec_pi1_div_r __MPN(sec_pi1_div_r)
1652__GMP_DECLSPEC void mpn_sec_pi1_div_r (mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t, mp_ptr);
1653
1654
1655#ifndef DIVEXACT_BY3_METHOD
1656#if GMP_NUMB_BITS % 2 == 0 && ! defined (HAVE_NATIVE_mpn_divexact_by3c)
1657#define DIVEXACT_BY3_METHOD 0	/* default to using mpn_bdiv_dbm1c */
1658#else
1659#define DIVEXACT_BY3_METHOD 1
1660#endif
1661#endif
1662
1663#if DIVEXACT_BY3_METHOD == 0
1664#undef mpn_divexact_by3
1665#define mpn_divexact_by3(dst,src,size) \
1666  (3 & mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 3)))
1667/* override mpn_divexact_by3c defined in gmp.h */
1668/*
1669#undef mpn_divexact_by3c
1670#define mpn_divexact_by3c(dst,src,size,cy) \
1671  (3 & mpn_bdiv_dbm1c (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 3), GMP_NUMB_MASK / 3 * (cy)))
1672*/
1673#endif
1674
1675#if GMP_NUMB_BITS % 4 == 0
1676#define mpn_divexact_by5(dst,src,size) \
1677  (7 & 3 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 5)))
1678#endif
1679
1680#if GMP_NUMB_BITS % 3 == 0
1681#define mpn_divexact_by7(dst,src,size) \
1682  (7 & 1 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 7)))
1683#endif
1684
1685#if GMP_NUMB_BITS % 6 == 0
1686#define mpn_divexact_by9(dst,src,size) \
1687  (15 & 7 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 9)))
1688#endif
1689
1690#if GMP_NUMB_BITS % 10 == 0
1691#define mpn_divexact_by11(dst,src,size) \
1692  (15 & 5 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 11)))
1693#endif
1694
1695#if GMP_NUMB_BITS % 12 == 0
1696#define mpn_divexact_by13(dst,src,size) \
1697  (15 & 3 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 13)))
1698#endif
1699
1700#if GMP_NUMB_BITS % 4 == 0
1701#define mpn_divexact_by15(dst,src,size) \
1702  (15 & 1 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 15)))
1703#endif
1704
1705#define mpz_divexact_gcd  __gmpz_divexact_gcd
1706__GMP_DECLSPEC void    mpz_divexact_gcd (mpz_ptr, mpz_srcptr, mpz_srcptr);
1707
1708#define mpz_prodlimbs  __gmpz_prodlimbs
1709__GMP_DECLSPEC mp_size_t mpz_prodlimbs (mpz_ptr, mp_ptr, mp_size_t);
1710
1711#define mpz_oddfac_1  __gmpz_oddfac_1
1712__GMP_DECLSPEC void mpz_oddfac_1 (mpz_ptr, mp_limb_t, unsigned);
1713
1714#define mpz_stronglucas  __gmpz_stronglucas
1715__GMP_DECLSPEC int mpz_stronglucas (mpz_srcptr, mpz_ptr, mpz_ptr);
1716
1717#define mpz_lucas_mod  __gmpz_lucas_mod
1718__GMP_DECLSPEC int mpz_lucas_mod (mpz_ptr, mpz_ptr, long, mp_bitcnt_t, mpz_srcptr, mpz_ptr, mpz_ptr);
1719
1720#define mpz_inp_str_nowhite __gmpz_inp_str_nowhite
1721#ifdef _GMP_H_HAVE_FILE
1722__GMP_DECLSPEC size_t  mpz_inp_str_nowhite (mpz_ptr, FILE *, int, int, size_t);
1723#endif
1724
1725#define mpn_divisible_p __MPN(divisible_p)
1726__GMP_DECLSPEC int     mpn_divisible_p (mp_srcptr, mp_size_t, mp_srcptr, mp_size_t) __GMP_ATTRIBUTE_PURE;
1727
1728#define   mpn_rootrem __MPN(rootrem)
1729__GMP_DECLSPEC mp_size_t mpn_rootrem (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
1730
1731#define mpn_broot __MPN(broot)
1732__GMP_DECLSPEC void mpn_broot (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
1733
1734#define mpn_broot_invm1 __MPN(broot_invm1)
1735__GMP_DECLSPEC void mpn_broot_invm1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
1736
1737#define mpn_brootinv __MPN(brootinv)
1738__GMP_DECLSPEC void mpn_brootinv (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_ptr);
1739
1740#define mpn_bsqrt __MPN(bsqrt)
1741__GMP_DECLSPEC void mpn_bsqrt (mp_ptr, mp_srcptr, mp_bitcnt_t, mp_ptr);
1742
1743#define mpn_bsqrtinv __MPN(bsqrtinv)
1744__GMP_DECLSPEC int mpn_bsqrtinv (mp_ptr, mp_srcptr, mp_bitcnt_t, mp_ptr);
1745
1746#if defined (_CRAY)
1747#define MPN_COPY_INCR(dst, src, n)					\
1748  do {									\
1749    int __i;		/* Faster on some Crays with plain int */	\
1750    _Pragma ("_CRI ivdep");						\
1751    for (__i = 0; __i < (n); __i++)					\
1752      (dst)[__i] = (src)[__i];						\
1753  } while (0)
1754#endif
1755
1756/* used by test programs, hence __GMP_DECLSPEC */
1757#ifndef mpn_copyi  /* if not done with cpuvec in a fat binary */
1758#define mpn_copyi __MPN(copyi)
1759__GMP_DECLSPEC void mpn_copyi (mp_ptr, mp_srcptr, mp_size_t);
1760#endif
1761
1762#if ! defined (MPN_COPY_INCR) && HAVE_NATIVE_mpn_copyi
1763#define MPN_COPY_INCR(dst, src, size)					\
1764  do {									\
1765    ASSERT ((size) >= 0);						\
1766    ASSERT (MPN_SAME_OR_INCR_P (dst, src, size));			\
1767    mpn_copyi (dst, src, size);						\
1768  } while (0)
1769#endif
1770
1771/* Copy N limbs from SRC to DST incrementing, N==0 allowed.  */
1772#if ! defined (MPN_COPY_INCR)
1773#define MPN_COPY_INCR(dst, src, n)					\
1774  do {									\
1775    ASSERT ((n) >= 0);							\
1776    ASSERT (MPN_SAME_OR_INCR_P (dst, src, n));				\
1777    if ((n) != 0)							\
1778      {									\
1779	mp_size_t __n = (n) - 1;					\
1780	mp_ptr __dst = (dst);						\
1781	mp_srcptr __src = (src);					\
1782	mp_limb_t __x;							\
1783	__x = *__src++;							\
1784	if (__n != 0)							\
1785	  {								\
1786	    do								\
1787	      {								\
1788		*__dst++ = __x;						\
1789		__x = *__src++;						\
1790	      }								\
1791	    while (--__n);						\
1792	  }								\
1793	*__dst++ = __x;							\
1794      }									\
1795  } while (0)
1796#endif
1797
1798
1799#if defined (_CRAY)
1800#define MPN_COPY_DECR(dst, src, n)					\
1801  do {									\
1802    int __i;		/* Faster on some Crays with plain int */	\
1803    _Pragma ("_CRI ivdep");						\
1804    for (__i = (n) - 1; __i >= 0; __i--)				\
1805      (dst)[__i] = (src)[__i];						\
1806  } while (0)
1807#endif
1808
1809/* used by test programs, hence __GMP_DECLSPEC */
1810#ifndef mpn_copyd  /* if not done with cpuvec in a fat binary */
1811#define mpn_copyd __MPN(copyd)
1812__GMP_DECLSPEC void mpn_copyd (mp_ptr, mp_srcptr, mp_size_t);
1813#endif
1814
1815#if ! defined (MPN_COPY_DECR) && HAVE_NATIVE_mpn_copyd
1816#define MPN_COPY_DECR(dst, src, size)					\
1817  do {									\
1818    ASSERT ((size) >= 0);						\
1819    ASSERT (MPN_SAME_OR_DECR_P (dst, src, size));			\
1820    mpn_copyd (dst, src, size);						\
1821  } while (0)
1822#endif
1823
1824/* Copy N limbs from SRC to DST decrementing, N==0 allowed.  */
1825#if ! defined (MPN_COPY_DECR)
1826#define MPN_COPY_DECR(dst, src, n)					\
1827  do {									\
1828    ASSERT ((n) >= 0);							\
1829    ASSERT (MPN_SAME_OR_DECR_P (dst, src, n));				\
1830    if ((n) != 0)							\
1831      {									\
1832	mp_size_t __n = (n) - 1;					\
1833	mp_ptr __dst = (dst) + __n;					\
1834	mp_srcptr __src = (src) + __n;					\
1835	mp_limb_t __x;							\
1836	__x = *__src--;							\
1837	if (__n != 0)							\
1838	  {								\
1839	    do								\
1840	      {								\
1841		*__dst-- = __x;						\
1842		__x = *__src--;						\
1843	      }								\
1844	    while (--__n);						\
1845	  }								\
1846	*__dst-- = __x;							\
1847      }									\
1848  } while (0)
1849#endif
1850
1851
1852#ifndef MPN_COPY
1853#define MPN_COPY(d,s,n)							\
1854  do {									\
1855    ASSERT (MPN_SAME_OR_SEPARATE_P (d, s, n));				\
1856    MPN_COPY_INCR (d, s, n);						\
1857  } while (0)
1858#endif
1859
1860
1861/* Set {dst,size} to the limbs of {src,size} in reverse order. */
1862#define MPN_REVERSE(dst, src, size)					\
1863  do {									\
1864    mp_ptr     __dst = (dst);						\
1865    mp_size_t  __size = (size);						\
1866    mp_srcptr  __src = (src) + __size - 1;				\
1867    mp_size_t  __i;							\
1868    ASSERT ((size) >= 0);						\
1869    ASSERT (! MPN_OVERLAP_P (dst, size, src, size));			\
1870    CRAY_Pragma ("_CRI ivdep");						\
1871    for (__i = 0; __i < __size; __i++)					\
1872      {									\
1873	*__dst = *__src;						\
1874	__dst++;							\
1875	__src--;							\
1876      }									\
1877  } while (0)
1878
1879
1880/* Zero n limbs at dst.
1881
1882   For power and powerpc we want an inline stu/bdnz loop for zeroing.  On
1883   ppc630 for instance this is optimal since it can sustain only 1 store per
1884   cycle.
1885
1886   gcc 2.95.x (for powerpc64 -maix64, or powerpc32) doesn't recognise that
1887   the "for" loop in the generic code below can become stu/bdnz.  The do/while
1888   here helps it get to that.  The same caveat about plain -mpowerpc64 mode
1889   applies here as to __GMPN_COPY_INCR in gmp.h.
1890
1891   xlc 3.1 already generates stu/bdnz from the generic C, and does so from
1892   this loop too.
1893
1894   Enhancement: GLIBC does some trickery with dcbz to zero whole cache lines
1895   at a time.  MPN_ZERO isn't all that important in GMP, so it might be more
1896   trouble than it's worth to do the same, though perhaps a call to memset
1897   would be good when on a GNU system.  */
1898
1899#if HAVE_HOST_CPU_FAMILY_power || HAVE_HOST_CPU_FAMILY_powerpc
1900#define MPN_FILL(dst, n, f)						\
1901  do {									\
1902    mp_ptr __dst = (dst) - 1;						\
1903    mp_size_t __n = (n);						\
1904    ASSERT (__n > 0);							\
1905    do									\
1906      *++__dst = (f);							\
1907    while (--__n);							\
1908  } while (0)
1909#endif
1910
1911#ifndef MPN_FILL
1912#define MPN_FILL(dst, n, f)						\
1913  do {									\
1914    mp_ptr __dst = (dst);						\
1915    mp_size_t __n = (n);						\
1916    ASSERT (__n > 0);							\
1917    do									\
1918      *__dst++ = (f);							\
1919    while (--__n);							\
1920  } while (0)
1921#endif
1922
1923#define MPN_ZERO(dst, n)						\
1924  do {									\
1925    ASSERT ((n) >= 0);							\
1926    if ((n) != 0)							\
1927      MPN_FILL (dst, n, CNST_LIMB (0));					\
1928  } while (0)
1929
1930/* On the x86s repe/scasl doesn't seem useful, since it takes many cycles to
1931   start up and would need to strip a lot of zeros before it'd be faster
1932   than a simple cmpl loop.  Here are some times in cycles for
1933   std/repe/scasl/cld and cld/repe/scasl (the latter would be for stripping
1934   low zeros).
1935
1936		std   cld
1937	   P5    18    16
1938	   P6    46    38
1939	   K6    36    13
1940	   K7    21    20
1941*/
1942#ifndef MPN_NORMALIZE
1943#define MPN_NORMALIZE(DST, NLIMBS) \
1944  do {									\
1945    while ((NLIMBS) > 0)						\
1946      {									\
1947	if ((DST)[(NLIMBS) - 1] != 0)					\
1948	  break;							\
1949	(NLIMBS)--;							\
1950      }									\
1951  } while (0)
1952#endif
1953#ifndef MPN_NORMALIZE_NOT_ZERO
1954#define MPN_NORMALIZE_NOT_ZERO(DST, NLIMBS)				\
1955  do {									\
1956    while (1)								\
1957      {									\
1958	ASSERT ((NLIMBS) >= 1);						\
1959	if ((DST)[(NLIMBS) - 1] != 0)					\
1960	  break;							\
1961	(NLIMBS)--;							\
1962      }									\
1963  } while (0)
1964#endif
1965
1966/* Strip least significant zero limbs from {ptr,size} by incrementing ptr
1967   and decrementing size.  low should be ptr[0], and will be the new ptr[0]
1968   on returning.  The number in {ptr,size} must be non-zero, ie. size!=0 and
1969   somewhere a non-zero limb.  */
1970#define MPN_STRIP_LOW_ZEROS_NOT_ZERO(ptr, size, low)			\
1971  do {									\
1972    ASSERT ((size) >= 1);						\
1973    ASSERT ((low) == (ptr)[0]);						\
1974									\
1975    while ((low) == 0)							\
1976      {									\
1977	(size)--;							\
1978	ASSERT ((size) >= 1);						\
1979	(ptr)++;							\
1980	(low) = *(ptr);							\
1981      }									\
1982  } while (0)
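
/* Typical call pattern (illustrative): prime the "low" variable with the
   current least significant limb before invoking the macro.

       mp_limb_t  low = ptr[0];
       MPN_STRIP_LOW_ZEROS_NOT_ZERO (ptr, size, low);
       now low == ptr[0] != 0 and {ptr,size} holds the stripped number
*/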
1983
1984/* Initialize X of type mpz_t with space for NLIMBS limbs.  X should be a
1985   temporary variable; it will be automatically cleared out at function
1986   return.  We use __x here to make it possible to accept both mpz_ptr and
1987   mpz_t arguments.  */
1988#define MPZ_TMP_INIT(X, NLIMBS)						\
1989  do {									\
1990    mpz_ptr __x = (X);							\
1991    ASSERT ((NLIMBS) >= 1);						\
1992    __x->_mp_alloc = (NLIMBS);						\
1993    __x->_mp_d = TMP_ALLOC_LIMBS (NLIMBS);				\
1994  } while (0)
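
/* Typical usage sketch, assuming the TMP_* temporary allocation macros
   defined elsewhere in this file; an MPZ_TMP_INIT'ed variable lives in TMP
   space and must not be passed to mpz_clear.

       TMP_DECL;
       mpz_t  t;
       TMP_MARK;
       MPZ_TMP_INIT (t, 2 * n + 1);
       ... use t as scratch ...
       TMP_FREE;
*/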
1995
1996#if WANT_ASSERT
1997static inline void *
1998_mpz_newalloc (mpz_ptr z, mp_size_t n)
1999{
2000  void * res = _mpz_realloc(z,n);
2001  /* If we are checking the code, force a random change to limbs. */
2002  ((mp_ptr) res)[0] = ~ ((mp_ptr) res)[ALLOC (z) - 1];
2003  return res;
2004}
2005#else
2006#define _mpz_newalloc _mpz_realloc
2007#endif
2008/* Realloc for an mpz_t Z if it has fewer than N limbs allocated.  */
2009#define MPZ_REALLOC(z,n) (UNLIKELY ((n) > ALLOC(z))			\
2010			  ? (mp_ptr) _mpz_realloc(z,n)			\
2011			  : PTR(z))
2012#define MPZ_NEWALLOC(z,n) (UNLIKELY ((n) > ALLOC(z))			\
2013			   ? (mp_ptr) _mpz_newalloc(z,n)		\
2014			   : PTR(z))
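
/* Usage sketch (illustrative, hypothetical names): grow the destination
   before storing "size" limbs into it, keeping the returned limb pointer
   since a reallocation may move the data.

       mp_ptr  rp = MPZ_REALLOC (r, size);
       MPN_COPY (rp, sp, size);
       SIZ (r) = size;
*/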
2015
2016#define MPZ_EQUAL_1_P(z)  (SIZ(z)==1 && PTR(z)[0] == 1)
2017
2018
2019/* MPN_FIB2_SIZE(n) is the size in limbs required by mpn_fib2_ui for fp and
2020   f1p.
2021
2022   From Knuth vol 1 section 1.2.8, F[n] = phi^n/sqrt(5) rounded to the
2023   nearest integer, where phi=(1+sqrt(5))/2 is the golden ratio.  So the
2024   number of bits required is n*log_2((1+sqrt(5))/2) = n*0.6942419.
2025
2026   The multiplier used is 23/32=0.71875 for efficient calculation on CPUs
2027   without good floating point.  There's +2 for rounding up, and a further
2028   +2 since at the last step x limbs are doubled into a 2x+1 limb region
2029   whereas the actual F[2k] value might be only 2x-1 limbs.
2030
2031   Note that a division is done first, since on a 32-bit system it's at
2032   least conceivable to go right up to n==ULONG_MAX.  (F[2^32-1] would be
2033   about 380Mbytes, plus temporary workspace of about 1.2Gbytes here and
2034   whatever a multiply of two 190Mbyte numbers takes.)
2035
2036   Enhancement: When GMP_NUMB_BITS is not a power of 2 the division could be
2037   worked into the multiplier.  */
2038
2039#define MPN_FIB2_SIZE(n) \
2040  ((mp_size_t) ((n) / 32 * 23 / GMP_NUMB_BITS) + 4)
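
/* Worked example (illustrative): with GMP_NUMB_BITS == 64 and n == 1000,
   MPN_FIB2_SIZE gives 1000/32*23/64 + 4 = 31*23/64 + 4 = 11 + 4 = 15
   limbs, comfortably above the roughly 1000*0.6942/64, about 11, limbs
   that F[1000] itself occupies.  */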
2041
2042
2043/* FIB_TABLE(n) returns the Fibonacci number F[n].  Must have n in the range
2044   -1 <= n <= FIB_TABLE_LIMIT (that constant in fib_table.h).
2045
2046   FIB_TABLE_LUCNUM_LIMIT (in fib_table.h) is the largest n for which L[n] =
2047   F[n] + 2*F[n-1] fits in a limb.  */
2048
2049__GMP_DECLSPEC extern const mp_limb_t __gmp_fib_table[];
2050#define FIB_TABLE(n)  (__gmp_fib_table[(n)+1])
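
/* The "+1" in FIB_TABLE makes the n == -1 entry addressable: FIB_TABLE(-1)
   reads __gmp_fib_table[0] and FIB_TABLE(0) reads __gmp_fib_table[1].
   Since F[1] = F[0] + F[-1], the n == -1 value is F[-1] = 1, so for
   instance FIB_TABLE(-1) == 1, FIB_TABLE(0) == 0 and FIB_TABLE(5) == 5.  */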
2051
2052extern const mp_limb_t __gmp_oddfac_table[];
2053extern const mp_limb_t __gmp_odd2fac_table[];
2054extern const unsigned char __gmp_fac2cnt_table[];
2055extern const mp_limb_t __gmp_limbroots_table[];
2056
2057/* n^log <= GMP_NUMB_MAX, a limb can store log factors less than n */
2058static inline unsigned
2059log_n_max (mp_limb_t n)
2060{
2061  unsigned log;
2062  for (log = 8; n > __gmp_limbroots_table[log - 1]; log--);
2063  return log;
2064}
2065
2066#define SIEVESIZE 512		/* FIXME: Allow gmp_init_primesieve to choose */
2067typedef struct
2068{
2069  unsigned long d;		   /* current index in s[] */
2070  unsigned long s0;		   /* number corresponding to s[0] */
2071  unsigned long sqrt_s0;	   /* misnomer for sqrt(s[SIEVESIZE-1]) */
2072  unsigned char s[SIEVESIZE + 1];  /* sieve table */
2073} gmp_primesieve_t;
2074
2075#define gmp_init_primesieve __gmp_init_primesieve
2076__GMP_DECLSPEC void gmp_init_primesieve (gmp_primesieve_t *);
2077
2078#define gmp_nextprime __gmp_nextprime
2079__GMP_DECLSPEC unsigned long int gmp_nextprime (gmp_primesieve_t *);
2080
2081#define gmp_primesieve __gmp_primesieve
2082__GMP_DECLSPEC mp_limb_t gmp_primesieve (mp_ptr, mp_limb_t);
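
/* Usage sketch (assumed from the declarations above; "limit" is just an
   illustrative bound): enumerate small primes in increasing order.

       gmp_primesieve_t  ps;
       unsigned long     p;
       gmp_init_primesieve (&ps);
       for (p = gmp_nextprime (&ps); p < limit; p = gmp_nextprime (&ps))
         ... use the prime p ...
*/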
2083
2084
2085#ifndef MUL_TOOM22_THRESHOLD
2086#define MUL_TOOM22_THRESHOLD             30
2087#endif
2088
2089#ifndef MUL_TOOM33_THRESHOLD
2090#define MUL_TOOM33_THRESHOLD            100
2091#endif
2092
2093#ifndef MUL_TOOM44_THRESHOLD
2094#define MUL_TOOM44_THRESHOLD            300
2095#endif
2096
2097#ifndef MUL_TOOM6H_THRESHOLD
2098#define MUL_TOOM6H_THRESHOLD            350
2099#endif
2100
2101#ifndef SQR_TOOM6_THRESHOLD
2102#define SQR_TOOM6_THRESHOLD MUL_TOOM6H_THRESHOLD
2103#endif
2104
2105#ifndef MUL_TOOM8H_THRESHOLD
2106#define MUL_TOOM8H_THRESHOLD            450
2107#endif
2108
2109#ifndef SQR_TOOM8_THRESHOLD
2110#define SQR_TOOM8_THRESHOLD MUL_TOOM8H_THRESHOLD
2111#endif
2112
2113#ifndef MUL_TOOM32_TO_TOOM43_THRESHOLD
2114#define MUL_TOOM32_TO_TOOM43_THRESHOLD  100
2115#endif
2116
2117#ifndef MUL_TOOM32_TO_TOOM53_THRESHOLD
2118#define MUL_TOOM32_TO_TOOM53_THRESHOLD  110
2119#endif
2120
2121#ifndef MUL_TOOM42_TO_TOOM53_THRESHOLD
2122#define MUL_TOOM42_TO_TOOM53_THRESHOLD  100
2123#endif
2124
2125#ifndef MUL_TOOM42_TO_TOOM63_THRESHOLD
2126#define MUL_TOOM42_TO_TOOM63_THRESHOLD  110
2127#endif
2128
2129#ifndef MUL_TOOM43_TO_TOOM54_THRESHOLD
2130#define MUL_TOOM43_TO_TOOM54_THRESHOLD  150
2131#endif
2132
2133/* MUL_TOOM22_THRESHOLD_LIMIT is the maximum for MUL_TOOM22_THRESHOLD.  In a
2134   normal build MUL_TOOM22_THRESHOLD is a constant and we use that.  In a fat
2135   binary or tune program build MUL_TOOM22_THRESHOLD is a variable and a
2136   separate hard limit will have been defined.  Similarly for TOOM3.  */
2137#ifndef MUL_TOOM22_THRESHOLD_LIMIT
2138#define MUL_TOOM22_THRESHOLD_LIMIT  MUL_TOOM22_THRESHOLD
2139#endif
2140#ifndef MUL_TOOM33_THRESHOLD_LIMIT
2141#define MUL_TOOM33_THRESHOLD_LIMIT  MUL_TOOM33_THRESHOLD
2142#endif
2143#ifndef MULLO_BASECASE_THRESHOLD_LIMIT
2144#define MULLO_BASECASE_THRESHOLD_LIMIT  MULLO_BASECASE_THRESHOLD
2145#endif
2146#ifndef SQRLO_BASECASE_THRESHOLD_LIMIT
2147#define SQRLO_BASECASE_THRESHOLD_LIMIT  SQRLO_BASECASE_THRESHOLD
2148#endif
2149#ifndef SQRLO_DC_THRESHOLD_LIMIT
2150#define SQRLO_DC_THRESHOLD_LIMIT  SQRLO_DC_THRESHOLD
2151#endif
2152
2153/* SQR_BASECASE_THRESHOLD is where mpn_sqr_basecase should take over from
2154   mpn_mul_basecase.  Default is to use mpn_sqr_basecase from 0.  (Note that we
2155   certainly always want it if there's a native assembler mpn_sqr_basecase.)
2156
2157   If it turns out that mpn_toom2_sqr becomes faster than mpn_mul_basecase
2158   before mpn_sqr_basecase does, then SQR_BASECASE_THRESHOLD is the toom2
2159   threshold and SQR_TOOM2_THRESHOLD is 0.  This oddity arises more or less
2160   because SQR_TOOM2_THRESHOLD represents the size up to which mpn_sqr_basecase
2161   should be used, and that may be never.  */
2162
2163#ifndef SQR_BASECASE_THRESHOLD
2164#define SQR_BASECASE_THRESHOLD            0  /* never use mpn_mul_basecase */
2165#endif
2166
2167#ifndef SQR_TOOM2_THRESHOLD
2168#define SQR_TOOM2_THRESHOLD              50
2169#endif
2170
2171#ifndef SQR_TOOM3_THRESHOLD
2172#define SQR_TOOM3_THRESHOLD             120
2173#endif
2174
2175#ifndef SQR_TOOM4_THRESHOLD
2176#define SQR_TOOM4_THRESHOLD             400
2177#endif
2178
2179/* See comments above about MUL_TOOM33_THRESHOLD_LIMIT.  */
2180#ifndef SQR_TOOM3_THRESHOLD_LIMIT
2181#define SQR_TOOM3_THRESHOLD_LIMIT  SQR_TOOM3_THRESHOLD
2182#endif
2183
2184#ifndef MULMID_TOOM42_THRESHOLD
2185#define MULMID_TOOM42_THRESHOLD     MUL_TOOM22_THRESHOLD
2186#endif
2187
2188#ifndef MULLO_BASECASE_THRESHOLD
2189#define MULLO_BASECASE_THRESHOLD          0  /* never use mpn_mul_basecase */
2190#endif
2191
2192#ifndef MULLO_DC_THRESHOLD
2193#define MULLO_DC_THRESHOLD         (2*MUL_TOOM22_THRESHOLD)
2194#endif
2195
2196#ifndef MULLO_MUL_N_THRESHOLD
2197#define MULLO_MUL_N_THRESHOLD      (2*MUL_FFT_THRESHOLD)
2198#endif
2199
2200#ifndef SQRLO_BASECASE_THRESHOLD
2201#define SQRLO_BASECASE_THRESHOLD          0  /* never use mpn_sqr_basecase */
2202#endif
2203
2204#ifndef SQRLO_DC_THRESHOLD
2205#define SQRLO_DC_THRESHOLD         (MULLO_DC_THRESHOLD)
2206#endif
2207
2208#ifndef SQRLO_SQR_THRESHOLD
2209#define SQRLO_SQR_THRESHOLD        (MULLO_MUL_N_THRESHOLD)
2210#endif
2211
2212#ifndef DC_DIV_QR_THRESHOLD
2213#define DC_DIV_QR_THRESHOLD        (2*MUL_TOOM22_THRESHOLD)
2214#endif
2215
2216#ifndef DC_DIVAPPR_Q_THRESHOLD
2217#define DC_DIVAPPR_Q_THRESHOLD          200
2218#endif
2219
2220#ifndef DC_BDIV_QR_THRESHOLD
2221#define DC_BDIV_QR_THRESHOLD       (2*MUL_TOOM22_THRESHOLD)
2222#endif
2223
2224#ifndef DC_BDIV_Q_THRESHOLD
2225#define DC_BDIV_Q_THRESHOLD             180
2226#endif
2227
2228#ifndef DIVEXACT_JEB_THRESHOLD
2229#define DIVEXACT_JEB_THRESHOLD           25
2230#endif
2231
2232#ifndef INV_MULMOD_BNM1_THRESHOLD
2233#define INV_MULMOD_BNM1_THRESHOLD  (4*MULMOD_BNM1_THRESHOLD)
2234#endif
2235
2236#ifndef INV_APPR_THRESHOLD
2237#define INV_APPR_THRESHOLD         INV_NEWTON_THRESHOLD
2238#endif
2239
2240#ifndef INV_NEWTON_THRESHOLD
2241#define INV_NEWTON_THRESHOLD            200
2242#endif
2243
2244#ifndef BINV_NEWTON_THRESHOLD
2245#define BINV_NEWTON_THRESHOLD           300
2246#endif
2247
2248#ifndef MU_DIVAPPR_Q_THRESHOLD
2249#define MU_DIVAPPR_Q_THRESHOLD         2000
2250#endif
2251
2252#ifndef MU_DIV_QR_THRESHOLD
2253#define MU_DIV_QR_THRESHOLD            2000
2254#endif
2255
2256#ifndef MUPI_DIV_QR_THRESHOLD
2257#define MUPI_DIV_QR_THRESHOLD           200
2258#endif
2259
2260#ifndef MU_BDIV_Q_THRESHOLD
2261#define MU_BDIV_Q_THRESHOLD            2000
2262#endif
2263
2264#ifndef MU_BDIV_QR_THRESHOLD
2265#define MU_BDIV_QR_THRESHOLD           2000
2266#endif
2267
2268#ifndef MULMOD_BNM1_THRESHOLD
2269#define MULMOD_BNM1_THRESHOLD            16
2270#endif
2271
2272#ifndef SQRMOD_BNM1_THRESHOLD
2273#define SQRMOD_BNM1_THRESHOLD            16
2274#endif
2275
2276#ifndef MUL_TO_MULMOD_BNM1_FOR_2NXN_THRESHOLD
2277#define MUL_TO_MULMOD_BNM1_FOR_2NXN_THRESHOLD  (INV_MULMOD_BNM1_THRESHOLD/2)
2278#endif
2279
2280#if HAVE_NATIVE_mpn_addmul_2 || HAVE_NATIVE_mpn_redc_2
2281
2282#ifndef REDC_1_TO_REDC_2_THRESHOLD
2283#define REDC_1_TO_REDC_2_THRESHOLD       15
2284#endif
2285#ifndef REDC_2_TO_REDC_N_THRESHOLD
2286#define REDC_2_TO_REDC_N_THRESHOLD      100
2287#endif
2288
2289#else
2290
2291#ifndef REDC_1_TO_REDC_N_THRESHOLD
2292#define REDC_1_TO_REDC_N_THRESHOLD      100
2293#endif
2294
2295#endif /* HAVE_NATIVE_mpn_addmul_2 || HAVE_NATIVE_mpn_redc_2 */
2296
2297
2298/* First k to use for an FFT modF multiply.  A modF FFT is an order
2299   log(2^k)/log(2^(k-1)) algorithm, so k=3 is merely 1.5 like karatsuba,
2300   whereas k=4 is 1.33 which is faster than toom3 at about 1.465.  */
2301#define FFT_FIRST_K  4
2302
2303/* Threshold at which FFT should be used to do a modF NxN -> N multiply. */
2304#ifndef MUL_FFT_MODF_THRESHOLD
2305#define MUL_FFT_MODF_THRESHOLD   (MUL_TOOM33_THRESHOLD * 3)
2306#endif
2307#ifndef SQR_FFT_MODF_THRESHOLD
2308#define SQR_FFT_MODF_THRESHOLD   (SQR_TOOM3_THRESHOLD * 3)
2309#endif
2310
2311/* Threshold at which FFT should be used to do an NxN -> 2N multiply.  This
2312   will be a size where FFT is using k=7 or k=8, since an FFT-k used for an
2313   NxN->2N multiply and not recursing into itself is an order
2314   log(2^k)/log(2^(k-2)) algorithm, so it'll be at least k=7 at 1.39 which
2315   is the first better than toom3.  */
2316#ifndef MUL_FFT_THRESHOLD
2317#define MUL_FFT_THRESHOLD   (MUL_FFT_MODF_THRESHOLD * 10)
2318#endif
2319#ifndef SQR_FFT_THRESHOLD
2320#define SQR_FFT_THRESHOLD   (SQR_FFT_MODF_THRESHOLD * 10)
2321#endif
2322
2323/* Table of thresholds for successive modF FFT "k"s.  The first entry is
2324   where FFT_FIRST_K+1 should be used, the second FFT_FIRST_K+2,
2325   etc.  See mpn_fft_best_k(). */
2326#ifndef MUL_FFT_TABLE
2327#define MUL_FFT_TABLE							\
2328  { MUL_TOOM33_THRESHOLD * 4,   /* k=5 */				\
2329    MUL_TOOM33_THRESHOLD * 8,   /* k=6 */				\
2330    MUL_TOOM33_THRESHOLD * 16,  /* k=7 */				\
2331    MUL_TOOM33_THRESHOLD * 32,  /* k=8 */				\
2332    MUL_TOOM33_THRESHOLD * 96,  /* k=9 */				\
2333    MUL_TOOM33_THRESHOLD * 288, /* k=10 */				\
2334    0 }
2335#endif
2336#ifndef SQR_FFT_TABLE
2337#define SQR_FFT_TABLE							\
2338  { SQR_TOOM3_THRESHOLD * 4,   /* k=5 */				\
2339    SQR_TOOM3_THRESHOLD * 8,   /* k=6 */				\
2340    SQR_TOOM3_THRESHOLD * 16,  /* k=7 */				\
2341    SQR_TOOM3_THRESHOLD * 32,  /* k=8 */				\
2342    SQR_TOOM3_THRESHOLD * 96,  /* k=9 */				\
2343    SQR_TOOM3_THRESHOLD * 288, /* k=10 */				\
2344    0 }
2345#endif
2346
2347struct fft_table_nk
2348{
2349  gmp_uint_least32_t n:27;
2350  gmp_uint_least32_t k:5;
2351};
2352
2353#ifndef FFT_TABLE_ATTRS
2354#define FFT_TABLE_ATTRS   static const
2355#endif
2356
2357#define MPN_FFT_TABLE_SIZE  16
2358
2359
2360#ifndef DC_DIV_QR_THRESHOLD
2361#define DC_DIV_QR_THRESHOLD    (3 * MUL_TOOM22_THRESHOLD)
2362#endif
2363
2364#ifndef GET_STR_DC_THRESHOLD
2365#define GET_STR_DC_THRESHOLD             18
2366#endif
2367
2368#ifndef GET_STR_PRECOMPUTE_THRESHOLD
2369#define GET_STR_PRECOMPUTE_THRESHOLD     35
2370#endif
2371
2372#ifndef SET_STR_DC_THRESHOLD
2373#define SET_STR_DC_THRESHOLD            750
2374#endif
2375
2376#ifndef SET_STR_PRECOMPUTE_THRESHOLD
2377#define SET_STR_PRECOMPUTE_THRESHOLD   2000
2378#endif
2379
2380#ifndef FAC_ODD_THRESHOLD
2381#define FAC_ODD_THRESHOLD    35
2382#endif
2383
2384#ifndef FAC_DSC_THRESHOLD
2385#define FAC_DSC_THRESHOLD   400
2386#endif
2387
2388/* Return non-zero if xp,xsize and yp,ysize overlap.
2389   If xp+xsize<=yp there's no overlap, or if yp+ysize<=xp there's no
2390   overlap.  If both these are false, there's an overlap. */
2391#define MPN_OVERLAP_P(xp, xsize, yp, ysize)				\
2392  ((xp) + (xsize) > (yp) && (yp) + (ysize) > (xp))
2393#define MEM_OVERLAP_P(xp, xsize, yp, ysize)				\
2394  (   (char *) (xp) + (xsize) > (char *) (yp)				\
2395   && (char *) (yp) + (ysize) > (char *) (xp))
2396
2397/* Return non-zero if xp,xsize and yp,ysize are either identical or not
2398   overlapping.  Return zero if they're partially overlapping. */
2399#define MPN_SAME_OR_SEPARATE_P(xp, yp, size)				\
2400  MPN_SAME_OR_SEPARATE2_P(xp, size, yp, size)
2401#define MPN_SAME_OR_SEPARATE2_P(xp, xsize, yp, ysize)			\
2402  ((xp) == (yp) || ! MPN_OVERLAP_P (xp, xsize, yp, ysize))
2403
2404/* Return non-zero if dst,dsize and src,ssize are either identical or
2405   overlapping in a way suitable for an incrementing/decrementing algorithm.
2406   Return zero if they're partially overlapping in an unsuitable fashion. */
2407#define MPN_SAME_OR_INCR2_P(dst, dsize, src, ssize)			\
2408  ((dst) <= (src) || ! MPN_OVERLAP_P (dst, dsize, src, ssize))
2409#define MPN_SAME_OR_INCR_P(dst, src, size)				\
2410  MPN_SAME_OR_INCR2_P(dst, size, src, size)
2411#define MPN_SAME_OR_DECR2_P(dst, dsize, src, ssize)			\
2412  ((dst) >= (src) || ! MPN_OVERLAP_P (dst, dsize, src, ssize))
2413#define MPN_SAME_OR_DECR_P(dst, src, size)				\
2414  MPN_SAME_OR_DECR2_P(dst, size, src, size)
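
/* Small worked example (illustrative): for a 5 limb region xp,
   MPN_OVERLAP_P (xp, 5, xp + 3, 5) is non-zero since the ranges share
   xp[3] and xp[4].  MPN_SAME_OR_INCR_P (xp, xp + 3, 5) is non-zero, so an
   incrementing copy (eg. MPN_COPY_INCR) may move {xp+3,5} down to {xp,5},
   whereas MPN_SAME_OR_DECR_P (xp, xp + 3, 5) is zero, so a decrementing
   copy must not be used for that move.  */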
2415
2416
2417/* ASSERT() is a private assertion checking scheme, similar to <assert.h>.
2418   ASSERT() does the check only if WANT_ASSERT is selected, ASSERT_ALWAYS()
2419   does it always.  Generally assertions are meant for development, but
2420   might help when looking for a problem later too.  */
2421
2422#ifdef __LINE__
2423#define ASSERT_LINE  __LINE__
2424#else
2425#define ASSERT_LINE  -1
2426#endif
2427
2428#ifdef __FILE__
2429#define ASSERT_FILE  __FILE__
2430#else
2431#define ASSERT_FILE  ""
2432#endif
2433
2434__GMP_DECLSPEC void __gmp_assert_header (const char *, int);
2435__GMP_DECLSPEC void __gmp_assert_fail (const char *, int, const char *) ATTRIBUTE_NORETURN;
2436
2437#define ASSERT_FAIL(expr)  __gmp_assert_fail (ASSERT_FILE, ASSERT_LINE, #expr)
2438
2439#define ASSERT_ALWAYS(expr)						\
2440  do {									\
2441    if (UNLIKELY (!(expr)))						\
2442      ASSERT_FAIL (expr);						\
2443  } while (0)
2444
2445#if WANT_ASSERT
2446#define ASSERT(expr)   ASSERT_ALWAYS (expr)
2447#else
2448#define ASSERT(expr)   do {} while (0)
2449#endif
2450
2451
2452/* ASSERT_CARRY checks the expression is non-zero, and ASSERT_NOCARRY checks
2453   that it's zero.  In both cases if assertion checking is disabled the
2454   expression is still evaluated.  These macros are meant for use with
2455   routines like mpn_add_n() where the return value represents a carry or
2456   whatever that should or shouldn't occur in some context.  For example,
2457   ASSERT_NOCARRY (mpn_add_n (rp, s1p, s2p, size)); */
2458#if WANT_ASSERT
2459#define ASSERT_CARRY(expr)     ASSERT_ALWAYS ((expr) != 0)
2460#define ASSERT_NOCARRY(expr)   ASSERT_ALWAYS ((expr) == 0)
2461#else
2462#define ASSERT_CARRY(expr)     (expr)
2463#define ASSERT_NOCARRY(expr)   (expr)
2464#endif
2465
2466
2467/* ASSERT_CODE includes code when assertion checking is wanted.  This is the
2468   same as writing "#if WANT_ASSERT", but more compact.  */
2469#if WANT_ASSERT
2470#define ASSERT_CODE(expr)  expr
2471#else
2472#define ASSERT_CODE(expr)
2473#endif
2474
2475
2476/* Test that an mpq_t is in fully canonical form.  This can be used as
2477   protection on routines like mpq_equal which give wrong results on
2478   non-canonical inputs.  */
2479#if WANT_ASSERT
2480#define ASSERT_MPQ_CANONICAL(q)						\
2481  do {									\
2482    ASSERT (q->_mp_den._mp_size > 0);					\
2483    if (q->_mp_num._mp_size == 0)					\
2484      {									\
2485	/* zero should be 0/1 */					\
2486	ASSERT (mpz_cmp_ui (mpq_denref(q), 1L) == 0);			\
2487      }									\
2488    else								\
2489      {									\
2490	/* no common factors */						\
2491	mpz_t  __g;							\
2492	mpz_init (__g);							\
2493	mpz_gcd (__g, mpq_numref(q), mpq_denref(q));			\
2494	ASSERT (mpz_cmp_ui (__g, 1) == 0);				\
2495	mpz_clear (__g);						\
2496      }									\
2497  } while (0)
2498#else
2499#define ASSERT_MPQ_CANONICAL(q)	 do {} while (0)
2500#endif
2501
2502/* Check that the nail parts are zero. */
2503#define ASSERT_ALWAYS_LIMB(limb)					\
2504  do {									\
2505    mp_limb_t  __nail = (limb) & GMP_NAIL_MASK;				\
2506    ASSERT_ALWAYS (__nail == 0);					\
2507  } while (0)
2508#define ASSERT_ALWAYS_MPN(ptr, size)					\
2509  do {									\
2510    /* let whole loop go dead when no nails */				\
2511    if (GMP_NAIL_BITS != 0)						\
2512      {									\
2513	mp_size_t  __i;							\
2514	for (__i = 0; __i < (size); __i++)				\
2515	  ASSERT_ALWAYS_LIMB ((ptr)[__i]);				\
2516      }									\
2517  } while (0)
2518#if WANT_ASSERT
2519#define ASSERT_LIMB(limb)       ASSERT_ALWAYS_LIMB (limb)
2520#define ASSERT_MPN(ptr, size)   ASSERT_ALWAYS_MPN (ptr, size)
2521#else
2522#define ASSERT_LIMB(limb)       do {} while (0)
2523#define ASSERT_MPN(ptr, size)   do {} while (0)
2524#endif
2525
2526
2527/* Assert that an mpn region {ptr,size} is zero, or non-zero.
2528   size==0 is allowed, and in that case {ptr,size} is considered to be zero.  */
2529#if WANT_ASSERT
2530#define ASSERT_MPN_ZERO_P(ptr,size)					\
2531  do {									\
2532    mp_size_t  __i;							\
2533    ASSERT ((size) >= 0);						\
2534    for (__i = 0; __i < (size); __i++)					\
2535      ASSERT ((ptr)[__i] == 0);						\
2536  } while (0)
2537#define ASSERT_MPN_NONZERO_P(ptr,size)					\
2538  do {									\
2539    mp_size_t  __i;							\
2540    int	       __nonzero = 0;						\
2541    ASSERT ((size) >= 0);						\
2542    for (__i = 0; __i < (size); __i++)					\
2543      if ((ptr)[__i] != 0)						\
2544	{								\
2545	  __nonzero = 1;						\
2546	  break;							\
2547	}								\
2548    ASSERT (__nonzero);							\
2549  } while (0)
2550#else
2551#define ASSERT_MPN_ZERO_P(ptr,size)     do {} while (0)
2552#define ASSERT_MPN_NONZERO_P(ptr,size)  do {} while (0)
2553#endif
2554
2555
2556#if ! HAVE_NATIVE_mpn_com
2557#undef mpn_com
2558#define mpn_com(d,s,n)							\
2559  do {									\
2560    mp_ptr     __d = (d);						\
2561    mp_srcptr  __s = (s);						\
2562    mp_size_t  __n = (n);						\
2563    ASSERT (__n >= 1);							\
2564    ASSERT (MPN_SAME_OR_SEPARATE_P (__d, __s, __n));			\
2565    do									\
2566      *__d++ = (~ *__s++) & GMP_NUMB_MASK;				\
2567    while (--__n);							\
2568  } while (0)
2569#endif
2570
2571#define MPN_LOGOPS_N_INLINE(rp, up, vp, n, operation)			\
2572  do {									\
2573    mp_srcptr	__up = (up);						\
2574    mp_srcptr	__vp = (vp);						\
2575    mp_ptr	__rp = (rp);						\
2576    mp_size_t	__n = (n);						\
2577    mp_limb_t __a, __b;							\
2578    ASSERT (__n > 0);							\
2579    ASSERT (MPN_SAME_OR_SEPARATE_P (__rp, __up, __n));			\
2580    ASSERT (MPN_SAME_OR_SEPARATE_P (__rp, __vp, __n));			\
2581    __up += __n;							\
2582    __vp += __n;							\
2583    __rp += __n;							\
2584    __n = -__n;								\
2585    do {								\
2586      __a = __up[__n];							\
2587      __b = __vp[__n];							\
2588      __rp[__n] = operation;						\
2589    } while (++__n);							\
2590  } while (0)
2591
2592
2593#if ! HAVE_NATIVE_mpn_and_n
2594#undef mpn_and_n
2595#define mpn_and_n(rp, up, vp, n) \
2596  MPN_LOGOPS_N_INLINE (rp, up, vp, n, __a & __b)
2597#endif
2598
2599#if ! HAVE_NATIVE_mpn_andn_n
2600#undef mpn_andn_n
2601#define mpn_andn_n(rp, up, vp, n) \
2602  MPN_LOGOPS_N_INLINE (rp, up, vp, n, __a & ~__b)
2603#endif
2604
2605#if ! HAVE_NATIVE_mpn_nand_n
2606#undef mpn_nand_n
2607#define mpn_nand_n(rp, up, vp, n) \
2608  MPN_LOGOPS_N_INLINE (rp, up, vp, n, ~(__a & __b) & GMP_NUMB_MASK)
2609#endif
2610
2611#if ! HAVE_NATIVE_mpn_ior_n
2612#undef mpn_ior_n
2613#define mpn_ior_n(rp, up, vp, n) \
2614  MPN_LOGOPS_N_INLINE (rp, up, vp, n, __a | __b)
2615#endif
2616
2617#if ! HAVE_NATIVE_mpn_iorn_n
2618#undef mpn_iorn_n
2619#define mpn_iorn_n(rp, up, vp, n) \
2620  MPN_LOGOPS_N_INLINE (rp, up, vp, n, (__a | ~__b) & GMP_NUMB_MASK)
2621#endif
2622
2623#if ! HAVE_NATIVE_mpn_nior_n
2624#undef mpn_nior_n
2625#define mpn_nior_n(rp, up, vp, n) \
2626  MPN_LOGOPS_N_INLINE (rp, up, vp, n, ~(__a | __b) & GMP_NUMB_MASK)
2627#endif
2628
2629#if ! HAVE_NATIVE_mpn_xor_n
2630#undef mpn_xor_n
2631#define mpn_xor_n(rp, up, vp, n) \
2632  MPN_LOGOPS_N_INLINE (rp, up, vp, n, __a ^ __b)
2633#endif
2634
2635#if ! HAVE_NATIVE_mpn_xnor_n
2636#undef mpn_xnor_n
2637#define mpn_xnor_n(rp, up, vp, n) \
2638  MPN_LOGOPS_N_INLINE (rp, up, vp, n, ~(__a ^ __b) & GMP_NUMB_MASK)
2639#endif
2640
2641#define mpn_trialdiv __MPN(trialdiv)
2642__GMP_DECLSPEC mp_limb_t mpn_trialdiv (mp_srcptr, mp_size_t, mp_size_t, int *);
2643
2644#define mpn_remove __MPN(remove)
2645__GMP_DECLSPEC mp_bitcnt_t mpn_remove (mp_ptr, mp_size_t *, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_bitcnt_t);
2646
2647
2648/* ADDC_LIMB sets w=x+y and cout to 0 or 1 for a carry from that addition. */
2649#if GMP_NAIL_BITS == 0
2650#define ADDC_LIMB(cout, w, x, y)					\
2651  do {									\
2652    mp_limb_t  __x = (x);						\
2653    mp_limb_t  __y = (y);						\
2654    mp_limb_t  __w = __x + __y;						\
2655    (w) = __w;								\
2656    (cout) = __w < __x;							\
2657  } while (0)
2658#else
2659#define ADDC_LIMB(cout, w, x, y)					\
2660  do {									\
2661    mp_limb_t  __w;							\
2662    ASSERT_LIMB (x);							\
2663    ASSERT_LIMB (y);							\
2664    __w = (x) + (y);							\
2665    (w) = __w & GMP_NUMB_MASK;						\
2666    (cout) = __w >> GMP_NUMB_BITS;					\
2667  } while (0)
2668#endif
2669
2670/* SUBC_LIMB sets w=x-y and cout to 0 or 1 for a borrow from that
2671   subtract.  */
2672#if GMP_NAIL_BITS == 0
2673#define SUBC_LIMB(cout, w, x, y)					\
2674  do {									\
2675    mp_limb_t  __x = (x);						\
2676    mp_limb_t  __y = (y);						\
2677    mp_limb_t  __w = __x - __y;						\
2678    (w) = __w;								\
2679    (cout) = __w > __x;							\
2680  } while (0)
2681#else
2682#define SUBC_LIMB(cout, w, x, y)					\
2683  do {									\
2684    mp_limb_t  __w = (x) - (y);						\
2685    (w) = __w & GMP_NUMB_MASK;						\
2686    (cout) = __w >> (GMP_LIMB_BITS-1);					\
2687  } while (0)
2688#endif
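
/* Usage sketch (illustrative, nail-free case): add a single limb x into a
   two-limb value {lo,hi}, propagating the carry by hand.

       mp_limb_t  lo, hi, x, cy;
       ...
       ADDC_LIMB (cy, lo, lo, x);      low limb, carry out in cy
       hi += cy;                       high limb absorbs the carry
*/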
2689
2690
2691/* MPN_INCR_U does {ptr,size} += n, MPN_DECR_U does {ptr,size} -= n, both
2692   expecting no carry (or borrow) from that.
2693
2694   The size parameter is only for the benefit of assertion checking.  In a
2695   normal build it's unused and the carry/borrow is just propagated as far
2696   as it needs to go.
2697
2698   On random data, usually only one or two limbs of {ptr,size} get updated,
2699   so there's no need for any sophisticated looping, just something compact
2700   and sensible.
2701
2702   FIXME: Switch all code from mpn_{incr,decr}_u to MPN_{INCR,DECR}_U,
2703   declaring their operand sizes, then remove the former.  This is purely
2704   for the benefit of assertion checking.  */
2705
2706#if defined (__GNUC__) && GMP_NAIL_BITS == 0 && ! defined (NO_ASM)	\
2707  && (defined(HAVE_HOST_CPU_FAMILY_x86) || defined(HAVE_HOST_CPU_FAMILY_x86_64)) \
2708  && ! WANT_ASSERT
2709/* Better flags handling than the generic C gives on i386, saving a few
2710   bytes of code and maybe a cycle or two.  */
2711
2712#define MPN_IORD_U(ptr, incr, aors)					\
2713  do {									\
2714    mp_ptr  __ptr_dummy;						\
2715    if (__builtin_constant_p (incr) && (incr) == 0)			\
2716      {									\
2717      }									\
2718    else if (__builtin_constant_p (incr) && (incr) == 1)		\
2719      {									\
2720	__asm__ __volatile__						\
2721	  ("\n" ASM_L(top) ":\n"					\
2722	   "\t" aors "\t$1, (%0)\n"					\
2723	   "\tlea\t%c2(%0), %0\n"					\
2724	   "\tjc\t" ASM_L(top)						\
2725	   : "=r" (__ptr_dummy)						\
2726	   : "0"  (ptr), "n" (sizeof(mp_limb_t))			\
2727	   : "memory");							\
2728      }									\
2729    else								\
2730      {									\
2731	__asm__ __volatile__						\
2732	  (   aors  "\t%2, (%0)\n"					\
2733	   "\tjnc\t" ASM_L(done) "\n"					\
2734	   ASM_L(top) ":\n"						\
2735	   "\t" aors "\t$1, %c3(%0)\n"					\
2736	   "\tlea\t%c3(%0), %0\n"					\
2737	   "\tjc\t" ASM_L(top) "\n"					\
2738	   ASM_L(done) ":\n"						\
2739	   : "=r" (__ptr_dummy)						\
2740	   : "0"  (ptr),						\
2741	     "re" ((mp_limb_t) (incr)), "n" (sizeof(mp_limb_t))		\
2742	   : "memory");							\
2743      }									\
2744  } while (0)
2745
2746#if GMP_LIMB_BITS == 32
2747#define MPN_INCR_U(ptr, size, incr)  MPN_IORD_U (ptr, incr, "addl")
2748#define MPN_DECR_U(ptr, size, incr)  MPN_IORD_U (ptr, incr, "subl")
2749#endif
2750#if GMP_LIMB_BITS == 64
2751#define MPN_INCR_U(ptr, size, incr)  MPN_IORD_U (ptr, incr, "addq")
2752#define MPN_DECR_U(ptr, size, incr)  MPN_IORD_U (ptr, incr, "subq")
2753#endif
2754#define mpn_incr_u(ptr, incr)  MPN_INCR_U (ptr, 0, incr)
2755#define mpn_decr_u(ptr, incr)  MPN_DECR_U (ptr, 0, incr)
2756#endif
2757
2758#if GMP_NAIL_BITS == 0
2759#ifndef mpn_incr_u
2760#define mpn_incr_u(p,incr)						\
2761  do {									\
2762    mp_limb_t __x;							\
2763    mp_ptr __p = (p);							\
2764    if (__builtin_constant_p (incr) && (incr) == 1)			\
2765      {									\
2766	while (++(*(__p++)) == 0)					\
2767	  ;								\
2768      }									\
2769    else								\
2770      {									\
2771	__x = *__p + (incr);						\
2772	*__p = __x;							\
2773	if (__x < (incr))						\
2774	  while (++(*(++__p)) == 0)					\
2775	    ;								\
2776      }									\
2777  } while (0)
2778#endif
2779#ifndef mpn_decr_u
2780#define mpn_decr_u(p,incr)						\
2781  do {									\
2782    mp_limb_t __x;							\
2783    mp_ptr __p = (p);							\
2784    if (__builtin_constant_p (incr) && (incr) == 1)			\
2785      {									\
2786	while ((*(__p++))-- == 0)					\
2787	  ;								\
2788      }									\
2789    else								\
2790      {									\
2791	__x = *__p;							\
2792	*__p = __x - (incr);						\
2793	if (__x < (incr))						\
2794	  while ((*(++__p))-- == 0)					\
2795	    ;								\
2796      }									\
2797  } while (0)
2798#endif
2799#endif
2800
2801#if GMP_NAIL_BITS >= 1
2802#ifndef mpn_incr_u
2803#define mpn_incr_u(p,incr)						\
2804  do {									\
2805    mp_limb_t __x;							\
2806    mp_ptr __p = (p);							\
2807    if (__builtin_constant_p (incr) && (incr) == 1)			\
2808      {									\
2809	do								\
2810	  {								\
2811	    __x = (*__p + 1) & GMP_NUMB_MASK;				\
2812	    *__p++ = __x;						\
2813	  }								\
2814	while (__x == 0);						\
2815      }									\
2816    else								\
2817      {									\
2818	__x = (*__p + (incr));						\
2819	*__p++ = __x & GMP_NUMB_MASK;					\
2820	if (__x >> GMP_NUMB_BITS != 0)					\
2821	  {								\
2822	    do								\
2823	      {								\
2824		__x = (*__p + 1) & GMP_NUMB_MASK;			\
2825		*__p++ = __x;						\
2826	      }								\
2827	    while (__x == 0);						\
2828	  }								\
2829      }									\
2830  } while (0)
2831#endif
2832#ifndef mpn_decr_u
2833#define mpn_decr_u(p,incr)						\
2834  do {									\
2835    mp_limb_t __x;							\
2836    mp_ptr __p = (p);							\
2837    if (__builtin_constant_p (incr) && (incr) == 1)			\
2838      {									\
2839	do								\
2840	  {								\
2841	    __x = *__p;							\
2842	    *__p++ = (__x - 1) & GMP_NUMB_MASK;				\
2843	  }								\
2844	while (__x == 0);						\
2845      }									\
2846    else								\
2847      {									\
2848	__x = *__p - (incr);						\
2849	*__p++ = __x & GMP_NUMB_MASK;					\
2850	if (__x >> GMP_NUMB_BITS != 0)					\
2851	  {								\
2852	    do								\
2853	      {								\
2854		__x = *__p;						\
2855		*__p++ = (__x - 1) & GMP_NUMB_MASK;			\
2856	      }								\
2857	    while (__x == 0);						\
2858	  }								\
2859      }									\
2860  } while (0)
2861#endif
2862#endif
2863
2864#ifndef MPN_INCR_U
2865#if WANT_ASSERT
2866#define MPN_INCR_U(ptr, size, n)					\
2867  do {									\
2868    ASSERT ((size) >= 1);						\
2869    ASSERT_NOCARRY (mpn_add_1 (ptr, ptr, size, n));			\
2870  } while (0)
2871#else
2872#define MPN_INCR_U(ptr, size, n)   mpn_incr_u (ptr, n)
2873#endif
2874#endif
2875
2876#ifndef MPN_DECR_U
2877#if WANT_ASSERT
2878#define MPN_DECR_U(ptr, size, n)					\
2879  do {									\
2880    ASSERT ((size) >= 1);						\
2881    ASSERT_NOCARRY (mpn_sub_1 (ptr, ptr, size, n));			\
2882  } while (0)
2883#else
2884#define MPN_DECR_U(ptr, size, n)   mpn_decr_u (ptr, n)
2885#endif
2886#endif
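
/* Usage sketch (illustrative, hypothetical names): fold a carry limb from
   a low-part addition into the hi_n high limbs already stored above it, on
   the assumption (asserted in a WANT_ASSERT build) that those limbs can
   absorb it without a further carry out.

       cy = mpn_add_n (rp, rp, tp, lo_n);
       MPN_INCR_U (rp + lo_n, hi_n, cy);
*/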
2887
2888
2889/* Structure for conversion between internal binary format and strings.  */
2890struct bases
2891{
2892  /* Number of digits in the conversion base that always fits in an mp_limb_t.
2893     For example, for base 10 on a machine where an mp_limb_t has 32 bits this
2894     is 9, since 10**9 is the largest number that fits into an mp_limb_t.  */
2895  int chars_per_limb;
2896
2897  /* log(2)/log(conversion_base) */
2898  mp_limb_t logb2;
2899
2900  /* log(conversion_base)/log(2) */
2901  mp_limb_t log2b;
2902
2903  /* base**chars_per_limb, i.e. the biggest number that fits a word, built by
2904     factors of base.  Exception: For 2, 4, 8, etc, big_base is log2(base),
2905     i.e. the number of bits used to represent each digit in the base.  */
2906  mp_limb_t big_base;
2907
2908  /* A GMP_LIMB_BITS bit approximation to 1/big_base, represented as a
2909     fixed-point number.  Instead of dividing by big_base an application can
2910     choose to multiply by big_base_inverted.  */
2911  mp_limb_t big_base_inverted;
2912};
2913
2914#define   mp_bases __MPN(bases)
2915__GMP_DECLSPEC extern const struct bases mp_bases[257];
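
/* Example entries, following the field descriptions above: with a 32-bit
   mp_limb_t, mp_bases[10].chars_per_limb is 9 and mp_bases[10].big_base is
   1000000000 (10**9), while for the power of 2 base 16, big_base holds
   log2(16) = 4, ie. the bits per digit.  */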
2916
2917
2918/* Compute the number of digits in base for nbits bits, making sure the result
2919   is never too small.  The two variants of the macro implement the same
2920   function; the GT2 variant below works just for bases > 2.  */
2921#define DIGITS_IN_BASE_FROM_BITS(res, nbits, b)				\
2922  do {									\
2923    mp_limb_t _ph, _dummy;						\
2924    size_t _nbits = (nbits);						\
2925    umul_ppmm (_ph, _dummy, mp_bases[b].logb2, _nbits);			\
2926    _ph += (_dummy + _nbits < _dummy);					\
2927    res = _ph + 1;							\
2928  } while (0)
2929#define DIGITS_IN_BASEGT2_FROM_BITS(res, nbits, b)			\
2930  do {									\
2931    mp_limb_t _ph, _dummy;						\
2932    size_t _nbits = (nbits);						\
2933    umul_ppmm (_ph, _dummy, mp_bases[b].logb2 + 1, _nbits);		\
2934    res = _ph + 1;							\
2935  } while (0)
2936
2937/* For power of 2 bases this is exact.  For other bases the result is either
2938   exact or one too big.
2939
2940   To be exact always it'd be necessary to examine all the limbs of the
2941   operand, since numbers like 100..000 and 99...999 generally differ only
2942   in the lowest limb.  It'd be possible to examine just a couple of high
2943   limbs to increase the probability of being exact, but that doesn't seem
2944   worth bothering with.  */
2945
2946#define MPN_SIZEINBASE(result, ptr, size, base)				\
2947  do {									\
2948    int	   __lb_base, __cnt;						\
2949    size_t __totbits;							\
2950									\
2951    ASSERT ((size) >= 0);						\
2952    ASSERT ((base) >= 2);						\
2953    ASSERT ((base) < numberof (mp_bases));				\
2954									\
2955    /* Special case for X == 0.  */					\
2956    if ((size) == 0)							\
2957      (result) = 1;							\
2958    else								\
2959      {									\
2960	/* Calculate the total number of significant bits of X.  */	\
2961	count_leading_zeros (__cnt, (ptr)[(size)-1]);			\
2962	__totbits = (size_t) (size) * GMP_NUMB_BITS - (__cnt - GMP_NAIL_BITS);\
2963									\
2964	if (POW2_P (base))						\
2965	  {								\
2966	    __lb_base = mp_bases[base].big_base;			\
2967	    (result) = (__totbits + __lb_base - 1) / __lb_base;		\
2968	  }								\
2969	else								\
2970	  {								\
2971	    DIGITS_IN_BASEGT2_FROM_BITS (result, __totbits, base);	\
2972	  }								\
2973      }									\
2974  } while (0)
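
/* An illustrative (not normative) use, with hypothetical names xp and xn
   for the operand: to size the digit buffer for a conversion one might do

     size_t  ndigits;
     MPN_SIZEINBASE (ndigits, xp, xn, 10);

   accepting that for non-power-of-2 bases the count can be one more than
   strictly needed, as described above.  */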
2975
2976#define MPN_SIZEINBASE_2EXP(result, ptr, size, base2exp)			\
2977  do {										\
2978    int          __cnt;								\
2979    mp_bitcnt_t  __totbits;							\
2980    ASSERT ((size) > 0);							\
2981    ASSERT ((ptr)[(size)-1] != 0);						\
2982    count_leading_zeros (__cnt, (ptr)[(size)-1]);				\
2983    __totbits = (mp_bitcnt_t) (size) * GMP_NUMB_BITS - (__cnt - GMP_NAIL_BITS);	\
2984    (result) = (__totbits + (base2exp)-1) / (base2exp);				\
2985  } while (0)
2986
2987
2988/* bit count to limb count, rounding up */
2989#define BITS_TO_LIMBS(n)  (((n) + (GMP_NUMB_BITS - 1)) / GMP_NUMB_BITS)
2990
2991/* MPN_SET_UI sets an mpn (ptr, cnt) to a given ui.  MPZ_FAKE_UI creates a
2992   fake mpz_t from a ui.  The zp argument must have room for LIMBS_PER_ULONG
2993   limbs in both cases (LIMBS_PER_ULONG is also defined here).  */
2994#if BITS_PER_ULONG <= GMP_NUMB_BITS /* need one limb per ulong */
2995
2996#define LIMBS_PER_ULONG 1
2997#define MPN_SET_UI(zp, zn, u)						\
2998  (zp)[0] = (u);							\
2999  (zn) = ((zp)[0] != 0);
3000#define MPZ_FAKE_UI(z, zp, u)						\
3001  (zp)[0] = (u);							\
3002  PTR (z) = (zp);							\
3003  SIZ (z) = ((zp)[0] != 0);						\
3004  ASSERT_CODE (ALLOC (z) = 1);
3005
3006#else /* need two limbs per ulong */
3007
3008#define LIMBS_PER_ULONG 2
3009#define MPN_SET_UI(zp, zn, u)						\
3010  (zp)[0] = (u) & GMP_NUMB_MASK;					\
3011  (zp)[1] = (u) >> GMP_NUMB_BITS;					\
3012  (zn) = ((zp)[1] != 0 ? 2 : (zp)[0] != 0 ? 1 : 0);
3013#define MPZ_FAKE_UI(z, zp, u)						\
3014  (zp)[0] = (u) & GMP_NUMB_MASK;					\
3015  (zp)[1] = (u) >> GMP_NUMB_BITS;					\
3016  SIZ (z) = ((zp)[1] != 0 ? 2 : (zp)[0] != 0 ? 1 : 0);			\
3017  PTR (z) = (zp);							\
3018  ASSERT_CODE (ALLOC (z) = 2);
3019
3020#endif
3021
3022
3023#if HAVE_HOST_CPU_FAMILY_x86
3024#define TARGET_REGISTER_STARVED 1
3025#else
3026#define TARGET_REGISTER_STARVED 0
3027#endif
3028
3029
3030/* LIMB_HIGHBIT_TO_MASK(n) examines the high bit of a limb value and turns 1
3031   or 0 there into a limb 0xFF..FF or 0 respectively.
3032
3033   On most CPUs this is just an arithmetic right shift by GMP_LIMB_BITS-1,
3034   but C99 doesn't guarantee signed right shifts are arithmetic, so we have
3035   a little compile-time test and a fallback to a "? :" form.  The latter is
3036   necessary for instance on Cray vector systems.
3037
3038   Recent versions of gcc (eg. 3.3) will in fact optimize a "? :" like this
3039   to an arithmetic right shift anyway, but it's good to get the desired
3040   shift on past versions too (in particular since an important use of
3041   LIMB_HIGHBIT_TO_MASK is in udiv_qrnnd_preinv).  */
3042
3043#define LIMB_HIGHBIT_TO_MASK(n)						\
3044  (((mp_limb_signed_t) -1 >> 1) < 0					\
3045   ? (mp_limb_signed_t) (n) >> (GMP_LIMB_BITS - 1)			\
3046   : (n) & GMP_LIMB_HIGHBIT ? MP_LIMB_T_MAX : CNST_LIMB(0))
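
/* For instance, LIMB_HIGHBIT_TO_MASK (GMP_LIMB_HIGHBIT) is 0xFF..FF and
   LIMB_HIGHBIT_TO_MASK (CNST_LIMB(1)) is 0.  A typical use is turning the
   top bit of a twos-complement difference into a mask for a conditional
   addition, as in udiv_qrnnd_preinv style code.  */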
3047
3048
3049/* Use a library function for invert_limb, if available. */
3050#define  mpn_invert_limb __MPN(invert_limb)
3051__GMP_DECLSPEC mp_limb_t mpn_invert_limb (mp_limb_t) ATTRIBUTE_CONST;
3052#if ! defined (invert_limb) && HAVE_NATIVE_mpn_invert_limb
3053#define invert_limb(invxl,xl)						\
3054  do {									\
3055    (invxl) = mpn_invert_limb (xl);					\
3056  } while (0)
3057#endif
3058
3059#ifndef invert_limb
3060#define invert_limb(invxl,xl)						\
3061  do {									\
3062    mp_limb_t _dummy;							\
3063    ASSERT ((xl) != 0);							\
3064    udiv_qrnnd (invxl, _dummy, ~(xl), ~CNST_LIMB(0), xl);		\
3065  } while (0)
3066#endif
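
/* A sketch of why the fallback works, assuming GMP_NAIL_BITS == 0 and xl
   normalized (high bit set): the udiv_qrnnd numerator is
   ~xl * B + (B-1) = (B-1-xl)*B + (B-1) = B^2 - 1 - xl*B, with B = 2^GMP_LIMB_BITS,
   so the quotient is floor ((B^2 - 1) / xl) - B, the usual reciprocal
   expected by udiv_qrnnd_preinv.  */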
3067
3068#define invert_pi1(dinv, d1, d0)					\
3069  do {									\
3070    mp_limb_t _v, _p, _t1, _t0, _mask;					\
3071    invert_limb (_v, d1);						\
3072    _p = (d1) * _v;							\
3073    _p += (d0);								\
3074    if (_p < (d0))							\
3075      {									\
3076	_v--;								\
3077	_mask = -(mp_limb_t) (_p >= (d1));				\
3078	_p -= (d1);							\
3079	_v += _mask;							\
3080	_p -= _mask & (d1);						\
3081      }									\
3082    umul_ppmm (_t1, _t0, d0, _v);					\
3083    _p += _t1;								\
3084    if (_p < _t1)							\
3085      {									\
3086	_v--;								\
3087	if (UNLIKELY (_p >= (d1)))					\
3088	  {								\
3089	    if (_p > (d1) || _t0 >= (d0))				\
3090	      _v--;							\
3091	  }								\
3092      }									\
3093    (dinv).inv32 = _v;							\
3094  } while (0)
3095
3096
3097/* udiv_qrnnd_preinv -- Based on work by Niels Möller and Torbjörn Granlund.
3098   We write things strangely below, to help gcc.  A more straightforward
3099   version:
3100	_r = (nl) - _qh * (d);
3101	_t = _r + (d);
3102	if (_r >= _ql)
3103	  {
3104	    _qh--;
3105	    _r = _t;
3106	  }
3107   For a critical path one operation shorter, one may want to use this form:
3108	_p = _qh * (d)
3109	_s = (nl) + (d);
3110	_r = (nl) - _p;
3111	_t = _s - _p;
3112	if (_r >= _ql)
3113	  {
3114	    _qh--;
3115	    _r = _t;
3116	  }
3117*/
3118#define udiv_qrnnd_preinv(q, r, nh, nl, d, di)				\
3119  do {									\
3120    mp_limb_t _qh, _ql, _r, _mask;					\
3121    umul_ppmm (_qh, _ql, (nh), (di));					\
3122    if (__builtin_constant_p (nl) && (nl) == 0)				\
3123      {									\
3124	_qh += (nh) + 1;						\
3125	_r = - _qh * (d);						\
3126	_mask = -(mp_limb_t) (_r > _ql); /* both > and >= are OK */	\
3127	_qh += _mask;							\
3128	_r += _mask & (d);						\
3129      }									\
3130    else								\
3131      {									\
3132	add_ssaaaa (_qh, _ql, _qh, _ql, (nh) + 1, (nl));		\
3133	_r = (nl) - _qh * (d);						\
3134	_mask = -(mp_limb_t) (_r > _ql); /* both > and >= are OK */	\
3135	_qh += _mask;							\
3136	_r += _mask & (d);						\
3137	if (UNLIKELY (_r >= (d)))					\
3138	  {								\
3139	    _r -= (d);							\
3140	    _qh++;							\
3141	  }								\
3142      }									\
3143    (r) = _r;								\
3144    (q) = _qh;								\
3145  } while (0)
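
/* An illustrative usage sketch (names here are just examples): with d
   normalized so its high bit is set and nh < d,

     mp_limb_t  q, r, dinv;
     invert_limb (dinv, d);
     udiv_qrnnd_preinv (q, r, nh, nl, d, dinv);

   gives the same q and r as udiv_qrnnd, with the division replaced by
   multiplications once dinv is available.  */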
3146
3147/* Dividing (NH, NL) by D, returning the remainder only. Unlike
3148   udiv_qrnnd_preinv, works also for the case NH == D, where the
3149   quotient doesn't quite fit in a single limb. */
3150#define udiv_rnnd_preinv(r, nh, nl, d, di)				\
3151  do {									\
3152    mp_limb_t _qh, _ql, _r, _mask;					\
3153    umul_ppmm (_qh, _ql, (nh), (di));					\
3154    if (__builtin_constant_p (nl) && (nl) == 0)				\
3155      {									\
3156	_r = ~(_qh + (nh)) * (d);					\
3157	_mask = -(mp_limb_t) (_r > _ql); /* both > and >= are OK */	\
3158	_r += _mask & (d);						\
3159      }									\
3160    else								\
3161      {									\
3162	add_ssaaaa (_qh, _ql, _qh, _ql, (nh) + 1, (nl));		\
3163	_r = (nl) - _qh * (d);						\
3164	_mask = -(mp_limb_t) (_r > _ql); /* both > and >= are OK */	\
3165	_r += _mask & (d);						\
3166	if (UNLIKELY (_r >= (d)))					\
3167	  _r -= (d);							\
3168      }									\
3169    (r) = _r;								\
3170  } while (0)
3171
3172/* Compute the quotient and remainder for n / d. Requires d
3173   >= B^2 / 2 and n < d B. di is the inverse
3174
3175     floor ((B^3 - 1) / (d0 + d1 B)) - B.
3176
3177   NOTE: Output variables are updated multiple times. Only some inputs
3178   and outputs may overlap.
3179*/
3180#define udiv_qr_3by2(q, r1, r0, n2, n1, n0, d1, d0, dinv)		\
3181  do {									\
3182    mp_limb_t _q0, _t1, _t0, _mask;					\
3183    umul_ppmm ((q), _q0, (n2), (dinv));					\
3184    add_ssaaaa ((q), _q0, (q), _q0, (n2), (n1));			\
3185									\
3186    /* Compute the two most significant limbs of n - q'd */		\
3187    (r1) = (n1) - (d1) * (q);						\
3188    sub_ddmmss ((r1), (r0), (r1), (n0), (d1), (d0));			\
3189    umul_ppmm (_t1, _t0, (d0), (q));					\
3190    sub_ddmmss ((r1), (r0), (r1), (r0), _t1, _t0);			\
3191    (q)++;								\
3192									\
3193    /* Conditionally adjust q and the remainders */			\
3194    _mask = - (mp_limb_t) ((r1) >= _q0);				\
3195    (q) += _mask;							\
3196    add_ssaaaa ((r1), (r0), (r1), (r0), _mask & (d1), _mask & (d0));	\
3197    if (UNLIKELY ((r1) >= (d1)))					\
3198      {									\
3199	if ((r1) > (d1) || (r0) >= (d0))				\
3200	  {								\
3201	    (q)++;							\
3202	    sub_ddmmss ((r1), (r0), (r1), (r0), (d1), (d0));		\
3203	  }								\
3204      }									\
3205  } while (0)
3206
3207#ifndef mpn_preinv_divrem_1  /* if not done with cpuvec in a fat binary */
3208#define   mpn_preinv_divrem_1 __MPN(preinv_divrem_1)
3209__GMP_DECLSPEC mp_limb_t mpn_preinv_divrem_1 (mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t, int);
3210#endif
3211
3212
3213/* USE_PREINV_DIVREM_1 is whether to use mpn_preinv_divrem_1, as opposed to the
3214   plain mpn_divrem_1.  The default is yes, since the few CISC chips where
3215   preinv is not good have defines saying so.  */
3216#ifndef USE_PREINV_DIVREM_1
3217#define USE_PREINV_DIVREM_1   1
3218#endif
3219
3220#if USE_PREINV_DIVREM_1
3221#define MPN_DIVREM_OR_PREINV_DIVREM_1(qp,xsize,ap,size,d,dinv,shift)    \
3222  mpn_preinv_divrem_1 (qp, xsize, ap, size, d, dinv, shift)
3223#else
3224#define MPN_DIVREM_OR_PREINV_DIVREM_1(qp,xsize,ap,size,d,dinv,shift)    \
3225  mpn_divrem_1 (qp, xsize, ap, size, d)
3226#endif
3227
3228#ifndef PREINV_MOD_1_TO_MOD_1_THRESHOLD
3229#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 10
3230#endif
3231
3232/* This selection may seem backwards.  The reason mpn_mod_1 typically takes
3233   over for larger sizes is that it uses the mod_1_1 function.  */
3234#define MPN_MOD_OR_PREINV_MOD_1(src,size,divisor,inverse)		\
3235  (BELOW_THRESHOLD (size, PREINV_MOD_1_TO_MOD_1_THRESHOLD)		\
3236   ? mpn_preinv_mod_1 (src, size, divisor, inverse)			\
3237   : mpn_mod_1 (src, size, divisor))
3238
3239
3240#ifndef mpn_mod_34lsub1  /* if not done with cpuvec in a fat binary */
3241#define mpn_mod_34lsub1 __MPN(mod_34lsub1)
3242__GMP_DECLSPEC mp_limb_t mpn_mod_34lsub1 (mp_srcptr, mp_size_t) __GMP_ATTRIBUTE_PURE;
3243#endif
3244
3245
3246/* DIVEXACT_1_THRESHOLD is at what size to use mpn_divexact_1, as opposed to
3247   plain mpn_divrem_1.  Likewise BMOD_1_TO_MOD_1_THRESHOLD for
3248   mpn_modexact_1_odd against plain mpn_mod_1.  On most CPUs divexact and
3249   modexact are faster at all sizes, so the defaults are 0.  Those CPUs
3250   where this is not right have a tuned threshold.  */
3251#ifndef DIVEXACT_1_THRESHOLD
3252#define DIVEXACT_1_THRESHOLD  0
3253#endif
3254#ifndef BMOD_1_TO_MOD_1_THRESHOLD
3255#define BMOD_1_TO_MOD_1_THRESHOLD  10
3256#endif
3257
3258#define MPN_DIVREM_OR_DIVEXACT_1(rp, up, n, d)				\
3259  do {									\
3260    if (BELOW_THRESHOLD (n, DIVEXACT_1_THRESHOLD))			\
3261      ASSERT_NOCARRY (mpn_divrem_1 (rp, (mp_size_t) 0, up, n, d));	\
3262    else								\
3263      {									\
3264	ASSERT (mpn_mod_1 (up, n, d) == 0);				\
3265	mpn_divexact_1 (rp, up, n, d);					\
3266      }									\
3267  } while (0)
3268
3269#ifndef mpn_modexact_1c_odd  /* if not done with cpuvec in a fat binary */
3270#define mpn_modexact_1c_odd __MPN(modexact_1c_odd)
3271__GMP_DECLSPEC mp_limb_t mpn_modexact_1c_odd (mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t) __GMP_ATTRIBUTE_PURE;
3272#endif
3273
3274#if HAVE_NATIVE_mpn_modexact_1_odd
3275#define   mpn_modexact_1_odd  __MPN(modexact_1_odd)
3276__GMP_DECLSPEC mp_limb_t mpn_modexact_1_odd (mp_srcptr, mp_size_t, mp_limb_t) __GMP_ATTRIBUTE_PURE;
3277#else
3278#define mpn_modexact_1_odd(src,size,divisor) \
3279  mpn_modexact_1c_odd (src, size, divisor, CNST_LIMB(0))
3280#endif
3281
3282#define MPN_MOD_OR_MODEXACT_1_ODD(src,size,divisor)			\
3283  (BELOW_THRESHOLD (size, BMOD_1_TO_MOD_1_THRESHOLD)			\
3284   ? mpn_modexact_1_odd (src, size, divisor)				\
3285   : mpn_mod_1 (src, size, divisor))
3286
3287/* binvert_limb() sets inv to the multiplicative inverse of n modulo
3288   2^GMP_NUMB_BITS, ie. satisfying inv*n == 1 mod 2^GMP_NUMB_BITS.
3289   n must be odd (otherwise such an inverse doesn't exist).
3290
3291   This is not to be confused with invert_limb(), which is completely
3292   different.
3293
3294   The table lookup gives an inverse with the low 8 bits valid, and each
3295   multiply step doubles the number of bits.  See Jebelean "An algorithm for
3296   exact division" end of section 4 (reference in gmp.texi).
3297
3298   Possible enhancement: Could use UHWtype until the last step, if half-size
3299   multiplies are faster (might help under _LONG_LONG_LIMB).
3300
3301   Alternative: As noted in Granlund and Montgomery "Division by Invariant
3302   Integers using Multiplication" (reference in gmp.texi), n itself gives a
3303   3-bit inverse immediately, and could be used instead of a table lookup.
3304   A 4-bit inverse can be obtained effectively from xoring bits 1 and 2 into
3305   bit 3, for instance with (((n + 2) & 4) << 1) ^ n.  */
3306
3307#define binvert_limb_table  __gmp_binvert_limb_table
3308__GMP_DECLSPEC extern const unsigned char  binvert_limb_table[128];
3309
3310#define binvert_limb(inv,n)						\
3311  do {									\
3312    mp_limb_t  __n = (n);						\
3313    mp_limb_t  __inv;							\
3314    ASSERT ((__n & 1) == 1);						\
3315									\
3316    __inv = binvert_limb_table[(__n/2) & 0x7F]; /*  8 */		\
3317    if (GMP_NUMB_BITS > 8)   __inv = 2 * __inv - __inv * __inv * __n;	\
3318    if (GMP_NUMB_BITS > 16)  __inv = 2 * __inv - __inv * __inv * __n;	\
3319    if (GMP_NUMB_BITS > 32)  __inv = 2 * __inv - __inv * __inv * __n;	\
3320									\
3321    if (GMP_NUMB_BITS > 64)						\
3322      {									\
3323	int  __invbits = 64;						\
3324	do {								\
3325	  __inv = 2 * __inv - __inv * __inv * __n;			\
3326	  __invbits *= 2;						\
3327	} while (__invbits < GMP_NUMB_BITS);				\
3328      }									\
3329									\
3330    ASSERT ((__inv * __n & GMP_NUMB_MASK) == 1);			\
3331    (inv) = __inv & GMP_NUMB_MASK;					\
3332  } while (0)
3333#define modlimb_invert binvert_limb  /* backward compatibility */
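
/* Why each step above doubles the number of correct bits (a sketch): if
   inv*n == 1 + t*2^k for some t, then for inv' = 2*inv - inv*inv*n we get
   inv'*n = 2*inv*n - (inv*n)^2 = 1 - t^2*2^(2k), ie. inv'*n == 1 mod 2^(2k).
   Starting from the 8 valid bits given by the table, a few steps reach any
   current limb size.  */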
3334
3335/* Multiplicative inverse of 3, modulo 2^GMP_NUMB_BITS.
3336   Eg. 0xAAAAAAAB for 32 bits, 0xAAAAAAAAAAAAAAAB for 64 bits.
3337   GMP_NUMB_MAX/3*2+1 is right when GMP_NUMB_BITS is even, but when it's odd
3338   we need to start from GMP_NUMB_MAX>>1. */
3339#define MODLIMB_INVERSE_3 (((GMP_NUMB_MAX >> (GMP_NUMB_BITS % 2)) / 3) * 2 + 1)
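
/* Quick check of the 32-bit value quoted above: 3 * 0xAAAAAAAB is
   0x200000001, which is 1 mod 2^32 as required.  */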
3340
3341/* ceil(GMP_NUMB_MAX/3) and ceil(2*GMP_NUMB_MAX/3).
3342   These expressions work because GMP_NUMB_MAX%3 != 0 for all GMP_NUMB_BITS. */
3343#define GMP_NUMB_CEIL_MAX_DIV3   (GMP_NUMB_MAX / 3 + 1)
3344#define GMP_NUMB_CEIL_2MAX_DIV3  ((GMP_NUMB_MAX>>1) / 3 + 1 + GMP_NUMB_HIGHBIT)
3345
3346
3347/* Set r to -a mod d.  a>=d is allowed.  Can give r>d.  All should be limbs.
3348
3349   It's not clear whether this is the best way to do this calculation.
3350   Anything congruent to -a would be fine for the one limb congruence
3351   tests.  */
3352
3353#define NEG_MOD(r, a, d)						\
3354  do {									\
3355    ASSERT ((d) != 0);							\
3356    ASSERT_LIMB (a);							\
3357    ASSERT_LIMB (d);							\
3358									\
3359    if ((a) <= (d))							\
3360      {									\
3361	/* small a is reasonably likely */				\
3362	(r) = (d) - (a);						\
3363      }									\
3364    else								\
3365      {									\
3366	unsigned   __twos;						\
3367	mp_limb_t  __dnorm;						\
3368	count_leading_zeros (__twos, d);				\
3369	__twos -= GMP_NAIL_BITS;					\
3370	__dnorm = (d) << __twos;					\
3371	(r) = ((a) <= __dnorm ? __dnorm : 2*__dnorm) - (a);		\
3372      }									\
3373									\
3374    ASSERT_LIMB (r);							\
3375  } while (0)
3376
3377/* A bit mask of all the least significant zero bits of n, or -1 if n==0. */
3378#define LOW_ZEROS_MASK(n)  (((n) & -(n)) - 1)
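
/* For example, applied to binary ...101000 this gives ...000111, and
   LOW_ZEROS_MASK(0) gives all ones, since 0 & -0 is 0 and the -1 then
   wraps to the maximum value of the type.  */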
3379
3380
3381/* ULONG_PARITY sets "p" to 1 if there's an odd number of 1 bits in "n", or
3382   to 0 if there's an even number.  "n" should be an unsigned long and "p"
3383   an int.  */
3384
3385#if defined (__GNUC__) && ! defined (NO_ASM) && HAVE_HOST_CPU_alpha_CIX
3386#define ULONG_PARITY(p, n)						\
3387  do {									\
3388    int __p;								\
3389    __asm__ ("ctpop %1, %0" : "=r" (__p) : "r" (n));			\
3390    (p) = __p & 1;							\
3391  } while (0)
3392#endif
3393
3394/* Cray intrinsic _popcnt. */
3395#ifdef _CRAY
3396#define ULONG_PARITY(p, n)      \
3397  do {                          \
3398    (p) = _popcnt (n) & 1;      \
3399  } while (0)
3400#endif
3401
3402#if defined (__GNUC__) && ! defined (__INTEL_COMPILER)			\
3403    && ! defined (NO_ASM) && defined (__ia64)
3404/* unsigned long is either 32 or 64 bits depending on the ABI; zero extend it
3405   to a 64 bit unsigned long long for popcnt.  */
3406#define ULONG_PARITY(p, n)						\
3407  do {									\
3408    unsigned long long  __n = (unsigned long) (n);			\
3409    int  __p;								\
3410    __asm__ ("popcnt %0 = %1" : "=r" (__p) : "r" (__n));		\
3411    (p) = __p & 1;							\
3412  } while (0)
3413#endif
3414
3415#if defined (__GNUC__) && ! defined (__INTEL_COMPILER)			\
3416    && ! defined (NO_ASM) && HAVE_HOST_CPU_FAMILY_x86
3417#if __GMP_GNUC_PREREQ (3,1)
3418#define __GMP_qm "=Qm"
3419#define __GMP_q "=Q"
3420#else
3421#define __GMP_qm "=qm"
3422#define __GMP_q "=q"
3423#endif
3424#define ULONG_PARITY(p, n)						\
3425  do {									\
3426    char	   __p;							\
3427    unsigned long  __n = (n);						\
3428    __n ^= (__n >> 16);							\
3429    __asm__ ("xorb %h1, %b1\n\t"					\
3430	     "setpo %0"							\
3431	 : __GMP_qm (__p), __GMP_q (__n)				\
3432	 : "1" (__n));							\
3433    (p) = __p;								\
3434  } while (0)
3435#endif
3436
3437#if ! defined (ULONG_PARITY)
3438#define ULONG_PARITY(p, n)						\
3439  do {									\
3440    unsigned long  __n = (n);						\
3441    int  __p = 0;							\
3442    do									\
3443      {									\
3444	__p ^= 0x96696996L >> (__n & 0x1F);				\
3445	__n >>= 5;							\
3446      }									\
3447    while (__n != 0);							\
3448									\
3449    (p) = __p & 1;							\
3450  } while (0)
3451#endif
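
/* How the generic version works (explanatory note): bit i of the constant
   0x96696996 is the parity of the 5-bit value i, for 0 <= i <= 31, so each
   pass xors the parity of one 5-bit chunk of __n into bit 0 of __p, and the
   final "& 1" discards the other bits.  */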
3452
3453
3454/* 3 cycles on 604 or 750 since shifts and rlwimi's can pair.  gcc (as of
3455   version 3.1 at least) doesn't seem to know how to generate rlwimi for
3456   anything other than bit-fields, so use "asm".  */
3457#if defined (__GNUC__) && ! defined (NO_ASM)                    \
3458  && HAVE_HOST_CPU_FAMILY_powerpc && GMP_LIMB_BITS == 32
3459#define BSWAP_LIMB(dst, src)						\
3460  do {									\
3461    mp_limb_t  __bswapl_src = (src);					\
3462    mp_limb_t  __tmp1 = __bswapl_src >> 24;		/* low byte */	\
3463    mp_limb_t  __tmp2 = __bswapl_src << 24;		/* high byte */	\
3464    __asm__ ("rlwimi %0, %2, 24, 16, 23"		/* 2nd low */	\
3465	 : "=r" (__tmp1) : "0" (__tmp1), "r" (__bswapl_src));		\
3466    __asm__ ("rlwimi %0, %2,  8,  8, 15"		/* 3rd high */	\
3467	 : "=r" (__tmp2) : "0" (__tmp2), "r" (__bswapl_src));		\
3468    (dst) = __tmp1 | __tmp2;				/* whole */	\
3469  } while (0)
3470#endif
3471
3472/* bswap is available on i486 and up and is fast.  A combination rorw $8 /
3473   roll $16 / rorw $8 is used in glibc for plain i386 (and in the linux
3474   kernel with xchgb instead of rorw), but this is not done here, because
3475   i386 means generic x86 and mixing word and dword operations will cause
3476   partial register stalls on P6 chips.  */
3477#if defined (__GNUC__) && ! defined (NO_ASM)            \
3478  && HAVE_HOST_CPU_FAMILY_x86 && ! HAVE_HOST_CPU_i386   \
3479  && GMP_LIMB_BITS == 32
3480#define BSWAP_LIMB(dst, src)						\
3481  do {									\
3482    __asm__ ("bswap %0" : "=r" (dst) : "0" (src));			\
3483  } while (0)
3484#endif
3485
3486#if defined (__GNUC__) && ! defined (NO_ASM)            \
3487  && defined (__amd64__) && GMP_LIMB_BITS == 64
3488#define BSWAP_LIMB(dst, src)						\
3489  do {									\
3490    __asm__ ("bswap %q0" : "=r" (dst) : "0" (src));			\
3491  } while (0)
3492#endif
3493
3494#if defined (__GNUC__) && ! defined (__INTEL_COMPILER)			\
3495    && ! defined (NO_ASM) && defined (__ia64) && GMP_LIMB_BITS == 64
3496#define BSWAP_LIMB(dst, src)						\
3497  do {									\
3498    __asm__ ("mux1 %0 = %1, @rev" : "=r" (dst) :  "r" (src));		\
3499  } while (0)
3500#endif
3501
3502/* As per glibc. */
3503#if defined (__GNUC__) && ! defined (NO_ASM)                    \
3504  && HAVE_HOST_CPU_FAMILY_m68k && GMP_LIMB_BITS == 32
3505#define BSWAP_LIMB(dst, src)						\
3506  do {									\
3507    mp_limb_t  __bswapl_src = (src);					\
3508    __asm__ ("ror%.w %#8, %0\n\t"					\
3509	     "swap   %0\n\t"						\
3510	     "ror%.w %#8, %0"						\
3511	     : "=d" (dst)						\
3512	     : "0" (__bswapl_src));					\
3513  } while (0)
3514#endif
3515
3516#if ! defined (BSWAP_LIMB)
3517#if GMP_LIMB_BITS == 8
3518#define BSWAP_LIMB(dst, src)				\
3519  do { (dst) = (src); } while (0)
3520#endif
3521#if GMP_LIMB_BITS == 16
3522#define BSWAP_LIMB(dst, src)						\
3523  do {									\
3524    (dst) = ((src) << 8) + ((src) >> 8);				\
3525  } while (0)
3526#endif
3527#if GMP_LIMB_BITS == 32
3528#define BSWAP_LIMB(dst, src)						\
3529  do {									\
3530    (dst) =								\
3531      ((src) << 24)							\
3532      + (((src) & 0xFF00) << 8)						\
3533      + (((src) >> 8) & 0xFF00)						\
3534      + ((src) >> 24);							\
3535  } while (0)
3536#endif
3537#if GMP_LIMB_BITS == 64
3538#define BSWAP_LIMB(dst, src)						\
3539  do {									\
3540    (dst) =								\
3541      ((src) << 56)							\
3542      + (((src) & 0xFF00) << 40)					\
3543      + (((src) & 0xFF0000) << 24)					\
3544      + (((src) & 0xFF000000) << 8)					\
3545      + (((src) >> 8) & 0xFF000000)					\
3546      + (((src) >> 24) & 0xFF0000)					\
3547      + (((src) >> 40) & 0xFF00)					\
3548      + ((src) >> 56);							\
3549  } while (0)
3550#endif
3551#endif
3552
3553#if ! defined (BSWAP_LIMB)
3554#define BSWAP_LIMB(dst, src)						\
3555  do {									\
3556    mp_limb_t  __bswapl_src = (src);					\
3557    mp_limb_t  __dstl = 0;						\
3558    int	       __i;							\
3559    for (__i = 0; __i < GMP_LIMB_BYTES; __i++)			\
3560      {									\
3561	__dstl = (__dstl << 8) | (__bswapl_src & 0xFF);			\
3562	__bswapl_src >>= 8;						\
3563      }									\
3564    (dst) = __dstl;							\
3565  } while (0)
3566#endif
3567
3568
3569/* Apparently lwbrx might be slow on some PowerPC chips, so restrict it to
3570   those we know are fast.  */
3571#if defined (__GNUC__) && ! defined (NO_ASM)				\
3572  && GMP_LIMB_BITS == 32 && HAVE_LIMB_BIG_ENDIAN			\
3573  && (HAVE_HOST_CPU_powerpc604						\
3574      || HAVE_HOST_CPU_powerpc604e					\
3575      || HAVE_HOST_CPU_powerpc750					\
3576      || HAVE_HOST_CPU_powerpc7400)
3577#define BSWAP_LIMB_FETCH(limb, src)					\
3578  do {									\
3579    mp_srcptr  __blf_src = (src);					\
3580    mp_limb_t  __limb;							\
3581    __asm__ ("lwbrx %0, 0, %1"						\
3582	     : "=r" (__limb)						\
3583	     : "r" (__blf_src),						\
3584	       "m" (*__blf_src));					\
3585    (limb) = __limb;							\
3586  } while (0)
3587#endif
3588
3589#if ! defined (BSWAP_LIMB_FETCH)
3590#define BSWAP_LIMB_FETCH(limb, src)  BSWAP_LIMB (limb, *(src))
3591#endif
3592
3593
3594/* On the same basis that lwbrx might be slow, restrict stwbrx to those we
3595   know are fast.  FIXME: Is this necessary?  */
3596#if defined (__GNUC__) && ! defined (NO_ASM)				\
3597  && GMP_LIMB_BITS == 32 && HAVE_LIMB_BIG_ENDIAN			\
3598  && (HAVE_HOST_CPU_powerpc604						\
3599      || HAVE_HOST_CPU_powerpc604e					\
3600      || HAVE_HOST_CPU_powerpc750					\
3601      || HAVE_HOST_CPU_powerpc7400)
3602#define BSWAP_LIMB_STORE(dst, limb)					\
3603  do {									\
3604    mp_ptr     __dst = (dst);						\
3605    mp_limb_t  __limb = (limb);						\
3606    __asm__ ("stwbrx %1, 0, %2"						\
3607	     : "=m" (*__dst)						\
3608	     : "r" (__limb),						\
3609	       "r" (__dst));						\
3610  } while (0)
3611#endif
3612
3613#if ! defined (BSWAP_LIMB_STORE)
3614#define BSWAP_LIMB_STORE(dst, limb)  BSWAP_LIMB (*(dst), limb)
3615#endif
3616
3617
3618/* Byte swap limbs from {src,size} and store at {dst,size}. */
3619#define MPN_BSWAP(dst, src, size)					\
3620  do {									\
3621    mp_ptr     __dst = (dst);						\
3622    mp_srcptr  __src = (src);						\
3623    mp_size_t  __size = (size);						\
3624    mp_size_t  __i;							\
3625    ASSERT ((size) >= 0);						\
3626    ASSERT (MPN_SAME_OR_SEPARATE_P (dst, src, size));			\
3627    CRAY_Pragma ("_CRI ivdep");						\
3628    for (__i = 0; __i < __size; __i++)					\
3629      {									\
3630	BSWAP_LIMB_FETCH (*__dst, __src);				\
3631	__dst++;							\
3632	__src++;							\
3633      }									\
3634  } while (0)
3635
3636/* Byte swap limbs from {src,size} and store in reverse order at {dst,size}. */
3637#define MPN_BSWAP_REVERSE(dst, src, size)				\
3638  do {									\
3639    mp_ptr     __dst = (dst);						\
3640    mp_size_t  __size = (size);						\
3641    mp_srcptr  __src = (src) + __size - 1;				\
3642    mp_size_t  __i;							\
3643    ASSERT ((size) >= 0);						\
3644    ASSERT (! MPN_OVERLAP_P (dst, size, src, size));			\
3645    CRAY_Pragma ("_CRI ivdep");						\
3646    for (__i = 0; __i < __size; __i++)					\
3647      {									\
3648	BSWAP_LIMB_FETCH (*__dst, __src);				\
3649	__dst++;							\
3650	__src--;							\
3651      }									\
3652  } while (0)
3653
3654
3655/* No processor claiming to be SPARC v9 compliant seems to
3656   implement the POPC instruction.  Disable pattern for now.  */
3657#if 0
3658#if defined __GNUC__ && defined __sparc_v9__ && GMP_LIMB_BITS == 64
3659#define popc_limb(result, input)					\
3660  do {									\
3661    DItype __res;							\
3662    __asm__ ("popc %1,%0" : "=r" (result) : "rI" (input));		\
3663  } while (0)
3664#endif
3665#endif
3666
3667#if defined (__GNUC__) && ! defined (NO_ASM) && HAVE_HOST_CPU_alpha_CIX
3668#define popc_limb(result, input)					\
3669  do {									\
3670    __asm__ ("ctpop %1, %0" : "=r" (result) : "r" (input));		\
3671  } while (0)
3672#endif
3673
3674/* Cray intrinsic. */
3675#ifdef _CRAY
3676#define popc_limb(result, input)					\
3677  do {									\
3678    (result) = _popcnt (input);						\
3679  } while (0)
3680#endif
3681
3682#if defined (__GNUC__) && ! defined (__INTEL_COMPILER)			\
3683    && ! defined (NO_ASM) && defined (__ia64) && GMP_LIMB_BITS == 64
3684#define popc_limb(result, input)					\
3685  do {									\
3686    __asm__ ("popcnt %0 = %1" : "=r" (result) : "r" (input));		\
3687  } while (0)
3688#endif
3689
3690/* Cool population count of an mp_limb_t.
3691   You have to figure out how this works; we won't tell you!
3692
3693   The constants could also be expressed as:
3694     0x55... = [2^N / 3]     = [(2^N-1)/3]
3695     0x33... = [2^N / 5]     = [(2^N-1)/5]
3696     0x0f... = [2^N / 17]    = [(2^N-1)/17]
3697     (N is GMP_LIMB_BITS, [] denotes truncation.) */
3698
3699#if ! defined (popc_limb) && GMP_LIMB_BITS == 8
3700#define popc_limb(result, input)					\
3701  do {									\
3702    mp_limb_t  __x = (input);						\
3703    __x -= (__x >> 1) & MP_LIMB_T_MAX/3;				\
3704    __x = ((__x >> 2) & MP_LIMB_T_MAX/5) + (__x & MP_LIMB_T_MAX/5);	\
3705    __x = ((__x >> 4) + __x);						\
3706    (result) = __x & 0x0f;						\
3707  } while (0)
3708#endif
3709
3710#if ! defined (popc_limb) && GMP_LIMB_BITS == 16
3711#define popc_limb(result, input)					\
3712  do {									\
3713    mp_limb_t  __x = (input);						\
3714    __x -= (__x >> 1) & MP_LIMB_T_MAX/3;				\
3715    __x = ((__x >> 2) & MP_LIMB_T_MAX/5) + (__x & MP_LIMB_T_MAX/5);	\
3716    __x += (__x >> 4);							\
3717    __x = ((__x >> 8) & MP_LIMB_T_MAX/4369)+(__x & MP_LIMB_T_MAX/4369);	\
3718    (result) = __x;							\
3719  } while (0)
3720#endif
3721
3722#if ! defined (popc_limb) && GMP_LIMB_BITS == 32
3723#define popc_limb(result, input)					\
3724  do {									\
3725    mp_limb_t  __x = (input);						\
3726    __x -= (__x >> 1) & MP_LIMB_T_MAX/3;				\
3727    __x = ((__x >> 2) & MP_LIMB_T_MAX/5) + (__x & MP_LIMB_T_MAX/5);	\
3728    __x = ((__x >> 4) + __x) & MP_LIMB_T_MAX/17;			\
3729    __x = ((__x >> 8) + __x);						\
3730    __x = ((__x >> 16) + __x);						\
3731    (result) = __x & 0xff;						\
3732  } while (0)
3733#endif
3734
3735#if ! defined (popc_limb) && GMP_LIMB_BITS == 64
3736#define popc_limb(result, input)					\
3737  do {									\
3738    mp_limb_t  __x = (input);						\
3739    __x -= (__x >> 1) & MP_LIMB_T_MAX/3;				\
3740    __x = ((__x >> 2) & MP_LIMB_T_MAX/5) + (__x & MP_LIMB_T_MAX/5);	\
3741    __x = ((__x >> 4) + __x) & MP_LIMB_T_MAX/17;			\
3742    __x = ((__x >> 8) + __x);						\
3743    __x = ((__x >> 16) + __x);						\
3744    __x = ((__x >> 32) + __x);						\
3745    (result) = __x & 0xff;						\
3746  } while (0)
3747#endif
3748
3749
3750/* Define stuff for longlong.h.  */
3751#if HAVE_ATTRIBUTE_MODE
3752typedef unsigned int UQItype	__attribute__ ((mode (QI)));
3753typedef		 int SItype	__attribute__ ((mode (SI)));
3754typedef unsigned int USItype	__attribute__ ((mode (SI)));
3755typedef		 int DItype	__attribute__ ((mode (DI)));
3756typedef unsigned int UDItype	__attribute__ ((mode (DI)));
3757#else
3758typedef unsigned char UQItype;
3759typedef		 long SItype;
3760typedef unsigned long USItype;
3761#if HAVE_LONG_LONG
3762typedef	long long int DItype;
3763typedef unsigned long long int UDItype;
3764#else /* Assume `long' gives us a wide enough type.  Needed for hppa2.0w.  */
3765typedef long int DItype;
3766typedef unsigned long int UDItype;
3767#endif
3768#endif
3769
3770typedef mp_limb_t UWtype;
3771typedef unsigned int UHWtype;
3772#define W_TYPE_SIZE GMP_LIMB_BITS
3773
3774/* Define ieee_double_extract and _GMP_IEEE_FLOATS.
3775
3776   Bit field packing is "implementation defined" according to C99, which
3777   leaves us at the compiler's mercy here.  For some systems packing is
3778   defined in the ABI (eg. x86).  In any case so far it seems universal that
3779   little endian systems pack from low to high, and big endian from high to
3780   low within the given type.
3781
3782   Within the fields we rely on the integer endianness being the same as the
3783   float endianness, this is true everywhere we know of and it'd be a fairly
3784   strange system that did anything else.  */
3785
3786#if HAVE_DOUBLE_IEEE_LITTLE_SWAPPED
3787#define _GMP_IEEE_FLOATS 1
3788union ieee_double_extract
3789{
3790  struct
3791    {
3792      gmp_uint_least32_t manh:20;
3793      gmp_uint_least32_t exp:11;
3794      gmp_uint_least32_t sig:1;
3795      gmp_uint_least32_t manl:32;
3796    } s;
3797  double d;
3798};
3799#endif
3800
3801#if HAVE_DOUBLE_IEEE_LITTLE_ENDIAN
3802#define _GMP_IEEE_FLOATS 1
3803union ieee_double_extract
3804{
3805  struct
3806    {
3807      gmp_uint_least32_t manl:32;
3808      gmp_uint_least32_t manh:20;
3809      gmp_uint_least32_t exp:11;
3810      gmp_uint_least32_t sig:1;
3811    } s;
3812  double d;
3813};
3814#endif
3815
3816#if HAVE_DOUBLE_IEEE_BIG_ENDIAN
3817#define _GMP_IEEE_FLOATS 1
3818union ieee_double_extract
3819{
3820  struct
3821    {
3822      gmp_uint_least32_t sig:1;
3823      gmp_uint_least32_t exp:11;
3824      gmp_uint_least32_t manh:20;
3825      gmp_uint_least32_t manl:32;
3826    } s;
3827  double d;
3828};
3829#endif
3830
3831#if HAVE_DOUBLE_VAX_D
3832union double_extract
3833{
3834  struct
3835    {
3836      gmp_uint_least32_t man3:7;	/* highest 7 bits */
3837      gmp_uint_least32_t exp:8;		/* excess-128 exponent */
3838      gmp_uint_least32_t sig:1;
3839      gmp_uint_least32_t man2:16;
3840      gmp_uint_least32_t man1:16;
3841      gmp_uint_least32_t man0:16;	/* lowest 16 bits */
3842    } s;
3843  double d;
3844};
3845#endif
3846
3847/* Use (4.0 * ...) instead of (2.0 * ...) to work around buggy compilers
3848   that don't convert ulong->double correctly (eg. SunOS 4 native cc).  */
3849#define MP_BASE_AS_DOUBLE (4.0 * ((mp_limb_t) 1 << (GMP_NUMB_BITS - 2)))
3850/* Maximum number of limbs it will take to store any `double'.
3851   We assume doubles have 53 mantissa bits.  */
3852#define LIMBS_PER_DOUBLE ((53 + GMP_NUMB_BITS - 2) / GMP_NUMB_BITS + 1)
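
/* Worked out for the common cases (illustrative): with GMP_NUMB_BITS == 64
   this is (53+62)/64 + 1 = 2 limbs, and with GMP_NUMB_BITS == 32 it is
   (53+30)/32 + 1 = 3 limbs; the extra limb allows for a mantissa that is
   not aligned to a limb boundary.  */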
3853
3854__GMP_DECLSPEC int __gmp_extract_double (mp_ptr, double);
3855
3856#define mpn_get_d __gmpn_get_d
3857__GMP_DECLSPEC double mpn_get_d (mp_srcptr, mp_size_t, mp_size_t, long) __GMP_ATTRIBUTE_PURE;
3858
3859
3860/* DOUBLE_NAN_INF_ACTION executes code a_nan if x is a NaN, or executes
3861   a_inf if x is an infinity.  Both are considered unlikely values, for
3862   branch prediction.  */
3863
3864#if _GMP_IEEE_FLOATS
3865#define DOUBLE_NAN_INF_ACTION(x, a_nan, a_inf)				\
3866  do {									\
3867    union ieee_double_extract  u;					\
3868    u.d = (x);								\
3869    if (UNLIKELY (u.s.exp == 0x7FF))					\
3870      {									\
3871	if (u.s.manl == 0 && u.s.manh == 0)				\
3872	  { a_inf; }							\
3873	else								\
3874	  { a_nan; }							\
3875      }									\
3876  } while (0)
3877#endif
3878
3879#if HAVE_DOUBLE_VAX_D || HAVE_DOUBLE_VAX_G || HAVE_DOUBLE_CRAY_CFP
3880/* no nans or infs in these formats */
3881#define DOUBLE_NAN_INF_ACTION(x, a_nan, a_inf)  \
3882  do { } while (0)
3883#endif
3884
3885#ifndef DOUBLE_NAN_INF_ACTION
3886/* Unknown format, try something generic.
3887   NaN should be "unordered", so x!=x.
3888   Inf should be bigger than DBL_MAX.  */
3889#define DOUBLE_NAN_INF_ACTION(x, a_nan, a_inf)				\
3890  do {									\
3891    {									\
3892      if (UNLIKELY ((x) != (x)))					\
3893	{ a_nan; }							\
3894      else if (UNLIKELY ((x) > DBL_MAX || (x) < -DBL_MAX))		\
3895	{ a_inf; }							\
3896    }									\
3897  } while (0)
3898#endif
3899
3900/* On m68k, x86 and amd64, gcc (and maybe other compilers) can hold doubles
3901   in the coprocessor, which means a bigger exponent range than normal, and
3902   depending on the rounding mode, a bigger mantissa than normal.  (See
3903   "Disappointments" in the gcc manual.)  FORCE_DOUBLE stores and fetches
3904   "d" through memory to force any rounding and overflows to occur.
3905
3906   On amd64, and on x86s with SSE2, gcc (depending on options) uses the xmm
3907   registers, where there's no such extra precision and no need for the
3908   FORCE_DOUBLE.  We don't bother to detect this since the present uses for
3909   FORCE_DOUBLE are only in test programs and default generic C code.
3910
3911   Not quite sure that an "automatic volatile" will use memory, but it does
3912   in gcc.  An asm("":"=m"(d):"0"(d)) can't be used to trick gcc, since
3913   apparently matching operands like "0" are only allowed on a register
3914   output.  gcc 3.4 warns about this, though in fact it and past versions
3915   seem to put the operand through memory as hoped.  */
3916
3917#if (HAVE_HOST_CPU_FAMILY_m68k || HAVE_HOST_CPU_FAMILY_x86      \
3918     || defined (__amd64__))
3919#define FORCE_DOUBLE(d) \
3920  do { volatile double __gmp_force = (d); (d) = __gmp_force; } while (0)
3921#else
3922#define FORCE_DOUBLE(d)  do { } while (0)
3923#endif
3924
3925
3926__GMP_DECLSPEC extern const unsigned char __gmp_digit_value_tab[];
3927
3928__GMP_DECLSPEC extern int __gmp_junk;
3929__GMP_DECLSPEC extern const int __gmp_0;
3930__GMP_DECLSPEC void __gmp_exception (int) ATTRIBUTE_NORETURN;
3931__GMP_DECLSPEC void __gmp_divide_by_zero (void) ATTRIBUTE_NORETURN;
3932__GMP_DECLSPEC void __gmp_sqrt_of_negative (void) ATTRIBUTE_NORETURN;
3933__GMP_DECLSPEC void __gmp_invalid_operation (void) ATTRIBUTE_NORETURN;
3934#define GMP_ERROR(code)   __gmp_exception (code)
3935#define DIVIDE_BY_ZERO    __gmp_divide_by_zero ()
3936#define SQRT_OF_NEGATIVE  __gmp_sqrt_of_negative ()
3937
3938#if defined _LONG_LONG_LIMB
3939#define CNST_LIMB(C) ((mp_limb_t) C##LL)
3940#else /* not _LONG_LONG_LIMB */
3941#define CNST_LIMB(C) ((mp_limb_t) C##L)
3942#endif /* _LONG_LONG_LIMB */
3943
3944/* Stuff used by mpn/generic/perfsqr.c and mpz/prime_p.c */
3945#if GMP_NUMB_BITS == 2
3946#define PP 0x3					/* 3 */
3947#define PP_FIRST_OMITTED 5
3948#endif
3949#if GMP_NUMB_BITS == 4
3950#define PP 0xF					/* 3 x 5 */
3951#define PP_FIRST_OMITTED 7
3952#endif
3953#if GMP_NUMB_BITS == 8
3954#define PP 0x69					/* 3 x 5 x 7 */
3955#define PP_FIRST_OMITTED 11
3956#endif
3957#if GMP_NUMB_BITS == 16
3958#define PP 0x3AA7				/* 3 x 5 x 7 x 11 x 13 */
3959#define PP_FIRST_OMITTED 17
3960#endif
3961#if GMP_NUMB_BITS == 32
3962#define PP 0xC0CFD797L				/* 3 x 5 x 7 x 11 x ... x 29 */
3963#define PP_INVERTED 0x53E5645CL
3964#define PP_FIRST_OMITTED 31
3965#endif
3966#if GMP_NUMB_BITS == 64
3967#define PP CNST_LIMB(0xE221F97C30E94E1D)	/* 3 x 5 x 7 x 11 x ... x 53 */
3968#define PP_INVERTED CNST_LIMB(0x21CFE6CFC938B36B)
3969#define PP_FIRST_OMITTED 59
3970#endif
3971#ifndef PP_FIRST_OMITTED
3972#define PP_FIRST_OMITTED 3
3973#endif
3974
3975typedef struct
3976{
3977  mp_limb_t d0, d1;
3978} mp_double_limb_t;
3979
3980#define mpn_gcd_22 __MPN (gcd_22)
3981__GMP_DECLSPEC mp_double_limb_t mpn_gcd_22 (mp_limb_t, mp_limb_t, mp_limb_t, mp_limb_t);
3982
3983/* BIT1 means a result value in bit 1 (second least significant bit), with a
3984   zero bit representing +1 and a one bit representing -1.  Bits other than
3985   bit 1 are garbage.  These are meant to be kept in "int"s, and casts are
3986   used to ensure the expressions are "int"s even if a and/or b might be
3987   other types.
3988
3989   JACOBI_TWOS_U_BIT1 and JACOBI_RECIP_UU_BIT1 are used in mpn_jacobi_base
3990   and their speed is important.  Expressions are used rather than
3991   conditionals to accumulate sign changes, which effectively means XORs
3992   instead of conditional JUMPs. */
3993
3994/* (a/0), with a signed; is 1 if a=+/-1, 0 otherwise */
3995#define JACOBI_S0(a)   (((a) == 1) | ((a) == -1))
3996
3997/* (a/0), with a unsigned; is 1 if a=+/-1, 0 otherwise */
3998#define JACOBI_U0(a)   ((a) == 1)
3999
4000/* FIXME: JACOBI_LS0 and JACOBI_0LS are the same, so delete one and
4001   come up with a better name. */
4002
4003/* (a/0), with a given by low and size;
4004   is 1 if a=+/-1, 0 otherwise */
4005#define JACOBI_LS0(alow,asize) \
4006  (((asize) == 1 || (asize) == -1) && (alow) == 1)
4007
4008/* (a/0), with a an mpz_t;
4009   fetch of low limb always valid, even if size is zero */
4010#define JACOBI_Z0(a)   JACOBI_LS0 (PTR(a)[0], SIZ(a))
4011
4012/* (0/b), with b unsigned; is 1 if b=1, 0 otherwise */
4013#define JACOBI_0U(b)   ((b) == 1)
4014
4015/* (0/b), with b unsigned; is 1 if b=+/-1, 0 otherwise */
4016#define JACOBI_0S(b)   ((b) == 1 || (b) == -1)
4017
4018/* (0/b), with b given by low and size; is 1 if b=+/-1, 0 otherwise */
4019#define JACOBI_0LS(blow,bsize) \
4020  (((bsize) == 1 || (bsize) == -1) && (blow) == 1)
4021
4022/* Convert a bit1 to +1 or -1. */
4023#define JACOBI_BIT1_TO_PN(result_bit1) \
4024  (1 - ((int) (result_bit1) & 2))
4025
4026/* (2/b), with b unsigned and odd;
4027   is (-1)^((b^2-1)/8) which is 1 if b==1,7mod8 or -1 if b==3,5mod8 and
4028   hence obtained from (b>>1)^b */
4029#define JACOBI_TWO_U_BIT1(b) \
4030  ((int) (((b) >> 1) ^ (b)))
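
/* A quick check of the (b>>1)^b formula: b=1 gives 0^1=1 and b=7 gives
   3^7=4, both with bit 1 clear, ie. +1; b=3 gives 1^3=2 and b=5 gives
   2^5=7, both with bit 1 set, ie. -1.  This matches (2/b) being +1 for
   b==1,7mod8 and -1 for b==3,5mod8.  */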
4031
4032/* (2/b)^twos, with b unsigned and odd */
4033#define JACOBI_TWOS_U_BIT1(twos, b) \
4034  ((int) ((twos) << 1) & JACOBI_TWO_U_BIT1 (b))
4035
4036/* (2/b)^twos, with b unsigned and odd */
4037#define JACOBI_TWOS_U(twos, b) \
4038  (JACOBI_BIT1_TO_PN (JACOBI_TWOS_U_BIT1 (twos, b)))
4039
4040/* (-1/b), with b odd (signed or unsigned);
4041   is (-1)^((b-1)/2) */
4042#define JACOBI_N1B_BIT1(b) \
4043  ((int) (b))
4044
4045/* (a/b) effect due to sign of a: signed/unsigned, b odd;
4046   is (-1/b) if a<0, or +1 if a>=0 */
4047#define JACOBI_ASGN_SU_BIT1(a, b) \
4048  ((((a) < 0) << 1) & JACOBI_N1B_BIT1(b))
4049
4050/* (a/b) effect due to sign of b: signed/signed;
4051   is -1 if a and b both negative, +1 otherwise */
4052#define JACOBI_BSGN_SS_BIT1(a, b) \
4053  ((((a)<0) & ((b)<0)) << 1)
4054
4055/* (a/b) effect due to sign of b: signed/mpz;
4056   is -1 if a and b both negative, +1 otherwise */
4057#define JACOBI_BSGN_SZ_BIT1(a, b) \
4058  JACOBI_BSGN_SS_BIT1 (a, SIZ(b))
4059
4060/* (a/b) effect due to sign of b: mpz/signed;
4061   is -1 if a and b both negative, +1 otherwise */
4062#define JACOBI_BSGN_ZS_BIT1(a, b) \
4063  JACOBI_BSGN_SZ_BIT1 (b, a)
4064
4065/* (a/b) reciprocity to switch to (b/a), a,b both unsigned and odd;
4066   is (-1)^((a-1)*(b-1)/4), which means +1 if either a,b==1mod4, or -1 if
4067   both a,b==3mod4, achieved in bit 1 by a&b.  No ASSERT()s about a,b odd
4068   because this is used in a couple of places with only bit 1 of a or b
4069   valid. */
4070#define JACOBI_RECIP_UU_BIT1(a, b) \
4071  ((int) ((a) & (b)))
4072
4073/* Strip low zero limbs from {b_ptr,b_size} by incrementing b_ptr and
4074   decrementing b_size.  b_low should be b_ptr[0] on entry, and will be
4075   updated for the new b_ptr.  result_bit1 is updated according to the
4076   factors of 2 stripped, as per (a/2).  */
4077#define JACOBI_STRIP_LOW_ZEROS(result_bit1, a, b_ptr, b_size, b_low)	\
4078  do {									\
4079    ASSERT ((b_size) >= 1);						\
4080    ASSERT ((b_low) == (b_ptr)[0]);					\
4081									\
4082    while (UNLIKELY ((b_low) == 0))					\
4083      {									\
4084	(b_size)--;							\
4085	ASSERT ((b_size) >= 1);						\
4086	(b_ptr)++;							\
4087	(b_low) = *(b_ptr);						\
4088									\
4089	ASSERT (((a) & 1) != 0);					\
4090	if ((GMP_NUMB_BITS % 2) == 1)					\
4091	  (result_bit1) ^= JACOBI_TWO_U_BIT1(a);			\
4092      }									\
4093  } while (0)
4094
4095/* Set a_rem to {a_ptr,a_size} reduced modulo b, either using mod_1 or
4096   modexact_1_odd, but in either case leaving a_rem<b.  b must be odd and
4097   unsigned.  modexact_1_odd effectively calculates -a mod b, and
4098   result_bit1 is adjusted for the factor of -1.
4099
4100   The way mpn_modexact_1_odd sometimes bases its remainder on a_size and
4101   sometimes on a_size-1 means if GMP_NUMB_BITS is odd we can't know what
4102   factor to introduce into result_bit1, so for that case use mpn_mod_1
4103   unconditionally.
4104
4105   FIXME: mpn_modexact_1_odd is more efficient, so some way to get it used
4106   for odd GMP_NUMB_BITS would be good.  Perhaps it could mung its result,
4107   or not skip a divide step, or something. */
4108
4109#define JACOBI_MOD_OR_MODEXACT_1_ODD(result_bit1, a_rem, a_ptr, a_size, b) \
4110  do {									   \
4111    mp_srcptr  __a_ptr	= (a_ptr);					   \
4112    mp_size_t  __a_size = (a_size);					   \
4113    mp_limb_t  __b	= (b);						   \
4114									   \
4115    ASSERT (__a_size >= 1);						   \
4116    ASSERT (__b & 1);							   \
4117									   \
4118    if ((GMP_NUMB_BITS % 2) != 0					   \
4119	|| ABOVE_THRESHOLD (__a_size, BMOD_1_TO_MOD_1_THRESHOLD))	   \
4120      {									   \
4121	(a_rem) = mpn_mod_1 (__a_ptr, __a_size, __b);			   \
4122      }									   \
4123    else								   \
4124      {									   \
4125	(result_bit1) ^= JACOBI_N1B_BIT1 (__b);				   \
4126	(a_rem) = mpn_modexact_1_odd (__a_ptr, __a_size, __b);		   \
4127      }									   \
4128  } while (0)
4129
4130/* State for the Jacobi computation using Lehmer. */
4131#define jacobi_table __gmp_jacobi_table
4132__GMP_DECLSPEC extern const unsigned char jacobi_table[208];
4133
4134/* Bit layout for the initial state. b must be odd.
4135
4136      3  2  1 0
4137   +--+--+--+--+
4138   |a1|a0|b1| s|
4139   +--+--+--+--+
4140
4141 */
4142static inline unsigned
4143mpn_jacobi_init (unsigned a, unsigned b, unsigned s)
4144{
4145  ASSERT (b & 1);
4146  ASSERT (s <= 1);
4147  return ((a & 3) << 2) + (b & 2) + s;
4148}
4149
4150static inline int
4151mpn_jacobi_finish (unsigned bits)
4152{
4153  /* (a, b) = (1,0) or (0,1) */
4154  ASSERT ( (bits & 14) == 0);
4155
4156  return 1-2*(bits & 1);
4157}
4158
4159static inline unsigned
4160mpn_jacobi_update (unsigned bits, unsigned denominator, unsigned q)
4161{
4162  /* FIXME: Could halve table size by not including the e bit in the
4163   * index, and instead xor when updating. Then the lookup would be
4164   * like
4165   *
4166   *   bits ^= table[((bits & 30) << 2) + (denominator << 2) + q];
4167   */
4168
4169  ASSERT (bits < 26);
4170  ASSERT (denominator < 2);
4171  ASSERT (q < 4);
4172
4173  /* For almost all calls, denominator is constant and quite often q
4174     is constant too. So use addition rather than or, so the compiler
4175     can put the constant part into the offset of an indexed
4176     addressing instruction.
4177
4178     With constant denominator, the below table lookup is compiled to
4179
4180       C Constant q = 1, constant denominator = 1
4181       movzbl table+5(%eax,8), %eax
4182
4183     or
4184
4185       C q in %edx, constant denominator = 1
4186       movzbl table+4(%edx,%eax,8), %eax
4187
4188     One could maintain the state preshifted 3 bits, to save a shift
4189     here, but at least on x86, that's no real saving.
4190  */
4191  return bits = jacobi_table[(bits << 3) + (denominator << 2) + q];
4192}
4193
4194/* Matrix multiplication */
4195#define   mpn_matrix22_mul __MPN(matrix22_mul)
4196__GMP_DECLSPEC void      mpn_matrix22_mul (mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_srcptr, mp_srcptr, mp_srcptr, mp_size_t, mp_ptr);
4197#define   mpn_matrix22_mul_itch __MPN(matrix22_mul_itch)
4198__GMP_DECLSPEC mp_size_t mpn_matrix22_mul_itch (mp_size_t, mp_size_t) ATTRIBUTE_CONST;
4199
4200#ifndef MATRIX22_STRASSEN_THRESHOLD
4201#define MATRIX22_STRASSEN_THRESHOLD 30
4202#endif
4203
4204/* HGCD definitions */
4205
4206/* Extract one numb, shifting count bits left
4207    ________  ________
4208   |___xh___||___xl___|
4209	  |____r____|
4210   >count <
4211
4212   The count includes any nail bits, so it should work fine if count
4213   is computed using count_leading_zeros. If GMP_NAIL_BITS > 0, all of
4214   xh, xl and r include nail bits. Must have 0 < count < GMP_LIMB_BITS.
4215
4216   FIXME: Omit masking with GMP_NUMB_MASK, and let callers do that for
4217   those calls where the count high bits of xh may be non-zero.
4218*/
4219
4220#define MPN_EXTRACT_NUMB(count, xh, xl)				\
4221  ((((xh) << ((count) - GMP_NAIL_BITS)) & GMP_NUMB_MASK) |	\
4222   ((xl) >> (GMP_LIMB_BITS - (count))))
4223
4224
4225/* The non-negative matrix M = (u, u'; v,v') keeps track of the
4226   reduction (a;b) = M (alpha; beta) where alpha, beta are smaller
4227   than a, b. The determinant must always be one, so that M has an
4228   inverse (v', -u'; -v, u). Elements always fit in GMP_NUMB_BITS - 1
4229   bits. */
4230struct hgcd_matrix1
4231{
4232  mp_limb_t u[2][2];
4233};
4234
4235#define mpn_hgcd2 __MPN (hgcd2)
4236__GMP_DECLSPEC int mpn_hgcd2 (mp_limb_t, mp_limb_t, mp_limb_t, mp_limb_t,	struct hgcd_matrix1 *);
4237
4238#define mpn_hgcd_mul_matrix1_vector __MPN (hgcd_mul_matrix1_vector)
4239__GMP_DECLSPEC mp_size_t mpn_hgcd_mul_matrix1_vector (const struct hgcd_matrix1 *, mp_ptr, mp_srcptr, mp_ptr, mp_size_t);
4240
4241#define mpn_matrix22_mul1_inverse_vector __MPN (matrix22_mul1_inverse_vector)
4242__GMP_DECLSPEC mp_size_t mpn_matrix22_mul1_inverse_vector (const struct hgcd_matrix1 *, mp_ptr, mp_srcptr, mp_ptr, mp_size_t);
4243
4244#define mpn_hgcd2_jacobi __MPN (hgcd2_jacobi)
4245__GMP_DECLSPEC int mpn_hgcd2_jacobi (mp_limb_t, mp_limb_t, mp_limb_t, mp_limb_t, struct hgcd_matrix1 *, unsigned *);
4246
4247struct hgcd_matrix
4248{
4249  mp_size_t alloc;		/* for sanity checking only */
4250  mp_size_t n;
4251  mp_ptr p[2][2];
4252};
4253
4254#define MPN_HGCD_MATRIX_INIT_ITCH(n) (4 * ((n+1)/2 + 1))
4255
4256#define mpn_hgcd_matrix_init __MPN (hgcd_matrix_init)
4257__GMP_DECLSPEC void mpn_hgcd_matrix_init (struct hgcd_matrix *, mp_size_t, mp_ptr);
4258
4259#define mpn_hgcd_matrix_update_q __MPN (hgcd_matrix_update_q)
4260__GMP_DECLSPEC void mpn_hgcd_matrix_update_q (struct hgcd_matrix *, mp_srcptr, mp_size_t, unsigned, mp_ptr);
4261
4262#define mpn_hgcd_matrix_mul_1 __MPN (hgcd_matrix_mul_1)
4263__GMP_DECLSPEC void mpn_hgcd_matrix_mul_1 (struct hgcd_matrix *, const struct hgcd_matrix1 *, mp_ptr);
4264
4265#define mpn_hgcd_matrix_mul __MPN (hgcd_matrix_mul)
4266__GMP_DECLSPEC void mpn_hgcd_matrix_mul (struct hgcd_matrix *, const struct hgcd_matrix *, mp_ptr);
4267
4268#define mpn_hgcd_matrix_adjust __MPN (hgcd_matrix_adjust)
4269__GMP_DECLSPEC mp_size_t mpn_hgcd_matrix_adjust (const struct hgcd_matrix *, mp_size_t, mp_ptr, mp_ptr, mp_size_t, mp_ptr);
4270
4271#define mpn_hgcd_step __MPN(hgcd_step)
4272__GMP_DECLSPEC mp_size_t mpn_hgcd_step (mp_size_t, mp_ptr, mp_ptr, mp_size_t, struct hgcd_matrix *, mp_ptr);
4273
4274#define mpn_hgcd_reduce __MPN(hgcd_reduce)
4275__GMP_DECLSPEC mp_size_t mpn_hgcd_reduce (struct hgcd_matrix *, mp_ptr, mp_ptr, mp_size_t, mp_size_t, mp_ptr);
4276
4277#define mpn_hgcd_reduce_itch __MPN(hgcd_reduce_itch)
4278__GMP_DECLSPEC mp_size_t mpn_hgcd_reduce_itch (mp_size_t, mp_size_t) ATTRIBUTE_CONST;
4279
4280#define mpn_hgcd_itch __MPN (hgcd_itch)
4281__GMP_DECLSPEC mp_size_t mpn_hgcd_itch (mp_size_t) ATTRIBUTE_CONST;
4282
4283#define mpn_hgcd __MPN (hgcd)
4284__GMP_DECLSPEC mp_size_t mpn_hgcd (mp_ptr, mp_ptr, mp_size_t, struct hgcd_matrix *, mp_ptr);
4285
4286#define mpn_hgcd_appr_itch __MPN (hgcd_appr_itch)
4287__GMP_DECLSPEC mp_size_t mpn_hgcd_appr_itch (mp_size_t) ATTRIBUTE_CONST;
4288
4289#define mpn_hgcd_appr __MPN (hgcd_appr)
4290__GMP_DECLSPEC int mpn_hgcd_appr (mp_ptr, mp_ptr, mp_size_t, struct hgcd_matrix *, mp_ptr);
4291
4292#define mpn_hgcd_jacobi __MPN (hgcd_jacobi)
4293__GMP_DECLSPEC mp_size_t mpn_hgcd_jacobi (mp_ptr, mp_ptr, mp_size_t, struct hgcd_matrix *, unsigned *, mp_ptr);
4294
4295typedef void gcd_subdiv_step_hook(void *, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, int);
4296
4297/* Needs storage for the quotient */
4298#define MPN_GCD_SUBDIV_STEP_ITCH(n) (n)
4299
4300#define mpn_gcd_subdiv_step __MPN(gcd_subdiv_step)
4301__GMP_DECLSPEC mp_size_t mpn_gcd_subdiv_step (mp_ptr, mp_ptr, mp_size_t, mp_size_t, gcd_subdiv_step_hook *, void *, mp_ptr);
4302
4303struct gcdext_ctx
4304{
4305  /* Result parameters. */
4306  mp_ptr gp;
4307  mp_size_t gn;
4308  mp_ptr up;
4309  mp_size_t *usize;
4310
4311  /* Cofactors updated in each step. */
4312  mp_size_t un;
4313  mp_ptr u0, u1, tp;
4314};
4315
4316#define mpn_gcdext_hook __MPN (gcdext_hook)
4317gcd_subdiv_step_hook mpn_gcdext_hook;
4318
4319#define MPN_GCDEXT_LEHMER_N_ITCH(n) (4*(n) + 3)
4320
4321#define mpn_gcdext_lehmer_n __MPN(gcdext_lehmer_n)
4322__GMP_DECLSPEC mp_size_t mpn_gcdext_lehmer_n (mp_ptr, mp_ptr, mp_size_t *, mp_ptr, mp_ptr, mp_size_t, mp_ptr);
4323
4324/* 4*(an + 1) + 4*(bn + 1) + an */
4325#define MPN_GCDEXT_LEHMER_ITCH(an, bn) (5*(an) + 4*(bn) + 8)
4326
4327#ifndef HGCD_THRESHOLD
4328#define HGCD_THRESHOLD 400
4329#endif
4330
4331#ifndef HGCD_APPR_THRESHOLD
4332#define HGCD_APPR_THRESHOLD 400
4333#endif
4334
4335#ifndef HGCD_REDUCE_THRESHOLD
4336#define HGCD_REDUCE_THRESHOLD 1000
4337#endif
4338
4339#ifndef GCD_DC_THRESHOLD
4340#define GCD_DC_THRESHOLD 1000
4341#endif
4342
4343#ifndef GCDEXT_DC_THRESHOLD
4344#define GCDEXT_DC_THRESHOLD 600
4345#endif
4346
4347/* Definitions for mpn_set_str and mpn_get_str */
4348struct powers
4349{
4350  mp_ptr p;			/* actual power value */
4351  mp_size_t n;			/* # of limbs at p */
4352  mp_size_t shift;		/* weight of lowest limb, in limb base B */
4353  size_t digits_in_base;	/* number of corresponding digits */
4354  int base;
4355};
4356typedef struct powers powers_t;
4357#define mpn_str_powtab_alloc(n) ((n) + 2 * GMP_LIMB_BITS) /* FIXME: This can perhaps be trimmed */
4358#define mpn_dc_set_str_itch(n) ((n) + GMP_LIMB_BITS)
4359#define mpn_dc_get_str_itch(n) ((n) + GMP_LIMB_BITS)
4360
4361#define mpn_compute_powtab __MPN(compute_powtab)
4362__GMP_DECLSPEC size_t mpn_compute_powtab (powers_t *, mp_ptr, mp_size_t, int);
4363#define   mpn_dc_set_str __MPN(dc_set_str)
4364__GMP_DECLSPEC mp_size_t mpn_dc_set_str (mp_ptr, const unsigned char *, size_t, const powers_t *, mp_ptr);
4365#define   mpn_bc_set_str __MPN(bc_set_str)
4366__GMP_DECLSPEC mp_size_t mpn_bc_set_str (mp_ptr, const unsigned char *, size_t, int);
4367
4368
4369/* __GMPF_BITS_TO_PREC applies a minimum 53 bits, rounds upwards to a whole
4370   limb and adds an extra limb.  __GMPF_PREC_TO_BITS drops that extra limb,
4371   hence giving back the user's size in bits rounded up.  Notice that
4372   converting prec->bits->prec gives an unchanged value.  */
4373#define __GMPF_BITS_TO_PREC(n)						\
4374  ((mp_size_t) ((__GMP_MAX (53, n) + 2 * GMP_NUMB_BITS - 1) / GMP_NUMB_BITS))
4375#define __GMPF_PREC_TO_BITS(n) \
4376  ((mp_bitcnt_t) (n) * GMP_NUMB_BITS - GMP_NUMB_BITS)
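
/* Illustration with 64-bit limbs: __GMPF_BITS_TO_PREC(100) is
   (100 + 127) / 64 = 3 limbs and __GMPF_PREC_TO_BITS(3) is 3*64 - 64 = 128,
   at least the 100 bits asked for; __GMPF_BITS_TO_PREC(128) is
   (128 + 127) / 64 = 3 again, the unchanged round trip noted above.  */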
4377
4378__GMP_DECLSPEC extern mp_size_t __gmp_default_fp_limb_precision;
4379
4380/* Compute the number of base-b digits corresponding to nlimbs limbs, rounding
4381   down.  */
4382#define DIGITS_IN_BASE_PER_LIMB(res, nlimbs, b)				\
4383  do {									\
4384    mp_limb_t _ph, _dummy;						\
4385    umul_ppmm (_ph, _dummy,						\
4386	       mp_bases[b].logb2, GMP_NUMB_BITS * (mp_limb_t) (nlimbs));\
4387    res = _ph;								\
4388  } while (0)
4389
4390/* Compute the number of limbs corresponding to ndigits base-b digits, rounding
4391   up.  */
4392#define LIMBS_PER_DIGIT_IN_BASE(res, ndigits, b)			\
4393  do {									\
4394    mp_limb_t _ph, _dummy;						\
4395    umul_ppmm (_ph, _dummy, mp_bases[b].log2b, (mp_limb_t) (ndigits));	\
4396    res = 8 * _ph / GMP_NUMB_BITS + 2;					\
4397  } while (0)
4398
4399
4400/* Set n to the number of significant digits an mpf of the given _mp_prec
4401   field, in the given base.  This is a rounded up value, designed to ensure
4402   there's enough digits to reproduce all the guaranteed part of the value.
4403
4404   There are prec many limbs, but the high might be only "1" so forget it
4405   and just count prec-1 limbs into chars.  +1 rounds that upwards, and a
4406   further +1 is because the limbs usually won't fall on digit boundaries.
4407
4408   FIXME: If base is a power of 2 and the bits per digit divides
4409   GMP_LIMB_BITS then the +2 is unnecessary.  This happens always for
4410   base==2, and in base==16 with the current 32 or 64 bit limb sizes. */
4411
4412#define MPF_SIGNIFICANT_DIGITS(n, base, prec)				\
4413  do {									\
4414    size_t rawn;							\
4415    ASSERT (base >= 2 && base < numberof (mp_bases));			\
4416    DIGITS_IN_BASE_PER_LIMB (rawn, (prec) - 1, base);			\
4417    n = rawn + 2;							\
4418  } while (0)
4419
4420
4421/* Decimal point string, from the current C locale.  Needs <langinfo.h> for
4422   nl_langinfo and constants, preferably with _GNU_SOURCE defined to get
4423   DECIMAL_POINT from glibc, and needs <locale.h> for localeconv, each under
4424   their respective #if HAVE_FOO_H.
4425
4426   GLIBC recommends nl_langinfo because getting only one facet can be
4427   faster, apparently. */
4428
4429/* DECIMAL_POINT seems to need _GNU_SOURCE defined to get it from glibc. */
4430#if HAVE_NL_LANGINFO && defined (DECIMAL_POINT)
4431#define GMP_DECIMAL_POINT  (nl_langinfo (DECIMAL_POINT))
4432#endif
4433/* RADIXCHAR is deprecated, still in unix98 or some such. */
4434#if HAVE_NL_LANGINFO && defined (RADIXCHAR) && ! defined (GMP_DECIMAL_POINT)
4435#define GMP_DECIMAL_POINT  (nl_langinfo (RADIXCHAR))
4436#endif
4437/* localeconv is slower since it returns all locale stuff */
4438#if HAVE_LOCALECONV && ! defined (GMP_DECIMAL_POINT)
4439#define GMP_DECIMAL_POINT  (localeconv()->decimal_point)
4440#endif
4441#if ! defined (GMP_DECIMAL_POINT)
4442#define GMP_DECIMAL_POINT  (".")
4443#endif
4444
4445
4446#define DOPRNT_CONV_FIXED        1
4447#define DOPRNT_CONV_SCIENTIFIC   2
4448#define DOPRNT_CONV_GENERAL      3
4449
4450#define DOPRNT_JUSTIFY_NONE      0
4451#define DOPRNT_JUSTIFY_LEFT      1
4452#define DOPRNT_JUSTIFY_RIGHT     2
4453#define DOPRNT_JUSTIFY_INTERNAL  3
4454
4455#define DOPRNT_SHOWBASE_YES      1
4456#define DOPRNT_SHOWBASE_NO       2
4457#define DOPRNT_SHOWBASE_NONZERO  3
4458
4459struct doprnt_params_t {
4460  int         base;          /* negative for upper case */
4461  int         conv;          /* choices above */
4462  const char  *expfmt;       /* exponent format */
  int         exptimes4;     /* multiply exponent by 4 */
4464  char        fill;          /* character */
4465  int         justify;       /* choices above */
4466  int         prec;          /* prec field, or -1 for all digits */
4467  int         showbase;      /* choices above */
4468  int         showpoint;     /* if radix point always shown */
4469  int         showtrailing;  /* if trailing zeros wanted */
4470  char        sign;          /* '+', ' ', or '\0' */
4471  int         width;         /* width field */
4472};
4473
4474#if _GMP_H_HAVE_VA_LIST
4475
4476typedef int (*doprnt_format_t) (void *, const char *, va_list);
4477typedef int (*doprnt_memory_t) (void *, const char *, size_t);
4478typedef int (*doprnt_reps_t)   (void *, int, int);
4479typedef int (*doprnt_final_t)  (void *);
4480
4481struct doprnt_funs_t {
4482  doprnt_format_t  format;
4483  doprnt_memory_t  memory;
4484  doprnt_reps_t    reps;
4485  doprnt_final_t   final;   /* NULL if not required */
4486};
4487
4488extern const struct doprnt_funs_t  __gmp_fprintf_funs;
4489extern const struct doprnt_funs_t  __gmp_sprintf_funs;
4490extern const struct doprnt_funs_t  __gmp_snprintf_funs;
4491extern const struct doprnt_funs_t  __gmp_obstack_printf_funs;
4492extern const struct doprnt_funs_t  __gmp_ostream_funs;
4493
4494/* "buf" is a __gmp_allocate_func block of "alloc" many bytes.  The first
4495   "size" of these have been written.  "alloc > size" is maintained, so
4496   there's room to store a '\0' at the end.  "result" is where the
4497   application wants the final block pointer.  */
4498struct gmp_asprintf_t {
4499  char    **result;
4500  char    *buf;
4501  size_t  size;
4502  size_t  alloc;
4503};
4504
4505#define GMP_ASPRINTF_T_INIT(d, output)					\
4506  do {									\
4507    (d).result = (output);						\
4508    (d).alloc = 256;							\
4509    (d).buf = (char *) (*__gmp_allocate_func) ((d).alloc);		\
4510    (d).size = 0;							\
4511  } while (0)
4512
4513/* If a realloc is necessary, use twice the size actually required, so as to
4514   avoid repeated small reallocs.  */
4515#define GMP_ASPRINTF_T_NEED(d, n)					\
4516  do {									\
4517    size_t  alloc, newsize, newalloc;					\
4518    ASSERT ((d)->alloc >= (d)->size + 1);				\
4519									\
4520    alloc = (d)->alloc;							\
4521    newsize = (d)->size + (n);						\
4522    if (alloc <= newsize)						\
4523      {									\
4524	newalloc = 2*newsize;						\
4525	(d)->alloc = newalloc;						\
4526	(d)->buf = __GMP_REALLOCATE_FUNC_TYPE ((d)->buf,		\
4527					       alloc, newalloc, char);	\
4528      }									\
4529  } while (0)
4530
4531__GMP_DECLSPEC int __gmp_asprintf_memory (struct gmp_asprintf_t *, const char *, size_t);
4532__GMP_DECLSPEC int __gmp_asprintf_reps (struct gmp_asprintf_t *, int, int);
4533__GMP_DECLSPEC int __gmp_asprintf_final (struct gmp_asprintf_t *);
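
/* Usage sketch (illustration only, not compiled; assumes <string.h> for
   strlen): accumulate some bytes and hand the finished block back through
   "result", as the fields above describe.  */
#if 0
static void
example_asprintf (char **result, const char *text)
{
  struct gmp_asprintf_t  d;
  GMP_ASPRINTF_T_INIT (d, result);
  __gmp_asprintf_memory (&d, text, strlen (text));
  __gmp_asprintf_final (&d);    /* '\0' terminate and store into *result */
}
#endif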
4534
4535/* buf is where to write the next output, and size is how much space is left
4536   there.  If the application passed size==0 then that's what we'll have
4537   here, and nothing at all should be written.  */
4538struct gmp_snprintf_t {
4539  char    *buf;
4540  size_t  size;
4541};
4542
4543/* Add the bytes printed by the call to the total retval, or bail out on an
4544   error.  */
4545#define DOPRNT_ACCUMULATE(call)						\
4546  do {									\
4547    int  __ret;								\
4548    __ret = call;							\
4549    if (__ret == -1)							\
4550      goto error;							\
4551    retval += __ret;							\
4552  } while (0)
4553#define DOPRNT_ACCUMULATE_FUN(fun, params)				\
4554  do {									\
4555    ASSERT ((fun) != NULL);						\
4556    DOPRNT_ACCUMULATE ((*(fun)) params);				\
4557  } while (0)
4558
4559#define DOPRNT_FORMAT(fmt, ap)						\
4560  DOPRNT_ACCUMULATE_FUN (funs->format, (data, fmt, ap))
4561#define DOPRNT_MEMORY(ptr, len)						\
4562  DOPRNT_ACCUMULATE_FUN (funs->memory, (data, ptr, len))
4563#define DOPRNT_REPS(c, n)						\
4564  DOPRNT_ACCUMULATE_FUN (funs->reps, (data, c, n))
4565
4566#define DOPRNT_STRING(str)      DOPRNT_MEMORY (str, strlen (str))
4567
4568#define DOPRNT_REPS_MAYBE(c, n)						\
4569  do {									\
4570    if ((n) != 0)							\
4571      DOPRNT_REPS (c, n);						\
4572  } while (0)
4573#define DOPRNT_MEMORY_MAYBE(ptr, len)					\
4574  do {									\
4575    if ((len) != 0)							\
4576      DOPRNT_MEMORY (ptr, len);						\
4577  } while (0)
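
/* Context sketch (illustration only, not compiled): the DOPRNT_* macros
   expect locals named "funs", "data" and "retval", plus an "error" label,
   in the enclosing function, roughly as follows.  */
#if 0
static int
example_doprnt_user (const struct doprnt_funs_t *funs, void *data)
{
  int  retval = 0;
  DOPRNT_STRING ("example");
  DOPRNT_REPS_MAYBE (' ', 3);
  return retval;
 error:
  return -1;
}
#endif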
4578
4579__GMP_DECLSPEC int __gmp_doprnt (const struct doprnt_funs_t *, void *, const char *, va_list);
4580__GMP_DECLSPEC int __gmp_doprnt_integer (const struct doprnt_funs_t *, void *, const struct doprnt_params_t *, const char *);
4581
4582#define __gmp_doprnt_mpf __gmp_doprnt_mpf2
4583__GMP_DECLSPEC int __gmp_doprnt_mpf (const struct doprnt_funs_t *, void *, const struct doprnt_params_t *, const char *, mpf_srcptr);
4584
4585__GMP_DECLSPEC int __gmp_replacement_vsnprintf (char *, size_t, const char *, va_list);
4586#endif /* _GMP_H_HAVE_VA_LIST */
4587
4588
4589typedef int (*gmp_doscan_scan_t)  (void *, const char *, ...);
4590typedef void *(*gmp_doscan_step_t) (void *, int);
4591typedef int (*gmp_doscan_get_t)   (void *);
4592typedef int (*gmp_doscan_unget_t) (int, void *);
4593
4594struct gmp_doscan_funs_t {
4595  gmp_doscan_scan_t   scan;
4596  gmp_doscan_step_t   step;
4597  gmp_doscan_get_t    get;
4598  gmp_doscan_unget_t  unget;
4599};
4600extern const struct gmp_doscan_funs_t  __gmp_fscanf_funs;
4601extern const struct gmp_doscan_funs_t  __gmp_sscanf_funs;
4602
4603#if _GMP_H_HAVE_VA_LIST
4604__GMP_DECLSPEC int __gmp_doscan (const struct gmp_doscan_funs_t *, void *, const char *, va_list);
4605#endif
4606
4607
4608/* For testing and debugging.  */
4609#define MPZ_CHECK_FORMAT(z)						\
4610  do {									\
4611    ASSERT_ALWAYS (SIZ(z) == 0 || PTR(z)[ABSIZ(z) - 1] != 0);		\
4612    ASSERT_ALWAYS (ALLOC(z) >= ABSIZ(z));				\
4613    ASSERT_ALWAYS_MPN (PTR(z), ABSIZ(z));				\
4614  } while (0)
4615
4616#define MPQ_CHECK_FORMAT(q)						\
4617  do {									\
4618    MPZ_CHECK_FORMAT (mpq_numref (q));					\
4619    MPZ_CHECK_FORMAT (mpq_denref (q));					\
4620    ASSERT_ALWAYS (SIZ(mpq_denref(q)) >= 1);				\
4621									\
4622    if (SIZ(mpq_numref(q)) == 0)					\
4623      {									\
4624	/* should have zero as 0/1 */					\
4625	ASSERT_ALWAYS (SIZ(mpq_denref(q)) == 1				\
4626		       && PTR(mpq_denref(q))[0] == 1);			\
4627      }									\
4628    else								\
4629      {									\
4630	/* should have no common factors */				\
4631	mpz_t  g;							\
4632	mpz_init (g);							\
4633	mpz_gcd (g, mpq_numref(q), mpq_denref(q));			\
4634	ASSERT_ALWAYS (mpz_cmp_ui (g, 1) == 0);				\
4635	mpz_clear (g);							\
4636      }									\
4637  } while (0)
4638
4639#define MPF_CHECK_FORMAT(f)						\
4640  do {									\
4641    ASSERT_ALWAYS (PREC(f) >= __GMPF_BITS_TO_PREC(53));			\
4642    ASSERT_ALWAYS (ABSIZ(f) <= PREC(f)+1);				\
4643    if (SIZ(f) == 0)							\
4644      ASSERT_ALWAYS (EXP(f) == 0);					\
4645    if (SIZ(f) != 0)							\
4646      ASSERT_ALWAYS (PTR(f)[ABSIZ(f) - 1] != 0);			\
4647  } while (0)
4648
4649
/* Enhancement: The "mod" and "gcd_11" functions below could have
   __GMP_ATTRIBUTE_PURE, but currently (gcc 3.3) that's not supported on
   function pointers, only actual functions.  It probably doesn't make much
   difference to the gmp code, since hopefully we arrange calls so there's
   no great need for the compiler to move things around.  */
4655
4656#if WANT_FAT_BINARY && (HAVE_HOST_CPU_FAMILY_x86 || HAVE_HOST_CPU_FAMILY_x86_64)
4657/* NOTE: The function pointers in this struct are also in CPUVEC_FUNCS_LIST
4658   in mpn/x86/x86-defs.m4 and mpn/x86_64/x86_64-defs.m4.  Be sure to update
4659   those when changing here.  */
4660struct cpuvec_t {
4661  DECL_add_n           ((*add_n));
4662  DECL_addlsh1_n       ((*addlsh1_n));
4663  DECL_addlsh2_n       ((*addlsh2_n));
4664  DECL_addmul_1        ((*addmul_1));
4665  DECL_addmul_2        ((*addmul_2));
4666  DECL_bdiv_dbm1c      ((*bdiv_dbm1c));
4667  DECL_cnd_add_n       ((*cnd_add_n));
4668  DECL_cnd_sub_n       ((*cnd_sub_n));
4669  DECL_com             ((*com));
4670  DECL_copyd           ((*copyd));
4671  DECL_copyi           ((*copyi));
4672  DECL_divexact_1      ((*divexact_1));
4673  DECL_divrem_1        ((*divrem_1));
4674  DECL_gcd_11          ((*gcd_11));
4675  DECL_lshift          ((*lshift));
4676  DECL_lshiftc         ((*lshiftc));
4677  DECL_mod_1           ((*mod_1));
4678  DECL_mod_1_1p        ((*mod_1_1p));
4679  DECL_mod_1_1p_cps    ((*mod_1_1p_cps));
4680  DECL_mod_1s_2p       ((*mod_1s_2p));
4681  DECL_mod_1s_2p_cps   ((*mod_1s_2p_cps));
4682  DECL_mod_1s_4p       ((*mod_1s_4p));
4683  DECL_mod_1s_4p_cps   ((*mod_1s_4p_cps));
4684  DECL_mod_34lsub1     ((*mod_34lsub1));
4685  DECL_modexact_1c_odd ((*modexact_1c_odd));
4686  DECL_mul_1           ((*mul_1));
4687  DECL_mul_basecase    ((*mul_basecase));
4688  DECL_mullo_basecase  ((*mullo_basecase));
4689  DECL_preinv_divrem_1 ((*preinv_divrem_1));
4690  DECL_preinv_mod_1    ((*preinv_mod_1));
4691  DECL_redc_1          ((*redc_1));
4692  DECL_redc_2          ((*redc_2));
4693  DECL_rshift          ((*rshift));
4694  DECL_sqr_basecase    ((*sqr_basecase));
4695  DECL_sub_n           ((*sub_n));
4696  DECL_sublsh1_n       ((*sublsh1_n));
4697  DECL_submul_1        ((*submul_1));
4698  mp_size_t            mul_toom22_threshold;
4699  mp_size_t            mul_toom33_threshold;
4700  mp_size_t            sqr_toom2_threshold;
4701  mp_size_t            sqr_toom3_threshold;
4702  mp_size_t            bmod_1_to_mod_1_threshold;
4703};
4704__GMP_DECLSPEC extern struct cpuvec_t __gmpn_cpuvec;
4705__GMP_DECLSPEC extern int __gmpn_cpuvec_initialized;
4706#endif /* x86 fat binary */
4707
4708__GMP_DECLSPEC void __gmpn_cpuvec_init (void);
4709
4710/* Get a threshold "field" from __gmpn_cpuvec, running __gmpn_cpuvec_init()
4711   if that hasn't yet been done (to establish the right values).  */
4712#define CPUVEC_THRESHOLD(field)						      \
4713  ((LIKELY (__gmpn_cpuvec_initialized) ? 0 : (__gmpn_cpuvec_init (), 0)),     \
4714   __gmpn_cpuvec.field)
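
/* Usage sketch (illustration only, not compiled; meaningful in fat builds
   only): fetch a tuned threshold, triggering the one-time CPU detection if
   it hasn't run yet.  */
#if 0
mp_size_t  example_threshold = CPUVEC_THRESHOLD (mul_toom22_threshold);
#endif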
4715
4716
4717#if HAVE_NATIVE_mpn_add_nc
4718#define mpn_add_nc __MPN(add_nc)
4719__GMP_DECLSPEC mp_limb_t mpn_add_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
4720#else
4721static inline
4722mp_limb_t
4723mpn_add_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, mp_limb_t ci)
4724{
4725  mp_limb_t co;
4726  co = mpn_add_n (rp, up, vp, n);
4727  co += mpn_add_1 (rp, rp, n, ci);
4728  return co;
4729}
4730#endif
4731
4732#if HAVE_NATIVE_mpn_sub_nc
4733#define mpn_sub_nc __MPN(sub_nc)
4734__GMP_DECLSPEC mp_limb_t mpn_sub_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
4735#else
4736static inline mp_limb_t
4737mpn_sub_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, mp_limb_t ci)
4738{
4739  mp_limb_t co;
4740  co = mpn_sub_n (rp, up, vp, n);
4741  co += mpn_sub_1 (rp, rp, n, ci);
4742  return co;
4743}
4744#endif
4745
4746#if TUNE_PROGRAM_BUILD
4747/* Some extras wanted when recompiling some .c files for use by the tune
4748   program.  Not part of a normal build.
4749
   It's necessary to keep these thresholds as #defines (each just expanding
   to an identically named variable), since various defaults are established
   based on #ifdef in the .c files.  For some this is not so (their defaults
   are instead established above), but all are done this way for
   consistency. */
4754
4755#undef	MUL_TOOM22_THRESHOLD
4756#define MUL_TOOM22_THRESHOLD		mul_toom22_threshold
4757extern mp_size_t			mul_toom22_threshold;
4758
4759#undef	MUL_TOOM33_THRESHOLD
4760#define MUL_TOOM33_THRESHOLD		mul_toom33_threshold
4761extern mp_size_t			mul_toom33_threshold;
4762
4763#undef	MUL_TOOM44_THRESHOLD
4764#define MUL_TOOM44_THRESHOLD		mul_toom44_threshold
4765extern mp_size_t			mul_toom44_threshold;
4766
4767#undef	MUL_TOOM6H_THRESHOLD
4768#define MUL_TOOM6H_THRESHOLD		mul_toom6h_threshold
4769extern mp_size_t			mul_toom6h_threshold;
4770
4771#undef	MUL_TOOM8H_THRESHOLD
4772#define MUL_TOOM8H_THRESHOLD		mul_toom8h_threshold
4773extern mp_size_t			mul_toom8h_threshold;
4774
4775#undef	MUL_TOOM32_TO_TOOM43_THRESHOLD
4776#define MUL_TOOM32_TO_TOOM43_THRESHOLD	mul_toom32_to_toom43_threshold
4777extern mp_size_t			mul_toom32_to_toom43_threshold;
4778
4779#undef	MUL_TOOM32_TO_TOOM53_THRESHOLD
4780#define MUL_TOOM32_TO_TOOM53_THRESHOLD	mul_toom32_to_toom53_threshold
4781extern mp_size_t			mul_toom32_to_toom53_threshold;
4782
4783#undef	MUL_TOOM42_TO_TOOM53_THRESHOLD
4784#define MUL_TOOM42_TO_TOOM53_THRESHOLD	mul_toom42_to_toom53_threshold
4785extern mp_size_t			mul_toom42_to_toom53_threshold;
4786
4787#undef	MUL_TOOM42_TO_TOOM63_THRESHOLD
4788#define MUL_TOOM42_TO_TOOM63_THRESHOLD	mul_toom42_to_toom63_threshold
4789extern mp_size_t			mul_toom42_to_toom63_threshold;
4790
4791#undef  MUL_TOOM43_TO_TOOM54_THRESHOLD
#define MUL_TOOM43_TO_TOOM54_THRESHOLD	mul_toom43_to_toom54_threshold
4793extern mp_size_t			mul_toom43_to_toom54_threshold;
4794
4795#undef	MUL_FFT_THRESHOLD
4796#define MUL_FFT_THRESHOLD		mul_fft_threshold
4797extern mp_size_t			mul_fft_threshold;
4798
4799#undef	MUL_FFT_MODF_THRESHOLD
4800#define MUL_FFT_MODF_THRESHOLD		mul_fft_modf_threshold
4801extern mp_size_t			mul_fft_modf_threshold;
4802
4803#undef	MUL_FFT_TABLE
4804#define MUL_FFT_TABLE			{ 0 }
4805
4806#undef	MUL_FFT_TABLE3
4807#define MUL_FFT_TABLE3			{ {0,0} }
4808
4809/* A native mpn_sqr_basecase is not tuned and SQR_BASECASE_THRESHOLD should
4810   remain as zero (always use it). */
4811#if ! HAVE_NATIVE_mpn_sqr_basecase
4812#undef	SQR_BASECASE_THRESHOLD
4813#define SQR_BASECASE_THRESHOLD		sqr_basecase_threshold
4814extern mp_size_t			sqr_basecase_threshold;
4815#endif
4816
4817#if TUNE_PROGRAM_BUILD_SQR
4818#undef	SQR_TOOM2_THRESHOLD
4819#define SQR_TOOM2_THRESHOLD		SQR_TOOM2_MAX_GENERIC
4820#else
4821#undef	SQR_TOOM2_THRESHOLD
4822#define SQR_TOOM2_THRESHOLD		sqr_toom2_threshold
4823extern mp_size_t			sqr_toom2_threshold;
4824#endif
4825
4826#undef	SQR_TOOM3_THRESHOLD
4827#define SQR_TOOM3_THRESHOLD		sqr_toom3_threshold
4828extern mp_size_t			sqr_toom3_threshold;
4829
4830#undef	SQR_TOOM4_THRESHOLD
4831#define SQR_TOOM4_THRESHOLD		sqr_toom4_threshold
4832extern mp_size_t			sqr_toom4_threshold;
4833
4834#undef	SQR_TOOM6_THRESHOLD
4835#define SQR_TOOM6_THRESHOLD		sqr_toom6_threshold
4836extern mp_size_t			sqr_toom6_threshold;
4837
4838#undef	SQR_TOOM8_THRESHOLD
4839#define SQR_TOOM8_THRESHOLD		sqr_toom8_threshold
4840extern mp_size_t			sqr_toom8_threshold;
4841
4842#undef  SQR_FFT_THRESHOLD
4843#define SQR_FFT_THRESHOLD		sqr_fft_threshold
4844extern mp_size_t			sqr_fft_threshold;
4845
4846#undef  SQR_FFT_MODF_THRESHOLD
4847#define SQR_FFT_MODF_THRESHOLD		sqr_fft_modf_threshold
4848extern mp_size_t			sqr_fft_modf_threshold;
4849
4850#undef	SQR_FFT_TABLE
4851#define SQR_FFT_TABLE			{ 0 }
4852
4853#undef	SQR_FFT_TABLE3
4854#define SQR_FFT_TABLE3			{ {0,0} }
4855
4856#undef	MULLO_BASECASE_THRESHOLD
4857#define MULLO_BASECASE_THRESHOLD	mullo_basecase_threshold
4858extern mp_size_t			mullo_basecase_threshold;
4859
4860#undef	MULLO_DC_THRESHOLD
4861#define MULLO_DC_THRESHOLD		mullo_dc_threshold
4862extern mp_size_t			mullo_dc_threshold;
4863
4864#undef	MULLO_MUL_N_THRESHOLD
4865#define MULLO_MUL_N_THRESHOLD		mullo_mul_n_threshold
4866extern mp_size_t			mullo_mul_n_threshold;
4867
4868#undef	SQRLO_BASECASE_THRESHOLD
4869#define SQRLO_BASECASE_THRESHOLD	sqrlo_basecase_threshold
4870extern mp_size_t			sqrlo_basecase_threshold;
4871
4872#undef	SQRLO_DC_THRESHOLD
4873#define SQRLO_DC_THRESHOLD		sqrlo_dc_threshold
4874extern mp_size_t			sqrlo_dc_threshold;
4875
4876#undef	SQRLO_SQR_THRESHOLD
4877#define SQRLO_SQR_THRESHOLD		sqrlo_sqr_threshold
4878extern mp_size_t			sqrlo_sqr_threshold;
4879
4880#undef	MULMID_TOOM42_THRESHOLD
4881#define MULMID_TOOM42_THRESHOLD		mulmid_toom42_threshold
4882extern mp_size_t			mulmid_toom42_threshold;
4883
4884#undef	DIV_QR_2_PI2_THRESHOLD
4885#define DIV_QR_2_PI2_THRESHOLD		div_qr_2_pi2_threshold
4886extern mp_size_t			div_qr_2_pi2_threshold;
4887
4888#undef	DC_DIV_QR_THRESHOLD
4889#define DC_DIV_QR_THRESHOLD		dc_div_qr_threshold
4890extern mp_size_t			dc_div_qr_threshold;
4891
4892#undef	DC_DIVAPPR_Q_THRESHOLD
4893#define DC_DIVAPPR_Q_THRESHOLD		dc_divappr_q_threshold
4894extern mp_size_t			dc_divappr_q_threshold;
4895
4896#undef	DC_BDIV_Q_THRESHOLD
4897#define DC_BDIV_Q_THRESHOLD		dc_bdiv_q_threshold
4898extern mp_size_t			dc_bdiv_q_threshold;
4899
4900#undef	DC_BDIV_QR_THRESHOLD
4901#define DC_BDIV_QR_THRESHOLD		dc_bdiv_qr_threshold
4902extern mp_size_t			dc_bdiv_qr_threshold;
4903
4904#undef	MU_DIV_QR_THRESHOLD
4905#define MU_DIV_QR_THRESHOLD		mu_div_qr_threshold
4906extern mp_size_t			mu_div_qr_threshold;
4907
4908#undef	MU_DIVAPPR_Q_THRESHOLD
4909#define MU_DIVAPPR_Q_THRESHOLD		mu_divappr_q_threshold
4910extern mp_size_t			mu_divappr_q_threshold;
4911
4912#undef	MUPI_DIV_QR_THRESHOLD
4913#define MUPI_DIV_QR_THRESHOLD		mupi_div_qr_threshold
4914extern mp_size_t			mupi_div_qr_threshold;
4915
4916#undef	MU_BDIV_QR_THRESHOLD
4917#define MU_BDIV_QR_THRESHOLD		mu_bdiv_qr_threshold
4918extern mp_size_t			mu_bdiv_qr_threshold;
4919
4920#undef	MU_BDIV_Q_THRESHOLD
4921#define MU_BDIV_Q_THRESHOLD		mu_bdiv_q_threshold
4922extern mp_size_t			mu_bdiv_q_threshold;
4923
4924#undef	INV_MULMOD_BNM1_THRESHOLD
4925#define INV_MULMOD_BNM1_THRESHOLD	inv_mulmod_bnm1_threshold
4926extern mp_size_t			inv_mulmod_bnm1_threshold;
4927
4928#undef	INV_NEWTON_THRESHOLD
4929#define INV_NEWTON_THRESHOLD		inv_newton_threshold
4930extern mp_size_t			inv_newton_threshold;
4931
4932#undef	INV_APPR_THRESHOLD
4933#define INV_APPR_THRESHOLD		inv_appr_threshold
4934extern mp_size_t			inv_appr_threshold;
4935
4936#undef	BINV_NEWTON_THRESHOLD
4937#define BINV_NEWTON_THRESHOLD		binv_newton_threshold
4938extern mp_size_t			binv_newton_threshold;
4939
4940#undef	REDC_1_TO_REDC_2_THRESHOLD
4941#define REDC_1_TO_REDC_2_THRESHOLD	redc_1_to_redc_2_threshold
4942extern mp_size_t			redc_1_to_redc_2_threshold;
4943
4944#undef	REDC_2_TO_REDC_N_THRESHOLD
4945#define REDC_2_TO_REDC_N_THRESHOLD	redc_2_to_redc_n_threshold
4946extern mp_size_t			redc_2_to_redc_n_threshold;
4947
4948#undef	REDC_1_TO_REDC_N_THRESHOLD
4949#define REDC_1_TO_REDC_N_THRESHOLD	redc_1_to_redc_n_threshold
4950extern mp_size_t			redc_1_to_redc_n_threshold;
4951
4952#undef	MATRIX22_STRASSEN_THRESHOLD
4953#define MATRIX22_STRASSEN_THRESHOLD	matrix22_strassen_threshold
4954extern mp_size_t			matrix22_strassen_threshold;
4955
4956typedef int hgcd2_func_t (mp_limb_t, mp_limb_t, mp_limb_t, mp_limb_t,
4957			  struct hgcd_matrix1 *);
4958extern hgcd2_func_t *hgcd2_func;
4959
4960#undef	HGCD_THRESHOLD
4961#define HGCD_THRESHOLD			hgcd_threshold
4962extern mp_size_t			hgcd_threshold;
4963
4964#undef	HGCD_APPR_THRESHOLD
4965#define HGCD_APPR_THRESHOLD		hgcd_appr_threshold
4966extern mp_size_t			hgcd_appr_threshold;
4967
4968#undef	HGCD_REDUCE_THRESHOLD
4969#define HGCD_REDUCE_THRESHOLD		hgcd_reduce_threshold
4970extern mp_size_t			hgcd_reduce_threshold;
4971
4972#undef	GCD_DC_THRESHOLD
4973#define GCD_DC_THRESHOLD		gcd_dc_threshold
4974extern mp_size_t			gcd_dc_threshold;
4975
4976#undef  GCDEXT_DC_THRESHOLD
4977#define GCDEXT_DC_THRESHOLD		gcdext_dc_threshold
4978extern mp_size_t			gcdext_dc_threshold;
4979
4980#undef  DIV_QR_1N_PI1_METHOD
4981#define DIV_QR_1N_PI1_METHOD		div_qr_1n_pi1_method
4982extern int				div_qr_1n_pi1_method;
4983
4984#undef  DIV_QR_1_NORM_THRESHOLD
4985#define DIV_QR_1_NORM_THRESHOLD		div_qr_1_norm_threshold
4986extern mp_size_t			div_qr_1_norm_threshold;
4987
4988#undef  DIV_QR_1_UNNORM_THRESHOLD
4989#define DIV_QR_1_UNNORM_THRESHOLD	div_qr_1_unnorm_threshold
4990extern mp_size_t			div_qr_1_unnorm_threshold;
4991
4992#undef  DIVREM_1_NORM_THRESHOLD
4993#define DIVREM_1_NORM_THRESHOLD		divrem_1_norm_threshold
4994extern mp_size_t			divrem_1_norm_threshold;
4995
4996#undef  DIVREM_1_UNNORM_THRESHOLD
4997#define DIVREM_1_UNNORM_THRESHOLD	divrem_1_unnorm_threshold
4998extern mp_size_t			divrem_1_unnorm_threshold;
4999
5000#undef	MOD_1_NORM_THRESHOLD
5001#define MOD_1_NORM_THRESHOLD		mod_1_norm_threshold
5002extern mp_size_t			mod_1_norm_threshold;
5003
5004#undef	MOD_1_UNNORM_THRESHOLD
5005#define MOD_1_UNNORM_THRESHOLD		mod_1_unnorm_threshold
5006extern mp_size_t			mod_1_unnorm_threshold;
5007
5008#undef  MOD_1_1P_METHOD
5009#define MOD_1_1P_METHOD			mod_1_1p_method
5010extern int				mod_1_1p_method;
5011
5012#undef	MOD_1N_TO_MOD_1_1_THRESHOLD
5013#define MOD_1N_TO_MOD_1_1_THRESHOLD	mod_1n_to_mod_1_1_threshold
5014extern mp_size_t			mod_1n_to_mod_1_1_threshold;
5015
5016#undef	MOD_1U_TO_MOD_1_1_THRESHOLD
5017#define MOD_1U_TO_MOD_1_1_THRESHOLD	mod_1u_to_mod_1_1_threshold
5018extern mp_size_t			mod_1u_to_mod_1_1_threshold;
5019
5020#undef	MOD_1_1_TO_MOD_1_2_THRESHOLD
5021#define MOD_1_1_TO_MOD_1_2_THRESHOLD	mod_1_1_to_mod_1_2_threshold
5022extern mp_size_t			mod_1_1_to_mod_1_2_threshold;
5023
5024#undef	MOD_1_2_TO_MOD_1_4_THRESHOLD
5025#define MOD_1_2_TO_MOD_1_4_THRESHOLD	mod_1_2_to_mod_1_4_threshold
5026extern mp_size_t			mod_1_2_to_mod_1_4_threshold;
5027
5028#undef	PREINV_MOD_1_TO_MOD_1_THRESHOLD
5029#define PREINV_MOD_1_TO_MOD_1_THRESHOLD	preinv_mod_1_to_mod_1_threshold
5030extern mp_size_t			preinv_mod_1_to_mod_1_threshold;
5031
5032#if ! UDIV_PREINV_ALWAYS
5033#undef	DIVREM_2_THRESHOLD
5034#define DIVREM_2_THRESHOLD		divrem_2_threshold
5035extern mp_size_t			divrem_2_threshold;
5036#endif
5037
5038#undef	MULMOD_BNM1_THRESHOLD
5039#define MULMOD_BNM1_THRESHOLD		mulmod_bnm1_threshold
5040extern mp_size_t			mulmod_bnm1_threshold;
5041
5042#undef	SQRMOD_BNM1_THRESHOLD
5043#define SQRMOD_BNM1_THRESHOLD		sqrmod_bnm1_threshold
5044extern mp_size_t			sqrmod_bnm1_threshold;
5045
5046#undef	GET_STR_DC_THRESHOLD
5047#define GET_STR_DC_THRESHOLD		get_str_dc_threshold
5048extern mp_size_t			get_str_dc_threshold;
5049
5050#undef  GET_STR_PRECOMPUTE_THRESHOLD
5051#define GET_STR_PRECOMPUTE_THRESHOLD	get_str_precompute_threshold
5052extern mp_size_t			get_str_precompute_threshold;
5053
5054#undef	SET_STR_DC_THRESHOLD
5055#define SET_STR_DC_THRESHOLD		set_str_dc_threshold
5056extern mp_size_t			set_str_dc_threshold;
5057
5058#undef  SET_STR_PRECOMPUTE_THRESHOLD
5059#define SET_STR_PRECOMPUTE_THRESHOLD	set_str_precompute_threshold
5060extern mp_size_t			set_str_precompute_threshold;
5061
5062#undef  FAC_ODD_THRESHOLD
5063#define FAC_ODD_THRESHOLD		fac_odd_threshold
5064extern  mp_size_t			fac_odd_threshold;
5065
5066#undef  FAC_DSC_THRESHOLD
5067#define FAC_DSC_THRESHOLD		fac_dsc_threshold
5068extern  mp_size_t			fac_dsc_threshold;
5069
5070#undef  FFT_TABLE_ATTRS
5071#define FFT_TABLE_ATTRS
5072extern mp_size_t  mpn_fft_table[2][MPN_FFT_TABLE_SIZE];
5073#define FFT_TABLE3_SIZE 2000	/* generous space for tuning */
5074extern struct fft_table_nk mpn_fft_table3[2][FFT_TABLE3_SIZE];
5075
5076/* Sizes the tune program tests up to, used in a couple of recompilations. */
5077#undef MUL_TOOM22_THRESHOLD_LIMIT
5078#undef MUL_TOOM33_THRESHOLD_LIMIT
5079#undef MULLO_BASECASE_THRESHOLD_LIMIT
5080#undef SQRLO_BASECASE_THRESHOLD_LIMIT
5081#undef SQRLO_DC_THRESHOLD_LIMIT
5082#undef SQR_TOOM3_THRESHOLD_LIMIT
5083#define SQR_TOOM2_MAX_GENERIC           200
5084#define MUL_TOOM22_THRESHOLD_LIMIT      700
5085#define MUL_TOOM33_THRESHOLD_LIMIT      700
5086#define SQR_TOOM3_THRESHOLD_LIMIT       400
5087#define MUL_TOOM44_THRESHOLD_LIMIT     1000
5088#define SQR_TOOM4_THRESHOLD_LIMIT      1000
5089#define MUL_TOOM6H_THRESHOLD_LIMIT     1100
5090#define SQR_TOOM6_THRESHOLD_LIMIT      1100
5091#define MUL_TOOM8H_THRESHOLD_LIMIT     1200
5092#define SQR_TOOM8_THRESHOLD_LIMIT      1200
5093#define MULLO_BASECASE_THRESHOLD_LIMIT  200
5094#define SQRLO_BASECASE_THRESHOLD_LIMIT  200
5095#define SQRLO_DC_THRESHOLD_LIMIT        400
5096#define GET_STR_THRESHOLD_LIMIT         150
5097#define FAC_DSC_THRESHOLD_LIMIT        2048
5098
5099#endif /* TUNE_PROGRAM_BUILD */
5100
5101#if defined (__cplusplus)
5102}
5103#endif
5104
5105/* FIXME: Make these itch functions less conservative.  Also consider making
5106   them dependent on just 'an', and compute the allocation directly from 'an'
5107   instead of via n.  */
5108
5109/* toom22/toom2: Scratch need is 2*(an + k), k is the recursion depth.
   k is the smallest k such that
     ceil(an/2^k) < MUL_TOOM22_THRESHOLD,
   which implies that
5113     k = bitsize of floor ((an-1)/(MUL_TOOM22_THRESHOLD-1))
5114       = 1 + floor (log_2 (floor ((an-1)/(MUL_TOOM22_THRESHOLD-1))))
5115*/
5116#define mpn_toom22_mul_itch(an, bn) \
5117  (2 * ((an) + GMP_NUMB_BITS))
5118#define mpn_toom2_sqr_itch(an) \
5119  (2 * ((an) + GMP_NUMB_BITS))
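
/* Sketch of the exact count from the comment above (illustration only, not
   compiled): k is the bit size of floor((an-1)/(MUL_TOOM22_THRESHOLD-1)),
   which stays well below GMP_NUMB_BITS in practice, so the
   2*(an + GMP_NUMB_BITS) macros above are a conservative over-estimate.  */
#if 0
static mp_size_t
example_toom22_exact_itch (mp_size_t an)
{
  mp_size_t  k = 0, t = (an - 1) / (MUL_TOOM22_THRESHOLD - 1);
  while (t != 0)      /* compute the bit size of t */
    {
      k++;
      t >>= 1;
    }
  return 2 * (an + k);
}
#endif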
5120
5121/* toom33/toom3: Scratch need is 5an/2 + 10k, k is the recursion depth.
5122   We use 3an + C, so that we can use a smaller constant.
5123 */
5124#define mpn_toom33_mul_itch(an, bn) \
5125  (3 * (an) + GMP_NUMB_BITS)
5126#define mpn_toom3_sqr_itch(an) \
5127  (3 * (an) + GMP_NUMB_BITS)
5128
/* toom44/toom4: Scratch need is 8an/3 + 13k, k is the recursion depth.
5130   We use 3an + C, so that we can use a smaller constant.
5131 */
5132#define mpn_toom44_mul_itch(an, bn) \
5133  (3 * (an) + GMP_NUMB_BITS)
5134#define mpn_toom4_sqr_itch(an) \
5135  (3 * (an) + GMP_NUMB_BITS)
5136
5137#define mpn_toom6_sqr_itch(n)						\
5138  (((n) - SQR_TOOM6_THRESHOLD)*2 +					\
5139   MAX(SQR_TOOM6_THRESHOLD*2 + GMP_NUMB_BITS*6,				\
5140       mpn_toom4_sqr_itch(SQR_TOOM6_THRESHOLD)))
5141
5142#define MUL_TOOM6H_MIN							\
5143  ((MUL_TOOM6H_THRESHOLD > MUL_TOOM44_THRESHOLD) ?			\
5144    MUL_TOOM6H_THRESHOLD : MUL_TOOM44_THRESHOLD)
5145#define mpn_toom6_mul_n_itch(n)						\
5146  (((n) - MUL_TOOM6H_MIN)*2 +						\
5147   MAX(MUL_TOOM6H_MIN*2 + GMP_NUMB_BITS*6,				\
5148       mpn_toom44_mul_itch(MUL_TOOM6H_MIN,MUL_TOOM6H_MIN)))
5149
5150static inline mp_size_t
5151mpn_toom6h_mul_itch (mp_size_t an, mp_size_t bn) {
5152  mp_size_t estimatedN;
5153  estimatedN = (an + bn) / (size_t) 10 + 1;
5154  return mpn_toom6_mul_n_itch (estimatedN * 6);
5155}
5156
5157#define mpn_toom8_sqr_itch(n)						\
5158  ((((n)*15)>>3) - ((SQR_TOOM8_THRESHOLD*15)>>3) +			\
5159   MAX(((SQR_TOOM8_THRESHOLD*15)>>3) + GMP_NUMB_BITS*6,			\
5160       mpn_toom6_sqr_itch(SQR_TOOM8_THRESHOLD)))
5161
5162#define MUL_TOOM8H_MIN							\
5163  ((MUL_TOOM8H_THRESHOLD > MUL_TOOM6H_MIN) ?				\
5164    MUL_TOOM8H_THRESHOLD : MUL_TOOM6H_MIN)
5165#define mpn_toom8_mul_n_itch(n)						\
5166  ((((n)*15)>>3) - ((MUL_TOOM8H_MIN*15)>>3) +				\
5167   MAX(((MUL_TOOM8H_MIN*15)>>3) + GMP_NUMB_BITS*6,			\
5168       mpn_toom6_mul_n_itch(MUL_TOOM8H_MIN)))
5169
5170static inline mp_size_t
5171mpn_toom8h_mul_itch (mp_size_t an, mp_size_t bn) {
5172  mp_size_t estimatedN;
5173  estimatedN = (an + bn) / (size_t) 14 + 1;
5174  return mpn_toom8_mul_n_itch (estimatedN * 8);
5175}
5176
5177static inline mp_size_t
5178mpn_toom32_mul_itch (mp_size_t an, mp_size_t bn)
5179{
5180  mp_size_t n = 1 + (2 * an >= 3 * bn ? (an - 1) / (size_t) 3 : (bn - 1) >> 1);
5181  mp_size_t itch = 2 * n + 1;
5182
5183  return itch;
5184}
5185
5186static inline mp_size_t
5187mpn_toom42_mul_itch (mp_size_t an, mp_size_t bn)
5188{
5189  mp_size_t n = an >= 2 * bn ? (an + 3) >> 2 : (bn + 1) >> 1;
5190  return 6 * n + 3;
5191}
5192
5193static inline mp_size_t
5194mpn_toom43_mul_itch (mp_size_t an, mp_size_t bn)
5195{
5196  mp_size_t n = 1 + (3 * an >= 4 * bn ? (an - 1) >> 2 : (bn - 1) / (size_t) 3);
5197
5198  return 6*n + 4;
5199}
5200
5201static inline mp_size_t
5202mpn_toom52_mul_itch (mp_size_t an, mp_size_t bn)
5203{
5204  mp_size_t n = 1 + (2 * an >= 5 * bn ? (an - 1) / (size_t) 5 : (bn - 1) >> 1);
5205  return 6*n + 4;
5206}
5207
5208static inline mp_size_t
5209mpn_toom53_mul_itch (mp_size_t an, mp_size_t bn)
5210{
5211  mp_size_t n = 1 + (3 * an >= 5 * bn ? (an - 1) / (size_t) 5 : (bn - 1) / (size_t) 3);
5212  return 10 * n + 10;
5213}
5214
5215static inline mp_size_t
5216mpn_toom62_mul_itch (mp_size_t an, mp_size_t bn)
5217{
5218  mp_size_t n = 1 + (an >= 3 * bn ? (an - 1) / (size_t) 6 : (bn - 1) >> 1);
5219  return 10 * n + 10;
5220}
5221
5222static inline mp_size_t
5223mpn_toom63_mul_itch (mp_size_t an, mp_size_t bn)
5224{
5225  mp_size_t n = 1 + (an >= 2 * bn ? (an - 1) / (size_t) 6 : (bn - 1) / (size_t) 3);
5226  return 9 * n + 3;
5227}
5228
5229static inline mp_size_t
5230mpn_toom54_mul_itch (mp_size_t an, mp_size_t bn)
5231{
5232  mp_size_t n = 1 + (4 * an >= 5 * bn ? (an - 1) / (size_t) 5 : (bn - 1) / (size_t) 4);
5233  return 9 * n + 3;
5234}
5235
/* Let S(n) be the space required for input size n; then
   S(n) = 3 floor(n/2) + 1 + S(floor(n/2)).   */
5238#define mpn_toom42_mulmid_itch(n) \
5239  (3 * (n) + GMP_NUMB_BITS)
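
/* Sketch of that recurrence (illustration only, not compiled; assumes, just
   for illustration, zero scratch at the basecase): it telescopes to a bit
   under 3n plus one limb per halving step, which the bound above covers.  */
#if 0
static mp_size_t
example_mulmid_scratch (mp_size_t n)
{
  return n < MULMID_TOOM42_THRESHOLD ? 0
    : 3 * (n / 2) + 1 + example_mulmid_scratch (n / 2);
}
#endif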
5240
5241#if 0
5242#define mpn_fft_mul mpn_mul_fft_full
5243#else
5244#define mpn_fft_mul mpn_nussbaumer_mul
5245#endif
5246
5247#ifdef __cplusplus
5248
5249/* A little helper for a null-terminated __gmp_allocate_func string.
5250   The destructor ensures it's freed even if an exception is thrown.
5251   The len field is needed by the destructor, and can be used by anyone else
5252   to avoid a second strlen pass over the data.
5253
5254   Since our input is a C string, using strlen is correct.  Perhaps it'd be
5255   more C++-ish style to use std::char_traits<char>::length, but char_traits
5256   isn't available in gcc 2.95.4.  */
5257
5258class gmp_allocated_string {
5259 public:
5260  char *str;
5261  size_t len;
5262  gmp_allocated_string(char *arg)
5263  {
5264    str = arg;
5265    len = std::strlen (str);
5266  }
5267  ~gmp_allocated_string()
5268  {
5269    (*__gmp_free_func) (str, len+1);
5270  }
5271};
5272
5273std::istream &__gmpz_operator_in_nowhite (std::istream &, mpz_ptr, char);
5274int __gmp_istream_set_base (std::istream &, char &, bool &, bool &);
5275void __gmp_istream_set_digits (std::string &, std::istream &, char &, bool &, int);
5276void __gmp_doprnt_params_from_ios (struct doprnt_params_t *, std::ios &);
5277std::ostream& __gmp_doprnt_integer_ostream (std::ostream &, struct doprnt_params_t *, char *);
5278extern const struct doprnt_funs_t  __gmp_asprintf_funs_noformat;
5279
5280#endif /* __cplusplus */
5281
5282#endif /* __GMP_IMPL_H__ */
5283