1/* mini-gmp, a minimalistic implementation of a GNU GMP subset.
2
   Contributed to the GNU project by Niels Möller
4
5Copyright 1991-1997, 1999-2019 Free Software Foundation, Inc.
6
7This file is part of the GNU MP Library.
8
9The GNU MP Library is free software; you can redistribute it and/or modify
10it under the terms of either:
11
12  * the GNU Lesser General Public License as published by the Free
13    Software Foundation; either version 3 of the License, or (at your
14    option) any later version.
15
16or
17
18  * the GNU General Public License as published by the Free Software
19    Foundation; either version 2 of the License, or (at your option) any
20    later version.
21
22or both in parallel, as here.
23
24The GNU MP Library is distributed in the hope that it will be useful, but
25WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
26or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
27for more details.
28
29You should have received copies of the GNU General Public License and the
30GNU Lesser General Public License along with the GNU MP Library.  If not,
31see https://www.gnu.org/licenses/.  */
32
33/* NOTE: All functions in this file which are not declared in
34   mini-gmp.h are internal, and are not intended to be compatible
35   with GMP or with future versions of mini-gmp. */
36
37/* Much of the material copied from GMP files, including: gmp-impl.h,
38   longlong.h, mpn/generic/add_n.c, mpn/generic/addmul_1.c,
39   mpn/generic/lshift.c, mpn/generic/mul_1.c,
40   mpn/generic/mul_basecase.c, mpn/generic/rshift.c,
41   mpn/generic/sbpi1_div_qr.c, mpn/generic/sub_n.c,
42   mpn/generic/submul_1.c. */
43
44#include <assert.h>
45#include <ctype.h>
46#include <limits.h>
47#include <stdio.h>
48#include <stdlib.h>
49#include <string.h>
50
51#include "mini-gmp.h"
52
53#if !defined(MINI_GMP_DONT_USE_FLOAT_H)
54#include <float.h>
55#endif
56
57
/* Macros */

/* Number of bits in one mp_limb_t. */
#define GMP_LIMB_BITS (sizeof(mp_limb_t) * CHAR_BIT)

/* All-ones limb, and the limb with only the most significant bit set. */
#define GMP_LIMB_MAX ((mp_limb_t) ~ (mp_limb_t) 0)
#define GMP_LIMB_HIGHBIT ((mp_limb_t) 1 << (GMP_LIMB_BITS - 1))

/* Half-limb base b = 2^(GMP_LIMB_BITS/2), and the mask selecting the
   low half of a limb. */
#define GMP_HLIMB_BIT ((mp_limb_t) 1 << (GMP_LIMB_BITS / 2))
#define GMP_LLIMB_MASK (GMP_HLIMB_BIT - 1)

#define GMP_ULONG_BITS (sizeof(unsigned long) * CHAR_BIT)
#define GMP_ULONG_HIGHBIT ((unsigned long) 1 << (GMP_ULONG_BITS - 1))

#define GMP_ABS(x) ((x) >= 0 ? (x) : -(x))
/* Negate a signed value into unsigned type T, written so that the most
   negative value does not overflow before the conversion. */
#define GMP_NEG_CAST(T,x) (-((T)((x) + 1) - 1))

#define GMP_MIN(a, b) ((a) < (b) ? (a) : (b))
#define GMP_MAX(a, b) ((a) > (b) ? (a) : (b))

/* Three-way comparison, yielding -1, 0 or 1. */
#define GMP_CMP(a,b) (((a) > (b)) - ((a) < (b)))

/* Bits in the mantissa of a double; falls back to the common IEEE 754
   value 53 when float.h was not included. */
#if defined(DBL_MANT_DIG) && FLT_RADIX == 2
#define GMP_DBL_MANT_BITS DBL_MANT_DIG
#else
#define GMP_DBL_MANT_BITS (53)
#endif

/* Return non-zero if xp,xsize and yp,ysize overlap.
   If xp+xsize<=yp there's no overlap, or if yp+ysize<=xp there's no
   overlap.  If both these are false, there's an overlap. */
#define GMP_MPN_OVERLAP_P(xp, xsize, yp, ysize)				\
  ((xp) + (xsize) > (yp) && (yp) + (ysize) > (xp))
89
/* Evaluate x and assert that the carry/borrow limb it returns is zero. */
#define gmp_assert_nocarry(x) do { \
    mp_limb_t __cy = (x);	   \
    assert (__cy == 0);		   \
  } while (0)

/* Count leading zero bits of the non-zero limb x into count.  A first
   loop skips whole zero bytes (only when limbs are wider than 8 bits),
   then a second loop handles the remaining bits one at a time. */
#define gmp_clz(count, x) do {						\
    mp_limb_t __clz_x = (x);						\
    unsigned __clz_c = 0;						\
    int LOCAL_SHIFT_BITS = 8;						\
    if (GMP_LIMB_BITS > LOCAL_SHIFT_BITS)				\
      for (;								\
	   (__clz_x & ((mp_limb_t) 0xff << (GMP_LIMB_BITS - 8))) == 0;	\
	   __clz_c += 8)						\
	{ __clz_x <<= LOCAL_SHIFT_BITS;	}				\
    for (; (__clz_x & GMP_LIMB_HIGHBIT) == 0; __clz_c++)		\
      __clz_x <<= 1;							\
    (count) = __clz_c;							\
  } while (0)

/* Count trailing zero bits of the non-zero limb x into count, by
   applying gmp_clz to the isolated lowest set bit (x & -x). */
#define gmp_ctz(count, x) do {						\
    mp_limb_t __ctz_x = (x);						\
    unsigned __ctz_c = 0;						\
    gmp_clz (__ctz_c, __ctz_x & - __ctz_x);				\
    (count) = GMP_LIMB_BITS - 1 - __ctz_c;				\
  } while (0)
115
/* Double-limb addition: (sh,sl) <- (ah,al) + (bh,bl).  Any carry out
   of the high limb is discarded. */
#define gmp_add_ssaaaa(sh, sl, ah, al, bh, bl) \
  do {									\
    mp_limb_t __x;							\
    __x = (al) + (bl);							\
    (sh) = (ah) + (bh) + (__x < (al));					\
    (sl) = __x;								\
  } while (0)

/* Double-limb subtraction: (sh,sl) <- (ah,al) - (bh,bl).  Any borrow
   out of the high limb is discarded. */
#define gmp_sub_ddmmss(sh, sl, ah, al, bh, bl) \
  do {									\
    mp_limb_t __x;							\
    __x = (al) - (bl);							\
    (sh) = (ah) - (bh) - ((al) < (bl));					\
    (sl) = __x;								\
  } while (0)
131
/* Full limb product: (w1,w0) <- u * v.  Uses a native integer type of
   at least double limb width when one is available; otherwise falls
   back to schoolbook multiplication on half limbs. */
#define gmp_umul_ppmm(w1, w0, u, v)					\
  do {									\
    int LOCAL_GMP_LIMB_BITS = GMP_LIMB_BITS;				\
    if (sizeof(unsigned int) * CHAR_BIT >= 2 * GMP_LIMB_BITS)		\
      {									\
	unsigned int __ww = (unsigned int) (u) * (v);			\
	w0 = (mp_limb_t) __ww;						\
	w1 = (mp_limb_t) (__ww >> LOCAL_GMP_LIMB_BITS);			\
      }									\
    else if (GMP_ULONG_BITS >= 2 * GMP_LIMB_BITS)			\
      {									\
	unsigned long int __ww = (unsigned long int) (u) * (v);		\
	w0 = (mp_limb_t) __ww;						\
	w1 = (mp_limb_t) (__ww >> LOCAL_GMP_LIMB_BITS);			\
      }									\
    else {								\
      mp_limb_t __x0, __x1, __x2, __x3;					\
      unsigned __ul, __vl, __uh, __vh;					\
      mp_limb_t __u = (u), __v = (v);					\
									\
      /* Split both operands into half limbs. */			\
      __ul = __u & GMP_LLIMB_MASK;					\
      __uh = __u >> (GMP_LIMB_BITS / 2);				\
      __vl = __v & GMP_LLIMB_MASK;					\
      __vh = __v >> (GMP_LIMB_BITS / 2);				\
									\
      __x0 = (mp_limb_t) __ul * __vl;					\
      __x1 = (mp_limb_t) __ul * __vh;					\
      __x2 = (mp_limb_t) __uh * __vl;					\
      __x3 = (mp_limb_t) __uh * __vh;					\
									\
      __x1 += __x0 >> (GMP_LIMB_BITS / 2);/* this can't give carry */	\
      __x1 += __x2;		/* but this indeed can */		\
      if (__x1 < __x2)		/* did we get it? */			\
	__x3 += GMP_HLIMB_BIT;	/* yes, add it in the proper pos. */	\
									\
      (w1) = __x3 + (__x1 >> (GMP_LIMB_BITS / 2));			\
      (w0) = (__x1 << (GMP_LIMB_BITS / 2)) + (__x0 & GMP_LLIMB_MASK);	\
    }									\
  } while (0)
171
/* 2/1 division with a precomputed inverse: divide the two-limb number
   (nh,nl) by the normalized divisor d, using inverse di, giving
   quotient q and remainder r.  Candidate quotient is at most 2 too
   small, so at most two adjustment steps are needed. */
#define gmp_udiv_qrnnd_preinv(q, r, nh, nl, d, di)			\
  do {									\
    mp_limb_t _qh, _ql, _r, _mask;					\
    gmp_umul_ppmm (_qh, _ql, (nh), (di));				\
    gmp_add_ssaaaa (_qh, _ql, _qh, _ql, (nh) + 1, (nl));		\
    _r = (nl) - _qh * (d);						\
    _mask = -(mp_limb_t) (_r > _ql); /* both > and >= are OK */		\
    _qh += _mask;							\
    _r += _mask & (d);							\
    if (_r >= (d))							\
      {									\
	_r -= (d);							\
	_qh++;								\
      }									\
									\
    (r) = _r;								\
    (q) = _qh;								\
  } while (0)
190
/* 3/2 division with a precomputed inverse: divide the three-limb number
   (n2,n1,n0) by the normalized two-limb divisor (d1,d0), using the 3/2
   inverse dinv.  One quotient limb goes to q, the two-limb remainder
   to (r1,r0). */
#define gmp_udiv_qr_3by2(q, r1, r0, n2, n1, n0, d1, d0, dinv)		\
  do {									\
    mp_limb_t _q0, _t1, _t0, _mask;					\
    gmp_umul_ppmm ((q), _q0, (n2), (dinv));				\
    gmp_add_ssaaaa ((q), _q0, (q), _q0, (n2), (n1));			\
									\
    /* Compute the two most significant limbs of n - q'd */		\
    (r1) = (n1) - (d1) * (q);						\
    gmp_sub_ddmmss ((r1), (r0), (r1), (n0), (d1), (d0));		\
    gmp_umul_ppmm (_t1, _t0, (d0), (q));				\
    gmp_sub_ddmmss ((r1), (r0), (r1), (r0), _t1, _t0);			\
    (q)++;								\
									\
    /* Conditionally adjust q and the remainders */			\
    _mask = - (mp_limb_t) ((r1) >= _q0);				\
    (q) += _mask;							\
    gmp_add_ssaaaa ((r1), (r0), (r1), (r0), _mask & (d1), _mask & (d0)); \
    if ((r1) >= (d1))							\
      {									\
	if ((r1) > (d1) || (r0) >= (d0))				\
	  {								\
	    (q)++;							\
	    gmp_sub_ddmmss ((r1), (r0), (r1), (r0), (d1), (d0));	\
	  }								\
      }									\
  } while (0)
217
/* Swap macros.  Each exchanges two lvalues of the named type via a
   temporary; the MPN variants swap a pointer together with its size. */
#define MP_LIMB_T_SWAP(x, y)						\
  do {									\
    mp_limb_t __mp_limb_t_swap__tmp = (x);				\
    (x) = (y);								\
    (y) = __mp_limb_t_swap__tmp;					\
  } while (0)
#define MP_SIZE_T_SWAP(x, y)						\
  do {									\
    mp_size_t __mp_size_t_swap__tmp = (x);				\
    (x) = (y);								\
    (y) = __mp_size_t_swap__tmp;					\
  } while (0)
#define MP_BITCNT_T_SWAP(x,y)			\
  do {						\
    mp_bitcnt_t __mp_bitcnt_t_swap__tmp = (x);	\
    (x) = (y);					\
    (y) = __mp_bitcnt_t_swap__tmp;		\
  } while (0)
#define MP_PTR_SWAP(x, y)						\
  do {									\
    mp_ptr __mp_ptr_swap__tmp = (x);					\
    (x) = (y);								\
    (y) = __mp_ptr_swap__tmp;						\
  } while (0)
#define MP_SRCPTR_SWAP(x, y)						\
  do {									\
    mp_srcptr __mp_srcptr_swap__tmp = (x);				\
    (x) = (y);								\
    (y) = __mp_srcptr_swap__tmp;					\
  } while (0)

#define MPN_PTR_SWAP(xp,xs, yp,ys)					\
  do {									\
    MP_PTR_SWAP (xp, yp);						\
    MP_SIZE_T_SWAP (xs, ys);						\
  } while(0)
#define MPN_SRCPTR_SWAP(xp,xs, yp,ys)					\
  do {									\
    MP_SRCPTR_SWAP (xp, yp);						\
    MP_SIZE_T_SWAP (xs, ys);						\
  } while(0)

#define MPZ_PTR_SWAP(x, y)						\
  do {									\
    mpz_ptr __mpz_ptr_swap__tmp = (x);					\
    (x) = (y);								\
    (y) = __mpz_ptr_swap__tmp;						\
  } while (0)
#define MPZ_SRCPTR_SWAP(x, y)						\
  do {									\
    mpz_srcptr __mpz_srcptr_swap__tmp = (x);				\
    (x) = (y);								\
    (y) = __mpz_srcptr_swap__tmp;					\
  } while (0)

/* Public constant: number of bits in one limb. */
const int mp_bits_per_limb = GMP_LIMB_BITS;
275
276
277/* Memory allocation and other helper functions. */
/* Report the fatal condition MSG on stderr and terminate the process. */
static void
gmp_die (const char *msg)
{
  fputs (msg, stderr);
  fputc ('\n', stderr);
  abort ();
}
284
/* Default allocator: allocate SIZE bytes (SIZE must be positive),
   aborting the process via gmp_die on exhaustion.  Never returns NULL. */
static void *
gmp_default_alloc (size_t size)
{
  void *block;

  assert (size > 0);

  block = malloc (size);
  if (block == NULL)
    gmp_die("gmp_default_alloc: Virtual memory exhausted.");
  return block;
}
298
/* Default reallocator: resize OLD to NEW_SIZE bytes, aborting via
   gmp_die on exhaustion.  The old-size argument is unused. */
static void *
gmp_default_realloc (void *old, size_t unused_old_size, size_t new_size)
{
  void *block = realloc (old, new_size);

  if (block == NULL)
    gmp_die("gmp_default_realloc: Virtual memory exhausted.");
  return block;
}
311
/* Default deallocator; the size argument is ignored. */
static void
gmp_default_free (void *p, size_t unused_size)
{
  free (p);
}
317
/* Current memory-management hooks, replaceable through
   mp_set_memory_functions below. */
static void * (*gmp_allocate_func) (size_t) = gmp_default_alloc;
static void * (*gmp_reallocate_func) (void *, size_t, size_t) = gmp_default_realloc;
static void (*gmp_free_func) (void *, size_t) = gmp_default_free;
321
322void
323mp_get_memory_functions (void *(**alloc_func) (size_t),
324			 void *(**realloc_func) (void *, size_t, size_t),
325			 void (**free_func) (void *, size_t))
326{
327  if (alloc_func)
328    *alloc_func = gmp_allocate_func;
329
330  if (realloc_func)
331    *realloc_func = gmp_reallocate_func;
332
333  if (free_func)
334    *free_func = gmp_free_func;
335}
336
337void
338mp_set_memory_functions (void *(*alloc_func) (size_t),
339			 void *(*realloc_func) (void *, size_t, size_t),
340			 void (*free_func) (void *, size_t))
341{
342  if (!alloc_func)
343    alloc_func = gmp_default_alloc;
344  if (!realloc_func)
345    realloc_func = gmp_default_realloc;
346  if (!free_func)
347    free_func = gmp_default_free;
348
349  gmp_allocate_func = alloc_func;
350  gmp_reallocate_func = realloc_func;
351  gmp_free_func = free_func;
352}
353
/* Allocate/free through the installed hooks.  gmp_free passes a dummy
   size of 0; the default free hook ignores it. */
#define gmp_xalloc(size) ((*gmp_allocate_func)((size)))
#define gmp_free(p) ((*gmp_free_func) ((p), 0))
356
357static mp_ptr
358gmp_xalloc_limbs (mp_size_t size)
359{
360  return (mp_ptr) gmp_xalloc (size * sizeof (mp_limb_t));
361}
362
/* Resize OLD to hold SIZE limbs through the installed reallocator.
   SIZE must be positive.  The old size is passed as 0; the default
   realloc hook ignores it. */
static mp_ptr
gmp_xrealloc_limbs (mp_ptr old, mp_size_t size)
{
  assert (size > 0);
  return (mp_ptr) (*gmp_reallocate_func) (old, 0, size * sizeof (mp_limb_t));
}
369
370
371/* MPN interface */
372
373void
374mpn_copyi (mp_ptr d, mp_srcptr s, mp_size_t n)
375{
376  mp_size_t i;
377  for (i = 0; i < n; i++)
378    d[i] = s[i];
379}
380
381void
382mpn_copyd (mp_ptr d, mp_srcptr s, mp_size_t n)
383{
384  while (--n >= 0)
385    d[n] = s[n];
386}
387
388int
389mpn_cmp (mp_srcptr ap, mp_srcptr bp, mp_size_t n)
390{
391  while (--n >= 0)
392    {
393      if (ap[n] != bp[n])
394	return ap[n] > bp[n] ? 1 : -1;
395    }
396  return 0;
397}
398
399static int
400mpn_cmp4 (mp_srcptr ap, mp_size_t an, mp_srcptr bp, mp_size_t bn)
401{
402  if (an != bn)
403    return an < bn ? -1 : 1;
404  else
405    return mpn_cmp (ap, bp, an);
406}
407
408static mp_size_t
409mpn_normalized_size (mp_srcptr xp, mp_size_t n)
410{
411  while (n > 0 && xp[n-1] == 0)
412    --n;
413  return n;
414}
415
416int
417mpn_zero_p(mp_srcptr rp, mp_size_t n)
418{
419  return mpn_normalized_size (rp, n) == 0;
420}
421
422void
423mpn_zero (mp_ptr rp, mp_size_t n)
424{
425  while (--n >= 0)
426    rp[n] = 0;
427}
428
/* rp[] = ap[] + b over n > 0 limbs.  Returns the carry out (0 or 1).
   In-place operation (rp == ap) is fine: each rp[i] is written after
   ap[i] is read. */
mp_limb_t
mpn_add_1 (mp_ptr rp, mp_srcptr ap, mp_size_t n, mp_limb_t b)
{
  mp_size_t i;

  assert (n > 0);
  i = 0;
  do
    {
      mp_limb_t r = ap[i] + b;
      /* Carry out */
      b = (r < b);
      rp[i] = r;
    }
  while (++i < n);

  return b;
}
447
/* rp[] = ap[] + bp[] over n limbs.  Returns the carry out (0 or 1).
   The carry is folded in via two unsigned-overflow tests; since the
   carry is at most 1, the two tests cannot both trigger. */
mp_limb_t
mpn_add_n (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t n)
{
  mp_size_t i;
  mp_limb_t cy;

  for (i = 0, cy = 0; i < n; i++)
    {
      mp_limb_t a, b, r;
      a = ap[i]; b = bp[i];
      r = a + cy;	/* overflow only possible when cy == 1 */
      cy = (r < cy);
      r += b;
      cy += (r < b);
      rp[i] = r;
    }
  return cy;
}
466
/* rp[] = {ap, an} + {bp, bn} with an >= bn.  Adds the common low part
   limb-wise, then propagates the carry through the high limbs of ap.
   Returns the final carry (0 or 1). */
mp_limb_t
mpn_add (mp_ptr rp, mp_srcptr ap, mp_size_t an, mp_srcptr bp, mp_size_t bn)
{
  mp_limb_t cy;

  assert (an >= bn);

  cy = mpn_add_n (rp, ap, bp, bn);
  if (an > bn)
    cy = mpn_add_1 (rp + bn, ap + bn, an - bn, cy);
  return cy;
}
479
/* rp[] = ap[] - b over n > 0 limbs.  Returns the borrow out (0 or 1).
   In-place operation (rp == ap) is fine. */
mp_limb_t
mpn_sub_1 (mp_ptr rp, mp_srcptr ap, mp_size_t n, mp_limb_t b)
{
  mp_size_t i;

  assert (n > 0);

  i = 0;
  do
    {
      mp_limb_t a = ap[i];
      /* Carry out */
      mp_limb_t cy = a < b;
      rp[i] = a - b;
      b = cy;
    }
  while (++i < n);

  return b;
}
500
/* rp[] = ap[] - bp[] over n limbs.  Returns the borrow out (0 or 1).
   The incoming borrow is added to b first; as with mpn_add_n, the two
   overflow tests cannot both trigger in one iteration. */
mp_limb_t
mpn_sub_n (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t n)
{
  mp_size_t i;
  mp_limb_t cy;

  for (i = 0, cy = 0; i < n; i++)
    {
      mp_limb_t a, b;
      a = ap[i]; b = bp[i];
      b += cy;		/* fold in the previous borrow */
      cy = (b < cy);
      cy += (a < b);
      rp[i] = a - b;
    }
  return cy;
}
518
/* rp[] = {ap, an} - {bp, bn} with an >= bn.  Subtracts the common low
   part limb-wise, then propagates the borrow through the high limbs of
   ap.  Returns the final borrow (0 or 1). */
mp_limb_t
mpn_sub (mp_ptr rp, mp_srcptr ap, mp_size_t an, mp_srcptr bp, mp_size_t bn)
{
  mp_limb_t cy;

  assert (an >= bn);

  cy = mpn_sub_n (rp, ap, bp, bn);
  if (an > bn)
    cy = mpn_sub_1 (rp + bn, ap + bn, an - bn, cy);
  return cy;
}
531
/* rp[] = up[] * vl over n >= 1 limbs.  Returns the most significant
   limb of the product (the carry out). */
mp_limb_t
mpn_mul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t vl)
{
  mp_limb_t ul, cl, hpl, lpl;

  assert (n >= 1);

  cl = 0;
  do
    {
      ul = *up++;
      gmp_umul_ppmm (hpl, lpl, ul, vl);

      /* Add the running carry to the low product limb; the new carry
	 is the high product limb plus any overflow from that add. */
      lpl += cl;
      cl = (lpl < cl) + hpl;

      *rp++ = lpl;
    }
  while (--n != 0);

  return cl;
}
554
/* rp[] += up[] * vl over n >= 1 limbs.  Returns the carry out. */
mp_limb_t
mpn_addmul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t vl)
{
  mp_limb_t ul, cl, hpl, lpl, rl;

  assert (n >= 1);

  cl = 0;
  do
    {
      ul = *up++;
      gmp_umul_ppmm (hpl, lpl, ul, vl);

      /* Fold in the running carry. */
      lpl += cl;
      cl = (lpl < cl) + hpl;

      /* Accumulate into the existing limb of rp. */
      rl = *rp;
      lpl = rl + lpl;
      cl += lpl < rl;
      *rp++ = lpl;
    }
  while (--n != 0);

  return cl;
}
580
/* rp[] -= up[] * vl over n >= 1 limbs.  Returns the borrow out. */
mp_limb_t
mpn_submul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t vl)
{
  mp_limb_t ul, cl, hpl, lpl, rl;

  assert (n >= 1);

  cl = 0;
  do
    {
      ul = *up++;
      gmp_umul_ppmm (hpl, lpl, ul, vl);

      /* Fold in the running borrow. */
      lpl += cl;
      cl = (lpl < cl) + hpl;

      /* Subtract from the existing limb of rp; lpl > rl detects the
	 borrow of the unsigned subtraction. */
      rl = *rp;
      lpl = rl - lpl;
      cl += lpl > rl;
      *rp++ = lpl;
    }
  while (--n != 0);

  return cl;
}
606
/* Schoolbook multiplication: {rp, un+vn} = {up, un} * {vp, vn}, with
   un >= vn >= 1 and rp not overlapping either input.  Returns the most
   significant limb of the product, rp[un+vn-1]. */
mp_limb_t
mpn_mul (mp_ptr rp, mp_srcptr up, mp_size_t un, mp_srcptr vp, mp_size_t vn)
{
  assert (un >= vn);
  assert (vn >= 1);
  assert (!GMP_MPN_OVERLAP_P(rp, un + vn, up, un));
  assert (!GMP_MPN_OVERLAP_P(rp, un + vn, vp, vn));

  /* We first multiply by the low order limb. This result can be
     stored, not added, to rp. We also avoid a loop for zeroing this
     way. */

  rp[un] = mpn_mul_1 (rp, up, un, vp[0]);

  /* Now accumulate the product of up[] and the next higher limb from
     vp[]. */

  while (--vn >= 1)
    {
      rp += 1, vp += 1;
      rp[un] = mpn_addmul_1 (rp, up, un, vp[0]);
    }
  /* rp has advanced vn-1 limbs, so this is the top limb of the
     original result area. */
  return rp[un];
}
631
/* {rp, 2n} = {ap, n} * {bp, n}.  rp must not overlap the inputs
   (enforced by mpn_mul's asserts). */
void
mpn_mul_n (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t n)
{
  mpn_mul (rp, ap, n, bp, n);
}
637
/* {rp, 2n} = {ap, n}^2, via plain multiplication.  rp must not
   overlap ap. */
void
mpn_sqr (mp_ptr rp, mp_srcptr ap, mp_size_t n)
{
  mpn_mul (rp, ap, n, ap, n);
}
643
/* Shift {up, n} left by cnt bits (0 < cnt < GMP_LIMB_BITS), storing
   the result in {rp, n}.  Returns the cnt bits shifted out of the top.
   Works from the high end downwards, so it is safe in place and for
   rp >= up. */
mp_limb_t
mpn_lshift (mp_ptr rp, mp_srcptr up, mp_size_t n, unsigned int cnt)
{
  mp_limb_t high_limb, low_limb;
  unsigned int tnc;
  mp_limb_t retval;

  assert (n >= 1);
  assert (cnt >= 1);
  assert (cnt < GMP_LIMB_BITS);

  up += n;
  rp += n;

  tnc = GMP_LIMB_BITS - cnt;	/* complementary shift count */
  low_limb = *--up;
  retval = low_limb >> tnc;	/* bits pushed out at the top */
  high_limb = (low_limb << cnt);

  while (--n != 0)
    {
      low_limb = *--up;
      *--rp = high_limb | (low_limb >> tnc);
      high_limb = (low_limb << cnt);
    }
  *--rp = high_limb;

  return retval;
}
673
/* Shift {up, n} right by cnt bits (0 < cnt < GMP_LIMB_BITS), storing
   the result in {rp, n}.  Returns the cnt shifted-out bits, placed in
   the most significant bits of the returned limb.  Works from the low
   end upwards, so it is safe in place and for rp <= up. */
mp_limb_t
mpn_rshift (mp_ptr rp, mp_srcptr up, mp_size_t n, unsigned int cnt)
{
  mp_limb_t high_limb, low_limb;
  unsigned int tnc;
  mp_limb_t retval;

  assert (n >= 1);
  assert (cnt >= 1);
  assert (cnt < GMP_LIMB_BITS);

  tnc = GMP_LIMB_BITS - cnt;	/* complementary shift count */
  high_limb = *up++;
  retval = (high_limb << tnc);	/* bits pushed out at the bottom */
  low_limb = high_limb >> cnt;

  while (--n != 0)
    {
      high_limb = *up++;
      *rp++ = low_limb | (high_limb << tnc);
      low_limb = high_limb >> cnt;
    }
  *rp = low_limb;

  return retval;
}
700
/* Common helper for mpn_scan0/mpn_scan1.  `limb` is the starting limb
   up[i] already xored with ux, so scanning always looks for a set bit:
   ux == 0 scans for a 1 bit, ux == GMP_LIMB_MAX scans for a 0 bit.
   Returns the absolute bit position of the first hit, or, when the
   scan runs off the un-limb end: ~0 for a 1-bit scan, un*GMP_LIMB_BITS
   for a 0-bit scan (the number is conceptually zero-extended). */
static mp_bitcnt_t
mpn_common_scan (mp_limb_t limb, mp_size_t i, mp_srcptr up, mp_size_t un,
		 mp_limb_t ux)
{
  unsigned cnt;

  assert (ux == 0 || ux == GMP_LIMB_MAX);
  assert (0 <= i && i <= un );

  while (limb == 0)
    {
      i++;
      if (i == un)
	return (ux == 0 ? ~(mp_bitcnt_t) 0 : un * GMP_LIMB_BITS);
      limb = ux ^ up[i];
    }
  gmp_ctz (cnt, limb);
  return (mp_bitcnt_t) i * GMP_LIMB_BITS + cnt;
}
720
/* Find the first 1 bit at or above position `bit` in {ptr, ...}.
   NOTE(review): the starting index i is also passed as the size bound,
   so the bound can never stop the loop — a set bit is presumably
   guaranteed to exist at or above `bit` (GMP-style precondition);
   confirm against callers. */
mp_bitcnt_t
mpn_scan1 (mp_srcptr ptr, mp_bitcnt_t bit)
{
  mp_size_t i;
  i = bit / GMP_LIMB_BITS;

  /* Mask off the bits below `bit` in the first limb examined. */
  return mpn_common_scan ( ptr[i] & (GMP_LIMB_MAX << (bit % GMP_LIMB_BITS)),
			  i, ptr, i, 0);
}
730
/* Find the first 0 bit at or above position `bit` in {ptr, ...}.
   NOTE(review): as in mpn_scan1, the start index doubles as the size
   bound; a 0 bit at or above `bit` is presumably guaranteed — confirm
   against callers. */
mp_bitcnt_t
mpn_scan0 (mp_srcptr ptr, mp_bitcnt_t bit)
{
  mp_size_t i;
  i = bit / GMP_LIMB_BITS;

  /* Complement so the common helper can scan for a set bit. */
  return mpn_common_scan (~ptr[i] & (GMP_LIMB_MAX << (bit % GMP_LIMB_BITS)),
			  i, ptr, i, GMP_LIMB_MAX);
}
740
741void
742mpn_com (mp_ptr rp, mp_srcptr up, mp_size_t n)
743{
744  while (--n >= 0)
745    *rp++ = ~ *up++;
746}
747
/* Two's complement negation: {rp, n} = -{up, n}.  Returns 0 when the
   input is all zero, otherwise 1 (the borrow out of the negation).
   Low zero limbs stay zero, the first non-zero limb is negated, and
   all higher limbs are complemented. */
mp_limb_t
mpn_neg (mp_ptr rp, mp_srcptr up, mp_size_t n)
{
  while (*up == 0)
    {
      *rp = 0;
      if (!--n)
	return 0;	/* input was entirely zero */
      ++up; ++rp;
    }
  *rp = - *up;
  mpn_com (++rp, ++up, --n);
  return 1;
}
762
763
764/* MPN division interface. */
765
/* The 3/2 inverse is defined as

     m = floor( (B^3-1) / (B u1 + u0)) - B

   where B is the limb base.  u1 is assumed normalized, i.e., has its
   high bit set (see the "by normalization" remark below). */
mp_limb_t
mpn_invert_3by2 (mp_limb_t u1, mp_limb_t u0)
{
  mp_limb_t r, m;

  {
    mp_limb_t p, ql;
    unsigned ul, uh, qh;

    /* For notation, let b denote the half-limb base, so that B = b^2.
       Split u1 = b uh + ul. */
    ul = u1 & GMP_LLIMB_MASK;
    uh = u1 >> (GMP_LIMB_BITS / 2);

    /* Approximation of the high half of quotient. Differs from the 2/1
       inverse of the half limb uh, since we have already subtracted
       u0. */
    qh = (u1 ^ GMP_LIMB_MAX) / uh;

    /* Adjust to get a half-limb 3/2 inverse, i.e., we want

       qh' = floor( (b^3 - 1) / u) - b = floor ((b^3 - b u - 1) / u
	   = floor( (b (~u) + b-1) / u),

       and the remainder

       r = b (~u) + b-1 - qh (b uh + ul)
       = b (~u - qh uh) + b-1 - qh ul

       Subtraction of qh ul may underflow, which implies adjustments.
       But by normalization, 2 u >= B > qh ul, so we need to adjust by
       at most 2.
    */

    r = ((~u1 - (mp_limb_t) qh * uh) << (GMP_LIMB_BITS / 2)) | GMP_LLIMB_MASK;

    p = (mp_limb_t) qh * ul;
    /* Adjustment steps taken from udiv_qrnnd_c */
    if (r < p)
      {
	qh--;
	r += u1;
	if (r >= u1) /* i.e. we didn't get carry when adding to r */
	  if (r < p)
	    {
	      qh--;
	      r += u1;
	    }
      }
    r -= p;

    /* Low half of the quotient is

       ql = floor ( (b r + b-1) / u1).

       This is a 3/2 division (on half-limbs), for which qh is a
       suitable inverse. */

    p = (r >> (GMP_LIMB_BITS / 2)) * qh + r;
    /* Unlike full-limb 3/2, we can add 1 without overflow. For this to
       work, it is essential that ql is a full mp_limb_t. */
    ql = (p >> (GMP_LIMB_BITS / 2)) + 1;

    /* By the 3/2 trick, we don't need the high half limb. */
    r = (r << (GMP_LIMB_BITS / 2)) + GMP_LLIMB_MASK - ql * u1;

    if (r >= (GMP_LIMB_MAX & (p << (GMP_LIMB_BITS / 2))))
      {
	ql--;
	r += u1;
      }
    m = ((mp_limb_t) qh << (GMP_LIMB_BITS / 2)) + ql;
    if (r >= u1)
      {
	m++;
	r -= u1;
      }
  }

  /* Now m is the 2/1 inverse of u1. If u0 > 0, adjust it to become a
     3/2 inverse. */
  if (u0 > 0)
    {
      mp_limb_t th, tl;
      r = ~r;
      r += u0;
      if (r < u0)
	{
	  m--;
	  if (r >= u1)
	    {
	      m--;
	      r -= u1;
	    }
	  r -= u1;
	}
      gmp_umul_ppmm (th, tl, u0, m);
      r += th;
      if (r < th)
	{
	  m--;
	  m -= ((r > u1) | ((r == u1) & (tl > u0)));
	}
    }

  return m;
}
877
/* Precomputed data for division by a 1-, 2- or many-limb divisor. */
struct gmp_div_inverse
{
  /* Normalization shift count. */
  unsigned shift;
  /* Normalized divisor (d0 unused for mpn_div_qr_1) */
  mp_limb_t d1, d0;
  /* Inverse, for 2/1 or 3/2. */
  mp_limb_t di;
};
887
/* Prepare INV for a single-limb divisor d > 0: record the shift that
   normalizes d (high bit set) and its inverse (mpn_invert_limb is
   declared elsewhere). */
static void
mpn_div_qr_1_invert (struct gmp_div_inverse *inv, mp_limb_t d)
{
  unsigned shift;

  assert (d > 0);
  gmp_clz (shift, d);
  inv->shift = shift;
  inv->d1 = d << shift;
  inv->di = mpn_invert_limb (inv->d1);
}
899
/* Prepare INV for the two-limb divisor (d1,d0) with d1 > 0: normalize
   so d1's high bit is set, shifting bits of d0 up into d1, and compute
   the 3/2 inverse. */
static void
mpn_div_qr_2_invert (struct gmp_div_inverse *inv,
		     mp_limb_t d1, mp_limb_t d0)
{
  unsigned shift;

  assert (d1 > 0);
  gmp_clz (shift, d1);
  inv->shift = shift;
  if (shift > 0)
    {
      d1 = (d1 << shift) | (d0 >> (GMP_LIMB_BITS - shift));
      d0 <<= shift;
    }
  inv->d1 = d1;
  inv->d0 = d0;
  inv->di = mpn_invert_3by2 (d1, d0);
}
918
/* Prepare INV for an arbitrary divisor {dp, dn}, dn > 0.  For dn > 2,
   only the two most significant limbs are normalized and inverted;
   bits from dp[dn-3] are shifted in during normalization. */
static void
mpn_div_qr_invert (struct gmp_div_inverse *inv,
		   mp_srcptr dp, mp_size_t dn)
{
  assert (dn > 0);

  if (dn == 1)
    mpn_div_qr_1_invert (inv, dp[0]);
  else if (dn == 2)
    mpn_div_qr_2_invert (inv, dp[1], dp[0]);
  else
    {
      unsigned shift;
      mp_limb_t d1, d0;

      d1 = dp[dn-1];
      d0 = dp[dn-2];
      assert (d1 > 0);
      gmp_clz (shift, d1);
      inv->shift = shift;
      if (shift > 0)
	{
	  d1 = (d1 << shift) | (d0 >> (GMP_LIMB_BITS - shift));
	  d0 = (d0 << shift) | (dp[dn-3] >> (GMP_LIMB_BITS - shift));
	}
      inv->d1 = d1;
      inv->d0 = d0;
      inv->di = mpn_invert_3by2 (d1, d0);
    }
}
949
/* Not matching current public gmp interface, rather corresponding to
   the sbpi1_div_* functions. */

/* Divide {np, nn} by the single-limb divisor described by INV.
   Quotient limbs are stored in qp (may be NULL to discard them); the
   remainder is returned.  When normalization is needed, qp doubles as
   scratch for the shifted numerator; a temporary is allocated only
   when qp is NULL. */
static mp_limb_t
mpn_div_qr_1_preinv (mp_ptr qp, mp_srcptr np, mp_size_t nn,
		     const struct gmp_div_inverse *inv)
{
  mp_limb_t d, di;
  mp_limb_t r;
  mp_ptr tp = NULL;

  if (inv->shift > 0)
    {
      /* Shift, reusing qp area if possible. In-place shift if qp == np. */
      tp = qp ? qp : gmp_xalloc_limbs (nn);
      r = mpn_lshift (tp, np, nn, inv->shift);
      np = tp;
    }
  else
    r = 0;

  d = inv->d1;
  di = inv->di;
  /* One 2/1 division step per quotient limb, high to low. */
  while (--nn >= 0)
    {
      mp_limb_t q;

      gmp_udiv_qrnnd_preinv (q, r, r, np[nn], d, di);
      if (qp)
	qp[nn] = q;
    }
  /* Free the scratch only if it was allocated here (tp != qp). */
  if ((inv->shift > 0) && (tp != qp))
    gmp_free (tp);

  /* Undo the normalization shift on the remainder. */
  return r >> inv->shift;
}
985
/* Divide {np, nn} (nn >= 2) by the two-limb divisor described by INV.
   Quotient limbs go to qp (may be NULL); the two-limb remainder is
   left in np[1], np[0].  np is shifted in place for normalization. */
static void
mpn_div_qr_2_preinv (mp_ptr qp, mp_ptr np, mp_size_t nn,
		     const struct gmp_div_inverse *inv)
{
  unsigned shift;
  mp_size_t i;
  mp_limb_t d1, d0, di, r1, r0;

  assert (nn >= 2);
  shift = inv->shift;
  d1 = inv->d1;
  d0 = inv->d0;
  di = inv->di;

  /* Normalize the numerator in place; the shifted-out bits seed the
     high remainder limb. */
  if (shift > 0)
    r1 = mpn_lshift (np, np, nn, shift);
  else
    r1 = 0;

  r0 = np[nn - 1];

  /* One 3/2 division step per quotient limb, high to low. */
  i = nn - 2;
  do
    {
      mp_limb_t n0, q;
      n0 = np[i];
      gmp_udiv_qr_3by2 (q, r1, r0, r1, r0, n0, d1, d0, di);

      if (qp)
	qp[i] = q;
    }
  while (--i >= 0);

  /* Undo the normalization shift on the two remainder limbs. */
  if (shift > 0)
    {
      assert ((r0 & (GMP_LIMB_MAX >> (GMP_LIMB_BITS - shift))) == 0);
      r0 = (r0 >> shift) | (r1 << (GMP_LIMB_BITS - shift));
      r1 >>= shift;
    }

  np[1] = r1;
  np[0] = r0;
}
1029
/* Schoolbook division of {np, nn} (with implicit extra high limb n1)
   by the normalized divisor {dp, dn}, dn > 2, using the 3/2 inverse
   dinv of its top two limbs.  Quotient limbs go to qp (may be NULL);
   the dn-limb remainder is left in np.  Corresponds to GMP's
   sbpi1_div_qr. */
static void
mpn_div_qr_pi1 (mp_ptr qp,
		mp_ptr np, mp_size_t nn, mp_limb_t n1,
		mp_srcptr dp, mp_size_t dn,
		mp_limb_t dinv)
{
  mp_size_t i;

  mp_limb_t d1, d0;
  mp_limb_t cy, cy1;
  mp_limb_t q;

  assert (dn > 2);
  assert (nn >= dn);

  d1 = dp[dn - 1];
  d0 = dp[dn - 2];

  assert ((d1 & GMP_LIMB_HIGHBIT) != 0);
  /* Iteration variable is the index of the q limb.
   *
   * We divide <n1, np[dn-1+i], np[dn-2+i], np[dn-3+i],..., np[i]>
   * by            <d1,          d0,        dp[dn-3],  ..., dp[0] >
   */

  i = nn - dn;
  do
    {
      mp_limb_t n0 = np[dn-1+i];

      if (n1 == d1 && n0 == d0)
	{
	  /* Quotient limb would overflow the 3/2 step; it must be
	     B-1, so handle this case directly. */
	  q = GMP_LIMB_MAX;
	  mpn_submul_1 (np+i, dp, dn, q);
	  n1 = np[dn-1+i];	/* update n1, last loop's value will now be invalid */
	}
      else
	{
	  gmp_udiv_qr_3by2 (q, n1, n0, n1, n0, np[dn-2+i], d1, d0, dinv);

	  /* Subtract q times the low dn-2 divisor limbs, propagating
	     the borrow into the two remainder limbs n0, n1. */
	  cy = mpn_submul_1 (np + i, dp, dn-2, q);

	  cy1 = n0 < cy;
	  n0 = n0 - cy;
	  cy = n1 < cy1;
	  n1 = n1 - cy1;
	  np[dn-2+i] = n0;

	  /* q was one too large: add the divisor back and decrement. */
	  if (cy != 0)
	    {
	      n1 += d1 + mpn_add_n (np + i, np + i, dp, dn - 1);
	      q--;
	    }
	}

      if (qp)
	qp[i] = q;
    }
  while (--i >= 0);

  np[dn - 1] = n1;
}
1092
/* Divide {np, nn} by {dp, dn} using the precomputed inverse INV,
   dispatching on divisor size.  Quotient limbs go to qp (may be NULL);
   the dn-limb remainder ends up in the low limbs of np.  For dn > 2,
   dp must already be normalized to match INV. */
static void
mpn_div_qr_preinv (mp_ptr qp, mp_ptr np, mp_size_t nn,
		   mp_srcptr dp, mp_size_t dn,
		   const struct gmp_div_inverse *inv)
{
  assert (dn > 0);
  assert (nn >= dn);

  if (dn == 1)
    np[0] = mpn_div_qr_1_preinv (qp, np, nn, inv);
  else if (dn == 2)
    mpn_div_qr_2_preinv (qp, np, nn, inv);
  else
    {
      mp_limb_t nh;
      unsigned shift;

      assert (inv->d1 == dp[dn-1]);
      assert (inv->d0 == dp[dn-2]);
      assert ((inv->d1 & GMP_LIMB_HIGHBIT) != 0);

      shift = inv->shift;
      /* Normalize the numerator in place; nh is the shifted-out part. */
      if (shift > 0)
	nh = mpn_lshift (np, np, nn, shift);
      else
	nh = 0;

      mpn_div_qr_pi1 (qp, np, nn, nh, dp, dn, inv->di);

      /* Denormalize the remainder; the shift must come out exact. */
      if (shift > 0)
	gmp_assert_nocarry (mpn_rshift (np, np, dn, shift));
    }
}
1126
/* Top-level mpn division: divide {np, nn} by {dp, dn}, leaving the
   quotient in qp (may be NULL) and the dn-limb remainder in np.
   Computes the inverse, and for dn > 2 makes a normalized copy of the
   divisor in temporary storage. */
static void
mpn_div_qr (mp_ptr qp, mp_ptr np, mp_size_t nn, mp_srcptr dp, mp_size_t dn)
{
  struct gmp_div_inverse inv;
  mp_ptr tp = NULL;

  assert (dn > 0);
  assert (nn >= dn);

  mpn_div_qr_invert (&inv, dp, dn);
  if (dn > 2 && inv.shift > 0)
    {
      /* mpn_div_qr_pi1 needs the whole divisor normalized; shift a
	 copy rather than clobbering the caller's dp. */
      tp = gmp_xalloc_limbs (dn);
      gmp_assert_nocarry (mpn_lshift (tp, dp, dn, inv.shift));
      dp = tp;
    }
  mpn_div_qr_preinv (qp, np, nn, dp, dn, &inv);
  if (tp)
    gmp_free (tp);
}
1147
1148
1149/* MPN base conversion. */
/* If b is a power of two in [2, 256], return its exponent (log2 b);
   otherwise return 0. */
static unsigned
mpn_base_power_of_two_p (unsigned b)
{
  unsigned exp;

  for (exp = 1; exp <= 8; exp++)
    if (b == (1u << exp))
      return exp;
  return 0;
}
1166
/* Description of a non-power-of-two base, used for chunked
   conversions. */
struct mpn_base_info
{
  /* bb is the largest power of the base which fits in one limb, and
     exp is the corresponding exponent. */
  unsigned exp;
  mp_limb_t bb;
};
1174
/* Fill INFO for base b: compute the largest power bb = b^exp that
   still fits in a limb (p <= GMP_LIMB_MAX / b guarantees p * b does
   not overflow). */
static void
mpn_get_base_info (struct mpn_base_info *info, mp_limb_t b)
{
  mp_limb_t m;
  mp_limb_t p;
  unsigned exp;

  m = GMP_LIMB_MAX / b;
  for (exp = 1, p = b; p <= m; exp++)
    p *= b;

  info->exp = exp;
  info->bb = p;
}
1189
/* Return the bit length of the non-zero limb u, i.e. the position of
   its most significant set bit plus one. */
static mp_bitcnt_t
mpn_limb_size_in_base_2 (mp_limb_t u)
{
  unsigned shift;

  assert (u > 0);
  gmp_clz (shift, u);
  return GMP_LIMB_BITS - shift;
}
1199
/* Convert {up, un} to digits in the power-of-two base 2^bits, storing
   digit values (not ASCII) into sp, most significant digit first.
   Returns the number of digits produced (no leading zero digit, since
   up[un-1] is non-zero).  Digits are extracted least significant
   first and placed at descending positions of sp. */
static size_t
mpn_get_str_bits (unsigned char *sp, unsigned bits, mp_srcptr up, mp_size_t un)
{
  unsigned char mask;
  size_t sn, j;
  mp_size_t i;
  unsigned shift;

  /* Number of digits = ceil(bit length / bits). */
  sn = ((un - 1) * GMP_LIMB_BITS + mpn_limb_size_in_base_2 (up[un-1])
	+ bits - 1) / bits;

  mask = (1U << bits) - 1;

  for (i = 0, j = sn, shift = 0; j-- > 0;)
    {
      unsigned char digit = up[i] >> shift;

      shift += bits;

      /* A digit may straddle a limb boundary; pull in the missing
	 high bits from the next limb. */
      if (shift >= GMP_LIMB_BITS && ++i < un)
	{
	  shift -= GMP_LIMB_BITS;
	  digit |= up[i] << (bits - shift);
	}
      sp[j] = digit & mask;
    }
  return sn;
}
1228
/* We generate digits from the least significant end, and reverse at
   the end. */

/* Convert the limb w to digits in the base described by binv (a
   single-limb divisor inverse), storing digit values into sp in
   least-significant-first order.  Returns the digit count (0 for
   w == 0). */
static size_t
mpn_limb_get_str (unsigned char *sp, mp_limb_t w,
		  const struct gmp_div_inverse *binv)
{
  mp_size_t i;
  for (i = 0; w > 0; i++)
    {
      mp_limb_t h, l, r;

      /* Pre-shift w to match the normalized divisor; the remainder is
	 shifted back afterwards. */
      h = w >> (GMP_LIMB_BITS - binv->shift);
      l = w << binv->shift;

      gmp_udiv_qrnnd_preinv (w, r, h, l, binv->d1, binv->di);
      assert ((r & (GMP_LIMB_MAX >> (GMP_LIMB_BITS - binv->shift))) == 0);
      r >>= binv->shift;

      sp[i] = r;
    }
  return i;
}
1251
/* Convert {up, un} to digits in a non-power-of-two base, storing digit
   values into sp most significant first, and returning the count.
   Repeatedly divides by bb = base^exp (the largest power fitting in a
   limb), converts each limb-sized chunk, then reverses the digit
   string.  Destroys {up, un}. */
static size_t
mpn_get_str_other (unsigned char *sp,
		   int base, const struct mpn_base_info *info,
		   mp_ptr up, mp_size_t un)
{
  struct gmp_div_inverse binv;
  size_t sn;
  size_t i;

  mpn_div_qr_1_invert (&binv, base);

  sn = 0;

  if (un > 1)
    {
      struct gmp_div_inverse bbinv;
      mpn_div_qr_1_invert (&bbinv, info->bb);

      do
	{
	  mp_limb_t w;
	  size_t done;
	  /* w = up mod bb; up = up / bb (in place). */
	  w = mpn_div_qr_1_preinv (up, up, un, &bbinv);
	  un -= (up[un-1] == 0);
	  done = mpn_limb_get_str (sp + sn, w, &binv);

	  /* Zero-pad each interior chunk to exactly info->exp digits. */
	  for (sn += done; done < info->exp; done++)
	    sp[sn++] = 0;
	}
      while (un > 1);
    }
  /* Most significant chunk, without padding. */
  sn += mpn_limb_get_str (sp + sn, up[0], &binv);

  /* Reverse order */
  for (i = 0; 2*i + 1 < sn; i++)
    {
      unsigned char t = sp[i];
      sp[i] = sp[sn - i - 1];
      sp[sn - i - 1] = t;
    }

  return sn;
}
1295
1296size_t
1297mpn_get_str (unsigned char *sp, int base, mp_ptr up, mp_size_t un)
1298{
1299  unsigned bits;
1300
1301  assert (un > 0);
1302  assert (up[un-1] > 0);
1303
1304  bits = mpn_base_power_of_two_p (base);
1305  if (bits)
1306    return mpn_get_str_bits (sp, bits, up, un);
1307  else
1308    {
1309      struct mpn_base_info info;
1310
1311      mpn_get_base_info (&info, base);
1312      return mpn_get_str_other (sp, base, &info, up, un);
1313    }
1314}
1315
/* Convert SN digit values at SP (most significant first) in the
   power-of-two base 2^BITS to limbs at RP.  Returns the normalized
   limb count. */
static mp_size_t
mpn_set_str_bits (mp_ptr rp, const unsigned char *sp, size_t sn,
		  unsigned bits)
{
  mp_size_t rn;
  size_t j;
  unsigned shift;

  /* Walk the digits from the least significant end, packing BITS bits
     per digit into successive limbs. */
  for (j = sn, rn = 0, shift = 0; j-- > 0; )
    {
      if (shift == 0)
	{
	  /* Start a fresh limb with this digit. */
	  rp[rn++] = sp[j];
	  shift += bits;
	}
      else
	{
	  rp[rn-1] |= (mp_limb_t) sp[j] << shift;
	  shift += bits;
	  if (shift >= GMP_LIMB_BITS)
	    {
	      /* The digit straddled a limb boundary; spill its high
		 bits into a new limb (unless they were exactly
		 consumed). */
	      shift -= GMP_LIMB_BITS;
	      if (shift > 0)
		rp[rn++] = (mp_limb_t) sp[j] >> (bits - shift);
	    }
	}
    }
  rn = mpn_normalized_size (rp, rn);
  return rn;
}
1346
/* Convert SN digit values at SP (most significant first) in base B to
   limbs at RP.  Result is usually normalized, except for all-zero
   input, in which case a single zero limb is written at *RP, and 1 is
   returned. */
static mp_size_t
mpn_set_str_other (mp_ptr rp, const unsigned char *sp, size_t sn,
		   mp_limb_t b, const struct mpn_base_info *info)
{
  mp_size_t rn;
  mp_limb_t w;
  unsigned k;
  size_t j;

  assert (sn > 0);

  /* Digit count of the (possibly short) most significant chunk; all
     later chunks hold exactly info->exp digits. */
  k = 1 + (sn - 1) % info->exp;

  j = 0;
  w = sp[j++];
  while (--k != 0)
    w = w * b + sp[j++];

  rp[0] = w;

  /* For each further chunk: fold its digits into w, then compute
     r = r * b^exp + w with single-limb multiply and add. */
  for (rn = 1; j < sn;)
    {
      mp_limb_t cy;

      w = sp[j++];
      for (k = 1; k < info->exp; k++)
	w = w * b + sp[j++];

      cy = mpn_mul_1 (rp, rp, rn, info->bb);
      cy += mpn_add_1 (rp, rp, rn, w);
      if (cy > 0)
	rp[rn++] = cy;
    }
  assert (j == sn);

  return rn;
}
1386
1387mp_size_t
1388mpn_set_str (mp_ptr rp, const unsigned char *sp, size_t sn, int base)
1389{
1390  unsigned bits;
1391
1392  if (sn == 0)
1393    return 0;
1394
1395  bits = mpn_base_power_of_two_p (base);
1396  if (bits)
1397    return mpn_set_str_bits (rp, sp, sn, bits);
1398  else
1399    {
1400      struct mpn_base_info info;
1401
1402      mpn_get_base_info (&info, base);
1403      return mpn_set_str_other (rp, sp, sn, base, &info);
1404    }
1405}
1406
1407
1408/* MPZ interface */
/* Initialize R to zero.  No storage is allocated (_mp_alloc == 0);
   _mp_d points at a static dummy limb so that code reading limb 0 of
   a zero value is harmless.  The distinctive 0xc1a0 pattern helps
   spot stray writes through the dummy pointer when debugging. */
void
mpz_init (mpz_t r)
{
  static const mp_limb_t dummy_limb = GMP_LIMB_MAX & 0xc1a0;

  r->_mp_alloc = 0;
  r->_mp_size = 0;
  r->_mp_d = (mp_ptr) &dummy_limb;
}
1418
/* The utility of this function is a bit limited, since many functions
   assign the result variable using mpz_swap. */
1421void
1422mpz_init2 (mpz_t r, mp_bitcnt_t bits)
1423{
1424  mp_size_t rn;
1425
1426  bits -= (bits != 0);		/* Round down, except if 0 */
1427  rn = 1 + bits / GMP_LIMB_BITS;
1428
1429  r->_mp_alloc = rn;
1430  r->_mp_size = 0;
1431  r->_mp_d = gmp_xalloc_limbs (rn);
1432}
1433
1434void
1435mpz_clear (mpz_t r)
1436{
1437  if (r->_mp_alloc)
1438    gmp_free (r->_mp_d);
1439}
1440
/* Resize R's limb storage to SIZE limbs (at least one).  If the
   current value no longer fits, R is truncated to zero.  Returns the
   new limb pointer. */
static mp_ptr
mpz_realloc (mpz_t r, mp_size_t size)
{
  size = GMP_MAX (size, 1);

  /* _mp_alloc == 0 means _mp_d points at the static dummy limb from
     mpz_init, which must not be passed to realloc. */
  if (r->_mp_alloc)
    r->_mp_d = gmp_xrealloc_limbs (r->_mp_d, size);
  else
    r->_mp_d = gmp_xalloc_limbs (size);
  r->_mp_alloc = size;

  /* Shrinking below the stored value leaves garbage; reset to zero. */
  if (GMP_ABS (r->_mp_size) > size)
    r->_mp_size = 0;

  return r->_mp_d;
}
1457
/* Ensure Z has room for at least N limbs, reallocating if necessary.
   Evaluates to the (possibly updated) limb pointer (z)->_mp_d. */
#define MPZ_REALLOC(z,n) ((n) > (z)->_mp_alloc			\
			  ? mpz_realloc(z,n)			\
			  : (z)->_mp_d)
1462
1463/* MPZ assignment and basic conversions. */
1464void
1465mpz_set_si (mpz_t r, signed long int x)
1466{
1467  if (x >= 0)
1468    mpz_set_ui (r, x);
1469  else /* (x < 0) */
1470    if (GMP_LIMB_BITS < GMP_ULONG_BITS)
1471      {
1472	mpz_set_ui (r, GMP_NEG_CAST (unsigned long int, x));
1473	mpz_neg (r, r);
1474      }
1475  else
1476    {
1477      r->_mp_size = -1;
1478      MPZ_REALLOC (r, 1)[0] = GMP_NEG_CAST (unsigned long int, x);
1479    }
1480}
1481
/* Set R to the unsigned long X, splitting it across several limbs
   when mp_limb_t is narrower than unsigned long. */
void
mpz_set_ui (mpz_t r, unsigned long int x)
{
  if (x > 0)
    {
      r->_mp_size = 1;
      MPZ_REALLOC (r, 1)[0] = x;
      if (GMP_LIMB_BITS < GMP_ULONG_BITS)
	{
	  /* Shift via a variable to silence a shift-count warning
	     when this branch is statically dead. */
	  int LOCAL_GMP_LIMB_BITS = GMP_LIMB_BITS;
	  while (x >>= LOCAL_GMP_LIMB_BITS)
	    {
	      ++ r->_mp_size;
	      MPZ_REALLOC (r, r->_mp_size)[r->_mp_size - 1] = x;
	    }
	}
    }
  else
    r->_mp_size = 0;
}
1502
1503void
1504mpz_set (mpz_t r, const mpz_t x)
1505{
1506  /* Allow the NOP r == x */
1507  if (r != x)
1508    {
1509      mp_size_t n;
1510      mp_ptr rp;
1511
1512      n = GMP_ABS (x->_mp_size);
1513      rp = MPZ_REALLOC (r, n);
1514
1515      mpn_copyi (rp, x->_mp_d, n);
1516      r->_mp_size = x->_mp_size;
1517    }
1518}
1519
/* Initialize R and set it to the signed long X. */
void
mpz_init_set_si (mpz_t r, signed long int x)
{
  mpz_init (r);
  mpz_set_si (r, x);
}

/* Initialize R and set it to the unsigned long X. */
void
mpz_init_set_ui (mpz_t r, unsigned long int x)
{
  mpz_init (r);
  mpz_set_ui (r, x);
}

/* Initialize R and set it to a copy of X. */
void
mpz_init_set (mpz_t r, const mpz_t x)
{
  mpz_init (r);
  mpz_set (r, x);
}
1540
/* Return non-zero if U fits in a signed long.  When
   LONG_MAX + LONG_MIN == 0 (|LONG_MIN| == LONG_MAX), the single
   magnitude check covers both signs; otherwise (two's complement)
   the positive bound LONG_MAX is checked separately from the
   magnitude bound |LONG_MIN|. */
int
mpz_fits_slong_p (const mpz_t u)
{
  return (LONG_MAX + LONG_MIN == 0 || mpz_cmp_ui (u, LONG_MAX) <= 0) &&
    mpz_cmpabs_ui (u, GMP_NEG_CAST (unsigned long int, LONG_MIN)) <= 0;
}
1547
/* Return non-zero if the UN-limb number {up,un} fits in an unsigned
   long. */
static int
mpn_absfits_ulong_p (mp_srcptr up, mp_size_t un)
{
  /* Number of whole limbs an unsigned long occupies. */
  int ulongsize = GMP_ULONG_BITS / GMP_LIMB_BITS;
  mp_limb_t ulongrem = 0;

  /* If unsigned long is not a whole number of limbs, the top partial
     limb may hold values below ulongrem. */
  if (GMP_ULONG_BITS % GMP_LIMB_BITS != 0)
    ulongrem = (mp_limb_t) (ULONG_MAX >> GMP_LIMB_BITS * ulongsize) + 1;

  return un <= ulongsize || (up[ulongsize] < ulongrem && un == ulongsize + 1);
}
1559
1560int
1561mpz_fits_ulong_p (const mpz_t u)
1562{
1563  mp_size_t us = u->_mp_size;
1564
1565  return us >= 0 && mpn_absfits_ulong_p (u->_mp_d, us);
1566}
1567
/* Return U reduced to a signed long (low bits, two's-complement
   style truncation). */
long int
mpz_get_si (const mpz_t u)
{
  unsigned long r = mpz_get_ui (u);
  /* c = |LONG_MIN| - LONG_MAX: 1 on two's complement, 0 otherwise. */
  unsigned long c = -LONG_MAX - LONG_MIN;

  if (u->_mp_size < 0)
    /* This expression is necessary to properly handle -LONG_MIN */
    return -(long) c - (long) ((r - c) & LONG_MAX);
  else
    return (long) (r & LONG_MAX);
}
1580
/* Return the low bits of |U| as an unsigned long. */
unsigned long int
mpz_get_ui (const mpz_t u)
{
  if (GMP_LIMB_BITS < GMP_ULONG_BITS)
    {
      /* Assemble the result from the low limbs, highest first.  The
	 local shift count silences a shift warning when this branch
	 is statically dead. */
      int LOCAL_GMP_LIMB_BITS = GMP_LIMB_BITS;
      unsigned long r = 0;
      mp_size_t n = GMP_ABS (u->_mp_size);
      n = GMP_MIN (n, 1 + (mp_size_t) (GMP_ULONG_BITS - 1) / GMP_LIMB_BITS);
      while (--n >= 0)
	r = (r << LOCAL_GMP_LIMB_BITS) + u->_mp_d[n];
      return r;
    }

  return u->_mp_size == 0 ? 0 : u->_mp_d[0];
}
1597
1598size_t
1599mpz_size (const mpz_t u)
1600{
1601  return GMP_ABS (u->_mp_size);
1602}
1603
1604mp_limb_t
1605mpz_getlimbn (const mpz_t u, mp_size_t n)
1606{
1607  if (n >= 0 && n < GMP_ABS (u->_mp_size))
1608    return u->_mp_d[n];
1609  else
1610    return 0;
1611}
1612
/* Reallocate X to hold at least N bits (at least one limb). */
void
mpz_realloc2 (mpz_t x, mp_bitcnt_t n)
{
  mpz_realloc (x, 1 + (n - (n != 0)) / GMP_LIMB_BITS);
}
1618
/* Read-only access to X's limb array. */
mp_srcptr
mpz_limbs_read (mpz_srcptr x)
{
  return x->_mp_d;
}
1624
/* Writable limb pointer with room for N limbs; existing limbs are
   preserved across any reallocation. */
mp_ptr
mpz_limbs_modify (mpz_t x, mp_size_t n)
{
  assert (n > 0);
  return MPZ_REALLOC (x, n);
}

/* Writable limb pointer with room for N limbs; the caller need not
   preserve the previous value. */
mp_ptr
mpz_limbs_write (mpz_t x, mp_size_t n)
{
  return mpz_limbs_modify (x, n);
}
1637
1638void
1639mpz_limbs_finish (mpz_t x, mp_size_t xs)
1640{
1641  mp_size_t xn;
1642  xn = mpn_normalized_size (x->_mp_d, GMP_ABS (xs));
1643  x->_mp_size = xs < 0 ? -xn : xn;
1644}
1645
/* Initialize X as a read-only mpz borrowing the caller's limb array
   XP, which must already be normalized (no high zero limbs).  No
   storage is allocated (_mp_alloc == 0), so X must never be resized
   or cleared. */
static mpz_srcptr
mpz_roinit_normal_n (mpz_t x, mp_srcptr xp, mp_size_t xs)
{
  x->_mp_alloc = 0;
  x->_mp_d = (mp_ptr) xp;
  x->_mp_size = xs;
  return x;
}

/* Like mpz_roinit_normal_n, but normalizes the size first. */
mpz_srcptr
mpz_roinit_n (mpz_t x, mp_srcptr xp, mp_size_t xs)
{
  mpz_roinit_normal_n (x, xp, xs);
  mpz_limbs_finish (x, xs);
  return x;
}
1662
1663
1664/* Conversions and comparison to double. */
/* Set R to X truncated toward zero.  NaN, infinities and values of
   magnitude below 1 all produce zero. */
void
mpz_set_d (mpz_t r, double x)
{
  int sign;
  mp_ptr rp;
  mp_size_t rn, i;
  double B;
  double Bi;
  mp_limb_t f;

  /* x != x is true when x is a NaN, and x == x * 0.5 is true when x is
     zero or infinity. */
  if (x != x || x == x * 0.5)
    {
      r->_mp_size = 0;
      return;
    }

  sign = x < 0.0 ;
  if (sign)
    x = - x;

  /* Magnitudes below 1 truncate to zero. */
  if (x < 1.0)
    {
      r->_mp_size = 0;
      return;
    }
  /* B = 2^GMP_LIMB_BITS, built without overflowing the shift. */
  B = 4.0 * (double) (GMP_LIMB_HIGHBIT >> 1);
  Bi = 1.0 / B;
  /* Scale x down to a single limb, counting the limbs needed. */
  for (rn = 1; x >= B; rn++)
    x *= Bi;

  rp = MPZ_REALLOC (r, rn);

  /* Peel off one limb at a time, most significant first; the
     fractional remainder carries the lower limbs. */
  f = (mp_limb_t) x;
  x -= f;
  assert (x < 1.0);
  i = rn-1;
  rp[i] = f;
  while (--i >= 0)
    {
      x = B * x;
      f = (mp_limb_t) x;
      x -= f;
      assert (x < 1.0);
      rp[i] = f;
    }

  r->_mp_size = sign ? - rn : rn;
}
1715
/* Initialize R and set it to X truncated toward zero. */
void
mpz_init_set_d (mpz_t r, double x)
{
  mpz_init (r);
  mpz_set_d (r, x);
}
1722
/* Return U converted to double, truncated toward zero.  Only the top
   GMP_DBL_MANT_BITS bits contribute; lower bits are masked off before
   accumulation so the conversion never rounds up. */
double
mpz_get_d (const mpz_t u)
{
  int m;
  mp_limb_t l;
  mp_size_t un;
  double x;
  double B = 4.0 * (double) (GMP_LIMB_HIGHBIT >> 1); /* 2^GMP_LIMB_BITS */

  un = GMP_ABS (u->_mp_size);

  if (un == 0)
    return 0.0;

  l = u->_mp_d[--un];
  gmp_clz (m, l);
  /* m = mantissa bits still available after consuming the top limb. */
  m = m + GMP_DBL_MANT_BITS - GMP_LIMB_BITS;
  if (m < 0)
    l &= GMP_LIMB_MAX << -m;

  for (x = l; --un >= 0;)
    {
      x = B*x;
      /* Lower limbs are added only while mantissa bits remain. */
      if (m > 0) {
	l = u->_mp_d[un];
	m -= GMP_LIMB_BITS;
	if (m < 0)
	  l &= GMP_LIMB_MAX << -m;
	x += l;
      }
    }

  if (u->_mp_size < 0)
    x = -x;

  return x;
}
1760
/* Compare |X| with |D|; returns <0, 0 or >0.
   Assumes D is finite — TODO confirm callers never pass NaN/inf. */
int
mpz_cmpabs_d (const mpz_t x, double d)
{
  mp_size_t xn;
  double B, Bi;
  mp_size_t i;

  xn = x->_mp_size;
  d = GMP_ABS (d);

  if (xn != 0)
    {
      xn = GMP_ABS (xn);

      B = 4.0 * (double) (GMP_LIMB_HIGHBIT >> 1); /* 2^GMP_LIMB_BITS */
      Bi = 1.0 / B;

      /* Scale d so it can be compared with the top limb. */
      for (i = 1; i < xn; i++)
	d *= Bi;

      /* After scaling, d >= B means d has more limbs than x. */
      if (d >= B)
	return -1;

      /* Compare floor(d) to top limb, subtract and cancel when equal. */
      for (i = xn; i-- > 0;)
	{
	  mp_limb_t f, xl;

	  f = (mp_limb_t) d;
	  xl = x->_mp_d[i];
	  if (xl > f)
	    return 1;
	  else if (xl < f)
	    return -1;
	  /* Equal so far; promote the next fractional limb of d. */
	  d = B * (d - f);
	}
    }
  /* x == 0: less than d unless d is zero too. */
  return - (d > 0.0);
}
1801
1802int
1803mpz_cmp_d (const mpz_t x, double d)
1804{
1805  if (x->_mp_size < 0)
1806    {
1807      if (d >= 0.0)
1808	return -1;
1809      else
1810	return -mpz_cmpabs_d (x, d);
1811    }
1812  else
1813    {
1814      if (d < 0.0)
1815	return 1;
1816      else
1817	return mpz_cmpabs_d (x, d);
1818    }
1819}
1820
1821
1822/* MPZ comparisons and the like. */
/* Return the sign of U: -1, 0 or 1. */
int
mpz_sgn (const mpz_t u)
{
  return GMP_CMP (u->_mp_size, 0);
}
1828
1829int
1830mpz_cmp_si (const mpz_t u, long v)
1831{
1832  mp_size_t usize = u->_mp_size;
1833
1834  if (v >= 0)
1835    return mpz_cmp_ui (u, v);
1836  else if (usize >= 0)
1837    return 1;
1838  else
1839    return - mpz_cmpabs_ui (u, GMP_NEG_CAST (unsigned long int, v));
1840}
1841
1842int
1843mpz_cmp_ui (const mpz_t u, unsigned long v)
1844{
1845  mp_size_t usize = u->_mp_size;
1846
1847  if (usize < 0)
1848    return -1;
1849  else
1850    return mpz_cmpabs_ui (u, v);
1851}
1852
1853int
1854mpz_cmp (const mpz_t a, const mpz_t b)
1855{
1856  mp_size_t asize = a->_mp_size;
1857  mp_size_t bsize = b->_mp_size;
1858
1859  if (asize != bsize)
1860    return (asize < bsize) ? -1 : 1;
1861  else if (asize >= 0)
1862    return mpn_cmp (a->_mp_d, b->_mp_d, asize);
1863  else
1864    return mpn_cmp (b->_mp_d, a->_mp_d, -asize);
1865}
1866
/* Compare |U| with V; returns <0, 0 or >0. */
int
mpz_cmpabs_ui (const mpz_t u, unsigned long v)
{
  mp_size_t un = GMP_ABS (u->_mp_size);

  /* Anything wider than an unsigned long exceeds any V. */
  if (! mpn_absfits_ulong_p (u->_mp_d, un))
    return 1;
  else
    {
      unsigned long uu = mpz_get_ui (u);
      return GMP_CMP(uu, v);
    }
}
1880
/* Compare |U| with |V|; returns <0, 0 or >0. */
int
mpz_cmpabs (const mpz_t u, const mpz_t v)
{
  return mpn_cmp4 (u->_mp_d, GMP_ABS (u->_mp_size),
		   v->_mp_d, GMP_ABS (v->_mp_size));
}

/* r = |u| */
void
mpz_abs (mpz_t r, const mpz_t u)
{
  mpz_set (r, u);
  r->_mp_size = GMP_ABS (r->_mp_size);
}

/* r = -u */
void
mpz_neg (mpz_t r, const mpz_t u)
{
  mpz_set (r, u);
  r->_mp_size = -r->_mp_size;
}

/* Exchange the values of U and V by swapping the struct fields; no
   limbs are copied. */
void
mpz_swap (mpz_t u, mpz_t v)
{
  MP_SIZE_T_SWAP (u->_mp_size, v->_mp_size);
  MP_SIZE_T_SWAP (u->_mp_alloc, v->_mp_alloc);
  MP_PTR_SWAP (u->_mp_d, v->_mp_d);
}
1909
1910
1911/* MPZ addition and subtraction */
1912
1913
/* r = a + b, with B an unsigned long (which may need several limbs). */
void
mpz_add_ui (mpz_t r, const mpz_t a, unsigned long b)
{
  mpz_t bb;
  mpz_init_set_ui (bb, b);
  mpz_add (r, a, bb);
  mpz_clear (bb);
}

/* r = a - b, computed as -(b - a). */
void
mpz_sub_ui (mpz_t r, const mpz_t a, unsigned long b)
{
  mpz_ui_sub (r, b, a);
  mpz_neg (r, r);
}

/* r = a - b, with A an unsigned long; computed as (-b) + a. */
void
mpz_ui_sub (mpz_t r, unsigned long a, const mpz_t b)
{
  mpz_neg (r, b);
  mpz_add_ui (r, r, a);
}
1936
/* Set the limbs of R to |a| + |b| and return the resulting size
   (always positive).  R's sign is set by the caller. */
static mp_size_t
mpz_abs_add (mpz_t r, const mpz_t a, const mpz_t b)
{
  mp_size_t an = GMP_ABS (a->_mp_size);
  mp_size_t bn = GMP_ABS (b->_mp_size);
  mp_ptr rp;
  mp_limb_t cy;

  /* mpn_add needs its first operand to be the longer one. */
  if (an < bn)
    {
      MPZ_SRCPTR_SWAP (a, b);
      MP_SIZE_T_SWAP (an, bn);
    }

  rp = MPZ_REALLOC (r, an + 1);
  cy = mpn_add (rp, a->_mp_d, an, b->_mp_d, bn);

  /* Top limb holds the carry; an + cy drops it again when zero. */
  rp[an] = cy;

  return an + cy;
}
1958
/* Set the limbs of R to ||a| - |b|| and return the size with the
   sign of |a| - |b| encoded (negative when |a| < |b|, zero when
   equal).  The caller combines this with the operand signs. */
static mp_size_t
mpz_abs_sub (mpz_t r, const mpz_t a, const mpz_t b)
{
  mp_size_t an = GMP_ABS (a->_mp_size);
  mp_size_t bn = GMP_ABS (b->_mp_size);
  int cmp;
  mp_ptr rp;

  /* Subtract the smaller magnitude from the larger. */
  cmp = mpn_cmp4 (a->_mp_d, an, b->_mp_d, bn);
  if (cmp > 0)
    {
      rp = MPZ_REALLOC (r, an);
      gmp_assert_nocarry (mpn_sub (rp, a->_mp_d, an, b->_mp_d, bn));
      return mpn_normalized_size (rp, an);
    }
  else if (cmp < 0)
    {
      rp = MPZ_REALLOC (r, bn);
      gmp_assert_nocarry (mpn_sub (rp, b->_mp_d, bn, a->_mp_d, an));
      return -mpn_normalized_size (rp, bn);
    }
  else
    return 0;
}
1983
1984void
1985mpz_add (mpz_t r, const mpz_t a, const mpz_t b)
1986{
1987  mp_size_t rn;
1988
1989  if ( (a->_mp_size ^ b->_mp_size) >= 0)
1990    rn = mpz_abs_add (r, a, b);
1991  else
1992    rn = mpz_abs_sub (r, a, b);
1993
1994  r->_mp_size = a->_mp_size >= 0 ? rn : - rn;
1995}
1996
1997void
1998mpz_sub (mpz_t r, const mpz_t a, const mpz_t b)
1999{
2000  mp_size_t rn;
2001
2002  if ( (a->_mp_size ^ b->_mp_size) >= 0)
2003    rn = mpz_abs_sub (r, a, b);
2004  else
2005    rn = mpz_abs_add (r, a, b);
2006
2007  r->_mp_size = a->_mp_size >= 0 ? rn : - rn;
2008}
2009
2010
2011/* MPZ multiplication */
2012void
2013mpz_mul_si (mpz_t r, const mpz_t u, long int v)
2014{
2015  if (v < 0)
2016    {
2017      mpz_mul_ui (r, u, GMP_NEG_CAST (unsigned long int, v));
2018      mpz_neg (r, r);
2019    }
2020  else
2021    mpz_mul_ui (r, u, v);
2022}
2023
2024void
2025mpz_mul_ui (mpz_t r, const mpz_t u, unsigned long int v)
2026{
2027  mpz_t vv;
2028  mpz_init_set_ui (vv, v);
2029  mpz_mul (r, u, vv);
2030  mpz_clear (vv);
2031  return;
2032}
2033
/* r = u * v.  A temporary is used so that R may alias U or V. */
void
mpz_mul (mpz_t r, const mpz_t u, const mpz_t v)
{
  int sign;
  mp_size_t un, vn, rn;
  mpz_t t;
  mp_ptr tp;

  un = u->_mp_size;
  vn = v->_mp_size;

  if (un == 0 || vn == 0)
    {
      r->_mp_size = 0;
      return;
    }

  /* Product is negative iff the operand signs differ. */
  sign = (un ^ vn) < 0;

  un = GMP_ABS (un);
  vn = GMP_ABS (vn);

  mpz_init2 (t, (un + vn) * GMP_LIMB_BITS);

  tp = t->_mp_d;
  /* mpn_mul requires its first operand to be at least as long. */
  if (un >= vn)
    mpn_mul (tp, u->_mp_d, un, v->_mp_d, vn);
  else
    mpn_mul (tp, v->_mp_d, vn, u->_mp_d, un);

  /* The top limb of the product may be zero; normalize. */
  rn = un + vn;
  rn -= tp[rn-1] == 0;

  t->_mp_size = sign ? - rn : rn;
  mpz_swap (r, t);
  mpz_clear (t);
}
2071
/* r = u * 2^bits, i.e. a left shift by BITS bits. */
void
mpz_mul_2exp (mpz_t r, const mpz_t u, mp_bitcnt_t bits)
{
  mp_size_t un, rn;
  mp_size_t limbs;
  unsigned shift;
  mp_ptr rp;

  un = GMP_ABS (u->_mp_size);
  if (un == 0)
    {
      r->_mp_size = 0;
      return;
    }

  /* Split the shift into whole limbs and a sub-limb bit count. */
  limbs = bits / GMP_LIMB_BITS;
  shift = bits % GMP_LIMB_BITS;

  rn = un + limbs + (shift > 0);
  rp = MPZ_REALLOC (r, rn);
  if (shift > 0)
    {
      /* mpn_lshift returns the bits shifted out of the top limb. */
      mp_limb_t cy = mpn_lshift (rp + limbs, u->_mp_d, un, shift);
      rp[rn-1] = cy;
      rn -= (cy == 0);
    }
  else
    mpn_copyd (rp + limbs, u->_mp_d, un);

  /* Low limbs are all zero. */
  mpn_zero (rp, limbs);

  r->_mp_size = (u->_mp_size < 0) ? - rn : rn;
}
2105
/* r += u * v, with V an unsigned long. */
void
mpz_addmul_ui (mpz_t r, const mpz_t u, unsigned long int v)
{
  mpz_t t;
  mpz_init_set_ui (t, v);
  mpz_mul (t, u, t);
  mpz_add (r, r, t);
  mpz_clear (t);
}

/* r -= u * v, with V an unsigned long. */
void
mpz_submul_ui (mpz_t r, const mpz_t u, unsigned long int v)
{
  mpz_t t;
  mpz_init_set_ui (t, v);
  mpz_mul (t, u, t);
  mpz_sub (r, r, t);
  mpz_clear (t);
}

/* r += u * v */
void
mpz_addmul (mpz_t r, const mpz_t u, const mpz_t v)
{
  mpz_t t;
  mpz_init (t);
  mpz_mul (t, u, v);
  mpz_add (r, r, t);
  mpz_clear (t);
}

/* r -= u * v */
void
mpz_submul (mpz_t r, const mpz_t u, const mpz_t v)
{
  mpz_t t;
  mpz_init (t);
  mpz_mul (t, u, v);
  mpz_sub (r, r, t);
  mpz_clear (t);
}
2145
2146
2147/* MPZ division */
/* Quotient rounding modes: toward -infinity, toward +infinity, and
   toward zero, respectively. */
enum mpz_div_round_mode { GMP_DIV_FLOOR, GMP_DIV_CEIL, GMP_DIV_TRUNC };
2149
/* Divide N by D with the given rounding mode; quotient to Q and
   remainder to R, either of which may be NULL.  Returns 1 iff the
   remainder is non-zero.  Dies on division by zero. */
static int
mpz_div_qr (mpz_t q, mpz_t r,
	    const mpz_t n, const mpz_t d, enum mpz_div_round_mode mode)
{
  mp_size_t ns, ds, nn, dn, qs;
  ns = n->_mp_size;
  ds = d->_mp_size;

  if (ds == 0)
    gmp_die("mpz_div_qr: Divide by zero.");

  if (ns == 0)
    {
      if (q)
	q->_mp_size = 0;
      if (r)
	r->_mp_size = 0;
      return 0;
    }

  nn = GMP_ABS (ns);
  dn = GMP_ABS (ds);

  /* Sign of the (truncated) quotient lives in the sign bit of qs. */
  qs = ds ^ ns;

  if (nn < dn)
    {
      /* |n| < |d|: the truncated quotient is zero; adjust for the
	 rounding mode. */
      if (mode == GMP_DIV_CEIL && qs >= 0)
	{
	  /* q = 1, r = n - d */
	  if (r)
	    mpz_sub (r, n, d);
	  if (q)
	    mpz_set_ui (q, 1);
	}
      else if (mode == GMP_DIV_FLOOR && qs < 0)
	{
	  /* q = -1, r = n + d */
	  if (r)
	    mpz_add (r, n, d);
	  if (q)
	    mpz_set_si (q, -1);
	}
      else
	{
	  /* q = 0, r = n */
	  if (r)
	    mpz_set (r, n);
	  if (q)
	    q->_mp_size = 0;
	}
      return 1;
    }
  else
    {
      mp_ptr np, qp;
      mp_size_t qn, rn;
      mpz_t tq, tr;

      /* Work in temporaries so Q and R may alias N or D; mpn_div_qr
	 leaves the remainder in the numerator's limbs. */
      mpz_init_set (tr, n);
      np = tr->_mp_d;

      qn = nn - dn + 1;

      if (q)
	{
	  mpz_init2 (tq, qn * GMP_LIMB_BITS);
	  qp = tq->_mp_d;
	}
      else
	qp = NULL;

      mpn_div_qr (qp, np, nn, d->_mp_d, dn);

      if (qp)
	{
	  /* The top quotient limb may be zero. */
	  qn -= (qp[qn-1] == 0);

	  tq->_mp_size = qs < 0 ? -qn : qn;
	}
      rn = mpn_normalized_size (np, dn);
      tr->_mp_size = ns < 0 ? - rn : rn;

      /* Truncation gives the wrong answer for floor/ceil when the
	 remainder is non-zero; fix up by one step. */
      if (mode == GMP_DIV_FLOOR && qs < 0 && rn != 0)
	{
	  if (q)
	    mpz_sub_ui (tq, tq, 1);
	  if (r)
	    mpz_add (tr, tr, d);
	}
      else if (mode == GMP_DIV_CEIL && qs >= 0 && rn != 0)
	{
	  if (q)
	    mpz_add_ui (tq, tq, 1);
	  if (r)
	    mpz_sub (tr, tr, d);
	}

      if (q)
	{
	  mpz_swap (tq, q);
	  mpz_clear (tq);
	}
      if (r)
	mpz_swap (tr, r);

      mpz_clear (tr);

      return rn != 0;
    }
}
2262
/* Ceiling division: q = ceil(n/d), r = n - q*d. */
void
mpz_cdiv_qr (mpz_t q, mpz_t r, const mpz_t n, const mpz_t d)
{
  mpz_div_qr (q, r, n, d, GMP_DIV_CEIL);
}

/* Floor division: q = floor(n/d), r = n - q*d. */
void
mpz_fdiv_qr (mpz_t q, mpz_t r, const mpz_t n, const mpz_t d)
{
  mpz_div_qr (q, r, n, d, GMP_DIV_FLOOR);
}

/* Truncating division: q = trunc(n/d), r = n - q*d. */
void
mpz_tdiv_qr (mpz_t q, mpz_t r, const mpz_t n, const mpz_t d)
{
  mpz_div_qr (q, r, n, d, GMP_DIV_TRUNC);
}

/* Quotient-only variants of the above. */
void
mpz_cdiv_q (mpz_t q, const mpz_t n, const mpz_t d)
{
  mpz_div_qr (q, NULL, n, d, GMP_DIV_CEIL);
}

void
mpz_fdiv_q (mpz_t q, const mpz_t n, const mpz_t d)
{
  mpz_div_qr (q, NULL, n, d, GMP_DIV_FLOOR);
}

void
mpz_tdiv_q (mpz_t q, const mpz_t n, const mpz_t d)
{
  mpz_div_qr (q, NULL, n, d, GMP_DIV_TRUNC);
}

/* Remainder-only variants. */
void
mpz_cdiv_r (mpz_t r, const mpz_t n, const mpz_t d)
{
  mpz_div_qr (NULL, r, n, d, GMP_DIV_CEIL);
}

void
mpz_fdiv_r (mpz_t r, const mpz_t n, const mpz_t d)
{
  mpz_div_qr (NULL, r, n, d, GMP_DIV_FLOOR);
}

void
mpz_tdiv_r (mpz_t r, const mpz_t n, const mpz_t d)
{
  mpz_div_qr (NULL, r, n, d, GMP_DIV_TRUNC);
}

/* r = n mod d, with 0 <= r < |d|: floor rounding for positive d,
   ceiling for negative d, so the remainder is never negative. */
void
mpz_mod (mpz_t r, const mpz_t n, const mpz_t d)
{
  mpz_div_qr (NULL, r, n, d, d->_mp_size >= 0 ? GMP_DIV_FLOOR : GMP_DIV_CEIL);
}
2322
/* q = u / 2^bit_index, rounded according to MODE. */
static void
mpz_div_q_2exp (mpz_t q, const mpz_t u, mp_bitcnt_t bit_index,
		enum mpz_div_round_mode mode)
{
  mp_size_t un, qn;
  mp_size_t limb_cnt;
  mp_ptr qp;
  int adjust;

  un = u->_mp_size;
  if (un == 0)
    {
      q->_mp_size = 0;
      return;
    }
  /* Split the shift into whole limbs and remaining bits. */
  limb_cnt = bit_index / GMP_LIMB_BITS;
  qn = GMP_ABS (un) - limb_cnt;
  bit_index %= GMP_LIMB_BITS;

  /* Rounding away from zero requires bumping |q| by one exactly when
     any discarded bit is non-zero. */
  if (mode == ((un > 0) ? GMP_DIV_CEIL : GMP_DIV_FLOOR)) /* un != 0 here. */
    /* Note: Below, the final indexing at limb_cnt is valid because at
       that point we have qn > 0. */
    adjust = (qn <= 0
	      || !mpn_zero_p (u->_mp_d, limb_cnt)
	      || (u->_mp_d[limb_cnt]
		  & (((mp_limb_t) 1 << bit_index) - 1)));
  else
    adjust = 0;

  if (qn <= 0)
    qn = 0;
  else
    {
      qp = MPZ_REALLOC (q, qn);

      if (bit_index != 0)
	{
	  /* Sub-limb shift; the top limb may become zero. */
	  mpn_rshift (qp, u->_mp_d + limb_cnt, qn, bit_index);
	  qn -= qp[qn - 1] == 0;
	}
      else
	{
	  mpn_copyi (qp, u->_mp_d + limb_cnt, qn);
	}
    }

  q->_mp_size = qn;

  /* Adjust on the magnitude, then restore the sign. */
  if (adjust)
    mpz_add_ui (q, q, 1);
  if (un < 0)
    mpz_neg (q, q);
}
2376
/* r = u mod 2^bit_index, with the sign convention implied by MODE
   (r = u - q*2^bit_index for the correspondingly rounded q). */
static void
mpz_div_r_2exp (mpz_t r, const mpz_t u, mp_bitcnt_t bit_index,
		enum mpz_div_round_mode mode)
{
  mp_size_t us, un, rn;
  mp_ptr rp;
  mp_limb_t mask;

  us = u->_mp_size;
  if (us == 0 || bit_index == 0)
    {
      r->_mp_size = 0;
      return;
    }
  /* Limbs needed to hold bit_index bits. */
  rn = (bit_index + GMP_LIMB_BITS - 1) / GMP_LIMB_BITS;
  assert (rn > 0);

  rp = MPZ_REALLOC (r, rn);
  un = GMP_ABS (us);

  /* Mask for the valid bits of the top result limb. */
  mask = GMP_LIMB_MAX >> (rn * GMP_LIMB_BITS - bit_index);

  if (rn > un)
    {
      /* Quotient (with truncation) is zero, and remainder is
	 non-zero */
      if (mode == ((us > 0) ? GMP_DIV_CEIL : GMP_DIV_FLOOR)) /* us != 0 here. */
	{
	  /* Have to negate and sign extend. */
	  mp_size_t i;

	  gmp_assert_nocarry (! mpn_neg (rp, u->_mp_d, un));
	  for (i = un; i < rn - 1; i++)
	    rp[i] = GMP_LIMB_MAX;

	  rp[rn-1] = mask;
	  us = -us;
	}
      else
	{
	  /* Just copy */
	  if (r != u)
	    mpn_copyi (rp, u->_mp_d, un);

	  rn = un;
	}
    }
  else
    {
      if (r != u)
	mpn_copyi (rp, u->_mp_d, rn - 1);

      rp[rn-1] = u->_mp_d[rn-1] & mask;

      if (mode == ((us > 0) ? GMP_DIV_CEIL : GMP_DIV_FLOOR)) /* us != 0 here. */
	{
	  /* If r != 0, compute 2^{bit_count} - r. */
	  mpn_neg (rp, rp, rn);

	  rp[rn-1] &= mask;

	  /* us is not used for anything else, so we can modify it
	     here to indicate flipped sign. */
	  us = -us;
	}
    }
  rn = mpn_normalized_size (rp, rn);
  r->_mp_size = us < 0 ? -rn : rn;
}
2446
/* r = ceil (u / 2^cnt) */
void
mpz_cdiv_q_2exp (mpz_t r, const mpz_t u, mp_bitcnt_t cnt)
{
  mpz_div_q_2exp (r, u, cnt, GMP_DIV_CEIL);
}

/* r = floor (u / 2^cnt) */
void
mpz_fdiv_q_2exp (mpz_t r, const mpz_t u, mp_bitcnt_t cnt)
{
  mpz_div_q_2exp (r, u, cnt, GMP_DIV_FLOOR);
}

/* r = trunc (u / 2^cnt) */
void
mpz_tdiv_q_2exp (mpz_t r, const mpz_t u, mp_bitcnt_t cnt)
{
  mpz_div_q_2exp (r, u, cnt, GMP_DIV_TRUNC);
}

/* Remainder counterparts of the above. */
void
mpz_cdiv_r_2exp (mpz_t r, const mpz_t u, mp_bitcnt_t cnt)
{
  mpz_div_r_2exp (r, u, cnt, GMP_DIV_CEIL);
}

void
mpz_fdiv_r_2exp (mpz_t r, const mpz_t u, mp_bitcnt_t cnt)
{
  mpz_div_r_2exp (r, u, cnt, GMP_DIV_FLOOR);
}

void
mpz_tdiv_r_2exp (mpz_t r, const mpz_t u, mp_bitcnt_t cnt)
{
  mpz_div_r_2exp (r, u, cnt, GMP_DIV_TRUNC);
}

/* q = n / d, valid only when the division is exact. */
void
mpz_divexact (mpz_t q, const mpz_t n, const mpz_t d)
{
  gmp_assert_nocarry (mpz_div_qr (q, NULL, n, d, GMP_DIV_TRUNC));
}

/* Return non-zero iff n is an exact multiple of d. */
int
mpz_divisible_p (const mpz_t n, const mpz_t d)
{
  return mpz_div_qr (NULL, NULL, n, d, GMP_DIV_TRUNC) == 0;
}
2494
2495int
2496mpz_congruent_p (const mpz_t a, const mpz_t b, const mpz_t m)
2497{
2498  mpz_t t;
2499  int res;
2500
2501  /* a == b (mod 0) iff a == b */
2502  if (mpz_sgn (m) == 0)
2503    return (mpz_cmp (a, b) == 0);
2504
2505  mpz_init (t);
2506  mpz_sub (t, a, b);
2507  res = mpz_divisible_p (t, m);
2508  mpz_clear (t);
2509
2510  return res;
2511}
2512
/* Divide N by the unsigned long D with the given rounding mode;
   quotient to Q, remainder to R (either may be NULL).  Returns the
   magnitude of the remainder as an unsigned long. */
static unsigned long
mpz_div_qr_ui (mpz_t q, mpz_t r,
	       const mpz_t n, unsigned long d, enum mpz_div_round_mode mode)
{
  unsigned long ret;
  mpz_t rr, dd;

  mpz_init (rr);
  mpz_init_set_ui (dd, d);
  mpz_div_qr (q, rr, n, dd, mode);
  mpz_clear (dd);
  /* mpz_get_ui yields |rr|'s low bits; rr always fits since |rr| < d. */
  ret = mpz_get_ui (rr);

  if (r)
    mpz_swap (r, rr);
  mpz_clear (rr);

  return ret;
}
2532
/* The following are thin wrappers around mpz_div_qr_ui selecting the
   rounding mode; each returns the magnitude of the remainder as an
   unsigned long. */
unsigned long
mpz_cdiv_qr_ui (mpz_t q, mpz_t r, const mpz_t n, unsigned long d)
{
  return mpz_div_qr_ui (q, r, n, d, GMP_DIV_CEIL);
}

unsigned long
mpz_fdiv_qr_ui (mpz_t q, mpz_t r, const mpz_t n, unsigned long d)
{
  return mpz_div_qr_ui (q, r, n, d, GMP_DIV_FLOOR);
}

unsigned long
mpz_tdiv_qr_ui (mpz_t q, mpz_t r, const mpz_t n, unsigned long d)
{
  return mpz_div_qr_ui (q, r, n, d, GMP_DIV_TRUNC);
}

/* Quotient-only variants. */
unsigned long
mpz_cdiv_q_ui (mpz_t q, const mpz_t n, unsigned long d)
{
  return mpz_div_qr_ui (q, NULL, n, d, GMP_DIV_CEIL);
}

unsigned long
mpz_fdiv_q_ui (mpz_t q, const mpz_t n, unsigned long d)
{
  return mpz_div_qr_ui (q, NULL, n, d, GMP_DIV_FLOOR);
}

unsigned long
mpz_tdiv_q_ui (mpz_t q, const mpz_t n, unsigned long d)
{
  return mpz_div_qr_ui (q, NULL, n, d, GMP_DIV_TRUNC);
}

/* Remainder-only variants. */
unsigned long
mpz_cdiv_r_ui (mpz_t r, const mpz_t n, unsigned long d)
{
  return mpz_div_qr_ui (NULL, r, n, d, GMP_DIV_CEIL);
}
unsigned long
mpz_fdiv_r_ui (mpz_t r, const mpz_t n, unsigned long d)
{
  return mpz_div_qr_ui (NULL, r, n, d, GMP_DIV_FLOOR);
}
unsigned long
mpz_tdiv_r_ui (mpz_t r, const mpz_t n, unsigned long d)
{
  return mpz_div_qr_ui (NULL, r, n, d, GMP_DIV_TRUNC);
}

/* Remainder value only; neither quotient nor remainder is stored. */
unsigned long
mpz_cdiv_ui (const mpz_t n, unsigned long d)
{
  return mpz_div_qr_ui (NULL, NULL, n, d, GMP_DIV_CEIL);
}

unsigned long
mpz_fdiv_ui (const mpz_t n, unsigned long d)
{
  return mpz_div_qr_ui (NULL, NULL, n, d, GMP_DIV_FLOOR);
}

unsigned long
mpz_tdiv_ui (const mpz_t n, unsigned long d)
{
  return mpz_div_qr_ui (NULL, NULL, n, d, GMP_DIV_TRUNC);
}

/* r = n mod d (non-negative); also returned as an unsigned long. */
unsigned long
mpz_mod_ui (mpz_t r, const mpz_t n, unsigned long d)
{
  return mpz_div_qr_ui (NULL, r, n, d, GMP_DIV_FLOOR);
}

/* q = n / d, valid only when the division is exact. */
void
mpz_divexact_ui (mpz_t q, const mpz_t n, unsigned long d)
{
  gmp_assert_nocarry (mpz_div_qr_ui (q, NULL, n, d, GMP_DIV_TRUNC));
}

/* Return non-zero iff n is an exact multiple of d. */
int
mpz_divisible_ui_p (const mpz_t n, unsigned long d)
{
  return mpz_div_qr_ui (NULL, NULL, n, d, GMP_DIV_TRUNC) == 0;
}
2620
2621
2622/* GCD */
/* Binary (subtract-and-shift) gcd of two limbs, not both zero. */
static mp_limb_t
mpn_gcd_11 (mp_limb_t u, mp_limb_t v)
{
  unsigned shift;

  assert ( (u | v) > 0);

  if (u == 0)
    return v;
  else if (v == 0)
    return u;

  /* Factor out the common power of two; it multiplies the result. */
  gmp_ctz (shift, u | v);

  u >>= shift;
  v >>= shift;

  /* Make u odd (at least one of u, v is odd after the shift). */
  if ( (u & 1) == 0)
    MP_LIMB_T_SWAP (u, v);

  while ( (v & 1) == 0)
    v >>= 1;

  /* Invariant: u and v both odd.  The difference is even, so halve
     it until odd again; gcd is preserved throughout. */
  while (u != v)
    {
      if (u > v)
	{
	  u -= v;
	  do
	    u >>= 1;
	  while ( (u & 1) == 0);
	}
      else
	{
	  v -= u;
	  do
	    v >>= 1;
	  while ( (v & 1) == 0);
	}
    }
  return u << shift;
}
2665
/* Compute gcd(|u|, v).  When v != 0 the gcd divides v, hence fits an
   unsigned long and is returned; when v == 0 the gcd is |u| and 0 is
   returned.  If G is non-NULL it receives the gcd as an mpz. */
unsigned long
mpz_gcd_ui (mpz_t g, const mpz_t u, unsigned long v)
{
  mpz_t t;
  mpz_init_set_ui(t, v);
  mpz_gcd (t, u, t);
  if (v > 0)
    v = mpz_get_ui (t);

  if (g)
    mpz_swap (t, g);

  mpz_clear (t);

  return v;
}
2682
/* Strip the trailing zero bits of R (which must be positive),
   i.e. divide R by the largest power of two dividing it.  Returns
   the number of bits removed. */
static mp_bitcnt_t
mpz_make_odd (mpz_t r)
{
  mp_bitcnt_t shift;

  assert (r->_mp_size > 0);
  /* Count trailing zeros, equivalent to mpn_scan1, because we know
     a set bit exists so the scan terminates. */
  shift = mpn_common_scan (r->_mp_d[0], 0, r->_mp_d, 0, 0);
  mpz_tdiv_q_2exp (r, r, shift);

  return shift;
}
2695
/* g = gcd(|u|, |v|), via the binary gcd: strip powers of two, then
   reduce by subtraction/remainder while both operands stay odd. */
void
mpz_gcd (mpz_t g, const mpz_t u, const mpz_t v)
{
  mpz_t tu, tv;
  mp_bitcnt_t uz, vz, gz;

  /* gcd(0, v) = |v| and gcd(u, 0) = |u|. */
  if (u->_mp_size == 0)
    {
      mpz_abs (g, v);
      return;
    }
  if (v->_mp_size == 0)
    {
      mpz_abs (g, u);
      return;
    }

  mpz_init (tu);
  mpz_init (tv);

  /* Remove factors of two; the gcd carries min(uz, vz) of them. */
  mpz_abs (tu, u);
  uz = mpz_make_odd (tu);
  mpz_abs (tv, v);
  vz = mpz_make_odd (tv);
  gz = GMP_MIN (uz, vz);

  if (tu->_mp_size < tv->_mp_size)
    mpz_swap (tu, tv);

  /* One initial division brings the operands close in size. */
  mpz_tdiv_r (tu, tu, tv);
  if (tu->_mp_size == 0)
    {
      mpz_swap (g, tv);
    }
  else
    for (;;)
      {
	int c;

	/* Invariant: tv odd; make tu odd, then subtract smaller from
	   larger. */
	mpz_make_odd (tu);
	c = mpz_cmp (tu, tv);
	if (c == 0)
	  {
	    mpz_swap (g, tu);
	    break;
	  }
	if (c < 0)
	  mpz_swap (tu, tv);

	/* Once tv is a single limb, finish with the limb gcd. */
	if (tv->_mp_size == 1)
	  {
	    mp_limb_t vl = tv->_mp_d[0];
	    mp_limb_t ul = mpz_tdiv_ui (tu, vl);
	    mpz_set_ui (g, mpn_gcd_11 (ul, vl));
	    break;
	  }
	mpz_sub (tu, tu, tv);
      }
  mpz_clear (tu);
  mpz_clear (tv);
  /* Restore the common power of two. */
  mpz_mul_2exp (g, g, gz);
}
2758
/* Extended gcd: g = gcd (u, v) >= 0 together with cofactors
   satisfying g = s u + t v. Either of s and t may be NULL when not
   wanted. Uses the same binary gcd as mpz_gcd, maintaining a cofactor
   matrix whose determinant is a power of two that is divided out at
   the end; the invariants are documented inline. */
void
mpz_gcdext (mpz_t g, mpz_t s, mpz_t t, const mpz_t u, const mpz_t v)
{
  mpz_t tu, tv, s0, s1, t0, t1;
  mp_bitcnt_t uz, vz, gz;
  mp_bitcnt_t power;

  if (u->_mp_size == 0)
    {
      /* g = 0 u + sgn(v) v */
      signed long sign = mpz_sgn (v);
      mpz_abs (g, v);
      if (s)
	s->_mp_size = 0;
      if (t)
	mpz_set_si (t, sign);
      return;
    }

  if (v->_mp_size == 0)
    {
      /* g = sgn(u) u + 0 v */
      signed long sign = mpz_sgn (u);
      mpz_abs (g, u);
      if (s)
	mpz_set_si (s, sign);
      if (t)
	t->_mp_size = 0;
      return;
    }

  mpz_init (tu);
  mpz_init (tv);
  mpz_init (s0);
  mpz_init (s1);
  mpz_init (t0);
  mpz_init (t1);

  /* Make both operands odd; 2^gz is the power of two in the gcd. */
  mpz_abs (tu, u);
  uz = mpz_make_odd (tu);
  mpz_abs (tv, v);
  vz = mpz_make_odd (tv);
  gz = GMP_MIN (uz, vz);

  uz -= gz;
  vz -= gz;

  /* Cofactors corresponding to odd gcd. gz handled later. */
  if (tu->_mp_size < tv->_mp_size)
    {
      mpz_swap (tu, tv);
      MPZ_SRCPTR_SWAP (u, v);
      MPZ_PTR_SWAP (s, t);
      MP_BITCNT_T_SWAP (uz, vz);
    }

  /* Maintain
   *
   * u = t0 tu + t1 tv
   * v = s0 tu + s1 tv
   *
   * where u and v denote the inputs with common factors of two
   * eliminated, and det (s0, t0; s1, t1) = 2^p. Then
   *
   * 2^p tu =  s1 u - t1 v
   * 2^p tv = -s0 u + t0 v
   */

  /* After initial division, tu = q tv + tu', we have
   *
   * u = 2^uz (tu' + q tv)
   * v = 2^vz tv
   *
   * or
   *
   * t0 = 2^uz, t1 = 2^uz q
   * s0 = 0,    s1 = 2^vz
   */

  mpz_setbit (t0, uz);
  mpz_tdiv_qr (t1, tu, tu, tv);
  mpz_mul_2exp (t1, t1, uz);

  mpz_setbit (s1, vz);
  power = uz + vz;

  if (tu->_mp_size > 0)
    {
      mp_bitcnt_t shift;
      shift = mpz_make_odd (tu);
      mpz_mul_2exp (t0, t0, shift);
      mpz_mul_2exp (s0, s0, shift);
      power += shift;

      for (;;)
	{
	  int c;
	  c = mpz_cmp (tu, tv);
	  if (c == 0)
	    break;

	  if (c < 0)
	    {
	      /* tv = tv' + tu
	       *
	       * u = t0 tu + t1 (tv' + tu) = (t0 + t1) tu + t1 tv'
	       * v = s0 tu + s1 (tv' + tu) = (s0 + s1) tu + s1 tv' */

	      mpz_sub (tv, tv, tu);
	      mpz_add (t0, t0, t1);
	      mpz_add (s0, s0, s1);

	      shift = mpz_make_odd (tv);
	      mpz_mul_2exp (t1, t1, shift);
	      mpz_mul_2exp (s1, s1, shift);
	    }
	  else
	    {
	      mpz_sub (tu, tu, tv);
	      mpz_add (t1, t0, t1);
	      mpz_add (s1, s0, s1);

	      shift = mpz_make_odd (tu);
	      mpz_mul_2exp (t0, t0, shift);
	      mpz_mul_2exp (s0, s0, shift);
	    }
	  power += shift;
	}
    }

  /* Now tv = odd part of gcd, and -s0 and t0 are corresponding
     cofactors. */

  mpz_mul_2exp (tv, tv, gz);
  mpz_neg (s0, s0);

  /* 2^p g = s0 u + t0 v. Eliminate one factor of two at a time. To
     adjust cofactors, we need u / g and v / g */

  mpz_divexact (s1, v, tv);
  mpz_abs (s1, s1);
  mpz_divexact (t1, u, tv);
  mpz_abs (t1, t1);

  while (power-- > 0)
    {
      /* s0 u + t0 v = (s0 - v/g) u - (t0 + u/g) v */
      if (mpz_odd_p (s0) || mpz_odd_p (t0))
	{
	  mpz_sub (s0, s0, s1);
	  mpz_add (t0, t0, t1);
	}
      assert (mpz_even_p (t0) && mpz_even_p (s0));
      mpz_tdiv_q_2exp (s0, s0, 1);
      mpz_tdiv_q_2exp (t0, t0, 1);
    }

  /* Arrange so that |s| < |u| / 2g */
  mpz_add (s1, s0, s1);
  if (mpz_cmpabs (s0, s1) > 0)
    {
      mpz_swap (s0, s1);
      mpz_sub (t0, t0, t1);
    }
  /* Fix the cofactor signs for negative inputs (the loop above worked
     with absolute values). */
  if (u->_mp_size < 0)
    mpz_neg (s0, s0);
  if (v->_mp_size < 0)
    mpz_neg (t0, t0);

  mpz_swap (g, tv);
  if (s)
    mpz_swap (s, s0);
  if (t)
    mpz_swap (t, t0);

  mpz_clear (tu);
  mpz_clear (tv);
  mpz_clear (s0);
  mpz_clear (s1);
  mpz_clear (t0);
  mpz_clear (t1);
}
2941
2942void
2943mpz_lcm (mpz_t r, const mpz_t u, const mpz_t v)
2944{
2945  mpz_t g;
2946
2947  if (u->_mp_size == 0 || v->_mp_size == 0)
2948    {
2949      r->_mp_size = 0;
2950      return;
2951    }
2952
2953  mpz_init (g);
2954
2955  mpz_gcd (g, u, v);
2956  mpz_divexact (g, u, g);
2957  mpz_mul (r, g, v);
2958
2959  mpz_clear (g);
2960  mpz_abs (r, r);
2961}
2962
2963void
2964mpz_lcm_ui (mpz_t r, const mpz_t u, unsigned long v)
2965{
2966  if (v == 0 || u->_mp_size == 0)
2967    {
2968      r->_mp_size = 0;
2969      return;
2970    }
2971
2972  v /= mpz_gcd_ui (NULL, u, v);
2973  mpz_mul_ui (r, u, v);
2974
2975  mpz_abs (r, r);
2976}
2977
2978int
2979mpz_invert (mpz_t r, const mpz_t u, const mpz_t m)
2980{
2981  mpz_t g, tr;
2982  int invertible;
2983
2984  if (u->_mp_size == 0 || mpz_cmpabs_ui (m, 1) <= 0)
2985    return 0;
2986
2987  mpz_init (g);
2988  mpz_init (tr);
2989
2990  mpz_gcdext (g, tr, NULL, u, m);
2991  invertible = (mpz_cmp_ui (g, 1) == 0);
2992
2993  if (invertible)
2994    {
2995      if (tr->_mp_size < 0)
2996	{
2997	  if (m->_mp_size >= 0)
2998	    mpz_add (tr, tr, m);
2999	  else
3000	    mpz_sub (tr, tr, m);
3001	}
3002      mpz_swap (r, tr);
3003    }
3004
3005  mpz_clear (g);
3006  mpz_clear (tr);
3007  return invertible;
3008}
3009
3010
3011/* Higher level operations (sqrt, pow and root) */
3012
3013void
3014mpz_pow_ui (mpz_t r, const mpz_t b, unsigned long e)
3015{
3016  unsigned long bit;
3017  mpz_t tr;
3018  mpz_init_set_ui (tr, 1);
3019
3020  bit = GMP_ULONG_HIGHBIT;
3021  do
3022    {
3023      mpz_mul (tr, tr, tr);
3024      if (e & bit)
3025	mpz_mul (tr, tr, b);
3026      bit >>= 1;
3027    }
3028  while (bit > 0);
3029
3030  mpz_swap (r, tr);
3031  mpz_clear (tr);
3032}
3033
3034void
3035mpz_ui_pow_ui (mpz_t r, unsigned long blimb, unsigned long e)
3036{
3037  mpz_t b;
3038
3039  mpz_init_set_ui (b, blimb);
3040  mpz_pow_ui (r, b, e);
3041  mpz_clear (b);
3042}
3043
/* Modular exponentiation: r = b^e mod m, reduced to the least
   non-negative residue. m must be non-zero. A negative exponent
   requires b to be invertible modulo m; otherwise the function dies.
   Uses binary (square-and-multiply) exponentiation with reductions by
   a precomputed division inverse of m. */
void
mpz_powm (mpz_t r, const mpz_t b, const mpz_t e, const mpz_t m)
{
  mpz_t tr;
  mpz_t base;
  mp_size_t en, mn;
  mp_srcptr mp;
  struct gmp_div_inverse minv;
  unsigned shift;
  mp_ptr tp = NULL;

  en = GMP_ABS (e->_mp_size);
  mn = GMP_ABS (m->_mp_size);
  if (mn == 0)
    gmp_die ("mpz_powm: Zero modulo.");

  /* b^0 = 1, for any b including 0. */
  if (en == 0)
    {
      mpz_set_ui (r, 1);
      return;
    }

  mp = m->_mp_d;
  mpn_div_qr_invert (&minv, mp, mn);
  shift = minv.shift;

  if (shift > 0)
    {
      /* To avoid shifts, we do all our reductions, except the final
	 one, using a *normalized* m. */
      minv.shift = 0;

      tp = gmp_xalloc_limbs (mn);
      gmp_assert_nocarry (mpn_lshift (tp, mp, mn, shift));
      mp = tp;
    }

  mpz_init (base);

  if (e->_mp_size < 0)
    {
      /* Negative exponent: replace the base by its inverse mod m. */
      if (!mpz_invert (base, b, m))
	gmp_die ("mpz_powm: Negative exponent and non-invertible base.");
    }
  else
    {
      mp_size_t bn;
      mpz_abs (base, b);

      bn = base->_mp_size;
      if (bn >= mn)
	{
	  mpn_div_qr_preinv (NULL, base->_mp_d, base->_mp_size, mp, mn, &minv);
	  bn = mn;
	}

      /* We have reduced the absolute value. Now take care of the
	 sign. Note that we get zero represented non-canonically as
	 m. */
      if (b->_mp_size < 0)
	{
	  mp_ptr bp = MPZ_REALLOC (base, mn);
	  gmp_assert_nocarry (mpn_sub (bp, mp, mn, bp, bn));
	  bn = mn;
	}
      base->_mp_size = mpn_normalized_size (base->_mp_d, bn);
    }
  mpz_init_set_ui (tr, 1);

  /* Square-and-multiply over the bits of e, most significant first,
     reducing eagerly to keep operands at most mn limbs. */
  while (--en >= 0)
    {
      mp_limb_t w = e->_mp_d[en];
      mp_limb_t bit;

      bit = GMP_LIMB_HIGHBIT;
      do
	{
	  mpz_mul (tr, tr, tr);
	  if (w & bit)
	    mpz_mul (tr, tr, base);
	  if (tr->_mp_size > mn)
	    {
	      mpn_div_qr_preinv (NULL, tr->_mp_d, tr->_mp_size, mp, mn, &minv);
	      tr->_mp_size = mpn_normalized_size (tr->_mp_d, mn);
	    }
	  bit >>= 1;
	}
      while (bit > 0);
    }

  /* Final reduction */
  if (tr->_mp_size >= mn)
    {
      minv.shift = shift;
      mpn_div_qr_preinv (NULL, tr->_mp_d, tr->_mp_size, mp, mn, &minv);
      tr->_mp_size = mpn_normalized_size (tr->_mp_d, mn);
    }
  if (tp)
    gmp_free (tp);

  mpz_swap (r, tr);
  mpz_clear (tr);
  mpz_clear (base);
}
3148
3149void
3150mpz_powm_ui (mpz_t r, const mpz_t b, unsigned long elimb, const mpz_t m)
3151{
3152  mpz_t e;
3153
3154  mpz_init_set_ui (e, elimb);
3155  mpz_powm (r, b, e, m);
3156  mpz_clear (e);
3157}
3158
/* x=trunc(y^(1/z)), r=y-x^z. Either of x and r may be NULL. Dies for
   an even root of a negative y, and for z == 0. Uses Newton's
   iteration, started from a power of two known to exceed the root;
   the iteration decreases monotonically once above the root, so it
   stops at the first non-decreasing step. */
void
mpz_rootrem (mpz_t x, mpz_t r, const mpz_t y, unsigned long z)
{
  int sgn;
  mpz_t t, u;

  sgn = y->_mp_size < 0;
  /* ~z & sgn != 0 iff y < 0 and z even. */
  if ((~z & sgn) != 0)
    gmp_die ("mpz_rootrem: Negative argument, with even root.");
  if (z == 0)
    gmp_die ("mpz_rootrem: Zeroth root.");

  /* |y| <= 1 is its own root, with zero remainder. */
  if (mpz_cmpabs_ui (y, 1) <= 0) {
    if (x)
      mpz_set (x, y);
    if (r)
      r->_mp_size = 0;
    return;
  }

  mpz_init (u);
  mpz_init (t);
  /* Initial guess: 2^(bits(y)/z + 1) is above the true root. */
  mpz_setbit (t, mpz_sizeinbase (y, 2) / z + 1);

  if (z == 2) /* simplify sqrt loop: z-1 == 1 */
    do {
      mpz_swap (u, t);			/* u = x */
      mpz_tdiv_q (t, y, u);		/* t = y/x */
      mpz_add (t, t, u);		/* t = y/x + x */
      mpz_tdiv_q_2exp (t, t, 1);	/* x'= (y/x + x)/2 */
    } while (mpz_cmpabs (t, u) < 0);	/* |x'| < |x| */
  else /* z != 2 */ {
    mpz_t v;

    mpz_init (v);
    /* For an odd root of a negative y, iterate on negative values. */
    if (sgn)
      mpz_neg (t, t);

    do {
      mpz_swap (u, t);			/* u = x */
      mpz_pow_ui (t, u, z - 1);		/* t = x^(z-1) */
      mpz_tdiv_q (t, y, t);		/* t = y/x^(z-1) */
      mpz_mul_ui (v, u, z - 1);		/* v = x*(z-1) */
      mpz_add (t, t, v);		/* t = y/x^(z-1) + x*(z-1) */
      mpz_tdiv_q_ui (t, t, z);		/* x'=(y/x^(z-1) + x*(z-1))/z */
    } while (mpz_cmpabs (t, u) < 0);	/* |x'| < |x| */

    mpz_clear (v);
  }

  /* u now holds the truncated root. */
  if (r) {
    mpz_pow_ui (t, u, z);
    mpz_sub (r, y, t);
  }
  if (x)
    mpz_swap (x, u);
  mpz_clear (u);
  mpz_clear (t);
}
3219
3220int
3221mpz_root (mpz_t x, const mpz_t y, unsigned long z)
3222{
3223  int res;
3224  mpz_t r;
3225
3226  mpz_init (r);
3227  mpz_rootrem (x, r, y, z);
3228  res = r->_mp_size == 0;
3229  mpz_clear (r);
3230
3231  return res;
3232}
3233
/* Compute s = floor(sqrt(u)) and r = u - s^2. Allows r == NULL */
void
mpz_sqrtrem (mpz_t s, mpz_t r, const mpz_t u)
{
  /* Square root is the z = 2 case of the general root. */
  mpz_rootrem (s, r, u, 2);
}
3240
/* s = floor(sqrt(u)), discarding the remainder. */
void
mpz_sqrt (mpz_t s, const mpz_t u)
{
  mpz_rootrem (s, NULL, u, 2);
}
3246
3247int
3248mpz_perfect_square_p (const mpz_t u)
3249{
3250  if (u->_mp_size <= 0)
3251    return (u->_mp_size == 0);
3252  else
3253    return mpz_root (NULL, u, 2);
3254}
3255
/* Return non-zero iff the n-limb number {p, n} is a perfect square.
   Requires n > 0 and a normalized operand (p[n-1] != 0). */
int
mpn_perfect_square_p (mp_srcptr p, mp_size_t n)
{
  mpz_t t;

  assert (n > 0);
  assert (p [n-1] != 0);
  /* Wrap the limb array in a read-only mpz and test via mpz_root. */
  return mpz_root (NULL, mpz_roinit_normal_n (t, p, n), 2);
}
3265
/* Compute sp = floor(sqrt({p, n})) and, when rp != NULL, the
   remainder {p, n} - sp^2 into rp. Returns the limb count of the
   remainder (0 for a perfect square). Requires n > 0 and
   p[n-1] != 0; sp must have room for (n+1)/2 limbs. */
mp_size_t
mpn_sqrtrem (mp_ptr sp, mp_ptr rp, mp_srcptr p, mp_size_t n)
{
  mpz_t s, r, u;
  mp_size_t res;

  assert (n > 0);
  assert (p [n-1] != 0);

  mpz_init (r);
  mpz_init (s);
  /* Read-only mpz view of the input; no copy is made. */
  mpz_rootrem (s, r, mpz_roinit_normal_n (u, p, n), 2);

  /* The square root of a normalized n-limb number has exactly
     (n+1)/2 limbs. */
  assert (s->_mp_size == (n+1)/2);
  mpn_copyd (sp, s->_mp_d, s->_mp_size);
  mpz_clear (s);
  res = r->_mp_size;
  if (rp)
    mpn_copyd (rp, r->_mp_d, res);
  mpz_clear (r);
  return res;
}
3288
3289/* Combinatorics */
3290
/* m-multifactorial: x = n (n-m) (n-2m) ..., down to the smallest
   positive term. */
void
mpz_mfac_uiui (mpz_t x, unsigned long n, unsigned long m)
{
  /* Start from n, or from 1 when n == 0 (empty product). */
  mpz_set_ui (x, n + (n == 0));
  /* m + 1 < 2 catches both m == 0 and m == ULONG_MAX (m + 1 wraps to
     0); in either case there are no further factors, and the loop
     below would misbehave. */
  if (m + 1 < 2) return;
  while (n > m + 1)
    mpz_mul_ui (x, x, n -= m);
}
3299
/* Double factorial: x = n!! = n (n-2) (n-4) ... */
void
mpz_2fac_ui (mpz_t x, unsigned long n)
{
  mpz_mfac_uiui (x, n, 2);
}
3305
/* Factorial: x = n! = n (n-1) (n-2) ... 1 */
void
mpz_fac_ui (mpz_t x, unsigned long n)
{
  mpz_mfac_uiui (x, n, 1);
}
3311
/* Binomial coefficient: r = binomial (n, k), which is 0 when k > n. */
void
mpz_bin_uiui (mpz_t r, unsigned long n, unsigned long k)
{
  mpz_t t;

  /* Initialize to 1 for k <= n, to 0 otherwise (and then k is forced
     to 0 below, skipping the product loop). */
  mpz_set_ui (r, k <= n);

  /* Use the symmetry binomial (n, k) = binomial (n, n-k) to minimize
     the number of factors. */
  if (k > (n >> 1))
    k = (k <= n) ? n - k : 0;

  mpz_init (t);
  mpz_fac_ui (t, k);	/* t = k! */

  /* r = n (n-1) ... (n-k+1) */
  for (; k > 0; --k)
    mpz_mul_ui (r, r, n--);

  /* Divide the falling factorial by k!; always exact. */
  mpz_divexact (r, r, t);
  mpz_clear (t);
}
3331
3332
3333/* Primality testing */
3334
/* Computes Kronecker (a/b) with odd b, a!=0 and GCD(a,b) = 1 */
/* Adapted from JACOBI_BASE_METHOD==4 in mpn/generic/jacbase.c */
static int
gmp_jacobi_coprime (mp_limb_t a, mp_limb_t b)
{
  /* bit accumulates the sign in its least significant bit:
     result is -1 iff an odd number of sign flips occurred. */
  int c, bit = 0;

  assert (b & 1);
  assert (a != 0);
  /* assert (mpn_gcd_11 (a, b) == 1); */

  /* Below, we represent a and b shifted right so that the least
     significant one bit is implicit. */
  b >>= 1;

  gmp_ctz(c, a);
  a >>= 1;

  do
    {
      a >>= c;
      /* (2/b) = -1 if b = 3 or 5 mod 8 */
      bit ^= c & (b ^ (b >> 1));
      if (a < b)
	{
	  /* Reciprocity: in the shifted representation, a & b & 1 is
	     set exactly when both original values are 3 (mod 4). */
	  bit ^= a & b;
	  /* Exchange roles: (a, b) <- (b - a, a). */
	  a = b - a;
	  b -= a;
	}
      else
	{
	  a -= b;
	  assert (a != 0);
	}

      gmp_ctz(c, a);
      /* The difference of two odd numbers is even, so one right shift
	 is always implied in this representation; hence the +1. */
      ++c;
    }
  while (b > 0);

  return bit & 1 ? -1 : 1;
}
3377
/* Doubling step for Lucas sequences with P = 1: given V = V_k and
   Qk = Q^k (mod n), replace them by V_{2k} and Q^{2k} (mod n). */
static void
gmp_lucas_step_k_2k (mpz_t V, mpz_t Qk, const mpz_t n)
{
  mpz_mod (Qk, Qk, n);
  /* V_{2k} <- V_k ^ 2 - 2Q^k */
  mpz_mul (V, V, V);
  mpz_submul_ui (V, Qk, 2);
  mpz_tdiv_r (V, V, n);
  /* Q^{2k} = (Q^k)^2 */
  mpz_mul (Qk, Qk, Qk);
}
3389
/* Computes V_k, Q^k (mod n) for the Lucas' sequence */
/* with P=1, Q=Q; k = (n>>b0)|1. */
/* Requires an odd n > 4; b0 > 0; -2*Q must not overflow a long */
/* Returns (U_k == 0) and sets V=V_k and Qk=Q^k. */
static int
gmp_lucas_mod (mpz_t V, mpz_t Qk, long Q,
	       mp_bitcnt_t b0, const mpz_t n)
{
  mp_bitcnt_t bs;
  mpz_t U;	/* U_k of the sequence; only its zeroness is returned. */
  int res;

  assert (b0 > 0);
  assert (Q <= - (LONG_MIN / 2));
  assert (Q >= - (LONG_MAX / 2));
  assert (mpz_cmp_ui (n, 4) > 0);
  assert (mpz_odd_p (n));

  mpz_init_set_ui (U, 1); /* U1 = 1 */
  mpz_set_ui (V, 1); /* V1 = 1 */
  mpz_set_si (Qk, Q);

  /* Left-to-right binary ladder over the bits of k. */
  for (bs = mpz_sizeinbase (n, 2) - 1; --bs >= b0;)
    {
      /* U_{2k} <- U_k * V_k */
      mpz_mul (U, U, V);
      /* V_{2k} <- V_k ^ 2 - 2Q^k */
      /* Q^{2k} = (Q^k)^2 */
      gmp_lucas_step_k_2k (V, Qk, n);

      /* A step k->k+1 is performed if the bit in $n$ is 1	*/
      /* mpz_tstbit(n,bs) or the bit is 0 in $n$ but	*/
      /* should be 1 in $n+1$ (bs == b0)			*/
      if (b0 == bs || mpz_tstbit (n, bs))
	{
	  /* Q^{k+1} <- Q^k * Q */
	  mpz_mul_si (Qk, Qk, Q);
	  /* U_{k+1} <- (U_k + V_k) / 2 */
	  mpz_swap (U, V); /* Keep in V the old value of U_k */
	  mpz_add (U, U, V);
	  /* We have to compute U/2, so we need an even value, */
	  /* equivalent (mod n) */
	  if (mpz_odd_p (U))
	    mpz_add (U, U, n);
	  mpz_tdiv_q_2exp (U, U, 1);
	  /* V_{k+1} <-(D*U_k + V_k) / 2 =
			U_{k+1} + (D-1)/2*U_k = U_{k+1} - 2Q*U_k */
	  mpz_mul_si (V, V, -2*Q);
	  mpz_add (V, U, V);
	  mpz_tdiv_r (V, V, n);
	}
      mpz_tdiv_r (U, U, n);
    }

  res = U->_mp_size == 0;
  mpz_clear (U);
  return res;
}
3448
/* Performs strong Lucas' test on x, with parameters suggested */
/* for the BPSW test. Qk is only passed to recycle a variable. */
/* Requires GCD (x,6) = 1.*/
/* Returns 0 for composite, non-zero for (probable) prime. */
static int
gmp_stronglucas (const mpz_t x, mpz_t Qk)
{
  mp_bitcnt_t b0;
  mpz_t V, n;
  mp_limb_t maxD, D; /* The absolute value is stored. */
  long Q;
  mp_limb_t tl;

  /* Test on the absolute value. */
  mpz_roinit_normal_n (n, x->_mp_d, GMP_ABS (x->_mp_size));

  assert (mpz_odd_p (n));
  /* assert (mpz_gcd_ui (NULL, n, 6) == 1); */
  if (mpz_root (Qk, n, 2))
    return 0; /* A square is composite. */

  /* Check Ds up to square root (in case, n is prime)
     or avoid overflows */
  maxD = (Qk->_mp_size == 1) ? Qk->_mp_d [0] - 1 : GMP_LIMB_MAX;

  D = 3;
  /* Search a D such that (D/n) = -1 in the sequence 5,-7,9,-11,.. */
  /* For those Ds we have (D/n) = (n/|D|) */
  do
    {
      /* Search exhausted without finding a non-residue: n is prime
	 when maxD came from the square root of n.
	 NOTE(review): the hint "(1 + ! ~ D)" equals
	 1 + (D == GMP_LIMB_MAX), which disagrees with the code below;
	 the sole caller collapses this value with &&, so either form
	 behaves identically there, but the intended 1-vs-2 return
	 should be confirmed against upstream mini-gmp. */
      if (D >= maxD)
	return 1 + (D != GMP_LIMB_MAX); /* (1 + ! ~ D) */
      D += 2;
      tl = mpz_tdiv_ui (n, D);
      if (tl == 0)
	return 0;	/* Found a small factor of n. */
    }
  while (gmp_jacobi_coprime (tl, D) == 1);

  mpz_init (V);

  /* n-(D/n) = n+1 = d*2^{b0}, with d = (n>>b0) | 1 */
  b0 = mpz_scan0 (n, 0);

  /* D= P^2 - 4Q; P = 1; Q = (1-D)/4 */
  Q = (D & 2) ? (long) (D >> 2) + 1 : -(long) (D >> 2);

  if (! gmp_lucas_mod (V, Qk, Q, b0, n))	/* If Ud != 0 */
    while (V->_mp_size != 0 && --b0 != 0)	/* while Vk != 0 */
      /* V <- V ^ 2 - 2Q^k */
      /* Q^{2k} = (Q^k)^2 */
      gmp_lucas_step_k_2k (V, Qk, n);

  mpz_clear (V);
  return (b0 != 0);
}
3504
/* One Miller-Rabin round: n odd, nm1 = n - 1 = q 2^k with q odd, and
   y holds the base on entry. Returns 1 if n is a strong probable
   prime to this base, 0 if n is proved composite. Clobbers y. */
static int
gmp_millerrabin (const mpz_t n, const mpz_t nm1, mpz_t y,
		 const mpz_t q, mp_bitcnt_t k)
{
  assert (k > 0);

  /* Caller must initialize y to the base. */
  mpz_powm (y, y, q, n);

  /* base^q = +-1 (mod n): n passes. */
  if (mpz_cmp_ui (y, 1) == 0 || mpz_cmp (y, nm1) == 0)
    return 1;

  /* Square up to k-1 times, looking for -1 (mod n). */
  while (--k > 0)
    {
      mpz_powm_ui (y, y, 2, n);
      if (mpz_cmp (y, nm1) == 0)
	return 1;
      /* y == 1 means that the previous y was a non-trivial square root
	 of 1 (mod n). y == 0 means that n is a power of the base.
	 In either case, n is not prime. */
      if (mpz_cmp_ui (y, 1) <= 0)
	return 0;
    }
  return 0;
}
3530
3531/* This product is 0xc0cfd797, and fits in 32 bits. */
3532#define GMP_PRIME_PRODUCT \
3533  (3UL*5UL*7UL*11UL*13UL*17UL*19UL*23UL*29UL)
3534
3535/* Bit (p+1)/2 is set, for each odd prime <= 61 */
3536#define GMP_PRIME_MASK 0xc96996dcUL
3537
/* Primality test on |n|: returns 2 when n is definitely prime, 1 when
   probably prime, 0 when composite. Performs trial division, one BPSW
   round (Miller-Rabin base 2 plus a strong Lucas test), then further
   Miller-Rabin rounds with a deterministic base sequence. */
int
mpz_probab_prime_p (const mpz_t n, int reps)
{
  mpz_t nm1;
  mpz_t q;
  mpz_t y;
  mp_bitcnt_t k;
  int is_prime;
  int j;

  /* Note that we use the absolute value of n only, for compatibility
     with the real GMP. */
  if (mpz_even_p (n))
    return (mpz_cmpabs_ui (n, 2) == 0) ? 2 : 0;

  /* Above test excludes n == 0 */
  assert (n->_mp_size != 0);

  /* Small odd n: table lookup, bit (p+1)/2 of GMP_PRIME_MASK. */
  if (mpz_cmpabs_ui (n, 64) < 0)
    return (GMP_PRIME_MASK >> (n->_mp_d[0] >> 1)) & 2;

  /* Trial division by the odd primes up to 29. */
  if (mpz_gcd_ui (NULL, n, GMP_PRIME_PRODUCT) != 1)
    return 0;

  /* All prime factors are >= 31. */
  if (mpz_cmpabs_ui (n, 31*31) < 0)
    return 2;

  mpz_init (nm1);
  mpz_init (q);

  /* Find q and k, where q is odd and n = 1 + 2**k * q.  */
  mpz_abs (nm1, n);
  /* n is odd, so the low limb is >= 1 and cannot borrow. */
  nm1->_mp_d[0] -= 1;
  k = mpz_scan1 (nm1, 0);
  mpz_tdiv_q_2exp (q, nm1, k);

  /* BPSW test */
  mpz_init_set_ui (y, 2);
  is_prime = gmp_millerrabin (n, nm1, y, q, k) && gmp_stronglucas (n, y);
  reps -= 24; /* skip the first 24 repetitions */

  /* Use Miller-Rabin, with a deterministic sequence of bases, a[j] =
     j^2 + j + 41 using Euler's polynomial. We potentially stop early,
     if a[j] >= n - 1. Since n >= 31*31, this can happen only if reps >
     30 (a[30] == 971 > 31*31 == 961). */

  /* Bitwise & is intentional and safe: both operands are 0 or 1. */
  for (j = 0; is_prime & (j < reps); j++)
    {
      mpz_set_ui (y, (unsigned long) j*j+j+41);
      if (mpz_cmp (y, nm1) >= 0)
	{
	  /* Don't try any further bases. This "early" break does not affect
	     the result for any reasonable reps value (<=5000 was tested) */
	  assert (j >= 30);
	  break;
	}
      is_prime = gmp_millerrabin (n, nm1, y, q, k);
    }
  mpz_clear (nm1);
  mpz_clear (q);
  mpz_clear (y);

  return is_prime;
}
3603
3604
3605/* Logical operations and bit manipulation. */
3606
3607/* Numbers are treated as if represented in two's complement (and
3608   infinitely sign extended). For a negative values we get the two's
3609   complement from -x = ~x + 1, where ~ is bitwise complement.
3610   Negation transforms
3611
3612     xxxx10...0
3613
3614   into
3615
3616     yyyy10...0
3617
3618   where yyyy is the bitwise complement of xxxx. So least significant
3619   bits, up to and including the first one bit, are unchanged, and
3620   the more significant bits are all complemented.
3621
3622   To change a bit from zero to one in a negative number, subtract the
3623   corresponding power of two from the absolute value. This can never
3624   underflow. To change a bit from one to zero, add the corresponding
3625   power of two, and this might overflow. E.g., if x = -001111, the
3626   two's complement is 110001. Clearing the least significant bit, we
3627   get two's complement 110000, and -010000. */
3628
/* Return bit bit_index of d, viewing d as an infinitely sign-extended
   two's complement number. */
int
mpz_tstbit (const mpz_t d, mp_bitcnt_t bit_index)
{
  mp_size_t limb_index;
  unsigned shift;
  mp_size_t ds;
  mp_size_t dn;
  mp_limb_t w;
  int bit;

  ds = d->_mp_size;
  dn = GMP_ABS (ds);
  limb_index = bit_index / GMP_LIMB_BITS;
  /* Beyond the stored limbs every bit is a copy of the sign. */
  if (limb_index >= dn)
    return ds < 0;

  shift = bit_index % GMP_LIMB_BITS;
  w = d->_mp_d[limb_index];
  bit = (w >> shift) & 1;

  if (ds < 0)
    {
      /* d < 0. Check if any of the bits below is set: If so, our bit
	 must be complemented. */
      if (shift > 0 && (mp_limb_t) (w << (GMP_LIMB_BITS - shift)) > 0)
	return bit ^ 1;
      while (--limb_index >= 0)
	if (d->_mp_d[limb_index] > 0)
	  return bit ^ 1;
    }
  return bit;
}
3661
/* Add 2^bit_index to |d|, keeping d's sign. Helper for the two's
   complement bit operations below. */
static void
mpz_abs_add_bit (mpz_t d, mp_bitcnt_t bit_index)
{
  mp_size_t dn, limb_index;
  mp_limb_t bit;
  mp_ptr dp;

  dn = GMP_ABS (d->_mp_size);

  limb_index = bit_index / GMP_LIMB_BITS;
  bit = (mp_limb_t) 1 << (bit_index % GMP_LIMB_BITS);

  if (limb_index >= dn)
    {
      mp_size_t i;
      /* The bit should be set outside of the end of the number.
	 We have to increase the size of the number. */
      dp = MPZ_REALLOC (d, limb_index + 1);

      dp[limb_index] = bit;
      /* Zero-fill the gap between the old most significant limb and
	 the new bit. */
      for (i = dn; i < limb_index; i++)
	dp[i] = 0;
      dn = limb_index + 1;
    }
  else
    {
      mp_limb_t cy;

      dp = d->_mp_d;

      /* Add the bit in place; a carry extends the number by one limb. */
      cy = mpn_add_1 (dp + limb_index, dp + limb_index, dn - limb_index, bit);
      if (cy > 0)
	{
	  dp = MPZ_REALLOC (d, dn + 1);
	  dp[dn++] = cy;
	}
    }

  /* Restore the original sign on the updated magnitude. */
  d->_mp_size = (d->_mp_size < 0) ? - dn : dn;
}
3702
/* Subtract 2^bit_index from |d|, keeping d's sign. The callers only
   use this when the corresponding bit of |d|'s two's complement view
   guarantees no underflow, hence the no-carry assertion. */
static void
mpz_abs_sub_bit (mpz_t d, mp_bitcnt_t bit_index)
{
  mp_size_t dn, limb_index;
  mp_ptr dp;
  mp_limb_t bit;

  dn = GMP_ABS (d->_mp_size);
  dp = d->_mp_d;

  limb_index = bit_index / GMP_LIMB_BITS;
  bit = (mp_limb_t) 1 << (bit_index % GMP_LIMB_BITS);

  /* The bit must lie within the stored limbs. */
  assert (limb_index < dn);

  gmp_assert_nocarry (mpn_sub_1 (dp + limb_index, dp + limb_index,
				 dn - limb_index, bit));
  /* The subtraction may have cleared high limbs; renormalize. */
  dn = mpn_normalized_size (dp, dn);
  d->_mp_size = (d->_mp_size < 0) ? - dn : dn;
}
3723
3724void
3725mpz_setbit (mpz_t d, mp_bitcnt_t bit_index)
3726{
3727  if (!mpz_tstbit (d, bit_index))
3728    {
3729      if (d->_mp_size >= 0)
3730	mpz_abs_add_bit (d, bit_index);
3731      else
3732	mpz_abs_sub_bit (d, bit_index);
3733    }
3734}
3735
3736void
3737mpz_clrbit (mpz_t d, mp_bitcnt_t bit_index)
3738{
3739  if (mpz_tstbit (d, bit_index))
3740    {
3741      if (d->_mp_size >= 0)
3742	mpz_abs_sub_bit (d, bit_index);
3743      else
3744	mpz_abs_add_bit (d, bit_index);
3745    }
3746}
3747
3748void
3749mpz_combit (mpz_t d, mp_bitcnt_t bit_index)
3750{
3751  if (mpz_tstbit (d, bit_index) ^ (d->_mp_size < 0))
3752    mpz_abs_sub_bit (d, bit_index);
3753  else
3754    mpz_abs_add_bit (d, bit_index);
3755}
3756
3757void
3758mpz_com (mpz_t r, const mpz_t u)
3759{
3760  mpz_add_ui (r, u, 1);
3761  mpz_neg (r, r);
3762}
3763
/* r = u & v on the infinite two's complement representations.
   Negative operands are converted limb by limb on the fly: since
   -x = ~x + 1, each limb is (limb ^ mask) + carry, with an all-ones
   mask and initial carry 1 for a negative operand. The result is
   converted back the same way. */
void
mpz_and (mpz_t r, const mpz_t u, const mpz_t v)
{
  mp_size_t un, vn, rn, i;
  mp_ptr up, vp, rp;

  mp_limb_t ux, vx, rx;
  mp_limb_t uc, vc, rc;
  mp_limb_t ul, vl, rl;

  un = GMP_ABS (u->_mp_size);
  vn = GMP_ABS (v->_mp_size);
  /* Make u the operand with more limbs. */
  if (un < vn)
    {
      MPZ_SRCPTR_SWAP (u, v);
      MP_SIZE_T_SWAP (un, vn);
    }
  if (vn == 0)
    {
      /* x & 0 = 0 */
      r->_mp_size = 0;
      return;
    }

  /* Negation carries (0 or 1); the result is negative iff both
     operands are. */
  uc = u->_mp_size < 0;
  vc = v->_mp_size < 0;
  rc = uc & vc;

  /* All-zeros or all-ones complement masks derived from the signs. */
  ux = -uc;
  vx = -vc;
  rx = -rc;

  /* If the smaller input is positive, higher limbs don't matter. */
  rn = vx ? un : vn;

  /* One extra limb in case the final negation carry spills over. */
  rp = MPZ_REALLOC (r, rn + (mp_size_t) rc);

  up = u->_mp_d;
  vp = v->_mp_d;

  i = 0;
  do
    {
      ul = (up[i] ^ ux) + uc;
      uc = ul < uc;

      vl = (vp[i] ^ vx) + vc;
      vc = vl < vc;

      rl = ( (ul & vl) ^ rx) + rc;
      rc = rl < rc;
      rp[i] = rl;
    }
  while (++i < vn);
  /* v != 0, so its negation carry must have been absorbed. */
  assert (vc == 0);

  /* Remaining limbs of u combined with v's sign extension. */
  for (; i < rn; i++)
    {
      ul = (up[i] ^ ux) + uc;
      uc = ul < uc;

      rl = ( (ul & vx) ^ rx) + rc;
      rc = rl < rc;
      rp[i] = rl;
    }
  if (rc)
    rp[rn++] = rc;
  else
    rn = mpn_normalized_size (rp, rn);

  r->_mp_size = rx ? -rn : rn;
}
3835
/* r = u | v on the infinite two's complement representations, using
   the same on-the-fly negation scheme as mpz_and. The result is
   negative iff either operand is. */
void
mpz_ior (mpz_t r, const mpz_t u, const mpz_t v)
{
  mp_size_t un, vn, rn, i;
  mp_ptr up, vp, rp;

  mp_limb_t ux, vx, rx;
  mp_limb_t uc, vc, rc;
  mp_limb_t ul, vl, rl;

  un = GMP_ABS (u->_mp_size);
  vn = GMP_ABS (v->_mp_size);
  /* Make u the operand with more limbs. */
  if (un < vn)
    {
      MPZ_SRCPTR_SWAP (u, v);
      MP_SIZE_T_SWAP (un, vn);
    }
  if (vn == 0)
    {
      /* x | 0 = x */
      mpz_set (r, u);
      return;
    }

  /* Negation carries (0 or 1) and complement masks, as in mpz_and. */
  uc = u->_mp_size < 0;
  vc = v->_mp_size < 0;
  rc = uc | vc;

  ux = -uc;
  vx = -vc;
  rx = -rc;

  /* If the smaller input is negative, by sign extension higher limbs
     don't matter. */
  rn = vx ? vn : un;

  /* One extra limb in case the final negation carry spills over. */
  rp = MPZ_REALLOC (r, rn + (mp_size_t) rc);

  up = u->_mp_d;
  vp = v->_mp_d;

  i = 0;
  do
    {
      ul = (up[i] ^ ux) + uc;
      uc = ul < uc;

      vl = (vp[i] ^ vx) + vc;
      vc = vl < vc;

      rl = ( (ul | vl) ^ rx) + rc;
      rc = rl < rc;
      rp[i] = rl;
    }
  while (++i < vn);
  /* v != 0, so its negation carry must have been absorbed. */
  assert (vc == 0);

  /* Remaining limbs of u combined with v's sign extension. */
  for (; i < rn; i++)
    {
      ul = (up[i] ^ ux) + uc;
      uc = ul < uc;

      rl = ( (ul | vx) ^ rx) + rc;
      rc = rl < rc;
      rp[i] = rl;
    }
  if (rc)
    rp[rn++] = rc;
  else
    rn = mpn_normalized_size (rp, rn);

  r->_mp_size = rx ? -rn : rn;
}
3908
/* r = u ^ v on the infinite two's complement representations, using
   the same on-the-fly negation scheme as mpz_and. The result is
   negative iff exactly one operand is. */
void
mpz_xor (mpz_t r, const mpz_t u, const mpz_t v)
{
  mp_size_t un, vn, i;
  mp_ptr up, vp, rp;

  mp_limb_t ux, vx, rx;
  mp_limb_t uc, vc, rc;
  mp_limb_t ul, vl, rl;

  un = GMP_ABS (u->_mp_size);
  vn = GMP_ABS (v->_mp_size);
  /* Make u the operand with more limbs. */
  if (un < vn)
    {
      MPZ_SRCPTR_SWAP (u, v);
      MP_SIZE_T_SWAP (un, vn);
    }
  if (vn == 0)
    {
      /* x ^ 0 = x */
      mpz_set (r, u);
      return;
    }

  /* Negation carries (0 or 1) and complement masks, as in mpz_and. */
  uc = u->_mp_size < 0;
  vc = v->_mp_size < 0;
  rc = uc ^ vc;

  ux = -uc;
  vx = -vc;
  rx = -rc;

  /* One extra limb in case the final negation carry spills over. */
  rp = MPZ_REALLOC (r, un + (mp_size_t) rc);

  up = u->_mp_d;
  vp = v->_mp_d;

  i = 0;
  do
    {
      ul = (up[i] ^ ux) + uc;
      uc = ul < uc;

      vl = (vp[i] ^ vx) + vc;
      vc = vl < vc;

      rl = (ul ^ vl ^ rx) + rc;
      rc = rl < rc;
      rp[i] = rl;
    }
  while (++i < vn);
  /* v != 0, so its negation carry must have been absorbed. */
  assert (vc == 0);

  /* Remaining limbs of u; note vx ^ rx == ux, hence the ul ^ ux. */
  for (; i < un; i++)
    {
      ul = (up[i] ^ ux) + uc;
      uc = ul < uc;

      rl = (ul ^ ux) + rc;
      rc = rl < rc;
      rp[i] = rl;
    }
  if (rc)
    rp[un++] = rc;
  else
    un = mpn_normalized_size (rp, un);

  r->_mp_size = rx ? -un : un;
}
3977
3978static unsigned
3979gmp_popcount_limb (mp_limb_t x)
3980{
3981  unsigned c;
3982
3983  /* Do 16 bits at a time, to avoid limb-sized constants. */
3984  int LOCAL_SHIFT_BITS = 16;
3985  for (c = 0; x > 0;)
3986    {
3987      unsigned w = x - ((x >> 1) & 0x5555);
3988      w = ((w >> 2) & 0x3333) + (w & 0x3333);
3989      w =  (w >> 4) + w;
3990      w = ((w >> 8) & 0x000f) + (w & 0x000f);
3991      c += w;
3992      if (GMP_LIMB_BITS > LOCAL_SHIFT_BITS)
3993	x >>= LOCAL_SHIFT_BITS;
3994      else
3995	x = 0;
3996    }
3997  return c;
3998}
3999
4000mp_bitcnt_t
4001mpn_popcount (mp_srcptr p, mp_size_t n)
4002{
4003  mp_size_t i;
4004  mp_bitcnt_t c;
4005
4006  for (c = 0, i = 0; i < n; i++)
4007    c += gmp_popcount_limb (p[i]);
4008
4009  return c;
4010}
4011
4012mp_bitcnt_t
4013mpz_popcount (const mpz_t u)
4014{
4015  mp_size_t un;
4016
4017  un = u->_mp_size;
4018
4019  if (un < 0)
4020    return ~(mp_bitcnt_t) 0;
4021
4022  return mpn_popcount (u->_mp_d, un);
4023}
4024
/* Hamming distance: number of bit positions where u and v differ, on
   the infinite two's complement representations. If the signs differ,
   infinitely many bits differ, and ~0 is returned. */
mp_bitcnt_t
mpz_hamdist (const mpz_t u, const mpz_t v)
{
  mp_size_t un, vn, i;
  mp_limb_t uc, vc, ul, vl, comp;
  mp_srcptr up, vp;
  mp_bitcnt_t c;

  un = u->_mp_size;
  vn = v->_mp_size;

  /* Opposite signs differ in infinitely many positions. */
  if ( (un ^ vn) < 0)
    return ~(mp_bitcnt_t) 0;

  /* Signs are equal, so one complement mask and one pair of negation
     carries serve both operands (see mpz_and for the scheme). */
  comp = - (uc = vc = (un < 0));
  if (uc)
    {
      assert (vn < 0);
      un = -un;
      vn = -vn;
    }

  up = u->_mp_d;
  vp = v->_mp_d;

  /* Make u the operand with more limbs. */
  if (un < vn)
    MPN_SRCPTR_SWAP (up, un, vp, vn);

  for (i = 0, c = 0; i < vn; i++)
    {
      ul = (up[i] ^ comp) + uc;
      uc = ul < uc;

      vl = (vp[i] ^ comp) + vc;
      vc = vl < vc;

      c += gmp_popcount_limb (ul ^ vl);
    }
  assert (vc == 0);

  /* Remaining limbs of u against v's sign extension (== comp). */
  for (; i < un; i++)
    {
      ul = (up[i] ^ comp) + uc;
      uc = ul < uc;

      c += gmp_popcount_limb (ul ^ comp);
    }

  return c;
}
4075
/* Index of the first 1 bit at or above starting_bit, treating u as an
   infinite two's complement number. Returns ~0 when there is no such
   bit (u >= 0 with no set bits left). */
mp_bitcnt_t
mpz_scan1 (const mpz_t u, mp_bitcnt_t starting_bit)
{
  mp_ptr up;
  mp_size_t us, un, i;
  mp_limb_t limb, ux;

  us = u->_mp_size;
  un = GMP_ABS (us);
  i = starting_bit / GMP_LIMB_BITS;

  /* Past the end there's no 1 bits for u>=0, or an immediate 1 bit
     for u<0. Notice this test picks up any u==0 too. */
  if (i >= un)
    return (us >= 0 ? ~(mp_bitcnt_t) 0 : starting_bit);

  up = u->_mp_d;
  ux = 0;
  limb = up[i];

  if (starting_bit != 0)
    {
      if (us < 0)
	{
	  /* Negate limb i on the fly: two's complement limb is
	     ~up[i] + carry, carry = 1 iff all lower limbs are zero.
	     If the addition overflows (limb ends up 0 here), the
	     carry continues upward; later limbs may then be scanned
	     un-complemented (ux = 0), which is correct because x and
	     -x share the same lowest set bit. Otherwise later limbs
	     are purely complemented (ux = all ones). */
	  ux = mpn_zero_p (up, i);
	  limb = ~ limb + ux;
	  ux = - (mp_limb_t) (limb >= ux);
	}

      /* Mask to 0 all bits before starting_bit, thus ignoring them. */
      limb &= GMP_LIMB_MAX << (starting_bit % GMP_LIMB_BITS);
    }

  return mpn_common_scan (limb, i, up, un, ux);
}
4111
/* Index of the first 0 bit at or above starting_bit, treating u as an
   infinite two's complement number. Returns ~0 when there is no such
   bit (u < 0 with no clear bits left). Implemented by scanning for a
   1 bit in the complement, hence ux inverts the sense below. */
mp_bitcnt_t
mpz_scan0 (const mpz_t u, mp_bitcnt_t starting_bit)
{
  mp_ptr up;
  mp_size_t us, un, i;
  mp_limb_t limb, ux;

  us = u->_mp_size;
  /* Complement mask: all ones for u >= 0 (we scan ~u for a 1 bit). */
  ux = - (mp_limb_t) (us >= 0);
  un = GMP_ABS (us);
  i = starting_bit / GMP_LIMB_BITS;

  /* When past end, there's an immediate 0 bit for u>=0, or no 0 bits for
     u<0.  Notice this test picks up all cases of u==0 too. */
  if (i >= un)
    return (ux ? starting_bit : ~(mp_bitcnt_t) 0);

  up = u->_mp_d;
  limb = up[i] ^ ux;

  /* For u < 0 fold in the two's complement carry from the lower
     limbs. */
  if (ux == 0)
    limb -= mpn_zero_p (up, i); /* limb = ~(~limb + zero_p) */

  /* Mask all bits before starting_bit, thus ignoring them. */
  limb &= GMP_LIMB_MAX << (starting_bit % GMP_LIMB_BITS);

  return mpn_common_scan (limb, i, up, un, ux);
}
4140
4141
4142/* MPZ base conversion. */
4143
4144size_t
4145mpz_sizeinbase (const mpz_t u, int base)
4146{
4147  mp_size_t un;
4148  mp_srcptr up;
4149  mp_ptr tp;
4150  mp_bitcnt_t bits;
4151  struct gmp_div_inverse bi;
4152  size_t ndigits;
4153
4154  assert (base >= 2);
4155  assert (base <= 62);
4156
4157  un = GMP_ABS (u->_mp_size);
4158  if (un == 0)
4159    return 1;
4160
4161  up = u->_mp_d;
4162
4163  bits = (un - 1) * GMP_LIMB_BITS + mpn_limb_size_in_base_2 (up[un-1]);
4164  switch (base)
4165    {
4166    case 2:
4167      return bits;
4168    case 4:
4169      return (bits + 1) / 2;
4170    case 8:
4171      return (bits + 2) / 3;
4172    case 16:
4173      return (bits + 3) / 4;
4174    case 32:
4175      return (bits + 4) / 5;
4176      /* FIXME: Do something more clever for the common case of base
4177	 10. */
4178    }
4179
4180  tp = gmp_xalloc_limbs (un);
4181  mpn_copyi (tp, up, un);
4182  mpn_div_qr_1_invert (&bi, base);
4183
4184  ndigits = 0;
4185  do
4186    {
4187      ndigits++;
4188      mpn_div_qr_1_preinv (tp, tp, un, &bi);
4189      un -= (tp[un-1] == 0);
4190    }
4191  while (un > 0);
4192
4193  gmp_free (tp);
4194  return ndigits;
4195}
4196
4197char *
4198mpz_get_str (char *sp, int base, const mpz_t u)
4199{
4200  unsigned bits;
4201  const char *digits;
4202  mp_size_t un;
4203  size_t i, sn;
4204
4205  digits = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
4206  if (base > 1)
4207    {
4208      if (base <= 36)
4209	digits = "0123456789abcdefghijklmnopqrstuvwxyz";
4210      else if (base > 62)
4211	return NULL;
4212    }
4213  else if (base >= -1)
4214    base = 10;
4215  else
4216    {
4217      base = -base;
4218      if (base > 36)
4219	return NULL;
4220    }
4221
4222  sn = 1 + mpz_sizeinbase (u, base);
4223  if (!sp)
4224    sp = (char *) gmp_xalloc (1 + sn);
4225
4226  un = GMP_ABS (u->_mp_size);
4227
4228  if (un == 0)
4229    {
4230      sp[0] = '0';
4231      sp[1] = '\0';
4232      return sp;
4233    }
4234
4235  i = 0;
4236
4237  if (u->_mp_size < 0)
4238    sp[i++] = '-';
4239
4240  bits = mpn_base_power_of_two_p (base);
4241
4242  if (bits)
4243    /* Not modified in this case. */
4244    sn = i + mpn_get_str_bits ((unsigned char *) sp + i, bits, u->_mp_d, un);
4245  else
4246    {
4247      struct mpn_base_info info;
4248      mp_ptr tp;
4249
4250      mpn_get_base_info (&info, base);
4251      tp = gmp_xalloc_limbs (un);
4252      mpn_copyi (tp, u->_mp_d, un);
4253
4254      sn = i + mpn_get_str_other ((unsigned char *) sp + i, base, &info, tp, un);
4255      gmp_free (tp);
4256    }
4257
4258  for (; i < sn; i++)
4259    sp[i] = digits[(unsigned char) sp[i]];
4260
4261  sp[sn] = '\0';
4262  return sp;
4263}
4264
/* Parse the string sp as an integer in the given base and store the
   result in r.  base is 0 (auto-detect from a 0x/0X, 0b/0B or 0
   prefix, defaulting to 10) or in [2, 62].  For bases up to 36,
   letters are case-insensitive; for bases 37..62 upper-case letters
   have values 10..35 and lower-case 36..61.  Leading whitespace and
   whitespace between digits are skipped.  Returns 0 on success and -1
   (with r set to 0) on any parse failure. */
int
mpz_set_str (mpz_t r, const char *sp, int base)
{
  unsigned bits, value_of_a;
  mp_size_t rn, alloc;
  mp_ptr rp;
  size_t dn;
  int sign;
  unsigned char *dp;

  assert (base == 0 || (base >= 2 && base <= 62));

  /* Skip leading whitespace, then an optional minus sign. */
  while (isspace( (unsigned char) *sp))
    sp++;

  sign = (*sp == '-');
  sp += sign;

  /* Auto-detect the base from the usual prefixes. */
  if (base == 0)
    {
      if (sp[0] == '0')
	{
	  if (sp[1] == 'x' || sp[1] == 'X')
	    {
	      base = 16;
	      sp += 2;
	    }
	  else if (sp[1] == 'b' || sp[1] == 'B')
	    {
	      base = 2;
	      sp += 2;
	    }
	  else
	    base = 8;
	}
      else
	base = 10;
    }

  /* Empty digit string (possibly after sign/prefix) is an error. */
  if (!*sp)
    {
      r->_mp_size = 0;
      return -1;
    }
  /* dn <= strlen (sp) always holds, so this buffer is large enough
     for the digit values collected below. */
  dp = (unsigned char *) gmp_xalloc (strlen (sp));

  /* Value of 'a': 36 for bases above 36 (case-significant), else 10
     (case-insensitive). */
  value_of_a = (base > 36) ? 36 : 10;
  for (dn = 0; *sp; sp++)
    {
      unsigned digit;

      if (isspace ((unsigned char) *sp))
	continue;
      else if (*sp >= '0' && *sp <= '9')
	digit = *sp - '0';
      else if (*sp >= 'a' && *sp <= 'z')
	digit = *sp - 'a' + value_of_a;
      else if (*sp >= 'A' && *sp <= 'Z')
	digit = *sp - 'A' + 10;
      else
	digit = base; /* fail */

      /* Reject any character whose value is out of range for base. */
      if (digit >= (unsigned) base)
	{
	  gmp_free (dp);
	  r->_mp_size = 0;
	  return -1;
	}

      dp[dn++] = digit;
    }

  /* Only whitespace after the sign: not a number. */
  if (!dn)
    {
      gmp_free (dp);
      r->_mp_size = 0;
      return -1;
    }
  bits = mpn_base_power_of_two_p (base);

  if (bits > 0)
    {
      /* Power-of-two base: each digit contributes exactly `bits` bits. */
      alloc = (dn * bits + GMP_LIMB_BITS - 1) / GMP_LIMB_BITS;
      rp = MPZ_REALLOC (r, alloc);
      rn = mpn_set_str_bits (rp, dp, dn, bits);
    }
  else
    {
      /* General base: info.exp digits fit in one limb. */
      struct mpn_base_info info;
      mpn_get_base_info (&info, base);
      alloc = (dn + info.exp - 1) / info.exp;
      rp = MPZ_REALLOC (r, alloc);
      rn = mpn_set_str_other (rp, dp, dn, base, &info);
      /* Normalization, needed for all-zero input. */
      assert (rn > 0);
      rn -= rp[rn-1] == 0;
    }
  assert (rn <= alloc);
  gmp_free (dp);

  r->_mp_size = sign ? - rn : rn;

  return 0;
}
4369
4370int
4371mpz_init_set_str (mpz_t r, const char *sp, int base)
4372{
4373  mpz_init (r);
4374  return mpz_set_str (r, sp, base);
4375}
4376
4377size_t
4378mpz_out_str (FILE *stream, int base, const mpz_t x)
4379{
4380  char *str;
4381  size_t len;
4382
4383  str = mpz_get_str (NULL, base, x);
4384  if (!str)
4385    return 0;
4386  len = strlen (str);
4387  len = fwrite (str, 1, len, stream);
4388  gmp_free (str);
4389  return len;
4390}
4391
4392
/* Detect the host byte order: returns -1 on a little-endian machine
   and 1 on a big-endian one, matching the endian argument convention
   of mpz_import/mpz_export. */
static int
gmp_detect_endian (void)
{
  /* The first byte of the int 2 is 2 on little-endian (1 - 2 == -1)
     and 0 on big-endian (1 - 0 == 1). */
  static const int probe = 2;
  const unsigned char *first_byte = (const unsigned char *) &probe;
  return 1 - *first_byte;
}
4400
4401/* Import and export. Does not support nails. */
/* Import and export. Does not support nails. */
/* Set r from an array of count words of size bytes each, read from
   src.  order is 1 for most significant word first, -1 for least
   significant first; endian is 1 for big-endian bytes within a word,
   -1 for little-endian, 0 for the host's native order.  nails must
   be 0.  The result is non-negative. */
void
mpz_import (mpz_t r, size_t count, int order, size_t size, int endian,
	    size_t nails, const void *src)
{
  const unsigned char *p;
  ptrdiff_t word_step;
  mp_ptr rp;
  mp_size_t rn;

  /* The current (partial) limb. */
  mp_limb_t limb;
  /* The number of bytes already copied to this limb (starting from
     the low end). */
  size_t bytes;
  /* The index where the limb should be stored, when completed. */
  mp_size_t i;

  if (nails != 0)
    gmp_die ("mpz_import: Nails not supported.");

  assert (order == 1 || order == -1);
  assert (endian >= -1 && endian <= 1);

  if (endian == 0)
    endian = gmp_detect_endian ();

  p = (unsigned char *) src;

  /* The inner loop moves p by -endian per byte, i.e. -endian*size per
     word; adding 2*size when order != endian nets one word forward in
     the required direction.  When order == endian the two motions
     cancel and no extra step is needed. */
  word_step = (order != endian) ? 2 * size : 0;

  /* Process bytes from the least significant end, so point p at the
     least significant word. */
  if (order == 1)
    {
      p += size * (count - 1);
      word_step = - word_step;
    }

  /* And at least significant byte of that word. */
  if (endian == 1)
    p += (size - 1);

  /* Worst-case number of limbs needed for size*count bytes. */
  rn = (size * count + sizeof(mp_limb_t) - 1) / sizeof(mp_limb_t);
  rp = MPZ_REALLOC (r, rn);

  for (limb = 0, bytes = 0, i = 0; count > 0; count--, p += word_step)
    {
      size_t j;
      for (j = 0; j < size; j++, p -= (ptrdiff_t) endian)
	{
	  /* Accumulate bytes into the current limb, low end first. */
	  limb |= (mp_limb_t) *p << (bytes++ * CHAR_BIT);
	  if (bytes == sizeof(mp_limb_t))
	    {
	      rp[i++] = limb;
	      bytes = 0;
	      limb = 0;
	    }
	}
    }
  assert (i + (bytes > 0) == rn);
  /* Store a final partial limb, or strip high zero limbs. */
  if (limb != 0)
    rp[i++] = limb;
  else
    i = mpn_normalized_size (rp, i);

  r->_mp_size = i;
}
4469
4470void *
4471mpz_export (void *r, size_t *countp, int order, size_t size, int endian,
4472	    size_t nails, const mpz_t u)
4473{
4474  size_t count;
4475  mp_size_t un;
4476
4477  if (nails != 0)
4478    gmp_die ("mpz_import: Nails not supported.");
4479
4480  assert (order == 1 || order == -1);
4481  assert (endian >= -1 && endian <= 1);
4482  assert (size > 0 || u->_mp_size == 0);
4483
4484  un = u->_mp_size;
4485  count = 0;
4486  if (un != 0)
4487    {
4488      size_t k;
4489      unsigned char *p;
4490      ptrdiff_t word_step;
4491      /* The current (partial) limb. */
4492      mp_limb_t limb;
4493      /* The number of bytes left to do in this limb. */
4494      size_t bytes;
4495      /* The index where the limb was read. */
4496      mp_size_t i;
4497
4498      un = GMP_ABS (un);
4499
4500      /* Count bytes in top limb. */
4501      limb = u->_mp_d[un-1];
4502      assert (limb != 0);
4503
4504      k = (GMP_LIMB_BITS <= CHAR_BIT);
4505      if (!k)
4506	{
4507	  do {
4508	    int LOCAL_CHAR_BIT = CHAR_BIT;
4509	    k++; limb >>= LOCAL_CHAR_BIT;
4510	  } while (limb != 0);
4511	}
4512      /* else limb = 0; */
4513
4514      count = (k + (un-1) * sizeof (mp_limb_t) + size - 1) / size;
4515
4516      if (!r)
4517	r = gmp_xalloc (count * size);
4518
4519      if (endian == 0)
4520	endian = gmp_detect_endian ();
4521
4522      p = (unsigned char *) r;
4523
4524      word_step = (order != endian) ? 2 * size : 0;
4525
4526      /* Process bytes from the least significant end, so point p at the
4527	 least significant word. */
4528      if (order == 1)
4529	{
4530	  p += size * (count - 1);
4531	  word_step = - word_step;
4532	}
4533
4534      /* And at least significant byte of that word. */
4535      if (endian == 1)
4536	p += (size - 1);
4537
4538      for (bytes = 0, i = 0, k = 0; k < count; k++, p += word_step)
4539	{
4540	  size_t j;
4541	  for (j = 0; j < size; ++j, p -= (ptrdiff_t) endian)
4542	    {
4543	      if (sizeof (mp_limb_t) == 1)
4544		{
4545		  if (i < un)
4546		    *p = u->_mp_d[i++];
4547		  else
4548		    *p = 0;
4549		}
4550	      else
4551		{
4552		  int LOCAL_CHAR_BIT = CHAR_BIT;
4553		  if (bytes == 0)
4554		    {
4555		      if (i < un)
4556			limb = u->_mp_d[i++];
4557		      bytes = sizeof (mp_limb_t);
4558		    }
4559		  *p = limb;
4560		  limb >>= LOCAL_CHAR_BIT;
4561		  bytes--;
4562		}
4563	    }
4564	}
4565      assert (i == un);
4566      assert (k == count);
4567    }
4568
4569  if (countp)
4570    *countp = count;
4571
4572  return r;
4573}
4574