• Home
  • History
  • Annotate
  • Line#
  • Navigate
  • Raw
  • Download
  • only in /asuswrt-rt-n18u-9.0.0.4.380.2695/release/src-rt/router/samba-3.5.8/source4/heimdal/lib/hcrypto/imath/
1/*
2  Name:     imath.c
3  Purpose:  Arbitrary precision integer arithmetic routines.
4  Author:   M. J. Fromberger <http://spinning-yarns.org/michael/>
5  Info:     $Id: imath.c,v 1.1.1.1 2011/06/10 09:34:43 andrew Exp $
6
7  Copyright (C) 2002-2007 Michael J. Fromberger, All Rights Reserved.
8
9  Permission is hereby granted, free of charge, to any person
10  obtaining a copy of this software and associated documentation files
11  (the "Software"), to deal in the Software without restriction,
12  including without limitation the rights to use, copy, modify, merge,
13  publish, distribute, sublicense, and/or sell copies of the Software,
14  and to permit persons to whom the Software is furnished to do so,
15  subject to the following conditions:
16
17  The above copyright notice and this permission notice shall be
18  included in all copies or substantial portions of the Software.
19
20  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
22  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
23  NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
24  BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
25  ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
26  CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
27  SOFTWARE.
28 */
29
30#include "imath.h"
31
32#if DEBUG
33#include <stdio.h>
34#endif
35
36#include <stdlib.h>
37#include <string.h>
38#include <ctype.h>
39
40#include <assert.h>
41
42#if DEBUG
43#define static
44#endif
45
46/* {{{ Constants */
47
48const mp_result MP_OK     = 0;  /* no error, all is well  */
49const mp_result MP_FALSE  = 0;  /* boolean false          */
50const mp_result MP_TRUE   = -1; /* boolean true           */
51const mp_result MP_MEMORY = -2; /* out of memory          */
52const mp_result MP_RANGE  = -3; /* argument out of range  */
53const mp_result MP_UNDEF  = -4; /* result undefined       */
54const mp_result MP_TRUNC  = -5; /* output truncated       */
55const mp_result MP_BADARG = -6; /* invalid null argument  */
56const mp_result MP_MINERR = -6;
57
58const mp_sign   MP_NEG  = 1;    /* value is strictly negative */
59const mp_sign   MP_ZPOS = 0;    /* value is non-negative      */
60
61static const char *s_unknown_err = "unknown result code";
62static const char *s_error_msg[] = {
63  "error code 0",
64  "boolean true",
65  "out of memory",
66  "argument out of range",
67  "result undefined",
68  "output truncated",
69  "invalid argument",
70  NULL
71};
72
73/* }}} */
74
75/* Argument checking macros
76   Use CHECK() where a return value is required; NRCHECK() elsewhere */
77#define CHECK(TEST)   assert(TEST)
78#define NRCHECK(TEST) assert(TEST)
79
80/* {{{ Logarithm table for computing output sizes */
81
82/* The ith entry of this table gives the value of log_i(2).
83
84   An integer value n requires ceil(log_i(n)) digits to be represented
85   in base i.  Since it is easy to compute lg(n), by counting bits, we
86   can compute log_i(n) = lg(n) * log_i(2).
87
88   The use of this table eliminates a dependency upon linkage against
89   the standard math libraries.
90 */
91static const double s_log2[] = {
92   0.000000000, 0.000000000, 1.000000000, 0.630929754, 	/*  0  1  2  3 */
93   0.500000000, 0.430676558, 0.386852807, 0.356207187, 	/*  4  5  6  7 */
94   0.333333333, 0.315464877, 0.301029996, 0.289064826, 	/*  8  9 10 11 */
95   0.278942946, 0.270238154, 0.262649535, 0.255958025, 	/* 12 13 14 15 */
96   0.250000000, 0.244650542, 0.239812467, 0.235408913, 	/* 16 17 18 19 */
97   0.231378213, 0.227670249, 0.224243824, 0.221064729, 	/* 20 21 22 23 */
98   0.218104292, 0.215338279, 0.212746054, 0.210309918, 	/* 24 25 26 27 */
99   0.208014598, 0.205846832, 0.203795047, 0.201849087, 	/* 28 29 30 31 */
100   0.200000000, 0.198239863, 0.196561632, 0.194959022, 	/* 32 33 34 35 */
101   0.193426404,                                         /* 36          */
102};
103
104/* }}} */
105/* {{{ Various macros */
106
107/* Return the number of digits needed to represent a static value */
108#define MP_VALUE_DIGITS(V) \
109((sizeof(V)+(sizeof(mp_digit)-1))/sizeof(mp_digit))
110
111/* Round precision P to nearest word boundary */
112#define ROUND_PREC(P) ((mp_size)(2*(((P)+1)/2)))
113
114/* Set array P of S digits to zero */
115#define ZERO(P, S) \
116do{mp_size i__=(S)*sizeof(mp_digit);mp_digit *p__=(P);memset(p__,0,i__);}while(0)
117
118/* Copy S digits from array P to array Q */
119#define COPY(P, Q, S) \
120do{mp_size i__=(S)*sizeof(mp_digit);mp_digit *p__=(P),*q__=(Q);\
121memcpy(q__,p__,i__);}while(0)
122
123/* Reverse N elements of type T in array A */
124#define REV(T, A, N) \
125do{T *u_=(A),*v_=u_+(N)-1;while(u_<v_){T xch=*u_;*u_++=*v_;*v_--=xch;}}while(0)
126
127#define CLAMP(Z) \
128do{mp_int z_=(Z);mp_size uz_=MP_USED(z_);mp_digit *dz_=MP_DIGITS(z_)+uz_-1;\
129while(uz_ > 1 && (*dz_-- == 0)) --uz_;MP_USED(z_)=uz_;}while(0)
130
131/* Select min/max.  Do not provide expressions for which multiple
132   evaluation would be problematic, e.g. x++ */
133#define MIN(A, B) ((B)<(A)?(B):(A))
134#define MAX(A, B) ((B)>(A)?(B):(A))
135
136/* Exchange lvalues A and B of type T, e.g.
137   SWAP(int, x, y) where x and y are variables of type int. */
138#define SWAP(T, A, B) do{T t_=(A);A=(B);B=t_;}while(0)
139
140/* Used to set up and access simple temp stacks within functions. */
141#define TEMP(K) (temp + (K))
142#define SETUP(E, C) \
143do{if((res = (E)) != MP_OK) goto CLEANUP; ++(C);}while(0)
144
145/* Compare value to zero. */
146#define CMPZ(Z) \
147(((Z)->used==1&&(Z)->digits[0]==0)?0:((Z)->sign==MP_NEG)?-1:1)
148
149/* Multiply X by Y into Z, ignoring signs.  Requires that Z have
150   enough storage preallocated to hold the result. */
151#define UMUL(X, Y, Z) \
152do{mp_size ua_=MP_USED(X),ub_=MP_USED(Y);mp_size o_=ua_+ub_;\
153ZERO(MP_DIGITS(Z),o_);\
154(void) s_kmul(MP_DIGITS(X),MP_DIGITS(Y),MP_DIGITS(Z),ua_,ub_);\
155MP_USED(Z)=o_;CLAMP(Z);}while(0)
156
157/* Square X into Z.  Requires that Z have enough storage to hold the
158   result. */
159#define USQR(X, Z) \
160do{mp_size ua_=MP_USED(X),o_=ua_+ua_;ZERO(MP_DIGITS(Z),o_);\
161(void) s_ksqr(MP_DIGITS(X),MP_DIGITS(Z),ua_);MP_USED(Z)=o_;CLAMP(Z);}while(0)
162
163#define UPPER_HALF(W)           ((mp_word)((W) >> MP_DIGIT_BIT))
164#define LOWER_HALF(W)           ((mp_digit)(W))
165#define HIGH_BIT_SET(W)         ((W) >> (MP_WORD_BIT - 1))
166#define ADD_WILL_OVERFLOW(W, V) ((MP_WORD_MAX - (V)) < (W))
167
168/* }}} */
169/* {{{ Default configuration settings */
170
171/* Default number of digits allocated to a new mp_int */
172#if IMATH_TEST
173mp_size default_precision = MP_DEFAULT_PREC;
174#else
175static const mp_size default_precision = MP_DEFAULT_PREC;
176#endif
177
178/* Minimum number of digits to invoke recursive multiply */
179#if IMATH_TEST
180mp_size multiply_threshold = MP_MULT_THRESH;
181#else
182static const mp_size multiply_threshold = MP_MULT_THRESH;
183#endif
184
185/* }}} */
186
187/* Allocate a buffer of (at least) num digits, or return
188   NULL if that couldn't be done.  */
189static mp_digit *s_alloc(mp_size num);
190
191/* Release a buffer of digits allocated by s_alloc(). */
192static void s_free(void *ptr);
193
194/* Insure that z has at least min digits allocated, resizing if
195   necessary.  Returns true if successful, false if out of memory. */
196static int  s_pad(mp_int z, mp_size min);
197
198/* Fill in a "fake" mp_int on the stack with a given value */
199static void      s_fake(mp_int z, mp_small value, mp_digit vbuf[]);
200
201/* Compare two runs of digits of given length, returns <0, 0, >0 */
202static int       s_cdig(mp_digit *da, mp_digit *db, mp_size len);
203
204/* Pack the unsigned digits of v into array t */
205static int       s_vpack(mp_small v, mp_digit t[]);
206
207/* Compare magnitudes of a and b, returns <0, 0, >0 */
208static int       s_ucmp(mp_int a, mp_int b);
209
210/* Compare magnitudes of a and v, returns <0, 0, >0 */
211static int       s_vcmp(mp_int a, mp_small v);
212
213/* Unsigned magnitude addition; assumes dc is big enough.
214   Carry out is returned (no memory allocated). */
215static mp_digit  s_uadd(mp_digit *da, mp_digit *db, mp_digit *dc,
216		        mp_size size_a, mp_size size_b);
217
218/* Unsigned magnitude subtraction.  Assumes dc is big enough. */
219static void      s_usub(mp_digit *da, mp_digit *db, mp_digit *dc,
220		        mp_size size_a, mp_size size_b);
221
222/* Unsigned recursive multiplication.  Assumes dc is big enough. */
223static int       s_kmul(mp_digit *da, mp_digit *db, mp_digit *dc,
224			mp_size size_a, mp_size size_b);
225
226/* Unsigned magnitude multiplication.  Assumes dc is big enough. */
227static void      s_umul(mp_digit *da, mp_digit *db, mp_digit *dc,
228			mp_size size_a, mp_size size_b);
229
230/* Unsigned recursive squaring.  Assumes dc is big enough. */
231static int       s_ksqr(mp_digit *da, mp_digit *dc, mp_size size_a);
232
233/* Unsigned magnitude squaring.  Assumes dc is big enough. */
234static void      s_usqr(mp_digit *da, mp_digit *dc, mp_size size_a);
235
236/* Single digit addition.  Assumes a is big enough. */
237static void      s_dadd(mp_int a, mp_digit b);
238
239/* Single digit multiplication.  Assumes a is big enough. */
240static void      s_dmul(mp_int a, mp_digit b);
241
242/* Single digit multiplication on buffers; assumes dc is big enough. */
243static void      s_dbmul(mp_digit *da, mp_digit b, mp_digit *dc,
244			 mp_size size_a);
245
246/* Single digit division.  Replaces a with the quotient,
247   returns the remainder.  */
248static mp_digit  s_ddiv(mp_int a, mp_digit b);
249
250/* Quick division by a power of 2, replaces z (no allocation) */
251static void      s_qdiv(mp_int z, mp_size p2);
252
253/* Quick remainder by a power of 2, replaces z (no allocation) */
254static void      s_qmod(mp_int z, mp_size p2);
255
256/* Quick multiplication by a power of 2, replaces z.
257   Allocates if necessary; returns false in case this fails. */
258static int       s_qmul(mp_int z, mp_size p2);
259
260/* Quick subtraction from a power of 2, replaces z.
261   Allocates if necessary; returns false in case this fails. */
262static int       s_qsub(mp_int z, mp_size p2);
263
264/* Return maximum k such that 2^k divides z. */
265static int       s_dp2k(mp_int z);
266
267/* Return k >= 0 such that z = 2^k, or -1 if there is no such k. */
268static int       s_isp2(mp_int z);
269
270/* Set z to 2^k.  May allocate; returns false in case this fails. */
271static int       s_2expt(mp_int z, mp_small k);
272
273/* Normalize a and b for division, returns normalization constant */
274static int       s_norm(mp_int a, mp_int b);
275
276/* Compute constant mu for Barrett reduction, given modulus m, result
277   replaces z, m is untouched. */
278static mp_result s_brmu(mp_int z, mp_int m);
279
280/* Reduce a modulo m, using Barrett's algorithm. */
281static int       s_reduce(mp_int x, mp_int m, mp_int mu, mp_int q1, mp_int q2);
282
283/* Modular exponentiation, using Barrett reduction */
284static mp_result s_embar(mp_int a, mp_int b, mp_int m, mp_int mu, mp_int c);
285
286/* Unsigned magnitude division.  Assumes |a| > |b|.  Allocates
287   temporaries; overwrites a with quotient, b with remainder. */
288static mp_result s_udiv(mp_int a, mp_int b);
289
290/* Compute the number of digits in radix r required to represent the
291   given value.  Does not account for sign flags, terminators, etc. */
292static int       s_outlen(mp_int z, mp_size r);
293
294/* Guess how many digits of precision will be needed to represent a
295   radix r value of the specified number of digits.  Returns a value
296   guaranteed to be no smaller than the actual number required. */
297static mp_size   s_inlen(int len, mp_size r);
298
299/* Convert a character to a digit value in radix r, or
300   -1 if out of range */
301static int       s_ch2val(char c, int r);
302
303/* Convert a digit value to a character */
304static char      s_val2ch(int v, int caps);
305
306/* Take 2's complement of a buffer in place */
307static void      s_2comp(unsigned char *buf, int len);
308
309/* Convert a value to binary, ignoring sign.  On input, *limpos is the
310   bound on how many bytes should be written to buf; on output, *limpos
311   is set to the number of bytes actually written. */
312static mp_result s_tobin(mp_int z, unsigned char *buf, int *limpos, int pad);
313
314#if DEBUG
315/* Dump a representation of the mp_int to standard output */
316void      s_print(char *tag, mp_int z);
317void      s_print_buf(char *tag, mp_digit *buf, mp_size num);
318#endif
319
320/* {{{ mp_int_init(z) */
321
322mp_result mp_int_init(mp_int z)
323{
324  if(z == NULL)
325    return MP_BADARG;
326
327  z->single = 0;
328  z->digits = &(z->single);
329  z->alloc  = 1;
330  z->used   = 1;
331  z->sign   = MP_ZPOS;
332
333  return MP_OK;
334}
335
336/* }}} */
337
338/* {{{ mp_int_alloc() */
339
340mp_int    mp_int_alloc(void)
341{
342  mp_int out = malloc(sizeof(mpz_t));
343
344  if(out != NULL)
345    mp_int_init(out);
346
347  return out;
348}
349
350/* }}} */
351
352/* {{{ mp_int_init_size(z, prec) */
353
354mp_result mp_int_init_size(mp_int z, mp_size prec)
355{
356  CHECK(z != NULL);
357
358  if(prec == 0)
359    prec = default_precision;
360  else if(prec == 1)
361    return mp_int_init(z);
362  else
363    prec = (mp_size) ROUND_PREC(prec);
364
365  if((MP_DIGITS(z) = s_alloc(prec)) == NULL)
366    return MP_MEMORY;
367
368  z->digits[0] = 0;
369  MP_USED(z) = 1;
370  MP_ALLOC(z) = prec;
371  MP_SIGN(z) = MP_ZPOS;
372
373  return MP_OK;
374}
375
376/* }}} */
377
378/* {{{ mp_int_init_copy(z, old) */
379
380mp_result mp_int_init_copy(mp_int z, mp_int old)
381{
382  mp_result  res;
383  mp_size    uold;
384
385  CHECK(z != NULL && old != NULL);
386
387  uold = MP_USED(old);
388  if(uold == 1) {
389    mp_int_init(z);
390  }
391  else {
392    mp_size target = MAX(uold, default_precision);
393
394    if((res = mp_int_init_size(z, target)) != MP_OK)
395      return res;
396  }
397
398  MP_USED(z) = uold;
399  MP_SIGN(z) = MP_SIGN(old);
400  COPY(MP_DIGITS(old), MP_DIGITS(z), uold);
401
402  return MP_OK;
403}
404
405/* }}} */
406
407/* {{{ mp_int_init_value(z, value) */
408
409mp_result mp_int_init_value(mp_int z, mp_small value)
410{
411  mpz_t     vtmp;
412  mp_digit  vbuf[MP_VALUE_DIGITS(value)];
413
414  s_fake(&vtmp, value, vbuf);
415  return mp_int_init_copy(z, &vtmp);
416}
417
418/* }}} */
419
420/* {{{ mp_int_set_value(z, value) */
421
422mp_result  mp_int_set_value(mp_int z, mp_small value)
423{
424  mpz_t    vtmp;
425  mp_digit vbuf[MP_VALUE_DIGITS(value)];
426
427  s_fake(&vtmp, value, vbuf);
428  return mp_int_copy(&vtmp, z);
429}
430
431/* }}} */
432
433/* {{{ mp_int_clear(z) */
434
435void      mp_int_clear(mp_int z)
436{
437  if(z == NULL)
438    return;
439
440  if(MP_DIGITS(z) != NULL) {
441    if((void *) MP_DIGITS(z) != (void *) z)
442      s_free(MP_DIGITS(z));
443
444    MP_DIGITS(z) = NULL;
445  }
446}
447
448/* }}} */
449
450/* {{{ mp_int_free(z) */
451
452void      mp_int_free(mp_int z)
453{
454  NRCHECK(z != NULL);
455
456  mp_int_clear(z);
457  free(z); /* note: NOT s_free() */
458}
459
460/* }}} */
461
462/* {{{ mp_int_copy(a, c) */
463
464mp_result mp_int_copy(mp_int a, mp_int c)
465{
466  CHECK(a != NULL && c != NULL);
467
468  if(a != c) {
469    mp_size   ua = MP_USED(a);
470    mp_digit *da, *dc;
471
472    if(!s_pad(c, ua))
473      return MP_MEMORY;
474
475    da = MP_DIGITS(a); dc = MP_DIGITS(c);
476    COPY(da, dc, ua);
477
478    MP_USED(c) = ua;
479    MP_SIGN(c) = MP_SIGN(a);
480  }
481
482  return MP_OK;
483}
484
485/* }}} */
486
487/* {{{ mp_int_swap(a, c) */
488
489void      mp_int_swap(mp_int a, mp_int c)
490{
491  if(a != c) {
492    mpz_t tmp = *a;
493
494    *a = *c;
495    *c = tmp;
496  }
497}
498
499/* }}} */
500
501/* {{{ mp_int_zero(z) */
502
503void      mp_int_zero(mp_int z)
504{
505  NRCHECK(z != NULL);
506
507  z->digits[0] = 0;
508  MP_USED(z) = 1;
509  MP_SIGN(z) = MP_ZPOS;
510}
511
512/* }}} */
513
514/* {{{ mp_int_abs(a, c) */
515
516mp_result mp_int_abs(mp_int a, mp_int c)
517{
518  mp_result res;
519
520  CHECK(a != NULL && c != NULL);
521
522  if((res = mp_int_copy(a, c)) != MP_OK)
523    return res;
524
525  MP_SIGN(c) = MP_ZPOS;
526  return MP_OK;
527}
528
529/* }}} */
530
531/* {{{ mp_int_neg(a, c) */
532
533mp_result mp_int_neg(mp_int a, mp_int c)
534{
535  mp_result res;
536
537  CHECK(a != NULL && c != NULL);
538
539  if((res = mp_int_copy(a, c)) != MP_OK)
540    return res;
541
542  if(CMPZ(c) != 0)
543    MP_SIGN(c) = 1 - MP_SIGN(a);
544
545  return MP_OK;
546}
547
548/* }}} */
549
550/* {{{ mp_int_add(a, b, c) */
551
552mp_result mp_int_add(mp_int a, mp_int b, mp_int c)
553{
554  mp_size  ua, ub, uc, max;
555
556  CHECK(a != NULL && b != NULL && c != NULL);
557
558  ua = MP_USED(a); ub = MP_USED(b); uc = MP_USED(c);
559  max = MAX(ua, ub);
560
561  if(MP_SIGN(a) == MP_SIGN(b)) {
562    /* Same sign -- add magnitudes, preserve sign of addends */
563    mp_digit carry;
564
565    if(!s_pad(c, max))
566      return MP_MEMORY;
567
568    carry = s_uadd(MP_DIGITS(a), MP_DIGITS(b), MP_DIGITS(c), ua, ub);
569    uc = max;
570
571    if(carry) {
572      if(!s_pad(c, max + 1))
573	return MP_MEMORY;
574
575      c->digits[max] = carry;
576      ++uc;
577    }
578
579    MP_USED(c) = uc;
580    MP_SIGN(c) = MP_SIGN(a);
581
582  }
583  else {
584    /* Different signs -- subtract magnitudes, preserve sign of greater */
585    mp_int  x, y;
586    int     cmp = s_ucmp(a, b); /* magnitude comparision, sign ignored */
587
588    /* Set x to max(a, b), y to min(a, b) to simplify later code.
589       A special case yields zero for equal magnitudes.
590    */
591    if(cmp == 0) {
592      mp_int_zero(c);
593      return MP_OK;
594    }
595    else if(cmp < 0) {
596      x = b; y = a;
597    }
598    else {
599      x = a; y = b;
600    }
601
602    if(!s_pad(c, MP_USED(x)))
603      return MP_MEMORY;
604
605    /* Subtract smaller from larger */
606    s_usub(MP_DIGITS(x), MP_DIGITS(y), MP_DIGITS(c), MP_USED(x), MP_USED(y));
607    MP_USED(c) = MP_USED(x);
608    CLAMP(c);
609
610    /* Give result the sign of the larger */
611    MP_SIGN(c) = MP_SIGN(x);
612  }
613
614  return MP_OK;
615}
616
617/* }}} */
618
619/* {{{ mp_int_add_value(a, value, c) */
620
621mp_result mp_int_add_value(mp_int a, mp_small value, mp_int c)
622{
623  mpz_t     vtmp;
624  mp_digit  vbuf[MP_VALUE_DIGITS(value)];
625
626  s_fake(&vtmp, value, vbuf);
627
628  return mp_int_add(a, &vtmp, c);
629}
630
631/* }}} */
632
633/* {{{ mp_int_sub(a, b, c) */
634
635mp_result mp_int_sub(mp_int a, mp_int b, mp_int c)
636{
637  mp_size  ua, ub, uc, max;
638
639  CHECK(a != NULL && b != NULL && c != NULL);
640
641  ua = MP_USED(a); ub = MP_USED(b); uc = MP_USED(c);
642  max = MAX(ua, ub);
643
644  if(MP_SIGN(a) != MP_SIGN(b)) {
645    /* Different signs -- add magnitudes and keep sign of a */
646    mp_digit carry;
647
648    if(!s_pad(c, max))
649      return MP_MEMORY;
650
651    carry = s_uadd(MP_DIGITS(a), MP_DIGITS(b), MP_DIGITS(c), ua, ub);
652    uc = max;
653
654    if(carry) {
655      if(!s_pad(c, max + 1))
656	return MP_MEMORY;
657
658      c->digits[max] = carry;
659      ++uc;
660    }
661
662    MP_USED(c) = uc;
663    MP_SIGN(c) = MP_SIGN(a);
664
665  }
666  else {
667    /* Same signs -- subtract magnitudes */
668    mp_int  x, y;
669    mp_sign osign;
670    int     cmp = s_ucmp(a, b);
671
672    if(!s_pad(c, max))
673      return MP_MEMORY;
674
675    if(cmp >= 0) {
676      x = a; y = b; osign = MP_ZPOS;
677    }
678    else {
679      x = b; y = a; osign = MP_NEG;
680    }
681
682    if(MP_SIGN(a) == MP_NEG && cmp != 0)
683      osign = 1 - osign;
684
685    s_usub(MP_DIGITS(x), MP_DIGITS(y), MP_DIGITS(c), MP_USED(x), MP_USED(y));
686    MP_USED(c) = MP_USED(x);
687    CLAMP(c);
688
689    MP_SIGN(c) = osign;
690  }
691
692  return MP_OK;
693}
694
695/* }}} */
696
697/* {{{ mp_int_sub_value(a, value, c) */
698
699mp_result mp_int_sub_value(mp_int a, mp_small value, mp_int c)
700{
701  mpz_t     vtmp;
702  mp_digit  vbuf[MP_VALUE_DIGITS(value)];
703
704  s_fake(&vtmp, value, vbuf);
705
706  return mp_int_sub(a, &vtmp, c);
707}
708
709/* }}} */
710
711/* {{{ mp_int_mul(a, b, c) */
712
713mp_result mp_int_mul(mp_int a, mp_int b, mp_int c)
714{
715  mp_digit *out;
716  mp_size   osize, ua, ub, p = 0;
717  mp_sign   osign;
718
719  CHECK(a != NULL && b != NULL && c != NULL);
720
721  /* If either input is zero, we can shortcut multiplication */
722  if(mp_int_compare_zero(a) == 0 || mp_int_compare_zero(b) == 0) {
723    mp_int_zero(c);
724    return MP_OK;
725  }
726
727  /* Output is positive if inputs have same sign, otherwise negative */
728  osign = (MP_SIGN(a) == MP_SIGN(b)) ? MP_ZPOS : MP_NEG;
729
730  /* If the output is not identical to any of the inputs, we'll write
731     the results directly; otherwise, allocate a temporary space. */
732  ua = MP_USED(a); ub = MP_USED(b);
733  osize = MAX(ua, ub);
734  osize = 4 * ((osize + 1) / 2);
735
736  if(c == a || c == b) {
737    p = ROUND_PREC(osize);
738    p = MAX(p, default_precision);
739
740    if((out = s_alloc(p)) == NULL)
741      return MP_MEMORY;
742  }
743  else {
744    if(!s_pad(c, osize))
745      return MP_MEMORY;
746
747    out = MP_DIGITS(c);
748  }
749  ZERO(out, osize);
750
751  if(!s_kmul(MP_DIGITS(a), MP_DIGITS(b), out, ua, ub))
752    return MP_MEMORY;
753
754  /* If we allocated a new buffer, get rid of whatever memory c was
755     already using, and fix up its fields to reflect that.
756   */
757  if(out != MP_DIGITS(c)) {
758    if((void *) MP_DIGITS(c) != (void *) c)
759      s_free(MP_DIGITS(c));
760    MP_DIGITS(c) = out;
761    MP_ALLOC(c) = p;
762  }
763
764  MP_USED(c) = osize; /* might not be true, but we'll fix it ... */
765  CLAMP(c);           /* ... right here */
766  MP_SIGN(c) = osign;
767
768  return MP_OK;
769}
770
771/* }}} */
772
773/* {{{ mp_int_mul_value(a, value, c) */
774
775mp_result mp_int_mul_value(mp_int a, mp_small value, mp_int c)
776{
777  mpz_t     vtmp;
778  mp_digit  vbuf[MP_VALUE_DIGITS(value)];
779
780  s_fake(&vtmp, value, vbuf);
781
782  return mp_int_mul(a, &vtmp, c);
783}
784
785/* }}} */
786
787/* {{{ mp_int_mul_pow2(a, p2, c) */
788
789mp_result mp_int_mul_pow2(mp_int a, mp_small p2, mp_int c)
790{
791  mp_result res;
792  CHECK(a != NULL && c != NULL && p2 >= 0);
793
794  if((res = mp_int_copy(a, c)) != MP_OK)
795    return res;
796
797  if(s_qmul(c, (mp_size) p2))
798    return MP_OK;
799  else
800    return MP_MEMORY;
801}
802
803/* }}} */
804
805/* {{{ mp_int_sqr(a, c) */
806
807mp_result mp_int_sqr(mp_int a, mp_int c)
808{
809  mp_digit *out;
810  mp_size   osize, p = 0;
811
812  CHECK(a != NULL && c != NULL);
813
814  /* Get a temporary buffer big enough to hold the result */
815  osize = (mp_size) 4 * ((MP_USED(a) + 1) / 2);
816  if(a == c) {
817    p = ROUND_PREC(osize);
818    p = MAX(p, default_precision);
819
820    if((out = s_alloc(p)) == NULL)
821      return MP_MEMORY;
822  }
823  else {
824    if(!s_pad(c, osize))
825      return MP_MEMORY;
826
827    out = MP_DIGITS(c);
828  }
829  ZERO(out, osize);
830
831  s_ksqr(MP_DIGITS(a), out, MP_USED(a));
832
833  /* Get rid of whatever memory c was already using, and fix up its
834     fields to reflect the new digit array it's using
835   */
836  if(out != MP_DIGITS(c)) {
837    if((void *) MP_DIGITS(c) != (void *) c)
838      s_free(MP_DIGITS(c));
839    MP_DIGITS(c) = out;
840    MP_ALLOC(c) = p;
841  }
842
843  MP_USED(c) = osize; /* might not be true, but we'll fix it ... */
844  CLAMP(c);           /* ... right here */
845  MP_SIGN(c) = MP_ZPOS;
846
847  return MP_OK;
848}
849
850/* }}} */
851
852/* {{{ mp_int_div(a, b, q, r) */
853
854mp_result mp_int_div(mp_int a, mp_int b, mp_int q, mp_int r)
855{
856  int       cmp, last = 0, lg;
857  mp_result res = MP_OK;
858  mpz_t     temp[2];
859  mp_int    qout, rout;
860  mp_sign   sa = MP_SIGN(a), sb = MP_SIGN(b);
861
862  CHECK(a != NULL && b != NULL && q != r);
863
864  if(CMPZ(b) == 0)
865    return MP_UNDEF;
866  else if((cmp = s_ucmp(a, b)) < 0) {
867    /* If |a| < |b|, no division is required:
868       q = 0, r = a
869     */
870    if(r && (res = mp_int_copy(a, r)) != MP_OK)
871      return res;
872
873    if(q)
874      mp_int_zero(q);
875
876    return MP_OK;
877  }
878  else if(cmp == 0) {
879    /* If |a| = |b|, no division is required:
880       q = 1 or -1, r = 0
881     */
882    if(r)
883      mp_int_zero(r);
884
885    if(q) {
886      mp_int_zero(q);
887      q->digits[0] = 1;
888
889      if(sa != sb)
890	MP_SIGN(q) = MP_NEG;
891    }
892
893    return MP_OK;
894  }
895
896  /* When |a| > |b|, real division is required.  We need someplace to
897     store quotient and remainder, but q and r are allowed to be NULL
898     or to overlap with the inputs.
899   */
900  if((lg = s_isp2(b)) < 0) {
901    if(q && b != q) {
902      if((res = mp_int_copy(a, q)) != MP_OK)
903	goto CLEANUP;
904      else
905	qout = q;
906    }
907    else {
908      qout = TEMP(last);
909      SETUP(mp_int_init_copy(TEMP(last), a), last);
910    }
911
912    if(r && a != r) {
913      if((res = mp_int_copy(b, r)) != MP_OK)
914	goto CLEANUP;
915      else
916	rout = r;
917    }
918    else {
919      rout = TEMP(last);
920      SETUP(mp_int_init_copy(TEMP(last), b), last);
921    }
922
923    if((res = s_udiv(qout, rout)) != MP_OK) goto CLEANUP;
924  }
925  else {
926    if(q && (res = mp_int_copy(a, q)) != MP_OK) goto CLEANUP;
927    if(r && (res = mp_int_copy(a, r)) != MP_OK) goto CLEANUP;
928
929    if(q) s_qdiv(q, (mp_size) lg); qout = q;
930    if(r) s_qmod(r, (mp_size) lg); rout = r;
931  }
932
933  /* Recompute signs for output */
934  if(rout) {
935    MP_SIGN(rout) = sa;
936    if(CMPZ(rout) == 0)
937      MP_SIGN(rout) = MP_ZPOS;
938  }
939  if(qout) {
940    MP_SIGN(qout) = (sa == sb) ? MP_ZPOS : MP_NEG;
941    if(CMPZ(qout) == 0)
942      MP_SIGN(qout) = MP_ZPOS;
943  }
944
945  if(q && (res = mp_int_copy(qout, q)) != MP_OK) goto CLEANUP;
946  if(r && (res = mp_int_copy(rout, r)) != MP_OK) goto CLEANUP;
947
948 CLEANUP:
949  while(--last >= 0)
950    mp_int_clear(TEMP(last));
951
952  return res;
953}
954
955/* }}} */
956
957/* {{{ mp_int_mod(a, m, c) */
958
959mp_result mp_int_mod(mp_int a, mp_int m, mp_int c)
960{
961  mp_result res;
962  mpz_t     tmp;
963  mp_int    out;
964
965  if(m == c) {
966    mp_int_init(&tmp);
967    out = &tmp;
968  }
969  else {
970    out = c;
971  }
972
973  if((res = mp_int_div(a, m, NULL, out)) != MP_OK)
974    goto CLEANUP;
975
976  if(CMPZ(out) < 0)
977    res = mp_int_add(out, m, c);
978  else
979    res = mp_int_copy(out, c);
980
981 CLEANUP:
982  if(out != c)
983    mp_int_clear(&tmp);
984
985  return res;
986}
987
988/* }}} */
989
990/* {{{ mp_int_div_value(a, value, q, r) */
991
992mp_result mp_int_div_value(mp_int a, mp_small value, mp_int q, mp_small *r)
993{
994  mpz_t     vtmp, rtmp;
995  mp_digit  vbuf[MP_VALUE_DIGITS(value)];
996  mp_result res;
997
998  mp_int_init(&rtmp);
999  s_fake(&vtmp, value, vbuf);
1000
1001  if((res = mp_int_div(a, &vtmp, q, &rtmp)) != MP_OK)
1002    goto CLEANUP;
1003
1004  if(r)
1005    (void) mp_int_to_int(&rtmp, r); /* can't fail */
1006
1007 CLEANUP:
1008  mp_int_clear(&rtmp);
1009  return res;
1010}
1011
1012/* }}} */
1013
1014/* {{{ mp_int_div_pow2(a, p2, q, r) */
1015
1016mp_result mp_int_div_pow2(mp_int a, mp_small p2, mp_int q, mp_int r)
1017{
1018  mp_result res = MP_OK;
1019
1020  CHECK(a != NULL && p2 >= 0 && q != r);
1021
1022  if(q != NULL && (res = mp_int_copy(a, q)) == MP_OK)
1023    s_qdiv(q, (mp_size) p2);
1024
1025  if(res == MP_OK && r != NULL && (res = mp_int_copy(a, r)) == MP_OK)
1026    s_qmod(r, (mp_size) p2);
1027
1028  return res;
1029}
1030
1031/* }}} */
1032
1033/* {{{ mp_int_expt(a, b, c) */
1034
1035mp_result mp_int_expt(mp_int a, mp_small b, mp_int c)
1036{
1037  mpz_t     t;
1038  mp_result res;
1039  unsigned int v = abs(b);
1040
1041  CHECK(b >= 0 && c != NULL);
1042
1043  if((res = mp_int_init_copy(&t, a)) != MP_OK)
1044    return res;
1045
1046  (void) mp_int_set_value(c, 1);
1047  while(v != 0) {
1048    if(v & 1) {
1049      if((res = mp_int_mul(c, &t, c)) != MP_OK)
1050	goto CLEANUP;
1051    }
1052
1053    v >>= 1;
1054    if(v == 0) break;
1055
1056    if((res = mp_int_sqr(&t, &t)) != MP_OK)
1057      goto CLEANUP;
1058  }
1059
1060 CLEANUP:
1061  mp_int_clear(&t);
1062  return res;
1063}
1064
1065/* }}} */
1066
1067/* {{{ mp_int_expt_value(a, b, c) */
1068
1069mp_result mp_int_expt_value(mp_small a, mp_small b, mp_int c)
1070{
1071  mpz_t     t;
1072  mp_result res;
1073  unsigned int v = abs(b);
1074
1075  CHECK(b >= 0 && c != NULL);
1076
1077  if((res = mp_int_init_value(&t, a)) != MP_OK)
1078    return res;
1079
1080  (void) mp_int_set_value(c, 1);
1081  while(v != 0) {
1082    if(v & 1) {
1083      if((res = mp_int_mul(c, &t, c)) != MP_OK)
1084	goto CLEANUP;
1085    }
1086
1087    v >>= 1;
1088    if(v == 0) break;
1089
1090    if((res = mp_int_sqr(&t, &t)) != MP_OK)
1091      goto CLEANUP;
1092  }
1093
1094 CLEANUP:
1095  mp_int_clear(&t);
1096  return res;
1097}
1098
1099/* }}} */
1100
1101/* {{{ mp_int_compare(a, b) */
1102
1103int       mp_int_compare(mp_int a, mp_int b)
1104{
1105  mp_sign sa;
1106
1107  CHECK(a != NULL && b != NULL);
1108
1109  sa = MP_SIGN(a);
1110  if(sa == MP_SIGN(b)) {
1111    int cmp = s_ucmp(a, b);
1112
1113    /* If they're both zero or positive, the normal comparison
1114       applies; if both negative, the sense is reversed. */
1115    if(sa == MP_ZPOS)
1116      return cmp;
1117    else
1118      return -cmp;
1119
1120  }
1121  else {
1122    if(sa == MP_ZPOS)
1123      return 1;
1124    else
1125      return -1;
1126  }
1127}
1128
1129/* }}} */
1130
1131/* {{{ mp_int_compare_unsigned(a, b) */
1132
1133int       mp_int_compare_unsigned(mp_int a, mp_int b)
1134{
1135  NRCHECK(a != NULL && b != NULL);
1136
1137  return s_ucmp(a, b);
1138}
1139
1140/* }}} */
1141
1142/* {{{ mp_int_compare_zero(z) */
1143
1144int       mp_int_compare_zero(mp_int z)
1145{
1146  NRCHECK(z != NULL);
1147
1148  if(MP_USED(z) == 1 && z->digits[0] == 0)
1149    return 0;
1150  else if(MP_SIGN(z) == MP_ZPOS)
1151    return 1;
1152  else
1153    return -1;
1154}
1155
1156/* }}} */
1157
1158/* {{{ mp_int_compare_value(z, value) */
1159
1160int       mp_int_compare_value(mp_int z, mp_small value)
1161{
1162  mp_sign vsign = (value < 0) ? MP_NEG : MP_ZPOS;
1163  int     cmp;
1164
1165  CHECK(z != NULL);
1166
1167  if(vsign == MP_SIGN(z)) {
1168    cmp = s_vcmp(z, value);
1169
1170    if(vsign == MP_ZPOS)
1171      return cmp;
1172    else
1173      return -cmp;
1174  }
1175  else {
1176    if(value < 0)
1177      return 1;
1178    else
1179      return -1;
1180  }
1181}
1182
1183/* }}} */
1184
1185/* {{{ mp_int_exptmod(a, b, m, c) */
1186
1187mp_result mp_int_exptmod(mp_int a, mp_int b, mp_int m, mp_int c)
1188{
1189  mp_result res;
1190  mp_size   um;
1191  mpz_t     temp[3];
1192  mp_int    s;
1193  int       last = 0;
1194
1195  CHECK(a != NULL && b != NULL && c != NULL && m != NULL);
1196
1197  /* Zero moduli and negative exponents are not considered. */
1198  if(CMPZ(m) == 0)
1199    return MP_UNDEF;
1200  if(CMPZ(b) < 0)
1201    return MP_RANGE;
1202
1203  um = MP_USED(m);
1204  SETUP(mp_int_init_size(TEMP(0), 2 * um), last);
1205  SETUP(mp_int_init_size(TEMP(1), 2 * um), last);
1206
1207  if(c == b || c == m) {
1208    SETUP(mp_int_init_size(TEMP(2), 2 * um), last);
1209    s = TEMP(2);
1210  }
1211  else {
1212    s = c;
1213  }
1214
1215  if((res = mp_int_mod(a, m, TEMP(0))) != MP_OK) goto CLEANUP;
1216
1217  if((res = s_brmu(TEMP(1), m)) != MP_OK) goto CLEANUP;
1218
1219  if((res = s_embar(TEMP(0), b, m, TEMP(1), s)) != MP_OK)
1220    goto CLEANUP;
1221
1222  res = mp_int_copy(s, c);
1223
1224 CLEANUP:
1225  while(--last >= 0)
1226    mp_int_clear(TEMP(last));
1227
1228  return res;
1229}
1230
1231/* }}} */
1232
1233/* {{{ mp_int_exptmod_evalue(a, value, m, c) */
1234
1235mp_result mp_int_exptmod_evalue(mp_int a, mp_small value, mp_int m, mp_int c)
1236{
1237  mpz_t    vtmp;
1238  mp_digit vbuf[MP_VALUE_DIGITS(value)];
1239
1240  s_fake(&vtmp, value, vbuf);
1241
1242  return mp_int_exptmod(a, &vtmp, m, c);
1243}
1244
1245/* }}} */
1246
1247/* {{{ mp_int_exptmod_bvalue(v, b, m, c) */
1248
1249mp_result mp_int_exptmod_bvalue(mp_small value, mp_int b,
1250				mp_int m, mp_int c)
1251{
1252  mpz_t    vtmp;
1253  mp_digit vbuf[MP_VALUE_DIGITS(value)];
1254
1255  s_fake(&vtmp, value, vbuf);
1256
1257  return mp_int_exptmod(&vtmp, b, m, c);
1258}
1259
1260/* }}} */
1261
1262/* {{{ mp_int_exptmod_known(a, b, m, mu, c) */
1263
1264mp_result mp_int_exptmod_known(mp_int a, mp_int b, mp_int m, mp_int mu, mp_int c)
1265{
1266  mp_result res;
1267  mp_size   um;
1268  mpz_t     temp[2];
1269  mp_int    s;
1270  int       last = 0;
1271
1272  CHECK(a && b && m && c);
1273
1274  /* Zero moduli and negative exponents are not considered. */
1275  if(CMPZ(m) == 0)
1276    return MP_UNDEF;
1277  if(CMPZ(b) < 0)
1278    return MP_RANGE;
1279
1280  um = MP_USED(m);
1281  SETUP(mp_int_init_size(TEMP(0), 2 * um), last);
1282
1283  if(c == b || c == m) {
1284    SETUP(mp_int_init_size(TEMP(1), 2 * um), last);
1285    s = TEMP(1);
1286  }
1287  else {
1288    s = c;
1289  }
1290
1291  if((res = mp_int_mod(a, m, TEMP(0))) != MP_OK) goto CLEANUP;
1292
1293  if((res = s_embar(TEMP(0), b, m, mu, s)) != MP_OK)
1294    goto CLEANUP;
1295
1296  res = mp_int_copy(s, c);
1297
1298 CLEANUP:
1299  while(--last >= 0)
1300    mp_int_clear(TEMP(last));
1301
1302  return res;
1303}
1304
1305/* }}} */
1306
1307/* {{{ mp_int_redux_const(m, c) */
1308
1309mp_result mp_int_redux_const(mp_int m, mp_int c)
1310{
1311  CHECK(m != NULL && c != NULL && m != c);
1312
1313  return s_brmu(c, m);
1314}
1315
1316/* }}} */
1317
1318/* {{{ mp_int_invmod(a, m, c) */
1319
1320mp_result mp_int_invmod(mp_int a, mp_int m, mp_int c)
1321{
1322  mp_result res;
1323  mp_sign   sa;
1324  int       last = 0;
1325  mpz_t     temp[2];
1326
1327  CHECK(a != NULL && m != NULL && c != NULL);
1328
1329  if(CMPZ(a) == 0 || CMPZ(m) <= 0)
1330    return MP_RANGE;
1331
1332  sa = MP_SIGN(a); /* need this for the result later */
1333
1334  for(last = 0; last < 2; ++last)
1335    mp_int_init(TEMP(last));
1336
1337  if((res = mp_int_egcd(a, m, TEMP(0), TEMP(1), NULL)) != MP_OK)
1338    goto CLEANUP;
1339
1340  if(mp_int_compare_value(TEMP(0), 1) != 0) {
1341    res = MP_UNDEF;
1342    goto CLEANUP;
1343  }
1344
1345  /* It is first necessary to constrain the value to the proper range */
1346  if((res = mp_int_mod(TEMP(1), m, TEMP(1))) != MP_OK)
1347    goto CLEANUP;
1348
1349  /* Now, if 'a' was originally negative, the value we have is
1350     actually the magnitude of the negative representative; to get the
1351     positive value we have to subtract from the modulus.  Otherwise,
1352     the value is okay as it stands.
1353   */
1354  if(sa == MP_NEG)
1355    res = mp_int_sub(m, TEMP(1), c);
1356  else
1357    res = mp_int_copy(TEMP(1), c);
1358
1359 CLEANUP:
1360  while(--last >= 0)
1361    mp_int_clear(TEMP(last));
1362
1363  return res;
1364}
1365
1366/* }}} */
1367
1368/* {{{ mp_int_gcd(a, b, c) */
1369
1370/* Binary GCD algorithm due to Josef Stein, 1961 */
1371mp_result mp_int_gcd(mp_int a, mp_int b, mp_int c)
1372{
1373  int       ca, cb, k = 0;
1374  mpz_t     u, v, t;
1375  mp_result res;
1376
1377  CHECK(a != NULL && b != NULL && c != NULL);
1378
1379  ca = CMPZ(a);
1380  cb = CMPZ(b);
1381  if(ca == 0 && cb == 0)
1382    return MP_UNDEF;
1383  else if(ca == 0)
1384    return mp_int_abs(b, c);
1385  else if(cb == 0)
1386    return mp_int_abs(a, c);
1387
1388  mp_int_init(&t);
1389  if((res = mp_int_init_copy(&u, a)) != MP_OK)
1390    goto U;
1391  if((res = mp_int_init_copy(&v, b)) != MP_OK)
1392    goto V;
1393
1394  MP_SIGN(&u) = MP_ZPOS; MP_SIGN(&v) = MP_ZPOS;
1395
1396  { /* Divide out common factors of 2 from u and v */
1397    int div2_u = s_dp2k(&u), div2_v = s_dp2k(&v);
1398
1399    k = MIN(div2_u, div2_v);
1400    s_qdiv(&u, (mp_size) k);
1401    s_qdiv(&v, (mp_size) k);
1402  }
1403
1404  if(mp_int_is_odd(&u)) {
1405    if((res = mp_int_neg(&v, &t)) != MP_OK)
1406      goto CLEANUP;
1407  }
1408  else {
1409    if((res = mp_int_copy(&u, &t)) != MP_OK)
1410      goto CLEANUP;
1411  }
1412
1413  for(;;) {
1414    s_qdiv(&t, s_dp2k(&t));
1415
1416    if(CMPZ(&t) > 0) {
1417      if((res = mp_int_copy(&t, &u)) != MP_OK)
1418	goto CLEANUP;
1419    }
1420    else {
1421      if((res = mp_int_neg(&t, &v)) != MP_OK)
1422	goto CLEANUP;
1423    }
1424
1425    if((res = mp_int_sub(&u, &v, &t)) != MP_OK)
1426      goto CLEANUP;
1427
1428    if(CMPZ(&t) == 0)
1429      break;
1430  }
1431
1432  if((res = mp_int_abs(&u, c)) != MP_OK)
1433    goto CLEANUP;
1434  if(!s_qmul(c, (mp_size) k))
1435    res = MP_MEMORY;
1436
1437 CLEANUP:
1438  mp_int_clear(&v);
1439 V: mp_int_clear(&u);
1440 U: mp_int_clear(&t);
1441
1442  return res;
1443}
1444
1445/* }}} */
1446
1447/* {{{ mp_int_egcd(a, b, c, x, y) */
1448
1449/* This is the binary GCD algorithm again, but this time we keep track
1450   of the elementary matrix operations as we go, so we can get values
1451   x and y satisfying c = ax + by.
1452 */
1453mp_result mp_int_egcd(mp_int a, mp_int b, mp_int c,
1454		      mp_int x, mp_int y)
1455{
1456  int       k, last = 0, ca, cb;
1457  mpz_t     temp[8];
1458  mp_result res;
1459
1460  CHECK(a != NULL && b != NULL && c != NULL &&
1461	(x != NULL || y != NULL));
1462
1463  ca = CMPZ(a);
1464  cb = CMPZ(b);
1465  if(ca == 0 && cb == 0)
1466    return MP_UNDEF;
1467  else if(ca == 0) {
1468    if((res = mp_int_abs(b, c)) != MP_OK) return res;
1469    mp_int_zero(x); (void) mp_int_set_value(y, 1); return MP_OK;
1470  }
1471  else if(cb == 0) {
1472    if((res = mp_int_abs(a, c)) != MP_OK) return res;
1473    (void) mp_int_set_value(x, 1); mp_int_zero(y); return MP_OK;
1474  }
1475
1476  /* Initialize temporaries:
1477     A:0, B:1, C:2, D:3, u:4, v:5, ou:6, ov:7 */
1478  for(last = 0; last < 4; ++last)
1479    mp_int_init(TEMP(last));
1480  TEMP(0)->digits[0] = 1;
1481  TEMP(3)->digits[0] = 1;
1482
1483  SETUP(mp_int_init_copy(TEMP(4), a), last);
1484  SETUP(mp_int_init_copy(TEMP(5), b), last);
1485
1486  /* We will work with absolute values here */
1487  MP_SIGN(TEMP(4)) = MP_ZPOS;
1488  MP_SIGN(TEMP(5)) = MP_ZPOS;
1489
1490  { /* Divide out common factors of 2 from u and v */
1491    int  div2_u = s_dp2k(TEMP(4)), div2_v = s_dp2k(TEMP(5));
1492
1493    k = MIN(div2_u, div2_v);
1494    s_qdiv(TEMP(4), k);
1495    s_qdiv(TEMP(5), k);
1496  }
1497
1498  SETUP(mp_int_init_copy(TEMP(6), TEMP(4)), last);
1499  SETUP(mp_int_init_copy(TEMP(7), TEMP(5)), last);
1500
1501  for(;;) {
1502    while(mp_int_is_even(TEMP(4))) {
1503      s_qdiv(TEMP(4), 1);
1504
1505      if(mp_int_is_odd(TEMP(0)) || mp_int_is_odd(TEMP(1))) {
1506	if((res = mp_int_add(TEMP(0), TEMP(7), TEMP(0))) != MP_OK)
1507	  goto CLEANUP;
1508	if((res = mp_int_sub(TEMP(1), TEMP(6), TEMP(1))) != MP_OK)
1509	  goto CLEANUP;
1510      }
1511
1512      s_qdiv(TEMP(0), 1);
1513      s_qdiv(TEMP(1), 1);
1514    }
1515
1516    while(mp_int_is_even(TEMP(5))) {
1517      s_qdiv(TEMP(5), 1);
1518
1519      if(mp_int_is_odd(TEMP(2)) || mp_int_is_odd(TEMP(3))) {
1520	if((res = mp_int_add(TEMP(2), TEMP(7), TEMP(2))) != MP_OK)
1521	  goto CLEANUP;
1522	if((res = mp_int_sub(TEMP(3), TEMP(6), TEMP(3))) != MP_OK)
1523	  goto CLEANUP;
1524      }
1525
1526      s_qdiv(TEMP(2), 1);
1527      s_qdiv(TEMP(3), 1);
1528    }
1529
1530    if(mp_int_compare(TEMP(4), TEMP(5)) >= 0) {
1531      if((res = mp_int_sub(TEMP(4), TEMP(5), TEMP(4))) != MP_OK) goto CLEANUP;
1532      if((res = mp_int_sub(TEMP(0), TEMP(2), TEMP(0))) != MP_OK) goto CLEANUP;
1533      if((res = mp_int_sub(TEMP(1), TEMP(3), TEMP(1))) != MP_OK) goto CLEANUP;
1534    }
1535    else {
1536      if((res = mp_int_sub(TEMP(5), TEMP(4), TEMP(5))) != MP_OK) goto CLEANUP;
1537      if((res = mp_int_sub(TEMP(2), TEMP(0), TEMP(2))) != MP_OK) goto CLEANUP;
1538      if((res = mp_int_sub(TEMP(3), TEMP(1), TEMP(3))) != MP_OK) goto CLEANUP;
1539    }
1540
1541    if(CMPZ(TEMP(4)) == 0) {
1542      if(x && (res = mp_int_copy(TEMP(2), x)) != MP_OK) goto CLEANUP;
1543      if(y && (res = mp_int_copy(TEMP(3), y)) != MP_OK) goto CLEANUP;
1544      if(c) {
1545	if(!s_qmul(TEMP(5), k)) {
1546	  res = MP_MEMORY;
1547	  goto CLEANUP;
1548	}
1549
1550	res = mp_int_copy(TEMP(5), c);
1551      }
1552
1553      break;
1554    }
1555  }
1556
1557 CLEANUP:
1558  while(--last >= 0)
1559    mp_int_clear(TEMP(last));
1560
1561  return res;
1562}
1563
1564/* }}} */
1565
1566/* {{{ mp_int_lcm(a, b, c) */
1567
1568mp_result mp_int_lcm(mp_int a, mp_int b, mp_int c)
1569{
1570  mpz_t     lcm;
1571  mp_result res;
1572
1573  CHECK(a != NULL && b != NULL && c != NULL);
1574
1575  /* Since a * b = gcd(a, b) * lcm(a, b), we can compute
1576     lcm(a, b) = (a / gcd(a, b)) * b.
1577
1578     This formulation insures everything works even if the input
1579     variables share space.
1580   */
1581  if((res = mp_int_init(&lcm)) != MP_OK)
1582    return res;
1583  if((res = mp_int_gcd(a, b, &lcm)) != MP_OK)
1584    goto CLEANUP;
1585  if((res = mp_int_div(a, &lcm, &lcm, NULL)) != MP_OK)
1586    goto CLEANUP;
1587  if((res = mp_int_mul(&lcm, b, &lcm)) != MP_OK)
1588    goto CLEANUP;
1589
1590  res = mp_int_copy(&lcm, c);
1591
1592  CLEANUP:
1593    mp_int_clear(&lcm);
1594
1595  return res;
1596}
1597
1598/* }}} */
1599
1600/* {{{ mp_int_divisible_value(a, v) */
1601
1602int       mp_int_divisible_value(mp_int a, mp_small v)
1603{
1604  mp_small rem = 0;
1605
1606  if(mp_int_div_value(a, v, NULL, &rem) != MP_OK)
1607    return 0;
1608
1609  return rem == 0;
1610}
1611
1612/* }}} */
1613
1614/* {{{ mp_int_is_pow2(z) */
1615
1616int       mp_int_is_pow2(mp_int z)
1617{
1618  CHECK(z != NULL);
1619
1620  return s_isp2(z);
1621}
1622
1623/* }}} */
1624
1625/* {{{ mp_int_root(a, b, c) */
1626
1627/* Implementation of Newton's root finding method, based loosely on a
1628   patch contributed by Hal Finkel <half@halssoftware.com>
1629   modified by M. J. Fromberger.
1630 */
1631mp_result mp_int_root(mp_int a, mp_small b, mp_int c)
1632{
1633  mp_result  res = MP_OK;
1634  mpz_t      temp[5];
1635  int        last = 0;
1636  int        flips = 0;
1637
1638  CHECK(a != NULL && c != NULL && b > 0);
1639
1640  if(b == 1) {
1641    return mp_int_copy(a, c);
1642  }
1643  if(MP_SIGN(a) == MP_NEG) {
1644    if(b % 2 == 0)
1645      return MP_UNDEF; /* root does not exist for negative a with even b */
1646    else
1647      flips = 1;
1648  }
1649
1650  SETUP(mp_int_init_copy(TEMP(last), a), last);
1651  SETUP(mp_int_init_copy(TEMP(last), a), last);
1652  SETUP(mp_int_init(TEMP(last)), last);
1653  SETUP(mp_int_init(TEMP(last)), last);
1654  SETUP(mp_int_init(TEMP(last)), last);
1655
1656  (void) mp_int_abs(TEMP(0), TEMP(0));
1657  (void) mp_int_abs(TEMP(1), TEMP(1));
1658
1659  for(;;) {
1660    if((res = mp_int_expt(TEMP(1), b, TEMP(2))) != MP_OK)
1661      goto CLEANUP;
1662
1663    if(mp_int_compare_unsigned(TEMP(2), TEMP(0)) <= 0)
1664      break;
1665
1666    if((res = mp_int_sub(TEMP(2), TEMP(0), TEMP(2))) != MP_OK)
1667      goto CLEANUP;
1668    if((res = mp_int_expt(TEMP(1), b - 1, TEMP(3))) != MP_OK)
1669      goto CLEANUP;
1670    if((res = mp_int_mul_value(TEMP(3), b, TEMP(3))) != MP_OK)
1671      goto CLEANUP;
1672    if((res = mp_int_div(TEMP(2), TEMP(3), TEMP(4), NULL)) != MP_OK)
1673      goto CLEANUP;
1674    if((res = mp_int_sub(TEMP(1), TEMP(4), TEMP(4))) != MP_OK)
1675      goto CLEANUP;
1676
1677    if(mp_int_compare_unsigned(TEMP(1), TEMP(4)) == 0) {
1678      if((res = mp_int_sub_value(TEMP(4), 1, TEMP(4))) != MP_OK)
1679	goto CLEANUP;
1680    }
1681    if((res = mp_int_copy(TEMP(4), TEMP(1))) != MP_OK)
1682      goto CLEANUP;
1683  }
1684
1685  if((res = mp_int_copy(TEMP(1), c)) != MP_OK)
1686    goto CLEANUP;
1687
1688  /* If the original value of a was negative, flip the output sign. */
1689  if(flips)
1690    (void) mp_int_neg(c, c); /* cannot fail */
1691
1692 CLEANUP:
1693  while(--last >= 0)
1694    mp_int_clear(TEMP(last));
1695
1696  return res;
1697}
1698
1699/* }}} */
1700
1701/* {{{ mp_int_to_int(z, out) */
1702
1703mp_result mp_int_to_int(mp_int z, mp_small *out)
1704{
1705  mp_usmall uv = 0;
1706  mp_size   uz;
1707  mp_digit *dz;
1708  mp_sign   sz;
1709
1710  CHECK(z != NULL);
1711
1712  /* Make sure the value is representable as an int */
1713  sz = MP_SIGN(z);
1714  if((sz == MP_ZPOS && mp_int_compare_value(z, MP_SMALL_MAX) > 0) ||
1715     mp_int_compare_value(z, MP_SMALL_MIN) < 0)
1716    return MP_RANGE;
1717
1718  uz = MP_USED(z);
1719  dz = MP_DIGITS(z) + uz - 1;
1720
1721  while(uz > 0) {
1722    uv <<= MP_DIGIT_BIT/2;
1723    uv = (uv << (MP_DIGIT_BIT/2)) | *dz--;
1724    --uz;
1725  }
1726
1727  if(out)
1728    *out = (sz == MP_NEG) ? -(mp_small)uv : (mp_small)uv;
1729
1730  return MP_OK;
1731}
1732
1733/* }}} */
1734
1735/* {{{ mp_int_to_uint(z, *out) */
1736
1737mp_result mp_int_to_uint(mp_int z, mp_usmall *out)
1738{
1739  mp_usmall uv = 0;
1740  mp_size   uz;
1741  mp_digit *dz;
1742  mp_sign   sz;
1743
1744  CHECK(z != NULL);
1745
1746  /* Make sure the value is representable as an int */
1747  sz = MP_SIGN(z);
1748  if(!(sz == MP_ZPOS && mp_int_compare_value(z, UINT_MAX) <= 0))
1749    return MP_RANGE;
1750
1751  uz = MP_USED(z);
1752  dz = MP_DIGITS(z) + uz - 1;
1753
1754  while(uz > 0) {
1755    uv <<= MP_DIGIT_BIT/2;
1756    uv = (uv << (MP_DIGIT_BIT/2)) | *dz--;
1757    --uz;
1758  }
1759
1760  if(out)
1761    *out = uv;
1762
1763  return MP_OK;
1764}
1765
1766/* }}} */
1767
1768/* {{{ mp_int_to_string(z, radix, str, limit) */
1769
1770mp_result mp_int_to_string(mp_int z, mp_size radix,
1771			   char *str, int limit)
1772{
1773  mp_result res;
1774  int       cmp = 0;
1775
1776  CHECK(z != NULL && str != NULL && limit >= 2);
1777
1778  if(radix < MP_MIN_RADIX || radix > MP_MAX_RADIX)
1779    return MP_RANGE;
1780
1781  if(CMPZ(z) == 0) {
1782    *str++ = s_val2ch(0, 1);
1783  }
1784  else {
1785    mpz_t tmp;
1786    char  *h, *t;
1787
1788    if((res = mp_int_init_copy(&tmp, z)) != MP_OK)
1789      return res;
1790
1791    if(MP_SIGN(z) == MP_NEG) {
1792      *str++ = '-';
1793      --limit;
1794    }
1795    h = str;
1796
1797    /* Generate digits in reverse order until finished or limit reached */
1798    for(/* */; limit > 0; --limit) {
1799      mp_digit d;
1800
1801      if((cmp = CMPZ(&tmp)) == 0)
1802	break;
1803
1804      d = s_ddiv(&tmp, (mp_digit)radix);
1805      *str++ = s_val2ch(d, 1);
1806    }
1807    t = str - 1;
1808
1809    /* Put digits back in correct output order */
1810    while(h < t) {
1811      char tc = *h;
1812      *h++ = *t;
1813      *t-- = tc;
1814    }
1815
1816    mp_int_clear(&tmp);
1817  }
1818
1819  *str = '\0';
1820  if(cmp == 0)
1821    return MP_OK;
1822  else
1823    return MP_TRUNC;
1824}
1825
1826/* }}} */
1827
1828/* {{{ mp_int_string_len(z, radix) */
1829
1830mp_result mp_int_string_len(mp_int z, mp_size radix)
1831{
1832  int  len;
1833
1834  CHECK(z != NULL);
1835
1836  if(radix < MP_MIN_RADIX || radix > MP_MAX_RADIX)
1837    return MP_RANGE;
1838
1839  len = s_outlen(z, radix) + 1; /* for terminator */
1840
1841  /* Allow for sign marker on negatives */
1842  if(MP_SIGN(z) == MP_NEG)
1843    len += 1;
1844
1845  return len;
1846}
1847
1848/* }}} */
1849
1850/* {{{ mp_int_read_string(z, radix, *str) */
1851
1852/* Read zero-terminated string into z */
1853mp_result mp_int_read_string(mp_int z, mp_size radix, const char *str)
1854{
1855  return mp_int_read_cstring(z, radix, str, NULL);
1856
1857}
1858
1859/* }}} */
1860
1861/* {{{ mp_int_read_cstring(z, radix, *str, **end) */
1862
1863mp_result mp_int_read_cstring(mp_int z, mp_size radix, const char *str, char **end)
1864{
1865  int       ch;
1866
1867  CHECK(z != NULL && str != NULL);
1868
1869  if(radix < MP_MIN_RADIX || radix > MP_MAX_RADIX)
1870    return MP_RANGE;
1871
1872  /* Skip leading whitespace */
1873  while(isspace((int)*str))
1874    ++str;
1875
1876  /* Handle leading sign tag (+/-, positive default) */
1877  switch(*str) {
1878  case '-':
1879    MP_SIGN(z) = MP_NEG;
1880    ++str;
1881    break;
1882  case '+':
1883    ++str; /* fallthrough */
1884  default:
1885    MP_SIGN(z) = MP_ZPOS;
1886    break;
1887  }
1888
1889  /* Skip leading zeroes */
1890  while((ch = s_ch2val(*str, radix)) == 0)
1891    ++str;
1892
1893  /* Make sure there is enough space for the value */
1894  if(!s_pad(z, s_inlen(strlen(str), radix)))
1895    return MP_MEMORY;
1896
1897  MP_USED(z) = 1; z->digits[0] = 0;
1898
1899  while(*str != '\0' && ((ch = s_ch2val(*str, radix)) >= 0)) {
1900    s_dmul(z, (mp_digit)radix);
1901    s_dadd(z, (mp_digit)ch);
1902    ++str;
1903  }
1904
1905  CLAMP(z);
1906
1907  /* Override sign for zero, even if negative specified. */
1908  if(CMPZ(z) == 0)
1909    MP_SIGN(z) = MP_ZPOS;
1910
1911  if(end != NULL)
1912    *end = (char *)str;
1913
1914  /* Return a truncation error if the string has unprocessed
1915     characters remaining, so the caller can tell if the whole string
1916     was done */
1917  if(*str != '\0')
1918    return MP_TRUNC;
1919  else
1920    return MP_OK;
1921}
1922
1923/* }}} */
1924
1925/* {{{ mp_int_count_bits(z) */
1926
1927mp_result mp_int_count_bits(mp_int z)
1928{
1929  mp_size  nbits = 0, uz;
1930  mp_digit d;
1931
1932  CHECK(z != NULL);
1933
1934  uz = MP_USED(z);
1935  if(uz == 1 && z->digits[0] == 0)
1936    return 1;
1937
1938  --uz;
1939  nbits = uz * MP_DIGIT_BIT;
1940  d = z->digits[uz];
1941
1942  while(d != 0) {
1943    d >>= 1;
1944    ++nbits;
1945  }
1946
1947  return nbits;
1948}
1949
1950/* }}} */
1951
1952/* {{{ mp_int_to_binary(z, buf, limit) */
1953
1954mp_result mp_int_to_binary(mp_int z, unsigned char *buf, int limit)
1955{
1956  static const int PAD_FOR_2C = 1;
1957
1958  mp_result res;
1959  int       limpos = limit;
1960
1961  CHECK(z != NULL && buf != NULL);
1962
1963  res = s_tobin(z, buf, &limpos, PAD_FOR_2C);
1964
1965  if(MP_SIGN(z) == MP_NEG)
1966    s_2comp(buf, limpos);
1967
1968  return res;
1969}
1970
1971/* }}} */
1972
1973/* {{{ mp_int_read_binary(z, buf, len) */
1974
1975mp_result mp_int_read_binary(mp_int z, unsigned char *buf, int len)
1976{
1977  mp_size need, i;
1978  unsigned char *tmp;
1979  mp_digit *dz;
1980
1981  CHECK(z != NULL && buf != NULL && len > 0);
1982
1983  /* Figure out how many digits are needed to represent this value */
1984  need = ((len * CHAR_BIT) + (MP_DIGIT_BIT - 1)) / MP_DIGIT_BIT;
1985  if(!s_pad(z, need))
1986    return MP_MEMORY;
1987
1988  mp_int_zero(z);
1989
1990  /* If the high-order bit is set, take the 2's complement before
1991     reading the value (it will be restored afterward) */
1992  if(buf[0] >> (CHAR_BIT - 1)) {
1993    MP_SIGN(z) = MP_NEG;
1994    s_2comp(buf, len);
1995  }
1996
1997  dz = MP_DIGITS(z);
1998  for(tmp = buf, i = len; i > 0; --i, ++tmp) {
1999    s_qmul(z, (mp_size) CHAR_BIT);
2000    *dz |= *tmp;
2001  }
2002
2003  /* Restore 2's complement if we took it before */
2004  if(MP_SIGN(z) == MP_NEG)
2005    s_2comp(buf, len);
2006
2007  return MP_OK;
2008}
2009
2010/* }}} */
2011
2012/* {{{ mp_int_binary_len(z) */
2013
2014mp_result mp_int_binary_len(mp_int z)
2015{
2016  mp_result  res = mp_int_count_bits(z);
2017  int        bytes = mp_int_unsigned_len(z);
2018
2019  if(res <= 0)
2020    return res;
2021
2022  bytes = (res + (CHAR_BIT - 1)) / CHAR_BIT;
2023
2024  /* If the highest-order bit falls exactly on a byte boundary, we
2025     need to pad with an extra byte so that the sign will be read
2026     correctly when reading it back in. */
2027  if(bytes * CHAR_BIT == res)
2028    ++bytes;
2029
2030  return bytes;
2031}
2032
2033/* }}} */
2034
2035/* {{{ mp_int_to_unsigned(z, buf, limit) */
2036
2037mp_result mp_int_to_unsigned(mp_int z, unsigned char *buf, int limit)
2038{
2039  static const int NO_PADDING = 0;
2040
2041  CHECK(z != NULL && buf != NULL);
2042
2043  return s_tobin(z, buf, &limit, NO_PADDING);
2044}
2045
2046/* }}} */
2047
2048/* {{{ mp_int_read_unsigned(z, buf, len) */
2049
2050mp_result mp_int_read_unsigned(mp_int z, unsigned char *buf, int len)
2051{
2052  mp_size need, i;
2053  unsigned char *tmp;
2054  mp_digit *dz;
2055
2056  CHECK(z != NULL && buf != NULL && len > 0);
2057
2058  /* Figure out how many digits are needed to represent this value */
2059  need = ((len * CHAR_BIT) + (MP_DIGIT_BIT - 1)) / MP_DIGIT_BIT;
2060  if(!s_pad(z, need))
2061    return MP_MEMORY;
2062
2063  mp_int_zero(z);
2064
2065  dz = MP_DIGITS(z);
2066  for(tmp = buf, i = len; i > 0; --i, ++tmp) {
2067    (void) s_qmul(z, CHAR_BIT);
2068    *dz |= *tmp;
2069  }
2070
2071  return MP_OK;
2072}
2073
2074/* }}} */
2075
2076/* {{{ mp_int_unsigned_len(z) */
2077
2078mp_result mp_int_unsigned_len(mp_int z)
2079{
2080  mp_result  res = mp_int_count_bits(z);
2081  int        bytes;
2082
2083  if(res <= 0)
2084    return res;
2085
2086  bytes = (res + (CHAR_BIT - 1)) / CHAR_BIT;
2087
2088  return bytes;
2089}
2090
2091/* }}} */
2092
2093/* {{{ mp_error_string(res) */
2094
2095const char *mp_error_string(mp_result res)
2096{
2097  int ix;
2098  if(res > 0)
2099    return s_unknown_err;
2100
2101  res = -res;
2102  for(ix = 0; ix < res && s_error_msg[ix] != NULL; ++ix)
2103    ;
2104
2105  if(s_error_msg[ix] != NULL)
2106    return s_error_msg[ix];
2107  else
2108    return s_unknown_err;
2109}
2110
2111/* }}} */
2112
2113/*------------------------------------------------------------------------*/
2114/* Private functions for internal use.  These make assumptions.           */
2115
2116/* {{{ s_alloc(num) */
2117
2118static mp_digit *s_alloc(mp_size num)
2119{
2120  mp_digit *out = malloc(num * sizeof(mp_digit));
2121
2122  assert(out != NULL); /* for debugging */
2123#if DEBUG > 1
2124  {
2125    mp_digit v = (mp_digit) 0xdeadbeef;
2126    int      ix;
2127
2128    for(ix = 0; ix < num; ++ix)
2129      out[ix] = v;
2130  }
2131#endif
2132
2133  return out;
2134}
2135
2136/* }}} */
2137
2138/* {{{ s_realloc(old, osize, nsize) */
2139
2140static mp_digit *s_realloc(mp_digit *old, mp_size osize, mp_size nsize)
2141{
2142#if DEBUG > 1
2143  mp_digit *new = s_alloc(nsize);
2144  int       ix;
2145
2146  for(ix = 0; ix < nsize; ++ix)
2147    new[ix] = (mp_digit) 0xdeadbeef;
2148
2149  memcpy(new, old, osize * sizeof(mp_digit));
2150#else
2151  mp_digit *new = realloc(old, nsize * sizeof(mp_digit));
2152
2153  assert(new != NULL); /* for debugging */
2154#endif
2155  return new;
2156}
2157
2158/* }}} */
2159
2160/* {{{ s_free(ptr) */
2161
2162static void s_free(void *ptr)
2163{
2164  free(ptr);
2165}
2166
2167/* }}} */
2168
2169/* {{{ s_pad(z, min) */
2170
2171static int      s_pad(mp_int z, mp_size min)
2172{
2173  if(MP_ALLOC(z) < min) {
2174    mp_size nsize = ROUND_PREC(min);
2175    mp_digit *tmp;
2176
2177    if((void *)z->digits == (void *)z) {
2178      if((tmp = s_alloc(nsize)) == NULL)
2179        return 0;
2180
2181      COPY(MP_DIGITS(z), tmp, MP_USED(z));
2182    }
2183    else if((tmp = s_realloc(MP_DIGITS(z), MP_ALLOC(z), nsize)) == NULL)
2184      return 0;
2185
2186    MP_DIGITS(z) = tmp;
2187    MP_ALLOC(z) = nsize;
2188  }
2189
2190  return 1;
2191}
2192
2193/* }}} */
2194
2195/* {{{ s_fake(z, value, vbuf) */
2196
2197static void      s_fake(mp_int z, mp_small value, mp_digit vbuf[])
2198{
2199  mp_size uv = (mp_size) s_vpack(value, vbuf);
2200
2201  z->used = uv;
2202  z->alloc = MP_VALUE_DIGITS(value);
2203  z->sign = (value < 0) ? MP_NEG : MP_ZPOS;
2204  z->digits = vbuf;
2205}
2206
2207/* }}} */
2208
2209/* {{{ s_cdig(da, db, len) */
2210
2211static int      s_cdig(mp_digit *da, mp_digit *db, mp_size len)
2212{
2213  mp_digit *dat = da + len - 1, *dbt = db + len - 1;
2214
2215  for(/* */; len != 0; --len, --dat, --dbt) {
2216    if(*dat > *dbt)
2217      return 1;
2218    else if(*dat < *dbt)
2219      return -1;
2220  }
2221
2222  return 0;
2223}
2224
2225/* }}} */
2226
2227/* {{{ s_vpack(v, t[]) */
2228
2229static int       s_vpack(mp_small v, mp_digit t[])
2230{
2231  mp_usmall    uv = (mp_usmall) ((v < 0) ? -v : v);
2232  int          ndig = 0;
2233
2234  if(uv == 0)
2235    t[ndig++] = 0;
2236  else {
2237    while(uv != 0) {
2238      t[ndig++] = (mp_digit) uv;
2239      uv >>= MP_DIGIT_BIT/2;
2240      uv >>= MP_DIGIT_BIT/2;
2241    }
2242  }
2243
2244  return ndig;
2245}
2246
2247/* }}} */
2248
2249/* {{{ s_ucmp(a, b) */
2250
2251static int      s_ucmp(mp_int a, mp_int b)
2252{
2253  mp_size  ua = MP_USED(a), ub = MP_USED(b);
2254
2255  if(ua > ub)
2256    return 1;
2257  else if(ub > ua)
2258    return -1;
2259  else
2260    return s_cdig(MP_DIGITS(a), MP_DIGITS(b), ua);
2261}
2262
2263/* }}} */
2264
2265/* {{{ s_vcmp(a, v) */
2266
2267static int      s_vcmp(mp_int a, mp_small v)
2268{
2269  mp_digit     vdig[MP_VALUE_DIGITS(v)];
2270  int          ndig = 0;
2271  mp_size      ua = MP_USED(a);
2272
2273  ndig = s_vpack(v, vdig);
2274
2275  if(ua > ndig)
2276    return 1;
2277  else if(ua < ndig)
2278    return -1;
2279  else
2280    return s_cdig(MP_DIGITS(a), vdig, ndig);
2281}
2282
2283/* }}} */
2284
2285/* {{{ s_uadd(da, db, dc, size_a, size_b) */
2286
2287static mp_digit s_uadd(mp_digit *da, mp_digit *db, mp_digit *dc,
2288		       mp_size size_a, mp_size size_b)
2289{
2290  mp_size pos;
2291  mp_word w = 0;
2292
2293  /* Insure that da is the longer of the two to simplify later code */
2294  if(size_b > size_a) {
2295    SWAP(mp_digit *, da, db);
2296    SWAP(mp_size, size_a, size_b);
2297  }
2298
2299  /* Add corresponding digits until the shorter number runs out */
2300  for(pos = 0; pos < size_b; ++pos, ++da, ++db, ++dc) {
2301    w = w + (mp_word) *da + (mp_word) *db;
2302    *dc = LOWER_HALF(w);
2303    w = UPPER_HALF(w);
2304  }
2305
2306  /* Propagate carries as far as necessary */
2307  for(/* */; pos < size_a; ++pos, ++da, ++dc) {
2308    w = w + *da;
2309
2310    *dc = LOWER_HALF(w);
2311    w = UPPER_HALF(w);
2312  }
2313
2314  /* Return carry out */
2315  return (mp_digit)w;
2316}
2317
2318/* }}} */
2319
2320/* {{{ s_usub(da, db, dc, size_a, size_b) */
2321
2322static void     s_usub(mp_digit *da, mp_digit *db, mp_digit *dc,
2323		       mp_size size_a, mp_size size_b)
2324{
2325  mp_size pos;
2326  mp_word w = 0;
2327
2328  /* We assume that |a| >= |b| so this should definitely hold */
2329  assert(size_a >= size_b);
2330
2331  /* Subtract corresponding digits and propagate borrow */
2332  for(pos = 0; pos < size_b; ++pos, ++da, ++db, ++dc) {
2333    w = ((mp_word)MP_DIGIT_MAX + 1 +  /* MP_RADIX */
2334	 (mp_word)*da) - w - (mp_word)*db;
2335
2336    *dc = LOWER_HALF(w);
2337    w = (UPPER_HALF(w) == 0);
2338  }
2339
2340  /* Finish the subtraction for remaining upper digits of da */
2341  for(/* */; pos < size_a; ++pos, ++da, ++dc) {
2342    w = ((mp_word)MP_DIGIT_MAX + 1 +  /* MP_RADIX */
2343	 (mp_word)*da) - w;
2344
2345    *dc = LOWER_HALF(w);
2346    w = (UPPER_HALF(w) == 0);
2347  }
2348
2349  /* If there is a borrow out at the end, it violates the precondition */
2350  assert(w == 0);
2351}
2352
2353/* }}} */
2354
2355/* {{{ s_kmul(da, db, dc, size_a, size_b) */
2356
2357static int       s_kmul(mp_digit *da, mp_digit *db, mp_digit *dc,
2358			mp_size size_a, mp_size size_b)
2359{
2360  mp_size  bot_size;
2361
2362  /* Make sure b is the smaller of the two input values */
2363  if(size_b > size_a) {
2364    SWAP(mp_digit *, da, db);
2365    SWAP(mp_size, size_a, size_b);
2366  }
2367
2368  /* Insure that the bottom is the larger half in an odd-length split;
2369     the code below relies on this being true.
2370   */
2371  bot_size = (size_a + 1) / 2;
2372
2373  /* If the values are big enough to bother with recursion, use the
2374     Karatsuba algorithm to compute the product; otherwise use the
2375     normal multiplication algorithm
2376   */
2377  if(multiply_threshold &&
2378     size_a >= multiply_threshold &&
2379     size_b > bot_size) {
2380
2381    mp_digit *t1, *t2, *t3, carry;
2382
2383    mp_digit *a_top = da + bot_size;
2384    mp_digit *b_top = db + bot_size;
2385
2386    mp_size  at_size = size_a - bot_size;
2387    mp_size  bt_size = size_b - bot_size;
2388    mp_size  buf_size = 2 * bot_size;
2389
2390    /* Do a single allocation for all three temporary buffers needed;
2391       each buffer must be big enough to hold the product of two
2392       bottom halves, and one buffer needs space for the completed
2393       product; twice the space is plenty.
2394     */
2395    if((t1 = s_alloc(4 * buf_size)) == NULL) return 0;
2396    t2 = t1 + buf_size;
2397    t3 = t2 + buf_size;
2398    ZERO(t1, 4 * buf_size);
2399
2400    /* t1 and t2 are initially used as temporaries to compute the inner product
2401       (a1 + a0)(b1 + b0) = a1b1 + a1b0 + a0b1 + a0b0
2402     */
2403    carry = s_uadd(da, a_top, t1, bot_size, at_size);      /* t1 = a1 + a0 */
2404    t1[bot_size] = carry;
2405
2406    carry = s_uadd(db, b_top, t2, bot_size, bt_size);      /* t2 = b1 + b0 */
2407    t2[bot_size] = carry;
2408
2409    (void) s_kmul(t1, t2, t3, bot_size + 1, bot_size + 1); /* t3 = t1 * t2 */
2410
2411    /* Now we'll get t1 = a0b0 and t2 = a1b1, and subtract them out so that
2412       we're left with only the pieces we want:  t3 = a1b0 + a0b1
2413     */
2414    ZERO(t1, buf_size);
2415    ZERO(t2, buf_size);
2416    (void) s_kmul(da, db, t1, bot_size, bot_size);     /* t1 = a0 * b0 */
2417    (void) s_kmul(a_top, b_top, t2, at_size, bt_size); /* t2 = a1 * b1 */
2418
2419    /* Subtract out t1 and t2 to get the inner product */
2420    s_usub(t3, t1, t3, buf_size + 2, buf_size);
2421    s_usub(t3, t2, t3, buf_size + 2, buf_size);
2422
2423    /* Assemble the output value */
2424    COPY(t1, dc, buf_size);
2425    carry = s_uadd(t3, dc + bot_size, dc + bot_size,
2426		   buf_size + 1, buf_size);
2427    assert(carry == 0);
2428
2429    carry = s_uadd(t2, dc + 2*bot_size, dc + 2*bot_size,
2430		   buf_size, buf_size);
2431    assert(carry == 0);
2432
2433    s_free(t1); /* note t2 and t3 are just internal pointers to t1 */
2434  }
2435  else {
2436    s_umul(da, db, dc, size_a, size_b);
2437  }
2438
2439  return 1;
2440}
2441
2442/* }}} */
2443
2444/* {{{ s_umul(da, db, dc, size_a, size_b) */
2445
2446static void     s_umul(mp_digit *da, mp_digit *db, mp_digit *dc,
2447		       mp_size size_a, mp_size size_b)
2448{
2449  mp_size   a, b;
2450  mp_word   w;
2451
2452  for(a = 0; a < size_a; ++a, ++dc, ++da) {
2453    mp_digit *dct = dc;
2454    mp_digit *dbt = db;
2455
2456    if(*da == 0)
2457      continue;
2458
2459    w = 0;
2460    for(b = 0; b < size_b; ++b, ++dbt, ++dct) {
2461      w = (mp_word)*da * (mp_word)*dbt + w + (mp_word)*dct;
2462
2463      *dct = LOWER_HALF(w);
2464      w = UPPER_HALF(w);
2465    }
2466
2467    *dct = (mp_digit)w;
2468  }
2469}
2470
2471/* }}} */
2472
2473/* {{{ s_ksqr(da, dc, size_a) */
2474
2475static int       s_ksqr(mp_digit *da, mp_digit *dc, mp_size size_a)
2476{
2477  if(multiply_threshold && size_a > multiply_threshold) {
2478    mp_size    bot_size = (size_a + 1) / 2;
2479    mp_digit  *a_top = da + bot_size;
2480    mp_digit  *t1, *t2, *t3, carry;
2481    mp_size    at_size = size_a - bot_size;
2482    mp_size    buf_size = 2 * bot_size;
2483
2484    if((t1 = s_alloc(4 * buf_size)) == NULL) return 0;
2485    t2 = t1 + buf_size;
2486    t3 = t2 + buf_size;
2487    ZERO(t1, 4 * buf_size);
2488
2489    (void) s_ksqr(da, t1, bot_size);    /* t1 = a0 ^ 2 */
2490    (void) s_ksqr(a_top, t2, at_size);  /* t2 = a1 ^ 2 */
2491
2492    (void) s_kmul(da, a_top, t3, bot_size, at_size);  /* t3 = a0 * a1 */
2493
2494    /* Quick multiply t3 by 2, shifting left (can't overflow) */
2495    {
2496      int     i, top = bot_size + at_size;
2497      mp_word w, save = 0;
2498
2499      for(i = 0; i < top; ++i) {
2500	w = t3[i];
2501	w = (w << 1) | save;
2502	t3[i] = LOWER_HALF(w);
2503	save = UPPER_HALF(w);
2504      }
2505      t3[i] = LOWER_HALF(save);
2506    }
2507
2508    /* Assemble the output value */
2509    COPY(t1, dc, 2 * bot_size);
2510    carry = s_uadd(t3, dc + bot_size, dc + bot_size,
2511		   buf_size + 1, buf_size);
2512    assert(carry == 0);
2513
2514    carry = s_uadd(t2, dc + 2*bot_size, dc + 2*bot_size,
2515		   buf_size, buf_size);
2516    assert(carry == 0);
2517
2518    s_free(t1); /* note that t2 and t2 are internal pointers only */
2519
2520  }
2521  else {
2522    s_usqr(da, dc, size_a);
2523  }
2524
2525  return 1;
2526}
2527
2528/* }}} */
2529
2530/* {{{ s_usqr(da, dc, size_a) */
2531
2532static void      s_usqr(mp_digit *da, mp_digit *dc, mp_size size_a)
2533{
2534  mp_size  i, j;
2535  mp_word  w;
2536
2537  for(i = 0; i < size_a; ++i, dc += 2, ++da) {
2538    mp_digit  *dct = dc, *dat = da;
2539
2540    if(*da == 0)
2541      continue;
2542
2543    /* Take care of the first digit, no rollover */
2544    w = (mp_word)*dat * (mp_word)*dat + (mp_word)*dct;
2545    *dct = LOWER_HALF(w);
2546    w = UPPER_HALF(w);
2547    ++dat; ++dct;
2548
2549    for(j = i + 1; j < size_a; ++j, ++dat, ++dct) {
2550      mp_word  t = (mp_word)*da * (mp_word)*dat;
2551      mp_word  u = w + (mp_word)*dct, ov = 0;
2552
2553      /* Check if doubling t will overflow a word */
2554      if(HIGH_BIT_SET(t))
2555	ov = 1;
2556
2557      w = t + t;
2558
2559      /* Check if adding u to w will overflow a word */
2560      if(ADD_WILL_OVERFLOW(w, u))
2561	ov = 1;
2562
2563      w += u;
2564
2565      *dct = LOWER_HALF(w);
2566      w = UPPER_HALF(w);
2567      if(ov) {
2568	w += MP_DIGIT_MAX; /* MP_RADIX */
2569	++w;
2570      }
2571    }
2572
2573    w = w + *dct;
2574    *dct = (mp_digit)w;
2575    while((w = UPPER_HALF(w)) != 0) {
2576      ++dct; w = w + *dct;
2577      *dct = LOWER_HALF(w);
2578    }
2579
2580    assert(w == 0);
2581  }
2582}
2583
2584/* }}} */
2585
2586/* {{{ s_dadd(a, b) */
2587
2588static void      s_dadd(mp_int a, mp_digit b)
2589{
2590  mp_word   w = 0;
2591  mp_digit *da = MP_DIGITS(a);
2592  mp_size   ua = MP_USED(a);
2593
2594  w = (mp_word)*da + b;
2595  *da++ = LOWER_HALF(w);
2596  w = UPPER_HALF(w);
2597
2598  for(ua -= 1; ua > 0; --ua, ++da) {
2599    w = (mp_word)*da + w;
2600
2601    *da = LOWER_HALF(w);
2602    w = UPPER_HALF(w);
2603  }
2604
2605  if(w) {
2606    *da = (mp_digit)w;
2607    MP_USED(a) += 1;
2608  }
2609}
2610
2611/* }}} */
2612
2613/* {{{ s_dmul(a, b) */
2614
2615static void      s_dmul(mp_int a, mp_digit b)
2616{
2617  mp_word   w = 0;
2618  mp_digit *da = MP_DIGITS(a);
2619  mp_size   ua = MP_USED(a);
2620
2621  while(ua > 0) {
2622    w = (mp_word)*da * b + w;
2623    *da++ = LOWER_HALF(w);
2624    w = UPPER_HALF(w);
2625    --ua;
2626  }
2627
2628  if(w) {
2629    *da = (mp_digit)w;
2630    MP_USED(a) += 1;
2631  }
2632}
2633
2634/* }}} */
2635
2636/* {{{ s_dbmul(da, b, dc, size_a) */
2637
2638static void      s_dbmul(mp_digit *da, mp_digit b, mp_digit *dc, mp_size size_a)
2639{
2640  mp_word  w = 0;
2641
2642  while(size_a > 0) {
2643    w = (mp_word)*da++ * (mp_word)b + w;
2644
2645    *dc++ = LOWER_HALF(w);
2646    w = UPPER_HALF(w);
2647    --size_a;
2648  }
2649
2650  if(w)
2651    *dc = LOWER_HALF(w);
2652}
2653
2654/* }}} */
2655
2656/* {{{ s_ddiv(da, d, dc, size_a) */
2657
2658static mp_digit  s_ddiv(mp_int a, mp_digit b)
2659{
2660  mp_word   w = 0, qdigit;
2661  mp_size   ua = MP_USED(a);
2662  mp_digit *da = MP_DIGITS(a) + ua - 1;
2663
2664  for(/* */; ua > 0; --ua, --da) {
2665    w = (w << MP_DIGIT_BIT) | *da;
2666
2667    if(w >= b) {
2668      qdigit = w / b;
2669      w = w % b;
2670    }
2671    else {
2672      qdigit = 0;
2673    }
2674
2675    *da = (mp_digit)qdigit;
2676  }
2677
2678  CLAMP(a);
2679  return (mp_digit)w;
2680}
2681
2682/* }}} */
2683
2684/* {{{ s_qdiv(z, p2) */
2685
2686static void     s_qdiv(mp_int z, mp_size p2)
2687{
2688  mp_size ndig = p2 / MP_DIGIT_BIT, nbits = p2 % MP_DIGIT_BIT;
2689  mp_size uz = MP_USED(z);
2690
2691  if(ndig) {
2692    mp_size  mark;
2693    mp_digit *to, *from;
2694
2695    if(ndig >= uz) {
2696      mp_int_zero(z);
2697      return;
2698    }
2699
2700    to = MP_DIGITS(z); from = to + ndig;
2701
2702    for(mark = ndig; mark < uz; ++mark)
2703      *to++ = *from++;
2704
2705    MP_USED(z) = uz - ndig;
2706  }
2707
2708  if(nbits) {
2709    mp_digit d = 0, *dz, save;
2710    mp_size  up = MP_DIGIT_BIT - nbits;
2711
2712    uz = MP_USED(z);
2713    dz = MP_DIGITS(z) + uz - 1;
2714
2715    for(/* */; uz > 0; --uz, --dz) {
2716      save = *dz;
2717
2718      *dz = (*dz >> nbits) | (d << up);
2719      d = save;
2720    }
2721
2722    CLAMP(z);
2723  }
2724
2725  if(MP_USED(z) == 1 && z->digits[0] == 0)
2726    MP_SIGN(z) = MP_ZPOS;
2727}
2728
2729/* }}} */
2730
2731/* {{{ s_qmod(z, p2) */
2732
2733static void     s_qmod(mp_int z, mp_size p2)
2734{
2735  mp_size   start = p2 / MP_DIGIT_BIT + 1, rest = p2 % MP_DIGIT_BIT;
2736  mp_size   uz = MP_USED(z);
2737  mp_digit  mask = (1 << rest) - 1;
2738
2739  if(start <= uz) {
2740    MP_USED(z) = start;
2741    z->digits[start - 1] &= mask;
2742    CLAMP(z);
2743  }
2744}
2745
2746/* }}} */
2747
2748/* {{{ s_qmul(z, p2) */
2749
2750static int      s_qmul(mp_int z, mp_size p2)
2751{
2752  mp_size   uz, need, rest, extra, i;
2753  mp_digit *from, *to, d;
2754
2755  if(p2 == 0)
2756    return 1;
2757
2758  uz = MP_USED(z);
2759  need = p2 / MP_DIGIT_BIT; rest = p2 % MP_DIGIT_BIT;
2760
2761  /* Figure out if we need an extra digit at the top end; this occurs
2762     if the topmost `rest' bits of the high-order digit of z are not
2763     zero, meaning they will be shifted off the end if not preserved */
2764  extra = 0;
2765  if(rest != 0) {
2766    mp_digit *dz = MP_DIGITS(z) + uz - 1;
2767
2768    if((*dz >> (MP_DIGIT_BIT - rest)) != 0)
2769      extra = 1;
2770  }
2771
2772  if(!s_pad(z, uz + need + extra))
2773    return 0;
2774
2775  /* If we need to shift by whole digits, do that in one pass, then
2776     to back and shift by partial digits.
2777   */
2778  if(need > 0) {
2779    from = MP_DIGITS(z) + uz - 1;
2780    to = from + need;
2781
2782    for(i = 0; i < uz; ++i)
2783      *to-- = *from--;
2784
2785    ZERO(MP_DIGITS(z), need);
2786    uz += need;
2787  }
2788
2789  if(rest) {
2790    d = 0;
2791    for(i = need, from = MP_DIGITS(z) + need; i < uz; ++i, ++from) {
2792      mp_digit save = *from;
2793
2794      *from = (*from << rest) | (d >> (MP_DIGIT_BIT - rest));
2795      d = save;
2796    }
2797
2798    d >>= (MP_DIGIT_BIT - rest);
2799    if(d != 0) {
2800      *from = d;
2801      uz += extra;
2802    }
2803  }
2804
2805  MP_USED(z) = uz;
2806  CLAMP(z);
2807
2808  return 1;
2809}
2810
2811/* }}} */
2812
2813/* {{{ s_qsub(z, p2) */
2814
2815/* Compute z = 2^p2 - |z|; requires that 2^p2 >= |z|
2816   The sign of the result is always zero/positive.
2817 */
2818static int       s_qsub(mp_int z, mp_size p2)
2819{
2820  mp_digit hi = (1 << (p2 % MP_DIGIT_BIT)), *zp;
2821  mp_size  tdig = (p2 / MP_DIGIT_BIT), pos;
2822  mp_word  w = 0;
2823
2824  if(!s_pad(z, tdig + 1))
2825    return 0;
2826
2827  for(pos = 0, zp = MP_DIGITS(z); pos < tdig; ++pos, ++zp) {
2828    w = ((mp_word) MP_DIGIT_MAX + 1) - w - (mp_word)*zp;
2829
2830    *zp = LOWER_HALF(w);
2831    w = UPPER_HALF(w) ? 0 : 1;
2832  }
2833
2834  w = ((mp_word) MP_DIGIT_MAX + 1 + hi) - w - (mp_word)*zp;
2835  *zp = LOWER_HALF(w);
2836
2837  assert(UPPER_HALF(w) != 0); /* no borrow out should be possible */
2838
2839  MP_SIGN(z) = MP_ZPOS;
2840  CLAMP(z);
2841
2842  return 1;
2843}
2844
2845/* }}} */
2846
2847/* {{{ s_dp2k(z) */
2848
2849static int      s_dp2k(mp_int z)
2850{
2851  int       k = 0;
2852  mp_digit *dp = MP_DIGITS(z), d;
2853
2854  if(MP_USED(z) == 1 && *dp == 0)
2855    return 1;
2856
2857  while(*dp == 0) {
2858    k += MP_DIGIT_BIT;
2859    ++dp;
2860  }
2861
2862  d = *dp;
2863  while((d & 1) == 0) {
2864    d >>= 1;
2865    ++k;
2866  }
2867
2868  return k;
2869}
2870
2871/* }}} */
2872
2873/* {{{ s_isp2(z) */
2874
2875static int       s_isp2(mp_int z)
2876{
2877  mp_size uz = MP_USED(z), k = 0;
2878  mp_digit *dz = MP_DIGITS(z), d;
2879
2880  while(uz > 1) {
2881    if(*dz++ != 0)
2882      return -1;
2883    k += MP_DIGIT_BIT;
2884    --uz;
2885  }
2886
2887  d = *dz;
2888  while(d > 1) {
2889    if(d & 1)
2890      return -1;
2891    ++k; d >>= 1;
2892  }
2893
2894  return (int) k;
2895}
2896
2897/* }}} */
2898
2899/* {{{ s_2expt(z, k) */
2900
2901static int       s_2expt(mp_int z, mp_small k)
2902{
2903  mp_size  ndig, rest;
2904  mp_digit *dz;
2905
2906  ndig = (k + MP_DIGIT_BIT) / MP_DIGIT_BIT;
2907  rest = k % MP_DIGIT_BIT;
2908
2909  if(!s_pad(z, ndig))
2910    return 0;
2911
2912  dz = MP_DIGITS(z);
2913  ZERO(dz, ndig);
2914  *(dz + ndig - 1) = (1 << rest);
2915  MP_USED(z) = ndig;
2916
2917  return 1;
2918}
2919
2920/* }}} */
2921
2922/* {{{ s_norm(a, b) */
2923
2924static int      s_norm(mp_int a, mp_int b)
2925{
2926  mp_digit d = b->digits[MP_USED(b) - 1];
2927  int      k = 0;
2928
2929  while(d < (mp_digit) (1 << (MP_DIGIT_BIT - 1))) { /* d < (MP_RADIX / 2) */
2930    d <<= 1;
2931    ++k;
2932  }
2933
2934  /* These multiplications can't fail */
2935  if(k != 0) {
2936    (void) s_qmul(a, (mp_size) k);
2937    (void) s_qmul(b, (mp_size) k);
2938  }
2939
2940  return k;
2941}
2942
2943/* }}} */
2944
2945/* {{{ s_brmu(z, m) */
2946
2947static mp_result s_brmu(mp_int z, mp_int m)
2948{
2949  mp_size um = MP_USED(m) * 2;
2950
2951  if(!s_pad(z, um))
2952    return MP_MEMORY;
2953
2954  s_2expt(z, MP_DIGIT_BIT * um);
2955  return mp_int_div(z, m, z, NULL);
2956}
2957
2958/* }}} */
2959
2960/* {{{ s_reduce(x, m, mu, q1, q2) */
2961
2962static int       s_reduce(mp_int x, mp_int m, mp_int mu, mp_int q1, mp_int q2)
2963{
2964  mp_size   um = MP_USED(m), umb_p1, umb_m1;
2965
2966  umb_p1 = (um + 1) * MP_DIGIT_BIT;
2967  umb_m1 = (um - 1) * MP_DIGIT_BIT;
2968
2969  if(mp_int_copy(x, q1) != MP_OK)
2970    return 0;
2971
2972  /* Compute q2 = floor((floor(x / b^(k-1)) * mu) / b^(k+1)) */
2973  s_qdiv(q1, umb_m1);
2974  UMUL(q1, mu, q2);
2975  s_qdiv(q2, umb_p1);
2976
2977  /* Set x = x mod b^(k+1) */
2978  s_qmod(x, umb_p1);
2979
2980  /* Now, q is a guess for the quotient a / m.
2981     Compute x - q * m mod b^(k+1), replacing x.  This may be off
2982     by a factor of 2m, but no more than that.
2983   */
2984  UMUL(q2, m, q1);
2985  s_qmod(q1, umb_p1);
2986  (void) mp_int_sub(x, q1, x); /* can't fail */
2987
2988  /* The result may be < 0; if it is, add b^(k+1) to pin it in the
2989     proper range. */
2990  if((CMPZ(x) < 0) && !s_qsub(x, umb_p1))
2991    return 0;
2992
2993  /* If x > m, we need to back it off until it is in range.
2994     This will be required at most twice.  */
2995  if(mp_int_compare(x, m) >= 0) {
2996    (void) mp_int_sub(x, m, x);
2997    if(mp_int_compare(x, m) >= 0)
2998      (void) mp_int_sub(x, m, x);
2999  }
3000
3001  /* At this point, x has been properly reduced. */
3002  return 1;
3003}
3004
3005/* }}} */
3006
3007/* {{{ s_embar(a, b, m, mu, c) */
3008
3009/* Perform modular exponentiation using Barrett's method, where mu is
3010   the reduction constant for m.  Assumes a < m, b > 0. */
3011static mp_result s_embar(mp_int a, mp_int b, mp_int m, mp_int mu, mp_int c)
3012{
3013  mp_digit  *db, *dbt, umu, d;
3014  mpz_t     temp[3];
3015  mp_result res;
3016  int       last = 0;
3017
3018  umu = MP_USED(mu); db = MP_DIGITS(b); dbt = db + MP_USED(b) - 1;
3019
3020  while(last < 3) {
3021    SETUP(mp_int_init_size(TEMP(last), 4 * umu), last);
3022    ZERO(MP_DIGITS(TEMP(last - 1)), MP_ALLOC(TEMP(last - 1)));
3023  }
3024
3025  (void) mp_int_set_value(c, 1);
3026
3027  /* Take care of low-order digits */
3028  while(db < dbt) {
3029    int      i;
3030
3031    for(d = *db, i = MP_DIGIT_BIT; i > 0; --i, d >>= 1) {
3032      if(d & 1) {
3033	/* The use of a second temporary avoids allocation */
3034	UMUL(c, a, TEMP(0));
3035	if(!s_reduce(TEMP(0), m, mu, TEMP(1), TEMP(2))) {
3036	  res = MP_MEMORY; goto CLEANUP;
3037	}
3038	mp_int_copy(TEMP(0), c);
3039      }
3040
3041
3042      USQR(a, TEMP(0));
3043      assert(MP_SIGN(TEMP(0)) == MP_ZPOS);
3044      if(!s_reduce(TEMP(0), m, mu, TEMP(1), TEMP(2))) {
3045	res = MP_MEMORY; goto CLEANUP;
3046      }
3047      assert(MP_SIGN(TEMP(0)) == MP_ZPOS);
3048      mp_int_copy(TEMP(0), a);
3049
3050
3051    }
3052
3053    ++db;
3054  }
3055
3056  /* Take care of highest-order digit */
3057  d = *dbt;
3058  for(;;) {
3059    if(d & 1) {
3060      UMUL(c, a, TEMP(0));
3061      if(!s_reduce(TEMP(0), m, mu, TEMP(1), TEMP(2))) {
3062	res = MP_MEMORY; goto CLEANUP;
3063      }
3064      mp_int_copy(TEMP(0), c);
3065    }
3066
3067    d >>= 1;
3068    if(!d) break;
3069
3070    USQR(a, TEMP(0));
3071    if(!s_reduce(TEMP(0), m, mu, TEMP(1), TEMP(2))) {
3072      res = MP_MEMORY; goto CLEANUP;
3073    }
3074    (void) mp_int_copy(TEMP(0), a);
3075  }
3076
3077 CLEANUP:
3078  while(--last >= 0)
3079    mp_int_clear(TEMP(last));
3080
3081  return res;
3082}
3083
3084/* }}} */
3085
3086/* {{{ s_udiv(a, b) */
3087
3088/* Precondition:  a >= b and b > 0
3089   Postcondition: a' = a / b, b' = a % b
3090 */
3091static mp_result s_udiv(mp_int a, mp_int b)
3092{
3093  mpz_t     q, r, t;
3094  mp_size   ua, ub, qpos = 0;
3095  mp_digit *da, btop;
3096  mp_result res = MP_OK;
3097  int       k, skip = 0;
3098
3099  /* Force signs to positive */
3100  MP_SIGN(a) = MP_ZPOS;
3101  MP_SIGN(b) = MP_ZPOS;
3102
3103  /* Normalize, per Knuth */
3104  k = s_norm(a, b);
3105
3106  ua = MP_USED(a); ub = MP_USED(b); btop = b->digits[ub - 1];
3107  if((res = mp_int_init_size(&q, ua)) != MP_OK) return res;
3108  if((res = mp_int_init_size(&t, ua + 1)) != MP_OK) goto CLEANUP;
3109
3110  da = MP_DIGITS(a);
3111  r.digits = da + ua - 1;  /* The contents of r are shared with a */
3112  r.used   = 1;
3113  r.sign   = MP_ZPOS;
3114  r.alloc  = MP_ALLOC(a);
3115  ZERO(t.digits, t.alloc);
3116
3117  /* Solve for quotient digits, store in q.digits in reverse order */
3118  while(r.digits >= da) {
3119    assert(qpos <= q.alloc);
3120
3121    if(s_ucmp(b, &r) > 0) {
3122      r.digits -= 1;
3123      r.used += 1;
3124
3125      if(++skip > 1 && qpos > 0)
3126	q.digits[qpos++] = 0;
3127
3128      CLAMP(&r);
3129    }
3130    else {
3131      mp_word  pfx = r.digits[r.used - 1];
3132      mp_word  qdigit;
3133
3134      if(r.used > 1 && pfx <= btop) {
3135	pfx <<= MP_DIGIT_BIT / 2;
3136	pfx <<= MP_DIGIT_BIT / 2;
3137	pfx |= r.digits[r.used - 2];
3138      }
3139
3140      qdigit = pfx / btop;
3141      if(qdigit > MP_DIGIT_MAX) {
3142	if(qdigit & MP_DIGIT_MAX)
3143	  qdigit = MP_DIGIT_MAX;
3144	else
3145	  qdigit = 1;
3146      }
3147
3148      s_dbmul(MP_DIGITS(b), (mp_digit) qdigit, t.digits, ub);
3149      t.used = ub + 1; CLAMP(&t);
3150      while(s_ucmp(&t, &r) > 0) {
3151	--qdigit;
3152	(void) mp_int_sub(&t, b, &t); /* cannot fail */
3153      }
3154
3155      s_usub(r.digits, t.digits, r.digits, r.used, t.used);
3156      CLAMP(&r);
3157
3158      q.digits[qpos++] = (mp_digit) qdigit;
3159      ZERO(t.digits, t.used);
3160      skip = 0;
3161    }
3162  }
3163
3164  /* Put quotient digits in the correct order, and discard extra zeroes */
3165  q.used = qpos;
3166  REV(mp_digit, q.digits, qpos);
3167  CLAMP(&q);
3168
3169  /* Denormalize the remainder */
3170  CLAMP(a);
3171  if(k != 0)
3172    s_qdiv(a, k);
3173
3174  mp_int_copy(a, b);  /* ok:  0 <= r < b */
3175  mp_int_copy(&q, a); /* ok:  q <= a     */
3176
3177  mp_int_clear(&t);
3178 CLEANUP:
3179  mp_int_clear(&q);
3180  return res;
3181}
3182
3183/* }}} */
3184
3185/* {{{ s_outlen(z, r) */
3186
3187static int       s_outlen(mp_int z, mp_size r)
3188{
3189  mp_result  bits;
3190  double     raw;
3191
3192  assert(r >= MP_MIN_RADIX && r <= MP_MAX_RADIX);
3193
3194  bits = mp_int_count_bits(z);
3195  raw = (double)bits * s_log2[r];
3196
3197  return (int)(raw + 0.999999);
3198}
3199
3200/* }}} */
3201
3202/* {{{ s_inlen(len, r) */
3203
3204static mp_size   s_inlen(int len, mp_size r)
3205{
3206  double  raw = (double)len / s_log2[r];
3207  mp_size bits = (mp_size)(raw + 0.5);
3208
3209  return (mp_size)((bits + (MP_DIGIT_BIT - 1)) / MP_DIGIT_BIT);
3210}
3211
3212/* }}} */
3213
3214/* {{{ s_ch2val(c, r) */
3215
3216static int       s_ch2val(char c, int r)
3217{
3218  int out;
3219
3220  if(isdigit((unsigned char) c))
3221    out = c - '0';
3222  else if(r > 10 && isalpha((unsigned char) c))
3223    out = toupper(c) - 'A' + 10;
3224  else
3225    return -1;
3226
3227  return (out >= r) ? -1 : out;
3228}
3229
3230/* }}} */
3231
3232/* {{{ s_val2ch(v, caps) */
3233
3234static char      s_val2ch(int v, int caps)
3235{
3236  assert(v >= 0);
3237
3238  if(v < 10)
3239    return v + '0';
3240  else {
3241    char out = (v - 10) + 'a';
3242
3243    if(caps)
3244      return toupper(out);
3245    else
3246      return out;
3247  }
3248}
3249
3250/* }}} */
3251
3252/* {{{ s_2comp(buf, len) */
3253
3254static void      s_2comp(unsigned char *buf, int len)
3255{
3256  int i;
3257  unsigned short s = 1;
3258
3259  for(i = len - 1; i >= 0; --i) {
3260    unsigned char c = ~buf[i];
3261
3262    s = c + s;
3263    c = s & UCHAR_MAX;
3264    s >>= CHAR_BIT;
3265
3266    buf[i] = c;
3267  }
3268
3269  /* last carry out is ignored */
3270}
3271
3272/* }}} */
3273
3274/* {{{ s_tobin(z, buf, *limpos) */
3275
3276static mp_result s_tobin(mp_int z, unsigned char *buf, int *limpos, int pad)
3277{
3278  mp_size uz;
3279  mp_digit *dz;
3280  int pos = 0, limit = *limpos;
3281
3282  uz = MP_USED(z); dz = MP_DIGITS(z);
3283  while(uz > 0 && pos < limit) {
3284    mp_digit d = *dz++;
3285    int i;
3286
3287    for(i = sizeof(mp_digit); i > 0 && pos < limit; --i) {
3288      buf[pos++] = (unsigned char)d;
3289      d >>= CHAR_BIT;
3290
3291      /* Don't write leading zeroes */
3292      if(d == 0 && uz == 1)
3293	i = 0; /* exit loop without signaling truncation */
3294    }
3295
3296    /* Detect truncation (loop exited with pos >= limit) */
3297    if(i > 0) break;
3298
3299    --uz;
3300  }
3301
3302  if(pad != 0 && (buf[pos - 1] >> (CHAR_BIT - 1))) {
3303    if(pos < limit)
3304      buf[pos++] = 0;
3305    else
3306      uz = 1;
3307  }
3308
3309  /* Digits are in reverse order, fix that */
3310  REV(unsigned char, buf, pos);
3311
3312  /* Return the number of bytes actually written */
3313  *limpos = pos;
3314
3315  return (uz == 0) ? MP_OK : MP_TRUNC;
3316}
3317
3318/* }}} */
3319
3320/* {{{ s_print(tag, z) */
3321
3322#if DEBUG
3323void      s_print(char *tag, mp_int z)
3324{
3325  int  i;
3326
3327  fprintf(stderr, "%s: %c ", tag,
3328	  (MP_SIGN(z) == MP_NEG) ? '-' : '+');
3329
3330  for(i = MP_USED(z) - 1; i >= 0; --i)
3331    fprintf(stderr, "%0*X", (int)(MP_DIGIT_BIT / 4), z->digits[i]);
3332
3333  fputc('\n', stderr);
3334
3335}
3336
3337void      s_print_buf(char *tag, mp_digit *buf, mp_size num)
3338{
3339  int  i;
3340
3341  fprintf(stderr, "%s: ", tag);
3342
3343  for(i = num - 1; i >= 0; --i)
3344    fprintf(stderr, "%0*X", (int)(MP_DIGIT_BIT / 4), buf[i]);
3345
3346  fputc('\n', stderr);
3347}
3348#endif
3349
3350/* }}} */
3351
3352/* HERE THERE BE DRAGONS */
3353