1/*	$NetBSD: bn_mp_exptmod_fast.c,v 1.1.1.1 2011/04/13 18:14:54 elric Exp $	*/
2
3#include <tommath.h>
4#ifdef BN_MP_EXPTMOD_FAST_C
5/* LibTomMath, multiple-precision integer library -- Tom St Denis
6 *
7 * LibTomMath is a library that provides multiple-precision
8 * integer arithmetic as well as number theoretic functionality.
9 *
10 * The library was designed directly after the MPI library by
11 * Michael Fromberger but has been written from scratch with
12 * additional optimizations in place.
13 *
14 * The library is free for all purposes without any express
15 * guarantee it works.
16 *
17 * Tom St Denis, tomstdenis@gmail.com, http://libtom.org
18 */
19
20/* computes Y == G**X mod P, HAC pp.616, Algorithm 14.85
21 *
22 * Uses a left-to-right k-ary sliding window to compute the modular exponentiation.
23 * The value of k changes based on the size of the exponent.
24 *
25 * Uses Montgomery or Diminished Radix reduction [whichever appropriate]
26 */
27
28#ifdef MP_LOW_MEM
29   #define TAB_SIZE 32
30#else
31   #define TAB_SIZE 256
32#endif
33
34int mp_exptmod_fast (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode)
35{
36  mp_int  M[TAB_SIZE], res;
37  mp_digit buf, mp;
38  int     err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize;
39
40  /* use a pointer to the reduction algorithm.  This allows us to use
41   * one of many reduction algorithms without modding the guts of
42   * the code with if statements everywhere.
43   */
44  int     (*redux)(mp_int*,mp_int*,mp_digit);
45
46  /* find window size */
47  x = mp_count_bits (X);
48  if (x <= 7) {
49    winsize = 2;
50  } else if (x <= 36) {
51    winsize = 3;
52  } else if (x <= 140) {
53    winsize = 4;
54  } else if (x <= 450) {
55    winsize = 5;
56  } else if (x <= 1303) {
57    winsize = 6;
58  } else if (x <= 3529) {
59    winsize = 7;
60  } else {
61    winsize = 8;
62  }
63
64#ifdef MP_LOW_MEM
65  if (winsize > 5) {
66     winsize = 5;
67  }
68#endif
69
70  /* init M array */
71  /* init first cell */
72  if ((err = mp_init(&M[1])) != MP_OKAY) {
73     return err;
74  }
75
76  /* now init the second half of the array */
77  for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
78    if ((err = mp_init(&M[x])) != MP_OKAY) {
79      for (y = 1<<(winsize-1); y < x; y++) {
80        mp_clear (&M[y]);
81      }
82      mp_clear(&M[1]);
83      return err;
84    }
85  }
86
87  /* determine and setup reduction code */
88  if (redmode == 0) {
89#ifdef BN_MP_MONTGOMERY_SETUP_C
90     /* now setup montgomery  */
91     if ((err = mp_montgomery_setup (P, &mp)) != MP_OKAY) {
92        goto LBL_M;
93     }
94#else
95     err = MP_VAL;
96     goto LBL_M;
97#endif
98
99     /* automatically pick the comba one if available (saves quite a few calls/ifs) */
100#ifdef BN_FAST_MP_MONTGOMERY_REDUCE_C
101     if (((P->used * 2 + 1) < MP_WARRAY) &&
102          P->used < (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
103        redux = fast_mp_montgomery_reduce;
104     } else
105#endif
106     {
107#ifdef BN_MP_MONTGOMERY_REDUCE_C
108        /* use slower baseline Montgomery method */
109        redux = mp_montgomery_reduce;
110#else
111        err = MP_VAL;
112        goto LBL_M;
113#endif
114     }
115  } else if (redmode == 1) {
116#if defined(BN_MP_DR_SETUP_C) && defined(BN_MP_DR_REDUCE_C)
117     /* setup DR reduction for moduli of the form B**k - b */
118     mp_dr_setup(P, &mp);
119     redux = mp_dr_reduce;
120#else
121     err = MP_VAL;
122     goto LBL_M;
123#endif
124  } else {
125#if defined(BN_MP_REDUCE_2K_SETUP_C) && defined(BN_MP_REDUCE_2K_C)
126     /* setup DR reduction for moduli of the form 2**k - b */
127     if ((err = mp_reduce_2k_setup(P, &mp)) != MP_OKAY) {
128        goto LBL_M;
129     }
130     redux = mp_reduce_2k;
131#else
132     err = MP_VAL;
133     goto LBL_M;
134#endif
135  }
136
137  /* setup result */
138  if ((err = mp_init (&res)) != MP_OKAY) {
139    goto LBL_M;
140  }
141
142  /* create M table
143   *
144
145   *
146   * The first half of the table is not computed though accept for M[0] and M[1]
147   */
148
149  if (redmode == 0) {
150#ifdef BN_MP_MONTGOMERY_CALC_NORMALIZATION_C
151     /* now we need R mod m */
152     if ((err = mp_montgomery_calc_normalization (&res, P)) != MP_OKAY) {
153       goto LBL_RES;
154     }
155#else
156     err = MP_VAL;
157     goto LBL_RES;
158#endif
159
160     /* now set M[1] to G * R mod m */
161     if ((err = mp_mulmod (G, &res, P, &M[1])) != MP_OKAY) {
162       goto LBL_RES;
163     }
164  } else {
165     mp_set(&res, 1);
166     if ((err = mp_mod(G, P, &M[1])) != MP_OKAY) {
167        goto LBL_RES;
168     }
169  }
170
171  /* compute the value at M[1<<(winsize-1)] by squaring M[1] (winsize-1) times */
172  if ((err = mp_copy (&M[1], &M[1 << (winsize - 1)])) != MP_OKAY) {
173    goto LBL_RES;
174  }
175
176  for (x = 0; x < (winsize - 1); x++) {
177    if ((err = mp_sqr (&M[1 << (winsize - 1)], &M[1 << (winsize - 1)])) != MP_OKAY) {
178      goto LBL_RES;
179    }
180    if ((err = redux (&M[1 << (winsize - 1)], P, mp)) != MP_OKAY) {
181      goto LBL_RES;
182    }
183  }
184
185  /* create upper table */
186  for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) {
187    if ((err = mp_mul (&M[x - 1], &M[1], &M[x])) != MP_OKAY) {
188      goto LBL_RES;
189    }
190    if ((err = redux (&M[x], P, mp)) != MP_OKAY) {
191      goto LBL_RES;
192    }
193  }
194
195  /* set initial mode and bit cnt */
196  mode   = 0;
197  bitcnt = 1;
198  buf    = 0;
199  digidx = X->used - 1;
200  bitcpy = 0;
201  bitbuf = 0;
202
203  for (;;) {
204    /* grab next digit as required */
205    if (--bitcnt == 0) {
206      /* if digidx == -1 we are out of digits so break */
207      if (digidx == -1) {
208        break;
209      }
210      /* read next digit and reset bitcnt */
211      buf    = X->dp[digidx--];
212      bitcnt = (int)DIGIT_BIT;
213    }
214
215    /* grab the next msb from the exponent */
216    y     = (mp_digit)(buf >> (DIGIT_BIT - 1)) & 1;
217    buf <<= (mp_digit)1;
218
219    /* if the bit is zero and mode == 0 then we ignore it
220     * These represent the leading zero bits before the first 1 bit
221     * in the exponent.  Technically this opt is not required but it
222     * does lower the # of trivial squaring/reductions used
223     */
224    if (mode == 0 && y == 0) {
225      continue;
226    }
227
228    /* if the bit is zero and mode == 1 then we square */
229    if (mode == 1 && y == 0) {
230      if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
231        goto LBL_RES;
232      }
233      if ((err = redux (&res, P, mp)) != MP_OKAY) {
234        goto LBL_RES;
235      }
236      continue;
237    }
238
239    /* else we add it to the window */
240    bitbuf |= (y << (winsize - ++bitcpy));
241    mode    = 2;
242
243    if (bitcpy == winsize) {
244      /* ok window is filled so square as required and multiply  */
245      /* square first */
246      for (x = 0; x < winsize; x++) {
247        if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
248          goto LBL_RES;
249        }
250        if ((err = redux (&res, P, mp)) != MP_OKAY) {
251          goto LBL_RES;
252        }
253      }
254
255      /* then multiply */
256      if ((err = mp_mul (&res, &M[bitbuf], &res)) != MP_OKAY) {
257        goto LBL_RES;
258      }
259      if ((err = redux (&res, P, mp)) != MP_OKAY) {
260        goto LBL_RES;
261      }
262
263      /* empty window and reset */
264      bitcpy = 0;
265      bitbuf = 0;
266      mode   = 1;
267    }
268  }
269
270  /* if bits remain then square/multiply */
271  if (mode == 2 && bitcpy > 0) {
272    /* square then multiply if the bit is set */
273    for (x = 0; x < bitcpy; x++) {
274      if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
275        goto LBL_RES;
276      }
277      if ((err = redux (&res, P, mp)) != MP_OKAY) {
278        goto LBL_RES;
279      }
280
281      /* get next bit of the window */
282      bitbuf <<= 1;
283      if ((bitbuf & (1 << winsize)) != 0) {
284        /* then multiply */
285        if ((err = mp_mul (&res, &M[1], &res)) != MP_OKAY) {
286          goto LBL_RES;
287        }
288        if ((err = redux (&res, P, mp)) != MP_OKAY) {
289          goto LBL_RES;
290        }
291      }
292    }
293  }
294
295  if (redmode == 0) {
296     /* fixup result if Montgomery reduction is used
297      * recall that any value in a Montgomery system is
298      * actually multiplied by R mod n.  So we have
299      * to reduce one more time to cancel out the factor
300      * of R.
301      */
302     if ((err = redux(&res, P, mp)) != MP_OKAY) {
303       goto LBL_RES;
304     }
305  }
306
307  /* swap res with Y */
308  mp_exch (&res, Y);
309  err = MP_OKAY;
310LBL_RES:mp_clear (&res);
311LBL_M:
312  mp_clear(&M[1]);
313  for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
314    mp_clear (&M[x]);
315  }
316  return err;
317}
318#endif
319
320
321/* Source: /cvs/libtom/libtommath/bn_mp_exptmod_fast.c,v */
322/* Revision: 1.4 */
323/* Date: 2006/12/28 01:25:13 */
324