1#include <tommath.h>
2#ifdef BN_FAST_S_MP_SQR_C
3/* LibTomMath, multiple-precision integer library -- Tom St Denis
4 *
5 * LibTomMath is a library that provides multiple-precision
6 * integer arithmetic as well as number theoretic functionality.
7 *
8 * The library was designed directly after the MPI library by
9 * Michael Fromberger but has been written from scratch with
10 * additional optimizations in place.
11 *
12 * The library is free for all purposes without any express
13 * guarantee it works.
14 *
15 * Tom St Denis, tomstdenis@gmail.com, http://math.libtomcrypt.com
16 */
17
18/* the jist of squaring...
19 * you do like mult except the offset of the tmpx [one that
20 * starts closer to zero] can't equal the offset of tmpy.
21 * So basically you set up iy like before then you min it with
22 * (ty-tx) so that it never happens.  You double all those
23 * you add in the inner loop
24
25After that loop you do the squares and add them in.
26*/
27
28int fast_s_mp_sqr (mp_int * a, mp_int * b)
29{
30  int       olduse, res, pa, ix, iz;
31  mp_digit   W[MP_WARRAY], *tmpx;
32  mp_word   W1;
33
34  /* grow the destination as required */
35  pa = a->used + a->used;
36  if (b->alloc < pa) {
37    if ((res = mp_grow (b, pa)) != MP_OKAY) {
38      return res;
39    }
40  }
41
42  /* number of output digits to produce */
43  W1 = 0;
44  for (ix = 0; ix < pa; ix++) {
45      int      tx, ty, iy;
46      mp_word  _W;
47      mp_digit *tmpy;
48
49      /* clear counter */
50      _W = 0;
51
52      /* get offsets into the two bignums */
53      ty = MIN(a->used-1, ix);
54      tx = ix - ty;
55
56      /* setup temp aliases */
57      tmpx = a->dp + tx;
58      tmpy = a->dp + ty;
59
60      /* this is the number of times the loop will iterrate, essentially
61         while (tx++ < a->used && ty-- >= 0) { ... }
62       */
63      iy = MIN(a->used-tx, ty+1);
64
65      /* now for squaring tx can never equal ty
66       * we halve the distance since they approach at a rate of 2x
67       * and we have to round because odd cases need to be executed
68       */
69      iy = MIN(iy, (ty-tx+1)>>1);
70
71      /* execute loop */
72      for (iz = 0; iz < iy; iz++) {
73         _W += ((mp_word)*tmpx++)*((mp_word)*tmpy--);
74      }
75
76      /* double the inner product and add carry */
77      _W = _W + _W + W1;
78
79      /* even columns have the square term in them */
80      if ((ix&1) == 0) {
81         _W += ((mp_word)a->dp[ix>>1])*((mp_word)a->dp[ix>>1]);
82      }
83
84      /* store it */
85      W[ix] = (mp_digit)(_W & MP_MASK);
86
87      /* make next carry */
88      W1 = _W >> ((mp_word)DIGIT_BIT);
89  }
90
91  /* setup dest */
92  olduse  = b->used;
93  b->used = a->used+a->used;
94
95  {
96    mp_digit *tmpb;
97    tmpb = b->dp;
98    for (ix = 0; ix < pa; ix++) {
99      *tmpb++ = W[ix] & MP_MASK;
100    }
101
102    /* clear unused digits [that existed in the old copy of c] */
103    for (; ix < olduse; ix++) {
104      *tmpb++ = 0;
105    }
106  }
107  mp_clamp (b);
108  return MP_OKAY;
109}
110#endif
111
112/* $Source: /cvsroot/tcl/libtommath/bn_fast_s_mp_sqr.c,v $ */
113/* $Revision: 1.1.1.4 $ */
114/* $Date: 2006/12/01 00:08:11 $ */
115