1#include <tommath.h>
2#ifdef BN_MP_TOOM_SQR_C
3/* LibTomMath, multiple-precision integer library -- Tom St Denis
4 *
5 * LibTomMath is a library that provides multiple-precision
6 * integer arithmetic as well as number theoretic functionality.
7 *
8 * The library was designed directly after the MPI library by
9 * Michael Fromberger but has been written from scratch with
10 * additional optimizations in place.
11 *
12 * The library is free for all purposes without any express
13 * guarantee it works.
14 *
15 * Tom St Denis, tomstdenis@gmail.com, http://libtom.org
16 */
17
18/* squaring using Toom-Cook 3-way algorithm */
19int
20mp_toom_sqr(mp_int *a, mp_int *b)
21{
22    mp_int w0, w1, w2, w3, w4, tmp1, a0, a1, a2;
23    int res, B;
24
25    /* init temps */
26    if ((res = mp_init_multi(&w0, &w1, &w2, &w3, &w4, &a0, &a1, &a2, &tmp1, NULL)) != MP_OKAY) {
27       return res;
28    }
29
30    /* B */
31    B = a->used / 3;
32
33    /* a = a2 * B**2 + a1 * B + a0 */
34    if ((res = mp_mod_2d(a, DIGIT_BIT * B, &a0)) != MP_OKAY) {
35       goto ERR;
36    }
37
38    if ((res = mp_copy(a, &a1)) != MP_OKAY) {
39       goto ERR;
40    }
41    mp_rshd(&a1, B);
42    mp_mod_2d(&a1, DIGIT_BIT * B, &a1);
43
44    if ((res = mp_copy(a, &a2)) != MP_OKAY) {
45       goto ERR;
46    }
47    mp_rshd(&a2, B*2);
48
49    /* w0 = a0*a0 */
50    if ((res = mp_sqr(&a0, &w0)) != MP_OKAY) {
51       goto ERR;
52    }
53
54    /* w4 = a2 * a2 */
55    if ((res = mp_sqr(&a2, &w4)) != MP_OKAY) {
56       goto ERR;
57    }
58
59    /* w1 = (a2 + 2(a1 + 2a0))**2 */
60    if ((res = mp_mul_2(&a0, &tmp1)) != MP_OKAY) {
61       goto ERR;
62    }
63    if ((res = mp_add(&tmp1, &a1, &tmp1)) != MP_OKAY) {
64       goto ERR;
65    }
66    if ((res = mp_mul_2(&tmp1, &tmp1)) != MP_OKAY) {
67       goto ERR;
68    }
69    if ((res = mp_add(&tmp1, &a2, &tmp1)) != MP_OKAY) {
70       goto ERR;
71    }
72
73    if ((res = mp_sqr(&tmp1, &w1)) != MP_OKAY) {
74       goto ERR;
75    }
76
77    /* w3 = (a0 + 2(a1 + 2a2))**2 */
78    if ((res = mp_mul_2(&a2, &tmp1)) != MP_OKAY) {
79       goto ERR;
80    }
81    if ((res = mp_add(&tmp1, &a1, &tmp1)) != MP_OKAY) {
82       goto ERR;
83    }
84    if ((res = mp_mul_2(&tmp1, &tmp1)) != MP_OKAY) {
85       goto ERR;
86    }
87    if ((res = mp_add(&tmp1, &a0, &tmp1)) != MP_OKAY) {
88       goto ERR;
89    }
90
91    if ((res = mp_sqr(&tmp1, &w3)) != MP_OKAY) {
92       goto ERR;
93    }
94
95
96    /* w2 = (a2 + a1 + a0)**2 */
97    if ((res = mp_add(&a2, &a1, &tmp1)) != MP_OKAY) {
98       goto ERR;
99    }
100    if ((res = mp_add(&tmp1, &a0, &tmp1)) != MP_OKAY) {
101       goto ERR;
102    }
103    if ((res = mp_sqr(&tmp1, &w2)) != MP_OKAY) {
104       goto ERR;
105    }
106
107    /* now solve the matrix
108
109       0  0  0  0  1
110       1  2  4  8  16
111       1  1  1  1  1
112       16 8  4  2  1
113       1  0  0  0  0
114
115       using 12 subtractions, 4 shifts, 2 small divisions and 1 small multiplication.
116     */
117
118     /* r1 - r4 */
119     if ((res = mp_sub(&w1, &w4, &w1)) != MP_OKAY) {
120        goto ERR;
121     }
122     /* r3 - r0 */
123     if ((res = mp_sub(&w3, &w0, &w3)) != MP_OKAY) {
124        goto ERR;
125     }
126     /* r1/2 */
127     if ((res = mp_div_2(&w1, &w1)) != MP_OKAY) {
128        goto ERR;
129     }
130     /* r3/2 */
131     if ((res = mp_div_2(&w3, &w3)) != MP_OKAY) {
132        goto ERR;
133     }
134     /* r2 - r0 - r4 */
135     if ((res = mp_sub(&w2, &w0, &w2)) != MP_OKAY) {
136        goto ERR;
137     }
138     if ((res = mp_sub(&w2, &w4, &w2)) != MP_OKAY) {
139        goto ERR;
140     }
141     /* r1 - r2 */
142     if ((res = mp_sub(&w1, &w2, &w1)) != MP_OKAY) {
143        goto ERR;
144     }
145     /* r3 - r2 */
146     if ((res = mp_sub(&w3, &w2, &w3)) != MP_OKAY) {
147        goto ERR;
148     }
149     /* r1 - 8r0 */
150     if ((res = mp_mul_2d(&w0, 3, &tmp1)) != MP_OKAY) {
151        goto ERR;
152     }
153     if ((res = mp_sub(&w1, &tmp1, &w1)) != MP_OKAY) {
154        goto ERR;
155     }
156     /* r3 - 8r4 */
157     if ((res = mp_mul_2d(&w4, 3, &tmp1)) != MP_OKAY) {
158        goto ERR;
159     }
160     if ((res = mp_sub(&w3, &tmp1, &w3)) != MP_OKAY) {
161        goto ERR;
162     }
163     /* 3r2 - r1 - r3 */
164     if ((res = mp_mul_d(&w2, 3, &w2)) != MP_OKAY) {
165        goto ERR;
166     }
167     if ((res = mp_sub(&w2, &w1, &w2)) != MP_OKAY) {
168        goto ERR;
169     }
170     if ((res = mp_sub(&w2, &w3, &w2)) != MP_OKAY) {
171        goto ERR;
172     }
173     /* r1 - r2 */
174     if ((res = mp_sub(&w1, &w2, &w1)) != MP_OKAY) {
175        goto ERR;
176     }
177     /* r3 - r2 */
178     if ((res = mp_sub(&w3, &w2, &w3)) != MP_OKAY) {
179        goto ERR;
180     }
181     /* r1/3 */
182     if ((res = mp_div_3(&w1, &w1, NULL)) != MP_OKAY) {
183        goto ERR;
184     }
185     /* r3/3 */
186     if ((res = mp_div_3(&w3, &w3, NULL)) != MP_OKAY) {
187        goto ERR;
188     }
189
190     /* at this point shift W[n] by B*n */
191     if ((res = mp_lshd(&w1, 1*B)) != MP_OKAY) {
192        goto ERR;
193     }
194     if ((res = mp_lshd(&w2, 2*B)) != MP_OKAY) {
195        goto ERR;
196     }
197     if ((res = mp_lshd(&w3, 3*B)) != MP_OKAY) {
198        goto ERR;
199     }
200     if ((res = mp_lshd(&w4, 4*B)) != MP_OKAY) {
201        goto ERR;
202     }
203
204     if ((res = mp_add(&w0, &w1, b)) != MP_OKAY) {
205        goto ERR;
206     }
207     if ((res = mp_add(&w2, &w3, &tmp1)) != MP_OKAY) {
208        goto ERR;
209     }
210     if ((res = mp_add(&w4, &tmp1, &tmp1)) != MP_OKAY) {
211        goto ERR;
212     }
213     if ((res = mp_add(&tmp1, b, b)) != MP_OKAY) {
214        goto ERR;
215     }
216
217ERR:
218     mp_clear_multi(&w0, &w1, &w2, &w3, &w4, &a0, &a1, &a2, &tmp1, NULL);
219     return res;
220}
221
222#endif
223
224/* $Source$ */
225/* $Revision$ */
226/* $Date$ */
227