1/* $NetBSD: bn_mp_toom_mul.c,v 1.2 2017/01/28 21:31:47 christos Exp $ */ 2 3#include <tommath.h> 4#ifdef BN_MP_TOOM_MUL_C 5/* LibTomMath, multiple-precision integer library -- Tom St Denis 6 * 7 * LibTomMath is a library that provides multiple-precision 8 * integer arithmetic as well as number theoretic functionality. 9 * 10 * The library was designed directly after the MPI library by 11 * Michael Fromberger but has been written from scratch with 12 * additional optimizations in place. 13 * 14 * The library is free for all purposes without any express 15 * guarantee it works. 16 * 17 * Tom St Denis, tomstdenis@gmail.com, http://libtom.org 18 */ 19 20/* multiplication using the Toom-Cook 3-way algorithm 21 * 22 * Much more complicated than Karatsuba but has a lower 23 * asymptotic running time of O(N**1.464). This algorithm is 24 * only particularly useful on VERY large inputs 25 * (we're talking 1000s of digits here...). 26*/ 27int mp_toom_mul(mp_int *a, mp_int *b, mp_int *c) 28{ 29 mp_int w0, w1, w2, w3, w4, tmp1, tmp2, a0, a1, a2, b0, b1, b2; 30 int res, B; 31 32 /* init temps */ 33 if ((res = mp_init_multi(&w0, &w1, &w2, &w3, &w4, 34 &a0, &a1, &a2, &b0, &b1, 35 &b2, &tmp1, &tmp2, NULL)) != MP_OKAY) { 36 return res; 37 } 38 39 /* B */ 40 B = MIN(a->used, b->used) / 3; 41 42 /* a = a2 * B**2 + a1 * B + a0 */ 43 if ((res = mp_mod_2d(a, DIGIT_BIT * B, &a0)) != MP_OKAY) { 44 goto ERR; 45 } 46 47 if ((res = mp_copy(a, &a1)) != MP_OKAY) { 48 goto ERR; 49 } 50 mp_rshd(&a1, B); 51 mp_mod_2d(&a1, DIGIT_BIT * B, &a1); 52 53 if ((res = mp_copy(a, &a2)) != MP_OKAY) { 54 goto ERR; 55 } 56 mp_rshd(&a2, B*2); 57 58 /* b = b2 * B**2 + b1 * B + b0 */ 59 if ((res = mp_mod_2d(b, DIGIT_BIT * B, &b0)) != MP_OKAY) { 60 goto ERR; 61 } 62 63 if ((res = mp_copy(b, &b1)) != MP_OKAY) { 64 goto ERR; 65 } 66 mp_rshd(&b1, B); 67 mp_mod_2d(&b1, DIGIT_BIT * B, &b1); 68 69 if ((res = mp_copy(b, &b2)) != MP_OKAY) { 70 goto ERR; 71 } 72 mp_rshd(&b2, B*2); 73 74 /* w0 = a0*b0 */ 75 if ((res = mp_mul(&a0, &b0, &w0)) != MP_OKAY) { 76 goto ERR; 77 } 78 79 /* w4 = a2 * b2 */ 80 if ((res = mp_mul(&a2, &b2, &w4)) != MP_OKAY) { 81 goto ERR; 82 } 83 84 /* w1 = (a2 + 2(a1 + 2a0))(b2 + 2(b1 + 2b0)) */ 85 if ((res = mp_mul_2(&a0, &tmp1)) != MP_OKAY) { 86 goto ERR; 87 } 88 if ((res = mp_add(&tmp1, &a1, &tmp1)) != MP_OKAY) { 89 goto ERR; 90 } 91 if ((res = mp_mul_2(&tmp1, &tmp1)) != MP_OKAY) { 92 goto ERR; 93 } 94 if ((res = mp_add(&tmp1, &a2, &tmp1)) != MP_OKAY) { 95 goto ERR; 96 } 97 98 if ((res = mp_mul_2(&b0, &tmp2)) != MP_OKAY) { 99 goto ERR; 100 } 101 if ((res = mp_add(&tmp2, &b1, &tmp2)) != MP_OKAY) { 102 goto ERR; 103 } 104 if ((res = mp_mul_2(&tmp2, &tmp2)) != MP_OKAY) { 105 goto ERR; 106 } 107 if ((res = mp_add(&tmp2, &b2, &tmp2)) != MP_OKAY) { 108 goto ERR; 109 } 110 111 if ((res = mp_mul(&tmp1, &tmp2, &w1)) != MP_OKAY) { 112 goto ERR; 113 } 114 115 /* w3 = (a0 + 2(a1 + 2a2))(b0 + 2(b1 + 2b2)) */ 116 if ((res = mp_mul_2(&a2, &tmp1)) != MP_OKAY) { 117 goto ERR; 118 } 119 if ((res = mp_add(&tmp1, &a1, &tmp1)) != MP_OKAY) { 120 goto ERR; 121 } 122 if ((res = mp_mul_2(&tmp1, &tmp1)) != MP_OKAY) { 123 goto ERR; 124 } 125 if ((res = mp_add(&tmp1, &a0, &tmp1)) != MP_OKAY) { 126 goto ERR; 127 } 128 129 if ((res = mp_mul_2(&b2, &tmp2)) != MP_OKAY) { 130 goto ERR; 131 } 132 if ((res = mp_add(&tmp2, &b1, &tmp2)) != MP_OKAY) { 133 goto ERR; 134 } 135 if ((res = mp_mul_2(&tmp2, &tmp2)) != MP_OKAY) { 136 goto ERR; 137 } 138 if ((res = mp_add(&tmp2, &b0, &tmp2)) != MP_OKAY) { 139 goto ERR; 140 } 141 142 if ((res = mp_mul(&tmp1, &tmp2, &w3)) != MP_OKAY) { 143 goto ERR; 144 } 145 146 147 /* w2 = (a2 + a1 + a0)(b2 + b1 + b0) */ 148 if ((res = mp_add(&a2, &a1, &tmp1)) != MP_OKAY) { 149 goto ERR; 150 } 151 if ((res = mp_add(&tmp1, &a0, &tmp1)) != MP_OKAY) { 152 goto ERR; 153 } 154 if ((res = mp_add(&b2, &b1, &tmp2)) != MP_OKAY) { 155 goto ERR; 156 } 157 if ((res = mp_add(&tmp2, &b0, &tmp2)) != MP_OKAY) { 158 goto ERR; 159 } 160 if ((res = mp_mul(&tmp1, &tmp2, &w2)) != MP_OKAY) { 161 goto ERR; 162 } 163 164 /* now solve the matrix 165 166 0 0 0 0 1 167 1 2 4 8 16 168 1 1 1 1 1 169 16 8 4 2 1 170 1 0 0 0 0 171 172 using 12 subtractions, 4 shifts, 173 2 small divisions and 1 small multiplication 174 */ 175 176 /* r1 - r4 */ 177 if ((res = mp_sub(&w1, &w4, &w1)) != MP_OKAY) { 178 goto ERR; 179 } 180 /* r3 - r0 */ 181 if ((res = mp_sub(&w3, &w0, &w3)) != MP_OKAY) { 182 goto ERR; 183 } 184 /* r1/2 */ 185 if ((res = mp_div_2(&w1, &w1)) != MP_OKAY) { 186 goto ERR; 187 } 188 /* r3/2 */ 189 if ((res = mp_div_2(&w3, &w3)) != MP_OKAY) { 190 goto ERR; 191 } 192 /* r2 - r0 - r4 */ 193 if ((res = mp_sub(&w2, &w0, &w2)) != MP_OKAY) { 194 goto ERR; 195 } 196 if ((res = mp_sub(&w2, &w4, &w2)) != MP_OKAY) { 197 goto ERR; 198 } 199 /* r1 - r2 */ 200 if ((res = mp_sub(&w1, &w2, &w1)) != MP_OKAY) { 201 goto ERR; 202 } 203 /* r3 - r2 */ 204 if ((res = mp_sub(&w3, &w2, &w3)) != MP_OKAY) { 205 goto ERR; 206 } 207 /* r1 - 8r0 */ 208 if ((res = mp_mul_2d(&w0, 3, &tmp1)) != MP_OKAY) { 209 goto ERR; 210 } 211 if ((res = mp_sub(&w1, &tmp1, &w1)) != MP_OKAY) { 212 goto ERR; 213 } 214 /* r3 - 8r4 */ 215 if ((res = mp_mul_2d(&w4, 3, &tmp1)) != MP_OKAY) { 216 goto ERR; 217 } 218 if ((res = mp_sub(&w3, &tmp1, &w3)) != MP_OKAY) { 219 goto ERR; 220 } 221 /* 3r2 - r1 - r3 */ 222 if ((res = mp_mul_d(&w2, 3, &w2)) != MP_OKAY) { 223 goto ERR; 224 } 225 if ((res = mp_sub(&w2, &w1, &w2)) != MP_OKAY) { 226 goto ERR; 227 } 228 if ((res = mp_sub(&w2, &w3, &w2)) != MP_OKAY) { 229 goto ERR; 230 } 231 /* r1 - r2 */ 232 if ((res = mp_sub(&w1, &w2, &w1)) != MP_OKAY) { 233 goto ERR; 234 } 235 /* r3 - r2 */ 236 if ((res = mp_sub(&w3, &w2, &w3)) != MP_OKAY) { 237 goto ERR; 238 } 239 /* r1/3 */ 240 if ((res = mp_div_3(&w1, &w1, NULL)) != MP_OKAY) { 241 goto ERR; 242 } 243 /* r3/3 */ 244 if ((res = mp_div_3(&w3, &w3, NULL)) != MP_OKAY) { 245 goto ERR; 246 } 247 248 /* at this point shift W[n] by B*n */ 249 if ((res = mp_lshd(&w1, 1*B)) != MP_OKAY) { 250 goto ERR; 251 } 252 if ((res = mp_lshd(&w2, 2*B)) != MP_OKAY) { 253 goto ERR; 254 } 255 if ((res = mp_lshd(&w3, 3*B)) != MP_OKAY) { 256 goto ERR; 257 } 258 if ((res = mp_lshd(&w4, 4*B)) != MP_OKAY) { 259 goto ERR; 260 } 261 262 if ((res = mp_add(&w0, &w1, c)) != MP_OKAY) { 263 goto ERR; 264 } 265 if ((res = mp_add(&w2, &w3, &tmp1)) != MP_OKAY) { 266 goto ERR; 267 } 268 if ((res = mp_add(&w4, &tmp1, &tmp1)) != MP_OKAY) { 269 goto ERR; 270 } 271 if ((res = mp_add(&tmp1, c, c)) != MP_OKAY) { 272 goto ERR; 273 } 274 275ERR: 276 mp_clear_multi(&w0, &w1, &w2, &w3, &w4, 277 &a0, &a1, &a2, &b0, &b1, 278 &b2, &tmp1, &tmp2, NULL); 279 return res; 280} 281 282#endif 283 284/* Source: /cvs/libtom/libtommath/bn_mp_toom_mul.c,v */ 285/* Revision: 1.4 */ 286/* Date: 2006/12/28 01:25:13 */ 287