1/* Implementation of the multiplication algorithm for Toom-Cook 8.5-way. 2 3 Contributed to the GNU project by Marco Bodrato. 4 5 THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY 6 SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST 7 GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE. 8 9Copyright 2009, 2010, 2012 Free Software Foundation, Inc. 10 11This file is part of the GNU MP Library. 12 13The GNU MP Library is free software; you can redistribute it and/or modify 14it under the terms of either: 15 16 * the GNU Lesser General Public License as published by the Free 17 Software Foundation; either version 3 of the License, or (at your 18 option) any later version. 19 20or 21 22 * the GNU General Public License as published by the Free Software 23 Foundation; either version 2 of the License, or (at your option) any 24 later version. 25 26or both in parallel, as here. 27 28The GNU MP Library is distributed in the hope that it will be useful, but 29WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 30or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 31for more details. 32 33You should have received copies of the GNU General Public License and the 34GNU Lesser General Public License along with the GNU MP Library. If not, 35see https://www.gnu.org/licenses/. */ 36 37 38#include "gmp-impl.h" 39 40 41#if GMP_NUMB_BITS < 29 42#error Not implemented. 43#endif 44 45#if GMP_NUMB_BITS < 43 46#define BIT_CORRECTION 1 47#define CORRECTION_BITS GMP_NUMB_BITS 48#else 49#define BIT_CORRECTION 0 50#define CORRECTION_BITS 0 51#endif 52 53 54#if TUNE_PROGRAM_BUILD 55#define MAYBE_mul_basecase 1 56#define MAYBE_mul_toom22 1 57#define MAYBE_mul_toom33 1 58#define MAYBE_mul_toom44 1 59#define MAYBE_mul_toom8h 1 60#else 61#define MAYBE_mul_basecase \ 62 (MUL_TOOM8H_THRESHOLD < 8 * MUL_TOOM22_THRESHOLD) 63#define MAYBE_mul_toom22 \ 64 (MUL_TOOM8H_THRESHOLD < 8 * MUL_TOOM33_THRESHOLD) 65#define MAYBE_mul_toom33 \ 66 (MUL_TOOM8H_THRESHOLD < 8 * MUL_TOOM44_THRESHOLD) 67#define MAYBE_mul_toom44 \ 68 (MUL_TOOM8H_THRESHOLD < 8 * MUL_TOOM6H_THRESHOLD) 69#define MAYBE_mul_toom8h \ 70 (MUL_FFT_THRESHOLD >= 8 * MUL_TOOM8H_THRESHOLD) 71#endif 72 73#define TOOM8H_MUL_N_REC(p, a, b, f, p2, a2, b2, n, ws) \ 74 do { \ 75 if (MAYBE_mul_basecase \ 76 && BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD)) { \ 77 mpn_mul_basecase (p, a, n, b, n); \ 78 if (f) mpn_mul_basecase (p2, a2, n, b2, n); \ 79 } else if (MAYBE_mul_toom22 \ 80 && BELOW_THRESHOLD (n, MUL_TOOM33_THRESHOLD)) { \ 81 mpn_toom22_mul (p, a, n, b, n, ws); \ 82 if (f) mpn_toom22_mul (p2, a2, n, b2, n, ws); \ 83 } else if (MAYBE_mul_toom33 \ 84 && BELOW_THRESHOLD (n, MUL_TOOM44_THRESHOLD)) { \ 85 mpn_toom33_mul (p, a, n, b, n, ws); \ 86 if (f) mpn_toom33_mul (p2, a2, n, b2, n, ws); \ 87 } else if (MAYBE_mul_toom44 \ 88 && BELOW_THRESHOLD (n, MUL_TOOM6H_THRESHOLD)) { \ 89 mpn_toom44_mul (p, a, n, b, n, ws); \ 90 if (f) mpn_toom44_mul (p2, a2, n, b2, n, ws); \ 91 } else if (! MAYBE_mul_toom8h \ 92 || BELOW_THRESHOLD (n, MUL_TOOM8H_THRESHOLD)) { \ 93 mpn_toom6h_mul (p, a, n, b, n, ws); \ 94 if (f) mpn_toom6h_mul (p2, a2, n, b2, n, ws); \ 95 } else { \ 96 mpn_toom8h_mul (p, a, n, b, n, ws); \ 97 if (f) mpn_toom8h_mul (p2, a2, n, b2, n, ws); \ 98 } \ 99 } while (0) 100 101#define TOOM8H_MUL_REC(p, a, na, b, nb, ws) \ 102 do { mpn_mul (p, a, na, b, nb); } while (0) 103 104/* Toom-8.5 , compute the product {pp,an+bn} <- {ap,an} * {bp,bn} 105 With: an >= bn >= 86, an*5 < bn * 11. 106 It _may_ work with bn<=?? and bn*?? < an*? < bn*?? 107 108 Evaluate in: infinity, +8,-8,+4,-4,+2,-2,+1,-1,+1/2,-1/2,+1/4,-1/4,+1/8,-1/8,0. 109*/ 110/* Estimate on needed scratch: 111 S(n) <= (n+7)\8*13+5+MAX(S((n+7)\8),1+2*(n+7)\8), 112 since n>80; S(n) <= ceil(log(n/10)/log(8))*(13+5)+n*15\8 < n*15\8 + lg2(n)*6 113 */ 114 115void 116mpn_toom8h_mul (mp_ptr pp, 117 mp_srcptr ap, mp_size_t an, 118 mp_srcptr bp, mp_size_t bn, mp_ptr scratch) 119{ 120 mp_size_t n, s, t; 121 int p, q, half; 122 int sign; 123 124 /***************************** decomposition *******************************/ 125 126 ASSERT (an >= bn); 127 /* Can not handle too small operands */ 128 ASSERT (bn >= 86); 129 /* Can not handle too much unbalancement */ 130 ASSERT (an <= bn*4); 131 ASSERT (GMP_NUMB_BITS > 11*3 || an*4 <= bn*11); 132 ASSERT (GMP_NUMB_BITS > 10*3 || an*1 <= bn* 2); 133 ASSERT (GMP_NUMB_BITS > 9*3 || an*2 <= bn* 3); 134 135 /* Limit num/den is a rational number between 136 (16/15)^(log(6)/log(2*6-1)) and (16/15)^(log(8)/log(2*8-1)) */ 137#define LIMIT_numerator (21) 138#define LIMIT_denominat (20) 139 140 if (LIKELY (an == bn) || an * (LIMIT_denominat>>1) < LIMIT_numerator * (bn>>1) ) /* is 8*... < 8*... */ 141 { 142 half = 0; 143 n = 1 + ((an - 1)>>3); 144 p = q = 7; 145 s = an - 7 * n; 146 t = bn - 7 * n; 147 } 148 else 149 { 150 if (an * 13 < 16 * bn) /* (an*7*LIMIT_numerator<LIMIT_denominat*9*bn) */ 151 { p = 9; q = 8; } 152 else if (GMP_NUMB_BITS <= 9*3 || 153 an *(LIMIT_denominat>>1) < (LIMIT_numerator/7*9) * (bn>>1)) 154 { p = 9; q = 7; } 155 else if (an * 10 < 33 * (bn>>1)) /* (an*3*LIMIT_numerator<LIMIT_denominat*5*bn) */ 156 { p =10; q = 7; } 157 else if (GMP_NUMB_BITS <= 10*3 || 158 an * (LIMIT_denominat/5) < (LIMIT_numerator/3) * bn) 159 { p =10; q = 6; } 160 else if (an * 6 < 13 * bn) /*(an * 5 * LIMIT_numerator < LIMIT_denominat *11 * bn)*/ 161 { p =11; q = 6; } 162 else if (GMP_NUMB_BITS <= 11*3 || 163 an * 4 < 9 * bn) 164 { p =11; q = 5; } 165 else if (an *(LIMIT_numerator/3) < LIMIT_denominat * bn) /* is 4*... <12*... */ 166 { p =12; q = 5; } 167 else if (GMP_NUMB_BITS <= 12*3 || 168 an * 9 < 28 * bn ) /* is 4*... <12*... */ 169 { p =12; q = 4; } 170 else 171 { p =13; q = 4; } 172 173 half = (p+q)&1; 174 n = 1 + (q * an >= p * bn ? (an - 1) / (size_t) p : (bn - 1) / (size_t) q); 175 p--; q--; 176 177 s = an - p * n; 178 t = bn - q * n; 179 180 if(half) { /* Recover from badly chosen splitting */ 181 if (UNLIKELY (s<1)) {p--; s+=n; half=0;} 182 else if (UNLIKELY (t<1)) {q--; t+=n; half=0;} 183 } 184 } 185#undef LIMIT_numerator 186#undef LIMIT_denominat 187 188 ASSERT (0 < s && s <= n); 189 ASSERT (0 < t && t <= n); 190 ASSERT (half || s + t > 3); 191 ASSERT (n > 2); 192 193#define r6 (pp + 3 * n) /* 3n+1 */ 194#define r4 (pp + 7 * n) /* 3n+1 */ 195#define r2 (pp +11 * n) /* 3n+1 */ 196#define r0 (pp +15 * n) /* s+t <= 2*n */ 197#define r7 (scratch) /* 3n+1 */ 198#define r5 (scratch + 3 * n + 1) /* 3n+1 */ 199#define r3 (scratch + 6 * n + 2) /* 3n+1 */ 200#define r1 (scratch + 9 * n + 3) /* 3n+1 */ 201#define v0 (pp +11 * n) /* n+1 */ 202#define v1 (pp +12 * n+1) /* n+1 */ 203#define v2 (pp +13 * n+2) /* n+1 */ 204#define v3 (scratch +12 * n + 4) /* n+1 */ 205#define wsi (scratch +12 * n + 4) /* 3n+1 */ 206#define wse (scratch +13 * n + 5) /* 2n+1 */ 207 208 /* Alloc also 3n+1 limbs for wsi... toom_interpolate_16pts may 209 need all of them */ 210/* if (scratch == NULL) */ 211/* scratch = TMP_SALLOC_LIMBS(mpn_toom8_sqr_itch(n * 8)); */ 212 ASSERT (15 * n + 6 <= mpn_toom8h_mul_itch (an, bn)); 213 ASSERT (15 * n + 6 <= mpn_toom8_sqr_itch (n * 8)); 214 215 /********************** evaluation and recursive calls *********************/ 216 217 /* $\pm1/8$ */ 218 sign = mpn_toom_eval_pm2rexp (v2, v0, p, ap, n, s, 3, pp) ^ 219 mpn_toom_eval_pm2rexp (v3, v1, q, bp, n, t, 3, pp); 220 /* A(-1/8)*B(-1/8)*8^. */ /* A(+1/8)*B(+1/8)*8^. */ 221 TOOM8H_MUL_N_REC(pp, v0, v1, 2, r7, v2, v3, n + 1, wse); 222 mpn_toom_couple_handling (r7, 2 * n + 1 + BIT_CORRECTION, pp, sign, n, 3*(1+half), 3*(half)); 223 224 /* $\pm1/4$ */ 225 sign = mpn_toom_eval_pm2rexp (v2, v0, p, ap, n, s, 2, pp) ^ 226 mpn_toom_eval_pm2rexp (v3, v1, q, bp, n, t, 2, pp); 227 /* A(-1/4)*B(-1/4)*4^. */ /* A(+1/4)*B(+1/4)*4^. */ 228 TOOM8H_MUL_N_REC(pp, v0, v1, 2, r5, v2, v3, n + 1, wse); 229 mpn_toom_couple_handling (r5, 2 * n + 1, pp, sign, n, 2*(1+half), 2*(half)); 230 231 /* $\pm2$ */ 232 sign = mpn_toom_eval_pm2 (v2, v0, p, ap, n, s, pp) ^ 233 mpn_toom_eval_pm2 (v3, v1, q, bp, n, t, pp); 234 /* A(-2)*B(-2) */ /* A(+2)*B(+2) */ 235 TOOM8H_MUL_N_REC(pp, v0, v1, 2, r3, v2, v3, n + 1, wse); 236 mpn_toom_couple_handling (r3, 2 * n + 1, pp, sign, n, 1, 2); 237 238 /* $\pm8$ */ 239 sign = mpn_toom_eval_pm2exp (v2, v0, p, ap, n, s, 3, pp) ^ 240 mpn_toom_eval_pm2exp (v3, v1, q, bp, n, t, 3, pp); 241 /* A(-8)*B(-8) */ /* A(+8)*B(+8) */ 242 TOOM8H_MUL_N_REC(pp, v0, v1, 2, r1, v2, v3, n + 1, wse); 243 mpn_toom_couple_handling (r1, 2 * n + 1 + BIT_CORRECTION, pp, sign, n, 3, 6); 244 245 /* $\pm1/2$ */ 246 sign = mpn_toom_eval_pm2rexp (v2, v0, p, ap, n, s, 1, pp) ^ 247 mpn_toom_eval_pm2rexp (v3, v1, q, bp, n, t, 1, pp); 248 /* A(-1/2)*B(-1/2)*2^. */ /* A(+1/2)*B(+1/2)*2^. */ 249 TOOM8H_MUL_N_REC(pp, v0, v1, 2, r6, v2, v3, n + 1, wse); 250 mpn_toom_couple_handling (r6, 2 * n + 1, pp, sign, n, 1+half, half); 251 252 /* $\pm1$ */ 253 sign = mpn_toom_eval_pm1 (v2, v0, p, ap, n, s, pp); 254 if (GMP_NUMB_BITS > 12*3 && UNLIKELY (q == 3)) 255 sign ^= mpn_toom_eval_dgr3_pm1 (v3, v1, bp, n, t, pp); 256 else 257 sign ^= mpn_toom_eval_pm1 (v3, v1, q, bp, n, t, pp); 258 /* A(-1)*B(-1) */ /* A(1)*B(1) */ 259 TOOM8H_MUL_N_REC(pp, v0, v1, 2, r4, v2, v3, n + 1, wse); 260 mpn_toom_couple_handling (r4, 2 * n + 1, pp, sign, n, 0, 0); 261 262 /* $\pm4$ */ 263 sign = mpn_toom_eval_pm2exp (v2, v0, p, ap, n, s, 2, pp) ^ 264 mpn_toom_eval_pm2exp (v3, v1, q, bp, n, t, 2, pp); 265 /* A(-4)*B(-4) */ /* A(+4)*B(+4) */ 266 TOOM8H_MUL_N_REC(pp, v0, v1, 2, r2, v2, v3, n + 1, wse); 267 mpn_toom_couple_handling (r2, 2 * n + 1, pp, sign, n, 2, 4); 268 269#undef v0 270#undef v1 271#undef v2 272#undef v3 273#undef wse 274 275 /* A(0)*B(0) */ 276 TOOM8H_MUL_N_REC(pp, ap, bp, 0, pp, ap, bp, n, wsi); 277 278 /* Infinity */ 279 if (UNLIKELY (half != 0)) { 280 if (s > t) { 281 TOOM8H_MUL_REC(r0, ap + p * n, s, bp + q * n, t, wsi); 282 } else { 283 TOOM8H_MUL_REC(r0, bp + q * n, t, ap + p * n, s, wsi); 284 }; 285 }; 286 287 mpn_toom_interpolate_16pts (pp, r1, r3, r5, r7, n, s+t, half, wsi); 288 289#undef r0 290#undef r1 291#undef r2 292#undef r3 293#undef r4 294#undef r5 295#undef r6 296#undef wsi 297} 298 299#undef TOOM8H_MUL_N_REC 300#undef TOOM8H_MUL_REC 301#undef MAYBE_mul_basecase 302#undef MAYBE_mul_toom22 303#undef MAYBE_mul_toom33 304#undef MAYBE_mul_toom44 305#undef MAYBE_mul_toom8h 306