1/* Implementation of the squaring algorithm with Toom-Cook 6.5-way. 2 3 Contributed to the GNU project by Marco Bodrato. 4 5 THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY 6 SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST 7 GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE. 8 9Copyright 2009 Free Software Foundation, Inc. 10 11This file is part of the GNU MP Library. 12 13The GNU MP Library is free software; you can redistribute it and/or modify 14it under the terms of the GNU Lesser General Public License as published by 15the Free Software Foundation; either version 3 of the License, or (at your 16option) any later version. 17 18The GNU MP Library is distributed in the hope that it will be useful, but 19WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 20or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public 21License for more details. 22 23You should have received a copy of the GNU Lesser General Public License 24along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ 25 26 27#include "gmp.h" 28#include "gmp-impl.h" 29 30 31#if GMP_NUMB_BITS < 21 32#error Not implemented. 33#endif 34 35 36#if TUNE_PROGRAM_BUILD 37#define MAYBE_sqr_basecase 1 38#define MAYBE_sqr_above_basecase 1 39#define MAYBE_sqr_toom2 1 40#define MAYBE_sqr_above_toom2 1 41#define MAYBE_sqr_toom3 1 42#define MAYBE_sqr_above_toom3 1 43#define MAYBE_sqr_above_toom4 1 44#else 45#ifdef SQR_TOOM8_THRESHOLD 46#define SQR_TOOM6_MAX ((SQR_TOOM8_THRESHOLD+6*2-1+5)/6) 47#else 48#define SQR_TOOM6_MAX \ 49 ((SQR_FFT_THRESHOLD <= MP_SIZE_T_MAX - (6*2-1+5)) ? \ 50 ((SQR_FFT_THRESHOLD+6*2-1+5)/6) \ 51 : MP_SIZE_T_MAX ) 52#endif 53#define MAYBE_sqr_basecase \ 54 (SQR_TOOM6_THRESHOLD < 6 * SQR_TOOM2_THRESHOLD) 55#define MAYBE_sqr_above_basecase \ 56 (SQR_TOOM6_MAX >= SQR_TOOM2_THRESHOLD) 57#define MAYBE_sqr_toom2 \ 58 (SQR_TOOM6_THRESHOLD < 6 * SQR_TOOM3_THRESHOLD) 59#define MAYBE_sqr_above_toom2 \ 60 (SQR_TOOM6_MAX >= SQR_TOOM3_THRESHOLD) 61#define MAYBE_sqr_toom3 \ 62 (SQR_TOOM6_THRESHOLD < 6 * SQR_TOOM4_THRESHOLD) 63#define MAYBE_sqr_above_toom3 \ 64 (SQR_TOOM6_MAX >= SQR_TOOM4_THRESHOLD) 65#define MAYBE_sqr_above_toom4 \ 66 (SQR_TOOM6_MAX >= SQR_TOOM6_THRESHOLD) 67#endif 68 69#define TOOM6_SQR_REC(p, a, n, ws) \ 70 do { \ 71 if (MAYBE_sqr_basecase && ( !MAYBE_sqr_above_basecase \ 72 || BELOW_THRESHOLD (n, SQR_TOOM2_THRESHOLD))) \ 73 mpn_sqr_basecase (p, a, n); \ 74 else if (MAYBE_sqr_toom2 && ( !MAYBE_sqr_above_toom2 \ 75 || BELOW_THRESHOLD (n, SQR_TOOM3_THRESHOLD))) \ 76 mpn_toom2_sqr (p, a, n, ws); \ 77 else if (MAYBE_sqr_toom3 && ( !MAYBE_sqr_above_toom3 \ 78 || BELOW_THRESHOLD (n, SQR_TOOM4_THRESHOLD))) \ 79 mpn_toom3_sqr (p, a, n, ws); \ 80 else if (! MAYBE_sqr_above_toom4 \ 81 || BELOW_THRESHOLD (n, SQR_TOOM6_THRESHOLD)) \ 82 mpn_toom4_sqr (p, a, n, ws); \ 83 else \ 84 mpn_toom6_sqr (p, a, n, ws); \ 85 } while (0) 86 87void 88mpn_toom6_sqr (mp_ptr pp, mp_srcptr ap, mp_size_t an, mp_ptr scratch) 89{ 90 mp_size_t n, s; 91 92 /***************************** decomposition *******************************/ 93 94 ASSERT( an >= 18 ); 95 96 n = 1 + (an - 1) / (size_t) 6; 97 98 s = an - 5 * n; 99 100 ASSERT (0 < s && s <= n); 101 102#define r4 (pp + 3 * n) /* 3n+1 */ 103#define r2 (pp + 7 * n) /* 3n+1 */ 104#define r0 (pp +11 * n) /* s+t <= 2*n */ 105#define r5 (scratch) /* 3n+1 */ 106#define r3 (scratch + 3 * n + 1) /* 3n+1 */ 107#define r1 (scratch + 6 * n + 2) /* 3n+1 */ 108#define v0 (pp + 7 * n) /* n+1 */ 109#define v2 (pp + 9 * n+2) /* n+1 */ 110#define wse (scratch + 9 * n + 3) /* 3n+1 */ 111 112 /* Alloc also 3n+1 limbs for ws... toom_interpolate_12pts may 113 need all of them, when DO_mpn_sublsh_n usea a scratch */ 114/* if (scratch== NULL) */ 115/* scratch = TMP_SALLOC_LIMBS (12 * n + 6); */ 116 117 /********************** evaluation and recursive calls *********************/ 118 /* $\pm1/2$ */ 119 mpn_toom_eval_pm2rexp (v2, v0, 5, ap, n, s, 1, pp); 120 TOOM6_SQR_REC(pp, v0, n + 1, wse); /* A(-1/2)*B(-1/2)*2^. */ 121 TOOM6_SQR_REC(r5, v2, n + 1, wse); /* A(+1/2)*B(+1/2)*2^. */ 122 mpn_toom_couple_handling (r5, 2 * n + 1, pp, 0, n, 1, 0); 123 124 /* $\pm1$ */ 125 mpn_toom_eval_pm1 (v2, v0, 5, ap, n, s, pp); 126 TOOM6_SQR_REC(pp, v0, n + 1, wse); /* A(-1)*B(-1) */ 127 TOOM6_SQR_REC(r3, v2, n + 1, wse); /* A(1)*B(1) */ 128 mpn_toom_couple_handling (r3, 2 * n + 1, pp, 0, n, 0, 0); 129 130 /* $\pm4$ */ 131 mpn_toom_eval_pm2exp (v2, v0, 5, ap, n, s, 2, pp); 132 TOOM6_SQR_REC(pp, v0, n + 1, wse); /* A(-4)*B(-4) */ 133 TOOM6_SQR_REC(r1, v2, n + 1, wse); /* A(+4)*B(+4) */ 134 mpn_toom_couple_handling (r1, 2 * n + 1, pp, 0, n, 2, 4); 135 136 /* $\pm1/4$ */ 137 mpn_toom_eval_pm2rexp (v2, v0, 5, ap, n, s, 2, pp); 138 TOOM6_SQR_REC(pp, v0, n + 1, wse); /* A(-1/4)*B(-1/4)*4^. */ 139 TOOM6_SQR_REC(r4, v2, n + 1, wse); /* A(+1/4)*B(+1/4)*4^. */ 140 mpn_toom_couple_handling (r4, 2 * n + 1, pp, 0, n, 2, 0); 141 142 /* $\pm2$ */ 143 mpn_toom_eval_pm2 (v2, v0, 5, ap, n, s, pp); 144 TOOM6_SQR_REC(pp, v0, n + 1, wse); /* A(-2)*B(-2) */ 145 TOOM6_SQR_REC(r2, v2, n + 1, wse); /* A(+2)*B(+2) */ 146 mpn_toom_couple_handling (r2, 2 * n + 1, pp, 0, n, 1, 2); 147 148#undef v0 149#undef v2 150 151 /* A(0)*B(0) */ 152 TOOM6_SQR_REC(pp, ap, n, wse); 153 154 mpn_toom_interpolate_12pts (pp, r1, r3, r5, n, 2 * s, 0, wse); 155 156#undef r0 157#undef r1 158#undef r2 159#undef r3 160#undef r4 161#undef r5 162 163} 164#undef TOOM6_SQR_REC 165#undef MAYBE_sqr_basecase 166#undef MAYBE_sqr_above_basecase 167#undef MAYBE_sqr_toom2 168#undef MAYBE_sqr_above_toom2 169#undef MAYBE_sqr_toom3 170#undef MAYBE_sqr_above_toom3 171#undef MAYBE_sqr_above_toom4 172