1/* mpn_toom_interpolate_6pts -- Interpolate for toom43, 52 2 3 Contributed to the GNU project by Marco Bodrato. 4 5 THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY 6 SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST 7 GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE. 8 9Copyright 2009, 2010 Free Software Foundation, Inc. 10 11This file is part of the GNU MP Library. 12 13The GNU MP Library is free software; you can redistribute it and/or modify 14it under the terms of the GNU Lesser General Public License as published by 15the Free Software Foundation; either version 3 of the License, or (at your 16option) any later version. 17 18The GNU MP Library is distributed in the hope that it will be useful, but 19WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 20or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public 21License for more details. 22 23You should have received a copy of the GNU Lesser General Public License 24along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ 25 26#include "gmp.h" 27#include "gmp-impl.h" 28 29/* For odd divisors, mpn_divexact_1 works fine with two's complement. */ 30#ifndef mpn_divexact_by3 31#if HAVE_NATIVE_mpn_pi1_bdiv_q_1 && MODLIMB_INVERSE_3 32#define mpn_divexact_by3(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,3,MODLIMB_INVERSE_3,0) 33#else 34#define mpn_divexact_by3(dst,src,size) mpn_divexact_1(dst,src,size,3) 35#endif 36#endif 37 38/* Interpolation for Toom-3.5, using the evaluation points: infinity, 39 1, -1, 2, -2. More precisely, we want to compute 40 f(2^(GMP_NUMB_BITS * n)) for a polynomial f of degree 5, given the 41 six values 42 43 w5 = f(0), 44 w4 = f(-1), 45 w3 = f(1) 46 w2 = f(-2), 47 w1 = f(2), 48 w0 = limit at infinity of f(x) / x^5, 49 50 The result is stored in {pp, 5*n + w0n}. At entry, w5 is stored at 51 {pp, 2n}, w3 is stored at {pp + 2n, 2n+1}, and w0 is stored at 52 {pp + 5n, w0n}. The other values are 2n + 1 limbs each (with most 53 significant limbs small). f(-1) and f(-2) may be negative, signs 54 determined by the flag bits. All intermediate results are positive. 55 Inputs are destroyed. 56 57 Interpolation sequence was taken from the paper: "Integer and 58 Polynomial Multiplication: Towards Optimal Toom-Cook Matrices". 59 Some slight variations were introduced: adaptation to "gmp 60 instruction set", and a final saving of an operation by interlacing 61 interpolation and recomposition phases. 62*/ 63 64void 65mpn_toom_interpolate_6pts (mp_ptr pp, mp_size_t n, enum toom6_flags flags, 66 mp_ptr w4, mp_ptr w2, mp_ptr w1, 67 mp_size_t w0n) 68{ 69 mp_limb_t cy; 70 /* cy6 can be stored in w1[2*n], cy4 in w4[0], embankment in w2[0] */ 71 mp_limb_t cy4, cy6, embankment; 72 73 ASSERT( n > 0 ); 74 ASSERT( 2*n >= w0n && w0n > 0 ); 75 76#define w5 pp /* 2n */ 77#define w3 (pp + 2 * n) /* 2n+1 */ 78#define w0 (pp + 5 * n) /* w0n */ 79 80 /* Interpolate with sequence: 81 W2 =(W1 - W2)>>2 82 W1 =(W1 - W5)>>1 83 W1 =(W1 - W2)>>1 84 W4 =(W3 - W4)>>1 85 W2 =(W2 - W4)/3 86 W3 = W3 - W4 - W5 87 W1 =(W1 - W3)/3 88 // Last steps are mixed with recomposition... 89 W2 = W2 - W0<<2 90 W4 = W4 - W2 91 W3 = W3 - W1 92 W2 = W2 - W0 93 */ 94 95 /* W2 =(W1 - W2)>>2 */ 96 if (flags & toom6_vm2_neg) 97 mpn_add_n (w2, w1, w2, 2 * n + 1); 98 else 99 mpn_sub_n (w2, w1, w2, 2 * n + 1); 100 mpn_rshift (w2, w2, 2 * n + 1, 2); 101 102 /* W1 =(W1 - W5)>>1 */ 103 w1[2*n] -= mpn_sub_n (w1, w1, w5, 2*n); 104 mpn_rshift (w1, w1, 2 * n + 1, 1); 105 106 /* W1 =(W1 - W2)>>1 */ 107#if HAVE_NATIVE_mpn_rsh1sub_n 108 mpn_rsh1sub_n (w1, w1, w2, 2 * n + 1); 109#else 110 mpn_sub_n (w1, w1, w2, 2 * n + 1); 111 mpn_rshift (w1, w1, 2 * n + 1, 1); 112#endif 113 114 /* W4 =(W3 - W4)>>1 */ 115 if (flags & toom6_vm1_neg) 116 { 117#if HAVE_NATIVE_mpn_rsh1add_n 118 mpn_rsh1add_n (w4, w3, w4, 2 * n + 1); 119#else 120 mpn_add_n (w4, w3, w4, 2 * n + 1); 121 mpn_rshift (w4, w4, 2 * n + 1, 1); 122#endif 123 } 124 else 125 { 126#if HAVE_NATIVE_mpn_rsh1sub_n 127 mpn_rsh1sub_n (w4, w3, w4, 2 * n + 1); 128#else 129 mpn_sub_n (w4, w3, w4, 2 * n + 1); 130 mpn_rshift (w4, w4, 2 * n + 1, 1); 131#endif 132 } 133 134 /* W2 =(W2 - W4)/3 */ 135 mpn_sub_n (w2, w2, w4, 2 * n + 1); 136 mpn_divexact_by3 (w2, w2, 2 * n + 1); 137 138 /* W3 = W3 - W4 - W5 */ 139 mpn_sub_n (w3, w3, w4, 2 * n + 1); 140 w3[2 * n] -= mpn_sub_n (w3, w3, w5, 2 * n); 141 142 /* W1 =(W1 - W3)/3 */ 143 mpn_sub_n (w1, w1, w3, 2 * n + 1); 144 mpn_divexact_by3 (w1, w1, 2 * n + 1); 145 146 /* 147 [1 0 0 0 0 0; 148 0 1 0 0 0 0; 149 1 0 1 0 0 0; 150 0 1 0 1 0 0; 151 1 0 1 0 1 0; 152 0 0 0 0 0 1] 153 154 pp[] prior to operations: 155 |_H w0__|_L w0__|______||_H w3__|_L w3__|_H w5__|_L w5__| 156 157 summation scheme for remaining operations: 158 |______________5|n_____4|n_____3|n_____2|n______|n______|pp 159 |_H w0__|_L w0__|______||_H w3__|_L w3__|_H w5__|_L w5__| 160 || H w4 | L w4 | 161 || H w2 | L w2 | 162 || H w1 | L w1 | 163 ||-H w1 |-L w1 | 164 |-H w0 |-L w0 ||-H w2 |-L w2 | 165 */ 166 cy = mpn_add_n (pp + n, pp + n, w4, 2 * n + 1); 167 MPN_INCR_U (pp + 3 * n + 1, n, cy); 168 169 /* W2 -= W0<<2 */ 170#if HAVE_NATIVE_mpn_sublsh_n || HAVE_NATIVE_mpn_sublsh2_n 171#if HAVE_NATIVE_mpn_sublsh2_n 172 cy = mpn_sublsh2_n(w2, w2, w0, w0n); 173#else 174 cy = mpn_sublsh_n(w2, w2, w0, w0n, 2); 175#endif 176#else 177 /* {W4,2*n+1} is now free and can be overwritten. */ 178 cy = mpn_lshift(w4, w0, w0n, 2); 179 cy+= mpn_sub_n(w2, w2, w4, w0n); 180#endif 181 MPN_DECR_U (w2 + w0n, 2 * n + 1 - w0n, cy); 182 183 /* W4L = W4L - W2L */ 184 cy = mpn_sub_n (pp + n, pp + n, w2, n); 185 MPN_DECR_U (w3, 2 * n + 1, cy); 186 187 /* W3H = W3H + W2L */ 188 cy4 = w3[2 * n] + mpn_add_n (pp + 3 * n, pp + 3 * n, w2, n); 189 /* W1L + W2H */ 190 cy = w2[2 * n] + mpn_add_n (pp + 4 * n, w1, w2 + n, n); 191 MPN_INCR_U (w1 + n, n + 1, cy); 192 193 /* W0 = W0 + W1H */ 194 if (LIKELY (w0n > n)) 195 cy6 = w1[2 * n] + mpn_add_n (w0, w0, w1 + n, n); 196 else 197 cy6 = mpn_add_n (w0, w0, w1 + n, w0n); 198 199 /* 200 summation scheme for the next operation: 201 |...____5|n_____4|n_____3|n_____2|n______|n______|pp 202 |...w0___|_w1_w2_|_H w3__|_L w3__|_H w5__|_L w5__| 203 ...-w0___|-w1_w2 | 204 */ 205 /* if(LIKELY(w0n>n)) the two operands below DO overlap! */ 206 cy = mpn_sub_n (pp + 2 * n, pp + 2 * n, pp + 4 * n, n + w0n); 207 208 /* embankment is a "dirty trick" to avoid carry/borrow propagation 209 beyond allocated memory */ 210 embankment = w0[w0n - 1] - 1; 211 w0[w0n - 1] = 1; 212 if (LIKELY (w0n > n)) { 213 if ( LIKELY(cy4 > cy6) ) 214 MPN_INCR_U (pp + 4 * n, w0n + n, cy4 - cy6); 215 else 216 MPN_DECR_U (pp + 4 * n, w0n + n, cy6 - cy4); 217 MPN_DECR_U (pp + 3 * n + w0n, 2 * n, cy); 218 MPN_INCR_U (w0 + n, w0n - n, cy6); 219 } else { 220 MPN_INCR_U (pp + 4 * n, w0n + n, cy4); 221 MPN_DECR_U (pp + 3 * n + w0n, 2 * n, cy + cy6); 222 } 223 w0[w0n - 1] += embankment; 224 225#undef w5 226#undef w3 227#undef w0 228 229} 230