1/* mpn_toom_interpolate_5pts -- Interpolate for toom3, 33, 42. 2 3 Contributed to the GNU project by Robert Harley. 4 Improvements by Paul Zimmermann and Marco Bodrato. 5 6 THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY 7 SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST 8 GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE. 9 10Copyright 2000-2003, 2005-2007, 2009 Free Software Foundation, Inc. 11 12This file is part of the GNU MP Library. 13 14The GNU MP Library is free software; you can redistribute it and/or modify 15it under the terms of either: 16 17 * the GNU Lesser General Public License as published by the Free 18 Software Foundation; either version 3 of the License, or (at your 19 option) any later version. 20 21or 22 23 * the GNU General Public License as published by the Free Software 24 Foundation; either version 2 of the License, or (at your option) any 25 later version. 26 27or both in parallel, as here. 28 29The GNU MP Library is distributed in the hope that it will be useful, but 30WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 31or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 32for more details. 33 34You should have received copies of the GNU General Public License and the 35GNU Lesser General Public License along with the GNU MP Library. If not, 36see https://www.gnu.org/licenses/. */ 37 38#include "gmp-impl.h" 39 40void 41mpn_toom_interpolate_5pts (mp_ptr c, mp_ptr v2, mp_ptr vm1, 42 mp_size_t k, mp_size_t twor, int sa, 43 mp_limb_t vinf0) 44{ 45 mp_limb_t cy, saved; 46 mp_size_t twok; 47 mp_size_t kk1; 48 mp_ptr c1, v1, c3, vinf; 49 50 twok = k + k; 51 kk1 = twok + 1; 52 53 c1 = c + k; 54 v1 = c1 + k; 55 c3 = v1 + k; 56 vinf = c3 + k; 57 58#define v0 (c) 59 /* (1) v2 <- v2-vm1 < v2+|vm1|, (16 8 4 2 1) - (1 -1 1 -1 1) = 60 thus 0 <= v2 < 50*B^(2k) < 2^6*B^(2k) (15 9 3 3 0) 61 */ 62 if (sa) 63 ASSERT_NOCARRY (mpn_add_n (v2, v2, vm1, kk1)); 64 else 65 ASSERT_NOCARRY (mpn_sub_n (v2, v2, vm1, kk1)); 66 67 /* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r} 68 v0 v1 hi(vinf) |vm1| v2-vm1 EMPTY */ 69 70 ASSERT_NOCARRY (mpn_divexact_by3 (v2, v2, kk1)); /* v2 <- v2 / 3 */ 71 /* (5 3 1 1 0)*/ 72 73 /* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r} 74 v0 v1 hi(vinf) |vm1| (v2-vm1)/3 EMPTY */ 75 76 /* (2) vm1 <- tm1 := (v1 - vm1) / 2 [(1 1 1 1 1) - (1 -1 1 -1 1)] / 2 = 77 tm1 >= 0 (0 1 0 1 0) 78 No carry comes out from {v1, kk1} +/- {vm1, kk1}, 79 and the division by two is exact. 80 If (sa!=0) the sign of vm1 is negative */ 81 if (sa) 82 { 83#ifdef HAVE_NATIVE_mpn_rsh1add_n 84 mpn_rsh1add_n (vm1, v1, vm1, kk1); 85#else 86 ASSERT_NOCARRY (mpn_add_n (vm1, v1, vm1, kk1)); 87 ASSERT_NOCARRY (mpn_rshift (vm1, vm1, kk1, 1)); 88#endif 89 } 90 else 91 { 92#ifdef HAVE_NATIVE_mpn_rsh1sub_n 93 mpn_rsh1sub_n (vm1, v1, vm1, kk1); 94#else 95 ASSERT_NOCARRY (mpn_sub_n (vm1, v1, vm1, kk1)); 96 ASSERT_NOCARRY (mpn_rshift (vm1, vm1, kk1, 1)); 97#endif 98 } 99 100 /* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r} 101 v0 v1 hi(vinf) tm1 (v2-vm1)/3 EMPTY */ 102 103 /* (3) v1 <- t1 := v1 - v0 (1 1 1 1 1) - (0 0 0 0 1) = (1 1 1 1 0) 104 t1 >= 0 105 */ 106 vinf[0] -= mpn_sub_n (v1, v1, c, twok); 107 108 /* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r} 109 v0 v1-v0 hi(vinf) tm1 (v2-vm1)/3 EMPTY */ 110 111 /* (4) v2 <- t2 := ((v2-vm1)/3-t1)/2 = (v2-vm1-3*t1)/6 112 t2 >= 0 [(5 3 1 1 0) - (1 1 1 1 0)]/2 = (2 1 0 0 0) 113 */ 114#ifdef HAVE_NATIVE_mpn_rsh1sub_n 115 mpn_rsh1sub_n (v2, v2, v1, kk1); 116#else 117 ASSERT_NOCARRY (mpn_sub_n (v2, v2, v1, kk1)); 118 ASSERT_NOCARRY (mpn_rshift (v2, v2, kk1, 1)); 119#endif 120 121 /* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r} 122 v0 v1-v0 hi(vinf) tm1 (v2-vm1-3t1)/6 EMPTY */ 123 124 /* (5) v1 <- t1-tm1 (1 1 1 1 0) - (0 1 0 1 0) = (1 0 1 0 0) 125 result is v1 >= 0 126 */ 127 ASSERT_NOCARRY (mpn_sub_n (v1, v1, vm1, kk1)); 128 129 /* We do not need to read the value in vm1, so we add it in {c+k, ...} */ 130 cy = mpn_add_n (c1, c1, vm1, kk1); 131 MPN_INCR_U (c3 + 1, twor + k - 1, cy); /* 2n-(3k+1) = 2r+k-1 */ 132 /* Memory allocated for vm1 is now free, it can be recycled ...*/ 133 134 /* (6) v2 <- v2 - 2*vinf, (2 1 0 0 0) - 2*(1 0 0 0 0) = (0 1 0 0 0) 135 result is v2 >= 0 */ 136 saved = vinf[0]; /* Remember v1's highest byte (will be overwritten). */ 137 vinf[0] = vinf0; /* Set the right value for vinf0 */ 138#ifdef HAVE_NATIVE_mpn_sublsh1_n_ip1 139 cy = mpn_sublsh1_n_ip1 (v2, vinf, twor); 140#else 141 /* Overwrite unused vm1 */ 142 cy = mpn_lshift (vm1, vinf, twor, 1); 143 cy += mpn_sub_n (v2, v2, vm1, twor); 144#endif 145 MPN_DECR_U (v2 + twor, kk1 - twor, cy); 146 147 /* Current matrix is 148 [1 0 0 0 0; vinf 149 0 1 0 0 0; v2 150 1 0 1 0 0; v1 151 0 1 0 1 0; vm1 152 0 0 0 0 1] v0 153 Some values already are in-place (we added vm1 in the correct position) 154 | vinf| v1 | v0 | 155 | vm1 | 156 One still is in a separated area 157 | +v2 | 158 We have to compute v1-=vinf; vm1 -= v2, 159 |-vinf| 160 | -v2 | 161 Carefully reordering operations we can avoid to compute twice the sum 162 of the high half of v2 plus the low half of vinf. 163 */ 164 165 /* Add the high half of t2 in {vinf} */ 166 if ( LIKELY(twor > k + 1) ) { /* This is the expected flow */ 167 cy = mpn_add_n (vinf, vinf, v2 + k, k + 1); 168 MPN_INCR_U (c3 + kk1, twor - k - 1, cy); /* 2n-(5k+1) = 2r-k-1 */ 169 } else { /* triggered only by very unbalanced cases like 170 (k+k+(k-2))x(k+k+1) , should be handled by toom32 */ 171 ASSERT_NOCARRY (mpn_add_n (vinf, vinf, v2 + k, twor)); 172 } 173 /* (7) v1 <- v1 - vinf, (1 0 1 0 0) - (1 0 0 0 0) = (0 0 1 0 0) 174 result is >= 0 */ 175 /* Side effect: we also subtracted (high half) vm1 -= v2 */ 176 cy = mpn_sub_n (v1, v1, vinf, twor); /* vinf is at most twor long. */ 177 vinf0 = vinf[0]; /* Save again the right value for vinf0 */ 178 vinf[0] = saved; 179 MPN_DECR_U (v1 + twor, kk1 - twor, cy); /* Treat the last bytes. */ 180 181 /* (8) vm1 <- vm1-v2 (0 1 0 1 0) - (0 1 0 0 0) = (0 0 0 1 0) 182 Operate only on the low half. 183 */ 184 cy = mpn_sub_n (c1, c1, v2, k); 185 MPN_DECR_U (v1, kk1, cy); 186 187 /********************* Beginning the final phase **********************/ 188 189 /* Most of the recomposition was done */ 190 191 /* add t2 in {c+3k, ...}, but only the low half */ 192 cy = mpn_add_n (c3, c3, v2, k); 193 vinf[0] += cy; 194 ASSERT(vinf[0] >= cy); /* No carry */ 195 MPN_INCR_U (vinf, twor, vinf0); /* Add vinf0, propagate carry. */ 196 197#undef v0 198} 199