/* mpn_sqr_basecase -- Internal routine to square a natural number
   of length n.

   THIS IS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE.  IT IS ONLY
   SAFE TO REACH THIS FUNCTION THROUGH DOCUMENTED INTERFACES.


Copyright 1991-1994, 1996, 1997, 2000-2005, 2008, 2010, 2011, 2017 Free
Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of either:

  * the GNU Lesser General Public License as published by the Free
    Software Foundation; either version 3 of the License, or (at your
    option) any later version.

or

  * the GNU General Public License as published by the Free Software
    Foundation; either version 2 of the License, or (at your option) any
    later version.

or both in parallel, as here.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received copies of the GNU General Public License and the
GNU Lesser General Public License along with the GNU MP Library.  If not,
see https://www.gnu.org/licenses/.
*/ 36 37#include "gmp-impl.h" 38#include "longlong.h" 39 40 41#if HAVE_NATIVE_mpn_sqr_diagonal 42#define MPN_SQR_DIAGONAL(rp, up, n) \ 43 mpn_sqr_diagonal (rp, up, n) 44#else 45#define MPN_SQR_DIAGONAL(rp, up, n) \ 46 do { \ 47 mp_size_t _i; \ 48 for (_i = 0; _i < (n); _i++) \ 49 { \ 50 mp_limb_t ul, lpl; \ 51 ul = (up)[_i]; \ 52 umul_ppmm ((rp)[2 * _i + 1], lpl, ul, ul << GMP_NAIL_BITS); \ 53 (rp)[2 * _i] = lpl >> GMP_NAIL_BITS; \ 54 } \ 55 } while (0) 56#endif 57 58#if HAVE_NATIVE_mpn_sqr_diag_addlsh1 59#define MPN_SQR_DIAG_ADDLSH1(rp, tp, up, n) \ 60 mpn_sqr_diag_addlsh1 (rp, tp, up, n) 61#else 62#if HAVE_NATIVE_mpn_addlsh1_n 63#define MPN_SQR_DIAG_ADDLSH1(rp, tp, up, n) \ 64 do { \ 65 mp_limb_t cy; \ 66 MPN_SQR_DIAGONAL (rp, up, n); \ 67 cy = mpn_addlsh1_n (rp + 1, rp + 1, tp, 2 * n - 2); \ 68 rp[2 * n - 1] += cy; \ 69 } while (0) 70#else 71#define MPN_SQR_DIAG_ADDLSH1(rp, tp, up, n) \ 72 do { \ 73 mp_limb_t cy; \ 74 MPN_SQR_DIAGONAL (rp, up, n); \ 75 cy = mpn_lshift (tp, tp, 2 * n - 2, 1); \ 76 cy += mpn_add_n (rp + 1, rp + 1, tp, 2 * n - 2); \ 77 rp[2 * n - 1] += cy; \ 78 } while (0) 79#endif 80#endif 81 82 83#undef READY_WITH_mpn_sqr_basecase 84 85 86#if ! 
defined (READY_WITH_mpn_sqr_basecase) && HAVE_NATIVE_mpn_addmul_2s 87void 88mpn_sqr_basecase (mp_ptr rp, mp_srcptr up, mp_size_t n) 89{ 90 mp_size_t i; 91 mp_limb_t tarr[2 * SQR_TOOM2_THRESHOLD]; 92 mp_ptr tp = tarr; 93 mp_limb_t cy; 94 95 /* must fit 2*n limbs in tarr */ 96 ASSERT (n <= SQR_TOOM2_THRESHOLD); 97 98 if ((n & 1) != 0) 99 { 100 if (n == 1) 101 { 102 mp_limb_t ul, lpl; 103 ul = up[0]; 104 umul_ppmm (rp[1], lpl, ul, ul << GMP_NAIL_BITS); 105 rp[0] = lpl >> GMP_NAIL_BITS; 106 return; 107 } 108 109 MPN_ZERO (tp, n); 110 111 for (i = 0; i <= n - 2; i += 2) 112 { 113 cy = mpn_addmul_2s (tp + 2 * i, up + i + 1, n - (i + 1), up + i); 114 tp[n + i] = cy; 115 } 116 } 117 else 118 { 119 if (n == 2) 120 { 121#if HAVE_NATIVE_mpn_mul_2 122 rp[3] = mpn_mul_2 (rp, up, 2, up); 123#else 124 rp[0] = 0; 125 rp[1] = 0; 126 rp[3] = mpn_addmul_2 (rp, up, 2, up); 127#endif 128 return; 129 } 130 131 MPN_ZERO (tp, n); 132 133 for (i = 0; i <= n - 4; i += 2) 134 { 135 cy = mpn_addmul_2s (tp + 2 * i, up + i + 1, n - (i + 1), up + i); 136 tp[n + i] = cy; 137 } 138 cy = mpn_addmul_1 (tp + 2 * n - 4, up + n - 1, 1, up[n - 2]); 139 tp[2 * n - 3] = cy; 140 } 141 142 MPN_SQR_DIAG_ADDLSH1 (rp, tp, up, n); 143} 144#define READY_WITH_mpn_sqr_basecase 145#endif 146 147 148#if ! defined (READY_WITH_mpn_sqr_basecase) && HAVE_NATIVE_mpn_addmul_2 149 150/* mpn_sqr_basecase using plain mpn_addmul_2. 151 152 This is tricky, since we have to let mpn_addmul_2 make some undesirable 153 multiplies, u[k]*u[k], that we would like to let mpn_sqr_diagonal handle. 154 This forces us to conditionally add or subtract the mpn_sqr_diagonal 155 results. 
Examples of the product we form: 156 157 n = 4 n = 5 n = 6 158 u1u0 * u3u2u1 u1u0 * u4u3u2u1 u1u0 * u5u4u3u2u1 159 u2 * u3 u3u2 * u4u3 u3u2 * u5u4u3 160 u4 * u5 161 add: u0 u2 u3 add: u0 u2 u4 add: u0 u2 u4 u5 162 sub: u1 sub: u1 u3 sub: u1 u3 163*/ 164 165void 166mpn_sqr_basecase (mp_ptr rp, mp_srcptr up, mp_size_t n) 167{ 168 mp_size_t i; 169 mp_limb_t tarr[2 * SQR_TOOM2_THRESHOLD]; 170 mp_ptr tp = tarr; 171 mp_limb_t cy; 172 173 /* must fit 2*n limbs in tarr */ 174 ASSERT (n <= SQR_TOOM2_THRESHOLD); 175 176 if ((n & 1) != 0) 177 { 178 mp_limb_t x0, x1; 179 180 if (n == 1) 181 { 182 mp_limb_t ul, lpl; 183 ul = up[0]; 184 umul_ppmm (rp[1], lpl, ul, ul << GMP_NAIL_BITS); 185 rp[0] = lpl >> GMP_NAIL_BITS; 186 return; 187 } 188 189 /* The code below doesn't like unnormalized operands. Since such 190 operands are unusual, handle them with a dumb recursion. */ 191 if (up[n - 1] == 0) 192 { 193 rp[2 * n - 2] = 0; 194 rp[2 * n - 1] = 0; 195 mpn_sqr_basecase (rp, up, n - 1); 196 return; 197 } 198 199 MPN_ZERO (tp, n); 200 201 for (i = 0; i <= n - 2; i += 2) 202 { 203 cy = mpn_addmul_2 (tp + 2 * i, up + i + 1, n - (i + 1), up + i); 204 tp[n + i] = cy; 205 } 206 207 MPN_SQR_DIAGONAL (rp, up, n); 208 209 for (i = 2;; i += 4) 210 { 211 x0 = rp[i + 0]; 212 rp[i + 0] = (-x0) & GMP_NUMB_MASK; 213 x1 = rp[i + 1]; 214 rp[i + 1] = (-x1 - (x0 != 0)) & GMP_NUMB_MASK; 215 __GMPN_SUB_1 (cy, rp + i + 2, rp + i + 2, 2, (x1 | x0) != 0); 216 if (i + 4 >= 2 * n) 217 break; 218 mpn_incr_u (rp + i + 4, cy); 219 } 220 } 221 else 222 { 223 mp_limb_t x0, x1; 224 225 if (n == 2) 226 { 227#if HAVE_NATIVE_mpn_mul_2 228 rp[3] = mpn_mul_2 (rp, up, 2, up); 229#else 230 rp[0] = 0; 231 rp[1] = 0; 232 rp[3] = mpn_addmul_2 (rp, up, 2, up); 233#endif 234 return; 235 } 236 237 /* The code below doesn't like unnormalized operands. Since such 238 operands are unusual, handle them with a dumb recursion. 
*/ 239 if (up[n - 1] == 0) 240 { 241 rp[2 * n - 2] = 0; 242 rp[2 * n - 1] = 0; 243 mpn_sqr_basecase (rp, up, n - 1); 244 return; 245 } 246 247 MPN_ZERO (tp, n); 248 249 for (i = 0; i <= n - 4; i += 2) 250 { 251 cy = mpn_addmul_2 (tp + 2 * i, up + i + 1, n - (i + 1), up + i); 252 tp[n + i] = cy; 253 } 254 cy = mpn_addmul_1 (tp + 2 * n - 4, up + n - 1, 1, up[n - 2]); 255 tp[2 * n - 3] = cy; 256 257 MPN_SQR_DIAGONAL (rp, up, n); 258 259 for (i = 2;; i += 4) 260 { 261 x0 = rp[i + 0]; 262 rp[i + 0] = (-x0) & GMP_NUMB_MASK; 263 x1 = rp[i + 1]; 264 rp[i + 1] = (-x1 - (x0 != 0)) & GMP_NUMB_MASK; 265 if (i + 6 >= 2 * n) 266 break; 267 __GMPN_SUB_1 (cy, rp + i + 2, rp + i + 2, 2, (x1 | x0) != 0); 268 mpn_incr_u (rp + i + 4, cy); 269 } 270 mpn_decr_u (rp + i + 2, (x1 | x0) != 0); 271 } 272 273#if HAVE_NATIVE_mpn_addlsh1_n 274 cy = mpn_addlsh1_n (rp + 1, rp + 1, tp, 2 * n - 2); 275#else 276 cy = mpn_lshift (tp, tp, 2 * n - 2, 1); 277 cy += mpn_add_n (rp + 1, rp + 1, tp, 2 * n - 2); 278#endif 279 rp[2 * n - 1] += cy; 280} 281#define READY_WITH_mpn_sqr_basecase 282#endif 283 284 285#if ! defined (READY_WITH_mpn_sqr_basecase) && HAVE_NATIVE_mpn_sqr_diag_addlsh1 286 287/* mpn_sqr_basecase using mpn_addmul_1 and mpn_sqr_diag_addlsh1, avoiding stack 288 allocation. */ 289void 290mpn_sqr_basecase (mp_ptr rp, mp_srcptr up, mp_size_t n) 291{ 292 if (n == 1) 293 { 294 mp_limb_t ul, lpl; 295 ul = up[0]; 296 umul_ppmm (rp[1], lpl, ul, ul << GMP_NAIL_BITS); 297 rp[0] = lpl >> GMP_NAIL_BITS; 298 } 299 else 300 { 301 mp_size_t i; 302 mp_ptr xp; 303 304 rp += 1; 305 rp[n - 1] = mpn_mul_1 (rp, up + 1, n - 1, up[0]); 306 for (i = n - 2; i != 0; i--) 307 { 308 up += 1; 309 rp += 2; 310 rp[i] = mpn_addmul_1 (rp, up + 1, i, up[0]); 311 } 312 313 xp = rp - 2 * n + 3; 314 mpn_sqr_diag_addlsh1 (xp, xp + 1, up - n + 2, n); 315 } 316} 317#define READY_WITH_mpn_sqr_basecase 318#endif 319 320 321#if ! defined (READY_WITH_mpn_sqr_basecase) 322 323/* Default mpn_sqr_basecase using mpn_addmul_1. 
*/ 324void 325mpn_sqr_basecase (mp_ptr rp, mp_srcptr up, mp_size_t n) 326{ 327 mp_size_t i; 328 329 ASSERT (n >= 1); 330 ASSERT (! MPN_OVERLAP_P (rp, 2*n, up, n)); 331 332 if (n == 1) 333 { 334 mp_limb_t ul, lpl; 335 ul = up[0]; 336 umul_ppmm (rp[1], lpl, ul, ul << GMP_NAIL_BITS); 337 rp[0] = lpl >> GMP_NAIL_BITS; 338 } 339 else 340 { 341 mp_limb_t tarr[2 * SQR_TOOM2_THRESHOLD]; 342 mp_ptr tp = tarr; 343 mp_limb_t cy; 344 345 /* must fit 2*n limbs in tarr */ 346 ASSERT (n <= SQR_TOOM2_THRESHOLD); 347 348 cy = mpn_mul_1 (tp, up + 1, n - 1, up[0]); 349 tp[n - 1] = cy; 350 for (i = 2; i < n; i++) 351 { 352 mp_limb_t cy; 353 cy = mpn_addmul_1 (tp + 2 * i - 2, up + i, n - i, up[i - 1]); 354 tp[n + i - 2] = cy; 355 } 356 357 MPN_SQR_DIAG_ADDLSH1 (rp, tp, up, n); 358 } 359} 360#define READY_WITH_mpn_sqr_basecase 361#endif 362