1/* mpn_sqrlo_basecase -- Internal routine to square a natural number 2 of length n. 3 4 THIS IS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE. IT IS ONLY 5 SAFE TO REACH THIS FUNCTION THROUGH DOCUMENTED INTERFACES. 6 7 8Copyright 1991-1994, 1996, 1997, 2000-2005, 2008, 2010, 2011, 2015, 92016 Free Software Foundation, Inc. 10 11This file is part of the GNU MP Library. 12 13The GNU MP Library is free software; you can redistribute it and/or modify 14it under the terms of either: 15 16 * the GNU Lesser General Public License as published by the Free 17 Software Foundation; either version 3 of the License, or (at your 18 option) any later version. 19 20or 21 22 * the GNU General Public License as published by the Free Software 23 Foundation; either version 2 of the License, or (at your option) any 24 later version. 25 26or both in parallel, as here. 27 28The GNU MP Library is distributed in the hope that it will be useful, but 29WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 30or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 31for more details. 32 33You should have received copies of the GNU General Public License and the 34GNU Lesser General Public License along with the GNU MP Library. If not, 35see https://www.gnu.org/licenses/. */ 36 37#include "gmp-impl.h" 38#include "longlong.h" 39 40#ifndef SQRLO_SHORTCUT_MULTIPLICATIONS 41#if HAVE_NATIVE_mpn_addmul_1 42#define SQRLO_SHORTCUT_MULTIPLICATIONS 0 43#else 44#define SQRLO_SHORTCUT_MULTIPLICATIONS 1 45#endif 46#endif 47 48#if HAVE_NATIVE_mpn_sqr_diagonal 49#define MPN_SQR_DIAGONAL(rp, up, n) \ 50 mpn_sqr_diagonal (rp, up, n) 51#else 52#define MPN_SQR_DIAGONAL(rp, up, n) \ 53 do { \ 54 mp_size_t _i; \ 55 for (_i = 0; _i < (n); _i++) \ 56 { \ 57 mp_limb_t ul, lpl; \ 58 ul = (up)[_i]; \ 59 umul_ppmm ((rp)[2 * _i + 1], lpl, ul, ul << GMP_NAIL_BITS); \ 60 (rp)[2 * _i] = lpl >> GMP_NAIL_BITS; \ 61 } \ 62 } while (0) 63#endif 64 65#define MPN_SQRLO_DIAGONAL(rp, up, n) \ 66 do { \ 67 mp_size_t nhalf; \ 68 nhalf = (n) >> 1; \ 69 MPN_SQR_DIAGONAL ((rp), (up), nhalf); \ 70 if (((n) & 1) != 0) \ 71 { \ 72 mp_limb_t op; \ 73 op = (up)[nhalf]; \ 74 (rp)[(n) - 1] = (op * op) & GMP_NUMB_MASK; \ 75 } \ 76 } while (0) 77 78#if HAVE_NATIVE_mpn_addlsh1_n_ip1 79#define MPN_SQRLO_DIAG_ADDLSH1(rp, tp, up, n) \ 80 do { \ 81 MPN_SQRLO_DIAGONAL((rp), (up), (n)); \ 82 mpn_addlsh1_n_ip1 ((rp) + 1, (tp), (n) - 1); \ 83 } while (0) 84#else 85#define MPN_SQRLO_DIAG_ADDLSH1(rp, tp, up, n) \ 86 do { \ 87 MPN_SQRLO_DIAGONAL((rp), (up), (n)); \ 88 mpn_lshift ((tp), (tp), (n) - 1, 1); \ 89 mpn_add_n ((rp) + 1, (rp) + 1, (tp), (n) - 1); \ 90 } while (0) 91#endif 92 93/* Avoid zero allocations when SQRLO_LO_THRESHOLD is 0 (this code not used). */ 94#define SQRLO_BASECASE_ALLOC \ 95 (SQRLO_DC_THRESHOLD_LIMIT < 2 ? 1 : SQRLO_DC_THRESHOLD_LIMIT - 1) 96 97/* Default mpn_sqrlo_basecase using mpn_addmul_1. */ 98#ifndef SQRLO_SPECIAL_CASES 99#define SQRLO_SPECIAL_CASES 2 100#endif 101 102#if TUNE_PROGRAM_BUILD || WANT_FAT_BINARY 103#define MAYBE_special_cases 1 104#else 105#define MAYBE_special_cases \ 106 ((SQRLO_BASECASE_THRESHOLD <= SQRLO_SPECIAL_CASES) && (SQRLO_DC_THRESHOLD != 0)) 107#endif 108 109void 110mpn_sqrlo_basecase (mp_ptr rp, mp_srcptr up, mp_size_t n) 111{ 112 mp_limb_t ul; 113 114 ASSERT (n >= 1); 115 ASSERT (! MPN_OVERLAP_P (rp, n, up, n)); 116 117 ul = up[0]; 118 119 if (MAYBE_special_cases && n <= SQRLO_SPECIAL_CASES) 120 { 121#if SQRLO_SPECIAL_CASES == 1 122 rp[0] = (ul * ul) & GMP_NUMB_MASK; 123#else 124 if (n == 1) 125 rp[0] = (ul * ul) & GMP_NUMB_MASK; 126 else 127 { 128 mp_limb_t hi, lo, ul1; 129 umul_ppmm (hi, lo, ul, ul << GMP_NAIL_BITS); 130 rp[0] = lo >> GMP_NAIL_BITS; 131 ul1 = up[1]; 132#if SQRLO_SPECIAL_CASES == 2 133 rp[1] = (hi + ul * ul1 * 2) & GMP_NUMB_MASK; 134#else 135 if (n == 2) 136 rp[1] = (hi + ul * ul1 * 2) & GMP_NUMB_MASK; 137 else 138 { 139 mp_limb_t hi1; 140#if GMP_NAIL_BITS != 0 141 ul <<= 1; 142#endif 143 umul_ppmm (hi1, lo, ul1 << GMP_NAIL_BITS, ul); 144 hi1 += ul * up[2]; 145#if GMP_NAIL_BITS == 0 146 hi1 = (hi1 << 1) | (lo >> (GMP_LIMB_BITS - 1)); 147 add_ssaaaa(rp[2], rp[1], hi1, lo << 1, ul1 * ul1, hi); 148#else 149 hi += lo >> GMP_NAIL_BITS; 150 rp[1] = hi & GMP_NUMB_MASK; 151 rp[2] = (hi1 + ul1 * ul1 + (hi >> GMP_NUMB_BITS)) & GMP_NUMB_MASK; 152#endif 153 } 154#endif 155 } 156#endif 157 } 158 else 159 { 160 mp_limb_t tp[SQRLO_BASECASE_ALLOC]; 161 mp_size_t i; 162 163 /* must fit n-1 limbs in tp */ 164 ASSERT (n <= SQRLO_DC_THRESHOLD_LIMIT); 165 166 --n; 167#if SQRLO_SHORTCUT_MULTIPLICATIONS 168 { 169 mp_limb_t cy; 170 171 cy = ul * up[n] + mpn_mul_1 (tp, up + 1, n - 1, ul); 172 for (i = 1; 2 * i + 1 < n; ++i) 173 { 174 ul = up[i]; 175 cy += ul * up[n - i] + mpn_addmul_1 (tp + 2 * i, up + i + 1, n - 2 * i - 1, ul); 176 } 177 tp [n-1] = (cy + ((n & 1)?up[i] * up[i + 1]:0)) & GMP_NUMB_MASK; 178 } 179#else 180 mpn_mul_1 (tp, up + 1, n, ul); 181 for (i = 1; 2 * i < n; ++i) 182 mpn_addmul_1 (tp + 2 * i, up + i + 1, n - 2 * i, up[i]); 183#endif 184 185 MPN_SQRLO_DIAG_ADDLSH1 (rp, tp, up, n + 1); 186 } 187} 188#undef SQRLO_SPECIAL_CASES 189#undef MAYBE_special_cases 190#undef SQRLO_BASECASE_ALLOC 191#undef SQRLO_SHORTCUT_MULTIPLICATIONS 192#undef MPN_SQR_DIAGONAL 193#undef MPN_SQRLO_DIAGONAL 194#undef MPN_SQRLO_DIAG_ADDLSH1 195