/* mpn_mod_1s_4p (ap, n, b, cps)
   Divide (ap,,n) by b.  Return the single-limb remainder.
   Requires that b < B / 4.

   Contributed to the GNU project by Torbjorn Granlund.
   Based on a suggestion by Peter L. Montgomery.

   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
   GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.

Copyright 2008-2010 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of either:

  * the GNU Lesser General Public License as published by the Free
    Software Foundation; either version 3 of the License, or (at your
    option) any later version.

or

  * the GNU General Public License as published by the Free Software
    Foundation; either version 2 of the License, or (at your option) any
    later version.

or both in parallel, as here.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received copies of the GNU General Public License and the
GNU Lesser General Public License along with the GNU MP Library.  If not,
see https://www.gnu.org/licenses/.  */

#include "gmp-impl.h"
#include "longlong.h"

void
mpn_mod_1s_4p_cps (mp_limb_t cps[7], mp_limb_t b)
{
  mp_limb_t bi;
  mp_limb_t B1modb, B2modb, B3modb, B4modb, B5modb;
  int cnt;

  ASSERT (b <= (~(mp_limb_t) 0) / 4);

  count_leading_zeros (cnt, b);

  b <<= cnt;
  invert_limb (bi, b);

  cps[0] = bi;
  cps[1] = cnt;

  B1modb = -b * ((bi >> (GMP_LIMB_BITS-cnt)) | (CNST_LIMB(1) << cnt));
  ASSERT (B1modb <= b);		/* NB: not fully reduced mod b */
  cps[2] = B1modb >> cnt;

  udiv_rnnd_preinv (B2modb, B1modb, CNST_LIMB(0), b, bi);
  cps[3] = B2modb >> cnt;

  udiv_rnnd_preinv (B3modb, B2modb, CNST_LIMB(0), b, bi);
  cps[4] = B3modb >> cnt;

  udiv_rnnd_preinv (B4modb, B3modb, CNST_LIMB(0), b, bi);
  cps[5] = B4modb >> cnt;

  udiv_rnnd_preinv (B5modb, B4modb, CNST_LIMB(0), b, bi);
  cps[6] = B5modb >> cnt;

#if WANT_ASSERT
  {
    int i;
    b = cps[2];
    for (i = 3; i <= 6; i++)
      {
	b += cps[i];
	ASSERT (b >= cps[i]);
      }
  }
#endif
}

mp_limb_t
mpn_mod_1s_4p (mp_srcptr ap, mp_size_t n, mp_limb_t b, const mp_limb_t cps[7])
{
  mp_limb_t rh, rl, bi, ph, pl, ch, cl, r;
  mp_limb_t B1modb, B2modb, B3modb, B4modb, B5modb;
  mp_size_t i;
  int cnt;

  ASSERT (n >= 1);

  B1modb = cps[2];
  B2modb = cps[3];
  B3modb = cps[4];
  B4modb = cps[5];
  B5modb = cps[6];

  switch (n & 3)
    {
    case 0:
      umul_ppmm (ph, pl, ap[n - 3], B1modb);
      add_ssaaaa (ph, pl, ph, pl, CNST_LIMB(0), ap[n - 4]);
      umul_ppmm (ch, cl, ap[n - 2], B2modb);
      add_ssaaaa (ph, pl, ph, pl, ch, cl);
      umul_ppmm (rh, rl, ap[n - 1], B3modb);
      add_ssaaaa (rh, rl, rh, rl, ph, pl);
      n -= 4;
      break;
    case 1:
      rh = 0;
      rl = ap[n - 1];
      n -= 1;
      break;
    case 2:
      rh = ap[n - 1];
      rl = ap[n - 2];
      n -= 2;
      break;
    case 3:
      umul_ppmm (ph, pl, ap[n - 2], B1modb);
      add_ssaaaa (ph, pl, ph, pl, CNST_LIMB(0), ap[n - 3]);
      umul_ppmm (rh, rl, ap[n - 1], B2modb);
      add_ssaaaa (rh, rl, rh, rl, ph, pl);
      n -= 3;
      break;
    }

  for (i = n - 4; i >= 0; i -= 4)
    {
      /* rr = ap[i]                       < B
	    + ap[i+1] * (B mod b)         <= (B-1)(b-1)
	    + ap[i+2] * (B^2 mod b)       <= (B-1)(b-1)
	    + ap[i+3] * (B^3 mod b)       <= (B-1)(b-1)
	    + LO(rr)  * (B^4 mod b)       <= (B-1)(b-1)
	    + HI(rr)  * (B^5 mod b)       <= (B-1)(b-1)
      */
      umul_ppmm (ph, pl, ap[i + 1], B1modb);
      add_ssaaaa (ph, pl, ph, pl, CNST_LIMB(0), ap[i + 0]);

      umul_ppmm (ch, cl, ap[i + 2], B2modb);
      add_ssaaaa (ph, pl, ph, pl, ch, cl);

      umul_ppmm (ch, cl, ap[i + 3], B3modb);
      add_ssaaaa (ph, pl, ph, pl, ch, cl);

      umul_ppmm (ch, cl, rl, B4modb);
      add_ssaaaa (ph, pl, ph, pl, ch, cl);

      umul_ppmm (rh, rl, rh, B5modb);
      add_ssaaaa (rh, rl, rh, rl, ph, pl);
    }

  umul_ppmm (rh, cl, rh, B1modb);
  add_ssaaaa (rh, rl, rh, rl, CNST_LIMB(0), cl);

  cnt = cps[1];
  bi = cps[0];

  r = (rh << cnt) | (rl >> (GMP_LIMB_BITS - cnt));
  udiv_rnnd_preinv (r, r, rl << cnt, b, bi);

  return r >> cnt;
}
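
/* Illustrative usage sketch, not part of the library; the helper name
   below is hypothetical.  It only shows the intended calling sequence of
   the two entry points above: the precomputed inverse in cps[0] refers to
   the divisor normalized by the shift count stored in cps[1], so the
   divisor passed to mpn_mod_1s_4p must be shifted left by that count
   (the returned remainder is already shifted back).  Kept under #if 0 so
   it is not compiled into the library.  */
#if 0
static mp_limb_t
mod_1s_4p_example (mp_srcptr ap, mp_size_t n, mp_limb_t b)
{
  mp_limb_t cps[7];

  ASSERT (n >= 1);
  ASSERT (b <= (~(mp_limb_t) 0) / 4);

  mpn_mod_1s_4p_cps (cps, b);	/* precompute inverse, shift count, B^k mod b */
  return mpn_mod_1s_4p (ap, n, b << cps[1], cps);	/* normalized divisor */
}
#endif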