1/* mpn_mod_1s_3p (ap, n, b, cps) 2 Divide (ap,,n) by b. Return the single-limb remainder. 3 Requires that d < B / 4. 4 5 Contributed to the GNU project by Torbjorn Granlund. 6 7 THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY 8 SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST 9 GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE. 10 11Copyright 2008, 2009 Free Software Foundation, Inc. 12 13This file is part of the GNU MP Library. 14 15The GNU MP Library is free software; you can redistribute it and/or modify 16it under the terms of the GNU Lesser General Public License as published by 17the Free Software Foundation; either version 3 of the License, or (at your 18option) any later version. 19 20The GNU MP Library is distributed in the hope that it will be useful, but 21WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 22or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public 23License for more details. 24 25You should have received a copy of the GNU Lesser General Public License 26along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ 27 28#include "gmp.h" 29#include "gmp-impl.h" 30#include "longlong.h" 31 32void 33mpn_mod_1s_4p_cps (mp_limb_t cps[7], mp_limb_t b) 34{ 35 mp_limb_t bi; 36 mp_limb_t B1modb, B2modb, B3modb, B4modb, B5modb; 37 int cnt; 38 39 ASSERT (b <= (~(mp_limb_t) 0) / 4); 40 41 count_leading_zeros (cnt, b); 42 43 b <<= cnt; 44 invert_limb (bi, b); 45 46 B1modb = -b * ((bi >> (GMP_LIMB_BITS-cnt)) | (CNST_LIMB(1) << cnt)); 47 ASSERT (B1modb <= b); /* NB: not fully reduced mod b */ 48 udiv_rnd_preinv (B2modb, B1modb, b, bi); 49 udiv_rnd_preinv (B3modb, B2modb, b, bi); 50 udiv_rnd_preinv (B4modb, B3modb, b, bi); 51 udiv_rnd_preinv (B5modb, B4modb, b, bi); 52 53 cps[0] = bi; 54 cps[1] = cnt; 55 cps[2] = B1modb >> cnt; 56 cps[3] = B2modb >> cnt; 57 cps[4] = B3modb >> cnt; 58 cps[5] = B4modb >> cnt; 59 cps[6] = B5modb >> cnt; 60 61#if WANT_ASSERT 62 { 63 int i; 64 b = cps[2]; 65 for (i = 3; i <= 6; i++) 66 { 67 b += cps[i]; 68 ASSERT (b >= cps[i]); 69 } 70 } 71#endif 72} 73 74mp_limb_t 75mpn_mod_1s_4p (mp_srcptr ap, mp_size_t n, mp_limb_t b, mp_limb_t cps[7]) 76{ 77 mp_limb_t rh, rl, bi, q, ph, pl, ch, cl, r; 78 mp_limb_t B1modb, B2modb, B3modb, B4modb, B5modb; 79 mp_size_t i; 80 int cnt; 81 82 ASSERT (n >= 1); 83 84 B1modb = cps[2]; 85 B2modb = cps[3]; 86 B3modb = cps[4]; 87 B4modb = cps[5]; 88 B5modb = cps[6]; 89 90 switch (n & 3) 91 { 92 case 0: 93 umul_ppmm (ph, pl, ap[n - 3], B1modb); 94 add_ssaaaa (ph, pl, ph, pl, 0, ap[n - 4]); 95 umul_ppmm (ch, cl, ap[n - 2], B2modb); 96 add_ssaaaa (ph, pl, ph, pl, ch, cl); 97 umul_ppmm (rh, rl, ap[n - 1], B3modb); 98 add_ssaaaa (rh, rl, rh, rl, ph, pl); 99 n -= 4; 100 break; 101 case 1: 102 rh = 0; 103 rl = ap[n - 1]; 104 n -= 1; 105 break; 106 case 2: 107 umul_ppmm (ph, pl, ap[n - 1], B1modb); 108 add_ssaaaa (rh, rl, ph, pl, 0, ap[n - 2]); 109 n -= 2; 110 break; 111 case 3: 112 umul_ppmm (ph, pl, ap[n - 2], B1modb); 113 add_ssaaaa (ph, pl, ph, pl, 0, ap[n - 3]); 114 umul_ppmm (rh, rl, ap[n - 1], B2modb); 115 add_ssaaaa (rh, rl, rh, rl, ph, pl); 116 n -= 3; 117 break; 118 } 119 120 for (i = n - 4; i >= 0; i -= 4) 121 { 122 /* rr = ap[i] < B 123 + ap[i+1] * (B mod b) <= (B-1)(b-1) 124 + ap[i+2] * (B^2 mod b) <= (B-1)(b-1) 125 + ap[i+3] * (B^3 mod b) <= (B-1)(b-1) 126 + LO(rr) * (B^4 mod b) <= (B-1)(b-1) 127 + HI(rr) * (B^5 mod b) <= (B-1)(b-1) 128 */ 129 umul_ppmm (ph, pl, ap[i + 1], B1modb); 130 add_ssaaaa (ph, pl, ph, pl, 0, ap[i + 0]); 131 132 umul_ppmm (ch, cl, ap[i + 2], B2modb); 133 add_ssaaaa (ph, pl, ph, pl, ch, cl); 134 135 umul_ppmm (ch, cl, ap[i + 3], B3modb); 136 add_ssaaaa (ph, pl, ph, pl, ch, cl); 137 138 umul_ppmm (ch, cl, rl, B4modb); 139 add_ssaaaa (ph, pl, ph, pl, ch, cl); 140 141 umul_ppmm (rh, rl, rh, B5modb); 142 add_ssaaaa (rh, rl, rh, rl, ph, pl); 143 } 144 145 bi = cps[0]; 146 cnt = cps[1]; 147 148#if 1 149 umul_ppmm (rh, cl, rh, B1modb); 150 add_ssaaaa (rh, rl, rh, rl, 0, cl); 151 r = (rh << cnt) | (rl >> (GMP_LIMB_BITS - cnt)); 152#else 153 udiv_qrnnd_preinv (q, r, rh >> (GMP_LIMB_BITS - cnt), 154 (rh << cnt) | (rl >> (GMP_LIMB_BITS - cnt)), b, bi); 155 ASSERT (q <= 4); /* optimize for small quotient? */ 156#endif 157 158 udiv_qrnnd_preinv (q, r, r, rl << cnt, b, bi); 159 160 return r >> cnt; 161} 162