/* mpn_divexact_1 -- mpn by limb exact division.

   THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY.  THEY'RE ALMOST
   CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
   FUTURE GNU MP RELEASES.

Copyright 2000-2003, 2005, 2013 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of either:

  * the GNU Lesser General Public License as published by the Free
    Software Foundation; either version 3 of the License, or (at your
    option) any later version.

or

  * the GNU General Public License as published by the Free Software
    Foundation; either version 2 of the License, or (at your option) any
    later version.

or both in parallel, as here.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received copies of the GNU General Public License and the
GNU Lesser General Public License along with the GNU MP Library.  If not,
see https://www.gnu.org/licenses/.  */

#include "gmp-impl.h"
#include "longlong.h"



/* Divide a={src,size} by d=divisor and store the quotient in q={dst,size}.
   q will only be correct if d divides a exactly.

   A separate loop is used for shift==0 because n<<GMP_LIMB_BITS doesn't
   give zero on all CPUs (for instance it doesn't on the x86s).  This
   separate loop might run faster too, helping odd divisors.

   Possibilities:

   mpn_divexact_1c could be created, accepting and returning c.  This would
   let a long calculation be done piece by piece.  Currently there's no
   particular need for that, and not returning c means that a final umul can
   be skipped.

   Another use for returning c would be letting the caller know whether the
   division was in fact exact.  It would work just to return the carry bit
   "c=(l>s)" and let the caller do a final umul if interested.

   When the divisor is even, the factors of two could be handled with a
   separate mpn_rshift, instead of shifting on the fly.  That might be
   faster on some CPUs and would mean just the shift==0 style loop would be
   needed.

   If n<<GMP_LIMB_BITS gives zero on a particular CPU then the separate
   shift==0 loop is unnecessary, and could be eliminated if there's no great
   speed difference.

   It's not clear whether "/" is the best way to handle size==1.  Alpha gcc
   2.95 for instance has a poor "/" and might prefer the modular method.
   Perhaps a tuned parameter should control this.

   If src[size-1] < divisor then dst[size-1] will be zero, and one divide
   step could be skipped.  A test at the last step for s<divisor (or ls in
   the even case) might be a good way to do that.
   But if this code is often used with small divisors then it might not be
   worth bothering.  */

void
mpn_divexact_1 (mp_ptr dst, mp_srcptr src, mp_size_t size, mp_limb_t divisor)
{
  mp_size_t  i;
  mp_limb_t  c, h, l, ls, s, s_next, inverse, dummy;
  unsigned   shift;

  ASSERT (size >= 1);
  ASSERT (divisor != 0);
  ASSERT (MPN_SAME_OR_SEPARATE_P (dst, src, size));
  ASSERT_MPN (src, size);
  ASSERT_LIMB (divisor);

  /* Strip factors of two from the divisor; they are absorbed by shifting
     the source on the fly in the shift!=0 loop below.  */
  if ((divisor & 1) == 0)
    {
      count_trailing_zeros (shift, divisor);
      divisor >>= shift;
    }
  else
    shift = 0;

  /* inverse = divisor^(-1) mod 2^GMP_NUMB_BITS, divisor now odd.  */
  binvert_limb (inverse, divisor);
  divisor <<= GMP_NAIL_BITS;

  if (shift != 0)
    {
      c = 0;

      s = src[0];

      for (i = 1; i < size; i++)
        {
          /* Form the next right-shifted source limb on the fly.  */
          s_next = src[i];
          ls = ((s >> shift) | (s_next << (GMP_NUMB_BITS-shift))) & GMP_NUMB_MASK;
          s = s_next;

          /* Subtract the borrow propagated from the previous limb.  */
          SUBC_LIMB (c, l, ls, c);

          /* Multiply by the inverse to get the next quotient limb.  */
          l = (l * inverse) & GMP_NUMB_MASK;
          dst[i - 1] = l;

          /* High limb of q*d is the borrow into the next step.  */
          umul_ppmm (h, dummy, l, divisor);
          c += h;
        }

      ls = s >> shift;
      l = ls - c;
      l = (l * inverse) & GMP_NUMB_MASK;
      dst[size - 1] = l;
    }
  else
    {
      s = src[0];

      l = (s * inverse) & GMP_NUMB_MASK;
      dst[0] = l;
      c = 0;

      for (i = 1; i < size; i++)
        {
          /* Borrow is the high limb of q*d from the previous step.  */
          umul_ppmm (h, dummy, l, divisor);
          c += h;

          s = src[i];
          SUBC_LIMB (c, l, s, c);

          l = (l * inverse) & GMP_NUMB_MASK;
          dst[i] = l;
        }
    }
}
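
/* An illustrative, standalone sketch (not compiled, and not part of GMP) of
   the modular-inverse technique used above, assuming plain 64-bit limbs with
   no nail bits.  binvert_limb above plays the role of inv64 here: for odd d,
   inv64(d) is d^-1 mod 2^64, obtained by Hensel/Newton lifting, and when d
   divides a exactly the truncated product a * inv64(d) is exactly a/d.  The
   names inv64 and divexact64 are hypothetical, for illustration only.  */
#if 0
#include <stdint.h>

static uint64_t
inv64 (uint64_t d)              /* d must be odd */
{
  uint64_t  inv = d;            /* d*d == 1 mod 8, so d inverts itself to 3 bits */
  inv *= 2 - d * inv;           /* correct to 6 bits */
  inv *= 2 - d * inv;           /* 12 bits */
  inv *= 2 - d * inv;           /* 24 bits */
  inv *= 2 - d * inv;           /* 48 bits */
  inv *= 2 - d * inv;           /* 96 bits >= 64, so d*inv == 1 mod 2^64 */
  return inv;
}

/* Single-limb exact division: valid only when d is odd and d divides a.
   For example, divexact64 (123456789 * 10007, 10007) == 123456789.  */
static uint64_t
divexact64 (uint64_t a, uint64_t d)
{
  return a * inv64 (d);         /* wraps mod 2^64, which is exactly a/d */
}
#endif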