1/* mpn_gcdext -- Extended Greatest Common Divisor. 2 3Copyright 1996, 1998, 2000-2005, 2008, 2009 Free Software Foundation, Inc. 4 5This file is part of the GNU MP Library. 6 7The GNU MP Library is free software; you can redistribute it and/or modify 8it under the terms of either: 9 10 * the GNU Lesser General Public License as published by the Free 11 Software Foundation; either version 3 of the License, or (at your 12 option) any later version. 13 14or 15 16 * the GNU General Public License as published by the Free Software 17 Foundation; either version 2 of the License, or (at your option) any 18 later version. 19 20or both in parallel, as here. 21 22The GNU MP Library is distributed in the hope that it will be useful, but 23WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 24or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 25for more details. 26 27You should have received copies of the GNU General Public License and the 28GNU Lesser General Public License along with the GNU MP Library. If not, 29see https://www.gnu.org/licenses/. */ 30 31#include "gmp-impl.h" 32#include "longlong.h" 33 34#ifndef GCDEXT_1_USE_BINARY 35#define GCDEXT_1_USE_BINARY 0 36#endif 37 38#ifndef GCDEXT_1_BINARY_METHOD 39#define GCDEXT_1_BINARY_METHOD 2 40#endif 41 42#if GCDEXT_1_USE_BINARY 43 44mp_limb_t 45mpn_gcdext_1 (mp_limb_signed_t *sp, mp_limb_signed_t *tp, 46 mp_limb_t u, mp_limb_t v) 47{ 48 /* Maintain 49 50 U = t1 u + t0 v 51 V = s1 u + s0 v 52 53 where U, V are the inputs (without any shared power of two), 54 and the matrix has determinant � 2^{shift}. 55 */ 56 mp_limb_t s0 = 1; 57 mp_limb_t t0 = 0; 58 mp_limb_t s1 = 0; 59 mp_limb_t t1 = 1; 60 mp_limb_t ug; 61 mp_limb_t vg; 62 mp_limb_t ugh; 63 mp_limb_t vgh; 64 unsigned zero_bits; 65 unsigned shift; 66 unsigned i; 67#if GCDEXT_1_BINARY_METHOD == 2 68 mp_limb_t det_sign; 69#endif 70 71 ASSERT (u > 0); 72 ASSERT (v > 0); 73 74 count_trailing_zeros (zero_bits, u | v); 75 u >>= zero_bits; 76 v >>= zero_bits; 77 78 if ((u & 1) == 0) 79 { 80 count_trailing_zeros (shift, u); 81 u >>= shift; 82 t1 <<= shift; 83 } 84 else if ((v & 1) == 0) 85 { 86 count_trailing_zeros (shift, v); 87 v >>= shift; 88 s0 <<= shift; 89 } 90 else 91 shift = 0; 92 93#if GCDEXT_1_BINARY_METHOD == 1 94 while (u != v) 95 { 96 unsigned count; 97 if (u > v) 98 { 99 u -= v; 100 101 count_trailing_zeros (count, u); 102 u >>= count; 103 104 t0 += t1; t1 <<= count; 105 s0 += s1; s1 <<= count; 106 } 107 else 108 { 109 v -= u; 110 111 count_trailing_zeros (count, v); 112 v >>= count; 113 114 t1 += t0; t0 <<= count; 115 s1 += s0; s0 <<= count; 116 } 117 shift += count; 118 } 119#else 120# if GCDEXT_1_BINARY_METHOD == 2 121 u >>= 1; 122 v >>= 1; 123 124 det_sign = 0; 125 126 while (u != v) 127 { 128 unsigned count; 129 mp_limb_t d = u - v; 130 mp_limb_t vgtu = LIMB_HIGHBIT_TO_MASK (d); 131 mp_limb_t sx; 132 mp_limb_t tx; 133 134 /* When v <= u (vgtu == 0), the updates are: 135 136 (u; v) <-- ( (u - v) >> count; v) (det = +(1<<count) for corr. M factor) 137 (t1, t0) <-- (t1 << count, t0 + t1) 138 139 and when v > 0, the updates are 140 141 (u; v) <-- ( (v - u) >> count; u) (det = -(1<<count)) 142 (t1, t0) <-- (t0 << count, t0 + t1) 143 144 and similarly for s1, s0 145 */ 146 147 /* v <-- min (u, v) */ 148 v += (vgtu & d); 149 150 /* u <-- |u - v| */ 151 u = (d ^ vgtu) - vgtu; 152 153 /* Number of trailing zeros is the same no matter if we look at 154 * d or u, but using d gives more parallelism. */ 155 count_trailing_zeros (count, d); 156 157 det_sign ^= vgtu; 158 159 tx = vgtu & (t0 - t1); 160 sx = vgtu & (s0 - s1); 161 t0 += t1; 162 s0 += s1; 163 t1 += tx; 164 s1 += sx; 165 166 count++; 167 u >>= count; 168 t1 <<= count; 169 s1 <<= count; 170 shift += count; 171 } 172 u = (u << 1) + 1; 173# else /* GCDEXT_1_BINARY_METHOD == 2 */ 174# error Unknown GCDEXT_1_BINARY_METHOD 175# endif 176#endif 177 178 /* Now u = v = g = gcd (u,v). Compute U/g and V/g */ 179 ug = t0 + t1; 180 vg = s0 + s1; 181 182 ugh = ug/2 + (ug & 1); 183 vgh = vg/2 + (vg & 1); 184 185 /* Now 2^{shift} g = s0 U - t0 V. Get rid of the power of two, using 186 s0 U - t0 V = (s0 + V/g) U - (t0 + U/g) V. */ 187 for (i = 0; i < shift; i++) 188 { 189 mp_limb_t mask = - ( (s0 | t0) & 1); 190 191 s0 /= 2; 192 t0 /= 2; 193 s0 += mask & vgh; 194 t0 += mask & ugh; 195 } 196 197 ASSERT_ALWAYS (s0 <= vg); 198 ASSERT_ALWAYS (t0 <= ug); 199 200 if (s0 > vg - s0) 201 { 202 s0 -= vg; 203 t0 -= ug; 204 } 205#if GCDEXT_1_BINARY_METHOD == 2 206 /* Conditional negation. */ 207 s0 = (s0 ^ det_sign) - det_sign; 208 t0 = (t0 ^ det_sign) - det_sign; 209#endif 210 *sp = s0; 211 *tp = -t0; 212 213 return u << zero_bits; 214} 215 216#else /* !GCDEXT_1_USE_BINARY */ 217 218 219/* FIXME: Takes two single-word limbs. It could be extended to a 220 * function that accepts a bignum for the first input, and only 221 * returns the first co-factor. */ 222 223mp_limb_t 224mpn_gcdext_1 (mp_limb_signed_t *up, mp_limb_signed_t *vp, 225 mp_limb_t a, mp_limb_t b) 226{ 227 /* Maintain 228 229 a = u0 A + v0 B 230 b = u1 A + v1 B 231 232 where A, B are the original inputs. 233 */ 234 mp_limb_signed_t u0 = 1; 235 mp_limb_signed_t v0 = 0; 236 mp_limb_signed_t u1 = 0; 237 mp_limb_signed_t v1 = 1; 238 239 ASSERT (a > 0); 240 ASSERT (b > 0); 241 242 if (a < b) 243 goto divide_by_b; 244 245 for (;;) 246 { 247 mp_limb_t q; 248 249 q = a / b; 250 a -= q * b; 251 252 if (a == 0) 253 { 254 *up = u1; 255 *vp = v1; 256 return b; 257 } 258 u0 -= q * u1; 259 v0 -= q * v1; 260 261 divide_by_b: 262 q = b / a; 263 b -= q * a; 264 265 if (b == 0) 266 { 267 *up = u0; 268 *vp = v0; 269 return a; 270 } 271 u1 -= q * u0; 272 v1 -= q * v0; 273 } 274} 275#endif /* !GCDEXT_1_USE_BINARY */ 276