1/* mpn_gcdext -- Extended Greatest Common Divisor. 2 3Copyright 1996, 1998, 2000, 2001, 2002, 2003, 2004, 2005, 2008, 2009 Free Software 4Foundation, Inc. 5 6This file is part of the GNU MP Library. 7 8The GNU MP Library is free software; you can redistribute it and/or modify 9it under the terms of the GNU Lesser General Public License as published by 10the Free Software Foundation; either version 3 of the License, or (at your 11option) any later version. 12 13The GNU MP Library is distributed in the hope that it will be useful, but 14WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public 16License for more details. 17 18You should have received a copy of the GNU Lesser General Public License 19along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ 20 21#include "gmp.h" 22#include "gmp-impl.h" 23#include "longlong.h" 24 25#ifndef GCDEXT_1_USE_BINARY 26#define GCDEXT_1_USE_BINARY 0 27#endif 28 29#ifndef GCDEXT_1_BINARY_METHOD 30#define GCDEXT_1_BINARY_METHOD 2 31#endif 32 33#ifndef USE_ZEROTAB 34#define USE_ZEROTAB 1 35#endif 36 37#if GCDEXT_1_USE_BINARY 38 39#if USE_ZEROTAB 40static unsigned char zerotab[0x40] = { 41 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 42 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 43 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 44 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0 45}; 46#endif 47 48mp_limb_t 49mpn_gcdext_1 (mp_limb_signed_t *sp, mp_limb_signed_t *tp, 50 mp_limb_t u, mp_limb_t v) 51{ 52 /* Maintain 53 54 U = t1 u + t0 v 55 V = s1 u + s0 v 56 57 where U, V are the inputs (without any shared power of two), 58 and the matris has determinant � 2^{shift}. 59 */ 60 mp_limb_t s0 = 1; 61 mp_limb_t t0 = 0; 62 mp_limb_t s1 = 0; 63 mp_limb_t t1 = 1; 64 mp_limb_t ug; 65 mp_limb_t vg; 66 mp_limb_t ugh; 67 mp_limb_t vgh; 68 unsigned zero_bits; 69 unsigned shift; 70 unsigned i; 71#if GCDEXT_1_BINARY_METHOD == 2 72 mp_limb_t det_sign; 73#endif 74 75 ASSERT (u > 0); 76 ASSERT (v > 0); 77 78 count_trailing_zeros (zero_bits, u | v); 79 u >>= zero_bits; 80 v >>= zero_bits; 81 82 if ((u & 1) == 0) 83 { 84 count_trailing_zeros (shift, u); 85 u >>= shift; 86 t1 <<= shift; 87 } 88 else if ((v & 1) == 0) 89 { 90 count_trailing_zeros (shift, v); 91 v >>= shift; 92 s0 <<= shift; 93 } 94 else 95 shift = 0; 96 97#if GCDEXT_1_BINARY_METHOD == 1 98 while (u != v) 99 { 100 unsigned count; 101 if (u > v) 102 { 103 u -= v; 104#if USE_ZEROTAB 105 count = zerotab [u & 0x3f]; 106 u >>= count; 107 if (UNLIKELY (count == 6)) 108 { 109 unsigned c; 110 do 111 { 112 c = zerotab[u & 0x3f]; 113 u >>= c; 114 count += c; 115 } 116 while (c == 6); 117 } 118#else 119 count_trailing_zeros (count, u); 120 u >>= count; 121#endif 122 t0 += t1; t1 <<= count; 123 s0 += s1; s1 <<= count; 124 } 125 else 126 { 127 v -= u; 128#if USE_ZEROTAB 129 count = zerotab [v & 0x3f]; 130 v >>= count; 131 if (UNLIKELY (count == 6)) 132 { 133 unsigned c; 134 do 135 { 136 c = zerotab[v & 0x3f]; 137 v >>= c; 138 count += c; 139 } 140 while (c == 6); 141 } 142#else 143 count_trailing_zeros (count, v); 144 v >>= count; 145#endif 146 t1 += t0; t0 <<= count; 147 s1 += s0; s0 <<= count; 148 } 149 shift += count; 150 } 151#else 152# if GCDEXT_1_BINARY_METHOD == 2 153 u >>= 1; 154 v >>= 1; 155 156 det_sign = 0; 157 158 while (u != v) 159 { 160 unsigned count; 161 mp_limb_t d = u - v; 162 mp_limb_t vgtu = LIMB_HIGHBIT_TO_MASK (d); 163 mp_limb_t sx; 164 mp_limb_t tx; 165 166 /* When v <= u (vgtu == 0), the updates are: 167 168 (u; v) <-- ( (u - v) >> count; v) (det = +(1<<count) for corr. M factor) 169 (t1, t0) <-- (t1 << count, t0 + t1) 170 171 and when v > 0, the updates are 172 173 (u; v) <-- ( (v - u) >> count; u) (det = -(1<<count)) 174 (t1, t0) <-- (t0 << count, t0 + t1) 175 176 and similarly for s1, s0 177 */ 178 179 /* v <-- min (u, v) */ 180 v += (vgtu & d); 181 182 /* u <-- |u - v| */ 183 u = (d ^ vgtu) - vgtu; 184 185 /* Number of trailing zeros is the same no matter if we look at 186 * d or u, but using d gives more parallelism. */ 187#if USE_ZEROTAB 188 count = zerotab[d & 0x3f]; 189 if (UNLIKELY (count == 6)) 190 { 191 unsigned c = 6; 192 do 193 { 194 d >>= c; 195 c = zerotab[d & 0x3f]; 196 count += c; 197 } 198 while (c == 6); 199 } 200#else 201 count_trailing_zeros (count, d); 202#endif 203 det_sign ^= vgtu; 204 205 tx = vgtu & (t0 - t1); 206 sx = vgtu & (s0 - s1); 207 t0 += t1; 208 s0 += s1; 209 t1 += tx; 210 s1 += sx; 211 212 count++; 213 u >>= count; 214 t1 <<= count; 215 s1 <<= count; 216 shift += count; 217 } 218 u = (u << 1) + 1; 219# else /* GCDEXT_1_BINARY_METHOD == 2 */ 220# error Unknown GCDEXT_1_BINARY_METHOD 221# endif 222#endif 223 224 /* Now u = v = g = gcd (u,v). Compute U/g and V/g */ 225 ug = t0 + t1; 226 vg = s0 + s1; 227 228 ugh = ug/2 + (ug & 1); 229 vgh = vg/2 + (vg & 1); 230 231 /* Now �2^{shift} g = s0 U - t0 V. Get rid of the power of two, using 232 s0 U - t0 V = (s0 + V/g) U - (t0 + U/g) V. */ 233 for (i = 0; i < shift; i++) 234 { 235 mp_limb_t mask = - ( (s0 | t0) & 1); 236 237 s0 /= 2; 238 t0 /= 2; 239 s0 += mask & vgh; 240 t0 += mask & ugh; 241 } 242 /* FIXME: Try simplifying this condition. */ 243 if ( (s0 > 1 && 2*s0 >= vg) || (t0 > 1 && 2*t0 >= ug) ) 244 { 245 s0 -= vg; 246 t0 -= ug; 247 } 248#if GCDEXT_1_BINARY_METHOD == 2 249 /* Conditional negation. */ 250 s0 = (s0 ^ det_sign) - det_sign; 251 t0 = (t0 ^ det_sign) - det_sign; 252#endif 253 *sp = s0; 254 *tp = -t0; 255 256 return u << zero_bits; 257} 258 259#else /* !GCDEXT_1_USE_BINARY */ 260 261 262/* FIXME: Takes two single-word limbs. It could be extended to a 263 * function that accepts a bignum for the first input, and only 264 * returns the first co-factor. */ 265 266mp_limb_t 267mpn_gcdext_1 (mp_limb_signed_t *up, mp_limb_signed_t *vp, 268 mp_limb_t a, mp_limb_t b) 269{ 270 /* Maintain 271 272 a = u0 A + v0 B 273 b = u1 A + v1 B 274 275 where A, B are the original inputs. 276 */ 277 mp_limb_signed_t u0 = 1; 278 mp_limb_signed_t v0 = 0; 279 mp_limb_signed_t u1 = 0; 280 mp_limb_signed_t v1 = 1; 281 282 ASSERT (a > 0); 283 ASSERT (b > 0); 284 285 if (a < b) 286 goto divide_by_b; 287 288 for (;;) 289 { 290 mp_limb_t q; 291 292 q = a / b; 293 a -= q * b; 294 295 if (a == 0) 296 { 297 *up = u1; 298 *vp = v1; 299 return b; 300 } 301 u0 -= q * u1; 302 v0 -= q * v1; 303 304 divide_by_b: 305 q = b / a; 306 b -= q * a; 307 308 if (b == 0) 309 { 310 *up = u0; 311 *vp = v0; 312 return a; 313 } 314 u1 -= q * u0; 315 v1 -= q * v0; 316 } 317} 318#endif /* !GCDEXT_1_USE_BINARY */ 319