1/* mpn_gcdext -- Extended Greatest Common Divisor.
2
3Copyright 1996, 1998, 2000, 2001, 2002, 2003, 2004, 2005, 2008, 2009 Free Software
4Foundation, Inc.
5
6This file is part of the GNU MP Library.
7
8The GNU MP Library is free software; you can redistribute it and/or modify
9it under the terms of the GNU Lesser General Public License as published by
10the Free Software Foundation; either version 3 of the License, or (at your
11option) any later version.
12
13The GNU MP Library is distributed in the hope that it will be useful, but
14WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
16License for more details.
17
18You should have received a copy of the GNU Lesser General Public License
19along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
20
21#include "gmp.h"
22#include "gmp-impl.h"
23#include "longlong.h"
24
25#ifndef GCDEXT_1_USE_BINARY
26#define GCDEXT_1_USE_BINARY 0
27#endif
28
29#ifndef GCDEXT_1_BINARY_METHOD
30#define GCDEXT_1_BINARY_METHOD 2
31#endif
32
33#ifndef USE_ZEROTAB
34#define USE_ZEROTAB 1
35#endif
36
37#if GCDEXT_1_USE_BINARY
38
39#if USE_ZEROTAB
40static unsigned char zerotab[0x40] = {
41  6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
42  4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
43  5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
44  4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
45};
46#endif
47
48mp_limb_t
49mpn_gcdext_1 (mp_limb_signed_t *sp, mp_limb_signed_t *tp,
50	      mp_limb_t u, mp_limb_t v)
51{
52  /* Maintain
53
54     U = t1 u + t0 v
55     V = s1 u + s0 v
56
57     where U, V are the inputs (without any shared power of two),
58     and the matris has determinant � 2^{shift}.
59  */
60  mp_limb_t s0 = 1;
61  mp_limb_t t0 = 0;
62  mp_limb_t s1 = 0;
63  mp_limb_t t1 = 1;
64  mp_limb_t ug;
65  mp_limb_t vg;
66  mp_limb_t ugh;
67  mp_limb_t vgh;
68  unsigned zero_bits;
69  unsigned shift;
70  unsigned i;
71#if GCDEXT_1_BINARY_METHOD == 2
72  mp_limb_t det_sign;
73#endif
74
75  ASSERT (u > 0);
76  ASSERT (v > 0);
77
78  count_trailing_zeros (zero_bits, u | v);
79  u >>= zero_bits;
80  v >>= zero_bits;
81
82  if ((u & 1) == 0)
83    {
84      count_trailing_zeros (shift, u);
85      u >>= shift;
86      t1 <<= shift;
87    }
88  else if ((v & 1) == 0)
89    {
90      count_trailing_zeros (shift, v);
91      v >>= shift;
92      s0 <<= shift;
93    }
94  else
95    shift = 0;
96
97#if GCDEXT_1_BINARY_METHOD == 1
98  while (u != v)
99    {
100      unsigned count;
101      if (u > v)
102	{
103	  u -= v;
104#if USE_ZEROTAB
105	  count = zerotab [u & 0x3f];
106	  u >>= count;
107	  if (UNLIKELY (count == 6))
108	    {
109	      unsigned c;
110	      do
111		{
112		  c = zerotab[u & 0x3f];
113		  u >>= c;
114		  count += c;
115		}
116	      while (c == 6);
117	    }
118#else
119	  count_trailing_zeros (count, u);
120	  u >>= count;
121#endif
122	  t0 += t1; t1 <<= count;
123	  s0 += s1; s1 <<= count;
124	}
125      else
126	{
127	  v -= u;
128#if USE_ZEROTAB
129	  count = zerotab [v & 0x3f];
130	  v >>= count;
131	  if (UNLIKELY (count == 6))
132	    {
133	      unsigned c;
134	      do
135		{
136		  c = zerotab[v & 0x3f];
137		  v >>= c;
138		  count += c;
139		}
140	      while (c == 6);
141	    }
142#else
143	  count_trailing_zeros (count, v);
144	  v >>= count;
145#endif
146	  t1 += t0; t0 <<= count;
147	  s1 += s0; s0 <<= count;
148	}
149      shift += count;
150    }
151#else
152# if GCDEXT_1_BINARY_METHOD == 2
153  u >>= 1;
154  v >>= 1;
155
156  det_sign = 0;
157
158  while (u != v)
159    {
160      unsigned count;
161      mp_limb_t d =  u - v;
162      mp_limb_t vgtu = LIMB_HIGHBIT_TO_MASK (d);
163      mp_limb_t sx;
164      mp_limb_t tx;
165
166      /* When v <= u (vgtu == 0), the updates are:
167
168	   (u; v)   <-- ( (u - v) >> count; v)    (det = +(1<<count) for corr. M factor)
169	   (t1, t0) <-- (t1 << count, t0 + t1)
170
171	 and when v > 0, the updates are
172
173	   (u; v)   <-- ( (v - u) >> count; u)    (det = -(1<<count))
174	   (t1, t0) <-- (t0 << count, t0 + t1)
175
176	 and similarly for s1, s0
177      */
178
179      /* v <-- min (u, v) */
180      v += (vgtu & d);
181
182      /* u <-- |u - v| */
183      u = (d ^ vgtu) - vgtu;
184
185      /* Number of trailing zeros is the same no matter if we look at
186       * d or u, but using d gives more parallelism. */
187#if USE_ZEROTAB
188      count = zerotab[d & 0x3f];
189      if (UNLIKELY (count == 6))
190	{
191	  unsigned c = 6;
192	  do
193	    {
194	      d >>= c;
195	      c = zerotab[d & 0x3f];
196	      count += c;
197	    }
198	  while (c == 6);
199	}
200#else
201      count_trailing_zeros (count, d);
202#endif
203      det_sign ^= vgtu;
204
205      tx = vgtu & (t0 - t1);
206      sx = vgtu & (s0 - s1);
207      t0 += t1;
208      s0 += s1;
209      t1 += tx;
210      s1 += sx;
211
212      count++;
213      u >>= count;
214      t1 <<= count;
215      s1 <<= count;
216      shift += count;
217    }
218  u = (u << 1) + 1;
219# else /* GCDEXT_1_BINARY_METHOD == 2 */
220#  error Unknown GCDEXT_1_BINARY_METHOD
221# endif
222#endif
223
224  /* Now u = v = g = gcd (u,v). Compute U/g and V/g */
225  ug = t0 + t1;
226  vg = s0 + s1;
227
228  ugh = ug/2 + (ug & 1);
229  vgh = vg/2 + (vg & 1);
230
231  /* Now �2^{shift} g = s0 U - t0 V. Get rid of the power of two, using
232     s0 U - t0 V = (s0 + V/g) U - (t0 + U/g) V. */
233  for (i = 0; i < shift; i++)
234    {
235      mp_limb_t mask = - ( (s0 | t0) & 1);
236
237      s0 /= 2;
238      t0 /= 2;
239      s0 += mask & vgh;
240      t0 += mask & ugh;
241    }
242  /* FIXME: Try simplifying this condition. */
243  if ( (s0 > 1 && 2*s0 >= vg) || (t0 > 1 && 2*t0 >= ug) )
244    {
245      s0 -= vg;
246      t0 -= ug;
247    }
248#if GCDEXT_1_BINARY_METHOD == 2
249  /* Conditional negation. */
250  s0 = (s0 ^ det_sign) - det_sign;
251  t0 = (t0 ^ det_sign) - det_sign;
252#endif
253  *sp = s0;
254  *tp = -t0;
255
256  return u << zero_bits;
257}
258
259#else /* !GCDEXT_1_USE_BINARY */
260
261
262/* FIXME: Takes two single-word limbs. It could be extended to a
263 * function that accepts a bignum for the first input, and only
264 * returns the first co-factor. */
265
266mp_limb_t
267mpn_gcdext_1 (mp_limb_signed_t *up, mp_limb_signed_t *vp,
268	      mp_limb_t a, mp_limb_t b)
269{
270  /* Maintain
271
272     a =  u0 A + v0 B
273     b =  u1 A + v1 B
274
275     where A, B are the original inputs.
276  */
277  mp_limb_signed_t u0 = 1;
278  mp_limb_signed_t v0 = 0;
279  mp_limb_signed_t u1 = 0;
280  mp_limb_signed_t v1 = 1;
281
282  ASSERT (a > 0);
283  ASSERT (b > 0);
284
285  if (a < b)
286    goto divide_by_b;
287
288  for (;;)
289    {
290      mp_limb_t q;
291
292      q = a / b;
293      a -= q * b;
294
295      if (a == 0)
296	{
297	  *up = u1;
298	  *vp = v1;
299	  return b;
300	}
301      u0 -= q * u1;
302      v0 -= q * v1;
303
304    divide_by_b:
305      q = b / a;
306      b -= q * a;
307
308      if (b == 0)
309	{
310	  *up = u0;
311	  *vp = v0;
312	  return a;
313	}
314      u1 -= q * u0;
315      v1 -= q * v0;
316    }
317}
318#endif /* !GCDEXT_1_USE_BINARY */
319