1/* mpn_gcdext -- Extended Greatest Common Divisor.
2
3Copyright 1996, 1998, 2000-2005, 2008, 2009 Free Software Foundation, Inc.
4
5This file is part of the GNU MP Library.
6
7The GNU MP Library is free software; you can redistribute it and/or modify
8it under the terms of either:
9
10  * the GNU Lesser General Public License as published by the Free
11    Software Foundation; either version 3 of the License, or (at your
12    option) any later version.
13
14or
15
16  * the GNU General Public License as published by the Free Software
17    Foundation; either version 2 of the License, or (at your option) any
18    later version.
19
20or both in parallel, as here.
21
22The GNU MP Library is distributed in the hope that it will be useful, but
23WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
24or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
25for more details.
26
27You should have received copies of the GNU General Public License and the
28GNU Lesser General Public License along with the GNU MP Library.  If not,
29see https://www.gnu.org/licenses/.  */
30
31#include "gmp-impl.h"
32#include "longlong.h"
33
34#ifndef GCDEXT_1_USE_BINARY
35#define GCDEXT_1_USE_BINARY 0
36#endif
37
38#ifndef GCDEXT_1_BINARY_METHOD
39#define GCDEXT_1_BINARY_METHOD 2
40#endif
41
42#if GCDEXT_1_USE_BINARY
43
44mp_limb_t
45mpn_gcdext_1 (mp_limb_signed_t *sp, mp_limb_signed_t *tp,
46	      mp_limb_t u, mp_limb_t v)
47{
48  /* Maintain
49
50     U = t1 u + t0 v
51     V = s1 u + s0 v
52
53     where U, V are the inputs (without any shared power of two),
54     and the matrix has determinant � 2^{shift}.
55  */
56  mp_limb_t s0 = 1;
57  mp_limb_t t0 = 0;
58  mp_limb_t s1 = 0;
59  mp_limb_t t1 = 1;
60  mp_limb_t ug;
61  mp_limb_t vg;
62  mp_limb_t ugh;
63  mp_limb_t vgh;
64  unsigned zero_bits;
65  unsigned shift;
66  unsigned i;
67#if GCDEXT_1_BINARY_METHOD == 2
68  mp_limb_t det_sign;
69#endif
70
71  ASSERT (u > 0);
72  ASSERT (v > 0);
73
74  count_trailing_zeros (zero_bits, u | v);
75  u >>= zero_bits;
76  v >>= zero_bits;
77
78  if ((u & 1) == 0)
79    {
80      count_trailing_zeros (shift, u);
81      u >>= shift;
82      t1 <<= shift;
83    }
84  else if ((v & 1) == 0)
85    {
86      count_trailing_zeros (shift, v);
87      v >>= shift;
88      s0 <<= shift;
89    }
90  else
91    shift = 0;
92
93#if GCDEXT_1_BINARY_METHOD == 1
94  while (u != v)
95    {
96      unsigned count;
97      if (u > v)
98	{
99	  u -= v;
100
101	  count_trailing_zeros (count, u);
102	  u >>= count;
103
104	  t0 += t1; t1 <<= count;
105	  s0 += s1; s1 <<= count;
106	}
107      else
108	{
109	  v -= u;
110
111	  count_trailing_zeros (count, v);
112	  v >>= count;
113
114	  t1 += t0; t0 <<= count;
115	  s1 += s0; s0 <<= count;
116	}
117      shift += count;
118    }
119#else
120# if GCDEXT_1_BINARY_METHOD == 2
121  u >>= 1;
122  v >>= 1;
123
124  det_sign = 0;
125
126  while (u != v)
127    {
128      unsigned count;
129      mp_limb_t d =  u - v;
130      mp_limb_t vgtu = LIMB_HIGHBIT_TO_MASK (d);
131      mp_limb_t sx;
132      mp_limb_t tx;
133
134      /* When v <= u (vgtu == 0), the updates are:
135
136	   (u; v)   <-- ( (u - v) >> count; v)    (det = +(1<<count) for corr. M factor)
137	   (t1, t0) <-- (t1 << count, t0 + t1)
138
139	 and when v > 0, the updates are
140
141	   (u; v)   <-- ( (v - u) >> count; u)    (det = -(1<<count))
142	   (t1, t0) <-- (t0 << count, t0 + t1)
143
144	 and similarly for s1, s0
145      */
146
147      /* v <-- min (u, v) */
148      v += (vgtu & d);
149
150      /* u <-- |u - v| */
151      u = (d ^ vgtu) - vgtu;
152
153      /* Number of trailing zeros is the same no matter if we look at
154       * d or u, but using d gives more parallelism. */
155      count_trailing_zeros (count, d);
156
157      det_sign ^= vgtu;
158
159      tx = vgtu & (t0 - t1);
160      sx = vgtu & (s0 - s1);
161      t0 += t1;
162      s0 += s1;
163      t1 += tx;
164      s1 += sx;
165
166      count++;
167      u >>= count;
168      t1 <<= count;
169      s1 <<= count;
170      shift += count;
171    }
172  u = (u << 1) + 1;
173# else /* GCDEXT_1_BINARY_METHOD == 2 */
174#  error Unknown GCDEXT_1_BINARY_METHOD
175# endif
176#endif
177
178  /* Now u = v = g = gcd (u,v). Compute U/g and V/g */
179  ug = t0 + t1;
180  vg = s0 + s1;
181
182  ugh = ug/2 + (ug & 1);
183  vgh = vg/2 + (vg & 1);
184
185  /* Now 2^{shift} g = s0 U - t0 V. Get rid of the power of two, using
186     s0 U - t0 V = (s0 + V/g) U - (t0 + U/g) V. */
187  for (i = 0; i < shift; i++)
188    {
189      mp_limb_t mask = - ( (s0 | t0) & 1);
190
191      s0 /= 2;
192      t0 /= 2;
193      s0 += mask & vgh;
194      t0 += mask & ugh;
195    }
196
197  ASSERT_ALWAYS (s0 <= vg);
198  ASSERT_ALWAYS (t0 <= ug);
199
200  if (s0 > vg - s0)
201    {
202      s0 -= vg;
203      t0 -= ug;
204    }
205#if GCDEXT_1_BINARY_METHOD == 2
206  /* Conditional negation. */
207  s0 = (s0 ^ det_sign) - det_sign;
208  t0 = (t0 ^ det_sign) - det_sign;
209#endif
210  *sp = s0;
211  *tp = -t0;
212
213  return u << zero_bits;
214}
215
216#else /* !GCDEXT_1_USE_BINARY */
217
218
219/* FIXME: Takes two single-word limbs. It could be extended to a
220 * function that accepts a bignum for the first input, and only
221 * returns the first co-factor. */
222
223mp_limb_t
224mpn_gcdext_1 (mp_limb_signed_t *up, mp_limb_signed_t *vp,
225	      mp_limb_t a, mp_limb_t b)
226{
227  /* Maintain
228
229     a =  u0 A + v0 B
230     b =  u1 A + v1 B
231
232     where A, B are the original inputs.
233  */
234  mp_limb_signed_t u0 = 1;
235  mp_limb_signed_t v0 = 0;
236  mp_limb_signed_t u1 = 0;
237  mp_limb_signed_t v1 = 1;
238
239  ASSERT (a > 0);
240  ASSERT (b > 0);
241
242  if (a < b)
243    goto divide_by_b;
244
245  for (;;)
246    {
247      mp_limb_t q;
248
249      q = a / b;
250      a -= q * b;
251
252      if (a == 0)
253	{
254	  *up = u1;
255	  *vp = v1;
256	  return b;
257	}
258      u0 -= q * u1;
259      v0 -= q * v1;
260
261    divide_by_b:
262      q = b / a;
263      b -= q * a;
264
265      if (b == 0)
266	{
267	  *up = u0;
268	  *vp = v0;
269	  return a;
270	}
271      u1 -= q * u0;
272      v1 -= q * v0;
273    }
274}
275#endif /* !GCDEXT_1_USE_BINARY */
276