1/* mpn_gcdext -- Extended Greatest Common Divisor.
2
3Copyright 1996, 1998, 2000-2005, 2008, 2009, 2012 Free Software Foundation,
4Inc.
5
6This file is part of the GNU MP Library.
7
8The GNU MP Library is free software; you can redistribute it and/or modify
9it under the terms of either:
10
11  * the GNU Lesser General Public License as published by the Free
12    Software Foundation; either version 3 of the License, or (at your
13    option) any later version.
14
15or
16
17  * the GNU General Public License as published by the Free Software
18    Foundation; either version 2 of the License, or (at your option) any
19    later version.
20
21or both in parallel, as here.
22
23The GNU MP Library is distributed in the hope that it will be useful, but
24WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
25or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
26for more details.
27
28You should have received copies of the GNU General Public License and the
29GNU Lesser General Public License along with the GNU MP Library.  If not,
30see https://www.gnu.org/licenses/.  */
31
32#include "gmp-impl.h"
33#include "longlong.h"
34
35/* Here, d is the index of the cofactor to update. FIXME: Could use qn
36   = 0 for the common case q = 1. */
37void
38mpn_gcdext_hook (void *p, mp_srcptr gp, mp_size_t gn,
39		 mp_srcptr qp, mp_size_t qn, int d)
40{
41  struct gcdext_ctx *ctx = (struct gcdext_ctx *) p;
42  mp_size_t un = ctx->un;
43
44  if (gp)
45    {
46      mp_srcptr up;
47
48      ASSERT (gn > 0);
49      ASSERT (gp[gn-1] > 0);
50
51      MPN_COPY (ctx->gp, gp, gn);
52      ctx->gn = gn;
53
54      if (d < 0)
55	{
56	  int c;
57
58	  /* Must return the smallest cofactor, +u1 or -u0 */
59	  MPN_CMP (c, ctx->u0, ctx->u1, un);
60	  ASSERT (c != 0 || (un == 1 && ctx->u0[0] == 1 && ctx->u1[0] == 1));
61
62	  d = c < 0;
63	}
64
65      up = d ? ctx->u0 : ctx->u1;
66
67      MPN_NORMALIZE (up, un);
68      MPN_COPY (ctx->up, up, un);
69
70      *ctx->usize = d ? -un : un;
71    }
72  else
73    {
74      mp_limb_t cy;
75      mp_ptr u0 = ctx->u0;
76      mp_ptr u1 = ctx->u1;
77
78      ASSERT (d >= 0);
79
80      if (d)
81	MP_PTR_SWAP (u0, u1);
82
83      qn -= (qp[qn-1] == 0);
84
85      /* Update u0 += q  * u1 */
86      if (qn == 1)
87	{
88	  mp_limb_t q = qp[0];
89
90	  if (q == 1)
91	    /* A common case. */
92	    cy = mpn_add_n (u0, u0, u1, un);
93	  else
94	    cy = mpn_addmul_1 (u0, u1, un, q);
95	}
96      else
97	{
98	  mp_size_t u1n;
99	  mp_ptr tp;
100
101	  u1n = un;
102	  MPN_NORMALIZE (u1, u1n);
103
104	  if (u1n == 0)
105	    return;
106
107	  /* Should always have u1n == un here, and u1 >= u0. The
108	     reason is that we alternate adding u0 to u1 and u1 to u0
109	     (corresponding to subtractions a - b and b - a), and we
110	     can get a large quotient only just after a switch, which
111	     means that we'll add (a multiple of) the larger u to the
112	     smaller. */
113
114	  tp = ctx->tp;
115
116	  if (qn > u1n)
117	    mpn_mul (tp, qp, qn, u1, u1n);
118	  else
119	    mpn_mul (tp, u1, u1n, qp, qn);
120
121	  u1n += qn;
122	  u1n -= tp[u1n-1] == 0;
123
124	  if (u1n >= un)
125	    {
126	      cy = mpn_add (u0, tp, u1n, u0, un);
127	      un = u1n;
128	    }
129	  else
130	    /* Note: Unlikely case, maybe never happens? */
131	    cy = mpn_add (u0, u0, un, tp, u1n);
132
133	}
134      u0[un] = cy;
135      ctx->un = un + (cy > 0);
136    }
137}
138
139/* Temporary storage: 3*(n+1) for u. If hgcd2 succeeds, we need n for
140   the matrix-vector multiplication adjusting a, b. If hgcd fails, we
141   need at most n for the quotient and n+1 for the u update (reusing
142   the extra u). In all, 4n + 3. */
143
144mp_size_t
145mpn_gcdext_lehmer_n (mp_ptr gp, mp_ptr up, mp_size_t *usize,
146		     mp_ptr ap, mp_ptr bp, mp_size_t n,
147		     mp_ptr tp)
148{
149  mp_size_t ualloc = n + 1;
150
151  /* Keeps track of the second row of the reduction matrix
152   *
153   *   M = (v0, v1 ; u0, u1)
154   *
155   * which correspond to the first column of the inverse
156   *
157   *   M^{-1} = (u1, -v1; -u0, v0)
158   *
159   * This implies that
160   *
161   *   a =  u1 A (mod B)
162   *   b = -u0 A (mod B)
163   *
164   * where A, B denotes the input values.
165   */
166
167  struct gcdext_ctx ctx;
168  mp_size_t un;
169  mp_ptr u0;
170  mp_ptr u1;
171  mp_ptr u2;
172
173  MPN_ZERO (tp, 3*ualloc);
174  u0 = tp; tp += ualloc;
175  u1 = tp; tp += ualloc;
176  u2 = tp; tp += ualloc;
177
178  u1[0] = 1; un = 1;
179
180  ctx.gp = gp;
181  ctx.up = up;
182  ctx.usize = usize;
183
184  /* FIXME: Handle n == 2 differently, after the loop? */
185  while (n >= 2)
186    {
187      struct hgcd_matrix1 M;
188      mp_limb_t ah, al, bh, bl;
189      mp_limb_t mask;
190
191      mask = ap[n-1] | bp[n-1];
192      ASSERT (mask > 0);
193
194      if (mask & GMP_NUMB_HIGHBIT)
195	{
196	  ah = ap[n-1]; al = ap[n-2];
197	  bh = bp[n-1]; bl = bp[n-2];
198	}
199      else if (n == 2)
200	{
201	  /* We use the full inputs without truncation, so we can
202	     safely shift left. */
203	  int shift;
204
205	  count_leading_zeros (shift, mask);
206	  ah = MPN_EXTRACT_NUMB (shift, ap[1], ap[0]);
207	  al = ap[0] << shift;
208	  bh = MPN_EXTRACT_NUMB (shift, bp[1], bp[0]);
209	  bl = bp[0] << shift;
210	}
211      else
212	{
213	  int shift;
214
215	  count_leading_zeros (shift, mask);
216	  ah = MPN_EXTRACT_NUMB (shift, ap[n-1], ap[n-2]);
217	  al = MPN_EXTRACT_NUMB (shift, ap[n-2], ap[n-3]);
218	  bh = MPN_EXTRACT_NUMB (shift, bp[n-1], bp[n-2]);
219	  bl = MPN_EXTRACT_NUMB (shift, bp[n-2], bp[n-3]);
220	}
221
222      /* Try an mpn_nhgcd2 step */
223      if (mpn_hgcd2 (ah, al, bh, bl, &M))
224	{
225	  n = mpn_matrix22_mul1_inverse_vector (&M, tp, ap, bp, n);
226	  MP_PTR_SWAP (ap, tp);
227	  un = mpn_hgcd_mul_matrix1_vector(&M, u2, u0, u1, un);
228	  MP_PTR_SWAP (u0, u2);
229	}
230      else
231	{
232	  /* mpn_hgcd2 has failed. Then either one of a or b is very
233	     small, or the difference is very small. Perform one
234	     subtraction followed by one division. */
235	  ctx.u0 = u0;
236	  ctx.u1 = u1;
237	  ctx.tp = u2;
238	  ctx.un = un;
239
240	  /* Temporary storage n for the quotient and ualloc for the
241	     new cofactor. */
242	  n = mpn_gcd_subdiv_step (ap, bp, n, 0, mpn_gcdext_hook, &ctx, tp);
243	  if (n == 0)
244	    return ctx.gn;
245
246	  un = ctx.un;
247	}
248    }
249  ASSERT_ALWAYS (ap[0] > 0);
250  ASSERT_ALWAYS (bp[0] > 0);
251
252  if (ap[0] == bp[0])
253    {
254      int c;
255
256      /* Which cofactor to return now? Candidates are +u1 and -u0,
257	 depending on which of a and b was most recently reduced,
258	 which we don't keep track of. So compare and get the smallest
259	 one. */
260
261      gp[0] = ap[0];
262
263      MPN_CMP (c, u0, u1, un);
264      ASSERT (c != 0 || (un == 1 && u0[0] == 1 && u1[0] == 1));
265      if (c < 0)
266	{
267	  MPN_NORMALIZE (u0, un);
268	  MPN_COPY (up, u0, un);
269	  *usize = -un;
270	}
271      else
272	{
273	  MPN_NORMALIZE_NOT_ZERO (u1, un);
274	  MPN_COPY (up, u1, un);
275	  *usize = un;
276	}
277      return 1;
278    }
279  else
280    {
281      mp_limb_t uh, vh;
282      mp_limb_signed_t u;
283      mp_limb_signed_t v;
284      int negate;
285
286      gp[0] = mpn_gcdext_1 (&u, &v, ap[0], bp[0]);
287
288      /* Set up = u u1 - v u0. Keep track of size, un grows by one or
289	 two limbs. */
290
291      if (u == 0)
292	{
293	  ASSERT (v == 1);
294	  MPN_NORMALIZE (u0, un);
295	  MPN_COPY (up, u0, un);
296	  *usize = -un;
297	  return 1;
298	}
299      else if (v == 0)
300	{
301	  ASSERT (u == 1);
302	  MPN_NORMALIZE (u1, un);
303	  MPN_COPY (up, u1, un);
304	  *usize = un;
305	  return 1;
306	}
307      else if (u > 0)
308	{
309	  negate = 0;
310	  ASSERT (v < 0);
311	  v = -v;
312	}
313      else
314	{
315	  negate = 1;
316	  ASSERT (v > 0);
317	  u = -u;
318	}
319
320      uh = mpn_mul_1 (up, u1, un, u);
321      vh = mpn_addmul_1 (up, u0, un, v);
322
323      if ( (uh | vh) > 0)
324	{
325	  uh += vh;
326	  up[un++] = uh;
327	  if (uh < vh)
328	    up[un++] = 1;
329	}
330
331      MPN_NORMALIZE_NOT_ZERO (up, un);
332
333      *usize = negate ? -un : un;
334      return 1;
335    }
336}
337