1/* Implementation of the algorithm for Toom-Cook 4.5-way.
2
3   Contributed to the GNU project by Marco Bodrato.
4
5   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
6   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
7   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
8
9Copyright 2009, 2012 Free Software Foundation, Inc.
10
11This file is part of the GNU MP Library.
12
13The GNU MP Library is free software; you can redistribute it and/or modify
14it under the terms of the GNU Lesser General Public License as published by
15the Free Software Foundation; either version 3 of the License, or (at your
16option) any later version.
17
18The GNU MP Library is distributed in the hope that it will be useful, but
19WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
20or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
21License for more details.
22
23You should have received a copy of the GNU Lesser General Public License
24along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
25
26
27#include "gmp.h"
28#include "gmp-impl.h"
29
30/* Stores |{ap,n}-{bp,n}| in {rp,n}, returns the sign. */
31static int
32abs_sub_n (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t n)
33{
34  mp_limb_t  x, y;
35  while (--n >= 0)
36    {
37      x = ap[n];
38      y = bp[n];
39      if (x != y)
40	{
41	  n++;
42	  if (x > y)
43	    {
44	      mpn_sub_n (rp, ap, bp, n);
45	      return 0;
46	    }
47	  else
48	    {
49	      mpn_sub_n (rp, bp, ap, n);
50	      return ~0;
51	    }
52	}
53      rp[n] = 0;
54    }
55  return 0;
56}
57
58static int
59abs_sub_add_n (mp_ptr rm, mp_ptr rp, mp_srcptr rs, mp_size_t n) {
60  int result;
61  result = abs_sub_n (rm, rp, rs, n);
62  ASSERT_NOCARRY(mpn_add_n (rp, rp, rs, n));
63  return result;
64}
65
66
67/* Toom-4.5, the splitting 6x3 unbalanced version.
68   Evaluate in: infinity, +4, -4, +2, -2, +1, -1, 0.
69
70  <--s-><--n--><--n--><--n--><--n--><--n-->
71   ____ ______ ______ ______ ______ ______
72  |_a5_|__a4__|__a3__|__a2__|__a1__|__a0__|
73			|b2_|__b1__|__b0__|
74			<-t-><--n--><--n-->
75
76*/
77#define TOOM_63_MUL_N_REC(p, a, b, n, ws)		\
78  do {	mpn_mul_n (p, a, b, n);				\
79  } while (0)
80
81#define TOOM_63_MUL_REC(p, a, na, b, nb, ws)		\
82  do {	mpn_mul (p, a, na, b, nb);			\
83  } while (0)
84
85void
86mpn_toom63_mul (mp_ptr pp,
87		mp_srcptr ap, mp_size_t an,
88		mp_srcptr bp, mp_size_t bn, mp_ptr scratch)
89{
90  mp_size_t n, s, t;
91  mp_limb_t cy;
92  int sign;
93
94  /***************************** decomposition *******************************/
95#define a5  (ap + 5 * n)
96#define b0  (bp + 0 * n)
97#define b1  (bp + 1 * n)
98#define b2  (bp + 2 * n)
99
100  ASSERT (an >= bn);
101  n = 1 + (an >= 2 * bn ? (an - 1) / (size_t) 6 : (bn - 1) / (size_t) 3);
102
103  s = an - 5 * n;
104  t = bn - 2 * n;
105
106  ASSERT (0 < s && s <= n);
107  ASSERT (0 < t && t <= n);
108  /* WARNING! it assumes s+t>=n */
109  ASSERT ( s + t >= n );
110  ASSERT ( s + t > 4);
111  /* WARNING! it assumes n>1 */
112  ASSERT ( n > 2);
113
114#define   r8    pp				/* 2n   */
115#define   r7    scratch				/* 3n+1 */
116#define   r5    (pp + 3*n)			/* 3n+1 */
117#define   v0    (pp + 3*n)			/* n+1 */
118#define   v1    (pp + 4*n+1)			/* n+1 */
119#define   v2    (pp + 5*n+2)			/* n+1 */
120#define   v3    (pp + 6*n+3)			/* n+1 */
121#define   r3    (scratch + 3 * n + 1)		/* 3n+1 */
122#define   r1    (pp + 7*n)			/* s+t <= 2*n */
123#define   ws    (scratch + 6 * n + 2)		/* ??? */
124
125  /* Alloc also 3n+1 limbs for ws... mpn_toom_interpolate_8pts may
126     need all of them, when DO_mpn_sublsh_n usea a scratch  */
127/*   if (scratch == NULL) scratch = TMP_SALLOC_LIMBS (9 * n + 3); */
128
129  /********************** evaluation and recursive calls *********************/
130  /* $\pm4$ */
131  sign = mpn_toom_eval_pm2exp (v2, v0, 5, ap, n, s, 2, pp);
132  pp[n] = mpn_lshift (pp, b1, n, 2); /* 4b1 */
133  /* FIXME: use addlsh */
134  v3[t] = mpn_lshift (v3, b2, t, 4);/* 16b2 */
135  if ( n == t )
136    v3[n]+= mpn_add_n (v3, v3, b0, n); /* 16b2+b0 */
137  else
138    v3[n] = mpn_add (v3, b0, n, v3, t+1); /* 16b2+b0 */
139  sign ^= abs_sub_add_n (v1, v3, pp, n + 1);
140  TOOM_63_MUL_N_REC(pp, v0, v1, n + 1, ws); /* A(-4)*B(-4) */
141  TOOM_63_MUL_N_REC(r3, v2, v3, n + 1, ws); /* A(+4)*B(+4) */
142  mpn_toom_couple_handling (r3, 2*n+1, pp, sign, n, 2, 4);
143
144  /* $\pm1$ */
145  sign = mpn_toom_eval_pm1 (v2, v0, 5, ap, n, s,    pp);
146  /* Compute bs1 and bsm1. Code taken from toom33 */
147  cy = mpn_add (ws, b0, n, b2, t);
148#if HAVE_NATIVE_mpn_add_n_sub_n
149  if (cy == 0 && mpn_cmp (ws, b1, n) < 0)
150    {
151      cy = mpn_add_n_sub_n (v3, v1, b1, ws, n);
152      v3[n] = cy >> 1;
153      v1[n] = 0;
154      sign = ~sign;
155    }
156  else
157    {
158      mp_limb_t cy2;
159      cy2 = mpn_add_n_sub_n (v3, v1, ws, b1, n);
160      v3[n] = cy + (cy2 >> 1);
161      v1[n] = cy - (cy2 & 1);
162    }
163#else
164  v3[n] = cy + mpn_add_n (v3, ws, b1, n);
165  if (cy == 0 && mpn_cmp (ws, b1, n) < 0)
166    {
167      mpn_sub_n (v1, b1, ws, n);
168      v1[n] = 0;
169      sign = ~sign;
170    }
171  else
172    {
173      cy -= mpn_sub_n (v1, ws, b1, n);
174      v1[n] = cy;
175    }
176#endif
177  TOOM_63_MUL_N_REC(pp, v0, v1, n + 1, ws); /* A(-1)*B(-1) */
178  TOOM_63_MUL_N_REC(r7, v2, v3, n + 1, ws); /* A(1)*B(1) */
179  mpn_toom_couple_handling (r7, 2*n+1, pp, sign, n, 0, 0);
180
181  /* $\pm2$ */
182  sign = mpn_toom_eval_pm2 (v2, v0, 5, ap, n, s, pp);
183  pp[n] = mpn_lshift (pp, b1, n, 1); /* 2b1 */
184  /* FIXME: use addlsh or addlsh2 */
185  v3[t] = mpn_lshift (v3, b2, t, 2);/* 4b2 */
186  if ( n == t )
187    v3[n]+= mpn_add_n (v3, v3, b0, n); /* 4b2+b0 */
188  else
189    v3[n] = mpn_add (v3, b0, n, v3, t+1); /* 4b2+b0 */
190  sign ^= abs_sub_add_n (v1, v3, pp, n + 1);
191  TOOM_63_MUL_N_REC(pp, v0, v1, n + 1, ws); /* A(-2)*B(-2) */
192  TOOM_63_MUL_N_REC(r5, v2, v3, n + 1, ws); /* A(+2)*B(+2) */
193  mpn_toom_couple_handling (r5, 2*n+1, pp, sign, n, 1, 2);
194
195  /* A(0)*B(0) */
196  TOOM_63_MUL_N_REC(pp, ap, bp, n, ws);
197
198  /* Infinity */
199  if (s > t) {
200    TOOM_63_MUL_REC(r1, a5, s, b2, t, ws);
201  } else {
202    TOOM_63_MUL_REC(r1, b2, t, a5, s, ws);
203  };
204
205  mpn_toom_interpolate_8pts (pp, n, r3, r7, s + t, ws);
206
207#undef a5
208#undef b0
209#undef b1
210#undef b2
211#undef r1
212#undef r3
213#undef r5
214#undef v0
215#undef v1
216#undef v2
217#undef v3
218#undef r7
219#undef r8
220#undef ws
221}
222