toom54_mul.c revision 1.1.1.1
1/* Implementation of the algorithm for Toom-Cook 4.5-way.
2
3   Contributed to the GNU project by Marco Bodrato.
4
5   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
6   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
7   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
8
9Copyright 2009, 2012 Free Software Foundation, Inc.
10
11This file is part of the GNU MP Library.
12
13The GNU MP Library is free software; you can redistribute it and/or modify
14it under the terms of the GNU Lesser General Public License as published by
15the Free Software Foundation; either version 3 of the License, or (at your
16option) any later version.
17
18The GNU MP Library is distributed in the hope that it will be useful, but
19WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
20or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
21License for more details.
22
23You should have received a copy of the GNU Lesser General Public License
24along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
25
26#include "gmp.h"
27#include "gmp-impl.h"
28
29
30/* Toom-4.5, the splitting 5x4 unbalanced version.
31   Evaluate in: infinity, +4, -4, +2, -2, +1, -1, 0.
32
33  <--s-><--n--><--n--><--n--><--n-->
34   ____ ______ ______ ______ ______
35  |_a4_|__a3__|__a2__|__a1__|__a0__|
36	  |b3_|__b2__|__b1__|__b0__|
37	  <-t-><--n--><--n--><--n-->
38
39*/
40#define TOOM_54_MUL_N_REC(p, a, b, n, ws)		\
41  do {	mpn_mul_n (p, a, b, n);				\
42  } while (0)
43
44#define TOOM_54_MUL_REC(p, a, na, b, nb, ws)		\
45  do {	mpn_mul (p, a, na, b, nb);			\
46  } while (0)
47
48void
49mpn_toom54_mul (mp_ptr pp,
50		mp_srcptr ap, mp_size_t an,
51		mp_srcptr bp, mp_size_t bn, mp_ptr scratch)
52{
53  mp_size_t n, s, t;
54  int sign;
55
56  /***************************** decomposition *******************************/
57#define a4  (ap + 4 * n)
58#define b3  (bp + 3 * n)
59
60  ASSERT (an >= bn);
61  n = 1 + (4 * an >= 5 * bn ? (an - 1) / (size_t) 5 : (bn - 1) / (size_t) 4);
62
63  s = an - 4 * n;
64  t = bn - 3 * n;
65
66  ASSERT (0 < s && s <= n);
67  ASSERT (0 < t && t <= n);
68  /* Required by mpn_toom_interpolate_8pts. */
69  ASSERT ( s + t >= n );
70  ASSERT ( s + t > 4);
71  ASSERT ( n > 2);
72
73#define   r8    pp				/* 2n   */
74#define   r7    scratch				/* 3n+1 */
75#define   r5    (pp + 3*n)			/* 3n+1 */
76#define   v0    (pp + 3*n)			/* n+1 */
77#define   v1    (pp + 4*n+1)			/* n+1 */
78#define   v2    (pp + 5*n+2)			/* n+1 */
79#define   v3    (pp + 6*n+3)			/* n+1 */
80#define   r3    (scratch + 3 * n + 1)		/* 3n+1 */
81#define   r1    (pp + 7*n)			/* s+t <= 2*n */
82#define   ws    (scratch + 6 * n + 2)		/* ??? */
83
84  /* Alloc also 3n+1 limbs for ws... mpn_toom_interpolate_8pts may
85     need all of them, when DO_mpn_sublsh_n usea a scratch  */
86  /********************** evaluation and recursive calls *********************/
87  /* $\pm4$ */
88  sign = mpn_toom_eval_pm2exp (v2, v0, 4, ap, n, s, 2, pp)
89       ^ mpn_toom_eval_pm2exp (v3, v1, 3, bp, n, t, 2, pp);
90  TOOM_54_MUL_N_REC(pp, v0, v1, n + 1, ws); /* A(-4)*B(-4) */
91  TOOM_54_MUL_N_REC(r3, v2, v3, n + 1, ws); /* A(+4)*B(+4) */
92  mpn_toom_couple_handling (r3, 2*n+1, pp, sign, n, 2, 4);
93
94  /* $\pm1$ */
95  sign = mpn_toom_eval_pm1 (v2, v0, 4, ap, n, s,    pp)
96       ^ mpn_toom_eval_dgr3_pm1 (v3, v1, bp, n, t,    pp);
97  TOOM_54_MUL_N_REC(pp, v0, v1, n + 1, ws); /* A(-1)*B(-1) */
98  TOOM_54_MUL_N_REC(r7, v2, v3, n + 1, ws); /* A(1)*B(1) */
99  mpn_toom_couple_handling (r7, 2*n+1, pp, sign, n, 0, 0);
100
101  /* $\pm2$ */
102  sign = mpn_toom_eval_pm2 (v2, v0, 4, ap, n, s, pp)
103       ^ mpn_toom_eval_dgr3_pm2 (v3, v1, bp, n, t, pp);
104  TOOM_54_MUL_N_REC(pp, v0, v1, n + 1, ws); /* A(-2)*B(-2) */
105  TOOM_54_MUL_N_REC(r5, v2, v3, n + 1, ws); /* A(+2)*B(+2) */
106  mpn_toom_couple_handling (r5, 2*n+1, pp, sign, n, 1, 2);
107
108  /* A(0)*B(0) */
109  TOOM_54_MUL_N_REC(pp, ap, bp, n, ws);
110
111  /* Infinity */
112  if (s > t) {
113    TOOM_54_MUL_REC(r1, a4, s, b3, t, ws);
114  } else {
115    TOOM_54_MUL_REC(r1, b3, t, a4, s, ws);
116  };
117
118  mpn_toom_interpolate_8pts (pp, n, r3, r7, s + t, ws);
119
120#undef a4
121#undef b3
122#undef r1
123#undef r3
124#undef r5
125#undef v0
126#undef v1
127#undef v2
128#undef v3
129#undef r7
130#undef r8
131#undef ws
132}
133