1/* Implementation of the squaring algorithm with Toom-Cook 6.5-way.
2
3   Contributed to the GNU project by Marco Bodrato.
4
5   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
6   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
7   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
8
9Copyright 2009 Free Software Foundation, Inc.
10
11This file is part of the GNU MP Library.
12
13The GNU MP Library is free software; you can redistribute it and/or modify
14it under the terms of the GNU Lesser General Public License as published by
15the Free Software Foundation; either version 3 of the License, or (at your
16option) any later version.
17
18The GNU MP Library is distributed in the hope that it will be useful, but
19WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
20or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
21License for more details.
22
23You should have received a copy of the GNU Lesser General Public License
24along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
25
26
27#include "gmp.h"
28#include "gmp-impl.h"
29
30
31#if GMP_NUMB_BITS < 21
32#error Not implemented.
33#endif
34
35
36#if TUNE_PROGRAM_BUILD
37#define MAYBE_sqr_basecase 1
38#define MAYBE_sqr_above_basecase   1
39#define MAYBE_sqr_toom2   1
40#define MAYBE_sqr_above_toom2   1
41#define MAYBE_sqr_toom3   1
42#define MAYBE_sqr_above_toom3   1
43#define MAYBE_sqr_above_toom4   1
44#else
45#ifdef  SQR_TOOM8_THRESHOLD
46#define SQR_TOOM6_MAX ((SQR_TOOM8_THRESHOLD+6*2-1+5)/6)
47#else
48#define SQR_TOOM6_MAX					\
49  ((SQR_FFT_THRESHOLD <= MP_SIZE_T_MAX - (6*2-1+5)) ?	\
50   ((SQR_FFT_THRESHOLD+6*2-1+5)/6)			\
51   : MP_SIZE_T_MAX )
52#endif
53#define MAYBE_sqr_basecase					\
54  (SQR_TOOM6_THRESHOLD < 6 * SQR_TOOM2_THRESHOLD)
55#define MAYBE_sqr_above_basecase				\
56  (SQR_TOOM6_MAX >=  SQR_TOOM2_THRESHOLD)
57#define MAYBE_sqr_toom2						\
58  (SQR_TOOM6_THRESHOLD < 6 * SQR_TOOM3_THRESHOLD)
59#define MAYBE_sqr_above_toom2					\
60  (SQR_TOOM6_MAX >= SQR_TOOM3_THRESHOLD)
61#define MAYBE_sqr_toom3						\
62  (SQR_TOOM6_THRESHOLD < 6 * SQR_TOOM4_THRESHOLD)
63#define MAYBE_sqr_above_toom3					\
64  (SQR_TOOM6_MAX >= SQR_TOOM4_THRESHOLD)
65#define MAYBE_sqr_above_toom4					\
66  (SQR_TOOM6_MAX >= SQR_TOOM6_THRESHOLD)
67#endif
68
69#define TOOM6_SQR_REC(p, a, n, ws)					\
70  do {									\
71    if (MAYBE_sqr_basecase && ( !MAYBE_sqr_above_basecase		\
72	|| BELOW_THRESHOLD (n, SQR_TOOM2_THRESHOLD)))			\
73      mpn_sqr_basecase (p, a, n);					\
74    else if (MAYBE_sqr_toom2 && ( !MAYBE_sqr_above_toom2		\
75	     || BELOW_THRESHOLD (n, SQR_TOOM3_THRESHOLD)))		\
76      mpn_toom2_sqr (p, a, n, ws);					\
77    else if (MAYBE_sqr_toom3 && ( !MAYBE_sqr_above_toom3		\
78	     || BELOW_THRESHOLD (n, SQR_TOOM4_THRESHOLD)))		\
79      mpn_toom3_sqr (p, a, n, ws);					\
80    else if (! MAYBE_sqr_above_toom4					\
81	     || BELOW_THRESHOLD (n, SQR_TOOM6_THRESHOLD))		\
82      mpn_toom4_sqr (p, a, n, ws);					\
83    else								\
84      mpn_toom6_sqr (p, a, n, ws);					\
85  } while (0)
86
87void
88mpn_toom6_sqr  (mp_ptr pp, mp_srcptr ap, mp_size_t an, mp_ptr scratch)
89{
90  mp_size_t n, s;
91
92  /***************************** decomposition *******************************/
93
94  ASSERT( an >= 18 );
95
96  n = 1 + (an - 1) / (size_t) 6;
97
98  s = an - 5 * n;
99
100  ASSERT (0 < s && s <= n);
101
102#define   r4    (pp + 3 * n)			/* 3n+1 */
103#define   r2    (pp + 7 * n)			/* 3n+1 */
104#define   r0    (pp +11 * n)			/* s+t <= 2*n */
105#define   r5    (scratch)			/* 3n+1 */
106#define   r3    (scratch + 3 * n + 1)		/* 3n+1 */
107#define   r1    (scratch + 6 * n + 2)		/* 3n+1 */
108#define   v0    (pp + 7 * n)			/* n+1 */
109#define   v2    (pp + 9 * n+2)			/* n+1 */
110#define   wse   (scratch + 9 * n + 3)		/* 3n+1 */
111
112  /* Alloc also 3n+1 limbs for ws... toom_interpolate_12pts may
113     need all of them, when DO_mpn_sublsh_n usea a scratch  */
114/*   if (scratch== NULL) */
115/*     scratch = TMP_SALLOC_LIMBS (12 * n + 6); */
116
117  /********************** evaluation and recursive calls *********************/
118  /* $\pm1/2$ */
119  mpn_toom_eval_pm2rexp (v2, v0, 5, ap, n, s, 1, pp);
120  TOOM6_SQR_REC(pp, v0, n + 1, wse); /* A(-1/2)*B(-1/2)*2^. */
121  TOOM6_SQR_REC(r5, v2, n + 1, wse); /* A(+1/2)*B(+1/2)*2^. */
122  mpn_toom_couple_handling (r5, 2 * n + 1, pp, 0, n, 1, 0);
123
124  /* $\pm1$ */
125  mpn_toom_eval_pm1 (v2, v0, 5, ap, n, s,    pp);
126  TOOM6_SQR_REC(pp, v0, n + 1, wse); /* A(-1)*B(-1) */
127  TOOM6_SQR_REC(r3, v2, n + 1, wse); /* A(1)*B(1) */
128  mpn_toom_couple_handling (r3, 2 * n + 1, pp, 0, n, 0, 0);
129
130  /* $\pm4$ */
131  mpn_toom_eval_pm2exp (v2, v0, 5, ap, n, s, 2, pp);
132  TOOM6_SQR_REC(pp, v0, n + 1, wse); /* A(-4)*B(-4) */
133  TOOM6_SQR_REC(r1, v2, n + 1, wse); /* A(+4)*B(+4) */
134  mpn_toom_couple_handling (r1, 2 * n + 1, pp, 0, n, 2, 4);
135
136  /* $\pm1/4$ */
137  mpn_toom_eval_pm2rexp (v2, v0, 5, ap, n, s, 2, pp);
138  TOOM6_SQR_REC(pp, v0, n + 1, wse); /* A(-1/4)*B(-1/4)*4^. */
139  TOOM6_SQR_REC(r4, v2, n + 1, wse); /* A(+1/4)*B(+1/4)*4^. */
140  mpn_toom_couple_handling (r4, 2 * n + 1, pp, 0, n, 2, 0);
141
142  /* $\pm2$ */
143  mpn_toom_eval_pm2 (v2, v0, 5, ap, n, s, pp);
144  TOOM6_SQR_REC(pp, v0, n + 1, wse); /* A(-2)*B(-2) */
145  TOOM6_SQR_REC(r2, v2, n + 1, wse); /* A(+2)*B(+2) */
146  mpn_toom_couple_handling (r2, 2 * n + 1, pp, 0, n, 1, 2);
147
148#undef v0
149#undef v2
150
151  /* A(0)*B(0) */
152  TOOM6_SQR_REC(pp, ap, n, wse);
153
154  mpn_toom_interpolate_12pts (pp, r1, r3, r5, n, 2 * s, 0, wse);
155
156#undef r0
157#undef r1
158#undef r2
159#undef r3
160#undef r4
161#undef r5
162
163}
164#undef TOOM6_SQR_REC
165#undef MAYBE_sqr_basecase
166#undef MAYBE_sqr_above_basecase
167#undef MAYBE_sqr_toom2
168#undef MAYBE_sqr_above_toom2
169#undef MAYBE_sqr_toom3
170#undef MAYBE_sqr_above_toom3
171#undef MAYBE_sqr_above_toom4
172