1/* mpn_sqr_basecase -- Internal routine to square a natural number
2   of length n.
3
4   THIS IS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE.  IT IS ONLY
5   SAFE TO REACH THIS FUNCTION THROUGH DOCUMENTED INTERFACES.
6
7
8Copyright 1991, 1992, 1993, 1994, 1996, 1997, 2000, 2001, 2002, 2003, 2004,
92005, 2008 Free Software Foundation, Inc.
10
11This file is part of the GNU MP Library.
12
13The GNU MP Library is free software; you can redistribute it and/or modify
14it under the terms of the GNU Lesser General Public License as published by
15the Free Software Foundation; either version 3 of the License, or (at your
16option) any later version.
17
18The GNU MP Library is distributed in the hope that it will be useful, but
19WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
20or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
21License for more details.
22
23You should have received a copy of the GNU Lesser General Public License
24along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
25
26#include "gmp.h"
27#include "gmp-impl.h"
28#include "longlong.h"
29
30
#if HAVE_NATIVE_mpn_sqr_diagonal
/* Native assembly version: store the double-limb square of each input
   limb, i.e. rp[2i+1],rp[2i] = up[i]^2 for i = 0 .. n-1.  */
#define MPN_SQR_DIAGONAL(rp, up, n)					\
  mpn_sqr_diagonal (rp, up, n)
#else
/* Generic fallback: square each limb up[i] and store the two-limb
   product at rp[2*i] (low) and rp[2*i+1] (high).  The shifts by
   GMP_NAIL_BITS adjust the result for nail builds; they are no-ops
   when GMP_NAIL_BITS == 0.  */
#define MPN_SQR_DIAGONAL(rp, up, n)					\
  do {									\
    mp_size_t _i;							\
    for (_i = 0; _i < (n); _i++)					\
      {									\
	mp_limb_t ul, lpl;						\
	ul = (up)[_i];							\
	umul_ppmm ((rp)[2 * _i + 1], lpl, ul, ul << GMP_NAIL_BITS);	\
	(rp)[2 * _i] = lpl >> GMP_NAIL_BITS;				\
      }									\
  } while (0)
#endif
47
48
#undef READY_WITH_mpn_sqr_basecase
/* Three implementations follow, in decreasing order of preference; the
   first one whose native-routine prerequisites are satisfied defines
   READY_WITH_mpn_sqr_basecase, which suppresses the later fallbacks.  */


#if ! defined (READY_WITH_mpn_sqr_basecase) && HAVE_NATIVE_mpn_addmul_2s
/* mpn_sqr_basecase using native mpn_addmul_2s.

   Compute {rp,2n} = {up,n}^2 from the identity

     u^2 = sum u[i]^2 B^(2i)  +  2 * sum_{j<k} u[j]*u[k] B^(j+k).

   The cross products (j < k) are accumulated into the scratch area tp,
   two multiplier limbs per pass; the diagonal squares u[i]^2 go
   directly into rp; the final step computes rp += 2*tp offset by one
   limb.  NOTE(review): this relies on mpn_addmul_2s excluding the
   diagonal u[i]*u[i] terms that plain mpn_addmul_2 would include
   (compare the sign-fixup loops in the mpn_addmul_2 variant below) --
   confirm against the native routine's definition.  */
void
mpn_sqr_basecase (mp_ptr rp, mp_srcptr up, mp_size_t n)
{
  mp_size_t i;
  mp_limb_t tarr[2 * SQR_TOOM2_THRESHOLD];
  mp_ptr tp = tarr;
  mp_limb_t cy;

  /* must fit 2*n limbs in tarr */
  ASSERT (n <= SQR_TOOM2_THRESHOLD);

  if ((n & 1) != 0)
    {
      if (n == 1)
	{
	  /* 1x1: a single nail-adjusted square, cf. MPN_SQR_DIAGONAL.  */
	  mp_limb_t ul, lpl;
	  ul = up[0];
	  umul_ppmm (rp[1], lpl, ul, ul << GMP_NAIL_BITS);
	  rp[0] = lpl >> GMP_NAIL_BITS;
	  return;
	}

      MPN_ZERO (tp, n);

      /* Odd n: passes i = 0, 2, ..., n-3; each multiplies the
	 n-(i+1) limbs above position i by the limb pair up[i],up[i+1]
	 and records the carry limb above the accumulated row.  */
      for (i = 0; i <= n - 2; i += 2)
	{
	  cy = mpn_addmul_2s (tp + 2 * i, up + i + 1, n - (i + 1), up + i);
	  tp[n + i] = cy;
	}
    }
  else
    {
      if (n == 2)
	{
	  /* 2x2: a single mpn_addmul_2 call produces the full square.  */
	  rp[0] = 0;
	  rp[1] = 0;
	  rp[3] = mpn_addmul_2 (rp, up, 2, up);
	  return;
	}

      MPN_ZERO (tp, n);

      /* Even n: the paired passes stop one pair short; the remaining
	 cross product u[n-1]*u[n-2] is added with mpn_addmul_1 below.  */
      for (i = 0; i <= n - 4; i += 2)
	{
	  cy = mpn_addmul_2s (tp + 2 * i, up + i + 1, n - (i + 1), up + i);
	  tp[n + i] = cy;
	}
      cy = mpn_addmul_1 (tp + 2 * n - 4, up + n - 1, 1, up[n - 2]);
      tp[2 * n - 3] = cy;
    }

  /* Diagonal squares: rp[2i+1],rp[2i] = up[i]^2 for all i.  */
  MPN_SQR_DIAGONAL (rp, up, n);

  /* rp += 2*tp offset by one limb, doubling the cross products.  */
#if HAVE_NATIVE_mpn_addlsh1_n
  cy = mpn_addlsh1_n (rp + 1, rp + 1, tp, 2 * n - 2);
#else
  cy = mpn_lshift (tp, tp, 2 * n - 2, 1);
  cy += mpn_add_n (rp + 1, rp + 1, tp, 2 * n - 2);
#endif
  rp[2 * n - 1] += cy;
}
#define READY_WITH_mpn_sqr_basecase
#endif
116
117
#if ! defined (READY_WITH_mpn_sqr_basecase) && HAVE_NATIVE_mpn_addmul_2

/* mpn_sqr_basecase using plain mpn_addmul_2.

   This is tricky, since we have to let mpn_addmul_2 make some undesirable
   multiplies, u[k]*u[k], that we would like to let mpn_sqr_diagonal handle.
   This forces us to conditionally add or subtract the mpn_sqr_diagonal
   results.  Examples of the product we form:

   n = 4              n = 5		n = 6
   u1u0 * u3u2u1      u1u0 * u4u3u2u1	u1u0 * u5u4u3u2u1
   u2 * u3	      u3u2 * u4u3	u3u2 * u5u4u3
					u4 * u5
   add: u0 u2 u3      add: u0 u2 u4	add: u0 u2 u4 u5
   sub: u1	      sub: u1 u3	sub: u1 u3

   Rationale for the negations below: the final step computes
   rp += 2*tp, and tp already holds one copy of each "sub" square
   u[k]^2.  MPN_SQR_DIAGONAL deposits +u[k]^2 into rp, so without a
   fixup those squares would be counted 1 + 2 = 3 times; negating the
   corresponding rp pair gives -u[k]^2 + 2*u[k]^2 = u[k]^2 as wanted.  */

void
mpn_sqr_basecase (mp_ptr rp, mp_srcptr up, mp_size_t n)
{
  mp_size_t i;
  mp_limb_t tarr[2 * SQR_TOOM2_THRESHOLD];
  mp_ptr tp = tarr;
  mp_limb_t cy;

  /* must fit 2*n limbs in tarr */
  ASSERT (n <= SQR_TOOM2_THRESHOLD);

  if ((n & 1) != 0)
    {
      mp_limb_t x0, x1;

      if (n == 1)
	{
	  /* 1x1: a single nail-adjusted square, cf. MPN_SQR_DIAGONAL.  */
	  mp_limb_t ul, lpl;
	  ul = up[0];
	  umul_ppmm (rp[1], lpl, ul, ul << GMP_NAIL_BITS);
	  rp[0] = lpl >> GMP_NAIL_BITS;
	  return;
	}

      /* The code below doesn't like unnormalized operands.  Since such
	 operands are unusual, handle them with a dumb recursion.  */
      if (up[n - 1] == 0)
	{
	  rp[2 * n - 2] = 0;
	  rp[2 * n - 1] = 0;
	  mpn_sqr_basecase (rp, up, n - 1);
	  return;
	}

      MPN_ZERO (tp, n);

      /* Accumulate the cross products (including the unwanted diagonal
	 squares of the odd-indexed limbs -- see the table above) into
	 tp, two multiplier limbs per pass.  */
      for (i = 0; i <= n - 2; i += 2)
	{
	  cy = mpn_addmul_2 (tp + 2 * i, up + i + 1, n - (i + 1), up + i);
	  tp[n + i] = cy;
	}

      MPN_SQR_DIAGONAL (rp, up, n);

      /* Negate the "sub" squares: the pairs rp[2k],rp[2k+1] for odd k
	 (i.e. i = 2, 6, 10, ...).  The borrow out of each negated pair
	 is subtracted from the following, kept pair via __GMPN_SUB_1;
	 any borrow still left over is *added* to the next pair before
	 its own negation, since adding before a negation is equivalent
	 to subtracting after it: -(X + cy) = -X - cy.  */
      for (i = 2;; i += 4)
	{
	  x0 = rp[i + 0];
	  rp[i + 0] = (-x0) & GMP_NUMB_MASK;
	  x1 = rp[i + 1];
	  rp[i + 1] = (-x1 - (x0 != 0)) & GMP_NUMB_MASK;
	  __GMPN_SUB_1 (cy, rp + i + 2, rp + i + 2, 2, (x1 | x0) != 0);
	  if (i + 4 >= 2 * n)
	    break;
	  mpn_incr_u (rp + i + 4, cy);
	}
    }
  else
    {
      mp_limb_t x0, x1;

      if (n == 2)
	{
	  /* 2x2: a single mpn_addmul_2 call produces the full square.  */
	  rp[0] = 0;
	  rp[1] = 0;
	  rp[3] = mpn_addmul_2 (rp, up, 2, up);
	  return;
	}

      /* The code below doesn't like unnormalized operands.  Since such
	 operands are unusual, handle them with a dumb recursion.  */
      if (up[n - 1] == 0)
	{
	  rp[2 * n - 2] = 0;
	  rp[2 * n - 1] = 0;
	  mpn_sqr_basecase (rp, up, n - 1);
	  return;
	}

      MPN_ZERO (tp, n);

      /* Paired passes stop one pair short; the remaining cross product
	 u[n-1]*u[n-2] is added with mpn_addmul_1 (this product brings
	 in no diagonal term, hence u[n-1] appears in the "add" list).  */
      for (i = 0; i <= n - 4; i += 2)
	{
	  cy = mpn_addmul_2 (tp + 2 * i, up + i + 1, n - (i + 1), up + i);
	  tp[n + i] = cy;
	}
      cy = mpn_addmul_1 (tp + 2 * n - 4, up + n - 1, 1, up[n - 2]);
      tp[2 * n - 3] = cy;

      MPN_SQR_DIAGONAL (rp, up, n);

      /* Same sign fixup as in the odd-n branch, except the top pair
	 (u[n-1]^2, which stays positive) follows the last negated
	 pair, so the final borrow is applied with a plain decrement
	 after the loop instead of the subtract-then-increment step.  */
      for (i = 2;; i += 4)
	{
	  x0 = rp[i + 0];
	  rp[i + 0] = (-x0) & GMP_NUMB_MASK;
	  x1 = rp[i + 1];
	  rp[i + 1] = (-x1 - (x0 != 0)) & GMP_NUMB_MASK;
	  if (i + 6 >= 2 * n)
	    break;
	  __GMPN_SUB_1 (cy, rp + i + 2, rp + i + 2, 2, (x1 | x0) != 0);
	  mpn_incr_u (rp + i + 4, cy);
	}
      mpn_decr_u (rp + i + 2, (x1 | x0) != 0);
    }

  /* rp += 2*tp offset by one limb, doubling the cross products.  */
#if HAVE_NATIVE_mpn_addlsh1_n
  cy = mpn_addlsh1_n (rp + 1, rp + 1, tp, 2 * n - 2);
#else
  cy = mpn_lshift (tp, tp, 2 * n - 2, 1);
  cy += mpn_add_n (rp + 1, rp + 1, tp, 2 * n - 2);
#endif
  rp[2 * n - 1] += cy;
}
#define READY_WITH_mpn_sqr_basecase
#endif
249
250
251#if ! defined (READY_WITH_mpn_sqr_basecase)
252
253/* Default mpn_sqr_basecase using mpn_addmul_1.  */
254
255void
256mpn_sqr_basecase (mp_ptr rp, mp_srcptr up, mp_size_t n)
257{
258  mp_size_t i;
259
260  ASSERT (n >= 1);
261  ASSERT (! MPN_OVERLAP_P (rp, 2*n, up, n));
262
263  {
264    mp_limb_t ul, lpl;
265    ul = up[0];
266    umul_ppmm (rp[1], lpl, ul, ul << GMP_NAIL_BITS);
267    rp[0] = lpl >> GMP_NAIL_BITS;
268  }
269  if (n > 1)
270    {
271      mp_limb_t tarr[2 * SQR_TOOM2_THRESHOLD];
272      mp_ptr tp = tarr;
273      mp_limb_t cy;
274
275      /* must fit 2*n limbs in tarr */
276      ASSERT (n <= SQR_TOOM2_THRESHOLD);
277
278      cy = mpn_mul_1 (tp, up + 1, n - 1, up[0]);
279      tp[n - 1] = cy;
280      for (i = 2; i < n; i++)
281	{
282	  mp_limb_t cy;
283	  cy = mpn_addmul_1 (tp + 2 * i - 2, up + i, n - i, up[i - 1]);
284	  tp[n + i - 2] = cy;
285	}
286      MPN_SQR_DIAGONAL (rp + 2, up + 1, n - 1);
287
288      {
289	mp_limb_t cy;
290#if HAVE_NATIVE_mpn_addlsh1_n
291	cy = mpn_addlsh1_n (rp + 1, rp + 1, tp, 2 * n - 2);
292#else
293	cy = mpn_lshift (tp, tp, 2 * n - 2, 1);
294	cy += mpn_add_n (rp + 1, rp + 1, tp, 2 * n - 2);
295#endif
296	rp[2 * n - 1] += cy;
297      }
298    }
299}
300#endif
301