/* Interpolation for the algorithm Toom-Cook 8.5-way.

   Contributed to the GNU project by Marco Bodrato.

   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.

Copyright 2009, 2010 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 3 of the License, or (at your
option) any later version.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
License for more details.

You should have received a copy of the GNU Lesser General Public License
along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */


#include "gmp.h"
#include "gmp-impl.h"

#if GMP_NUMB_BITS < 29
#error Not implemented: Both sublsh_n(,,,28) should be corrected; r2 and r5 need one more LIMB.
#endif

#if GMP_NUMB_BITS < 28
#error Not implemented: divexact_by188513325 and _by182712915 will not work.
36#endif 37 38 39#if HAVE_NATIVE_mpn_sublsh_n 40#define DO_mpn_sublsh_n(dst,src,n,s,ws) mpn_sublsh_n(dst,dst,src,n,s) 41#else 42static mp_limb_t 43DO_mpn_sublsh_n(mp_ptr dst, mp_srcptr src, mp_size_t n, unsigned int s, mp_ptr ws) 44{ 45#if USE_MUL_1 && 0 46 return mpn_submul_1(dst,src,n,CNST_LIMB(1) <<(s)); 47#else 48 mp_limb_t __cy; 49 __cy = mpn_lshift(ws,src,n,s); 50 return __cy + mpn_sub_n(dst,dst,ws,n); 51#endif 52} 53#endif 54 55#if HAVE_NATIVE_mpn_addlsh_n 56#define DO_mpn_addlsh_n(dst,src,n,s,ws) mpn_addlsh_n(dst,dst,src,n,s) 57#else 58static mp_limb_t 59DO_mpn_addlsh_n(mp_ptr dst, mp_srcptr src, mp_size_t n, unsigned int s, mp_ptr ws) 60{ 61#if USE_MUL_1 && 0 62 return mpn_addmul_1(dst,src,n,CNST_LIMB(1) <<(s)); 63#else 64 mp_limb_t __cy; 65 __cy = mpn_lshift(ws,src,n,s); 66 return __cy + mpn_add_n(dst,dst,ws,n); 67#endif 68} 69#endif 70 71#if HAVE_NATIVE_mpn_subrsh 72#define DO_mpn_subrsh(dst,nd,src,ns,s,ws) mpn_subrsh(dst,nd,src,ns,s) 73#else 74/* FIXME: This is not a correct definition, it assumes no carry */ 75#define DO_mpn_subrsh(dst,nd,src,ns,s,ws) \ 76do { \ 77 mp_limb_t __cy; \ 78 MPN_DECR_U (dst, nd, src[0] >> s); \ 79 __cy = DO_mpn_sublsh_n (dst, src + 1, ns - 1, GMP_NUMB_BITS - s, ws); \ 80 MPN_DECR_U (dst + ns - 1, nd - ns + 1, __cy); \ 81} while (0) 82#endif 83 84 85/* FIXME: tuneup should decide the best variant */ 86#ifndef AORSMUL_FASTER_AORS_AORSLSH 87#define AORSMUL_FASTER_AORS_AORSLSH 1 88#endif 89#ifndef AORSMUL_FASTER_AORS_2AORSLSH 90#define AORSMUL_FASTER_AORS_2AORSLSH 1 91#endif 92#ifndef AORSMUL_FASTER_2AORSLSH 93#define AORSMUL_FASTER_2AORSLSH 1 94#endif 95#ifndef AORSMUL_FASTER_3AORSLSH 96#define AORSMUL_FASTER_3AORSLSH 1 97#endif 98 99#if GMP_NUMB_BITS < 43 100#define BIT_CORRECTION 1 101#define CORRECTION_BITS GMP_NUMB_BITS 102#else 103#define BIT_CORRECTION 0 104#define CORRECTION_BITS 0 105#endif 106 107#define BINVERT_9 \ 108 ((((GMP_NUMB_MAX / 9) << (6 - GMP_NUMB_BITS % 6)) * 8 & GMP_NUMB_MAX) | 0x39) 109 110#define 
BINVERT_255 \ 111 (GMP_NUMB_MAX - ((GMP_NUMB_MAX / 255) << (8 - GMP_NUMB_BITS % 8))) 112 113 /* FIXME: find some more general expressions for inverses */ 114#if GMP_LIMB_BITS == 32 115#define BINVERT_2835 (GMP_NUMB_MASK & CNST_LIMB(0x53E3771B)) 116#define BINVERT_42525 (GMP_NUMB_MASK & CNST_LIMB(0x9F314C35)) 117#define BINVERT_182712915 (GMP_NUMB_MASK & CNST_LIMB(0x550659DB)) 118#define BINVERT_188513325 (GMP_NUMB_MASK & CNST_LIMB(0xFBC333A5)) 119#define BINVERT_255x182712915L (GMP_NUMB_MASK & CNST_LIMB(0x6FC4CB25)) 120#define BINVERT_255x188513325L (GMP_NUMB_MASK & CNST_LIMB(0x6864275B)) 121#if GMP_NAIL_BITS == 0 122#define BINVERT_255x182712915H CNST_LIMB(0x1B649A07) 123#define BINVERT_255x188513325H CNST_LIMB(0x06DB993A) 124#else /* GMP_NAIL_BITS != 0 */ 125#define BINVERT_255x182712915H \ 126 (GMP_NUMB_MASK & CNST_LIMB((0x1B649A07<<GMP_NAIL_BITS) | (0x6FC4CB25>>GMP_NUMB_BITS))) 127#define BINVERT_255x188513325H \ 128 (GMP_NUMB_MASK & CNST_LIMB((0x06DB993A<<GMP_NAIL_BITS) | (0x6864275B>>GMP_NUMB_BITS))) 129#endif 130#else 131#if GMP_LIMB_BITS == 64 132#define BINVERT_2835 (GMP_NUMB_MASK & CNST_LIMB(0x938CC70553E3771B)) 133#define BINVERT_42525 (GMP_NUMB_MASK & CNST_LIMB(0xE7B40D449F314C35)) 134#define BINVERT_255x182712915 (GMP_NUMB_MASK & CNST_LIMB(0x1B649A076FC4CB25)) 135#define BINVERT_255x188513325 (GMP_NUMB_MASK & CNST_LIMB(0x06DB993A6864275B)) 136#endif 137#endif 138 139#ifndef mpn_divexact_by255 140#if GMP_NUMB_BITS % 8 == 0 141#define mpn_divexact_by255(dst,src,size) \ 142 (255 & 1 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 255))) 143#else 144#if HAVE_NATIVE_mpn_pi1_bdiv_q_1 145#define mpn_divexact_by255(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,CNST_LIMB(255),BINVERT_255,0) 146#else 147#define mpn_divexact_by255(dst,src,size) mpn_divexact_1(dst,src,size,CNST_LIMB(255)) 148#endif 149#endif 150#endif 151 152#ifndef mpn_divexact_by255x4 153#if HAVE_NATIVE_mpn_pi1_bdiv_q_1 154#define mpn_divexact_by255x4(dst,src,size) 
mpn_pi1_bdiv_q_1(dst,src,size,CNST_LIMB(255),BINVERT_255,2) 155#else 156#define mpn_divexact_by255x4(dst,src,size) mpn_divexact_1(dst,src,size,CNST_LIMB(255)<<2) 157#endif 158#endif 159 160#ifndef mpn_divexact_by9x16 161#if HAVE_NATIVE_mpn_pi1_bdiv_q_1 162#define mpn_divexact_by9x16(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,CNST_LIMB(9),BINVERT_9,4) 163#else 164#define mpn_divexact_by9x16(dst,src,size) mpn_divexact_1(dst,src,size,CNST_LIMB(9)<<4) 165#endif 166#endif 167 168#ifndef mpn_divexact_by42525x16 169#if HAVE_NATIVE_mpn_pi1_bdiv_q_1 && defined(BINVERT_42525) 170#define mpn_divexact_by42525x16(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,CNST_LIMB(42525),BINVERT_42525,4) 171#else 172#define mpn_divexact_by42525x16(dst,src,size) mpn_divexact_1(dst,src,size,CNST_LIMB(42525)<<4) 173#endif 174#endif 175 176#ifndef mpn_divexact_by2835x64 177#if HAVE_NATIVE_mpn_pi1_bdiv_q_1 && defined(BINVERT_2835) 178#define mpn_divexact_by2835x64(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,CNST_LIMB(2835),BINVERT_2835,6) 179#else 180#define mpn_divexact_by2835x64(dst,src,size) mpn_divexact_1(dst,src,size,CNST_LIMB(2835)<<6) 181#endif 182#endif 183 184#ifndef mpn_divexact_by255x182712915 185#if GMP_NUMB_BITS < 36 186#if HAVE_NATIVE_mpn_bdiv_q_2_pi2 && defined(BINVERT_255x182712915H) 187/* FIXME: use mpn_bdiv_q_2_pi2 */ 188#endif 189#if HAVE_NATIVE_mpn_pi1_bdiv_q_1 && defined(BINVERT_182712915) 190#define mpn_divexact_by255x182712915(dst,src,size) \ 191 do { \ 192 mpn_pi1_bdiv_q_1(dst,src,size,CNST_LIMB(182712915),BINVERT_182712915,0); \ 193 mpn_divexact_by255(dst,dst,size); \ 194 } while(0) 195#else 196#define mpn_divexact_by255x182712915(dst,src,size) \ 197 do { \ 198 mpn_divexact_1(dst,src,size,CNST_LIMB(182712915)); \ 199 mpn_divexact_by255(dst,dst,size); \ 200 } while(0) 201#endif 202#else /* GMP_NUMB_BITS > 35 */ 203#if HAVE_NATIVE_mpn_pi1_bdiv_q_1 && defined(BINVERT_255x182712915) 204#define mpn_divexact_by255x182712915(dst,src,size) \ 205 
mpn_pi1_bdiv_q_1(dst,src,size,255*CNST_LIMB(182712915),BINVERT_255x182712915,0) 206#else 207#define mpn_divexact_by255x182712915(dst,src,size) mpn_divexact_1(dst,src,size,255*CNST_LIMB(182712915)) 208#endif 209#endif /* GMP_NUMB_BITS >?< 36 */ 210#endif 211 212#ifndef mpn_divexact_by255x188513325 213#if GMP_NUMB_BITS < 36 214#if HAVE_NATIVE_mpn_bdiv_q_1_pi2 && defined(BINVERT_255x188513325H) 215/* FIXME: use mpn_bdiv_q_1_pi2 */ 216#endif 217#if HAVE_NATIVE_mpn_pi1_bdiv_q_1 && defined(BINVERT_188513325) 218#define mpn_divexact_by255x188513325(dst,src,size) \ 219 do { \ 220 mpn_pi1_bdiv_q_1(dst,src,size,CNST_LIMB(188513325),BINVERT_188513325,0); \ 221 mpn_divexact_by255(dst,dst,size); \ 222 } while(0) 223#else 224#define mpn_divexact_by255x188513325(dst,src,size) \ 225 do { \ 226 mpn_divexact_1(dst,src,size,CNST_LIMB(188513325)); \ 227 mpn_divexact_by255(dst,dst,size); \ 228 } while(0) 229#endif 230#else /* GMP_NUMB_BITS > 35 */ 231#if HAVE_NATIVE_mpn_pi1_bdiv_q_1 && defined(BINVERT_255x188513325) 232#define mpn_divexact_by255x188513325(dst,src,size) \ 233 mpn_pi1_bdiv_q_1(dst,src,size,255*CNST_LIMB(188513325),BINVERT_255x188513325,0) 234#else 235#define mpn_divexact_by255x188513325(dst,src,size) mpn_divexact_1(dst,src,size,255*CNST_LIMB(188513325)) 236#endif 237#endif /* GMP_NUMB_BITS >?< 36 */ 238#endif 239 240/* Interpolation for Toom-8.5 (or Toom-8), using the evaluation 241 points: infinity(8.5 only), +-8, +-4, +-2, +-1, +-1/4, +-1/2, 242 +-1/8, 0. More precisely, we want to compute 243 f(2^(GMP_NUMB_BITS * n)) for a polynomial f of degree 15 (or 244 14), given the 16 (rsp. 15) values: 245 246 r0 = limit at infinity of f(x) / x^7, 247 r1 = f(8),f(-8), 248 r2 = f(4),f(-4), 249 r3 = f(2),f(-2), 250 r4 = f(1),f(-1), 251 r5 = f(1/4),f(-1/4), 252 r6 = f(1/2),f(-1/2), 253 r7 = f(1/8),f(-1/8), 254 r8 = f(0). 255 256 All couples of the form f(n),f(-n) must be already mixed with 257 toom_couple_handling(f(n),...,f(-n),...) 
258 259 The result is stored in {pp, spt + 7*n (or 8*n)}. 260 At entry, r8 is stored at {pp, 2n}, 261 r6 is stored at {pp + 3n, 3n + 1}. 262 r4 is stored at {pp + 7n, 3n + 1}. 263 r2 is stored at {pp +11n, 3n + 1}. 264 r0 is stored at {pp +15n, spt}. 265 266 The other values are 3n+1 limbs each (with most significant limbs small). 267 268 Negative intermediate results are stored two-complemented. 269 Inputs are destroyed. 270*/ 271 272void 273mpn_toom_interpolate_16pts (mp_ptr pp, mp_ptr r1, mp_ptr r3, mp_ptr r5, mp_ptr r7, 274 mp_size_t n, mp_size_t spt, int half, mp_ptr wsi) 275{ 276 mp_limb_t cy; 277 mp_size_t n3; 278 mp_size_t n3p1; 279 n3 = 3 * n; 280 n3p1 = n3 + 1; 281 282#define r6 (pp + n3) /* 3n+1 */ 283#define r4 (pp + 7 * n) /* 3n+1 */ 284#define r2 (pp +11 * n) /* 3n+1 */ 285#define r0 (pp +15 * n) /* s+t <= 2*n */ 286 287 ASSERT( spt <= 2 * n ); 288 /******************************* interpolation *****************************/ 289 if( half != 0) { 290 cy = mpn_sub_n (r4, r4, r0, spt); 291 MPN_DECR_U (r4 + spt, n3p1 - spt, cy); 292 293 cy = DO_mpn_sublsh_n (r3, r0, spt, 14, wsi); 294 MPN_DECR_U (r3 + spt, n3p1 - spt, cy); 295 DO_mpn_subrsh(r6, n3p1, r0, spt, 2, wsi); 296 297 cy = DO_mpn_sublsh_n (r2, r0, spt, 28, wsi); 298 MPN_DECR_U (r2 + spt, n3p1 - spt, cy); 299 DO_mpn_subrsh(r5, n3p1, r0, spt, 4, wsi); 300 301 cy = DO_mpn_sublsh_n (r1 + BIT_CORRECTION, r0, spt, 42 - CORRECTION_BITS, wsi); 302 MPN_DECR_U (r1 + spt + BIT_CORRECTION, n3p1 - spt - BIT_CORRECTION, cy); 303#if BIT_CORRECTION 304 /* FIXME: assumes r7[n3p1] is writable (it is if r5 follows). */ 305 cy = r7[n3p1]; 306 r7[n3p1] = 0x80; 307#endif 308 DO_mpn_subrsh(r7, n3p1 + BIT_CORRECTION, r0, spt, 6, wsi); 309#if BIT_CORRECTION 310 /* FIXME: assumes r7[n3p1] is writable. 
*/ 311 ASSERT ( BIT_CORRECTION > 0 || r7[n3p1] == 0x80 ); 312 r7[n3p1] = cy; 313#endif 314 }; 315 316 r5[n3] -= DO_mpn_sublsh_n (r5 + n, pp, 2 * n, 28, wsi); 317 DO_mpn_subrsh(r2 + n, 2 * n + 1, pp, 2 * n, 4, wsi); 318 319#if HAVE_NATIVE_mpn_add_n_sub_n 320 mpn_add_n_sub_n (r2, r5, r5, r2, n3p1); 321#else 322 mpn_sub_n (wsi, r5, r2, n3p1); /* can be negative */ 323 ASSERT_NOCARRY(mpn_add_n (r2, r2, r5, n3p1)); 324 MP_PTR_SWAP(r5, wsi); 325#endif 326 327 r6[n3] -= DO_mpn_sublsh_n (r6 + n, pp, 2 * n, 14, wsi); 328 DO_mpn_subrsh(r3 + n, 2 * n + 1, pp, 2 * n, 2, wsi); 329 330#if HAVE_NATIVE_mpn_add_n_sub_n 331 mpn_add_n_sub_n (r3, r6, r6, r3, n3p1); 332#else 333 ASSERT_NOCARRY(mpn_add_n (wsi, r3, r6, n3p1)); 334 mpn_sub_n (r6, r6, r3, n3p1); /* can be negative */ 335 MP_PTR_SWAP(r3, wsi); 336#endif 337 338 cy = DO_mpn_sublsh_n (r7 + n + BIT_CORRECTION, pp, 2 * n, 42 - CORRECTION_BITS, wsi); 339#if BIT_CORRECTION 340 MPN_DECR_U (r1 + n, 2 * n + 1, pp[0] >> 6); 341 cy = DO_mpn_sublsh_n (r1 + n, pp + 1, 2 * n - 1, GMP_NUMB_BITS - 6, wsi); 342 cy = mpn_sub_1(r1 + 3 * n - 1, r1 + 3 * n - 1, 2, cy); 343 ASSERT ( BIT_CORRECTION > 0 || cy != 0 ); 344#else 345 r7[n3] -= cy; 346 DO_mpn_subrsh(r1 + n, 2 * n + 1, pp, 2 * n, 6, wsi); 347#endif 348 349#if HAVE_NATIVE_mpn_add_n_sub_n 350 mpn_add_n_sub_n (r1, r7, r7, r1, n3p1); 351#else 352 mpn_sub_n (wsi, r7, r1, n3p1); /* can be negative */ 353 mpn_add_n (r1, r1, r7, n3p1); /* if BIT_CORRECTION != 0, can give a carry. 
*/ 354 MP_PTR_SWAP(r7, wsi); 355#endif 356 357 r4[n3] -= mpn_sub_n (r4+n, r4+n, pp, 2 * n); 358 359#if AORSMUL_FASTER_2AORSLSH 360 mpn_submul_1 (r5, r6, n3p1, 1028); /* can be negative */ 361#else 362 DO_mpn_sublsh_n (r5, r6, n3p1, 2, wsi); /* can be negative */ 363 DO_mpn_sublsh_n (r5, r6, n3p1,10, wsi); /* can be negative */ 364#endif 365 366 mpn_submul_1 (r7, r5, n3p1, 1300); /* can be negative */ 367#if AORSMUL_FASTER_3AORSLSH 368 mpn_submul_1 (r7, r6, n3p1, 1052688); /* can be negative */ 369#else 370 DO_mpn_sublsh_n (r7, r6, n3p1, 4, wsi); /* can be negative */ 371 DO_mpn_sublsh_n (r7, r6, n3p1,12, wsi); /* can be negative */ 372 DO_mpn_sublsh_n (r7, r6, n3p1,20, wsi); /* can be negative */ 373#endif 374 mpn_divexact_by255x188513325(r7, r7, n3p1); 375 376 mpn_submul_1 (r5, r7, n3p1, 12567555); /* can be negative */ 377 /* A division by 2835x64 followsi. Warning: the operand can be negative! */ 378 mpn_divexact_by2835x64(r5, r5, n3p1); 379 if ((r5[n3] & (GMP_NUMB_MAX << (GMP_NUMB_BITS-7))) != 0) 380 r5[n3] |= (GMP_NUMB_MAX << (GMP_NUMB_BITS-6)); 381 382#if AORSMUL_FASTER_AORS_AORSLSH 383 mpn_submul_1 (r6, r7, n3p1, 4095); /* can be negative */ 384#else 385 mpn_add_n (r6, r6, r7, n3p1); /* can give a carry */ 386 DO_mpn_sublsh_n (r6, r7, n3p1, 12, wsi); /* can be negative */ 387#endif 388#if AORSMUL_FASTER_2AORSLSH 389 mpn_addmul_1 (r6, r5, n3p1, 240); /* can be negative */ 390#else 391 DO_mpn_addlsh_n (r6, r5, n3p1, 8, wsi); /* can give a carry */ 392 DO_mpn_sublsh_n (r6, r5, n3p1, 4, wsi); /* can be negative */ 393#endif 394 /* A division by 255x4 followsi. Warning: the operand can be negative! 
*/ 395 mpn_divexact_by255x4(r6, r6, n3p1); 396 if ((r6[n3] & (GMP_NUMB_MAX << (GMP_NUMB_BITS-3))) != 0) 397 r6[n3] |= (GMP_NUMB_MAX << (GMP_NUMB_BITS-2)); 398 399 ASSERT_NOCARRY(DO_mpn_sublsh_n (r3, r4, n3p1, 7, wsi)); 400 401 ASSERT_NOCARRY(DO_mpn_sublsh_n (r2, r4, n3p1, 13, wsi)); 402 ASSERT_NOCARRY(mpn_submul_1 (r2, r3, n3p1, 400)); 403 404 /* If GMP_NUMB_BITS < 42 next operations on r1 can give a carry!*/ 405 DO_mpn_sublsh_n (r1, r4, n3p1, 19, wsi); 406 mpn_submul_1 (r1, r2, n3p1, 1428); 407 mpn_submul_1 (r1, r3, n3p1, 112896); 408 mpn_divexact_by255x182712915(r1, r1, n3p1); 409 410 ASSERT_NOCARRY(mpn_submul_1 (r2, r1, n3p1, 15181425)); 411 mpn_divexact_by42525x16(r2, r2, n3p1); 412 413#if AORSMUL_FASTER_AORS_2AORSLSH 414 ASSERT_NOCARRY(mpn_submul_1 (r3, r1, n3p1, 3969)); 415#else 416 ASSERT_NOCARRY(mpn_sub_n (r3, r3, r1, n3p1)); 417 ASSERT_NOCARRY(DO_mpn_addlsh_n (r3, r1, n3p1, 7, wsi)); 418 ASSERT_NOCARRY(DO_mpn_sublsh_n (r3, r1, n3p1, 12, wsi)); 419#endif 420 ASSERT_NOCARRY(mpn_submul_1 (r3, r2, n3p1, 900)); 421 mpn_divexact_by9x16(r3, r3, n3p1); 422 423 ASSERT_NOCARRY(mpn_sub_n (r4, r4, r1, n3p1)); 424 ASSERT_NOCARRY(mpn_sub_n (r4, r4, r3, n3p1)); 425 ASSERT_NOCARRY(mpn_sub_n (r4, r4, r2, n3p1)); 426 427 mpn_add_n (r6, r2, r6, n3p1); 428 ASSERT_NOCARRY(mpn_rshift(r6, r6, n3p1, 1)); 429 ASSERT_NOCARRY(mpn_sub_n (r2, r2, r6, n3p1)); 430 431 mpn_sub_n (r5, r3, r5, n3p1); 432 ASSERT_NOCARRY(mpn_rshift(r5, r5, n3p1, 1)); 433 ASSERT_NOCARRY(mpn_sub_n (r3, r3, r5, n3p1)); 434 435 mpn_add_n (r7, r1, r7, n3p1); 436 ASSERT_NOCARRY(mpn_rshift(r7, r7, n3p1, 1)); 437 ASSERT_NOCARRY(mpn_sub_n (r1, r1, r7, n3p1)); 438 439 /* last interpolation steps... */ 440 /* ... 
could be mixed with recomposition 441 ||H-r7|M-r7|L-r7| ||H-r5|M-r5|L-r5| 442 */ 443 444 /***************************** recomposition *******************************/ 445 /* 446 pp[] prior to operations: 447 |M r0|L r0|___||H r2|M r2|L r2|___||H r4|M r4|L r4|___||H r6|M r6|L r6|____|H_r8|L r8|pp 448 449 summation scheme for remaining operations: 450 |__16|n_15|n_14|n_13|n_12|n_11|n_10|n__9|n__8|n__7|n__6|n__5|n__4|n__3|n__2|n___|n___|pp 451 |M r0|L r0|___||H r2|M r2|L r2|___||H r4|M r4|L r4|___||H r6|M r6|L r6|____|H_r8|L r8|pp 452 ||H r1|M r1|L r1| ||H r3|M r3|L r3| ||H_r5|M_r5|L_r5| ||H r7|M r7|L r7| 453 */ 454 455 cy = mpn_add_n (pp + n, pp + n, r7, n); 456 cy = mpn_add_1 (pp + 2 * n, r7 + n, n, cy); 457#if HAVE_NATIVE_mpn_add_nc 458 cy = r7[n3] + mpn_add_nc(pp + n3, pp + n3, r7 + 2 * n, n, cy); 459#else 460 MPN_INCR_U (r7 + 2 * n, n + 1, cy); 461 cy = r7[n3] + mpn_add_n (pp + n3, pp + n3, r7 + 2 * n, n); 462#endif 463 MPN_INCR_U (pp + 4 * n, 2 * n + 1, cy); 464 465 pp[2 * n3]+= mpn_add_n (pp + 5 * n, pp + 5 * n, r5, n); 466 cy = mpn_add_1 (pp + 2 * n3, r5 + n, n, pp[2 * n3]); 467#if HAVE_NATIVE_mpn_add_nc 468 cy = r5[n3] + mpn_add_nc(pp + 7 * n, pp + 7 * n, r5 + 2 * n, n, cy); 469#else 470 MPN_INCR_U (r5 + 2 * n, n + 1, cy); 471 cy = r5[n3] + mpn_add_n (pp + 7 * n, pp + 7 * n, r5 + 2 * n, n); 472#endif 473 MPN_INCR_U (pp + 8 * n, 2 * n + 1, cy); 474 475 pp[10 * n]+= mpn_add_n (pp + 9 * n, pp + 9 * n, r3, n); 476 cy = mpn_add_1 (pp + 10 * n, r3 + n, n, pp[10 * n]); 477#if HAVE_NATIVE_mpn_add_nc 478 cy = r3[n3] + mpn_add_nc(pp +11 * n, pp +11 * n, r3 + 2 * n, n, cy); 479#else 480 MPN_INCR_U (r3 + 2 * n, n + 1, cy); 481 cy = r3[n3] + mpn_add_n (pp +11 * n, pp +11 * n, r3 + 2 * n, n); 482#endif 483 MPN_INCR_U (pp +12 * n, 2 * n + 1, cy); 484 485 pp[14 * n]+=mpn_add_n (pp +13 * n, pp +13 * n, r1, n); 486 if ( half ) { 487 cy = mpn_add_1 (pp + 14 * n, r1 + n, n, pp[14 * n]); 488#if HAVE_NATIVE_mpn_add_nc 489 if(LIKELY(spt > n)) { 490 cy = r1[n3] + mpn_add_nc(pp + 15 
* n, pp + 15 * n, r1 + 2 * n, n, cy); 491 MPN_INCR_U (pp + 16 * n, spt - n, cy); 492 } else { 493 ASSERT_NOCARRY(mpn_add_nc(pp + 15 * n, pp + 15 * n, r1 + 2 * n, spt, cy)); 494 } 495#else 496 MPN_INCR_U (r1 + 2 * n, n + 1, cy); 497 if(LIKELY(spt > n)) { 498 cy = r1[n3] + mpn_add_n (pp + 15 * n, pp + 15 * n, r1 + 2 * n, n); 499 MPN_INCR_U (pp + 16 * n, spt - n, cy); 500 } else { 501 ASSERT_NOCARRY(mpn_add_n (pp + 15 * n, pp + 15 * n, r1 + 2 * n, spt)); 502 } 503#endif 504 } else { 505 ASSERT_NOCARRY(mpn_add_1 (pp + 14 * n, r1 + n, spt, pp[14 * n])); 506 } 507 508#undef r0 509#undef r2 510#undef r4 511#undef r6 512} 513