/* Include file for internal GNU MP types and definitions.

   THE CONTENTS OF THIS FILE ARE FOR INTERNAL USE AND ARE ALMOST CERTAIN TO
   BE SUBJECT TO INCOMPATIBLE CHANGES IN FUTURE GNU MP RELEASES.

Copyright 1991-2018 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of either:

  * the GNU Lesser General Public License as published by the Free
    Software Foundation; either version 3 of the License, or (at your
    option) any later version.

or

  * the GNU General Public License as published by the Free Software
    Foundation; either version 2 of the License, or (at your option) any
    later version.

or both in parallel, as here.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received copies of the GNU General Public License and the
GNU Lesser General Public License along with the GNU MP Library.  If not,
see https://www.gnu.org/licenses/.  */


/* __GMP_DECLSPEC must be given on any global data that will be accessed
   from outside libgmp, meaning from the test or development programs, or
   from libgmpxx.  Failing to do this will result in an incorrect address
   being used for the accesses.  On functions __GMP_DECLSPEC makes calls
   from outside libgmp more efficient, but they'll still work fine without
   it.  */


#ifndef __GMP_IMPL_H__
#define __GMP_IMPL_H__

#if defined _CRAY
#include <intrinsics.h>  /* for _popcnt */
#endif

/* For INT_MAX, etc.  We used to avoid it because of a bug (on solaris,
   gcc 2.95 under -mcpu=ultrasparc in ABI=32 ends up getting wrong
   values (the ABI=64 values)), but it should be safe now.

   On Cray vector systems, however, we need the system limits.h since sizes
   of signed and unsigned types can differ there, depending on compiler
   options (eg. -hnofastmd), making our SHRT_MAX etc expressions fail.  For
   reference, int can be 46 or 64 bits, whereas uint is always 64 bits; and
   short can be 24, 32, 46 or 64 bits, and different for ushort.  */

#include <limits.h>

/* For fat.h and other fat binary stuff.
   No need for __GMP_ATTRIBUTE_PURE or __GMP_NOTHROW, since functions
   declared this way are only used to set function pointers in __gmpn_cpuvec,
   they're not called directly.  */
#define DECL_add_n(name) \
  __GMP_DECLSPEC mp_limb_t name (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)
#define DECL_addlsh1_n(name) \
  DECL_add_n (name)
#define DECL_addlsh2_n(name) \
  DECL_add_n (name)
#define DECL_addmul_1(name) \
  __GMP_DECLSPEC mp_limb_t name (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t)
#define DECL_addmul_2(name) \
  __GMP_DECLSPEC mp_limb_t name (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr)
#define DECL_bdiv_dbm1c(name) \
  __GMP_DECLSPEC mp_limb_t name (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t)
#define DECL_cnd_add_n(name) \
  __GMP_DECLSPEC mp_limb_t name (mp_limb_t, mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)
#define DECL_cnd_sub_n(name) \
  __GMP_DECLSPEC mp_limb_t name (mp_limb_t, mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)
#define DECL_com(name) \
  __GMP_DECLSPEC void name (mp_ptr, mp_srcptr, mp_size_t)
#define DECL_copyd(name) \
  __GMP_DECLSPEC void name (mp_ptr, mp_srcptr, mp_size_t)
#define DECL_copyi(name) \
  DECL_copyd (name)
#define DECL_divexact_1(name) \
  __GMP_DECLSPEC void name (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t)
#define DECL_divexact_by3c(name) \
  __GMP_DECLSPEC mp_limb_t name (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t)
#define DECL_divrem_1(name) \
  __GMP_DECLSPEC mp_limb_t name (mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t)
#define DECL_gcd_11(name) \
  __GMP_DECLSPEC mp_limb_t name (mp_limb_t, mp_limb_t)
#define DECL_lshift(name) \
  __GMP_DECLSPEC mp_limb_t name (mp_ptr, mp_srcptr, mp_size_t, unsigned)
#define DECL_lshiftc(name) \
  DECL_lshift (name)
#define DECL_mod_1(name) \
  __GMP_DECLSPEC mp_limb_t name (mp_srcptr, mp_size_t, mp_limb_t)
#define DECL_mod_1_1p(name) \
  __GMP_DECLSPEC mp_limb_t name (mp_srcptr, mp_size_t, mp_limb_t, const mp_limb_t [])
#define DECL_mod_1_1p_cps(name) \
  __GMP_DECLSPEC void name (mp_limb_t cps[], mp_limb_t b)
#define DECL_mod_1s_2p(name) \
  DECL_mod_1_1p (name)
#define DECL_mod_1s_2p_cps(name) \
  DECL_mod_1_1p_cps (name)
#define DECL_mod_1s_4p(name) \
  DECL_mod_1_1p (name)
#define DECL_mod_1s_4p_cps(name) \
  DECL_mod_1_1p_cps (name)
#define DECL_mod_34lsub1(name) \
  __GMP_DECLSPEC mp_limb_t name (mp_srcptr, mp_size_t)
#define DECL_modexact_1c_odd(name) \
  __GMP_DECLSPEC mp_limb_t name (mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t)
#define DECL_mul_1(name) \
  DECL_addmul_1 (name)
#define DECL_mul_basecase(name) \
  __GMP_DECLSPEC void name (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t)
#define DECL_mullo_basecase(name) \
  __GMP_DECLSPEC void name (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)
#define DECL_preinv_divrem_1(name) \
  __GMP_DECLSPEC mp_limb_t name (mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t, int)
#define DECL_preinv_mod_1(name) \
  __GMP_DECLSPEC mp_limb_t name (mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t)
#define DECL_redc_1(name) \
  __GMP_DECLSPEC mp_limb_t name (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t)
#define DECL_redc_2(name) \
  __GMP_DECLSPEC mp_limb_t name (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr)
#define DECL_rshift(name) \
  DECL_lshift (name)
#define DECL_sqr_basecase(name) \
  __GMP_DECLSPEC void name (mp_ptr, mp_srcptr, mp_size_t)
#define DECL_sub_n(name) \
  DECL_add_n (name)
#define DECL_sublsh1_n(name) \
  DECL_add_n (name)
#define DECL_submul_1(name) \
  DECL_addmul_1 (name)
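/* For instance, a fat build's per-CPU entry points can be declared with
   these macros; DECL_add_n (__gmpn_add_n_k8) would expand to

       __GMP_DECLSPEC mp_limb_t __gmpn_add_n_k8 (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)

   (the name __gmpn_add_n_k8 here is only an illustration, not necessarily a
   function present in any particular build).  */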
#if ! defined (__GMP_WITHIN_CONFIGURE)
#include "config.h"
#include "gmp.h"
#include "gmp-mparam.h"
#include "fib_table.h"
#include "fac_table.h"
#include "mp_bases.h"
#if WANT_FAT_BINARY
#include "fat.h"
#endif
#endif

#if HAVE_INTTYPES_H      /* for uint_least32_t */
# include <inttypes.h>
#else
# if HAVE_STDINT_H
#  include <stdint.h>
# endif
#endif

#ifdef __cplusplus
#include <cstring>  /* for strlen */
#include <string>   /* for std::string */
#endif


#ifndef WANT_TMP_DEBUG  /* for TMP_ALLOC_LIMBS_2 and others */
#define WANT_TMP_DEBUG 0
#endif

/* The following tries to get a good version of alloca.  The tests are
   adapted from autoconf AC_FUNC_ALLOCA, with a couple of additions.
   Whether this succeeds is tested by GMP_FUNC_ALLOCA, and HAVE_ALLOCA will
   be set up appropriately.

   ifndef alloca - a cpp define might already exist.
       glibc <stdlib.h> includes <alloca.h> which uses GCC __builtin_alloca.
       HP cc +Olibcalls adds a #define of alloca to __builtin_alloca.

   GCC __builtin_alloca - preferred whenever available.

   _AIX pragma - IBM compilers need a #pragma in "each module that needs to
       use alloca".  Pragma indented to protect pre-ANSI cpp's.  _IBMR2 was
       used in past versions of GMP, retained still in case it matters.

       The autoconf manual says this pragma needs to be at the start of a C
       file, apart from comments and preprocessor directives.  Is that true?
       xlc on aix 4.xxx doesn't seem to mind it being after prototypes etc
       from gmp.h.
*/

#ifndef alloca
# ifdef __GNUC__
#  define alloca __builtin_alloca
# else
#  ifdef __DECC
#   define alloca(x) __ALLOCA(x)
#  else
#   ifdef _MSC_VER
#    include <malloc.h>
#    define alloca _alloca
#   else
#    if HAVE_ALLOCA_H
#     include <alloca.h>
#    else
#     if defined (_AIX) || defined (_IBMR2)
 #pragma alloca
#     else
#      if !defined (__NetBSD__)
char *alloca ();
#      else
#       include <stdlib.h>
#      endif
#     endif
#    endif
#   endif
#  endif
# endif
#endif


/* if not provided by gmp-mparam.h */
#ifndef GMP_LIMB_BYTES
#define GMP_LIMB_BYTES  SIZEOF_MP_LIMB_T
#endif
#ifndef GMP_LIMB_BITS
#define GMP_LIMB_BITS  (8 * SIZEOF_MP_LIMB_T)
#endif

#define BITS_PER_ULONG  (8 * SIZEOF_UNSIGNED_LONG)


/* gmp_uint_least32_t is an unsigned integer type with at least 32 bits. */
#if HAVE_UINT_LEAST32_T
typedef uint_least32_t      gmp_uint_least32_t;
#else
#if SIZEOF_UNSIGNED_SHORT >= 4
typedef unsigned short      gmp_uint_least32_t;
#else
#if SIZEOF_UNSIGNED >= 4
typedef unsigned            gmp_uint_least32_t;
#else
typedef unsigned long       gmp_uint_least32_t;
#endif
#endif
#endif


/* gmp_intptr_t, for pointer to integer casts */
#if HAVE_INTPTR_T
typedef intptr_t            gmp_intptr_t;
#else /* fallback */
typedef size_t              gmp_intptr_t;
#endif


/* pre-inverse types for truncating division and modulo */
typedef struct {mp_limb_t inv32;} gmp_pi1_t;
typedef struct {mp_limb_t inv21, inv32, inv53;} gmp_pi2_t;
/* "const" basically means a function does nothing but examine its arguments
   and give a return value, it doesn't read or write any memory (neither
   global nor pointed to by arguments), and has no other side-effects.  This
   is more restrictive than "pure".  See info node "(gcc)Function
   Attributes".  __GMP_NO_ATTRIBUTE_CONST_PURE lets tune/common.c etc turn
   this off when trying to write timing loops.  */
#if HAVE_ATTRIBUTE_CONST && ! defined (__GMP_NO_ATTRIBUTE_CONST_PURE)
#define ATTRIBUTE_CONST  __attribute__ ((const))
#else
#define ATTRIBUTE_CONST
#endif

#if HAVE_ATTRIBUTE_NORETURN
#define ATTRIBUTE_NORETURN  __attribute__ ((noreturn))
#else
#define ATTRIBUTE_NORETURN
#endif

/* "malloc" means a function behaves like malloc in that the pointer it
   returns doesn't alias anything.  */
#if HAVE_ATTRIBUTE_MALLOC
#define ATTRIBUTE_MALLOC  __attribute__ ((malloc))
#else
#define ATTRIBUTE_MALLOC
#endif


#if ! HAVE_STRCHR
#define strchr(s,c)  index(s,c)
#endif

#if ! HAVE_MEMSET
#define memset(p, c, n)                                         \
  do {                                                          \
    unsigned char *__memset__p = (unsigned char *) (p);         \
    int  __i;                                                   \
    ASSERT ((n) >= 0);                                          \
    for (__i = 0; __i < (n); __i++)                             \
      __memset__p[__i] = (c);                                   \
  } while (0)
#endif

/* va_copy is standard in C99, and gcc provides __va_copy when in strict C89
   mode.  Falling back to a memcpy will give maximum portability, since it
   works no matter whether va_list is a pointer, struct or array.  */
#if ! defined (va_copy) && defined (__va_copy)
#define va_copy(dst,src)  __va_copy(dst,src)
#endif
#if ! defined (va_copy)
#define va_copy(dst,src) \
  do { memcpy (&(dst), &(src), sizeof (va_list)); } while (0)
#endif


/* HAVE_HOST_CPU_alpha_CIX is 1 on an alpha with the CIX instructions
   (ie. ctlz, ctpop, cttz).  */
#if HAVE_HOST_CPU_alphaev67 || HAVE_HOST_CPU_alphaev68  \
  || HAVE_HOST_CPU_alphaev7
#define HAVE_HOST_CPU_alpha_CIX 1
#endif


#if defined (__cplusplus)
extern "C" {
#endif


/* Usage: TMP_DECL;
          TMP_MARK;
          ptr = TMP_ALLOC (bytes);
          TMP_FREE;

   Small allocations should use TMP_SALLOC, big allocations should use
   TMP_BALLOC.  Allocations that might be small or big should use TMP_ALLOC.

   Functions that use just TMP_SALLOC should use TMP_SDECL, TMP_SMARK, and
   TMP_SFREE.

   TMP_DECL just declares a variable, but might be empty and so must be last
   in a list of variables.  TMP_MARK must be done before any TMP_ALLOC.
   TMP_ALLOC(0) is not allowed.  TMP_FREE doesn't need to be done if a
   TMP_MARK was made, but then no TMP_ALLOCs.  */

/* The alignment in bytes, used for TMP_ALLOCed blocks, when alloca or
   __gmp_allocate_func doesn't already determine it.  */
union tmp_align_t {
  mp_limb_t  l;
  double     d;
  char       *p;
};
#define __TMP_ALIGN  sizeof (union tmp_align_t)

/* Return "a" rounded upwards to a multiple of "m", if it isn't already.
   "a" must be an unsigned type.
   This is designed for use with a compile-time constant "m".
   The POW2 case is expected to be usual, and gcc 3.0 and up recognises
   "(-(8*n))%8" or the like is always zero, which means the rounding up in
   the WANT_TMP_NOTREENTRANT version of TMP_ALLOC below will be a noop.  */
#define ROUND_UP_MULTIPLE(a,m)          \
  (POW2_P(m) ? (a) + (-(a))%(m)         \
   : (a)+(m)-1 - (((a)+(m)-1) % (m)))
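/* A worked example (illustrative only): for unsigned a = 13 and m = 8 the
   POW2 branch computes 13 + ((-13) % 8); since "a" is unsigned, -13 wraps
   modulo a power of two, so (-13) % 8 == 3 and the result is 16.  For
   m = 12 the non-POW2 branch computes 13+12-1 - ((13+12-1) % 12)
   = 24 - 0 = 24.  */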
#if defined (WANT_TMP_ALLOCA) || defined (WANT_TMP_REENTRANT)
struct tmp_reentrant_t {
  struct tmp_reentrant_t  *next;
  size_t                  size;   /* bytes, including header */
};
__GMP_DECLSPEC void *__gmp_tmp_reentrant_alloc (struct tmp_reentrant_t **, size_t) ATTRIBUTE_MALLOC;
__GMP_DECLSPEC void  __gmp_tmp_reentrant_free (struct tmp_reentrant_t *);
#endif

#if WANT_TMP_ALLOCA
#define TMP_SDECL
#define TMP_DECL                struct tmp_reentrant_t *__tmp_marker
#define TMP_SMARK
#define TMP_MARK                __tmp_marker = 0
#define TMP_SALLOC(n)           alloca(n)
#define TMP_BALLOC(n)           __gmp_tmp_reentrant_alloc (&__tmp_marker, n)
/* The peculiar stack allocation limit here is chosen for efficient asm.  */
#define TMP_ALLOC(n)                                                    \
  (LIKELY ((n) <= 0x7f00) ? TMP_SALLOC(n) : TMP_BALLOC(n))
#define TMP_SFREE
#define TMP_FREE                                                        \
  do {                                                                  \
    if (UNLIKELY (__tmp_marker != 0))                                   \
      __gmp_tmp_reentrant_free (__tmp_marker);                          \
  } while (0)
#endif

#if WANT_TMP_REENTRANT
#define TMP_SDECL               TMP_DECL
#define TMP_DECL                struct tmp_reentrant_t *__tmp_marker
#define TMP_SMARK               TMP_MARK
#define TMP_MARK                __tmp_marker = 0
#define TMP_SALLOC(n)           TMP_ALLOC(n)
#define TMP_BALLOC(n)           TMP_ALLOC(n)
#define TMP_ALLOC(n)            __gmp_tmp_reentrant_alloc (&__tmp_marker, n)
#define TMP_SFREE               TMP_FREE
#define TMP_FREE                __gmp_tmp_reentrant_free (__tmp_marker)
#endif

#if WANT_TMP_NOTREENTRANT
struct tmp_marker
{
  struct tmp_stack *which_chunk;
  void *alloc_point;
};
__GMP_DECLSPEC void *__gmp_tmp_alloc (unsigned long) ATTRIBUTE_MALLOC;
__GMP_DECLSPEC void __gmp_tmp_mark (struct tmp_marker *);
__GMP_DECLSPEC void __gmp_tmp_free (struct tmp_marker *);
#define TMP_SDECL               TMP_DECL
#define TMP_DECL                struct tmp_marker __tmp_marker
#define TMP_SMARK               TMP_MARK
#define TMP_MARK                __gmp_tmp_mark (&__tmp_marker)
#define TMP_SALLOC(n)           TMP_ALLOC(n)
#define TMP_BALLOC(n)           TMP_ALLOC(n)
#define TMP_ALLOC(n)                                                    \
  __gmp_tmp_alloc (ROUND_UP_MULTIPLE ((unsigned long) (n), __TMP_ALIGN))
#define TMP_SFREE               TMP_FREE
#define TMP_FREE                __gmp_tmp_free (&__tmp_marker)
#endif

#if WANT_TMP_DEBUG
/* See tal-debug.c for some comments. */
struct tmp_debug_t {
  struct tmp_debug_entry_t  *list;
  const char                *file;
  int                       line;
};
struct tmp_debug_entry_t {
  struct tmp_debug_entry_t  *next;
  void                      *block;
  size_t                    size;
};
__GMP_DECLSPEC void  __gmp_tmp_debug_mark (const char *, int, struct tmp_debug_t **,
                                           struct tmp_debug_t *,
                                           const char *, const char *);
__GMP_DECLSPEC void *__gmp_tmp_debug_alloc (const char *, int, int,
                                            struct tmp_debug_t **, const char *,
                                            size_t) ATTRIBUTE_MALLOC;
__GMP_DECLSPEC void  __gmp_tmp_debug_free (const char *, int, int,
                                           struct tmp_debug_t **,
                                           const char *, const char *);
#define TMP_SDECL TMP_DECL_NAME(__tmp_xmarker, "__tmp_marker")
#define TMP_DECL TMP_DECL_NAME(__tmp_xmarker, "__tmp_marker")
#define TMP_SMARK TMP_MARK_NAME(__tmp_xmarker, "__tmp_marker")
#define TMP_MARK TMP_MARK_NAME(__tmp_xmarker, "__tmp_marker")
#define TMP_SFREE TMP_FREE_NAME(__tmp_xmarker, "__tmp_marker")
#define TMP_FREE TMP_FREE_NAME(__tmp_xmarker, "__tmp_marker")
/* The marker variable is designed to provoke an uninitialized variable
   warning from the compiler if TMP_FREE is used without a TMP_MARK.
   __tmp_marker_inscope does the same for TMP_ALLOC.  Runtime tests pick
   these things up too.  */
#define TMP_DECL_NAME(marker, marker_name)                      \
  int        marker;                                            \
  int        __tmp_marker_inscope;                              \
  const char *__tmp_marker_name = marker_name;                  \
  struct tmp_debug_t  __tmp_marker_struct;                      \
  /* don't demand NULL, just cast a zero */                     \
  struct tmp_debug_t  *__tmp_marker = (struct tmp_debug_t *) 0
#define TMP_MARK_NAME(marker, marker_name)                      \
  do {                                                          \
    marker = 1;                                                 \
    __tmp_marker_inscope = 1;                                   \
    __gmp_tmp_debug_mark  (ASSERT_FILE, ASSERT_LINE,            \
                           &__tmp_marker, &__tmp_marker_struct, \
                           __tmp_marker_name, marker_name);     \
  } while (0)
#define TMP_SALLOC(n)           TMP_ALLOC(n)
#define TMP_BALLOC(n)           TMP_ALLOC(n)
#define TMP_ALLOC(size)                                                 \
  __gmp_tmp_debug_alloc (ASSERT_FILE, ASSERT_LINE,                      \
                         __tmp_marker_inscope,                          \
                         &__tmp_marker, __tmp_marker_name, size)
#define TMP_FREE_NAME(marker, marker_name)                      \
  do {                                                          \
    __gmp_tmp_debug_free  (ASSERT_FILE, ASSERT_LINE,            \
                           marker, &__tmp_marker,               \
                           __tmp_marker_name, marker_name);     \
  } while (0)
#endif /* WANT_TMP_DEBUG */


/* Allocating various types. */
#define TMP_ALLOC_TYPE(n,type)   ((type *) TMP_ALLOC ((n) * sizeof (type)))
#define TMP_SALLOC_TYPE(n,type)  ((type *) TMP_SALLOC ((n) * sizeof (type)))
#define TMP_BALLOC_TYPE(n,type)  ((type *) TMP_BALLOC ((n) * sizeof (type)))
#define TMP_ALLOC_LIMBS(n)       TMP_ALLOC_TYPE(n,mp_limb_t)
#define TMP_SALLOC_LIMBS(n)      TMP_SALLOC_TYPE(n,mp_limb_t)
#define TMP_BALLOC_LIMBS(n)      TMP_BALLOC_TYPE(n,mp_limb_t)
#define TMP_ALLOC_MP_PTRS(n)     TMP_ALLOC_TYPE(n,mp_ptr)
#define TMP_SALLOC_MP_PTRS(n)    TMP_SALLOC_TYPE(n,mp_ptr)
#define TMP_BALLOC_MP_PTRS(n)    TMP_BALLOC_TYPE(n,mp_ptr)

/* It's more efficient to allocate one block than many.  This is certainly
   true of the malloc methods, but it can even be true of alloca if that
   involves copying a chunk of stack (various RISCs), or a call to a stack
   bounds check (mingw).  In any case, when debugging keep separate blocks
   so a redzoning malloc debugger can protect each individually.  */
#define TMP_ALLOC_LIMBS_2(xp,xsize, yp,ysize)           \
  do {                                                  \
    if (WANT_TMP_DEBUG)                                 \
      {                                                 \
        (xp) = TMP_ALLOC_LIMBS (xsize);                 \
        (yp) = TMP_ALLOC_LIMBS (ysize);                 \
      }                                                 \
    else                                                \
      {                                                 \
        (xp) = TMP_ALLOC_LIMBS ((xsize) + (ysize));     \
        (yp) = (xp) + (xsize);                          \
      }                                                 \
  } while (0)
#define TMP_ALLOC_LIMBS_3(xp,xsize, yp,ysize, zp,zsize)         \
  do {                                                          \
    if (WANT_TMP_DEBUG)                                         \
      {                                                         \
        (xp) = TMP_ALLOC_LIMBS (xsize);                         \
        (yp) = TMP_ALLOC_LIMBS (ysize);                         \
        (zp) = TMP_ALLOC_LIMBS (zsize);                         \
      }                                                         \
    else                                                        \
      {                                                         \
        (xp) = TMP_ALLOC_LIMBS ((xsize) + (ysize) + (zsize));   \
        (yp) = (xp) + (xsize);                                  \
        (zp) = (yp) + (ysize);                                  \
      }                                                         \
  } while (0)

/* From gmp.h, nicer names for internal use. */
#define CRAY_Pragma(str)               __GMP_CRAY_Pragma(str)
#define MPN_CMP(result, xp, yp, size)  __GMPN_CMP(result, xp, yp, size)
#define LIKELY(cond)                   __GMP_LIKELY(cond)
#define UNLIKELY(cond)                 __GMP_UNLIKELY(cond)

#define ABS(x)        ((x) >= 0 ? (x) : -(x))
#define NEG_CAST(T,x) (- (__GMP_CAST (T, (x) + 1) - 1))
#define ABS_CAST(T,x) ((x) >= 0 ? __GMP_CAST (T, x) : NEG_CAST (T,x))
#undef MIN
#define MIN(l,o)      ((l) < (o) ? (l) : (o))
#undef MAX
#define MAX(h,i)      ((h) > (i) ? (h) : (i))
#define numberof(x)   (sizeof (x) / sizeof ((x)[0]))
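/* A minimal sketch of the temporary-allocation conventions above (the
   function example_f is hypothetical, not part of GMP):

       static mp_limb_t
       example_f (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t n)
       {
         mp_ptr     tp;
         mp_limb_t  cy;
         TMP_DECL;                  // must come last among the declarations
         TMP_MARK;                  // required before any TMP_ALLOC
         tp = TMP_ALLOC_LIMBS (n);  // scratch limbs
         cy = mpn_add_n (tp, ap, bp, n);
         cy += mpn_add_n (rp, rp, tp, n);
         TMP_FREE;
         return cy;
       }
*/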
/* Field access macros.  */
#define SIZ(x)   ((x)->_mp_size)
#define ABSIZ(x) ABS (SIZ (x))
#define PTR(x)   ((x)->_mp_d)
#define EXP(x)   ((x)->_mp_exp)
#define PREC(x)  ((x)->_mp_prec)
#define ALLOC(x) ((x)->_mp_alloc)
#define NUM(x)   mpq_numref(x)
#define DEN(x)   mpq_denref(x)

/* n-1 inverts any low zeros and the lowest one bit.  If n&(n-1) leaves zero
   then that lowest one bit must have been the only bit set.  n==0 will
   return true though, so avoid that.  */
#define POW2_P(n)  (((n) & ((n) - 1)) == 0)

/* This is intended for constant THRESHOLDs only, where the compiler can
   completely fold the result.  It gives the bit length of n, for n below
   2^16, eg. LOG2C(7)==3 and LOG2C(8)==4.  */
#define LOG2C(n) \
 (((n) >=    0x1) + ((n) >=    0x2) + ((n) >=    0x4) + ((n) >=    0x8) + \
  ((n) >=   0x10) + ((n) >=   0x20) + ((n) >=   0x40) + ((n) >=   0x80) + \
  ((n) >=  0x100) + ((n) >=  0x200) + ((n) >=  0x400) + ((n) >=  0x800) + \
  ((n) >= 0x1000) + ((n) >= 0x2000) + ((n) >= 0x4000) + ((n) >= 0x8000))

#define MP_LIMB_T_MAX      (~ (mp_limb_t) 0)

/* Must cast ULONG_MAX etc to unsigned long etc, since they might not be
   unsigned on a K&R compiler.  In particular the HP-UX 10 bundled K&R cc
   treats the plain decimal values in <limits.h> as signed.  */
#define ULONG_HIGHBIT      (ULONG_MAX ^ ((unsigned long) ULONG_MAX >> 1))
#define UINT_HIGHBIT       (UINT_MAX ^ ((unsigned) UINT_MAX >> 1))
#define USHRT_HIGHBIT      (USHRT_MAX ^ ((unsigned short) USHRT_MAX >> 1))
#define GMP_LIMB_HIGHBIT   (MP_LIMB_T_MAX ^ (MP_LIMB_T_MAX >> 1))

#if __GMP_MP_SIZE_T_INT
#define MP_SIZE_T_MAX      INT_MAX
#define MP_SIZE_T_MIN      INT_MIN
#else
#define MP_SIZE_T_MAX      LONG_MAX
#define MP_SIZE_T_MIN      LONG_MIN
#endif

/* mp_exp_t is the same as mp_size_t */
#define MP_EXP_T_MAX   MP_SIZE_T_MAX
#define MP_EXP_T_MIN   MP_SIZE_T_MIN

#define LONG_HIGHBIT       LONG_MIN
#define INT_HIGHBIT        INT_MIN
#define SHRT_HIGHBIT       SHRT_MIN


#define GMP_NUMB_HIGHBIT  (CNST_LIMB(1) << (GMP_NUMB_BITS-1))

#if GMP_NAIL_BITS == 0
#define GMP_NAIL_LOWBIT   CNST_LIMB(0)
#else
#define GMP_NAIL_LOWBIT   (CNST_LIMB(1) << GMP_NUMB_BITS)
#endif

#if GMP_NAIL_BITS != 0
/* Set various *_THRESHOLD values to be used for nails.  Thus we avoid using
   code that has not yet been qualified.  */

#undef  DC_DIV_QR_THRESHOLD
#define DC_DIV_QR_THRESHOLD              50

#undef DIVREM_1_NORM_THRESHOLD
#undef DIVREM_1_UNNORM_THRESHOLD
#undef MOD_1_NORM_THRESHOLD
#undef MOD_1_UNNORM_THRESHOLD
#undef USE_PREINV_DIVREM_1
#undef DIVREM_2_THRESHOLD
#undef DIVEXACT_1_THRESHOLD
#define DIVREM_1_NORM_THRESHOLD    MP_SIZE_T_MAX  /* no preinv */
#define DIVREM_1_UNNORM_THRESHOLD  MP_SIZE_T_MAX  /* no preinv */
#define MOD_1_NORM_THRESHOLD       MP_SIZE_T_MAX  /* no preinv */
#define MOD_1_UNNORM_THRESHOLD     MP_SIZE_T_MAX  /* no preinv */
#define USE_PREINV_DIVREM_1        0  /* no preinv */
#define DIVREM_2_THRESHOLD         MP_SIZE_T_MAX  /* no preinv */

/* mpn/generic/mul_fft.c is not nails-capable. */
#undef  MUL_FFT_THRESHOLD
#undef  SQR_FFT_THRESHOLD
#define MUL_FFT_THRESHOLD MP_SIZE_T_MAX
#define SQR_FFT_THRESHOLD MP_SIZE_T_MAX
#endif
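/* For instance (illustrative), a hypothetical nails build with 64-bit limbs
   and GMP_NAIL_BITS == 4 has GMP_NUMB_BITS == 60, so GMP_NUMB_HIGHBIT is
   bit 59 and GMP_NAIL_LOWBIT is bit 60, the lowest bit of the nail part.  */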
/* Swap macros. */

#define MP_LIMB_T_SWAP(x, y)                    \
  do {                                          \
    mp_limb_t __mp_limb_t_swap__tmp = (x);      \
    (x) = (y);                                  \
    (y) = __mp_limb_t_swap__tmp;                \
  } while (0)
#define MP_SIZE_T_SWAP(x, y)                    \
  do {                                          \
    mp_size_t __mp_size_t_swap__tmp = (x);      \
    (x) = (y);                                  \
    (y) = __mp_size_t_swap__tmp;                \
  } while (0)

#define MP_PTR_SWAP(x, y)               \
  do {                                  \
    mp_ptr __mp_ptr_swap__tmp = (x);    \
    (x) = (y);                          \
    (y) = __mp_ptr_swap__tmp;           \
  } while (0)
#define MP_SRCPTR_SWAP(x, y)                    \
  do {                                          \
    mp_srcptr __mp_srcptr_swap__tmp = (x);      \
    (x) = (y);                                  \
    (y) = __mp_srcptr_swap__tmp;                \
  } while (0)

#define MPN_PTR_SWAP(xp,xs, yp,ys)      \
  do {                                  \
    MP_PTR_SWAP (xp, yp);               \
    MP_SIZE_T_SWAP (xs, ys);            \
  } while(0)
#define MPN_SRCPTR_SWAP(xp,xs, yp,ys)   \
  do {                                  \
    MP_SRCPTR_SWAP (xp, yp);            \
    MP_SIZE_T_SWAP (xs, ys);            \
  } while(0)

#define MPZ_PTR_SWAP(x, y)              \
  do {                                  \
    mpz_ptr __mpz_ptr_swap__tmp = (x);  \
    (x) = (y);                          \
    (y) = __mpz_ptr_swap__tmp;          \
  } while (0)
#define MPZ_SRCPTR_SWAP(x, y)                   \
  do {                                          \
    mpz_srcptr __mpz_srcptr_swap__tmp = (x);    \
    (x) = (y);                                  \
    (y) = __mpz_srcptr_swap__tmp;               \
  } while (0)

#define MPQ_PTR_SWAP(x, y)              \
  do {                                  \
    mpq_ptr __mpq_ptr_swap__tmp = (x);  \
    (x) = (y);                          \
    (y) = __mpq_ptr_swap__tmp;          \
  } while (0)
#define MPQ_SRCPTR_SWAP(x, y)                   \
  do {                                          \
    mpq_srcptr __mpq_srcptr_swap__tmp = (x);    \
    (x) = (y);                                  \
    (y) = __mpq_srcptr_swap__tmp;               \
  } while (0)


/* Enhancement: __gmp_allocate_func could have "__attribute__ ((malloc))",
   but current gcc (3.0) doesn't seem to support that.  */
__GMP_DECLSPEC extern void * (*__gmp_allocate_func) (size_t);
__GMP_DECLSPEC extern void * (*__gmp_reallocate_func) (void *, size_t, size_t);
__GMP_DECLSPEC extern void   (*__gmp_free_func) (void *, size_t);

__GMP_DECLSPEC void *__gmp_default_allocate (size_t);
__GMP_DECLSPEC void *__gmp_default_reallocate (void *, size_t, size_t);
__GMP_DECLSPEC void  __gmp_default_free (void *, size_t);

#define __GMP_ALLOCATE_FUNC_TYPE(n,type) \
  ((type *) (*__gmp_allocate_func) ((n) * sizeof (type)))
#define __GMP_ALLOCATE_FUNC_LIMBS(n)   __GMP_ALLOCATE_FUNC_TYPE (n, mp_limb_t)

#define __GMP_REALLOCATE_FUNC_TYPE(p, old_size, new_size, type)         \
  ((type *) (*__gmp_reallocate_func)                                    \
   (p, (old_size) * sizeof (type), (new_size) * sizeof (type)))
#define __GMP_REALLOCATE_FUNC_LIMBS(p, old_size, new_size)              \
  __GMP_REALLOCATE_FUNC_TYPE(p, old_size, new_size, mp_limb_t)

#define __GMP_FREE_FUNC_TYPE(p,n,type) (*__gmp_free_func) (p, (n) * sizeof (type))
#define __GMP_FREE_FUNC_LIMBS(p,n)     __GMP_FREE_FUNC_TYPE (p, n, mp_limb_t)

#define __GMP_REALLOCATE_FUNC_MAYBE(ptr, oldsize, newsize)      \
  do {                                                          \
    if ((oldsize) != (newsize))                                 \
      (ptr) = (*__gmp_reallocate_func) (ptr, oldsize, newsize); \
  } while (0)

#define __GMP_REALLOCATE_FUNC_MAYBE_TYPE(ptr, oldsize, newsize, type)   \
  do {                                                                  \
    if ((oldsize) != (newsize))                                         \
      (ptr) = (type *) (*__gmp_reallocate_func)                         \
        (ptr, (oldsize) * sizeof (type), (newsize) * sizeof (type));    \
  } while (0)


/* Dummy for non-gcc, code involving it will go dead. */
#if ! defined (__GNUC__) || __GNUC__ < 2
#define __builtin_constant_p(x)   0
#endif
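/* Sketch of the allocation-function conventions above (illustrative only):
   limb blocks are sized in limbs, and the free function must be told the
   current size, hence the _TYPE/_LIMBS wrappers.

       mp_ptr p = __GMP_ALLOCATE_FUNC_LIMBS (n);
       __GMP_REALLOCATE_FUNC_MAYBE_TYPE (p, n, 2*n, mp_limb_t);
       __GMP_FREE_FUNC_LIMBS (p, 2*n);
*/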
/* In gcc 2.96 and up on i386, tail calls are optimized to jumps if the
   stack usage is compatible.  __attribute__ ((regparm (N))) helps by
   putting leading parameters in registers, avoiding extra stack.

   regparm cannot be used with calls going through the PLT, because the
   binding code there may clobber the registers (%eax, %edx, %ecx) used for
   the regparm parameters.  Calls to local (ie. static) functions could
   still use this, if we cared to differentiate locals and globals.

   On athlon-unknown-freebsd4.9 with gcc 3.3.3, regparm cannot be used with
   -p or -pg profiling, since that version of gcc doesn't realize the
   .mcount calls will clobber the parameter registers.  Other systems are
   ok, like debian with glibc 2.3.2 (mcount doesn't clobber), but we don't
   bother to try to detect this.  regparm is only an optimization so we just
   disable it when profiling (profiling being a slowdown anyway).  */

#if HAVE_HOST_CPU_FAMILY_x86 && __GMP_GNUC_PREREQ (2,96) && ! defined (PIC) \
  && ! WANT_PROFILING_PROF && ! WANT_PROFILING_GPROF
#define USE_LEADING_REGPARM 1
#else
#define USE_LEADING_REGPARM 0
#endif

/* Macros for altering parameter order according to regparm usage. */
#if USE_LEADING_REGPARM
#define REGPARM_2_1(a,b,x)    x,a,b
#define REGPARM_3_1(a,b,c,x)  x,a,b,c
#define REGPARM_ATTR(n) __attribute__ ((regparm (n)))
#else
#define REGPARM_2_1(a,b,x)    a,b,x
#define REGPARM_3_1(a,b,c,x)  a,b,c,x
#define REGPARM_ATTR(n)
#endif
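/* So for example a call written as

       foo (REGPARM_3_1 (w, u, v, sub));        [illustrative]

   passes the arguments as foo (sub, w, u, v) when USE_LEADING_REGPARM is 1,
   moving the trailing argument into the leading register slot, and as
   foo (w, u, v, sub) otherwise.  The real uses below, such as
   mpz_aorsmul_1, follow this pattern.  */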
/* ASM_L gives a local label for a gcc asm block, for use when temporary
   local labels like "1:" might not be available, which is the case for
   instance on the x86s (the SCO assembler doesn't support them).

   The label generated is made unique by including "%=" which is a unique
   number for each insn.  This ensures the same name can be used in multiple
   asm blocks, perhaps via a macro.  Since jumps between asm blocks are not
   allowed there's no need for a label to be usable outside a single
   block.  */

#define ASM_L(name)  LSYM_PREFIX "asm_%=_" #name


#if defined (__GNUC__) && HAVE_HOST_CPU_FAMILY_x86
#if 0
/* FIXME: Check that these actually improve things.
   FIXME: Need a cld after each std.
   FIXME: Can't have inputs in clobbered registers, must describe them as
   dummy outputs, and add volatile.  */
#define MPN_COPY_INCR(DST, SRC, N)                                      \
  __asm__ ("cld\n\trep\n\tmovsl" : :                                    \
           "D" (DST), "S" (SRC), "c" (N) :                              \
           "cx", "di", "si", "memory")
#define MPN_COPY_DECR(DST, SRC, N)                                      \
  __asm__ ("std\n\trep\n\tmovsl" : :                                    \
           "D" ((DST) + (N) - 1), "S" ((SRC) + (N) - 1), "c" (N) :      \
           "cx", "di", "si", "memory")
#endif
#endif


__GMP_DECLSPEC void __gmpz_aorsmul_1 (REGPARM_3_1 (mpz_ptr, mpz_srcptr, mp_limb_t, mp_size_t)) REGPARM_ATTR(1);
#define mpz_aorsmul_1(w,u,v,sub)  __gmpz_aorsmul_1 (REGPARM_3_1 (w, u, v, sub))

#define mpz_n_pow_ui __gmpz_n_pow_ui
__GMP_DECLSPEC void mpz_n_pow_ui (mpz_ptr, mp_srcptr, mp_size_t, unsigned long);


#define mpn_addmul_1c __MPN(addmul_1c)
__GMP_DECLSPEC mp_limb_t mpn_addmul_1c (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t);

#ifndef mpn_addmul_2  /* if not done with cpuvec in a fat binary */
#define mpn_addmul_2 __MPN(addmul_2)
__GMP_DECLSPEC mp_limb_t mpn_addmul_2 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);
#endif

#define mpn_addmul_3 __MPN(addmul_3)
__GMP_DECLSPEC mp_limb_t mpn_addmul_3 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);

#define mpn_addmul_4 __MPN(addmul_4)
__GMP_DECLSPEC mp_limb_t mpn_addmul_4 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);

#define mpn_addmul_5 __MPN(addmul_5)
__GMP_DECLSPEC mp_limb_t mpn_addmul_5 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);

#define mpn_addmul_6 __MPN(addmul_6)
__GMP_DECLSPEC mp_limb_t mpn_addmul_6 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);

#define mpn_addmul_7 __MPN(addmul_7)
__GMP_DECLSPEC mp_limb_t mpn_addmul_7 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);

#define mpn_addmul_8 __MPN(addmul_8)
__GMP_DECLSPEC mp_limb_t mpn_addmul_8 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);

/* Alternative entry point in mpn_addmul_2 for the benefit of mpn_sqr_basecase.  */
#define mpn_addmul_2s __MPN(addmul_2s)
__GMP_DECLSPEC mp_limb_t mpn_addmul_2s (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);

/* Override mpn_addlsh1_n, mpn_addlsh2_n, mpn_sublsh1_n, etc, with
   mpn_addlsh_n, etc, when the former lack a native implementation but the
   latter have one.  Similarly, override foo_ip1 functions with foo.  We
   then lie and say these macros represent native functions, but leave a
   trace by using the value 2 rather than 1, so that code which cares can
   still tell a genuine native routine from one synthesized here.  */

#if HAVE_NATIVE_mpn_addlsh_n && ! HAVE_NATIVE_mpn_addlsh1_n
#define mpn_addlsh1_n(a,b,c,d)          mpn_addlsh_n(a,b,c,d,1)
#define HAVE_NATIVE_mpn_addlsh1_n       2
#endif

#if HAVE_NATIVE_mpn_addlsh_nc && ! HAVE_NATIVE_mpn_addlsh1_nc
#define mpn_addlsh1_nc(a,b,c,d,x)       mpn_addlsh_nc(a,b,c,d,1,x)
#define HAVE_NATIVE_mpn_addlsh1_nc      2
#endif

#if HAVE_NATIVE_mpn_addlsh1_n && ! HAVE_NATIVE_mpn_addlsh1_n_ip1
#define mpn_addlsh1_n_ip1(a,b,n)        mpn_addlsh1_n(a,a,b,n)
#define HAVE_NATIVE_mpn_addlsh1_n_ip1   2
#endif

#if HAVE_NATIVE_mpn_addlsh1_nc && ! HAVE_NATIVE_mpn_addlsh1_nc_ip1
#define mpn_addlsh1_nc_ip1(a,b,n,c)     mpn_addlsh1_nc(a,a,b,n,c)
#define HAVE_NATIVE_mpn_addlsh1_nc_ip1  2
#endif

#if HAVE_NATIVE_mpn_addlsh_n && ! HAVE_NATIVE_mpn_addlsh2_n
#define mpn_addlsh2_n(a,b,c,d)          mpn_addlsh_n(a,b,c,d,2)
#define HAVE_NATIVE_mpn_addlsh2_n       2
#endif

#if HAVE_NATIVE_mpn_addlsh_nc && ! HAVE_NATIVE_mpn_addlsh2_nc
#define mpn_addlsh2_nc(a,b,c,d,x)       mpn_addlsh_nc(a,b,c,d,2,x)
#define HAVE_NATIVE_mpn_addlsh2_nc      2
#endif
#if HAVE_NATIVE_mpn_addlsh2_n && ! HAVE_NATIVE_mpn_addlsh2_n_ip1
#define mpn_addlsh2_n_ip1(a,b,n)        mpn_addlsh2_n(a,a,b,n)
#define HAVE_NATIVE_mpn_addlsh2_n_ip1   2
#endif

#if HAVE_NATIVE_mpn_addlsh2_nc && ! HAVE_NATIVE_mpn_addlsh2_nc_ip1
#define mpn_addlsh2_nc_ip1(a,b,n,c)     mpn_addlsh2_nc(a,a,b,n,c)
#define HAVE_NATIVE_mpn_addlsh2_nc_ip1  2
#endif

#if HAVE_NATIVE_mpn_sublsh_n && ! HAVE_NATIVE_mpn_sublsh1_n
#define mpn_sublsh1_n(a,b,c,d)          mpn_sublsh_n(a,b,c,d,1)
#define HAVE_NATIVE_mpn_sublsh1_n       2
#endif

#if HAVE_NATIVE_mpn_sublsh_nc && ! HAVE_NATIVE_mpn_sublsh1_nc
#define mpn_sublsh1_nc(a,b,c,d,x)       mpn_sublsh_nc(a,b,c,d,1,x)
#define HAVE_NATIVE_mpn_sublsh1_nc      2
#endif

#if HAVE_NATIVE_mpn_sublsh1_n && ! HAVE_NATIVE_mpn_sublsh1_n_ip1
#define mpn_sublsh1_n_ip1(a,b,n)        mpn_sublsh1_n(a,a,b,n)
#define HAVE_NATIVE_mpn_sublsh1_n_ip1   2
#endif

#if HAVE_NATIVE_mpn_sublsh1_nc && ! HAVE_NATIVE_mpn_sublsh1_nc_ip1
#define mpn_sublsh1_nc_ip1(a,b,n,c)     mpn_sublsh1_nc(a,a,b,n,c)
#define HAVE_NATIVE_mpn_sublsh1_nc_ip1  2
#endif

#if HAVE_NATIVE_mpn_sublsh_n && ! HAVE_NATIVE_mpn_sublsh2_n
#define mpn_sublsh2_n(a,b,c,d)          mpn_sublsh_n(a,b,c,d,2)
#define HAVE_NATIVE_mpn_sublsh2_n       2
#endif

#if HAVE_NATIVE_mpn_sublsh_nc && ! HAVE_NATIVE_mpn_sublsh2_nc
#define mpn_sublsh2_nc(a,b,c,d,x)       mpn_sublsh_nc(a,b,c,d,2,x)
#define HAVE_NATIVE_mpn_sublsh2_nc      2
#endif

#if HAVE_NATIVE_mpn_sublsh2_n && ! HAVE_NATIVE_mpn_sublsh2_n_ip1
#define mpn_sublsh2_n_ip1(a,b,n)        mpn_sublsh2_n(a,a,b,n)
#define HAVE_NATIVE_mpn_sublsh2_n_ip1   2
#endif

#if HAVE_NATIVE_mpn_sublsh2_nc && ! HAVE_NATIVE_mpn_sublsh2_nc_ip1
#define mpn_sublsh2_nc_ip1(a,b,n,c)     mpn_sublsh2_nc(a,a,b,n,c)
#define HAVE_NATIVE_mpn_sublsh2_nc_ip1  2
#endif

#if HAVE_NATIVE_mpn_rsblsh_n && ! HAVE_NATIVE_mpn_rsblsh1_n
#define mpn_rsblsh1_n(a,b,c,d)          mpn_rsblsh_n(a,b,c,d,1)
#define HAVE_NATIVE_mpn_rsblsh1_n       2
#endif

#if HAVE_NATIVE_mpn_rsblsh_nc && ! HAVE_NATIVE_mpn_rsblsh1_nc
#define mpn_rsblsh1_nc(a,b,c,d,x)       mpn_rsblsh_nc(a,b,c,d,1,x)
#define HAVE_NATIVE_mpn_rsblsh1_nc      2
#endif

#if HAVE_NATIVE_mpn_rsblsh1_n && ! HAVE_NATIVE_mpn_rsblsh1_n_ip1
#define mpn_rsblsh1_n_ip1(a,b,n)        mpn_rsblsh1_n(a,a,b,n)
#define HAVE_NATIVE_mpn_rsblsh1_n_ip1   2
#endif

#if HAVE_NATIVE_mpn_rsblsh1_nc && ! HAVE_NATIVE_mpn_rsblsh1_nc_ip1
#define mpn_rsblsh1_nc_ip1(a,b,n,c)     mpn_rsblsh1_nc(a,a,b,n,c)
#define HAVE_NATIVE_mpn_rsblsh1_nc_ip1  2
#endif

#if HAVE_NATIVE_mpn_rsblsh_n && ! HAVE_NATIVE_mpn_rsblsh2_n
#define mpn_rsblsh2_n(a,b,c,d)          mpn_rsblsh_n(a,b,c,d,2)
#define HAVE_NATIVE_mpn_rsblsh2_n       2
#endif

#if HAVE_NATIVE_mpn_rsblsh_nc && ! HAVE_NATIVE_mpn_rsblsh2_nc
#define mpn_rsblsh2_nc(a,b,c,d,x)       mpn_rsblsh_nc(a,b,c,d,2,x)
#define HAVE_NATIVE_mpn_rsblsh2_nc      2
#endif

#if HAVE_NATIVE_mpn_rsblsh2_n && ! HAVE_NATIVE_mpn_rsblsh2_n_ip1
#define mpn_rsblsh2_n_ip1(a,b,n)        mpn_rsblsh2_n(a,a,b,n)
#define HAVE_NATIVE_mpn_rsblsh2_n_ip1   2
#endif
#if HAVE_NATIVE_mpn_rsblsh2_nc && ! HAVE_NATIVE_mpn_rsblsh2_nc_ip1
#define mpn_rsblsh2_nc_ip1(a,b,n,c)     mpn_rsblsh2_nc(a,a,b,n,c)
#define HAVE_NATIVE_mpn_rsblsh2_nc_ip1  2
#endif


#ifndef mpn_addlsh1_n
#define mpn_addlsh1_n __MPN(addlsh1_n)
__GMP_DECLSPEC mp_limb_t mpn_addlsh1_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
#endif
#ifndef mpn_addlsh1_nc
#define mpn_addlsh1_nc __MPN(addlsh1_nc)
__GMP_DECLSPEC mp_limb_t mpn_addlsh1_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
#endif
#ifndef mpn_addlsh1_n_ip1
#define mpn_addlsh1_n_ip1 __MPN(addlsh1_n_ip1)
__GMP_DECLSPEC mp_limb_t mpn_addlsh1_n_ip1 (mp_ptr, mp_srcptr, mp_size_t);
#endif
#ifndef mpn_addlsh1_nc_ip1
#define mpn_addlsh1_nc_ip1 __MPN(addlsh1_nc_ip1)
__GMP_DECLSPEC mp_limb_t mpn_addlsh1_nc_ip1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
#endif

#ifndef mpn_addlsh2_n
#define mpn_addlsh2_n __MPN(addlsh2_n)
__GMP_DECLSPEC mp_limb_t mpn_addlsh2_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
#endif
#ifndef mpn_addlsh2_nc
#define mpn_addlsh2_nc __MPN(addlsh2_nc)
__GMP_DECLSPEC mp_limb_t mpn_addlsh2_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
#endif
#ifndef mpn_addlsh2_n_ip1
#define mpn_addlsh2_n_ip1 __MPN(addlsh2_n_ip1)
__GMP_DECLSPEC mp_limb_t mpn_addlsh2_n_ip1 (mp_ptr, mp_srcptr, mp_size_t);
#endif
#ifndef mpn_addlsh2_nc_ip1
#define mpn_addlsh2_nc_ip1 __MPN(addlsh2_nc_ip1)
__GMP_DECLSPEC mp_limb_t mpn_addlsh2_nc_ip1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
#endif

#ifndef mpn_addlsh_n
#define mpn_addlsh_n __MPN(addlsh_n)
__GMP_DECLSPEC mp_limb_t mpn_addlsh_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, unsigned int);
#endif
#ifndef mpn_addlsh_nc
#define mpn_addlsh_nc __MPN(addlsh_nc)
__GMP_DECLSPEC mp_limb_t mpn_addlsh_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, unsigned int, mp_limb_t);
#endif
#ifndef mpn_addlsh_n_ip1
#define mpn_addlsh_n_ip1 __MPN(addlsh_n_ip1)
__GMP_DECLSPEC mp_limb_t mpn_addlsh_n_ip1 (mp_ptr, mp_srcptr, mp_size_t, unsigned int);
#endif
#ifndef mpn_addlsh_nc_ip1
#define mpn_addlsh_nc_ip1 __MPN(addlsh_nc_ip1)
__GMP_DECLSPEC mp_limb_t mpn_addlsh_nc_ip1 (mp_ptr, mp_srcptr, mp_size_t, unsigned int, mp_limb_t);
#endif

#ifndef mpn_sublsh1_n
#define mpn_sublsh1_n __MPN(sublsh1_n)
__GMP_DECLSPEC mp_limb_t mpn_sublsh1_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
#endif
#ifndef mpn_sublsh1_nc
#define mpn_sublsh1_nc __MPN(sublsh1_nc)
__GMP_DECLSPEC mp_limb_t mpn_sublsh1_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
#endif
#ifndef mpn_sublsh1_n_ip1
#define mpn_sublsh1_n_ip1 __MPN(sublsh1_n_ip1)
__GMP_DECLSPEC mp_limb_t mpn_sublsh1_n_ip1 (mp_ptr, mp_srcptr, mp_size_t);
#endif
#ifndef mpn_sublsh1_nc_ip1
#define mpn_sublsh1_nc_ip1 __MPN(sublsh1_nc_ip1)
__GMP_DECLSPEC mp_limb_t mpn_sublsh1_nc_ip1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
#endif

#ifndef mpn_sublsh2_n
#define mpn_sublsh2_n __MPN(sublsh2_n)
__GMP_DECLSPEC mp_limb_t mpn_sublsh2_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
#endif
#ifndef mpn_sublsh2_nc
#define mpn_sublsh2_nc __MPN(sublsh2_nc)
__GMP_DECLSPEC mp_limb_t mpn_sublsh2_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
#endif
#ifndef mpn_sublsh2_n_ip1
#define mpn_sublsh2_n_ip1 __MPN(sublsh2_n_ip1)
__GMP_DECLSPEC mp_limb_t mpn_sublsh2_n_ip1 (mp_ptr, mp_srcptr, mp_size_t);
#endif
#ifndef mpn_sublsh2_nc_ip1
#define mpn_sublsh2_nc_ip1 __MPN(sublsh2_nc_ip1)
__GMP_DECLSPEC mp_limb_t mpn_sublsh2_nc_ip1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
#endif

#ifndef mpn_sublsh_n
#define mpn_sublsh_n __MPN(sublsh_n)
__GMP_DECLSPEC mp_limb_t mpn_sublsh_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, unsigned int);
#endif
#ifndef mpn_sublsh_nc
#define mpn_sublsh_nc __MPN(sublsh_nc)
__GMP_DECLSPEC mp_limb_t mpn_sublsh_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, unsigned int, mp_limb_t);
#endif
#ifndef mpn_sublsh_n_ip1
#define mpn_sublsh_n_ip1 __MPN(sublsh_n_ip1)
__GMP_DECLSPEC mp_limb_t mpn_sublsh_n_ip1 (mp_ptr, mp_srcptr, mp_size_t, unsigned int);
#endif
#ifndef mpn_sublsh_nc_ip1
#define mpn_sublsh_nc_ip1 __MPN(sublsh_nc_ip1)
__GMP_DECLSPEC mp_limb_t mpn_sublsh_nc_ip1 (mp_ptr, mp_srcptr, mp_size_t, unsigned int, mp_limb_t);
#endif

#define mpn_rsblsh1_n __MPN(rsblsh1_n)
__GMP_DECLSPEC mp_limb_signed_t mpn_rsblsh1_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
#define mpn_rsblsh1_nc __MPN(rsblsh1_nc)
__GMP_DECLSPEC mp_limb_signed_t mpn_rsblsh1_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);

#define mpn_rsblsh2_n __MPN(rsblsh2_n)
__GMP_DECLSPEC mp_limb_signed_t mpn_rsblsh2_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
#define mpn_rsblsh2_nc __MPN(rsblsh2_nc)
__GMP_DECLSPEC mp_limb_signed_t mpn_rsblsh2_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);

#define mpn_rsblsh_n __MPN(rsblsh_n)
__GMP_DECLSPEC mp_limb_signed_t mpn_rsblsh_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, unsigned int);
#define mpn_rsblsh_nc __MPN(rsblsh_nc)
__GMP_DECLSPEC mp_limb_signed_t mpn_rsblsh_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, unsigned int, mp_limb_t);

#define mpn_rsh1add_n __MPN(rsh1add_n)
__GMP_DECLSPEC mp_limb_t mpn_rsh1add_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
#define mpn_rsh1add_nc __MPN(rsh1add_nc)
__GMP_DECLSPEC mp_limb_t mpn_rsh1add_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);

#define mpn_rsh1sub_n __MPN(rsh1sub_n)
__GMP_DECLSPEC mp_limb_t mpn_rsh1sub_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
#define mpn_rsh1sub_nc __MPN(rsh1sub_nc)
__GMP_DECLSPEC mp_limb_t mpn_rsh1sub_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);

#ifndef mpn_lshiftc  /* if not done with cpuvec in a fat binary */
#define mpn_lshiftc __MPN(lshiftc)
__GMP_DECLSPEC mp_limb_t mpn_lshiftc (mp_ptr, mp_srcptr, mp_size_t, unsigned int);
#endif

#define mpn_add_err1_n __MPN(add_err1_n)
__GMP_DECLSPEC mp_limb_t mpn_add_err1_n (mp_ptr, mp_srcptr, mp_srcptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);

#define mpn_add_err2_n __MPN(add_err2_n)
__GMP_DECLSPEC mp_limb_t mpn_add_err2_n (mp_ptr, mp_srcptr, mp_srcptr, mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);

#define mpn_add_err3_n __MPN(add_err3_n)
__GMP_DECLSPEC mp_limb_t mpn_add_err3_n (mp_ptr, mp_srcptr, mp_srcptr, mp_ptr, mp_srcptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);

#define mpn_sub_err1_n __MPN(sub_err1_n)
__GMP_DECLSPEC mp_limb_t mpn_sub_err1_n (mp_ptr, mp_srcptr, mp_srcptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);

#define mpn_sub_err2_n __MPN(sub_err2_n)
__GMP_DECLSPEC mp_limb_t mpn_sub_err2_n (mp_ptr, mp_srcptr, mp_srcptr, mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);

#define mpn_sub_err3_n __MPN(sub_err3_n)
__GMP_DECLSPEC mp_limb_t mpn_sub_err3_n (mp_ptr, mp_srcptr, mp_srcptr, mp_ptr, mp_srcptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);

#define mpn_add_n_sub_n __MPN(add_n_sub_n)
__GMP_DECLSPEC mp_limb_t mpn_add_n_sub_n (mp_ptr, mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);

#define mpn_add_n_sub_nc __MPN(add_n_sub_nc)
__GMP_DECLSPEC mp_limb_t mpn_add_n_sub_nc (mp_ptr, mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);

#define mpn_addaddmul_1msb0 __MPN(addaddmul_1msb0)
__GMP_DECLSPEC mp_limb_t mpn_addaddmul_1msb0 (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t);

#define mpn_divrem_1c __MPN(divrem_1c)
__GMP_DECLSPEC mp_limb_t mpn_divrem_1c (mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t);

#define mpn_dump __MPN(dump)
__GMP_DECLSPEC void mpn_dump (mp_srcptr, mp_size_t);

#define mpn_fib2_ui __MPN(fib2_ui)
__GMP_DECLSPEC mp_size_t mpn_fib2_ui (mp_ptr, mp_ptr, unsigned long);

#define mpn_fib2m __MPN(fib2m)
__GMP_DECLSPEC int mpn_fib2m (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t);

#define mpn_strongfibo __MPN(strongfibo)
__GMP_DECLSPEC int mpn_strongfibo (mp_srcptr, mp_size_t, mp_ptr);

/* Remap names of internal mpn functions. */
#define __clz_tab         __MPN(clz_tab)
#define mpn_udiv_w_sdiv   __MPN(udiv_w_sdiv)

#define mpn_jacobi_base __MPN(jacobi_base)
__GMP_DECLSPEC int mpn_jacobi_base (mp_limb_t, mp_limb_t, int) ATTRIBUTE_CONST;

#define mpn_jacobi_2 __MPN(jacobi_2)
__GMP_DECLSPEC int mpn_jacobi_2 (mp_srcptr, mp_srcptr, unsigned);

#define mpn_jacobi_n __MPN(jacobi_n)
__GMP_DECLSPEC int mpn_jacobi_n (mp_ptr, mp_ptr, mp_size_t, unsigned);

#define mpn_mod_1c __MPN(mod_1c)
__GMP_DECLSPEC mp_limb_t mpn_mod_1c (mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t) __GMP_ATTRIBUTE_PURE;

#define mpn_mul_1c __MPN(mul_1c)
__GMP_DECLSPEC mp_limb_t mpn_mul_1c (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t);

#define mpn_mul_2 __MPN(mul_2)
__GMP_DECLSPEC mp_limb_t mpn_mul_2 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);

#define mpn_mul_3 __MPN(mul_3)
__GMP_DECLSPEC mp_limb_t mpn_mul_3 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);

#define mpn_mul_4 __MPN(mul_4)
__GMP_DECLSPEC mp_limb_t mpn_mul_4 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);

#define mpn_mul_5 __MPN(mul_5)
__GMP_DECLSPEC mp_limb_t mpn_mul_5 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);

#define mpn_mul_6 __MPN(mul_6)
__GMP_DECLSPEC mp_limb_t mpn_mul_6 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);

#ifndef mpn_mul_basecase  /* if not done with cpuvec in a fat binary */
#define mpn_mul_basecase __MPN(mul_basecase)
__GMP_DECLSPEC void mpn_mul_basecase (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t);
#endif

#define mpn_mullo_n __MPN(mullo_n)
__GMP_DECLSPEC void mpn_mullo_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);

#ifndef mpn_mullo_basecase  /* if not done with cpuvec in a fat binary */
#define mpn_mullo_basecase __MPN(mullo_basecase)
__GMP_DECLSPEC void mpn_mullo_basecase (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
#endif

#ifndef mpn_sqr_basecase  /* if not done with cpuvec in a fat binary */
#define mpn_sqr_basecase __MPN(sqr_basecase)
__GMP_DECLSPEC void mpn_sqr_basecase (mp_ptr, mp_srcptr, mp_size_t);
#endif

#define mpn_sqrlo __MPN(sqrlo)
__GMP_DECLSPEC void mpn_sqrlo (mp_ptr, mp_srcptr, mp_size_t);
#define mpn_sqrlo_basecase __MPN(sqrlo_basecase)
__GMP_DECLSPEC void mpn_sqrlo_basecase (mp_ptr, mp_srcptr, mp_size_t);

#define mpn_mulmid_basecase __MPN(mulmid_basecase)
__GMP_DECLSPEC void mpn_mulmid_basecase (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t);

#define mpn_mulmid_n __MPN(mulmid_n)
__GMP_DECLSPEC void mpn_mulmid_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);

#define mpn_mulmid __MPN(mulmid)
__GMP_DECLSPEC void mpn_mulmid (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t);

#define mpn_submul_1c __MPN(submul_1c)
__GMP_DECLSPEC mp_limb_t mpn_submul_1c (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t);

#ifndef mpn_redc_1  /* if not done with cpuvec in a fat binary */
#define mpn_redc_1 __MPN(redc_1)
__GMP_DECLSPEC mp_limb_t mpn_redc_1 (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
#endif

#ifndef mpn_redc_2  /* if not done with cpuvec in a fat binary */
#define mpn_redc_2 __MPN(redc_2)
__GMP_DECLSPEC mp_limb_t mpn_redc_2 (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);
#endif

#define mpn_redc_n __MPN(redc_n)
__GMP_DECLSPEC void mpn_redc_n (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);


#ifndef mpn_mod_1_1p_cps  /* if not done with cpuvec in a fat binary */
#define mpn_mod_1_1p_cps __MPN(mod_1_1p_cps)
__GMP_DECLSPEC void mpn_mod_1_1p_cps (mp_limb_t [4], mp_limb_t);
#endif
#ifndef mpn_mod_1_1p  /* if not done with cpuvec in a fat binary */
#define mpn_mod_1_1p __MPN(mod_1_1p)
__GMP_DECLSPEC mp_limb_t mpn_mod_1_1p (mp_srcptr, mp_size_t, mp_limb_t, const mp_limb_t [4]) __GMP_ATTRIBUTE_PURE;
#endif

#ifndef mpn_mod_1s_2p_cps  /* if not done with cpuvec in a fat binary */
#define mpn_mod_1s_2p_cps __MPN(mod_1s_2p_cps)
__GMP_DECLSPEC void mpn_mod_1s_2p_cps (mp_limb_t [5], mp_limb_t);
#endif
#ifndef mpn_mod_1s_2p  /* if not done with cpuvec in a fat binary */
#define mpn_mod_1s_2p __MPN(mod_1s_2p)
__GMP_DECLSPEC mp_limb_t mpn_mod_1s_2p (mp_srcptr, mp_size_t, mp_limb_t, const mp_limb_t [5]) __GMP_ATTRIBUTE_PURE;
#endif

#ifndef mpn_mod_1s_3p_cps  /* if not done with cpuvec in a fat binary */
#define mpn_mod_1s_3p_cps __MPN(mod_1s_3p_cps)
__GMP_DECLSPEC void mpn_mod_1s_3p_cps (mp_limb_t [6], mp_limb_t);
#endif
#ifndef mpn_mod_1s_3p  /* if not done with cpuvec in a fat binary */
#define mpn_mod_1s_3p __MPN(mod_1s_3p)
__GMP_DECLSPEC mp_limb_t mpn_mod_1s_3p (mp_srcptr, mp_size_t, mp_limb_t, const mp_limb_t [6]) __GMP_ATTRIBUTE_PURE;
#endif

#ifndef mpn_mod_1s_4p_cps  /* if not done with cpuvec in a fat binary */
#define mpn_mod_1s_4p_cps __MPN(mod_1s_4p_cps)
__GMP_DECLSPEC void mpn_mod_1s_4p_cps (mp_limb_t [7], mp_limb_t);
#endif
#ifndef mpn_mod_1s_4p  /* if not done with cpuvec in a fat binary */
#define mpn_mod_1s_4p __MPN(mod_1s_4p)
__GMP_DECLSPEC mp_limb_t mpn_mod_1s_4p (mp_srcptr, mp_size_t, mp_limb_t, const mp_limb_t [7]) __GMP_ATTRIBUTE_PURE;
#endif
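/* Sketch of the _cps ("compute parameters") pattern above: the parameter
   array is computed once per divisor, then reused for many reductions by
   that divisor.  A minimal use, assuming b suits this variant and that, as
   in mpn/generic/mod_1.c, the cps function stores the shift count at
   index 1:

       mp_limb_t  pre[7], r;
       mpn_mod_1s_4p_cps (pre, b);
       r = mpn_mod_1s_4p (ap, an, b << pre[1], pre);

   giving r = {ap,an} mod b.  */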
#define mpn_bc_mulmod_bnm1 __MPN(bc_mulmod_bnm1)
__GMP_DECLSPEC void mpn_bc_mulmod_bnm1 (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_ptr);
#define mpn_mulmod_bnm1 __MPN(mulmod_bnm1)
__GMP_DECLSPEC void mpn_mulmod_bnm1 (mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
#define mpn_mulmod_bnm1_next_size __MPN(mulmod_bnm1_next_size)
__GMP_DECLSPEC mp_size_t mpn_mulmod_bnm1_next_size (mp_size_t) ATTRIBUTE_CONST;
static inline mp_size_t
mpn_mulmod_bnm1_itch (mp_size_t rn, mp_size_t an, mp_size_t bn) {
  mp_size_t n, itch;
  n = rn >> 1;
  itch = rn + 4 +
    (an > n ? (bn > n ? rn : n) : 0);
  return itch;
}

#define mpn_sqrmod_bnm1 __MPN(sqrmod_bnm1)
__GMP_DECLSPEC void mpn_sqrmod_bnm1 (mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
#define mpn_sqrmod_bnm1_next_size __MPN(sqrmod_bnm1_next_size)
__GMP_DECLSPEC mp_size_t mpn_sqrmod_bnm1_next_size (mp_size_t) ATTRIBUTE_CONST;
static inline mp_size_t
mpn_sqrmod_bnm1_itch (mp_size_t rn, mp_size_t an) {
  mp_size_t n, itch;
  n = rn >> 1;
  itch = rn + 3 +
    (an > n ? an : 0);
  return itch;
}

typedef __gmp_randstate_struct *gmp_randstate_ptr;
typedef const __gmp_randstate_struct *gmp_randstate_srcptr;

/* Pseudo-random number generator function pointers structure.  */
typedef struct {
  void (*randseed_fn) (gmp_randstate_t, mpz_srcptr);
  void (*randget_fn) (gmp_randstate_t, mp_ptr, unsigned long int);
  void (*randclear_fn) (gmp_randstate_t);
  void (*randiset_fn) (gmp_randstate_ptr, gmp_randstate_srcptr);
} gmp_randfnptr_t;

/* Macro to obtain a void pointer to the function pointers structure.  */
#define RNG_FNPTR(rstate) ((rstate)->_mp_algdata._mp_lc)

/* Macro to obtain a pointer to the generator's state.
   When used as an lvalue the rvalue needs to be cast to mp_ptr.  */
#define RNG_STATE(rstate) ((rstate)->_mp_seed->_mp_d)

/* Write a given number of random bits to rp.  */
#define _gmp_rand(rp, state, bits)                              \
  do {                                                          \
    gmp_randstate_ptr  __rstate = (state);                      \
    (*((gmp_randfnptr_t *) RNG_FNPTR (__rstate))->randget_fn)   \
       (__rstate, rp, bits);                                    \
  } while (0)

__GMP_DECLSPEC void __gmp_randinit_mt_noseed (gmp_randstate_t);


/* __gmp_rands is the global state for the old-style random functions, and
   is also used in the test programs (hence the __GMP_DECLSPEC).

   There's no seeding here, so mpz_random etc will generate the same
   sequence every time.  This is not unlike the C library random functions
   if you don't seed them, so perhaps it's acceptable.  Digging up a seed
   from /dev/random or the like would work on many systems, but might
   encourage a false confidence, since it'd be pretty much impossible to do
   something that would work reliably everywhere.  In any case the new style
   functions are recommended to applications which care about randomness, so
   the old functions aren't too important.  */

__GMP_DECLSPEC extern char             __gmp_rands_initialized;
__GMP_DECLSPEC extern gmp_randstate_t  __gmp_rands;

#define RANDS                                           \
  ((__gmp_rands_initialized ? 0                         \
    : (__gmp_rands_initialized = 1,                     \
       __gmp_randinit_mt_noseed (__gmp_rands), 0)),     \
   __gmp_rands)

/* this is used by the test programs, to free memory */
#define RANDS_CLEAR()                   \
  do {                                  \
    if (__gmp_rands_initialized)        \
      {                                 \
        __gmp_rands_initialized = 0;    \
        gmp_randclear (__gmp_rands);    \
      }                                 \
  } while (0)
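/* Typical use in test code (illustrative): RANDS initializes the global
   state on first use, via the comma expressions above, and RANDS_CLEAR
   releases it so memory checkers stay quiet:

       mpz_t  x;
       mpz_init (x);
       mpz_rrandomb (x, RANDS, 128);    // 128 random bits
       ...
       mpz_clear (x);
       RANDS_CLEAR ();
*/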
/* For a threshold between algorithms A and B, size>=thresh is where B
   should be used.  Special value MP_SIZE_T_MAX means only ever use A, or
   value 0 means only ever use B.  The tests for these special values will
   be compile-time constants, so the compiler should be able to eliminate
   the code for the unwanted algorithm.  */

#if ! defined (__GNUC__) || __GNUC__ < 2
#define ABOVE_THRESHOLD(size,thresh)    \
  ((thresh) == 0                        \
   || ((thresh) != MP_SIZE_T_MAX        \
       && (size) >= (thresh)))
#else
#define ABOVE_THRESHOLD(size,thresh)                    \
  ((__builtin_constant_p (thresh) && (thresh) == 0)     \
   || (!(__builtin_constant_p (thresh) && (thresh) == MP_SIZE_T_MAX) \
       && (size) >= (thresh)))
#endif
#define BELOW_THRESHOLD(size,thresh)  (! ABOVE_THRESHOLD (size, thresh))
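/* So algorithm selection code typically reads like

       if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))
         mpn_mul_basecase (p, a, n, b, n);
       else
         ...

   (illustrative), and a gmp-mparam.h pinning the threshold to 0 or
   MP_SIZE_T_MAX makes the unwanted branch compile-time dead code.  */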
/* The minimal supported value for Toom22 depends also on Toom32 and
   Toom42 implementations. */
#define MPN_TOOM22_MUL_MINSIZE    6
#define MPN_TOOM2_SQR_MINSIZE     4

#define MPN_TOOM33_MUL_MINSIZE   17
#define MPN_TOOM3_SQR_MINSIZE    17

#define MPN_TOOM44_MUL_MINSIZE   30
#define MPN_TOOM4_SQR_MINSIZE    30

#define MPN_TOOM6H_MUL_MINSIZE   46
#define MPN_TOOM6_SQR_MINSIZE    46

#define MPN_TOOM8H_MUL_MINSIZE   86
#define MPN_TOOM8_SQR_MINSIZE    86

#define MPN_TOOM32_MUL_MINSIZE   10
#define MPN_TOOM42_MUL_MINSIZE   10
#define MPN_TOOM43_MUL_MINSIZE   25
#define MPN_TOOM53_MUL_MINSIZE   17
#define MPN_TOOM54_MUL_MINSIZE   31
#define MPN_TOOM63_MUL_MINSIZE   49

#define MPN_TOOM42_MULMID_MINSIZE    4

#define mpn_sqr_diagonal __MPN(sqr_diagonal)
__GMP_DECLSPEC void mpn_sqr_diagonal (mp_ptr, mp_srcptr, mp_size_t);

#define mpn_sqr_diag_addlsh1 __MPN(sqr_diag_addlsh1)
__GMP_DECLSPEC void mpn_sqr_diag_addlsh1 (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);

#define mpn_toom_interpolate_5pts __MPN(toom_interpolate_5pts)
__GMP_DECLSPEC void mpn_toom_interpolate_5pts (mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_size_t, int, mp_limb_t);

enum toom6_flags {toom6_all_pos = 0, toom6_vm1_neg = 1, toom6_vm2_neg = 2};
#define mpn_toom_interpolate_6pts __MPN(toom_interpolate_6pts)
__GMP_DECLSPEC void mpn_toom_interpolate_6pts (mp_ptr, mp_size_t, enum toom6_flags, mp_ptr, mp_ptr, mp_ptr, mp_size_t);

enum toom7_flags { toom7_w1_neg = 1, toom7_w3_neg = 2 };
#define mpn_toom_interpolate_7pts __MPN(toom_interpolate_7pts)
__GMP_DECLSPEC void mpn_toom_interpolate_7pts (mp_ptr, mp_size_t, enum toom7_flags, mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_ptr);

#define mpn_toom_interpolate_8pts __MPN(toom_interpolate_8pts)
__GMP_DECLSPEC void mpn_toom_interpolate_8pts (mp_ptr, mp_size_t, mp_ptr, mp_ptr, mp_size_t, mp_ptr);

#define mpn_toom_interpolate_12pts __MPN(toom_interpolate_12pts)
__GMP_DECLSPEC void mpn_toom_interpolate_12pts (mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_size_t, int, mp_ptr);

#define mpn_toom_interpolate_16pts __MPN(toom_interpolate_16pts)
__GMP_DECLSPEC void mpn_toom_interpolate_16pts (mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_size_t, int, mp_ptr);

#define mpn_toom_couple_handling __MPN(toom_couple_handling)
__GMP_DECLSPEC void mpn_toom_couple_handling (mp_ptr, mp_size_t, mp_ptr, int, mp_size_t, int, int);

#define mpn_toom_eval_dgr3_pm1 __MPN(toom_eval_dgr3_pm1)
__GMP_DECLSPEC int mpn_toom_eval_dgr3_pm1 (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_size_t, mp_ptr);

#define mpn_toom_eval_dgr3_pm2 __MPN(toom_eval_dgr3_pm2)
__GMP_DECLSPEC int mpn_toom_eval_dgr3_pm2 (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_size_t, mp_ptr);

#define mpn_toom_eval_pm1 __MPN(toom_eval_pm1)
__GMP_DECLSPEC int mpn_toom_eval_pm1 (mp_ptr, mp_ptr, unsigned, mp_srcptr, mp_size_t, mp_size_t, mp_ptr);

#define mpn_toom_eval_pm2 __MPN(toom_eval_pm2)
__GMP_DECLSPEC int mpn_toom_eval_pm2 (mp_ptr, mp_ptr, unsigned, mp_srcptr, mp_size_t, mp_size_t, mp_ptr);

#define mpn_toom_eval_pm2exp __MPN(toom_eval_pm2exp)
__GMP_DECLSPEC int mpn_toom_eval_pm2exp (mp_ptr, mp_ptr, unsigned, mp_srcptr, mp_size_t, mp_size_t, unsigned, mp_ptr);

#define mpn_toom_eval_pm2rexp __MPN(toom_eval_pm2rexp)
__GMP_DECLSPEC int mpn_toom_eval_pm2rexp (mp_ptr, mp_ptr, unsigned, mp_srcptr, mp_size_t, mp_size_t, unsigned, mp_ptr);

#define mpn_toom22_mul __MPN(toom22_mul)
__GMP_DECLSPEC void mpn_toom22_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);

#define mpn_toom32_mul __MPN(toom32_mul)
__GMP_DECLSPEC void mpn_toom32_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);

#define mpn_toom42_mul __MPN(toom42_mul)
__GMP_DECLSPEC void mpn_toom42_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);

#define mpn_toom52_mul __MPN(toom52_mul)
__GMP_DECLSPEC void mpn_toom52_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);

#define mpn_toom62_mul __MPN(toom62_mul)
__GMP_DECLSPEC void mpn_toom62_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);

#define mpn_toom2_sqr __MPN(toom2_sqr)
__GMP_DECLSPEC void mpn_toom2_sqr (mp_ptr, mp_srcptr, mp_size_t, mp_ptr);

#define mpn_toom33_mul __MPN(toom33_mul)
__GMP_DECLSPEC void mpn_toom33_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);

#define mpn_toom43_mul __MPN(toom43_mul)
__GMP_DECLSPEC void mpn_toom43_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);

#define mpn_toom53_mul __MPN(toom53_mul)
__GMP_DECLSPEC void mpn_toom53_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);

#define mpn_toom54_mul __MPN(toom54_mul)
__GMP_DECLSPEC void mpn_toom54_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);

#define mpn_toom63_mul __MPN(toom63_mul)
__GMP_DECLSPEC void mpn_toom63_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);

#define mpn_toom3_sqr __MPN(toom3_sqr)
__GMP_DECLSPEC void mpn_toom3_sqr (mp_ptr, mp_srcptr, mp_size_t, mp_ptr);

#define mpn_toom44_mul __MPN(toom44_mul)
__GMP_DECLSPEC void mpn_toom44_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);

#define mpn_toom4_sqr __MPN(toom4_sqr)
__GMP_DECLSPEC void mpn_toom4_sqr (mp_ptr, mp_srcptr, mp_size_t, mp_ptr);

#define mpn_toom6h_mul __MPN(toom6h_mul)
__GMP_DECLSPEC void mpn_toom6h_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);

#define mpn_toom6_sqr __MPN(toom6_sqr)
__GMP_DECLSPEC void mpn_toom6_sqr (mp_ptr, mp_srcptr, mp_size_t, mp_ptr);

#define mpn_toom8h_mul __MPN(toom8h_mul)
__GMP_DECLSPEC void mpn_toom8h_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);

#define mpn_toom8_sqr __MPN(toom8_sqr)
__GMP_DECLSPEC void mpn_toom8_sqr (mp_ptr, mp_srcptr, mp_size_t, mp_ptr);

#define mpn_toom42_mulmid __MPN(toom42_mulmid)
__GMP_DECLSPEC void mpn_toom42_mulmid (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_ptr);

#define mpn_fft_best_k __MPN(fft_best_k)
__GMP_DECLSPEC int mpn_fft_best_k (mp_size_t, int) ATTRIBUTE_CONST;

#define mpn_mul_fft __MPN(mul_fft)
__GMP_DECLSPEC mp_limb_t mpn_mul_fft (mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, int);
mpn_mul_fft_full __MPN(mul_fft_full) 1511__GMP_DECLSPEC void mpn_mul_fft_full (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t); 1512 1513#define mpn_nussbaumer_mul __MPN(nussbaumer_mul) 1514__GMP_DECLSPEC void mpn_nussbaumer_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t); 1515 1516#define mpn_fft_next_size __MPN(fft_next_size) 1517__GMP_DECLSPEC mp_size_t mpn_fft_next_size (mp_size_t, int) ATTRIBUTE_CONST; 1518 1519#define mpn_div_qr_1n_pi1 __MPN(div_qr_1n_pi1) 1520 __GMP_DECLSPEC mp_limb_t mpn_div_qr_1n_pi1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t, mp_limb_t); 1521 1522#define mpn_div_qr_2n_pi1 __MPN(div_qr_2n_pi1) 1523 __GMP_DECLSPEC mp_limb_t mpn_div_qr_2n_pi1 (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t, mp_limb_t); 1524 1525#define mpn_div_qr_2u_pi1 __MPN(div_qr_2u_pi1) 1526 __GMP_DECLSPEC mp_limb_t mpn_div_qr_2u_pi1 (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t, int, mp_limb_t); 1527 1528#define mpn_sbpi1_div_qr __MPN(sbpi1_div_qr) 1529__GMP_DECLSPEC mp_limb_t mpn_sbpi1_div_qr (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t); 1530 1531#define mpn_sbpi1_div_q __MPN(sbpi1_div_q) 1532__GMP_DECLSPEC mp_limb_t mpn_sbpi1_div_q (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t); 1533 1534#define mpn_sbpi1_divappr_q __MPN(sbpi1_divappr_q) 1535__GMP_DECLSPEC mp_limb_t mpn_sbpi1_divappr_q (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t); 1536 1537#define mpn_dcpi1_div_qr __MPN(dcpi1_div_qr) 1538__GMP_DECLSPEC mp_limb_t mpn_dcpi1_div_qr (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, gmp_pi1_t *); 1539#define mpn_dcpi1_div_qr_n __MPN(dcpi1_div_qr_n) 1540__GMP_DECLSPEC mp_limb_t mpn_dcpi1_div_qr_n (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, gmp_pi1_t *, mp_ptr); 1541 1542#define mpn_dcpi1_div_q __MPN(dcpi1_div_q) 1543__GMP_DECLSPEC mp_limb_t mpn_dcpi1_div_q (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, gmp_pi1_t *); 1544 1545#define mpn_dcpi1_divappr_q __MPN(dcpi1_divappr_q) 1546__GMP_DECLSPEC mp_limb_t mpn_dcpi1_divappr_q (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, gmp_pi1_t *); 1547 1548#define mpn_mu_div_qr __MPN(mu_div_qr) 1549__GMP_DECLSPEC mp_limb_t mpn_mu_div_qr (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr); 1550#define mpn_mu_div_qr_itch __MPN(mu_div_qr_itch) 1551__GMP_DECLSPEC mp_size_t mpn_mu_div_qr_itch (mp_size_t, mp_size_t, int) ATTRIBUTE_CONST; 1552 1553#define mpn_preinv_mu_div_qr __MPN(preinv_mu_div_qr) 1554__GMP_DECLSPEC mp_limb_t mpn_preinv_mu_div_qr (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr); 1555#define mpn_preinv_mu_div_qr_itch __MPN(preinv_mu_div_qr_itch) 1556__GMP_DECLSPEC mp_size_t mpn_preinv_mu_div_qr_itch (mp_size_t, mp_size_t, mp_size_t) ATTRIBUTE_CONST; 1557 1558#define mpn_mu_divappr_q __MPN(mu_divappr_q) 1559__GMP_DECLSPEC mp_limb_t mpn_mu_divappr_q (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr); 1560#define mpn_mu_divappr_q_itch __MPN(mu_divappr_q_itch) 1561__GMP_DECLSPEC mp_size_t mpn_mu_divappr_q_itch (mp_size_t, mp_size_t, int) ATTRIBUTE_CONST; 1562 1563#define mpn_mu_div_q __MPN(mu_div_q) 1564__GMP_DECLSPEC mp_limb_t mpn_mu_div_q (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr); 1565#define mpn_mu_div_q_itch __MPN(mu_div_q_itch) 1566__GMP_DECLSPEC mp_size_t mpn_mu_div_q_itch (mp_size_t, mp_size_t, int) ATTRIBUTE_CONST; 1567 1568#define mpn_div_q __MPN(div_q) 1569__GMP_DECLSPEC void mpn_div_q (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr); 1570 1571#define 
mpn_invert __MPN(invert) 1572__GMP_DECLSPEC void mpn_invert (mp_ptr, mp_srcptr, mp_size_t, mp_ptr); 1573#define mpn_invert_itch(n) mpn_invertappr_itch(n) 1574 1575#define mpn_ni_invertappr __MPN(ni_invertappr) 1576__GMP_DECLSPEC mp_limb_t mpn_ni_invertappr (mp_ptr, mp_srcptr, mp_size_t, mp_ptr); 1577#define mpn_invertappr __MPN(invertappr) 1578__GMP_DECLSPEC mp_limb_t mpn_invertappr (mp_ptr, mp_srcptr, mp_size_t, mp_ptr); 1579#define mpn_invertappr_itch(n) (2 * (n)) 1580 1581#define mpn_binvert __MPN(binvert) 1582__GMP_DECLSPEC void mpn_binvert (mp_ptr, mp_srcptr, mp_size_t, mp_ptr); 1583#define mpn_binvert_itch __MPN(binvert_itch) 1584__GMP_DECLSPEC mp_size_t mpn_binvert_itch (mp_size_t) ATTRIBUTE_CONST; 1585 1586#define mpn_bdiv_q_1 __MPN(bdiv_q_1) 1587__GMP_DECLSPEC mp_limb_t mpn_bdiv_q_1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t); 1588 1589#define mpn_pi1_bdiv_q_1 __MPN(pi1_bdiv_q_1) 1590__GMP_DECLSPEC mp_limb_t mpn_pi1_bdiv_q_1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t, int); 1591 1592#define mpn_sbpi1_bdiv_qr __MPN(sbpi1_bdiv_qr) 1593__GMP_DECLSPEC mp_limb_t mpn_sbpi1_bdiv_qr (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t); 1594 1595#define mpn_sbpi1_bdiv_q __MPN(sbpi1_bdiv_q) 1596__GMP_DECLSPEC void mpn_sbpi1_bdiv_q (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t); 1597 1598#define mpn_sbpi1_bdiv_r __MPN(sbpi1_bdiv_r) 1599__GMP_DECLSPEC mp_limb_t mpn_sbpi1_bdiv_r (mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t); 1600 1601#define mpn_dcpi1_bdiv_qr __MPN(dcpi1_bdiv_qr) 1602__GMP_DECLSPEC mp_limb_t mpn_dcpi1_bdiv_qr (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t); 1603#define mpn_dcpi1_bdiv_qr_n_itch __MPN(dcpi1_bdiv_qr_n_itch) 1604__GMP_DECLSPEC mp_size_t mpn_dcpi1_bdiv_qr_n_itch (mp_size_t) ATTRIBUTE_CONST; 1605 1606#define mpn_dcpi1_bdiv_qr_n __MPN(dcpi1_bdiv_qr_n) 1607__GMP_DECLSPEC mp_limb_t mpn_dcpi1_bdiv_qr_n (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_ptr); 1608#define mpn_dcpi1_bdiv_q __MPN(dcpi1_bdiv_q) 1609__GMP_DECLSPEC void mpn_dcpi1_bdiv_q (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t); 1610 1611#define mpn_mu_bdiv_qr __MPN(mu_bdiv_qr) 1612__GMP_DECLSPEC mp_limb_t mpn_mu_bdiv_qr (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr); 1613#define mpn_mu_bdiv_qr_itch __MPN(mu_bdiv_qr_itch) 1614__GMP_DECLSPEC mp_size_t mpn_mu_bdiv_qr_itch (mp_size_t, mp_size_t) ATTRIBUTE_CONST; 1615 1616#define mpn_mu_bdiv_q __MPN(mu_bdiv_q) 1617__GMP_DECLSPEC void mpn_mu_bdiv_q (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr); 1618#define mpn_mu_bdiv_q_itch __MPN(mu_bdiv_q_itch) 1619__GMP_DECLSPEC mp_size_t mpn_mu_bdiv_q_itch (mp_size_t, mp_size_t) ATTRIBUTE_CONST; 1620 1621#define mpn_bdiv_qr __MPN(bdiv_qr) 1622__GMP_DECLSPEC mp_limb_t mpn_bdiv_qr (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr); 1623#define mpn_bdiv_qr_itch __MPN(bdiv_qr_itch) 1624__GMP_DECLSPEC mp_size_t mpn_bdiv_qr_itch (mp_size_t, mp_size_t) ATTRIBUTE_CONST; 1625 1626#define mpn_bdiv_q __MPN(bdiv_q) 1627__GMP_DECLSPEC void mpn_bdiv_q (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr); 1628#define mpn_bdiv_q_itch __MPN(bdiv_q_itch) 1629__GMP_DECLSPEC mp_size_t mpn_bdiv_q_itch (mp_size_t, mp_size_t) ATTRIBUTE_CONST; 1630 1631#define mpn_divexact __MPN(divexact) 1632__GMP_DECLSPEC void mpn_divexact (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t); 1633#define mpn_divexact_itch __MPN(divexact_itch) 1634__GMP_DECLSPEC mp_size_t mpn_divexact_itch (mp_size_t, mp_size_t) ATTRIBUTE_CONST; 1635 
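/* Illustrative sketch (not library code) of the "itch" convention used by
   the declarations above: a foo_itch function reports how many scratch
   limbs foo needs, and the caller allocates that space and passes it in.
   This assumes the TMP_* allocation macros defined earlier in this file;
   example_bdiv_q is a hypothetical helper. */
static inline void
example_bdiv_q (mp_ptr qp, mp_srcptr np, mp_size_t nn,
                mp_srcptr dp, mp_size_t dn)
{
  mp_ptr scratch;
  TMP_DECL;
  TMP_MARK;
  scratch = TMP_ALLOC_LIMBS (mpn_bdiv_q_itch (nn, dn));
  /* qp gets the Hensel quotient np * dp^{-1} mod B^nn (dp must be odd) */
  mpn_bdiv_q (qp, np, nn, dp, dn, scratch);
  TMP_FREE;
}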
1636#ifndef mpn_bdiv_dbm1c /* if not done with cpuvec in a fat binary */ 1637#define mpn_bdiv_dbm1c __MPN(bdiv_dbm1c) 1638__GMP_DECLSPEC mp_limb_t mpn_bdiv_dbm1c (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t); 1639#endif 1640 1641#define mpn_bdiv_dbm1(dst, src, size, divisor) \ 1642 mpn_bdiv_dbm1c (dst, src, size, divisor, __GMP_CAST (mp_limb_t, 0)) 1643 1644#define mpn_powm __MPN(powm) 1645__GMP_DECLSPEC void mpn_powm (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr); 1646#define mpn_powlo __MPN(powlo) 1647__GMP_DECLSPEC void mpn_powlo (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_size_t, mp_ptr); 1648 1649#define mpn_sec_pi1_div_qr __MPN(sec_pi1_div_qr) 1650__GMP_DECLSPEC mp_limb_t mpn_sec_pi1_div_qr (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t, mp_ptr); 1651#define mpn_sec_pi1_div_r __MPN(sec_pi1_div_r) 1652__GMP_DECLSPEC void mpn_sec_pi1_div_r (mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t, mp_ptr); 1653 1654 1655#ifndef DIVEXACT_BY3_METHOD 1656#if GMP_NUMB_BITS % 2 == 0 && ! defined (HAVE_NATIVE_mpn_divexact_by3c) 1657#define DIVEXACT_BY3_METHOD 0 /* default to using mpn_bdiv_dbm1c */ 1658#else 1659#define DIVEXACT_BY3_METHOD 1 1660#endif 1661#endif 1662 1663#if DIVEXACT_BY3_METHOD == 0 1664#undef mpn_divexact_by3 1665#define mpn_divexact_by3(dst,src,size) \ 1666 (3 & mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 3))) 1667/* override mpn_divexact_by3c defined in gmp.h */ 1668/* 1669#undef mpn_divexact_by3c 1670#define mpn_divexact_by3c(dst,src,size,cy) \ 1671 (3 & mpn_bdiv_dbm1c (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 3, GMP_NUMB_MASK / 3 * cy))) 1672*/ 1673#endif 1674 1675#if GMP_NUMB_BITS % 4 == 0 1676#define mpn_divexact_by5(dst,src,size) \ 1677 (7 & 3 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 5))) 1678#endif 1679 1680#if GMP_NUMB_BITS % 3 == 0 1681#define mpn_divexact_by7(dst,src,size) \ 1682 (7 & 1 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 7))) 1683#endif 1684 1685#if GMP_NUMB_BITS % 6 == 0 1686#define mpn_divexact_by9(dst,src,size) \ 1687 (15 & 7 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 9))) 1688#endif 1689 1690#if GMP_NUMB_BITS % 10 == 0 1691#define mpn_divexact_by11(dst,src,size) \ 1692 (15 & 5 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 11))) 1693#endif 1694 1695#if GMP_NUMB_BITS % 12 == 0 1696#define mpn_divexact_by13(dst,src,size) \ 1697 (15 & 3 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 13))) 1698#endif 1699 1700#if GMP_NUMB_BITS % 4 == 0 1701#define mpn_divexact_by15(dst,src,size) \ 1702 (15 & 1 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 15))) 1703#endif 1704 1705#define mpz_divexact_gcd __gmpz_divexact_gcd 1706__GMP_DECLSPEC void mpz_divexact_gcd (mpz_ptr, mpz_srcptr, mpz_srcptr); 1707 1708#define mpz_prodlimbs __gmpz_prodlimbs 1709__GMP_DECLSPEC mp_size_t mpz_prodlimbs (mpz_ptr, mp_ptr, mp_size_t); 1710 1711#define mpz_oddfac_1 __gmpz_oddfac_1 1712__GMP_DECLSPEC void mpz_oddfac_1 (mpz_ptr, mp_limb_t, unsigned); 1713 1714#define mpz_stronglucas __gmpz_stronglucas 1715__GMP_DECLSPEC int mpz_stronglucas (mpz_srcptr, mpz_ptr, mpz_ptr); 1716 1717#define mpz_lucas_mod __gmpz_lucas_mod 1718__GMP_DECLSPEC int mpz_lucas_mod (mpz_ptr, mpz_ptr, long, mp_bitcnt_t, mpz_srcptr, mpz_ptr, mpz_ptr); 1719 1720#define mpz_inp_str_nowhite __gmpz_inp_str_nowhite 1721#ifdef _GMP_H_HAVE_FILE 1722__GMP_DECLSPEC size_t 
mpz_inp_str_nowhite (mpz_ptr, FILE *, int, int, size_t); 1723#endif 1724 1725#define mpn_divisible_p __MPN(divisible_p) 1726__GMP_DECLSPEC int mpn_divisible_p (mp_srcptr, mp_size_t, mp_srcptr, mp_size_t) __GMP_ATTRIBUTE_PURE; 1727 1728#define mpn_rootrem __MPN(rootrem) 1729__GMP_DECLSPEC mp_size_t mpn_rootrem (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t); 1730 1731#define mpn_broot __MPN(broot) 1732__GMP_DECLSPEC void mpn_broot (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t); 1733 1734#define mpn_broot_invm1 __MPN(broot_invm1) 1735__GMP_DECLSPEC void mpn_broot_invm1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t); 1736 1737#define mpn_brootinv __MPN(brootinv) 1738__GMP_DECLSPEC void mpn_brootinv (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_ptr); 1739 1740#define mpn_bsqrt __MPN(bsqrt) 1741__GMP_DECLSPEC void mpn_bsqrt (mp_ptr, mp_srcptr, mp_bitcnt_t, mp_ptr); 1742 1743#define mpn_bsqrtinv __MPN(bsqrtinv) 1744__GMP_DECLSPEC int mpn_bsqrtinv (mp_ptr, mp_srcptr, mp_bitcnt_t, mp_ptr); 1745 1746#if defined (_CRAY) 1747#define MPN_COPY_INCR(dst, src, n) \ 1748 do { \ 1749 int __i; /* Faster on some Crays with plain int */ \ 1750 _Pragma ("_CRI ivdep"); \ 1751 for (__i = 0; __i < (n); __i++) \ 1752 (dst)[__i] = (src)[__i]; \ 1753 } while (0) 1754#endif 1755 1756/* used by test programs, hence __GMP_DECLSPEC */ 1757#ifndef mpn_copyi /* if not done with cpuvec in a fat binary */ 1758#define mpn_copyi __MPN(copyi) 1759__GMP_DECLSPEC void mpn_copyi (mp_ptr, mp_srcptr, mp_size_t); 1760#endif 1761 1762#if ! defined (MPN_COPY_INCR) && HAVE_NATIVE_mpn_copyi 1763#define MPN_COPY_INCR(dst, src, size) \ 1764 do { \ 1765 ASSERT ((size) >= 0); \ 1766 ASSERT (MPN_SAME_OR_INCR_P (dst, src, size)); \ 1767 mpn_copyi (dst, src, size); \ 1768 } while (0) 1769#endif 1770 1771/* Copy N limbs from SRC to DST incrementing, N==0 allowed. */ 1772#if ! defined (MPN_COPY_INCR) 1773#define MPN_COPY_INCR(dst, src, n) \ 1774 do { \ 1775 ASSERT ((n) >= 0); \ 1776 ASSERT (MPN_SAME_OR_INCR_P (dst, src, n)); \ 1777 if ((n) != 0) \ 1778 { \ 1779 mp_size_t __n = (n) - 1; \ 1780 mp_ptr __dst = (dst); \ 1781 mp_srcptr __src = (src); \ 1782 mp_limb_t __x; \ 1783 __x = *__src++; \ 1784 if (__n != 0) \ 1785 { \ 1786 do \ 1787 { \ 1788 *__dst++ = __x; \ 1789 __x = *__src++; \ 1790 } \ 1791 while (--__n); \ 1792 } \ 1793 *__dst++ = __x; \ 1794 } \ 1795 } while (0) 1796#endif 1797 1798 1799#if defined (_CRAY) 1800#define MPN_COPY_DECR(dst, src, n) \ 1801 do { \ 1802 int __i; /* Faster on some Crays with plain int */ \ 1803 _Pragma ("_CRI ivdep"); \ 1804 for (__i = (n) - 1; __i >= 0; __i--) \ 1805 (dst)[__i] = (src)[__i]; \ 1806 } while (0) 1807#endif 1808 1809/* used by test programs, hence __GMP_DECLSPEC */ 1810#ifndef mpn_copyd /* if not done with cpuvec in a fat binary */ 1811#define mpn_copyd __MPN(copyd) 1812__GMP_DECLSPEC void mpn_copyd (mp_ptr, mp_srcptr, mp_size_t); 1813#endif 1814 1815#if ! defined (MPN_COPY_DECR) && HAVE_NATIVE_mpn_copyd 1816#define MPN_COPY_DECR(dst, src, size) \ 1817 do { \ 1818 ASSERT ((size) >= 0); \ 1819 ASSERT (MPN_SAME_OR_DECR_P (dst, src, size)); \ 1820 mpn_copyd (dst, src, size); \ 1821 } while (0) 1822#endif 1823 1824/* Copy N limbs from SRC to DST decrementing, N==0 allowed. */ 1825#if ! 
defined (MPN_COPY_DECR) 1826#define MPN_COPY_DECR(dst, src, n) \ 1827 do { \ 1828 ASSERT ((n) >= 0); \ 1829 ASSERT (MPN_SAME_OR_DECR_P (dst, src, n)); \ 1830 if ((n) != 0) \ 1831 { \ 1832 mp_size_t __n = (n) - 1; \ 1833 mp_ptr __dst = (dst) + __n; \ 1834 mp_srcptr __src = (src) + __n; \ 1835 mp_limb_t __x; \ 1836 __x = *__src--; \ 1837 if (__n != 0) \ 1838 { \ 1839 do \ 1840 { \ 1841 *__dst-- = __x; \ 1842 __x = *__src--; \ 1843 } \ 1844 while (--__n); \ 1845 } \ 1846 *__dst-- = __x; \ 1847 } \ 1848 } while (0) 1849#endif 1850 1851 1852#ifndef MPN_COPY 1853#define MPN_COPY(d,s,n) \ 1854 do { \ 1855 ASSERT (MPN_SAME_OR_SEPARATE_P (d, s, n)); \ 1856 MPN_COPY_INCR (d, s, n); \ 1857 } while (0) 1858#endif 1859 1860 1861/* Set {dst,size} to the limbs of {src,size} in reverse order. */ 1862#define MPN_REVERSE(dst, src, size) \ 1863 do { \ 1864 mp_ptr __dst = (dst); \ 1865 mp_size_t __size = (size); \ 1866 mp_srcptr __src = (src) + __size - 1; \ 1867 mp_size_t __i; \ 1868 ASSERT ((size) >= 0); \ 1869 ASSERT (! MPN_OVERLAP_P (dst, size, src, size)); \ 1870 CRAY_Pragma ("_CRI ivdep"); \ 1871 for (__i = 0; __i < __size; __i++) \ 1872 { \ 1873 *__dst = *__src; \ 1874 __dst++; \ 1875 __src--; \ 1876 } \ 1877 } while (0) 1878 1879 1880/* Zero n limbs at dst. 1881 1882 For power and powerpc we want an inline stu/bdnz loop for zeroing. On 1883 ppc630 for instance this is optimal since it can sustain only 1 store per 1884 cycle. 1885 1886 gcc 2.95.x (for powerpc64 -maix64, or powerpc32) doesn't recognise the 1887 "for" loop in the generic code below can become stu/bdnz. The do/while 1888 here helps it get to that. The same caveat about plain -mpowerpc64 mode 1889 applies here as to __GMPN_COPY_INCR in gmp.h. 1890 1891 xlc 3.1 already generates stu/bdnz from the generic C, and does so from 1892 this loop too. 1893 1894 Enhancement: GLIBC does some trickery with dcbz to zero whole cache lines 1895 at a time. MPN_ZERO isn't all that important in GMP, so it might be more 1896 trouble than it's worth to do the same, though perhaps a call to memset 1897 would be good when on a GNU system. */ 1898 1899#if HAVE_HOST_CPU_FAMILY_power || HAVE_HOST_CPU_FAMILY_powerpc 1900#define MPN_FILL(dst, n, f) \ 1901 do { \ 1902 mp_ptr __dst = (dst) - 1; \ 1903 mp_size_t __n = (n); \ 1904 ASSERT (__n > 0); \ 1905 do \ 1906 *++__dst = (f); \ 1907 while (--__n); \ 1908 } while (0) 1909#endif 1910 1911#ifndef MPN_FILL 1912#define MPN_FILL(dst, n, f) \ 1913 do { \ 1914 mp_ptr __dst = (dst); \ 1915 mp_size_t __n = (n); \ 1916 ASSERT (__n > 0); \ 1917 do \ 1918 *__dst++ = (f); \ 1919 while (--__n); \ 1920 } while (0) 1921#endif 1922 1923#define MPN_ZERO(dst, n) \ 1924 do { \ 1925 ASSERT ((n) >= 0); \ 1926 if ((n) != 0) \ 1927 MPN_FILL (dst, n, CNST_LIMB (0)); \ 1928 } while (0) 1929 1930/* On the x86s repe/scasl doesn't seem useful, since it takes many cycles to 1931 start up and would need to strip a lot of zeros before it'd be faster 1932 than a simple cmpl loop. Here are some times in cycles for 1933 std/repe/scasl/cld and cld/repe/scasl (the latter would be for stripping 1934 low zeros). 
1935 1936 std cld 1937 P5 18 16 1938 P6 46 38 1939 K6 36 13 1940 K7 21 20 1941*/ 1942#ifndef MPN_NORMALIZE 1943#define MPN_NORMALIZE(DST, NLIMBS) \ 1944 do { \ 1945 while ((NLIMBS) > 0) \ 1946 { \ 1947 if ((DST)[(NLIMBS) - 1] != 0) \ 1948 break; \ 1949 (NLIMBS)--; \ 1950 } \ 1951 } while (0) 1952#endif 1953#ifndef MPN_NORMALIZE_NOT_ZERO 1954#define MPN_NORMALIZE_NOT_ZERO(DST, NLIMBS) \ 1955 do { \ 1956 while (1) \ 1957 { \ 1958 ASSERT ((NLIMBS) >= 1); \ 1959 if ((DST)[(NLIMBS) - 1] != 0) \ 1960 break; \ 1961 (NLIMBS)--; \ 1962 } \ 1963 } while (0) 1964#endif 1965 1966/* Strip least significant zero limbs from {ptr,size} by incrementing ptr 1967 and decrementing size. low should be ptr[0], and will be the new ptr[0] 1968 on returning. The number in {ptr,size} must be non-zero, ie. size!=0 and 1969 somewhere a non-zero limb. */ 1970#define MPN_STRIP_LOW_ZEROS_NOT_ZERO(ptr, size, low) \ 1971 do { \ 1972 ASSERT ((size) >= 1); \ 1973 ASSERT ((low) == (ptr)[0]); \ 1974 \ 1975 while ((low) == 0) \ 1976 { \ 1977 (size)--; \ 1978 ASSERT ((size) >= 1); \ 1979 (ptr)++; \ 1980 (low) = *(ptr); \ 1981 } \ 1982 } while (0) 1983 1984/* Initialize X of type mpz_t with space for NLIMBS limbs. X should be a 1985 temporary variable; it will be automatically cleared out at function 1986 return. We use __x here to make it possible to accept both mpz_ptr and 1987 mpz_t arguments. */ 1988#define MPZ_TMP_INIT(X, NLIMBS) \ 1989 do { \ 1990 mpz_ptr __x = (X); \ 1991 ASSERT ((NLIMBS) >= 1); \ 1992 __x->_mp_alloc = (NLIMBS); \ 1993 __x->_mp_d = TMP_ALLOC_LIMBS (NLIMBS); \ 1994 } while (0) 1995 1996#if WANT_ASSERT 1997static inline void * 1998_mpz_newalloc (mpz_ptr z, mp_size_t n) 1999{ 2000 void * res = _mpz_realloc(z,n); 2001 /* If we are checking the code, force a random change to limbs. */ 2002 ((mp_ptr) res)[0] = ~ ((mp_ptr) res)[ALLOC (z) - 1]; 2003 return res; 2004} 2005#else 2006#define _mpz_newalloc _mpz_realloc 2007#endif 2008/* Realloc for an mpz_t WHAT if it has less than NEEDED limbs. */ 2009#define MPZ_REALLOC(z,n) (UNLIKELY ((n) > ALLOC(z)) \ 2010 ? (mp_ptr) _mpz_realloc(z,n) \ 2011 : PTR(z)) 2012#define MPZ_NEWALLOC(z,n) (UNLIKELY ((n) > ALLOC(z)) \ 2013 ? (mp_ptr) _mpz_newalloc(z,n) \ 2014 : PTR(z)) 2015 2016#define MPZ_EQUAL_1_P(z) (SIZ(z)==1 && PTR(z)[0] == 1) 2017 2018 2019/* MPN_FIB2_SIZE(n) is the size in limbs required by mpn_fib2_ui for fp and 2020 f1p. 2021 2022 From Knuth vol 1 section 1.2.8, F[n] = phi^n/sqrt(5) rounded to the 2023 nearest integer, where phi=(1+sqrt(5))/2 is the golden ratio. So the 2024 number of bits required is n*log_2((1+sqrt(5))/2) = n*0.6942419. 2025 2026 The multiplier used is 23/32=0.71875 for efficient calculation on CPUs 2027 without good floating point. There's +2 for rounding up, and a further 2028 +2 since at the last step x limbs are doubled into a 2x+1 limb region 2029 whereas the actual F[2k] value might be only 2x-1 limbs. 2030 2031 Note that a division is done first, since on a 32-bit system it's at 2032 least conceivable to go right up to n==ULONG_MAX. (F[2^32-1] would be 2033 about 380Mbytes, plus temporary workspace of about 1.2Gbytes here and 2034 whatever a multiply of two 190Mbyte numbers takes.) 2035 2036 Enhancement: When GMP_NUMB_BITS is not a power of 2 the division could be 2037 worked into the multiplier. */ 2038 2039#define MPN_FIB2_SIZE(n) \ 2040 ((mp_size_t) ((n) / 32 * 23 / GMP_NUMB_BITS) + 4) 2041 2042 2043/* FIB_TABLE(n) returns the Fibonacci number F[n]. 
Must have n in the range 2044 -1 <= n <= FIB_TABLE_LIMIT (that constant in fib_table.h). 2045 2046 FIB_TABLE_LUCNUM_LIMIT (in fib_table.h) is the largest n for which L[n] = 2047 F[n] + 2*F[n-1] fits in a limb. */ 2048 2049__GMP_DECLSPEC extern const mp_limb_t __gmp_fib_table[]; 2050#define FIB_TABLE(n) (__gmp_fib_table[(n)+1]) 2051 2052extern const mp_limb_t __gmp_oddfac_table[]; 2053extern const mp_limb_t __gmp_odd2fac_table[]; 2054extern const unsigned char __gmp_fac2cnt_table[]; 2055extern const mp_limb_t __gmp_limbroots_table[]; 2056 2057/* n^log <= GMP_NUMB_MAX, a limb can store log factors less than n */ 2058static inline unsigned 2059log_n_max (mp_limb_t n) 2060{ 2061 unsigned log; 2062 for (log = 8; n > __gmp_limbroots_table[log - 1]; log--); 2063 return log; 2064} 2065 2066#define SIEVESIZE 512 /* FIXME: Allow gmp_init_primesieve to choose */ 2067typedef struct 2068{ 2069 unsigned long d; /* current index in s[] */ 2070 unsigned long s0; /* number corresponding to s[0] */ 2071 unsigned long sqrt_s0; /* misnomer for sqrt(s[SIEVESIZE-1]) */ 2072 unsigned char s[SIEVESIZE + 1]; /* sieve table */ 2073} gmp_primesieve_t; 2074 2075#define gmp_init_primesieve __gmp_init_primesieve 2076__GMP_DECLSPEC void gmp_init_primesieve (gmp_primesieve_t *); 2077 2078#define gmp_nextprime __gmp_nextprime 2079__GMP_DECLSPEC unsigned long int gmp_nextprime (gmp_primesieve_t *); 2080 2081#define gmp_primesieve __gmp_primesieve 2082__GMP_DECLSPEC mp_limb_t gmp_primesieve (mp_ptr, mp_limb_t); 2083 2084 2085#ifndef MUL_TOOM22_THRESHOLD 2086#define MUL_TOOM22_THRESHOLD 30 2087#endif 2088 2089#ifndef MUL_TOOM33_THRESHOLD 2090#define MUL_TOOM33_THRESHOLD 100 2091#endif 2092 2093#ifndef MUL_TOOM44_THRESHOLD 2094#define MUL_TOOM44_THRESHOLD 300 2095#endif 2096 2097#ifndef MUL_TOOM6H_THRESHOLD 2098#define MUL_TOOM6H_THRESHOLD 350 2099#endif 2100 2101#ifndef SQR_TOOM6_THRESHOLD 2102#define SQR_TOOM6_THRESHOLD MUL_TOOM6H_THRESHOLD 2103#endif 2104 2105#ifndef MUL_TOOM8H_THRESHOLD 2106#define MUL_TOOM8H_THRESHOLD 450 2107#endif 2108 2109#ifndef SQR_TOOM8_THRESHOLD 2110#define SQR_TOOM8_THRESHOLD MUL_TOOM8H_THRESHOLD 2111#endif 2112 2113#ifndef MUL_TOOM32_TO_TOOM43_THRESHOLD 2114#define MUL_TOOM32_TO_TOOM43_THRESHOLD 100 2115#endif 2116 2117#ifndef MUL_TOOM32_TO_TOOM53_THRESHOLD 2118#define MUL_TOOM32_TO_TOOM53_THRESHOLD 110 2119#endif 2120 2121#ifndef MUL_TOOM42_TO_TOOM53_THRESHOLD 2122#define MUL_TOOM42_TO_TOOM53_THRESHOLD 100 2123#endif 2124 2125#ifndef MUL_TOOM42_TO_TOOM63_THRESHOLD 2126#define MUL_TOOM42_TO_TOOM63_THRESHOLD 110 2127#endif 2128 2129#ifndef MUL_TOOM43_TO_TOOM54_THRESHOLD 2130#define MUL_TOOM43_TO_TOOM54_THRESHOLD 150 2131#endif 2132 2133/* MUL_TOOM22_THRESHOLD_LIMIT is the maximum for MUL_TOOM22_THRESHOLD. In a 2134 normal build MUL_TOOM22_THRESHOLD is a constant and we use that. In a fat 2135 binary or tune program build MUL_TOOM22_THRESHOLD is a variable and a 2136 separate hard limit will have been defined. Similarly for TOOM3. 
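*/

/* Illustrative sketch (not library code) of how the thresholds above are
   consumed, via the ABOVE_THRESHOLD/BELOW_THRESHOLD macros.  The real
   dispatch (in mpn/generic/mul_n.c) has more levels and computes its own
   scratch; example_mul_n_dispatch is a hypothetical, simplified helper,
   and scratch must be big enough for the routine chosen. */
static inline void
example_mul_n_dispatch (mp_ptr rp, mp_srcptr ap, mp_srcptr bp,
                        mp_size_t n, mp_ptr scratch)
{
  if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))
    mpn_mul_basecase (rp, ap, n, bp, n);           /* schoolbook O(n^2) */
  else if (BELOW_THRESHOLD (n, MUL_TOOM33_THRESHOLD))
    mpn_toom22_mul (rp, ap, n, bp, n, scratch);    /* Karatsuba */
  else
    mpn_toom33_mul (rp, ap, n, bp, n, scratch);    /* Toom-3 */
}

/* The compile-time hard limits referred to above: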
*/ 2137#ifndef MUL_TOOM22_THRESHOLD_LIMIT 2138#define MUL_TOOM22_THRESHOLD_LIMIT MUL_TOOM22_THRESHOLD 2139#endif 2140#ifndef MUL_TOOM33_THRESHOLD_LIMIT 2141#define MUL_TOOM33_THRESHOLD_LIMIT MUL_TOOM33_THRESHOLD 2142#endif 2143#ifndef MULLO_BASECASE_THRESHOLD_LIMIT 2144#define MULLO_BASECASE_THRESHOLD_LIMIT MULLO_BASECASE_THRESHOLD 2145#endif 2146#ifndef SQRLO_BASECASE_THRESHOLD_LIMIT 2147#define SQRLO_BASECASE_THRESHOLD_LIMIT SQRLO_BASECASE_THRESHOLD 2148#endif 2149#ifndef SQRLO_DC_THRESHOLD_LIMIT 2150#define SQRLO_DC_THRESHOLD_LIMIT SQRLO_DC_THRESHOLD 2151#endif 2152 2153/* SQR_BASECASE_THRESHOLD is where mpn_sqr_basecase should take over from 2154 mpn_mul_basecase. Default is to use mpn_sqr_basecase from 0. (Note that we 2155 certainly always want it if there's a native assembler mpn_sqr_basecase.) 2156 2157 If it turns out that mpn_toom2_sqr becomes faster than mpn_mul_basecase 2158 before mpn_sqr_basecase does, then SQR_BASECASE_THRESHOLD is the toom2 2159 threshold and SQR_TOOM2_THRESHOLD is 0. This oddity arises more or less 2160 because SQR_TOOM2_THRESHOLD represents the size up to which mpn_sqr_basecase 2161 should be used, and that may be never. */ 2162 2163#ifndef SQR_BASECASE_THRESHOLD 2164#define SQR_BASECASE_THRESHOLD 0 /* never use mpn_mul_basecase */ 2165#endif 2166 2167#ifndef SQR_TOOM2_THRESHOLD 2168#define SQR_TOOM2_THRESHOLD 50 2169#endif 2170 2171#ifndef SQR_TOOM3_THRESHOLD 2172#define SQR_TOOM3_THRESHOLD 120 2173#endif 2174 2175#ifndef SQR_TOOM4_THRESHOLD 2176#define SQR_TOOM4_THRESHOLD 400 2177#endif 2178 2179/* See comments above about MUL_TOOM33_THRESHOLD_LIMIT. */ 2180#ifndef SQR_TOOM3_THRESHOLD_LIMIT 2181#define SQR_TOOM3_THRESHOLD_LIMIT SQR_TOOM3_THRESHOLD 2182#endif 2183 2184#ifndef MULMID_TOOM42_THRESHOLD 2185#define MULMID_TOOM42_THRESHOLD MUL_TOOM22_THRESHOLD 2186#endif 2187 2188#ifndef MULLO_BASECASE_THRESHOLD 2189#define MULLO_BASECASE_THRESHOLD 0 /* never use mpn_mul_basecase */ 2190#endif 2191 2192#ifndef MULLO_DC_THRESHOLD 2193#define MULLO_DC_THRESHOLD (2*MUL_TOOM22_THRESHOLD) 2194#endif 2195 2196#ifndef MULLO_MUL_N_THRESHOLD 2197#define MULLO_MUL_N_THRESHOLD (2*MUL_FFT_THRESHOLD) 2198#endif 2199 2200#ifndef SQRLO_BASECASE_THRESHOLD 2201#define SQRLO_BASECASE_THRESHOLD 0 /* never use mpn_sqr_basecase */ 2202#endif 2203 2204#ifndef SQRLO_DC_THRESHOLD 2205#define SQRLO_DC_THRESHOLD (MULLO_DC_THRESHOLD) 2206#endif 2207 2208#ifndef SQRLO_SQR_THRESHOLD 2209#define SQRLO_SQR_THRESHOLD (MULLO_MUL_N_THRESHOLD) 2210#endif 2211 2212#ifndef DC_DIV_QR_THRESHOLD 2213#define DC_DIV_QR_THRESHOLD (2*MUL_TOOM22_THRESHOLD) 2214#endif 2215 2216#ifndef DC_DIVAPPR_Q_THRESHOLD 2217#define DC_DIVAPPR_Q_THRESHOLD 200 2218#endif 2219 2220#ifndef DC_BDIV_QR_THRESHOLD 2221#define DC_BDIV_QR_THRESHOLD (2*MUL_TOOM22_THRESHOLD) 2222#endif 2223 2224#ifndef DC_BDIV_Q_THRESHOLD 2225#define DC_BDIV_Q_THRESHOLD 180 2226#endif 2227 2228#ifndef DIVEXACT_JEB_THRESHOLD 2229#define DIVEXACT_JEB_THRESHOLD 25 2230#endif 2231 2232#ifndef INV_MULMOD_BNM1_THRESHOLD 2233#define INV_MULMOD_BNM1_THRESHOLD (4*MULMOD_BNM1_THRESHOLD) 2234#endif 2235 2236#ifndef INV_APPR_THRESHOLD 2237#define INV_APPR_THRESHOLD INV_NEWTON_THRESHOLD 2238#endif 2239 2240#ifndef INV_NEWTON_THRESHOLD 2241#define INV_NEWTON_THRESHOLD 200 2242#endif 2243 2244#ifndef BINV_NEWTON_THRESHOLD 2245#define BINV_NEWTON_THRESHOLD 300 2246#endif 2247 2248#ifndef MU_DIVAPPR_Q_THRESHOLD 2249#define MU_DIVAPPR_Q_THRESHOLD 2000 2250#endif 2251 2252#ifndef MU_DIV_QR_THRESHOLD 2253#define MU_DIV_QR_THRESHOLD 2000 2254#endif 2255 2256#ifndef 
MUPI_DIV_QR_THRESHOLD 2257#define MUPI_DIV_QR_THRESHOLD 200 2258#endif 2259 2260#ifndef MU_BDIV_Q_THRESHOLD 2261#define MU_BDIV_Q_THRESHOLD 2000 2262#endif 2263 2264#ifndef MU_BDIV_QR_THRESHOLD 2265#define MU_BDIV_QR_THRESHOLD 2000 2266#endif 2267 2268#ifndef MULMOD_BNM1_THRESHOLD 2269#define MULMOD_BNM1_THRESHOLD 16 2270#endif 2271 2272#ifndef SQRMOD_BNM1_THRESHOLD 2273#define SQRMOD_BNM1_THRESHOLD 16 2274#endif 2275 2276#ifndef MUL_TO_MULMOD_BNM1_FOR_2NXN_THRESHOLD 2277#define MUL_TO_MULMOD_BNM1_FOR_2NXN_THRESHOLD (INV_MULMOD_BNM1_THRESHOLD/2) 2278#endif 2279 2280#if HAVE_NATIVE_mpn_addmul_2 || HAVE_NATIVE_mpn_redc_2 2281 2282#ifndef REDC_1_TO_REDC_2_THRESHOLD 2283#define REDC_1_TO_REDC_2_THRESHOLD 15 2284#endif 2285#ifndef REDC_2_TO_REDC_N_THRESHOLD 2286#define REDC_2_TO_REDC_N_THRESHOLD 100 2287#endif 2288 2289#else 2290 2291#ifndef REDC_1_TO_REDC_N_THRESHOLD 2292#define REDC_1_TO_REDC_N_THRESHOLD 100 2293#endif 2294 2295#endif /* HAVE_NATIVE_mpn_addmul_2 || HAVE_NATIVE_mpn_redc_2 */ 2296 2297 2298/* First k to use for an FFT modF multiply. A modF FFT is an order 2299 log(2^k)/log(2^(k-1)) algorithm, so k=3 is merely 1.5 like karatsuba, 2300 whereas k=4 is 1.33 which is faster than toom3 at 1.485. */ 2301#define FFT_FIRST_K 4 2302 2303/* Threshold at which FFT should be used to do a modF NxN -> N multiply. */ 2304#ifndef MUL_FFT_MODF_THRESHOLD 2305#define MUL_FFT_MODF_THRESHOLD (MUL_TOOM33_THRESHOLD * 3) 2306#endif 2307#ifndef SQR_FFT_MODF_THRESHOLD 2308#define SQR_FFT_MODF_THRESHOLD (SQR_TOOM3_THRESHOLD * 3) 2309#endif 2310 2311/* Threshold at which FFT should be used to do an NxN -> 2N multiply. This 2312 will be a size where FFT is using k=7 or k=8, since an FFT-k used for an 2313 NxN->2N multiply and not recursing into itself is an order 2314 log(2^k)/log(2^(k-2)) algorithm, so it'll be at least k=7 at 1.39 which 2315 is the first better than toom3. */ 2316#ifndef MUL_FFT_THRESHOLD 2317#define MUL_FFT_THRESHOLD (MUL_FFT_MODF_THRESHOLD * 10) 2318#endif 2319#ifndef SQR_FFT_THRESHOLD 2320#define SQR_FFT_THRESHOLD (SQR_FFT_MODF_THRESHOLD * 10) 2321#endif 2322 2323/* Table of thresholds for successive modF FFT "k"s. The first entry is 2324 where FFT_FIRST_K+1 should be used, the second FFT_FIRST_K+2, 2325 etc. See mpn_fft_best_k(). 
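*/

/* Illustrative sketch of that lookup (not library code).  The real
   mpn_fft_best_k in mpn/generic/mul_fft.c works from tables in the
   struct fft_table_nk format defined below; this hypothetical version
   reads a plain zero-terminated table, for illustration only. */
static inline int
example_fft_best_k (mp_size_t n, const mp_size_t *table)
{
  int i;
  for (i = 0; table[i] != 0 && n >= table[i]; i++)
    ;                      /* each entry passed bumps k by one */
  return FFT_FIRST_K + i;  /* below table[0] means plain FFT_FIRST_K */
}

/* The default tables, in the zero-terminated format described above: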
*/ 2326#ifndef MUL_FFT_TABLE 2327#define MUL_FFT_TABLE \ 2328 { MUL_TOOM33_THRESHOLD * 4, /* k=5 */ \ 2329 MUL_TOOM33_THRESHOLD * 8, /* k=6 */ \ 2330 MUL_TOOM33_THRESHOLD * 16, /* k=7 */ \ 2331 MUL_TOOM33_THRESHOLD * 32, /* k=8 */ \ 2332 MUL_TOOM33_THRESHOLD * 96, /* k=9 */ \ 2333 MUL_TOOM33_THRESHOLD * 288, /* k=10 */ \ 2334 0 } 2335#endif 2336#ifndef SQR_FFT_TABLE 2337#define SQR_FFT_TABLE \ 2338 { SQR_TOOM3_THRESHOLD * 4, /* k=5 */ \ 2339 SQR_TOOM3_THRESHOLD * 8, /* k=6 */ \ 2340 SQR_TOOM3_THRESHOLD * 16, /* k=7 */ \ 2341 SQR_TOOM3_THRESHOLD * 32, /* k=8 */ \ 2342 SQR_TOOM3_THRESHOLD * 96, /* k=9 */ \ 2343 SQR_TOOM3_THRESHOLD * 288, /* k=10 */ \ 2344 0 } 2345#endif 2346 2347struct fft_table_nk 2348{ 2349 gmp_uint_least32_t n:27; 2350 gmp_uint_least32_t k:5; 2351}; 2352 2353#ifndef FFT_TABLE_ATTRS 2354#define FFT_TABLE_ATTRS static const 2355#endif 2356 2357#define MPN_FFT_TABLE_SIZE 16 2358 2359 2360#ifndef DC_DIV_QR_THRESHOLD 2361#define DC_DIV_QR_THRESHOLD (3 * MUL_TOOM22_THRESHOLD) 2362#endif 2363 2364#ifndef GET_STR_DC_THRESHOLD 2365#define GET_STR_DC_THRESHOLD 18 2366#endif 2367 2368#ifndef GET_STR_PRECOMPUTE_THRESHOLD 2369#define GET_STR_PRECOMPUTE_THRESHOLD 35 2370#endif 2371 2372#ifndef SET_STR_DC_THRESHOLD 2373#define SET_STR_DC_THRESHOLD 750 2374#endif 2375 2376#ifndef SET_STR_PRECOMPUTE_THRESHOLD 2377#define SET_STR_PRECOMPUTE_THRESHOLD 2000 2378#endif 2379 2380#ifndef FAC_ODD_THRESHOLD 2381#define FAC_ODD_THRESHOLD 35 2382#endif 2383 2384#ifndef FAC_DSC_THRESHOLD 2385#define FAC_DSC_THRESHOLD 400 2386#endif 2387 2388/* Return non-zero if xp,xsize and yp,ysize overlap. 2389 If xp+xsize<=yp there's no overlap, or if yp+ysize<=xp there's no 2390 overlap. If both these are false, there's an overlap. */ 2391#define MPN_OVERLAP_P(xp, xsize, yp, ysize) \ 2392 ((xp) + (xsize) > (yp) && (yp) + (ysize) > (xp)) 2393#define MEM_OVERLAP_P(xp, xsize, yp, ysize) \ 2394 ( (char *) (xp) + (xsize) > (char *) (yp) \ 2395 && (char *) (yp) + (ysize) > (char *) (xp)) 2396 2397/* Return non-zero if xp,xsize and yp,ysize are either identical or not 2398 overlapping. Return zero if they're partially overlapping. */ 2399#define MPN_SAME_OR_SEPARATE_P(xp, yp, size) \ 2400 MPN_SAME_OR_SEPARATE2_P(xp, size, yp, size) 2401#define MPN_SAME_OR_SEPARATE2_P(xp, xsize, yp, ysize) \ 2402 ((xp) == (yp) || ! MPN_OVERLAP_P (xp, xsize, yp, ysize)) 2403 2404/* Return non-zero if dst,dsize and src,ssize are either identical or 2405 overlapping in a way suitable for an incrementing/decrementing algorithm. 2406 Return zero if they're partially overlapping in an unsuitable fashion. */ 2407#define MPN_SAME_OR_INCR2_P(dst, dsize, src, ssize) \ 2408 ((dst) <= (src) || ! MPN_OVERLAP_P (dst, dsize, src, ssize)) 2409#define MPN_SAME_OR_INCR_P(dst, src, size) \ 2410 MPN_SAME_OR_INCR2_P(dst, size, src, size) 2411#define MPN_SAME_OR_DECR2_P(dst, dsize, src, ssize) \ 2412 ((dst) >= (src) || ! MPN_OVERLAP_P (dst, dsize, src, ssize)) 2413#define MPN_SAME_OR_DECR_P(dst, src, size) \ 2414 MPN_SAME_OR_DECR2_P(dst, size, src, size) 2415 2416 2417/* ASSERT() is a private assertion checking scheme, similar to <assert.h>. 2418 ASSERT() does the check only if WANT_ASSERT is selected, ASSERT_ALWAYS() 2419 does it always. Generally assertions are meant for development, but 2420 might help when looking for a problem later too. 
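
   For example,

	ASSERT (n >= 1);		  checked only if WANT_ASSERT
	ASSERT_ALWAYS (limbs != NULL);	  checked in every build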
*/ 2421 2422#ifdef __LINE__ 2423#define ASSERT_LINE __LINE__ 2424#else 2425#define ASSERT_LINE -1 2426#endif 2427 2428#ifdef __FILE__ 2429#define ASSERT_FILE __FILE__ 2430#else 2431#define ASSERT_FILE "" 2432#endif 2433 2434__GMP_DECLSPEC void __gmp_assert_header (const char *, int); 2435__GMP_DECLSPEC void __gmp_assert_fail (const char *, int, const char *) ATTRIBUTE_NORETURN; 2436 2437#define ASSERT_FAIL(expr) __gmp_assert_fail (ASSERT_FILE, ASSERT_LINE, #expr) 2438 2439#define ASSERT_ALWAYS(expr) \ 2440 do { \ 2441 if (UNLIKELY (!(expr))) \ 2442 ASSERT_FAIL (expr); \ 2443 } while (0) 2444 2445#if WANT_ASSERT 2446#define ASSERT(expr) ASSERT_ALWAYS (expr) 2447#else 2448#define ASSERT(expr) do {} while (0) 2449#endif 2450 2451 2452/* ASSERT_CARRY checks the expression is non-zero, and ASSERT_NOCARRY checks 2453 that it's zero. In both cases if assertion checking is disabled the 2454 expression is still evaluated. These macros are meant for use with 2455 routines like mpn_add_n() where the return value represents a carry or 2456 whatever that should or shouldn't occur in some context. For example, 2457 ASSERT_NOCARRY (mpn_add_n (rp, s1p, s2p, size)); */ 2458#if WANT_ASSERT 2459#define ASSERT_CARRY(expr) ASSERT_ALWAYS ((expr) != 0) 2460#define ASSERT_NOCARRY(expr) ASSERT_ALWAYS ((expr) == 0) 2461#else 2462#define ASSERT_CARRY(expr) (expr) 2463#define ASSERT_NOCARRY(expr) (expr) 2464#endif 2465 2466 2467/* ASSERT_CODE includes code when assertion checking is wanted. This is the 2468 same as writing "#if WANT_ASSERT", but more compact. */ 2469#if WANT_ASSERT 2470#define ASSERT_CODE(expr) expr 2471#else 2472#define ASSERT_CODE(expr) 2473#endif 2474 2475 2476/* Test that an mpq_t is in fully canonical form. This can be used as 2477 protection on routines like mpq_equal which give wrong results on 2478 non-canonical inputs. */ 2479#if WANT_ASSERT 2480#define ASSERT_MPQ_CANONICAL(q) \ 2481 do { \ 2482 ASSERT (q->_mp_den._mp_size > 0); \ 2483 if (q->_mp_num._mp_size == 0) \ 2484 { \ 2485 /* zero should be 0/1 */ \ 2486 ASSERT (mpz_cmp_ui (mpq_denref(q), 1L) == 0); \ 2487 } \ 2488 else \ 2489 { \ 2490 /* no common factors */ \ 2491 mpz_t __g; \ 2492 mpz_init (__g); \ 2493 mpz_gcd (__g, mpq_numref(q), mpq_denref(q)); \ 2494 ASSERT (mpz_cmp_ui (__g, 1) == 0); \ 2495 mpz_clear (__g); \ 2496 } \ 2497 } while (0) 2498#else 2499#define ASSERT_MPQ_CANONICAL(q) do {} while (0) 2500#endif 2501 2502/* Check that the nail parts are zero. */ 2503#define ASSERT_ALWAYS_LIMB(limb) \ 2504 do { \ 2505 mp_limb_t __nail = (limb) & GMP_NAIL_MASK; \ 2506 ASSERT_ALWAYS (__nail == 0); \ 2507 } while (0) 2508#define ASSERT_ALWAYS_MPN(ptr, size) \ 2509 do { \ 2510 /* let whole loop go dead when no nails */ \ 2511 if (GMP_NAIL_BITS != 0) \ 2512 { \ 2513 mp_size_t __i; \ 2514 for (__i = 0; __i < (size); __i++) \ 2515 ASSERT_ALWAYS_LIMB ((ptr)[__i]); \ 2516 } \ 2517 } while (0) 2518#if WANT_ASSERT 2519#define ASSERT_LIMB(limb) ASSERT_ALWAYS_LIMB (limb) 2520#define ASSERT_MPN(ptr, size) ASSERT_ALWAYS_MPN (ptr, size) 2521#else 2522#define ASSERT_LIMB(limb) do {} while (0) 2523#define ASSERT_MPN(ptr, size) do {} while (0) 2524#endif 2525 2526 2527/* Assert that an mpn region {ptr,size} is zero, or non-zero. 2528 size==0 is allowed, and in that case {ptr,size} considered to be zero. 
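   For instance, a routine requiring a non-zero divisor {dp,dn} can check
   ASSERT_MPN_NONZERO_P (dp, dn) on entry.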
*/ 2529#if WANT_ASSERT 2530#define ASSERT_MPN_ZERO_P(ptr,size) \ 2531 do { \ 2532 mp_size_t __i; \ 2533 ASSERT ((size) >= 0); \ 2534 for (__i = 0; __i < (size); __i++) \ 2535 ASSERT ((ptr)[__i] == 0); \ 2536 } while (0) 2537#define ASSERT_MPN_NONZERO_P(ptr,size) \ 2538 do { \ 2539 mp_size_t __i; \ 2540 int __nonzero = 0; \ 2541 ASSERT ((size) >= 0); \ 2542 for (__i = 0; __i < (size); __i++) \ 2543 if ((ptr)[__i] != 0) \ 2544 { \ 2545 __nonzero = 1; \ 2546 break; \ 2547 } \ 2548 ASSERT (__nonzero); \ 2549 } while (0) 2550#else 2551#define ASSERT_MPN_ZERO_P(ptr,size) do {} while (0) 2552#define ASSERT_MPN_NONZERO_P(ptr,size) do {} while (0) 2553#endif 2554 2555 2556#if ! HAVE_NATIVE_mpn_com 2557#undef mpn_com 2558#define mpn_com(d,s,n) \ 2559 do { \ 2560 mp_ptr __d = (d); \ 2561 mp_srcptr __s = (s); \ 2562 mp_size_t __n = (n); \ 2563 ASSERT (__n >= 1); \ 2564 ASSERT (MPN_SAME_OR_SEPARATE_P (__d, __s, __n)); \ 2565 do \ 2566 *__d++ = (~ *__s++) & GMP_NUMB_MASK; \ 2567 while (--__n); \ 2568 } while (0) 2569#endif 2570 2571#define MPN_LOGOPS_N_INLINE(rp, up, vp, n, operation) \ 2572 do { \ 2573 mp_srcptr __up = (up); \ 2574 mp_srcptr __vp = (vp); \ 2575 mp_ptr __rp = (rp); \ 2576 mp_size_t __n = (n); \ 2577 mp_limb_t __a, __b; \ 2578 ASSERT (__n > 0); \ 2579 ASSERT (MPN_SAME_OR_SEPARATE_P (__rp, __up, __n)); \ 2580 ASSERT (MPN_SAME_OR_SEPARATE_P (__rp, __vp, __n)); \ 2581 __up += __n; \ 2582 __vp += __n; \ 2583 __rp += __n; \ 2584 __n = -__n; \ 2585 do { \ 2586 __a = __up[__n]; \ 2587 __b = __vp[__n]; \ 2588 __rp[__n] = operation; \ 2589 } while (++__n); \ 2590 } while (0) 2591 2592 2593#if ! HAVE_NATIVE_mpn_and_n 2594#undef mpn_and_n 2595#define mpn_and_n(rp, up, vp, n) \ 2596 MPN_LOGOPS_N_INLINE (rp, up, vp, n, __a & __b) 2597#endif 2598 2599#if ! HAVE_NATIVE_mpn_andn_n 2600#undef mpn_andn_n 2601#define mpn_andn_n(rp, up, vp, n) \ 2602 MPN_LOGOPS_N_INLINE (rp, up, vp, n, __a & ~__b) 2603#endif 2604 2605#if ! HAVE_NATIVE_mpn_nand_n 2606#undef mpn_nand_n 2607#define mpn_nand_n(rp, up, vp, n) \ 2608 MPN_LOGOPS_N_INLINE (rp, up, vp, n, ~(__a & __b) & GMP_NUMB_MASK) 2609#endif 2610 2611#if ! HAVE_NATIVE_mpn_ior_n 2612#undef mpn_ior_n 2613#define mpn_ior_n(rp, up, vp, n) \ 2614 MPN_LOGOPS_N_INLINE (rp, up, vp, n, __a | __b) 2615#endif 2616 2617#if ! HAVE_NATIVE_mpn_iorn_n 2618#undef mpn_iorn_n 2619#define mpn_iorn_n(rp, up, vp, n) \ 2620 MPN_LOGOPS_N_INLINE (rp, up, vp, n, (__a | ~__b) & GMP_NUMB_MASK) 2621#endif 2622 2623#if ! HAVE_NATIVE_mpn_nior_n 2624#undef mpn_nior_n 2625#define mpn_nior_n(rp, up, vp, n) \ 2626 MPN_LOGOPS_N_INLINE (rp, up, vp, n, ~(__a | __b) & GMP_NUMB_MASK) 2627#endif 2628 2629#if ! HAVE_NATIVE_mpn_xor_n 2630#undef mpn_xor_n 2631#define mpn_xor_n(rp, up, vp, n) \ 2632 MPN_LOGOPS_N_INLINE (rp, up, vp, n, __a ^ __b) 2633#endif 2634 2635#if ! HAVE_NATIVE_mpn_xnor_n 2636#undef mpn_xnor_n 2637#define mpn_xnor_n(rp, up, vp, n) \ 2638 MPN_LOGOPS_N_INLINE (rp, up, vp, n, ~(__a ^ __b) & GMP_NUMB_MASK) 2639#endif 2640 2641#define mpn_trialdiv __MPN(trialdiv) 2642__GMP_DECLSPEC mp_limb_t mpn_trialdiv (mp_srcptr, mp_size_t, mp_size_t, int *); 2643 2644#define mpn_remove __MPN(remove) 2645__GMP_DECLSPEC mp_bitcnt_t mpn_remove (mp_ptr, mp_size_t *, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_bitcnt_t); 2646 2647 2648/* ADDC_LIMB sets w=x+y and cout to 0 or 1 for a carry from that addition. 
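   For instance, with GMP_NUMB_BITS == 64, x = 2^64-1 and y = 1 give w = 0
   and cout = 1; cout*2^GMP_NUMB_BITS + w is the exact sum x+y.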
*/ 2649#if GMP_NAIL_BITS == 0 2650#define ADDC_LIMB(cout, w, x, y) \ 2651 do { \ 2652 mp_limb_t __x = (x); \ 2653 mp_limb_t __y = (y); \ 2654 mp_limb_t __w = __x + __y; \ 2655 (w) = __w; \ 2656 (cout) = __w < __x; \ 2657 } while (0) 2658#else 2659#define ADDC_LIMB(cout, w, x, y) \ 2660 do { \ 2661 mp_limb_t __w; \ 2662 ASSERT_LIMB (x); \ 2663 ASSERT_LIMB (y); \ 2664 __w = (x) + (y); \ 2665 (w) = __w & GMP_NUMB_MASK; \ 2666 (cout) = __w >> GMP_NUMB_BITS; \ 2667 } while (0) 2668#endif 2669 2670/* SUBC_LIMB sets w=x-y and cout to 0 or 1 for a borrow from that 2671 subtract. */ 2672#if GMP_NAIL_BITS == 0 2673#define SUBC_LIMB(cout, w, x, y) \ 2674 do { \ 2675 mp_limb_t __x = (x); \ 2676 mp_limb_t __y = (y); \ 2677 mp_limb_t __w = __x - __y; \ 2678 (w) = __w; \ 2679 (cout) = __w > __x; \ 2680 } while (0) 2681#else 2682#define SUBC_LIMB(cout, w, x, y) \ 2683 do { \ 2684 mp_limb_t __w = (x) - (y); \ 2685 (w) = __w & GMP_NUMB_MASK; \ 2686 (cout) = __w >> (GMP_LIMB_BITS-1); \ 2687 } while (0) 2688#endif 2689 2690 2691/* MPN_INCR_U does {ptr,size} += n, MPN_DECR_U does {ptr,size} -= n, both 2692 expecting no carry (or borrow) from that. 2693 2694 The size parameter is only for the benefit of assertion checking. In a 2695 normal build it's unused and the carry/borrow is just propagated as far 2696 as it needs to go. 2697 2698 On random data, usually only one or two limbs of {ptr,size} get updated, 2699 so there's no need for any sophisticated looping, just something compact 2700 and sensible. 2701 2702 FIXME: Switch all code from mpn_{incr,decr}_u to MPN_{INCR,DECR}_U, 2703 declaring their operand sizes, then remove the former. This is purely 2704 for the benefit of assertion checking. */ 2705 2706#if defined (__GNUC__) && GMP_NAIL_BITS == 0 && ! defined (NO_ASM) \ 2707 && (defined(HAVE_HOST_CPU_FAMILY_x86) || defined(HAVE_HOST_CPU_FAMILY_x86_64)) \ 2708 && ! WANT_ASSERT 2709/* Better flags handling than the generic C gives on i386, saving a few 2710 bytes of code and maybe a cycle or two. 
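   The incr==1 case below is a bare propagation loop: it keeps adding 1 to
   successive limbs for as long as the carry flag stays set.  The general
   case adds incr to the low limb, then falls into the same loop only if
   that first add carried.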
*/ 2711 2712#define MPN_IORD_U(ptr, incr, aors) \ 2713 do { \ 2714 mp_ptr __ptr_dummy; \ 2715 if (__builtin_constant_p (incr) && (incr) == 0) \ 2716 { \ 2717 } \ 2718 else if (__builtin_constant_p (incr) && (incr) == 1) \ 2719 { \ 2720 __asm__ __volatile__ \ 2721 ("\n" ASM_L(top) ":\n" \ 2722 "\t" aors "\t$1, (%0)\n" \ 2723 "\tlea\t%c2(%0), %0\n" \ 2724 "\tjc\t" ASM_L(top) \ 2725 : "=r" (__ptr_dummy) \ 2726 : "0" (ptr), "n" (sizeof(mp_limb_t)) \ 2727 : "memory"); \ 2728 } \ 2729 else \ 2730 { \ 2731 __asm__ __volatile__ \ 2732 ( aors "\t%2, (%0)\n" \ 2733 "\tjnc\t" ASM_L(done) "\n" \ 2734 ASM_L(top) ":\n" \ 2735 "\t" aors "\t$1, %c3(%0)\n" \ 2736 "\tlea\t%c3(%0), %0\n" \ 2737 "\tjc\t" ASM_L(top) "\n" \ 2738 ASM_L(done) ":\n" \ 2739 : "=r" (__ptr_dummy) \ 2740 : "0" (ptr), \ 2741 "re" ((mp_limb_t) (incr)), "n" (sizeof(mp_limb_t)) \ 2742 : "memory"); \ 2743 } \ 2744 } while (0) 2745 2746#if GMP_LIMB_BITS == 32 2747#define MPN_INCR_U(ptr, size, incr) MPN_IORD_U (ptr, incr, "addl") 2748#define MPN_DECR_U(ptr, size, incr) MPN_IORD_U (ptr, incr, "subl") 2749#endif 2750#if GMP_LIMB_BITS == 64 2751#define MPN_INCR_U(ptr, size, incr) MPN_IORD_U (ptr, incr, "addq") 2752#define MPN_DECR_U(ptr, size, incr) MPN_IORD_U (ptr, incr, "subq") 2753#endif 2754#define mpn_incr_u(ptr, incr) MPN_INCR_U (ptr, 0, incr) 2755#define mpn_decr_u(ptr, incr) MPN_DECR_U (ptr, 0, incr) 2756#endif 2757 2758#if GMP_NAIL_BITS == 0 2759#ifndef mpn_incr_u 2760#define mpn_incr_u(p,incr) \ 2761 do { \ 2762 mp_limb_t __x; \ 2763 mp_ptr __p = (p); \ 2764 if (__builtin_constant_p (incr) && (incr) == 1) \ 2765 { \ 2766 while (++(*(__p++)) == 0) \ 2767 ; \ 2768 } \ 2769 else \ 2770 { \ 2771 __x = *__p + (incr); \ 2772 *__p = __x; \ 2773 if (__x < (incr)) \ 2774 while (++(*(++__p)) == 0) \ 2775 ; \ 2776 } \ 2777 } while (0) 2778#endif 2779#ifndef mpn_decr_u 2780#define mpn_decr_u(p,incr) \ 2781 do { \ 2782 mp_limb_t __x; \ 2783 mp_ptr __p = (p); \ 2784 if (__builtin_constant_p (incr) && (incr) == 1) \ 2785 { \ 2786 while ((*(__p++))-- == 0) \ 2787 ; \ 2788 } \ 2789 else \ 2790 { \ 2791 __x = *__p; \ 2792 *__p = __x - (incr); \ 2793 if (__x < (incr)) \ 2794 while ((*(++__p))-- == 0) \ 2795 ; \ 2796 } \ 2797 } while (0) 2798#endif 2799#endif 2800 2801#if GMP_NAIL_BITS >= 1 2802#ifndef mpn_incr_u 2803#define mpn_incr_u(p,incr) \ 2804 do { \ 2805 mp_limb_t __x; \ 2806 mp_ptr __p = (p); \ 2807 if (__builtin_constant_p (incr) && (incr) == 1) \ 2808 { \ 2809 do \ 2810 { \ 2811 __x = (*__p + 1) & GMP_NUMB_MASK; \ 2812 *__p++ = __x; \ 2813 } \ 2814 while (__x == 0); \ 2815 } \ 2816 else \ 2817 { \ 2818 __x = (*__p + (incr)); \ 2819 *__p++ = __x & GMP_NUMB_MASK; \ 2820 if (__x >> GMP_NUMB_BITS != 0) \ 2821 { \ 2822 do \ 2823 { \ 2824 __x = (*__p + 1) & GMP_NUMB_MASK; \ 2825 *__p++ = __x; \ 2826 } \ 2827 while (__x == 0); \ 2828 } \ 2829 } \ 2830 } while (0) 2831#endif 2832#ifndef mpn_decr_u 2833#define mpn_decr_u(p,incr) \ 2834 do { \ 2835 mp_limb_t __x; \ 2836 mp_ptr __p = (p); \ 2837 if (__builtin_constant_p (incr) && (incr) == 1) \ 2838 { \ 2839 do \ 2840 { \ 2841 __x = *__p; \ 2842 *__p++ = (__x - 1) & GMP_NUMB_MASK; \ 2843 } \ 2844 while (__x == 0); \ 2845 } \ 2846 else \ 2847 { \ 2848 __x = *__p - (incr); \ 2849 *__p++ = __x & GMP_NUMB_MASK; \ 2850 if (__x >> GMP_NUMB_BITS != 0) \ 2851 { \ 2852 do \ 2853 { \ 2854 __x = *__p; \ 2855 *__p++ = (__x - 1) & GMP_NUMB_MASK; \ 2856 } \ 2857 while (__x == 0); \ 2858 } \ 2859 } \ 2860 } while (0) 2861#endif 2862#endif 2863 2864#ifndef MPN_INCR_U 2865#if WANT_ASSERT 2866#define MPN_INCR_U(ptr, size, 
n) \ 2867 do { \ 2868 ASSERT ((size) >= 1); \ 2869 ASSERT_NOCARRY (mpn_add_1 (ptr, ptr, size, n)); \ 2870 } while (0) 2871#else 2872#define MPN_INCR_U(ptr, size, n) mpn_incr_u (ptr, n) 2873#endif 2874#endif 2875 2876#ifndef MPN_DECR_U 2877#if WANT_ASSERT 2878#define MPN_DECR_U(ptr, size, n) \ 2879 do { \ 2880 ASSERT ((size) >= 1); \ 2881 ASSERT_NOCARRY (mpn_sub_1 (ptr, ptr, size, n)); \ 2882 } while (0) 2883#else 2884#define MPN_DECR_U(ptr, size, n) mpn_decr_u (ptr, n) 2885#endif 2886#endif 2887 2888 2889/* Structure for conversion between internal binary format and strings. */ 2890struct bases 2891{ 2892 /* Number of digits in the conversion base that always fits in an mp_limb_t. 2893 For example, for base 10 on a machine where an mp_limb_t has 32 bits this 2894 is 9, since 10**9 is the largest number that fits into an mp_limb_t. */ 2895 int chars_per_limb; 2896 2897 /* log(2)/log(conversion_base) */ 2898 mp_limb_t logb2; 2899 2900 /* log(conversion_base)/log(2) */ 2901 mp_limb_t log2b; 2902 2903 /* base**chars_per_limb, i.e. the biggest number that fits a word, built by 2904 factors of base. Exception: For 2, 4, 8, etc, big_base is log2(base), 2905 i.e. the number of bits used to represent each digit in the base. */ 2906 mp_limb_t big_base; 2907 2908 /* A GMP_LIMB_BITS bit approximation to 1/big_base, represented as a 2909 fixed-point number. Instead of dividing by big_base an application can 2910 choose to multiply by big_base_inverted. */ 2911 mp_limb_t big_base_inverted; 2912}; 2913 2914#define mp_bases __MPN(bases) 2915__GMP_DECLSPEC extern const struct bases mp_bases[257]; 2916 2917 2918/* Compute the number of digits in base for nbits bits, making sure the result 2919 is never too small. The two variants of the macro implement the same 2920 function; the GT2 variant below works just for bases > 2. */ 2921#define DIGITS_IN_BASE_FROM_BITS(res, nbits, b) \ 2922 do { \ 2923 mp_limb_t _ph, _dummy; \ 2924 size_t _nbits = (nbits); \ 2925 umul_ppmm (_ph, _dummy, mp_bases[b].logb2, _nbits); \ 2926 _ph += (_dummy + _nbits < _dummy); \ 2927 res = _ph + 1; \ 2928 } while (0) 2929#define DIGITS_IN_BASEGT2_FROM_BITS(res, nbits, b) \ 2930 do { \ 2931 mp_limb_t _ph, _dummy; \ 2932 size_t _nbits = (nbits); \ 2933 umul_ppmm (_ph, _dummy, mp_bases[b].logb2 + 1, _nbits); \ 2934 res = _ph + 1; \ 2935 } while (0) 2936 2937/* For power of 2 bases this is exact. For other bases the result is either 2938 exact or one too big. 2939 2940 To be exact always it'd be necessary to examine all the limbs of the 2941 operand, since numbers like 100..000 and 99...999 generally differ only 2942 in the lowest limb. It'd be possible to examine just a couple of high 2943 limbs to increase the probability of being exact, but that doesn't seem 2944 worth bothering with. */ 2945 2946#define MPN_SIZEINBASE(result, ptr, size, base) \ 2947 do { \ 2948 int __lb_base, __cnt; \ 2949 size_t __totbits; \ 2950 \ 2951 ASSERT ((size) >= 0); \ 2952 ASSERT ((base) >= 2); \ 2953 ASSERT ((base) < numberof (mp_bases)); \ 2954 \ 2955 /* Special case for X == 0. */ \ 2956 if ((size) == 0) \ 2957 (result) = 1; \ 2958 else \ 2959 { \ 2960 /* Calculate the total number of significant bits of X. 
*/ \ 2961 count_leading_zeros (__cnt, (ptr)[(size)-1]); \ 2962 __totbits = (size_t) (size) * GMP_NUMB_BITS - (__cnt - GMP_NAIL_BITS);\ 2963 \ 2964 if (POW2_P (base)) \ 2965 { \ 2966 __lb_base = mp_bases[base].big_base; \ 2967 (result) = (__totbits + __lb_base - 1) / __lb_base; \ 2968 } \ 2969 else \ 2970 { \ 2971 DIGITS_IN_BASEGT2_FROM_BITS (result, __totbits, base); \ 2972 } \ 2973 } \ 2974 } while (0) 2975 2976#define MPN_SIZEINBASE_2EXP(result, ptr, size, base2exp) \ 2977 do { \ 2978 int __cnt; \ 2979 mp_bitcnt_t __totbits; \ 2980 ASSERT ((size) > 0); \ 2981 ASSERT ((ptr)[(size)-1] != 0); \ 2982 count_leading_zeros (__cnt, (ptr)[(size)-1]); \ 2983 __totbits = (mp_bitcnt_t) (size) * GMP_NUMB_BITS - (__cnt - GMP_NAIL_BITS); \ 2984 (result) = (__totbits + (base2exp)-1) / (base2exp); \ 2985 } while (0) 2986 2987 2988/* bit count to limb count, rounding up */ 2989#define BITS_TO_LIMBS(n) (((n) + (GMP_NUMB_BITS - 1)) / GMP_NUMB_BITS) 2990 2991/* MPN_SET_UI sets an mpn (ptr, cnt) to given ui. MPZ_FAKE_UI creates fake 2992 mpz_t from ui. The zp argument must have room for LIMBS_PER_ULONG limbs 2993 in both cases (LIMBS_PER_ULONG is also defined here.) */ 2994#if BITS_PER_ULONG <= GMP_NUMB_BITS /* need one limb per ulong */ 2995 2996#define LIMBS_PER_ULONG 1 2997#define MPN_SET_UI(zp, zn, u) \ 2998 (zp)[0] = (u); \ 2999 (zn) = ((zp)[0] != 0); 3000#define MPZ_FAKE_UI(z, zp, u) \ 3001 (zp)[0] = (u); \ 3002 PTR (z) = (zp); \ 3003 SIZ (z) = ((zp)[0] != 0); \ 3004 ASSERT_CODE (ALLOC (z) = 1); 3005 3006#else /* need two limbs per ulong */ 3007 3008#define LIMBS_PER_ULONG 2 3009#define MPN_SET_UI(zp, zn, u) \ 3010 (zp)[0] = (u) & GMP_NUMB_MASK; \ 3011 (zp)[1] = (u) >> GMP_NUMB_BITS; \ 3012 (zn) = ((zp)[1] != 0 ? 2 : (zp)[0] != 0 ? 1 : 0); 3013#define MPZ_FAKE_UI(z, zp, u) \ 3014 (zp)[0] = (u) & GMP_NUMB_MASK; \ 3015 (zp)[1] = (u) >> GMP_NUMB_BITS; \ 3016 SIZ (z) = ((zp)[1] != 0 ? 2 : (zp)[0] != 0 ? 1 : 0); \ 3017 PTR (z) = (zp); \ 3018 ASSERT_CODE (ALLOC (z) = 2); 3019 3020#endif 3021 3022 3023#if HAVE_HOST_CPU_FAMILY_x86 3024#define TARGET_REGISTER_STARVED 1 3025#else 3026#define TARGET_REGISTER_STARVED 0 3027#endif 3028 3029 3030/* LIMB_HIGHBIT_TO_MASK(n) examines the high bit of a limb value and turns 1 3031 or 0 there into a limb 0xFF..FF or 0 respectively. 3032 3033 On most CPUs this is just an arithmetic right shift by GMP_LIMB_BITS-1, 3034 but C99 doesn't guarantee signed right shifts are arithmetic, so we have 3035 a little compile-time test and a fallback to a "? :" form. The latter is 3036 necessary for instance on Cray vector systems. 3037 3038 Recent versions of gcc (eg. 3.3) will in fact optimize a "? :" like this 3039 to an arithmetic right shift anyway, but it's good to get the desired 3040 shift on past versions too (in particular since an important use of 3041 LIMB_HIGHBIT_TO_MASK is in udiv_qrnnd_preinv). */ 3042 3043#define LIMB_HIGHBIT_TO_MASK(n) \ 3044 (((mp_limb_signed_t) -1 >> 1) < 0 \ 3045 ? (mp_limb_signed_t) (n) >> (GMP_LIMB_BITS - 1) \ 3046 : (n) & GMP_LIMB_HIGHBIT ? MP_LIMB_T_MAX : CNST_LIMB(0)) 3047 3048 3049/* Use a library function for invert_limb, if available. */ 3050#define mpn_invert_limb __MPN(invert_limb) 3051__GMP_DECLSPEC mp_limb_t mpn_invert_limb (mp_limb_t) ATTRIBUTE_CONST; 3052#if ! 
defined (invert_limb) && HAVE_NATIVE_mpn_invert_limb
#define invert_limb(invxl,xl) \
  do { \
    (invxl) = mpn_invert_limb (xl); \
  } while (0)
#endif

#ifndef invert_limb
#define invert_limb(invxl,xl) \
  do { \
    mp_limb_t _dummy; \
    ASSERT ((xl) != 0); \
    udiv_qrnnd (invxl, _dummy, ~(xl), ~CNST_LIMB(0), xl); \
  } while (0)
#endif

#define invert_pi1(dinv, d1, d0) \
  do { \
    mp_limb_t _v, _p, _t1, _t0, _mask; \
    invert_limb (_v, d1); \
    _p = (d1) * _v; \
    _p += (d0); \
    if (_p < (d0)) \
      { \
        _v--; \
        _mask = -(mp_limb_t) (_p >= (d1)); \
        _p -= (d1); \
        _v += _mask; \
        _p -= _mask & (d1); \
      } \
    umul_ppmm (_t1, _t0, d0, _v); \
    _p += _t1; \
    if (_p < _t1) \
      { \
        _v--; \
        if (UNLIKELY (_p >= (d1))) \
          { \
            if (_p > (d1) || _t0 >= (d0)) \
              _v--; \
          } \
      } \
    (dinv).inv32 = _v; \
  } while (0)


/* udiv_qrnnd_preinv -- Based on work by Niels Möller and Torbjörn Granlund.
   We write things strangely below, to help gcc.  A more straightforward
   version:
	_r = (nl) - _qh * (d);
	_t = _r + (d);
	if (_r >= _ql)
	  {
	    _qh--;
	    _r = _t;
	  }
   For one operation shorter critical path, one may want to use this form:
	_p = _qh * (d);
	_s = (nl) + (d);
	_r = (nl) - _p;
	_t = _s - _p;
	if (_r >= _ql)
	  {
	    _qh--;
	    _r = _t;
	  }
*/
#define udiv_qrnnd_preinv(q, r, nh, nl, d, di) \
  do { \
    mp_limb_t _qh, _ql, _r, _mask; \
    umul_ppmm (_qh, _ql, (nh), (di)); \
    if (__builtin_constant_p (nl) && (nl) == 0) \
      { \
        _qh += (nh) + 1; \
        _r = - _qh * (d); \
        _mask = -(mp_limb_t) (_r > _ql); /* both > and >= are OK */ \
        _qh += _mask; \
        _r += _mask & (d); \
      } \
    else \
      { \
        add_ssaaaa (_qh, _ql, _qh, _ql, (nh) + 1, (nl)); \
        _r = (nl) - _qh * (d); \
        _mask = -(mp_limb_t) (_r > _ql); /* both > and >= are OK */ \
        _qh += _mask; \
        _r += _mask & (d); \
        if (UNLIKELY (_r >= (d))) \
          { \
            _r -= (d); \
            _qh++; \
          } \
      } \
    (r) = _r; \
    (q) = _qh; \
  } while (0)

/* Dividing (NH, NL) by D, returning the remainder only.  Unlike
   udiv_qrnnd_preinv, works also for the case NH == D, where the
   quotient doesn't quite fit in a single limb. */
#define udiv_rnnd_preinv(r, nh, nl, d, di) \
  do { \
    mp_limb_t _qh, _ql, _r, _mask; \
    umul_ppmm (_qh, _ql, (nh), (di)); \
    if (__builtin_constant_p (nl) && (nl) == 0) \
      { \
        _r = ~(_qh + (nh)) * (d); \
        _mask = -(mp_limb_t) (_r > _ql); /* both > and >= are OK */ \
        _r += _mask & (d); \
      } \
    else \
      { \
        add_ssaaaa (_qh, _ql, _qh, _ql, (nh) + 1, (nl)); \
        _r = (nl) - _qh * (d); \
        _mask = -(mp_limb_t) (_r > _ql); /* both > and >= are OK */ \
        _r += _mask & (d); \
        if (UNLIKELY (_r >= (d))) \
          _r -= (d); \
      } \
    (r) = _r; \
  } while (0)

/* Compute the quotient and remainder for n / d.  Requires d >= B^2 / 2
   and n < d B.  di is the inverse

     floor ((B^3 - 1) / (d0 + d1 B)) - B.

   NOTE: Output variables are updated multiple times.  Only some inputs
   and outputs may overlap.
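
   In other words, on exit

     n2 B^2 + n1 B + n0 = q (d1 B + d0) + r1 B + r0,

   with 0 <= r1 B + r0 < d1 B + d0, where B = 2^GMP_NUMB_BITS.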
3179*/ 3180#define udiv_qr_3by2(q, r1, r0, n2, n1, n0, d1, d0, dinv) \ 3181 do { \ 3182 mp_limb_t _q0, _t1, _t0, _mask; \ 3183 umul_ppmm ((q), _q0, (n2), (dinv)); \ 3184 add_ssaaaa ((q), _q0, (q), _q0, (n2), (n1)); \ 3185 \ 3186 /* Compute the two most significant limbs of n - q'd */ \ 3187 (r1) = (n1) - (d1) * (q); \ 3188 sub_ddmmss ((r1), (r0), (r1), (n0), (d1), (d0)); \ 3189 umul_ppmm (_t1, _t0, (d0), (q)); \ 3190 sub_ddmmss ((r1), (r0), (r1), (r0), _t1, _t0); \ 3191 (q)++; \ 3192 \ 3193 /* Conditionally adjust q and the remainders */ \ 3194 _mask = - (mp_limb_t) ((r1) >= _q0); \ 3195 (q) += _mask; \ 3196 add_ssaaaa ((r1), (r0), (r1), (r0), _mask & (d1), _mask & (d0)); \ 3197 if (UNLIKELY ((r1) >= (d1))) \ 3198 { \ 3199 if ((r1) > (d1) || (r0) >= (d0)) \ 3200 { \ 3201 (q)++; \ 3202 sub_ddmmss ((r1), (r0), (r1), (r0), (d1), (d0)); \ 3203 } \ 3204 } \ 3205 } while (0) 3206 3207#ifndef mpn_preinv_divrem_1 /* if not done with cpuvec in a fat binary */ 3208#define mpn_preinv_divrem_1 __MPN(preinv_divrem_1) 3209__GMP_DECLSPEC mp_limb_t mpn_preinv_divrem_1 (mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t, int); 3210#endif 3211 3212 3213/* USE_PREINV_DIVREM_1 is whether to use mpn_preinv_divrem_1, as opposed to the 3214 plain mpn_divrem_1. The default is yes, since the few CISC chips where 3215 preinv is not good have defines saying so. */ 3216#ifndef USE_PREINV_DIVREM_1 3217#define USE_PREINV_DIVREM_1 1 3218#endif 3219 3220#if USE_PREINV_DIVREM_1 3221#define MPN_DIVREM_OR_PREINV_DIVREM_1(qp,xsize,ap,size,d,dinv,shift) \ 3222 mpn_preinv_divrem_1 (qp, xsize, ap, size, d, dinv, shift) 3223#else 3224#define MPN_DIVREM_OR_PREINV_DIVREM_1(qp,xsize,ap,size,d,dinv,shift) \ 3225 mpn_divrem_1 (qp, xsize, ap, size, d) 3226#endif 3227 3228#ifndef PREINV_MOD_1_TO_MOD_1_THRESHOLD 3229#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 10 3230#endif 3231 3232/* This selection may seem backwards. The reason mpn_mod_1 typically takes 3233 over for larger sizes is that it uses the mod_1_1 function. */ 3234#define MPN_MOD_OR_PREINV_MOD_1(src,size,divisor,inverse) \ 3235 (BELOW_THRESHOLD (size, PREINV_MOD_1_TO_MOD_1_THRESHOLD) \ 3236 ? mpn_preinv_mod_1 (src, size, divisor, inverse) \ 3237 : mpn_mod_1 (src, size, divisor)) 3238 3239 3240#ifndef mpn_mod_34lsub1 /* if not done with cpuvec in a fat binary */ 3241#define mpn_mod_34lsub1 __MPN(mod_34lsub1) 3242__GMP_DECLSPEC mp_limb_t mpn_mod_34lsub1 (mp_srcptr, mp_size_t) __GMP_ATTRIBUTE_PURE; 3243#endif 3244 3245 3246/* DIVEXACT_1_THRESHOLD is at what size to use mpn_divexact_1, as opposed to 3247 plain mpn_divrem_1. Likewise BMOD_1_TO_MOD_1_THRESHOLD for 3248 mpn_modexact_1_odd against plain mpn_mod_1. On most CPUs divexact and 3249 modexact are faster at all sizes, so the defaults are 0. Those CPUs 3250 where this is not right have a tuned threshold. 
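   (Note: despite the remark above, the BMOD_1_TO_MOD_1_THRESHOLD default
   below is 10, not 0.)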
*/ 3251#ifndef DIVEXACT_1_THRESHOLD 3252#define DIVEXACT_1_THRESHOLD 0 3253#endif 3254#ifndef BMOD_1_TO_MOD_1_THRESHOLD 3255#define BMOD_1_TO_MOD_1_THRESHOLD 10 3256#endif 3257 3258#define MPN_DIVREM_OR_DIVEXACT_1(rp, up, n, d) \ 3259 do { \ 3260 if (BELOW_THRESHOLD (n, DIVEXACT_1_THRESHOLD)) \ 3261 ASSERT_NOCARRY (mpn_divrem_1 (rp, (mp_size_t) 0, up, n, d)); \ 3262 else \ 3263 { \ 3264 ASSERT (mpn_mod_1 (up, n, d) == 0); \ 3265 mpn_divexact_1 (rp, up, n, d); \ 3266 } \ 3267 } while (0) 3268 3269#ifndef mpn_modexact_1c_odd /* if not done with cpuvec in a fat binary */ 3270#define mpn_modexact_1c_odd __MPN(modexact_1c_odd) 3271__GMP_DECLSPEC mp_limb_t mpn_modexact_1c_odd (mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t) __GMP_ATTRIBUTE_PURE; 3272#endif 3273 3274#if HAVE_NATIVE_mpn_modexact_1_odd 3275#define mpn_modexact_1_odd __MPN(modexact_1_odd) 3276__GMP_DECLSPEC mp_limb_t mpn_modexact_1_odd (mp_srcptr, mp_size_t, mp_limb_t) __GMP_ATTRIBUTE_PURE; 3277#else 3278#define mpn_modexact_1_odd(src,size,divisor) \ 3279 mpn_modexact_1c_odd (src, size, divisor, CNST_LIMB(0)) 3280#endif 3281 3282#define MPN_MOD_OR_MODEXACT_1_ODD(src,size,divisor) \ 3283 (BELOW_THRESHOLD (size, BMOD_1_TO_MOD_1_THRESHOLD) \ 3284 ? mpn_modexact_1_odd (src, size, divisor) \ 3285 : mpn_mod_1 (src, size, divisor)) 3286 3287/* binvert_limb() sets inv to the multiplicative inverse of n modulo 3288 2^GMP_NUMB_BITS, ie. satisfying inv*n == 1 mod 2^GMP_NUMB_BITS. 3289 n must be odd (otherwise such an inverse doesn't exist). 3290 3291 This is not to be confused with invert_limb(), which is completely 3292 different. 3293 3294 The table lookup gives an inverse with the low 8 bits valid, and each 3295 multiply step doubles the number of bits. See Jebelean "An algorithm for 3296 exact division" end of section 4 (reference in gmp.texi). 3297 3298 Possible enhancement: Could use UHWtype until the last step, if half-size 3299 multiplies are faster (might help under _LONG_LONG_LIMB). 3300 3301 Alternative: As noted in Granlund and Montgomery "Division by Invariant 3302 Integers using Multiplication" (reference in gmp.texi), n itself gives a 3303 3-bit inverse immediately, and could be used instead of a table lookup. 3304 A 4-bit inverse can be obtained effectively from xoring bits 1 and 2 into 3305 bit 3, for instance with (((n + 2) & 4) << 1) ^ n. */ 3306 3307#define binvert_limb_table __gmp_binvert_limb_table 3308__GMP_DECLSPEC extern const unsigned char binvert_limb_table[128]; 3309 3310#define binvert_limb(inv,n) \ 3311 do { \ 3312 mp_limb_t __n = (n); \ 3313 mp_limb_t __inv; \ 3314 ASSERT ((__n & 1) == 1); \ 3315 \ 3316 __inv = binvert_limb_table[(__n/2) & 0x7F]; /* 8 */ \ 3317 if (GMP_NUMB_BITS > 8) __inv = 2 * __inv - __inv * __inv * __n; \ 3318 if (GMP_NUMB_BITS > 16) __inv = 2 * __inv - __inv * __inv * __n; \ 3319 if (GMP_NUMB_BITS > 32) __inv = 2 * __inv - __inv * __inv * __n; \ 3320 \ 3321 if (GMP_NUMB_BITS > 64) \ 3322 { \ 3323 int __invbits = 64; \ 3324 do { \ 3325 __inv = 2 * __inv - __inv * __inv * __n; \ 3326 __invbits *= 2; \ 3327 } while (__invbits < GMP_NUMB_BITS); \ 3328 } \ 3329 \ 3330 ASSERT ((__inv * __n & GMP_NUMB_MASK) == 1); \ 3331 (inv) = __inv & GMP_NUMB_MASK; \ 3332 } while (0) 3333#define modlimb_invert binvert_limb /* backward compatibility */ 3334 3335/* Multiplicative inverse of 3, modulo 2^GMP_NUMB_BITS. 3336 Eg. 0xAAAAAAAB for 32 bits, 0xAAAAAAAAAAAAAAAB for 64 bits. 3337 GMP_NUMB_MAX/3*2+1 is right when GMP_NUMB_BITS is even, but when it's odd 3338 we need to start from GMP_NUMB_MAX>>1. 
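   Eg. for GMP_NUMB_BITS = 32 the shift below is zero and the expression
   gives (0xFFFFFFFF / 3) * 2 + 1 = 0xAAAAAAAB, and indeed
   3 * 0xAAAAAAAB = 2^33 + 1 == 1 mod 2^32.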
*/ 3339#define MODLIMB_INVERSE_3 (((GMP_NUMB_MAX >> (GMP_NUMB_BITS % 2)) / 3) * 2 + 1) 3340 3341/* ceil(GMP_NUMB_MAX/3) and ceil(2*GMP_NUMB_MAX/3). 3342 These expressions work because GMP_NUMB_MAX%3 != 0 for all GMP_NUMB_BITS. */ 3343#define GMP_NUMB_CEIL_MAX_DIV3 (GMP_NUMB_MAX / 3 + 1) 3344#define GMP_NUMB_CEIL_2MAX_DIV3 ((GMP_NUMB_MAX>>1) / 3 + 1 + GMP_NUMB_HIGHBIT) 3345 3346 3347/* Set r to -a mod d. a>=d is allowed. Can give r>d. All should be limbs. 3348 3349 It's not clear whether this is the best way to do this calculation. 3350 Anything congruent to -a would be fine for the one limb congruence 3351 tests. */ 3352 3353#define NEG_MOD(r, a, d) \ 3354 do { \ 3355 ASSERT ((d) != 0); \ 3356 ASSERT_LIMB (a); \ 3357 ASSERT_LIMB (d); \ 3358 \ 3359 if ((a) <= (d)) \ 3360 { \ 3361 /* small a is reasonably likely */ \ 3362 (r) = (d) - (a); \ 3363 } \ 3364 else \ 3365 { \ 3366 unsigned __twos; \ 3367 mp_limb_t __dnorm; \ 3368 count_leading_zeros (__twos, d); \ 3369 __twos -= GMP_NAIL_BITS; \ 3370 __dnorm = (d) << __twos; \ 3371 (r) = ((a) <= __dnorm ? __dnorm : 2*__dnorm) - (a); \ 3372 } \ 3373 \ 3374 ASSERT_LIMB (r); \ 3375 } while (0) 3376 3377/* A bit mask of all the least significant zero bits of n, or -1 if n==0. */ 3378#define LOW_ZEROS_MASK(n) (((n) & -(n)) - 1) 3379 3380 3381/* ULONG_PARITY sets "p" to 1 if there's an odd number of 1 bits in "n", or 3382 to 0 if there's an even number. "n" should be an unsigned long and "p" 3383 an int. */ 3384 3385#if defined (__GNUC__) && ! defined (NO_ASM) && HAVE_HOST_CPU_alpha_CIX 3386#define ULONG_PARITY(p, n) \ 3387 do { \ 3388 int __p; \ 3389 __asm__ ("ctpop %1, %0" : "=r" (__p) : "r" (n)); \ 3390 (p) = __p & 1; \ 3391 } while (0) 3392#endif 3393 3394/* Cray intrinsic _popcnt. */ 3395#ifdef _CRAY 3396#define ULONG_PARITY(p, n) \ 3397 do { \ 3398 (p) = _popcnt (n) & 1; \ 3399 } while (0) 3400#endif 3401 3402#if defined (__GNUC__) && ! defined (__INTEL_COMPILER) \ 3403 && ! defined (NO_ASM) && defined (__ia64) 3404/* unsigned long is either 32 or 64 bits depending on the ABI, zero extend 3405 to a 64 bit unsigned long long for popcnt */ 3406#define ULONG_PARITY(p, n) \ 3407 do { \ 3408 unsigned long long __n = (unsigned long) (n); \ 3409 int __p; \ 3410 __asm__ ("popcnt %0 = %1" : "=r" (__p) : "r" (__n)); \ 3411 (p) = __p & 1; \ 3412 } while (0) 3413#endif 3414 3415#if defined (__GNUC__) && ! defined (__INTEL_COMPILER) \ 3416 && ! defined (NO_ASM) && HAVE_HOST_CPU_FAMILY_x86 3417#if __GMP_GNUC_PREREQ (3,1) 3418#define __GMP_qm "=Qm" 3419#define __GMP_q "=Q" 3420#else 3421#define __GMP_qm "=qm" 3422#define __GMP_q "=q" 3423#endif 3424#define ULONG_PARITY(p, n) \ 3425 do { \ 3426 char __p; \ 3427 unsigned long __n = (n); \ 3428 __n ^= (__n >> 16); \ 3429 __asm__ ("xorb %h1, %b1\n\t" \ 3430 "setpo %0" \ 3431 : __GMP_qm (__p), __GMP_q (__n) \ 3432 : "1" (__n)); \ 3433 (p) = __p; \ 3434 } while (0) 3435#endif 3436 3437#if ! defined (ULONG_PARITY) 3438#define ULONG_PARITY(p, n) \ 3439 do { \ 3440 unsigned long __n = (n); \ 3441 int __p = 0; \ 3442 do \ 3443 { \ 3444 __p ^= 0x96696996L >> (__n & 0x1F); \ 3445 __n >>= 5; \ 3446 } \ 3447 while (__n != 0); \ 3448 \ 3449 (p) = __p & 1; \ 3450 } while (0) 3451#endif 3452 3453 3454/* 3 cycles on 604 or 750 since shifts and rlwimi's can pair. gcc (as of 3455 version 3.1 at least) doesn't seem to know how to generate rlwimi for 3456 anything other than bit-fields, so use "asm". */ 3457#if defined (__GNUC__) && ! 
defined (NO_ASM) \
  && HAVE_HOST_CPU_FAMILY_powerpc && GMP_LIMB_BITS == 32
#define BSWAP_LIMB(dst, src) \
  do { \
    mp_limb_t __bswapl_src = (src); \
    mp_limb_t __tmp1 = __bswapl_src >> 24;	/* low byte */ \
    mp_limb_t __tmp2 = __bswapl_src << 24;	/* high byte */ \
    __asm__ ("rlwimi %0, %2, 24, 16, 23"	/* 2nd low */ \
	     : "=r" (__tmp1) : "0" (__tmp1), "r" (__bswapl_src)); \
    __asm__ ("rlwimi %0, %2, 8, 8, 15"		/* 2nd high */ \
	     : "=r" (__tmp2) : "0" (__tmp2), "r" (__bswapl_src)); \
    (dst) = __tmp1 | __tmp2;			/* whole */ \
  } while (0)
#endif

/* bswap is available on i486 and up and is fast.  A combination rorw $8 /
   roll $16 / rorw $8 is used in glibc for plain i386 (and in the linux
   kernel with xchgb instead of rorw), but this is not done here, because
   i386 means generic x86 and mixing word and dword operations will cause
   partial register stalls on P6 chips. */
#if defined (__GNUC__) && ! defined (NO_ASM) \
  && HAVE_HOST_CPU_FAMILY_x86 && ! HAVE_HOST_CPU_i386 \
  && GMP_LIMB_BITS == 32
#define BSWAP_LIMB(dst, src) \
  do { \
    __asm__ ("bswap %0" : "=r" (dst) : "0" (src)); \
  } while (0)
#endif

#if defined (__GNUC__) && ! defined (NO_ASM) \
  && defined (__amd64__) && GMP_LIMB_BITS == 64
#define BSWAP_LIMB(dst, src) \
  do { \
    __asm__ ("bswap %q0" : "=r" (dst) : "0" (src)); \
  } while (0)
#endif

#if defined (__GNUC__) && ! defined (__INTEL_COMPILER) \
  && ! defined (NO_ASM) && defined (__ia64) && GMP_LIMB_BITS == 64
#define BSWAP_LIMB(dst, src) \
  do { \
    __asm__ ("mux1 %0 = %1, @rev" : "=r" (dst) : "r" (src)); \
  } while (0)
#endif

/* As per glibc. */
#if defined (__GNUC__) && ! defined (NO_ASM) \
  && HAVE_HOST_CPU_FAMILY_m68k && GMP_LIMB_BITS == 32
#define BSWAP_LIMB(dst, src) \
  do { \
    mp_limb_t __bswapl_src = (src); \
    __asm__ ("ror%.w %#8, %0\n\t" \
	     "swap %0\n\t" \
	     "ror%.w %#8, %0" \
	     : "=d" (dst) \
	     : "0" (__bswapl_src)); \
  } while (0)
#endif

#if ! defined (BSWAP_LIMB)
#if GMP_LIMB_BITS == 8
#define BSWAP_LIMB(dst, src) \
  do { (dst) = (src); } while (0)
#endif
#if GMP_LIMB_BITS == 16
#define BSWAP_LIMB(dst, src) \
  do { \
    (dst) = ((src) << 8) + ((src) >> 8); \
  } while (0)
#endif
#if GMP_LIMB_BITS == 32
#define BSWAP_LIMB(dst, src) \
  do { \
    (dst) = \
      ((src) << 24) \
      + (((src) & 0xFF00) << 8) \
      + (((src) >> 8) & 0xFF00) \
      + ((src) >> 24); \
  } while (0)
#endif
#if GMP_LIMB_BITS == 64
#define BSWAP_LIMB(dst, src) \
  do { \
    (dst) = \
      ((src) << 56) \
      + (((src) & 0xFF00) << 40) \
      + (((src) & 0xFF0000) << 24) \
      + (((src) & 0xFF000000) << 8) \
      + (((src) >> 8) & 0xFF000000) \
      + (((src) >> 24) & 0xFF0000) \
      + (((src) >> 40) & 0xFF00) \
      + ((src) >> 56); \
  } while (0)
#endif
#endif

#if ! defined (BSWAP_LIMB)
#define BSWAP_LIMB(dst, src) \
  do { \
    mp_limb_t __bswapl_src = (src); \
    mp_limb_t __dstl = 0; \
    int __i; \
    for (__i = 0; __i < GMP_LIMB_BYTES; __i++) \
      { \
	__dstl = (__dstl << 8) | (__bswapl_src & 0xFF); \
	__bswapl_src >>= 8; \
      } \
    (dst) = __dstl; \
  } while (0)
#endif


/* Apparently lwbrx might be slow on some PowerPC chips, so restrict it to
   those we know are fast. */
#if defined (__GNUC__) && !
defined (NO_ASM) \
  && GMP_LIMB_BITS == 32 && HAVE_LIMB_BIG_ENDIAN \
  && (HAVE_HOST_CPU_powerpc604 \
      || HAVE_HOST_CPU_powerpc604e \
      || HAVE_HOST_CPU_powerpc750 \
      || HAVE_HOST_CPU_powerpc7400)
#define BSWAP_LIMB_FETCH(limb, src) \
  do { \
    mp_srcptr __blf_src = (src); \
    mp_limb_t __limb; \
    __asm__ ("lwbrx %0, 0, %1" \
	     : "=r" (__limb) \
	     : "r" (__blf_src), \
	       "m" (*__blf_src)); \
    (limb) = __limb; \
  } while (0)
#endif

#if ! defined (BSWAP_LIMB_FETCH)
#define BSWAP_LIMB_FETCH(limb, src) BSWAP_LIMB (limb, *(src))
#endif


/* On the same basis that lwbrx might be slow, restrict stwbrx to those we
   know are fast.  FIXME: Is this necessary? */
#if defined (__GNUC__) && ! defined (NO_ASM) \
  && GMP_LIMB_BITS == 32 && HAVE_LIMB_BIG_ENDIAN \
  && (HAVE_HOST_CPU_powerpc604 \
      || HAVE_HOST_CPU_powerpc604e \
      || HAVE_HOST_CPU_powerpc750 \
      || HAVE_HOST_CPU_powerpc7400)
#define BSWAP_LIMB_STORE(dst, limb) \
  do { \
    mp_ptr __dst = (dst); \
    mp_limb_t __limb = (limb); \
    __asm__ ("stwbrx %1, 0, %2" \
	     : "=m" (*__dst) \
	     : "r" (__limb), \
	       "r" (__dst)); \
  } while (0)
#endif

#if ! defined (BSWAP_LIMB_STORE)
#define BSWAP_LIMB_STORE(dst, limb) BSWAP_LIMB (*(dst), limb)
#endif


/* Byte swap limbs from {src,size} and store at {dst,size}. */
#define MPN_BSWAP(dst, src, size) \
  do { \
    mp_ptr __dst = (dst); \
    mp_srcptr __src = (src); \
    mp_size_t __size = (size); \
    mp_size_t __i; \
    ASSERT ((size) >= 0); \
    ASSERT (MPN_SAME_OR_SEPARATE_P (dst, src, size)); \
    CRAY_Pragma ("_CRI ivdep"); \
    for (__i = 0; __i < __size; __i++) \
      { \
	BSWAP_LIMB_FETCH (*__dst, __src); \
	__dst++; \
	__src++; \
      } \
  } while (0)

/* Byte swap limbs from {src,size} and store in reverse order at
   {dst,size}. */
#define MPN_BSWAP_REVERSE(dst, src, size) \
  do { \
    mp_ptr __dst = (dst); \
    mp_size_t __size = (size); \
    mp_srcptr __src = (src) + __size - 1; \
    mp_size_t __i; \
    ASSERT ((size) >= 0); \
    ASSERT (! MPN_OVERLAP_P (dst, size, src, size)); \
    CRAY_Pragma ("_CRI ivdep"); \
    for (__i = 0; __i < __size; __i++) \
      { \
	BSWAP_LIMB_FETCH (*__dst, __src); \
	__dst++; \
	__src--; \
      } \
  } while (0)


/* No processor claiming to be SPARC v9 compliant seems to
   implement the POPC instruction.  Disable pattern for now. */
#if 0
#if defined __GNUC__ && defined __sparc_v9__ && GMP_LIMB_BITS == 64
#define popc_limb(result, input) \
  do { \
    DItype __res; \
    __asm__ ("popc %1,%0" : "=r" (result) : "rI" (input)); \
  } while (0)
#endif
#endif

#if defined (__GNUC__) && ! defined (NO_ASM) && HAVE_HOST_CPU_alpha_CIX
#define popc_limb(result, input) \
  do { \
    __asm__ ("ctpop %1, %0" : "=r" (result) : "r" (input)); \
  } while (0)
#endif

/* Cray intrinsic. */
#ifdef _CRAY
#define popc_limb(result, input) \
  do { \
    (result) = _popcnt (input); \
  } while (0)
#endif

#if defined (__GNUC__) && ! defined (__INTEL_COMPILER) \
  && ! defined (NO_ASM) && defined (__ia64) && GMP_LIMB_BITS == 64
#define popc_limb(result, input) \
  do { \
    __asm__ ("popcnt %0 = %1" : "=r" (result) : "r" (input)); \
  } while (0)
#endif

/* Cool population count of an mp_limb_t.
You have to figure out how this works; we won't tell you!

   The constants could also be expressed as:
     0x55... = [2^N / 3]  = [(2^N-1)/3]
     0x33... = [2^N / 5]  = [(2^N-1)/5]
     0x0f... = [2^N / 17] = [(2^N-1)/17]
   (N is GMP_LIMB_BITS, [] denotes truncation.) */

#if ! defined (popc_limb) && GMP_LIMB_BITS == 8
#define popc_limb(result, input) \
  do { \
    mp_limb_t __x = (input); \
    __x -= (__x >> 1) & MP_LIMB_T_MAX/3; \
    __x = ((__x >> 2) & MP_LIMB_T_MAX/5) + (__x & MP_LIMB_T_MAX/5); \
    __x = ((__x >> 4) + __x); \
    (result) = __x & 0x0f; \
  } while (0)
#endif

#if ! defined (popc_limb) && GMP_LIMB_BITS == 16
#define popc_limb(result, input) \
  do { \
    mp_limb_t __x = (input); \
    __x -= (__x >> 1) & MP_LIMB_T_MAX/3; \
    __x = ((__x >> 2) & MP_LIMB_T_MAX/5) + (__x & MP_LIMB_T_MAX/5); \
    __x += (__x >> 4); \
    __x = ((__x >> 8) & MP_LIMB_T_MAX/4369) + (__x & MP_LIMB_T_MAX/4369); \
    (result) = __x; \
  } while (0)
#endif

#if ! defined (popc_limb) && GMP_LIMB_BITS == 32
#define popc_limb(result, input) \
  do { \
    mp_limb_t __x = (input); \
    __x -= (__x >> 1) & MP_LIMB_T_MAX/3; \
    __x = ((__x >> 2) & MP_LIMB_T_MAX/5) + (__x & MP_LIMB_T_MAX/5); \
    __x = ((__x >> 4) + __x) & MP_LIMB_T_MAX/17; \
    __x = ((__x >> 8) + __x); \
    __x = ((__x >> 16) + __x); \
    (result) = __x & 0xff; \
  } while (0)
#endif

#if ! defined (popc_limb) && GMP_LIMB_BITS == 64
#define popc_limb(result, input) \
  do { \
    mp_limb_t __x = (input); \
    __x -= (__x >> 1) & MP_LIMB_T_MAX/3; \
    __x = ((__x >> 2) & MP_LIMB_T_MAX/5) + (__x & MP_LIMB_T_MAX/5); \
    __x = ((__x >> 4) + __x) & MP_LIMB_T_MAX/17; \
    __x = ((__x >> 8) + __x); \
    __x = ((__x >> 16) + __x); \
    __x = ((__x >> 32) + __x); \
    (result) = __x & 0xff; \
  } while (0)
#endif


/* Define stuff for longlong.h. */
#if HAVE_ATTRIBUTE_MODE
typedef unsigned int UQItype __attribute__ ((mode (QI)));
typedef int SItype __attribute__ ((mode (SI)));
typedef unsigned int USItype __attribute__ ((mode (SI)));
typedef int DItype __attribute__ ((mode (DI)));
typedef unsigned int UDItype __attribute__ ((mode (DI)));
#else
typedef unsigned char UQItype;
typedef long SItype;
typedef unsigned long USItype;
#if HAVE_LONG_LONG
typedef long long int DItype;
typedef unsigned long long int UDItype;
#else /* Assume `long' gives us a wide enough type.  Needed for hppa2.0w. */
typedef long int DItype;
typedef unsigned long int UDItype;
#endif
#endif

typedef mp_limb_t UWtype;
typedef unsigned int UHWtype;
#define W_TYPE_SIZE GMP_LIMB_BITS

/* Define ieee_double_extract and _GMP_IEEE_FLOATS.

   Bit field packing is "implementation defined" according to C99, which
   leaves us at the compiler's mercy here.  For some systems packing is
   defined in the ABI (eg. x86).  In any case so far it seems universal that
   little endian systems pack from low to high, and big endian from high to
   low within the given type.

   Within the fields we rely on the integer endianness being the same as the
   float endianness, this is true everywhere we know of and it'd be a fairly
   strange system that did anything else.
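
   For reference: the bit fields below split an IEEE double into 1 sign
   bit, 11 exponent bits (bias 1023) and 52 stored mantissa bits, manh
   holding the high 20 of those and manl the low 32.  Normal values also
   have an implicit leading 1 bit, which is where the 53 mantissa bits
   assumed by LIMBS_PER_DOUBLE below come from.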
*/ 3785 3786#if HAVE_DOUBLE_IEEE_LITTLE_SWAPPED 3787#define _GMP_IEEE_FLOATS 1 3788union ieee_double_extract 3789{ 3790 struct 3791 { 3792 gmp_uint_least32_t manh:20; 3793 gmp_uint_least32_t exp:11; 3794 gmp_uint_least32_t sig:1; 3795 gmp_uint_least32_t manl:32; 3796 } s; 3797 double d; 3798}; 3799#endif 3800 3801#if HAVE_DOUBLE_IEEE_LITTLE_ENDIAN 3802#define _GMP_IEEE_FLOATS 1 3803union ieee_double_extract 3804{ 3805 struct 3806 { 3807 gmp_uint_least32_t manl:32; 3808 gmp_uint_least32_t manh:20; 3809 gmp_uint_least32_t exp:11; 3810 gmp_uint_least32_t sig:1; 3811 } s; 3812 double d; 3813}; 3814#endif 3815 3816#if HAVE_DOUBLE_IEEE_BIG_ENDIAN 3817#define _GMP_IEEE_FLOATS 1 3818union ieee_double_extract 3819{ 3820 struct 3821 { 3822 gmp_uint_least32_t sig:1; 3823 gmp_uint_least32_t exp:11; 3824 gmp_uint_least32_t manh:20; 3825 gmp_uint_least32_t manl:32; 3826 } s; 3827 double d; 3828}; 3829#endif 3830 3831#if HAVE_DOUBLE_VAX_D 3832union double_extract 3833{ 3834 struct 3835 { 3836 gmp_uint_least32_t man3:7; /* highest 7 bits */ 3837 gmp_uint_least32_t exp:8; /* excess-128 exponent */ 3838 gmp_uint_least32_t sig:1; 3839 gmp_uint_least32_t man2:16; 3840 gmp_uint_least32_t man1:16; 3841 gmp_uint_least32_t man0:16; /* lowest 16 bits */ 3842 } s; 3843 double d; 3844}; 3845#endif 3846 3847/* Use (4.0 * ...) instead of (2.0 * ...) to work around buggy compilers 3848 that don't convert ulong->double correctly (eg. SunOS 4 native cc). */ 3849#define MP_BASE_AS_DOUBLE (4.0 * ((mp_limb_t) 1 << (GMP_NUMB_BITS - 2))) 3850/* Maximum number of limbs it will take to store any `double'. 3851 We assume doubles have 53 mantissa bits. */ 3852#define LIMBS_PER_DOUBLE ((53 + GMP_NUMB_BITS - 2) / GMP_NUMB_BITS + 1) 3853 3854__GMP_DECLSPEC int __gmp_extract_double (mp_ptr, double); 3855 3856#define mpn_get_d __gmpn_get_d 3857__GMP_DECLSPEC double mpn_get_d (mp_srcptr, mp_size_t, mp_size_t, long) __GMP_ATTRIBUTE_PURE; 3858 3859 3860/* DOUBLE_NAN_INF_ACTION executes code a_nan if x is a NaN, or executes 3861 a_inf if x is an infinity. Both are considered unlikely values, for 3862 branch prediction. */ 3863 3864#if _GMP_IEEE_FLOATS 3865#define DOUBLE_NAN_INF_ACTION(x, a_nan, a_inf) \ 3866 do { \ 3867 union ieee_double_extract u; \ 3868 u.d = (x); \ 3869 if (UNLIKELY (u.s.exp == 0x7FF)) \ 3870 { \ 3871 if (u.s.manl == 0 && u.s.manh == 0) \ 3872 { a_inf; } \ 3873 else \ 3874 { a_nan; } \ 3875 } \ 3876 } while (0) 3877#endif 3878 3879#if HAVE_DOUBLE_VAX_D || HAVE_DOUBLE_VAX_G || HAVE_DOUBLE_CRAY_CFP 3880/* no nans or infs in these formats */ 3881#define DOUBLE_NAN_INF_ACTION(x, a_nan, a_inf) \ 3882 do { } while (0) 3883#endif 3884 3885#ifndef DOUBLE_NAN_INF_ACTION 3886/* Unknown format, try something generic. 3887 NaN should be "unordered", so x!=x. 3888 Inf should be bigger than DBL_MAX. */ 3889#define DOUBLE_NAN_INF_ACTION(x, a_nan, a_inf) \ 3890 do { \ 3891 { \ 3892 if (UNLIKELY ((x) != (x))) \ 3893 { a_nan; } \ 3894 else if (UNLIKELY ((x) > DBL_MAX || (x) < -DBL_MAX)) \ 3895 { a_inf; } \ 3896 } \ 3897 } while (0) 3898#endif 3899 3900/* On m68k, x86 and amd64, gcc (and maybe other compilers) can hold doubles 3901 in the coprocessor, which means a bigger exponent range than normal, and 3902 depending on the rounding mode, a bigger mantissa than normal. (See 3903 "Disappointments" in the gcc manual.) FORCE_DOUBLE stores and fetches 3904 "d" through memory to force any rounding and overflows to occur. 
3905 3906 On amd64, and on x86s with SSE2, gcc (depending on options) uses the xmm 3907 registers, where there's no such extra precision and no need for the 3908 FORCE_DOUBLE. We don't bother to detect this since the present uses for 3909 FORCE_DOUBLE are only in test programs and default generic C code. 3910 3911 Not quite sure that an "automatic volatile" will use memory, but it does 3912 in gcc. An asm("":"=m"(d):"0"(d)) can't be used to trick gcc, since 3913 apparently matching operands like "0" are only allowed on a register 3914 output. gcc 3.4 warns about this, though in fact it and past versions 3915 seem to put the operand through memory as hoped. */ 3916 3917#if (HAVE_HOST_CPU_FAMILY_m68k || HAVE_HOST_CPU_FAMILY_x86 \ 3918 || defined (__amd64__)) 3919#define FORCE_DOUBLE(d) \ 3920 do { volatile double __gmp_force = (d); (d) = __gmp_force; } while (0) 3921#else 3922#define FORCE_DOUBLE(d) do { } while (0) 3923#endif 3924 3925 3926__GMP_DECLSPEC extern const unsigned char __gmp_digit_value_tab[]; 3927 3928__GMP_DECLSPEC extern int __gmp_junk; 3929__GMP_DECLSPEC extern const int __gmp_0; 3930__GMP_DECLSPEC void __gmp_exception (int) ATTRIBUTE_NORETURN; 3931__GMP_DECLSPEC void __gmp_divide_by_zero (void) ATTRIBUTE_NORETURN; 3932__GMP_DECLSPEC void __gmp_sqrt_of_negative (void) ATTRIBUTE_NORETURN; 3933__GMP_DECLSPEC void __gmp_invalid_operation (void) ATTRIBUTE_NORETURN; 3934#define GMP_ERROR(code) __gmp_exception (code) 3935#define DIVIDE_BY_ZERO __gmp_divide_by_zero () 3936#define SQRT_OF_NEGATIVE __gmp_sqrt_of_negative () 3937 3938#if defined _LONG_LONG_LIMB 3939#define CNST_LIMB(C) ((mp_limb_t) C##LL) 3940#else /* not _LONG_LONG_LIMB */ 3941#define CNST_LIMB(C) ((mp_limb_t) C##L) 3942#endif /* _LONG_LONG_LIMB */ 3943 3944/* Stuff used by mpn/generic/perfsqr.c and mpz/prime_p.c */ 3945#if GMP_NUMB_BITS == 2 3946#define PP 0x3 /* 3 */ 3947#define PP_FIRST_OMITTED 5 3948#endif 3949#if GMP_NUMB_BITS == 4 3950#define PP 0xF /* 3 x 5 */ 3951#define PP_FIRST_OMITTED 7 3952#endif 3953#if GMP_NUMB_BITS == 8 3954#define PP 0x69 /* 3 x 5 x 7 */ 3955#define PP_FIRST_OMITTED 11 3956#endif 3957#if GMP_NUMB_BITS == 16 3958#define PP 0x3AA7 /* 3 x 5 x 7 x 11 x 13 */ 3959#define PP_FIRST_OMITTED 17 3960#endif 3961#if GMP_NUMB_BITS == 32 3962#define PP 0xC0CFD797L /* 3 x 5 x 7 x 11 x ... x 29 */ 3963#define PP_INVERTED 0x53E5645CL 3964#define PP_FIRST_OMITTED 31 3965#endif 3966#if GMP_NUMB_BITS == 64 3967#define PP CNST_LIMB(0xE221F97C30E94E1D) /* 3 x 5 x 7 x 11 x ... x 53 */ 3968#define PP_INVERTED CNST_LIMB(0x21CFE6CFC938B36B) 3969#define PP_FIRST_OMITTED 59 3970#endif 3971#ifndef PP_FIRST_OMITTED 3972#define PP_FIRST_OMITTED 3 3973#endif 3974 3975typedef struct 3976{ 3977 mp_limb_t d0, d1; 3978} mp_double_limb_t; 3979 3980#define mpn_gcd_22 __MPN (gcd_22) 3981__GMP_DECLSPEC mp_double_limb_t mpn_gcd_22 (mp_limb_t, mp_limb_t, mp_limb_t, mp_limb_t); 3982 3983/* BIT1 means a result value in bit 1 (second least significant bit), with a 3984 zero bit representing +1 and a one bit representing -1. Bits other than 3985 bit 1 are garbage. These are meant to be kept in "int"s, and casts are 3986 used to ensure the expressions are "int"s even if a and/or b might be 3987 other types. 3988 3989 JACOBI_TWOS_U_BIT1 and JACOBI_RECIP_UU_BIT1 are used in mpn_jacobi_base 3990 and their speed is important. Expressions are used rather than 3991 conditionals to accumulate sign changes, which effectively means XORs 3992 instead of conditional JUMPs. 
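
   Eg. the conversion JACOBI_BIT1_TO_PN below computes 1 - (bit1 & 2):
   bit 1 clear gives 1 - 0 = +1 and bit 1 set gives 1 - 2 = -1, with any
   garbage in the other bits masked away by the & 2.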
*/ 3993 3994/* (a/0), with a signed; is 1 if a=+/-1, 0 otherwise */ 3995#define JACOBI_S0(a) (((a) == 1) | ((a) == -1)) 3996 3997/* (a/0), with a unsigned; is 1 if a=+/-1, 0 otherwise */ 3998#define JACOBI_U0(a) ((a) == 1) 3999 4000/* FIXME: JACOBI_LS0 and JACOBI_0LS are the same, so delete one and 4001 come up with a better name. */ 4002 4003/* (a/0), with a given by low and size; 4004 is 1 if a=+/-1, 0 otherwise */ 4005#define JACOBI_LS0(alow,asize) \ 4006 (((asize) == 1 || (asize) == -1) && (alow) == 1) 4007 4008/* (a/0), with a an mpz_t; 4009 fetch of low limb always valid, even if size is zero */ 4010#define JACOBI_Z0(a) JACOBI_LS0 (PTR(a)[0], SIZ(a)) 4011 4012/* (0/b), with b unsigned; is 1 if b=1, 0 otherwise */ 4013#define JACOBI_0U(b) ((b) == 1) 4014 4015/* (0/b), with b unsigned; is 1 if b=+/-1, 0 otherwise */ 4016#define JACOBI_0S(b) ((b) == 1 || (b) == -1) 4017 4018/* (0/b), with b given by low and size; is 1 if b=+/-1, 0 otherwise */ 4019#define JACOBI_0LS(blow,bsize) \ 4020 (((bsize) == 1 || (bsize) == -1) && (blow) == 1) 4021 4022/* Convert a bit1 to +1 or -1. */ 4023#define JACOBI_BIT1_TO_PN(result_bit1) \ 4024 (1 - ((int) (result_bit1) & 2)) 4025 4026/* (2/b), with b unsigned and odd; 4027 is (-1)^((b^2-1)/8) which is 1 if b==1,7mod8 or -1 if b==3,5mod8 and 4028 hence obtained from (b>>1)^b */ 4029#define JACOBI_TWO_U_BIT1(b) \ 4030 ((int) (((b) >> 1) ^ (b))) 4031 4032/* (2/b)^twos, with b unsigned and odd */ 4033#define JACOBI_TWOS_U_BIT1(twos, b) \ 4034 ((int) ((twos) << 1) & JACOBI_TWO_U_BIT1 (b)) 4035 4036/* (2/b)^twos, with b unsigned and odd */ 4037#define JACOBI_TWOS_U(twos, b) \ 4038 (JACOBI_BIT1_TO_PN (JACOBI_TWOS_U_BIT1 (twos, b))) 4039 4040/* (-1/b), with b odd (signed or unsigned); 4041 is (-1)^((b-1)/2) */ 4042#define JACOBI_N1B_BIT1(b) \ 4043 ((int) (b)) 4044 4045/* (a/b) effect due to sign of a: signed/unsigned, b odd; 4046 is (-1/b) if a<0, or +1 if a>=0 */ 4047#define JACOBI_ASGN_SU_BIT1(a, b) \ 4048 ((((a) < 0) << 1) & JACOBI_N1B_BIT1(b)) 4049 4050/* (a/b) effect due to sign of b: signed/signed; 4051 is -1 if a and b both negative, +1 otherwise */ 4052#define JACOBI_BSGN_SS_BIT1(a, b) \ 4053 ((((a)<0) & ((b)<0)) << 1) 4054 4055/* (a/b) effect due to sign of b: signed/mpz; 4056 is -1 if a and b both negative, +1 otherwise */ 4057#define JACOBI_BSGN_SZ_BIT1(a, b) \ 4058 JACOBI_BSGN_SS_BIT1 (a, SIZ(b)) 4059 4060/* (a/b) effect due to sign of b: mpz/signed; 4061 is -1 if a and b both negative, +1 otherwise */ 4062#define JACOBI_BSGN_ZS_BIT1(a, b) \ 4063 JACOBI_BSGN_SZ_BIT1 (b, a) 4064 4065/* (a/b) reciprocity to switch to (b/a), a,b both unsigned and odd; 4066 is (-1)^((a-1)*(b-1)/4), which means +1 if either a,b==1mod4, or -1 if 4067 both a,b==3mod4, achieved in bit 1 by a&b. No ASSERT()s about a,b odd 4068 because this is used in a couple of places with only bit 1 of a or b 4069 valid. */ 4070#define JACOBI_RECIP_UU_BIT1(a, b) \ 4071 ((int) ((a) & (b))) 4072 4073/* Strip low zero limbs from {b_ptr,b_size} by incrementing b_ptr and 4074 decrementing b_size. b_low should be b_ptr[0] on entry, and will be 4075 updated for the new b_ptr. result_bit1 is updated according to the 4076 factors of 2 stripped, as per (a/2). 
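
   Each zero limb stripped accounts for GMP_NUMB_BITS factors of 2, and
   since (a/2)^2 = +1 only an odd GMP_NUMB_BITS can affect the result,
   hence the GMP_NUMB_BITS % 2 test in the loop below.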
*/
#define JACOBI_STRIP_LOW_ZEROS(result_bit1, a, b_ptr, b_size, b_low) \
  do { \
    ASSERT ((b_size) >= 1); \
    ASSERT ((b_low) == (b_ptr)[0]); \
 \
    while (UNLIKELY ((b_low) == 0)) \
      { \
	(b_size)--; \
	ASSERT ((b_size) >= 1); \
	(b_ptr)++; \
	(b_low) = *(b_ptr); \
 \
	ASSERT (((a) & 1) != 0); \
	if ((GMP_NUMB_BITS % 2) == 1) \
	  (result_bit1) ^= JACOBI_TWO_U_BIT1(a); \
      } \
  } while (0)

/* Set a_rem to {a_ptr,a_size} reduced modulo b, either using mod_1 or
   modexact_1_odd, but in either case leaving a_rem<b.  b must be odd and
   unsigned.  modexact_1_odd effectively calculates -a mod b, and
   result_bit1 is adjusted for the factor of -1.

   The way mpn_modexact_1_odd sometimes bases its remainder on a_size and
   sometimes on a_size-1 means if GMP_NUMB_BITS is odd we can't know what
   factor to introduce into result_bit1, so for that case use mpn_mod_1
   unconditionally.

   FIXME: mpn_modexact_1_odd is more efficient, so some way to get it used
   for odd GMP_NUMB_BITS would be good.  Perhaps it could mung its result,
   or not skip a divide step, or something. */

#define JACOBI_MOD_OR_MODEXACT_1_ODD(result_bit1, a_rem, a_ptr, a_size, b) \
  do { \
    mp_srcptr __a_ptr = (a_ptr); \
    mp_size_t __a_size = (a_size); \
    mp_limb_t __b = (b); \
 \
    ASSERT (__a_size >= 1); \
    ASSERT (__b & 1); \
 \
    if ((GMP_NUMB_BITS % 2) != 0 \
	|| ABOVE_THRESHOLD (__a_size, BMOD_1_TO_MOD_1_THRESHOLD)) \
      { \
	(a_rem) = mpn_mod_1 (__a_ptr, __a_size, __b); \
      } \
    else \
      { \
	(result_bit1) ^= JACOBI_N1B_BIT1 (__b); \
	(a_rem) = mpn_modexact_1_odd (__a_ptr, __a_size, __b); \
      } \
  } while (0)

/* State for the Jacobi computation using Lehmer. */
#define jacobi_table __gmp_jacobi_table
__GMP_DECLSPEC extern const unsigned char jacobi_table[208];

/* Bit layout for the initial state.  b must be odd.

      3  2  1  0
     +--+--+--+--+
     |a1|a0|b1| s|
     +--+--+--+--+

 */
static inline unsigned
mpn_jacobi_init (unsigned a, unsigned b, unsigned s)
{
  ASSERT (b & 1);
  ASSERT (s <= 1);
  return ((a & 3) << 2) + (b & 2) + s;
}

static inline int
mpn_jacobi_finish (unsigned bits)
{
  /* (a, b) = (1,0) or (0,1) */
  ASSERT ((bits & 14) == 0);

  return 1 - 2*(bits & 1);
}

static inline unsigned
mpn_jacobi_update (unsigned bits, unsigned denominator, unsigned q)
{
  /* FIXME: Could halve table size by not including the e bit in the
   * index, and instead xor when updating.  Then the lookup would be
   * like
   *
   *   bits ^= table[((bits & 30) << 2) + (denominator << 2) + q];
   */

  ASSERT (bits < 26);
  ASSERT (denominator < 2);
  ASSERT (q < 4);

  /* For almost all calls, denominator is constant and quite often q
     is constant too.  So use addition rather than or, so the compiler
     can put the constant part into the offset of an indexed
     addressing instruction.

     With constant denominator, the below table lookup is compiled to

       C Constant q = 1, constant denominator = 1
       movzbl table+5(%eax,8), %eax

     or

       C q in %edx, constant denominator = 1
       movzbl table+4(%edx,%eax,8), %eax

     One could maintain the state preshifted 3 bits, to save a shift
     here, but at least on x86, that's no real saving.
*/
  return jacobi_table[(bits << 3) + (denominator << 2) + q];
}

/* Matrix multiplication */
#define mpn_matrix22_mul __MPN(matrix22_mul)
__GMP_DECLSPEC void mpn_matrix22_mul (mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_srcptr, mp_srcptr, mp_srcptr, mp_size_t, mp_ptr);
#define mpn_matrix22_mul_itch __MPN(matrix22_mul_itch)
__GMP_DECLSPEC mp_size_t mpn_matrix22_mul_itch (mp_size_t, mp_size_t) ATTRIBUTE_CONST;

#ifndef MATRIX22_STRASSEN_THRESHOLD
#define MATRIX22_STRASSEN_THRESHOLD 30
#endif

/* HGCD definitions */

/* Extract one numb, shifting count bits left
     ________  ________
    |___xh___||___xl___|
	  |____r____|
    >count <

   The count includes any nail bits, so it should work fine if count
   is computed using count_leading_zeros.  If GMP_NAIL_BITS > 0, all of
   xh, xl and r include nail bits.  Must have 0 < count < GMP_LIMB_BITS.

   FIXME: Omit masking with GMP_NUMB_MASK, and let callers do that for
   those calls where the count high bits of xh may be non-zero.
*/

#define MPN_EXTRACT_NUMB(count, xh, xl) \
  ((((xh) << ((count) - GMP_NAIL_BITS)) & GMP_NUMB_MASK) | \
   ((xl) >> (GMP_LIMB_BITS - (count))))


/* The non-negative matrix M = (u, u'; v, v') keeps track of the
   reduction (a;b) = M (alpha; beta) where alpha, beta are smaller
   than a, b.  The determinant must always be one, so that M has an
   inverse (v', -u'; -v, u).  Elements always fit in GMP_NUMB_BITS - 1
   bits. */
struct hgcd_matrix1
{
  mp_limb_t u[2][2];
};

#define mpn_hgcd2 __MPN (hgcd2)
__GMP_DECLSPEC int mpn_hgcd2 (mp_limb_t, mp_limb_t, mp_limb_t, mp_limb_t, struct hgcd_matrix1 *);

#define mpn_hgcd_mul_matrix1_vector __MPN (hgcd_mul_matrix1_vector)
__GMP_DECLSPEC mp_size_t mpn_hgcd_mul_matrix1_vector (const struct hgcd_matrix1 *, mp_ptr, mp_srcptr, mp_ptr, mp_size_t);

#define mpn_matrix22_mul1_inverse_vector __MPN (matrix22_mul1_inverse_vector)
__GMP_DECLSPEC mp_size_t mpn_matrix22_mul1_inverse_vector (const struct hgcd_matrix1 *, mp_ptr, mp_srcptr, mp_ptr, mp_size_t);

#define mpn_hgcd2_jacobi __MPN (hgcd2_jacobi)
__GMP_DECLSPEC int mpn_hgcd2_jacobi (mp_limb_t, mp_limb_t, mp_limb_t, mp_limb_t, struct hgcd_matrix1 *, unsigned *);

struct hgcd_matrix
{
  mp_size_t alloc;	/* for sanity checking only */
  mp_size_t n;
  mp_ptr p[2][2];
};

#define MPN_HGCD_MATRIX_INIT_ITCH(n) (4 * ((n+1)/2 + 1))

#define mpn_hgcd_matrix_init __MPN (hgcd_matrix_init)
__GMP_DECLSPEC void mpn_hgcd_matrix_init (struct hgcd_matrix *, mp_size_t, mp_ptr);

#define mpn_hgcd_matrix_update_q __MPN (hgcd_matrix_update_q)
__GMP_DECLSPEC void mpn_hgcd_matrix_update_q (struct hgcd_matrix *, mp_srcptr, mp_size_t, unsigned, mp_ptr);

#define mpn_hgcd_matrix_mul_1 __MPN (hgcd_matrix_mul_1)
__GMP_DECLSPEC void mpn_hgcd_matrix_mul_1 (struct hgcd_matrix *, const struct hgcd_matrix1 *, mp_ptr);

#define mpn_hgcd_matrix_mul __MPN (hgcd_matrix_mul)
__GMP_DECLSPEC void mpn_hgcd_matrix_mul (struct hgcd_matrix *, const struct hgcd_matrix *, mp_ptr);

#define mpn_hgcd_matrix_adjust __MPN (hgcd_matrix_adjust)
__GMP_DECLSPEC mp_size_t mpn_hgcd_matrix_adjust (const struct hgcd_matrix *, mp_size_t, mp_ptr, mp_ptr, mp_size_t, mp_ptr);

#define mpn_hgcd_step __MPN(hgcd_step)
__GMP_DECLSPEC mp_size_t mpn_hgcd_step (mp_size_t, mp_ptr, mp_ptr,
mp_size_t, struct hgcd_matrix *, mp_ptr); 4273 4274#define mpn_hgcd_reduce __MPN(hgcd_reduce) 4275__GMP_DECLSPEC mp_size_t mpn_hgcd_reduce (struct hgcd_matrix *, mp_ptr, mp_ptr, mp_size_t, mp_size_t, mp_ptr); 4276 4277#define mpn_hgcd_reduce_itch __MPN(hgcd_reduce_itch) 4278__GMP_DECLSPEC mp_size_t mpn_hgcd_reduce_itch (mp_size_t, mp_size_t) ATTRIBUTE_CONST; 4279 4280#define mpn_hgcd_itch __MPN (hgcd_itch) 4281__GMP_DECLSPEC mp_size_t mpn_hgcd_itch (mp_size_t) ATTRIBUTE_CONST; 4282 4283#define mpn_hgcd __MPN (hgcd) 4284__GMP_DECLSPEC mp_size_t mpn_hgcd (mp_ptr, mp_ptr, mp_size_t, struct hgcd_matrix *, mp_ptr); 4285 4286#define mpn_hgcd_appr_itch __MPN (hgcd_appr_itch) 4287__GMP_DECLSPEC mp_size_t mpn_hgcd_appr_itch (mp_size_t) ATTRIBUTE_CONST; 4288 4289#define mpn_hgcd_appr __MPN (hgcd_appr) 4290__GMP_DECLSPEC int mpn_hgcd_appr (mp_ptr, mp_ptr, mp_size_t, struct hgcd_matrix *, mp_ptr); 4291 4292#define mpn_hgcd_jacobi __MPN (hgcd_jacobi) 4293__GMP_DECLSPEC mp_size_t mpn_hgcd_jacobi (mp_ptr, mp_ptr, mp_size_t, struct hgcd_matrix *, unsigned *, mp_ptr); 4294 4295typedef void gcd_subdiv_step_hook(void *, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, int); 4296 4297/* Needs storage for the quotient */ 4298#define MPN_GCD_SUBDIV_STEP_ITCH(n) (n) 4299 4300#define mpn_gcd_subdiv_step __MPN(gcd_subdiv_step) 4301__GMP_DECLSPEC mp_size_t mpn_gcd_subdiv_step (mp_ptr, mp_ptr, mp_size_t, mp_size_t, gcd_subdiv_step_hook *, void *, mp_ptr); 4302 4303struct gcdext_ctx 4304{ 4305 /* Result parameters. */ 4306 mp_ptr gp; 4307 mp_size_t gn; 4308 mp_ptr up; 4309 mp_size_t *usize; 4310 4311 /* Cofactors updated in each step. */ 4312 mp_size_t un; 4313 mp_ptr u0, u1, tp; 4314}; 4315 4316#define mpn_gcdext_hook __MPN (gcdext_hook) 4317gcd_subdiv_step_hook mpn_gcdext_hook; 4318 4319#define MPN_GCDEXT_LEHMER_N_ITCH(n) (4*(n) + 3) 4320 4321#define mpn_gcdext_lehmer_n __MPN(gcdext_lehmer_n) 4322__GMP_DECLSPEC mp_size_t mpn_gcdext_lehmer_n (mp_ptr, mp_ptr, mp_size_t *, mp_ptr, mp_ptr, mp_size_t, mp_ptr); 4323 4324/* 4*(an + 1) + 4*(bn + 1) + an */ 4325#define MPN_GCDEXT_LEHMER_ITCH(an, bn) (5*(an) + 4*(bn) + 8) 4326 4327#ifndef HGCD_THRESHOLD 4328#define HGCD_THRESHOLD 400 4329#endif 4330 4331#ifndef HGCD_APPR_THRESHOLD 4332#define HGCD_APPR_THRESHOLD 400 4333#endif 4334 4335#ifndef HGCD_REDUCE_THRESHOLD 4336#define HGCD_REDUCE_THRESHOLD 1000 4337#endif 4338 4339#ifndef GCD_DC_THRESHOLD 4340#define GCD_DC_THRESHOLD 1000 4341#endif 4342 4343#ifndef GCDEXT_DC_THRESHOLD 4344#define GCDEXT_DC_THRESHOLD 600 4345#endif 4346 4347/* Definitions for mpn_set_str and mpn_get_str */ 4348struct powers 4349{ 4350 mp_ptr p; /* actual power value */ 4351 mp_size_t n; /* # of limbs at p */ 4352 mp_size_t shift; /* weight of lowest limb, in limb base B */ 4353 size_t digits_in_base; /* number of corresponding digits */ 4354 int base; 4355}; 4356typedef struct powers powers_t; 4357#define mpn_str_powtab_alloc(n) ((n) + 2 * GMP_LIMB_BITS) /* FIXME: This can perhaps be trimmed */ 4358#define mpn_dc_set_str_itch(n) ((n) + GMP_LIMB_BITS) 4359#define mpn_dc_get_str_itch(n) ((n) + GMP_LIMB_BITS) 4360 4361#define mpn_compute_powtab __MPN(compute_powtab) 4362__GMP_DECLSPEC size_t mpn_compute_powtab (powers_t *, mp_ptr, mp_size_t, int); 4363#define mpn_dc_set_str __MPN(dc_set_str) 4364__GMP_DECLSPEC mp_size_t mpn_dc_set_str (mp_ptr, const unsigned char *, size_t, const powers_t *, mp_ptr); 4365#define mpn_bc_set_str __MPN(bc_set_str) 4366__GMP_DECLSPEC mp_size_t mpn_bc_set_str (mp_ptr, const unsigned char *, size_t, int); 4367 4368 4369/* 
__GMPF_BITS_TO_PREC applies a minimum 53 bits, rounds upwards to a whole
   limb and adds an extra limb.  __GMPF_PREC_TO_BITS drops that extra limb,
   hence giving back the user's size in bits rounded up.  Notice that
   converting prec->bits->prec gives an unchanged value. */
#define __GMPF_BITS_TO_PREC(n) \
  ((mp_size_t) ((__GMP_MAX (53, n) + 2 * GMP_NUMB_BITS - 1) / GMP_NUMB_BITS))
#define __GMPF_PREC_TO_BITS(n) \
  ((mp_bitcnt_t) (n) * GMP_NUMB_BITS - GMP_NUMB_BITS)

__GMP_DECLSPEC extern mp_size_t __gmp_default_fp_limb_precision;

/* Compute the number of base-b digits corresponding to nlimbs limbs,
   rounding down. */
#define DIGITS_IN_BASE_PER_LIMB(res, nlimbs, b) \
  do { \
    mp_limb_t _ph, _dummy; \
    umul_ppmm (_ph, _dummy, \
	       mp_bases[b].logb2, GMP_NUMB_BITS * (mp_limb_t) (nlimbs)); \
    res = _ph; \
  } while (0)

/* Compute the number of limbs corresponding to ndigits base-b digits,
   rounding up. */
#define LIMBS_PER_DIGIT_IN_BASE(res, ndigits, b) \
  do { \
    mp_limb_t _ph, _dummy; \
    umul_ppmm (_ph, _dummy, mp_bases[b].log2b, (mp_limb_t) (ndigits)); \
    res = 8 * _ph / GMP_NUMB_BITS + 2; \
  } while (0)


/* Set n to the number of significant digits that an mpf with the given
   _mp_prec field can hold, in the given base.  This is a rounded up value,
   designed to ensure there's enough digits to reproduce all the guaranteed
   part of the value.

   There are prec many limbs, but the high might be only "1" so forget it
   and just count prec-1 limbs into chars.  +1 rounds that upwards, and a
   further +1 is because the limbs usually won't fall on digit boundaries.

   FIXME: If base is a power of 2 and the bits per digit divides
   GMP_LIMB_BITS then the +2 is unnecessary.  This always happens for
   base==2, and in base==16 with the current 32 or 64 bit limb sizes. */

#define MPF_SIGNIFICANT_DIGITS(n, base, prec) \
  do { \
    size_t rawn; \
    ASSERT (base >= 2 && base < numberof (mp_bases)); \
    DIGITS_IN_BASE_PER_LIMB (rawn, (prec) - 1, base); \
    n = rawn + 2; \
  } while (0)


/* Decimal point string, from the current C locale.  Needs <langinfo.h> for
   nl_langinfo and constants, preferably with _GNU_SOURCE defined to get
   DECIMAL_POINT from glibc, and needs <locale.h> for localeconv, each under
   their respective #if HAVE_FOO_H.

   GLIBC recommends nl_langinfo because getting only one facet can be
   faster, apparently. */

/* DECIMAL_POINT seems to need _GNU_SOURCE defined to get it from glibc. */
#if HAVE_NL_LANGINFO && defined (DECIMAL_POINT)
#define GMP_DECIMAL_POINT (nl_langinfo (DECIMAL_POINT))
#endif
/* RADIXCHAR is deprecated, still in unix98 or some such. */
#if HAVE_NL_LANGINFO && defined (RADIXCHAR) && ! defined (GMP_DECIMAL_POINT)
#define GMP_DECIMAL_POINT (nl_langinfo (RADIXCHAR))
#endif
/* localeconv is slower since it returns all locale stuff */
#if HAVE_LOCALECONV && ! defined (GMP_DECIMAL_POINT)
#define GMP_DECIMAL_POINT (localeconv()->decimal_point)
#endif
#if !
defined (GMP_DECIMAL_POINT) 4442#define GMP_DECIMAL_POINT (".") 4443#endif 4444 4445 4446#define DOPRNT_CONV_FIXED 1 4447#define DOPRNT_CONV_SCIENTIFIC 2 4448#define DOPRNT_CONV_GENERAL 3 4449 4450#define DOPRNT_JUSTIFY_NONE 0 4451#define DOPRNT_JUSTIFY_LEFT 1 4452#define DOPRNT_JUSTIFY_RIGHT 2 4453#define DOPRNT_JUSTIFY_INTERNAL 3 4454 4455#define DOPRNT_SHOWBASE_YES 1 4456#define DOPRNT_SHOWBASE_NO 2 4457#define DOPRNT_SHOWBASE_NONZERO 3 4458 4459struct doprnt_params_t { 4460 int base; /* negative for upper case */ 4461 int conv; /* choices above */ 4462 const char *expfmt; /* exponent format */ 4463 int exptimes4; /* exponent multiply by 4 */ 4464 char fill; /* character */ 4465 int justify; /* choices above */ 4466 int prec; /* prec field, or -1 for all digits */ 4467 int showbase; /* choices above */ 4468 int showpoint; /* if radix point always shown */ 4469 int showtrailing; /* if trailing zeros wanted */ 4470 char sign; /* '+', ' ', or '\0' */ 4471 int width; /* width field */ 4472}; 4473 4474#if _GMP_H_HAVE_VA_LIST 4475 4476typedef int (*doprnt_format_t) (void *, const char *, va_list); 4477typedef int (*doprnt_memory_t) (void *, const char *, size_t); 4478typedef int (*doprnt_reps_t) (void *, int, int); 4479typedef int (*doprnt_final_t) (void *); 4480 4481struct doprnt_funs_t { 4482 doprnt_format_t format; 4483 doprnt_memory_t memory; 4484 doprnt_reps_t reps; 4485 doprnt_final_t final; /* NULL if not required */ 4486}; 4487 4488extern const struct doprnt_funs_t __gmp_fprintf_funs; 4489extern const struct doprnt_funs_t __gmp_sprintf_funs; 4490extern const struct doprnt_funs_t __gmp_snprintf_funs; 4491extern const struct doprnt_funs_t __gmp_obstack_printf_funs; 4492extern const struct doprnt_funs_t __gmp_ostream_funs; 4493 4494/* "buf" is a __gmp_allocate_func block of "alloc" many bytes. The first 4495 "size" of these have been written. "alloc > size" is maintained, so 4496 there's room to store a '\0' at the end. "result" is where the 4497 application wants the final block pointer. */ 4498struct gmp_asprintf_t { 4499 char **result; 4500 char *buf; 4501 size_t size; 4502 size_t alloc; 4503}; 4504 4505#define GMP_ASPRINTF_T_INIT(d, output) \ 4506 do { \ 4507 (d).result = (output); \ 4508 (d).alloc = 256; \ 4509 (d).buf = (char *) (*__gmp_allocate_func) ((d).alloc); \ 4510 (d).size = 0; \ 4511 } while (0) 4512 4513/* If a realloc is necessary, use twice the size actually required, so as to 4514 avoid repeated small reallocs. */ 4515#define GMP_ASPRINTF_T_NEED(d, n) \ 4516 do { \ 4517 size_t alloc, newsize, newalloc; \ 4518 ASSERT ((d)->alloc >= (d)->size + 1); \ 4519 \ 4520 alloc = (d)->alloc; \ 4521 newsize = (d)->size + (n); \ 4522 if (alloc <= newsize) \ 4523 { \ 4524 newalloc = 2*newsize; \ 4525 (d)->alloc = newalloc; \ 4526 (d)->buf = __GMP_REALLOCATE_FUNC_TYPE ((d)->buf, \ 4527 alloc, newalloc, char); \ 4528 } \ 4529 } while (0) 4530 4531__GMP_DECLSPEC int __gmp_asprintf_memory (struct gmp_asprintf_t *, const char *, size_t); 4532__GMP_DECLSPEC int __gmp_asprintf_reps (struct gmp_asprintf_t *, int, int); 4533__GMP_DECLSPEC int __gmp_asprintf_final (struct gmp_asprintf_t *); 4534 4535/* buf is where to write the next output, and size is how much space is left 4536 there. If the application passed size==0 then that's what we'll have 4537 here, and nothing at all should be written. */ 4538struct gmp_snprintf_t { 4539 char *buf; 4540 size_t size; 4541}; 4542 4543/* Add the bytes printed by the call to the total retval, or bail out on an 4544 error. 
*/ 4545#define DOPRNT_ACCUMULATE(call) \ 4546 do { \ 4547 int __ret; \ 4548 __ret = call; \ 4549 if (__ret == -1) \ 4550 goto error; \ 4551 retval += __ret; \ 4552 } while (0) 4553#define DOPRNT_ACCUMULATE_FUN(fun, params) \ 4554 do { \ 4555 ASSERT ((fun) != NULL); \ 4556 DOPRNT_ACCUMULATE ((*(fun)) params); \ 4557 } while (0) 4558 4559#define DOPRNT_FORMAT(fmt, ap) \ 4560 DOPRNT_ACCUMULATE_FUN (funs->format, (data, fmt, ap)) 4561#define DOPRNT_MEMORY(ptr, len) \ 4562 DOPRNT_ACCUMULATE_FUN (funs->memory, (data, ptr, len)) 4563#define DOPRNT_REPS(c, n) \ 4564 DOPRNT_ACCUMULATE_FUN (funs->reps, (data, c, n)) 4565 4566#define DOPRNT_STRING(str) DOPRNT_MEMORY (str, strlen (str)) 4567 4568#define DOPRNT_REPS_MAYBE(c, n) \ 4569 do { \ 4570 if ((n) != 0) \ 4571 DOPRNT_REPS (c, n); \ 4572 } while (0) 4573#define DOPRNT_MEMORY_MAYBE(ptr, len) \ 4574 do { \ 4575 if ((len) != 0) \ 4576 DOPRNT_MEMORY (ptr, len); \ 4577 } while (0) 4578 4579__GMP_DECLSPEC int __gmp_doprnt (const struct doprnt_funs_t *, void *, const char *, va_list); 4580__GMP_DECLSPEC int __gmp_doprnt_integer (const struct doprnt_funs_t *, void *, const struct doprnt_params_t *, const char *); 4581 4582#define __gmp_doprnt_mpf __gmp_doprnt_mpf2 4583__GMP_DECLSPEC int __gmp_doprnt_mpf (const struct doprnt_funs_t *, void *, const struct doprnt_params_t *, const char *, mpf_srcptr); 4584 4585__GMP_DECLSPEC int __gmp_replacement_vsnprintf (char *, size_t, const char *, va_list); 4586#endif /* _GMP_H_HAVE_VA_LIST */ 4587 4588 4589typedef int (*gmp_doscan_scan_t) (void *, const char *, ...); 4590typedef void *(*gmp_doscan_step_t) (void *, int); 4591typedef int (*gmp_doscan_get_t) (void *); 4592typedef int (*gmp_doscan_unget_t) (int, void *); 4593 4594struct gmp_doscan_funs_t { 4595 gmp_doscan_scan_t scan; 4596 gmp_doscan_step_t step; 4597 gmp_doscan_get_t get; 4598 gmp_doscan_unget_t unget; 4599}; 4600extern const struct gmp_doscan_funs_t __gmp_fscanf_funs; 4601extern const struct gmp_doscan_funs_t __gmp_sscanf_funs; 4602 4603#if _GMP_H_HAVE_VA_LIST 4604__GMP_DECLSPEC int __gmp_doscan (const struct gmp_doscan_funs_t *, void *, const char *, va_list); 4605#endif 4606 4607 4608/* For testing and debugging. */ 4609#define MPZ_CHECK_FORMAT(z) \ 4610 do { \ 4611 ASSERT_ALWAYS (SIZ(z) == 0 || PTR(z)[ABSIZ(z) - 1] != 0); \ 4612 ASSERT_ALWAYS (ALLOC(z) >= ABSIZ(z)); \ 4613 ASSERT_ALWAYS_MPN (PTR(z), ABSIZ(z)); \ 4614 } while (0) 4615 4616#define MPQ_CHECK_FORMAT(q) \ 4617 do { \ 4618 MPZ_CHECK_FORMAT (mpq_numref (q)); \ 4619 MPZ_CHECK_FORMAT (mpq_denref (q)); \ 4620 ASSERT_ALWAYS (SIZ(mpq_denref(q)) >= 1); \ 4621 \ 4622 if (SIZ(mpq_numref(q)) == 0) \ 4623 { \ 4624 /* should have zero as 0/1 */ \ 4625 ASSERT_ALWAYS (SIZ(mpq_denref(q)) == 1 \ 4626 && PTR(mpq_denref(q))[0] == 1); \ 4627 } \ 4628 else \ 4629 { \ 4630 /* should have no common factors */ \ 4631 mpz_t g; \ 4632 mpz_init (g); \ 4633 mpz_gcd (g, mpq_numref(q), mpq_denref(q)); \ 4634 ASSERT_ALWAYS (mpz_cmp_ui (g, 1) == 0); \ 4635 mpz_clear (g); \ 4636 } \ 4637 } while (0) 4638 4639#define MPF_CHECK_FORMAT(f) \ 4640 do { \ 4641 ASSERT_ALWAYS (PREC(f) >= __GMPF_BITS_TO_PREC(53)); \ 4642 ASSERT_ALWAYS (ABSIZ(f) <= PREC(f)+1); \ 4643 if (SIZ(f) == 0) \ 4644 ASSERT_ALWAYS (EXP(f) == 0); \ 4645 if (SIZ(f) != 0) \ 4646 ASSERT_ALWAYS (PTR(f)[ABSIZ(f) - 1] != 0); \ 4647 } while (0) 4648 4649 4650/* Enhancement: The "mod" and "gcd_1" functions below could have 4651 __GMP_ATTRIBUTE_PURE, but currently (gcc 3.3) that's not supported on 4652 function pointers, only actual functions. 
It probably doesn't make much 4653 difference to the gmp code, since hopefully we arrange calls so there's 4654 no great need for the compiler to move things around. */ 4655 4656#if WANT_FAT_BINARY && (HAVE_HOST_CPU_FAMILY_x86 || HAVE_HOST_CPU_FAMILY_x86_64) 4657/* NOTE: The function pointers in this struct are also in CPUVEC_FUNCS_LIST 4658 in mpn/x86/x86-defs.m4 and mpn/x86_64/x86_64-defs.m4. Be sure to update 4659 those when changing here. */ 4660struct cpuvec_t { 4661 DECL_add_n ((*add_n)); 4662 DECL_addlsh1_n ((*addlsh1_n)); 4663 DECL_addlsh2_n ((*addlsh2_n)); 4664 DECL_addmul_1 ((*addmul_1)); 4665 DECL_addmul_2 ((*addmul_2)); 4666 DECL_bdiv_dbm1c ((*bdiv_dbm1c)); 4667 DECL_cnd_add_n ((*cnd_add_n)); 4668 DECL_cnd_sub_n ((*cnd_sub_n)); 4669 DECL_com ((*com)); 4670 DECL_copyd ((*copyd)); 4671 DECL_copyi ((*copyi)); 4672 DECL_divexact_1 ((*divexact_1)); 4673 DECL_divrem_1 ((*divrem_1)); 4674 DECL_gcd_11 ((*gcd_11)); 4675 DECL_lshift ((*lshift)); 4676 DECL_lshiftc ((*lshiftc)); 4677 DECL_mod_1 ((*mod_1)); 4678 DECL_mod_1_1p ((*mod_1_1p)); 4679 DECL_mod_1_1p_cps ((*mod_1_1p_cps)); 4680 DECL_mod_1s_2p ((*mod_1s_2p)); 4681 DECL_mod_1s_2p_cps ((*mod_1s_2p_cps)); 4682 DECL_mod_1s_4p ((*mod_1s_4p)); 4683 DECL_mod_1s_4p_cps ((*mod_1s_4p_cps)); 4684 DECL_mod_34lsub1 ((*mod_34lsub1)); 4685 DECL_modexact_1c_odd ((*modexact_1c_odd)); 4686 DECL_mul_1 ((*mul_1)); 4687 DECL_mul_basecase ((*mul_basecase)); 4688 DECL_mullo_basecase ((*mullo_basecase)); 4689 DECL_preinv_divrem_1 ((*preinv_divrem_1)); 4690 DECL_preinv_mod_1 ((*preinv_mod_1)); 4691 DECL_redc_1 ((*redc_1)); 4692 DECL_redc_2 ((*redc_2)); 4693 DECL_rshift ((*rshift)); 4694 DECL_sqr_basecase ((*sqr_basecase)); 4695 DECL_sub_n ((*sub_n)); 4696 DECL_sublsh1_n ((*sublsh1_n)); 4697 DECL_submul_1 ((*submul_1)); 4698 mp_size_t mul_toom22_threshold; 4699 mp_size_t mul_toom33_threshold; 4700 mp_size_t sqr_toom2_threshold; 4701 mp_size_t sqr_toom3_threshold; 4702 mp_size_t bmod_1_to_mod_1_threshold; 4703}; 4704__GMP_DECLSPEC extern struct cpuvec_t __gmpn_cpuvec; 4705__GMP_DECLSPEC extern int __gmpn_cpuvec_initialized; 4706#endif /* x86 fat binary */ 4707 4708__GMP_DECLSPEC void __gmpn_cpuvec_init (void); 4709 4710/* Get a threshold "field" from __gmpn_cpuvec, running __gmpn_cpuvec_init() 4711 if that hasn't yet been done (to establish the right values). */ 4712#define CPUVEC_THRESHOLD(field) \ 4713 ((LIKELY (__gmpn_cpuvec_initialized) ? 0 : (__gmpn_cpuvec_init (), 0)), \ 4714 __gmpn_cpuvec.field) 4715 4716 4717#if HAVE_NATIVE_mpn_add_nc 4718#define mpn_add_nc __MPN(add_nc) 4719__GMP_DECLSPEC mp_limb_t mpn_add_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t); 4720#else 4721static inline 4722mp_limb_t 4723mpn_add_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, mp_limb_t ci) 4724{ 4725 mp_limb_t co; 4726 co = mpn_add_n (rp, up, vp, n); 4727 co += mpn_add_1 (rp, rp, n, ci); 4728 return co; 4729} 4730#endif 4731 4732#if HAVE_NATIVE_mpn_sub_nc 4733#define mpn_sub_nc __MPN(sub_nc) 4734__GMP_DECLSPEC mp_limb_t mpn_sub_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t); 4735#else 4736static inline mp_limb_t 4737mpn_sub_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, mp_limb_t ci) 4738{ 4739 mp_limb_t co; 4740 co = mpn_sub_n (rp, up, vp, n); 4741 co += mpn_sub_1 (rp, rp, n, ci); 4742 return co; 4743} 4744#endif 4745 4746#if TUNE_PROGRAM_BUILD 4747/* Some extras wanted when recompiling some .c files for use by the tune 4748 program. Not part of a normal build. 
It's necessary to keep these thresholds as #defines (each expanding to an
   identically named variable), since various defaults are established based
   on #ifdef in the .c files.  For some this is not so (the defaults are
   instead established above), but all are done this way for consistency. */

#undef MUL_TOOM22_THRESHOLD
#define MUL_TOOM22_THRESHOLD mul_toom22_threshold
extern mp_size_t mul_toom22_threshold;

#undef MUL_TOOM33_THRESHOLD
#define MUL_TOOM33_THRESHOLD mul_toom33_threshold
extern mp_size_t mul_toom33_threshold;

#undef MUL_TOOM44_THRESHOLD
#define MUL_TOOM44_THRESHOLD mul_toom44_threshold
extern mp_size_t mul_toom44_threshold;

#undef MUL_TOOM6H_THRESHOLD
#define MUL_TOOM6H_THRESHOLD mul_toom6h_threshold
extern mp_size_t mul_toom6h_threshold;

#undef MUL_TOOM8H_THRESHOLD
#define MUL_TOOM8H_THRESHOLD mul_toom8h_threshold
extern mp_size_t mul_toom8h_threshold;

#undef MUL_TOOM32_TO_TOOM43_THRESHOLD
#define MUL_TOOM32_TO_TOOM43_THRESHOLD mul_toom32_to_toom43_threshold
extern mp_size_t mul_toom32_to_toom43_threshold;

#undef MUL_TOOM32_TO_TOOM53_THRESHOLD
#define MUL_TOOM32_TO_TOOM53_THRESHOLD mul_toom32_to_toom53_threshold
extern mp_size_t mul_toom32_to_toom53_threshold;

#undef MUL_TOOM42_TO_TOOM53_THRESHOLD
#define MUL_TOOM42_TO_TOOM53_THRESHOLD mul_toom42_to_toom53_threshold
extern mp_size_t mul_toom42_to_toom53_threshold;

#undef MUL_TOOM42_TO_TOOM63_THRESHOLD
#define MUL_TOOM42_TO_TOOM63_THRESHOLD mul_toom42_to_toom63_threshold
extern mp_size_t mul_toom42_to_toom63_threshold;

#undef MUL_TOOM43_TO_TOOM54_THRESHOLD
#define MUL_TOOM43_TO_TOOM54_THRESHOLD mul_toom43_to_toom54_threshold
extern mp_size_t mul_toom43_to_toom54_threshold;

#undef MUL_FFT_THRESHOLD
#define MUL_FFT_THRESHOLD mul_fft_threshold
extern mp_size_t mul_fft_threshold;

#undef MUL_FFT_MODF_THRESHOLD
#define MUL_FFT_MODF_THRESHOLD mul_fft_modf_threshold
extern mp_size_t mul_fft_modf_threshold;

#undef MUL_FFT_TABLE
#define MUL_FFT_TABLE { 0 }

#undef MUL_FFT_TABLE3
#define MUL_FFT_TABLE3 { {0,0} }

/* A native mpn_sqr_basecase is not tuned and SQR_BASECASE_THRESHOLD should
   remain as zero (always use it). */
#if !
HAVE_NATIVE_mpn_sqr_basecase 4812#undef SQR_BASECASE_THRESHOLD 4813#define SQR_BASECASE_THRESHOLD sqr_basecase_threshold 4814extern mp_size_t sqr_basecase_threshold; 4815#endif 4816 4817#if TUNE_PROGRAM_BUILD_SQR 4818#undef SQR_TOOM2_THRESHOLD 4819#define SQR_TOOM2_THRESHOLD SQR_TOOM2_MAX_GENERIC 4820#else 4821#undef SQR_TOOM2_THRESHOLD 4822#define SQR_TOOM2_THRESHOLD sqr_toom2_threshold 4823extern mp_size_t sqr_toom2_threshold; 4824#endif 4825 4826#undef SQR_TOOM3_THRESHOLD 4827#define SQR_TOOM3_THRESHOLD sqr_toom3_threshold 4828extern mp_size_t sqr_toom3_threshold; 4829 4830#undef SQR_TOOM4_THRESHOLD 4831#define SQR_TOOM4_THRESHOLD sqr_toom4_threshold 4832extern mp_size_t sqr_toom4_threshold; 4833 4834#undef SQR_TOOM6_THRESHOLD 4835#define SQR_TOOM6_THRESHOLD sqr_toom6_threshold 4836extern mp_size_t sqr_toom6_threshold; 4837 4838#undef SQR_TOOM8_THRESHOLD 4839#define SQR_TOOM8_THRESHOLD sqr_toom8_threshold 4840extern mp_size_t sqr_toom8_threshold; 4841 4842#undef SQR_FFT_THRESHOLD 4843#define SQR_FFT_THRESHOLD sqr_fft_threshold 4844extern mp_size_t sqr_fft_threshold; 4845 4846#undef SQR_FFT_MODF_THRESHOLD 4847#define SQR_FFT_MODF_THRESHOLD sqr_fft_modf_threshold 4848extern mp_size_t sqr_fft_modf_threshold; 4849 4850#undef SQR_FFT_TABLE 4851#define SQR_FFT_TABLE { 0 } 4852 4853#undef SQR_FFT_TABLE3 4854#define SQR_FFT_TABLE3 { {0,0} } 4855 4856#undef MULLO_BASECASE_THRESHOLD 4857#define MULLO_BASECASE_THRESHOLD mullo_basecase_threshold 4858extern mp_size_t mullo_basecase_threshold; 4859 4860#undef MULLO_DC_THRESHOLD 4861#define MULLO_DC_THRESHOLD mullo_dc_threshold 4862extern mp_size_t mullo_dc_threshold; 4863 4864#undef MULLO_MUL_N_THRESHOLD 4865#define MULLO_MUL_N_THRESHOLD mullo_mul_n_threshold 4866extern mp_size_t mullo_mul_n_threshold; 4867 4868#undef SQRLO_BASECASE_THRESHOLD 4869#define SQRLO_BASECASE_THRESHOLD sqrlo_basecase_threshold 4870extern mp_size_t sqrlo_basecase_threshold; 4871 4872#undef SQRLO_DC_THRESHOLD 4873#define SQRLO_DC_THRESHOLD sqrlo_dc_threshold 4874extern mp_size_t sqrlo_dc_threshold; 4875 4876#undef SQRLO_SQR_THRESHOLD 4877#define SQRLO_SQR_THRESHOLD sqrlo_sqr_threshold 4878extern mp_size_t sqrlo_sqr_threshold; 4879 4880#undef MULMID_TOOM42_THRESHOLD 4881#define MULMID_TOOM42_THRESHOLD mulmid_toom42_threshold 4882extern mp_size_t mulmid_toom42_threshold; 4883 4884#undef DIV_QR_2_PI2_THRESHOLD 4885#define DIV_QR_2_PI2_THRESHOLD div_qr_2_pi2_threshold 4886extern mp_size_t div_qr_2_pi2_threshold; 4887 4888#undef DC_DIV_QR_THRESHOLD 4889#define DC_DIV_QR_THRESHOLD dc_div_qr_threshold 4890extern mp_size_t dc_div_qr_threshold; 4891 4892#undef DC_DIVAPPR_Q_THRESHOLD 4893#define DC_DIVAPPR_Q_THRESHOLD dc_divappr_q_threshold 4894extern mp_size_t dc_divappr_q_threshold; 4895 4896#undef DC_BDIV_Q_THRESHOLD 4897#define DC_BDIV_Q_THRESHOLD dc_bdiv_q_threshold 4898extern mp_size_t dc_bdiv_q_threshold; 4899 4900#undef DC_BDIV_QR_THRESHOLD 4901#define DC_BDIV_QR_THRESHOLD dc_bdiv_qr_threshold 4902extern mp_size_t dc_bdiv_qr_threshold; 4903 4904#undef MU_DIV_QR_THRESHOLD 4905#define MU_DIV_QR_THRESHOLD mu_div_qr_threshold 4906extern mp_size_t mu_div_qr_threshold; 4907 4908#undef MU_DIVAPPR_Q_THRESHOLD 4909#define MU_DIVAPPR_Q_THRESHOLD mu_divappr_q_threshold 4910extern mp_size_t mu_divappr_q_threshold; 4911 4912#undef MUPI_DIV_QR_THRESHOLD 4913#define MUPI_DIV_QR_THRESHOLD mupi_div_qr_threshold 4914extern mp_size_t mupi_div_qr_threshold; 4915 4916#undef MU_BDIV_QR_THRESHOLD 4917#define MU_BDIV_QR_THRESHOLD mu_bdiv_qr_threshold 4918extern mp_size_t mu_bdiv_qr_threshold; 4919 
#undef  SQR_TOOM3_THRESHOLD
#define SQR_TOOM3_THRESHOLD sqr_toom3_threshold
extern mp_size_t sqr_toom3_threshold;

#undef  SQR_TOOM4_THRESHOLD
#define SQR_TOOM4_THRESHOLD sqr_toom4_threshold
extern mp_size_t sqr_toom4_threshold;

#undef  SQR_TOOM6_THRESHOLD
#define SQR_TOOM6_THRESHOLD sqr_toom6_threshold
extern mp_size_t sqr_toom6_threshold;

#undef  SQR_TOOM8_THRESHOLD
#define SQR_TOOM8_THRESHOLD sqr_toom8_threshold
extern mp_size_t sqr_toom8_threshold;

#undef  SQR_FFT_THRESHOLD
#define SQR_FFT_THRESHOLD sqr_fft_threshold
extern mp_size_t sqr_fft_threshold;

#undef  SQR_FFT_MODF_THRESHOLD
#define SQR_FFT_MODF_THRESHOLD sqr_fft_modf_threshold
extern mp_size_t sqr_fft_modf_threshold;

#undef  SQR_FFT_TABLE
#define SQR_FFT_TABLE { 0 }

#undef  SQR_FFT_TABLE3
#define SQR_FFT_TABLE3 { {0,0} }

#undef  MULLO_BASECASE_THRESHOLD
#define MULLO_BASECASE_THRESHOLD mullo_basecase_threshold
extern mp_size_t mullo_basecase_threshold;

#undef  MULLO_DC_THRESHOLD
#define MULLO_DC_THRESHOLD mullo_dc_threshold
extern mp_size_t mullo_dc_threshold;

#undef  MULLO_MUL_N_THRESHOLD
#define MULLO_MUL_N_THRESHOLD mullo_mul_n_threshold
extern mp_size_t mullo_mul_n_threshold;

#undef  SQRLO_BASECASE_THRESHOLD
#define SQRLO_BASECASE_THRESHOLD sqrlo_basecase_threshold
extern mp_size_t sqrlo_basecase_threshold;

#undef  SQRLO_DC_THRESHOLD
#define SQRLO_DC_THRESHOLD sqrlo_dc_threshold
extern mp_size_t sqrlo_dc_threshold;

#undef  SQRLO_SQR_THRESHOLD
#define SQRLO_SQR_THRESHOLD sqrlo_sqr_threshold
extern mp_size_t sqrlo_sqr_threshold;

#undef  MULMID_TOOM42_THRESHOLD
#define MULMID_TOOM42_THRESHOLD mulmid_toom42_threshold
extern mp_size_t mulmid_toom42_threshold;

#undef  DIV_QR_2_PI2_THRESHOLD
#define DIV_QR_2_PI2_THRESHOLD div_qr_2_pi2_threshold
extern mp_size_t div_qr_2_pi2_threshold;

#undef  DC_DIV_QR_THRESHOLD
#define DC_DIV_QR_THRESHOLD dc_div_qr_threshold
extern mp_size_t dc_div_qr_threshold;

#undef  DC_DIVAPPR_Q_THRESHOLD
#define DC_DIVAPPR_Q_THRESHOLD dc_divappr_q_threshold
extern mp_size_t dc_divappr_q_threshold;

#undef  DC_BDIV_Q_THRESHOLD
#define DC_BDIV_Q_THRESHOLD dc_bdiv_q_threshold
extern mp_size_t dc_bdiv_q_threshold;

#undef  DC_BDIV_QR_THRESHOLD
#define DC_BDIV_QR_THRESHOLD dc_bdiv_qr_threshold
extern mp_size_t dc_bdiv_qr_threshold;

#undef  MU_DIV_QR_THRESHOLD
#define MU_DIV_QR_THRESHOLD mu_div_qr_threshold
extern mp_size_t mu_div_qr_threshold;

#undef  MU_DIVAPPR_Q_THRESHOLD
#define MU_DIVAPPR_Q_THRESHOLD mu_divappr_q_threshold
extern mp_size_t mu_divappr_q_threshold;

#undef  MUPI_DIV_QR_THRESHOLD
#define MUPI_DIV_QR_THRESHOLD mupi_div_qr_threshold
extern mp_size_t mupi_div_qr_threshold;

#undef  MU_BDIV_QR_THRESHOLD
#define MU_BDIV_QR_THRESHOLD mu_bdiv_qr_threshold
extern mp_size_t mu_bdiv_qr_threshold;

#undef  MU_BDIV_Q_THRESHOLD
#define MU_BDIV_Q_THRESHOLD mu_bdiv_q_threshold
extern mp_size_t mu_bdiv_q_threshold;

#undef  INV_MULMOD_BNM1_THRESHOLD
#define INV_MULMOD_BNM1_THRESHOLD inv_mulmod_bnm1_threshold
extern mp_size_t inv_mulmod_bnm1_threshold;

#undef  INV_NEWTON_THRESHOLD
#define INV_NEWTON_THRESHOLD inv_newton_threshold
extern mp_size_t inv_newton_threshold;

#undef  INV_APPR_THRESHOLD
#define INV_APPR_THRESHOLD inv_appr_threshold
extern mp_size_t inv_appr_threshold;

#undef  BINV_NEWTON_THRESHOLD
#define BINV_NEWTON_THRESHOLD binv_newton_threshold
extern mp_size_t binv_newton_threshold;

#undef  REDC_1_TO_REDC_2_THRESHOLD
#define REDC_1_TO_REDC_2_THRESHOLD redc_1_to_redc_2_threshold
extern mp_size_t redc_1_to_redc_2_threshold;

#undef  REDC_2_TO_REDC_N_THRESHOLD
#define REDC_2_TO_REDC_N_THRESHOLD redc_2_to_redc_n_threshold
extern mp_size_t redc_2_to_redc_n_threshold;

#undef  REDC_1_TO_REDC_N_THRESHOLD
#define REDC_1_TO_REDC_N_THRESHOLD redc_1_to_redc_n_threshold
extern mp_size_t redc_1_to_redc_n_threshold;

#undef  MATRIX22_STRASSEN_THRESHOLD
#define MATRIX22_STRASSEN_THRESHOLD matrix22_strassen_threshold
extern mp_size_t matrix22_strassen_threshold;

typedef int hgcd2_func_t (mp_limb_t, mp_limb_t, mp_limb_t, mp_limb_t,
                          struct hgcd_matrix1 *);
extern hgcd2_func_t *hgcd2_func;
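/* A hedged sketch of how the hgcd2_func hook can be driven; the variant
   names mpn_hgcd2_alt1 and mpn_hgcd2_alt2 are hypothetical, shown only to
   illustrate the calling convention: */
#if 0
extern hgcd2_func_t mpn_hgcd2_alt1, mpn_hgcd2_alt2;

static int
example_hgcd2_timing (mp_limb_t ah, mp_limb_t al,
                      mp_limb_t bh, mp_limb_t bl,
                      struct hgcd_matrix1 *M)
{
  hgcd2_func = &mpn_hgcd2_alt1;              /* select a candidate to time */
  return (*hgcd2_func) (ah, al, bh, bl, M);  /* dispatch through the hook */
}
#endif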
#undef  HGCD_THRESHOLD
#define HGCD_THRESHOLD hgcd_threshold
extern mp_size_t hgcd_threshold;

#undef  HGCD_APPR_THRESHOLD
#define HGCD_APPR_THRESHOLD hgcd_appr_threshold
extern mp_size_t hgcd_appr_threshold;

#undef  HGCD_REDUCE_THRESHOLD
#define HGCD_REDUCE_THRESHOLD hgcd_reduce_threshold
extern mp_size_t hgcd_reduce_threshold;

#undef  GCD_DC_THRESHOLD
#define GCD_DC_THRESHOLD gcd_dc_threshold
extern mp_size_t gcd_dc_threshold;

#undef  GCDEXT_DC_THRESHOLD
#define GCDEXT_DC_THRESHOLD gcdext_dc_threshold
extern mp_size_t gcdext_dc_threshold;

#undef  DIV_QR_1N_PI1_METHOD
#define DIV_QR_1N_PI1_METHOD div_qr_1n_pi1_method
extern int div_qr_1n_pi1_method;

#undef  DIV_QR_1_NORM_THRESHOLD
#define DIV_QR_1_NORM_THRESHOLD div_qr_1_norm_threshold
extern mp_size_t div_qr_1_norm_threshold;

#undef  DIV_QR_1_UNNORM_THRESHOLD
#define DIV_QR_1_UNNORM_THRESHOLD div_qr_1_unnorm_threshold
extern mp_size_t div_qr_1_unnorm_threshold;

#undef  DIVREM_1_NORM_THRESHOLD
#define DIVREM_1_NORM_THRESHOLD divrem_1_norm_threshold
extern mp_size_t divrem_1_norm_threshold;

#undef  DIVREM_1_UNNORM_THRESHOLD
#define DIVREM_1_UNNORM_THRESHOLD divrem_1_unnorm_threshold
extern mp_size_t divrem_1_unnorm_threshold;

#undef  MOD_1_NORM_THRESHOLD
#define MOD_1_NORM_THRESHOLD mod_1_norm_threshold
extern mp_size_t mod_1_norm_threshold;

#undef  MOD_1_UNNORM_THRESHOLD
#define MOD_1_UNNORM_THRESHOLD mod_1_unnorm_threshold
extern mp_size_t mod_1_unnorm_threshold;

#undef  MOD_1_1P_METHOD
#define MOD_1_1P_METHOD mod_1_1p_method
extern int mod_1_1p_method;

#undef  MOD_1N_TO_MOD_1_1_THRESHOLD
#define MOD_1N_TO_MOD_1_1_THRESHOLD mod_1n_to_mod_1_1_threshold
extern mp_size_t mod_1n_to_mod_1_1_threshold;

#undef  MOD_1U_TO_MOD_1_1_THRESHOLD
#define MOD_1U_TO_MOD_1_1_THRESHOLD mod_1u_to_mod_1_1_threshold
extern mp_size_t mod_1u_to_mod_1_1_threshold;

#undef  MOD_1_1_TO_MOD_1_2_THRESHOLD
#define MOD_1_1_TO_MOD_1_2_THRESHOLD mod_1_1_to_mod_1_2_threshold
extern mp_size_t mod_1_1_to_mod_1_2_threshold;

#undef  MOD_1_2_TO_MOD_1_4_THRESHOLD
#define MOD_1_2_TO_MOD_1_4_THRESHOLD mod_1_2_to_mod_1_4_threshold
extern mp_size_t mod_1_2_to_mod_1_4_threshold;

#undef  PREINV_MOD_1_TO_MOD_1_THRESHOLD
#define PREINV_MOD_1_TO_MOD_1_THRESHOLD preinv_mod_1_to_mod_1_threshold
extern mp_size_t preinv_mod_1_to_mod_1_threshold;

#if ! UDIV_PREINV_ALWAYS
#undef  DIVREM_2_THRESHOLD
#define DIVREM_2_THRESHOLD divrem_2_threshold
extern mp_size_t divrem_2_threshold;
#endif

#undef  MULMOD_BNM1_THRESHOLD
#define MULMOD_BNM1_THRESHOLD mulmod_bnm1_threshold
extern mp_size_t mulmod_bnm1_threshold;

#undef  SQRMOD_BNM1_THRESHOLD
#define SQRMOD_BNM1_THRESHOLD sqrmod_bnm1_threshold
extern mp_size_t sqrmod_bnm1_threshold;

#undef  GET_STR_DC_THRESHOLD
#define GET_STR_DC_THRESHOLD get_str_dc_threshold
extern mp_size_t get_str_dc_threshold;

#undef  GET_STR_PRECOMPUTE_THRESHOLD
#define GET_STR_PRECOMPUTE_THRESHOLD get_str_precompute_threshold
extern mp_size_t get_str_precompute_threshold;

#undef  SET_STR_DC_THRESHOLD
#define SET_STR_DC_THRESHOLD set_str_dc_threshold
extern mp_size_t set_str_dc_threshold;

#undef  SET_STR_PRECOMPUTE_THRESHOLD
#define SET_STR_PRECOMPUTE_THRESHOLD set_str_precompute_threshold
extern mp_size_t set_str_precompute_threshold;

#undef  FAC_ODD_THRESHOLD
#define FAC_ODD_THRESHOLD fac_odd_threshold
extern mp_size_t fac_odd_threshold;

#undef  FAC_DSC_THRESHOLD
#define FAC_DSC_THRESHOLD fac_dsc_threshold
extern mp_size_t fac_dsc_threshold;

#undef  FFT_TABLE_ATTRS
#define FFT_TABLE_ATTRS
extern mp_size_t mpn_fft_table[2][MPN_FFT_TABLE_SIZE];
#define FFT_TABLE3_SIZE 2000	/* generous space for tuning */
extern struct fft_table_nk mpn_fft_table3[2][FFT_TABLE3_SIZE];

/* Sizes the tune program tests up to, used in a couple of recompilations. */
#undef  MUL_TOOM22_THRESHOLD_LIMIT
#undef  MUL_TOOM33_THRESHOLD_LIMIT
#undef  MULLO_BASECASE_THRESHOLD_LIMIT
#undef  SQRLO_BASECASE_THRESHOLD_LIMIT
#undef  SQRLO_DC_THRESHOLD_LIMIT
#undef  SQR_TOOM3_THRESHOLD_LIMIT
#define SQR_TOOM2_MAX_GENERIC           200
#define MUL_TOOM22_THRESHOLD_LIMIT      700
#define MUL_TOOM33_THRESHOLD_LIMIT      700
#define SQR_TOOM3_THRESHOLD_LIMIT       400
#define MUL_TOOM44_THRESHOLD_LIMIT     1000
#define SQR_TOOM4_THRESHOLD_LIMIT      1000
#define MUL_TOOM6H_THRESHOLD_LIMIT     1100
#define SQR_TOOM6_THRESHOLD_LIMIT      1100
#define MUL_TOOM8H_THRESHOLD_LIMIT     1200
#define SQR_TOOM8_THRESHOLD_LIMIT      1200
#define MULLO_BASECASE_THRESHOLD_LIMIT  200
#define SQRLO_BASECASE_THRESHOLD_LIMIT  200
#define SQRLO_DC_THRESHOLD_LIMIT        400
#define GET_STR_THRESHOLD_LIMIT         150
#define FAC_DSC_THRESHOLD_LIMIT        2048

#endif /* TUNE_PROGRAM_BUILD */

#if defined (__cplusplus)
}
#endif

/* FIXME: Make these itch functions less conservative.  Also consider making
   them dependent on just 'an', and compute the allocation directly from 'an'
   instead of via n. */

/* toom22/toom2: Scratch need is 2*(an + k); k is the recursion depth.
   k is the smallest k such that
     ceil(an/2^k) < MUL_TOOM22_THRESHOLD,
   which implies that
     k = bitsize of floor ((an-1)/(MUL_TOOM22_THRESHOLD-1))
       = 1 + floor (log_2 (floor ((an-1)/(MUL_TOOM22_THRESHOLD-1))))
*/
#define mpn_toom22_mul_itch(an, bn) \
  (2 * ((an) + GMP_NUMB_BITS))
#define mpn_toom2_sqr_itch(an) \
  (2 * ((an) + GMP_NUMB_BITS))
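/* Worked instance of the bound above (numbers purely illustrative): with
   MUL_TOOM22_THRESHOLD = 30 and an = 1000,
     floor ((an-1)/(MUL_TOOM22_THRESHOLD-1)) = floor (999/29) = 34,
   whose bitsize is 6, so k = 6 and the exact need is 2*(1000 + 6) = 2012
   limbs.  Since the depth k cannot exceed the bitsize of an, and hence in
   practice GMP_NUMB_BITS, the macros simply substitute that upper bound:
   2*(1000 + 64) = 2128 limbs with 64-bit limbs, always sufficient.  */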
/* toom33/toom3: Scratch need is 5an/2 + 10k, k is the recursion depth.
   We use 3an + C, so that we can use a smaller constant.
 */
#define mpn_toom33_mul_itch(an, bn) \
  (3 * (an) + GMP_NUMB_BITS)
#define mpn_toom3_sqr_itch(an) \
  (3 * (an) + GMP_NUMB_BITS)

/* toom44/toom4: Scratch need is 8an/3 + 13k, k is the recursion depth.
   We use 3an + C, so that we can use a smaller constant.
 */
#define mpn_toom44_mul_itch(an, bn) \
  (3 * (an) + GMP_NUMB_BITS)
#define mpn_toom4_sqr_itch(an) \
  (3 * (an) + GMP_NUMB_BITS)

#define mpn_toom6_sqr_itch(n)                           \
  (((n) - SQR_TOOM6_THRESHOLD)*2 +                      \
   MAX(SQR_TOOM6_THRESHOLD*2 + GMP_NUMB_BITS*6,         \
       mpn_toom4_sqr_itch(SQR_TOOM6_THRESHOLD)))

#define MUL_TOOM6H_MIN                                  \
  ((MUL_TOOM6H_THRESHOLD > MUL_TOOM44_THRESHOLD) ?      \
   MUL_TOOM6H_THRESHOLD : MUL_TOOM44_THRESHOLD)
#define mpn_toom6_mul_n_itch(n)                         \
  (((n) - MUL_TOOM6H_MIN)*2 +                           \
   MAX(MUL_TOOM6H_MIN*2 + GMP_NUMB_BITS*6,              \
       mpn_toom44_mul_itch(MUL_TOOM6H_MIN,MUL_TOOM6H_MIN)))

static inline mp_size_t
mpn_toom6h_mul_itch (mp_size_t an, mp_size_t bn)
{
  /* Derive an equivalent balanced size from the combined operand size.  */
  mp_size_t estimatedN;
  estimatedN = (an + bn) / (size_t) 10 + 1;
  return mpn_toom6_mul_n_itch (estimatedN * 6);
}

#define mpn_toom8_sqr_itch(n)                           \
  ((((n)*15)>>3) - ((SQR_TOOM8_THRESHOLD*15)>>3) +      \
   MAX(((SQR_TOOM8_THRESHOLD*15)>>3) + GMP_NUMB_BITS*6, \
       mpn_toom6_sqr_itch(SQR_TOOM8_THRESHOLD)))

#define MUL_TOOM8H_MIN                                  \
  ((MUL_TOOM8H_THRESHOLD > MUL_TOOM6H_MIN) ?            \
   MUL_TOOM8H_THRESHOLD : MUL_TOOM6H_MIN)
#define mpn_toom8_mul_n_itch(n)                         \
  ((((n)*15)>>3) - ((MUL_TOOM8H_MIN*15)>>3) +           \
   MAX(((MUL_TOOM8H_MIN*15)>>3) + GMP_NUMB_BITS*6,      \
       mpn_toom6_mul_n_itch(MUL_TOOM8H_MIN)))

static inline mp_size_t
mpn_toom8h_mul_itch (mp_size_t an, mp_size_t bn)
{
  mp_size_t estimatedN;
  estimatedN = (an + bn) / (size_t) 14 + 1;
  return mpn_toom8_mul_n_itch (estimatedN * 8);
}

static inline mp_size_t
mpn_toom32_mul_itch (mp_size_t an, mp_size_t bn)
{
  /* Block size: a is split in 3 parts, b in 2; n covers the larger part.  */
  mp_size_t n = 1 + (2 * an >= 3 * bn ? (an - 1) / (size_t) 3 : (bn - 1) >> 1);
  mp_size_t itch = 2 * n + 1;

  return itch;
}

static inline mp_size_t
mpn_toom42_mul_itch (mp_size_t an, mp_size_t bn)
{
  mp_size_t n = an >= 2 * bn ? (an + 3) >> 2 : (bn + 1) >> 1;
  return 6 * n + 3;
}

static inline mp_size_t
mpn_toom43_mul_itch (mp_size_t an, mp_size_t bn)
{
  mp_size_t n = 1 + (3 * an >= 4 * bn ? (an - 1) >> 2 : (bn - 1) / (size_t) 3);

  return 6 * n + 4;
}

static inline mp_size_t
mpn_toom52_mul_itch (mp_size_t an, mp_size_t bn)
{
  mp_size_t n = 1 + (2 * an >= 5 * bn ? (an - 1) / (size_t) 5 : (bn - 1) >> 1);
  return 6 * n + 4;
}

static inline mp_size_t
mpn_toom53_mul_itch (mp_size_t an, mp_size_t bn)
{
  mp_size_t n = 1 + (3 * an >= 5 * bn ? (an - 1) / (size_t) 5 : (bn - 1) / (size_t) 3);
  return 10 * n + 10;
}
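/* Worked instance (values purely illustrative): mpn_toom43_mul_itch with
   an = 400, bn = 300 takes the first branch since 3*an = 1200 >= 4*bn = 1200,
   giving
     n = 1 + ((400 - 1) >> 2) = 1 + 99 = 100 = ceil(an/4),
   the block size a is split into, and reports 6*100 + 4 = 604 limbs of
   scratch.  The other unbalanced itch functions follow the same pattern:
   n is the larger of ceil(an/k) and ceil(bn/m) for a (k,m)-way split,
   plugged into a small linear bound.  */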
static inline mp_size_t
mpn_toom62_mul_itch (mp_size_t an, mp_size_t bn)
{
  mp_size_t n = 1 + (an >= 3 * bn ? (an - 1) / (size_t) 6 : (bn - 1) >> 1);
  return 10 * n + 10;
}

static inline mp_size_t
mpn_toom63_mul_itch (mp_size_t an, mp_size_t bn)
{
  mp_size_t n = 1 + (an >= 2 * bn ? (an - 1) / (size_t) 6 : (bn - 1) / (size_t) 3);
  return 9 * n + 3;
}

static inline mp_size_t
mpn_toom54_mul_itch (mp_size_t an, mp_size_t bn)
{
  mp_size_t n = 1 + (4 * an >= 5 * bn ? (an - 1) / (size_t) 5 : (bn - 1) / (size_t) 4);
  return 9 * n + 3;
}

/* Let S(n) = space required for input size n,
   then S(n) = 3 floor(n/2) + 1 + S(floor(n/2)).
   Unrolling the recurrence gives S(n) <= 3n + k, where the recursion depth
   k is about log_2(n) and so never exceeds GMP_NUMB_BITS for any practical
   size; hence the bound below is always sufficient. */
#define mpn_toom42_mulmid_itch(n) \
  (3 * (n) + GMP_NUMB_BITS)

#if 0
#define mpn_fft_mul mpn_mul_fft_full
#else
#define mpn_fft_mul mpn_nussbaumer_mul
#endif

#ifdef __cplusplus

/* A little helper for a null-terminated __gmp_allocate_func string.
   The destructor ensures it's freed even if an exception is thrown.
   The len field is needed by the destructor, and can be used by anyone else
   to avoid a second strlen pass over the data.

   Since our input is a C string, using strlen is correct.  Perhaps it'd be
   more C++-ish style to use std::char_traits<char>::length, but char_traits
   isn't available in gcc 2.95.4. */

class gmp_allocated_string {
 public:
  char *str;
  size_t len;
  gmp_allocated_string (char *arg)
  {
    str = arg;
    len = std::strlen (str);
  }
  ~gmp_allocated_string ()
  {
    (*__gmp_free_func) (str, len + 1);
  }
};

std::istream &__gmpz_operator_in_nowhite (std::istream &, mpz_ptr, char);
int __gmp_istream_set_base (std::istream &, char &, bool &, bool &);
void __gmp_istream_set_digits (std::string &, std::istream &, char &, bool &, int);
void __gmp_doprnt_params_from_ios (struct doprnt_params_t *, std::ios &);
std::ostream &__gmp_doprnt_integer_ostream (std::ostream &, struct doprnt_params_t *, char *);
extern const struct doprnt_funs_t __gmp_asprintf_funs_noformat;

#endif /* __cplusplus */

#endif /* __GMP_IMPL_H__ */