1/* mpn_invert_limb -- Invert a normalized limb. 2 3Copyright 1991, 2000, 2001 Free Software Foundation, Inc. 4 5This file is part of the GNU MP Library. 6 7The GNU MP Library is free software; you can redistribute it and/or modify 8it under the terms of the GNU Lesser General Public License as published 9by the Free Software Foundation; either version 3 of the License, or (at 10your option) any later version. 11 12The GNU MP Library is distributed in the hope that it will be useful, but 13WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 14or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public 15License for more details. 16 17You should have received a copy of the GNU Lesser General Public License 18along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ 19 20#include "gmp.h" 21#include "gmp-impl.h" 22#include "longlong.h" 23 24/* 25 This is needed to make configure define HAVE_NATIVE_mpn_invert_limb: 26 PROLOGUE(mpn_invert_limb) 27*/ 28 29static const unsigned short int approx_tab[0x100] = 30{ 31 /* 0x400, */ 32 0x3ff, 33 0x3fc, 0x3f8, 0x3f4, 0x3f0, 0x3ec, 0x3e8, 0x3e4, 34 0x3e0, 0x3dd, 0x3d9, 0x3d5, 0x3d2, 0x3ce, 0x3ca, 0x3c7, 35 0x3c3, 0x3c0, 0x3bc, 0x3b9, 0x3b5, 0x3b2, 0x3ae, 0x3ab, 36 0x3a8, 0x3a4, 0x3a1, 0x39e, 0x39b, 0x397, 0x394, 0x391, 37 0x38e, 0x38b, 0x387, 0x384, 0x381, 0x37e, 0x37b, 0x378, 38 0x375, 0x372, 0x36f, 0x36c, 0x369, 0x366, 0x364, 0x361, 39 0x35e, 0x35b, 0x358, 0x355, 0x353, 0x350, 0x34d, 0x34a, 40 0x348, 0x345, 0x342, 0x340, 0x33d, 0x33a, 0x338, 0x335, 41 0x333, 0x330, 0x32e, 0x32b, 0x329, 0x326, 0x324, 0x321, 42 0x31f, 0x31c, 0x31a, 0x317, 0x315, 0x313, 0x310, 0x30e, 43 0x30c, 0x309, 0x307, 0x305, 0x303, 0x300, 0x2fe, 0x2fc, 44 0x2fa, 0x2f7, 0x2f5, 0x2f3, 0x2f1, 0x2ef, 0x2ec, 0x2ea, 45 0x2e8, 0x2e6, 0x2e4, 0x2e2, 0x2e0, 0x2de, 0x2dc, 0x2da, 46 0x2d8, 0x2d6, 0x2d4, 0x2d2, 0x2d0, 0x2ce, 0x2cc, 0x2ca, 47 0x2c8, 0x2c6, 0x2c4, 0x2c2, 0x2c0, 0x2be, 0x2bc, 0x2bb, 48 0x2b9, 0x2b7, 0x2b5, 0x2b3, 0x2b1, 0x2b0, 0x2ae, 0x2ac, 49 0x2aa, 0x2a8, 0x2a7, 0x2a5, 0x2a3, 0x2a1, 0x2a0, 0x29e, 50 0x29c, 0x29b, 0x299, 0x297, 0x295, 0x294, 0x292, 0x291, 51 0x28f, 0x28d, 0x28c, 0x28a, 0x288, 0x287, 0x285, 0x284, 52 0x282, 0x280, 0x27f, 0x27d, 0x27c, 0x27a, 0x279, 0x277, 53 0x276, 0x274, 0x273, 0x271, 0x270, 0x26e, 0x26d, 0x26b, 54 0x26a, 0x268, 0x267, 0x265, 0x264, 0x263, 0x261, 0x260, 55 0x25e, 0x25d, 0x25c, 0x25a, 0x259, 0x257, 0x256, 0x255, 56 0x253, 0x252, 0x251, 0x24f, 0x24e, 0x24d, 0x24b, 0x24a, 57 0x249, 0x247, 0x246, 0x245, 0x243, 0x242, 0x241, 0x240, 58 0x23e, 0x23d, 0x23c, 0x23b, 0x239, 0x238, 0x237, 0x236, 59 0x234, 0x233, 0x232, 0x231, 0x230, 0x22e, 0x22d, 0x22c, 60 0x22b, 0x22a, 0x229, 0x227, 0x226, 0x225, 0x224, 0x223, 61 0x222, 0x220, 0x21f, 0x21e, 0x21d, 0x21c, 0x21b, 0x21a, 62 0x219, 0x218, 0x216, 0x215, 0x214, 0x213, 0x212, 0x211, 63 0x210, 0x20f, 0x20e, 0x20d, 0x20c, 0x20b, 0x20a, 0x209, 64 0x208, 0x207, 0x206, 0x205, 0x204, 0x203, 0x202, 0x201, 65}; 66 67/* iteration: z = 2z-(z**2)d */ 68 69mp_limb_t 70mpn_invert_limb (mp_limb_t d) 71{ 72 mp_limb_t z, z2l, z2h, tl, th; 73 mp_limb_t xh, xl; 74 mp_limb_t zh, zl; 75 76#if GMP_LIMB_BITS == 32 77 z = approx_tab[(d >> 23) - 0x100] << 6; /* z < 2^16 */ 78 79 z2l = z * z; /* z2l < 2^32 */ 80 umul_ppmm (th, tl, z2l, d); 81 z = (z << 17) - (th << 1); 82#endif 83#if GMP_LIMB_BITS == 64 84 z = approx_tab[(d >> 55) - 0x100] << 6; /* z < 2^16 */ 85 86 z2l = z * z; /* z2l < 2^32 */ 87 th = z2l * (d >> 32); /* th < 2^64 */ 88 z = (z << 17) - (th >> 31); /* z < 2^32 */ 89 90 z2l = z * z; 91 umul_ppmm (th, tl, z2l, d); 92 z = (z << 33) - (th << 1); 93#endif 94 95 umul_ppmm (z2h, z2l, z, z); 96 umul_ppmm (th, tl, z2h, d); 97 umul_ppmm (xh, xl, z2l, d); 98 tl += xh; 99 th += tl < xh; 100 th = (th << 2) | (tl >> GMP_LIMB_BITS - 2); 101 tl = tl << 2; 102 sub_ddmmss (zh, zl, z << 2, 0, th, tl); 103 104 umul_ppmm (xh, xl, d, zh); 105 xh += d; /* add_ssaaaa (xh, xl, xh, xl, d, 0); */ 106 if (~xh != 0) 107 { 108 add_ssaaaa (xh, xl, xh, xl, 0, d); 109 zh++; 110 } 111 112 add_ssaaaa (xh, xl, xh, xl, 0, d); 113 if (xh != 0) 114 zh++; 115 116 return zh; 117} 118