/* UltraSPARC 64 support macros.

   THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY.  THEY'RE ALMOST
   CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
   FUTURE GNU MP RELEASES.

Copyright 2003 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 3 of the License, or (at your
option) any later version.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
License for more details.

You should have received a copy of the GNU Lesser General Public License
along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */


#define LOW32(x)   ((x) & 0xFFFFFFFF)
#define HIGH32(x)  ((x) >> 32)


/* Halfword number i in src is accessed as src[i+HALF_ENDIAN_ADJ(i)].
   Plain src[i] would be incorrect on a big-endian system; HALF_ENDIAN_ADJ
   has the effect of swapping the two halves in that case.  */
#if HAVE_LIMB_BIG_ENDIAN
#define HALF_ENDIAN_ADJ(i)  (1 - (((i) & 1) << 1))   /* +1 even, -1 odd */
#endif
#if HAVE_LIMB_LITTLE_ENDIAN
#define HALF_ENDIAN_ADJ(i)  0                        /* no adjust */
#endif
#ifndef HALF_ENDIAN_ADJ
Error, error, unknown limb endianness;
#endif


/* umul_ppmm_lowequal sets h to the high limb of q*d, assuming the low limb
   of that product is equal to l.  dh and dl are the 32-bit halves of d.

      |-----high----||----low-----|
      +------+------+
      |             |                  ph  = qh * dh
      +------+------+
             +------+------+
             |             |           pm1 = ql * dh
             +------+------+
             +------+------+
             |             |           pm2 = qh * dl
             +------+------+
                    +------+------+
                    |             |    pl  = ql * dl (not calculated)
                    +------+------+

   Knowing that the low 64 bits of the product are equal to l means that
   LOW(pm1) + LOW(pm2) + HIGH(pl) == HIGH(l).  The only thing we need from
   those product parts is whether they produce a carry into the high limb.

   pm_l = LOW(pm1)+LOW(pm2) is formed so that its carry can be added in;
   after that the only time there's a further carry from LOW(pm_l)+HIGH(pl)
   is when LOW(pm_l) > HIGH(l).  pl is never actually calculated.  */

#define umul_ppmm_lowequal(h, q, d, dh, dl, l)  \
  do {                                          \
    mp_limb_t  ql, qh, ph, pm1, pm2, pm_l;      \
    ASSERT (dh == HIGH32(d));                   \
    ASSERT (dl == LOW32(d));                    \
    ASSERT (q*d == l);                          \
                                                \
    ql = LOW32 (q);                             \
    qh = HIGH32 (q);                            \
                                                \
    pm1 = ql * dh;                              \
    pm2 = qh * dl;                              \
    ph  = qh * dh;                              \
                                                \
    pm_l = LOW32 (pm1) + LOW32 (pm2);           \
                                                \
    (h) = ph + HIGH32 (pm1) + HIGH32 (pm2)      \
      + HIGH32 (pm_l) + ((pm_l << 32) > l);     \
                                                \
    ASSERT_HIGH_PRODUCT (h, q, d);              \
  } while (0)
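

/* Illustrative sketch, not part of GMP and not compiled (guarded by #if 0):
   the same half-limb cross-product scheme as umul_ppmm_lowequal, written
   out on plain uint64_t so the carry reasoning above can be checked in
   isolation.  The names example_umul64 and example_high_product are
   hypothetical helpers invented only for this demonstration; example_umul64
   is a schoolbook 64x64->128 multiply used as a reference check.  */
#if 0
#include <assert.h>
#include <stdint.h>

/* Reference: full 64x64->128 multiply from four 32x32->64 partial
   products, returning the high and low 64-bit halves.  */
static void
example_umul64 (uint64_t x, uint64_t y, uint64_t *hi, uint64_t *lo)
{
  uint64_t xl = x & 0xFFFFFFFF, xh = x >> 32;
  uint64_t yl = y & 0xFFFFFFFF, yh = y >> 32;
  uint64_t pl  = xl * yl;
  uint64_t pm1 = xl * yh;
  uint64_t pm2 = xh * yl;
  uint64_t ph  = xh * yh;
  uint64_t mid = (pl >> 32) + (pm1 & 0xFFFFFFFF) + (pm2 & 0xFFFFFFFF);
  *lo = (mid << 32) | (pl & 0xFFFFFFFF);
  *hi = ph + (pm1 >> 32) + (pm2 >> 32) + (mid >> 32);
}

/* Given q, d and the known low limb l == low(q*d), recover high(q*d)
   without ever forming pl = LOW(q)*LOW(d), exactly as the macro does:
   the carry out of the middle column is deduced from (pm_l << 32) > l.  */
static uint64_t
example_high_product (uint64_t q, uint64_t d, uint64_t l)
{
  uint64_t ql = q & 0xFFFFFFFF, qh = q >> 32;
  uint64_t dl = d & 0xFFFFFFFF, dh = d >> 32;
  uint64_t pm1 = ql * dh;
  uint64_t pm2 = qh * dl;
  uint64_t ph  = qh * dh;
  uint64_t pm_l = (pm1 & 0xFFFFFFFF) + (pm2 & 0xFFFFFFFF);
  uint64_t h = ph + (pm1 >> 32) + (pm2 >> 32)
               + (pm_l >> 32) + ((pm_l << 32) > l);

  uint64_t want_h, want_l;
  example_umul64 (q, d, &want_h, &want_l);
  assert (want_l == l);
  assert (h == want_h);
  return h;
}
#endif
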

/* Set h to the high limb of q*d, assuming the low limb of that product is
   equal to l, and that d fits in 32 bits.

      |-----high----||----low-----|
             +------+------+
             |             |           pm = qh * dl
             +------+------+
                    +------+------+
                    |             |    pl = ql * dl (not calculated)
                    +------+------+

   Knowing that LOW(pm) + HIGH(pl) == HIGH(l) (mod 2^32) means the only
   time there's a carry from that sum is when LOW(pm) > HIGH(l).  There's
   no need to calculate pl to determine this.  */

#define umul_ppmm_half_lowequal(h, q, d, l)     \
  do {                                          \
    mp_limb_t  pm;                              \
    ASSERT (q*d == l);                          \
    ASSERT (HIGH32(d) == 0);                    \
                                                \
    pm = HIGH32(q) * d;                         \
    (h) = HIGH32(pm) + ((pm << 32) > l);        \
    ASSERT_HIGH_PRODUCT (h, q, d);              \
  } while (0)


/* Check that h is the high limb of x*y.  */
#if WANT_ASSERT
#define ASSERT_HIGH_PRODUCT(h, x, y)    \
  do {                                  \
    mp_limb_t  want_h, dummy;           \
    umul_ppmm (want_h, dummy, x, y);    \
    ASSERT (h == want_h);               \
  } while (0)
#else
#define ASSERT_HIGH_PRODUCT(h, q, d)    \
  do { } while (0)
#endif


/* Count the leading zeros on a limb, assuming its value fits in 32 bits.
   The count returned will be in the range 32 to 63.
   This is the 32-bit generic C count_leading_zeros from longlong.h.  */
#define count_leading_zeros_32(count, x)                                      \
  do {                                                                        \
    mp_limb_t  __xr = (x);                                                    \
    unsigned   __a;                                                           \
    ASSERT ((x) != 0);                                                        \
    ASSERT ((x) <= CNST_LIMB(0xFFFFFFFF));                                    \
    __a = __xr < ((UWtype) 1 << 16) ? (__xr < ((UWtype) 1 << 8) ? 1 : 8 + 1)  \
      : (__xr < ((UWtype) 1 << 24) ? 16 + 1 : 24 + 1);                        \
                                                                              \
    (count) = W_TYPE_SIZE + 1 - __a - __clz_tab[__xr >> __a];                 \
  } while (0)


/* Set inv to a 32-bit inverse floor((b*(b-d)-1) / d), where b = 2^32,
   knowing that d fits in 32 bits and is normalized (high bit set).  */
#define invert_half_limb(inv, d)                \
  do {                                          \
    mp_limb_t  _n;                              \
    ASSERT ((d) <= 0xFFFFFFFF);                 \
    ASSERT ((d) & 0x80000000);                  \
    _n = (((mp_limb_t) -(d)) << 32) - 1;        \
    (inv) = (mp_limb_t) (unsigned) (_n / (d));  \
  } while (0)


/* Divide nh:nl by d, setting q to the quotient and r to the remainder.
   q, r, nh and nl are 32 bits each, d_limb is a 32-bit value held in an
   mp_limb_t, and dinv_limb is similarly a 32-bit inverse held in an
   mp_limb_t.  */

#define udiv_qrnnd_half_preinv(q, r, nh, nl, d_limb, dinv_limb)         \
  do {                                                                  \
    unsigned   _n2, _n10, _n1, _nadj, _q11n, _xh, _r, _q;               \
    mp_limb_t  _n, _x;                                                  \
    ASSERT (d_limb <= 0xFFFFFFFF);                                      \
    ASSERT (dinv_limb <= 0xFFFFFFFF);                                   \
    ASSERT (d_limb & 0x80000000);                                       \
    ASSERT (nh < d_limb);                                               \
    _n10 = (nl);                                                        \
    _n2 = (nh);                                                         \
    _n1 = (int) _n10 >> 31;                                             \
    _nadj = _n10 + (_n1 & d_limb);                                      \
    _x = dinv_limb * (_n2 - _n1) + _nadj;                               \
    _q11n = ~(_n2 + HIGH32 (_x));             /* -q1-1 */               \
    _n = ((mp_limb_t) _n2 << 32) + _n10;                                \
    _x = _n + d_limb * _q11n;                 /* n-q1*d-d */            \
    _xh = HIGH32 (_x) - d_limb;               /* high(n-q1*d-d) */      \
    ASSERT (_xh == 0 || _xh == ~0);                                     \
    _r = _x + (d_limb & _xh);                 /* addback */             \
    _q = _xh - _q11n;                         /* q1+1-addback */        \
    ASSERT (_r < d_limb);                                               \
    ASSERT (d_limb * _q + _r == _n);                                    \
    (r) = _r;                                                           \
    (q) = _q;                                                           \
  } while (0)
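

/* Illustrative sketch, not part of GMP and not compiled (guarded by #if 0):
   the invert_half_limb / udiv_qrnnd_half_preinv pair redone on plain C
   types, so the precomputed-inverse division sequence above can be traced
   step by step with explicit 32/64-bit widths.  The names
   example_invert_half and example_div_half_preinv are hypothetical helpers
   invented only for this demonstration; the final assert checks q and r
   against an ordinary 64-by-32 division.  */
#if 0
#include <assert.h>
#include <stdint.h>

/* dinv = floor((b*(b-d)-1) / d) with b = 2^32, for normalized 32-bit d.
   The result always fits in 32 bits because d >= b/2.  */
static uint32_t
example_invert_half (uint32_t d)
{
  uint64_t b = (uint64_t) 1 << 32;
  assert (d & 0x80000000);
  return (uint32_t) (((b - d) * b - 1) / d);
}

/* Divide nh:nl by d (nh < d, d normalized) using the precomputed inverse,
   following the same steps and the same truncations as the macro.  */
static void
example_div_half_preinv (uint32_t *q, uint32_t *r,
                         uint32_t nh, uint32_t nl,
                         uint32_t d, uint32_t dinv)
{
  assert ((d & 0x80000000) && nh < d);

  uint32_t n1   = -(nl >> 31);                      /* 0 or 0xFFFFFFFF */
  uint32_t nadj = nl + (n1 & d);
  uint64_t x    = (uint64_t) dinv * (nh - n1) + nadj;
  uint32_t q1c  = ~(nh + (uint32_t) (x >> 32));     /* -q1-1 */
  uint64_t n    = ((uint64_t) nh << 32) | nl;
  uint64_t x2   = n + (uint64_t) d * q1c;           /* n-q1*d-d */
  uint32_t xh   = (uint32_t) (x2 >> 32) - d;        /* 0 or 0xFFFFFFFF */
  uint32_t rr   = (uint32_t) x2 + (d & xh);         /* addback */
  uint32_t qq   = xh - q1c;                         /* q1+1-addback */

  assert (qq == n / d && rr == n % d);
  *q = qq;
  *r = rr;
}
#endif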