1/* UltraSPARC 64 support macros.
2
3   THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY.  THEY'RE ALMOST
4   CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
5   FUTURE GNU MP RELEASES.
6
7Copyright 2003 Free Software Foundation, Inc.
8
9This file is part of the GNU MP Library.
10
11The GNU MP Library is free software; you can redistribute it and/or modify
12it under the terms of the GNU Lesser General Public License as published by
13the Free Software Foundation; either version 3 of the License, or (at your
14option) any later version.
15
16The GNU MP Library is distributed in the hope that it will be useful, but
17WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
18or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
19License for more details.
20
21You should have received a copy of the GNU Lesser General Public License
22along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
23
24
/* Split a 64-bit limb into its 32-bit halves: LOW32 masks off the bottom
   half, HIGH32 shifts the top half down to the bottom bits.  */
#define LOW32(x)   ((x) & 0xFFFFFFFF)
#define HIGH32(x)  ((x) >> 32)
27
28
29/* Halfword number i in src is accessed as src[i+HALF_ENDIAN_ADJ(i)].
30   Plain src[i] would be incorrect in big endian, HALF_ENDIAN_ADJ has the
31   effect of swapping the two halves in this case.  */
#if HAVE_LIMB_BIG_ENDIAN
/* Big endian: the two 32-bit halves of a limb sit swapped relative to
   halfword index order, so even indexes adjust by +1 and odd by -1.  */
#define HALF_ENDIAN_ADJ(i)  (1 - (((i) & 1) << 1))   /* +1 even, -1 odd */
#endif
#if HAVE_LIMB_LITTLE_ENDIAN
/* Little endian: halfword index order already matches memory order.  */
#define HALF_ENDIAN_ADJ(i)  0                        /* no adjust */
#endif
#ifndef HALF_ENDIAN_ADJ
Error, error, unknown limb endianness;   /* deliberate parse error when neither endian macro is set */
#endif
41
42
43/* umul_ppmm_lowequal sets h to the high limb of q*d, assuming the low limb
44   of that product is equal to l.  dh and dl are the 32-bit halves of d.
45
46   |-----high----||----low-----|
47   +------+------+
48   |             |                 ph = qh * dh
49   +------+------+
50          +------+------+
51          |             |          pm1 = ql * dh
52          +------+------+
53          +------+------+
54          |             |          pm2 = qh * dl
55          +------+------+
56                 +------+------+
57                 |             |   pl = ql * dl (not calculated)
58                 +------+------+
59
   Knowing that the low 64 bits of the product is equal to l means that
   LOW(pm1) + LOW(pm2) + HIGH(pl) == HIGH(l) (mod 2^32).  The only thing we
   need from those product parts is whether they produce a carry into the
   high limb.
63
64   pm_l = LOW(pm1)+LOW(pm2) is done to contribute its carry, then the only
65   time there's a further carry from LOW(pm_l)+HIGH(pl) is if LOW(pm_l) >
66   HIGH(l).  pl is never actually calculated.  */
67
#define umul_ppmm_lowequal(h, q, d, dh, dl, l)  \
  do {                                          \
    mp_limb_t  __q_lo, __q_hi, __hi, __mid_a, __mid_b, __mid_sum;  \
    ASSERT (dh == HIGH32(d));                   \
    ASSERT (dl == LOW32(d));                    \
    ASSERT (q*d == l);                          \
                                                \
    __q_lo = LOW32 (q);                         \
    __q_hi = HIGH32 (q);                        \
                                                \
    /* top partial product, then the two middle (cross) products */  \
    __hi    = __q_hi * dh;                      \
    __mid_a = __q_lo * dh;                      \
    __mid_b = __q_hi * dl;                      \
                                                \
    /* sum of the low halves of the middle products */  \
    __mid_sum = LOW32 (__mid_a) + LOW32 (__mid_b);      \
                                                \
    /* (__mid_sum << 32) > l is exactly the carry out of                  */ \
    /* LOW32(__mid_sum) + HIGH32(pl), so pl itself is never computed      */ \
    (h) = __hi + HIGH32 (__mid_a) + HIGH32 (__mid_b)    \
      + HIGH32 (__mid_sum) + ((__mid_sum << 32) > l);   \
                                                \
    ASSERT_HIGH_PRODUCT (h, q, d);              \
  } while (0)
89
90
91/* Set h to the high of q*d, assuming the low limb of that product is equal
92   to l, and that d fits in 32-bits.
93
94   |-----high----||----low-----|
95          +------+------+
96          |             |          pm = qh * dl
97          +------+------+
98                 +------+------+
99                 |             |   pl = ql * dl (not calculated)
100                 +------+------+
101
102   Knowing that LOW(pm) + HIGH(pl) == HIGH(l) (mod 2^32) means that the only
103   time there's a carry from that sum is when LOW(pm) > HIGH(l).  There's no
104   need to calculate pl to determine this.  */
105
#define umul_ppmm_half_lowequal(h, q, d, l)     \
  do {                                          \
    mp_limb_t  __mid, __carry;                  \
    ASSERT (q*d == l);                          \
    ASSERT (HIGH32(d) == 0);                    \
                                                \
    /* only the high(q)*d cross product is needed, low(q)*d never formed */ \
    __mid = HIGH32(q) * d;                      \
    /* a carry into the high occurs exactly when LOW32(__mid) > HIGH32(l) */ \
    __carry = (__mid << 32) > l;                \
    (h) = HIGH32(__mid) + __carry;              \
    ASSERT_HIGH_PRODUCT (h, q, d);              \
  } while (0)
116
117
118/* check that h is the high limb of x*y */
#if WANT_ASSERT
/* Recompute the full product x*y with umul_ppmm and check that h matches
   its high limb.  */
#define ASSERT_HIGH_PRODUCT(h, x, y)    \
  do {                                  \
    mp_limb_t  want_h, dummy;           \
    umul_ppmm (want_h, dummy, x, y);    \
    ASSERT (h == want_h);               \
  } while (0)
#else
/* Checking disabled: expands to nothing.  Parameter names (h, x, y) match
   the WANT_ASSERT version above for consistency.  */
#define ASSERT_HIGH_PRODUCT(h, x, y)    \
  do { } while (0)
#endif
130
131
132/* Count the leading zeros on a limb, but assuming it fits in 32 bits.
133   The count returned will be in the range 32 to 63.
134   This is the 32-bit generic C count_leading_zeros from longlong.h. */
#define count_leading_zeros_32(count, x)                                      \
  do {                                                                        \
    mp_limb_t  __v = (x);                                                     \
    unsigned   __sh;                                                          \
    ASSERT ((x) != 0);                                                        \
    ASSERT ((x) <= CNST_LIMB(0xFFFFFFFF));                                    \
    /* pick the shift that brings the top set byte down for the table */      \
    if (__v < ((UWtype) 1 << 16))                                             \
      __sh = __v < ((UWtype) 1 << 8) ? 1 : 8 + 1;                             \
    else                                                                      \
      __sh = __v < ((UWtype) 1 << 24) ? 16 + 1 : 24 + 1;                      \
                                                                              \
    (count) = W_TYPE_SIZE + 1 - __sh - __clz_tab[__v >> __sh];                \
  } while (0)
146
147
148/* Set inv to a 32-bit inverse floor((b*(b-d)-1) / d), knowing that d fits
149   32 bits and is normalized (high bit set).  */
#define invert_half_limb(inv, d)                \
  do {                                          \
    mp_limb_t  __num;                           \
    ASSERT ((d) <= 0xFFFFFFFF);                 \
    ASSERT ((d) & 0x80000000);                  \
    /* b*(b-d)-1 with b=2^32 equals ((b-1-d) << 32) | (b-1) */       \
    __num = (((mp_limb_t) (~(d) & 0xFFFFFFFF)) << 32) | 0xFFFFFFFF;  \
    (inv) = (mp_limb_t) (unsigned) (__num / (d));                    \
  } while (0)
158
159
160/* Divide nh:nl by d, setting q to the quotient and r to the remainder.
161   q, r, nh and nl are 32-bits each, d_limb is 32-bits but in an mp_limb_t,
162   dinv_limb is similarly a 32-bit inverse but in an mp_limb_t.  */
163
/* Algorithm: a quotient estimate q1 is formed from the pre-inverted divisor
   (see invert_half_limb), then a single conditional "addback" of d corrects
   both quotient and remainder.  The 32-bit locals wrap modulo 2^32 by
   design.  NOTE(review): the _n1 line relies on arithmetic (sign-filling)
   right shift of a negative int, which is implementation-defined in C --
   holds on the compilers this code targets, but confirm on a new one.  */
#define udiv_qrnnd_half_preinv(q, r, nh, nl, d_limb, dinv_limb)         \
  do {                                                                  \
    unsigned   _n2, _n10, _n1, _nadj, _q11n, _xh, _r, _q;               \
    mp_limb_t  _n, _x;                                                  \
    ASSERT (d_limb <= 0xFFFFFFFF);                                      \
    ASSERT (dinv_limb <= 0xFFFFFFFF);                                   \
    ASSERT (d_limb & 0x80000000);                                       \
    ASSERT (nh < d_limb);                                               \
    _n10 = (nl);                                                        \
    _n2 = (nh);                                                         \
    _n1 = (int) _n10 >> 31;                   /* 0, or all ones when nl has its high bit set */ \
    _nadj = _n10 + (_n1 & d_limb);            /* nl, or nl+d when _n1 is all ones */ \
    _x = dinv_limb * (_n2 - _n1) + _nadj;     /* quotient estimate q1 lands in HIGH32(_x) */ \
    _q11n = ~(_n2 + HIGH32 (_x));             /* -q1-1 */               \
    _n = ((mp_limb_t) _n2 << 32) + _n10;                                \
    _x = _n + d_limb * _q11n;                 /* n-q1*d-d */            \
    _xh = HIGH32 (_x) - d_limb;               /* high(n-q1*d-d) */      \
    ASSERT (_xh == 0 || _xh == ~0);                                     \
    _r = _x + (d_limb & _xh);                 /* addback */             \
    _q = _xh - _q11n;                         /* q1+1-addback */        \
    ASSERT (_r < d_limb);                                               \
    ASSERT (d_limb * _q + _r == _n);                                    \
    (r) = _r;                                                           \
    (q) = _q;                                                           \
  } while (0)
189
190
191