APInt.cpp revision 195340
1//===-- APInt.cpp - Implement APInt class ---------------------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file implements a class to represent arbitrary precision integer
11// constant values and provide a variety of arithmetic operations on them.
12//
13//===----------------------------------------------------------------------===//
14
15#define DEBUG_TYPE "apint"
16#include "llvm/ADT/APInt.h"
17#include "llvm/ADT/FoldingSet.h"
18#include "llvm/ADT/SmallString.h"
19#include "llvm/Support/Debug.h"
20#include "llvm/Support/MathExtras.h"
21#include "llvm/Support/raw_ostream.h"
22#include <cmath>
23#include <limits>
24#include <cstring>
25#include <cstdlib>
26using namespace llvm;
27
28/// A utility function for allocating memory, checking for allocation failures,
29/// and ensuring the contents are zeroed.
30inline static uint64_t* getClearedMemory(unsigned numWords) {
31  uint64_t * result = new uint64_t[numWords];
32  assert(result && "APInt memory allocation fails!");
33  memset(result, 0, numWords * sizeof(uint64_t));
34  return result;
35}
36
37/// A utility function for allocating memory and checking for allocation
38/// failure.  The content is not zeroed.
39inline static uint64_t* getMemory(unsigned numWords) {
40  uint64_t * result = new uint64_t[numWords];
41  assert(result && "APInt memory allocation fails!");
42  return result;
43}
44
45void APInt::initSlowCase(unsigned numBits, uint64_t val, bool isSigned) {
46  pVal = getClearedMemory(getNumWords());
47  pVal[0] = val;
48  if (isSigned && int64_t(val) < 0)
49    for (unsigned i = 1; i < getNumWords(); ++i)
50      pVal[i] = -1ULL;
51}
52
53void APInt::initSlowCase(const APInt& that) {
54  pVal = getMemory(getNumWords());
55  memcpy(pVal, that.pVal, getNumWords() * APINT_WORD_SIZE);
56}
57
58
59APInt::APInt(unsigned numBits, unsigned numWords, const uint64_t bigVal[])
60  : BitWidth(numBits), VAL(0) {
61  assert(BitWidth && "bitwidth too small");
62  assert(bigVal && "Null pointer detected!");
63  if (isSingleWord())
64    VAL = bigVal[0];
65  else {
66    // Get memory, cleared to 0
67    pVal = getClearedMemory(getNumWords());
68    // Calculate the number of words to copy
69    unsigned words = std::min<unsigned>(numWords, getNumWords());
70    // Copy the words from bigVal to pVal
71    memcpy(pVal, bigVal, words * APINT_WORD_SIZE);
72  }
73  // Make sure unused high bits are cleared
74  clearUnusedBits();
75}
76
77APInt::APInt(unsigned numbits, const char StrStart[], unsigned slen,
78             uint8_t radix)
79  : BitWidth(numbits), VAL(0) {
80  assert(BitWidth && "bitwidth too small");
81  fromString(numbits, StrStart, slen, radix);
82}
83
84APInt& APInt::AssignSlowCase(const APInt& RHS) {
85  // Don't do anything for X = X
86  if (this == &RHS)
87    return *this;
88
89  if (BitWidth == RHS.getBitWidth()) {
90    // assume same bit-width single-word case is already handled
91    assert(!isSingleWord());
92    memcpy(pVal, RHS.pVal, getNumWords() * APINT_WORD_SIZE);
93    return *this;
94  }
95
96  if (isSingleWord()) {
97    // assume case where both are single words is already handled
98    assert(!RHS.isSingleWord());
99    VAL = 0;
100    pVal = getMemory(RHS.getNumWords());
101    memcpy(pVal, RHS.pVal, RHS.getNumWords() * APINT_WORD_SIZE);
102  } else if (getNumWords() == RHS.getNumWords())
103    memcpy(pVal, RHS.pVal, RHS.getNumWords() * APINT_WORD_SIZE);
104  else if (RHS.isSingleWord()) {
105    delete [] pVal;
106    VAL = RHS.VAL;
107  } else {
108    delete [] pVal;
109    pVal = getMemory(RHS.getNumWords());
110    memcpy(pVal, RHS.pVal, RHS.getNumWords() * APINT_WORD_SIZE);
111  }
112  BitWidth = RHS.BitWidth;
113  return clearUnusedBits();
114}
115
116APInt& APInt::operator=(uint64_t RHS) {
117  if (isSingleWord())
118    VAL = RHS;
119  else {
120    pVal[0] = RHS;
121    memset(pVal+1, 0, (getNumWords() - 1) * APINT_WORD_SIZE);
122  }
123  return clearUnusedBits();
124}
125
126/// Profile - This method 'profiles' an APInt for use with FoldingSet.
127void APInt::Profile(FoldingSetNodeID& ID) const {
128  ID.AddInteger(BitWidth);
129
130  if (isSingleWord()) {
131    ID.AddInteger(VAL);
132    return;
133  }
134
135  unsigned NumWords = getNumWords();
136  for (unsigned i = 0; i < NumWords; ++i)
137    ID.AddInteger(pVal[i]);
138}
139
140/// add_1 - This function adds a single "digit" integer, y, to the multiple
141/// "digit" integer array,  x[]. x[] is modified to reflect the addition and
142/// 1 is returned if there is a carry out, otherwise 0 is returned.
143/// @returns the carry of the addition.
144static bool add_1(uint64_t dest[], uint64_t x[], unsigned len, uint64_t y) {
145  for (unsigned i = 0; i < len; ++i) {
146    dest[i] = y + x[i];
147    if (dest[i] < y)
148      y = 1; // Carry one to next digit.
149    else {
150      y = 0; // No need to carry so exit early
151      break;
152    }
153  }
154  return y;
155}
156
157/// @brief Prefix increment operator. Increments the APInt by one.
158APInt& APInt::operator++() {
159  if (isSingleWord())
160    ++VAL;
161  else
162    add_1(pVal, pVal, getNumWords(), 1);
163  return clearUnusedBits();
164}
165
166/// sub_1 - This function subtracts a single "digit" (64-bit word), y, from
167/// the multi-digit integer array, x[], propagating the borrowed 1 value until
168/// no further borrowing is neeeded or it runs out of "digits" in x.  The result
169/// is 1 if "borrowing" exhausted the digits in x, or 0 if x was not exhausted.
170/// In other words, if y > x then this function returns 1, otherwise 0.
171/// @returns the borrow out of the subtraction
172static bool sub_1(uint64_t x[], unsigned len, uint64_t y) {
173  for (unsigned i = 0; i < len; ++i) {
174    uint64_t X = x[i];
175    x[i] -= y;
176    if (y > X)
177      y = 1;  // We have to "borrow 1" from next "digit"
178    else {
179      y = 0;  // No need to borrow
180      break;  // Remaining digits are unchanged so exit early
181    }
182  }
183  return bool(y);
184}
185
186/// @brief Prefix decrement operator. Decrements the APInt by one.
187APInt& APInt::operator--() {
188  if (isSingleWord())
189    --VAL;
190  else
191    sub_1(pVal, getNumWords(), 1);
192  return clearUnusedBits();
193}
194
195/// add - This function adds the integer array x to the integer array Y and
196/// places the result in dest.
197/// @returns the carry out from the addition
198/// @brief General addition of 64-bit integer arrays
199static bool add(uint64_t *dest, const uint64_t *x, const uint64_t *y,
200                unsigned len) {
201  bool carry = false;
202  for (unsigned i = 0; i< len; ++i) {
203    uint64_t limit = std::min(x[i],y[i]); // must come first in case dest == x
204    dest[i] = x[i] + y[i] + carry;
205    carry = dest[i] < limit || (carry && dest[i] == limit);
206  }
207  return carry;
208}
209
210/// Adds the RHS APint to this APInt.
211/// @returns this, after addition of RHS.
212/// @brief Addition assignment operator.
213APInt& APInt::operator+=(const APInt& RHS) {
214  assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
215  if (isSingleWord())
216    VAL += RHS.VAL;
217  else {
218    add(pVal, pVal, RHS.pVal, getNumWords());
219  }
220  return clearUnusedBits();
221}
222
223/// Subtracts the integer array y from the integer array x
224/// @returns returns the borrow out.
225/// @brief Generalized subtraction of 64-bit integer arrays.
226static bool sub(uint64_t *dest, const uint64_t *x, const uint64_t *y,
227                unsigned len) {
228  bool borrow = false;
229  for (unsigned i = 0; i < len; ++i) {
230    uint64_t x_tmp = borrow ? x[i] - 1 : x[i];
231    borrow = y[i] > x_tmp || (borrow && x[i] == 0);
232    dest[i] = x_tmp - y[i];
233  }
234  return borrow;
235}
236
237/// Subtracts the RHS APInt from this APInt
238/// @returns this, after subtraction
239/// @brief Subtraction assignment operator.
240APInt& APInt::operator-=(const APInt& RHS) {
241  assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
242  if (isSingleWord())
243    VAL -= RHS.VAL;
244  else
245    sub(pVal, pVal, RHS.pVal, getNumWords());
246  return clearUnusedBits();
247}
248
249/// Multiplies an integer array, x by a a uint64_t integer and places the result
250/// into dest.
251/// @returns the carry out of the multiplication.
252/// @brief Multiply a multi-digit APInt by a single digit (64-bit) integer.
253static uint64_t mul_1(uint64_t dest[], uint64_t x[], unsigned len, uint64_t y) {
254  // Split y into high 32-bit part (hy)  and low 32-bit part (ly)
255  uint64_t ly = y & 0xffffffffULL, hy = y >> 32;
256  uint64_t carry = 0;
257
258  // For each digit of x.
259  for (unsigned i = 0; i < len; ++i) {
260    // Split x into high and low words
261    uint64_t lx = x[i] & 0xffffffffULL;
262    uint64_t hx = x[i] >> 32;
263    // hasCarry - A flag to indicate if there is a carry to the next digit.
264    // hasCarry == 0, no carry
265    // hasCarry == 1, has carry
266    // hasCarry == 2, no carry and the calculation result == 0.
267    uint8_t hasCarry = 0;
268    dest[i] = carry + lx * ly;
269    // Determine if the add above introduces carry.
270    hasCarry = (dest[i] < carry) ? 1 : 0;
271    carry = hx * ly + (dest[i] >> 32) + (hasCarry ? (1ULL << 32) : 0);
272    // The upper limit of carry can be (2^32 - 1)(2^32 - 1) +
273    // (2^32 - 1) + 2^32 = 2^64.
274    hasCarry = (!carry && hasCarry) ? 1 : (!carry ? 2 : 0);
275
276    carry += (lx * hy) & 0xffffffffULL;
277    dest[i] = (carry << 32) | (dest[i] & 0xffffffffULL);
278    carry = (((!carry && hasCarry != 2) || hasCarry == 1) ? (1ULL << 32) : 0) +
279            (carry >> 32) + ((lx * hy) >> 32) + hx * hy;
280  }
281  return carry;
282}
283
284/// Multiplies integer array x by integer array y and stores the result into
285/// the integer array dest. Note that dest's size must be >= xlen + ylen.
286/// @brief Generalized multiplicate of integer arrays.
287static void mul(uint64_t dest[], uint64_t x[], unsigned xlen, uint64_t y[],
288                unsigned ylen) {
289  dest[xlen] = mul_1(dest, x, xlen, y[0]);
290  for (unsigned i = 1; i < ylen; ++i) {
291    uint64_t ly = y[i] & 0xffffffffULL, hy = y[i] >> 32;
292    uint64_t carry = 0, lx = 0, hx = 0;
293    for (unsigned j = 0; j < xlen; ++j) {
294      lx = x[j] & 0xffffffffULL;
295      hx = x[j] >> 32;
296      // hasCarry - A flag to indicate if has carry.
297      // hasCarry == 0, no carry
298      // hasCarry == 1, has carry
299      // hasCarry == 2, no carry and the calculation result == 0.
300      uint8_t hasCarry = 0;
301      uint64_t resul = carry + lx * ly;
302      hasCarry = (resul < carry) ? 1 : 0;
303      carry = (hasCarry ? (1ULL << 32) : 0) + hx * ly + (resul >> 32);
304      hasCarry = (!carry && hasCarry) ? 1 : (!carry ? 2 : 0);
305
306      carry += (lx * hy) & 0xffffffffULL;
307      resul = (carry << 32) | (resul & 0xffffffffULL);
308      dest[i+j] += resul;
309      carry = (((!carry && hasCarry != 2) || hasCarry == 1) ? (1ULL << 32) : 0)+
310              (carry >> 32) + (dest[i+j] < resul ? 1 : 0) +
311              ((lx * hy) >> 32) + hx * hy;
312    }
313    dest[i+xlen] = carry;
314  }
315}
316
317APInt& APInt::operator*=(const APInt& RHS) {
318  assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
319  if (isSingleWord()) {
320    VAL *= RHS.VAL;
321    clearUnusedBits();
322    return *this;
323  }
324
325  // Get some bit facts about LHS and check for zero
326  unsigned lhsBits = getActiveBits();
327  unsigned lhsWords = !lhsBits ? 0 : whichWord(lhsBits - 1) + 1;
328  if (!lhsWords)
329    // 0 * X ===> 0
330    return *this;
331
332  // Get some bit facts about RHS and check for zero
333  unsigned rhsBits = RHS.getActiveBits();
334  unsigned rhsWords = !rhsBits ? 0 : whichWord(rhsBits - 1) + 1;
335  if (!rhsWords) {
336    // X * 0 ===> 0
337    clear();
338    return *this;
339  }
340
341  // Allocate space for the result
342  unsigned destWords = rhsWords + lhsWords;
343  uint64_t *dest = getMemory(destWords);
344
345  // Perform the long multiply
346  mul(dest, pVal, lhsWords, RHS.pVal, rhsWords);
347
348  // Copy result back into *this
349  clear();
350  unsigned wordsToCopy = destWords >= getNumWords() ? getNumWords() : destWords;
351  memcpy(pVal, dest, wordsToCopy * APINT_WORD_SIZE);
352
353  // delete dest array and return
354  delete[] dest;
355  return *this;
356}
357
358APInt& APInt::operator&=(const APInt& RHS) {
359  assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
360  if (isSingleWord()) {
361    VAL &= RHS.VAL;
362    return *this;
363  }
364  unsigned numWords = getNumWords();
365  for (unsigned i = 0; i < numWords; ++i)
366    pVal[i] &= RHS.pVal[i];
367  return *this;
368}
369
370APInt& APInt::operator|=(const APInt& RHS) {
371  assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
372  if (isSingleWord()) {
373    VAL |= RHS.VAL;
374    return *this;
375  }
376  unsigned numWords = getNumWords();
377  for (unsigned i = 0; i < numWords; ++i)
378    pVal[i] |= RHS.pVal[i];
379  return *this;
380}
381
382APInt& APInt::operator^=(const APInt& RHS) {
383  assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
384  if (isSingleWord()) {
385    VAL ^= RHS.VAL;
386    this->clearUnusedBits();
387    return *this;
388  }
389  unsigned numWords = getNumWords();
390  for (unsigned i = 0; i < numWords; ++i)
391    pVal[i] ^= RHS.pVal[i];
392  return clearUnusedBits();
393}
394
395APInt APInt::AndSlowCase(const APInt& RHS) const {
396  unsigned numWords = getNumWords();
397  uint64_t* val = getMemory(numWords);
398  for (unsigned i = 0; i < numWords; ++i)
399    val[i] = pVal[i] & RHS.pVal[i];
400  return APInt(val, getBitWidth());
401}
402
403APInt APInt::OrSlowCase(const APInt& RHS) const {
404  unsigned numWords = getNumWords();
405  uint64_t *val = getMemory(numWords);
406  for (unsigned i = 0; i < numWords; ++i)
407    val[i] = pVal[i] | RHS.pVal[i];
408  return APInt(val, getBitWidth());
409}
410
411APInt APInt::XorSlowCase(const APInt& RHS) const {
412  unsigned numWords = getNumWords();
413  uint64_t *val = getMemory(numWords);
414  for (unsigned i = 0; i < numWords; ++i)
415    val[i] = pVal[i] ^ RHS.pVal[i];
416
417  // 0^0==1 so clear the high bits in case they got set.
418  return APInt(val, getBitWidth()).clearUnusedBits();
419}
420
421bool APInt::operator !() const {
422  if (isSingleWord())
423    return !VAL;
424
425  for (unsigned i = 0; i < getNumWords(); ++i)
426    if (pVal[i])
427      return false;
428  return true;
429}
430
431APInt APInt::operator*(const APInt& RHS) const {
432  assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
433  if (isSingleWord())
434    return APInt(BitWidth, VAL * RHS.VAL);
435  APInt Result(*this);
436  Result *= RHS;
437  return Result.clearUnusedBits();
438}
439
440APInt APInt::operator+(const APInt& RHS) const {
441  assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
442  if (isSingleWord())
443    return APInt(BitWidth, VAL + RHS.VAL);
444  APInt Result(BitWidth, 0);
445  add(Result.pVal, this->pVal, RHS.pVal, getNumWords());
446  return Result.clearUnusedBits();
447}
448
449APInt APInt::operator-(const APInt& RHS) const {
450  assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
451  if (isSingleWord())
452    return APInt(BitWidth, VAL - RHS.VAL);
453  APInt Result(BitWidth, 0);
454  sub(Result.pVal, this->pVal, RHS.pVal, getNumWords());
455  return Result.clearUnusedBits();
456}
457
458bool APInt::operator[](unsigned bitPosition) const {
459  return (maskBit(bitPosition) &
460          (isSingleWord() ?  VAL : pVal[whichWord(bitPosition)])) != 0;
461}
462
463bool APInt::EqualSlowCase(const APInt& RHS) const {
464  // Get some facts about the number of bits used in the two operands.
465  unsigned n1 = getActiveBits();
466  unsigned n2 = RHS.getActiveBits();
467
468  // If the number of bits isn't the same, they aren't equal
469  if (n1 != n2)
470    return false;
471
472  // If the number of bits fits in a word, we only need to compare the low word.
473  if (n1 <= APINT_BITS_PER_WORD)
474    return pVal[0] == RHS.pVal[0];
475
476  // Otherwise, compare everything
477  for (int i = whichWord(n1 - 1); i >= 0; --i)
478    if (pVal[i] != RHS.pVal[i])
479      return false;
480  return true;
481}
482
483bool APInt::EqualSlowCase(uint64_t Val) const {
484  unsigned n = getActiveBits();
485  if (n <= APINT_BITS_PER_WORD)
486    return pVal[0] == Val;
487  else
488    return false;
489}
490
491bool APInt::ult(const APInt& RHS) const {
492  assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison");
493  if (isSingleWord())
494    return VAL < RHS.VAL;
495
496  // Get active bit length of both operands
497  unsigned n1 = getActiveBits();
498  unsigned n2 = RHS.getActiveBits();
499
500  // If magnitude of LHS is less than RHS, return true.
501  if (n1 < n2)
502    return true;
503
504  // If magnitude of RHS is greather than LHS, return false.
505  if (n2 < n1)
506    return false;
507
508  // If they bot fit in a word, just compare the low order word
509  if (n1 <= APINT_BITS_PER_WORD && n2 <= APINT_BITS_PER_WORD)
510    return pVal[0] < RHS.pVal[0];
511
512  // Otherwise, compare all words
513  unsigned topWord = whichWord(std::max(n1,n2)-1);
514  for (int i = topWord; i >= 0; --i) {
515    if (pVal[i] > RHS.pVal[i])
516      return false;
517    if (pVal[i] < RHS.pVal[i])
518      return true;
519  }
520  return false;
521}
522
523bool APInt::slt(const APInt& RHS) const {
524  assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison");
525  if (isSingleWord()) {
526    int64_t lhsSext = (int64_t(VAL) << (64-BitWidth)) >> (64-BitWidth);
527    int64_t rhsSext = (int64_t(RHS.VAL) << (64-BitWidth)) >> (64-BitWidth);
528    return lhsSext < rhsSext;
529  }
530
531  APInt lhs(*this);
532  APInt rhs(RHS);
533  bool lhsNeg = isNegative();
534  bool rhsNeg = rhs.isNegative();
535  if (lhsNeg) {
536    // Sign bit is set so perform two's complement to make it positive
537    lhs.flip();
538    lhs++;
539  }
540  if (rhsNeg) {
541    // Sign bit is set so perform two's complement to make it positive
542    rhs.flip();
543    rhs++;
544  }
545
546  // Now we have unsigned values to compare so do the comparison if necessary
547  // based on the negativeness of the values.
548  if (lhsNeg)
549    if (rhsNeg)
550      return lhs.ugt(rhs);
551    else
552      return true;
553  else if (rhsNeg)
554    return false;
555  else
556    return lhs.ult(rhs);
557}
558
559APInt& APInt::set(unsigned bitPosition) {
560  if (isSingleWord())
561    VAL |= maskBit(bitPosition);
562  else
563    pVal[whichWord(bitPosition)] |= maskBit(bitPosition);
564  return *this;
565}
566
567/// Set the given bit to 0 whose position is given as "bitPosition".
568/// @brief Set a given bit to 0.
569APInt& APInt::clear(unsigned bitPosition) {
570  if (isSingleWord())
571    VAL &= ~maskBit(bitPosition);
572  else
573    pVal[whichWord(bitPosition)] &= ~maskBit(bitPosition);
574  return *this;
575}
576
577/// @brief Toggle every bit to its opposite value.
578
579/// Toggle a given bit to its opposite value whose position is given
580/// as "bitPosition".
581/// @brief Toggles a given bit to its opposite value.
582APInt& APInt::flip(unsigned bitPosition) {
583  assert(bitPosition < BitWidth && "Out of the bit-width range!");
584  if ((*this)[bitPosition]) clear(bitPosition);
585  else set(bitPosition);
586  return *this;
587}
588
589unsigned APInt::getBitsNeeded(const char* str, unsigned slen, uint8_t radix) {
590  assert(str != 0 && "Invalid value string");
591  assert(slen > 0 && "Invalid string length");
592
593  // Each computation below needs to know if its negative
594  unsigned isNegative = str[0] == '-';
595  if (isNegative) {
596    slen--;
597    str++;
598  }
599  // For radixes of power-of-two values, the bits required is accurately and
600  // easily computed
601  if (radix == 2)
602    return slen + isNegative;
603  if (radix == 8)
604    return slen * 3 + isNegative;
605  if (radix == 16)
606    return slen * 4 + isNegative;
607
608  // Otherwise it must be radix == 10, the hard case
609  assert(radix == 10 && "Invalid radix");
610
611  // This is grossly inefficient but accurate. We could probably do something
612  // with a computation of roughly slen*64/20 and then adjust by the value of
613  // the first few digits. But, I'm not sure how accurate that could be.
614
615  // Compute a sufficient number of bits that is always large enough but might
616  // be too large. This avoids the assertion in the constructor.
617  unsigned sufficient = slen*64/18;
618
619  // Convert to the actual binary value.
620  APInt tmp(sufficient, str, slen, radix);
621
622  // Compute how many bits are required.
623  return isNegative + tmp.logBase2() + 1;
624}
625
626// From http://www.burtleburtle.net, byBob Jenkins.
627// When targeting x86, both GCC and LLVM seem to recognize this as a
628// rotate instruction.
629#define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k))))
630
631// From http://www.burtleburtle.net, by Bob Jenkins.
632#define mix(a,b,c) \
633  { \
634    a -= c;  a ^= rot(c, 4);  c += b; \
635    b -= a;  b ^= rot(a, 6);  a += c; \
636    c -= b;  c ^= rot(b, 8);  b += a; \
637    a -= c;  a ^= rot(c,16);  c += b; \
638    b -= a;  b ^= rot(a,19);  a += c; \
639    c -= b;  c ^= rot(b, 4);  b += a; \
640  }
641
642// From http://www.burtleburtle.net, by Bob Jenkins.
643#define final(a,b,c) \
644  { \
645    c ^= b; c -= rot(b,14); \
646    a ^= c; a -= rot(c,11); \
647    b ^= a; b -= rot(a,25); \
648    c ^= b; c -= rot(b,16); \
649    a ^= c; a -= rot(c,4);  \
650    b ^= a; b -= rot(a,14); \
651    c ^= b; c -= rot(b,24); \
652  }
653
654// hashword() was adapted from http://www.burtleburtle.net, by Bob
655// Jenkins.  k is a pointer to an array of uint32_t values; length is
656// the length of the key, in 32-bit chunks.  This version only handles
657// keys that are a multiple of 32 bits in size.
658static inline uint32_t hashword(const uint64_t *k64, size_t length)
659{
660  const uint32_t *k = reinterpret_cast<const uint32_t *>(k64);
661  uint32_t a,b,c;
662
663  /* Set up the internal state */
664  a = b = c = 0xdeadbeef + (((uint32_t)length)<<2);
665
666  /*------------------------------------------------- handle most of the key */
667  while (length > 3)
668    {
669      a += k[0];
670      b += k[1];
671      c += k[2];
672      mix(a,b,c);
673      length -= 3;
674      k += 3;
675    }
676
677  /*------------------------------------------- handle the last 3 uint32_t's */
678  switch (length) {                  /* all the case statements fall through */
679  case 3 : c+=k[2];
680  case 2 : b+=k[1];
681  case 1 : a+=k[0];
682    final(a,b,c);
683    case 0:     /* case 0: nothing left to add */
684      break;
685    }
686  /*------------------------------------------------------ report the result */
687  return c;
688}
689
690// hashword8() was adapted from http://www.burtleburtle.net, by Bob
691// Jenkins.  This computes a 32-bit hash from one 64-bit word.  When
692// targeting x86 (32 or 64 bit), both LLVM and GCC compile this
693// function into about 35 instructions when inlined.
694static inline uint32_t hashword8(const uint64_t k64)
695{
696  uint32_t a,b,c;
697  a = b = c = 0xdeadbeef + 4;
698  b += k64 >> 32;
699  a += k64 & 0xffffffff;
700  final(a,b,c);
701  return c;
702}
703#undef final
704#undef mix
705#undef rot
706
707uint64_t APInt::getHashValue() const {
708  uint64_t hash;
709  if (isSingleWord())
710    hash = hashword8(VAL);
711  else
712    hash = hashword(pVal, getNumWords()*2);
713  return hash;
714}
715
716/// HiBits - This function returns the high "numBits" bits of this APInt.
717APInt APInt::getHiBits(unsigned numBits) const {
718  return APIntOps::lshr(*this, BitWidth - numBits);
719}
720
721/// LoBits - This function returns the low "numBits" bits of this APInt.
722APInt APInt::getLoBits(unsigned numBits) const {
723  return APIntOps::lshr(APIntOps::shl(*this, BitWidth - numBits),
724                        BitWidth - numBits);
725}
726
727bool APInt::isPowerOf2() const {
728  return (!!*this) && !(*this & (*this - APInt(BitWidth,1)));
729}
730
731unsigned APInt::countLeadingZerosSlowCase() const {
732  unsigned Count = 0;
733  for (unsigned i = getNumWords(); i > 0u; --i) {
734    if (pVal[i-1] == 0)
735      Count += APINT_BITS_PER_WORD;
736    else {
737      Count += CountLeadingZeros_64(pVal[i-1]);
738      break;
739    }
740  }
741  unsigned remainder = BitWidth % APINT_BITS_PER_WORD;
742  if (remainder)
743    Count -= APINT_BITS_PER_WORD - remainder;
744  return std::min(Count, BitWidth);
745}
746
747static unsigned countLeadingOnes_64(uint64_t V, unsigned skip) {
748  unsigned Count = 0;
749  if (skip)
750    V <<= skip;
751  while (V && (V & (1ULL << 63))) {
752    Count++;
753    V <<= 1;
754  }
755  return Count;
756}
757
758unsigned APInt::countLeadingOnes() const {
759  if (isSingleWord())
760    return countLeadingOnes_64(VAL, APINT_BITS_PER_WORD - BitWidth);
761
762  unsigned highWordBits = BitWidth % APINT_BITS_PER_WORD;
763  unsigned shift;
764  if (!highWordBits) {
765    highWordBits = APINT_BITS_PER_WORD;
766    shift = 0;
767  } else {
768    shift = APINT_BITS_PER_WORD - highWordBits;
769  }
770  int i = getNumWords() - 1;
771  unsigned Count = countLeadingOnes_64(pVal[i], shift);
772  if (Count == highWordBits) {
773    for (i--; i >= 0; --i) {
774      if (pVal[i] == -1ULL)
775        Count += APINT_BITS_PER_WORD;
776      else {
777        Count += countLeadingOnes_64(pVal[i], 0);
778        break;
779      }
780    }
781  }
782  return Count;
783}
784
785unsigned APInt::countTrailingZeros() const {
786  if (isSingleWord())
787    return std::min(unsigned(CountTrailingZeros_64(VAL)), BitWidth);
788  unsigned Count = 0;
789  unsigned i = 0;
790  for (; i < getNumWords() && pVal[i] == 0; ++i)
791    Count += APINT_BITS_PER_WORD;
792  if (i < getNumWords())
793    Count += CountTrailingZeros_64(pVal[i]);
794  return std::min(Count, BitWidth);
795}
796
797unsigned APInt::countTrailingOnesSlowCase() const {
798  unsigned Count = 0;
799  unsigned i = 0;
800  for (; i < getNumWords() && pVal[i] == -1ULL; ++i)
801    Count += APINT_BITS_PER_WORD;
802  if (i < getNumWords())
803    Count += CountTrailingOnes_64(pVal[i]);
804  return std::min(Count, BitWidth);
805}
806
807unsigned APInt::countPopulationSlowCase() const {
808  unsigned Count = 0;
809  for (unsigned i = 0; i < getNumWords(); ++i)
810    Count += CountPopulation_64(pVal[i]);
811  return Count;
812}
813
814APInt APInt::byteSwap() const {
815  assert(BitWidth >= 16 && BitWidth % 16 == 0 && "Cannot byteswap!");
816  if (BitWidth == 16)
817    return APInt(BitWidth, ByteSwap_16(uint16_t(VAL)));
818  else if (BitWidth == 32)
819    return APInt(BitWidth, ByteSwap_32(unsigned(VAL)));
820  else if (BitWidth == 48) {
821    unsigned Tmp1 = unsigned(VAL >> 16);
822    Tmp1 = ByteSwap_32(Tmp1);
823    uint16_t Tmp2 = uint16_t(VAL);
824    Tmp2 = ByteSwap_16(Tmp2);
825    return APInt(BitWidth, (uint64_t(Tmp2) << 32) | Tmp1);
826  } else if (BitWidth == 64)
827    return APInt(BitWidth, ByteSwap_64(VAL));
828  else {
829    APInt Result(BitWidth, 0);
830    char *pByte = (char*)Result.pVal;
831    for (unsigned i = 0; i < BitWidth / APINT_WORD_SIZE / 2; ++i) {
832      char Tmp = pByte[i];
833      pByte[i] = pByte[BitWidth / APINT_WORD_SIZE - 1 - i];
834      pByte[BitWidth / APINT_WORD_SIZE - i - 1] = Tmp;
835    }
836    return Result;
837  }
838}
839
840APInt llvm::APIntOps::GreatestCommonDivisor(const APInt& API1,
841                                            const APInt& API2) {
842  APInt A = API1, B = API2;
843  while (!!B) {
844    APInt T = B;
845    B = APIntOps::urem(A, B);
846    A = T;
847  }
848  return A;
849}
850
851APInt llvm::APIntOps::RoundDoubleToAPInt(double Double, unsigned width) {
852  union {
853    double D;
854    uint64_t I;
855  } T;
856  T.D = Double;
857
858  // Get the sign bit from the highest order bit
859  bool isNeg = T.I >> 63;
860
861  // Get the 11-bit exponent and adjust for the 1023 bit bias
862  int64_t exp = ((T.I >> 52) & 0x7ff) - 1023;
863
864  // If the exponent is negative, the value is < 0 so just return 0.
865  if (exp < 0)
866    return APInt(width, 0u);
867
868  // Extract the mantissa by clearing the top 12 bits (sign + exponent).
869  uint64_t mantissa = (T.I & (~0ULL >> 12)) | 1ULL << 52;
870
871  // If the exponent doesn't shift all bits out of the mantissa
872  if (exp < 52)
873    return isNeg ? -APInt(width, mantissa >> (52 - exp)) :
874                    APInt(width, mantissa >> (52 - exp));
875
876  // If the client didn't provide enough bits for us to shift the mantissa into
877  // then the result is undefined, just return 0
878  if (width <= exp - 52)
879    return APInt(width, 0);
880
881  // Otherwise, we have to shift the mantissa bits up to the right location
882  APInt Tmp(width, mantissa);
883  Tmp = Tmp.shl((unsigned)exp - 52);
884  return isNeg ? -Tmp : Tmp;
885}
886
887/// RoundToDouble - This function convert this APInt to a double.
888/// The layout for double is as following (IEEE Standard 754):
889///  --------------------------------------
890/// |  Sign    Exponent    Fraction    Bias |
891/// |-------------------------------------- |
892/// |  1[63]   11[62-52]   52[51-00]   1023 |
893///  --------------------------------------
894double APInt::roundToDouble(bool isSigned) const {
895
896  // Handle the simple case where the value is contained in one uint64_t.
897  if (isSingleWord() || getActiveBits() <= APINT_BITS_PER_WORD) {
898    if (isSigned) {
899      int64_t sext = (int64_t(VAL) << (64-BitWidth)) >> (64-BitWidth);
900      return double(sext);
901    } else
902      return double(VAL);
903  }
904
905  // Determine if the value is negative.
906  bool isNeg = isSigned ? (*this)[BitWidth-1] : false;
907
908  // Construct the absolute value if we're negative.
909  APInt Tmp(isNeg ? -(*this) : (*this));
910
911  // Figure out how many bits we're using.
912  unsigned n = Tmp.getActiveBits();
913
914  // The exponent (without bias normalization) is just the number of bits
915  // we are using. Note that the sign bit is gone since we constructed the
916  // absolute value.
917  uint64_t exp = n;
918
919  // Return infinity for exponent overflow
920  if (exp > 1023) {
921    if (!isSigned || !isNeg)
922      return std::numeric_limits<double>::infinity();
923    else
924      return -std::numeric_limits<double>::infinity();
925  }
926  exp += 1023; // Increment for 1023 bias
927
928  // Number of bits in mantissa is 52. To obtain the mantissa value, we must
929  // extract the high 52 bits from the correct words in pVal.
930  uint64_t mantissa;
931  unsigned hiWord = whichWord(n-1);
932  if (hiWord == 0) {
933    mantissa = Tmp.pVal[0];
934    if (n > 52)
935      mantissa >>= n - 52; // shift down, we want the top 52 bits.
936  } else {
937    assert(hiWord > 0 && "huh?");
938    uint64_t hibits = Tmp.pVal[hiWord] << (52 - n % APINT_BITS_PER_WORD);
939    uint64_t lobits = Tmp.pVal[hiWord-1] >> (11 + n % APINT_BITS_PER_WORD);
940    mantissa = hibits | lobits;
941  }
942
943  // The leading bit of mantissa is implicit, so get rid of it.
944  uint64_t sign = isNeg ? (1ULL << (APINT_BITS_PER_WORD - 1)) : 0;
945  union {
946    double D;
947    uint64_t I;
948  } T;
949  T.I = sign | (exp << 52) | mantissa;
950  return T.D;
951}
952
953// Truncate to new width.
954APInt &APInt::trunc(unsigned width) {
955  assert(width < BitWidth && "Invalid APInt Truncate request");
956  assert(width && "Can't truncate to 0 bits");
957  unsigned wordsBefore = getNumWords();
958  BitWidth = width;
959  unsigned wordsAfter = getNumWords();
960  if (wordsBefore != wordsAfter) {
961    if (wordsAfter == 1) {
962      uint64_t *tmp = pVal;
963      VAL = pVal[0];
964      delete [] tmp;
965    } else {
966      uint64_t *newVal = getClearedMemory(wordsAfter);
967      for (unsigned i = 0; i < wordsAfter; ++i)
968        newVal[i] = pVal[i];
969      delete [] pVal;
970      pVal = newVal;
971    }
972  }
973  return clearUnusedBits();
974}
975
976// Sign extend to a new width.
977APInt &APInt::sext(unsigned width) {
978  assert(width > BitWidth && "Invalid APInt SignExtend request");
979  // If the sign bit isn't set, this is the same as zext.
980  if (!isNegative()) {
981    zext(width);
982    return *this;
983  }
984
985  // The sign bit is set. First, get some facts
986  unsigned wordsBefore = getNumWords();
987  unsigned wordBits = BitWidth % APINT_BITS_PER_WORD;
988  BitWidth = width;
989  unsigned wordsAfter = getNumWords();
990
991  // Mask the high order word appropriately
992  if (wordsBefore == wordsAfter) {
993    unsigned newWordBits = width % APINT_BITS_PER_WORD;
994    // The extension is contained to the wordsBefore-1th word.
995    uint64_t mask = ~0ULL;
996    if (newWordBits)
997      mask >>= APINT_BITS_PER_WORD - newWordBits;
998    mask <<= wordBits;
999    if (wordsBefore == 1)
1000      VAL |= mask;
1001    else
1002      pVal[wordsBefore-1] |= mask;
1003    return clearUnusedBits();
1004  }
1005
1006  uint64_t mask = wordBits == 0 ? 0 : ~0ULL << wordBits;
1007  uint64_t *newVal = getMemory(wordsAfter);
1008  if (wordsBefore == 1)
1009    newVal[0] = VAL | mask;
1010  else {
1011    for (unsigned i = 0; i < wordsBefore; ++i)
1012      newVal[i] = pVal[i];
1013    newVal[wordsBefore-1] |= mask;
1014  }
1015  for (unsigned i = wordsBefore; i < wordsAfter; i++)
1016    newVal[i] = -1ULL;
1017  if (wordsBefore != 1)
1018    delete [] pVal;
1019  pVal = newVal;
1020  return clearUnusedBits();
1021}
1022
1023//  Zero extend to a new width.
1024APInt &APInt::zext(unsigned width) {
1025  assert(width > BitWidth && "Invalid APInt ZeroExtend request");
1026  unsigned wordsBefore = getNumWords();
1027  BitWidth = width;
1028  unsigned wordsAfter = getNumWords();
1029  if (wordsBefore != wordsAfter) {
1030    uint64_t *newVal = getClearedMemory(wordsAfter);
1031    if (wordsBefore == 1)
1032      newVal[0] = VAL;
1033    else
1034      for (unsigned i = 0; i < wordsBefore; ++i)
1035        newVal[i] = pVal[i];
1036    if (wordsBefore != 1)
1037      delete [] pVal;
1038    pVal = newVal;
1039  }
1040  return *this;
1041}
1042
1043APInt &APInt::zextOrTrunc(unsigned width) {
1044  if (BitWidth < width)
1045    return zext(width);
1046  if (BitWidth > width)
1047    return trunc(width);
1048  return *this;
1049}
1050
1051APInt &APInt::sextOrTrunc(unsigned width) {
1052  if (BitWidth < width)
1053    return sext(width);
1054  if (BitWidth > width)
1055    return trunc(width);
1056  return *this;
1057}
1058
1059/// Arithmetic right-shift this APInt by shiftAmt.
1060/// @brief Arithmetic right-shift function.
1061APInt APInt::ashr(const APInt &shiftAmt) const {
1062  return ashr((unsigned)shiftAmt.getLimitedValue(BitWidth));
1063}
1064
1065/// Arithmetic right-shift this APInt by shiftAmt.
1066/// @brief Arithmetic right-shift function.
1067APInt APInt::ashr(unsigned shiftAmt) const {
1068  assert(shiftAmt <= BitWidth && "Invalid shift amount");
1069  // Handle a degenerate case
1070  if (shiftAmt == 0)
1071    return *this;
1072
1073  // Handle single word shifts with built-in ashr
1074  if (isSingleWord()) {
1075    if (shiftAmt == BitWidth)
1076      return APInt(BitWidth, 0); // undefined
1077    else {
1078      unsigned SignBit = APINT_BITS_PER_WORD - BitWidth;
1079      return APInt(BitWidth,
1080        (((int64_t(VAL) << SignBit) >> SignBit) >> shiftAmt));
1081    }
1082  }
1083
1084  // If all the bits were shifted out, the result is, technically, undefined.
1085  // We return -1 if it was negative, 0 otherwise. We check this early to avoid
1086  // issues in the algorithm below.
1087  if (shiftAmt == BitWidth) {
1088    if (isNegative())
1089      return APInt(BitWidth, -1ULL, true);
1090    else
1091      return APInt(BitWidth, 0);
1092  }
1093
1094  // Create some space for the result.
1095  uint64_t * val = new uint64_t[getNumWords()];
1096
1097  // Compute some values needed by the following shift algorithms
1098  unsigned wordShift = shiftAmt % APINT_BITS_PER_WORD; // bits to shift per word
1099  unsigned offset = shiftAmt / APINT_BITS_PER_WORD; // word offset for shift
1100  unsigned breakWord = getNumWords() - 1 - offset; // last word affected
1101  unsigned bitsInWord = whichBit(BitWidth); // how many bits in last word?
1102  if (bitsInWord == 0)
1103    bitsInWord = APINT_BITS_PER_WORD;
1104
1105  // If we are shifting whole words, just move whole words
1106  if (wordShift == 0) {
1107    // Move the words containing significant bits
1108    for (unsigned i = 0; i <= breakWord; ++i)
1109      val[i] = pVal[i+offset]; // move whole word
1110
1111    // Adjust the top significant word for sign bit fill, if negative
1112    if (isNegative())
1113      if (bitsInWord < APINT_BITS_PER_WORD)
1114        val[breakWord] |= ~0ULL << bitsInWord; // set high bits
1115  } else {
1116    // Shift the low order words
1117    for (unsigned i = 0; i < breakWord; ++i) {
1118      // This combines the shifted corresponding word with the low bits from
1119      // the next word (shifted into this word's high bits).
1120      val[i] = (pVal[i+offset] >> wordShift) |
1121               (pVal[i+offset+1] << (APINT_BITS_PER_WORD - wordShift));
1122    }
1123
1124    // Shift the break word. In this case there are no bits from the next word
1125    // to include in this word.
1126    val[breakWord] = pVal[breakWord+offset] >> wordShift;
1127
1128    // Deal with sign extenstion in the break word, and possibly the word before
1129    // it.
1130    if (isNegative()) {
1131      if (wordShift > bitsInWord) {
1132        if (breakWord > 0)
1133          val[breakWord-1] |=
1134            ~0ULL << (APINT_BITS_PER_WORD - (wordShift - bitsInWord));
1135        val[breakWord] |= ~0ULL;
1136      } else
1137        val[breakWord] |= (~0ULL << (bitsInWord - wordShift));
1138    }
1139  }
1140
1141  // Remaining words are 0 or -1, just assign them.
1142  uint64_t fillValue = (isNegative() ? -1ULL : 0);
1143  for (unsigned i = breakWord+1; i < getNumWords(); ++i)
1144    val[i] = fillValue;
1145  return APInt(val, BitWidth).clearUnusedBits();
1146}
1147
1148/// Logical right-shift this APInt by shiftAmt.
1149/// @brief Logical right-shift function.
1150APInt APInt::lshr(const APInt &shiftAmt) const {
1151  return lshr((unsigned)shiftAmt.getLimitedValue(BitWidth));
1152}
1153
1154/// Logical right-shift this APInt by shiftAmt.
1155/// @brief Logical right-shift function.
1156APInt APInt::lshr(unsigned shiftAmt) const {
1157  if (isSingleWord()) {
1158    if (shiftAmt == BitWidth)
1159      return APInt(BitWidth, 0);
1160    else
1161      return APInt(BitWidth, this->VAL >> shiftAmt);
1162  }
1163
1164  // If all the bits were shifted out, the result is 0. This avoids issues
1165  // with shifting by the size of the integer type, which produces undefined
1166  // results. We define these "undefined results" to always be 0.
1167  if (shiftAmt == BitWidth)
1168    return APInt(BitWidth, 0);
1169
1170  // If none of the bits are shifted out, the result is *this. This avoids
1171  // issues with shifting by the size of the integer type, which produces
1172  // undefined results in the code below. This is also an optimization.
1173  if (shiftAmt == 0)
1174    return *this;
1175
1176  // Create some space for the result.
1177  uint64_t * val = new uint64_t[getNumWords()];
1178
1179  // If we are shifting less than a word, compute the shift with a simple carry
1180  if (shiftAmt < APINT_BITS_PER_WORD) {
1181    uint64_t carry = 0;
1182    for (int i = getNumWords()-1; i >= 0; --i) {
1183      val[i] = (pVal[i] >> shiftAmt) | carry;
1184      carry = pVal[i] << (APINT_BITS_PER_WORD - shiftAmt);
1185    }
1186    return APInt(val, BitWidth).clearUnusedBits();
1187  }
1188
1189  // Compute some values needed by the remaining shift algorithms
1190  unsigned wordShift = shiftAmt % APINT_BITS_PER_WORD;
1191  unsigned offset = shiftAmt / APINT_BITS_PER_WORD;
1192
1193  // If we are shifting whole words, just move whole words
1194  if (wordShift == 0) {
1195    for (unsigned i = 0; i < getNumWords() - offset; ++i)
1196      val[i] = pVal[i+offset];
1197    for (unsigned i = getNumWords()-offset; i < getNumWords(); i++)
1198      val[i] = 0;
1199    return APInt(val,BitWidth).clearUnusedBits();
1200  }
1201
1202  // Shift the low order words
1203  unsigned breakWord = getNumWords() - offset -1;
1204  for (unsigned i = 0; i < breakWord; ++i)
1205    val[i] = (pVal[i+offset] >> wordShift) |
1206             (pVal[i+offset+1] << (APINT_BITS_PER_WORD - wordShift));
1207  // Shift the break word.
1208  val[breakWord] = pVal[breakWord+offset] >> wordShift;
1209
1210  // Remaining words are 0
1211  for (unsigned i = breakWord+1; i < getNumWords(); ++i)
1212    val[i] = 0;
1213  return APInt(val, BitWidth).clearUnusedBits();
1214}
1215
1216/// Left-shift this APInt by shiftAmt.
1217/// @brief Left-shift function.
1218APInt APInt::shl(const APInt &shiftAmt) const {
1219  // It's undefined behavior in C to shift by BitWidth or greater.
1220  return shl((unsigned)shiftAmt.getLimitedValue(BitWidth));
1221}
1222
1223APInt APInt::shlSlowCase(unsigned shiftAmt) const {
1224  // If all the bits were shifted out, the result is 0. This avoids issues
1225  // with shifting by the size of the integer type, which produces undefined
1226  // results. We define these "undefined results" to always be 0.
1227  if (shiftAmt == BitWidth)
1228    return APInt(BitWidth, 0);
1229
1230  // If none of the bits are shifted out, the result is *this. This avoids a
1231  // lshr by the words size in the loop below which can produce incorrect
1232  // results. It also avoids the expensive computation below for a common case.
1233  if (shiftAmt == 0)
1234    return *this;
1235
1236  // Create some space for the result.
1237  uint64_t * val = new uint64_t[getNumWords()];
1238
1239  // If we are shifting less than a word, do it the easy way
1240  if (shiftAmt < APINT_BITS_PER_WORD) {
1241    uint64_t carry = 0;
1242    for (unsigned i = 0; i < getNumWords(); i++) {
1243      val[i] = pVal[i] << shiftAmt | carry;
1244      carry = pVal[i] >> (APINT_BITS_PER_WORD - shiftAmt);
1245    }
1246    return APInt(val, BitWidth).clearUnusedBits();
1247  }
1248
1249  // Compute some values needed by the remaining shift algorithms
1250  unsigned wordShift = shiftAmt % APINT_BITS_PER_WORD;
1251  unsigned offset = shiftAmt / APINT_BITS_PER_WORD;
1252
1253  // If we are shifting whole words, just move whole words
1254  if (wordShift == 0) {
1255    for (unsigned i = 0; i < offset; i++)
1256      val[i] = 0;
1257    for (unsigned i = offset; i < getNumWords(); i++)
1258      val[i] = pVal[i-offset];
1259    return APInt(val,BitWidth).clearUnusedBits();
1260  }
1261
1262  // Copy whole words from this to Result.
1263  unsigned i = getNumWords() - 1;
1264  for (; i > offset; --i)
1265    val[i] = pVal[i-offset] << wordShift |
1266             pVal[i-offset-1] >> (APINT_BITS_PER_WORD - wordShift);
1267  val[offset] = pVal[0] << wordShift;
1268  for (i = 0; i < offset; ++i)
1269    val[i] = 0;
1270  return APInt(val, BitWidth).clearUnusedBits();
1271}
1272
1273APInt APInt::rotl(const APInt &rotateAmt) const {
1274  return rotl((unsigned)rotateAmt.getLimitedValue(BitWidth));
1275}
1276
1277APInt APInt::rotl(unsigned rotateAmt) const {
1278  if (rotateAmt == 0)
1279    return *this;
1280  // Don't get too fancy, just use existing shift/or facilities
1281  APInt hi(*this);
1282  APInt lo(*this);
1283  hi.shl(rotateAmt);
1284  lo.lshr(BitWidth - rotateAmt);
1285  return hi | lo;
1286}
1287
1288APInt APInt::rotr(const APInt &rotateAmt) const {
1289  return rotr((unsigned)rotateAmt.getLimitedValue(BitWidth));
1290}
1291
1292APInt APInt::rotr(unsigned rotateAmt) const {
1293  if (rotateAmt == 0)
1294    return *this;
1295  // Don't get too fancy, just use existing shift/or facilities
1296  APInt hi(*this);
1297  APInt lo(*this);
1298  lo.lshr(rotateAmt);
1299  hi.shl(BitWidth - rotateAmt);
1300  return hi | lo;
1301}
1302
1303// Square Root - this method computes and returns the square root of "this".
1304// Three mechanisms are used for computation. For small values (<= 5 bits),
1305// a table lookup is done. This gets some performance for common cases. For
1306// values using less than 52 bits, the value is converted to double and then
1307// the libc sqrt function is called. The result is rounded and then converted
1308// back to a uint64_t which is then used to construct the result. Finally,
1309// the Babylonian method for computing square roots is used.
1310APInt APInt::sqrt() const {
1311
1312  // Determine the magnitude of the value.
1313  unsigned magnitude = getActiveBits();
1314
1315  // Use a fast table for some small values. This also gets rid of some
1316  // rounding errors in libc sqrt for small values.
1317  if (magnitude <= 5) {
1318    static const uint8_t results[32] = {
1319      /*     0 */ 0,
1320      /*  1- 2 */ 1, 1,
1321      /*  3- 6 */ 2, 2, 2, 2,
1322      /*  7-12 */ 3, 3, 3, 3, 3, 3,
1323      /* 13-20 */ 4, 4, 4, 4, 4, 4, 4, 4,
1324      /* 21-30 */ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
1325      /*    31 */ 6
1326    };
1327    return APInt(BitWidth, results[ (isSingleWord() ? VAL : pVal[0]) ]);
1328  }
1329
1330  // If the magnitude of the value fits in less than 52 bits (the precision of
1331  // an IEEE double precision floating point value), then we can use the
1332  // libc sqrt function which will probably use a hardware sqrt computation.
1333  // This should be faster than the algorithm below.
1334  if (magnitude < 52) {
1335#ifdef _MSC_VER
1336    // Amazingly, VC++ doesn't have round().
1337    return APInt(BitWidth,
1338                 uint64_t(::sqrt(double(isSingleWord()?VAL:pVal[0]))) + 0.5);
1339#else
1340    return APInt(BitWidth,
1341                 uint64_t(::round(::sqrt(double(isSingleWord()?VAL:pVal[0])))));
1342#endif
1343  }
1344
1345  // Okay, all the short cuts are exhausted. We must compute it. The following
1346  // is a classical Babylonian method for computing the square root. This code
1347  // was adapted to APINt from a wikipedia article on such computations.
1348  // See http://www.wikipedia.org/ and go to the page named
1349  // Calculate_an_integer_square_root.
1350  unsigned nbits = BitWidth, i = 4;
1351  APInt testy(BitWidth, 16);
1352  APInt x_old(BitWidth, 1);
1353  APInt x_new(BitWidth, 0);
1354  APInt two(BitWidth, 2);
1355
1356  // Select a good starting value using binary logarithms.
1357  for (;; i += 2, testy = testy.shl(2))
1358    if (i >= nbits || this->ule(testy)) {
1359      x_old = x_old.shl(i / 2);
1360      break;
1361    }
1362
1363  // Use the Babylonian method to arrive at the integer square root:
1364  for (;;) {
1365    x_new = (this->udiv(x_old) + x_old).udiv(two);
1366    if (x_old.ule(x_new))
1367      break;
1368    x_old = x_new;
1369  }
1370
1371  // Make sure we return the closest approximation
1372  // NOTE: The rounding calculation below is correct. It will produce an
1373  // off-by-one discrepancy with results from pari/gp. That discrepancy has been
1374  // determined to be a rounding issue with pari/gp as it begins to use a
1375  // floating point representation after 192 bits. There are no discrepancies
1376  // between this algorithm and pari/gp for bit widths < 192 bits.
1377  APInt square(x_old * x_old);
1378  APInt nextSquare((x_old + 1) * (x_old +1));
1379  if (this->ult(square))
1380    return x_old;
1381  else if (this->ule(nextSquare)) {
1382    APInt midpoint((nextSquare - square).udiv(two));
1383    APInt offset(*this - square);
1384    if (offset.ult(midpoint))
1385      return x_old;
1386    else
1387      return x_old + 1;
1388  } else
1389    assert(0 && "Error in APInt::sqrt computation");
1390  return x_old + 1;
1391}
1392
1393/// Computes the multiplicative inverse of this APInt for a given modulo. The
1394/// iterative extended Euclidean algorithm is used to solve for this value,
1395/// however we simplify it to speed up calculating only the inverse, and take
1396/// advantage of div+rem calculations. We also use some tricks to avoid copying
1397/// (potentially large) APInts around.
1398APInt APInt::multiplicativeInverse(const APInt& modulo) const {
1399  assert(ult(modulo) && "This APInt must be smaller than the modulo");
1400
1401  // Using the properties listed at the following web page (accessed 06/21/08):
1402  //   http://www.numbertheory.org/php/euclid.html
1403  // (especially the properties numbered 3, 4 and 9) it can be proved that
1404  // BitWidth bits suffice for all the computations in the algorithm implemented
1405  // below. More precisely, this number of bits suffice if the multiplicative
1406  // inverse exists, but may not suffice for the general extended Euclidean
1407  // algorithm.
1408
1409  APInt r[2] = { modulo, *this };
1410  APInt t[2] = { APInt(BitWidth, 0), APInt(BitWidth, 1) };
1411  APInt q(BitWidth, 0);
1412
1413  unsigned i;
1414  for (i = 0; r[i^1] != 0; i ^= 1) {
1415    // An overview of the math without the confusing bit-flipping:
1416    // q = r[i-2] / r[i-1]
1417    // r[i] = r[i-2] % r[i-1]
1418    // t[i] = t[i-2] - t[i-1] * q
1419    udivrem(r[i], r[i^1], q, r[i]);
1420    t[i] -= t[i^1] * q;
1421  }
1422
1423  // If this APInt and the modulo are not coprime, there is no multiplicative
1424  // inverse, so return 0. We check this by looking at the next-to-last
1425  // remainder, which is the gcd(*this,modulo) as calculated by the Euclidean
1426  // algorithm.
1427  if (r[i] != 1)
1428    return APInt(BitWidth, 0);
1429
1430  // The next-to-last t is the multiplicative inverse.  However, we are
1431  // interested in a positive inverse. Calcuate a positive one from a negative
1432  // one if necessary. A simple addition of the modulo suffices because
1433  // abs(t[i]) is known to be less than *this/2 (see the link above).
1434  return t[i].isNegative() ? t[i] + modulo : t[i];
1435}
1436
1437/// Calculate the magic numbers required to implement a signed integer division
1438/// by a constant as a sequence of multiplies, adds and shifts.  Requires that
1439/// the divisor not be 0, 1, or -1.  Taken from "Hacker's Delight", Henry S.
1440/// Warren, Jr., chapter 10.
1441APInt::ms APInt::magic() const {
1442  const APInt& d = *this;
1443  unsigned p;
1444  APInt ad, anc, delta, q1, r1, q2, r2, t;
1445  APInt allOnes = APInt::getAllOnesValue(d.getBitWidth());
1446  APInt signedMin = APInt::getSignedMinValue(d.getBitWidth());
1447  APInt signedMax = APInt::getSignedMaxValue(d.getBitWidth());
1448  struct ms mag;
1449
1450  ad = d.abs();
1451  t = signedMin + (d.lshr(d.getBitWidth() - 1));
1452  anc = t - 1 - t.urem(ad);   // absolute value of nc
1453  p = d.getBitWidth() - 1;    // initialize p
1454  q1 = signedMin.udiv(anc);   // initialize q1 = 2p/abs(nc)
1455  r1 = signedMin - q1*anc;    // initialize r1 = rem(2p,abs(nc))
1456  q2 = signedMin.udiv(ad);    // initialize q2 = 2p/abs(d)
1457  r2 = signedMin - q2*ad;     // initialize r2 = rem(2p,abs(d))
1458  do {
1459    p = p + 1;
1460    q1 = q1<<1;          // update q1 = 2p/abs(nc)
1461    r1 = r1<<1;          // update r1 = rem(2p/abs(nc))
1462    if (r1.uge(anc)) {  // must be unsigned comparison
1463      q1 = q1 + 1;
1464      r1 = r1 - anc;
1465    }
1466    q2 = q2<<1;          // update q2 = 2p/abs(d)
1467    r2 = r2<<1;          // update r2 = rem(2p/abs(d))
1468    if (r2.uge(ad)) {   // must be unsigned comparison
1469      q2 = q2 + 1;
1470      r2 = r2 - ad;
1471    }
1472    delta = ad - r2;
1473  } while (q1.ule(delta) || (q1 == delta && r1 == 0));
1474
1475  mag.m = q2 + 1;
1476  if (d.isNegative()) mag.m = -mag.m;   // resulting magic number
1477  mag.s = p - d.getBitWidth();          // resulting shift
1478  return mag;
1479}
1480
1481/// Calculate the magic numbers required to implement an unsigned integer
1482/// division by a constant as a sequence of multiplies, adds and shifts.
1483/// Requires that the divisor not be 0.  Taken from "Hacker's Delight", Henry
1484/// S. Warren, Jr., chapter 10.
1485APInt::mu APInt::magicu() const {
1486  const APInt& d = *this;
1487  unsigned p;
1488  APInt nc, delta, q1, r1, q2, r2;
1489  struct mu magu;
1490  magu.a = 0;               // initialize "add" indicator
1491  APInt allOnes = APInt::getAllOnesValue(d.getBitWidth());
1492  APInt signedMin = APInt::getSignedMinValue(d.getBitWidth());
1493  APInt signedMax = APInt::getSignedMaxValue(d.getBitWidth());
1494
1495  nc = allOnes - (-d).urem(d);
1496  p = d.getBitWidth() - 1;  // initialize p
1497  q1 = signedMin.udiv(nc);  // initialize q1 = 2p/nc
1498  r1 = signedMin - q1*nc;   // initialize r1 = rem(2p,nc)
1499  q2 = signedMax.udiv(d);   // initialize q2 = (2p-1)/d
1500  r2 = signedMax - q2*d;    // initialize r2 = rem((2p-1),d)
1501  do {
1502    p = p + 1;
1503    if (r1.uge(nc - r1)) {
1504      q1 = q1 + q1 + 1;  // update q1
1505      r1 = r1 + r1 - nc; // update r1
1506    }
1507    else {
1508      q1 = q1+q1; // update q1
1509      r1 = r1+r1; // update r1
1510    }
1511    if ((r2 + 1).uge(d - r2)) {
1512      if (q2.uge(signedMax)) magu.a = 1;
1513      q2 = q2+q2 + 1;     // update q2
1514      r2 = r2+r2 + 1 - d; // update r2
1515    }
1516    else {
1517      if (q2.uge(signedMin)) magu.a = 1;
1518      q2 = q2+q2;     // update q2
1519      r2 = r2+r2 + 1; // update r2
1520    }
1521    delta = d - 1 - r2;
1522  } while (p < d.getBitWidth()*2 &&
1523           (q1.ult(delta) || (q1 == delta && r1 == 0)));
1524  magu.m = q2 + 1; // resulting magic number
1525  magu.s = p - d.getBitWidth();  // resulting shift
1526  return magu;
1527}
1528
1529/// Implementation of Knuth's Algorithm D (Division of nonnegative integers)
1530/// from "Art of Computer Programming, Volume 2", section 4.3.1, p. 272. The
1531/// variables here have the same names as in the algorithm. Comments explain
1532/// the algorithm and any deviation from it.
1533static void KnuthDiv(unsigned *u, unsigned *v, unsigned *q, unsigned* r,
1534                     unsigned m, unsigned n) {
1535  assert(u && "Must provide dividend");
1536  assert(v && "Must provide divisor");
1537  assert(q && "Must provide quotient");
1538  assert(u != v && u != q && v != q && "Must us different memory");
1539  assert(n>1 && "n must be > 1");
1540
1541  // Knuth uses the value b as the base of the number system. In our case b
1542  // is 2^31 so we just set it to -1u.
1543  uint64_t b = uint64_t(1) << 32;
1544
1545#if 0
1546  DEBUG(cerr << "KnuthDiv: m=" << m << " n=" << n << '\n');
1547  DEBUG(cerr << "KnuthDiv: original:");
1548  DEBUG(for (int i = m+n; i >=0; i--) cerr << " " << std::setbase(16) << u[i]);
1549  DEBUG(cerr << " by");
1550  DEBUG(for (int i = n; i >0; i--) cerr << " " << std::setbase(16) << v[i-1]);
1551  DEBUG(cerr << '\n');
1552#endif
1553  // D1. [Normalize.] Set d = b / (v[n-1] + 1) and multiply all the digits of
1554  // u and v by d. Note that we have taken Knuth's advice here to use a power
1555  // of 2 value for d such that d * v[n-1] >= b/2 (b is the base). A power of
1556  // 2 allows us to shift instead of multiply and it is easy to determine the
1557  // shift amount from the leading zeros.  We are basically normalizing the u
1558  // and v so that its high bits are shifted to the top of v's range without
1559  // overflow. Note that this can require an extra word in u so that u must
1560  // be of length m+n+1.
1561  unsigned shift = CountLeadingZeros_32(v[n-1]);
1562  unsigned v_carry = 0;
1563  unsigned u_carry = 0;
1564  if (shift) {
1565    for (unsigned i = 0; i < m+n; ++i) {
1566      unsigned u_tmp = u[i] >> (32 - shift);
1567      u[i] = (u[i] << shift) | u_carry;
1568      u_carry = u_tmp;
1569    }
1570    for (unsigned i = 0; i < n; ++i) {
1571      unsigned v_tmp = v[i] >> (32 - shift);
1572      v[i] = (v[i] << shift) | v_carry;
1573      v_carry = v_tmp;
1574    }
1575  }
1576  u[m+n] = u_carry;
1577#if 0
1578  DEBUG(cerr << "KnuthDiv:   normal:");
1579  DEBUG(for (int i = m+n; i >=0; i--) cerr << " " << std::setbase(16) << u[i]);
1580  DEBUG(cerr << " by");
1581  DEBUG(for (int i = n; i >0; i--) cerr << " " << std::setbase(16) << v[i-1]);
1582  DEBUG(cerr << '\n');
1583#endif
1584
1585  // D2. [Initialize j.]  Set j to m. This is the loop counter over the places.
1586  int j = m;
1587  do {
1588    DEBUG(cerr << "KnuthDiv: quotient digit #" << j << '\n');
1589    // D3. [Calculate q'.].
1590    //     Set qp = (u[j+n]*b + u[j+n-1]) / v[n-1]. (qp=qprime=q')
1591    //     Set rp = (u[j+n]*b + u[j+n-1]) % v[n-1]. (rp=rprime=r')
1592    // Now test if qp == b or qp*v[n-2] > b*rp + u[j+n-2]; if so, decrease
1593    // qp by 1, inrease rp by v[n-1], and repeat this test if rp < b. The test
1594    // on v[n-2] determines at high speed most of the cases in which the trial
1595    // value qp is one too large, and it eliminates all cases where qp is two
1596    // too large.
1597    uint64_t dividend = ((uint64_t(u[j+n]) << 32) + u[j+n-1]);
1598    DEBUG(cerr << "KnuthDiv: dividend == " << dividend << '\n');
1599    uint64_t qp = dividend / v[n-1];
1600    uint64_t rp = dividend % v[n-1];
1601    if (qp == b || qp*v[n-2] > b*rp + u[j+n-2]) {
1602      qp--;
1603      rp += v[n-1];
1604      if (rp < b && (qp == b || qp*v[n-2] > b*rp + u[j+n-2]))
1605        qp--;
1606    }
1607    DEBUG(cerr << "KnuthDiv: qp == " << qp << ", rp == " << rp << '\n');
1608
1609    // D4. [Multiply and subtract.] Replace (u[j+n]u[j+n-1]...u[j]) with
1610    // (u[j+n]u[j+n-1]..u[j]) - qp * (v[n-1]...v[1]v[0]). This computation
1611    // consists of a simple multiplication by a one-place number, combined with
1612    // a subtraction.
1613    bool isNeg = false;
1614    for (unsigned i = 0; i < n; ++i) {
1615      uint64_t u_tmp = uint64_t(u[j+i]) | (uint64_t(u[j+i+1]) << 32);
1616      uint64_t subtrahend = uint64_t(qp) * uint64_t(v[i]);
1617      bool borrow = subtrahend > u_tmp;
1618      DEBUG(cerr << "KnuthDiv: u_tmp == " << u_tmp
1619                 << ", subtrahend == " << subtrahend
1620                 << ", borrow = " << borrow << '\n');
1621
1622      uint64_t result = u_tmp - subtrahend;
1623      unsigned k = j + i;
1624      u[k++] = (unsigned)(result & (b-1)); // subtract low word
1625      u[k++] = (unsigned)(result >> 32);   // subtract high word
1626      while (borrow && k <= m+n) { // deal with borrow to the left
1627        borrow = u[k] == 0;
1628        u[k]--;
1629        k++;
1630      }
1631      isNeg |= borrow;
1632      DEBUG(cerr << "KnuthDiv: u[j+i] == " << u[j+i] << ",  u[j+i+1] == " <<
1633                    u[j+i+1] << '\n');
1634    }
1635    DEBUG(cerr << "KnuthDiv: after subtraction:");
1636    DEBUG(for (int i = m+n; i >=0; i--) cerr << " " << u[i]);
1637    DEBUG(cerr << '\n');
1638    // The digits (u[j+n]...u[j]) should be kept positive; if the result of
1639    // this step is actually negative, (u[j+n]...u[j]) should be left as the
1640    // true value plus b**(n+1), namely as the b's complement of
1641    // the true value, and a "borrow" to the left should be remembered.
1642    //
1643    if (isNeg) {
1644      bool carry = true;  // true because b's complement is "complement + 1"
1645      for (unsigned i = 0; i <= m+n; ++i) {
1646        u[i] = ~u[i] + carry; // b's complement
1647        carry = carry && u[i] == 0;
1648      }
1649    }
1650    DEBUG(cerr << "KnuthDiv: after complement:");
1651    DEBUG(for (int i = m+n; i >=0; i--) cerr << " " << u[i]);
1652    DEBUG(cerr << '\n');
1653
1654    // D5. [Test remainder.] Set q[j] = qp. If the result of step D4 was
1655    // negative, go to step D6; otherwise go on to step D7.
1656    q[j] = (unsigned)qp;
1657    if (isNeg) {
1658      // D6. [Add back]. The probability that this step is necessary is very
1659      // small, on the order of only 2/b. Make sure that test data accounts for
1660      // this possibility. Decrease q[j] by 1
1661      q[j]--;
1662      // and add (0v[n-1]...v[1]v[0]) to (u[j+n]u[j+n-1]...u[j+1]u[j]).
1663      // A carry will occur to the left of u[j+n], and it should be ignored
1664      // since it cancels with the borrow that occurred in D4.
1665      bool carry = false;
1666      for (unsigned i = 0; i < n; i++) {
1667        unsigned limit = std::min(u[j+i],v[i]);
1668        u[j+i] += v[i] + carry;
1669        carry = u[j+i] < limit || (carry && u[j+i] == limit);
1670      }
1671      u[j+n] += carry;
1672    }
1673    DEBUG(cerr << "KnuthDiv: after correction:");
1674    DEBUG(for (int i = m+n; i >=0; i--) cerr <<" " << u[i]);
1675    DEBUG(cerr << "\nKnuthDiv: digit result = " << q[j] << '\n');
1676
1677  // D7. [Loop on j.]  Decrease j by one. Now if j >= 0, go back to D3.
1678  } while (--j >= 0);
1679
1680  DEBUG(cerr << "KnuthDiv: quotient:");
1681  DEBUG(for (int i = m; i >=0; i--) cerr <<" " << q[i]);
1682  DEBUG(cerr << '\n');
1683
1684  // D8. [Unnormalize]. Now q[...] is the desired quotient, and the desired
1685  // remainder may be obtained by dividing u[...] by d. If r is non-null we
1686  // compute the remainder (urem uses this).
1687  if (r) {
1688    // The value d is expressed by the "shift" value above since we avoided
1689    // multiplication by d by using a shift left. So, all we have to do is
1690    // shift right here. In order to mak
1691    if (shift) {
1692      unsigned carry = 0;
1693      DEBUG(cerr << "KnuthDiv: remainder:");
1694      for (int i = n-1; i >= 0; i--) {
1695        r[i] = (u[i] >> shift) | carry;
1696        carry = u[i] << (32 - shift);
1697        DEBUG(cerr << " " << r[i]);
1698      }
1699    } else {
1700      for (int i = n-1; i >= 0; i--) {
1701        r[i] = u[i];
1702        DEBUG(cerr << " " << r[i]);
1703      }
1704    }
1705    DEBUG(cerr << '\n');
1706  }
1707#if 0
1708  DEBUG(cerr << std::setbase(10) << '\n');
1709#endif
1710}
1711
1712void APInt::divide(const APInt LHS, unsigned lhsWords,
1713                   const APInt &RHS, unsigned rhsWords,
1714                   APInt *Quotient, APInt *Remainder)
1715{
1716  assert(lhsWords >= rhsWords && "Fractional result");
1717
1718  // First, compose the values into an array of 32-bit words instead of
1719  // 64-bit words. This is a necessity of both the "short division" algorithm
1720  // and the the Knuth "classical algorithm" which requires there to be native
1721  // operations for +, -, and * on an m bit value with an m*2 bit result. We
1722  // can't use 64-bit operands here because we don't have native results of
1723  // 128-bits. Furthermore, casting the 64-bit values to 32-bit values won't
1724  // work on large-endian machines.
1725  uint64_t mask = ~0ull >> (sizeof(unsigned)*CHAR_BIT);
1726  unsigned n = rhsWords * 2;
1727  unsigned m = (lhsWords * 2) - n;
1728
1729  // Allocate space for the temporary values we need either on the stack, if
1730  // it will fit, or on the heap if it won't.
1731  unsigned SPACE[128];
1732  unsigned *U = 0;
1733  unsigned *V = 0;
1734  unsigned *Q = 0;
1735  unsigned *R = 0;
1736  if ((Remainder?4:3)*n+2*m+1 <= 128) {
1737    U = &SPACE[0];
1738    V = &SPACE[m+n+1];
1739    Q = &SPACE[(m+n+1) + n];
1740    if (Remainder)
1741      R = &SPACE[(m+n+1) + n + (m+n)];
1742  } else {
1743    U = new unsigned[m + n + 1];
1744    V = new unsigned[n];
1745    Q = new unsigned[m+n];
1746    if (Remainder)
1747      R = new unsigned[n];
1748  }
1749
1750  // Initialize the dividend
1751  memset(U, 0, (m+n+1)*sizeof(unsigned));
1752  for (unsigned i = 0; i < lhsWords; ++i) {
1753    uint64_t tmp = (LHS.getNumWords() == 1 ? LHS.VAL : LHS.pVal[i]);
1754    U[i * 2] = (unsigned)(tmp & mask);
1755    U[i * 2 + 1] = (unsigned)(tmp >> (sizeof(unsigned)*CHAR_BIT));
1756  }
1757  U[m+n] = 0; // this extra word is for "spill" in the Knuth algorithm.
1758
1759  // Initialize the divisor
1760  memset(V, 0, (n)*sizeof(unsigned));
1761  for (unsigned i = 0; i < rhsWords; ++i) {
1762    uint64_t tmp = (RHS.getNumWords() == 1 ? RHS.VAL : RHS.pVal[i]);
1763    V[i * 2] = (unsigned)(tmp & mask);
1764    V[i * 2 + 1] = (unsigned)(tmp >> (sizeof(unsigned)*CHAR_BIT));
1765  }
1766
1767  // initialize the quotient and remainder
1768  memset(Q, 0, (m+n) * sizeof(unsigned));
1769  if (Remainder)
1770    memset(R, 0, n * sizeof(unsigned));
1771
1772  // Now, adjust m and n for the Knuth division. n is the number of words in
1773  // the divisor. m is the number of words by which the dividend exceeds the
1774  // divisor (i.e. m+n is the length of the dividend). These sizes must not
1775  // contain any zero words or the Knuth algorithm fails.
1776  for (unsigned i = n; i > 0 && V[i-1] == 0; i--) {
1777    n--;
1778    m++;
1779  }
1780  for (unsigned i = m+n; i > 0 && U[i-1] == 0; i--)
1781    m--;
1782
1783  // If we're left with only a single word for the divisor, Knuth doesn't work
1784  // so we implement the short division algorithm here. This is much simpler
1785  // and faster because we are certain that we can divide a 64-bit quantity
1786  // by a 32-bit quantity at hardware speed and short division is simply a
1787  // series of such operations. This is just like doing short division but we
1788  // are using base 2^32 instead of base 10.
1789  assert(n != 0 && "Divide by zero?");
1790  if (n == 1) {
1791    unsigned divisor = V[0];
1792    unsigned remainder = 0;
1793    for (int i = m+n-1; i >= 0; i--) {
1794      uint64_t partial_dividend = uint64_t(remainder) << 32 | U[i];
1795      if (partial_dividend == 0) {
1796        Q[i] = 0;
1797        remainder = 0;
1798      } else if (partial_dividend < divisor) {
1799        Q[i] = 0;
1800        remainder = (unsigned)partial_dividend;
1801      } else if (partial_dividend == divisor) {
1802        Q[i] = 1;
1803        remainder = 0;
1804      } else {
1805        Q[i] = (unsigned)(partial_dividend / divisor);
1806        remainder = (unsigned)(partial_dividend - (Q[i] * divisor));
1807      }
1808    }
1809    if (R)
1810      R[0] = remainder;
1811  } else {
1812    // Now we're ready to invoke the Knuth classical divide algorithm. In this
1813    // case n > 1.
1814    KnuthDiv(U, V, Q, R, m, n);
1815  }
1816
1817  // If the caller wants the quotient
1818  if (Quotient) {
1819    // Set up the Quotient value's memory.
1820    if (Quotient->BitWidth != LHS.BitWidth) {
1821      if (Quotient->isSingleWord())
1822        Quotient->VAL = 0;
1823      else
1824        delete [] Quotient->pVal;
1825      Quotient->BitWidth = LHS.BitWidth;
1826      if (!Quotient->isSingleWord())
1827        Quotient->pVal = getClearedMemory(Quotient->getNumWords());
1828    } else
1829      Quotient->clear();
1830
1831    // The quotient is in Q. Reconstitute the quotient into Quotient's low
1832    // order words.
1833    if (lhsWords == 1) {
1834      uint64_t tmp =
1835        uint64_t(Q[0]) | (uint64_t(Q[1]) << (APINT_BITS_PER_WORD / 2));
1836      if (Quotient->isSingleWord())
1837        Quotient->VAL = tmp;
1838      else
1839        Quotient->pVal[0] = tmp;
1840    } else {
1841      assert(!Quotient->isSingleWord() && "Quotient APInt not large enough");
1842      for (unsigned i = 0; i < lhsWords; ++i)
1843        Quotient->pVal[i] =
1844          uint64_t(Q[i*2]) | (uint64_t(Q[i*2+1]) << (APINT_BITS_PER_WORD / 2));
1845    }
1846  }
1847
1848  // If the caller wants the remainder
1849  if (Remainder) {
1850    // Set up the Remainder value's memory.
1851    if (Remainder->BitWidth != RHS.BitWidth) {
1852      if (Remainder->isSingleWord())
1853        Remainder->VAL = 0;
1854      else
1855        delete [] Remainder->pVal;
1856      Remainder->BitWidth = RHS.BitWidth;
1857      if (!Remainder->isSingleWord())
1858        Remainder->pVal = getClearedMemory(Remainder->getNumWords());
1859    } else
1860      Remainder->clear();
1861
1862    // The remainder is in R. Reconstitute the remainder into Remainder's low
1863    // order words.
1864    if (rhsWords == 1) {
1865      uint64_t tmp =
1866        uint64_t(R[0]) | (uint64_t(R[1]) << (APINT_BITS_PER_WORD / 2));
1867      if (Remainder->isSingleWord())
1868        Remainder->VAL = tmp;
1869      else
1870        Remainder->pVal[0] = tmp;
1871    } else {
1872      assert(!Remainder->isSingleWord() && "Remainder APInt not large enough");
1873      for (unsigned i = 0; i < rhsWords; ++i)
1874        Remainder->pVal[i] =
1875          uint64_t(R[i*2]) | (uint64_t(R[i*2+1]) << (APINT_BITS_PER_WORD / 2));
1876    }
1877  }
1878
1879  // Clean up the memory we allocated.
1880  if (U != &SPACE[0]) {
1881    delete [] U;
1882    delete [] V;
1883    delete [] Q;
1884    delete [] R;
1885  }
1886}
1887
1888APInt APInt::udiv(const APInt& RHS) const {
1889  assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
1890
1891  // First, deal with the easy case
1892  if (isSingleWord()) {
1893    assert(RHS.VAL != 0 && "Divide by zero?");
1894    return APInt(BitWidth, VAL / RHS.VAL);
1895  }
1896
1897  // Get some facts about the LHS and RHS number of bits and words
1898  unsigned rhsBits = RHS.getActiveBits();
1899  unsigned rhsWords = !rhsBits ? 0 : (APInt::whichWord(rhsBits - 1) + 1);
1900  assert(rhsWords && "Divided by zero???");
1901  unsigned lhsBits = this->getActiveBits();
1902  unsigned lhsWords = !lhsBits ? 0 : (APInt::whichWord(lhsBits - 1) + 1);
1903
1904  // Deal with some degenerate cases
1905  if (!lhsWords)
1906    // 0 / X ===> 0
1907    return APInt(BitWidth, 0);
1908  else if (lhsWords < rhsWords || this->ult(RHS)) {
1909    // X / Y ===> 0, iff X < Y
1910    return APInt(BitWidth, 0);
1911  } else if (*this == RHS) {
1912    // X / X ===> 1
1913    return APInt(BitWidth, 1);
1914  } else if (lhsWords == 1 && rhsWords == 1) {
1915    // All high words are zero, just use native divide
1916    return APInt(BitWidth, this->pVal[0] / RHS.pVal[0]);
1917  }
1918
1919  // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1920  APInt Quotient(1,0); // to hold result.
1921  divide(*this, lhsWords, RHS, rhsWords, &Quotient, 0);
1922  return Quotient;
1923}
1924
1925APInt APInt::urem(const APInt& RHS) const {
1926  assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
1927  if (isSingleWord()) {
1928    assert(RHS.VAL != 0 && "Remainder by zero?");
1929    return APInt(BitWidth, VAL % RHS.VAL);
1930  }
1931
1932  // Get some facts about the LHS
1933  unsigned lhsBits = getActiveBits();
1934  unsigned lhsWords = !lhsBits ? 0 : (whichWord(lhsBits - 1) + 1);
1935
1936  // Get some facts about the RHS
1937  unsigned rhsBits = RHS.getActiveBits();
1938  unsigned rhsWords = !rhsBits ? 0 : (APInt::whichWord(rhsBits - 1) + 1);
1939  assert(rhsWords && "Performing remainder operation by zero ???");
1940
1941  // Check the degenerate cases
1942  if (lhsWords == 0) {
1943    // 0 % Y ===> 0
1944    return APInt(BitWidth, 0);
1945  } else if (lhsWords < rhsWords || this->ult(RHS)) {
1946    // X % Y ===> X, iff X < Y
1947    return *this;
1948  } else if (*this == RHS) {
1949    // X % X == 0;
1950    return APInt(BitWidth, 0);
1951  } else if (lhsWords == 1) {
1952    // All high words are zero, just use native remainder
1953    return APInt(BitWidth, pVal[0] % RHS.pVal[0]);
1954  }
1955
1956  // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1957  APInt Remainder(1,0);
1958  divide(*this, lhsWords, RHS, rhsWords, 0, &Remainder);
1959  return Remainder;
1960}
1961
1962void APInt::udivrem(const APInt &LHS, const APInt &RHS,
1963                    APInt &Quotient, APInt &Remainder) {
1964  // Get some size facts about the dividend and divisor
1965  unsigned lhsBits  = LHS.getActiveBits();
1966  unsigned lhsWords = !lhsBits ? 0 : (APInt::whichWord(lhsBits - 1) + 1);
1967  unsigned rhsBits  = RHS.getActiveBits();
1968  unsigned rhsWords = !rhsBits ? 0 : (APInt::whichWord(rhsBits - 1) + 1);
1969
1970  // Check the degenerate cases
1971  if (lhsWords == 0) {
1972    Quotient = 0;                // 0 / Y ===> 0
1973    Remainder = 0;               // 0 % Y ===> 0
1974    return;
1975  }
1976
1977  if (lhsWords < rhsWords || LHS.ult(RHS)) {
1978    Quotient = 0;               // X / Y ===> 0, iff X < Y
1979    Remainder = LHS;            // X % Y ===> X, iff X < Y
1980    return;
1981  }
1982
1983  if (LHS == RHS) {
1984    Quotient  = 1;              // X / X ===> 1
1985    Remainder = 0;              // X % X ===> 0;
1986    return;
1987  }
1988
1989  if (lhsWords == 1 && rhsWords == 1) {
1990    // There is only one word to consider so use the native versions.
1991    uint64_t lhsValue = LHS.isSingleWord() ? LHS.VAL : LHS.pVal[0];
1992    uint64_t rhsValue = RHS.isSingleWord() ? RHS.VAL : RHS.pVal[0];
1993    Quotient = APInt(LHS.getBitWidth(), lhsValue / rhsValue);
1994    Remainder = APInt(LHS.getBitWidth(), lhsValue % rhsValue);
1995    return;
1996  }
1997
1998  // Okay, lets do it the long way
1999  divide(LHS, lhsWords, RHS, rhsWords, &Quotient, &Remainder);
2000}
2001
2002void APInt::fromString(unsigned numbits, const char *str, unsigned slen,
2003                       uint8_t radix) {
2004  // Check our assumptions here
2005  assert((radix == 10 || radix == 8 || radix == 16 || radix == 2) &&
2006         "Radix should be 2, 8, 10, or 16!");
2007  assert(str && "String is null?");
2008  bool isNeg = str[0] == '-';
2009  if (isNeg)
2010    str++, slen--;
2011  assert((slen <= numbits || radix != 2) && "Insufficient bit width");
2012  assert(((slen-1)*3 <= numbits || radix != 8) && "Insufficient bit width");
2013  assert(((slen-1)*4 <= numbits || radix != 16) && "Insufficient bit width");
2014  assert((((slen-1)*64)/22 <= numbits || radix != 10) && "Insufficient bit width");
2015
2016  // Allocate memory
2017  if (!isSingleWord())
2018    pVal = getClearedMemory(getNumWords());
2019
2020  // Figure out if we can shift instead of multiply
2021  unsigned shift = (radix == 16 ? 4 : radix == 8 ? 3 : radix == 2 ? 1 : 0);
2022
2023  // Set up an APInt for the digit to add outside the loop so we don't
2024  // constantly construct/destruct it.
2025  APInt apdigit(getBitWidth(), 0);
2026  APInt apradix(getBitWidth(), radix);
2027
2028  // Enter digit traversal loop
2029  for (unsigned i = 0; i < slen; i++) {
2030    // Get a digit
2031    unsigned digit = 0;
2032    char cdigit = str[i];
2033    if (radix == 16) {
2034      if (!isxdigit(cdigit))
2035        assert(0 && "Invalid hex digit in string");
2036      if (isdigit(cdigit))
2037        digit = cdigit - '0';
2038      else if (cdigit >= 'a')
2039        digit = cdigit - 'a' + 10;
2040      else if (cdigit >= 'A')
2041        digit = cdigit - 'A' + 10;
2042      else
2043        assert(0 && "huh? we shouldn't get here");
2044    } else if (isdigit(cdigit)) {
2045      digit = cdigit - '0';
2046      assert((radix == 10 ||
2047              (radix == 8 && digit != 8 && digit != 9) ||
2048              (radix == 2 && (digit == 0 || digit == 1))) &&
2049             "Invalid digit in string for given radix");
2050    } else {
2051      assert(0 && "Invalid character in digit string");
2052    }
2053
2054    // Shift or multiply the value by the radix
2055    if (slen > 1) {
2056      if (shift)
2057        *this <<= shift;
2058      else
2059        *this *= apradix;
2060    }
2061
2062    // Add in the digit we just interpreted
2063    if (apdigit.isSingleWord())
2064      apdigit.VAL = digit;
2065    else
2066      apdigit.pVal[0] = digit;
2067    *this += apdigit;
2068  }
2069  // If its negative, put it in two's complement form
2070  if (isNeg) {
2071    (*this)--;
2072    this->flip();
2073  }
2074}
2075
2076void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix,
2077                     bool Signed) const {
2078  assert((Radix == 10 || Radix == 8 || Radix == 16 || Radix == 2) &&
2079         "Radix should be 2, 8, 10, or 16!");
2080
2081  // First, check for a zero value and just short circuit the logic below.
2082  if (*this == 0) {
2083    Str.push_back('0');
2084    return;
2085  }
2086
2087  static const char Digits[] = "0123456789ABCDEF";
2088
2089  if (isSingleWord()) {
2090    char Buffer[65];
2091    char *BufPtr = Buffer+65;
2092
2093    uint64_t N;
2094    if (Signed) {
2095      int64_t I = getSExtValue();
2096      if (I < 0) {
2097        Str.push_back('-');
2098        I = -I;
2099      }
2100      N = I;
2101    } else {
2102      N = getZExtValue();
2103    }
2104
2105    while (N) {
2106      *--BufPtr = Digits[N % Radix];
2107      N /= Radix;
2108    }
2109    Str.append(BufPtr, Buffer+65);
2110    return;
2111  }
2112
2113  APInt Tmp(*this);
2114
2115  if (Signed && isNegative()) {
2116    // They want to print the signed version and it is a negative value
2117    // Flip the bits and add one to turn it into the equivalent positive
2118    // value and put a '-' in the result.
2119    Tmp.flip();
2120    Tmp++;
2121    Str.push_back('-');
2122  }
2123
2124  // We insert the digits backward, then reverse them to get the right order.
2125  unsigned StartDig = Str.size();
2126
2127  // For the 2, 8 and 16 bit cases, we can just shift instead of divide
2128  // because the number of bits per digit (1, 3 and 4 respectively) divides
2129  // equaly.  We just shift until the value is zero.
2130  if (Radix != 10) {
2131    // Just shift tmp right for each digit width until it becomes zero
2132    unsigned ShiftAmt = (Radix == 16 ? 4 : (Radix == 8 ? 3 : 1));
2133    unsigned MaskAmt = Radix - 1;
2134
2135    while (Tmp != 0) {
2136      unsigned Digit = unsigned(Tmp.getRawData()[0]) & MaskAmt;
2137      Str.push_back(Digits[Digit]);
2138      Tmp = Tmp.lshr(ShiftAmt);
2139    }
2140  } else {
2141    APInt divisor(4, 10);
2142    while (Tmp != 0) {
2143      APInt APdigit(1, 0);
2144      APInt tmp2(Tmp.getBitWidth(), 0);
2145      divide(Tmp, Tmp.getNumWords(), divisor, divisor.getNumWords(), &tmp2,
2146             &APdigit);
2147      unsigned Digit = (unsigned)APdigit.getZExtValue();
2148      assert(Digit < Radix && "divide failed");
2149      Str.push_back(Digits[Digit]);
2150      Tmp = tmp2;
2151    }
2152  }
2153
2154  // Reverse the digits before returning.
2155  std::reverse(Str.begin()+StartDig, Str.end());
2156}
2157
2158/// toString - This returns the APInt as a std::string.  Note that this is an
2159/// inefficient method.  It is better to pass in a SmallVector/SmallString
2160/// to the methods above.
2161std::string APInt::toString(unsigned Radix = 10, bool Signed = true) const {
2162  SmallString<40> S;
2163  toString(S, Radix, Signed);
2164  return S.c_str();
2165}
2166
2167
2168void APInt::dump() const {
2169  SmallString<40> S, U;
2170  this->toStringUnsigned(U);
2171  this->toStringSigned(S);
2172  fprintf(stderr, "APInt(%db, %su %ss)", BitWidth, U.c_str(), S.c_str());
2173}
2174
2175void APInt::print(raw_ostream &OS, bool isSigned) const {
2176  SmallString<40> S;
2177  this->toString(S, 10, isSigned);
2178  OS << S.c_str();
2179}
2180
2181std::ostream &llvm::operator<<(std::ostream &o, const APInt &I) {
2182  raw_os_ostream OS(o);
2183  OS << I;
2184  return o;
2185}
2186
2187// This implements a variety of operations on a representation of
2188// arbitrary precision, two's-complement, bignum integer values.
2189
2190/* Assumed by lowHalf, highHalf, partMSB and partLSB.  A fairly safe
2191   and unrestricting assumption.  */
2192#define COMPILE_TIME_ASSERT(cond) extern int CTAssert[(cond) ? 1 : -1]
2193COMPILE_TIME_ASSERT(integerPartWidth % 2 == 0);
2194
2195/* Some handy functions local to this file.  */
2196namespace {
2197
2198  /* Returns the integer part with the least significant BITS set.
2199     BITS cannot be zero.  */
2200  static inline integerPart
2201  lowBitMask(unsigned int bits)
2202  {
2203    assert (bits != 0 && bits <= integerPartWidth);
2204
2205    return ~(integerPart) 0 >> (integerPartWidth - bits);
2206  }
2207
2208  /* Returns the value of the lower half of PART.  */
2209  static inline integerPart
2210  lowHalf(integerPart part)
2211  {
2212    return part & lowBitMask(integerPartWidth / 2);
2213  }
2214
2215  /* Returns the value of the upper half of PART.  */
2216  static inline integerPart
2217  highHalf(integerPart part)
2218  {
2219    return part >> (integerPartWidth / 2);
2220  }
2221
2222  /* Returns the bit number of the most significant set bit of a part.
2223     If the input number has no bits set -1U is returned.  */
2224  static unsigned int
2225  partMSB(integerPart value)
2226  {
2227    unsigned int n, msb;
2228
2229    if (value == 0)
2230      return -1U;
2231
2232    n = integerPartWidth / 2;
2233
2234    msb = 0;
2235    do {
2236      if (value >> n) {
2237        value >>= n;
2238        msb += n;
2239      }
2240
2241      n >>= 1;
2242    } while (n);
2243
2244    return msb;
2245  }
2246
2247  /* Returns the bit number of the least significant set bit of a
2248     part.  If the input number has no bits set -1U is returned.  */
2249  static unsigned int
2250  partLSB(integerPart value)
2251  {
2252    unsigned int n, lsb;
2253
2254    if (value == 0)
2255      return -1U;
2256
2257    lsb = integerPartWidth - 1;
2258    n = integerPartWidth / 2;
2259
2260    do {
2261      if (value << n) {
2262        value <<= n;
2263        lsb -= n;
2264      }
2265
2266      n >>= 1;
2267    } while (n);
2268
2269    return lsb;
2270  }
2271}
2272
2273/* Sets the least significant part of a bignum to the input value, and
2274   zeroes out higher parts.  */
2275void
2276APInt::tcSet(integerPart *dst, integerPart part, unsigned int parts)
2277{
2278  unsigned int i;
2279
2280  assert (parts > 0);
2281
2282  dst[0] = part;
2283  for(i = 1; i < parts; i++)
2284    dst[i] = 0;
2285}
2286
2287/* Assign one bignum to another.  */
2288void
2289APInt::tcAssign(integerPart *dst, const integerPart *src, unsigned int parts)
2290{
2291  unsigned int i;
2292
2293  for(i = 0; i < parts; i++)
2294    dst[i] = src[i];
2295}
2296
2297/* Returns true if a bignum is zero, false otherwise.  */
2298bool
2299APInt::tcIsZero(const integerPart *src, unsigned int parts)
2300{
2301  unsigned int i;
2302
2303  for(i = 0; i < parts; i++)
2304    if (src[i])
2305      return false;
2306
2307  return true;
2308}
2309
2310/* Extract the given bit of a bignum; returns 0 or 1.  */
2311int
2312APInt::tcExtractBit(const integerPart *parts, unsigned int bit)
2313{
2314  return(parts[bit / integerPartWidth]
2315         & ((integerPart) 1 << bit % integerPartWidth)) != 0;
2316}
2317
2318/* Set the given bit of a bignum.  */
2319void
2320APInt::tcSetBit(integerPart *parts, unsigned int bit)
2321{
2322  parts[bit / integerPartWidth] |= (integerPart) 1 << (bit % integerPartWidth);
2323}
2324
2325/* Returns the bit number of the least significant set bit of a
2326   number.  If the input number has no bits set -1U is returned.  */
2327unsigned int
2328APInt::tcLSB(const integerPart *parts, unsigned int n)
2329{
2330  unsigned int i, lsb;
2331
2332  for(i = 0; i < n; i++) {
2333      if (parts[i] != 0) {
2334          lsb = partLSB(parts[i]);
2335
2336          return lsb + i * integerPartWidth;
2337      }
2338  }
2339
2340  return -1U;
2341}
2342
2343/* Returns the bit number of the most significant set bit of a number.
2344   If the input number has no bits set -1U is returned.  */
2345unsigned int
2346APInt::tcMSB(const integerPart *parts, unsigned int n)
2347{
2348  unsigned int msb;
2349
2350  do {
2351      --n;
2352
2353      if (parts[n] != 0) {
2354          msb = partMSB(parts[n]);
2355
2356          return msb + n * integerPartWidth;
2357      }
2358  } while (n);
2359
2360  return -1U;
2361}
2362
2363/* Copy the bit vector of width srcBITS from SRC, starting at bit
2364   srcLSB, to DST, of dstCOUNT parts, such that the bit srcLSB becomes
2365   the least significant bit of DST.  All high bits above srcBITS in
2366   DST are zero-filled.  */
2367void
2368APInt::tcExtract(integerPart *dst, unsigned int dstCount,const integerPart *src,
2369                 unsigned int srcBits, unsigned int srcLSB)
2370{
2371  unsigned int firstSrcPart, dstParts, shift, n;
2372
2373  dstParts = (srcBits + integerPartWidth - 1) / integerPartWidth;
2374  assert (dstParts <= dstCount);
2375
2376  firstSrcPart = srcLSB / integerPartWidth;
2377  tcAssign (dst, src + firstSrcPart, dstParts);
2378
2379  shift = srcLSB % integerPartWidth;
2380  tcShiftRight (dst, dstParts, shift);
2381
2382  /* We now have (dstParts * integerPartWidth - shift) bits from SRC
2383     in DST.  If this is less that srcBits, append the rest, else
2384     clear the high bits.  */
2385  n = dstParts * integerPartWidth - shift;
2386  if (n < srcBits) {
2387    integerPart mask = lowBitMask (srcBits - n);
2388    dst[dstParts - 1] |= ((src[firstSrcPart + dstParts] & mask)
2389                          << n % integerPartWidth);
2390  } else if (n > srcBits) {
2391    if (srcBits % integerPartWidth)
2392      dst[dstParts - 1] &= lowBitMask (srcBits % integerPartWidth);
2393  }
2394
2395  /* Clear high parts.  */
2396  while (dstParts < dstCount)
2397    dst[dstParts++] = 0;
2398}
2399
2400/* DST += RHS + C where C is zero or one.  Returns the carry flag.  */
2401integerPart
2402APInt::tcAdd(integerPart *dst, const integerPart *rhs,
2403             integerPart c, unsigned int parts)
2404{
2405  unsigned int i;
2406
2407  assert(c <= 1);
2408
2409  for(i = 0; i < parts; i++) {
2410    integerPart l;
2411
2412    l = dst[i];
2413    if (c) {
2414      dst[i] += rhs[i] + 1;
2415      c = (dst[i] <= l);
2416    } else {
2417      dst[i] += rhs[i];
2418      c = (dst[i] < l);
2419    }
2420  }
2421
2422  return c;
2423}
2424
2425/* DST -= RHS + C where C is zero or one.  Returns the carry flag.  */
2426integerPart
2427APInt::tcSubtract(integerPart *dst, const integerPart *rhs,
2428                  integerPart c, unsigned int parts)
2429{
2430  unsigned int i;
2431
2432  assert(c <= 1);
2433
2434  for(i = 0; i < parts; i++) {
2435    integerPart l;
2436
2437    l = dst[i];
2438    if (c) {
2439      dst[i] -= rhs[i] + 1;
2440      c = (dst[i] >= l);
2441    } else {
2442      dst[i] -= rhs[i];
2443      c = (dst[i] > l);
2444    }
2445  }
2446
2447  return c;
2448}
2449
2450/* Negate a bignum in-place.  */
2451void
2452APInt::tcNegate(integerPart *dst, unsigned int parts)
2453{
2454  tcComplement(dst, parts);
2455  tcIncrement(dst, parts);
2456}
2457
2458/*  DST += SRC * MULTIPLIER + CARRY   if add is true
2459    DST  = SRC * MULTIPLIER + CARRY   if add is false
2460
2461    Requires 0 <= DSTPARTS <= SRCPARTS + 1.  If DST overlaps SRC
2462    they must start at the same point, i.e. DST == SRC.
2463
2464    If DSTPARTS == SRCPARTS + 1 no overflow occurs and zero is
2465    returned.  Otherwise DST is filled with the least significant
2466    DSTPARTS parts of the result, and if all of the omitted higher
2467    parts were zero return zero, otherwise overflow occurred and
2468    return one.  */
2469int
2470APInt::tcMultiplyPart(integerPart *dst, const integerPart *src,
2471                      integerPart multiplier, integerPart carry,
2472                      unsigned int srcParts, unsigned int dstParts,
2473                      bool add)
2474{
2475  unsigned int i, n;
2476
2477  /* Otherwise our writes of DST kill our later reads of SRC.  */
2478  assert(dst <= src || dst >= src + srcParts);
2479  assert(dstParts <= srcParts + 1);
2480
2481  /* N loops; minimum of dstParts and srcParts.  */
2482  n = dstParts < srcParts ? dstParts: srcParts;
2483
2484  for(i = 0; i < n; i++) {
2485    integerPart low, mid, high, srcPart;
2486
2487      /* [ LOW, HIGH ] = MULTIPLIER * SRC[i] + DST[i] + CARRY.
2488
2489         This cannot overflow, because
2490
2491         (n - 1) * (n - 1) + 2 (n - 1) = (n - 1) * (n + 1)
2492
2493         which is less than n^2.  */
2494
2495    srcPart = src[i];
2496
2497    if (multiplier == 0 || srcPart == 0)        {
2498      low = carry;
2499      high = 0;
2500    } else {
2501      low = lowHalf(srcPart) * lowHalf(multiplier);
2502      high = highHalf(srcPart) * highHalf(multiplier);
2503
2504      mid = lowHalf(srcPart) * highHalf(multiplier);
2505      high += highHalf(mid);
2506      mid <<= integerPartWidth / 2;
2507      if (low + mid < low)
2508        high++;
2509      low += mid;
2510
2511      mid = highHalf(srcPart) * lowHalf(multiplier);
2512      high += highHalf(mid);
2513      mid <<= integerPartWidth / 2;
2514      if (low + mid < low)
2515        high++;
2516      low += mid;
2517
2518      /* Now add carry.  */
2519      if (low + carry < low)
2520        high++;
2521      low += carry;
2522    }
2523
2524    if (add) {
2525      /* And now DST[i], and store the new low part there.  */
2526      if (low + dst[i] < low)
2527        high++;
2528      dst[i] += low;
2529    } else
2530      dst[i] = low;
2531
2532    carry = high;
2533  }
2534
2535  if (i < dstParts) {
2536    /* Full multiplication, there is no overflow.  */
2537    assert(i + 1 == dstParts);
2538    dst[i] = carry;
2539    return 0;
2540  } else {
2541    /* We overflowed if there is carry.  */
2542    if (carry)
2543      return 1;
2544
2545    /* We would overflow if any significant unwritten parts would be
2546       non-zero.  This is true if any remaining src parts are non-zero
2547       and the multiplier is non-zero.  */
2548    if (multiplier)
2549      for(; i < srcParts; i++)
2550        if (src[i])
2551          return 1;
2552
2553    /* We fitted in the narrow destination.  */
2554    return 0;
2555  }
2556}
2557
2558/* DST = LHS * RHS, where DST has the same width as the operands and
2559   is filled with the least significant parts of the result.  Returns
2560   one if overflow occurred, otherwise zero.  DST must be disjoint
2561   from both operands.  */
2562int
2563APInt::tcMultiply(integerPart *dst, const integerPart *lhs,
2564                  const integerPart *rhs, unsigned int parts)
2565{
2566  unsigned int i;
2567  int overflow;
2568
2569  assert(dst != lhs && dst != rhs);
2570
2571  overflow = 0;
2572  tcSet(dst, 0, parts);
2573
2574  for(i = 0; i < parts; i++)
2575    overflow |= tcMultiplyPart(&dst[i], lhs, rhs[i], 0, parts,
2576                               parts - i, true);
2577
2578  return overflow;
2579}
2580
2581/* DST = LHS * RHS, where DST has width the sum of the widths of the
2582   operands.  No overflow occurs.  DST must be disjoint from both
2583   operands.  Returns the number of parts required to hold the
2584   result.  */
2585unsigned int
2586APInt::tcFullMultiply(integerPart *dst, const integerPart *lhs,
2587                      const integerPart *rhs, unsigned int lhsParts,
2588                      unsigned int rhsParts)
2589{
2590  /* Put the narrower number on the LHS for less loops below.  */
2591  if (lhsParts > rhsParts) {
2592    return tcFullMultiply (dst, rhs, lhs, rhsParts, lhsParts);
2593  } else {
2594    unsigned int n;
2595
2596    assert(dst != lhs && dst != rhs);
2597
2598    tcSet(dst, 0, rhsParts);
2599
2600    for(n = 0; n < lhsParts; n++)
2601      tcMultiplyPart(&dst[n], rhs, lhs[n], 0, rhsParts, rhsParts + 1, true);
2602
2603    n = lhsParts + rhsParts;
2604
2605    return n - (dst[n - 1] == 0);
2606  }
2607}
2608
2609/* If RHS is zero LHS and REMAINDER are left unchanged, return one.
2610   Otherwise set LHS to LHS / RHS with the fractional part discarded,
2611   set REMAINDER to the remainder, return zero.  i.e.
2612
2613   OLD_LHS = RHS * LHS + REMAINDER
2614
2615   SCRATCH is a bignum of the same size as the operands and result for
2616   use by the routine; its contents need not be initialized and are
2617   destroyed.  LHS, REMAINDER and SCRATCH must be distinct.
2618*/
2619int
2620APInt::tcDivide(integerPart *lhs, const integerPart *rhs,
2621                integerPart *remainder, integerPart *srhs,
2622                unsigned int parts)
2623{
2624  unsigned int n, shiftCount;
2625  integerPart mask;
2626
2627  assert(lhs != remainder && lhs != srhs && remainder != srhs);
2628
2629  shiftCount = tcMSB(rhs, parts) + 1;
2630  if (shiftCount == 0)
2631    return true;
2632
2633  shiftCount = parts * integerPartWidth - shiftCount;
2634  n = shiftCount / integerPartWidth;
2635  mask = (integerPart) 1 << (shiftCount % integerPartWidth);
2636
2637  tcAssign(srhs, rhs, parts);
2638  tcShiftLeft(srhs, parts, shiftCount);
2639  tcAssign(remainder, lhs, parts);
2640  tcSet(lhs, 0, parts);
2641
2642  /* Loop, subtracting SRHS if REMAINDER is greater and adding that to
2643     the total.  */
2644  for(;;) {
2645      int compare;
2646
2647      compare = tcCompare(remainder, srhs, parts);
2648      if (compare >= 0) {
2649        tcSubtract(remainder, srhs, 0, parts);
2650        lhs[n] |= mask;
2651      }
2652
2653      if (shiftCount == 0)
2654        break;
2655      shiftCount--;
2656      tcShiftRight(srhs, parts, 1);
2657      if ((mask >>= 1) == 0)
2658        mask = (integerPart) 1 << (integerPartWidth - 1), n--;
2659  }
2660
2661  return false;
2662}
2663
2664/* Shift a bignum left COUNT bits in-place.  Shifted in bits are zero.
2665   There are no restrictions on COUNT.  */
2666void
2667APInt::tcShiftLeft(integerPart *dst, unsigned int parts, unsigned int count)
2668{
2669  if (count) {
2670    unsigned int jump, shift;
2671
2672    /* Jump is the inter-part jump; shift is is intra-part shift.  */
2673    jump = count / integerPartWidth;
2674    shift = count % integerPartWidth;
2675
2676    while (parts > jump) {
2677      integerPart part;
2678
2679      parts--;
2680
2681      /* dst[i] comes from the two parts src[i - jump] and, if we have
2682         an intra-part shift, src[i - jump - 1].  */
2683      part = dst[parts - jump];
2684      if (shift) {
2685        part <<= shift;
2686        if (parts >= jump + 1)
2687          part |= dst[parts - jump - 1] >> (integerPartWidth - shift);
2688      }
2689
2690      dst[parts] = part;
2691    }
2692
2693    while (parts > 0)
2694      dst[--parts] = 0;
2695  }
2696}
2697
2698/* Shift a bignum right COUNT bits in-place.  Shifted in bits are
2699   zero.  There are no restrictions on COUNT.  */
2700void
2701APInt::tcShiftRight(integerPart *dst, unsigned int parts, unsigned int count)
2702{
2703  if (count) {
2704    unsigned int i, jump, shift;
2705
2706    /* Jump is the inter-part jump; shift is is intra-part shift.  */
2707    jump = count / integerPartWidth;
2708    shift = count % integerPartWidth;
2709
2710    /* Perform the shift.  This leaves the most significant COUNT bits
2711       of the result at zero.  */
2712    for(i = 0; i < parts; i++) {
2713      integerPart part;
2714
2715      if (i + jump >= parts) {
2716        part = 0;
2717      } else {
2718        part = dst[i + jump];
2719        if (shift) {
2720          part >>= shift;
2721          if (i + jump + 1 < parts)
2722            part |= dst[i + jump + 1] << (integerPartWidth - shift);
2723        }
2724      }
2725
2726      dst[i] = part;
2727    }
2728  }
2729}
2730
2731/* Bitwise and of two bignums.  */
2732void
2733APInt::tcAnd(integerPart *dst, const integerPart *rhs, unsigned int parts)
2734{
2735  unsigned int i;
2736
2737  for(i = 0; i < parts; i++)
2738    dst[i] &= rhs[i];
2739}
2740
2741/* Bitwise inclusive or of two bignums.  */
2742void
2743APInt::tcOr(integerPart *dst, const integerPart *rhs, unsigned int parts)
2744{
2745  unsigned int i;
2746
2747  for(i = 0; i < parts; i++)
2748    dst[i] |= rhs[i];
2749}
2750
2751/* Bitwise exclusive or of two bignums.  */
2752void
2753APInt::tcXor(integerPart *dst, const integerPart *rhs, unsigned int parts)
2754{
2755  unsigned int i;
2756
2757  for(i = 0; i < parts; i++)
2758    dst[i] ^= rhs[i];
2759}
2760
2761/* Complement a bignum in-place.  */
2762void
2763APInt::tcComplement(integerPart *dst, unsigned int parts)
2764{
2765  unsigned int i;
2766
2767  for(i = 0; i < parts; i++)
2768    dst[i] = ~dst[i];
2769}
2770
2771/* Comparison (unsigned) of two bignums.  */
2772int
2773APInt::tcCompare(const integerPart *lhs, const integerPart *rhs,
2774                 unsigned int parts)
2775{
2776  while (parts) {
2777      parts--;
2778      if (lhs[parts] == rhs[parts])
2779        continue;
2780
2781      if (lhs[parts] > rhs[parts])
2782        return 1;
2783      else
2784        return -1;
2785    }
2786
2787  return 0;
2788}
2789
2790/* Increment a bignum in-place, return the carry flag.  */
2791integerPart
2792APInt::tcIncrement(integerPart *dst, unsigned int parts)
2793{
2794  unsigned int i;
2795
2796  for(i = 0; i < parts; i++)
2797    if (++dst[i] != 0)
2798      break;
2799
2800  return i == parts;
2801}
2802
2803/* Set the least significant BITS bits of a bignum, clear the
2804   rest.  */
2805void
2806APInt::tcSetLeastSignificantBits(integerPart *dst, unsigned int parts,
2807                                 unsigned int bits)
2808{
2809  unsigned int i;
2810
2811  i = 0;
2812  while (bits > integerPartWidth) {
2813    dst[i++] = ~(integerPart) 0;
2814    bits -= integerPartWidth;
2815  }
2816
2817  if (bits)
2818    dst[i++] = ~(integerPart) 0 >> (integerPartWidth - bits);
2819
2820  while (i < parts)
2821    dst[i++] = 0;
2822}
2823