APInt.cpp revision 223017
1//===-- APInt.cpp - Implement APInt class ---------------------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file implements a class to represent arbitrary precision integer
11// constant values and provide a variety of arithmetic operations on them.
12//
13//===----------------------------------------------------------------------===//
14
15#define DEBUG_TYPE "apint"
16#include "llvm/ADT/APInt.h"
17#include "llvm/ADT/StringRef.h"
18#include "llvm/ADT/FoldingSet.h"
19#include "llvm/ADT/SmallString.h"
20#include "llvm/Support/Debug.h"
21#include "llvm/Support/ErrorHandling.h"
22#include "llvm/Support/MathExtras.h"
23#include "llvm/Support/raw_ostream.h"
24#include <cmath>
25#include <limits>
26#include <cstring>
27#include <cstdlib>
28using namespace llvm;
29
30/// A utility function for allocating memory, checking for allocation failures,
31/// and ensuring the contents are zeroed.
32inline static uint64_t* getClearedMemory(unsigned numWords) {
33  uint64_t * result = new uint64_t[numWords];
34  assert(result && "APInt memory allocation fails!");
35  memset(result, 0, numWords * sizeof(uint64_t));
36  return result;
37}
38
39/// A utility function for allocating memory and checking for allocation
40/// failure.  The content is not zeroed.
41inline static uint64_t* getMemory(unsigned numWords) {
42  uint64_t * result = new uint64_t[numWords];
43  assert(result && "APInt memory allocation fails!");
44  return result;
45}
46
47/// A utility function that converts a character to a digit.
48inline static unsigned getDigit(char cdigit, uint8_t radix) {
49  unsigned r;
50
51  if (radix == 16) {
52    r = cdigit - '0';
53    if (r <= 9)
54      return r;
55
56    r = cdigit - 'A';
57    if (r <= 5)
58      return r + 10;
59
60    r = cdigit - 'a';
61    if (r <= 5)
62      return r + 10;
63  }
64
65  r = cdigit - '0';
66  if (r < radix)
67    return r;
68
69  return -1U;
70}
71
72
73void APInt::initSlowCase(unsigned numBits, uint64_t val, bool isSigned) {
74  pVal = getClearedMemory(getNumWords());
75  pVal[0] = val;
76  if (isSigned && int64_t(val) < 0)
77    for (unsigned i = 1; i < getNumWords(); ++i)
78      pVal[i] = -1ULL;
79}
80
81void APInt::initSlowCase(const APInt& that) {
82  pVal = getMemory(getNumWords());
83  memcpy(pVal, that.pVal, getNumWords() * APINT_WORD_SIZE);
84}
85
86
87APInt::APInt(unsigned numBits, unsigned numWords, const uint64_t bigVal[])
88  : BitWidth(numBits), VAL(0) {
89  assert(BitWidth && "Bitwidth too small");
90  assert(bigVal && "Null pointer detected!");
91  if (isSingleWord())
92    VAL = bigVal[0];
93  else {
94    // Get memory, cleared to 0
95    pVal = getClearedMemory(getNumWords());
96    // Calculate the number of words to copy
97    unsigned words = std::min<unsigned>(numWords, getNumWords());
98    // Copy the words from bigVal to pVal
99    memcpy(pVal, bigVal, words * APINT_WORD_SIZE);
100  }
101  // Make sure unused high bits are cleared
102  clearUnusedBits();
103}
104
105APInt::APInt(unsigned numbits, StringRef Str, uint8_t radix)
106  : BitWidth(numbits), VAL(0) {
107  assert(BitWidth && "Bitwidth too small");
108  fromString(numbits, Str, radix);
109}
110
111APInt& APInt::AssignSlowCase(const APInt& RHS) {
112  // Don't do anything for X = X
113  if (this == &RHS)
114    return *this;
115
116  if (BitWidth == RHS.getBitWidth()) {
117    // assume same bit-width single-word case is already handled
118    assert(!isSingleWord());
119    memcpy(pVal, RHS.pVal, getNumWords() * APINT_WORD_SIZE);
120    return *this;
121  }
122
123  if (isSingleWord()) {
124    // assume case where both are single words is already handled
125    assert(!RHS.isSingleWord());
126    VAL = 0;
127    pVal = getMemory(RHS.getNumWords());
128    memcpy(pVal, RHS.pVal, RHS.getNumWords() * APINT_WORD_SIZE);
129  } else if (getNumWords() == RHS.getNumWords())
130    memcpy(pVal, RHS.pVal, RHS.getNumWords() * APINT_WORD_SIZE);
131  else if (RHS.isSingleWord()) {
132    delete [] pVal;
133    VAL = RHS.VAL;
134  } else {
135    delete [] pVal;
136    pVal = getMemory(RHS.getNumWords());
137    memcpy(pVal, RHS.pVal, RHS.getNumWords() * APINT_WORD_SIZE);
138  }
139  BitWidth = RHS.BitWidth;
140  return clearUnusedBits();
141}
142
143APInt& APInt::operator=(uint64_t RHS) {
144  if (isSingleWord())
145    VAL = RHS;
146  else {
147    pVal[0] = RHS;
148    memset(pVal+1, 0, (getNumWords() - 1) * APINT_WORD_SIZE);
149  }
150  return clearUnusedBits();
151}
152
153/// Profile - This method 'profiles' an APInt for use with FoldingSet.
154void APInt::Profile(FoldingSetNodeID& ID) const {
155  ID.AddInteger(BitWidth);
156
157  if (isSingleWord()) {
158    ID.AddInteger(VAL);
159    return;
160  }
161
162  unsigned NumWords = getNumWords();
163  for (unsigned i = 0; i < NumWords; ++i)
164    ID.AddInteger(pVal[i]);
165}
166
167/// add_1 - This function adds a single "digit" integer, y, to the multiple
168/// "digit" integer array,  x[]. x[] is modified to reflect the addition and
169/// 1 is returned if there is a carry out, otherwise 0 is returned.
170/// @returns the carry of the addition.
171static bool add_1(uint64_t dest[], uint64_t x[], unsigned len, uint64_t y) {
172  for (unsigned i = 0; i < len; ++i) {
173    dest[i] = y + x[i];
174    if (dest[i] < y)
175      y = 1; // Carry one to next digit.
176    else {
177      y = 0; // No need to carry so exit early
178      break;
179    }
180  }
181  return y;
182}
183
184/// @brief Prefix increment operator. Increments the APInt by one.
185APInt& APInt::operator++() {
186  if (isSingleWord())
187    ++VAL;
188  else
189    add_1(pVal, pVal, getNumWords(), 1);
190  return clearUnusedBits();
191}
192
193/// sub_1 - This function subtracts a single "digit" (64-bit word), y, from
194/// the multi-digit integer array, x[], propagating the borrowed 1 value until
195/// no further borrowing is neeeded or it runs out of "digits" in x.  The result
196/// is 1 if "borrowing" exhausted the digits in x, or 0 if x was not exhausted.
197/// In other words, if y > x then this function returns 1, otherwise 0.
198/// @returns the borrow out of the subtraction
199static bool sub_1(uint64_t x[], unsigned len, uint64_t y) {
200  for (unsigned i = 0; i < len; ++i) {
201    uint64_t X = x[i];
202    x[i] -= y;
203    if (y > X)
204      y = 1;  // We have to "borrow 1" from next "digit"
205    else {
206      y = 0;  // No need to borrow
207      break;  // Remaining digits are unchanged so exit early
208    }
209  }
210  return bool(y);
211}
212
213/// @brief Prefix decrement operator. Decrements the APInt by one.
214APInt& APInt::operator--() {
215  if (isSingleWord())
216    --VAL;
217  else
218    sub_1(pVal, getNumWords(), 1);
219  return clearUnusedBits();
220}
221
222/// add - This function adds the integer array x to the integer array Y and
223/// places the result in dest.
224/// @returns the carry out from the addition
225/// @brief General addition of 64-bit integer arrays
226static bool add(uint64_t *dest, const uint64_t *x, const uint64_t *y,
227                unsigned len) {
228  bool carry = false;
229  for (unsigned i = 0; i< len; ++i) {
230    uint64_t limit = std::min(x[i],y[i]); // must come first in case dest == x
231    dest[i] = x[i] + y[i] + carry;
232    carry = dest[i] < limit || (carry && dest[i] == limit);
233  }
234  return carry;
235}
236
237/// Adds the RHS APint to this APInt.
238/// @returns this, after addition of RHS.
239/// @brief Addition assignment operator.
240APInt& APInt::operator+=(const APInt& RHS) {
241  assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
242  if (isSingleWord())
243    VAL += RHS.VAL;
244  else {
245    add(pVal, pVal, RHS.pVal, getNumWords());
246  }
247  return clearUnusedBits();
248}
249
250/// Subtracts the integer array y from the integer array x
251/// @returns returns the borrow out.
252/// @brief Generalized subtraction of 64-bit integer arrays.
253static bool sub(uint64_t *dest, const uint64_t *x, const uint64_t *y,
254                unsigned len) {
255  bool borrow = false;
256  for (unsigned i = 0; i < len; ++i) {
257    uint64_t x_tmp = borrow ? x[i] - 1 : x[i];
258    borrow = y[i] > x_tmp || (borrow && x[i] == 0);
259    dest[i] = x_tmp - y[i];
260  }
261  return borrow;
262}
263
264/// Subtracts the RHS APInt from this APInt
265/// @returns this, after subtraction
266/// @brief Subtraction assignment operator.
267APInt& APInt::operator-=(const APInt& RHS) {
268  assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
269  if (isSingleWord())
270    VAL -= RHS.VAL;
271  else
272    sub(pVal, pVal, RHS.pVal, getNumWords());
273  return clearUnusedBits();
274}
275
276/// Multiplies an integer array, x, by a uint64_t integer and places the result
277/// into dest.
278/// @returns the carry out of the multiplication.
279/// @brief Multiply a multi-digit APInt by a single digit (64-bit) integer.
280static uint64_t mul_1(uint64_t dest[], uint64_t x[], unsigned len, uint64_t y) {
281  // Split y into high 32-bit part (hy)  and low 32-bit part (ly)
282  uint64_t ly = y & 0xffffffffULL, hy = y >> 32;
283  uint64_t carry = 0;
284
285  // For each digit of x.
286  for (unsigned i = 0; i < len; ++i) {
287    // Split x into high and low words
288    uint64_t lx = x[i] & 0xffffffffULL;
289    uint64_t hx = x[i] >> 32;
290    // hasCarry - A flag to indicate if there is a carry to the next digit.
291    // hasCarry == 0, no carry
292    // hasCarry == 1, has carry
293    // hasCarry == 2, no carry and the calculation result == 0.
294    uint8_t hasCarry = 0;
295    dest[i] = carry + lx * ly;
296    // Determine if the add above introduces carry.
297    hasCarry = (dest[i] < carry) ? 1 : 0;
298    carry = hx * ly + (dest[i] >> 32) + (hasCarry ? (1ULL << 32) : 0);
299    // The upper limit of carry can be (2^32 - 1)(2^32 - 1) +
300    // (2^32 - 1) + 2^32 = 2^64.
301    hasCarry = (!carry && hasCarry) ? 1 : (!carry ? 2 : 0);
302
303    carry += (lx * hy) & 0xffffffffULL;
304    dest[i] = (carry << 32) | (dest[i] & 0xffffffffULL);
305    carry = (((!carry && hasCarry != 2) || hasCarry == 1) ? (1ULL << 32) : 0) +
306            (carry >> 32) + ((lx * hy) >> 32) + hx * hy;
307  }
308  return carry;
309}
310
311/// Multiplies integer array x by integer array y and stores the result into
312/// the integer array dest. Note that dest's size must be >= xlen + ylen.
313/// @brief Generalized multiplicate of integer arrays.
314static void mul(uint64_t dest[], uint64_t x[], unsigned xlen, uint64_t y[],
315                unsigned ylen) {
316  dest[xlen] = mul_1(dest, x, xlen, y[0]);
317  for (unsigned i = 1; i < ylen; ++i) {
318    uint64_t ly = y[i] & 0xffffffffULL, hy = y[i] >> 32;
319    uint64_t carry = 0, lx = 0, hx = 0;
320    for (unsigned j = 0; j < xlen; ++j) {
321      lx = x[j] & 0xffffffffULL;
322      hx = x[j] >> 32;
323      // hasCarry - A flag to indicate if has carry.
324      // hasCarry == 0, no carry
325      // hasCarry == 1, has carry
326      // hasCarry == 2, no carry and the calculation result == 0.
327      uint8_t hasCarry = 0;
328      uint64_t resul = carry + lx * ly;
329      hasCarry = (resul < carry) ? 1 : 0;
330      carry = (hasCarry ? (1ULL << 32) : 0) + hx * ly + (resul >> 32);
331      hasCarry = (!carry && hasCarry) ? 1 : (!carry ? 2 : 0);
332
333      carry += (lx * hy) & 0xffffffffULL;
334      resul = (carry << 32) | (resul & 0xffffffffULL);
335      dest[i+j] += resul;
336      carry = (((!carry && hasCarry != 2) || hasCarry == 1) ? (1ULL << 32) : 0)+
337              (carry >> 32) + (dest[i+j] < resul ? 1 : 0) +
338              ((lx * hy) >> 32) + hx * hy;
339    }
340    dest[i+xlen] = carry;
341  }
342}
343
344APInt& APInt::operator*=(const APInt& RHS) {
345  assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
346  if (isSingleWord()) {
347    VAL *= RHS.VAL;
348    clearUnusedBits();
349    return *this;
350  }
351
352  // Get some bit facts about LHS and check for zero
353  unsigned lhsBits = getActiveBits();
354  unsigned lhsWords = !lhsBits ? 0 : whichWord(lhsBits - 1) + 1;
355  if (!lhsWords)
356    // 0 * X ===> 0
357    return *this;
358
359  // Get some bit facts about RHS and check for zero
360  unsigned rhsBits = RHS.getActiveBits();
361  unsigned rhsWords = !rhsBits ? 0 : whichWord(rhsBits - 1) + 1;
362  if (!rhsWords) {
363    // X * 0 ===> 0
364    clearAllBits();
365    return *this;
366  }
367
368  // Allocate space for the result
369  unsigned destWords = rhsWords + lhsWords;
370  uint64_t *dest = getMemory(destWords);
371
372  // Perform the long multiply
373  mul(dest, pVal, lhsWords, RHS.pVal, rhsWords);
374
375  // Copy result back into *this
376  clearAllBits();
377  unsigned wordsToCopy = destWords >= getNumWords() ? getNumWords() : destWords;
378  memcpy(pVal, dest, wordsToCopy * APINT_WORD_SIZE);
379
380  // delete dest array and return
381  delete[] dest;
382  return *this;
383}
384
385APInt& APInt::operator&=(const APInt& RHS) {
386  assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
387  if (isSingleWord()) {
388    VAL &= RHS.VAL;
389    return *this;
390  }
391  unsigned numWords = getNumWords();
392  for (unsigned i = 0; i < numWords; ++i)
393    pVal[i] &= RHS.pVal[i];
394  return *this;
395}
396
397APInt& APInt::operator|=(const APInt& RHS) {
398  assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
399  if (isSingleWord()) {
400    VAL |= RHS.VAL;
401    return *this;
402  }
403  unsigned numWords = getNumWords();
404  for (unsigned i = 0; i < numWords; ++i)
405    pVal[i] |= RHS.pVal[i];
406  return *this;
407}
408
409APInt& APInt::operator^=(const APInt& RHS) {
410  assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
411  if (isSingleWord()) {
412    VAL ^= RHS.VAL;
413    this->clearUnusedBits();
414    return *this;
415  }
416  unsigned numWords = getNumWords();
417  for (unsigned i = 0; i < numWords; ++i)
418    pVal[i] ^= RHS.pVal[i];
419  return clearUnusedBits();
420}
421
422APInt APInt::AndSlowCase(const APInt& RHS) const {
423  unsigned numWords = getNumWords();
424  uint64_t* val = getMemory(numWords);
425  for (unsigned i = 0; i < numWords; ++i)
426    val[i] = pVal[i] & RHS.pVal[i];
427  return APInt(val, getBitWidth());
428}
429
430APInt APInt::OrSlowCase(const APInt& RHS) const {
431  unsigned numWords = getNumWords();
432  uint64_t *val = getMemory(numWords);
433  for (unsigned i = 0; i < numWords; ++i)
434    val[i] = pVal[i] | RHS.pVal[i];
435  return APInt(val, getBitWidth());
436}
437
438APInt APInt::XorSlowCase(const APInt& RHS) const {
439  unsigned numWords = getNumWords();
440  uint64_t *val = getMemory(numWords);
441  for (unsigned i = 0; i < numWords; ++i)
442    val[i] = pVal[i] ^ RHS.pVal[i];
443
444  // 0^0==1 so clear the high bits in case they got set.
445  return APInt(val, getBitWidth()).clearUnusedBits();
446}
447
448bool APInt::operator !() const {
449  if (isSingleWord())
450    return !VAL;
451
452  for (unsigned i = 0; i < getNumWords(); ++i)
453    if (pVal[i])
454      return false;
455  return true;
456}
457
458APInt APInt::operator*(const APInt& RHS) const {
459  assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
460  if (isSingleWord())
461    return APInt(BitWidth, VAL * RHS.VAL);
462  APInt Result(*this);
463  Result *= RHS;
464  return Result.clearUnusedBits();
465}
466
467APInt APInt::operator+(const APInt& RHS) const {
468  assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
469  if (isSingleWord())
470    return APInt(BitWidth, VAL + RHS.VAL);
471  APInt Result(BitWidth, 0);
472  add(Result.pVal, this->pVal, RHS.pVal, getNumWords());
473  return Result.clearUnusedBits();
474}
475
476APInt APInt::operator-(const APInt& RHS) const {
477  assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
478  if (isSingleWord())
479    return APInt(BitWidth, VAL - RHS.VAL);
480  APInt Result(BitWidth, 0);
481  sub(Result.pVal, this->pVal, RHS.pVal, getNumWords());
482  return Result.clearUnusedBits();
483}
484
485bool APInt::operator[](unsigned bitPosition) const {
486  assert(bitPosition < getBitWidth() && "Bit position out of bounds!");
487  return (maskBit(bitPosition) &
488          (isSingleWord() ?  VAL : pVal[whichWord(bitPosition)])) != 0;
489}
490
491bool APInt::EqualSlowCase(const APInt& RHS) const {
492  // Get some facts about the number of bits used in the two operands.
493  unsigned n1 = getActiveBits();
494  unsigned n2 = RHS.getActiveBits();
495
496  // If the number of bits isn't the same, they aren't equal
497  if (n1 != n2)
498    return false;
499
500  // If the number of bits fits in a word, we only need to compare the low word.
501  if (n1 <= APINT_BITS_PER_WORD)
502    return pVal[0] == RHS.pVal[0];
503
504  // Otherwise, compare everything
505  for (int i = whichWord(n1 - 1); i >= 0; --i)
506    if (pVal[i] != RHS.pVal[i])
507      return false;
508  return true;
509}
510
511bool APInt::EqualSlowCase(uint64_t Val) const {
512  unsigned n = getActiveBits();
513  if (n <= APINT_BITS_PER_WORD)
514    return pVal[0] == Val;
515  else
516    return false;
517}
518
519bool APInt::ult(const APInt& RHS) const {
520  assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison");
521  if (isSingleWord())
522    return VAL < RHS.VAL;
523
524  // Get active bit length of both operands
525  unsigned n1 = getActiveBits();
526  unsigned n2 = RHS.getActiveBits();
527
528  // If magnitude of LHS is less than RHS, return true.
529  if (n1 < n2)
530    return true;
531
532  // If magnitude of RHS is greather than LHS, return false.
533  if (n2 < n1)
534    return false;
535
536  // If they bot fit in a word, just compare the low order word
537  if (n1 <= APINT_BITS_PER_WORD && n2 <= APINT_BITS_PER_WORD)
538    return pVal[0] < RHS.pVal[0];
539
540  // Otherwise, compare all words
541  unsigned topWord = whichWord(std::max(n1,n2)-1);
542  for (int i = topWord; i >= 0; --i) {
543    if (pVal[i] > RHS.pVal[i])
544      return false;
545    if (pVal[i] < RHS.pVal[i])
546      return true;
547  }
548  return false;
549}
550
551bool APInt::slt(const APInt& RHS) const {
552  assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison");
553  if (isSingleWord()) {
554    int64_t lhsSext = (int64_t(VAL) << (64-BitWidth)) >> (64-BitWidth);
555    int64_t rhsSext = (int64_t(RHS.VAL) << (64-BitWidth)) >> (64-BitWidth);
556    return lhsSext < rhsSext;
557  }
558
559  APInt lhs(*this);
560  APInt rhs(RHS);
561  bool lhsNeg = isNegative();
562  bool rhsNeg = rhs.isNegative();
563  if (lhsNeg) {
564    // Sign bit is set so perform two's complement to make it positive
565    lhs.flipAllBits();
566    lhs++;
567  }
568  if (rhsNeg) {
569    // Sign bit is set so perform two's complement to make it positive
570    rhs.flipAllBits();
571    rhs++;
572  }
573
574  // Now we have unsigned values to compare so do the comparison if necessary
575  // based on the negativeness of the values.
576  if (lhsNeg)
577    if (rhsNeg)
578      return lhs.ugt(rhs);
579    else
580      return true;
581  else if (rhsNeg)
582    return false;
583  else
584    return lhs.ult(rhs);
585}
586
587void APInt::setBit(unsigned bitPosition) {
588  if (isSingleWord())
589    VAL |= maskBit(bitPosition);
590  else
591    pVal[whichWord(bitPosition)] |= maskBit(bitPosition);
592}
593
594/// Set the given bit to 0 whose position is given as "bitPosition".
595/// @brief Set a given bit to 0.
596void APInt::clearBit(unsigned bitPosition) {
597  if (isSingleWord())
598    VAL &= ~maskBit(bitPosition);
599  else
600    pVal[whichWord(bitPosition)] &= ~maskBit(bitPosition);
601}
602
603/// @brief Toggle every bit to its opposite value.
604
605/// Toggle a given bit to its opposite value whose position is given
606/// as "bitPosition".
607/// @brief Toggles a given bit to its opposite value.
608void APInt::flipBit(unsigned bitPosition) {
609  assert(bitPosition < BitWidth && "Out of the bit-width range!");
610  if ((*this)[bitPosition]) clearBit(bitPosition);
611  else setBit(bitPosition);
612}
613
614unsigned APInt::getBitsNeeded(StringRef str, uint8_t radix) {
615  assert(!str.empty() && "Invalid string length");
616  assert((radix == 10 || radix == 8 || radix == 16 || radix == 2) &&
617         "Radix should be 2, 8, 10, or 16!");
618
619  size_t slen = str.size();
620
621  // Each computation below needs to know if it's negative.
622  StringRef::iterator p = str.begin();
623  unsigned isNegative = *p == '-';
624  if (*p == '-' || *p == '+') {
625    p++;
626    slen--;
627    assert(slen && "String is only a sign, needs a value.");
628  }
629
630  // For radixes of power-of-two values, the bits required is accurately and
631  // easily computed
632  if (radix == 2)
633    return slen + isNegative;
634  if (radix == 8)
635    return slen * 3 + isNegative;
636  if (radix == 16)
637    return slen * 4 + isNegative;
638
639  // This is grossly inefficient but accurate. We could probably do something
640  // with a computation of roughly slen*64/20 and then adjust by the value of
641  // the first few digits. But, I'm not sure how accurate that could be.
642
643  // Compute a sufficient number of bits that is always large enough but might
644  // be too large. This avoids the assertion in the constructor. This
645  // calculation doesn't work appropriately for the numbers 0-9, so just use 4
646  // bits in that case.
647  unsigned sufficient = slen == 1 ? 4 : slen * 64/18;
648
649  // Convert to the actual binary value.
650  APInt tmp(sufficient, StringRef(p, slen), radix);
651
652  // Compute how many bits are required. If the log is infinite, assume we need
653  // just bit.
654  unsigned log = tmp.logBase2();
655  if (log == (unsigned)-1) {
656    return isNegative + 1;
657  } else {
658    return isNegative + log + 1;
659  }
660}
661
662// From http://www.burtleburtle.net, byBob Jenkins.
663// When targeting x86, both GCC and LLVM seem to recognize this as a
664// rotate instruction.
665#define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k))))
666
667// From http://www.burtleburtle.net, by Bob Jenkins.
668#define mix(a,b,c) \
669  { \
670    a -= c;  a ^= rot(c, 4);  c += b; \
671    b -= a;  b ^= rot(a, 6);  a += c; \
672    c -= b;  c ^= rot(b, 8);  b += a; \
673    a -= c;  a ^= rot(c,16);  c += b; \
674    b -= a;  b ^= rot(a,19);  a += c; \
675    c -= b;  c ^= rot(b, 4);  b += a; \
676  }
677
678// From http://www.burtleburtle.net, by Bob Jenkins.
679#define final(a,b,c) \
680  { \
681    c ^= b; c -= rot(b,14); \
682    a ^= c; a -= rot(c,11); \
683    b ^= a; b -= rot(a,25); \
684    c ^= b; c -= rot(b,16); \
685    a ^= c; a -= rot(c,4);  \
686    b ^= a; b -= rot(a,14); \
687    c ^= b; c -= rot(b,24); \
688  }
689
690// hashword() was adapted from http://www.burtleburtle.net, by Bob
691// Jenkins.  k is a pointer to an array of uint32_t values; length is
692// the length of the key, in 32-bit chunks.  This version only handles
693// keys that are a multiple of 32 bits in size.
694static inline uint32_t hashword(const uint64_t *k64, size_t length)
695{
696  const uint32_t *k = reinterpret_cast<const uint32_t *>(k64);
697  uint32_t a,b,c;
698
699  /* Set up the internal state */
700  a = b = c = 0xdeadbeef + (((uint32_t)length)<<2);
701
702  /*------------------------------------------------- handle most of the key */
703  while (length > 3) {
704    a += k[0];
705    b += k[1];
706    c += k[2];
707    mix(a,b,c);
708    length -= 3;
709    k += 3;
710  }
711
712  /*------------------------------------------- handle the last 3 uint32_t's */
713  switch (length) {                  /* all the case statements fall through */
714  case 3 : c+=k[2];
715  case 2 : b+=k[1];
716  case 1 : a+=k[0];
717    final(a,b,c);
718    case 0:     /* case 0: nothing left to add */
719      break;
720    }
721  /*------------------------------------------------------ report the result */
722  return c;
723}
724
725// hashword8() was adapted from http://www.burtleburtle.net, by Bob
726// Jenkins.  This computes a 32-bit hash from one 64-bit word.  When
727// targeting x86 (32 or 64 bit), both LLVM and GCC compile this
728// function into about 35 instructions when inlined.
729static inline uint32_t hashword8(const uint64_t k64)
730{
731  uint32_t a,b,c;
732  a = b = c = 0xdeadbeef + 4;
733  b += k64 >> 32;
734  a += k64 & 0xffffffff;
735  final(a,b,c);
736  return c;
737}
738#undef final
739#undef mix
740#undef rot
741
742uint64_t APInt::getHashValue() const {
743  uint64_t hash;
744  if (isSingleWord())
745    hash = hashword8(VAL);
746  else
747    hash = hashword(pVal, getNumWords()*2);
748  return hash;
749}
750
751/// HiBits - This function returns the high "numBits" bits of this APInt.
752APInt APInt::getHiBits(unsigned numBits) const {
753  return APIntOps::lshr(*this, BitWidth - numBits);
754}
755
756/// LoBits - This function returns the low "numBits" bits of this APInt.
757APInt APInt::getLoBits(unsigned numBits) const {
758  return APIntOps::lshr(APIntOps::shl(*this, BitWidth - numBits),
759                        BitWidth - numBits);
760}
761
762unsigned APInt::countLeadingZerosSlowCase() const {
763  // Treat the most significand word differently because it might have
764  // meaningless bits set beyond the precision.
765  unsigned BitsInMSW = BitWidth % APINT_BITS_PER_WORD;
766  integerPart MSWMask;
767  if (BitsInMSW) MSWMask = (integerPart(1) << BitsInMSW) - 1;
768  else {
769    MSWMask = ~integerPart(0);
770    BitsInMSW = APINT_BITS_PER_WORD;
771  }
772
773  unsigned i = getNumWords();
774  integerPart MSW = pVal[i-1] & MSWMask;
775  if (MSW)
776    return CountLeadingZeros_64(MSW) - (APINT_BITS_PER_WORD - BitsInMSW);
777
778  unsigned Count = BitsInMSW;
779  for (--i; i > 0u; --i) {
780    if (pVal[i-1] == 0)
781      Count += APINT_BITS_PER_WORD;
782    else {
783      Count += CountLeadingZeros_64(pVal[i-1]);
784      break;
785    }
786  }
787  return Count;
788}
789
790static unsigned countLeadingOnes_64(uint64_t V, unsigned skip) {
791  unsigned Count = 0;
792  if (skip)
793    V <<= skip;
794  while (V && (V & (1ULL << 63))) {
795    Count++;
796    V <<= 1;
797  }
798  return Count;
799}
800
801unsigned APInt::countLeadingOnes() const {
802  if (isSingleWord())
803    return countLeadingOnes_64(VAL, APINT_BITS_PER_WORD - BitWidth);
804
805  unsigned highWordBits = BitWidth % APINT_BITS_PER_WORD;
806  unsigned shift;
807  if (!highWordBits) {
808    highWordBits = APINT_BITS_PER_WORD;
809    shift = 0;
810  } else {
811    shift = APINT_BITS_PER_WORD - highWordBits;
812  }
813  int i = getNumWords() - 1;
814  unsigned Count = countLeadingOnes_64(pVal[i], shift);
815  if (Count == highWordBits) {
816    for (i--; i >= 0; --i) {
817      if (pVal[i] == -1ULL)
818        Count += APINT_BITS_PER_WORD;
819      else {
820        Count += countLeadingOnes_64(pVal[i], 0);
821        break;
822      }
823    }
824  }
825  return Count;
826}
827
828unsigned APInt::countTrailingZeros() const {
829  if (isSingleWord())
830    return std::min(unsigned(CountTrailingZeros_64(VAL)), BitWidth);
831  unsigned Count = 0;
832  unsigned i = 0;
833  for (; i < getNumWords() && pVal[i] == 0; ++i)
834    Count += APINT_BITS_PER_WORD;
835  if (i < getNumWords())
836    Count += CountTrailingZeros_64(pVal[i]);
837  return std::min(Count, BitWidth);
838}
839
840unsigned APInt::countTrailingOnesSlowCase() const {
841  unsigned Count = 0;
842  unsigned i = 0;
843  for (; i < getNumWords() && pVal[i] == -1ULL; ++i)
844    Count += APINT_BITS_PER_WORD;
845  if (i < getNumWords())
846    Count += CountTrailingOnes_64(pVal[i]);
847  return std::min(Count, BitWidth);
848}
849
850unsigned APInt::countPopulationSlowCase() const {
851  unsigned Count = 0;
852  for (unsigned i = 0; i < getNumWords(); ++i)
853    Count += CountPopulation_64(pVal[i]);
854  return Count;
855}
856
857APInt APInt::byteSwap() const {
858  assert(BitWidth >= 16 && BitWidth % 16 == 0 && "Cannot byteswap!");
859  if (BitWidth == 16)
860    return APInt(BitWidth, ByteSwap_16(uint16_t(VAL)));
861  else if (BitWidth == 32)
862    return APInt(BitWidth, ByteSwap_32(unsigned(VAL)));
863  else if (BitWidth == 48) {
864    unsigned Tmp1 = unsigned(VAL >> 16);
865    Tmp1 = ByteSwap_32(Tmp1);
866    uint16_t Tmp2 = uint16_t(VAL);
867    Tmp2 = ByteSwap_16(Tmp2);
868    return APInt(BitWidth, (uint64_t(Tmp2) << 32) | Tmp1);
869  } else if (BitWidth == 64)
870    return APInt(BitWidth, ByteSwap_64(VAL));
871  else {
872    APInt Result(BitWidth, 0);
873    char *pByte = (char*)Result.pVal;
874    for (unsigned i = 0; i < BitWidth / APINT_WORD_SIZE / 2; ++i) {
875      char Tmp = pByte[i];
876      pByte[i] = pByte[BitWidth / APINT_WORD_SIZE - 1 - i];
877      pByte[BitWidth / APINT_WORD_SIZE - i - 1] = Tmp;
878    }
879    return Result;
880  }
881}
882
883APInt llvm::APIntOps::GreatestCommonDivisor(const APInt& API1,
884                                            const APInt& API2) {
885  APInt A = API1, B = API2;
886  while (!!B) {
887    APInt T = B;
888    B = APIntOps::urem(A, B);
889    A = T;
890  }
891  return A;
892}
893
894APInt llvm::APIntOps::RoundDoubleToAPInt(double Double, unsigned width) {
895  union {
896    double D;
897    uint64_t I;
898  } T;
899  T.D = Double;
900
901  // Get the sign bit from the highest order bit
902  bool isNeg = T.I >> 63;
903
904  // Get the 11-bit exponent and adjust for the 1023 bit bias
905  int64_t exp = ((T.I >> 52) & 0x7ff) - 1023;
906
907  // If the exponent is negative, the value is < 0 so just return 0.
908  if (exp < 0)
909    return APInt(width, 0u);
910
911  // Extract the mantissa by clearing the top 12 bits (sign + exponent).
912  uint64_t mantissa = (T.I & (~0ULL >> 12)) | 1ULL << 52;
913
914  // If the exponent doesn't shift all bits out of the mantissa
915  if (exp < 52)
916    return isNeg ? -APInt(width, mantissa >> (52 - exp)) :
917                    APInt(width, mantissa >> (52 - exp));
918
919  // If the client didn't provide enough bits for us to shift the mantissa into
920  // then the result is undefined, just return 0
921  if (width <= exp - 52)
922    return APInt(width, 0);
923
924  // Otherwise, we have to shift the mantissa bits up to the right location
925  APInt Tmp(width, mantissa);
926  Tmp = Tmp.shl((unsigned)exp - 52);
927  return isNeg ? -Tmp : Tmp;
928}
929
930/// RoundToDouble - This function converts this APInt to a double.
931/// The layout for double is as following (IEEE Standard 754):
932///  --------------------------------------
933/// |  Sign    Exponent    Fraction    Bias |
934/// |-------------------------------------- |
935/// |  1[63]   11[62-52]   52[51-00]   1023 |
936///  --------------------------------------
937double APInt::roundToDouble(bool isSigned) const {
938
939  // Handle the simple case where the value is contained in one uint64_t.
940  // It is wrong to optimize getWord(0) to VAL; there might be more than one word.
941  if (isSingleWord() || getActiveBits() <= APINT_BITS_PER_WORD) {
942    if (isSigned) {
943      int64_t sext = (int64_t(getWord(0)) << (64-BitWidth)) >> (64-BitWidth);
944      return double(sext);
945    } else
946      return double(getWord(0));
947  }
948
949  // Determine if the value is negative.
950  bool isNeg = isSigned ? (*this)[BitWidth-1] : false;
951
952  // Construct the absolute value if we're negative.
953  APInt Tmp(isNeg ? -(*this) : (*this));
954
955  // Figure out how many bits we're using.
956  unsigned n = Tmp.getActiveBits();
957
958  // The exponent (without bias normalization) is just the number of bits
959  // we are using. Note that the sign bit is gone since we constructed the
960  // absolute value.
961  uint64_t exp = n;
962
963  // Return infinity for exponent overflow
964  if (exp > 1023) {
965    if (!isSigned || !isNeg)
966      return std::numeric_limits<double>::infinity();
967    else
968      return -std::numeric_limits<double>::infinity();
969  }
970  exp += 1023; // Increment for 1023 bias
971
972  // Number of bits in mantissa is 52. To obtain the mantissa value, we must
973  // extract the high 52 bits from the correct words in pVal.
974  uint64_t mantissa;
975  unsigned hiWord = whichWord(n-1);
976  if (hiWord == 0) {
977    mantissa = Tmp.pVal[0];
978    if (n > 52)
979      mantissa >>= n - 52; // shift down, we want the top 52 bits.
980  } else {
981    assert(hiWord > 0 && "huh?");
982    uint64_t hibits = Tmp.pVal[hiWord] << (52 - n % APINT_BITS_PER_WORD);
983    uint64_t lobits = Tmp.pVal[hiWord-1] >> (11 + n % APINT_BITS_PER_WORD);
984    mantissa = hibits | lobits;
985  }
986
987  // The leading bit of mantissa is implicit, so get rid of it.
988  uint64_t sign = isNeg ? (1ULL << (APINT_BITS_PER_WORD - 1)) : 0;
989  union {
990    double D;
991    uint64_t I;
992  } T;
993  T.I = sign | (exp << 52) | mantissa;
994  return T.D;
995}
996
997// Truncate to new width.
998APInt APInt::trunc(unsigned width) const {
999  assert(width < BitWidth && "Invalid APInt Truncate request");
1000  assert(width && "Can't truncate to 0 bits");
1001
1002  if (width <= APINT_BITS_PER_WORD)
1003    return APInt(width, getRawData()[0]);
1004
1005  APInt Result(getMemory(getNumWords(width)), width);
1006
1007  // Copy full words.
1008  unsigned i;
1009  for (i = 0; i != width / APINT_BITS_PER_WORD; i++)
1010    Result.pVal[i] = pVal[i];
1011
1012  // Truncate and copy any partial word.
1013  unsigned bits = (0 - width) % APINT_BITS_PER_WORD;
1014  if (bits != 0)
1015    Result.pVal[i] = pVal[i] << bits >> bits;
1016
1017  return Result;
1018}
1019
1020// Sign extend to a new width.
1021APInt APInt::sext(unsigned width) const {
1022  assert(width > BitWidth && "Invalid APInt SignExtend request");
1023
1024  if (width <= APINT_BITS_PER_WORD) {
1025    uint64_t val = VAL << (APINT_BITS_PER_WORD - BitWidth);
1026    val = (int64_t)val >> (width - BitWidth);
1027    return APInt(width, val >> (APINT_BITS_PER_WORD - width));
1028  }
1029
1030  APInt Result(getMemory(getNumWords(width)), width);
1031
1032  // Copy full words.
1033  unsigned i;
1034  uint64_t word = 0;
1035  for (i = 0; i != BitWidth / APINT_BITS_PER_WORD; i++) {
1036    word = getRawData()[i];
1037    Result.pVal[i] = word;
1038  }
1039
1040  // Read and sign-extend any partial word.
1041  unsigned bits = (0 - BitWidth) % APINT_BITS_PER_WORD;
1042  if (bits != 0)
1043    word = (int64_t)getRawData()[i] << bits >> bits;
1044  else
1045    word = (int64_t)word >> (APINT_BITS_PER_WORD - 1);
1046
1047  // Write remaining full words.
1048  for (; i != width / APINT_BITS_PER_WORD; i++) {
1049    Result.pVal[i] = word;
1050    word = (int64_t)word >> (APINT_BITS_PER_WORD - 1);
1051  }
1052
1053  // Write any partial word.
1054  bits = (0 - width) % APINT_BITS_PER_WORD;
1055  if (bits != 0)
1056    Result.pVal[i] = word << bits >> bits;
1057
1058  return Result;
1059}
1060
1061//  Zero extend to a new width.
1062APInt APInt::zext(unsigned width) const {
1063  assert(width > BitWidth && "Invalid APInt ZeroExtend request");
1064
1065  if (width <= APINT_BITS_PER_WORD)
1066    return APInt(width, VAL);
1067
1068  APInt Result(getMemory(getNumWords(width)), width);
1069
1070  // Copy words.
1071  unsigned i;
1072  for (i = 0; i != getNumWords(); i++)
1073    Result.pVal[i] = getRawData()[i];
1074
1075  // Zero remaining words.
1076  memset(&Result.pVal[i], 0, (Result.getNumWords() - i) * APINT_WORD_SIZE);
1077
1078  return Result;
1079}
1080
1081APInt APInt::zextOrTrunc(unsigned width) const {
1082  if (BitWidth < width)
1083    return zext(width);
1084  if (BitWidth > width)
1085    return trunc(width);
1086  return *this;
1087}
1088
1089APInt APInt::sextOrTrunc(unsigned width) const {
1090  if (BitWidth < width)
1091    return sext(width);
1092  if (BitWidth > width)
1093    return trunc(width);
1094  return *this;
1095}
1096
1097/// Arithmetic right-shift this APInt by shiftAmt.
1098/// @brief Arithmetic right-shift function.
1099APInt APInt::ashr(const APInt &shiftAmt) const {
1100  return ashr((unsigned)shiftAmt.getLimitedValue(BitWidth));
1101}
1102
1103/// Arithmetic right-shift this APInt by shiftAmt.
1104/// @brief Arithmetic right-shift function.
1105APInt APInt::ashr(unsigned shiftAmt) const {
1106  assert(shiftAmt <= BitWidth && "Invalid shift amount");
1107  // Handle a degenerate case
1108  if (shiftAmt == 0)
1109    return *this;
1110
1111  // Handle single word shifts with built-in ashr
1112  if (isSingleWord()) {
1113    if (shiftAmt == BitWidth)
1114      return APInt(BitWidth, 0); // undefined
1115    else {
1116      unsigned SignBit = APINT_BITS_PER_WORD - BitWidth;
1117      return APInt(BitWidth,
1118        (((int64_t(VAL) << SignBit) >> SignBit) >> shiftAmt));
1119    }
1120  }
1121
1122  // If all the bits were shifted out, the result is, technically, undefined.
1123  // We return -1 if it was negative, 0 otherwise. We check this early to avoid
1124  // issues in the algorithm below.
1125  if (shiftAmt == BitWidth) {
1126    if (isNegative())
1127      return APInt(BitWidth, -1ULL, true);
1128    else
1129      return APInt(BitWidth, 0);
1130  }
1131
1132  // Create some space for the result.
1133  uint64_t * val = new uint64_t[getNumWords()];
1134
1135  // Compute some values needed by the following shift algorithms
1136  unsigned wordShift = shiftAmt % APINT_BITS_PER_WORD; // bits to shift per word
1137  unsigned offset = shiftAmt / APINT_BITS_PER_WORD; // word offset for shift
1138  unsigned breakWord = getNumWords() - 1 - offset; // last word affected
1139  unsigned bitsInWord = whichBit(BitWidth); // how many bits in last word?
1140  if (bitsInWord == 0)
1141    bitsInWord = APINT_BITS_PER_WORD;
1142
1143  // If we are shifting whole words, just move whole words
1144  if (wordShift == 0) {
1145    // Move the words containing significant bits
1146    for (unsigned i = 0; i <= breakWord; ++i)
1147      val[i] = pVal[i+offset]; // move whole word
1148
1149    // Adjust the top significant word for sign bit fill, if negative
1150    if (isNegative())
1151      if (bitsInWord < APINT_BITS_PER_WORD)
1152        val[breakWord] |= ~0ULL << bitsInWord; // set high bits
1153  } else {
1154    // Shift the low order words
1155    for (unsigned i = 0; i < breakWord; ++i) {
1156      // This combines the shifted corresponding word with the low bits from
1157      // the next word (shifted into this word's high bits).
1158      val[i] = (pVal[i+offset] >> wordShift) |
1159               (pVal[i+offset+1] << (APINT_BITS_PER_WORD - wordShift));
1160    }
1161
1162    // Shift the break word. In this case there are no bits from the next word
1163    // to include in this word.
1164    val[breakWord] = pVal[breakWord+offset] >> wordShift;
1165
1166    // Deal with sign extenstion in the break word, and possibly the word before
1167    // it.
1168    if (isNegative()) {
1169      if (wordShift > bitsInWord) {
1170        if (breakWord > 0)
1171          val[breakWord-1] |=
1172            ~0ULL << (APINT_BITS_PER_WORD - (wordShift - bitsInWord));
1173        val[breakWord] |= ~0ULL;
1174      } else
1175        val[breakWord] |= (~0ULL << (bitsInWord - wordShift));
1176    }
1177  }
1178
1179  // Remaining words are 0 or -1, just assign them.
1180  uint64_t fillValue = (isNegative() ? -1ULL : 0);
1181  for (unsigned i = breakWord+1; i < getNumWords(); ++i)
1182    val[i] = fillValue;
1183  return APInt(val, BitWidth).clearUnusedBits();
1184}
1185
1186/// Logical right-shift this APInt by shiftAmt.
1187/// @brief Logical right-shift function.
1188APInt APInt::lshr(const APInt &shiftAmt) const {
1189  return lshr((unsigned)shiftAmt.getLimitedValue(BitWidth));
1190}
1191
1192/// Logical right-shift this APInt by shiftAmt.
1193/// @brief Logical right-shift function.
1194APInt APInt::lshr(unsigned shiftAmt) const {
1195  if (isSingleWord()) {
1196    if (shiftAmt == BitWidth)
1197      return APInt(BitWidth, 0);
1198    else
1199      return APInt(BitWidth, this->VAL >> shiftAmt);
1200  }
1201
1202  // If all the bits were shifted out, the result is 0. This avoids issues
1203  // with shifting by the size of the integer type, which produces undefined
1204  // results. We define these "undefined results" to always be 0.
1205  if (shiftAmt == BitWidth)
1206    return APInt(BitWidth, 0);
1207
1208  // If none of the bits are shifted out, the result is *this. This avoids
1209  // issues with shifting by the size of the integer type, which produces
1210  // undefined results in the code below. This is also an optimization.
1211  if (shiftAmt == 0)
1212    return *this;
1213
1214  // Create some space for the result.
1215  uint64_t * val = new uint64_t[getNumWords()];
1216
1217  // If we are shifting less than a word, compute the shift with a simple carry
1218  if (shiftAmt < APINT_BITS_PER_WORD) {
1219    uint64_t carry = 0;
1220    for (int i = getNumWords()-1; i >= 0; --i) {
1221      val[i] = (pVal[i] >> shiftAmt) | carry;
1222      carry = pVal[i] << (APINT_BITS_PER_WORD - shiftAmt);
1223    }
1224    return APInt(val, BitWidth).clearUnusedBits();
1225  }
1226
1227  // Compute some values needed by the remaining shift algorithms
1228  unsigned wordShift = shiftAmt % APINT_BITS_PER_WORD;
1229  unsigned offset = shiftAmt / APINT_BITS_PER_WORD;
1230
1231  // If we are shifting whole words, just move whole words
1232  if (wordShift == 0) {
1233    for (unsigned i = 0; i < getNumWords() - offset; ++i)
1234      val[i] = pVal[i+offset];
1235    for (unsigned i = getNumWords()-offset; i < getNumWords(); i++)
1236      val[i] = 0;
1237    return APInt(val,BitWidth).clearUnusedBits();
1238  }
1239
1240  // Shift the low order words
1241  unsigned breakWord = getNumWords() - offset -1;
1242  for (unsigned i = 0; i < breakWord; ++i)
1243    val[i] = (pVal[i+offset] >> wordShift) |
1244             (pVal[i+offset+1] << (APINT_BITS_PER_WORD - wordShift));
1245  // Shift the break word.
1246  val[breakWord] = pVal[breakWord+offset] >> wordShift;
1247
1248  // Remaining words are 0
1249  for (unsigned i = breakWord+1; i < getNumWords(); ++i)
1250    val[i] = 0;
1251  return APInt(val, BitWidth).clearUnusedBits();
1252}
1253
1254/// Left-shift this APInt by shiftAmt.
1255/// @brief Left-shift function.
1256APInt APInt::shl(const APInt &shiftAmt) const {
1257  // It's undefined behavior in C to shift by BitWidth or greater.
1258  return shl((unsigned)shiftAmt.getLimitedValue(BitWidth));
1259}
1260
1261APInt APInt::shlSlowCase(unsigned shiftAmt) const {
1262  // If all the bits were shifted out, the result is 0. This avoids issues
1263  // with shifting by the size of the integer type, which produces undefined
1264  // results. We define these "undefined results" to always be 0.
1265  if (shiftAmt == BitWidth)
1266    return APInt(BitWidth, 0);
1267
1268  // If none of the bits are shifted out, the result is *this. This avoids a
1269  // lshr by the words size in the loop below which can produce incorrect
1270  // results. It also avoids the expensive computation below for a common case.
1271  if (shiftAmt == 0)
1272    return *this;
1273
1274  // Create some space for the result.
1275  uint64_t * val = new uint64_t[getNumWords()];
1276
1277  // If we are shifting less than a word, do it the easy way
1278  if (shiftAmt < APINT_BITS_PER_WORD) {
1279    uint64_t carry = 0;
1280    for (unsigned i = 0; i < getNumWords(); i++) {
1281      val[i] = pVal[i] << shiftAmt | carry;
1282      carry = pVal[i] >> (APINT_BITS_PER_WORD - shiftAmt);
1283    }
1284    return APInt(val, BitWidth).clearUnusedBits();
1285  }
1286
1287  // Compute some values needed by the remaining shift algorithms
1288  unsigned wordShift = shiftAmt % APINT_BITS_PER_WORD;
1289  unsigned offset = shiftAmt / APINT_BITS_PER_WORD;
1290
1291  // If we are shifting whole words, just move whole words
1292  if (wordShift == 0) {
1293    for (unsigned i = 0; i < offset; i++)
1294      val[i] = 0;
1295    for (unsigned i = offset; i < getNumWords(); i++)
1296      val[i] = pVal[i-offset];
1297    return APInt(val,BitWidth).clearUnusedBits();
1298  }
1299
1300  // Copy whole words from this to Result.
1301  unsigned i = getNumWords() - 1;
1302  for (; i > offset; --i)
1303    val[i] = pVal[i-offset] << wordShift |
1304             pVal[i-offset-1] >> (APINT_BITS_PER_WORD - wordShift);
1305  val[offset] = pVal[0] << wordShift;
1306  for (i = 0; i < offset; ++i)
1307    val[i] = 0;
1308  return APInt(val, BitWidth).clearUnusedBits();
1309}
1310
1311APInt APInt::rotl(const APInt &rotateAmt) const {
1312  return rotl((unsigned)rotateAmt.getLimitedValue(BitWidth));
1313}
1314
1315APInt APInt::rotl(unsigned rotateAmt) const {
1316  if (rotateAmt == 0)
1317    return *this;
1318  // Don't get too fancy, just use existing shift/or facilities
1319  APInt hi(*this);
1320  APInt lo(*this);
1321  hi.shl(rotateAmt);
1322  lo.lshr(BitWidth - rotateAmt);
1323  return hi | lo;
1324}
1325
1326APInt APInt::rotr(const APInt &rotateAmt) const {
1327  return rotr((unsigned)rotateAmt.getLimitedValue(BitWidth));
1328}
1329
1330APInt APInt::rotr(unsigned rotateAmt) const {
1331  if (rotateAmt == 0)
1332    return *this;
1333  // Don't get too fancy, just use existing shift/or facilities
1334  APInt hi(*this);
1335  APInt lo(*this);
1336  lo.lshr(rotateAmt);
1337  hi.shl(BitWidth - rotateAmt);
1338  return hi | lo;
1339}
1340
1341// Square Root - this method computes and returns the square root of "this".
1342// Three mechanisms are used for computation. For small values (<= 5 bits),
1343// a table lookup is done. This gets some performance for common cases. For
1344// values using less than 52 bits, the value is converted to double and then
1345// the libc sqrt function is called. The result is rounded and then converted
1346// back to a uint64_t which is then used to construct the result. Finally,
1347// the Babylonian method for computing square roots is used.
1348APInt APInt::sqrt() const {
1349
1350  // Determine the magnitude of the value.
1351  unsigned magnitude = getActiveBits();
1352
1353  // Use a fast table for some small values. This also gets rid of some
1354  // rounding errors in libc sqrt for small values.
1355  if (magnitude <= 5) {
1356    static const uint8_t results[32] = {
1357      /*     0 */ 0,
1358      /*  1- 2 */ 1, 1,
1359      /*  3- 6 */ 2, 2, 2, 2,
1360      /*  7-12 */ 3, 3, 3, 3, 3, 3,
1361      /* 13-20 */ 4, 4, 4, 4, 4, 4, 4, 4,
1362      /* 21-30 */ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
1363      /*    31 */ 6
1364    };
1365    return APInt(BitWidth, results[ (isSingleWord() ? VAL : pVal[0]) ]);
1366  }
1367
1368  // If the magnitude of the value fits in less than 52 bits (the precision of
1369  // an IEEE double precision floating point value), then we can use the
1370  // libc sqrt function which will probably use a hardware sqrt computation.
1371  // This should be faster than the algorithm below.
1372  if (magnitude < 52) {
1373#if HAVE_ROUND
1374    return APInt(BitWidth,
1375                 uint64_t(::round(::sqrt(double(isSingleWord()?VAL:pVal[0])))));
1376#else
1377    return APInt(BitWidth,
1378                 uint64_t(::sqrt(double(isSingleWord()?VAL:pVal[0])) + 0.5));
1379#endif
1380  }
1381
1382  // Okay, all the short cuts are exhausted. We must compute it. The following
1383  // is a classical Babylonian method for computing the square root. This code
1384  // was adapted to APINt from a wikipedia article on such computations.
1385  // See http://www.wikipedia.org/ and go to the page named
1386  // Calculate_an_integer_square_root.
1387  unsigned nbits = BitWidth, i = 4;
1388  APInt testy(BitWidth, 16);
1389  APInt x_old(BitWidth, 1);
1390  APInt x_new(BitWidth, 0);
1391  APInt two(BitWidth, 2);
1392
1393  // Select a good starting value using binary logarithms.
1394  for (;; i += 2, testy = testy.shl(2))
1395    if (i >= nbits || this->ule(testy)) {
1396      x_old = x_old.shl(i / 2);
1397      break;
1398    }
1399
1400  // Use the Babylonian method to arrive at the integer square root:
1401  for (;;) {
1402    x_new = (this->udiv(x_old) + x_old).udiv(two);
1403    if (x_old.ule(x_new))
1404      break;
1405    x_old = x_new;
1406  }
1407
1408  // Make sure we return the closest approximation
1409  // NOTE: The rounding calculation below is correct. It will produce an
1410  // off-by-one discrepancy with results from pari/gp. That discrepancy has been
1411  // determined to be a rounding issue with pari/gp as it begins to use a
1412  // floating point representation after 192 bits. There are no discrepancies
1413  // between this algorithm and pari/gp for bit widths < 192 bits.
1414  APInt square(x_old * x_old);
1415  APInt nextSquare((x_old + 1) * (x_old +1));
1416  if (this->ult(square))
1417    return x_old;
1418  else if (this->ule(nextSquare)) {
1419    APInt midpoint((nextSquare - square).udiv(two));
1420    APInt offset(*this - square);
1421    if (offset.ult(midpoint))
1422      return x_old;
1423    else
1424      return x_old + 1;
1425  } else
1426    llvm_unreachable("Error in APInt::sqrt computation");
1427  return x_old + 1;
1428}
1429
1430/// Computes the multiplicative inverse of this APInt for a given modulo. The
1431/// iterative extended Euclidean algorithm is used to solve for this value,
1432/// however we simplify it to speed up calculating only the inverse, and take
1433/// advantage of div+rem calculations. We also use some tricks to avoid copying
1434/// (potentially large) APInts around.
1435APInt APInt::multiplicativeInverse(const APInt& modulo) const {
1436  assert(ult(modulo) && "This APInt must be smaller than the modulo");
1437
1438  // Using the properties listed at the following web page (accessed 06/21/08):
1439  //   http://www.numbertheory.org/php/euclid.html
1440  // (especially the properties numbered 3, 4 and 9) it can be proved that
1441  // BitWidth bits suffice for all the computations in the algorithm implemented
1442  // below. More precisely, this number of bits suffice if the multiplicative
1443  // inverse exists, but may not suffice for the general extended Euclidean
1444  // algorithm.
1445
1446  APInt r[2] = { modulo, *this };
1447  APInt t[2] = { APInt(BitWidth, 0), APInt(BitWidth, 1) };
1448  APInt q(BitWidth, 0);
1449
1450  unsigned i;
1451  for (i = 0; r[i^1] != 0; i ^= 1) {
1452    // An overview of the math without the confusing bit-flipping:
1453    // q = r[i-2] / r[i-1]
1454    // r[i] = r[i-2] % r[i-1]
1455    // t[i] = t[i-2] - t[i-1] * q
1456    udivrem(r[i], r[i^1], q, r[i]);
1457    t[i] -= t[i^1] * q;
1458  }
1459
1460  // If this APInt and the modulo are not coprime, there is no multiplicative
1461  // inverse, so return 0. We check this by looking at the next-to-last
1462  // remainder, which is the gcd(*this,modulo) as calculated by the Euclidean
1463  // algorithm.
1464  if (r[i] != 1)
1465    return APInt(BitWidth, 0);
1466
1467  // The next-to-last t is the multiplicative inverse.  However, we are
1468  // interested in a positive inverse. Calcuate a positive one from a negative
1469  // one if necessary. A simple addition of the modulo suffices because
1470  // abs(t[i]) is known to be less than *this/2 (see the link above).
1471  return t[i].isNegative() ? t[i] + modulo : t[i];
1472}
1473
1474/// Calculate the magic numbers required to implement a signed integer division
1475/// by a constant as a sequence of multiplies, adds and shifts.  Requires that
1476/// the divisor not be 0, 1, or -1.  Taken from "Hacker's Delight", Henry S.
1477/// Warren, Jr., chapter 10.
1478APInt::ms APInt::magic() const {
1479  const APInt& d = *this;
1480  unsigned p;
1481  APInt ad, anc, delta, q1, r1, q2, r2, t;
1482  APInt signedMin = APInt::getSignedMinValue(d.getBitWidth());
1483  struct ms mag;
1484
1485  ad = d.abs();
1486  t = signedMin + (d.lshr(d.getBitWidth() - 1));
1487  anc = t - 1 - t.urem(ad);   // absolute value of nc
1488  p = d.getBitWidth() - 1;    // initialize p
1489  q1 = signedMin.udiv(anc);   // initialize q1 = 2p/abs(nc)
1490  r1 = signedMin - q1*anc;    // initialize r1 = rem(2p,abs(nc))
1491  q2 = signedMin.udiv(ad);    // initialize q2 = 2p/abs(d)
1492  r2 = signedMin - q2*ad;     // initialize r2 = rem(2p,abs(d))
1493  do {
1494    p = p + 1;
1495    q1 = q1<<1;          // update q1 = 2p/abs(nc)
1496    r1 = r1<<1;          // update r1 = rem(2p/abs(nc))
1497    if (r1.uge(anc)) {  // must be unsigned comparison
1498      q1 = q1 + 1;
1499      r1 = r1 - anc;
1500    }
1501    q2 = q2<<1;          // update q2 = 2p/abs(d)
1502    r2 = r2<<1;          // update r2 = rem(2p/abs(d))
1503    if (r2.uge(ad)) {   // must be unsigned comparison
1504      q2 = q2 + 1;
1505      r2 = r2 - ad;
1506    }
1507    delta = ad - r2;
1508  } while (q1.ult(delta) || (q1 == delta && r1 == 0));
1509
1510  mag.m = q2 + 1;
1511  if (d.isNegative()) mag.m = -mag.m;   // resulting magic number
1512  mag.s = p - d.getBitWidth();          // resulting shift
1513  return mag;
1514}
1515
1516/// Calculate the magic numbers required to implement an unsigned integer
1517/// division by a constant as a sequence of multiplies, adds and shifts.
1518/// Requires that the divisor not be 0.  Taken from "Hacker's Delight", Henry
1519/// S. Warren, Jr., chapter 10.
1520/// LeadingZeros can be used to simplify the calculation if the upper bits
1521/// of the divided value are known zero.
1522APInt::mu APInt::magicu(unsigned LeadingZeros) const {
1523  const APInt& d = *this;
1524  unsigned p;
1525  APInt nc, delta, q1, r1, q2, r2;
1526  struct mu magu;
1527  magu.a = 0;               // initialize "add" indicator
1528  APInt allOnes = APInt::getAllOnesValue(d.getBitWidth()).lshr(LeadingZeros);
1529  APInt signedMin = APInt::getSignedMinValue(d.getBitWidth());
1530  APInt signedMax = APInt::getSignedMaxValue(d.getBitWidth());
1531
1532  nc = allOnes - (-d).urem(d);
1533  p = d.getBitWidth() - 1;  // initialize p
1534  q1 = signedMin.udiv(nc);  // initialize q1 = 2p/nc
1535  r1 = signedMin - q1*nc;   // initialize r1 = rem(2p,nc)
1536  q2 = signedMax.udiv(d);   // initialize q2 = (2p-1)/d
1537  r2 = signedMax - q2*d;    // initialize r2 = rem((2p-1),d)
1538  do {
1539    p = p + 1;
1540    if (r1.uge(nc - r1)) {
1541      q1 = q1 + q1 + 1;  // update q1
1542      r1 = r1 + r1 - nc; // update r1
1543    }
1544    else {
1545      q1 = q1+q1; // update q1
1546      r1 = r1+r1; // update r1
1547    }
1548    if ((r2 + 1).uge(d - r2)) {
1549      if (q2.uge(signedMax)) magu.a = 1;
1550      q2 = q2+q2 + 1;     // update q2
1551      r2 = r2+r2 + 1 - d; // update r2
1552    }
1553    else {
1554      if (q2.uge(signedMin)) magu.a = 1;
1555      q2 = q2+q2;     // update q2
1556      r2 = r2+r2 + 1; // update r2
1557    }
1558    delta = d - 1 - r2;
1559  } while (p < d.getBitWidth()*2 &&
1560           (q1.ult(delta) || (q1 == delta && r1 == 0)));
1561  magu.m = q2 + 1; // resulting magic number
1562  magu.s = p - d.getBitWidth();  // resulting shift
1563  return magu;
1564}
1565
1566/// Implementation of Knuth's Algorithm D (Division of nonnegative integers)
1567/// from "Art of Computer Programming, Volume 2", section 4.3.1, p. 272. The
1568/// variables here have the same names as in the algorithm. Comments explain
1569/// the algorithm and any deviation from it.
1570static void KnuthDiv(unsigned *u, unsigned *v, unsigned *q, unsigned* r,
1571                     unsigned m, unsigned n) {
1572  assert(u && "Must provide dividend");
1573  assert(v && "Must provide divisor");
1574  assert(q && "Must provide quotient");
1575  assert(u != v && u != q && v != q && "Must us different memory");
1576  assert(n>1 && "n must be > 1");
1577
1578  // Knuth uses the value b as the base of the number system. In our case b
1579  // is 2^31 so we just set it to -1u.
1580  uint64_t b = uint64_t(1) << 32;
1581
1582#if 0
1583  DEBUG(dbgs() << "KnuthDiv: m=" << m << " n=" << n << '\n');
1584  DEBUG(dbgs() << "KnuthDiv: original:");
1585  DEBUG(for (int i = m+n; i >=0; i--) dbgs() << " " << u[i]);
1586  DEBUG(dbgs() << " by");
1587  DEBUG(for (int i = n; i >0; i--) dbgs() << " " << v[i-1]);
1588  DEBUG(dbgs() << '\n');
1589#endif
1590  // D1. [Normalize.] Set d = b / (v[n-1] + 1) and multiply all the digits of
1591  // u and v by d. Note that we have taken Knuth's advice here to use a power
1592  // of 2 value for d such that d * v[n-1] >= b/2 (b is the base). A power of
1593  // 2 allows us to shift instead of multiply and it is easy to determine the
1594  // shift amount from the leading zeros.  We are basically normalizing the u
1595  // and v so that its high bits are shifted to the top of v's range without
1596  // overflow. Note that this can require an extra word in u so that u must
1597  // be of length m+n+1.
1598  unsigned shift = CountLeadingZeros_32(v[n-1]);
1599  unsigned v_carry = 0;
1600  unsigned u_carry = 0;
1601  if (shift) {
1602    for (unsigned i = 0; i < m+n; ++i) {
1603      unsigned u_tmp = u[i] >> (32 - shift);
1604      u[i] = (u[i] << shift) | u_carry;
1605      u_carry = u_tmp;
1606    }
1607    for (unsigned i = 0; i < n; ++i) {
1608      unsigned v_tmp = v[i] >> (32 - shift);
1609      v[i] = (v[i] << shift) | v_carry;
1610      v_carry = v_tmp;
1611    }
1612  }
1613  u[m+n] = u_carry;
1614#if 0
1615  DEBUG(dbgs() << "KnuthDiv:   normal:");
1616  DEBUG(for (int i = m+n; i >=0; i--) dbgs() << " " << u[i]);
1617  DEBUG(dbgs() << " by");
1618  DEBUG(for (int i = n; i >0; i--) dbgs() << " " << v[i-1]);
1619  DEBUG(dbgs() << '\n');
1620#endif
1621
1622  // D2. [Initialize j.]  Set j to m. This is the loop counter over the places.
1623  int j = m;
1624  do {
1625    DEBUG(dbgs() << "KnuthDiv: quotient digit #" << j << '\n');
1626    // D3. [Calculate q'.].
1627    //     Set qp = (u[j+n]*b + u[j+n-1]) / v[n-1]. (qp=qprime=q')
1628    //     Set rp = (u[j+n]*b + u[j+n-1]) % v[n-1]. (rp=rprime=r')
1629    // Now test if qp == b or qp*v[n-2] > b*rp + u[j+n-2]; if so, decrease
1630    // qp by 1, inrease rp by v[n-1], and repeat this test if rp < b. The test
1631    // on v[n-2] determines at high speed most of the cases in which the trial
1632    // value qp is one too large, and it eliminates all cases where qp is two
1633    // too large.
1634    uint64_t dividend = ((uint64_t(u[j+n]) << 32) + u[j+n-1]);
1635    DEBUG(dbgs() << "KnuthDiv: dividend == " << dividend << '\n');
1636    uint64_t qp = dividend / v[n-1];
1637    uint64_t rp = dividend % v[n-1];
1638    if (qp == b || qp*v[n-2] > b*rp + u[j+n-2]) {
1639      qp--;
1640      rp += v[n-1];
1641      if (rp < b && (qp == b || qp*v[n-2] > b*rp + u[j+n-2]))
1642        qp--;
1643    }
1644    DEBUG(dbgs() << "KnuthDiv: qp == " << qp << ", rp == " << rp << '\n');
1645
1646    // D4. [Multiply and subtract.] Replace (u[j+n]u[j+n-1]...u[j]) with
1647    // (u[j+n]u[j+n-1]..u[j]) - qp * (v[n-1]...v[1]v[0]). This computation
1648    // consists of a simple multiplication by a one-place number, combined with
1649    // a subtraction.
1650    bool isNeg = false;
1651    for (unsigned i = 0; i < n; ++i) {
1652      uint64_t u_tmp = uint64_t(u[j+i]) | (uint64_t(u[j+i+1]) << 32);
1653      uint64_t subtrahend = uint64_t(qp) * uint64_t(v[i]);
1654      bool borrow = subtrahend > u_tmp;
1655      DEBUG(dbgs() << "KnuthDiv: u_tmp == " << u_tmp
1656                   << ", subtrahend == " << subtrahend
1657                   << ", borrow = " << borrow << '\n');
1658
1659      uint64_t result = u_tmp - subtrahend;
1660      unsigned k = j + i;
1661      u[k++] = (unsigned)(result & (b-1)); // subtract low word
1662      u[k++] = (unsigned)(result >> 32);   // subtract high word
1663      while (borrow && k <= m+n) { // deal with borrow to the left
1664        borrow = u[k] == 0;
1665        u[k]--;
1666        k++;
1667      }
1668      isNeg |= borrow;
1669      DEBUG(dbgs() << "KnuthDiv: u[j+i] == " << u[j+i] << ",  u[j+i+1] == " <<
1670                    u[j+i+1] << '\n');
1671    }
1672    DEBUG(dbgs() << "KnuthDiv: after subtraction:");
1673    DEBUG(for (int i = m+n; i >=0; i--) dbgs() << " " << u[i]);
1674    DEBUG(dbgs() << '\n');
1675    // The digits (u[j+n]...u[j]) should be kept positive; if the result of
1676    // this step is actually negative, (u[j+n]...u[j]) should be left as the
1677    // true value plus b**(n+1), namely as the b's complement of
1678    // the true value, and a "borrow" to the left should be remembered.
1679    //
1680    if (isNeg) {
1681      bool carry = true;  // true because b's complement is "complement + 1"
1682      for (unsigned i = 0; i <= m+n; ++i) {
1683        u[i] = ~u[i] + carry; // b's complement
1684        carry = carry && u[i] == 0;
1685      }
1686    }
1687    DEBUG(dbgs() << "KnuthDiv: after complement:");
1688    DEBUG(for (int i = m+n; i >=0; i--) dbgs() << " " << u[i]);
1689    DEBUG(dbgs() << '\n');
1690
1691    // D5. [Test remainder.] Set q[j] = qp. If the result of step D4 was
1692    // negative, go to step D6; otherwise go on to step D7.
1693    q[j] = (unsigned)qp;
1694    if (isNeg) {
1695      // D6. [Add back]. The probability that this step is necessary is very
1696      // small, on the order of only 2/b. Make sure that test data accounts for
1697      // this possibility. Decrease q[j] by 1
1698      q[j]--;
1699      // and add (0v[n-1]...v[1]v[0]) to (u[j+n]u[j+n-1]...u[j+1]u[j]).
1700      // A carry will occur to the left of u[j+n], and it should be ignored
1701      // since it cancels with the borrow that occurred in D4.
1702      bool carry = false;
1703      for (unsigned i = 0; i < n; i++) {
1704        unsigned limit = std::min(u[j+i],v[i]);
1705        u[j+i] += v[i] + carry;
1706        carry = u[j+i] < limit || (carry && u[j+i] == limit);
1707      }
1708      u[j+n] += carry;
1709    }
1710    DEBUG(dbgs() << "KnuthDiv: after correction:");
1711    DEBUG(for (int i = m+n; i >=0; i--) dbgs() <<" " << u[i]);
1712    DEBUG(dbgs() << "\nKnuthDiv: digit result = " << q[j] << '\n');
1713
1714  // D7. [Loop on j.]  Decrease j by one. Now if j >= 0, go back to D3.
1715  } while (--j >= 0);
1716
1717  DEBUG(dbgs() << "KnuthDiv: quotient:");
1718  DEBUG(for (int i = m; i >=0; i--) dbgs() <<" " << q[i]);
1719  DEBUG(dbgs() << '\n');
1720
1721  // D8. [Unnormalize]. Now q[...] is the desired quotient, and the desired
1722  // remainder may be obtained by dividing u[...] by d. If r is non-null we
1723  // compute the remainder (urem uses this).
1724  if (r) {
1725    // The value d is expressed by the "shift" value above since we avoided
1726    // multiplication by d by using a shift left. So, all we have to do is
1727    // shift right here. In order to mak
1728    if (shift) {
1729      unsigned carry = 0;
1730      DEBUG(dbgs() << "KnuthDiv: remainder:");
1731      for (int i = n-1; i >= 0; i--) {
1732        r[i] = (u[i] >> shift) | carry;
1733        carry = u[i] << (32 - shift);
1734        DEBUG(dbgs() << " " << r[i]);
1735      }
1736    } else {
1737      for (int i = n-1; i >= 0; i--) {
1738        r[i] = u[i];
1739        DEBUG(dbgs() << " " << r[i]);
1740      }
1741    }
1742    DEBUG(dbgs() << '\n');
1743  }
1744#if 0
1745  DEBUG(dbgs() << '\n');
1746#endif
1747}
1748
1749void APInt::divide(const APInt LHS, unsigned lhsWords,
1750                   const APInt &RHS, unsigned rhsWords,
1751                   APInt *Quotient, APInt *Remainder)
1752{
1753  assert(lhsWords >= rhsWords && "Fractional result");
1754
1755  // First, compose the values into an array of 32-bit words instead of
1756  // 64-bit words. This is a necessity of both the "short division" algorithm
1757  // and the Knuth "classical algorithm" which requires there to be native
1758  // operations for +, -, and * on an m bit value with an m*2 bit result. We
1759  // can't use 64-bit operands here because we don't have native results of
1760  // 128-bits. Furthermore, casting the 64-bit values to 32-bit values won't
1761  // work on large-endian machines.
1762  uint64_t mask = ~0ull >> (sizeof(unsigned)*CHAR_BIT);
1763  unsigned n = rhsWords * 2;
1764  unsigned m = (lhsWords * 2) - n;
1765
1766  // Allocate space for the temporary values we need either on the stack, if
1767  // it will fit, or on the heap if it won't.
1768  unsigned SPACE[128];
1769  unsigned *U = 0;
1770  unsigned *V = 0;
1771  unsigned *Q = 0;
1772  unsigned *R = 0;
1773  if ((Remainder?4:3)*n+2*m+1 <= 128) {
1774    U = &SPACE[0];
1775    V = &SPACE[m+n+1];
1776    Q = &SPACE[(m+n+1) + n];
1777    if (Remainder)
1778      R = &SPACE[(m+n+1) + n + (m+n)];
1779  } else {
1780    U = new unsigned[m + n + 1];
1781    V = new unsigned[n];
1782    Q = new unsigned[m+n];
1783    if (Remainder)
1784      R = new unsigned[n];
1785  }
1786
1787  // Initialize the dividend
1788  memset(U, 0, (m+n+1)*sizeof(unsigned));
1789  for (unsigned i = 0; i < lhsWords; ++i) {
1790    uint64_t tmp = (LHS.getNumWords() == 1 ? LHS.VAL : LHS.pVal[i]);
1791    U[i * 2] = (unsigned)(tmp & mask);
1792    U[i * 2 + 1] = (unsigned)(tmp >> (sizeof(unsigned)*CHAR_BIT));
1793  }
1794  U[m+n] = 0; // this extra word is for "spill" in the Knuth algorithm.
1795
1796  // Initialize the divisor
1797  memset(V, 0, (n)*sizeof(unsigned));
1798  for (unsigned i = 0; i < rhsWords; ++i) {
1799    uint64_t tmp = (RHS.getNumWords() == 1 ? RHS.VAL : RHS.pVal[i]);
1800    V[i * 2] = (unsigned)(tmp & mask);
1801    V[i * 2 + 1] = (unsigned)(tmp >> (sizeof(unsigned)*CHAR_BIT));
1802  }
1803
1804  // initialize the quotient and remainder
1805  memset(Q, 0, (m+n) * sizeof(unsigned));
1806  if (Remainder)
1807    memset(R, 0, n * sizeof(unsigned));
1808
1809  // Now, adjust m and n for the Knuth division. n is the number of words in
1810  // the divisor. m is the number of words by which the dividend exceeds the
1811  // divisor (i.e. m+n is the length of the dividend). These sizes must not
1812  // contain any zero words or the Knuth algorithm fails.
1813  for (unsigned i = n; i > 0 && V[i-1] == 0; i--) {
1814    n--;
1815    m++;
1816  }
1817  for (unsigned i = m+n; i > 0 && U[i-1] == 0; i--)
1818    m--;
1819
1820  // If we're left with only a single word for the divisor, Knuth doesn't work
1821  // so we implement the short division algorithm here. This is much simpler
1822  // and faster because we are certain that we can divide a 64-bit quantity
1823  // by a 32-bit quantity at hardware speed and short division is simply a
1824  // series of such operations. This is just like doing short division but we
1825  // are using base 2^32 instead of base 10.
1826  assert(n != 0 && "Divide by zero?");
1827  if (n == 1) {
1828    unsigned divisor = V[0];
1829    unsigned remainder = 0;
1830    for (int i = m+n-1; i >= 0; i--) {
1831      uint64_t partial_dividend = uint64_t(remainder) << 32 | U[i];
1832      if (partial_dividend == 0) {
1833        Q[i] = 0;
1834        remainder = 0;
1835      } else if (partial_dividend < divisor) {
1836        Q[i] = 0;
1837        remainder = (unsigned)partial_dividend;
1838      } else if (partial_dividend == divisor) {
1839        Q[i] = 1;
1840        remainder = 0;
1841      } else {
1842        Q[i] = (unsigned)(partial_dividend / divisor);
1843        remainder = (unsigned)(partial_dividend - (Q[i] * divisor));
1844      }
1845    }
1846    if (R)
1847      R[0] = remainder;
1848  } else {
1849    // Now we're ready to invoke the Knuth classical divide algorithm. In this
1850    // case n > 1.
1851    KnuthDiv(U, V, Q, R, m, n);
1852  }
1853
1854  // If the caller wants the quotient
1855  if (Quotient) {
1856    // Set up the Quotient value's memory.
1857    if (Quotient->BitWidth != LHS.BitWidth) {
1858      if (Quotient->isSingleWord())
1859        Quotient->VAL = 0;
1860      else
1861        delete [] Quotient->pVal;
1862      Quotient->BitWidth = LHS.BitWidth;
1863      if (!Quotient->isSingleWord())
1864        Quotient->pVal = getClearedMemory(Quotient->getNumWords());
1865    } else
1866      Quotient->clearAllBits();
1867
1868    // The quotient is in Q. Reconstitute the quotient into Quotient's low
1869    // order words.
1870    if (lhsWords == 1) {
1871      uint64_t tmp =
1872        uint64_t(Q[0]) | (uint64_t(Q[1]) << (APINT_BITS_PER_WORD / 2));
1873      if (Quotient->isSingleWord())
1874        Quotient->VAL = tmp;
1875      else
1876        Quotient->pVal[0] = tmp;
1877    } else {
1878      assert(!Quotient->isSingleWord() && "Quotient APInt not large enough");
1879      for (unsigned i = 0; i < lhsWords; ++i)
1880        Quotient->pVal[i] =
1881          uint64_t(Q[i*2]) | (uint64_t(Q[i*2+1]) << (APINT_BITS_PER_WORD / 2));
1882    }
1883  }
1884
1885  // If the caller wants the remainder
1886  if (Remainder) {
1887    // Set up the Remainder value's memory.
1888    if (Remainder->BitWidth != RHS.BitWidth) {
1889      if (Remainder->isSingleWord())
1890        Remainder->VAL = 0;
1891      else
1892        delete [] Remainder->pVal;
1893      Remainder->BitWidth = RHS.BitWidth;
1894      if (!Remainder->isSingleWord())
1895        Remainder->pVal = getClearedMemory(Remainder->getNumWords());
1896    } else
1897      Remainder->clearAllBits();
1898
1899    // The remainder is in R. Reconstitute the remainder into Remainder's low
1900    // order words.
1901    if (rhsWords == 1) {
1902      uint64_t tmp =
1903        uint64_t(R[0]) | (uint64_t(R[1]) << (APINT_BITS_PER_WORD / 2));
1904      if (Remainder->isSingleWord())
1905        Remainder->VAL = tmp;
1906      else
1907        Remainder->pVal[0] = tmp;
1908    } else {
1909      assert(!Remainder->isSingleWord() && "Remainder APInt not large enough");
1910      for (unsigned i = 0; i < rhsWords; ++i)
1911        Remainder->pVal[i] =
1912          uint64_t(R[i*2]) | (uint64_t(R[i*2+1]) << (APINT_BITS_PER_WORD / 2));
1913    }
1914  }
1915
1916  // Clean up the memory we allocated.
1917  if (U != &SPACE[0]) {
1918    delete [] U;
1919    delete [] V;
1920    delete [] Q;
1921    delete [] R;
1922  }
1923}
1924
1925APInt APInt::udiv(const APInt& RHS) const {
1926  assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
1927
1928  // First, deal with the easy case
1929  if (isSingleWord()) {
1930    assert(RHS.VAL != 0 && "Divide by zero?");
1931    return APInt(BitWidth, VAL / RHS.VAL);
1932  }
1933
1934  // Get some facts about the LHS and RHS number of bits and words
1935  unsigned rhsBits = RHS.getActiveBits();
1936  unsigned rhsWords = !rhsBits ? 0 : (APInt::whichWord(rhsBits - 1) + 1);
1937  assert(rhsWords && "Divided by zero???");
1938  unsigned lhsBits = this->getActiveBits();
1939  unsigned lhsWords = !lhsBits ? 0 : (APInt::whichWord(lhsBits - 1) + 1);
1940
1941  // Deal with some degenerate cases
1942  if (!lhsWords)
1943    // 0 / X ===> 0
1944    return APInt(BitWidth, 0);
1945  else if (lhsWords < rhsWords || this->ult(RHS)) {
1946    // X / Y ===> 0, iff X < Y
1947    return APInt(BitWidth, 0);
1948  } else if (*this == RHS) {
1949    // X / X ===> 1
1950    return APInt(BitWidth, 1);
1951  } else if (lhsWords == 1 && rhsWords == 1) {
1952    // All high words are zero, just use native divide
1953    return APInt(BitWidth, this->pVal[0] / RHS.pVal[0]);
1954  }
1955
1956  // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1957  APInt Quotient(1,0); // to hold result.
1958  divide(*this, lhsWords, RHS, rhsWords, &Quotient, 0);
1959  return Quotient;
1960}
1961
1962APInt APInt::urem(const APInt& RHS) const {
1963  assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
1964  if (isSingleWord()) {
1965    assert(RHS.VAL != 0 && "Remainder by zero?");
1966    return APInt(BitWidth, VAL % RHS.VAL);
1967  }
1968
1969  // Get some facts about the LHS
1970  unsigned lhsBits = getActiveBits();
1971  unsigned lhsWords = !lhsBits ? 0 : (whichWord(lhsBits - 1) + 1);
1972
1973  // Get some facts about the RHS
1974  unsigned rhsBits = RHS.getActiveBits();
1975  unsigned rhsWords = !rhsBits ? 0 : (APInt::whichWord(rhsBits - 1) + 1);
1976  assert(rhsWords && "Performing remainder operation by zero ???");
1977
1978  // Check the degenerate cases
1979  if (lhsWords == 0) {
1980    // 0 % Y ===> 0
1981    return APInt(BitWidth, 0);
1982  } else if (lhsWords < rhsWords || this->ult(RHS)) {
1983    // X % Y ===> X, iff X < Y
1984    return *this;
1985  } else if (*this == RHS) {
1986    // X % X == 0;
1987    return APInt(BitWidth, 0);
1988  } else if (lhsWords == 1) {
1989    // All high words are zero, just use native remainder
1990    return APInt(BitWidth, pVal[0] % RHS.pVal[0]);
1991  }
1992
1993  // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1994  APInt Remainder(1,0);
1995  divide(*this, lhsWords, RHS, rhsWords, 0, &Remainder);
1996  return Remainder;
1997}
1998
1999void APInt::udivrem(const APInt &LHS, const APInt &RHS,
2000                    APInt &Quotient, APInt &Remainder) {
2001  // Get some size facts about the dividend and divisor
2002  unsigned lhsBits  = LHS.getActiveBits();
2003  unsigned lhsWords = !lhsBits ? 0 : (APInt::whichWord(lhsBits - 1) + 1);
2004  unsigned rhsBits  = RHS.getActiveBits();
2005  unsigned rhsWords = !rhsBits ? 0 : (APInt::whichWord(rhsBits - 1) + 1);
2006
2007  // Check the degenerate cases
2008  if (lhsWords == 0) {
2009    Quotient = 0;                // 0 / Y ===> 0
2010    Remainder = 0;               // 0 % Y ===> 0
2011    return;
2012  }
2013
2014  if (lhsWords < rhsWords || LHS.ult(RHS)) {
2015    Remainder = LHS;            // X % Y ===> X, iff X < Y
2016    Quotient = 0;               // X / Y ===> 0, iff X < Y
2017    return;
2018  }
2019
2020  if (LHS == RHS) {
2021    Quotient  = 1;              // X / X ===> 1
2022    Remainder = 0;              // X % X ===> 0;
2023    return;
2024  }
2025
2026  if (lhsWords == 1 && rhsWords == 1) {
2027    // There is only one word to consider so use the native versions.
2028    uint64_t lhsValue = LHS.isSingleWord() ? LHS.VAL : LHS.pVal[0];
2029    uint64_t rhsValue = RHS.isSingleWord() ? RHS.VAL : RHS.pVal[0];
2030    Quotient = APInt(LHS.getBitWidth(), lhsValue / rhsValue);
2031    Remainder = APInt(LHS.getBitWidth(), lhsValue % rhsValue);
2032    return;
2033  }
2034
2035  // Okay, lets do it the long way
2036  divide(LHS, lhsWords, RHS, rhsWords, &Quotient, &Remainder);
2037}
2038
2039APInt APInt::sadd_ov(const APInt &RHS, bool &Overflow) const {
2040  APInt Res = *this+RHS;
2041  Overflow = isNonNegative() == RHS.isNonNegative() &&
2042             Res.isNonNegative() != isNonNegative();
2043  return Res;
2044}
2045
2046APInt APInt::uadd_ov(const APInt &RHS, bool &Overflow) const {
2047  APInt Res = *this+RHS;
2048  Overflow = Res.ult(RHS);
2049  return Res;
2050}
2051
2052APInt APInt::ssub_ov(const APInt &RHS, bool &Overflow) const {
2053  APInt Res = *this - RHS;
2054  Overflow = isNonNegative() != RHS.isNonNegative() &&
2055             Res.isNonNegative() != isNonNegative();
2056  return Res;
2057}
2058
2059APInt APInt::usub_ov(const APInt &RHS, bool &Overflow) const {
2060  APInt Res = *this-RHS;
2061  Overflow = Res.ugt(*this);
2062  return Res;
2063}
2064
2065APInt APInt::sdiv_ov(const APInt &RHS, bool &Overflow) const {
2066  // MININT/-1  -->  overflow.
2067  Overflow = isMinSignedValue() && RHS.isAllOnesValue();
2068  return sdiv(RHS);
2069}
2070
2071APInt APInt::smul_ov(const APInt &RHS, bool &Overflow) const {
2072  APInt Res = *this * RHS;
2073
2074  if (*this != 0 && RHS != 0)
2075    Overflow = Res.sdiv(RHS) != *this || Res.sdiv(*this) != RHS;
2076  else
2077    Overflow = false;
2078  return Res;
2079}
2080
2081APInt APInt::umul_ov(const APInt &RHS, bool &Overflow) const {
2082  APInt Res = *this * RHS;
2083
2084  if (*this != 0 && RHS != 0)
2085    Overflow = Res.udiv(RHS) != *this || Res.udiv(*this) != RHS;
2086  else
2087    Overflow = false;
2088  return Res;
2089}
2090
2091APInt APInt::sshl_ov(unsigned ShAmt, bool &Overflow) const {
2092  Overflow = ShAmt >= getBitWidth();
2093  if (Overflow)
2094    ShAmt = getBitWidth()-1;
2095
2096  if (isNonNegative()) // Don't allow sign change.
2097    Overflow = ShAmt >= countLeadingZeros();
2098  else
2099    Overflow = ShAmt >= countLeadingOnes();
2100
2101  return *this << ShAmt;
2102}
2103
2104
2105
2106
2107void APInt::fromString(unsigned numbits, StringRef str, uint8_t radix) {
2108  // Check our assumptions here
2109  assert(!str.empty() && "Invalid string length");
2110  assert((radix == 10 || radix == 8 || radix == 16 || radix == 2) &&
2111         "Radix should be 2, 8, 10, or 16!");
2112
2113  StringRef::iterator p = str.begin();
2114  size_t slen = str.size();
2115  bool isNeg = *p == '-';
2116  if (*p == '-' || *p == '+') {
2117    p++;
2118    slen--;
2119    assert(slen && "String is only a sign, needs a value.");
2120  }
2121  assert((slen <= numbits || radix != 2) && "Insufficient bit width");
2122  assert(((slen-1)*3 <= numbits || radix != 8) && "Insufficient bit width");
2123  assert(((slen-1)*4 <= numbits || radix != 16) && "Insufficient bit width");
2124  assert((((slen-1)*64)/22 <= numbits || radix != 10) &&
2125         "Insufficient bit width");
2126
2127  // Allocate memory
2128  if (!isSingleWord())
2129    pVal = getClearedMemory(getNumWords());
2130
2131  // Figure out if we can shift instead of multiply
2132  unsigned shift = (radix == 16 ? 4 : radix == 8 ? 3 : radix == 2 ? 1 : 0);
2133
2134  // Set up an APInt for the digit to add outside the loop so we don't
2135  // constantly construct/destruct it.
2136  APInt apdigit(getBitWidth(), 0);
2137  APInt apradix(getBitWidth(), radix);
2138
2139  // Enter digit traversal loop
2140  for (StringRef::iterator e = str.end(); p != e; ++p) {
2141    unsigned digit = getDigit(*p, radix);
2142    assert(digit < radix && "Invalid character in digit string");
2143
2144    // Shift or multiply the value by the radix
2145    if (slen > 1) {
2146      if (shift)
2147        *this <<= shift;
2148      else
2149        *this *= apradix;
2150    }
2151
2152    // Add in the digit we just interpreted
2153    if (apdigit.isSingleWord())
2154      apdigit.VAL = digit;
2155    else
2156      apdigit.pVal[0] = digit;
2157    *this += apdigit;
2158  }
2159  // If its negative, put it in two's complement form
2160  if (isNeg) {
2161    (*this)--;
2162    this->flipAllBits();
2163  }
2164}
2165
2166void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix,
2167                     bool Signed) const {
2168  assert((Radix == 10 || Radix == 8 || Radix == 16 || Radix == 2) &&
2169         "Radix should be 2, 8, 10, or 16!");
2170
2171  // First, check for a zero value and just short circuit the logic below.
2172  if (*this == 0) {
2173    Str.push_back('0');
2174    return;
2175  }
2176
2177  static const char Digits[] = "0123456789ABCDEF";
2178
2179  if (isSingleWord()) {
2180    char Buffer[65];
2181    char *BufPtr = Buffer+65;
2182
2183    uint64_t N;
2184    if (!Signed) {
2185      N = getZExtValue();
2186    } else {
2187      int64_t I = getSExtValue();
2188      if (I >= 0) {
2189        N = I;
2190      } else {
2191        Str.push_back('-');
2192        N = -(uint64_t)I;
2193      }
2194    }
2195
2196    while (N) {
2197      *--BufPtr = Digits[N % Radix];
2198      N /= Radix;
2199    }
2200    Str.append(BufPtr, Buffer+65);
2201    return;
2202  }
2203
2204  APInt Tmp(*this);
2205
2206  if (Signed && isNegative()) {
2207    // They want to print the signed version and it is a negative value
2208    // Flip the bits and add one to turn it into the equivalent positive
2209    // value and put a '-' in the result.
2210    Tmp.flipAllBits();
2211    Tmp++;
2212    Str.push_back('-');
2213  }
2214
2215  // We insert the digits backward, then reverse them to get the right order.
2216  unsigned StartDig = Str.size();
2217
2218  // For the 2, 8 and 16 bit cases, we can just shift instead of divide
2219  // because the number of bits per digit (1, 3 and 4 respectively) divides
2220  // equaly.  We just shift until the value is zero.
2221  if (Radix != 10) {
2222    // Just shift tmp right for each digit width until it becomes zero
2223    unsigned ShiftAmt = (Radix == 16 ? 4 : (Radix == 8 ? 3 : 1));
2224    unsigned MaskAmt = Radix - 1;
2225
2226    while (Tmp != 0) {
2227      unsigned Digit = unsigned(Tmp.getRawData()[0]) & MaskAmt;
2228      Str.push_back(Digits[Digit]);
2229      Tmp = Tmp.lshr(ShiftAmt);
2230    }
2231  } else {
2232    APInt divisor(4, 10);
2233    while (Tmp != 0) {
2234      APInt APdigit(1, 0);
2235      APInt tmp2(Tmp.getBitWidth(), 0);
2236      divide(Tmp, Tmp.getNumWords(), divisor, divisor.getNumWords(), &tmp2,
2237             &APdigit);
2238      unsigned Digit = (unsigned)APdigit.getZExtValue();
2239      assert(Digit < Radix && "divide failed");
2240      Str.push_back(Digits[Digit]);
2241      Tmp = tmp2;
2242    }
2243  }
2244
2245  // Reverse the digits before returning.
2246  std::reverse(Str.begin()+StartDig, Str.end());
2247}
2248
2249/// toString - This returns the APInt as a std::string.  Note that this is an
2250/// inefficient method.  It is better to pass in a SmallVector/SmallString
2251/// to the methods above.
2252std::string APInt::toString(unsigned Radix = 10, bool Signed = true) const {
2253  SmallString<40> S;
2254  toString(S, Radix, Signed);
2255  return S.str();
2256}
2257
2258
2259void APInt::dump() const {
2260  SmallString<40> S, U;
2261  this->toStringUnsigned(U);
2262  this->toStringSigned(S);
2263  dbgs() << "APInt(" << BitWidth << "b, "
2264         << U.str() << "u " << S.str() << "s)";
2265}
2266
2267void APInt::print(raw_ostream &OS, bool isSigned) const {
2268  SmallString<40> S;
2269  this->toString(S, 10, isSigned);
2270  OS << S.str();
2271}
2272
2273// This implements a variety of operations on a representation of
2274// arbitrary precision, two's-complement, bignum integer values.
2275
2276// Assumed by lowHalf, highHalf, partMSB and partLSB.  A fairly safe
2277// and unrestricting assumption.
2278#define COMPILE_TIME_ASSERT(cond) extern int CTAssert[(cond) ? 1 : -1]
2279COMPILE_TIME_ASSERT(integerPartWidth % 2 == 0);
2280
2281/* Some handy functions local to this file.  */
2282namespace {
2283
2284  /* Returns the integer part with the least significant BITS set.
2285     BITS cannot be zero.  */
2286  static inline integerPart
2287  lowBitMask(unsigned int bits)
2288  {
2289    assert(bits != 0 && bits <= integerPartWidth);
2290
2291    return ~(integerPart) 0 >> (integerPartWidth - bits);
2292  }
2293
2294  /* Returns the value of the lower half of PART.  */
2295  static inline integerPart
2296  lowHalf(integerPart part)
2297  {
2298    return part & lowBitMask(integerPartWidth / 2);
2299  }
2300
2301  /* Returns the value of the upper half of PART.  */
2302  static inline integerPart
2303  highHalf(integerPart part)
2304  {
2305    return part >> (integerPartWidth / 2);
2306  }
2307
2308  /* Returns the bit number of the most significant set bit of a part.
2309     If the input number has no bits set -1U is returned.  */
2310  static unsigned int
2311  partMSB(integerPart value)
2312  {
2313    unsigned int n, msb;
2314
2315    if (value == 0)
2316      return -1U;
2317
2318    n = integerPartWidth / 2;
2319
2320    msb = 0;
2321    do {
2322      if (value >> n) {
2323        value >>= n;
2324        msb += n;
2325      }
2326
2327      n >>= 1;
2328    } while (n);
2329
2330    return msb;
2331  }
2332
2333  /* Returns the bit number of the least significant set bit of a
2334     part.  If the input number has no bits set -1U is returned.  */
2335  static unsigned int
2336  partLSB(integerPart value)
2337  {
2338    unsigned int n, lsb;
2339
2340    if (value == 0)
2341      return -1U;
2342
2343    lsb = integerPartWidth - 1;
2344    n = integerPartWidth / 2;
2345
2346    do {
2347      if (value << n) {
2348        value <<= n;
2349        lsb -= n;
2350      }
2351
2352      n >>= 1;
2353    } while (n);
2354
2355    return lsb;
2356  }
2357}
2358
2359/* Sets the least significant part of a bignum to the input value, and
2360   zeroes out higher parts.  */
2361void
2362APInt::tcSet(integerPart *dst, integerPart part, unsigned int parts)
2363{
2364  unsigned int i;
2365
2366  assert(parts > 0);
2367
2368  dst[0] = part;
2369  for (i = 1; i < parts; i++)
2370    dst[i] = 0;
2371}
2372
2373/* Assign one bignum to another.  */
2374void
2375APInt::tcAssign(integerPart *dst, const integerPart *src, unsigned int parts)
2376{
2377  unsigned int i;
2378
2379  for (i = 0; i < parts; i++)
2380    dst[i] = src[i];
2381}
2382
2383/* Returns true if a bignum is zero, false otherwise.  */
2384bool
2385APInt::tcIsZero(const integerPart *src, unsigned int parts)
2386{
2387  unsigned int i;
2388
2389  for (i = 0; i < parts; i++)
2390    if (src[i])
2391      return false;
2392
2393  return true;
2394}
2395
2396/* Extract the given bit of a bignum; returns 0 or 1.  */
2397int
2398APInt::tcExtractBit(const integerPart *parts, unsigned int bit)
2399{
2400  return (parts[bit / integerPartWidth] &
2401          ((integerPart) 1 << bit % integerPartWidth)) != 0;
2402}
2403
2404/* Set the given bit of a bignum. */
2405void
2406APInt::tcSetBit(integerPart *parts, unsigned int bit)
2407{
2408  parts[bit / integerPartWidth] |= (integerPart) 1 << (bit % integerPartWidth);
2409}
2410
2411/* Clears the given bit of a bignum. */
2412void
2413APInt::tcClearBit(integerPart *parts, unsigned int bit)
2414{
2415  parts[bit / integerPartWidth] &=
2416    ~((integerPart) 1 << (bit % integerPartWidth));
2417}
2418
2419/* Returns the bit number of the least significant set bit of a
2420   number.  If the input number has no bits set -1U is returned.  */
2421unsigned int
2422APInt::tcLSB(const integerPart *parts, unsigned int n)
2423{
2424  unsigned int i, lsb;
2425
2426  for (i = 0; i < n; i++) {
2427      if (parts[i] != 0) {
2428          lsb = partLSB(parts[i]);
2429
2430          return lsb + i * integerPartWidth;
2431      }
2432  }
2433
2434  return -1U;
2435}
2436
2437/* Returns the bit number of the most significant set bit of a number.
2438   If the input number has no bits set -1U is returned.  */
2439unsigned int
2440APInt::tcMSB(const integerPart *parts, unsigned int n)
2441{
2442  unsigned int msb;
2443
2444  do {
2445    --n;
2446
2447    if (parts[n] != 0) {
2448      msb = partMSB(parts[n]);
2449
2450      return msb + n * integerPartWidth;
2451    }
2452  } while (n);
2453
2454  return -1U;
2455}
2456
2457/* Copy the bit vector of width srcBITS from SRC, starting at bit
2458   srcLSB, to DST, of dstCOUNT parts, such that the bit srcLSB becomes
2459   the least significant bit of DST.  All high bits above srcBITS in
2460   DST are zero-filled.  */
2461void
2462APInt::tcExtract(integerPart *dst, unsigned int dstCount,const integerPart *src,
2463                 unsigned int srcBits, unsigned int srcLSB)
2464{
2465  unsigned int firstSrcPart, dstParts, shift, n;
2466
2467  dstParts = (srcBits + integerPartWidth - 1) / integerPartWidth;
2468  assert(dstParts <= dstCount);
2469
2470  firstSrcPart = srcLSB / integerPartWidth;
2471  tcAssign (dst, src + firstSrcPart, dstParts);
2472
2473  shift = srcLSB % integerPartWidth;
2474  tcShiftRight (dst, dstParts, shift);
2475
2476  /* We now have (dstParts * integerPartWidth - shift) bits from SRC
2477     in DST.  If this is less that srcBits, append the rest, else
2478     clear the high bits.  */
2479  n = dstParts * integerPartWidth - shift;
2480  if (n < srcBits) {
2481    integerPart mask = lowBitMask (srcBits - n);
2482    dst[dstParts - 1] |= ((src[firstSrcPart + dstParts] & mask)
2483                          << n % integerPartWidth);
2484  } else if (n > srcBits) {
2485    if (srcBits % integerPartWidth)
2486      dst[dstParts - 1] &= lowBitMask (srcBits % integerPartWidth);
2487  }
2488
2489  /* Clear high parts.  */
2490  while (dstParts < dstCount)
2491    dst[dstParts++] = 0;
2492}
2493
2494/* DST += RHS + C where C is zero or one.  Returns the carry flag.  */
2495integerPart
2496APInt::tcAdd(integerPart *dst, const integerPart *rhs,
2497             integerPart c, unsigned int parts)
2498{
2499  unsigned int i;
2500
2501  assert(c <= 1);
2502
2503  for (i = 0; i < parts; i++) {
2504    integerPart l;
2505
2506    l = dst[i];
2507    if (c) {
2508      dst[i] += rhs[i] + 1;
2509      c = (dst[i] <= l);
2510    } else {
2511      dst[i] += rhs[i];
2512      c = (dst[i] < l);
2513    }
2514  }
2515
2516  return c;
2517}
2518
2519/* DST -= RHS + C where C is zero or one.  Returns the carry flag.  */
2520integerPart
2521APInt::tcSubtract(integerPart *dst, const integerPart *rhs,
2522                  integerPart c, unsigned int parts)
2523{
2524  unsigned int i;
2525
2526  assert(c <= 1);
2527
2528  for (i = 0; i < parts; i++) {
2529    integerPart l;
2530
2531    l = dst[i];
2532    if (c) {
2533      dst[i] -= rhs[i] + 1;
2534      c = (dst[i] >= l);
2535    } else {
2536      dst[i] -= rhs[i];
2537      c = (dst[i] > l);
2538    }
2539  }
2540
2541  return c;
2542}
2543
2544/* Negate a bignum in-place.  */
2545void
2546APInt::tcNegate(integerPart *dst, unsigned int parts)
2547{
2548  tcComplement(dst, parts);
2549  tcIncrement(dst, parts);
2550}
2551
2552/*  DST += SRC * MULTIPLIER + CARRY   if add is true
2553    DST  = SRC * MULTIPLIER + CARRY   if add is false
2554
2555    Requires 0 <= DSTPARTS <= SRCPARTS + 1.  If DST overlaps SRC
2556    they must start at the same point, i.e. DST == SRC.
2557
2558    If DSTPARTS == SRCPARTS + 1 no overflow occurs and zero is
2559    returned.  Otherwise DST is filled with the least significant
2560    DSTPARTS parts of the result, and if all of the omitted higher
2561    parts were zero return zero, otherwise overflow occurred and
2562    return one.  */
2563int
2564APInt::tcMultiplyPart(integerPart *dst, const integerPart *src,
2565                      integerPart multiplier, integerPart carry,
2566                      unsigned int srcParts, unsigned int dstParts,
2567                      bool add)
2568{
2569  unsigned int i, n;
2570
2571  /* Otherwise our writes of DST kill our later reads of SRC.  */
2572  assert(dst <= src || dst >= src + srcParts);
2573  assert(dstParts <= srcParts + 1);
2574
2575  /* N loops; minimum of dstParts and srcParts.  */
2576  n = dstParts < srcParts ? dstParts: srcParts;
2577
2578  for (i = 0; i < n; i++) {
2579    integerPart low, mid, high, srcPart;
2580
2581      /* [ LOW, HIGH ] = MULTIPLIER * SRC[i] + DST[i] + CARRY.
2582
2583         This cannot overflow, because
2584
2585         (n - 1) * (n - 1) + 2 (n - 1) = (n - 1) * (n + 1)
2586
2587         which is less than n^2.  */
2588
2589    srcPart = src[i];
2590
2591    if (multiplier == 0 || srcPart == 0)        {
2592      low = carry;
2593      high = 0;
2594    } else {
2595      low = lowHalf(srcPart) * lowHalf(multiplier);
2596      high = highHalf(srcPart) * highHalf(multiplier);
2597
2598      mid = lowHalf(srcPart) * highHalf(multiplier);
2599      high += highHalf(mid);
2600      mid <<= integerPartWidth / 2;
2601      if (low + mid < low)
2602        high++;
2603      low += mid;
2604
2605      mid = highHalf(srcPart) * lowHalf(multiplier);
2606      high += highHalf(mid);
2607      mid <<= integerPartWidth / 2;
2608      if (low + mid < low)
2609        high++;
2610      low += mid;
2611
2612      /* Now add carry.  */
2613      if (low + carry < low)
2614        high++;
2615      low += carry;
2616    }
2617
2618    if (add) {
2619      /* And now DST[i], and store the new low part there.  */
2620      if (low + dst[i] < low)
2621        high++;
2622      dst[i] += low;
2623    } else
2624      dst[i] = low;
2625
2626    carry = high;
2627  }
2628
2629  if (i < dstParts) {
2630    /* Full multiplication, there is no overflow.  */
2631    assert(i + 1 == dstParts);
2632    dst[i] = carry;
2633    return 0;
2634  } else {
2635    /* We overflowed if there is carry.  */
2636    if (carry)
2637      return 1;
2638
2639    /* We would overflow if any significant unwritten parts would be
2640       non-zero.  This is true if any remaining src parts are non-zero
2641       and the multiplier is non-zero.  */
2642    if (multiplier)
2643      for (; i < srcParts; i++)
2644        if (src[i])
2645          return 1;
2646
2647    /* We fitted in the narrow destination.  */
2648    return 0;
2649  }
2650}
2651
2652/* DST = LHS * RHS, where DST has the same width as the operands and
2653   is filled with the least significant parts of the result.  Returns
2654   one if overflow occurred, otherwise zero.  DST must be disjoint
2655   from both operands.  */
2656int
2657APInt::tcMultiply(integerPart *dst, const integerPart *lhs,
2658                  const integerPart *rhs, unsigned int parts)
2659{
2660  unsigned int i;
2661  int overflow;
2662
2663  assert(dst != lhs && dst != rhs);
2664
2665  overflow = 0;
2666  tcSet(dst, 0, parts);
2667
2668  for (i = 0; i < parts; i++)
2669    overflow |= tcMultiplyPart(&dst[i], lhs, rhs[i], 0, parts,
2670                               parts - i, true);
2671
2672  return overflow;
2673}
2674
2675/* DST = LHS * RHS, where DST has width the sum of the widths of the
2676   operands.  No overflow occurs.  DST must be disjoint from both
2677   operands.  Returns the number of parts required to hold the
2678   result.  */
2679unsigned int
2680APInt::tcFullMultiply(integerPart *dst, const integerPart *lhs,
2681                      const integerPart *rhs, unsigned int lhsParts,
2682                      unsigned int rhsParts)
2683{
2684  /* Put the narrower number on the LHS for less loops below.  */
2685  if (lhsParts > rhsParts) {
2686    return tcFullMultiply (dst, rhs, lhs, rhsParts, lhsParts);
2687  } else {
2688    unsigned int n;
2689
2690    assert(dst != lhs && dst != rhs);
2691
2692    tcSet(dst, 0, rhsParts);
2693
2694    for (n = 0; n < lhsParts; n++)
2695      tcMultiplyPart(&dst[n], rhs, lhs[n], 0, rhsParts, rhsParts + 1, true);
2696
2697    n = lhsParts + rhsParts;
2698
2699    return n - (dst[n - 1] == 0);
2700  }
2701}
2702
2703/* If RHS is zero LHS and REMAINDER are left unchanged, return one.
2704   Otherwise set LHS to LHS / RHS with the fractional part discarded,
2705   set REMAINDER to the remainder, return zero.  i.e.
2706
2707   OLD_LHS = RHS * LHS + REMAINDER
2708
2709   SCRATCH is a bignum of the same size as the operands and result for
2710   use by the routine; its contents need not be initialized and are
2711   destroyed.  LHS, REMAINDER and SCRATCH must be distinct.
2712*/
2713int
2714APInt::tcDivide(integerPart *lhs, const integerPart *rhs,
2715                integerPart *remainder, integerPart *srhs,
2716                unsigned int parts)
2717{
2718  unsigned int n, shiftCount;
2719  integerPart mask;
2720
2721  assert(lhs != remainder && lhs != srhs && remainder != srhs);
2722
2723  shiftCount = tcMSB(rhs, parts) + 1;
2724  if (shiftCount == 0)
2725    return true;
2726
2727  shiftCount = parts * integerPartWidth - shiftCount;
2728  n = shiftCount / integerPartWidth;
2729  mask = (integerPart) 1 << (shiftCount % integerPartWidth);
2730
2731  tcAssign(srhs, rhs, parts);
2732  tcShiftLeft(srhs, parts, shiftCount);
2733  tcAssign(remainder, lhs, parts);
2734  tcSet(lhs, 0, parts);
2735
2736  /* Loop, subtracting SRHS if REMAINDER is greater and adding that to
2737     the total.  */
2738  for (;;) {
2739      int compare;
2740
2741      compare = tcCompare(remainder, srhs, parts);
2742      if (compare >= 0) {
2743        tcSubtract(remainder, srhs, 0, parts);
2744        lhs[n] |= mask;
2745      }
2746
2747      if (shiftCount == 0)
2748        break;
2749      shiftCount--;
2750      tcShiftRight(srhs, parts, 1);
2751      if ((mask >>= 1) == 0)
2752        mask = (integerPart) 1 << (integerPartWidth - 1), n--;
2753  }
2754
2755  return false;
2756}
2757
2758/* Shift a bignum left COUNT bits in-place.  Shifted in bits are zero.
2759   There are no restrictions on COUNT.  */
2760void
2761APInt::tcShiftLeft(integerPart *dst, unsigned int parts, unsigned int count)
2762{
2763  if (count) {
2764    unsigned int jump, shift;
2765
2766    /* Jump is the inter-part jump; shift is is intra-part shift.  */
2767    jump = count / integerPartWidth;
2768    shift = count % integerPartWidth;
2769
2770    while (parts > jump) {
2771      integerPart part;
2772
2773      parts--;
2774
2775      /* dst[i] comes from the two parts src[i - jump] and, if we have
2776         an intra-part shift, src[i - jump - 1].  */
2777      part = dst[parts - jump];
2778      if (shift) {
2779        part <<= shift;
2780        if (parts >= jump + 1)
2781          part |= dst[parts - jump - 1] >> (integerPartWidth - shift);
2782      }
2783
2784      dst[parts] = part;
2785    }
2786
2787    while (parts > 0)
2788      dst[--parts] = 0;
2789  }
2790}
2791
2792/* Shift a bignum right COUNT bits in-place.  Shifted in bits are
2793   zero.  There are no restrictions on COUNT.  */
2794void
2795APInt::tcShiftRight(integerPart *dst, unsigned int parts, unsigned int count)
2796{
2797  if (count) {
2798    unsigned int i, jump, shift;
2799
2800    /* Jump is the inter-part jump; shift is is intra-part shift.  */
2801    jump = count / integerPartWidth;
2802    shift = count % integerPartWidth;
2803
2804    /* Perform the shift.  This leaves the most significant COUNT bits
2805       of the result at zero.  */
2806    for (i = 0; i < parts; i++) {
2807      integerPart part;
2808
2809      if (i + jump >= parts) {
2810        part = 0;
2811      } else {
2812        part = dst[i + jump];
2813        if (shift) {
2814          part >>= shift;
2815          if (i + jump + 1 < parts)
2816            part |= dst[i + jump + 1] << (integerPartWidth - shift);
2817        }
2818      }
2819
2820      dst[i] = part;
2821    }
2822  }
2823}
2824
2825/* Bitwise and of two bignums.  */
2826void
2827APInt::tcAnd(integerPart *dst, const integerPart *rhs, unsigned int parts)
2828{
2829  unsigned int i;
2830
2831  for (i = 0; i < parts; i++)
2832    dst[i] &= rhs[i];
2833}
2834
2835/* Bitwise inclusive or of two bignums.  */
2836void
2837APInt::tcOr(integerPart *dst, const integerPart *rhs, unsigned int parts)
2838{
2839  unsigned int i;
2840
2841  for (i = 0; i < parts; i++)
2842    dst[i] |= rhs[i];
2843}
2844
2845/* Bitwise exclusive or of two bignums.  */
2846void
2847APInt::tcXor(integerPart *dst, const integerPart *rhs, unsigned int parts)
2848{
2849  unsigned int i;
2850
2851  for (i = 0; i < parts; i++)
2852    dst[i] ^= rhs[i];
2853}
2854
2855/* Complement a bignum in-place.  */
2856void
2857APInt::tcComplement(integerPart *dst, unsigned int parts)
2858{
2859  unsigned int i;
2860
2861  for (i = 0; i < parts; i++)
2862    dst[i] = ~dst[i];
2863}
2864
2865/* Comparison (unsigned) of two bignums.  */
2866int
2867APInt::tcCompare(const integerPart *lhs, const integerPart *rhs,
2868                 unsigned int parts)
2869{
2870  while (parts) {
2871      parts--;
2872      if (lhs[parts] == rhs[parts])
2873        continue;
2874
2875      if (lhs[parts] > rhs[parts])
2876        return 1;
2877      else
2878        return -1;
2879    }
2880
2881  return 0;
2882}
2883
2884/* Increment a bignum in-place, return the carry flag.  */
2885integerPart
2886APInt::tcIncrement(integerPart *dst, unsigned int parts)
2887{
2888  unsigned int i;
2889
2890  for (i = 0; i < parts; i++)
2891    if (++dst[i] != 0)
2892      break;
2893
2894  return i == parts;
2895}
2896
2897/* Set the least significant BITS bits of a bignum, clear the
2898   rest.  */
2899void
2900APInt::tcSetLeastSignificantBits(integerPart *dst, unsigned int parts,
2901                                 unsigned int bits)
2902{
2903  unsigned int i;
2904
2905  i = 0;
2906  while (bits > integerPartWidth) {
2907    dst[i++] = ~(integerPart) 0;
2908    bits -= integerPartWidth;
2909  }
2910
2911  if (bits)
2912    dst[i++] = ~(integerPart) 0 >> (integerPartWidth - bits);
2913
2914  while (i < parts)
2915    dst[i++] = 0;
2916}
2917