APInt.cpp revision 206083
1284345Ssjg//===-- APInt.cpp - Implement APInt class ---------------------------------===//
2284345Ssjg//
3284345Ssjg//                     The LLVM Compiler Infrastructure
4284345Ssjg//
5284345Ssjg// This file is distributed under the University of Illinois Open Source
6284345Ssjg// License. See LICENSE.TXT for details.
7284345Ssjg//
8284345Ssjg//===----------------------------------------------------------------------===//
9284345Ssjg//
10284345Ssjg// This file implements a class to represent arbitrary precision integer
11284345Ssjg// constant values and provide a variety of arithmetic operations on them.
12284345Ssjg//
13284345Ssjg//===----------------------------------------------------------------------===//
14284345Ssjg
15284345Ssjg#define DEBUG_TYPE "apint"
16284345Ssjg#include "llvm/ADT/APInt.h"
17284345Ssjg#include "llvm/ADT/StringRef.h"
18284345Ssjg#include "llvm/ADT/FoldingSet.h"
19284345Ssjg#include "llvm/ADT/SmallString.h"
20#include "llvm/Support/Debug.h"
21#include "llvm/Support/ErrorHandling.h"
22#include "llvm/Support/MathExtras.h"
23#include "llvm/Support/raw_ostream.h"
24#include <cmath>
25#include <limits>
26#include <cstring>
27#include <cstdlib>
28using namespace llvm;
29
30/// A utility function for allocating memory, checking for allocation failures,
31/// and ensuring the contents are zeroed.
32inline static uint64_t* getClearedMemory(unsigned numWords) {
33  uint64_t * result = new uint64_t[numWords];
34  assert(result && "APInt memory allocation fails!");
35  memset(result, 0, numWords * sizeof(uint64_t));
36  return result;
37}
38
39/// A utility function for allocating memory and checking for allocation
40/// failure.  The content is not zeroed.
41inline static uint64_t* getMemory(unsigned numWords) {
42  uint64_t * result = new uint64_t[numWords];
43  assert(result && "APInt memory allocation fails!");
44  return result;
45}
46
47/// A utility function that converts a character to a digit.
48inline static unsigned getDigit(char cdigit, uint8_t radix) {
49  unsigned r;
50
51  if (radix == 16) {
52    r = cdigit - '0';
53    if (r <= 9)
54      return r;
55
56    r = cdigit - 'A';
57    if (r <= 5)
58      return r + 10;
59
60    r = cdigit - 'a';
61    if (r <= 5)
62      return r + 10;
63  }
64
65  r = cdigit - '0';
66  if (r < radix)
67    return r;
68
69  return -1U;
70}
71
72
73void APInt::initSlowCase(unsigned numBits, uint64_t val, bool isSigned) {
74  pVal = getClearedMemory(getNumWords());
75  pVal[0] = val;
76  if (isSigned && int64_t(val) < 0)
77    for (unsigned i = 1; i < getNumWords(); ++i)
78      pVal[i] = -1ULL;
79}
80
81void APInt::initSlowCase(const APInt& that) {
82  pVal = getMemory(getNumWords());
83  memcpy(pVal, that.pVal, getNumWords() * APINT_WORD_SIZE);
84}
85
86
87APInt::APInt(unsigned numBits, unsigned numWords, const uint64_t bigVal[])
88  : BitWidth(numBits), VAL(0) {
89  assert(BitWidth && "Bitwidth too small");
90  assert(bigVal && "Null pointer detected!");
91  if (isSingleWord())
92    VAL = bigVal[0];
93  else {
94    // Get memory, cleared to 0
95    pVal = getClearedMemory(getNumWords());
96    // Calculate the number of words to copy
97    unsigned words = std::min<unsigned>(numWords, getNumWords());
98    // Copy the words from bigVal to pVal
99    memcpy(pVal, bigVal, words * APINT_WORD_SIZE);
100  }
101  // Make sure unused high bits are cleared
102  clearUnusedBits();
103}
104
105APInt::APInt(unsigned numbits, const StringRef& Str, uint8_t radix)
106  : BitWidth(numbits), VAL(0) {
107  assert(BitWidth && "Bitwidth too small");
108  fromString(numbits, Str, radix);
109}
110
111APInt& APInt::AssignSlowCase(const APInt& RHS) {
112  // Don't do anything for X = X
113  if (this == &RHS)
114    return *this;
115
116  if (BitWidth == RHS.getBitWidth()) {
117    // assume same bit-width single-word case is already handled
118    assert(!isSingleWord());
119    memcpy(pVal, RHS.pVal, getNumWords() * APINT_WORD_SIZE);
120    return *this;
121  }
122
123  if (isSingleWord()) {
124    // assume case where both are single words is already handled
125    assert(!RHS.isSingleWord());
126    VAL = 0;
127    pVal = getMemory(RHS.getNumWords());
128    memcpy(pVal, RHS.pVal, RHS.getNumWords() * APINT_WORD_SIZE);
129  } else if (getNumWords() == RHS.getNumWords())
130    memcpy(pVal, RHS.pVal, RHS.getNumWords() * APINT_WORD_SIZE);
131  else if (RHS.isSingleWord()) {
132    delete [] pVal;
133    VAL = RHS.VAL;
134  } else {
135    delete [] pVal;
136    pVal = getMemory(RHS.getNumWords());
137    memcpy(pVal, RHS.pVal, RHS.getNumWords() * APINT_WORD_SIZE);
138  }
139  BitWidth = RHS.BitWidth;
140  return clearUnusedBits();
141}
142
143APInt& APInt::operator=(uint64_t RHS) {
144  if (isSingleWord())
145    VAL = RHS;
146  else {
147    pVal[0] = RHS;
148    memset(pVal+1, 0, (getNumWords() - 1) * APINT_WORD_SIZE);
149  }
150  return clearUnusedBits();
151}
152
153/// Profile - This method 'profiles' an APInt for use with FoldingSet.
154void APInt::Profile(FoldingSetNodeID& ID) const {
155  ID.AddInteger(BitWidth);
156
157  if (isSingleWord()) {
158    ID.AddInteger(VAL);
159    return;
160  }
161
162  unsigned NumWords = getNumWords();
163  for (unsigned i = 0; i < NumWords; ++i)
164    ID.AddInteger(pVal[i]);
165}
166
167/// add_1 - This function adds a single "digit" integer, y, to the multiple
168/// "digit" integer array,  x[]. x[] is modified to reflect the addition and
169/// 1 is returned if there is a carry out, otherwise 0 is returned.
170/// @returns the carry of the addition.
171static bool add_1(uint64_t dest[], uint64_t x[], unsigned len, uint64_t y) {
172  for (unsigned i = 0; i < len; ++i) {
173    dest[i] = y + x[i];
174    if (dest[i] < y)
175      y = 1; // Carry one to next digit.
176    else {
177      y = 0; // No need to carry so exit early
178      break;
179    }
180  }
181  return y;
182}
183
184/// @brief Prefix increment operator. Increments the APInt by one.
185APInt& APInt::operator++() {
186  if (isSingleWord())
187    ++VAL;
188  else
189    add_1(pVal, pVal, getNumWords(), 1);
190  return clearUnusedBits();
191}
192
193/// sub_1 - This function subtracts a single "digit" (64-bit word), y, from
194/// the multi-digit integer array, x[], propagating the borrowed 1 value until
195/// no further borrowing is neeeded or it runs out of "digits" in x.  The result
196/// is 1 if "borrowing" exhausted the digits in x, or 0 if x was not exhausted.
197/// In other words, if y > x then this function returns 1, otherwise 0.
198/// @returns the borrow out of the subtraction
199static bool sub_1(uint64_t x[], unsigned len, uint64_t y) {
200  for (unsigned i = 0; i < len; ++i) {
201    uint64_t X = x[i];
202    x[i] -= y;
203    if (y > X)
204      y = 1;  // We have to "borrow 1" from next "digit"
205    else {
206      y = 0;  // No need to borrow
207      break;  // Remaining digits are unchanged so exit early
208    }
209  }
210  return bool(y);
211}
212
213/// @brief Prefix decrement operator. Decrements the APInt by one.
214APInt& APInt::operator--() {
215  if (isSingleWord())
216    --VAL;
217  else
218    sub_1(pVal, getNumWords(), 1);
219  return clearUnusedBits();
220}
221
222/// add - This function adds the integer array x to the integer array Y and
223/// places the result in dest.
224/// @returns the carry out from the addition
225/// @brief General addition of 64-bit integer arrays
226static bool add(uint64_t *dest, const uint64_t *x, const uint64_t *y,
227                unsigned len) {
228  bool carry = false;
229  for (unsigned i = 0; i< len; ++i) {
230    uint64_t limit = std::min(x[i],y[i]); // must come first in case dest == x
231    dest[i] = x[i] + y[i] + carry;
232    carry = dest[i] < limit || (carry && dest[i] == limit);
233  }
234  return carry;
235}
236
237/// Adds the RHS APint to this APInt.
238/// @returns this, after addition of RHS.
239/// @brief Addition assignment operator.
240APInt& APInt::operator+=(const APInt& RHS) {
241  assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
242  if (isSingleWord())
243    VAL += RHS.VAL;
244  else {
245    add(pVal, pVal, RHS.pVal, getNumWords());
246  }
247  return clearUnusedBits();
248}
249
250/// Subtracts the integer array y from the integer array x
251/// @returns returns the borrow out.
252/// @brief Generalized subtraction of 64-bit integer arrays.
253static bool sub(uint64_t *dest, const uint64_t *x, const uint64_t *y,
254                unsigned len) {
255  bool borrow = false;
256  for (unsigned i = 0; i < len; ++i) {
257    uint64_t x_tmp = borrow ? x[i] - 1 : x[i];
258    borrow = y[i] > x_tmp || (borrow && x[i] == 0);
259    dest[i] = x_tmp - y[i];
260  }
261  return borrow;
262}
263
264/// Subtracts the RHS APInt from this APInt
265/// @returns this, after subtraction
266/// @brief Subtraction assignment operator.
267APInt& APInt::operator-=(const APInt& RHS) {
268  assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
269  if (isSingleWord())
270    VAL -= RHS.VAL;
271  else
272    sub(pVal, pVal, RHS.pVal, getNumWords());
273  return clearUnusedBits();
274}
275
276/// Multiplies an integer array, x, by a uint64_t integer and places the result
277/// into dest.
278/// @returns the carry out of the multiplication.
279/// @brief Multiply a multi-digit APInt by a single digit (64-bit) integer.
280static uint64_t mul_1(uint64_t dest[], uint64_t x[], unsigned len, uint64_t y) {
281  // Split y into high 32-bit part (hy)  and low 32-bit part (ly)
282  uint64_t ly = y & 0xffffffffULL, hy = y >> 32;
283  uint64_t carry = 0;
284
285  // For each digit of x.
286  for (unsigned i = 0; i < len; ++i) {
287    // Split x into high and low words
288    uint64_t lx = x[i] & 0xffffffffULL;
289    uint64_t hx = x[i] >> 32;
290    // hasCarry - A flag to indicate if there is a carry to the next digit.
291    // hasCarry == 0, no carry
292    // hasCarry == 1, has carry
293    // hasCarry == 2, no carry and the calculation result == 0.
294    uint8_t hasCarry = 0;
295    dest[i] = carry + lx * ly;
296    // Determine if the add above introduces carry.
297    hasCarry = (dest[i] < carry) ? 1 : 0;
298    carry = hx * ly + (dest[i] >> 32) + (hasCarry ? (1ULL << 32) : 0);
299    // The upper limit of carry can be (2^32 - 1)(2^32 - 1) +
300    // (2^32 - 1) + 2^32 = 2^64.
301    hasCarry = (!carry && hasCarry) ? 1 : (!carry ? 2 : 0);
302
303    carry += (lx * hy) & 0xffffffffULL;
304    dest[i] = (carry << 32) | (dest[i] & 0xffffffffULL);
305    carry = (((!carry && hasCarry != 2) || hasCarry == 1) ? (1ULL << 32) : 0) +
306            (carry >> 32) + ((lx * hy) >> 32) + hx * hy;
307  }
308  return carry;
309}
310
311/// Multiplies integer array x by integer array y and stores the result into
312/// the integer array dest. Note that dest's size must be >= xlen + ylen.
313/// @brief Generalized multiplicate of integer arrays.
314static void mul(uint64_t dest[], uint64_t x[], unsigned xlen, uint64_t y[],
315                unsigned ylen) {
316  dest[xlen] = mul_1(dest, x, xlen, y[0]);
317  for (unsigned i = 1; i < ylen; ++i) {
318    uint64_t ly = y[i] & 0xffffffffULL, hy = y[i] >> 32;
319    uint64_t carry = 0, lx = 0, hx = 0;
320    for (unsigned j = 0; j < xlen; ++j) {
321      lx = x[j] & 0xffffffffULL;
322      hx = x[j] >> 32;
323      // hasCarry - A flag to indicate if has carry.
324      // hasCarry == 0, no carry
325      // hasCarry == 1, has carry
326      // hasCarry == 2, no carry and the calculation result == 0.
327      uint8_t hasCarry = 0;
328      uint64_t resul = carry + lx * ly;
329      hasCarry = (resul < carry) ? 1 : 0;
330      carry = (hasCarry ? (1ULL << 32) : 0) + hx * ly + (resul >> 32);
331      hasCarry = (!carry && hasCarry) ? 1 : (!carry ? 2 : 0);
332
333      carry += (lx * hy) & 0xffffffffULL;
334      resul = (carry << 32) | (resul & 0xffffffffULL);
335      dest[i+j] += resul;
336      carry = (((!carry && hasCarry != 2) || hasCarry == 1) ? (1ULL << 32) : 0)+
337              (carry >> 32) + (dest[i+j] < resul ? 1 : 0) +
338              ((lx * hy) >> 32) + hx * hy;
339    }
340    dest[i+xlen] = carry;
341  }
342}
343
344APInt& APInt::operator*=(const APInt& RHS) {
345  assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
346  if (isSingleWord()) {
347    VAL *= RHS.VAL;
348    clearUnusedBits();
349    return *this;
350  }
351
352  // Get some bit facts about LHS and check for zero
353  unsigned lhsBits = getActiveBits();
354  unsigned lhsWords = !lhsBits ? 0 : whichWord(lhsBits - 1) + 1;
355  if (!lhsWords)
356    // 0 * X ===> 0
357    return *this;
358
359  // Get some bit facts about RHS and check for zero
360  unsigned rhsBits = RHS.getActiveBits();
361  unsigned rhsWords = !rhsBits ? 0 : whichWord(rhsBits - 1) + 1;
362  if (!rhsWords) {
363    // X * 0 ===> 0
364    clear();
365    return *this;
366  }
367
368  // Allocate space for the result
369  unsigned destWords = rhsWords + lhsWords;
370  uint64_t *dest = getMemory(destWords);
371
372  // Perform the long multiply
373  mul(dest, pVal, lhsWords, RHS.pVal, rhsWords);
374
375  // Copy result back into *this
376  clear();
377  unsigned wordsToCopy = destWords >= getNumWords() ? getNumWords() : destWords;
378  memcpy(pVal, dest, wordsToCopy * APINT_WORD_SIZE);
379
380  // delete dest array and return
381  delete[] dest;
382  return *this;
383}
384
385APInt& APInt::operator&=(const APInt& RHS) {
386  assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
387  if (isSingleWord()) {
388    VAL &= RHS.VAL;
389    return *this;
390  }
391  unsigned numWords = getNumWords();
392  for (unsigned i = 0; i < numWords; ++i)
393    pVal[i] &= RHS.pVal[i];
394  return *this;
395}
396
397APInt& APInt::operator|=(const APInt& RHS) {
398  assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
399  if (isSingleWord()) {
400    VAL |= RHS.VAL;
401    return *this;
402  }
403  unsigned numWords = getNumWords();
404  for (unsigned i = 0; i < numWords; ++i)
405    pVal[i] |= RHS.pVal[i];
406  return *this;
407}
408
409APInt& APInt::operator^=(const APInt& RHS) {
410  assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
411  if (isSingleWord()) {
412    VAL ^= RHS.VAL;
413    this->clearUnusedBits();
414    return *this;
415  }
416  unsigned numWords = getNumWords();
417  for (unsigned i = 0; i < numWords; ++i)
418    pVal[i] ^= RHS.pVal[i];
419  return clearUnusedBits();
420}
421
422APInt APInt::AndSlowCase(const APInt& RHS) const {
423  unsigned numWords = getNumWords();
424  uint64_t* val = getMemory(numWords);
425  for (unsigned i = 0; i < numWords; ++i)
426    val[i] = pVal[i] & RHS.pVal[i];
427  return APInt(val, getBitWidth());
428}
429
430APInt APInt::OrSlowCase(const APInt& RHS) const {
431  unsigned numWords = getNumWords();
432  uint64_t *val = getMemory(numWords);
433  for (unsigned i = 0; i < numWords; ++i)
434    val[i] = pVal[i] | RHS.pVal[i];
435  return APInt(val, getBitWidth());
436}
437
438APInt APInt::XorSlowCase(const APInt& RHS) const {
439  unsigned numWords = getNumWords();
440  uint64_t *val = getMemory(numWords);
441  for (unsigned i = 0; i < numWords; ++i)
442    val[i] = pVal[i] ^ RHS.pVal[i];
443
444  // 0^0==1 so clear the high bits in case they got set.
445  return APInt(val, getBitWidth()).clearUnusedBits();
446}
447
448bool APInt::operator !() const {
449  if (isSingleWord())
450    return !VAL;
451
452  for (unsigned i = 0; i < getNumWords(); ++i)
453    if (pVal[i])
454      return false;
455  return true;
456}
457
458APInt APInt::operator*(const APInt& RHS) const {
459  assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
460  if (isSingleWord())
461    return APInt(BitWidth, VAL * RHS.VAL);
462  APInt Result(*this);
463  Result *= RHS;
464  return Result.clearUnusedBits();
465}
466
467APInt APInt::operator+(const APInt& RHS) const {
468  assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
469  if (isSingleWord())
470    return APInt(BitWidth, VAL + RHS.VAL);
471  APInt Result(BitWidth, 0);
472  add(Result.pVal, this->pVal, RHS.pVal, getNumWords());
473  return Result.clearUnusedBits();
474}
475
476APInt APInt::operator-(const APInt& RHS) const {
477  assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
478  if (isSingleWord())
479    return APInt(BitWidth, VAL - RHS.VAL);
480  APInt Result(BitWidth, 0);
481  sub(Result.pVal, this->pVal, RHS.pVal, getNumWords());
482  return Result.clearUnusedBits();
483}
484
485bool APInt::operator[](unsigned bitPosition) const {
486  return (maskBit(bitPosition) &
487          (isSingleWord() ?  VAL : pVal[whichWord(bitPosition)])) != 0;
488}
489
490bool APInt::EqualSlowCase(const APInt& RHS) const {
491  // Get some facts about the number of bits used in the two operands.
492  unsigned n1 = getActiveBits();
493  unsigned n2 = RHS.getActiveBits();
494
495  // If the number of bits isn't the same, they aren't equal
496  if (n1 != n2)
497    return false;
498
499  // If the number of bits fits in a word, we only need to compare the low word.
500  if (n1 <= APINT_BITS_PER_WORD)
501    return pVal[0] == RHS.pVal[0];
502
503  // Otherwise, compare everything
504  for (int i = whichWord(n1 - 1); i >= 0; --i)
505    if (pVal[i] != RHS.pVal[i])
506      return false;
507  return true;
508}
509
510bool APInt::EqualSlowCase(uint64_t Val) const {
511  unsigned n = getActiveBits();
512  if (n <= APINT_BITS_PER_WORD)
513    return pVal[0] == Val;
514  else
515    return false;
516}
517
518bool APInt::ult(const APInt& RHS) const {
519  assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison");
520  if (isSingleWord())
521    return VAL < RHS.VAL;
522
523  // Get active bit length of both operands
524  unsigned n1 = getActiveBits();
525  unsigned n2 = RHS.getActiveBits();
526
527  // If magnitude of LHS is less than RHS, return true.
528  if (n1 < n2)
529    return true;
530
531  // If magnitude of RHS is greather than LHS, return false.
532  if (n2 < n1)
533    return false;
534
535  // If they bot fit in a word, just compare the low order word
536  if (n1 <= APINT_BITS_PER_WORD && n2 <= APINT_BITS_PER_WORD)
537    return pVal[0] < RHS.pVal[0];
538
539  // Otherwise, compare all words
540  unsigned topWord = whichWord(std::max(n1,n2)-1);
541  for (int i = topWord; i >= 0; --i) {
542    if (pVal[i] > RHS.pVal[i])
543      return false;
544    if (pVal[i] < RHS.pVal[i])
545      return true;
546  }
547  return false;
548}
549
550bool APInt::slt(const APInt& RHS) const {
551  assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison");
552  if (isSingleWord()) {
553    int64_t lhsSext = (int64_t(VAL) << (64-BitWidth)) >> (64-BitWidth);
554    int64_t rhsSext = (int64_t(RHS.VAL) << (64-BitWidth)) >> (64-BitWidth);
555    return lhsSext < rhsSext;
556  }
557
558  APInt lhs(*this);
559  APInt rhs(RHS);
560  bool lhsNeg = isNegative();
561  bool rhsNeg = rhs.isNegative();
562  if (lhsNeg) {
563    // Sign bit is set so perform two's complement to make it positive
564    lhs.flip();
565    lhs++;
566  }
567  if (rhsNeg) {
568    // Sign bit is set so perform two's complement to make it positive
569    rhs.flip();
570    rhs++;
571  }
572
573  // Now we have unsigned values to compare so do the comparison if necessary
574  // based on the negativeness of the values.
575  if (lhsNeg)
576    if (rhsNeg)
577      return lhs.ugt(rhs);
578    else
579      return true;
580  else if (rhsNeg)
581    return false;
582  else
583    return lhs.ult(rhs);
584}
585
586APInt& APInt::set(unsigned bitPosition) {
587  if (isSingleWord())
588    VAL |= maskBit(bitPosition);
589  else
590    pVal[whichWord(bitPosition)] |= maskBit(bitPosition);
591  return *this;
592}
593
594/// Set the given bit to 0 whose position is given as "bitPosition".
595/// @brief Set a given bit to 0.
596APInt& APInt::clear(unsigned bitPosition) {
597  if (isSingleWord())
598    VAL &= ~maskBit(bitPosition);
599  else
600    pVal[whichWord(bitPosition)] &= ~maskBit(bitPosition);
601  return *this;
602}
603
604/// @brief Toggle every bit to its opposite value.
605
606/// Toggle a given bit to its opposite value whose position is given
607/// as "bitPosition".
608/// @brief Toggles a given bit to its opposite value.
609APInt& APInt::flip(unsigned bitPosition) {
610  assert(bitPosition < BitWidth && "Out of the bit-width range!");
611  if ((*this)[bitPosition]) clear(bitPosition);
612  else set(bitPosition);
613  return *this;
614}
615
616unsigned APInt::getBitsNeeded(const StringRef& str, uint8_t radix) {
617  assert(!str.empty() && "Invalid string length");
618  assert((radix == 10 || radix == 8 || radix == 16 || radix == 2) &&
619         "Radix should be 2, 8, 10, or 16!");
620
621  size_t slen = str.size();
622
623  // Each computation below needs to know if it's negative.
624  StringRef::iterator p = str.begin();
625  unsigned isNegative = *p == '-';
626  if (*p == '-' || *p == '+') {
627    p++;
628    slen--;
629    assert(slen && "String is only a sign, needs a value.");
630  }
631
632  // For radixes of power-of-two values, the bits required is accurately and
633  // easily computed
634  if (radix == 2)
635    return slen + isNegative;
636  if (radix == 8)
637    return slen * 3 + isNegative;
638  if (radix == 16)
639    return slen * 4 + isNegative;
640
641  // This is grossly inefficient but accurate. We could probably do something
642  // with a computation of roughly slen*64/20 and then adjust by the value of
643  // the first few digits. But, I'm not sure how accurate that could be.
644
645  // Compute a sufficient number of bits that is always large enough but might
646  // be too large. This avoids the assertion in the constructor. This
647  // calculation doesn't work appropriately for the numbers 0-9, so just use 4
648  // bits in that case.
649  unsigned sufficient = slen == 1 ? 4 : slen * 64/18;
650
651  // Convert to the actual binary value.
652  APInt tmp(sufficient, StringRef(p, slen), radix);
653
654  // Compute how many bits are required. If the log is infinite, assume we need
655  // just bit.
656  unsigned log = tmp.logBase2();
657  if (log == (unsigned)-1) {
658    return isNegative + 1;
659  } else {
660    return isNegative + log + 1;
661  }
662}
663
664// From http://www.burtleburtle.net, byBob Jenkins.
665// When targeting x86, both GCC and LLVM seem to recognize this as a
666// rotate instruction.
667#define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k))))
668
669// From http://www.burtleburtle.net, by Bob Jenkins.
670#define mix(a,b,c) \
671  { \
672    a -= c;  a ^= rot(c, 4);  c += b; \
673    b -= a;  b ^= rot(a, 6);  a += c; \
674    c -= b;  c ^= rot(b, 8);  b += a; \
675    a -= c;  a ^= rot(c,16);  c += b; \
676    b -= a;  b ^= rot(a,19);  a += c; \
677    c -= b;  c ^= rot(b, 4);  b += a; \
678  }
679
680// From http://www.burtleburtle.net, by Bob Jenkins.
681#define final(a,b,c) \
682  { \
683    c ^= b; c -= rot(b,14); \
684    a ^= c; a -= rot(c,11); \
685    b ^= a; b -= rot(a,25); \
686    c ^= b; c -= rot(b,16); \
687    a ^= c; a -= rot(c,4);  \
688    b ^= a; b -= rot(a,14); \
689    c ^= b; c -= rot(b,24); \
690  }
691
692// hashword() was adapted from http://www.burtleburtle.net, by Bob
693// Jenkins.  k is a pointer to an array of uint32_t values; length is
694// the length of the key, in 32-bit chunks.  This version only handles
695// keys that are a multiple of 32 bits in size.
696static inline uint32_t hashword(const uint64_t *k64, size_t length)
697{
698  const uint32_t *k = reinterpret_cast<const uint32_t *>(k64);
699  uint32_t a,b,c;
700
701  /* Set up the internal state */
702  a = b = c = 0xdeadbeef + (((uint32_t)length)<<2);
703
704  /*------------------------------------------------- handle most of the key */
705  while (length > 3) {
706    a += k[0];
707    b += k[1];
708    c += k[2];
709    mix(a,b,c);
710    length -= 3;
711    k += 3;
712  }
713
714  /*------------------------------------------- handle the last 3 uint32_t's */
715  switch (length) {                  /* all the case statements fall through */
716  case 3 : c+=k[2];
717  case 2 : b+=k[1];
718  case 1 : a+=k[0];
719    final(a,b,c);
720    case 0:     /* case 0: nothing left to add */
721      break;
722    }
723  /*------------------------------------------------------ report the result */
724  return c;
725}
726
727// hashword8() was adapted from http://www.burtleburtle.net, by Bob
728// Jenkins.  This computes a 32-bit hash from one 64-bit word.  When
729// targeting x86 (32 or 64 bit), both LLVM and GCC compile this
730// function into about 35 instructions when inlined.
731static inline uint32_t hashword8(const uint64_t k64)
732{
733  uint32_t a,b,c;
734  a = b = c = 0xdeadbeef + 4;
735  b += k64 >> 32;
736  a += k64 & 0xffffffff;
737  final(a,b,c);
738  return c;
739}
740#undef final
741#undef mix
742#undef rot
743
744uint64_t APInt::getHashValue() const {
745  uint64_t hash;
746  if (isSingleWord())
747    hash = hashword8(VAL);
748  else
749    hash = hashword(pVal, getNumWords()*2);
750  return hash;
751}
752
753/// HiBits - This function returns the high "numBits" bits of this APInt.
754APInt APInt::getHiBits(unsigned numBits) const {
755  return APIntOps::lshr(*this, BitWidth - numBits);
756}
757
758/// LoBits - This function returns the low "numBits" bits of this APInt.
759APInt APInt::getLoBits(unsigned numBits) const {
760  return APIntOps::lshr(APIntOps::shl(*this, BitWidth - numBits),
761                        BitWidth - numBits);
762}
763
764bool APInt::isPowerOf2() const {
765  return (!!*this) && !(*this & (*this - APInt(BitWidth,1)));
766}
767
768unsigned APInt::countLeadingZerosSlowCase() const {
769  // Treat the most significand word differently because it might have
770  // meaningless bits set beyond the precision.
771  unsigned BitsInMSW = BitWidth % APINT_BITS_PER_WORD;
772  integerPart MSWMask;
773  if (BitsInMSW) MSWMask = (integerPart(1) << BitsInMSW) - 1;
774  else {
775    MSWMask = ~integerPart(0);
776    BitsInMSW = APINT_BITS_PER_WORD;
777  }
778
779  unsigned i = getNumWords();
780  integerPart MSW = pVal[i-1] & MSWMask;
781  if (MSW)
782    return CountLeadingZeros_64(MSW) - (APINT_BITS_PER_WORD - BitsInMSW);
783
784  unsigned Count = BitsInMSW;
785  for (--i; i > 0u; --i) {
786    if (pVal[i-1] == 0)
787      Count += APINT_BITS_PER_WORD;
788    else {
789      Count += CountLeadingZeros_64(pVal[i-1]);
790      break;
791    }
792  }
793  return Count;
794}
795
796static unsigned countLeadingOnes_64(uint64_t V, unsigned skip) {
797  unsigned Count = 0;
798  if (skip)
799    V <<= skip;
800  while (V && (V & (1ULL << 63))) {
801    Count++;
802    V <<= 1;
803  }
804  return Count;
805}
806
807unsigned APInt::countLeadingOnes() const {
808  if (isSingleWord())
809    return countLeadingOnes_64(VAL, APINT_BITS_PER_WORD - BitWidth);
810
811  unsigned highWordBits = BitWidth % APINT_BITS_PER_WORD;
812  unsigned shift;
813  if (!highWordBits) {
814    highWordBits = APINT_BITS_PER_WORD;
815    shift = 0;
816  } else {
817    shift = APINT_BITS_PER_WORD - highWordBits;
818  }
819  int i = getNumWords() - 1;
820  unsigned Count = countLeadingOnes_64(pVal[i], shift);
821  if (Count == highWordBits) {
822    for (i--; i >= 0; --i) {
823      if (pVal[i] == -1ULL)
824        Count += APINT_BITS_PER_WORD;
825      else {
826        Count += countLeadingOnes_64(pVal[i], 0);
827        break;
828      }
829    }
830  }
831  return Count;
832}
833
834unsigned APInt::countTrailingZeros() const {
835  if (isSingleWord())
836    return std::min(unsigned(CountTrailingZeros_64(VAL)), BitWidth);
837  unsigned Count = 0;
838  unsigned i = 0;
839  for (; i < getNumWords() && pVal[i] == 0; ++i)
840    Count += APINT_BITS_PER_WORD;
841  if (i < getNumWords())
842    Count += CountTrailingZeros_64(pVal[i]);
843  return std::min(Count, BitWidth);
844}
845
846unsigned APInt::countTrailingOnesSlowCase() const {
847  unsigned Count = 0;
848  unsigned i = 0;
849  for (; i < getNumWords() && pVal[i] == -1ULL; ++i)
850    Count += APINT_BITS_PER_WORD;
851  if (i < getNumWords())
852    Count += CountTrailingOnes_64(pVal[i]);
853  return std::min(Count, BitWidth);
854}
855
856unsigned APInt::countPopulationSlowCase() const {
857  unsigned Count = 0;
858  for (unsigned i = 0; i < getNumWords(); ++i)
859    Count += CountPopulation_64(pVal[i]);
860  return Count;
861}
862
863APInt APInt::byteSwap() const {
864  assert(BitWidth >= 16 && BitWidth % 16 == 0 && "Cannot byteswap!");
865  if (BitWidth == 16)
866    return APInt(BitWidth, ByteSwap_16(uint16_t(VAL)));
867  else if (BitWidth == 32)
868    return APInt(BitWidth, ByteSwap_32(unsigned(VAL)));
869  else if (BitWidth == 48) {
870    unsigned Tmp1 = unsigned(VAL >> 16);
871    Tmp1 = ByteSwap_32(Tmp1);
872    uint16_t Tmp2 = uint16_t(VAL);
873    Tmp2 = ByteSwap_16(Tmp2);
874    return APInt(BitWidth, (uint64_t(Tmp2) << 32) | Tmp1);
875  } else if (BitWidth == 64)
876    return APInt(BitWidth, ByteSwap_64(VAL));
877  else {
878    APInt Result(BitWidth, 0);
879    char *pByte = (char*)Result.pVal;
880    for (unsigned i = 0; i < BitWidth / APINT_WORD_SIZE / 2; ++i) {
881      char Tmp = pByte[i];
882      pByte[i] = pByte[BitWidth / APINT_WORD_SIZE - 1 - i];
883      pByte[BitWidth / APINT_WORD_SIZE - i - 1] = Tmp;
884    }
885    return Result;
886  }
887}
888
889APInt llvm::APIntOps::GreatestCommonDivisor(const APInt& API1,
890                                            const APInt& API2) {
891  APInt A = API1, B = API2;
892  while (!!B) {
893    APInt T = B;
894    B = APIntOps::urem(A, B);
895    A = T;
896  }
897  return A;
898}
899
900APInt llvm::APIntOps::RoundDoubleToAPInt(double Double, unsigned width) {
901  union {
902    double D;
903    uint64_t I;
904  } T;
905  T.D = Double;
906
907  // Get the sign bit from the highest order bit
908  bool isNeg = T.I >> 63;
909
910  // Get the 11-bit exponent and adjust for the 1023 bit bias
911  int64_t exp = ((T.I >> 52) & 0x7ff) - 1023;
912
913  // If the exponent is negative, the value is < 0 so just return 0.
914  if (exp < 0)
915    return APInt(width, 0u);
916
917  // Extract the mantissa by clearing the top 12 bits (sign + exponent).
918  uint64_t mantissa = (T.I & (~0ULL >> 12)) | 1ULL << 52;
919
920  // If the exponent doesn't shift all bits out of the mantissa
921  if (exp < 52)
922    return isNeg ? -APInt(width, mantissa >> (52 - exp)) :
923                    APInt(width, mantissa >> (52 - exp));
924
925  // If the client didn't provide enough bits for us to shift the mantissa into
926  // then the result is undefined, just return 0
927  if (width <= exp - 52)
928    return APInt(width, 0);
929
930  // Otherwise, we have to shift the mantissa bits up to the right location
931  APInt Tmp(width, mantissa);
932  Tmp = Tmp.shl((unsigned)exp - 52);
933  return isNeg ? -Tmp : Tmp;
934}
935
936/// RoundToDouble - This function converts this APInt to a double.
937/// The layout for double is as following (IEEE Standard 754):
938///  --------------------------------------
939/// |  Sign    Exponent    Fraction    Bias |
940/// |-------------------------------------- |
941/// |  1[63]   11[62-52]   52[51-00]   1023 |
942///  --------------------------------------
943double APInt::roundToDouble(bool isSigned) const {
944
945  // Handle the simple case where the value is contained in one uint64_t.
946  // It is wrong to optimize getWord(0) to VAL; there might be more than one word.
947  if (isSingleWord() || getActiveBits() <= APINT_BITS_PER_WORD) {
948    if (isSigned) {
949      int64_t sext = (int64_t(getWord(0)) << (64-BitWidth)) >> (64-BitWidth);
950      return double(sext);
951    } else
952      return double(getWord(0));
953  }
954
955  // Determine if the value is negative.
956  bool isNeg = isSigned ? (*this)[BitWidth-1] : false;
957
958  // Construct the absolute value if we're negative.
959  APInt Tmp(isNeg ? -(*this) : (*this));
960
961  // Figure out how many bits we're using.
962  unsigned n = Tmp.getActiveBits();
963
964  // The exponent (without bias normalization) is just the number of bits
965  // we are using. Note that the sign bit is gone since we constructed the
966  // absolute value.
967  uint64_t exp = n;
968
969  // Return infinity for exponent overflow
970  if (exp > 1023) {
971    if (!isSigned || !isNeg)
972      return std::numeric_limits<double>::infinity();
973    else
974      return -std::numeric_limits<double>::infinity();
975  }
976  exp += 1023; // Increment for 1023 bias
977
978  // Number of bits in mantissa is 52. To obtain the mantissa value, we must
979  // extract the high 52 bits from the correct words in pVal.
980  uint64_t mantissa;
981  unsigned hiWord = whichWord(n-1);
982  if (hiWord == 0) {
983    mantissa = Tmp.pVal[0];
984    if (n > 52)
985      mantissa >>= n - 52; // shift down, we want the top 52 bits.
986  } else {
987    assert(hiWord > 0 && "huh?");
988    uint64_t hibits = Tmp.pVal[hiWord] << (52 - n % APINT_BITS_PER_WORD);
989    uint64_t lobits = Tmp.pVal[hiWord-1] >> (11 + n % APINT_BITS_PER_WORD);
990    mantissa = hibits | lobits;
991  }
992
993  // The leading bit of mantissa is implicit, so get rid of it.
994  uint64_t sign = isNeg ? (1ULL << (APINT_BITS_PER_WORD - 1)) : 0;
995  union {
996    double D;
997    uint64_t I;
998  } T;
999  T.I = sign | (exp << 52) | mantissa;
1000  return T.D;
1001}
1002
1003// Truncate to new width.
1004APInt &APInt::trunc(unsigned width) {
1005  assert(width < BitWidth && "Invalid APInt Truncate request");
1006  assert(width && "Can't truncate to 0 bits");
1007  unsigned wordsBefore = getNumWords();
1008  BitWidth = width;
1009  unsigned wordsAfter = getNumWords();
1010  if (wordsBefore != wordsAfter) {
1011    if (wordsAfter == 1) {
1012      uint64_t *tmp = pVal;
1013      VAL = pVal[0];
1014      delete [] tmp;
1015    } else {
1016      uint64_t *newVal = getClearedMemory(wordsAfter);
1017      for (unsigned i = 0; i < wordsAfter; ++i)
1018        newVal[i] = pVal[i];
1019      delete [] pVal;
1020      pVal = newVal;
1021    }
1022  }
1023  return clearUnusedBits();
1024}
1025
1026// Sign extend to a new width.
1027APInt &APInt::sext(unsigned width) {
1028  assert(width > BitWidth && "Invalid APInt SignExtend request");
1029  // If the sign bit isn't set, this is the same as zext.
1030  if (!isNegative()) {
1031    zext(width);
1032    return *this;
1033  }
1034
1035  // The sign bit is set. First, get some facts
1036  unsigned wordsBefore = getNumWords();
1037  unsigned wordBits = BitWidth % APINT_BITS_PER_WORD;
1038  BitWidth = width;
1039  unsigned wordsAfter = getNumWords();
1040
1041  // Mask the high order word appropriately
1042  if (wordsBefore == wordsAfter) {
1043    unsigned newWordBits = width % APINT_BITS_PER_WORD;
1044    // The extension is contained to the wordsBefore-1th word.
1045    uint64_t mask = ~0ULL;
1046    if (newWordBits)
1047      mask >>= APINT_BITS_PER_WORD - newWordBits;
1048    mask <<= wordBits;
1049    if (wordsBefore == 1)
1050      VAL |= mask;
1051    else
1052      pVal[wordsBefore-1] |= mask;
1053    return clearUnusedBits();
1054  }
1055
1056  uint64_t mask = wordBits == 0 ? 0 : ~0ULL << wordBits;
1057  uint64_t *newVal = getMemory(wordsAfter);
1058  if (wordsBefore == 1)
1059    newVal[0] = VAL | mask;
1060  else {
1061    for (unsigned i = 0; i < wordsBefore; ++i)
1062      newVal[i] = pVal[i];
1063    newVal[wordsBefore-1] |= mask;
1064  }
1065  for (unsigned i = wordsBefore; i < wordsAfter; i++)
1066    newVal[i] = -1ULL;
1067  if (wordsBefore != 1)
1068    delete [] pVal;
1069  pVal = newVal;
1070  return clearUnusedBits();
1071}
1072
1073//  Zero extend to a new width.
1074APInt &APInt::zext(unsigned width) {
1075  assert(width > BitWidth && "Invalid APInt ZeroExtend request");
1076  unsigned wordsBefore = getNumWords();
1077  BitWidth = width;
1078  unsigned wordsAfter = getNumWords();
1079  if (wordsBefore != wordsAfter) {
1080    uint64_t *newVal = getClearedMemory(wordsAfter);
1081    if (wordsBefore == 1)
1082      newVal[0] = VAL;
1083    else
1084      for (unsigned i = 0; i < wordsBefore; ++i)
1085        newVal[i] = pVal[i];
1086    if (wordsBefore != 1)
1087      delete [] pVal;
1088    pVal = newVal;
1089  }
1090  return *this;
1091}
1092
1093APInt &APInt::zextOrTrunc(unsigned width) {
1094  if (BitWidth < width)
1095    return zext(width);
1096  if (BitWidth > width)
1097    return trunc(width);
1098  return *this;
1099}
1100
1101APInt &APInt::sextOrTrunc(unsigned width) {
1102  if (BitWidth < width)
1103    return sext(width);
1104  if (BitWidth > width)
1105    return trunc(width);
1106  return *this;
1107}
1108
1109/// Arithmetic right-shift this APInt by shiftAmt.
1110/// @brief Arithmetic right-shift function.
1111APInt APInt::ashr(const APInt &shiftAmt) const {
1112  return ashr((unsigned)shiftAmt.getLimitedValue(BitWidth));
1113}
1114
1115/// Arithmetic right-shift this APInt by shiftAmt.
1116/// @brief Arithmetic right-shift function.
1117APInt APInt::ashr(unsigned shiftAmt) const {
1118  assert(shiftAmt <= BitWidth && "Invalid shift amount");
1119  // Handle a degenerate case
1120  if (shiftAmt == 0)
1121    return *this;
1122
1123  // Handle single word shifts with built-in ashr
1124  if (isSingleWord()) {
1125    if (shiftAmt == BitWidth)
1126      return APInt(BitWidth, 0); // undefined
1127    else {
1128      unsigned SignBit = APINT_BITS_PER_WORD - BitWidth;
1129      return APInt(BitWidth,
1130        (((int64_t(VAL) << SignBit) >> SignBit) >> shiftAmt));
1131    }
1132  }
1133
1134  // If all the bits were shifted out, the result is, technically, undefined.
1135  // We return -1 if it was negative, 0 otherwise. We check this early to avoid
1136  // issues in the algorithm below.
1137  if (shiftAmt == BitWidth) {
1138    if (isNegative())
1139      return APInt(BitWidth, -1ULL, true);
1140    else
1141      return APInt(BitWidth, 0);
1142  }
1143
1144  // Create some space for the result.
1145  uint64_t * val = new uint64_t[getNumWords()];
1146
1147  // Compute some values needed by the following shift algorithms
1148  unsigned wordShift = shiftAmt % APINT_BITS_PER_WORD; // bits to shift per word
1149  unsigned offset = shiftAmt / APINT_BITS_PER_WORD; // word offset for shift
1150  unsigned breakWord = getNumWords() - 1 - offset; // last word affected
1151  unsigned bitsInWord = whichBit(BitWidth); // how many bits in last word?
1152  if (bitsInWord == 0)
1153    bitsInWord = APINT_BITS_PER_WORD;
1154
1155  // If we are shifting whole words, just move whole words
1156  if (wordShift == 0) {
1157    // Move the words containing significant bits
1158    for (unsigned i = 0; i <= breakWord; ++i)
1159      val[i] = pVal[i+offset]; // move whole word
1160
1161    // Adjust the top significant word for sign bit fill, if negative
1162    if (isNegative())
1163      if (bitsInWord < APINT_BITS_PER_WORD)
1164        val[breakWord] |= ~0ULL << bitsInWord; // set high bits
1165  } else {
1166    // Shift the low order words
1167    for (unsigned i = 0; i < breakWord; ++i) {
1168      // This combines the shifted corresponding word with the low bits from
1169      // the next word (shifted into this word's high bits).
1170      val[i] = (pVal[i+offset] >> wordShift) |
1171               (pVal[i+offset+1] << (APINT_BITS_PER_WORD - wordShift));
1172    }
1173
1174    // Shift the break word. In this case there are no bits from the next word
1175    // to include in this word.
1176    val[breakWord] = pVal[breakWord+offset] >> wordShift;
1177
1178    // Deal with sign extenstion in the break word, and possibly the word before
1179    // it.
1180    if (isNegative()) {
1181      if (wordShift > bitsInWord) {
1182        if (breakWord > 0)
1183          val[breakWord-1] |=
1184            ~0ULL << (APINT_BITS_PER_WORD - (wordShift - bitsInWord));
1185        val[breakWord] |= ~0ULL;
1186      } else
1187        val[breakWord] |= (~0ULL << (bitsInWord - wordShift));
1188    }
1189  }
1190
1191  // Remaining words are 0 or -1, just assign them.
1192  uint64_t fillValue = (isNegative() ? -1ULL : 0);
1193  for (unsigned i = breakWord+1; i < getNumWords(); ++i)
1194    val[i] = fillValue;
1195  return APInt(val, BitWidth).clearUnusedBits();
1196}
1197
1198/// Logical right-shift this APInt by shiftAmt.
1199/// @brief Logical right-shift function.
1200APInt APInt::lshr(const APInt &shiftAmt) const {
1201  return lshr((unsigned)shiftAmt.getLimitedValue(BitWidth));
1202}
1203
1204/// Logical right-shift this APInt by shiftAmt.
1205/// @brief Logical right-shift function.
1206APInt APInt::lshr(unsigned shiftAmt) const {
1207  if (isSingleWord()) {
1208    if (shiftAmt == BitWidth)
1209      return APInt(BitWidth, 0);
1210    else
1211      return APInt(BitWidth, this->VAL >> shiftAmt);
1212  }
1213
1214  // If all the bits were shifted out, the result is 0. This avoids issues
1215  // with shifting by the size of the integer type, which produces undefined
1216  // results. We define these "undefined results" to always be 0.
1217  if (shiftAmt == BitWidth)
1218    return APInt(BitWidth, 0);
1219
1220  // If none of the bits are shifted out, the result is *this. This avoids
1221  // issues with shifting by the size of the integer type, which produces
1222  // undefined results in the code below. This is also an optimization.
1223  if (shiftAmt == 0)
1224    return *this;
1225
1226  // Create some space for the result.
1227  uint64_t * val = new uint64_t[getNumWords()];
1228
1229  // If we are shifting less than a word, compute the shift with a simple carry
1230  if (shiftAmt < APINT_BITS_PER_WORD) {
1231    uint64_t carry = 0;
1232    for (int i = getNumWords()-1; i >= 0; --i) {
1233      val[i] = (pVal[i] >> shiftAmt) | carry;
1234      carry = pVal[i] << (APINT_BITS_PER_WORD - shiftAmt);
1235    }
1236    return APInt(val, BitWidth).clearUnusedBits();
1237  }
1238
1239  // Compute some values needed by the remaining shift algorithms
1240  unsigned wordShift = shiftAmt % APINT_BITS_PER_WORD;
1241  unsigned offset = shiftAmt / APINT_BITS_PER_WORD;
1242
1243  // If we are shifting whole words, just move whole words
1244  if (wordShift == 0) {
1245    for (unsigned i = 0; i < getNumWords() - offset; ++i)
1246      val[i] = pVal[i+offset];
1247    for (unsigned i = getNumWords()-offset; i < getNumWords(); i++)
1248      val[i] = 0;
1249    return APInt(val,BitWidth).clearUnusedBits();
1250  }
1251
1252  // Shift the low order words
1253  unsigned breakWord = getNumWords() - offset -1;
1254  for (unsigned i = 0; i < breakWord; ++i)
1255    val[i] = (pVal[i+offset] >> wordShift) |
1256             (pVal[i+offset+1] << (APINT_BITS_PER_WORD - wordShift));
1257  // Shift the break word.
1258  val[breakWord] = pVal[breakWord+offset] >> wordShift;
1259
1260  // Remaining words are 0
1261  for (unsigned i = breakWord+1; i < getNumWords(); ++i)
1262    val[i] = 0;
1263  return APInt(val, BitWidth).clearUnusedBits();
1264}
1265
1266/// Left-shift this APInt by shiftAmt.
1267/// @brief Left-shift function.
1268APInt APInt::shl(const APInt &shiftAmt) const {
1269  // It's undefined behavior in C to shift by BitWidth or greater.
1270  return shl((unsigned)shiftAmt.getLimitedValue(BitWidth));
1271}
1272
1273APInt APInt::shlSlowCase(unsigned shiftAmt) const {
1274  // If all the bits were shifted out, the result is 0. This avoids issues
1275  // with shifting by the size of the integer type, which produces undefined
1276  // results. We define these "undefined results" to always be 0.
1277  if (shiftAmt == BitWidth)
1278    return APInt(BitWidth, 0);
1279
1280  // If none of the bits are shifted out, the result is *this. This avoids a
1281  // lshr by the words size in the loop below which can produce incorrect
1282  // results. It also avoids the expensive computation below for a common case.
1283  if (shiftAmt == 0)
1284    return *this;
1285
1286  // Create some space for the result.
1287  uint64_t * val = new uint64_t[getNumWords()];
1288
1289  // If we are shifting less than a word, do it the easy way
1290  if (shiftAmt < APINT_BITS_PER_WORD) {
1291    uint64_t carry = 0;
1292    for (unsigned i = 0; i < getNumWords(); i++) {
1293      val[i] = pVal[i] << shiftAmt | carry;
1294      carry = pVal[i] >> (APINT_BITS_PER_WORD - shiftAmt);
1295    }
1296    return APInt(val, BitWidth).clearUnusedBits();
1297  }
1298
1299  // Compute some values needed by the remaining shift algorithms
1300  unsigned wordShift = shiftAmt % APINT_BITS_PER_WORD;
1301  unsigned offset = shiftAmt / APINT_BITS_PER_WORD;
1302
1303  // If we are shifting whole words, just move whole words
1304  if (wordShift == 0) {
1305    for (unsigned i = 0; i < offset; i++)
1306      val[i] = 0;
1307    for (unsigned i = offset; i < getNumWords(); i++)
1308      val[i] = pVal[i-offset];
1309    return APInt(val,BitWidth).clearUnusedBits();
1310  }
1311
1312  // Copy whole words from this to Result.
1313  unsigned i = getNumWords() - 1;
1314  for (; i > offset; --i)
1315    val[i] = pVal[i-offset] << wordShift |
1316             pVal[i-offset-1] >> (APINT_BITS_PER_WORD - wordShift);
1317  val[offset] = pVal[0] << wordShift;
1318  for (i = 0; i < offset; ++i)
1319    val[i] = 0;
1320  return APInt(val, BitWidth).clearUnusedBits();
1321}
1322
1323APInt APInt::rotl(const APInt &rotateAmt) const {
1324  return rotl((unsigned)rotateAmt.getLimitedValue(BitWidth));
1325}
1326
1327APInt APInt::rotl(unsigned rotateAmt) const {
1328  if (rotateAmt == 0)
1329    return *this;
1330  // Don't get too fancy, just use existing shift/or facilities
1331  APInt hi(*this);
1332  APInt lo(*this);
1333  hi.shl(rotateAmt);
1334  lo.lshr(BitWidth - rotateAmt);
1335  return hi | lo;
1336}
1337
1338APInt APInt::rotr(const APInt &rotateAmt) const {
1339  return rotr((unsigned)rotateAmt.getLimitedValue(BitWidth));
1340}
1341
1342APInt APInt::rotr(unsigned rotateAmt) const {
1343  if (rotateAmt == 0)
1344    return *this;
1345  // Don't get too fancy, just use existing shift/or facilities
1346  APInt hi(*this);
1347  APInt lo(*this);
1348  lo.lshr(rotateAmt);
1349  hi.shl(BitWidth - rotateAmt);
1350  return hi | lo;
1351}
1352
1353// Square Root - this method computes and returns the square root of "this".
1354// Three mechanisms are used for computation. For small values (<= 5 bits),
1355// a table lookup is done. This gets some performance for common cases. For
1356// values using less than 52 bits, the value is converted to double and then
1357// the libc sqrt function is called. The result is rounded and then converted
1358// back to a uint64_t which is then used to construct the result. Finally,
1359// the Babylonian method for computing square roots is used.
1360APInt APInt::sqrt() const {
1361
1362  // Determine the magnitude of the value.
1363  unsigned magnitude = getActiveBits();
1364
1365  // Use a fast table for some small values. This also gets rid of some
1366  // rounding errors in libc sqrt for small values.
1367  if (magnitude <= 5) {
1368    static const uint8_t results[32] = {
1369      /*     0 */ 0,
1370      /*  1- 2 */ 1, 1,
1371      /*  3- 6 */ 2, 2, 2, 2,
1372      /*  7-12 */ 3, 3, 3, 3, 3, 3,
1373      /* 13-20 */ 4, 4, 4, 4, 4, 4, 4, 4,
1374      /* 21-30 */ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
1375      /*    31 */ 6
1376    };
1377    return APInt(BitWidth, results[ (isSingleWord() ? VAL : pVal[0]) ]);
1378  }
1379
1380  // If the magnitude of the value fits in less than 52 bits (the precision of
1381  // an IEEE double precision floating point value), then we can use the
1382  // libc sqrt function which will probably use a hardware sqrt computation.
1383  // This should be faster than the algorithm below.
1384  if (magnitude < 52) {
1385#if defined( _MSC_VER ) || defined(_MINIX)
1386    // Amazingly, VC++ and Minix don't have round().
1387    return APInt(BitWidth,
1388                 uint64_t(::sqrt(double(isSingleWord()?VAL:pVal[0]))) + 0.5);
1389#else
1390    return APInt(BitWidth,
1391                 uint64_t(::round(::sqrt(double(isSingleWord()?VAL:pVal[0])))));
1392#endif
1393  }
1394
1395  // Okay, all the short cuts are exhausted. We must compute it. The following
1396  // is a classical Babylonian method for computing the square root. This code
1397  // was adapted to APINt from a wikipedia article on such computations.
1398  // See http://www.wikipedia.org/ and go to the page named
1399  // Calculate_an_integer_square_root.
1400  unsigned nbits = BitWidth, i = 4;
1401  APInt testy(BitWidth, 16);
1402  APInt x_old(BitWidth, 1);
1403  APInt x_new(BitWidth, 0);
1404  APInt two(BitWidth, 2);
1405
1406  // Select a good starting value using binary logarithms.
1407  for (;; i += 2, testy = testy.shl(2))
1408    if (i >= nbits || this->ule(testy)) {
1409      x_old = x_old.shl(i / 2);
1410      break;
1411    }
1412
1413  // Use the Babylonian method to arrive at the integer square root:
1414  for (;;) {
1415    x_new = (this->udiv(x_old) + x_old).udiv(two);
1416    if (x_old.ule(x_new))
1417      break;
1418    x_old = x_new;
1419  }
1420
1421  // Make sure we return the closest approximation
1422  // NOTE: The rounding calculation below is correct. It will produce an
1423  // off-by-one discrepancy with results from pari/gp. That discrepancy has been
1424  // determined to be a rounding issue with pari/gp as it begins to use a
1425  // floating point representation after 192 bits. There are no discrepancies
1426  // between this algorithm and pari/gp for bit widths < 192 bits.
1427  APInt square(x_old * x_old);
1428  APInt nextSquare((x_old + 1) * (x_old +1));
1429  if (this->ult(square))
1430    return x_old;
1431  else if (this->ule(nextSquare)) {
1432    APInt midpoint((nextSquare - square).udiv(two));
1433    APInt offset(*this - square);
1434    if (offset.ult(midpoint))
1435      return x_old;
1436    else
1437      return x_old + 1;
1438  } else
1439    llvm_unreachable("Error in APInt::sqrt computation");
1440  return x_old + 1;
1441}
1442
1443/// Computes the multiplicative inverse of this APInt for a given modulo. The
1444/// iterative extended Euclidean algorithm is used to solve for this value,
1445/// however we simplify it to speed up calculating only the inverse, and take
1446/// advantage of div+rem calculations. We also use some tricks to avoid copying
1447/// (potentially large) APInts around.
1448APInt APInt::multiplicativeInverse(const APInt& modulo) const {
1449  assert(ult(modulo) && "This APInt must be smaller than the modulo");
1450
1451  // Using the properties listed at the following web page (accessed 06/21/08):
1452  //   http://www.numbertheory.org/php/euclid.html
1453  // (especially the properties numbered 3, 4 and 9) it can be proved that
1454  // BitWidth bits suffice for all the computations in the algorithm implemented
1455  // below. More precisely, this number of bits suffice if the multiplicative
1456  // inverse exists, but may not suffice for the general extended Euclidean
1457  // algorithm.
1458
1459  APInt r[2] = { modulo, *this };
1460  APInt t[2] = { APInt(BitWidth, 0), APInt(BitWidth, 1) };
1461  APInt q(BitWidth, 0);
1462
1463  unsigned i;
1464  for (i = 0; r[i^1] != 0; i ^= 1) {
1465    // An overview of the math without the confusing bit-flipping:
1466    // q = r[i-2] / r[i-1]
1467    // r[i] = r[i-2] % r[i-1]
1468    // t[i] = t[i-2] - t[i-1] * q
1469    udivrem(r[i], r[i^1], q, r[i]);
1470    t[i] -= t[i^1] * q;
1471  }
1472
1473  // If this APInt and the modulo are not coprime, there is no multiplicative
1474  // inverse, so return 0. We check this by looking at the next-to-last
1475  // remainder, which is the gcd(*this,modulo) as calculated by the Euclidean
1476  // algorithm.
1477  if (r[i] != 1)
1478    return APInt(BitWidth, 0);
1479
1480  // The next-to-last t is the multiplicative inverse.  However, we are
1481  // interested in a positive inverse. Calcuate a positive one from a negative
1482  // one if necessary. A simple addition of the modulo suffices because
1483  // abs(t[i]) is known to be less than *this/2 (see the link above).
1484  return t[i].isNegative() ? t[i] + modulo : t[i];
1485}
1486
1487/// Calculate the magic numbers required to implement a signed integer division
1488/// by a constant as a sequence of multiplies, adds and shifts.  Requires that
1489/// the divisor not be 0, 1, or -1.  Taken from "Hacker's Delight", Henry S.
1490/// Warren, Jr., chapter 10.
1491APInt::ms APInt::magic() const {
1492  const APInt& d = *this;
1493  unsigned p;
1494  APInt ad, anc, delta, q1, r1, q2, r2, t;
1495  APInt signedMin = APInt::getSignedMinValue(d.getBitWidth());
1496  struct ms mag;
1497
1498  ad = d.abs();
1499  t = signedMin + (d.lshr(d.getBitWidth() - 1));
1500  anc = t - 1 - t.urem(ad);   // absolute value of nc
1501  p = d.getBitWidth() - 1;    // initialize p
1502  q1 = signedMin.udiv(anc);   // initialize q1 = 2p/abs(nc)
1503  r1 = signedMin - q1*anc;    // initialize r1 = rem(2p,abs(nc))
1504  q2 = signedMin.udiv(ad);    // initialize q2 = 2p/abs(d)
1505  r2 = signedMin - q2*ad;     // initialize r2 = rem(2p,abs(d))
1506  do {
1507    p = p + 1;
1508    q1 = q1<<1;          // update q1 = 2p/abs(nc)
1509    r1 = r1<<1;          // update r1 = rem(2p/abs(nc))
1510    if (r1.uge(anc)) {  // must be unsigned comparison
1511      q1 = q1 + 1;
1512      r1 = r1 - anc;
1513    }
1514    q2 = q2<<1;          // update q2 = 2p/abs(d)
1515    r2 = r2<<1;          // update r2 = rem(2p/abs(d))
1516    if (r2.uge(ad)) {   // must be unsigned comparison
1517      q2 = q2 + 1;
1518      r2 = r2 - ad;
1519    }
1520    delta = ad - r2;
1521  } while (q1.ule(delta) || (q1 == delta && r1 == 0));
1522
1523  mag.m = q2 + 1;
1524  if (d.isNegative()) mag.m = -mag.m;   // resulting magic number
1525  mag.s = p - d.getBitWidth();          // resulting shift
1526  return mag;
1527}
1528
1529/// Calculate the magic numbers required to implement an unsigned integer
1530/// division by a constant as a sequence of multiplies, adds and shifts.
1531/// Requires that the divisor not be 0.  Taken from "Hacker's Delight", Henry
1532/// S. Warren, Jr., chapter 10.
1533APInt::mu APInt::magicu() const {
1534  const APInt& d = *this;
1535  unsigned p;
1536  APInt nc, delta, q1, r1, q2, r2;
1537  struct mu magu;
1538  magu.a = 0;               // initialize "add" indicator
1539  APInt allOnes = APInt::getAllOnesValue(d.getBitWidth());
1540  APInt signedMin = APInt::getSignedMinValue(d.getBitWidth());
1541  APInt signedMax = APInt::getSignedMaxValue(d.getBitWidth());
1542
1543  nc = allOnes - (-d).urem(d);
1544  p = d.getBitWidth() - 1;  // initialize p
1545  q1 = signedMin.udiv(nc);  // initialize q1 = 2p/nc
1546  r1 = signedMin - q1*nc;   // initialize r1 = rem(2p,nc)
1547  q2 = signedMax.udiv(d);   // initialize q2 = (2p-1)/d
1548  r2 = signedMax - q2*d;    // initialize r2 = rem((2p-1),d)
1549  do {
1550    p = p + 1;
1551    if (r1.uge(nc - r1)) {
1552      q1 = q1 + q1 + 1;  // update q1
1553      r1 = r1 + r1 - nc; // update r1
1554    }
1555    else {
1556      q1 = q1+q1; // update q1
1557      r1 = r1+r1; // update r1
1558    }
1559    if ((r2 + 1).uge(d - r2)) {
1560      if (q2.uge(signedMax)) magu.a = 1;
1561      q2 = q2+q2 + 1;     // update q2
1562      r2 = r2+r2 + 1 - d; // update r2
1563    }
1564    else {
1565      if (q2.uge(signedMin)) magu.a = 1;
1566      q2 = q2+q2;     // update q2
1567      r2 = r2+r2 + 1; // update r2
1568    }
1569    delta = d - 1 - r2;
1570  } while (p < d.getBitWidth()*2 &&
1571           (q1.ult(delta) || (q1 == delta && r1 == 0)));
1572  magu.m = q2 + 1; // resulting magic number
1573  magu.s = p - d.getBitWidth();  // resulting shift
1574  return magu;
1575}
1576
1577/// Implementation of Knuth's Algorithm D (Division of nonnegative integers)
1578/// from "Art of Computer Programming, Volume 2", section 4.3.1, p. 272. The
1579/// variables here have the same names as in the algorithm. Comments explain
1580/// the algorithm and any deviation from it.
1581static void KnuthDiv(unsigned *u, unsigned *v, unsigned *q, unsigned* r,
1582                     unsigned m, unsigned n) {
1583  assert(u && "Must provide dividend");
1584  assert(v && "Must provide divisor");
1585  assert(q && "Must provide quotient");
1586  assert(u != v && u != q && v != q && "Must us different memory");
1587  assert(n>1 && "n must be > 1");
1588
1589  // Knuth uses the value b as the base of the number system. In our case b
1590  // is 2^31 so we just set it to -1u.
1591  uint64_t b = uint64_t(1) << 32;
1592
1593#if 0
1594  DEBUG(dbgs() << "KnuthDiv: m=" << m << " n=" << n << '\n');
1595  DEBUG(dbgs() << "KnuthDiv: original:");
1596  DEBUG(for (int i = m+n; i >=0; i--) dbgs() << " " << u[i]);
1597  DEBUG(dbgs() << " by");
1598  DEBUG(for (int i = n; i >0; i--) dbgs() << " " << v[i-1]);
1599  DEBUG(dbgs() << '\n');
1600#endif
1601  // D1. [Normalize.] Set d = b / (v[n-1] + 1) and multiply all the digits of
1602  // u and v by d. Note that we have taken Knuth's advice here to use a power
1603  // of 2 value for d such that d * v[n-1] >= b/2 (b is the base). A power of
1604  // 2 allows us to shift instead of multiply and it is easy to determine the
1605  // shift amount from the leading zeros.  We are basically normalizing the u
1606  // and v so that its high bits are shifted to the top of v's range without
1607  // overflow. Note that this can require an extra word in u so that u must
1608  // be of length m+n+1.
1609  unsigned shift = CountLeadingZeros_32(v[n-1]);
1610  unsigned v_carry = 0;
1611  unsigned u_carry = 0;
1612  if (shift) {
1613    for (unsigned i = 0; i < m+n; ++i) {
1614      unsigned u_tmp = u[i] >> (32 - shift);
1615      u[i] = (u[i] << shift) | u_carry;
1616      u_carry = u_tmp;
1617    }
1618    for (unsigned i = 0; i < n; ++i) {
1619      unsigned v_tmp = v[i] >> (32 - shift);
1620      v[i] = (v[i] << shift) | v_carry;
1621      v_carry = v_tmp;
1622    }
1623  }
1624  u[m+n] = u_carry;
1625#if 0
1626  DEBUG(dbgs() << "KnuthDiv:   normal:");
1627  DEBUG(for (int i = m+n; i >=0; i--) dbgs() << " " << u[i]);
1628  DEBUG(dbgs() << " by");
1629  DEBUG(for (int i = n; i >0; i--) dbgs() << " " << v[i-1]);
1630  DEBUG(dbgs() << '\n');
1631#endif
1632
1633  // D2. [Initialize j.]  Set j to m. This is the loop counter over the places.
1634  int j = m;
1635  do {
1636    DEBUG(dbgs() << "KnuthDiv: quotient digit #" << j << '\n');
1637    // D3. [Calculate q'.].
1638    //     Set qp = (u[j+n]*b + u[j+n-1]) / v[n-1]. (qp=qprime=q')
1639    //     Set rp = (u[j+n]*b + u[j+n-1]) % v[n-1]. (rp=rprime=r')
1640    // Now test if qp == b or qp*v[n-2] > b*rp + u[j+n-2]; if so, decrease
1641    // qp by 1, inrease rp by v[n-1], and repeat this test if rp < b. The test
1642    // on v[n-2] determines at high speed most of the cases in which the trial
1643    // value qp is one too large, and it eliminates all cases where qp is two
1644    // too large.
1645    uint64_t dividend = ((uint64_t(u[j+n]) << 32) + u[j+n-1]);
1646    DEBUG(dbgs() << "KnuthDiv: dividend == " << dividend << '\n');
1647    uint64_t qp = dividend / v[n-1];
1648    uint64_t rp = dividend % v[n-1];
1649    if (qp == b || qp*v[n-2] > b*rp + u[j+n-2]) {
1650      qp--;
1651      rp += v[n-1];
1652      if (rp < b && (qp == b || qp*v[n-2] > b*rp + u[j+n-2]))
1653        qp--;
1654    }
1655    DEBUG(dbgs() << "KnuthDiv: qp == " << qp << ", rp == " << rp << '\n');
1656
1657    // D4. [Multiply and subtract.] Replace (u[j+n]u[j+n-1]...u[j]) with
1658    // (u[j+n]u[j+n-1]..u[j]) - qp * (v[n-1]...v[1]v[0]). This computation
1659    // consists of a simple multiplication by a one-place number, combined with
1660    // a subtraction.
1661    bool isNeg = false;
1662    for (unsigned i = 0; i < n; ++i) {
1663      uint64_t u_tmp = uint64_t(u[j+i]) | (uint64_t(u[j+i+1]) << 32);
1664      uint64_t subtrahend = uint64_t(qp) * uint64_t(v[i]);
1665      bool borrow = subtrahend > u_tmp;
1666      DEBUG(dbgs() << "KnuthDiv: u_tmp == " << u_tmp
1667                   << ", subtrahend == " << subtrahend
1668                   << ", borrow = " << borrow << '\n');
1669
1670      uint64_t result = u_tmp - subtrahend;
1671      unsigned k = j + i;
1672      u[k++] = (unsigned)(result & (b-1)); // subtract low word
1673      u[k++] = (unsigned)(result >> 32);   // subtract high word
1674      while (borrow && k <= m+n) { // deal with borrow to the left
1675        borrow = u[k] == 0;
1676        u[k]--;
1677        k++;
1678      }
1679      isNeg |= borrow;
1680      DEBUG(dbgs() << "KnuthDiv: u[j+i] == " << u[j+i] << ",  u[j+i+1] == " <<
1681                    u[j+i+1] << '\n');
1682    }
1683    DEBUG(dbgs() << "KnuthDiv: after subtraction:");
1684    DEBUG(for (int i = m+n; i >=0; i--) dbgs() << " " << u[i]);
1685    DEBUG(dbgs() << '\n');
1686    // The digits (u[j+n]...u[j]) should be kept positive; if the result of
1687    // this step is actually negative, (u[j+n]...u[j]) should be left as the
1688    // true value plus b**(n+1), namely as the b's complement of
1689    // the true value, and a "borrow" to the left should be remembered.
1690    //
1691    if (isNeg) {
1692      bool carry = true;  // true because b's complement is "complement + 1"
1693      for (unsigned i = 0; i <= m+n; ++i) {
1694        u[i] = ~u[i] + carry; // b's complement
1695        carry = carry && u[i] == 0;
1696      }
1697    }
1698    DEBUG(dbgs() << "KnuthDiv: after complement:");
1699    DEBUG(for (int i = m+n; i >=0; i--) dbgs() << " " << u[i]);
1700    DEBUG(dbgs() << '\n');
1701
1702    // D5. [Test remainder.] Set q[j] = qp. If the result of step D4 was
1703    // negative, go to step D6; otherwise go on to step D7.
1704    q[j] = (unsigned)qp;
1705    if (isNeg) {
1706      // D6. [Add back]. The probability that this step is necessary is very
1707      // small, on the order of only 2/b. Make sure that test data accounts for
1708      // this possibility. Decrease q[j] by 1
1709      q[j]--;
1710      // and add (0v[n-1]...v[1]v[0]) to (u[j+n]u[j+n-1]...u[j+1]u[j]).
1711      // A carry will occur to the left of u[j+n], and it should be ignored
1712      // since it cancels with the borrow that occurred in D4.
1713      bool carry = false;
1714      for (unsigned i = 0; i < n; i++) {
1715        unsigned limit = std::min(u[j+i],v[i]);
1716        u[j+i] += v[i] + carry;
1717        carry = u[j+i] < limit || (carry && u[j+i] == limit);
1718      }
1719      u[j+n] += carry;
1720    }
1721    DEBUG(dbgs() << "KnuthDiv: after correction:");
1722    DEBUG(for (int i = m+n; i >=0; i--) dbgs() <<" " << u[i]);
1723    DEBUG(dbgs() << "\nKnuthDiv: digit result = " << q[j] << '\n');
1724
1725  // D7. [Loop on j.]  Decrease j by one. Now if j >= 0, go back to D3.
1726  } while (--j >= 0);
1727
1728  DEBUG(dbgs() << "KnuthDiv: quotient:");
1729  DEBUG(for (int i = m; i >=0; i--) dbgs() <<" " << q[i]);
1730  DEBUG(dbgs() << '\n');
1731
1732  // D8. [Unnormalize]. Now q[...] is the desired quotient, and the desired
1733  // remainder may be obtained by dividing u[...] by d. If r is non-null we
1734  // compute the remainder (urem uses this).
1735  if (r) {
1736    // The value d is expressed by the "shift" value above since we avoided
1737    // multiplication by d by using a shift left. So, all we have to do is
1738    // shift right here. In order to mak
1739    if (shift) {
1740      unsigned carry = 0;
1741      DEBUG(dbgs() << "KnuthDiv: remainder:");
1742      for (int i = n-1; i >= 0; i--) {
1743        r[i] = (u[i] >> shift) | carry;
1744        carry = u[i] << (32 - shift);
1745        DEBUG(dbgs() << " " << r[i]);
1746      }
1747    } else {
1748      for (int i = n-1; i >= 0; i--) {
1749        r[i] = u[i];
1750        DEBUG(dbgs() << " " << r[i]);
1751      }
1752    }
1753    DEBUG(dbgs() << '\n');
1754  }
1755#if 0
1756  DEBUG(dbgs() << '\n');
1757#endif
1758}
1759
1760void APInt::divide(const APInt LHS, unsigned lhsWords,
1761                   const APInt &RHS, unsigned rhsWords,
1762                   APInt *Quotient, APInt *Remainder)
1763{
1764  assert(lhsWords >= rhsWords && "Fractional result");
1765
1766  // First, compose the values into an array of 32-bit words instead of
1767  // 64-bit words. This is a necessity of both the "short division" algorithm
1768  // and the Knuth "classical algorithm" which requires there to be native
1769  // operations for +, -, and * on an m bit value with an m*2 bit result. We
1770  // can't use 64-bit operands here because we don't have native results of
1771  // 128-bits. Furthermore, casting the 64-bit values to 32-bit values won't
1772  // work on large-endian machines.
1773  uint64_t mask = ~0ull >> (sizeof(unsigned)*CHAR_BIT);
1774  unsigned n = rhsWords * 2;
1775  unsigned m = (lhsWords * 2) - n;
1776
1777  // Allocate space for the temporary values we need either on the stack, if
1778  // it will fit, or on the heap if it won't.
1779  unsigned SPACE[128];
1780  unsigned *U = 0;
1781  unsigned *V = 0;
1782  unsigned *Q = 0;
1783  unsigned *R = 0;
1784  if ((Remainder?4:3)*n+2*m+1 <= 128) {
1785    U = &SPACE[0];
1786    V = &SPACE[m+n+1];
1787    Q = &SPACE[(m+n+1) + n];
1788    if (Remainder)
1789      R = &SPACE[(m+n+1) + n + (m+n)];
1790  } else {
1791    U = new unsigned[m + n + 1];
1792    V = new unsigned[n];
1793    Q = new unsigned[m+n];
1794    if (Remainder)
1795      R = new unsigned[n];
1796  }
1797
1798  // Initialize the dividend
1799  memset(U, 0, (m+n+1)*sizeof(unsigned));
1800  for (unsigned i = 0; i < lhsWords; ++i) {
1801    uint64_t tmp = (LHS.getNumWords() == 1 ? LHS.VAL : LHS.pVal[i]);
1802    U[i * 2] = (unsigned)(tmp & mask);
1803    U[i * 2 + 1] = (unsigned)(tmp >> (sizeof(unsigned)*CHAR_BIT));
1804  }
1805  U[m+n] = 0; // this extra word is for "spill" in the Knuth algorithm.
1806
1807  // Initialize the divisor
1808  memset(V, 0, (n)*sizeof(unsigned));
1809  for (unsigned i = 0; i < rhsWords; ++i) {
1810    uint64_t tmp = (RHS.getNumWords() == 1 ? RHS.VAL : RHS.pVal[i]);
1811    V[i * 2] = (unsigned)(tmp & mask);
1812    V[i * 2 + 1] = (unsigned)(tmp >> (sizeof(unsigned)*CHAR_BIT));
1813  }
1814
1815  // initialize the quotient and remainder
1816  memset(Q, 0, (m+n) * sizeof(unsigned));
1817  if (Remainder)
1818    memset(R, 0, n * sizeof(unsigned));
1819
1820  // Now, adjust m and n for the Knuth division. n is the number of words in
1821  // the divisor. m is the number of words by which the dividend exceeds the
1822  // divisor (i.e. m+n is the length of the dividend). These sizes must not
1823  // contain any zero words or the Knuth algorithm fails.
1824  for (unsigned i = n; i > 0 && V[i-1] == 0; i--) {
1825    n--;
1826    m++;
1827  }
1828  for (unsigned i = m+n; i > 0 && U[i-1] == 0; i--)
1829    m--;
1830
1831  // If we're left with only a single word for the divisor, Knuth doesn't work
1832  // so we implement the short division algorithm here. This is much simpler
1833  // and faster because we are certain that we can divide a 64-bit quantity
1834  // by a 32-bit quantity at hardware speed and short division is simply a
1835  // series of such operations. This is just like doing short division but we
1836  // are using base 2^32 instead of base 10.
1837  assert(n != 0 && "Divide by zero?");
1838  if (n == 1) {
1839    unsigned divisor = V[0];
1840    unsigned remainder = 0;
1841    for (int i = m+n-1; i >= 0; i--) {
1842      uint64_t partial_dividend = uint64_t(remainder) << 32 | U[i];
1843      if (partial_dividend == 0) {
1844        Q[i] = 0;
1845        remainder = 0;
1846      } else if (partial_dividend < divisor) {
1847        Q[i] = 0;
1848        remainder = (unsigned)partial_dividend;
1849      } else if (partial_dividend == divisor) {
1850        Q[i] = 1;
1851        remainder = 0;
1852      } else {
1853        Q[i] = (unsigned)(partial_dividend / divisor);
1854        remainder = (unsigned)(partial_dividend - (Q[i] * divisor));
1855      }
1856    }
1857    if (R)
1858      R[0] = remainder;
1859  } else {
1860    // Now we're ready to invoke the Knuth classical divide algorithm. In this
1861    // case n > 1.
1862    KnuthDiv(U, V, Q, R, m, n);
1863  }
1864
1865  // If the caller wants the quotient
1866  if (Quotient) {
1867    // Set up the Quotient value's memory.
1868    if (Quotient->BitWidth != LHS.BitWidth) {
1869      if (Quotient->isSingleWord())
1870        Quotient->VAL = 0;
1871      else
1872        delete [] Quotient->pVal;
1873      Quotient->BitWidth = LHS.BitWidth;
1874      if (!Quotient->isSingleWord())
1875        Quotient->pVal = getClearedMemory(Quotient->getNumWords());
1876    } else
1877      Quotient->clear();
1878
1879    // The quotient is in Q. Reconstitute the quotient into Quotient's low
1880    // order words.
1881    if (lhsWords == 1) {
1882      uint64_t tmp =
1883        uint64_t(Q[0]) | (uint64_t(Q[1]) << (APINT_BITS_PER_WORD / 2));
1884      if (Quotient->isSingleWord())
1885        Quotient->VAL = tmp;
1886      else
1887        Quotient->pVal[0] = tmp;
1888    } else {
1889      assert(!Quotient->isSingleWord() && "Quotient APInt not large enough");
1890      for (unsigned i = 0; i < lhsWords; ++i)
1891        Quotient->pVal[i] =
1892          uint64_t(Q[i*2]) | (uint64_t(Q[i*2+1]) << (APINT_BITS_PER_WORD / 2));
1893    }
1894  }
1895
1896  // If the caller wants the remainder
1897  if (Remainder) {
1898    // Set up the Remainder value's memory.
1899    if (Remainder->BitWidth != RHS.BitWidth) {
1900      if (Remainder->isSingleWord())
1901        Remainder->VAL = 0;
1902      else
1903        delete [] Remainder->pVal;
1904      Remainder->BitWidth = RHS.BitWidth;
1905      if (!Remainder->isSingleWord())
1906        Remainder->pVal = getClearedMemory(Remainder->getNumWords());
1907    } else
1908      Remainder->clear();
1909
1910    // The remainder is in R. Reconstitute the remainder into Remainder's low
1911    // order words.
1912    if (rhsWords == 1) {
1913      uint64_t tmp =
1914        uint64_t(R[0]) | (uint64_t(R[1]) << (APINT_BITS_PER_WORD / 2));
1915      if (Remainder->isSingleWord())
1916        Remainder->VAL = tmp;
1917      else
1918        Remainder->pVal[0] = tmp;
1919    } else {
1920      assert(!Remainder->isSingleWord() && "Remainder APInt not large enough");
1921      for (unsigned i = 0; i < rhsWords; ++i)
1922        Remainder->pVal[i] =
1923          uint64_t(R[i*2]) | (uint64_t(R[i*2+1]) << (APINT_BITS_PER_WORD / 2));
1924    }
1925  }
1926
1927  // Clean up the memory we allocated.
1928  if (U != &SPACE[0]) {
1929    delete [] U;
1930    delete [] V;
1931    delete [] Q;
1932    delete [] R;
1933  }
1934}
1935
1936APInt APInt::udiv(const APInt& RHS) const {
1937  assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
1938
1939  // First, deal with the easy case
1940  if (isSingleWord()) {
1941    assert(RHS.VAL != 0 && "Divide by zero?");
1942    return APInt(BitWidth, VAL / RHS.VAL);
1943  }
1944
1945  // Get some facts about the LHS and RHS number of bits and words
1946  unsigned rhsBits = RHS.getActiveBits();
1947  unsigned rhsWords = !rhsBits ? 0 : (APInt::whichWord(rhsBits - 1) + 1);
1948  assert(rhsWords && "Divided by zero???");
1949  unsigned lhsBits = this->getActiveBits();
1950  unsigned lhsWords = !lhsBits ? 0 : (APInt::whichWord(lhsBits - 1) + 1);
1951
1952  // Deal with some degenerate cases
1953  if (!lhsWords)
1954    // 0 / X ===> 0
1955    return APInt(BitWidth, 0);
1956  else if (lhsWords < rhsWords || this->ult(RHS)) {
1957    // X / Y ===> 0, iff X < Y
1958    return APInt(BitWidth, 0);
1959  } else if (*this == RHS) {
1960    // X / X ===> 1
1961    return APInt(BitWidth, 1);
1962  } else if (lhsWords == 1 && rhsWords == 1) {
1963    // All high words are zero, just use native divide
1964    return APInt(BitWidth, this->pVal[0] / RHS.pVal[0]);
1965  }
1966
1967  // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1968  APInt Quotient(1,0); // to hold result.
1969  divide(*this, lhsWords, RHS, rhsWords, &Quotient, 0);
1970  return Quotient;
1971}
1972
1973APInt APInt::urem(const APInt& RHS) const {
1974  assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
1975  if (isSingleWord()) {
1976    assert(RHS.VAL != 0 && "Remainder by zero?");
1977    return APInt(BitWidth, VAL % RHS.VAL);
1978  }
1979
1980  // Get some facts about the LHS
1981  unsigned lhsBits = getActiveBits();
1982  unsigned lhsWords = !lhsBits ? 0 : (whichWord(lhsBits - 1) + 1);
1983
1984  // Get some facts about the RHS
1985  unsigned rhsBits = RHS.getActiveBits();
1986  unsigned rhsWords = !rhsBits ? 0 : (APInt::whichWord(rhsBits - 1) + 1);
1987  assert(rhsWords && "Performing remainder operation by zero ???");
1988
1989  // Check the degenerate cases
1990  if (lhsWords == 0) {
1991    // 0 % Y ===> 0
1992    return APInt(BitWidth, 0);
1993  } else if (lhsWords < rhsWords || this->ult(RHS)) {
1994    // X % Y ===> X, iff X < Y
1995    return *this;
1996  } else if (*this == RHS) {
1997    // X % X == 0;
1998    return APInt(BitWidth, 0);
1999  } else if (lhsWords == 1) {
2000    // All high words are zero, just use native remainder
2001    return APInt(BitWidth, pVal[0] % RHS.pVal[0]);
2002  }
2003
2004  // We have to compute it the hard way. Invoke the Knuth divide algorithm.
2005  APInt Remainder(1,0);
2006  divide(*this, lhsWords, RHS, rhsWords, 0, &Remainder);
2007  return Remainder;
2008}
2009
2010void APInt::udivrem(const APInt &LHS, const APInt &RHS,
2011                    APInt &Quotient, APInt &Remainder) {
2012  // Get some size facts about the dividend and divisor
2013  unsigned lhsBits  = LHS.getActiveBits();
2014  unsigned lhsWords = !lhsBits ? 0 : (APInt::whichWord(lhsBits - 1) + 1);
2015  unsigned rhsBits  = RHS.getActiveBits();
2016  unsigned rhsWords = !rhsBits ? 0 : (APInt::whichWord(rhsBits - 1) + 1);
2017
2018  // Check the degenerate cases
2019  if (lhsWords == 0) {
2020    Quotient = 0;                // 0 / Y ===> 0
2021    Remainder = 0;               // 0 % Y ===> 0
2022    return;
2023  }
2024
2025  if (lhsWords < rhsWords || LHS.ult(RHS)) {
2026    Remainder = LHS;            // X % Y ===> X, iff X < Y
2027    Quotient = 0;               // X / Y ===> 0, iff X < Y
2028    return;
2029  }
2030
2031  if (LHS == RHS) {
2032    Quotient  = 1;              // X / X ===> 1
2033    Remainder = 0;              // X % X ===> 0;
2034    return;
2035  }
2036
2037  if (lhsWords == 1 && rhsWords == 1) {
2038    // There is only one word to consider so use the native versions.
2039    uint64_t lhsValue = LHS.isSingleWord() ? LHS.VAL : LHS.pVal[0];
2040    uint64_t rhsValue = RHS.isSingleWord() ? RHS.VAL : RHS.pVal[0];
2041    Quotient = APInt(LHS.getBitWidth(), lhsValue / rhsValue);
2042    Remainder = APInt(LHS.getBitWidth(), lhsValue % rhsValue);
2043    return;
2044  }
2045
2046  // Okay, lets do it the long way
2047  divide(LHS, lhsWords, RHS, rhsWords, &Quotient, &Remainder);
2048}
2049
2050void APInt::fromString(unsigned numbits, const StringRef& str, uint8_t radix) {
2051  // Check our assumptions here
2052  assert(!str.empty() && "Invalid string length");
2053  assert((radix == 10 || radix == 8 || radix == 16 || radix == 2) &&
2054         "Radix should be 2, 8, 10, or 16!");
2055
2056  StringRef::iterator p = str.begin();
2057  size_t slen = str.size();
2058  bool isNeg = *p == '-';
2059  if (*p == '-' || *p == '+') {
2060    p++;
2061    slen--;
2062    assert(slen && "String is only a sign, needs a value.");
2063  }
2064  assert((slen <= numbits || radix != 2) && "Insufficient bit width");
2065  assert(((slen-1)*3 <= numbits || radix != 8) && "Insufficient bit width");
2066  assert(((slen-1)*4 <= numbits || radix != 16) && "Insufficient bit width");
2067  assert((((slen-1)*64)/22 <= numbits || radix != 10) &&
2068         "Insufficient bit width");
2069
2070  // Allocate memory
2071  if (!isSingleWord())
2072    pVal = getClearedMemory(getNumWords());
2073
2074  // Figure out if we can shift instead of multiply
2075  unsigned shift = (radix == 16 ? 4 : radix == 8 ? 3 : radix == 2 ? 1 : 0);
2076
2077  // Set up an APInt for the digit to add outside the loop so we don't
2078  // constantly construct/destruct it.
2079  APInt apdigit(getBitWidth(), 0);
2080  APInt apradix(getBitWidth(), radix);
2081
2082  // Enter digit traversal loop
2083  for (StringRef::iterator e = str.end(); p != e; ++p) {
2084    unsigned digit = getDigit(*p, radix);
2085    assert(digit < radix && "Invalid character in digit string");
2086
2087    // Shift or multiply the value by the radix
2088    if (slen > 1) {
2089      if (shift)
2090        *this <<= shift;
2091      else
2092        *this *= apradix;
2093    }
2094
2095    // Add in the digit we just interpreted
2096    if (apdigit.isSingleWord())
2097      apdigit.VAL = digit;
2098    else
2099      apdigit.pVal[0] = digit;
2100    *this += apdigit;
2101  }
2102  // If its negative, put it in two's complement form
2103  if (isNeg) {
2104    (*this)--;
2105    this->flip();
2106  }
2107}
2108
2109void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix,
2110                     bool Signed) const {
2111  assert((Radix == 10 || Radix == 8 || Radix == 16 || Radix == 2) &&
2112         "Radix should be 2, 8, 10, or 16!");
2113
2114  // First, check for a zero value and just short circuit the logic below.
2115  if (*this == 0) {
2116    Str.push_back('0');
2117    return;
2118  }
2119
2120  static const char Digits[] = "0123456789ABCDEF";
2121
2122  if (isSingleWord()) {
2123    char Buffer[65];
2124    char *BufPtr = Buffer+65;
2125
2126    uint64_t N;
2127    if (Signed) {
2128      int64_t I = getSExtValue();
2129      if (I < 0) {
2130        Str.push_back('-');
2131        I = -I;
2132      }
2133      N = I;
2134    } else {
2135      N = getZExtValue();
2136    }
2137
2138    while (N) {
2139      *--BufPtr = Digits[N % Radix];
2140      N /= Radix;
2141    }
2142    Str.append(BufPtr, Buffer+65);
2143    return;
2144  }
2145
2146  APInt Tmp(*this);
2147
2148  if (Signed && isNegative()) {
2149    // They want to print the signed version and it is a negative value
2150    // Flip the bits and add one to turn it into the equivalent positive
2151    // value and put a '-' in the result.
2152    Tmp.flip();
2153    Tmp++;
2154    Str.push_back('-');
2155  }
2156
2157  // We insert the digits backward, then reverse them to get the right order.
2158  unsigned StartDig = Str.size();
2159
2160  // For the 2, 8 and 16 bit cases, we can just shift instead of divide
2161  // because the number of bits per digit (1, 3 and 4 respectively) divides
2162  // equaly.  We just shift until the value is zero.
2163  if (Radix != 10) {
2164    // Just shift tmp right for each digit width until it becomes zero
2165    unsigned ShiftAmt = (Radix == 16 ? 4 : (Radix == 8 ? 3 : 1));
2166    unsigned MaskAmt = Radix - 1;
2167
2168    while (Tmp != 0) {
2169      unsigned Digit = unsigned(Tmp.getRawData()[0]) & MaskAmt;
2170      Str.push_back(Digits[Digit]);
2171      Tmp = Tmp.lshr(ShiftAmt);
2172    }
2173  } else {
2174    APInt divisor(4, 10);
2175    while (Tmp != 0) {
2176      APInt APdigit(1, 0);
2177      APInt tmp2(Tmp.getBitWidth(), 0);
2178      divide(Tmp, Tmp.getNumWords(), divisor, divisor.getNumWords(), &tmp2,
2179             &APdigit);
2180      unsigned Digit = (unsigned)APdigit.getZExtValue();
2181      assert(Digit < Radix && "divide failed");
2182      Str.push_back(Digits[Digit]);
2183      Tmp = tmp2;
2184    }
2185  }
2186
2187  // Reverse the digits before returning.
2188  std::reverse(Str.begin()+StartDig, Str.end());
2189}
2190
2191/// toString - This returns the APInt as a std::string.  Note that this is an
2192/// inefficient method.  It is better to pass in a SmallVector/SmallString
2193/// to the methods above.
2194std::string APInt::toString(unsigned Radix = 10, bool Signed = true) const {
2195  SmallString<40> S;
2196  toString(S, Radix, Signed);
2197  return S.str();
2198}
2199
2200
2201void APInt::dump() const {
2202  SmallString<40> S, U;
2203  this->toStringUnsigned(U);
2204  this->toStringSigned(S);
2205  dbgs() << "APInt(" << BitWidth << "b, "
2206         << U.str() << "u " << S.str() << "s)";
2207}
2208
2209void APInt::print(raw_ostream &OS, bool isSigned) const {
2210  SmallString<40> S;
2211  this->toString(S, 10, isSigned);
2212  OS << S.str();
2213}
2214
2215// This implements a variety of operations on a representation of
2216// arbitrary precision, two's-complement, bignum integer values.
2217
2218// Assumed by lowHalf, highHalf, partMSB and partLSB.  A fairly safe
2219// and unrestricting assumption.
2220#define COMPILE_TIME_ASSERT(cond) extern int CTAssert[(cond) ? 1 : -1]
2221COMPILE_TIME_ASSERT(integerPartWidth % 2 == 0);
2222
2223/* Some handy functions local to this file.  */
2224namespace {
2225
2226  /* Returns the integer part with the least significant BITS set.
2227     BITS cannot be zero.  */
2228  static inline integerPart
2229  lowBitMask(unsigned int bits)
2230  {
2231    assert(bits != 0 && bits <= integerPartWidth);
2232
2233    return ~(integerPart) 0 >> (integerPartWidth - bits);
2234  }
2235
2236  /* Returns the value of the lower half of PART.  */
2237  static inline integerPart
2238  lowHalf(integerPart part)
2239  {
2240    return part & lowBitMask(integerPartWidth / 2);
2241  }
2242
2243  /* Returns the value of the upper half of PART.  */
2244  static inline integerPart
2245  highHalf(integerPart part)
2246  {
2247    return part >> (integerPartWidth / 2);
2248  }
2249
2250  /* Returns the bit number of the most significant set bit of a part.
2251     If the input number has no bits set -1U is returned.  */
2252  static unsigned int
2253  partMSB(integerPart value)
2254  {
2255    unsigned int n, msb;
2256
2257    if (value == 0)
2258      return -1U;
2259
2260    n = integerPartWidth / 2;
2261
2262    msb = 0;
2263    do {
2264      if (value >> n) {
2265        value >>= n;
2266        msb += n;
2267      }
2268
2269      n >>= 1;
2270    } while (n);
2271
2272    return msb;
2273  }
2274
2275  /* Returns the bit number of the least significant set bit of a
2276     part.  If the input number has no bits set -1U is returned.  */
2277  static unsigned int
2278  partLSB(integerPart value)
2279  {
2280    unsigned int n, lsb;
2281
2282    if (value == 0)
2283      return -1U;
2284
2285    lsb = integerPartWidth - 1;
2286    n = integerPartWidth / 2;
2287
2288    do {
2289      if (value << n) {
2290        value <<= n;
2291        lsb -= n;
2292      }
2293
2294      n >>= 1;
2295    } while (n);
2296
2297    return lsb;
2298  }
2299}
2300
2301/* Sets the least significant part of a bignum to the input value, and
2302   zeroes out higher parts.  */
2303void
2304APInt::tcSet(integerPart *dst, integerPart part, unsigned int parts)
2305{
2306  unsigned int i;
2307
2308  assert(parts > 0);
2309
2310  dst[0] = part;
2311  for (i = 1; i < parts; i++)
2312    dst[i] = 0;
2313}
2314
2315/* Assign one bignum to another.  */
2316void
2317APInt::tcAssign(integerPart *dst, const integerPart *src, unsigned int parts)
2318{
2319  unsigned int i;
2320
2321  for (i = 0; i < parts; i++)
2322    dst[i] = src[i];
2323}
2324
2325/* Returns true if a bignum is zero, false otherwise.  */
2326bool
2327APInt::tcIsZero(const integerPart *src, unsigned int parts)
2328{
2329  unsigned int i;
2330
2331  for (i = 0; i < parts; i++)
2332    if (src[i])
2333      return false;
2334
2335  return true;
2336}
2337
2338/* Extract the given bit of a bignum; returns 0 or 1.  */
2339int
2340APInt::tcExtractBit(const integerPart *parts, unsigned int bit)
2341{
2342  return (parts[bit / integerPartWidth] &
2343          ((integerPart) 1 << bit % integerPartWidth)) != 0;
2344}
2345
2346/* Set the given bit of a bignum. */
2347void
2348APInt::tcSetBit(integerPart *parts, unsigned int bit)
2349{
2350  parts[bit / integerPartWidth] |= (integerPart) 1 << (bit % integerPartWidth);
2351}
2352
2353/* Clears the given bit of a bignum. */
2354void
2355APInt::tcClearBit(integerPart *parts, unsigned int bit)
2356{
2357  parts[bit / integerPartWidth] &=
2358    ~((integerPart) 1 << (bit % integerPartWidth));
2359}
2360
2361/* Returns the bit number of the least significant set bit of a
2362   number.  If the input number has no bits set -1U is returned.  */
2363unsigned int
2364APInt::tcLSB(const integerPart *parts, unsigned int n)
2365{
2366  unsigned int i, lsb;
2367
2368  for (i = 0; i < n; i++) {
2369      if (parts[i] != 0) {
2370          lsb = partLSB(parts[i]);
2371
2372          return lsb + i * integerPartWidth;
2373      }
2374  }
2375
2376  return -1U;
2377}
2378
2379/* Returns the bit number of the most significant set bit of a number.
2380   If the input number has no bits set -1U is returned.  */
2381unsigned int
2382APInt::tcMSB(const integerPart *parts, unsigned int n)
2383{
2384  unsigned int msb;
2385
2386  do {
2387    --n;
2388
2389    if (parts[n] != 0) {
2390      msb = partMSB(parts[n]);
2391
2392      return msb + n * integerPartWidth;
2393    }
2394  } while (n);
2395
2396  return -1U;
2397}
2398
2399/* Copy the bit vector of width srcBITS from SRC, starting at bit
2400   srcLSB, to DST, of dstCOUNT parts, such that the bit srcLSB becomes
2401   the least significant bit of DST.  All high bits above srcBITS in
2402   DST are zero-filled.  */
2403void
2404APInt::tcExtract(integerPart *dst, unsigned int dstCount,const integerPart *src,
2405                 unsigned int srcBits, unsigned int srcLSB)
2406{
2407  unsigned int firstSrcPart, dstParts, shift, n;
2408
2409  dstParts = (srcBits + integerPartWidth - 1) / integerPartWidth;
2410  assert(dstParts <= dstCount);
2411
2412  firstSrcPart = srcLSB / integerPartWidth;
2413  tcAssign (dst, src + firstSrcPart, dstParts);
2414
2415  shift = srcLSB % integerPartWidth;
2416  tcShiftRight (dst, dstParts, shift);
2417
2418  /* We now have (dstParts * integerPartWidth - shift) bits from SRC
2419     in DST.  If this is less that srcBits, append the rest, else
2420     clear the high bits.  */
2421  n = dstParts * integerPartWidth - shift;
2422  if (n < srcBits) {
2423    integerPart mask = lowBitMask (srcBits - n);
2424    dst[dstParts - 1] |= ((src[firstSrcPart + dstParts] & mask)
2425                          << n % integerPartWidth);
2426  } else if (n > srcBits) {
2427    if (srcBits % integerPartWidth)
2428      dst[dstParts - 1] &= lowBitMask (srcBits % integerPartWidth);
2429  }
2430
2431  /* Clear high parts.  */
2432  while (dstParts < dstCount)
2433    dst[dstParts++] = 0;
2434}
2435
2436/* DST += RHS + C where C is zero or one.  Returns the carry flag.  */
2437integerPart
2438APInt::tcAdd(integerPart *dst, const integerPart *rhs,
2439             integerPart c, unsigned int parts)
2440{
2441  unsigned int i;
2442
2443  assert(c <= 1);
2444
2445  for (i = 0; i < parts; i++) {
2446    integerPart l;
2447
2448    l = dst[i];
2449    if (c) {
2450      dst[i] += rhs[i] + 1;
2451      c = (dst[i] <= l);
2452    } else {
2453      dst[i] += rhs[i];
2454      c = (dst[i] < l);
2455    }
2456  }
2457
2458  return c;
2459}
2460
2461/* DST -= RHS + C where C is zero or one.  Returns the carry flag.  */
2462integerPart
2463APInt::tcSubtract(integerPart *dst, const integerPart *rhs,
2464                  integerPart c, unsigned int parts)
2465{
2466  unsigned int i;
2467
2468  assert(c <= 1);
2469
2470  for (i = 0; i < parts; i++) {
2471    integerPart l;
2472
2473    l = dst[i];
2474    if (c) {
2475      dst[i] -= rhs[i] + 1;
2476      c = (dst[i] >= l);
2477    } else {
2478      dst[i] -= rhs[i];
2479      c = (dst[i] > l);
2480    }
2481  }
2482
2483  return c;
2484}
2485
2486/* Negate a bignum in-place.  */
2487void
2488APInt::tcNegate(integerPart *dst, unsigned int parts)
2489{
2490  tcComplement(dst, parts);
2491  tcIncrement(dst, parts);
2492}
2493
2494/*  DST += SRC * MULTIPLIER + CARRY   if add is true
2495    DST  = SRC * MULTIPLIER + CARRY   if add is false
2496
2497    Requires 0 <= DSTPARTS <= SRCPARTS + 1.  If DST overlaps SRC
2498    they must start at the same point, i.e. DST == SRC.
2499
2500    If DSTPARTS == SRCPARTS + 1 no overflow occurs and zero is
2501    returned.  Otherwise DST is filled with the least significant
2502    DSTPARTS parts of the result, and if all of the omitted higher
2503    parts were zero return zero, otherwise overflow occurred and
2504    return one.  */
2505int
2506APInt::tcMultiplyPart(integerPart *dst, const integerPart *src,
2507                      integerPart multiplier, integerPart carry,
2508                      unsigned int srcParts, unsigned int dstParts,
2509                      bool add)
2510{
2511  unsigned int i, n;
2512
2513  /* Otherwise our writes of DST kill our later reads of SRC.  */
2514  assert(dst <= src || dst >= src + srcParts);
2515  assert(dstParts <= srcParts + 1);
2516
2517  /* N loops; minimum of dstParts and srcParts.  */
2518  n = dstParts < srcParts ? dstParts: srcParts;
2519
2520  for (i = 0; i < n; i++) {
2521    integerPart low, mid, high, srcPart;
2522
2523      /* [ LOW, HIGH ] = MULTIPLIER * SRC[i] + DST[i] + CARRY.
2524
2525         This cannot overflow, because
2526
2527         (n - 1) * (n - 1) + 2 (n - 1) = (n - 1) * (n + 1)
2528
2529         which is less than n^2.  */
2530
2531    srcPart = src[i];
2532
2533    if (multiplier == 0 || srcPart == 0)        {
2534      low = carry;
2535      high = 0;
2536    } else {
2537      low = lowHalf(srcPart) * lowHalf(multiplier);
2538      high = highHalf(srcPart) * highHalf(multiplier);
2539
2540      mid = lowHalf(srcPart) * highHalf(multiplier);
2541      high += highHalf(mid);
2542      mid <<= integerPartWidth / 2;
2543      if (low + mid < low)
2544        high++;
2545      low += mid;
2546
2547      mid = highHalf(srcPart) * lowHalf(multiplier);
2548      high += highHalf(mid);
2549      mid <<= integerPartWidth / 2;
2550      if (low + mid < low)
2551        high++;
2552      low += mid;
2553
2554      /* Now add carry.  */
2555      if (low + carry < low)
2556        high++;
2557      low += carry;
2558    }
2559
2560    if (add) {
2561      /* And now DST[i], and store the new low part there.  */
2562      if (low + dst[i] < low)
2563        high++;
2564      dst[i] += low;
2565    } else
2566      dst[i] = low;
2567
2568    carry = high;
2569  }
2570
2571  if (i < dstParts) {
2572    /* Full multiplication, there is no overflow.  */
2573    assert(i + 1 == dstParts);
2574    dst[i] = carry;
2575    return 0;
2576  } else {
2577    /* We overflowed if there is carry.  */
2578    if (carry)
2579      return 1;
2580
2581    /* We would overflow if any significant unwritten parts would be
2582       non-zero.  This is true if any remaining src parts are non-zero
2583       and the multiplier is non-zero.  */
2584    if (multiplier)
2585      for (; i < srcParts; i++)
2586        if (src[i])
2587          return 1;
2588
2589    /* We fitted in the narrow destination.  */
2590    return 0;
2591  }
2592}
2593
2594/* DST = LHS * RHS, where DST has the same width as the operands and
2595   is filled with the least significant parts of the result.  Returns
2596   one if overflow occurred, otherwise zero.  DST must be disjoint
2597   from both operands.  */
2598int
2599APInt::tcMultiply(integerPart *dst, const integerPart *lhs,
2600                  const integerPart *rhs, unsigned int parts)
2601{
2602  unsigned int i;
2603  int overflow;
2604
2605  assert(dst != lhs && dst != rhs);
2606
2607  overflow = 0;
2608  tcSet(dst, 0, parts);
2609
2610  for (i = 0; i < parts; i++)
2611    overflow |= tcMultiplyPart(&dst[i], lhs, rhs[i], 0, parts,
2612                               parts - i, true);
2613
2614  return overflow;
2615}
2616
2617/* DST = LHS * RHS, where DST has width the sum of the widths of the
2618   operands.  No overflow occurs.  DST must be disjoint from both
2619   operands.  Returns the number of parts required to hold the
2620   result.  */
2621unsigned int
2622APInt::tcFullMultiply(integerPart *dst, const integerPart *lhs,
2623                      const integerPart *rhs, unsigned int lhsParts,
2624                      unsigned int rhsParts)
2625{
2626  /* Put the narrower number on the LHS for less loops below.  */
2627  if (lhsParts > rhsParts) {
2628    return tcFullMultiply (dst, rhs, lhs, rhsParts, lhsParts);
2629  } else {
2630    unsigned int n;
2631
2632    assert(dst != lhs && dst != rhs);
2633
2634    tcSet(dst, 0, rhsParts);
2635
2636    for (n = 0; n < lhsParts; n++)
2637      tcMultiplyPart(&dst[n], rhs, lhs[n], 0, rhsParts, rhsParts + 1, true);
2638
2639    n = lhsParts + rhsParts;
2640
2641    return n - (dst[n - 1] == 0);
2642  }
2643}
2644
2645/* If RHS is zero LHS and REMAINDER are left unchanged, return one.
2646   Otherwise set LHS to LHS / RHS with the fractional part discarded,
2647   set REMAINDER to the remainder, return zero.  i.e.
2648
2649   OLD_LHS = RHS * LHS + REMAINDER
2650
2651   SCRATCH is a bignum of the same size as the operands and result for
2652   use by the routine; its contents need not be initialized and are
2653   destroyed.  LHS, REMAINDER and SCRATCH must be distinct.
2654*/
2655int
2656APInt::tcDivide(integerPart *lhs, const integerPart *rhs,
2657                integerPart *remainder, integerPart *srhs,
2658                unsigned int parts)
2659{
2660  unsigned int n, shiftCount;
2661  integerPart mask;
2662
2663  assert(lhs != remainder && lhs != srhs && remainder != srhs);
2664
2665  shiftCount = tcMSB(rhs, parts) + 1;
2666  if (shiftCount == 0)
2667    return true;
2668
2669  shiftCount = parts * integerPartWidth - shiftCount;
2670  n = shiftCount / integerPartWidth;
2671  mask = (integerPart) 1 << (shiftCount % integerPartWidth);
2672
2673  tcAssign(srhs, rhs, parts);
2674  tcShiftLeft(srhs, parts, shiftCount);
2675  tcAssign(remainder, lhs, parts);
2676  tcSet(lhs, 0, parts);
2677
2678  /* Loop, subtracting SRHS if REMAINDER is greater and adding that to
2679     the total.  */
2680  for (;;) {
2681      int compare;
2682
2683      compare = tcCompare(remainder, srhs, parts);
2684      if (compare >= 0) {
2685        tcSubtract(remainder, srhs, 0, parts);
2686        lhs[n] |= mask;
2687      }
2688
2689      if (shiftCount == 0)
2690        break;
2691      shiftCount--;
2692      tcShiftRight(srhs, parts, 1);
2693      if ((mask >>= 1) == 0)
2694        mask = (integerPart) 1 << (integerPartWidth - 1), n--;
2695  }
2696
2697  return false;
2698}
2699
2700/* Shift a bignum left COUNT bits in-place.  Shifted in bits are zero.
2701   There are no restrictions on COUNT.  */
2702void
2703APInt::tcShiftLeft(integerPart *dst, unsigned int parts, unsigned int count)
2704{
2705  if (count) {
2706    unsigned int jump, shift;
2707
2708    /* Jump is the inter-part jump; shift is is intra-part shift.  */
2709    jump = count / integerPartWidth;
2710    shift = count % integerPartWidth;
2711
2712    while (parts > jump) {
2713      integerPart part;
2714
2715      parts--;
2716
2717      /* dst[i] comes from the two parts src[i - jump] and, if we have
2718         an intra-part shift, src[i - jump - 1].  */
2719      part = dst[parts - jump];
2720      if (shift) {
2721        part <<= shift;
2722        if (parts >= jump + 1)
2723          part |= dst[parts - jump - 1] >> (integerPartWidth - shift);
2724      }
2725
2726      dst[parts] = part;
2727    }
2728
2729    while (parts > 0)
2730      dst[--parts] = 0;
2731  }
2732}
2733
2734/* Shift a bignum right COUNT bits in-place.  Shifted in bits are
2735   zero.  There are no restrictions on COUNT.  */
2736void
2737APInt::tcShiftRight(integerPart *dst, unsigned int parts, unsigned int count)
2738{
2739  if (count) {
2740    unsigned int i, jump, shift;
2741
2742    /* Jump is the inter-part jump; shift is is intra-part shift.  */
2743    jump = count / integerPartWidth;
2744    shift = count % integerPartWidth;
2745
2746    /* Perform the shift.  This leaves the most significant COUNT bits
2747       of the result at zero.  */
2748    for (i = 0; i < parts; i++) {
2749      integerPart part;
2750
2751      if (i + jump >= parts) {
2752        part = 0;
2753      } else {
2754        part = dst[i + jump];
2755        if (shift) {
2756          part >>= shift;
2757          if (i + jump + 1 < parts)
2758            part |= dst[i + jump + 1] << (integerPartWidth - shift);
2759        }
2760      }
2761
2762      dst[i] = part;
2763    }
2764  }
2765}
2766
2767/* Bitwise and of two bignums.  */
2768void
2769APInt::tcAnd(integerPart *dst, const integerPart *rhs, unsigned int parts)
2770{
2771  unsigned int i;
2772
2773  for (i = 0; i < parts; i++)
2774    dst[i] &= rhs[i];
2775}
2776
2777/* Bitwise inclusive or of two bignums.  */
2778void
2779APInt::tcOr(integerPart *dst, const integerPart *rhs, unsigned int parts)
2780{
2781  unsigned int i;
2782
2783  for (i = 0; i < parts; i++)
2784    dst[i] |= rhs[i];
2785}
2786
2787/* Bitwise exclusive or of two bignums.  */
2788void
2789APInt::tcXor(integerPart *dst, const integerPart *rhs, unsigned int parts)
2790{
2791  unsigned int i;
2792
2793  for (i = 0; i < parts; i++)
2794    dst[i] ^= rhs[i];
2795}
2796
2797/* Complement a bignum in-place.  */
2798void
2799APInt::tcComplement(integerPart *dst, unsigned int parts)
2800{
2801  unsigned int i;
2802
2803  for (i = 0; i < parts; i++)
2804    dst[i] = ~dst[i];
2805}
2806
2807/* Comparison (unsigned) of two bignums.  */
2808int
2809APInt::tcCompare(const integerPart *lhs, const integerPart *rhs,
2810                 unsigned int parts)
2811{
2812  while (parts) {
2813      parts--;
2814      if (lhs[parts] == rhs[parts])
2815        continue;
2816
2817      if (lhs[parts] > rhs[parts])
2818        return 1;
2819      else
2820        return -1;
2821    }
2822
2823  return 0;
2824}
2825
2826/* Increment a bignum in-place, return the carry flag.  */
2827integerPart
2828APInt::tcIncrement(integerPart *dst, unsigned int parts)
2829{
2830  unsigned int i;
2831
2832  for (i = 0; i < parts; i++)
2833    if (++dst[i] != 0)
2834      break;
2835
2836  return i == parts;
2837}
2838
2839/* Set the least significant BITS bits of a bignum, clear the
2840   rest.  */
2841void
2842APInt::tcSetLeastSignificantBits(integerPart *dst, unsigned int parts,
2843                                 unsigned int bits)
2844{
2845  unsigned int i;
2846
2847  i = 0;
2848  while (bits > integerPartWidth) {
2849    dst[i++] = ~(integerPart) 0;
2850    bits -= integerPartWidth;
2851  }
2852
2853  if (bits)
2854    dst[i++] = ~(integerPart) 0 >> (integerPartWidth - bits);
2855
2856  while (i < parts)
2857    dst[i++] = 0;
2858}
2859