APInt.cpp revision 219077
1//===-- APInt.cpp - Implement APInt class ---------------------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file implements a class to represent arbitrary precision integer
11// constant values and provide a variety of arithmetic operations on them.
12//
13//===----------------------------------------------------------------------===//
14
15#define DEBUG_TYPE "apint"
16#include "llvm/ADT/APInt.h"
17#include "llvm/ADT/StringRef.h"
18#include "llvm/ADT/FoldingSet.h"
19#include "llvm/ADT/SmallString.h"
20#include "llvm/Support/Debug.h"
21#include "llvm/Support/ErrorHandling.h"
22#include "llvm/Support/MathExtras.h"
23#include "llvm/Support/raw_ostream.h"
24#include <cmath>
25#include <limits>
26#include <cstring>
27#include <cstdlib>
28using namespace llvm;
29
30/// A utility function for allocating memory, checking for allocation failures,
31/// and ensuring the contents are zeroed.
32inline static uint64_t* getClearedMemory(unsigned numWords) {
33  uint64_t * result = new uint64_t[numWords];
34  assert(result && "APInt memory allocation fails!");
35  memset(result, 0, numWords * sizeof(uint64_t));
36  return result;
37}
38
39/// A utility function for allocating memory and checking for allocation
40/// failure.  The content is not zeroed.
41inline static uint64_t* getMemory(unsigned numWords) {
42  uint64_t * result = new uint64_t[numWords];
43  assert(result && "APInt memory allocation fails!");
44  return result;
45}
46
47/// A utility function that converts a character to a digit.
48inline static unsigned getDigit(char cdigit, uint8_t radix) {
49  unsigned r;
50
51  if (radix == 16) {
52    r = cdigit - '0';
53    if (r <= 9)
54      return r;
55
56    r = cdigit - 'A';
57    if (r <= 5)
58      return r + 10;
59
60    r = cdigit - 'a';
61    if (r <= 5)
62      return r + 10;
63  }
64
65  r = cdigit - '0';
66  if (r < radix)
67    return r;
68
69  return -1U;
70}
71
72
73void APInt::initSlowCase(unsigned numBits, uint64_t val, bool isSigned) {
74  pVal = getClearedMemory(getNumWords());
75  pVal[0] = val;
76  if (isSigned && int64_t(val) < 0)
77    for (unsigned i = 1; i < getNumWords(); ++i)
78      pVal[i] = -1ULL;
79}
80
81void APInt::initSlowCase(const APInt& that) {
82  pVal = getMemory(getNumWords());
83  memcpy(pVal, that.pVal, getNumWords() * APINT_WORD_SIZE);
84}
85
86
87APInt::APInt(unsigned numBits, unsigned numWords, const uint64_t bigVal[])
88  : BitWidth(numBits), VAL(0) {
89  assert(BitWidth && "Bitwidth too small");
90  assert(bigVal && "Null pointer detected!");
91  if (isSingleWord())
92    VAL = bigVal[0];
93  else {
94    // Get memory, cleared to 0
95    pVal = getClearedMemory(getNumWords());
96    // Calculate the number of words to copy
97    unsigned words = std::min<unsigned>(numWords, getNumWords());
98    // Copy the words from bigVal to pVal
99    memcpy(pVal, bigVal, words * APINT_WORD_SIZE);
100  }
101  // Make sure unused high bits are cleared
102  clearUnusedBits();
103}
104
105APInt::APInt(unsigned numbits, StringRef Str, uint8_t radix)
106  : BitWidth(numbits), VAL(0) {
107  assert(BitWidth && "Bitwidth too small");
108  fromString(numbits, Str, radix);
109}
110
111APInt& APInt::AssignSlowCase(const APInt& RHS) {
112  // Don't do anything for X = X
113  if (this == &RHS)
114    return *this;
115
116  if (BitWidth == RHS.getBitWidth()) {
117    // assume same bit-width single-word case is already handled
118    assert(!isSingleWord());
119    memcpy(pVal, RHS.pVal, getNumWords() * APINT_WORD_SIZE);
120    return *this;
121  }
122
123  if (isSingleWord()) {
124    // assume case where both are single words is already handled
125    assert(!RHS.isSingleWord());
126    VAL = 0;
127    pVal = getMemory(RHS.getNumWords());
128    memcpy(pVal, RHS.pVal, RHS.getNumWords() * APINT_WORD_SIZE);
129  } else if (getNumWords() == RHS.getNumWords())
130    memcpy(pVal, RHS.pVal, RHS.getNumWords() * APINT_WORD_SIZE);
131  else if (RHS.isSingleWord()) {
132    delete [] pVal;
133    VAL = RHS.VAL;
134  } else {
135    delete [] pVal;
136    pVal = getMemory(RHS.getNumWords());
137    memcpy(pVal, RHS.pVal, RHS.getNumWords() * APINT_WORD_SIZE);
138  }
139  BitWidth = RHS.BitWidth;
140  return clearUnusedBits();
141}
142
143APInt& APInt::operator=(uint64_t RHS) {
144  if (isSingleWord())
145    VAL = RHS;
146  else {
147    pVal[0] = RHS;
148    memset(pVal+1, 0, (getNumWords() - 1) * APINT_WORD_SIZE);
149  }
150  return clearUnusedBits();
151}
152
153/// Profile - This method 'profiles' an APInt for use with FoldingSet.
154void APInt::Profile(FoldingSetNodeID& ID) const {
155  ID.AddInteger(BitWidth);
156
157  if (isSingleWord()) {
158    ID.AddInteger(VAL);
159    return;
160  }
161
162  unsigned NumWords = getNumWords();
163  for (unsigned i = 0; i < NumWords; ++i)
164    ID.AddInteger(pVal[i]);
165}
166
167/// add_1 - This function adds a single "digit" integer, y, to the multiple
168/// "digit" integer array,  x[]. x[] is modified to reflect the addition and
169/// 1 is returned if there is a carry out, otherwise 0 is returned.
170/// @returns the carry of the addition.
171static bool add_1(uint64_t dest[], uint64_t x[], unsigned len, uint64_t y) {
172  for (unsigned i = 0; i < len; ++i) {
173    dest[i] = y + x[i];
174    if (dest[i] < y)
175      y = 1; // Carry one to next digit.
176    else {
177      y = 0; // No need to carry so exit early
178      break;
179    }
180  }
181  return y;
182}
183
184/// @brief Prefix increment operator. Increments the APInt by one.
185APInt& APInt::operator++() {
186  if (isSingleWord())
187    ++VAL;
188  else
189    add_1(pVal, pVal, getNumWords(), 1);
190  return clearUnusedBits();
191}
192
193/// sub_1 - This function subtracts a single "digit" (64-bit word), y, from
194/// the multi-digit integer array, x[], propagating the borrowed 1 value until
195/// no further borrowing is neeeded or it runs out of "digits" in x.  The result
196/// is 1 if "borrowing" exhausted the digits in x, or 0 if x was not exhausted.
197/// In other words, if y > x then this function returns 1, otherwise 0.
198/// @returns the borrow out of the subtraction
199static bool sub_1(uint64_t x[], unsigned len, uint64_t y) {
200  for (unsigned i = 0; i < len; ++i) {
201    uint64_t X = x[i];
202    x[i] -= y;
203    if (y > X)
204      y = 1;  // We have to "borrow 1" from next "digit"
205    else {
206      y = 0;  // No need to borrow
207      break;  // Remaining digits are unchanged so exit early
208    }
209  }
210  return bool(y);
211}
212
213/// @brief Prefix decrement operator. Decrements the APInt by one.
214APInt& APInt::operator--() {
215  if (isSingleWord())
216    --VAL;
217  else
218    sub_1(pVal, getNumWords(), 1);
219  return clearUnusedBits();
220}
221
222/// add - This function adds the integer array x to the integer array Y and
223/// places the result in dest.
224/// @returns the carry out from the addition
225/// @brief General addition of 64-bit integer arrays
226static bool add(uint64_t *dest, const uint64_t *x, const uint64_t *y,
227                unsigned len) {
228  bool carry = false;
229  for (unsigned i = 0; i< len; ++i) {
230    uint64_t limit = std::min(x[i],y[i]); // must come first in case dest == x
231    dest[i] = x[i] + y[i] + carry;
232    carry = dest[i] < limit || (carry && dest[i] == limit);
233  }
234  return carry;
235}
236
237/// Adds the RHS APint to this APInt.
238/// @returns this, after addition of RHS.
239/// @brief Addition assignment operator.
240APInt& APInt::operator+=(const APInt& RHS) {
241  assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
242  if (isSingleWord())
243    VAL += RHS.VAL;
244  else {
245    add(pVal, pVal, RHS.pVal, getNumWords());
246  }
247  return clearUnusedBits();
248}
249
250/// Subtracts the integer array y from the integer array x
251/// @returns returns the borrow out.
252/// @brief Generalized subtraction of 64-bit integer arrays.
253static bool sub(uint64_t *dest, const uint64_t *x, const uint64_t *y,
254                unsigned len) {
255  bool borrow = false;
256  for (unsigned i = 0; i < len; ++i) {
257    uint64_t x_tmp = borrow ? x[i] - 1 : x[i];
258    borrow = y[i] > x_tmp || (borrow && x[i] == 0);
259    dest[i] = x_tmp - y[i];
260  }
261  return borrow;
262}
263
264/// Subtracts the RHS APInt from this APInt
265/// @returns this, after subtraction
266/// @brief Subtraction assignment operator.
267APInt& APInt::operator-=(const APInt& RHS) {
268  assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
269  if (isSingleWord())
270    VAL -= RHS.VAL;
271  else
272    sub(pVal, pVal, RHS.pVal, getNumWords());
273  return clearUnusedBits();
274}
275
276/// Multiplies an integer array, x, by a uint64_t integer and places the result
277/// into dest.
278/// @returns the carry out of the multiplication.
279/// @brief Multiply a multi-digit APInt by a single digit (64-bit) integer.
280static uint64_t mul_1(uint64_t dest[], uint64_t x[], unsigned len, uint64_t y) {
281  // Split y into high 32-bit part (hy)  and low 32-bit part (ly)
282  uint64_t ly = y & 0xffffffffULL, hy = y >> 32;
283  uint64_t carry = 0;
284
285  // For each digit of x.
286  for (unsigned i = 0; i < len; ++i) {
287    // Split x into high and low words
288    uint64_t lx = x[i] & 0xffffffffULL;
289    uint64_t hx = x[i] >> 32;
290    // hasCarry - A flag to indicate if there is a carry to the next digit.
291    // hasCarry == 0, no carry
292    // hasCarry == 1, has carry
293    // hasCarry == 2, no carry and the calculation result == 0.
294    uint8_t hasCarry = 0;
295    dest[i] = carry + lx * ly;
296    // Determine if the add above introduces carry.
297    hasCarry = (dest[i] < carry) ? 1 : 0;
298    carry = hx * ly + (dest[i] >> 32) + (hasCarry ? (1ULL << 32) : 0);
299    // The upper limit of carry can be (2^32 - 1)(2^32 - 1) +
300    // (2^32 - 1) + 2^32 = 2^64.
301    hasCarry = (!carry && hasCarry) ? 1 : (!carry ? 2 : 0);
302
303    carry += (lx * hy) & 0xffffffffULL;
304    dest[i] = (carry << 32) | (dest[i] & 0xffffffffULL);
305    carry = (((!carry && hasCarry != 2) || hasCarry == 1) ? (1ULL << 32) : 0) +
306            (carry >> 32) + ((lx * hy) >> 32) + hx * hy;
307  }
308  return carry;
309}
310
311/// Multiplies integer array x by integer array y and stores the result into
312/// the integer array dest. Note that dest's size must be >= xlen + ylen.
313/// @brief Generalized multiplicate of integer arrays.
314static void mul(uint64_t dest[], uint64_t x[], unsigned xlen, uint64_t y[],
315                unsigned ylen) {
316  dest[xlen] = mul_1(dest, x, xlen, y[0]);
317  for (unsigned i = 1; i < ylen; ++i) {
318    uint64_t ly = y[i] & 0xffffffffULL, hy = y[i] >> 32;
319    uint64_t carry = 0, lx = 0, hx = 0;
320    for (unsigned j = 0; j < xlen; ++j) {
321      lx = x[j] & 0xffffffffULL;
322      hx = x[j] >> 32;
323      // hasCarry - A flag to indicate if has carry.
324      // hasCarry == 0, no carry
325      // hasCarry == 1, has carry
326      // hasCarry == 2, no carry and the calculation result == 0.
327      uint8_t hasCarry = 0;
328      uint64_t resul = carry + lx * ly;
329      hasCarry = (resul < carry) ? 1 : 0;
330      carry = (hasCarry ? (1ULL << 32) : 0) + hx * ly + (resul >> 32);
331      hasCarry = (!carry && hasCarry) ? 1 : (!carry ? 2 : 0);
332
333      carry += (lx * hy) & 0xffffffffULL;
334      resul = (carry << 32) | (resul & 0xffffffffULL);
335      dest[i+j] += resul;
336      carry = (((!carry && hasCarry != 2) || hasCarry == 1) ? (1ULL << 32) : 0)+
337              (carry >> 32) + (dest[i+j] < resul ? 1 : 0) +
338              ((lx * hy) >> 32) + hx * hy;
339    }
340    dest[i+xlen] = carry;
341  }
342}
343
344APInt& APInt::operator*=(const APInt& RHS) {
345  assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
346  if (isSingleWord()) {
347    VAL *= RHS.VAL;
348    clearUnusedBits();
349    return *this;
350  }
351
352  // Get some bit facts about LHS and check for zero
353  unsigned lhsBits = getActiveBits();
354  unsigned lhsWords = !lhsBits ? 0 : whichWord(lhsBits - 1) + 1;
355  if (!lhsWords)
356    // 0 * X ===> 0
357    return *this;
358
359  // Get some bit facts about RHS and check for zero
360  unsigned rhsBits = RHS.getActiveBits();
361  unsigned rhsWords = !rhsBits ? 0 : whichWord(rhsBits - 1) + 1;
362  if (!rhsWords) {
363    // X * 0 ===> 0
364    clearAllBits();
365    return *this;
366  }
367
368  // Allocate space for the result
369  unsigned destWords = rhsWords + lhsWords;
370  uint64_t *dest = getMemory(destWords);
371
372  // Perform the long multiply
373  mul(dest, pVal, lhsWords, RHS.pVal, rhsWords);
374
375  // Copy result back into *this
376  clearAllBits();
377  unsigned wordsToCopy = destWords >= getNumWords() ? getNumWords() : destWords;
378  memcpy(pVal, dest, wordsToCopy * APINT_WORD_SIZE);
379
380  // delete dest array and return
381  delete[] dest;
382  return *this;
383}
384
385APInt& APInt::operator&=(const APInt& RHS) {
386  assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
387  if (isSingleWord()) {
388    VAL &= RHS.VAL;
389    return *this;
390  }
391  unsigned numWords = getNumWords();
392  for (unsigned i = 0; i < numWords; ++i)
393    pVal[i] &= RHS.pVal[i];
394  return *this;
395}
396
397APInt& APInt::operator|=(const APInt& RHS) {
398  assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
399  if (isSingleWord()) {
400    VAL |= RHS.VAL;
401    return *this;
402  }
403  unsigned numWords = getNumWords();
404  for (unsigned i = 0; i < numWords; ++i)
405    pVal[i] |= RHS.pVal[i];
406  return *this;
407}
408
409APInt& APInt::operator^=(const APInt& RHS) {
410  assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
411  if (isSingleWord()) {
412    VAL ^= RHS.VAL;
413    this->clearUnusedBits();
414    return *this;
415  }
416  unsigned numWords = getNumWords();
417  for (unsigned i = 0; i < numWords; ++i)
418    pVal[i] ^= RHS.pVal[i];
419  return clearUnusedBits();
420}
421
422APInt APInt::AndSlowCase(const APInt& RHS) const {
423  unsigned numWords = getNumWords();
424  uint64_t* val = getMemory(numWords);
425  for (unsigned i = 0; i < numWords; ++i)
426    val[i] = pVal[i] & RHS.pVal[i];
427  return APInt(val, getBitWidth());
428}
429
430APInt APInt::OrSlowCase(const APInt& RHS) const {
431  unsigned numWords = getNumWords();
432  uint64_t *val = getMemory(numWords);
433  for (unsigned i = 0; i < numWords; ++i)
434    val[i] = pVal[i] | RHS.pVal[i];
435  return APInt(val, getBitWidth());
436}
437
438APInt APInt::XorSlowCase(const APInt& RHS) const {
439  unsigned numWords = getNumWords();
440  uint64_t *val = getMemory(numWords);
441  for (unsigned i = 0; i < numWords; ++i)
442    val[i] = pVal[i] ^ RHS.pVal[i];
443
444  // 0^0==1 so clear the high bits in case they got set.
445  return APInt(val, getBitWidth()).clearUnusedBits();
446}
447
448bool APInt::operator !() const {
449  if (isSingleWord())
450    return !VAL;
451
452  for (unsigned i = 0; i < getNumWords(); ++i)
453    if (pVal[i])
454      return false;
455  return true;
456}
457
458APInt APInt::operator*(const APInt& RHS) const {
459  assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
460  if (isSingleWord())
461    return APInt(BitWidth, VAL * RHS.VAL);
462  APInt Result(*this);
463  Result *= RHS;
464  return Result.clearUnusedBits();
465}
466
467APInt APInt::operator+(const APInt& RHS) const {
468  assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
469  if (isSingleWord())
470    return APInt(BitWidth, VAL + RHS.VAL);
471  APInt Result(BitWidth, 0);
472  add(Result.pVal, this->pVal, RHS.pVal, getNumWords());
473  return Result.clearUnusedBits();
474}
475
476APInt APInt::operator-(const APInt& RHS) const {
477  assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
478  if (isSingleWord())
479    return APInt(BitWidth, VAL - RHS.VAL);
480  APInt Result(BitWidth, 0);
481  sub(Result.pVal, this->pVal, RHS.pVal, getNumWords());
482  return Result.clearUnusedBits();
483}
484
485bool APInt::operator[](unsigned bitPosition) const {
486  assert(bitPosition < getBitWidth() && "Bit position out of bounds!");
487  return (maskBit(bitPosition) &
488          (isSingleWord() ?  VAL : pVal[whichWord(bitPosition)])) != 0;
489}
490
491bool APInt::EqualSlowCase(const APInt& RHS) const {
492  // Get some facts about the number of bits used in the two operands.
493  unsigned n1 = getActiveBits();
494  unsigned n2 = RHS.getActiveBits();
495
496  // If the number of bits isn't the same, they aren't equal
497  if (n1 != n2)
498    return false;
499
500  // If the number of bits fits in a word, we only need to compare the low word.
501  if (n1 <= APINT_BITS_PER_WORD)
502    return pVal[0] == RHS.pVal[0];
503
504  // Otherwise, compare everything
505  for (int i = whichWord(n1 - 1); i >= 0; --i)
506    if (pVal[i] != RHS.pVal[i])
507      return false;
508  return true;
509}
510
511bool APInt::EqualSlowCase(uint64_t Val) const {
512  unsigned n = getActiveBits();
513  if (n <= APINT_BITS_PER_WORD)
514    return pVal[0] == Val;
515  else
516    return false;
517}
518
519bool APInt::ult(const APInt& RHS) const {
520  assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison");
521  if (isSingleWord())
522    return VAL < RHS.VAL;
523
524  // Get active bit length of both operands
525  unsigned n1 = getActiveBits();
526  unsigned n2 = RHS.getActiveBits();
527
528  // If magnitude of LHS is less than RHS, return true.
529  if (n1 < n2)
530    return true;
531
532  // If magnitude of RHS is greather than LHS, return false.
533  if (n2 < n1)
534    return false;
535
536  // If they bot fit in a word, just compare the low order word
537  if (n1 <= APINT_BITS_PER_WORD && n2 <= APINT_BITS_PER_WORD)
538    return pVal[0] < RHS.pVal[0];
539
540  // Otherwise, compare all words
541  unsigned topWord = whichWord(std::max(n1,n2)-1);
542  for (int i = topWord; i >= 0; --i) {
543    if (pVal[i] > RHS.pVal[i])
544      return false;
545    if (pVal[i] < RHS.pVal[i])
546      return true;
547  }
548  return false;
549}
550
551bool APInt::slt(const APInt& RHS) const {
552  assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison");
553  if (isSingleWord()) {
554    int64_t lhsSext = (int64_t(VAL) << (64-BitWidth)) >> (64-BitWidth);
555    int64_t rhsSext = (int64_t(RHS.VAL) << (64-BitWidth)) >> (64-BitWidth);
556    return lhsSext < rhsSext;
557  }
558
559  APInt lhs(*this);
560  APInt rhs(RHS);
561  bool lhsNeg = isNegative();
562  bool rhsNeg = rhs.isNegative();
563  if (lhsNeg) {
564    // Sign bit is set so perform two's complement to make it positive
565    lhs.flipAllBits();
566    lhs++;
567  }
568  if (rhsNeg) {
569    // Sign bit is set so perform two's complement to make it positive
570    rhs.flipAllBits();
571    rhs++;
572  }
573
574  // Now we have unsigned values to compare so do the comparison if necessary
575  // based on the negativeness of the values.
576  if (lhsNeg)
577    if (rhsNeg)
578      return lhs.ugt(rhs);
579    else
580      return true;
581  else if (rhsNeg)
582    return false;
583  else
584    return lhs.ult(rhs);
585}
586
587void APInt::setBit(unsigned bitPosition) {
588  if (isSingleWord())
589    VAL |= maskBit(bitPosition);
590  else
591    pVal[whichWord(bitPosition)] |= maskBit(bitPosition);
592}
593
594/// Set the given bit to 0 whose position is given as "bitPosition".
595/// @brief Set a given bit to 0.
596void APInt::clearBit(unsigned bitPosition) {
597  if (isSingleWord())
598    VAL &= ~maskBit(bitPosition);
599  else
600    pVal[whichWord(bitPosition)] &= ~maskBit(bitPosition);
601}
602
603/// @brief Toggle every bit to its opposite value.
604
605/// Toggle a given bit to its opposite value whose position is given
606/// as "bitPosition".
607/// @brief Toggles a given bit to its opposite value.
608void APInt::flipBit(unsigned bitPosition) {
609  assert(bitPosition < BitWidth && "Out of the bit-width range!");
610  if ((*this)[bitPosition]) clearBit(bitPosition);
611  else setBit(bitPosition);
612}
613
614unsigned APInt::getBitsNeeded(StringRef str, uint8_t radix) {
615  assert(!str.empty() && "Invalid string length");
616  assert((radix == 10 || radix == 8 || radix == 16 || radix == 2) &&
617         "Radix should be 2, 8, 10, or 16!");
618
619  size_t slen = str.size();
620
621  // Each computation below needs to know if it's negative.
622  StringRef::iterator p = str.begin();
623  unsigned isNegative = *p == '-';
624  if (*p == '-' || *p == '+') {
625    p++;
626    slen--;
627    assert(slen && "String is only a sign, needs a value.");
628  }
629
630  // For radixes of power-of-two values, the bits required is accurately and
631  // easily computed
632  if (radix == 2)
633    return slen + isNegative;
634  if (radix == 8)
635    return slen * 3 + isNegative;
636  if (radix == 16)
637    return slen * 4 + isNegative;
638
639  // This is grossly inefficient but accurate. We could probably do something
640  // with a computation of roughly slen*64/20 and then adjust by the value of
641  // the first few digits. But, I'm not sure how accurate that could be.
642
643  // Compute a sufficient number of bits that is always large enough but might
644  // be too large. This avoids the assertion in the constructor. This
645  // calculation doesn't work appropriately for the numbers 0-9, so just use 4
646  // bits in that case.
647  unsigned sufficient = slen == 1 ? 4 : slen * 64/18;
648
649  // Convert to the actual binary value.
650  APInt tmp(sufficient, StringRef(p, slen), radix);
651
652  // Compute how many bits are required. If the log is infinite, assume we need
653  // just bit.
654  unsigned log = tmp.logBase2();
655  if (log == (unsigned)-1) {
656    return isNegative + 1;
657  } else {
658    return isNegative + log + 1;
659  }
660}
661
662// From http://www.burtleburtle.net, byBob Jenkins.
663// When targeting x86, both GCC and LLVM seem to recognize this as a
664// rotate instruction.
665#define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k))))
666
667// From http://www.burtleburtle.net, by Bob Jenkins.
668#define mix(a,b,c) \
669  { \
670    a -= c;  a ^= rot(c, 4);  c += b; \
671    b -= a;  b ^= rot(a, 6);  a += c; \
672    c -= b;  c ^= rot(b, 8);  b += a; \
673    a -= c;  a ^= rot(c,16);  c += b; \
674    b -= a;  b ^= rot(a,19);  a += c; \
675    c -= b;  c ^= rot(b, 4);  b += a; \
676  }
677
678// From http://www.burtleburtle.net, by Bob Jenkins.
679#define final(a,b,c) \
680  { \
681    c ^= b; c -= rot(b,14); \
682    a ^= c; a -= rot(c,11); \
683    b ^= a; b -= rot(a,25); \
684    c ^= b; c -= rot(b,16); \
685    a ^= c; a -= rot(c,4);  \
686    b ^= a; b -= rot(a,14); \
687    c ^= b; c -= rot(b,24); \
688  }
689
690// hashword() was adapted from http://www.burtleburtle.net, by Bob
691// Jenkins.  k is a pointer to an array of uint32_t values; length is
692// the length of the key, in 32-bit chunks.  This version only handles
693// keys that are a multiple of 32 bits in size.
694static inline uint32_t hashword(const uint64_t *k64, size_t length)
695{
696  const uint32_t *k = reinterpret_cast<const uint32_t *>(k64);
697  uint32_t a,b,c;
698
699  /* Set up the internal state */
700  a = b = c = 0xdeadbeef + (((uint32_t)length)<<2);
701
702  /*------------------------------------------------- handle most of the key */
703  while (length > 3) {
704    a += k[0];
705    b += k[1];
706    c += k[2];
707    mix(a,b,c);
708    length -= 3;
709    k += 3;
710  }
711
712  /*------------------------------------------- handle the last 3 uint32_t's */
713  switch (length) {                  /* all the case statements fall through */
714  case 3 : c+=k[2];
715  case 2 : b+=k[1];
716  case 1 : a+=k[0];
717    final(a,b,c);
718    case 0:     /* case 0: nothing left to add */
719      break;
720    }
721  /*------------------------------------------------------ report the result */
722  return c;
723}
724
725// hashword8() was adapted from http://www.burtleburtle.net, by Bob
726// Jenkins.  This computes a 32-bit hash from one 64-bit word.  When
727// targeting x86 (32 or 64 bit), both LLVM and GCC compile this
728// function into about 35 instructions when inlined.
729static inline uint32_t hashword8(const uint64_t k64)
730{
731  uint32_t a,b,c;
732  a = b = c = 0xdeadbeef + 4;
733  b += k64 >> 32;
734  a += k64 & 0xffffffff;
735  final(a,b,c);
736  return c;
737}
738#undef final
739#undef mix
740#undef rot
741
742uint64_t APInt::getHashValue() const {
743  uint64_t hash;
744  if (isSingleWord())
745    hash = hashword8(VAL);
746  else
747    hash = hashword(pVal, getNumWords()*2);
748  return hash;
749}
750
751/// HiBits - This function returns the high "numBits" bits of this APInt.
752APInt APInt::getHiBits(unsigned numBits) const {
753  return APIntOps::lshr(*this, BitWidth - numBits);
754}
755
756/// LoBits - This function returns the low "numBits" bits of this APInt.
757APInt APInt::getLoBits(unsigned numBits) const {
758  return APIntOps::lshr(APIntOps::shl(*this, BitWidth - numBits),
759                        BitWidth - numBits);
760}
761
762unsigned APInt::countLeadingZerosSlowCase() const {
763  // Treat the most significand word differently because it might have
764  // meaningless bits set beyond the precision.
765  unsigned BitsInMSW = BitWidth % APINT_BITS_PER_WORD;
766  integerPart MSWMask;
767  if (BitsInMSW) MSWMask = (integerPart(1) << BitsInMSW) - 1;
768  else {
769    MSWMask = ~integerPart(0);
770    BitsInMSW = APINT_BITS_PER_WORD;
771  }
772
773  unsigned i = getNumWords();
774  integerPart MSW = pVal[i-1] & MSWMask;
775  if (MSW)
776    return CountLeadingZeros_64(MSW) - (APINT_BITS_PER_WORD - BitsInMSW);
777
778  unsigned Count = BitsInMSW;
779  for (--i; i > 0u; --i) {
780    if (pVal[i-1] == 0)
781      Count += APINT_BITS_PER_WORD;
782    else {
783      Count += CountLeadingZeros_64(pVal[i-1]);
784      break;
785    }
786  }
787  return Count;
788}
789
790static unsigned countLeadingOnes_64(uint64_t V, unsigned skip) {
791  unsigned Count = 0;
792  if (skip)
793    V <<= skip;
794  while (V && (V & (1ULL << 63))) {
795    Count++;
796    V <<= 1;
797  }
798  return Count;
799}
800
801unsigned APInt::countLeadingOnes() const {
802  if (isSingleWord())
803    return countLeadingOnes_64(VAL, APINT_BITS_PER_WORD - BitWidth);
804
805  unsigned highWordBits = BitWidth % APINT_BITS_PER_WORD;
806  unsigned shift;
807  if (!highWordBits) {
808    highWordBits = APINT_BITS_PER_WORD;
809    shift = 0;
810  } else {
811    shift = APINT_BITS_PER_WORD - highWordBits;
812  }
813  int i = getNumWords() - 1;
814  unsigned Count = countLeadingOnes_64(pVal[i], shift);
815  if (Count == highWordBits) {
816    for (i--; i >= 0; --i) {
817      if (pVal[i] == -1ULL)
818        Count += APINT_BITS_PER_WORD;
819      else {
820        Count += countLeadingOnes_64(pVal[i], 0);
821        break;
822      }
823    }
824  }
825  return Count;
826}
827
828unsigned APInt::countTrailingZeros() const {
829  if (isSingleWord())
830    return std::min(unsigned(CountTrailingZeros_64(VAL)), BitWidth);
831  unsigned Count = 0;
832  unsigned i = 0;
833  for (; i < getNumWords() && pVal[i] == 0; ++i)
834    Count += APINT_BITS_PER_WORD;
835  if (i < getNumWords())
836    Count += CountTrailingZeros_64(pVal[i]);
837  return std::min(Count, BitWidth);
838}
839
840unsigned APInt::countTrailingOnesSlowCase() const {
841  unsigned Count = 0;
842  unsigned i = 0;
843  for (; i < getNumWords() && pVal[i] == -1ULL; ++i)
844    Count += APINT_BITS_PER_WORD;
845  if (i < getNumWords())
846    Count += CountTrailingOnes_64(pVal[i]);
847  return std::min(Count, BitWidth);
848}
849
850unsigned APInt::countPopulationSlowCase() const {
851  unsigned Count = 0;
852  for (unsigned i = 0; i < getNumWords(); ++i)
853    Count += CountPopulation_64(pVal[i]);
854  return Count;
855}
856
857APInt APInt::byteSwap() const {
858  assert(BitWidth >= 16 && BitWidth % 16 == 0 && "Cannot byteswap!");
859  if (BitWidth == 16)
860    return APInt(BitWidth, ByteSwap_16(uint16_t(VAL)));
861  else if (BitWidth == 32)
862    return APInt(BitWidth, ByteSwap_32(unsigned(VAL)));
863  else if (BitWidth == 48) {
864    unsigned Tmp1 = unsigned(VAL >> 16);
865    Tmp1 = ByteSwap_32(Tmp1);
866    uint16_t Tmp2 = uint16_t(VAL);
867    Tmp2 = ByteSwap_16(Tmp2);
868    return APInt(BitWidth, (uint64_t(Tmp2) << 32) | Tmp1);
869  } else if (BitWidth == 64)
870    return APInt(BitWidth, ByteSwap_64(VAL));
871  else {
872    APInt Result(BitWidth, 0);
873    char *pByte = (char*)Result.pVal;
874    for (unsigned i = 0; i < BitWidth / APINT_WORD_SIZE / 2; ++i) {
875      char Tmp = pByte[i];
876      pByte[i] = pByte[BitWidth / APINT_WORD_SIZE - 1 - i];
877      pByte[BitWidth / APINT_WORD_SIZE - i - 1] = Tmp;
878    }
879    return Result;
880  }
881}
882
883APInt llvm::APIntOps::GreatestCommonDivisor(const APInt& API1,
884                                            const APInt& API2) {
885  APInt A = API1, B = API2;
886  while (!!B) {
887    APInt T = B;
888    B = APIntOps::urem(A, B);
889    A = T;
890  }
891  return A;
892}
893
894APInt llvm::APIntOps::RoundDoubleToAPInt(double Double, unsigned width) {
895  union {
896    double D;
897    uint64_t I;
898  } T;
899  T.D = Double;
900
901  // Get the sign bit from the highest order bit
902  bool isNeg = T.I >> 63;
903
904  // Get the 11-bit exponent and adjust for the 1023 bit bias
905  int64_t exp = ((T.I >> 52) & 0x7ff) - 1023;
906
907  // If the exponent is negative, the value is < 0 so just return 0.
908  if (exp < 0)
909    return APInt(width, 0u);
910
911  // Extract the mantissa by clearing the top 12 bits (sign + exponent).
912  uint64_t mantissa = (T.I & (~0ULL >> 12)) | 1ULL << 52;
913
914  // If the exponent doesn't shift all bits out of the mantissa
915  if (exp < 52)
916    return isNeg ? -APInt(width, mantissa >> (52 - exp)) :
917                    APInt(width, mantissa >> (52 - exp));
918
919  // If the client didn't provide enough bits for us to shift the mantissa into
920  // then the result is undefined, just return 0
921  if (width <= exp - 52)
922    return APInt(width, 0);
923
924  // Otherwise, we have to shift the mantissa bits up to the right location
925  APInt Tmp(width, mantissa);
926  Tmp = Tmp.shl((unsigned)exp - 52);
927  return isNeg ? -Tmp : Tmp;
928}
929
930/// RoundToDouble - This function converts this APInt to a double.
931/// The layout for double is as following (IEEE Standard 754):
932///  --------------------------------------
933/// |  Sign    Exponent    Fraction    Bias |
934/// |-------------------------------------- |
935/// |  1[63]   11[62-52]   52[51-00]   1023 |
936///  --------------------------------------
937double APInt::roundToDouble(bool isSigned) const {
938
939  // Handle the simple case where the value is contained in one uint64_t.
940  // It is wrong to optimize getWord(0) to VAL; there might be more than one word.
941  if (isSingleWord() || getActiveBits() <= APINT_BITS_PER_WORD) {
942    if (isSigned) {
943      int64_t sext = (int64_t(getWord(0)) << (64-BitWidth)) >> (64-BitWidth);
944      return double(sext);
945    } else
946      return double(getWord(0));
947  }
948
949  // Determine if the value is negative.
950  bool isNeg = isSigned ? (*this)[BitWidth-1] : false;
951
952  // Construct the absolute value if we're negative.
953  APInt Tmp(isNeg ? -(*this) : (*this));
954
955  // Figure out how many bits we're using.
956  unsigned n = Tmp.getActiveBits();
957
958  // The exponent (without bias normalization) is just the number of bits
959  // we are using. Note that the sign bit is gone since we constructed the
960  // absolute value.
961  uint64_t exp = n;
962
963  // Return infinity for exponent overflow
964  if (exp > 1023) {
965    if (!isSigned || !isNeg)
966      return std::numeric_limits<double>::infinity();
967    else
968      return -std::numeric_limits<double>::infinity();
969  }
970  exp += 1023; // Increment for 1023 bias
971
972  // Number of bits in mantissa is 52. To obtain the mantissa value, we must
973  // extract the high 52 bits from the correct words in pVal.
974  uint64_t mantissa;
975  unsigned hiWord = whichWord(n-1);
976  if (hiWord == 0) {
977    mantissa = Tmp.pVal[0];
978    if (n > 52)
979      mantissa >>= n - 52; // shift down, we want the top 52 bits.
980  } else {
981    assert(hiWord > 0 && "huh?");
982    uint64_t hibits = Tmp.pVal[hiWord] << (52 - n % APINT_BITS_PER_WORD);
983    uint64_t lobits = Tmp.pVal[hiWord-1] >> (11 + n % APINT_BITS_PER_WORD);
984    mantissa = hibits | lobits;
985  }
986
987  // The leading bit of mantissa is implicit, so get rid of it.
988  uint64_t sign = isNeg ? (1ULL << (APINT_BITS_PER_WORD - 1)) : 0;
989  union {
990    double D;
991    uint64_t I;
992  } T;
993  T.I = sign | (exp << 52) | mantissa;
994  return T.D;
995}
996
997// Truncate to new width.
998APInt APInt::trunc(unsigned width) const {
999  assert(width < BitWidth && "Invalid APInt Truncate request");
1000  assert(width && "Can't truncate to 0 bits");
1001
1002  if (width <= APINT_BITS_PER_WORD)
1003    return APInt(width, getRawData()[0]);
1004
1005  APInt Result(getMemory(getNumWords(width)), width);
1006
1007  // Copy full words.
1008  unsigned i;
1009  for (i = 0; i != width / APINT_BITS_PER_WORD; i++)
1010    Result.pVal[i] = pVal[i];
1011
1012  // Truncate and copy any partial word.
1013  unsigned bits = (0 - width) % APINT_BITS_PER_WORD;
1014  if (bits != 0)
1015    Result.pVal[i] = pVal[i] << bits >> bits;
1016
1017  return Result;
1018}
1019
1020// Sign extend to a new width.
1021APInt APInt::sext(unsigned width) const {
1022  assert(width > BitWidth && "Invalid APInt SignExtend request");
1023
1024  if (width <= APINT_BITS_PER_WORD) {
1025    uint64_t val = VAL << (APINT_BITS_PER_WORD - BitWidth);
1026    val = (int64_t)val >> (width - BitWidth);
1027    return APInt(width, val >> (APINT_BITS_PER_WORD - width));
1028  }
1029
1030  APInt Result(getMemory(getNumWords(width)), width);
1031
1032  // Copy full words.
1033  unsigned i;
1034  uint64_t word = 0;
1035  for (i = 0; i != BitWidth / APINT_BITS_PER_WORD; i++) {
1036    word = getRawData()[i];
1037    Result.pVal[i] = word;
1038  }
1039
1040  // Read and sign-extend any partial word.
1041  unsigned bits = (0 - BitWidth) % APINT_BITS_PER_WORD;
1042  if (bits != 0)
1043    word = (int64_t)getRawData()[i] << bits >> bits;
1044  else
1045    word = (int64_t)word >> (APINT_BITS_PER_WORD - 1);
1046
1047  // Write remaining full words.
1048  for (; i != width / APINT_BITS_PER_WORD; i++) {
1049    Result.pVal[i] = word;
1050    word = (int64_t)word >> (APINT_BITS_PER_WORD - 1);
1051  }
1052
1053  // Write any partial word.
1054  bits = (0 - width) % APINT_BITS_PER_WORD;
1055  if (bits != 0)
1056    Result.pVal[i] = word << bits >> bits;
1057
1058  return Result;
1059}
1060
1061//  Zero extend to a new width.
1062APInt APInt::zext(unsigned width) const {
1063  assert(width > BitWidth && "Invalid APInt ZeroExtend request");
1064
1065  if (width <= APINT_BITS_PER_WORD)
1066    return APInt(width, VAL);
1067
1068  APInt Result(getMemory(getNumWords(width)), width);
1069
1070  // Copy words.
1071  unsigned i;
1072  for (i = 0; i != getNumWords(); i++)
1073    Result.pVal[i] = getRawData()[i];
1074
1075  // Zero remaining words.
1076  memset(&Result.pVal[i], 0, (Result.getNumWords() - i) * APINT_WORD_SIZE);
1077
1078  return Result;
1079}
1080
1081APInt APInt::zextOrTrunc(unsigned width) const {
1082  if (BitWidth < width)
1083    return zext(width);
1084  if (BitWidth > width)
1085    return trunc(width);
1086  return *this;
1087}
1088
1089APInt APInt::sextOrTrunc(unsigned width) const {
1090  if (BitWidth < width)
1091    return sext(width);
1092  if (BitWidth > width)
1093    return trunc(width);
1094  return *this;
1095}
1096
1097/// Arithmetic right-shift this APInt by shiftAmt.
1098/// @brief Arithmetic right-shift function.
1099APInt APInt::ashr(const APInt &shiftAmt) const {
1100  return ashr((unsigned)shiftAmt.getLimitedValue(BitWidth));
1101}
1102
1103/// Arithmetic right-shift this APInt by shiftAmt.
1104/// @brief Arithmetic right-shift function.
1105APInt APInt::ashr(unsigned shiftAmt) const {
1106  assert(shiftAmt <= BitWidth && "Invalid shift amount");
1107  // Handle a degenerate case
1108  if (shiftAmt == 0)
1109    return *this;
1110
1111  // Handle single word shifts with built-in ashr
1112  if (isSingleWord()) {
1113    if (shiftAmt == BitWidth)
1114      return APInt(BitWidth, 0); // undefined
1115    else {
1116      unsigned SignBit = APINT_BITS_PER_WORD - BitWidth;
1117      return APInt(BitWidth,
1118        (((int64_t(VAL) << SignBit) >> SignBit) >> shiftAmt));
1119    }
1120  }
1121
1122  // If all the bits were shifted out, the result is, technically, undefined.
1123  // We return -1 if it was negative, 0 otherwise. We check this early to avoid
1124  // issues in the algorithm below.
1125  if (shiftAmt == BitWidth) {
1126    if (isNegative())
1127      return APInt(BitWidth, -1ULL, true);
1128    else
1129      return APInt(BitWidth, 0);
1130  }
1131
1132  // Create some space for the result.
1133  uint64_t * val = new uint64_t[getNumWords()];
1134
1135  // Compute some values needed by the following shift algorithms
1136  unsigned wordShift = shiftAmt % APINT_BITS_PER_WORD; // bits to shift per word
1137  unsigned offset = shiftAmt / APINT_BITS_PER_WORD; // word offset for shift
1138  unsigned breakWord = getNumWords() - 1 - offset; // last word affected
1139  unsigned bitsInWord = whichBit(BitWidth); // how many bits in last word?
1140  if (bitsInWord == 0)
1141    bitsInWord = APINT_BITS_PER_WORD;
1142
1143  // If we are shifting whole words, just move whole words
1144  if (wordShift == 0) {
1145    // Move the words containing significant bits
1146    for (unsigned i = 0; i <= breakWord; ++i)
1147      val[i] = pVal[i+offset]; // move whole word
1148
1149    // Adjust the top significant word for sign bit fill, if negative
1150    if (isNegative())
1151      if (bitsInWord < APINT_BITS_PER_WORD)
1152        val[breakWord] |= ~0ULL << bitsInWord; // set high bits
1153  } else {
1154    // Shift the low order words
1155    for (unsigned i = 0; i < breakWord; ++i) {
1156      // This combines the shifted corresponding word with the low bits from
1157      // the next word (shifted into this word's high bits).
1158      val[i] = (pVal[i+offset] >> wordShift) |
1159               (pVal[i+offset+1] << (APINT_BITS_PER_WORD - wordShift));
1160    }
1161
1162    // Shift the break word. In this case there are no bits from the next word
1163    // to include in this word.
1164    val[breakWord] = pVal[breakWord+offset] >> wordShift;
1165
1166    // Deal with sign extenstion in the break word, and possibly the word before
1167    // it.
1168    if (isNegative()) {
1169      if (wordShift > bitsInWord) {
1170        if (breakWord > 0)
1171          val[breakWord-1] |=
1172            ~0ULL << (APINT_BITS_PER_WORD - (wordShift - bitsInWord));
1173        val[breakWord] |= ~0ULL;
1174      } else
1175        val[breakWord] |= (~0ULL << (bitsInWord - wordShift));
1176    }
1177  }
1178
1179  // Remaining words are 0 or -1, just assign them.
1180  uint64_t fillValue = (isNegative() ? -1ULL : 0);
1181  for (unsigned i = breakWord+1; i < getNumWords(); ++i)
1182    val[i] = fillValue;
1183  return APInt(val, BitWidth).clearUnusedBits();
1184}
1185
1186/// Logical right-shift this APInt by shiftAmt.
1187/// @brief Logical right-shift function.
1188APInt APInt::lshr(const APInt &shiftAmt) const {
1189  return lshr((unsigned)shiftAmt.getLimitedValue(BitWidth));
1190}
1191
1192/// Logical right-shift this APInt by shiftAmt.
1193/// @brief Logical right-shift function.
1194APInt APInt::lshr(unsigned shiftAmt) const {
1195  if (isSingleWord()) {
1196    if (shiftAmt == BitWidth)
1197      return APInt(BitWidth, 0);
1198    else
1199      return APInt(BitWidth, this->VAL >> shiftAmt);
1200  }
1201
1202  // If all the bits were shifted out, the result is 0. This avoids issues
1203  // with shifting by the size of the integer type, which produces undefined
1204  // results. We define these "undefined results" to always be 0.
1205  if (shiftAmt == BitWidth)
1206    return APInt(BitWidth, 0);
1207
1208  // If none of the bits are shifted out, the result is *this. This avoids
1209  // issues with shifting by the size of the integer type, which produces
1210  // undefined results in the code below. This is also an optimization.
1211  if (shiftAmt == 0)
1212    return *this;
1213
1214  // Create some space for the result.
1215  uint64_t * val = new uint64_t[getNumWords()];
1216
1217  // If we are shifting less than a word, compute the shift with a simple carry
1218  if (shiftAmt < APINT_BITS_PER_WORD) {
1219    uint64_t carry = 0;
1220    for (int i = getNumWords()-1; i >= 0; --i) {
1221      val[i] = (pVal[i] >> shiftAmt) | carry;
1222      carry = pVal[i] << (APINT_BITS_PER_WORD - shiftAmt);
1223    }
1224    return APInt(val, BitWidth).clearUnusedBits();
1225  }
1226
1227  // Compute some values needed by the remaining shift algorithms
1228  unsigned wordShift = shiftAmt % APINT_BITS_PER_WORD;
1229  unsigned offset = shiftAmt / APINT_BITS_PER_WORD;
1230
1231  // If we are shifting whole words, just move whole words
1232  if (wordShift == 0) {
1233    for (unsigned i = 0; i < getNumWords() - offset; ++i)
1234      val[i] = pVal[i+offset];
1235    for (unsigned i = getNumWords()-offset; i < getNumWords(); i++)
1236      val[i] = 0;
1237    return APInt(val,BitWidth).clearUnusedBits();
1238  }
1239
1240  // Shift the low order words
1241  unsigned breakWord = getNumWords() - offset -1;
1242  for (unsigned i = 0; i < breakWord; ++i)
1243    val[i] = (pVal[i+offset] >> wordShift) |
1244             (pVal[i+offset+1] << (APINT_BITS_PER_WORD - wordShift));
1245  // Shift the break word.
1246  val[breakWord] = pVal[breakWord+offset] >> wordShift;
1247
1248  // Remaining words are 0
1249  for (unsigned i = breakWord+1; i < getNumWords(); ++i)
1250    val[i] = 0;
1251  return APInt(val, BitWidth).clearUnusedBits();
1252}
1253
1254/// Left-shift this APInt by shiftAmt.
1255/// @brief Left-shift function.
1256APInt APInt::shl(const APInt &shiftAmt) const {
1257  // It's undefined behavior in C to shift by BitWidth or greater.
1258  return shl((unsigned)shiftAmt.getLimitedValue(BitWidth));
1259}
1260
1261APInt APInt::shlSlowCase(unsigned shiftAmt) const {
1262  // If all the bits were shifted out, the result is 0. This avoids issues
1263  // with shifting by the size of the integer type, which produces undefined
1264  // results. We define these "undefined results" to always be 0.
1265  if (shiftAmt == BitWidth)
1266    return APInt(BitWidth, 0);
1267
1268  // If none of the bits are shifted out, the result is *this. This avoids a
1269  // lshr by the words size in the loop below which can produce incorrect
1270  // results. It also avoids the expensive computation below for a common case.
1271  if (shiftAmt == 0)
1272    return *this;
1273
1274  // Create some space for the result.
1275  uint64_t * val = new uint64_t[getNumWords()];
1276
1277  // If we are shifting less than a word, do it the easy way
1278  if (shiftAmt < APINT_BITS_PER_WORD) {
1279    uint64_t carry = 0;
1280    for (unsigned i = 0; i < getNumWords(); i++) {
1281      val[i] = pVal[i] << shiftAmt | carry;
1282      carry = pVal[i] >> (APINT_BITS_PER_WORD - shiftAmt);
1283    }
1284    return APInt(val, BitWidth).clearUnusedBits();
1285  }
1286
1287  // Compute some values needed by the remaining shift algorithms
1288  unsigned wordShift = shiftAmt % APINT_BITS_PER_WORD;
1289  unsigned offset = shiftAmt / APINT_BITS_PER_WORD;
1290
1291  // If we are shifting whole words, just move whole words
1292  if (wordShift == 0) {
1293    for (unsigned i = 0; i < offset; i++)
1294      val[i] = 0;
1295    for (unsigned i = offset; i < getNumWords(); i++)
1296      val[i] = pVal[i-offset];
1297    return APInt(val,BitWidth).clearUnusedBits();
1298  }
1299
1300  // Copy whole words from this to Result.
1301  unsigned i = getNumWords() - 1;
1302  for (; i > offset; --i)
1303    val[i] = pVal[i-offset] << wordShift |
1304             pVal[i-offset-1] >> (APINT_BITS_PER_WORD - wordShift);
1305  val[offset] = pVal[0] << wordShift;
1306  for (i = 0; i < offset; ++i)
1307    val[i] = 0;
1308  return APInt(val, BitWidth).clearUnusedBits();
1309}
1310
1311APInt APInt::rotl(const APInt &rotateAmt) const {
1312  return rotl((unsigned)rotateAmt.getLimitedValue(BitWidth));
1313}
1314
1315APInt APInt::rotl(unsigned rotateAmt) const {
1316  if (rotateAmt == 0)
1317    return *this;
1318  // Don't get too fancy, just use existing shift/or facilities
1319  APInt hi(*this);
1320  APInt lo(*this);
1321  hi.shl(rotateAmt);
1322  lo.lshr(BitWidth - rotateAmt);
1323  return hi | lo;
1324}
1325
1326APInt APInt::rotr(const APInt &rotateAmt) const {
1327  return rotr((unsigned)rotateAmt.getLimitedValue(BitWidth));
1328}
1329
1330APInt APInt::rotr(unsigned rotateAmt) const {
1331  if (rotateAmt == 0)
1332    return *this;
1333  // Don't get too fancy, just use existing shift/or facilities
1334  APInt hi(*this);
1335  APInt lo(*this);
1336  lo.lshr(rotateAmt);
1337  hi.shl(BitWidth - rotateAmt);
1338  return hi | lo;
1339}
1340
1341// Square Root - this method computes and returns the square root of "this".
1342// Three mechanisms are used for computation. For small values (<= 5 bits),
1343// a table lookup is done. This gets some performance for common cases. For
1344// values using less than 52 bits, the value is converted to double and then
1345// the libc sqrt function is called. The result is rounded and then converted
1346// back to a uint64_t which is then used to construct the result. Finally,
1347// the Babylonian method for computing square roots is used.
1348APInt APInt::sqrt() const {
1349
1350  // Determine the magnitude of the value.
1351  unsigned magnitude = getActiveBits();
1352
1353  // Use a fast table for some small values. This also gets rid of some
1354  // rounding errors in libc sqrt for small values.
1355  if (magnitude <= 5) {
1356    static const uint8_t results[32] = {
1357      /*     0 */ 0,
1358      /*  1- 2 */ 1, 1,
1359      /*  3- 6 */ 2, 2, 2, 2,
1360      /*  7-12 */ 3, 3, 3, 3, 3, 3,
1361      /* 13-20 */ 4, 4, 4, 4, 4, 4, 4, 4,
1362      /* 21-30 */ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
1363      /*    31 */ 6
1364    };
1365    return APInt(BitWidth, results[ (isSingleWord() ? VAL : pVal[0]) ]);
1366  }
1367
1368  // If the magnitude of the value fits in less than 52 bits (the precision of
1369  // an IEEE double precision floating point value), then we can use the
1370  // libc sqrt function which will probably use a hardware sqrt computation.
1371  // This should be faster than the algorithm below.
1372  if (magnitude < 52) {
1373#if HAVE_ROUND
1374    return APInt(BitWidth,
1375                 uint64_t(::round(::sqrt(double(isSingleWord()?VAL:pVal[0])))));
1376#else
1377    return APInt(BitWidth,
1378                 uint64_t(::sqrt(double(isSingleWord()?VAL:pVal[0]))) + 0.5);
1379#endif
1380  }
1381
1382  // Okay, all the short cuts are exhausted. We must compute it. The following
1383  // is a classical Babylonian method for computing the square root. This code
1384  // was adapted to APINt from a wikipedia article on such computations.
1385  // See http://www.wikipedia.org/ and go to the page named
1386  // Calculate_an_integer_square_root.
1387  unsigned nbits = BitWidth, i = 4;
1388  APInt testy(BitWidth, 16);
1389  APInt x_old(BitWidth, 1);
1390  APInt x_new(BitWidth, 0);
1391  APInt two(BitWidth, 2);
1392
1393  // Select a good starting value using binary logarithms.
1394  for (;; i += 2, testy = testy.shl(2))
1395    if (i >= nbits || this->ule(testy)) {
1396      x_old = x_old.shl(i / 2);
1397      break;
1398    }
1399
1400  // Use the Babylonian method to arrive at the integer square root:
1401  for (;;) {
1402    x_new = (this->udiv(x_old) + x_old).udiv(two);
1403    if (x_old.ule(x_new))
1404      break;
1405    x_old = x_new;
1406  }
1407
1408  // Make sure we return the closest approximation
1409  // NOTE: The rounding calculation below is correct. It will produce an
1410  // off-by-one discrepancy with results from pari/gp. That discrepancy has been
1411  // determined to be a rounding issue with pari/gp as it begins to use a
1412  // floating point representation after 192 bits. There are no discrepancies
1413  // between this algorithm and pari/gp for bit widths < 192 bits.
1414  APInt square(x_old * x_old);
1415  APInt nextSquare((x_old + 1) * (x_old +1));
1416  if (this->ult(square))
1417    return x_old;
1418  else if (this->ule(nextSquare)) {
1419    APInt midpoint((nextSquare - square).udiv(two));
1420    APInt offset(*this - square);
1421    if (offset.ult(midpoint))
1422      return x_old;
1423    else
1424      return x_old + 1;
1425  } else
1426    llvm_unreachable("Error in APInt::sqrt computation");
1427  return x_old + 1;
1428}
1429
1430/// Computes the multiplicative inverse of this APInt for a given modulo. The
1431/// iterative extended Euclidean algorithm is used to solve for this value,
1432/// however we simplify it to speed up calculating only the inverse, and take
1433/// advantage of div+rem calculations. We also use some tricks to avoid copying
1434/// (potentially large) APInts around.
1435APInt APInt::multiplicativeInverse(const APInt& modulo) const {
1436  assert(ult(modulo) && "This APInt must be smaller than the modulo");
1437
1438  // Using the properties listed at the following web page (accessed 06/21/08):
1439  //   http://www.numbertheory.org/php/euclid.html
1440  // (especially the properties numbered 3, 4 and 9) it can be proved that
1441  // BitWidth bits suffice for all the computations in the algorithm implemented
1442  // below. More precisely, this number of bits suffice if the multiplicative
1443  // inverse exists, but may not suffice for the general extended Euclidean
1444  // algorithm.
1445
1446  APInt r[2] = { modulo, *this };
1447  APInt t[2] = { APInt(BitWidth, 0), APInt(BitWidth, 1) };
1448  APInt q(BitWidth, 0);
1449
1450  unsigned i;
1451  for (i = 0; r[i^1] != 0; i ^= 1) {
1452    // An overview of the math without the confusing bit-flipping:
1453    // q = r[i-2] / r[i-1]
1454    // r[i] = r[i-2] % r[i-1]
1455    // t[i] = t[i-2] - t[i-1] * q
1456    udivrem(r[i], r[i^1], q, r[i]);
1457    t[i] -= t[i^1] * q;
1458  }
1459
1460  // If this APInt and the modulo are not coprime, there is no multiplicative
1461  // inverse, so return 0. We check this by looking at the next-to-last
1462  // remainder, which is the gcd(*this,modulo) as calculated by the Euclidean
1463  // algorithm.
1464  if (r[i] != 1)
1465    return APInt(BitWidth, 0);
1466
1467  // The next-to-last t is the multiplicative inverse.  However, we are
1468  // interested in a positive inverse. Calcuate a positive one from a negative
1469  // one if necessary. A simple addition of the modulo suffices because
1470  // abs(t[i]) is known to be less than *this/2 (see the link above).
1471  return t[i].isNegative() ? t[i] + modulo : t[i];
1472}
1473
1474/// Calculate the magic numbers required to implement a signed integer division
1475/// by a constant as a sequence of multiplies, adds and shifts.  Requires that
1476/// the divisor not be 0, 1, or -1.  Taken from "Hacker's Delight", Henry S.
1477/// Warren, Jr., chapter 10.
1478APInt::ms APInt::magic() const {
1479  const APInt& d = *this;
1480  unsigned p;
1481  APInt ad, anc, delta, q1, r1, q2, r2, t;
1482  APInt signedMin = APInt::getSignedMinValue(d.getBitWidth());
1483  struct ms mag;
1484
1485  ad = d.abs();
1486  t = signedMin + (d.lshr(d.getBitWidth() - 1));
1487  anc = t - 1 - t.urem(ad);   // absolute value of nc
1488  p = d.getBitWidth() - 1;    // initialize p
1489  q1 = signedMin.udiv(anc);   // initialize q1 = 2p/abs(nc)
1490  r1 = signedMin - q1*anc;    // initialize r1 = rem(2p,abs(nc))
1491  q2 = signedMin.udiv(ad);    // initialize q2 = 2p/abs(d)
1492  r2 = signedMin - q2*ad;     // initialize r2 = rem(2p,abs(d))
1493  do {
1494    p = p + 1;
1495    q1 = q1<<1;          // update q1 = 2p/abs(nc)
1496    r1 = r1<<1;          // update r1 = rem(2p/abs(nc))
1497    if (r1.uge(anc)) {  // must be unsigned comparison
1498      q1 = q1 + 1;
1499      r1 = r1 - anc;
1500    }
1501    q2 = q2<<1;          // update q2 = 2p/abs(d)
1502    r2 = r2<<1;          // update r2 = rem(2p/abs(d))
1503    if (r2.uge(ad)) {   // must be unsigned comparison
1504      q2 = q2 + 1;
1505      r2 = r2 - ad;
1506    }
1507    delta = ad - r2;
1508  } while (q1.ult(delta) || (q1 == delta && r1 == 0));
1509
1510  mag.m = q2 + 1;
1511  if (d.isNegative()) mag.m = -mag.m;   // resulting magic number
1512  mag.s = p - d.getBitWidth();          // resulting shift
1513  return mag;
1514}
1515
1516/// Calculate the magic numbers required to implement an unsigned integer
1517/// division by a constant as a sequence of multiplies, adds and shifts.
1518/// Requires that the divisor not be 0.  Taken from "Hacker's Delight", Henry
1519/// S. Warren, Jr., chapter 10.
1520APInt::mu APInt::magicu() const {
1521  const APInt& d = *this;
1522  unsigned p;
1523  APInt nc, delta, q1, r1, q2, r2;
1524  struct mu magu;
1525  magu.a = 0;               // initialize "add" indicator
1526  APInt allOnes = APInt::getAllOnesValue(d.getBitWidth());
1527  APInt signedMin = APInt::getSignedMinValue(d.getBitWidth());
1528  APInt signedMax = APInt::getSignedMaxValue(d.getBitWidth());
1529
1530  nc = allOnes - (-d).urem(d);
1531  p = d.getBitWidth() - 1;  // initialize p
1532  q1 = signedMin.udiv(nc);  // initialize q1 = 2p/nc
1533  r1 = signedMin - q1*nc;   // initialize r1 = rem(2p,nc)
1534  q2 = signedMax.udiv(d);   // initialize q2 = (2p-1)/d
1535  r2 = signedMax - q2*d;    // initialize r2 = rem((2p-1),d)
1536  do {
1537    p = p + 1;
1538    if (r1.uge(nc - r1)) {
1539      q1 = q1 + q1 + 1;  // update q1
1540      r1 = r1 + r1 - nc; // update r1
1541    }
1542    else {
1543      q1 = q1+q1; // update q1
1544      r1 = r1+r1; // update r1
1545    }
1546    if ((r2 + 1).uge(d - r2)) {
1547      if (q2.uge(signedMax)) magu.a = 1;
1548      q2 = q2+q2 + 1;     // update q2
1549      r2 = r2+r2 + 1 - d; // update r2
1550    }
1551    else {
1552      if (q2.uge(signedMin)) magu.a = 1;
1553      q2 = q2+q2;     // update q2
1554      r2 = r2+r2 + 1; // update r2
1555    }
1556    delta = d - 1 - r2;
1557  } while (p < d.getBitWidth()*2 &&
1558           (q1.ult(delta) || (q1 == delta && r1 == 0)));
1559  magu.m = q2 + 1; // resulting magic number
1560  magu.s = p - d.getBitWidth();  // resulting shift
1561  return magu;
1562}
1563
1564/// Implementation of Knuth's Algorithm D (Division of nonnegative integers)
1565/// from "Art of Computer Programming, Volume 2", section 4.3.1, p. 272. The
1566/// variables here have the same names as in the algorithm. Comments explain
1567/// the algorithm and any deviation from it.
1568static void KnuthDiv(unsigned *u, unsigned *v, unsigned *q, unsigned* r,
1569                     unsigned m, unsigned n) {
1570  assert(u && "Must provide dividend");
1571  assert(v && "Must provide divisor");
1572  assert(q && "Must provide quotient");
1573  assert(u != v && u != q && v != q && "Must us different memory");
1574  assert(n>1 && "n must be > 1");
1575
1576  // Knuth uses the value b as the base of the number system. In our case b
1577  // is 2^31 so we just set it to -1u.
1578  uint64_t b = uint64_t(1) << 32;
1579
1580#if 0
1581  DEBUG(dbgs() << "KnuthDiv: m=" << m << " n=" << n << '\n');
1582  DEBUG(dbgs() << "KnuthDiv: original:");
1583  DEBUG(for (int i = m+n; i >=0; i--) dbgs() << " " << u[i]);
1584  DEBUG(dbgs() << " by");
1585  DEBUG(for (int i = n; i >0; i--) dbgs() << " " << v[i-1]);
1586  DEBUG(dbgs() << '\n');
1587#endif
1588  // D1. [Normalize.] Set d = b / (v[n-1] + 1) and multiply all the digits of
1589  // u and v by d. Note that we have taken Knuth's advice here to use a power
1590  // of 2 value for d such that d * v[n-1] >= b/2 (b is the base). A power of
1591  // 2 allows us to shift instead of multiply and it is easy to determine the
1592  // shift amount from the leading zeros.  We are basically normalizing the u
1593  // and v so that its high bits are shifted to the top of v's range without
1594  // overflow. Note that this can require an extra word in u so that u must
1595  // be of length m+n+1.
1596  unsigned shift = CountLeadingZeros_32(v[n-1]);
1597  unsigned v_carry = 0;
1598  unsigned u_carry = 0;
1599  if (shift) {
1600    for (unsigned i = 0; i < m+n; ++i) {
1601      unsigned u_tmp = u[i] >> (32 - shift);
1602      u[i] = (u[i] << shift) | u_carry;
1603      u_carry = u_tmp;
1604    }
1605    for (unsigned i = 0; i < n; ++i) {
1606      unsigned v_tmp = v[i] >> (32 - shift);
1607      v[i] = (v[i] << shift) | v_carry;
1608      v_carry = v_tmp;
1609    }
1610  }
1611  u[m+n] = u_carry;
1612#if 0
1613  DEBUG(dbgs() << "KnuthDiv:   normal:");
1614  DEBUG(for (int i = m+n; i >=0; i--) dbgs() << " " << u[i]);
1615  DEBUG(dbgs() << " by");
1616  DEBUG(for (int i = n; i >0; i--) dbgs() << " " << v[i-1]);
1617  DEBUG(dbgs() << '\n');
1618#endif
1619
1620  // D2. [Initialize j.]  Set j to m. This is the loop counter over the places.
1621  int j = m;
1622  do {
1623    DEBUG(dbgs() << "KnuthDiv: quotient digit #" << j << '\n');
1624    // D3. [Calculate q'.].
1625    //     Set qp = (u[j+n]*b + u[j+n-1]) / v[n-1]. (qp=qprime=q')
1626    //     Set rp = (u[j+n]*b + u[j+n-1]) % v[n-1]. (rp=rprime=r')
1627    // Now test if qp == b or qp*v[n-2] > b*rp + u[j+n-2]; if so, decrease
1628    // qp by 1, inrease rp by v[n-1], and repeat this test if rp < b. The test
1629    // on v[n-2] determines at high speed most of the cases in which the trial
1630    // value qp is one too large, and it eliminates all cases where qp is two
1631    // too large.
1632    uint64_t dividend = ((uint64_t(u[j+n]) << 32) + u[j+n-1]);
1633    DEBUG(dbgs() << "KnuthDiv: dividend == " << dividend << '\n');
1634    uint64_t qp = dividend / v[n-1];
1635    uint64_t rp = dividend % v[n-1];
1636    if (qp == b || qp*v[n-2] > b*rp + u[j+n-2]) {
1637      qp--;
1638      rp += v[n-1];
1639      if (rp < b && (qp == b || qp*v[n-2] > b*rp + u[j+n-2]))
1640        qp--;
1641    }
1642    DEBUG(dbgs() << "KnuthDiv: qp == " << qp << ", rp == " << rp << '\n');
1643
1644    // D4. [Multiply and subtract.] Replace (u[j+n]u[j+n-1]...u[j]) with
1645    // (u[j+n]u[j+n-1]..u[j]) - qp * (v[n-1]...v[1]v[0]). This computation
1646    // consists of a simple multiplication by a one-place number, combined with
1647    // a subtraction.
1648    bool isNeg = false;
1649    for (unsigned i = 0; i < n; ++i) {
1650      uint64_t u_tmp = uint64_t(u[j+i]) | (uint64_t(u[j+i+1]) << 32);
1651      uint64_t subtrahend = uint64_t(qp) * uint64_t(v[i]);
1652      bool borrow = subtrahend > u_tmp;
1653      DEBUG(dbgs() << "KnuthDiv: u_tmp == " << u_tmp
1654                   << ", subtrahend == " << subtrahend
1655                   << ", borrow = " << borrow << '\n');
1656
1657      uint64_t result = u_tmp - subtrahend;
1658      unsigned k = j + i;
1659      u[k++] = (unsigned)(result & (b-1)); // subtract low word
1660      u[k++] = (unsigned)(result >> 32);   // subtract high word
1661      while (borrow && k <= m+n) { // deal with borrow to the left
1662        borrow = u[k] == 0;
1663        u[k]--;
1664        k++;
1665      }
1666      isNeg |= borrow;
1667      DEBUG(dbgs() << "KnuthDiv: u[j+i] == " << u[j+i] << ",  u[j+i+1] == " <<
1668                    u[j+i+1] << '\n');
1669    }
1670    DEBUG(dbgs() << "KnuthDiv: after subtraction:");
1671    DEBUG(for (int i = m+n; i >=0; i--) dbgs() << " " << u[i]);
1672    DEBUG(dbgs() << '\n');
1673    // The digits (u[j+n]...u[j]) should be kept positive; if the result of
1674    // this step is actually negative, (u[j+n]...u[j]) should be left as the
1675    // true value plus b**(n+1), namely as the b's complement of
1676    // the true value, and a "borrow" to the left should be remembered.
1677    //
1678    if (isNeg) {
1679      bool carry = true;  // true because b's complement is "complement + 1"
1680      for (unsigned i = 0; i <= m+n; ++i) {
1681        u[i] = ~u[i] + carry; // b's complement
1682        carry = carry && u[i] == 0;
1683      }
1684    }
1685    DEBUG(dbgs() << "KnuthDiv: after complement:");
1686    DEBUG(for (int i = m+n; i >=0; i--) dbgs() << " " << u[i]);
1687    DEBUG(dbgs() << '\n');
1688
1689    // D5. [Test remainder.] Set q[j] = qp. If the result of step D4 was
1690    // negative, go to step D6; otherwise go on to step D7.
1691    q[j] = (unsigned)qp;
1692    if (isNeg) {
1693      // D6. [Add back]. The probability that this step is necessary is very
1694      // small, on the order of only 2/b. Make sure that test data accounts for
1695      // this possibility. Decrease q[j] by 1
1696      q[j]--;
1697      // and add (0v[n-1]...v[1]v[0]) to (u[j+n]u[j+n-1]...u[j+1]u[j]).
1698      // A carry will occur to the left of u[j+n], and it should be ignored
1699      // since it cancels with the borrow that occurred in D4.
1700      bool carry = false;
1701      for (unsigned i = 0; i < n; i++) {
1702        unsigned limit = std::min(u[j+i],v[i]);
1703        u[j+i] += v[i] + carry;
1704        carry = u[j+i] < limit || (carry && u[j+i] == limit);
1705      }
1706      u[j+n] += carry;
1707    }
1708    DEBUG(dbgs() << "KnuthDiv: after correction:");
1709    DEBUG(for (int i = m+n; i >=0; i--) dbgs() <<" " << u[i]);
1710    DEBUG(dbgs() << "\nKnuthDiv: digit result = " << q[j] << '\n');
1711
1712  // D7. [Loop on j.]  Decrease j by one. Now if j >= 0, go back to D3.
1713  } while (--j >= 0);
1714
1715  DEBUG(dbgs() << "KnuthDiv: quotient:");
1716  DEBUG(for (int i = m; i >=0; i--) dbgs() <<" " << q[i]);
1717  DEBUG(dbgs() << '\n');
1718
1719  // D8. [Unnormalize]. Now q[...] is the desired quotient, and the desired
1720  // remainder may be obtained by dividing u[...] by d. If r is non-null we
1721  // compute the remainder (urem uses this).
1722  if (r) {
1723    // The value d is expressed by the "shift" value above since we avoided
1724    // multiplication by d by using a shift left. So, all we have to do is
1725    // shift right here. In order to mak
1726    if (shift) {
1727      unsigned carry = 0;
1728      DEBUG(dbgs() << "KnuthDiv: remainder:");
1729      for (int i = n-1; i >= 0; i--) {
1730        r[i] = (u[i] >> shift) | carry;
1731        carry = u[i] << (32 - shift);
1732        DEBUG(dbgs() << " " << r[i]);
1733      }
1734    } else {
1735      for (int i = n-1; i >= 0; i--) {
1736        r[i] = u[i];
1737        DEBUG(dbgs() << " " << r[i]);
1738      }
1739    }
1740    DEBUG(dbgs() << '\n');
1741  }
1742#if 0
1743  DEBUG(dbgs() << '\n');
1744#endif
1745}
1746
1747void APInt::divide(const APInt LHS, unsigned lhsWords,
1748                   const APInt &RHS, unsigned rhsWords,
1749                   APInt *Quotient, APInt *Remainder)
1750{
1751  assert(lhsWords >= rhsWords && "Fractional result");
1752
1753  // First, compose the values into an array of 32-bit words instead of
1754  // 64-bit words. This is a necessity of both the "short division" algorithm
1755  // and the Knuth "classical algorithm" which requires there to be native
1756  // operations for +, -, and * on an m bit value with an m*2 bit result. We
1757  // can't use 64-bit operands here because we don't have native results of
1758  // 128-bits. Furthermore, casting the 64-bit values to 32-bit values won't
1759  // work on large-endian machines.
1760  uint64_t mask = ~0ull >> (sizeof(unsigned)*CHAR_BIT);
1761  unsigned n = rhsWords * 2;
1762  unsigned m = (lhsWords * 2) - n;
1763
1764  // Allocate space for the temporary values we need either on the stack, if
1765  // it will fit, or on the heap if it won't.
1766  unsigned SPACE[128];
1767  unsigned *U = 0;
1768  unsigned *V = 0;
1769  unsigned *Q = 0;
1770  unsigned *R = 0;
1771  if ((Remainder?4:3)*n+2*m+1 <= 128) {
1772    U = &SPACE[0];
1773    V = &SPACE[m+n+1];
1774    Q = &SPACE[(m+n+1) + n];
1775    if (Remainder)
1776      R = &SPACE[(m+n+1) + n + (m+n)];
1777  } else {
1778    U = new unsigned[m + n + 1];
1779    V = new unsigned[n];
1780    Q = new unsigned[m+n];
1781    if (Remainder)
1782      R = new unsigned[n];
1783  }
1784
1785  // Initialize the dividend
1786  memset(U, 0, (m+n+1)*sizeof(unsigned));
1787  for (unsigned i = 0; i < lhsWords; ++i) {
1788    uint64_t tmp = (LHS.getNumWords() == 1 ? LHS.VAL : LHS.pVal[i]);
1789    U[i * 2] = (unsigned)(tmp & mask);
1790    U[i * 2 + 1] = (unsigned)(tmp >> (sizeof(unsigned)*CHAR_BIT));
1791  }
1792  U[m+n] = 0; // this extra word is for "spill" in the Knuth algorithm.
1793
1794  // Initialize the divisor
1795  memset(V, 0, (n)*sizeof(unsigned));
1796  for (unsigned i = 0; i < rhsWords; ++i) {
1797    uint64_t tmp = (RHS.getNumWords() == 1 ? RHS.VAL : RHS.pVal[i]);
1798    V[i * 2] = (unsigned)(tmp & mask);
1799    V[i * 2 + 1] = (unsigned)(tmp >> (sizeof(unsigned)*CHAR_BIT));
1800  }
1801
1802  // initialize the quotient and remainder
1803  memset(Q, 0, (m+n) * sizeof(unsigned));
1804  if (Remainder)
1805    memset(R, 0, n * sizeof(unsigned));
1806
1807  // Now, adjust m and n for the Knuth division. n is the number of words in
1808  // the divisor. m is the number of words by which the dividend exceeds the
1809  // divisor (i.e. m+n is the length of the dividend). These sizes must not
1810  // contain any zero words or the Knuth algorithm fails.
1811  for (unsigned i = n; i > 0 && V[i-1] == 0; i--) {
1812    n--;
1813    m++;
1814  }
1815  for (unsigned i = m+n; i > 0 && U[i-1] == 0; i--)
1816    m--;
1817
1818  // If we're left with only a single word for the divisor, Knuth doesn't work
1819  // so we implement the short division algorithm here. This is much simpler
1820  // and faster because we are certain that we can divide a 64-bit quantity
1821  // by a 32-bit quantity at hardware speed and short division is simply a
1822  // series of such operations. This is just like doing short division but we
1823  // are using base 2^32 instead of base 10.
1824  assert(n != 0 && "Divide by zero?");
1825  if (n == 1) {
1826    unsigned divisor = V[0];
1827    unsigned remainder = 0;
1828    for (int i = m+n-1; i >= 0; i--) {
1829      uint64_t partial_dividend = uint64_t(remainder) << 32 | U[i];
1830      if (partial_dividend == 0) {
1831        Q[i] = 0;
1832        remainder = 0;
1833      } else if (partial_dividend < divisor) {
1834        Q[i] = 0;
1835        remainder = (unsigned)partial_dividend;
1836      } else if (partial_dividend == divisor) {
1837        Q[i] = 1;
1838        remainder = 0;
1839      } else {
1840        Q[i] = (unsigned)(partial_dividend / divisor);
1841        remainder = (unsigned)(partial_dividend - (Q[i] * divisor));
1842      }
1843    }
1844    if (R)
1845      R[0] = remainder;
1846  } else {
1847    // Now we're ready to invoke the Knuth classical divide algorithm. In this
1848    // case n > 1.
1849    KnuthDiv(U, V, Q, R, m, n);
1850  }
1851
1852  // If the caller wants the quotient
1853  if (Quotient) {
1854    // Set up the Quotient value's memory.
1855    if (Quotient->BitWidth != LHS.BitWidth) {
1856      if (Quotient->isSingleWord())
1857        Quotient->VAL = 0;
1858      else
1859        delete [] Quotient->pVal;
1860      Quotient->BitWidth = LHS.BitWidth;
1861      if (!Quotient->isSingleWord())
1862        Quotient->pVal = getClearedMemory(Quotient->getNumWords());
1863    } else
1864      Quotient->clearAllBits();
1865
1866    // The quotient is in Q. Reconstitute the quotient into Quotient's low
1867    // order words.
1868    if (lhsWords == 1) {
1869      uint64_t tmp =
1870        uint64_t(Q[0]) | (uint64_t(Q[1]) << (APINT_BITS_PER_WORD / 2));
1871      if (Quotient->isSingleWord())
1872        Quotient->VAL = tmp;
1873      else
1874        Quotient->pVal[0] = tmp;
1875    } else {
1876      assert(!Quotient->isSingleWord() && "Quotient APInt not large enough");
1877      for (unsigned i = 0; i < lhsWords; ++i)
1878        Quotient->pVal[i] =
1879          uint64_t(Q[i*2]) | (uint64_t(Q[i*2+1]) << (APINT_BITS_PER_WORD / 2));
1880    }
1881  }
1882
1883  // If the caller wants the remainder
1884  if (Remainder) {
1885    // Set up the Remainder value's memory.
1886    if (Remainder->BitWidth != RHS.BitWidth) {
1887      if (Remainder->isSingleWord())
1888        Remainder->VAL = 0;
1889      else
1890        delete [] Remainder->pVal;
1891      Remainder->BitWidth = RHS.BitWidth;
1892      if (!Remainder->isSingleWord())
1893        Remainder->pVal = getClearedMemory(Remainder->getNumWords());
1894    } else
1895      Remainder->clearAllBits();
1896
1897    // The remainder is in R. Reconstitute the remainder into Remainder's low
1898    // order words.
1899    if (rhsWords == 1) {
1900      uint64_t tmp =
1901        uint64_t(R[0]) | (uint64_t(R[1]) << (APINT_BITS_PER_WORD / 2));
1902      if (Remainder->isSingleWord())
1903        Remainder->VAL = tmp;
1904      else
1905        Remainder->pVal[0] = tmp;
1906    } else {
1907      assert(!Remainder->isSingleWord() && "Remainder APInt not large enough");
1908      for (unsigned i = 0; i < rhsWords; ++i)
1909        Remainder->pVal[i] =
1910          uint64_t(R[i*2]) | (uint64_t(R[i*2+1]) << (APINT_BITS_PER_WORD / 2));
1911    }
1912  }
1913
1914  // Clean up the memory we allocated.
1915  if (U != &SPACE[0]) {
1916    delete [] U;
1917    delete [] V;
1918    delete [] Q;
1919    delete [] R;
1920  }
1921}
1922
1923APInt APInt::udiv(const APInt& RHS) const {
1924  assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
1925
1926  // First, deal with the easy case
1927  if (isSingleWord()) {
1928    assert(RHS.VAL != 0 && "Divide by zero?");
1929    return APInt(BitWidth, VAL / RHS.VAL);
1930  }
1931
1932  // Get some facts about the LHS and RHS number of bits and words
1933  unsigned rhsBits = RHS.getActiveBits();
1934  unsigned rhsWords = !rhsBits ? 0 : (APInt::whichWord(rhsBits - 1) + 1);
1935  assert(rhsWords && "Divided by zero???");
1936  unsigned lhsBits = this->getActiveBits();
1937  unsigned lhsWords = !lhsBits ? 0 : (APInt::whichWord(lhsBits - 1) + 1);
1938
1939  // Deal with some degenerate cases
1940  if (!lhsWords)
1941    // 0 / X ===> 0
1942    return APInt(BitWidth, 0);
1943  else if (lhsWords < rhsWords || this->ult(RHS)) {
1944    // X / Y ===> 0, iff X < Y
1945    return APInt(BitWidth, 0);
1946  } else if (*this == RHS) {
1947    // X / X ===> 1
1948    return APInt(BitWidth, 1);
1949  } else if (lhsWords == 1 && rhsWords == 1) {
1950    // All high words are zero, just use native divide
1951    return APInt(BitWidth, this->pVal[0] / RHS.pVal[0]);
1952  }
1953
1954  // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1955  APInt Quotient(1,0); // to hold result.
1956  divide(*this, lhsWords, RHS, rhsWords, &Quotient, 0);
1957  return Quotient;
1958}
1959
1960APInt APInt::urem(const APInt& RHS) const {
1961  assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
1962  if (isSingleWord()) {
1963    assert(RHS.VAL != 0 && "Remainder by zero?");
1964    return APInt(BitWidth, VAL % RHS.VAL);
1965  }
1966
1967  // Get some facts about the LHS
1968  unsigned lhsBits = getActiveBits();
1969  unsigned lhsWords = !lhsBits ? 0 : (whichWord(lhsBits - 1) + 1);
1970
1971  // Get some facts about the RHS
1972  unsigned rhsBits = RHS.getActiveBits();
1973  unsigned rhsWords = !rhsBits ? 0 : (APInt::whichWord(rhsBits - 1) + 1);
1974  assert(rhsWords && "Performing remainder operation by zero ???");
1975
1976  // Check the degenerate cases
1977  if (lhsWords == 0) {
1978    // 0 % Y ===> 0
1979    return APInt(BitWidth, 0);
1980  } else if (lhsWords < rhsWords || this->ult(RHS)) {
1981    // X % Y ===> X, iff X < Y
1982    return *this;
1983  } else if (*this == RHS) {
1984    // X % X == 0;
1985    return APInt(BitWidth, 0);
1986  } else if (lhsWords == 1) {
1987    // All high words are zero, just use native remainder
1988    return APInt(BitWidth, pVal[0] % RHS.pVal[0]);
1989  }
1990
1991  // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1992  APInt Remainder(1,0);
1993  divide(*this, lhsWords, RHS, rhsWords, 0, &Remainder);
1994  return Remainder;
1995}
1996
1997void APInt::udivrem(const APInt &LHS, const APInt &RHS,
1998                    APInt &Quotient, APInt &Remainder) {
1999  // Get some size facts about the dividend and divisor
2000  unsigned lhsBits  = LHS.getActiveBits();
2001  unsigned lhsWords = !lhsBits ? 0 : (APInt::whichWord(lhsBits - 1) + 1);
2002  unsigned rhsBits  = RHS.getActiveBits();
2003  unsigned rhsWords = !rhsBits ? 0 : (APInt::whichWord(rhsBits - 1) + 1);
2004
2005  // Check the degenerate cases
2006  if (lhsWords == 0) {
2007    Quotient = 0;                // 0 / Y ===> 0
2008    Remainder = 0;               // 0 % Y ===> 0
2009    return;
2010  }
2011
2012  if (lhsWords < rhsWords || LHS.ult(RHS)) {
2013    Remainder = LHS;            // X % Y ===> X, iff X < Y
2014    Quotient = 0;               // X / Y ===> 0, iff X < Y
2015    return;
2016  }
2017
2018  if (LHS == RHS) {
2019    Quotient  = 1;              // X / X ===> 1
2020    Remainder = 0;              // X % X ===> 0;
2021    return;
2022  }
2023
2024  if (lhsWords == 1 && rhsWords == 1) {
2025    // There is only one word to consider so use the native versions.
2026    uint64_t lhsValue = LHS.isSingleWord() ? LHS.VAL : LHS.pVal[0];
2027    uint64_t rhsValue = RHS.isSingleWord() ? RHS.VAL : RHS.pVal[0];
2028    Quotient = APInt(LHS.getBitWidth(), lhsValue / rhsValue);
2029    Remainder = APInt(LHS.getBitWidth(), lhsValue % rhsValue);
2030    return;
2031  }
2032
2033  // Okay, lets do it the long way
2034  divide(LHS, lhsWords, RHS, rhsWords, &Quotient, &Remainder);
2035}
2036
2037APInt APInt::sadd_ov(const APInt &RHS, bool &Overflow) const {
2038  APInt Res = *this+RHS;
2039  Overflow = isNonNegative() == RHS.isNonNegative() &&
2040             Res.isNonNegative() != isNonNegative();
2041  return Res;
2042}
2043
2044APInt APInt::uadd_ov(const APInt &RHS, bool &Overflow) const {
2045  APInt Res = *this+RHS;
2046  Overflow = Res.ult(RHS);
2047  return Res;
2048}
2049
2050APInt APInt::ssub_ov(const APInt &RHS, bool &Overflow) const {
2051  APInt Res = *this - RHS;
2052  Overflow = isNonNegative() != RHS.isNonNegative() &&
2053             Res.isNonNegative() != isNonNegative();
2054  return Res;
2055}
2056
2057APInt APInt::usub_ov(const APInt &RHS, bool &Overflow) const {
2058  APInt Res = *this-RHS;
2059  Overflow = Res.ugt(*this);
2060  return Res;
2061}
2062
2063APInt APInt::sdiv_ov(const APInt &RHS, bool &Overflow) const {
2064  // MININT/-1  -->  overflow.
2065  Overflow = isMinSignedValue() && RHS.isAllOnesValue();
2066  return sdiv(RHS);
2067}
2068
2069APInt APInt::smul_ov(const APInt &RHS, bool &Overflow) const {
2070  APInt Res = *this * RHS;
2071
2072  if (*this != 0 && RHS != 0)
2073    Overflow = Res.sdiv(RHS) != *this || Res.sdiv(*this) != RHS;
2074  else
2075    Overflow = false;
2076  return Res;
2077}
2078
2079APInt APInt::sshl_ov(unsigned ShAmt, bool &Overflow) const {
2080  Overflow = ShAmt >= getBitWidth();
2081  if (Overflow)
2082    ShAmt = getBitWidth()-1;
2083
2084  if (isNonNegative()) // Don't allow sign change.
2085    Overflow = ShAmt >= countLeadingZeros();
2086  else
2087    Overflow = ShAmt >= countLeadingOnes();
2088
2089  return *this << ShAmt;
2090}
2091
2092
2093
2094
2095void APInt::fromString(unsigned numbits, StringRef str, uint8_t radix) {
2096  // Check our assumptions here
2097  assert(!str.empty() && "Invalid string length");
2098  assert((radix == 10 || radix == 8 || radix == 16 || radix == 2) &&
2099         "Radix should be 2, 8, 10, or 16!");
2100
2101  StringRef::iterator p = str.begin();
2102  size_t slen = str.size();
2103  bool isNeg = *p == '-';
2104  if (*p == '-' || *p == '+') {
2105    p++;
2106    slen--;
2107    assert(slen && "String is only a sign, needs a value.");
2108  }
2109  assert((slen <= numbits || radix != 2) && "Insufficient bit width");
2110  assert(((slen-1)*3 <= numbits || radix != 8) && "Insufficient bit width");
2111  assert(((slen-1)*4 <= numbits || radix != 16) && "Insufficient bit width");
2112  assert((((slen-1)*64)/22 <= numbits || radix != 10) &&
2113         "Insufficient bit width");
2114
2115  // Allocate memory
2116  if (!isSingleWord())
2117    pVal = getClearedMemory(getNumWords());
2118
2119  // Figure out if we can shift instead of multiply
2120  unsigned shift = (radix == 16 ? 4 : radix == 8 ? 3 : radix == 2 ? 1 : 0);
2121
2122  // Set up an APInt for the digit to add outside the loop so we don't
2123  // constantly construct/destruct it.
2124  APInt apdigit(getBitWidth(), 0);
2125  APInt apradix(getBitWidth(), radix);
2126
2127  // Enter digit traversal loop
2128  for (StringRef::iterator e = str.end(); p != e; ++p) {
2129    unsigned digit = getDigit(*p, radix);
2130    assert(digit < radix && "Invalid character in digit string");
2131
2132    // Shift or multiply the value by the radix
2133    if (slen > 1) {
2134      if (shift)
2135        *this <<= shift;
2136      else
2137        *this *= apradix;
2138    }
2139
2140    // Add in the digit we just interpreted
2141    if (apdigit.isSingleWord())
2142      apdigit.VAL = digit;
2143    else
2144      apdigit.pVal[0] = digit;
2145    *this += apdigit;
2146  }
2147  // If its negative, put it in two's complement form
2148  if (isNeg) {
2149    (*this)--;
2150    this->flipAllBits();
2151  }
2152}
2153
2154void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix,
2155                     bool Signed) const {
2156  assert((Radix == 10 || Radix == 8 || Radix == 16 || Radix == 2) &&
2157         "Radix should be 2, 8, 10, or 16!");
2158
2159  // First, check for a zero value and just short circuit the logic below.
2160  if (*this == 0) {
2161    Str.push_back('0');
2162    return;
2163  }
2164
2165  static const char Digits[] = "0123456789ABCDEF";
2166
2167  if (isSingleWord()) {
2168    char Buffer[65];
2169    char *BufPtr = Buffer+65;
2170
2171    uint64_t N;
2172    if (!Signed) {
2173      N = getZExtValue();
2174    } else {
2175      int64_t I = getSExtValue();
2176      if (I >= 0) {
2177        N = I;
2178      } else {
2179        Str.push_back('-');
2180        N = -(uint64_t)I;
2181      }
2182    }
2183
2184    while (N) {
2185      *--BufPtr = Digits[N % Radix];
2186      N /= Radix;
2187    }
2188    Str.append(BufPtr, Buffer+65);
2189    return;
2190  }
2191
2192  APInt Tmp(*this);
2193
2194  if (Signed && isNegative()) {
2195    // They want to print the signed version and it is a negative value
2196    // Flip the bits and add one to turn it into the equivalent positive
2197    // value and put a '-' in the result.
2198    Tmp.flipAllBits();
2199    Tmp++;
2200    Str.push_back('-');
2201  }
2202
2203  // We insert the digits backward, then reverse them to get the right order.
2204  unsigned StartDig = Str.size();
2205
2206  // For the 2, 8 and 16 bit cases, we can just shift instead of divide
2207  // because the number of bits per digit (1, 3 and 4 respectively) divides
2208  // equaly.  We just shift until the value is zero.
2209  if (Radix != 10) {
2210    // Just shift tmp right for each digit width until it becomes zero
2211    unsigned ShiftAmt = (Radix == 16 ? 4 : (Radix == 8 ? 3 : 1));
2212    unsigned MaskAmt = Radix - 1;
2213
2214    while (Tmp != 0) {
2215      unsigned Digit = unsigned(Tmp.getRawData()[0]) & MaskAmt;
2216      Str.push_back(Digits[Digit]);
2217      Tmp = Tmp.lshr(ShiftAmt);
2218    }
2219  } else {
2220    APInt divisor(4, 10);
2221    while (Tmp != 0) {
2222      APInt APdigit(1, 0);
2223      APInt tmp2(Tmp.getBitWidth(), 0);
2224      divide(Tmp, Tmp.getNumWords(), divisor, divisor.getNumWords(), &tmp2,
2225             &APdigit);
2226      unsigned Digit = (unsigned)APdigit.getZExtValue();
2227      assert(Digit < Radix && "divide failed");
2228      Str.push_back(Digits[Digit]);
2229      Tmp = tmp2;
2230    }
2231  }
2232
2233  // Reverse the digits before returning.
2234  std::reverse(Str.begin()+StartDig, Str.end());
2235}
2236
2237/// toString - This returns the APInt as a std::string.  Note that this is an
2238/// inefficient method.  It is better to pass in a SmallVector/SmallString
2239/// to the methods above.
2240std::string APInt::toString(unsigned Radix = 10, bool Signed = true) const {
2241  SmallString<40> S;
2242  toString(S, Radix, Signed);
2243  return S.str();
2244}
2245
2246
2247void APInt::dump() const {
2248  SmallString<40> S, U;
2249  this->toStringUnsigned(U);
2250  this->toStringSigned(S);
2251  dbgs() << "APInt(" << BitWidth << "b, "
2252         << U.str() << "u " << S.str() << "s)";
2253}
2254
2255void APInt::print(raw_ostream &OS, bool isSigned) const {
2256  SmallString<40> S;
2257  this->toString(S, 10, isSigned);
2258  OS << S.str();
2259}
2260
2261// This implements a variety of operations on a representation of
2262// arbitrary precision, two's-complement, bignum integer values.
2263
2264// Assumed by lowHalf, highHalf, partMSB and partLSB.  A fairly safe
2265// and unrestricting assumption.
2266#define COMPILE_TIME_ASSERT(cond) extern int CTAssert[(cond) ? 1 : -1]
2267COMPILE_TIME_ASSERT(integerPartWidth % 2 == 0);
2268
2269/* Some handy functions local to this file.  */
2270namespace {
2271
2272  /* Returns the integer part with the least significant BITS set.
2273     BITS cannot be zero.  */
2274  static inline integerPart
2275  lowBitMask(unsigned int bits)
2276  {
2277    assert(bits != 0 && bits <= integerPartWidth);
2278
2279    return ~(integerPart) 0 >> (integerPartWidth - bits);
2280  }
2281
2282  /* Returns the value of the lower half of PART.  */
2283  static inline integerPart
2284  lowHalf(integerPart part)
2285  {
2286    return part & lowBitMask(integerPartWidth / 2);
2287  }
2288
2289  /* Returns the value of the upper half of PART.  */
2290  static inline integerPart
2291  highHalf(integerPart part)
2292  {
2293    return part >> (integerPartWidth / 2);
2294  }
2295
2296  /* Returns the bit number of the most significant set bit of a part.
2297     If the input number has no bits set -1U is returned.  */
2298  static unsigned int
2299  partMSB(integerPart value)
2300  {
2301    unsigned int n, msb;
2302
2303    if (value == 0)
2304      return -1U;
2305
2306    n = integerPartWidth / 2;
2307
2308    msb = 0;
2309    do {
2310      if (value >> n) {
2311        value >>= n;
2312        msb += n;
2313      }
2314
2315      n >>= 1;
2316    } while (n);
2317
2318    return msb;
2319  }
2320
2321  /* Returns the bit number of the least significant set bit of a
2322     part.  If the input number has no bits set -1U is returned.  */
2323  static unsigned int
2324  partLSB(integerPart value)
2325  {
2326    unsigned int n, lsb;
2327
2328    if (value == 0)
2329      return -1U;
2330
2331    lsb = integerPartWidth - 1;
2332    n = integerPartWidth / 2;
2333
2334    do {
2335      if (value << n) {
2336        value <<= n;
2337        lsb -= n;
2338      }
2339
2340      n >>= 1;
2341    } while (n);
2342
2343    return lsb;
2344  }
2345}
2346
2347/* Sets the least significant part of a bignum to the input value, and
2348   zeroes out higher parts.  */
2349void
2350APInt::tcSet(integerPart *dst, integerPart part, unsigned int parts)
2351{
2352  unsigned int i;
2353
2354  assert(parts > 0);
2355
2356  dst[0] = part;
2357  for (i = 1; i < parts; i++)
2358    dst[i] = 0;
2359}
2360
2361/* Assign one bignum to another.  */
2362void
2363APInt::tcAssign(integerPart *dst, const integerPart *src, unsigned int parts)
2364{
2365  unsigned int i;
2366
2367  for (i = 0; i < parts; i++)
2368    dst[i] = src[i];
2369}
2370
2371/* Returns true if a bignum is zero, false otherwise.  */
2372bool
2373APInt::tcIsZero(const integerPart *src, unsigned int parts)
2374{
2375  unsigned int i;
2376
2377  for (i = 0; i < parts; i++)
2378    if (src[i])
2379      return false;
2380
2381  return true;
2382}
2383
2384/* Extract the given bit of a bignum; returns 0 or 1.  */
2385int
2386APInt::tcExtractBit(const integerPart *parts, unsigned int bit)
2387{
2388  return (parts[bit / integerPartWidth] &
2389          ((integerPart) 1 << bit % integerPartWidth)) != 0;
2390}
2391
2392/* Set the given bit of a bignum. */
2393void
2394APInt::tcSetBit(integerPart *parts, unsigned int bit)
2395{
2396  parts[bit / integerPartWidth] |= (integerPart) 1 << (bit % integerPartWidth);
2397}
2398
2399/* Clears the given bit of a bignum. */
2400void
2401APInt::tcClearBit(integerPart *parts, unsigned int bit)
2402{
2403  parts[bit / integerPartWidth] &=
2404    ~((integerPart) 1 << (bit % integerPartWidth));
2405}
2406
2407/* Returns the bit number of the least significant set bit of a
2408   number.  If the input number has no bits set -1U is returned.  */
2409unsigned int
2410APInt::tcLSB(const integerPart *parts, unsigned int n)
2411{
2412  unsigned int i, lsb;
2413
2414  for (i = 0; i < n; i++) {
2415      if (parts[i] != 0) {
2416          lsb = partLSB(parts[i]);
2417
2418          return lsb + i * integerPartWidth;
2419      }
2420  }
2421
2422  return -1U;
2423}
2424
2425/* Returns the bit number of the most significant set bit of a number.
2426   If the input number has no bits set -1U is returned.  */
2427unsigned int
2428APInt::tcMSB(const integerPart *parts, unsigned int n)
2429{
2430  unsigned int msb;
2431
2432  do {
2433    --n;
2434
2435    if (parts[n] != 0) {
2436      msb = partMSB(parts[n]);
2437
2438      return msb + n * integerPartWidth;
2439    }
2440  } while (n);
2441
2442  return -1U;
2443}
2444
2445/* Copy the bit vector of width srcBITS from SRC, starting at bit
2446   srcLSB, to DST, of dstCOUNT parts, such that the bit srcLSB becomes
2447   the least significant bit of DST.  All high bits above srcBITS in
2448   DST are zero-filled.  */
2449void
2450APInt::tcExtract(integerPart *dst, unsigned int dstCount,const integerPart *src,
2451                 unsigned int srcBits, unsigned int srcLSB)
2452{
2453  unsigned int firstSrcPart, dstParts, shift, n;
2454
2455  dstParts = (srcBits + integerPartWidth - 1) / integerPartWidth;
2456  assert(dstParts <= dstCount);
2457
2458  firstSrcPart = srcLSB / integerPartWidth;
2459  tcAssign (dst, src + firstSrcPart, dstParts);
2460
2461  shift = srcLSB % integerPartWidth;
2462  tcShiftRight (dst, dstParts, shift);
2463
2464  /* We now have (dstParts * integerPartWidth - shift) bits from SRC
2465     in DST.  If this is less that srcBits, append the rest, else
2466     clear the high bits.  */
2467  n = dstParts * integerPartWidth - shift;
2468  if (n < srcBits) {
2469    integerPart mask = lowBitMask (srcBits - n);
2470    dst[dstParts - 1] |= ((src[firstSrcPart + dstParts] & mask)
2471                          << n % integerPartWidth);
2472  } else if (n > srcBits) {
2473    if (srcBits % integerPartWidth)
2474      dst[dstParts - 1] &= lowBitMask (srcBits % integerPartWidth);
2475  }
2476
2477  /* Clear high parts.  */
2478  while (dstParts < dstCount)
2479    dst[dstParts++] = 0;
2480}
2481
2482/* DST += RHS + C where C is zero or one.  Returns the carry flag.  */
2483integerPart
2484APInt::tcAdd(integerPart *dst, const integerPart *rhs,
2485             integerPart c, unsigned int parts)
2486{
2487  unsigned int i;
2488
2489  assert(c <= 1);
2490
2491  for (i = 0; i < parts; i++) {
2492    integerPart l;
2493
2494    l = dst[i];
2495    if (c) {
2496      dst[i] += rhs[i] + 1;
2497      c = (dst[i] <= l);
2498    } else {
2499      dst[i] += rhs[i];
2500      c = (dst[i] < l);
2501    }
2502  }
2503
2504  return c;
2505}
2506
2507/* DST -= RHS + C where C is zero or one.  Returns the carry flag.  */
2508integerPart
2509APInt::tcSubtract(integerPart *dst, const integerPart *rhs,
2510                  integerPart c, unsigned int parts)
2511{
2512  unsigned int i;
2513
2514  assert(c <= 1);
2515
2516  for (i = 0; i < parts; i++) {
2517    integerPart l;
2518
2519    l = dst[i];
2520    if (c) {
2521      dst[i] -= rhs[i] + 1;
2522      c = (dst[i] >= l);
2523    } else {
2524      dst[i] -= rhs[i];
2525      c = (dst[i] > l);
2526    }
2527  }
2528
2529  return c;
2530}
2531
2532/* Negate a bignum in-place.  */
2533void
2534APInt::tcNegate(integerPart *dst, unsigned int parts)
2535{
2536  tcComplement(dst, parts);
2537  tcIncrement(dst, parts);
2538}
2539
2540/*  DST += SRC * MULTIPLIER + CARRY   if add is true
2541    DST  = SRC * MULTIPLIER + CARRY   if add is false
2542
2543    Requires 0 <= DSTPARTS <= SRCPARTS + 1.  If DST overlaps SRC
2544    they must start at the same point, i.e. DST == SRC.
2545
2546    If DSTPARTS == SRCPARTS + 1 no overflow occurs and zero is
2547    returned.  Otherwise DST is filled with the least significant
2548    DSTPARTS parts of the result, and if all of the omitted higher
2549    parts were zero return zero, otherwise overflow occurred and
2550    return one.  */
2551int
2552APInt::tcMultiplyPart(integerPart *dst, const integerPart *src,
2553                      integerPart multiplier, integerPart carry,
2554                      unsigned int srcParts, unsigned int dstParts,
2555                      bool add)
2556{
2557  unsigned int i, n;
2558
2559  /* Otherwise our writes of DST kill our later reads of SRC.  */
2560  assert(dst <= src || dst >= src + srcParts);
2561  assert(dstParts <= srcParts + 1);
2562
2563  /* N loops; minimum of dstParts and srcParts.  */
2564  n = dstParts < srcParts ? dstParts: srcParts;
2565
2566  for (i = 0; i < n; i++) {
2567    integerPart low, mid, high, srcPart;
2568
2569      /* [ LOW, HIGH ] = MULTIPLIER * SRC[i] + DST[i] + CARRY.
2570
2571         This cannot overflow, because
2572
2573         (n - 1) * (n - 1) + 2 (n - 1) = (n - 1) * (n + 1)
2574
2575         which is less than n^2.  */
2576
2577    srcPart = src[i];
2578
2579    if (multiplier == 0 || srcPart == 0)        {
2580      low = carry;
2581      high = 0;
2582    } else {
2583      low = lowHalf(srcPart) * lowHalf(multiplier);
2584      high = highHalf(srcPart) * highHalf(multiplier);
2585
2586      mid = lowHalf(srcPart) * highHalf(multiplier);
2587      high += highHalf(mid);
2588      mid <<= integerPartWidth / 2;
2589      if (low + mid < low)
2590        high++;
2591      low += mid;
2592
2593      mid = highHalf(srcPart) * lowHalf(multiplier);
2594      high += highHalf(mid);
2595      mid <<= integerPartWidth / 2;
2596      if (low + mid < low)
2597        high++;
2598      low += mid;
2599
2600      /* Now add carry.  */
2601      if (low + carry < low)
2602        high++;
2603      low += carry;
2604    }
2605
2606    if (add) {
2607      /* And now DST[i], and store the new low part there.  */
2608      if (low + dst[i] < low)
2609        high++;
2610      dst[i] += low;
2611    } else
2612      dst[i] = low;
2613
2614    carry = high;
2615  }
2616
2617  if (i < dstParts) {
2618    /* Full multiplication, there is no overflow.  */
2619    assert(i + 1 == dstParts);
2620    dst[i] = carry;
2621    return 0;
2622  } else {
2623    /* We overflowed if there is carry.  */
2624    if (carry)
2625      return 1;
2626
2627    /* We would overflow if any significant unwritten parts would be
2628       non-zero.  This is true if any remaining src parts are non-zero
2629       and the multiplier is non-zero.  */
2630    if (multiplier)
2631      for (; i < srcParts; i++)
2632        if (src[i])
2633          return 1;
2634
2635    /* We fitted in the narrow destination.  */
2636    return 0;
2637  }
2638}
2639
2640/* DST = LHS * RHS, where DST has the same width as the operands and
2641   is filled with the least significant parts of the result.  Returns
2642   one if overflow occurred, otherwise zero.  DST must be disjoint
2643   from both operands.  */
2644int
2645APInt::tcMultiply(integerPart *dst, const integerPart *lhs,
2646                  const integerPart *rhs, unsigned int parts)
2647{
2648  unsigned int i;
2649  int overflow;
2650
2651  assert(dst != lhs && dst != rhs);
2652
2653  overflow = 0;
2654  tcSet(dst, 0, parts);
2655
2656  for (i = 0; i < parts; i++)
2657    overflow |= tcMultiplyPart(&dst[i], lhs, rhs[i], 0, parts,
2658                               parts - i, true);
2659
2660  return overflow;
2661}
2662
2663/* DST = LHS * RHS, where DST has width the sum of the widths of the
2664   operands.  No overflow occurs.  DST must be disjoint from both
2665   operands.  Returns the number of parts required to hold the
2666   result.  */
2667unsigned int
2668APInt::tcFullMultiply(integerPart *dst, const integerPart *lhs,
2669                      const integerPart *rhs, unsigned int lhsParts,
2670                      unsigned int rhsParts)
2671{
2672  /* Put the narrower number on the LHS for less loops below.  */
2673  if (lhsParts > rhsParts) {
2674    return tcFullMultiply (dst, rhs, lhs, rhsParts, lhsParts);
2675  } else {
2676    unsigned int n;
2677
2678    assert(dst != lhs && dst != rhs);
2679
2680    tcSet(dst, 0, rhsParts);
2681
2682    for (n = 0; n < lhsParts; n++)
2683      tcMultiplyPart(&dst[n], rhs, lhs[n], 0, rhsParts, rhsParts + 1, true);
2684
2685    n = lhsParts + rhsParts;
2686
2687    return n - (dst[n - 1] == 0);
2688  }
2689}
2690
2691/* If RHS is zero LHS and REMAINDER are left unchanged, return one.
2692   Otherwise set LHS to LHS / RHS with the fractional part discarded,
2693   set REMAINDER to the remainder, return zero.  i.e.
2694
2695   OLD_LHS = RHS * LHS + REMAINDER
2696
2697   SCRATCH is a bignum of the same size as the operands and result for
2698   use by the routine; its contents need not be initialized and are
2699   destroyed.  LHS, REMAINDER and SCRATCH must be distinct.
2700*/
2701int
2702APInt::tcDivide(integerPart *lhs, const integerPart *rhs,
2703                integerPart *remainder, integerPart *srhs,
2704                unsigned int parts)
2705{
2706  unsigned int n, shiftCount;
2707  integerPart mask;
2708
2709  assert(lhs != remainder && lhs != srhs && remainder != srhs);
2710
2711  shiftCount = tcMSB(rhs, parts) + 1;
2712  if (shiftCount == 0)
2713    return true;
2714
2715  shiftCount = parts * integerPartWidth - shiftCount;
2716  n = shiftCount / integerPartWidth;
2717  mask = (integerPart) 1 << (shiftCount % integerPartWidth);
2718
2719  tcAssign(srhs, rhs, parts);
2720  tcShiftLeft(srhs, parts, shiftCount);
2721  tcAssign(remainder, lhs, parts);
2722  tcSet(lhs, 0, parts);
2723
2724  /* Loop, subtracting SRHS if REMAINDER is greater and adding that to
2725     the total.  */
2726  for (;;) {
2727      int compare;
2728
2729      compare = tcCompare(remainder, srhs, parts);
2730      if (compare >= 0) {
2731        tcSubtract(remainder, srhs, 0, parts);
2732        lhs[n] |= mask;
2733      }
2734
2735      if (shiftCount == 0)
2736        break;
2737      shiftCount--;
2738      tcShiftRight(srhs, parts, 1);
2739      if ((mask >>= 1) == 0)
2740        mask = (integerPart) 1 << (integerPartWidth - 1), n--;
2741  }
2742
2743  return false;
2744}
2745
2746/* Shift a bignum left COUNT bits in-place.  Shifted in bits are zero.
2747   There are no restrictions on COUNT.  */
2748void
2749APInt::tcShiftLeft(integerPart *dst, unsigned int parts, unsigned int count)
2750{
2751  if (count) {
2752    unsigned int jump, shift;
2753
2754    /* Jump is the inter-part jump; shift is is intra-part shift.  */
2755    jump = count / integerPartWidth;
2756    shift = count % integerPartWidth;
2757
2758    while (parts > jump) {
2759      integerPart part;
2760
2761      parts--;
2762
2763      /* dst[i] comes from the two parts src[i - jump] and, if we have
2764         an intra-part shift, src[i - jump - 1].  */
2765      part = dst[parts - jump];
2766      if (shift) {
2767        part <<= shift;
2768        if (parts >= jump + 1)
2769          part |= dst[parts - jump - 1] >> (integerPartWidth - shift);
2770      }
2771
2772      dst[parts] = part;
2773    }
2774
2775    while (parts > 0)
2776      dst[--parts] = 0;
2777  }
2778}
2779
2780/* Shift a bignum right COUNT bits in-place.  Shifted in bits are
2781   zero.  There are no restrictions on COUNT.  */
2782void
2783APInt::tcShiftRight(integerPart *dst, unsigned int parts, unsigned int count)
2784{
2785  if (count) {
2786    unsigned int i, jump, shift;
2787
2788    /* Jump is the inter-part jump; shift is is intra-part shift.  */
2789    jump = count / integerPartWidth;
2790    shift = count % integerPartWidth;
2791
2792    /* Perform the shift.  This leaves the most significant COUNT bits
2793       of the result at zero.  */
2794    for (i = 0; i < parts; i++) {
2795      integerPart part;
2796
2797      if (i + jump >= parts) {
2798        part = 0;
2799      } else {
2800        part = dst[i + jump];
2801        if (shift) {
2802          part >>= shift;
2803          if (i + jump + 1 < parts)
2804            part |= dst[i + jump + 1] << (integerPartWidth - shift);
2805        }
2806      }
2807
2808      dst[i] = part;
2809    }
2810  }
2811}
2812
2813/* Bitwise and of two bignums.  */
2814void
2815APInt::tcAnd(integerPart *dst, const integerPart *rhs, unsigned int parts)
2816{
2817  unsigned int i;
2818
2819  for (i = 0; i < parts; i++)
2820    dst[i] &= rhs[i];
2821}
2822
2823/* Bitwise inclusive or of two bignums.  */
2824void
2825APInt::tcOr(integerPart *dst, const integerPart *rhs, unsigned int parts)
2826{
2827  unsigned int i;
2828
2829  for (i = 0; i < parts; i++)
2830    dst[i] |= rhs[i];
2831}
2832
2833/* Bitwise exclusive or of two bignums.  */
2834void
2835APInt::tcXor(integerPart *dst, const integerPart *rhs, unsigned int parts)
2836{
2837  unsigned int i;
2838
2839  for (i = 0; i < parts; i++)
2840    dst[i] ^= rhs[i];
2841}
2842
2843/* Complement a bignum in-place.  */
2844void
2845APInt::tcComplement(integerPart *dst, unsigned int parts)
2846{
2847  unsigned int i;
2848
2849  for (i = 0; i < parts; i++)
2850    dst[i] = ~dst[i];
2851}
2852
2853/* Comparison (unsigned) of two bignums.  */
2854int
2855APInt::tcCompare(const integerPart *lhs, const integerPart *rhs,
2856                 unsigned int parts)
2857{
2858  while (parts) {
2859      parts--;
2860      if (lhs[parts] == rhs[parts])
2861        continue;
2862
2863      if (lhs[parts] > rhs[parts])
2864        return 1;
2865      else
2866        return -1;
2867    }
2868
2869  return 0;
2870}
2871
2872/* Increment a bignum in-place, return the carry flag.  */
2873integerPart
2874APInt::tcIncrement(integerPart *dst, unsigned int parts)
2875{
2876  unsigned int i;
2877
2878  for (i = 0; i < parts; i++)
2879    if (++dst[i] != 0)
2880      break;
2881
2882  return i == parts;
2883}
2884
2885/* Set the least significant BITS bits of a bignum, clear the
2886   rest.  */
2887void
2888APInt::tcSetLeastSignificantBits(integerPart *dst, unsigned int parts,
2889                                 unsigned int bits)
2890{
2891  unsigned int i;
2892
2893  i = 0;
2894  while (bits > integerPartWidth) {
2895    dst[i++] = ~(integerPart) 0;
2896    bits -= integerPartWidth;
2897  }
2898
2899  if (bits)
2900    dst[i++] = ~(integerPart) 0 >> (integerPartWidth - bits);
2901
2902  while (i < parts)
2903    dst[i++] = 0;
2904}
2905