APInt.cpp revision 201360
1//===-- APInt.cpp - Implement APInt class ---------------------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file implements a class to represent arbitrary precision integer
11// constant values and provide a variety of arithmetic operations on them.
12//
13//===----------------------------------------------------------------------===//
14
15#define DEBUG_TYPE "apint"
16#include "llvm/ADT/APInt.h"
17#include "llvm/ADT/StringRef.h"
18#include "llvm/ADT/FoldingSet.h"
19#include "llvm/ADT/SmallString.h"
20#include "llvm/Support/Debug.h"
21#include "llvm/Support/ErrorHandling.h"
22#include "llvm/Support/MathExtras.h"
23#include "llvm/Support/raw_ostream.h"
24#include <cmath>
25#include <limits>
26#include <cstring>
27#include <cstdlib>
28using namespace llvm;
29
30/// A utility function for allocating memory, checking for allocation failures,
31/// and ensuring the contents are zeroed.
32inline static uint64_t* getClearedMemory(unsigned numWords) {
33  uint64_t * result = new uint64_t[numWords];
34  assert(result && "APInt memory allocation fails!");
35  memset(result, 0, numWords * sizeof(uint64_t));
36  return result;
37}
38
39/// A utility function for allocating memory and checking for allocation
40/// failure.  The content is not zeroed.
41inline static uint64_t* getMemory(unsigned numWords) {
42  uint64_t * result = new uint64_t[numWords];
43  assert(result && "APInt memory allocation fails!");
44  return result;
45}
46
47/// A utility function that converts a character to a digit.
48inline static unsigned getDigit(char cdigit, uint8_t radix) {
49  unsigned r;
50
51  if (radix == 16) {
52    r = cdigit - '0';
53    if (r <= 9)
54      return r;
55
56    r = cdigit - 'A';
57    if (r <= 5)
58      return r + 10;
59
60    r = cdigit - 'a';
61    if (r <= 5)
62      return r + 10;
63  }
64
65  r = cdigit - '0';
66  if (r < radix)
67    return r;
68
69  return -1U;
70}
71
72
73void APInt::initSlowCase(unsigned numBits, uint64_t val, bool isSigned) {
74  pVal = getClearedMemory(getNumWords());
75  pVal[0] = val;
76  if (isSigned && int64_t(val) < 0)
77    for (unsigned i = 1; i < getNumWords(); ++i)
78      pVal[i] = -1ULL;
79}
80
81void APInt::initSlowCase(const APInt& that) {
82  pVal = getMemory(getNumWords());
83  memcpy(pVal, that.pVal, getNumWords() * APINT_WORD_SIZE);
84}
85
86
87APInt::APInt(unsigned numBits, unsigned numWords, const uint64_t bigVal[])
88  : BitWidth(numBits), VAL(0) {
89  assert(BitWidth && "Bitwidth too small");
90  assert(bigVal && "Null pointer detected!");
91  if (isSingleWord())
92    VAL = bigVal[0];
93  else {
94    // Get memory, cleared to 0
95    pVal = getClearedMemory(getNumWords());
96    // Calculate the number of words to copy
97    unsigned words = std::min<unsigned>(numWords, getNumWords());
98    // Copy the words from bigVal to pVal
99    memcpy(pVal, bigVal, words * APINT_WORD_SIZE);
100  }
101  // Make sure unused high bits are cleared
102  clearUnusedBits();
103}
104
105APInt::APInt(unsigned numbits, const StringRef& Str, uint8_t radix)
106  : BitWidth(numbits), VAL(0) {
107  assert(BitWidth && "Bitwidth too small");
108  fromString(numbits, Str, radix);
109}
110
111APInt& APInt::AssignSlowCase(const APInt& RHS) {
112  // Don't do anything for X = X
113  if (this == &RHS)
114    return *this;
115
116  if (BitWidth == RHS.getBitWidth()) {
117    // assume same bit-width single-word case is already handled
118    assert(!isSingleWord());
119    memcpy(pVal, RHS.pVal, getNumWords() * APINT_WORD_SIZE);
120    return *this;
121  }
122
123  if (isSingleWord()) {
124    // assume case where both are single words is already handled
125    assert(!RHS.isSingleWord());
126    VAL = 0;
127    pVal = getMemory(RHS.getNumWords());
128    memcpy(pVal, RHS.pVal, RHS.getNumWords() * APINT_WORD_SIZE);
129  } else if (getNumWords() == RHS.getNumWords())
130    memcpy(pVal, RHS.pVal, RHS.getNumWords() * APINT_WORD_SIZE);
131  else if (RHS.isSingleWord()) {
132    delete [] pVal;
133    VAL = RHS.VAL;
134  } else {
135    delete [] pVal;
136    pVal = getMemory(RHS.getNumWords());
137    memcpy(pVal, RHS.pVal, RHS.getNumWords() * APINT_WORD_SIZE);
138  }
139  BitWidth = RHS.BitWidth;
140  return clearUnusedBits();
141}
142
143APInt& APInt::operator=(uint64_t RHS) {
144  if (isSingleWord())
145    VAL = RHS;
146  else {
147    pVal[0] = RHS;
148    memset(pVal+1, 0, (getNumWords() - 1) * APINT_WORD_SIZE);
149  }
150  return clearUnusedBits();
151}
152
153/// Profile - This method 'profiles' an APInt for use with FoldingSet.
154void APInt::Profile(FoldingSetNodeID& ID) const {
155  ID.AddInteger(BitWidth);
156
157  if (isSingleWord()) {
158    ID.AddInteger(VAL);
159    return;
160  }
161
162  unsigned NumWords = getNumWords();
163  for (unsigned i = 0; i < NumWords; ++i)
164    ID.AddInteger(pVal[i]);
165}
166
167/// add_1 - This function adds a single "digit" integer, y, to the multiple
168/// "digit" integer array,  x[]. x[] is modified to reflect the addition and
169/// 1 is returned if there is a carry out, otherwise 0 is returned.
170/// @returns the carry of the addition.
171static bool add_1(uint64_t dest[], uint64_t x[], unsigned len, uint64_t y) {
172  for (unsigned i = 0; i < len; ++i) {
173    dest[i] = y + x[i];
174    if (dest[i] < y)
175      y = 1; // Carry one to next digit.
176    else {
177      y = 0; // No need to carry so exit early
178      break;
179    }
180  }
181  return y;
182}
183
184/// @brief Prefix increment operator. Increments the APInt by one.
185APInt& APInt::operator++() {
186  if (isSingleWord())
187    ++VAL;
188  else
189    add_1(pVal, pVal, getNumWords(), 1);
190  return clearUnusedBits();
191}
192
193/// sub_1 - This function subtracts a single "digit" (64-bit word), y, from
194/// the multi-digit integer array, x[], propagating the borrowed 1 value until
195/// no further borrowing is neeeded or it runs out of "digits" in x.  The result
196/// is 1 if "borrowing" exhausted the digits in x, or 0 if x was not exhausted.
197/// In other words, if y > x then this function returns 1, otherwise 0.
198/// @returns the borrow out of the subtraction
199static bool sub_1(uint64_t x[], unsigned len, uint64_t y) {
200  for (unsigned i = 0; i < len; ++i) {
201    uint64_t X = x[i];
202    x[i] -= y;
203    if (y > X)
204      y = 1;  // We have to "borrow 1" from next "digit"
205    else {
206      y = 0;  // No need to borrow
207      break;  // Remaining digits are unchanged so exit early
208    }
209  }
210  return bool(y);
211}
212
213/// @brief Prefix decrement operator. Decrements the APInt by one.
214APInt& APInt::operator--() {
215  if (isSingleWord())
216    --VAL;
217  else
218    sub_1(pVal, getNumWords(), 1);
219  return clearUnusedBits();
220}
221
222/// add - This function adds the integer array x to the integer array Y and
223/// places the result in dest.
224/// @returns the carry out from the addition
225/// @brief General addition of 64-bit integer arrays
226static bool add(uint64_t *dest, const uint64_t *x, const uint64_t *y,
227                unsigned len) {
228  bool carry = false;
229  for (unsigned i = 0; i< len; ++i) {
230    uint64_t limit = std::min(x[i],y[i]); // must come first in case dest == x
231    dest[i] = x[i] + y[i] + carry;
232    carry = dest[i] < limit || (carry && dest[i] == limit);
233  }
234  return carry;
235}
236
237/// Adds the RHS APint to this APInt.
238/// @returns this, after addition of RHS.
239/// @brief Addition assignment operator.
240APInt& APInt::operator+=(const APInt& RHS) {
241  assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
242  if (isSingleWord())
243    VAL += RHS.VAL;
244  else {
245    add(pVal, pVal, RHS.pVal, getNumWords());
246  }
247  return clearUnusedBits();
248}
249
250/// Subtracts the integer array y from the integer array x
251/// @returns returns the borrow out.
252/// @brief Generalized subtraction of 64-bit integer arrays.
253static bool sub(uint64_t *dest, const uint64_t *x, const uint64_t *y,
254                unsigned len) {
255  bool borrow = false;
256  for (unsigned i = 0; i < len; ++i) {
257    uint64_t x_tmp = borrow ? x[i] - 1 : x[i];
258    borrow = y[i] > x_tmp || (borrow && x[i] == 0);
259    dest[i] = x_tmp - y[i];
260  }
261  return borrow;
262}
263
264/// Subtracts the RHS APInt from this APInt
265/// @returns this, after subtraction
266/// @brief Subtraction assignment operator.
267APInt& APInt::operator-=(const APInt& RHS) {
268  assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
269  if (isSingleWord())
270    VAL -= RHS.VAL;
271  else
272    sub(pVal, pVal, RHS.pVal, getNumWords());
273  return clearUnusedBits();
274}
275
276/// Multiplies an integer array, x by a a uint64_t integer and places the result
277/// into dest.
278/// @returns the carry out of the multiplication.
279/// @brief Multiply a multi-digit APInt by a single digit (64-bit) integer.
280static uint64_t mul_1(uint64_t dest[], uint64_t x[], unsigned len, uint64_t y) {
281  // Split y into high 32-bit part (hy)  and low 32-bit part (ly)
282  uint64_t ly = y & 0xffffffffULL, hy = y >> 32;
283  uint64_t carry = 0;
284
285  // For each digit of x.
286  for (unsigned i = 0; i < len; ++i) {
287    // Split x into high and low words
288    uint64_t lx = x[i] & 0xffffffffULL;
289    uint64_t hx = x[i] >> 32;
290    // hasCarry - A flag to indicate if there is a carry to the next digit.
291    // hasCarry == 0, no carry
292    // hasCarry == 1, has carry
293    // hasCarry == 2, no carry and the calculation result == 0.
294    uint8_t hasCarry = 0;
295    dest[i] = carry + lx * ly;
296    // Determine if the add above introduces carry.
297    hasCarry = (dest[i] < carry) ? 1 : 0;
298    carry = hx * ly + (dest[i] >> 32) + (hasCarry ? (1ULL << 32) : 0);
299    // The upper limit of carry can be (2^32 - 1)(2^32 - 1) +
300    // (2^32 - 1) + 2^32 = 2^64.
301    hasCarry = (!carry && hasCarry) ? 1 : (!carry ? 2 : 0);
302
303    carry += (lx * hy) & 0xffffffffULL;
304    dest[i] = (carry << 32) | (dest[i] & 0xffffffffULL);
305    carry = (((!carry && hasCarry != 2) || hasCarry == 1) ? (1ULL << 32) : 0) +
306            (carry >> 32) + ((lx * hy) >> 32) + hx * hy;
307  }
308  return carry;
309}
310
311/// Multiplies integer array x by integer array y and stores the result into
312/// the integer array dest. Note that dest's size must be >= xlen + ylen.
313/// @brief Generalized multiplicate of integer arrays.
314static void mul(uint64_t dest[], uint64_t x[], unsigned xlen, uint64_t y[],
315                unsigned ylen) {
316  dest[xlen] = mul_1(dest, x, xlen, y[0]);
317  for (unsigned i = 1; i < ylen; ++i) {
318    uint64_t ly = y[i] & 0xffffffffULL, hy = y[i] >> 32;
319    uint64_t carry = 0, lx = 0, hx = 0;
320    for (unsigned j = 0; j < xlen; ++j) {
321      lx = x[j] & 0xffffffffULL;
322      hx = x[j] >> 32;
323      // hasCarry - A flag to indicate if has carry.
324      // hasCarry == 0, no carry
325      // hasCarry == 1, has carry
326      // hasCarry == 2, no carry and the calculation result == 0.
327      uint8_t hasCarry = 0;
328      uint64_t resul = carry + lx * ly;
329      hasCarry = (resul < carry) ? 1 : 0;
330      carry = (hasCarry ? (1ULL << 32) : 0) + hx * ly + (resul >> 32);
331      hasCarry = (!carry && hasCarry) ? 1 : (!carry ? 2 : 0);
332
333      carry += (lx * hy) & 0xffffffffULL;
334      resul = (carry << 32) | (resul & 0xffffffffULL);
335      dest[i+j] += resul;
336      carry = (((!carry && hasCarry != 2) || hasCarry == 1) ? (1ULL << 32) : 0)+
337              (carry >> 32) + (dest[i+j] < resul ? 1 : 0) +
338              ((lx * hy) >> 32) + hx * hy;
339    }
340    dest[i+xlen] = carry;
341  }
342}
343
344APInt& APInt::operator*=(const APInt& RHS) {
345  assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
346  if (isSingleWord()) {
347    VAL *= RHS.VAL;
348    clearUnusedBits();
349    return *this;
350  }
351
352  // Get some bit facts about LHS and check for zero
353  unsigned lhsBits = getActiveBits();
354  unsigned lhsWords = !lhsBits ? 0 : whichWord(lhsBits - 1) + 1;
355  if (!lhsWords)
356    // 0 * X ===> 0
357    return *this;
358
359  // Get some bit facts about RHS and check for zero
360  unsigned rhsBits = RHS.getActiveBits();
361  unsigned rhsWords = !rhsBits ? 0 : whichWord(rhsBits - 1) + 1;
362  if (!rhsWords) {
363    // X * 0 ===> 0
364    clear();
365    return *this;
366  }
367
368  // Allocate space for the result
369  unsigned destWords = rhsWords + lhsWords;
370  uint64_t *dest = getMemory(destWords);
371
372  // Perform the long multiply
373  mul(dest, pVal, lhsWords, RHS.pVal, rhsWords);
374
375  // Copy result back into *this
376  clear();
377  unsigned wordsToCopy = destWords >= getNumWords() ? getNumWords() : destWords;
378  memcpy(pVal, dest, wordsToCopy * APINT_WORD_SIZE);
379
380  // delete dest array and return
381  delete[] dest;
382  return *this;
383}
384
385APInt& APInt::operator&=(const APInt& RHS) {
386  assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
387  if (isSingleWord()) {
388    VAL &= RHS.VAL;
389    return *this;
390  }
391  unsigned numWords = getNumWords();
392  for (unsigned i = 0; i < numWords; ++i)
393    pVal[i] &= RHS.pVal[i];
394  return *this;
395}
396
397APInt& APInt::operator|=(const APInt& RHS) {
398  assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
399  if (isSingleWord()) {
400    VAL |= RHS.VAL;
401    return *this;
402  }
403  unsigned numWords = getNumWords();
404  for (unsigned i = 0; i < numWords; ++i)
405    pVal[i] |= RHS.pVal[i];
406  return *this;
407}
408
409APInt& APInt::operator^=(const APInt& RHS) {
410  assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
411  if (isSingleWord()) {
412    VAL ^= RHS.VAL;
413    this->clearUnusedBits();
414    return *this;
415  }
416  unsigned numWords = getNumWords();
417  for (unsigned i = 0; i < numWords; ++i)
418    pVal[i] ^= RHS.pVal[i];
419  return clearUnusedBits();
420}
421
422APInt APInt::AndSlowCase(const APInt& RHS) const {
423  unsigned numWords = getNumWords();
424  uint64_t* val = getMemory(numWords);
425  for (unsigned i = 0; i < numWords; ++i)
426    val[i] = pVal[i] & RHS.pVal[i];
427  return APInt(val, getBitWidth());
428}
429
430APInt APInt::OrSlowCase(const APInt& RHS) const {
431  unsigned numWords = getNumWords();
432  uint64_t *val = getMemory(numWords);
433  for (unsigned i = 0; i < numWords; ++i)
434    val[i] = pVal[i] | RHS.pVal[i];
435  return APInt(val, getBitWidth());
436}
437
438APInt APInt::XorSlowCase(const APInt& RHS) const {
439  unsigned numWords = getNumWords();
440  uint64_t *val = getMemory(numWords);
441  for (unsigned i = 0; i < numWords; ++i)
442    val[i] = pVal[i] ^ RHS.pVal[i];
443
444  // 0^0==1 so clear the high bits in case they got set.
445  return APInt(val, getBitWidth()).clearUnusedBits();
446}
447
448bool APInt::operator !() const {
449  if (isSingleWord())
450    return !VAL;
451
452  for (unsigned i = 0; i < getNumWords(); ++i)
453    if (pVal[i])
454      return false;
455  return true;
456}
457
458APInt APInt::operator*(const APInt& RHS) const {
459  assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
460  if (isSingleWord())
461    return APInt(BitWidth, VAL * RHS.VAL);
462  APInt Result(*this);
463  Result *= RHS;
464  return Result.clearUnusedBits();
465}
466
467APInt APInt::operator+(const APInt& RHS) const {
468  assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
469  if (isSingleWord())
470    return APInt(BitWidth, VAL + RHS.VAL);
471  APInt Result(BitWidth, 0);
472  add(Result.pVal, this->pVal, RHS.pVal, getNumWords());
473  return Result.clearUnusedBits();
474}
475
476APInt APInt::operator-(const APInt& RHS) const {
477  assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
478  if (isSingleWord())
479    return APInt(BitWidth, VAL - RHS.VAL);
480  APInt Result(BitWidth, 0);
481  sub(Result.pVal, this->pVal, RHS.pVal, getNumWords());
482  return Result.clearUnusedBits();
483}
484
485bool APInt::operator[](unsigned bitPosition) const {
486  return (maskBit(bitPosition) &
487          (isSingleWord() ?  VAL : pVal[whichWord(bitPosition)])) != 0;
488}
489
490bool APInt::EqualSlowCase(const APInt& RHS) const {
491  // Get some facts about the number of bits used in the two operands.
492  unsigned n1 = getActiveBits();
493  unsigned n2 = RHS.getActiveBits();
494
495  // If the number of bits isn't the same, they aren't equal
496  if (n1 != n2)
497    return false;
498
499  // If the number of bits fits in a word, we only need to compare the low word.
500  if (n1 <= APINT_BITS_PER_WORD)
501    return pVal[0] == RHS.pVal[0];
502
503  // Otherwise, compare everything
504  for (int i = whichWord(n1 - 1); i >= 0; --i)
505    if (pVal[i] != RHS.pVal[i])
506      return false;
507  return true;
508}
509
510bool APInt::EqualSlowCase(uint64_t Val) const {
511  unsigned n = getActiveBits();
512  if (n <= APINT_BITS_PER_WORD)
513    return pVal[0] == Val;
514  else
515    return false;
516}
517
518bool APInt::ult(const APInt& RHS) const {
519  assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison");
520  if (isSingleWord())
521    return VAL < RHS.VAL;
522
523  // Get active bit length of both operands
524  unsigned n1 = getActiveBits();
525  unsigned n2 = RHS.getActiveBits();
526
527  // If magnitude of LHS is less than RHS, return true.
528  if (n1 < n2)
529    return true;
530
531  // If magnitude of RHS is greather than LHS, return false.
532  if (n2 < n1)
533    return false;
534
535  // If they bot fit in a word, just compare the low order word
536  if (n1 <= APINT_BITS_PER_WORD && n2 <= APINT_BITS_PER_WORD)
537    return pVal[0] < RHS.pVal[0];
538
539  // Otherwise, compare all words
540  unsigned topWord = whichWord(std::max(n1,n2)-1);
541  for (int i = topWord; i >= 0; --i) {
542    if (pVal[i] > RHS.pVal[i])
543      return false;
544    if (pVal[i] < RHS.pVal[i])
545      return true;
546  }
547  return false;
548}
549
550bool APInt::slt(const APInt& RHS) const {
551  assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison");
552  if (isSingleWord()) {
553    int64_t lhsSext = (int64_t(VAL) << (64-BitWidth)) >> (64-BitWidth);
554    int64_t rhsSext = (int64_t(RHS.VAL) << (64-BitWidth)) >> (64-BitWidth);
555    return lhsSext < rhsSext;
556  }
557
558  APInt lhs(*this);
559  APInt rhs(RHS);
560  bool lhsNeg = isNegative();
561  bool rhsNeg = rhs.isNegative();
562  if (lhsNeg) {
563    // Sign bit is set so perform two's complement to make it positive
564    lhs.flip();
565    lhs++;
566  }
567  if (rhsNeg) {
568    // Sign bit is set so perform two's complement to make it positive
569    rhs.flip();
570    rhs++;
571  }
572
573  // Now we have unsigned values to compare so do the comparison if necessary
574  // based on the negativeness of the values.
575  if (lhsNeg)
576    if (rhsNeg)
577      return lhs.ugt(rhs);
578    else
579      return true;
580  else if (rhsNeg)
581    return false;
582  else
583    return lhs.ult(rhs);
584}
585
586APInt& APInt::set(unsigned bitPosition) {
587  if (isSingleWord())
588    VAL |= maskBit(bitPosition);
589  else
590    pVal[whichWord(bitPosition)] |= maskBit(bitPosition);
591  return *this;
592}
593
594/// Set the given bit to 0 whose position is given as "bitPosition".
595/// @brief Set a given bit to 0.
596APInt& APInt::clear(unsigned bitPosition) {
597  if (isSingleWord())
598    VAL &= ~maskBit(bitPosition);
599  else
600    pVal[whichWord(bitPosition)] &= ~maskBit(bitPosition);
601  return *this;
602}
603
604/// @brief Toggle every bit to its opposite value.
605
606/// Toggle a given bit to its opposite value whose position is given
607/// as "bitPosition".
608/// @brief Toggles a given bit to its opposite value.
609APInt& APInt::flip(unsigned bitPosition) {
610  assert(bitPosition < BitWidth && "Out of the bit-width range!");
611  if ((*this)[bitPosition]) clear(bitPosition);
612  else set(bitPosition);
613  return *this;
614}
615
616unsigned APInt::getBitsNeeded(const StringRef& str, uint8_t radix) {
617  assert(!str.empty() && "Invalid string length");
618  assert((radix == 10 || radix == 8 || radix == 16 || radix == 2) &&
619         "Radix should be 2, 8, 10, or 16!");
620
621  size_t slen = str.size();
622
623  // Each computation below needs to know if it's negative.
624  StringRef::iterator p = str.begin();
625  unsigned isNegative = *p == '-';
626  if (*p == '-' || *p == '+') {
627    p++;
628    slen--;
629    assert(slen && "String is only a sign, needs a value.");
630  }
631
632  // For radixes of power-of-two values, the bits required is accurately and
633  // easily computed
634  if (radix == 2)
635    return slen + isNegative;
636  if (radix == 8)
637    return slen * 3 + isNegative;
638  if (radix == 16)
639    return slen * 4 + isNegative;
640
641  // This is grossly inefficient but accurate. We could probably do something
642  // with a computation of roughly slen*64/20 and then adjust by the value of
643  // the first few digits. But, I'm not sure how accurate that could be.
644
645  // Compute a sufficient number of bits that is always large enough but might
646  // be too large. This avoids the assertion in the constructor. This
647  // calculation doesn't work appropriately for the numbers 0-9, so just use 4
648  // bits in that case.
649  unsigned sufficient = slen == 1 ? 4 : slen * 64/18;
650
651  // Convert to the actual binary value.
652  APInt tmp(sufficient, StringRef(p, slen), radix);
653
654  // Compute how many bits are required. If the log is infinite, assume we need
655  // just bit.
656  unsigned log = tmp.logBase2();
657  if (log == (unsigned)-1) {
658    return isNegative + 1;
659  } else {
660    return isNegative + log + 1;
661  }
662}
663
664// From http://www.burtleburtle.net, byBob Jenkins.
665// When targeting x86, both GCC and LLVM seem to recognize this as a
666// rotate instruction.
667#define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k))))
668
669// From http://www.burtleburtle.net, by Bob Jenkins.
670#define mix(a,b,c) \
671  { \
672    a -= c;  a ^= rot(c, 4);  c += b; \
673    b -= a;  b ^= rot(a, 6);  a += c; \
674    c -= b;  c ^= rot(b, 8);  b += a; \
675    a -= c;  a ^= rot(c,16);  c += b; \
676    b -= a;  b ^= rot(a,19);  a += c; \
677    c -= b;  c ^= rot(b, 4);  b += a; \
678  }
679
680// From http://www.burtleburtle.net, by Bob Jenkins.
681#define final(a,b,c) \
682  { \
683    c ^= b; c -= rot(b,14); \
684    a ^= c; a -= rot(c,11); \
685    b ^= a; b -= rot(a,25); \
686    c ^= b; c -= rot(b,16); \
687    a ^= c; a -= rot(c,4);  \
688    b ^= a; b -= rot(a,14); \
689    c ^= b; c -= rot(b,24); \
690  }
691
692// hashword() was adapted from http://www.burtleburtle.net, by Bob
693// Jenkins.  k is a pointer to an array of uint32_t values; length is
694// the length of the key, in 32-bit chunks.  This version only handles
695// keys that are a multiple of 32 bits in size.
696static inline uint32_t hashword(const uint64_t *k64, size_t length)
697{
698  const uint32_t *k = reinterpret_cast<const uint32_t *>(k64);
699  uint32_t a,b,c;
700
701  /* Set up the internal state */
702  a = b = c = 0xdeadbeef + (((uint32_t)length)<<2);
703
704  /*------------------------------------------------- handle most of the key */
705  while (length > 3)
706    {
707      a += k[0];
708      b += k[1];
709      c += k[2];
710      mix(a,b,c);
711      length -= 3;
712      k += 3;
713    }
714
715  /*------------------------------------------- handle the last 3 uint32_t's */
716  switch (length) {                  /* all the case statements fall through */
717  case 3 : c+=k[2];
718  case 2 : b+=k[1];
719  case 1 : a+=k[0];
720    final(a,b,c);
721    case 0:     /* case 0: nothing left to add */
722      break;
723    }
724  /*------------------------------------------------------ report the result */
725  return c;
726}
727
728// hashword8() was adapted from http://www.burtleburtle.net, by Bob
729// Jenkins.  This computes a 32-bit hash from one 64-bit word.  When
730// targeting x86 (32 or 64 bit), both LLVM and GCC compile this
731// function into about 35 instructions when inlined.
732static inline uint32_t hashword8(const uint64_t k64)
733{
734  uint32_t a,b,c;
735  a = b = c = 0xdeadbeef + 4;
736  b += k64 >> 32;
737  a += k64 & 0xffffffff;
738  final(a,b,c);
739  return c;
740}
741#undef final
742#undef mix
743#undef rot
744
745uint64_t APInt::getHashValue() const {
746  uint64_t hash;
747  if (isSingleWord())
748    hash = hashword8(VAL);
749  else
750    hash = hashword(pVal, getNumWords()*2);
751  return hash;
752}
753
754/// HiBits - This function returns the high "numBits" bits of this APInt.
755APInt APInt::getHiBits(unsigned numBits) const {
756  return APIntOps::lshr(*this, BitWidth - numBits);
757}
758
759/// LoBits - This function returns the low "numBits" bits of this APInt.
760APInt APInt::getLoBits(unsigned numBits) const {
761  return APIntOps::lshr(APIntOps::shl(*this, BitWidth - numBits),
762                        BitWidth - numBits);
763}
764
765bool APInt::isPowerOf2() const {
766  return (!!*this) && !(*this & (*this - APInt(BitWidth,1)));
767}
768
769unsigned APInt::countLeadingZerosSlowCase() const {
770  unsigned Count = 0;
771  for (unsigned i = getNumWords(); i > 0u; --i) {
772    if (pVal[i-1] == 0)
773      Count += APINT_BITS_PER_WORD;
774    else {
775      Count += CountLeadingZeros_64(pVal[i-1]);
776      break;
777    }
778  }
779  unsigned remainder = BitWidth % APINT_BITS_PER_WORD;
780  if (remainder)
781    Count -= APINT_BITS_PER_WORD - remainder;
782  return std::min(Count, BitWidth);
783}
784
785static unsigned countLeadingOnes_64(uint64_t V, unsigned skip) {
786  unsigned Count = 0;
787  if (skip)
788    V <<= skip;
789  while (V && (V & (1ULL << 63))) {
790    Count++;
791    V <<= 1;
792  }
793  return Count;
794}
795
796unsigned APInt::countLeadingOnes() const {
797  if (isSingleWord())
798    return countLeadingOnes_64(VAL, APINT_BITS_PER_WORD - BitWidth);
799
800  unsigned highWordBits = BitWidth % APINT_BITS_PER_WORD;
801  unsigned shift;
802  if (!highWordBits) {
803    highWordBits = APINT_BITS_PER_WORD;
804    shift = 0;
805  } else {
806    shift = APINT_BITS_PER_WORD - highWordBits;
807  }
808  int i = getNumWords() - 1;
809  unsigned Count = countLeadingOnes_64(pVal[i], shift);
810  if (Count == highWordBits) {
811    for (i--; i >= 0; --i) {
812      if (pVal[i] == -1ULL)
813        Count += APINT_BITS_PER_WORD;
814      else {
815        Count += countLeadingOnes_64(pVal[i], 0);
816        break;
817      }
818    }
819  }
820  return Count;
821}
822
823unsigned APInt::countTrailingZeros() const {
824  if (isSingleWord())
825    return std::min(unsigned(CountTrailingZeros_64(VAL)), BitWidth);
826  unsigned Count = 0;
827  unsigned i = 0;
828  for (; i < getNumWords() && pVal[i] == 0; ++i)
829    Count += APINT_BITS_PER_WORD;
830  if (i < getNumWords())
831    Count += CountTrailingZeros_64(pVal[i]);
832  return std::min(Count, BitWidth);
833}
834
835unsigned APInt::countTrailingOnesSlowCase() const {
836  unsigned Count = 0;
837  unsigned i = 0;
838  for (; i < getNumWords() && pVal[i] == -1ULL; ++i)
839    Count += APINT_BITS_PER_WORD;
840  if (i < getNumWords())
841    Count += CountTrailingOnes_64(pVal[i]);
842  return std::min(Count, BitWidth);
843}
844
845unsigned APInt::countPopulationSlowCase() const {
846  unsigned Count = 0;
847  for (unsigned i = 0; i < getNumWords(); ++i)
848    Count += CountPopulation_64(pVal[i]);
849  return Count;
850}
851
852APInt APInt::byteSwap() const {
853  assert(BitWidth >= 16 && BitWidth % 16 == 0 && "Cannot byteswap!");
854  if (BitWidth == 16)
855    return APInt(BitWidth, ByteSwap_16(uint16_t(VAL)));
856  else if (BitWidth == 32)
857    return APInt(BitWidth, ByteSwap_32(unsigned(VAL)));
858  else if (BitWidth == 48) {
859    unsigned Tmp1 = unsigned(VAL >> 16);
860    Tmp1 = ByteSwap_32(Tmp1);
861    uint16_t Tmp2 = uint16_t(VAL);
862    Tmp2 = ByteSwap_16(Tmp2);
863    return APInt(BitWidth, (uint64_t(Tmp2) << 32) | Tmp1);
864  } else if (BitWidth == 64)
865    return APInt(BitWidth, ByteSwap_64(VAL));
866  else {
867    APInt Result(BitWidth, 0);
868    char *pByte = (char*)Result.pVal;
869    for (unsigned i = 0; i < BitWidth / APINT_WORD_SIZE / 2; ++i) {
870      char Tmp = pByte[i];
871      pByte[i] = pByte[BitWidth / APINT_WORD_SIZE - 1 - i];
872      pByte[BitWidth / APINT_WORD_SIZE - i - 1] = Tmp;
873    }
874    return Result;
875  }
876}
877
878APInt llvm::APIntOps::GreatestCommonDivisor(const APInt& API1,
879                                            const APInt& API2) {
880  APInt A = API1, B = API2;
881  while (!!B) {
882    APInt T = B;
883    B = APIntOps::urem(A, B);
884    A = T;
885  }
886  return A;
887}
888
889APInt llvm::APIntOps::RoundDoubleToAPInt(double Double, unsigned width) {
890  union {
891    double D;
892    uint64_t I;
893  } T;
894  T.D = Double;
895
896  // Get the sign bit from the highest order bit
897  bool isNeg = T.I >> 63;
898
899  // Get the 11-bit exponent and adjust for the 1023 bit bias
900  int64_t exp = ((T.I >> 52) & 0x7ff) - 1023;
901
902  // If the exponent is negative, the value is < 0 so just return 0.
903  if (exp < 0)
904    return APInt(width, 0u);
905
906  // Extract the mantissa by clearing the top 12 bits (sign + exponent).
907  uint64_t mantissa = (T.I & (~0ULL >> 12)) | 1ULL << 52;
908
909  // If the exponent doesn't shift all bits out of the mantissa
910  if (exp < 52)
911    return isNeg ? -APInt(width, mantissa >> (52 - exp)) :
912                    APInt(width, mantissa >> (52 - exp));
913
914  // If the client didn't provide enough bits for us to shift the mantissa into
915  // then the result is undefined, just return 0
916  if (width <= exp - 52)
917    return APInt(width, 0);
918
919  // Otherwise, we have to shift the mantissa bits up to the right location
920  APInt Tmp(width, mantissa);
921  Tmp = Tmp.shl((unsigned)exp - 52);
922  return isNeg ? -Tmp : Tmp;
923}
924
925/// RoundToDouble - This function converts this APInt to a double.
926/// The layout for double is as following (IEEE Standard 754):
927///  --------------------------------------
928/// |  Sign    Exponent    Fraction    Bias |
929/// |-------------------------------------- |
930/// |  1[63]   11[62-52]   52[51-00]   1023 |
931///  --------------------------------------
932double APInt::roundToDouble(bool isSigned) const {
933
934  // Handle the simple case where the value is contained in one uint64_t.
935  // It is wrong to optimize getWord(0) to VAL; there might be more than one word.
936  if (isSingleWord() || getActiveBits() <= APINT_BITS_PER_WORD) {
937    if (isSigned) {
938      int64_t sext = (int64_t(getWord(0)) << (64-BitWidth)) >> (64-BitWidth);
939      return double(sext);
940    } else
941      return double(getWord(0));
942  }
943
944  // Determine if the value is negative.
945  bool isNeg = isSigned ? (*this)[BitWidth-1] : false;
946
947  // Construct the absolute value if we're negative.
948  APInt Tmp(isNeg ? -(*this) : (*this));
949
950  // Figure out how many bits we're using.
951  unsigned n = Tmp.getActiveBits();
952
953  // The exponent (without bias normalization) is just the number of bits
954  // we are using. Note that the sign bit is gone since we constructed the
955  // absolute value.
956  uint64_t exp = n;
957
958  // Return infinity for exponent overflow
959  if (exp > 1023) {
960    if (!isSigned || !isNeg)
961      return std::numeric_limits<double>::infinity();
962    else
963      return -std::numeric_limits<double>::infinity();
964  }
965  exp += 1023; // Increment for 1023 bias
966
967  // Number of bits in mantissa is 52. To obtain the mantissa value, we must
968  // extract the high 52 bits from the correct words in pVal.
969  uint64_t mantissa;
970  unsigned hiWord = whichWord(n-1);
971  if (hiWord == 0) {
972    mantissa = Tmp.pVal[0];
973    if (n > 52)
974      mantissa >>= n - 52; // shift down, we want the top 52 bits.
975  } else {
976    assert(hiWord > 0 && "huh?");
977    uint64_t hibits = Tmp.pVal[hiWord] << (52 - n % APINT_BITS_PER_WORD);
978    uint64_t lobits = Tmp.pVal[hiWord-1] >> (11 + n % APINT_BITS_PER_WORD);
979    mantissa = hibits | lobits;
980  }
981
982  // The leading bit of mantissa is implicit, so get rid of it.
983  uint64_t sign = isNeg ? (1ULL << (APINT_BITS_PER_WORD - 1)) : 0;
984  union {
985    double D;
986    uint64_t I;
987  } T;
988  T.I = sign | (exp << 52) | mantissa;
989  return T.D;
990}
991
992// Truncate to new width.
993APInt &APInt::trunc(unsigned width) {
994  assert(width < BitWidth && "Invalid APInt Truncate request");
995  assert(width && "Can't truncate to 0 bits");
996  unsigned wordsBefore = getNumWords();
997  BitWidth = width;
998  unsigned wordsAfter = getNumWords();
999  if (wordsBefore != wordsAfter) {
1000    if (wordsAfter == 1) {
1001      uint64_t *tmp = pVal;
1002      VAL = pVal[0];
1003      delete [] tmp;
1004    } else {
1005      uint64_t *newVal = getClearedMemory(wordsAfter);
1006      for (unsigned i = 0; i < wordsAfter; ++i)
1007        newVal[i] = pVal[i];
1008      delete [] pVal;
1009      pVal = newVal;
1010    }
1011  }
1012  return clearUnusedBits();
1013}
1014
1015// Sign extend to a new width.
1016APInt &APInt::sext(unsigned width) {
1017  assert(width > BitWidth && "Invalid APInt SignExtend request");
1018  // If the sign bit isn't set, this is the same as zext.
1019  if (!isNegative()) {
1020    zext(width);
1021    return *this;
1022  }
1023
1024  // The sign bit is set. First, get some facts
1025  unsigned wordsBefore = getNumWords();
1026  unsigned wordBits = BitWidth % APINT_BITS_PER_WORD;
1027  BitWidth = width;
1028  unsigned wordsAfter = getNumWords();
1029
1030  // Mask the high order word appropriately
1031  if (wordsBefore == wordsAfter) {
1032    unsigned newWordBits = width % APINT_BITS_PER_WORD;
1033    // The extension is contained to the wordsBefore-1th word.
1034    uint64_t mask = ~0ULL;
1035    if (newWordBits)
1036      mask >>= APINT_BITS_PER_WORD - newWordBits;
1037    mask <<= wordBits;
1038    if (wordsBefore == 1)
1039      VAL |= mask;
1040    else
1041      pVal[wordsBefore-1] |= mask;
1042    return clearUnusedBits();
1043  }
1044
1045  uint64_t mask = wordBits == 0 ? 0 : ~0ULL << wordBits;
1046  uint64_t *newVal = getMemory(wordsAfter);
1047  if (wordsBefore == 1)
1048    newVal[0] = VAL | mask;
1049  else {
1050    for (unsigned i = 0; i < wordsBefore; ++i)
1051      newVal[i] = pVal[i];
1052    newVal[wordsBefore-1] |= mask;
1053  }
1054  for (unsigned i = wordsBefore; i < wordsAfter; i++)
1055    newVal[i] = -1ULL;
1056  if (wordsBefore != 1)
1057    delete [] pVal;
1058  pVal = newVal;
1059  return clearUnusedBits();
1060}
1061
1062//  Zero extend to a new width.
1063APInt &APInt::zext(unsigned width) {
1064  assert(width > BitWidth && "Invalid APInt ZeroExtend request");
1065  unsigned wordsBefore = getNumWords();
1066  BitWidth = width;
1067  unsigned wordsAfter = getNumWords();
1068  if (wordsBefore != wordsAfter) {
1069    uint64_t *newVal = getClearedMemory(wordsAfter);
1070    if (wordsBefore == 1)
1071      newVal[0] = VAL;
1072    else
1073      for (unsigned i = 0; i < wordsBefore; ++i)
1074        newVal[i] = pVal[i];
1075    if (wordsBefore != 1)
1076      delete [] pVal;
1077    pVal = newVal;
1078  }
1079  return *this;
1080}
1081
1082APInt &APInt::zextOrTrunc(unsigned width) {
1083  if (BitWidth < width)
1084    return zext(width);
1085  if (BitWidth > width)
1086    return trunc(width);
1087  return *this;
1088}
1089
1090APInt &APInt::sextOrTrunc(unsigned width) {
1091  if (BitWidth < width)
1092    return sext(width);
1093  if (BitWidth > width)
1094    return trunc(width);
1095  return *this;
1096}
1097
1098/// Arithmetic right-shift this APInt by shiftAmt.
1099/// @brief Arithmetic right-shift function.
1100APInt APInt::ashr(const APInt &shiftAmt) const {
1101  return ashr((unsigned)shiftAmt.getLimitedValue(BitWidth));
1102}
1103
1104/// Arithmetic right-shift this APInt by shiftAmt.
1105/// @brief Arithmetic right-shift function.
1106APInt APInt::ashr(unsigned shiftAmt) const {
1107  assert(shiftAmt <= BitWidth && "Invalid shift amount");
1108  // Handle a degenerate case
1109  if (shiftAmt == 0)
1110    return *this;
1111
1112  // Handle single word shifts with built-in ashr
1113  if (isSingleWord()) {
1114    if (shiftAmt == BitWidth)
1115      return APInt(BitWidth, 0); // undefined
1116    else {
1117      unsigned SignBit = APINT_BITS_PER_WORD - BitWidth;
1118      return APInt(BitWidth,
1119        (((int64_t(VAL) << SignBit) >> SignBit) >> shiftAmt));
1120    }
1121  }
1122
1123  // If all the bits were shifted out, the result is, technically, undefined.
1124  // We return -1 if it was negative, 0 otherwise. We check this early to avoid
1125  // issues in the algorithm below.
1126  if (shiftAmt == BitWidth) {
1127    if (isNegative())
1128      return APInt(BitWidth, -1ULL, true);
1129    else
1130      return APInt(BitWidth, 0);
1131  }
1132
1133  // Create some space for the result.
1134  uint64_t * val = new uint64_t[getNumWords()];
1135
1136  // Compute some values needed by the following shift algorithms
1137  unsigned wordShift = shiftAmt % APINT_BITS_PER_WORD; // bits to shift per word
1138  unsigned offset = shiftAmt / APINT_BITS_PER_WORD; // word offset for shift
1139  unsigned breakWord = getNumWords() - 1 - offset; // last word affected
1140  unsigned bitsInWord = whichBit(BitWidth); // how many bits in last word?
1141  if (bitsInWord == 0)
1142    bitsInWord = APINT_BITS_PER_WORD;
1143
1144  // If we are shifting whole words, just move whole words
1145  if (wordShift == 0) {
1146    // Move the words containing significant bits
1147    for (unsigned i = 0; i <= breakWord; ++i)
1148      val[i] = pVal[i+offset]; // move whole word
1149
1150    // Adjust the top significant word for sign bit fill, if negative
1151    if (isNegative())
1152      if (bitsInWord < APINT_BITS_PER_WORD)
1153        val[breakWord] |= ~0ULL << bitsInWord; // set high bits
1154  } else {
1155    // Shift the low order words
1156    for (unsigned i = 0; i < breakWord; ++i) {
1157      // This combines the shifted corresponding word with the low bits from
1158      // the next word (shifted into this word's high bits).
1159      val[i] = (pVal[i+offset] >> wordShift) |
1160               (pVal[i+offset+1] << (APINT_BITS_PER_WORD - wordShift));
1161    }
1162
1163    // Shift the break word. In this case there are no bits from the next word
1164    // to include in this word.
1165    val[breakWord] = pVal[breakWord+offset] >> wordShift;
1166
1167    // Deal with sign extenstion in the break word, and possibly the word before
1168    // it.
1169    if (isNegative()) {
1170      if (wordShift > bitsInWord) {
1171        if (breakWord > 0)
1172          val[breakWord-1] |=
1173            ~0ULL << (APINT_BITS_PER_WORD - (wordShift - bitsInWord));
1174        val[breakWord] |= ~0ULL;
1175      } else
1176        val[breakWord] |= (~0ULL << (bitsInWord - wordShift));
1177    }
1178  }
1179
1180  // Remaining words are 0 or -1, just assign them.
1181  uint64_t fillValue = (isNegative() ? -1ULL : 0);
1182  for (unsigned i = breakWord+1; i < getNumWords(); ++i)
1183    val[i] = fillValue;
1184  return APInt(val, BitWidth).clearUnusedBits();
1185}
1186
1187/// Logical right-shift this APInt by shiftAmt.
1188/// @brief Logical right-shift function.
1189APInt APInt::lshr(const APInt &shiftAmt) const {
1190  return lshr((unsigned)shiftAmt.getLimitedValue(BitWidth));
1191}
1192
1193/// Logical right-shift this APInt by shiftAmt.
1194/// @brief Logical right-shift function.
1195APInt APInt::lshr(unsigned shiftAmt) const {
1196  if (isSingleWord()) {
1197    if (shiftAmt == BitWidth)
1198      return APInt(BitWidth, 0);
1199    else
1200      return APInt(BitWidth, this->VAL >> shiftAmt);
1201  }
1202
1203  // If all the bits were shifted out, the result is 0. This avoids issues
1204  // with shifting by the size of the integer type, which produces undefined
1205  // results. We define these "undefined results" to always be 0.
1206  if (shiftAmt == BitWidth)
1207    return APInt(BitWidth, 0);
1208
1209  // If none of the bits are shifted out, the result is *this. This avoids
1210  // issues with shifting by the size of the integer type, which produces
1211  // undefined results in the code below. This is also an optimization.
1212  if (shiftAmt == 0)
1213    return *this;
1214
1215  // Create some space for the result.
1216  uint64_t * val = new uint64_t[getNumWords()];
1217
1218  // If we are shifting less than a word, compute the shift with a simple carry
1219  if (shiftAmt < APINT_BITS_PER_WORD) {
1220    uint64_t carry = 0;
1221    for (int i = getNumWords()-1; i >= 0; --i) {
1222      val[i] = (pVal[i] >> shiftAmt) | carry;
1223      carry = pVal[i] << (APINT_BITS_PER_WORD - shiftAmt);
1224    }
1225    return APInt(val, BitWidth).clearUnusedBits();
1226  }
1227
1228  // Compute some values needed by the remaining shift algorithms
1229  unsigned wordShift = shiftAmt % APINT_BITS_PER_WORD;
1230  unsigned offset = shiftAmt / APINT_BITS_PER_WORD;
1231
1232  // If we are shifting whole words, just move whole words
1233  if (wordShift == 0) {
1234    for (unsigned i = 0; i < getNumWords() - offset; ++i)
1235      val[i] = pVal[i+offset];
1236    for (unsigned i = getNumWords()-offset; i < getNumWords(); i++)
1237      val[i] = 0;
1238    return APInt(val,BitWidth).clearUnusedBits();
1239  }
1240
1241  // Shift the low order words
1242  unsigned breakWord = getNumWords() - offset -1;
1243  for (unsigned i = 0; i < breakWord; ++i)
1244    val[i] = (pVal[i+offset] >> wordShift) |
1245             (pVal[i+offset+1] << (APINT_BITS_PER_WORD - wordShift));
1246  // Shift the break word.
1247  val[breakWord] = pVal[breakWord+offset] >> wordShift;
1248
1249  // Remaining words are 0
1250  for (unsigned i = breakWord+1; i < getNumWords(); ++i)
1251    val[i] = 0;
1252  return APInt(val, BitWidth).clearUnusedBits();
1253}
1254
1255/// Left-shift this APInt by shiftAmt.
1256/// @brief Left-shift function.
1257APInt APInt::shl(const APInt &shiftAmt) const {
1258  // It's undefined behavior in C to shift by BitWidth or greater.
1259  return shl((unsigned)shiftAmt.getLimitedValue(BitWidth));
1260}
1261
1262APInt APInt::shlSlowCase(unsigned shiftAmt) const {
1263  // If all the bits were shifted out, the result is 0. This avoids issues
1264  // with shifting by the size of the integer type, which produces undefined
1265  // results. We define these "undefined results" to always be 0.
1266  if (shiftAmt == BitWidth)
1267    return APInt(BitWidth, 0);
1268
1269  // If none of the bits are shifted out, the result is *this. This avoids a
1270  // lshr by the words size in the loop below which can produce incorrect
1271  // results. It also avoids the expensive computation below for a common case.
1272  if (shiftAmt == 0)
1273    return *this;
1274
1275  // Create some space for the result.
1276  uint64_t * val = new uint64_t[getNumWords()];
1277
1278  // If we are shifting less than a word, do it the easy way
1279  if (shiftAmt < APINT_BITS_PER_WORD) {
1280    uint64_t carry = 0;
1281    for (unsigned i = 0; i < getNumWords(); i++) {
1282      val[i] = pVal[i] << shiftAmt | carry;
1283      carry = pVal[i] >> (APINT_BITS_PER_WORD - shiftAmt);
1284    }
1285    return APInt(val, BitWidth).clearUnusedBits();
1286  }
1287
1288  // Compute some values needed by the remaining shift algorithms
1289  unsigned wordShift = shiftAmt % APINT_BITS_PER_WORD;
1290  unsigned offset = shiftAmt / APINT_BITS_PER_WORD;
1291
1292  // If we are shifting whole words, just move whole words
1293  if (wordShift == 0) {
1294    for (unsigned i = 0; i < offset; i++)
1295      val[i] = 0;
1296    for (unsigned i = offset; i < getNumWords(); i++)
1297      val[i] = pVal[i-offset];
1298    return APInt(val,BitWidth).clearUnusedBits();
1299  }
1300
1301  // Copy whole words from this to Result.
1302  unsigned i = getNumWords() - 1;
1303  for (; i > offset; --i)
1304    val[i] = pVal[i-offset] << wordShift |
1305             pVal[i-offset-1] >> (APINT_BITS_PER_WORD - wordShift);
1306  val[offset] = pVal[0] << wordShift;
1307  for (i = 0; i < offset; ++i)
1308    val[i] = 0;
1309  return APInt(val, BitWidth).clearUnusedBits();
1310}
1311
1312APInt APInt::rotl(const APInt &rotateAmt) const {
1313  return rotl((unsigned)rotateAmt.getLimitedValue(BitWidth));
1314}
1315
1316APInt APInt::rotl(unsigned rotateAmt) const {
1317  if (rotateAmt == 0)
1318    return *this;
1319  // Don't get too fancy, just use existing shift/or facilities
1320  APInt hi(*this);
1321  APInt lo(*this);
1322  hi.shl(rotateAmt);
1323  lo.lshr(BitWidth - rotateAmt);
1324  return hi | lo;
1325}
1326
1327APInt APInt::rotr(const APInt &rotateAmt) const {
1328  return rotr((unsigned)rotateAmt.getLimitedValue(BitWidth));
1329}
1330
1331APInt APInt::rotr(unsigned rotateAmt) const {
1332  if (rotateAmt == 0)
1333    return *this;
1334  // Don't get too fancy, just use existing shift/or facilities
1335  APInt hi(*this);
1336  APInt lo(*this);
1337  lo.lshr(rotateAmt);
1338  hi.shl(BitWidth - rotateAmt);
1339  return hi | lo;
1340}
1341
1342// Square Root - this method computes and returns the square root of "this".
1343// Three mechanisms are used for computation. For small values (<= 5 bits),
1344// a table lookup is done. This gets some performance for common cases. For
1345// values using less than 52 bits, the value is converted to double and then
1346// the libc sqrt function is called. The result is rounded and then converted
1347// back to a uint64_t which is then used to construct the result. Finally,
1348// the Babylonian method for computing square roots is used.
1349APInt APInt::sqrt() const {
1350
1351  // Determine the magnitude of the value.
1352  unsigned magnitude = getActiveBits();
1353
1354  // Use a fast table for some small values. This also gets rid of some
1355  // rounding errors in libc sqrt for small values.
1356  if (magnitude <= 5) {
1357    static const uint8_t results[32] = {
1358      /*     0 */ 0,
1359      /*  1- 2 */ 1, 1,
1360      /*  3- 6 */ 2, 2, 2, 2,
1361      /*  7-12 */ 3, 3, 3, 3, 3, 3,
1362      /* 13-20 */ 4, 4, 4, 4, 4, 4, 4, 4,
1363      /* 21-30 */ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
1364      /*    31 */ 6
1365    };
1366    return APInt(BitWidth, results[ (isSingleWord() ? VAL : pVal[0]) ]);
1367  }
1368
1369  // If the magnitude of the value fits in less than 52 bits (the precision of
1370  // an IEEE double precision floating point value), then we can use the
1371  // libc sqrt function which will probably use a hardware sqrt computation.
1372  // This should be faster than the algorithm below.
1373  if (magnitude < 52) {
1374#ifdef _MSC_VER
1375    // Amazingly, VC++ doesn't have round().
1376    return APInt(BitWidth,
1377                 uint64_t(::sqrt(double(isSingleWord()?VAL:pVal[0]))) + 0.5);
1378#else
1379    return APInt(BitWidth,
1380                 uint64_t(::round(::sqrt(double(isSingleWord()?VAL:pVal[0])))));
1381#endif
1382  }
1383
1384  // Okay, all the short cuts are exhausted. We must compute it. The following
1385  // is a classical Babylonian method for computing the square root. This code
1386  // was adapted to APINt from a wikipedia article on such computations.
1387  // See http://www.wikipedia.org/ and go to the page named
1388  // Calculate_an_integer_square_root.
1389  unsigned nbits = BitWidth, i = 4;
1390  APInt testy(BitWidth, 16);
1391  APInt x_old(BitWidth, 1);
1392  APInt x_new(BitWidth, 0);
1393  APInt two(BitWidth, 2);
1394
1395  // Select a good starting value using binary logarithms.
1396  for (;; i += 2, testy = testy.shl(2))
1397    if (i >= nbits || this->ule(testy)) {
1398      x_old = x_old.shl(i / 2);
1399      break;
1400    }
1401
1402  // Use the Babylonian method to arrive at the integer square root:
1403  for (;;) {
1404    x_new = (this->udiv(x_old) + x_old).udiv(two);
1405    if (x_old.ule(x_new))
1406      break;
1407    x_old = x_new;
1408  }
1409
1410  // Make sure we return the closest approximation
1411  // NOTE: The rounding calculation below is correct. It will produce an
1412  // off-by-one discrepancy with results from pari/gp. That discrepancy has been
1413  // determined to be a rounding issue with pari/gp as it begins to use a
1414  // floating point representation after 192 bits. There are no discrepancies
1415  // between this algorithm and pari/gp for bit widths < 192 bits.
1416  APInt square(x_old * x_old);
1417  APInt nextSquare((x_old + 1) * (x_old +1));
1418  if (this->ult(square))
1419    return x_old;
1420  else if (this->ule(nextSquare)) {
1421    APInt midpoint((nextSquare - square).udiv(two));
1422    APInt offset(*this - square);
1423    if (offset.ult(midpoint))
1424      return x_old;
1425    else
1426      return x_old + 1;
1427  } else
1428    llvm_unreachable("Error in APInt::sqrt computation");
1429  return x_old + 1;
1430}
1431
1432/// Computes the multiplicative inverse of this APInt for a given modulo. The
1433/// iterative extended Euclidean algorithm is used to solve for this value,
1434/// however we simplify it to speed up calculating only the inverse, and take
1435/// advantage of div+rem calculations. We also use some tricks to avoid copying
1436/// (potentially large) APInts around.
1437APInt APInt::multiplicativeInverse(const APInt& modulo) const {
1438  assert(ult(modulo) && "This APInt must be smaller than the modulo");
1439
1440  // Using the properties listed at the following web page (accessed 06/21/08):
1441  //   http://www.numbertheory.org/php/euclid.html
1442  // (especially the properties numbered 3, 4 and 9) it can be proved that
1443  // BitWidth bits suffice for all the computations in the algorithm implemented
1444  // below. More precisely, this number of bits suffice if the multiplicative
1445  // inverse exists, but may not suffice for the general extended Euclidean
1446  // algorithm.
1447
1448  APInt r[2] = { modulo, *this };
1449  APInt t[2] = { APInt(BitWidth, 0), APInt(BitWidth, 1) };
1450  APInt q(BitWidth, 0);
1451
1452  unsigned i;
1453  for (i = 0; r[i^1] != 0; i ^= 1) {
1454    // An overview of the math without the confusing bit-flipping:
1455    // q = r[i-2] / r[i-1]
1456    // r[i] = r[i-2] % r[i-1]
1457    // t[i] = t[i-2] - t[i-1] * q
1458    udivrem(r[i], r[i^1], q, r[i]);
1459    t[i] -= t[i^1] * q;
1460  }
1461
1462  // If this APInt and the modulo are not coprime, there is no multiplicative
1463  // inverse, so return 0. We check this by looking at the next-to-last
1464  // remainder, which is the gcd(*this,modulo) as calculated by the Euclidean
1465  // algorithm.
1466  if (r[i] != 1)
1467    return APInt(BitWidth, 0);
1468
1469  // The next-to-last t is the multiplicative inverse.  However, we are
1470  // interested in a positive inverse. Calcuate a positive one from a negative
1471  // one if necessary. A simple addition of the modulo suffices because
1472  // abs(t[i]) is known to be less than *this/2 (see the link above).
1473  return t[i].isNegative() ? t[i] + modulo : t[i];
1474}
1475
1476/// Calculate the magic numbers required to implement a signed integer division
1477/// by a constant as a sequence of multiplies, adds and shifts.  Requires that
1478/// the divisor not be 0, 1, or -1.  Taken from "Hacker's Delight", Henry S.
1479/// Warren, Jr., chapter 10.
1480APInt::ms APInt::magic() const {
1481  const APInt& d = *this;
1482  unsigned p;
1483  APInt ad, anc, delta, q1, r1, q2, r2, t;
1484  APInt signedMin = APInt::getSignedMinValue(d.getBitWidth());
1485  struct ms mag;
1486
1487  ad = d.abs();
1488  t = signedMin + (d.lshr(d.getBitWidth() - 1));
1489  anc = t - 1 - t.urem(ad);   // absolute value of nc
1490  p = d.getBitWidth() - 1;    // initialize p
1491  q1 = signedMin.udiv(anc);   // initialize q1 = 2p/abs(nc)
1492  r1 = signedMin - q1*anc;    // initialize r1 = rem(2p,abs(nc))
1493  q2 = signedMin.udiv(ad);    // initialize q2 = 2p/abs(d)
1494  r2 = signedMin - q2*ad;     // initialize r2 = rem(2p,abs(d))
1495  do {
1496    p = p + 1;
1497    q1 = q1<<1;          // update q1 = 2p/abs(nc)
1498    r1 = r1<<1;          // update r1 = rem(2p/abs(nc))
1499    if (r1.uge(anc)) {  // must be unsigned comparison
1500      q1 = q1 + 1;
1501      r1 = r1 - anc;
1502    }
1503    q2 = q2<<1;          // update q2 = 2p/abs(d)
1504    r2 = r2<<1;          // update r2 = rem(2p/abs(d))
1505    if (r2.uge(ad)) {   // must be unsigned comparison
1506      q2 = q2 + 1;
1507      r2 = r2 - ad;
1508    }
1509    delta = ad - r2;
1510  } while (q1.ule(delta) || (q1 == delta && r1 == 0));
1511
1512  mag.m = q2 + 1;
1513  if (d.isNegative()) mag.m = -mag.m;   // resulting magic number
1514  mag.s = p - d.getBitWidth();          // resulting shift
1515  return mag;
1516}
1517
1518/// Calculate the magic numbers required to implement an unsigned integer
1519/// division by a constant as a sequence of multiplies, adds and shifts.
1520/// Requires that the divisor not be 0.  Taken from "Hacker's Delight", Henry
1521/// S. Warren, Jr., chapter 10.
1522APInt::mu APInt::magicu() const {
1523  const APInt& d = *this;
1524  unsigned p;
1525  APInt nc, delta, q1, r1, q2, r2;
1526  struct mu magu;
1527  magu.a = 0;               // initialize "add" indicator
1528  APInt allOnes = APInt::getAllOnesValue(d.getBitWidth());
1529  APInt signedMin = APInt::getSignedMinValue(d.getBitWidth());
1530  APInt signedMax = APInt::getSignedMaxValue(d.getBitWidth());
1531
1532  nc = allOnes - (-d).urem(d);
1533  p = d.getBitWidth() - 1;  // initialize p
1534  q1 = signedMin.udiv(nc);  // initialize q1 = 2p/nc
1535  r1 = signedMin - q1*nc;   // initialize r1 = rem(2p,nc)
1536  q2 = signedMax.udiv(d);   // initialize q2 = (2p-1)/d
1537  r2 = signedMax - q2*d;    // initialize r2 = rem((2p-1),d)
1538  do {
1539    p = p + 1;
1540    if (r1.uge(nc - r1)) {
1541      q1 = q1 + q1 + 1;  // update q1
1542      r1 = r1 + r1 - nc; // update r1
1543    }
1544    else {
1545      q1 = q1+q1; // update q1
1546      r1 = r1+r1; // update r1
1547    }
1548    if ((r2 + 1).uge(d - r2)) {
1549      if (q2.uge(signedMax)) magu.a = 1;
1550      q2 = q2+q2 + 1;     // update q2
1551      r2 = r2+r2 + 1 - d; // update r2
1552    }
1553    else {
1554      if (q2.uge(signedMin)) magu.a = 1;
1555      q2 = q2+q2;     // update q2
1556      r2 = r2+r2 + 1; // update r2
1557    }
1558    delta = d - 1 - r2;
1559  } while (p < d.getBitWidth()*2 &&
1560           (q1.ult(delta) || (q1 == delta && r1 == 0)));
1561  magu.m = q2 + 1; // resulting magic number
1562  magu.s = p - d.getBitWidth();  // resulting shift
1563  return magu;
1564}
1565
1566/// Implementation of Knuth's Algorithm D (Division of nonnegative integers)
1567/// from "Art of Computer Programming, Volume 2", section 4.3.1, p. 272. The
1568/// variables here have the same names as in the algorithm. Comments explain
1569/// the algorithm and any deviation from it.
1570static void KnuthDiv(unsigned *u, unsigned *v, unsigned *q, unsigned* r,
1571                     unsigned m, unsigned n) {
1572  assert(u && "Must provide dividend");
1573  assert(v && "Must provide divisor");
1574  assert(q && "Must provide quotient");
1575  assert(u != v && u != q && v != q && "Must us different memory");
1576  assert(n>1 && "n must be > 1");
1577
1578  // Knuth uses the value b as the base of the number system. In our case b
1579  // is 2^31 so we just set it to -1u.
1580  uint64_t b = uint64_t(1) << 32;
1581
1582#if 0
1583  DEBUG(errs() << "KnuthDiv: m=" << m << " n=" << n << '\n');
1584  DEBUG(errs() << "KnuthDiv: original:");
1585  DEBUG(for (int i = m+n; i >=0; i--) errs() << " " << u[i]);
1586  DEBUG(errs() << " by");
1587  DEBUG(for (int i = n; i >0; i--) errs() << " " << v[i-1]);
1588  DEBUG(errs() << '\n');
1589#endif
1590  // D1. [Normalize.] Set d = b / (v[n-1] + 1) and multiply all the digits of
1591  // u and v by d. Note that we have taken Knuth's advice here to use a power
1592  // of 2 value for d such that d * v[n-1] >= b/2 (b is the base). A power of
1593  // 2 allows us to shift instead of multiply and it is easy to determine the
1594  // shift amount from the leading zeros.  We are basically normalizing the u
1595  // and v so that its high bits are shifted to the top of v's range without
1596  // overflow. Note that this can require an extra word in u so that u must
1597  // be of length m+n+1.
1598  unsigned shift = CountLeadingZeros_32(v[n-1]);
1599  unsigned v_carry = 0;
1600  unsigned u_carry = 0;
1601  if (shift) {
1602    for (unsigned i = 0; i < m+n; ++i) {
1603      unsigned u_tmp = u[i] >> (32 - shift);
1604      u[i] = (u[i] << shift) | u_carry;
1605      u_carry = u_tmp;
1606    }
1607    for (unsigned i = 0; i < n; ++i) {
1608      unsigned v_tmp = v[i] >> (32 - shift);
1609      v[i] = (v[i] << shift) | v_carry;
1610      v_carry = v_tmp;
1611    }
1612  }
1613  u[m+n] = u_carry;
1614#if 0
1615  DEBUG(errs() << "KnuthDiv:   normal:");
1616  DEBUG(for (int i = m+n; i >=0; i--) errs() << " " << u[i]);
1617  DEBUG(errs() << " by");
1618  DEBUG(for (int i = n; i >0; i--) errs() << " " << v[i-1]);
1619  DEBUG(errs() << '\n');
1620#endif
1621
1622  // D2. [Initialize j.]  Set j to m. This is the loop counter over the places.
1623  int j = m;
1624  do {
1625    DEBUG(errs() << "KnuthDiv: quotient digit #" << j << '\n');
1626    // D3. [Calculate q'.].
1627    //     Set qp = (u[j+n]*b + u[j+n-1]) / v[n-1]. (qp=qprime=q')
1628    //     Set rp = (u[j+n]*b + u[j+n-1]) % v[n-1]. (rp=rprime=r')
1629    // Now test if qp == b or qp*v[n-2] > b*rp + u[j+n-2]; if so, decrease
1630    // qp by 1, inrease rp by v[n-1], and repeat this test if rp < b. The test
1631    // on v[n-2] determines at high speed most of the cases in which the trial
1632    // value qp is one too large, and it eliminates all cases where qp is two
1633    // too large.
1634    uint64_t dividend = ((uint64_t(u[j+n]) << 32) + u[j+n-1]);
1635    DEBUG(errs() << "KnuthDiv: dividend == " << dividend << '\n');
1636    uint64_t qp = dividend / v[n-1];
1637    uint64_t rp = dividend % v[n-1];
1638    if (qp == b || qp*v[n-2] > b*rp + u[j+n-2]) {
1639      qp--;
1640      rp += v[n-1];
1641      if (rp < b && (qp == b || qp*v[n-2] > b*rp + u[j+n-2]))
1642        qp--;
1643    }
1644    DEBUG(errs() << "KnuthDiv: qp == " << qp << ", rp == " << rp << '\n');
1645
1646    // D4. [Multiply and subtract.] Replace (u[j+n]u[j+n-1]...u[j]) with
1647    // (u[j+n]u[j+n-1]..u[j]) - qp * (v[n-1]...v[1]v[0]). This computation
1648    // consists of a simple multiplication by a one-place number, combined with
1649    // a subtraction.
1650    bool isNeg = false;
1651    for (unsigned i = 0; i < n; ++i) {
1652      uint64_t u_tmp = uint64_t(u[j+i]) | (uint64_t(u[j+i+1]) << 32);
1653      uint64_t subtrahend = uint64_t(qp) * uint64_t(v[i]);
1654      bool borrow = subtrahend > u_tmp;
1655      DEBUG(errs() << "KnuthDiv: u_tmp == " << u_tmp
1656                   << ", subtrahend == " << subtrahend
1657                   << ", borrow = " << borrow << '\n');
1658
1659      uint64_t result = u_tmp - subtrahend;
1660      unsigned k = j + i;
1661      u[k++] = (unsigned)(result & (b-1)); // subtract low word
1662      u[k++] = (unsigned)(result >> 32);   // subtract high word
1663      while (borrow && k <= m+n) { // deal with borrow to the left
1664        borrow = u[k] == 0;
1665        u[k]--;
1666        k++;
1667      }
1668      isNeg |= borrow;
1669      DEBUG(errs() << "KnuthDiv: u[j+i] == " << u[j+i] << ",  u[j+i+1] == " <<
1670                    u[j+i+1] << '\n');
1671    }
1672    DEBUG(errs() << "KnuthDiv: after subtraction:");
1673    DEBUG(for (int i = m+n; i >=0; i--) errs() << " " << u[i]);
1674    DEBUG(errs() << '\n');
1675    // The digits (u[j+n]...u[j]) should be kept positive; if the result of
1676    // this step is actually negative, (u[j+n]...u[j]) should be left as the
1677    // true value plus b**(n+1), namely as the b's complement of
1678    // the true value, and a "borrow" to the left should be remembered.
1679    //
1680    if (isNeg) {
1681      bool carry = true;  // true because b's complement is "complement + 1"
1682      for (unsigned i = 0; i <= m+n; ++i) {
1683        u[i] = ~u[i] + carry; // b's complement
1684        carry = carry && u[i] == 0;
1685      }
1686    }
1687    DEBUG(errs() << "KnuthDiv: after complement:");
1688    DEBUG(for (int i = m+n; i >=0; i--) errs() << " " << u[i]);
1689    DEBUG(errs() << '\n');
1690
1691    // D5. [Test remainder.] Set q[j] = qp. If the result of step D4 was
1692    // negative, go to step D6; otherwise go on to step D7.
1693    q[j] = (unsigned)qp;
1694    if (isNeg) {
1695      // D6. [Add back]. The probability that this step is necessary is very
1696      // small, on the order of only 2/b. Make sure that test data accounts for
1697      // this possibility. Decrease q[j] by 1
1698      q[j]--;
1699      // and add (0v[n-1]...v[1]v[0]) to (u[j+n]u[j+n-1]...u[j+1]u[j]).
1700      // A carry will occur to the left of u[j+n], and it should be ignored
1701      // since it cancels with the borrow that occurred in D4.
1702      bool carry = false;
1703      for (unsigned i = 0; i < n; i++) {
1704        unsigned limit = std::min(u[j+i],v[i]);
1705        u[j+i] += v[i] + carry;
1706        carry = u[j+i] < limit || (carry && u[j+i] == limit);
1707      }
1708      u[j+n] += carry;
1709    }
1710    DEBUG(errs() << "KnuthDiv: after correction:");
1711    DEBUG(for (int i = m+n; i >=0; i--) errs() <<" " << u[i]);
1712    DEBUG(errs() << "\nKnuthDiv: digit result = " << q[j] << '\n');
1713
1714  // D7. [Loop on j.]  Decrease j by one. Now if j >= 0, go back to D3.
1715  } while (--j >= 0);
1716
1717  DEBUG(errs() << "KnuthDiv: quotient:");
1718  DEBUG(for (int i = m; i >=0; i--) errs() <<" " << q[i]);
1719  DEBUG(errs() << '\n');
1720
1721  // D8. [Unnormalize]. Now q[...] is the desired quotient, and the desired
1722  // remainder may be obtained by dividing u[...] by d. If r is non-null we
1723  // compute the remainder (urem uses this).
1724  if (r) {
1725    // The value d is expressed by the "shift" value above since we avoided
1726    // multiplication by d by using a shift left. So, all we have to do is
1727    // shift right here. In order to mak
1728    if (shift) {
1729      unsigned carry = 0;
1730      DEBUG(errs() << "KnuthDiv: remainder:");
1731      for (int i = n-1; i >= 0; i--) {
1732        r[i] = (u[i] >> shift) | carry;
1733        carry = u[i] << (32 - shift);
1734        DEBUG(errs() << " " << r[i]);
1735      }
1736    } else {
1737      for (int i = n-1; i >= 0; i--) {
1738        r[i] = u[i];
1739        DEBUG(errs() << " " << r[i]);
1740      }
1741    }
1742    DEBUG(errs() << '\n');
1743  }
1744#if 0
1745  DEBUG(errs() << '\n');
1746#endif
1747}
1748
1749void APInt::divide(const APInt LHS, unsigned lhsWords,
1750                   const APInt &RHS, unsigned rhsWords,
1751                   APInt *Quotient, APInt *Remainder)
1752{
1753  assert(lhsWords >= rhsWords && "Fractional result");
1754
1755  // First, compose the values into an array of 32-bit words instead of
1756  // 64-bit words. This is a necessity of both the "short division" algorithm
1757  // and the the Knuth "classical algorithm" which requires there to be native
1758  // operations for +, -, and * on an m bit value with an m*2 bit result. We
1759  // can't use 64-bit operands here because we don't have native results of
1760  // 128-bits. Furthermore, casting the 64-bit values to 32-bit values won't
1761  // work on large-endian machines.
1762  uint64_t mask = ~0ull >> (sizeof(unsigned)*CHAR_BIT);
1763  unsigned n = rhsWords * 2;
1764  unsigned m = (lhsWords * 2) - n;
1765
1766  // Allocate space for the temporary values we need either on the stack, if
1767  // it will fit, or on the heap if it won't.
1768  unsigned SPACE[128];
1769  unsigned *U = 0;
1770  unsigned *V = 0;
1771  unsigned *Q = 0;
1772  unsigned *R = 0;
1773  if ((Remainder?4:3)*n+2*m+1 <= 128) {
1774    U = &SPACE[0];
1775    V = &SPACE[m+n+1];
1776    Q = &SPACE[(m+n+1) + n];
1777    if (Remainder)
1778      R = &SPACE[(m+n+1) + n + (m+n)];
1779  } else {
1780    U = new unsigned[m + n + 1];
1781    V = new unsigned[n];
1782    Q = new unsigned[m+n];
1783    if (Remainder)
1784      R = new unsigned[n];
1785  }
1786
1787  // Initialize the dividend
1788  memset(U, 0, (m+n+1)*sizeof(unsigned));
1789  for (unsigned i = 0; i < lhsWords; ++i) {
1790    uint64_t tmp = (LHS.getNumWords() == 1 ? LHS.VAL : LHS.pVal[i]);
1791    U[i * 2] = (unsigned)(tmp & mask);
1792    U[i * 2 + 1] = (unsigned)(tmp >> (sizeof(unsigned)*CHAR_BIT));
1793  }
1794  U[m+n] = 0; // this extra word is for "spill" in the Knuth algorithm.
1795
1796  // Initialize the divisor
1797  memset(V, 0, (n)*sizeof(unsigned));
1798  for (unsigned i = 0; i < rhsWords; ++i) {
1799    uint64_t tmp = (RHS.getNumWords() == 1 ? RHS.VAL : RHS.pVal[i]);
1800    V[i * 2] = (unsigned)(tmp & mask);
1801    V[i * 2 + 1] = (unsigned)(tmp >> (sizeof(unsigned)*CHAR_BIT));
1802  }
1803
1804  // initialize the quotient and remainder
1805  memset(Q, 0, (m+n) * sizeof(unsigned));
1806  if (Remainder)
1807    memset(R, 0, n * sizeof(unsigned));
1808
1809  // Now, adjust m and n for the Knuth division. n is the number of words in
1810  // the divisor. m is the number of words by which the dividend exceeds the
1811  // divisor (i.e. m+n is the length of the dividend). These sizes must not
1812  // contain any zero words or the Knuth algorithm fails.
1813  for (unsigned i = n; i > 0 && V[i-1] == 0; i--) {
1814    n--;
1815    m++;
1816  }
1817  for (unsigned i = m+n; i > 0 && U[i-1] == 0; i--)
1818    m--;
1819
1820  // If we're left with only a single word for the divisor, Knuth doesn't work
1821  // so we implement the short division algorithm here. This is much simpler
1822  // and faster because we are certain that we can divide a 64-bit quantity
1823  // by a 32-bit quantity at hardware speed and short division is simply a
1824  // series of such operations. This is just like doing short division but we
1825  // are using base 2^32 instead of base 10.
1826  assert(n != 0 && "Divide by zero?");
1827  if (n == 1) {
1828    unsigned divisor = V[0];
1829    unsigned remainder = 0;
1830    for (int i = m+n-1; i >= 0; i--) {
1831      uint64_t partial_dividend = uint64_t(remainder) << 32 | U[i];
1832      if (partial_dividend == 0) {
1833        Q[i] = 0;
1834        remainder = 0;
1835      } else if (partial_dividend < divisor) {
1836        Q[i] = 0;
1837        remainder = (unsigned)partial_dividend;
1838      } else if (partial_dividend == divisor) {
1839        Q[i] = 1;
1840        remainder = 0;
1841      } else {
1842        Q[i] = (unsigned)(partial_dividend / divisor);
1843        remainder = (unsigned)(partial_dividend - (Q[i] * divisor));
1844      }
1845    }
1846    if (R)
1847      R[0] = remainder;
1848  } else {
1849    // Now we're ready to invoke the Knuth classical divide algorithm. In this
1850    // case n > 1.
1851    KnuthDiv(U, V, Q, R, m, n);
1852  }
1853
1854  // If the caller wants the quotient
1855  if (Quotient) {
1856    // Set up the Quotient value's memory.
1857    if (Quotient->BitWidth != LHS.BitWidth) {
1858      if (Quotient->isSingleWord())
1859        Quotient->VAL = 0;
1860      else
1861        delete [] Quotient->pVal;
1862      Quotient->BitWidth = LHS.BitWidth;
1863      if (!Quotient->isSingleWord())
1864        Quotient->pVal = getClearedMemory(Quotient->getNumWords());
1865    } else
1866      Quotient->clear();
1867
1868    // The quotient is in Q. Reconstitute the quotient into Quotient's low
1869    // order words.
1870    if (lhsWords == 1) {
1871      uint64_t tmp =
1872        uint64_t(Q[0]) | (uint64_t(Q[1]) << (APINT_BITS_PER_WORD / 2));
1873      if (Quotient->isSingleWord())
1874        Quotient->VAL = tmp;
1875      else
1876        Quotient->pVal[0] = tmp;
1877    } else {
1878      assert(!Quotient->isSingleWord() && "Quotient APInt not large enough");
1879      for (unsigned i = 0; i < lhsWords; ++i)
1880        Quotient->pVal[i] =
1881          uint64_t(Q[i*2]) | (uint64_t(Q[i*2+1]) << (APINT_BITS_PER_WORD / 2));
1882    }
1883  }
1884
1885  // If the caller wants the remainder
1886  if (Remainder) {
1887    // Set up the Remainder value's memory.
1888    if (Remainder->BitWidth != RHS.BitWidth) {
1889      if (Remainder->isSingleWord())
1890        Remainder->VAL = 0;
1891      else
1892        delete [] Remainder->pVal;
1893      Remainder->BitWidth = RHS.BitWidth;
1894      if (!Remainder->isSingleWord())
1895        Remainder->pVal = getClearedMemory(Remainder->getNumWords());
1896    } else
1897      Remainder->clear();
1898
1899    // The remainder is in R. Reconstitute the remainder into Remainder's low
1900    // order words.
1901    if (rhsWords == 1) {
1902      uint64_t tmp =
1903        uint64_t(R[0]) | (uint64_t(R[1]) << (APINT_BITS_PER_WORD / 2));
1904      if (Remainder->isSingleWord())
1905        Remainder->VAL = tmp;
1906      else
1907        Remainder->pVal[0] = tmp;
1908    } else {
1909      assert(!Remainder->isSingleWord() && "Remainder APInt not large enough");
1910      for (unsigned i = 0; i < rhsWords; ++i)
1911        Remainder->pVal[i] =
1912          uint64_t(R[i*2]) | (uint64_t(R[i*2+1]) << (APINT_BITS_PER_WORD / 2));
1913    }
1914  }
1915
1916  // Clean up the memory we allocated.
1917  if (U != &SPACE[0]) {
1918    delete [] U;
1919    delete [] V;
1920    delete [] Q;
1921    delete [] R;
1922  }
1923}
1924
1925APInt APInt::udiv(const APInt& RHS) const {
1926  assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
1927
1928  // First, deal with the easy case
1929  if (isSingleWord()) {
1930    assert(RHS.VAL != 0 && "Divide by zero?");
1931    return APInt(BitWidth, VAL / RHS.VAL);
1932  }
1933
1934  // Get some facts about the LHS and RHS number of bits and words
1935  unsigned rhsBits = RHS.getActiveBits();
1936  unsigned rhsWords = !rhsBits ? 0 : (APInt::whichWord(rhsBits - 1) + 1);
1937  assert(rhsWords && "Divided by zero???");
1938  unsigned lhsBits = this->getActiveBits();
1939  unsigned lhsWords = !lhsBits ? 0 : (APInt::whichWord(lhsBits - 1) + 1);
1940
1941  // Deal with some degenerate cases
1942  if (!lhsWords)
1943    // 0 / X ===> 0
1944    return APInt(BitWidth, 0);
1945  else if (lhsWords < rhsWords || this->ult(RHS)) {
1946    // X / Y ===> 0, iff X < Y
1947    return APInt(BitWidth, 0);
1948  } else if (*this == RHS) {
1949    // X / X ===> 1
1950    return APInt(BitWidth, 1);
1951  } else if (lhsWords == 1 && rhsWords == 1) {
1952    // All high words are zero, just use native divide
1953    return APInt(BitWidth, this->pVal[0] / RHS.pVal[0]);
1954  }
1955
1956  // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1957  APInt Quotient(1,0); // to hold result.
1958  divide(*this, lhsWords, RHS, rhsWords, &Quotient, 0);
1959  return Quotient;
1960}
1961
1962APInt APInt::urem(const APInt& RHS) const {
1963  assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
1964  if (isSingleWord()) {
1965    assert(RHS.VAL != 0 && "Remainder by zero?");
1966    return APInt(BitWidth, VAL % RHS.VAL);
1967  }
1968
1969  // Get some facts about the LHS
1970  unsigned lhsBits = getActiveBits();
1971  unsigned lhsWords = !lhsBits ? 0 : (whichWord(lhsBits - 1) + 1);
1972
1973  // Get some facts about the RHS
1974  unsigned rhsBits = RHS.getActiveBits();
1975  unsigned rhsWords = !rhsBits ? 0 : (APInt::whichWord(rhsBits - 1) + 1);
1976  assert(rhsWords && "Performing remainder operation by zero ???");
1977
1978  // Check the degenerate cases
1979  if (lhsWords == 0) {
1980    // 0 % Y ===> 0
1981    return APInt(BitWidth, 0);
1982  } else if (lhsWords < rhsWords || this->ult(RHS)) {
1983    // X % Y ===> X, iff X < Y
1984    return *this;
1985  } else if (*this == RHS) {
1986    // X % X == 0;
1987    return APInt(BitWidth, 0);
1988  } else if (lhsWords == 1) {
1989    // All high words are zero, just use native remainder
1990    return APInt(BitWidth, pVal[0] % RHS.pVal[0]);
1991  }
1992
1993  // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1994  APInt Remainder(1,0);
1995  divide(*this, lhsWords, RHS, rhsWords, 0, &Remainder);
1996  return Remainder;
1997}
1998
1999void APInt::udivrem(const APInt &LHS, const APInt &RHS,
2000                    APInt &Quotient, APInt &Remainder) {
2001  // Get some size facts about the dividend and divisor
2002  unsigned lhsBits  = LHS.getActiveBits();
2003  unsigned lhsWords = !lhsBits ? 0 : (APInt::whichWord(lhsBits - 1) + 1);
2004  unsigned rhsBits  = RHS.getActiveBits();
2005  unsigned rhsWords = !rhsBits ? 0 : (APInt::whichWord(rhsBits - 1) + 1);
2006
2007  // Check the degenerate cases
2008  if (lhsWords == 0) {
2009    Quotient = 0;                // 0 / Y ===> 0
2010    Remainder = 0;               // 0 % Y ===> 0
2011    return;
2012  }
2013
2014  if (lhsWords < rhsWords || LHS.ult(RHS)) {
2015    Remainder = LHS;            // X % Y ===> X, iff X < Y
2016    Quotient = 0;               // X / Y ===> 0, iff X < Y
2017    return;
2018  }
2019
2020  if (LHS == RHS) {
2021    Quotient  = 1;              // X / X ===> 1
2022    Remainder = 0;              // X % X ===> 0;
2023    return;
2024  }
2025
2026  if (lhsWords == 1 && rhsWords == 1) {
2027    // There is only one word to consider so use the native versions.
2028    uint64_t lhsValue = LHS.isSingleWord() ? LHS.VAL : LHS.pVal[0];
2029    uint64_t rhsValue = RHS.isSingleWord() ? RHS.VAL : RHS.pVal[0];
2030    Quotient = APInt(LHS.getBitWidth(), lhsValue / rhsValue);
2031    Remainder = APInt(LHS.getBitWidth(), lhsValue % rhsValue);
2032    return;
2033  }
2034
2035  // Okay, lets do it the long way
2036  divide(LHS, lhsWords, RHS, rhsWords, &Quotient, &Remainder);
2037}
2038
2039void APInt::fromString(unsigned numbits, const StringRef& str, uint8_t radix) {
2040  // Check our assumptions here
2041  assert(!str.empty() && "Invalid string length");
2042  assert((radix == 10 || radix == 8 || radix == 16 || radix == 2) &&
2043         "Radix should be 2, 8, 10, or 16!");
2044
2045  StringRef::iterator p = str.begin();
2046  size_t slen = str.size();
2047  bool isNeg = *p == '-';
2048  if (*p == '-' || *p == '+') {
2049    p++;
2050    slen--;
2051    assert(slen && "String is only a sign, needs a value.");
2052  }
2053  assert((slen <= numbits || radix != 2) && "Insufficient bit width");
2054  assert(((slen-1)*3 <= numbits || radix != 8) && "Insufficient bit width");
2055  assert(((slen-1)*4 <= numbits || radix != 16) && "Insufficient bit width");
2056  assert((((slen-1)*64)/22 <= numbits || radix != 10)
2057         && "Insufficient bit width");
2058
2059  // Allocate memory
2060  if (!isSingleWord())
2061    pVal = getClearedMemory(getNumWords());
2062
2063  // Figure out if we can shift instead of multiply
2064  unsigned shift = (radix == 16 ? 4 : radix == 8 ? 3 : radix == 2 ? 1 : 0);
2065
2066  // Set up an APInt for the digit to add outside the loop so we don't
2067  // constantly construct/destruct it.
2068  APInt apdigit(getBitWidth(), 0);
2069  APInt apradix(getBitWidth(), radix);
2070
2071  // Enter digit traversal loop
2072  for (StringRef::iterator e = str.end(); p != e; ++p) {
2073    unsigned digit = getDigit(*p, radix);
2074    assert(digit < radix && "Invalid character in digit string");
2075
2076    // Shift or multiply the value by the radix
2077    if (slen > 1) {
2078      if (shift)
2079        *this <<= shift;
2080      else
2081        *this *= apradix;
2082    }
2083
2084    // Add in the digit we just interpreted
2085    if (apdigit.isSingleWord())
2086      apdigit.VAL = digit;
2087    else
2088      apdigit.pVal[0] = digit;
2089    *this += apdigit;
2090  }
2091  // If its negative, put it in two's complement form
2092  if (isNeg) {
2093    (*this)--;
2094    this->flip();
2095  }
2096}
2097
2098void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix,
2099                     bool Signed) const {
2100  assert((Radix == 10 || Radix == 8 || Radix == 16 || Radix == 2) &&
2101         "Radix should be 2, 8, 10, or 16!");
2102
2103  // First, check for a zero value and just short circuit the logic below.
2104  if (*this == 0) {
2105    Str.push_back('0');
2106    return;
2107  }
2108
2109  static const char Digits[] = "0123456789ABCDEF";
2110
2111  if (isSingleWord()) {
2112    char Buffer[65];
2113    char *BufPtr = Buffer+65;
2114
2115    uint64_t N;
2116    if (Signed) {
2117      int64_t I = getSExtValue();
2118      if (I < 0) {
2119        Str.push_back('-');
2120        I = -I;
2121      }
2122      N = I;
2123    } else {
2124      N = getZExtValue();
2125    }
2126
2127    while (N) {
2128      *--BufPtr = Digits[N % Radix];
2129      N /= Radix;
2130    }
2131    Str.append(BufPtr, Buffer+65);
2132    return;
2133  }
2134
2135  APInt Tmp(*this);
2136
2137  if (Signed && isNegative()) {
2138    // They want to print the signed version and it is a negative value
2139    // Flip the bits and add one to turn it into the equivalent positive
2140    // value and put a '-' in the result.
2141    Tmp.flip();
2142    Tmp++;
2143    Str.push_back('-');
2144  }
2145
2146  // We insert the digits backward, then reverse them to get the right order.
2147  unsigned StartDig = Str.size();
2148
2149  // For the 2, 8 and 16 bit cases, we can just shift instead of divide
2150  // because the number of bits per digit (1, 3 and 4 respectively) divides
2151  // equaly.  We just shift until the value is zero.
2152  if (Radix != 10) {
2153    // Just shift tmp right for each digit width until it becomes zero
2154    unsigned ShiftAmt = (Radix == 16 ? 4 : (Radix == 8 ? 3 : 1));
2155    unsigned MaskAmt = Radix - 1;
2156
2157    while (Tmp != 0) {
2158      unsigned Digit = unsigned(Tmp.getRawData()[0]) & MaskAmt;
2159      Str.push_back(Digits[Digit]);
2160      Tmp = Tmp.lshr(ShiftAmt);
2161    }
2162  } else {
2163    APInt divisor(4, 10);
2164    while (Tmp != 0) {
2165      APInt APdigit(1, 0);
2166      APInt tmp2(Tmp.getBitWidth(), 0);
2167      divide(Tmp, Tmp.getNumWords(), divisor, divisor.getNumWords(), &tmp2,
2168             &APdigit);
2169      unsigned Digit = (unsigned)APdigit.getZExtValue();
2170      assert(Digit < Radix && "divide failed");
2171      Str.push_back(Digits[Digit]);
2172      Tmp = tmp2;
2173    }
2174  }
2175
2176  // Reverse the digits before returning.
2177  std::reverse(Str.begin()+StartDig, Str.end());
2178}
2179
2180/// toString - This returns the APInt as a std::string.  Note that this is an
2181/// inefficient method.  It is better to pass in a SmallVector/SmallString
2182/// to the methods above.
2183std::string APInt::toString(unsigned Radix = 10, bool Signed = true) const {
2184  SmallString<40> S;
2185  toString(S, Radix, Signed);
2186  return S.str();
2187}
2188
2189
2190void APInt::dump() const {
2191  SmallString<40> S, U;
2192  this->toStringUnsigned(U);
2193  this->toStringSigned(S);
2194  errs() << "APInt(" << BitWidth << "b, "
2195         << U.str() << "u " << S.str() << "s)";
2196}
2197
2198void APInt::print(raw_ostream &OS, bool isSigned) const {
2199  SmallString<40> S;
2200  this->toString(S, 10, isSigned);
2201  OS << S.str();
2202}
2203
2204// This implements a variety of operations on a representation of
2205// arbitrary precision, two's-complement, bignum integer values.
2206
2207// Assumed by lowHalf, highHalf, partMSB and partLSB.  A fairly safe
2208// and unrestricting assumption.
2209#define COMPILE_TIME_ASSERT(cond) extern int CTAssert[(cond) ? 1 : -1]
2210COMPILE_TIME_ASSERT(integerPartWidth % 2 == 0);
2211
2212/* Some handy functions local to this file.  */
2213namespace {
2214
2215  /* Returns the integer part with the least significant BITS set.
2216     BITS cannot be zero.  */
2217  static inline integerPart
2218  lowBitMask(unsigned int bits)
2219  {
2220    assert (bits != 0 && bits <= integerPartWidth);
2221
2222    return ~(integerPart) 0 >> (integerPartWidth - bits);
2223  }
2224
2225  /* Returns the value of the lower half of PART.  */
2226  static inline integerPart
2227  lowHalf(integerPart part)
2228  {
2229    return part & lowBitMask(integerPartWidth / 2);
2230  }
2231
2232  /* Returns the value of the upper half of PART.  */
2233  static inline integerPart
2234  highHalf(integerPart part)
2235  {
2236    return part >> (integerPartWidth / 2);
2237  }
2238
2239  /* Returns the bit number of the most significant set bit of a part.
2240     If the input number has no bits set -1U is returned.  */
2241  static unsigned int
2242  partMSB(integerPart value)
2243  {
2244    unsigned int n, msb;
2245
2246    if (value == 0)
2247      return -1U;
2248
2249    n = integerPartWidth / 2;
2250
2251    msb = 0;
2252    do {
2253      if (value >> n) {
2254        value >>= n;
2255        msb += n;
2256      }
2257
2258      n >>= 1;
2259    } while (n);
2260
2261    return msb;
2262  }
2263
2264  /* Returns the bit number of the least significant set bit of a
2265     part.  If the input number has no bits set -1U is returned.  */
2266  static unsigned int
2267  partLSB(integerPart value)
2268  {
2269    unsigned int n, lsb;
2270
2271    if (value == 0)
2272      return -1U;
2273
2274    lsb = integerPartWidth - 1;
2275    n = integerPartWidth / 2;
2276
2277    do {
2278      if (value << n) {
2279        value <<= n;
2280        lsb -= n;
2281      }
2282
2283      n >>= 1;
2284    } while (n);
2285
2286    return lsb;
2287  }
2288}
2289
2290/* Sets the least significant part of a bignum to the input value, and
2291   zeroes out higher parts.  */
2292void
2293APInt::tcSet(integerPart *dst, integerPart part, unsigned int parts)
2294{
2295  unsigned int i;
2296
2297  assert (parts > 0);
2298
2299  dst[0] = part;
2300  for(i = 1; i < parts; i++)
2301    dst[i] = 0;
2302}
2303
2304/* Assign one bignum to another.  */
2305void
2306APInt::tcAssign(integerPart *dst, const integerPart *src, unsigned int parts)
2307{
2308  unsigned int i;
2309
2310  for(i = 0; i < parts; i++)
2311    dst[i] = src[i];
2312}
2313
2314/* Returns true if a bignum is zero, false otherwise.  */
2315bool
2316APInt::tcIsZero(const integerPart *src, unsigned int parts)
2317{
2318  unsigned int i;
2319
2320  for(i = 0; i < parts; i++)
2321    if (src[i])
2322      return false;
2323
2324  return true;
2325}
2326
2327/* Extract the given bit of a bignum; returns 0 or 1.  */
2328int
2329APInt::tcExtractBit(const integerPart *parts, unsigned int bit)
2330{
2331  return(parts[bit / integerPartWidth]
2332         & ((integerPart) 1 << bit % integerPartWidth)) != 0;
2333}
2334
2335/* Set the given bit of a bignum.  */
2336void
2337APInt::tcSetBit(integerPart *parts, unsigned int bit)
2338{
2339  parts[bit / integerPartWidth] |= (integerPart) 1 << (bit % integerPartWidth);
2340}
2341
2342/* Returns the bit number of the least significant set bit of a
2343   number.  If the input number has no bits set -1U is returned.  */
2344unsigned int
2345APInt::tcLSB(const integerPart *parts, unsigned int n)
2346{
2347  unsigned int i, lsb;
2348
2349  for(i = 0; i < n; i++) {
2350      if (parts[i] != 0) {
2351          lsb = partLSB(parts[i]);
2352
2353          return lsb + i * integerPartWidth;
2354      }
2355  }
2356
2357  return -1U;
2358}
2359
2360/* Returns the bit number of the most significant set bit of a number.
2361   If the input number has no bits set -1U is returned.  */
2362unsigned int
2363APInt::tcMSB(const integerPart *parts, unsigned int n)
2364{
2365  unsigned int msb;
2366
2367  do {
2368      --n;
2369
2370      if (parts[n] != 0) {
2371          msb = partMSB(parts[n]);
2372
2373          return msb + n * integerPartWidth;
2374      }
2375  } while (n);
2376
2377  return -1U;
2378}
2379
2380/* Copy the bit vector of width srcBITS from SRC, starting at bit
2381   srcLSB, to DST, of dstCOUNT parts, such that the bit srcLSB becomes
2382   the least significant bit of DST.  All high bits above srcBITS in
2383   DST are zero-filled.  */
2384void
2385APInt::tcExtract(integerPart *dst, unsigned int dstCount,const integerPart *src,
2386                 unsigned int srcBits, unsigned int srcLSB)
2387{
2388  unsigned int firstSrcPart, dstParts, shift, n;
2389
2390  dstParts = (srcBits + integerPartWidth - 1) / integerPartWidth;
2391  assert (dstParts <= dstCount);
2392
2393  firstSrcPart = srcLSB / integerPartWidth;
2394  tcAssign (dst, src + firstSrcPart, dstParts);
2395
2396  shift = srcLSB % integerPartWidth;
2397  tcShiftRight (dst, dstParts, shift);
2398
2399  /* We now have (dstParts * integerPartWidth - shift) bits from SRC
2400     in DST.  If this is less that srcBits, append the rest, else
2401     clear the high bits.  */
2402  n = dstParts * integerPartWidth - shift;
2403  if (n < srcBits) {
2404    integerPart mask = lowBitMask (srcBits - n);
2405    dst[dstParts - 1] |= ((src[firstSrcPart + dstParts] & mask)
2406                          << n % integerPartWidth);
2407  } else if (n > srcBits) {
2408    if (srcBits % integerPartWidth)
2409      dst[dstParts - 1] &= lowBitMask (srcBits % integerPartWidth);
2410  }
2411
2412  /* Clear high parts.  */
2413  while (dstParts < dstCount)
2414    dst[dstParts++] = 0;
2415}
2416
2417/* DST += RHS + C where C is zero or one.  Returns the carry flag.  */
2418integerPart
2419APInt::tcAdd(integerPart *dst, const integerPart *rhs,
2420             integerPart c, unsigned int parts)
2421{
2422  unsigned int i;
2423
2424  assert(c <= 1);
2425
2426  for(i = 0; i < parts; i++) {
2427    integerPart l;
2428
2429    l = dst[i];
2430    if (c) {
2431      dst[i] += rhs[i] + 1;
2432      c = (dst[i] <= l);
2433    } else {
2434      dst[i] += rhs[i];
2435      c = (dst[i] < l);
2436    }
2437  }
2438
2439  return c;
2440}
2441
2442/* DST -= RHS + C where C is zero or one.  Returns the carry flag.  */
2443integerPart
2444APInt::tcSubtract(integerPart *dst, const integerPart *rhs,
2445                  integerPart c, unsigned int parts)
2446{
2447  unsigned int i;
2448
2449  assert(c <= 1);
2450
2451  for(i = 0; i < parts; i++) {
2452    integerPart l;
2453
2454    l = dst[i];
2455    if (c) {
2456      dst[i] -= rhs[i] + 1;
2457      c = (dst[i] >= l);
2458    } else {
2459      dst[i] -= rhs[i];
2460      c = (dst[i] > l);
2461    }
2462  }
2463
2464  return c;
2465}
2466
2467/* Negate a bignum in-place.  */
2468void
2469APInt::tcNegate(integerPart *dst, unsigned int parts)
2470{
2471  tcComplement(dst, parts);
2472  tcIncrement(dst, parts);
2473}
2474
2475/*  DST += SRC * MULTIPLIER + CARRY   if add is true
2476    DST  = SRC * MULTIPLIER + CARRY   if add is false
2477
2478    Requires 0 <= DSTPARTS <= SRCPARTS + 1.  If DST overlaps SRC
2479    they must start at the same point, i.e. DST == SRC.
2480
2481    If DSTPARTS == SRCPARTS + 1 no overflow occurs and zero is
2482    returned.  Otherwise DST is filled with the least significant
2483    DSTPARTS parts of the result, and if all of the omitted higher
2484    parts were zero return zero, otherwise overflow occurred and
2485    return one.  */
2486int
2487APInt::tcMultiplyPart(integerPart *dst, const integerPart *src,
2488                      integerPart multiplier, integerPart carry,
2489                      unsigned int srcParts, unsigned int dstParts,
2490                      bool add)
2491{
2492  unsigned int i, n;
2493
2494  /* Otherwise our writes of DST kill our later reads of SRC.  */
2495  assert(dst <= src || dst >= src + srcParts);
2496  assert(dstParts <= srcParts + 1);
2497
2498  /* N loops; minimum of dstParts and srcParts.  */
2499  n = dstParts < srcParts ? dstParts: srcParts;
2500
2501  for(i = 0; i < n; i++) {
2502    integerPart low, mid, high, srcPart;
2503
2504      /* [ LOW, HIGH ] = MULTIPLIER * SRC[i] + DST[i] + CARRY.
2505
2506         This cannot overflow, because
2507
2508         (n - 1) * (n - 1) + 2 (n - 1) = (n - 1) * (n + 1)
2509
2510         which is less than n^2.  */
2511
2512    srcPart = src[i];
2513
2514    if (multiplier == 0 || srcPart == 0)        {
2515      low = carry;
2516      high = 0;
2517    } else {
2518      low = lowHalf(srcPart) * lowHalf(multiplier);
2519      high = highHalf(srcPart) * highHalf(multiplier);
2520
2521      mid = lowHalf(srcPart) * highHalf(multiplier);
2522      high += highHalf(mid);
2523      mid <<= integerPartWidth / 2;
2524      if (low + mid < low)
2525        high++;
2526      low += mid;
2527
2528      mid = highHalf(srcPart) * lowHalf(multiplier);
2529      high += highHalf(mid);
2530      mid <<= integerPartWidth / 2;
2531      if (low + mid < low)
2532        high++;
2533      low += mid;
2534
2535      /* Now add carry.  */
2536      if (low + carry < low)
2537        high++;
2538      low += carry;
2539    }
2540
2541    if (add) {
2542      /* And now DST[i], and store the new low part there.  */
2543      if (low + dst[i] < low)
2544        high++;
2545      dst[i] += low;
2546    } else
2547      dst[i] = low;
2548
2549    carry = high;
2550  }
2551
2552  if (i < dstParts) {
2553    /* Full multiplication, there is no overflow.  */
2554    assert(i + 1 == dstParts);
2555    dst[i] = carry;
2556    return 0;
2557  } else {
2558    /* We overflowed if there is carry.  */
2559    if (carry)
2560      return 1;
2561
2562    /* We would overflow if any significant unwritten parts would be
2563       non-zero.  This is true if any remaining src parts are non-zero
2564       and the multiplier is non-zero.  */
2565    if (multiplier)
2566      for(; i < srcParts; i++)
2567        if (src[i])
2568          return 1;
2569
2570    /* We fitted in the narrow destination.  */
2571    return 0;
2572  }
2573}
2574
2575/* DST = LHS * RHS, where DST has the same width as the operands and
2576   is filled with the least significant parts of the result.  Returns
2577   one if overflow occurred, otherwise zero.  DST must be disjoint
2578   from both operands.  */
2579int
2580APInt::tcMultiply(integerPart *dst, const integerPart *lhs,
2581                  const integerPart *rhs, unsigned int parts)
2582{
2583  unsigned int i;
2584  int overflow;
2585
2586  assert(dst != lhs && dst != rhs);
2587
2588  overflow = 0;
2589  tcSet(dst, 0, parts);
2590
2591  for(i = 0; i < parts; i++)
2592    overflow |= tcMultiplyPart(&dst[i], lhs, rhs[i], 0, parts,
2593                               parts - i, true);
2594
2595  return overflow;
2596}
2597
2598/* DST = LHS * RHS, where DST has width the sum of the widths of the
2599   operands.  No overflow occurs.  DST must be disjoint from both
2600   operands.  Returns the number of parts required to hold the
2601   result.  */
2602unsigned int
2603APInt::tcFullMultiply(integerPart *dst, const integerPart *lhs,
2604                      const integerPart *rhs, unsigned int lhsParts,
2605                      unsigned int rhsParts)
2606{
2607  /* Put the narrower number on the LHS for less loops below.  */
2608  if (lhsParts > rhsParts) {
2609    return tcFullMultiply (dst, rhs, lhs, rhsParts, lhsParts);
2610  } else {
2611    unsigned int n;
2612
2613    assert(dst != lhs && dst != rhs);
2614
2615    tcSet(dst, 0, rhsParts);
2616
2617    for(n = 0; n < lhsParts; n++)
2618      tcMultiplyPart(&dst[n], rhs, lhs[n], 0, rhsParts, rhsParts + 1, true);
2619
2620    n = lhsParts + rhsParts;
2621
2622    return n - (dst[n - 1] == 0);
2623  }
2624}
2625
2626/* If RHS is zero LHS and REMAINDER are left unchanged, return one.
2627   Otherwise set LHS to LHS / RHS with the fractional part discarded,
2628   set REMAINDER to the remainder, return zero.  i.e.
2629
2630   OLD_LHS = RHS * LHS + REMAINDER
2631
2632   SCRATCH is a bignum of the same size as the operands and result for
2633   use by the routine; its contents need not be initialized and are
2634   destroyed.  LHS, REMAINDER and SCRATCH must be distinct.
2635*/
2636int
2637APInt::tcDivide(integerPart *lhs, const integerPart *rhs,
2638                integerPart *remainder, integerPart *srhs,
2639                unsigned int parts)
2640{
2641  unsigned int n, shiftCount;
2642  integerPart mask;
2643
2644  assert(lhs != remainder && lhs != srhs && remainder != srhs);
2645
2646  shiftCount = tcMSB(rhs, parts) + 1;
2647  if (shiftCount == 0)
2648    return true;
2649
2650  shiftCount = parts * integerPartWidth - shiftCount;
2651  n = shiftCount / integerPartWidth;
2652  mask = (integerPart) 1 << (shiftCount % integerPartWidth);
2653
2654  tcAssign(srhs, rhs, parts);
2655  tcShiftLeft(srhs, parts, shiftCount);
2656  tcAssign(remainder, lhs, parts);
2657  tcSet(lhs, 0, parts);
2658
2659  /* Loop, subtracting SRHS if REMAINDER is greater and adding that to
2660     the total.  */
2661  for(;;) {
2662      int compare;
2663
2664      compare = tcCompare(remainder, srhs, parts);
2665      if (compare >= 0) {
2666        tcSubtract(remainder, srhs, 0, parts);
2667        lhs[n] |= mask;
2668      }
2669
2670      if (shiftCount == 0)
2671        break;
2672      shiftCount--;
2673      tcShiftRight(srhs, parts, 1);
2674      if ((mask >>= 1) == 0)
2675        mask = (integerPart) 1 << (integerPartWidth - 1), n--;
2676  }
2677
2678  return false;
2679}
2680
2681/* Shift a bignum left COUNT bits in-place.  Shifted in bits are zero.
2682   There are no restrictions on COUNT.  */
2683void
2684APInt::tcShiftLeft(integerPart *dst, unsigned int parts, unsigned int count)
2685{
2686  if (count) {
2687    unsigned int jump, shift;
2688
2689    /* Jump is the inter-part jump; shift is is intra-part shift.  */
2690    jump = count / integerPartWidth;
2691    shift = count % integerPartWidth;
2692
2693    while (parts > jump) {
2694      integerPart part;
2695
2696      parts--;
2697
2698      /* dst[i] comes from the two parts src[i - jump] and, if we have
2699         an intra-part shift, src[i - jump - 1].  */
2700      part = dst[parts - jump];
2701      if (shift) {
2702        part <<= shift;
2703        if (parts >= jump + 1)
2704          part |= dst[parts - jump - 1] >> (integerPartWidth - shift);
2705      }
2706
2707      dst[parts] = part;
2708    }
2709
2710    while (parts > 0)
2711      dst[--parts] = 0;
2712  }
2713}
2714
2715/* Shift a bignum right COUNT bits in-place.  Shifted in bits are
2716   zero.  There are no restrictions on COUNT.  */
2717void
2718APInt::tcShiftRight(integerPart *dst, unsigned int parts, unsigned int count)
2719{
2720  if (count) {
2721    unsigned int i, jump, shift;
2722
2723    /* Jump is the inter-part jump; shift is is intra-part shift.  */
2724    jump = count / integerPartWidth;
2725    shift = count % integerPartWidth;
2726
2727    /* Perform the shift.  This leaves the most significant COUNT bits
2728       of the result at zero.  */
2729    for(i = 0; i < parts; i++) {
2730      integerPart part;
2731
2732      if (i + jump >= parts) {
2733        part = 0;
2734      } else {
2735        part = dst[i + jump];
2736        if (shift) {
2737          part >>= shift;
2738          if (i + jump + 1 < parts)
2739            part |= dst[i + jump + 1] << (integerPartWidth - shift);
2740        }
2741      }
2742
2743      dst[i] = part;
2744    }
2745  }
2746}
2747
2748/* Bitwise and of two bignums.  */
2749void
2750APInt::tcAnd(integerPart *dst, const integerPart *rhs, unsigned int parts)
2751{
2752  unsigned int i;
2753
2754  for(i = 0; i < parts; i++)
2755    dst[i] &= rhs[i];
2756}
2757
2758/* Bitwise inclusive or of two bignums.  */
2759void
2760APInt::tcOr(integerPart *dst, const integerPart *rhs, unsigned int parts)
2761{
2762  unsigned int i;
2763
2764  for(i = 0; i < parts; i++)
2765    dst[i] |= rhs[i];
2766}
2767
2768/* Bitwise exclusive or of two bignums.  */
2769void
2770APInt::tcXor(integerPart *dst, const integerPart *rhs, unsigned int parts)
2771{
2772  unsigned int i;
2773
2774  for(i = 0; i < parts; i++)
2775    dst[i] ^= rhs[i];
2776}
2777
2778/* Complement a bignum in-place.  */
2779void
2780APInt::tcComplement(integerPart *dst, unsigned int parts)
2781{
2782  unsigned int i;
2783
2784  for(i = 0; i < parts; i++)
2785    dst[i] = ~dst[i];
2786}
2787
2788/* Comparison (unsigned) of two bignums.  */
2789int
2790APInt::tcCompare(const integerPart *lhs, const integerPart *rhs,
2791                 unsigned int parts)
2792{
2793  while (parts) {
2794      parts--;
2795      if (lhs[parts] == rhs[parts])
2796        continue;
2797
2798      if (lhs[parts] > rhs[parts])
2799        return 1;
2800      else
2801        return -1;
2802    }
2803
2804  return 0;
2805}
2806
2807/* Increment a bignum in-place, return the carry flag.  */
2808integerPart
2809APInt::tcIncrement(integerPart *dst, unsigned int parts)
2810{
2811  unsigned int i;
2812
2813  for(i = 0; i < parts; i++)
2814    if (++dst[i] != 0)
2815      break;
2816
2817  return i == parts;
2818}
2819
2820/* Set the least significant BITS bits of a bignum, clear the
2821   rest.  */
2822void
2823APInt::tcSetLeastSignificantBits(integerPart *dst, unsigned int parts,
2824                                 unsigned int bits)
2825{
2826  unsigned int i;
2827
2828  i = 0;
2829  while (bits > integerPartWidth) {
2830    dst[i++] = ~(integerPart) 0;
2831    bits -= integerPartWidth;
2832  }
2833
2834  if (bits)
2835    dst[i++] = ~(integerPart) 0 >> (integerPartWidth - bits);
2836
2837  while (i < parts)
2838    dst[i++] = 0;
2839}
2840