APInt.cpp revision 212904
1//===-- APInt.cpp - Implement APInt class ---------------------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file implements a class to represent arbitrary precision integer
11// constant values and provide a variety of arithmetic operations on them.
12//
13//===----------------------------------------------------------------------===//
14
15#define DEBUG_TYPE "apint"
16#include "llvm/ADT/APInt.h"
17#include "llvm/ADT/StringRef.h"
18#include "llvm/ADT/FoldingSet.h"
19#include "llvm/ADT/SmallString.h"
20#include "llvm/Support/Debug.h"
21#include "llvm/Support/ErrorHandling.h"
22#include "llvm/Support/MathExtras.h"
23#include "llvm/Support/raw_ostream.h"
24#include <cmath>
25#include <limits>
26#include <cstring>
27#include <cstdlib>
28using namespace llvm;
29
30/// A utility function for allocating memory, checking for allocation failures,
31/// and ensuring the contents are zeroed.
32inline static uint64_t* getClearedMemory(unsigned numWords) {
33  uint64_t * result = new uint64_t[numWords];
34  assert(result && "APInt memory allocation fails!");
35  memset(result, 0, numWords * sizeof(uint64_t));
36  return result;
37}
38
39/// A utility function for allocating memory and checking for allocation
40/// failure.  The content is not zeroed.
41inline static uint64_t* getMemory(unsigned numWords) {
42  uint64_t * result = new uint64_t[numWords];
43  assert(result && "APInt memory allocation fails!");
44  return result;
45}
46
47/// A utility function that converts a character to a digit.
48inline static unsigned getDigit(char cdigit, uint8_t radix) {
49  unsigned r;
50
51  if (radix == 16) {
52    r = cdigit - '0';
53    if (r <= 9)
54      return r;
55
56    r = cdigit - 'A';
57    if (r <= 5)
58      return r + 10;
59
60    r = cdigit - 'a';
61    if (r <= 5)
62      return r + 10;
63  }
64
65  r = cdigit - '0';
66  if (r < radix)
67    return r;
68
69  return -1U;
70}
71
72
73void APInt::initSlowCase(unsigned numBits, uint64_t val, bool isSigned) {
74  pVal = getClearedMemory(getNumWords());
75  pVal[0] = val;
76  if (isSigned && int64_t(val) < 0)
77    for (unsigned i = 1; i < getNumWords(); ++i)
78      pVal[i] = -1ULL;
79}
80
81void APInt::initSlowCase(const APInt& that) {
82  pVal = getMemory(getNumWords());
83  memcpy(pVal, that.pVal, getNumWords() * APINT_WORD_SIZE);
84}
85
86
87APInt::APInt(unsigned numBits, unsigned numWords, const uint64_t bigVal[])
88  : BitWidth(numBits), VAL(0) {
89  assert(BitWidth && "Bitwidth too small");
90  assert(bigVal && "Null pointer detected!");
91  if (isSingleWord())
92    VAL = bigVal[0];
93  else {
94    // Get memory, cleared to 0
95    pVal = getClearedMemory(getNumWords());
96    // Calculate the number of words to copy
97    unsigned words = std::min<unsigned>(numWords, getNumWords());
98    // Copy the words from bigVal to pVal
99    memcpy(pVal, bigVal, words * APINT_WORD_SIZE);
100  }
101  // Make sure unused high bits are cleared
102  clearUnusedBits();
103}
104
105APInt::APInt(unsigned numbits, StringRef Str, uint8_t radix)
106  : BitWidth(numbits), VAL(0) {
107  assert(BitWidth && "Bitwidth too small");
108  fromString(numbits, Str, radix);
109}
110
111APInt& APInt::AssignSlowCase(const APInt& RHS) {
112  // Don't do anything for X = X
113  if (this == &RHS)
114    return *this;
115
116  if (BitWidth == RHS.getBitWidth()) {
117    // assume same bit-width single-word case is already handled
118    assert(!isSingleWord());
119    memcpy(pVal, RHS.pVal, getNumWords() * APINT_WORD_SIZE);
120    return *this;
121  }
122
123  if (isSingleWord()) {
124    // assume case where both are single words is already handled
125    assert(!RHS.isSingleWord());
126    VAL = 0;
127    pVal = getMemory(RHS.getNumWords());
128    memcpy(pVal, RHS.pVal, RHS.getNumWords() * APINT_WORD_SIZE);
129  } else if (getNumWords() == RHS.getNumWords())
130    memcpy(pVal, RHS.pVal, RHS.getNumWords() * APINT_WORD_SIZE);
131  else if (RHS.isSingleWord()) {
132    delete [] pVal;
133    VAL = RHS.VAL;
134  } else {
135    delete [] pVal;
136    pVal = getMemory(RHS.getNumWords());
137    memcpy(pVal, RHS.pVal, RHS.getNumWords() * APINT_WORD_SIZE);
138  }
139  BitWidth = RHS.BitWidth;
140  return clearUnusedBits();
141}
142
143APInt& APInt::operator=(uint64_t RHS) {
144  if (isSingleWord())
145    VAL = RHS;
146  else {
147    pVal[0] = RHS;
148    memset(pVal+1, 0, (getNumWords() - 1) * APINT_WORD_SIZE);
149  }
150  return clearUnusedBits();
151}
152
153/// Profile - This method 'profiles' an APInt for use with FoldingSet.
154void APInt::Profile(FoldingSetNodeID& ID) const {
155  ID.AddInteger(BitWidth);
156
157  if (isSingleWord()) {
158    ID.AddInteger(VAL);
159    return;
160  }
161
162  unsigned NumWords = getNumWords();
163  for (unsigned i = 0; i < NumWords; ++i)
164    ID.AddInteger(pVal[i]);
165}
166
167/// add_1 - This function adds a single "digit" integer, y, to the multiple
168/// "digit" integer array,  x[]. x[] is modified to reflect the addition and
169/// 1 is returned if there is a carry out, otherwise 0 is returned.
170/// @returns the carry of the addition.
171static bool add_1(uint64_t dest[], uint64_t x[], unsigned len, uint64_t y) {
172  for (unsigned i = 0; i < len; ++i) {
173    dest[i] = y + x[i];
174    if (dest[i] < y)
175      y = 1; // Carry one to next digit.
176    else {
177      y = 0; // No need to carry so exit early
178      break;
179    }
180  }
181  return y;
182}
183
184/// @brief Prefix increment operator. Increments the APInt by one.
185APInt& APInt::operator++() {
186  if (isSingleWord())
187    ++VAL;
188  else
189    add_1(pVal, pVal, getNumWords(), 1);
190  return clearUnusedBits();
191}
192
193/// sub_1 - This function subtracts a single "digit" (64-bit word), y, from
194/// the multi-digit integer array, x[], propagating the borrowed 1 value until
195/// no further borrowing is neeeded or it runs out of "digits" in x.  The result
196/// is 1 if "borrowing" exhausted the digits in x, or 0 if x was not exhausted.
197/// In other words, if y > x then this function returns 1, otherwise 0.
198/// @returns the borrow out of the subtraction
199static bool sub_1(uint64_t x[], unsigned len, uint64_t y) {
200  for (unsigned i = 0; i < len; ++i) {
201    uint64_t X = x[i];
202    x[i] -= y;
203    if (y > X)
204      y = 1;  // We have to "borrow 1" from next "digit"
205    else {
206      y = 0;  // No need to borrow
207      break;  // Remaining digits are unchanged so exit early
208    }
209  }
210  return bool(y);
211}
212
213/// @brief Prefix decrement operator. Decrements the APInt by one.
214APInt& APInt::operator--() {
215  if (isSingleWord())
216    --VAL;
217  else
218    sub_1(pVal, getNumWords(), 1);
219  return clearUnusedBits();
220}
221
222/// add - This function adds the integer array x to the integer array Y and
223/// places the result in dest.
224/// @returns the carry out from the addition
225/// @brief General addition of 64-bit integer arrays
226static bool add(uint64_t *dest, const uint64_t *x, const uint64_t *y,
227                unsigned len) {
228  bool carry = false;
229  for (unsigned i = 0; i< len; ++i) {
230    uint64_t limit = std::min(x[i],y[i]); // must come first in case dest == x
231    dest[i] = x[i] + y[i] + carry;
232    carry = dest[i] < limit || (carry && dest[i] == limit);
233  }
234  return carry;
235}
236
237/// Adds the RHS APint to this APInt.
238/// @returns this, after addition of RHS.
239/// @brief Addition assignment operator.
240APInt& APInt::operator+=(const APInt& RHS) {
241  assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
242  if (isSingleWord())
243    VAL += RHS.VAL;
244  else {
245    add(pVal, pVal, RHS.pVal, getNumWords());
246  }
247  return clearUnusedBits();
248}
249
250/// Subtracts the integer array y from the integer array x
251/// @returns returns the borrow out.
252/// @brief Generalized subtraction of 64-bit integer arrays.
253static bool sub(uint64_t *dest, const uint64_t *x, const uint64_t *y,
254                unsigned len) {
255  bool borrow = false;
256  for (unsigned i = 0; i < len; ++i) {
257    uint64_t x_tmp = borrow ? x[i] - 1 : x[i];
258    borrow = y[i] > x_tmp || (borrow && x[i] == 0);
259    dest[i] = x_tmp - y[i];
260  }
261  return borrow;
262}
263
264/// Subtracts the RHS APInt from this APInt
265/// @returns this, after subtraction
266/// @brief Subtraction assignment operator.
267APInt& APInt::operator-=(const APInt& RHS) {
268  assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
269  if (isSingleWord())
270    VAL -= RHS.VAL;
271  else
272    sub(pVal, pVal, RHS.pVal, getNumWords());
273  return clearUnusedBits();
274}
275
276/// Multiplies an integer array, x, by a uint64_t integer and places the result
277/// into dest.
278/// @returns the carry out of the multiplication.
279/// @brief Multiply a multi-digit APInt by a single digit (64-bit) integer.
280static uint64_t mul_1(uint64_t dest[], uint64_t x[], unsigned len, uint64_t y) {
281  // Split y into high 32-bit part (hy)  and low 32-bit part (ly)
282  uint64_t ly = y & 0xffffffffULL, hy = y >> 32;
283  uint64_t carry = 0;
284
285  // For each digit of x.
286  for (unsigned i = 0; i < len; ++i) {
287    // Split x into high and low words
288    uint64_t lx = x[i] & 0xffffffffULL;
289    uint64_t hx = x[i] >> 32;
290    // hasCarry - A flag to indicate if there is a carry to the next digit.
291    // hasCarry == 0, no carry
292    // hasCarry == 1, has carry
293    // hasCarry == 2, no carry and the calculation result == 0.
294    uint8_t hasCarry = 0;
295    dest[i] = carry + lx * ly;
296    // Determine if the add above introduces carry.
297    hasCarry = (dest[i] < carry) ? 1 : 0;
298    carry = hx * ly + (dest[i] >> 32) + (hasCarry ? (1ULL << 32) : 0);
299    // The upper limit of carry can be (2^32 - 1)(2^32 - 1) +
300    // (2^32 - 1) + 2^32 = 2^64.
301    hasCarry = (!carry && hasCarry) ? 1 : (!carry ? 2 : 0);
302
303    carry += (lx * hy) & 0xffffffffULL;
304    dest[i] = (carry << 32) | (dest[i] & 0xffffffffULL);
305    carry = (((!carry && hasCarry != 2) || hasCarry == 1) ? (1ULL << 32) : 0) +
306            (carry >> 32) + ((lx * hy) >> 32) + hx * hy;
307  }
308  return carry;
309}
310
311/// Multiplies integer array x by integer array y and stores the result into
312/// the integer array dest. Note that dest's size must be >= xlen + ylen.
313/// @brief Generalized multiplicate of integer arrays.
314static void mul(uint64_t dest[], uint64_t x[], unsigned xlen, uint64_t y[],
315                unsigned ylen) {
316  dest[xlen] = mul_1(dest, x, xlen, y[0]);
317  for (unsigned i = 1; i < ylen; ++i) {
318    uint64_t ly = y[i] & 0xffffffffULL, hy = y[i] >> 32;
319    uint64_t carry = 0, lx = 0, hx = 0;
320    for (unsigned j = 0; j < xlen; ++j) {
321      lx = x[j] & 0xffffffffULL;
322      hx = x[j] >> 32;
323      // hasCarry - A flag to indicate if has carry.
324      // hasCarry == 0, no carry
325      // hasCarry == 1, has carry
326      // hasCarry == 2, no carry and the calculation result == 0.
327      uint8_t hasCarry = 0;
328      uint64_t resul = carry + lx * ly;
329      hasCarry = (resul < carry) ? 1 : 0;
330      carry = (hasCarry ? (1ULL << 32) : 0) + hx * ly + (resul >> 32);
331      hasCarry = (!carry && hasCarry) ? 1 : (!carry ? 2 : 0);
332
333      carry += (lx * hy) & 0xffffffffULL;
334      resul = (carry << 32) | (resul & 0xffffffffULL);
335      dest[i+j] += resul;
336      carry = (((!carry && hasCarry != 2) || hasCarry == 1) ? (1ULL << 32) : 0)+
337              (carry >> 32) + (dest[i+j] < resul ? 1 : 0) +
338              ((lx * hy) >> 32) + hx * hy;
339    }
340    dest[i+xlen] = carry;
341  }
342}
343
344APInt& APInt::operator*=(const APInt& RHS) {
345  assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
346  if (isSingleWord()) {
347    VAL *= RHS.VAL;
348    clearUnusedBits();
349    return *this;
350  }
351
352  // Get some bit facts about LHS and check for zero
353  unsigned lhsBits = getActiveBits();
354  unsigned lhsWords = !lhsBits ? 0 : whichWord(lhsBits - 1) + 1;
355  if (!lhsWords)
356    // 0 * X ===> 0
357    return *this;
358
359  // Get some bit facts about RHS and check for zero
360  unsigned rhsBits = RHS.getActiveBits();
361  unsigned rhsWords = !rhsBits ? 0 : whichWord(rhsBits - 1) + 1;
362  if (!rhsWords) {
363    // X * 0 ===> 0
364    clear();
365    return *this;
366  }
367
368  // Allocate space for the result
369  unsigned destWords = rhsWords + lhsWords;
370  uint64_t *dest = getMemory(destWords);
371
372  // Perform the long multiply
373  mul(dest, pVal, lhsWords, RHS.pVal, rhsWords);
374
375  // Copy result back into *this
376  clear();
377  unsigned wordsToCopy = destWords >= getNumWords() ? getNumWords() : destWords;
378  memcpy(pVal, dest, wordsToCopy * APINT_WORD_SIZE);
379
380  // delete dest array and return
381  delete[] dest;
382  return *this;
383}
384
385APInt& APInt::operator&=(const APInt& RHS) {
386  assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
387  if (isSingleWord()) {
388    VAL &= RHS.VAL;
389    return *this;
390  }
391  unsigned numWords = getNumWords();
392  for (unsigned i = 0; i < numWords; ++i)
393    pVal[i] &= RHS.pVal[i];
394  return *this;
395}
396
397APInt& APInt::operator|=(const APInt& RHS) {
398  assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
399  if (isSingleWord()) {
400    VAL |= RHS.VAL;
401    return *this;
402  }
403  unsigned numWords = getNumWords();
404  for (unsigned i = 0; i < numWords; ++i)
405    pVal[i] |= RHS.pVal[i];
406  return *this;
407}
408
409APInt& APInt::operator^=(const APInt& RHS) {
410  assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
411  if (isSingleWord()) {
412    VAL ^= RHS.VAL;
413    this->clearUnusedBits();
414    return *this;
415  }
416  unsigned numWords = getNumWords();
417  for (unsigned i = 0; i < numWords; ++i)
418    pVal[i] ^= RHS.pVal[i];
419  return clearUnusedBits();
420}
421
422APInt APInt::AndSlowCase(const APInt& RHS) const {
423  unsigned numWords = getNumWords();
424  uint64_t* val = getMemory(numWords);
425  for (unsigned i = 0; i < numWords; ++i)
426    val[i] = pVal[i] & RHS.pVal[i];
427  return APInt(val, getBitWidth());
428}
429
430APInt APInt::OrSlowCase(const APInt& RHS) const {
431  unsigned numWords = getNumWords();
432  uint64_t *val = getMemory(numWords);
433  for (unsigned i = 0; i < numWords; ++i)
434    val[i] = pVal[i] | RHS.pVal[i];
435  return APInt(val, getBitWidth());
436}
437
438APInt APInt::XorSlowCase(const APInt& RHS) const {
439  unsigned numWords = getNumWords();
440  uint64_t *val = getMemory(numWords);
441  for (unsigned i = 0; i < numWords; ++i)
442    val[i] = pVal[i] ^ RHS.pVal[i];
443
444  // 0^0==1 so clear the high bits in case they got set.
445  return APInt(val, getBitWidth()).clearUnusedBits();
446}
447
448bool APInt::operator !() const {
449  if (isSingleWord())
450    return !VAL;
451
452  for (unsigned i = 0; i < getNumWords(); ++i)
453    if (pVal[i])
454      return false;
455  return true;
456}
457
458APInt APInt::operator*(const APInt& RHS) const {
459  assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
460  if (isSingleWord())
461    return APInt(BitWidth, VAL * RHS.VAL);
462  APInt Result(*this);
463  Result *= RHS;
464  return Result.clearUnusedBits();
465}
466
467APInt APInt::operator+(const APInt& RHS) const {
468  assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
469  if (isSingleWord())
470    return APInt(BitWidth, VAL + RHS.VAL);
471  APInt Result(BitWidth, 0);
472  add(Result.pVal, this->pVal, RHS.pVal, getNumWords());
473  return Result.clearUnusedBits();
474}
475
476APInt APInt::operator-(const APInt& RHS) const {
477  assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
478  if (isSingleWord())
479    return APInt(BitWidth, VAL - RHS.VAL);
480  APInt Result(BitWidth, 0);
481  sub(Result.pVal, this->pVal, RHS.pVal, getNumWords());
482  return Result.clearUnusedBits();
483}
484
485bool APInt::operator[](unsigned bitPosition) const {
486  return (maskBit(bitPosition) &
487          (isSingleWord() ?  VAL : pVal[whichWord(bitPosition)])) != 0;
488}
489
490bool APInt::EqualSlowCase(const APInt& RHS) const {
491  // Get some facts about the number of bits used in the two operands.
492  unsigned n1 = getActiveBits();
493  unsigned n2 = RHS.getActiveBits();
494
495  // If the number of bits isn't the same, they aren't equal
496  if (n1 != n2)
497    return false;
498
499  // If the number of bits fits in a word, we only need to compare the low word.
500  if (n1 <= APINT_BITS_PER_WORD)
501    return pVal[0] == RHS.pVal[0];
502
503  // Otherwise, compare everything
504  for (int i = whichWord(n1 - 1); i >= 0; --i)
505    if (pVal[i] != RHS.pVal[i])
506      return false;
507  return true;
508}
509
510bool APInt::EqualSlowCase(uint64_t Val) const {
511  unsigned n = getActiveBits();
512  if (n <= APINT_BITS_PER_WORD)
513    return pVal[0] == Val;
514  else
515    return false;
516}
517
518bool APInt::ult(const APInt& RHS) const {
519  assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison");
520  if (isSingleWord())
521    return VAL < RHS.VAL;
522
523  // Get active bit length of both operands
524  unsigned n1 = getActiveBits();
525  unsigned n2 = RHS.getActiveBits();
526
527  // If magnitude of LHS is less than RHS, return true.
528  if (n1 < n2)
529    return true;
530
531  // If magnitude of RHS is greather than LHS, return false.
532  if (n2 < n1)
533    return false;
534
535  // If they bot fit in a word, just compare the low order word
536  if (n1 <= APINT_BITS_PER_WORD && n2 <= APINT_BITS_PER_WORD)
537    return pVal[0] < RHS.pVal[0];
538
539  // Otherwise, compare all words
540  unsigned topWord = whichWord(std::max(n1,n2)-1);
541  for (int i = topWord; i >= 0; --i) {
542    if (pVal[i] > RHS.pVal[i])
543      return false;
544    if (pVal[i] < RHS.pVal[i])
545      return true;
546  }
547  return false;
548}
549
550bool APInt::slt(const APInt& RHS) const {
551  assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison");
552  if (isSingleWord()) {
553    int64_t lhsSext = (int64_t(VAL) << (64-BitWidth)) >> (64-BitWidth);
554    int64_t rhsSext = (int64_t(RHS.VAL) << (64-BitWidth)) >> (64-BitWidth);
555    return lhsSext < rhsSext;
556  }
557
558  APInt lhs(*this);
559  APInt rhs(RHS);
560  bool lhsNeg = isNegative();
561  bool rhsNeg = rhs.isNegative();
562  if (lhsNeg) {
563    // Sign bit is set so perform two's complement to make it positive
564    lhs.flip();
565    lhs++;
566  }
567  if (rhsNeg) {
568    // Sign bit is set so perform two's complement to make it positive
569    rhs.flip();
570    rhs++;
571  }
572
573  // Now we have unsigned values to compare so do the comparison if necessary
574  // based on the negativeness of the values.
575  if (lhsNeg)
576    if (rhsNeg)
577      return lhs.ugt(rhs);
578    else
579      return true;
580  else if (rhsNeg)
581    return false;
582  else
583    return lhs.ult(rhs);
584}
585
586APInt& APInt::set(unsigned bitPosition) {
587  if (isSingleWord())
588    VAL |= maskBit(bitPosition);
589  else
590    pVal[whichWord(bitPosition)] |= maskBit(bitPosition);
591  return *this;
592}
593
594/// Set the given bit to 0 whose position is given as "bitPosition".
595/// @brief Set a given bit to 0.
596APInt& APInt::clear(unsigned bitPosition) {
597  if (isSingleWord())
598    VAL &= ~maskBit(bitPosition);
599  else
600    pVal[whichWord(bitPosition)] &= ~maskBit(bitPosition);
601  return *this;
602}
603
604/// @brief Toggle every bit to its opposite value.
605
606/// Toggle a given bit to its opposite value whose position is given
607/// as "bitPosition".
608/// @brief Toggles a given bit to its opposite value.
609APInt& APInt::flip(unsigned bitPosition) {
610  assert(bitPosition < BitWidth && "Out of the bit-width range!");
611  if ((*this)[bitPosition]) clear(bitPosition);
612  else set(bitPosition);
613  return *this;
614}
615
616unsigned APInt::getBitsNeeded(StringRef str, uint8_t radix) {
617  assert(!str.empty() && "Invalid string length");
618  assert((radix == 10 || radix == 8 || radix == 16 || radix == 2) &&
619         "Radix should be 2, 8, 10, or 16!");
620
621  size_t slen = str.size();
622
623  // Each computation below needs to know if it's negative.
624  StringRef::iterator p = str.begin();
625  unsigned isNegative = *p == '-';
626  if (*p == '-' || *p == '+') {
627    p++;
628    slen--;
629    assert(slen && "String is only a sign, needs a value.");
630  }
631
632  // For radixes of power-of-two values, the bits required is accurately and
633  // easily computed
634  if (radix == 2)
635    return slen + isNegative;
636  if (radix == 8)
637    return slen * 3 + isNegative;
638  if (radix == 16)
639    return slen * 4 + isNegative;
640
641  // This is grossly inefficient but accurate. We could probably do something
642  // with a computation of roughly slen*64/20 and then adjust by the value of
643  // the first few digits. But, I'm not sure how accurate that could be.
644
645  // Compute a sufficient number of bits that is always large enough but might
646  // be too large. This avoids the assertion in the constructor. This
647  // calculation doesn't work appropriately for the numbers 0-9, so just use 4
648  // bits in that case.
649  unsigned sufficient = slen == 1 ? 4 : slen * 64/18;
650
651  // Convert to the actual binary value.
652  APInt tmp(sufficient, StringRef(p, slen), radix);
653
654  // Compute how many bits are required. If the log is infinite, assume we need
655  // just bit.
656  unsigned log = tmp.logBase2();
657  if (log == (unsigned)-1) {
658    return isNegative + 1;
659  } else {
660    return isNegative + log + 1;
661  }
662}
663
664// From http://www.burtleburtle.net, byBob Jenkins.
665// When targeting x86, both GCC and LLVM seem to recognize this as a
666// rotate instruction.
667#define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k))))
668
669// From http://www.burtleburtle.net, by Bob Jenkins.
670#define mix(a,b,c) \
671  { \
672    a -= c;  a ^= rot(c, 4);  c += b; \
673    b -= a;  b ^= rot(a, 6);  a += c; \
674    c -= b;  c ^= rot(b, 8);  b += a; \
675    a -= c;  a ^= rot(c,16);  c += b; \
676    b -= a;  b ^= rot(a,19);  a += c; \
677    c -= b;  c ^= rot(b, 4);  b += a; \
678  }
679
680// From http://www.burtleburtle.net, by Bob Jenkins.
681#define final(a,b,c) \
682  { \
683    c ^= b; c -= rot(b,14); \
684    a ^= c; a -= rot(c,11); \
685    b ^= a; b -= rot(a,25); \
686    c ^= b; c -= rot(b,16); \
687    a ^= c; a -= rot(c,4);  \
688    b ^= a; b -= rot(a,14); \
689    c ^= b; c -= rot(b,24); \
690  }
691
692// hashword() was adapted from http://www.burtleburtle.net, by Bob
693// Jenkins.  k is a pointer to an array of uint32_t values; length is
694// the length of the key, in 32-bit chunks.  This version only handles
695// keys that are a multiple of 32 bits in size.
696static inline uint32_t hashword(const uint64_t *k64, size_t length)
697{
698  const uint32_t *k = reinterpret_cast<const uint32_t *>(k64);
699  uint32_t a,b,c;
700
701  /* Set up the internal state */
702  a = b = c = 0xdeadbeef + (((uint32_t)length)<<2);
703
704  /*------------------------------------------------- handle most of the key */
705  while (length > 3) {
706    a += k[0];
707    b += k[1];
708    c += k[2];
709    mix(a,b,c);
710    length -= 3;
711    k += 3;
712  }
713
714  /*------------------------------------------- handle the last 3 uint32_t's */
715  switch (length) {                  /* all the case statements fall through */
716  case 3 : c+=k[2];
717  case 2 : b+=k[1];
718  case 1 : a+=k[0];
719    final(a,b,c);
720    case 0:     /* case 0: nothing left to add */
721      break;
722    }
723  /*------------------------------------------------------ report the result */
724  return c;
725}
726
727// hashword8() was adapted from http://www.burtleburtle.net, by Bob
728// Jenkins.  This computes a 32-bit hash from one 64-bit word.  When
729// targeting x86 (32 or 64 bit), both LLVM and GCC compile this
730// function into about 35 instructions when inlined.
731static inline uint32_t hashword8(const uint64_t k64)
732{
733  uint32_t a,b,c;
734  a = b = c = 0xdeadbeef + 4;
735  b += k64 >> 32;
736  a += k64 & 0xffffffff;
737  final(a,b,c);
738  return c;
739}
740#undef final
741#undef mix
742#undef rot
743
744uint64_t APInt::getHashValue() const {
745  uint64_t hash;
746  if (isSingleWord())
747    hash = hashword8(VAL);
748  else
749    hash = hashword(pVal, getNumWords()*2);
750  return hash;
751}
752
753/// HiBits - This function returns the high "numBits" bits of this APInt.
754APInt APInt::getHiBits(unsigned numBits) const {
755  return APIntOps::lshr(*this, BitWidth - numBits);
756}
757
758/// LoBits - This function returns the low "numBits" bits of this APInt.
759APInt APInt::getLoBits(unsigned numBits) const {
760  return APIntOps::lshr(APIntOps::shl(*this, BitWidth - numBits),
761                        BitWidth - numBits);
762}
763
764bool APInt::isPowerOf2() const {
765  return (!!*this) && !(*this & (*this - APInt(BitWidth,1)));
766}
767
768unsigned APInt::countLeadingZerosSlowCase() const {
769  // Treat the most significand word differently because it might have
770  // meaningless bits set beyond the precision.
771  unsigned BitsInMSW = BitWidth % APINT_BITS_PER_WORD;
772  integerPart MSWMask;
773  if (BitsInMSW) MSWMask = (integerPart(1) << BitsInMSW) - 1;
774  else {
775    MSWMask = ~integerPart(0);
776    BitsInMSW = APINT_BITS_PER_WORD;
777  }
778
779  unsigned i = getNumWords();
780  integerPart MSW = pVal[i-1] & MSWMask;
781  if (MSW)
782    return CountLeadingZeros_64(MSW) - (APINT_BITS_PER_WORD - BitsInMSW);
783
784  unsigned Count = BitsInMSW;
785  for (--i; i > 0u; --i) {
786    if (pVal[i-1] == 0)
787      Count += APINT_BITS_PER_WORD;
788    else {
789      Count += CountLeadingZeros_64(pVal[i-1]);
790      break;
791    }
792  }
793  return Count;
794}
795
796static unsigned countLeadingOnes_64(uint64_t V, unsigned skip) {
797  unsigned Count = 0;
798  if (skip)
799    V <<= skip;
800  while (V && (V & (1ULL << 63))) {
801    Count++;
802    V <<= 1;
803  }
804  return Count;
805}
806
807unsigned APInt::countLeadingOnes() const {
808  if (isSingleWord())
809    return countLeadingOnes_64(VAL, APINT_BITS_PER_WORD - BitWidth);
810
811  unsigned highWordBits = BitWidth % APINT_BITS_PER_WORD;
812  unsigned shift;
813  if (!highWordBits) {
814    highWordBits = APINT_BITS_PER_WORD;
815    shift = 0;
816  } else {
817    shift = APINT_BITS_PER_WORD - highWordBits;
818  }
819  int i = getNumWords() - 1;
820  unsigned Count = countLeadingOnes_64(pVal[i], shift);
821  if (Count == highWordBits) {
822    for (i--; i >= 0; --i) {
823      if (pVal[i] == -1ULL)
824        Count += APINT_BITS_PER_WORD;
825      else {
826        Count += countLeadingOnes_64(pVal[i], 0);
827        break;
828      }
829    }
830  }
831  return Count;
832}
833
834unsigned APInt::countTrailingZeros() const {
835  if (isSingleWord())
836    return std::min(unsigned(CountTrailingZeros_64(VAL)), BitWidth);
837  unsigned Count = 0;
838  unsigned i = 0;
839  for (; i < getNumWords() && pVal[i] == 0; ++i)
840    Count += APINT_BITS_PER_WORD;
841  if (i < getNumWords())
842    Count += CountTrailingZeros_64(pVal[i]);
843  return std::min(Count, BitWidth);
844}
845
846unsigned APInt::countTrailingOnesSlowCase() const {
847  unsigned Count = 0;
848  unsigned i = 0;
849  for (; i < getNumWords() && pVal[i] == -1ULL; ++i)
850    Count += APINT_BITS_PER_WORD;
851  if (i < getNumWords())
852    Count += CountTrailingOnes_64(pVal[i]);
853  return std::min(Count, BitWidth);
854}
855
856unsigned APInt::countPopulationSlowCase() const {
857  unsigned Count = 0;
858  for (unsigned i = 0; i < getNumWords(); ++i)
859    Count += CountPopulation_64(pVal[i]);
860  return Count;
861}
862
863APInt APInt::byteSwap() const {
864  assert(BitWidth >= 16 && BitWidth % 16 == 0 && "Cannot byteswap!");
865  if (BitWidth == 16)
866    return APInt(BitWidth, ByteSwap_16(uint16_t(VAL)));
867  else if (BitWidth == 32)
868    return APInt(BitWidth, ByteSwap_32(unsigned(VAL)));
869  else if (BitWidth == 48) {
870    unsigned Tmp1 = unsigned(VAL >> 16);
871    Tmp1 = ByteSwap_32(Tmp1);
872    uint16_t Tmp2 = uint16_t(VAL);
873    Tmp2 = ByteSwap_16(Tmp2);
874    return APInt(BitWidth, (uint64_t(Tmp2) << 32) | Tmp1);
875  } else if (BitWidth == 64)
876    return APInt(BitWidth, ByteSwap_64(VAL));
877  else {
878    APInt Result(BitWidth, 0);
879    char *pByte = (char*)Result.pVal;
880    for (unsigned i = 0; i < BitWidth / APINT_WORD_SIZE / 2; ++i) {
881      char Tmp = pByte[i];
882      pByte[i] = pByte[BitWidth / APINT_WORD_SIZE - 1 - i];
883      pByte[BitWidth / APINT_WORD_SIZE - i - 1] = Tmp;
884    }
885    return Result;
886  }
887}
888
889APInt llvm::APIntOps::GreatestCommonDivisor(const APInt& API1,
890                                            const APInt& API2) {
891  APInt A = API1, B = API2;
892  while (!!B) {
893    APInt T = B;
894    B = APIntOps::urem(A, B);
895    A = T;
896  }
897  return A;
898}
899
900APInt llvm::APIntOps::RoundDoubleToAPInt(double Double, unsigned width) {
901  union {
902    double D;
903    uint64_t I;
904  } T;
905  T.D = Double;
906
907  // Get the sign bit from the highest order bit
908  bool isNeg = T.I >> 63;
909
910  // Get the 11-bit exponent and adjust for the 1023 bit bias
911  int64_t exp = ((T.I >> 52) & 0x7ff) - 1023;
912
913  // If the exponent is negative, the value is < 0 so just return 0.
914  if (exp < 0)
915    return APInt(width, 0u);
916
917  // Extract the mantissa by clearing the top 12 bits (sign + exponent).
918  uint64_t mantissa = (T.I & (~0ULL >> 12)) | 1ULL << 52;
919
920  // If the exponent doesn't shift all bits out of the mantissa
921  if (exp < 52)
922    return isNeg ? -APInt(width, mantissa >> (52 - exp)) :
923                    APInt(width, mantissa >> (52 - exp));
924
925  // If the client didn't provide enough bits for us to shift the mantissa into
926  // then the result is undefined, just return 0
927  if (width <= exp - 52)
928    return APInt(width, 0);
929
930  // Otherwise, we have to shift the mantissa bits up to the right location
931  APInt Tmp(width, mantissa);
932  Tmp = Tmp.shl((unsigned)exp - 52);
933  return isNeg ? -Tmp : Tmp;
934}
935
936/// RoundToDouble - This function converts this APInt to a double.
937/// The layout for double is as following (IEEE Standard 754):
938///  --------------------------------------
939/// |  Sign    Exponent    Fraction    Bias |
940/// |-------------------------------------- |
941/// |  1[63]   11[62-52]   52[51-00]   1023 |
942///  --------------------------------------
943double APInt::roundToDouble(bool isSigned) const {
944
945  // Handle the simple case where the value is contained in one uint64_t.
946  // It is wrong to optimize getWord(0) to VAL; there might be more than one word.
947  if (isSingleWord() || getActiveBits() <= APINT_BITS_PER_WORD) {
948    if (isSigned) {
949      int64_t sext = (int64_t(getWord(0)) << (64-BitWidth)) >> (64-BitWidth);
950      return double(sext);
951    } else
952      return double(getWord(0));
953  }
954
955  // Determine if the value is negative.
956  bool isNeg = isSigned ? (*this)[BitWidth-1] : false;
957
958  // Construct the absolute value if we're negative.
959  APInt Tmp(isNeg ? -(*this) : (*this));
960
961  // Figure out how many bits we're using.
962  unsigned n = Tmp.getActiveBits();
963
964  // The exponent (without bias normalization) is just the number of bits
965  // we are using. Note that the sign bit is gone since we constructed the
966  // absolute value.
967  uint64_t exp = n;
968
969  // Return infinity for exponent overflow
970  if (exp > 1023) {
971    if (!isSigned || !isNeg)
972      return std::numeric_limits<double>::infinity();
973    else
974      return -std::numeric_limits<double>::infinity();
975  }
976  exp += 1023; // Increment for 1023 bias
977
978  // Number of bits in mantissa is 52. To obtain the mantissa value, we must
979  // extract the high 52 bits from the correct words in pVal.
980  uint64_t mantissa;
981  unsigned hiWord = whichWord(n-1);
982  if (hiWord == 0) {
983    mantissa = Tmp.pVal[0];
984    if (n > 52)
985      mantissa >>= n - 52; // shift down, we want the top 52 bits.
986  } else {
987    assert(hiWord > 0 && "huh?");
988    uint64_t hibits = Tmp.pVal[hiWord] << (52 - n % APINT_BITS_PER_WORD);
989    uint64_t lobits = Tmp.pVal[hiWord-1] >> (11 + n % APINT_BITS_PER_WORD);
990    mantissa = hibits | lobits;
991  }
992
993  // The leading bit of mantissa is implicit, so get rid of it.
994  uint64_t sign = isNeg ? (1ULL << (APINT_BITS_PER_WORD - 1)) : 0;
995  union {
996    double D;
997    uint64_t I;
998  } T;
999  T.I = sign | (exp << 52) | mantissa;
1000  return T.D;
1001}
1002
1003// Truncate to new width.
1004APInt &APInt::trunc(unsigned width) {
1005  assert(width < BitWidth && "Invalid APInt Truncate request");
1006  assert(width && "Can't truncate to 0 bits");
1007  unsigned wordsBefore = getNumWords();
1008  BitWidth = width;
1009  unsigned wordsAfter = getNumWords();
1010  if (wordsBefore != wordsAfter) {
1011    if (wordsAfter == 1) {
1012      uint64_t *tmp = pVal;
1013      VAL = pVal[0];
1014      delete [] tmp;
1015    } else {
1016      uint64_t *newVal = getClearedMemory(wordsAfter);
1017      for (unsigned i = 0; i < wordsAfter; ++i)
1018        newVal[i] = pVal[i];
1019      delete [] pVal;
1020      pVal = newVal;
1021    }
1022  }
1023  return clearUnusedBits();
1024}
1025
1026// Sign extend to a new width.
1027APInt &APInt::sext(unsigned width) {
1028  assert(width > BitWidth && "Invalid APInt SignExtend request");
1029  // If the sign bit isn't set, this is the same as zext.
1030  if (!isNegative()) {
1031    zext(width);
1032    return *this;
1033  }
1034
1035  // The sign bit is set. First, get some facts
1036  unsigned wordsBefore = getNumWords();
1037  unsigned wordBits = BitWidth % APINT_BITS_PER_WORD;
1038  BitWidth = width;
1039  unsigned wordsAfter = getNumWords();
1040
1041  // Mask the high order word appropriately
1042  if (wordsBefore == wordsAfter) {
1043    unsigned newWordBits = width % APINT_BITS_PER_WORD;
1044    // The extension is contained to the wordsBefore-1th word.
1045    uint64_t mask = ~0ULL;
1046    if (newWordBits)
1047      mask >>= APINT_BITS_PER_WORD - newWordBits;
1048    mask <<= wordBits;
1049    if (wordsBefore == 1)
1050      VAL |= mask;
1051    else
1052      pVal[wordsBefore-1] |= mask;
1053    return clearUnusedBits();
1054  }
1055
1056  uint64_t mask = wordBits == 0 ? 0 : ~0ULL << wordBits;
1057  uint64_t *newVal = getMemory(wordsAfter);
1058  if (wordsBefore == 1)
1059    newVal[0] = VAL | mask;
1060  else {
1061    for (unsigned i = 0; i < wordsBefore; ++i)
1062      newVal[i] = pVal[i];
1063    newVal[wordsBefore-1] |= mask;
1064  }
1065  for (unsigned i = wordsBefore; i < wordsAfter; i++)
1066    newVal[i] = -1ULL;
1067  if (wordsBefore != 1)
1068    delete [] pVal;
1069  pVal = newVal;
1070  return clearUnusedBits();
1071}
1072
1073//  Zero extend to a new width.
1074APInt &APInt::zext(unsigned width) {
1075  assert(width > BitWidth && "Invalid APInt ZeroExtend request");
1076  unsigned wordsBefore = getNumWords();
1077  BitWidth = width;
1078  unsigned wordsAfter = getNumWords();
1079  if (wordsBefore != wordsAfter) {
1080    uint64_t *newVal = getClearedMemory(wordsAfter);
1081    if (wordsBefore == 1)
1082      newVal[0] = VAL;
1083    else
1084      for (unsigned i = 0; i < wordsBefore; ++i)
1085        newVal[i] = pVal[i];
1086    if (wordsBefore != 1)
1087      delete [] pVal;
1088    pVal = newVal;
1089  }
1090  return *this;
1091}
1092
1093APInt &APInt::zextOrTrunc(unsigned width) {
1094  if (BitWidth < width)
1095    return zext(width);
1096  if (BitWidth > width)
1097    return trunc(width);
1098  return *this;
1099}
1100
1101APInt &APInt::sextOrTrunc(unsigned width) {
1102  if (BitWidth < width)
1103    return sext(width);
1104  if (BitWidth > width)
1105    return trunc(width);
1106  return *this;
1107}
1108
1109/// Arithmetic right-shift this APInt by shiftAmt.
1110/// @brief Arithmetic right-shift function.
1111APInt APInt::ashr(const APInt &shiftAmt) const {
1112  return ashr((unsigned)shiftAmt.getLimitedValue(BitWidth));
1113}
1114
1115/// Arithmetic right-shift this APInt by shiftAmt.
1116/// @brief Arithmetic right-shift function.
1117APInt APInt::ashr(unsigned shiftAmt) const {
1118  assert(shiftAmt <= BitWidth && "Invalid shift amount");
1119  // Handle a degenerate case
1120  if (shiftAmt == 0)
1121    return *this;
1122
1123  // Handle single word shifts with built-in ashr
1124  if (isSingleWord()) {
1125    if (shiftAmt == BitWidth)
1126      return APInt(BitWidth, 0); // undefined
1127    else {
1128      unsigned SignBit = APINT_BITS_PER_WORD - BitWidth;
1129      return APInt(BitWidth,
1130        (((int64_t(VAL) << SignBit) >> SignBit) >> shiftAmt));
1131    }
1132  }
1133
1134  // If all the bits were shifted out, the result is, technically, undefined.
1135  // We return -1 if it was negative, 0 otherwise. We check this early to avoid
1136  // issues in the algorithm below.
1137  if (shiftAmt == BitWidth) {
1138    if (isNegative())
1139      return APInt(BitWidth, -1ULL, true);
1140    else
1141      return APInt(BitWidth, 0);
1142  }
1143
1144  // Create some space for the result.
1145  uint64_t * val = new uint64_t[getNumWords()];
1146
1147  // Compute some values needed by the following shift algorithms
1148  unsigned wordShift = shiftAmt % APINT_BITS_PER_WORD; // bits to shift per word
1149  unsigned offset = shiftAmt / APINT_BITS_PER_WORD; // word offset for shift
1150  unsigned breakWord = getNumWords() - 1 - offset; // last word affected
1151  unsigned bitsInWord = whichBit(BitWidth); // how many bits in last word?
1152  if (bitsInWord == 0)
1153    bitsInWord = APINT_BITS_PER_WORD;
1154
1155  // If we are shifting whole words, just move whole words
1156  if (wordShift == 0) {
1157    // Move the words containing significant bits
1158    for (unsigned i = 0; i <= breakWord; ++i)
1159      val[i] = pVal[i+offset]; // move whole word
1160
1161    // Adjust the top significant word for sign bit fill, if negative
1162    if (isNegative())
1163      if (bitsInWord < APINT_BITS_PER_WORD)
1164        val[breakWord] |= ~0ULL << bitsInWord; // set high bits
1165  } else {
1166    // Shift the low order words
1167    for (unsigned i = 0; i < breakWord; ++i) {
1168      // This combines the shifted corresponding word with the low bits from
1169      // the next word (shifted into this word's high bits).
1170      val[i] = (pVal[i+offset] >> wordShift) |
1171               (pVal[i+offset+1] << (APINT_BITS_PER_WORD - wordShift));
1172    }
1173
1174    // Shift the break word. In this case there are no bits from the next word
1175    // to include in this word.
1176    val[breakWord] = pVal[breakWord+offset] >> wordShift;
1177
1178    // Deal with sign extenstion in the break word, and possibly the word before
1179    // it.
1180    if (isNegative()) {
1181      if (wordShift > bitsInWord) {
1182        if (breakWord > 0)
1183          val[breakWord-1] |=
1184            ~0ULL << (APINT_BITS_PER_WORD - (wordShift - bitsInWord));
1185        val[breakWord] |= ~0ULL;
1186      } else
1187        val[breakWord] |= (~0ULL << (bitsInWord - wordShift));
1188    }
1189  }
1190
1191  // Remaining words are 0 or -1, just assign them.
1192  uint64_t fillValue = (isNegative() ? -1ULL : 0);
1193  for (unsigned i = breakWord+1; i < getNumWords(); ++i)
1194    val[i] = fillValue;
1195  return APInt(val, BitWidth).clearUnusedBits();
1196}
1197
1198/// Logical right-shift this APInt by shiftAmt.
1199/// @brief Logical right-shift function.
1200APInt APInt::lshr(const APInt &shiftAmt) const {
1201  return lshr((unsigned)shiftAmt.getLimitedValue(BitWidth));
1202}
1203
1204/// Logical right-shift this APInt by shiftAmt.
1205/// @brief Logical right-shift function.
1206APInt APInt::lshr(unsigned shiftAmt) const {
1207  if (isSingleWord()) {
1208    if (shiftAmt == BitWidth)
1209      return APInt(BitWidth, 0);
1210    else
1211      return APInt(BitWidth, this->VAL >> shiftAmt);
1212  }
1213
1214  // If all the bits were shifted out, the result is 0. This avoids issues
1215  // with shifting by the size of the integer type, which produces undefined
1216  // results. We define these "undefined results" to always be 0.
1217  if (shiftAmt == BitWidth)
1218    return APInt(BitWidth, 0);
1219
1220  // If none of the bits are shifted out, the result is *this. This avoids
1221  // issues with shifting by the size of the integer type, which produces
1222  // undefined results in the code below. This is also an optimization.
1223  if (shiftAmt == 0)
1224    return *this;
1225
1226  // Create some space for the result.
1227  uint64_t * val = new uint64_t[getNumWords()];
1228
1229  // If we are shifting less than a word, compute the shift with a simple carry
1230  if (shiftAmt < APINT_BITS_PER_WORD) {
1231    uint64_t carry = 0;
1232    for (int i = getNumWords()-1; i >= 0; --i) {
1233      val[i] = (pVal[i] >> shiftAmt) | carry;
1234      carry = pVal[i] << (APINT_BITS_PER_WORD - shiftAmt);
1235    }
1236    return APInt(val, BitWidth).clearUnusedBits();
1237  }
1238
1239  // Compute some values needed by the remaining shift algorithms
1240  unsigned wordShift = shiftAmt % APINT_BITS_PER_WORD;
1241  unsigned offset = shiftAmt / APINT_BITS_PER_WORD;
1242
1243  // If we are shifting whole words, just move whole words
1244  if (wordShift == 0) {
1245    for (unsigned i = 0; i < getNumWords() - offset; ++i)
1246      val[i] = pVal[i+offset];
1247    for (unsigned i = getNumWords()-offset; i < getNumWords(); i++)
1248      val[i] = 0;
1249    return APInt(val,BitWidth).clearUnusedBits();
1250  }
1251
1252  // Shift the low order words
1253  unsigned breakWord = getNumWords() - offset -1;
1254  for (unsigned i = 0; i < breakWord; ++i)
1255    val[i] = (pVal[i+offset] >> wordShift) |
1256             (pVal[i+offset+1] << (APINT_BITS_PER_WORD - wordShift));
1257  // Shift the break word.
1258  val[breakWord] = pVal[breakWord+offset] >> wordShift;
1259
1260  // Remaining words are 0
1261  for (unsigned i = breakWord+1; i < getNumWords(); ++i)
1262    val[i] = 0;
1263  return APInt(val, BitWidth).clearUnusedBits();
1264}
1265
1266/// Left-shift this APInt by shiftAmt.
1267/// @brief Left-shift function.
1268APInt APInt::shl(const APInt &shiftAmt) const {
1269  // It's undefined behavior in C to shift by BitWidth or greater.
1270  return shl((unsigned)shiftAmt.getLimitedValue(BitWidth));
1271}
1272
1273APInt APInt::shlSlowCase(unsigned shiftAmt) const {
1274  // If all the bits were shifted out, the result is 0. This avoids issues
1275  // with shifting by the size of the integer type, which produces undefined
1276  // results. We define these "undefined results" to always be 0.
1277  if (shiftAmt == BitWidth)
1278    return APInt(BitWidth, 0);
1279
1280  // If none of the bits are shifted out, the result is *this. This avoids a
1281  // lshr by the words size in the loop below which can produce incorrect
1282  // results. It also avoids the expensive computation below for a common case.
1283  if (shiftAmt == 0)
1284    return *this;
1285
1286  // Create some space for the result.
1287  uint64_t * val = new uint64_t[getNumWords()];
1288
1289  // If we are shifting less than a word, do it the easy way
1290  if (shiftAmt < APINT_BITS_PER_WORD) {
1291    uint64_t carry = 0;
1292    for (unsigned i = 0; i < getNumWords(); i++) {
1293      val[i] = pVal[i] << shiftAmt | carry;
1294      carry = pVal[i] >> (APINT_BITS_PER_WORD - shiftAmt);
1295    }
1296    return APInt(val, BitWidth).clearUnusedBits();
1297  }
1298
1299  // Compute some values needed by the remaining shift algorithms
1300  unsigned wordShift = shiftAmt % APINT_BITS_PER_WORD;
1301  unsigned offset = shiftAmt / APINT_BITS_PER_WORD;
1302
1303  // If we are shifting whole words, just move whole words
1304  if (wordShift == 0) {
1305    for (unsigned i = 0; i < offset; i++)
1306      val[i] = 0;
1307    for (unsigned i = offset; i < getNumWords(); i++)
1308      val[i] = pVal[i-offset];
1309    return APInt(val,BitWidth).clearUnusedBits();
1310  }
1311
1312  // Copy whole words from this to Result.
1313  unsigned i = getNumWords() - 1;
1314  for (; i > offset; --i)
1315    val[i] = pVal[i-offset] << wordShift |
1316             pVal[i-offset-1] >> (APINT_BITS_PER_WORD - wordShift);
1317  val[offset] = pVal[0] << wordShift;
1318  for (i = 0; i < offset; ++i)
1319    val[i] = 0;
1320  return APInt(val, BitWidth).clearUnusedBits();
1321}
1322
1323APInt APInt::rotl(const APInt &rotateAmt) const {
1324  return rotl((unsigned)rotateAmt.getLimitedValue(BitWidth));
1325}
1326
1327APInt APInt::rotl(unsigned rotateAmt) const {
1328  if (rotateAmt == 0)
1329    return *this;
1330  // Don't get too fancy, just use existing shift/or facilities
1331  APInt hi(*this);
1332  APInt lo(*this);
1333  hi.shl(rotateAmt);
1334  lo.lshr(BitWidth - rotateAmt);
1335  return hi | lo;
1336}
1337
1338APInt APInt::rotr(const APInt &rotateAmt) const {
1339  return rotr((unsigned)rotateAmt.getLimitedValue(BitWidth));
1340}
1341
1342APInt APInt::rotr(unsigned rotateAmt) const {
1343  if (rotateAmt == 0)
1344    return *this;
1345  // Don't get too fancy, just use existing shift/or facilities
1346  APInt hi(*this);
1347  APInt lo(*this);
1348  lo.lshr(rotateAmt);
1349  hi.shl(BitWidth - rotateAmt);
1350  return hi | lo;
1351}
1352
1353// Square Root - this method computes and returns the square root of "this".
1354// Three mechanisms are used for computation. For small values (<= 5 bits),
1355// a table lookup is done. This gets some performance for common cases. For
1356// values using less than 52 bits, the value is converted to double and then
1357// the libc sqrt function is called. The result is rounded and then converted
1358// back to a uint64_t which is then used to construct the result. Finally,
1359// the Babylonian method for computing square roots is used.
1360APInt APInt::sqrt() const {
1361
1362  // Determine the magnitude of the value.
1363  unsigned magnitude = getActiveBits();
1364
1365  // Use a fast table for some small values. This also gets rid of some
1366  // rounding errors in libc sqrt for small values.
1367  if (magnitude <= 5) {
1368    static const uint8_t results[32] = {
1369      /*     0 */ 0,
1370      /*  1- 2 */ 1, 1,
1371      /*  3- 6 */ 2, 2, 2, 2,
1372      /*  7-12 */ 3, 3, 3, 3, 3, 3,
1373      /* 13-20 */ 4, 4, 4, 4, 4, 4, 4, 4,
1374      /* 21-30 */ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
1375      /*    31 */ 6
1376    };
1377    return APInt(BitWidth, results[ (isSingleWord() ? VAL : pVal[0]) ]);
1378  }
1379
1380  // If the magnitude of the value fits in less than 52 bits (the precision of
1381  // an IEEE double precision floating point value), then we can use the
1382  // libc sqrt function which will probably use a hardware sqrt computation.
1383  // This should be faster than the algorithm below.
1384  if (magnitude < 52) {
1385#if HAVE_ROUND
1386    return APInt(BitWidth,
1387                 uint64_t(::round(::sqrt(double(isSingleWord()?VAL:pVal[0])))));
1388#else
1389    return APInt(BitWidth,
1390                 uint64_t(::sqrt(double(isSingleWord()?VAL:pVal[0]))) + 0.5);
1391#endif
1392  }
1393
1394  // Okay, all the short cuts are exhausted. We must compute it. The following
1395  // is a classical Babylonian method for computing the square root. This code
1396  // was adapted to APINt from a wikipedia article on such computations.
1397  // See http://www.wikipedia.org/ and go to the page named
1398  // Calculate_an_integer_square_root.
1399  unsigned nbits = BitWidth, i = 4;
1400  APInt testy(BitWidth, 16);
1401  APInt x_old(BitWidth, 1);
1402  APInt x_new(BitWidth, 0);
1403  APInt two(BitWidth, 2);
1404
1405  // Select a good starting value using binary logarithms.
1406  for (;; i += 2, testy = testy.shl(2))
1407    if (i >= nbits || this->ule(testy)) {
1408      x_old = x_old.shl(i / 2);
1409      break;
1410    }
1411
1412  // Use the Babylonian method to arrive at the integer square root:
1413  for (;;) {
1414    x_new = (this->udiv(x_old) + x_old).udiv(two);
1415    if (x_old.ule(x_new))
1416      break;
1417    x_old = x_new;
1418  }
1419
1420  // Make sure we return the closest approximation
1421  // NOTE: The rounding calculation below is correct. It will produce an
1422  // off-by-one discrepancy with results from pari/gp. That discrepancy has been
1423  // determined to be a rounding issue with pari/gp as it begins to use a
1424  // floating point representation after 192 bits. There are no discrepancies
1425  // between this algorithm and pari/gp for bit widths < 192 bits.
1426  APInt square(x_old * x_old);
1427  APInt nextSquare((x_old + 1) * (x_old +1));
1428  if (this->ult(square))
1429    return x_old;
1430  else if (this->ule(nextSquare)) {
1431    APInt midpoint((nextSquare - square).udiv(two));
1432    APInt offset(*this - square);
1433    if (offset.ult(midpoint))
1434      return x_old;
1435    else
1436      return x_old + 1;
1437  } else
1438    llvm_unreachable("Error in APInt::sqrt computation");
1439  return x_old + 1;
1440}
1441
1442/// Computes the multiplicative inverse of this APInt for a given modulo. The
1443/// iterative extended Euclidean algorithm is used to solve for this value,
1444/// however we simplify it to speed up calculating only the inverse, and take
1445/// advantage of div+rem calculations. We also use some tricks to avoid copying
1446/// (potentially large) APInts around.
1447APInt APInt::multiplicativeInverse(const APInt& modulo) const {
1448  assert(ult(modulo) && "This APInt must be smaller than the modulo");
1449
1450  // Using the properties listed at the following web page (accessed 06/21/08):
1451  //   http://www.numbertheory.org/php/euclid.html
1452  // (especially the properties numbered 3, 4 and 9) it can be proved that
1453  // BitWidth bits suffice for all the computations in the algorithm implemented
1454  // below. More precisely, this number of bits suffice if the multiplicative
1455  // inverse exists, but may not suffice for the general extended Euclidean
1456  // algorithm.
1457
1458  APInt r[2] = { modulo, *this };
1459  APInt t[2] = { APInt(BitWidth, 0), APInt(BitWidth, 1) };
1460  APInt q(BitWidth, 0);
1461
1462  unsigned i;
1463  for (i = 0; r[i^1] != 0; i ^= 1) {
1464    // An overview of the math without the confusing bit-flipping:
1465    // q = r[i-2] / r[i-1]
1466    // r[i] = r[i-2] % r[i-1]
1467    // t[i] = t[i-2] - t[i-1] * q
1468    udivrem(r[i], r[i^1], q, r[i]);
1469    t[i] -= t[i^1] * q;
1470  }
1471
1472  // If this APInt and the modulo are not coprime, there is no multiplicative
1473  // inverse, so return 0. We check this by looking at the next-to-last
1474  // remainder, which is the gcd(*this,modulo) as calculated by the Euclidean
1475  // algorithm.
1476  if (r[i] != 1)
1477    return APInt(BitWidth, 0);
1478
1479  // The next-to-last t is the multiplicative inverse.  However, we are
1480  // interested in a positive inverse. Calcuate a positive one from a negative
1481  // one if necessary. A simple addition of the modulo suffices because
1482  // abs(t[i]) is known to be less than *this/2 (see the link above).
1483  return t[i].isNegative() ? t[i] + modulo : t[i];
1484}
1485
1486/// Calculate the magic numbers required to implement a signed integer division
1487/// by a constant as a sequence of multiplies, adds and shifts.  Requires that
1488/// the divisor not be 0, 1, or -1.  Taken from "Hacker's Delight", Henry S.
1489/// Warren, Jr., chapter 10.
1490APInt::ms APInt::magic() const {
1491  const APInt& d = *this;
1492  unsigned p;
1493  APInt ad, anc, delta, q1, r1, q2, r2, t;
1494  APInt signedMin = APInt::getSignedMinValue(d.getBitWidth());
1495  struct ms mag;
1496
1497  ad = d.abs();
1498  t = signedMin + (d.lshr(d.getBitWidth() - 1));
1499  anc = t - 1 - t.urem(ad);   // absolute value of nc
1500  p = d.getBitWidth() - 1;    // initialize p
1501  q1 = signedMin.udiv(anc);   // initialize q1 = 2p/abs(nc)
1502  r1 = signedMin - q1*anc;    // initialize r1 = rem(2p,abs(nc))
1503  q2 = signedMin.udiv(ad);    // initialize q2 = 2p/abs(d)
1504  r2 = signedMin - q2*ad;     // initialize r2 = rem(2p,abs(d))
1505  do {
1506    p = p + 1;
1507    q1 = q1<<1;          // update q1 = 2p/abs(nc)
1508    r1 = r1<<1;          // update r1 = rem(2p/abs(nc))
1509    if (r1.uge(anc)) {  // must be unsigned comparison
1510      q1 = q1 + 1;
1511      r1 = r1 - anc;
1512    }
1513    q2 = q2<<1;          // update q2 = 2p/abs(d)
1514    r2 = r2<<1;          // update r2 = rem(2p/abs(d))
1515    if (r2.uge(ad)) {   // must be unsigned comparison
1516      q2 = q2 + 1;
1517      r2 = r2 - ad;
1518    }
1519    delta = ad - r2;
1520  } while (q1.ule(delta) || (q1 == delta && r1 == 0));
1521
1522  mag.m = q2 + 1;
1523  if (d.isNegative()) mag.m = -mag.m;   // resulting magic number
1524  mag.s = p - d.getBitWidth();          // resulting shift
1525  return mag;
1526}
1527
1528/// Calculate the magic numbers required to implement an unsigned integer
1529/// division by a constant as a sequence of multiplies, adds and shifts.
1530/// Requires that the divisor not be 0.  Taken from "Hacker's Delight", Henry
1531/// S. Warren, Jr., chapter 10.
1532APInt::mu APInt::magicu() const {
1533  const APInt& d = *this;
1534  unsigned p;
1535  APInt nc, delta, q1, r1, q2, r2;
1536  struct mu magu;
1537  magu.a = 0;               // initialize "add" indicator
1538  APInt allOnes = APInt::getAllOnesValue(d.getBitWidth());
1539  APInt signedMin = APInt::getSignedMinValue(d.getBitWidth());
1540  APInt signedMax = APInt::getSignedMaxValue(d.getBitWidth());
1541
1542  nc = allOnes - (-d).urem(d);
1543  p = d.getBitWidth() - 1;  // initialize p
1544  q1 = signedMin.udiv(nc);  // initialize q1 = 2p/nc
1545  r1 = signedMin - q1*nc;   // initialize r1 = rem(2p,nc)
1546  q2 = signedMax.udiv(d);   // initialize q2 = (2p-1)/d
1547  r2 = signedMax - q2*d;    // initialize r2 = rem((2p-1),d)
1548  do {
1549    p = p + 1;
1550    if (r1.uge(nc - r1)) {
1551      q1 = q1 + q1 + 1;  // update q1
1552      r1 = r1 + r1 - nc; // update r1
1553    }
1554    else {
1555      q1 = q1+q1; // update q1
1556      r1 = r1+r1; // update r1
1557    }
1558    if ((r2 + 1).uge(d - r2)) {
1559      if (q2.uge(signedMax)) magu.a = 1;
1560      q2 = q2+q2 + 1;     // update q2
1561      r2 = r2+r2 + 1 - d; // update r2
1562    }
1563    else {
1564      if (q2.uge(signedMin)) magu.a = 1;
1565      q2 = q2+q2;     // update q2
1566      r2 = r2+r2 + 1; // update r2
1567    }
1568    delta = d - 1 - r2;
1569  } while (p < d.getBitWidth()*2 &&
1570           (q1.ult(delta) || (q1 == delta && r1 == 0)));
1571  magu.m = q2 + 1; // resulting magic number
1572  magu.s = p - d.getBitWidth();  // resulting shift
1573  return magu;
1574}
1575
1576/// Implementation of Knuth's Algorithm D (Division of nonnegative integers)
1577/// from "Art of Computer Programming, Volume 2", section 4.3.1, p. 272. The
1578/// variables here have the same names as in the algorithm. Comments explain
1579/// the algorithm and any deviation from it.
1580static void KnuthDiv(unsigned *u, unsigned *v, unsigned *q, unsigned* r,
1581                     unsigned m, unsigned n) {
1582  assert(u && "Must provide dividend");
1583  assert(v && "Must provide divisor");
1584  assert(q && "Must provide quotient");
1585  assert(u != v && u != q && v != q && "Must us different memory");
1586  assert(n>1 && "n must be > 1");
1587
1588  // Knuth uses the value b as the base of the number system. In our case b
1589  // is 2^31 so we just set it to -1u.
1590  uint64_t b = uint64_t(1) << 32;
1591
1592#if 0
1593  DEBUG(dbgs() << "KnuthDiv: m=" << m << " n=" << n << '\n');
1594  DEBUG(dbgs() << "KnuthDiv: original:");
1595  DEBUG(for (int i = m+n; i >=0; i--) dbgs() << " " << u[i]);
1596  DEBUG(dbgs() << " by");
1597  DEBUG(for (int i = n; i >0; i--) dbgs() << " " << v[i-1]);
1598  DEBUG(dbgs() << '\n');
1599#endif
1600  // D1. [Normalize.] Set d = b / (v[n-1] + 1) and multiply all the digits of
1601  // u and v by d. Note that we have taken Knuth's advice here to use a power
1602  // of 2 value for d such that d * v[n-1] >= b/2 (b is the base). A power of
1603  // 2 allows us to shift instead of multiply and it is easy to determine the
1604  // shift amount from the leading zeros.  We are basically normalizing the u
1605  // and v so that its high bits are shifted to the top of v's range without
1606  // overflow. Note that this can require an extra word in u so that u must
1607  // be of length m+n+1.
1608  unsigned shift = CountLeadingZeros_32(v[n-1]);
1609  unsigned v_carry = 0;
1610  unsigned u_carry = 0;
1611  if (shift) {
1612    for (unsigned i = 0; i < m+n; ++i) {
1613      unsigned u_tmp = u[i] >> (32 - shift);
1614      u[i] = (u[i] << shift) | u_carry;
1615      u_carry = u_tmp;
1616    }
1617    for (unsigned i = 0; i < n; ++i) {
1618      unsigned v_tmp = v[i] >> (32 - shift);
1619      v[i] = (v[i] << shift) | v_carry;
1620      v_carry = v_tmp;
1621    }
1622  }
1623  u[m+n] = u_carry;
1624#if 0
1625  DEBUG(dbgs() << "KnuthDiv:   normal:");
1626  DEBUG(for (int i = m+n; i >=0; i--) dbgs() << " " << u[i]);
1627  DEBUG(dbgs() << " by");
1628  DEBUG(for (int i = n; i >0; i--) dbgs() << " " << v[i-1]);
1629  DEBUG(dbgs() << '\n');
1630#endif
1631
1632  // D2. [Initialize j.]  Set j to m. This is the loop counter over the places.
1633  int j = m;
1634  do {
1635    DEBUG(dbgs() << "KnuthDiv: quotient digit #" << j << '\n');
1636    // D3. [Calculate q'.].
1637    //     Set qp = (u[j+n]*b + u[j+n-1]) / v[n-1]. (qp=qprime=q')
1638    //     Set rp = (u[j+n]*b + u[j+n-1]) % v[n-1]. (rp=rprime=r')
1639    // Now test if qp == b or qp*v[n-2] > b*rp + u[j+n-2]; if so, decrease
1640    // qp by 1, inrease rp by v[n-1], and repeat this test if rp < b. The test
1641    // on v[n-2] determines at high speed most of the cases in which the trial
1642    // value qp is one too large, and it eliminates all cases where qp is two
1643    // too large.
1644    uint64_t dividend = ((uint64_t(u[j+n]) << 32) + u[j+n-1]);
1645    DEBUG(dbgs() << "KnuthDiv: dividend == " << dividend << '\n');
1646    uint64_t qp = dividend / v[n-1];
1647    uint64_t rp = dividend % v[n-1];
1648    if (qp == b || qp*v[n-2] > b*rp + u[j+n-2]) {
1649      qp--;
1650      rp += v[n-1];
1651      if (rp < b && (qp == b || qp*v[n-2] > b*rp + u[j+n-2]))
1652        qp--;
1653    }
1654    DEBUG(dbgs() << "KnuthDiv: qp == " << qp << ", rp == " << rp << '\n');
1655
1656    // D4. [Multiply and subtract.] Replace (u[j+n]u[j+n-1]...u[j]) with
1657    // (u[j+n]u[j+n-1]..u[j]) - qp * (v[n-1]...v[1]v[0]). This computation
1658    // consists of a simple multiplication by a one-place number, combined with
1659    // a subtraction.
1660    bool isNeg = false;
1661    for (unsigned i = 0; i < n; ++i) {
1662      uint64_t u_tmp = uint64_t(u[j+i]) | (uint64_t(u[j+i+1]) << 32);
1663      uint64_t subtrahend = uint64_t(qp) * uint64_t(v[i]);
1664      bool borrow = subtrahend > u_tmp;
1665      DEBUG(dbgs() << "KnuthDiv: u_tmp == " << u_tmp
1666                   << ", subtrahend == " << subtrahend
1667                   << ", borrow = " << borrow << '\n');
1668
1669      uint64_t result = u_tmp - subtrahend;
1670      unsigned k = j + i;
1671      u[k++] = (unsigned)(result & (b-1)); // subtract low word
1672      u[k++] = (unsigned)(result >> 32);   // subtract high word
1673      while (borrow && k <= m+n) { // deal with borrow to the left
1674        borrow = u[k] == 0;
1675        u[k]--;
1676        k++;
1677      }
1678      isNeg |= borrow;
1679      DEBUG(dbgs() << "KnuthDiv: u[j+i] == " << u[j+i] << ",  u[j+i+1] == " <<
1680                    u[j+i+1] << '\n');
1681    }
1682    DEBUG(dbgs() << "KnuthDiv: after subtraction:");
1683    DEBUG(for (int i = m+n; i >=0; i--) dbgs() << " " << u[i]);
1684    DEBUG(dbgs() << '\n');
1685    // The digits (u[j+n]...u[j]) should be kept positive; if the result of
1686    // this step is actually negative, (u[j+n]...u[j]) should be left as the
1687    // true value plus b**(n+1), namely as the b's complement of
1688    // the true value, and a "borrow" to the left should be remembered.
1689    //
1690    if (isNeg) {
1691      bool carry = true;  // true because b's complement is "complement + 1"
1692      for (unsigned i = 0; i <= m+n; ++i) {
1693        u[i] = ~u[i] + carry; // b's complement
1694        carry = carry && u[i] == 0;
1695      }
1696    }
1697    DEBUG(dbgs() << "KnuthDiv: after complement:");
1698    DEBUG(for (int i = m+n; i >=0; i--) dbgs() << " " << u[i]);
1699    DEBUG(dbgs() << '\n');
1700
1701    // D5. [Test remainder.] Set q[j] = qp. If the result of step D4 was
1702    // negative, go to step D6; otherwise go on to step D7.
1703    q[j] = (unsigned)qp;
1704    if (isNeg) {
1705      // D6. [Add back]. The probability that this step is necessary is very
1706      // small, on the order of only 2/b. Make sure that test data accounts for
1707      // this possibility. Decrease q[j] by 1
1708      q[j]--;
1709      // and add (0v[n-1]...v[1]v[0]) to (u[j+n]u[j+n-1]...u[j+1]u[j]).
1710      // A carry will occur to the left of u[j+n], and it should be ignored
1711      // since it cancels with the borrow that occurred in D4.
1712      bool carry = false;
1713      for (unsigned i = 0; i < n; i++) {
1714        unsigned limit = std::min(u[j+i],v[i]);
1715        u[j+i] += v[i] + carry;
1716        carry = u[j+i] < limit || (carry && u[j+i] == limit);
1717      }
1718      u[j+n] += carry;
1719    }
1720    DEBUG(dbgs() << "KnuthDiv: after correction:");
1721    DEBUG(for (int i = m+n; i >=0; i--) dbgs() <<" " << u[i]);
1722    DEBUG(dbgs() << "\nKnuthDiv: digit result = " << q[j] << '\n');
1723
1724  // D7. [Loop on j.]  Decrease j by one. Now if j >= 0, go back to D3.
1725  } while (--j >= 0);
1726
1727  DEBUG(dbgs() << "KnuthDiv: quotient:");
1728  DEBUG(for (int i = m; i >=0; i--) dbgs() <<" " << q[i]);
1729  DEBUG(dbgs() << '\n');
1730
1731  // D8. [Unnormalize]. Now q[...] is the desired quotient, and the desired
1732  // remainder may be obtained by dividing u[...] by d. If r is non-null we
1733  // compute the remainder (urem uses this).
1734  if (r) {
1735    // The value d is expressed by the "shift" value above since we avoided
1736    // multiplication by d by using a shift left. So, all we have to do is
1737    // shift right here. In order to mak
1738    if (shift) {
1739      unsigned carry = 0;
1740      DEBUG(dbgs() << "KnuthDiv: remainder:");
1741      for (int i = n-1; i >= 0; i--) {
1742        r[i] = (u[i] >> shift) | carry;
1743        carry = u[i] << (32 - shift);
1744        DEBUG(dbgs() << " " << r[i]);
1745      }
1746    } else {
1747      for (int i = n-1; i >= 0; i--) {
1748        r[i] = u[i];
1749        DEBUG(dbgs() << " " << r[i]);
1750      }
1751    }
1752    DEBUG(dbgs() << '\n');
1753  }
1754#if 0
1755  DEBUG(dbgs() << '\n');
1756#endif
1757}
1758
1759void APInt::divide(const APInt LHS, unsigned lhsWords,
1760                   const APInt &RHS, unsigned rhsWords,
1761                   APInt *Quotient, APInt *Remainder)
1762{
1763  assert(lhsWords >= rhsWords && "Fractional result");
1764
1765  // First, compose the values into an array of 32-bit words instead of
1766  // 64-bit words. This is a necessity of both the "short division" algorithm
1767  // and the Knuth "classical algorithm" which requires there to be native
1768  // operations for +, -, and * on an m bit value with an m*2 bit result. We
1769  // can't use 64-bit operands here because we don't have native results of
1770  // 128-bits. Furthermore, casting the 64-bit values to 32-bit values won't
1771  // work on large-endian machines.
1772  uint64_t mask = ~0ull >> (sizeof(unsigned)*CHAR_BIT);
1773  unsigned n = rhsWords * 2;
1774  unsigned m = (lhsWords * 2) - n;
1775
1776  // Allocate space for the temporary values we need either on the stack, if
1777  // it will fit, or on the heap if it won't.
1778  unsigned SPACE[128];
1779  unsigned *U = 0;
1780  unsigned *V = 0;
1781  unsigned *Q = 0;
1782  unsigned *R = 0;
1783  if ((Remainder?4:3)*n+2*m+1 <= 128) {
1784    U = &SPACE[0];
1785    V = &SPACE[m+n+1];
1786    Q = &SPACE[(m+n+1) + n];
1787    if (Remainder)
1788      R = &SPACE[(m+n+1) + n + (m+n)];
1789  } else {
1790    U = new unsigned[m + n + 1];
1791    V = new unsigned[n];
1792    Q = new unsigned[m+n];
1793    if (Remainder)
1794      R = new unsigned[n];
1795  }
1796
1797  // Initialize the dividend
1798  memset(U, 0, (m+n+1)*sizeof(unsigned));
1799  for (unsigned i = 0; i < lhsWords; ++i) {
1800    uint64_t tmp = (LHS.getNumWords() == 1 ? LHS.VAL : LHS.pVal[i]);
1801    U[i * 2] = (unsigned)(tmp & mask);
1802    U[i * 2 + 1] = (unsigned)(tmp >> (sizeof(unsigned)*CHAR_BIT));
1803  }
1804  U[m+n] = 0; // this extra word is for "spill" in the Knuth algorithm.
1805
1806  // Initialize the divisor
1807  memset(V, 0, (n)*sizeof(unsigned));
1808  for (unsigned i = 0; i < rhsWords; ++i) {
1809    uint64_t tmp = (RHS.getNumWords() == 1 ? RHS.VAL : RHS.pVal[i]);
1810    V[i * 2] = (unsigned)(tmp & mask);
1811    V[i * 2 + 1] = (unsigned)(tmp >> (sizeof(unsigned)*CHAR_BIT));
1812  }
1813
1814  // initialize the quotient and remainder
1815  memset(Q, 0, (m+n) * sizeof(unsigned));
1816  if (Remainder)
1817    memset(R, 0, n * sizeof(unsigned));
1818
1819  // Now, adjust m and n for the Knuth division. n is the number of words in
1820  // the divisor. m is the number of words by which the dividend exceeds the
1821  // divisor (i.e. m+n is the length of the dividend). These sizes must not
1822  // contain any zero words or the Knuth algorithm fails.
1823  for (unsigned i = n; i > 0 && V[i-1] == 0; i--) {
1824    n--;
1825    m++;
1826  }
1827  for (unsigned i = m+n; i > 0 && U[i-1] == 0; i--)
1828    m--;
1829
1830  // If we're left with only a single word for the divisor, Knuth doesn't work
1831  // so we implement the short division algorithm here. This is much simpler
1832  // and faster because we are certain that we can divide a 64-bit quantity
1833  // by a 32-bit quantity at hardware speed and short division is simply a
1834  // series of such operations. This is just like doing short division but we
1835  // are using base 2^32 instead of base 10.
1836  assert(n != 0 && "Divide by zero?");
1837  if (n == 1) {
1838    unsigned divisor = V[0];
1839    unsigned remainder = 0;
1840    for (int i = m+n-1; i >= 0; i--) {
1841      uint64_t partial_dividend = uint64_t(remainder) << 32 | U[i];
1842      if (partial_dividend == 0) {
1843        Q[i] = 0;
1844        remainder = 0;
1845      } else if (partial_dividend < divisor) {
1846        Q[i] = 0;
1847        remainder = (unsigned)partial_dividend;
1848      } else if (partial_dividend == divisor) {
1849        Q[i] = 1;
1850        remainder = 0;
1851      } else {
1852        Q[i] = (unsigned)(partial_dividend / divisor);
1853        remainder = (unsigned)(partial_dividend - (Q[i] * divisor));
1854      }
1855    }
1856    if (R)
1857      R[0] = remainder;
1858  } else {
1859    // Now we're ready to invoke the Knuth classical divide algorithm. In this
1860    // case n > 1.
1861    KnuthDiv(U, V, Q, R, m, n);
1862  }
1863
1864  // If the caller wants the quotient
1865  if (Quotient) {
1866    // Set up the Quotient value's memory.
1867    if (Quotient->BitWidth != LHS.BitWidth) {
1868      if (Quotient->isSingleWord())
1869        Quotient->VAL = 0;
1870      else
1871        delete [] Quotient->pVal;
1872      Quotient->BitWidth = LHS.BitWidth;
1873      if (!Quotient->isSingleWord())
1874        Quotient->pVal = getClearedMemory(Quotient->getNumWords());
1875    } else
1876      Quotient->clear();
1877
1878    // The quotient is in Q. Reconstitute the quotient into Quotient's low
1879    // order words.
1880    if (lhsWords == 1) {
1881      uint64_t tmp =
1882        uint64_t(Q[0]) | (uint64_t(Q[1]) << (APINT_BITS_PER_WORD / 2));
1883      if (Quotient->isSingleWord())
1884        Quotient->VAL = tmp;
1885      else
1886        Quotient->pVal[0] = tmp;
1887    } else {
1888      assert(!Quotient->isSingleWord() && "Quotient APInt not large enough");
1889      for (unsigned i = 0; i < lhsWords; ++i)
1890        Quotient->pVal[i] =
1891          uint64_t(Q[i*2]) | (uint64_t(Q[i*2+1]) << (APINT_BITS_PER_WORD / 2));
1892    }
1893  }
1894
1895  // If the caller wants the remainder
1896  if (Remainder) {
1897    // Set up the Remainder value's memory.
1898    if (Remainder->BitWidth != RHS.BitWidth) {
1899      if (Remainder->isSingleWord())
1900        Remainder->VAL = 0;
1901      else
1902        delete [] Remainder->pVal;
1903      Remainder->BitWidth = RHS.BitWidth;
1904      if (!Remainder->isSingleWord())
1905        Remainder->pVal = getClearedMemory(Remainder->getNumWords());
1906    } else
1907      Remainder->clear();
1908
1909    // The remainder is in R. Reconstitute the remainder into Remainder's low
1910    // order words.
1911    if (rhsWords == 1) {
1912      uint64_t tmp =
1913        uint64_t(R[0]) | (uint64_t(R[1]) << (APINT_BITS_PER_WORD / 2));
1914      if (Remainder->isSingleWord())
1915        Remainder->VAL = tmp;
1916      else
1917        Remainder->pVal[0] = tmp;
1918    } else {
1919      assert(!Remainder->isSingleWord() && "Remainder APInt not large enough");
1920      for (unsigned i = 0; i < rhsWords; ++i)
1921        Remainder->pVal[i] =
1922          uint64_t(R[i*2]) | (uint64_t(R[i*2+1]) << (APINT_BITS_PER_WORD / 2));
1923    }
1924  }
1925
1926  // Clean up the memory we allocated.
1927  if (U != &SPACE[0]) {
1928    delete [] U;
1929    delete [] V;
1930    delete [] Q;
1931    delete [] R;
1932  }
1933}
1934
1935APInt APInt::udiv(const APInt& RHS) const {
1936  assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
1937
1938  // First, deal with the easy case
1939  if (isSingleWord()) {
1940    assert(RHS.VAL != 0 && "Divide by zero?");
1941    return APInt(BitWidth, VAL / RHS.VAL);
1942  }
1943
1944  // Get some facts about the LHS and RHS number of bits and words
1945  unsigned rhsBits = RHS.getActiveBits();
1946  unsigned rhsWords = !rhsBits ? 0 : (APInt::whichWord(rhsBits - 1) + 1);
1947  assert(rhsWords && "Divided by zero???");
1948  unsigned lhsBits = this->getActiveBits();
1949  unsigned lhsWords = !lhsBits ? 0 : (APInt::whichWord(lhsBits - 1) + 1);
1950
1951  // Deal with some degenerate cases
1952  if (!lhsWords)
1953    // 0 / X ===> 0
1954    return APInt(BitWidth, 0);
1955  else if (lhsWords < rhsWords || this->ult(RHS)) {
1956    // X / Y ===> 0, iff X < Y
1957    return APInt(BitWidth, 0);
1958  } else if (*this == RHS) {
1959    // X / X ===> 1
1960    return APInt(BitWidth, 1);
1961  } else if (lhsWords == 1 && rhsWords == 1) {
1962    // All high words are zero, just use native divide
1963    return APInt(BitWidth, this->pVal[0] / RHS.pVal[0]);
1964  }
1965
1966  // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1967  APInt Quotient(1,0); // to hold result.
1968  divide(*this, lhsWords, RHS, rhsWords, &Quotient, 0);
1969  return Quotient;
1970}
1971
1972APInt APInt::urem(const APInt& RHS) const {
1973  assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
1974  if (isSingleWord()) {
1975    assert(RHS.VAL != 0 && "Remainder by zero?");
1976    return APInt(BitWidth, VAL % RHS.VAL);
1977  }
1978
1979  // Get some facts about the LHS
1980  unsigned lhsBits = getActiveBits();
1981  unsigned lhsWords = !lhsBits ? 0 : (whichWord(lhsBits - 1) + 1);
1982
1983  // Get some facts about the RHS
1984  unsigned rhsBits = RHS.getActiveBits();
1985  unsigned rhsWords = !rhsBits ? 0 : (APInt::whichWord(rhsBits - 1) + 1);
1986  assert(rhsWords && "Performing remainder operation by zero ???");
1987
1988  // Check the degenerate cases
1989  if (lhsWords == 0) {
1990    // 0 % Y ===> 0
1991    return APInt(BitWidth, 0);
1992  } else if (lhsWords < rhsWords || this->ult(RHS)) {
1993    // X % Y ===> X, iff X < Y
1994    return *this;
1995  } else if (*this == RHS) {
1996    // X % X == 0;
1997    return APInt(BitWidth, 0);
1998  } else if (lhsWords == 1) {
1999    // All high words are zero, just use native remainder
2000    return APInt(BitWidth, pVal[0] % RHS.pVal[0]);
2001  }
2002
2003  // We have to compute it the hard way. Invoke the Knuth divide algorithm.
2004  APInt Remainder(1,0);
2005  divide(*this, lhsWords, RHS, rhsWords, 0, &Remainder);
2006  return Remainder;
2007}
2008
2009void APInt::udivrem(const APInt &LHS, const APInt &RHS,
2010                    APInt &Quotient, APInt &Remainder) {
2011  // Get some size facts about the dividend and divisor
2012  unsigned lhsBits  = LHS.getActiveBits();
2013  unsigned lhsWords = !lhsBits ? 0 : (APInt::whichWord(lhsBits - 1) + 1);
2014  unsigned rhsBits  = RHS.getActiveBits();
2015  unsigned rhsWords = !rhsBits ? 0 : (APInt::whichWord(rhsBits - 1) + 1);
2016
2017  // Check the degenerate cases
2018  if (lhsWords == 0) {
2019    Quotient = 0;                // 0 / Y ===> 0
2020    Remainder = 0;               // 0 % Y ===> 0
2021    return;
2022  }
2023
2024  if (lhsWords < rhsWords || LHS.ult(RHS)) {
2025    Remainder = LHS;            // X % Y ===> X, iff X < Y
2026    Quotient = 0;               // X / Y ===> 0, iff X < Y
2027    return;
2028  }
2029
2030  if (LHS == RHS) {
2031    Quotient  = 1;              // X / X ===> 1
2032    Remainder = 0;              // X % X ===> 0;
2033    return;
2034  }
2035
2036  if (lhsWords == 1 && rhsWords == 1) {
2037    // There is only one word to consider so use the native versions.
2038    uint64_t lhsValue = LHS.isSingleWord() ? LHS.VAL : LHS.pVal[0];
2039    uint64_t rhsValue = RHS.isSingleWord() ? RHS.VAL : RHS.pVal[0];
2040    Quotient = APInt(LHS.getBitWidth(), lhsValue / rhsValue);
2041    Remainder = APInt(LHS.getBitWidth(), lhsValue % rhsValue);
2042    return;
2043  }
2044
2045  // Okay, lets do it the long way
2046  divide(LHS, lhsWords, RHS, rhsWords, &Quotient, &Remainder);
2047}
2048
2049void APInt::fromString(unsigned numbits, StringRef str, uint8_t radix) {
2050  // Check our assumptions here
2051  assert(!str.empty() && "Invalid string length");
2052  assert((radix == 10 || radix == 8 || radix == 16 || radix == 2) &&
2053         "Radix should be 2, 8, 10, or 16!");
2054
2055  StringRef::iterator p = str.begin();
2056  size_t slen = str.size();
2057  bool isNeg = *p == '-';
2058  if (*p == '-' || *p == '+') {
2059    p++;
2060    slen--;
2061    assert(slen && "String is only a sign, needs a value.");
2062  }
2063  assert((slen <= numbits || radix != 2) && "Insufficient bit width");
2064  assert(((slen-1)*3 <= numbits || radix != 8) && "Insufficient bit width");
2065  assert(((slen-1)*4 <= numbits || radix != 16) && "Insufficient bit width");
2066  assert((((slen-1)*64)/22 <= numbits || radix != 10) &&
2067         "Insufficient bit width");
2068
2069  // Allocate memory
2070  if (!isSingleWord())
2071    pVal = getClearedMemory(getNumWords());
2072
2073  // Figure out if we can shift instead of multiply
2074  unsigned shift = (radix == 16 ? 4 : radix == 8 ? 3 : radix == 2 ? 1 : 0);
2075
2076  // Set up an APInt for the digit to add outside the loop so we don't
2077  // constantly construct/destruct it.
2078  APInt apdigit(getBitWidth(), 0);
2079  APInt apradix(getBitWidth(), radix);
2080
2081  // Enter digit traversal loop
2082  for (StringRef::iterator e = str.end(); p != e; ++p) {
2083    unsigned digit = getDigit(*p, radix);
2084    assert(digit < radix && "Invalid character in digit string");
2085
2086    // Shift or multiply the value by the radix
2087    if (slen > 1) {
2088      if (shift)
2089        *this <<= shift;
2090      else
2091        *this *= apradix;
2092    }
2093
2094    // Add in the digit we just interpreted
2095    if (apdigit.isSingleWord())
2096      apdigit.VAL = digit;
2097    else
2098      apdigit.pVal[0] = digit;
2099    *this += apdigit;
2100  }
2101  // If its negative, put it in two's complement form
2102  if (isNeg) {
2103    (*this)--;
2104    this->flip();
2105  }
2106}
2107
2108void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix,
2109                     bool Signed) const {
2110  assert((Radix == 10 || Radix == 8 || Radix == 16 || Radix == 2) &&
2111         "Radix should be 2, 8, 10, or 16!");
2112
2113  // First, check for a zero value and just short circuit the logic below.
2114  if (*this == 0) {
2115    Str.push_back('0');
2116    return;
2117  }
2118
2119  static const char Digits[] = "0123456789ABCDEF";
2120
2121  if (isSingleWord()) {
2122    char Buffer[65];
2123    char *BufPtr = Buffer+65;
2124
2125    uint64_t N;
2126    if (!Signed) {
2127      N = getZExtValue();
2128    } else {
2129      int64_t I = getSExtValue();
2130      if (I >= 0) {
2131        N = I;
2132      } else {
2133        Str.push_back('-');
2134        N = -(uint64_t)I;
2135      }
2136    }
2137
2138    while (N) {
2139      *--BufPtr = Digits[N % Radix];
2140      N /= Radix;
2141    }
2142    Str.append(BufPtr, Buffer+65);
2143    return;
2144  }
2145
2146  APInt Tmp(*this);
2147
2148  if (Signed && isNegative()) {
2149    // They want to print the signed version and it is a negative value
2150    // Flip the bits and add one to turn it into the equivalent positive
2151    // value and put a '-' in the result.
2152    Tmp.flip();
2153    Tmp++;
2154    Str.push_back('-');
2155  }
2156
2157  // We insert the digits backward, then reverse them to get the right order.
2158  unsigned StartDig = Str.size();
2159
2160  // For the 2, 8 and 16 bit cases, we can just shift instead of divide
2161  // because the number of bits per digit (1, 3 and 4 respectively) divides
2162  // equaly.  We just shift until the value is zero.
2163  if (Radix != 10) {
2164    // Just shift tmp right for each digit width until it becomes zero
2165    unsigned ShiftAmt = (Radix == 16 ? 4 : (Radix == 8 ? 3 : 1));
2166    unsigned MaskAmt = Radix - 1;
2167
2168    while (Tmp != 0) {
2169      unsigned Digit = unsigned(Tmp.getRawData()[0]) & MaskAmt;
2170      Str.push_back(Digits[Digit]);
2171      Tmp = Tmp.lshr(ShiftAmt);
2172    }
2173  } else {
2174    APInt divisor(4, 10);
2175    while (Tmp != 0) {
2176      APInt APdigit(1, 0);
2177      APInt tmp2(Tmp.getBitWidth(), 0);
2178      divide(Tmp, Tmp.getNumWords(), divisor, divisor.getNumWords(), &tmp2,
2179             &APdigit);
2180      unsigned Digit = (unsigned)APdigit.getZExtValue();
2181      assert(Digit < Radix && "divide failed");
2182      Str.push_back(Digits[Digit]);
2183      Tmp = tmp2;
2184    }
2185  }
2186
2187  // Reverse the digits before returning.
2188  std::reverse(Str.begin()+StartDig, Str.end());
2189}
2190
2191/// toString - This returns the APInt as a std::string.  Note that this is an
2192/// inefficient method.  It is better to pass in a SmallVector/SmallString
2193/// to the methods above.
2194std::string APInt::toString(unsigned Radix = 10, bool Signed = true) const {
2195  SmallString<40> S;
2196  toString(S, Radix, Signed);
2197  return S.str();
2198}
2199
2200
2201void APInt::dump() const {
2202  SmallString<40> S, U;
2203  this->toStringUnsigned(U);
2204  this->toStringSigned(S);
2205  dbgs() << "APInt(" << BitWidth << "b, "
2206         << U.str() << "u " << S.str() << "s)";
2207}
2208
2209void APInt::print(raw_ostream &OS, bool isSigned) const {
2210  SmallString<40> S;
2211  this->toString(S, 10, isSigned);
2212  OS << S.str();
2213}
2214
2215// This implements a variety of operations on a representation of
2216// arbitrary precision, two's-complement, bignum integer values.
2217
2218// Assumed by lowHalf, highHalf, partMSB and partLSB.  A fairly safe
2219// and unrestricting assumption.
2220#define COMPILE_TIME_ASSERT(cond) extern int CTAssert[(cond) ? 1 : -1]
2221COMPILE_TIME_ASSERT(integerPartWidth % 2 == 0);
2222
2223/* Some handy functions local to this file.  */
2224namespace {
2225
2226  /* Returns the integer part with the least significant BITS set.
2227     BITS cannot be zero.  */
2228  static inline integerPart
2229  lowBitMask(unsigned int bits)
2230  {
2231    assert(bits != 0 && bits <= integerPartWidth);
2232
2233    return ~(integerPart) 0 >> (integerPartWidth - bits);
2234  }
2235
2236  /* Returns the value of the lower half of PART.  */
2237  static inline integerPart
2238  lowHalf(integerPart part)
2239  {
2240    return part & lowBitMask(integerPartWidth / 2);
2241  }
2242
2243  /* Returns the value of the upper half of PART.  */
2244  static inline integerPart
2245  highHalf(integerPart part)
2246  {
2247    return part >> (integerPartWidth / 2);
2248  }
2249
2250  /* Returns the bit number of the most significant set bit of a part.
2251     If the input number has no bits set -1U is returned.  */
2252  static unsigned int
2253  partMSB(integerPart value)
2254  {
2255    unsigned int n, msb;
2256
2257    if (value == 0)
2258      return -1U;
2259
2260    n = integerPartWidth / 2;
2261
2262    msb = 0;
2263    do {
2264      if (value >> n) {
2265        value >>= n;
2266        msb += n;
2267      }
2268
2269      n >>= 1;
2270    } while (n);
2271
2272    return msb;
2273  }
2274
2275  /* Returns the bit number of the least significant set bit of a
2276     part.  If the input number has no bits set -1U is returned.  */
2277  static unsigned int
2278  partLSB(integerPart value)
2279  {
2280    unsigned int n, lsb;
2281
2282    if (value == 0)
2283      return -1U;
2284
2285    lsb = integerPartWidth - 1;
2286    n = integerPartWidth / 2;
2287
2288    do {
2289      if (value << n) {
2290        value <<= n;
2291        lsb -= n;
2292      }
2293
2294      n >>= 1;
2295    } while (n);
2296
2297    return lsb;
2298  }
2299}
2300
2301/* Sets the least significant part of a bignum to the input value, and
2302   zeroes out higher parts.  */
2303void
2304APInt::tcSet(integerPart *dst, integerPart part, unsigned int parts)
2305{
2306  unsigned int i;
2307
2308  assert(parts > 0);
2309
2310  dst[0] = part;
2311  for (i = 1; i < parts; i++)
2312    dst[i] = 0;
2313}
2314
2315/* Assign one bignum to another.  */
2316void
2317APInt::tcAssign(integerPart *dst, const integerPart *src, unsigned int parts)
2318{
2319  unsigned int i;
2320
2321  for (i = 0; i < parts; i++)
2322    dst[i] = src[i];
2323}
2324
2325/* Returns true if a bignum is zero, false otherwise.  */
2326bool
2327APInt::tcIsZero(const integerPart *src, unsigned int parts)
2328{
2329  unsigned int i;
2330
2331  for (i = 0; i < parts; i++)
2332    if (src[i])
2333      return false;
2334
2335  return true;
2336}
2337
2338/* Extract the given bit of a bignum; returns 0 or 1.  */
2339int
2340APInt::tcExtractBit(const integerPart *parts, unsigned int bit)
2341{
2342  return (parts[bit / integerPartWidth] &
2343          ((integerPart) 1 << bit % integerPartWidth)) != 0;
2344}
2345
2346/* Set the given bit of a bignum. */
2347void
2348APInt::tcSetBit(integerPart *parts, unsigned int bit)
2349{
2350  parts[bit / integerPartWidth] |= (integerPart) 1 << (bit % integerPartWidth);
2351}
2352
2353/* Clears the given bit of a bignum. */
2354void
2355APInt::tcClearBit(integerPart *parts, unsigned int bit)
2356{
2357  parts[bit / integerPartWidth] &=
2358    ~((integerPart) 1 << (bit % integerPartWidth));
2359}
2360
2361/* Returns the bit number of the least significant set bit of a
2362   number.  If the input number has no bits set -1U is returned.  */
2363unsigned int
2364APInt::tcLSB(const integerPart *parts, unsigned int n)
2365{
2366  unsigned int i, lsb;
2367
2368  for (i = 0; i < n; i++) {
2369      if (parts[i] != 0) {
2370          lsb = partLSB(parts[i]);
2371
2372          return lsb + i * integerPartWidth;
2373      }
2374  }
2375
2376  return -1U;
2377}
2378
2379/* Returns the bit number of the most significant set bit of a number.
2380   If the input number has no bits set -1U is returned.  */
2381unsigned int
2382APInt::tcMSB(const integerPart *parts, unsigned int n)
2383{
2384  unsigned int msb;
2385
2386  do {
2387    --n;
2388
2389    if (parts[n] != 0) {
2390      msb = partMSB(parts[n]);
2391
2392      return msb + n * integerPartWidth;
2393    }
2394  } while (n);
2395
2396  return -1U;
2397}
2398
2399/* Copy the bit vector of width srcBITS from SRC, starting at bit
2400   srcLSB, to DST, of dstCOUNT parts, such that the bit srcLSB becomes
2401   the least significant bit of DST.  All high bits above srcBITS in
2402   DST are zero-filled.  */
2403void
2404APInt::tcExtract(integerPart *dst, unsigned int dstCount,const integerPart *src,
2405                 unsigned int srcBits, unsigned int srcLSB)
2406{
2407  unsigned int firstSrcPart, dstParts, shift, n;
2408
2409  dstParts = (srcBits + integerPartWidth - 1) / integerPartWidth;
2410  assert(dstParts <= dstCount);
2411
2412  firstSrcPart = srcLSB / integerPartWidth;
2413  tcAssign (dst, src + firstSrcPart, dstParts);
2414
2415  shift = srcLSB % integerPartWidth;
2416  tcShiftRight (dst, dstParts, shift);
2417
2418  /* We now have (dstParts * integerPartWidth - shift) bits from SRC
2419     in DST.  If this is less that srcBits, append the rest, else
2420     clear the high bits.  */
2421  n = dstParts * integerPartWidth - shift;
2422  if (n < srcBits) {
2423    integerPart mask = lowBitMask (srcBits - n);
2424    dst[dstParts - 1] |= ((src[firstSrcPart + dstParts] & mask)
2425                          << n % integerPartWidth);
2426  } else if (n > srcBits) {
2427    if (srcBits % integerPartWidth)
2428      dst[dstParts - 1] &= lowBitMask (srcBits % integerPartWidth);
2429  }
2430
2431  /* Clear high parts.  */
2432  while (dstParts < dstCount)
2433    dst[dstParts++] = 0;
2434}
2435
2436/* DST += RHS + C where C is zero or one.  Returns the carry flag.  */
2437integerPart
2438APInt::tcAdd(integerPart *dst, const integerPart *rhs,
2439             integerPart c, unsigned int parts)
2440{
2441  unsigned int i;
2442
2443  assert(c <= 1);
2444
2445  for (i = 0; i < parts; i++) {
2446    integerPart l;
2447
2448    l = dst[i];
2449    if (c) {
2450      dst[i] += rhs[i] + 1;
2451      c = (dst[i] <= l);
2452    } else {
2453      dst[i] += rhs[i];
2454      c = (dst[i] < l);
2455    }
2456  }
2457
2458  return c;
2459}
2460
2461/* DST -= RHS + C where C is zero or one.  Returns the carry flag.  */
2462integerPart
2463APInt::tcSubtract(integerPart *dst, const integerPart *rhs,
2464                  integerPart c, unsigned int parts)
2465{
2466  unsigned int i;
2467
2468  assert(c <= 1);
2469
2470  for (i = 0; i < parts; i++) {
2471    integerPart l;
2472
2473    l = dst[i];
2474    if (c) {
2475      dst[i] -= rhs[i] + 1;
2476      c = (dst[i] >= l);
2477    } else {
2478      dst[i] -= rhs[i];
2479      c = (dst[i] > l);
2480    }
2481  }
2482
2483  return c;
2484}
2485
2486/* Negate a bignum in-place.  */
2487void
2488APInt::tcNegate(integerPart *dst, unsigned int parts)
2489{
2490  tcComplement(dst, parts);
2491  tcIncrement(dst, parts);
2492}
2493
2494/*  DST += SRC * MULTIPLIER + CARRY   if add is true
2495    DST  = SRC * MULTIPLIER + CARRY   if add is false
2496
2497    Requires 0 <= DSTPARTS <= SRCPARTS + 1.  If DST overlaps SRC
2498    they must start at the same point, i.e. DST == SRC.
2499
2500    If DSTPARTS == SRCPARTS + 1 no overflow occurs and zero is
2501    returned.  Otherwise DST is filled with the least significant
2502    DSTPARTS parts of the result, and if all of the omitted higher
2503    parts were zero return zero, otherwise overflow occurred and
2504    return one.  */
2505int
2506APInt::tcMultiplyPart(integerPart *dst, const integerPart *src,
2507                      integerPart multiplier, integerPart carry,
2508                      unsigned int srcParts, unsigned int dstParts,
2509                      bool add)
2510{
2511  unsigned int i, n;
2512
2513  /* Otherwise our writes of DST kill our later reads of SRC.  */
2514  assert(dst <= src || dst >= src + srcParts);
2515  assert(dstParts <= srcParts + 1);
2516
2517  /* N loops; minimum of dstParts and srcParts.  */
2518  n = dstParts < srcParts ? dstParts: srcParts;
2519
2520  for (i = 0; i < n; i++) {
2521    integerPart low, mid, high, srcPart;
2522
2523      /* [ LOW, HIGH ] = MULTIPLIER * SRC[i] + DST[i] + CARRY.
2524
2525         This cannot overflow, because
2526
2527         (n - 1) * (n - 1) + 2 (n - 1) = (n - 1) * (n + 1)
2528
2529         which is less than n^2.  */
2530
2531    srcPart = src[i];
2532
2533    if (multiplier == 0 || srcPart == 0)        {
2534      low = carry;
2535      high = 0;
2536    } else {
2537      low = lowHalf(srcPart) * lowHalf(multiplier);
2538      high = highHalf(srcPart) * highHalf(multiplier);
2539
2540      mid = lowHalf(srcPart) * highHalf(multiplier);
2541      high += highHalf(mid);
2542      mid <<= integerPartWidth / 2;
2543      if (low + mid < low)
2544        high++;
2545      low += mid;
2546
2547      mid = highHalf(srcPart) * lowHalf(multiplier);
2548      high += highHalf(mid);
2549      mid <<= integerPartWidth / 2;
2550      if (low + mid < low)
2551        high++;
2552      low += mid;
2553
2554      /* Now add carry.  */
2555      if (low + carry < low)
2556        high++;
2557      low += carry;
2558    }
2559
2560    if (add) {
2561      /* And now DST[i], and store the new low part there.  */
2562      if (low + dst[i] < low)
2563        high++;
2564      dst[i] += low;
2565    } else
2566      dst[i] = low;
2567
2568    carry = high;
2569  }
2570
2571  if (i < dstParts) {
2572    /* Full multiplication, there is no overflow.  */
2573    assert(i + 1 == dstParts);
2574    dst[i] = carry;
2575    return 0;
2576  } else {
2577    /* We overflowed if there is carry.  */
2578    if (carry)
2579      return 1;
2580
2581    /* We would overflow if any significant unwritten parts would be
2582       non-zero.  This is true if any remaining src parts are non-zero
2583       and the multiplier is non-zero.  */
2584    if (multiplier)
2585      for (; i < srcParts; i++)
2586        if (src[i])
2587          return 1;
2588
2589    /* We fitted in the narrow destination.  */
2590    return 0;
2591  }
2592}
2593
2594/* DST = LHS * RHS, where DST has the same width as the operands and
2595   is filled with the least significant parts of the result.  Returns
2596   one if overflow occurred, otherwise zero.  DST must be disjoint
2597   from both operands.  */
2598int
2599APInt::tcMultiply(integerPart *dst, const integerPart *lhs,
2600                  const integerPart *rhs, unsigned int parts)
2601{
2602  unsigned int i;
2603  int overflow;
2604
2605  assert(dst != lhs && dst != rhs);
2606
2607  overflow = 0;
2608  tcSet(dst, 0, parts);
2609
2610  for (i = 0; i < parts; i++)
2611    overflow |= tcMultiplyPart(&dst[i], lhs, rhs[i], 0, parts,
2612                               parts - i, true);
2613
2614  return overflow;
2615}
2616
2617/* DST = LHS * RHS, where DST has width the sum of the widths of the
2618   operands.  No overflow occurs.  DST must be disjoint from both
2619   operands.  Returns the number of parts required to hold the
2620   result.  */
2621unsigned int
2622APInt::tcFullMultiply(integerPart *dst, const integerPart *lhs,
2623                      const integerPart *rhs, unsigned int lhsParts,
2624                      unsigned int rhsParts)
2625{
2626  /* Put the narrower number on the LHS for less loops below.  */
2627  if (lhsParts > rhsParts) {
2628    return tcFullMultiply (dst, rhs, lhs, rhsParts, lhsParts);
2629  } else {
2630    unsigned int n;
2631
2632    assert(dst != lhs && dst != rhs);
2633
2634    tcSet(dst, 0, rhsParts);
2635
2636    for (n = 0; n < lhsParts; n++)
2637      tcMultiplyPart(&dst[n], rhs, lhs[n], 0, rhsParts, rhsParts + 1, true);
2638
2639    n = lhsParts + rhsParts;
2640
2641    return n - (dst[n - 1] == 0);
2642  }
2643}
2644
2645/* If RHS is zero LHS and REMAINDER are left unchanged, return one.
2646   Otherwise set LHS to LHS / RHS with the fractional part discarded,
2647   set REMAINDER to the remainder, return zero.  i.e.
2648
2649   OLD_LHS = RHS * LHS + REMAINDER
2650
2651   SCRATCH is a bignum of the same size as the operands and result for
2652   use by the routine; its contents need not be initialized and are
2653   destroyed.  LHS, REMAINDER and SCRATCH must be distinct.
2654*/
2655int
2656APInt::tcDivide(integerPart *lhs, const integerPart *rhs,
2657                integerPart *remainder, integerPart *srhs,
2658                unsigned int parts)
2659{
2660  unsigned int n, shiftCount;
2661  integerPart mask;
2662
2663  assert(lhs != remainder && lhs != srhs && remainder != srhs);
2664
2665  shiftCount = tcMSB(rhs, parts) + 1;
2666  if (shiftCount == 0)
2667    return true;
2668
2669  shiftCount = parts * integerPartWidth - shiftCount;
2670  n = shiftCount / integerPartWidth;
2671  mask = (integerPart) 1 << (shiftCount % integerPartWidth);
2672
2673  tcAssign(srhs, rhs, parts);
2674  tcShiftLeft(srhs, parts, shiftCount);
2675  tcAssign(remainder, lhs, parts);
2676  tcSet(lhs, 0, parts);
2677
2678  /* Loop, subtracting SRHS if REMAINDER is greater and adding that to
2679     the total.  */
2680  for (;;) {
2681      int compare;
2682
2683      compare = tcCompare(remainder, srhs, parts);
2684      if (compare >= 0) {
2685        tcSubtract(remainder, srhs, 0, parts);
2686        lhs[n] |= mask;
2687      }
2688
2689      if (shiftCount == 0)
2690        break;
2691      shiftCount--;
2692      tcShiftRight(srhs, parts, 1);
2693      if ((mask >>= 1) == 0)
2694        mask = (integerPart) 1 << (integerPartWidth - 1), n--;
2695  }
2696
2697  return false;
2698}
2699
2700/* Shift a bignum left COUNT bits in-place.  Shifted in bits are zero.
2701   There are no restrictions on COUNT.  */
2702void
2703APInt::tcShiftLeft(integerPart *dst, unsigned int parts, unsigned int count)
2704{
2705  if (count) {
2706    unsigned int jump, shift;
2707
2708    /* Jump is the inter-part jump; shift is is intra-part shift.  */
2709    jump = count / integerPartWidth;
2710    shift = count % integerPartWidth;
2711
2712    while (parts > jump) {
2713      integerPart part;
2714
2715      parts--;
2716
2717      /* dst[i] comes from the two parts src[i - jump] and, if we have
2718         an intra-part shift, src[i - jump - 1].  */
2719      part = dst[parts - jump];
2720      if (shift) {
2721        part <<= shift;
2722        if (parts >= jump + 1)
2723          part |= dst[parts - jump - 1] >> (integerPartWidth - shift);
2724      }
2725
2726      dst[parts] = part;
2727    }
2728
2729    while (parts > 0)
2730      dst[--parts] = 0;
2731  }
2732}
2733
2734/* Shift a bignum right COUNT bits in-place.  Shifted in bits are
2735   zero.  There are no restrictions on COUNT.  */
2736void
2737APInt::tcShiftRight(integerPart *dst, unsigned int parts, unsigned int count)
2738{
2739  if (count) {
2740    unsigned int i, jump, shift;
2741
2742    /* Jump is the inter-part jump; shift is is intra-part shift.  */
2743    jump = count / integerPartWidth;
2744    shift = count % integerPartWidth;
2745
2746    /* Perform the shift.  This leaves the most significant COUNT bits
2747       of the result at zero.  */
2748    for (i = 0; i < parts; i++) {
2749      integerPart part;
2750
2751      if (i + jump >= parts) {
2752        part = 0;
2753      } else {
2754        part = dst[i + jump];
2755        if (shift) {
2756          part >>= shift;
2757          if (i + jump + 1 < parts)
2758            part |= dst[i + jump + 1] << (integerPartWidth - shift);
2759        }
2760      }
2761
2762      dst[i] = part;
2763    }
2764  }
2765}
2766
2767/* Bitwise and of two bignums.  */
2768void
2769APInt::tcAnd(integerPart *dst, const integerPart *rhs, unsigned int parts)
2770{
2771  unsigned int i;
2772
2773  for (i = 0; i < parts; i++)
2774    dst[i] &= rhs[i];
2775}
2776
2777/* Bitwise inclusive or of two bignums.  */
2778void
2779APInt::tcOr(integerPart *dst, const integerPart *rhs, unsigned int parts)
2780{
2781  unsigned int i;
2782
2783  for (i = 0; i < parts; i++)
2784    dst[i] |= rhs[i];
2785}
2786
2787/* Bitwise exclusive or of two bignums.  */
2788void
2789APInt::tcXor(integerPart *dst, const integerPart *rhs, unsigned int parts)
2790{
2791  unsigned int i;
2792
2793  for (i = 0; i < parts; i++)
2794    dst[i] ^= rhs[i];
2795}
2796
2797/* Complement a bignum in-place.  */
2798void
2799APInt::tcComplement(integerPart *dst, unsigned int parts)
2800{
2801  unsigned int i;
2802
2803  for (i = 0; i < parts; i++)
2804    dst[i] = ~dst[i];
2805}
2806
2807/* Comparison (unsigned) of two bignums.  */
2808int
2809APInt::tcCompare(const integerPart *lhs, const integerPart *rhs,
2810                 unsigned int parts)
2811{
2812  while (parts) {
2813      parts--;
2814      if (lhs[parts] == rhs[parts])
2815        continue;
2816
2817      if (lhs[parts] > rhs[parts])
2818        return 1;
2819      else
2820        return -1;
2821    }
2822
2823  return 0;
2824}
2825
2826/* Increment a bignum in-place, return the carry flag.  */
2827integerPart
2828APInt::tcIncrement(integerPart *dst, unsigned int parts)
2829{
2830  unsigned int i;
2831
2832  for (i = 0; i < parts; i++)
2833    if (++dst[i] != 0)
2834      break;
2835
2836  return i == parts;
2837}
2838
2839/* Set the least significant BITS bits of a bignum, clear the
2840   rest.  */
2841void
2842APInt::tcSetLeastSignificantBits(integerPart *dst, unsigned int parts,
2843                                 unsigned int bits)
2844{
2845  unsigned int i;
2846
2847  i = 0;
2848  while (bits > integerPartWidth) {
2849    dst[i++] = ~(integerPart) 0;
2850    bits -= integerPartWidth;
2851  }
2852
2853  if (bits)
2854    dst[i++] = ~(integerPart) 0 >> (integerPartWidth - bits);
2855
2856  while (i < parts)
2857    dst[i++] = 0;
2858}
2859