APInt.cpp revision 344779
1//===-- APInt.cpp - Implement APInt class ---------------------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file implements a class to represent arbitrary precision integer
11// constant values and provide a variety of arithmetic operations on them.
12//
13//===----------------------------------------------------------------------===//
14
15#include "llvm/ADT/APInt.h"
16#include "llvm/ADT/ArrayRef.h"
17#include "llvm/ADT/FoldingSet.h"
18#include "llvm/ADT/Hashing.h"
19#include "llvm/ADT/Optional.h"
20#include "llvm/ADT/SmallString.h"
21#include "llvm/ADT/StringRef.h"
22#include "llvm/ADT/bit.h"
23#include "llvm/Config/llvm-config.h"
24#include "llvm/Support/Debug.h"
25#include "llvm/Support/ErrorHandling.h"
26#include "llvm/Support/MathExtras.h"
27#include "llvm/Support/raw_ostream.h"
28#include <climits>
29#include <cmath>
30#include <cstdlib>
31#include <cstring>
32using namespace llvm;
33
34#define DEBUG_TYPE "apint"
35
36/// A utility function for allocating memory, checking for allocation failures,
37/// and ensuring the contents are zeroed.
38inline static uint64_t* getClearedMemory(unsigned numWords) {
39  uint64_t *result = new uint64_t[numWords];
40  memset(result, 0, numWords * sizeof(uint64_t));
41  return result;
42}
43
44/// A utility function for allocating memory and checking for allocation
45/// failure.  The content is not zeroed.
46inline static uint64_t* getMemory(unsigned numWords) {
47  return new uint64_t[numWords];
48}
49
50/// A utility function that converts a character to a digit.
51inline static unsigned getDigit(char cdigit, uint8_t radix) {
52  unsigned r;
53
54  if (radix == 16 || radix == 36) {
55    r = cdigit - '0';
56    if (r <= 9)
57      return r;
58
59    r = cdigit - 'A';
60    if (r <= radix - 11U)
61      return r + 10;
62
63    r = cdigit - 'a';
64    if (r <= radix - 11U)
65      return r + 10;
66
67    radix = 10;
68  }
69
70  r = cdigit - '0';
71  if (r < radix)
72    return r;
73
74  return -1U;
75}
76
77
78void APInt::initSlowCase(uint64_t val, bool isSigned) {
79  U.pVal = getClearedMemory(getNumWords());
80  U.pVal[0] = val;
81  if (isSigned && int64_t(val) < 0)
82    for (unsigned i = 1; i < getNumWords(); ++i)
83      U.pVal[i] = WORDTYPE_MAX;
84  clearUnusedBits();
85}
86
87void APInt::initSlowCase(const APInt& that) {
88  U.pVal = getMemory(getNumWords());
89  memcpy(U.pVal, that.U.pVal, getNumWords() * APINT_WORD_SIZE);
90}
91
92void APInt::initFromArray(ArrayRef<uint64_t> bigVal) {
93  assert(BitWidth && "Bitwidth too small");
94  assert(bigVal.data() && "Null pointer detected!");
95  if (isSingleWord())
96    U.VAL = bigVal[0];
97  else {
98    // Get memory, cleared to 0
99    U.pVal = getClearedMemory(getNumWords());
100    // Calculate the number of words to copy
101    unsigned words = std::min<unsigned>(bigVal.size(), getNumWords());
102    // Copy the words from bigVal to pVal
103    memcpy(U.pVal, bigVal.data(), words * APINT_WORD_SIZE);
104  }
105  // Make sure unused high bits are cleared
106  clearUnusedBits();
107}
108
109APInt::APInt(unsigned numBits, ArrayRef<uint64_t> bigVal)
110  : BitWidth(numBits) {
111  initFromArray(bigVal);
112}
113
114APInt::APInt(unsigned numBits, unsigned numWords, const uint64_t bigVal[])
115  : BitWidth(numBits) {
116  initFromArray(makeArrayRef(bigVal, numWords));
117}
118
119APInt::APInt(unsigned numbits, StringRef Str, uint8_t radix)
120  : BitWidth(numbits) {
121  assert(BitWidth && "Bitwidth too small");
122  fromString(numbits, Str, radix);
123}
124
125void APInt::reallocate(unsigned NewBitWidth) {
126  // If the number of words is the same we can just change the width and stop.
127  if (getNumWords() == getNumWords(NewBitWidth)) {
128    BitWidth = NewBitWidth;
129    return;
130  }
131
132  // If we have an allocation, delete it.
133  if (!isSingleWord())
134    delete [] U.pVal;
135
136  // Update BitWidth.
137  BitWidth = NewBitWidth;
138
139  // If we are supposed to have an allocation, create it.
140  if (!isSingleWord())
141    U.pVal = getMemory(getNumWords());
142}
143
144void APInt::AssignSlowCase(const APInt& RHS) {
145  // Don't do anything for X = X
146  if (this == &RHS)
147    return;
148
149  // Adjust the bit width and handle allocations as necessary.
150  reallocate(RHS.getBitWidth());
151
152  // Copy the data.
153  if (isSingleWord())
154    U.VAL = RHS.U.VAL;
155  else
156    memcpy(U.pVal, RHS.U.pVal, getNumWords() * APINT_WORD_SIZE);
157}
158
159/// This method 'profiles' an APInt for use with FoldingSet.
160void APInt::Profile(FoldingSetNodeID& ID) const {
161  ID.AddInteger(BitWidth);
162
163  if (isSingleWord()) {
164    ID.AddInteger(U.VAL);
165    return;
166  }
167
168  unsigned NumWords = getNumWords();
169  for (unsigned i = 0; i < NumWords; ++i)
170    ID.AddInteger(U.pVal[i]);
171}
172
173/// Prefix increment operator. Increments the APInt by one.
174APInt& APInt::operator++() {
175  if (isSingleWord())
176    ++U.VAL;
177  else
178    tcIncrement(U.pVal, getNumWords());
179  return clearUnusedBits();
180}
181
182/// Prefix decrement operator. Decrements the APInt by one.
183APInt& APInt::operator--() {
184  if (isSingleWord())
185    --U.VAL;
186  else
187    tcDecrement(U.pVal, getNumWords());
188  return clearUnusedBits();
189}
190
191/// Adds the RHS APint to this APInt.
192/// @returns this, after addition of RHS.
193/// Addition assignment operator.
194APInt& APInt::operator+=(const APInt& RHS) {
195  assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
196  if (isSingleWord())
197    U.VAL += RHS.U.VAL;
198  else
199    tcAdd(U.pVal, RHS.U.pVal, 0, getNumWords());
200  return clearUnusedBits();
201}
202
203APInt& APInt::operator+=(uint64_t RHS) {
204  if (isSingleWord())
205    U.VAL += RHS;
206  else
207    tcAddPart(U.pVal, RHS, getNumWords());
208  return clearUnusedBits();
209}
210
211/// Subtracts the RHS APInt from this APInt
212/// @returns this, after subtraction
213/// Subtraction assignment operator.
214APInt& APInt::operator-=(const APInt& RHS) {
215  assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
216  if (isSingleWord())
217    U.VAL -= RHS.U.VAL;
218  else
219    tcSubtract(U.pVal, RHS.U.pVal, 0, getNumWords());
220  return clearUnusedBits();
221}
222
223APInt& APInt::operator-=(uint64_t RHS) {
224  if (isSingleWord())
225    U.VAL -= RHS;
226  else
227    tcSubtractPart(U.pVal, RHS, getNumWords());
228  return clearUnusedBits();
229}
230
231APInt APInt::operator*(const APInt& RHS) const {
232  assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
233  if (isSingleWord())
234    return APInt(BitWidth, U.VAL * RHS.U.VAL);
235
236  APInt Result(getMemory(getNumWords()), getBitWidth());
237
238  tcMultiply(Result.U.pVal, U.pVal, RHS.U.pVal, getNumWords());
239
240  Result.clearUnusedBits();
241  return Result;
242}
243
244void APInt::AndAssignSlowCase(const APInt& RHS) {
245  tcAnd(U.pVal, RHS.U.pVal, getNumWords());
246}
247
248void APInt::OrAssignSlowCase(const APInt& RHS) {
249  tcOr(U.pVal, RHS.U.pVal, getNumWords());
250}
251
252void APInt::XorAssignSlowCase(const APInt& RHS) {
253  tcXor(U.pVal, RHS.U.pVal, getNumWords());
254}
255
256APInt& APInt::operator*=(const APInt& RHS) {
257  assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
258  *this = *this * RHS;
259  return *this;
260}
261
262APInt& APInt::operator*=(uint64_t RHS) {
263  if (isSingleWord()) {
264    U.VAL *= RHS;
265  } else {
266    unsigned NumWords = getNumWords();
267    tcMultiplyPart(U.pVal, U.pVal, RHS, 0, NumWords, NumWords, false);
268  }
269  return clearUnusedBits();
270}
271
272bool APInt::EqualSlowCase(const APInt& RHS) const {
273  return std::equal(U.pVal, U.pVal + getNumWords(), RHS.U.pVal);
274}
275
276int APInt::compare(const APInt& RHS) const {
277  assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison");
278  if (isSingleWord())
279    return U.VAL < RHS.U.VAL ? -1 : U.VAL > RHS.U.VAL;
280
281  return tcCompare(U.pVal, RHS.U.pVal, getNumWords());
282}
283
284int APInt::compareSigned(const APInt& RHS) const {
285  assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison");
286  if (isSingleWord()) {
287    int64_t lhsSext = SignExtend64(U.VAL, BitWidth);
288    int64_t rhsSext = SignExtend64(RHS.U.VAL, BitWidth);
289    return lhsSext < rhsSext ? -1 : lhsSext > rhsSext;
290  }
291
292  bool lhsNeg = isNegative();
293  bool rhsNeg = RHS.isNegative();
294
295  // If the sign bits don't match, then (LHS < RHS) if LHS is negative
296  if (lhsNeg != rhsNeg)
297    return lhsNeg ? -1 : 1;
298
299  // Otherwise we can just use an unsigned comparison, because even negative
300  // numbers compare correctly this way if both have the same signed-ness.
301  return tcCompare(U.pVal, RHS.U.pVal, getNumWords());
302}
303
304void APInt::setBitsSlowCase(unsigned loBit, unsigned hiBit) {
305  unsigned loWord = whichWord(loBit);
306  unsigned hiWord = whichWord(hiBit);
307
308  // Create an initial mask for the low word with zeros below loBit.
309  uint64_t loMask = WORDTYPE_MAX << whichBit(loBit);
310
311  // If hiBit is not aligned, we need a high mask.
312  unsigned hiShiftAmt = whichBit(hiBit);
313  if (hiShiftAmt != 0) {
314    // Create a high mask with zeros above hiBit.
315    uint64_t hiMask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - hiShiftAmt);
316    // If loWord and hiWord are equal, then we combine the masks. Otherwise,
317    // set the bits in hiWord.
318    if (hiWord == loWord)
319      loMask &= hiMask;
320    else
321      U.pVal[hiWord] |= hiMask;
322  }
323  // Apply the mask to the low word.
324  U.pVal[loWord] |= loMask;
325
326  // Fill any words between loWord and hiWord with all ones.
327  for (unsigned word = loWord + 1; word < hiWord; ++word)
328    U.pVal[word] = WORDTYPE_MAX;
329}
330
331/// Toggle every bit to its opposite value.
332void APInt::flipAllBitsSlowCase() {
333  tcComplement(U.pVal, getNumWords());
334  clearUnusedBits();
335}
336
337/// Toggle a given bit to its opposite value whose position is given
338/// as "bitPosition".
339/// Toggles a given bit to its opposite value.
340void APInt::flipBit(unsigned bitPosition) {
341  assert(bitPosition < BitWidth && "Out of the bit-width range!");
342  if ((*this)[bitPosition]) clearBit(bitPosition);
343  else setBit(bitPosition);
344}
345
346void APInt::insertBits(const APInt &subBits, unsigned bitPosition) {
347  unsigned subBitWidth = subBits.getBitWidth();
348  assert(0 < subBitWidth && (subBitWidth + bitPosition) <= BitWidth &&
349         "Illegal bit insertion");
350
351  // Insertion is a direct copy.
352  if (subBitWidth == BitWidth) {
353    *this = subBits;
354    return;
355  }
356
357  // Single word result can be done as a direct bitmask.
358  if (isSingleWord()) {
359    uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - subBitWidth);
360    U.VAL &= ~(mask << bitPosition);
361    U.VAL |= (subBits.U.VAL << bitPosition);
362    return;
363  }
364
365  unsigned loBit = whichBit(bitPosition);
366  unsigned loWord = whichWord(bitPosition);
367  unsigned hi1Word = whichWord(bitPosition + subBitWidth - 1);
368
369  // Insertion within a single word can be done as a direct bitmask.
370  if (loWord == hi1Word) {
371    uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - subBitWidth);
372    U.pVal[loWord] &= ~(mask << loBit);
373    U.pVal[loWord] |= (subBits.U.VAL << loBit);
374    return;
375  }
376
377  // Insert on word boundaries.
378  if (loBit == 0) {
379    // Direct copy whole words.
380    unsigned numWholeSubWords = subBitWidth / APINT_BITS_PER_WORD;
381    memcpy(U.pVal + loWord, subBits.getRawData(),
382           numWholeSubWords * APINT_WORD_SIZE);
383
384    // Mask+insert remaining bits.
385    unsigned remainingBits = subBitWidth % APINT_BITS_PER_WORD;
386    if (remainingBits != 0) {
387      uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - remainingBits);
388      U.pVal[hi1Word] &= ~mask;
389      U.pVal[hi1Word] |= subBits.getWord(subBitWidth - 1);
390    }
391    return;
392  }
393
394  // General case - set/clear individual bits in dst based on src.
395  // TODO - there is scope for optimization here, but at the moment this code
396  // path is barely used so prefer readability over performance.
397  for (unsigned i = 0; i != subBitWidth; ++i) {
398    if (subBits[i])
399      setBit(bitPosition + i);
400    else
401      clearBit(bitPosition + i);
402  }
403}
404
405APInt APInt::extractBits(unsigned numBits, unsigned bitPosition) const {
406  assert(numBits > 0 && "Can't extract zero bits");
407  assert(bitPosition < BitWidth && (numBits + bitPosition) <= BitWidth &&
408         "Illegal bit extraction");
409
410  if (isSingleWord())
411    return APInt(numBits, U.VAL >> bitPosition);
412
413  unsigned loBit = whichBit(bitPosition);
414  unsigned loWord = whichWord(bitPosition);
415  unsigned hiWord = whichWord(bitPosition + numBits - 1);
416
417  // Single word result extracting bits from a single word source.
418  if (loWord == hiWord)
419    return APInt(numBits, U.pVal[loWord] >> loBit);
420
421  // Extracting bits that start on a source word boundary can be done
422  // as a fast memory copy.
423  if (loBit == 0)
424    return APInt(numBits, makeArrayRef(U.pVal + loWord, 1 + hiWord - loWord));
425
426  // General case - shift + copy source words directly into place.
427  APInt Result(numBits, 0);
428  unsigned NumSrcWords = getNumWords();
429  unsigned NumDstWords = Result.getNumWords();
430
431  uint64_t *DestPtr = Result.isSingleWord() ? &Result.U.VAL : Result.U.pVal;
432  for (unsigned word = 0; word < NumDstWords; ++word) {
433    uint64_t w0 = U.pVal[loWord + word];
434    uint64_t w1 =
435        (loWord + word + 1) < NumSrcWords ? U.pVal[loWord + word + 1] : 0;
436    DestPtr[word] = (w0 >> loBit) | (w1 << (APINT_BITS_PER_WORD - loBit));
437  }
438
439  return Result.clearUnusedBits();
440}
441
442unsigned APInt::getBitsNeeded(StringRef str, uint8_t radix) {
443  assert(!str.empty() && "Invalid string length");
444  assert((radix == 10 || radix == 8 || radix == 16 || radix == 2 ||
445          radix == 36) &&
446         "Radix should be 2, 8, 10, 16, or 36!");
447
448  size_t slen = str.size();
449
450  // Each computation below needs to know if it's negative.
451  StringRef::iterator p = str.begin();
452  unsigned isNegative = *p == '-';
453  if (*p == '-' || *p == '+') {
454    p++;
455    slen--;
456    assert(slen && "String is only a sign, needs a value.");
457  }
458
459  // For radixes of power-of-two values, the bits required is accurately and
460  // easily computed
461  if (radix == 2)
462    return slen + isNegative;
463  if (radix == 8)
464    return slen * 3 + isNegative;
465  if (radix == 16)
466    return slen * 4 + isNegative;
467
468  // FIXME: base 36
469
470  // This is grossly inefficient but accurate. We could probably do something
471  // with a computation of roughly slen*64/20 and then adjust by the value of
472  // the first few digits. But, I'm not sure how accurate that could be.
473
474  // Compute a sufficient number of bits that is always large enough but might
475  // be too large. This avoids the assertion in the constructor. This
476  // calculation doesn't work appropriately for the numbers 0-9, so just use 4
477  // bits in that case.
478  unsigned sufficient
479    = radix == 10? (slen == 1 ? 4 : slen * 64/18)
480                 : (slen == 1 ? 7 : slen * 16/3);
481
482  // Convert to the actual binary value.
483  APInt tmp(sufficient, StringRef(p, slen), radix);
484
485  // Compute how many bits are required. If the log is infinite, assume we need
486  // just bit.
487  unsigned log = tmp.logBase2();
488  if (log == (unsigned)-1) {
489    return isNegative + 1;
490  } else {
491    return isNegative + log + 1;
492  }
493}
494
495hash_code llvm::hash_value(const APInt &Arg) {
496  if (Arg.isSingleWord())
497    return hash_combine(Arg.U.VAL);
498
499  return hash_combine_range(Arg.U.pVal, Arg.U.pVal + Arg.getNumWords());
500}
501
502bool APInt::isSplat(unsigned SplatSizeInBits) const {
503  assert(getBitWidth() % SplatSizeInBits == 0 &&
504         "SplatSizeInBits must divide width!");
505  // We can check that all parts of an integer are equal by making use of a
506  // little trick: rotate and check if it's still the same value.
507  return *this == rotl(SplatSizeInBits);
508}
509
510/// This function returns the high "numBits" bits of this APInt.
511APInt APInt::getHiBits(unsigned numBits) const {
512  return this->lshr(BitWidth - numBits);
513}
514
515/// This function returns the low "numBits" bits of this APInt.
516APInt APInt::getLoBits(unsigned numBits) const {
517  APInt Result(getLowBitsSet(BitWidth, numBits));
518  Result &= *this;
519  return Result;
520}
521
522/// Return a value containing V broadcasted over NewLen bits.
523APInt APInt::getSplat(unsigned NewLen, const APInt &V) {
524  assert(NewLen >= V.getBitWidth() && "Can't splat to smaller bit width!");
525
526  APInt Val = V.zextOrSelf(NewLen);
527  for (unsigned I = V.getBitWidth(); I < NewLen; I <<= 1)
528    Val |= Val << I;
529
530  return Val;
531}
532
533unsigned APInt::countLeadingZerosSlowCase() const {
534  unsigned Count = 0;
535  for (int i = getNumWords()-1; i >= 0; --i) {
536    uint64_t V = U.pVal[i];
537    if (V == 0)
538      Count += APINT_BITS_PER_WORD;
539    else {
540      Count += llvm::countLeadingZeros(V);
541      break;
542    }
543  }
544  // Adjust for unused bits in the most significant word (they are zero).
545  unsigned Mod = BitWidth % APINT_BITS_PER_WORD;
546  Count -= Mod > 0 ? APINT_BITS_PER_WORD - Mod : 0;
547  return Count;
548}
549
550unsigned APInt::countLeadingOnesSlowCase() const {
551  unsigned highWordBits = BitWidth % APINT_BITS_PER_WORD;
552  unsigned shift;
553  if (!highWordBits) {
554    highWordBits = APINT_BITS_PER_WORD;
555    shift = 0;
556  } else {
557    shift = APINT_BITS_PER_WORD - highWordBits;
558  }
559  int i = getNumWords() - 1;
560  unsigned Count = llvm::countLeadingOnes(U.pVal[i] << shift);
561  if (Count == highWordBits) {
562    for (i--; i >= 0; --i) {
563      if (U.pVal[i] == WORDTYPE_MAX)
564        Count += APINT_BITS_PER_WORD;
565      else {
566        Count += llvm::countLeadingOnes(U.pVal[i]);
567        break;
568      }
569    }
570  }
571  return Count;
572}
573
574unsigned APInt::countTrailingZerosSlowCase() const {
575  unsigned Count = 0;
576  unsigned i = 0;
577  for (; i < getNumWords() && U.pVal[i] == 0; ++i)
578    Count += APINT_BITS_PER_WORD;
579  if (i < getNumWords())
580    Count += llvm::countTrailingZeros(U.pVal[i]);
581  return std::min(Count, BitWidth);
582}
583
584unsigned APInt::countTrailingOnesSlowCase() const {
585  unsigned Count = 0;
586  unsigned i = 0;
587  for (; i < getNumWords() && U.pVal[i] == WORDTYPE_MAX; ++i)
588    Count += APINT_BITS_PER_WORD;
589  if (i < getNumWords())
590    Count += llvm::countTrailingOnes(U.pVal[i]);
591  assert(Count <= BitWidth);
592  return Count;
593}
594
595unsigned APInt::countPopulationSlowCase() const {
596  unsigned Count = 0;
597  for (unsigned i = 0; i < getNumWords(); ++i)
598    Count += llvm::countPopulation(U.pVal[i]);
599  return Count;
600}
601
602bool APInt::intersectsSlowCase(const APInt &RHS) const {
603  for (unsigned i = 0, e = getNumWords(); i != e; ++i)
604    if ((U.pVal[i] & RHS.U.pVal[i]) != 0)
605      return true;
606
607  return false;
608}
609
610bool APInt::isSubsetOfSlowCase(const APInt &RHS) const {
611  for (unsigned i = 0, e = getNumWords(); i != e; ++i)
612    if ((U.pVal[i] & ~RHS.U.pVal[i]) != 0)
613      return false;
614
615  return true;
616}
617
618APInt APInt::byteSwap() const {
619  assert(BitWidth >= 16 && BitWidth % 16 == 0 && "Cannot byteswap!");
620  if (BitWidth == 16)
621    return APInt(BitWidth, ByteSwap_16(uint16_t(U.VAL)));
622  if (BitWidth == 32)
623    return APInt(BitWidth, ByteSwap_32(unsigned(U.VAL)));
624  if (BitWidth == 48) {
625    unsigned Tmp1 = unsigned(U.VAL >> 16);
626    Tmp1 = ByteSwap_32(Tmp1);
627    uint16_t Tmp2 = uint16_t(U.VAL);
628    Tmp2 = ByteSwap_16(Tmp2);
629    return APInt(BitWidth, (uint64_t(Tmp2) << 32) | Tmp1);
630  }
631  if (BitWidth == 64)
632    return APInt(BitWidth, ByteSwap_64(U.VAL));
633
634  APInt Result(getNumWords() * APINT_BITS_PER_WORD, 0);
635  for (unsigned I = 0, N = getNumWords(); I != N; ++I)
636    Result.U.pVal[I] = ByteSwap_64(U.pVal[N - I - 1]);
637  if (Result.BitWidth != BitWidth) {
638    Result.lshrInPlace(Result.BitWidth - BitWidth);
639    Result.BitWidth = BitWidth;
640  }
641  return Result;
642}
643
644APInt APInt::reverseBits() const {
645  switch (BitWidth) {
646  case 64:
647    return APInt(BitWidth, llvm::reverseBits<uint64_t>(U.VAL));
648  case 32:
649    return APInt(BitWidth, llvm::reverseBits<uint32_t>(U.VAL));
650  case 16:
651    return APInt(BitWidth, llvm::reverseBits<uint16_t>(U.VAL));
652  case 8:
653    return APInt(BitWidth, llvm::reverseBits<uint8_t>(U.VAL));
654  default:
655    break;
656  }
657
658  APInt Val(*this);
659  APInt Reversed(BitWidth, 0);
660  unsigned S = BitWidth;
661
662  for (; Val != 0; Val.lshrInPlace(1)) {
663    Reversed <<= 1;
664    Reversed |= Val[0];
665    --S;
666  }
667
668  Reversed <<= S;
669  return Reversed;
670}
671
672APInt llvm::APIntOps::GreatestCommonDivisor(APInt A, APInt B) {
673  // Fast-path a common case.
674  if (A == B) return A;
675
676  // Corner cases: if either operand is zero, the other is the gcd.
677  if (!A) return B;
678  if (!B) return A;
679
680  // Count common powers of 2 and remove all other powers of 2.
681  unsigned Pow2;
682  {
683    unsigned Pow2_A = A.countTrailingZeros();
684    unsigned Pow2_B = B.countTrailingZeros();
685    if (Pow2_A > Pow2_B) {
686      A.lshrInPlace(Pow2_A - Pow2_B);
687      Pow2 = Pow2_B;
688    } else if (Pow2_B > Pow2_A) {
689      B.lshrInPlace(Pow2_B - Pow2_A);
690      Pow2 = Pow2_A;
691    } else {
692      Pow2 = Pow2_A;
693    }
694  }
695
696  // Both operands are odd multiples of 2^Pow_2:
697  //
698  //   gcd(a, b) = gcd(|a - b| / 2^i, min(a, b))
699  //
700  // This is a modified version of Stein's algorithm, taking advantage of
701  // efficient countTrailingZeros().
702  while (A != B) {
703    if (A.ugt(B)) {
704      A -= B;
705      A.lshrInPlace(A.countTrailingZeros() - Pow2);
706    } else {
707      B -= A;
708      B.lshrInPlace(B.countTrailingZeros() - Pow2);
709    }
710  }
711
712  return A;
713}
714
715APInt llvm::APIntOps::RoundDoubleToAPInt(double Double, unsigned width) {
716  uint64_t I = bit_cast<uint64_t>(Double);
717
718  // Get the sign bit from the highest order bit
719  bool isNeg = I >> 63;
720
721  // Get the 11-bit exponent and adjust for the 1023 bit bias
722  int64_t exp = ((I >> 52) & 0x7ff) - 1023;
723
724  // If the exponent is negative, the value is < 0 so just return 0.
725  if (exp < 0)
726    return APInt(width, 0u);
727
728  // Extract the mantissa by clearing the top 12 bits (sign + exponent).
729  uint64_t mantissa = (I & (~0ULL >> 12)) | 1ULL << 52;
730
731  // If the exponent doesn't shift all bits out of the mantissa
732  if (exp < 52)
733    return isNeg ? -APInt(width, mantissa >> (52 - exp)) :
734                    APInt(width, mantissa >> (52 - exp));
735
736  // If the client didn't provide enough bits for us to shift the mantissa into
737  // then the result is undefined, just return 0
738  if (width <= exp - 52)
739    return APInt(width, 0);
740
741  // Otherwise, we have to shift the mantissa bits up to the right location
742  APInt Tmp(width, mantissa);
743  Tmp <<= (unsigned)exp - 52;
744  return isNeg ? -Tmp : Tmp;
745}
746
747/// This function converts this APInt to a double.
748/// The layout for double is as following (IEEE Standard 754):
749///  --------------------------------------
750/// |  Sign    Exponent    Fraction    Bias |
751/// |-------------------------------------- |
752/// |  1[63]   11[62-52]   52[51-00]   1023 |
753///  --------------------------------------
754double APInt::roundToDouble(bool isSigned) const {
755
756  // Handle the simple case where the value is contained in one uint64_t.
757  // It is wrong to optimize getWord(0) to VAL; there might be more than one word.
758  if (isSingleWord() || getActiveBits() <= APINT_BITS_PER_WORD) {
759    if (isSigned) {
760      int64_t sext = SignExtend64(getWord(0), BitWidth);
761      return double(sext);
762    } else
763      return double(getWord(0));
764  }
765
766  // Determine if the value is negative.
767  bool isNeg = isSigned ? (*this)[BitWidth-1] : false;
768
769  // Construct the absolute value if we're negative.
770  APInt Tmp(isNeg ? -(*this) : (*this));
771
772  // Figure out how many bits we're using.
773  unsigned n = Tmp.getActiveBits();
774
775  // The exponent (without bias normalization) is just the number of bits
776  // we are using. Note that the sign bit is gone since we constructed the
777  // absolute value.
778  uint64_t exp = n;
779
780  // Return infinity for exponent overflow
781  if (exp > 1023) {
782    if (!isSigned || !isNeg)
783      return std::numeric_limits<double>::infinity();
784    else
785      return -std::numeric_limits<double>::infinity();
786  }
787  exp += 1023; // Increment for 1023 bias
788
789  // Number of bits in mantissa is 52. To obtain the mantissa value, we must
790  // extract the high 52 bits from the correct words in pVal.
791  uint64_t mantissa;
792  unsigned hiWord = whichWord(n-1);
793  if (hiWord == 0) {
794    mantissa = Tmp.U.pVal[0];
795    if (n > 52)
796      mantissa >>= n - 52; // shift down, we want the top 52 bits.
797  } else {
798    assert(hiWord > 0 && "huh?");
799    uint64_t hibits = Tmp.U.pVal[hiWord] << (52 - n % APINT_BITS_PER_WORD);
800    uint64_t lobits = Tmp.U.pVal[hiWord-1] >> (11 + n % APINT_BITS_PER_WORD);
801    mantissa = hibits | lobits;
802  }
803
804  // The leading bit of mantissa is implicit, so get rid of it.
805  uint64_t sign = isNeg ? (1ULL << (APINT_BITS_PER_WORD - 1)) : 0;
806  uint64_t I = sign | (exp << 52) | mantissa;
807  return bit_cast<double>(I);
808}
809
810// Truncate to new width.
811APInt APInt::trunc(unsigned width) const {
812  assert(width < BitWidth && "Invalid APInt Truncate request");
813  assert(width && "Can't truncate to 0 bits");
814
815  if (width <= APINT_BITS_PER_WORD)
816    return APInt(width, getRawData()[0]);
817
818  APInt Result(getMemory(getNumWords(width)), width);
819
820  // Copy full words.
821  unsigned i;
822  for (i = 0; i != width / APINT_BITS_PER_WORD; i++)
823    Result.U.pVal[i] = U.pVal[i];
824
825  // Truncate and copy any partial word.
826  unsigned bits = (0 - width) % APINT_BITS_PER_WORD;
827  if (bits != 0)
828    Result.U.pVal[i] = U.pVal[i] << bits >> bits;
829
830  return Result;
831}
832
833// Sign extend to a new width.
834APInt APInt::sext(unsigned Width) const {
835  assert(Width > BitWidth && "Invalid APInt SignExtend request");
836
837  if (Width <= APINT_BITS_PER_WORD)
838    return APInt(Width, SignExtend64(U.VAL, BitWidth));
839
840  APInt Result(getMemory(getNumWords(Width)), Width);
841
842  // Copy words.
843  std::memcpy(Result.U.pVal, getRawData(), getNumWords() * APINT_WORD_SIZE);
844
845  // Sign extend the last word since there may be unused bits in the input.
846  Result.U.pVal[getNumWords() - 1] =
847      SignExtend64(Result.U.pVal[getNumWords() - 1],
848                   ((BitWidth - 1) % APINT_BITS_PER_WORD) + 1);
849
850  // Fill with sign bits.
851  std::memset(Result.U.pVal + getNumWords(), isNegative() ? -1 : 0,
852              (Result.getNumWords() - getNumWords()) * APINT_WORD_SIZE);
853  Result.clearUnusedBits();
854  return Result;
855}
856
857//  Zero extend to a new width.
858APInt APInt::zext(unsigned width) const {
859  assert(width > BitWidth && "Invalid APInt ZeroExtend request");
860
861  if (width <= APINT_BITS_PER_WORD)
862    return APInt(width, U.VAL);
863
864  APInt Result(getMemory(getNumWords(width)), width);
865
866  // Copy words.
867  std::memcpy(Result.U.pVal, getRawData(), getNumWords() * APINT_WORD_SIZE);
868
869  // Zero remaining words.
870  std::memset(Result.U.pVal + getNumWords(), 0,
871              (Result.getNumWords() - getNumWords()) * APINT_WORD_SIZE);
872
873  return Result;
874}
875
876APInt APInt::zextOrTrunc(unsigned width) const {
877  if (BitWidth < width)
878    return zext(width);
879  if (BitWidth > width)
880    return trunc(width);
881  return *this;
882}
883
884APInt APInt::sextOrTrunc(unsigned width) const {
885  if (BitWidth < width)
886    return sext(width);
887  if (BitWidth > width)
888    return trunc(width);
889  return *this;
890}
891
892APInt APInt::zextOrSelf(unsigned width) const {
893  if (BitWidth < width)
894    return zext(width);
895  return *this;
896}
897
898APInt APInt::sextOrSelf(unsigned width) const {
899  if (BitWidth < width)
900    return sext(width);
901  return *this;
902}
903
904/// Arithmetic right-shift this APInt by shiftAmt.
905/// Arithmetic right-shift function.
906void APInt::ashrInPlace(const APInt &shiftAmt) {
907  ashrInPlace((unsigned)shiftAmt.getLimitedValue(BitWidth));
908}
909
910/// Arithmetic right-shift this APInt by shiftAmt.
911/// Arithmetic right-shift function.
912void APInt::ashrSlowCase(unsigned ShiftAmt) {
913  // Don't bother performing a no-op shift.
914  if (!ShiftAmt)
915    return;
916
917  // Save the original sign bit for later.
918  bool Negative = isNegative();
919
920  // WordShift is the inter-part shift; BitShift is intra-part shift.
921  unsigned WordShift = ShiftAmt / APINT_BITS_PER_WORD;
922  unsigned BitShift = ShiftAmt % APINT_BITS_PER_WORD;
923
924  unsigned WordsToMove = getNumWords() - WordShift;
925  if (WordsToMove != 0) {
926    // Sign extend the last word to fill in the unused bits.
927    U.pVal[getNumWords() - 1] = SignExtend64(
928        U.pVal[getNumWords() - 1], ((BitWidth - 1) % APINT_BITS_PER_WORD) + 1);
929
930    // Fastpath for moving by whole words.
931    if (BitShift == 0) {
932      std::memmove(U.pVal, U.pVal + WordShift, WordsToMove * APINT_WORD_SIZE);
933    } else {
934      // Move the words containing significant bits.
935      for (unsigned i = 0; i != WordsToMove - 1; ++i)
936        U.pVal[i] = (U.pVal[i + WordShift] >> BitShift) |
937                    (U.pVal[i + WordShift + 1] << (APINT_BITS_PER_WORD - BitShift));
938
939      // Handle the last word which has no high bits to copy.
940      U.pVal[WordsToMove - 1] = U.pVal[WordShift + WordsToMove - 1] >> BitShift;
941      // Sign extend one more time.
942      U.pVal[WordsToMove - 1] =
943          SignExtend64(U.pVal[WordsToMove - 1], APINT_BITS_PER_WORD - BitShift);
944    }
945  }
946
947  // Fill in the remainder based on the original sign.
948  std::memset(U.pVal + WordsToMove, Negative ? -1 : 0,
949              WordShift * APINT_WORD_SIZE);
950  clearUnusedBits();
951}
952
953/// Logical right-shift this APInt by shiftAmt.
954/// Logical right-shift function.
955void APInt::lshrInPlace(const APInt &shiftAmt) {
956  lshrInPlace((unsigned)shiftAmt.getLimitedValue(BitWidth));
957}
958
959/// Logical right-shift this APInt by shiftAmt.
960/// Logical right-shift function.
961void APInt::lshrSlowCase(unsigned ShiftAmt) {
962  tcShiftRight(U.pVal, getNumWords(), ShiftAmt);
963}
964
965/// Left-shift this APInt by shiftAmt.
966/// Left-shift function.
967APInt &APInt::operator<<=(const APInt &shiftAmt) {
968  // It's undefined behavior in C to shift by BitWidth or greater.
969  *this <<= (unsigned)shiftAmt.getLimitedValue(BitWidth);
970  return *this;
971}
972
973void APInt::shlSlowCase(unsigned ShiftAmt) {
974  tcShiftLeft(U.pVal, getNumWords(), ShiftAmt);
975  clearUnusedBits();
976}
977
978// Calculate the rotate amount modulo the bit width.
979static unsigned rotateModulo(unsigned BitWidth, const APInt &rotateAmt) {
980  unsigned rotBitWidth = rotateAmt.getBitWidth();
981  APInt rot = rotateAmt;
982  if (rotBitWidth < BitWidth) {
983    // Extend the rotate APInt, so that the urem doesn't divide by 0.
984    // e.g. APInt(1, 32) would give APInt(1, 0).
985    rot = rotateAmt.zext(BitWidth);
986  }
987  rot = rot.urem(APInt(rot.getBitWidth(), BitWidth));
988  return rot.getLimitedValue(BitWidth);
989}
990
991APInt APInt::rotl(const APInt &rotateAmt) const {
992  return rotl(rotateModulo(BitWidth, rotateAmt));
993}
994
995APInt APInt::rotl(unsigned rotateAmt) const {
996  rotateAmt %= BitWidth;
997  if (rotateAmt == 0)
998    return *this;
999  return shl(rotateAmt) | lshr(BitWidth - rotateAmt);
1000}
1001
1002APInt APInt::rotr(const APInt &rotateAmt) const {
1003  return rotr(rotateModulo(BitWidth, rotateAmt));
1004}
1005
1006APInt APInt::rotr(unsigned rotateAmt) const {
1007  rotateAmt %= BitWidth;
1008  if (rotateAmt == 0)
1009    return *this;
1010  return lshr(rotateAmt) | shl(BitWidth - rotateAmt);
1011}
1012
1013// Square Root - this method computes and returns the square root of "this".
1014// Three mechanisms are used for computation. For small values (<= 5 bits),
1015// a table lookup is done. This gets some performance for common cases. For
1016// values using less than 52 bits, the value is converted to double and then
1017// the libc sqrt function is called. The result is rounded and then converted
1018// back to a uint64_t which is then used to construct the result. Finally,
1019// the Babylonian method for computing square roots is used.
1020APInt APInt::sqrt() const {
1021
1022  // Determine the magnitude of the value.
1023  unsigned magnitude = getActiveBits();
1024
1025  // Use a fast table for some small values. This also gets rid of some
1026  // rounding errors in libc sqrt for small values.
1027  if (magnitude <= 5) {
1028    static const uint8_t results[32] = {
1029      /*     0 */ 0,
1030      /*  1- 2 */ 1, 1,
1031      /*  3- 6 */ 2, 2, 2, 2,
1032      /*  7-12 */ 3, 3, 3, 3, 3, 3,
1033      /* 13-20 */ 4, 4, 4, 4, 4, 4, 4, 4,
1034      /* 21-30 */ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
1035      /*    31 */ 6
1036    };
1037    return APInt(BitWidth, results[ (isSingleWord() ? U.VAL : U.pVal[0]) ]);
1038  }
1039
1040  // If the magnitude of the value fits in less than 52 bits (the precision of
1041  // an IEEE double precision floating point value), then we can use the
1042  // libc sqrt function which will probably use a hardware sqrt computation.
1043  // This should be faster than the algorithm below.
1044  if (magnitude < 52) {
1045    return APInt(BitWidth,
1046                 uint64_t(::round(::sqrt(double(isSingleWord() ? U.VAL
1047                                                               : U.pVal[0])))));
1048  }
1049
1050  // Okay, all the short cuts are exhausted. We must compute it. The following
1051  // is a classical Babylonian method for computing the square root. This code
1052  // was adapted to APInt from a wikipedia article on such computations.
1053  // See http://www.wikipedia.org/ and go to the page named
1054  // Calculate_an_integer_square_root.
1055  unsigned nbits = BitWidth, i = 4;
1056  APInt testy(BitWidth, 16);
1057  APInt x_old(BitWidth, 1);
1058  APInt x_new(BitWidth, 0);
1059  APInt two(BitWidth, 2);
1060
1061  // Select a good starting value using binary logarithms.
1062  for (;; i += 2, testy = testy.shl(2))
1063    if (i >= nbits || this->ule(testy)) {
1064      x_old = x_old.shl(i / 2);
1065      break;
1066    }
1067
1068  // Use the Babylonian method to arrive at the integer square root:
1069  for (;;) {
1070    x_new = (this->udiv(x_old) + x_old).udiv(two);
1071    if (x_old.ule(x_new))
1072      break;
1073    x_old = x_new;
1074  }
1075
1076  // Make sure we return the closest approximation
1077  // NOTE: The rounding calculation below is correct. It will produce an
1078  // off-by-one discrepancy with results from pari/gp. That discrepancy has been
1079  // determined to be a rounding issue with pari/gp as it begins to use a
1080  // floating point representation after 192 bits. There are no discrepancies
1081  // between this algorithm and pari/gp for bit widths < 192 bits.
1082  APInt square(x_old * x_old);
1083  APInt nextSquare((x_old + 1) * (x_old +1));
1084  if (this->ult(square))
1085    return x_old;
1086  assert(this->ule(nextSquare) && "Error in APInt::sqrt computation");
1087  APInt midpoint((nextSquare - square).udiv(two));
1088  APInt offset(*this - square);
1089  if (offset.ult(midpoint))
1090    return x_old;
1091  return x_old + 1;
1092}
1093
1094/// Computes the multiplicative inverse of this APInt for a given modulo. The
1095/// iterative extended Euclidean algorithm is used to solve for this value,
1096/// however we simplify it to speed up calculating only the inverse, and take
1097/// advantage of div+rem calculations. We also use some tricks to avoid copying
1098/// (potentially large) APInts around.
1099APInt APInt::multiplicativeInverse(const APInt& modulo) const {
1100  assert(ult(modulo) && "This APInt must be smaller than the modulo");
1101
1102  // Using the properties listed at the following web page (accessed 06/21/08):
1103  //   http://www.numbertheory.org/php/euclid.html
1104  // (especially the properties numbered 3, 4 and 9) it can be proved that
1105  // BitWidth bits suffice for all the computations in the algorithm implemented
1106  // below. More precisely, this number of bits suffice if the multiplicative
1107  // inverse exists, but may not suffice for the general extended Euclidean
1108  // algorithm.
1109
1110  APInt r[2] = { modulo, *this };
1111  APInt t[2] = { APInt(BitWidth, 0), APInt(BitWidth, 1) };
1112  APInt q(BitWidth, 0);
1113
1114  unsigned i;
1115  for (i = 0; r[i^1] != 0; i ^= 1) {
1116    // An overview of the math without the confusing bit-flipping:
1117    // q = r[i-2] / r[i-1]
1118    // r[i] = r[i-2] % r[i-1]
1119    // t[i] = t[i-2] - t[i-1] * q
1120    udivrem(r[i], r[i^1], q, r[i]);
1121    t[i] -= t[i^1] * q;
1122  }
1123
1124  // If this APInt and the modulo are not coprime, there is no multiplicative
1125  // inverse, so return 0. We check this by looking at the next-to-last
1126  // remainder, which is the gcd(*this,modulo) as calculated by the Euclidean
1127  // algorithm.
1128  if (r[i] != 1)
1129    return APInt(BitWidth, 0);
1130
1131  // The next-to-last t is the multiplicative inverse.  However, we are
1132  // interested in a positive inverse. Calculate a positive one from a negative
1133  // one if necessary. A simple addition of the modulo suffices because
1134  // abs(t[i]) is known to be less than *this/2 (see the link above).
1135  if (t[i].isNegative())
1136    t[i] += modulo;
1137
1138  return std::move(t[i]);
1139}
1140
1141/// Calculate the magic numbers required to implement a signed integer division
1142/// by a constant as a sequence of multiplies, adds and shifts.  Requires that
1143/// the divisor not be 0, 1, or -1.  Taken from "Hacker's Delight", Henry S.
1144/// Warren, Jr., chapter 10.
1145APInt::ms APInt::magic() const {
1146  const APInt& d = *this;
1147  unsigned p;
1148  APInt ad, anc, delta, q1, r1, q2, r2, t;
1149  APInt signedMin = APInt::getSignedMinValue(d.getBitWidth());
1150  struct ms mag;
1151
1152  ad = d.abs();
1153  t = signedMin + (d.lshr(d.getBitWidth() - 1));
1154  anc = t - 1 - t.urem(ad);   // absolute value of nc
1155  p = d.getBitWidth() - 1;    // initialize p
1156  q1 = signedMin.udiv(anc);   // initialize q1 = 2p/abs(nc)
1157  r1 = signedMin - q1*anc;    // initialize r1 = rem(2p,abs(nc))
1158  q2 = signedMin.udiv(ad);    // initialize q2 = 2p/abs(d)
1159  r2 = signedMin - q2*ad;     // initialize r2 = rem(2p,abs(d))
1160  do {
1161    p = p + 1;
1162    q1 = q1<<1;          // update q1 = 2p/abs(nc)
1163    r1 = r1<<1;          // update r1 = rem(2p/abs(nc))
1164    if (r1.uge(anc)) {  // must be unsigned comparison
1165      q1 = q1 + 1;
1166      r1 = r1 - anc;
1167    }
1168    q2 = q2<<1;          // update q2 = 2p/abs(d)
1169    r2 = r2<<1;          // update r2 = rem(2p/abs(d))
1170    if (r2.uge(ad)) {   // must be unsigned comparison
1171      q2 = q2 + 1;
1172      r2 = r2 - ad;
1173    }
1174    delta = ad - r2;
1175  } while (q1.ult(delta) || (q1 == delta && r1 == 0));
1176
1177  mag.m = q2 + 1;
1178  if (d.isNegative()) mag.m = -mag.m;   // resulting magic number
1179  mag.s = p - d.getBitWidth();          // resulting shift
1180  return mag;
1181}
1182
1183/// Calculate the magic numbers required to implement an unsigned integer
1184/// division by a constant as a sequence of multiplies, adds and shifts.
1185/// Requires that the divisor not be 0.  Taken from "Hacker's Delight", Henry
1186/// S. Warren, Jr., chapter 10.
1187/// LeadingZeros can be used to simplify the calculation if the upper bits
1188/// of the divided value are known zero.
1189APInt::mu APInt::magicu(unsigned LeadingZeros) const {
1190  const APInt& d = *this;
1191  unsigned p;
1192  APInt nc, delta, q1, r1, q2, r2;
1193  struct mu magu;
1194  magu.a = 0;               // initialize "add" indicator
1195  APInt allOnes = APInt::getAllOnesValue(d.getBitWidth()).lshr(LeadingZeros);
1196  APInt signedMin = APInt::getSignedMinValue(d.getBitWidth());
1197  APInt signedMax = APInt::getSignedMaxValue(d.getBitWidth());
1198
1199  nc = allOnes - (allOnes - d).urem(d);
1200  p = d.getBitWidth() - 1;  // initialize p
1201  q1 = signedMin.udiv(nc);  // initialize q1 = 2p/nc
1202  r1 = signedMin - q1*nc;   // initialize r1 = rem(2p,nc)
1203  q2 = signedMax.udiv(d);   // initialize q2 = (2p-1)/d
1204  r2 = signedMax - q2*d;    // initialize r2 = rem((2p-1),d)
1205  do {
1206    p = p + 1;
1207    if (r1.uge(nc - r1)) {
1208      q1 = q1 + q1 + 1;  // update q1
1209      r1 = r1 + r1 - nc; // update r1
1210    }
1211    else {
1212      q1 = q1+q1; // update q1
1213      r1 = r1+r1; // update r1
1214    }
1215    if ((r2 + 1).uge(d - r2)) {
1216      if (q2.uge(signedMax)) magu.a = 1;
1217      q2 = q2+q2 + 1;     // update q2
1218      r2 = r2+r2 + 1 - d; // update r2
1219    }
1220    else {
1221      if (q2.uge(signedMin)) magu.a = 1;
1222      q2 = q2+q2;     // update q2
1223      r2 = r2+r2 + 1; // update r2
1224    }
1225    delta = d - 1 - r2;
1226  } while (p < d.getBitWidth()*2 &&
1227           (q1.ult(delta) || (q1 == delta && r1 == 0)));
1228  magu.m = q2 + 1; // resulting magic number
1229  magu.s = p - d.getBitWidth();  // resulting shift
1230  return magu;
1231}
1232
1233/// Implementation of Knuth's Algorithm D (Division of nonnegative integers)
1234/// from "Art of Computer Programming, Volume 2", section 4.3.1, p. 272. The
1235/// variables here have the same names as in the algorithm. Comments explain
1236/// the algorithm and any deviation from it.
1237static void KnuthDiv(uint32_t *u, uint32_t *v, uint32_t *q, uint32_t* r,
1238                     unsigned m, unsigned n) {
1239  assert(u && "Must provide dividend");
1240  assert(v && "Must provide divisor");
1241  assert(q && "Must provide quotient");
1242  assert(u != v && u != q && v != q && "Must use different memory");
1243  assert(n>1 && "n must be > 1");
1244
1245  // b denotes the base of the number system. In our case b is 2^32.
1246  const uint64_t b = uint64_t(1) << 32;
1247
1248// The DEBUG macros here tend to be spam in the debug output if you're not
1249// debugging this code. Disable them unless KNUTH_DEBUG is defined.
1250#ifdef KNUTH_DEBUG
1251#define DEBUG_KNUTH(X) LLVM_DEBUG(X)
1252#else
1253#define DEBUG_KNUTH(X) do {} while(false)
1254#endif
1255
1256  DEBUG_KNUTH(dbgs() << "KnuthDiv: m=" << m << " n=" << n << '\n');
1257  DEBUG_KNUTH(dbgs() << "KnuthDiv: original:");
1258  DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
1259  DEBUG_KNUTH(dbgs() << " by");
1260  DEBUG_KNUTH(for (int i = n; i > 0; i--) dbgs() << " " << v[i - 1]);
1261  DEBUG_KNUTH(dbgs() << '\n');
1262  // D1. [Normalize.] Set d = b / (v[n-1] + 1) and multiply all the digits of
1263  // u and v by d. Note that we have taken Knuth's advice here to use a power
1264  // of 2 value for d such that d * v[n-1] >= b/2 (b is the base). A power of
1265  // 2 allows us to shift instead of multiply and it is easy to determine the
1266  // shift amount from the leading zeros.  We are basically normalizing the u
1267  // and v so that its high bits are shifted to the top of v's range without
1268  // overflow. Note that this can require an extra word in u so that u must
1269  // be of length m+n+1.
1270  unsigned shift = countLeadingZeros(v[n-1]);
1271  uint32_t v_carry = 0;
1272  uint32_t u_carry = 0;
1273  if (shift) {
1274    for (unsigned i = 0; i < m+n; ++i) {
1275      uint32_t u_tmp = u[i] >> (32 - shift);
1276      u[i] = (u[i] << shift) | u_carry;
1277      u_carry = u_tmp;
1278    }
1279    for (unsigned i = 0; i < n; ++i) {
1280      uint32_t v_tmp = v[i] >> (32 - shift);
1281      v[i] = (v[i] << shift) | v_carry;
1282      v_carry = v_tmp;
1283    }
1284  }
1285  u[m+n] = u_carry;
1286
1287  DEBUG_KNUTH(dbgs() << "KnuthDiv:   normal:");
1288  DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
1289  DEBUG_KNUTH(dbgs() << " by");
1290  DEBUG_KNUTH(for (int i = n; i > 0; i--) dbgs() << " " << v[i - 1]);
1291  DEBUG_KNUTH(dbgs() << '\n');
1292
1293  // D2. [Initialize j.]  Set j to m. This is the loop counter over the places.
1294  int j = m;
1295  do {
1296    DEBUG_KNUTH(dbgs() << "KnuthDiv: quotient digit #" << j << '\n');
1297    // D3. [Calculate q'.].
1298    //     Set qp = (u[j+n]*b + u[j+n-1]) / v[n-1]. (qp=qprime=q')
1299    //     Set rp = (u[j+n]*b + u[j+n-1]) % v[n-1]. (rp=rprime=r')
1300    // Now test if qp == b or qp*v[n-2] > b*rp + u[j+n-2]; if so, decrease
1301    // qp by 1, increase rp by v[n-1], and repeat this test if rp < b. The test
1302    // on v[n-2] determines at high speed most of the cases in which the trial
1303    // value qp is one too large, and it eliminates all cases where qp is two
1304    // too large.
1305    uint64_t dividend = Make_64(u[j+n], u[j+n-1]);
1306    DEBUG_KNUTH(dbgs() << "KnuthDiv: dividend == " << dividend << '\n');
1307    uint64_t qp = dividend / v[n-1];
1308    uint64_t rp = dividend % v[n-1];
1309    if (qp == b || qp*v[n-2] > b*rp + u[j+n-2]) {
1310      qp--;
1311      rp += v[n-1];
1312      if (rp < b && (qp == b || qp*v[n-2] > b*rp + u[j+n-2]))
1313        qp--;
1314    }
1315    DEBUG_KNUTH(dbgs() << "KnuthDiv: qp == " << qp << ", rp == " << rp << '\n');
1316
1317    // D4. [Multiply and subtract.] Replace (u[j+n]u[j+n-1]...u[j]) with
1318    // (u[j+n]u[j+n-1]..u[j]) - qp * (v[n-1]...v[1]v[0]). This computation
1319    // consists of a simple multiplication by a one-place number, combined with
1320    // a subtraction.
1321    // The digits (u[j+n]...u[j]) should be kept positive; if the result of
1322    // this step is actually negative, (u[j+n]...u[j]) should be left as the
1323    // true value plus b**(n+1), namely as the b's complement of
1324    // the true value, and a "borrow" to the left should be remembered.
1325    int64_t borrow = 0;
1326    for (unsigned i = 0; i < n; ++i) {
1327      uint64_t p = uint64_t(qp) * uint64_t(v[i]);
1328      int64_t subres = int64_t(u[j+i]) - borrow - Lo_32(p);
1329      u[j+i] = Lo_32(subres);
1330      borrow = Hi_32(p) - Hi_32(subres);
1331      DEBUG_KNUTH(dbgs() << "KnuthDiv: u[j+i] = " << u[j + i]
1332                        << ", borrow = " << borrow << '\n');
1333    }
1334    bool isNeg = u[j+n] < borrow;
1335    u[j+n] -= Lo_32(borrow);
1336
1337    DEBUG_KNUTH(dbgs() << "KnuthDiv: after subtraction:");
1338    DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
1339    DEBUG_KNUTH(dbgs() << '\n');
1340
1341    // D5. [Test remainder.] Set q[j] = qp. If the result of step D4 was
1342    // negative, go to step D6; otherwise go on to step D7.
1343    q[j] = Lo_32(qp);
1344    if (isNeg) {
1345      // D6. [Add back]. The probability that this step is necessary is very
1346      // small, on the order of only 2/b. Make sure that test data accounts for
1347      // this possibility. Decrease q[j] by 1
1348      q[j]--;
1349      // and add (0v[n-1]...v[1]v[0]) to (u[j+n]u[j+n-1]...u[j+1]u[j]).
1350      // A carry will occur to the left of u[j+n], and it should be ignored
1351      // since it cancels with the borrow that occurred in D4.
1352      bool carry = false;
1353      for (unsigned i = 0; i < n; i++) {
1354        uint32_t limit = std::min(u[j+i],v[i]);
1355        u[j+i] += v[i] + carry;
1356        carry = u[j+i] < limit || (carry && u[j+i] == limit);
1357      }
1358      u[j+n] += carry;
1359    }
1360    DEBUG_KNUTH(dbgs() << "KnuthDiv: after correction:");
1361    DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
1362    DEBUG_KNUTH(dbgs() << "\nKnuthDiv: digit result = " << q[j] << '\n');
1363
1364    // D7. [Loop on j.]  Decrease j by one. Now if j >= 0, go back to D3.
1365  } while (--j >= 0);
1366
1367  DEBUG_KNUTH(dbgs() << "KnuthDiv: quotient:");
1368  DEBUG_KNUTH(for (int i = m; i >= 0; i--) dbgs() << " " << q[i]);
1369  DEBUG_KNUTH(dbgs() << '\n');
1370
1371  // D8. [Unnormalize]. Now q[...] is the desired quotient, and the desired
1372  // remainder may be obtained by dividing u[...] by d. If r is non-null we
1373  // compute the remainder (urem uses this).
1374  if (r) {
1375    // The value d is expressed by the "shift" value above since we avoided
1376    // multiplication by d by using a shift left. So, all we have to do is
1377    // shift right here.
1378    if (shift) {
1379      uint32_t carry = 0;
1380      DEBUG_KNUTH(dbgs() << "KnuthDiv: remainder:");
1381      for (int i = n-1; i >= 0; i--) {
1382        r[i] = (u[i] >> shift) | carry;
1383        carry = u[i] << (32 - shift);
1384        DEBUG_KNUTH(dbgs() << " " << r[i]);
1385      }
1386    } else {
1387      for (int i = n-1; i >= 0; i--) {
1388        r[i] = u[i];
1389        DEBUG_KNUTH(dbgs() << " " << r[i]);
1390      }
1391    }
1392    DEBUG_KNUTH(dbgs() << '\n');
1393  }
1394  DEBUG_KNUTH(dbgs() << '\n');
1395}
1396
1397void APInt::divide(const WordType *LHS, unsigned lhsWords, const WordType *RHS,
1398                   unsigned rhsWords, WordType *Quotient, WordType *Remainder) {
1399  assert(lhsWords >= rhsWords && "Fractional result");
1400
1401  // First, compose the values into an array of 32-bit words instead of
1402  // 64-bit words. This is a necessity of both the "short division" algorithm
1403  // and the Knuth "classical algorithm" which requires there to be native
1404  // operations for +, -, and * on an m bit value with an m*2 bit result. We
1405  // can't use 64-bit operands here because we don't have native results of
1406  // 128-bits. Furthermore, casting the 64-bit values to 32-bit values won't
1407  // work on large-endian machines.
1408  unsigned n = rhsWords * 2;
1409  unsigned m = (lhsWords * 2) - n;
1410
1411  // Allocate space for the temporary values we need either on the stack, if
1412  // it will fit, or on the heap if it won't.
1413  uint32_t SPACE[128];
1414  uint32_t *U = nullptr;
1415  uint32_t *V = nullptr;
1416  uint32_t *Q = nullptr;
1417  uint32_t *R = nullptr;
1418  if ((Remainder?4:3)*n+2*m+1 <= 128) {
1419    U = &SPACE[0];
1420    V = &SPACE[m+n+1];
1421    Q = &SPACE[(m+n+1) + n];
1422    if (Remainder)
1423      R = &SPACE[(m+n+1) + n + (m+n)];
1424  } else {
1425    U = new uint32_t[m + n + 1];
1426    V = new uint32_t[n];
1427    Q = new uint32_t[m+n];
1428    if (Remainder)
1429      R = new uint32_t[n];
1430  }
1431
1432  // Initialize the dividend
1433  memset(U, 0, (m+n+1)*sizeof(uint32_t));
1434  for (unsigned i = 0; i < lhsWords; ++i) {
1435    uint64_t tmp = LHS[i];
1436    U[i * 2] = Lo_32(tmp);
1437    U[i * 2 + 1] = Hi_32(tmp);
1438  }
1439  U[m+n] = 0; // this extra word is for "spill" in the Knuth algorithm.
1440
1441  // Initialize the divisor
1442  memset(V, 0, (n)*sizeof(uint32_t));
1443  for (unsigned i = 0; i < rhsWords; ++i) {
1444    uint64_t tmp = RHS[i];
1445    V[i * 2] = Lo_32(tmp);
1446    V[i * 2 + 1] = Hi_32(tmp);
1447  }
1448
1449  // initialize the quotient and remainder
1450  memset(Q, 0, (m+n) * sizeof(uint32_t));
1451  if (Remainder)
1452    memset(R, 0, n * sizeof(uint32_t));
1453
1454  // Now, adjust m and n for the Knuth division. n is the number of words in
1455  // the divisor. m is the number of words by which the dividend exceeds the
1456  // divisor (i.e. m+n is the length of the dividend). These sizes must not
1457  // contain any zero words or the Knuth algorithm fails.
1458  for (unsigned i = n; i > 0 && V[i-1] == 0; i--) {
1459    n--;
1460    m++;
1461  }
1462  for (unsigned i = m+n; i > 0 && U[i-1] == 0; i--)
1463    m--;
1464
1465  // If we're left with only a single word for the divisor, Knuth doesn't work
1466  // so we implement the short division algorithm here. This is much simpler
1467  // and faster because we are certain that we can divide a 64-bit quantity
1468  // by a 32-bit quantity at hardware speed and short division is simply a
1469  // series of such operations. This is just like doing short division but we
1470  // are using base 2^32 instead of base 10.
1471  assert(n != 0 && "Divide by zero?");
1472  if (n == 1) {
1473    uint32_t divisor = V[0];
1474    uint32_t remainder = 0;
1475    for (int i = m; i >= 0; i--) {
1476      uint64_t partial_dividend = Make_64(remainder, U[i]);
1477      if (partial_dividend == 0) {
1478        Q[i] = 0;
1479        remainder = 0;
1480      } else if (partial_dividend < divisor) {
1481        Q[i] = 0;
1482        remainder = Lo_32(partial_dividend);
1483      } else if (partial_dividend == divisor) {
1484        Q[i] = 1;
1485        remainder = 0;
1486      } else {
1487        Q[i] = Lo_32(partial_dividend / divisor);
1488        remainder = Lo_32(partial_dividend - (Q[i] * divisor));
1489      }
1490    }
1491    if (R)
1492      R[0] = remainder;
1493  } else {
1494    // Now we're ready to invoke the Knuth classical divide algorithm. In this
1495    // case n > 1.
1496    KnuthDiv(U, V, Q, R, m, n);
1497  }
1498
1499  // If the caller wants the quotient
1500  if (Quotient) {
1501    for (unsigned i = 0; i < lhsWords; ++i)
1502      Quotient[i] = Make_64(Q[i*2+1], Q[i*2]);
1503  }
1504
1505  // If the caller wants the remainder
1506  if (Remainder) {
1507    for (unsigned i = 0; i < rhsWords; ++i)
1508      Remainder[i] = Make_64(R[i*2+1], R[i*2]);
1509  }
1510
1511  // Clean up the memory we allocated.
1512  if (U != &SPACE[0]) {
1513    delete [] U;
1514    delete [] V;
1515    delete [] Q;
1516    delete [] R;
1517  }
1518}
1519
1520APInt APInt::udiv(const APInt &RHS) const {
1521  assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
1522
1523  // First, deal with the easy case
1524  if (isSingleWord()) {
1525    assert(RHS.U.VAL != 0 && "Divide by zero?");
1526    return APInt(BitWidth, U.VAL / RHS.U.VAL);
1527  }
1528
1529  // Get some facts about the LHS and RHS number of bits and words
1530  unsigned lhsWords = getNumWords(getActiveBits());
1531  unsigned rhsBits  = RHS.getActiveBits();
1532  unsigned rhsWords = getNumWords(rhsBits);
1533  assert(rhsWords && "Divided by zero???");
1534
1535  // Deal with some degenerate cases
1536  if (!lhsWords)
1537    // 0 / X ===> 0
1538    return APInt(BitWidth, 0);
1539  if (rhsBits == 1)
1540    // X / 1 ===> X
1541    return *this;
1542  if (lhsWords < rhsWords || this->ult(RHS))
1543    // X / Y ===> 0, iff X < Y
1544    return APInt(BitWidth, 0);
1545  if (*this == RHS)
1546    // X / X ===> 1
1547    return APInt(BitWidth, 1);
1548  if (lhsWords == 1) // rhsWords is 1 if lhsWords is 1.
1549    // All high words are zero, just use native divide
1550    return APInt(BitWidth, this->U.pVal[0] / RHS.U.pVal[0]);
1551
1552  // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1553  APInt Quotient(BitWidth, 0); // to hold result.
1554  divide(U.pVal, lhsWords, RHS.U.pVal, rhsWords, Quotient.U.pVal, nullptr);
1555  return Quotient;
1556}
1557
1558APInt APInt::udiv(uint64_t RHS) const {
1559  assert(RHS != 0 && "Divide by zero?");
1560
1561  // First, deal with the easy case
1562  if (isSingleWord())
1563    return APInt(BitWidth, U.VAL / RHS);
1564
1565  // Get some facts about the LHS words.
1566  unsigned lhsWords = getNumWords(getActiveBits());
1567
1568  // Deal with some degenerate cases
1569  if (!lhsWords)
1570    // 0 / X ===> 0
1571    return APInt(BitWidth, 0);
1572  if (RHS == 1)
1573    // X / 1 ===> X
1574    return *this;
1575  if (this->ult(RHS))
1576    // X / Y ===> 0, iff X < Y
1577    return APInt(BitWidth, 0);
1578  if (*this == RHS)
1579    // X / X ===> 1
1580    return APInt(BitWidth, 1);
1581  if (lhsWords == 1) // rhsWords is 1 if lhsWords is 1.
1582    // All high words are zero, just use native divide
1583    return APInt(BitWidth, this->U.pVal[0] / RHS);
1584
1585  // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1586  APInt Quotient(BitWidth, 0); // to hold result.
1587  divide(U.pVal, lhsWords, &RHS, 1, Quotient.U.pVal, nullptr);
1588  return Quotient;
1589}
1590
1591APInt APInt::sdiv(const APInt &RHS) const {
1592  if (isNegative()) {
1593    if (RHS.isNegative())
1594      return (-(*this)).udiv(-RHS);
1595    return -((-(*this)).udiv(RHS));
1596  }
1597  if (RHS.isNegative())
1598    return -(this->udiv(-RHS));
1599  return this->udiv(RHS);
1600}
1601
1602APInt APInt::sdiv(int64_t RHS) const {
1603  if (isNegative()) {
1604    if (RHS < 0)
1605      return (-(*this)).udiv(-RHS);
1606    return -((-(*this)).udiv(RHS));
1607  }
1608  if (RHS < 0)
1609    return -(this->udiv(-RHS));
1610  return this->udiv(RHS);
1611}
1612
1613APInt APInt::urem(const APInt &RHS) const {
1614  assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
1615  if (isSingleWord()) {
1616    assert(RHS.U.VAL != 0 && "Remainder by zero?");
1617    return APInt(BitWidth, U.VAL % RHS.U.VAL);
1618  }
1619
1620  // Get some facts about the LHS
1621  unsigned lhsWords = getNumWords(getActiveBits());
1622
1623  // Get some facts about the RHS
1624  unsigned rhsBits = RHS.getActiveBits();
1625  unsigned rhsWords = getNumWords(rhsBits);
1626  assert(rhsWords && "Performing remainder operation by zero ???");
1627
1628  // Check the degenerate cases
1629  if (lhsWords == 0)
1630    // 0 % Y ===> 0
1631    return APInt(BitWidth, 0);
1632  if (rhsBits == 1)
1633    // X % 1 ===> 0
1634    return APInt(BitWidth, 0);
1635  if (lhsWords < rhsWords || this->ult(RHS))
1636    // X % Y ===> X, iff X < Y
1637    return *this;
1638  if (*this == RHS)
1639    // X % X == 0;
1640    return APInt(BitWidth, 0);
1641  if (lhsWords == 1)
1642    // All high words are zero, just use native remainder
1643    return APInt(BitWidth, U.pVal[0] % RHS.U.pVal[0]);
1644
1645  // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1646  APInt Remainder(BitWidth, 0);
1647  divide(U.pVal, lhsWords, RHS.U.pVal, rhsWords, nullptr, Remainder.U.pVal);
1648  return Remainder;
1649}
1650
1651uint64_t APInt::urem(uint64_t RHS) const {
1652  assert(RHS != 0 && "Remainder by zero?");
1653
1654  if (isSingleWord())
1655    return U.VAL % RHS;
1656
1657  // Get some facts about the LHS
1658  unsigned lhsWords = getNumWords(getActiveBits());
1659
1660  // Check the degenerate cases
1661  if (lhsWords == 0)
1662    // 0 % Y ===> 0
1663    return 0;
1664  if (RHS == 1)
1665    // X % 1 ===> 0
1666    return 0;
1667  if (this->ult(RHS))
1668    // X % Y ===> X, iff X < Y
1669    return getZExtValue();
1670  if (*this == RHS)
1671    // X % X == 0;
1672    return 0;
1673  if (lhsWords == 1)
1674    // All high words are zero, just use native remainder
1675    return U.pVal[0] % RHS;
1676
1677  // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1678  uint64_t Remainder;
1679  divide(U.pVal, lhsWords, &RHS, 1, nullptr, &Remainder);
1680  return Remainder;
1681}
1682
1683APInt APInt::srem(const APInt &RHS) const {
1684  if (isNegative()) {
1685    if (RHS.isNegative())
1686      return -((-(*this)).urem(-RHS));
1687    return -((-(*this)).urem(RHS));
1688  }
1689  if (RHS.isNegative())
1690    return this->urem(-RHS);
1691  return this->urem(RHS);
1692}
1693
1694int64_t APInt::srem(int64_t RHS) const {
1695  if (isNegative()) {
1696    if (RHS < 0)
1697      return -((-(*this)).urem(-RHS));
1698    return -((-(*this)).urem(RHS));
1699  }
1700  if (RHS < 0)
1701    return this->urem(-RHS);
1702  return this->urem(RHS);
1703}
1704
1705void APInt::udivrem(const APInt &LHS, const APInt &RHS,
1706                    APInt &Quotient, APInt &Remainder) {
1707  assert(LHS.BitWidth == RHS.BitWidth && "Bit widths must be the same");
1708  unsigned BitWidth = LHS.BitWidth;
1709
1710  // First, deal with the easy case
1711  if (LHS.isSingleWord()) {
1712    assert(RHS.U.VAL != 0 && "Divide by zero?");
1713    uint64_t QuotVal = LHS.U.VAL / RHS.U.VAL;
1714    uint64_t RemVal = LHS.U.VAL % RHS.U.VAL;
1715    Quotient = APInt(BitWidth, QuotVal);
1716    Remainder = APInt(BitWidth, RemVal);
1717    return;
1718  }
1719
1720  // Get some size facts about the dividend and divisor
1721  unsigned lhsWords = getNumWords(LHS.getActiveBits());
1722  unsigned rhsBits  = RHS.getActiveBits();
1723  unsigned rhsWords = getNumWords(rhsBits);
1724  assert(rhsWords && "Performing divrem operation by zero ???");
1725
1726  // Check the degenerate cases
1727  if (lhsWords == 0) {
1728    Quotient = APInt(BitWidth, 0);    // 0 / Y ===> 0
1729    Remainder = APInt(BitWidth, 0);   // 0 % Y ===> 0
1730    return;
1731  }
1732
1733  if (rhsBits == 1) {
1734    Quotient = LHS;                   // X / 1 ===> X
1735    Remainder = APInt(BitWidth, 0);   // X % 1 ===> 0
1736  }
1737
1738  if (lhsWords < rhsWords || LHS.ult(RHS)) {
1739    Remainder = LHS;                  // X % Y ===> X, iff X < Y
1740    Quotient = APInt(BitWidth, 0);    // X / Y ===> 0, iff X < Y
1741    return;
1742  }
1743
1744  if (LHS == RHS) {
1745    Quotient  = APInt(BitWidth, 1);   // X / X ===> 1
1746    Remainder = APInt(BitWidth, 0);   // X % X ===> 0;
1747    return;
1748  }
1749
1750  // Make sure there is enough space to hold the results.
1751  // NOTE: This assumes that reallocate won't affect any bits if it doesn't
1752  // change the size. This is necessary if Quotient or Remainder is aliased
1753  // with LHS or RHS.
1754  Quotient.reallocate(BitWidth);
1755  Remainder.reallocate(BitWidth);
1756
1757  if (lhsWords == 1) { // rhsWords is 1 if lhsWords is 1.
1758    // There is only one word to consider so use the native versions.
1759    uint64_t lhsValue = LHS.U.pVal[0];
1760    uint64_t rhsValue = RHS.U.pVal[0];
1761    Quotient = lhsValue / rhsValue;
1762    Remainder = lhsValue % rhsValue;
1763    return;
1764  }
1765
1766  // Okay, lets do it the long way
1767  divide(LHS.U.pVal, lhsWords, RHS.U.pVal, rhsWords, Quotient.U.pVal,
1768         Remainder.U.pVal);
1769  // Clear the rest of the Quotient and Remainder.
1770  std::memset(Quotient.U.pVal + lhsWords, 0,
1771              (getNumWords(BitWidth) - lhsWords) * APINT_WORD_SIZE);
1772  std::memset(Remainder.U.pVal + rhsWords, 0,
1773              (getNumWords(BitWidth) - rhsWords) * APINT_WORD_SIZE);
1774}
1775
1776void APInt::udivrem(const APInt &LHS, uint64_t RHS, APInt &Quotient,
1777                    uint64_t &Remainder) {
1778  assert(RHS != 0 && "Divide by zero?");
1779  unsigned BitWidth = LHS.BitWidth;
1780
1781  // First, deal with the easy case
1782  if (LHS.isSingleWord()) {
1783    uint64_t QuotVal = LHS.U.VAL / RHS;
1784    Remainder = LHS.U.VAL % RHS;
1785    Quotient = APInt(BitWidth, QuotVal);
1786    return;
1787  }
1788
1789  // Get some size facts about the dividend and divisor
1790  unsigned lhsWords = getNumWords(LHS.getActiveBits());
1791
1792  // Check the degenerate cases
1793  if (lhsWords == 0) {
1794    Quotient = APInt(BitWidth, 0);    // 0 / Y ===> 0
1795    Remainder = 0;                    // 0 % Y ===> 0
1796    return;
1797  }
1798
1799  if (RHS == 1) {
1800    Quotient = LHS;                   // X / 1 ===> X
1801    Remainder = 0;                    // X % 1 ===> 0
1802    return;
1803  }
1804
1805  if (LHS.ult(RHS)) {
1806    Remainder = LHS.getZExtValue();   // X % Y ===> X, iff X < Y
1807    Quotient = APInt(BitWidth, 0);    // X / Y ===> 0, iff X < Y
1808    return;
1809  }
1810
1811  if (LHS == RHS) {
1812    Quotient  = APInt(BitWidth, 1);   // X / X ===> 1
1813    Remainder = 0;                    // X % X ===> 0;
1814    return;
1815  }
1816
1817  // Make sure there is enough space to hold the results.
1818  // NOTE: This assumes that reallocate won't affect any bits if it doesn't
1819  // change the size. This is necessary if Quotient is aliased with LHS.
1820  Quotient.reallocate(BitWidth);
1821
1822  if (lhsWords == 1) { // rhsWords is 1 if lhsWords is 1.
1823    // There is only one word to consider so use the native versions.
1824    uint64_t lhsValue = LHS.U.pVal[0];
1825    Quotient = lhsValue / RHS;
1826    Remainder = lhsValue % RHS;
1827    return;
1828  }
1829
1830  // Okay, lets do it the long way
1831  divide(LHS.U.pVal, lhsWords, &RHS, 1, Quotient.U.pVal, &Remainder);
1832  // Clear the rest of the Quotient.
1833  std::memset(Quotient.U.pVal + lhsWords, 0,
1834              (getNumWords(BitWidth) - lhsWords) * APINT_WORD_SIZE);
1835}
1836
1837void APInt::sdivrem(const APInt &LHS, const APInt &RHS,
1838                    APInt &Quotient, APInt &Remainder) {
1839  if (LHS.isNegative()) {
1840    if (RHS.isNegative())
1841      APInt::udivrem(-LHS, -RHS, Quotient, Remainder);
1842    else {
1843      APInt::udivrem(-LHS, RHS, Quotient, Remainder);
1844      Quotient.negate();
1845    }
1846    Remainder.negate();
1847  } else if (RHS.isNegative()) {
1848    APInt::udivrem(LHS, -RHS, Quotient, Remainder);
1849    Quotient.negate();
1850  } else {
1851    APInt::udivrem(LHS, RHS, Quotient, Remainder);
1852  }
1853}
1854
1855void APInt::sdivrem(const APInt &LHS, int64_t RHS,
1856                    APInt &Quotient, int64_t &Remainder) {
1857  uint64_t R = Remainder;
1858  if (LHS.isNegative()) {
1859    if (RHS < 0)
1860      APInt::udivrem(-LHS, -RHS, Quotient, R);
1861    else {
1862      APInt::udivrem(-LHS, RHS, Quotient, R);
1863      Quotient.negate();
1864    }
1865    R = -R;
1866  } else if (RHS < 0) {
1867    APInt::udivrem(LHS, -RHS, Quotient, R);
1868    Quotient.negate();
1869  } else {
1870    APInt::udivrem(LHS, RHS, Quotient, R);
1871  }
1872  Remainder = R;
1873}
1874
1875APInt APInt::sadd_ov(const APInt &RHS, bool &Overflow) const {
1876  APInt Res = *this+RHS;
1877  Overflow = isNonNegative() == RHS.isNonNegative() &&
1878             Res.isNonNegative() != isNonNegative();
1879  return Res;
1880}
1881
1882APInt APInt::uadd_ov(const APInt &RHS, bool &Overflow) const {
1883  APInt Res = *this+RHS;
1884  Overflow = Res.ult(RHS);
1885  return Res;
1886}
1887
1888APInt APInt::ssub_ov(const APInt &RHS, bool &Overflow) const {
1889  APInt Res = *this - RHS;
1890  Overflow = isNonNegative() != RHS.isNonNegative() &&
1891             Res.isNonNegative() != isNonNegative();
1892  return Res;
1893}
1894
1895APInt APInt::usub_ov(const APInt &RHS, bool &Overflow) const {
1896  APInt Res = *this-RHS;
1897  Overflow = Res.ugt(*this);
1898  return Res;
1899}
1900
1901APInt APInt::sdiv_ov(const APInt &RHS, bool &Overflow) const {
1902  // MININT/-1  -->  overflow.
1903  Overflow = isMinSignedValue() && RHS.isAllOnesValue();
1904  return sdiv(RHS);
1905}
1906
1907APInt APInt::smul_ov(const APInt &RHS, bool &Overflow) const {
1908  APInt Res = *this * RHS;
1909
1910  if (*this != 0 && RHS != 0)
1911    Overflow = Res.sdiv(RHS) != *this || Res.sdiv(*this) != RHS;
1912  else
1913    Overflow = false;
1914  return Res;
1915}
1916
1917APInt APInt::umul_ov(const APInt &RHS, bool &Overflow) const {
1918  APInt Res = *this * RHS;
1919
1920  if (*this != 0 && RHS != 0)
1921    Overflow = Res.udiv(RHS) != *this || Res.udiv(*this) != RHS;
1922  else
1923    Overflow = false;
1924  return Res;
1925}
1926
1927APInt APInt::sshl_ov(const APInt &ShAmt, bool &Overflow) const {
1928  Overflow = ShAmt.uge(getBitWidth());
1929  if (Overflow)
1930    return APInt(BitWidth, 0);
1931
1932  if (isNonNegative()) // Don't allow sign change.
1933    Overflow = ShAmt.uge(countLeadingZeros());
1934  else
1935    Overflow = ShAmt.uge(countLeadingOnes());
1936
1937  return *this << ShAmt;
1938}
1939
1940APInt APInt::ushl_ov(const APInt &ShAmt, bool &Overflow) const {
1941  Overflow = ShAmt.uge(getBitWidth());
1942  if (Overflow)
1943    return APInt(BitWidth, 0);
1944
1945  Overflow = ShAmt.ugt(countLeadingZeros());
1946
1947  return *this << ShAmt;
1948}
1949
1950APInt APInt::sadd_sat(const APInt &RHS) const {
1951  bool Overflow;
1952  APInt Res = sadd_ov(RHS, Overflow);
1953  if (!Overflow)
1954    return Res;
1955
1956  return isNegative() ? APInt::getSignedMinValue(BitWidth)
1957                      : APInt::getSignedMaxValue(BitWidth);
1958}
1959
1960APInt APInt::uadd_sat(const APInt &RHS) const {
1961  bool Overflow;
1962  APInt Res = uadd_ov(RHS, Overflow);
1963  if (!Overflow)
1964    return Res;
1965
1966  return APInt::getMaxValue(BitWidth);
1967}
1968
1969APInt APInt::ssub_sat(const APInt &RHS) const {
1970  bool Overflow;
1971  APInt Res = ssub_ov(RHS, Overflow);
1972  if (!Overflow)
1973    return Res;
1974
1975  return isNegative() ? APInt::getSignedMinValue(BitWidth)
1976                      : APInt::getSignedMaxValue(BitWidth);
1977}
1978
1979APInt APInt::usub_sat(const APInt &RHS) const {
1980  bool Overflow;
1981  APInt Res = usub_ov(RHS, Overflow);
1982  if (!Overflow)
1983    return Res;
1984
1985  return APInt(BitWidth, 0);
1986}
1987
1988
1989void APInt::fromString(unsigned numbits, StringRef str, uint8_t radix) {
1990  // Check our assumptions here
1991  assert(!str.empty() && "Invalid string length");
1992  assert((radix == 10 || radix == 8 || radix == 16 || radix == 2 ||
1993          radix == 36) &&
1994         "Radix should be 2, 8, 10, 16, or 36!");
1995
1996  StringRef::iterator p = str.begin();
1997  size_t slen = str.size();
1998  bool isNeg = *p == '-';
1999  if (*p == '-' || *p == '+') {
2000    p++;
2001    slen--;
2002    assert(slen && "String is only a sign, needs a value.");
2003  }
2004  assert((slen <= numbits || radix != 2) && "Insufficient bit width");
2005  assert(((slen-1)*3 <= numbits || radix != 8) && "Insufficient bit width");
2006  assert(((slen-1)*4 <= numbits || radix != 16) && "Insufficient bit width");
2007  assert((((slen-1)*64)/22 <= numbits || radix != 10) &&
2008         "Insufficient bit width");
2009
2010  // Allocate memory if needed
2011  if (isSingleWord())
2012    U.VAL = 0;
2013  else
2014    U.pVal = getClearedMemory(getNumWords());
2015
2016  // Figure out if we can shift instead of multiply
2017  unsigned shift = (radix == 16 ? 4 : radix == 8 ? 3 : radix == 2 ? 1 : 0);
2018
2019  // Enter digit traversal loop
2020  for (StringRef::iterator e = str.end(); p != e; ++p) {
2021    unsigned digit = getDigit(*p, radix);
2022    assert(digit < radix && "Invalid character in digit string");
2023
2024    // Shift or multiply the value by the radix
2025    if (slen > 1) {
2026      if (shift)
2027        *this <<= shift;
2028      else
2029        *this *= radix;
2030    }
2031
2032    // Add in the digit we just interpreted
2033    *this += digit;
2034  }
2035  // If its negative, put it in two's complement form
2036  if (isNeg)
2037    this->negate();
2038}
2039
2040void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix,
2041                     bool Signed, bool formatAsCLiteral) const {
2042  assert((Radix == 10 || Radix == 8 || Radix == 16 || Radix == 2 ||
2043          Radix == 36) &&
2044         "Radix should be 2, 8, 10, 16, or 36!");
2045
2046  const char *Prefix = "";
2047  if (formatAsCLiteral) {
2048    switch (Radix) {
2049      case 2:
2050        // Binary literals are a non-standard extension added in gcc 4.3:
2051        // http://gcc.gnu.org/onlinedocs/gcc-4.3.0/gcc/Binary-constants.html
2052        Prefix = "0b";
2053        break;
2054      case 8:
2055        Prefix = "0";
2056        break;
2057      case 10:
2058        break; // No prefix
2059      case 16:
2060        Prefix = "0x";
2061        break;
2062      default:
2063        llvm_unreachable("Invalid radix!");
2064    }
2065  }
2066
2067  // First, check for a zero value and just short circuit the logic below.
2068  if (*this == 0) {
2069    while (*Prefix) {
2070      Str.push_back(*Prefix);
2071      ++Prefix;
2072    };
2073    Str.push_back('0');
2074    return;
2075  }
2076
2077  static const char Digits[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
2078
2079  if (isSingleWord()) {
2080    char Buffer[65];
2081    char *BufPtr = std::end(Buffer);
2082
2083    uint64_t N;
2084    if (!Signed) {
2085      N = getZExtValue();
2086    } else {
2087      int64_t I = getSExtValue();
2088      if (I >= 0) {
2089        N = I;
2090      } else {
2091        Str.push_back('-');
2092        N = -(uint64_t)I;
2093      }
2094    }
2095
2096    while (*Prefix) {
2097      Str.push_back(*Prefix);
2098      ++Prefix;
2099    };
2100
2101    while (N) {
2102      *--BufPtr = Digits[N % Radix];
2103      N /= Radix;
2104    }
2105    Str.append(BufPtr, std::end(Buffer));
2106    return;
2107  }
2108
2109  APInt Tmp(*this);
2110
2111  if (Signed && isNegative()) {
2112    // They want to print the signed version and it is a negative value
2113    // Flip the bits and add one to turn it into the equivalent positive
2114    // value and put a '-' in the result.
2115    Tmp.negate();
2116    Str.push_back('-');
2117  }
2118
2119  while (*Prefix) {
2120    Str.push_back(*Prefix);
2121    ++Prefix;
2122  };
2123
2124  // We insert the digits backward, then reverse them to get the right order.
2125  unsigned StartDig = Str.size();
2126
2127  // For the 2, 8 and 16 bit cases, we can just shift instead of divide
2128  // because the number of bits per digit (1, 3 and 4 respectively) divides
2129  // equally.  We just shift until the value is zero.
2130  if (Radix == 2 || Radix == 8 || Radix == 16) {
2131    // Just shift tmp right for each digit width until it becomes zero
2132    unsigned ShiftAmt = (Radix == 16 ? 4 : (Radix == 8 ? 3 : 1));
2133    unsigned MaskAmt = Radix - 1;
2134
2135    while (Tmp.getBoolValue()) {
2136      unsigned Digit = unsigned(Tmp.getRawData()[0]) & MaskAmt;
2137      Str.push_back(Digits[Digit]);
2138      Tmp.lshrInPlace(ShiftAmt);
2139    }
2140  } else {
2141    while (Tmp.getBoolValue()) {
2142      uint64_t Digit;
2143      udivrem(Tmp, Radix, Tmp, Digit);
2144      assert(Digit < Radix && "divide failed");
2145      Str.push_back(Digits[Digit]);
2146    }
2147  }
2148
2149  // Reverse the digits before returning.
2150  std::reverse(Str.begin()+StartDig, Str.end());
2151}
2152
2153/// Returns the APInt as a std::string. Note that this is an inefficient method.
2154/// It is better to pass in a SmallVector/SmallString to the methods above.
2155std::string APInt::toString(unsigned Radix = 10, bool Signed = true) const {
2156  SmallString<40> S;
2157  toString(S, Radix, Signed, /* formatAsCLiteral = */false);
2158  return S.str();
2159}
2160
2161#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2162LLVM_DUMP_METHOD void APInt::dump() const {
2163  SmallString<40> S, U;
2164  this->toStringUnsigned(U);
2165  this->toStringSigned(S);
2166  dbgs() << "APInt(" << BitWidth << "b, "
2167         << U << "u " << S << "s)\n";
2168}
2169#endif
2170
2171void APInt::print(raw_ostream &OS, bool isSigned) const {
2172  SmallString<40> S;
2173  this->toString(S, 10, isSigned, /* formatAsCLiteral = */false);
2174  OS << S;
2175}
2176
2177// This implements a variety of operations on a representation of
2178// arbitrary precision, two's-complement, bignum integer values.
2179
2180// Assumed by lowHalf, highHalf, partMSB and partLSB.  A fairly safe
2181// and unrestricting assumption.
2182static_assert(APInt::APINT_BITS_PER_WORD % 2 == 0,
2183              "Part width must be divisible by 2!");
2184
2185/* Some handy functions local to this file.  */
2186
2187/* Returns the integer part with the least significant BITS set.
2188   BITS cannot be zero.  */
2189static inline APInt::WordType lowBitMask(unsigned bits) {
2190  assert(bits != 0 && bits <= APInt::APINT_BITS_PER_WORD);
2191
2192  return ~(APInt::WordType) 0 >> (APInt::APINT_BITS_PER_WORD - bits);
2193}
2194
2195/* Returns the value of the lower half of PART.  */
2196static inline APInt::WordType lowHalf(APInt::WordType part) {
2197  return part & lowBitMask(APInt::APINT_BITS_PER_WORD / 2);
2198}
2199
2200/* Returns the value of the upper half of PART.  */
2201static inline APInt::WordType highHalf(APInt::WordType part) {
2202  return part >> (APInt::APINT_BITS_PER_WORD / 2);
2203}
2204
2205/* Returns the bit number of the most significant set bit of a part.
2206   If the input number has no bits set -1U is returned.  */
2207static unsigned partMSB(APInt::WordType value) {
2208  return findLastSet(value, ZB_Max);
2209}
2210
2211/* Returns the bit number of the least significant set bit of a
2212   part.  If the input number has no bits set -1U is returned.  */
2213static unsigned partLSB(APInt::WordType value) {
2214  return findFirstSet(value, ZB_Max);
2215}
2216
2217/* Sets the least significant part of a bignum to the input value, and
2218   zeroes out higher parts.  */
2219void APInt::tcSet(WordType *dst, WordType part, unsigned parts) {
2220  assert(parts > 0);
2221
2222  dst[0] = part;
2223  for (unsigned i = 1; i < parts; i++)
2224    dst[i] = 0;
2225}
2226
2227/* Assign one bignum to another.  */
2228void APInt::tcAssign(WordType *dst, const WordType *src, unsigned parts) {
2229  for (unsigned i = 0; i < parts; i++)
2230    dst[i] = src[i];
2231}
2232
2233/* Returns true if a bignum is zero, false otherwise.  */
2234bool APInt::tcIsZero(const WordType *src, unsigned parts) {
2235  for (unsigned i = 0; i < parts; i++)
2236    if (src[i])
2237      return false;
2238
2239  return true;
2240}
2241
2242/* Extract the given bit of a bignum; returns 0 or 1.  */
2243int APInt::tcExtractBit(const WordType *parts, unsigned bit) {
2244  return (parts[whichWord(bit)] & maskBit(bit)) != 0;
2245}
2246
2247/* Set the given bit of a bignum. */
2248void APInt::tcSetBit(WordType *parts, unsigned bit) {
2249  parts[whichWord(bit)] |= maskBit(bit);
2250}
2251
2252/* Clears the given bit of a bignum. */
2253void APInt::tcClearBit(WordType *parts, unsigned bit) {
2254  parts[whichWord(bit)] &= ~maskBit(bit);
2255}
2256
2257/* Returns the bit number of the least significant set bit of a
2258   number.  If the input number has no bits set -1U is returned.  */
2259unsigned APInt::tcLSB(const WordType *parts, unsigned n) {
2260  for (unsigned i = 0; i < n; i++) {
2261    if (parts[i] != 0) {
2262      unsigned lsb = partLSB(parts[i]);
2263
2264      return lsb + i * APINT_BITS_PER_WORD;
2265    }
2266  }
2267
2268  return -1U;
2269}
2270
2271/* Returns the bit number of the most significant set bit of a number.
2272   If the input number has no bits set -1U is returned.  */
2273unsigned APInt::tcMSB(const WordType *parts, unsigned n) {
2274  do {
2275    --n;
2276
2277    if (parts[n] != 0) {
2278      unsigned msb = partMSB(parts[n]);
2279
2280      return msb + n * APINT_BITS_PER_WORD;
2281    }
2282  } while (n);
2283
2284  return -1U;
2285}
2286
2287/* Copy the bit vector of width srcBITS from SRC, starting at bit
2288   srcLSB, to DST, of dstCOUNT parts, such that the bit srcLSB becomes
2289   the least significant bit of DST.  All high bits above srcBITS in
2290   DST are zero-filled.  */
2291void
2292APInt::tcExtract(WordType *dst, unsigned dstCount, const WordType *src,
2293                 unsigned srcBits, unsigned srcLSB) {
2294  unsigned dstParts = (srcBits + APINT_BITS_PER_WORD - 1) / APINT_BITS_PER_WORD;
2295  assert(dstParts <= dstCount);
2296
2297  unsigned firstSrcPart = srcLSB / APINT_BITS_PER_WORD;
2298  tcAssign (dst, src + firstSrcPart, dstParts);
2299
2300  unsigned shift = srcLSB % APINT_BITS_PER_WORD;
2301  tcShiftRight (dst, dstParts, shift);
2302
2303  /* We now have (dstParts * APINT_BITS_PER_WORD - shift) bits from SRC
2304     in DST.  If this is less that srcBits, append the rest, else
2305     clear the high bits.  */
2306  unsigned n = dstParts * APINT_BITS_PER_WORD - shift;
2307  if (n < srcBits) {
2308    WordType mask = lowBitMask (srcBits - n);
2309    dst[dstParts - 1] |= ((src[firstSrcPart + dstParts] & mask)
2310                          << n % APINT_BITS_PER_WORD);
2311  } else if (n > srcBits) {
2312    if (srcBits % APINT_BITS_PER_WORD)
2313      dst[dstParts - 1] &= lowBitMask (srcBits % APINT_BITS_PER_WORD);
2314  }
2315
2316  /* Clear high parts.  */
2317  while (dstParts < dstCount)
2318    dst[dstParts++] = 0;
2319}
2320
2321/* DST += RHS + C where C is zero or one.  Returns the carry flag.  */
2322APInt::WordType APInt::tcAdd(WordType *dst, const WordType *rhs,
2323                             WordType c, unsigned parts) {
2324  assert(c <= 1);
2325
2326  for (unsigned i = 0; i < parts; i++) {
2327    WordType l = dst[i];
2328    if (c) {
2329      dst[i] += rhs[i] + 1;
2330      c = (dst[i] <= l);
2331    } else {
2332      dst[i] += rhs[i];
2333      c = (dst[i] < l);
2334    }
2335  }
2336
2337  return c;
2338}
2339
2340/// This function adds a single "word" integer, src, to the multiple
2341/// "word" integer array, dst[]. dst[] is modified to reflect the addition and
2342/// 1 is returned if there is a carry out, otherwise 0 is returned.
2343/// @returns the carry of the addition.
2344APInt::WordType APInt::tcAddPart(WordType *dst, WordType src,
2345                                 unsigned parts) {
2346  for (unsigned i = 0; i < parts; ++i) {
2347    dst[i] += src;
2348    if (dst[i] >= src)
2349      return 0; // No need to carry so exit early.
2350    src = 1; // Carry one to next digit.
2351  }
2352
2353  return 1;
2354}
2355
2356/* DST -= RHS + C where C is zero or one.  Returns the carry flag.  */
2357APInt::WordType APInt::tcSubtract(WordType *dst, const WordType *rhs,
2358                                  WordType c, unsigned parts) {
2359  assert(c <= 1);
2360
2361  for (unsigned i = 0; i < parts; i++) {
2362    WordType l = dst[i];
2363    if (c) {
2364      dst[i] -= rhs[i] + 1;
2365      c = (dst[i] >= l);
2366    } else {
2367      dst[i] -= rhs[i];
2368      c = (dst[i] > l);
2369    }
2370  }
2371
2372  return c;
2373}
2374
2375/// This function subtracts a single "word" (64-bit word), src, from
2376/// the multi-word integer array, dst[], propagating the borrowed 1 value until
2377/// no further borrowing is needed or it runs out of "words" in dst.  The result
2378/// is 1 if "borrowing" exhausted the digits in dst, or 0 if dst was not
2379/// exhausted. In other words, if src > dst then this function returns 1,
2380/// otherwise 0.
2381/// @returns the borrow out of the subtraction
2382APInt::WordType APInt::tcSubtractPart(WordType *dst, WordType src,
2383                                      unsigned parts) {
2384  for (unsigned i = 0; i < parts; ++i) {
2385    WordType Dst = dst[i];
2386    dst[i] -= src;
2387    if (src <= Dst)
2388      return 0; // No need to borrow so exit early.
2389    src = 1; // We have to "borrow 1" from next "word"
2390  }
2391
2392  return 1;
2393}
2394
2395/* Negate a bignum in-place.  */
2396void APInt::tcNegate(WordType *dst, unsigned parts) {
2397  tcComplement(dst, parts);
2398  tcIncrement(dst, parts);
2399}
2400
2401/*  DST += SRC * MULTIPLIER + CARRY   if add is true
2402    DST  = SRC * MULTIPLIER + CARRY   if add is false
2403
2404    Requires 0 <= DSTPARTS <= SRCPARTS + 1.  If DST overlaps SRC
2405    they must start at the same point, i.e. DST == SRC.
2406
2407    If DSTPARTS == SRCPARTS + 1 no overflow occurs and zero is
2408    returned.  Otherwise DST is filled with the least significant
2409    DSTPARTS parts of the result, and if all of the omitted higher
2410    parts were zero return zero, otherwise overflow occurred and
2411    return one.  */
2412int APInt::tcMultiplyPart(WordType *dst, const WordType *src,
2413                          WordType multiplier, WordType carry,
2414                          unsigned srcParts, unsigned dstParts,
2415                          bool add) {
2416  /* Otherwise our writes of DST kill our later reads of SRC.  */
2417  assert(dst <= src || dst >= src + srcParts);
2418  assert(dstParts <= srcParts + 1);
2419
2420  /* N loops; minimum of dstParts and srcParts.  */
2421  unsigned n = std::min(dstParts, srcParts);
2422
2423  for (unsigned i = 0; i < n; i++) {
2424    WordType low, mid, high, srcPart;
2425
2426      /* [ LOW, HIGH ] = MULTIPLIER * SRC[i] + DST[i] + CARRY.
2427
2428         This cannot overflow, because
2429
2430         (n - 1) * (n - 1) + 2 (n - 1) = (n - 1) * (n + 1)
2431
2432         which is less than n^2.  */
2433
2434    srcPart = src[i];
2435
2436    if (multiplier == 0 || srcPart == 0) {
2437      low = carry;
2438      high = 0;
2439    } else {
2440      low = lowHalf(srcPart) * lowHalf(multiplier);
2441      high = highHalf(srcPart) * highHalf(multiplier);
2442
2443      mid = lowHalf(srcPart) * highHalf(multiplier);
2444      high += highHalf(mid);
2445      mid <<= APINT_BITS_PER_WORD / 2;
2446      if (low + mid < low)
2447        high++;
2448      low += mid;
2449
2450      mid = highHalf(srcPart) * lowHalf(multiplier);
2451      high += highHalf(mid);
2452      mid <<= APINT_BITS_PER_WORD / 2;
2453      if (low + mid < low)
2454        high++;
2455      low += mid;
2456
2457      /* Now add carry.  */
2458      if (low + carry < low)
2459        high++;
2460      low += carry;
2461    }
2462
2463    if (add) {
2464      /* And now DST[i], and store the new low part there.  */
2465      if (low + dst[i] < low)
2466        high++;
2467      dst[i] += low;
2468    } else
2469      dst[i] = low;
2470
2471    carry = high;
2472  }
2473
2474  if (srcParts < dstParts) {
2475    /* Full multiplication, there is no overflow.  */
2476    assert(srcParts + 1 == dstParts);
2477    dst[srcParts] = carry;
2478    return 0;
2479  }
2480
2481  /* We overflowed if there is carry.  */
2482  if (carry)
2483    return 1;
2484
2485  /* We would overflow if any significant unwritten parts would be
2486     non-zero.  This is true if any remaining src parts are non-zero
2487     and the multiplier is non-zero.  */
2488  if (multiplier)
2489    for (unsigned i = dstParts; i < srcParts; i++)
2490      if (src[i])
2491        return 1;
2492
2493  /* We fitted in the narrow destination.  */
2494  return 0;
2495}
2496
2497/* DST = LHS * RHS, where DST has the same width as the operands and
2498   is filled with the least significant parts of the result.  Returns
2499   one if overflow occurred, otherwise zero.  DST must be disjoint
2500   from both operands.  */
2501int APInt::tcMultiply(WordType *dst, const WordType *lhs,
2502                      const WordType *rhs, unsigned parts) {
2503  assert(dst != lhs && dst != rhs);
2504
2505  int overflow = 0;
2506  tcSet(dst, 0, parts);
2507
2508  for (unsigned i = 0; i < parts; i++)
2509    overflow |= tcMultiplyPart(&dst[i], lhs, rhs[i], 0, parts,
2510                               parts - i, true);
2511
2512  return overflow;
2513}
2514
2515/// DST = LHS * RHS, where DST has width the sum of the widths of the
2516/// operands. No overflow occurs. DST must be disjoint from both operands.
2517void APInt::tcFullMultiply(WordType *dst, const WordType *lhs,
2518                           const WordType *rhs, unsigned lhsParts,
2519                           unsigned rhsParts) {
2520  /* Put the narrower number on the LHS for less loops below.  */
2521  if (lhsParts > rhsParts)
2522    return tcFullMultiply (dst, rhs, lhs, rhsParts, lhsParts);
2523
2524  assert(dst != lhs && dst != rhs);
2525
2526  tcSet(dst, 0, rhsParts);
2527
2528  for (unsigned i = 0; i < lhsParts; i++)
2529    tcMultiplyPart(&dst[i], rhs, lhs[i], 0, rhsParts, rhsParts + 1, true);
2530}
2531
2532/* If RHS is zero LHS and REMAINDER are left unchanged, return one.
2533   Otherwise set LHS to LHS / RHS with the fractional part discarded,
2534   set REMAINDER to the remainder, return zero.  i.e.
2535
2536   OLD_LHS = RHS * LHS + REMAINDER
2537
2538   SCRATCH is a bignum of the same size as the operands and result for
2539   use by the routine; its contents need not be initialized and are
2540   destroyed.  LHS, REMAINDER and SCRATCH must be distinct.
2541*/
2542int APInt::tcDivide(WordType *lhs, const WordType *rhs,
2543                    WordType *remainder, WordType *srhs,
2544                    unsigned parts) {
2545  assert(lhs != remainder && lhs != srhs && remainder != srhs);
2546
2547  unsigned shiftCount = tcMSB(rhs, parts) + 1;
2548  if (shiftCount == 0)
2549    return true;
2550
2551  shiftCount = parts * APINT_BITS_PER_WORD - shiftCount;
2552  unsigned n = shiftCount / APINT_BITS_PER_WORD;
2553  WordType mask = (WordType) 1 << (shiftCount % APINT_BITS_PER_WORD);
2554
2555  tcAssign(srhs, rhs, parts);
2556  tcShiftLeft(srhs, parts, shiftCount);
2557  tcAssign(remainder, lhs, parts);
2558  tcSet(lhs, 0, parts);
2559
2560  /* Loop, subtracting SRHS if REMAINDER is greater and adding that to
2561     the total.  */
2562  for (;;) {
2563    int compare = tcCompare(remainder, srhs, parts);
2564    if (compare >= 0) {
2565      tcSubtract(remainder, srhs, 0, parts);
2566      lhs[n] |= mask;
2567    }
2568
2569    if (shiftCount == 0)
2570      break;
2571    shiftCount--;
2572    tcShiftRight(srhs, parts, 1);
2573    if ((mask >>= 1) == 0) {
2574      mask = (WordType) 1 << (APINT_BITS_PER_WORD - 1);
2575      n--;
2576    }
2577  }
2578
2579  return false;
2580}
2581
2582/// Shift a bignum left Cound bits in-place. Shifted in bits are zero. There are
2583/// no restrictions on Count.
2584void APInt::tcShiftLeft(WordType *Dst, unsigned Words, unsigned Count) {
2585  // Don't bother performing a no-op shift.
2586  if (!Count)
2587    return;
2588
2589  // WordShift is the inter-part shift; BitShift is the intra-part shift.
2590  unsigned WordShift = std::min(Count / APINT_BITS_PER_WORD, Words);
2591  unsigned BitShift = Count % APINT_BITS_PER_WORD;
2592
2593  // Fastpath for moving by whole words.
2594  if (BitShift == 0) {
2595    std::memmove(Dst + WordShift, Dst, (Words - WordShift) * APINT_WORD_SIZE);
2596  } else {
2597    while (Words-- > WordShift) {
2598      Dst[Words] = Dst[Words - WordShift] << BitShift;
2599      if (Words > WordShift)
2600        Dst[Words] |=
2601          Dst[Words - WordShift - 1] >> (APINT_BITS_PER_WORD - BitShift);
2602    }
2603  }
2604
2605  // Fill in the remainder with 0s.
2606  std::memset(Dst, 0, WordShift * APINT_WORD_SIZE);
2607}
2608
2609/// Shift a bignum right Count bits in-place. Shifted in bits are zero. There
2610/// are no restrictions on Count.
2611void APInt::tcShiftRight(WordType *Dst, unsigned Words, unsigned Count) {
2612  // Don't bother performing a no-op shift.
2613  if (!Count)
2614    return;
2615
2616  // WordShift is the inter-part shift; BitShift is the intra-part shift.
2617  unsigned WordShift = std::min(Count / APINT_BITS_PER_WORD, Words);
2618  unsigned BitShift = Count % APINT_BITS_PER_WORD;
2619
2620  unsigned WordsToMove = Words - WordShift;
2621  // Fastpath for moving by whole words.
2622  if (BitShift == 0) {
2623    std::memmove(Dst, Dst + WordShift, WordsToMove * APINT_WORD_SIZE);
2624  } else {
2625    for (unsigned i = 0; i != WordsToMove; ++i) {
2626      Dst[i] = Dst[i + WordShift] >> BitShift;
2627      if (i + 1 != WordsToMove)
2628        Dst[i] |= Dst[i + WordShift + 1] << (APINT_BITS_PER_WORD - BitShift);
2629    }
2630  }
2631
2632  // Fill in the remainder with 0s.
2633  std::memset(Dst + WordsToMove, 0, WordShift * APINT_WORD_SIZE);
2634}
2635
2636/* Bitwise and of two bignums.  */
2637void APInt::tcAnd(WordType *dst, const WordType *rhs, unsigned parts) {
2638  for (unsigned i = 0; i < parts; i++)
2639    dst[i] &= rhs[i];
2640}
2641
2642/* Bitwise inclusive or of two bignums.  */
2643void APInt::tcOr(WordType *dst, const WordType *rhs, unsigned parts) {
2644  for (unsigned i = 0; i < parts; i++)
2645    dst[i] |= rhs[i];
2646}
2647
2648/* Bitwise exclusive or of two bignums.  */
2649void APInt::tcXor(WordType *dst, const WordType *rhs, unsigned parts) {
2650  for (unsigned i = 0; i < parts; i++)
2651    dst[i] ^= rhs[i];
2652}
2653
2654/* Complement a bignum in-place.  */
2655void APInt::tcComplement(WordType *dst, unsigned parts) {
2656  for (unsigned i = 0; i < parts; i++)
2657    dst[i] = ~dst[i];
2658}
2659
2660/* Comparison (unsigned) of two bignums.  */
2661int APInt::tcCompare(const WordType *lhs, const WordType *rhs,
2662                     unsigned parts) {
2663  while (parts) {
2664    parts--;
2665    if (lhs[parts] != rhs[parts])
2666      return (lhs[parts] > rhs[parts]) ? 1 : -1;
2667  }
2668
2669  return 0;
2670}
2671
2672/* Set the least significant BITS bits of a bignum, clear the
2673   rest.  */
2674void APInt::tcSetLeastSignificantBits(WordType *dst, unsigned parts,
2675                                      unsigned bits) {
2676  unsigned i = 0;
2677  while (bits > APINT_BITS_PER_WORD) {
2678    dst[i++] = ~(WordType) 0;
2679    bits -= APINT_BITS_PER_WORD;
2680  }
2681
2682  if (bits)
2683    dst[i++] = ~(WordType) 0 >> (APINT_BITS_PER_WORD - bits);
2684
2685  while (i < parts)
2686    dst[i++] = 0;
2687}
2688
2689APInt llvm::APIntOps::RoundingUDiv(const APInt &A, const APInt &B,
2690                                   APInt::Rounding RM) {
2691  // Currently udivrem always rounds down.
2692  switch (RM) {
2693  case APInt::Rounding::DOWN:
2694  case APInt::Rounding::TOWARD_ZERO:
2695    return A.udiv(B);
2696  case APInt::Rounding::UP: {
2697    APInt Quo, Rem;
2698    APInt::udivrem(A, B, Quo, Rem);
2699    if (Rem == 0)
2700      return Quo;
2701    return Quo + 1;
2702  }
2703  }
2704  llvm_unreachable("Unknown APInt::Rounding enum");
2705}
2706
2707APInt llvm::APIntOps::RoundingSDiv(const APInt &A, const APInt &B,
2708                                   APInt::Rounding RM) {
2709  switch (RM) {
2710  case APInt::Rounding::DOWN:
2711  case APInt::Rounding::UP: {
2712    APInt Quo, Rem;
2713    APInt::sdivrem(A, B, Quo, Rem);
2714    if (Rem == 0)
2715      return Quo;
2716    // This algorithm deals with arbitrary rounding mode used by sdivrem.
2717    // We want to check whether the non-integer part of the mathematical value
2718    // is negative or not. If the non-integer part is negative, we need to round
2719    // down from Quo; otherwise, if it's positive or 0, we return Quo, as it's
2720    // already rounded down.
2721    if (RM == APInt::Rounding::DOWN) {
2722      if (Rem.isNegative() != B.isNegative())
2723        return Quo - 1;
2724      return Quo;
2725    }
2726    if (Rem.isNegative() != B.isNegative())
2727      return Quo;
2728    return Quo + 1;
2729  }
2730  // Currently sdiv rounds twards zero.
2731  case APInt::Rounding::TOWARD_ZERO:
2732    return A.sdiv(B);
2733  }
2734  llvm_unreachable("Unknown APInt::Rounding enum");
2735}
2736
2737Optional<APInt>
2738llvm::APIntOps::SolveQuadraticEquationWrap(APInt A, APInt B, APInt C,
2739                                           unsigned RangeWidth) {
2740  unsigned CoeffWidth = A.getBitWidth();
2741  assert(CoeffWidth == B.getBitWidth() && CoeffWidth == C.getBitWidth());
2742  assert(RangeWidth <= CoeffWidth &&
2743         "Value range width should be less than coefficient width");
2744  assert(RangeWidth > 1 && "Value range bit width should be > 1");
2745
2746  LLVM_DEBUG(dbgs() << __func__ << ": solving " << A << "x^2 + " << B
2747                    << "x + " << C << ", rw:" << RangeWidth << '\n');
2748
2749  // Identify 0 as a (non)solution immediately.
2750  if (C.sextOrTrunc(RangeWidth).isNullValue() ) {
2751    LLVM_DEBUG(dbgs() << __func__ << ": zero solution\n");
2752    return APInt(CoeffWidth, 0);
2753  }
2754
2755  // The result of APInt arithmetic has the same bit width as the operands,
2756  // so it can actually lose high bits. A product of two n-bit integers needs
2757  // 2n-1 bits to represent the full value.
2758  // The operation done below (on quadratic coefficients) that can produce
2759  // the largest value is the evaluation of the equation during bisection,
2760  // which needs 3 times the bitwidth of the coefficient, so the total number
2761  // of required bits is 3n.
2762  //
2763  // The purpose of this extension is to simulate the set Z of all integers,
2764  // where n+1 > n for all n in Z. In Z it makes sense to talk about positive
2765  // and negative numbers (not so much in a modulo arithmetic). The method
2766  // used to solve the equation is based on the standard formula for real
2767  // numbers, and uses the concepts of "positive" and "negative" with their
2768  // usual meanings.
2769  CoeffWidth *= 3;
2770  A = A.sext(CoeffWidth);
2771  B = B.sext(CoeffWidth);
2772  C = C.sext(CoeffWidth);
2773
2774  // Make A > 0 for simplicity. Negate cannot overflow at this point because
2775  // the bit width has increased.
2776  if (A.isNegative()) {
2777    A.negate();
2778    B.negate();
2779    C.negate();
2780  }
2781
2782  // Solving an equation q(x) = 0 with coefficients in modular arithmetic
2783  // is really solving a set of equations q(x) = kR for k = 0, 1, 2, ...,
2784  // and R = 2^BitWidth.
2785  // Since we're trying not only to find exact solutions, but also values
2786  // that "wrap around", such a set will always have a solution, i.e. an x
2787  // that satisfies at least one of the equations, or such that |q(x)|
2788  // exceeds kR, while |q(x-1)| for the same k does not.
2789  //
2790  // We need to find a value k, such that Ax^2 + Bx + C = kR will have a
2791  // positive solution n (in the above sense), and also such that the n
2792  // will be the least among all solutions corresponding to k = 0, 1, ...
2793  // (more precisely, the least element in the set
2794  //   { n(k) | k is such that a solution n(k) exists }).
2795  //
2796  // Consider the parabola (over real numbers) that corresponds to the
2797  // quadratic equation. Since A > 0, the arms of the parabola will point
2798  // up. Picking different values of k will shift it up and down by R.
2799  //
2800  // We want to shift the parabola in such a way as to reduce the problem
2801  // of solving q(x) = kR to solving shifted_q(x) = 0.
2802  // (The interesting solutions are the ceilings of the real number
2803  // solutions.)
2804  APInt R = APInt::getOneBitSet(CoeffWidth, RangeWidth);
2805  APInt TwoA = 2 * A;
2806  APInt SqrB = B * B;
2807  bool PickLow;
2808
2809  auto RoundUp = [] (const APInt &V, const APInt &A) -> APInt {
2810    assert(A.isStrictlyPositive());
2811    APInt T = V.abs().urem(A);
2812    if (T.isNullValue())
2813      return V;
2814    return V.isNegative() ? V+T : V+(A-T);
2815  };
2816
2817  // The vertex of the parabola is at -B/2A, but since A > 0, it's negative
2818  // iff B is positive.
2819  if (B.isNonNegative()) {
2820    // If B >= 0, the vertex it at a negative location (or at 0), so in
2821    // order to have a non-negative solution we need to pick k that makes
2822    // C-kR negative. To satisfy all the requirements for the solution
2823    // that we are looking for, it needs to be closest to 0 of all k.
2824    C = C.srem(R);
2825    if (C.isStrictlyPositive())
2826      C -= R;
2827    // Pick the greater solution.
2828    PickLow = false;
2829  } else {
2830    // If B < 0, the vertex is at a positive location. For any solution
2831    // to exist, the discriminant must be non-negative. This means that
2832    // C-kR <= B^2/4A is a necessary condition for k, i.e. there is a
2833    // lower bound on values of k: kR >= C - B^2/4A.
2834    APInt LowkR = C - SqrB.udiv(2*TwoA); // udiv because all values > 0.
2835    // Round LowkR up (towards +inf) to the nearest kR.
2836    LowkR = RoundUp(LowkR, R);
2837
2838    // If there exists k meeting the condition above, and such that
2839    // C-kR > 0, there will be two positive real number solutions of
2840    // q(x) = kR. Out of all such values of k, pick the one that makes
2841    // C-kR closest to 0, (i.e. pick maximum k such that C-kR > 0).
2842    // In other words, find maximum k such that LowkR <= kR < C.
2843    if (C.sgt(LowkR)) {
2844      // If LowkR < C, then such a k is guaranteed to exist because
2845      // LowkR itself is a multiple of R.
2846      C -= -RoundUp(-C, R);      // C = C - RoundDown(C, R)
2847      // Pick the smaller solution.
2848      PickLow = true;
2849    } else {
2850      // If C-kR < 0 for all potential k's, it means that one solution
2851      // will be negative, while the other will be positive. The positive
2852      // solution will shift towards 0 if the parabola is moved up.
2853      // Pick the kR closest to the lower bound (i.e. make C-kR closest
2854      // to 0, or in other words, out of all parabolas that have solutions,
2855      // pick the one that is the farthest "up").
2856      // Since LowkR is itself a multiple of R, simply take C-LowkR.
2857      C -= LowkR;
2858      // Pick the greater solution.
2859      PickLow = false;
2860    }
2861  }
2862
2863  LLVM_DEBUG(dbgs() << __func__ << ": updated coefficients " << A << "x^2 + "
2864                    << B << "x + " << C << ", rw:" << RangeWidth << '\n');
2865
2866  APInt D = SqrB - 4*A*C;
2867  assert(D.isNonNegative() && "Negative discriminant");
2868  APInt SQ = D.sqrt();
2869
2870  APInt Q = SQ * SQ;
2871  bool InexactSQ = Q != D;
2872  // The calculated SQ may actually be greater than the exact (non-integer)
2873  // value. If that's the case, decremement SQ to get a value that is lower.
2874  if (Q.sgt(D))
2875    SQ -= 1;
2876
2877  APInt X;
2878  APInt Rem;
2879
2880  // SQ is rounded down (i.e SQ * SQ <= D), so the roots may be inexact.
2881  // When using the quadratic formula directly, the calculated low root
2882  // may be greater than the exact one, since we would be subtracting SQ.
2883  // To make sure that the calculated root is not greater than the exact
2884  // one, subtract SQ+1 when calculating the low root (for inexact value
2885  // of SQ).
2886  if (PickLow)
2887    APInt::sdivrem(-B - (SQ+InexactSQ), TwoA, X, Rem);
2888  else
2889    APInt::sdivrem(-B + SQ, TwoA, X, Rem);
2890
2891  // The updated coefficients should be such that the (exact) solution is
2892  // positive. Since APInt division rounds towards 0, the calculated one
2893  // can be 0, but cannot be negative.
2894  assert(X.isNonNegative() && "Solution should be non-negative");
2895
2896  if (!InexactSQ && Rem.isNullValue()) {
2897    LLVM_DEBUG(dbgs() << __func__ << ": solution (root): " << X << '\n');
2898    return X;
2899  }
2900
2901  assert((SQ*SQ).sle(D) && "SQ = |_sqrt(D)_|, so SQ*SQ <= D");
2902  // The exact value of the square root of D should be between SQ and SQ+1.
2903  // This implies that the solution should be between that corresponding to
2904  // SQ (i.e. X) and that corresponding to SQ+1.
2905  //
2906  // The calculated X cannot be greater than the exact (real) solution.
2907  // Actually it must be strictly less than the exact solution, while
2908  // X+1 will be greater than or equal to it.
2909
2910  APInt VX = (A*X + B)*X + C;
2911  APInt VY = VX + TwoA*X + A + B;
2912  bool SignChange = VX.isNegative() != VY.isNegative() ||
2913                    VX.isNullValue() != VY.isNullValue();
2914  // If the sign did not change between X and X+1, X is not a valid solution.
2915  // This could happen when the actual (exact) roots don't have an integer
2916  // between them, so they would both be contained between X and X+1.
2917  if (!SignChange) {
2918    LLVM_DEBUG(dbgs() << __func__ << ": no valid solution\n");
2919    return None;
2920  }
2921
2922  X += 1;
2923  LLVM_DEBUG(dbgs() << __func__ << ": solution (wrap): " << X << '\n');
2924  return X;
2925}
2926