1//===-- APInt.cpp - Implement APInt class ---------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements a class to represent arbitrary precision integer
10// constant values and provide a variety of arithmetic operations on them.
11//
12//===----------------------------------------------------------------------===//
13
14#include "llvm/ADT/APInt.h"
15#include "llvm/ADT/ArrayRef.h"
16#include "llvm/ADT/FoldingSet.h"
17#include "llvm/ADT/Hashing.h"
18#include "llvm/ADT/Optional.h"
19#include "llvm/ADT/SmallString.h"
20#include "llvm/ADT/StringRef.h"
21#include "llvm/ADT/bit.h"
22#include "llvm/Config/llvm-config.h"
23#include "llvm/Support/Debug.h"
24#include "llvm/Support/ErrorHandling.h"
25#include "llvm/Support/MathExtras.h"
26#include "llvm/Support/raw_ostream.h"
27#include <climits>
28#include <cmath>
29#include <cstdlib>
30#include <cstring>
31using namespace llvm;
32
33#define DEBUG_TYPE "apint"
34
35/// A utility function for allocating memory, checking for allocation failures,
36/// and ensuring the contents are zeroed.
37inline static uint64_t* getClearedMemory(unsigned numWords) {
38  uint64_t *result = new uint64_t[numWords];
39  memset(result, 0, numWords * sizeof(uint64_t));
40  return result;
41}
42
43/// A utility function for allocating memory and checking for allocation
44/// failure.  The content is not zeroed.
45inline static uint64_t* getMemory(unsigned numWords) {
46  return new uint64_t[numWords];
47}
48
49/// A utility function that converts a character to a digit.
50inline static unsigned getDigit(char cdigit, uint8_t radix) {
51  unsigned r;
52
53  if (radix == 16 || radix == 36) {
54    r = cdigit - '0';
55    if (r <= 9)
56      return r;
57
58    r = cdigit - 'A';
59    if (r <= radix - 11U)
60      return r + 10;
61
62    r = cdigit - 'a';
63    if (r <= radix - 11U)
64      return r + 10;
65
66    radix = 10;
67  }
68
69  r = cdigit - '0';
70  if (r < radix)
71    return r;
72
73  return -1U;
74}
75
76
77void APInt::initSlowCase(uint64_t val, bool isSigned) {
78  U.pVal = getClearedMemory(getNumWords());
79  U.pVal[0] = val;
80  if (isSigned && int64_t(val) < 0)
81    for (unsigned i = 1; i < getNumWords(); ++i)
82      U.pVal[i] = WORDTYPE_MAX;
83  clearUnusedBits();
84}
85
86void APInt::initSlowCase(const APInt& that) {
87  U.pVal = getMemory(getNumWords());
88  memcpy(U.pVal, that.U.pVal, getNumWords() * APINT_WORD_SIZE);
89}
90
91void APInt::initFromArray(ArrayRef<uint64_t> bigVal) {
92  assert(BitWidth && "Bitwidth too small");
93  assert(bigVal.data() && "Null pointer detected!");
94  if (isSingleWord())
95    U.VAL = bigVal[0];
96  else {
97    // Get memory, cleared to 0
98    U.pVal = getClearedMemory(getNumWords());
99    // Calculate the number of words to copy
100    unsigned words = std::min<unsigned>(bigVal.size(), getNumWords());
101    // Copy the words from bigVal to pVal
102    memcpy(U.pVal, bigVal.data(), words * APINT_WORD_SIZE);
103  }
104  // Make sure unused high bits are cleared
105  clearUnusedBits();
106}
107
108APInt::APInt(unsigned numBits, ArrayRef<uint64_t> bigVal)
109  : BitWidth(numBits) {
110  initFromArray(bigVal);
111}
112
113APInt::APInt(unsigned numBits, unsigned numWords, const uint64_t bigVal[])
114  : BitWidth(numBits) {
115  initFromArray(makeArrayRef(bigVal, numWords));
116}
117
118APInt::APInt(unsigned numbits, StringRef Str, uint8_t radix)
119  : BitWidth(numbits) {
120  assert(BitWidth && "Bitwidth too small");
121  fromString(numbits, Str, radix);
122}
123
124void APInt::reallocate(unsigned NewBitWidth) {
125  // If the number of words is the same we can just change the width and stop.
126  if (getNumWords() == getNumWords(NewBitWidth)) {
127    BitWidth = NewBitWidth;
128    return;
129  }
130
131  // If we have an allocation, delete it.
132  if (!isSingleWord())
133    delete [] U.pVal;
134
135  // Update BitWidth.
136  BitWidth = NewBitWidth;
137
138  // If we are supposed to have an allocation, create it.
139  if (!isSingleWord())
140    U.pVal = getMemory(getNumWords());
141}
142
143void APInt::AssignSlowCase(const APInt& RHS) {
144  // Don't do anything for X = X
145  if (this == &RHS)
146    return;
147
148  // Adjust the bit width and handle allocations as necessary.
149  reallocate(RHS.getBitWidth());
150
151  // Copy the data.
152  if (isSingleWord())
153    U.VAL = RHS.U.VAL;
154  else
155    memcpy(U.pVal, RHS.U.pVal, getNumWords() * APINT_WORD_SIZE);
156}
157
158/// This method 'profiles' an APInt for use with FoldingSet.
159void APInt::Profile(FoldingSetNodeID& ID) const {
160  ID.AddInteger(BitWidth);
161
162  if (isSingleWord()) {
163    ID.AddInteger(U.VAL);
164    return;
165  }
166
167  unsigned NumWords = getNumWords();
168  for (unsigned i = 0; i < NumWords; ++i)
169    ID.AddInteger(U.pVal[i]);
170}
171
172/// Prefix increment operator. Increments the APInt by one.
173APInt& APInt::operator++() {
174  if (isSingleWord())
175    ++U.VAL;
176  else
177    tcIncrement(U.pVal, getNumWords());
178  return clearUnusedBits();
179}
180
181/// Prefix decrement operator. Decrements the APInt by one.
182APInt& APInt::operator--() {
183  if (isSingleWord())
184    --U.VAL;
185  else
186    tcDecrement(U.pVal, getNumWords());
187  return clearUnusedBits();
188}
189
190/// Adds the RHS APInt to this APInt.
191/// @returns this, after addition of RHS.
192/// Addition assignment operator.
193APInt& APInt::operator+=(const APInt& RHS) {
194  assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
195  if (isSingleWord())
196    U.VAL += RHS.U.VAL;
197  else
198    tcAdd(U.pVal, RHS.U.pVal, 0, getNumWords());
199  return clearUnusedBits();
200}
201
202APInt& APInt::operator+=(uint64_t RHS) {
203  if (isSingleWord())
204    U.VAL += RHS;
205  else
206    tcAddPart(U.pVal, RHS, getNumWords());
207  return clearUnusedBits();
208}
209
210/// Subtracts the RHS APInt from this APInt
211/// @returns this, after subtraction
212/// Subtraction assignment operator.
213APInt& APInt::operator-=(const APInt& RHS) {
214  assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
215  if (isSingleWord())
216    U.VAL -= RHS.U.VAL;
217  else
218    tcSubtract(U.pVal, RHS.U.pVal, 0, getNumWords());
219  return clearUnusedBits();
220}
221
222APInt& APInt::operator-=(uint64_t RHS) {
223  if (isSingleWord())
224    U.VAL -= RHS;
225  else
226    tcSubtractPart(U.pVal, RHS, getNumWords());
227  return clearUnusedBits();
228}
229
230APInt APInt::operator*(const APInt& RHS) const {
231  assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
232  if (isSingleWord())
233    return APInt(BitWidth, U.VAL * RHS.U.VAL);
234
235  APInt Result(getMemory(getNumWords()), getBitWidth());
236
237  tcMultiply(Result.U.pVal, U.pVal, RHS.U.pVal, getNumWords());
238
239  Result.clearUnusedBits();
240  return Result;
241}
242
243void APInt::AndAssignSlowCase(const APInt& RHS) {
244  tcAnd(U.pVal, RHS.U.pVal, getNumWords());
245}
246
247void APInt::OrAssignSlowCase(const APInt& RHS) {
248  tcOr(U.pVal, RHS.U.pVal, getNumWords());
249}
250
251void APInt::XorAssignSlowCase(const APInt& RHS) {
252  tcXor(U.pVal, RHS.U.pVal, getNumWords());
253}
254
255APInt& APInt::operator*=(const APInt& RHS) {
256  assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
257  *this = *this * RHS;
258  return *this;
259}
260
261APInt& APInt::operator*=(uint64_t RHS) {
262  if (isSingleWord()) {
263    U.VAL *= RHS;
264  } else {
265    unsigned NumWords = getNumWords();
266    tcMultiplyPart(U.pVal, U.pVal, RHS, 0, NumWords, NumWords, false);
267  }
268  return clearUnusedBits();
269}
270
271bool APInt::EqualSlowCase(const APInt& RHS) const {
272  return std::equal(U.pVal, U.pVal + getNumWords(), RHS.U.pVal);
273}
274
275int APInt::compare(const APInt& RHS) const {
276  assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison");
277  if (isSingleWord())
278    return U.VAL < RHS.U.VAL ? -1 : U.VAL > RHS.U.VAL;
279
280  return tcCompare(U.pVal, RHS.U.pVal, getNumWords());
281}
282
283int APInt::compareSigned(const APInt& RHS) const {
284  assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison");
285  if (isSingleWord()) {
286    int64_t lhsSext = SignExtend64(U.VAL, BitWidth);
287    int64_t rhsSext = SignExtend64(RHS.U.VAL, BitWidth);
288    return lhsSext < rhsSext ? -1 : lhsSext > rhsSext;
289  }
290
291  bool lhsNeg = isNegative();
292  bool rhsNeg = RHS.isNegative();
293
294  // If the sign bits don't match, then (LHS < RHS) if LHS is negative
295  if (lhsNeg != rhsNeg)
296    return lhsNeg ? -1 : 1;
297
298  // Otherwise we can just use an unsigned comparison, because even negative
299  // numbers compare correctly this way if both have the same signed-ness.
300  return tcCompare(U.pVal, RHS.U.pVal, getNumWords());
301}
302
303void APInt::setBitsSlowCase(unsigned loBit, unsigned hiBit) {
304  unsigned loWord = whichWord(loBit);
305  unsigned hiWord = whichWord(hiBit);
306
307  // Create an initial mask for the low word with zeros below loBit.
308  uint64_t loMask = WORDTYPE_MAX << whichBit(loBit);
309
310  // If hiBit is not aligned, we need a high mask.
311  unsigned hiShiftAmt = whichBit(hiBit);
312  if (hiShiftAmt != 0) {
313    // Create a high mask with zeros above hiBit.
314    uint64_t hiMask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - hiShiftAmt);
315    // If loWord and hiWord are equal, then we combine the masks. Otherwise,
316    // set the bits in hiWord.
317    if (hiWord == loWord)
318      loMask &= hiMask;
319    else
320      U.pVal[hiWord] |= hiMask;
321  }
322  // Apply the mask to the low word.
323  U.pVal[loWord] |= loMask;
324
325  // Fill any words between loWord and hiWord with all ones.
326  for (unsigned word = loWord + 1; word < hiWord; ++word)
327    U.pVal[word] = WORDTYPE_MAX;
328}
329
330/// Toggle every bit to its opposite value.
331void APInt::flipAllBitsSlowCase() {
332  tcComplement(U.pVal, getNumWords());
333  clearUnusedBits();
334}
335
336/// Toggle a given bit to its opposite value whose position is given
337/// as "bitPosition".
338/// Toggles a given bit to its opposite value.
339void APInt::flipBit(unsigned bitPosition) {
340  assert(bitPosition < BitWidth && "Out of the bit-width range!");
341  if ((*this)[bitPosition]) clearBit(bitPosition);
342  else setBit(bitPosition);
343}
344
345void APInt::insertBits(const APInt &subBits, unsigned bitPosition) {
346  unsigned subBitWidth = subBits.getBitWidth();
347  assert(0 < subBitWidth && (subBitWidth + bitPosition) <= BitWidth &&
348         "Illegal bit insertion");
349
350  // Insertion is a direct copy.
351  if (subBitWidth == BitWidth) {
352    *this = subBits;
353    return;
354  }
355
356  // Single word result can be done as a direct bitmask.
357  if (isSingleWord()) {
358    uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - subBitWidth);
359    U.VAL &= ~(mask << bitPosition);
360    U.VAL |= (subBits.U.VAL << bitPosition);
361    return;
362  }
363
364  unsigned loBit = whichBit(bitPosition);
365  unsigned loWord = whichWord(bitPosition);
366  unsigned hi1Word = whichWord(bitPosition + subBitWidth - 1);
367
368  // Insertion within a single word can be done as a direct bitmask.
369  if (loWord == hi1Word) {
370    uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - subBitWidth);
371    U.pVal[loWord] &= ~(mask << loBit);
372    U.pVal[loWord] |= (subBits.U.VAL << loBit);
373    return;
374  }
375
376  // Insert on word boundaries.
377  if (loBit == 0) {
378    // Direct copy whole words.
379    unsigned numWholeSubWords = subBitWidth / APINT_BITS_PER_WORD;
380    memcpy(U.pVal + loWord, subBits.getRawData(),
381           numWholeSubWords * APINT_WORD_SIZE);
382
383    // Mask+insert remaining bits.
384    unsigned remainingBits = subBitWidth % APINT_BITS_PER_WORD;
385    if (remainingBits != 0) {
386      uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - remainingBits);
387      U.pVal[hi1Word] &= ~mask;
388      U.pVal[hi1Word] |= subBits.getWord(subBitWidth - 1);
389    }
390    return;
391  }
392
393  // General case - set/clear individual bits in dst based on src.
394  // TODO - there is scope for optimization here, but at the moment this code
395  // path is barely used so prefer readability over performance.
396  for (unsigned i = 0; i != subBitWidth; ++i) {
397    if (subBits[i])
398      setBit(bitPosition + i);
399    else
400      clearBit(bitPosition + i);
401  }
402}
403
404void APInt::insertBits(uint64_t subBits, unsigned bitPosition, unsigned numBits) {
405  uint64_t maskBits = maskTrailingOnes<uint64_t>(numBits);
406  subBits &= maskBits;
407  if (isSingleWord()) {
408    U.VAL &= ~(maskBits << bitPosition);
409    U.VAL |= subBits << bitPosition;
410    return;
411  }
412
413  unsigned loBit = whichBit(bitPosition);
414  unsigned loWord = whichWord(bitPosition);
415  unsigned hiWord = whichWord(bitPosition + numBits - 1);
416  if (loWord == hiWord) {
417    U.pVal[loWord] &= ~(maskBits << loBit);
418    U.pVal[loWord] |= subBits << loBit;
419    return;
420  }
421
422  static_assert(8 * sizeof(WordType) <= 64, "This code assumes only two words affected");
423  unsigned wordBits = 8 * sizeof(WordType);
424  U.pVal[loWord] &= ~(maskBits << loBit);
425  U.pVal[loWord] |= subBits << loBit;
426
427  U.pVal[hiWord] &= ~(maskBits >> (wordBits - loBit));
428  U.pVal[hiWord] |= subBits >> (wordBits - loBit);
429}
430
431APInt APInt::extractBits(unsigned numBits, unsigned bitPosition) const {
432  assert(numBits > 0 && "Can't extract zero bits");
433  assert(bitPosition < BitWidth && (numBits + bitPosition) <= BitWidth &&
434         "Illegal bit extraction");
435
436  if (isSingleWord())
437    return APInt(numBits, U.VAL >> bitPosition);
438
439  unsigned loBit = whichBit(bitPosition);
440  unsigned loWord = whichWord(bitPosition);
441  unsigned hiWord = whichWord(bitPosition + numBits - 1);
442
443  // Single word result extracting bits from a single word source.
444  if (loWord == hiWord)
445    return APInt(numBits, U.pVal[loWord] >> loBit);
446
447  // Extracting bits that start on a source word boundary can be done
448  // as a fast memory copy.
449  if (loBit == 0)
450    return APInt(numBits, makeArrayRef(U.pVal + loWord, 1 + hiWord - loWord));
451
452  // General case - shift + copy source words directly into place.
453  APInt Result(numBits, 0);
454  unsigned NumSrcWords = getNumWords();
455  unsigned NumDstWords = Result.getNumWords();
456
457  uint64_t *DestPtr = Result.isSingleWord() ? &Result.U.VAL : Result.U.pVal;
458  for (unsigned word = 0; word < NumDstWords; ++word) {
459    uint64_t w0 = U.pVal[loWord + word];
460    uint64_t w1 =
461        (loWord + word + 1) < NumSrcWords ? U.pVal[loWord + word + 1] : 0;
462    DestPtr[word] = (w0 >> loBit) | (w1 << (APINT_BITS_PER_WORD - loBit));
463  }
464
465  return Result.clearUnusedBits();
466}
467
468uint64_t APInt::extractBitsAsZExtValue(unsigned numBits,
469                                       unsigned bitPosition) const {
470  assert(numBits > 0 && "Can't extract zero bits");
471  assert(bitPosition < BitWidth && (numBits + bitPosition) <= BitWidth &&
472         "Illegal bit extraction");
473  assert(numBits <= 64 && "Illegal bit extraction");
474
475  uint64_t maskBits = maskTrailingOnes<uint64_t>(numBits);
476  if (isSingleWord())
477    return (U.VAL >> bitPosition) & maskBits;
478
479  unsigned loBit = whichBit(bitPosition);
480  unsigned loWord = whichWord(bitPosition);
481  unsigned hiWord = whichWord(bitPosition + numBits - 1);
482  if (loWord == hiWord)
483    return (U.pVal[loWord] >> loBit) & maskBits;
484
485  static_assert(8 * sizeof(WordType) <= 64, "This code assumes only two words affected");
486  unsigned wordBits = 8 * sizeof(WordType);
487  uint64_t retBits = U.pVal[loWord] >> loBit;
488  retBits |= U.pVal[hiWord] << (wordBits - loBit);
489  retBits &= maskBits;
490  return retBits;
491}
492
493unsigned APInt::getBitsNeeded(StringRef str, uint8_t radix) {
494  assert(!str.empty() && "Invalid string length");
495  assert((radix == 10 || radix == 8 || radix == 16 || radix == 2 ||
496          radix == 36) &&
497         "Radix should be 2, 8, 10, 16, or 36!");
498
499  size_t slen = str.size();
500
501  // Each computation below needs to know if it's negative.
502  StringRef::iterator p = str.begin();
503  unsigned isNegative = *p == '-';
504  if (*p == '-' || *p == '+') {
505    p++;
506    slen--;
507    assert(slen && "String is only a sign, needs a value.");
508  }
509
510  // For radixes of power-of-two values, the bits required is accurately and
511  // easily computed
512  if (radix == 2)
513    return slen + isNegative;
514  if (radix == 8)
515    return slen * 3 + isNegative;
516  if (radix == 16)
517    return slen * 4 + isNegative;
518
519  // FIXME: base 36
520
521  // This is grossly inefficient but accurate. We could probably do something
522  // with a computation of roughly slen*64/20 and then adjust by the value of
523  // the first few digits. But, I'm not sure how accurate that could be.
524
525  // Compute a sufficient number of bits that is always large enough but might
526  // be too large. This avoids the assertion in the constructor. This
527  // calculation doesn't work appropriately for the numbers 0-9, so just use 4
528  // bits in that case.
529  unsigned sufficient
530    = radix == 10? (slen == 1 ? 4 : slen * 64/18)
531                 : (slen == 1 ? 7 : slen * 16/3);
532
533  // Convert to the actual binary value.
534  APInt tmp(sufficient, StringRef(p, slen), radix);
535
536  // Compute how many bits are required. If the log is infinite, assume we need
537  // just bit. If the log is exact and value is negative, then the value is
538  // MinSignedValue with (log + 1) bits.
539  unsigned log = tmp.logBase2();
540  if (log == (unsigned)-1) {
541    return isNegative + 1;
542  } else if (isNegative && tmp.isPowerOf2()) {
543    return isNegative + log;
544  } else {
545    return isNegative + log + 1;
546  }
547}
548
549hash_code llvm::hash_value(const APInt &Arg) {
550  if (Arg.isSingleWord())
551    return hash_combine(Arg.BitWidth, Arg.U.VAL);
552
553  return hash_combine(
554      Arg.BitWidth,
555      hash_combine_range(Arg.U.pVal, Arg.U.pVal + Arg.getNumWords()));
556}
557
558bool APInt::isSplat(unsigned SplatSizeInBits) const {
559  assert(getBitWidth() % SplatSizeInBits == 0 &&
560         "SplatSizeInBits must divide width!");
561  // We can check that all parts of an integer are equal by making use of a
562  // little trick: rotate and check if it's still the same value.
563  return *this == rotl(SplatSizeInBits);
564}
565
566/// This function returns the high "numBits" bits of this APInt.
567APInt APInt::getHiBits(unsigned numBits) const {
568  return this->lshr(BitWidth - numBits);
569}
570
571/// This function returns the low "numBits" bits of this APInt.
572APInt APInt::getLoBits(unsigned numBits) const {
573  APInt Result(getLowBitsSet(BitWidth, numBits));
574  Result &= *this;
575  return Result;
576}
577
578/// Return a value containing V broadcasted over NewLen bits.
579APInt APInt::getSplat(unsigned NewLen, const APInt &V) {
580  assert(NewLen >= V.getBitWidth() && "Can't splat to smaller bit width!");
581
582  APInt Val = V.zextOrSelf(NewLen);
583  for (unsigned I = V.getBitWidth(); I < NewLen; I <<= 1)
584    Val |= Val << I;
585
586  return Val;
587}
588
589unsigned APInt::countLeadingZerosSlowCase() const {
590  unsigned Count = 0;
591  for (int i = getNumWords()-1; i >= 0; --i) {
592    uint64_t V = U.pVal[i];
593    if (V == 0)
594      Count += APINT_BITS_PER_WORD;
595    else {
596      Count += llvm::countLeadingZeros(V);
597      break;
598    }
599  }
600  // Adjust for unused bits in the most significant word (they are zero).
601  unsigned Mod = BitWidth % APINT_BITS_PER_WORD;
602  Count -= Mod > 0 ? APINT_BITS_PER_WORD - Mod : 0;
603  return Count;
604}
605
606unsigned APInt::countLeadingOnesSlowCase() const {
607  unsigned highWordBits = BitWidth % APINT_BITS_PER_WORD;
608  unsigned shift;
609  if (!highWordBits) {
610    highWordBits = APINT_BITS_PER_WORD;
611    shift = 0;
612  } else {
613    shift = APINT_BITS_PER_WORD - highWordBits;
614  }
615  int i = getNumWords() - 1;
616  unsigned Count = llvm::countLeadingOnes(U.pVal[i] << shift);
617  if (Count == highWordBits) {
618    for (i--; i >= 0; --i) {
619      if (U.pVal[i] == WORDTYPE_MAX)
620        Count += APINT_BITS_PER_WORD;
621      else {
622        Count += llvm::countLeadingOnes(U.pVal[i]);
623        break;
624      }
625    }
626  }
627  return Count;
628}
629
630unsigned APInt::countTrailingZerosSlowCase() const {
631  unsigned Count = 0;
632  unsigned i = 0;
633  for (; i < getNumWords() && U.pVal[i] == 0; ++i)
634    Count += APINT_BITS_PER_WORD;
635  if (i < getNumWords())
636    Count += llvm::countTrailingZeros(U.pVal[i]);
637  return std::min(Count, BitWidth);
638}
639
640unsigned APInt::countTrailingOnesSlowCase() const {
641  unsigned Count = 0;
642  unsigned i = 0;
643  for (; i < getNumWords() && U.pVal[i] == WORDTYPE_MAX; ++i)
644    Count += APINT_BITS_PER_WORD;
645  if (i < getNumWords())
646    Count += llvm::countTrailingOnes(U.pVal[i]);
647  assert(Count <= BitWidth);
648  return Count;
649}
650
651unsigned APInt::countPopulationSlowCase() const {
652  unsigned Count = 0;
653  for (unsigned i = 0; i < getNumWords(); ++i)
654    Count += llvm::countPopulation(U.pVal[i]);
655  return Count;
656}
657
658bool APInt::intersectsSlowCase(const APInt &RHS) const {
659  for (unsigned i = 0, e = getNumWords(); i != e; ++i)
660    if ((U.pVal[i] & RHS.U.pVal[i]) != 0)
661      return true;
662
663  return false;
664}
665
666bool APInt::isSubsetOfSlowCase(const APInt &RHS) const {
667  for (unsigned i = 0, e = getNumWords(); i != e; ++i)
668    if ((U.pVal[i] & ~RHS.U.pVal[i]) != 0)
669      return false;
670
671  return true;
672}
673
674APInt APInt::byteSwap() const {
675  assert(BitWidth >= 16 && BitWidth % 8 == 0 && "Cannot byteswap!");
676  if (BitWidth == 16)
677    return APInt(BitWidth, ByteSwap_16(uint16_t(U.VAL)));
678  if (BitWidth == 32)
679    return APInt(BitWidth, ByteSwap_32(unsigned(U.VAL)));
680  if (BitWidth <= 64) {
681    uint64_t Tmp1 = ByteSwap_64(U.VAL);
682    Tmp1 >>= (64 - BitWidth);
683    return APInt(BitWidth, Tmp1);
684  }
685
686  APInt Result(getNumWords() * APINT_BITS_PER_WORD, 0);
687  for (unsigned I = 0, N = getNumWords(); I != N; ++I)
688    Result.U.pVal[I] = ByteSwap_64(U.pVal[N - I - 1]);
689  if (Result.BitWidth != BitWidth) {
690    Result.lshrInPlace(Result.BitWidth - BitWidth);
691    Result.BitWidth = BitWidth;
692  }
693  return Result;
694}
695
696APInt APInt::reverseBits() const {
697  switch (BitWidth) {
698  case 64:
699    return APInt(BitWidth, llvm::reverseBits<uint64_t>(U.VAL));
700  case 32:
701    return APInt(BitWidth, llvm::reverseBits<uint32_t>(U.VAL));
702  case 16:
703    return APInt(BitWidth, llvm::reverseBits<uint16_t>(U.VAL));
704  case 8:
705    return APInt(BitWidth, llvm::reverseBits<uint8_t>(U.VAL));
706  default:
707    break;
708  }
709
710  APInt Val(*this);
711  APInt Reversed(BitWidth, 0);
712  unsigned S = BitWidth;
713
714  for (; Val != 0; Val.lshrInPlace(1)) {
715    Reversed <<= 1;
716    Reversed |= Val[0];
717    --S;
718  }
719
720  Reversed <<= S;
721  return Reversed;
722}
723
724APInt llvm::APIntOps::GreatestCommonDivisor(APInt A, APInt B) {
725  // Fast-path a common case.
726  if (A == B) return A;
727
728  // Corner cases: if either operand is zero, the other is the gcd.
729  if (!A) return B;
730  if (!B) return A;
731
732  // Count common powers of 2 and remove all other powers of 2.
733  unsigned Pow2;
734  {
735    unsigned Pow2_A = A.countTrailingZeros();
736    unsigned Pow2_B = B.countTrailingZeros();
737    if (Pow2_A > Pow2_B) {
738      A.lshrInPlace(Pow2_A - Pow2_B);
739      Pow2 = Pow2_B;
740    } else if (Pow2_B > Pow2_A) {
741      B.lshrInPlace(Pow2_B - Pow2_A);
742      Pow2 = Pow2_A;
743    } else {
744      Pow2 = Pow2_A;
745    }
746  }
747
748  // Both operands are odd multiples of 2^Pow_2:
749  //
750  //   gcd(a, b) = gcd(|a - b| / 2^i, min(a, b))
751  //
752  // This is a modified version of Stein's algorithm, taking advantage of
753  // efficient countTrailingZeros().
754  while (A != B) {
755    if (A.ugt(B)) {
756      A -= B;
757      A.lshrInPlace(A.countTrailingZeros() - Pow2);
758    } else {
759      B -= A;
760      B.lshrInPlace(B.countTrailingZeros() - Pow2);
761    }
762  }
763
764  return A;
765}
766
767APInt llvm::APIntOps::RoundDoubleToAPInt(double Double, unsigned width) {
768  uint64_t I = bit_cast<uint64_t>(Double);
769
770  // Get the sign bit from the highest order bit
771  bool isNeg = I >> 63;
772
773  // Get the 11-bit exponent and adjust for the 1023 bit bias
774  int64_t exp = ((I >> 52) & 0x7ff) - 1023;
775
776  // If the exponent is negative, the value is < 0 so just return 0.
777  if (exp < 0)
778    return APInt(width, 0u);
779
780  // Extract the mantissa by clearing the top 12 bits (sign + exponent).
781  uint64_t mantissa = (I & (~0ULL >> 12)) | 1ULL << 52;
782
783  // If the exponent doesn't shift all bits out of the mantissa
784  if (exp < 52)
785    return isNeg ? -APInt(width, mantissa >> (52 - exp)) :
786                    APInt(width, mantissa >> (52 - exp));
787
788  // If the client didn't provide enough bits for us to shift the mantissa into
789  // then the result is undefined, just return 0
790  if (width <= exp - 52)
791    return APInt(width, 0);
792
793  // Otherwise, we have to shift the mantissa bits up to the right location
794  APInt Tmp(width, mantissa);
795  Tmp <<= (unsigned)exp - 52;
796  return isNeg ? -Tmp : Tmp;
797}
798
799/// This function converts this APInt to a double.
800/// The layout for double is as following (IEEE Standard 754):
801///  --------------------------------------
802/// |  Sign    Exponent    Fraction    Bias |
803/// |-------------------------------------- |
804/// |  1[63]   11[62-52]   52[51-00]   1023 |
805///  --------------------------------------
806double APInt::roundToDouble(bool isSigned) const {
807
808  // Handle the simple case where the value is contained in one uint64_t.
809  // It is wrong to optimize getWord(0) to VAL; there might be more than one word.
810  if (isSingleWord() || getActiveBits() <= APINT_BITS_PER_WORD) {
811    if (isSigned) {
812      int64_t sext = SignExtend64(getWord(0), BitWidth);
813      return double(sext);
814    } else
815      return double(getWord(0));
816  }
817
818  // Determine if the value is negative.
819  bool isNeg = isSigned ? (*this)[BitWidth-1] : false;
820
821  // Construct the absolute value if we're negative.
822  APInt Tmp(isNeg ? -(*this) : (*this));
823
824  // Figure out how many bits we're using.
825  unsigned n = Tmp.getActiveBits();
826
827  // The exponent (without bias normalization) is just the number of bits
828  // we are using. Note that the sign bit is gone since we constructed the
829  // absolute value.
830  uint64_t exp = n;
831
832  // Return infinity for exponent overflow
833  if (exp > 1023) {
834    if (!isSigned || !isNeg)
835      return std::numeric_limits<double>::infinity();
836    else
837      return -std::numeric_limits<double>::infinity();
838  }
839  exp += 1023; // Increment for 1023 bias
840
841  // Number of bits in mantissa is 52. To obtain the mantissa value, we must
842  // extract the high 52 bits from the correct words in pVal.
843  uint64_t mantissa;
844  unsigned hiWord = whichWord(n-1);
845  if (hiWord == 0) {
846    mantissa = Tmp.U.pVal[0];
847    if (n > 52)
848      mantissa >>= n - 52; // shift down, we want the top 52 bits.
849  } else {
850    assert(hiWord > 0 && "huh?");
851    uint64_t hibits = Tmp.U.pVal[hiWord] << (52 - n % APINT_BITS_PER_WORD);
852    uint64_t lobits = Tmp.U.pVal[hiWord-1] >> (11 + n % APINT_BITS_PER_WORD);
853    mantissa = hibits | lobits;
854  }
855
856  // The leading bit of mantissa is implicit, so get rid of it.
857  uint64_t sign = isNeg ? (1ULL << (APINT_BITS_PER_WORD - 1)) : 0;
858  uint64_t I = sign | (exp << 52) | mantissa;
859  return bit_cast<double>(I);
860}
861
862// Truncate to new width.
863APInt APInt::trunc(unsigned width) const {
864  assert(width < BitWidth && "Invalid APInt Truncate request");
865  assert(width && "Can't truncate to 0 bits");
866
867  if (width <= APINT_BITS_PER_WORD)
868    return APInt(width, getRawData()[0]);
869
870  APInt Result(getMemory(getNumWords(width)), width);
871
872  // Copy full words.
873  unsigned i;
874  for (i = 0; i != width / APINT_BITS_PER_WORD; i++)
875    Result.U.pVal[i] = U.pVal[i];
876
877  // Truncate and copy any partial word.
878  unsigned bits = (0 - width) % APINT_BITS_PER_WORD;
879  if (bits != 0)
880    Result.U.pVal[i] = U.pVal[i] << bits >> bits;
881
882  return Result;
883}
884
885// Truncate to new width with unsigned saturation.
886APInt APInt::truncUSat(unsigned width) const {
887  assert(width < BitWidth && "Invalid APInt Truncate request");
888  assert(width && "Can't truncate to 0 bits");
889
890  // Can we just losslessly truncate it?
891  if (isIntN(width))
892    return trunc(width);
893  // If not, then just return the new limit.
894  return APInt::getMaxValue(width);
895}
896
897// Truncate to new width with signed saturation.
898APInt APInt::truncSSat(unsigned width) const {
899  assert(width < BitWidth && "Invalid APInt Truncate request");
900  assert(width && "Can't truncate to 0 bits");
901
902  // Can we just losslessly truncate it?
903  if (isSignedIntN(width))
904    return trunc(width);
905  // If not, then just return the new limits.
906  return isNegative() ? APInt::getSignedMinValue(width)
907                      : APInt::getSignedMaxValue(width);
908}
909
910// Sign extend to a new width.
911APInt APInt::sext(unsigned Width) const {
912  assert(Width > BitWidth && "Invalid APInt SignExtend request");
913
914  if (Width <= APINT_BITS_PER_WORD)
915    return APInt(Width, SignExtend64(U.VAL, BitWidth));
916
917  APInt Result(getMemory(getNumWords(Width)), Width);
918
919  // Copy words.
920  std::memcpy(Result.U.pVal, getRawData(), getNumWords() * APINT_WORD_SIZE);
921
922  // Sign extend the last word since there may be unused bits in the input.
923  Result.U.pVal[getNumWords() - 1] =
924      SignExtend64(Result.U.pVal[getNumWords() - 1],
925                   ((BitWidth - 1) % APINT_BITS_PER_WORD) + 1);
926
927  // Fill with sign bits.
928  std::memset(Result.U.pVal + getNumWords(), isNegative() ? -1 : 0,
929              (Result.getNumWords() - getNumWords()) * APINT_WORD_SIZE);
930  Result.clearUnusedBits();
931  return Result;
932}
933
934//  Zero extend to a new width.
935APInt APInt::zext(unsigned width) const {
936  assert(width > BitWidth && "Invalid APInt ZeroExtend request");
937
938  if (width <= APINT_BITS_PER_WORD)
939    return APInt(width, U.VAL);
940
941  APInt Result(getMemory(getNumWords(width)), width);
942
943  // Copy words.
944  std::memcpy(Result.U.pVal, getRawData(), getNumWords() * APINT_WORD_SIZE);
945
946  // Zero remaining words.
947  std::memset(Result.U.pVal + getNumWords(), 0,
948              (Result.getNumWords() - getNumWords()) * APINT_WORD_SIZE);
949
950  return Result;
951}
952
953APInt APInt::zextOrTrunc(unsigned width) const {
954  if (BitWidth < width)
955    return zext(width);
956  if (BitWidth > width)
957    return trunc(width);
958  return *this;
959}
960
961APInt APInt::sextOrTrunc(unsigned width) const {
962  if (BitWidth < width)
963    return sext(width);
964  if (BitWidth > width)
965    return trunc(width);
966  return *this;
967}
968
969APInt APInt::zextOrSelf(unsigned width) const {
970  if (BitWidth < width)
971    return zext(width);
972  return *this;
973}
974
975APInt APInt::sextOrSelf(unsigned width) const {
976  if (BitWidth < width)
977    return sext(width);
978  return *this;
979}
980
981/// Arithmetic right-shift this APInt by shiftAmt.
982/// Arithmetic right-shift function.
983void APInt::ashrInPlace(const APInt &shiftAmt) {
984  ashrInPlace((unsigned)shiftAmt.getLimitedValue(BitWidth));
985}
986
987/// Arithmetic right-shift this APInt by shiftAmt.
988/// Arithmetic right-shift function.
989void APInt::ashrSlowCase(unsigned ShiftAmt) {
990  // Don't bother performing a no-op shift.
991  if (!ShiftAmt)
992    return;
993
994  // Save the original sign bit for later.
995  bool Negative = isNegative();
996
997  // WordShift is the inter-part shift; BitShift is intra-part shift.
998  unsigned WordShift = ShiftAmt / APINT_BITS_PER_WORD;
999  unsigned BitShift = ShiftAmt % APINT_BITS_PER_WORD;
1000
1001  unsigned WordsToMove = getNumWords() - WordShift;
1002  if (WordsToMove != 0) {
1003    // Sign extend the last word to fill in the unused bits.
1004    U.pVal[getNumWords() - 1] = SignExtend64(
1005        U.pVal[getNumWords() - 1], ((BitWidth - 1) % APINT_BITS_PER_WORD) + 1);
1006
1007    // Fastpath for moving by whole words.
1008    if (BitShift == 0) {
1009      std::memmove(U.pVal, U.pVal + WordShift, WordsToMove * APINT_WORD_SIZE);
1010    } else {
1011      // Move the words containing significant bits.
1012      for (unsigned i = 0; i != WordsToMove - 1; ++i)
1013        U.pVal[i] = (U.pVal[i + WordShift] >> BitShift) |
1014                    (U.pVal[i + WordShift + 1] << (APINT_BITS_PER_WORD - BitShift));
1015
1016      // Handle the last word which has no high bits to copy.
1017      U.pVal[WordsToMove - 1] = U.pVal[WordShift + WordsToMove - 1] >> BitShift;
1018      // Sign extend one more time.
1019      U.pVal[WordsToMove - 1] =
1020          SignExtend64(U.pVal[WordsToMove - 1], APINT_BITS_PER_WORD - BitShift);
1021    }
1022  }
1023
1024  // Fill in the remainder based on the original sign.
1025  std::memset(U.pVal + WordsToMove, Negative ? -1 : 0,
1026              WordShift * APINT_WORD_SIZE);
1027  clearUnusedBits();
1028}
1029
1030/// Logical right-shift this APInt by shiftAmt.
1031/// Logical right-shift function.
1032void APInt::lshrInPlace(const APInt &shiftAmt) {
1033  lshrInPlace((unsigned)shiftAmt.getLimitedValue(BitWidth));
1034}
1035
1036/// Logical right-shift this APInt by shiftAmt.
1037/// Logical right-shift function.
1038void APInt::lshrSlowCase(unsigned ShiftAmt) {
1039  tcShiftRight(U.pVal, getNumWords(), ShiftAmt);
1040}
1041
1042/// Left-shift this APInt by shiftAmt.
1043/// Left-shift function.
1044APInt &APInt::operator<<=(const APInt &shiftAmt) {
1045  // It's undefined behavior in C to shift by BitWidth or greater.
1046  *this <<= (unsigned)shiftAmt.getLimitedValue(BitWidth);
1047  return *this;
1048}
1049
1050void APInt::shlSlowCase(unsigned ShiftAmt) {
1051  tcShiftLeft(U.pVal, getNumWords(), ShiftAmt);
1052  clearUnusedBits();
1053}
1054
1055// Calculate the rotate amount modulo the bit width.
1056static unsigned rotateModulo(unsigned BitWidth, const APInt &rotateAmt) {
1057  unsigned rotBitWidth = rotateAmt.getBitWidth();
1058  APInt rot = rotateAmt;
1059  if (rotBitWidth < BitWidth) {
1060    // Extend the rotate APInt, so that the urem doesn't divide by 0.
1061    // e.g. APInt(1, 32) would give APInt(1, 0).
1062    rot = rotateAmt.zext(BitWidth);
1063  }
1064  rot = rot.urem(APInt(rot.getBitWidth(), BitWidth));
1065  return rot.getLimitedValue(BitWidth);
1066}
1067
1068APInt APInt::rotl(const APInt &rotateAmt) const {
1069  return rotl(rotateModulo(BitWidth, rotateAmt));
1070}
1071
1072APInt APInt::rotl(unsigned rotateAmt) const {
1073  rotateAmt %= BitWidth;
1074  if (rotateAmt == 0)
1075    return *this;
1076  return shl(rotateAmt) | lshr(BitWidth - rotateAmt);
1077}
1078
1079APInt APInt::rotr(const APInt &rotateAmt) const {
1080  return rotr(rotateModulo(BitWidth, rotateAmt));
1081}
1082
1083APInt APInt::rotr(unsigned rotateAmt) const {
1084  rotateAmt %= BitWidth;
1085  if (rotateAmt == 0)
1086    return *this;
1087  return lshr(rotateAmt) | shl(BitWidth - rotateAmt);
1088}
1089
1090// Square Root - this method computes and returns the square root of "this".
1091// Three mechanisms are used for computation. For small values (<= 5 bits),
1092// a table lookup is done. This gets some performance for common cases. For
1093// values using less than 52 bits, the value is converted to double and then
1094// the libc sqrt function is called. The result is rounded and then converted
1095// back to a uint64_t which is then used to construct the result. Finally,
1096// the Babylonian method for computing square roots is used.
1097APInt APInt::sqrt() const {
1098
1099  // Determine the magnitude of the value.
1100  unsigned magnitude = getActiveBits();
1101
1102  // Use a fast table for some small values. This also gets rid of some
1103  // rounding errors in libc sqrt for small values.
1104  if (magnitude <= 5) {
1105    static const uint8_t results[32] = {
1106      /*     0 */ 0,
1107      /*  1- 2 */ 1, 1,
1108      /*  3- 6 */ 2, 2, 2, 2,
1109      /*  7-12 */ 3, 3, 3, 3, 3, 3,
1110      /* 13-20 */ 4, 4, 4, 4, 4, 4, 4, 4,
1111      /* 21-30 */ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
1112      /*    31 */ 6
1113    };
1114    return APInt(BitWidth, results[ (isSingleWord() ? U.VAL : U.pVal[0]) ]);
1115  }
1116
1117  // If the magnitude of the value fits in less than 52 bits (the precision of
1118  // an IEEE double precision floating point value), then we can use the
1119  // libc sqrt function which will probably use a hardware sqrt computation.
1120  // This should be faster than the algorithm below.
1121  if (magnitude < 52) {
1122    return APInt(BitWidth,
1123                 uint64_t(::round(::sqrt(double(isSingleWord() ? U.VAL
1124                                                               : U.pVal[0])))));
1125  }
1126
1127  // Okay, all the short cuts are exhausted. We must compute it. The following
1128  // is a classical Babylonian method for computing the square root. This code
1129  // was adapted to APInt from a wikipedia article on such computations.
1130  // See http://www.wikipedia.org/ and go to the page named
1131  // Calculate_an_integer_square_root.
1132  unsigned nbits = BitWidth, i = 4;
1133  APInt testy(BitWidth, 16);
1134  APInt x_old(BitWidth, 1);
1135  APInt x_new(BitWidth, 0);
1136  APInt two(BitWidth, 2);
1137
1138  // Select a good starting value using binary logarithms.
1139  for (;; i += 2, testy = testy.shl(2))
1140    if (i >= nbits || this->ule(testy)) {
1141      x_old = x_old.shl(i / 2);
1142      break;
1143    }
1144
1145  // Use the Babylonian method to arrive at the integer square root:
1146  for (;;) {
1147    x_new = (this->udiv(x_old) + x_old).udiv(two);
1148    if (x_old.ule(x_new))
1149      break;
1150    x_old = x_new;
1151  }
1152
1153  // Make sure we return the closest approximation
1154  // NOTE: The rounding calculation below is correct. It will produce an
1155  // off-by-one discrepancy with results from pari/gp. That discrepancy has been
1156  // determined to be a rounding issue with pari/gp as it begins to use a
1157  // floating point representation after 192 bits. There are no discrepancies
1158  // between this algorithm and pari/gp for bit widths < 192 bits.
1159  APInt square(x_old * x_old);
1160  APInt nextSquare((x_old + 1) * (x_old +1));
1161  if (this->ult(square))
1162    return x_old;
1163  assert(this->ule(nextSquare) && "Error in APInt::sqrt computation");
1164  APInt midpoint((nextSquare - square).udiv(two));
1165  APInt offset(*this - square);
1166  if (offset.ult(midpoint))
1167    return x_old;
1168  return x_old + 1;
1169}
1170
1171/// Computes the multiplicative inverse of this APInt for a given modulo. The
1172/// iterative extended Euclidean algorithm is used to solve for this value,
1173/// however we simplify it to speed up calculating only the inverse, and take
1174/// advantage of div+rem calculations. We also use some tricks to avoid copying
1175/// (potentially large) APInts around.
1176/// WARNING: a value of '0' may be returned,
1177///          signifying that no multiplicative inverse exists!
1178APInt APInt::multiplicativeInverse(const APInt& modulo) const {
1179  assert(ult(modulo) && "This APInt must be smaller than the modulo");
1180
1181  // Using the properties listed at the following web page (accessed 06/21/08):
1182  //   http://www.numbertheory.org/php/euclid.html
1183  // (especially the properties numbered 3, 4 and 9) it can be proved that
1184  // BitWidth bits suffice for all the computations in the algorithm implemented
1185  // below. More precisely, this number of bits suffice if the multiplicative
1186  // inverse exists, but may not suffice for the general extended Euclidean
1187  // algorithm.
1188
1189  APInt r[2] = { modulo, *this };
1190  APInt t[2] = { APInt(BitWidth, 0), APInt(BitWidth, 1) };
1191  APInt q(BitWidth, 0);
1192
1193  unsigned i;
1194  for (i = 0; r[i^1] != 0; i ^= 1) {
1195    // An overview of the math without the confusing bit-flipping:
1196    // q = r[i-2] / r[i-1]
1197    // r[i] = r[i-2] % r[i-1]
1198    // t[i] = t[i-2] - t[i-1] * q
1199    udivrem(r[i], r[i^1], q, r[i]);
1200    t[i] -= t[i^1] * q;
1201  }
1202
1203  // If this APInt and the modulo are not coprime, there is no multiplicative
1204  // inverse, so return 0. We check this by looking at the next-to-last
1205  // remainder, which is the gcd(*this,modulo) as calculated by the Euclidean
1206  // algorithm.
1207  if (r[i] != 1)
1208    return APInt(BitWidth, 0);
1209
1210  // The next-to-last t is the multiplicative inverse.  However, we are
1211  // interested in a positive inverse. Calculate a positive one from a negative
1212  // one if necessary. A simple addition of the modulo suffices because
1213  // abs(t[i]) is known to be less than *this/2 (see the link above).
1214  if (t[i].isNegative())
1215    t[i] += modulo;
1216
1217  return std::move(t[i]);
1218}
1219
1220/// Calculate the magic numbers required to implement a signed integer division
1221/// by a constant as a sequence of multiplies, adds and shifts.  Requires that
1222/// the divisor not be 0, 1, or -1.  Taken from "Hacker's Delight", Henry S.
1223/// Warren, Jr., chapter 10.
1224APInt::ms APInt::magic() const {
1225  const APInt& d = *this;
1226  unsigned p;
1227  APInt ad, anc, delta, q1, r1, q2, r2, t;
1228  APInt signedMin = APInt::getSignedMinValue(d.getBitWidth());
1229  struct ms mag;
1230
1231  ad = d.abs();
1232  t = signedMin + (d.lshr(d.getBitWidth() - 1));
1233  anc = t - 1 - t.urem(ad);   // absolute value of nc
1234  p = d.getBitWidth() - 1;    // initialize p
1235  q1 = signedMin.udiv(anc);   // initialize q1 = 2p/abs(nc)
1236  r1 = signedMin - q1*anc;    // initialize r1 = rem(2p,abs(nc))
1237  q2 = signedMin.udiv(ad);    // initialize q2 = 2p/abs(d)
1238  r2 = signedMin - q2*ad;     // initialize r2 = rem(2p,abs(d))
1239  do {
1240    p = p + 1;
1241    q1 = q1<<1;          // update q1 = 2p/abs(nc)
1242    r1 = r1<<1;          // update r1 = rem(2p/abs(nc))
1243    if (r1.uge(anc)) {  // must be unsigned comparison
1244      q1 = q1 + 1;
1245      r1 = r1 - anc;
1246    }
1247    q2 = q2<<1;          // update q2 = 2p/abs(d)
1248    r2 = r2<<1;          // update r2 = rem(2p/abs(d))
1249    if (r2.uge(ad)) {   // must be unsigned comparison
1250      q2 = q2 + 1;
1251      r2 = r2 - ad;
1252    }
1253    delta = ad - r2;
1254  } while (q1.ult(delta) || (q1 == delta && r1 == 0));
1255
1256  mag.m = q2 + 1;
1257  if (d.isNegative()) mag.m = -mag.m;   // resulting magic number
1258  mag.s = p - d.getBitWidth();          // resulting shift
1259  return mag;
1260}
1261
1262/// Calculate the magic numbers required to implement an unsigned integer
1263/// division by a constant as a sequence of multiplies, adds and shifts.
1264/// Requires that the divisor not be 0.  Taken from "Hacker's Delight", Henry
1265/// S. Warren, Jr., chapter 10.
1266/// LeadingZeros can be used to simplify the calculation if the upper bits
1267/// of the divided value are known zero.
1268APInt::mu APInt::magicu(unsigned LeadingZeros) const {
1269  const APInt& d = *this;
1270  unsigned p;
1271  APInt nc, delta, q1, r1, q2, r2;
1272  struct mu magu;
1273  magu.a = 0;               // initialize "add" indicator
1274  APInt allOnes = APInt::getAllOnesValue(d.getBitWidth()).lshr(LeadingZeros);
1275  APInt signedMin = APInt::getSignedMinValue(d.getBitWidth());
1276  APInt signedMax = APInt::getSignedMaxValue(d.getBitWidth());
1277
1278  nc = allOnes - (allOnes - d).urem(d);
1279  p = d.getBitWidth() - 1;  // initialize p
1280  q1 = signedMin.udiv(nc);  // initialize q1 = 2p/nc
1281  r1 = signedMin - q1*nc;   // initialize r1 = rem(2p,nc)
1282  q2 = signedMax.udiv(d);   // initialize q2 = (2p-1)/d
1283  r2 = signedMax - q2*d;    // initialize r2 = rem((2p-1),d)
1284  do {
1285    p = p + 1;
1286    if (r1.uge(nc - r1)) {
1287      q1 = q1 + q1 + 1;  // update q1
1288      r1 = r1 + r1 - nc; // update r1
1289    }
1290    else {
1291      q1 = q1+q1; // update q1
1292      r1 = r1+r1; // update r1
1293    }
1294    if ((r2 + 1).uge(d - r2)) {
1295      if (q2.uge(signedMax)) magu.a = 1;
1296      q2 = q2+q2 + 1;     // update q2
1297      r2 = r2+r2 + 1 - d; // update r2
1298    }
1299    else {
1300      if (q2.uge(signedMin)) magu.a = 1;
1301      q2 = q2+q2;     // update q2
1302      r2 = r2+r2 + 1; // update r2
1303    }
1304    delta = d - 1 - r2;
1305  } while (p < d.getBitWidth()*2 &&
1306           (q1.ult(delta) || (q1 == delta && r1 == 0)));
1307  magu.m = q2 + 1; // resulting magic number
1308  magu.s = p - d.getBitWidth();  // resulting shift
1309  return magu;
1310}
1311
1312/// Implementation of Knuth's Algorithm D (Division of nonnegative integers)
1313/// from "Art of Computer Programming, Volume 2", section 4.3.1, p. 272. The
1314/// variables here have the same names as in the algorithm. Comments explain
1315/// the algorithm and any deviation from it.
1316static void KnuthDiv(uint32_t *u, uint32_t *v, uint32_t *q, uint32_t* r,
1317                     unsigned m, unsigned n) {
1318  assert(u && "Must provide dividend");
1319  assert(v && "Must provide divisor");
1320  assert(q && "Must provide quotient");
1321  assert(u != v && u != q && v != q && "Must use different memory");
1322  assert(n>1 && "n must be > 1");
1323
1324  // b denotes the base of the number system. In our case b is 2^32.
1325  const uint64_t b = uint64_t(1) << 32;
1326
1327// The DEBUG macros here tend to be spam in the debug output if you're not
1328// debugging this code. Disable them unless KNUTH_DEBUG is defined.
1329#ifdef KNUTH_DEBUG
1330#define DEBUG_KNUTH(X) LLVM_DEBUG(X)
1331#else
1332#define DEBUG_KNUTH(X) do {} while(false)
1333#endif
1334
1335  DEBUG_KNUTH(dbgs() << "KnuthDiv: m=" << m << " n=" << n << '\n');
1336  DEBUG_KNUTH(dbgs() << "KnuthDiv: original:");
1337  DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
1338  DEBUG_KNUTH(dbgs() << " by");
1339  DEBUG_KNUTH(for (int i = n; i > 0; i--) dbgs() << " " << v[i - 1]);
1340  DEBUG_KNUTH(dbgs() << '\n');
1341  // D1. [Normalize.] Set d = b / (v[n-1] + 1) and multiply all the digits of
1342  // u and v by d. Note that we have taken Knuth's advice here to use a power
1343  // of 2 value for d such that d * v[n-1] >= b/2 (b is the base). A power of
1344  // 2 allows us to shift instead of multiply and it is easy to determine the
1345  // shift amount from the leading zeros.  We are basically normalizing the u
1346  // and v so that its high bits are shifted to the top of v's range without
1347  // overflow. Note that this can require an extra word in u so that u must
1348  // be of length m+n+1.
1349  unsigned shift = countLeadingZeros(v[n-1]);
1350  uint32_t v_carry = 0;
1351  uint32_t u_carry = 0;
1352  if (shift) {
1353    for (unsigned i = 0; i < m+n; ++i) {
1354      uint32_t u_tmp = u[i] >> (32 - shift);
1355      u[i] = (u[i] << shift) | u_carry;
1356      u_carry = u_tmp;
1357    }
1358    for (unsigned i = 0; i < n; ++i) {
1359      uint32_t v_tmp = v[i] >> (32 - shift);
1360      v[i] = (v[i] << shift) | v_carry;
1361      v_carry = v_tmp;
1362    }
1363  }
1364  u[m+n] = u_carry;
1365
1366  DEBUG_KNUTH(dbgs() << "KnuthDiv:   normal:");
1367  DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
1368  DEBUG_KNUTH(dbgs() << " by");
1369  DEBUG_KNUTH(for (int i = n; i > 0; i--) dbgs() << " " << v[i - 1]);
1370  DEBUG_KNUTH(dbgs() << '\n');
1371
1372  // D2. [Initialize j.]  Set j to m. This is the loop counter over the places.
1373  int j = m;
1374  do {
1375    DEBUG_KNUTH(dbgs() << "KnuthDiv: quotient digit #" << j << '\n');
1376    // D3. [Calculate q'.].
1377    //     Set qp = (u[j+n]*b + u[j+n-1]) / v[n-1]. (qp=qprime=q')
1378    //     Set rp = (u[j+n]*b + u[j+n-1]) % v[n-1]. (rp=rprime=r')
1379    // Now test if qp == b or qp*v[n-2] > b*rp + u[j+n-2]; if so, decrease
1380    // qp by 1, increase rp by v[n-1], and repeat this test if rp < b. The test
1381    // on v[n-2] determines at high speed most of the cases in which the trial
1382    // value qp is one too large, and it eliminates all cases where qp is two
1383    // too large.
1384    uint64_t dividend = Make_64(u[j+n], u[j+n-1]);
1385    DEBUG_KNUTH(dbgs() << "KnuthDiv: dividend == " << dividend << '\n');
1386    uint64_t qp = dividend / v[n-1];
1387    uint64_t rp = dividend % v[n-1];
1388    if (qp == b || qp*v[n-2] > b*rp + u[j+n-2]) {
1389      qp--;
1390      rp += v[n-1];
1391      if (rp < b && (qp == b || qp*v[n-2] > b*rp + u[j+n-2]))
1392        qp--;
1393    }
1394    DEBUG_KNUTH(dbgs() << "KnuthDiv: qp == " << qp << ", rp == " << rp << '\n');
1395
1396    // D4. [Multiply and subtract.] Replace (u[j+n]u[j+n-1]...u[j]) with
1397    // (u[j+n]u[j+n-1]..u[j]) - qp * (v[n-1]...v[1]v[0]). This computation
1398    // consists of a simple multiplication by a one-place number, combined with
1399    // a subtraction.
1400    // The digits (u[j+n]...u[j]) should be kept positive; if the result of
1401    // this step is actually negative, (u[j+n]...u[j]) should be left as the
1402    // true value plus b**(n+1), namely as the b's complement of
1403    // the true value, and a "borrow" to the left should be remembered.
1404    int64_t borrow = 0;
1405    for (unsigned i = 0; i < n; ++i) {
1406      uint64_t p = uint64_t(qp) * uint64_t(v[i]);
1407      int64_t subres = int64_t(u[j+i]) - borrow - Lo_32(p);
1408      u[j+i] = Lo_32(subres);
1409      borrow = Hi_32(p) - Hi_32(subres);
1410      DEBUG_KNUTH(dbgs() << "KnuthDiv: u[j+i] = " << u[j + i]
1411                        << ", borrow = " << borrow << '\n');
1412    }
1413    bool isNeg = u[j+n] < borrow;
1414    u[j+n] -= Lo_32(borrow);
1415
1416    DEBUG_KNUTH(dbgs() << "KnuthDiv: after subtraction:");
1417    DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
1418    DEBUG_KNUTH(dbgs() << '\n');
1419
1420    // D5. [Test remainder.] Set q[j] = qp. If the result of step D4 was
1421    // negative, go to step D6; otherwise go on to step D7.
1422    q[j] = Lo_32(qp);
1423    if (isNeg) {
1424      // D6. [Add back]. The probability that this step is necessary is very
1425      // small, on the order of only 2/b. Make sure that test data accounts for
1426      // this possibility. Decrease q[j] by 1
1427      q[j]--;
1428      // and add (0v[n-1]...v[1]v[0]) to (u[j+n]u[j+n-1]...u[j+1]u[j]).
1429      // A carry will occur to the left of u[j+n], and it should be ignored
1430      // since it cancels with the borrow that occurred in D4.
1431      bool carry = false;
1432      for (unsigned i = 0; i < n; i++) {
1433        uint32_t limit = std::min(u[j+i],v[i]);
1434        u[j+i] += v[i] + carry;
1435        carry = u[j+i] < limit || (carry && u[j+i] == limit);
1436      }
1437      u[j+n] += carry;
1438    }
1439    DEBUG_KNUTH(dbgs() << "KnuthDiv: after correction:");
1440    DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
1441    DEBUG_KNUTH(dbgs() << "\nKnuthDiv: digit result = " << q[j] << '\n');
1442
1443    // D7. [Loop on j.]  Decrease j by one. Now if j >= 0, go back to D3.
1444  } while (--j >= 0);
1445
1446  DEBUG_KNUTH(dbgs() << "KnuthDiv: quotient:");
1447  DEBUG_KNUTH(for (int i = m; i >= 0; i--) dbgs() << " " << q[i]);
1448  DEBUG_KNUTH(dbgs() << '\n');
1449
1450  // D8. [Unnormalize]. Now q[...] is the desired quotient, and the desired
1451  // remainder may be obtained by dividing u[...] by d. If r is non-null we
1452  // compute the remainder (urem uses this).
1453  if (r) {
1454    // The value d is expressed by the "shift" value above since we avoided
1455    // multiplication by d by using a shift left. So, all we have to do is
1456    // shift right here.
1457    if (shift) {
1458      uint32_t carry = 0;
1459      DEBUG_KNUTH(dbgs() << "KnuthDiv: remainder:");
1460      for (int i = n-1; i >= 0; i--) {
1461        r[i] = (u[i] >> shift) | carry;
1462        carry = u[i] << (32 - shift);
1463        DEBUG_KNUTH(dbgs() << " " << r[i]);
1464      }
1465    } else {
1466      for (int i = n-1; i >= 0; i--) {
1467        r[i] = u[i];
1468        DEBUG_KNUTH(dbgs() << " " << r[i]);
1469      }
1470    }
1471    DEBUG_KNUTH(dbgs() << '\n');
1472  }
1473  DEBUG_KNUTH(dbgs() << '\n');
1474}
1475
1476void APInt::divide(const WordType *LHS, unsigned lhsWords, const WordType *RHS,
1477                   unsigned rhsWords, WordType *Quotient, WordType *Remainder) {
1478  assert(lhsWords >= rhsWords && "Fractional result");
1479
1480  // First, compose the values into an array of 32-bit words instead of
1481  // 64-bit words. This is a necessity of both the "short division" algorithm
1482  // and the Knuth "classical algorithm" which requires there to be native
1483  // operations for +, -, and * on an m bit value with an m*2 bit result. We
1484  // can't use 64-bit operands here because we don't have native results of
1485  // 128-bits. Furthermore, casting the 64-bit values to 32-bit values won't
1486  // work on large-endian machines.
1487  unsigned n = rhsWords * 2;
1488  unsigned m = (lhsWords * 2) - n;
1489
1490  // Allocate space for the temporary values we need either on the stack, if
1491  // it will fit, or on the heap if it won't.
1492  uint32_t SPACE[128];
1493  uint32_t *U = nullptr;
1494  uint32_t *V = nullptr;
1495  uint32_t *Q = nullptr;
1496  uint32_t *R = nullptr;
1497  if ((Remainder?4:3)*n+2*m+1 <= 128) {
1498    U = &SPACE[0];
1499    V = &SPACE[m+n+1];
1500    Q = &SPACE[(m+n+1) + n];
1501    if (Remainder)
1502      R = &SPACE[(m+n+1) + n + (m+n)];
1503  } else {
1504    U = new uint32_t[m + n + 1];
1505    V = new uint32_t[n];
1506    Q = new uint32_t[m+n];
1507    if (Remainder)
1508      R = new uint32_t[n];
1509  }
1510
1511  // Initialize the dividend
1512  memset(U, 0, (m+n+1)*sizeof(uint32_t));
1513  for (unsigned i = 0; i < lhsWords; ++i) {
1514    uint64_t tmp = LHS[i];
1515    U[i * 2] = Lo_32(tmp);
1516    U[i * 2 + 1] = Hi_32(tmp);
1517  }
1518  U[m+n] = 0; // this extra word is for "spill" in the Knuth algorithm.
1519
1520  // Initialize the divisor
1521  memset(V, 0, (n)*sizeof(uint32_t));
1522  for (unsigned i = 0; i < rhsWords; ++i) {
1523    uint64_t tmp = RHS[i];
1524    V[i * 2] = Lo_32(tmp);
1525    V[i * 2 + 1] = Hi_32(tmp);
1526  }
1527
1528  // initialize the quotient and remainder
1529  memset(Q, 0, (m+n) * sizeof(uint32_t));
1530  if (Remainder)
1531    memset(R, 0, n * sizeof(uint32_t));
1532
1533  // Now, adjust m and n for the Knuth division. n is the number of words in
1534  // the divisor. m is the number of words by which the dividend exceeds the
1535  // divisor (i.e. m+n is the length of the dividend). These sizes must not
1536  // contain any zero words or the Knuth algorithm fails.
1537  for (unsigned i = n; i > 0 && V[i-1] == 0; i--) {
1538    n--;
1539    m++;
1540  }
1541  for (unsigned i = m+n; i > 0 && U[i-1] == 0; i--)
1542    m--;
1543
1544  // If we're left with only a single word for the divisor, Knuth doesn't work
1545  // so we implement the short division algorithm here. This is much simpler
1546  // and faster because we are certain that we can divide a 64-bit quantity
1547  // by a 32-bit quantity at hardware speed and short division is simply a
1548  // series of such operations. This is just like doing short division but we
1549  // are using base 2^32 instead of base 10.
1550  assert(n != 0 && "Divide by zero?");
1551  if (n == 1) {
1552    uint32_t divisor = V[0];
1553    uint32_t remainder = 0;
1554    for (int i = m; i >= 0; i--) {
1555      uint64_t partial_dividend = Make_64(remainder, U[i]);
1556      if (partial_dividend == 0) {
1557        Q[i] = 0;
1558        remainder = 0;
1559      } else if (partial_dividend < divisor) {
1560        Q[i] = 0;
1561        remainder = Lo_32(partial_dividend);
1562      } else if (partial_dividend == divisor) {
1563        Q[i] = 1;
1564        remainder = 0;
1565      } else {
1566        Q[i] = Lo_32(partial_dividend / divisor);
1567        remainder = Lo_32(partial_dividend - (Q[i] * divisor));
1568      }
1569    }
1570    if (R)
1571      R[0] = remainder;
1572  } else {
1573    // Now we're ready to invoke the Knuth classical divide algorithm. In this
1574    // case n > 1.
1575    KnuthDiv(U, V, Q, R, m, n);
1576  }
1577
1578  // If the caller wants the quotient
1579  if (Quotient) {
1580    for (unsigned i = 0; i < lhsWords; ++i)
1581      Quotient[i] = Make_64(Q[i*2+1], Q[i*2]);
1582  }
1583
1584  // If the caller wants the remainder
1585  if (Remainder) {
1586    for (unsigned i = 0; i < rhsWords; ++i)
1587      Remainder[i] = Make_64(R[i*2+1], R[i*2]);
1588  }
1589
1590  // Clean up the memory we allocated.
1591  if (U != &SPACE[0]) {
1592    delete [] U;
1593    delete [] V;
1594    delete [] Q;
1595    delete [] R;
1596  }
1597}
1598
1599APInt APInt::udiv(const APInt &RHS) const {
1600  assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
1601
1602  // First, deal with the easy case
1603  if (isSingleWord()) {
1604    assert(RHS.U.VAL != 0 && "Divide by zero?");
1605    return APInt(BitWidth, U.VAL / RHS.U.VAL);
1606  }
1607
1608  // Get some facts about the LHS and RHS number of bits and words
1609  unsigned lhsWords = getNumWords(getActiveBits());
1610  unsigned rhsBits  = RHS.getActiveBits();
1611  unsigned rhsWords = getNumWords(rhsBits);
1612  assert(rhsWords && "Divided by zero???");
1613
1614  // Deal with some degenerate cases
1615  if (!lhsWords)
1616    // 0 / X ===> 0
1617    return APInt(BitWidth, 0);
1618  if (rhsBits == 1)
1619    // X / 1 ===> X
1620    return *this;
1621  if (lhsWords < rhsWords || this->ult(RHS))
1622    // X / Y ===> 0, iff X < Y
1623    return APInt(BitWidth, 0);
1624  if (*this == RHS)
1625    // X / X ===> 1
1626    return APInt(BitWidth, 1);
1627  if (lhsWords == 1) // rhsWords is 1 if lhsWords is 1.
1628    // All high words are zero, just use native divide
1629    return APInt(BitWidth, this->U.pVal[0] / RHS.U.pVal[0]);
1630
1631  // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1632  APInt Quotient(BitWidth, 0); // to hold result.
1633  divide(U.pVal, lhsWords, RHS.U.pVal, rhsWords, Quotient.U.pVal, nullptr);
1634  return Quotient;
1635}
1636
1637APInt APInt::udiv(uint64_t RHS) const {
1638  assert(RHS != 0 && "Divide by zero?");
1639
1640  // First, deal with the easy case
1641  if (isSingleWord())
1642    return APInt(BitWidth, U.VAL / RHS);
1643
1644  // Get some facts about the LHS words.
1645  unsigned lhsWords = getNumWords(getActiveBits());
1646
1647  // Deal with some degenerate cases
1648  if (!lhsWords)
1649    // 0 / X ===> 0
1650    return APInt(BitWidth, 0);
1651  if (RHS == 1)
1652    // X / 1 ===> X
1653    return *this;
1654  if (this->ult(RHS))
1655    // X / Y ===> 0, iff X < Y
1656    return APInt(BitWidth, 0);
1657  if (*this == RHS)
1658    // X / X ===> 1
1659    return APInt(BitWidth, 1);
1660  if (lhsWords == 1) // rhsWords is 1 if lhsWords is 1.
1661    // All high words are zero, just use native divide
1662    return APInt(BitWidth, this->U.pVal[0] / RHS);
1663
1664  // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1665  APInt Quotient(BitWidth, 0); // to hold result.
1666  divide(U.pVal, lhsWords, &RHS, 1, Quotient.U.pVal, nullptr);
1667  return Quotient;
1668}
1669
1670APInt APInt::sdiv(const APInt &RHS) const {
1671  if (isNegative()) {
1672    if (RHS.isNegative())
1673      return (-(*this)).udiv(-RHS);
1674    return -((-(*this)).udiv(RHS));
1675  }
1676  if (RHS.isNegative())
1677    return -(this->udiv(-RHS));
1678  return this->udiv(RHS);
1679}
1680
1681APInt APInt::sdiv(int64_t RHS) const {
1682  if (isNegative()) {
1683    if (RHS < 0)
1684      return (-(*this)).udiv(-RHS);
1685    return -((-(*this)).udiv(RHS));
1686  }
1687  if (RHS < 0)
1688    return -(this->udiv(-RHS));
1689  return this->udiv(RHS);
1690}
1691
1692APInt APInt::urem(const APInt &RHS) const {
1693  assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
1694  if (isSingleWord()) {
1695    assert(RHS.U.VAL != 0 && "Remainder by zero?");
1696    return APInt(BitWidth, U.VAL % RHS.U.VAL);
1697  }
1698
1699  // Get some facts about the LHS
1700  unsigned lhsWords = getNumWords(getActiveBits());
1701
1702  // Get some facts about the RHS
1703  unsigned rhsBits = RHS.getActiveBits();
1704  unsigned rhsWords = getNumWords(rhsBits);
1705  assert(rhsWords && "Performing remainder operation by zero ???");
1706
1707  // Check the degenerate cases
1708  if (lhsWords == 0)
1709    // 0 % Y ===> 0
1710    return APInt(BitWidth, 0);
1711  if (rhsBits == 1)
1712    // X % 1 ===> 0
1713    return APInt(BitWidth, 0);
1714  if (lhsWords < rhsWords || this->ult(RHS))
1715    // X % Y ===> X, iff X < Y
1716    return *this;
1717  if (*this == RHS)
1718    // X % X == 0;
1719    return APInt(BitWidth, 0);
1720  if (lhsWords == 1)
1721    // All high words are zero, just use native remainder
1722    return APInt(BitWidth, U.pVal[0] % RHS.U.pVal[0]);
1723
1724  // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1725  APInt Remainder(BitWidth, 0);
1726  divide(U.pVal, lhsWords, RHS.U.pVal, rhsWords, nullptr, Remainder.U.pVal);
1727  return Remainder;
1728}
1729
1730uint64_t APInt::urem(uint64_t RHS) const {
1731  assert(RHS != 0 && "Remainder by zero?");
1732
1733  if (isSingleWord())
1734    return U.VAL % RHS;
1735
1736  // Get some facts about the LHS
1737  unsigned lhsWords = getNumWords(getActiveBits());
1738
1739  // Check the degenerate cases
1740  if (lhsWords == 0)
1741    // 0 % Y ===> 0
1742    return 0;
1743  if (RHS == 1)
1744    // X % 1 ===> 0
1745    return 0;
1746  if (this->ult(RHS))
1747    // X % Y ===> X, iff X < Y
1748    return getZExtValue();
1749  if (*this == RHS)
1750    // X % X == 0;
1751    return 0;
1752  if (lhsWords == 1)
1753    // All high words are zero, just use native remainder
1754    return U.pVal[0] % RHS;
1755
1756  // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1757  uint64_t Remainder;
1758  divide(U.pVal, lhsWords, &RHS, 1, nullptr, &Remainder);
1759  return Remainder;
1760}
1761
1762APInt APInt::srem(const APInt &RHS) const {
1763  if (isNegative()) {
1764    if (RHS.isNegative())
1765      return -((-(*this)).urem(-RHS));
1766    return -((-(*this)).urem(RHS));
1767  }
1768  if (RHS.isNegative())
1769    return this->urem(-RHS);
1770  return this->urem(RHS);
1771}
1772
1773int64_t APInt::srem(int64_t RHS) const {
1774  if (isNegative()) {
1775    if (RHS < 0)
1776      return -((-(*this)).urem(-RHS));
1777    return -((-(*this)).urem(RHS));
1778  }
1779  if (RHS < 0)
1780    return this->urem(-RHS);
1781  return this->urem(RHS);
1782}
1783
1784void APInt::udivrem(const APInt &LHS, const APInt &RHS,
1785                    APInt &Quotient, APInt &Remainder) {
1786  assert(LHS.BitWidth == RHS.BitWidth && "Bit widths must be the same");
1787  unsigned BitWidth = LHS.BitWidth;
1788
1789  // First, deal with the easy case
1790  if (LHS.isSingleWord()) {
1791    assert(RHS.U.VAL != 0 && "Divide by zero?");
1792    uint64_t QuotVal = LHS.U.VAL / RHS.U.VAL;
1793    uint64_t RemVal = LHS.U.VAL % RHS.U.VAL;
1794    Quotient = APInt(BitWidth, QuotVal);
1795    Remainder = APInt(BitWidth, RemVal);
1796    return;
1797  }
1798
1799  // Get some size facts about the dividend and divisor
1800  unsigned lhsWords = getNumWords(LHS.getActiveBits());
1801  unsigned rhsBits  = RHS.getActiveBits();
1802  unsigned rhsWords = getNumWords(rhsBits);
1803  assert(rhsWords && "Performing divrem operation by zero ???");
1804
1805  // Check the degenerate cases
1806  if (lhsWords == 0) {
1807    Quotient = APInt(BitWidth, 0);    // 0 / Y ===> 0
1808    Remainder = APInt(BitWidth, 0);   // 0 % Y ===> 0
1809    return;
1810  }
1811
1812  if (rhsBits == 1) {
1813    Quotient = LHS;                   // X / 1 ===> X
1814    Remainder = APInt(BitWidth, 0);   // X % 1 ===> 0
1815  }
1816
1817  if (lhsWords < rhsWords || LHS.ult(RHS)) {
1818    Remainder = LHS;                  // X % Y ===> X, iff X < Y
1819    Quotient = APInt(BitWidth, 0);    // X / Y ===> 0, iff X < Y
1820    return;
1821  }
1822
1823  if (LHS == RHS) {
1824    Quotient  = APInt(BitWidth, 1);   // X / X ===> 1
1825    Remainder = APInt(BitWidth, 0);   // X % X ===> 0;
1826    return;
1827  }
1828
1829  // Make sure there is enough space to hold the results.
1830  // NOTE: This assumes that reallocate won't affect any bits if it doesn't
1831  // change the size. This is necessary if Quotient or Remainder is aliased
1832  // with LHS or RHS.
1833  Quotient.reallocate(BitWidth);
1834  Remainder.reallocate(BitWidth);
1835
1836  if (lhsWords == 1) { // rhsWords is 1 if lhsWords is 1.
1837    // There is only one word to consider so use the native versions.
1838    uint64_t lhsValue = LHS.U.pVal[0];
1839    uint64_t rhsValue = RHS.U.pVal[0];
1840    Quotient = lhsValue / rhsValue;
1841    Remainder = lhsValue % rhsValue;
1842    return;
1843  }
1844
1845  // Okay, lets do it the long way
1846  divide(LHS.U.pVal, lhsWords, RHS.U.pVal, rhsWords, Quotient.U.pVal,
1847         Remainder.U.pVal);
1848  // Clear the rest of the Quotient and Remainder.
1849  std::memset(Quotient.U.pVal + lhsWords, 0,
1850              (getNumWords(BitWidth) - lhsWords) * APINT_WORD_SIZE);
1851  std::memset(Remainder.U.pVal + rhsWords, 0,
1852              (getNumWords(BitWidth) - rhsWords) * APINT_WORD_SIZE);
1853}
1854
1855void APInt::udivrem(const APInt &LHS, uint64_t RHS, APInt &Quotient,
1856                    uint64_t &Remainder) {
1857  assert(RHS != 0 && "Divide by zero?");
1858  unsigned BitWidth = LHS.BitWidth;
1859
1860  // First, deal with the easy case
1861  if (LHS.isSingleWord()) {
1862    uint64_t QuotVal = LHS.U.VAL / RHS;
1863    Remainder = LHS.U.VAL % RHS;
1864    Quotient = APInt(BitWidth, QuotVal);
1865    return;
1866  }
1867
1868  // Get some size facts about the dividend and divisor
1869  unsigned lhsWords = getNumWords(LHS.getActiveBits());
1870
1871  // Check the degenerate cases
1872  if (lhsWords == 0) {
1873    Quotient = APInt(BitWidth, 0);    // 0 / Y ===> 0
1874    Remainder = 0;                    // 0 % Y ===> 0
1875    return;
1876  }
1877
1878  if (RHS == 1) {
1879    Quotient = LHS;                   // X / 1 ===> X
1880    Remainder = 0;                    // X % 1 ===> 0
1881    return;
1882  }
1883
1884  if (LHS.ult(RHS)) {
1885    Remainder = LHS.getZExtValue();   // X % Y ===> X, iff X < Y
1886    Quotient = APInt(BitWidth, 0);    // X / Y ===> 0, iff X < Y
1887    return;
1888  }
1889
1890  if (LHS == RHS) {
1891    Quotient  = APInt(BitWidth, 1);   // X / X ===> 1
1892    Remainder = 0;                    // X % X ===> 0;
1893    return;
1894  }
1895
1896  // Make sure there is enough space to hold the results.
1897  // NOTE: This assumes that reallocate won't affect any bits if it doesn't
1898  // change the size. This is necessary if Quotient is aliased with LHS.
1899  Quotient.reallocate(BitWidth);
1900
1901  if (lhsWords == 1) { // rhsWords is 1 if lhsWords is 1.
1902    // There is only one word to consider so use the native versions.
1903    uint64_t lhsValue = LHS.U.pVal[0];
1904    Quotient = lhsValue / RHS;
1905    Remainder = lhsValue % RHS;
1906    return;
1907  }
1908
1909  // Okay, lets do it the long way
1910  divide(LHS.U.pVal, lhsWords, &RHS, 1, Quotient.U.pVal, &Remainder);
1911  // Clear the rest of the Quotient.
1912  std::memset(Quotient.U.pVal + lhsWords, 0,
1913              (getNumWords(BitWidth) - lhsWords) * APINT_WORD_SIZE);
1914}
1915
1916void APInt::sdivrem(const APInt &LHS, const APInt &RHS,
1917                    APInt &Quotient, APInt &Remainder) {
1918  if (LHS.isNegative()) {
1919    if (RHS.isNegative())
1920      APInt::udivrem(-LHS, -RHS, Quotient, Remainder);
1921    else {
1922      APInt::udivrem(-LHS, RHS, Quotient, Remainder);
1923      Quotient.negate();
1924    }
1925    Remainder.negate();
1926  } else if (RHS.isNegative()) {
1927    APInt::udivrem(LHS, -RHS, Quotient, Remainder);
1928    Quotient.negate();
1929  } else {
1930    APInt::udivrem(LHS, RHS, Quotient, Remainder);
1931  }
1932}
1933
1934void APInt::sdivrem(const APInt &LHS, int64_t RHS,
1935                    APInt &Quotient, int64_t &Remainder) {
1936  uint64_t R = Remainder;
1937  if (LHS.isNegative()) {
1938    if (RHS < 0)
1939      APInt::udivrem(-LHS, -RHS, Quotient, R);
1940    else {
1941      APInt::udivrem(-LHS, RHS, Quotient, R);
1942      Quotient.negate();
1943    }
1944    R = -R;
1945  } else if (RHS < 0) {
1946    APInt::udivrem(LHS, -RHS, Quotient, R);
1947    Quotient.negate();
1948  } else {
1949    APInt::udivrem(LHS, RHS, Quotient, R);
1950  }
1951  Remainder = R;
1952}
1953
1954APInt APInt::sadd_ov(const APInt &RHS, bool &Overflow) const {
1955  APInt Res = *this+RHS;
1956  Overflow = isNonNegative() == RHS.isNonNegative() &&
1957             Res.isNonNegative() != isNonNegative();
1958  return Res;
1959}
1960
1961APInt APInt::uadd_ov(const APInt &RHS, bool &Overflow) const {
1962  APInt Res = *this+RHS;
1963  Overflow = Res.ult(RHS);
1964  return Res;
1965}
1966
1967APInt APInt::ssub_ov(const APInt &RHS, bool &Overflow) const {
1968  APInt Res = *this - RHS;
1969  Overflow = isNonNegative() != RHS.isNonNegative() &&
1970             Res.isNonNegative() != isNonNegative();
1971  return Res;
1972}
1973
1974APInt APInt::usub_ov(const APInt &RHS, bool &Overflow) const {
1975  APInt Res = *this-RHS;
1976  Overflow = Res.ugt(*this);
1977  return Res;
1978}
1979
1980APInt APInt::sdiv_ov(const APInt &RHS, bool &Overflow) const {
1981  // MININT/-1  -->  overflow.
1982  Overflow = isMinSignedValue() && RHS.isAllOnesValue();
1983  return sdiv(RHS);
1984}
1985
1986APInt APInt::smul_ov(const APInt &RHS, bool &Overflow) const {
1987  APInt Res = *this * RHS;
1988
1989  if (*this != 0 && RHS != 0)
1990    Overflow = Res.sdiv(RHS) != *this || Res.sdiv(*this) != RHS;
1991  else
1992    Overflow = false;
1993  return Res;
1994}
1995
1996APInt APInt::umul_ov(const APInt &RHS, bool &Overflow) const {
1997  if (countLeadingZeros() + RHS.countLeadingZeros() + 2 <= BitWidth) {
1998    Overflow = true;
1999    return *this * RHS;
2000  }
2001
2002  APInt Res = lshr(1) * RHS;
2003  Overflow = Res.isNegative();
2004  Res <<= 1;
2005  if ((*this)[0]) {
2006    Res += RHS;
2007    if (Res.ult(RHS))
2008      Overflow = true;
2009  }
2010  return Res;
2011}
2012
2013APInt APInt::sshl_ov(const APInt &ShAmt, bool &Overflow) const {
2014  Overflow = ShAmt.uge(getBitWidth());
2015  if (Overflow)
2016    return APInt(BitWidth, 0);
2017
2018  if (isNonNegative()) // Don't allow sign change.
2019    Overflow = ShAmt.uge(countLeadingZeros());
2020  else
2021    Overflow = ShAmt.uge(countLeadingOnes());
2022
2023  return *this << ShAmt;
2024}
2025
2026APInt APInt::ushl_ov(const APInt &ShAmt, bool &Overflow) const {
2027  Overflow = ShAmt.uge(getBitWidth());
2028  if (Overflow)
2029    return APInt(BitWidth, 0);
2030
2031  Overflow = ShAmt.ugt(countLeadingZeros());
2032
2033  return *this << ShAmt;
2034}
2035
2036APInt APInt::sadd_sat(const APInt &RHS) const {
2037  bool Overflow;
2038  APInt Res = sadd_ov(RHS, Overflow);
2039  if (!Overflow)
2040    return Res;
2041
2042  return isNegative() ? APInt::getSignedMinValue(BitWidth)
2043                      : APInt::getSignedMaxValue(BitWidth);
2044}
2045
2046APInt APInt::uadd_sat(const APInt &RHS) const {
2047  bool Overflow;
2048  APInt Res = uadd_ov(RHS, Overflow);
2049  if (!Overflow)
2050    return Res;
2051
2052  return APInt::getMaxValue(BitWidth);
2053}
2054
2055APInt APInt::ssub_sat(const APInt &RHS) const {
2056  bool Overflow;
2057  APInt Res = ssub_ov(RHS, Overflow);
2058  if (!Overflow)
2059    return Res;
2060
2061  return isNegative() ? APInt::getSignedMinValue(BitWidth)
2062                      : APInt::getSignedMaxValue(BitWidth);
2063}
2064
2065APInt APInt::usub_sat(const APInt &RHS) const {
2066  bool Overflow;
2067  APInt Res = usub_ov(RHS, Overflow);
2068  if (!Overflow)
2069    return Res;
2070
2071  return APInt(BitWidth, 0);
2072}
2073
2074APInt APInt::smul_sat(const APInt &RHS) const {
2075  bool Overflow;
2076  APInt Res = smul_ov(RHS, Overflow);
2077  if (!Overflow)
2078    return Res;
2079
2080  // The result is negative if one and only one of inputs is negative.
2081  bool ResIsNegative = isNegative() ^ RHS.isNegative();
2082
2083  return ResIsNegative ? APInt::getSignedMinValue(BitWidth)
2084                       : APInt::getSignedMaxValue(BitWidth);
2085}
2086
2087APInt APInt::umul_sat(const APInt &RHS) const {
2088  bool Overflow;
2089  APInt Res = umul_ov(RHS, Overflow);
2090  if (!Overflow)
2091    return Res;
2092
2093  return APInt::getMaxValue(BitWidth);
2094}
2095
2096APInt APInt::sshl_sat(const APInt &RHS) const {
2097  bool Overflow;
2098  APInt Res = sshl_ov(RHS, Overflow);
2099  if (!Overflow)
2100    return Res;
2101
2102  return isNegative() ? APInt::getSignedMinValue(BitWidth)
2103                      : APInt::getSignedMaxValue(BitWidth);
2104}
2105
2106APInt APInt::ushl_sat(const APInt &RHS) const {
2107  bool Overflow;
2108  APInt Res = ushl_ov(RHS, Overflow);
2109  if (!Overflow)
2110    return Res;
2111
2112  return APInt::getMaxValue(BitWidth);
2113}
2114
2115void APInt::fromString(unsigned numbits, StringRef str, uint8_t radix) {
2116  // Check our assumptions here
2117  assert(!str.empty() && "Invalid string length");
2118  assert((radix == 10 || radix == 8 || radix == 16 || radix == 2 ||
2119          radix == 36) &&
2120         "Radix should be 2, 8, 10, 16, or 36!");
2121
2122  StringRef::iterator p = str.begin();
2123  size_t slen = str.size();
2124  bool isNeg = *p == '-';
2125  if (*p == '-' || *p == '+') {
2126    p++;
2127    slen--;
2128    assert(slen && "String is only a sign, needs a value.");
2129  }
2130  assert((slen <= numbits || radix != 2) && "Insufficient bit width");
2131  assert(((slen-1)*3 <= numbits || radix != 8) && "Insufficient bit width");
2132  assert(((slen-1)*4 <= numbits || radix != 16) && "Insufficient bit width");
2133  assert((((slen-1)*64)/22 <= numbits || radix != 10) &&
2134         "Insufficient bit width");
2135
2136  // Allocate memory if needed
2137  if (isSingleWord())
2138    U.VAL = 0;
2139  else
2140    U.pVal = getClearedMemory(getNumWords());
2141
2142  // Figure out if we can shift instead of multiply
2143  unsigned shift = (radix == 16 ? 4 : radix == 8 ? 3 : radix == 2 ? 1 : 0);
2144
2145  // Enter digit traversal loop
2146  for (StringRef::iterator e = str.end(); p != e; ++p) {
2147    unsigned digit = getDigit(*p, radix);
2148    assert(digit < radix && "Invalid character in digit string");
2149
2150    // Shift or multiply the value by the radix
2151    if (slen > 1) {
2152      if (shift)
2153        *this <<= shift;
2154      else
2155        *this *= radix;
2156    }
2157
2158    // Add in the digit we just interpreted
2159    *this += digit;
2160  }
2161  // If its negative, put it in two's complement form
2162  if (isNeg)
2163    this->negate();
2164}
2165
2166void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix,
2167                     bool Signed, bool formatAsCLiteral) const {
2168  assert((Radix == 10 || Radix == 8 || Radix == 16 || Radix == 2 ||
2169          Radix == 36) &&
2170         "Radix should be 2, 8, 10, 16, or 36!");
2171
2172  const char *Prefix = "";
2173  if (formatAsCLiteral) {
2174    switch (Radix) {
2175      case 2:
2176        // Binary literals are a non-standard extension added in gcc 4.3:
2177        // http://gcc.gnu.org/onlinedocs/gcc-4.3.0/gcc/Binary-constants.html
2178        Prefix = "0b";
2179        break;
2180      case 8:
2181        Prefix = "0";
2182        break;
2183      case 10:
2184        break; // No prefix
2185      case 16:
2186        Prefix = "0x";
2187        break;
2188      default:
2189        llvm_unreachable("Invalid radix!");
2190    }
2191  }
2192
2193  // First, check for a zero value and just short circuit the logic below.
2194  if (*this == 0) {
2195    while (*Prefix) {
2196      Str.push_back(*Prefix);
2197      ++Prefix;
2198    };
2199    Str.push_back('0');
2200    return;
2201  }
2202
2203  static const char Digits[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
2204
2205  if (isSingleWord()) {
2206    char Buffer[65];
2207    char *BufPtr = std::end(Buffer);
2208
2209    uint64_t N;
2210    if (!Signed) {
2211      N = getZExtValue();
2212    } else {
2213      int64_t I = getSExtValue();
2214      if (I >= 0) {
2215        N = I;
2216      } else {
2217        Str.push_back('-');
2218        N = -(uint64_t)I;
2219      }
2220    }
2221
2222    while (*Prefix) {
2223      Str.push_back(*Prefix);
2224      ++Prefix;
2225    };
2226
2227    while (N) {
2228      *--BufPtr = Digits[N % Radix];
2229      N /= Radix;
2230    }
2231    Str.append(BufPtr, std::end(Buffer));
2232    return;
2233  }
2234
2235  APInt Tmp(*this);
2236
2237  if (Signed && isNegative()) {
2238    // They want to print the signed version and it is a negative value
2239    // Flip the bits and add one to turn it into the equivalent positive
2240    // value and put a '-' in the result.
2241    Tmp.negate();
2242    Str.push_back('-');
2243  }
2244
2245  while (*Prefix) {
2246    Str.push_back(*Prefix);
2247    ++Prefix;
2248  };
2249
2250  // We insert the digits backward, then reverse them to get the right order.
2251  unsigned StartDig = Str.size();
2252
2253  // For the 2, 8 and 16 bit cases, we can just shift instead of divide
2254  // because the number of bits per digit (1, 3 and 4 respectively) divides
2255  // equally.  We just shift until the value is zero.
2256  if (Radix == 2 || Radix == 8 || Radix == 16) {
2257    // Just shift tmp right for each digit width until it becomes zero
2258    unsigned ShiftAmt = (Radix == 16 ? 4 : (Radix == 8 ? 3 : 1));
2259    unsigned MaskAmt = Radix - 1;
2260
2261    while (Tmp.getBoolValue()) {
2262      unsigned Digit = unsigned(Tmp.getRawData()[0]) & MaskAmt;
2263      Str.push_back(Digits[Digit]);
2264      Tmp.lshrInPlace(ShiftAmt);
2265    }
2266  } else {
2267    while (Tmp.getBoolValue()) {
2268      uint64_t Digit;
2269      udivrem(Tmp, Radix, Tmp, Digit);
2270      assert(Digit < Radix && "divide failed");
2271      Str.push_back(Digits[Digit]);
2272    }
2273  }
2274
2275  // Reverse the digits before returning.
2276  std::reverse(Str.begin()+StartDig, Str.end());
2277}
2278
2279/// Returns the APInt as a std::string. Note that this is an inefficient method.
2280/// It is better to pass in a SmallVector/SmallString to the methods above.
2281std::string APInt::toString(unsigned Radix = 10, bool Signed = true) const {
2282  SmallString<40> S;
2283  toString(S, Radix, Signed, /* formatAsCLiteral = */false);
2284  return std::string(S.str());
2285}
2286
2287#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2288LLVM_DUMP_METHOD void APInt::dump() const {
2289  SmallString<40> S, U;
2290  this->toStringUnsigned(U);
2291  this->toStringSigned(S);
2292  dbgs() << "APInt(" << BitWidth << "b, "
2293         << U << "u " << S << "s)\n";
2294}
2295#endif
2296
2297void APInt::print(raw_ostream &OS, bool isSigned) const {
2298  SmallString<40> S;
2299  this->toString(S, 10, isSigned, /* formatAsCLiteral = */false);
2300  OS << S;
2301}
2302
2303// This implements a variety of operations on a representation of
2304// arbitrary precision, two's-complement, bignum integer values.
2305
2306// Assumed by lowHalf, highHalf, partMSB and partLSB.  A fairly safe
2307// and unrestricting assumption.
2308static_assert(APInt::APINT_BITS_PER_WORD % 2 == 0,
2309              "Part width must be divisible by 2!");
2310
2311/* Some handy functions local to this file.  */
2312
2313/* Returns the integer part with the least significant BITS set.
2314   BITS cannot be zero.  */
2315static inline APInt::WordType lowBitMask(unsigned bits) {
2316  assert(bits != 0 && bits <= APInt::APINT_BITS_PER_WORD);
2317
2318  return ~(APInt::WordType) 0 >> (APInt::APINT_BITS_PER_WORD - bits);
2319}
2320
2321/* Returns the value of the lower half of PART.  */
2322static inline APInt::WordType lowHalf(APInt::WordType part) {
2323  return part & lowBitMask(APInt::APINT_BITS_PER_WORD / 2);
2324}
2325
2326/* Returns the value of the upper half of PART.  */
2327static inline APInt::WordType highHalf(APInt::WordType part) {
2328  return part >> (APInt::APINT_BITS_PER_WORD / 2);
2329}
2330
2331/* Returns the bit number of the most significant set bit of a part.
2332   If the input number has no bits set -1U is returned.  */
2333static unsigned partMSB(APInt::WordType value) {
2334  return findLastSet(value, ZB_Max);
2335}
2336
2337/* Returns the bit number of the least significant set bit of a
2338   part.  If the input number has no bits set -1U is returned.  */
2339static unsigned partLSB(APInt::WordType value) {
2340  return findFirstSet(value, ZB_Max);
2341}
2342
2343/* Sets the least significant part of a bignum to the input value, and
2344   zeroes out higher parts.  */
2345void APInt::tcSet(WordType *dst, WordType part, unsigned parts) {
2346  assert(parts > 0);
2347
2348  dst[0] = part;
2349  for (unsigned i = 1; i < parts; i++)
2350    dst[i] = 0;
2351}
2352
2353/* Assign one bignum to another.  */
2354void APInt::tcAssign(WordType *dst, const WordType *src, unsigned parts) {
2355  for (unsigned i = 0; i < parts; i++)
2356    dst[i] = src[i];
2357}
2358
2359/* Returns true if a bignum is zero, false otherwise.  */
2360bool APInt::tcIsZero(const WordType *src, unsigned parts) {
2361  for (unsigned i = 0; i < parts; i++)
2362    if (src[i])
2363      return false;
2364
2365  return true;
2366}
2367
2368/* Extract the given bit of a bignum; returns 0 or 1.  */
2369int APInt::tcExtractBit(const WordType *parts, unsigned bit) {
2370  return (parts[whichWord(bit)] & maskBit(bit)) != 0;
2371}
2372
2373/* Set the given bit of a bignum. */
2374void APInt::tcSetBit(WordType *parts, unsigned bit) {
2375  parts[whichWord(bit)] |= maskBit(bit);
2376}
2377
2378/* Clears the given bit of a bignum. */
2379void APInt::tcClearBit(WordType *parts, unsigned bit) {
2380  parts[whichWord(bit)] &= ~maskBit(bit);
2381}
2382
2383/* Returns the bit number of the least significant set bit of a
2384   number.  If the input number has no bits set -1U is returned.  */
2385unsigned APInt::tcLSB(const WordType *parts, unsigned n) {
2386  for (unsigned i = 0; i < n; i++) {
2387    if (parts[i] != 0) {
2388      unsigned lsb = partLSB(parts[i]);
2389
2390      return lsb + i * APINT_BITS_PER_WORD;
2391    }
2392  }
2393
2394  return -1U;
2395}
2396
2397/* Returns the bit number of the most significant set bit of a number.
2398   If the input number has no bits set -1U is returned.  */
2399unsigned APInt::tcMSB(const WordType *parts, unsigned n) {
2400  do {
2401    --n;
2402
2403    if (parts[n] != 0) {
2404      unsigned msb = partMSB(parts[n]);
2405
2406      return msb + n * APINT_BITS_PER_WORD;
2407    }
2408  } while (n);
2409
2410  return -1U;
2411}
2412
2413/* Copy the bit vector of width srcBITS from SRC, starting at bit
2414   srcLSB, to DST, of dstCOUNT parts, such that the bit srcLSB becomes
2415   the least significant bit of DST.  All high bits above srcBITS in
2416   DST are zero-filled.  */
2417void
2418APInt::tcExtract(WordType *dst, unsigned dstCount, const WordType *src,
2419                 unsigned srcBits, unsigned srcLSB) {
2420  unsigned dstParts = (srcBits + APINT_BITS_PER_WORD - 1) / APINT_BITS_PER_WORD;
2421  assert(dstParts <= dstCount);
2422
2423  unsigned firstSrcPart = srcLSB / APINT_BITS_PER_WORD;
2424  tcAssign (dst, src + firstSrcPart, dstParts);
2425
2426  unsigned shift = srcLSB % APINT_BITS_PER_WORD;
2427  tcShiftRight (dst, dstParts, shift);
2428
2429  /* We now have (dstParts * APINT_BITS_PER_WORD - shift) bits from SRC
2430     in DST.  If this is less that srcBits, append the rest, else
2431     clear the high bits.  */
2432  unsigned n = dstParts * APINT_BITS_PER_WORD - shift;
2433  if (n < srcBits) {
2434    WordType mask = lowBitMask (srcBits - n);
2435    dst[dstParts - 1] |= ((src[firstSrcPart + dstParts] & mask)
2436                          << n % APINT_BITS_PER_WORD);
2437  } else if (n > srcBits) {
2438    if (srcBits % APINT_BITS_PER_WORD)
2439      dst[dstParts - 1] &= lowBitMask (srcBits % APINT_BITS_PER_WORD);
2440  }
2441
2442  /* Clear high parts.  */
2443  while (dstParts < dstCount)
2444    dst[dstParts++] = 0;
2445}
2446
2447/* DST += RHS + C where C is zero or one.  Returns the carry flag.  */
2448APInt::WordType APInt::tcAdd(WordType *dst, const WordType *rhs,
2449                             WordType c, unsigned parts) {
2450  assert(c <= 1);
2451
2452  for (unsigned i = 0; i < parts; i++) {
2453    WordType l = dst[i];
2454    if (c) {
2455      dst[i] += rhs[i] + 1;
2456      c = (dst[i] <= l);
2457    } else {
2458      dst[i] += rhs[i];
2459      c = (dst[i] < l);
2460    }
2461  }
2462
2463  return c;
2464}
2465
2466/// This function adds a single "word" integer, src, to the multiple
2467/// "word" integer array, dst[]. dst[] is modified to reflect the addition and
2468/// 1 is returned if there is a carry out, otherwise 0 is returned.
2469/// @returns the carry of the addition.
2470APInt::WordType APInt::tcAddPart(WordType *dst, WordType src,
2471                                 unsigned parts) {
2472  for (unsigned i = 0; i < parts; ++i) {
2473    dst[i] += src;
2474    if (dst[i] >= src)
2475      return 0; // No need to carry so exit early.
2476    src = 1; // Carry one to next digit.
2477  }
2478
2479  return 1;
2480}
2481
2482/* DST -= RHS + C where C is zero or one.  Returns the carry flag.  */
2483APInt::WordType APInt::tcSubtract(WordType *dst, const WordType *rhs,
2484                                  WordType c, unsigned parts) {
2485  assert(c <= 1);
2486
2487  for (unsigned i = 0; i < parts; i++) {
2488    WordType l = dst[i];
2489    if (c) {
2490      dst[i] -= rhs[i] + 1;
2491      c = (dst[i] >= l);
2492    } else {
2493      dst[i] -= rhs[i];
2494      c = (dst[i] > l);
2495    }
2496  }
2497
2498  return c;
2499}
2500
2501/// This function subtracts a single "word" (64-bit word), src, from
2502/// the multi-word integer array, dst[], propagating the borrowed 1 value until
2503/// no further borrowing is needed or it runs out of "words" in dst.  The result
2504/// is 1 if "borrowing" exhausted the digits in dst, or 0 if dst was not
2505/// exhausted. In other words, if src > dst then this function returns 1,
2506/// otherwise 0.
2507/// @returns the borrow out of the subtraction
2508APInt::WordType APInt::tcSubtractPart(WordType *dst, WordType src,
2509                                      unsigned parts) {
2510  for (unsigned i = 0; i < parts; ++i) {
2511    WordType Dst = dst[i];
2512    dst[i] -= src;
2513    if (src <= Dst)
2514      return 0; // No need to borrow so exit early.
2515    src = 1; // We have to "borrow 1" from next "word"
2516  }
2517
2518  return 1;
2519}
2520
2521/* Negate a bignum in-place.  */
2522void APInt::tcNegate(WordType *dst, unsigned parts) {
2523  tcComplement(dst, parts);
2524  tcIncrement(dst, parts);
2525}
2526
2527/*  DST += SRC * MULTIPLIER + CARRY   if add is true
2528    DST  = SRC * MULTIPLIER + CARRY   if add is false
2529
2530    Requires 0 <= DSTPARTS <= SRCPARTS + 1.  If DST overlaps SRC
2531    they must start at the same point, i.e. DST == SRC.
2532
2533    If DSTPARTS == SRCPARTS + 1 no overflow occurs and zero is
2534    returned.  Otherwise DST is filled with the least significant
2535    DSTPARTS parts of the result, and if all of the omitted higher
2536    parts were zero return zero, otherwise overflow occurred and
2537    return one.  */
2538int APInt::tcMultiplyPart(WordType *dst, const WordType *src,
2539                          WordType multiplier, WordType carry,
2540                          unsigned srcParts, unsigned dstParts,
2541                          bool add) {
2542  /* Otherwise our writes of DST kill our later reads of SRC.  */
2543  assert(dst <= src || dst >= src + srcParts);
2544  assert(dstParts <= srcParts + 1);
2545
2546  /* N loops; minimum of dstParts and srcParts.  */
2547  unsigned n = std::min(dstParts, srcParts);
2548
2549  for (unsigned i = 0; i < n; i++) {
2550    WordType low, mid, high, srcPart;
2551
2552      /* [ LOW, HIGH ] = MULTIPLIER * SRC[i] + DST[i] + CARRY.
2553
2554         This cannot overflow, because
2555
2556         (n - 1) * (n - 1) + 2 (n - 1) = (n - 1) * (n + 1)
2557
2558         which is less than n^2.  */
2559
2560    srcPart = src[i];
2561
2562    if (multiplier == 0 || srcPart == 0) {
2563      low = carry;
2564      high = 0;
2565    } else {
2566      low = lowHalf(srcPart) * lowHalf(multiplier);
2567      high = highHalf(srcPart) * highHalf(multiplier);
2568
2569      mid = lowHalf(srcPart) * highHalf(multiplier);
2570      high += highHalf(mid);
2571      mid <<= APINT_BITS_PER_WORD / 2;
2572      if (low + mid < low)
2573        high++;
2574      low += mid;
2575
2576      mid = highHalf(srcPart) * lowHalf(multiplier);
2577      high += highHalf(mid);
2578      mid <<= APINT_BITS_PER_WORD / 2;
2579      if (low + mid < low)
2580        high++;
2581      low += mid;
2582
2583      /* Now add carry.  */
2584      if (low + carry < low)
2585        high++;
2586      low += carry;
2587    }
2588
2589    if (add) {
2590      /* And now DST[i], and store the new low part there.  */
2591      if (low + dst[i] < low)
2592        high++;
2593      dst[i] += low;
2594    } else
2595      dst[i] = low;
2596
2597    carry = high;
2598  }
2599
2600  if (srcParts < dstParts) {
2601    /* Full multiplication, there is no overflow.  */
2602    assert(srcParts + 1 == dstParts);
2603    dst[srcParts] = carry;
2604    return 0;
2605  }
2606
2607  /* We overflowed if there is carry.  */
2608  if (carry)
2609    return 1;
2610
2611  /* We would overflow if any significant unwritten parts would be
2612     non-zero.  This is true if any remaining src parts are non-zero
2613     and the multiplier is non-zero.  */
2614  if (multiplier)
2615    for (unsigned i = dstParts; i < srcParts; i++)
2616      if (src[i])
2617        return 1;
2618
2619  /* We fitted in the narrow destination.  */
2620  return 0;
2621}
2622
2623/* DST = LHS * RHS, where DST has the same width as the operands and
2624   is filled with the least significant parts of the result.  Returns
2625   one if overflow occurred, otherwise zero.  DST must be disjoint
2626   from both operands.  */
2627int APInt::tcMultiply(WordType *dst, const WordType *lhs,
2628                      const WordType *rhs, unsigned parts) {
2629  assert(dst != lhs && dst != rhs);
2630
2631  int overflow = 0;
2632  tcSet(dst, 0, parts);
2633
2634  for (unsigned i = 0; i < parts; i++)
2635    overflow |= tcMultiplyPart(&dst[i], lhs, rhs[i], 0, parts,
2636                               parts - i, true);
2637
2638  return overflow;
2639}
2640
2641/// DST = LHS * RHS, where DST has width the sum of the widths of the
2642/// operands. No overflow occurs. DST must be disjoint from both operands.
2643void APInt::tcFullMultiply(WordType *dst, const WordType *lhs,
2644                           const WordType *rhs, unsigned lhsParts,
2645                           unsigned rhsParts) {
2646  /* Put the narrower number on the LHS for less loops below.  */
2647  if (lhsParts > rhsParts)
2648    return tcFullMultiply (dst, rhs, lhs, rhsParts, lhsParts);
2649
2650  assert(dst != lhs && dst != rhs);
2651
2652  tcSet(dst, 0, rhsParts);
2653
2654  for (unsigned i = 0; i < lhsParts; i++)
2655    tcMultiplyPart(&dst[i], rhs, lhs[i], 0, rhsParts, rhsParts + 1, true);
2656}
2657
2658/* If RHS is zero LHS and REMAINDER are left unchanged, return one.
2659   Otherwise set LHS to LHS / RHS with the fractional part discarded,
2660   set REMAINDER to the remainder, return zero.  i.e.
2661
2662   OLD_LHS = RHS * LHS + REMAINDER
2663
2664   SCRATCH is a bignum of the same size as the operands and result for
2665   use by the routine; its contents need not be initialized and are
2666   destroyed.  LHS, REMAINDER and SCRATCH must be distinct.
2667*/
2668int APInt::tcDivide(WordType *lhs, const WordType *rhs,
2669                    WordType *remainder, WordType *srhs,
2670                    unsigned parts) {
2671  assert(lhs != remainder && lhs != srhs && remainder != srhs);
2672
2673  unsigned shiftCount = tcMSB(rhs, parts) + 1;
2674  if (shiftCount == 0)
2675    return true;
2676
2677  shiftCount = parts * APINT_BITS_PER_WORD - shiftCount;
2678  unsigned n = shiftCount / APINT_BITS_PER_WORD;
2679  WordType mask = (WordType) 1 << (shiftCount % APINT_BITS_PER_WORD);
2680
2681  tcAssign(srhs, rhs, parts);
2682  tcShiftLeft(srhs, parts, shiftCount);
2683  tcAssign(remainder, lhs, parts);
2684  tcSet(lhs, 0, parts);
2685
2686  /* Loop, subtracting SRHS if REMAINDER is greater and adding that to
2687     the total.  */
2688  for (;;) {
2689    int compare = tcCompare(remainder, srhs, parts);
2690    if (compare >= 0) {
2691      tcSubtract(remainder, srhs, 0, parts);
2692      lhs[n] |= mask;
2693    }
2694
2695    if (shiftCount == 0)
2696      break;
2697    shiftCount--;
2698    tcShiftRight(srhs, parts, 1);
2699    if ((mask >>= 1) == 0) {
2700      mask = (WordType) 1 << (APINT_BITS_PER_WORD - 1);
2701      n--;
2702    }
2703  }
2704
2705  return false;
2706}
2707
2708/// Shift a bignum left Cound bits in-place. Shifted in bits are zero. There are
2709/// no restrictions on Count.
2710void APInt::tcShiftLeft(WordType *Dst, unsigned Words, unsigned Count) {
2711  // Don't bother performing a no-op shift.
2712  if (!Count)
2713    return;
2714
2715  // WordShift is the inter-part shift; BitShift is the intra-part shift.
2716  unsigned WordShift = std::min(Count / APINT_BITS_PER_WORD, Words);
2717  unsigned BitShift = Count % APINT_BITS_PER_WORD;
2718
2719  // Fastpath for moving by whole words.
2720  if (BitShift == 0) {
2721    std::memmove(Dst + WordShift, Dst, (Words - WordShift) * APINT_WORD_SIZE);
2722  } else {
2723    while (Words-- > WordShift) {
2724      Dst[Words] = Dst[Words - WordShift] << BitShift;
2725      if (Words > WordShift)
2726        Dst[Words] |=
2727          Dst[Words - WordShift - 1] >> (APINT_BITS_PER_WORD - BitShift);
2728    }
2729  }
2730
2731  // Fill in the remainder with 0s.
2732  std::memset(Dst, 0, WordShift * APINT_WORD_SIZE);
2733}
2734
2735/// Shift a bignum right Count bits in-place. Shifted in bits are zero. There
2736/// are no restrictions on Count.
2737void APInt::tcShiftRight(WordType *Dst, unsigned Words, unsigned Count) {
2738  // Don't bother performing a no-op shift.
2739  if (!Count)
2740    return;
2741
2742  // WordShift is the inter-part shift; BitShift is the intra-part shift.
2743  unsigned WordShift = std::min(Count / APINT_BITS_PER_WORD, Words);
2744  unsigned BitShift = Count % APINT_BITS_PER_WORD;
2745
2746  unsigned WordsToMove = Words - WordShift;
2747  // Fastpath for moving by whole words.
2748  if (BitShift == 0) {
2749    std::memmove(Dst, Dst + WordShift, WordsToMove * APINT_WORD_SIZE);
2750  } else {
2751    for (unsigned i = 0; i != WordsToMove; ++i) {
2752      Dst[i] = Dst[i + WordShift] >> BitShift;
2753      if (i + 1 != WordsToMove)
2754        Dst[i] |= Dst[i + WordShift + 1] << (APINT_BITS_PER_WORD - BitShift);
2755    }
2756  }
2757
2758  // Fill in the remainder with 0s.
2759  std::memset(Dst + WordsToMove, 0, WordShift * APINT_WORD_SIZE);
2760}
2761
2762/* Bitwise and of two bignums.  */
2763void APInt::tcAnd(WordType *dst, const WordType *rhs, unsigned parts) {
2764  for (unsigned i = 0; i < parts; i++)
2765    dst[i] &= rhs[i];
2766}
2767
2768/* Bitwise inclusive or of two bignums.  */
2769void APInt::tcOr(WordType *dst, const WordType *rhs, unsigned parts) {
2770  for (unsigned i = 0; i < parts; i++)
2771    dst[i] |= rhs[i];
2772}
2773
2774/* Bitwise exclusive or of two bignums.  */
2775void APInt::tcXor(WordType *dst, const WordType *rhs, unsigned parts) {
2776  for (unsigned i = 0; i < parts; i++)
2777    dst[i] ^= rhs[i];
2778}
2779
2780/* Complement a bignum in-place.  */
2781void APInt::tcComplement(WordType *dst, unsigned parts) {
2782  for (unsigned i = 0; i < parts; i++)
2783    dst[i] = ~dst[i];
2784}
2785
2786/* Comparison (unsigned) of two bignums.  */
2787int APInt::tcCompare(const WordType *lhs, const WordType *rhs,
2788                     unsigned parts) {
2789  while (parts) {
2790    parts--;
2791    if (lhs[parts] != rhs[parts])
2792      return (lhs[parts] > rhs[parts]) ? 1 : -1;
2793  }
2794
2795  return 0;
2796}
2797
2798/* Set the least significant BITS bits of a bignum, clear the
2799   rest.  */
2800void APInt::tcSetLeastSignificantBits(WordType *dst, unsigned parts,
2801                                      unsigned bits) {
2802  unsigned i = 0;
2803  while (bits > APINT_BITS_PER_WORD) {
2804    dst[i++] = ~(WordType) 0;
2805    bits -= APINT_BITS_PER_WORD;
2806  }
2807
2808  if (bits)
2809    dst[i++] = ~(WordType) 0 >> (APINT_BITS_PER_WORD - bits);
2810
2811  while (i < parts)
2812    dst[i++] = 0;
2813}
2814
2815APInt llvm::APIntOps::RoundingUDiv(const APInt &A, const APInt &B,
2816                                   APInt::Rounding RM) {
2817  // Currently udivrem always rounds down.
2818  switch (RM) {
2819  case APInt::Rounding::DOWN:
2820  case APInt::Rounding::TOWARD_ZERO:
2821    return A.udiv(B);
2822  case APInt::Rounding::UP: {
2823    APInt Quo, Rem;
2824    APInt::udivrem(A, B, Quo, Rem);
2825    if (Rem == 0)
2826      return Quo;
2827    return Quo + 1;
2828  }
2829  }
2830  llvm_unreachable("Unknown APInt::Rounding enum");
2831}
2832
2833APInt llvm::APIntOps::RoundingSDiv(const APInt &A, const APInt &B,
2834                                   APInt::Rounding RM) {
2835  switch (RM) {
2836  case APInt::Rounding::DOWN:
2837  case APInt::Rounding::UP: {
2838    APInt Quo, Rem;
2839    APInt::sdivrem(A, B, Quo, Rem);
2840    if (Rem == 0)
2841      return Quo;
2842    // This algorithm deals with arbitrary rounding mode used by sdivrem.
2843    // We want to check whether the non-integer part of the mathematical value
2844    // is negative or not. If the non-integer part is negative, we need to round
2845    // down from Quo; otherwise, if it's positive or 0, we return Quo, as it's
2846    // already rounded down.
2847    if (RM == APInt::Rounding::DOWN) {
2848      if (Rem.isNegative() != B.isNegative())
2849        return Quo - 1;
2850      return Quo;
2851    }
2852    if (Rem.isNegative() != B.isNegative())
2853      return Quo;
2854    return Quo + 1;
2855  }
2856  // Currently sdiv rounds towards zero.
2857  case APInt::Rounding::TOWARD_ZERO:
2858    return A.sdiv(B);
2859  }
2860  llvm_unreachable("Unknown APInt::Rounding enum");
2861}
2862
2863Optional<APInt>
2864llvm::APIntOps::SolveQuadraticEquationWrap(APInt A, APInt B, APInt C,
2865                                           unsigned RangeWidth) {
2866  unsigned CoeffWidth = A.getBitWidth();
2867  assert(CoeffWidth == B.getBitWidth() && CoeffWidth == C.getBitWidth());
2868  assert(RangeWidth <= CoeffWidth &&
2869         "Value range width should be less than coefficient width");
2870  assert(RangeWidth > 1 && "Value range bit width should be > 1");
2871
2872  LLVM_DEBUG(dbgs() << __func__ << ": solving " << A << "x^2 + " << B
2873                    << "x + " << C << ", rw:" << RangeWidth << '\n');
2874
2875  // Identify 0 as a (non)solution immediately.
2876  if (C.sextOrTrunc(RangeWidth).isNullValue() ) {
2877    LLVM_DEBUG(dbgs() << __func__ << ": zero solution\n");
2878    return APInt(CoeffWidth, 0);
2879  }
2880
2881  // The result of APInt arithmetic has the same bit width as the operands,
2882  // so it can actually lose high bits. A product of two n-bit integers needs
2883  // 2n-1 bits to represent the full value.
2884  // The operation done below (on quadratic coefficients) that can produce
2885  // the largest value is the evaluation of the equation during bisection,
2886  // which needs 3 times the bitwidth of the coefficient, so the total number
2887  // of required bits is 3n.
2888  //
2889  // The purpose of this extension is to simulate the set Z of all integers,
2890  // where n+1 > n for all n in Z. In Z it makes sense to talk about positive
2891  // and negative numbers (not so much in a modulo arithmetic). The method
2892  // used to solve the equation is based on the standard formula for real
2893  // numbers, and uses the concepts of "positive" and "negative" with their
2894  // usual meanings.
2895  CoeffWidth *= 3;
2896  A = A.sext(CoeffWidth);
2897  B = B.sext(CoeffWidth);
2898  C = C.sext(CoeffWidth);
2899
2900  // Make A > 0 for simplicity. Negate cannot overflow at this point because
2901  // the bit width has increased.
2902  if (A.isNegative()) {
2903    A.negate();
2904    B.negate();
2905    C.negate();
2906  }
2907
2908  // Solving an equation q(x) = 0 with coefficients in modular arithmetic
2909  // is really solving a set of equations q(x) = kR for k = 0, 1, 2, ...,
2910  // and R = 2^BitWidth.
2911  // Since we're trying not only to find exact solutions, but also values
2912  // that "wrap around", such a set will always have a solution, i.e. an x
2913  // that satisfies at least one of the equations, or such that |q(x)|
2914  // exceeds kR, while |q(x-1)| for the same k does not.
2915  //
2916  // We need to find a value k, such that Ax^2 + Bx + C = kR will have a
2917  // positive solution n (in the above sense), and also such that the n
2918  // will be the least among all solutions corresponding to k = 0, 1, ...
2919  // (more precisely, the least element in the set
2920  //   { n(k) | k is such that a solution n(k) exists }).
2921  //
2922  // Consider the parabola (over real numbers) that corresponds to the
2923  // quadratic equation. Since A > 0, the arms of the parabola will point
2924  // up. Picking different values of k will shift it up and down by R.
2925  //
2926  // We want to shift the parabola in such a way as to reduce the problem
2927  // of solving q(x) = kR to solving shifted_q(x) = 0.
2928  // (The interesting solutions are the ceilings of the real number
2929  // solutions.)
2930  APInt R = APInt::getOneBitSet(CoeffWidth, RangeWidth);
2931  APInt TwoA = 2 * A;
2932  APInt SqrB = B * B;
2933  bool PickLow;
2934
2935  auto RoundUp = [] (const APInt &V, const APInt &A) -> APInt {
2936    assert(A.isStrictlyPositive());
2937    APInt T = V.abs().urem(A);
2938    if (T.isNullValue())
2939      return V;
2940    return V.isNegative() ? V+T : V+(A-T);
2941  };
2942
2943  // The vertex of the parabola is at -B/2A, but since A > 0, it's negative
2944  // iff B is positive.
2945  if (B.isNonNegative()) {
2946    // If B >= 0, the vertex it at a negative location (or at 0), so in
2947    // order to have a non-negative solution we need to pick k that makes
2948    // C-kR negative. To satisfy all the requirements for the solution
2949    // that we are looking for, it needs to be closest to 0 of all k.
2950    C = C.srem(R);
2951    if (C.isStrictlyPositive())
2952      C -= R;
2953    // Pick the greater solution.
2954    PickLow = false;
2955  } else {
2956    // If B < 0, the vertex is at a positive location. For any solution
2957    // to exist, the discriminant must be non-negative. This means that
2958    // C-kR <= B^2/4A is a necessary condition for k, i.e. there is a
2959    // lower bound on values of k: kR >= C - B^2/4A.
2960    APInt LowkR = C - SqrB.udiv(2*TwoA); // udiv because all values > 0.
2961    // Round LowkR up (towards +inf) to the nearest kR.
2962    LowkR = RoundUp(LowkR, R);
2963
2964    // If there exists k meeting the condition above, and such that
2965    // C-kR > 0, there will be two positive real number solutions of
2966    // q(x) = kR. Out of all such values of k, pick the one that makes
2967    // C-kR closest to 0, (i.e. pick maximum k such that C-kR > 0).
2968    // In other words, find maximum k such that LowkR <= kR < C.
2969    if (C.sgt(LowkR)) {
2970      // If LowkR < C, then such a k is guaranteed to exist because
2971      // LowkR itself is a multiple of R.
2972      C -= -RoundUp(-C, R);      // C = C - RoundDown(C, R)
2973      // Pick the smaller solution.
2974      PickLow = true;
2975    } else {
2976      // If C-kR < 0 for all potential k's, it means that one solution
2977      // will be negative, while the other will be positive. The positive
2978      // solution will shift towards 0 if the parabola is moved up.
2979      // Pick the kR closest to the lower bound (i.e. make C-kR closest
2980      // to 0, or in other words, out of all parabolas that have solutions,
2981      // pick the one that is the farthest "up").
2982      // Since LowkR is itself a multiple of R, simply take C-LowkR.
2983      C -= LowkR;
2984      // Pick the greater solution.
2985      PickLow = false;
2986    }
2987  }
2988
2989  LLVM_DEBUG(dbgs() << __func__ << ": updated coefficients " << A << "x^2 + "
2990                    << B << "x + " << C << ", rw:" << RangeWidth << '\n');
2991
2992  APInt D = SqrB - 4*A*C;
2993  assert(D.isNonNegative() && "Negative discriminant");
2994  APInt SQ = D.sqrt();
2995
2996  APInt Q = SQ * SQ;
2997  bool InexactSQ = Q != D;
2998  // The calculated SQ may actually be greater than the exact (non-integer)
2999  // value. If that's the case, decrement SQ to get a value that is lower.
3000  if (Q.sgt(D))
3001    SQ -= 1;
3002
3003  APInt X;
3004  APInt Rem;
3005
3006  // SQ is rounded down (i.e SQ * SQ <= D), so the roots may be inexact.
3007  // When using the quadratic formula directly, the calculated low root
3008  // may be greater than the exact one, since we would be subtracting SQ.
3009  // To make sure that the calculated root is not greater than the exact
3010  // one, subtract SQ+1 when calculating the low root (for inexact value
3011  // of SQ).
3012  if (PickLow)
3013    APInt::sdivrem(-B - (SQ+InexactSQ), TwoA, X, Rem);
3014  else
3015    APInt::sdivrem(-B + SQ, TwoA, X, Rem);
3016
3017  // The updated coefficients should be such that the (exact) solution is
3018  // positive. Since APInt division rounds towards 0, the calculated one
3019  // can be 0, but cannot be negative.
3020  assert(X.isNonNegative() && "Solution should be non-negative");
3021
3022  if (!InexactSQ && Rem.isNullValue()) {
3023    LLVM_DEBUG(dbgs() << __func__ << ": solution (root): " << X << '\n');
3024    return X;
3025  }
3026
3027  assert((SQ*SQ).sle(D) && "SQ = |_sqrt(D)_|, so SQ*SQ <= D");
3028  // The exact value of the square root of D should be between SQ and SQ+1.
3029  // This implies that the solution should be between that corresponding to
3030  // SQ (i.e. X) and that corresponding to SQ+1.
3031  //
3032  // The calculated X cannot be greater than the exact (real) solution.
3033  // Actually it must be strictly less than the exact solution, while
3034  // X+1 will be greater than or equal to it.
3035
3036  APInt VX = (A*X + B)*X + C;
3037  APInt VY = VX + TwoA*X + A + B;
3038  bool SignChange = VX.isNegative() != VY.isNegative() ||
3039                    VX.isNullValue() != VY.isNullValue();
3040  // If the sign did not change between X and X+1, X is not a valid solution.
3041  // This could happen when the actual (exact) roots don't have an integer
3042  // between them, so they would both be contained between X and X+1.
3043  if (!SignChange) {
3044    LLVM_DEBUG(dbgs() << __func__ << ": no valid solution\n");
3045    return None;
3046  }
3047
3048  X += 1;
3049  LLVM_DEBUG(dbgs() << __func__ << ": solution (wrap): " << X << '\n');
3050  return X;
3051}
3052
3053Optional<unsigned>
3054llvm::APIntOps::GetMostSignificantDifferentBit(const APInt &A, const APInt &B) {
3055  assert(A.getBitWidth() == B.getBitWidth() && "Must have the same bitwidth");
3056  if (A == B)
3057    return llvm::None;
3058  return A.getBitWidth() - ((A ^ B).countLeadingZeros() + 1);
3059}
3060
3061/// StoreIntToMemory - Fills the StoreBytes bytes of memory starting from Dst
3062/// with the integer held in IntVal.
3063void llvm::StoreIntToMemory(const APInt &IntVal, uint8_t *Dst,
3064                            unsigned StoreBytes) {
3065  assert((IntVal.getBitWidth()+7)/8 >= StoreBytes && "Integer too small!");
3066  const uint8_t *Src = (const uint8_t *)IntVal.getRawData();
3067
3068  if (sys::IsLittleEndianHost) {
3069    // Little-endian host - the source is ordered from LSB to MSB.  Order the
3070    // destination from LSB to MSB: Do a straight copy.
3071    memcpy(Dst, Src, StoreBytes);
3072  } else {
3073    // Big-endian host - the source is an array of 64 bit words ordered from
3074    // LSW to MSW.  Each word is ordered from MSB to LSB.  Order the destination
3075    // from MSB to LSB: Reverse the word order, but not the bytes in a word.
3076    while (StoreBytes > sizeof(uint64_t)) {
3077      StoreBytes -= sizeof(uint64_t);
3078      // May not be aligned so use memcpy.
3079      memcpy(Dst + StoreBytes, Src, sizeof(uint64_t));
3080      Src += sizeof(uint64_t);
3081    }
3082
3083    memcpy(Dst, Src + sizeof(uint64_t) - StoreBytes, StoreBytes);
3084  }
3085}
3086
3087/// LoadIntFromMemory - Loads the integer stored in the LoadBytes bytes starting
3088/// from Src into IntVal, which is assumed to be wide enough and to hold zero.
3089void llvm::LoadIntFromMemory(APInt &IntVal, const uint8_t *Src,
3090                             unsigned LoadBytes) {
3091  assert((IntVal.getBitWidth()+7)/8 >= LoadBytes && "Integer too small!");
3092  uint8_t *Dst = reinterpret_cast<uint8_t *>(
3093                   const_cast<uint64_t *>(IntVal.getRawData()));
3094
3095  if (sys::IsLittleEndianHost)
3096    // Little-endian host - the destination must be ordered from LSB to MSB.
3097    // The source is ordered from LSB to MSB: Do a straight copy.
3098    memcpy(Dst, Src, LoadBytes);
3099  else {
3100    // Big-endian - the destination is an array of 64 bit words ordered from
3101    // LSW to MSW.  Each word must be ordered from MSB to LSB.  The source is
3102    // ordered from MSB to LSB: Reverse the word order, but not the bytes in
3103    // a word.
3104    while (LoadBytes > sizeof(uint64_t)) {
3105      LoadBytes -= sizeof(uint64_t);
3106      // May not be aligned so use memcpy.
3107      memcpy(Dst, Src + LoadBytes, sizeof(uint64_t));
3108      Dst += sizeof(uint64_t);
3109    }
3110
3111    memcpy(Dst + sizeof(uint64_t) - LoadBytes, Src, LoadBytes);
3112  }
3113}
3114