1//===-- APInt.cpp - Implement APInt class ---------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements a class to represent arbitrary precision integer
10// constant values and provide a variety of arithmetic operations on them.
11//
12//===----------------------------------------------------------------------===//
13
14#include "llvm/ADT/APInt.h"
15#include "llvm/ADT/ArrayRef.h"
16#include "llvm/ADT/FoldingSet.h"
17#include "llvm/ADT/Hashing.h"
18#include "llvm/ADT/Optional.h"
19#include "llvm/ADT/SmallString.h"
20#include "llvm/ADT/StringRef.h"
21#include "llvm/ADT/bit.h"
22#include "llvm/Config/llvm-config.h"
23#include "llvm/Support/Debug.h"
24#include "llvm/Support/ErrorHandling.h"
25#include "llvm/Support/MathExtras.h"
26#include "llvm/Support/raw_ostream.h"
27#include <climits>
28#include <cmath>
29#include <cstdlib>
30#include <cstring>
31using namespace llvm;
32
33#define DEBUG_TYPE "apint"
34
35/// A utility function for allocating memory, checking for allocation failures,
36/// and ensuring the contents are zeroed.
37inline static uint64_t* getClearedMemory(unsigned numWords) {
38  uint64_t *result = new uint64_t[numWords];
39  memset(result, 0, numWords * sizeof(uint64_t));
40  return result;
41}
42
43/// A utility function for allocating memory and checking for allocation
44/// failure.  The content is not zeroed.
45inline static uint64_t* getMemory(unsigned numWords) {
46  return new uint64_t[numWords];
47}
48
49/// A utility function that converts a character to a digit.
50inline static unsigned getDigit(char cdigit, uint8_t radix) {
51  unsigned r;
52
53  if (radix == 16 || radix == 36) {
54    r = cdigit - '0';
55    if (r <= 9)
56      return r;
57
58    r = cdigit - 'A';
59    if (r <= radix - 11U)
60      return r + 10;
61
62    r = cdigit - 'a';
63    if (r <= radix - 11U)
64      return r + 10;
65
66    radix = 10;
67  }
68
69  r = cdigit - '0';
70  if (r < radix)
71    return r;
72
73  return -1U;
74}
75
76
77void APInt::initSlowCase(uint64_t val, bool isSigned) {
78  U.pVal = getClearedMemory(getNumWords());
79  U.pVal[0] = val;
80  if (isSigned && int64_t(val) < 0)
81    for (unsigned i = 1; i < getNumWords(); ++i)
82      U.pVal[i] = WORDTYPE_MAX;
83  clearUnusedBits();
84}
85
86void APInt::initSlowCase(const APInt& that) {
87  U.pVal = getMemory(getNumWords());
88  memcpy(U.pVal, that.U.pVal, getNumWords() * APINT_WORD_SIZE);
89}
90
91void APInt::initFromArray(ArrayRef<uint64_t> bigVal) {
92  assert(BitWidth && "Bitwidth too small");
93  assert(bigVal.data() && "Null pointer detected!");
94  if (isSingleWord())
95    U.VAL = bigVal[0];
96  else {
97    // Get memory, cleared to 0
98    U.pVal = getClearedMemory(getNumWords());
99    // Calculate the number of words to copy
100    unsigned words = std::min<unsigned>(bigVal.size(), getNumWords());
101    // Copy the words from bigVal to pVal
102    memcpy(U.pVal, bigVal.data(), words * APINT_WORD_SIZE);
103  }
104  // Make sure unused high bits are cleared
105  clearUnusedBits();
106}
107
108APInt::APInt(unsigned numBits, ArrayRef<uint64_t> bigVal)
109  : BitWidth(numBits) {
110  initFromArray(bigVal);
111}
112
113APInt::APInt(unsigned numBits, unsigned numWords, const uint64_t bigVal[])
114  : BitWidth(numBits) {
115  initFromArray(makeArrayRef(bigVal, numWords));
116}
117
118APInt::APInt(unsigned numbits, StringRef Str, uint8_t radix)
119  : BitWidth(numbits) {
120  assert(BitWidth && "Bitwidth too small");
121  fromString(numbits, Str, radix);
122}
123
124void APInt::reallocate(unsigned NewBitWidth) {
125  // If the number of words is the same we can just change the width and stop.
126  if (getNumWords() == getNumWords(NewBitWidth)) {
127    BitWidth = NewBitWidth;
128    return;
129  }
130
131  // If we have an allocation, delete it.
132  if (!isSingleWord())
133    delete [] U.pVal;
134
135  // Update BitWidth.
136  BitWidth = NewBitWidth;
137
138  // If we are supposed to have an allocation, create it.
139  if (!isSingleWord())
140    U.pVal = getMemory(getNumWords());
141}
142
143void APInt::AssignSlowCase(const APInt& RHS) {
144  // Don't do anything for X = X
145  if (this == &RHS)
146    return;
147
148  // Adjust the bit width and handle allocations as necessary.
149  reallocate(RHS.getBitWidth());
150
151  // Copy the data.
152  if (isSingleWord())
153    U.VAL = RHS.U.VAL;
154  else
155    memcpy(U.pVal, RHS.U.pVal, getNumWords() * APINT_WORD_SIZE);
156}
157
158/// This method 'profiles' an APInt for use with FoldingSet.
159void APInt::Profile(FoldingSetNodeID& ID) const {
160  ID.AddInteger(BitWidth);
161
162  if (isSingleWord()) {
163    ID.AddInteger(U.VAL);
164    return;
165  }
166
167  unsigned NumWords = getNumWords();
168  for (unsigned i = 0; i < NumWords; ++i)
169    ID.AddInteger(U.pVal[i]);
170}
171
172/// Prefix increment operator. Increments the APInt by one.
173APInt& APInt::operator++() {
174  if (isSingleWord())
175    ++U.VAL;
176  else
177    tcIncrement(U.pVal, getNumWords());
178  return clearUnusedBits();
179}
180
181/// Prefix decrement operator. Decrements the APInt by one.
182APInt& APInt::operator--() {
183  if (isSingleWord())
184    --U.VAL;
185  else
186    tcDecrement(U.pVal, getNumWords());
187  return clearUnusedBits();
188}
189
190/// Adds the RHS APInt to this APInt.
191/// @returns this, after addition of RHS.
192/// Addition assignment operator.
193APInt& APInt::operator+=(const APInt& RHS) {
194  assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
195  if (isSingleWord())
196    U.VAL += RHS.U.VAL;
197  else
198    tcAdd(U.pVal, RHS.U.pVal, 0, getNumWords());
199  return clearUnusedBits();
200}
201
202APInt& APInt::operator+=(uint64_t RHS) {
203  if (isSingleWord())
204    U.VAL += RHS;
205  else
206    tcAddPart(U.pVal, RHS, getNumWords());
207  return clearUnusedBits();
208}
209
210/// Subtracts the RHS APInt from this APInt
211/// @returns this, after subtraction
212/// Subtraction assignment operator.
213APInt& APInt::operator-=(const APInt& RHS) {
214  assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
215  if (isSingleWord())
216    U.VAL -= RHS.U.VAL;
217  else
218    tcSubtract(U.pVal, RHS.U.pVal, 0, getNumWords());
219  return clearUnusedBits();
220}
221
222APInt& APInt::operator-=(uint64_t RHS) {
223  if (isSingleWord())
224    U.VAL -= RHS;
225  else
226    tcSubtractPart(U.pVal, RHS, getNumWords());
227  return clearUnusedBits();
228}
229
230APInt APInt::operator*(const APInt& RHS) const {
231  assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
232  if (isSingleWord())
233    return APInt(BitWidth, U.VAL * RHS.U.VAL);
234
235  APInt Result(getMemory(getNumWords()), getBitWidth());
236
237  tcMultiply(Result.U.pVal, U.pVal, RHS.U.pVal, getNumWords());
238
239  Result.clearUnusedBits();
240  return Result;
241}
242
243void APInt::AndAssignSlowCase(const APInt& RHS) {
244  tcAnd(U.pVal, RHS.U.pVal, getNumWords());
245}
246
247void APInt::OrAssignSlowCase(const APInt& RHS) {
248  tcOr(U.pVal, RHS.U.pVal, getNumWords());
249}
250
251void APInt::XorAssignSlowCase(const APInt& RHS) {
252  tcXor(U.pVal, RHS.U.pVal, getNumWords());
253}
254
255APInt& APInt::operator*=(const APInt& RHS) {
256  assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
257  *this = *this * RHS;
258  return *this;
259}
260
261APInt& APInt::operator*=(uint64_t RHS) {
262  if (isSingleWord()) {
263    U.VAL *= RHS;
264  } else {
265    unsigned NumWords = getNumWords();
266    tcMultiplyPart(U.pVal, U.pVal, RHS, 0, NumWords, NumWords, false);
267  }
268  return clearUnusedBits();
269}
270
271bool APInt::EqualSlowCase(const APInt& RHS) const {
272  return std::equal(U.pVal, U.pVal + getNumWords(), RHS.U.pVal);
273}
274
275int APInt::compare(const APInt& RHS) const {
276  assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison");
277  if (isSingleWord())
278    return U.VAL < RHS.U.VAL ? -1 : U.VAL > RHS.U.VAL;
279
280  return tcCompare(U.pVal, RHS.U.pVal, getNumWords());
281}
282
283int APInt::compareSigned(const APInt& RHS) const {
284  assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison");
285  if (isSingleWord()) {
286    int64_t lhsSext = SignExtend64(U.VAL, BitWidth);
287    int64_t rhsSext = SignExtend64(RHS.U.VAL, BitWidth);
288    return lhsSext < rhsSext ? -1 : lhsSext > rhsSext;
289  }
290
291  bool lhsNeg = isNegative();
292  bool rhsNeg = RHS.isNegative();
293
294  // If the sign bits don't match, then (LHS < RHS) if LHS is negative
295  if (lhsNeg != rhsNeg)
296    return lhsNeg ? -1 : 1;
297
298  // Otherwise we can just use an unsigned comparison, because even negative
299  // numbers compare correctly this way if both have the same signed-ness.
300  return tcCompare(U.pVal, RHS.U.pVal, getNumWords());
301}
302
303void APInt::setBitsSlowCase(unsigned loBit, unsigned hiBit) {
304  unsigned loWord = whichWord(loBit);
305  unsigned hiWord = whichWord(hiBit);
306
307  // Create an initial mask for the low word with zeros below loBit.
308  uint64_t loMask = WORDTYPE_MAX << whichBit(loBit);
309
310  // If hiBit is not aligned, we need a high mask.
311  unsigned hiShiftAmt = whichBit(hiBit);
312  if (hiShiftAmt != 0) {
313    // Create a high mask with zeros above hiBit.
314    uint64_t hiMask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - hiShiftAmt);
315    // If loWord and hiWord are equal, then we combine the masks. Otherwise,
316    // set the bits in hiWord.
317    if (hiWord == loWord)
318      loMask &= hiMask;
319    else
320      U.pVal[hiWord] |= hiMask;
321  }
322  // Apply the mask to the low word.
323  U.pVal[loWord] |= loMask;
324
325  // Fill any words between loWord and hiWord with all ones.
326  for (unsigned word = loWord + 1; word < hiWord; ++word)
327    U.pVal[word] = WORDTYPE_MAX;
328}
329
330/// Toggle every bit to its opposite value.
331void APInt::flipAllBitsSlowCase() {
332  tcComplement(U.pVal, getNumWords());
333  clearUnusedBits();
334}
335
336/// Toggle a given bit to its opposite value whose position is given
337/// as "bitPosition".
338/// Toggles a given bit to its opposite value.
339void APInt::flipBit(unsigned bitPosition) {
340  assert(bitPosition < BitWidth && "Out of the bit-width range!");
341  if ((*this)[bitPosition]) clearBit(bitPosition);
342  else setBit(bitPosition);
343}
344
345void APInt::insertBits(const APInt &subBits, unsigned bitPosition) {
346  unsigned subBitWidth = subBits.getBitWidth();
347  assert(0 < subBitWidth && (subBitWidth + bitPosition) <= BitWidth &&
348         "Illegal bit insertion");
349
350  // Insertion is a direct copy.
351  if (subBitWidth == BitWidth) {
352    *this = subBits;
353    return;
354  }
355
356  // Single word result can be done as a direct bitmask.
357  if (isSingleWord()) {
358    uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - subBitWidth);
359    U.VAL &= ~(mask << bitPosition);
360    U.VAL |= (subBits.U.VAL << bitPosition);
361    return;
362  }
363
364  unsigned loBit = whichBit(bitPosition);
365  unsigned loWord = whichWord(bitPosition);
366  unsigned hi1Word = whichWord(bitPosition + subBitWidth - 1);
367
368  // Insertion within a single word can be done as a direct bitmask.
369  if (loWord == hi1Word) {
370    uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - subBitWidth);
371    U.pVal[loWord] &= ~(mask << loBit);
372    U.pVal[loWord] |= (subBits.U.VAL << loBit);
373    return;
374  }
375
376  // Insert on word boundaries.
377  if (loBit == 0) {
378    // Direct copy whole words.
379    unsigned numWholeSubWords = subBitWidth / APINT_BITS_PER_WORD;
380    memcpy(U.pVal + loWord, subBits.getRawData(),
381           numWholeSubWords * APINT_WORD_SIZE);
382
383    // Mask+insert remaining bits.
384    unsigned remainingBits = subBitWidth % APINT_BITS_PER_WORD;
385    if (remainingBits != 0) {
386      uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - remainingBits);
387      U.pVal[hi1Word] &= ~mask;
388      U.pVal[hi1Word] |= subBits.getWord(subBitWidth - 1);
389    }
390    return;
391  }
392
393  // General case - set/clear individual bits in dst based on src.
394  // TODO - there is scope for optimization here, but at the moment this code
395  // path is barely used so prefer readability over performance.
396  for (unsigned i = 0; i != subBitWidth; ++i) {
397    if (subBits[i])
398      setBit(bitPosition + i);
399    else
400      clearBit(bitPosition + i);
401  }
402}
403
404void APInt::insertBits(uint64_t subBits, unsigned bitPosition, unsigned numBits) {
405  uint64_t maskBits = maskTrailingOnes<uint64_t>(numBits);
406  subBits &= maskBits;
407  if (isSingleWord()) {
408    U.VAL &= ~(maskBits << bitPosition);
409    U.VAL |= subBits << bitPosition;
410    return;
411  }
412
413  unsigned loBit = whichBit(bitPosition);
414  unsigned loWord = whichWord(bitPosition);
415  unsigned hiWord = whichWord(bitPosition + numBits - 1);
416  if (loWord == hiWord) {
417    U.pVal[loWord] &= ~(maskBits << loBit);
418    U.pVal[loWord] |= subBits << loBit;
419    return;
420  }
421
422  static_assert(8 * sizeof(WordType) <= 64, "This code assumes only two words affected");
423  unsigned wordBits = 8 * sizeof(WordType);
424  U.pVal[loWord] &= ~(maskBits << loBit);
425  U.pVal[loWord] |= subBits << loBit;
426
427  U.pVal[hiWord] &= ~(maskBits >> (wordBits - loBit));
428  U.pVal[hiWord] |= subBits >> (wordBits - loBit);
429}
430
431APInt APInt::extractBits(unsigned numBits, unsigned bitPosition) const {
432  assert(numBits > 0 && "Can't extract zero bits");
433  assert(bitPosition < BitWidth && (numBits + bitPosition) <= BitWidth &&
434         "Illegal bit extraction");
435
436  if (isSingleWord())
437    return APInt(numBits, U.VAL >> bitPosition);
438
439  unsigned loBit = whichBit(bitPosition);
440  unsigned loWord = whichWord(bitPosition);
441  unsigned hiWord = whichWord(bitPosition + numBits - 1);
442
443  // Single word result extracting bits from a single word source.
444  if (loWord == hiWord)
445    return APInt(numBits, U.pVal[loWord] >> loBit);
446
447  // Extracting bits that start on a source word boundary can be done
448  // as a fast memory copy.
449  if (loBit == 0)
450    return APInt(numBits, makeArrayRef(U.pVal + loWord, 1 + hiWord - loWord));
451
452  // General case - shift + copy source words directly into place.
453  APInt Result(numBits, 0);
454  unsigned NumSrcWords = getNumWords();
455  unsigned NumDstWords = Result.getNumWords();
456
457  uint64_t *DestPtr = Result.isSingleWord() ? &Result.U.VAL : Result.U.pVal;
458  for (unsigned word = 0; word < NumDstWords; ++word) {
459    uint64_t w0 = U.pVal[loWord + word];
460    uint64_t w1 =
461        (loWord + word + 1) < NumSrcWords ? U.pVal[loWord + word + 1] : 0;
462    DestPtr[word] = (w0 >> loBit) | (w1 << (APINT_BITS_PER_WORD - loBit));
463  }
464
465  return Result.clearUnusedBits();
466}
467
468uint64_t APInt::extractBitsAsZExtValue(unsigned numBits,
469                                       unsigned bitPosition) const {
470  assert(numBits > 0 && "Can't extract zero bits");
471  assert(bitPosition < BitWidth && (numBits + bitPosition) <= BitWidth &&
472         "Illegal bit extraction");
473  assert(numBits <= 64 && "Illegal bit extraction");
474
475  uint64_t maskBits = maskTrailingOnes<uint64_t>(numBits);
476  if (isSingleWord())
477    return (U.VAL >> bitPosition) & maskBits;
478
479  unsigned loBit = whichBit(bitPosition);
480  unsigned loWord = whichWord(bitPosition);
481  unsigned hiWord = whichWord(bitPosition + numBits - 1);
482  if (loWord == hiWord)
483    return (U.pVal[loWord] >> loBit) & maskBits;
484
485  static_assert(8 * sizeof(WordType) <= 64, "This code assumes only two words affected");
486  unsigned wordBits = 8 * sizeof(WordType);
487  uint64_t retBits = U.pVal[loWord] >> loBit;
488  retBits |= U.pVal[hiWord] << (wordBits - loBit);
489  retBits &= maskBits;
490  return retBits;
491}
492
493unsigned APInt::getBitsNeeded(StringRef str, uint8_t radix) {
494  assert(!str.empty() && "Invalid string length");
495  assert((radix == 10 || radix == 8 || radix == 16 || radix == 2 ||
496          radix == 36) &&
497         "Radix should be 2, 8, 10, 16, or 36!");
498
499  size_t slen = str.size();
500
501  // Each computation below needs to know if it's negative.
502  StringRef::iterator p = str.begin();
503  unsigned isNegative = *p == '-';
504  if (*p == '-' || *p == '+') {
505    p++;
506    slen--;
507    assert(slen && "String is only a sign, needs a value.");
508  }
509
510  // For radixes of power-of-two values, the bits required is accurately and
511  // easily computed
512  if (radix == 2)
513    return slen + isNegative;
514  if (radix == 8)
515    return slen * 3 + isNegative;
516  if (radix == 16)
517    return slen * 4 + isNegative;
518
519  // FIXME: base 36
520
521  // This is grossly inefficient but accurate. We could probably do something
522  // with a computation of roughly slen*64/20 and then adjust by the value of
523  // the first few digits. But, I'm not sure how accurate that could be.
524
525  // Compute a sufficient number of bits that is always large enough but might
526  // be too large. This avoids the assertion in the constructor. This
527  // calculation doesn't work appropriately for the numbers 0-9, so just use 4
528  // bits in that case.
529  unsigned sufficient
530    = radix == 10? (slen == 1 ? 4 : slen * 64/18)
531                 : (slen == 1 ? 7 : slen * 16/3);
532
533  // Convert to the actual binary value.
534  APInt tmp(sufficient, StringRef(p, slen), radix);
535
536  // Compute how many bits are required. If the log is infinite, assume we need
537  // just bit. If the log is exact and value is negative, then the value is
538  // MinSignedValue with (log + 1) bits.
539  unsigned log = tmp.logBase2();
540  if (log == (unsigned)-1) {
541    return isNegative + 1;
542  } else if (isNegative && tmp.isPowerOf2()) {
543    return isNegative + log;
544  } else {
545    return isNegative + log + 1;
546  }
547}
548
549hash_code llvm::hash_value(const APInt &Arg) {
550  if (Arg.isSingleWord())
551    return hash_combine(Arg.U.VAL);
552
553  return hash_combine_range(Arg.U.pVal, Arg.U.pVal + Arg.getNumWords());
554}
555
556bool APInt::isSplat(unsigned SplatSizeInBits) const {
557  assert(getBitWidth() % SplatSizeInBits == 0 &&
558         "SplatSizeInBits must divide width!");
559  // We can check that all parts of an integer are equal by making use of a
560  // little trick: rotate and check if it's still the same value.
561  return *this == rotl(SplatSizeInBits);
562}
563
564/// This function returns the high "numBits" bits of this APInt.
565APInt APInt::getHiBits(unsigned numBits) const {
566  return this->lshr(BitWidth - numBits);
567}
568
569/// This function returns the low "numBits" bits of this APInt.
570APInt APInt::getLoBits(unsigned numBits) const {
571  APInt Result(getLowBitsSet(BitWidth, numBits));
572  Result &= *this;
573  return Result;
574}
575
576/// Return a value containing V broadcasted over NewLen bits.
577APInt APInt::getSplat(unsigned NewLen, const APInt &V) {
578  assert(NewLen >= V.getBitWidth() && "Can't splat to smaller bit width!");
579
580  APInt Val = V.zextOrSelf(NewLen);
581  for (unsigned I = V.getBitWidth(); I < NewLen; I <<= 1)
582    Val |= Val << I;
583
584  return Val;
585}
586
587unsigned APInt::countLeadingZerosSlowCase() const {
588  unsigned Count = 0;
589  for (int i = getNumWords()-1; i >= 0; --i) {
590    uint64_t V = U.pVal[i];
591    if (V == 0)
592      Count += APINT_BITS_PER_WORD;
593    else {
594      Count += llvm::countLeadingZeros(V);
595      break;
596    }
597  }
598  // Adjust for unused bits in the most significant word (they are zero).
599  unsigned Mod = BitWidth % APINT_BITS_PER_WORD;
600  Count -= Mod > 0 ? APINT_BITS_PER_WORD - Mod : 0;
601  return Count;
602}
603
604unsigned APInt::countLeadingOnesSlowCase() const {
605  unsigned highWordBits = BitWidth % APINT_BITS_PER_WORD;
606  unsigned shift;
607  if (!highWordBits) {
608    highWordBits = APINT_BITS_PER_WORD;
609    shift = 0;
610  } else {
611    shift = APINT_BITS_PER_WORD - highWordBits;
612  }
613  int i = getNumWords() - 1;
614  unsigned Count = llvm::countLeadingOnes(U.pVal[i] << shift);
615  if (Count == highWordBits) {
616    for (i--; i >= 0; --i) {
617      if (U.pVal[i] == WORDTYPE_MAX)
618        Count += APINT_BITS_PER_WORD;
619      else {
620        Count += llvm::countLeadingOnes(U.pVal[i]);
621        break;
622      }
623    }
624  }
625  return Count;
626}
627
628unsigned APInt::countTrailingZerosSlowCase() const {
629  unsigned Count = 0;
630  unsigned i = 0;
631  for (; i < getNumWords() && U.pVal[i] == 0; ++i)
632    Count += APINT_BITS_PER_WORD;
633  if (i < getNumWords())
634    Count += llvm::countTrailingZeros(U.pVal[i]);
635  return std::min(Count, BitWidth);
636}
637
638unsigned APInt::countTrailingOnesSlowCase() const {
639  unsigned Count = 0;
640  unsigned i = 0;
641  for (; i < getNumWords() && U.pVal[i] == WORDTYPE_MAX; ++i)
642    Count += APINT_BITS_PER_WORD;
643  if (i < getNumWords())
644    Count += llvm::countTrailingOnes(U.pVal[i]);
645  assert(Count <= BitWidth);
646  return Count;
647}
648
649unsigned APInt::countPopulationSlowCase() const {
650  unsigned Count = 0;
651  for (unsigned i = 0; i < getNumWords(); ++i)
652    Count += llvm::countPopulation(U.pVal[i]);
653  return Count;
654}
655
656bool APInt::intersectsSlowCase(const APInt &RHS) const {
657  for (unsigned i = 0, e = getNumWords(); i != e; ++i)
658    if ((U.pVal[i] & RHS.U.pVal[i]) != 0)
659      return true;
660
661  return false;
662}
663
664bool APInt::isSubsetOfSlowCase(const APInt &RHS) const {
665  for (unsigned i = 0, e = getNumWords(); i != e; ++i)
666    if ((U.pVal[i] & ~RHS.U.pVal[i]) != 0)
667      return false;
668
669  return true;
670}
671
672APInt APInt::byteSwap() const {
673  assert(BitWidth >= 16 && BitWidth % 16 == 0 && "Cannot byteswap!");
674  if (BitWidth == 16)
675    return APInt(BitWidth, ByteSwap_16(uint16_t(U.VAL)));
676  if (BitWidth == 32)
677    return APInt(BitWidth, ByteSwap_32(unsigned(U.VAL)));
678  if (BitWidth == 48) {
679    unsigned Tmp1 = unsigned(U.VAL >> 16);
680    Tmp1 = ByteSwap_32(Tmp1);
681    uint16_t Tmp2 = uint16_t(U.VAL);
682    Tmp2 = ByteSwap_16(Tmp2);
683    return APInt(BitWidth, (uint64_t(Tmp2) << 32) | Tmp1);
684  }
685  if (BitWidth == 64)
686    return APInt(BitWidth, ByteSwap_64(U.VAL));
687
688  APInt Result(getNumWords() * APINT_BITS_PER_WORD, 0);
689  for (unsigned I = 0, N = getNumWords(); I != N; ++I)
690    Result.U.pVal[I] = ByteSwap_64(U.pVal[N - I - 1]);
691  if (Result.BitWidth != BitWidth) {
692    Result.lshrInPlace(Result.BitWidth - BitWidth);
693    Result.BitWidth = BitWidth;
694  }
695  return Result;
696}
697
698APInt APInt::reverseBits() const {
699  switch (BitWidth) {
700  case 64:
701    return APInt(BitWidth, llvm::reverseBits<uint64_t>(U.VAL));
702  case 32:
703    return APInt(BitWidth, llvm::reverseBits<uint32_t>(U.VAL));
704  case 16:
705    return APInt(BitWidth, llvm::reverseBits<uint16_t>(U.VAL));
706  case 8:
707    return APInt(BitWidth, llvm::reverseBits<uint8_t>(U.VAL));
708  default:
709    break;
710  }
711
712  APInt Val(*this);
713  APInt Reversed(BitWidth, 0);
714  unsigned S = BitWidth;
715
716  for (; Val != 0; Val.lshrInPlace(1)) {
717    Reversed <<= 1;
718    Reversed |= Val[0];
719    --S;
720  }
721
722  Reversed <<= S;
723  return Reversed;
724}
725
726APInt llvm::APIntOps::GreatestCommonDivisor(APInt A, APInt B) {
727  // Fast-path a common case.
728  if (A == B) return A;
729
730  // Corner cases: if either operand is zero, the other is the gcd.
731  if (!A) return B;
732  if (!B) return A;
733
734  // Count common powers of 2 and remove all other powers of 2.
735  unsigned Pow2;
736  {
737    unsigned Pow2_A = A.countTrailingZeros();
738    unsigned Pow2_B = B.countTrailingZeros();
739    if (Pow2_A > Pow2_B) {
740      A.lshrInPlace(Pow2_A - Pow2_B);
741      Pow2 = Pow2_B;
742    } else if (Pow2_B > Pow2_A) {
743      B.lshrInPlace(Pow2_B - Pow2_A);
744      Pow2 = Pow2_A;
745    } else {
746      Pow2 = Pow2_A;
747    }
748  }
749
750  // Both operands are odd multiples of 2^Pow_2:
751  //
752  //   gcd(a, b) = gcd(|a - b| / 2^i, min(a, b))
753  //
754  // This is a modified version of Stein's algorithm, taking advantage of
755  // efficient countTrailingZeros().
756  while (A != B) {
757    if (A.ugt(B)) {
758      A -= B;
759      A.lshrInPlace(A.countTrailingZeros() - Pow2);
760    } else {
761      B -= A;
762      B.lshrInPlace(B.countTrailingZeros() - Pow2);
763    }
764  }
765
766  return A;
767}
768
769APInt llvm::APIntOps::RoundDoubleToAPInt(double Double, unsigned width) {
770  uint64_t I = bit_cast<uint64_t>(Double);
771
772  // Get the sign bit from the highest order bit
773  bool isNeg = I >> 63;
774
775  // Get the 11-bit exponent and adjust for the 1023 bit bias
776  int64_t exp = ((I >> 52) & 0x7ff) - 1023;
777
778  // If the exponent is negative, the value is < 0 so just return 0.
779  if (exp < 0)
780    return APInt(width, 0u);
781
782  // Extract the mantissa by clearing the top 12 bits (sign + exponent).
783  uint64_t mantissa = (I & (~0ULL >> 12)) | 1ULL << 52;
784
785  // If the exponent doesn't shift all bits out of the mantissa
786  if (exp < 52)
787    return isNeg ? -APInt(width, mantissa >> (52 - exp)) :
788                    APInt(width, mantissa >> (52 - exp));
789
790  // If the client didn't provide enough bits for us to shift the mantissa into
791  // then the result is undefined, just return 0
792  if (width <= exp - 52)
793    return APInt(width, 0);
794
795  // Otherwise, we have to shift the mantissa bits up to the right location
796  APInt Tmp(width, mantissa);
797  Tmp <<= (unsigned)exp - 52;
798  return isNeg ? -Tmp : Tmp;
799}
800
801/// This function converts this APInt to a double.
802/// The layout for double is as following (IEEE Standard 754):
803///  --------------------------------------
804/// |  Sign    Exponent    Fraction    Bias |
805/// |-------------------------------------- |
806/// |  1[63]   11[62-52]   52[51-00]   1023 |
807///  --------------------------------------
808double APInt::roundToDouble(bool isSigned) const {
809
810  // Handle the simple case where the value is contained in one uint64_t.
811  // It is wrong to optimize getWord(0) to VAL; there might be more than one word.
812  if (isSingleWord() || getActiveBits() <= APINT_BITS_PER_WORD) {
813    if (isSigned) {
814      int64_t sext = SignExtend64(getWord(0), BitWidth);
815      return double(sext);
816    } else
817      return double(getWord(0));
818  }
819
820  // Determine if the value is negative.
821  bool isNeg = isSigned ? (*this)[BitWidth-1] : false;
822
823  // Construct the absolute value if we're negative.
824  APInt Tmp(isNeg ? -(*this) : (*this));
825
826  // Figure out how many bits we're using.
827  unsigned n = Tmp.getActiveBits();
828
829  // The exponent (without bias normalization) is just the number of bits
830  // we are using. Note that the sign bit is gone since we constructed the
831  // absolute value.
832  uint64_t exp = n;
833
834  // Return infinity for exponent overflow
835  if (exp > 1023) {
836    if (!isSigned || !isNeg)
837      return std::numeric_limits<double>::infinity();
838    else
839      return -std::numeric_limits<double>::infinity();
840  }
841  exp += 1023; // Increment for 1023 bias
842
843  // Number of bits in mantissa is 52. To obtain the mantissa value, we must
844  // extract the high 52 bits from the correct words in pVal.
845  uint64_t mantissa;
846  unsigned hiWord = whichWord(n-1);
847  if (hiWord == 0) {
848    mantissa = Tmp.U.pVal[0];
849    if (n > 52)
850      mantissa >>= n - 52; // shift down, we want the top 52 bits.
851  } else {
852    assert(hiWord > 0 && "huh?");
853    uint64_t hibits = Tmp.U.pVal[hiWord] << (52 - n % APINT_BITS_PER_WORD);
854    uint64_t lobits = Tmp.U.pVal[hiWord-1] >> (11 + n % APINT_BITS_PER_WORD);
855    mantissa = hibits | lobits;
856  }
857
858  // The leading bit of mantissa is implicit, so get rid of it.
859  uint64_t sign = isNeg ? (1ULL << (APINT_BITS_PER_WORD - 1)) : 0;
860  uint64_t I = sign | (exp << 52) | mantissa;
861  return bit_cast<double>(I);
862}
863
864// Truncate to new width.
865APInt APInt::trunc(unsigned width) const {
866  assert(width < BitWidth && "Invalid APInt Truncate request");
867  assert(width && "Can't truncate to 0 bits");
868
869  if (width <= APINT_BITS_PER_WORD)
870    return APInt(width, getRawData()[0]);
871
872  APInt Result(getMemory(getNumWords(width)), width);
873
874  // Copy full words.
875  unsigned i;
876  for (i = 0; i != width / APINT_BITS_PER_WORD; i++)
877    Result.U.pVal[i] = U.pVal[i];
878
879  // Truncate and copy any partial word.
880  unsigned bits = (0 - width) % APINT_BITS_PER_WORD;
881  if (bits != 0)
882    Result.U.pVal[i] = U.pVal[i] << bits >> bits;
883
884  return Result;
885}
886
887// Truncate to new width with unsigned saturation.
888APInt APInt::truncUSat(unsigned width) const {
889  assert(width < BitWidth && "Invalid APInt Truncate request");
890  assert(width && "Can't truncate to 0 bits");
891
892  // Can we just losslessly truncate it?
893  if (isIntN(width))
894    return trunc(width);
895  // If not, then just return the new limit.
896  return APInt::getMaxValue(width);
897}
898
899// Truncate to new width with signed saturation.
900APInt APInt::truncSSat(unsigned width) const {
901  assert(width < BitWidth && "Invalid APInt Truncate request");
902  assert(width && "Can't truncate to 0 bits");
903
904  // Can we just losslessly truncate it?
905  if (isSignedIntN(width))
906    return trunc(width);
907  // If not, then just return the new limits.
908  return isNegative() ? APInt::getSignedMinValue(width)
909                      : APInt::getSignedMaxValue(width);
910}
911
912// Sign extend to a new width.
913APInt APInt::sext(unsigned Width) const {
914  assert(Width > BitWidth && "Invalid APInt SignExtend request");
915
916  if (Width <= APINT_BITS_PER_WORD)
917    return APInt(Width, SignExtend64(U.VAL, BitWidth));
918
919  APInt Result(getMemory(getNumWords(Width)), Width);
920
921  // Copy words.
922  std::memcpy(Result.U.pVal, getRawData(), getNumWords() * APINT_WORD_SIZE);
923
924  // Sign extend the last word since there may be unused bits in the input.
925  Result.U.pVal[getNumWords() - 1] =
926      SignExtend64(Result.U.pVal[getNumWords() - 1],
927                   ((BitWidth - 1) % APINT_BITS_PER_WORD) + 1);
928
929  // Fill with sign bits.
930  std::memset(Result.U.pVal + getNumWords(), isNegative() ? -1 : 0,
931              (Result.getNumWords() - getNumWords()) * APINT_WORD_SIZE);
932  Result.clearUnusedBits();
933  return Result;
934}
935
936//  Zero extend to a new width.
937APInt APInt::zext(unsigned width) const {
938  assert(width > BitWidth && "Invalid APInt ZeroExtend request");
939
940  if (width <= APINT_BITS_PER_WORD)
941    return APInt(width, U.VAL);
942
943  APInt Result(getMemory(getNumWords(width)), width);
944
945  // Copy words.
946  std::memcpy(Result.U.pVal, getRawData(), getNumWords() * APINT_WORD_SIZE);
947
948  // Zero remaining words.
949  std::memset(Result.U.pVal + getNumWords(), 0,
950              (Result.getNumWords() - getNumWords()) * APINT_WORD_SIZE);
951
952  return Result;
953}
954
955APInt APInt::zextOrTrunc(unsigned width) const {
956  if (BitWidth < width)
957    return zext(width);
958  if (BitWidth > width)
959    return trunc(width);
960  return *this;
961}
962
963APInt APInt::sextOrTrunc(unsigned width) const {
964  if (BitWidth < width)
965    return sext(width);
966  if (BitWidth > width)
967    return trunc(width);
968  return *this;
969}
970
971APInt APInt::zextOrSelf(unsigned width) const {
972  if (BitWidth < width)
973    return zext(width);
974  return *this;
975}
976
977APInt APInt::sextOrSelf(unsigned width) const {
978  if (BitWidth < width)
979    return sext(width);
980  return *this;
981}
982
983/// Arithmetic right-shift this APInt by shiftAmt.
984/// Arithmetic right-shift function.
985void APInt::ashrInPlace(const APInt &shiftAmt) {
986  ashrInPlace((unsigned)shiftAmt.getLimitedValue(BitWidth));
987}
988
989/// Arithmetic right-shift this APInt by shiftAmt.
990/// Arithmetic right-shift function.
991void APInt::ashrSlowCase(unsigned ShiftAmt) {
992  // Don't bother performing a no-op shift.
993  if (!ShiftAmt)
994    return;
995
996  // Save the original sign bit for later.
997  bool Negative = isNegative();
998
999  // WordShift is the inter-part shift; BitShift is intra-part shift.
1000  unsigned WordShift = ShiftAmt / APINT_BITS_PER_WORD;
1001  unsigned BitShift = ShiftAmt % APINT_BITS_PER_WORD;
1002
1003  unsigned WordsToMove = getNumWords() - WordShift;
1004  if (WordsToMove != 0) {
1005    // Sign extend the last word to fill in the unused bits.
1006    U.pVal[getNumWords() - 1] = SignExtend64(
1007        U.pVal[getNumWords() - 1], ((BitWidth - 1) % APINT_BITS_PER_WORD) + 1);
1008
1009    // Fastpath for moving by whole words.
1010    if (BitShift == 0) {
1011      std::memmove(U.pVal, U.pVal + WordShift, WordsToMove * APINT_WORD_SIZE);
1012    } else {
1013      // Move the words containing significant bits.
1014      for (unsigned i = 0; i != WordsToMove - 1; ++i)
1015        U.pVal[i] = (U.pVal[i + WordShift] >> BitShift) |
1016                    (U.pVal[i + WordShift + 1] << (APINT_BITS_PER_WORD - BitShift));
1017
1018      // Handle the last word which has no high bits to copy.
1019      U.pVal[WordsToMove - 1] = U.pVal[WordShift + WordsToMove - 1] >> BitShift;
1020      // Sign extend one more time.
1021      U.pVal[WordsToMove - 1] =
1022          SignExtend64(U.pVal[WordsToMove - 1], APINT_BITS_PER_WORD - BitShift);
1023    }
1024  }
1025
1026  // Fill in the remainder based on the original sign.
1027  std::memset(U.pVal + WordsToMove, Negative ? -1 : 0,
1028              WordShift * APINT_WORD_SIZE);
1029  clearUnusedBits();
1030}
1031
1032/// Logical right-shift this APInt by shiftAmt.
1033/// Logical right-shift function.
1034void APInt::lshrInPlace(const APInt &shiftAmt) {
1035  lshrInPlace((unsigned)shiftAmt.getLimitedValue(BitWidth));
1036}
1037
1038/// Logical right-shift this APInt by shiftAmt.
1039/// Logical right-shift function.
1040void APInt::lshrSlowCase(unsigned ShiftAmt) {
1041  tcShiftRight(U.pVal, getNumWords(), ShiftAmt);
1042}
1043
1044/// Left-shift this APInt by shiftAmt.
1045/// Left-shift function.
1046APInt &APInt::operator<<=(const APInt &shiftAmt) {
1047  // It's undefined behavior in C to shift by BitWidth or greater.
1048  *this <<= (unsigned)shiftAmt.getLimitedValue(BitWidth);
1049  return *this;
1050}
1051
1052void APInt::shlSlowCase(unsigned ShiftAmt) {
1053  tcShiftLeft(U.pVal, getNumWords(), ShiftAmt);
1054  clearUnusedBits();
1055}
1056
1057// Calculate the rotate amount modulo the bit width.
1058static unsigned rotateModulo(unsigned BitWidth, const APInt &rotateAmt) {
1059  unsigned rotBitWidth = rotateAmt.getBitWidth();
1060  APInt rot = rotateAmt;
1061  if (rotBitWidth < BitWidth) {
1062    // Extend the rotate APInt, so that the urem doesn't divide by 0.
1063    // e.g. APInt(1, 32) would give APInt(1, 0).
1064    rot = rotateAmt.zext(BitWidth);
1065  }
1066  rot = rot.urem(APInt(rot.getBitWidth(), BitWidth));
1067  return rot.getLimitedValue(BitWidth);
1068}
1069
1070APInt APInt::rotl(const APInt &rotateAmt) const {
1071  return rotl(rotateModulo(BitWidth, rotateAmt));
1072}
1073
1074APInt APInt::rotl(unsigned rotateAmt) const {
1075  rotateAmt %= BitWidth;
1076  if (rotateAmt == 0)
1077    return *this;
1078  return shl(rotateAmt) | lshr(BitWidth - rotateAmt);
1079}
1080
1081APInt APInt::rotr(const APInt &rotateAmt) const {
1082  return rotr(rotateModulo(BitWidth, rotateAmt));
1083}
1084
1085APInt APInt::rotr(unsigned rotateAmt) const {
1086  rotateAmt %= BitWidth;
1087  if (rotateAmt == 0)
1088    return *this;
1089  return lshr(rotateAmt) | shl(BitWidth - rotateAmt);
1090}
1091
1092// Square Root - this method computes and returns the square root of "this".
1093// Three mechanisms are used for computation. For small values (<= 5 bits),
1094// a table lookup is done. This gets some performance for common cases. For
1095// values using less than 52 bits, the value is converted to double and then
1096// the libc sqrt function is called. The result is rounded and then converted
1097// back to a uint64_t which is then used to construct the result. Finally,
1098// the Babylonian method for computing square roots is used.
1099APInt APInt::sqrt() const {
1100
1101  // Determine the magnitude of the value.
1102  unsigned magnitude = getActiveBits();
1103
1104  // Use a fast table for some small values. This also gets rid of some
1105  // rounding errors in libc sqrt for small values.
1106  if (magnitude <= 5) {
1107    static const uint8_t results[32] = {
1108      /*     0 */ 0,
1109      /*  1- 2 */ 1, 1,
1110      /*  3- 6 */ 2, 2, 2, 2,
1111      /*  7-12 */ 3, 3, 3, 3, 3, 3,
1112      /* 13-20 */ 4, 4, 4, 4, 4, 4, 4, 4,
1113      /* 21-30 */ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
1114      /*    31 */ 6
1115    };
1116    return APInt(BitWidth, results[ (isSingleWord() ? U.VAL : U.pVal[0]) ]);
1117  }
1118
1119  // If the magnitude of the value fits in less than 52 bits (the precision of
1120  // an IEEE double precision floating point value), then we can use the
1121  // libc sqrt function which will probably use a hardware sqrt computation.
1122  // This should be faster than the algorithm below.
1123  if (magnitude < 52) {
1124    return APInt(BitWidth,
1125                 uint64_t(::round(::sqrt(double(isSingleWord() ? U.VAL
1126                                                               : U.pVal[0])))));
1127  }
1128
1129  // Okay, all the short cuts are exhausted. We must compute it. The following
1130  // is a classical Babylonian method for computing the square root. This code
1131  // was adapted to APInt from a wikipedia article on such computations.
1132  // See http://www.wikipedia.org/ and go to the page named
1133  // Calculate_an_integer_square_root.
1134  unsigned nbits = BitWidth, i = 4;
1135  APInt testy(BitWidth, 16);
1136  APInt x_old(BitWidth, 1);
1137  APInt x_new(BitWidth, 0);
1138  APInt two(BitWidth, 2);
1139
1140  // Select a good starting value using binary logarithms.
1141  for (;; i += 2, testy = testy.shl(2))
1142    if (i >= nbits || this->ule(testy)) {
1143      x_old = x_old.shl(i / 2);
1144      break;
1145    }
1146
1147  // Use the Babylonian method to arrive at the integer square root:
1148  for (;;) {
1149    x_new = (this->udiv(x_old) + x_old).udiv(two);
1150    if (x_old.ule(x_new))
1151      break;
1152    x_old = x_new;
1153  }
1154
1155  // Make sure we return the closest approximation
1156  // NOTE: The rounding calculation below is correct. It will produce an
1157  // off-by-one discrepancy with results from pari/gp. That discrepancy has been
1158  // determined to be a rounding issue with pari/gp as it begins to use a
1159  // floating point representation after 192 bits. There are no discrepancies
1160  // between this algorithm and pari/gp for bit widths < 192 bits.
1161  APInt square(x_old * x_old);
1162  APInt nextSquare((x_old + 1) * (x_old +1));
1163  if (this->ult(square))
1164    return x_old;
1165  assert(this->ule(nextSquare) && "Error in APInt::sqrt computation");
1166  APInt midpoint((nextSquare - square).udiv(two));
1167  APInt offset(*this - square);
1168  if (offset.ult(midpoint))
1169    return x_old;
1170  return x_old + 1;
1171}
1172
1173/// Computes the multiplicative inverse of this APInt for a given modulo. The
1174/// iterative extended Euclidean algorithm is used to solve for this value,
1175/// however we simplify it to speed up calculating only the inverse, and take
1176/// advantage of div+rem calculations. We also use some tricks to avoid copying
1177/// (potentially large) APInts around.
1178/// WARNING: a value of '0' may be returned,
1179///          signifying that no multiplicative inverse exists!
1180APInt APInt::multiplicativeInverse(const APInt& modulo) const {
1181  assert(ult(modulo) && "This APInt must be smaller than the modulo");
1182
1183  // Using the properties listed at the following web page (accessed 06/21/08):
1184  //   http://www.numbertheory.org/php/euclid.html
1185  // (especially the properties numbered 3, 4 and 9) it can be proved that
1186  // BitWidth bits suffice for all the computations in the algorithm implemented
1187  // below. More precisely, this number of bits suffice if the multiplicative
1188  // inverse exists, but may not suffice for the general extended Euclidean
1189  // algorithm.
1190
1191  APInt r[2] = { modulo, *this };
1192  APInt t[2] = { APInt(BitWidth, 0), APInt(BitWidth, 1) };
1193  APInt q(BitWidth, 0);
1194
1195  unsigned i;
1196  for (i = 0; r[i^1] != 0; i ^= 1) {
1197    // An overview of the math without the confusing bit-flipping:
1198    // q = r[i-2] / r[i-1]
1199    // r[i] = r[i-2] % r[i-1]
1200    // t[i] = t[i-2] - t[i-1] * q
1201    udivrem(r[i], r[i^1], q, r[i]);
1202    t[i] -= t[i^1] * q;
1203  }
1204
1205  // If this APInt and the modulo are not coprime, there is no multiplicative
1206  // inverse, so return 0. We check this by looking at the next-to-last
1207  // remainder, which is the gcd(*this,modulo) as calculated by the Euclidean
1208  // algorithm.
1209  if (r[i] != 1)
1210    return APInt(BitWidth, 0);
1211
1212  // The next-to-last t is the multiplicative inverse.  However, we are
1213  // interested in a positive inverse. Calculate a positive one from a negative
1214  // one if necessary. A simple addition of the modulo suffices because
1215  // abs(t[i]) is known to be less than *this/2 (see the link above).
1216  if (t[i].isNegative())
1217    t[i] += modulo;
1218
1219  return std::move(t[i]);
1220}
1221
1222/// Calculate the magic numbers required to implement a signed integer division
1223/// by a constant as a sequence of multiplies, adds and shifts.  Requires that
1224/// the divisor not be 0, 1, or -1.  Taken from "Hacker's Delight", Henry S.
1225/// Warren, Jr., chapter 10.
1226APInt::ms APInt::magic() const {
1227  const APInt& d = *this;
1228  unsigned p;
1229  APInt ad, anc, delta, q1, r1, q2, r2, t;
1230  APInt signedMin = APInt::getSignedMinValue(d.getBitWidth());
1231  struct ms mag;
1232
1233  ad = d.abs();
1234  t = signedMin + (d.lshr(d.getBitWidth() - 1));
1235  anc = t - 1 - t.urem(ad);   // absolute value of nc
1236  p = d.getBitWidth() - 1;    // initialize p
1237  q1 = signedMin.udiv(anc);   // initialize q1 = 2p/abs(nc)
1238  r1 = signedMin - q1*anc;    // initialize r1 = rem(2p,abs(nc))
1239  q2 = signedMin.udiv(ad);    // initialize q2 = 2p/abs(d)
1240  r2 = signedMin - q2*ad;     // initialize r2 = rem(2p,abs(d))
1241  do {
1242    p = p + 1;
1243    q1 = q1<<1;          // update q1 = 2p/abs(nc)
1244    r1 = r1<<1;          // update r1 = rem(2p/abs(nc))
1245    if (r1.uge(anc)) {  // must be unsigned comparison
1246      q1 = q1 + 1;
1247      r1 = r1 - anc;
1248    }
1249    q2 = q2<<1;          // update q2 = 2p/abs(d)
1250    r2 = r2<<1;          // update r2 = rem(2p/abs(d))
1251    if (r2.uge(ad)) {   // must be unsigned comparison
1252      q2 = q2 + 1;
1253      r2 = r2 - ad;
1254    }
1255    delta = ad - r2;
1256  } while (q1.ult(delta) || (q1 == delta && r1 == 0));
1257
1258  mag.m = q2 + 1;
1259  if (d.isNegative()) mag.m = -mag.m;   // resulting magic number
1260  mag.s = p - d.getBitWidth();          // resulting shift
1261  return mag;
1262}
1263
1264/// Calculate the magic numbers required to implement an unsigned integer
1265/// division by a constant as a sequence of multiplies, adds and shifts.
1266/// Requires that the divisor not be 0.  Taken from "Hacker's Delight", Henry
1267/// S. Warren, Jr., chapter 10.
1268/// LeadingZeros can be used to simplify the calculation if the upper bits
1269/// of the divided value are known zero.
1270APInt::mu APInt::magicu(unsigned LeadingZeros) const {
1271  const APInt& d = *this;
1272  unsigned p;
1273  APInt nc, delta, q1, r1, q2, r2;
1274  struct mu magu;
1275  magu.a = 0;               // initialize "add" indicator
1276  APInt allOnes = APInt::getAllOnesValue(d.getBitWidth()).lshr(LeadingZeros);
1277  APInt signedMin = APInt::getSignedMinValue(d.getBitWidth());
1278  APInt signedMax = APInt::getSignedMaxValue(d.getBitWidth());
1279
1280  nc = allOnes - (allOnes - d).urem(d);
1281  p = d.getBitWidth() - 1;  // initialize p
1282  q1 = signedMin.udiv(nc);  // initialize q1 = 2p/nc
1283  r1 = signedMin - q1*nc;   // initialize r1 = rem(2p,nc)
1284  q2 = signedMax.udiv(d);   // initialize q2 = (2p-1)/d
1285  r2 = signedMax - q2*d;    // initialize r2 = rem((2p-1),d)
1286  do {
1287    p = p + 1;
1288    if (r1.uge(nc - r1)) {
1289      q1 = q1 + q1 + 1;  // update q1
1290      r1 = r1 + r1 - nc; // update r1
1291    }
1292    else {
1293      q1 = q1+q1; // update q1
1294      r1 = r1+r1; // update r1
1295    }
1296    if ((r2 + 1).uge(d - r2)) {
1297      if (q2.uge(signedMax)) magu.a = 1;
1298      q2 = q2+q2 + 1;     // update q2
1299      r2 = r2+r2 + 1 - d; // update r2
1300    }
1301    else {
1302      if (q2.uge(signedMin)) magu.a = 1;
1303      q2 = q2+q2;     // update q2
1304      r2 = r2+r2 + 1; // update r2
1305    }
1306    delta = d - 1 - r2;
1307  } while (p < d.getBitWidth()*2 &&
1308           (q1.ult(delta) || (q1 == delta && r1 == 0)));
1309  magu.m = q2 + 1; // resulting magic number
1310  magu.s = p - d.getBitWidth();  // resulting shift
1311  return magu;
1312}
1313
1314/// Implementation of Knuth's Algorithm D (Division of nonnegative integers)
1315/// from "Art of Computer Programming, Volume 2", section 4.3.1, p. 272. The
1316/// variables here have the same names as in the algorithm. Comments explain
1317/// the algorithm and any deviation from it.
1318static void KnuthDiv(uint32_t *u, uint32_t *v, uint32_t *q, uint32_t* r,
1319                     unsigned m, unsigned n) {
1320  assert(u && "Must provide dividend");
1321  assert(v && "Must provide divisor");
1322  assert(q && "Must provide quotient");
1323  assert(u != v && u != q && v != q && "Must use different memory");
1324  assert(n>1 && "n must be > 1");
1325
1326  // b denotes the base of the number system. In our case b is 2^32.
1327  const uint64_t b = uint64_t(1) << 32;
1328
1329// The DEBUG macros here tend to be spam in the debug output if you're not
1330// debugging this code. Disable them unless KNUTH_DEBUG is defined.
1331#ifdef KNUTH_DEBUG
1332#define DEBUG_KNUTH(X) LLVM_DEBUG(X)
1333#else
1334#define DEBUG_KNUTH(X) do {} while(false)
1335#endif
1336
1337  DEBUG_KNUTH(dbgs() << "KnuthDiv: m=" << m << " n=" << n << '\n');
1338  DEBUG_KNUTH(dbgs() << "KnuthDiv: original:");
1339  DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
1340  DEBUG_KNUTH(dbgs() << " by");
1341  DEBUG_KNUTH(for (int i = n; i > 0; i--) dbgs() << " " << v[i - 1]);
1342  DEBUG_KNUTH(dbgs() << '\n');
1343  // D1. [Normalize.] Set d = b / (v[n-1] + 1) and multiply all the digits of
1344  // u and v by d. Note that we have taken Knuth's advice here to use a power
1345  // of 2 value for d such that d * v[n-1] >= b/2 (b is the base). A power of
1346  // 2 allows us to shift instead of multiply and it is easy to determine the
1347  // shift amount from the leading zeros.  We are basically normalizing the u
1348  // and v so that its high bits are shifted to the top of v's range without
1349  // overflow. Note that this can require an extra word in u so that u must
1350  // be of length m+n+1.
1351  unsigned shift = countLeadingZeros(v[n-1]);
1352  uint32_t v_carry = 0;
1353  uint32_t u_carry = 0;
1354  if (shift) {
1355    for (unsigned i = 0; i < m+n; ++i) {
1356      uint32_t u_tmp = u[i] >> (32 - shift);
1357      u[i] = (u[i] << shift) | u_carry;
1358      u_carry = u_tmp;
1359    }
1360    for (unsigned i = 0; i < n; ++i) {
1361      uint32_t v_tmp = v[i] >> (32 - shift);
1362      v[i] = (v[i] << shift) | v_carry;
1363      v_carry = v_tmp;
1364    }
1365  }
1366  u[m+n] = u_carry;
1367
1368  DEBUG_KNUTH(dbgs() << "KnuthDiv:   normal:");
1369  DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
1370  DEBUG_KNUTH(dbgs() << " by");
1371  DEBUG_KNUTH(for (int i = n; i > 0; i--) dbgs() << " " << v[i - 1]);
1372  DEBUG_KNUTH(dbgs() << '\n');
1373
1374  // D2. [Initialize j.]  Set j to m. This is the loop counter over the places.
1375  int j = m;
1376  do {
1377    DEBUG_KNUTH(dbgs() << "KnuthDiv: quotient digit #" << j << '\n');
1378    // D3. [Calculate q'.].
1379    //     Set qp = (u[j+n]*b + u[j+n-1]) / v[n-1]. (qp=qprime=q')
1380    //     Set rp = (u[j+n]*b + u[j+n-1]) % v[n-1]. (rp=rprime=r')
1381    // Now test if qp == b or qp*v[n-2] > b*rp + u[j+n-2]; if so, decrease
1382    // qp by 1, increase rp by v[n-1], and repeat this test if rp < b. The test
1383    // on v[n-2] determines at high speed most of the cases in which the trial
1384    // value qp is one too large, and it eliminates all cases where qp is two
1385    // too large.
1386    uint64_t dividend = Make_64(u[j+n], u[j+n-1]);
1387    DEBUG_KNUTH(dbgs() << "KnuthDiv: dividend == " << dividend << '\n');
1388    uint64_t qp = dividend / v[n-1];
1389    uint64_t rp = dividend % v[n-1];
1390    if (qp == b || qp*v[n-2] > b*rp + u[j+n-2]) {
1391      qp--;
1392      rp += v[n-1];
1393      if (rp < b && (qp == b || qp*v[n-2] > b*rp + u[j+n-2]))
1394        qp--;
1395    }
1396    DEBUG_KNUTH(dbgs() << "KnuthDiv: qp == " << qp << ", rp == " << rp << '\n');
1397
1398    // D4. [Multiply and subtract.] Replace (u[j+n]u[j+n-1]...u[j]) with
1399    // (u[j+n]u[j+n-1]..u[j]) - qp * (v[n-1]...v[1]v[0]). This computation
1400    // consists of a simple multiplication by a one-place number, combined with
1401    // a subtraction.
1402    // The digits (u[j+n]...u[j]) should be kept positive; if the result of
1403    // this step is actually negative, (u[j+n]...u[j]) should be left as the
1404    // true value plus b**(n+1), namely as the b's complement of
1405    // the true value, and a "borrow" to the left should be remembered.
1406    int64_t borrow = 0;
1407    for (unsigned i = 0; i < n; ++i) {
1408      uint64_t p = uint64_t(qp) * uint64_t(v[i]);
1409      int64_t subres = int64_t(u[j+i]) - borrow - Lo_32(p);
1410      u[j+i] = Lo_32(subres);
1411      borrow = Hi_32(p) - Hi_32(subres);
1412      DEBUG_KNUTH(dbgs() << "KnuthDiv: u[j+i] = " << u[j + i]
1413                        << ", borrow = " << borrow << '\n');
1414    }
1415    bool isNeg = u[j+n] < borrow;
1416    u[j+n] -= Lo_32(borrow);
1417
1418    DEBUG_KNUTH(dbgs() << "KnuthDiv: after subtraction:");
1419    DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
1420    DEBUG_KNUTH(dbgs() << '\n');
1421
1422    // D5. [Test remainder.] Set q[j] = qp. If the result of step D4 was
1423    // negative, go to step D6; otherwise go on to step D7.
1424    q[j] = Lo_32(qp);
1425    if (isNeg) {
1426      // D6. [Add back]. The probability that this step is necessary is very
1427      // small, on the order of only 2/b. Make sure that test data accounts for
1428      // this possibility. Decrease q[j] by 1
1429      q[j]--;
1430      // and add (0v[n-1]...v[1]v[0]) to (u[j+n]u[j+n-1]...u[j+1]u[j]).
1431      // A carry will occur to the left of u[j+n], and it should be ignored
1432      // since it cancels with the borrow that occurred in D4.
1433      bool carry = false;
1434      for (unsigned i = 0; i < n; i++) {
1435        uint32_t limit = std::min(u[j+i],v[i]);
1436        u[j+i] += v[i] + carry;
1437        carry = u[j+i] < limit || (carry && u[j+i] == limit);
1438      }
1439      u[j+n] += carry;
1440    }
1441    DEBUG_KNUTH(dbgs() << "KnuthDiv: after correction:");
1442    DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
1443    DEBUG_KNUTH(dbgs() << "\nKnuthDiv: digit result = " << q[j] << '\n');
1444
1445    // D7. [Loop on j.]  Decrease j by one. Now if j >= 0, go back to D3.
1446  } while (--j >= 0);
1447
1448  DEBUG_KNUTH(dbgs() << "KnuthDiv: quotient:");
1449  DEBUG_KNUTH(for (int i = m; i >= 0; i--) dbgs() << " " << q[i]);
1450  DEBUG_KNUTH(dbgs() << '\n');
1451
1452  // D8. [Unnormalize]. Now q[...] is the desired quotient, and the desired
1453  // remainder may be obtained by dividing u[...] by d. If r is non-null we
1454  // compute the remainder (urem uses this).
1455  if (r) {
1456    // The value d is expressed by the "shift" value above since we avoided
1457    // multiplication by d by using a shift left. So, all we have to do is
1458    // shift right here.
1459    if (shift) {
1460      uint32_t carry = 0;
1461      DEBUG_KNUTH(dbgs() << "KnuthDiv: remainder:");
1462      for (int i = n-1; i >= 0; i--) {
1463        r[i] = (u[i] >> shift) | carry;
1464        carry = u[i] << (32 - shift);
1465        DEBUG_KNUTH(dbgs() << " " << r[i]);
1466      }
1467    } else {
1468      for (int i = n-1; i >= 0; i--) {
1469        r[i] = u[i];
1470        DEBUG_KNUTH(dbgs() << " " << r[i]);
1471      }
1472    }
1473    DEBUG_KNUTH(dbgs() << '\n');
1474  }
1475  DEBUG_KNUTH(dbgs() << '\n');
1476}
1477
1478void APInt::divide(const WordType *LHS, unsigned lhsWords, const WordType *RHS,
1479                   unsigned rhsWords, WordType *Quotient, WordType *Remainder) {
1480  assert(lhsWords >= rhsWords && "Fractional result");
1481
1482  // First, compose the values into an array of 32-bit words instead of
1483  // 64-bit words. This is a necessity of both the "short division" algorithm
1484  // and the Knuth "classical algorithm" which requires there to be native
1485  // operations for +, -, and * on an m bit value with an m*2 bit result. We
1486  // can't use 64-bit operands here because we don't have native results of
1487  // 128-bits. Furthermore, casting the 64-bit values to 32-bit values won't
1488  // work on large-endian machines.
1489  unsigned n = rhsWords * 2;
1490  unsigned m = (lhsWords * 2) - n;
1491
1492  // Allocate space for the temporary values we need either on the stack, if
1493  // it will fit, or on the heap if it won't.
1494  uint32_t SPACE[128];
1495  uint32_t *U = nullptr;
1496  uint32_t *V = nullptr;
1497  uint32_t *Q = nullptr;
1498  uint32_t *R = nullptr;
1499  if ((Remainder?4:3)*n+2*m+1 <= 128) {
1500    U = &SPACE[0];
1501    V = &SPACE[m+n+1];
1502    Q = &SPACE[(m+n+1) + n];
1503    if (Remainder)
1504      R = &SPACE[(m+n+1) + n + (m+n)];
1505  } else {
1506    U = new uint32_t[m + n + 1];
1507    V = new uint32_t[n];
1508    Q = new uint32_t[m+n];
1509    if (Remainder)
1510      R = new uint32_t[n];
1511  }
1512
1513  // Initialize the dividend
1514  memset(U, 0, (m+n+1)*sizeof(uint32_t));
1515  for (unsigned i = 0; i < lhsWords; ++i) {
1516    uint64_t tmp = LHS[i];
1517    U[i * 2] = Lo_32(tmp);
1518    U[i * 2 + 1] = Hi_32(tmp);
1519  }
1520  U[m+n] = 0; // this extra word is for "spill" in the Knuth algorithm.
1521
1522  // Initialize the divisor
1523  memset(V, 0, (n)*sizeof(uint32_t));
1524  for (unsigned i = 0; i < rhsWords; ++i) {
1525    uint64_t tmp = RHS[i];
1526    V[i * 2] = Lo_32(tmp);
1527    V[i * 2 + 1] = Hi_32(tmp);
1528  }
1529
1530  // initialize the quotient and remainder
1531  memset(Q, 0, (m+n) * sizeof(uint32_t));
1532  if (Remainder)
1533    memset(R, 0, n * sizeof(uint32_t));
1534
1535  // Now, adjust m and n for the Knuth division. n is the number of words in
1536  // the divisor. m is the number of words by which the dividend exceeds the
1537  // divisor (i.e. m+n is the length of the dividend). These sizes must not
1538  // contain any zero words or the Knuth algorithm fails.
1539  for (unsigned i = n; i > 0 && V[i-1] == 0; i--) {
1540    n--;
1541    m++;
1542  }
1543  for (unsigned i = m+n; i > 0 && U[i-1] == 0; i--)
1544    m--;
1545
1546  // If we're left with only a single word for the divisor, Knuth doesn't work
1547  // so we implement the short division algorithm here. This is much simpler
1548  // and faster because we are certain that we can divide a 64-bit quantity
1549  // by a 32-bit quantity at hardware speed and short division is simply a
1550  // series of such operations. This is just like doing short division but we
1551  // are using base 2^32 instead of base 10.
1552  assert(n != 0 && "Divide by zero?");
1553  if (n == 1) {
1554    uint32_t divisor = V[0];
1555    uint32_t remainder = 0;
1556    for (int i = m; i >= 0; i--) {
1557      uint64_t partial_dividend = Make_64(remainder, U[i]);
1558      if (partial_dividend == 0) {
1559        Q[i] = 0;
1560        remainder = 0;
1561      } else if (partial_dividend < divisor) {
1562        Q[i] = 0;
1563        remainder = Lo_32(partial_dividend);
1564      } else if (partial_dividend == divisor) {
1565        Q[i] = 1;
1566        remainder = 0;
1567      } else {
1568        Q[i] = Lo_32(partial_dividend / divisor);
1569        remainder = Lo_32(partial_dividend - (Q[i] * divisor));
1570      }
1571    }
1572    if (R)
1573      R[0] = remainder;
1574  } else {
1575    // Now we're ready to invoke the Knuth classical divide algorithm. In this
1576    // case n > 1.
1577    KnuthDiv(U, V, Q, R, m, n);
1578  }
1579
1580  // If the caller wants the quotient
1581  if (Quotient) {
1582    for (unsigned i = 0; i < lhsWords; ++i)
1583      Quotient[i] = Make_64(Q[i*2+1], Q[i*2]);
1584  }
1585
1586  // If the caller wants the remainder
1587  if (Remainder) {
1588    for (unsigned i = 0; i < rhsWords; ++i)
1589      Remainder[i] = Make_64(R[i*2+1], R[i*2]);
1590  }
1591
1592  // Clean up the memory we allocated.
1593  if (U != &SPACE[0]) {
1594    delete [] U;
1595    delete [] V;
1596    delete [] Q;
1597    delete [] R;
1598  }
1599}
1600
1601APInt APInt::udiv(const APInt &RHS) const {
1602  assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
1603
1604  // First, deal with the easy case
1605  if (isSingleWord()) {
1606    assert(RHS.U.VAL != 0 && "Divide by zero?");
1607    return APInt(BitWidth, U.VAL / RHS.U.VAL);
1608  }
1609
1610  // Get some facts about the LHS and RHS number of bits and words
1611  unsigned lhsWords = getNumWords(getActiveBits());
1612  unsigned rhsBits  = RHS.getActiveBits();
1613  unsigned rhsWords = getNumWords(rhsBits);
1614  assert(rhsWords && "Divided by zero???");
1615
1616  // Deal with some degenerate cases
1617  if (!lhsWords)
1618    // 0 / X ===> 0
1619    return APInt(BitWidth, 0);
1620  if (rhsBits == 1)
1621    // X / 1 ===> X
1622    return *this;
1623  if (lhsWords < rhsWords || this->ult(RHS))
1624    // X / Y ===> 0, iff X < Y
1625    return APInt(BitWidth, 0);
1626  if (*this == RHS)
1627    // X / X ===> 1
1628    return APInt(BitWidth, 1);
1629  if (lhsWords == 1) // rhsWords is 1 if lhsWords is 1.
1630    // All high words are zero, just use native divide
1631    return APInt(BitWidth, this->U.pVal[0] / RHS.U.pVal[0]);
1632
1633  // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1634  APInt Quotient(BitWidth, 0); // to hold result.
1635  divide(U.pVal, lhsWords, RHS.U.pVal, rhsWords, Quotient.U.pVal, nullptr);
1636  return Quotient;
1637}
1638
1639APInt APInt::udiv(uint64_t RHS) const {
1640  assert(RHS != 0 && "Divide by zero?");
1641
1642  // First, deal with the easy case
1643  if (isSingleWord())
1644    return APInt(BitWidth, U.VAL / RHS);
1645
1646  // Get some facts about the LHS words.
1647  unsigned lhsWords = getNumWords(getActiveBits());
1648
1649  // Deal with some degenerate cases
1650  if (!lhsWords)
1651    // 0 / X ===> 0
1652    return APInt(BitWidth, 0);
1653  if (RHS == 1)
1654    // X / 1 ===> X
1655    return *this;
1656  if (this->ult(RHS))
1657    // X / Y ===> 0, iff X < Y
1658    return APInt(BitWidth, 0);
1659  if (*this == RHS)
1660    // X / X ===> 1
1661    return APInt(BitWidth, 1);
1662  if (lhsWords == 1) // rhsWords is 1 if lhsWords is 1.
1663    // All high words are zero, just use native divide
1664    return APInt(BitWidth, this->U.pVal[0] / RHS);
1665
1666  // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1667  APInt Quotient(BitWidth, 0); // to hold result.
1668  divide(U.pVal, lhsWords, &RHS, 1, Quotient.U.pVal, nullptr);
1669  return Quotient;
1670}
1671
1672APInt APInt::sdiv(const APInt &RHS) const {
1673  if (isNegative()) {
1674    if (RHS.isNegative())
1675      return (-(*this)).udiv(-RHS);
1676    return -((-(*this)).udiv(RHS));
1677  }
1678  if (RHS.isNegative())
1679    return -(this->udiv(-RHS));
1680  return this->udiv(RHS);
1681}
1682
1683APInt APInt::sdiv(int64_t RHS) const {
1684  if (isNegative()) {
1685    if (RHS < 0)
1686      return (-(*this)).udiv(-RHS);
1687    return -((-(*this)).udiv(RHS));
1688  }
1689  if (RHS < 0)
1690    return -(this->udiv(-RHS));
1691  return this->udiv(RHS);
1692}
1693
1694APInt APInt::urem(const APInt &RHS) const {
1695  assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
1696  if (isSingleWord()) {
1697    assert(RHS.U.VAL != 0 && "Remainder by zero?");
1698    return APInt(BitWidth, U.VAL % RHS.U.VAL);
1699  }
1700
1701  // Get some facts about the LHS
1702  unsigned lhsWords = getNumWords(getActiveBits());
1703
1704  // Get some facts about the RHS
1705  unsigned rhsBits = RHS.getActiveBits();
1706  unsigned rhsWords = getNumWords(rhsBits);
1707  assert(rhsWords && "Performing remainder operation by zero ???");
1708
1709  // Check the degenerate cases
1710  if (lhsWords == 0)
1711    // 0 % Y ===> 0
1712    return APInt(BitWidth, 0);
1713  if (rhsBits == 1)
1714    // X % 1 ===> 0
1715    return APInt(BitWidth, 0);
1716  if (lhsWords < rhsWords || this->ult(RHS))
1717    // X % Y ===> X, iff X < Y
1718    return *this;
1719  if (*this == RHS)
1720    // X % X == 0;
1721    return APInt(BitWidth, 0);
1722  if (lhsWords == 1)
1723    // All high words are zero, just use native remainder
1724    return APInt(BitWidth, U.pVal[0] % RHS.U.pVal[0]);
1725
1726  // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1727  APInt Remainder(BitWidth, 0);
1728  divide(U.pVal, lhsWords, RHS.U.pVal, rhsWords, nullptr, Remainder.U.pVal);
1729  return Remainder;
1730}
1731
1732uint64_t APInt::urem(uint64_t RHS) const {
1733  assert(RHS != 0 && "Remainder by zero?");
1734
1735  if (isSingleWord())
1736    return U.VAL % RHS;
1737
1738  // Get some facts about the LHS
1739  unsigned lhsWords = getNumWords(getActiveBits());
1740
1741  // Check the degenerate cases
1742  if (lhsWords == 0)
1743    // 0 % Y ===> 0
1744    return 0;
1745  if (RHS == 1)
1746    // X % 1 ===> 0
1747    return 0;
1748  if (this->ult(RHS))
1749    // X % Y ===> X, iff X < Y
1750    return getZExtValue();
1751  if (*this == RHS)
1752    // X % X == 0;
1753    return 0;
1754  if (lhsWords == 1)
1755    // All high words are zero, just use native remainder
1756    return U.pVal[0] % RHS;
1757
1758  // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1759  uint64_t Remainder;
1760  divide(U.pVal, lhsWords, &RHS, 1, nullptr, &Remainder);
1761  return Remainder;
1762}
1763
1764APInt APInt::srem(const APInt &RHS) const {
1765  if (isNegative()) {
1766    if (RHS.isNegative())
1767      return -((-(*this)).urem(-RHS));
1768    return -((-(*this)).urem(RHS));
1769  }
1770  if (RHS.isNegative())
1771    return this->urem(-RHS);
1772  return this->urem(RHS);
1773}
1774
1775int64_t APInt::srem(int64_t RHS) const {
1776  if (isNegative()) {
1777    if (RHS < 0)
1778      return -((-(*this)).urem(-RHS));
1779    return -((-(*this)).urem(RHS));
1780  }
1781  if (RHS < 0)
1782    return this->urem(-RHS);
1783  return this->urem(RHS);
1784}
1785
1786void APInt::udivrem(const APInt &LHS, const APInt &RHS,
1787                    APInt &Quotient, APInt &Remainder) {
1788  assert(LHS.BitWidth == RHS.BitWidth && "Bit widths must be the same");
1789  unsigned BitWidth = LHS.BitWidth;
1790
1791  // First, deal with the easy case
1792  if (LHS.isSingleWord()) {
1793    assert(RHS.U.VAL != 0 && "Divide by zero?");
1794    uint64_t QuotVal = LHS.U.VAL / RHS.U.VAL;
1795    uint64_t RemVal = LHS.U.VAL % RHS.U.VAL;
1796    Quotient = APInt(BitWidth, QuotVal);
1797    Remainder = APInt(BitWidth, RemVal);
1798    return;
1799  }
1800
1801  // Get some size facts about the dividend and divisor
1802  unsigned lhsWords = getNumWords(LHS.getActiveBits());
1803  unsigned rhsBits  = RHS.getActiveBits();
1804  unsigned rhsWords = getNumWords(rhsBits);
1805  assert(rhsWords && "Performing divrem operation by zero ???");
1806
1807  // Check the degenerate cases
1808  if (lhsWords == 0) {
1809    Quotient = APInt(BitWidth, 0);    // 0 / Y ===> 0
1810    Remainder = APInt(BitWidth, 0);   // 0 % Y ===> 0
1811    return;
1812  }
1813
1814  if (rhsBits == 1) {
1815    Quotient = LHS;                   // X / 1 ===> X
1816    Remainder = APInt(BitWidth, 0);   // X % 1 ===> 0
1817  }
1818
1819  if (lhsWords < rhsWords || LHS.ult(RHS)) {
1820    Remainder = LHS;                  // X % Y ===> X, iff X < Y
1821    Quotient = APInt(BitWidth, 0);    // X / Y ===> 0, iff X < Y
1822    return;
1823  }
1824
1825  if (LHS == RHS) {
1826    Quotient  = APInt(BitWidth, 1);   // X / X ===> 1
1827    Remainder = APInt(BitWidth, 0);   // X % X ===> 0;
1828    return;
1829  }
1830
1831  // Make sure there is enough space to hold the results.
1832  // NOTE: This assumes that reallocate won't affect any bits if it doesn't
1833  // change the size. This is necessary if Quotient or Remainder is aliased
1834  // with LHS or RHS.
1835  Quotient.reallocate(BitWidth);
1836  Remainder.reallocate(BitWidth);
1837
1838  if (lhsWords == 1) { // rhsWords is 1 if lhsWords is 1.
1839    // There is only one word to consider so use the native versions.
1840    uint64_t lhsValue = LHS.U.pVal[0];
1841    uint64_t rhsValue = RHS.U.pVal[0];
1842    Quotient = lhsValue / rhsValue;
1843    Remainder = lhsValue % rhsValue;
1844    return;
1845  }
1846
1847  // Okay, lets do it the long way
1848  divide(LHS.U.pVal, lhsWords, RHS.U.pVal, rhsWords, Quotient.U.pVal,
1849         Remainder.U.pVal);
1850  // Clear the rest of the Quotient and Remainder.
1851  std::memset(Quotient.U.pVal + lhsWords, 0,
1852              (getNumWords(BitWidth) - lhsWords) * APINT_WORD_SIZE);
1853  std::memset(Remainder.U.pVal + rhsWords, 0,
1854              (getNumWords(BitWidth) - rhsWords) * APINT_WORD_SIZE);
1855}
1856
1857void APInt::udivrem(const APInt &LHS, uint64_t RHS, APInt &Quotient,
1858                    uint64_t &Remainder) {
1859  assert(RHS != 0 && "Divide by zero?");
1860  unsigned BitWidth = LHS.BitWidth;
1861
1862  // First, deal with the easy case
1863  if (LHS.isSingleWord()) {
1864    uint64_t QuotVal = LHS.U.VAL / RHS;
1865    Remainder = LHS.U.VAL % RHS;
1866    Quotient = APInt(BitWidth, QuotVal);
1867    return;
1868  }
1869
1870  // Get some size facts about the dividend and divisor
1871  unsigned lhsWords = getNumWords(LHS.getActiveBits());
1872
1873  // Check the degenerate cases
1874  if (lhsWords == 0) {
1875    Quotient = APInt(BitWidth, 0);    // 0 / Y ===> 0
1876    Remainder = 0;                    // 0 % Y ===> 0
1877    return;
1878  }
1879
1880  if (RHS == 1) {
1881    Quotient = LHS;                   // X / 1 ===> X
1882    Remainder = 0;                    // X % 1 ===> 0
1883    return;
1884  }
1885
1886  if (LHS.ult(RHS)) {
1887    Remainder = LHS.getZExtValue();   // X % Y ===> X, iff X < Y
1888    Quotient = APInt(BitWidth, 0);    // X / Y ===> 0, iff X < Y
1889    return;
1890  }
1891
1892  if (LHS == RHS) {
1893    Quotient  = APInt(BitWidth, 1);   // X / X ===> 1
1894    Remainder = 0;                    // X % X ===> 0;
1895    return;
1896  }
1897
1898  // Make sure there is enough space to hold the results.
1899  // NOTE: This assumes that reallocate won't affect any bits if it doesn't
1900  // change the size. This is necessary if Quotient is aliased with LHS.
1901  Quotient.reallocate(BitWidth);
1902
1903  if (lhsWords == 1) { // rhsWords is 1 if lhsWords is 1.
1904    // There is only one word to consider so use the native versions.
1905    uint64_t lhsValue = LHS.U.pVal[0];
1906    Quotient = lhsValue / RHS;
1907    Remainder = lhsValue % RHS;
1908    return;
1909  }
1910
1911  // Okay, lets do it the long way
1912  divide(LHS.U.pVal, lhsWords, &RHS, 1, Quotient.U.pVal, &Remainder);
1913  // Clear the rest of the Quotient.
1914  std::memset(Quotient.U.pVal + lhsWords, 0,
1915              (getNumWords(BitWidth) - lhsWords) * APINT_WORD_SIZE);
1916}
1917
1918void APInt::sdivrem(const APInt &LHS, const APInt &RHS,
1919                    APInt &Quotient, APInt &Remainder) {
1920  if (LHS.isNegative()) {
1921    if (RHS.isNegative())
1922      APInt::udivrem(-LHS, -RHS, Quotient, Remainder);
1923    else {
1924      APInt::udivrem(-LHS, RHS, Quotient, Remainder);
1925      Quotient.negate();
1926    }
1927    Remainder.negate();
1928  } else if (RHS.isNegative()) {
1929    APInt::udivrem(LHS, -RHS, Quotient, Remainder);
1930    Quotient.negate();
1931  } else {
1932    APInt::udivrem(LHS, RHS, Quotient, Remainder);
1933  }
1934}
1935
1936void APInt::sdivrem(const APInt &LHS, int64_t RHS,
1937                    APInt &Quotient, int64_t &Remainder) {
1938  uint64_t R = Remainder;
1939  if (LHS.isNegative()) {
1940    if (RHS < 0)
1941      APInt::udivrem(-LHS, -RHS, Quotient, R);
1942    else {
1943      APInt::udivrem(-LHS, RHS, Quotient, R);
1944      Quotient.negate();
1945    }
1946    R = -R;
1947  } else if (RHS < 0) {
1948    APInt::udivrem(LHS, -RHS, Quotient, R);
1949    Quotient.negate();
1950  } else {
1951    APInt::udivrem(LHS, RHS, Quotient, R);
1952  }
1953  Remainder = R;
1954}
1955
1956APInt APInt::sadd_ov(const APInt &RHS, bool &Overflow) const {
1957  APInt Res = *this+RHS;
1958  Overflow = isNonNegative() == RHS.isNonNegative() &&
1959             Res.isNonNegative() != isNonNegative();
1960  return Res;
1961}
1962
1963APInt APInt::uadd_ov(const APInt &RHS, bool &Overflow) const {
1964  APInt Res = *this+RHS;
1965  Overflow = Res.ult(RHS);
1966  return Res;
1967}
1968
1969APInt APInt::ssub_ov(const APInt &RHS, bool &Overflow) const {
1970  APInt Res = *this - RHS;
1971  Overflow = isNonNegative() != RHS.isNonNegative() &&
1972             Res.isNonNegative() != isNonNegative();
1973  return Res;
1974}
1975
1976APInt APInt::usub_ov(const APInt &RHS, bool &Overflow) const {
1977  APInt Res = *this-RHS;
1978  Overflow = Res.ugt(*this);
1979  return Res;
1980}
1981
1982APInt APInt::sdiv_ov(const APInt &RHS, bool &Overflow) const {
1983  // MININT/-1  -->  overflow.
1984  Overflow = isMinSignedValue() && RHS.isAllOnesValue();
1985  return sdiv(RHS);
1986}
1987
1988APInt APInt::smul_ov(const APInt &RHS, bool &Overflow) const {
1989  APInt Res = *this * RHS;
1990
1991  if (*this != 0 && RHS != 0)
1992    Overflow = Res.sdiv(RHS) != *this || Res.sdiv(*this) != RHS;
1993  else
1994    Overflow = false;
1995  return Res;
1996}
1997
1998APInt APInt::umul_ov(const APInt &RHS, bool &Overflow) const {
1999  if (countLeadingZeros() + RHS.countLeadingZeros() + 2 <= BitWidth) {
2000    Overflow = true;
2001    return *this * RHS;
2002  }
2003
2004  APInt Res = lshr(1) * RHS;
2005  Overflow = Res.isNegative();
2006  Res <<= 1;
2007  if ((*this)[0]) {
2008    Res += RHS;
2009    if (Res.ult(RHS))
2010      Overflow = true;
2011  }
2012  return Res;
2013}
2014
2015APInt APInt::sshl_ov(const APInt &ShAmt, bool &Overflow) const {
2016  Overflow = ShAmt.uge(getBitWidth());
2017  if (Overflow)
2018    return APInt(BitWidth, 0);
2019
2020  if (isNonNegative()) // Don't allow sign change.
2021    Overflow = ShAmt.uge(countLeadingZeros());
2022  else
2023    Overflow = ShAmt.uge(countLeadingOnes());
2024
2025  return *this << ShAmt;
2026}
2027
2028APInt APInt::ushl_ov(const APInt &ShAmt, bool &Overflow) const {
2029  Overflow = ShAmt.uge(getBitWidth());
2030  if (Overflow)
2031    return APInt(BitWidth, 0);
2032
2033  Overflow = ShAmt.ugt(countLeadingZeros());
2034
2035  return *this << ShAmt;
2036}
2037
2038APInt APInt::sadd_sat(const APInt &RHS) const {
2039  bool Overflow;
2040  APInt Res = sadd_ov(RHS, Overflow);
2041  if (!Overflow)
2042    return Res;
2043
2044  return isNegative() ? APInt::getSignedMinValue(BitWidth)
2045                      : APInt::getSignedMaxValue(BitWidth);
2046}
2047
2048APInt APInt::uadd_sat(const APInt &RHS) const {
2049  bool Overflow;
2050  APInt Res = uadd_ov(RHS, Overflow);
2051  if (!Overflow)
2052    return Res;
2053
2054  return APInt::getMaxValue(BitWidth);
2055}
2056
2057APInt APInt::ssub_sat(const APInt &RHS) const {
2058  bool Overflow;
2059  APInt Res = ssub_ov(RHS, Overflow);
2060  if (!Overflow)
2061    return Res;
2062
2063  return isNegative() ? APInt::getSignedMinValue(BitWidth)
2064                      : APInt::getSignedMaxValue(BitWidth);
2065}
2066
2067APInt APInt::usub_sat(const APInt &RHS) const {
2068  bool Overflow;
2069  APInt Res = usub_ov(RHS, Overflow);
2070  if (!Overflow)
2071    return Res;
2072
2073  return APInt(BitWidth, 0);
2074}
2075
2076APInt APInt::smul_sat(const APInt &RHS) const {
2077  bool Overflow;
2078  APInt Res = smul_ov(RHS, Overflow);
2079  if (!Overflow)
2080    return Res;
2081
2082  // The result is negative if one and only one of inputs is negative.
2083  bool ResIsNegative = isNegative() ^ RHS.isNegative();
2084
2085  return ResIsNegative ? APInt::getSignedMinValue(BitWidth)
2086                       : APInt::getSignedMaxValue(BitWidth);
2087}
2088
2089APInt APInt::umul_sat(const APInt &RHS) const {
2090  bool Overflow;
2091  APInt Res = umul_ov(RHS, Overflow);
2092  if (!Overflow)
2093    return Res;
2094
2095  return APInt::getMaxValue(BitWidth);
2096}
2097
2098APInt APInt::sshl_sat(const APInt &RHS) const {
2099  bool Overflow;
2100  APInt Res = sshl_ov(RHS, Overflow);
2101  if (!Overflow)
2102    return Res;
2103
2104  return isNegative() ? APInt::getSignedMinValue(BitWidth)
2105                      : APInt::getSignedMaxValue(BitWidth);
2106}
2107
2108APInt APInt::ushl_sat(const APInt &RHS) const {
2109  bool Overflow;
2110  APInt Res = ushl_ov(RHS, Overflow);
2111  if (!Overflow)
2112    return Res;
2113
2114  return APInt::getMaxValue(BitWidth);
2115}
2116
2117void APInt::fromString(unsigned numbits, StringRef str, uint8_t radix) {
2118  // Check our assumptions here
2119  assert(!str.empty() && "Invalid string length");
2120  assert((radix == 10 || radix == 8 || radix == 16 || radix == 2 ||
2121          radix == 36) &&
2122         "Radix should be 2, 8, 10, 16, or 36!");
2123
2124  StringRef::iterator p = str.begin();
2125  size_t slen = str.size();
2126  bool isNeg = *p == '-';
2127  if (*p == '-' || *p == '+') {
2128    p++;
2129    slen--;
2130    assert(slen && "String is only a sign, needs a value.");
2131  }
2132  assert((slen <= numbits || radix != 2) && "Insufficient bit width");
2133  assert(((slen-1)*3 <= numbits || radix != 8) && "Insufficient bit width");
2134  assert(((slen-1)*4 <= numbits || radix != 16) && "Insufficient bit width");
2135  assert((((slen-1)*64)/22 <= numbits || radix != 10) &&
2136         "Insufficient bit width");
2137
2138  // Allocate memory if needed
2139  if (isSingleWord())
2140    U.VAL = 0;
2141  else
2142    U.pVal = getClearedMemory(getNumWords());
2143
2144  // Figure out if we can shift instead of multiply
2145  unsigned shift = (radix == 16 ? 4 : radix == 8 ? 3 : radix == 2 ? 1 : 0);
2146
2147  // Enter digit traversal loop
2148  for (StringRef::iterator e = str.end(); p != e; ++p) {
2149    unsigned digit = getDigit(*p, radix);
2150    assert(digit < radix && "Invalid character in digit string");
2151
2152    // Shift or multiply the value by the radix
2153    if (slen > 1) {
2154      if (shift)
2155        *this <<= shift;
2156      else
2157        *this *= radix;
2158    }
2159
2160    // Add in the digit we just interpreted
2161    *this += digit;
2162  }
2163  // If its negative, put it in two's complement form
2164  if (isNeg)
2165    this->negate();
2166}
2167
2168void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix,
2169                     bool Signed, bool formatAsCLiteral) const {
2170  assert((Radix == 10 || Radix == 8 || Radix == 16 || Radix == 2 ||
2171          Radix == 36) &&
2172         "Radix should be 2, 8, 10, 16, or 36!");
2173
2174  const char *Prefix = "";
2175  if (formatAsCLiteral) {
2176    switch (Radix) {
2177      case 2:
2178        // Binary literals are a non-standard extension added in gcc 4.3:
2179        // http://gcc.gnu.org/onlinedocs/gcc-4.3.0/gcc/Binary-constants.html
2180        Prefix = "0b";
2181        break;
2182      case 8:
2183        Prefix = "0";
2184        break;
2185      case 10:
2186        break; // No prefix
2187      case 16:
2188        Prefix = "0x";
2189        break;
2190      default:
2191        llvm_unreachable("Invalid radix!");
2192    }
2193  }
2194
2195  // First, check for a zero value and just short circuit the logic below.
2196  if (*this == 0) {
2197    while (*Prefix) {
2198      Str.push_back(*Prefix);
2199      ++Prefix;
2200    };
2201    Str.push_back('0');
2202    return;
2203  }
2204
2205  static const char Digits[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
2206
2207  if (isSingleWord()) {
2208    char Buffer[65];
2209    char *BufPtr = std::end(Buffer);
2210
2211    uint64_t N;
2212    if (!Signed) {
2213      N = getZExtValue();
2214    } else {
2215      int64_t I = getSExtValue();
2216      if (I >= 0) {
2217        N = I;
2218      } else {
2219        Str.push_back('-');
2220        N = -(uint64_t)I;
2221      }
2222    }
2223
2224    while (*Prefix) {
2225      Str.push_back(*Prefix);
2226      ++Prefix;
2227    };
2228
2229    while (N) {
2230      *--BufPtr = Digits[N % Radix];
2231      N /= Radix;
2232    }
2233    Str.append(BufPtr, std::end(Buffer));
2234    return;
2235  }
2236
2237  APInt Tmp(*this);
2238
2239  if (Signed && isNegative()) {
2240    // They want to print the signed version and it is a negative value
2241    // Flip the bits and add one to turn it into the equivalent positive
2242    // value and put a '-' in the result.
2243    Tmp.negate();
2244    Str.push_back('-');
2245  }
2246
2247  while (*Prefix) {
2248    Str.push_back(*Prefix);
2249    ++Prefix;
2250  };
2251
2252  // We insert the digits backward, then reverse them to get the right order.
2253  unsigned StartDig = Str.size();
2254
2255  // For the 2, 8 and 16 bit cases, we can just shift instead of divide
2256  // because the number of bits per digit (1, 3 and 4 respectively) divides
2257  // equally.  We just shift until the value is zero.
2258  if (Radix == 2 || Radix == 8 || Radix == 16) {
2259    // Just shift tmp right for each digit width until it becomes zero
2260    unsigned ShiftAmt = (Radix == 16 ? 4 : (Radix == 8 ? 3 : 1));
2261    unsigned MaskAmt = Radix - 1;
2262
2263    while (Tmp.getBoolValue()) {
2264      unsigned Digit = unsigned(Tmp.getRawData()[0]) & MaskAmt;
2265      Str.push_back(Digits[Digit]);
2266      Tmp.lshrInPlace(ShiftAmt);
2267    }
2268  } else {
2269    while (Tmp.getBoolValue()) {
2270      uint64_t Digit;
2271      udivrem(Tmp, Radix, Tmp, Digit);
2272      assert(Digit < Radix && "divide failed");
2273      Str.push_back(Digits[Digit]);
2274    }
2275  }
2276
2277  // Reverse the digits before returning.
2278  std::reverse(Str.begin()+StartDig, Str.end());
2279}
2280
2281/// Returns the APInt as a std::string. Note that this is an inefficient method.
2282/// It is better to pass in a SmallVector/SmallString to the methods above.
2283std::string APInt::toString(unsigned Radix = 10, bool Signed = true) const {
2284  SmallString<40> S;
2285  toString(S, Radix, Signed, /* formatAsCLiteral = */false);
2286  return S.str();
2287}
2288
2289#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2290LLVM_DUMP_METHOD void APInt::dump() const {
2291  SmallString<40> S, U;
2292  this->toStringUnsigned(U);
2293  this->toStringSigned(S);
2294  dbgs() << "APInt(" << BitWidth << "b, "
2295         << U << "u " << S << "s)\n";
2296}
2297#endif
2298
2299void APInt::print(raw_ostream &OS, bool isSigned) const {
2300  SmallString<40> S;
2301  this->toString(S, 10, isSigned, /* formatAsCLiteral = */false);
2302  OS << S;
2303}
2304
2305// This implements a variety of operations on a representation of
2306// arbitrary precision, two's-complement, bignum integer values.
2307
2308// Assumed by lowHalf, highHalf, partMSB and partLSB.  A fairly safe
2309// and unrestricting assumption.
2310static_assert(APInt::APINT_BITS_PER_WORD % 2 == 0,
2311              "Part width must be divisible by 2!");
2312
2313/* Some handy functions local to this file.  */
2314
2315/* Returns the integer part with the least significant BITS set.
2316   BITS cannot be zero.  */
2317static inline APInt::WordType lowBitMask(unsigned bits) {
2318  assert(bits != 0 && bits <= APInt::APINT_BITS_PER_WORD);
2319
2320  return ~(APInt::WordType) 0 >> (APInt::APINT_BITS_PER_WORD - bits);
2321}
2322
2323/* Returns the value of the lower half of PART.  */
2324static inline APInt::WordType lowHalf(APInt::WordType part) {
2325  return part & lowBitMask(APInt::APINT_BITS_PER_WORD / 2);
2326}
2327
2328/* Returns the value of the upper half of PART.  */
2329static inline APInt::WordType highHalf(APInt::WordType part) {
2330  return part >> (APInt::APINT_BITS_PER_WORD / 2);
2331}
2332
2333/* Returns the bit number of the most significant set bit of a part.
2334   If the input number has no bits set -1U is returned.  */
2335static unsigned partMSB(APInt::WordType value) {
2336  return findLastSet(value, ZB_Max);
2337}
2338
2339/* Returns the bit number of the least significant set bit of a
2340   part.  If the input number has no bits set -1U is returned.  */
2341static unsigned partLSB(APInt::WordType value) {
2342  return findFirstSet(value, ZB_Max);
2343}
2344
2345/* Sets the least significant part of a bignum to the input value, and
2346   zeroes out higher parts.  */
2347void APInt::tcSet(WordType *dst, WordType part, unsigned parts) {
2348  assert(parts > 0);
2349
2350  dst[0] = part;
2351  for (unsigned i = 1; i < parts; i++)
2352    dst[i] = 0;
2353}
2354
2355/* Assign one bignum to another.  */
2356void APInt::tcAssign(WordType *dst, const WordType *src, unsigned parts) {
2357  for (unsigned i = 0; i < parts; i++)
2358    dst[i] = src[i];
2359}
2360
2361/* Returns true if a bignum is zero, false otherwise.  */
2362bool APInt::tcIsZero(const WordType *src, unsigned parts) {
2363  for (unsigned i = 0; i < parts; i++)
2364    if (src[i])
2365      return false;
2366
2367  return true;
2368}
2369
2370/* Extract the given bit of a bignum; returns 0 or 1.  */
2371int APInt::tcExtractBit(const WordType *parts, unsigned bit) {
2372  return (parts[whichWord(bit)] & maskBit(bit)) != 0;
2373}
2374
2375/* Set the given bit of a bignum. */
2376void APInt::tcSetBit(WordType *parts, unsigned bit) {
2377  parts[whichWord(bit)] |= maskBit(bit);
2378}
2379
2380/* Clears the given bit of a bignum. */
2381void APInt::tcClearBit(WordType *parts, unsigned bit) {
2382  parts[whichWord(bit)] &= ~maskBit(bit);
2383}
2384
2385/* Returns the bit number of the least significant set bit of a
2386   number.  If the input number has no bits set -1U is returned.  */
2387unsigned APInt::tcLSB(const WordType *parts, unsigned n) {
2388  for (unsigned i = 0; i < n; i++) {
2389    if (parts[i] != 0) {
2390      unsigned lsb = partLSB(parts[i]);
2391
2392      return lsb + i * APINT_BITS_PER_WORD;
2393    }
2394  }
2395
2396  return -1U;
2397}
2398
2399/* Returns the bit number of the most significant set bit of a number.
2400   If the input number has no bits set -1U is returned.  */
2401unsigned APInt::tcMSB(const WordType *parts, unsigned n) {
2402  do {
2403    --n;
2404
2405    if (parts[n] != 0) {
2406      unsigned msb = partMSB(parts[n]);
2407
2408      return msb + n * APINT_BITS_PER_WORD;
2409    }
2410  } while (n);
2411
2412  return -1U;
2413}
2414
2415/* Copy the bit vector of width srcBITS from SRC, starting at bit
2416   srcLSB, to DST, of dstCOUNT parts, such that the bit srcLSB becomes
2417   the least significant bit of DST.  All high bits above srcBITS in
2418   DST are zero-filled.  */
2419void
2420APInt::tcExtract(WordType *dst, unsigned dstCount, const WordType *src,
2421                 unsigned srcBits, unsigned srcLSB) {
2422  unsigned dstParts = (srcBits + APINT_BITS_PER_WORD - 1) / APINT_BITS_PER_WORD;
2423  assert(dstParts <= dstCount);
2424
2425  unsigned firstSrcPart = srcLSB / APINT_BITS_PER_WORD;
2426  tcAssign (dst, src + firstSrcPart, dstParts);
2427
2428  unsigned shift = srcLSB % APINT_BITS_PER_WORD;
2429  tcShiftRight (dst, dstParts, shift);
2430
2431  /* We now have (dstParts * APINT_BITS_PER_WORD - shift) bits from SRC
2432     in DST.  If this is less that srcBits, append the rest, else
2433     clear the high bits.  */
2434  unsigned n = dstParts * APINT_BITS_PER_WORD - shift;
2435  if (n < srcBits) {
2436    WordType mask = lowBitMask (srcBits - n);
2437    dst[dstParts - 1] |= ((src[firstSrcPart + dstParts] & mask)
2438                          << n % APINT_BITS_PER_WORD);
2439  } else if (n > srcBits) {
2440    if (srcBits % APINT_BITS_PER_WORD)
2441      dst[dstParts - 1] &= lowBitMask (srcBits % APINT_BITS_PER_WORD);
2442  }
2443
2444  /* Clear high parts.  */
2445  while (dstParts < dstCount)
2446    dst[dstParts++] = 0;
2447}
2448
2449/* DST += RHS + C where C is zero or one.  Returns the carry flag.  */
2450APInt::WordType APInt::tcAdd(WordType *dst, const WordType *rhs,
2451                             WordType c, unsigned parts) {
2452  assert(c <= 1);
2453
2454  for (unsigned i = 0; i < parts; i++) {
2455    WordType l = dst[i];
2456    if (c) {
2457      dst[i] += rhs[i] + 1;
2458      c = (dst[i] <= l);
2459    } else {
2460      dst[i] += rhs[i];
2461      c = (dst[i] < l);
2462    }
2463  }
2464
2465  return c;
2466}
2467
2468/// This function adds a single "word" integer, src, to the multiple
2469/// "word" integer array, dst[]. dst[] is modified to reflect the addition and
2470/// 1 is returned if there is a carry out, otherwise 0 is returned.
2471/// @returns the carry of the addition.
2472APInt::WordType APInt::tcAddPart(WordType *dst, WordType src,
2473                                 unsigned parts) {
2474  for (unsigned i = 0; i < parts; ++i) {
2475    dst[i] += src;
2476    if (dst[i] >= src)
2477      return 0; // No need to carry so exit early.
2478    src = 1; // Carry one to next digit.
2479  }
2480
2481  return 1;
2482}
2483
2484/* DST -= RHS + C where C is zero or one.  Returns the carry flag.  */
2485APInt::WordType APInt::tcSubtract(WordType *dst, const WordType *rhs,
2486                                  WordType c, unsigned parts) {
2487  assert(c <= 1);
2488
2489  for (unsigned i = 0; i < parts; i++) {
2490    WordType l = dst[i];
2491    if (c) {
2492      dst[i] -= rhs[i] + 1;
2493      c = (dst[i] >= l);
2494    } else {
2495      dst[i] -= rhs[i];
2496      c = (dst[i] > l);
2497    }
2498  }
2499
2500  return c;
2501}
2502
2503/// This function subtracts a single "word" (64-bit word), src, from
2504/// the multi-word integer array, dst[], propagating the borrowed 1 value until
2505/// no further borrowing is needed or it runs out of "words" in dst.  The result
2506/// is 1 if "borrowing" exhausted the digits in dst, or 0 if dst was not
2507/// exhausted. In other words, if src > dst then this function returns 1,
2508/// otherwise 0.
2509/// @returns the borrow out of the subtraction
2510APInt::WordType APInt::tcSubtractPart(WordType *dst, WordType src,
2511                                      unsigned parts) {
2512  for (unsigned i = 0; i < parts; ++i) {
2513    WordType Dst = dst[i];
2514    dst[i] -= src;
2515    if (src <= Dst)
2516      return 0; // No need to borrow so exit early.
2517    src = 1; // We have to "borrow 1" from next "word"
2518  }
2519
2520  return 1;
2521}
2522
2523/* Negate a bignum in-place.  */
2524void APInt::tcNegate(WordType *dst, unsigned parts) {
2525  tcComplement(dst, parts);
2526  tcIncrement(dst, parts);
2527}
2528
2529/*  DST += SRC * MULTIPLIER + CARRY   if add is true
2530    DST  = SRC * MULTIPLIER + CARRY   if add is false
2531
2532    Requires 0 <= DSTPARTS <= SRCPARTS + 1.  If DST overlaps SRC
2533    they must start at the same point, i.e. DST == SRC.
2534
2535    If DSTPARTS == SRCPARTS + 1 no overflow occurs and zero is
2536    returned.  Otherwise DST is filled with the least significant
2537    DSTPARTS parts of the result, and if all of the omitted higher
2538    parts were zero return zero, otherwise overflow occurred and
2539    return one.  */
2540int APInt::tcMultiplyPart(WordType *dst, const WordType *src,
2541                          WordType multiplier, WordType carry,
2542                          unsigned srcParts, unsigned dstParts,
2543                          bool add) {
2544  /* Otherwise our writes of DST kill our later reads of SRC.  */
2545  assert(dst <= src || dst >= src + srcParts);
2546  assert(dstParts <= srcParts + 1);
2547
2548  /* N loops; minimum of dstParts and srcParts.  */
2549  unsigned n = std::min(dstParts, srcParts);
2550
2551  for (unsigned i = 0; i < n; i++) {
2552    WordType low, mid, high, srcPart;
2553
2554      /* [ LOW, HIGH ] = MULTIPLIER * SRC[i] + DST[i] + CARRY.
2555
2556         This cannot overflow, because
2557
2558         (n - 1) * (n - 1) + 2 (n - 1) = (n - 1) * (n + 1)
2559
2560         which is less than n^2.  */
2561
2562    srcPart = src[i];
2563
2564    if (multiplier == 0 || srcPart == 0) {
2565      low = carry;
2566      high = 0;
2567    } else {
2568      low = lowHalf(srcPart) * lowHalf(multiplier);
2569      high = highHalf(srcPart) * highHalf(multiplier);
2570
2571      mid = lowHalf(srcPart) * highHalf(multiplier);
2572      high += highHalf(mid);
2573      mid <<= APINT_BITS_PER_WORD / 2;
2574      if (low + mid < low)
2575        high++;
2576      low += mid;
2577
2578      mid = highHalf(srcPart) * lowHalf(multiplier);
2579      high += highHalf(mid);
2580      mid <<= APINT_BITS_PER_WORD / 2;
2581      if (low + mid < low)
2582        high++;
2583      low += mid;
2584
2585      /* Now add carry.  */
2586      if (low + carry < low)
2587        high++;
2588      low += carry;
2589    }
2590
2591    if (add) {
2592      /* And now DST[i], and store the new low part there.  */
2593      if (low + dst[i] < low)
2594        high++;
2595      dst[i] += low;
2596    } else
2597      dst[i] = low;
2598
2599    carry = high;
2600  }
2601
2602  if (srcParts < dstParts) {
2603    /* Full multiplication, there is no overflow.  */
2604    assert(srcParts + 1 == dstParts);
2605    dst[srcParts] = carry;
2606    return 0;
2607  }
2608
2609  /* We overflowed if there is carry.  */
2610  if (carry)
2611    return 1;
2612
2613  /* We would overflow if any significant unwritten parts would be
2614     non-zero.  This is true if any remaining src parts are non-zero
2615     and the multiplier is non-zero.  */
2616  if (multiplier)
2617    for (unsigned i = dstParts; i < srcParts; i++)
2618      if (src[i])
2619        return 1;
2620
2621  /* We fitted in the narrow destination.  */
2622  return 0;
2623}
2624
2625/* DST = LHS * RHS, where DST has the same width as the operands and
2626   is filled with the least significant parts of the result.  Returns
2627   one if overflow occurred, otherwise zero.  DST must be disjoint
2628   from both operands.  */
2629int APInt::tcMultiply(WordType *dst, const WordType *lhs,
2630                      const WordType *rhs, unsigned parts) {
2631  assert(dst != lhs && dst != rhs);
2632
2633  int overflow = 0;
2634  tcSet(dst, 0, parts);
2635
2636  for (unsigned i = 0; i < parts; i++)
2637    overflow |= tcMultiplyPart(&dst[i], lhs, rhs[i], 0, parts,
2638                               parts - i, true);
2639
2640  return overflow;
2641}
2642
2643/// DST = LHS * RHS, where DST has width the sum of the widths of the
2644/// operands. No overflow occurs. DST must be disjoint from both operands.
2645void APInt::tcFullMultiply(WordType *dst, const WordType *lhs,
2646                           const WordType *rhs, unsigned lhsParts,
2647                           unsigned rhsParts) {
2648  /* Put the narrower number on the LHS for less loops below.  */
2649  if (lhsParts > rhsParts)
2650    return tcFullMultiply (dst, rhs, lhs, rhsParts, lhsParts);
2651
2652  assert(dst != lhs && dst != rhs);
2653
2654  tcSet(dst, 0, rhsParts);
2655
2656  for (unsigned i = 0; i < lhsParts; i++)
2657    tcMultiplyPart(&dst[i], rhs, lhs[i], 0, rhsParts, rhsParts + 1, true);
2658}
2659
2660/* If RHS is zero LHS and REMAINDER are left unchanged, return one.
2661   Otherwise set LHS to LHS / RHS with the fractional part discarded,
2662   set REMAINDER to the remainder, return zero.  i.e.
2663
2664   OLD_LHS = RHS * LHS + REMAINDER
2665
2666   SCRATCH is a bignum of the same size as the operands and result for
2667   use by the routine; its contents need not be initialized and are
2668   destroyed.  LHS, REMAINDER and SCRATCH must be distinct.
2669*/
2670int APInt::tcDivide(WordType *lhs, const WordType *rhs,
2671                    WordType *remainder, WordType *srhs,
2672                    unsigned parts) {
2673  assert(lhs != remainder && lhs != srhs && remainder != srhs);
2674
2675  unsigned shiftCount = tcMSB(rhs, parts) + 1;
2676  if (shiftCount == 0)
2677    return true;
2678
2679  shiftCount = parts * APINT_BITS_PER_WORD - shiftCount;
2680  unsigned n = shiftCount / APINT_BITS_PER_WORD;
2681  WordType mask = (WordType) 1 << (shiftCount % APINT_BITS_PER_WORD);
2682
2683  tcAssign(srhs, rhs, parts);
2684  tcShiftLeft(srhs, parts, shiftCount);
2685  tcAssign(remainder, lhs, parts);
2686  tcSet(lhs, 0, parts);
2687
2688  /* Loop, subtracting SRHS if REMAINDER is greater and adding that to
2689     the total.  */
2690  for (;;) {
2691    int compare = tcCompare(remainder, srhs, parts);
2692    if (compare >= 0) {
2693      tcSubtract(remainder, srhs, 0, parts);
2694      lhs[n] |= mask;
2695    }
2696
2697    if (shiftCount == 0)
2698      break;
2699    shiftCount--;
2700    tcShiftRight(srhs, parts, 1);
2701    if ((mask >>= 1) == 0) {
2702      mask = (WordType) 1 << (APINT_BITS_PER_WORD - 1);
2703      n--;
2704    }
2705  }
2706
2707  return false;
2708}
2709
2710/// Shift a bignum left Cound bits in-place. Shifted in bits are zero. There are
2711/// no restrictions on Count.
2712void APInt::tcShiftLeft(WordType *Dst, unsigned Words, unsigned Count) {
2713  // Don't bother performing a no-op shift.
2714  if (!Count)
2715    return;
2716
2717  // WordShift is the inter-part shift; BitShift is the intra-part shift.
2718  unsigned WordShift = std::min(Count / APINT_BITS_PER_WORD, Words);
2719  unsigned BitShift = Count % APINT_BITS_PER_WORD;
2720
2721  // Fastpath for moving by whole words.
2722  if (BitShift == 0) {
2723    std::memmove(Dst + WordShift, Dst, (Words - WordShift) * APINT_WORD_SIZE);
2724  } else {
2725    while (Words-- > WordShift) {
2726      Dst[Words] = Dst[Words - WordShift] << BitShift;
2727      if (Words > WordShift)
2728        Dst[Words] |=
2729          Dst[Words - WordShift - 1] >> (APINT_BITS_PER_WORD - BitShift);
2730    }
2731  }
2732
2733  // Fill in the remainder with 0s.
2734  std::memset(Dst, 0, WordShift * APINT_WORD_SIZE);
2735}
2736
2737/// Shift a bignum right Count bits in-place. Shifted in bits are zero. There
2738/// are no restrictions on Count.
2739void APInt::tcShiftRight(WordType *Dst, unsigned Words, unsigned Count) {
2740  // Don't bother performing a no-op shift.
2741  if (!Count)
2742    return;
2743
2744  // WordShift is the inter-part shift; BitShift is the intra-part shift.
2745  unsigned WordShift = std::min(Count / APINT_BITS_PER_WORD, Words);
2746  unsigned BitShift = Count % APINT_BITS_PER_WORD;
2747
2748  unsigned WordsToMove = Words - WordShift;
2749  // Fastpath for moving by whole words.
2750  if (BitShift == 0) {
2751    std::memmove(Dst, Dst + WordShift, WordsToMove * APINT_WORD_SIZE);
2752  } else {
2753    for (unsigned i = 0; i != WordsToMove; ++i) {
2754      Dst[i] = Dst[i + WordShift] >> BitShift;
2755      if (i + 1 != WordsToMove)
2756        Dst[i] |= Dst[i + WordShift + 1] << (APINT_BITS_PER_WORD - BitShift);
2757    }
2758  }
2759
2760  // Fill in the remainder with 0s.
2761  std::memset(Dst + WordsToMove, 0, WordShift * APINT_WORD_SIZE);
2762}
2763
2764/* Bitwise and of two bignums.  */
2765void APInt::tcAnd(WordType *dst, const WordType *rhs, unsigned parts) {
2766  for (unsigned i = 0; i < parts; i++)
2767    dst[i] &= rhs[i];
2768}
2769
2770/* Bitwise inclusive or of two bignums.  */
2771void APInt::tcOr(WordType *dst, const WordType *rhs, unsigned parts) {
2772  for (unsigned i = 0; i < parts; i++)
2773    dst[i] |= rhs[i];
2774}
2775
2776/* Bitwise exclusive or of two bignums.  */
2777void APInt::tcXor(WordType *dst, const WordType *rhs, unsigned parts) {
2778  for (unsigned i = 0; i < parts; i++)
2779    dst[i] ^= rhs[i];
2780}
2781
2782/* Complement a bignum in-place.  */
2783void APInt::tcComplement(WordType *dst, unsigned parts) {
2784  for (unsigned i = 0; i < parts; i++)
2785    dst[i] = ~dst[i];
2786}
2787
2788/* Comparison (unsigned) of two bignums.  */
2789int APInt::tcCompare(const WordType *lhs, const WordType *rhs,
2790                     unsigned parts) {
2791  while (parts) {
2792    parts--;
2793    if (lhs[parts] != rhs[parts])
2794      return (lhs[parts] > rhs[parts]) ? 1 : -1;
2795  }
2796
2797  return 0;
2798}
2799
2800/* Set the least significant BITS bits of a bignum, clear the
2801   rest.  */
2802void APInt::tcSetLeastSignificantBits(WordType *dst, unsigned parts,
2803                                      unsigned bits) {
2804  unsigned i = 0;
2805  while (bits > APINT_BITS_PER_WORD) {
2806    dst[i++] = ~(WordType) 0;
2807    bits -= APINT_BITS_PER_WORD;
2808  }
2809
2810  if (bits)
2811    dst[i++] = ~(WordType) 0 >> (APINT_BITS_PER_WORD - bits);
2812
2813  while (i < parts)
2814    dst[i++] = 0;
2815}
2816
2817APInt llvm::APIntOps::RoundingUDiv(const APInt &A, const APInt &B,
2818                                   APInt::Rounding RM) {
2819  // Currently udivrem always rounds down.
2820  switch (RM) {
2821  case APInt::Rounding::DOWN:
2822  case APInt::Rounding::TOWARD_ZERO:
2823    return A.udiv(B);
2824  case APInt::Rounding::UP: {
2825    APInt Quo, Rem;
2826    APInt::udivrem(A, B, Quo, Rem);
2827    if (Rem == 0)
2828      return Quo;
2829    return Quo + 1;
2830  }
2831  }
2832  llvm_unreachable("Unknown APInt::Rounding enum");
2833}
2834
2835APInt llvm::APIntOps::RoundingSDiv(const APInt &A, const APInt &B,
2836                                   APInt::Rounding RM) {
2837  switch (RM) {
2838  case APInt::Rounding::DOWN:
2839  case APInt::Rounding::UP: {
2840    APInt Quo, Rem;
2841    APInt::sdivrem(A, B, Quo, Rem);
2842    if (Rem == 0)
2843      return Quo;
2844    // This algorithm deals with arbitrary rounding mode used by sdivrem.
2845    // We want to check whether the non-integer part of the mathematical value
2846    // is negative or not. If the non-integer part is negative, we need to round
2847    // down from Quo; otherwise, if it's positive or 0, we return Quo, as it's
2848    // already rounded down.
2849    if (RM == APInt::Rounding::DOWN) {
2850      if (Rem.isNegative() != B.isNegative())
2851        return Quo - 1;
2852      return Quo;
2853    }
2854    if (Rem.isNegative() != B.isNegative())
2855      return Quo;
2856    return Quo + 1;
2857  }
2858  // Currently sdiv rounds towards zero.
2859  case APInt::Rounding::TOWARD_ZERO:
2860    return A.sdiv(B);
2861  }
2862  llvm_unreachable("Unknown APInt::Rounding enum");
2863}
2864
2865Optional<APInt>
2866llvm::APIntOps::SolveQuadraticEquationWrap(APInt A, APInt B, APInt C,
2867                                           unsigned RangeWidth) {
2868  unsigned CoeffWidth = A.getBitWidth();
2869  assert(CoeffWidth == B.getBitWidth() && CoeffWidth == C.getBitWidth());
2870  assert(RangeWidth <= CoeffWidth &&
2871         "Value range width should be less than coefficient width");
2872  assert(RangeWidth > 1 && "Value range bit width should be > 1");
2873
2874  LLVM_DEBUG(dbgs() << __func__ << ": solving " << A << "x^2 + " << B
2875                    << "x + " << C << ", rw:" << RangeWidth << '\n');
2876
2877  // Identify 0 as a (non)solution immediately.
2878  if (C.sextOrTrunc(RangeWidth).isNullValue() ) {
2879    LLVM_DEBUG(dbgs() << __func__ << ": zero solution\n");
2880    return APInt(CoeffWidth, 0);
2881  }
2882
2883  // The result of APInt arithmetic has the same bit width as the operands,
2884  // so it can actually lose high bits. A product of two n-bit integers needs
2885  // 2n-1 bits to represent the full value.
2886  // The operation done below (on quadratic coefficients) that can produce
2887  // the largest value is the evaluation of the equation during bisection,
2888  // which needs 3 times the bitwidth of the coefficient, so the total number
2889  // of required bits is 3n.
2890  //
2891  // The purpose of this extension is to simulate the set Z of all integers,
2892  // where n+1 > n for all n in Z. In Z it makes sense to talk about positive
2893  // and negative numbers (not so much in a modulo arithmetic). The method
2894  // used to solve the equation is based on the standard formula for real
2895  // numbers, and uses the concepts of "positive" and "negative" with their
2896  // usual meanings.
2897  CoeffWidth *= 3;
2898  A = A.sext(CoeffWidth);
2899  B = B.sext(CoeffWidth);
2900  C = C.sext(CoeffWidth);
2901
2902  // Make A > 0 for simplicity. Negate cannot overflow at this point because
2903  // the bit width has increased.
2904  if (A.isNegative()) {
2905    A.negate();
2906    B.negate();
2907    C.negate();
2908  }
2909
2910  // Solving an equation q(x) = 0 with coefficients in modular arithmetic
2911  // is really solving a set of equations q(x) = kR for k = 0, 1, 2, ...,
2912  // and R = 2^BitWidth.
2913  // Since we're trying not only to find exact solutions, but also values
2914  // that "wrap around", such a set will always have a solution, i.e. an x
2915  // that satisfies at least one of the equations, or such that |q(x)|
2916  // exceeds kR, while |q(x-1)| for the same k does not.
2917  //
2918  // We need to find a value k, such that Ax^2 + Bx + C = kR will have a
2919  // positive solution n (in the above sense), and also such that the n
2920  // will be the least among all solutions corresponding to k = 0, 1, ...
2921  // (more precisely, the least element in the set
2922  //   { n(k) | k is such that a solution n(k) exists }).
2923  //
2924  // Consider the parabola (over real numbers) that corresponds to the
2925  // quadratic equation. Since A > 0, the arms of the parabola will point
2926  // up. Picking different values of k will shift it up and down by R.
2927  //
2928  // We want to shift the parabola in such a way as to reduce the problem
2929  // of solving q(x) = kR to solving shifted_q(x) = 0.
2930  // (The interesting solutions are the ceilings of the real number
2931  // solutions.)
2932  APInt R = APInt::getOneBitSet(CoeffWidth, RangeWidth);
2933  APInt TwoA = 2 * A;
2934  APInt SqrB = B * B;
2935  bool PickLow;
2936
2937  auto RoundUp = [] (const APInt &V, const APInt &A) -> APInt {
2938    assert(A.isStrictlyPositive());
2939    APInt T = V.abs().urem(A);
2940    if (T.isNullValue())
2941      return V;
2942    return V.isNegative() ? V+T : V+(A-T);
2943  };
2944
2945  // The vertex of the parabola is at -B/2A, but since A > 0, it's negative
2946  // iff B is positive.
2947  if (B.isNonNegative()) {
2948    // If B >= 0, the vertex it at a negative location (or at 0), so in
2949    // order to have a non-negative solution we need to pick k that makes
2950    // C-kR negative. To satisfy all the requirements for the solution
2951    // that we are looking for, it needs to be closest to 0 of all k.
2952    C = C.srem(R);
2953    if (C.isStrictlyPositive())
2954      C -= R;
2955    // Pick the greater solution.
2956    PickLow = false;
2957  } else {
2958    // If B < 0, the vertex is at a positive location. For any solution
2959    // to exist, the discriminant must be non-negative. This means that
2960    // C-kR <= B^2/4A is a necessary condition for k, i.e. there is a
2961    // lower bound on values of k: kR >= C - B^2/4A.
2962    APInt LowkR = C - SqrB.udiv(2*TwoA); // udiv because all values > 0.
2963    // Round LowkR up (towards +inf) to the nearest kR.
2964    LowkR = RoundUp(LowkR, R);
2965
2966    // If there exists k meeting the condition above, and such that
2967    // C-kR > 0, there will be two positive real number solutions of
2968    // q(x) = kR. Out of all such values of k, pick the one that makes
2969    // C-kR closest to 0, (i.e. pick maximum k such that C-kR > 0).
2970    // In other words, find maximum k such that LowkR <= kR < C.
2971    if (C.sgt(LowkR)) {
2972      // If LowkR < C, then such a k is guaranteed to exist because
2973      // LowkR itself is a multiple of R.
2974      C -= -RoundUp(-C, R);      // C = C - RoundDown(C, R)
2975      // Pick the smaller solution.
2976      PickLow = true;
2977    } else {
2978      // If C-kR < 0 for all potential k's, it means that one solution
2979      // will be negative, while the other will be positive. The positive
2980      // solution will shift towards 0 if the parabola is moved up.
2981      // Pick the kR closest to the lower bound (i.e. make C-kR closest
2982      // to 0, or in other words, out of all parabolas that have solutions,
2983      // pick the one that is the farthest "up").
2984      // Since LowkR is itself a multiple of R, simply take C-LowkR.
2985      C -= LowkR;
2986      // Pick the greater solution.
2987      PickLow = false;
2988    }
2989  }
2990
2991  LLVM_DEBUG(dbgs() << __func__ << ": updated coefficients " << A << "x^2 + "
2992                    << B << "x + " << C << ", rw:" << RangeWidth << '\n');
2993
2994  APInt D = SqrB - 4*A*C;
2995  assert(D.isNonNegative() && "Negative discriminant");
2996  APInt SQ = D.sqrt();
2997
2998  APInt Q = SQ * SQ;
2999  bool InexactSQ = Q != D;
3000  // The calculated SQ may actually be greater than the exact (non-integer)
3001  // value. If that's the case, decrement SQ to get a value that is lower.
3002  if (Q.sgt(D))
3003    SQ -= 1;
3004
3005  APInt X;
3006  APInt Rem;
3007
3008  // SQ is rounded down (i.e SQ * SQ <= D), so the roots may be inexact.
3009  // When using the quadratic formula directly, the calculated low root
3010  // may be greater than the exact one, since we would be subtracting SQ.
3011  // To make sure that the calculated root is not greater than the exact
3012  // one, subtract SQ+1 when calculating the low root (for inexact value
3013  // of SQ).
3014  if (PickLow)
3015    APInt::sdivrem(-B - (SQ+InexactSQ), TwoA, X, Rem);
3016  else
3017    APInt::sdivrem(-B + SQ, TwoA, X, Rem);
3018
3019  // The updated coefficients should be such that the (exact) solution is
3020  // positive. Since APInt division rounds towards 0, the calculated one
3021  // can be 0, but cannot be negative.
3022  assert(X.isNonNegative() && "Solution should be non-negative");
3023
3024  if (!InexactSQ && Rem.isNullValue()) {
3025    LLVM_DEBUG(dbgs() << __func__ << ": solution (root): " << X << '\n');
3026    return X;
3027  }
3028
3029  assert((SQ*SQ).sle(D) && "SQ = |_sqrt(D)_|, so SQ*SQ <= D");
3030  // The exact value of the square root of D should be between SQ and SQ+1.
3031  // This implies that the solution should be between that corresponding to
3032  // SQ (i.e. X) and that corresponding to SQ+1.
3033  //
3034  // The calculated X cannot be greater than the exact (real) solution.
3035  // Actually it must be strictly less than the exact solution, while
3036  // X+1 will be greater than or equal to it.
3037
3038  APInt VX = (A*X + B)*X + C;
3039  APInt VY = VX + TwoA*X + A + B;
3040  bool SignChange = VX.isNegative() != VY.isNegative() ||
3041                    VX.isNullValue() != VY.isNullValue();
3042  // If the sign did not change between X and X+1, X is not a valid solution.
3043  // This could happen when the actual (exact) roots don't have an integer
3044  // between them, so they would both be contained between X and X+1.
3045  if (!SignChange) {
3046    LLVM_DEBUG(dbgs() << __func__ << ": no valid solution\n");
3047    return None;
3048  }
3049
3050  X += 1;
3051  LLVM_DEBUG(dbgs() << __func__ << ": solution (wrap): " << X << '\n');
3052  return X;
3053}
3054
3055Optional<unsigned>
3056llvm::APIntOps::GetMostSignificantDifferentBit(const APInt &A, const APInt &B) {
3057  assert(A.getBitWidth() == B.getBitWidth() && "Must have the same bitwidth");
3058  if (A == B)
3059    return llvm::None;
3060  return A.getBitWidth() - ((A ^ B).countLeadingZeros() + 1);
3061}
3062
3063/// StoreIntToMemory - Fills the StoreBytes bytes of memory starting from Dst
3064/// with the integer held in IntVal.
3065void llvm::StoreIntToMemory(const APInt &IntVal, uint8_t *Dst,
3066                            unsigned StoreBytes) {
3067  assert((IntVal.getBitWidth()+7)/8 >= StoreBytes && "Integer too small!");
3068  const uint8_t *Src = (const uint8_t *)IntVal.getRawData();
3069
3070  if (sys::IsLittleEndianHost) {
3071    // Little-endian host - the source is ordered from LSB to MSB.  Order the
3072    // destination from LSB to MSB: Do a straight copy.
3073    memcpy(Dst, Src, StoreBytes);
3074  } else {
3075    // Big-endian host - the source is an array of 64 bit words ordered from
3076    // LSW to MSW.  Each word is ordered from MSB to LSB.  Order the destination
3077    // from MSB to LSB: Reverse the word order, but not the bytes in a word.
3078    while (StoreBytes > sizeof(uint64_t)) {
3079      StoreBytes -= sizeof(uint64_t);
3080      // May not be aligned so use memcpy.
3081      memcpy(Dst + StoreBytes, Src, sizeof(uint64_t));
3082      Src += sizeof(uint64_t);
3083    }
3084
3085    memcpy(Dst, Src + sizeof(uint64_t) - StoreBytes, StoreBytes);
3086  }
3087}
3088
3089/// LoadIntFromMemory - Loads the integer stored in the LoadBytes bytes starting
3090/// from Src into IntVal, which is assumed to be wide enough and to hold zero.
3091void llvm::LoadIntFromMemory(APInt &IntVal, uint8_t *Src, unsigned LoadBytes) {
3092  assert((IntVal.getBitWidth()+7)/8 >= LoadBytes && "Integer too small!");
3093  uint8_t *Dst = reinterpret_cast<uint8_t *>(
3094                   const_cast<uint64_t *>(IntVal.getRawData()));
3095
3096  if (sys::IsLittleEndianHost)
3097    // Little-endian host - the destination must be ordered from LSB to MSB.
3098    // The source is ordered from LSB to MSB: Do a straight copy.
3099    memcpy(Dst, Src, LoadBytes);
3100  else {
3101    // Big-endian - the destination is an array of 64 bit words ordered from
3102    // LSW to MSW.  Each word must be ordered from MSB to LSB.  The source is
3103    // ordered from MSB to LSB: Reverse the word order, but not the bytes in
3104    // a word.
3105    while (LoadBytes > sizeof(uint64_t)) {
3106      LoadBytes -= sizeof(uint64_t);
3107      // May not be aligned so use memcpy.
3108      memcpy(Dst, Src + LoadBytes, sizeof(uint64_t));
3109      Dst += sizeof(uint64_t);
3110    }
3111
3112    memcpy(Dst + sizeof(uint64_t) - LoadBytes, Src, LoadBytes);
3113  }
3114}
3115