APInt.cpp revision 353358
1//===-- APInt.cpp - Implement APInt class ---------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements a class to represent arbitrary precision integer
10// constant values and provide a variety of arithmetic operations on them.
11//
12//===----------------------------------------------------------------------===//
13
14#include "llvm/ADT/APInt.h"
15#include "llvm/ADT/ArrayRef.h"
16#include "llvm/ADT/FoldingSet.h"
17#include "llvm/ADT/Hashing.h"
18#include "llvm/ADT/Optional.h"
19#include "llvm/ADT/SmallString.h"
20#include "llvm/ADT/StringRef.h"
21#include "llvm/ADT/bit.h"
22#include "llvm/Config/llvm-config.h"
23#include "llvm/Support/Debug.h"
24#include "llvm/Support/ErrorHandling.h"
25#include "llvm/Support/MathExtras.h"
26#include "llvm/Support/raw_ostream.h"
27#include <climits>
28#include <cmath>
29#include <cstdlib>
30#include <cstring>
31using namespace llvm;
32
33#define DEBUG_TYPE "apint"
34
35/// A utility function for allocating memory, checking for allocation failures,
36/// and ensuring the contents are zeroed.
37inline static uint64_t* getClearedMemory(unsigned numWords) {
38  uint64_t *result = new uint64_t[numWords];
39  memset(result, 0, numWords * sizeof(uint64_t));
40  return result;
41}
42
43/// A utility function for allocating memory and checking for allocation
44/// failure.  The content is not zeroed.
45inline static uint64_t* getMemory(unsigned numWords) {
46  return new uint64_t[numWords];
47}
48
49/// A utility function that converts a character to a digit.
50inline static unsigned getDigit(char cdigit, uint8_t radix) {
51  unsigned r;
52
53  if (radix == 16 || radix == 36) {
54    r = cdigit - '0';
55    if (r <= 9)
56      return r;
57
58    r = cdigit - 'A';
59    if (r <= radix - 11U)
60      return r + 10;
61
62    r = cdigit - 'a';
63    if (r <= radix - 11U)
64      return r + 10;
65
66    radix = 10;
67  }
68
69  r = cdigit - '0';
70  if (r < radix)
71    return r;
72
73  return -1U;
74}
75
76
77void APInt::initSlowCase(uint64_t val, bool isSigned) {
78  U.pVal = getClearedMemory(getNumWords());
79  U.pVal[0] = val;
80  if (isSigned && int64_t(val) < 0)
81    for (unsigned i = 1; i < getNumWords(); ++i)
82      U.pVal[i] = WORDTYPE_MAX;
83  clearUnusedBits();
84}
85
86void APInt::initSlowCase(const APInt& that) {
87  U.pVal = getMemory(getNumWords());
88  memcpy(U.pVal, that.U.pVal, getNumWords() * APINT_WORD_SIZE);
89}
90
91void APInt::initFromArray(ArrayRef<uint64_t> bigVal) {
92  assert(BitWidth && "Bitwidth too small");
93  assert(bigVal.data() && "Null pointer detected!");
94  if (isSingleWord())
95    U.VAL = bigVal[0];
96  else {
97    // Get memory, cleared to 0
98    U.pVal = getClearedMemory(getNumWords());
99    // Calculate the number of words to copy
100    unsigned words = std::min<unsigned>(bigVal.size(), getNumWords());
101    // Copy the words from bigVal to pVal
102    memcpy(U.pVal, bigVal.data(), words * APINT_WORD_SIZE);
103  }
104  // Make sure unused high bits are cleared
105  clearUnusedBits();
106}
107
108APInt::APInt(unsigned numBits, ArrayRef<uint64_t> bigVal)
109  : BitWidth(numBits) {
110  initFromArray(bigVal);
111}
112
113APInt::APInt(unsigned numBits, unsigned numWords, const uint64_t bigVal[])
114  : BitWidth(numBits) {
115  initFromArray(makeArrayRef(bigVal, numWords));
116}
117
118APInt::APInt(unsigned numbits, StringRef Str, uint8_t radix)
119  : BitWidth(numbits) {
120  assert(BitWidth && "Bitwidth too small");
121  fromString(numbits, Str, radix);
122}
123
124void APInt::reallocate(unsigned NewBitWidth) {
125  // If the number of words is the same we can just change the width and stop.
126  if (getNumWords() == getNumWords(NewBitWidth)) {
127    BitWidth = NewBitWidth;
128    return;
129  }
130
131  // If we have an allocation, delete it.
132  if (!isSingleWord())
133    delete [] U.pVal;
134
135  // Update BitWidth.
136  BitWidth = NewBitWidth;
137
138  // If we are supposed to have an allocation, create it.
139  if (!isSingleWord())
140    U.pVal = getMemory(getNumWords());
141}
142
143void APInt::AssignSlowCase(const APInt& RHS) {
144  // Don't do anything for X = X
145  if (this == &RHS)
146    return;
147
148  // Adjust the bit width and handle allocations as necessary.
149  reallocate(RHS.getBitWidth());
150
151  // Copy the data.
152  if (isSingleWord())
153    U.VAL = RHS.U.VAL;
154  else
155    memcpy(U.pVal, RHS.U.pVal, getNumWords() * APINT_WORD_SIZE);
156}
157
158/// This method 'profiles' an APInt for use with FoldingSet.
159void APInt::Profile(FoldingSetNodeID& ID) const {
160  ID.AddInteger(BitWidth);
161
162  if (isSingleWord()) {
163    ID.AddInteger(U.VAL);
164    return;
165  }
166
167  unsigned NumWords = getNumWords();
168  for (unsigned i = 0; i < NumWords; ++i)
169    ID.AddInteger(U.pVal[i]);
170}
171
172/// Prefix increment operator. Increments the APInt by one.
173APInt& APInt::operator++() {
174  if (isSingleWord())
175    ++U.VAL;
176  else
177    tcIncrement(U.pVal, getNumWords());
178  return clearUnusedBits();
179}
180
181/// Prefix decrement operator. Decrements the APInt by one.
182APInt& APInt::operator--() {
183  if (isSingleWord())
184    --U.VAL;
185  else
186    tcDecrement(U.pVal, getNumWords());
187  return clearUnusedBits();
188}
189
190/// Adds the RHS APint to this APInt.
191/// @returns this, after addition of RHS.
192/// Addition assignment operator.
193APInt& APInt::operator+=(const APInt& RHS) {
194  assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
195  if (isSingleWord())
196    U.VAL += RHS.U.VAL;
197  else
198    tcAdd(U.pVal, RHS.U.pVal, 0, getNumWords());
199  return clearUnusedBits();
200}
201
202APInt& APInt::operator+=(uint64_t RHS) {
203  if (isSingleWord())
204    U.VAL += RHS;
205  else
206    tcAddPart(U.pVal, RHS, getNumWords());
207  return clearUnusedBits();
208}
209
210/// Subtracts the RHS APInt from this APInt
211/// @returns this, after subtraction
212/// Subtraction assignment operator.
213APInt& APInt::operator-=(const APInt& RHS) {
214  assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
215  if (isSingleWord())
216    U.VAL -= RHS.U.VAL;
217  else
218    tcSubtract(U.pVal, RHS.U.pVal, 0, getNumWords());
219  return clearUnusedBits();
220}
221
222APInt& APInt::operator-=(uint64_t RHS) {
223  if (isSingleWord())
224    U.VAL -= RHS;
225  else
226    tcSubtractPart(U.pVal, RHS, getNumWords());
227  return clearUnusedBits();
228}
229
230APInt APInt::operator*(const APInt& RHS) const {
231  assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
232  if (isSingleWord())
233    return APInt(BitWidth, U.VAL * RHS.U.VAL);
234
235  APInt Result(getMemory(getNumWords()), getBitWidth());
236
237  tcMultiply(Result.U.pVal, U.pVal, RHS.U.pVal, getNumWords());
238
239  Result.clearUnusedBits();
240  return Result;
241}
242
243void APInt::AndAssignSlowCase(const APInt& RHS) {
244  tcAnd(U.pVal, RHS.U.pVal, getNumWords());
245}
246
247void APInt::OrAssignSlowCase(const APInt& RHS) {
248  tcOr(U.pVal, RHS.U.pVal, getNumWords());
249}
250
251void APInt::XorAssignSlowCase(const APInt& RHS) {
252  tcXor(U.pVal, RHS.U.pVal, getNumWords());
253}
254
255APInt& APInt::operator*=(const APInt& RHS) {
256  assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
257  *this = *this * RHS;
258  return *this;
259}
260
261APInt& APInt::operator*=(uint64_t RHS) {
262  if (isSingleWord()) {
263    U.VAL *= RHS;
264  } else {
265    unsigned NumWords = getNumWords();
266    tcMultiplyPart(U.pVal, U.pVal, RHS, 0, NumWords, NumWords, false);
267  }
268  return clearUnusedBits();
269}
270
271bool APInt::EqualSlowCase(const APInt& RHS) const {
272  return std::equal(U.pVal, U.pVal + getNumWords(), RHS.U.pVal);
273}
274
275int APInt::compare(const APInt& RHS) const {
276  assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison");
277  if (isSingleWord())
278    return U.VAL < RHS.U.VAL ? -1 : U.VAL > RHS.U.VAL;
279
280  return tcCompare(U.pVal, RHS.U.pVal, getNumWords());
281}
282
283int APInt::compareSigned(const APInt& RHS) const {
284  assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison");
285  if (isSingleWord()) {
286    int64_t lhsSext = SignExtend64(U.VAL, BitWidth);
287    int64_t rhsSext = SignExtend64(RHS.U.VAL, BitWidth);
288    return lhsSext < rhsSext ? -1 : lhsSext > rhsSext;
289  }
290
291  bool lhsNeg = isNegative();
292  bool rhsNeg = RHS.isNegative();
293
294  // If the sign bits don't match, then (LHS < RHS) if LHS is negative
295  if (lhsNeg != rhsNeg)
296    return lhsNeg ? -1 : 1;
297
298  // Otherwise we can just use an unsigned comparison, because even negative
299  // numbers compare correctly this way if both have the same signed-ness.
300  return tcCompare(U.pVal, RHS.U.pVal, getNumWords());
301}
302
303void APInt::setBitsSlowCase(unsigned loBit, unsigned hiBit) {
304  unsigned loWord = whichWord(loBit);
305  unsigned hiWord = whichWord(hiBit);
306
307  // Create an initial mask for the low word with zeros below loBit.
308  uint64_t loMask = WORDTYPE_MAX << whichBit(loBit);
309
310  // If hiBit is not aligned, we need a high mask.
311  unsigned hiShiftAmt = whichBit(hiBit);
312  if (hiShiftAmt != 0) {
313    // Create a high mask with zeros above hiBit.
314    uint64_t hiMask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - hiShiftAmt);
315    // If loWord and hiWord are equal, then we combine the masks. Otherwise,
316    // set the bits in hiWord.
317    if (hiWord == loWord)
318      loMask &= hiMask;
319    else
320      U.pVal[hiWord] |= hiMask;
321  }
322  // Apply the mask to the low word.
323  U.pVal[loWord] |= loMask;
324
325  // Fill any words between loWord and hiWord with all ones.
326  for (unsigned word = loWord + 1; word < hiWord; ++word)
327    U.pVal[word] = WORDTYPE_MAX;
328}
329
330/// Toggle every bit to its opposite value.
331void APInt::flipAllBitsSlowCase() {
332  tcComplement(U.pVal, getNumWords());
333  clearUnusedBits();
334}
335
336/// Toggle a given bit to its opposite value whose position is given
337/// as "bitPosition".
338/// Toggles a given bit to its opposite value.
339void APInt::flipBit(unsigned bitPosition) {
340  assert(bitPosition < BitWidth && "Out of the bit-width range!");
341  if ((*this)[bitPosition]) clearBit(bitPosition);
342  else setBit(bitPosition);
343}
344
345void APInt::insertBits(const APInt &subBits, unsigned bitPosition) {
346  unsigned subBitWidth = subBits.getBitWidth();
347  assert(0 < subBitWidth && (subBitWidth + bitPosition) <= BitWidth &&
348         "Illegal bit insertion");
349
350  // Insertion is a direct copy.
351  if (subBitWidth == BitWidth) {
352    *this = subBits;
353    return;
354  }
355
356  // Single word result can be done as a direct bitmask.
357  if (isSingleWord()) {
358    uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - subBitWidth);
359    U.VAL &= ~(mask << bitPosition);
360    U.VAL |= (subBits.U.VAL << bitPosition);
361    return;
362  }
363
364  unsigned loBit = whichBit(bitPosition);
365  unsigned loWord = whichWord(bitPosition);
366  unsigned hi1Word = whichWord(bitPosition + subBitWidth - 1);
367
368  // Insertion within a single word can be done as a direct bitmask.
369  if (loWord == hi1Word) {
370    uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - subBitWidth);
371    U.pVal[loWord] &= ~(mask << loBit);
372    U.pVal[loWord] |= (subBits.U.VAL << loBit);
373    return;
374  }
375
376  // Insert on word boundaries.
377  if (loBit == 0) {
378    // Direct copy whole words.
379    unsigned numWholeSubWords = subBitWidth / APINT_BITS_PER_WORD;
380    memcpy(U.pVal + loWord, subBits.getRawData(),
381           numWholeSubWords * APINT_WORD_SIZE);
382
383    // Mask+insert remaining bits.
384    unsigned remainingBits = subBitWidth % APINT_BITS_PER_WORD;
385    if (remainingBits != 0) {
386      uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - remainingBits);
387      U.pVal[hi1Word] &= ~mask;
388      U.pVal[hi1Word] |= subBits.getWord(subBitWidth - 1);
389    }
390    return;
391  }
392
393  // General case - set/clear individual bits in dst based on src.
394  // TODO - there is scope for optimization here, but at the moment this code
395  // path is barely used so prefer readability over performance.
396  for (unsigned i = 0; i != subBitWidth; ++i) {
397    if (subBits[i])
398      setBit(bitPosition + i);
399    else
400      clearBit(bitPosition + i);
401  }
402}
403
404APInt APInt::extractBits(unsigned numBits, unsigned bitPosition) const {
405  assert(numBits > 0 && "Can't extract zero bits");
406  assert(bitPosition < BitWidth && (numBits + bitPosition) <= BitWidth &&
407         "Illegal bit extraction");
408
409  if (isSingleWord())
410    return APInt(numBits, U.VAL >> bitPosition);
411
412  unsigned loBit = whichBit(bitPosition);
413  unsigned loWord = whichWord(bitPosition);
414  unsigned hiWord = whichWord(bitPosition + numBits - 1);
415
416  // Single word result extracting bits from a single word source.
417  if (loWord == hiWord)
418    return APInt(numBits, U.pVal[loWord] >> loBit);
419
420  // Extracting bits that start on a source word boundary can be done
421  // as a fast memory copy.
422  if (loBit == 0)
423    return APInt(numBits, makeArrayRef(U.pVal + loWord, 1 + hiWord - loWord));
424
425  // General case - shift + copy source words directly into place.
426  APInt Result(numBits, 0);
427  unsigned NumSrcWords = getNumWords();
428  unsigned NumDstWords = Result.getNumWords();
429
430  uint64_t *DestPtr = Result.isSingleWord() ? &Result.U.VAL : Result.U.pVal;
431  for (unsigned word = 0; word < NumDstWords; ++word) {
432    uint64_t w0 = U.pVal[loWord + word];
433    uint64_t w1 =
434        (loWord + word + 1) < NumSrcWords ? U.pVal[loWord + word + 1] : 0;
435    DestPtr[word] = (w0 >> loBit) | (w1 << (APINT_BITS_PER_WORD - loBit));
436  }
437
438  return Result.clearUnusedBits();
439}
440
441unsigned APInt::getBitsNeeded(StringRef str, uint8_t radix) {
442  assert(!str.empty() && "Invalid string length");
443  assert((radix == 10 || radix == 8 || radix == 16 || radix == 2 ||
444          radix == 36) &&
445         "Radix should be 2, 8, 10, 16, or 36!");
446
447  size_t slen = str.size();
448
449  // Each computation below needs to know if it's negative.
450  StringRef::iterator p = str.begin();
451  unsigned isNegative = *p == '-';
452  if (*p == '-' || *p == '+') {
453    p++;
454    slen--;
455    assert(slen && "String is only a sign, needs a value.");
456  }
457
458  // For radixes of power-of-two values, the bits required is accurately and
459  // easily computed
460  if (radix == 2)
461    return slen + isNegative;
462  if (radix == 8)
463    return slen * 3 + isNegative;
464  if (radix == 16)
465    return slen * 4 + isNegative;
466
467  // FIXME: base 36
468
469  // This is grossly inefficient but accurate. We could probably do something
470  // with a computation of roughly slen*64/20 and then adjust by the value of
471  // the first few digits. But, I'm not sure how accurate that could be.
472
473  // Compute a sufficient number of bits that is always large enough but might
474  // be too large. This avoids the assertion in the constructor. This
475  // calculation doesn't work appropriately for the numbers 0-9, so just use 4
476  // bits in that case.
477  unsigned sufficient
478    = radix == 10? (slen == 1 ? 4 : slen * 64/18)
479                 : (slen == 1 ? 7 : slen * 16/3);
480
481  // Convert to the actual binary value.
482  APInt tmp(sufficient, StringRef(p, slen), radix);
483
484  // Compute how many bits are required. If the log is infinite, assume we need
485  // just bit. If the log is exact and value is negative, then the value is
486  // MinSignedValue with (log + 1) bits.
487  unsigned log = tmp.logBase2();
488  if (log == (unsigned)-1) {
489    return isNegative + 1;
490  } else if (isNegative && tmp.isPowerOf2()) {
491    return isNegative + log;
492  } else {
493    return isNegative + log + 1;
494  }
495}
496
497hash_code llvm::hash_value(const APInt &Arg) {
498  if (Arg.isSingleWord())
499    return hash_combine(Arg.U.VAL);
500
501  return hash_combine_range(Arg.U.pVal, Arg.U.pVal + Arg.getNumWords());
502}
503
504bool APInt::isSplat(unsigned SplatSizeInBits) const {
505  assert(getBitWidth() % SplatSizeInBits == 0 &&
506         "SplatSizeInBits must divide width!");
507  // We can check that all parts of an integer are equal by making use of a
508  // little trick: rotate and check if it's still the same value.
509  return *this == rotl(SplatSizeInBits);
510}
511
512/// This function returns the high "numBits" bits of this APInt.
513APInt APInt::getHiBits(unsigned numBits) const {
514  return this->lshr(BitWidth - numBits);
515}
516
517/// This function returns the low "numBits" bits of this APInt.
518APInt APInt::getLoBits(unsigned numBits) const {
519  APInt Result(getLowBitsSet(BitWidth, numBits));
520  Result &= *this;
521  return Result;
522}
523
524/// Return a value containing V broadcasted over NewLen bits.
525APInt APInt::getSplat(unsigned NewLen, const APInt &V) {
526  assert(NewLen >= V.getBitWidth() && "Can't splat to smaller bit width!");
527
528  APInt Val = V.zextOrSelf(NewLen);
529  for (unsigned I = V.getBitWidth(); I < NewLen; I <<= 1)
530    Val |= Val << I;
531
532  return Val;
533}
534
535unsigned APInt::countLeadingZerosSlowCase() const {
536  unsigned Count = 0;
537  for (int i = getNumWords()-1; i >= 0; --i) {
538    uint64_t V = U.pVal[i];
539    if (V == 0)
540      Count += APINT_BITS_PER_WORD;
541    else {
542      Count += llvm::countLeadingZeros(V);
543      break;
544    }
545  }
546  // Adjust for unused bits in the most significant word (they are zero).
547  unsigned Mod = BitWidth % APINT_BITS_PER_WORD;
548  Count -= Mod > 0 ? APINT_BITS_PER_WORD - Mod : 0;
549  return Count;
550}
551
552unsigned APInt::countLeadingOnesSlowCase() const {
553  unsigned highWordBits = BitWidth % APINT_BITS_PER_WORD;
554  unsigned shift;
555  if (!highWordBits) {
556    highWordBits = APINT_BITS_PER_WORD;
557    shift = 0;
558  } else {
559    shift = APINT_BITS_PER_WORD - highWordBits;
560  }
561  int i = getNumWords() - 1;
562  unsigned Count = llvm::countLeadingOnes(U.pVal[i] << shift);
563  if (Count == highWordBits) {
564    for (i--; i >= 0; --i) {
565      if (U.pVal[i] == WORDTYPE_MAX)
566        Count += APINT_BITS_PER_WORD;
567      else {
568        Count += llvm::countLeadingOnes(U.pVal[i]);
569        break;
570      }
571    }
572  }
573  return Count;
574}
575
576unsigned APInt::countTrailingZerosSlowCase() const {
577  unsigned Count = 0;
578  unsigned i = 0;
579  for (; i < getNumWords() && U.pVal[i] == 0; ++i)
580    Count += APINT_BITS_PER_WORD;
581  if (i < getNumWords())
582    Count += llvm::countTrailingZeros(U.pVal[i]);
583  return std::min(Count, BitWidth);
584}
585
586unsigned APInt::countTrailingOnesSlowCase() const {
587  unsigned Count = 0;
588  unsigned i = 0;
589  for (; i < getNumWords() && U.pVal[i] == WORDTYPE_MAX; ++i)
590    Count += APINT_BITS_PER_WORD;
591  if (i < getNumWords())
592    Count += llvm::countTrailingOnes(U.pVal[i]);
593  assert(Count <= BitWidth);
594  return Count;
595}
596
597unsigned APInt::countPopulationSlowCase() const {
598  unsigned Count = 0;
599  for (unsigned i = 0; i < getNumWords(); ++i)
600    Count += llvm::countPopulation(U.pVal[i]);
601  return Count;
602}
603
604bool APInt::intersectsSlowCase(const APInt &RHS) const {
605  for (unsigned i = 0, e = getNumWords(); i != e; ++i)
606    if ((U.pVal[i] & RHS.U.pVal[i]) != 0)
607      return true;
608
609  return false;
610}
611
612bool APInt::isSubsetOfSlowCase(const APInt &RHS) const {
613  for (unsigned i = 0, e = getNumWords(); i != e; ++i)
614    if ((U.pVal[i] & ~RHS.U.pVal[i]) != 0)
615      return false;
616
617  return true;
618}
619
620APInt APInt::byteSwap() const {
621  assert(BitWidth >= 16 && BitWidth % 16 == 0 && "Cannot byteswap!");
622  if (BitWidth == 16)
623    return APInt(BitWidth, ByteSwap_16(uint16_t(U.VAL)));
624  if (BitWidth == 32)
625    return APInt(BitWidth, ByteSwap_32(unsigned(U.VAL)));
626  if (BitWidth == 48) {
627    unsigned Tmp1 = unsigned(U.VAL >> 16);
628    Tmp1 = ByteSwap_32(Tmp1);
629    uint16_t Tmp2 = uint16_t(U.VAL);
630    Tmp2 = ByteSwap_16(Tmp2);
631    return APInt(BitWidth, (uint64_t(Tmp2) << 32) | Tmp1);
632  }
633  if (BitWidth == 64)
634    return APInt(BitWidth, ByteSwap_64(U.VAL));
635
636  APInt Result(getNumWords() * APINT_BITS_PER_WORD, 0);
637  for (unsigned I = 0, N = getNumWords(); I != N; ++I)
638    Result.U.pVal[I] = ByteSwap_64(U.pVal[N - I - 1]);
639  if (Result.BitWidth != BitWidth) {
640    Result.lshrInPlace(Result.BitWidth - BitWidth);
641    Result.BitWidth = BitWidth;
642  }
643  return Result;
644}
645
646APInt APInt::reverseBits() const {
647  switch (BitWidth) {
648  case 64:
649    return APInt(BitWidth, llvm::reverseBits<uint64_t>(U.VAL));
650  case 32:
651    return APInt(BitWidth, llvm::reverseBits<uint32_t>(U.VAL));
652  case 16:
653    return APInt(BitWidth, llvm::reverseBits<uint16_t>(U.VAL));
654  case 8:
655    return APInt(BitWidth, llvm::reverseBits<uint8_t>(U.VAL));
656  default:
657    break;
658  }
659
660  APInt Val(*this);
661  APInt Reversed(BitWidth, 0);
662  unsigned S = BitWidth;
663
664  for (; Val != 0; Val.lshrInPlace(1)) {
665    Reversed <<= 1;
666    Reversed |= Val[0];
667    --S;
668  }
669
670  Reversed <<= S;
671  return Reversed;
672}
673
674APInt llvm::APIntOps::GreatestCommonDivisor(APInt A, APInt B) {
675  // Fast-path a common case.
676  if (A == B) return A;
677
678  // Corner cases: if either operand is zero, the other is the gcd.
679  if (!A) return B;
680  if (!B) return A;
681
682  // Count common powers of 2 and remove all other powers of 2.
683  unsigned Pow2;
684  {
685    unsigned Pow2_A = A.countTrailingZeros();
686    unsigned Pow2_B = B.countTrailingZeros();
687    if (Pow2_A > Pow2_B) {
688      A.lshrInPlace(Pow2_A - Pow2_B);
689      Pow2 = Pow2_B;
690    } else if (Pow2_B > Pow2_A) {
691      B.lshrInPlace(Pow2_B - Pow2_A);
692      Pow2 = Pow2_A;
693    } else {
694      Pow2 = Pow2_A;
695    }
696  }
697
698  // Both operands are odd multiples of 2^Pow_2:
699  //
700  //   gcd(a, b) = gcd(|a - b| / 2^i, min(a, b))
701  //
702  // This is a modified version of Stein's algorithm, taking advantage of
703  // efficient countTrailingZeros().
704  while (A != B) {
705    if (A.ugt(B)) {
706      A -= B;
707      A.lshrInPlace(A.countTrailingZeros() - Pow2);
708    } else {
709      B -= A;
710      B.lshrInPlace(B.countTrailingZeros() - Pow2);
711    }
712  }
713
714  return A;
715}
716
717APInt llvm::APIntOps::RoundDoubleToAPInt(double Double, unsigned width) {
718  uint64_t I = bit_cast<uint64_t>(Double);
719
720  // Get the sign bit from the highest order bit
721  bool isNeg = I >> 63;
722
723  // Get the 11-bit exponent and adjust for the 1023 bit bias
724  int64_t exp = ((I >> 52) & 0x7ff) - 1023;
725
726  // If the exponent is negative, the value is < 0 so just return 0.
727  if (exp < 0)
728    return APInt(width, 0u);
729
730  // Extract the mantissa by clearing the top 12 bits (sign + exponent).
731  uint64_t mantissa = (I & (~0ULL >> 12)) | 1ULL << 52;
732
733  // If the exponent doesn't shift all bits out of the mantissa
734  if (exp < 52)
735    return isNeg ? -APInt(width, mantissa >> (52 - exp)) :
736                    APInt(width, mantissa >> (52 - exp));
737
738  // If the client didn't provide enough bits for us to shift the mantissa into
739  // then the result is undefined, just return 0
740  if (width <= exp - 52)
741    return APInt(width, 0);
742
743  // Otherwise, we have to shift the mantissa bits up to the right location
744  APInt Tmp(width, mantissa);
745  Tmp <<= (unsigned)exp - 52;
746  return isNeg ? -Tmp : Tmp;
747}
748
749/// This function converts this APInt to a double.
750/// The layout for double is as following (IEEE Standard 754):
751///  --------------------------------------
752/// |  Sign    Exponent    Fraction    Bias |
753/// |-------------------------------------- |
754/// |  1[63]   11[62-52]   52[51-00]   1023 |
755///  --------------------------------------
756double APInt::roundToDouble(bool isSigned) const {
757
758  // Handle the simple case where the value is contained in one uint64_t.
759  // It is wrong to optimize getWord(0) to VAL; there might be more than one word.
760  if (isSingleWord() || getActiveBits() <= APINT_BITS_PER_WORD) {
761    if (isSigned) {
762      int64_t sext = SignExtend64(getWord(0), BitWidth);
763      return double(sext);
764    } else
765      return double(getWord(0));
766  }
767
768  // Determine if the value is negative.
769  bool isNeg = isSigned ? (*this)[BitWidth-1] : false;
770
771  // Construct the absolute value if we're negative.
772  APInt Tmp(isNeg ? -(*this) : (*this));
773
774  // Figure out how many bits we're using.
775  unsigned n = Tmp.getActiveBits();
776
777  // The exponent (without bias normalization) is just the number of bits
778  // we are using. Note that the sign bit is gone since we constructed the
779  // absolute value.
780  uint64_t exp = n;
781
782  // Return infinity for exponent overflow
783  if (exp > 1023) {
784    if (!isSigned || !isNeg)
785      return std::numeric_limits<double>::infinity();
786    else
787      return -std::numeric_limits<double>::infinity();
788  }
789  exp += 1023; // Increment for 1023 bias
790
791  // Number of bits in mantissa is 52. To obtain the mantissa value, we must
792  // extract the high 52 bits from the correct words in pVal.
793  uint64_t mantissa;
794  unsigned hiWord = whichWord(n-1);
795  if (hiWord == 0) {
796    mantissa = Tmp.U.pVal[0];
797    if (n > 52)
798      mantissa >>= n - 52; // shift down, we want the top 52 bits.
799  } else {
800    assert(hiWord > 0 && "huh?");
801    uint64_t hibits = Tmp.U.pVal[hiWord] << (52 - n % APINT_BITS_PER_WORD);
802    uint64_t lobits = Tmp.U.pVal[hiWord-1] >> (11 + n % APINT_BITS_PER_WORD);
803    mantissa = hibits | lobits;
804  }
805
806  // The leading bit of mantissa is implicit, so get rid of it.
807  uint64_t sign = isNeg ? (1ULL << (APINT_BITS_PER_WORD - 1)) : 0;
808  uint64_t I = sign | (exp << 52) | mantissa;
809  return bit_cast<double>(I);
810}
811
812// Truncate to new width.
813APInt APInt::trunc(unsigned width) const {
814  assert(width < BitWidth && "Invalid APInt Truncate request");
815  assert(width && "Can't truncate to 0 bits");
816
817  if (width <= APINT_BITS_PER_WORD)
818    return APInt(width, getRawData()[0]);
819
820  APInt Result(getMemory(getNumWords(width)), width);
821
822  // Copy full words.
823  unsigned i;
824  for (i = 0; i != width / APINT_BITS_PER_WORD; i++)
825    Result.U.pVal[i] = U.pVal[i];
826
827  // Truncate and copy any partial word.
828  unsigned bits = (0 - width) % APINT_BITS_PER_WORD;
829  if (bits != 0)
830    Result.U.pVal[i] = U.pVal[i] << bits >> bits;
831
832  return Result;
833}
834
835// Sign extend to a new width.
836APInt APInt::sext(unsigned Width) const {
837  assert(Width > BitWidth && "Invalid APInt SignExtend request");
838
839  if (Width <= APINT_BITS_PER_WORD)
840    return APInt(Width, SignExtend64(U.VAL, BitWidth));
841
842  APInt Result(getMemory(getNumWords(Width)), Width);
843
844  // Copy words.
845  std::memcpy(Result.U.pVal, getRawData(), getNumWords() * APINT_WORD_SIZE);
846
847  // Sign extend the last word since there may be unused bits in the input.
848  Result.U.pVal[getNumWords() - 1] =
849      SignExtend64(Result.U.pVal[getNumWords() - 1],
850                   ((BitWidth - 1) % APINT_BITS_PER_WORD) + 1);
851
852  // Fill with sign bits.
853  std::memset(Result.U.pVal + getNumWords(), isNegative() ? -1 : 0,
854              (Result.getNumWords() - getNumWords()) * APINT_WORD_SIZE);
855  Result.clearUnusedBits();
856  return Result;
857}
858
859//  Zero extend to a new width.
860APInt APInt::zext(unsigned width) const {
861  assert(width > BitWidth && "Invalid APInt ZeroExtend request");
862
863  if (width <= APINT_BITS_PER_WORD)
864    return APInt(width, U.VAL);
865
866  APInt Result(getMemory(getNumWords(width)), width);
867
868  // Copy words.
869  std::memcpy(Result.U.pVal, getRawData(), getNumWords() * APINT_WORD_SIZE);
870
871  // Zero remaining words.
872  std::memset(Result.U.pVal + getNumWords(), 0,
873              (Result.getNumWords() - getNumWords()) * APINT_WORD_SIZE);
874
875  return Result;
876}
877
878APInt APInt::zextOrTrunc(unsigned width) const {
879  if (BitWidth < width)
880    return zext(width);
881  if (BitWidth > width)
882    return trunc(width);
883  return *this;
884}
885
886APInt APInt::sextOrTrunc(unsigned width) const {
887  if (BitWidth < width)
888    return sext(width);
889  if (BitWidth > width)
890    return trunc(width);
891  return *this;
892}
893
894APInt APInt::zextOrSelf(unsigned width) const {
895  if (BitWidth < width)
896    return zext(width);
897  return *this;
898}
899
900APInt APInt::sextOrSelf(unsigned width) const {
901  if (BitWidth < width)
902    return sext(width);
903  return *this;
904}
905
906/// Arithmetic right-shift this APInt by shiftAmt.
907/// Arithmetic right-shift function.
908void APInt::ashrInPlace(const APInt &shiftAmt) {
909  ashrInPlace((unsigned)shiftAmt.getLimitedValue(BitWidth));
910}
911
912/// Arithmetic right-shift this APInt by shiftAmt.
913/// Arithmetic right-shift function.
914void APInt::ashrSlowCase(unsigned ShiftAmt) {
915  // Don't bother performing a no-op shift.
916  if (!ShiftAmt)
917    return;
918
919  // Save the original sign bit for later.
920  bool Negative = isNegative();
921
922  // WordShift is the inter-part shift; BitShift is intra-part shift.
923  unsigned WordShift = ShiftAmt / APINT_BITS_PER_WORD;
924  unsigned BitShift = ShiftAmt % APINT_BITS_PER_WORD;
925
926  unsigned WordsToMove = getNumWords() - WordShift;
927  if (WordsToMove != 0) {
928    // Sign extend the last word to fill in the unused bits.
929    U.pVal[getNumWords() - 1] = SignExtend64(
930        U.pVal[getNumWords() - 1], ((BitWidth - 1) % APINT_BITS_PER_WORD) + 1);
931
932    // Fastpath for moving by whole words.
933    if (BitShift == 0) {
934      std::memmove(U.pVal, U.pVal + WordShift, WordsToMove * APINT_WORD_SIZE);
935    } else {
936      // Move the words containing significant bits.
937      for (unsigned i = 0; i != WordsToMove - 1; ++i)
938        U.pVal[i] = (U.pVal[i + WordShift] >> BitShift) |
939                    (U.pVal[i + WordShift + 1] << (APINT_BITS_PER_WORD - BitShift));
940
941      // Handle the last word which has no high bits to copy.
942      U.pVal[WordsToMove - 1] = U.pVal[WordShift + WordsToMove - 1] >> BitShift;
943      // Sign extend one more time.
944      U.pVal[WordsToMove - 1] =
945          SignExtend64(U.pVal[WordsToMove - 1], APINT_BITS_PER_WORD - BitShift);
946    }
947  }
948
949  // Fill in the remainder based on the original sign.
950  std::memset(U.pVal + WordsToMove, Negative ? -1 : 0,
951              WordShift * APINT_WORD_SIZE);
952  clearUnusedBits();
953}
954
955/// Logical right-shift this APInt by shiftAmt.
956/// Logical right-shift function.
957void APInt::lshrInPlace(const APInt &shiftAmt) {
958  lshrInPlace((unsigned)shiftAmt.getLimitedValue(BitWidth));
959}
960
961/// Logical right-shift this APInt by shiftAmt.
962/// Logical right-shift function.
963void APInt::lshrSlowCase(unsigned ShiftAmt) {
964  tcShiftRight(U.pVal, getNumWords(), ShiftAmt);
965}
966
967/// Left-shift this APInt by shiftAmt.
968/// Left-shift function.
969APInt &APInt::operator<<=(const APInt &shiftAmt) {
970  // It's undefined behavior in C to shift by BitWidth or greater.
971  *this <<= (unsigned)shiftAmt.getLimitedValue(BitWidth);
972  return *this;
973}
974
975void APInt::shlSlowCase(unsigned ShiftAmt) {
976  tcShiftLeft(U.pVal, getNumWords(), ShiftAmt);
977  clearUnusedBits();
978}
979
980// Calculate the rotate amount modulo the bit width.
981static unsigned rotateModulo(unsigned BitWidth, const APInt &rotateAmt) {
982  unsigned rotBitWidth = rotateAmt.getBitWidth();
983  APInt rot = rotateAmt;
984  if (rotBitWidth < BitWidth) {
985    // Extend the rotate APInt, so that the urem doesn't divide by 0.
986    // e.g. APInt(1, 32) would give APInt(1, 0).
987    rot = rotateAmt.zext(BitWidth);
988  }
989  rot = rot.urem(APInt(rot.getBitWidth(), BitWidth));
990  return rot.getLimitedValue(BitWidth);
991}
992
993APInt APInt::rotl(const APInt &rotateAmt) const {
994  return rotl(rotateModulo(BitWidth, rotateAmt));
995}
996
997APInt APInt::rotl(unsigned rotateAmt) const {
998  rotateAmt %= BitWidth;
999  if (rotateAmt == 0)
1000    return *this;
1001  return shl(rotateAmt) | lshr(BitWidth - rotateAmt);
1002}
1003
1004APInt APInt::rotr(const APInt &rotateAmt) const {
1005  return rotr(rotateModulo(BitWidth, rotateAmt));
1006}
1007
1008APInt APInt::rotr(unsigned rotateAmt) const {
1009  rotateAmt %= BitWidth;
1010  if (rotateAmt == 0)
1011    return *this;
1012  return lshr(rotateAmt) | shl(BitWidth - rotateAmt);
1013}
1014
1015// Square Root - this method computes and returns the square root of "this".
1016// Three mechanisms are used for computation. For small values (<= 5 bits),
1017// a table lookup is done. This gets some performance for common cases. For
1018// values using less than 52 bits, the value is converted to double and then
1019// the libc sqrt function is called. The result is rounded and then converted
1020// back to a uint64_t which is then used to construct the result. Finally,
1021// the Babylonian method for computing square roots is used.
1022APInt APInt::sqrt() const {
1023
1024  // Determine the magnitude of the value.
1025  unsigned magnitude = getActiveBits();
1026
1027  // Use a fast table for some small values. This also gets rid of some
1028  // rounding errors in libc sqrt for small values.
1029  if (magnitude <= 5) {
1030    static const uint8_t results[32] = {
1031      /*     0 */ 0,
1032      /*  1- 2 */ 1, 1,
1033      /*  3- 6 */ 2, 2, 2, 2,
1034      /*  7-12 */ 3, 3, 3, 3, 3, 3,
1035      /* 13-20 */ 4, 4, 4, 4, 4, 4, 4, 4,
1036      /* 21-30 */ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
1037      /*    31 */ 6
1038    };
1039    return APInt(BitWidth, results[ (isSingleWord() ? U.VAL : U.pVal[0]) ]);
1040  }
1041
1042  // If the magnitude of the value fits in less than 52 bits (the precision of
1043  // an IEEE double precision floating point value), then we can use the
1044  // libc sqrt function which will probably use a hardware sqrt computation.
1045  // This should be faster than the algorithm below.
1046  if (magnitude < 52) {
1047    return APInt(BitWidth,
1048                 uint64_t(::round(::sqrt(double(isSingleWord() ? U.VAL
1049                                                               : U.pVal[0])))));
1050  }
1051
1052  // Okay, all the short cuts are exhausted. We must compute it. The following
1053  // is a classical Babylonian method for computing the square root. This code
1054  // was adapted to APInt from a wikipedia article on such computations.
1055  // See http://www.wikipedia.org/ and go to the page named
1056  // Calculate_an_integer_square_root.
1057  unsigned nbits = BitWidth, i = 4;
1058  APInt testy(BitWidth, 16);
1059  APInt x_old(BitWidth, 1);
1060  APInt x_new(BitWidth, 0);
1061  APInt two(BitWidth, 2);
1062
1063  // Select a good starting value using binary logarithms.
1064  for (;; i += 2, testy = testy.shl(2))
1065    if (i >= nbits || this->ule(testy)) {
1066      x_old = x_old.shl(i / 2);
1067      break;
1068    }
1069
1070  // Use the Babylonian method to arrive at the integer square root:
1071  for (;;) {
1072    x_new = (this->udiv(x_old) + x_old).udiv(two);
1073    if (x_old.ule(x_new))
1074      break;
1075    x_old = x_new;
1076  }
1077
1078  // Make sure we return the closest approximation
1079  // NOTE: The rounding calculation below is correct. It will produce an
1080  // off-by-one discrepancy with results from pari/gp. That discrepancy has been
1081  // determined to be a rounding issue with pari/gp as it begins to use a
1082  // floating point representation after 192 bits. There are no discrepancies
1083  // between this algorithm and pari/gp for bit widths < 192 bits.
1084  APInt square(x_old * x_old);
1085  APInt nextSquare((x_old + 1) * (x_old +1));
1086  if (this->ult(square))
1087    return x_old;
1088  assert(this->ule(nextSquare) && "Error in APInt::sqrt computation");
1089  APInt midpoint((nextSquare - square).udiv(two));
1090  APInt offset(*this - square);
1091  if (offset.ult(midpoint))
1092    return x_old;
1093  return x_old + 1;
1094}
1095
1096/// Computes the multiplicative inverse of this APInt for a given modulo. The
1097/// iterative extended Euclidean algorithm is used to solve for this value,
1098/// however we simplify it to speed up calculating only the inverse, and take
1099/// advantage of div+rem calculations. We also use some tricks to avoid copying
1100/// (potentially large) APInts around.
1101/// WARNING: a value of '0' may be returned,
1102///          signifying that no multiplicative inverse exists!
1103APInt APInt::multiplicativeInverse(const APInt& modulo) const {
1104  assert(ult(modulo) && "This APInt must be smaller than the modulo");
1105
1106  // Using the properties listed at the following web page (accessed 06/21/08):
1107  //   http://www.numbertheory.org/php/euclid.html
1108  // (especially the properties numbered 3, 4 and 9) it can be proved that
1109  // BitWidth bits suffice for all the computations in the algorithm implemented
1110  // below. More precisely, this number of bits suffice if the multiplicative
1111  // inverse exists, but may not suffice for the general extended Euclidean
1112  // algorithm.
1113
1114  APInt r[2] = { modulo, *this };
1115  APInt t[2] = { APInt(BitWidth, 0), APInt(BitWidth, 1) };
1116  APInt q(BitWidth, 0);
1117
1118  unsigned i;
1119  for (i = 0; r[i^1] != 0; i ^= 1) {
1120    // An overview of the math without the confusing bit-flipping:
1121    // q = r[i-2] / r[i-1]
1122    // r[i] = r[i-2] % r[i-1]
1123    // t[i] = t[i-2] - t[i-1] * q
1124    udivrem(r[i], r[i^1], q, r[i]);
1125    t[i] -= t[i^1] * q;
1126  }
1127
1128  // If this APInt and the modulo are not coprime, there is no multiplicative
1129  // inverse, so return 0. We check this by looking at the next-to-last
1130  // remainder, which is the gcd(*this,modulo) as calculated by the Euclidean
1131  // algorithm.
1132  if (r[i] != 1)
1133    return APInt(BitWidth, 0);
1134
1135  // The next-to-last t is the multiplicative inverse.  However, we are
1136  // interested in a positive inverse. Calculate a positive one from a negative
1137  // one if necessary. A simple addition of the modulo suffices because
1138  // abs(t[i]) is known to be less than *this/2 (see the link above).
1139  if (t[i].isNegative())
1140    t[i] += modulo;
1141
1142  return std::move(t[i]);
1143}
1144
1145/// Calculate the magic numbers required to implement a signed integer division
1146/// by a constant as a sequence of multiplies, adds and shifts.  Requires that
1147/// the divisor not be 0, 1, or -1.  Taken from "Hacker's Delight", Henry S.
1148/// Warren, Jr., chapter 10.
1149APInt::ms APInt::magic() const {
1150  const APInt& d = *this;
1151  unsigned p;
1152  APInt ad, anc, delta, q1, r1, q2, r2, t;
1153  APInt signedMin = APInt::getSignedMinValue(d.getBitWidth());
1154  struct ms mag;
1155
1156  ad = d.abs();
1157  t = signedMin + (d.lshr(d.getBitWidth() - 1));
1158  anc = t - 1 - t.urem(ad);   // absolute value of nc
1159  p = d.getBitWidth() - 1;    // initialize p
1160  q1 = signedMin.udiv(anc);   // initialize q1 = 2p/abs(nc)
1161  r1 = signedMin - q1*anc;    // initialize r1 = rem(2p,abs(nc))
1162  q2 = signedMin.udiv(ad);    // initialize q2 = 2p/abs(d)
1163  r2 = signedMin - q2*ad;     // initialize r2 = rem(2p,abs(d))
1164  do {
1165    p = p + 1;
1166    q1 = q1<<1;          // update q1 = 2p/abs(nc)
1167    r1 = r1<<1;          // update r1 = rem(2p/abs(nc))
1168    if (r1.uge(anc)) {  // must be unsigned comparison
1169      q1 = q1 + 1;
1170      r1 = r1 - anc;
1171    }
1172    q2 = q2<<1;          // update q2 = 2p/abs(d)
1173    r2 = r2<<1;          // update r2 = rem(2p/abs(d))
1174    if (r2.uge(ad)) {   // must be unsigned comparison
1175      q2 = q2 + 1;
1176      r2 = r2 - ad;
1177    }
1178    delta = ad - r2;
1179  } while (q1.ult(delta) || (q1 == delta && r1 == 0));
1180
1181  mag.m = q2 + 1;
1182  if (d.isNegative()) mag.m = -mag.m;   // resulting magic number
1183  mag.s = p - d.getBitWidth();          // resulting shift
1184  return mag;
1185}
1186
1187/// Calculate the magic numbers required to implement an unsigned integer
1188/// division by a constant as a sequence of multiplies, adds and shifts.
1189/// Requires that the divisor not be 0.  Taken from "Hacker's Delight", Henry
1190/// S. Warren, Jr., chapter 10.
1191/// LeadingZeros can be used to simplify the calculation if the upper bits
1192/// of the divided value are known zero.
1193APInt::mu APInt::magicu(unsigned LeadingZeros) const {
1194  const APInt& d = *this;
1195  unsigned p;
1196  APInt nc, delta, q1, r1, q2, r2;
1197  struct mu magu;
1198  magu.a = 0;               // initialize "add" indicator
1199  APInt allOnes = APInt::getAllOnesValue(d.getBitWidth()).lshr(LeadingZeros);
1200  APInt signedMin = APInt::getSignedMinValue(d.getBitWidth());
1201  APInt signedMax = APInt::getSignedMaxValue(d.getBitWidth());
1202
1203  nc = allOnes - (allOnes - d).urem(d);
1204  p = d.getBitWidth() - 1;  // initialize p
1205  q1 = signedMin.udiv(nc);  // initialize q1 = 2p/nc
1206  r1 = signedMin - q1*nc;   // initialize r1 = rem(2p,nc)
1207  q2 = signedMax.udiv(d);   // initialize q2 = (2p-1)/d
1208  r2 = signedMax - q2*d;    // initialize r2 = rem((2p-1),d)
1209  do {
1210    p = p + 1;
1211    if (r1.uge(nc - r1)) {
1212      q1 = q1 + q1 + 1;  // update q1
1213      r1 = r1 + r1 - nc; // update r1
1214    }
1215    else {
1216      q1 = q1+q1; // update q1
1217      r1 = r1+r1; // update r1
1218    }
1219    if ((r2 + 1).uge(d - r2)) {
1220      if (q2.uge(signedMax)) magu.a = 1;
1221      q2 = q2+q2 + 1;     // update q2
1222      r2 = r2+r2 + 1 - d; // update r2
1223    }
1224    else {
1225      if (q2.uge(signedMin)) magu.a = 1;
1226      q2 = q2+q2;     // update q2
1227      r2 = r2+r2 + 1; // update r2
1228    }
1229    delta = d - 1 - r2;
1230  } while (p < d.getBitWidth()*2 &&
1231           (q1.ult(delta) || (q1 == delta && r1 == 0)));
1232  magu.m = q2 + 1; // resulting magic number
1233  magu.s = p - d.getBitWidth();  // resulting shift
1234  return magu;
1235}
1236
1237/// Implementation of Knuth's Algorithm D (Division of nonnegative integers)
1238/// from "Art of Computer Programming, Volume 2", section 4.3.1, p. 272. The
1239/// variables here have the same names as in the algorithm. Comments explain
1240/// the algorithm and any deviation from it.
1241static void KnuthDiv(uint32_t *u, uint32_t *v, uint32_t *q, uint32_t* r,
1242                     unsigned m, unsigned n) {
1243  assert(u && "Must provide dividend");
1244  assert(v && "Must provide divisor");
1245  assert(q && "Must provide quotient");
1246  assert(u != v && u != q && v != q && "Must use different memory");
1247  assert(n>1 && "n must be > 1");
1248
1249  // b denotes the base of the number system. In our case b is 2^32.
1250  const uint64_t b = uint64_t(1) << 32;
1251
1252// The DEBUG macros here tend to be spam in the debug output if you're not
1253// debugging this code. Disable them unless KNUTH_DEBUG is defined.
1254#ifdef KNUTH_DEBUG
1255#define DEBUG_KNUTH(X) LLVM_DEBUG(X)
1256#else
1257#define DEBUG_KNUTH(X) do {} while(false)
1258#endif
1259
1260  DEBUG_KNUTH(dbgs() << "KnuthDiv: m=" << m << " n=" << n << '\n');
1261  DEBUG_KNUTH(dbgs() << "KnuthDiv: original:");
1262  DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
1263  DEBUG_KNUTH(dbgs() << " by");
1264  DEBUG_KNUTH(for (int i = n; i > 0; i--) dbgs() << " " << v[i - 1]);
1265  DEBUG_KNUTH(dbgs() << '\n');
1266  // D1. [Normalize.] Set d = b / (v[n-1] + 1) and multiply all the digits of
1267  // u and v by d. Note that we have taken Knuth's advice here to use a power
1268  // of 2 value for d such that d * v[n-1] >= b/2 (b is the base). A power of
1269  // 2 allows us to shift instead of multiply and it is easy to determine the
1270  // shift amount from the leading zeros.  We are basically normalizing the u
1271  // and v so that its high bits are shifted to the top of v's range without
1272  // overflow. Note that this can require an extra word in u so that u must
1273  // be of length m+n+1.
1274  unsigned shift = countLeadingZeros(v[n-1]);
1275  uint32_t v_carry = 0;
1276  uint32_t u_carry = 0;
1277  if (shift) {
1278    for (unsigned i = 0; i < m+n; ++i) {
1279      uint32_t u_tmp = u[i] >> (32 - shift);
1280      u[i] = (u[i] << shift) | u_carry;
1281      u_carry = u_tmp;
1282    }
1283    for (unsigned i = 0; i < n; ++i) {
1284      uint32_t v_tmp = v[i] >> (32 - shift);
1285      v[i] = (v[i] << shift) | v_carry;
1286      v_carry = v_tmp;
1287    }
1288  }
1289  u[m+n] = u_carry;
1290
1291  DEBUG_KNUTH(dbgs() << "KnuthDiv:   normal:");
1292  DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
1293  DEBUG_KNUTH(dbgs() << " by");
1294  DEBUG_KNUTH(for (int i = n; i > 0; i--) dbgs() << " " << v[i - 1]);
1295  DEBUG_KNUTH(dbgs() << '\n');
1296
1297  // D2. [Initialize j.]  Set j to m. This is the loop counter over the places.
1298  int j = m;
1299  do {
1300    DEBUG_KNUTH(dbgs() << "KnuthDiv: quotient digit #" << j << '\n');
1301    // D3. [Calculate q'.].
1302    //     Set qp = (u[j+n]*b + u[j+n-1]) / v[n-1]. (qp=qprime=q')
1303    //     Set rp = (u[j+n]*b + u[j+n-1]) % v[n-1]. (rp=rprime=r')
1304    // Now test if qp == b or qp*v[n-2] > b*rp + u[j+n-2]; if so, decrease
1305    // qp by 1, increase rp by v[n-1], and repeat this test if rp < b. The test
1306    // on v[n-2] determines at high speed most of the cases in which the trial
1307    // value qp is one too large, and it eliminates all cases where qp is two
1308    // too large.
1309    uint64_t dividend = Make_64(u[j+n], u[j+n-1]);
1310    DEBUG_KNUTH(dbgs() << "KnuthDiv: dividend == " << dividend << '\n');
1311    uint64_t qp = dividend / v[n-1];
1312    uint64_t rp = dividend % v[n-1];
1313    if (qp == b || qp*v[n-2] > b*rp + u[j+n-2]) {
1314      qp--;
1315      rp += v[n-1];
1316      if (rp < b && (qp == b || qp*v[n-2] > b*rp + u[j+n-2]))
1317        qp--;
1318    }
1319    DEBUG_KNUTH(dbgs() << "KnuthDiv: qp == " << qp << ", rp == " << rp << '\n');
1320
1321    // D4. [Multiply and subtract.] Replace (u[j+n]u[j+n-1]...u[j]) with
1322    // (u[j+n]u[j+n-1]..u[j]) - qp * (v[n-1]...v[1]v[0]). This computation
1323    // consists of a simple multiplication by a one-place number, combined with
1324    // a subtraction.
1325    // The digits (u[j+n]...u[j]) should be kept positive; if the result of
1326    // this step is actually negative, (u[j+n]...u[j]) should be left as the
1327    // true value plus b**(n+1), namely as the b's complement of
1328    // the true value, and a "borrow" to the left should be remembered.
1329    int64_t borrow = 0;
1330    for (unsigned i = 0; i < n; ++i) {
1331      uint64_t p = uint64_t(qp) * uint64_t(v[i]);
1332      int64_t subres = int64_t(u[j+i]) - borrow - Lo_32(p);
1333      u[j+i] = Lo_32(subres);
1334      borrow = Hi_32(p) - Hi_32(subres);
1335      DEBUG_KNUTH(dbgs() << "KnuthDiv: u[j+i] = " << u[j + i]
1336                        << ", borrow = " << borrow << '\n');
1337    }
1338    bool isNeg = u[j+n] < borrow;
1339    u[j+n] -= Lo_32(borrow);
1340
1341    DEBUG_KNUTH(dbgs() << "KnuthDiv: after subtraction:");
1342    DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
1343    DEBUG_KNUTH(dbgs() << '\n');
1344
1345    // D5. [Test remainder.] Set q[j] = qp. If the result of step D4 was
1346    // negative, go to step D6; otherwise go on to step D7.
1347    q[j] = Lo_32(qp);
1348    if (isNeg) {
1349      // D6. [Add back]. The probability that this step is necessary is very
1350      // small, on the order of only 2/b. Make sure that test data accounts for
1351      // this possibility. Decrease q[j] by 1
1352      q[j]--;
1353      // and add (0v[n-1]...v[1]v[0]) to (u[j+n]u[j+n-1]...u[j+1]u[j]).
1354      // A carry will occur to the left of u[j+n], and it should be ignored
1355      // since it cancels with the borrow that occurred in D4.
1356      bool carry = false;
1357      for (unsigned i = 0; i < n; i++) {
1358        uint32_t limit = std::min(u[j+i],v[i]);
1359        u[j+i] += v[i] + carry;
1360        carry = u[j+i] < limit || (carry && u[j+i] == limit);
1361      }
1362      u[j+n] += carry;
1363    }
1364    DEBUG_KNUTH(dbgs() << "KnuthDiv: after correction:");
1365    DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
1366    DEBUG_KNUTH(dbgs() << "\nKnuthDiv: digit result = " << q[j] << '\n');
1367
1368    // D7. [Loop on j.]  Decrease j by one. Now if j >= 0, go back to D3.
1369  } while (--j >= 0);
1370
1371  DEBUG_KNUTH(dbgs() << "KnuthDiv: quotient:");
1372  DEBUG_KNUTH(for (int i = m; i >= 0; i--) dbgs() << " " << q[i]);
1373  DEBUG_KNUTH(dbgs() << '\n');
1374
1375  // D8. [Unnormalize]. Now q[...] is the desired quotient, and the desired
1376  // remainder may be obtained by dividing u[...] by d. If r is non-null we
1377  // compute the remainder (urem uses this).
1378  if (r) {
1379    // The value d is expressed by the "shift" value above since we avoided
1380    // multiplication by d by using a shift left. So, all we have to do is
1381    // shift right here.
1382    if (shift) {
1383      uint32_t carry = 0;
1384      DEBUG_KNUTH(dbgs() << "KnuthDiv: remainder:");
1385      for (int i = n-1; i >= 0; i--) {
1386        r[i] = (u[i] >> shift) | carry;
1387        carry = u[i] << (32 - shift);
1388        DEBUG_KNUTH(dbgs() << " " << r[i]);
1389      }
1390    } else {
1391      for (int i = n-1; i >= 0; i--) {
1392        r[i] = u[i];
1393        DEBUG_KNUTH(dbgs() << " " << r[i]);
1394      }
1395    }
1396    DEBUG_KNUTH(dbgs() << '\n');
1397  }
1398  DEBUG_KNUTH(dbgs() << '\n');
1399}
1400
1401void APInt::divide(const WordType *LHS, unsigned lhsWords, const WordType *RHS,
1402                   unsigned rhsWords, WordType *Quotient, WordType *Remainder) {
1403  assert(lhsWords >= rhsWords && "Fractional result");
1404
1405  // First, compose the values into an array of 32-bit words instead of
1406  // 64-bit words. This is a necessity of both the "short division" algorithm
1407  // and the Knuth "classical algorithm" which requires there to be native
1408  // operations for +, -, and * on an m bit value with an m*2 bit result. We
1409  // can't use 64-bit operands here because we don't have native results of
1410  // 128-bits. Furthermore, casting the 64-bit values to 32-bit values won't
1411  // work on large-endian machines.
1412  unsigned n = rhsWords * 2;
1413  unsigned m = (lhsWords * 2) - n;
1414
1415  // Allocate space for the temporary values we need either on the stack, if
1416  // it will fit, or on the heap if it won't.
1417  uint32_t SPACE[128];
1418  uint32_t *U = nullptr;
1419  uint32_t *V = nullptr;
1420  uint32_t *Q = nullptr;
1421  uint32_t *R = nullptr;
1422  if ((Remainder?4:3)*n+2*m+1 <= 128) {
1423    U = &SPACE[0];
1424    V = &SPACE[m+n+1];
1425    Q = &SPACE[(m+n+1) + n];
1426    if (Remainder)
1427      R = &SPACE[(m+n+1) + n + (m+n)];
1428  } else {
1429    U = new uint32_t[m + n + 1];
1430    V = new uint32_t[n];
1431    Q = new uint32_t[m+n];
1432    if (Remainder)
1433      R = new uint32_t[n];
1434  }
1435
1436  // Initialize the dividend
1437  memset(U, 0, (m+n+1)*sizeof(uint32_t));
1438  for (unsigned i = 0; i < lhsWords; ++i) {
1439    uint64_t tmp = LHS[i];
1440    U[i * 2] = Lo_32(tmp);
1441    U[i * 2 + 1] = Hi_32(tmp);
1442  }
1443  U[m+n] = 0; // this extra word is for "spill" in the Knuth algorithm.
1444
1445  // Initialize the divisor
1446  memset(V, 0, (n)*sizeof(uint32_t));
1447  for (unsigned i = 0; i < rhsWords; ++i) {
1448    uint64_t tmp = RHS[i];
1449    V[i * 2] = Lo_32(tmp);
1450    V[i * 2 + 1] = Hi_32(tmp);
1451  }
1452
1453  // initialize the quotient and remainder
1454  memset(Q, 0, (m+n) * sizeof(uint32_t));
1455  if (Remainder)
1456    memset(R, 0, n * sizeof(uint32_t));
1457
1458  // Now, adjust m and n for the Knuth division. n is the number of words in
1459  // the divisor. m is the number of words by which the dividend exceeds the
1460  // divisor (i.e. m+n is the length of the dividend). These sizes must not
1461  // contain any zero words or the Knuth algorithm fails.
1462  for (unsigned i = n; i > 0 && V[i-1] == 0; i--) {
1463    n--;
1464    m++;
1465  }
1466  for (unsigned i = m+n; i > 0 && U[i-1] == 0; i--)
1467    m--;
1468
1469  // If we're left with only a single word for the divisor, Knuth doesn't work
1470  // so we implement the short division algorithm here. This is much simpler
1471  // and faster because we are certain that we can divide a 64-bit quantity
1472  // by a 32-bit quantity at hardware speed and short division is simply a
1473  // series of such operations. This is just like doing short division but we
1474  // are using base 2^32 instead of base 10.
1475  assert(n != 0 && "Divide by zero?");
1476  if (n == 1) {
1477    uint32_t divisor = V[0];
1478    uint32_t remainder = 0;
1479    for (int i = m; i >= 0; i--) {
1480      uint64_t partial_dividend = Make_64(remainder, U[i]);
1481      if (partial_dividend == 0) {
1482        Q[i] = 0;
1483        remainder = 0;
1484      } else if (partial_dividend < divisor) {
1485        Q[i] = 0;
1486        remainder = Lo_32(partial_dividend);
1487      } else if (partial_dividend == divisor) {
1488        Q[i] = 1;
1489        remainder = 0;
1490      } else {
1491        Q[i] = Lo_32(partial_dividend / divisor);
1492        remainder = Lo_32(partial_dividend - (Q[i] * divisor));
1493      }
1494    }
1495    if (R)
1496      R[0] = remainder;
1497  } else {
1498    // Now we're ready to invoke the Knuth classical divide algorithm. In this
1499    // case n > 1.
1500    KnuthDiv(U, V, Q, R, m, n);
1501  }
1502
1503  // If the caller wants the quotient
1504  if (Quotient) {
1505    for (unsigned i = 0; i < lhsWords; ++i)
1506      Quotient[i] = Make_64(Q[i*2+1], Q[i*2]);
1507  }
1508
1509  // If the caller wants the remainder
1510  if (Remainder) {
1511    for (unsigned i = 0; i < rhsWords; ++i)
1512      Remainder[i] = Make_64(R[i*2+1], R[i*2]);
1513  }
1514
1515  // Clean up the memory we allocated.
1516  if (U != &SPACE[0]) {
1517    delete [] U;
1518    delete [] V;
1519    delete [] Q;
1520    delete [] R;
1521  }
1522}
1523
1524APInt APInt::udiv(const APInt &RHS) const {
1525  assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
1526
1527  // First, deal with the easy case
1528  if (isSingleWord()) {
1529    assert(RHS.U.VAL != 0 && "Divide by zero?");
1530    return APInt(BitWidth, U.VAL / RHS.U.VAL);
1531  }
1532
1533  // Get some facts about the LHS and RHS number of bits and words
1534  unsigned lhsWords = getNumWords(getActiveBits());
1535  unsigned rhsBits  = RHS.getActiveBits();
1536  unsigned rhsWords = getNumWords(rhsBits);
1537  assert(rhsWords && "Divided by zero???");
1538
1539  // Deal with some degenerate cases
1540  if (!lhsWords)
1541    // 0 / X ===> 0
1542    return APInt(BitWidth, 0);
1543  if (rhsBits == 1)
1544    // X / 1 ===> X
1545    return *this;
1546  if (lhsWords < rhsWords || this->ult(RHS))
1547    // X / Y ===> 0, iff X < Y
1548    return APInt(BitWidth, 0);
1549  if (*this == RHS)
1550    // X / X ===> 1
1551    return APInt(BitWidth, 1);
1552  if (lhsWords == 1) // rhsWords is 1 if lhsWords is 1.
1553    // All high words are zero, just use native divide
1554    return APInt(BitWidth, this->U.pVal[0] / RHS.U.pVal[0]);
1555
1556  // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1557  APInt Quotient(BitWidth, 0); // to hold result.
1558  divide(U.pVal, lhsWords, RHS.U.pVal, rhsWords, Quotient.U.pVal, nullptr);
1559  return Quotient;
1560}
1561
1562APInt APInt::udiv(uint64_t RHS) const {
1563  assert(RHS != 0 && "Divide by zero?");
1564
1565  // First, deal with the easy case
1566  if (isSingleWord())
1567    return APInt(BitWidth, U.VAL / RHS);
1568
1569  // Get some facts about the LHS words.
1570  unsigned lhsWords = getNumWords(getActiveBits());
1571
1572  // Deal with some degenerate cases
1573  if (!lhsWords)
1574    // 0 / X ===> 0
1575    return APInt(BitWidth, 0);
1576  if (RHS == 1)
1577    // X / 1 ===> X
1578    return *this;
1579  if (this->ult(RHS))
1580    // X / Y ===> 0, iff X < Y
1581    return APInt(BitWidth, 0);
1582  if (*this == RHS)
1583    // X / X ===> 1
1584    return APInt(BitWidth, 1);
1585  if (lhsWords == 1) // rhsWords is 1 if lhsWords is 1.
1586    // All high words are zero, just use native divide
1587    return APInt(BitWidth, this->U.pVal[0] / RHS);
1588
1589  // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1590  APInt Quotient(BitWidth, 0); // to hold result.
1591  divide(U.pVal, lhsWords, &RHS, 1, Quotient.U.pVal, nullptr);
1592  return Quotient;
1593}
1594
1595APInt APInt::sdiv(const APInt &RHS) const {
1596  if (isNegative()) {
1597    if (RHS.isNegative())
1598      return (-(*this)).udiv(-RHS);
1599    return -((-(*this)).udiv(RHS));
1600  }
1601  if (RHS.isNegative())
1602    return -(this->udiv(-RHS));
1603  return this->udiv(RHS);
1604}
1605
1606APInt APInt::sdiv(int64_t RHS) const {
1607  if (isNegative()) {
1608    if (RHS < 0)
1609      return (-(*this)).udiv(-RHS);
1610    return -((-(*this)).udiv(RHS));
1611  }
1612  if (RHS < 0)
1613    return -(this->udiv(-RHS));
1614  return this->udiv(RHS);
1615}
1616
1617APInt APInt::urem(const APInt &RHS) const {
1618  assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
1619  if (isSingleWord()) {
1620    assert(RHS.U.VAL != 0 && "Remainder by zero?");
1621    return APInt(BitWidth, U.VAL % RHS.U.VAL);
1622  }
1623
1624  // Get some facts about the LHS
1625  unsigned lhsWords = getNumWords(getActiveBits());
1626
1627  // Get some facts about the RHS
1628  unsigned rhsBits = RHS.getActiveBits();
1629  unsigned rhsWords = getNumWords(rhsBits);
1630  assert(rhsWords && "Performing remainder operation by zero ???");
1631
1632  // Check the degenerate cases
1633  if (lhsWords == 0)
1634    // 0 % Y ===> 0
1635    return APInt(BitWidth, 0);
1636  if (rhsBits == 1)
1637    // X % 1 ===> 0
1638    return APInt(BitWidth, 0);
1639  if (lhsWords < rhsWords || this->ult(RHS))
1640    // X % Y ===> X, iff X < Y
1641    return *this;
1642  if (*this == RHS)
1643    // X % X == 0;
1644    return APInt(BitWidth, 0);
1645  if (lhsWords == 1)
1646    // All high words are zero, just use native remainder
1647    return APInt(BitWidth, U.pVal[0] % RHS.U.pVal[0]);
1648
1649  // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1650  APInt Remainder(BitWidth, 0);
1651  divide(U.pVal, lhsWords, RHS.U.pVal, rhsWords, nullptr, Remainder.U.pVal);
1652  return Remainder;
1653}
1654
1655uint64_t APInt::urem(uint64_t RHS) const {
1656  assert(RHS != 0 && "Remainder by zero?");
1657
1658  if (isSingleWord())
1659    return U.VAL % RHS;
1660
1661  // Get some facts about the LHS
1662  unsigned lhsWords = getNumWords(getActiveBits());
1663
1664  // Check the degenerate cases
1665  if (lhsWords == 0)
1666    // 0 % Y ===> 0
1667    return 0;
1668  if (RHS == 1)
1669    // X % 1 ===> 0
1670    return 0;
1671  if (this->ult(RHS))
1672    // X % Y ===> X, iff X < Y
1673    return getZExtValue();
1674  if (*this == RHS)
1675    // X % X == 0;
1676    return 0;
1677  if (lhsWords == 1)
1678    // All high words are zero, just use native remainder
1679    return U.pVal[0] % RHS;
1680
1681  // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1682  uint64_t Remainder;
1683  divide(U.pVal, lhsWords, &RHS, 1, nullptr, &Remainder);
1684  return Remainder;
1685}
1686
1687APInt APInt::srem(const APInt &RHS) const {
1688  if (isNegative()) {
1689    if (RHS.isNegative())
1690      return -((-(*this)).urem(-RHS));
1691    return -((-(*this)).urem(RHS));
1692  }
1693  if (RHS.isNegative())
1694    return this->urem(-RHS);
1695  return this->urem(RHS);
1696}
1697
1698int64_t APInt::srem(int64_t RHS) const {
1699  if (isNegative()) {
1700    if (RHS < 0)
1701      return -((-(*this)).urem(-RHS));
1702    return -((-(*this)).urem(RHS));
1703  }
1704  if (RHS < 0)
1705    return this->urem(-RHS);
1706  return this->urem(RHS);
1707}
1708
1709void APInt::udivrem(const APInt &LHS, const APInt &RHS,
1710                    APInt &Quotient, APInt &Remainder) {
1711  assert(LHS.BitWidth == RHS.BitWidth && "Bit widths must be the same");
1712  unsigned BitWidth = LHS.BitWidth;
1713
1714  // First, deal with the easy case
1715  if (LHS.isSingleWord()) {
1716    assert(RHS.U.VAL != 0 && "Divide by zero?");
1717    uint64_t QuotVal = LHS.U.VAL / RHS.U.VAL;
1718    uint64_t RemVal = LHS.U.VAL % RHS.U.VAL;
1719    Quotient = APInt(BitWidth, QuotVal);
1720    Remainder = APInt(BitWidth, RemVal);
1721    return;
1722  }
1723
1724  // Get some size facts about the dividend and divisor
1725  unsigned lhsWords = getNumWords(LHS.getActiveBits());
1726  unsigned rhsBits  = RHS.getActiveBits();
1727  unsigned rhsWords = getNumWords(rhsBits);
1728  assert(rhsWords && "Performing divrem operation by zero ???");
1729
1730  // Check the degenerate cases
1731  if (lhsWords == 0) {
1732    Quotient = APInt(BitWidth, 0);    // 0 / Y ===> 0
1733    Remainder = APInt(BitWidth, 0);   // 0 % Y ===> 0
1734    return;
1735  }
1736
1737  if (rhsBits == 1) {
1738    Quotient = LHS;                   // X / 1 ===> X
1739    Remainder = APInt(BitWidth, 0);   // X % 1 ===> 0
1740  }
1741
1742  if (lhsWords < rhsWords || LHS.ult(RHS)) {
1743    Remainder = LHS;                  // X % Y ===> X, iff X < Y
1744    Quotient = APInt(BitWidth, 0);    // X / Y ===> 0, iff X < Y
1745    return;
1746  }
1747
1748  if (LHS == RHS) {
1749    Quotient  = APInt(BitWidth, 1);   // X / X ===> 1
1750    Remainder = APInt(BitWidth, 0);   // X % X ===> 0;
1751    return;
1752  }
1753
1754  // Make sure there is enough space to hold the results.
1755  // NOTE: This assumes that reallocate won't affect any bits if it doesn't
1756  // change the size. This is necessary if Quotient or Remainder is aliased
1757  // with LHS or RHS.
1758  Quotient.reallocate(BitWidth);
1759  Remainder.reallocate(BitWidth);
1760
1761  if (lhsWords == 1) { // rhsWords is 1 if lhsWords is 1.
1762    // There is only one word to consider so use the native versions.
1763    uint64_t lhsValue = LHS.U.pVal[0];
1764    uint64_t rhsValue = RHS.U.pVal[0];
1765    Quotient = lhsValue / rhsValue;
1766    Remainder = lhsValue % rhsValue;
1767    return;
1768  }
1769
1770  // Okay, lets do it the long way
1771  divide(LHS.U.pVal, lhsWords, RHS.U.pVal, rhsWords, Quotient.U.pVal,
1772         Remainder.U.pVal);
1773  // Clear the rest of the Quotient and Remainder.
1774  std::memset(Quotient.U.pVal + lhsWords, 0,
1775              (getNumWords(BitWidth) - lhsWords) * APINT_WORD_SIZE);
1776  std::memset(Remainder.U.pVal + rhsWords, 0,
1777              (getNumWords(BitWidth) - rhsWords) * APINT_WORD_SIZE);
1778}
1779
1780void APInt::udivrem(const APInt &LHS, uint64_t RHS, APInt &Quotient,
1781                    uint64_t &Remainder) {
1782  assert(RHS != 0 && "Divide by zero?");
1783  unsigned BitWidth = LHS.BitWidth;
1784
1785  // First, deal with the easy case
1786  if (LHS.isSingleWord()) {
1787    uint64_t QuotVal = LHS.U.VAL / RHS;
1788    Remainder = LHS.U.VAL % RHS;
1789    Quotient = APInt(BitWidth, QuotVal);
1790    return;
1791  }
1792
1793  // Get some size facts about the dividend and divisor
1794  unsigned lhsWords = getNumWords(LHS.getActiveBits());
1795
1796  // Check the degenerate cases
1797  if (lhsWords == 0) {
1798    Quotient = APInt(BitWidth, 0);    // 0 / Y ===> 0
1799    Remainder = 0;                    // 0 % Y ===> 0
1800    return;
1801  }
1802
1803  if (RHS == 1) {
1804    Quotient = LHS;                   // X / 1 ===> X
1805    Remainder = 0;                    // X % 1 ===> 0
1806    return;
1807  }
1808
1809  if (LHS.ult(RHS)) {
1810    Remainder = LHS.getZExtValue();   // X % Y ===> X, iff X < Y
1811    Quotient = APInt(BitWidth, 0);    // X / Y ===> 0, iff X < Y
1812    return;
1813  }
1814
1815  if (LHS == RHS) {
1816    Quotient  = APInt(BitWidth, 1);   // X / X ===> 1
1817    Remainder = 0;                    // X % X ===> 0;
1818    return;
1819  }
1820
1821  // Make sure there is enough space to hold the results.
1822  // NOTE: This assumes that reallocate won't affect any bits if it doesn't
1823  // change the size. This is necessary if Quotient is aliased with LHS.
1824  Quotient.reallocate(BitWidth);
1825
1826  if (lhsWords == 1) { // rhsWords is 1 if lhsWords is 1.
1827    // There is only one word to consider so use the native versions.
1828    uint64_t lhsValue = LHS.U.pVal[0];
1829    Quotient = lhsValue / RHS;
1830    Remainder = lhsValue % RHS;
1831    return;
1832  }
1833
1834  // Okay, lets do it the long way
1835  divide(LHS.U.pVal, lhsWords, &RHS, 1, Quotient.U.pVal, &Remainder);
1836  // Clear the rest of the Quotient.
1837  std::memset(Quotient.U.pVal + lhsWords, 0,
1838              (getNumWords(BitWidth) - lhsWords) * APINT_WORD_SIZE);
1839}
1840
1841void APInt::sdivrem(const APInt &LHS, const APInt &RHS,
1842                    APInt &Quotient, APInt &Remainder) {
1843  if (LHS.isNegative()) {
1844    if (RHS.isNegative())
1845      APInt::udivrem(-LHS, -RHS, Quotient, Remainder);
1846    else {
1847      APInt::udivrem(-LHS, RHS, Quotient, Remainder);
1848      Quotient.negate();
1849    }
1850    Remainder.negate();
1851  } else if (RHS.isNegative()) {
1852    APInt::udivrem(LHS, -RHS, Quotient, Remainder);
1853    Quotient.negate();
1854  } else {
1855    APInt::udivrem(LHS, RHS, Quotient, Remainder);
1856  }
1857}
1858
1859void APInt::sdivrem(const APInt &LHS, int64_t RHS,
1860                    APInt &Quotient, int64_t &Remainder) {
1861  uint64_t R = Remainder;
1862  if (LHS.isNegative()) {
1863    if (RHS < 0)
1864      APInt::udivrem(-LHS, -RHS, Quotient, R);
1865    else {
1866      APInt::udivrem(-LHS, RHS, Quotient, R);
1867      Quotient.negate();
1868    }
1869    R = -R;
1870  } else if (RHS < 0) {
1871    APInt::udivrem(LHS, -RHS, Quotient, R);
1872    Quotient.negate();
1873  } else {
1874    APInt::udivrem(LHS, RHS, Quotient, R);
1875  }
1876  Remainder = R;
1877}
1878
1879APInt APInt::sadd_ov(const APInt &RHS, bool &Overflow) const {
1880  APInt Res = *this+RHS;
1881  Overflow = isNonNegative() == RHS.isNonNegative() &&
1882             Res.isNonNegative() != isNonNegative();
1883  return Res;
1884}
1885
1886APInt APInt::uadd_ov(const APInt &RHS, bool &Overflow) const {
1887  APInt Res = *this+RHS;
1888  Overflow = Res.ult(RHS);
1889  return Res;
1890}
1891
1892APInt APInt::ssub_ov(const APInt &RHS, bool &Overflow) const {
1893  APInt Res = *this - RHS;
1894  Overflow = isNonNegative() != RHS.isNonNegative() &&
1895             Res.isNonNegative() != isNonNegative();
1896  return Res;
1897}
1898
1899APInt APInt::usub_ov(const APInt &RHS, bool &Overflow) const {
1900  APInt Res = *this-RHS;
1901  Overflow = Res.ugt(*this);
1902  return Res;
1903}
1904
1905APInt APInt::sdiv_ov(const APInt &RHS, bool &Overflow) const {
1906  // MININT/-1  -->  overflow.
1907  Overflow = isMinSignedValue() && RHS.isAllOnesValue();
1908  return sdiv(RHS);
1909}
1910
1911APInt APInt::smul_ov(const APInt &RHS, bool &Overflow) const {
1912  APInt Res = *this * RHS;
1913
1914  if (*this != 0 && RHS != 0)
1915    Overflow = Res.sdiv(RHS) != *this || Res.sdiv(*this) != RHS;
1916  else
1917    Overflow = false;
1918  return Res;
1919}
1920
1921APInt APInt::umul_ov(const APInt &RHS, bool &Overflow) const {
1922  if (countLeadingZeros() + RHS.countLeadingZeros() + 2 <= BitWidth) {
1923    Overflow = true;
1924    return *this * RHS;
1925  }
1926
1927  APInt Res = lshr(1) * RHS;
1928  Overflow = Res.isNegative();
1929  Res <<= 1;
1930  if ((*this)[0]) {
1931    Res += RHS;
1932    if (Res.ult(RHS))
1933      Overflow = true;
1934  }
1935  return Res;
1936}
1937
1938APInt APInt::sshl_ov(const APInt &ShAmt, bool &Overflow) const {
1939  Overflow = ShAmt.uge(getBitWidth());
1940  if (Overflow)
1941    return APInt(BitWidth, 0);
1942
1943  if (isNonNegative()) // Don't allow sign change.
1944    Overflow = ShAmt.uge(countLeadingZeros());
1945  else
1946    Overflow = ShAmt.uge(countLeadingOnes());
1947
1948  return *this << ShAmt;
1949}
1950
1951APInt APInt::ushl_ov(const APInt &ShAmt, bool &Overflow) const {
1952  Overflow = ShAmt.uge(getBitWidth());
1953  if (Overflow)
1954    return APInt(BitWidth, 0);
1955
1956  Overflow = ShAmt.ugt(countLeadingZeros());
1957
1958  return *this << ShAmt;
1959}
1960
1961APInt APInt::sadd_sat(const APInt &RHS) const {
1962  bool Overflow;
1963  APInt Res = sadd_ov(RHS, Overflow);
1964  if (!Overflow)
1965    return Res;
1966
1967  return isNegative() ? APInt::getSignedMinValue(BitWidth)
1968                      : APInt::getSignedMaxValue(BitWidth);
1969}
1970
1971APInt APInt::uadd_sat(const APInt &RHS) const {
1972  bool Overflow;
1973  APInt Res = uadd_ov(RHS, Overflow);
1974  if (!Overflow)
1975    return Res;
1976
1977  return APInt::getMaxValue(BitWidth);
1978}
1979
1980APInt APInt::ssub_sat(const APInt &RHS) const {
1981  bool Overflow;
1982  APInt Res = ssub_ov(RHS, Overflow);
1983  if (!Overflow)
1984    return Res;
1985
1986  return isNegative() ? APInt::getSignedMinValue(BitWidth)
1987                      : APInt::getSignedMaxValue(BitWidth);
1988}
1989
1990APInt APInt::usub_sat(const APInt &RHS) const {
1991  bool Overflow;
1992  APInt Res = usub_ov(RHS, Overflow);
1993  if (!Overflow)
1994    return Res;
1995
1996  return APInt(BitWidth, 0);
1997}
1998
1999
2000void APInt::fromString(unsigned numbits, StringRef str, uint8_t radix) {
2001  // Check our assumptions here
2002  assert(!str.empty() && "Invalid string length");
2003  assert((radix == 10 || radix == 8 || radix == 16 || radix == 2 ||
2004          radix == 36) &&
2005         "Radix should be 2, 8, 10, 16, or 36!");
2006
2007  StringRef::iterator p = str.begin();
2008  size_t slen = str.size();
2009  bool isNeg = *p == '-';
2010  if (*p == '-' || *p == '+') {
2011    p++;
2012    slen--;
2013    assert(slen && "String is only a sign, needs a value.");
2014  }
2015  assert((slen <= numbits || radix != 2) && "Insufficient bit width");
2016  assert(((slen-1)*3 <= numbits || radix != 8) && "Insufficient bit width");
2017  assert(((slen-1)*4 <= numbits || radix != 16) && "Insufficient bit width");
2018  assert((((slen-1)*64)/22 <= numbits || radix != 10) &&
2019         "Insufficient bit width");
2020
2021  // Allocate memory if needed
2022  if (isSingleWord())
2023    U.VAL = 0;
2024  else
2025    U.pVal = getClearedMemory(getNumWords());
2026
2027  // Figure out if we can shift instead of multiply
2028  unsigned shift = (radix == 16 ? 4 : radix == 8 ? 3 : radix == 2 ? 1 : 0);
2029
2030  // Enter digit traversal loop
2031  for (StringRef::iterator e = str.end(); p != e; ++p) {
2032    unsigned digit = getDigit(*p, radix);
2033    assert(digit < radix && "Invalid character in digit string");
2034
2035    // Shift or multiply the value by the radix
2036    if (slen > 1) {
2037      if (shift)
2038        *this <<= shift;
2039      else
2040        *this *= radix;
2041    }
2042
2043    // Add in the digit we just interpreted
2044    *this += digit;
2045  }
2046  // If its negative, put it in two's complement form
2047  if (isNeg)
2048    this->negate();
2049}
2050
2051void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix,
2052                     bool Signed, bool formatAsCLiteral) const {
2053  assert((Radix == 10 || Radix == 8 || Radix == 16 || Radix == 2 ||
2054          Radix == 36) &&
2055         "Radix should be 2, 8, 10, 16, or 36!");
2056
2057  const char *Prefix = "";
2058  if (formatAsCLiteral) {
2059    switch (Radix) {
2060      case 2:
2061        // Binary literals are a non-standard extension added in gcc 4.3:
2062        // http://gcc.gnu.org/onlinedocs/gcc-4.3.0/gcc/Binary-constants.html
2063        Prefix = "0b";
2064        break;
2065      case 8:
2066        Prefix = "0";
2067        break;
2068      case 10:
2069        break; // No prefix
2070      case 16:
2071        Prefix = "0x";
2072        break;
2073      default:
2074        llvm_unreachable("Invalid radix!");
2075    }
2076  }
2077
2078  // First, check for a zero value and just short circuit the logic below.
2079  if (*this == 0) {
2080    while (*Prefix) {
2081      Str.push_back(*Prefix);
2082      ++Prefix;
2083    };
2084    Str.push_back('0');
2085    return;
2086  }
2087
2088  static const char Digits[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
2089
2090  if (isSingleWord()) {
2091    char Buffer[65];
2092    char *BufPtr = std::end(Buffer);
2093
2094    uint64_t N;
2095    if (!Signed) {
2096      N = getZExtValue();
2097    } else {
2098      int64_t I = getSExtValue();
2099      if (I >= 0) {
2100        N = I;
2101      } else {
2102        Str.push_back('-');
2103        N = -(uint64_t)I;
2104      }
2105    }
2106
2107    while (*Prefix) {
2108      Str.push_back(*Prefix);
2109      ++Prefix;
2110    };
2111
2112    while (N) {
2113      *--BufPtr = Digits[N % Radix];
2114      N /= Radix;
2115    }
2116    Str.append(BufPtr, std::end(Buffer));
2117    return;
2118  }
2119
2120  APInt Tmp(*this);
2121
2122  if (Signed && isNegative()) {
2123    // They want to print the signed version and it is a negative value
2124    // Flip the bits and add one to turn it into the equivalent positive
2125    // value and put a '-' in the result.
2126    Tmp.negate();
2127    Str.push_back('-');
2128  }
2129
2130  while (*Prefix) {
2131    Str.push_back(*Prefix);
2132    ++Prefix;
2133  };
2134
2135  // We insert the digits backward, then reverse them to get the right order.
2136  unsigned StartDig = Str.size();
2137
2138  // For the 2, 8 and 16 bit cases, we can just shift instead of divide
2139  // because the number of bits per digit (1, 3 and 4 respectively) divides
2140  // equally.  We just shift until the value is zero.
2141  if (Radix == 2 || Radix == 8 || Radix == 16) {
2142    // Just shift tmp right for each digit width until it becomes zero
2143    unsigned ShiftAmt = (Radix == 16 ? 4 : (Radix == 8 ? 3 : 1));
2144    unsigned MaskAmt = Radix - 1;
2145
2146    while (Tmp.getBoolValue()) {
2147      unsigned Digit = unsigned(Tmp.getRawData()[0]) & MaskAmt;
2148      Str.push_back(Digits[Digit]);
2149      Tmp.lshrInPlace(ShiftAmt);
2150    }
2151  } else {
2152    while (Tmp.getBoolValue()) {
2153      uint64_t Digit;
2154      udivrem(Tmp, Radix, Tmp, Digit);
2155      assert(Digit < Radix && "divide failed");
2156      Str.push_back(Digits[Digit]);
2157    }
2158  }
2159
2160  // Reverse the digits before returning.
2161  std::reverse(Str.begin()+StartDig, Str.end());
2162}
2163
2164/// Returns the APInt as a std::string. Note that this is an inefficient method.
2165/// It is better to pass in a SmallVector/SmallString to the methods above.
2166std::string APInt::toString(unsigned Radix = 10, bool Signed = true) const {
2167  SmallString<40> S;
2168  toString(S, Radix, Signed, /* formatAsCLiteral = */false);
2169  return S.str();
2170}
2171
2172#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2173LLVM_DUMP_METHOD void APInt::dump() const {
2174  SmallString<40> S, U;
2175  this->toStringUnsigned(U);
2176  this->toStringSigned(S);
2177  dbgs() << "APInt(" << BitWidth << "b, "
2178         << U << "u " << S << "s)\n";
2179}
2180#endif
2181
2182void APInt::print(raw_ostream &OS, bool isSigned) const {
2183  SmallString<40> S;
2184  this->toString(S, 10, isSigned, /* formatAsCLiteral = */false);
2185  OS << S;
2186}
2187
2188// This implements a variety of operations on a representation of
2189// arbitrary precision, two's-complement, bignum integer values.
2190
2191// Assumed by lowHalf, highHalf, partMSB and partLSB.  A fairly safe
2192// and unrestricting assumption.
2193static_assert(APInt::APINT_BITS_PER_WORD % 2 == 0,
2194              "Part width must be divisible by 2!");
2195
2196/* Some handy functions local to this file.  */
2197
2198/* Returns the integer part with the least significant BITS set.
2199   BITS cannot be zero.  */
2200static inline APInt::WordType lowBitMask(unsigned bits) {
2201  assert(bits != 0 && bits <= APInt::APINT_BITS_PER_WORD);
2202
2203  return ~(APInt::WordType) 0 >> (APInt::APINT_BITS_PER_WORD - bits);
2204}
2205
2206/* Returns the value of the lower half of PART.  */
2207static inline APInt::WordType lowHalf(APInt::WordType part) {
2208  return part & lowBitMask(APInt::APINT_BITS_PER_WORD / 2);
2209}
2210
2211/* Returns the value of the upper half of PART.  */
2212static inline APInt::WordType highHalf(APInt::WordType part) {
2213  return part >> (APInt::APINT_BITS_PER_WORD / 2);
2214}
2215
2216/* Returns the bit number of the most significant set bit of a part.
2217   If the input number has no bits set -1U is returned.  */
2218static unsigned partMSB(APInt::WordType value) {
2219  return findLastSet(value, ZB_Max);
2220}
2221
2222/* Returns the bit number of the least significant set bit of a
2223   part.  If the input number has no bits set -1U is returned.  */
2224static unsigned partLSB(APInt::WordType value) {
2225  return findFirstSet(value, ZB_Max);
2226}
2227
2228/* Sets the least significant part of a bignum to the input value, and
2229   zeroes out higher parts.  */
2230void APInt::tcSet(WordType *dst, WordType part, unsigned parts) {
2231  assert(parts > 0);
2232
2233  dst[0] = part;
2234  for (unsigned i = 1; i < parts; i++)
2235    dst[i] = 0;
2236}
2237
2238/* Assign one bignum to another.  */
2239void APInt::tcAssign(WordType *dst, const WordType *src, unsigned parts) {
2240  for (unsigned i = 0; i < parts; i++)
2241    dst[i] = src[i];
2242}
2243
2244/* Returns true if a bignum is zero, false otherwise.  */
2245bool APInt::tcIsZero(const WordType *src, unsigned parts) {
2246  for (unsigned i = 0; i < parts; i++)
2247    if (src[i])
2248      return false;
2249
2250  return true;
2251}
2252
2253/* Extract the given bit of a bignum; returns 0 or 1.  */
2254int APInt::tcExtractBit(const WordType *parts, unsigned bit) {
2255  return (parts[whichWord(bit)] & maskBit(bit)) != 0;
2256}
2257
2258/* Set the given bit of a bignum. */
2259void APInt::tcSetBit(WordType *parts, unsigned bit) {
2260  parts[whichWord(bit)] |= maskBit(bit);
2261}
2262
2263/* Clears the given bit of a bignum. */
2264void APInt::tcClearBit(WordType *parts, unsigned bit) {
2265  parts[whichWord(bit)] &= ~maskBit(bit);
2266}
2267
2268/* Returns the bit number of the least significant set bit of a
2269   number.  If the input number has no bits set -1U is returned.  */
2270unsigned APInt::tcLSB(const WordType *parts, unsigned n) {
2271  for (unsigned i = 0; i < n; i++) {
2272    if (parts[i] != 0) {
2273      unsigned lsb = partLSB(parts[i]);
2274
2275      return lsb + i * APINT_BITS_PER_WORD;
2276    }
2277  }
2278
2279  return -1U;
2280}
2281
2282/* Returns the bit number of the most significant set bit of a number.
2283   If the input number has no bits set -1U is returned.  */
2284unsigned APInt::tcMSB(const WordType *parts, unsigned n) {
2285  do {
2286    --n;
2287
2288    if (parts[n] != 0) {
2289      unsigned msb = partMSB(parts[n]);
2290
2291      return msb + n * APINT_BITS_PER_WORD;
2292    }
2293  } while (n);
2294
2295  return -1U;
2296}
2297
2298/* Copy the bit vector of width srcBITS from SRC, starting at bit
2299   srcLSB, to DST, of dstCOUNT parts, such that the bit srcLSB becomes
2300   the least significant bit of DST.  All high bits above srcBITS in
2301   DST are zero-filled.  */
2302void
2303APInt::tcExtract(WordType *dst, unsigned dstCount, const WordType *src,
2304                 unsigned srcBits, unsigned srcLSB) {
2305  unsigned dstParts = (srcBits + APINT_BITS_PER_WORD - 1) / APINT_BITS_PER_WORD;
2306  assert(dstParts <= dstCount);
2307
2308  unsigned firstSrcPart = srcLSB / APINT_BITS_PER_WORD;
2309  tcAssign (dst, src + firstSrcPart, dstParts);
2310
2311  unsigned shift = srcLSB % APINT_BITS_PER_WORD;
2312  tcShiftRight (dst, dstParts, shift);
2313
2314  /* We now have (dstParts * APINT_BITS_PER_WORD - shift) bits from SRC
2315     in DST.  If this is less that srcBits, append the rest, else
2316     clear the high bits.  */
2317  unsigned n = dstParts * APINT_BITS_PER_WORD - shift;
2318  if (n < srcBits) {
2319    WordType mask = lowBitMask (srcBits - n);
2320    dst[dstParts - 1] |= ((src[firstSrcPart + dstParts] & mask)
2321                          << n % APINT_BITS_PER_WORD);
2322  } else if (n > srcBits) {
2323    if (srcBits % APINT_BITS_PER_WORD)
2324      dst[dstParts - 1] &= lowBitMask (srcBits % APINT_BITS_PER_WORD);
2325  }
2326
2327  /* Clear high parts.  */
2328  while (dstParts < dstCount)
2329    dst[dstParts++] = 0;
2330}
2331
2332/* DST += RHS + C where C is zero or one.  Returns the carry flag.  */
2333APInt::WordType APInt::tcAdd(WordType *dst, const WordType *rhs,
2334                             WordType c, unsigned parts) {
2335  assert(c <= 1);
2336
2337  for (unsigned i = 0; i < parts; i++) {
2338    WordType l = dst[i];
2339    if (c) {
2340      dst[i] += rhs[i] + 1;
2341      c = (dst[i] <= l);
2342    } else {
2343      dst[i] += rhs[i];
2344      c = (dst[i] < l);
2345    }
2346  }
2347
2348  return c;
2349}
2350
2351/// This function adds a single "word" integer, src, to the multiple
2352/// "word" integer array, dst[]. dst[] is modified to reflect the addition and
2353/// 1 is returned if there is a carry out, otherwise 0 is returned.
2354/// @returns the carry of the addition.
2355APInt::WordType APInt::tcAddPart(WordType *dst, WordType src,
2356                                 unsigned parts) {
2357  for (unsigned i = 0; i < parts; ++i) {
2358    dst[i] += src;
2359    if (dst[i] >= src)
2360      return 0; // No need to carry so exit early.
2361    src = 1; // Carry one to next digit.
2362  }
2363
2364  return 1;
2365}
2366
2367/* DST -= RHS + C where C is zero or one.  Returns the carry flag.  */
2368APInt::WordType APInt::tcSubtract(WordType *dst, const WordType *rhs,
2369                                  WordType c, unsigned parts) {
2370  assert(c <= 1);
2371
2372  for (unsigned i = 0; i < parts; i++) {
2373    WordType l = dst[i];
2374    if (c) {
2375      dst[i] -= rhs[i] + 1;
2376      c = (dst[i] >= l);
2377    } else {
2378      dst[i] -= rhs[i];
2379      c = (dst[i] > l);
2380    }
2381  }
2382
2383  return c;
2384}
2385
2386/// This function subtracts a single "word" (64-bit word), src, from
2387/// the multi-word integer array, dst[], propagating the borrowed 1 value until
2388/// no further borrowing is needed or it runs out of "words" in dst.  The result
2389/// is 1 if "borrowing" exhausted the digits in dst, or 0 if dst was not
2390/// exhausted. In other words, if src > dst then this function returns 1,
2391/// otherwise 0.
2392/// @returns the borrow out of the subtraction
2393APInt::WordType APInt::tcSubtractPart(WordType *dst, WordType src,
2394                                      unsigned parts) {
2395  for (unsigned i = 0; i < parts; ++i) {
2396    WordType Dst = dst[i];
2397    dst[i] -= src;
2398    if (src <= Dst)
2399      return 0; // No need to borrow so exit early.
2400    src = 1; // We have to "borrow 1" from next "word"
2401  }
2402
2403  return 1;
2404}
2405
2406/* Negate a bignum in-place.  */
2407void APInt::tcNegate(WordType *dst, unsigned parts) {
2408  tcComplement(dst, parts);
2409  tcIncrement(dst, parts);
2410}
2411
2412/*  DST += SRC * MULTIPLIER + CARRY   if add is true
2413    DST  = SRC * MULTIPLIER + CARRY   if add is false
2414
2415    Requires 0 <= DSTPARTS <= SRCPARTS + 1.  If DST overlaps SRC
2416    they must start at the same point, i.e. DST == SRC.
2417
2418    If DSTPARTS == SRCPARTS + 1 no overflow occurs and zero is
2419    returned.  Otherwise DST is filled with the least significant
2420    DSTPARTS parts of the result, and if all of the omitted higher
2421    parts were zero return zero, otherwise overflow occurred and
2422    return one.  */
2423int APInt::tcMultiplyPart(WordType *dst, const WordType *src,
2424                          WordType multiplier, WordType carry,
2425                          unsigned srcParts, unsigned dstParts,
2426                          bool add) {
2427  /* Otherwise our writes of DST kill our later reads of SRC.  */
2428  assert(dst <= src || dst >= src + srcParts);
2429  assert(dstParts <= srcParts + 1);
2430
2431  /* N loops; minimum of dstParts and srcParts.  */
2432  unsigned n = std::min(dstParts, srcParts);
2433
2434  for (unsigned i = 0; i < n; i++) {
2435    WordType low, mid, high, srcPart;
2436
2437      /* [ LOW, HIGH ] = MULTIPLIER * SRC[i] + DST[i] + CARRY.
2438
2439         This cannot overflow, because
2440
2441         (n - 1) * (n - 1) + 2 (n - 1) = (n - 1) * (n + 1)
2442
2443         which is less than n^2.  */
2444
2445    srcPart = src[i];
2446
2447    if (multiplier == 0 || srcPart == 0) {
2448      low = carry;
2449      high = 0;
2450    } else {
2451      low = lowHalf(srcPart) * lowHalf(multiplier);
2452      high = highHalf(srcPart) * highHalf(multiplier);
2453
2454      mid = lowHalf(srcPart) * highHalf(multiplier);
2455      high += highHalf(mid);
2456      mid <<= APINT_BITS_PER_WORD / 2;
2457      if (low + mid < low)
2458        high++;
2459      low += mid;
2460
2461      mid = highHalf(srcPart) * lowHalf(multiplier);
2462      high += highHalf(mid);
2463      mid <<= APINT_BITS_PER_WORD / 2;
2464      if (low + mid < low)
2465        high++;
2466      low += mid;
2467
2468      /* Now add carry.  */
2469      if (low + carry < low)
2470        high++;
2471      low += carry;
2472    }
2473
2474    if (add) {
2475      /* And now DST[i], and store the new low part there.  */
2476      if (low + dst[i] < low)
2477        high++;
2478      dst[i] += low;
2479    } else
2480      dst[i] = low;
2481
2482    carry = high;
2483  }
2484
2485  if (srcParts < dstParts) {
2486    /* Full multiplication, there is no overflow.  */
2487    assert(srcParts + 1 == dstParts);
2488    dst[srcParts] = carry;
2489    return 0;
2490  }
2491
2492  /* We overflowed if there is carry.  */
2493  if (carry)
2494    return 1;
2495
2496  /* We would overflow if any significant unwritten parts would be
2497     non-zero.  This is true if any remaining src parts are non-zero
2498     and the multiplier is non-zero.  */
2499  if (multiplier)
2500    for (unsigned i = dstParts; i < srcParts; i++)
2501      if (src[i])
2502        return 1;
2503
2504  /* We fitted in the narrow destination.  */
2505  return 0;
2506}
2507
2508/* DST = LHS * RHS, where DST has the same width as the operands and
2509   is filled with the least significant parts of the result.  Returns
2510   one if overflow occurred, otherwise zero.  DST must be disjoint
2511   from both operands.  */
2512int APInt::tcMultiply(WordType *dst, const WordType *lhs,
2513                      const WordType *rhs, unsigned parts) {
2514  assert(dst != lhs && dst != rhs);
2515
2516  int overflow = 0;
2517  tcSet(dst, 0, parts);
2518
2519  for (unsigned i = 0; i < parts; i++)
2520    overflow |= tcMultiplyPart(&dst[i], lhs, rhs[i], 0, parts,
2521                               parts - i, true);
2522
2523  return overflow;
2524}
2525
2526/// DST = LHS * RHS, where DST has width the sum of the widths of the
2527/// operands. No overflow occurs. DST must be disjoint from both operands.
2528void APInt::tcFullMultiply(WordType *dst, const WordType *lhs,
2529                           const WordType *rhs, unsigned lhsParts,
2530                           unsigned rhsParts) {
2531  /* Put the narrower number on the LHS for less loops below.  */
2532  if (lhsParts > rhsParts)
2533    return tcFullMultiply (dst, rhs, lhs, rhsParts, lhsParts);
2534
2535  assert(dst != lhs && dst != rhs);
2536
2537  tcSet(dst, 0, rhsParts);
2538
2539  for (unsigned i = 0; i < lhsParts; i++)
2540    tcMultiplyPart(&dst[i], rhs, lhs[i], 0, rhsParts, rhsParts + 1, true);
2541}
2542
2543/* If RHS is zero LHS and REMAINDER are left unchanged, return one.
2544   Otherwise set LHS to LHS / RHS with the fractional part discarded,
2545   set REMAINDER to the remainder, return zero.  i.e.
2546
2547   OLD_LHS = RHS * LHS + REMAINDER
2548
2549   SCRATCH is a bignum of the same size as the operands and result for
2550   use by the routine; its contents need not be initialized and are
2551   destroyed.  LHS, REMAINDER and SCRATCH must be distinct.
2552*/
2553int APInt::tcDivide(WordType *lhs, const WordType *rhs,
2554                    WordType *remainder, WordType *srhs,
2555                    unsigned parts) {
2556  assert(lhs != remainder && lhs != srhs && remainder != srhs);
2557
2558  unsigned shiftCount = tcMSB(rhs, parts) + 1;
2559  if (shiftCount == 0)
2560    return true;
2561
2562  shiftCount = parts * APINT_BITS_PER_WORD - shiftCount;
2563  unsigned n = shiftCount / APINT_BITS_PER_WORD;
2564  WordType mask = (WordType) 1 << (shiftCount % APINT_BITS_PER_WORD);
2565
2566  tcAssign(srhs, rhs, parts);
2567  tcShiftLeft(srhs, parts, shiftCount);
2568  tcAssign(remainder, lhs, parts);
2569  tcSet(lhs, 0, parts);
2570
2571  /* Loop, subtracting SRHS if REMAINDER is greater and adding that to
2572     the total.  */
2573  for (;;) {
2574    int compare = tcCompare(remainder, srhs, parts);
2575    if (compare >= 0) {
2576      tcSubtract(remainder, srhs, 0, parts);
2577      lhs[n] |= mask;
2578    }
2579
2580    if (shiftCount == 0)
2581      break;
2582    shiftCount--;
2583    tcShiftRight(srhs, parts, 1);
2584    if ((mask >>= 1) == 0) {
2585      mask = (WordType) 1 << (APINT_BITS_PER_WORD - 1);
2586      n--;
2587    }
2588  }
2589
2590  return false;
2591}
2592
2593/// Shift a bignum left Cound bits in-place. Shifted in bits are zero. There are
2594/// no restrictions on Count.
2595void APInt::tcShiftLeft(WordType *Dst, unsigned Words, unsigned Count) {
2596  // Don't bother performing a no-op shift.
2597  if (!Count)
2598    return;
2599
2600  // WordShift is the inter-part shift; BitShift is the intra-part shift.
2601  unsigned WordShift = std::min(Count / APINT_BITS_PER_WORD, Words);
2602  unsigned BitShift = Count % APINT_BITS_PER_WORD;
2603
2604  // Fastpath for moving by whole words.
2605  if (BitShift == 0) {
2606    std::memmove(Dst + WordShift, Dst, (Words - WordShift) * APINT_WORD_SIZE);
2607  } else {
2608    while (Words-- > WordShift) {
2609      Dst[Words] = Dst[Words - WordShift] << BitShift;
2610      if (Words > WordShift)
2611        Dst[Words] |=
2612          Dst[Words - WordShift - 1] >> (APINT_BITS_PER_WORD - BitShift);
2613    }
2614  }
2615
2616  // Fill in the remainder with 0s.
2617  std::memset(Dst, 0, WordShift * APINT_WORD_SIZE);
2618}
2619
2620/// Shift a bignum right Count bits in-place. Shifted in bits are zero. There
2621/// are no restrictions on Count.
2622void APInt::tcShiftRight(WordType *Dst, unsigned Words, unsigned Count) {
2623  // Don't bother performing a no-op shift.
2624  if (!Count)
2625    return;
2626
2627  // WordShift is the inter-part shift; BitShift is the intra-part shift.
2628  unsigned WordShift = std::min(Count / APINT_BITS_PER_WORD, Words);
2629  unsigned BitShift = Count % APINT_BITS_PER_WORD;
2630
2631  unsigned WordsToMove = Words - WordShift;
2632  // Fastpath for moving by whole words.
2633  if (BitShift == 0) {
2634    std::memmove(Dst, Dst + WordShift, WordsToMove * APINT_WORD_SIZE);
2635  } else {
2636    for (unsigned i = 0; i != WordsToMove; ++i) {
2637      Dst[i] = Dst[i + WordShift] >> BitShift;
2638      if (i + 1 != WordsToMove)
2639        Dst[i] |= Dst[i + WordShift + 1] << (APINT_BITS_PER_WORD - BitShift);
2640    }
2641  }
2642
2643  // Fill in the remainder with 0s.
2644  std::memset(Dst + WordsToMove, 0, WordShift * APINT_WORD_SIZE);
2645}
2646
2647/* Bitwise and of two bignums.  */
2648void APInt::tcAnd(WordType *dst, const WordType *rhs, unsigned parts) {
2649  for (unsigned i = 0; i < parts; i++)
2650    dst[i] &= rhs[i];
2651}
2652
2653/* Bitwise inclusive or of two bignums.  */
2654void APInt::tcOr(WordType *dst, const WordType *rhs, unsigned parts) {
2655  for (unsigned i = 0; i < parts; i++)
2656    dst[i] |= rhs[i];
2657}
2658
2659/* Bitwise exclusive or of two bignums.  */
2660void APInt::tcXor(WordType *dst, const WordType *rhs, unsigned parts) {
2661  for (unsigned i = 0; i < parts; i++)
2662    dst[i] ^= rhs[i];
2663}
2664
2665/* Complement a bignum in-place.  */
2666void APInt::tcComplement(WordType *dst, unsigned parts) {
2667  for (unsigned i = 0; i < parts; i++)
2668    dst[i] = ~dst[i];
2669}
2670
2671/* Comparison (unsigned) of two bignums.  */
2672int APInt::tcCompare(const WordType *lhs, const WordType *rhs,
2673                     unsigned parts) {
2674  while (parts) {
2675    parts--;
2676    if (lhs[parts] != rhs[parts])
2677      return (lhs[parts] > rhs[parts]) ? 1 : -1;
2678  }
2679
2680  return 0;
2681}
2682
2683/* Set the least significant BITS bits of a bignum, clear the
2684   rest.  */
2685void APInt::tcSetLeastSignificantBits(WordType *dst, unsigned parts,
2686                                      unsigned bits) {
2687  unsigned i = 0;
2688  while (bits > APINT_BITS_PER_WORD) {
2689    dst[i++] = ~(WordType) 0;
2690    bits -= APINT_BITS_PER_WORD;
2691  }
2692
2693  if (bits)
2694    dst[i++] = ~(WordType) 0 >> (APINT_BITS_PER_WORD - bits);
2695
2696  while (i < parts)
2697    dst[i++] = 0;
2698}
2699
2700APInt llvm::APIntOps::RoundingUDiv(const APInt &A, const APInt &B,
2701                                   APInt::Rounding RM) {
2702  // Currently udivrem always rounds down.
2703  switch (RM) {
2704  case APInt::Rounding::DOWN:
2705  case APInt::Rounding::TOWARD_ZERO:
2706    return A.udiv(B);
2707  case APInt::Rounding::UP: {
2708    APInt Quo, Rem;
2709    APInt::udivrem(A, B, Quo, Rem);
2710    if (Rem == 0)
2711      return Quo;
2712    return Quo + 1;
2713  }
2714  }
2715  llvm_unreachable("Unknown APInt::Rounding enum");
2716}
2717
2718APInt llvm::APIntOps::RoundingSDiv(const APInt &A, const APInt &B,
2719                                   APInt::Rounding RM) {
2720  switch (RM) {
2721  case APInt::Rounding::DOWN:
2722  case APInt::Rounding::UP: {
2723    APInt Quo, Rem;
2724    APInt::sdivrem(A, B, Quo, Rem);
2725    if (Rem == 0)
2726      return Quo;
2727    // This algorithm deals with arbitrary rounding mode used by sdivrem.
2728    // We want to check whether the non-integer part of the mathematical value
2729    // is negative or not. If the non-integer part is negative, we need to round
2730    // down from Quo; otherwise, if it's positive or 0, we return Quo, as it's
2731    // already rounded down.
2732    if (RM == APInt::Rounding::DOWN) {
2733      if (Rem.isNegative() != B.isNegative())
2734        return Quo - 1;
2735      return Quo;
2736    }
2737    if (Rem.isNegative() != B.isNegative())
2738      return Quo;
2739    return Quo + 1;
2740  }
2741  // Currently sdiv rounds twards zero.
2742  case APInt::Rounding::TOWARD_ZERO:
2743    return A.sdiv(B);
2744  }
2745  llvm_unreachable("Unknown APInt::Rounding enum");
2746}
2747
2748Optional<APInt>
2749llvm::APIntOps::SolveQuadraticEquationWrap(APInt A, APInt B, APInt C,
2750                                           unsigned RangeWidth) {
2751  unsigned CoeffWidth = A.getBitWidth();
2752  assert(CoeffWidth == B.getBitWidth() && CoeffWidth == C.getBitWidth());
2753  assert(RangeWidth <= CoeffWidth &&
2754         "Value range width should be less than coefficient width");
2755  assert(RangeWidth > 1 && "Value range bit width should be > 1");
2756
2757  LLVM_DEBUG(dbgs() << __func__ << ": solving " << A << "x^2 + " << B
2758                    << "x + " << C << ", rw:" << RangeWidth << '\n');
2759
2760  // Identify 0 as a (non)solution immediately.
2761  if (C.sextOrTrunc(RangeWidth).isNullValue() ) {
2762    LLVM_DEBUG(dbgs() << __func__ << ": zero solution\n");
2763    return APInt(CoeffWidth, 0);
2764  }
2765
2766  // The result of APInt arithmetic has the same bit width as the operands,
2767  // so it can actually lose high bits. A product of two n-bit integers needs
2768  // 2n-1 bits to represent the full value.
2769  // The operation done below (on quadratic coefficients) that can produce
2770  // the largest value is the evaluation of the equation during bisection,
2771  // which needs 3 times the bitwidth of the coefficient, so the total number
2772  // of required bits is 3n.
2773  //
2774  // The purpose of this extension is to simulate the set Z of all integers,
2775  // where n+1 > n for all n in Z. In Z it makes sense to talk about positive
2776  // and negative numbers (not so much in a modulo arithmetic). The method
2777  // used to solve the equation is based on the standard formula for real
2778  // numbers, and uses the concepts of "positive" and "negative" with their
2779  // usual meanings.
2780  CoeffWidth *= 3;
2781  A = A.sext(CoeffWidth);
2782  B = B.sext(CoeffWidth);
2783  C = C.sext(CoeffWidth);
2784
2785  // Make A > 0 for simplicity. Negate cannot overflow at this point because
2786  // the bit width has increased.
2787  if (A.isNegative()) {
2788    A.negate();
2789    B.negate();
2790    C.negate();
2791  }
2792
2793  // Solving an equation q(x) = 0 with coefficients in modular arithmetic
2794  // is really solving a set of equations q(x) = kR for k = 0, 1, 2, ...,
2795  // and R = 2^BitWidth.
2796  // Since we're trying not only to find exact solutions, but also values
2797  // that "wrap around", such a set will always have a solution, i.e. an x
2798  // that satisfies at least one of the equations, or such that |q(x)|
2799  // exceeds kR, while |q(x-1)| for the same k does not.
2800  //
2801  // We need to find a value k, such that Ax^2 + Bx + C = kR will have a
2802  // positive solution n (in the above sense), and also such that the n
2803  // will be the least among all solutions corresponding to k = 0, 1, ...
2804  // (more precisely, the least element in the set
2805  //   { n(k) | k is such that a solution n(k) exists }).
2806  //
2807  // Consider the parabola (over real numbers) that corresponds to the
2808  // quadratic equation. Since A > 0, the arms of the parabola will point
2809  // up. Picking different values of k will shift it up and down by R.
2810  //
2811  // We want to shift the parabola in such a way as to reduce the problem
2812  // of solving q(x) = kR to solving shifted_q(x) = 0.
2813  // (The interesting solutions are the ceilings of the real number
2814  // solutions.)
2815  APInt R = APInt::getOneBitSet(CoeffWidth, RangeWidth);
2816  APInt TwoA = 2 * A;
2817  APInt SqrB = B * B;
2818  bool PickLow;
2819
2820  auto RoundUp = [] (const APInt &V, const APInt &A) -> APInt {
2821    assert(A.isStrictlyPositive());
2822    APInt T = V.abs().urem(A);
2823    if (T.isNullValue())
2824      return V;
2825    return V.isNegative() ? V+T : V+(A-T);
2826  };
2827
2828  // The vertex of the parabola is at -B/2A, but since A > 0, it's negative
2829  // iff B is positive.
2830  if (B.isNonNegative()) {
2831    // If B >= 0, the vertex it at a negative location (or at 0), so in
2832    // order to have a non-negative solution we need to pick k that makes
2833    // C-kR negative. To satisfy all the requirements for the solution
2834    // that we are looking for, it needs to be closest to 0 of all k.
2835    C = C.srem(R);
2836    if (C.isStrictlyPositive())
2837      C -= R;
2838    // Pick the greater solution.
2839    PickLow = false;
2840  } else {
2841    // If B < 0, the vertex is at a positive location. For any solution
2842    // to exist, the discriminant must be non-negative. This means that
2843    // C-kR <= B^2/4A is a necessary condition for k, i.e. there is a
2844    // lower bound on values of k: kR >= C - B^2/4A.
2845    APInt LowkR = C - SqrB.udiv(2*TwoA); // udiv because all values > 0.
2846    // Round LowkR up (towards +inf) to the nearest kR.
2847    LowkR = RoundUp(LowkR, R);
2848
2849    // If there exists k meeting the condition above, and such that
2850    // C-kR > 0, there will be two positive real number solutions of
2851    // q(x) = kR. Out of all such values of k, pick the one that makes
2852    // C-kR closest to 0, (i.e. pick maximum k such that C-kR > 0).
2853    // In other words, find maximum k such that LowkR <= kR < C.
2854    if (C.sgt(LowkR)) {
2855      // If LowkR < C, then such a k is guaranteed to exist because
2856      // LowkR itself is a multiple of R.
2857      C -= -RoundUp(-C, R);      // C = C - RoundDown(C, R)
2858      // Pick the smaller solution.
2859      PickLow = true;
2860    } else {
2861      // If C-kR < 0 for all potential k's, it means that one solution
2862      // will be negative, while the other will be positive. The positive
2863      // solution will shift towards 0 if the parabola is moved up.
2864      // Pick the kR closest to the lower bound (i.e. make C-kR closest
2865      // to 0, or in other words, out of all parabolas that have solutions,
2866      // pick the one that is the farthest "up").
2867      // Since LowkR is itself a multiple of R, simply take C-LowkR.
2868      C -= LowkR;
2869      // Pick the greater solution.
2870      PickLow = false;
2871    }
2872  }
2873
2874  LLVM_DEBUG(dbgs() << __func__ << ": updated coefficients " << A << "x^2 + "
2875                    << B << "x + " << C << ", rw:" << RangeWidth << '\n');
2876
2877  APInt D = SqrB - 4*A*C;
2878  assert(D.isNonNegative() && "Negative discriminant");
2879  APInt SQ = D.sqrt();
2880
2881  APInt Q = SQ * SQ;
2882  bool InexactSQ = Q != D;
2883  // The calculated SQ may actually be greater than the exact (non-integer)
2884  // value. If that's the case, decremement SQ to get a value that is lower.
2885  if (Q.sgt(D))
2886    SQ -= 1;
2887
2888  APInt X;
2889  APInt Rem;
2890
2891  // SQ is rounded down (i.e SQ * SQ <= D), so the roots may be inexact.
2892  // When using the quadratic formula directly, the calculated low root
2893  // may be greater than the exact one, since we would be subtracting SQ.
2894  // To make sure that the calculated root is not greater than the exact
2895  // one, subtract SQ+1 when calculating the low root (for inexact value
2896  // of SQ).
2897  if (PickLow)
2898    APInt::sdivrem(-B - (SQ+InexactSQ), TwoA, X, Rem);
2899  else
2900    APInt::sdivrem(-B + SQ, TwoA, X, Rem);
2901
2902  // The updated coefficients should be such that the (exact) solution is
2903  // positive. Since APInt division rounds towards 0, the calculated one
2904  // can be 0, but cannot be negative.
2905  assert(X.isNonNegative() && "Solution should be non-negative");
2906
2907  if (!InexactSQ && Rem.isNullValue()) {
2908    LLVM_DEBUG(dbgs() << __func__ << ": solution (root): " << X << '\n');
2909    return X;
2910  }
2911
2912  assert((SQ*SQ).sle(D) && "SQ = |_sqrt(D)_|, so SQ*SQ <= D");
2913  // The exact value of the square root of D should be between SQ and SQ+1.
2914  // This implies that the solution should be between that corresponding to
2915  // SQ (i.e. X) and that corresponding to SQ+1.
2916  //
2917  // The calculated X cannot be greater than the exact (real) solution.
2918  // Actually it must be strictly less than the exact solution, while
2919  // X+1 will be greater than or equal to it.
2920
2921  APInt VX = (A*X + B)*X + C;
2922  APInt VY = VX + TwoA*X + A + B;
2923  bool SignChange = VX.isNegative() != VY.isNegative() ||
2924                    VX.isNullValue() != VY.isNullValue();
2925  // If the sign did not change between X and X+1, X is not a valid solution.
2926  // This could happen when the actual (exact) roots don't have an integer
2927  // between them, so they would both be contained between X and X+1.
2928  if (!SignChange) {
2929    LLVM_DEBUG(dbgs() << __func__ << ": no valid solution\n");
2930    return None;
2931  }
2932
2933  X += 1;
2934  LLVM_DEBUG(dbgs() << __func__ << ": solution (wrap): " << X << '\n');
2935  return X;
2936}
2937
2938/// StoreIntToMemory - Fills the StoreBytes bytes of memory starting from Dst
2939/// with the integer held in IntVal.
2940void llvm::StoreIntToMemory(const APInt &IntVal, uint8_t *Dst,
2941                            unsigned StoreBytes) {
2942  assert((IntVal.getBitWidth()+7)/8 >= StoreBytes && "Integer too small!");
2943  const uint8_t *Src = (const uint8_t *)IntVal.getRawData();
2944
2945  if (sys::IsLittleEndianHost) {
2946    // Little-endian host - the source is ordered from LSB to MSB.  Order the
2947    // destination from LSB to MSB: Do a straight copy.
2948    memcpy(Dst, Src, StoreBytes);
2949  } else {
2950    // Big-endian host - the source is an array of 64 bit words ordered from
2951    // LSW to MSW.  Each word is ordered from MSB to LSB.  Order the destination
2952    // from MSB to LSB: Reverse the word order, but not the bytes in a word.
2953    while (StoreBytes > sizeof(uint64_t)) {
2954      StoreBytes -= sizeof(uint64_t);
2955      // May not be aligned so use memcpy.
2956      memcpy(Dst + StoreBytes, Src, sizeof(uint64_t));
2957      Src += sizeof(uint64_t);
2958    }
2959
2960    memcpy(Dst, Src + sizeof(uint64_t) - StoreBytes, StoreBytes);
2961  }
2962}
2963
2964/// LoadIntFromMemory - Loads the integer stored in the LoadBytes bytes starting
2965/// from Src into IntVal, which is assumed to be wide enough and to hold zero.
2966void llvm::LoadIntFromMemory(APInt &IntVal, uint8_t *Src, unsigned LoadBytes) {
2967  assert((IntVal.getBitWidth()+7)/8 >= LoadBytes && "Integer too small!");
2968  uint8_t *Dst = reinterpret_cast<uint8_t *>(
2969                   const_cast<uint64_t *>(IntVal.getRawData()));
2970
2971  if (sys::IsLittleEndianHost)
2972    // Little-endian host - the destination must be ordered from LSB to MSB.
2973    // The source is ordered from LSB to MSB: Do a straight copy.
2974    memcpy(Dst, Src, LoadBytes);
2975  else {
2976    // Big-endian - the destination is an array of 64 bit words ordered from
2977    // LSW to MSW.  Each word must be ordered from MSB to LSB.  The source is
2978    // ordered from MSB to LSB: Reverse the word order, but not the bytes in
2979    // a word.
2980    while (LoadBytes > sizeof(uint64_t)) {
2981      LoadBytes -= sizeof(uint64_t);
2982      // May not be aligned so use memcpy.
2983      memcpy(Dst, Src + LoadBytes, sizeof(uint64_t));
2984      Dst += sizeof(uint64_t);
2985    }
2986
2987    memcpy(Dst + sizeof(uint64_t) - LoadBytes, Src, LoadBytes);
2988  }
2989}
2990