APInt.cpp revision 344779
1//===-- APInt.cpp - Implement APInt class ---------------------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file implements a class to represent arbitrary precision integer 11// constant values and provide a variety of arithmetic operations on them. 12// 13//===----------------------------------------------------------------------===// 14 15#include "llvm/ADT/APInt.h" 16#include "llvm/ADT/ArrayRef.h" 17#include "llvm/ADT/FoldingSet.h" 18#include "llvm/ADT/Hashing.h" 19#include "llvm/ADT/Optional.h" 20#include "llvm/ADT/SmallString.h" 21#include "llvm/ADT/StringRef.h" 22#include "llvm/ADT/bit.h" 23#include "llvm/Config/llvm-config.h" 24#include "llvm/Support/Debug.h" 25#include "llvm/Support/ErrorHandling.h" 26#include "llvm/Support/MathExtras.h" 27#include "llvm/Support/raw_ostream.h" 28#include <climits> 29#include <cmath> 30#include <cstdlib> 31#include <cstring> 32using namespace llvm; 33 34#define DEBUG_TYPE "apint" 35 36/// A utility function for allocating memory, checking for allocation failures, 37/// and ensuring the contents are zeroed. 38inline static uint64_t* getClearedMemory(unsigned numWords) { 39 uint64_t *result = new uint64_t[numWords]; 40 memset(result, 0, numWords * sizeof(uint64_t)); 41 return result; 42} 43 44/// A utility function for allocating memory and checking for allocation 45/// failure. The content is not zeroed. 46inline static uint64_t* getMemory(unsigned numWords) { 47 return new uint64_t[numWords]; 48} 49 50/// A utility function that converts a character to a digit. 51inline static unsigned getDigit(char cdigit, uint8_t radix) { 52 unsigned r; 53 54 if (radix == 16 || radix == 36) { 55 r = cdigit - '0'; 56 if (r <= 9) 57 return r; 58 59 r = cdigit - 'A'; 60 if (r <= radix - 11U) 61 return r + 10; 62 63 r = cdigit - 'a'; 64 if (r <= radix - 11U) 65 return r + 10; 66 67 radix = 10; 68 } 69 70 r = cdigit - '0'; 71 if (r < radix) 72 return r; 73 74 return -1U; 75} 76 77 78void APInt::initSlowCase(uint64_t val, bool isSigned) { 79 U.pVal = getClearedMemory(getNumWords()); 80 U.pVal[0] = val; 81 if (isSigned && int64_t(val) < 0) 82 for (unsigned i = 1; i < getNumWords(); ++i) 83 U.pVal[i] = WORDTYPE_MAX; 84 clearUnusedBits(); 85} 86 87void APInt::initSlowCase(const APInt& that) { 88 U.pVal = getMemory(getNumWords()); 89 memcpy(U.pVal, that.U.pVal, getNumWords() * APINT_WORD_SIZE); 90} 91 92void APInt::initFromArray(ArrayRef<uint64_t> bigVal) { 93 assert(BitWidth && "Bitwidth too small"); 94 assert(bigVal.data() && "Null pointer detected!"); 95 if (isSingleWord()) 96 U.VAL = bigVal[0]; 97 else { 98 // Get memory, cleared to 0 99 U.pVal = getClearedMemory(getNumWords()); 100 // Calculate the number of words to copy 101 unsigned words = std::min<unsigned>(bigVal.size(), getNumWords()); 102 // Copy the words from bigVal to pVal 103 memcpy(U.pVal, bigVal.data(), words * APINT_WORD_SIZE); 104 } 105 // Make sure unused high bits are cleared 106 clearUnusedBits(); 107} 108 109APInt::APInt(unsigned numBits, ArrayRef<uint64_t> bigVal) 110 : BitWidth(numBits) { 111 initFromArray(bigVal); 112} 113 114APInt::APInt(unsigned numBits, unsigned numWords, const uint64_t bigVal[]) 115 : BitWidth(numBits) { 116 initFromArray(makeArrayRef(bigVal, numWords)); 117} 118 119APInt::APInt(unsigned numbits, StringRef Str, uint8_t radix) 120 : BitWidth(numbits) { 121 assert(BitWidth && "Bitwidth too small"); 122 fromString(numbits, Str, radix); 123} 124 125void APInt::reallocate(unsigned NewBitWidth) { 126 // If the number of words is the same we can just change the width and stop. 127 if (getNumWords() == getNumWords(NewBitWidth)) { 128 BitWidth = NewBitWidth; 129 return; 130 } 131 132 // If we have an allocation, delete it. 133 if (!isSingleWord()) 134 delete [] U.pVal; 135 136 // Update BitWidth. 137 BitWidth = NewBitWidth; 138 139 // If we are supposed to have an allocation, create it. 140 if (!isSingleWord()) 141 U.pVal = getMemory(getNumWords()); 142} 143 144void APInt::AssignSlowCase(const APInt& RHS) { 145 // Don't do anything for X = X 146 if (this == &RHS) 147 return; 148 149 // Adjust the bit width and handle allocations as necessary. 150 reallocate(RHS.getBitWidth()); 151 152 // Copy the data. 153 if (isSingleWord()) 154 U.VAL = RHS.U.VAL; 155 else 156 memcpy(U.pVal, RHS.U.pVal, getNumWords() * APINT_WORD_SIZE); 157} 158 159/// This method 'profiles' an APInt for use with FoldingSet. 160void APInt::Profile(FoldingSetNodeID& ID) const { 161 ID.AddInteger(BitWidth); 162 163 if (isSingleWord()) { 164 ID.AddInteger(U.VAL); 165 return; 166 } 167 168 unsigned NumWords = getNumWords(); 169 for (unsigned i = 0; i < NumWords; ++i) 170 ID.AddInteger(U.pVal[i]); 171} 172 173/// Prefix increment operator. Increments the APInt by one. 174APInt& APInt::operator++() { 175 if (isSingleWord()) 176 ++U.VAL; 177 else 178 tcIncrement(U.pVal, getNumWords()); 179 return clearUnusedBits(); 180} 181 182/// Prefix decrement operator. Decrements the APInt by one. 183APInt& APInt::operator--() { 184 if (isSingleWord()) 185 --U.VAL; 186 else 187 tcDecrement(U.pVal, getNumWords()); 188 return clearUnusedBits(); 189} 190 191/// Adds the RHS APint to this APInt. 192/// @returns this, after addition of RHS. 193/// Addition assignment operator. 194APInt& APInt::operator+=(const APInt& RHS) { 195 assert(BitWidth == RHS.BitWidth && "Bit widths must be the same"); 196 if (isSingleWord()) 197 U.VAL += RHS.U.VAL; 198 else 199 tcAdd(U.pVal, RHS.U.pVal, 0, getNumWords()); 200 return clearUnusedBits(); 201} 202 203APInt& APInt::operator+=(uint64_t RHS) { 204 if (isSingleWord()) 205 U.VAL += RHS; 206 else 207 tcAddPart(U.pVal, RHS, getNumWords()); 208 return clearUnusedBits(); 209} 210 211/// Subtracts the RHS APInt from this APInt 212/// @returns this, after subtraction 213/// Subtraction assignment operator. 214APInt& APInt::operator-=(const APInt& RHS) { 215 assert(BitWidth == RHS.BitWidth && "Bit widths must be the same"); 216 if (isSingleWord()) 217 U.VAL -= RHS.U.VAL; 218 else 219 tcSubtract(U.pVal, RHS.U.pVal, 0, getNumWords()); 220 return clearUnusedBits(); 221} 222 223APInt& APInt::operator-=(uint64_t RHS) { 224 if (isSingleWord()) 225 U.VAL -= RHS; 226 else 227 tcSubtractPart(U.pVal, RHS, getNumWords()); 228 return clearUnusedBits(); 229} 230 231APInt APInt::operator*(const APInt& RHS) const { 232 assert(BitWidth == RHS.BitWidth && "Bit widths must be the same"); 233 if (isSingleWord()) 234 return APInt(BitWidth, U.VAL * RHS.U.VAL); 235 236 APInt Result(getMemory(getNumWords()), getBitWidth()); 237 238 tcMultiply(Result.U.pVal, U.pVal, RHS.U.pVal, getNumWords()); 239 240 Result.clearUnusedBits(); 241 return Result; 242} 243 244void APInt::AndAssignSlowCase(const APInt& RHS) { 245 tcAnd(U.pVal, RHS.U.pVal, getNumWords()); 246} 247 248void APInt::OrAssignSlowCase(const APInt& RHS) { 249 tcOr(U.pVal, RHS.U.pVal, getNumWords()); 250} 251 252void APInt::XorAssignSlowCase(const APInt& RHS) { 253 tcXor(U.pVal, RHS.U.pVal, getNumWords()); 254} 255 256APInt& APInt::operator*=(const APInt& RHS) { 257 assert(BitWidth == RHS.BitWidth && "Bit widths must be the same"); 258 *this = *this * RHS; 259 return *this; 260} 261 262APInt& APInt::operator*=(uint64_t RHS) { 263 if (isSingleWord()) { 264 U.VAL *= RHS; 265 } else { 266 unsigned NumWords = getNumWords(); 267 tcMultiplyPart(U.pVal, U.pVal, RHS, 0, NumWords, NumWords, false); 268 } 269 return clearUnusedBits(); 270} 271 272bool APInt::EqualSlowCase(const APInt& RHS) const { 273 return std::equal(U.pVal, U.pVal + getNumWords(), RHS.U.pVal); 274} 275 276int APInt::compare(const APInt& RHS) const { 277 assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison"); 278 if (isSingleWord()) 279 return U.VAL < RHS.U.VAL ? -1 : U.VAL > RHS.U.VAL; 280 281 return tcCompare(U.pVal, RHS.U.pVal, getNumWords()); 282} 283 284int APInt::compareSigned(const APInt& RHS) const { 285 assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison"); 286 if (isSingleWord()) { 287 int64_t lhsSext = SignExtend64(U.VAL, BitWidth); 288 int64_t rhsSext = SignExtend64(RHS.U.VAL, BitWidth); 289 return lhsSext < rhsSext ? -1 : lhsSext > rhsSext; 290 } 291 292 bool lhsNeg = isNegative(); 293 bool rhsNeg = RHS.isNegative(); 294 295 // If the sign bits don't match, then (LHS < RHS) if LHS is negative 296 if (lhsNeg != rhsNeg) 297 return lhsNeg ? -1 : 1; 298 299 // Otherwise we can just use an unsigned comparison, because even negative 300 // numbers compare correctly this way if both have the same signed-ness. 301 return tcCompare(U.pVal, RHS.U.pVal, getNumWords()); 302} 303 304void APInt::setBitsSlowCase(unsigned loBit, unsigned hiBit) { 305 unsigned loWord = whichWord(loBit); 306 unsigned hiWord = whichWord(hiBit); 307 308 // Create an initial mask for the low word with zeros below loBit. 309 uint64_t loMask = WORDTYPE_MAX << whichBit(loBit); 310 311 // If hiBit is not aligned, we need a high mask. 312 unsigned hiShiftAmt = whichBit(hiBit); 313 if (hiShiftAmt != 0) { 314 // Create a high mask with zeros above hiBit. 315 uint64_t hiMask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - hiShiftAmt); 316 // If loWord and hiWord are equal, then we combine the masks. Otherwise, 317 // set the bits in hiWord. 318 if (hiWord == loWord) 319 loMask &= hiMask; 320 else 321 U.pVal[hiWord] |= hiMask; 322 } 323 // Apply the mask to the low word. 324 U.pVal[loWord] |= loMask; 325 326 // Fill any words between loWord and hiWord with all ones. 327 for (unsigned word = loWord + 1; word < hiWord; ++word) 328 U.pVal[word] = WORDTYPE_MAX; 329} 330 331/// Toggle every bit to its opposite value. 332void APInt::flipAllBitsSlowCase() { 333 tcComplement(U.pVal, getNumWords()); 334 clearUnusedBits(); 335} 336 337/// Toggle a given bit to its opposite value whose position is given 338/// as "bitPosition". 339/// Toggles a given bit to its opposite value. 340void APInt::flipBit(unsigned bitPosition) { 341 assert(bitPosition < BitWidth && "Out of the bit-width range!"); 342 if ((*this)[bitPosition]) clearBit(bitPosition); 343 else setBit(bitPosition); 344} 345 346void APInt::insertBits(const APInt &subBits, unsigned bitPosition) { 347 unsigned subBitWidth = subBits.getBitWidth(); 348 assert(0 < subBitWidth && (subBitWidth + bitPosition) <= BitWidth && 349 "Illegal bit insertion"); 350 351 // Insertion is a direct copy. 352 if (subBitWidth == BitWidth) { 353 *this = subBits; 354 return; 355 } 356 357 // Single word result can be done as a direct bitmask. 358 if (isSingleWord()) { 359 uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - subBitWidth); 360 U.VAL &= ~(mask << bitPosition); 361 U.VAL |= (subBits.U.VAL << bitPosition); 362 return; 363 } 364 365 unsigned loBit = whichBit(bitPosition); 366 unsigned loWord = whichWord(bitPosition); 367 unsigned hi1Word = whichWord(bitPosition + subBitWidth - 1); 368 369 // Insertion within a single word can be done as a direct bitmask. 370 if (loWord == hi1Word) { 371 uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - subBitWidth); 372 U.pVal[loWord] &= ~(mask << loBit); 373 U.pVal[loWord] |= (subBits.U.VAL << loBit); 374 return; 375 } 376 377 // Insert on word boundaries. 378 if (loBit == 0) { 379 // Direct copy whole words. 380 unsigned numWholeSubWords = subBitWidth / APINT_BITS_PER_WORD; 381 memcpy(U.pVal + loWord, subBits.getRawData(), 382 numWholeSubWords * APINT_WORD_SIZE); 383 384 // Mask+insert remaining bits. 385 unsigned remainingBits = subBitWidth % APINT_BITS_PER_WORD; 386 if (remainingBits != 0) { 387 uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - remainingBits); 388 U.pVal[hi1Word] &= ~mask; 389 U.pVal[hi1Word] |= subBits.getWord(subBitWidth - 1); 390 } 391 return; 392 } 393 394 // General case - set/clear individual bits in dst based on src. 395 // TODO - there is scope for optimization here, but at the moment this code 396 // path is barely used so prefer readability over performance. 397 for (unsigned i = 0; i != subBitWidth; ++i) { 398 if (subBits[i]) 399 setBit(bitPosition + i); 400 else 401 clearBit(bitPosition + i); 402 } 403} 404 405APInt APInt::extractBits(unsigned numBits, unsigned bitPosition) const { 406 assert(numBits > 0 && "Can't extract zero bits"); 407 assert(bitPosition < BitWidth && (numBits + bitPosition) <= BitWidth && 408 "Illegal bit extraction"); 409 410 if (isSingleWord()) 411 return APInt(numBits, U.VAL >> bitPosition); 412 413 unsigned loBit = whichBit(bitPosition); 414 unsigned loWord = whichWord(bitPosition); 415 unsigned hiWord = whichWord(bitPosition + numBits - 1); 416 417 // Single word result extracting bits from a single word source. 418 if (loWord == hiWord) 419 return APInt(numBits, U.pVal[loWord] >> loBit); 420 421 // Extracting bits that start on a source word boundary can be done 422 // as a fast memory copy. 423 if (loBit == 0) 424 return APInt(numBits, makeArrayRef(U.pVal + loWord, 1 + hiWord - loWord)); 425 426 // General case - shift + copy source words directly into place. 427 APInt Result(numBits, 0); 428 unsigned NumSrcWords = getNumWords(); 429 unsigned NumDstWords = Result.getNumWords(); 430 431 uint64_t *DestPtr = Result.isSingleWord() ? &Result.U.VAL : Result.U.pVal; 432 for (unsigned word = 0; word < NumDstWords; ++word) { 433 uint64_t w0 = U.pVal[loWord + word]; 434 uint64_t w1 = 435 (loWord + word + 1) < NumSrcWords ? U.pVal[loWord + word + 1] : 0; 436 DestPtr[word] = (w0 >> loBit) | (w1 << (APINT_BITS_PER_WORD - loBit)); 437 } 438 439 return Result.clearUnusedBits(); 440} 441 442unsigned APInt::getBitsNeeded(StringRef str, uint8_t radix) { 443 assert(!str.empty() && "Invalid string length"); 444 assert((radix == 10 || radix == 8 || radix == 16 || radix == 2 || 445 radix == 36) && 446 "Radix should be 2, 8, 10, 16, or 36!"); 447 448 size_t slen = str.size(); 449 450 // Each computation below needs to know if it's negative. 451 StringRef::iterator p = str.begin(); 452 unsigned isNegative = *p == '-'; 453 if (*p == '-' || *p == '+') { 454 p++; 455 slen--; 456 assert(slen && "String is only a sign, needs a value."); 457 } 458 459 // For radixes of power-of-two values, the bits required is accurately and 460 // easily computed 461 if (radix == 2) 462 return slen + isNegative; 463 if (radix == 8) 464 return slen * 3 + isNegative; 465 if (radix == 16) 466 return slen * 4 + isNegative; 467 468 // FIXME: base 36 469 470 // This is grossly inefficient but accurate. We could probably do something 471 // with a computation of roughly slen*64/20 and then adjust by the value of 472 // the first few digits. But, I'm not sure how accurate that could be. 473 474 // Compute a sufficient number of bits that is always large enough but might 475 // be too large. This avoids the assertion in the constructor. This 476 // calculation doesn't work appropriately for the numbers 0-9, so just use 4 477 // bits in that case. 478 unsigned sufficient 479 = radix == 10? (slen == 1 ? 4 : slen * 64/18) 480 : (slen == 1 ? 7 : slen * 16/3); 481 482 // Convert to the actual binary value. 483 APInt tmp(sufficient, StringRef(p, slen), radix); 484 485 // Compute how many bits are required. If the log is infinite, assume we need 486 // just bit. 487 unsigned log = tmp.logBase2(); 488 if (log == (unsigned)-1) { 489 return isNegative + 1; 490 } else { 491 return isNegative + log + 1; 492 } 493} 494 495hash_code llvm::hash_value(const APInt &Arg) { 496 if (Arg.isSingleWord()) 497 return hash_combine(Arg.U.VAL); 498 499 return hash_combine_range(Arg.U.pVal, Arg.U.pVal + Arg.getNumWords()); 500} 501 502bool APInt::isSplat(unsigned SplatSizeInBits) const { 503 assert(getBitWidth() % SplatSizeInBits == 0 && 504 "SplatSizeInBits must divide width!"); 505 // We can check that all parts of an integer are equal by making use of a 506 // little trick: rotate and check if it's still the same value. 507 return *this == rotl(SplatSizeInBits); 508} 509 510/// This function returns the high "numBits" bits of this APInt. 511APInt APInt::getHiBits(unsigned numBits) const { 512 return this->lshr(BitWidth - numBits); 513} 514 515/// This function returns the low "numBits" bits of this APInt. 516APInt APInt::getLoBits(unsigned numBits) const { 517 APInt Result(getLowBitsSet(BitWidth, numBits)); 518 Result &= *this; 519 return Result; 520} 521 522/// Return a value containing V broadcasted over NewLen bits. 523APInt APInt::getSplat(unsigned NewLen, const APInt &V) { 524 assert(NewLen >= V.getBitWidth() && "Can't splat to smaller bit width!"); 525 526 APInt Val = V.zextOrSelf(NewLen); 527 for (unsigned I = V.getBitWidth(); I < NewLen; I <<= 1) 528 Val |= Val << I; 529 530 return Val; 531} 532 533unsigned APInt::countLeadingZerosSlowCase() const { 534 unsigned Count = 0; 535 for (int i = getNumWords()-1; i >= 0; --i) { 536 uint64_t V = U.pVal[i]; 537 if (V == 0) 538 Count += APINT_BITS_PER_WORD; 539 else { 540 Count += llvm::countLeadingZeros(V); 541 break; 542 } 543 } 544 // Adjust for unused bits in the most significant word (they are zero). 545 unsigned Mod = BitWidth % APINT_BITS_PER_WORD; 546 Count -= Mod > 0 ? APINT_BITS_PER_WORD - Mod : 0; 547 return Count; 548} 549 550unsigned APInt::countLeadingOnesSlowCase() const { 551 unsigned highWordBits = BitWidth % APINT_BITS_PER_WORD; 552 unsigned shift; 553 if (!highWordBits) { 554 highWordBits = APINT_BITS_PER_WORD; 555 shift = 0; 556 } else { 557 shift = APINT_BITS_PER_WORD - highWordBits; 558 } 559 int i = getNumWords() - 1; 560 unsigned Count = llvm::countLeadingOnes(U.pVal[i] << shift); 561 if (Count == highWordBits) { 562 for (i--; i >= 0; --i) { 563 if (U.pVal[i] == WORDTYPE_MAX) 564 Count += APINT_BITS_PER_WORD; 565 else { 566 Count += llvm::countLeadingOnes(U.pVal[i]); 567 break; 568 } 569 } 570 } 571 return Count; 572} 573 574unsigned APInt::countTrailingZerosSlowCase() const { 575 unsigned Count = 0; 576 unsigned i = 0; 577 for (; i < getNumWords() && U.pVal[i] == 0; ++i) 578 Count += APINT_BITS_PER_WORD; 579 if (i < getNumWords()) 580 Count += llvm::countTrailingZeros(U.pVal[i]); 581 return std::min(Count, BitWidth); 582} 583 584unsigned APInt::countTrailingOnesSlowCase() const { 585 unsigned Count = 0; 586 unsigned i = 0; 587 for (; i < getNumWords() && U.pVal[i] == WORDTYPE_MAX; ++i) 588 Count += APINT_BITS_PER_WORD; 589 if (i < getNumWords()) 590 Count += llvm::countTrailingOnes(U.pVal[i]); 591 assert(Count <= BitWidth); 592 return Count; 593} 594 595unsigned APInt::countPopulationSlowCase() const { 596 unsigned Count = 0; 597 for (unsigned i = 0; i < getNumWords(); ++i) 598 Count += llvm::countPopulation(U.pVal[i]); 599 return Count; 600} 601 602bool APInt::intersectsSlowCase(const APInt &RHS) const { 603 for (unsigned i = 0, e = getNumWords(); i != e; ++i) 604 if ((U.pVal[i] & RHS.U.pVal[i]) != 0) 605 return true; 606 607 return false; 608} 609 610bool APInt::isSubsetOfSlowCase(const APInt &RHS) const { 611 for (unsigned i = 0, e = getNumWords(); i != e; ++i) 612 if ((U.pVal[i] & ~RHS.U.pVal[i]) != 0) 613 return false; 614 615 return true; 616} 617 618APInt APInt::byteSwap() const { 619 assert(BitWidth >= 16 && BitWidth % 16 == 0 && "Cannot byteswap!"); 620 if (BitWidth == 16) 621 return APInt(BitWidth, ByteSwap_16(uint16_t(U.VAL))); 622 if (BitWidth == 32) 623 return APInt(BitWidth, ByteSwap_32(unsigned(U.VAL))); 624 if (BitWidth == 48) { 625 unsigned Tmp1 = unsigned(U.VAL >> 16); 626 Tmp1 = ByteSwap_32(Tmp1); 627 uint16_t Tmp2 = uint16_t(U.VAL); 628 Tmp2 = ByteSwap_16(Tmp2); 629 return APInt(BitWidth, (uint64_t(Tmp2) << 32) | Tmp1); 630 } 631 if (BitWidth == 64) 632 return APInt(BitWidth, ByteSwap_64(U.VAL)); 633 634 APInt Result(getNumWords() * APINT_BITS_PER_WORD, 0); 635 for (unsigned I = 0, N = getNumWords(); I != N; ++I) 636 Result.U.pVal[I] = ByteSwap_64(U.pVal[N - I - 1]); 637 if (Result.BitWidth != BitWidth) { 638 Result.lshrInPlace(Result.BitWidth - BitWidth); 639 Result.BitWidth = BitWidth; 640 } 641 return Result; 642} 643 644APInt APInt::reverseBits() const { 645 switch (BitWidth) { 646 case 64: 647 return APInt(BitWidth, llvm::reverseBits<uint64_t>(U.VAL)); 648 case 32: 649 return APInt(BitWidth, llvm::reverseBits<uint32_t>(U.VAL)); 650 case 16: 651 return APInt(BitWidth, llvm::reverseBits<uint16_t>(U.VAL)); 652 case 8: 653 return APInt(BitWidth, llvm::reverseBits<uint8_t>(U.VAL)); 654 default: 655 break; 656 } 657 658 APInt Val(*this); 659 APInt Reversed(BitWidth, 0); 660 unsigned S = BitWidth; 661 662 for (; Val != 0; Val.lshrInPlace(1)) { 663 Reversed <<= 1; 664 Reversed |= Val[0]; 665 --S; 666 } 667 668 Reversed <<= S; 669 return Reversed; 670} 671 672APInt llvm::APIntOps::GreatestCommonDivisor(APInt A, APInt B) { 673 // Fast-path a common case. 674 if (A == B) return A; 675 676 // Corner cases: if either operand is zero, the other is the gcd. 677 if (!A) return B; 678 if (!B) return A; 679 680 // Count common powers of 2 and remove all other powers of 2. 681 unsigned Pow2; 682 { 683 unsigned Pow2_A = A.countTrailingZeros(); 684 unsigned Pow2_B = B.countTrailingZeros(); 685 if (Pow2_A > Pow2_B) { 686 A.lshrInPlace(Pow2_A - Pow2_B); 687 Pow2 = Pow2_B; 688 } else if (Pow2_B > Pow2_A) { 689 B.lshrInPlace(Pow2_B - Pow2_A); 690 Pow2 = Pow2_A; 691 } else { 692 Pow2 = Pow2_A; 693 } 694 } 695 696 // Both operands are odd multiples of 2^Pow_2: 697 // 698 // gcd(a, b) = gcd(|a - b| / 2^i, min(a, b)) 699 // 700 // This is a modified version of Stein's algorithm, taking advantage of 701 // efficient countTrailingZeros(). 702 while (A != B) { 703 if (A.ugt(B)) { 704 A -= B; 705 A.lshrInPlace(A.countTrailingZeros() - Pow2); 706 } else { 707 B -= A; 708 B.lshrInPlace(B.countTrailingZeros() - Pow2); 709 } 710 } 711 712 return A; 713} 714 715APInt llvm::APIntOps::RoundDoubleToAPInt(double Double, unsigned width) { 716 uint64_t I = bit_cast<uint64_t>(Double); 717 718 // Get the sign bit from the highest order bit 719 bool isNeg = I >> 63; 720 721 // Get the 11-bit exponent and adjust for the 1023 bit bias 722 int64_t exp = ((I >> 52) & 0x7ff) - 1023; 723 724 // If the exponent is negative, the value is < 0 so just return 0. 725 if (exp < 0) 726 return APInt(width, 0u); 727 728 // Extract the mantissa by clearing the top 12 bits (sign + exponent). 729 uint64_t mantissa = (I & (~0ULL >> 12)) | 1ULL << 52; 730 731 // If the exponent doesn't shift all bits out of the mantissa 732 if (exp < 52) 733 return isNeg ? -APInt(width, mantissa >> (52 - exp)) : 734 APInt(width, mantissa >> (52 - exp)); 735 736 // If the client didn't provide enough bits for us to shift the mantissa into 737 // then the result is undefined, just return 0 738 if (width <= exp - 52) 739 return APInt(width, 0); 740 741 // Otherwise, we have to shift the mantissa bits up to the right location 742 APInt Tmp(width, mantissa); 743 Tmp <<= (unsigned)exp - 52; 744 return isNeg ? -Tmp : Tmp; 745} 746 747/// This function converts this APInt to a double. 748/// The layout for double is as following (IEEE Standard 754): 749/// -------------------------------------- 750/// | Sign Exponent Fraction Bias | 751/// |-------------------------------------- | 752/// | 1[63] 11[62-52] 52[51-00] 1023 | 753/// -------------------------------------- 754double APInt::roundToDouble(bool isSigned) const { 755 756 // Handle the simple case where the value is contained in one uint64_t. 757 // It is wrong to optimize getWord(0) to VAL; there might be more than one word. 758 if (isSingleWord() || getActiveBits() <= APINT_BITS_PER_WORD) { 759 if (isSigned) { 760 int64_t sext = SignExtend64(getWord(0), BitWidth); 761 return double(sext); 762 } else 763 return double(getWord(0)); 764 } 765 766 // Determine if the value is negative. 767 bool isNeg = isSigned ? (*this)[BitWidth-1] : false; 768 769 // Construct the absolute value if we're negative. 770 APInt Tmp(isNeg ? -(*this) : (*this)); 771 772 // Figure out how many bits we're using. 773 unsigned n = Tmp.getActiveBits(); 774 775 // The exponent (without bias normalization) is just the number of bits 776 // we are using. Note that the sign bit is gone since we constructed the 777 // absolute value. 778 uint64_t exp = n; 779 780 // Return infinity for exponent overflow 781 if (exp > 1023) { 782 if (!isSigned || !isNeg) 783 return std::numeric_limits<double>::infinity(); 784 else 785 return -std::numeric_limits<double>::infinity(); 786 } 787 exp += 1023; // Increment for 1023 bias 788 789 // Number of bits in mantissa is 52. To obtain the mantissa value, we must 790 // extract the high 52 bits from the correct words in pVal. 791 uint64_t mantissa; 792 unsigned hiWord = whichWord(n-1); 793 if (hiWord == 0) { 794 mantissa = Tmp.U.pVal[0]; 795 if (n > 52) 796 mantissa >>= n - 52; // shift down, we want the top 52 bits. 797 } else { 798 assert(hiWord > 0 && "huh?"); 799 uint64_t hibits = Tmp.U.pVal[hiWord] << (52 - n % APINT_BITS_PER_WORD); 800 uint64_t lobits = Tmp.U.pVal[hiWord-1] >> (11 + n % APINT_BITS_PER_WORD); 801 mantissa = hibits | lobits; 802 } 803 804 // The leading bit of mantissa is implicit, so get rid of it. 805 uint64_t sign = isNeg ? (1ULL << (APINT_BITS_PER_WORD - 1)) : 0; 806 uint64_t I = sign | (exp << 52) | mantissa; 807 return bit_cast<double>(I); 808} 809 810// Truncate to new width. 811APInt APInt::trunc(unsigned width) const { 812 assert(width < BitWidth && "Invalid APInt Truncate request"); 813 assert(width && "Can't truncate to 0 bits"); 814 815 if (width <= APINT_BITS_PER_WORD) 816 return APInt(width, getRawData()[0]); 817 818 APInt Result(getMemory(getNumWords(width)), width); 819 820 // Copy full words. 821 unsigned i; 822 for (i = 0; i != width / APINT_BITS_PER_WORD; i++) 823 Result.U.pVal[i] = U.pVal[i]; 824 825 // Truncate and copy any partial word. 826 unsigned bits = (0 - width) % APINT_BITS_PER_WORD; 827 if (bits != 0) 828 Result.U.pVal[i] = U.pVal[i] << bits >> bits; 829 830 return Result; 831} 832 833// Sign extend to a new width. 834APInt APInt::sext(unsigned Width) const { 835 assert(Width > BitWidth && "Invalid APInt SignExtend request"); 836 837 if (Width <= APINT_BITS_PER_WORD) 838 return APInt(Width, SignExtend64(U.VAL, BitWidth)); 839 840 APInt Result(getMemory(getNumWords(Width)), Width); 841 842 // Copy words. 843 std::memcpy(Result.U.pVal, getRawData(), getNumWords() * APINT_WORD_SIZE); 844 845 // Sign extend the last word since there may be unused bits in the input. 846 Result.U.pVal[getNumWords() - 1] = 847 SignExtend64(Result.U.pVal[getNumWords() - 1], 848 ((BitWidth - 1) % APINT_BITS_PER_WORD) + 1); 849 850 // Fill with sign bits. 851 std::memset(Result.U.pVal + getNumWords(), isNegative() ? -1 : 0, 852 (Result.getNumWords() - getNumWords()) * APINT_WORD_SIZE); 853 Result.clearUnusedBits(); 854 return Result; 855} 856 857// Zero extend to a new width. 858APInt APInt::zext(unsigned width) const { 859 assert(width > BitWidth && "Invalid APInt ZeroExtend request"); 860 861 if (width <= APINT_BITS_PER_WORD) 862 return APInt(width, U.VAL); 863 864 APInt Result(getMemory(getNumWords(width)), width); 865 866 // Copy words. 867 std::memcpy(Result.U.pVal, getRawData(), getNumWords() * APINT_WORD_SIZE); 868 869 // Zero remaining words. 870 std::memset(Result.U.pVal + getNumWords(), 0, 871 (Result.getNumWords() - getNumWords()) * APINT_WORD_SIZE); 872 873 return Result; 874} 875 876APInt APInt::zextOrTrunc(unsigned width) const { 877 if (BitWidth < width) 878 return zext(width); 879 if (BitWidth > width) 880 return trunc(width); 881 return *this; 882} 883 884APInt APInt::sextOrTrunc(unsigned width) const { 885 if (BitWidth < width) 886 return sext(width); 887 if (BitWidth > width) 888 return trunc(width); 889 return *this; 890} 891 892APInt APInt::zextOrSelf(unsigned width) const { 893 if (BitWidth < width) 894 return zext(width); 895 return *this; 896} 897 898APInt APInt::sextOrSelf(unsigned width) const { 899 if (BitWidth < width) 900 return sext(width); 901 return *this; 902} 903 904/// Arithmetic right-shift this APInt by shiftAmt. 905/// Arithmetic right-shift function. 906void APInt::ashrInPlace(const APInt &shiftAmt) { 907 ashrInPlace((unsigned)shiftAmt.getLimitedValue(BitWidth)); 908} 909 910/// Arithmetic right-shift this APInt by shiftAmt. 911/// Arithmetic right-shift function. 912void APInt::ashrSlowCase(unsigned ShiftAmt) { 913 // Don't bother performing a no-op shift. 914 if (!ShiftAmt) 915 return; 916 917 // Save the original sign bit for later. 918 bool Negative = isNegative(); 919 920 // WordShift is the inter-part shift; BitShift is intra-part shift. 921 unsigned WordShift = ShiftAmt / APINT_BITS_PER_WORD; 922 unsigned BitShift = ShiftAmt % APINT_BITS_PER_WORD; 923 924 unsigned WordsToMove = getNumWords() - WordShift; 925 if (WordsToMove != 0) { 926 // Sign extend the last word to fill in the unused bits. 927 U.pVal[getNumWords() - 1] = SignExtend64( 928 U.pVal[getNumWords() - 1], ((BitWidth - 1) % APINT_BITS_PER_WORD) + 1); 929 930 // Fastpath for moving by whole words. 931 if (BitShift == 0) { 932 std::memmove(U.pVal, U.pVal + WordShift, WordsToMove * APINT_WORD_SIZE); 933 } else { 934 // Move the words containing significant bits. 935 for (unsigned i = 0; i != WordsToMove - 1; ++i) 936 U.pVal[i] = (U.pVal[i + WordShift] >> BitShift) | 937 (U.pVal[i + WordShift + 1] << (APINT_BITS_PER_WORD - BitShift)); 938 939 // Handle the last word which has no high bits to copy. 940 U.pVal[WordsToMove - 1] = U.pVal[WordShift + WordsToMove - 1] >> BitShift; 941 // Sign extend one more time. 942 U.pVal[WordsToMove - 1] = 943 SignExtend64(U.pVal[WordsToMove - 1], APINT_BITS_PER_WORD - BitShift); 944 } 945 } 946 947 // Fill in the remainder based on the original sign. 948 std::memset(U.pVal + WordsToMove, Negative ? -1 : 0, 949 WordShift * APINT_WORD_SIZE); 950 clearUnusedBits(); 951} 952 953/// Logical right-shift this APInt by shiftAmt. 954/// Logical right-shift function. 955void APInt::lshrInPlace(const APInt &shiftAmt) { 956 lshrInPlace((unsigned)shiftAmt.getLimitedValue(BitWidth)); 957} 958 959/// Logical right-shift this APInt by shiftAmt. 960/// Logical right-shift function. 961void APInt::lshrSlowCase(unsigned ShiftAmt) { 962 tcShiftRight(U.pVal, getNumWords(), ShiftAmt); 963} 964 965/// Left-shift this APInt by shiftAmt. 966/// Left-shift function. 967APInt &APInt::operator<<=(const APInt &shiftAmt) { 968 // It's undefined behavior in C to shift by BitWidth or greater. 969 *this <<= (unsigned)shiftAmt.getLimitedValue(BitWidth); 970 return *this; 971} 972 973void APInt::shlSlowCase(unsigned ShiftAmt) { 974 tcShiftLeft(U.pVal, getNumWords(), ShiftAmt); 975 clearUnusedBits(); 976} 977 978// Calculate the rotate amount modulo the bit width. 979static unsigned rotateModulo(unsigned BitWidth, const APInt &rotateAmt) { 980 unsigned rotBitWidth = rotateAmt.getBitWidth(); 981 APInt rot = rotateAmt; 982 if (rotBitWidth < BitWidth) { 983 // Extend the rotate APInt, so that the urem doesn't divide by 0. 984 // e.g. APInt(1, 32) would give APInt(1, 0). 985 rot = rotateAmt.zext(BitWidth); 986 } 987 rot = rot.urem(APInt(rot.getBitWidth(), BitWidth)); 988 return rot.getLimitedValue(BitWidth); 989} 990 991APInt APInt::rotl(const APInt &rotateAmt) const { 992 return rotl(rotateModulo(BitWidth, rotateAmt)); 993} 994 995APInt APInt::rotl(unsigned rotateAmt) const { 996 rotateAmt %= BitWidth; 997 if (rotateAmt == 0) 998 return *this; 999 return shl(rotateAmt) | lshr(BitWidth - rotateAmt); 1000} 1001 1002APInt APInt::rotr(const APInt &rotateAmt) const { 1003 return rotr(rotateModulo(BitWidth, rotateAmt)); 1004} 1005 1006APInt APInt::rotr(unsigned rotateAmt) const { 1007 rotateAmt %= BitWidth; 1008 if (rotateAmt == 0) 1009 return *this; 1010 return lshr(rotateAmt) | shl(BitWidth - rotateAmt); 1011} 1012 1013// Square Root - this method computes and returns the square root of "this". 1014// Three mechanisms are used for computation. For small values (<= 5 bits), 1015// a table lookup is done. This gets some performance for common cases. For 1016// values using less than 52 bits, the value is converted to double and then 1017// the libc sqrt function is called. The result is rounded and then converted 1018// back to a uint64_t which is then used to construct the result. Finally, 1019// the Babylonian method for computing square roots is used. 1020APInt APInt::sqrt() const { 1021 1022 // Determine the magnitude of the value. 1023 unsigned magnitude = getActiveBits(); 1024 1025 // Use a fast table for some small values. This also gets rid of some 1026 // rounding errors in libc sqrt for small values. 1027 if (magnitude <= 5) { 1028 static const uint8_t results[32] = { 1029 /* 0 */ 0, 1030 /* 1- 2 */ 1, 1, 1031 /* 3- 6 */ 2, 2, 2, 2, 1032 /* 7-12 */ 3, 3, 3, 3, 3, 3, 1033 /* 13-20 */ 4, 4, 4, 4, 4, 4, 4, 4, 1034 /* 21-30 */ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 1035 /* 31 */ 6 1036 }; 1037 return APInt(BitWidth, results[ (isSingleWord() ? U.VAL : U.pVal[0]) ]); 1038 } 1039 1040 // If the magnitude of the value fits in less than 52 bits (the precision of 1041 // an IEEE double precision floating point value), then we can use the 1042 // libc sqrt function which will probably use a hardware sqrt computation. 1043 // This should be faster than the algorithm below. 1044 if (magnitude < 52) { 1045 return APInt(BitWidth, 1046 uint64_t(::round(::sqrt(double(isSingleWord() ? U.VAL 1047 : U.pVal[0]))))); 1048 } 1049 1050 // Okay, all the short cuts are exhausted. We must compute it. The following 1051 // is a classical Babylonian method for computing the square root. This code 1052 // was adapted to APInt from a wikipedia article on such computations. 1053 // See http://www.wikipedia.org/ and go to the page named 1054 // Calculate_an_integer_square_root. 1055 unsigned nbits = BitWidth, i = 4; 1056 APInt testy(BitWidth, 16); 1057 APInt x_old(BitWidth, 1); 1058 APInt x_new(BitWidth, 0); 1059 APInt two(BitWidth, 2); 1060 1061 // Select a good starting value using binary logarithms. 1062 for (;; i += 2, testy = testy.shl(2)) 1063 if (i >= nbits || this->ule(testy)) { 1064 x_old = x_old.shl(i / 2); 1065 break; 1066 } 1067 1068 // Use the Babylonian method to arrive at the integer square root: 1069 for (;;) { 1070 x_new = (this->udiv(x_old) + x_old).udiv(two); 1071 if (x_old.ule(x_new)) 1072 break; 1073 x_old = x_new; 1074 } 1075 1076 // Make sure we return the closest approximation 1077 // NOTE: The rounding calculation below is correct. It will produce an 1078 // off-by-one discrepancy with results from pari/gp. That discrepancy has been 1079 // determined to be a rounding issue with pari/gp as it begins to use a 1080 // floating point representation after 192 bits. There are no discrepancies 1081 // between this algorithm and pari/gp for bit widths < 192 bits. 1082 APInt square(x_old * x_old); 1083 APInt nextSquare((x_old + 1) * (x_old +1)); 1084 if (this->ult(square)) 1085 return x_old; 1086 assert(this->ule(nextSquare) && "Error in APInt::sqrt computation"); 1087 APInt midpoint((nextSquare - square).udiv(two)); 1088 APInt offset(*this - square); 1089 if (offset.ult(midpoint)) 1090 return x_old; 1091 return x_old + 1; 1092} 1093 1094/// Computes the multiplicative inverse of this APInt for a given modulo. The 1095/// iterative extended Euclidean algorithm is used to solve for this value, 1096/// however we simplify it to speed up calculating only the inverse, and take 1097/// advantage of div+rem calculations. We also use some tricks to avoid copying 1098/// (potentially large) APInts around. 1099APInt APInt::multiplicativeInverse(const APInt& modulo) const { 1100 assert(ult(modulo) && "This APInt must be smaller than the modulo"); 1101 1102 // Using the properties listed at the following web page (accessed 06/21/08): 1103 // http://www.numbertheory.org/php/euclid.html 1104 // (especially the properties numbered 3, 4 and 9) it can be proved that 1105 // BitWidth bits suffice for all the computations in the algorithm implemented 1106 // below. More precisely, this number of bits suffice if the multiplicative 1107 // inverse exists, but may not suffice for the general extended Euclidean 1108 // algorithm. 1109 1110 APInt r[2] = { modulo, *this }; 1111 APInt t[2] = { APInt(BitWidth, 0), APInt(BitWidth, 1) }; 1112 APInt q(BitWidth, 0); 1113 1114 unsigned i; 1115 for (i = 0; r[i^1] != 0; i ^= 1) { 1116 // An overview of the math without the confusing bit-flipping: 1117 // q = r[i-2] / r[i-1] 1118 // r[i] = r[i-2] % r[i-1] 1119 // t[i] = t[i-2] - t[i-1] * q 1120 udivrem(r[i], r[i^1], q, r[i]); 1121 t[i] -= t[i^1] * q; 1122 } 1123 1124 // If this APInt and the modulo are not coprime, there is no multiplicative 1125 // inverse, so return 0. We check this by looking at the next-to-last 1126 // remainder, which is the gcd(*this,modulo) as calculated by the Euclidean 1127 // algorithm. 1128 if (r[i] != 1) 1129 return APInt(BitWidth, 0); 1130 1131 // The next-to-last t is the multiplicative inverse. However, we are 1132 // interested in a positive inverse. Calculate a positive one from a negative 1133 // one if necessary. A simple addition of the modulo suffices because 1134 // abs(t[i]) is known to be less than *this/2 (see the link above). 1135 if (t[i].isNegative()) 1136 t[i] += modulo; 1137 1138 return std::move(t[i]); 1139} 1140 1141/// Calculate the magic numbers required to implement a signed integer division 1142/// by a constant as a sequence of multiplies, adds and shifts. Requires that 1143/// the divisor not be 0, 1, or -1. Taken from "Hacker's Delight", Henry S. 1144/// Warren, Jr., chapter 10. 1145APInt::ms APInt::magic() const { 1146 const APInt& d = *this; 1147 unsigned p; 1148 APInt ad, anc, delta, q1, r1, q2, r2, t; 1149 APInt signedMin = APInt::getSignedMinValue(d.getBitWidth()); 1150 struct ms mag; 1151 1152 ad = d.abs(); 1153 t = signedMin + (d.lshr(d.getBitWidth() - 1)); 1154 anc = t - 1 - t.urem(ad); // absolute value of nc 1155 p = d.getBitWidth() - 1; // initialize p 1156 q1 = signedMin.udiv(anc); // initialize q1 = 2p/abs(nc) 1157 r1 = signedMin - q1*anc; // initialize r1 = rem(2p,abs(nc)) 1158 q2 = signedMin.udiv(ad); // initialize q2 = 2p/abs(d) 1159 r2 = signedMin - q2*ad; // initialize r2 = rem(2p,abs(d)) 1160 do { 1161 p = p + 1; 1162 q1 = q1<<1; // update q1 = 2p/abs(nc) 1163 r1 = r1<<1; // update r1 = rem(2p/abs(nc)) 1164 if (r1.uge(anc)) { // must be unsigned comparison 1165 q1 = q1 + 1; 1166 r1 = r1 - anc; 1167 } 1168 q2 = q2<<1; // update q2 = 2p/abs(d) 1169 r2 = r2<<1; // update r2 = rem(2p/abs(d)) 1170 if (r2.uge(ad)) { // must be unsigned comparison 1171 q2 = q2 + 1; 1172 r2 = r2 - ad; 1173 } 1174 delta = ad - r2; 1175 } while (q1.ult(delta) || (q1 == delta && r1 == 0)); 1176 1177 mag.m = q2 + 1; 1178 if (d.isNegative()) mag.m = -mag.m; // resulting magic number 1179 mag.s = p - d.getBitWidth(); // resulting shift 1180 return mag; 1181} 1182 1183/// Calculate the magic numbers required to implement an unsigned integer 1184/// division by a constant as a sequence of multiplies, adds and shifts. 1185/// Requires that the divisor not be 0. Taken from "Hacker's Delight", Henry 1186/// S. Warren, Jr., chapter 10. 1187/// LeadingZeros can be used to simplify the calculation if the upper bits 1188/// of the divided value are known zero. 1189APInt::mu APInt::magicu(unsigned LeadingZeros) const { 1190 const APInt& d = *this; 1191 unsigned p; 1192 APInt nc, delta, q1, r1, q2, r2; 1193 struct mu magu; 1194 magu.a = 0; // initialize "add" indicator 1195 APInt allOnes = APInt::getAllOnesValue(d.getBitWidth()).lshr(LeadingZeros); 1196 APInt signedMin = APInt::getSignedMinValue(d.getBitWidth()); 1197 APInt signedMax = APInt::getSignedMaxValue(d.getBitWidth()); 1198 1199 nc = allOnes - (allOnes - d).urem(d); 1200 p = d.getBitWidth() - 1; // initialize p 1201 q1 = signedMin.udiv(nc); // initialize q1 = 2p/nc 1202 r1 = signedMin - q1*nc; // initialize r1 = rem(2p,nc) 1203 q2 = signedMax.udiv(d); // initialize q2 = (2p-1)/d 1204 r2 = signedMax - q2*d; // initialize r2 = rem((2p-1),d) 1205 do { 1206 p = p + 1; 1207 if (r1.uge(nc - r1)) { 1208 q1 = q1 + q1 + 1; // update q1 1209 r1 = r1 + r1 - nc; // update r1 1210 } 1211 else { 1212 q1 = q1+q1; // update q1 1213 r1 = r1+r1; // update r1 1214 } 1215 if ((r2 + 1).uge(d - r2)) { 1216 if (q2.uge(signedMax)) magu.a = 1; 1217 q2 = q2+q2 + 1; // update q2 1218 r2 = r2+r2 + 1 - d; // update r2 1219 } 1220 else { 1221 if (q2.uge(signedMin)) magu.a = 1; 1222 q2 = q2+q2; // update q2 1223 r2 = r2+r2 + 1; // update r2 1224 } 1225 delta = d - 1 - r2; 1226 } while (p < d.getBitWidth()*2 && 1227 (q1.ult(delta) || (q1 == delta && r1 == 0))); 1228 magu.m = q2 + 1; // resulting magic number 1229 magu.s = p - d.getBitWidth(); // resulting shift 1230 return magu; 1231} 1232 1233/// Implementation of Knuth's Algorithm D (Division of nonnegative integers) 1234/// from "Art of Computer Programming, Volume 2", section 4.3.1, p. 272. The 1235/// variables here have the same names as in the algorithm. Comments explain 1236/// the algorithm and any deviation from it. 1237static void KnuthDiv(uint32_t *u, uint32_t *v, uint32_t *q, uint32_t* r, 1238 unsigned m, unsigned n) { 1239 assert(u && "Must provide dividend"); 1240 assert(v && "Must provide divisor"); 1241 assert(q && "Must provide quotient"); 1242 assert(u != v && u != q && v != q && "Must use different memory"); 1243 assert(n>1 && "n must be > 1"); 1244 1245 // b denotes the base of the number system. In our case b is 2^32. 1246 const uint64_t b = uint64_t(1) << 32; 1247 1248// The DEBUG macros here tend to be spam in the debug output if you're not 1249// debugging this code. Disable them unless KNUTH_DEBUG is defined. 1250#ifdef KNUTH_DEBUG 1251#define DEBUG_KNUTH(X) LLVM_DEBUG(X) 1252#else 1253#define DEBUG_KNUTH(X) do {} while(false) 1254#endif 1255 1256 DEBUG_KNUTH(dbgs() << "KnuthDiv: m=" << m << " n=" << n << '\n'); 1257 DEBUG_KNUTH(dbgs() << "KnuthDiv: original:"); 1258 DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]); 1259 DEBUG_KNUTH(dbgs() << " by"); 1260 DEBUG_KNUTH(for (int i = n; i > 0; i--) dbgs() << " " << v[i - 1]); 1261 DEBUG_KNUTH(dbgs() << '\n'); 1262 // D1. [Normalize.] Set d = b / (v[n-1] + 1) and multiply all the digits of 1263 // u and v by d. Note that we have taken Knuth's advice here to use a power 1264 // of 2 value for d such that d * v[n-1] >= b/2 (b is the base). A power of 1265 // 2 allows us to shift instead of multiply and it is easy to determine the 1266 // shift amount from the leading zeros. We are basically normalizing the u 1267 // and v so that its high bits are shifted to the top of v's range without 1268 // overflow. Note that this can require an extra word in u so that u must 1269 // be of length m+n+1. 1270 unsigned shift = countLeadingZeros(v[n-1]); 1271 uint32_t v_carry = 0; 1272 uint32_t u_carry = 0; 1273 if (shift) { 1274 for (unsigned i = 0; i < m+n; ++i) { 1275 uint32_t u_tmp = u[i] >> (32 - shift); 1276 u[i] = (u[i] << shift) | u_carry; 1277 u_carry = u_tmp; 1278 } 1279 for (unsigned i = 0; i < n; ++i) { 1280 uint32_t v_tmp = v[i] >> (32 - shift); 1281 v[i] = (v[i] << shift) | v_carry; 1282 v_carry = v_tmp; 1283 } 1284 } 1285 u[m+n] = u_carry; 1286 1287 DEBUG_KNUTH(dbgs() << "KnuthDiv: normal:"); 1288 DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]); 1289 DEBUG_KNUTH(dbgs() << " by"); 1290 DEBUG_KNUTH(for (int i = n; i > 0; i--) dbgs() << " " << v[i - 1]); 1291 DEBUG_KNUTH(dbgs() << '\n'); 1292 1293 // D2. [Initialize j.] Set j to m. This is the loop counter over the places. 1294 int j = m; 1295 do { 1296 DEBUG_KNUTH(dbgs() << "KnuthDiv: quotient digit #" << j << '\n'); 1297 // D3. [Calculate q'.]. 1298 // Set qp = (u[j+n]*b + u[j+n-1]) / v[n-1]. (qp=qprime=q') 1299 // Set rp = (u[j+n]*b + u[j+n-1]) % v[n-1]. (rp=rprime=r') 1300 // Now test if qp == b or qp*v[n-2] > b*rp + u[j+n-2]; if so, decrease 1301 // qp by 1, increase rp by v[n-1], and repeat this test if rp < b. The test 1302 // on v[n-2] determines at high speed most of the cases in which the trial 1303 // value qp is one too large, and it eliminates all cases where qp is two 1304 // too large. 1305 uint64_t dividend = Make_64(u[j+n], u[j+n-1]); 1306 DEBUG_KNUTH(dbgs() << "KnuthDiv: dividend == " << dividend << '\n'); 1307 uint64_t qp = dividend / v[n-1]; 1308 uint64_t rp = dividend % v[n-1]; 1309 if (qp == b || qp*v[n-2] > b*rp + u[j+n-2]) { 1310 qp--; 1311 rp += v[n-1]; 1312 if (rp < b && (qp == b || qp*v[n-2] > b*rp + u[j+n-2])) 1313 qp--; 1314 } 1315 DEBUG_KNUTH(dbgs() << "KnuthDiv: qp == " << qp << ", rp == " << rp << '\n'); 1316 1317 // D4. [Multiply and subtract.] Replace (u[j+n]u[j+n-1]...u[j]) with 1318 // (u[j+n]u[j+n-1]..u[j]) - qp * (v[n-1]...v[1]v[0]). This computation 1319 // consists of a simple multiplication by a one-place number, combined with 1320 // a subtraction. 1321 // The digits (u[j+n]...u[j]) should be kept positive; if the result of 1322 // this step is actually negative, (u[j+n]...u[j]) should be left as the 1323 // true value plus b**(n+1), namely as the b's complement of 1324 // the true value, and a "borrow" to the left should be remembered. 1325 int64_t borrow = 0; 1326 for (unsigned i = 0; i < n; ++i) { 1327 uint64_t p = uint64_t(qp) * uint64_t(v[i]); 1328 int64_t subres = int64_t(u[j+i]) - borrow - Lo_32(p); 1329 u[j+i] = Lo_32(subres); 1330 borrow = Hi_32(p) - Hi_32(subres); 1331 DEBUG_KNUTH(dbgs() << "KnuthDiv: u[j+i] = " << u[j + i] 1332 << ", borrow = " << borrow << '\n'); 1333 } 1334 bool isNeg = u[j+n] < borrow; 1335 u[j+n] -= Lo_32(borrow); 1336 1337 DEBUG_KNUTH(dbgs() << "KnuthDiv: after subtraction:"); 1338 DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]); 1339 DEBUG_KNUTH(dbgs() << '\n'); 1340 1341 // D5. [Test remainder.] Set q[j] = qp. If the result of step D4 was 1342 // negative, go to step D6; otherwise go on to step D7. 1343 q[j] = Lo_32(qp); 1344 if (isNeg) { 1345 // D6. [Add back]. The probability that this step is necessary is very 1346 // small, on the order of only 2/b. Make sure that test data accounts for 1347 // this possibility. Decrease q[j] by 1 1348 q[j]--; 1349 // and add (0v[n-1]...v[1]v[0]) to (u[j+n]u[j+n-1]...u[j+1]u[j]). 1350 // A carry will occur to the left of u[j+n], and it should be ignored 1351 // since it cancels with the borrow that occurred in D4. 1352 bool carry = false; 1353 for (unsigned i = 0; i < n; i++) { 1354 uint32_t limit = std::min(u[j+i],v[i]); 1355 u[j+i] += v[i] + carry; 1356 carry = u[j+i] < limit || (carry && u[j+i] == limit); 1357 } 1358 u[j+n] += carry; 1359 } 1360 DEBUG_KNUTH(dbgs() << "KnuthDiv: after correction:"); 1361 DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]); 1362 DEBUG_KNUTH(dbgs() << "\nKnuthDiv: digit result = " << q[j] << '\n'); 1363 1364 // D7. [Loop on j.] Decrease j by one. Now if j >= 0, go back to D3. 1365 } while (--j >= 0); 1366 1367 DEBUG_KNUTH(dbgs() << "KnuthDiv: quotient:"); 1368 DEBUG_KNUTH(for (int i = m; i >= 0; i--) dbgs() << " " << q[i]); 1369 DEBUG_KNUTH(dbgs() << '\n'); 1370 1371 // D8. [Unnormalize]. Now q[...] is the desired quotient, and the desired 1372 // remainder may be obtained by dividing u[...] by d. If r is non-null we 1373 // compute the remainder (urem uses this). 1374 if (r) { 1375 // The value d is expressed by the "shift" value above since we avoided 1376 // multiplication by d by using a shift left. So, all we have to do is 1377 // shift right here. 1378 if (shift) { 1379 uint32_t carry = 0; 1380 DEBUG_KNUTH(dbgs() << "KnuthDiv: remainder:"); 1381 for (int i = n-1; i >= 0; i--) { 1382 r[i] = (u[i] >> shift) | carry; 1383 carry = u[i] << (32 - shift); 1384 DEBUG_KNUTH(dbgs() << " " << r[i]); 1385 } 1386 } else { 1387 for (int i = n-1; i >= 0; i--) { 1388 r[i] = u[i]; 1389 DEBUG_KNUTH(dbgs() << " " << r[i]); 1390 } 1391 } 1392 DEBUG_KNUTH(dbgs() << '\n'); 1393 } 1394 DEBUG_KNUTH(dbgs() << '\n'); 1395} 1396 1397void APInt::divide(const WordType *LHS, unsigned lhsWords, const WordType *RHS, 1398 unsigned rhsWords, WordType *Quotient, WordType *Remainder) { 1399 assert(lhsWords >= rhsWords && "Fractional result"); 1400 1401 // First, compose the values into an array of 32-bit words instead of 1402 // 64-bit words. This is a necessity of both the "short division" algorithm 1403 // and the Knuth "classical algorithm" which requires there to be native 1404 // operations for +, -, and * on an m bit value with an m*2 bit result. We 1405 // can't use 64-bit operands here because we don't have native results of 1406 // 128-bits. Furthermore, casting the 64-bit values to 32-bit values won't 1407 // work on large-endian machines. 1408 unsigned n = rhsWords * 2; 1409 unsigned m = (lhsWords * 2) - n; 1410 1411 // Allocate space for the temporary values we need either on the stack, if 1412 // it will fit, or on the heap if it won't. 1413 uint32_t SPACE[128]; 1414 uint32_t *U = nullptr; 1415 uint32_t *V = nullptr; 1416 uint32_t *Q = nullptr; 1417 uint32_t *R = nullptr; 1418 if ((Remainder?4:3)*n+2*m+1 <= 128) { 1419 U = &SPACE[0]; 1420 V = &SPACE[m+n+1]; 1421 Q = &SPACE[(m+n+1) + n]; 1422 if (Remainder) 1423 R = &SPACE[(m+n+1) + n + (m+n)]; 1424 } else { 1425 U = new uint32_t[m + n + 1]; 1426 V = new uint32_t[n]; 1427 Q = new uint32_t[m+n]; 1428 if (Remainder) 1429 R = new uint32_t[n]; 1430 } 1431 1432 // Initialize the dividend 1433 memset(U, 0, (m+n+1)*sizeof(uint32_t)); 1434 for (unsigned i = 0; i < lhsWords; ++i) { 1435 uint64_t tmp = LHS[i]; 1436 U[i * 2] = Lo_32(tmp); 1437 U[i * 2 + 1] = Hi_32(tmp); 1438 } 1439 U[m+n] = 0; // this extra word is for "spill" in the Knuth algorithm. 1440 1441 // Initialize the divisor 1442 memset(V, 0, (n)*sizeof(uint32_t)); 1443 for (unsigned i = 0; i < rhsWords; ++i) { 1444 uint64_t tmp = RHS[i]; 1445 V[i * 2] = Lo_32(tmp); 1446 V[i * 2 + 1] = Hi_32(tmp); 1447 } 1448 1449 // initialize the quotient and remainder 1450 memset(Q, 0, (m+n) * sizeof(uint32_t)); 1451 if (Remainder) 1452 memset(R, 0, n * sizeof(uint32_t)); 1453 1454 // Now, adjust m and n for the Knuth division. n is the number of words in 1455 // the divisor. m is the number of words by which the dividend exceeds the 1456 // divisor (i.e. m+n is the length of the dividend). These sizes must not 1457 // contain any zero words or the Knuth algorithm fails. 1458 for (unsigned i = n; i > 0 && V[i-1] == 0; i--) { 1459 n--; 1460 m++; 1461 } 1462 for (unsigned i = m+n; i > 0 && U[i-1] == 0; i--) 1463 m--; 1464 1465 // If we're left with only a single word for the divisor, Knuth doesn't work 1466 // so we implement the short division algorithm here. This is much simpler 1467 // and faster because we are certain that we can divide a 64-bit quantity 1468 // by a 32-bit quantity at hardware speed and short division is simply a 1469 // series of such operations. This is just like doing short division but we 1470 // are using base 2^32 instead of base 10. 1471 assert(n != 0 && "Divide by zero?"); 1472 if (n == 1) { 1473 uint32_t divisor = V[0]; 1474 uint32_t remainder = 0; 1475 for (int i = m; i >= 0; i--) { 1476 uint64_t partial_dividend = Make_64(remainder, U[i]); 1477 if (partial_dividend == 0) { 1478 Q[i] = 0; 1479 remainder = 0; 1480 } else if (partial_dividend < divisor) { 1481 Q[i] = 0; 1482 remainder = Lo_32(partial_dividend); 1483 } else if (partial_dividend == divisor) { 1484 Q[i] = 1; 1485 remainder = 0; 1486 } else { 1487 Q[i] = Lo_32(partial_dividend / divisor); 1488 remainder = Lo_32(partial_dividend - (Q[i] * divisor)); 1489 } 1490 } 1491 if (R) 1492 R[0] = remainder; 1493 } else { 1494 // Now we're ready to invoke the Knuth classical divide algorithm. In this 1495 // case n > 1. 1496 KnuthDiv(U, V, Q, R, m, n); 1497 } 1498 1499 // If the caller wants the quotient 1500 if (Quotient) { 1501 for (unsigned i = 0; i < lhsWords; ++i) 1502 Quotient[i] = Make_64(Q[i*2+1], Q[i*2]); 1503 } 1504 1505 // If the caller wants the remainder 1506 if (Remainder) { 1507 for (unsigned i = 0; i < rhsWords; ++i) 1508 Remainder[i] = Make_64(R[i*2+1], R[i*2]); 1509 } 1510 1511 // Clean up the memory we allocated. 1512 if (U != &SPACE[0]) { 1513 delete [] U; 1514 delete [] V; 1515 delete [] Q; 1516 delete [] R; 1517 } 1518} 1519 1520APInt APInt::udiv(const APInt &RHS) const { 1521 assert(BitWidth == RHS.BitWidth && "Bit widths must be the same"); 1522 1523 // First, deal with the easy case 1524 if (isSingleWord()) { 1525 assert(RHS.U.VAL != 0 && "Divide by zero?"); 1526 return APInt(BitWidth, U.VAL / RHS.U.VAL); 1527 } 1528 1529 // Get some facts about the LHS and RHS number of bits and words 1530 unsigned lhsWords = getNumWords(getActiveBits()); 1531 unsigned rhsBits = RHS.getActiveBits(); 1532 unsigned rhsWords = getNumWords(rhsBits); 1533 assert(rhsWords && "Divided by zero???"); 1534 1535 // Deal with some degenerate cases 1536 if (!lhsWords) 1537 // 0 / X ===> 0 1538 return APInt(BitWidth, 0); 1539 if (rhsBits == 1) 1540 // X / 1 ===> X 1541 return *this; 1542 if (lhsWords < rhsWords || this->ult(RHS)) 1543 // X / Y ===> 0, iff X < Y 1544 return APInt(BitWidth, 0); 1545 if (*this == RHS) 1546 // X / X ===> 1 1547 return APInt(BitWidth, 1); 1548 if (lhsWords == 1) // rhsWords is 1 if lhsWords is 1. 1549 // All high words are zero, just use native divide 1550 return APInt(BitWidth, this->U.pVal[0] / RHS.U.pVal[0]); 1551 1552 // We have to compute it the hard way. Invoke the Knuth divide algorithm. 1553 APInt Quotient(BitWidth, 0); // to hold result. 1554 divide(U.pVal, lhsWords, RHS.U.pVal, rhsWords, Quotient.U.pVal, nullptr); 1555 return Quotient; 1556} 1557 1558APInt APInt::udiv(uint64_t RHS) const { 1559 assert(RHS != 0 && "Divide by zero?"); 1560 1561 // First, deal with the easy case 1562 if (isSingleWord()) 1563 return APInt(BitWidth, U.VAL / RHS); 1564 1565 // Get some facts about the LHS words. 1566 unsigned lhsWords = getNumWords(getActiveBits()); 1567 1568 // Deal with some degenerate cases 1569 if (!lhsWords) 1570 // 0 / X ===> 0 1571 return APInt(BitWidth, 0); 1572 if (RHS == 1) 1573 // X / 1 ===> X 1574 return *this; 1575 if (this->ult(RHS)) 1576 // X / Y ===> 0, iff X < Y 1577 return APInt(BitWidth, 0); 1578 if (*this == RHS) 1579 // X / X ===> 1 1580 return APInt(BitWidth, 1); 1581 if (lhsWords == 1) // rhsWords is 1 if lhsWords is 1. 1582 // All high words are zero, just use native divide 1583 return APInt(BitWidth, this->U.pVal[0] / RHS); 1584 1585 // We have to compute it the hard way. Invoke the Knuth divide algorithm. 1586 APInt Quotient(BitWidth, 0); // to hold result. 1587 divide(U.pVal, lhsWords, &RHS, 1, Quotient.U.pVal, nullptr); 1588 return Quotient; 1589} 1590 1591APInt APInt::sdiv(const APInt &RHS) const { 1592 if (isNegative()) { 1593 if (RHS.isNegative()) 1594 return (-(*this)).udiv(-RHS); 1595 return -((-(*this)).udiv(RHS)); 1596 } 1597 if (RHS.isNegative()) 1598 return -(this->udiv(-RHS)); 1599 return this->udiv(RHS); 1600} 1601 1602APInt APInt::sdiv(int64_t RHS) const { 1603 if (isNegative()) { 1604 if (RHS < 0) 1605 return (-(*this)).udiv(-RHS); 1606 return -((-(*this)).udiv(RHS)); 1607 } 1608 if (RHS < 0) 1609 return -(this->udiv(-RHS)); 1610 return this->udiv(RHS); 1611} 1612 1613APInt APInt::urem(const APInt &RHS) const { 1614 assert(BitWidth == RHS.BitWidth && "Bit widths must be the same"); 1615 if (isSingleWord()) { 1616 assert(RHS.U.VAL != 0 && "Remainder by zero?"); 1617 return APInt(BitWidth, U.VAL % RHS.U.VAL); 1618 } 1619 1620 // Get some facts about the LHS 1621 unsigned lhsWords = getNumWords(getActiveBits()); 1622 1623 // Get some facts about the RHS 1624 unsigned rhsBits = RHS.getActiveBits(); 1625 unsigned rhsWords = getNumWords(rhsBits); 1626 assert(rhsWords && "Performing remainder operation by zero ???"); 1627 1628 // Check the degenerate cases 1629 if (lhsWords == 0) 1630 // 0 % Y ===> 0 1631 return APInt(BitWidth, 0); 1632 if (rhsBits == 1) 1633 // X % 1 ===> 0 1634 return APInt(BitWidth, 0); 1635 if (lhsWords < rhsWords || this->ult(RHS)) 1636 // X % Y ===> X, iff X < Y 1637 return *this; 1638 if (*this == RHS) 1639 // X % X == 0; 1640 return APInt(BitWidth, 0); 1641 if (lhsWords == 1) 1642 // All high words are zero, just use native remainder 1643 return APInt(BitWidth, U.pVal[0] % RHS.U.pVal[0]); 1644 1645 // We have to compute it the hard way. Invoke the Knuth divide algorithm. 1646 APInt Remainder(BitWidth, 0); 1647 divide(U.pVal, lhsWords, RHS.U.pVal, rhsWords, nullptr, Remainder.U.pVal); 1648 return Remainder; 1649} 1650 1651uint64_t APInt::urem(uint64_t RHS) const { 1652 assert(RHS != 0 && "Remainder by zero?"); 1653 1654 if (isSingleWord()) 1655 return U.VAL % RHS; 1656 1657 // Get some facts about the LHS 1658 unsigned lhsWords = getNumWords(getActiveBits()); 1659 1660 // Check the degenerate cases 1661 if (lhsWords == 0) 1662 // 0 % Y ===> 0 1663 return 0; 1664 if (RHS == 1) 1665 // X % 1 ===> 0 1666 return 0; 1667 if (this->ult(RHS)) 1668 // X % Y ===> X, iff X < Y 1669 return getZExtValue(); 1670 if (*this == RHS) 1671 // X % X == 0; 1672 return 0; 1673 if (lhsWords == 1) 1674 // All high words are zero, just use native remainder 1675 return U.pVal[0] % RHS; 1676 1677 // We have to compute it the hard way. Invoke the Knuth divide algorithm. 1678 uint64_t Remainder; 1679 divide(U.pVal, lhsWords, &RHS, 1, nullptr, &Remainder); 1680 return Remainder; 1681} 1682 1683APInt APInt::srem(const APInt &RHS) const { 1684 if (isNegative()) { 1685 if (RHS.isNegative()) 1686 return -((-(*this)).urem(-RHS)); 1687 return -((-(*this)).urem(RHS)); 1688 } 1689 if (RHS.isNegative()) 1690 return this->urem(-RHS); 1691 return this->urem(RHS); 1692} 1693 1694int64_t APInt::srem(int64_t RHS) const { 1695 if (isNegative()) { 1696 if (RHS < 0) 1697 return -((-(*this)).urem(-RHS)); 1698 return -((-(*this)).urem(RHS)); 1699 } 1700 if (RHS < 0) 1701 return this->urem(-RHS); 1702 return this->urem(RHS); 1703} 1704 1705void APInt::udivrem(const APInt &LHS, const APInt &RHS, 1706 APInt &Quotient, APInt &Remainder) { 1707 assert(LHS.BitWidth == RHS.BitWidth && "Bit widths must be the same"); 1708 unsigned BitWidth = LHS.BitWidth; 1709 1710 // First, deal with the easy case 1711 if (LHS.isSingleWord()) { 1712 assert(RHS.U.VAL != 0 && "Divide by zero?"); 1713 uint64_t QuotVal = LHS.U.VAL / RHS.U.VAL; 1714 uint64_t RemVal = LHS.U.VAL % RHS.U.VAL; 1715 Quotient = APInt(BitWidth, QuotVal); 1716 Remainder = APInt(BitWidth, RemVal); 1717 return; 1718 } 1719 1720 // Get some size facts about the dividend and divisor 1721 unsigned lhsWords = getNumWords(LHS.getActiveBits()); 1722 unsigned rhsBits = RHS.getActiveBits(); 1723 unsigned rhsWords = getNumWords(rhsBits); 1724 assert(rhsWords && "Performing divrem operation by zero ???"); 1725 1726 // Check the degenerate cases 1727 if (lhsWords == 0) { 1728 Quotient = APInt(BitWidth, 0); // 0 / Y ===> 0 1729 Remainder = APInt(BitWidth, 0); // 0 % Y ===> 0 1730 return; 1731 } 1732 1733 if (rhsBits == 1) { 1734 Quotient = LHS; // X / 1 ===> X 1735 Remainder = APInt(BitWidth, 0); // X % 1 ===> 0 1736 } 1737 1738 if (lhsWords < rhsWords || LHS.ult(RHS)) { 1739 Remainder = LHS; // X % Y ===> X, iff X < Y 1740 Quotient = APInt(BitWidth, 0); // X / Y ===> 0, iff X < Y 1741 return; 1742 } 1743 1744 if (LHS == RHS) { 1745 Quotient = APInt(BitWidth, 1); // X / X ===> 1 1746 Remainder = APInt(BitWidth, 0); // X % X ===> 0; 1747 return; 1748 } 1749 1750 // Make sure there is enough space to hold the results. 1751 // NOTE: This assumes that reallocate won't affect any bits if it doesn't 1752 // change the size. This is necessary if Quotient or Remainder is aliased 1753 // with LHS or RHS. 1754 Quotient.reallocate(BitWidth); 1755 Remainder.reallocate(BitWidth); 1756 1757 if (lhsWords == 1) { // rhsWords is 1 if lhsWords is 1. 1758 // There is only one word to consider so use the native versions. 1759 uint64_t lhsValue = LHS.U.pVal[0]; 1760 uint64_t rhsValue = RHS.U.pVal[0]; 1761 Quotient = lhsValue / rhsValue; 1762 Remainder = lhsValue % rhsValue; 1763 return; 1764 } 1765 1766 // Okay, lets do it the long way 1767 divide(LHS.U.pVal, lhsWords, RHS.U.pVal, rhsWords, Quotient.U.pVal, 1768 Remainder.U.pVal); 1769 // Clear the rest of the Quotient and Remainder. 1770 std::memset(Quotient.U.pVal + lhsWords, 0, 1771 (getNumWords(BitWidth) - lhsWords) * APINT_WORD_SIZE); 1772 std::memset(Remainder.U.pVal + rhsWords, 0, 1773 (getNumWords(BitWidth) - rhsWords) * APINT_WORD_SIZE); 1774} 1775 1776void APInt::udivrem(const APInt &LHS, uint64_t RHS, APInt &Quotient, 1777 uint64_t &Remainder) { 1778 assert(RHS != 0 && "Divide by zero?"); 1779 unsigned BitWidth = LHS.BitWidth; 1780 1781 // First, deal with the easy case 1782 if (LHS.isSingleWord()) { 1783 uint64_t QuotVal = LHS.U.VAL / RHS; 1784 Remainder = LHS.U.VAL % RHS; 1785 Quotient = APInt(BitWidth, QuotVal); 1786 return; 1787 } 1788 1789 // Get some size facts about the dividend and divisor 1790 unsigned lhsWords = getNumWords(LHS.getActiveBits()); 1791 1792 // Check the degenerate cases 1793 if (lhsWords == 0) { 1794 Quotient = APInt(BitWidth, 0); // 0 / Y ===> 0 1795 Remainder = 0; // 0 % Y ===> 0 1796 return; 1797 } 1798 1799 if (RHS == 1) { 1800 Quotient = LHS; // X / 1 ===> X 1801 Remainder = 0; // X % 1 ===> 0 1802 return; 1803 } 1804 1805 if (LHS.ult(RHS)) { 1806 Remainder = LHS.getZExtValue(); // X % Y ===> X, iff X < Y 1807 Quotient = APInt(BitWidth, 0); // X / Y ===> 0, iff X < Y 1808 return; 1809 } 1810 1811 if (LHS == RHS) { 1812 Quotient = APInt(BitWidth, 1); // X / X ===> 1 1813 Remainder = 0; // X % X ===> 0; 1814 return; 1815 } 1816 1817 // Make sure there is enough space to hold the results. 1818 // NOTE: This assumes that reallocate won't affect any bits if it doesn't 1819 // change the size. This is necessary if Quotient is aliased with LHS. 1820 Quotient.reallocate(BitWidth); 1821 1822 if (lhsWords == 1) { // rhsWords is 1 if lhsWords is 1. 1823 // There is only one word to consider so use the native versions. 1824 uint64_t lhsValue = LHS.U.pVal[0]; 1825 Quotient = lhsValue / RHS; 1826 Remainder = lhsValue % RHS; 1827 return; 1828 } 1829 1830 // Okay, lets do it the long way 1831 divide(LHS.U.pVal, lhsWords, &RHS, 1, Quotient.U.pVal, &Remainder); 1832 // Clear the rest of the Quotient. 1833 std::memset(Quotient.U.pVal + lhsWords, 0, 1834 (getNumWords(BitWidth) - lhsWords) * APINT_WORD_SIZE); 1835} 1836 1837void APInt::sdivrem(const APInt &LHS, const APInt &RHS, 1838 APInt &Quotient, APInt &Remainder) { 1839 if (LHS.isNegative()) { 1840 if (RHS.isNegative()) 1841 APInt::udivrem(-LHS, -RHS, Quotient, Remainder); 1842 else { 1843 APInt::udivrem(-LHS, RHS, Quotient, Remainder); 1844 Quotient.negate(); 1845 } 1846 Remainder.negate(); 1847 } else if (RHS.isNegative()) { 1848 APInt::udivrem(LHS, -RHS, Quotient, Remainder); 1849 Quotient.negate(); 1850 } else { 1851 APInt::udivrem(LHS, RHS, Quotient, Remainder); 1852 } 1853} 1854 1855void APInt::sdivrem(const APInt &LHS, int64_t RHS, 1856 APInt &Quotient, int64_t &Remainder) { 1857 uint64_t R = Remainder; 1858 if (LHS.isNegative()) { 1859 if (RHS < 0) 1860 APInt::udivrem(-LHS, -RHS, Quotient, R); 1861 else { 1862 APInt::udivrem(-LHS, RHS, Quotient, R); 1863 Quotient.negate(); 1864 } 1865 R = -R; 1866 } else if (RHS < 0) { 1867 APInt::udivrem(LHS, -RHS, Quotient, R); 1868 Quotient.negate(); 1869 } else { 1870 APInt::udivrem(LHS, RHS, Quotient, R); 1871 } 1872 Remainder = R; 1873} 1874 1875APInt APInt::sadd_ov(const APInt &RHS, bool &Overflow) const { 1876 APInt Res = *this+RHS; 1877 Overflow = isNonNegative() == RHS.isNonNegative() && 1878 Res.isNonNegative() != isNonNegative(); 1879 return Res; 1880} 1881 1882APInt APInt::uadd_ov(const APInt &RHS, bool &Overflow) const { 1883 APInt Res = *this+RHS; 1884 Overflow = Res.ult(RHS); 1885 return Res; 1886} 1887 1888APInt APInt::ssub_ov(const APInt &RHS, bool &Overflow) const { 1889 APInt Res = *this - RHS; 1890 Overflow = isNonNegative() != RHS.isNonNegative() && 1891 Res.isNonNegative() != isNonNegative(); 1892 return Res; 1893} 1894 1895APInt APInt::usub_ov(const APInt &RHS, bool &Overflow) const { 1896 APInt Res = *this-RHS; 1897 Overflow = Res.ugt(*this); 1898 return Res; 1899} 1900 1901APInt APInt::sdiv_ov(const APInt &RHS, bool &Overflow) const { 1902 // MININT/-1 --> overflow. 1903 Overflow = isMinSignedValue() && RHS.isAllOnesValue(); 1904 return sdiv(RHS); 1905} 1906 1907APInt APInt::smul_ov(const APInt &RHS, bool &Overflow) const { 1908 APInt Res = *this * RHS; 1909 1910 if (*this != 0 && RHS != 0) 1911 Overflow = Res.sdiv(RHS) != *this || Res.sdiv(*this) != RHS; 1912 else 1913 Overflow = false; 1914 return Res; 1915} 1916 1917APInt APInt::umul_ov(const APInt &RHS, bool &Overflow) const { 1918 APInt Res = *this * RHS; 1919 1920 if (*this != 0 && RHS != 0) 1921 Overflow = Res.udiv(RHS) != *this || Res.udiv(*this) != RHS; 1922 else 1923 Overflow = false; 1924 return Res; 1925} 1926 1927APInt APInt::sshl_ov(const APInt &ShAmt, bool &Overflow) const { 1928 Overflow = ShAmt.uge(getBitWidth()); 1929 if (Overflow) 1930 return APInt(BitWidth, 0); 1931 1932 if (isNonNegative()) // Don't allow sign change. 1933 Overflow = ShAmt.uge(countLeadingZeros()); 1934 else 1935 Overflow = ShAmt.uge(countLeadingOnes()); 1936 1937 return *this << ShAmt; 1938} 1939 1940APInt APInt::ushl_ov(const APInt &ShAmt, bool &Overflow) const { 1941 Overflow = ShAmt.uge(getBitWidth()); 1942 if (Overflow) 1943 return APInt(BitWidth, 0); 1944 1945 Overflow = ShAmt.ugt(countLeadingZeros()); 1946 1947 return *this << ShAmt; 1948} 1949 1950APInt APInt::sadd_sat(const APInt &RHS) const { 1951 bool Overflow; 1952 APInt Res = sadd_ov(RHS, Overflow); 1953 if (!Overflow) 1954 return Res; 1955 1956 return isNegative() ? APInt::getSignedMinValue(BitWidth) 1957 : APInt::getSignedMaxValue(BitWidth); 1958} 1959 1960APInt APInt::uadd_sat(const APInt &RHS) const { 1961 bool Overflow; 1962 APInt Res = uadd_ov(RHS, Overflow); 1963 if (!Overflow) 1964 return Res; 1965 1966 return APInt::getMaxValue(BitWidth); 1967} 1968 1969APInt APInt::ssub_sat(const APInt &RHS) const { 1970 bool Overflow; 1971 APInt Res = ssub_ov(RHS, Overflow); 1972 if (!Overflow) 1973 return Res; 1974 1975 return isNegative() ? APInt::getSignedMinValue(BitWidth) 1976 : APInt::getSignedMaxValue(BitWidth); 1977} 1978 1979APInt APInt::usub_sat(const APInt &RHS) const { 1980 bool Overflow; 1981 APInt Res = usub_ov(RHS, Overflow); 1982 if (!Overflow) 1983 return Res; 1984 1985 return APInt(BitWidth, 0); 1986} 1987 1988 1989void APInt::fromString(unsigned numbits, StringRef str, uint8_t radix) { 1990 // Check our assumptions here 1991 assert(!str.empty() && "Invalid string length"); 1992 assert((radix == 10 || radix == 8 || radix == 16 || radix == 2 || 1993 radix == 36) && 1994 "Radix should be 2, 8, 10, 16, or 36!"); 1995 1996 StringRef::iterator p = str.begin(); 1997 size_t slen = str.size(); 1998 bool isNeg = *p == '-'; 1999 if (*p == '-' || *p == '+') { 2000 p++; 2001 slen--; 2002 assert(slen && "String is only a sign, needs a value."); 2003 } 2004 assert((slen <= numbits || radix != 2) && "Insufficient bit width"); 2005 assert(((slen-1)*3 <= numbits || radix != 8) && "Insufficient bit width"); 2006 assert(((slen-1)*4 <= numbits || radix != 16) && "Insufficient bit width"); 2007 assert((((slen-1)*64)/22 <= numbits || radix != 10) && 2008 "Insufficient bit width"); 2009 2010 // Allocate memory if needed 2011 if (isSingleWord()) 2012 U.VAL = 0; 2013 else 2014 U.pVal = getClearedMemory(getNumWords()); 2015 2016 // Figure out if we can shift instead of multiply 2017 unsigned shift = (radix == 16 ? 4 : radix == 8 ? 3 : radix == 2 ? 1 : 0); 2018 2019 // Enter digit traversal loop 2020 for (StringRef::iterator e = str.end(); p != e; ++p) { 2021 unsigned digit = getDigit(*p, radix); 2022 assert(digit < radix && "Invalid character in digit string"); 2023 2024 // Shift or multiply the value by the radix 2025 if (slen > 1) { 2026 if (shift) 2027 *this <<= shift; 2028 else 2029 *this *= radix; 2030 } 2031 2032 // Add in the digit we just interpreted 2033 *this += digit; 2034 } 2035 // If its negative, put it in two's complement form 2036 if (isNeg) 2037 this->negate(); 2038} 2039 2040void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix, 2041 bool Signed, bool formatAsCLiteral) const { 2042 assert((Radix == 10 || Radix == 8 || Radix == 16 || Radix == 2 || 2043 Radix == 36) && 2044 "Radix should be 2, 8, 10, 16, or 36!"); 2045 2046 const char *Prefix = ""; 2047 if (formatAsCLiteral) { 2048 switch (Radix) { 2049 case 2: 2050 // Binary literals are a non-standard extension added in gcc 4.3: 2051 // http://gcc.gnu.org/onlinedocs/gcc-4.3.0/gcc/Binary-constants.html 2052 Prefix = "0b"; 2053 break; 2054 case 8: 2055 Prefix = "0"; 2056 break; 2057 case 10: 2058 break; // No prefix 2059 case 16: 2060 Prefix = "0x"; 2061 break; 2062 default: 2063 llvm_unreachable("Invalid radix!"); 2064 } 2065 } 2066 2067 // First, check for a zero value and just short circuit the logic below. 2068 if (*this == 0) { 2069 while (*Prefix) { 2070 Str.push_back(*Prefix); 2071 ++Prefix; 2072 }; 2073 Str.push_back('0'); 2074 return; 2075 } 2076 2077 static const char Digits[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"; 2078 2079 if (isSingleWord()) { 2080 char Buffer[65]; 2081 char *BufPtr = std::end(Buffer); 2082 2083 uint64_t N; 2084 if (!Signed) { 2085 N = getZExtValue(); 2086 } else { 2087 int64_t I = getSExtValue(); 2088 if (I >= 0) { 2089 N = I; 2090 } else { 2091 Str.push_back('-'); 2092 N = -(uint64_t)I; 2093 } 2094 } 2095 2096 while (*Prefix) { 2097 Str.push_back(*Prefix); 2098 ++Prefix; 2099 }; 2100 2101 while (N) { 2102 *--BufPtr = Digits[N % Radix]; 2103 N /= Radix; 2104 } 2105 Str.append(BufPtr, std::end(Buffer)); 2106 return; 2107 } 2108 2109 APInt Tmp(*this); 2110 2111 if (Signed && isNegative()) { 2112 // They want to print the signed version and it is a negative value 2113 // Flip the bits and add one to turn it into the equivalent positive 2114 // value and put a '-' in the result. 2115 Tmp.negate(); 2116 Str.push_back('-'); 2117 } 2118 2119 while (*Prefix) { 2120 Str.push_back(*Prefix); 2121 ++Prefix; 2122 }; 2123 2124 // We insert the digits backward, then reverse them to get the right order. 2125 unsigned StartDig = Str.size(); 2126 2127 // For the 2, 8 and 16 bit cases, we can just shift instead of divide 2128 // because the number of bits per digit (1, 3 and 4 respectively) divides 2129 // equally. We just shift until the value is zero. 2130 if (Radix == 2 || Radix == 8 || Radix == 16) { 2131 // Just shift tmp right for each digit width until it becomes zero 2132 unsigned ShiftAmt = (Radix == 16 ? 4 : (Radix == 8 ? 3 : 1)); 2133 unsigned MaskAmt = Radix - 1; 2134 2135 while (Tmp.getBoolValue()) { 2136 unsigned Digit = unsigned(Tmp.getRawData()[0]) & MaskAmt; 2137 Str.push_back(Digits[Digit]); 2138 Tmp.lshrInPlace(ShiftAmt); 2139 } 2140 } else { 2141 while (Tmp.getBoolValue()) { 2142 uint64_t Digit; 2143 udivrem(Tmp, Radix, Tmp, Digit); 2144 assert(Digit < Radix && "divide failed"); 2145 Str.push_back(Digits[Digit]); 2146 } 2147 } 2148 2149 // Reverse the digits before returning. 2150 std::reverse(Str.begin()+StartDig, Str.end()); 2151} 2152 2153/// Returns the APInt as a std::string. Note that this is an inefficient method. 2154/// It is better to pass in a SmallVector/SmallString to the methods above. 2155std::string APInt::toString(unsigned Radix = 10, bool Signed = true) const { 2156 SmallString<40> S; 2157 toString(S, Radix, Signed, /* formatAsCLiteral = */false); 2158 return S.str(); 2159} 2160 2161#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 2162LLVM_DUMP_METHOD void APInt::dump() const { 2163 SmallString<40> S, U; 2164 this->toStringUnsigned(U); 2165 this->toStringSigned(S); 2166 dbgs() << "APInt(" << BitWidth << "b, " 2167 << U << "u " << S << "s)\n"; 2168} 2169#endif 2170 2171void APInt::print(raw_ostream &OS, bool isSigned) const { 2172 SmallString<40> S; 2173 this->toString(S, 10, isSigned, /* formatAsCLiteral = */false); 2174 OS << S; 2175} 2176 2177// This implements a variety of operations on a representation of 2178// arbitrary precision, two's-complement, bignum integer values. 2179 2180// Assumed by lowHalf, highHalf, partMSB and partLSB. A fairly safe 2181// and unrestricting assumption. 2182static_assert(APInt::APINT_BITS_PER_WORD % 2 == 0, 2183 "Part width must be divisible by 2!"); 2184 2185/* Some handy functions local to this file. */ 2186 2187/* Returns the integer part with the least significant BITS set. 2188 BITS cannot be zero. */ 2189static inline APInt::WordType lowBitMask(unsigned bits) { 2190 assert(bits != 0 && bits <= APInt::APINT_BITS_PER_WORD); 2191 2192 return ~(APInt::WordType) 0 >> (APInt::APINT_BITS_PER_WORD - bits); 2193} 2194 2195/* Returns the value of the lower half of PART. */ 2196static inline APInt::WordType lowHalf(APInt::WordType part) { 2197 return part & lowBitMask(APInt::APINT_BITS_PER_WORD / 2); 2198} 2199 2200/* Returns the value of the upper half of PART. */ 2201static inline APInt::WordType highHalf(APInt::WordType part) { 2202 return part >> (APInt::APINT_BITS_PER_WORD / 2); 2203} 2204 2205/* Returns the bit number of the most significant set bit of a part. 2206 If the input number has no bits set -1U is returned. */ 2207static unsigned partMSB(APInt::WordType value) { 2208 return findLastSet(value, ZB_Max); 2209} 2210 2211/* Returns the bit number of the least significant set bit of a 2212 part. If the input number has no bits set -1U is returned. */ 2213static unsigned partLSB(APInt::WordType value) { 2214 return findFirstSet(value, ZB_Max); 2215} 2216 2217/* Sets the least significant part of a bignum to the input value, and 2218 zeroes out higher parts. */ 2219void APInt::tcSet(WordType *dst, WordType part, unsigned parts) { 2220 assert(parts > 0); 2221 2222 dst[0] = part; 2223 for (unsigned i = 1; i < parts; i++) 2224 dst[i] = 0; 2225} 2226 2227/* Assign one bignum to another. */ 2228void APInt::tcAssign(WordType *dst, const WordType *src, unsigned parts) { 2229 for (unsigned i = 0; i < parts; i++) 2230 dst[i] = src[i]; 2231} 2232 2233/* Returns true if a bignum is zero, false otherwise. */ 2234bool APInt::tcIsZero(const WordType *src, unsigned parts) { 2235 for (unsigned i = 0; i < parts; i++) 2236 if (src[i]) 2237 return false; 2238 2239 return true; 2240} 2241 2242/* Extract the given bit of a bignum; returns 0 or 1. */ 2243int APInt::tcExtractBit(const WordType *parts, unsigned bit) { 2244 return (parts[whichWord(bit)] & maskBit(bit)) != 0; 2245} 2246 2247/* Set the given bit of a bignum. */ 2248void APInt::tcSetBit(WordType *parts, unsigned bit) { 2249 parts[whichWord(bit)] |= maskBit(bit); 2250} 2251 2252/* Clears the given bit of a bignum. */ 2253void APInt::tcClearBit(WordType *parts, unsigned bit) { 2254 parts[whichWord(bit)] &= ~maskBit(bit); 2255} 2256 2257/* Returns the bit number of the least significant set bit of a 2258 number. If the input number has no bits set -1U is returned. */ 2259unsigned APInt::tcLSB(const WordType *parts, unsigned n) { 2260 for (unsigned i = 0; i < n; i++) { 2261 if (parts[i] != 0) { 2262 unsigned lsb = partLSB(parts[i]); 2263 2264 return lsb + i * APINT_BITS_PER_WORD; 2265 } 2266 } 2267 2268 return -1U; 2269} 2270 2271/* Returns the bit number of the most significant set bit of a number. 2272 If the input number has no bits set -1U is returned. */ 2273unsigned APInt::tcMSB(const WordType *parts, unsigned n) { 2274 do { 2275 --n; 2276 2277 if (parts[n] != 0) { 2278 unsigned msb = partMSB(parts[n]); 2279 2280 return msb + n * APINT_BITS_PER_WORD; 2281 } 2282 } while (n); 2283 2284 return -1U; 2285} 2286 2287/* Copy the bit vector of width srcBITS from SRC, starting at bit 2288 srcLSB, to DST, of dstCOUNT parts, such that the bit srcLSB becomes 2289 the least significant bit of DST. All high bits above srcBITS in 2290 DST are zero-filled. */ 2291void 2292APInt::tcExtract(WordType *dst, unsigned dstCount, const WordType *src, 2293 unsigned srcBits, unsigned srcLSB) { 2294 unsigned dstParts = (srcBits + APINT_BITS_PER_WORD - 1) / APINT_BITS_PER_WORD; 2295 assert(dstParts <= dstCount); 2296 2297 unsigned firstSrcPart = srcLSB / APINT_BITS_PER_WORD; 2298 tcAssign (dst, src + firstSrcPart, dstParts); 2299 2300 unsigned shift = srcLSB % APINT_BITS_PER_WORD; 2301 tcShiftRight (dst, dstParts, shift); 2302 2303 /* We now have (dstParts * APINT_BITS_PER_WORD - shift) bits from SRC 2304 in DST. If this is less that srcBits, append the rest, else 2305 clear the high bits. */ 2306 unsigned n = dstParts * APINT_BITS_PER_WORD - shift; 2307 if (n < srcBits) { 2308 WordType mask = lowBitMask (srcBits - n); 2309 dst[dstParts - 1] |= ((src[firstSrcPart + dstParts] & mask) 2310 << n % APINT_BITS_PER_WORD); 2311 } else if (n > srcBits) { 2312 if (srcBits % APINT_BITS_PER_WORD) 2313 dst[dstParts - 1] &= lowBitMask (srcBits % APINT_BITS_PER_WORD); 2314 } 2315 2316 /* Clear high parts. */ 2317 while (dstParts < dstCount) 2318 dst[dstParts++] = 0; 2319} 2320 2321/* DST += RHS + C where C is zero or one. Returns the carry flag. */ 2322APInt::WordType APInt::tcAdd(WordType *dst, const WordType *rhs, 2323 WordType c, unsigned parts) { 2324 assert(c <= 1); 2325 2326 for (unsigned i = 0; i < parts; i++) { 2327 WordType l = dst[i]; 2328 if (c) { 2329 dst[i] += rhs[i] + 1; 2330 c = (dst[i] <= l); 2331 } else { 2332 dst[i] += rhs[i]; 2333 c = (dst[i] < l); 2334 } 2335 } 2336 2337 return c; 2338} 2339 2340/// This function adds a single "word" integer, src, to the multiple 2341/// "word" integer array, dst[]. dst[] is modified to reflect the addition and 2342/// 1 is returned if there is a carry out, otherwise 0 is returned. 2343/// @returns the carry of the addition. 2344APInt::WordType APInt::tcAddPart(WordType *dst, WordType src, 2345 unsigned parts) { 2346 for (unsigned i = 0; i < parts; ++i) { 2347 dst[i] += src; 2348 if (dst[i] >= src) 2349 return 0; // No need to carry so exit early. 2350 src = 1; // Carry one to next digit. 2351 } 2352 2353 return 1; 2354} 2355 2356/* DST -= RHS + C where C is zero or one. Returns the carry flag. */ 2357APInt::WordType APInt::tcSubtract(WordType *dst, const WordType *rhs, 2358 WordType c, unsigned parts) { 2359 assert(c <= 1); 2360 2361 for (unsigned i = 0; i < parts; i++) { 2362 WordType l = dst[i]; 2363 if (c) { 2364 dst[i] -= rhs[i] + 1; 2365 c = (dst[i] >= l); 2366 } else { 2367 dst[i] -= rhs[i]; 2368 c = (dst[i] > l); 2369 } 2370 } 2371 2372 return c; 2373} 2374 2375/// This function subtracts a single "word" (64-bit word), src, from 2376/// the multi-word integer array, dst[], propagating the borrowed 1 value until 2377/// no further borrowing is needed or it runs out of "words" in dst. The result 2378/// is 1 if "borrowing" exhausted the digits in dst, or 0 if dst was not 2379/// exhausted. In other words, if src > dst then this function returns 1, 2380/// otherwise 0. 2381/// @returns the borrow out of the subtraction 2382APInt::WordType APInt::tcSubtractPart(WordType *dst, WordType src, 2383 unsigned parts) { 2384 for (unsigned i = 0; i < parts; ++i) { 2385 WordType Dst = dst[i]; 2386 dst[i] -= src; 2387 if (src <= Dst) 2388 return 0; // No need to borrow so exit early. 2389 src = 1; // We have to "borrow 1" from next "word" 2390 } 2391 2392 return 1; 2393} 2394 2395/* Negate a bignum in-place. */ 2396void APInt::tcNegate(WordType *dst, unsigned parts) { 2397 tcComplement(dst, parts); 2398 tcIncrement(dst, parts); 2399} 2400 2401/* DST += SRC * MULTIPLIER + CARRY if add is true 2402 DST = SRC * MULTIPLIER + CARRY if add is false 2403 2404 Requires 0 <= DSTPARTS <= SRCPARTS + 1. If DST overlaps SRC 2405 they must start at the same point, i.e. DST == SRC. 2406 2407 If DSTPARTS == SRCPARTS + 1 no overflow occurs and zero is 2408 returned. Otherwise DST is filled with the least significant 2409 DSTPARTS parts of the result, and if all of the omitted higher 2410 parts were zero return zero, otherwise overflow occurred and 2411 return one. */ 2412int APInt::tcMultiplyPart(WordType *dst, const WordType *src, 2413 WordType multiplier, WordType carry, 2414 unsigned srcParts, unsigned dstParts, 2415 bool add) { 2416 /* Otherwise our writes of DST kill our later reads of SRC. */ 2417 assert(dst <= src || dst >= src + srcParts); 2418 assert(dstParts <= srcParts + 1); 2419 2420 /* N loops; minimum of dstParts and srcParts. */ 2421 unsigned n = std::min(dstParts, srcParts); 2422 2423 for (unsigned i = 0; i < n; i++) { 2424 WordType low, mid, high, srcPart; 2425 2426 /* [ LOW, HIGH ] = MULTIPLIER * SRC[i] + DST[i] + CARRY. 2427 2428 This cannot overflow, because 2429 2430 (n - 1) * (n - 1) + 2 (n - 1) = (n - 1) * (n + 1) 2431 2432 which is less than n^2. */ 2433 2434 srcPart = src[i]; 2435 2436 if (multiplier == 0 || srcPart == 0) { 2437 low = carry; 2438 high = 0; 2439 } else { 2440 low = lowHalf(srcPart) * lowHalf(multiplier); 2441 high = highHalf(srcPart) * highHalf(multiplier); 2442 2443 mid = lowHalf(srcPart) * highHalf(multiplier); 2444 high += highHalf(mid); 2445 mid <<= APINT_BITS_PER_WORD / 2; 2446 if (low + mid < low) 2447 high++; 2448 low += mid; 2449 2450 mid = highHalf(srcPart) * lowHalf(multiplier); 2451 high += highHalf(mid); 2452 mid <<= APINT_BITS_PER_WORD / 2; 2453 if (low + mid < low) 2454 high++; 2455 low += mid; 2456 2457 /* Now add carry. */ 2458 if (low + carry < low) 2459 high++; 2460 low += carry; 2461 } 2462 2463 if (add) { 2464 /* And now DST[i], and store the new low part there. */ 2465 if (low + dst[i] < low) 2466 high++; 2467 dst[i] += low; 2468 } else 2469 dst[i] = low; 2470 2471 carry = high; 2472 } 2473 2474 if (srcParts < dstParts) { 2475 /* Full multiplication, there is no overflow. */ 2476 assert(srcParts + 1 == dstParts); 2477 dst[srcParts] = carry; 2478 return 0; 2479 } 2480 2481 /* We overflowed if there is carry. */ 2482 if (carry) 2483 return 1; 2484 2485 /* We would overflow if any significant unwritten parts would be 2486 non-zero. This is true if any remaining src parts are non-zero 2487 and the multiplier is non-zero. */ 2488 if (multiplier) 2489 for (unsigned i = dstParts; i < srcParts; i++) 2490 if (src[i]) 2491 return 1; 2492 2493 /* We fitted in the narrow destination. */ 2494 return 0; 2495} 2496 2497/* DST = LHS * RHS, where DST has the same width as the operands and 2498 is filled with the least significant parts of the result. Returns 2499 one if overflow occurred, otherwise zero. DST must be disjoint 2500 from both operands. */ 2501int APInt::tcMultiply(WordType *dst, const WordType *lhs, 2502 const WordType *rhs, unsigned parts) { 2503 assert(dst != lhs && dst != rhs); 2504 2505 int overflow = 0; 2506 tcSet(dst, 0, parts); 2507 2508 for (unsigned i = 0; i < parts; i++) 2509 overflow |= tcMultiplyPart(&dst[i], lhs, rhs[i], 0, parts, 2510 parts - i, true); 2511 2512 return overflow; 2513} 2514 2515/// DST = LHS * RHS, where DST has width the sum of the widths of the 2516/// operands. No overflow occurs. DST must be disjoint from both operands. 2517void APInt::tcFullMultiply(WordType *dst, const WordType *lhs, 2518 const WordType *rhs, unsigned lhsParts, 2519 unsigned rhsParts) { 2520 /* Put the narrower number on the LHS for less loops below. */ 2521 if (lhsParts > rhsParts) 2522 return tcFullMultiply (dst, rhs, lhs, rhsParts, lhsParts); 2523 2524 assert(dst != lhs && dst != rhs); 2525 2526 tcSet(dst, 0, rhsParts); 2527 2528 for (unsigned i = 0; i < lhsParts; i++) 2529 tcMultiplyPart(&dst[i], rhs, lhs[i], 0, rhsParts, rhsParts + 1, true); 2530} 2531 2532/* If RHS is zero LHS and REMAINDER are left unchanged, return one. 2533 Otherwise set LHS to LHS / RHS with the fractional part discarded, 2534 set REMAINDER to the remainder, return zero. i.e. 2535 2536 OLD_LHS = RHS * LHS + REMAINDER 2537 2538 SCRATCH is a bignum of the same size as the operands and result for 2539 use by the routine; its contents need not be initialized and are 2540 destroyed. LHS, REMAINDER and SCRATCH must be distinct. 2541*/ 2542int APInt::tcDivide(WordType *lhs, const WordType *rhs, 2543 WordType *remainder, WordType *srhs, 2544 unsigned parts) { 2545 assert(lhs != remainder && lhs != srhs && remainder != srhs); 2546 2547 unsigned shiftCount = tcMSB(rhs, parts) + 1; 2548 if (shiftCount == 0) 2549 return true; 2550 2551 shiftCount = parts * APINT_BITS_PER_WORD - shiftCount; 2552 unsigned n = shiftCount / APINT_BITS_PER_WORD; 2553 WordType mask = (WordType) 1 << (shiftCount % APINT_BITS_PER_WORD); 2554 2555 tcAssign(srhs, rhs, parts); 2556 tcShiftLeft(srhs, parts, shiftCount); 2557 tcAssign(remainder, lhs, parts); 2558 tcSet(lhs, 0, parts); 2559 2560 /* Loop, subtracting SRHS if REMAINDER is greater and adding that to 2561 the total. */ 2562 for (;;) { 2563 int compare = tcCompare(remainder, srhs, parts); 2564 if (compare >= 0) { 2565 tcSubtract(remainder, srhs, 0, parts); 2566 lhs[n] |= mask; 2567 } 2568 2569 if (shiftCount == 0) 2570 break; 2571 shiftCount--; 2572 tcShiftRight(srhs, parts, 1); 2573 if ((mask >>= 1) == 0) { 2574 mask = (WordType) 1 << (APINT_BITS_PER_WORD - 1); 2575 n--; 2576 } 2577 } 2578 2579 return false; 2580} 2581 2582/// Shift a bignum left Cound bits in-place. Shifted in bits are zero. There are 2583/// no restrictions on Count. 2584void APInt::tcShiftLeft(WordType *Dst, unsigned Words, unsigned Count) { 2585 // Don't bother performing a no-op shift. 2586 if (!Count) 2587 return; 2588 2589 // WordShift is the inter-part shift; BitShift is the intra-part shift. 2590 unsigned WordShift = std::min(Count / APINT_BITS_PER_WORD, Words); 2591 unsigned BitShift = Count % APINT_BITS_PER_WORD; 2592 2593 // Fastpath for moving by whole words. 2594 if (BitShift == 0) { 2595 std::memmove(Dst + WordShift, Dst, (Words - WordShift) * APINT_WORD_SIZE); 2596 } else { 2597 while (Words-- > WordShift) { 2598 Dst[Words] = Dst[Words - WordShift] << BitShift; 2599 if (Words > WordShift) 2600 Dst[Words] |= 2601 Dst[Words - WordShift - 1] >> (APINT_BITS_PER_WORD - BitShift); 2602 } 2603 } 2604 2605 // Fill in the remainder with 0s. 2606 std::memset(Dst, 0, WordShift * APINT_WORD_SIZE); 2607} 2608 2609/// Shift a bignum right Count bits in-place. Shifted in bits are zero. There 2610/// are no restrictions on Count. 2611void APInt::tcShiftRight(WordType *Dst, unsigned Words, unsigned Count) { 2612 // Don't bother performing a no-op shift. 2613 if (!Count) 2614 return; 2615 2616 // WordShift is the inter-part shift; BitShift is the intra-part shift. 2617 unsigned WordShift = std::min(Count / APINT_BITS_PER_WORD, Words); 2618 unsigned BitShift = Count % APINT_BITS_PER_WORD; 2619 2620 unsigned WordsToMove = Words - WordShift; 2621 // Fastpath for moving by whole words. 2622 if (BitShift == 0) { 2623 std::memmove(Dst, Dst + WordShift, WordsToMove * APINT_WORD_SIZE); 2624 } else { 2625 for (unsigned i = 0; i != WordsToMove; ++i) { 2626 Dst[i] = Dst[i + WordShift] >> BitShift; 2627 if (i + 1 != WordsToMove) 2628 Dst[i] |= Dst[i + WordShift + 1] << (APINT_BITS_PER_WORD - BitShift); 2629 } 2630 } 2631 2632 // Fill in the remainder with 0s. 2633 std::memset(Dst + WordsToMove, 0, WordShift * APINT_WORD_SIZE); 2634} 2635 2636/* Bitwise and of two bignums. */ 2637void APInt::tcAnd(WordType *dst, const WordType *rhs, unsigned parts) { 2638 for (unsigned i = 0; i < parts; i++) 2639 dst[i] &= rhs[i]; 2640} 2641 2642/* Bitwise inclusive or of two bignums. */ 2643void APInt::tcOr(WordType *dst, const WordType *rhs, unsigned parts) { 2644 for (unsigned i = 0; i < parts; i++) 2645 dst[i] |= rhs[i]; 2646} 2647 2648/* Bitwise exclusive or of two bignums. */ 2649void APInt::tcXor(WordType *dst, const WordType *rhs, unsigned parts) { 2650 for (unsigned i = 0; i < parts; i++) 2651 dst[i] ^= rhs[i]; 2652} 2653 2654/* Complement a bignum in-place. */ 2655void APInt::tcComplement(WordType *dst, unsigned parts) { 2656 for (unsigned i = 0; i < parts; i++) 2657 dst[i] = ~dst[i]; 2658} 2659 2660/* Comparison (unsigned) of two bignums. */ 2661int APInt::tcCompare(const WordType *lhs, const WordType *rhs, 2662 unsigned parts) { 2663 while (parts) { 2664 parts--; 2665 if (lhs[parts] != rhs[parts]) 2666 return (lhs[parts] > rhs[parts]) ? 1 : -1; 2667 } 2668 2669 return 0; 2670} 2671 2672/* Set the least significant BITS bits of a bignum, clear the 2673 rest. */ 2674void APInt::tcSetLeastSignificantBits(WordType *dst, unsigned parts, 2675 unsigned bits) { 2676 unsigned i = 0; 2677 while (bits > APINT_BITS_PER_WORD) { 2678 dst[i++] = ~(WordType) 0; 2679 bits -= APINT_BITS_PER_WORD; 2680 } 2681 2682 if (bits) 2683 dst[i++] = ~(WordType) 0 >> (APINT_BITS_PER_WORD - bits); 2684 2685 while (i < parts) 2686 dst[i++] = 0; 2687} 2688 2689APInt llvm::APIntOps::RoundingUDiv(const APInt &A, const APInt &B, 2690 APInt::Rounding RM) { 2691 // Currently udivrem always rounds down. 2692 switch (RM) { 2693 case APInt::Rounding::DOWN: 2694 case APInt::Rounding::TOWARD_ZERO: 2695 return A.udiv(B); 2696 case APInt::Rounding::UP: { 2697 APInt Quo, Rem; 2698 APInt::udivrem(A, B, Quo, Rem); 2699 if (Rem == 0) 2700 return Quo; 2701 return Quo + 1; 2702 } 2703 } 2704 llvm_unreachable("Unknown APInt::Rounding enum"); 2705} 2706 2707APInt llvm::APIntOps::RoundingSDiv(const APInt &A, const APInt &B, 2708 APInt::Rounding RM) { 2709 switch (RM) { 2710 case APInt::Rounding::DOWN: 2711 case APInt::Rounding::UP: { 2712 APInt Quo, Rem; 2713 APInt::sdivrem(A, B, Quo, Rem); 2714 if (Rem == 0) 2715 return Quo; 2716 // This algorithm deals with arbitrary rounding mode used by sdivrem. 2717 // We want to check whether the non-integer part of the mathematical value 2718 // is negative or not. If the non-integer part is negative, we need to round 2719 // down from Quo; otherwise, if it's positive or 0, we return Quo, as it's 2720 // already rounded down. 2721 if (RM == APInt::Rounding::DOWN) { 2722 if (Rem.isNegative() != B.isNegative()) 2723 return Quo - 1; 2724 return Quo; 2725 } 2726 if (Rem.isNegative() != B.isNegative()) 2727 return Quo; 2728 return Quo + 1; 2729 } 2730 // Currently sdiv rounds twards zero. 2731 case APInt::Rounding::TOWARD_ZERO: 2732 return A.sdiv(B); 2733 } 2734 llvm_unreachable("Unknown APInt::Rounding enum"); 2735} 2736 2737Optional<APInt> 2738llvm::APIntOps::SolveQuadraticEquationWrap(APInt A, APInt B, APInt C, 2739 unsigned RangeWidth) { 2740 unsigned CoeffWidth = A.getBitWidth(); 2741 assert(CoeffWidth == B.getBitWidth() && CoeffWidth == C.getBitWidth()); 2742 assert(RangeWidth <= CoeffWidth && 2743 "Value range width should be less than coefficient width"); 2744 assert(RangeWidth > 1 && "Value range bit width should be > 1"); 2745 2746 LLVM_DEBUG(dbgs() << __func__ << ": solving " << A << "x^2 + " << B 2747 << "x + " << C << ", rw:" << RangeWidth << '\n'); 2748 2749 // Identify 0 as a (non)solution immediately. 2750 if (C.sextOrTrunc(RangeWidth).isNullValue() ) { 2751 LLVM_DEBUG(dbgs() << __func__ << ": zero solution\n"); 2752 return APInt(CoeffWidth, 0); 2753 } 2754 2755 // The result of APInt arithmetic has the same bit width as the operands, 2756 // so it can actually lose high bits. A product of two n-bit integers needs 2757 // 2n-1 bits to represent the full value. 2758 // The operation done below (on quadratic coefficients) that can produce 2759 // the largest value is the evaluation of the equation during bisection, 2760 // which needs 3 times the bitwidth of the coefficient, so the total number 2761 // of required bits is 3n. 2762 // 2763 // The purpose of this extension is to simulate the set Z of all integers, 2764 // where n+1 > n for all n in Z. In Z it makes sense to talk about positive 2765 // and negative numbers (not so much in a modulo arithmetic). The method 2766 // used to solve the equation is based on the standard formula for real 2767 // numbers, and uses the concepts of "positive" and "negative" with their 2768 // usual meanings. 2769 CoeffWidth *= 3; 2770 A = A.sext(CoeffWidth); 2771 B = B.sext(CoeffWidth); 2772 C = C.sext(CoeffWidth); 2773 2774 // Make A > 0 for simplicity. Negate cannot overflow at this point because 2775 // the bit width has increased. 2776 if (A.isNegative()) { 2777 A.negate(); 2778 B.negate(); 2779 C.negate(); 2780 } 2781 2782 // Solving an equation q(x) = 0 with coefficients in modular arithmetic 2783 // is really solving a set of equations q(x) = kR for k = 0, 1, 2, ..., 2784 // and R = 2^BitWidth. 2785 // Since we're trying not only to find exact solutions, but also values 2786 // that "wrap around", such a set will always have a solution, i.e. an x 2787 // that satisfies at least one of the equations, or such that |q(x)| 2788 // exceeds kR, while |q(x-1)| for the same k does not. 2789 // 2790 // We need to find a value k, such that Ax^2 + Bx + C = kR will have a 2791 // positive solution n (in the above sense), and also such that the n 2792 // will be the least among all solutions corresponding to k = 0, 1, ... 2793 // (more precisely, the least element in the set 2794 // { n(k) | k is such that a solution n(k) exists }). 2795 // 2796 // Consider the parabola (over real numbers) that corresponds to the 2797 // quadratic equation. Since A > 0, the arms of the parabola will point 2798 // up. Picking different values of k will shift it up and down by R. 2799 // 2800 // We want to shift the parabola in such a way as to reduce the problem 2801 // of solving q(x) = kR to solving shifted_q(x) = 0. 2802 // (The interesting solutions are the ceilings of the real number 2803 // solutions.) 2804 APInt R = APInt::getOneBitSet(CoeffWidth, RangeWidth); 2805 APInt TwoA = 2 * A; 2806 APInt SqrB = B * B; 2807 bool PickLow; 2808 2809 auto RoundUp = [] (const APInt &V, const APInt &A) -> APInt { 2810 assert(A.isStrictlyPositive()); 2811 APInt T = V.abs().urem(A); 2812 if (T.isNullValue()) 2813 return V; 2814 return V.isNegative() ? V+T : V+(A-T); 2815 }; 2816 2817 // The vertex of the parabola is at -B/2A, but since A > 0, it's negative 2818 // iff B is positive. 2819 if (B.isNonNegative()) { 2820 // If B >= 0, the vertex it at a negative location (or at 0), so in 2821 // order to have a non-negative solution we need to pick k that makes 2822 // C-kR negative. To satisfy all the requirements for the solution 2823 // that we are looking for, it needs to be closest to 0 of all k. 2824 C = C.srem(R); 2825 if (C.isStrictlyPositive()) 2826 C -= R; 2827 // Pick the greater solution. 2828 PickLow = false; 2829 } else { 2830 // If B < 0, the vertex is at a positive location. For any solution 2831 // to exist, the discriminant must be non-negative. This means that 2832 // C-kR <= B^2/4A is a necessary condition for k, i.e. there is a 2833 // lower bound on values of k: kR >= C - B^2/4A. 2834 APInt LowkR = C - SqrB.udiv(2*TwoA); // udiv because all values > 0. 2835 // Round LowkR up (towards +inf) to the nearest kR. 2836 LowkR = RoundUp(LowkR, R); 2837 2838 // If there exists k meeting the condition above, and such that 2839 // C-kR > 0, there will be two positive real number solutions of 2840 // q(x) = kR. Out of all such values of k, pick the one that makes 2841 // C-kR closest to 0, (i.e. pick maximum k such that C-kR > 0). 2842 // In other words, find maximum k such that LowkR <= kR < C. 2843 if (C.sgt(LowkR)) { 2844 // If LowkR < C, then such a k is guaranteed to exist because 2845 // LowkR itself is a multiple of R. 2846 C -= -RoundUp(-C, R); // C = C - RoundDown(C, R) 2847 // Pick the smaller solution. 2848 PickLow = true; 2849 } else { 2850 // If C-kR < 0 for all potential k's, it means that one solution 2851 // will be negative, while the other will be positive. The positive 2852 // solution will shift towards 0 if the parabola is moved up. 2853 // Pick the kR closest to the lower bound (i.e. make C-kR closest 2854 // to 0, or in other words, out of all parabolas that have solutions, 2855 // pick the one that is the farthest "up"). 2856 // Since LowkR is itself a multiple of R, simply take C-LowkR. 2857 C -= LowkR; 2858 // Pick the greater solution. 2859 PickLow = false; 2860 } 2861 } 2862 2863 LLVM_DEBUG(dbgs() << __func__ << ": updated coefficients " << A << "x^2 + " 2864 << B << "x + " << C << ", rw:" << RangeWidth << '\n'); 2865 2866 APInt D = SqrB - 4*A*C; 2867 assert(D.isNonNegative() && "Negative discriminant"); 2868 APInt SQ = D.sqrt(); 2869 2870 APInt Q = SQ * SQ; 2871 bool InexactSQ = Q != D; 2872 // The calculated SQ may actually be greater than the exact (non-integer) 2873 // value. If that's the case, decremement SQ to get a value that is lower. 2874 if (Q.sgt(D)) 2875 SQ -= 1; 2876 2877 APInt X; 2878 APInt Rem; 2879 2880 // SQ is rounded down (i.e SQ * SQ <= D), so the roots may be inexact. 2881 // When using the quadratic formula directly, the calculated low root 2882 // may be greater than the exact one, since we would be subtracting SQ. 2883 // To make sure that the calculated root is not greater than the exact 2884 // one, subtract SQ+1 when calculating the low root (for inexact value 2885 // of SQ). 2886 if (PickLow) 2887 APInt::sdivrem(-B - (SQ+InexactSQ), TwoA, X, Rem); 2888 else 2889 APInt::sdivrem(-B + SQ, TwoA, X, Rem); 2890 2891 // The updated coefficients should be such that the (exact) solution is 2892 // positive. Since APInt division rounds towards 0, the calculated one 2893 // can be 0, but cannot be negative. 2894 assert(X.isNonNegative() && "Solution should be non-negative"); 2895 2896 if (!InexactSQ && Rem.isNullValue()) { 2897 LLVM_DEBUG(dbgs() << __func__ << ": solution (root): " << X << '\n'); 2898 return X; 2899 } 2900 2901 assert((SQ*SQ).sle(D) && "SQ = |_sqrt(D)_|, so SQ*SQ <= D"); 2902 // The exact value of the square root of D should be between SQ and SQ+1. 2903 // This implies that the solution should be between that corresponding to 2904 // SQ (i.e. X) and that corresponding to SQ+1. 2905 // 2906 // The calculated X cannot be greater than the exact (real) solution. 2907 // Actually it must be strictly less than the exact solution, while 2908 // X+1 will be greater than or equal to it. 2909 2910 APInt VX = (A*X + B)*X + C; 2911 APInt VY = VX + TwoA*X + A + B; 2912 bool SignChange = VX.isNegative() != VY.isNegative() || 2913 VX.isNullValue() != VY.isNullValue(); 2914 // If the sign did not change between X and X+1, X is not a valid solution. 2915 // This could happen when the actual (exact) roots don't have an integer 2916 // between them, so they would both be contained between X and X+1. 2917 if (!SignChange) { 2918 LLVM_DEBUG(dbgs() << __func__ << ": no valid solution\n"); 2919 return None; 2920 } 2921 2922 X += 1; 2923 LLVM_DEBUG(dbgs() << __func__ << ": solution (wrap): " << X << '\n'); 2924 return X; 2925} 2926