lib/Support/APInt.cpp

   1 //===-- APInt.cpp - Implement APInt class ---------------------------------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This file implements a class to represent arbitrary precision integer
  11 // constant values and provide a variety of arithmetic operations on them.
  12 //
  13 //===----------------------------------------------------------------------===//
  14
  15 #include "llvm/ADT/APInt.h"
  16 #include "llvm/ADT/ArrayRef.h"
  17 #include "llvm/ADT/FoldingSet.h"
  18 #include "llvm/ADT/Hashing.h"
  19 #include "llvm/ADT/Optional.h"
  20 #include "llvm/ADT/SmallString.h"
  21 #include "llvm/ADT/StringRef.h"
  22 #include "llvm/ADT/bit.h"
  23 #include "llvm/Config/llvm-config.h"
  24 #include "llvm/Support/Debug.h"
  25 #include "llvm/Support/ErrorHandling.h"
  26 #include "llvm/Support/MathExtras.h"
  27 #include "llvm/Support/raw_ostream.h"
  28 #include <climits>
  29 #include <cmath>
  30 #include <cstdlib>
  31 #include <cstring>
  32 using namespace llvm;
  33
  34 #define DEBUG_TYPE "apint"
  35
  36 /// A utility function for allocating memory, checking for allocation failures,
  37 /// and ensuring the contents are zeroed.
  38 inline static uint64_t* getClearedMemory(unsigned numWords) {
  39   uint64_t *result = new uint64_t[numWords];
  40   memset(result, 0, numWords * sizeof(uint64_t));
  41   return result;
  42 }
  43
  44 /// A utility function for allocating memory and checking for allocation
  45 /// failure.  The content is not zeroed.
  46 inline static uint64_t* getMemory(unsigned numWords) {
  47   return new uint64_t[numWords];
  48 }
  49
  50 /// A utility function that converts a character to a digit.
  51 inline static unsigned getDigit(char cdigit, uint8_t radix) {
  52   unsigned r;
  53
  54   if (radix == 16 || radix == 36) {
  55     r = cdigit - '0';
  56     if (r <= 9)
  57       return r;
  58
  59     r = cdigit - 'A';
  60     if (r <= radix - 11U)
  61       return r + 10;
  62
  63     r = cdigit - 'a';
  64     if (r <= radix - 11U)
  65       return r + 10;
  66
  67     radix = 10;
  68   }
  69
  70   r = cdigit - '0';
  71   if (r < radix)
  72     return r;
  73
  74   return -1U;
  75 }
  76
  77
  78 void APInt::initSlowCase(uint64_t val, bool isSigned) {
  79   U.pVal = getClearedMemory(getNumWords());
  80   U.pVal[0] = val;
  81   if (isSigned && int64_t(val) < 0)
  82     for (unsigned i = 1; i < getNumWords(); ++i)
  83       U.pVal[i] = WORDTYPE_MAX;
  84   clearUnusedBits();
  85 }
  86
  87 void APInt::initSlowCase(const APInt& that) {
  88   U.pVal = getMemory(getNumWords());
  89   memcpy(U.pVal, that.U.pVal, getNumWords() * APINT_WORD_SIZE);
  90 }
  91
  92 void APInt::initFromArray(ArrayRef<uint64_t> bigVal) {
  93   assert(BitWidth && "Bitwidth too small");
  94   assert(bigVal.data() && "Null pointer detected!");
  95   if (isSingleWord())
  96     U.VAL = bigVal[0];
  97   else {
  98     // Get memory, cleared to 0
  99     U.pVal = getClearedMemory(getNumWords());
 100     // Calculate the number of words to copy
 101     unsigned words = std::min<unsigned>(bigVal.size(), getNumWords());
 102     // Copy the words from bigVal to pVal
 103     memcpy(U.pVal, bigVal.data(), words * APINT_WORD_SIZE);
 104   }
 105   // Make sure unused high bits are cleared
 106   clearUnusedBits();
 107 }
 108
 109 APInt::APInt(unsigned numBits, ArrayRef<uint64_t> bigVal)
 110   : BitWidth(numBits) {
 111   initFromArray(bigVal);
 112 }
 113
 114 APInt::APInt(unsigned numBits, unsigned numWords, const uint64_t bigVal[])
 115   : BitWidth(numBits) {
 116   initFromArray(makeArrayRef(bigVal, numWords));
 117 }
 118
 119 APInt::APInt(unsigned numbits, StringRef Str, uint8_t radix)
 120   : BitWidth(numbits) {
 121   assert(BitWidth && "Bitwidth too small");
 122   fromString(numbits, Str, radix);
 123 }
 124
 125 void APInt::reallocate(unsigned NewBitWidth) {
 126   // If the number of words is the same we can just change the width and stop.
 127   if (getNumWords() == getNumWords(NewBitWidth)) {
 128     BitWidth = NewBitWidth;
 129     return;
 130   }
 131
 132   // If we have an allocation, delete it.
 133   if (!isSingleWord())
 134     delete [] U.pVal;
 135
 136   // Update BitWidth.
 137   BitWidth = NewBitWidth;
 138
 139   // If we are supposed to have an allocation, create it.
 140   if (!isSingleWord())
 141     U.pVal = getMemory(getNumWords());
 142 }
 143
 144 void APInt::AssignSlowCase(const APInt& RHS) {
 145   // Don't do anything for X = X
 146   if (this == &RHS)
 147     return;
 148
 149   // Adjust the bit width and handle allocations as necessary.
 150   reallocate(RHS.getBitWidth());
 151
 152   // Copy the data.
 153   if (isSingleWord())
 154     U.VAL = RHS.U.VAL;
 155   else
 156     memcpy(U.pVal, RHS.U.pVal, getNumWords() * APINT_WORD_SIZE);
 157 }
 158
 159 /// This method 'profiles' an APInt for use with FoldingSet.
 160 void APInt::Profile(FoldingSetNodeID& ID) const {
 161   ID.AddInteger(BitWidth);
 162
 163   if (isSingleWord()) {
 164     ID.AddInteger(U.VAL);
 165     return;
 166   }
 167
 168   unsigned NumWords = getNumWords();
 169   for (unsigned i = 0; i < NumWords; ++i)
 170     ID.AddInteger(U.pVal[i]);
 171 }
 172
 173 /// Prefix increment operator. Increments the APInt by one.
 174 APInt& APInt::operator++() {
 175   if (isSingleWord())
 176     ++U.VAL;
 177   else
 178     tcIncrement(U.pVal, getNumWords());
 179   return clearUnusedBits();
 180 }
 181
 182 /// Prefix decrement operator. Decrements the APInt by one.
 183 APInt& APInt::operator--() {
 184   if (isSingleWord())
 185     --U.VAL;
 186   else
 187     tcDecrement(U.pVal, getNumWords());
 188   return clearUnusedBits();
 189 }
 190
 191 /// Adds the RHS APint to this APInt.
 192 /// @returns this, after addition of RHS.
 193 /// Addition assignment operator.
 194 APInt& APInt::operator+=(const APInt& RHS) {
 195   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
 196   if (isSingleWord())
 197     U.VAL += RHS.U.VAL;
 198   else
 199     tcAdd(U.pVal, RHS.U.pVal, 0, getNumWords());
 200   return clearUnusedBits();
 201 }
 202
 203 APInt& APInt::operator+=(uint64_t RHS) {
 204   if (isSingleWord())
 205     U.VAL += RHS;
 206   else
 207     tcAddPart(U.pVal, RHS, getNumWords());
 208   return clearUnusedBits();
 209 }
 210
 211 /// Subtracts the RHS APInt from this APInt
 212 /// @returns this, after subtraction
 213 /// Subtraction assignment operator.
 214 APInt& APInt::operator-=(const APInt& RHS) {
 215   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
 216   if (isSingleWord())
 217     U.VAL -= RHS.U.VAL;
 218   else
 219     tcSubtract(U.pVal, RHS.U.pVal, 0, getNumWords());
 220   return clearUnusedBits();
 221 }
 222
 223 APInt& APInt::operator-=(uint64_t RHS) {
 224   if (isSingleWord())
 225     U.VAL -= RHS;
 226   else
 227     tcSubtractPart(U.pVal, RHS, getNumWords());
 228   return clearUnusedBits();
 229 }
 230
 231 APInt APInt::operator*(const APInt& RHS) const {
 232   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
 233   if (isSingleWord())
 234     return APInt(BitWidth, U.VAL * RHS.U.VAL);
 235
 236   APInt Result(getMemory(getNumWords()), getBitWidth());
 237
 238   tcMultiply(Result.U.pVal, U.pVal, RHS.U.pVal, getNumWords());
 239
 240   Result.clearUnusedBits();
 241   return Result;
 242 }
 243
 244 void APInt::AndAssignSlowCase(const APInt& RHS) {
 245   tcAnd(U.pVal, RHS.U.pVal, getNumWords());
 246 }
 247
 248 void APInt::OrAssignSlowCase(const APInt& RHS) {
 249   tcOr(U.pVal, RHS.U.pVal, getNumWords());
 250 }
 251
 252 void APInt::XorAssignSlowCase(const APInt& RHS) {
 253   tcXor(U.pVal, RHS.U.pVal, getNumWords());
 254 }
 255
 256 APInt& APInt::operator*=(const APInt& RHS) {
 257   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
 258   *this = *this * RHS;
 259   return *this;
 260 }
 261
 262 APInt& APInt::operator*=(uint64_t RHS) {
 263   if (isSingleWord()) {
 264     U.VAL *= RHS;
 265   } else {
 266     unsigned NumWords = getNumWords();
 267     tcMultiplyPart(U.pVal, U.pVal, RHS, 0, NumWords, NumWords, false);
 268   }
 269   return clearUnusedBits();
 270 }
 271
 272 bool APInt::EqualSlowCase(const APInt& RHS) const {
 273   return std::equal(U.pVal, U.pVal + getNumWords(), RHS.U.pVal);
 274 }
 275
 276 int APInt::compare(const APInt& RHS) const {
 277   assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison");
 278   if (isSingleWord())
 279     return U.VAL < RHS.U.VAL ? -1 : U.VAL > RHS.U.VAL;
 280
 281   return tcCompare(U.pVal, RHS.U.pVal, getNumWords());
 282 }
 283
 284 int APInt::compareSigned(const APInt& RHS) const {
 285   assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison");
 286   if (isSingleWord()) {
 287     int64_t lhsSext = SignExtend64(U.VAL, BitWidth);
 288     int64_t rhsSext = SignExtend64(RHS.U.VAL, BitWidth);
 289     return lhsSext < rhsSext ? -1 : lhsSext > rhsSext;
 290   }
 291
 292   bool lhsNeg = isNegative();
 293   bool rhsNeg = RHS.isNegative();
 294
 295   // If the sign bits don't match, then (LHS < RHS) if LHS is negative
 296   if (lhsNeg != rhsNeg)
 297     return lhsNeg ? -1 : 1;
 298
 299   // Otherwise we can just use an unsigned comparison, because even negative
 300   // numbers compare correctly this way if both have the same signed-ness.
 301   return tcCompare(U.pVal, RHS.U.pVal, getNumWords());
 302 }
 303
 304 void APInt::setBitsSlowCase(unsigned loBit, unsigned hiBit) {
 305   unsigned loWord = whichWord(loBit);
 306   unsigned hiWord = whichWord(hiBit);
 307
 308   // Create an initial mask for the low word with zeros below loBit.
 309   uint64_t loMask = WORDTYPE_MAX << whichBit(loBit);
 310
 311   // If hiBit is not aligned, we need a high mask.
 312   unsigned hiShiftAmt = whichBit(hiBit);
 313   if (hiShiftAmt != 0) {
 314     // Create a high mask with zeros above hiBit.
 315     uint64_t hiMask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - hiShiftAmt);
 316     // If loWord and hiWord are equal, then we combine the masks. Otherwise,
 317     // set the bits in hiWord.
 318     if (hiWord == loWord)
 319       loMask &= hiMask;
 320     else
 321       U.pVal[hiWord] |= hiMask;
 322   }
 323   // Apply the mask to the low word.
 324   U.pVal[loWord] |= loMask;
 325
 326   // Fill any words between loWord and hiWord with all ones.
 327   for (unsigned word = loWord + 1; word < hiWord; ++word)
 328     U.pVal[word] = WORDTYPE_MAX;
 329 }
 330
 331 /// Toggle every bit to its opposite value.
 332 void APInt::flipAllBitsSlowCase() {
 333   tcComplement(U.pVal, getNumWords());
 334   clearUnusedBits();
 335 }
 336
 337 /// Toggle a given bit to its opposite value whose position is given
 338 /// as "bitPosition".
 339 /// Toggles a given bit to its opposite value.
 340 void APInt::flipBit(unsigned bitPosition) {
 341   assert(bitPosition < BitWidth && "Out of the bit-width range!");
 342   if ((*this)[bitPosition]) clearBit(bitPosition);
 343   else setBit(bitPosition);
 344 }
 345
 346 void APInt::insertBits(const APInt &subBits, unsigned bitPosition) {
 347   unsigned subBitWidth = subBits.getBitWidth();
 348   assert(0 < subBitWidth && (subBitWidth + bitPosition) <= BitWidth &&
 349          "Illegal bit insertion");
 350
 351   // Insertion is a direct copy.
 352   if (subBitWidth == BitWidth) {
 353     *this = subBits;
 354     return;
 355   }
 356
 357   // Single word result can be done as a direct bitmask.
 358   if (isSingleWord()) {
 359     uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - subBitWidth);
 360     U.VAL &= ~(mask << bitPosition);
 361     U.VAL |= (subBits.U.VAL << bitPosition);
 362     return;
 363   }
 364
 365   unsigned loBit = whichBit(bitPosition);
 366   unsigned loWord = whichWord(bitPosition);
 367   unsigned hi1Word = whichWord(bitPosition + subBitWidth - 1);
 368
 369   // Insertion within a single word can be done as a direct bitmask.
 370   if (loWord == hi1Word) {
 371     uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - subBitWidth);
 372     U.pVal[loWord] &= ~(mask << loBit);
 373     U.pVal[loWord] |= (subBits.U.VAL << loBit);
 374     return;
 375   }
 376
 377   // Insert on word boundaries.
 378   if (loBit == 0) {
 379     // Direct copy whole words.
 380     unsigned numWholeSubWords = subBitWidth / APINT_BITS_PER_WORD;
 381     memcpy(U.pVal + loWord, subBits.getRawData(),
 382            numWholeSubWords * APINT_WORD_SIZE);
 383
 384     // Mask+insert remaining bits.
 385     unsigned remainingBits = subBitWidth % APINT_BITS_PER_WORD;
 386     if (remainingBits != 0) {
 387       uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - remainingBits);
 388       U.pVal[hi1Word] &= ~mask;
 389       U.pVal[hi1Word] |= subBits.getWord(subBitWidth - 1);
 390     }
 391     return;
 392   }
 393
 394   // General case - set/clear individual bits in dst based on src.
 395   // TODO - there is scope for optimization here, but at the moment this code
 396   // path is barely used so prefer readability over performance.
 397   for (unsigned i = 0; i != subBitWidth; ++i) {
 398     if (subBits[i])
 399       setBit(bitPosition + i);
 400     else
 401       clearBit(bitPosition + i);
 402   }
 403 }
 404
 405 APInt APInt::extractBits(unsigned numBits, unsigned bitPosition) const {
 406   assert(numBits > 0 && "Can't extract zero bits");
 407   assert(bitPosition < BitWidth && (numBits + bitPosition) <= BitWidth &&
 408          "Illegal bit extraction");
 409
 410   if (isSingleWord())
 411     return APInt(numBits, U.VAL >> bitPosition);
 412
 413   unsigned loBit = whichBit(bitPosition);
 414   unsigned loWord = whichWord(bitPosition);
 415   unsigned hiWord = whichWord(bitPosition + numBits - 1);
 416
 417   // Single word result extracting bits from a single word source.
 418   if (loWord == hiWord)
 419     return APInt(numBits, U.pVal[loWord] >> loBit);
 420
 421   // Extracting bits that start on a source word boundary can be done
 422   // as a fast memory copy.
 423   if (loBit == 0)
 424     return APInt(numBits, makeArrayRef(U.pVal + loWord, 1 + hiWord - loWord));
 425
 426   // General case - shift + copy source words directly into place.
 427   APInt Result(numBits, 0);
 428   unsigned NumSrcWords = getNumWords();
 429   unsigned NumDstWords = Result.getNumWords();
 430
 431   uint64_t *DestPtr = Result.isSingleWord() ? &Result.U.VAL : Result.U.pVal;
 432   for (unsigned word = 0; word < NumDstWords; ++word) {
 433     uint64_t w0 = U.pVal[loWord + word];
 434     uint64_t w1 =
 435         (loWord + word + 1) < NumSrcWords ? U.pVal[loWord + word + 1] : 0;
 436     DestPtr[word] = (w0 >> loBit) | (w1 << (APINT_BITS_PER_WORD - loBit));
 437   }
 438
 439   return Result.clearUnusedBits();
 440 }
 441
 442 unsigned APInt::getBitsNeeded(StringRef str, uint8_t radix) {
 443   assert(!str.empty() && "Invalid string length");
 444   assert((radix == 10 || radix == 8 || radix == 16 || radix == 2 ||
 445           radix == 36) &&
 446          "Radix should be 2, 8, 10, 16, or 36!");
 447
 448   size_t slen = str.size();
 449
 450   // Each computation below needs to know if it's negative.
 451   StringRef::iterator p = str.begin();
 452   unsigned isNegative = *p == '-';
 453   if (*p == '-' || *p == '+') {
 454     p++;
 455     slen--;
 456     assert(slen && "String is only a sign, needs a value.");
 457   }
 458
 459   // For radixes of power-of-two values, the bits required is accurately and
 460   // easily computed
 461   if (radix == 2)
 462     return slen + isNegative;
 463   if (radix == 8)
 464     return slen * 3 + isNegative;
 465   if (radix == 16)
 466     return slen * 4 + isNegative;
 467
 468   // FIXME: base 36
 469
 470   // This is grossly inefficient but accurate. We could probably do something
 471   // with a computation of roughly slen*64/20 and then adjust by the value of
 472   // the first few digits. But, I'm not sure how accurate that could be.
 473
 474   // Compute a sufficient number of bits that is always large enough but might
 475   // be too large. This avoids the assertion in the constructor. This
 476   // calculation doesn't work appropriately for the numbers 0-9, so just use 4
 477   // bits in that case.
 478   unsigned sufficient
 479     = radix == 10? (slen == 1 ? 4 : slen * 64/18)
 480                  : (slen == 1 ? 7 : slen * 16/3);
 481
 482   // Convert to the actual binary value.
 483   APInt tmp(sufficient, StringRef(p, slen), radix);
 484
 485   // Compute how many bits are required. If the log is infinite, assume we need
 486   // just bit.
 487   unsigned log = tmp.logBase2();
 488   if (log == (unsigned)-1) {
 489     return isNegative + 1;
 490   } else {
 491     return isNegative + log + 1;
 492   }
 493 }
 494
 495 hash_code llvm::hash_value(const APInt &Arg) {
 496   if (Arg.isSingleWord())
 497     return hash_combine(Arg.U.VAL);
 498
 499   return hash_combine_range(Arg.U.pVal, Arg.U.pVal + Arg.getNumWords());
 500 }
 501
 502 bool APInt::isSplat(unsigned SplatSizeInBits) const {
 503   assert(getBitWidth() % SplatSizeInBits == 0 &&
 504          "SplatSizeInBits must divide width!");
 505   // We can check that all parts of an integer are equal by making use of a
 506   // little trick: rotate and check if it's still the same value.
 507   return *this == rotl(SplatSizeInBits);
 508 }
 509
 510 /// This function returns the high "numBits" bits of this APInt.
 511 APInt APInt::getHiBits(unsigned numBits) const {
 512   return this->lshr(BitWidth - numBits);
 513 }
 514
 515 /// This function returns the low "numBits" bits of this APInt.
 516 APInt APInt::getLoBits(unsigned numBits) const {
 517   APInt Result(getLowBitsSet(BitWidth, numBits));
 518   Result &= *this;
 519   return Result;
 520 }
 521
 522 /// Return a value containing V broadcasted over NewLen bits.
 523 APInt APInt::getSplat(unsigned NewLen, const APInt &V) {
 524   assert(NewLen >= V.getBitWidth() && "Can't splat to smaller bit width!");
 525
 526   APInt Val = V.zextOrSelf(NewLen);
 527   for (unsigned I = V.getBitWidth(); I < NewLen; I <<= 1)
 528     Val |= Val << I;
 529
 530   return Val;
 531 }
 532
 533 unsigned APInt::countLeadingZerosSlowCase() const {
 534   unsigned Count = 0;
 535   for (int i = getNumWords()-1; i >= 0; --i) {
 536     uint64_t V = U.pVal[i];
 537     if (V == 0)
 538       Count += APINT_BITS_PER_WORD;
 539     else {
 540       Count += llvm::countLeadingZeros(V);
 541       break;
 542     }
 543   }
 544   // Adjust for unused bits in the most significant word (they are zero).
 545   unsigned Mod = BitWidth % APINT_BITS_PER_WORD;
 546   Count -= Mod > 0 ? APINT_BITS_PER_WORD - Mod : 0;
 547   return Count;
 548 }
 549
 550 unsigned APInt::countLeadingOnesSlowCase() const {
 551   unsigned highWordBits = BitWidth % APINT_BITS_PER_WORD;
 552   unsigned shift;
 553   if (!highWordBits) {
 554     highWordBits = APINT_BITS_PER_WORD;
 555     shift = 0;
 556   } else {
 557     shift = APINT_BITS_PER_WORD - highWordBits;
 558   }
 559   int i = getNumWords() - 1;
 560   unsigned Count = llvm::countLeadingOnes(U.pVal[i] << shift);
 561   if (Count == highWordBits) {
 562     for (i--; i >= 0; --i) {
 563       if (U.pVal[i] == WORDTYPE_MAX)
 564         Count += APINT_BITS_PER_WORD;
 565       else {
 566         Count += llvm::countLeadingOnes(U.pVal[i]);
 567         break;
 568       }
 569     }
 570   }
 571   return Count;
 572 }
 573
 574 unsigned APInt::countTrailingZerosSlowCase() const {
 575   unsigned Count = 0;
 576   unsigned i = 0;
 577   for (; i < getNumWords() && U.pVal[i] == 0; ++i)
 578     Count += APINT_BITS_PER_WORD;
 579   if (i < getNumWords())
 580     Count += llvm::countTrailingZeros(U.pVal[i]);
 581   return std::min(Count, BitWidth);
 582 }
 583
 584 unsigned APInt::countTrailingOnesSlowCase() const {
 585   unsigned Count = 0;
 586   unsigned i = 0;
 587   for (; i < getNumWords() && U.pVal[i] == WORDTYPE_MAX; ++i)
 588     Count += APINT_BITS_PER_WORD;
 589   if (i < getNumWords())
 590     Count += llvm::countTrailingOnes(U.pVal[i]);
 591   assert(Count <= BitWidth);
 592   return Count;
 593 }
 594
 595 unsigned APInt::countPopulationSlowCase() const {
 596   unsigned Count = 0;
 597   for (unsigned i = 0; i < getNumWords(); ++i)
 598     Count += llvm::countPopulation(U.pVal[i]);
 599   return Count;
 600 }
 601
 602 bool APInt::intersectsSlowCase(const APInt &RHS) const {
 603   for (unsigned i = 0, e = getNumWords(); i != e; ++i)
 604     if ((U.pVal[i] & RHS.U.pVal[i]) != 0)
 605       return true;
 606
 607   return false;
 608 }
 609
 610 bool APInt::isSubsetOfSlowCase(const APInt &RHS) const {
 611   for (unsigned i = 0, e = getNumWords(); i != e; ++i)
 612     if ((U.pVal[i] & ~RHS.U.pVal[i]) != 0)
 613       return false;
 614
 615   return true;
 616 }
 617
 618 APInt APInt::byteSwap() const {
 619   assert(BitWidth >= 16 && BitWidth % 16 == 0 && "Cannot byteswap!");
 620   if (BitWidth == 16)
 621     return APInt(BitWidth, ByteSwap_16(uint16_t(U.VAL)));
 622   if (BitWidth == 32)
 623     return APInt(BitWidth, ByteSwap_32(unsigned(U.VAL)));
 624   if (BitWidth == 48) {
 625     unsigned Tmp1 = unsigned(U.VAL >> 16);
 626     Tmp1 = ByteSwap_32(Tmp1);
 627     uint16_t Tmp2 = uint16_t(U.VAL);
 628     Tmp2 = ByteSwap_16(Tmp2);
 629     return APInt(BitWidth, (uint64_t(Tmp2) << 32) | Tmp1);
 630   }
 631   if (BitWidth == 64)
 632     return APInt(BitWidth, ByteSwap_64(U.VAL));
 633
 634   APInt Result(getNumWords() * APINT_BITS_PER_WORD, 0);
 635   for (unsigned I = 0, N = getNumWords(); I != N; ++I)
 636     Result.U.pVal[I] = ByteSwap_64(U.pVal[N - I - 1]);
 637   if (Result.BitWidth != BitWidth) {
 638     Result.lshrInPlace(Result.BitWidth - BitWidth);
 639     Result.BitWidth = BitWidth;
 640   }
 641   return Result;
 642 }
 643
 644 APInt APInt::reverseBits() const {
 645   switch (BitWidth) {
 646   case 64:
 647     return APInt(BitWidth, llvm::reverseBits<uint64_t>(U.VAL));
 648   case 32:
 649     return APInt(BitWidth, llvm::reverseBits<uint32_t>(U.VAL));
 650   case 16:
 651     return APInt(BitWidth, llvm::reverseBits<uint16_t>(U.VAL));
 652   case 8:
 653     return APInt(BitWidth, llvm::reverseBits<uint8_t>(U.VAL));
 654   default:
 655     break;
 656   }
 657
 658   APInt Val(*this);
 659   APInt Reversed(BitWidth, 0);
 660   unsigned S = BitWidth;
 661
 662   for (; Val != 0; Val.lshrInPlace(1)) {
 663     Reversed <<= 1;
 664     Reversed |= Val[0];
 665     --S;
 666   }
 667
 668   Reversed <<= S;
 669   return Reversed;
 670 }
 671
 672 APInt llvm::APIntOps::GreatestCommonDivisor(APInt A, APInt B) {
 673   // Fast-path a common case.
 674   if (A == B) return A;
 675
 676   // Corner cases: if either operand is zero, the other is the gcd.
 677   if (!A) return B;
 678   if (!B) return A;
 679
 680   // Count common powers of 2 and remove all other powers of 2.
 681   unsigned Pow2;
 682   {
 683     unsigned Pow2_A = A.countTrailingZeros();
 684     unsigned Pow2_B = B.countTrailingZeros();
 685     if (Pow2_A > Pow2_B) {
 686       A.lshrInPlace(Pow2_A - Pow2_B);
 687       Pow2 = Pow2_B;
 688     } else if (Pow2_B > Pow2_A) {
 689       B.lshrInPlace(Pow2_B - Pow2_A);
 690       Pow2 = Pow2_A;
 691     } else {
 692       Pow2 = Pow2_A;
 693     }
 694   }
 695
 696   // Both operands are odd multiples of 2^Pow_2:
 697   //
 698   //   gcd(a, b) = gcd(|a - b| / 2^i, min(a, b))
 699   //
 700   // This is a modified version of Stein's algorithm, taking advantage of
 701   // efficient countTrailingZeros().
 702   while (A != B) {
 703     if (A.ugt(B)) {
 704       A -= B;
 705       A.lshrInPlace(A.countTrailingZeros() - Pow2);
 706     } else {
 707       B -= A;
 708       B.lshrInPlace(B.countTrailingZeros() - Pow2);
 709     }
 710   }
 711
 712   return A;
 713 }
 714
 715 APInt llvm::APIntOps::RoundDoubleToAPInt(double Double, unsigned width) {
 716   uint64_t I = bit_cast<uint64_t>(Double);
 717
 718   // Get the sign bit from the highest order bit
 719   bool isNeg = I >> 63;
 720
 721   // Get the 11-bit exponent and adjust for the 1023 bit bias
 722   int64_t exp = ((I >> 52) & 0x7ff) - 1023;
 723
 724   // If the exponent is negative, the value is < 0 so just return 0.
 725   if (exp < 0)
 726     return APInt(width, 0u);
 727
 728   // Extract the mantissa by clearing the top 12 bits (sign + exponent).
 729   uint64_t mantissa = (I & (~0ULL >> 12)) | 1ULL << 52;
 730
 731   // If the exponent doesn't shift all bits out of the mantissa
 732   if (exp < 52)
 733     return isNeg ? -APInt(width, mantissa >> (52 - exp)) :
 734                     APInt(width, mantissa >> (52 - exp));
 735
 736   // If the client didn't provide enough bits for us to shift the mantissa into
 737   // then the result is undefined, just return 0
 738   if (width <= exp - 52)
 739     return APInt(width, 0);
 740
 741   // Otherwise, we have to shift the mantissa bits up to the right location
 742   APInt Tmp(width, mantissa);
 743   Tmp <<= (unsigned)exp - 52;
 744   return isNeg ? -Tmp : Tmp;
 745 }
 746
 747 /// This function converts this APInt to a double.
 748 /// The layout for double is as following (IEEE Standard 754):
 749 ///  --------------------------------------
 750 /// |  Sign    Exponent    Fraction    Bias |
 751 /// |-------------------------------------- |
 752 /// |  1[63]   11[62-52]   52[51-00]   1023 |
 753 ///  --------------------------------------
 754 double APInt::roundToDouble(bool isSigned) const {
 755
 756   // Handle the simple case where the value is contained in one uint64_t.
 757   // It is wrong to optimize getWord(0) to VAL; there might be more than one word.
 758   if (isSingleWord() || getActiveBits() <= APINT_BITS_PER_WORD) {
 759     if (isSigned) {
 760       int64_t sext = SignExtend64(getWord(0), BitWidth);
 761       return double(sext);
 762     } else
 763       return double(getWord(0));
 764   }
 765
 766   // Determine if the value is negative.
 767   bool isNeg = isSigned ? (*this)[BitWidth-1] : false;
 768
 769   // Construct the absolute value if we're negative.
 770   APInt Tmp(isNeg ? -(*this) : (*this));
 771
 772   // Figure out how many bits we're using.
 773   unsigned n = Tmp.getActiveBits();
 774
 775   // The exponent (without bias normalization) is just the number of bits
 776   // we are using. Note that the sign bit is gone since we constructed the
 777   // absolute value.
 778   uint64_t exp = n;
 779
 780   // Return infinity for exponent overflow
 781   if (exp > 1023) {
 782     if (!isSigned || !isNeg)
 783       return std::numeric_limits<double>::infinity();
 784     else
 785       return -std::numeric_limits<double>::infinity();
 786   }
 787   exp += 1023; // Increment for 1023 bias
 788
 789   // Number of bits in mantissa is 52. To obtain the mantissa value, we must
 790   // extract the high 52 bits from the correct words in pVal.
 791   uint64_t mantissa;
 792   unsigned hiWord = whichWord(n-1);
 793   if (hiWord == 0) {
 794     mantissa = Tmp.U.pVal[0];
 795     if (n > 52)
 796       mantissa >>= n - 52; // shift down, we want the top 52 bits.
 797   } else {
 798     assert(hiWord > 0 && "huh?");
 799     uint64_t hibits = Tmp.U.pVal[hiWord] << (52 - n % APINT_BITS_PER_WORD);
 800     uint64_t lobits = Tmp.U.pVal[hiWord-1] >> (11 + n % APINT_BITS_PER_WORD);
 801     mantissa = hibits | lobits;
 802   }
 803
 804   // The leading bit of mantissa is implicit, so get rid of it.
 805   uint64_t sign = isNeg ? (1ULL << (APINT_BITS_PER_WORD - 1)) : 0;
 806   uint64_t I = sign | (exp << 52) | mantissa;
 807   return bit_cast<double>(I);
 808 }
 809
 810 // Truncate to new width.
 811 APInt APInt::trunc(unsigned width) const {
 812   assert(width < BitWidth && "Invalid APInt Truncate request");
 813   assert(width && "Can't truncate to 0 bits");
 814
 815   if (width <= APINT_BITS_PER_WORD)
 816     return APInt(width, getRawData()[0]);
 817
 818   APInt Result(getMemory(getNumWords(width)), width);
 819
 820   // Copy full words.
 821   unsigned i;
 822   for (i = 0; i != width / APINT_BITS_PER_WORD; i++)
 823     Result.U.pVal[i] = U.pVal[i];
 824
 825   // Truncate and copy any partial word.
 826   unsigned bits = (0 - width) % APINT_BITS_PER_WORD;
 827   if (bits != 0)
 828     Result.U.pVal[i] = U.pVal[i] << bits >> bits;
 829
 830   return Result;
 831 }
 832
 833 // Sign extend to a new width.
 834 APInt APInt::sext(unsigned Width) const {
 835   assert(Width > BitWidth && "Invalid APInt SignExtend request");
 836
 837   if (Width <= APINT_BITS_PER_WORD)
 838     return APInt(Width, SignExtend64(U.VAL, BitWidth));
 839
 840   APInt Result(getMemory(getNumWords(Width)), Width);
 841
 842   // Copy words.
 843   std::memcpy(Result.U.pVal, getRawData(), getNumWords() * APINT_WORD_SIZE);
 844
 845   // Sign extend the last word since there may be unused bits in the input.
 846   Result.U.pVal[getNumWords() - 1] =
 847       SignExtend64(Result.U.pVal[getNumWords() - 1],
 848                    ((BitWidth - 1) % APINT_BITS_PER_WORD) + 1);
 849
 850   // Fill with sign bits.
 851   std::memset(Result.U.pVal + getNumWords(), isNegative() ? -1 : 0,
 852               (Result.getNumWords() - getNumWords()) * APINT_WORD_SIZE);
 853   Result.clearUnusedBits();
 854   return Result;
 855 }
 856
 857 //  Zero extend to a new width.
 858 APInt APInt::zext(unsigned width) const {
 859   assert(width > BitWidth && "Invalid APInt ZeroExtend request");
 860
 861   if (width <= APINT_BITS_PER_WORD)
 862     return APInt(width, U.VAL);
 863
 864   APInt Result(getMemory(getNumWords(width)), width);
 865
 866   // Copy words.
 867   std::memcpy(Result.U.pVal, getRawData(), getNumWords() * APINT_WORD_SIZE);
 868
 869   // Zero remaining words.
 870   std::memset(Result.U.pVal + getNumWords(), 0,
 871               (Result.getNumWords() - getNumWords()) * APINT_WORD_SIZE);
 872
 873   return Result;
 874 }
 875
 876 APInt APInt::zextOrTrunc(unsigned width) const {
 877   if (BitWidth < width)
 878     return zext(width);
 879   if (BitWidth > width)
 880     return trunc(width);
 881   return *this;
 882 }
 883
 884 APInt APInt::sextOrTrunc(unsigned width) const {
 885   if (BitWidth < width)
 886     return sext(width);
 887   if (BitWidth > width)
 888     return trunc(width);
 889   return *this;
 890 }
 891
 892 APInt APInt::zextOrSelf(unsigned width) const {
 893   if (BitWidth < width)
 894     return zext(width);
 895   return *this;
 896 }
 897
 898 APInt APInt::sextOrSelf(unsigned width) const {
 899   if (BitWidth < width)
 900     return sext(width);
 901   return *this;
 902 }
 903
 904 /// Arithmetic right-shift this APInt by shiftAmt.
 905 /// Arithmetic right-shift function.
 906 void APInt::ashrInPlace(const APInt &shiftAmt) {
 907   ashrInPlace((unsigned)shiftAmt.getLimitedValue(BitWidth));
 908 }
 909
 910 /// Arithmetic right-shift this APInt by shiftAmt.
 911 /// Arithmetic right-shift function.
 912 void APInt::ashrSlowCase(unsigned ShiftAmt) {
 913   // Don't bother performing a no-op shift.
 914   if (!ShiftAmt)
 915     return;
 916
 917   // Save the original sign bit for later.
 918   bool Negative = isNegative();
 919
 920   // WordShift is the inter-part shift; BitShift is intra-part shift.
 921   unsigned WordShift = ShiftAmt / APINT_BITS_PER_WORD;
 922   unsigned BitShift = ShiftAmt % APINT_BITS_PER_WORD;
 923
 924   unsigned WordsToMove = getNumWords() - WordShift;
 925   if (WordsToMove != 0) {
 926     // Sign extend the last word to fill in the unused bits.
 927     U.pVal[getNumWords() - 1] = SignExtend64(
 928         U.pVal[getNumWords() - 1], ((BitWidth - 1) % APINT_BITS_PER_WORD) + 1);
 929
 930     // Fastpath for moving by whole words.
 931     if (BitShift == 0) {
 932       std::memmove(U.pVal, U.pVal + WordShift, WordsToMove * APINT_WORD_SIZE);
 933     } else {
 934       // Move the words containing significant bits.
 935       for (unsigned i = 0; i != WordsToMove - 1; ++i)
 936         U.pVal[i] = (U.pVal[i + WordShift] >> BitShift) |
 937                     (U.pVal[i + WordShift + 1] << (APINT_BITS_PER_WORD - BitShift));
 938
 939       // Handle the last word which has no high bits to copy.
 940       U.pVal[WordsToMove - 1] = U.pVal[WordShift + WordsToMove - 1] >> BitShift;
 941       // Sign extend one more time.
 942       U.pVal[WordsToMove - 1] =
 943           SignExtend64(U.pVal[WordsToMove - 1], APINT_BITS_PER_WORD - BitShift);
 944     }
 945   }
 946
 947   // Fill in the remainder based on the original sign.
 948   std::memset(U.pVal + WordsToMove, Negative ? -1 : 0,
 949               WordShift * APINT_WORD_SIZE);
 950   clearUnusedBits();
 951 }
 952
 953 /// Logical right-shift this APInt by shiftAmt.
 954 /// Logical right-shift function.
 955 void APInt::lshrInPlace(const APInt &shiftAmt) {
 956   lshrInPlace((unsigned)shiftAmt.getLimitedValue(BitWidth));
 957 }
 958
 959 /// Logical right-shift this APInt by shiftAmt.
 960 /// Logical right-shift function.
 961 void APInt::lshrSlowCase(unsigned ShiftAmt) {
 962   tcShiftRight(U.pVal, getNumWords(), ShiftAmt);
 963 }
 964
 965 /// Left-shift this APInt by shiftAmt.
 966 /// Left-shift function.
 967 APInt &APInt::operator<<=(const APInt &shiftAmt) {
 968   // It's undefined behavior in C to shift by BitWidth or greater.
 969   *this <<= (unsigned)shiftAmt.getLimitedValue(BitWidth);
 970   return *this;
 971 }
 972
 973 void APInt::shlSlowCase(unsigned ShiftAmt) {
 974   tcShiftLeft(U.pVal, getNumWords(), ShiftAmt);
 975   clearUnusedBits();
 976 }
 977
 978 // Calculate the rotate amount modulo the bit width.
 979 static unsigned rotateModulo(unsigned BitWidth, const APInt &rotateAmt) {
 980   unsigned rotBitWidth = rotateAmt.getBitWidth();
 981   APInt rot = rotateAmt;
 982   if (rotBitWidth < BitWidth) {
 983     // Extend the rotate APInt, so that the urem doesn't divide by 0.
 984     // e.g. APInt(1, 32) would give APInt(1, 0).
 985     rot = rotateAmt.zext(BitWidth);
 986   }
 987   rot = rot.urem(APInt(rot.getBitWidth(), BitWidth));
 988   return rot.getLimitedValue(BitWidth);
 989 }
 990
 991 APInt APInt::rotl(const APInt &rotateAmt) const {
 992   return rotl(rotateModulo(BitWidth, rotateAmt));
 993 }
 994
 995 APInt APInt::rotl(unsigned rotateAmt) const {
 996   rotateAmt %= BitWidth;
 997   if (rotateAmt == 0)
 998     return *this;
 999   return shl(rotateAmt) | lshr(BitWidth - rotateAmt);
1000 }
1001
1002 APInt APInt::rotr(const APInt &rotateAmt) const {
1003   return rotr(rotateModulo(BitWidth, rotateAmt));
1004 }
1005
1006 APInt APInt::rotr(unsigned rotateAmt) const {
1007   rotateAmt %= BitWidth;
1008   if (rotateAmt == 0)
1009     return *this;
1010   return lshr(rotateAmt) | shl(BitWidth - rotateAmt);
1011 }
1012
1013 // Square Root - this method computes and returns the square root of "this".
1014 // Three mechanisms are used for computation. For small values (<= 5 bits),
1015 // a table lookup is done. This gets some performance for common cases. For
1016 // values using less than 52 bits, the value is converted to double and then
1017 // the libc sqrt function is called. The result is rounded and then converted
1018 // back to a uint64_t which is then used to construct the result. Finally,
1019 // the Babylonian method for computing square roots is used.
1020 APInt APInt::sqrt() const {
1021
1022   // Determine the magnitude of the value.
1023   unsigned magnitude = getActiveBits();
1024
1025   // Use a fast table for some small values. This also gets rid of some
1026   // rounding errors in libc sqrt for small values.
1027   if (magnitude <= 5) {
1028     static const uint8_t results[32] = {
1029       /*     0 */ 0,
1030       /*  1- 2 */ 1, 1,
1031       /*  3- 6 */ 2, 2, 2, 2,
1032       /*  7-12 */ 3, 3, 3, 3, 3, 3,
1033       /* 13-20 */ 4, 4, 4, 4, 4, 4, 4, 4,
1034       /* 21-30 */ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
1035       /*    31 */ 6
1036     };
1037     return APInt(BitWidth, results[ (isSingleWord() ? U.VAL : U.pVal[0]) ]);
1038   }
1039
1040   // If the magnitude of the value fits in less than 52 bits (the precision of
1041   // an IEEE double precision floating point value), then we can use the
1042   // libc sqrt function which will probably use a hardware sqrt computation.
1043   // This should be faster than the algorithm below.
1044   if (magnitude < 52) {
1045     return APInt(BitWidth,
1046                  uint64_t(::round(::sqrt(double(isSingleWord() ? U.VAL
1047                                                                : U.pVal[0])))));
1048   }
1049
1050   // Okay, all the short cuts are exhausted. We must compute it. The following
1051   // is a classical Babylonian method for computing the square root. This code
1052   // was adapted to APInt from a wikipedia article on such computations.
1053   // See http://www.wikipedia.org/ and go to the page named
1054   // Calculate_an_integer_square_root.
1055   unsigned nbits = BitWidth, i = 4;
1056   APInt testy(BitWidth, 16);
1057   APInt x_old(BitWidth, 1);
1058   APInt x_new(BitWidth, 0);
1059   APInt two(BitWidth, 2);
1060
1061   // Select a good starting value using binary logarithms.
1062   for (;; i += 2, testy = testy.shl(2))
1063     if (i >= nbits || this->ule(testy)) {
1064       x_old = x_old.shl(i / 2);
1065       break;
1066     }
1067
1068   // Use the Babylonian method to arrive at the integer square root:
1069   for (;;) {
1070     x_new = (this->udiv(x_old) + x_old).udiv(two);
1071     if (x_old.ule(x_new))
1072       break;
1073     x_old = x_new;
1074   }
1075
1076   // Make sure we return the closest approximation
1077   // NOTE: The rounding calculation below is correct. It will produce an
1078   // off-by-one discrepancy with results from pari/gp. That discrepancy has been
1079   // determined to be a rounding issue with pari/gp as it begins to use a
1080   // floating point representation after 192 bits. There are no discrepancies
1081   // between this algorithm and pari/gp for bit widths < 192 bits.
1082   APInt square(x_old * x_old);
1083   APInt nextSquare((x_old + 1) * (x_old +1));
1084   if (this->ult(square))
1085     return x_old;
1086   assert(this->ule(nextSquare) && "Error in APInt::sqrt computation");
1087   APInt midpoint((nextSquare - square).udiv(two));
1088   APInt offset(*this - square);
1089   if (offset.ult(midpoint))
1090     return x_old;
1091   return x_old + 1;
1092 }
1093
1094 /// Computes the multiplicative inverse of this APInt for a given modulo. The
1095 /// iterative extended Euclidean algorithm is used to solve for this value,
1096 /// however we simplify it to speed up calculating only the inverse, and take
1097 /// advantage of div+rem calculations. We also use some tricks to avoid copying
1098 /// (potentially large) APInts around.
1099 APInt APInt::multiplicativeInverse(const APInt& modulo) const {
1100   assert(ult(modulo) && "This APInt must be smaller than the modulo");
1101
1102   // Using the properties listed at the following web page (accessed 06/21/08):
1103   //   http://www.numbertheory.org/php/euclid.html
1104   // (especially the properties numbered 3, 4 and 9) it can be proved that
1105   // BitWidth bits suffice for all the computations in the algorithm implemented
1106   // below. More precisely, this number of bits suffice if the multiplicative
1107   // inverse exists, but may not suffice for the general extended Euclidean
1108   // algorithm.
1109
1110   APInt r[2] = { modulo, *this };
1111   APInt t[2] = { APInt(BitWidth, 0), APInt(BitWidth, 1) };
1112   APInt q(BitWidth, 0);
1113
1114   unsigned i;
1115   for (i = 0; r[i^1] != 0; i ^= 1) {
1116     // An overview of the math without the confusing bit-flipping:
1117     // q = r[i-2] / r[i-1]
1118     // r[i] = r[i-2] % r[i-1]
1119     // t[i] = t[i-2] - t[i-1] * q
1120     udivrem(r[i], r[i^1], q, r[i]);
1121     t[i] -= t[i^1] * q;
1122   }
1123
1124   // If this APInt and the modulo are not coprime, there is no multiplicative
1125   // inverse, so return 0. We check this by looking at the next-to-last
1126   // remainder, which is the gcd(*this,modulo) as calculated by the Euclidean
1127   // algorithm.
1128   if (r[i] != 1)
1129     return APInt(BitWidth, 0);
1130
1131   // The next-to-last t is the multiplicative inverse.  However, we are
1132   // interested in a positive inverse. Calculate a positive one from a negative
1133   // one if necessary. A simple addition of the modulo suffices because
1134   // abs(t[i]) is known to be less than *this/2 (see the link above).
1135   if (t[i].isNegative())
1136     t[i] += modulo;
1137
1138   return std::move(t[i]);
1139 }
1140
1141 /// Calculate the magic numbers required to implement a signed integer division
1142 /// by a constant as a sequence of multiplies, adds and shifts.  Requires that
1143 /// the divisor not be 0, 1, or -1.  Taken from "Hacker's Delight", Henry S.
1144 /// Warren, Jr., chapter 10.
1145 APInt::ms APInt::magic() const {
1146   const APInt& d = *this;
1147   unsigned p;
1148   APInt ad, anc, delta, q1, r1, q2, r2, t;
1149   APInt signedMin = APInt::getSignedMinValue(d.getBitWidth());
1150   struct ms mag;
1151
1152   ad = d.abs();
1153   t = signedMin + (d.lshr(d.getBitWidth() - 1));
1154   anc = t - 1 - t.urem(ad);   // absolute value of nc
1155   p = d.getBitWidth() - 1;    // initialize p
1156   q1 = signedMin.udiv(anc);   // initialize q1 = 2p/abs(nc)
1157   r1 = signedMin - q1*anc;    // initialize r1 = rem(2p,abs(nc))
1158   q2 = signedMin.udiv(ad);    // initialize q2 = 2p/abs(d)
1159   r2 = signedMin - q2*ad;     // initialize r2 = rem(2p,abs(d))
1160   do {
1161     p = p + 1;
1162     q1 = q1<<1;          // update q1 = 2p/abs(nc)
1163     r1 = r1<<1;          // update r1 = rem(2p/abs(nc))
1164     if (r1.uge(anc)) {  // must be unsigned comparison
1165       q1 = q1 + 1;
1166       r1 = r1 - anc;
1167     }
1168     q2 = q2<<1;          // update q2 = 2p/abs(d)
1169     r2 = r2<<1;          // update r2 = rem(2p/abs(d))
1170     if (r2.uge(ad)) {   // must be unsigned comparison
1171       q2 = q2 + 1;
1172       r2 = r2 - ad;
1173     }
1174     delta = ad - r2;
1175   } while (q1.ult(delta) || (q1 == delta && r1 == 0));
1176
1177   mag.m = q2 + 1;
1178   if (d.isNegative()) mag.m = -mag.m;   // resulting magic number
1179   mag.s = p - d.getBitWidth();          // resulting shift
1180   return mag;
1181 }
1182
1183 /// Calculate the magic numbers required to implement an unsigned integer
1184 /// division by a constant as a sequence of multiplies, adds and shifts.
1185 /// Requires that the divisor not be 0.  Taken from "Hacker's Delight", Henry
1186 /// S. Warren, Jr., chapter 10.
1187 /// LeadingZeros can be used to simplify the calculation if the upper bits
1188 /// of the divided value are known zero.
1189 APInt::mu APInt::magicu(unsigned LeadingZeros) const {
1190   const APInt& d = *this;
1191   unsigned p;
1192   APInt nc, delta, q1, r1, q2, r2;
1193   struct mu magu;
1194   magu.a = 0;               // initialize "add" indicator
1195   APInt allOnes = APInt::getAllOnesValue(d.getBitWidth()).lshr(LeadingZeros);
1196   APInt signedMin = APInt::getSignedMinValue(d.getBitWidth());
1197   APInt signedMax = APInt::getSignedMaxValue(d.getBitWidth());
1198
1199   nc = allOnes - (allOnes - d).urem(d);
1200   p = d.getBitWidth() - 1;  // initialize p
1201   q1 = signedMin.udiv(nc);  // initialize q1 = 2p/nc
1202   r1 = signedMin - q1*nc;   // initialize r1 = rem(2p,nc)
1203   q2 = signedMax.udiv(d);   // initialize q2 = (2p-1)/d
1204   r2 = signedMax - q2*d;    // initialize r2 = rem((2p-1),d)
1205   do {
1206     p = p + 1;
1207     if (r1.uge(nc - r1)) {
1208       q1 = q1 + q1 + 1;  // update q1
1209       r1 = r1 + r1 - nc; // update r1
1210     }
1211     else {
1212       q1 = q1+q1; // update q1
1213       r1 = r1+r1; // update r1
1214     }
1215     if ((r2 + 1).uge(d - r2)) {
1216       if (q2.uge(signedMax)) magu.a = 1;
1217       q2 = q2+q2 + 1;     // update q2
1218       r2 = r2+r2 + 1 - d; // update r2
1219     }
1220     else {
1221       if (q2.uge(signedMin)) magu.a = 1;
1222       q2 = q2+q2;     // update q2
1223       r2 = r2+r2 + 1; // update r2
1224     }
1225     delta = d - 1 - r2;
1226   } while (p < d.getBitWidth()*2 &&
1227            (q1.ult(delta) || (q1 == delta && r1 == 0)));
1228   magu.m = q2 + 1; // resulting magic number
1229   magu.s = p - d.getBitWidth();  // resulting shift
1230   return magu;
1231 }
1232
1233 /// Implementation of Knuth's Algorithm D (Division of nonnegative integers)
1234 /// from "Art of Computer Programming, Volume 2", section 4.3.1, p. 272. The
1235 /// variables here have the same names as in the algorithm. Comments explain
1236 /// the algorithm and any deviation from it.
1237 static void KnuthDiv(uint32_t *u, uint32_t *v, uint32_t *q, uint32_t* r,
1238                      unsigned m, unsigned n) {
1239   assert(u && "Must provide dividend");
1240   assert(v && "Must provide divisor");
1241   assert(q && "Must provide quotient");
1242   assert(u != v && u != q && v != q && "Must use different memory");
1243   assert(n>1 && "n must be > 1");
1244
1245   // b denotes the base of the number system. In our case b is 2^32.
1246   const uint64_t b = uint64_t(1) << 32;
1247
1248 // The DEBUG macros here tend to be spam in the debug output if you're not
1249 // debugging this code. Disable them unless KNUTH_DEBUG is defined.
1250 #ifdef KNUTH_DEBUG
1251 #define DEBUG_KNUTH(X) LLVM_DEBUG(X)
1252 #else
1253 #define DEBUG_KNUTH(X) do {} while(false)
1254 #endif
1255
1256   DEBUG_KNUTH(dbgs() << "KnuthDiv: m=" << m << " n=" << n << '\n');
1257   DEBUG_KNUTH(dbgs() << "KnuthDiv: original:");
1258   DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
1259   DEBUG_KNUTH(dbgs() << " by");
1260   DEBUG_KNUTH(for (int i = n; i > 0; i--) dbgs() << " " << v[i - 1]);
1261   DEBUG_KNUTH(dbgs() << '\n');
1262   // D1. [Normalize.] Set d = b / (v[n-1] + 1) and multiply all the digits of
1263   // u and v by d. Note that we have taken Knuth's advice here to use a power
1264   // of 2 value for d such that d * v[n-1] >= b/2 (b is the base). A power of
1265   // 2 allows us to shift instead of multiply and it is easy to determine the
1266   // shift amount from the leading zeros.  We are basically normalizing the u
1267   // and v so that its high bits are shifted to the top of v's range without
1268   // overflow. Note that this can require an extra word in u so that u must
1269   // be of length m+n+1.
1270   unsigned shift = countLeadingZeros(v[n-1]);
1271   uint32_t v_carry = 0;
1272   uint32_t u_carry = 0;
1273   if (shift) {
1274     for (unsigned i = 0; i < m+n; ++i) {
1275       uint32_t u_tmp = u[i] >> (32 - shift);
1276       u[i] = (u[i] << shift) | u_carry;
1277       u_carry = u_tmp;
1278     }
1279     for (unsigned i = 0; i < n; ++i) {
1280       uint32_t v_tmp = v[i] >> (32 - shift);
1281       v[i] = (v[i] << shift) | v_carry;
1282       v_carry = v_tmp;
1283     }
1284   }
1285   u[m+n] = u_carry;
1286
1287   DEBUG_KNUTH(dbgs() << "KnuthDiv:   normal:");
1288   DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
1289   DEBUG_KNUTH(dbgs() << " by");
1290   DEBUG_KNUTH(for (int i = n; i > 0; i--) dbgs() << " " << v[i - 1]);
1291   DEBUG_KNUTH(dbgs() << '\n');
1292
1293   // D2. [Initialize j.]  Set j to m. This is the loop counter over the places.
1294   int j = m;
1295   do {
1296     DEBUG_KNUTH(dbgs() << "KnuthDiv: quotient digit #" << j << '\n');
1297     // D3. [Calculate q'.].
1298     //     Set qp = (u[j+n]*b + u[j+n-1]) / v[n-1]. (qp=qprime=q')
1299     //     Set rp = (u[j+n]*b + u[j+n-1]) % v[n-1]. (rp=rprime=r')
1300     // Now test if qp == b or qp*v[n-2] > b*rp + u[j+n-2]; if so, decrease
1301     // qp by 1, increase rp by v[n-1], and repeat this test if rp < b. The test
1302     // on v[n-2] determines at high speed most of the cases in which the trial
1303     // value qp is one too large, and it eliminates all cases where qp is two
1304     // too large.
1305     uint64_t dividend = Make_64(u[j+n], u[j+n-1]);
1306     DEBUG_KNUTH(dbgs() << "KnuthDiv: dividend == " << dividend << '\n');
1307     uint64_t qp = dividend / v[n-1];
1308     uint64_t rp = dividend % v[n-1];
1309     if (qp == b || qp*v[n-2] > b*rp + u[j+n-2]) {
1310       qp--;
1311       rp += v[n-1];
1312       if (rp < b && (qp == b || qp*v[n-2] > b*rp + u[j+n-2]))
1313         qp--;
1314     }
1315     DEBUG_KNUTH(dbgs() << "KnuthDiv: qp == " << qp << ", rp == " << rp << '\n');
1316
1317     // D4. [Multiply and subtract.] Replace (u[j+n]u[j+n-1]...u[j]) with
1318     // (u[j+n]u[j+n-1]..u[j]) - qp * (v[n-1]...v[1]v[0]). This computation
1319     // consists of a simple multiplication by a one-place number, combined with
1320     // a subtraction.
1321     // The digits (u[j+n]...u[j]) should be kept positive; if the result of
1322     // this step is actually negative, (u[j+n]...u[j]) should be left as the
1323     // true value plus b**(n+1), namely as the b's complement of
1324     // the true value, and a "borrow" to the left should be remembered.
1325     int64_t borrow = 0;
1326     for (unsigned i = 0; i < n; ++i) {
1327       uint64_t p = uint64_t(qp) * uint64_t(v[i]);
1328       int64_t subres = int64_t(u[j+i]) - borrow - Lo_32(p);
1329       u[j+i] = Lo_32(subres);
1330       borrow = Hi_32(p) - Hi_32(subres);
1331       DEBUG_KNUTH(dbgs() << "KnuthDiv: u[j+i] = " << u[j + i]
1332                         << ", borrow = " << borrow << '\n');
1333     }
1334     bool isNeg = u[j+n] < borrow;
1335     u[j+n] -= Lo_32(borrow);
1336
1337     DEBUG_KNUTH(dbgs() << "KnuthDiv: after subtraction:");
1338     DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
1339     DEBUG_KNUTH(dbgs() << '\n');
1340
1341     // D5. [Test remainder.] Set q[j] = qp. If the result of step D4 was
1342     // negative, go to step D6; otherwise go on to step D7.
1343     q[j] = Lo_32(qp);
1344     if (isNeg) {
1345       // D6. [Add back]. The probability that this step is necessary is very
1346       // small, on the order of only 2/b. Make sure that test data accounts for
1347       // this possibility. Decrease q[j] by 1
1348       q[j]--;
1349       // and add (0v[n-1]...v[1]v[0]) to (u[j+n]u[j+n-1]...u[j+1]u[j]).
1350       // A carry will occur to the left of u[j+n], and it should be ignored
1351       // since it cancels with the borrow that occurred in D4.
1352       bool carry = false;
1353       for (unsigned i = 0; i < n; i++) {
1354         uint32_t limit = std::min(u[j+i],v[i]);
1355         u[j+i] += v[i] + carry;
1356         carry = u[j+i] < limit || (carry && u[j+i] == limit);
1357       }
1358       u[j+n] += carry;
1359     }
1360     DEBUG_KNUTH(dbgs() << "KnuthDiv: after correction:");
1361     DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
1362     DEBUG_KNUTH(dbgs() << "\nKnuthDiv: digit result = " << q[j] << '\n');
1363
1364     // D7. [Loop on j.]  Decrease j by one. Now if j >= 0, go back to D3.
1365   } while (--j >= 0);
1366
1367   DEBUG_KNUTH(dbgs() << "KnuthDiv: quotient:");
1368   DEBUG_KNUTH(for (int i = m; i >= 0; i--) dbgs() << " " << q[i]);
1369   DEBUG_KNUTH(dbgs() << '\n');
1370
1371   // D8. [Unnormalize]. Now q[...] is the desired quotient, and the desired
1372   // remainder may be obtained by dividing u[...] by d. If r is non-null we
1373   // compute the remainder (urem uses this).
1374   if (r) {
1375     // The value d is expressed by the "shift" value above since we avoided
1376     // multiplication by d by using a shift left. So, all we have to do is
1377     // shift right here.
1378     if (shift) {
1379       uint32_t carry = 0;
1380       DEBUG_KNUTH(dbgs() << "KnuthDiv: remainder:");
1381       for (int i = n-1; i >= 0; i--) {
1382         r[i] = (u[i] >> shift) | carry;
1383         carry = u[i] << (32 - shift);
1384         DEBUG_KNUTH(dbgs() << " " << r[i]);
1385       }
1386     } else {
1387       for (int i = n-1; i >= 0; i--) {
1388         r[i] = u[i];
1389         DEBUG_KNUTH(dbgs() << " " << r[i]);
1390       }
1391     }
1392     DEBUG_KNUTH(dbgs() << '\n');
1393   }
1394   DEBUG_KNUTH(dbgs() << '\n');
1395 }
1396
1397 void APInt::divide(const WordType *LHS, unsigned lhsWords, const WordType *RHS,
1398                    unsigned rhsWords, WordType *Quotient, WordType *Remainder) {
1399   assert(lhsWords >= rhsWords && "Fractional result");
1400
1401   // First, compose the values into an array of 32-bit words instead of
1402   // 64-bit words. This is a necessity of both the "short division" algorithm
1403   // and the Knuth "classical algorithm" which requires there to be native
1404   // operations for +, -, and * on an m bit value with an m*2 bit result. We
1405   // can't use 64-bit operands here because we don't have native results of
1406   // 128-bits. Furthermore, casting the 64-bit values to 32-bit values won't
1407   // work on large-endian machines.
1408   unsigned n = rhsWords * 2;
1409   unsigned m = (lhsWords * 2) - n;
1410
1411   // Allocate space for the temporary values we need either on the stack, if
1412   // it will fit, or on the heap if it won't.
1413   uint32_t SPACE[128];
1414   uint32_t *U = nullptr;
1415   uint32_t *V = nullptr;
1416   uint32_t *Q = nullptr;
1417   uint32_t *R = nullptr;
1418   if ((Remainder?4:3)*n+2*m+1 <= 128) {
1419     U = &SPACE[0];
1420     V = &SPACE[m+n+1];
1421     Q = &SPACE[(m+n+1) + n];
1422     if (Remainder)
1423       R = &SPACE[(m+n+1) + n + (m+n)];
1424   } else {
1425     U = new uint32_t[m + n + 1];
1426     V = new uint32_t[n];
1427     Q = new uint32_t[m+n];
1428     if (Remainder)
1429       R = new uint32_t[n];
1430   }
1431
1432   // Initialize the dividend
1433   memset(U, 0, (m+n+1)*sizeof(uint32_t));
1434   for (unsigned i = 0; i < lhsWords; ++i) {
1435     uint64_t tmp = LHS[i];
1436     U[i * 2] = Lo_32(tmp);
1437     U[i * 2 + 1] = Hi_32(tmp);
1438   }
1439   U[m+n] = 0; // this extra word is for "spill" in the Knuth algorithm.
1440
1441   // Initialize the divisor
1442   memset(V, 0, (n)*sizeof(uint32_t));
1443   for (unsigned i = 0; i < rhsWords; ++i) {
1444     uint64_t tmp = RHS[i];
1445     V[i * 2] = Lo_32(tmp);
1446     V[i * 2 + 1] = Hi_32(tmp);
1447   }
1448
1449   // initialize the quotient and remainder
1450   memset(Q, 0, (m+n) * sizeof(uint32_t));
1451   if (Remainder)
1452     memset(R, 0, n * sizeof(uint32_t));
1453
1454   // Now, adjust m and n for the Knuth division. n is the number of words in
1455   // the divisor. m is the number of words by which the dividend exceeds the
1456   // divisor (i.e. m+n is the length of the dividend). These sizes must not
1457   // contain any zero words or the Knuth algorithm fails.
1458   for (unsigned i = n; i > 0 && V[i-1] == 0; i--) {
1459     n--;
1460     m++;
1461   }
1462   for (unsigned i = m+n; i > 0 && U[i-1] == 0; i--)
1463     m--;
1464
1465   // If we're left with only a single word for the divisor, Knuth doesn't work
1466   // so we implement the short division algorithm here. This is much simpler
1467   // and faster because we are certain that we can divide a 64-bit quantity
1468   // by a 32-bit quantity at hardware speed and short division is simply a
1469   // series of such operations. This is just like doing short division but we
1470   // are using base 2^32 instead of base 10.
1471   assert(n != 0 && "Divide by zero?");
1472   if (n == 1) {
1473     uint32_t divisor = V[0];
1474     uint32_t remainder = 0;
1475     for (int i = m; i >= 0; i--) {
1476       uint64_t partial_dividend = Make_64(remainder, U[i]);
1477       if (partial_dividend == 0) {
1478         Q[i] = 0;
1479         remainder = 0;
1480       } else if (partial_dividend < divisor) {
1481         Q[i] = 0;
1482         remainder = Lo_32(partial_dividend);
1483       } else if (partial_dividend == divisor) {
1484         Q[i] = 1;
1485         remainder = 0;
1486       } else {
1487         Q[i] = Lo_32(partial_dividend / divisor);
1488         remainder = Lo_32(partial_dividend - (Q[i] * divisor));
1489       }
1490     }
1491     if (R)
1492       R[0] = remainder;
1493   } else {
1494     // Now we're ready to invoke the Knuth classical divide algorithm. In this
1495     // case n > 1.
1496     KnuthDiv(U, V, Q, R, m, n);
1497   }
1498
1499   // If the caller wants the quotient
1500   if (Quotient) {
1501     for (unsigned i = 0; i < lhsWords; ++i)
1502       Quotient[i] = Make_64(Q[i*2+1], Q[i*2]);
1503   }
1504
1505   // If the caller wants the remainder
1506   if (Remainder) {
1507     for (unsigned i = 0; i < rhsWords; ++i)
1508       Remainder[i] = Make_64(R[i*2+1], R[i*2]);
1509   }
1510
1511   // Clean up the memory we allocated.
1512   if (U != &SPACE[0]) {
1513     delete [] U;
1514     delete [] V;
1515     delete [] Q;
1516     delete [] R;
1517   }
1518 }
1519
1520 APInt APInt::udiv(const APInt &RHS) const {
1521   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
1522
1523   // First, deal with the easy case
1524   if (isSingleWord()) {
1525     assert(RHS.U.VAL != 0 && "Divide by zero?");
1526     return APInt(BitWidth, U.VAL / RHS.U.VAL);
1527   }
1528
1529   // Get some facts about the LHS and RHS number of bits and words
1530   unsigned lhsWords = getNumWords(getActiveBits());
1531   unsigned rhsBits  = RHS.getActiveBits();
1532   unsigned rhsWords = getNumWords(rhsBits);
1533   assert(rhsWords && "Divided by zero???");
1534
1535   // Deal with some degenerate cases
1536   if (!lhsWords)
1537     // 0 / X ===> 0
1538     return APInt(BitWidth, 0);
1539   if (rhsBits == 1)
1540     // X / 1 ===> X
1541     return *this;
1542   if (lhsWords < rhsWords || this->ult(RHS))
1543     // X / Y ===> 0, iff X < Y
1544     return APInt(BitWidth, 0);
1545   if (*this == RHS)
1546     // X / X ===> 1
1547     return APInt(BitWidth, 1);
1548   if (lhsWords == 1) // rhsWords is 1 if lhsWords is 1.
1549     // All high words are zero, just use native divide
1550     return APInt(BitWidth, this->U.pVal[0] / RHS.U.pVal[0]);
1551
1552   // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1553   APInt Quotient(BitWidth, 0); // to hold result.
1554   divide(U.pVal, lhsWords, RHS.U.pVal, rhsWords, Quotient.U.pVal, nullptr);
1555   return Quotient;
1556 }
1557
1558 APInt APInt::udiv(uint64_t RHS) const {
1559   assert(RHS != 0 && "Divide by zero?");
1560
1561   // First, deal with the easy case
1562   if (isSingleWord())
1563     return APInt(BitWidth, U.VAL / RHS);
1564
1565   // Get some facts about the LHS words.
1566   unsigned lhsWords = getNumWords(getActiveBits());
1567
1568   // Deal with some degenerate cases
1569   if (!lhsWords)
1570     // 0 / X ===> 0
1571     return APInt(BitWidth, 0);
1572   if (RHS == 1)
1573     // X / 1 ===> X
1574     return *this;
1575   if (this->ult(RHS))
1576     // X / Y ===> 0, iff X < Y
1577     return APInt(BitWidth, 0);
1578   if (*this == RHS)
1579     // X / X ===> 1
1580     return APInt(BitWidth, 1);
1581   if (lhsWords == 1) // rhsWords is 1 if lhsWords is 1.
1582     // All high words are zero, just use native divide
1583     return APInt(BitWidth, this->U.pVal[0] / RHS);
1584
1585   // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1586   APInt Quotient(BitWidth, 0); // to hold result.
1587   divide(U.pVal, lhsWords, &RHS, 1, Quotient.U.pVal, nullptr);
1588   return Quotient;
1589 }
1590
1591 APInt APInt::sdiv(const APInt &RHS) const {
1592   if (isNegative()) {
1593     if (RHS.isNegative())
1594       return (-(*this)).udiv(-RHS);
1595     return -((-(*this)).udiv(RHS));
1596   }
1597   if (RHS.isNegative())
1598     return -(this->udiv(-RHS));
1599   return this->udiv(RHS);
1600 }
1601
1602 APInt APInt::sdiv(int64_t RHS) const {
1603   if (isNegative()) {
1604     if (RHS < 0)
1605       return (-(*this)).udiv(-RHS);
1606     return -((-(*this)).udiv(RHS));
1607   }
1608   if (RHS < 0)
1609     return -(this->udiv(-RHS));
1610   return this->udiv(RHS);
1611 }
1612
1613 APInt APInt::urem(const APInt &RHS) const {
1614   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
1615   if (isSingleWord()) {
1616     assert(RHS.U.VAL != 0 && "Remainder by zero?");
1617     return APInt(BitWidth, U.VAL % RHS.U.VAL);
1618   }
1619
1620   // Get some facts about the LHS
1621   unsigned lhsWords = getNumWords(getActiveBits());
1622
1623   // Get some facts about the RHS
1624   unsigned rhsBits = RHS.getActiveBits();
1625   unsigned rhsWords = getNumWords(rhsBits);
1626   assert(rhsWords && "Performing remainder operation by zero ???");
1627
1628   // Check the degenerate cases
1629   if (lhsWords == 0)
1630     // 0 % Y ===> 0
1631     return APInt(BitWidth, 0);
1632   if (rhsBits == 1)
1633     // X % 1 ===> 0
1634     return APInt(BitWidth, 0);
1635   if (lhsWords < rhsWords || this->ult(RHS))
1636     // X % Y ===> X, iff X < Y
1637     return *this;
1638   if (*this == RHS)
1639     // X % X == 0;
1640     return APInt(BitWidth, 0);
1641   if (lhsWords == 1)
1642     // All high words are zero, just use native remainder
1643     return APInt(BitWidth, U.pVal[0] % RHS.U.pVal[0]);
1644
1645   // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1646   APInt Remainder(BitWidth, 0);
1647   divide(U.pVal, lhsWords, RHS.U.pVal, rhsWords, nullptr, Remainder.U.pVal);
1648   return Remainder;
1649 }
1650
1651 uint64_t APInt::urem(uint64_t RHS) const {
1652   assert(RHS != 0 && "Remainder by zero?");
1653
1654   if (isSingleWord())
1655     return U.VAL % RHS;
1656
1657   // Get some facts about the LHS
1658   unsigned lhsWords = getNumWords(getActiveBits());
1659
1660   // Check the degenerate cases
1661   if (lhsWords == 0)
1662     // 0 % Y ===> 0
1663     return 0;
1664   if (RHS == 1)
1665     // X % 1 ===> 0
1666     return 0;
1667   if (this->ult(RHS))
1668     // X % Y ===> X, iff X < Y
1669     return getZExtValue();
1670   if (*this == RHS)
1671     // X % X == 0;
1672     return 0;
1673   if (lhsWords == 1)
1674     // All high words are zero, just use native remainder
1675     return U.pVal[0] % RHS;
1676
1677   // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1678   uint64_t Remainder;
1679   divide(U.pVal, lhsWords, &RHS, 1, nullptr, &Remainder);
1680   return Remainder;
1681 }
1682
1683 APInt APInt::srem(const APInt &RHS) const {
1684   if (isNegative()) {
1685     if (RHS.isNegative())
1686       return -((-(*this)).urem(-RHS));
1687     return -((-(*this)).urem(RHS));
1688   }
1689   if (RHS.isNegative())
1690     return this->urem(-RHS);
1691   return this->urem(RHS);
1692 }
1693
1694 int64_t APInt::srem(int64_t RHS) const {
1695   if (isNegative()) {
1696     if (RHS < 0)
1697       return -((-(*this)).urem(-RHS));
1698     return -((-(*this)).urem(RHS));
1699   }
1700   if (RHS < 0)
1701     return this->urem(-RHS);
1702   return this->urem(RHS);
1703 }
1704
1705 void APInt::udivrem(const APInt &LHS, const APInt &RHS,
1706                     APInt &Quotient, APInt &Remainder) {
1707   assert(LHS.BitWidth == RHS.BitWidth && "Bit widths must be the same");
1708   unsigned BitWidth = LHS.BitWidth;
1709
1710   // First, deal with the easy case
1711   if (LHS.isSingleWord()) {
1712     assert(RHS.U.VAL != 0 && "Divide by zero?");
1713     uint64_t QuotVal = LHS.U.VAL / RHS.U.VAL;
1714     uint64_t RemVal = LHS.U.VAL % RHS.U.VAL;
1715     Quotient = APInt(BitWidth, QuotVal);
1716     Remainder = APInt(BitWidth, RemVal);
1717     return;
1718   }
1719
1720   // Get some size facts about the dividend and divisor
1721   unsigned lhsWords = getNumWords(LHS.getActiveBits());
1722   unsigned rhsBits  = RHS.getActiveBits();
1723   unsigned rhsWords = getNumWords(rhsBits);
1724   assert(rhsWords && "Performing divrem operation by zero ???");
1725
1726   // Check the degenerate cases
1727   if (lhsWords == 0) {
1728     Quotient = APInt(BitWidth, 0);    // 0 / Y ===> 0
1729     Remainder = APInt(BitWidth, 0);   // 0 % Y ===> 0
1730     return;
1731   }
1732
1733   if (rhsBits == 1) {
1734     Quotient = LHS;                   // X / 1 ===> X
1735     Remainder = APInt(BitWidth, 0);   // X % 1 ===> 0
1736   }
1737
1738   if (lhsWords < rhsWords || LHS.ult(RHS)) {
1739     Remainder = LHS;                  // X % Y ===> X, iff X < Y
1740     Quotient = APInt(BitWidth, 0);    // X / Y ===> 0, iff X < Y
1741     return;
1742   }
1743
1744   if (LHS == RHS) {
1745     Quotient  = APInt(BitWidth, 1);   // X / X ===> 1
1746     Remainder = APInt(BitWidth, 0);   // X % X ===> 0;
1747     return;
1748   }
1749
1750   // Make sure there is enough space to hold the results.
1751   // NOTE: This assumes that reallocate won't affect any bits if it doesn't
1752   // change the size. This is necessary if Quotient or Remainder is aliased
1753   // with LHS or RHS.
1754   Quotient.reallocate(BitWidth);
1755   Remainder.reallocate(BitWidth);
1756
1757   if (lhsWords == 1) { // rhsWords is 1 if lhsWords is 1.
1758     // There is only one word to consider so use the native versions.
1759     uint64_t lhsValue = LHS.U.pVal[0];
1760     uint64_t rhsValue = RHS.U.pVal[0];
1761     Quotient = lhsValue / rhsValue;
1762     Remainder = lhsValue % rhsValue;
1763     return;
1764   }
1765
1766   // Okay, lets do it the long way
1767   divide(LHS.U.pVal, lhsWords, RHS.U.pVal, rhsWords, Quotient.U.pVal,
1768          Remainder.U.pVal);
1769   // Clear the rest of the Quotient and Remainder.
1770   std::memset(Quotient.U.pVal + lhsWords, 0,
1771               (getNumWords(BitWidth) - lhsWords) * APINT_WORD_SIZE);
1772   std::memset(Remainder.U.pVal + rhsWords, 0,
1773               (getNumWords(BitWidth) - rhsWords) * APINT_WORD_SIZE);
1774 }
1775
1776 void APInt::udivrem(const APInt &LHS, uint64_t RHS, APInt &Quotient,
1777                     uint64_t &Remainder) {
1778   assert(RHS != 0 && "Divide by zero?");
1779   unsigned BitWidth = LHS.BitWidth;
1780
1781   // First, deal with the easy case
1782   if (LHS.isSingleWord()) {
1783     uint64_t QuotVal = LHS.U.VAL / RHS;
1784     Remainder = LHS.U.VAL % RHS;
1785     Quotient = APInt(BitWidth, QuotVal);
1786     return;
1787   }
1788
1789   // Get some size facts about the dividend and divisor
1790   unsigned lhsWords = getNumWords(LHS.getActiveBits());
1791
1792   // Check the degenerate cases
1793   if (lhsWords == 0) {
1794     Quotient = APInt(BitWidth, 0);    // 0 / Y ===> 0
1795     Remainder = 0;                    // 0 % Y ===> 0
1796     return;
1797   }
1798
1799   if (RHS == 1) {
1800     Quotient = LHS;                   // X / 1 ===> X
1801     Remainder = 0;                    // X % 1 ===> 0
1802     return;
1803   }
1804
1805   if (LHS.ult(RHS)) {
1806     Remainder = LHS.getZExtValue();   // X % Y ===> X, iff X < Y
1807     Quotient = APInt(BitWidth, 0);    // X / Y ===> 0, iff X < Y
1808     return;
1809   }
1810
1811   if (LHS == RHS) {
1812     Quotient  = APInt(BitWidth, 1);   // X / X ===> 1
1813     Remainder = 0;                    // X % X ===> 0;
1814     return;
1815   }
1816
1817   // Make sure there is enough space to hold the results.
1818   // NOTE: This assumes that reallocate won't affect any bits if it doesn't
1819   // change the size. This is necessary if Quotient is aliased with LHS.
1820   Quotient.reallocate(BitWidth);
1821
1822   if (lhsWords == 1) { // rhsWords is 1 if lhsWords is 1.
1823     // There is only one word to consider so use the native versions.
1824     uint64_t lhsValue = LHS.U.pVal[0];
1825     Quotient = lhsValue / RHS;
1826     Remainder = lhsValue % RHS;
1827     return;
1828   }
1829
1830   // Okay, lets do it the long way
1831   divide(LHS.U.pVal, lhsWords, &RHS, 1, Quotient.U.pVal, &Remainder);
1832   // Clear the rest of the Quotient.
1833   std::memset(Quotient.U.pVal + lhsWords, 0,
1834               (getNumWords(BitWidth) - lhsWords) * APINT_WORD_SIZE);
1835 }
1836
1837 void APInt::sdivrem(const APInt &LHS, const APInt &RHS,
1838                     APInt &Quotient, APInt &Remainder) {
1839   if (LHS.isNegative()) {
1840     if (RHS.isNegative())
1841       APInt::udivrem(-LHS, -RHS, Quotient, Remainder);
1842     else {
1843       APInt::udivrem(-LHS, RHS, Quotient, Remainder);
1844       Quotient.negate();
1845     }
1846     Remainder.negate();
1847   } else if (RHS.isNegative()) {
1848     APInt::udivrem(LHS, -RHS, Quotient, Remainder);
1849     Quotient.negate();
1850   } else {
1851     APInt::udivrem(LHS, RHS, Quotient, Remainder);
1852   }
1853 }
1854
1855 void APInt::sdivrem(const APInt &LHS, int64_t RHS,
1856                     APInt &Quotient, int64_t &Remainder) {
1857   uint64_t R = Remainder;
1858   if (LHS.isNegative()) {
1859     if (RHS < 0)
1860       APInt::udivrem(-LHS, -RHS, Quotient, R);
1861     else {
1862       APInt::udivrem(-LHS, RHS, Quotient, R);
1863       Quotient.negate();
1864     }
1865     R = -R;
1866   } else if (RHS < 0) {
1867     APInt::udivrem(LHS, -RHS, Quotient, R);
1868     Quotient.negate();
1869   } else {
1870     APInt::udivrem(LHS, RHS, Quotient, R);
1871   }
1872   Remainder = R;
1873 }
1874
1875 APInt APInt::sadd_ov(const APInt &RHS, bool &Overflow) const {
1876   APInt Res = *this+RHS;
1877   Overflow = isNonNegative() == RHS.isNonNegative() &&
1878              Res.isNonNegative() != isNonNegative();
1879   return Res;
1880 }
1881
1882 APInt APInt::uadd_ov(const APInt &RHS, bool &Overflow) const {
1883   APInt Res = *this+RHS;
1884   Overflow = Res.ult(RHS);
1885   return Res;
1886 }
1887
1888 APInt APInt::ssub_ov(const APInt &RHS, bool &Overflow) const {
1889   APInt Res = *this - RHS;
1890   Overflow = isNonNegative() != RHS.isNonNegative() &&
1891              Res.isNonNegative() != isNonNegative();
1892   return Res;
1893 }
1894
1895 APInt APInt::usub_ov(const APInt &RHS, bool &Overflow) const {
1896   APInt Res = *this-RHS;
1897   Overflow = Res.ugt(*this);
1898   return Res;
1899 }
1900
1901 APInt APInt::sdiv_ov(const APInt &RHS, bool &Overflow) const {
1902   // MININT/-1  -->  overflow.
1903   Overflow = isMinSignedValue() && RHS.isAllOnesValue();
1904   return sdiv(RHS);
1905 }
1906
1907 APInt APInt::smul_ov(const APInt &RHS, bool &Overflow) const {
1908   APInt Res = *this * RHS;
1909
1910   if (*this != 0 && RHS != 0)
1911     Overflow = Res.sdiv(RHS) != *this || Res.sdiv(*this) != RHS;
1912   else
1913     Overflow = false;
1914   return Res;
1915 }
1916
1917 APInt APInt::umul_ov(const APInt &RHS, bool &Overflow) const {
1918   APInt Res = *this * RHS;
1919
1920   if (*this != 0 && RHS != 0)
1921     Overflow = Res.udiv(RHS) != *this || Res.udiv(*this) != RHS;
1922   else
1923     Overflow = false;
1924   return Res;
1925 }
1926
1927 APInt APInt::sshl_ov(const APInt &ShAmt, bool &Overflow) const {
1928   Overflow = ShAmt.uge(getBitWidth());
1929   if (Overflow)
1930     return APInt(BitWidth, 0);
1931
1932   if (isNonNegative()) // Don't allow sign change.
1933     Overflow = ShAmt.uge(countLeadingZeros());
1934   else
1935     Overflow = ShAmt.uge(countLeadingOnes());
1936
1937   return *this << ShAmt;
1938 }
1939
1940 APInt APInt::ushl_ov(const APInt &ShAmt, bool &Overflow) const {
1941   Overflow = ShAmt.uge(getBitWidth());
1942   if (Overflow)
1943     return APInt(BitWidth, 0);
1944
1945   Overflow = ShAmt.ugt(countLeadingZeros());
1946
1947   return *this << ShAmt;
1948 }
1949
1950
1951
1952
1953 void APInt::fromString(unsigned numbits, StringRef str, uint8_t radix) {
1954   // Check our assumptions here
1955   assert(!str.empty() && "Invalid string length");
1956   assert((radix == 10 || radix == 8 || radix == 16 || radix == 2 ||
1957           radix == 36) &&
1958          "Radix should be 2, 8, 10, 16, or 36!");
1959
1960   StringRef::iterator p = str.begin();
1961   size_t slen = str.size();
1962   bool isNeg = *p == '-';
1963   if (*p == '-' || *p == '+') {
1964     p++;
1965     slen--;
1966     assert(slen && "String is only a sign, needs a value.");
1967   }
1968   assert((slen <= numbits || radix != 2) && "Insufficient bit width");
1969   assert(((slen-1)*3 <= numbits || radix != 8) && "Insufficient bit width");
1970   assert(((slen-1)*4 <= numbits || radix != 16) && "Insufficient bit width");
1971   assert((((slen-1)*64)/22 <= numbits || radix != 10) &&
1972          "Insufficient bit width");
1973
1974   // Allocate memory if needed
1975   if (isSingleWord())
1976     U.VAL = 0;
1977   else
1978     U.pVal = getClearedMemory(getNumWords());
1979
1980   // Figure out if we can shift instead of multiply
1981   unsigned shift = (radix == 16 ? 4 : radix == 8 ? 3 : radix == 2 ? 1 : 0);
1982
1983   // Enter digit traversal loop
1984   for (StringRef::iterator e = str.end(); p != e; ++p) {
1985     unsigned digit = getDigit(*p, radix);
1986     assert(digit < radix && "Invalid character in digit string");
1987
1988     // Shift or multiply the value by the radix
1989     if (slen > 1) {
1990       if (shift)
1991         *this <<= shift;
1992       else
1993         *this *= radix;
1994     }
1995
1996     // Add in the digit we just interpreted
1997     *this += digit;
1998   }
1999   // If its negative, put it in two's complement form
2000   if (isNeg)
2001     this->negate();
2002 }
2003
2004 void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix,
2005                      bool Signed, bool formatAsCLiteral) const {
2006   assert((Radix == 10 || Radix == 8 || Radix == 16 || Radix == 2 ||
2007           Radix == 36) &&
2008          "Radix should be 2, 8, 10, 16, or 36!");
2009
2010   const char *Prefix = "";
2011   if (formatAsCLiteral) {
2012     switch (Radix) {
2013       case 2:
2014         // Binary literals are a non-standard extension added in gcc 4.3:
2015         // http://gcc.gnu.org/onlinedocs/gcc-4.3.0/gcc/Binary-constants.html
2016         Prefix = "0b";
2017         break;
2018       case 8:
2019         Prefix = "0";
2020         break;
2021       case 10:
2022         break; // No prefix
2023       case 16:
2024         Prefix = "0x";
2025         break;
2026       default:
2027         llvm_unreachable("Invalid radix!");
2028     }
2029   }
2030
2031   // First, check for a zero value and just short circuit the logic below.
2032   if (*this == 0) {
2033     while (*Prefix) {
2034       Str.push_back(*Prefix);
2035       ++Prefix;
2036     };
2037     Str.push_back('0');
2038     return;
2039   }
2040
2041   static const char Digits[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
2042
2043   if (isSingleWord()) {
2044     char Buffer[65];
2045     char *BufPtr = std::end(Buffer);
2046
2047     uint64_t N;
2048     if (!Signed) {
2049       N = getZExtValue();
2050     } else {
2051       int64_t I = getSExtValue();
2052       if (I >= 0) {
2053         N = I;
2054       } else {
2055         Str.push_back('-');
2056         N = -(uint64_t)I;
2057       }
2058     }
2059
2060     while (*Prefix) {
2061       Str.push_back(*Prefix);
2062       ++Prefix;
2063     };
2064
2065     while (N) {
2066       *--BufPtr = Digits[N % Radix];
2067       N /= Radix;
2068     }
2069     Str.append(BufPtr, std::end(Buffer));
2070     return;
2071   }
2072
2073   APInt Tmp(*this);
2074
2075   if (Signed && isNegative()) {
2076     // They want to print the signed version and it is a negative value
2077     // Flip the bits and add one to turn it into the equivalent positive
2078     // value and put a '-' in the result.
2079     Tmp.negate();
2080     Str.push_back('-');
2081   }
2082
2083   while (*Prefix) {
2084     Str.push_back(*Prefix);
2085     ++Prefix;
2086   };
2087
2088   // We insert the digits backward, then reverse them to get the right order.
2089   unsigned StartDig = Str.size();
2090
2091   // For the 2, 8 and 16 bit cases, we can just shift instead of divide
2092   // because the number of bits per digit (1, 3 and 4 respectively) divides
2093   // equally.  We just shift until the value is zero.
2094   if (Radix == 2 || Radix == 8 || Radix == 16) {
2095     // Just shift tmp right for each digit width until it becomes zero
2096     unsigned ShiftAmt = (Radix == 16 ? 4 : (Radix == 8 ? 3 : 1));
2097     unsigned MaskAmt = Radix - 1;
2098
2099     while (Tmp.getBoolValue()) {
2100       unsigned Digit = unsigned(Tmp.getRawData()[0]) & MaskAmt;
2101       Str.push_back(Digits[Digit]);
2102       Tmp.lshrInPlace(ShiftAmt);
2103     }
2104   } else {
2105     while (Tmp.getBoolValue()) {
2106       uint64_t Digit;
2107       udivrem(Tmp, Radix, Tmp, Digit);
2108       assert(Digit < Radix && "divide failed");
2109       Str.push_back(Digits[Digit]);
2110     }
2111   }
2112
2113   // Reverse the digits before returning.
2114   std::reverse(Str.begin()+StartDig, Str.end());
2115 }
2116
2117 /// Returns the APInt as a std::string. Note that this is an inefficient method.
2118 /// It is better to pass in a SmallVector/SmallString to the methods above.
2119 std::string APInt::toString(unsigned Radix = 10, bool Signed = true) const {
2120   SmallString<40> S;
2121   toString(S, Radix, Signed, /* formatAsCLiteral = */false);
2122   return S.str();
2123 }
2124
2125 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2126 LLVM_DUMP_METHOD void APInt::dump() const {
2127   SmallString<40> S, U;
2128   this->toStringUnsigned(U);
2129   this->toStringSigned(S);
2130   dbgs() << "APInt(" << BitWidth << "b, "
2131          << U << "u " << S << "s)\n";
2132 }
2133 #endif
2134
2135 void APInt::print(raw_ostream &OS, bool isSigned) const {
2136   SmallString<40> S;
2137   this->toString(S, 10, isSigned, /* formatAsCLiteral = */false);
2138   OS << S;
2139 }
2140
2141 // This implements a variety of operations on a representation of
2142 // arbitrary precision, two's-complement, bignum integer values.
2143
2144 // Assumed by lowHalf, highHalf, partMSB and partLSB.  A fairly safe
2145 // and unrestricting assumption.
2146 static_assert(APInt::APINT_BITS_PER_WORD % 2 == 0,
2147               "Part width must be divisible by 2!");
2148
2149 /* Some handy functions local to this file.  */
2150
2151 /* Returns the integer part with the least significant BITS set.
2152    BITS cannot be zero.  */
2153 static inline APInt::WordType lowBitMask(unsigned bits) {
2154   assert(bits != 0 && bits <= APInt::APINT_BITS_PER_WORD);
2155
2156   return ~(APInt::WordType) 0 >> (APInt::APINT_BITS_PER_WORD - bits);
2157 }
2158
2159 /* Returns the value of the lower half of PART.  */
2160 static inline APInt::WordType lowHalf(APInt::WordType part) {
2161   return part & lowBitMask(APInt::APINT_BITS_PER_WORD / 2);
2162 }
2163
2164 /* Returns the value of the upper half of PART.  */
2165 static inline APInt::WordType highHalf(APInt::WordType part) {
2166   return part >> (APInt::APINT_BITS_PER_WORD / 2);
2167 }
2168
2169 /* Returns the bit number of the most significant set bit of a part.
2170    If the input number has no bits set -1U is returned.  */
2171 static unsigned partMSB(APInt::WordType value) {
2172   return findLastSet(value, ZB_Max);
2173 }
2174
2175 /* Returns the bit number of the least significant set bit of a
2176    part.  If the input number has no bits set -1U is returned.  */
2177 static unsigned partLSB(APInt::WordType value) {
2178   return findFirstSet(value, ZB_Max);
2179 }
2180
2181 /* Sets the least significant part of a bignum to the input value, and
2182    zeroes out higher parts.  */
2183 void APInt::tcSet(WordType *dst, WordType part, unsigned parts) {
2184   assert(parts > 0);
2185
2186   dst[0] = part;
2187   for (unsigned i = 1; i < parts; i++)
2188     dst[i] = 0;
2189 }
2190
2191 /* Assign one bignum to another.  */
2192 void APInt::tcAssign(WordType *dst, const WordType *src, unsigned parts) {
2193   for (unsigned i = 0; i < parts; i++)
2194     dst[i] = src[i];
2195 }
2196
2197 /* Returns true if a bignum is zero, false otherwise.  */
2198 bool APInt::tcIsZero(const WordType *src, unsigned parts) {
2199   for (unsigned i = 0; i < parts; i++)
2200     if (src[i])
2201       return false;
2202
2203   return true;
2204 }
2205
2206 /* Extract the given bit of a bignum; returns 0 or 1.  */
2207 int APInt::tcExtractBit(const WordType *parts, unsigned bit) {
2208   return (parts[whichWord(bit)] & maskBit(bit)) != 0;
2209 }
2210
2211 /* Set the given bit of a bignum. */
2212 void APInt::tcSetBit(WordType *parts, unsigned bit) {
2213   parts[whichWord(bit)] |= maskBit(bit);
2214 }
2215
2216 /* Clears the given bit of a bignum. */
2217 void APInt::tcClearBit(WordType *parts, unsigned bit) {
2218   parts[whichWord(bit)] &= ~maskBit(bit);
2219 }
2220
2221 /* Returns the bit number of the least significant set bit of a
2222    number.  If the input number has no bits set -1U is returned.  */
2223 unsigned APInt::tcLSB(const WordType *parts, unsigned n) {
2224   for (unsigned i = 0; i < n; i++) {
2225     if (parts[i] != 0) {
2226       unsigned lsb = partLSB(parts[i]);
2227
2228       return lsb + i * APINT_BITS_PER_WORD;
2229     }
2230   }
2231
2232   return -1U;
2233 }
2234
2235 /* Returns the bit number of the most significant set bit of a number.
2236    If the input number has no bits set -1U is returned.  */
2237 unsigned APInt::tcMSB(const WordType *parts, unsigned n) {
2238   do {
2239     --n;
2240
2241     if (parts[n] != 0) {
2242       unsigned msb = partMSB(parts[n]);
2243
2244       return msb + n * APINT_BITS_PER_WORD;
2245     }
2246   } while (n);
2247
2248   return -1U;
2249 }
2250
2251 /* Copy the bit vector of width srcBITS from SRC, starting at bit
2252    srcLSB, to DST, of dstCOUNT parts, such that the bit srcLSB becomes
2253    the least significant bit of DST.  All high bits above srcBITS in
2254    DST are zero-filled.  */
2255 void
2256 APInt::tcExtract(WordType *dst, unsigned dstCount, const WordType *src,
2257                  unsigned srcBits, unsigned srcLSB) {
2258   unsigned dstParts = (srcBits + APINT_BITS_PER_WORD - 1) / APINT_BITS_PER_WORD;
2259   assert(dstParts <= dstCount);
2260
2261   unsigned firstSrcPart = srcLSB / APINT_BITS_PER_WORD;
2262   tcAssign (dst, src + firstSrcPart, dstParts);
2263
2264   unsigned shift = srcLSB % APINT_BITS_PER_WORD;
2265   tcShiftRight (dst, dstParts, shift);
2266
2267   /* We now have (dstParts * APINT_BITS_PER_WORD - shift) bits from SRC
2268      in DST.  If this is less that srcBits, append the rest, else
2269      clear the high bits.  */
2270   unsigned n = dstParts * APINT_BITS_PER_WORD - shift;
2271   if (n < srcBits) {
2272     WordType mask = lowBitMask (srcBits - n);
2273     dst[dstParts - 1] |= ((src[firstSrcPart + dstParts] & mask)
2274                           << n % APINT_BITS_PER_WORD);
2275   } else if (n > srcBits) {
2276     if (srcBits % APINT_BITS_PER_WORD)
2277       dst[dstParts - 1] &= lowBitMask (srcBits % APINT_BITS_PER_WORD);
2278   }
2279
2280   /* Clear high parts.  */
2281   while (dstParts < dstCount)
2282     dst[dstParts++] = 0;
2283 }
2284
2285 /* DST += RHS + C where C is zero or one.  Returns the carry flag.  */
2286 APInt::WordType APInt::tcAdd(WordType *dst, const WordType *rhs,
2287                              WordType c, unsigned parts) {
2288   assert(c <= 1);
2289
2290   for (unsigned i = 0; i < parts; i++) {
2291     WordType l = dst[i];
2292     if (c) {
2293       dst[i] += rhs[i] + 1;
2294       c = (dst[i] <= l);
2295     } else {
2296       dst[i] += rhs[i];
2297       c = (dst[i] < l);
2298     }
2299   }
2300
2301   return c;
2302 }
2303
2304 /// This function adds a single "word" integer, src, to the multiple
2305 /// "word" integer array, dst[]. dst[] is modified to reflect the addition and
2306 /// 1 is returned if there is a carry out, otherwise 0 is returned.
2307 /// @returns the carry of the addition.
2308 APInt::WordType APInt::tcAddPart(WordType *dst, WordType src,
2309                                  unsigned parts) {
2310   for (unsigned i = 0; i < parts; ++i) {
2311     dst[i] += src;
2312     if (dst[i] >= src)
2313       return 0; // No need to carry so exit early.
2314     src = 1; // Carry one to next digit.
2315   }
2316
2317   return 1;
2318 }
2319
2320 /* DST -= RHS + C where C is zero or one.  Returns the carry flag.  */
2321 APInt::WordType APInt::tcSubtract(WordType *dst, const WordType *rhs,
2322                                   WordType c, unsigned parts) {
2323   assert(c <= 1);
2324
2325   for (unsigned i = 0; i < parts; i++) {
2326     WordType l = dst[i];
2327     if (c) {
2328       dst[i] -= rhs[i] + 1;
2329       c = (dst[i] >= l);
2330     } else {
2331       dst[i] -= rhs[i];
2332       c = (dst[i] > l);
2333     }
2334   }
2335
2336   return c;
2337 }
2338
2339 /// This function subtracts a single "word" (64-bit word), src, from
2340 /// the multi-word integer array, dst[], propagating the borrowed 1 value until
2341 /// no further borrowing is needed or it runs out of "words" in dst.  The result
2342 /// is 1 if "borrowing" exhausted the digits in dst, or 0 if dst was not
2343 /// exhausted. In other words, if src > dst then this function returns 1,
2344 /// otherwise 0.
2345 /// @returns the borrow out of the subtraction
2346 APInt::WordType APInt::tcSubtractPart(WordType *dst, WordType src,
2347                                       unsigned parts) {
2348   for (unsigned i = 0; i < parts; ++i) {
2349     WordType Dst = dst[i];
2350     dst[i] -= src;
2351     if (src <= Dst)
2352       return 0; // No need to borrow so exit early.
2353     src = 1; // We have to "borrow 1" from next "word"
2354   }
2355
2356   return 1;
2357 }
2358
2359 /* Negate a bignum in-place.  */
2360 void APInt::tcNegate(WordType *dst, unsigned parts) {
2361   tcComplement(dst, parts);
2362   tcIncrement(dst, parts);
2363 }
2364
2365 /*  DST += SRC * MULTIPLIER + CARRY   if add is true
2366     DST  = SRC * MULTIPLIER + CARRY   if add is false
2367
2368     Requires 0 <= DSTPARTS <= SRCPARTS + 1.  If DST overlaps SRC
2369     they must start at the same point, i.e. DST == SRC.
2370
2371     If DSTPARTS == SRCPARTS + 1 no overflow occurs and zero is
2372     returned.  Otherwise DST is filled with the least significant
2373     DSTPARTS parts of the result, and if all of the omitted higher
2374     parts were zero return zero, otherwise overflow occurred and
2375     return one.  */
2376 int APInt::tcMultiplyPart(WordType *dst, const WordType *src,
2377                           WordType multiplier, WordType carry,
2378                           unsigned srcParts, unsigned dstParts,
2379                           bool add) {
2380   /* Otherwise our writes of DST kill our later reads of SRC.  */
2381   assert(dst <= src || dst >= src + srcParts);
2382   assert(dstParts <= srcParts + 1);
2383
2384   /* N loops; minimum of dstParts and srcParts.  */
2385   unsigned n = std::min(dstParts, srcParts);
2386
2387   for (unsigned i = 0; i < n; i++) {
2388     WordType low, mid, high, srcPart;
2389
2390       /* [ LOW, HIGH ] = MULTIPLIER * SRC[i] + DST[i] + CARRY.
2391
2392          This cannot overflow, because
2393
2394          (n - 1) * (n - 1) + 2 (n - 1) = (n - 1) * (n + 1)
2395
2396          which is less than n^2.  */
2397
2398     srcPart = src[i];
2399
2400     if (multiplier == 0 || srcPart == 0) {
2401       low = carry;
2402       high = 0;
2403     } else {
2404       low = lowHalf(srcPart) * lowHalf(multiplier);
2405       high = highHalf(srcPart) * highHalf(multiplier);
2406
2407       mid = lowHalf(srcPart) * highHalf(multiplier);
2408       high += highHalf(mid);
2409       mid <<= APINT_BITS_PER_WORD / 2;
2410       if (low + mid < low)
2411         high++;
2412       low += mid;
2413
2414       mid = highHalf(srcPart) * lowHalf(multiplier);
2415       high += highHalf(mid);
2416       mid <<= APINT_BITS_PER_WORD / 2;
2417       if (low + mid < low)
2418         high++;
2419       low += mid;
2420
2421       /* Now add carry.  */
2422       if (low + carry < low)
2423         high++;
2424       low += carry;
2425     }
2426
2427     if (add) {
2428       /* And now DST[i], and store the new low part there.  */
2429       if (low + dst[i] < low)
2430         high++;
2431       dst[i] += low;
2432     } else
2433       dst[i] = low;
2434
2435     carry = high;
2436   }
2437
2438   if (srcParts < dstParts) {
2439     /* Full multiplication, there is no overflow.  */
2440     assert(srcParts + 1 == dstParts);
2441     dst[srcParts] = carry;
2442     return 0;
2443   }
2444
2445   /* We overflowed if there is carry.  */
2446   if (carry)
2447     return 1;
2448
2449   /* We would overflow if any significant unwritten parts would be
2450      non-zero.  This is true if any remaining src parts are non-zero
2451      and the multiplier is non-zero.  */
2452   if (multiplier)
2453     for (unsigned i = dstParts; i < srcParts; i++)
2454       if (src[i])
2455         return 1;
2456
2457   /* We fitted in the narrow destination.  */
2458   return 0;
2459 }
2460
2461 /* DST = LHS * RHS, where DST has the same width as the operands and
2462    is filled with the least significant parts of the result.  Returns
2463    one if overflow occurred, otherwise zero.  DST must be disjoint
2464    from both operands.  */
2465 int APInt::tcMultiply(WordType *dst, const WordType *lhs,
2466                       const WordType *rhs, unsigned parts) {
2467   assert(dst != lhs && dst != rhs);
2468
2469   int overflow = 0;
2470   tcSet(dst, 0, parts);
2471
2472   for (unsigned i = 0; i < parts; i++)
2473     overflow |= tcMultiplyPart(&dst[i], lhs, rhs[i], 0, parts,
2474                                parts - i, true);
2475
2476   return overflow;
2477 }
2478
2479 /// DST = LHS * RHS, where DST has width the sum of the widths of the
2480 /// operands. No overflow occurs. DST must be disjoint from both operands.
2481 void APInt::tcFullMultiply(WordType *dst, const WordType *lhs,
2482                            const WordType *rhs, unsigned lhsParts,
2483                            unsigned rhsParts) {
2484   /* Put the narrower number on the LHS for less loops below.  */
2485   if (lhsParts > rhsParts)
2486     return tcFullMultiply (dst, rhs, lhs, rhsParts, lhsParts);
2487
2488   assert(dst != lhs && dst != rhs);
2489
2490   tcSet(dst, 0, rhsParts);
2491
2492   for (unsigned i = 0; i < lhsParts; i++)
2493     tcMultiplyPart(&dst[i], rhs, lhs[i], 0, rhsParts, rhsParts + 1, true);
2494 }
2495
2496 /* If RHS is zero LHS and REMAINDER are left unchanged, return one.
2497    Otherwise set LHS to LHS / RHS with the fractional part discarded,
2498    set REMAINDER to the remainder, return zero.  i.e.
2499
2500    OLD_LHS = RHS * LHS + REMAINDER
2501
2502    SCRATCH is a bignum of the same size as the operands and result for
2503    use by the routine; its contents need not be initialized and are
2504    destroyed.  LHS, REMAINDER and SCRATCH must be distinct.
2505 */
2506 int APInt::tcDivide(WordType *lhs, const WordType *rhs,
2507                     WordType *remainder, WordType *srhs,
2508                     unsigned parts) {
2509   assert(lhs != remainder && lhs != srhs && remainder != srhs);
2510
2511   unsigned shiftCount = tcMSB(rhs, parts) + 1;
2512   if (shiftCount == 0)
2513     return true;
2514
2515   shiftCount = parts * APINT_BITS_PER_WORD - shiftCount;
2516   unsigned n = shiftCount / APINT_BITS_PER_WORD;
2517   WordType mask = (WordType) 1 << (shiftCount % APINT_BITS_PER_WORD);
2518
2519   tcAssign(srhs, rhs, parts);
2520   tcShiftLeft(srhs, parts, shiftCount);
2521   tcAssign(remainder, lhs, parts);
2522   tcSet(lhs, 0, parts);
2523
2524   /* Loop, subtracting SRHS if REMAINDER is greater and adding that to
2525      the total.  */
2526   for (;;) {
2527     int compare = tcCompare(remainder, srhs, parts);
2528     if (compare >= 0) {
2529       tcSubtract(remainder, srhs, 0, parts);
2530       lhs[n] |= mask;
2531     }
2532
2533     if (shiftCount == 0)
2534       break;
2535     shiftCount--;
2536     tcShiftRight(srhs, parts, 1);
2537     if ((mask >>= 1) == 0) {
2538       mask = (WordType) 1 << (APINT_BITS_PER_WORD - 1);
2539       n--;
2540     }
2541   }
2542
2543   return false;
2544 }
2545
2546 /// Shift a bignum left Cound bits in-place. Shifted in bits are zero. There are
2547 /// no restrictions on Count.
2548 void APInt::tcShiftLeft(WordType *Dst, unsigned Words, unsigned Count) {
2549   // Don't bother performing a no-op shift.
2550   if (!Count)
2551     return;
2552
2553   // WordShift is the inter-part shift; BitShift is the intra-part shift.
2554   unsigned WordShift = std::min(Count / APINT_BITS_PER_WORD, Words);
2555   unsigned BitShift = Count % APINT_BITS_PER_WORD;
2556
2557   // Fastpath for moving by whole words.
2558   if (BitShift == 0) {
2559     std::memmove(Dst + WordShift, Dst, (Words - WordShift) * APINT_WORD_SIZE);
2560   } else {
2561     while (Words-- > WordShift) {
2562       Dst[Words] = Dst[Words - WordShift] << BitShift;
2563       if (Words > WordShift)
2564         Dst[Words] |=
2565           Dst[Words - WordShift - 1] >> (APINT_BITS_PER_WORD - BitShift);
2566     }
2567   }
2568
2569   // Fill in the remainder with 0s.
2570   std::memset(Dst, 0, WordShift * APINT_WORD_SIZE);
2571 }
2572
2573 /// Shift a bignum right Count bits in-place. Shifted in bits are zero. There
2574 /// are no restrictions on Count.
2575 void APInt::tcShiftRight(WordType *Dst, unsigned Words, unsigned Count) {
2576   // Don't bother performing a no-op shift.
2577   if (!Count)
2578     return;
2579
2580   // WordShift is the inter-part shift; BitShift is the intra-part shift.
2581   unsigned WordShift = std::min(Count / APINT_BITS_PER_WORD, Words);
2582   unsigned BitShift = Count % APINT_BITS_PER_WORD;
2583
2584   unsigned WordsToMove = Words - WordShift;
2585   // Fastpath for moving by whole words.
2586   if (BitShift == 0) {
2587     std::memmove(Dst, Dst + WordShift, WordsToMove * APINT_WORD_SIZE);
2588   } else {
2589     for (unsigned i = 0; i != WordsToMove; ++i) {
2590       Dst[i] = Dst[i + WordShift] >> BitShift;
2591       if (i + 1 != WordsToMove)
2592         Dst[i] |= Dst[i + WordShift + 1] << (APINT_BITS_PER_WORD - BitShift);
2593     }
2594   }
2595
2596   // Fill in the remainder with 0s.
2597   std::memset(Dst + WordsToMove, 0, WordShift * APINT_WORD_SIZE);
2598 }
2599
2600 /* Bitwise and of two bignums.  */
2601 void APInt::tcAnd(WordType *dst, const WordType *rhs, unsigned parts) {
2602   for (unsigned i = 0; i < parts; i++)
2603     dst[i] &= rhs[i];
2604 }
2605
2606 /* Bitwise inclusive or of two bignums.  */
2607 void APInt::tcOr(WordType *dst, const WordType *rhs, unsigned parts) {
2608   for (unsigned i = 0; i < parts; i++)
2609     dst[i] |= rhs[i];
2610 }
2611
2612 /* Bitwise exclusive or of two bignums.  */
2613 void APInt::tcXor(WordType *dst, const WordType *rhs, unsigned parts) {
2614   for (unsigned i = 0; i < parts; i++)
2615     dst[i] ^= rhs[i];
2616 }
2617
2618 /* Complement a bignum in-place.  */
2619 void APInt::tcComplement(WordType *dst, unsigned parts) {
2620   for (unsigned i = 0; i < parts; i++)
2621     dst[i] = ~dst[i];
2622 }
2623
2624 /* Comparison (unsigned) of two bignums.  */
2625 int APInt::tcCompare(const WordType *lhs, const WordType *rhs,
2626                      unsigned parts) {
2627   while (parts) {
2628     parts--;
2629     if (lhs[parts] != rhs[parts])
2630       return (lhs[parts] > rhs[parts]) ? 1 : -1;
2631   }
2632
2633   return 0;
2634 }
2635
2636 /* Set the least significant BITS bits of a bignum, clear the
2637    rest.  */
2638 void APInt::tcSetLeastSignificantBits(WordType *dst, unsigned parts,
2639                                       unsigned bits) {
2640   unsigned i = 0;
2641   while (bits > APINT_BITS_PER_WORD) {
2642     dst[i++] = ~(WordType) 0;
2643     bits -= APINT_BITS_PER_WORD;
2644   }
2645
2646   if (bits)
2647     dst[i++] = ~(WordType) 0 >> (APINT_BITS_PER_WORD - bits);
2648
2649   while (i < parts)
2650     dst[i++] = 0;
2651 }
2652
2653 APInt llvm::APIntOps::RoundingUDiv(const APInt &A, const APInt &B,
2654                                    APInt::Rounding RM) {
2655   // Currently udivrem always rounds down.
2656   switch (RM) {
2657   case APInt::Rounding::DOWN:
2658   case APInt::Rounding::TOWARD_ZERO:
2659     return A.udiv(B);
2660   case APInt::Rounding::UP: {
2661     APInt Quo, Rem;
2662     APInt::udivrem(A, B, Quo, Rem);
2663     if (Rem == 0)
2664       return Quo;
2665     return Quo + 1;
2666   }
2667   }
2668   llvm_unreachable("Unknown APInt::Rounding enum");
2669 }
2670
2671 APInt llvm::APIntOps::RoundingSDiv(const APInt &A, const APInt &B,
2672                                    APInt::Rounding RM) {
2673   switch (RM) {
2674   case APInt::Rounding::DOWN:
2675   case APInt::Rounding::UP: {
2676     APInt Quo, Rem;
2677     APInt::sdivrem(A, B, Quo, Rem);
2678     if (Rem == 0)
2679       return Quo;
2680     // This algorithm deals with arbitrary rounding mode used by sdivrem.
2681     // We want to check whether the non-integer part of the mathematical value
2682     // is negative or not. If the non-integer part is negative, we need to round
2683     // down from Quo; otherwise, if it's positive or 0, we return Quo, as it's
2684     // already rounded down.
2685     if (RM == APInt::Rounding::DOWN) {
2686       if (Rem.isNegative() != B.isNegative())
2687         return Quo - 1;
2688       return Quo;
2689     }
2690     if (Rem.isNegative() != B.isNegative())
2691       return Quo;
2692     return Quo + 1;
2693   }
2694   // Currently sdiv rounds twards zero.
2695   case APInt::Rounding::TOWARD_ZERO:
2696     return A.sdiv(B);
2697   }
2698   llvm_unreachable("Unknown APInt::Rounding enum");
2699 }
2700
2701 Optional<APInt>
2702 llvm::APIntOps::SolveQuadraticEquationWrap(APInt A, APInt B, APInt C,
2703                                            unsigned RangeWidth) {
2704   unsigned CoeffWidth = A.getBitWidth();
2705   assert(CoeffWidth == B.getBitWidth() && CoeffWidth == C.getBitWidth());
2706   assert(RangeWidth <= CoeffWidth &&
2707          "Value range width should be less than coefficient width");
2708   assert(RangeWidth > 1 && "Value range bit width should be > 1");
2709
2710   LLVM_DEBUG(dbgs() << __func__ << ": solving " << A << "x^2 + " << B
2711                     << "x + " << C << ", rw:" << RangeWidth << '\n');
2712
2713   // Identify 0 as a (non)solution immediately.
2714   if (C.sextOrTrunc(RangeWidth).isNullValue() ) {
2715     LLVM_DEBUG(dbgs() << __func__ << ": zero solution\n");
2716     return APInt(CoeffWidth, 0);
2717   }
2718
2719   // The result of APInt arithmetic has the same bit width as the operands,
2720   // so it can actually lose high bits. A product of two n-bit integers needs
2721   // 2n-1 bits to represent the full value.
2722   // The operation done below (on quadratic coefficients) that can produce
2723   // the largest value is the evaluation of the equation during bisection,
2724   // which needs 3 times the bitwidth of the coefficient, so the total number
2725   // of required bits is 3n.
2726   //
2727   // The purpose of this extension is to simulate the set Z of all integers,
2728   // where n+1 > n for all n in Z. In Z it makes sense to talk about positive
2729   // and negative numbers (not so much in a modulo arithmetic). The method
2730   // used to solve the equation is based on the standard formula for real
2731   // numbers, and uses the concepts of "positive" and "negative" with their
2732   // usual meanings.
2733   CoeffWidth *= 3;
2734   A = A.sext(CoeffWidth);
2735   B = B.sext(CoeffWidth);
2736   C = C.sext(CoeffWidth);
2737
2738   // Make A > 0 for simplicity. Negate cannot overflow at this point because
2739   // the bit width has increased.
2740   if (A.isNegative()) {
2741     A.negate();
2742     B.negate();
2743     C.negate();
2744   }
2745
2746   // Solving an equation q(x) = 0 with coefficients in modular arithmetic
2747   // is really solving a set of equations q(x) = kR for k = 0, 1, 2, ...,
2748   // and R = 2^BitWidth.
2749   // Since we're trying not only to find exact solutions, but also values
2750   // that "wrap around", such a set will always have a solution, i.e. an x
2751   // that satisfies at least one of the equations, or such that |q(x)|
2752   // exceeds kR, while |q(x-1)| for the same k does not.
2753   //
2754   // We need to find a value k, such that Ax^2 + Bx + C = kR will have a
2755   // positive solution n (in the above sense), and also such that the n
2756   // will be the least among all solutions corresponding to k = 0, 1, ...
2757   // (more precisely, the least element in the set
2758   //   { n(k) | k is such that a solution n(k) exists }).
2759   //
2760   // Consider the parabola (over real numbers) that corresponds to the
2761   // quadratic equation. Since A > 0, the arms of the parabola will point
2762   // up. Picking different values of k will shift it up and down by R.
2763   //
2764   // We want to shift the parabola in such a way as to reduce the problem
2765   // of solving q(x) = kR to solving shifted_q(x) = 0.
2766   // (The interesting solutions are the ceilings of the real number
2767   // solutions.)
2768   APInt R = APInt::getOneBitSet(CoeffWidth, RangeWidth);
2769   APInt TwoA = 2 * A;
2770   APInt SqrB = B * B;
2771   bool PickLow;
2772
2773   auto RoundUp = [] (const APInt &V, const APInt &A) -> APInt {
2774     assert(A.isStrictlyPositive());
2775     APInt T = V.abs().urem(A);
2776     if (T.isNullValue())
2777       return V;
2778     return V.isNegative() ? V+T : V+(A-T);
2779   };
2780
2781   // The vertex of the parabola is at -B/2A, but since A > 0, it's negative
2782   // iff B is positive.
2783   if (B.isNonNegative()) {
2784     // If B >= 0, the vertex it at a negative location (or at 0), so in
2785     // order to have a non-negative solution we need to pick k that makes
2786     // C-kR negative. To satisfy all the requirements for the solution
2787     // that we are looking for, it needs to be closest to 0 of all k.
2788     C = C.srem(R);
2789     if (C.isStrictlyPositive())
2790       C -= R;
2791     // Pick the greater solution.
2792     PickLow = false;
2793   } else {
2794     // If B < 0, the vertex is at a positive location. For any solution
2795     // to exist, the discriminant must be non-negative. This means that
2796     // C-kR <= B^2/4A is a necessary condition for k, i.e. there is a
2797     // lower bound on values of k: kR >= C - B^2/4A.
2798     APInt LowkR = C - SqrB.udiv(2*TwoA); // udiv because all values > 0.
2799     // Round LowkR up (towards +inf) to the nearest kR.
2800     LowkR = RoundUp(LowkR, R);
2801
2802     // If there exists k meeting the condition above, and such that
2803     // C-kR > 0, there will be two positive real number solutions of
2804     // q(x) = kR. Out of all such values of k, pick the one that makes
2805     // C-kR closest to 0, (i.e. pick maximum k such that C-kR > 0).
2806     // In other words, find maximum k such that LowkR <= kR < C.
2807     if (C.sgt(LowkR)) {
2808       // If LowkR < C, then such a k is guaranteed to exist because
2809       // LowkR itself is a multiple of R.
2810       C -= -RoundUp(-C, R);      // C = C - RoundDown(C, R)
2811       // Pick the smaller solution.
2812       PickLow = true;
2813     } else {
2814       // If C-kR < 0 for all potential k's, it means that one solution
2815       // will be negative, while the other will be positive. The positive
2816       // solution will shift towards 0 if the parabola is moved up.
2817       // Pick the kR closest to the lower bound (i.e. make C-kR closest
2818       // to 0, or in other words, out of all parabolas that have solutions,
2819       // pick the one that is the farthest "up").
2820       // Since LowkR is itself a multiple of R, simply take C-LowkR.
2821       C -= LowkR;
2822       // Pick the greater solution.
2823       PickLow = false;
2824     }
2825   }
2826
2827   LLVM_DEBUG(dbgs() << __func__ << ": updated coefficients " << A << "x^2 + "
2828                     << B << "x + " << C << ", rw:" << RangeWidth << '\n');
2829
2830   APInt D = SqrB - 4*A*C;
2831   assert(D.isNonNegative() && "Negative discriminant");
2832   APInt SQ = D.sqrt();
2833
2834   APInt Q = SQ * SQ;
2835   bool InexactSQ = Q != D;
2836   // The calculated SQ may actually be greater than the exact (non-integer)
2837   // value. If that's the case, decremement SQ to get a value that is lower.
2838   if (Q.sgt(D))
2839     SQ -= 1;
2840
2841   APInt X;
2842   APInt Rem;
2843
2844   // SQ is rounded down (i.e SQ * SQ <= D), so the roots may be inexact.
2845   // When using the quadratic formula directly, the calculated low root
2846   // may be greater than the exact one, since we would be subtracting SQ.
2847   // To make sure that the calculated root is not greater than the exact
2848   // one, subtract SQ+1 when calculating the low root (for inexact value
2849   // of SQ).
2850   if (PickLow)
2851     APInt::sdivrem(-B - (SQ+InexactSQ), TwoA, X, Rem);
2852   else
2853     APInt::sdivrem(-B + SQ, TwoA, X, Rem);
2854
2855   // The updated coefficients should be such that the (exact) solution is
2856   // positive. Since APInt division rounds towards 0, the calculated one
2857   // can be 0, but cannot be negative.
2858   assert(X.isNonNegative() && "Solution should be non-negative");
2859
2860   if (!InexactSQ && Rem.isNullValue()) {
2861     LLVM_DEBUG(dbgs() << __func__ << ": solution (root): " << X << '\n');
2862     return X;
2863   }
2864
2865   assert((SQ*SQ).sle(D) && "SQ = |_sqrt(D)_|, so SQ*SQ <= D");
2866   // The exact value of the square root of D should be between SQ and SQ+1.
2867   // This implies that the solution should be between that corresponding to
2868   // SQ (i.e. X) and that corresponding to SQ+1.
2869   //
2870   // The calculated X cannot be greater than the exact (real) solution.
2871   // Actually it must be strictly less than the exact solution, while
2872   // X+1 will be greater than or equal to it.
2873
2874   APInt VX = (A*X + B)*X + C;
2875   APInt VY = VX + TwoA*X + A + B;
2876   bool SignChange = VX.isNegative() != VY.isNegative() ||
2877                     VX.isNullValue() != VY.isNullValue();
2878   // If the sign did not change between X and X+1, X is not a valid solution.
2879   // This could happen when the actual (exact) roots don't have an integer
2880   // between them, so they would both be contained between X and X+1.
2881   if (!SignChange) {
2882     LLVM_DEBUG(dbgs() << __func__ << ": no valid solution\n");
2883     return None;
2884   }
2885
2886   X += 1;
2887   LLVM_DEBUG(dbgs() << __func__ << ": solution (wrap): " << X << '\n');
2888   return X;
2889 }