PR14972: SROA vs. GVN exposed a really bad bug in SROA.

author Chandler Carruth <chandlerc@gmail.com>

Thu, 14 Mar 2013 11:32:24 +0000 (11:32 +0000)

committer Chandler Carruth <chandlerc@gmail.com>

Thu, 14 Mar 2013 11:32:24 +0000 (11:32 +0000)
author Chandler Carruth <chandlerc@gmail.com>
Thu, 14 Mar 2013 11:32:24 +0000 (11:32 +0000)
committer Chandler Carruth <chandlerc@gmail.com>
Thu, 14 Mar 2013 11:32:24 +0000 (11:32 +0000)
diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp

index 810a553..cffbc39 100644 (file)
--- a/lib/Transforms/Scalar/SROA.cpp
+++ b/lib/Transforms/Scalar/SROA.cpp
@@ -163,16 +163,27 @@ public:
    /// The bounds of these uses are determined by intersecting the bounds of the
    /// memory use itself with a particular partition. As a consequence there is
    /// intentionally overlap between various uses of the same partition.
-  struct PartitionUse : public ByteRange {
+  class PartitionUse : public ByteRange {
+    /// \brief Combined storage for both the Use* and split state.
+    PointerIntPair<Use*, 1, bool> UsePtrAndIsSplit;
+
+  public:
+    PartitionUse() : ByteRange(), UsePtrAndIsSplit() {}
+    PartitionUse(uint64_t BeginOffset, uint64_t EndOffset, Use *U,
+                 bool IsSplit)
+        : ByteRange(BeginOffset, EndOffset), UsePtrAndIsSplit(U, IsSplit) {}
+
      /// \brief The use in question. Provides access to both user and used value.
      ///
      /// Note that this may be null if the partition use is *dead*, that is, it
      /// should be ignored.
-    Use *U;
+    Use *getUse() const { return UsePtrAndIsSplit.getPointer(); }
+
+    /// \brief Set the use for this partition use range.
+    void setUse(Use *U) { UsePtrAndIsSplit.setPointer(U); }
  
-    PartitionUse() : ByteRange(), U() {}
-    PartitionUse(uint64_t BeginOffset, uint64_t EndOffset, Use *U)
-        : ByteRange(BeginOffset, EndOffset), U(U) {}
+    /// \brief Whether this use is split across multiple partitions.
+    bool isSplit() const { return UsePtrAndIsSplit.getInt(); }
    };
  
    /// \brief Construct a partitioning of a particular alloca.
@@ -456,10 +467,10 @@ private:
  
      // Clamp the end offset to the end of the allocation. Note that this is
      // formulated to handle even the case where "BeginOffset + Size" overflows.
-    // NOTE! This may appear superficially to be something we could ignore
-    // entirely, but that is not so! There may be PHI-node uses where some
-    // instructions are dead but not others. We can't completely ignore the
-    // PHI node, and so have to record at least the information here.
+    // This may appear superficially to be something we could ignore entirely,
+    // but that is not so! There may be widened loads or PHI-node uses where
+    // some instructions are dead but not others. We can't completely ignore
+    // them, and so have to record at least the information here.
      assert(AllocSize >= BeginOffset); // Established above.
      if (Size > AllocSize - BeginOffset) {
        DEBUG(dbgs() << "WARNING: Clamping a " << Size << " byte use @" << Offset
@@ -474,33 +485,17 @@ private:
    }
  
    void handleLoadOrStore(Type *Ty, Instruction &I, const APInt &Offset,
-                         bool IsVolatile) {
-    uint64_t Size = DL.getTypeStoreSize(Ty);
-
-    // If this memory access can be shown to *statically* extend outside the
-    // bounds of of the allocation, it's behavior is undefined, so simply
-    // ignore it. Note that this is more strict than the generic clamping
-    // behavior of insertUse. We also try to handle cases which might run the
-    // risk of overflow.
-    // FIXME: We should instead consider the pointer to have escaped if this
-    // function is being instrumented for addressing bugs or race conditions.
-    if (Offset.isNegative() || Size > AllocSize ||
-        Offset.ugt(AllocSize - Size)) {
-      DEBUG(dbgs() << "WARNING: Ignoring " << Size << " byte "
-                   << (isa<LoadInst>(I) ? "load" : "store") << " @" << Offset
-                   << " which extends past the end of the " << AllocSize
-                   << " byte alloca:\n"
-                   << "    alloca: " << P.AI << "\n"
-                   << "       use: " << I << "\n");
-      return;
-    }
-
+                         uint64_t Size, bool IsVolatile) {
      // We allow splitting of loads and stores where the type is an integer type
-    // and which cover the entire alloca. Such integer loads and stores
-    // often require decomposition into fine grained loads and stores.
-    bool IsSplittable = false;
-    if (IntegerType *ITy = dyn_cast<IntegerType>(Ty))
-      IsSplittable = !IsVolatile && ITy->getBitWidth() == AllocSize*8;
+    // and cover the entire alloca. This prevents us from splitting over
+    // eagerly.
+    // FIXME: In the great blue eventually, we should eagerly split all integer
+    // loads and stores, and then have a separate step that merges adjacent
+    // alloca partitions into a single partition suitable for integer widening.
+    // Or we should skip the merge step and rely on GVN and other passes to
+    // merge adjacent loads and stores that survive mem2reg.
+    bool IsSplittable =
+        Ty->isIntegerTy() && !IsVolatile && Offset == 0 && Size >= AllocSize;
  
      insertUse(I, Offset, Size, IsSplittable);
    }
@@ -512,7 +507,8 @@ private:
      if (!IsOffsetKnown)
        return PI.setAborted(&LI);
  
-    return handleLoadOrStore(LI.getType(), LI, Offset, LI.isVolatile());
+    uint64_t Size = DL.getTypeStoreSize(LI.getType());
+    return handleLoadOrStore(LI.getType(), LI, Offset, Size, LI.isVolatile());
    }
  
    void visitStoreInst(StoreInst &SI) {
@@ -522,9 +518,28 @@ private:
      if (!IsOffsetKnown)
        return PI.setAborted(&SI);
  
+    uint64_t Size = DL.getTypeStoreSize(ValOp->getType());
+
+    // If this memory access can be shown to *statically* extend outside the
+    // bounds of of the allocation, it's behavior is undefined, so simply
+    // ignore it. Note that this is more strict than the generic clamping
+    // behavior of insertUse. We also try to handle cases which might run the
+    // risk of overflow.
+    // FIXME: We should instead consider the pointer to have escaped if this
+    // function is being instrumented for addressing bugs or race conditions.
+    if (Offset.isNegative() || Size > AllocSize ||
+        Offset.ugt(AllocSize - Size)) {
+      DEBUG(dbgs() << "WARNING: Ignoring " << Size << " byte store @" << Offset
+                   << " which extends past the end of the " << AllocSize
+                   << " byte alloca:\n"
+                   << "    alloca: " << P.AI << "\n"
+                   << "       use: " << SI << "\n");
+      return;
+    }
+
      assert((!SI.isSimple() || ValOp->getType()->isSingleValueType()) &&
             "All simple FCA stores should have been pre-split");
-    handleLoadOrStore(ValOp->getType(), SI, Offset, SI.isVolatile());
+    handleLoadOrStore(ValOp->getType(), SI, Offset, Size, SI.isVolatile());
    }
  
  
@@ -795,13 +810,14 @@ private:
        EndOffset = AllocSize;
  
      // NB: This only works if we have zero overlapping partitions.
-    iterator B = std::lower_bound(P.begin(), P.end(), BeginOffset);
-    if (B != P.begin() && llvm::prior(B)->EndOffset > BeginOffset)
-      B = llvm::prior(B);
-    for (iterator I = B, E = P.end(); I != E && I->BeginOffset < EndOffset;
-         ++I) {
+    iterator I = std::lower_bound(P.begin(), P.end(), BeginOffset);
+    if (I != P.begin() && llvm::prior(I)->EndOffset > BeginOffset)
+      I = llvm::prior(I);
+    iterator E = P.end();
+    bool IsSplit = llvm::next(I) != E && llvm::next(I)->BeginOffset < EndOffset;
+    for (; I != E && I->BeginOffset < EndOffset; ++I) {
        PartitionUse NewPU(std::max(I->BeginOffset, BeginOffset),
-                         std::min(I->EndOffset, EndOffset), U);
+                         std::min(I->EndOffset, EndOffset), U, IsSplit);
        P.use_push_back(I, NewPU);
        if (isa<PHINode>(U->getUser()) || isa<SelectInst>(U->getUser()))
          P.PHIOrSelectOpMap[U]
@@ -809,20 +825,6 @@ private:
      }
    }
  
-  void handleLoadOrStore(Type *Ty, Instruction &I, const APInt &Offset) {
-    uint64_t Size = DL.getTypeStoreSize(Ty);
-
-    // If this memory access can be shown to *statically* extend outside the
-    // bounds of of the allocation, it's behavior is undefined, so simply
-    // ignore it. Note that this is more strict than the generic clamping
-    // behavior of insertUse.
-    if (Offset.isNegative() || Size > AllocSize ||
-        Offset.ugt(AllocSize - Size))
-      return markAsDead(I);
-
-    insertUse(I, Offset, Size);
-  }
-
    void visitBitCastInst(BitCastInst &BC) {
      if (BC.use_empty())
        return markAsDead(BC);
@@ -839,12 +841,23 @@ private:
  
    void visitLoadInst(LoadInst &LI) {
      assert(IsOffsetKnown);
-    handleLoadOrStore(LI.getType(), LI, Offset);
+    uint64_t Size = DL.getTypeStoreSize(LI.getType());
+    insertUse(LI, Offset, Size);
    }
  
    void visitStoreInst(StoreInst &SI) {
      assert(IsOffsetKnown);
-    handleLoadOrStore(SI.getOperand(0)->getType(), SI, Offset);
+    uint64_t Size = DL.getTypeStoreSize(SI.getOperand(0)->getType());
+
+    // If this memory access can be shown to *statically* extend outside the
+    // bounds of of the allocation, it's behavior is undefined, so simply
+    // ignore it. Note that this is more strict than the generic clamping
+    // behavior of insertUse.
+    if (Offset.isNegative() || Size > AllocSize ||
+        Offset.ugt(AllocSize - Size))
+      return markAsDead(SI);
+
+    insertUse(SI, Offset, Size);
    }
  
    void visitMemSetInst(MemSetInst &II) {
@@ -1089,17 +1102,18 @@ AllocaPartitioning::AllocaPartitioning(const DataLayout &TD, AllocaInst &AI)
  Type *AllocaPartitioning::getCommonType(iterator I) const {
    Type *Ty = 0;
    for (const_use_iterator UI = use_begin(I), UE = use_end(I); UI != UE; ++UI) {
-    if (!UI->U)
+    Use *U = UI->getUse();
+    if (!U)
        continue; // Skip dead uses.
-    if (isa<IntrinsicInst>(*UI->U->getUser()))
+    if (isa<IntrinsicInst>(*U->getUser()))
        continue;
      if (UI->BeginOffset != I->BeginOffset || UI->EndOffset != I->EndOffset)
        continue;
  
      Type *UserTy = 0;
-    if (LoadInst *LI = dyn_cast<LoadInst>(UI->U->getUser()))
+    if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser()))
        UserTy = LI->getType();
-    else if (StoreInst *SI = dyn_cast<StoreInst>(UI->U->getUser()))
+    else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser()))
        UserTy = SI->getValueOperand()->getType();
      else
        return 0; // Bail if we have weird uses.
@@ -1139,11 +1153,12 @@ void AllocaPartitioning::print(raw_ostream &OS, const_iterator I,
  void AllocaPartitioning::printUsers(raw_ostream &OS, const_iterator I,
                                      StringRef Indent) const {
    for (const_use_iterator UI = use_begin(I), UE = use_end(I); UI != UE; ++UI) {
-    if (!UI->U)
+    if (!UI->getUse())
        continue; // Skip dead uses.
      OS << Indent << "  [" << UI->BeginOffset << "," << UI->EndOffset << ") "
-       << "used by: " << *UI->U->getUser() << "\n";
-    if (MemTransferInst *II = dyn_cast<MemTransferInst>(UI->U->getUser())) {
+       << "used by: " << *UI->getUse()->getUser() << "\n";
+    if (MemTransferInst *II =
+            dyn_cast<MemTransferInst>(UI->getUse()->getUser())) {
        const MemTransferOffsets &MTO = MemTransferInstData.lookup(II);
        bool IsDest;
        if (!MTO.IsSplittable)
@@ -1375,10 +1390,10 @@ public:
      // new uses, and so we can use the initial size bound.
      for (unsigned Idx = 0, Size = P.use_size(PI); Idx != Size; ++Idx) {
        const AllocaPartitioning::PartitionUse &PU = P.getUse(PI, Idx);
-      if (!PU.U)
+      if (!PU.getUse())
          continue; // Skip dead use.
  
-      visit(cast<Instruction>(PU.U->getUser()));
+      visit(cast<Instruction>(PU.getUse()->getUser()));
      }
    }
  
@@ -1522,8 +1537,8 @@ private:
        // inside the load.
        AllocaPartitioning::use_iterator UI
          = P.findPartitionUseForPHIOrSelectOperand(InUse);
-      assert(isa<PHINode>(*UI->U->getUser()));
-      UI->U = &Load->getOperandUse(Load->getPointerOperandIndex());
+      assert(isa<PHINode>(*UI->getUse()->getUser()));
+      UI->setUse(&Load->getOperandUse(Load->getPointerOperandIndex()));
      }
      DEBUG(dbgs() << "          speculated to: " << *NewPN << "\n");
    }
@@ -1590,7 +1605,7 @@ private:
          PUs[i] = *UI;
          // Clear out the use here so that the offsets into the use list remain
          // stable but this use is ignored when rewriting.
-        UI->U = 0;
+        UI->setUse(0);
        }
      }
  
@@ -1622,8 +1637,8 @@ private:
        for (unsigned i = 0, e = 2; i != e; ++i) {
          if (PIs[i] != P.end()) {
            Use *LoadUse = &Loads[i]->getOperandUse(0);
-          assert(PUs[i].U->get() == LoadUse->get());
-          PUs[i].U = LoadUse;
+          assert(PUs[i].getUse()->get() == LoadUse->get());
+          PUs[i].setUse(LoadUse);
            P.use_push_back(PIs[i], PUs[i]);
          }
        }
@@ -1910,6 +1925,10 @@ static Value *getAdjustedPtr(IRBuilder<> &IRB, const DataLayout &TD,
  static bool canConvertValue(const DataLayout &DL, Type *OldTy, Type *NewTy) {
    if (OldTy == NewTy)
      return true;
+  if (IntegerType *OldITy = dyn_cast<IntegerType>(OldTy))
+    if (IntegerType *NewITy = dyn_cast<IntegerType>(NewTy))
+      if (NewITy->getBitWidth() >= OldITy->getBitWidth())
+        return true;
    if (DL.getTypeSizeInBits(NewTy) != DL.getTypeSizeInBits(OldTy))
      return false;
    if (!NewTy->isSingleValueType() || !OldTy->isSingleValueType())
@@ -1938,6 +1957,10 @@ static Value *convertValue(const DataLayout &DL, IRBuilder<> &IRB, Value *V,
           "Value not convertable to type");
    if (V->getType() == Ty)
      return V;
+  if (IntegerType *OldITy = dyn_cast<IntegerType>(V->getType()))
+    if (IntegerType *NewITy = dyn_cast<IntegerType>(Ty))
+      if (NewITy->getBitWidth() > OldITy->getBitWidth())
+        return IRB.CreateZExt(V, NewITy);
    if (V->getType()->isIntegerTy() && Ty->isPointerTy())
      return IRB.CreateIntToPtr(V, Ty);
    if (V->getType()->isPointerTy() && Ty->isIntegerTy())
@@ -1976,7 +1999,8 @@ static bool isVectorPromotionViable(const DataLayout &TD,
    ElementSize /= 8;
  
    for (; I != E; ++I) {
-    if (!I->U)
+    Use *U = I->getUse();
+    if (!U)
        continue; // Skip dead use.
  
      uint64_t BeginOffset = I->BeginOffset - PartitionBeginOffset;
@@ -1996,24 +2020,24 @@ static bool isVectorPromotionViable(const DataLayout &TD,
        = (NumElements == 1) ? Ty->getElementType()
                             : VectorType::get(Ty->getElementType(), NumElements);
  
-    if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I->U->getUser())) {
+    if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(U->getUser())) {
        if (MI->isVolatile())
          return false;
-      if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(I->U->getUser())) {
+      if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(U->getUser())) {
          const AllocaPartitioning::MemTransferOffsets &MTO
            = P.getMemTransferOffsets(*MTI);
          if (!MTO.IsSplittable)
            return false;
        }
-    } else if (I->U->get()->getType()->getPointerElementType()->isStructTy()) {
+    } else if (U->get()->getType()->getPointerElementType()->isStructTy()) {
        // Disable vector promotion when there are loads or stores of an FCA.
        return false;
-    } else if (LoadInst *LI = dyn_cast<LoadInst>(I->U->getUser())) {
+    } else if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) {
        if (LI->isVolatile())
          return false;
        if (!canConvertValue(TD, PartitionTy, LI->getType()))
          return false;
-    } else if (StoreInst *SI = dyn_cast<StoreInst>(I->U->getUser())) {
+    } else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser())) {
        if (SI->isVolatile())
          return false;
        if (!canConvertValue(TD, SI->getValueOperand()->getType(), PartitionTy))
@@ -2062,7 +2086,8 @@ static bool isIntegerWideningViable(const DataLayout &TD,
    // unsplittable entry (which we may make splittable later).
    bool WholeAllocaOp = false;
    for (; I != E; ++I) {
-    if (!I->U)
+    Use *U = I->getUse();
+    if (!U)
        continue; // Skip dead use.
  
      uint64_t RelBegin = I->BeginOffset - AllocBeginOffset;
@@ -2073,7 +2098,7 @@ static bool isIntegerWideningViable(const DataLayout &TD,
      if (RelEnd > Size)
        return false;
  
-    if (LoadInst *LI = dyn_cast<LoadInst>(I->U->getUser())) {
+    if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) {
        if (LI->isVolatile())
          return false;
        if (RelBegin == 0 && RelEnd == Size)
@@ -2088,7 +2113,7 @@ static bool isIntegerWideningViable(const DataLayout &TD,
        if (RelBegin != 0 || RelEnd != Size ||
            !canConvertValue(TD, AllocaTy, LI->getType()))
          return false;
-    } else if (StoreInst *SI = dyn_cast<StoreInst>(I->U->getUser())) {
+    } else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser())) {
        Type *ValueTy = SI->getValueOperand()->getType();
        if (SI->isVolatile())
          return false;
@@ -2104,16 +2129,16 @@ static bool isIntegerWideningViable(const DataLayout &TD,
        if (RelBegin != 0 || RelEnd != Size ||
            !canConvertValue(TD, ValueTy, AllocaTy))
          return false;
-    } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I->U->getUser())) {
+    } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(U->getUser())) {
        if (MI->isVolatile() || !isa<Constant>(MI->getLength()))
          return false;
-      if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(I->U->getUser())) {
+      if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(U->getUser())) {
          const AllocaPartitioning::MemTransferOffsets &MTO
            = P.getMemTransferOffsets(*MTI);
          if (!MTO.IsSplittable)
            return false;
        }
-    } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I->U->getUser())) {
+    } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U->getUser())) {
        if (II->getIntrinsicID() != Intrinsic::lifetime_start &&
            II->getIntrinsicID() != Intrinsic::lifetime_end)
          return false;
@@ -2296,6 +2321,7 @@ class AllocaPartitionRewriter : public InstVisitor<AllocaPartitionRewriter,
  
    // The offset of the partition user currently being rewritten.
    uint64_t BeginOffset, EndOffset;
+  bool IsSplit;
    Use *OldUse;
    Instruction *OldPtr;
  
@@ -2313,7 +2339,7 @@ public:
        NewAllocaEndOffset(NewEndOffset),
        NewAllocaTy(NewAI.getAllocatedType()),
        VecTy(), ElementTy(), ElementSize(), IntTy(),
-      BeginOffset(), EndOffset() {
+      BeginOffset(), EndOffset(), IsSplit(), OldUse(), OldPtr() {
    }
  
    /// \brief Visit the users of the alloca partition and rewrite them.
@@ -2335,14 +2361,15 @@ public:
      }
      bool CanSROA = true;
      for (; I != E; ++I) {
-      if (!I->U)
+      if (!I->getUse())
          continue; // Skip dead uses.
        BeginOffset = I->BeginOffset;
        EndOffset = I->EndOffset;
-      OldUse = I->U;
-      OldPtr = cast<Instruction>(I->U->get());
+      IsSplit = I->isSplit();
+      OldUse = I->getUse();
+      OldPtr = cast<Instruction>(OldUse->get());
        NamePrefix = (Twine(NewAI.getName()) + "." + Twine(BeginOffset)).str();
-      CanSROA &= visit(cast<Instruction>(I->U->getUser()));
+      CanSROA &= visit(cast<Instruction>(OldUse->getUser()));
      }
      if (VecTy) {
        assert(CanSROA);
@@ -2451,27 +2478,10 @@ private:
      assert(OldOp == OldPtr);
  
      uint64_t Size = EndOffset - BeginOffset;
-    bool IsSplitIntLoad = Size < TD.getTypeStoreSize(LI.getType());
-
-    // If this memory access can be shown to *statically* extend outside the
-    // bounds of the original allocation it's behavior is undefined. Rather
-    // than trying to transform it, just replace it with undef.
-    // FIXME: We should do something more clever for functions being
-    // instrumented by asan.
-    // FIXME: Eventually, once ASan and friends can flush out bugs here, this
-    // should be transformed to a load of null making it unreachable.
-    uint64_t OldAllocSize = TD.getTypeAllocSize(OldAI.getAllocatedType());
-    if (TD.getTypeStoreSize(LI.getType()) > OldAllocSize) {
-      LI.replaceAllUsesWith(UndefValue::get(LI.getType()));
-      Pass.DeadInsts.insert(&LI);
-      deleteIfTriviallyDead(OldOp);
-      DEBUG(dbgs() << "          to: undef!!\n");
-      return true;
-    }
  
      IRBuilder<> IRB(&LI);
-    Type *TargetTy = IsSplitIntLoad ? Type::getIntNTy(LI.getContext(), Size * 8)
-                                    : LI.getType();
+    Type *TargetTy = IsSplit ? Type::getIntNTy(LI.getContext(), Size * 8)
+                             : LI.getType();
      bool IsPtrAdjusted = false;
      Value *V;
      if (VecTy) {
@@ -2491,16 +2501,15 @@ private:
      }
      V = convertValue(TD, IRB, V, TargetTy);
  
-    if (IsSplitIntLoad) {
+    if (IsSplit) {
        assert(!LI.isVolatile());
        assert(LI.getType()->isIntegerTy() &&
               "Only integer type loads and stores are split");
+      assert(Size < TD.getTypeStoreSize(LI.getType()) &&
+             "Split load isn't smaller than original load");
        assert(LI.getType()->getIntegerBitWidth() ==
               TD.getTypeStoreSizeInBits(LI.getType()) &&
               "Non-byte-multiple bit width");
-      assert(LI.getType()->getIntegerBitWidth() ==
-             TD.getTypeAllocSizeInBits(OldAI.getAllocatedType()) &&
-             "Only alloca-wide loads can be split and recomposed");
        // Move the insertion point just past the load so that we can refer to it.
        IRB.SetInsertPoint(llvm::next(BasicBlock::iterator(&LI)));
        // Create a placeholder value with the same type as LI to use as the
@@ -2587,14 +2596,12 @@ private:
      uint64_t Size = EndOffset - BeginOffset;
      if (Size < TD.getTypeStoreSize(V->getType())) {
        assert(!SI.isVolatile());
+      assert(IsSplit && "A seemingly split store isn't splittable");
        assert(V->getType()->isIntegerTy() &&
               "Only integer type loads and stores are split");
        assert(V->getType()->getIntegerBitWidth() ==
               TD.getTypeStoreSizeInBits(V->getType()) &&
               "Non-byte-multiple bit width");
-      assert(V->getType()->getIntegerBitWidth() ==
-             TD.getTypeAllocSizeInBits(OldAI.getAllocatedType()) &&
-             "Only alloca-wide stores can be split and recomposed");
        IntegerType *NarrowTy = Type::getIntNTy(SI.getContext(), Size * 8);
        V = extractInteger(TD, IRB, V, NarrowTy, BeginOffset,
                           getName(".extract"));
@@ -3380,7 +3387,7 @@ bool SROA::rewriteAllocaPartition(AllocaInst &AI,
    for (AllocaPartitioning::use_iterator UI = P.use_begin(PI),
                                          UE = P.use_end(PI);
         UI != UE && !IsLive; ++UI)
-    if (UI->U)
+    if (UI->getUse())
        IsLive = true;
    if (!IsLive)
      return false; // No live uses left of this partition.
diff --git a/test/Transforms/SROA/basictest.ll b/test/Transforms/SROA/basictest.ll

index efc01ac..30dd217 100644 (file)
--- a/test/Transforms/SROA/basictest.ll
+++ b/test/Transforms/SROA/basictest.ll
@@ -500,14 +500,27 @@ entry:
  
  define i64 @test9() {
  ; Ensure we can handle loads off the end of an alloca even when wrapped in
-; weird bit casts and types. The result is undef, but this shouldn't crash
-; anything.
+; weird bit casts and types. This is valid IR due to the alignment and masking
+; off the bits past the end of the alloca.
+;
  ; CHECK: @test9
  ; CHECK-NOT: alloca
-; CHECK: ret i64 undef
+; CHECK:      %[[b2:.*]] = zext i8 26 to i64
+; CHECK-NEXT: %[[s2:.*]] = shl i64 %[[b2]], 16
+; CHECK-NEXT: %[[m2:.*]] = and i64 undef, -16711681
+; CHECK-NEXT: %[[i2:.*]] = or i64 %[[m2]], %[[s2]]
+; CHECK-NEXT: %[[b1:.*]] = zext i8 0 to i64
+; CHECK-NEXT: %[[s1:.*]] = shl i64 %[[b1]], 8
+; CHECK-NEXT: %[[m1:.*]] = and i64 %[[i2]], -65281
+; CHECK-NEXT: %[[i1:.*]] = or i64 %[[m1]], %[[s1]]
+; CHECK-NEXT: %[[b0:.*]] = zext i8 0 to i64
+; CHECK-NEXT: %[[m0:.*]] = and i64 %[[i1]], -256
+; CHECK-NEXT: %[[i0:.*]] = or i64 %[[m0]], %[[b0]]
+; CHECK-NEXT: %[[result:.*]] = and i64 %[[i0]], 16777215
+; CHECK-NEXT: ret i64 %[[result]]
  
  entry:
-  %a = alloca { [3 x i8] }
+  %a = alloca { [3 x i8] }, align 8
    %gep1 = getelementptr inbounds { [3 x i8] }* %a, i32 0, i32 0, i32 0
    store i8 0, i8* %gep1, align 1
    %gep2 = getelementptr inbounds { [3 x i8] }* %a, i32 0, i32 0, i32 1
@@ -516,7 +529,8 @@ entry:
    store i8 26, i8* %gep3, align 1
    %cast = bitcast { [3 x i8] }* %a to { i64 }*
    %elt = getelementptr inbounds { i64 }* %cast, i32 0, i32 0
-  %result = load i64* %elt
+  %load = load i64* %elt
+  %result = and i64 %load, 16777215
    ret i64 %result
  }
  
@@ -617,11 +631,12 @@ define i32 @test13() {
  ; Ensure we don't crash and handle undefined loads that straddle the end of the
  ; allocation.
  ; CHECK: @test13
-; CHECK: %[[ret:.*]] = zext i16 undef to i32
-; CHECK: ret i32 %[[ret]]
+; CHECK:      %[[value:.*]] = zext i8 0 to i16
+; CHECK-NEXT: %[[ret:.*]] = zext i16 %[[value]] to i32
+; CHECK-NEXT: ret i32 %[[ret]]
  
  entry:
-  %a = alloca [3 x i8]
+  %a = alloca [3 x i8], align 2
    %b0ptr = getelementptr [3 x i8]* %a, i64 0, i32 0
    store i8 0, i8* %b0ptr
    %b1ptr = getelementptr [3 x i8]* %a, i64 0, i32 1
@@ -1160,20 +1175,25 @@ define void @PR14548(i1 %x) {
  entry:
    %a = alloca <{ i1 }>, align 8
    %b = alloca <{ i1 }>, align 8
-; Nothing of interest is simplified here.
-; CHECK: alloca
-; CHECK: alloca
+; CHECK:      %[[a:.*]] = alloca i8, align 8
  
    %b.i1 = bitcast <{ i1 }>* %b to i1*
    store i1 %x, i1* %b.i1, align 8
    %b.i8 = bitcast <{ i1 }>* %b to i8*
    %foo = load i8* %b.i8, align 1
+; CHECK-NEXT: {{.*}} = zext i1 %x to i8
+; CHECK-NEXT: %[[ext:.*]] = zext i1 %x to i8
+; CHECK-NEXT: store i8 %[[ext]], i8* %[[a]], align 8
+; CHECK-NEXT: {{.*}} = load i8* %[[a]], align 8
  
    %a.i8 = bitcast <{ i1 }>* %a to i8*
    call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a.i8, i8* %b.i8, i32 1, i32 1, i1 false) nounwind
    %bar = load i8* %a.i8, align 1
    %a.i1 = getelementptr inbounds <{ i1 }>* %a, i32 0, i32 0
    %baz = load i1* %a.i1, align 1
+; CHECK-NEXT: %[[a_cast:.*]] = bitcast i8* %[[a]] to i1*
+; CHECK-NEXT: {{.*}} = load i1* %[[a_cast]], align 8
+
    ret void
  }
  
diff --git a/test/Transforms/SROA/phi-and-select.ll b/test/Transforms/SROA/phi-and-select.ll

index 921016a..b993180 100644 (file)
--- a/test/Transforms/SROA/phi-and-select.ll
+++ b/test/Transforms/SROA/phi-and-select.ll
@@ -396,9 +396,10 @@ define i64 @PR14132(i1 %flag) {
  ; Here we form a PHI-node by promoting the pointer alloca first, and then in
  ; order to promote the other two allocas, we speculate the load of the
  ; now-phi-node-pointer. In doing so we end up loading a 64-bit value from an i8
-; alloca, which is completely bogus. However, we were asserting on trying to
-; rewrite it. Now it is replaced with undef. Eventually we may replace it with
-; unrechable and even the CFG will go away here.
+; alloca. While this is a bit dubious, we were asserting on trying to
+; rewrite it. The trick is that the code using the value may carefully take
+; steps to only use the not-undef bits, and so we need to at least loosely
+; support this..
  entry:
    %a = alloca i64
    %b = alloca i8
@@ -414,13 +415,14 @@ entry:
  if.then:
    store i8* %b, i8** %ptr.cast
    br label %if.end
+; CHECK-NOT: store
+; CHECK: %[[ext:.*]] = zext i8 1 to i64
  
  if.end:
    %tmp = load i64** %ptr
    %result = load i64* %tmp
-; CHECK-NOT: store
  ; CHECK-NOT: load
-; CHECK: %[[result:.*]] = phi i64 [ undef, %if.then ], [ 0, %entry ]
+; CHECK: %[[result:.*]] = phi i64 [ %[[ext]], %if.then ], [ 0, %entry ]
  
    ret i64 %result
  ; CHECK-NEXT: ret i64 %[[result]]
author	Chandler Carruth <chandlerc@gmail.com>
	Thu, 14 Mar 2013 11:32:24 +0000 (11:32 +0000)
committer	Chandler Carruth <chandlerc@gmail.com>
	Thu, 14 Mar 2013 11:32:24 +0000 (11:32 +0000)
lib/Transforms/Scalar/SROA.cpp		patch \| blob \| history
test/Transforms/SROA/basictest.ll		patch \| blob \| history
test/Transforms/SROA/phi-and-select.ll		patch \| blob \| history