AllocaInst should store Align instead of MaybeAlign.

author Eli Friedman <efriedma@quicinc.com>

Fri, 15 May 2020 20:23:14 +0000 (13:23 -0700)

committer Eli Friedman <efriedma@quicinc.com>

Sat, 16 May 2020 21:53:16 +0000 (14:53 -0700)
author Eli Friedman <efriedma@quicinc.com>
Fri, 15 May 2020 20:23:14 +0000 (13:23 -0700)
committer Eli Friedman <efriedma@quicinc.com>
Sat, 16 May 2020 21:53:16 +0000 (14:53 -0700)
diff --git a/llvm/include/llvm/IR/Instructions.h b/llvm/include/llvm/IR/Instructions.h

index 090674b..522441b 100644 (file)
--- a/llvm/include/llvm/IR/Instructions.h
+++ b/llvm/include/llvm/IR/Instructions.h
@@ -66,21 +66,19 @@ protected:
    AllocaInst *cloneImpl() const;
  
  public:
-  explicit AllocaInst(Type *Ty, unsigned AddrSpace,
-                      Value *ArraySize = nullptr,
-                      const Twine &Name = "",
-                      Instruction *InsertBefore = nullptr);
+  explicit AllocaInst(Type *Ty, unsigned AddrSpace, Value *ArraySize,
+                      const Twine &Name, Instruction *InsertBefore);
    AllocaInst(Type *Ty, unsigned AddrSpace, Value *ArraySize,
               const Twine &Name, BasicBlock *InsertAtEnd);
  
-  AllocaInst(Type *Ty, unsigned AddrSpace,
-             const Twine &Name, Instruction *InsertBefore = nullptr);
+  AllocaInst(Type *Ty, unsigned AddrSpace, const Twine &Name,
+             Instruction *InsertBefore);
    AllocaInst(Type *Ty, unsigned AddrSpace,
               const Twine &Name, BasicBlock *InsertAtEnd);
  
-  AllocaInst(Type *Ty, unsigned AddrSpace, Value *ArraySize, MaybeAlign Align,
+  AllocaInst(Type *Ty, unsigned AddrSpace, Value *ArraySize, Align Align,
               const Twine &Name = "", Instruction *InsertBefore = nullptr);
-  AllocaInst(Type *Ty, unsigned AddrSpace, Value *ArraySize, MaybeAlign Align,
+  AllocaInst(Type *Ty, unsigned AddrSpace, Value *ArraySize, Align Align,
               const Twine &Name, BasicBlock *InsertAtEnd);
  
    /// Return true if there is an allocation size parameter to the allocation
@@ -109,16 +107,12 @@ public:
  
    /// Return the alignment of the memory that is being allocated by the
    /// instruction.
-  MaybeAlign getAlign() const {
-    return decodeMaybeAlign(getSubclassDataFromInstruction() & 31);
+  Align getAlign() const {
+    return *decodeMaybeAlign(getSubclassDataFromInstruction() & 31);
    }
    // FIXME: Remove this one transition to Align is over.
-  unsigned getAlignment() const {
-    if (const auto MA = getAlign())
-      return MA->value();
-    return 0;
-  }
-  void setAlignment(MaybeAlign Align);
+  unsigned getAlignment() const { return getAlign().value(); }
+  void setAlignment(Align Align);
  
    /// Return true if this alloca is in the entry block of the function and is a
    /// constant size. If so, the code generator will fold it into the
diff --git a/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h b/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h

index 41180c5..c191035 100644 (file)
--- a/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h
+++ b/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h
@@ -61,7 +61,7 @@ private:
    bool processMemCpy(MemCpyInst *M);
    bool processMemMove(MemMoveInst *M);
    bool performCallSlotOptzn(Instruction *cpy, Value *cpyDst, Value *cpySrc,
-                            uint64_t cpyLen, unsigned cpyAlign, CallInst *C);
+                            uint64_t cpyLen, Align cpyAlign, CallInst *C);
    bool processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep);
    bool processMemSetMemCpyDependence(MemCpyInst *M, MemSetInst *MDep);
    bool performMemCpyToMemSetOptzn(MemCpyInst *M, MemSetInst *MDep);
diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp

index c0db588..9e8fe96 100644 (file)
--- a/llvm/lib/AsmParser/LLParser.cpp
+++ b/llvm/lib/AsmParser/LLParser.cpp
@@ -7007,7 +7007,12 @@ int LLParser::ParseAlloc(Instruction *&Inst, PerFunctionState &PFS) {
    if (Size && !Size->getType()->isIntegerTy())
      return Error(SizeLoc, "element count must have integer type");
  
-  AllocaInst *AI = new AllocaInst(Ty, AddrSpace, Size, Alignment);
+  SmallPtrSet<Type *, 4> Visited;
+  if (!Alignment && !Ty->isSized(&Visited))
+    return Error(TyLoc, "Cannot allocate unsized type");
+  if (!Alignment)
+    Alignment = M->getDataLayout().getPrefTypeAlign(Ty);
+  AllocaInst *AI = new AllocaInst(Ty, AddrSpace, Size, *Alignment);
    AI->setUsedWithInAlloca(IsInAlloca);
    AI->setSwiftError(IsSwiftError);
    Inst = AI;
diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp

index 7b62bab..e95d7b1 100644 (file)
--- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -4826,7 +4826,13 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
        const DataLayout &DL = TheModule->getDataLayout();
        unsigned AS = DL.getAllocaAddrSpace();
  
-      AllocaInst *AI = new AllocaInst(Ty, AS, Size, Align);
+      SmallPtrSet<Type *, 4> Visited;
+      if (!Align && !Ty->isSized(&Visited))
+        return error("alloca of unsized type");
+      if (!Align)
+        Align = DL.getPrefTypeAlign(Ty);
+
+      AllocaInst *AI = new AllocaInst(Ty, AS, Size, *Align);
        AI->setUsedWithInAlloca(InAlloca);
        AI->setSwiftError(SwiftError);
        I = AI;
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp

index 1c547ab..59cbaf4 100644 (file)
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -1959,7 +1959,7 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) {
        AllocaInst *AI;
        if ((AI = dyn_cast<AllocaInst>(Val)) && AI->getAlignment() < PrefAlign &&
            DL->getTypeAllocSize(AI->getAllocatedType()) >= MinSize + Offset2)
-        AI->setAlignment(MaybeAlign(PrefAlign));
+        AI->setAlignment(Align(PrefAlign));
        // Global variables can only be aligned if they are defined in this
        // object (i.e. they are uniquely initialized in this object), and
        // over-aligning global variables that have an explicit section is
diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp

index aa71ceb..9edc83e 100644 (file)
--- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -1875,7 +1875,7 @@ bool IRTranslator::translateAlloca(const User &U,
        MIRBuilder.buildConstant(IntPtrTy, ~(uint64_t)(StackAlign.value() - 1));
    auto AlignedAlloc = MIRBuilder.buildAnd(IntPtrTy, AllocAdd, AlignCst);
  
-  Align Alignment = max(AI.getAlign(), DL->getPrefTypeAlign(Ty));
+  Align Alignment = std::max(AI.getAlign(), DL->getPrefTypeAlign(Ty));
    if (Alignment <= StackAlign)
      Alignment = Align(1);
    MIRBuilder.buildDynStackAlloc(getOrCreateVReg(AI), AlignedAlloc, Alignment);
diff --git a/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp

index c482125..4f46f19 100644 (file)
--- a/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
@@ -139,7 +139,7 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
          // or the preferred alignment of the type if none is specified.
          //
          // (Unspecified alignment on allocas will be going away soon.)
-        Align SpecifiedAlign = AI->getAlign() ? *AI->getAlign() : TyPrefAlign;
+        Align SpecifiedAlign = AI->getAlign();
  
          // If the preferred alignment of the type is higher than the specified
          // alignment of the alloca, promote the alignment, as long as it doesn't
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp

index 00f5a1e..bac0f09 100644 (file)
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -3890,7 +3890,7 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) {
    const TargetLowering &TLI = DAG.getTargetLoweringInfo();
    auto &DL = DAG.getDataLayout();
    uint64_t TySize = DL.getTypeAllocSize(Ty);
-  MaybeAlign Alignment = max(DL.getPrefTypeAlign(Ty), I.getAlign());
+  MaybeAlign Alignment = std::max(DL.getPrefTypeAlign(Ty), I.getAlign());
  
    SDValue AllocSize = getValue(I.getArraySize());
  
@@ -9507,8 +9507,7 @@ static void tryToElideArgumentCopy(
                    "object size\n");
      return;
    }
-  Align RequiredAlignment = AI->getAlign().getValueOr(
-      FuncInfo.MF->getDataLayout().getABITypeAlign(AI->getAllocatedType()));
+  Align RequiredAlignment = AI->getAlign();
    if (MFI.getObjectAlign(FixedIndex) < RequiredAlignment) {
      LLVM_DEBUG(dbgs() << "  argument copy elision failed: alignment of alloca "
                           "greater than stack argument alignment ("
diff --git a/llvm/lib/IR/Core.cpp b/llvm/lib/IR/Core.cpp

index 35e4717..dbaf622 100644 (file)
--- a/llvm/lib/IR/Core.cpp
+++ b/llvm/lib/IR/Core.cpp
@@ -2021,7 +2021,7 @@ void LLVMSetAlignment(LLVMValueRef V, unsigned Bytes) {
    if (GlobalObject *GV = dyn_cast<GlobalObject>(P))
      GV->setAlignment(MaybeAlign(Bytes));
    else if (AllocaInst *AI = dyn_cast<AllocaInst>(P))
-    AI->setAlignment(MaybeAlign(Bytes));
+    AI->setAlignment(Align(Bytes));
    else if (LoadInst *LI = dyn_cast<LoadInst>(P))
      LI->setAlignment(Align(Bytes));
    else if (StoreInst *SI = dyn_cast<StoreInst>(P))
diff --git a/llvm/lib/IR/Instructions.cpp b/llvm/lib/IR/Instructions.cpp

index 1d20949..7ddf25b 100644 (file)
--- a/llvm/lib/IR/Instructions.cpp
+++ b/llvm/lib/IR/Instructions.cpp
@@ -1246,6 +1246,15 @@ static Value *getAISize(LLVMContext &Context, Value *Amt) {
    return Amt;
  }
  
+Align computeAllocaDefaultAlign(Type *Ty, BasicBlock *BB) {
+  const DataLayout &DL = BB->getModule()->getDataLayout();
+  return DL.getPrefTypeAlign(Ty);
+}
+
+Align computeAllocaDefaultAlign(Type *Ty, Instruction *I) {
+  return computeAllocaDefaultAlign(Ty, I->getParent());
+}
+
  AllocaInst::AllocaInst(Type *Ty, unsigned AddrSpace, const Twine &Name,
                         Instruction *InsertBefore)
    : AllocaInst(Ty, AddrSpace, /*ArraySize=*/nullptr, Name, InsertBefore) {}
@@ -1256,27 +1265,29 @@ AllocaInst::AllocaInst(Type *Ty, unsigned AddrSpace, const Twine &Name,
  
  AllocaInst::AllocaInst(Type *Ty, unsigned AddrSpace, Value *ArraySize,
                         const Twine &Name, Instruction *InsertBefore)
-    : AllocaInst(Ty, AddrSpace, ArraySize, /*Align=*/None, Name, InsertBefore) {
-}
+    : AllocaInst(Ty, AddrSpace, ArraySize,
+                 computeAllocaDefaultAlign(Ty, InsertBefore), Name,
+                 InsertBefore) {}
  
  AllocaInst::AllocaInst(Type *Ty, unsigned AddrSpace, Value *ArraySize,
                         const Twine &Name, BasicBlock *InsertAtEnd)
-    : AllocaInst(Ty, AddrSpace, ArraySize, /*Align=*/None, Name, InsertAtEnd) {}
+    : AllocaInst(Ty, AddrSpace, ArraySize,
+                 computeAllocaDefaultAlign(Ty, InsertAtEnd), Name,
+                 InsertAtEnd) {}
  
  AllocaInst::AllocaInst(Type *Ty, unsigned AddrSpace, Value *ArraySize,
-                       MaybeAlign Align, const Twine &Name,
+                       Align Align, const Twine &Name,
                         Instruction *InsertBefore)
      : UnaryInstruction(PointerType::get(Ty, AddrSpace), Alloca,
                         getAISize(Ty->getContext(), ArraySize), InsertBefore),
        AllocatedType(Ty) {
-  setAlignment(MaybeAlign(Align));
+  setAlignment(Align);
    assert(!Ty->isVoidTy() && "Cannot allocate void!");
    setName(Name);
  }
  
  AllocaInst::AllocaInst(Type *Ty, unsigned AddrSpace, Value *ArraySize,
-                       MaybeAlign Align, const Twine &Name,
-                       BasicBlock *InsertAtEnd)
+                       Align Align, const Twine &Name, BasicBlock *InsertAtEnd)
      : UnaryInstruction(PointerType::get(Ty, AddrSpace), Alloca,
                         getAISize(Ty->getContext(), ArraySize), InsertAtEnd),
        AllocatedType(Ty) {
@@ -1285,16 +1296,12 @@ AllocaInst::AllocaInst(Type *Ty, unsigned AddrSpace, Value *ArraySize,
    setName(Name);
  }
  
-void AllocaInst::setAlignment(MaybeAlign Align) {
-  assert((!Align || *Align <= MaximumAlignment) &&
+void AllocaInst::setAlignment(Align Align) {
+  assert(Align <= MaximumAlignment &&
           "Alignment is greater than MaximumAlignment!");
    setInstructionSubclassData((getSubclassDataFromInstruction() & ~31) |
                               encode(Align));
-  if (Align)
-    assert(getAlignment() == Align->value() &&
-           "Alignment representation error!");
-  else
-    assert(getAlignment() == 0 && "Alignment representation error!");
+  assert(getAlignment() == Align.value() && "Alignment representation error!");
  }
  
  bool AllocaInst::isArrayAllocation() const {
@@ -4240,7 +4247,7 @@ InsertValueInst *InsertValueInst::cloneImpl() const {
  AllocaInst *AllocaInst::cloneImpl() const {
    AllocaInst *Result =
        new AllocaInst(getAllocatedType(), getType()->getAddressSpace(),
-                     (Value *)getOperand(0), MaybeAlign(getAlignment()));
+                     getOperand(0), getAlign());
    Result->setUsedWithInAlloca(isUsedWithInAlloca());
    Result->setSwiftError(isSwiftError());
    return Result;
diff --git a/llvm/lib/Target/AArch64/AArch64StackTagging.cpp b/llvm/lib/Target/AArch64/AArch64StackTagging.cpp

index 8169c49..42f6bfb 100644 (file)
--- a/llvm/lib/Target/AArch64/AArch64StackTagging.cpp
+++ b/llvm/lib/Target/AArch64/AArch64StackTagging.cpp
@@ -484,7 +484,7 @@ void AArch64StackTagging::alignAndPadAlloca(AllocaInfo &Info) {
    auto *NewAI = new AllocaInst(
        TypeWithPadding, Info.AI->getType()->getAddressSpace(), nullptr, "", Info.AI);
    NewAI->takeName(Info.AI);
-  NewAI->setAlignment(MaybeAlign(Info.AI->getAlignment()));
+  NewAI->setAlignment(Info.AI->getAlign());
    NewAI->setUsedWithInAlloca(Info.AI->isUsedWithInAlloca());
    NewAI->setSwiftError(Info.AI->isSwiftError());
    NewAI->copyMetadata(*Info.AI);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp b/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp

index 86e437e..956508e 100644 (file)
--- a/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
@@ -1416,8 +1416,8 @@ AllocaInst* AMDGPULibCalls::insertAlloca(CallInst *UI, IRBuilder<> &B,
    B.SetInsertPoint(&*ItNew);
    AllocaInst *Alloc = B.CreateAlloca(RetType, 0,
      std::string(prefix) + UI->getName());
-  Alloc->setAlignment(MaybeAlign(
-      UCallee->getParent()->getDataLayout().getTypeAllocSize(RetType)));
+  Alloc->setAlignment(
+      Align(UCallee->getParent()->getDataLayout().getTypeAllocSize(RetType)));
    return Alloc;
  }
  
diff --git a/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp b/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp

index c3c5f6f..e60b5ee 100644 (file)
--- a/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp
@@ -159,12 +159,14 @@ void NVPTXLowerArgs::handleByValParam(Argument *Arg) {
    assert(PType && "Expecting pointer type in handleByValParam");
  
    Type *StructType = PType->getElementType();
-  unsigned AS = Func->getParent()->getDataLayout().getAllocaAddrSpace();
+  const DataLayout &DL = Func->getParent()->getDataLayout();
+  unsigned AS = DL.getAllocaAddrSpace();
    AllocaInst *AllocA = new AllocaInst(StructType, AS, Arg->getName(), FirstInst);
    // Set the alignment to alignment of the byval parameter. This is because,
    // later load/stores assume that alignment, and we are going to replace
    // the use of the byval parameter with this alloca instruction.
-  AllocA->setAlignment(MaybeAlign(Func->getParamAlignment(Arg->getArgNo())));
+  AllocA->setAlignment(Func->getParamAlign(Arg->getArgNo())
+                           .getValueOr(DL.getPrefTypeAlign(StructType)));
    Arg->replaceAllUsesWith(AllocA);
  
    Value *ArgInParam = new AddrSpaceCastInst(
diff --git a/llvm/lib/Transforms/Coroutines/CoroElide.cpp b/llvm/lib/Transforms/Coroutines/CoroElide.cpp

index e89c890..9d364b3 100644 (file)
--- a/llvm/lib/Transforms/Coroutines/CoroElide.cpp
+++ b/llvm/lib/Transforms/Coroutines/CoroElide.cpp
@@ -34,8 +34,8 @@ struct Lowerer : coro::LowererBase {
  
    Lowerer(Module &M) : LowererBase(M) {}
  
-  void elideHeapAllocations(Function *F, uint64_t FrameSize,
-                            MaybeAlign FrameAlign, AAResults &AA);
+  void elideHeapAllocations(Function *F, uint64_t FrameSize, Align FrameAlign,
+                            AAResults &AA);
    bool shouldElide(Function *F, DominatorTree &DT) const;
    void collectPostSplitCoroIds(Function *F);
    bool processCoroId(CoroIdInst *, AAResults &AA, DominatorTree &DT);
@@ -95,7 +95,7 @@ static void removeTailCallAttribute(AllocaInst *Frame, AAResults &AA) {
  
  // Given a resume function @f.resume(%f.frame* %frame), returns the size
  // and expected alignment of %f.frame type.
-static std::pair<uint64_t, MaybeAlign> getFrameLayout(Function *Resume) {
+static std::pair<uint64_t, Align> getFrameLayout(Function *Resume) {
    // Prefer to pull information from the function attributes.
    auto Size = Resume->getParamDereferenceableBytes(0);
    auto Align = Resume->getParamAlign(0);
@@ -109,7 +109,7 @@ static std::pair<uint64_t, MaybeAlign> getFrameLayout(Function *Resume) {
      if (!Align) Align = DL.getABITypeAlign(FrameTy);
    }
  
-  return std::make_pair(Size, Align);
+  return std::make_pair(Size, *Align);
  }
  
  // Finds first non alloca instruction in the entry block of a function.
@@ -123,7 +123,7 @@ static Instruction *getFirstNonAllocaInTheEntryBlock(Function *F) {
  // To elide heap allocations we need to suppress code blocks guarded by
  // llvm.coro.alloc and llvm.coro.free instructions.
  void Lowerer::elideHeapAllocations(Function *F, uint64_t FrameSize,
-                                   MaybeAlign FrameAlign, AAResults &AA) {
+                                   Align FrameAlign, AAResults &AA) {
    LLVMContext &C = F->getContext();
    auto *InsertPt =
        getFirstNonAllocaInTheEntryBlock(CoroIds.front()->getFunction());
diff --git a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp

index 204a469..a628c48 100644 (file)
--- a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
+++ b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
@@ -1266,7 +1266,7 @@ static void lowerLocalAllocas(ArrayRef<CoroAllocaAllocInst*> LocalAllocas,
  
      // Allocate memory.
      auto Alloca = Builder.CreateAlloca(Builder.getInt8Ty(), AI->getSize());
-    Alloca->setAlignment(MaybeAlign(AI->getAlignment()));
+    Alloca->setAlignment(Align(AI->getAlignment()));
  
      for (auto U : AI->users()) {
        // Replace gets with the allocation.
diff --git a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp

index 05aa497..3f7656d 100644 (file)
--- a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
+++ b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -385,9 +385,10 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote,
  
        // Just add all the struct element types.
        Type *AgTy = cast<PointerType>(I->getType())->getElementType();
-      Value *TheAlloca =
-          new AllocaInst(AgTy, DL.getAllocaAddrSpace(), nullptr,
-                         MaybeAlign(I->getParamAlignment()), "", InsertPt);
+      Value *TheAlloca = new AllocaInst(
+          AgTy, DL.getAllocaAddrSpace(), nullptr,
+          I->getParamAlign().getValueOr(DL.getPrefTypeAlign(AgTy)), "",
+          InsertPt);
        StructType *STy = cast<StructType>(AgTy);
        Value *Idxs[2] = {ConstantInt::get(Type::getInt32Ty(F->getContext()), 0),
                          nullptr};
diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp

index e270b82..3af6d08 100644 (file)
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -4724,7 +4724,7 @@ struct AAHeapToStackImpl : public AAHeapToStack {
        LLVM_DEBUG(dbgs() << "H2S: Removing malloc call: " << *MallocCall
                          << "\n");
  
-      MaybeAlign Alignment;
+      Align Alignment;
        Constant *Size;
        if (isCallocLikeFn(MallocCall, TLI)) {
          auto *Num = cast<ConstantInt>(MallocCall->getOperand(0));
@@ -4736,7 +4736,8 @@ struct AAHeapToStackImpl : public AAHeapToStack {
          Size = cast<ConstantInt>(MallocCall->getOperand(1));
          Alignment = MaybeAlign(cast<ConstantInt>(MallocCall->getOperand(0))
                                     ->getValue()
-                                   .getZExtValue());
+                                   .getZExtValue())
+                        .valueOrOne();
        } else {
          Size = cast<ConstantInt>(MallocCall->getOperand(0));
        }
diff --git a/llvm/lib/Transforms/IPO/Inliner.cpp b/llvm/lib/Transforms/IPO/Inliner.cpp

index 35a16cb..770ca2e 100644 (file)
--- a/llvm/lib/Transforms/IPO/Inliner.cpp
+++ b/llvm/lib/Transforms/IPO/Inliner.cpp
@@ -229,7 +229,7 @@ static void mergeInlinedArrayAllocas(Function *Caller, InlineFunctionInfo &IFI,
          }
  
          if (Align1 > Align2)
-          AvailableAlloca->setAlignment(MaybeAlign(AI->getAlignment()));
+          AvailableAlloca->setAlignment(AI->getAlign());
        }
  
        AI->eraseFromParent();
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp

index c5b0956..16d94d6 100644 (file)
--- a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -141,7 +141,7 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI,
    }
  
    AllocaInst *New = Builder.CreateAlloca(CastElTy, Amt);
-  New->setAlignment(MaybeAlign(AI.getAlignment()));
+  New->setAlignment(AI.getAlign());
    New->takeName(&AI);
    New->setUsedWithInAlloca(AI.isUsedWithInAlloca());
  
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp

index 048867c..8bcffcf 100644 (file)
--- a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -181,7 +181,7 @@ static Instruction *simplifyAllocaArraySize(InstCombiner &IC, AllocaInst &AI) {
      if (C->getValue().getActiveBits() <= 64) {
        Type *NewTy = ArrayType::get(AI.getAllocatedType(), C->getZExtValue());
        AllocaInst *New = IC.Builder.CreateAlloca(NewTy, nullptr, AI.getName());
-      New->setAlignment(MaybeAlign(AI.getAlignment()));
+      New->setAlignment(AI.getAlign());
  
        // Scan to the end of the allocation instructions, to skip over a block of
        // allocas if possible...also skip interleaved debug info
@@ -327,11 +327,6 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
      return I;
  
    if (AI.getAllocatedType()->isSized()) {
-    // If the alignment is 0 (unspecified), assign it the preferred alignment.
-    if (AI.getAlignment() == 0)
-      AI.setAlignment(
-          MaybeAlign(DL.getPrefTypeAlignment(AI.getAllocatedType())));
-
      // Move all alloca's of zero byte objects to the entry block and merge them
      // together.  Note that we only do this for alloca's, because malloc should
      // allocate and return a unique pointer, even for a zero byte allocation.
@@ -358,16 +353,10 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
            return &AI;
          }
  
-        // If the alignment of the entry block alloca is 0 (unspecified),
-        // assign it the preferred alignment.
-        if (EntryAI->getAlignment() == 0)
-          EntryAI->setAlignment(
-              MaybeAlign(DL.getPrefTypeAlignment(EntryAI->getAllocatedType())));
          // Replace this zero-sized alloca with the one at the start of the entry
          // block after ensuring that the address will be aligned enough for both
          // types.
-        const MaybeAlign MaxAlign(
-            std::max(EntryAI->getAlignment(), AI.getAlignment()));
+        const Align MaxAlign = std::max(EntryAI->getAlign(), AI.getAlign());
          EntryAI->setAlignment(MaxAlign);
          if (AI.getType() != EntryAI->getType())
            return new BitCastInst(EntryAI, AI.getType());
diff --git a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp

index 1d679ff..0fbc4d5 100644 (file)
--- a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -2944,7 +2944,7 @@ Value *FunctionStackPoisoner::createAllocaForLayout(
    }
    assert((ClRealignStack & (ClRealignStack - 1)) == 0);
    size_t FrameAlignment = std::max(L.FrameAlignment, (size_t)ClRealignStack);
-  Alloca->setAlignment(MaybeAlign(FrameAlignment));
+  Alloca->setAlignment(Align(FrameAlignment));
    return IRB.CreatePointerCast(Alloca, IntptrTy);
  }
  
@@ -3329,7 +3329,7 @@ void FunctionStackPoisoner::poisonAlloca(Value *V, uint64_t Size,
  void FunctionStackPoisoner::handleDynamicAllocaCall(AllocaInst *AI) {
    IRBuilder<> IRB(AI);
  
-  const unsigned Align = std::max(kAllocaRzSize, AI->getAlignment());
+  const unsigned Alignment = std::max(kAllocaRzSize, AI->getAlignment());
    const uint64_t AllocaRedzoneMask = kAllocaRzSize - 1;
  
    Value *Zero = Constant::getNullValue(IntptrTy);
@@ -3356,21 +3356,21 @@ void FunctionStackPoisoner::handleDynamicAllocaCall(AllocaInst *AI) {
    Value *Cond = IRB.CreateICmpNE(Misalign, AllocaRzSize);
    Value *PartialPadding = IRB.CreateSelect(Cond, Misalign, Zero);
  
-  // AdditionalChunkSize = Align + PartialPadding + kAllocaRzSize
-  // Align is added to locate left redzone, PartialPadding for possible
+  // AdditionalChunkSize = Alignment + PartialPadding + kAllocaRzSize
+  // Alignment is added to locate left redzone, PartialPadding for possible
    // partial redzone and kAllocaRzSize for right redzone respectively.
    Value *AdditionalChunkSize = IRB.CreateAdd(
-      ConstantInt::get(IntptrTy, Align + kAllocaRzSize), PartialPadding);
+      ConstantInt::get(IntptrTy, Alignment + kAllocaRzSize), PartialPadding);
  
    Value *NewSize = IRB.CreateAdd(OldSize, AdditionalChunkSize);
  
-  // Insert new alloca with new NewSize and Align params.
+  // Insert new alloca with new NewSize and Alignment params.
    AllocaInst *NewAlloca = IRB.CreateAlloca(IRB.getInt8Ty(), NewSize);
-  NewAlloca->setAlignment(MaybeAlign(Align));
+  NewAlloca->setAlignment(Align(Alignment));
  
-  // NewAddress = Address + Align
+  // NewAddress = Address + Alignment
    Value *NewAddress = IRB.CreateAdd(IRB.CreatePtrToInt(NewAlloca, IntptrTy),
-                                    ConstantInt::get(IntptrTy, Align));
+                                    ConstantInt::get(IntptrTy, Alignment));
  
    // Insert __asan_alloca_poison call for new created alloca.
    IRB.CreateCall(AsanAllocaPoisonFunc, {NewAddress, OldSize});
diff --git a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp

index 0054826..42882f8 100644 (file)
--- a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
@@ -1159,7 +1159,7 @@ bool HWAddressSanitizer::sanitizeFunction(Function &F) {
      uint64_t Size = getAllocaSizeInBytes(*AI);
      uint64_t AlignedSize = alignTo(Size, Mapping.getObjectAlignment());
      AI->setAlignment(
-        MaybeAlign(std::max(AI->getAlignment(), Mapping.getObjectAlignment())));
+        Align(std::max(AI->getAlignment(), Mapping.getObjectAlignment())));
      if (Size != AlignedSize) {
        Type *AllocatedType = AI->getAllocatedType();
        if (AI->isArrayAllocation()) {
@@ -1172,7 +1172,7 @@ bool HWAddressSanitizer::sanitizeFunction(Function &F) {
        auto *NewAI = new AllocaInst(
            TypeWithPadding, AI->getType()->getAddressSpace(), nullptr, "", AI);
        NewAI->takeName(AI);
-      NewAI->setAlignment(MaybeAlign(AI->getAlignment()));
+      NewAI->setAlignment(AI->getAlign());
        NewAI->setUsedWithInAlloca(AI->isUsedWithInAlloca());
        NewAI->setSwiftError(AI->isSwiftError());
        NewAI->copyMetadata(*AI);
diff --git a/llvm/lib/Transforms/Scalar/GVNHoist.cpp b/llvm/lib/Transforms/Scalar/GVNHoist.cpp

index a091bf6..9c4cdf2 100644 (file)
--- a/llvm/lib/Transforms/Scalar/GVNHoist.cpp
+++ b/llvm/lib/Transforms/Scalar/GVNHoist.cpp
@@ -898,9 +898,8 @@ private:
                                                cast<StoreInst>(I)->getAlign()));
        ++NumStoresRemoved;
      } else if (auto *ReplacementAlloca = dyn_cast<AllocaInst>(Repl)) {
-      ReplacementAlloca->setAlignment(
-          MaybeAlign(std::max(ReplacementAlloca->getAlignment(),
-                              cast<AllocaInst>(I)->getAlignment())));
+      ReplacementAlloca->setAlignment(std::max(
+          ReplacementAlloca->getAlign(), cast<AllocaInst>(I)->getAlign()));
      } else if (isa<CallInst>(Repl)) {
        ++NumCallsRemoved;
      }
diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp

index a2e13a9..ff02926 100644 (file)
--- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -636,7 +636,7 @@ bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
              LI, SI->getPointerOperand()->stripPointerCasts(),
              LI->getPointerOperand()->stripPointerCasts(),
              DL.getTypeStoreSize(SI->getOperand(0)->getType()),
-            findCommonAlignment(DL, SI, LI).value(), C);
+            findCommonAlignment(DL, SI, LI), C);
          if (changed) {
            MD->removeInstruction(SI);
            SI->eraseFromParent();
@@ -707,7 +707,7 @@ bool MemCpyOptPass::processMemSet(MemSetInst *MSI, BasicBlock::iterator &BBI) {
  /// the call write its result directly into the destination of the memcpy.
  bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpy, Value *cpyDest,
                                           Value *cpySrc, uint64_t cpyLen,
-                                         unsigned cpyAlign, CallInst *C) {
+                                         Align cpyAlign, CallInst *C) {
    // The general transformation to keep in mind is
    //
    //   call @func(..., src, ...)
@@ -785,9 +785,7 @@ bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpy, Value *cpyDest,
    }
  
    // Check that dest points to memory that is at least as aligned as src.
-  unsigned srcAlign = srcAlloca->getAlignment();
-  if (!srcAlign)
-    srcAlign = DL.getABITypeAlignment(srcAlloca->getAllocatedType());
+  Align srcAlign = srcAlloca->getAlign();
    bool isDestSufficientlyAligned = srcAlign <= cpyAlign;
    // If dest is not aligned enough and we can't increase its alignment then
    // bail out.
@@ -882,7 +880,7 @@ bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpy, Value *cpyDest,
    // If the destination wasn't sufficiently aligned then increase its alignment.
    if (!isDestSufficientlyAligned) {
      assert(isa<AllocaInst>(cpyDest) && "Can only increase alloca alignment!");
-    cast<AllocaInst>(cpyDest)->setAlignment(MaybeAlign(srcAlign));
+    cast<AllocaInst>(cpyDest)->setAlignment(srcAlign);
    }
  
    // Drop any cached information about the call, because we may have changed
@@ -1167,10 +1165,10 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M) {
      if (CallInst *C = dyn_cast<CallInst>(DepInfo.getInst())) {
        // FIXME: Can we pass in either of dest/src alignment here instead
        // of conservatively taking the minimum?
-      unsigned Align = MinAlign(M->getDestAlignment(), M->getSourceAlignment());
+      Align Alignment = std::min(M->getDestAlign().valueOrOne(),
+                                 M->getSourceAlign().valueOrOne());
        if (performCallSlotOptzn(M, M->getDest(), M->getSource(),
-                               CopySize->getZExtValue(), Align,
-                               C)) {
+                               CopySize->getZExtValue(), Alignment, C)) {
          MD->removeInstruction(M);
          M->eraseFromParent();
          return true;
diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp

index 8b95bca..01b7063 100644 (file)
--- a/llvm/lib/Transforms/Scalar/SROA.cpp
+++ b/llvm/lib/Transforms/Scalar/SROA.cpp
@@ -4198,7 +4198,7 @@ AllocaInst *SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS,
      const bool IsUnconstrained = Alignment <= DL.getABITypeAlignment(SliceTy);
      NewAI = new AllocaInst(
          SliceTy, AI.getType()->getAddressSpace(), nullptr,
-        IsUnconstrained ? MaybeAlign() : Alignment,
+        IsUnconstrained ? DL.getPrefTypeAlign(SliceTy) : Alignment,
          AI.getName() + ".sroa." + Twine(P.begin() - AS.begin()), &AI);
      // Copy the old AI debug location over to the new one.
      NewAI->setDebugLoc(AI.getDebugLoc());
diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp

index 6d6e74c..ae4ef97 100644 (file)
--- a/llvm/lib/Transforms/Utils/Local.cpp
+++ b/llvm/lib/Transforms/Utils/Local.cpp
@@ -1181,7 +1181,7 @@ static Align enforceKnownAlignment(Value *V, Align Alignment, Align PrefAlign,
      // stripPointerCasts recurses through infinite layers of bitcasts,
      // while computeKnownBits is not allowed to traverse more than 6
      // levels.
-    Alignment = max(AI->getAlign(), Alignment);
+    Alignment = std::max(AI->getAlign(), Alignment);
      if (PrefAlign <= Alignment)
        return Alignment;
  
diff --git a/llvm/test/Assembler/alloca-addrspace-elems.ll b/llvm/test/Assembler/alloca-addrspace-elems.ll

index 8c02760..40f7a05 100644 (file)
--- a/llvm/test/Assembler/alloca-addrspace-elems.ll
+++ b/llvm/test/Assembler/alloca-addrspace-elems.ll
@@ -4,11 +4,11 @@ target datalayout = "A5"
  ; CHECK: target datalayout = "A5"
  
  
-; CHECK: %alloca_array_no_align = alloca i32, i32 9, addrspace(5)
+; CHECK: %alloca_array_no_align = alloca i32, i32 9, align 4, addrspace(5)
  ; CHECK-NEXT: %alloca_array_align4 = alloca i32, i32 9, align 4, addrspace(5)
-; CHECK-NEXT: %alloca_array_no_align_metadata = alloca i32, i32 9, addrspace(5), !foo !0
+; CHECK-NEXT: %alloca_array_no_align_metadata = alloca i32, i32 9, align 4, addrspace(5), !foo !0
  ; CHECK-NEXT: %alloca_array_align4_metadata = alloca i32, i32 9, align 4, addrspace(5), !foo !0
-; CHECK-NEXT: %alloca_inalloca_array_no_align = alloca inalloca i32, i32 9, addrspace(5)
+; CHECK-NEXT: %alloca_inalloca_array_no_align = alloca inalloca i32, i32 9, align 4, addrspace(5)
  ; CHECK-NEXT: %alloca_inalloca_array_align4_metadata = alloca inalloca i32, i32 9, align 4, addrspace(5), !foo !0
  
  define void @use_alloca() {
diff --git a/llvm/test/Assembler/alloca-addrspace0.ll b/llvm/test/Assembler/alloca-addrspace0.ll

index 09b7a32..1ad4f14 100644 (file)
--- a/llvm/test/Assembler/alloca-addrspace0.ll
+++ b/llvm/test/Assembler/alloca-addrspace0.ll
@@ -4,11 +4,11 @@ target datalayout = "A0"
  ; CHECK: target datalayout = "A0"
  
  
-; CHECK: %alloca_scalar_no_align = alloca i32
+; CHECK: %alloca_scalar_no_align = alloca i32, align 4
  ; CHECK-NEXT: %alloca_scalar_align4 = alloca i32, align 4
-; CHECK-NEXT: %alloca_scalar_no_align_metadata = alloca i32, !foo !0
+; CHECK-NEXT: %alloca_scalar_no_align_metadata = alloca i32, align 4, !foo !0
  ; CHECK-NEXT: %alloca_scalar_align4_metadata = alloca i32, align 4, !foo !0
-; CHECK-NEXT: %alloca_inalloca_scalar_no_align = alloca inalloca i32
+; CHECK-NEXT: %alloca_inalloca_scalar_no_align = alloca inalloca i32, align 4
  ; CHECK-NEXT: %alloca_inalloca_scalar_align4_metadata = alloca inalloca i32, align 4, !foo !0
  define void @use_alloca() {
    %alloca_scalar_no_align = alloca i32, addrspace(0)
diff --git a/llvm/test/Assembler/block-labels.ll b/llvm/test/Assembler/block-labels.ll

index 9ab55e0..af9f824 100644 (file)
--- a/llvm/test/Assembler/block-labels.ll
+++ b/llvm/test/Assembler/block-labels.ll
@@ -21,7 +21,7 @@ $N:
  }
  
  ; CHECK-LABEL: define i32 @test1(i32 %X) {
-; CHECK-NEXT:   %1 = alloca i32
+; CHECK-NEXT:   %1 = alloca i32, align 4
  ; CHECK-NEXT:   br label %2
  ; CHECK:      2:       ; preds = %0
  ; CHECK-NEXT:   br label %3
diff --git a/llvm/test/Assembler/datalayout-alloca-addrspace-mismatch-0.ll b/llvm/test/Assembler/datalayout-alloca-addrspace-mismatch-0.ll

index 828aa13..3ab5890 100644 (file)
--- a/llvm/test/Assembler/datalayout-alloca-addrspace-mismatch-0.ll
+++ b/llvm/test/Assembler/datalayout-alloca-addrspace-mismatch-0.ll
@@ -3,7 +3,7 @@
  target datalayout = "A1"
  
  ; CHECK: Allocation instruction pointer not in the stack address space!
-; CHECK-NEXT:  %alloca_scalar_no_align = alloca i32, addrspace(2)
+; CHECK-NEXT:  %alloca_scalar_no_align = alloca i32, align 4, addrspace(2)
  
  define void @use_alloca() {
    %alloca_scalar_no_align = alloca i32, addrspace(2)
diff --git a/llvm/test/Assembler/datalayout-alloca-addrspace.ll b/llvm/test/Assembler/datalayout-alloca-addrspace.ll

index 578b7ef..6e95ef1 100644 (file)
--- a/llvm/test/Assembler/datalayout-alloca-addrspace.ll
+++ b/llvm/test/Assembler/datalayout-alloca-addrspace.ll
@@ -3,11 +3,11 @@
  target datalayout = "A1"
  ; CHECK: target datalayout = "A1"
  
-; CHECK: %alloca_scalar_no_align = alloca i32, addrspace(1)
+; CHECK: %alloca_scalar_no_align = alloca i32, align 4, addrspace(1)
  ; CHECK-NEXT: %alloca_scalar_align4 = alloca i32, align 4, addrspace(1)
-; CHECK-NEXT: %alloca_scalar_no_align_metadata = alloca i32, addrspace(1), !foo !0
+; CHECK-NEXT: %alloca_scalar_no_align_metadata = alloca i32, align 4, addrspace(1), !foo !0
  ; CHECK-NEXT: %alloca_scalar_align4_metadata = alloca i32, align 4, addrspace(1), !foo !0
-; CHECK-NEXT: %alloca_inalloca_scalar_no_align = alloca inalloca i32, addrspace(1)
+; CHECK-NEXT: %alloca_inalloca_scalar_no_align = alloca inalloca i32, align 4, addrspace(1)
  ; CHECK-NEXT: %alloca_inalloca_scalar_align4_metadata = alloca inalloca i32, align 4, addrspace(1), !foo !0
  define void @use_alloca() {
    %alloca_scalar_no_align = alloca i32, addrspace(1)
diff --git a/llvm/test/Assembler/drop-debug-info-nonzero-alloca.ll b/llvm/test/Assembler/drop-debug-info-nonzero-alloca.ll

index 3e2aefb..01446df 100644 (file)
--- a/llvm/test/Assembler/drop-debug-info-nonzero-alloca.ll
+++ b/llvm/test/Assembler/drop-debug-info-nonzero-alloca.ll
@@ -6,7 +6,7 @@
  define void @foo() {
  entry:
  ; DIS: target datalayout = "A5"
-; DIS: %tmp = alloca i32, addrspace(5)
+; DIS: %tmp = alloca i32, align 4, addrspace(5)
    %tmp = alloca i32, addrspace(5)
    call void @llvm.dbg.value(
        metadata i8* undef,
diff --git a/llvm/test/CodeGen/AMDGPU/alloca.ll b/llvm/test/CodeGen/AMDGPU/alloca.ll

index 58d8d2f..0e82bb9 100644 (file)
--- a/llvm/test/CodeGen/AMDGPU/alloca.ll
+++ b/llvm/test/CodeGen/AMDGPU/alloca.ll
@@ -4,7 +4,7 @@
  ; RUN: opt -data-layout=A5 -S < %s
  ; RUN: llvm-as -data-layout=A5 < %s | opt -S
  
-; CHECK: %tmp = alloca i32, addrspace(5)
+; CHECK: %tmp = alloca i32, align 4, addrspace(5)
  define amdgpu_kernel void @test() {
    %tmp = alloca i32, addrspace(5)
    ret void
diff --git a/llvm/test/CodeGen/AMDGPU/lower-kernargs.ll b/llvm/test/CodeGen/AMDGPU/lower-kernargs.ll

index 3a70a96..9daaeee 100644 (file)
--- a/llvm/test/CodeGen/AMDGPU/lower-kernargs.ll
+++ b/llvm/test/CodeGen/AMDGPU/lower-kernargs.ll
@@ -246,7 +246,7 @@ define amdgpu_kernel void @kern_i24(i24 %arg0) {
  ; HSA-NEXT:    [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
  ; HSA-NEXT:    [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
  ; HSA-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i24
-; HSA-NEXT:    store i24 [[TMP2]], i24 addrspace(1)* undef
+; HSA-NEXT:    store i24 [[TMP2]], i24 addrspace(1)* undef, align 4
  ; HSA-NEXT:    ret void
  ;
  ; MESA-LABEL: @kern_i24(
@@ -255,7 +255,7 @@ define amdgpu_kernel void @kern_i24(i24 %arg0) {
  ; MESA-NEXT:    [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]] to i32 addrspace(4)*
  ; MESA-NEXT:    [[TMP1:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
  ; MESA-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i24
-; MESA-NEXT:    store i24 [[TMP2]], i24 addrspace(1)* undef
+; MESA-NEXT:    store i24 [[TMP2]], i24 addrspace(1)* undef, align 4
  ; MESA-NEXT:    ret void
  ;
    store i24 %arg0, i24 addrspace(1)* undef
@@ -268,7 +268,7 @@ define amdgpu_kernel void @kern_i32(i32 %arg0) {
  ; HSA-NEXT:    [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_I32_KERNARG_SEGMENT]], i64 0
  ; HSA-NEXT:    [[ARG0_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET]] to i32 addrspace(4)*
  ; HSA-NEXT:    [[ARG0_LOAD:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
-; HSA-NEXT:    store i32 [[ARG0_LOAD]], i32 addrspace(1)* undef
+; HSA-NEXT:    store i32 [[ARG0_LOAD]], i32 addrspace(1)* undef, align 4
  ; HSA-NEXT:    ret void
  ;
  ; MESA-LABEL: @kern_i32(
@@ -276,7 +276,7 @@ define amdgpu_kernel void @kern_i32(i32 %arg0) {
  ; MESA-NEXT:    [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_I32_KERNARG_SEGMENT]], i64 36
  ; MESA-NEXT:    [[ARG0_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET]] to i32 addrspace(4)*
  ; MESA-NEXT:    [[ARG0_LOAD:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
-; MESA-NEXT:    store i32 [[ARG0_LOAD]], i32 addrspace(1)* undef
+; MESA-NEXT:    store i32 [[ARG0_LOAD]], i32 addrspace(1)* undef, align 4
  ; MESA-NEXT:    ret void
  ;
    store i32 %arg0, i32 addrspace(1)* undef
@@ -289,7 +289,7 @@ define amdgpu_kernel void @kern_f32(float %arg0) {
  ; HSA-NEXT:    [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_F32_KERNARG_SEGMENT]], i64 0
  ; HSA-NEXT:    [[ARG0_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET]] to float addrspace(4)*
  ; HSA-NEXT:    [[ARG0_LOAD:%.*]] = load float, float addrspace(4)* [[ARG0_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
-; HSA-NEXT:    store float [[ARG0_LOAD]], float addrspace(1)* undef
+; HSA-NEXT:    store float [[ARG0_LOAD]], float addrspace(1)* undef, align 4
  ; HSA-NEXT:    ret void
  ;
  ; MESA-LABEL: @kern_f32(
@@ -297,7 +297,7 @@ define amdgpu_kernel void @kern_f32(float %arg0) {
  ; MESA-NEXT:    [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_F32_KERNARG_SEGMENT]], i64 36
  ; MESA-NEXT:    [[ARG0_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET]] to float addrspace(4)*
  ; MESA-NEXT:    [[ARG0_LOAD:%.*]] = load float, float addrspace(4)* [[ARG0_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
-; MESA-NEXT:    store float [[ARG0_LOAD]], float addrspace(1)* undef
+; MESA-NEXT:    store float [[ARG0_LOAD]], float addrspace(1)* undef, align 4
  ; MESA-NEXT:    ret void
  ;
    store float %arg0, float addrspace(1)* undef
@@ -333,7 +333,7 @@ define amdgpu_kernel void @kern_v8i32(<8 x i32> %arg) #0 {
  ; HSA-NEXT:    [[ARG_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_V8I32_KERNARG_SEGMENT]], i64 0
  ; HSA-NEXT:    [[ARG_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET]] to <8 x i32> addrspace(4)*
  ; HSA-NEXT:    [[ARG_LOAD:%.*]] = load <8 x i32>, <8 x i32> addrspace(4)* [[ARG_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
-; HSA-NEXT:    store <8 x i32> [[ARG_LOAD]], <8 x i32> addrspace(1)* undef
+; HSA-NEXT:    store <8 x i32> [[ARG_LOAD]], <8 x i32> addrspace(1)* undef, align 32
  ; HSA-NEXT:    ret void
  ;
  ; MESA-LABEL: @kern_v8i32(
@@ -341,7 +341,7 @@ define amdgpu_kernel void @kern_v8i32(<8 x i32> %arg) #0 {
  ; MESA-NEXT:    [[ARG_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_V8I32_KERNARG_SEGMENT]], i64 36
  ; MESA-NEXT:    [[ARG_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET]] to <8 x i32> addrspace(4)*
  ; MESA-NEXT:    [[ARG_LOAD:%.*]] = load <8 x i32>, <8 x i32> addrspace(4)* [[ARG_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
-; MESA-NEXT:    store <8 x i32> [[ARG_LOAD]], <8 x i32> addrspace(1)* undef
+; MESA-NEXT:    store <8 x i32> [[ARG_LOAD]], <8 x i32> addrspace(1)* undef, align 32
  ; MESA-NEXT:    ret void
  ;
    store <8 x i32> %arg, <8 x i32> addrspace(1)* undef
@@ -354,7 +354,7 @@ define amdgpu_kernel void @kern_v8i64(<8 x i64> %arg) #0 {
  ; HSA-NEXT:    [[ARG_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_V8I64_KERNARG_SEGMENT]], i64 0
  ; HSA-NEXT:    [[ARG_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET]] to <8 x i64> addrspace(4)*
  ; HSA-NEXT:    [[ARG_LOAD:%.*]] = load <8 x i64>, <8 x i64> addrspace(4)* [[ARG_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
-; HSA-NEXT:    store <8 x i64> [[ARG_LOAD]], <8 x i64> addrspace(1)* undef
+; HSA-NEXT:    store <8 x i64> [[ARG_LOAD]], <8 x i64> addrspace(1)* undef, align 64
  ; HSA-NEXT:    ret void
  ;
  ; MESA-LABEL: @kern_v8i64(
@@ -362,7 +362,7 @@ define amdgpu_kernel void @kern_v8i64(<8 x i64> %arg) #0 {
  ; MESA-NEXT:    [[ARG_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_V8I64_KERNARG_SEGMENT]], i64 36
  ; MESA-NEXT:    [[ARG_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET]] to <8 x i64> addrspace(4)*
  ; MESA-NEXT:    [[ARG_LOAD:%.*]] = load <8 x i64>, <8 x i64> addrspace(4)* [[ARG_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
-; MESA-NEXT:    store <8 x i64> [[ARG_LOAD]], <8 x i64> addrspace(1)* undef
+; MESA-NEXT:    store <8 x i64> [[ARG_LOAD]], <8 x i64> addrspace(1)* undef, align 64
  ; MESA-NEXT:    ret void
  ;
    store <8 x i64> %arg, <8 x i64> addrspace(1)* undef
@@ -375,7 +375,7 @@ define amdgpu_kernel void @kern_v16i64(<16 x i64> %arg) #0 {
  ; HSA-NEXT:    [[ARG_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_V16I64_KERNARG_SEGMENT]], i64 0
  ; HSA-NEXT:    [[ARG_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET]] to <16 x i64> addrspace(4)*
  ; HSA-NEXT:    [[ARG_LOAD:%.*]] = load <16 x i64>, <16 x i64> addrspace(4)* [[ARG_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
-; HSA-NEXT:    store <16 x i64> [[ARG_LOAD]], <16 x i64> addrspace(1)* undef
+; HSA-NEXT:    store <16 x i64> [[ARG_LOAD]], <16 x i64> addrspace(1)* undef, align 128
  ; HSA-NEXT:    ret void
  ;
  ; MESA-LABEL: @kern_v16i64(
@@ -383,7 +383,7 @@ define amdgpu_kernel void @kern_v16i64(<16 x i64> %arg) #0 {
  ; MESA-NEXT:    [[ARG_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_V16I64_KERNARG_SEGMENT]], i64 36
  ; MESA-NEXT:    [[ARG_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG_KERNARG_OFFSET]] to <16 x i64> addrspace(4)*
  ; MESA-NEXT:    [[ARG_LOAD:%.*]] = load <16 x i64>, <16 x i64> addrspace(4)* [[ARG_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
-; MESA-NEXT:    store <16 x i64> [[ARG_LOAD]], <16 x i64> addrspace(1)* undef
+; MESA-NEXT:    store <16 x i64> [[ARG_LOAD]], <16 x i64> addrspace(1)* undef, align 128
  ; MESA-NEXT:    ret void
  ;
    store <16 x i64> %arg, <16 x i64> addrspace(1)* undef
@@ -400,7 +400,7 @@ define amdgpu_kernel void @kern_i32_v3i32(i32 %arg0, <3 x i32> %arg1) {
  ; HSA-NEXT:    [[ARG1_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET]] to <4 x i32> addrspace(4)*
  ; HSA-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32> addrspace(4)* [[ARG1_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
  ; HSA-NEXT:    [[ARG1_LOAD:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
-; HSA-NEXT:    store i32 [[ARG0_LOAD]], i32 addrspace(1)* undef
+; HSA-NEXT:    store i32 [[ARG0_LOAD]], i32 addrspace(1)* undef, align 4
  ; HSA-NEXT:    store <3 x i32> [[ARG1_LOAD]], <3 x i32> addrspace(1)* undef, align 4
  ; HSA-NEXT:    ret void
  ;
@@ -413,7 +413,7 @@ define amdgpu_kernel void @kern_i32_v3i32(i32 %arg0, <3 x i32> %arg1) {
  ; MESA-NEXT:    [[ARG1_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET]] to <4 x i32> addrspace(4)*
  ; MESA-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32> addrspace(4)* [[ARG1_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
  ; MESA-NEXT:    [[ARG1_LOAD:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
-; MESA-NEXT:    store i32 [[ARG0_LOAD]], i32 addrspace(1)* undef
+; MESA-NEXT:    store i32 [[ARG0_LOAD]], i32 addrspace(1)* undef, align 4
  ; MESA-NEXT:    store <3 x i32> [[ARG1_LOAD]], <3 x i32> addrspace(1)* undef, align 4
  ; MESA-NEXT:    ret void
  ;
@@ -431,7 +431,7 @@ define amdgpu_kernel void @kern_struct_a(%struct.a %arg0) {
  ; HSA-NEXT:    [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_STRUCT_A_KERNARG_SEGMENT]], i64 0
  ; HSA-NEXT:    [[ARG0_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET]] to [[STRUCT_A:%.*]] addrspace(4)*
  ; HSA-NEXT:    [[ARG0_LOAD:%.*]] = load [[STRUCT_A]], [[STRUCT_A]] addrspace(4)* [[ARG0_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
-; HSA-NEXT:    store [[STRUCT_A]] %arg0.load, [[STRUCT_A]] addrspace(1)* undef
+; HSA-NEXT:    store [[STRUCT_A]] %arg0.load, [[STRUCT_A]] addrspace(1)* undef, align 4
  ; HSA-NEXT:    ret void
  ;
  ; MESA-LABEL: @kern_struct_a(
@@ -439,7 +439,7 @@ define amdgpu_kernel void @kern_struct_a(%struct.a %arg0) {
  ; MESA-NEXT:    [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_STRUCT_A_KERNARG_SEGMENT]], i64 36
  ; MESA-NEXT:    [[ARG0_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET]] to [[STRUCT_A:%.*]] addrspace(4)*
  ; MESA-NEXT:    [[ARG0_LOAD:%.*]] = load [[STRUCT_A]], [[STRUCT_A]] addrspace(4)* [[ARG0_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
-; MESA-NEXT:    store [[STRUCT_A]] %arg0.load, [[STRUCT_A]] addrspace(1)* undef
+; MESA-NEXT:    store [[STRUCT_A]] %arg0.load, [[STRUCT_A]] addrspace(1)* undef, align 4
  ; MESA-NEXT:    ret void
  ;
    store %struct.a %arg0, %struct.a addrspace(1)* undef
@@ -452,7 +452,7 @@ define amdgpu_kernel void @kern_struct_b_packed(%struct.b.packed %arg0) #0 {
  ; HSA-NEXT:    [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_STRUCT_B_PACKED_KERNARG_SEGMENT]], i64 0
  ; HSA-NEXT:    [[ARG0_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET]] to [[STRUCT_B_PACKED:%.*]] addrspace(4)*
  ; HSA-NEXT:    [[ARG0_LOAD:%.*]] = load [[STRUCT_B_PACKED]], [[STRUCT_B_PACKED]] addrspace(4)* [[ARG0_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
-; HSA-NEXT:    store [[STRUCT_B_PACKED]] %arg0.load, [[STRUCT_B_PACKED]] addrspace(1)* undef
+; HSA-NEXT:    store [[STRUCT_B_PACKED]] %arg0.load, [[STRUCT_B_PACKED]] addrspace(1)* undef, align 16
  ; HSA-NEXT:    ret void
  ;
  ; MESA-LABEL: @kern_struct_b_packed(
@@ -460,7 +460,7 @@ define amdgpu_kernel void @kern_struct_b_packed(%struct.b.packed %arg0) #0 {
  ; MESA-NEXT:    [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_STRUCT_B_PACKED_KERNARG_SEGMENT]], i64 36
  ; MESA-NEXT:    [[ARG0_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET]] to [[STRUCT_B_PACKED:%.*]] addrspace(4)*
  ; MESA-NEXT:    [[ARG0_LOAD:%.*]] = load [[STRUCT_B_PACKED]], [[STRUCT_B_PACKED]] addrspace(4)* [[ARG0_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
-; MESA-NEXT:    store [[STRUCT_B_PACKED]] %arg0.load, [[STRUCT_B_PACKED]] addrspace(1)* undef
+; MESA-NEXT:    store [[STRUCT_B_PACKED]] %arg0.load, [[STRUCT_B_PACKED]] addrspace(1)* undef, align 16
  ; MESA-NEXT:    ret void
  ;
    store %struct.b.packed %arg0, %struct.b.packed addrspace(1)* undef
@@ -473,7 +473,7 @@ define amdgpu_kernel void @kern_implicit_arg_num_bytes(i32 %arg0) #1 {
  ; HSA-NEXT:    [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_IMPLICIT_ARG_NUM_BYTES_KERNARG_SEGMENT]], i64 0
  ; HSA-NEXT:    [[ARG0_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET]] to i32 addrspace(4)*
  ; HSA-NEXT:    [[ARG0_LOAD:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
-; HSA-NEXT:    store i32 [[ARG0_LOAD]], i32 addrspace(1)* undef
+; HSA-NEXT:    store i32 [[ARG0_LOAD]], i32 addrspace(1)* undef, align 4
  ; HSA-NEXT:    ret void
  ;
  ; MESA-LABEL: @kern_implicit_arg_num_bytes(
@@ -481,7 +481,7 @@ define amdgpu_kernel void @kern_implicit_arg_num_bytes(i32 %arg0) #1 {
  ; MESA-NEXT:    [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_IMPLICIT_ARG_NUM_BYTES_KERNARG_SEGMENT]], i64 36
  ; MESA-NEXT:    [[ARG0_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET]] to i32 addrspace(4)*
  ; MESA-NEXT:    [[ARG0_LOAD:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
-; MESA-NEXT:    store i32 [[ARG0_LOAD]], i32 addrspace(1)* undef
+; MESA-NEXT:    store i32 [[ARG0_LOAD]], i32 addrspace(1)* undef, align 4
  ; MESA-NEXT:    ret void
  ;
    store i32 %arg0, i32 addrspace(1)* undef
@@ -494,7 +494,7 @@ define amdgpu_kernel void @kernel_implicitarg_no_struct_align(<16 x i32>, i32 %a
  ; HSA-NEXT:    [[ARG1_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERNEL_IMPLICITARG_NO_STRUCT_ALIGN_KERNARG_SEGMENT]], i64 64
  ; HSA-NEXT:    [[ARG1_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET]] to i32 addrspace(4)*
  ; HSA-NEXT:    [[ARG1_LOAD:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
-; HSA-NEXT:    store i32 [[ARG1_LOAD]], i32 addrspace(1)* undef
+; HSA-NEXT:    store i32 [[ARG1_LOAD]], i32 addrspace(1)* undef, align 4
  ; HSA-NEXT:    ret void
  ;
  ; MESA-LABEL: @kernel_implicitarg_no_struct_align(
@@ -502,7 +502,7 @@ define amdgpu_kernel void @kernel_implicitarg_no_struct_align(<16 x i32>, i32 %a
  ; MESA-NEXT:    [[ARG1_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERNEL_IMPLICITARG_NO_STRUCT_ALIGN_KERNARG_SEGMENT]], i64 100
  ; MESA-NEXT:    [[ARG1_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET]] to i32 addrspace(4)*
  ; MESA-NEXT:    [[ARG1_LOAD:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
-; MESA-NEXT:    store i32 [[ARG1_LOAD]], i32 addrspace(1)* undef
+; MESA-NEXT:    store i32 [[ARG1_LOAD]], i32 addrspace(1)* undef, align 4
  ; MESA-NEXT:    ret void
  ;
    store i32 %arg1, i32 addrspace(1)* undef
@@ -560,8 +560,8 @@ define amdgpu_kernel void @kern_realign_i8_i8(i8 %arg0, i8 %arg1) #0 {
  ; HSA-NEXT:    [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
  ; HSA-NEXT:    [[TMP4:%.*]] = lshr i32 [[TMP3]], 8
  ; HSA-NEXT:    [[TMP5:%.*]] = trunc i32 [[TMP4]] to i8
-; HSA-NEXT:    store volatile i8 [[TMP2]], i8 addrspace(1)* undef
-; HSA-NEXT:    store volatile i8 [[TMP5]], i8 addrspace(1)* undef
+; HSA-NEXT:    store volatile i8 [[TMP2]], i8 addrspace(1)* undef, align 1
+; HSA-NEXT:    store volatile i8 [[TMP5]], i8 addrspace(1)* undef, align 1
  ; HSA-NEXT:    ret void
  ;
  ; MESA-LABEL: @kern_realign_i8_i8(
@@ -575,8 +575,8 @@ define amdgpu_kernel void @kern_realign_i8_i8(i8 %arg0, i8 %arg1) #0 {
  ; MESA-NEXT:    [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
  ; MESA-NEXT:    [[TMP4:%.*]] = lshr i32 [[TMP3]], 8
  ; MESA-NEXT:    [[TMP5:%.*]] = trunc i32 [[TMP4]] to i8
-; MESA-NEXT:    store volatile i8 [[TMP2]], i8 addrspace(1)* undef
-; MESA-NEXT:    store volatile i8 [[TMP5]], i8 addrspace(1)* undef
+; MESA-NEXT:    store volatile i8 [[TMP2]], i8 addrspace(1)* undef, align 1
+; MESA-NEXT:    store volatile i8 [[TMP5]], i8 addrspace(1)* undef, align 1
  ; MESA-NEXT:    ret void
  ;
    store volatile i8 %arg0, i8 addrspace(1)* undef
@@ -601,9 +601,9 @@ define amdgpu_kernel void @kern_realign_i8_i8_i8(i8 %arg0, i8 %arg1, i8 %arg2) #
  ; HSA-NEXT:    [[TMP6:%.*]] = load i32, i32 addrspace(4)* [[ARG2_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
  ; HSA-NEXT:    [[TMP7:%.*]] = lshr i32 [[TMP6]], 16
  ; HSA-NEXT:    [[TMP8:%.*]] = trunc i32 [[TMP7]] to i8
-; HSA-NEXT:    store volatile i8 [[TMP2]], i8 addrspace(1)* undef
-; HSA-NEXT:    store volatile i8 [[TMP5]], i8 addrspace(1)* undef
-; HSA-NEXT:    store volatile i8 [[TMP8]], i8 addrspace(1)* undef
+; HSA-NEXT:    store volatile i8 [[TMP2]], i8 addrspace(1)* undef, align 1
+; HSA-NEXT:    store volatile i8 [[TMP5]], i8 addrspace(1)* undef, align 1
+; HSA-NEXT:    store volatile i8 [[TMP8]], i8 addrspace(1)* undef, align 1
  ; HSA-NEXT:    ret void
  ;
  ; MESA-LABEL: @kern_realign_i8_i8_i8(
@@ -622,9 +622,9 @@ define amdgpu_kernel void @kern_realign_i8_i8_i8(i8 %arg0, i8 %arg1, i8 %arg2) #
  ; MESA-NEXT:    [[TMP6:%.*]] = load i32, i32 addrspace(4)* [[ARG2_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
  ; MESA-NEXT:    [[TMP7:%.*]] = lshr i32 [[TMP6]], 16
  ; MESA-NEXT:    [[TMP8:%.*]] = trunc i32 [[TMP7]] to i8
-; MESA-NEXT:    store volatile i8 [[TMP2]], i8 addrspace(1)* undef
-; MESA-NEXT:    store volatile i8 [[TMP5]], i8 addrspace(1)* undef
-; MESA-NEXT:    store volatile i8 [[TMP8]], i8 addrspace(1)* undef
+; MESA-NEXT:    store volatile i8 [[TMP2]], i8 addrspace(1)* undef, align 1
+; MESA-NEXT:    store volatile i8 [[TMP5]], i8 addrspace(1)* undef, align 1
+; MESA-NEXT:    store volatile i8 [[TMP8]], i8 addrspace(1)* undef, align 1
  ; MESA-NEXT:    ret void
  ;
    store volatile i8 %arg0, i8 addrspace(1)* undef
@@ -655,10 +655,10 @@ define amdgpu_kernel void @kern_realign_i8_i8_i8_i8(i8 %arg0, i8 %arg1, i8 %arg2
  ; HSA-NEXT:    [[TMP9:%.*]] = load i32, i32 addrspace(4)* [[ARG3_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
  ; HSA-NEXT:    [[TMP10:%.*]] = lshr i32 [[TMP9]], 24
  ; HSA-NEXT:    [[TMP11:%.*]] = trunc i32 [[TMP10]] to i8
-; HSA-NEXT:    store volatile i8 [[TMP2]], i8 addrspace(1)* undef
-; HSA-NEXT:    store volatile i8 [[TMP5]], i8 addrspace(1)* undef
-; HSA-NEXT:    store volatile i8 [[TMP8]], i8 addrspace(1)* undef
-; HSA-NEXT:    store volatile i8 [[TMP11]], i8 addrspace(1)* undef
+; HSA-NEXT:    store volatile i8 [[TMP2]], i8 addrspace(1)* undef, align 1
+; HSA-NEXT:    store volatile i8 [[TMP5]], i8 addrspace(1)* undef, align 1
+; HSA-NEXT:    store volatile i8 [[TMP8]], i8 addrspace(1)* undef, align 1
+; HSA-NEXT:    store volatile i8 [[TMP11]], i8 addrspace(1)* undef, align 1
  ; HSA-NEXT:    ret void
  ;
  ; MESA-LABEL: @kern_realign_i8_i8_i8_i8(
@@ -682,10 +682,10 @@ define amdgpu_kernel void @kern_realign_i8_i8_i8_i8(i8 %arg0, i8 %arg1, i8 %arg2
  ; MESA-NEXT:    [[TMP9:%.*]] = load i32, i32 addrspace(4)* [[ARG3_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
  ; MESA-NEXT:    [[TMP10:%.*]] = lshr i32 [[TMP9]], 24
  ; MESA-NEXT:    [[TMP11:%.*]] = trunc i32 [[TMP10]] to i8
-; MESA-NEXT:    store volatile i8 [[TMP2]], i8 addrspace(1)* undef
-; MESA-NEXT:    store volatile i8 [[TMP5]], i8 addrspace(1)* undef
-; MESA-NEXT:    store volatile i8 [[TMP8]], i8 addrspace(1)* undef
-; MESA-NEXT:    store volatile i8 [[TMP11]], i8 addrspace(1)* undef
+; MESA-NEXT:    store volatile i8 [[TMP2]], i8 addrspace(1)* undef, align 1
+; MESA-NEXT:    store volatile i8 [[TMP5]], i8 addrspace(1)* undef, align 1
+; MESA-NEXT:    store volatile i8 [[TMP8]], i8 addrspace(1)* undef, align 1
+; MESA-NEXT:    store volatile i8 [[TMP11]], i8 addrspace(1)* undef, align 1
  ; MESA-NEXT:    ret void
  ;
    store volatile i8 %arg0, i8 addrspace(1)* undef
@@ -707,8 +707,8 @@ define amdgpu_kernel void @kern_realign_i8_v3i8(i8 %arg0, <3 x i8> %arg1) #0 {
  ; HSA-NEXT:    [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
  ; HSA-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i24
  ; HSA-NEXT:    [[ARG1_LOAD:%.*]] = bitcast i24 [[TMP4]] to <3 x i8>
-; HSA-NEXT:    store volatile i8 [[TMP2]], i8 addrspace(1)* undef
-; HSA-NEXT:    store volatile <3 x i8> [[ARG1_LOAD]], <3 x i8> addrspace(1)* undef
+; HSA-NEXT:    store volatile i8 [[TMP2]], i8 addrspace(1)* undef, align 1
+; HSA-NEXT:    store volatile <3 x i8> [[ARG1_LOAD]], <3 x i8> addrspace(1)* undef, align 4
  ; HSA-NEXT:    ret void
  ;
  ; MESA-LABEL: @kern_realign_i8_v3i8(
@@ -722,8 +722,8 @@ define amdgpu_kernel void @kern_realign_i8_v3i8(i8 %arg0, <3 x i8> %arg1) #0 {
  ; MESA-NEXT:    [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 8, !invariant.load !0
  ; MESA-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i24
  ; MESA-NEXT:    [[ARG1_LOAD:%.*]] = bitcast i24 [[TMP4]] to <3 x i8>
-; MESA-NEXT:    store volatile i8 [[TMP2]], i8 addrspace(1)* undef
-; MESA-NEXT:    store volatile <3 x i8> [[ARG1_LOAD]], <3 x i8> addrspace(1)* undef
+; MESA-NEXT:    store volatile i8 [[TMP2]], i8 addrspace(1)* undef, align 1
+; MESA-NEXT:    store volatile <3 x i8> [[ARG1_LOAD]], <3 x i8> addrspace(1)* undef, align 4
  ; MESA-NEXT:    ret void
  ;
    store volatile i8 %arg0, i8 addrspace(1)* undef
@@ -743,8 +743,8 @@ define amdgpu_kernel void @kern_realign_i8_i16(i8 %arg0, i16 %arg1) #0 {
  ; HSA-NEXT:    [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
  ; HSA-NEXT:    [[TMP4:%.*]] = lshr i32 [[TMP3]], 16
  ; HSA-NEXT:    [[TMP5:%.*]] = trunc i32 [[TMP4]] to i16
-; HSA-NEXT:    store volatile i8 [[TMP2]], i8 addrspace(1)* undef
-; HSA-NEXT:    store volatile i16 [[TMP5]], i16 addrspace(1)* undef
+; HSA-NEXT:    store volatile i8 [[TMP2]], i8 addrspace(1)* undef, align 1
+; HSA-NEXT:    store volatile i16 [[TMP5]], i16 addrspace(1)* undef, align 2
  ; HSA-NEXT:    ret void
  ;
  ; MESA-LABEL: @kern_realign_i8_i16(
@@ -758,8 +758,8 @@ define amdgpu_kernel void @kern_realign_i8_i16(i8 %arg0, i16 %arg1) #0 {
  ; MESA-NEXT:    [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
  ; MESA-NEXT:    [[TMP4:%.*]] = lshr i32 [[TMP3]], 16
  ; MESA-NEXT:    [[TMP5:%.*]] = trunc i32 [[TMP4]] to i16
-; MESA-NEXT:    store volatile i8 [[TMP2]], i8 addrspace(1)* undef
-; MESA-NEXT:    store volatile i16 [[TMP5]], i16 addrspace(1)* undef
+; MESA-NEXT:    store volatile i8 [[TMP2]], i8 addrspace(1)* undef, align 1
+; MESA-NEXT:    store volatile i16 [[TMP5]], i16 addrspace(1)* undef, align 2
  ; MESA-NEXT:    ret void
  ;
    store volatile i8 %arg0, i8 addrspace(1)* undef
@@ -779,8 +779,8 @@ define amdgpu_kernel void @kern_realign_i1_i1(i1 %arg0, i1 %arg1) #0 {
  ; HSA-NEXT:    [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
  ; HSA-NEXT:    [[TMP4:%.*]] = lshr i32 [[TMP3]], 8
  ; HSA-NEXT:    [[TMP5:%.*]] = trunc i32 [[TMP4]] to i1
-; HSA-NEXT:    store volatile i1 [[TMP2]], i1 addrspace(1)* undef
-; HSA-NEXT:    store volatile i1 [[TMP5]], i1 addrspace(1)* undef
+; HSA-NEXT:    store volatile i1 [[TMP2]], i1 addrspace(1)* undef, align 1
+; HSA-NEXT:    store volatile i1 [[TMP5]], i1 addrspace(1)* undef, align 1
  ; HSA-NEXT:    ret void
  ;
  ; MESA-LABEL: @kern_realign_i1_i1(
@@ -794,8 +794,8 @@ define amdgpu_kernel void @kern_realign_i1_i1(i1 %arg0, i1 %arg1) #0 {
  ; MESA-NEXT:    [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
  ; MESA-NEXT:    [[TMP4:%.*]] = lshr i32 [[TMP3]], 8
  ; MESA-NEXT:    [[TMP5:%.*]] = trunc i32 [[TMP4]] to i1
-; MESA-NEXT:    store volatile i1 [[TMP2]], i1 addrspace(1)* undef
-; MESA-NEXT:    store volatile i1 [[TMP5]], i1 addrspace(1)* undef
+; MESA-NEXT:    store volatile i1 [[TMP2]], i1 addrspace(1)* undef, align 1
+; MESA-NEXT:    store volatile i1 [[TMP5]], i1 addrspace(1)* undef, align 1
  ; MESA-NEXT:    ret void
  ;
    store volatile i1 %arg0, i1 addrspace(1)* undef
@@ -820,9 +820,9 @@ define amdgpu_kernel void @kern_realign_i1_i1_i1(i1 %arg0, i1 %arg1, i1 %arg2) #
  ; HSA-NEXT:    [[TMP6:%.*]] = load i32, i32 addrspace(4)* [[ARG2_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
  ; HSA-NEXT:    [[TMP7:%.*]] = lshr i32 [[TMP6]], 16
  ; HSA-NEXT:    [[TMP8:%.*]] = trunc i32 [[TMP7]] to i1
-; HSA-NEXT:    store volatile i1 [[TMP2]], i1 addrspace(1)* undef
-; HSA-NEXT:    store volatile i1 [[TMP5]], i1 addrspace(1)* undef
-; HSA-NEXT:    store volatile i1 [[TMP8]], i1 addrspace(1)* undef
+; HSA-NEXT:    store volatile i1 [[TMP2]], i1 addrspace(1)* undef, align 1
+; HSA-NEXT:    store volatile i1 [[TMP5]], i1 addrspace(1)* undef, align 1
+; HSA-NEXT:    store volatile i1 [[TMP8]], i1 addrspace(1)* undef, align 1
  ; HSA-NEXT:    ret void
  ;
  ; MESA-LABEL: @kern_realign_i1_i1_i1(
@@ -841,9 +841,9 @@ define amdgpu_kernel void @kern_realign_i1_i1_i1(i1 %arg0, i1 %arg1, i1 %arg2) #
  ; MESA-NEXT:    [[TMP6:%.*]] = load i32, i32 addrspace(4)* [[ARG2_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
  ; MESA-NEXT:    [[TMP7:%.*]] = lshr i32 [[TMP6]], 16
  ; MESA-NEXT:    [[TMP8:%.*]] = trunc i32 [[TMP7]] to i1
-; MESA-NEXT:    store volatile i1 [[TMP2]], i1 addrspace(1)* undef
-; MESA-NEXT:    store volatile i1 [[TMP5]], i1 addrspace(1)* undef
-; MESA-NEXT:    store volatile i1 [[TMP8]], i1 addrspace(1)* undef
+; MESA-NEXT:    store volatile i1 [[TMP2]], i1 addrspace(1)* undef, align 1
+; MESA-NEXT:    store volatile i1 [[TMP5]], i1 addrspace(1)* undef, align 1
+; MESA-NEXT:    store volatile i1 [[TMP8]], i1 addrspace(1)* undef, align 1
  ; MESA-NEXT:    ret void
  ;
    store volatile i1 %arg0, i1 addrspace(1)* undef
@@ -874,10 +874,10 @@ define amdgpu_kernel void @kern_realign_i1_i1_i1_i1(i1 %arg0, i1 %arg1, i1 %arg2
  ; HSA-NEXT:    [[TMP9:%.*]] = load i32, i32 addrspace(4)* [[ARG3_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
  ; HSA-NEXT:    [[TMP10:%.*]] = lshr i32 [[TMP9]], 24
  ; HSA-NEXT:    [[TMP11:%.*]] = trunc i32 [[TMP10]] to i1
-; HSA-NEXT:    store volatile i1 [[TMP2]], i1 addrspace(1)* undef
-; HSA-NEXT:    store volatile i1 [[TMP5]], i1 addrspace(1)* undef
-; HSA-NEXT:    store volatile i1 [[TMP8]], i1 addrspace(1)* undef
-; HSA-NEXT:    store volatile i1 [[TMP11]], i1 addrspace(1)* undef
+; HSA-NEXT:    store volatile i1 [[TMP2]], i1 addrspace(1)* undef, align 1
+; HSA-NEXT:    store volatile i1 [[TMP5]], i1 addrspace(1)* undef, align 1
+; HSA-NEXT:    store volatile i1 [[TMP8]], i1 addrspace(1)* undef, align 1
+; HSA-NEXT:    store volatile i1 [[TMP11]], i1 addrspace(1)* undef, align 1
  ; HSA-NEXT:    ret void
  ;
  ; MESA-LABEL: @kern_realign_i1_i1_i1_i1(
@@ -901,10 +901,10 @@ define amdgpu_kernel void @kern_realign_i1_i1_i1_i1(i1 %arg0, i1 %arg1, i1 %arg2
  ; MESA-NEXT:    [[TMP9:%.*]] = load i32, i32 addrspace(4)* [[ARG3_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
  ; MESA-NEXT:    [[TMP10:%.*]] = lshr i32 [[TMP9]], 24
  ; MESA-NEXT:    [[TMP11:%.*]] = trunc i32 [[TMP10]] to i1
-; MESA-NEXT:    store volatile i1 [[TMP2]], i1 addrspace(1)* undef
-; MESA-NEXT:    store volatile i1 [[TMP5]], i1 addrspace(1)* undef
-; MESA-NEXT:    store volatile i1 [[TMP8]], i1 addrspace(1)* undef
-; MESA-NEXT:    store volatile i1 [[TMP11]], i1 addrspace(1)* undef
+; MESA-NEXT:    store volatile i1 [[TMP2]], i1 addrspace(1)* undef, align 1
+; MESA-NEXT:    store volatile i1 [[TMP5]], i1 addrspace(1)* undef, align 1
+; MESA-NEXT:    store volatile i1 [[TMP8]], i1 addrspace(1)* undef, align 1
+; MESA-NEXT:    store volatile i1 [[TMP11]], i1 addrspace(1)* undef, align 1
  ; MESA-NEXT:    ret void
  ;
    store volatile i1 %arg0, i1 addrspace(1)* undef
@@ -926,8 +926,8 @@ define amdgpu_kernel void @kern_realign_i1_v3i1(i1 %arg0, <3 x i1> %arg1) #0 {
  ; HSA-NEXT:    [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
  ; HSA-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i3
  ; HSA-NEXT:    [[ARG1_LOAD:%.*]] = bitcast i3 [[TMP4]] to <3 x i1>
-; HSA-NEXT:    store volatile i1 [[TMP2]], i1 addrspace(1)* undef
-; HSA-NEXT:    store volatile <3 x i1> [[ARG1_LOAD]], <3 x i1> addrspace(1)* undef
+; HSA-NEXT:    store volatile i1 [[TMP2]], i1 addrspace(1)* undef, align 1
+; HSA-NEXT:    store volatile <3 x i1> [[ARG1_LOAD]], <3 x i1> addrspace(1)* undef, align 4
  ; HSA-NEXT:    ret void
  ;
  ; MESA-LABEL: @kern_realign_i1_v3i1(
@@ -941,8 +941,8 @@ define amdgpu_kernel void @kern_realign_i1_v3i1(i1 %arg0, <3 x i1> %arg1) #0 {
  ; MESA-NEXT:    [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 8, !invariant.load !0
  ; MESA-NEXT:    [[TMP4:%.*]] = trunc i32 [[TMP3]] to i3
  ; MESA-NEXT:    [[ARG1_LOAD:%.*]] = bitcast i3 [[TMP4]] to <3 x i1>
-; MESA-NEXT:    store volatile i1 [[TMP2]], i1 addrspace(1)* undef
-; MESA-NEXT:    store volatile <3 x i1> [[ARG1_LOAD]], <3 x i1> addrspace(1)* undef
+; MESA-NEXT:    store volatile i1 [[TMP2]], i1 addrspace(1)* undef, align 1
+; MESA-NEXT:    store volatile <3 x i1> [[ARG1_LOAD]], <3 x i1> addrspace(1)* undef, align 4
  ; MESA-NEXT:    ret void
  ;
    store volatile i1 %arg0, i1 addrspace(1)* undef
@@ -962,8 +962,8 @@ define amdgpu_kernel void @kern_realign_i1_i16(i1 %arg0, i16 %arg1) #0 {
  ; HSA-NEXT:    [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 16, !invariant.load !0
  ; HSA-NEXT:    [[TMP4:%.*]] = lshr i32 [[TMP3]], 16
  ; HSA-NEXT:    [[TMP5:%.*]] = trunc i32 [[TMP4]] to i16
-; HSA-NEXT:    store volatile i1 [[TMP2]], i1 addrspace(1)* undef
-; HSA-NEXT:    store volatile i16 [[TMP5]], i16 addrspace(1)* undef
+; HSA-NEXT:    store volatile i1 [[TMP2]], i1 addrspace(1)* undef, align 1
+; HSA-NEXT:    store volatile i16 [[TMP5]], i16 addrspace(1)* undef, align 2
  ; HSA-NEXT:    ret void
  ;
  ; MESA-LABEL: @kern_realign_i1_i16(
@@ -977,8 +977,8 @@ define amdgpu_kernel void @kern_realign_i1_i16(i1 %arg0, i16 %arg1) #0 {
  ; MESA-NEXT:    [[TMP3:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
  ; MESA-NEXT:    [[TMP4:%.*]] = lshr i32 [[TMP3]], 16
  ; MESA-NEXT:    [[TMP5:%.*]] = trunc i32 [[TMP4]] to i16
-; MESA-NEXT:    store volatile i1 [[TMP2]], i1 addrspace(1)* undef
-; MESA-NEXT:    store volatile i16 [[TMP5]], i16 addrspace(1)* undef
+; MESA-NEXT:    store volatile i1 [[TMP2]], i1 addrspace(1)* undef, align 1
+; MESA-NEXT:    store volatile i16 [[TMP5]], i16 addrspace(1)* undef, align 2
  ; MESA-NEXT:    ret void
  ;
    store volatile i1 %arg0, i1 addrspace(1)* undef
@@ -1023,13 +1023,13 @@ define amdgpu_kernel void @kern_realign_i8_i8_i8_i8_i8_i8_i8_i8(i8 %arg0, i8 %ar
  ; HSA-NEXT:    [[TMP18:%.*]] = load i32, i32 addrspace(4)* [[ARG7_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 4, !invariant.load !0
  ; HSA-NEXT:    [[TMP19:%.*]] = lshr i32 [[TMP18]], 24
  ; HSA-NEXT:    [[TMP20:%.*]] = trunc i32 [[TMP19]] to i8
-; HSA-NEXT:    store volatile i8 [[TMP2]], i8 addrspace(1)* undef
-; HSA-NEXT:    store volatile i8 [[TMP5]], i8 addrspace(1)* undef
-; HSA-NEXT:    store volatile i8 [[TMP8]], i8 addrspace(1)* undef
-; HSA-NEXT:    store volatile i8 [[TMP11]], i8 addrspace(1)* undef
-; HSA-NEXT:    store volatile i8 [[TMP14]], i8 addrspace(1)* undef
-; HSA-NEXT:    store volatile i8 [[TMP17]], i8 addrspace(1)* undef
-; HSA-NEXT:    store volatile i8 [[TMP20]], i8 addrspace(1)* undef
+; HSA-NEXT:    store volatile i8 [[TMP2]], i8 addrspace(1)* undef, align 1
+; HSA-NEXT:    store volatile i8 [[TMP5]], i8 addrspace(1)* undef, align 1
+; HSA-NEXT:    store volatile i8 [[TMP8]], i8 addrspace(1)* undef, align 1
+; HSA-NEXT:    store volatile i8 [[TMP11]], i8 addrspace(1)* undef, align 1
+; HSA-NEXT:    store volatile i8 [[TMP14]], i8 addrspace(1)* undef, align 1
+; HSA-NEXT:    store volatile i8 [[TMP17]], i8 addrspace(1)* undef, align 1
+; HSA-NEXT:    store volatile i8 [[TMP20]], i8 addrspace(1)* undef, align 1
  ; HSA-NEXT:    ret void
  ;
  ; MESA-LABEL: @kern_realign_i8_i8_i8_i8_i8_i8_i8_i8(
@@ -1068,13 +1068,13 @@ define amdgpu_kernel void @kern_realign_i8_i8_i8_i8_i8_i8_i8_i8(i8 %arg0, i8 %ar
  ; MESA-NEXT:    [[TMP18:%.*]] = load i32, i32 addrspace(4)* [[ARG7_KERNARG_OFFSET_ALIGN_DOWN_CAST]], align 8, !invariant.load !0
  ; MESA-NEXT:    [[TMP19:%.*]] = lshr i32 [[TMP18]], 24
  ; MESA-NEXT:    [[TMP20:%.*]] = trunc i32 [[TMP19]] to i8
-; MESA-NEXT:    store volatile i8 [[TMP2]], i8 addrspace(1)* undef
-; MESA-NEXT:    store volatile i8 [[TMP5]], i8 addrspace(1)* undef
-; MESA-NEXT:    store volatile i8 [[TMP8]], i8 addrspace(1)* undef
-; MESA-NEXT:    store volatile i8 [[TMP11]], i8 addrspace(1)* undef
-; MESA-NEXT:    store volatile i8 [[TMP14]], i8 addrspace(1)* undef
-; MESA-NEXT:    store volatile i8 [[TMP17]], i8 addrspace(1)* undef
-; MESA-NEXT:    store volatile i8 [[TMP20]], i8 addrspace(1)* undef
+; MESA-NEXT:    store volatile i8 [[TMP2]], i8 addrspace(1)* undef, align 1
+; MESA-NEXT:    store volatile i8 [[TMP5]], i8 addrspace(1)* undef, align 1
+; MESA-NEXT:    store volatile i8 [[TMP8]], i8 addrspace(1)* undef, align 1
+; MESA-NEXT:    store volatile i8 [[TMP11]], i8 addrspace(1)* undef, align 1
+; MESA-NEXT:    store volatile i8 [[TMP14]], i8 addrspace(1)* undef, align 1
+; MESA-NEXT:    store volatile i8 [[TMP17]], i8 addrspace(1)* undef, align 1
+; MESA-NEXT:    store volatile i8 [[TMP20]], i8 addrspace(1)* undef, align 1
  ; MESA-NEXT:    ret void
  ;
    store volatile i8 %arg0, i8 addrspace(1)* undef
@@ -1101,8 +1101,8 @@ define amdgpu_kernel void @kern_realign_f16_f16(half %arg0, half %arg1) #0 {
  ; HSA-NEXT:    [[TMP4:%.*]] = lshr i32 [[TMP3]], 16
  ; HSA-NEXT:    [[TMP5:%.*]] = trunc i32 [[TMP4]] to i16
  ; HSA-NEXT:    [[ARG1_LOAD:%.*]] = bitcast i16 [[TMP5]] to half
-; HSA-NEXT:    store volatile half [[ARG0_LOAD]], half addrspace(1)* undef
-; HSA-NEXT:    store volatile half [[ARG1_LOAD]], half addrspace(1)* undef
+; HSA-NEXT:    store volatile half [[ARG0_LOAD]], half addrspace(1)* undef, align 2
+; HSA-NEXT:    store volatile half [[ARG1_LOAD]], half addrspace(1)* undef, align 2
  ; HSA-NEXT:    ret void
  ;
  ; MESA-LABEL: @kern_realign_f16_f16(
@@ -1118,8 +1118,8 @@ define amdgpu_kernel void @kern_realign_f16_f16(half %arg0, half %arg1) #0 {
  ; MESA-NEXT:    [[TMP4:%.*]] = lshr i32 [[TMP3]], 16
  ; MESA-NEXT:    [[TMP5:%.*]] = trunc i32 [[TMP4]] to i16
  ; MESA-NEXT:    [[ARG1_LOAD:%.*]] = bitcast i16 [[TMP5]] to half
-; MESA-NEXT:    store volatile half [[ARG0_LOAD]], half addrspace(1)* undef
-; MESA-NEXT:    store volatile half [[ARG1_LOAD]], half addrspace(1)* undef
+; MESA-NEXT:    store volatile half [[ARG0_LOAD]], half addrspace(1)* undef, align 2
+; MESA-NEXT:    store volatile half [[ARG1_LOAD]], half addrspace(1)* undef, align 2
  ; MESA-NEXT:    ret void
  ;
    store volatile half %arg0, half addrspace(1)* undef
@@ -1133,7 +1133,7 @@ define amdgpu_kernel void @kern_global_ptr(i8 addrspace(1)* %ptr) #0 {
  ; HSA-NEXT:    [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_GLOBAL_PTR_KERNARG_SEGMENT]], i64 0
  ; HSA-NEXT:    [[PTR_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[PTR_KERNARG_OFFSET]] to i8 addrspace(1)* addrspace(4)*
  ; HSA-NEXT:    [[PTR_LOAD:%.*]] = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(4)* [[PTR_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
-; HSA-NEXT:    store volatile i8 addrspace(1)* [[PTR_LOAD]], i8 addrspace(1)* addrspace(1)* undef
+; HSA-NEXT:    store volatile i8 addrspace(1)* [[PTR_LOAD]], i8 addrspace(1)* addrspace(1)* undef, align 8
  ; HSA-NEXT:    ret void
  ;
  ; MESA-LABEL: @kern_global_ptr(
@@ -1141,7 +1141,7 @@ define amdgpu_kernel void @kern_global_ptr(i8 addrspace(1)* %ptr) #0 {
  ; MESA-NEXT:    [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_GLOBAL_PTR_KERNARG_SEGMENT]], i64 36
  ; MESA-NEXT:    [[PTR_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[PTR_KERNARG_OFFSET]] to i8 addrspace(1)* addrspace(4)*
  ; MESA-NEXT:    [[PTR_LOAD:%.*]] = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(4)* [[PTR_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
-; MESA-NEXT:    store volatile i8 addrspace(1)* [[PTR_LOAD]], i8 addrspace(1)* addrspace(1)* undef
+; MESA-NEXT:    store volatile i8 addrspace(1)* [[PTR_LOAD]], i8 addrspace(1)* addrspace(1)* undef, align 8
  ; MESA-NEXT:    ret void
  ;
    store volatile i8 addrspace(1)* %ptr, i8 addrspace(1)* addrspace(1)* undef
@@ -1154,7 +1154,7 @@ define amdgpu_kernel void @kern_global_ptr_dereferencable(i8 addrspace(1)* deref
  ; HSA-NEXT:    [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_GLOBAL_PTR_DEREFERENCABLE_KERNARG_SEGMENT]], i64 0
  ; HSA-NEXT:    [[PTR_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[PTR_KERNARG_OFFSET]] to i8 addrspace(1)* addrspace(4)*
  ; HSA-NEXT:    [[PTR_LOAD:%.*]] = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(4)* [[PTR_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0, !dereferenceable !1
-; HSA-NEXT:    store volatile i8 addrspace(1)* [[PTR_LOAD]], i8 addrspace(1)* addrspace(1)* undef
+; HSA-NEXT:    store volatile i8 addrspace(1)* [[PTR_LOAD]], i8 addrspace(1)* addrspace(1)* undef, align 8
  ; HSA-NEXT:    ret void
  ;
  ; MESA-LABEL: @kern_global_ptr_dereferencable(
@@ -1162,7 +1162,7 @@ define amdgpu_kernel void @kern_global_ptr_dereferencable(i8 addrspace(1)* deref
  ; MESA-NEXT:    [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_GLOBAL_PTR_DEREFERENCABLE_KERNARG_SEGMENT]], i64 36
  ; MESA-NEXT:    [[PTR_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[PTR_KERNARG_OFFSET]] to i8 addrspace(1)* addrspace(4)*
  ; MESA-NEXT:    [[PTR_LOAD:%.*]] = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(4)* [[PTR_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0, !dereferenceable !1
-; MESA-NEXT:    store volatile i8 addrspace(1)* [[PTR_LOAD]], i8 addrspace(1)* addrspace(1)* undef
+; MESA-NEXT:    store volatile i8 addrspace(1)* [[PTR_LOAD]], i8 addrspace(1)* addrspace(1)* undef, align 8
  ; MESA-NEXT:    ret void
  ;
    store volatile i8 addrspace(1)* %ptr, i8 addrspace(1)* addrspace(1)* undef
@@ -1175,7 +1175,7 @@ define amdgpu_kernel void @kern_global_ptr_dereferencable_or_null(i8 addrspace(1
  ; HSA-NEXT:    [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_GLOBAL_PTR_DEREFERENCABLE_OR_NULL_KERNARG_SEGMENT]], i64 0
  ; HSA-NEXT:    [[PTR_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[PTR_KERNARG_OFFSET]] to i8 addrspace(1)* addrspace(4)*
  ; HSA-NEXT:    [[PTR_LOAD:%.*]] = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(4)* [[PTR_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0, !dereferenceable_or_null !2
-; HSA-NEXT:    store volatile i8 addrspace(1)* [[PTR_LOAD]], i8 addrspace(1)* addrspace(1)* undef
+; HSA-NEXT:    store volatile i8 addrspace(1)* [[PTR_LOAD]], i8 addrspace(1)* addrspace(1)* undef, align 8
  ; HSA-NEXT:    ret void
  ;
  ; MESA-LABEL: @kern_global_ptr_dereferencable_or_null(
@@ -1183,7 +1183,7 @@ define amdgpu_kernel void @kern_global_ptr_dereferencable_or_null(i8 addrspace(1
  ; MESA-NEXT:    [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_GLOBAL_PTR_DEREFERENCABLE_OR_NULL_KERNARG_SEGMENT]], i64 36
  ; MESA-NEXT:    [[PTR_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[PTR_KERNARG_OFFSET]] to i8 addrspace(1)* addrspace(4)*
  ; MESA-NEXT:    [[PTR_LOAD:%.*]] = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(4)* [[PTR_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0, !dereferenceable_or_null !2
-; MESA-NEXT:    store volatile i8 addrspace(1)* [[PTR_LOAD]], i8 addrspace(1)* addrspace(1)* undef
+; MESA-NEXT:    store volatile i8 addrspace(1)* [[PTR_LOAD]], i8 addrspace(1)* addrspace(1)* undef, align 8
  ; MESA-NEXT:    ret void
  ;
    store volatile i8 addrspace(1)* %ptr, i8 addrspace(1)* addrspace(1)* undef
@@ -1196,7 +1196,7 @@ define amdgpu_kernel void @kern_nonnull_global_ptr(i8 addrspace(1)* nonnull %ptr
  ; HSA-NEXT:    [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_NONNULL_GLOBAL_PTR_KERNARG_SEGMENT]], i64 0
  ; HSA-NEXT:    [[PTR_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[PTR_KERNARG_OFFSET]] to i8 addrspace(1)* addrspace(4)*
  ; HSA-NEXT:    [[PTR_LOAD:%.*]] = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(4)* [[PTR_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0, !nonnull !0
-; HSA-NEXT:    store volatile i8 addrspace(1)* [[PTR_LOAD]], i8 addrspace(1)* addrspace(1)* undef
+; HSA-NEXT:    store volatile i8 addrspace(1)* [[PTR_LOAD]], i8 addrspace(1)* addrspace(1)* undef, align 8
  ; HSA-NEXT:    ret void
  ;
  ; MESA-LABEL: @kern_nonnull_global_ptr(
@@ -1204,7 +1204,7 @@ define amdgpu_kernel void @kern_nonnull_global_ptr(i8 addrspace(1)* nonnull %ptr
  ; MESA-NEXT:    [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_NONNULL_GLOBAL_PTR_KERNARG_SEGMENT]], i64 36
  ; MESA-NEXT:    [[PTR_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[PTR_KERNARG_OFFSET]] to i8 addrspace(1)* addrspace(4)*
  ; MESA-NEXT:    [[PTR_LOAD:%.*]] = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(4)* [[PTR_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0, !nonnull !0
-; MESA-NEXT:    store volatile i8 addrspace(1)* [[PTR_LOAD]], i8 addrspace(1)* addrspace(1)* undef
+; MESA-NEXT:    store volatile i8 addrspace(1)* [[PTR_LOAD]], i8 addrspace(1)* addrspace(1)* undef, align 8
  ; MESA-NEXT:    ret void
  ;
    store volatile i8 addrspace(1)* %ptr, i8 addrspace(1)* addrspace(1)* undef
@@ -1217,7 +1217,7 @@ define amdgpu_kernel void @kern_align32_global_ptr(i8 addrspace(1)* align 1024 %
  ; HSA-NEXT:    [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_ALIGN32_GLOBAL_PTR_KERNARG_SEGMENT]], i64 0
  ; HSA-NEXT:    [[PTR_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[PTR_KERNARG_OFFSET]] to i8 addrspace(1)* addrspace(4)*
  ; HSA-NEXT:    [[PTR_LOAD:%.*]] = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(4)* [[PTR_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0, !align !3
-; HSA-NEXT:    store volatile i8 addrspace(1)* [[PTR_LOAD]], i8 addrspace(1)* addrspace(1)* undef
+; HSA-NEXT:    store volatile i8 addrspace(1)* [[PTR_LOAD]], i8 addrspace(1)* addrspace(1)* undef, align 8
  ; HSA-NEXT:    ret void
  ;
  ; MESA-LABEL: @kern_align32_global_ptr(
@@ -1225,7 +1225,7 @@ define amdgpu_kernel void @kern_align32_global_ptr(i8 addrspace(1)* align 1024 %
  ; MESA-NEXT:    [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_ALIGN32_GLOBAL_PTR_KERNARG_SEGMENT]], i64 36
  ; MESA-NEXT:    [[PTR_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[PTR_KERNARG_OFFSET]] to i8 addrspace(1)* addrspace(4)*
  ; MESA-NEXT:    [[PTR_LOAD:%.*]] = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(4)* [[PTR_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0, !align !3
-; MESA-NEXT:    store volatile i8 addrspace(1)* [[PTR_LOAD]], i8 addrspace(1)* addrspace(1)* undef
+; MESA-NEXT:    store volatile i8 addrspace(1)* [[PTR_LOAD]], i8 addrspace(1)* addrspace(1)* undef, align 8
  ; MESA-NEXT:    ret void
  ;
    store volatile i8 addrspace(1)* %ptr, i8 addrspace(1)* addrspace(1)* undef
@@ -1235,12 +1235,12 @@ define amdgpu_kernel void @kern_align32_global_ptr(i8 addrspace(1)* align 1024 %
  define amdgpu_kernel void @kern_noalias_global_ptr(i8 addrspace(1)* noalias %ptr) #0 {
  ; HSA-LABEL: @kern_noalias_global_ptr(
  ; HSA-NEXT:    [[KERN_NOALIAS_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(8) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
-; HSA-NEXT:    store volatile i8 addrspace(1)* [[PTR:%.*]], i8 addrspace(1)* addrspace(1)* undef
+; HSA-NEXT:    store volatile i8 addrspace(1)* [[PTR:%.*]], i8 addrspace(1)* addrspace(1)* undef, align 8
  ; HSA-NEXT:    ret void
  ;
  ; MESA-LABEL: @kern_noalias_global_ptr(
  ; MESA-NEXT:    [[KERN_NOALIAS_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(44) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
-; MESA-NEXT:    store volatile i8 addrspace(1)* [[PTR:%.*]], i8 addrspace(1)* addrspace(1)* undef
+; MESA-NEXT:    store volatile i8 addrspace(1)* [[PTR:%.*]], i8 addrspace(1)* addrspace(1)* undef, align 8
  ; MESA-NEXT:    ret void
  ;
    store volatile i8 addrspace(1)* %ptr, i8 addrspace(1)* addrspace(1)* undef
@@ -1250,14 +1250,14 @@ define amdgpu_kernel void @kern_noalias_global_ptr(i8 addrspace(1)* noalias %ptr
  define amdgpu_kernel void @kern_noalias_global_ptr_x2(i8 addrspace(1)* noalias %ptr0, i8 addrspace(1)* noalias %ptr1) #0 {
  ; HSA-LABEL: @kern_noalias_global_ptr_x2(
  ; HSA-NEXT:    [[KERN_NOALIAS_GLOBAL_PTR_X2_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(16) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
-; HSA-NEXT:    store volatile i8 addrspace(1)* [[PTR0:%.*]], i8 addrspace(1)* addrspace(1)* undef
-; HSA-NEXT:    store volatile i8 addrspace(1)* [[PTR1:%.*]], i8 addrspace(1)* addrspace(1)* undef
+; HSA-NEXT:    store volatile i8 addrspace(1)* [[PTR0:%.*]], i8 addrspace(1)* addrspace(1)* undef, align 8
+; HSA-NEXT:    store volatile i8 addrspace(1)* [[PTR1:%.*]], i8 addrspace(1)* addrspace(1)* undef, align 8
  ; HSA-NEXT:    ret void
  ;
  ; MESA-LABEL: @kern_noalias_global_ptr_x2(
  ; MESA-NEXT:    [[KERN_NOALIAS_GLOBAL_PTR_X2_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(52) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
-; MESA-NEXT:    store volatile i8 addrspace(1)* [[PTR0:%.*]], i8 addrspace(1)* addrspace(1)* undef
-; MESA-NEXT:    store volatile i8 addrspace(1)* [[PTR1:%.*]], i8 addrspace(1)* addrspace(1)* undef
+; MESA-NEXT:    store volatile i8 addrspace(1)* [[PTR0:%.*]], i8 addrspace(1)* addrspace(1)* undef, align 8
+; MESA-NEXT:    store volatile i8 addrspace(1)* [[PTR1:%.*]], i8 addrspace(1)* addrspace(1)* undef, align 8
  ; MESA-NEXT:    ret void
  ;
    store volatile i8 addrspace(1)* %ptr0, i8 addrspace(1)* addrspace(1)* undef
@@ -1414,7 +1414,7 @@ define amdgpu_kernel void @empty_struct_with_other({} %empty, i32 %arg1) #0 {
  ; HSA-NEXT:    [[ARG1_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[EMPTY_STRUCT_WITH_OTHER_KERNARG_SEGMENT]], i64 0
  ; HSA-NEXT:    [[ARG1_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET]] to i32 addrspace(4)*
  ; HSA-NEXT:    [[ARG1_LOAD:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
-; HSA-NEXT:    store i32 [[ARG1_LOAD]], i32 addrspace(1)* undef
+; HSA-NEXT:    store i32 [[ARG1_LOAD]], i32 addrspace(1)* undef, align 4
  ; HSA-NEXT:    ret void
  ;
  ; MESA-LABEL: @empty_struct_with_other(
@@ -1422,7 +1422,7 @@ define amdgpu_kernel void @empty_struct_with_other({} %empty, i32 %arg1) #0 {
  ; MESA-NEXT:    [[ARG1_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[EMPTY_STRUCT_WITH_OTHER_KERNARG_SEGMENT]], i64 36
  ; MESA-NEXT:    [[ARG1_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG1_KERNARG_OFFSET]] to i32 addrspace(4)*
  ; MESA-NEXT:    [[ARG1_LOAD:%.*]] = load i32, i32 addrspace(4)* [[ARG1_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
-; MESA-NEXT:    store i32 [[ARG1_LOAD]], i32 addrspace(1)* undef
+; MESA-NEXT:    store i32 [[ARG1_LOAD]], i32 addrspace(1)* undef, align 4
  ; MESA-NEXT:    ret void
  ;
    store i32 %arg1, i32 addrspace(1)* undef
@@ -1432,21 +1432,21 @@ define amdgpu_kernel void @empty_struct_with_other({} %empty, i32 %arg1) #0 {
  ; Should insert code after the allocas
  define amdgpu_kernel void @static_alloca_kern_i32(i32 %arg0) {
  ; HSA-LABEL: @static_alloca_kern_i32(
-; HSA-NEXT:    [[ALLOCA:%.*]] = alloca i32, addrspace(5)
+; HSA-NEXT:    [[ALLOCA:%.*]] = alloca i32, align 4, addrspace(5)
  ; HSA-NEXT:    [[STATIC_ALLOCA_KERN_I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
  ; HSA-NEXT:    [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[STATIC_ALLOCA_KERN_I32_KERNARG_SEGMENT]], i64 0
  ; HSA-NEXT:    [[ARG0_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET]] to i32 addrspace(4)*
  ; HSA-NEXT:    [[ARG0_LOAD:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
-; HSA-NEXT:    store volatile i32 [[ARG0_LOAD]], i32 addrspace(5)* [[ALLOCA]]
+; HSA-NEXT:    store volatile i32 [[ARG0_LOAD]], i32 addrspace(5)* [[ALLOCA]], align 4
  ; HSA-NEXT:    ret void
  ;
  ; MESA-LABEL: @static_alloca_kern_i32(
-; MESA-NEXT:    [[ALLOCA:%.*]] = alloca i32, addrspace(5)
+; MESA-NEXT:    [[ALLOCA:%.*]] = alloca i32, align 4, addrspace(5)
  ; MESA-NEXT:    [[STATIC_ALLOCA_KERN_I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
  ; MESA-NEXT:    [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[STATIC_ALLOCA_KERN_I32_KERNARG_SEGMENT]], i64 36
  ; MESA-NEXT:    [[ARG0_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[ARG0_KERNARG_OFFSET]] to i32 addrspace(4)*
  ; MESA-NEXT:    [[ARG0_LOAD:%.*]] = load i32, i32 addrspace(4)* [[ARG0_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
-; MESA-NEXT:    store volatile i32 [[ARG0_LOAD]], i32 addrspace(5)* [[ALLOCA]]
+; MESA-NEXT:    store volatile i32 [[ARG0_LOAD]], i32 addrspace(5)* [[ALLOCA]], align 4
  ; MESA-NEXT:    ret void
  ;
    %alloca = alloca i32, addrspace(5)
@@ -1458,25 +1458,25 @@ define amdgpu_kernel void @static_alloca_kern_i32(i32 %arg0) {
  ; kernargs.
  define amdgpu_kernel void @dyn_alloca_kernarg_i32(i32 %n) {
  ; HSA-LABEL: @dyn_alloca_kernarg_i32(
-; HSA-NEXT:    [[ALLOCA0:%.*]] = alloca i32, addrspace(5)
+; HSA-NEXT:    [[ALLOCA0:%.*]] = alloca i32, align 4, addrspace(5)
  ; HSA-NEXT:    [[DYN_ALLOCA_KERNARG_I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(4) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
  ; HSA-NEXT:    [[N_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[DYN_ALLOCA_KERNARG_I32_KERNARG_SEGMENT]], i64 0
  ; HSA-NEXT:    [[N_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[N_KERNARG_OFFSET]] to i32 addrspace(4)*
  ; HSA-NEXT:    [[N_LOAD:%.*]] = load i32, i32 addrspace(4)* [[N_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0
-; HSA-NEXT:    [[ALLOCA1:%.*]] = alloca i32, i32 [[N_LOAD]], addrspace(5)
-; HSA-NEXT:    store volatile i32 0, i32 addrspace(5)* [[ALLOCA0]]
-; HSA-NEXT:    store volatile i32 1, i32 addrspace(5)* [[ALLOCA1]]
+; HSA-NEXT:    [[ALLOCA1:%.*]] = alloca i32, i32 [[N_LOAD]], align 4, addrspace(5)
+; HSA-NEXT:    store volatile i32 0, i32 addrspace(5)* [[ALLOCA0]], align 4
+; HSA-NEXT:    store volatile i32 1, i32 addrspace(5)* [[ALLOCA1]], align 4
  ; HSA-NEXT:    ret void
  ;
  ; MESA-LABEL: @dyn_alloca_kernarg_i32(
-; MESA-NEXT:    [[ALLOCA0:%.*]] = alloca i32, addrspace(5)
+; MESA-NEXT:    [[ALLOCA0:%.*]] = alloca i32, align 4, addrspace(5)
  ; MESA-NEXT:    [[DYN_ALLOCA_KERNARG_I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(40) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
  ; MESA-NEXT:    [[N_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[DYN_ALLOCA_KERNARG_I32_KERNARG_SEGMENT]], i64 36
  ; MESA-NEXT:    [[N_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[N_KERNARG_OFFSET]] to i32 addrspace(4)*
  ; MESA-NEXT:    [[N_LOAD:%.*]] = load i32, i32 addrspace(4)* [[N_KERNARG_OFFSET_CAST]], align 4, !invariant.load !0
-; MESA-NEXT:    [[ALLOCA1:%.*]] = alloca i32, i32 [[N_LOAD]], addrspace(5)
-; MESA-NEXT:    store volatile i32 0, i32 addrspace(5)* [[ALLOCA0]]
-; MESA-NEXT:    store volatile i32 1, i32 addrspace(5)* [[ALLOCA1]]
+; MESA-NEXT:    [[ALLOCA1:%.*]] = alloca i32, i32 [[N_LOAD]], align 4, addrspace(5)
+; MESA-NEXT:    store volatile i32 0, i32 addrspace(5)* [[ALLOCA0]], align 4
+; MESA-NEXT:    store volatile i32 1, i32 addrspace(5)* [[ALLOCA1]], align 4
  ; MESA-NEXT:    ret void
  ;
    %alloca0 = alloca i32, addrspace(5)
diff --git a/llvm/test/Transforms/ArgumentPromotion/attrs.ll b/llvm/test/Transforms/ArgumentPromotion/attrs.ll

index 7d801eb..07b9603 100644 (file)
--- a/llvm/test/Transforms/ArgumentPromotion/attrs.ll
+++ b/llvm/test/Transforms/ArgumentPromotion/attrs.ll
@@ -9,16 +9,16 @@ define internal void @f(%struct.ss* byval %b, i32* byval %X, i32 %i) nounwind {
  ; CHECK-LABEL: define {{[^@]+}}@f
  ; CHECK-SAME: (i32 [[B_0:%.*]], i64 [[B_1:%.*]], i32* byval [[X:%.*]], i32 [[I:%.*]])
  ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[B:%.*]] = alloca [[STRUCT_SS:%.*]]
+; CHECK-NEXT:    [[B:%.*]] = alloca [[STRUCT_SS:%.*]], align 8
  ; CHECK-NEXT:    [[DOT0:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 0
-; CHECK-NEXT:    store i32 [[B_0]], i32* [[DOT0]]
+; CHECK-NEXT:    store i32 [[B_0]], i32* [[DOT0]], align 4
  ; CHECK-NEXT:    [[DOT1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 1
-; CHECK-NEXT:    store i64 [[B_1]], i64* [[DOT1]]
+; CHECK-NEXT:    store i64 [[B_1]], i64* [[DOT1]], align 4
  ; CHECK-NEXT:    [[TMP:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 0
  ; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP]], align 4
  ; CHECK-NEXT:    [[TMP2:%.*]] = add i32 [[TMP1]], 1
  ; CHECK-NEXT:    store i32 [[TMP2]], i32* [[TMP]], align 4
-; CHECK-NEXT:    store i32 0, i32* [[X]]
+; CHECK-NEXT:    store i32 0, i32* [[X]], align 4
  ; CHECK-NEXT:    ret void
  ;
  entry:
@@ -37,15 +37,15 @@ define i32 @test(i32* %X) {
  ; CHECK-LABEL: define {{[^@]+}}@test
  ; CHECK-SAME: (i32* [[X:%.*]])
  ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[S:%.*]] = alloca [[STRUCT_SS:%.*]]
+; CHECK-NEXT:    [[S:%.*]] = alloca [[STRUCT_SS:%.*]], align 8
  ; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 0
  ; CHECK-NEXT:    store i32 1, i32* [[TMP1]], align 8
  ; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1
  ; CHECK-NEXT:    store i64 2, i64* [[TMP4]], align 4
  ; CHECK-NEXT:    [[S_0:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 0
-; CHECK-NEXT:    [[S_0_VAL:%.*]] = load i32, i32* [[S_0]]
+; CHECK-NEXT:    [[S_0_VAL:%.*]] = load i32, i32* [[S_0]], align 4
  ; CHECK-NEXT:    [[S_1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1
-; CHECK-NEXT:    [[S_1_VAL:%.*]] = load i64, i64* [[S_1]]
+; CHECK-NEXT:    [[S_1_VAL:%.*]] = load i64, i64* [[S_1]], align 4
  ; CHECK-NEXT:    call void @f(i32 [[S_0_VAL]], i64 [[S_1_VAL]], i32* byval [[X]], i32 zeroext 0)
  ; CHECK-NEXT:    ret i32 0
  ;
diff --git a/llvm/test/Transforms/ArgumentPromotion/byval-2.ll b/llvm/test/Transforms/ArgumentPromotion/byval-2.ll

index f46a1f3..f95493f 100644 (file)
--- a/llvm/test/Transforms/ArgumentPromotion/byval-2.ll
+++ b/llvm/test/Transforms/ArgumentPromotion/byval-2.ll
@@ -11,16 +11,16 @@ define internal void @f(%struct.ss* byval  %b, i32* byval %X) nounwind  {
  ; CHECK-LABEL: define {{[^@]+}}@f
  ; CHECK-SAME: (i32 [[B_0:%.*]], i64 [[B_1:%.*]], i32* byval [[X:%.*]])
  ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[B:%.*]] = alloca [[STRUCT_SS:%.*]]
+; CHECK-NEXT:    [[B:%.*]] = alloca [[STRUCT_SS:%.*]], align 8
  ; CHECK-NEXT:    [[DOT0:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 0
-; CHECK-NEXT:    store i32 [[B_0]], i32* [[DOT0]]
+; CHECK-NEXT:    store i32 [[B_0]], i32* [[DOT0]], align 4
  ; CHECK-NEXT:    [[DOT1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 1
-; CHECK-NEXT:    store i64 [[B_1]], i64* [[DOT1]]
+; CHECK-NEXT:    store i64 [[B_1]], i64* [[DOT1]], align 4
  ; CHECK-NEXT:    [[TMP:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 0
  ; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP]], align 4
  ; CHECK-NEXT:    [[TMP2:%.*]] = add i32 [[TMP1]], 1
  ; CHECK-NEXT:    store i32 [[TMP2]], i32* [[TMP]], align 4
-; CHECK-NEXT:    store i32 0, i32* [[X]]
+; CHECK-NEXT:    store i32 0, i32* [[X]], align 4
  ; CHECK-NEXT:    ret void
  ;
  entry:
@@ -37,15 +37,15 @@ define i32 @test(i32* %X) {
  ; CHECK-LABEL: define {{[^@]+}}@test
  ; CHECK-SAME: (i32* [[X:%.*]])
  ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[S:%.*]] = alloca [[STRUCT_SS:%.*]]
+; CHECK-NEXT:    [[S:%.*]] = alloca [[STRUCT_SS:%.*]], align 8
  ; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 0
  ; CHECK-NEXT:    store i32 1, i32* [[TMP1]], align 8
  ; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1
  ; CHECK-NEXT:    store i64 2, i64* [[TMP4]], align 4
  ; CHECK-NEXT:    [[S_0:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 0
-; CHECK-NEXT:    [[S_0_VAL:%.*]] = load i32, i32* [[S_0]]
+; CHECK-NEXT:    [[S_0_VAL:%.*]] = load i32, i32* [[S_0]], align 4
  ; CHECK-NEXT:    [[S_1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1
-; CHECK-NEXT:    [[S_1_VAL:%.*]] = load i64, i64* [[S_1]]
+; CHECK-NEXT:    [[S_1_VAL:%.*]] = load i64, i64* [[S_1]], align 4
  ; CHECK-NEXT:    call void @f(i32 [[S_0_VAL]], i64 [[S_1_VAL]], i32* byval [[X]])
  ; CHECK-NEXT:    ret i32 0
  ;
diff --git a/llvm/test/Transforms/ArgumentPromotion/byval.ll b/llvm/test/Transforms/ArgumentPromotion/byval.ll

index 020c0f2..359ce4e 100644 (file)
--- a/llvm/test/Transforms/ArgumentPromotion/byval.ll
+++ b/llvm/test/Transforms/ArgumentPromotion/byval.ll
@@ -10,11 +10,11 @@ define internal void @f(%struct.ss* byval  %b) nounwind  {
  ; CHECK-LABEL: define {{[^@]+}}@f
  ; CHECK-SAME: (i32 [[B_0:%.*]], i64 [[B_1:%.*]])
  ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[B:%.*]] = alloca [[STRUCT_SS:%.*]]
+; CHECK-NEXT:    [[B:%.*]] = alloca [[STRUCT_SS:%.*]], align 4
  ; CHECK-NEXT:    [[DOT0:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 0
-; CHECK-NEXT:    store i32 [[B_0]], i32* [[DOT0]]
+; CHECK-NEXT:    store i32 [[B_0]], i32* [[DOT0]], align 4
  ; CHECK-NEXT:    [[DOT1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 1
-; CHECK-NEXT:    store i64 [[B_1]], i64* [[DOT1]]
+; CHECK-NEXT:    store i64 [[B_1]], i64* [[DOT1]], align 4
  ; CHECK-NEXT:    [[TMP:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 0
  ; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP]], align 4
  ; CHECK-NEXT:    [[TMP2:%.*]] = add i32 [[TMP1]], 1
@@ -36,9 +36,9 @@ define internal void @g(%struct.ss* byval align 32 %b) nounwind {
  ; CHECK-NEXT:  entry:
  ; CHECK-NEXT:    [[B:%.*]] = alloca [[STRUCT_SS:%.*]], align 32
  ; CHECK-NEXT:    [[DOT0:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 0
-; CHECK-NEXT:    store i32 [[B_0]], i32* [[DOT0]]
+; CHECK-NEXT:    store i32 [[B_0]], i32* [[DOT0]], align 4
  ; CHECK-NEXT:    [[DOT1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 1
-; CHECK-NEXT:    store i64 [[B_1]], i64* [[DOT1]]
+; CHECK-NEXT:    store i64 [[B_1]], i64* [[DOT1]], align 4
  ; CHECK-NEXT:    [[TMP:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 0
  ; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP]], align 4
  ; CHECK-NEXT:    [[TMP2:%.*]] = add i32 [[TMP1]], 1
@@ -57,20 +57,20 @@ entry:
  define i32 @main() nounwind  {
  ; CHECK-LABEL: define {{[^@]+}}@main()
  ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[S:%.*]] = alloca [[STRUCT_SS:%.*]]
+; CHECK-NEXT:    [[S:%.*]] = alloca [[STRUCT_SS:%.*]], align 4
  ; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 0
  ; CHECK-NEXT:    store i32 1, i32* [[TMP1]], align 8
  ; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1
  ; CHECK-NEXT:    store i64 2, i64* [[TMP4]], align 4
  ; CHECK-NEXT:    [[S_0:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 0
-; CHECK-NEXT:    [[S_0_VAL:%.*]] = load i32, i32* [[S_0]]
+; CHECK-NEXT:    [[S_0_VAL:%.*]] = load i32, i32* [[S_0]], align 4
  ; CHECK-NEXT:    [[S_1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1
-; CHECK-NEXT:    [[S_1_VAL:%.*]] = load i64, i64* [[S_1]]
+; CHECK-NEXT:    [[S_1_VAL:%.*]] = load i64, i64* [[S_1]], align 4
  ; CHECK-NEXT:    call void @f(i32 [[S_0_VAL]], i64 [[S_1_VAL]])
  ; CHECK-NEXT:    [[S_01:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 0
-; CHECK-NEXT:    [[S_01_VAL:%.*]] = load i32, i32* [[S_01]]
+; CHECK-NEXT:    [[S_01_VAL:%.*]] = load i32, i32* [[S_01]], align 4
  ; CHECK-NEXT:    [[S_12:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1
-; CHECK-NEXT:    [[S_12_VAL:%.*]] = load i64, i64* [[S_12]]
+; CHECK-NEXT:    [[S_12_VAL:%.*]] = load i64, i64* [[S_12]], align 4
  ; CHECK-NEXT:    call void @g(i32 [[S_01_VAL]], i64 [[S_12_VAL]])
  ; CHECK-NEXT:    ret i32 0
  ;
diff --git a/llvm/test/Transforms/ArgumentPromotion/dbg.ll b/llvm/test/Transforms/ArgumentPromotion/dbg.ll

index e576ec0..1da97c3 100644 (file)
--- a/llvm/test/Transforms/ArgumentPromotion/dbg.ll
+++ b/llvm/test/Transforms/ArgumentPromotion/dbg.ll
@@ -21,11 +21,11 @@ define internal void @test(i32** %X) !dbg !2 {
  define internal void @test_byval(%struct.pair* byval %P) {
  ; CHECK-LABEL: define {{[^@]+}}@test_byval
  ; CHECK-SAME: (i32 [[P_0:%.*]], i32 [[P_1:%.*]])
-; CHECK-NEXT:    [[P:%.*]] = alloca [[STRUCT_PAIR:%.*]]
+; CHECK-NEXT:    [[P:%.*]] = alloca [[STRUCT_PAIR:%.*]], align 8
  ; CHECK-NEXT:    [[DOT0:%.*]] = getelementptr [[STRUCT_PAIR]], %struct.pair* [[P]], i32 0, i32 0
-; CHECK-NEXT:    store i32 [[P_0]], i32* [[DOT0]]
+; CHECK-NEXT:    store i32 [[P_0]], i32* [[DOT0]], align 4
  ; CHECK-NEXT:    [[DOT1:%.*]] = getelementptr [[STRUCT_PAIR]], %struct.pair* [[P]], i32 0, i32 1
-; CHECK-NEXT:    store i32 [[P_1]], i32* [[DOT1]]
+; CHECK-NEXT:    store i32 [[P_1]], i32* [[DOT1]], align 4
  ; CHECK-NEXT:    ret void
  ;
    ret void
diff --git a/llvm/test/Transforms/ArgumentPromotion/tail.ll b/llvm/test/Transforms/ArgumentPromotion/tail.ll

index f4bb06a..62c6f20 100644 (file)
--- a/llvm/test/Transforms/ArgumentPromotion/tail.ll
+++ b/llvm/test/Transforms/ArgumentPromotion/tail.ll
@@ -12,11 +12,11 @@ declare i8* @foo(%pair*)
  define internal void @bar(%pair* byval %Data) {
  ; CHECK-LABEL: define {{[^@]+}}@bar
  ; CHECK-SAME: (i32 [[DATA_0:%.*]], i32 [[DATA_1:%.*]])
-; CHECK-NEXT:    [[DATA:%.*]] = alloca [[PAIR:%.*]]
+; CHECK-NEXT:    [[DATA:%.*]] = alloca [[PAIR:%.*]], align 8
  ; CHECK-NEXT:    [[DOT0:%.*]] = getelementptr [[PAIR]], %pair* [[DATA]], i32 0, i32 0
-; CHECK-NEXT:    store i32 [[DATA_0]], i32* [[DOT0]]
+; CHECK-NEXT:    store i32 [[DATA_0]], i32* [[DOT0]], align 4
  ; CHECK-NEXT:    [[DOT1:%.*]] = getelementptr [[PAIR]], %pair* [[DATA]], i32 0, i32 1
-; CHECK-NEXT:    store i32 [[DATA_1]], i32* [[DOT1]]
+; CHECK-NEXT:    store i32 [[DATA_1]], i32* [[DOT1]], align 4
  ; CHECK-NEXT:    [[TMP1:%.*]] = call i8* @foo(%pair* [[DATA]])
  ; CHECK-NEXT:    ret void
  ;
@@ -28,9 +28,9 @@ define void @zed(%pair* byval %Data) {
  ; CHECK-LABEL: define {{[^@]+}}@zed
  ; CHECK-SAME: (%pair* byval [[DATA:%.*]])
  ; CHECK-NEXT:    [[DATA_0:%.*]] = getelementptr [[PAIR:%.*]], %pair* [[DATA]], i32 0, i32 0
-; CHECK-NEXT:    [[DATA_0_VAL:%.*]] = load i32, i32* [[DATA_0]]
+; CHECK-NEXT:    [[DATA_0_VAL:%.*]] = load i32, i32* [[DATA_0]], align 4
  ; CHECK-NEXT:    [[DATA_1:%.*]] = getelementptr [[PAIR]], %pair* [[DATA]], i32 0, i32 1
-; CHECK-NEXT:    [[DATA_1_VAL:%.*]] = load i32, i32* [[DATA_1]]
+; CHECK-NEXT:    [[DATA_1_VAL:%.*]] = load i32, i32* [[DATA_1]], align 4
  ; CHECK-NEXT:    call void @bar(i32 [[DATA_0_VAL]], i32 [[DATA_1_VAL]])
  ; CHECK-NEXT:    ret void
  ;
diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/attrs.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/attrs.ll

index 401fccc..524b26c 100644 (file)
--- a/llvm/test/Transforms/Attributor/ArgumentPromotion/attrs.ll
+++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/attrs.ll
@@ -24,13 +24,13 @@ define internal i32 @f(%struct.ss* byval %b, i32* byval %X, i32 %i) nounwind {
  ; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@f
  ; IS__TUNIT_NPM-SAME: (i32 [[TMP0:%.*]], i64 [[TMP1:%.*]], i32 [[TMP2:%.*]], i32 [[I:%.*]])
  ; IS__TUNIT_NPM-NEXT:  entry:
-; IS__TUNIT_NPM-NEXT:    [[X_PRIV:%.*]] = alloca i32
-; IS__TUNIT_NPM-NEXT:    store i32 [[TMP2]], i32* [[X_PRIV]]
-; IS__TUNIT_NPM-NEXT:    [[B_PRIV:%.*]] = alloca [[STRUCT_SS:%.*]]
+; IS__TUNIT_NPM-NEXT:    [[X_PRIV:%.*]] = alloca i32, align 4
+; IS__TUNIT_NPM-NEXT:    store i32 [[TMP2]], i32* [[X_PRIV]], align 4
+; IS__TUNIT_NPM-NEXT:    [[B_PRIV:%.*]] = alloca [[STRUCT_SS:%.*]], align 8
  ; IS__TUNIT_NPM-NEXT:    [[B_PRIV_CAST:%.*]] = bitcast %struct.ss* [[B_PRIV]] to i32*
-; IS__TUNIT_NPM-NEXT:    store i32 [[TMP0]], i32* [[B_PRIV_CAST]]
+; IS__TUNIT_NPM-NEXT:    store i32 [[TMP0]], i32* [[B_PRIV_CAST]], align 4
  ; IS__TUNIT_NPM-NEXT:    [[B_PRIV_0_1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B_PRIV]], i32 0, i32 1
-; IS__TUNIT_NPM-NEXT:    store i64 [[TMP1]], i64* [[B_PRIV_0_1]]
+; IS__TUNIT_NPM-NEXT:    store i64 [[TMP1]], i64* [[B_PRIV_0_1]], align 4
  ; IS__TUNIT_NPM-NEXT:    [[TMP:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B_PRIV]], i32 0, i32 0
  ; IS__TUNIT_NPM-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP]], align 8
  ; IS__TUNIT_NPM-NEXT:    [[TMP2:%.*]] = add i32 [[TMP1]], 1
@@ -55,9 +55,9 @@ define internal i32 @f(%struct.ss* byval %b, i32* byval %X, i32 %i) nounwind {
  ; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@f
  ; IS__CGSCC_NPM-SAME: (i32 [[TMP0:%.*]], i64 [[TMP1:%.*]], i32 [[TMP2:%.*]])
  ; IS__CGSCC_NPM-NEXT:  entry:
-; IS__CGSCC_NPM-NEXT:    [[X_PRIV:%.*]] = alloca i32
+; IS__CGSCC_NPM-NEXT:    [[X_PRIV:%.*]] = alloca i32, align 4
  ; IS__CGSCC_NPM-NEXT:    store i32 [[TMP2]], i32* [[X_PRIV]], align 4
-; IS__CGSCC_NPM-NEXT:    [[B_PRIV:%.*]] = alloca [[STRUCT_SS:%.*]]
+; IS__CGSCC_NPM-NEXT:    [[B_PRIV:%.*]] = alloca [[STRUCT_SS:%.*]], align 8
  ; IS__CGSCC_NPM-NEXT:    [[B_PRIV_CAST:%.*]] = bitcast %struct.ss* [[B_PRIV]] to i32*
  ; IS__CGSCC_NPM-NEXT:    store i32 [[TMP0]], i32* [[B_PRIV_CAST]], align 4
  ; IS__CGSCC_NPM-NEXT:    [[B_PRIV_0_1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B_PRIV]], i32 0, i32 1
@@ -90,7 +90,7 @@ define i32 @test(i32* %X) {
  ; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@test
  ; IS__TUNIT_OPM-SAME: (i32* nocapture nofree readonly align 4 [[X:%.*]])
  ; IS__TUNIT_OPM-NEXT:  entry:
-; IS__TUNIT_OPM-NEXT:    [[S:%.*]] = alloca [[STRUCT_SS:%.*]]
+; IS__TUNIT_OPM-NEXT:    [[S:%.*]] = alloca [[STRUCT_SS:%.*]], align 8
  ; IS__TUNIT_OPM-NEXT:    [[TMP1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 0
  ; IS__TUNIT_OPM-NEXT:    store i32 1, i32* [[TMP1]], align 8
  ; IS__TUNIT_OPM-NEXT:    [[TMP4:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1
@@ -101,7 +101,7 @@ define i32 @test(i32* %X) {
  ; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@test
  ; IS__TUNIT_NPM-SAME: (i32* nocapture nofree readonly align 4 [[X:%.*]])
  ; IS__TUNIT_NPM-NEXT:  entry:
-; IS__TUNIT_NPM-NEXT:    [[S:%.*]] = alloca [[STRUCT_SS:%.*]]
+; IS__TUNIT_NPM-NEXT:    [[S:%.*]] = alloca [[STRUCT_SS:%.*]], align 8
  ; IS__TUNIT_NPM-NEXT:    [[TMP1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 0
  ; IS__TUNIT_NPM-NEXT:    store i32 1, i32* [[TMP1]], align 8
  ; IS__TUNIT_NPM-NEXT:    [[TMP4:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1
@@ -117,7 +117,7 @@ define i32 @test(i32* %X) {
  ; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@test
  ; IS__CGSCC_OPM-SAME: (i32* nocapture nofree nonnull readnone align 4 dereferenceable(4) [[X:%.*]])
  ; IS__CGSCC_OPM-NEXT:  entry:
-; IS__CGSCC_OPM-NEXT:    [[S:%.*]] = alloca [[STRUCT_SS:%.*]]
+; IS__CGSCC_OPM-NEXT:    [[S:%.*]] = alloca [[STRUCT_SS:%.*]], align 8
  ; IS__CGSCC_OPM-NEXT:    [[TMP1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 0
  ; IS__CGSCC_OPM-NEXT:    store i32 1, i32* [[TMP1]], align 8
  ; IS__CGSCC_OPM-NEXT:    [[TMP4:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1
@@ -128,7 +128,7 @@ define i32 @test(i32* %X) {
  ; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@test
  ; IS__CGSCC_NPM-SAME: (i32* nocapture nofree nonnull readonly align 4 dereferenceable(4) [[X:%.*]])
  ; IS__CGSCC_NPM-NEXT:  entry:
-; IS__CGSCC_NPM-NEXT:    [[S:%.*]] = alloca [[STRUCT_SS:%.*]]
+; IS__CGSCC_NPM-NEXT:    [[S:%.*]] = alloca [[STRUCT_SS:%.*]], align 8
  ; IS__CGSCC_NPM-NEXT:    [[TMP1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 0
  ; IS__CGSCC_NPM-NEXT:    store i32 1, i32* [[TMP1]], align 8
  ; IS__CGSCC_NPM-NEXT:    [[TMP4:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1
diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/byval-2.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/byval-2.ll

index b70d05f..ccfbb94 100644 (file)
--- a/llvm/test/Transforms/Attributor/ArgumentPromotion/byval-2.ll
+++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/byval-2.ll
@@ -20,9 +20,9 @@ define internal void @f(%struct.ss* byval  %b, i32* byval %X) nounwind  {
  ; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@f
  ; IS__CGSCC_NPM-SAME: (i32 [[TMP0:%.*]], i64 [[TMP1:%.*]], i32 [[TMP2:%.*]])
  ; IS__CGSCC_NPM-NEXT:  entry:
-; IS__CGSCC_NPM-NEXT:    [[X_PRIV:%.*]] = alloca i32
+; IS__CGSCC_NPM-NEXT:    [[X_PRIV:%.*]] = alloca i32, align 4
  ; IS__CGSCC_NPM-NEXT:    store i32 [[TMP2]], i32* [[X_PRIV]], align 4
-; IS__CGSCC_NPM-NEXT:    [[B_PRIV:%.*]] = alloca [[STRUCT_SS:%.*]]
+; IS__CGSCC_NPM-NEXT:    [[B_PRIV:%.*]] = alloca [[STRUCT_SS:%.*]], align 8
  ; IS__CGSCC_NPM-NEXT:    [[B_PRIV_CAST:%.*]] = bitcast %struct.ss* [[B_PRIV]] to i32*
  ; IS__CGSCC_NPM-NEXT:    store i32 [[TMP0]], i32* [[B_PRIV_CAST]], align 4
  ; IS__CGSCC_NPM-NEXT:    [[B_PRIV_0_1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B_PRIV]], i32 0, i32 1
@@ -49,7 +49,7 @@ define i32 @test(i32* %X) {
  ; IS__TUNIT____-LABEL: define {{[^@]+}}@test
  ; IS__TUNIT____-SAME: (i32* nocapture nofree readonly align 4 [[X:%.*]])
  ; IS__TUNIT____-NEXT:  entry:
-; IS__TUNIT____-NEXT:    [[S:%.*]] = alloca [[STRUCT_SS:%.*]]
+; IS__TUNIT____-NEXT:    [[S:%.*]] = alloca [[STRUCT_SS:%.*]], align 8
  ; IS__TUNIT____-NEXT:    [[TMP1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 0
  ; IS__TUNIT____-NEXT:    store i32 1, i32* [[TMP1]], align 8
  ; IS__TUNIT____-NEXT:    [[TMP4:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1
@@ -59,7 +59,7 @@ define i32 @test(i32* %X) {
  ; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@test
  ; IS__CGSCC_OPM-SAME: (i32* nocapture nofree nonnull readnone align 4 dereferenceable(4) [[X:%.*]])
  ; IS__CGSCC_OPM-NEXT:  entry:
-; IS__CGSCC_OPM-NEXT:    [[S:%.*]] = alloca [[STRUCT_SS:%.*]]
+; IS__CGSCC_OPM-NEXT:    [[S:%.*]] = alloca [[STRUCT_SS:%.*]], align 8
  ; IS__CGSCC_OPM-NEXT:    [[TMP1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 0
  ; IS__CGSCC_OPM-NEXT:    store i32 1, i32* [[TMP1]], align 8
  ; IS__CGSCC_OPM-NEXT:    [[TMP4:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1
@@ -69,7 +69,7 @@ define i32 @test(i32* %X) {
  ; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@test
  ; IS__CGSCC_NPM-SAME: (i32* nocapture nofree nonnull readonly align 4 dereferenceable(4) [[X:%.*]])
  ; IS__CGSCC_NPM-NEXT:  entry:
-; IS__CGSCC_NPM-NEXT:    [[S:%.*]] = alloca [[STRUCT_SS:%.*]]
+; IS__CGSCC_NPM-NEXT:    [[S:%.*]] = alloca [[STRUCT_SS:%.*]], align 8
  ; IS__CGSCC_NPM-NEXT:    [[TMP1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 0
  ; IS__CGSCC_NPM-NEXT:    store i32 1, i32* [[TMP1]], align 8
  ; IS__CGSCC_NPM-NEXT:    [[TMP4:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1
diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/byval.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/byval.ll

index eb6f666..194188c 100644 (file)
--- a/llvm/test/Transforms/Attributor/ArgumentPromotion/byval.ll
+++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/byval.ll
@@ -18,33 +18,19 @@ define internal i32 @f(%struct.ss* byval  %b) nounwind  {
  ; IS________OPM-NEXT:    store i32 [[TMP2]], i32* [[TMP]], align 8
  ; IS________OPM-NEXT:    ret i32 [[TMP1]]
  ;
-; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@f
-; IS__TUNIT_NPM-SAME: (i32 [[TMP0:%.*]], i64 [[TMP1:%.*]])
-; IS__TUNIT_NPM-NEXT:  entry:
-; IS__TUNIT_NPM-NEXT:    [[B_PRIV:%.*]] = alloca [[STRUCT_SS:%.*]]
-; IS__TUNIT_NPM-NEXT:    [[B_PRIV_CAST:%.*]] = bitcast %struct.ss* [[B_PRIV]] to i32*
-; IS__TUNIT_NPM-NEXT:    store i32 [[TMP0]], i32* [[B_PRIV_CAST]]
-; IS__TUNIT_NPM-NEXT:    [[B_PRIV_0_1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B_PRIV]], i32 0, i32 1
-; IS__TUNIT_NPM-NEXT:    store i64 [[TMP1]], i64* [[B_PRIV_0_1]]
-; IS__TUNIT_NPM-NEXT:    [[TMP:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B_PRIV]], i32 0, i32 0
-; IS__TUNIT_NPM-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP]], align 8
-; IS__TUNIT_NPM-NEXT:    [[TMP2:%.*]] = add i32 [[TMP1]], 1
-; IS__TUNIT_NPM-NEXT:    store i32 [[TMP2]], i32* [[TMP]], align 8
-; IS__TUNIT_NPM-NEXT:    ret i32 [[TMP1]]
-;
-; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@f
-; IS__CGSCC_NPM-SAME: (i32 [[TMP0:%.*]], i64 [[TMP1:%.*]])
-; IS__CGSCC_NPM-NEXT:  entry:
-; IS__CGSCC_NPM-NEXT:    [[B_PRIV:%.*]] = alloca [[STRUCT_SS:%.*]]
-; IS__CGSCC_NPM-NEXT:    [[B_PRIV_CAST:%.*]] = bitcast %struct.ss* [[B_PRIV]] to i32*
-; IS__CGSCC_NPM-NEXT:    store i32 [[TMP0]], i32* [[B_PRIV_CAST]], align 4
-; IS__CGSCC_NPM-NEXT:    [[B_PRIV_0_1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B_PRIV]], i32 0, i32 1
-; IS__CGSCC_NPM-NEXT:    store i64 [[TMP1]], i64* [[B_PRIV_0_1]], align 4
-; IS__CGSCC_NPM-NEXT:    [[TMP:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B_PRIV]], i32 0, i32 0
-; IS__CGSCC_NPM-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP]], align 8
-; IS__CGSCC_NPM-NEXT:    [[TMP2:%.*]] = add i32 [[TMP1]], 1
-; IS__CGSCC_NPM-NEXT:    store i32 [[TMP2]], i32* [[TMP]], align 8
-; IS__CGSCC_NPM-NEXT:    ret i32 [[TMP1]]
+; IS________NPM-LABEL: define {{[^@]+}}@f
+; IS________NPM-SAME: (i32 [[TMP0:%.*]], i64 [[TMP1:%.*]])
+; IS________NPM-NEXT:  entry:
+; IS________NPM-NEXT:    [[B_PRIV:%.*]] = alloca [[STRUCT_SS:%.*]], align 4
+; IS________NPM-NEXT:    [[B_PRIV_CAST:%.*]] = bitcast %struct.ss* [[B_PRIV]] to i32*
+; IS________NPM-NEXT:    store i32 [[TMP0]], i32* [[B_PRIV_CAST]], align 4
+; IS________NPM-NEXT:    [[B_PRIV_0_1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B_PRIV]], i32 0, i32 1
+; IS________NPM-NEXT:    store i64 [[TMP1]], i64* [[B_PRIV_0_1]], align 4
+; IS________NPM-NEXT:    [[TMP:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B_PRIV]], i32 0, i32 0
+; IS________NPM-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP]], align 8
+; IS________NPM-NEXT:    [[TMP2:%.*]] = add i32 [[TMP1]], 1
+; IS________NPM-NEXT:    store i32 [[TMP2]], i32* [[TMP]], align 8
+; IS________NPM-NEXT:    ret i32 [[TMP1]]
  ;
  entry:
    %tmp = getelementptr %struct.ss, %struct.ss* %b, i32 0, i32 0
@@ -65,33 +51,19 @@ define internal i32 @g(%struct.ss* byval align 32 %b) nounwind {
  ; IS________OPM-NEXT:    store i32 [[TMP2]], i32* [[TMP]], align 32
  ; IS________OPM-NEXT:    ret i32 [[TMP2]]
  ;
-; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@g
-; IS__TUNIT_NPM-SAME: (i32 [[TMP0:%.*]], i64 [[TMP1:%.*]])
-; IS__TUNIT_NPM-NEXT:  entry:
-; IS__TUNIT_NPM-NEXT:    [[B_PRIV:%.*]] = alloca [[STRUCT_SS:%.*]]
-; IS__TUNIT_NPM-NEXT:    [[B_PRIV_CAST:%.*]] = bitcast %struct.ss* [[B_PRIV]] to i32*
-; IS__TUNIT_NPM-NEXT:    store i32 [[TMP0]], i32* [[B_PRIV_CAST]]
-; IS__TUNIT_NPM-NEXT:    [[B_PRIV_0_1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B_PRIV]], i32 0, i32 1
-; IS__TUNIT_NPM-NEXT:    store i64 [[TMP1]], i64* [[B_PRIV_0_1]]
-; IS__TUNIT_NPM-NEXT:    [[TMP:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B_PRIV]], i32 0, i32 0
-; IS__TUNIT_NPM-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP]], align 32
-; IS__TUNIT_NPM-NEXT:    [[TMP2:%.*]] = add i32 [[TMP1]], 1
-; IS__TUNIT_NPM-NEXT:    store i32 [[TMP2]], i32* [[TMP]], align 32
-; IS__TUNIT_NPM-NEXT:    ret i32 [[TMP2]]
-;
-; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@g
-; IS__CGSCC_NPM-SAME: (i32 [[TMP0:%.*]], i64 [[TMP1:%.*]])
-; IS__CGSCC_NPM-NEXT:  entry:
-; IS__CGSCC_NPM-NEXT:    [[B_PRIV:%.*]] = alloca [[STRUCT_SS:%.*]]
-; IS__CGSCC_NPM-NEXT:    [[B_PRIV_CAST:%.*]] = bitcast %struct.ss* [[B_PRIV]] to i32*
-; IS__CGSCC_NPM-NEXT:    store i32 [[TMP0]], i32* [[B_PRIV_CAST]], align 4
-; IS__CGSCC_NPM-NEXT:    [[B_PRIV_0_1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B_PRIV]], i32 0, i32 1
-; IS__CGSCC_NPM-NEXT:    store i64 [[TMP1]], i64* [[B_PRIV_0_1]], align 4
-; IS__CGSCC_NPM-NEXT:    [[TMP:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B_PRIV]], i32 0, i32 0
-; IS__CGSCC_NPM-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP]], align 32
-; IS__CGSCC_NPM-NEXT:    [[TMP2:%.*]] = add i32 [[TMP1]], 1
-; IS__CGSCC_NPM-NEXT:    store i32 [[TMP2]], i32* [[TMP]], align 32
-; IS__CGSCC_NPM-NEXT:    ret i32 [[TMP2]]
+; IS________NPM-LABEL: define {{[^@]+}}@g
+; IS________NPM-SAME: (i32 [[TMP0:%.*]], i64 [[TMP1:%.*]])
+; IS________NPM-NEXT:  entry:
+; IS________NPM-NEXT:    [[B_PRIV:%.*]] = alloca [[STRUCT_SS:%.*]], align 4
+; IS________NPM-NEXT:    [[B_PRIV_CAST:%.*]] = bitcast %struct.ss* [[B_PRIV]] to i32*
+; IS________NPM-NEXT:    store i32 [[TMP0]], i32* [[B_PRIV_CAST]], align 4
+; IS________NPM-NEXT:    [[B_PRIV_0_1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B_PRIV]], i32 0, i32 1
+; IS________NPM-NEXT:    store i64 [[TMP1]], i64* [[B_PRIV_0_1]], align 4
+; IS________NPM-NEXT:    [[TMP:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B_PRIV]], i32 0, i32 0
+; IS________NPM-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP]], align 32
+; IS________NPM-NEXT:    [[TMP2:%.*]] = add i32 [[TMP1]], 1
+; IS________NPM-NEXT:    store i32 [[TMP2]], i32* [[TMP]], align 32
+; IS________NPM-NEXT:    ret i32 [[TMP2]]
  ;
  entry:
    %tmp = getelementptr %struct.ss, %struct.ss* %b, i32 0, i32 0
@@ -105,7 +77,7 @@ entry:
  define i32 @main() nounwind  {
  ; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@main()
  ; IS__TUNIT_OPM-NEXT:  entry:
-; IS__TUNIT_OPM-NEXT:    [[S:%.*]] = alloca [[STRUCT_SS:%.*]]
+; IS__TUNIT_OPM-NEXT:    [[S:%.*]] = alloca [[STRUCT_SS:%.*]], align 4
  ; IS__TUNIT_OPM-NEXT:    [[TMP1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 0
  ; IS__TUNIT_OPM-NEXT:    store i32 1, i32* [[TMP1]], align 8
  ; IS__TUNIT_OPM-NEXT:    [[TMP4:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1
@@ -117,7 +89,7 @@ define i32 @main() nounwind  {
  ;
  ; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@main()
  ; IS__TUNIT_NPM-NEXT:  entry:
-; IS__TUNIT_NPM-NEXT:    [[S:%.*]] = alloca [[STRUCT_SS:%.*]]
+; IS__TUNIT_NPM-NEXT:    [[S:%.*]] = alloca [[STRUCT_SS:%.*]], align 4
  ; IS__TUNIT_NPM-NEXT:    [[TMP1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 0
  ; IS__TUNIT_NPM-NEXT:    store i32 1, i32* [[TMP1]], align 8
  ; IS__TUNIT_NPM-NEXT:    [[TMP4:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1
@@ -137,7 +109,7 @@ define i32 @main() nounwind  {
  ;
  ; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@main()
  ; IS__CGSCC_OPM-NEXT:  entry:
-; IS__CGSCC_OPM-NEXT:    [[S:%.*]] = alloca [[STRUCT_SS:%.*]]
+; IS__CGSCC_OPM-NEXT:    [[S:%.*]] = alloca [[STRUCT_SS:%.*]], align 4
  ; IS__CGSCC_OPM-NEXT:    [[TMP1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 0
  ; IS__CGSCC_OPM-NEXT:    store i32 1, i32* [[TMP1]], align 32
  ; IS__CGSCC_OPM-NEXT:    [[TMP4:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1
@@ -149,7 +121,7 @@ define i32 @main() nounwind  {
  ;
  ; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@main()
  ; IS__CGSCC_NPM-NEXT:  entry:
-; IS__CGSCC_NPM-NEXT:    [[S:%.*]] = alloca [[STRUCT_SS:%.*]]
+; IS__CGSCC_NPM-NEXT:    [[S:%.*]] = alloca [[STRUCT_SS:%.*]], align 4
  ; IS__CGSCC_NPM-NEXT:    [[TMP1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 0
  ; IS__CGSCC_NPM-NEXT:    store i32 1, i32* [[TMP1]], align 32
  ; IS__CGSCC_NPM-NEXT:    [[TMP4:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1
diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/fp80.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/fp80.ll

index 129146d..5c2ed40 100644 (file)
--- a/llvm/test/Transforms/Attributor/ArgumentPromotion/fp80.ll
+++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/fp80.ll
@@ -75,12 +75,12 @@ define internal i64 @CaptureAStruct(%struct.Foo* byval %a) {
  ; IS__CGSCC____-LABEL: define {{[^@]+}}@CaptureAStruct
  ; IS__CGSCC____-SAME: (i32 [[TMP0:%.*]], i64 [[TMP1:%.*]])
  ; IS__CGSCC____-NEXT:  entry:
-; IS__CGSCC____-NEXT:    [[A_PRIV:%.*]] = alloca [[STRUCT_FOO:%.*]]
+; IS__CGSCC____-NEXT:    [[A_PRIV:%.*]] = alloca [[STRUCT_FOO:%.*]], align 8
  ; IS__CGSCC____-NEXT:    [[A_PRIV_CAST:%.*]] = bitcast %struct.Foo* [[A_PRIV]] to i32*
  ; IS__CGSCC____-NEXT:    store i32 [[TMP0]], i32* [[A_PRIV_CAST]], align 4
  ; IS__CGSCC____-NEXT:    [[A_PRIV_0_1:%.*]] = getelementptr [[STRUCT_FOO]], %struct.Foo* [[A_PRIV]], i32 0, i32 1
  ; IS__CGSCC____-NEXT:    store i64 [[TMP1]], i64* [[A_PRIV_0_1]], align 8
-; IS__CGSCC____-NEXT:    [[A_PTR:%.*]] = alloca %struct.Foo*
+; IS__CGSCC____-NEXT:    [[A_PTR:%.*]] = alloca %struct.Foo*, align 8
  ; IS__CGSCC____-NEXT:    br label [[LOOP:%.*]]
  ; IS__CGSCC____:       loop:
  ; IS__CGSCC____-NEXT:    [[PHI:%.*]] = phi %struct.Foo* [ null, [[ENTRY:%.*]] ], [ [[GEP:%.*]], [[LOOP]] ]
diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/inalloca.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/inalloca.ll

index 3f82fb7..f8625b2 100644 (file)
--- a/llvm/test/Transforms/Attributor/ArgumentPromotion/inalloca.ll
+++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/inalloca.ll
@@ -42,7 +42,7 @@ entry:
  define i32 @main() {
  ; CHECK-LABEL: define {{[^@]+}}@main()
  ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[S:%.*]] = alloca inalloca [[STRUCT_SS:%.*]]
+; CHECK-NEXT:    [[S:%.*]] = alloca inalloca [[STRUCT_SS:%.*]], align 4
  ; CHECK-NEXT:    [[F0:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 0
  ; CHECK-NEXT:    [[F1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1
  ; CHECK-NEXT:    store i32 1, i32* [[F0]], align 4
diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/tail.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/tail.ll

index 436d5af..1920e01 100644 (file)
--- a/llvm/test/Transforms/Attributor/ArgumentPromotion/tail.ll
+++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/tail.ll
@@ -19,17 +19,17 @@ define internal void @bar(%pair* byval %Data) {
  ;
  ; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@bar
  ; IS__TUNIT_NPM-SAME: (i32 [[TMP0:%.*]], i32 [[TMP1:%.*]])
-; IS__TUNIT_NPM-NEXT:    [[DATA_PRIV:%.*]] = alloca [[PAIR:%.*]]
+; IS__TUNIT_NPM-NEXT:    [[DATA_PRIV:%.*]] = alloca [[PAIR:%.*]], align 8
  ; IS__TUNIT_NPM-NEXT:    [[DATA_PRIV_CAST:%.*]] = bitcast %pair* [[DATA_PRIV]] to i32*
-; IS__TUNIT_NPM-NEXT:    store i32 [[TMP0]], i32* [[DATA_PRIV_CAST]]
+; IS__TUNIT_NPM-NEXT:    store i32 [[TMP0]], i32* [[DATA_PRIV_CAST]], align 4
  ; IS__TUNIT_NPM-NEXT:    [[DATA_PRIV_0_1:%.*]] = getelementptr [[PAIR]], %pair* [[DATA_PRIV]], i32 0, i32 1
-; IS__TUNIT_NPM-NEXT:    store i32 [[TMP1]], i32* [[DATA_PRIV_0_1]]
+; IS__TUNIT_NPM-NEXT:    store i32 [[TMP1]], i32* [[DATA_PRIV_0_1]], align 4
  ; IS__TUNIT_NPM-NEXT:    [[TMP3:%.*]] = call i8* @foo(%pair* nonnull dereferenceable(8) [[DATA_PRIV]])
  ; IS__TUNIT_NPM-NEXT:    ret void
  ;
  ; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@bar
  ; IS__CGSCC_NPM-SAME: (i32 [[TMP0:%.*]], i32 [[TMP1:%.*]])
-; IS__CGSCC_NPM-NEXT:    [[DATA_PRIV:%.*]] = alloca [[PAIR:%.*]]
+; IS__CGSCC_NPM-NEXT:    [[DATA_PRIV:%.*]] = alloca [[PAIR:%.*]], align 8
  ; IS__CGSCC_NPM-NEXT:    [[DATA_PRIV_CAST:%.*]] = bitcast %pair* [[DATA_PRIV]] to i32*
  ; IS__CGSCC_NPM-NEXT:    store i32 [[TMP0]], i32* [[DATA_PRIV_CAST]], align 4
  ; IS__CGSCC_NPM-NEXT:    [[DATA_PRIV_0_1:%.*]] = getelementptr [[PAIR]], %pair* [[DATA_PRIV]], i32 0, i32 1
diff --git a/llvm/test/Transforms/Attributor/IPConstantProp/2009-09-24-byval-ptr.ll b/llvm/test/Transforms/Attributor/IPConstantProp/2009-09-24-byval-ptr.ll

index 9a680a2..e7d9a63 100644 (file)
--- a/llvm/test/Transforms/Attributor/IPConstantProp/2009-09-24-byval-ptr.ll
+++ b/llvm/test/Transforms/Attributor/IPConstantProp/2009-09-24-byval-ptr.ll
@@ -23,7 +23,7 @@ define internal void @vfu1(%struct.MYstr* byval align 4 %u) nounwind {
  ; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@vfu1
  ; IS__CGSCC_NPM-SAME: (i8 [[TMP0:%.*]], i32 [[TMP1:%.*]])
  ; IS__CGSCC_NPM-NEXT:  entry:
-; IS__CGSCC_NPM-NEXT:    [[U_PRIV:%.*]] = alloca [[STRUCT_MYSTR:%.*]]
+; IS__CGSCC_NPM-NEXT:    [[U_PRIV:%.*]] = alloca [[STRUCT_MYSTR:%.*]], align 8
  ; IS__CGSCC_NPM-NEXT:    [[U_PRIV_CAST:%.*]] = bitcast %struct.MYstr* [[U_PRIV]] to i8*
  ; IS__CGSCC_NPM-NEXT:    store i8 [[TMP0]], i8* [[U_PRIV_CAST]], align 1
  ; IS__CGSCC_NPM-NEXT:    [[U_PRIV_0_1:%.*]] = getelementptr [[STRUCT_MYSTR]], %struct.MYstr* [[U_PRIV]], i32 0, i32 1
@@ -62,11 +62,11 @@ define internal i32 @vfu2(%struct.MYstr* byval align 4 %u) nounwind readonly {
  ; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@vfu2
  ; IS__TUNIT_NPM-SAME: (i8 [[TMP0:%.*]], i32 [[TMP1:%.*]])
  ; IS__TUNIT_NPM-NEXT:  entry:
-; IS__TUNIT_NPM-NEXT:    [[U_PRIV:%.*]] = alloca [[STRUCT_MYSTR:%.*]]
+; IS__TUNIT_NPM-NEXT:    [[U_PRIV:%.*]] = alloca [[STRUCT_MYSTR:%.*]], align 8
  ; IS__TUNIT_NPM-NEXT:    [[U_PRIV_CAST:%.*]] = bitcast %struct.MYstr* [[U_PRIV]] to i8*
-; IS__TUNIT_NPM-NEXT:    store i8 [[TMP0]], i8* [[U_PRIV_CAST]]
+; IS__TUNIT_NPM-NEXT:    store i8 [[TMP0]], i8* [[U_PRIV_CAST]], align 1
  ; IS__TUNIT_NPM-NEXT:    [[U_PRIV_0_1:%.*]] = getelementptr [[STRUCT_MYSTR]], %struct.MYstr* [[U_PRIV]], i32 0, i32 1
-; IS__TUNIT_NPM-NEXT:    store i32 [[TMP1]], i32* [[U_PRIV_0_1]]
+; IS__TUNIT_NPM-NEXT:    store i32 [[TMP1]], i32* [[U_PRIV_0_1]], align 4
  ; IS__TUNIT_NPM-NEXT:    [[TMP2:%.*]] = getelementptr [[STRUCT_MYSTR]], %struct.MYstr* @mystr, i32 0, i32 1
  ; IS__TUNIT_NPM-NEXT:    [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4
  ; IS__TUNIT_NPM-NEXT:    [[TMP4:%.*]] = getelementptr [[STRUCT_MYSTR]], %struct.MYstr* @mystr, i32 0, i32 0
@@ -135,41 +135,23 @@ define internal i32 @vfu2_v2(%struct.MYstr* byval align 4 %u) nounwind readonly
  ; IS________OPM-NEXT:    [[TMP5:%.*]] = add i32 [[TMP4]], [[TMP1]]
  ; IS________OPM-NEXT:    ret i32 [[TMP5]]
  ;
-; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@vfu2_v2
-; IS__TUNIT_NPM-SAME: (i8 [[TMP0:%.*]], i32 [[TMP1:%.*]])
-; IS__TUNIT_NPM-NEXT:  entry:
-; IS__TUNIT_NPM-NEXT:    [[U_PRIV:%.*]] = alloca [[STRUCT_MYSTR:%.*]]
-; IS__TUNIT_NPM-NEXT:    [[U_PRIV_CAST:%.*]] = bitcast %struct.MYstr* [[U_PRIV]] to i8*
-; IS__TUNIT_NPM-NEXT:    store i8 [[TMP0]], i8* [[U_PRIV_CAST]]
-; IS__TUNIT_NPM-NEXT:    [[U_PRIV_0_1:%.*]] = getelementptr [[STRUCT_MYSTR]], %struct.MYstr* [[U_PRIV]], i32 0, i32 1
-; IS__TUNIT_NPM-NEXT:    store i32 [[TMP1]], i32* [[U_PRIV_0_1]]
-; IS__TUNIT_NPM-NEXT:    [[Z:%.*]] = getelementptr [[STRUCT_MYSTR]], %struct.MYstr* [[U_PRIV]], i32 0, i32 1
-; IS__TUNIT_NPM-NEXT:    store i32 99, i32* [[Z]], align 4
-; IS__TUNIT_NPM-NEXT:    [[TMP2:%.*]] = getelementptr [[STRUCT_MYSTR]], %struct.MYstr* [[U_PRIV]], i32 0, i32 1
-; IS__TUNIT_NPM-NEXT:    [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4
-; IS__TUNIT_NPM-NEXT:    [[TMP4:%.*]] = getelementptr [[STRUCT_MYSTR]], %struct.MYstr* [[U_PRIV]], i32 0, i32 0
-; IS__TUNIT_NPM-NEXT:    [[TMP5:%.*]] = load i8, i8* [[TMP4]], align 8
-; IS__TUNIT_NPM-NEXT:    [[TMP6:%.*]] = zext i8 [[TMP5]] to i32
-; IS__TUNIT_NPM-NEXT:    [[TMP7:%.*]] = add i32 [[TMP6]], [[TMP3]]
-; IS__TUNIT_NPM-NEXT:    ret i32 [[TMP7]]
-;
-; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@vfu2_v2
-; IS__CGSCC_NPM-SAME: (i8 [[TMP0:%.*]], i32 [[TMP1:%.*]])
-; IS__CGSCC_NPM-NEXT:  entry:
-; IS__CGSCC_NPM-NEXT:    [[U_PRIV:%.*]] = alloca [[STRUCT_MYSTR:%.*]]
-; IS__CGSCC_NPM-NEXT:    [[U_PRIV_CAST:%.*]] = bitcast %struct.MYstr* [[U_PRIV]] to i8*
-; IS__CGSCC_NPM-NEXT:    store i8 [[TMP0]], i8* [[U_PRIV_CAST]], align 1
-; IS__CGSCC_NPM-NEXT:    [[U_PRIV_0_1:%.*]] = getelementptr [[STRUCT_MYSTR]], %struct.MYstr* [[U_PRIV]], i32 0, i32 1
-; IS__CGSCC_NPM-NEXT:    store i32 [[TMP1]], i32* [[U_PRIV_0_1]], align 4
-; IS__CGSCC_NPM-NEXT:    [[Z:%.*]] = getelementptr [[STRUCT_MYSTR]], %struct.MYstr* [[U_PRIV]], i32 0, i32 1
-; IS__CGSCC_NPM-NEXT:    store i32 99, i32* [[Z]], align 4
-; IS__CGSCC_NPM-NEXT:    [[TMP2:%.*]] = getelementptr [[STRUCT_MYSTR]], %struct.MYstr* [[U_PRIV]], i32 0, i32 1
-; IS__CGSCC_NPM-NEXT:    [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4
-; IS__CGSCC_NPM-NEXT:    [[TMP4:%.*]] = getelementptr [[STRUCT_MYSTR]], %struct.MYstr* [[U_PRIV]], i32 0, i32 0
-; IS__CGSCC_NPM-NEXT:    [[TMP5:%.*]] = load i8, i8* [[TMP4]], align 8
-; IS__CGSCC_NPM-NEXT:    [[TMP6:%.*]] = zext i8 [[TMP5]] to i32
-; IS__CGSCC_NPM-NEXT:    [[TMP7:%.*]] = add i32 [[TMP6]], [[TMP3]]
-; IS__CGSCC_NPM-NEXT:    ret i32 [[TMP7]]
+; IS________NPM-LABEL: define {{[^@]+}}@vfu2_v2
+; IS________NPM-SAME: (i8 [[TMP0:%.*]], i32 [[TMP1:%.*]])
+; IS________NPM-NEXT:  entry:
+; IS________NPM-NEXT:    [[U_PRIV:%.*]] = alloca [[STRUCT_MYSTR:%.*]], align 8
+; IS________NPM-NEXT:    [[U_PRIV_CAST:%.*]] = bitcast %struct.MYstr* [[U_PRIV]] to i8*
+; IS________NPM-NEXT:    store i8 [[TMP0]], i8* [[U_PRIV_CAST]], align 1
+; IS________NPM-NEXT:    [[U_PRIV_0_1:%.*]] = getelementptr [[STRUCT_MYSTR]], %struct.MYstr* [[U_PRIV]], i32 0, i32 1
+; IS________NPM-NEXT:    store i32 [[TMP1]], i32* [[U_PRIV_0_1]], align 4
+; IS________NPM-NEXT:    [[Z:%.*]] = getelementptr [[STRUCT_MYSTR]], %struct.MYstr* [[U_PRIV]], i32 0, i32 1
+; IS________NPM-NEXT:    store i32 99, i32* [[Z]], align 4
+; IS________NPM-NEXT:    [[TMP2:%.*]] = getelementptr [[STRUCT_MYSTR]], %struct.MYstr* [[U_PRIV]], i32 0, i32 1
+; IS________NPM-NEXT:    [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4
+; IS________NPM-NEXT:    [[TMP4:%.*]] = getelementptr [[STRUCT_MYSTR]], %struct.MYstr* [[U_PRIV]], i32 0, i32 0
+; IS________NPM-NEXT:    [[TMP5:%.*]] = load i8, i8* [[TMP4]], align 8
+; IS________NPM-NEXT:    [[TMP6:%.*]] = zext i8 [[TMP5]] to i32
+; IS________NPM-NEXT:    [[TMP7:%.*]] = add i32 [[TMP6]], [[TMP3]]
+; IS________NPM-NEXT:    ret i32 [[TMP7]]
  ;
  entry:
    %z = getelementptr %struct.MYstr, %struct.MYstr* %u, i32 0, i32 1
diff --git a/llvm/test/Transforms/Attributor/value-simplify.ll b/llvm/test/Transforms/Attributor/value-simplify.ll

index 3b92a32..9155911 100644 (file)
--- a/llvm/test/Transforms/Attributor/value-simplify.ll
+++ b/llvm/test/Transforms/Attributor/value-simplify.ll
@@ -333,7 +333,7 @@ define internal void @test_byval(%struct.X* byval %a) {
  ;
  ; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@test_byval
  ; IS__CGSCC_NPM-SAME: (i8* nocapture nofree readnone [[TMP0:%.*]])
-; IS__CGSCC_NPM-NEXT:    [[A_PRIV:%.*]] = alloca [[STRUCT_X:%.*]]
+; IS__CGSCC_NPM-NEXT:    [[A_PRIV:%.*]] = alloca [[STRUCT_X:%.*]], align 8
  ; IS__CGSCC_NPM-NEXT:    [[A_PRIV_CAST:%.*]] = bitcast %struct.X* [[A_PRIV]] to i8**
  ; IS__CGSCC_NPM-NEXT:    store i8* [[TMP0]], i8** [[A_PRIV_CAST]], align 8
  ; IS__CGSCC_NPM-NEXT:    [[G0:%.*]] = getelementptr [[STRUCT_X]], %struct.X* [[A_PRIV]], i32 0, i32 0
diff --git a/llvm/test/Transforms/NewGVN/pr33367.ll b/llvm/test/Transforms/NewGVN/pr33367.ll

index 6d24e1c..f4d49cd 100644 (file)
--- a/llvm/test/Transforms/NewGVN/pr33367.ll
+++ b/llvm/test/Transforms/NewGVN/pr33367.ll
@@ -9,7 +9,7 @@ declare i64 @llvm.x86.bmi.bextr.64(i64, i64) #3
  define %MNR_struct @f000316011717_2(%DS_struct* %pDS, [64 x i64]* %pCG) #2 {
  ; CHECK-LABEL: @f000316011717_2(
  ; CHECK-NEXT:  Entry:
-; CHECK-NEXT:    [[RESTART:%.*]] = alloca [[MNR_STRUCT:%.*]]
+; CHECK-NEXT:    [[RESTART:%.*]] = alloca [[MNR_STRUCT:%.*]], align 8
  ; CHECK-NEXT:    [[PCARRY:%.*]] = getelementptr [[DS_STRUCT:%.*]], %DS_struct* [[PDS:%.*]], i32 0, i32 1
  ; CHECK-NEXT:    [[PBRBASE:%.*]] = getelementptr [[DS_STRUCT]], %DS_struct* [[PDS]], i32 0, i32 0
  ; CHECK-NEXT:    [[PBASE:%.*]] = getelementptr [32 x i64*], [32 x i64*]* [[PBRBASE]], i64 0, i64 0
diff --git a/llvm/test/Transforms/SROA/alignment.ll b/llvm/test/Transforms/SROA/alignment.ll

index f411ed0..4b43619 100644 (file)
--- a/llvm/test/Transforms/SROA/alignment.ll
+++ b/llvm/test/Transforms/SROA/alignment.ll
@@ -133,8 +133,8 @@ define void @test6() {
  ; We should set the alignment on all load and store operations; make sure
  ; we choose an appropriate alignment.
  ; CHECK-LABEL: @test6(
-; CHECK: alloca double{{$}}
-; CHECK: alloca double{{$}}
+; CHECK: alloca double, align 8{{$}}
+; CHECK: alloca double, align 8{{$}}
  ; CHECK: store{{.*}}, align 8
  ; CHECK: load{{.*}}, align 8
  ; CHECK: store{{.*}}, align 8
diff --git a/llvm/test/Transforms/SROA/pointer-offset-size.ll b/llvm/test/Transforms/SROA/pointer-offset-size.ll

index c632c37..caad3c2 100644 (file)
--- a/llvm/test/Transforms/SROA/pointer-offset-size.ll
+++ b/llvm/test/Transforms/SROA/pointer-offset-size.ll
@@ -8,10 +8,10 @@ target datalayout = "e-p:64:64:64:32"
  define i16 @test(%struct.test* %ts2.i) {
  ; CHECK-LABEL: @test(
  ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[S_SROA_0:%.*]] = alloca [3 x i8], align 2
+; CHECK-NEXT:    [[S_SROA_0:%.*]] = alloca [3 x i8], align 8
  ; CHECK-NEXT:    [[S_SROA_0_0__SROA_CAST:%.*]] = bitcast %struct.test* [[TS2_I:%.*]] to i8*
  ; CHECK-NEXT:    [[S_SROA_0_0__SROA_IDX:%.*]] = getelementptr inbounds [3 x i8], [3 x i8]* [[S_SROA_0]], i32 0, i32 0
-; CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 [[S_SROA_0_0__SROA_CAST]], i8* align 2 [[S_SROA_0_0__SROA_IDX]], i32 3, i1 false)
+; CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 [[S_SROA_0_0__SROA_CAST]], i8* align 8 [[S_SROA_0_0__SROA_IDX]], i32 3, i1 false)
  ; CHECK-NEXT:    [[X1_I_I:%.*]] = getelementptr inbounds [[STRUCT_TEST:%.*]], %struct.test* [[TS2_I]], i32 0, i32 0, i32 0
  ; CHECK-NEXT:    [[TMP0:%.*]] = load i16, i16* [[X1_I_I]]
  ; CHECK-NEXT:    ret i16 [[TMP0]]
diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td

index 3939917..9a7836c 100644 (file)
--- a/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td
@@ -225,7 +225,7 @@ def LLVM_AllocaOp :
      if ($alignment.hasValue()) {
        auto align = $alignment.getValue().getZExtValue();
        if (align != 0)
-        alloca->setAlignment(llvm::MaybeAlign(align));
+        alloca->setAlignment(llvm::Align(align));
      }
      $res = alloca;
    }];
diff --git a/polly/lib/CodeGen/BlockGenerators.cpp b/polly/lib/CodeGen/BlockGenerators.cpp

index d844f87..5a64cc8 100644 (file)
--- a/polly/lib/CodeGen/BlockGenerators.cpp
+++ b/polly/lib/CodeGen/BlockGenerators.cpp
@@ -511,8 +511,9 @@ Value *BlockGenerator::getOrCreateAlloca(const ScopArrayInfo *Array) {
  
    const DataLayout &DL = Builder.GetInsertBlock()->getModule()->getDataLayout();
  
-  Addr = new AllocaInst(Ty, DL.getAllocaAddrSpace(),
-                        ScalarBase->getName() + NameExt);
+  Addr =
+      new AllocaInst(Ty, DL.getAllocaAddrSpace(), nullptr,
+                     DL.getPrefTypeAlign(Ty), ScalarBase->getName() + NameExt);
    EntryBB = &Builder.GetInsertBlock()->getParent()->getEntryBlock();
    Addr->insertBefore(&*EntryBB->getFirstInsertionPt());
  
diff --git a/polly/lib/CodeGen/IslNodeBuilder.cpp b/polly/lib/CodeGen/IslNodeBuilder.cpp

index 9f7e01b..a329661 100644 (file)
--- a/polly/lib/CodeGen/IslNodeBuilder.cpp
+++ b/polly/lib/CodeGen/IslNodeBuilder.cpp
@@ -1395,8 +1395,8 @@ bool IslNodeBuilder::preloadInvariantEquivClass(
  
    BasicBlock *EntryBB = &Builder.GetInsertBlock()->getParent()->getEntryBlock();
    auto *Alloca = new AllocaInst(AccInstTy, DL.getAllocaAddrSpace(),
-                                AccInst->getName() + ".preload.s2a");
-  Alloca->insertBefore(&*EntryBB->getFirstInsertionPt());
+                                AccInst->getName() + ".preload.s2a",
+                                &*EntryBB->getFirstInsertionPt());
    Builder.CreateStore(PreloadVal, Alloca);
    ValueMapT PreloadedPointer;
    PreloadedPointer[PreloadVal] = AccInst;
@@ -1496,8 +1496,8 @@ void IslNodeBuilder::allocateNewArrays(BBPair StartExitBlocks) {
  
        auto *CreatedArray = new AllocaInst(NewArrayType, DL.getAllocaAddrSpace(),
                                            SAI->getName(), &*InstIt);
-      CreatedArray->setAlignment(
-          MaybeAlign(PollyTargetFirstLevelCacheLineSize));
+      if (PollyTargetFirstLevelCacheLineSize)
+        CreatedArray->setAlignment(Align(PollyTargetFirstLevelCacheLineSize));
        SAI->setBasePtr(CreatedArray);
      }
    }
author	Eli Friedman <efriedma@quicinc.com>
	Fri, 15 May 2020 20:23:14 +0000 (13:23 -0700)
committer	Eli Friedman <efriedma@quicinc.com>
	Sat, 16 May 2020 21:53:16 +0000 (14:53 -0700)
llvm/include/llvm/IR/Instructions.h		patch \| blob \| history
llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h		patch \| blob \| history
llvm/lib/AsmParser/LLParser.cpp		patch \| blob \| history
llvm/lib/Bitcode/Reader/BitcodeReader.cpp		patch \| blob \| history
llvm/lib/CodeGen/CodeGenPrepare.cpp		patch \| blob \| history
llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp		patch \| blob \| history
llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp		patch \| blob \| history
llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp		patch \| blob \| history
llvm/lib/IR/Core.cpp		patch \| blob \| history
llvm/lib/IR/Instructions.cpp		patch \| blob \| history
llvm/lib/Target/AArch64/AArch64StackTagging.cpp		patch \| blob \| history
llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp		patch \| blob \| history
llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp		patch \| blob \| history
llvm/lib/Transforms/Coroutines/CoroElide.cpp		patch \| blob \| history
llvm/lib/Transforms/Coroutines/CoroFrame.cpp		patch \| blob \| history
llvm/lib/Transforms/IPO/ArgumentPromotion.cpp		patch \| blob \| history
llvm/lib/Transforms/IPO/AttributorAttributes.cpp		patch \| blob \| history
llvm/lib/Transforms/IPO/Inliner.cpp		patch \| blob \| history
llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp		patch \| blob \| history
llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp		patch \| blob \| history
llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp		patch \| blob \| history
llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp		patch \| blob \| history
llvm/lib/Transforms/Scalar/GVNHoist.cpp		patch \| blob \| history
llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp		patch \| blob \| history
llvm/lib/Transforms/Scalar/SROA.cpp		patch \| blob \| history
llvm/lib/Transforms/Utils/Local.cpp		patch \| blob \| history
llvm/test/Assembler/alloca-addrspace-elems.ll		patch \| blob \| history
llvm/test/Assembler/alloca-addrspace0.ll		patch \| blob \| history
llvm/test/Assembler/block-labels.ll		patch \| blob \| history
llvm/test/Assembler/datalayout-alloca-addrspace-mismatch-0.ll		patch \| blob \| history
llvm/test/Assembler/datalayout-alloca-addrspace.ll		patch \| blob \| history
llvm/test/Assembler/drop-debug-info-nonzero-alloca.ll		patch \| blob \| history
llvm/test/CodeGen/AMDGPU/alloca.ll		patch \| blob \| history
llvm/test/CodeGen/AMDGPU/lower-kernargs.ll		patch \| blob \| history
llvm/test/Transforms/ArgumentPromotion/attrs.ll		patch \| blob \| history
llvm/test/Transforms/ArgumentPromotion/byval-2.ll		patch \| blob \| history
llvm/test/Transforms/ArgumentPromotion/byval.ll		patch \| blob \| history
llvm/test/Transforms/ArgumentPromotion/dbg.ll		patch \| blob \| history
llvm/test/Transforms/ArgumentPromotion/tail.ll		patch \| blob \| history
llvm/test/Transforms/Attributor/ArgumentPromotion/attrs.ll		patch \| blob \| history
llvm/test/Transforms/Attributor/ArgumentPromotion/byval-2.ll		patch \| blob \| history
llvm/test/Transforms/Attributor/ArgumentPromotion/byval.ll		patch \| blob \| history
llvm/test/Transforms/Attributor/ArgumentPromotion/fp80.ll		patch \| blob \| history
llvm/test/Transforms/Attributor/ArgumentPromotion/inalloca.ll		patch \| blob \| history
llvm/test/Transforms/Attributor/ArgumentPromotion/tail.ll		patch \| blob \| history
llvm/test/Transforms/Attributor/IPConstantProp/2009-09-24-byval-ptr.ll		patch \| blob \| history
llvm/test/Transforms/Attributor/value-simplify.ll		patch \| blob \| history
llvm/test/Transforms/NewGVN/pr33367.ll		patch \| blob \| history
llvm/test/Transforms/SROA/alignment.ll		patch \| blob \| history
llvm/test/Transforms/SROA/pointer-offset-size.ll		patch \| blob \| history
mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td		patch \| blob \| history
polly/lib/CodeGen/BlockGenerators.cpp		patch \| blob \| history
polly/lib/CodeGen/IslNodeBuilder.cpp		patch \| blob \| history