From 52e98f620caf29f75c6d41f51a45610c26f68c65 Mon Sep 17 00:00:00 2001
From: Nikita Popov
Date: Sun, 17 May 2020 22:14:42 +0200
Subject: [PATCH] [Alignment] Remove unnecessary getValueOrABITypeAlignment calls (NFC)

Now that load/store alignment is required, we no longer need most of
them. Also switch the getLoadStoreAlignment() helper to return Align
instead of MaybeAlign.
---
 llvm/include/llvm/IR/Instructions.h                |  6 +--
 llvm/lib/Analysis/Loads.cpp                        |  3 +-
 llvm/lib/Analysis/VectorUtils.cpp                  |  9 +---
 llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp       |  2 +-
 .../CodeGen/SelectionDAG/SelectionDAGBuilder.cpp   |  5 +-
 llvm/lib/Target/X86/X86FastISel.cpp                |  4 +-
 .../Instrumentation/DataFlowSanitizer.cpp          |  7 +--
 .../Transforms/Scalar/AlignmentFromAssumptions.cpp |  9 +---
 llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp     | 35 +++---------
 llvm/lib/Transforms/Scalar/SROA.cpp                | 62 +++++++---------------
 .../Transforms/Vectorize/LoadStoreVectorizer.cpp   | 13 +----
 .../Vectorize/LoopVectorizationLegality.cpp        |  8 +--
 llvm/lib/Transforms/Vectorize/LoopVectorize.cpp    | 28 ++++------
 llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp    | 13 ++---
 14 files changed, 58 insertions(+), 146 deletions(-)

diff --git a/llvm/include/llvm/IR/Instructions.h b/llvm/include/llvm/IR/Instructions.h
index 522441be462..b2fe803ed97 100644
--- a/llvm/include/llvm/IR/Instructions.h
+++ b/llvm/include/llvm/IR/Instructions.h
@@ -5151,12 +5151,12 @@ inline Value *getPointerOperand(Value *V) {
 }
 
 /// A helper function that returns the alignment of load or store instruction.
-inline MaybeAlign getLoadStoreAlignment(Value *I) {
+inline Align getLoadStoreAlignment(Value *I) {
   assert((isa<LoadInst>(I) || isa<StoreInst>(I)) &&
          "Expected Load or Store instruction");
   if (auto *LI = dyn_cast<LoadInst>(I))
-    return MaybeAlign(LI->getAlignment());
-  return MaybeAlign(cast<StoreInst>(I)->getAlignment());
+    return LI->getAlign();
+  return cast<StoreInst>(I)->getAlign();
 }
 
 /// A helper function that returns the address space of the pointer operand of
diff --git a/llvm/lib/Analysis/Loads.cpp b/llvm/lib/Analysis/Loads.cpp
index 02be3693c35..bf1ede5b97c 100644
--- a/llvm/lib/Analysis/Loads.cpp
+++ b/llvm/lib/Analysis/Loads.cpp
@@ -210,8 +210,7 @@ bool llvm::isDereferenceableAndAlignedInLoop(LoadInst *LI, Loop *L,
   APInt EltSize(DL.getIndexTypeSizeInBits(Ptr->getType()),
                 DL.getTypeStoreSize(LI->getType()));
-  const Align Alignment = DL.getValueOrABITypeAlignment(
-      MaybeAlign(LI->getAlignment()), LI->getType());
+  const Align Alignment = LI->getAlign();
 
   Instruction *HeaderFirstNonPHI = L->getHeader()->getFirstNonPHI();
 
diff --git a/llvm/lib/Analysis/VectorUtils.cpp b/llvm/lib/Analysis/VectorUtils.cpp
index 4c2f9006651..2276c1fd843 100644
--- a/llvm/lib/Analysis/VectorUtils.cpp
+++ b/llvm/lib/Analysis/VectorUtils.cpp
@@ -946,13 +946,8 @@ void InterleavedAccessInfo::collectConstStrideAccesses(
       const SCEV *Scev = replaceSymbolicStrideSCEV(PSE, Strides, Ptr);
       PointerType *PtrTy = cast<PointerType>(Ptr->getType());
       uint64_t Size = DL.getTypeAllocSize(PtrTy->getElementType());
-
-      // An alignment of 0 means target ABI alignment.
-      MaybeAlign Alignment = MaybeAlign(getLoadStoreAlignment(&I));
-      if (!Alignment)
-        Alignment = Align(DL.getABITypeAlignment(PtrTy->getElementType()));
-
-      AccessStrideInfo[&I] = StrideDescriptor(Stride, Scev, Size, *Alignment);
+      AccessStrideInfo[&I] = StrideDescriptor(Stride, Scev, Size,
+                                              getLoadStoreAlignment(&I));
     }
   }
 
diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
index 9edc83e9c6c..18fcec55dcf 100644
--- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -247,7 +247,7 @@ Align IRTranslator::getMemOpAlign(const Instruction &I) {
   if (const StoreInst *SI = dyn_cast<StoreInst>(&I))
     return SI->getAlign();
   if (const LoadInst *LI = dyn_cast<LoadInst>(&I)) {
-    return DL->getValueOrABITypeAlignment(LI->getAlign(), LI->getType());
+    return LI->getAlign();
   }
   if (const AtomicCmpXchgInst *AI = dyn_cast<AtomicCmpXchgInst>(&I)) {
     // TODO(PR27168): This instruction has no alignment attribute, but unlike
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index bac0f09cbed..cfbbbedc64f 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -3956,7 +3956,7 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
   SDValue Ptr = getValue(SV);
   Type *Ty = I.getType();
-  Align Alignment = DL->getValueOrABITypeAlignment(I.getAlign(), Ty);
+  Align Alignment = I.getAlign();
 
   AAMDNodes AAInfo;
   I.getAAMetadata(AAInfo);
@@ -4149,8 +4149,7 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) {
   SDValue Root = I.isVolatile() ? getRoot() : getMemoryRoot();
   SmallVector Chains(std::min(MaxParallelChains, NumValues));
   SDLoc dl = getCurSDLoc();
-  Align Alignment =
-      DL->getValueOrABITypeAlignment(I.getAlign(), SrcV->getType());
+  Align Alignment = I.getAlign();
   AAMDNodes AAInfo;
   I.getAAMetadata(AAInfo);
diff --git a/llvm/lib/Target/X86/X86FastISel.cpp b/llvm/lib/Target/X86/X86FastISel.cpp
index 0ed58a4b4fc..5bc4edcef22 100644
--- a/llvm/lib/Target/X86/X86FastISel.cpp
+++ b/llvm/lib/Target/X86/X86FastISel.cpp
@@ -3930,14 +3930,12 @@ bool X86FastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
   const X86InstrInfo &XII = (const X86InstrInfo &)TII;
 
   unsigned Size = DL.getTypeAllocSize(LI->getType());
-  Align Alignment =
-      DL.getValueOrABITypeAlignment(LI->getAlign(), LI->getType());
 
   SmallVector AddrOps;
   AM.getFullAddress(AddrOps);
 
   MachineInstr *Result = XII.foldMemoryOperandImpl(
-      *FuncInfo.MF, *MI, OpNo, AddrOps, FuncInfo.InsertPt, Size, Alignment,
+      *FuncInfo.MF, *MI, OpNo, AddrOps, FuncInfo.InsertPt, Size, LI->getAlign(),
       /*AllowCommute=*/true);
   if (!Result)
     return false;
diff --git a/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
index 20d29beebc9..a67107cc227 100644
--- a/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
@@ -1441,17 +1441,14 @@ void DFSanVisitor::visitStoreInst(StoreInst &SI) {
   if (Size == 0)
     return;
 
-  const Align Alignement =
-      ClPreserveAlignment ? DL.getValueOrABITypeAlignment(
-                                SI.getAlign(), SI.getValueOperand()->getType())
-                          : Align(1);
+  const Align Alignment = ClPreserveAlignment ? SI.getAlign() : Align(1);
 
   Value* Shadow = DFSF.getShadow(SI.getValueOperand());
   if (ClCombinePointerLabelsOnStore) {
     Value *PtrShadow = DFSF.getShadow(SI.getPointerOperand());
     Shadow = DFSF.combineShadows(Shadow, PtrShadow, &SI);
   }
-  DFSF.storeShadow(SI.getPointerOperand(), Size, Alignement, Shadow, &SI);
+  DFSF.storeShadow(SI.getPointerOperand(), Size, Alignment, Shadow, &SI);
   if (ClEventCallbacks) {
     IRBuilder<> IRB(&SI);
     IRB.CreateCall(DFSF.DFS.DFSanStoreCallbackFn, Shadow);
diff --git a/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp b/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
index 91f1ee58395..6f596388093 100644
--- a/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
+++ b/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
@@ -320,24 +320,19 @@ bool AlignmentFromAssumptionsPass::processAssumption(CallInst *ACall) {
       WorkList.push_back(K);
   }
 
-  const DataLayout &DL = SE->getDataLayout();
   while (!WorkList.empty()) {
     Instruction *J = WorkList.pop_back_val();
     if (LoadInst *LI = dyn_cast<LoadInst>(J)) {
       Align NewAlignment = getNewAlignment(AASCEV, AlignSCEV, OffSCEV,
                                            LI->getPointerOperand(), SE);
-      Align OldAlignment =
-          DL.getValueOrABITypeAlignment(LI->getAlign(), LI->getType());
-      if (NewAlignment > OldAlignment) {
+      if (NewAlignment > LI->getAlign()) {
         LI->setAlignment(NewAlignment);
         ++NumLoadAlignChanged;
       }
     } else if (StoreInst *SI = dyn_cast<StoreInst>(J)) {
       Align NewAlignment = getNewAlignment(AASCEV, AlignSCEV, OffSCEV,
                                            SI->getPointerOperand(), SE);
-      Align OldAlignment = DL.getValueOrABITypeAlignment(
-          SI->getAlign(), SI->getOperand(0)->getType());
-      if (NewAlignment > OldAlignment) {
+      if (NewAlignment > SI->getAlign()) {
         SI->setAlignment(NewAlignment);
         ++NumStoreAlignChanged;
       }
diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index ff02926aa53..33dd0de3b67 100644
--- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -143,23 +143,6 @@ bool MemsetRange::isProfitableToUseMemset(const DataLayout &DL) const {
   return TheStores.size() > NumPointerStores+NumByteStores;
 }
 
-
-static Align findStoreAlignment(const DataLayout &DL, const StoreInst *SI) {
-  return DL.getValueOrABITypeAlignment(SI->getAlign(),
-                                       SI->getOperand(0)->getType());
-}
-
-static Align findLoadAlignment(const DataLayout &DL, const LoadInst *LI) {
-  return DL.getValueOrABITypeAlignment(LI->getAlign(), LI->getType());
-}
-
-static Align findCommonAlignment(const DataLayout &DL, const StoreInst *SI,
-                                 const LoadInst *LI) {
-  Align StoreAlign = findStoreAlignment(DL, SI);
-  Align LoadAlign = findLoadAlignment(DL, LI);
-  return commonAlignment(StoreAlign, LoadAlign);
-}
-
 namespace {
 
 class MemsetRanges {
@@ -190,7 +173,7 @@ public:
     int64_t StoreSize = DL.getTypeStoreSize(SI->getOperand(0)->getType());
 
     addRange(OffsetFromFirst, StoreSize, SI->getPointerOperand(),
-             findStoreAlignment(DL, SI).value(), SI);
+             SI->getAlign().value(), SI);
   }
 
   void addMemSet(int64_t OffsetFromFirst, MemSetInst *MSI) {
@@ -579,12 +562,12 @@ bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
       Instruction *M;
       if (UseMemMove)
         M = Builder.CreateMemMove(
-            SI->getPointerOperand(), findStoreAlignment(DL, SI),
-            LI->getPointerOperand(), findLoadAlignment(DL, LI), Size);
+            SI->getPointerOperand(), SI->getAlign(),
+            LI->getPointerOperand(), LI->getAlign(), Size);
       else
         M = Builder.CreateMemCpy(
-            SI->getPointerOperand(), findStoreAlignment(DL, SI),
-            LI->getPointerOperand(), findLoadAlignment(DL, LI), Size);
+            SI->getPointerOperand(), SI->getAlign(),
+            LI->getPointerOperand(), LI->getAlign(), Size);
 
       LLVM_DEBUG(dbgs() << "Promoting " << *LI << " to " << *SI << " => "
                         << *M << "\n");
@@ -636,7 +619,7 @@ bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
             LI, SI->getPointerOperand()->stripPointerCasts(),
             LI->getPointerOperand()->stripPointerCasts(),
             DL.getTypeStoreSize(SI->getOperand(0)->getType()),
-            findCommonAlignment(DL, SI, LI), C);
+            commonAlignment(SI->getAlign(), LI->getAlign()), C);
         if (changed) {
           MD->removeInstruction(SI);
          SI->eraseFromParent();
@@ -669,11 +652,9 @@ bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
     auto *T = V->getType();
     if (T->isAggregateType()) {
       uint64_t Size = DL.getTypeStoreSize(T);
-      const Align MA =
-          DL.getValueOrABITypeAlignment(MaybeAlign(SI->getAlignment()), T);
       IRBuilder<> Builder(SI);
-      auto *M =
-          Builder.CreateMemSet(SI->getPointerOperand(), ByteVal, Size, MA);
+      auto *M = Builder.CreateMemSet(SI->getPointerOperand(), ByteVal, Size,
+                                     SI->getAlign());
 
       LLVM_DEBUG(dbgs() << "Promoting " << *SI << " to " << *M << "\n");
 
diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp
index 01b706332e7..00e16f8030e 100644
--- a/llvm/lib/Transforms/Scalar/SROA.cpp
+++ b/llvm/lib/Transforms/Scalar/SROA.cpp
@@ -1267,7 +1267,6 @@ static void speculatePHINodeLoads(PHINode &PN) {
 
   LoadInst *SomeLoad = cast<LoadInst>(PN.user_back());
   Type *LoadTy = SomeLoad->getType();
-  const DataLayout &DL = PN.getModule()->getDataLayout();
   IRBuilderTy PHIBuilder(&PN);
   PHINode *NewPN = PHIBuilder.CreatePHI(LoadTy, PN.getNumIncomingValues(),
                                         PN.getName() + ".sroa.speculated");
@@ -1276,8 +1275,7 @@ static void speculatePHINodeLoads(PHINode &PN) {
   // matter which one we get and if any differ.
   AAMDNodes AATags;
   SomeLoad->getAAMetadata(AATags);
-  Align Alignment =
-      DL.getValueOrABITypeAlignment(SomeLoad->getAlign(), SomeLoad->getType());
+  Align Alignment = SomeLoad->getAlign();
 
   // Rewrite all loads of the PN to use the new PHI.
   while (!PN.use_empty()) {
@@ -1304,11 +1302,10 @@ static void speculatePHINodeLoads(PHINode &PN) {
     Instruction *TI = Pred->getTerminator();
     IRBuilderTy PredBuilder(TI);
 
-    LoadInst *Load = PredBuilder.CreateLoad(
-        LoadTy, InVal,
+    LoadInst *Load = PredBuilder.CreateAlignedLoad(
+        LoadTy, InVal, Alignment,
         (PN.getName() + ".sroa.speculate.load." + Pred->getName()));
     ++NumLoadsSpeculated;
-    Load->setAlignment(Alignment);
     if (AATags)
       Load->setAAMetadata(AATags);
     NewPN->addIncoming(Load, Pred);
@@ -1688,20 +1685,8 @@ static Value *getAdjustedPtr(IRBuilderTy &IRB, const DataLayout &DL, Value *Ptr,
 }
 
 /// Compute the adjusted alignment for a load or store from an offset.
-static Align getAdjustedAlignment(Instruction *I, uint64_t Offset,
-                                  const DataLayout &DL) {
-  MaybeAlign Alignment;
-  Type *Ty;
-  if (auto *LI = dyn_cast<LoadInst>(I)) {
-    Alignment = MaybeAlign(LI->getAlignment());
-    Ty = LI->getType();
-  } else if (auto *SI = dyn_cast<StoreInst>(I)) {
-    Alignment = MaybeAlign(SI->getAlignment());
-    Ty = SI->getValueOperand()->getType();
-  } else {
-    llvm_unreachable("Only loads and stores are allowed!");
-  }
-  return commonAlignment(DL.getValueOrABITypeAlignment(Alignment, Ty), Offset);
+static Align getAdjustedAlignment(Instruction *I, uint64_t Offset) {
+  return commonAlignment(getLoadStoreAlignment(I), Offset);
 }
 
 /// Test whether we can convert a value from the old to the new type.
@@ -2448,9 +2433,8 @@ private:
   /// You can optionally pass a type to this routine and if that type's ABI
   /// alignment is itself suitable, this will return zero.
   Align getSliceAlign() {
-    Align NewAIAlign = DL.getValueOrABITypeAlignment(
-        MaybeAlign(NewAI.getAlignment()), NewAI.getAllocatedType());
-    return commonAlignment(NewAIAlign, NewBeginOffset - NewAllocaBeginOffset);
+    return commonAlignment(NewAI.getAlign(),
+                           NewBeginOffset - NewAllocaBeginOffset);
   }
 
   unsigned getIndex(uint64_t Offset) {
@@ -3139,17 +3123,12 @@ private:
       Instruction *I = Uses.pop_back_val();
 
      if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
-        Align LoadAlign =
-            DL.getValueOrABITypeAlignment(LI->getAlign(), LI->getType());
-        LI->setAlignment(std::min(LoadAlign, getSliceAlign()));
+        LI->setAlignment(std::min(LI->getAlign(), getSliceAlign()));
         continue;
       }
       if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
-        Value *Op = SI->getOperand(0);
-        Align StoreAlign = DL.getValueOrABITypeAlignment(
-            MaybeAlign(SI->getAlignment()), Op->getType());
-        SI->setAlignment(std::min(StoreAlign, getSliceAlign()));
-        continue;
+        SI->setAlignment(std::min(SI->getAlign(), getSliceAlign()));
+        continue;
       }
 
       assert(isa(I) || isa(I) ||
@@ -3399,7 +3378,7 @@ private:
     AAMDNodes AATags;
     LI.getAAMetadata(AATags);
     LoadOpSplitter Splitter(&LI, *U, LI.getType(), AATags,
-                            getAdjustedAlignment(&LI, 0, DL), DL);
+                            getAdjustedAlignment(&LI, 0), DL);
     Value *V = UndefValue::get(LI.getType());
     Splitter.emitSplitOps(LI.getType(), V, LI.getName() + ".fca");
     LI.replaceAllUsesWith(V);
@@ -3446,7 +3425,7 @@ private:
     AAMDNodes AATags;
     SI.getAAMetadata(AATags);
     StoreOpSplitter Splitter(&SI, *U, V->getType(), AATags,
-                             getAdjustedAlignment(&SI, 0, DL), DL);
+                             getAdjustedAlignment(&SI, 0), DL);
     Splitter.emitSplitOps(V->getType(), V, V->getName() + ".fca");
     SI.eraseFromParent();
     return true;
@@ -3895,7 +3874,7 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
         getAdjustedPtr(IRB, DL, BasePtr,
                        APInt(DL.getIndexSizeInBits(AS), PartOffset),
                        PartPtrTy, BasePtr->getName() + "."),
-        getAdjustedAlignment(LI, PartOffset, DL),
+        getAdjustedAlignment(LI, PartOffset),
         /*IsVolatile*/ false, LI->getName());
     PLoad->copyMetadata(*LI, {LLVMContext::MD_mem_parallel_loop_access,
                               LLVMContext::MD_access_group});
@@ -3953,7 +3932,7 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
         getAdjustedPtr(IRB, DL, StoreBasePtr,
                        APInt(DL.getIndexSizeInBits(AS), PartOffset),
                        PartPtrTy, StoreBasePtr->getName() + "."),
-        getAdjustedAlignment(SI, PartOffset, DL),
+        getAdjustedAlignment(SI, PartOffset),
         /*IsVolatile*/ false);
     PStore->copyMetadata(*LI, {LLVMContext::MD_mem_parallel_loop_access,
                                LLVMContext::MD_access_group});
@@ -4038,7 +4017,7 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
           getAdjustedPtr(IRB, DL, LoadBasePtr,
                          APInt(DL.getIndexSizeInBits(AS), PartOffset),
                          LoadPartPtrTy, LoadBasePtr->getName() + "."),
-          getAdjustedAlignment(LI, PartOffset, DL),
+          getAdjustedAlignment(LI, PartOffset),
          /*IsVolatile*/ false, LI->getName());
     }
 
@@ -4050,7 +4029,7 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
           getAdjustedPtr(IRB, DL, StoreBasePtr,
                          APInt(DL.getIndexSizeInBits(AS), PartOffset),
                          StorePartPtrTy, StoreBasePtr->getName() + "."),
-          getAdjustedAlignment(SI, PartOffset, DL),
+          getAdjustedAlignment(SI, PartOffset),
          /*IsVolatile*/ false);
 
       // Now build a new slice for the alloca.
@@ -4186,13 +4165,8 @@ AllocaInst *SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS,
     // FIXME: We might want to defer PHI speculation until after here.
     // FIXME: return nullptr;
   } else {
-    // If alignment is unspecified we fallback on the one required by the ABI
-    // for this type. We also make sure the alignment is compatible with
-    // P.beginOffset().
-    const Align Alignment = commonAlignment(
-        DL.getValueOrABITypeAlignment(MaybeAlign(AI.getAlignment()),
-                                      AI.getAllocatedType()),
-        P.beginOffset());
+    // Make sure the alignment is compatible with P.beginOffset().
+    const Align Alignment = commonAlignment(AI.getAlign(), P.beginOffset());
     // If we will get at least this much alignment from the type alone, leave
     // the alloca's alignment unconstrained.
     const bool IsUnconstrained = Alignment <= DL.getABITypeAlignment(SliceTy);
diff --git a/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp b/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
index bdde44f6af2..9915e27c17b 100644
--- a/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
@@ -128,15 +128,6 @@ public:
 private:
   unsigned getPointerAddressSpace(Value *I);
 
-  Align getAlign(LoadInst *LI) const {
-    return DL.getValueOrABITypeAlignment(LI->getAlign(), LI->getType());
-  }
-
-  Align getAlign(StoreInst *SI) const {
-    return DL.getValueOrABITypeAlignment(SI->getAlign(),
-                                         SI->getValueOperand()->getType());
-  }
-
   static const unsigned MaxDepth = 3;
 
   bool isConsecutiveAccess(Value *A, Value *B);
@@ -950,7 +941,7 @@ bool Vectorizer::vectorizeStoreChain(
   unsigned VecRegSize = TTI.getLoadStoreVecRegBitWidth(AS);
   unsigned VF = VecRegSize / Sz;
   unsigned ChainSize = Chain.size();
-  Align Alignment = getAlign(S0);
+  Align Alignment = S0->getAlign();
 
   if (!isPowerOf2_32(Sz) || VF < 2 || ChainSize < 2) {
     InstructionsProcessed->insert(Chain.begin(), Chain.end());
@@ -1103,7 +1094,7 @@ bool Vectorizer::vectorizeLoadChain(
   unsigned VecRegSize = TTI.getLoadStoreVecRegBitWidth(AS);
   unsigned VF = VecRegSize / Sz;
   unsigned ChainSize = Chain.size();
-  Align Alignment = getAlign(L0);
+  Align Alignment = L0->getAlign();
 
   if (!isPowerOf2_32(Sz) || VF < 2 || ChainSize < 2) {
     InstructionsProcessed->insert(Chain.begin(), Chain.end());
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
index 10a96f474be..bf19405cd4e 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -769,9 +769,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
         // Arbitrarily try a vector of 2 elements.
         Type *VecTy = VectorType::get(T, /*NumElements=*/2);
         assert(VecTy && "did not find vectorized version of stored type");
-        const MaybeAlign Alignment = getLoadStoreAlignment(ST);
-        assert(Alignment && "Alignment should be set");
-        if (!TTI->isLegalNTStore(VecTy, *Alignment)) {
+        if (!TTI->isLegalNTStore(VecTy, ST->getAlign())) {
          reportVectorizationFailure(
              "nontemporal store instruction cannot be vectorized",
              "nontemporal store instruction cannot be vectorized",
@@ -786,9 +784,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
        // supported on the target (arbitrarily try a vector of 2 elements).
        Type *VecTy = VectorType::get(I.getType(), /*NumElements=*/2);
        assert(VecTy && "did not find vectorized version of load type");
-        const MaybeAlign Alignment = getLoadStoreAlignment(LD);
-        assert(Alignment && "Alignment should be set");
-        if (!TTI->isLegalNTLoad(VecTy, *Alignment)) {
+        if (!TTI->isLegalNTLoad(VecTy, LD->getAlign())) {
          reportVectorizationFailure(
              "nontemporal load instruction cannot be vectorized",
              "nontemporal load instruction cannot be vectorized",
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index b442f1ef097..247211f3607 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -1233,7 +1233,7 @@ public:
    if (!LI && !SI)
      return false;
    auto *Ty = getMemInstValueType(V);
-    MaybeAlign Align = getLoadStoreAlignment(V);
+    Align Align = getLoadStoreAlignment(V);
    return (LI && isLegalMaskedGather(Ty, Align)) ||
           (SI && isLegalMaskedScatter(Ty, Align));
  }
@@ -2383,11 +2383,7 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr,
  Type *ScalarDataTy = getMemInstValueType(Instr);
 
  Type *DataTy = VectorType::get(ScalarDataTy, VF);
-  // An alignment of 0 means target abi alignment. We need to use the scalar's
-  // target abi alignment in such a case.
-  const DataLayout &DL = Instr->getModule()->getDataLayout();
-  const Align Alignment =
-      DL.getValueOrABITypeAlignment(getLoadStoreAlignment(Instr), ScalarDataTy);
+  const Align Alignment = getLoadStoreAlignment(Instr);
 
  // Determine if the pointer operand of the access is either consecutive or
  // reverse consecutive.
@@ -4650,7 +4646,7 @@ bool LoopVectorizationCostModel::isScalarWithPredication(Instruction *I, unsigne
           "Widening decision should be ready at this moment");
    return WideningDecision == CM_Scalarize;
  }
-  const MaybeAlign Alignment = getLoadStoreAlignment(I);
+  const Align Alignment = getLoadStoreAlignment(I);
  return isa<LoadInst>(I) ? !(isLegalMaskedLoad(Ty, Ptr, Alignment) ||
                              isLegalMaskedGather(Ty, Alignment))
                          : !(isLegalMaskedStore(Ty, Ptr, Alignment) ||
@@ -4697,7 +4693,7 @@ bool LoopVectorizationCostModel::interleavedAccessCanBeWidened(Instruction *I,
         "Masked interleave-groups for predicated accesses are not enabled.");
 
  auto *Ty = getMemInstValueType(I);
-  const MaybeAlign Alignment = getLoadStoreAlignment(I);
+  const Align Alignment = getLoadStoreAlignment(I);
  return isa<LoadInst>(I) ? TTI.isLegalMaskedLoad(Ty, Alignment)
                          : TTI.isLegalMaskedStore(Ty, Alignment);
 }
@@ -5845,7 +5841,7 @@ unsigned LoopVectorizationCostModel::getMemInstScalarizationCost(Instruction *I,
 
  // Don't pass *I here, since it is scalar but will actually be part of a
  // vectorized loop where the user of it is a vectorized instruction.
-  const MaybeAlign Alignment = getLoadStoreAlignment(I);
+  const Align Alignment = getLoadStoreAlignment(I);
  Cost += VF * TTI.getMemoryOpCost(I->getOpcode(), ValTy->getScalarType(),
                                   Alignment, AS, TTI::TCK_RecipThroughput);
 
@@ -5880,12 +5876,11 @@ unsigned LoopVectorizationCostModel::getConsecutiveMemOpCost(Instruction *I,
  assert((ConsecutiveStride == 1 || ConsecutiveStride == -1) &&
         "Stride should be 1 or -1 for consecutive memory access");
-  const MaybeAlign Alignment = getLoadStoreAlignment(I);
+  const Align Alignment = getLoadStoreAlignment(I);
  unsigned Cost = 0;
  if (Legal->isMaskRequired(I))
    Cost += TTI.getMaskedMemoryOpCost(I->getOpcode(), VectorTy,
-                                      Alignment ? Alignment->value() : 0, AS,
-                                      CostKind);
+                                      Alignment.value(), AS, CostKind);
  else
    Cost += TTI.getMemoryOpCost(I->getOpcode(), VectorTy, Alignment, AS,
                                CostKind, I);
@@ -5900,7 +5895,7 @@ unsigned LoopVectorizationCostModel::getUniformMemOpCost(Instruction *I,
                                                          unsigned VF) {
  Type *ValTy = getMemInstValueType(I);
  auto *VectorTy = cast<VectorType>(ToVectorTy(ValTy, VF));
-  const MaybeAlign Alignment = getLoadStoreAlignment(I);
+  const Align Alignment = getLoadStoreAlignment(I);
  unsigned AS = getLoadStoreAddressSpace(I);
  enum TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
  if (isa<StoreInst>(I)) {
@@ -5925,13 +5920,12 @@ unsigned LoopVectorizationCostModel::getGatherScatterCost(Instruction *I,
                                                           unsigned VF) {
  Type *ValTy = getMemInstValueType(I);
  auto *VectorTy = cast<VectorType>(ToVectorTy(ValTy, VF));
-  const MaybeAlign Alignment = getLoadStoreAlignment(I);
+  const Align Alignment = getLoadStoreAlignment(I);
  Value *Ptr = getLoadStorePointerOperand(I);
 
  return TTI.getAddressComputationCost(VectorTy) +
         TTI.getGatherScatterOpCost(I->getOpcode(), VectorTy, Ptr,
-                                    Legal->isMaskRequired(I),
-                                    Alignment ? Alignment->value() : 0,
+                                    Legal->isMaskRequired(I), Alignment.value(),
                                    TargetTransformInfo::TCK_RecipThroughput,
                                    I);
 }
@@ -5981,7 +5975,7 @@ unsigned LoopVectorizationCostModel::getMemoryInstructionCost(Instruction *I,
  // moment.
  if (VF == 1) {
    Type *ValTy = getMemInstValueType(I);
-    const MaybeAlign Alignment = getLoadStoreAlignment(I);
+    const Align Alignment = getLoadStoreAlignment(I);
    unsigned AS = getLoadStoreAddressSpace(I);
 
    return TTI.getAddressComputationCost(ValTy) +
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 828ed6265b4..2d0a95736e1 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -4401,7 +4401,6 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
      setInsertPointAfterBundle(E);
 
      LoadInst *LI = cast<LoadInst>(VL0);
-      Type *ScalarLoadTy = LI->getType();
      unsigned AS = LI->getPointerAddressSpace();
 
      Value *VecPtr = Builder.CreateBitCast(LI->getPointerOperand(),
@@ -4414,9 +4413,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
      if (getTreeEntry(PO))
        ExternalUses.push_back(ExternalUser(PO, cast<User>(VecPtr), 0));
 
-      Align Alignment = DL->getValueOrABITypeAlignment(LI->getAlign(),
-                                                       ScalarLoadTy);
-      LI = Builder.CreateAlignedLoad(VecTy, VecPtr, Alignment);
+      LI = Builder.CreateAlignedLoad(VecTy, VecPtr, LI->getAlign());
      Value *V = propagateMetadata(LI, E->Scalars);
      if (IsReorder) {
        SmallVector Mask;
@@ -4437,7 +4434,6 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
      bool IsReorder = !E->ReorderIndices.empty();
      auto *SI = cast<StoreInst>(
          IsReorder ? E->Scalars[E->ReorderIndices.front()] : VL0);
-      unsigned Alignment = SI->getAlignment();
      unsigned AS = SI->getPointerAddressSpace();
 
      setInsertPointAfterBundle(E);
@@ -4453,7 +4449,8 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
      Value *ScalarPtr = SI->getPointerOperand();
      Value *VecPtr = Builder.CreateBitCast(
          ScalarPtr, VecValue->getType()->getPointerTo(AS));
-      StoreInst *ST = Builder.CreateStore(VecValue, VecPtr);
+      StoreInst *ST = Builder.CreateAlignedStore(VecValue, VecPtr,
+                                                 SI->getAlign());
 
      // The pointer operand uses an in-tree scalar, so add the new BitCast to
      // ExternalUses to make sure that an extract will be generated in the
@@ -4461,10 +4458,6 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
      if (getTreeEntry(ScalarPtr))
        ExternalUses.push_back(ExternalUser(ScalarPtr, cast<User>(VecPtr), 0));
 
-      if (!Alignment)
-        Alignment = DL->getABITypeAlignment(SI->getValueOperand()->getType());
-
-      ST->setAlignment(Align(Alignment));
      Value *V = propagateMetadata(ST, E->Scalars);
      if (NeedToShuffleReuses) {
        V = Builder.CreateShuffleVector(V, UndefValue::get(VecTy),
-- 
2.11.0
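An illustrative sketch of the pattern the patch applies (not part of the diff itself): with alignment now mandatory on loads and stores, LoadInst::getAlign() and StoreInst::getAlign() always return a concrete Align, so the old round-trip through DL.getValueOrABITypeAlignment(MaybeAlign(I->getAlignment()), Ty) disappears. Align, commonAlignment and getAlign() are the existing LLVM APIs used throughout the hunks above; the helper name combinedCopyAlign below is hypothetical and exists only for the example.

// Sketch only, assuming LLVM headers at roughly this revision.
#include "llvm/IR/Instructions.h"   // LoadInst, StoreInst, getLoadStoreAlignment
#include "llvm/Support/Alignment.h" // Align, commonAlignment

using namespace llvm;

// Before: Align A = DL.getValueOrABITypeAlignment(
//             MaybeAlign(LI->getAlignment()), LI->getType());
// After: the instruction itself carries a concrete alignment.
static Align combinedCopyAlign(const StoreInst &SI, const LoadInst &LI) {
  // Both getAlign() calls return Align (never "unset"), so there is no
  // MaybeAlign unwrapping and no DataLayout fallback; commonAlignment
  // simply returns the smaller of the two alignments.
  return commonAlignment(SI.getAlign(), LI.getAlign());
}

This is the same computation the MemCpyOptimizer hunk now spells inline as commonAlignment(SI->getAlign(), LI->getAlign()), and that SROA's simplified getAdjustedAlignment() performs for the load/store-plus-offset case.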