From e1ae008085fc0ccb07c31544a07302461cd6132d Mon Sep 17 00:00:00 2001
From: Eric Christopher <echristo@gmail.com>
Date: Thu, 22 Jun 2017 22:58:12 +0000
Subject: [PATCH] Remove the LoadCombine pass. It was never enabled and is
 unsupported.

Based on discussions with the author on mailing lists.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@306067 91177308-0d34-0410-b5e6-96231b3b80d8
---
 include/llvm/InitializePasses.h                    |   1 -
 include/llvm/Transforms/IPO/PassManagerBuilder.h   |   1 -
 include/llvm/Transforms/Scalar.h                   |   6 -
 lib/Passes/PassBuilder.cpp                         |   3 -
 lib/Transforms/IPO/PassManagerBuilder.cpp          |  11 -
 lib/Transforms/Scalar/CMakeLists.txt               |   1 -
 lib/Transforms/Scalar/LoadCombine.cpp              | 295 ---------------------
 lib/Transforms/Scalar/Scalar.cpp                   |   1 -
 test/Transforms/LoadCombine/deadcode.ll            |  39 ---
 test/Transforms/LoadCombine/load-combine-aa.ll     |  63 -----
 test/Transforms/LoadCombine/load-combine-assume.ll |  44 ---
 .../LoadCombine/load-combine-negativegep.ll        |  19 --
 test/Transforms/LoadCombine/load-combine.ll        | 190 -------------
 13 files changed, 674 deletions(-)
 delete mode 100644 lib/Transforms/Scalar/LoadCombine.cpp
 delete mode 100644 test/Transforms/LoadCombine/deadcode.ll
 delete mode 100644 test/Transforms/LoadCombine/load-combine-aa.ll
 delete mode 100644 test/Transforms/LoadCombine/load-combine-assume.ll
 delete mode 100644 test/Transforms/LoadCombine/load-combine-negativegep.ll
 delete mode 100644 test/Transforms/LoadCombine/load-combine.ll

diff --git a/include/llvm/InitializePasses.h b/include/llvm/InitializePasses.h
index abb0aa3e3ca..a52fa3b542a 100644
--- a/include/llvm/InitializePasses.h
+++ b/include/llvm/InitializePasses.h
@@ -192,7 +192,6 @@ void initializeLiveRangeShrinkPass(PassRegistry&);
 void initializeLiveRegMatrixPass(PassRegistry&);
 void initializeLiveStacksPass(PassRegistry&);
 void initializeLiveVariablesPass(PassRegistry&);
-void initializeLoadCombinePass(PassRegistry&);
 void initializeLoadStoreVectorizerPass(PassRegistry&);
 void initializeLoaderPassPass(PassRegistry&);
 void initializeLocalStackSlotPassPass(PassRegistry&);
diff --git a/include/llvm/Transforms/IPO/PassManagerBuilder.h b/include/llvm/Transforms/IPO/PassManagerBuilder.h
index 247382c35ee..db4bfb15f51 100644
--- a/include/llvm/Transforms/IPO/PassManagerBuilder.h
+++ b/include/llvm/Transforms/IPO/PassManagerBuilder.h
@@ -149,7 +149,6 @@ public:
   bool SLPVectorize;
   bool LoopVectorize;
   bool RerollLoops;
-  bool LoadCombine;
   bool NewGVN;
   bool DisableGVNLoadPRE;
   bool VerifyInput;
diff --git a/include/llvm/Transforms/Scalar.h b/include/llvm/Transforms/Scalar.h
index 856c288a071..1913a9d5da0 100644
--- a/include/llvm/Transforms/Scalar.h
+++ b/include/llvm/Transforms/Scalar.h
@@ -487,12 +487,6 @@ FunctionPass *createSpeculativeExecutionIfHasBranchDivergencePass();
 
 //===----------------------------------------------------------------------===//
 //
-// LoadCombine - Combine loads into bigger loads.
-//
-BasicBlockPass *createLoadCombinePass();
-
-//===----------------------------------------------------------------------===//
-//
 // StraightLineStrengthReduce - This pass strength-reduces some certain
 // instruction patterns in straight-line code.
 //
diff --git a/lib/Passes/PassBuilder.cpp b/lib/Passes/PassBuilder.cpp
index afd66f55720..f4e0b8dcf4a 100644
--- a/lib/Passes/PassBuilder.cpp
+++ b/lib/Passes/PassBuilder.cpp
@@ -923,9 +923,6 @@ ModulePassManager PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,
   MainFPM.add(AlignmentFromAssumptionsPass());
 #endif
 
-  // FIXME: Conditionally run LoadCombine here, after it's ported
-  // (in case we still have this pass, given its questionable usefulness).
-
   // FIXME: add peephole extensions to the PM here.
   MainFPM.addPass(InstCombinePass());
   MainFPM.addPass(JumpThreadingPass());
diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp
index 4bc64ab698f..c876123d815 100644
--- a/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -72,10 +72,6 @@ static cl::opt<bool>
 RunLoopRerolling("reroll-loops", cl::Hidden,
                  cl::desc("Run the loop rerolling pass"));
 
-static cl::opt<bool> RunLoadCombine("combine-loads", cl::init(false),
-                                    cl::Hidden,
-                                    cl::desc("Run the load combining pass"));
-
 static cl::opt<bool> RunNewGVN("enable-newgvn", cl::init(false), cl::Hidden,
                                cl::desc("Run the NewGVN pass"));
 
@@ -174,7 +170,6 @@ PassManagerBuilder::PassManagerBuilder() {
   SLPVectorize = RunSLPVectorization;
   LoopVectorize = RunLoopVectorization;
   RerollLoops = RunLoopRerolling;
-  LoadCombine = RunLoadCombine;
   NewGVN = RunNewGVN;
   DisableGVNLoadPRE = false;
   VerifyInput = false;
@@ -407,9 +402,6 @@ void PassManagerBuilder::addFunctionSimplificationPasses(
     }
   }
 
-  if (LoadCombine)
-    MPM.add(createLoadCombinePass());
-
   MPM.add(createAggressiveDCEPass());     // Delete dead instructions
   MPM.add(createCFGSimplificationPass()); // Merge & remove BBs
   // Clean up after everything.
@@ -850,9 +842,6 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) {
   // alignments.
   PM.add(createAlignmentFromAssumptionsPass());
 
-  if (LoadCombine)
-    PM.add(createLoadCombinePass());
-
   // Cleanup and simplify the code after the scalar optimizations.
   addInstructionCombiningPass(PM);
   addExtensionsToPM(EP_Peephole, PM);
diff --git a/lib/Transforms/Scalar/CMakeLists.txt b/lib/Transforms/Scalar/CMakeLists.txt
index f5196cc4618..457c9427ab9 100644
--- a/lib/Transforms/Scalar/CMakeLists.txt
+++ b/lib/Transforms/Scalar/CMakeLists.txt
@@ -22,7 +22,6 @@ add_llvm_library(LLVMScalarOpts
   LICM.cpp
   LoopAccessAnalysisPrinter.cpp
   LoopSink.cpp
-  LoadCombine.cpp
   LoopDeletion.cpp
   LoopDataPrefetch.cpp
   LoopDistribute.cpp
diff --git a/lib/Transforms/Scalar/LoadCombine.cpp b/lib/Transforms/Scalar/LoadCombine.cpp
deleted file mode 100644
index 025ba1bfedc..00000000000
--- a/lib/Transforms/Scalar/LoadCombine.cpp
+++ /dev/null
@@ -1,295 +0,0 @@
-//===- LoadCombine.cpp - Combine Adjacent Loads ---------------------------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-/// \file
-/// This transformation combines adjacent loads.
-///
-//===----------------------------------------------------------------------===//
-
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Analysis/AliasSetTracker.h"
-#include "llvm/Analysis/GlobalsModRef.h"
-#include "llvm/Analysis/TargetFolder.h"
-#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/Dominators.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/IRBuilder.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/Module.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/Scalar.h"
-
-using namespace llvm;
-
-#define DEBUG_TYPE "load-combine"
-
-STATISTIC(NumLoadsAnalyzed, "Number of loads analyzed for combining");
-STATISTIC(NumLoadsCombined, "Number of loads combined");
-
-#define LDCOMBINE_NAME "Combine Adjacent Loads"
-
-namespace {
-struct PointerOffsetPair {
-  Value *Pointer;
-  APInt Offset;
-};
-
-struct LoadPOPPair {
-  LoadInst *Load;
-  PointerOffsetPair POP;
-  /// \brief The new load needs to be created before the first load in IR order.
-  unsigned InsertOrder;
-};
-
-class LoadCombine : public BasicBlockPass {
-  LLVMContext *C;
-  AliasAnalysis *AA;
-  DominatorTree *DT;
-
-public:
-  LoadCombine() : BasicBlockPass(ID), C(nullptr), AA(nullptr) {
-    initializeLoadCombinePass(*PassRegistry::getPassRegistry());
-  }
-
-  using llvm::Pass::doInitialization;
-  bool doInitialization(Function &) override;
-  bool runOnBasicBlock(BasicBlock &BB) override;
-  void getAnalysisUsage(AnalysisUsage &AU) const override {
-    AU.setPreservesCFG();
-    AU.addRequired<AAResultsWrapperPass>();
-    AU.addRequired<DominatorTreeWrapperPass>();
-    AU.addPreserved<GlobalsAAWrapperPass>();
-  }
-
-  StringRef getPassName() const override { return LDCOMBINE_NAME; }
-  static char ID;
-
-  typedef IRBuilder<TargetFolder> BuilderTy;
-
-private:
-  BuilderTy *Builder;
-
-  PointerOffsetPair getPointerOffsetPair(LoadInst &);
-  bool combineLoads(DenseMap<const Value *, SmallVector<LoadPOPPair, 8>> &);
-  bool aggregateLoads(SmallVectorImpl<LoadPOPPair> &);
-  bool combineLoads(SmallVectorImpl<LoadPOPPair> &);
-};
-}
-
-bool LoadCombine::doInitialization(Function &F) {
-  DEBUG(dbgs() << "LoadCombine function: " << F.getName() << "\n");
-  C = &F.getContext();
-  return true;
-}
-
-PointerOffsetPair LoadCombine::getPointerOffsetPair(LoadInst &LI) {
-  auto &DL = LI.getModule()->getDataLayout();
-
-  PointerOffsetPair POP;
-  POP.Pointer = LI.getPointerOperand();
-  unsigned BitWidth = DL.getPointerSizeInBits(LI.getPointerAddressSpace());
-  POP.Offset = APInt(BitWidth, 0);
-
-  while (isa<BitCastOperator>(POP.Pointer) || isa<GEPOperator>(POP.Pointer)) {
-    if (auto *GEP = dyn_cast<GEPOperator>(POP.Pointer)) {
-      APInt LastOffset = POP.Offset;
-      if (!GEP->accumulateConstantOffset(DL, POP.Offset)) {
-        // Can't handle GEPs with variable indices.
-        POP.Offset = LastOffset;
-        return POP;
-      }
-      POP.Pointer = GEP->getPointerOperand();
-    } else if (auto *BC = dyn_cast<BitCastOperator>(POP.Pointer)) {
-      POP.Pointer = BC->getOperand(0);
-    }
-  }
-  return POP;
-}
-
-bool LoadCombine::combineLoads(
-    DenseMap<const Value *, SmallVector<LoadPOPPair, 8>> &LoadMap) {
-  bool Combined = false;
-  for (auto &Loads : LoadMap) {
-    if (Loads.second.size() < 2)
-      continue;
-    std::sort(Loads.second.begin(), Loads.second.end(),
-              [](const LoadPOPPair &A, const LoadPOPPair &B) {
-                return A.POP.Offset.slt(B.POP.Offset);
-              });
-    if (aggregateLoads(Loads.second))
-      Combined = true;
-  }
-  return Combined;
-}
-
-/// \brief Try to aggregate loads from a sorted list of loads to be combined.
-///
-/// It is guaranteed that no writes occur between any of the loads. All loads
-/// have the same base pointer. There are at least two loads.
-bool LoadCombine::aggregateLoads(SmallVectorImpl<LoadPOPPair> &Loads) {
-  assert(Loads.size() >= 2 && "Insufficient loads!");
-  LoadInst *BaseLoad = nullptr;
-  SmallVector<LoadPOPPair, 8> AggregateLoads;
-  bool Combined = false;
-  bool ValidPrevOffset = false;
-  APInt PrevOffset;
-  uint64_t PrevSize = 0;
-  for (auto &L : Loads) {
-    if (ValidPrevOffset == false) {
-      BaseLoad = L.Load;
-      PrevOffset = L.POP.Offset;
-      PrevSize = L.Load->getModule()->getDataLayout().getTypeStoreSize(
-          L.Load->getType());
-      AggregateLoads.push_back(L);
-      ValidPrevOffset = true;
-      continue;
-    }
-    if (L.Load->getAlignment() > BaseLoad->getAlignment())
-      continue;
-    APInt PrevEnd = PrevOffset + PrevSize;
-    if (L.POP.Offset.sgt(PrevEnd)) {
-      // No other load will be combinable
-      if (combineLoads(AggregateLoads))
-        Combined = true;
-      AggregateLoads.clear();
-      ValidPrevOffset = false;
-      continue;
-    }
-    if (L.POP.Offset != PrevEnd)
-      // This load is offset less than the size of the last load.
-      // FIXME: We may want to handle this case.
-      continue;
-    PrevOffset = L.POP.Offset;
-    PrevSize = L.Load->getModule()->getDataLayout().getTypeStoreSize(
-        L.Load->getType());
-    AggregateLoads.push_back(L);
-  }
-  if (combineLoads(AggregateLoads))
-    Combined = true;
-  return Combined;
-}
-
-/// \brief Given a list of combinable loads, combine the maximum number of them.
-bool LoadCombine::combineLoads(SmallVectorImpl<LoadPOPPair> &Loads) {
-  // Remove loads from the end while the size is not a power of 2.
-  unsigned TotalSize = 0;
-  for (const auto &L : Loads)
-    TotalSize += L.Load->getType()->getPrimitiveSizeInBits();
-  while (TotalSize != 0 && !isPowerOf2_32(TotalSize))
-    TotalSize -= Loads.pop_back_val().Load->getType()->getPrimitiveSizeInBits();
-  if (Loads.size() < 2)
-    return false;
-
-  DEBUG({
-    dbgs() << "***** Combining Loads ******\n";
-    for (const auto &L : Loads) {
-      dbgs() << L.POP.Offset << ": " << *L.Load << "\n";
-    }
-  });
-
-  // Find first load. This is where we put the new load.
-  LoadPOPPair FirstLP;
-  FirstLP.InsertOrder = -1u;
-  for (const auto &L : Loads)
-    if (L.InsertOrder < FirstLP.InsertOrder)
-      FirstLP = L;
-
-  unsigned AddressSpace =
-      FirstLP.POP.Pointer->getType()->getPointerAddressSpace();
-
-  Builder->SetInsertPoint(FirstLP.Load);
-  Value *Ptr = Builder->CreateConstGEP1_64(
-      Builder->CreatePointerCast(Loads[0].POP.Pointer,
-                                 Builder->getInt8PtrTy(AddressSpace)),
-      Loads[0].POP.Offset.getSExtValue());
-  LoadInst *NewLoad = new LoadInst(
-      Builder->CreatePointerCast(
-          Ptr, PointerType::get(IntegerType::get(Ptr->getContext(), TotalSize),
-                                Ptr->getType()->getPointerAddressSpace())),
-      Twine(Loads[0].Load->getName()) + ".combined", false,
-      Loads[0].Load->getAlignment(), FirstLP.Load);
-
-  for (const auto &L : Loads) {
-    Builder->SetInsertPoint(L.Load);
-    Value *V = Builder->CreateExtractInteger(
-        L.Load->getModule()->getDataLayout(), NewLoad,
-        cast<IntegerType>(L.Load->getType()),
-        (L.POP.Offset - Loads[0].POP.Offset).getZExtValue(), "combine.extract");
-    L.Load->replaceAllUsesWith(V);
-  }
-
-  NumLoadsCombined += Loads.size();
-  return true;
-}
-
-bool LoadCombine::runOnBasicBlock(BasicBlock &BB) {
-  if (skipBasicBlock(BB))
-    return false;
-
-  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
-  DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
-
-  // Skip analysing dead blocks (not forward reachable from function entry).
-  if (!DT->isReachableFromEntry(&BB)) {
-    DEBUG(dbgs() << "LC: skipping unreachable " << BB.getName() <<
-          " in " << BB.getParent()->getName() << "\n");
-    return false;
-  }
-
-  IRBuilder<TargetFolder> TheBuilder(
-      BB.getContext(), TargetFolder(BB.getModule()->getDataLayout()));
-  Builder = &TheBuilder;
-
-  DenseMap<const Value *, SmallVector<LoadPOPPair, 8>> LoadMap;
-  AliasSetTracker AST(*AA);
-
-  bool Combined = false;
-  unsigned Index = 0;
-  for (auto &I : BB) {
-    if (I.mayThrow() || AST.containsUnknown(&I)) {
-      if (combineLoads(LoadMap))
-        Combined = true;
-      LoadMap.clear();
-      AST.clear();
-      continue;
-    }
-    if (I.mayWriteToMemory()) {
-      AST.add(&I);
-      continue;
-    }
-    LoadInst *LI = dyn_cast<LoadInst>(&I);
-    if (!LI)
-      continue;
-    ++NumLoadsAnalyzed;
-    if (!LI->isSimple() || !LI->getType()->isIntegerTy())
-      continue;
-    auto POP = getPointerOffsetPair(*LI);
-    if (!POP.Pointer)
-      continue;
-    LoadMap[POP.Pointer].push_back({LI, std::move(POP), Index++});
-    AST.add(LI);
-  }
-  if (combineLoads(LoadMap))
-    Combined = true;
-  return Combined;
-}
-
-char LoadCombine::ID = 0;
-
-BasicBlockPass *llvm::createLoadCombinePass() {
-  return new LoadCombine();
-}
-
-INITIALIZE_PASS_BEGIN(LoadCombine, "load-combine", LDCOMBINE_NAME, false, false)
-INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
-INITIALIZE_PASS_END(LoadCombine, "load-combine", LDCOMBINE_NAME, false, false)
diff --git a/lib/Transforms/Scalar/Scalar.cpp b/lib/Transforms/Scalar/Scalar.cpp
index 850a01114ee..ce6f93eb0c1 100644
--- a/lib/Transforms/Scalar/Scalar.cpp
+++ b/lib/Transforms/Scalar/Scalar.cpp
@@ -91,7 +91,6 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) {
   initializeSeparateConstOffsetFromGEPPass(Registry);
   initializeSpeculativeExecutionLegacyPassPass(Registry);
   initializeStraightLineStrengthReducePass(Registry);
-  initializeLoadCombinePass(Registry);
   initializePlaceBackedgeSafepointsImplPass(Registry);
   initializePlaceSafepointsPass(Registry);
   initializeFloat2IntLegacyPassPass(Registry);
diff --git a/test/Transforms/LoadCombine/deadcode.ll b/test/Transforms/LoadCombine/deadcode.ll
deleted file mode 100644
index ed72824ffb4..00000000000
--- a/test/Transforms/LoadCombine/deadcode.ll
+++ /dev/null
@@ -1,39 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -load-combine -S < %s | FileCheck %s
-
-; It has been detected that dead loops like the one in this test case can be
-; created by -jump-threading (it was detected by a csmith generated program).
-;
-; According to -verify this is valid input (even if it could be discussed if
-; the dead loop really satisfies SSA form).
-;
-; The problem found was that the -load-combine pass ends up in an infinite loop
-; when analysing the 'bb1' basic block.
-define void @test1() {
-; CHECK-LABEL: @test1(
-; CHECK-NEXT:    ret void
-; CHECK:       bb1:
-; CHECK-NEXT:    [[_TMP4:%.*]] = load i16, i16* [[_TMP10:%.*]], align 1
-; CHECK-NEXT:    [[_TMP10]] = getelementptr i16, i16* [[_TMP10]], i16 1
-; CHECK-NEXT:    br label [[BB1:%.*]]
-; CHECK:       bb2:
-; CHECK-NEXT:    [[_TMP7:%.*]] = load i16, i16* [[_TMP12:%.*]], align 1
-; CHECK-NEXT:    [[_TMP12]] = getelementptr i16, i16* [[_TMP12]], i16 1
-; CHECK-NEXT:    br label [[BB2:%.*]]
-;
-  ret void
-
-bb1:
-  %_tmp4 = load i16, i16* %_tmp10, align 1
-  %_tmp10 = getelementptr i16, i16* %_tmp10, i16 1
-  br label %bb1
-
-; A second basic block. Running the test with -debug-pass=Executions shows
-; that we only run the Dominator Tree Construction one time for each function,
-; also when having multiple basic blocks in the function.
-bb2:
-  %_tmp7 = load i16, i16* %_tmp12, align 1
-  %_tmp12 = getelementptr i16, i16* %_tmp12, i16 1
-  br label %bb2
-
-}
diff --git a/test/Transforms/LoadCombine/load-combine-aa.ll b/test/Transforms/LoadCombine/load-combine-aa.ll
deleted file mode 100644
index 5a577516fb4..00000000000
--- a/test/Transforms/LoadCombine/load-combine-aa.ll
+++ /dev/null
@@ -1,63 +0,0 @@
-; RUN: opt -basicaa -load-combine -S < %s | FileCheck %s
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-unknown-linux-gnu"
-
-define i64 @test1(i32* nocapture readonly noalias %a, i32* nocapture readonly noalias %b) {
-; CHECK-LABEL: @test1
-
-; CHECK: load i64, i64*
-; CHECK: ret i64
-
-  %load1 = load i32, i32* %a, align 4
-  %conv = zext i32 %load1 to i64
-  %arrayidx1 = getelementptr inbounds i32, i32* %a, i64 1
-  store i32 %load1, i32* %b, align 4
-  %load2 = load i32, i32* %arrayidx1, align 4
-  %conv2 = zext i32 %load2 to i64
-  %shl = shl nuw i64 %conv2, 32
-  %add = or i64 %shl, %conv
-  ret i64 %add
-}
-
-define i64 @test2(i32* nocapture readonly %a, i32* nocapture readonly %b) {
-; CHECK-LABEL: @test2
-
-; CHECK-NOT: load i64
-; CHECK: load i32, i32*
-; CHECK: load i32, i32*
-; CHECK: ret i64
-
-  %load1 = load i32, i32* %a, align 4
-  %conv = zext i32 %load1 to i64
-  %arrayidx1 = getelementptr inbounds i32, i32* %a, i64 1
-  store i32 %load1, i32* %b, align 4
-  %load2 = load i32, i32* %arrayidx1, align 4
-  %conv2 = zext i32 %load2 to i64
-  %shl = shl nuw i64 %conv2, 32
-  %add = or i64 %shl, %conv
-  ret i64 %add
-}
-
-%rec11 = type { i16, i16, i16 }
-@str = global %rec11 { i16 1, i16 2, i16 3 }
-
-; PR31517 - Check that loads which span an aliasing store are not combined.
-define i16 @test3() {
-; CHECK-LABEL: @test3
-
-; CHECK-NOT: load i32
-; CHECK: load i16, i16*
-; CHECK: store i16
-; CHECK: load i16, i16*
-; CHECK: ret i16
-
-  %_tmp9 = getelementptr %rec11, %rec11* @str, i16 0, i32 1
-  %_tmp10 = load i16, i16* %_tmp9
-  %_tmp12 = getelementptr %rec11, %rec11* @str, i16 0, i32 0
-  store i16 %_tmp10, i16* %_tmp12
-  %_tmp13 = getelementptr %rec11, %rec11* @str, i16 0, i32 0
-  %_tmp14 = load i16, i16* %_tmp13
-  %_tmp15 = icmp eq i16 %_tmp14, 3
-  %_tmp16 = select i1 %_tmp15, i16 1, i16 0
-  ret i16 %_tmp16
-}
diff --git a/test/Transforms/LoadCombine/load-combine-assume.ll b/test/Transforms/LoadCombine/load-combine-assume.ll
deleted file mode 100644
index 2d6d160f12f..00000000000
--- a/test/Transforms/LoadCombine/load-combine-assume.ll
+++ /dev/null
@@ -1,44 +0,0 @@
-; RUN: opt -basicaa -load-combine -instcombine -S < %s | FileCheck %s
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-unknown-linux-gnu"
-
-declare void @llvm.assume(i1) nounwind
-
-; 'load' before the 'call' gets optimized:
-define i64 @test1(i32* nocapture readonly %a, i1 %b) {
-; CHECK-LABEL: @test1
-
-; CHECK-DAG: load i64, i64* %1, align 4
-; CHECK-DAG: tail call void @llvm.assume(i1 %b)
-; CHECK: ret i64
-
-  %load1 = load i32, i32* %a, align 4
-  %conv = zext i32 %load1 to i64
-  %arrayidx1 = getelementptr inbounds i32, i32* %a, i64 1
-  %load2 = load i32, i32* %arrayidx1, align 4
-  tail call void @llvm.assume(i1 %b)
-  %conv2 = zext i32 %load2 to i64
-  %shl = shl nuw i64 %conv2, 32
-  %add = or i64 %shl, %conv
-  ret i64 %add
-}
-
-; 'call' before the 'load' doesn't get optimized:
-define i64 @test2(i32* nocapture readonly %a, i1 %b) {
-; CHECK-LABEL: @test2
-
-; CHECK-DAG: load i64, i64* %1, align 4
-; CHECK-DAG: tail call void @llvm.assume(i1 %b)
-; CHECK: ret i64
-
-  %load1 = load i32, i32* %a, align 4
-  %conv = zext i32 %load1 to i64
-  %arrayidx1 = getelementptr inbounds i32, i32* %a, i64 1
-  tail call void @llvm.assume(i1 %b)
-  %load2 = load i32, i32* %arrayidx1, align 4
-  %conv2 = zext i32 %load2 to i64
-  %shl = shl nuw i64 %conv2, 32
-  %add = or i64 %shl, %conv
-  ret i64 %add
-}
-
diff --git a/test/Transforms/LoadCombine/load-combine-negativegep.ll b/test/Transforms/LoadCombine/load-combine-negativegep.ll
deleted file mode 100644
index 7c5700b4295..00000000000
--- a/test/Transforms/LoadCombine/load-combine-negativegep.ll
+++ /dev/null
@@ -1,19 +0,0 @@
-; RUN: opt -basicaa -load-combine -S < %s | FileCheck %s
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-unknown-linux-gnu"
-
-define i32 @Load_NegGep(i32* %i){
-  %1 = getelementptr inbounds i32, i32* %i, i64 -1
-  %2 = load i32, i32* %1, align 4
-  %3 = load i32, i32* %i, align 4
-  %4 = add nsw i32 %3, %2
-  ret i32 %4
-; CHECK-LABEL: @Load_NegGep(
-; CHECK: %[[load:.*]] = load i64
-; CHECK: %[[combine_extract_lo:.*]] = trunc i64 %[[load]] to i32
-; CHECK: %[[combine_extract_shift:.*]] = lshr i64 %[[load]], 32
-; CHECK: %[[combine_extract_hi:.*]] = trunc i64 %[[combine_extract_shift]] to i32
-; CHECK: %[[add:.*]] = add nsw i32 %[[combine_extract_hi]], %[[combine_extract_lo]]
-}
-
-
diff --git a/test/Transforms/LoadCombine/load-combine.ll b/test/Transforms/LoadCombine/load-combine.ll
deleted file mode 100644
index d5068787639..00000000000
--- a/test/Transforms/LoadCombine/load-combine.ll
+++ /dev/null
@@ -1,190 +0,0 @@
-; RUN: opt < %s -load-combine -instcombine -S | FileCheck %s
-
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-unknown-linux-gnu"
-
-; Combine read from char* idiom.
-define i64 @LoadU64_x64_0(i64* %pData) {
-  %1 = bitcast i64* %pData to i8*
-  %2 = load i8, i8* %1, align 1
-  %3 = zext i8 %2 to i64
-  %4 = shl nuw i64 %3, 56
-  %5 = getelementptr inbounds i8, i8* %1, i64 1
-  %6 = load i8, i8* %5, align 1
-  %7 = zext i8 %6 to i64
-  %8 = shl nuw nsw i64 %7, 48
-  %9 = or i64 %8, %4
-  %10 = getelementptr inbounds i8, i8* %1, i64 2
-  %11 = load i8, i8* %10, align 1
-  %12 = zext i8 %11 to i64
-  %13 = shl nuw nsw i64 %12, 40
-  %14 = or i64 %9, %13
-  %15 = getelementptr inbounds i8, i8* %1, i64 3
-  %16 = load i8, i8* %15, align 1
-  %17 = zext i8 %16 to i64
-  %18 = shl nuw nsw i64 %17, 32
-  %19 = or i64 %14, %18
-  %20 = getelementptr inbounds i8, i8* %1, i64 4
-  %21 = load i8, i8* %20, align 1
-  %22 = zext i8 %21 to i64
-  %23 = shl nuw nsw i64 %22, 24
-  %24 = or i64 %19, %23
-  %25 = getelementptr inbounds i8, i8* %1, i64 5
-  %26 = load i8, i8* %25, align 1
-  %27 = zext i8 %26 to i64
-  %28 = shl nuw nsw i64 %27, 16
-  %29 = or i64 %24, %28
-  %30 = getelementptr inbounds i8, i8* %1, i64 6
-  %31 = load i8, i8* %30, align 1
-  %32 = zext i8 %31 to i64
-  %33 = shl nuw nsw i64 %32, 8
-  %34 = or i64 %29, %33
-  %35 = getelementptr inbounds i8, i8* %1, i64 7
-  %36 = load i8, i8* %35, align 1
-  %37 = zext i8 %36 to i64
-  %38 = or i64 %34, %37
-  ret i64 %38
-; CHECK-LABEL: @LoadU64_x64_0(
-; CHECK: load i64, i64* %{{.*}}, align 1
-; CHECK-NOT: load
-}
-
-; Combine simple adjacent loads.
-define i32 @"2xi16_i32"(i16* %x) { - %1 = load i16, i16* %x, align 2 - %2 = getelementptr inbounds i16, i16* %x, i64 1 - %3 = load i16, i16* %2, align 2 - %4 = zext i16 %3 to i32 - %5 = shl nuw i32 %4, 16 - %6 = zext i16 %1 to i32 - %7 = or i32 %5, %6 - ret i32 %7 -; CHECK-LABEL: @"2xi16_i32"( -; CHECK: load i32, i32* %{{.*}}, align 2 -; CHECK-NOT: load -} - -; Don't combine loads across stores. -define i32 @"2xi16_i32_store"(i16* %x, i16* %y) { - %1 = load i16, i16* %x, align 2 - store i16 0, i16* %y, align 2 - %2 = getelementptr inbounds i16, i16* %x, i64 1 - %3 = load i16, i16* %2, align 2 - %4 = zext i16 %3 to i32 - %5 = shl nuw i32 %4, 16 - %6 = zext i16 %1 to i32 - %7 = or i32 %5, %6 - ret i32 %7 -; CHECK-LABEL: @"2xi16_i32_store"( -; CHECK: load i16, i16* %{{.*}}, align 2 -; CHECK: store -; CHECK: load i16, i16* %{{.*}}, align 2 -} - -; Don't combine loads with a gap. -define i32 @"2xi16_i32_gap"(i16* %x) { - %1 = load i16, i16* %x, align 2 - %2 = getelementptr inbounds i16, i16* %x, i64 2 - %3 = load i16, i16* %2, align 2 - %4 = zext i16 %3 to i32 - %5 = shl nuw i32 %4, 16 - %6 = zext i16 %1 to i32 - %7 = or i32 %5, %6 - ret i32 %7 -; CHECK-LABEL: @"2xi16_i32_gap"( -; CHECK: load i16, i16* %{{.*}}, align 2 -; CHECK: load i16, i16* %{{.*}}, align 2 -} - -; Combine out of order loads. -define i32 @"2xi16_i32_order"(i16* %x) { - %1 = getelementptr inbounds i16, i16* %x, i64 1 - %2 = load i16, i16* %1, align 2 - %3 = zext i16 %2 to i32 - %4 = load i16, i16* %x, align 2 - %5 = shl nuw i32 %3, 16 - %6 = zext i16 %4 to i32 - %7 = or i32 %5, %6 - ret i32 %7 -; CHECK-LABEL: @"2xi16_i32_order"( -; CHECK: load i32, i32* %{{.*}}, align 2 -; CHECK-NOT: load -} - -; Overlapping loads. -define i32 @"2xi16_i32_overlap"(i8* %x) { - %1 = bitcast i8* %x to i16* - %2 = load i16, i16* %1, align 2 - %3 = getelementptr inbounds i8, i8* %x, i64 1 - %4 = bitcast i8* %3 to i16* - %5 = load i16, i16* %4, align 2 - %6 = zext i16 %5 to i32 - %7 = shl nuw i32 %6, 16 - %8 = zext i16 %2 to i32 - %9 = or i32 %7, %8 - ret i32 %9 -; CHECK-LABEL: @"2xi16_i32_overlap"( -; CHECK: load i16, i16* %{{.*}}, align 2 -; CHECK: load i16, i16* %{{.*}}, align 2 -} - -; Combine valid alignments. -define i64 @"2xi16_i64_align"(i8* %x) { - %1 = bitcast i8* %x to i32* - %2 = load i32, i32* %1, align 4 - %3 = getelementptr inbounds i8, i8* %x, i64 4 - %4 = bitcast i8* %3 to i16* - %5 = load i16, i16* %4, align 2 - %6 = getelementptr inbounds i8, i8* %x, i64 6 - %7 = bitcast i8* %6 to i16* - %8 = load i16, i16* %7, align 2 - %9 = zext i16 %8 to i64 - %10 = shl nuw i64 %9, 48 - %11 = zext i16 %5 to i64 - %12 = shl nuw nsw i64 %11, 32 - %13 = zext i32 %2 to i64 - %14 = or i64 %12, %13 - %15 = or i64 %14, %10 - ret i64 %15 -; CHECK-LABEL: @"2xi16_i64_align"( -; CHECK: load i64, i64* %{{.*}}, align 4 -} - -; Non power of two. 
-define i64 @"2xi16_i64_npo2"(i8* %x) { - %1 = load i8, i8* %x, align 1 - %2 = zext i8 %1 to i64 - %3 = getelementptr inbounds i8, i8* %x, i64 1 - %4 = load i8, i8* %3, align 1 - %5 = zext i8 %4 to i64 - %6 = shl nuw nsw i64 %5, 8 - %7 = or i64 %6, %2 - %8 = getelementptr inbounds i8, i8* %x, i64 2 - %9 = load i8, i8* %8, align 1 - %10 = zext i8 %9 to i64 - %11 = shl nuw nsw i64 %10, 16 - %12 = or i64 %11, %7 - %13 = getelementptr inbounds i8, i8* %x, i64 3 - %14 = load i8, i8* %13, align 1 - %15 = zext i8 %14 to i64 - %16 = shl nuw nsw i64 %15, 24 - %17 = or i64 %16, %12 - %18 = getelementptr inbounds i8, i8* %x, i64 4 - %19 = load i8, i8* %18, align 1 - %20 = zext i8 %19 to i64 - %21 = shl nuw nsw i64 %20, 32 - %22 = or i64 %21, %17 - %23 = getelementptr inbounds i8, i8* %x, i64 5 - %24 = load i8, i8* %23, align 1 - %25 = zext i8 %24 to i64 - %26 = shl nuw nsw i64 %25, 40 - %27 = or i64 %26, %22 - %28 = getelementptr inbounds i8, i8* %x, i64 6 - %29 = load i8, i8* %28, align 1 - %30 = zext i8 %29 to i64 - %31 = shl nuw nsw i64 %30, 48 - %32 = or i64 %31, %27 - ret i64 %32 -; CHECK-LABEL: @"2xi16_i64_npo2"( -; CHECK: load i32, i32* %{{.*}}, align 1 -} -- 2.11.0