[SVE] Eliminate calls to default-false VectorType::get() from Vectorize

author Christopher Tetreault <ctetreau@quicinc.com>

Fri, 29 May 2020 17:06:26 +0000 (10:06 -0700)

committer Christopher Tetreault <ctetreau@quicinc.com>

Fri, 29 May 2020 18:31:24 +0000 (11:31 -0700)
author Christopher Tetreault <ctetreau@quicinc.com>
Fri, 29 May 2020 17:06:26 +0000 (10:06 -0700)
committer Christopher Tetreault <ctetreau@quicinc.com>
Fri, 29 May 2020 18:31:24 +0000 (11:31 -0700)
diff --git a/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp b/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp

index c02b8f8..4885dd4 100644 (file)
--- a/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
@@ -1030,10 +1030,10 @@ bool Vectorizer::vectorizeStoreChain(
    VectorType *VecTy;
    VectorType *VecStoreTy = dyn_cast<VectorType>(StoreTy);
    if (VecStoreTy)
-    VecTy = VectorType::get(StoreTy->getScalarType(),
-                            Chain.size() * VecStoreTy->getNumElements());
+    VecTy = FixedVectorType::get(StoreTy->getScalarType(),
+                                 Chain.size() * VecStoreTy->getNumElements());
    else
-    VecTy = VectorType::get(StoreTy, Chain.size());
+    VecTy = FixedVectorType::get(StoreTy, Chain.size());
  
    // If it's more than the max vector size or the target has a better
    // vector factor, break it into two pieces.
@@ -1182,10 +1182,10 @@ bool Vectorizer::vectorizeLoadChain(
    VectorType *VecTy;
    VectorType *VecLoadTy = dyn_cast<VectorType>(LoadTy);
    if (VecLoadTy)
-    VecTy = VectorType::get(LoadTy->getScalarType(),
-                            Chain.size() * VecLoadTy->getNumElements());
+    VecTy = FixedVectorType::get(LoadTy->getScalarType(),
+                                 Chain.size() * VecLoadTy->getNumElements());
    else
-    VecTy = VectorType::get(LoadTy, Chain.size());
+    VecTy = FixedVectorType::get(LoadTy, Chain.size());
  
    // If it's more than the max vector size or the target has a better
    // vector factor, break it into two pieces.
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp

index bf19405..eb8709a 100644 (file)
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -767,7 +767,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
          // supported on the target.
          if (ST->getMetadata(LLVMContext::MD_nontemporal)) {
            // Arbitrarily try a vector of 2 elements.
-          Type *VecTy = VectorType::get(T, /*NumElements=*/2);
+          auto *VecTy = FixedVectorType::get(T, /*NumElements=*/2);
            assert(VecTy && "did not find vectorized version of stored type");
            if (!TTI->isLegalNTStore(VecTy, ST->getAlign())) {
              reportVectorizationFailure(
@@ -782,7 +782,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
          if (LD->getMetadata(LLVMContext::MD_nontemporal)) {
            // For nontemporal loads, check that a nontemporal vector version is
            // supported on the target (arbitrarily try a vector of 2 elements).
-          Type *VecTy = VectorType::get(I.getType(), /*NumElements=*/2);
+          auto *VecTy = FixedVectorType::get(I.getType(), /*NumElements=*/2);
            assert(VecTy && "did not find vectorized version of load type");
            if (!TTI->isLegalNTLoad(VecTy, LD->getAlign())) {
              reportVectorizationFailure(
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

index 8d52ddc..5e5f029 100644 (file)
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -311,7 +311,7 @@ static bool hasIrregularType(Type *Ty, const DataLayout &DL, unsigned VF) {
    // Determine if an array of VF elements of type Ty is "bitcast compatible"
    // with a <VF x Ty> vector.
    if (VF > 1) {
-    auto *VectorTy = VectorType::get(Ty, VF);
+    auto *VectorTy = FixedVectorType::get(Ty, VF);
      return VF * DL.getTypeAllocSize(Ty) != DL.getTypeStoreSize(VectorTy);
    }
  
@@ -2074,7 +2074,7 @@ Value *InnerLoopVectorizer::getOrCreateVectorValue(Value *V, unsigned Part) {
        VectorLoopValueMap.setVectorValue(V, Part, VectorValue);
      } else {
        // Initialize packing with insertelements to start from undef.
-      Value *Undef = UndefValue::get(VectorType::get(V->getType(), VF));
+      Value *Undef = UndefValue::get(FixedVectorType::get(V->getType(), VF));
        VectorLoopValueMap.setVectorValue(V, Part, Undef);
        for (unsigned Lane = 0; Lane < VF; ++Lane)
          packScalarIntoVectorValue(V, {Part, Lane});
@@ -2196,7 +2196,7 @@ void InnerLoopVectorizer::vectorizeInterleaveGroup(
    // Prepare for the vector type of the interleaved load/store.
    Type *ScalarTy = getMemInstValueType(Instr);
    unsigned InterleaveFactor = Group->getFactor();
-  Type *VecTy = VectorType::get(ScalarTy, InterleaveFactor * VF);
+  auto *VecTy = FixedVectorType::get(ScalarTy, InterleaveFactor * VF);
  
    // Prepare for the new pointers.
    SmallVector<Value *, 2> AddrParts;
@@ -2300,7 +2300,7 @@ void InnerLoopVectorizer::vectorizeInterleaveGroup(
  
          // If this member has different type, cast the result type.
          if (Member->getType() != ScalarTy) {
-          VectorType *OtherVTy = VectorType::get(Member->getType(), VF);
+          VectorType *OtherVTy = FixedVectorType::get(Member->getType(), VF);
            StridedVec = createBitOrPointerCast(StridedVec, OtherVTy, DL);
          }
  
@@ -2314,7 +2314,7 @@ void InnerLoopVectorizer::vectorizeInterleaveGroup(
    }
  
    // The sub vector type for current instruction.
-  VectorType *SubVT = VectorType::get(ScalarTy, VF);
+  auto *SubVT = FixedVectorType::get(ScalarTy, VF);
  
    // Vectorize the interleaved store group.
    for (unsigned Part = 0; Part < UF; Part++) {
@@ -2385,7 +2385,7 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr,
           "CM decision is not to widen the memory instruction");
  
    Type *ScalarDataTy = getMemInstValueType(Instr);
-  Type *DataTy = VectorType::get(ScalarDataTy, VF);
+  auto *DataTy = FixedVectorType::get(ScalarDataTy, VF);
    const Align Alignment = getLoadStoreAlignment(Instr);
  
    // Determine if the pointer operand of the access is either consecutive or
@@ -2688,7 +2688,7 @@ Value *InnerLoopVectorizer::createBitOrPointerCast(Value *V, VectorType *DstVTy,
           "Only one type should be a floating point type");
    Type *IntTy =
        IntegerType::getIntNTy(V->getContext(), DL.getTypeSizeInBits(SrcElemTy));
-  VectorType *VecIntTy = VectorType::get(IntTy, VF);
+  auto *VecIntTy = FixedVectorType::get(IntTy, VF);
    Value *CastVal = Builder.CreateBitOrPointerCast(V, VecIntTy);
    return Builder.CreateBitOrPointerCast(CastVal, DstVTy);
  }
@@ -3359,7 +3359,7 @@ void InnerLoopVectorizer::truncateToMinimalBitwidths() {
        Type *OriginalTy = I->getType();
        Type *ScalarTruncatedTy =
            IntegerType::get(OriginalTy->getContext(), KV.second);
-      Type *TruncatedTy = VectorType::get(
+      auto *TruncatedTy = FixedVectorType::get(
            ScalarTruncatedTy, cast<VectorType>(OriginalTy)->getNumElements());
        if (TruncatedTy == OriginalTy)
          continue;
@@ -3413,11 +3413,13 @@ void InnerLoopVectorizer::truncateToMinimalBitwidths() {
          auto Elements0 =
              cast<VectorType>(SI->getOperand(0)->getType())->getNumElements();
          auto *O0 = B.CreateZExtOrTrunc(
-            SI->getOperand(0), VectorType::get(ScalarTruncatedTy, Elements0));
+            SI->getOperand(0),
+            FixedVectorType::get(ScalarTruncatedTy, Elements0));
          auto Elements1 =
              cast<VectorType>(SI->getOperand(1)->getType())->getNumElements();
          auto *O1 = B.CreateZExtOrTrunc(
-            SI->getOperand(1), VectorType::get(ScalarTruncatedTy, Elements1));
+            SI->getOperand(1),
+            FixedVectorType::get(ScalarTruncatedTy, Elements1));
  
          NewI = B.CreateShuffleVector(O0, O1, SI->getShuffleMask());
        } else if (isa<LoadInst>(I) || isa<PHINode>(I)) {
@@ -3427,14 +3429,16 @@ void InnerLoopVectorizer::truncateToMinimalBitwidths() {
          auto Elements =
              cast<VectorType>(IE->getOperand(0)->getType())->getNumElements();
          auto *O0 = B.CreateZExtOrTrunc(
-            IE->getOperand(0), VectorType::get(ScalarTruncatedTy, Elements));
+            IE->getOperand(0),
+            FixedVectorType::get(ScalarTruncatedTy, Elements));
          auto *O1 = B.CreateZExtOrTrunc(IE->getOperand(1), ScalarTruncatedTy);
          NewI = B.CreateInsertElement(O0, O1, IE->getOperand(2));
        } else if (auto *EE = dyn_cast<ExtractElementInst>(I)) {
          auto Elements =
              cast<VectorType>(EE->getOperand(0)->getType())->getNumElements();
          auto *O0 = B.CreateZExtOrTrunc(
-            EE->getOperand(0), VectorType::get(ScalarTruncatedTy, Elements));
+            EE->getOperand(0),
+            FixedVectorType::get(ScalarTruncatedTy, Elements));
          NewI = B.CreateExtractElement(O0, EE->getOperand(2));
        } else {
          // If we don't know what to do, be conservative and don't do anything.
@@ -3598,8 +3602,8 @@ void InnerLoopVectorizer::fixFirstOrderRecurrence(PHINode *Phi) {
    if (VF > 1) {
      Builder.SetInsertPoint(LoopVectorPreHeader->getTerminator());
      VectorInit = Builder.CreateInsertElement(
-        UndefValue::get(VectorType::get(VectorInit->getType(), VF)), VectorInit,
-        Builder.getInt32(VF - 1), "vector.recur.init");
+        UndefValue::get(FixedVectorType::get(VectorInit->getType(), VF)),
+        VectorInit, Builder.getInt32(VF - 1), "vector.recur.init");
    }
  
    // We constructed a temporary phi node in the first phase of vectorization.
@@ -3821,7 +3825,7 @@ void InnerLoopVectorizer::fixReduction(PHINode *Phi) {
    // then extend the loop exit value to enable InstCombine to evaluate the
    // entire expression in the smaller type.
    if (VF > 1 && Phi->getType() != RdxDesc.getRecurrenceType()) {
-    Type *RdxVecTy = VectorType::get(RdxDesc.getRecurrenceType(), VF);
+    Type *RdxVecTy = FixedVectorType::get(RdxDesc.getRecurrenceType(), VF);
      Builder.SetInsertPoint(
          LI->getLoopFor(LoopVectorBody)->getLoopLatch()->getTerminator());
      VectorParts RdxParts(UF);
@@ -4148,7 +4152,7 @@ void InnerLoopVectorizer::widenPHIInstruction(Instruction *PN, unsigned UF,
      // Create a vector phi with no operands - the vector phi operands will be
      // set at the end of vector code generation.
      Type *VecTy =
-        (VF == 1) ? PN->getType() : VectorType::get(PN->getType(), VF);
+        (VF == 1) ? PN->getType() : FixedVectorType::get(PN->getType(), VF);
      Value *VecPhi = Builder.CreatePHI(VecTy, PN->getNumOperands(), "vec.phi");
      VectorLoopValueMap.setVectorValue(P, 0, VecPhi);
      OrigPHIsToFix.push_back(P);
@@ -4167,7 +4171,7 @@ void InnerLoopVectorizer::widenPHIInstruction(Instruction *PN, unsigned UF,
      for (unsigned Part = 0; Part < UF; ++Part) {
        // This is phase one of vectorizing PHIs.
        Type *VecTy =
-          (VF == 1) ? PN->getType() : VectorType::get(PN->getType(), VF);
+          (VF == 1) ? PN->getType() : FixedVectorType::get(PN->getType(), VF);
        Value *EntryPart = PHINode::Create(
            VecTy, 2, "vec.phi", &*LoopVectorBody->getFirstInsertionPt());
        VectorLoopValueMap.setVectorValue(P, Part, EntryPart);
@@ -4327,7 +4331,7 @@ void InnerLoopVectorizer::widenInstruction(Instruction &I, VPUser &User,
  
      /// Vectorize casts.
      Type *DestTy =
-        (VF == 1) ? CI->getType() : VectorType::get(CI->getType(), VF);
+        (VF == 1) ? CI->getType() : FixedVectorType::get(CI->getType(), VF);
  
      for (unsigned Part = 0; Part < UF; ++Part) {
        Value *A = State.get(User.getOperand(0), Part);
@@ -4387,7 +4391,8 @@ void InnerLoopVectorizer::widenCallInstruction(CallInst &I, VPUser &ArgOperands,
        // Use vector version of the intrinsic.
        Type *TysForDecl[] = {CI->getType()};
        if (VF > 1)
-        TysForDecl[0] = VectorType::get(CI->getType()->getScalarType(), VF);
+        TysForDecl[0] =
+            FixedVectorType::get(CI->getType()->getScalarType(), VF);
        VectorF = Intrinsic::getDeclaration(M, ID, TysForDecl);
        assert(VectorF && "Can't retrieve vector intrinsic.");
      } else {
@@ -5947,7 +5952,7 @@ unsigned LoopVectorizationCostModel::getInterleaveGroupCost(Instruction *I,
    assert(Group && "Fail to get an interleaved access group.");
  
    unsigned InterleaveFactor = Group->getFactor();
-  VectorType *WideVecTy = VectorType::get(ValTy, VF * InterleaveFactor);
+  auto *WideVecTy = FixedVectorType::get(ValTy, VF * InterleaveFactor);
  
    // Holds the indices of existing members in an interleaved load group.
    // An interleaved store group doesn't need this as it doesn't allow gaps.
@@ -6349,7 +6354,7 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I,
      bool ScalarCond = (SE->isLoopInvariant(CondSCEV, TheLoop));
      Type *CondTy = SI->getCondition()->getType();
      if (!ScalarCond)
-      CondTy = VectorType::get(CondTy, VF);
+      CondTy = FixedVectorType::get(CondTy, VF);
  
      return TTI.getCmpSelInstrCost(I->getOpcode(), VectorTy, CondTy,
                                    CostKind, I);
@@ -7510,8 +7515,8 @@ void VPReplicateRecipe::execute(VPTransformState &State) {
      if (AlsoPack && State.VF > 1) {
        // If we're constructing lane 0, initialize to start from undef.
        if (State.Instance->Lane == 0) {
-        Value *Undef =
-            UndefValue::get(VectorType::get(Ingredient->getType(), State.VF));
+        Value *Undef = UndefValue::get(
+            FixedVectorType::get(Ingredient->getType(), State.VF));
          State.ValueMap.setVectorValue(Ingredient, State.Instance->Part, Undef);
        }
        State.ILV->packScalarIntoVectorValue(Ingredient, *State.Instance);
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

index 1657b9e..4c18fab 100644 (file)
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -3165,7 +3165,7 @@ unsigned BoUpSLP::canMapToVector(Type *T, const DataLayout &DL) const {
  
    if (!isValidElementType(EltTy))
      return 0;
-  uint64_t VTSize = DL.getTypeStoreSizeInBits(VectorType::get(EltTy, N));
+  uint64_t VTSize = DL.getTypeStoreSizeInBits(FixedVectorType::get(EltTy, N));
    if (VTSize < MinVecRegSize || VTSize > MaxVecRegSize || VTSize != DL.getTypeStoreSizeInBits(T))
      return 0;
    return N;
@@ -3265,7 +3265,7 @@ getVectorCallCosts(CallInst *CI, VectorType *VecTy, TargetTransformInfo *TTI,
      SmallVector<Type *, 4> VecTys;
      for (Use &Arg : CI->args())
        VecTys.push_back(
-          VectorType::get(Arg->getType(), VecTy->getNumElements()));
+          FixedVectorType::get(Arg->getType(), VecTy->getNumElements()));
  
      // If the corresponding vector call is cheaper, return its cost.
      LibCost = TTI->getCallInstrCost(nullptr, VecTy, VecTys,
@@ -3425,7 +3425,7 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
        // Calculate the cost of this instruction.
        int ScalarCost = VL.size() * ScalarEltCost;
  
-      VectorType *SrcVecTy = VectorType::get(SrcTy, VL.size());
+      auto *SrcVecTy = FixedVectorType::get(SrcTy, VL.size());
        int VecCost = 0;
        // Check if the values are candidates to demote.
        if (!MinBWs.count(VL0) || VecTy != SrcVecTy) {
@@ -3445,7 +3445,7 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
        if (NeedToShuffleReuses) {
          ReuseShuffleCost -= (ReuseShuffleNumbers - VL.size()) * ScalarEltCost;
        }
-      VectorType *MaskTy = VectorType::get(Builder.getInt1Ty(), VL.size());
+      auto *MaskTy = FixedVectorType::get(Builder.getInt1Ty(), VL.size());
        int ScalarCost = VecTy->getNumElements() * ScalarEltCost;
        int VecCost = TTI->getCmpSelInstrCost(E->getOpcode(), VecTy, MaskTy,
                                              CostKind, VL0);
@@ -3633,8 +3633,8 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
        } else {
          Type *Src0SclTy = E->getMainOp()->getOperand(0)->getType();
          Type *Src1SclTy = E->getAltOp()->getOperand(0)->getType();
-        VectorType *Src0Ty = VectorType::get(Src0SclTy, VL.size());
-        VectorType *Src1Ty = VectorType::get(Src1SclTy, VL.size());
+        auto *Src0Ty = FixedVectorType::get(Src0SclTy, VL.size());
+        auto *Src1Ty = FixedVectorType::get(Src1SclTy, VL.size());
          VecCost = TTI->getCastInstrCost(E->getOpcode(), VecTy, Src0Ty,
                                          CostKind);
          VecCost += TTI->getCastInstrCost(E->getAltOpcode(), VecTy, Src1Ty,
@@ -3807,7 +3807,7 @@ int BoUpSLP::getSpillCost() const {
      if (NumCalls) {
        SmallVector<Type*, 4> V;
        for (auto *II : LiveValues)
-        V.push_back(VectorType::get(II->getType(), BundleWidth));
+        V.push_back(FixedVectorType::get(II->getType(), BundleWidth));
        Cost += NumCalls * TTI->getCostOfKeepingLiveOverCall(V);
      }
  
@@ -4100,7 +4100,7 @@ Value *BoUpSLP::vectorizeTree(ArrayRef<Value *> VL) {
      else
        VL = UniqueValues;
    }
-  VectorType *VecTy = VectorType::get(ScalarTy, VL.size());
+  auto *VecTy = FixedVectorType::get(ScalarTy, VL.size());
  
    Value *V = Gather(VL, VecTy);
    if (!ReuseShuffleIndicies.empty()) {
@@ -4135,7 +4135,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
    Type *ScalarTy = VL0->getType();
    if (StoreInst *SI = dyn_cast<StoreInst>(VL0))
      ScalarTy = SI->getValueOperand()->getType();
-  VectorType *VecTy = VectorType::get(ScalarTy, E->Scalars.size());
+  auto *VecTy = FixedVectorType::get(ScalarTy, E->Scalars.size());
  
    bool NeedToShuffleReuses = !E->ReuseShuffleIndices.empty();
  
@@ -4532,7 +4532,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
        }
  
        Module *M = F->getParent();
-      Type *Tys[] = { VectorType::get(CI->getType(), E->Scalars.size()) };
+      Type *Tys[] = {FixedVectorType::get(CI->getType(), E->Scalars.size())};
        Function *CF = Intrinsic::getDeclaration(M, ID, Tys);
  
        if (!UseIntrinsic) {
@@ -4660,7 +4660,7 @@ BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues) {
        Builder.SetInsertPoint(&*++BasicBlock::iterator(I));
      auto BundleWidth = VectorizableTree[0]->Scalars.size();
      auto *MinTy = IntegerType::get(F->getContext(), MinBWs[ScalarRoot].first);
-    auto *VecTy = VectorType::get(MinTy, BundleWidth);
+    auto *VecTy = FixedVectorType::get(MinTy, BundleWidth);
      auto *Trunc = Builder.CreateTrunc(VectorRoot, VecTy);
      VectorizableTree[0]->VectorizedValue = Trunc;
    }
@@ -5988,7 +5988,7 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
      // No actual vectorization should happen, if number of parts is the same as
      // provided vectorization factor (i.e. the scalar type is used for vector
      // code during codegen).
-    auto *VecTy = VectorType::get(VL[0]->getType(), VF);
+    auto *VecTy = FixedVectorType::get(VL[0]->getType(), VF);
      if (TTI->getNumberOfParts(VecTy) == VF)
        continue;
      for (unsigned I = NextInst; I < MaxInst; ++I) {
author	Christopher Tetreault <ctetreau@quicinc.com>
	Fri, 29 May 2020 17:06:26 +0000 (10:06 -0700)
committer	Christopher Tetreault <ctetreau@quicinc.com>
	Fri, 29 May 2020 18:31:24 +0000 (11:31 -0700)
llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp		patch | blob | history
llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp		patch | blob | history
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp		patch | blob | history
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp		patch | blob | history