[SLP] Fix vectorization for tree with trunc to minimum required bit width.

author Alexey Bataev <a.bataev@hotmail.com>

Fri, 19 Jan 2018 14:40:13 +0000 (14:40 +0000)

committer Alexey Bataev <a.bataev@hotmail.com>

Fri, 19 Jan 2018 14:40:13 +0000 (14:40 +0000)
author Alexey Bataev <a.bataev@hotmail.com>
Fri, 19 Jan 2018 14:40:13 +0000 (14:40 +0000)
committer Alexey Bataev <a.bataev@hotmail.com>
Fri, 19 Jan 2018 14:40:13 +0000 (14:40 +0000)
diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp

index 90f84e0..f748ba4 100644 (file)
--- a/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -2065,7 +2065,10 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
                                                           VL0->getType(), SrcTy, VL0);
  
        VectorType *SrcVecTy = VectorType::get(SrcTy, VL.size());
-      int VecCost = TTI->getCastInstrCost(VL0->getOpcode(), VecTy, SrcVecTy, VL0);
+      int VecCost = 0;
+      // Check if the values are candidates to demote.
+      if (!MinBWs.count(VL0) || VecTy != SrcVecTy)
+        VecCost = TTI->getCastInstrCost(VL0->getOpcode(), VecTy, SrcVecTy, VL0);
        return VecCost - ScalarCost;
      }
      case Instruction::FCmp:
@@ -4014,9 +4017,24 @@ void BoUpSLP::computeMinimumValueSizes() {
    // additional roots that require investigating in Roots.
    SmallVector<Value *, 32> ToDemote;
    SmallVector<Value *, 4> Roots;
-  for (auto *Root : TreeRoot)
+  for (auto *Root : TreeRoot) {
+    // Do not include top zext/sext/trunc operations among those to be demoted;
+    // it produces a noisy cast <vect>, trunc <vect>, extract <vect>,
+    // cast <extract> sequence.
+    if (isa<Constant>(Root))
+      continue;
+    auto *I = dyn_cast<Instruction>(Root);
+    if (!I || !I->hasOneUse() || !Expr.count(I))
+      return;
+    if (isa<ZExtInst>(I) || isa<SExtInst>(I))
+      continue;
+    if (auto *TI = dyn_cast<TruncInst>(I)) {
+      Roots.push_back(TI->getOperand(0));
+      continue;
+    }
      if (!collectValuesToDemote(Root, Expr, ToDemote, Roots))
        return;
+  }
  
    // The maximum bit width required to represent all the values that can be
    // demoted without loss of precision. It would be safe to truncate the roots
diff --git a/test/Transforms/SLPVectorizer/X86/PR35777.ll b/test/Transforms/SLPVectorizer/X86/PR35777.ll

index f3983d7..adfe77f 100644 (file)
--- a/test/Transforms/SLPVectorizer/X86/PR35777.ll
+++ b/test/Transforms/SLPVectorizer/X86/PR35777.ll
@@ -16,13 +16,10 @@ define { i64, i64 } @patatino(double %arg) {
  ; CHECK-NEXT:    [[TMP7:%.*]] = fadd <2 x double> [[TMP6]], [[TMP5]]
  ; CHECK-NEXT:    [[TMP8:%.*]] = fptosi <2 x double> [[TMP7]] to <2 x i32>
  ; CHECK-NEXT:    [[TMP9:%.*]] = sext <2 x i32> [[TMP8]] to <2 x i64>
-; CHECK-NEXT:    [[TMP10:%.*]] = trunc <2 x i64> [[TMP9]] to <2 x i32>
-; CHECK-NEXT:    [[TMP11:%.*]] = extractelement <2 x i32> [[TMP10]], i32 0
-; CHECK-NEXT:    [[TMP12:%.*]] = sext i32 [[TMP11]] to i64
-; CHECK-NEXT:    [[TMP16:%.*]] = insertvalue { i64, i64 } undef, i64 [[TMP12]], 0
-; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <2 x i32> [[TMP10]], i32 1
-; CHECK-NEXT:    [[TMP14:%.*]] = sext i32 [[TMP13]] to i64
-; CHECK-NEXT:    [[TMP17:%.*]] = insertvalue { i64, i64 } [[TMP16]], i64 [[TMP14]], 1
+; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <2 x i64> [[TMP9]], i32 0
+; CHECK-NEXT:    [[TMP16:%.*]] = insertvalue { i64, i64 } undef, i64 [[TMP10]], 0
+; CHECK-NEXT:    [[TMP11:%.*]] = extractelement <2 x i64> [[TMP9]], i32 1
+; CHECK-NEXT:    [[TMP17:%.*]] = insertvalue { i64, i64 } [[TMP16]], i64 [[TMP11]], 1
  ; CHECK-NEXT:    ret { i64, i64 } [[TMP17]]
  ;
  bb:
diff --git a/test/Transforms/SLPVectorizer/X86/sign-extend.ll b/test/Transforms/SLPVectorizer/X86/sign-extend.ll

index 924422d..c9971b6 100644 (file)
--- a/test/Transforms/SLPVectorizer/X86/sign-extend.ll
+++ b/test/Transforms/SLPVectorizer/X86/sign-extend.ll
@@ -4,18 +4,15 @@
  define <4 x i32> @sign_extend_v_v(<4 x i16> %lhs) {
  ; CHECK-LABEL: @sign_extend_v_v(
  ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x i16> [[LHS:%.*]], i32 0
-; CHECK-NEXT:    [[CONV:%.*]] = sext i16 [[VECEXT]] to i32
-; CHECK-NEXT:    [[VECINIT:%.*]] = insertelement <4 x i32> undef, i32 [[CONV]], i32 0
-; CHECK-NEXT:    [[VECEXT1:%.*]] = extractelement <4 x i16> [[LHS]], i32 1
-; CHECK-NEXT:    [[CONV2:%.*]] = sext i16 [[VECEXT1]] to i32
-; CHECK-NEXT:    [[VECINIT3:%.*]] = insertelement <4 x i32> [[VECINIT]], i32 [[CONV2]], i32 1
-; CHECK-NEXT:    [[VECEXT4:%.*]] = extractelement <4 x i16> [[LHS]], i32 2
-; CHECK-NEXT:    [[CONV5:%.*]] = sext i16 [[VECEXT4]] to i32
-; CHECK-NEXT:    [[VECINIT6:%.*]] = insertelement <4 x i32> [[VECINIT3]], i32 [[CONV5]], i32 2
-; CHECK-NEXT:    [[VECEXT7:%.*]] = extractelement <4 x i16> [[LHS]], i32 3
-; CHECK-NEXT:    [[CONV8:%.*]] = sext i16 [[VECEXT7]] to i32
-; CHECK-NEXT:    [[VECINIT9:%.*]] = insertelement <4 x i32> [[VECINIT6]], i32 [[CONV8]], i32 3
+; CHECK-NEXT:    [[TMP0:%.*]] = sext <4 x i16> [[LHS:%.*]] to <4 x i32>
+; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i32> [[TMP0]], i32 0
+; CHECK-NEXT:    [[VECINIT:%.*]] = insertelement <4 x i32> undef, i32 [[TMP1]], i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x i32> [[TMP0]], i32 1
+; CHECK-NEXT:    [[VECINIT3:%.*]] = insertelement <4 x i32> [[VECINIT]], i32 [[TMP2]], i32 1
+; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x i32> [[TMP0]], i32 2
+; CHECK-NEXT:    [[VECINIT6:%.*]] = insertelement <4 x i32> [[VECINIT3]], i32 [[TMP3]], i32 2
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x i32> [[TMP0]], i32 3
+; CHECK-NEXT:    [[VECINIT9:%.*]] = insertelement <4 x i32> [[VECINIT6]], i32 [[TMP4]], i32 3
  ; CHECK-NEXT:    ret <4 x i32> [[VECINIT9]]
  ;
  entry:
author	Alexey Bataev <a.bataev@hotmail.com>
	Fri, 19 Jan 2018 14:40:13 +0000 (14:40 +0000)
committer	Alexey Bataev <a.bataev@hotmail.com>
	Fri, 19 Jan 2018 14:40:13 +0000 (14:40 +0000)
lib/Transforms/Vectorize/SLPVectorizer.cpp		patch \| blob \| history
test/Transforms/SLPVectorizer/X86/PR35777.ll		patch \| blob \| history
test/Transforms/SLPVectorizer/X86/sign-extend.ll		patch \| blob \| history