[X86] Give unary PERMI priority over SHUF128 in lowerV8I64VectorShuffle to make it possible to fold a load.

author Craig Topper <craig.topper@intel.com>

Fri, 3 Nov 2017 22:48:13 +0000 (22:48 +0000)

committer Craig Topper <craig.topper@intel.com>

Fri, 3 Nov 2017 22:48:13 +0000 (22:48 +0000)
author Craig Topper <craig.topper@intel.com>
Fri, 3 Nov 2017 22:48:13 +0000 (22:48 +0000)
committer Craig Topper <craig.topper@intel.com>
Fri, 3 Nov 2017 22:48:13 +0000 (22:48 +0000)
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp

index d65a65e..ea97dc2 100644 (file)
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -13709,10 +13709,6 @@ static SDValue lowerV8I64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
    assert(V2.getSimpleValueType() == MVT::v8i64 && "Bad operand type!");
    assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!");
  
-  if (SDValue Shuf128 =
-          lowerV4X128VectorShuffle(DL, MVT::v8i64, Mask, V1, V2, DAG))
-    return Shuf128;
-
    if (V2.isUndef()) {
      // When the shuffle is mirrored between the 128-bit lanes of the unit, we
      // can use lower latency instructions that will operate on all four
@@ -13734,6 +13730,10 @@ static SDValue lowerV8I64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
                           getV4X86ShuffleImm8ForMask(Repeated256Mask, DL, DAG));
    }
  
+  if (SDValue Shuf128 =
+          lowerV4X128VectorShuffle(DL, MVT::v8i64, Mask, V1, V2, DAG))
+    return Shuf128;
+
    // Try to use shift instructions.
    if (SDValue Shift = lowerVectorShuffleAsShift(DL, MVT::v8i64, V1, V2, Mask,
                                                  Zeroable, Subtarget, DAG))
diff --git a/test/CodeGen/X86/vector-shuffle-512-v8.ll b/test/CodeGen/X86/vector-shuffle-512-v8.ll

index 6c98055..1d17ef1 100644 (file)
--- a/test/CodeGen/X86/vector-shuffle-512-v8.ll
+++ b/test/CodeGen/X86/vector-shuffle-512-v8.ll
@@ -1165,14 +1165,31 @@ define <8 x i64> @shuffle_v8i64_70000000(<8 x i64> %a, <8 x i64> %b) {
  define <8 x i64> @shuffle_v8i64_01014545(<8 x i64> %a, <8 x i64> %b) {
  ; AVX512F-LABEL: shuffle_v8i64_01014545:
  ; AVX512F:       # BB#0:
-; AVX512F-NEXT:    vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,0,1,4,5,4,5]
+; AVX512F-NEXT:    vpermpd {{.*#+}} zmm0 = zmm0[0,1,0,1,4,5,4,5]
  ; AVX512F-NEXT:    retq
  ;
  ; AVX512F-32-LABEL: shuffle_v8i64_01014545:
  ; AVX512F-32:       # BB#0:
-; AVX512F-32-NEXT:    vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,0,1,4,5,4,5]
+; AVX512F-32-NEXT:    vpermpd {{.*#+}} zmm0 = zmm0[0,1,0,1,4,5,4,5]
+; AVX512F-32-NEXT:    retl
+
+  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 4, i32 5, i32 4, i32 5>
+  ret <8 x i64> %shuffle
+}
+
+define <8 x i64> @shuffle_v8i64_01014545_mem(<8 x i64>* %ptr, <8 x i64> %b) {
+; AVX512F-LABEL: shuffle_v8i64_01014545_mem:
+; AVX512F:       # BB#0:
+; AVX512F-NEXT:    vpermpd {{.*#+}} zmm0 = mem[0,1,0,1,4,5,4,5]
+; AVX512F-NEXT:    retq
+;
+; AVX512F-32-LABEL: shuffle_v8i64_01014545_mem:
+; AVX512F-32:       # BB#0:
+; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; AVX512F-32-NEXT:    vpermpd {{.*#+}} zmm0 = mem[0,1,0,1,4,5,4,5]
  ; AVX512F-32-NEXT:    retl
  
+  %a = load <8 x i64>, <8 x i64>* %ptr
    %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 4, i32 5, i32 4, i32 5>
    ret <8 x i64> %shuffle
  }
author	Craig Topper <craig.topper@intel.com>
	Fri, 3 Nov 2017 22:48:13 +0000 (22:48 +0000)
committer	Craig Topper <craig.topper@intel.com>
	Fri, 3 Nov 2017 22:48:13 +0000 (22:48 +0000)
lib/Target/X86/X86ISelLowering.cpp		patch | blob | history
test/CodeGen/X86/vector-shuffle-512-v8.ll		patch | blob | history