OSDN Git Service

[x86][icelake]BITALG
authorCoby Tayree <coby.tayree@intel.com>
Thu, 23 Nov 2017 11:15:50 +0000 (11:15 +0000)
committerCoby Tayree <coby.tayree@intel.com>
Thu, 23 Nov 2017 11:15:50 +0000 (11:15 +0000)
2/3
vpshufbitqmb encoding
3/3
vpshufbitqmb intrinsics
Differential Revision: https://reviews.llvm.org/D40222

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@318904 91177308-0d34-0410-b5e6-96231b3b80d8

include/llvm/IR/IntrinsicsX86.td
lib/Target/X86/X86ISelLowering.cpp
lib/Target/X86/X86ISelLowering.h
lib/Target/X86/X86InstrAVX512.td
lib/Target/X86/X86InstrFragmentsSIMD.td
lib/Target/X86/X86IntrinsicsInfo.h
test/CodeGen/X86/vpshufbitqbm-intrinsics.ll [new file with mode: 0644]
test/MC/X86/avx512bitalg-encoding.s
test/MC/X86/avx512vl_bitalg-encoding.s

index aeed363..5903997 100644 (file)
@@ -1527,6 +1527,25 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
                   [IntrArgMemOnly]>;
 }
 
+// BITALG bit shuffle (VPSHUFBITQMB): masked intrinsics for 128/256/512-bit byte vectors.
+let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
+  def int_x86_avx512_mask_vpshufbitqmb_128 :  // 16 x i8 sources, i16 mask and result
+    GCCBuiltin<"__builtin_ia32_vpshufbitqmb128_mask">,
+    Intrinsic<[llvm_i16_ty],  // result: as many bits as the sources have elements
+              [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i16_ty],  // two byte vectors, then the mask
+              [IntrNoMem]>;  // does not access memory
+  def int_x86_avx512_mask_vpshufbitqmb_256 :  // 32 x i8 sources, i32 mask and result
+    GCCBuiltin<"__builtin_ia32_vpshufbitqmb256_mask">,
+    Intrinsic<[llvm_i32_ty],
+              [llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty],
+              [IntrNoMem]>;
+  def int_x86_avx512_mask_vpshufbitqmb_512 :  // 64 x i8 sources, i64 mask and result
+    GCCBuiltin<"__builtin_ia32_vpshufbitqmb512_mask">,
+    Intrinsic<[llvm_i64_ty],
+              [llvm_v64i8_ty, llvm_v64i8_ty, llvm_i64_ty],
+              [IntrNoMem]>;
+}
+
 //===----------------------------------------------------------------------===//
 // AVX2
 
index 0deda4c..658302b 100644 (file)
@@ -25267,6 +25267,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
   case X86ISD::VPDPBUSDS:          return "X86ISD::VPDPBUSDS";
   case X86ISD::VPDPWSSD:           return "X86ISD::VPDPWSSD";
   case X86ISD::VPDPWSSDS:          return "X86ISD::VPDPWSSDS";
+  case X86ISD::VPSHUFBITQMB:       return "X86ISD::VPSHUFBITQMB";
   }
   return nullptr;
 }
index 3b1cc2e..6576bce 100644 (file)
@@ -519,6 +519,9 @@ namespace llvm {
       COMPRESS,
       EXPAND,
 
+      // Bit shuffle producing a mask result.
+      VPSHUFBITQMB,
+
       // Convert Unsigned/Integer to Floating-Point Value with rounding mode.
       SINT_TO_FP_RND, UINT_TO_FP_RND,
       SCALAR_SINT_TO_FP_RND, SCALAR_UINT_TO_FP_RND,
index 6a87a26..626ad00 100644 (file)
@@ -10215,3 +10215,30 @@ defm VPOPCNTW : avx512_unary_rm_vl<0x54, "vpopcntw", ctpop,
                                    avx512vl_i16_info, HasBITALG>,
                 avx512_unary_lowering<ctpop, avx512vl_i16_info, HasBITALG>, VEX_W;
 
+multiclass VPSHUFBITQMB_rm<X86VectorVTInfo VTI> {  // rr and rm forms of VPSHUFBITQMB for one vector type
+  defm rr : AVX512_maskable_cmp<0x8F, MRMSrcReg, VTI, (outs VTI.KRC:$dst),  // register form; dst is a mask (k) register
+                                (ins VTI.RC:$src1, VTI.RC:$src2),
+                                "vpshufbitqmb",
+                                "$src2, $src1", "$src1, $src2",  // presumably AT&T then Intel operand order -- confirm against AVX512_maskable_cmp
+                                (X86Vpshufbitqmb (VTI.VT VTI.RC:$src1),
+                                 (VTI.VT VTI.RC:$src2))>, EVEX_4V, T8PD;
+  defm rm : AVX512_maskable_cmp<0x8F, MRMSrcMem, VTI, (outs VTI.KRC:$dst),  // same opcode, second source loaded from memory
+                                (ins VTI.RC:$src1, VTI.MemOp:$src2),
+                                "vpshufbitqmb",
+                                "$src2, $src1", "$src1, $src2",
+                                (X86Vpshufbitqmb (VTI.VT VTI.RC:$src1),
+                                 (VTI.VT (bitconvert (VTI.LdFrag addr:$src2))))>,  // load is bitconverted to the vector type
+                                EVEX_4V, EVEX_CD8<8, CD8VF>, T8PD;  // compressed disp8, scaled by the full vector size
+}
+
+multiclass VPSHUFBITQMB_common<AVX512VLVectorVTInfo VTI> {  // instantiates VPSHUFBITQMB_rm at 512, 256 and 128 bits
+  let Predicates = [HasBITALG] in  // 512-bit form requires only BITALG
+  defm Z      : VPSHUFBITQMB_rm<VTI.info512>, EVEX_V512;
+  let Predicates = [HasBITALG, HasVLX] in {  // 256/128-bit forms additionally require VLX
+    defm Z256 : VPSHUFBITQMB_rm<VTI.info256>, EVEX_V256;
+    defm Z128 : VPSHUFBITQMB_rm<VTI.info128>, EVEX_V128;
+  }
+}
+
+defm VPSHUFBITQMB : VPSHUFBITQMB_common<avx512vl_i8_info>;  // instantiate for byte-element vectors
+
index 2eb735a..cb7c430 100644 (file)
@@ -575,6 +575,13 @@ def X86compress: SDNode<"X86ISD::COMPRESS", SDTypeProfile<1, 1,
 def X86expand  : SDNode<"X86ISD::EXPAND", SDTypeProfile<1, 1,
                               [SDTCisSameAs<0, 1>, SDTCisVec<1>]>, []>;
 
+// vpshufbitqmb: two same-typed vector sources in, one i1 vector out.
+def X86Vpshufbitqmb : SDNode<"X86ISD::VPSHUFBITQMB",
+                             SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,  // 1 result (index 0), 2 operands (indices 1-2)
+                                                  SDTCisSameAs<1,2>,  // both sources share one type
+                                                  SDTCVecEltisVT<0,i1>,  // result elements are i1
+                                                  SDTCisSameNumEltsAs<0,1>]>>;  // one result element per source element
+
 def SDTintToFPRound: SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisFP<0>,
                                           SDTCisSameAs<0,1>, SDTCisInt<2>,
                                           SDTCisVT<3, i32>]>;
index bc1a5ec..6f39568 100644 (file)
@@ -1292,6 +1292,13 @@ static const IntrinsicData  IntrinsicsWithoutChain[] = {
   X86_INTRINSIC_DATA(avx512_mask_vpshrdv_w_256, FMA_OP_MASK, X86ISD::VSHRDV, 0),
   X86_INTRINSIC_DATA(avx512_mask_vpshrdv_w_512, FMA_OP_MASK, X86ISD::VSHRDV, 0),
 
+  X86_INTRINSIC_DATA(avx512_mask_vpshufbitqmb_128, CMP_MASK,
+                     X86ISD::VPSHUFBITQMB, 0),
+  X86_INTRINSIC_DATA(avx512_mask_vpshufbitqmb_256, CMP_MASK,
+                     X86ISD::VPSHUFBITQMB, 0),
+  X86_INTRINSIC_DATA(avx512_mask_vpshufbitqmb_512, CMP_MASK,
+                     X86ISD::VPSHUFBITQMB, 0),
+
   X86_INTRINSIC_DATA(avx512_mask3_vfmadd_pd_128, FMA_OP_MASK3, ISD::FMA, 0),
   X86_INTRINSIC_DATA(avx512_mask3_vfmadd_pd_256, FMA_OP_MASK3, ISD::FMA, 0),
   X86_INTRINSIC_DATA(avx512_mask3_vfmadd_pd_512, FMA_OP_MASK3, ISD::FMA,
diff --git a/test/CodeGen/X86/vpshufbitqbm-intrinsics.ll b/test/CodeGen/X86/vpshufbitqbm-intrinsics.ll
new file mode 100644 (file)
index 0000000..fc96a16
--- /dev/null
@@ -0,0 +1,41 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512bitalg,+avx512vl | FileCheck %s
+
+declare i16 @llvm.x86.avx512.mask.vpshufbitqmb.128(<16 x i8> %a, <16 x i8> %b, i16 %mask)
+define i16 @test_vpshufbitqmb_128(<16 x i8> %a, <16 x i8> %b, i16 %mask) {
+; CHECK-LABEL: test_vpshufbitqmb_128:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vpshufbitqmb %xmm1, %xmm0, %k0 {%k1}
+; CHECK-NEXT:    kmovd %k0, %eax
+; CHECK-NEXT:    ## kill: %AX<def> %AX<kill> %EAX<kill>
+; CHECK-NEXT:    retq
+  %res = call i16 @llvm.x86.avx512.mask.vpshufbitqmb.128(<16 x i8> %a, <16 x i8> %b, i16 %mask)
+  ret i16 %res
+}
+
+declare i32 @llvm.x86.avx512.mask.vpshufbitqmb.256(<32 x i8> %a, <32 x i8> %b, i32 %mask)
+define i32 @test_vpshufbitqmb_256(<32 x i8> %a, <32 x i8> %b, i32 %mask) {
+; CHECK-LABEL: test_vpshufbitqmb_256:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vpshufbitqmb %ymm1, %ymm0, %k0 {%k1}
+; CHECK-NEXT:    kmovd %k0, %eax
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    retq
+  %res = call i32 @llvm.x86.avx512.mask.vpshufbitqmb.256(<32 x i8> %a, <32 x i8> %b, i32 %mask)
+  ret i32 %res
+}
+
+declare i64 @llvm.x86.avx512.mask.vpshufbitqmb.512(<64 x i8> %a, <64 x i8> %b, i64 %mask)
+define i64 @test_vpshufbitqmb_512(<64 x i8> %a, <64 x i8> %b, i64 %mask) {
+; CHECK-LABEL: test_vpshufbitqmb_512:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovq %rdi, %k1
+; CHECK-NEXT:    vpshufbitqmb %zmm1, %zmm0, %k0 {%k1}
+; CHECK-NEXT:    kmovq %k0, %rax
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    retq
+  %res = call i64 @llvm.x86.avx512.mask.vpshufbitqmb.512(<64 x i8> %a, <64 x i8> %b, i64 %mask)
+  ret i64 %res
+}
index bd128bd..e926ab0 100644 (file)
 // CHECK: encoding: [0x62,0xa2,0xfd,0x4a,0x54,0xac,0xf1,0x02,0x00,0x00,0xe0]
           vpopcntw  -536870910(%rcx,%r14,8), %zmm21 {%k2}
 
+// CHECK: vpshufbitqmb %zmm2, %zmm23, %k1
+// CHECK: encoding: [0x62,0xf2,0x45,0x40,0x8f,0xca]
+          vpshufbitqmb %zmm2, %zmm23, %k1
+
+// CHECK: vpshufbitqmb %zmm2, %zmm23, %k1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0x45,0x42,0x8f,0xca]
+          vpshufbitqmb %zmm2, %zmm23, %k1 {%k2}
+
+// CHECK: vpshufbitqmb  (%rcx), %zmm23, %k1
+// CHECK: encoding: [0x62,0xf2,0x45,0x40,0x8f,0x09]
+          vpshufbitqmb  (%rcx), %zmm23, %k1
+
+// CHECK: vpshufbitqmb  -256(%rsp), %zmm23, %k1
+// CHECK: encoding: [0x62,0xf2,0x45,0x40,0x8f,0x4c,0x24,0xfc]
+          vpshufbitqmb  -256(%rsp), %zmm23, %k1
+
+// CHECK: vpshufbitqmb  256(%rsp), %zmm23, %k1
+// CHECK: encoding: [0x62,0xf2,0x45,0x40,0x8f,0x4c,0x24,0x04]
+          vpshufbitqmb  256(%rsp), %zmm23, %k1
+
+// CHECK: vpshufbitqmb  268435456(%rcx,%r14,8), %zmm23, %k1
+// CHECK: encoding: [0x62,0xb2,0x45,0x40,0x8f,0x8c,0xf1,0x00,0x00,0x00,0x10]
+          vpshufbitqmb  268435456(%rcx,%r14,8), %zmm23, %k1
+
+// CHECK: vpshufbitqmb  -536870912(%rcx,%r14,8), %zmm23, %k1
+// CHECK: encoding: [0x62,0xb2,0x45,0x40,0x8f,0x8c,0xf1,0x00,0x00,0x00,0xe0]
+          vpshufbitqmb  -536870912(%rcx,%r14,8), %zmm23, %k1
+
+// CHECK: vpshufbitqmb  -536870910(%rcx,%r14,8), %zmm23, %k1
+// CHECK: encoding: [0x62,0xb2,0x45,0x40,0x8f,0x8c,0xf1,0x02,0x00,0x00,0xe0]
+          vpshufbitqmb  -536870910(%rcx,%r14,8), %zmm23, %k1
+
+// CHECK: vpshufbitqmb  (%rcx), %zmm23, %k1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0x45,0x42,0x8f,0x09]
+          vpshufbitqmb  (%rcx), %zmm23, %k1 {%k2}
+
+// CHECK: vpshufbitqmb  -256(%rsp), %zmm23, %k1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0x45,0x42,0x8f,0x4c,0x24,0xfc]
+          vpshufbitqmb  -256(%rsp), %zmm23, %k1 {%k2}
+
+// CHECK: vpshufbitqmb  256(%rsp), %zmm23, %k1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0x45,0x42,0x8f,0x4c,0x24,0x04]
+          vpshufbitqmb  256(%rsp), %zmm23, %k1 {%k2}
+
+// CHECK: vpshufbitqmb  268435456(%rcx,%r14,8), %zmm23, %k1 {%k2}
+// CHECK: encoding: [0x62,0xb2,0x45,0x42,0x8f,0x8c,0xf1,0x00,0x00,0x00,0x10]
+          vpshufbitqmb  268435456(%rcx,%r14,8), %zmm23, %k1 {%k2}
+
+// CHECK: vpshufbitqmb  -536870912(%rcx,%r14,8), %zmm23, %k1 {%k2}
+// CHECK: encoding: [0x62,0xb2,0x45,0x42,0x8f,0x8c,0xf1,0x00,0x00,0x00,0xe0]
+          vpshufbitqmb  -536870912(%rcx,%r14,8), %zmm23, %k1 {%k2}
+
+// CHECK: vpshufbitqmb  -536870910(%rcx,%r14,8), %zmm23, %k1 {%k2}
+// CHECK: encoding: [0x62,0xb2,0x45,0x42,0x8f,0x8c,0xf1,0x02,0x00,0x00,0xe0]
+          vpshufbitqmb  -536870910(%rcx,%r14,8), %zmm23, %k1 {%k2}
+
index 5389866..f6f4e7f 100644 (file)
 // CHECK: encoding: [0x62,0xa2,0xfd,0x2a,0x54,0xac,0xf1,0x02,0x00,0x00,0xe0]
           vpopcntw  -536870910(%rcx,%r14,8), %ymm21 {%k2}
 
+// CHECK: vpshufbitqmb %xmm2, %xmm23, %k1
+// CHECK: encoding: [0x62,0xf2,0x45,0x00,0x8f,0xca]
+          vpshufbitqmb %xmm2, %xmm23, %k1
+
+// CHECK: vpshufbitqmb %xmm2, %xmm23, %k1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0x45,0x02,0x8f,0xca]
+          vpshufbitqmb %xmm2, %xmm23, %k1 {%k2}
+
+// CHECK: vpshufbitqmb  (%rcx), %xmm23, %k1
+// CHECK: encoding: [0x62,0xf2,0x45,0x00,0x8f,0x09]
+          vpshufbitqmb  (%rcx), %xmm23, %k1
+
+// CHECK: vpshufbitqmb  -64(%rsp), %xmm23, %k1
+// CHECK: encoding: [0x62,0xf2,0x45,0x00,0x8f,0x4c,0x24,0xfc]
+          vpshufbitqmb  -64(%rsp), %xmm23, %k1
+
+// CHECK: vpshufbitqmb  64(%rsp), %xmm23, %k1
+// CHECK: encoding: [0x62,0xf2,0x45,0x00,0x8f,0x4c,0x24,0x04]
+          vpshufbitqmb  64(%rsp), %xmm23, %k1
+
+// CHECK: vpshufbitqmb  268435456(%rcx,%r14,8), %xmm23, %k1
+// CHECK: encoding: [0x62,0xb2,0x45,0x00,0x8f,0x8c,0xf1,0x00,0x00,0x00,0x10]
+          vpshufbitqmb  268435456(%rcx,%r14,8), %xmm23, %k1
+
+// CHECK: vpshufbitqmb  -536870912(%rcx,%r14,8), %xmm23, %k1
+// CHECK: encoding: [0x62,0xb2,0x45,0x00,0x8f,0x8c,0xf1,0x00,0x00,0x00,0xe0]
+          vpshufbitqmb  -536870912(%rcx,%r14,8), %xmm23, %k1
+
+// CHECK: vpshufbitqmb  -536870910(%rcx,%r14,8), %xmm23, %k1
+// CHECK: encoding: [0x62,0xb2,0x45,0x00,0x8f,0x8c,0xf1,0x02,0x00,0x00,0xe0]
+          vpshufbitqmb  -536870910(%rcx,%r14,8), %xmm23, %k1
+
+// CHECK: vpshufbitqmb  (%rcx), %xmm23, %k1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0x45,0x02,0x8f,0x09]
+          vpshufbitqmb  (%rcx), %xmm23, %k1 {%k2}
+
+// CHECK: vpshufbitqmb  -64(%rsp), %xmm23, %k1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0x45,0x02,0x8f,0x4c,0x24,0xfc]
+          vpshufbitqmb  -64(%rsp), %xmm23, %k1 {%k2}
+
+// CHECK: vpshufbitqmb  64(%rsp), %xmm23, %k1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0x45,0x02,0x8f,0x4c,0x24,0x04]
+          vpshufbitqmb  64(%rsp), %xmm23, %k1 {%k2}
+
+// CHECK: vpshufbitqmb  268435456(%rcx,%r14,8), %xmm23, %k1 {%k2}
+// CHECK: encoding: [0x62,0xb2,0x45,0x02,0x8f,0x8c,0xf1,0x00,0x00,0x00,0x10]
+          vpshufbitqmb  268435456(%rcx,%r14,8), %xmm23, %k1 {%k2}
+
+// CHECK: vpshufbitqmb  -536870912(%rcx,%r14,8), %xmm23, %k1 {%k2}
+// CHECK: encoding: [0x62,0xb2,0x45,0x02,0x8f,0x8c,0xf1,0x00,0x00,0x00,0xe0]
+          vpshufbitqmb  -536870912(%rcx,%r14,8), %xmm23, %k1 {%k2}
+
+// CHECK: vpshufbitqmb  -536870910(%rcx,%r14,8), %xmm23, %k1 {%k2}
+// CHECK: encoding: [0x62,0xb2,0x45,0x02,0x8f,0x8c,0xf1,0x02,0x00,0x00,0xe0]
+          vpshufbitqmb  -536870910(%rcx,%r14,8), %xmm23, %k1 {%k2}
+
+// CHECK: vpshufbitqmb %ymm2, %ymm23, %k1
+// CHECK: encoding: [0x62,0xf2,0x45,0x20,0x8f,0xca]
+          vpshufbitqmb %ymm2, %ymm23, %k1
+
+// CHECK: vpshufbitqmb %ymm2, %ymm23, %k1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0x45,0x22,0x8f,0xca]
+          vpshufbitqmb %ymm2, %ymm23, %k1 {%k2}
+
+// CHECK: vpshufbitqmb  (%rcx), %ymm23, %k1
+// CHECK: encoding: [0x62,0xf2,0x45,0x20,0x8f,0x09]
+          vpshufbitqmb  (%rcx), %ymm23, %k1
+
+// CHECK: vpshufbitqmb  -128(%rsp), %ymm23, %k1
+// CHECK: encoding: [0x62,0xf2,0x45,0x20,0x8f,0x4c,0x24,0xfc]
+          vpshufbitqmb  -128(%rsp), %ymm23, %k1
+
+// CHECK: vpshufbitqmb  128(%rsp), %ymm23, %k1
+// CHECK: encoding: [0x62,0xf2,0x45,0x20,0x8f,0x4c,0x24,0x04]
+          vpshufbitqmb  128(%rsp), %ymm23, %k1
+
+// CHECK: vpshufbitqmb  268435456(%rcx,%r14,8), %ymm23, %k1
+// CHECK: encoding: [0x62,0xb2,0x45,0x20,0x8f,0x8c,0xf1,0x00,0x00,0x00,0x10]
+          vpshufbitqmb  268435456(%rcx,%r14,8), %ymm23, %k1
+
+// CHECK: vpshufbitqmb  -536870912(%rcx,%r14,8), %ymm23, %k1
+// CHECK: encoding: [0x62,0xb2,0x45,0x20,0x8f,0x8c,0xf1,0x00,0x00,0x00,0xe0]
+          vpshufbitqmb  -536870912(%rcx,%r14,8), %ymm23, %k1
+
+// CHECK: vpshufbitqmb  -536870910(%rcx,%r14,8), %ymm23, %k1
+// CHECK: encoding: [0x62,0xb2,0x45,0x20,0x8f,0x8c,0xf1,0x02,0x00,0x00,0xe0]
+          vpshufbitqmb  -536870910(%rcx,%r14,8), %ymm23, %k1
+
+// CHECK: vpshufbitqmb  (%rcx), %ymm23, %k1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0x45,0x22,0x8f,0x09]
+          vpshufbitqmb  (%rcx), %ymm23, %k1 {%k2}
+
+// CHECK: vpshufbitqmb  -128(%rsp), %ymm23, %k1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0x45,0x22,0x8f,0x4c,0x24,0xfc]
+          vpshufbitqmb  -128(%rsp), %ymm23, %k1 {%k2}
+
+// CHECK: vpshufbitqmb  128(%rsp), %ymm23, %k1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0x45,0x22,0x8f,0x4c,0x24,0x04]
+          vpshufbitqmb  128(%rsp), %ymm23, %k1 {%k2}
+
+// CHECK: vpshufbitqmb  268435456(%rcx,%r14,8), %ymm23, %k1 {%k2}
+// CHECK: encoding: [0x62,0xb2,0x45,0x22,0x8f,0x8c,0xf1,0x00,0x00,0x00,0x10]
+          vpshufbitqmb  268435456(%rcx,%r14,8), %ymm23, %k1 {%k2}
+
+// CHECK: vpshufbitqmb  -536870912(%rcx,%r14,8), %ymm23, %k1 {%k2}
+// CHECK: encoding: [0x62,0xb2,0x45,0x22,0x8f,0x8c,0xf1,0x00,0x00,0x00,0xe0]
+          vpshufbitqmb  -536870912(%rcx,%r14,8), %ymm23, %k1 {%k2}
+
+// CHECK: vpshufbitqmb  -536870910(%rcx,%r14,8), %ymm23, %k1 {%k2}
+// CHECK: encoding: [0x62,0xb2,0x45,0x22,0x8f,0x8c,0xf1,0x02,0x00,0x00,0xe0]
+          vpshufbitqmb  -536870910(%rcx,%r14,8), %ymm23, %k1 {%k2}
+