From 50520f329ad3e2e150a67e82ac8725b167daeee3 Mon Sep 17 00:00:00 2001
From: Michael Zuckerman
Date: Sun, 15 Jan 2017 16:43:14 +0000
Subject: [PATCH] Fix the blend mask by swapping the operand order, since the
 Blend node uses the opposite mask convention from the Select node.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@292066 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86ISelLowering.cpp              |  4 +-
 test/CodeGen/X86/avx512-mask-op.ll              |  4 +-
 test/CodeGen/X86/merge-consecutive-loads-512.ll | 48 ++++++++++++----------
 test/CodeGen/X86/sse3-avx-addsub.ll             |  4 +-
 test/CodeGen/X86/vector-shuffle-128-v16.ll      |  8 ++--
 test/CodeGen/X86/vector-shuffle-256-v16.ll      |  8 ++--
 test/CodeGen/X86/vector-shuffle-256-v32.ll      | 54 ++++++++++++-------------
 test/CodeGen/X86/vector-shuffle-512-v16.ll      |  2 +-
 test/CodeGen/X86/vector-shuffle-512-v32.ll      |  7 ++--
 test/CodeGen/X86/vector-shuffle-512-v8.ll       | 11 +++--
 test/CodeGen/X86/vector-shuffle-avx512.ll       | 48 +++++++++++-----------
 11 files changed, 100 insertions(+), 98 deletions(-)

diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 989039f3ee4..31e10dcc2a1 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -8436,7 +8436,7 @@ static SDValue lowerVectorShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1,
     MVT IntegerType =
         MVT::getIntegerVT(std::max((int)VT.getVectorNumElements(), 8));
     SDValue MaskNode = DAG.getConstant(BlendMask, DL, IntegerType);
-    return getVectorMaskingNode(V1, MaskNode, V2, Subtarget, DAG);
+    return getVectorMaskingNode(V2, MaskNode, V1, Subtarget, DAG);
   }
 
   // Attempt to lower to a bitmask if we can. VPAND is faster than VPBLENDVB.
@@ -8485,7 +8485,7 @@ static SDValue lowerVectorShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1,
     MVT IntegerType =
         MVT::getIntegerVT(std::max((int)VT.getVectorNumElements(), 8));
     SDValue MaskNode = DAG.getConstant(BlendMask, DL, IntegerType);
-    return getVectorMaskingNode(V1, MaskNode, V2, Subtarget, DAG);
+    return getVectorMaskingNode(V2, MaskNode, V1, Subtarget, DAG);
   }
   default:
     llvm_unreachable("Not a supported integer vector type!");
diff --git a/test/CodeGen/X86/avx512-mask-op.ll b/test/CodeGen/X86/avx512-mask-op.ll
index a2afe843ec8..89bd1980e52 100644
--- a/test/CodeGen/X86/avx512-mask-op.ll
+++ b/test/CodeGen/X86/avx512-mask-op.ll
@@ -542,7 +542,7 @@ define <64 x i8> @test16(i64 %x) {
 ; SKX-NEXT: vpmovm2b %k0, %zmm1
 ; SKX-NEXT: movl $32, %eax
 ; SKX-NEXT: kmovd %eax, %k1
-; SKX-NEXT: vmovdqu8 %ymm1, %ymm0 {%k1}
+; SKX-NEXT: vpblendmb %ymm0, %ymm1, %ymm0 {%k1}
 ; SKX-NEXT: vextracti64x4 $1, %zmm1, %ymm1
 ; SKX-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
 ; SKX-NEXT: vpmovb2m %zmm0, %k0
@@ -610,7 +610,7 @@ define <64 x i8> @test17(i64 %x, i32 %y, i32 %z) {
 ; SKX-NEXT: vpmovm2b %k0, %zmm1
 ; SKX-NEXT: movl $32, %eax
 ; SKX-NEXT: kmovd %eax, %k1
-; SKX-NEXT: vmovdqu8 %ymm1, %ymm0 {%k1}
+; SKX-NEXT: vpblendmb %ymm0, %ymm1, %ymm0 {%k1}
 ; SKX-NEXT: vextracti64x4 $1, %zmm1, %ymm1
 ; SKX-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
 ; SKX-NEXT: vpmovb2m %zmm0, %k0
diff --git a/test/CodeGen/X86/merge-consecutive-loads-512.ll b/test/CodeGen/X86/merge-consecutive-loads-512.ll
index eced69fcb85..0111e341c45 100644
--- a/test/CodeGen/X86/merge-consecutive-loads-512.ll
+++ b/test/CodeGen/X86/merge-consecutive-loads-512.ll
@@ -138,17 +138,19 @@ define <8 x double> @merge_8f64_f64_12zzuuzz(double* %ptr) nounwind uwtable noin
 define <8 x double> @merge_8f64_f64_1u3u5zu8(double* %ptr) nounwind uwtable noinline ssp {
 ; ALL-LABEL: merge_8f64_f64_1u3u5zu8:
 ; ALL: # BB#0:
-; ALL-NEXT: movb $32, %al
-; ALL-NEXT: kmovw %eax, %k1
-; ALL-NEXT: vmovupd 8(%rdi), %zmm0 {%k1} {z}
+; ALL-NEXT: movb $32, %al
+; ALL-NEXT: kmovw %eax, %k0
+; ALL-NEXT: knotw %k0, %k1
+; ALL-NEXT: vmovupd 8(%rdi), %zmm0 {%k1} {z}
 ; ALL-NEXT: retq
 ;
 ; X32-AVX512F-LABEL: merge_8f64_f64_1u3u5zu8:
 ; X32-AVX512F: # BB#0:
-; X32-AVX512F-NEXT: movl 4(%esp), %eax
-; X32-AVX512F-NEXT: movb $32, %cl
-; X32-AVX512F-NEXT: kmovw %ecx, %k1
-; X32-AVX512F-NEXT: vmovupd 8(%eax), %zmm0 {%k1} {z}
+; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-AVX512F-NEXT: movb $32, %cl
+; X32-AVX512F-NEXT: kmovw %ecx, %k0
+; X32-AVX512F-NEXT: knotw %k0, %k1
+; X32-AVX512F-NEXT: vmovupd 8(%eax), %zmm0 {%k1} {z}
 ; X32-AVX512F-NEXT: retl
   %ptr0 = getelementptr inbounds double, double* %ptr, i64 1
   %ptr2 = getelementptr inbounds double, double* %ptr, i64 3
@@ -223,17 +225,19 @@ define <8 x i64> @merge_8i64_i64_56zz9uzz(i64* %ptr) nounwind uwtable noinline s
 define <8 x i64> @merge_8i64_i64_1u3u5zu8(i64* %ptr) nounwind uwtable noinline ssp {
 ; ALL-LABEL: merge_8i64_i64_1u3u5zu8:
 ; ALL: # BB#0:
-; ALL-NEXT: movb $32, %al
-; ALL-NEXT: kmovw %eax, %k1
-; ALL-NEXT: vmovdqu64 8(%rdi), %zmm0 {%k1} {z}
+; ALL-NEXT: movb $32, %al
+; ALL-NEXT: kmovw %eax, %k0
+; ALL-NEXT: knotw %k0, %k1
+; ALL-NEXT: vmovdqu64 8(%rdi), %zmm0 {%k1} {z}
 ; ALL-NEXT: retq
 ;
 ; X32-AVX512F-LABEL: merge_8i64_i64_1u3u5zu8:
 ; X32-AVX512F: # BB#0:
-; X32-AVX512F-NEXT: movl 4(%esp), %eax
-; X32-AVX512F-NEXT: movb $32, %cl
-; X32-AVX512F-NEXT: kmovw %ecx, %k1
-; X32-AVX512F-NEXT: vmovdqu64 8(%eax), %zmm0 {%k1} {z}
+; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-AVX512F-NEXT: movb $32, %cl
+; X32-AVX512F-NEXT: kmovw %ecx, %k0
+; X32-AVX512F-NEXT: knotw %k0, %k1
+; X32-AVX512F-NEXT: vmovdqu64 8(%eax), %zmm0 {%k1} {z}
 ; X32-AVX512F-NEXT: retl
   %ptr0 = getelementptr inbounds i64, i64* %ptr, i64 1
   %ptr2 = getelementptr inbounds i64, i64* %ptr, i64 3
@@ -444,17 +448,19 @@ define <16 x i32> @merge_16i32_i32_0uu3uuuuuuuuCuEF(i32* %ptr) nounwind uwtable
 define <16 x i32> @merge_16i32_i32_0uu3zzuuuuuzCuEF(i32* %ptr) nounwind uwtable noinline ssp {
 ; ALL-LABEL: merge_16i32_i32_0uu3zzuuuuuzCuEF:
 ; ALL: # BB#0:
-; ALL-NEXT: movw $8240, %ax # imm = 0x2030
-; ALL-NEXT: kmovw %eax, %k1
-; ALL-NEXT: vmovdqu32 (%rdi), %zmm0 {%k1} {z}
+; ALL-NEXT: movw $8240, %ax # imm = 0x2030
+; ALL-NEXT: kmovw %eax, %k0
+; ALL-NEXT: knotw %k0, %k1
+; ALL-NEXT: vmovdqu32 (%rdi), %zmm0 {%k1} {z}
 ; ALL-NEXT: retq
 ;
 ; X32-AVX512F-LABEL: merge_16i32_i32_0uu3zzuuuuuzCuEF:
 ; X32-AVX512F: # BB#0:
-; X32-AVX512F-NEXT: movl 4(%esp), %eax
-; X32-AVX512F-NEXT: movw $8240, %cx # imm = 0x2030
-; X32-AVX512F-NEXT: kmovw %ecx, %k1
-; X32-AVX512F-NEXT: vmovdqu32 (%eax), %zmm0 {%k1} {z}
+; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-AVX512F-NEXT: movw $8240, %cx # imm = 0x2030
+; X32-AVX512F-NEXT: kmovw %ecx, %k0
+; X32-AVX512F-NEXT: knotw %k0, %k1
+; X32-AVX512F-NEXT: vmovdqu32 (%eax), %zmm0 {%k1} {z}
 ; X32-AVX512F-NEXT: retl
   %ptr0 = getelementptr inbounds i32, i32* %ptr, i64 0
   %ptr3 = getelementptr inbounds i32, i32* %ptr, i64 3
diff --git a/test/CodeGen/X86/sse3-avx-addsub.ll b/test/CodeGen/X86/sse3-avx-addsub.ll
index e7fab2e8ec5..0e0cf485256 100644
--- a/test/CodeGen/X86/sse3-avx-addsub.ll
+++ b/test/CodeGen/X86/sse3-avx-addsub.ll
@@ -119,10 +119,10 @@ define <16 x float> @test5(<16 x float> %A, <16 x float> %B) {
 ;
 ; AVX512-LABEL: test5:
 ; AVX512: # BB#0:
-; AVX512-NEXT: vaddps %zmm1, %zmm0, %zmm2
+; AVX512-NEXT: vsubps %zmm1, %zmm0, %zmm2
 ; AVX512-NEXT: movw $-21846, %ax # imm = 0xAAAA
 ; AVX512-NEXT: kmovw %eax, %k1
-; AVX512-NEXT: vsubps %zmm1, %zmm0, %zmm2 {%k1}
+; AVX512-NEXT: vaddps %zmm1, %zmm0, %zmm2 {%k1}
 ; AVX512-NEXT: vmovaps %zmm2, %zmm0
 ; AVX512-NEXT: retq
   %add = fadd <16 x float> %A, %B
diff --git a/test/CodeGen/X86/vector-shuffle-128-v16.ll b/test/CodeGen/X86/vector-shuffle-128-v16.ll
index b5290414b98..3e19da2cd07 100644
--- a/test/CodeGen/X86/vector-shuffle-128-v16.ll
+++ b/test/CodeGen/X86/vector-shuffle-128-v16.ll
@@ -425,7 +425,7 @@ define <16 x i8> @shuffle_v16i8_00_17_02_19_04_21_06_23_08_25_10_27_12_29_14_31(
 ; AVX512VL: # BB#0:
 ; AVX512VL-NEXT: movw $-21846, %ax # imm = 0xAAAA
 ; AVX512VL-NEXT: kmovw %eax, %k1
-; AVX512VL-NEXT: vpblendmb %xmm0, %xmm1, %xmm0 {%k1}
+; AVX512VL-NEXT: vmovdqu8 %xmm1, %xmm0 {%k1}
 ; AVX512VL-NEXT: retq
   %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32>
   ret <16 x i8> %shuffle
@@ -465,7 +465,7 @@ define <16 x i8> @shuffle_v16i8_00_01_02_19_04_05_06_23_08_09_10_27_12_13_14_31(
 ; AVX512VL: # BB#0:
 ; AVX512VL-NEXT: movw $-30584, %ax # imm = 0x8888
 ; AVX512VL-NEXT: kmovw %eax, %k1
-; AVX512VL-NEXT: vpblendmb %xmm0, %xmm1, %xmm0 {%k1}
+; AVX512VL-NEXT: vmovdqu8 %xmm1, %xmm0 {%k1}
 ; AVX512VL-NEXT: retq
   %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32>
   ret <16 x i8> %shuffle
@@ -524,7 +524,7 @@ define <16 x i8> @shuffle_v16i8_00_01_02_03_20_05_06_23_08_09_10_11_28_13_14_31(
 ; AVX512VL: # BB#0:
 ; AVX512VL-NEXT: movw $-28528, %ax # imm = 0x9090
 ; AVX512VL-NEXT: kmovw %eax, %k1
-; AVX512VL-NEXT: vpblendmb %xmm0, %xmm1, %xmm0 {%k1}
+; AVX512VL-NEXT: vmovdqu8 %xmm1, %xmm0 {%k1}
 ; AVX512VL-NEXT: retq
   %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32>
   ret <16 x i8> %shuffle
@@ -565,7 +565,7 @@ define <16 x i8> @shuffle_v16i8_16_17_18_19_04_05_06_07_24_25_10_11_28_13_30_15(
 ; AVX512VL: # BB#0:
 ; AVX512VL-NEXT: movw $-21264, %ax # imm = 0xACF0
 ; AVX512VL-NEXT: kmovw %eax, %k1
-; AVX512VL-NEXT: vmovdqu8 %xmm1, %xmm0 {%k1}
+; AVX512VL-NEXT: vpblendmb %xmm0, %xmm1, %xmm0 {%k1}
 ; AVX512VL-NEXT: retq
   %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32>
   ret <16 x i8> %shuffle
diff --git a/test/CodeGen/X86/vector-shuffle-256-v16.ll b/test/CodeGen/X86/vector-shuffle-256-v16.ll
index a1cf97d787a..682a34d3cdb 100644
--- a/test/CodeGen/X86/vector-shuffle-256-v16.ll
+++ b/test/CodeGen/X86/vector-shuffle-256-v16.ll
@@ -719,7 +719,7 @@ define <16 x i16> @shuffle_v16i16_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_3
 ; AVX512VL: # BB#0:
 ; AVX512VL-NEXT: movw $-32768, %ax # imm = 0x8000
 ; AVX512VL-NEXT: kmovw %eax, %k1
-; AVX512VL-NEXT: vpblendmw %ymm0, %ymm1, %ymm0 {%k1}
+; AVX512VL-NEXT: vmovdqu16 %ymm1, %ymm0 {%k1}
 ; AVX512VL-NEXT: retq
   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32>
   ret <16 x i16> %shuffle
@@ -744,7 +744,7 @@ define <16 x i16> @shuffle_v16i16_16_01_02_03_04_05_06_07_08_09_10_11_12_13_14_1
 ; AVX512VL: # BB#0:
 ; AVX512VL-NEXT: movw $1, %ax
 ; AVX512VL-NEXT: kmovw %eax, %k1
-; AVX512VL-NEXT: vpblendmw %ymm0, %ymm1, %ymm0 {%k1}
+; AVX512VL-NEXT: vmovdqu16 %ymm1, %ymm0 {%k1}
 ; AVX512VL-NEXT: retq
   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32>
   ret <16 x i16> %shuffle
@@ -769,7 +769,7 @@ define <16 x i16> @shuffle_v16i16_00_17_02_19_04_21_06_23_24_09_26_11_28_13_30_1
 ; AVX512VL: # BB#0:
 ; AVX512VL-NEXT: movw $21930, %ax # imm = 0x55AA
 ; AVX512VL-NEXT: kmovw %eax, %k1
-; AVX512VL-NEXT: vpblendmw %ymm0, %ymm1, %ymm0 {%k1}
+; AVX512VL-NEXT: vmovdqu16 %ymm1, %ymm0 {%k1}
 ; AVX512VL-NEXT: retq
   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32>
   ret <16 x i16> %shuffle
@@ -794,7 +794,7 @@ define <16 x i16> @shuffle_v16i16_16_01_18_03_20_05_22_07_08_25_10_27_12_29_14_3
 ; AVX512VL: # BB#0:
 ; AVX512VL-NEXT: movw $-21931, %ax # imm = 0xAA55
 ; AVX512VL-NEXT: kmovw %eax, %k1
-; AVX512VL-NEXT: vpblendmw %ymm0, %ymm1, %ymm0 {%k1}
+; AVX512VL-NEXT: vmovdqu16 %ymm1, %ymm0 {%k1}
 ; AVX512VL-NEXT: retq
   %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32>
   ret <16 x i16> %shuffle
diff --git a/test/CodeGen/X86/vector-shuffle-256-v32.ll b/test/CodeGen/X86/vector-shuffle-256-v32.ll
index 390ab16699d..8b0e25ce43f 100644
--- a/test/CodeGen/X86/vector-shuffle-256-v32.ll
+++ b/test/CodeGen/X86/vector-shuffle-256-v32.ll
@@ -317,11 +317,13 @@ define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_
 ; AVX512VL-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
 ; AVX512VL: # BB#0:
 ; AVX512VL-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
-; AVX512VL-NEXT: vpbroadcastb %xmm0, %xmm0
 ; AVX512VL-NEXT: vpxor %ymm2, %ymm2, %ymm2
+; AVX512VL-NEXT: vpshufb %ymm2, %ymm1, %ymm1
+; AVX512VL-NEXT: vpbroadcastb %xmm0, %xmm0
 ; AVX512VL-NEXT: movl $32767, %eax # imm = 0x7FFF
 ; AVX512VL-NEXT: kmovd %eax, %k1
-; AVX512VL-NEXT: vpshufb %ymm2, %ymm1, %ymm0 {%k1}
+; AVX512VL-NEXT: vmovdqu8 %ymm0, %ymm1 {%k1}
+; AVX512VL-NEXT: vmovdqa %ymm1, %ymm0
 ; AVX512VL-NEXT: retq
   %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32>
   ret <32 x i8> %shuffle
@@ -351,8 +353,8 @@ define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_17_00_
 ; AVX512VL-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
 ; AVX512VL-NEXT: movl $1, %eax
 ; AVX512VL-NEXT: kmovd %eax, %k1
-; AVX512VL-NEXT: vmovdqu8 %ymm1, %ymm0 {%k1}
-; AVX512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
+; AVX512VL-NEXT: vmovdqu8 %ymm0, %ymm1 {%k1}
+; AVX512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm1[0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
 ; AVX512VL-NEXT: retq
   %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32>
   ret <32 x i8> %shuffle
@@ -382,8 +384,8 @@ define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_18_00_00_
 ; AVX512VL-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
 ; AVX512VL-NEXT: movw $1, %ax
 ; AVX512VL-NEXT: kmovw %eax, %k1
-; AVX512VL-NEXT: vmovdqu16 %ymm1, %ymm0 {%k1}
-; AVX512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
+; AVX512VL-NEXT: vmovdqu16 %ymm0, %ymm1 {%k1}
+; AVX512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm1[0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
 ; AVX512VL-NEXT: retq
   %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32>
   ret <32 x i8> %shuffle
@@ -413,8 +415,8 @@ define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_19_00_00_00_
 ; AVX512VL-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
 ; AVX512VL-NEXT: movw $1, %ax
 ; AVX512VL-NEXT: kmovw %eax, %k1
-; AVX512VL-NEXT: vmovdqu16 %ymm1, %ymm0 {%k1}
-; AVX512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
+; AVX512VL-NEXT: vmovdqu16 %ymm0, %ymm1 {%k1}
+; AVX512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm1[0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
 ; AVX512VL-NEXT: retq
   %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32>
   ret <32 x i8> %shuffle
@@ -1036,7 +1038,7 @@ define <32 x i8> @shuffle_v32i8_00_33_02_35_04_37_06_39_08_41_10_43_12_45_14_47_
 ; AVX512VL: # BB#0:
 ; AVX512VL-NEXT: movl $-1431655766, %eax # imm = 0xAAAAAAAA
 ; AVX512VL-NEXT: kmovd %eax, %k1
-; AVX512VL-NEXT: vpblendmb %ymm0, %ymm1, %ymm0 {%k1}
+; AVX512VL-NEXT: vmovdqu8 %ymm1, %ymm0 {%k1}
 ; AVX512VL-NEXT: retq
   %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32>
   ret <32 x i8> %shuffle
@@ -1061,7 +1063,7 @@ define <32 x i8> @shuffle_v32i8_32_01_34_03_36_05_38_07_40_09_42_11_44_13_46_15_
 ; AVX512VL: # BB#0:
 ; AVX512VL-NEXT: movl $-1431655766, %eax # imm = 0xAAAAAAAA
 ; AVX512VL-NEXT: kmovd %eax, %k1
-; AVX512VL-NEXT: vmovdqu8 %ymm1, %ymm0 {%k1}
+; AVX512VL-NEXT: vpblendmb %ymm0, %ymm1, %ymm0 {%k1}
 ; AVX512VL-NEXT: retq
   %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32>
   ret <32 x i8> %shuffle
@@ -1076,8 +1078,7 @@ define <32 x i8> @shuffle_v32i8_zz_01_zz_03_zz_05_zz_07_zz_09_zz_11_zz_13_zz_15_
 ; AVX512VL-LABEL: shuffle_v32i8_zz_01_zz_03_zz_05_zz_07_zz_09_zz_11_zz_13_zz_15_zz_17_zz_19_zz_21_zz_23_zz_25_zz_27_zz_29_zz_31:
 ; AVX512VL: # BB#0:
 ; AVX512VL-NEXT: movl $-1431655766, %eax # imm = 0xAAAAAAAA
-; AVX512VL-NEXT: kmovd %eax, %k0
-; AVX512VL-NEXT: knotd %k0, %k1
+; AVX512VL-NEXT: kmovd %eax, %k1
 ; AVX512VL-NEXT: vmovdqu8 %ymm0, %ymm0 {%k1} {z}
 ; AVX512VL-NEXT: retq
   %shuffle = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32>
@@ -1144,14 +1145,12 @@ define <32 x i8> @shuffle_v32i8_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_
 ;
 ; AVX512VL-LABEL: shuffle_v32i8_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_16_48_16_48_16_48_16_48_16_48_16_48_16_48_16_48:
 ; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vpxor %ymm2, %ymm2, %ymm2
-; AVX512VL-NEXT: vpshufb %ymm2, %ymm1, %ymm1
 ; AVX512VL-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15]
 ; AVX512VL-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,1,1,4,4,5,5]
+; AVX512VL-NEXT: vpxor %ymm2, %ymm2, %ymm2
 ; AVX512VL-NEXT: movl $-1431655766, %eax # imm = 0xAAAAAAAA
 ; AVX512VL-NEXT: kmovd %eax, %k1
-; AVX512VL-NEXT: vmovdqu8 %ymm0, %ymm1 {%k1}
-; AVX512VL-NEXT: vmovdqa %ymm1, %ymm0
+; AVX512VL-NEXT: vpshufb %ymm2, %ymm1, %ymm0 {%k1}
 ; AVX512VL-NEXT: retq
   %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32>
   ret <32 x i8> %shuffle
@@ -1394,11 +1393,10 @@ define <32 x i8> @shuffle_v32i8_00_32_01_33_02_34_03_35_04_36_05_37_06_38_07_39_
 ;
 ; AVX512VL-LABEL: shuffle_v32i8_00_32_01_33_02_34_03_35_04_36_05_37_06_38_07_39_24_56_25_57_26_58_27_59_28_60_29_61_30_62_31_63:
 ; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,0,u,1,u,2,u,3,u,4,u,5,u,6,u,7,u,24,u,25,u,26,u,27,u,28,u,29,u,30,u,31]
+; AVX512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,u,1,u,2,u,3,u,4,u,5,u,6,u,7,u,24,u,25,u,26,u,27,u,28,u,29,u,30,u,31,u]
 ; AVX512VL-NEXT: movl $-1431655766, %eax # imm = 0xAAAAAAAA
 ; AVX512VL-NEXT: kmovd %eax, %k1
-; AVX512VL-NEXT: vpshufb {{.*#+}} ymm1 {%k1} = ymm0[0,u,1,u,2,u,3,u,4,u,5,u,6,u,7,u,24,u,25,u,26,u,27,u,28,u,29,u,30,u,31,u]
-; AVX512VL-NEXT: vmovdqa %ymm1, %ymm0
+; AVX512VL-NEXT: vpshufb {{.*#+}} ymm0 {%k1} = ymm1[u,0,u,1,u,2,u,3,u,4,u,5,u,6,u,7,u,24,u,25,u,26,u,27,u,28,u,29,u,30,u,31]
 ; AVX512VL-NEXT: retq
   %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32>
   ret <32 x i8> %shuffle
@@ -1424,11 +1422,10 @@ define <32 x i8> @shuffle_v32i8_08_40_09_41_10_42_11_43_12_44_13_45_14_46_15_47_
 ;
 ; AVX512VL-LABEL: shuffle_v32i8_08_40_09_41_10_42_11_43_12_44_13_45_14_46_15_47_16_48_17_49_18_50_19_51_20_52_21_53_22_54_23_55:
 ; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,8,u,9,u,10,u,11,u,12,u,13,u,14,u,15,u,16,u,17,u,18,u,19,u,20,u,21,u,22,u,23]
+; AVX512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[8,u,9,u,10,u,11,u,12,u,13,u,14,u,15,u,16,u,17,u,18,u,19,u,20,u,21,u,22,u,23,u]
 ; AVX512VL-NEXT: movl $-1431655766, %eax # imm = 0xAAAAAAAA
 ; AVX512VL-NEXT: kmovd %eax, %k1
-; AVX512VL-NEXT: vpshufb {{.*#+}} ymm1 {%k1} = ymm0[8,u,9,u,10,u,11,u,12,u,13,u,14,u,15,u,16,u,17,u,18,u,19,u,20,u,21,u,22,u,23,u]
-; AVX512VL-NEXT: vmovdqa %ymm1, %ymm0
+; AVX512VL-NEXT: vpshufb {{.*#+}} ymm0 {%k1} = ymm1[u,8,u,9,u,10,u,11,u,12,u,13,u,14,u,15,u,16,u,17,u,18,u,19,u,20,u,21,u,22,u,23]
 ; AVX512VL-NEXT: retq
   %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32>
   ret <32 x i8> %shuffle
@@ -1681,17 +1678,18 @@ define <32 x i8> @shuffle_v32i8_42_45_12_13_35_35_60_40_17_22_29_44_33_12_48_51_
 ; AVX512VL-LABEL: shuffle_v32i8_42_45_12_13_35_35_60_40_17_22_29_44_33_12_48_51_20_19_52_19_49_54_37_32_48_42_59_07_36_34_36_39:
 ; AVX512VL: # BB#0:
 ; AVX512VL-NEXT: vperm2i128 {{.*#+}} ymm2 = ymm1[2,3,0,1]
-; AVX512VL-NEXT: vpshufb {{.*#+}} ymm2 = ymm2[u,u,u,u,u,u,12,u,u,u,u,u,u,u,0,3,u,u,u,u,u,u,21,16,u,26,u,u,20,18,20,23]
+; AVX512VL-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[10,13,u,u,3,3,u,8,u,u,u,12,1,u,u,u,u,u,20,u,17,22,u,u,16,u,27,u,u,u,u,u]
 ; AVX512VL-NEXT: movl $-222248896, %eax # imm = 0xF2C0C040
 ; AVX512VL-NEXT: kmovd %eax, %k1
-; AVX512VL-NEXT: vpshufb {{.*#+}} ymm2 {%k1} = ymm1[10,13,u,u,3,3,u,8,u,u,u,12,1,u,u,u,u,u,20,u,17,22,u,u,16,u,27,u,u,u,u,u]
-; AVX512VL-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
-; AVX512VL-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,u,u,u,u,u,u,1,6,13,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,23,u,u,u,u]
+; AVX512VL-NEXT: vpshufb {{.*#+}} ymm1 {%k1} = ymm2[u,u,u,u,u,u,12,u,u,u,u,u,u,u,0,3,u,u,u,u,u,u,21,16,u,26,u,u,20,18,20,23]
+; AVX512VL-NEXT: vperm2i128 {{.*#+}} ymm2 = ymm0[2,3,0,1]
+; AVX512VL-NEXT: vpshufb {{.*#+}} ymm2 = ymm2[u,u,u,u,u,u,u,u,1,6,13,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,23,u,u,u,u]
 ; AVX512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,12,13,u,u,u,u,u,u,u,u,u,12,u,u,20,19,u,19,u,u,u,u,u,u,u,u,u,u,u,u]
-; AVX512VL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2],ymm0[3,4,5],ymm1[6],ymm0[7]
+; AVX512VL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm2[2],ymm0[3,4,5],ymm2[6],ymm0[7]
 ; AVX512VL-NEXT: movl $134948620, %eax # imm = 0x80B270C
 ; AVX512VL-NEXT: kmovd %eax, %k1
-; AVX512VL-NEXT: vmovdqu8 %ymm2, %ymm0 {%k1}
+; AVX512VL-NEXT: vmovdqu8 %ymm0, %ymm1 {%k1}
+; AVX512VL-NEXT: vmovdqa %ymm1, %ymm0
 ; AVX512VL-NEXT: retq
   %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32>
   ret <32 x i8> %shuffle
diff --git a/test/CodeGen/X86/vector-shuffle-512-v16.ll b/test/CodeGen/X86/vector-shuffle-512-v16.ll
index 628cb935828..983360184fb 100644
--- a/test/CodeGen/X86/vector-shuffle-512-v16.ll
+++ b/test/CodeGen/X86/vector-shuffle-512-v16.ll
@@ -254,7 +254,7 @@ define <16 x i32> @shuffle_v16i32_0_1_2_19_u_u_u_u_u_u_u_u_u_u_u_u(<16 x i32> %a
 ; ALL: # BB#0:
 ; ALL-NEXT: movw $8, %ax
 ; ALL-NEXT: kmovw %eax, %k1
-; ALL-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
+; ALL-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1}
 ; ALL-NEXT: retq
   %c = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32>
   ret <16 x i32> %c
diff --git a/test/CodeGen/X86/vector-shuffle-512-v32.ll b/test/CodeGen/X86/vector-shuffle-512-v32.ll
index 18041dac2c6..26cd7301fe6 100644
--- a/test/CodeGen/X86/vector-shuffle-512-v32.ll
+++ b/test/CodeGen/X86/vector-shuffle-512-v32.ll
@@ -110,10 +110,9 @@ define <32 x i16> @shuffle_v32i16_1_1_0_0_5_5_4_4_9_9_11_11_13_13_12_12_17_17_19
 define <32 x i16> @shuffle_v32i16_0zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz(<32 x i16> %a) {
 ; ALL-LABEL: shuffle_v32i16_0zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz:
 ; ALL: # BB#0:
-; ALL-NEXT: movl $1, %eax
-; ALL-NEXT: kmovd %eax, %k0
-; ALL-NEXT: knotd %k0, %k1
-; ALL-NEXT: vmovdqu16 %zmm0, %zmm0 {%k1} {z}
+; ALL-NEXT: movl $1, %eax
+; ALL-NEXT: kmovd %eax, %k1
+; ALL-NEXT: vmovdqu16 %zmm0, %zmm0 {%k1} {z}
 ; ALL-NEXT: retq
   %shuffle = shufflevector <32 x i16> %a, <32 x i16> zeroinitializer, <32 x i32>
   ret <32 x i16> %shuffle
diff --git a/test/CodeGen/X86/vector-shuffle-512-v8.ll b/test/CodeGen/X86/vector-shuffle-512-v8.ll
index 59a6629081f..26e08d365e0 100644
--- a/test/CodeGen/X86/vector-shuffle-512-v8.ll
+++ b/test/CodeGen/X86/vector-shuffle-512-v8.ll
@@ -1179,17 +1179,16 @@ define <8 x i64> @shuffle_v8i64_81a3c5e7(<8 x i64> %a, <8 x i64> %b) {
 ;
 ; AVX512F-LABEL: shuffle_v8i64_81a3c5e7:
 ; AVX512F: # BB#0:
-; AVX512F-NEXT: movb $-86, %al
-; AVX512F-NEXT: kmovw %eax, %k1
-; AVX512F-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1}
-; AVX512F-NEXT: retq
-;
+; AVX512F-NEXT: movb $-86, %al
+; AVX512F-NEXT: kmovw %eax, %k1
+; AVX512F-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
+; AVX512F-NEXT: retq
 ;
 ; AVX512F-32-LABEL: shuffle_v8i64_81a3c5e7:
 ; AVX512F-32: # BB#0:
 ; AVX512F-32-NEXT: movb $-86, %al
 ; AVX512F-32-NEXT: kmovw %eax, %k1
-; AVX512F-32-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1}
+; AVX512F-32-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
 ; AVX512F-32-NEXT: retl
   %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32>
   ret <8 x i64> %shuffle
diff --git a/test/CodeGen/X86/vector-shuffle-avx512.ll b/test/CodeGen/X86/vector-shuffle-avx512.ll
index 5cef69a0e22..4098d16d288 100644
--- a/test/CodeGen/X86/vector-shuffle-avx512.ll
+++ b/test/CodeGen/X86/vector-shuffle-avx512.ll
@@ -595,7 +595,7 @@ define <64 x i8> @test_mm512_mask_blend_epi8(<64 x i8> %A, <64 x i8> %W){
 ; SKX64: # BB#0: # %entry
 ; SKX64-NEXT: movabsq $-6148914691236517206, %rax # imm = 0xAAAAAAAAAAAAAAAA
 ; SKX64-NEXT: kmovq %rax, %k1
-; SKX64-NEXT: vmovdqu8 %zmm1, %zmm0 {%k1}
+; SKX64-NEXT: vpblendmb %zmm0, %zmm1, %zmm0 {%k1}
 ; SKX64-NEXT: retq
 ;
 ; KNL64-LABEL: test_mm512_mask_blend_epi8:
@@ -610,7 +610,7 @@ define <64 x i8> @test_mm512_mask_blend_epi8(<64 x i8> %A, <64 x i8> %W){
 ; SKX32-NEXT: movl $-1431655766, %eax # imm = 0xAAAAAAAA
 ; SKX32-NEXT: kmovd %eax, %k0
 ; SKX32-NEXT: kunpckdq %k0, %k0, %k1
-; SKX32-NEXT: vmovdqu8 %zmm1, %zmm0 {%k1}
+; SKX32-NEXT: vpblendmb %zmm0, %zmm1, %zmm0 {%k1}
 ; SKX32-NEXT: retl
 ;
 ; KNL32-LABEL: test_mm512_mask_blend_epi8:
@@ -641,7 +641,7 @@ define <32 x i16> @test_mm512_mask_blend_epi16(<32 x i16> %A, <32 x i16> %W){
 ; SKX64: # BB#0: # %entry
 ; SKX64-NEXT: movl $-1431655766, %eax # imm = 0xAAAAAAAA
 ; SKX64-NEXT: kmovd %eax, %k1
-; SKX64-NEXT: vmovdqu16 %zmm1, %zmm0 {%k1}
+; SKX64-NEXT: vpblendmw %zmm0, %zmm1, %zmm0 {%k1}
 ; SKX64-NEXT: retq
 ;
 ; KNL64-LABEL: test_mm512_mask_blend_epi16:
@@ -654,7 +654,7 @@ define <32 x i16> @test_mm512_mask_blend_epi16(<32 x i16> %A, <32 x i16> %W){
 ; SKX32: # BB#0: # %entry
 ; SKX32-NEXT: movl $-1431655766, %eax # imm = 0xAAAAAAAA
 ; SKX32-NEXT: kmovd %eax, %k1
-; SKX32-NEXT: vmovdqu16 %zmm1, %zmm0 {%k1}
+; SKX32-NEXT: vpblendmw %zmm0, %zmm1, %zmm0 {%k1}
 ; SKX32-NEXT: retl
 ;
 ; KNL32-LABEL: test_mm512_mask_blend_epi16:
@@ -684,28 +684,28 @@ define <16 x i32> @test_mm512_mask_blend_epi32(<16 x i32> %A, <16 x i32> %W){
 ; SKX64: # BB#0: # %entry
 ; SKX64-NEXT: movw $-21846, %ax # imm = 0xAAAA
 ; SKX64-NEXT: kmovw %eax, %k1
-; SKX64-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1}
+; SKX64-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
 ; SKX64-NEXT: retq
 ;
 ; KNL64-LABEL: test_mm512_mask_blend_epi32:
 ; KNL64: # BB#0: # %entry
 ; KNL64-NEXT: movw $-21846, %ax # imm = 0xAAAA
 ; KNL64-NEXT: kmovw %eax, %k1
-; KNL64-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1}
+; KNL64-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
 ; KNL64-NEXT: retq
 ;
 ; SKX32-LABEL: test_mm512_mask_blend_epi32:
 ; SKX32: # BB#0: # %entry
 ; SKX32-NEXT: movw $-21846, %ax # imm = 0xAAAA
 ; SKX32-NEXT: kmovw %eax, %k1
-; SKX32-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1}
+; SKX32-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
 ; SKX32-NEXT: retl
 ;
 ; KNL32-LABEL: test_mm512_mask_blend_epi32:
 ; KNL32: # BB#0: # %entry
 ; KNL32-NEXT: movw $-21846, %ax # imm = 0xAAAA
 ; KNL32-NEXT: kmovw %eax, %k1
-; KNL32-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1}
+; KNL32-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
 ; KNL32-NEXT: retl
 entry:
   %0 = shufflevector <16 x i32> %A, <16 x i32> %W, <16 x i32>
@@ -717,28 +717,28 @@ define <8 x i64> @test_mm512_mask_blend_epi64(<8 x i64> %A, <8 x i64> %W){
 ; SKX64: # BB#0: # %entry
 ; SKX64-NEXT: movb $-86, %al
 ; SKX64-NEXT: kmovb %eax, %k1
-; SKX64-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1}
+; SKX64-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
 ; SKX64-NEXT: retq
 ;
 ; KNL64-LABEL: test_mm512_mask_blend_epi64:
 ; KNL64: # BB#0: # %entry
 ; KNL64-NEXT: movb $-86, %al
 ; KNL64-NEXT: kmovw %eax, %k1
-; KNL64-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1}
+; KNL64-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
 ; KNL64-NEXT: retq
 ;
 ; SKX32-LABEL: test_mm512_mask_blend_epi64:
 ; SKX32: # BB#0: # %entry
 ; SKX32-NEXT: movb $-86, %al
 ; SKX32-NEXT: kmovb %eax, %k1
-; SKX32-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1}
+; SKX32-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
 ; SKX32-NEXT: retl
 ;
 ; KNL32-LABEL: test_mm512_mask_blend_epi64:
 ; KNL32: # BB#0: # %entry
 ; KNL32-NEXT: movb $-86, %al
 ; KNL32-NEXT: kmovw %eax, %k1
-; KNL32-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1}
+; KNL32-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
 ; KNL32-NEXT: retl
 entry:
   %0 = shufflevector <8 x i64> %A, <8 x i64> %W, <8 x i32>
@@ -750,28 +750,28 @@ define <16 x float> @test_mm512_mask_blend_ps(<16 x float> %A, <16 x float> %W){
 ; SKX64: # BB#0: # %entry
 ; SKX64-NEXT: movw $-21846, %ax # imm = 0xAAAA
 ; SKX64-NEXT: kmovw %eax, %k1
-; SKX64-NEXT: vmovaps %zmm1, %zmm0 {%k1}
+; SKX64-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1}
 ; SKX64-NEXT: retq
 ;
 ; KNL64-LABEL: test_mm512_mask_blend_ps:
 ; KNL64: # BB#0: # %entry
 ; KNL64-NEXT: movw $-21846, %ax # imm = 0xAAAA
 ; KNL64-NEXT: kmovw %eax, %k1
-; KNL64-NEXT: vmovaps %zmm1, %zmm0 {%k1}
+; KNL64-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1}
 ; KNL64-NEXT: retq
 ;
 ; SKX32-LABEL: test_mm512_mask_blend_ps:
 ; SKX32: # BB#0: # %entry
 ; SKX32-NEXT: movw $-21846, %ax # imm = 0xAAAA
 ; SKX32-NEXT: kmovw %eax, %k1
-; SKX32-NEXT: vmovaps %zmm1, %zmm0 {%k1}
+; SKX32-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1}
 ; SKX32-NEXT: retl
 ;
 ; KNL32-LABEL: test_mm512_mask_blend_ps:
 ; KNL32: # BB#0: # %entry
 ; KNL32-NEXT: movw $-21846, %ax # imm = 0xAAAA
 ; KNL32-NEXT: kmovw %eax, %k1
-; KNL32-NEXT: vmovaps %zmm1, %zmm0 {%k1}
+; KNL32-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1}
 ; KNL32-NEXT: retl
 entry:
   %0 = shufflevector <16 x float> %A, <16 x float> %W, <16 x i32>
@@ -783,28 +783,28 @@ define <8 x double> @test_mm512_mask_blend_pd(<8 x double> %A, <8 x double> %W){
 ; SKX64: # BB#0: # %entry
 ; SKX64-NEXT: movb $-88, %al
 ; SKX64-NEXT: kmovb %eax, %k1
-; SKX64-NEXT: vmovapd %zmm1, %zmm0 {%k1}
+; SKX64-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1}
 ; SKX64-NEXT: retq
 ;
 ; KNL64-LABEL: test_mm512_mask_blend_pd:
 ; KNL64: # BB#0: # %entry
 ; KNL64-NEXT: movb $-88, %al
 ; KNL64-NEXT: kmovw %eax, %k1
-; KNL64-NEXT: vmovapd %zmm1, %zmm0 {%k1}
+; KNL64-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1}
 ; KNL64-NEXT: retq
 ;
 ; SKX32-LABEL: test_mm512_mask_blend_pd:
 ; SKX32: # BB#0: # %entry
 ; SKX32-NEXT: movb $-88, %al
 ; SKX32-NEXT: kmovb %eax, %k1
-; SKX32-NEXT: vmovapd %zmm1, %zmm0 {%k1}
+; SKX32-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1}
 ; SKX32-NEXT: retl
 ;
 ; KNL32-LABEL: test_mm512_mask_blend_pd:
 ; KNL32: # BB#0: # %entry
 ; KNL32-NEXT: movb $-88, %al
 ; KNL32-NEXT: kmovw %eax, %k1
-; KNL32-NEXT: vmovapd %zmm1, %zmm0 {%k1}
+; KNL32-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1}
 ; KNL32-NEXT: retl
 entry:
   %0 = shufflevector <8 x double> %A, <8 x double> %W, <8 x i32>
@@ -817,7 +817,7 @@ define <32 x i8> @test_mm256_mask_blend_epi8(<32 x i8> %A, <32 x i8> %W){
 ; SKX64: # BB#0: # %entry
 ; SKX64-NEXT: movl $-1431655766, %eax # imm = 0xAAAAAAAA
 ; SKX64-NEXT: kmovd %eax, %k1
-; SKX64-NEXT: vmovdqu8 %ymm1, %ymm0 {%k1}
+; SKX64-NEXT: vpblendmb %ymm0, %ymm1, %ymm0 {%k1}
 ; SKX64-NEXT: retq
 ;
 ; KNL64-LABEL: test_mm256_mask_blend_epi8:
@@ -830,7 +830,7 @@ define <32 x i8> @test_mm256_mask_blend_epi8(<32 x i8> %A, <32 x i8> %W){
 ; SKX32: # BB#0: # %entry
 ; SKX32-NEXT: movl $-1431655766, %eax # imm = 0xAAAAAAAA
 ; SKX32-NEXT: kmovd %eax, %k1
-; SKX32-NEXT: vmovdqu8 %ymm1, %ymm0 {%k1}
+; SKX32-NEXT: vpblendmb %ymm0, %ymm1, %ymm0 {%k1}
 ; SKX32-NEXT: retl
 ;
 ; KNL32-LABEL: test_mm256_mask_blend_epi8:
@@ -848,7 +848,7 @@ define <16 x i8> @test_mm_mask_blend_epi8(<16 x i8> %A, <16 x i8> %W){
 ; SKX64: # BB#0: # %entry
 ; SKX64-NEXT: movw $-21846, %ax # imm = 0xAAAA
 ; SKX64-NEXT: kmovw %eax, %k1
-; SKX64-NEXT: vmovdqu8 %xmm1, %xmm0 {%k1}
+; SKX64-NEXT: vpblendmb %xmm0, %xmm1, %xmm0 {%k1}
 ; SKX64-NEXT: retq
 ;
 ; KNL64-LABEL: test_mm_mask_blend_epi8:
@@ -861,7 +861,7 @@ define <16 x i8> @test_mm_mask_blend_epi8(<16 x i8> %A, <16 x i8> %W){
 ; SKX32: # BB#0: # %entry
 ; SKX32-NEXT: movw $-21846, %ax # imm = 0xAAAA
 ; SKX32-NEXT: kmovw %eax, %k1
-; SKX32-NEXT: vmovdqu8 %xmm1, %xmm0 {%k1}
+; SKX32-NEXT: vpblendmb %xmm0, %xmm1, %xmm0 {%k1}
 ; SKX32-NEXT: retl
 ;
 ; KNL32-LABEL: test_mm_mask_blend_epi8:
-- 
2.11.0
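
Editor's note (illustrative addition, not part of the upstream patch): the operand swap
in lowerVectorShuffleAsBlend follows from two different mask conventions. The shuffle
lowering sets bit i of BlendMask when element i of the result should come from V2, while
the masked-select form that getVectorMaskingNode builds takes its first operand where a
mask bit is set and keeps the pass-through operand elsewhere. A minimal scalar sketch of
that equivalence, assuming only what the diff shows; the helper names below are
hypothetical and not LLVM API:

#include <array>
#include <cstddef>
#include <cstdint>

// Shuffle-lowering convention: BlendMask bit i is set when element i of
// the result should come from V2.
template <std::size_t N>
std::array<int, N> blendByShuffleMask(std::uint64_t BlendMask,
                                      const std::array<int, N> &V1,
                                      const std::array<int, N> &V2) {
  std::array<int, N> Result{};
  for (std::size_t I = 0; I != N; ++I)
    Result[I] = ((BlendMask >> I) & 1) ? V2[I] : V1[I];
  return Result;
}

// Masked-select convention: a set mask bit selects from the first
// operand; a clear bit keeps the pass-through operand.
template <std::size_t N>
std::array<int, N> maskedSelect(std::uint64_t Mask,
                                const std::array<int, N> &Op,
                                const std::array<int, N> &PassThru) {
  std::array<int, N> Result{};
  for (std::size_t I = 0; I != N; ++I)
    Result[I] = ((Mask >> I) & 1) ? Op[I] : PassThru[I];
  return Result;
}

// For every Mask, V1, V2:
//   blendByShuffleMask(Mask, V1, V2) == maskedSelect(Mask, V2, V1)
// which is why the call sites change from getVectorMaskingNode(V1, MaskNode, V2, ...)
// to getVectorMaskingNode(V2, MaskNode, V1, ...).

The same identity accounts for the test churn above: with the operands swapped, the
expected assembly flips between the masked-move form (e.g. vmovdqu8 %ymm1, %ymm0 {%k1})
and the blend form (e.g. vpblendmb %ymm0, %ymm1, %ymm0 {%k1}), depending on which
operand ends up preserved.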