From 9a553e994e8feb9317382a640b49985a1a7ad58a Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Tue, 10 Oct 2017 06:36:46 +0000 Subject: [PATCH] [AVX512] Add patterns to commute integer comparison instructions during isel. This enables broadcast loads to be commuted and allows normal loads to be folded without the peephole pass. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@315274 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrAVX512.td | 41 +++++++ test/CodeGen/X86/avx512vl-vec-masked-cmp.ll | 160 ++++++++++------------------ 2 files changed, 95 insertions(+), 106 deletions(-) diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index 5fd8694eb3b..aeb08676bf9 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -1998,6 +1998,24 @@ defm VPCMPGTQ : avx512_icmp_packed_rmb_vl<0x37, "vpcmpgtq", X86pcmpgtm, avx512vl_i64_info, HasAVX512>, T8PD, VEX_W, EVEX_CD8<64, CD8VF>; +// Transforms to swizzle an immediate to help matching memory operand in first +// operand. 
+def CommutePCMPCC : SDNodeXForm<imm, [{
+  uint8_t Imm = N->getZExtValue() & 0x7;
+  switch (Imm) {
+  default: llvm_unreachable("Unreachable!");
+  case 0x01: Imm = 0x06; break; // LT -> NLE
+  case 0x02: Imm = 0x05; break; // LE -> NLT
+  case 0x05: Imm = 0x02; break; // NLT -> LE
+  case 0x06: Imm = 0x01; break; // NLE -> LT
+  case 0x00: // EQ
+  case 0x03: // FALSE
+  case 0x04: // NE
+  case 0x07: // TRUE
+    break;
+  }
+  return getI8Imm(Imm, SDLoc(N));
+}]>;
 
 multiclass avx512_icmp_cc<bits<8> opc, string Suffix, SDNode OpNode,
                           X86VectorVTInfo _> {
@@ -2069,6 +2087,17 @@ multiclass avx512_icmp_cc<bits<8> opc, string Suffix, SDNode OpNode,
                          "$dst {${mask}}, $src1, $src2, $cc}"),
               [], IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_K;
   }
+
+  def : Pat<(OpNode (bitconvert (_.LdFrag addr:$src2)),
+                    (_.VT _.RC:$src1), imm:$cc),
+            (!cast<Instruction>(NAME#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2,
+                                                      (CommutePCMPCC imm:$cc))>;
+
+  def : Pat<(and _.KRCWM:$mask, (OpNode (bitconvert (_.LdFrag addr:$src2)),
+                                        (_.VT _.RC:$src1), imm:$cc)),
+            (!cast<Instruction>(NAME#_.ZSuffix#"rmik") _.KRCWM:$mask,
+                                                       _.RC:$src1, addr:$src2,
+                                                       (CommutePCMPCC imm:$cc))>;
 }
 
 multiclass avx512_icmp_cc_rmb<bits<8> opc, string Suffix, SDNode OpNode,
@@ -2113,6 +2142,18 @@ multiclass avx512_icmp_cc_rmb<bits<8> opc, string Suffix, SDNode OpNode,
                          "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, ", $cc}"),
               [], IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_K, EVEX_B;
   }
+
+  def : Pat<(OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src2)),
+                    (_.VT _.RC:$src1), imm:$cc),
+            (!cast<Instruction>(NAME#_.ZSuffix#"rmib") _.RC:$src1, addr:$src2,
+                                                       (CommutePCMPCC imm:$cc))>;
+
+  def : Pat<(and _.KRCWM:$mask, (OpNode (X86VBroadcast
+                                          (_.ScalarLdFrag addr:$src2)),
+                                        (_.VT _.RC:$src1), imm:$cc)),
+            (!cast<Instruction>(NAME#_.ZSuffix#"rmibk") _.KRCWM:$mask,
+                                                        _.RC:$src1, addr:$src2,
+                                                        (CommutePCMPCC imm:$cc))>;
 }
 
 multiclass avx512_icmp_cc_vl<bits<8> opc, string Suffix, SDNode OpNode,
diff --git a/test/CodeGen/X86/avx512vl-vec-masked-cmp.ll b/test/CodeGen/X86/avx512vl-vec-masked-cmp.ll
index f56b1671b4f..5ee06fde127 100644
--- a/test/CodeGen/X86/avx512vl-vec-masked-cmp.ll
+++ b/test/CodeGen/X86/avx512vl-vec-masked-cmp.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl,+avx512dq | FileCheck %s --check-prefix=CHECK --check-prefix=VLX -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=NoVLX +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -disable-peephole -mattr=+avx512f,+avx512bw,+avx512vl,+avx512dq | FileCheck %s --check-prefix=CHECK --check-prefix=VLX +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -disable-peephole -mattr=+avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=NoVLX define zeroext i32 @test_vpcmpeqb_v16i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpeqb_v16i1_v32i1_mask: @@ -29317,8 +29317,7 @@ entry: define zeroext i8 @test_vpcmpsged_v4i1_v8i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsged_v4i1_v8i1_mask_mem_b: ; VLX: # BB#0: # %entry -; VLX-NEXT: vpbroadcastd (%rdi), %xmm1 -; VLX-NEXT: vpcmpled %xmm0, %xmm1, %k0 +; VLX-NEXT: vpcmpnltd (%rdi){1to4}, %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AL %AL %EAX ; VLX-NEXT: retq @@ -29379,9 +29378,8 @@ entry: define zeroext i8 @test_masked_vpcmpsged_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsged_v4i1_v8i1_mask_mem_b: ; VLX: # BB#0: # %entry -; VLX-NEXT: vpbroadcastd (%rsi), %xmm1 ; VLX-NEXT: kmovd %edi, %k1 -; VLX-NEXT: vpcmpled %xmm0, %xmm1, %k0 {%k1} +; VLX-NEXT: vpcmpnltd (%rsi){1to4}, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AL %AL %EAX ; VLX-NEXT: retq @@ -29735,8 +29733,7 @@ entry: define zeroext i16 @test_vpcmpsged_v4i1_v16i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsged_v4i1_v16i1_mask_mem_b: ; VLX: # BB#0: # %entry -; VLX-NEXT: 
vpbroadcastd (%rdi), %xmm1 -; VLX-NEXT: vpcmpled %xmm0, %xmm1, %k0 +; VLX-NEXT: vpcmpnltd (%rdi){1to4}, %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AX %AX %EAX ; VLX-NEXT: retq @@ -29796,9 +29793,8 @@ entry: define zeroext i16 @test_masked_vpcmpsged_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsged_v4i1_v16i1_mask_mem_b: ; VLX: # BB#0: # %entry -; VLX-NEXT: vpbroadcastd (%rsi), %xmm1 ; VLX-NEXT: kmovd %edi, %k1 -; VLX-NEXT: vpcmpled %xmm0, %xmm1, %k0 {%k1} +; VLX-NEXT: vpcmpnltd (%rsi){1to4}, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AX %AX %EAX ; VLX-NEXT: retq @@ -30095,8 +30091,7 @@ entry: define zeroext i32 @test_vpcmpsged_v4i1_v32i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsged_v4i1_v32i1_mask_mem_b: ; VLX: # BB#0: # %entry -; VLX-NEXT: vpbroadcastd (%rdi), %xmm1 -; VLX-NEXT: vpcmpled %xmm0, %xmm1, %k0 +; VLX-NEXT: vpcmpnltd (%rdi){1to4}, %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: retq ; @@ -30142,9 +30137,8 @@ entry: define zeroext i32 @test_masked_vpcmpsged_v4i1_v32i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsged_v4i1_v32i1_mask_mem_b: ; VLX: # BB#0: # %entry -; VLX-NEXT: vpbroadcastd (%rsi), %xmm1 ; VLX-NEXT: kmovd %edi, %k1 -; VLX-NEXT: vpcmpled %xmm0, %xmm1, %k0 {%k1} +; VLX-NEXT: vpcmpnltd (%rsi){1to4}, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: retq ; @@ -30451,8 +30445,7 @@ entry: define zeroext i64 @test_vpcmpsged_v4i1_v64i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsged_v4i1_v64i1_mask_mem_b: ; VLX: # BB#0: # %entry -; VLX-NEXT: vpbroadcastd (%rdi), %xmm1 -; VLX-NEXT: vpcmpled %xmm0, %xmm1, %k0 +; VLX-NEXT: vpcmpnltd (%rdi){1to4}, %xmm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: retq ; @@ -30504,9 +30497,8 @@ entry: define zeroext i64 
@test_masked_vpcmpsged_v4i1_v64i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsged_v4i1_v64i1_mask_mem_b: ; VLX: # BB#0: # %entry -; VLX-NEXT: vpbroadcastd (%rsi), %xmm1 ; VLX-NEXT: kmovd %edi, %k1 -; VLX-NEXT: vpcmpled %xmm0, %xmm1, %k0 {%k1} +; VLX-NEXT: vpcmpnltd (%rsi){1to4}, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: retq ; @@ -30705,8 +30697,7 @@ entry: define zeroext i16 @test_vpcmpsged_v8i1_v16i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsged_v8i1_v16i1_mask_mem_b: ; VLX: # BB#0: # %entry -; VLX-NEXT: vpbroadcastd (%rdi), %ymm1 -; VLX-NEXT: vpcmpled %ymm0, %ymm1, %k0 +; VLX-NEXT: vpcmpnltd (%rdi){1to8}, %ymm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AX %AX %EAX ; VLX-NEXT: vzeroupper @@ -30737,9 +30728,8 @@ entry: define zeroext i16 @test_masked_vpcmpsged_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsged_v8i1_v16i1_mask_mem_b: ; VLX: # BB#0: # %entry -; VLX-NEXT: vpbroadcastd (%rsi), %ymm1 ; VLX-NEXT: kmovd %edi, %k1 -; VLX-NEXT: vpcmpled %ymm0, %ymm1, %k0 {%k1} +; VLX-NEXT: vpcmpnltd (%rsi){1to8}, %ymm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AX %AX %EAX ; VLX-NEXT: vzeroupper @@ -31077,8 +31067,7 @@ entry: define zeroext i32 @test_vpcmpsged_v8i1_v32i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsged_v8i1_v32i1_mask_mem_b: ; VLX: # BB#0: # %entry -; VLX-NEXT: vpbroadcastd (%rdi), %ymm1 -; VLX-NEXT: vpcmpled %ymm0, %ymm1, %k0 +; VLX-NEXT: vpcmpnltd (%rdi){1to8}, %ymm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq @@ -31153,9 +31142,8 @@ entry: define zeroext i32 @test_masked_vpcmpsged_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsged_v8i1_v32i1_mask_mem_b: ; VLX: # BB#0: # %entry -; VLX-NEXT: vpbroadcastd 
(%rsi), %ymm1 ; VLX-NEXT: kmovd %edi, %k1 -; VLX-NEXT: vpcmpled %ymm0, %ymm1, %k0 {%k1} +; VLX-NEXT: vpcmpnltd (%rsi){1to8}, %ymm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq @@ -31557,8 +31545,7 @@ entry: define zeroext i64 @test_vpcmpsged_v8i1_v64i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsged_v8i1_v64i1_mask_mem_b: ; VLX: # BB#0: # %entry -; VLX-NEXT: vpbroadcastd (%rdi), %ymm1 -; VLX-NEXT: vpcmpled %ymm0, %ymm1, %k0 +; VLX-NEXT: vpcmpnltd (%rdi){1to8}, %ymm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq @@ -31638,9 +31625,8 @@ entry: define zeroext i64 @test_masked_vpcmpsged_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsged_v8i1_v64i1_mask_mem_b: ; VLX: # BB#0: # %entry -; VLX-NEXT: vpbroadcastd (%rsi), %ymm1 ; VLX-NEXT: kmovd %edi, %k1 -; VLX-NEXT: vpcmpled %ymm0, %ymm1, %k0 {%k1} +; VLX-NEXT: vpcmpnltd (%rsi){1to8}, %ymm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq @@ -32199,8 +32185,7 @@ entry: define zeroext i32 @test_vpcmpsged_v16i1_v32i1_mask_mem_b(<8 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsged_v16i1_v32i1_mask_mem_b: ; VLX: # BB#0: # %entry -; VLX-NEXT: vpbroadcastd (%rdi), %zmm1 -; VLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 +; VLX-NEXT: vpcmpnltd (%rdi){1to16}, %zmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq @@ -32224,8 +32209,7 @@ define zeroext i32 @test_vpcmpsged_v16i1_v32i1_mask_mem_b(<8 x i64> %__a, i32* % ; NoVLX-NEXT: .cfi_offset %r13, -40 ; NoVLX-NEXT: .cfi_offset %r14, -32 ; NoVLX-NEXT: .cfi_offset %r15, -24 -; NoVLX-NEXT: vpbroadcastd (%rdi), %zmm1 -; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 +; NoVLX-NEXT: vpcmpnltd (%rdi){1to16}, %zmm0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 @@ -32319,9 +32303,8 @@ entry: 
define zeroext i32 @test_masked_vpcmpsged_v16i1_v32i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsged_v16i1_v32i1_mask_mem_b: ; VLX: # BB#0: # %entry -; VLX-NEXT: vpbroadcastd (%rsi), %zmm1 ; VLX-NEXT: kmovd %edi, %k1 -; VLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1} +; VLX-NEXT: vpcmpnltd (%rsi){1to16}, %zmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq @@ -32345,9 +32328,8 @@ define zeroext i32 @test_masked_vpcmpsged_v16i1_v32i1_mask_mem_b(i16 zeroext %__ ; NoVLX-NEXT: .cfi_offset %r13, -40 ; NoVLX-NEXT: .cfi_offset %r14, -32 ; NoVLX-NEXT: .cfi_offset %r15, -24 -; NoVLX-NEXT: vpbroadcastd (%rsi), %zmm1 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1} +; NoVLX-NEXT: vpcmpnltd (%rsi){1to16}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 @@ -32939,8 +32921,7 @@ entry: define zeroext i64 @test_vpcmpsged_v16i1_v64i1_mask_mem_b(<8 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsged_v16i1_v64i1_mask_mem_b: ; VLX: # BB#0: # %entry -; VLX-NEXT: vpbroadcastd (%rdi), %zmm1 -; VLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 +; VLX-NEXT: vpcmpnltd (%rdi){1to16}, %zmm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq @@ -32964,8 +32945,7 @@ define zeroext i64 @test_vpcmpsged_v16i1_v64i1_mask_mem_b(<8 x i64> %__a, i32* % ; NoVLX-NEXT: .cfi_offset %r13, -40 ; NoVLX-NEXT: .cfi_offset %r14, -32 ; NoVLX-NEXT: .cfi_offset %r15, -24 -; NoVLX-NEXT: vpbroadcastd (%rdi), %zmm1 -; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 +; NoVLX-NEXT: vpcmpnltd (%rdi){1to16}, %zmm0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) @@ -33064,9 +33044,8 @@ entry: define zeroext i64 @test_masked_vpcmpsged_v16i1_v64i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, i32* %__b) local_unnamed_addr { ; 
VLX-LABEL: test_masked_vpcmpsged_v16i1_v64i1_mask_mem_b: ; VLX: # BB#0: # %entry -; VLX-NEXT: vpbroadcastd (%rsi), %zmm1 ; VLX-NEXT: kmovd %edi, %k1 -; VLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1} +; VLX-NEXT: vpcmpnltd (%rsi){1to16}, %zmm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq @@ -33090,9 +33069,8 @@ define zeroext i64 @test_masked_vpcmpsged_v16i1_v64i1_mask_mem_b(i16 zeroext %__ ; NoVLX-NEXT: .cfi_offset %r13, -40 ; NoVLX-NEXT: .cfi_offset %r14, -32 ; NoVLX-NEXT: .cfi_offset %r15, -24 -; NoVLX-NEXT: vpbroadcastd (%rsi), %zmm1 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1} +; NoVLX-NEXT: vpcmpnltd (%rsi){1to16}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) @@ -33343,8 +33321,7 @@ entry: define zeroext i4 @test_vpcmpsgeq_v2i1_v4i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgeq_v2i1_v4i1_mask_mem_b: ; VLX: # BB#0: # %entry -; VLX-NEXT: vpbroadcastq (%rdi), %xmm1 -; VLX-NEXT: vpcmpleq %xmm0, %xmm1, %k0 +; VLX-NEXT: vpcmpnltq (%rdi){1to2}, %xmm0, %k0 ; VLX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp) ; VLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax ; VLX-NEXT: retq @@ -33377,9 +33354,8 @@ entry: define zeroext i4 @test_masked_vpcmpsgeq_v2i1_v4i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v4i1_mask_mem_b: ; VLX: # BB#0: # %entry -; VLX-NEXT: vpbroadcastq (%rsi), %xmm1 ; VLX-NEXT: kmovd %edi, %k1 -; VLX-NEXT: vpcmpleq %xmm0, %xmm1, %k0 {%k1} +; VLX-NEXT: vpcmpnltq (%rsi){1to2}, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp) ; VLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax ; VLX-NEXT: retq @@ -33621,8 +33597,7 @@ entry: define zeroext i8 @test_vpcmpsgeq_v2i1_v8i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgeq_v2i1_v8i1_mask_mem_b: ; VLX: # BB#0: # %entry -; VLX-NEXT: 
vpbroadcastq (%rdi), %xmm1 -; VLX-NEXT: vpcmpleq %xmm0, %xmm1, %k0 +; VLX-NEXT: vpcmpnltq (%rdi){1to2}, %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AL %AL %EAX ; VLX-NEXT: retq @@ -33667,9 +33642,8 @@ entry: define zeroext i8 @test_masked_vpcmpsgeq_v2i1_v8i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v8i1_mask_mem_b: ; VLX: # BB#0: # %entry -; VLX-NEXT: vpbroadcastq (%rsi), %xmm1 ; VLX-NEXT: kmovd %edi, %k1 -; VLX-NEXT: vpcmpleq %xmm0, %xmm1, %k0 {%k1} +; VLX-NEXT: vpcmpnltq (%rsi){1to2}, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AL %AL %EAX ; VLX-NEXT: retq @@ -33919,8 +33893,7 @@ entry: define zeroext i16 @test_vpcmpsgeq_v2i1_v16i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgeq_v2i1_v16i1_mask_mem_b: ; VLX: # BB#0: # %entry -; VLX-NEXT: vpbroadcastq (%rdi), %xmm1 -; VLX-NEXT: vpcmpleq %xmm0, %xmm1, %k0 +; VLX-NEXT: vpcmpnltq (%rdi){1to2}, %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AX %AX %EAX ; VLX-NEXT: retq @@ -33964,9 +33937,8 @@ entry: define zeroext i16 @test_masked_vpcmpsgeq_v2i1_v16i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v16i1_mask_mem_b: ; VLX: # BB#0: # %entry -; VLX-NEXT: vpbroadcastq (%rsi), %xmm1 ; VLX-NEXT: kmovd %edi, %k1 -; VLX-NEXT: vpcmpleq %xmm0, %xmm1, %k0 {%k1} +; VLX-NEXT: vpcmpnltq (%rsi){1to2}, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AX %AX %EAX ; VLX-NEXT: retq @@ -34223,8 +34195,7 @@ entry: define zeroext i32 @test_vpcmpsgeq_v2i1_v32i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgeq_v2i1_v32i1_mask_mem_b: ; VLX: # BB#0: # %entry -; VLX-NEXT: vpbroadcastq (%rdi), %xmm1 -; VLX-NEXT: vpcmpleq %xmm0, %xmm1, %k0 +; VLX-NEXT: vpcmpnltq (%rdi){1to2}, %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: retq ; @@ -34270,9 +34241,8 @@ entry: 
define zeroext i32 @test_masked_vpcmpsgeq_v2i1_v32i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v32i1_mask_mem_b: ; VLX: # BB#0: # %entry -; VLX-NEXT: vpbroadcastq (%rsi), %xmm1 ; VLX-NEXT: kmovd %edi, %k1 -; VLX-NEXT: vpcmpleq %xmm0, %xmm1, %k0 {%k1} +; VLX-NEXT: vpcmpnltq (%rsi){1to2}, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: retq ; @@ -34555,8 +34525,7 @@ entry: define zeroext i64 @test_vpcmpsgeq_v2i1_v64i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgeq_v2i1_v64i1_mask_mem_b: ; VLX: # BB#0: # %entry -; VLX-NEXT: vpbroadcastq (%rdi), %xmm1 -; VLX-NEXT: vpcmpleq %xmm0, %xmm1, %k0 +; VLX-NEXT: vpcmpnltq (%rdi){1to2}, %xmm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: retq ; @@ -34608,9 +34577,8 @@ entry: define zeroext i64 @test_masked_vpcmpsgeq_v2i1_v64i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v64i1_mask_mem_b: ; VLX: # BB#0: # %entry -; VLX-NEXT: vpbroadcastq (%rsi), %xmm1 ; VLX-NEXT: kmovd %edi, %k1 -; VLX-NEXT: vpcmpleq %xmm0, %xmm1, %k0 {%k1} +; VLX-NEXT: vpcmpnltq (%rsi){1to2}, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: retq ; @@ -34963,8 +34931,7 @@ entry: define zeroext i8 @test_vpcmpsgeq_v4i1_v8i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgeq_v4i1_v8i1_mask_mem_b: ; VLX: # BB#0: # %entry -; VLX-NEXT: vpbroadcastq (%rdi), %ymm1 -; VLX-NEXT: vpcmpleq %ymm0, %ymm1, %k0 +; VLX-NEXT: vpcmpnltq (%rdi){1to4}, %ymm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AL %AL %EAX ; VLX-NEXT: vzeroupper @@ -35027,9 +34994,8 @@ entry: define zeroext i8 @test_masked_vpcmpsgeq_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v8i1_mask_mem_b: ; VLX: # BB#0: # %entry -; VLX-NEXT: vpbroadcastq (%rsi), %ymm1 ; VLX-NEXT: kmovd %edi, %k1 -; 
VLX-NEXT: vpcmpleq %ymm0, %ymm1, %k0 {%k1} +; VLX-NEXT: vpcmpnltq (%rsi){1to4}, %ymm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AL %AL %EAX ; VLX-NEXT: vzeroupper @@ -35399,8 +35365,7 @@ entry: define zeroext i16 @test_vpcmpsgeq_v4i1_v16i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgeq_v4i1_v16i1_mask_mem_b: ; VLX: # BB#0: # %entry -; VLX-NEXT: vpbroadcastq (%rdi), %ymm1 -; VLX-NEXT: vpcmpleq %ymm0, %ymm1, %k0 +; VLX-NEXT: vpcmpnltq (%rdi){1to4}, %ymm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AX %AX %EAX ; VLX-NEXT: vzeroupper @@ -35462,9 +35427,8 @@ entry: define zeroext i16 @test_masked_vpcmpsgeq_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v16i1_mask_mem_b: ; VLX: # BB#0: # %entry -; VLX-NEXT: vpbroadcastq (%rsi), %ymm1 ; VLX-NEXT: kmovd %edi, %k1 -; VLX-NEXT: vpcmpleq %ymm0, %ymm1, %k0 {%k1} +; VLX-NEXT: vpcmpnltq (%rsi){1to4}, %ymm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AX %AX %EAX ; VLX-NEXT: vzeroupper @@ -35777,8 +35741,7 @@ entry: define zeroext i32 @test_vpcmpsgeq_v4i1_v32i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgeq_v4i1_v32i1_mask_mem_b: ; VLX: # BB#0: # %entry -; VLX-NEXT: vpbroadcastq (%rdi), %ymm1 -; VLX-NEXT: vpcmpleq %ymm0, %ymm1, %k0 +; VLX-NEXT: vpcmpnltq (%rdi){1to4}, %ymm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq @@ -35826,9 +35789,8 @@ entry: define zeroext i32 @test_masked_vpcmpsgeq_v4i1_v32i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v32i1_mask_mem_b: ; VLX: # BB#0: # %entry -; VLX-NEXT: vpbroadcastq (%rsi), %ymm1 ; VLX-NEXT: kmovd %edi, %k1 -; VLX-NEXT: vpcmpleq %ymm0, %ymm1, %k0 {%k1} +; VLX-NEXT: vpcmpnltq (%rsi){1to4}, %ymm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq @@ -36151,8 
+36113,7 @@ entry: define zeroext i64 @test_vpcmpsgeq_v4i1_v64i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgeq_v4i1_v64i1_mask_mem_b: ; VLX: # BB#0: # %entry -; VLX-NEXT: vpbroadcastq (%rdi), %ymm1 -; VLX-NEXT: vpcmpleq %ymm0, %ymm1, %k0 +; VLX-NEXT: vpcmpnltq (%rdi){1to4}, %ymm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq @@ -36206,9 +36167,8 @@ entry: define zeroext i64 @test_masked_vpcmpsgeq_v4i1_v64i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v64i1_mask_mem_b: ; VLX: # BB#0: # %entry -; VLX-NEXT: vpbroadcastq (%rsi), %ymm1 ; VLX-NEXT: kmovd %edi, %k1 -; VLX-NEXT: vpcmpleq %ymm0, %ymm1, %k0 {%k1} +; VLX-NEXT: vpcmpnltq (%rsi){1to4}, %ymm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq @@ -36395,8 +36355,7 @@ entry: define zeroext i16 @test_vpcmpsgeq_v8i1_v16i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgeq_v8i1_v16i1_mask_mem_b: ; VLX: # BB#0: # %entry -; VLX-NEXT: vpbroadcastq (%rdi), %zmm1 -; VLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 +; VLX-NEXT: vpcmpnltq (%rdi){1to8}, %zmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AX %AX %EAX ; VLX-NEXT: vzeroupper @@ -36404,8 +36363,7 @@ define zeroext i16 @test_vpcmpsgeq_v8i1_v16i1_mask_mem_b(<8 x i64> %__a, i64* %_ ; ; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v16i1_mask_mem_b: ; NoVLX: # BB#0: # %entry -; NoVLX-NEXT: vpbroadcastq (%rdi), %zmm1 -; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 +; NoVLX-NEXT: vpcmpnltq (%rdi){1to8}, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: vzeroupper @@ -36424,9 +36382,8 @@ entry: define zeroext i16 @test_masked_vpcmpsgeq_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgeq_v8i1_v16i1_mask_mem_b: ; VLX: # BB#0: # %entry -; VLX-NEXT: vpbroadcastq (%rsi), %zmm1 ; VLX-NEXT: 
kmovd %edi, %k1 -; VLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1} +; VLX-NEXT: vpcmpnltq (%rsi){1to8}, %zmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AX %AX %EAX ; VLX-NEXT: vzeroupper @@ -36434,9 +36391,8 @@ define zeroext i16 @test_masked_vpcmpsgeq_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v16i1_mask_mem_b: ; NoVLX: # BB#0: # %entry -; NoVLX-NEXT: vpbroadcastq (%rsi), %zmm1 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1} +; NoVLX-NEXT: vpcmpnltq (%rsi){1to8}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: vzeroupper @@ -36753,8 +36709,7 @@ entry: define zeroext i32 @test_vpcmpsgeq_v8i1_v32i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgeq_v8i1_v32i1_mask_mem_b: ; VLX: # BB#0: # %entry -; VLX-NEXT: vpbroadcastq (%rdi), %zmm1 -; VLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 +; VLX-NEXT: vpcmpnltq (%rdi){1to8}, %zmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq @@ -36768,8 +36723,7 @@ define zeroext i32 @test_vpcmpsgeq_v8i1_v32i1_mask_mem_b(<8 x i64> %__a, i64* %_ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vpbroadcastq (%rdi), %zmm1 -; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 +; NoVLX-NEXT: vpcmpnltq (%rdi){1to8}, %zmm0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 @@ -36828,9 +36782,8 @@ entry: define zeroext i32 @test_masked_vpcmpsgeq_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgeq_v8i1_v32i1_mask_mem_b: ; VLX: # BB#0: # %entry -; VLX-NEXT: vpbroadcastq (%rsi), %zmm1 ; VLX-NEXT: kmovd %edi, %k1 -; VLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1} +; VLX-NEXT: vpcmpnltq (%rsi){1to8}, %zmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq @@ 
-36844,9 +36797,8 @@ define zeroext i32 @test_masked_vpcmpsgeq_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp -; NoVLX-NEXT: vpbroadcastq (%rsi), %zmm1 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1} +; NoVLX-NEXT: vpcmpnltq (%rsi){1to8}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 @@ -37223,8 +37175,7 @@ entry: define zeroext i64 @test_vpcmpsgeq_v8i1_v64i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgeq_v8i1_v64i1_mask_mem_b: ; VLX: # BB#0: # %entry -; VLX-NEXT: vpbroadcastq (%rdi), %zmm1 -; VLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 +; VLX-NEXT: vpcmpnltq (%rdi){1to8}, %zmm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq @@ -37238,8 +37189,7 @@ define zeroext i64 @test_vpcmpsgeq_v8i1_v64i1_mask_mem_b(<8 x i64> %__a, i64* %_ ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vpbroadcastq (%rdi), %zmm1 -; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 +; NoVLX-NEXT: vpcmpnltq (%rdi){1to8}, %zmm0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) @@ -37303,9 +37253,8 @@ entry: define zeroext i64 @test_masked_vpcmpsgeq_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgeq_v8i1_v64i1_mask_mem_b: ; VLX: # BB#0: # %entry -; VLX-NEXT: vpbroadcastq (%rsi), %zmm1 ; VLX-NEXT: kmovd %edi, %k1 -; VLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1} +; VLX-NEXT: vpcmpnltq (%rsi){1to8}, %zmm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq @@ -37319,9 +37268,8 @@ define zeroext i64 @test_masked_vpcmpsgeq_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq 
$-32, %rsp ; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: vpbroadcastq (%rsi), %zmm1 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1} +; NoVLX-NEXT: vpcmpnltq (%rsi){1to8}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) -- 2.11.0