// trunc i32 -> i1: isolate bit 0 with AND32ri8, move the GPR into a mask
// register with KMOVWkr, then constrain the result to the VK1 class.
def : Pat<(i1 (trunc (i32 GR32:$src))),
(COPY_TO_REGCLASS (KMOVWkr (AND32ri8 $src, (i32 1))), VK1)>;
+ // trunc i32 -> i1 when the i32 is known zero-extended from i1 (matched by
+ // the assertzext_i1 PatFrag): bit 0 is already the whole value, so a bare
+ // cross-class copy into VK1 suffices -- no AND/KMOV round trip needed.
+ def : Pat<(i1 (trunc (i32 (assertzext_i1 GR32:$src)))),
+ (COPY_TO_REGCLASS GR32:$src, VK1)>;
+
// trunc i8 -> i1: insert the GR8 value into a 32-bit register
// (SUBREG_TO_REG), isolate bit 0 with AND32ri8, then KMOVWkr into a mask
// register constrained to VK1.
def : Pat<(i1 (trunc (i8 GR8:$src))),
(COPY_TO_REGCLASS
(KMOVWkr (AND32ri8 (SUBREG_TO_REG (i32 0), GR8:$src, sub_8bit), (i32 1))),
VK1)>;
+
def : Pat<(i1 (trunc (i16 GR16:$src))),
(COPY_TO_REGCLASS
(KMOVWkr (AND32ri8 (SUBREG_TO_REG (i32 0), $src, sub_16bit), (i32 1))),
// zext i1 -> i32: read the mask out through VK16 with KMOVWrk, then AND
// with 1 so only bit 0 survives in the GPR result.
def : Pat<(i32 (zext VK1:$src)),
(AND32ri8 (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)), (i32 1))>;
+
// anyext i1 -> i32: bits above bit 0 are don't-care for anyext, so the new
// pattern replaces the KMOVWrk round trip with a direct cross-class copy
// into GR32.
def : Pat<(i32 (anyext VK1:$src)),
- (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16))>;
+ (COPY_TO_REGCLASS VK1:$src, GR32)>;
// zext i1 -> i8: KMOVWrk the mask through VK16 into a 32-bit GPR, AND with
// 1 to isolate bit 0, then take the low byte via EXTRACT_SUBREG.
def : Pat<(i8 (zext VK1:$src)),
(EXTRACT_SUBREG
(AND32ri8 (KMOVWrk
(COPY_TO_REGCLASS VK1:$src, VK16)), (i32 1)), sub_8bit)>;
+
// anyext i1 -> i8: upper bits are don't-care, so copy the mask straight
// into a GR32 and extract the low byte -- no KMOVWrk needed.
def : Pat<(i8 (anyext VK1:$src)),
- (EXTRACT_SUBREG
- (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)), sub_8bit)>;
+ (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK1:$src, GR32)), sub_8bit)>;
// zext i1 -> i64: KMOVWrk into a 32-bit GPR, widen to 64 bits with
// SUBREG_TO_REG, then AND64ri8 with 1 to keep only bit 0.
def : Pat<(i64 (zext VK1:$src)),
(AND64ri8 (SUBREG_TO_REG (i64 0),
(KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)), sub_32bit), (i64 1))>;
+
// anyext i1 -> i64: upper bits are don't-care, so the new pattern copies
// the mask directly into a GR32 (dropping KMOVWrk) before widening with
// SUBREG_TO_REG.
def : Pat<(i64 (anyext VK1:$src)),
(SUBREG_TO_REG (i64 0),
- (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)), sub_32bit)>;
+ (i32 (COPY_TO_REGCLASS VK1:$src, GR32)), sub_32bit)>;
// zext i1 -> i16: KMOVWrk through VK16 into a GR32, AND with 1 to isolate
// bit 0, then take the low 16 bits via EXTRACT_SUBREG.
def : Pat<(i16 (zext VK1:$src)),
(EXTRACT_SUBREG
(AND32ri8 (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)), (i32 1)),
sub_16bit)>;
+
// anyext i1 -> i16: upper bits are don't-care, so copy the mask directly
// into a GR32 and extract the low 16 bits -- no KMOVWrk needed.
def : Pat<(i16 (anyext VK1:$src)),
- (EXTRACT_SUBREG
- (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)),
- sub_16bit)>;
+ (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK1:$src, GR32)), sub_16bit)>;
}
// scalar_to_vector of an i1 mask into v16i1: expressed as a plain
// register-class copy from VK1 to VK16 (no data movement instruction).
def : Pat<(v16i1 (scalar_to_vector VK1:$src)),
(COPY_TO_REGCLASS VK1:$src, VK16)>;
(X86mtruncstore node:$src1, node:$src2, node:$src3), [{
return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;
}]>;
+
+// PatFrag matching an (assertzext x) node whose asserted VT is exactly
+// MVT::i1 -- i.e. the producer guarantees all bits above bit 0 are zero.
+// Used by the trunc-to-i1 patterns to elide the AND/KMOV sequence.
+def assertzext_i1 :
+ PatFrag<(ops node:$src), (assertzext node:$src), [{
+ return cast<VTSDNode>(N->getOperand(1))->getVT() == MVT::i1;
+}]>;
\ No newline at end of file
define i64 @func2(i1 zeroext %i, i32 %j) {
; CHECK-LABEL: func2:
; CHECK: # BB#0: # %entry
+; CHECK-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; CHECK-NEXT: testl %esi, %esi
; CHECK-NEXT: je .LBB1_1
; CHECK-NEXT: # BB#2: # %if.then
; CHECK-NEXT: jmp bar # TAILCALL
; CHECK-NEXT: .LBB1_1: # %return
-; CHECK-NEXT: andl $1, %edi
-; CHECK-NEXT: kmovw %edi, %k0
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: orq $-2, %rax
+; CHECK-NEXT: orq $-2, %rdi
+; CHECK-NEXT: movq %rdi, %rax
; CHECK-NEXT: retq
entry:
%tobool = icmp eq i32 %j, 0
;
; AVX512-LABEL: select_cmov_i16:
; AVX512: ## BB#0:
-; AVX512-NEXT: andl $1, %edi
; AVX512-NEXT: kmovw %edi, %k0
; AVX512-NEXT: kortestw %k0, %k0
; AVX512-NEXT: cmovew %dx, %si
;
; AVX512-LABEL: select_cmov_i32:
; AVX512: ## BB#0:
-; AVX512-NEXT: andl $1, %edi
; AVX512-NEXT: kmovw %edi, %k0
; AVX512-NEXT: kortestw %k0, %k0
; AVX512-NEXT: cmovel %edx, %esi
;
; AVX512-LABEL: select_cmov_i64:
; AVX512: ## BB#0:
-; AVX512-NEXT: andl $1, %edi
; AVX512-NEXT: kmovw %edi, %k0
; AVX512-NEXT: kortestw %k0, %k0
; AVX512-NEXT: cmoveq %rdx, %rsi
-; RUN: llc -mattr=+avx512f < %s | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mattr=+avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=KNL
+; RUN: llc < %s -mattr=+avx512f,+avx512vl,+avx512bw,+avx512dq | FileCheck %s --check-prefix=CHECK --check-prefix=SKX
+
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
; Note that the kmovs should really *not* appear in the output, this is an
; artifact of the current poor lowering. This is tracked by PR28175.
-; CHECK-LABEL: @foo64
-; CHECK: kmov
-; CHECK: kmov
-; CHECK: orq $-2, %rax
-; CHECK: ret
-define i64 @foo64(i1 zeroext %i, i32 %j) #0 {
+define i64 @foo64(i1 zeroext %i) #0 {
+; CHECK-LABEL: foo64:
+; CHECK: # BB#0:
+; CHECK-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
+; CHECK-NEXT: orq $-2, %rdi
+; CHECK-NEXT: movq %rdi, %rax
+; CHECK-NEXT: retq
br label %bb
bb:
ret i64 %v
}
-; CHECK-LABEL: @foo16
-; CHECK: kmov
-; CHECK: kmov
-; CHECK: orl $65534, %eax
-; CHECK: retq
-define i16 @foo16(i1 zeroext %i, i32 %j) #0 {
+define i16 @foo16(i1 zeroext %i) #0 {
+; CHECK-LABEL: foo16:
+; CHECK: # BB#0:
+; CHECK-NEXT: orl $65534, %edi # imm = 0xFFFE
+; CHECK-NEXT: movl %edi, %eax
+; CHECK-NEXT: retq
br label %bb
bb:
end:
ret i16 %v
}
+
+; This code is still not optimal
+; zext i1 -> i16 across a branch, with an extra unused i32 arg so the
+; assertzext-based shortcut does not fire here; the KNL/SKX bodies below
+; were produced by update_llc_test_checks.py -- regenerate, don't hand-edit.
+define i16 @foo16_1(i1 zeroext %i, i32 %j) #0 {
+; KNL-LABEL: foo16_1:
+; KNL: # BB#0:
+; KNL-NEXT: kmovw %edi, %k0
+; KNL-NEXT: kmovw %k0, %eax
+; KNL-NEXT: andl $1, %eax
+; KNL-NEXT: orl $2, %eax
+; KNL-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; KNL-NEXT: retq
+;
+; SKX-LABEL: foo16_1:
+; SKX: # BB#0:
+; SKX-NEXT: kmovd %edi, %k0
+; SKX-NEXT: kmovw %k0, %eax
+; SKX-NEXT: andl $1, %eax
+; SKX-NEXT: orl $2, %eax
+; SKX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; SKX-NEXT: retq
+ br label %bb
+
+bb:
+ %z = zext i1 %i to i16
+ %v = or i16 %z, 2
+ br label %end
+
+end:
+ ret i16 %v
+}
+
+; zext i1 -> i32 then or -2: with the zeroext attribute on %i the checks
+; show a direct orl on %edi (no and/kmov round trip). Checks autogenerated
+; by update_llc_test_checks.py -- regenerate, don't hand-edit.
+define i32 @foo32(i1 zeroext %i) #0 {
+; CHECK-LABEL: foo32:
+; CHECK: # BB#0:
+; CHECK-NEXT: orl $-2, %edi
+; CHECK-NEXT: movl %edi, %eax
+; CHECK-NEXT: retq
+ br label %bb
+
+bb:
+ %z = zext i1 %i to i32
+ %v = or i32 %z, -2
+ br label %end
+
+end:
+ ret i32 %v
+}
+
+; zext i1 -> i8 then or -2: with the zeroext attribute on %i the checks
+; show a direct orb on %dil (no and/kmov round trip). Checks autogenerated
+; by update_llc_test_checks.py -- regenerate, don't hand-edit.
+define i8 @foo8(i1 zeroext %i) #0 {
+; CHECK-LABEL: foo8:
+; CHECK: # BB#0:
+; CHECK-NEXT: orb $-2, %dil
+; CHECK-NEXT: movl %edi, %eax
+; CHECK-NEXT: retq
+ br label %bb
+
+bb:
+ %z = zext i1 %i to i8
+ %v = or i8 %z, -2
+ br label %end
+
+end:
+ ret i8 %v
+}
+
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=x86_64-darwin-unknown < %s | FileCheck %s --check-prefix=CHECK --check-prefix=SDAG
; RUN: llc -mtriple=x86_64-darwin-unknown -fast-isel -fast-isel-abort=1 < %s | FileCheck %s --check-prefix=CHECK --check-prefix=FAST
; RUN: llc -mtriple=x86_64-darwin-unknown -mcpu=knl < %s | FileCheck %s --check-prefix=KNL
; KNL-LABEL: bug27873:
; KNL: ## BB#0:
; KNL-NEXT: andl $1, %esi
+; KNL-NEXT: kmovw %esi, %k0
; KNL-NEXT: movl $160, %ecx
; KNL-NEXT: movq %rdi, %rax
; KNL-NEXT: mulq %rcx
-; KNL-NEXT: kmovw %esi, %k0
; KNL-NEXT: seto %al
; KNL-NEXT: kmovw %eax, %k1
; KNL-NEXT: korw %k1, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: # kill
+; KNL-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; KNL-NEXT: retq
%mul = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %c1, i64 160)
%mul.overflow = extractvalue { i64, i1 } %mul, 1