}
let Predicates = [HasBMI2] in {
- def : Pat<(and GR32:$src, (add (shl 1, GR8:$lz), -1)),
- (BZHI32rr GR32:$src,
- (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$lz, sub_8bit))>;
-
- def : Pat<(and (loadi32 addr:$src), (add (shl 1, GR8:$lz), -1)),
- (BZHI32rm addr:$src,
- (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$lz, sub_8bit))>;
+ // Helper multiclass: for one source-pattern shape, emit both the
+ // register-operand and the memory-operand selection pattern.  The i8
+ // bit count $lz is widened to VT by inserting it into an IMPLICIT_DEF
+ // via INSERT_SUBREG (sub_8bit) before it is fed to the BZHI
+ // instruction (DstInst / DstMemInst).
+ multiclass _bmi_bzhi_pattern<dag regpattern, dag mempattern, RegisterClass RC,
+                              ValueType VT, Instruction DstInst,
+                              Instruction DstMemInst> {
+   def : Pat<regpattern,
+             (DstInst RC:$src,
+               (INSERT_SUBREG (VT (IMPLICIT_DEF)), GR8:$lz, sub_8bit))>;
+   def : Pat<mempattern,
+             (DstMemInst addr:$src,
+               (INSERT_SUBREG (VT (IMPLICIT_DEF)), GR8:$lz, sub_8bit))>;
+ }
- def : Pat<(and GR64:$src, (add (shl 1, GR8:$lz), -1)),
- (BZHI64rr GR64:$src,
- (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR8:$lz, sub_8bit))>;
+ // Selects BZHI (zero high bits starting at index $lz) for the four
+ // algebraically-equivalent ways of keeping only the low $lz bits of x.
+ // Each defm below emits a register form (DstInst) and a load-folded
+ // memory form (DstMemInst) via _bmi_bzhi_pattern.  `bitwidth` is the
+ // operand width (32 or 64) used when the source recomputes the count
+ // as (bitwidth - y).
+ multiclass bmi_bzhi_patterns<RegisterClass RC, int bitwidth, ValueType VT,
+                              Instruction DstInst, X86MemOperand x86memop,
+                              Instruction DstMemInst> {
+   // x & ((1 << y) - 1)
+   defm : _bmi_bzhi_pattern<(and RC:$src, (add (shl 1, GR8:$lz), -1)),
+                            (and (x86memop addr:$src),
+                                 (add (shl 1, GR8:$lz), -1)),
+                            RC, VT, DstInst, DstMemInst>;
+
+   // x & ~(-1 << y)
+   defm : _bmi_bzhi_pattern<(and RC:$src, (xor (shl -1, GR8:$lz), -1)),
+                            (and (x86memop addr:$src),
+                                 (xor (shl -1, GR8:$lz), -1)),
+                            RC, VT, DstInst, DstMemInst>;
+
+   // x & (-1 >> (bitwidth - y))
+   defm : _bmi_bzhi_pattern<(and RC:$src, (srl -1, (sub bitwidth, GR8:$lz))),
+                            (and (x86memop addr:$src),
+                                 (srl -1, (sub bitwidth, GR8:$lz))),
+                            RC, VT, DstInst, DstMemInst>;
+
+   // x << (bitwidth - y) >> (bitwidth - y)
+   defm : _bmi_bzhi_pattern<(srl (shl RC:$src, (sub bitwidth, GR8:$lz)),
+                                 (sub bitwidth, GR8:$lz)),
+                            (srl (shl (x86memop addr:$src),
+                                      (sub bitwidth, GR8:$lz)),
+                                 (sub bitwidth, GR8:$lz)),
+                            RC, VT, DstInst, DstMemInst>;
+ }
- def : Pat<(and (loadi64 addr:$src), (add (shl 1, GR8:$lz), -1)),
- (BZHI64rm addr:$src,
- (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR8:$lz, sub_8bit))>;
+ // Instantiate the BZHI low-bit-mask patterns for the 32-bit (BZHI32rr /
+ // BZHI32rm) and 64-bit (BZHI64rr / BZHI64rm) instruction forms.
+ defm : bmi_bzhi_patterns<GR32, 32, i32, BZHI32rr, loadi32, BZHI32rm>;
+ defm : bmi_bzhi_patterns<GR64, 64, i64, BZHI64rr, loadi64, BZHI64rm>;
// x & (-1 >> (32 - y))
def : Pat<(and GR32:$src, (srl -1, (i8 (trunc (sub 32, GR32:$lz))))),
; X86-BMI1BMI2-LABEL: bzhi32_b0:
; X86-BMI1BMI2: # %bb.0:
; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
-; X86-BMI1BMI2-NEXT: movl $-1, %ecx
-; X86-BMI1BMI2-NEXT: shlxl %eax, %ecx, %eax
-; X86-BMI1BMI2-NEXT: andnl {{[0-9]+}}(%esp), %eax, %eax
+; X86-BMI1BMI2-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax
; X86-BMI1BMI2-NEXT: retl
;
; X64-NOBMI-LABEL: bzhi32_b0:
;
; X64-BMI1BMI2-LABEL: bzhi32_b0:
; X64-BMI1BMI2: # %bb.0:
-; X64-BMI1BMI2-NEXT: movl $-1, %eax
-; X64-BMI1BMI2-NEXT: shlxl %esi, %eax, %eax
-; X64-BMI1BMI2-NEXT: andnl %edi, %eax, %eax
+; X64-BMI1BMI2-NEXT: bzhil %esi, %edi, %eax
; X64-BMI1BMI2-NEXT: retq
%notmask = shl i32 -1, %numlowbits
%mask = xor i32 %notmask, -1
; X86-BMI1BMI2-LABEL: bzhi32_b1_indexzext:
; X86-BMI1BMI2: # %bb.0:
; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
-; X86-BMI1BMI2-NEXT: movl $-1, %ecx
-; X86-BMI1BMI2-NEXT: shlxl %eax, %ecx, %eax
-; X86-BMI1BMI2-NEXT: andnl {{[0-9]+}}(%esp), %eax, %eax
+; X86-BMI1BMI2-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax
; X86-BMI1BMI2-NEXT: retl
;
; X64-NOBMI-LABEL: bzhi32_b1_indexzext:
;
; X64-BMI1BMI2-LABEL: bzhi32_b1_indexzext:
; X64-BMI1BMI2: # %bb.0:
-; X64-BMI1BMI2-NEXT: movl $-1, %eax
-; X64-BMI1BMI2-NEXT: shlxl %esi, %eax, %eax
-; X64-BMI1BMI2-NEXT: andnl %edi, %eax, %eax
+; X64-BMI1BMI2-NEXT: bzhil %esi, %edi, %eax
; X64-BMI1BMI2-NEXT: retq
%conv = zext i8 %numlowbits to i32
%notmask = shl i32 -1, %conv
; X86-BMI1BMI2: # %bb.0:
; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1BMI2-NEXT: movl $-1, %edx
-; X86-BMI1BMI2-NEXT: shlxl %ecx, %edx, %ecx
-; X86-BMI1BMI2-NEXT: andnl (%eax), %ecx, %eax
+; X86-BMI1BMI2-NEXT: bzhil %ecx, (%eax), %eax
; X86-BMI1BMI2-NEXT: retl
;
; X64-NOBMI-LABEL: bzhi32_b2_load:
;
; X64-BMI1BMI2-LABEL: bzhi32_b2_load:
; X64-BMI1BMI2: # %bb.0:
-; X64-BMI1BMI2-NEXT: movl $-1, %eax
-; X64-BMI1BMI2-NEXT: shlxl %esi, %eax, %eax
-; X64-BMI1BMI2-NEXT: andnl (%rdi), %eax, %eax
+; X64-BMI1BMI2-NEXT: bzhil %esi, (%rdi), %eax
; X64-BMI1BMI2-NEXT: retq
%val = load i32, i32* %w
%notmask = shl i32 -1, %numlowbits
; X86-BMI1BMI2: # %bb.0:
; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1BMI2-NEXT: movl $-1, %edx
-; X86-BMI1BMI2-NEXT: shlxl %ecx, %edx, %ecx
-; X86-BMI1BMI2-NEXT: andnl (%eax), %ecx, %eax
+; X86-BMI1BMI2-NEXT: bzhil %ecx, (%eax), %eax
; X86-BMI1BMI2-NEXT: retl
;
; X64-NOBMI-LABEL: bzhi32_b3_load_indexzext:
;
; X64-BMI1BMI2-LABEL: bzhi32_b3_load_indexzext:
; X64-BMI1BMI2: # %bb.0:
-; X64-BMI1BMI2-NEXT: movl $-1, %eax
-; X64-BMI1BMI2-NEXT: shlxl %esi, %eax, %eax
-; X64-BMI1BMI2-NEXT: andnl (%rdi), %eax, %eax
+; X64-BMI1BMI2-NEXT: bzhil %esi, (%rdi), %eax
; X64-BMI1BMI2-NEXT: retq
%val = load i32, i32* %w
%conv = zext i8 %numlowbits to i32
; X86-BMI1BMI2-LABEL: bzhi32_b4_commutative:
; X86-BMI1BMI2: # %bb.0:
; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
-; X86-BMI1BMI2-NEXT: movl $-1, %ecx
-; X86-BMI1BMI2-NEXT: shlxl %eax, %ecx, %eax
-; X86-BMI1BMI2-NEXT: andnl {{[0-9]+}}(%esp), %eax, %eax
+; X86-BMI1BMI2-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax
; X86-BMI1BMI2-NEXT: retl
;
; X64-NOBMI-LABEL: bzhi32_b4_commutative:
;
; X64-BMI1BMI2-LABEL: bzhi32_b4_commutative:
; X64-BMI1BMI2: # %bb.0:
-; X64-BMI1BMI2-NEXT: movl $-1, %eax
-; X64-BMI1BMI2-NEXT: shlxl %esi, %eax, %eax
-; X64-BMI1BMI2-NEXT: andnl %edi, %eax, %eax
+; X64-BMI1BMI2-NEXT: bzhil %esi, %edi, %eax
; X64-BMI1BMI2-NEXT: retq
%notmask = shl i32 -1, %numlowbits
%mask = xor i32 %notmask, -1
;
; X64-BMI1BMI2-LABEL: bzhi64_b0:
; X64-BMI1BMI2: # %bb.0:
-; X64-BMI1BMI2-NEXT: movq $-1, %rax
-; X64-BMI1BMI2-NEXT: shlxq %rsi, %rax, %rax
-; X64-BMI1BMI2-NEXT: andnq %rdi, %rax, %rax
+; X64-BMI1BMI2-NEXT: bzhiq %rsi, %rdi, %rax
; X64-BMI1BMI2-NEXT: retq
%notmask = shl i64 -1, %numlowbits
%mask = xor i64 %notmask, -1
; X64-BMI1BMI2-LABEL: bzhi64_b1_indexzext:
; X64-BMI1BMI2: # %bb.0:
; X64-BMI1BMI2-NEXT: # kill: def $esi killed $esi def $rsi
-; X64-BMI1BMI2-NEXT: movq $-1, %rax
-; X64-BMI1BMI2-NEXT: shlxq %rsi, %rax, %rax
-; X64-BMI1BMI2-NEXT: andnq %rdi, %rax, %rax
+; X64-BMI1BMI2-NEXT: bzhiq %rsi, %rdi, %rax
; X64-BMI1BMI2-NEXT: retq
%conv = zext i8 %numlowbits to i64
%notmask = shl i64 -1, %conv
;
; X64-BMI1BMI2-LABEL: bzhi64_b2_load:
; X64-BMI1BMI2: # %bb.0:
-; X64-BMI1BMI2-NEXT: movq $-1, %rax
-; X64-BMI1BMI2-NEXT: shlxq %rsi, %rax, %rax
-; X64-BMI1BMI2-NEXT: andnq (%rdi), %rax, %rax
+; X64-BMI1BMI2-NEXT: bzhiq %rsi, (%rdi), %rax
; X64-BMI1BMI2-NEXT: retq
%val = load i64, i64* %w
%notmask = shl i64 -1, %numlowbits
; X64-BMI1BMI2-LABEL: bzhi64_b3_load_indexzext:
; X64-BMI1BMI2: # %bb.0:
; X64-BMI1BMI2-NEXT: # kill: def $esi killed $esi def $rsi
-; X64-BMI1BMI2-NEXT: movq $-1, %rax
-; X64-BMI1BMI2-NEXT: shlxq %rsi, %rax, %rax
-; X64-BMI1BMI2-NEXT: andnq (%rdi), %rax, %rax
+; X64-BMI1BMI2-NEXT: bzhiq %rsi, (%rdi), %rax
; X64-BMI1BMI2-NEXT: retq
%val = load i64, i64* %w
%conv = zext i8 %numlowbits to i64
;
; X64-BMI1BMI2-LABEL: bzhi64_b4_commutative:
; X64-BMI1BMI2: # %bb.0:
-; X64-BMI1BMI2-NEXT: movq $-1, %rax
-; X64-BMI1BMI2-NEXT: shlxq %rsi, %rax, %rax
-; X64-BMI1BMI2-NEXT: andnq %rdi, %rax, %rax
+; X64-BMI1BMI2-NEXT: bzhiq %rsi, %rdi, %rax
; X64-BMI1BMI2-NEXT: retq
%notmask = shl i64 -1, %numlowbits
%mask = xor i64 %notmask, -1
;
; X86-BMI1BMI2-LABEL: bzhi32_c1_indexzext:
; X86-BMI1BMI2: # %bb.0:
-; X86-BMI1BMI2-NEXT: movb $32, %al
-; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %al
-; X86-BMI1BMI2-NEXT: movl $-1, %ecx
-; X86-BMI1BMI2-NEXT: shrxl %eax, %ecx, %eax
-; X86-BMI1BMI2-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-BMI1BMI2-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax
; X86-BMI1BMI2-NEXT: retl
;
; X64-NOBMI-LABEL: bzhi32_c1_indexzext:
;
; X64-BMI1BMI2-LABEL: bzhi32_c1_indexzext:
; X64-BMI1BMI2: # %bb.0:
-; X64-BMI1BMI2-NEXT: movb $32, %al
-; X64-BMI1BMI2-NEXT: subb %sil, %al
-; X64-BMI1BMI2-NEXT: movl $-1, %ecx
-; X64-BMI1BMI2-NEXT: shrxl %eax, %ecx, %eax
-; X64-BMI1BMI2-NEXT: andl %edi, %eax
+; X64-BMI1BMI2-NEXT: bzhil %esi, %edi, %eax
; X64-BMI1BMI2-NEXT: retq
%numhighbits = sub i8 32, %numlowbits
%sh_prom = zext i8 %numhighbits to i32
;
; X86-BMI1BMI2-LABEL: bzhi32_c3_load_indexzext:
; X86-BMI1BMI2: # %bb.0:
-; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-BMI1BMI2-NEXT: movb $32, %al
-; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %al
-; X86-BMI1BMI2-NEXT: movl $-1, %edx
-; X86-BMI1BMI2-NEXT: shrxl %eax, %edx, %eax
-; X86-BMI1BMI2-NEXT: andl (%ecx), %eax
+; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1BMI2-NEXT: bzhil %ecx, (%eax), %eax
; X86-BMI1BMI2-NEXT: retl
;
; X64-NOBMI-LABEL: bzhi32_c3_load_indexzext:
;
; X64-BMI1BMI2-LABEL: bzhi32_c3_load_indexzext:
; X64-BMI1BMI2: # %bb.0:
-; X64-BMI1BMI2-NEXT: movb $32, %al
-; X64-BMI1BMI2-NEXT: subb %sil, %al
-; X64-BMI1BMI2-NEXT: movl $-1, %ecx
-; X64-BMI1BMI2-NEXT: shrxl %eax, %ecx, %eax
-; X64-BMI1BMI2-NEXT: andl (%rdi), %eax
+; X64-BMI1BMI2-NEXT: bzhil %esi, (%rdi), %eax
; X64-BMI1BMI2-NEXT: retq
%val = load i32, i32* %w
%numhighbits = sub i8 32, %numlowbits
;
; X64-BMI1BMI2-LABEL: bzhi64_c1_indexzext:
; X64-BMI1BMI2: # %bb.0:
-; X64-BMI1BMI2-NEXT: movb $64, %al
-; X64-BMI1BMI2-NEXT: subb %sil, %al
-; X64-BMI1BMI2-NEXT: movq $-1, %rcx
-; X64-BMI1BMI2-NEXT: shrxq %rax, %rcx, %rax
-; X64-BMI1BMI2-NEXT: andq %rdi, %rax
+; X64-BMI1BMI2-NEXT: # kill: def $esi killed $esi def $rsi
+; X64-BMI1BMI2-NEXT: bzhiq %rsi, %rdi, %rax
; X64-BMI1BMI2-NEXT: retq
%numhighbits = sub i8 64, %numlowbits
%sh_prom = zext i8 %numhighbits to i64
;
; X64-BMI1BMI2-LABEL: bzhi64_c3_load_indexzext:
; X64-BMI1BMI2: # %bb.0:
-; X64-BMI1BMI2-NEXT: movb $64, %al
-; X64-BMI1BMI2-NEXT: subb %sil, %al
-; X64-BMI1BMI2-NEXT: movq $-1, %rcx
-; X64-BMI1BMI2-NEXT: shrxq %rax, %rcx, %rax
-; X64-BMI1BMI2-NEXT: andq (%rdi), %rax
+; X64-BMI1BMI2-NEXT: # kill: def $esi killed $esi def $rsi
+; X64-BMI1BMI2-NEXT: bzhiq %rsi, (%rdi), %rax
; X64-BMI1BMI2-NEXT: retq
%val = load i64, i64* %w
%numhighbits = sub i8 64, %numlowbits
;
; X86-BMI1BMI2-LABEL: bzhi32_d1_indexzext:
; X86-BMI1BMI2: # %bb.0:
-; X86-BMI1BMI2-NEXT: movb $32, %al
-; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %al
-; X86-BMI1BMI2-NEXT: shlxl %eax, {{[0-9]+}}(%esp), %ecx
-; X86-BMI1BMI2-NEXT: shrxl %eax, %ecx, %eax
+; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-BMI1BMI2-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax
; X86-BMI1BMI2-NEXT: retl
;
; X64-NOBMI-LABEL: bzhi32_d1_indexzext:
;
; X64-BMI1BMI2-LABEL: bzhi32_d1_indexzext:
; X64-BMI1BMI2: # %bb.0:
-; X64-BMI1BMI2-NEXT: movb $32, %al
-; X64-BMI1BMI2-NEXT: subb %sil, %al
-; X64-BMI1BMI2-NEXT: shlxl %eax, %edi, %ecx
-; X64-BMI1BMI2-NEXT: shrxl %eax, %ecx, %eax
+; X64-BMI1BMI2-NEXT: bzhil %esi, %edi, %eax
; X64-BMI1BMI2-NEXT: retq
%numhighbits = sub i8 32, %numlowbits
%sh_prom = zext i8 %numhighbits to i32
; X86-BMI1BMI2-LABEL: bzhi32_d3_load_indexzext:
; X86-BMI1BMI2: # %bb.0:
; X86-BMI1BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-BMI1BMI2-NEXT: movb $32, %cl
-; X86-BMI1BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl
-; X86-BMI1BMI2-NEXT: shlxl %ecx, (%eax), %eax
-; X86-BMI1BMI2-NEXT: shrxl %ecx, %eax, %eax
+; X86-BMI1BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-BMI1BMI2-NEXT: bzhil %ecx, (%eax), %eax
; X86-BMI1BMI2-NEXT: retl
;
; X64-NOBMI-LABEL: bzhi32_d3_load_indexzext:
;
; X64-BMI1BMI2-LABEL: bzhi32_d3_load_indexzext:
; X64-BMI1BMI2: # %bb.0:
-; X64-BMI1BMI2-NEXT: movb $32, %al
-; X64-BMI1BMI2-NEXT: subb %sil, %al
-; X64-BMI1BMI2-NEXT: shlxl %eax, (%rdi), %ecx
-; X64-BMI1BMI2-NEXT: shrxl %eax, %ecx, %eax
+; X64-BMI1BMI2-NEXT: bzhil %esi, (%rdi), %eax
; X64-BMI1BMI2-NEXT: retq
%val = load i32, i32* %w
%numhighbits = sub i8 32, %numlowbits
;
; X64-BMI1BMI2-LABEL: bzhi64_d1_indexzext:
; X64-BMI1BMI2: # %bb.0:
-; X64-BMI1BMI2-NEXT: movb $64, %al
-; X64-BMI1BMI2-NEXT: subb %sil, %al
-; X64-BMI1BMI2-NEXT: shlxq %rax, %rdi, %rcx
-; X64-BMI1BMI2-NEXT: shrxq %rax, %rcx, %rax
+; X64-BMI1BMI2-NEXT: # kill: def $esi killed $esi def $rsi
+; X64-BMI1BMI2-NEXT: bzhiq %rsi, %rdi, %rax
; X64-BMI1BMI2-NEXT: retq
%numhighbits = sub i8 64, %numlowbits
%sh_prom = zext i8 %numhighbits to i64
;
; X64-BMI1BMI2-LABEL: bzhi64_d3_load_indexzext:
; X64-BMI1BMI2: # %bb.0:
-; X64-BMI1BMI2-NEXT: movb $64, %al
-; X64-BMI1BMI2-NEXT: subb %sil, %al
-; X64-BMI1BMI2-NEXT: shlxq %rax, (%rdi), %rcx
-; X64-BMI1BMI2-NEXT: shrxq %rax, %rcx, %rax
+; X64-BMI1BMI2-NEXT: # kill: def $esi killed $esi def $rsi
+; X64-BMI1BMI2-NEXT: bzhiq %rsi, (%rdi), %rax
; X64-BMI1BMI2-NEXT: retq
%val = load i64, i64* %w
%numhighbits = sub i8 64, %numlowbits