(VBROADCASTI32X4rm addr:$src)>;
def : Pat<(v64i8 (X86SubVBroadcast (bc_v16i8 (loadv2i64 addr:$src)))),
(VBROADCASTI32X4rm addr:$src)>;
+
+// Patterns for selects of bitcasted operations.
+def : Pat<(vselect VK16WM:$mask,
+ (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
+ (bc_v16f32 (v16i32 immAllZerosV))),
+ (VBROADCASTF32X4rmkz VK16WM:$mask, addr:$src)>;
+def : Pat<(vselect VK16WM:$mask,
+ (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
+ VR512:$src0),
+ (VBROADCASTF32X4rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
+def : Pat<(vselect VK16WM:$mask,
+ (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv2i64 addr:$src)))),
+ (v16i32 immAllZerosV)),
+ (VBROADCASTI32X4rmkz VK16WM:$mask, addr:$src)>;
+def : Pat<(vselect VK16WM:$mask,
+ (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv2i64 addr:$src)))),
+ VR512:$src0),
+ (VBROADCASTI32X4rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
+
+def : Pat<(vselect VK8WM:$mask,
+ (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv8f32 addr:$src)))),
+ (bc_v8f64 (v16i32 immAllZerosV))),
+ (VBROADCASTF64X4rmkz VK8WM:$mask, addr:$src)>;
+def : Pat<(vselect VK8WM:$mask,
+ (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv8f32 addr:$src)))),
+ VR512:$src0),
+ (VBROADCASTF64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
+def : Pat<(vselect VK8WM:$mask,
+ (bc_v8i64 (v16i32 (X86SubVBroadcast (bc_v8i32 (loadv4i64 addr:$src))))),
+ (bc_v8i64 (v16i32 immAllZerosV))),
+ (VBROADCASTI64X4rmkz VK8WM:$mask, addr:$src)>;
+def : Pat<(vselect VK8WM:$mask,
+ (bc_v8i64 (v16i32 (X86SubVBroadcast (bc_v8i32 (loadv4i64 addr:$src))))),
+ VR512:$src0),
+ (VBROADCASTI64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
}
let Predicates = [HasVLX] in {
def : Pat<(v32i8 (X86SubVBroadcast (bc_v16i8 (loadv2i64 addr:$src)))),
(VBROADCASTI32X4Z256rm addr:$src)>;
+// Patterns for selects of bitcasted operations.
+def : Pat<(vselect VK8WM:$mask,
+ (bc_v8f32 (v4f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
+ (bc_v8f32 (v8i32 immAllZerosV))),
+ (VBROADCASTF32X4Z256rmkz VK8WM:$mask, addr:$src)>;
+def : Pat<(vselect VK8WM:$mask,
+ (bc_v8f32 (v4f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
+ VR256X:$src0),
+ (VBROADCASTF32X4Z256rmk VR256X:$src0, VK8WM:$mask, addr:$src)>;
+def : Pat<(vselect VK8WM:$mask,
+ (bc_v8i32 (v4i64 (X86SubVBroadcast (loadv2i64 addr:$src)))),
+ (v8i32 immAllZerosV)),
+ (VBROADCASTI32X4Z256rmkz VK8WM:$mask, addr:$src)>;
+def : Pat<(vselect VK8WM:$mask,
+ (bc_v8i32 (v4i64 (X86SubVBroadcast (loadv2i64 addr:$src)))),
+ VR256X:$src0),
+ (VBROADCASTI32X4Z256rmk VR256X:$src0, VK8WM:$mask, addr:$src)>;
+
+
// Provide fallback in case the load node that is used in the patterns above
// is used by additional users, which prevents the pattern selection.
def : Pat<(v4f64 (X86SubVBroadcast (v2f64 VR128X:$src))),
defm VBROADCASTF64X2Z128 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
v4f64x_info, v2f64x_info>, VEX_W,
EVEX_V256, EVEX_CD8<64, CD8VT2>;
+
+// Patterns for selects of bitcasted operations.
+def : Pat<(vselect VK4WM:$mask,
+ (bc_v4f64 (v8f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
+ (bc_v4f64 (v8i32 immAllZerosV))),
+ (VBROADCASTF64X2Z128rmkz VK4WM:$mask, addr:$src)>;
+def : Pat<(vselect VK4WM:$mask,
+ (bc_v4f64 (v8f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
+ VR256X:$src0),
+ (VBROADCASTF64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
+def : Pat<(vselect VK4WM:$mask,
+ (bc_v4i64 (v8i32 (X86SubVBroadcast (bc_v4i32 (loadv2i64 addr:$src))))),
+ (bc_v4i64 (v8i32 immAllZerosV))),
+ (VBROADCASTI64X2Z128rmkz VK4WM:$mask, addr:$src)>;
+def : Pat<(vselect VK4WM:$mask,
+ (bc_v4i64 (v8i32 (X86SubVBroadcast (bc_v4i32 (loadv2i64 addr:$src))))),
+ VR256X:$src0),
+ (VBROADCASTI64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
}
let Predicates = [HasDQI] in {
defm VBROADCASTF32X8 : avx512_subvec_broadcast_rm_dq<0x1b, "vbroadcastf32x8",
v16f32_info, v8f32x_info>,
EVEX_V512, EVEX_CD8<32, CD8VT8>;
+
+// Patterns for selects of bitcasted operations.
+def : Pat<(vselect VK16WM:$mask,
+ (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv4f64 addr:$src)))),
+ (bc_v16f32 (v16i32 immAllZerosV))),
+ (VBROADCASTF32X8rmkz VK16WM:$mask, addr:$src)>;
+def : Pat<(vselect VK16WM:$mask,
+ (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv4f64 addr:$src)))),
+ VR512:$src0),
+ (VBROADCASTF32X8rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
+def : Pat<(vselect VK16WM:$mask,
+ (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv4i64 addr:$src)))),
+ (v16i32 immAllZerosV)),
+ (VBROADCASTI32X8rmkz VK16WM:$mask, addr:$src)>;
+def : Pat<(vselect VK16WM:$mask,
+ (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv4i64 addr:$src)))),
+ VR512:$src0),
+ (VBROADCASTI32X8rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
+
+def : Pat<(vselect VK8WM:$mask,
+ (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
+ (bc_v8f64 (v16i32 immAllZerosV))),
+ (VBROADCASTF64X2rmkz VK8WM:$mask, addr:$src)>;
+def : Pat<(vselect VK8WM:$mask,
+ (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
+ VR512:$src0),
+ (VBROADCASTF64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
+def : Pat<(vselect VK8WM:$mask,
+ (bc_v8i64 (v16i32 (X86SubVBroadcast (bc_v4i32 (loadv2i64 addr:$src))))),
+ (bc_v8i64 (v16i32 immAllZerosV))),
+ (VBROADCASTI64X2rmkz VK8WM:$mask, addr:$src)>;
+def : Pat<(vselect VK8WM:$mask,
+ (bc_v8i64 (v16i32 (X86SubVBroadcast (bc_v4i32 (loadv2i64 addr:$src))))),
+ VR512:$src0),
+ (VBROADCASTI64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
}
multiclass avx512_common_broadcast_32x2<bits<8> opc, string OpcodeStr,
ret <2 x double> %res
}
-define <8 x float> @test_broadcast_2f64_8f32(<2 x double> *%p, i8 %mask) nounwind {
-; CHECK-LABEL: test_broadcast_2f64_8f32:
+define <8 x float> @test_broadcast_2f64_8f32_mask(<2 x double> *%p, i8 %mask, <8 x float> %passthru) nounwind {
+; CHECK-LABEL: test_broadcast_2f64_8f32_mask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %esi, %k1
+; CHECK-NEXT: vbroadcastf32x4 {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,0,1,2,3]
+; CHECK-NEXT: retq
+ %1 = load <2 x double>, <2 x double> *%p
+ %2 = shufflevector <2 x double> %1, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
+ %3 = bitcast <4 x double> %2 to <8 x float>
+ %mask.cast = bitcast i8 %mask to <8 x i1>
+ %res = select <8 x i1> %mask.cast, <8 x float> %3, <8 x float> %passthru
+ ret <8 x float> %res
+}
+
+define <8 x float> @test_broadcast_2f64_8f32_maskz(<2 x double> *%p, i8 %mask) nounwind {
+; CHECK-LABEL: test_broadcast_2f64_8f32_maskz:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vbroadcastf32x4 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3]
ret <8 x float> %res
}
-define <8 x i32> @test_broadcast_2i64_8i32(<2 x i64> *%p, i8 %mask) nounwind {
-; CHECK-LABEL: test_broadcast_2i64_8i32:
+define <8 x i32> @test_broadcast_2i64_8i32_mask(<2 x i64> *%p, i8 %mask, <8 x i32> %passthru) nounwind {
+; CHECK-LABEL: test_broadcast_2i64_8i32_mask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %esi, %k1
+; CHECK-NEXT: vbroadcasti32x4 {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,0,1,2,3]
+; CHECK-NEXT: retq
+ %1 = load <2 x i64>, <2 x i64> *%p
+ %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
+ %3 = bitcast <4 x i64> %2 to <8 x i32>
+ %mask.cast = bitcast i8 %mask to <8 x i1>
+ %res = select <8 x i1> %mask.cast, <8 x i32> %3, <8 x i32> %passthru
+ ret <8 x i32> %res
+}
+
+define <8 x i32> @test_broadcast_2i64_8i32_maskz(<2 x i64> *%p, i8 %mask) nounwind {
+; CHECK-LABEL: test_broadcast_2i64_8i32_maskz:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vbroadcasti32x4 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3]
ret <8 x i32> %res
}
-define <16 x float> @test_broadcast_2f64_16f32(<2 x double> *%p, i16 %mask) nounwind {
-; CHECK-LABEL: test_broadcast_2f64_16f32:
+define <16 x float> @test_broadcast_2f64_16f32_mask(<2 x double> *%p, i16 %mask, <16 x float> %passthru) nounwind {
+; CHECK-LABEL: test_broadcast_2f64_16f32_mask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %esi, %k1
+; CHECK-NEXT: vbroadcastf32x4 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
+; CHECK-NEXT: retq
+ %1 = load <2 x double>, <2 x double> *%p
+ %2 = shufflevector <2 x double> %1, <2 x double> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
+ %3 = bitcast <8 x double> %2 to <16 x float>
+ %mask.cast = bitcast i16 %mask to <16 x i1>
+ %res = select <16 x i1> %mask.cast, <16 x float> %3, <16 x float> %passthru
+ ret <16 x float> %res
+}
+
+define <16 x float> @test_broadcast_2f64_16f32_maskz(<2 x double> *%p, i16 %mask) nounwind {
+; CHECK-LABEL: test_broadcast_2f64_16f32_maskz:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vbroadcastf32x4 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
ret <16 x float> %res
}
-define <16 x i32> @test_broadcast_2i64_16i32(<2 x i64> *%p, i16 %mask) nounwind {
-; CHECK-LABEL: test_broadcast_2i64_16i32:
+define <16 x i32> @test_broadcast_2i64_16i32_mask(<2 x i64> *%p, i16 %mask, <16 x i32> %passthru) nounwind {
+; CHECK-LABEL: test_broadcast_2i64_16i32_mask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %esi, %k1
+; CHECK-NEXT: vbroadcasti32x4 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
+; CHECK-NEXT: retq
+ %1 = load <2 x i64>, <2 x i64> *%p
+ %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
+ %3 = bitcast <8 x i64> %2 to <16 x i32>
+ %mask.cast = bitcast i16 %mask to <16 x i1>
+ %res = select <16 x i1> %mask.cast, <16 x i32> %3, <16 x i32> %passthru
+ ret <16 x i32> %res
+}
+
+define <16 x i32> @test_broadcast_2i64_16i32_maskz(<2 x i64> *%p, i16 %mask) nounwind {
+; CHECK-LABEL: test_broadcast_2i64_16i32_maskz:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vbroadcasti32x4 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
ret <16 x i32> %res
}
-define <16 x float> @test_broadcast_4f64_16f32(<4 x double> *%p, i16 %mask) nounwind {
-; CHECK-LABEL: test_broadcast_4f64_16f32:
+define <16 x float> @test_broadcast_4f64_16f32_mask(<4 x double> *%p, i16 %mask, <16 x float> %passthru) nounwind {
+; CHECK-LABEL: test_broadcast_4f64_16f32_mask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %esi, %k1
+; CHECK-NEXT: vbroadcastf32x8 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
+; CHECK-NEXT: retq
+ %1 = load <4 x double>, <4 x double> *%p
+ %2 = shufflevector <4 x double> %1, <4 x double> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+ %3 = bitcast <8 x double> %2 to <16 x float>
+ %mask.cast = bitcast i16 %mask to <16 x i1>
+ %res = select <16 x i1> %mask.cast, <16 x float> %3, <16 x float> %passthru
+ ret <16 x float> %res
+}
+
+define <16 x float> @test_broadcast_4f64_16f32_maskz(<4 x double> *%p, i16 %mask) nounwind {
+; CHECK-LABEL: test_broadcast_4f64_16f32_maskz:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vbroadcastf32x8 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
ret <16 x float> %res
}
-define <16 x i32> @test_broadcast_4i64_16i32(<4 x i64> *%p, i16 %mask) nounwind {
-; CHECK-LABEL: test_broadcast_4i64_16i32:
+define <16 x i32> @test_broadcast_4i64_16i32_mask(<4 x i64> *%p, i16 %mask, <16 x i32> %passthru) nounwind {
+; CHECK-LABEL: test_broadcast_4i64_16i32_mask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %esi, %k1
+; CHECK-NEXT: vbroadcasti32x8 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
+; CHECK-NEXT: retq
+ %1 = load <4 x i64>, <4 x i64> *%p
+ %2 = shufflevector <4 x i64> %1, <4 x i64> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+ %3 = bitcast <8 x i64> %2 to <16 x i32>
+ %mask.cast = bitcast i16 %mask to <16 x i1>
+ %res = select <16 x i1> %mask.cast, <16 x i32> %3, <16 x i32> %passthru
+ ret <16 x i32> %res
+}
+
+define <16 x i32> @test_broadcast_4i64_16i32_maskz(<4 x i64> *%p, i16 %mask) nounwind {
+; CHECK-LABEL: test_broadcast_4i64_16i32_maskz:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vbroadcasti32x8 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
ret <16 x i32> %res
}
-define <4 x double> @test_broadcast_4f32_4f64(<4 x float> *%p, i8 %mask) nounwind {
-; CHECK-LABEL: test_broadcast_4f32_4f64:
+define <4 x double> @test_broadcast_4f32_4f64_mask(<4 x float> *%p, i8 %mask, <4 x double> %passthru) nounwind {
+; CHECK-LABEL: test_broadcast_4f32_4f64_mask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %esi, %k1
+; CHECK-NEXT: vbroadcastf64x2 {{.*#+}} ymm0 {%k1} = mem[0,1,0,1]
+; CHECK-NEXT: retq
+ %1 = load <4 x float>, <4 x float> *%p
+ %2 = shufflevector <4 x float> %1, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+ %3 = bitcast <8 x float> %2 to <4 x double>
+ %mask.cast = bitcast i8 %mask to <8 x i1>
+ %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %res = select <4 x i1> %mask.extract, <4 x double> %3, <4 x double> %passthru
+ ret <4 x double> %res
+}
+
+define <4 x double> @test_broadcast_4f32_4f64_maskz(<4 x float> *%p, i8 %mask) nounwind {
+; CHECK-LABEL: test_broadcast_4f32_4f64_maskz:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vbroadcastf64x2 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,0,1]
ret <4 x double> %res
}
-define <4 x i64> @test_broadcast_4i32_4i64(<4 x i32> *%p, i8 %mask) nounwind {
-; CHECK-LABEL: test_broadcast_4i32_4i64:
+define <4 x i64> @test_broadcast_4i32_4i64_mask(<4 x i32> *%p, i8 %mask, <4 x i64> %passthru) nounwind {
+; CHECK-LABEL: test_broadcast_4i32_4i64_mask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %esi, %k1
+; CHECK-NEXT: vbroadcasti64x2 {{.*#+}} ymm0 {%k1} = mem[0,1,0,1]
+; CHECK-NEXT: retq
+ %1 = load <4 x i32>, <4 x i32> *%p
+ %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+ %3 = bitcast <8 x i32> %2 to <4 x i64>
+ %mask.cast = bitcast i8 %mask to <8 x i1>
+ %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %res = select <4 x i1> %mask.extract, <4 x i64> %3, <4 x i64> %passthru
+ ret <4 x i64> %res
+}
+
+define <4 x i64> @test_broadcast_4i32_4i64_maskz(<4 x i32> *%p, i8 %mask) nounwind {
+; CHECK-LABEL: test_broadcast_4i32_4i64_maskz:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vbroadcasti64x2 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,0,1]
ret <4 x i64> %res
}
-define <8 x double> @test_broadcast_4f32_8f64(<4 x float> *%p, i8 %mask) nounwind {
-; CHECK-LABEL: test_broadcast_4f32_8f64:
+define <8 x double> @test_broadcast_4f32_8f64_mask(<4 x float> *%p, i8 %mask, <8 x double> %passthru) nounwind {
+; CHECK-LABEL: test_broadcast_4f32_8f64_mask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %esi, %k1
+; CHECK-NEXT: vbroadcastf64x2 {{.*#+}} zmm0 {%k1} = mem[0,1,0,1,0,1,0,1]
+; CHECK-NEXT: retq
+ %1 = load <4 x float>, <4 x float> *%p
+ %2 = shufflevector <4 x float> %1, <4 x float> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+ %3 = bitcast <16 x float> %2 to <8 x double>
+ %mask.cast = bitcast i8 %mask to <8 x i1>
+ %res = select <8 x i1> %mask.cast, <8 x double> %3, <8 x double> %passthru
+ ret <8 x double> %res
+}
+
+define <8 x double> @test_broadcast_4f32_8f64_maskz(<4 x float> *%p, i8 %mask) nounwind {
+; CHECK-LABEL: test_broadcast_4f32_8f64_maskz:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vbroadcastf64x2 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,0,1,0,1,0,1]
ret <8 x double> %res
}
-define <8 x i64> @test_broadcast_4i32_8i64(<4 x i32> *%p, i8 %mask) nounwind {
-; CHECK-LABEL: test_broadcast_4i32_8i64:
+define <8 x i64> @test_broadcast_4i32_8i64_mask(<4 x i32> *%p, i8 %mask, <8 x i64> %passthru) nounwind {
+; CHECK-LABEL: test_broadcast_4i32_8i64_mask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %esi, %k1
+; CHECK-NEXT: vbroadcasti64x2 {{.*#+}} zmm0 {%k1} = mem[0,1,0,1,0,1,0,1]
+; CHECK-NEXT: retq
+ %1 = load <4 x i32>, <4 x i32> *%p
+ %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+ %3 = bitcast <16 x i32> %2 to <8 x i64>
+ %mask.cast = bitcast i8 %mask to <8 x i1>
+ %res = select <8 x i1> %mask.cast, <8 x i64> %3, <8 x i64> %passthru
+ ret <8 x i64> %res
+}
+
+define <8 x i64> @test_broadcast_4i32_8i64_maskz(<4 x i32> *%p, i8 %mask) nounwind {
+; CHECK-LABEL: test_broadcast_4i32_8i64_maskz:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vbroadcasti64x2 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,0,1,0,1,0,1]
ret <8 x i64> %res
}
-define <8 x double> @test_broadcast_8f32_8f64(<8 x float> *%p, i8 %mask) nounwind {
-; CHECK-LABEL: test_broadcast_8f32_8f64:
+define <8 x double> @test_broadcast_8f32_8f64_mask(<8 x float> *%p, i8 %mask, <8 x double> %passthru) nounwind {
+; CHECK-LABEL: test_broadcast_8f32_8f64_mask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %esi, %k1
+; CHECK-NEXT: vbroadcastf64x4 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,0,1,2,3]
+; CHECK-NEXT: retq
+ %1 = load <8 x float>, <8 x float> *%p
+ %2 = shufflevector <8 x float> %1, <8 x float> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %3 = bitcast <16 x float> %2 to <8 x double>
+ %mask.cast = bitcast i8 %mask to <8 x i1>
+ %res = select <8 x i1> %mask.cast, <8 x double> %3, <8 x double> %passthru
+ ret <8 x double> %res
+}
+
+define <8 x double> @test_broadcast_8f32_8f64_maskz(<8 x float> *%p, i8 %mask) nounwind {
+; CHECK-LABEL: test_broadcast_8f32_8f64_maskz:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vbroadcastf64x4 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3]
ret <8 x double> %res
}
-define <8 x i64> @test_broadcast_8i32_8i64(<8 x i32> *%p, i8 %mask) nounwind {
-; CHECK-LABEL: test_broadcast_8i32_8i64:
+define <8 x i64> @test_broadcast_8i32_8i64_mask(<8 x i32> *%p, i8 %mask, <8 x i64> %passthru) nounwind {
+; CHECK-LABEL: test_broadcast_8i32_8i64_mask:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovd %esi, %k1
+; CHECK-NEXT: vbroadcasti64x4 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,0,1,2,3]
+; CHECK-NEXT: retq
+ %1 = load <8 x i32>, <8 x i32> *%p
+ %2 = shufflevector <8 x i32> %1, <8 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %3 = bitcast <16 x i32> %2 to <8 x i64>
+ %mask.cast = bitcast i8 %mask to <8 x i1>
+ %res = select <8 x i1> %mask.cast, <8 x i64> %3, <8 x i64> %passthru
+ ret <8 x i64> %res
+}
+
+define <8 x i64> @test_broadcast_8i32_8i64_maskz(<8 x i32> *%p, i8 %mask) nounwind {
+; CHECK-LABEL: test_broadcast_8i32_8i64_maskz:
; CHECK: # %bb.0:
; CHECK-NEXT: kmovd %esi, %k1
; CHECK-NEXT: vbroadcasti64x4 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3]