SmallVector<int, 16> WidenedMask;
if (VT.getScalarSizeInBits() < 64 && !Is1BitVector &&
canWidenShuffleElements(ZeroableMask, WidenedMask)) {
+ // Shuffle mask widening should not interfere with a broadcast opportunity
+ // by obfuscating the operands with bitcasts.
+ // TODO: Avoid lowering directly from this top-level function: make this
+ // a query (canLowerAsBroadcast) and defer lowering to the type-based calls.
+ if (SDValue Broadcast =
+ lowerVectorShuffleAsBroadcast(DL, VT, V1, V2, Mask, Subtarget, DAG))
+ return Broadcast;
+
MVT NewEltVT = VT.isFloatingPoint()
? MVT::getFloatingPointVT(VT.getScalarSizeInBits() * 2)
: MVT::getIntegerVT(VT.getScalarSizeInBits() * 2);
;
; AVX2-SLOW-LABEL: add_ps_007_2:
; AVX2-SLOW: # %bb.0:
-; AVX2-SLOW-NEXT: vmovddup {{.*#+}} xmm1 = xmm0[0,0]
+; AVX2-SLOW-NEXT: vbroadcastss %xmm0, %xmm1
; AVX2-SLOW-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,1,1,3]
; AVX2-SLOW-NEXT: vaddps %xmm0, %xmm1, %xmm0
; AVX2-SLOW-NEXT: retq
;
; AVX2-SLOW-LABEL: add_ps_018:
; AVX2-SLOW: # %bb.0:
-; AVX2-SLOW-NEXT: vmovddup {{.*#+}} xmm1 = xmm0[0,0]
+; AVX2-SLOW-NEXT: vbroadcastss %xmm0, %xmm1
; AVX2-SLOW-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,1,1,3]
; AVX2-SLOW-NEXT: vaddps %xmm0, %xmm1, %xmm0
; AVX2-SLOW-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X64SSE4-NEXT: movaps {{.*#+}} xmm0 = [4.2E+1,1.0E+0,2.0E+0,3.0E+0]
; X64SSE4-NEXT: retq
;
-; X32AVX1-LABEL: elt6_v8f32:
-; X32AVX1: # %bb.0:
-; X32AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X32AVX1-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
-; X32AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
-; X32AVX1-NEXT: vblendps {{.*#+}} ymm0 = mem[0,1,2,3,4,5],ymm0[6],mem[7]
-; X32AVX1-NEXT: retl
+; X32AVX-LABEL: elt6_v8f32:
+; X32AVX: # %bb.0:
+; X32AVX-NEXT: vbroadcastss {{[0-9]+}}(%esp), %ymm0
+; X32AVX-NEXT: vblendps {{.*#+}} ymm0 = mem[0,1,2,3,4,5],ymm0[6],mem[7]
+; X32AVX-NEXT: retl
;
; X64AVX1-LABEL: elt6_v8f32:
; X64AVX1: # %bb.0:
; X64AVX1-NEXT: vblendps {{.*#+}} ymm0 = mem[0,1,2,3,4,5],ymm0[6],mem[7]
; X64AVX1-NEXT: retq
;
-; X32AVX2-LABEL: elt6_v8f32:
-; X32AVX2: # %bb.0:
-; X32AVX2-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X32AVX2-NEXT: vbroadcastsd %xmm0, %ymm0
-; X32AVX2-NEXT: vblendps {{.*#+}} ymm0 = mem[0,1,2,3,4,5],ymm0[6],mem[7]
-; X32AVX2-NEXT: retl
-;
; X64AVX2-LABEL: elt6_v8f32:
; X64AVX2: # %bb.0:
-; X64AVX2-NEXT: vbroadcastsd %xmm0, %ymm0
+; X64AVX2-NEXT: vbroadcastss %xmm0, %ymm0
; X64AVX2-NEXT: vblendps {{.*#+}} ymm0 = mem[0,1,2,3,4,5],ymm0[6],mem[7]
; X64AVX2-NEXT: retq
;
-; X32AVX512F-LABEL: elt6_v8f32:
-; X32AVX512F: # %bb.0:
-; X32AVX512F-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X32AVX512F-NEXT: vbroadcastsd %xmm0, %ymm0
-; X32AVX512F-NEXT: vblendps {{.*#+}} ymm0 = mem[0,1,2,3,4,5],ymm0[6],mem[7]
-; X32AVX512F-NEXT: retl
-;
; X64AVX512F-LABEL: elt6_v8f32:
; X64AVX512F: # %bb.0:
-; X64AVX512F-NEXT: vbroadcastsd %xmm0, %ymm0
+; X64AVX512F-NEXT: vbroadcastss %xmm0, %ymm0
; X64AVX512F-NEXT: vblendps {{.*#+}} ymm0 = mem[0,1,2,3,4,5],ymm0[6],mem[7]
; X64AVX512F-NEXT: retq
%ins = insertelement <8 x float> <float 42.0, float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0>, float %x, i32 6
define <4 x i32> @undef_splatmask5(<4 x i32> %v, <4 x i32>* %p) nounwind {
; AVX2-LABEL: undef_splatmask5:
; AVX2: # %bb.0:
-; AVX2-NEXT: vpbroadcastd %xmm0, %xmm1
-; AVX2-NEXT: vpbroadcastq %xmm0, %xmm0
-; AVX2-NEXT: vmovdqa %xmm0, (%rdi)
-; AVX2-NEXT: vmovdqa %xmm1, %xmm0
+; AVX2-NEXT: vbroadcastss %xmm0, %xmm0
+; AVX2-NEXT: vmovaps %xmm0, (%rdi)
; AVX2-NEXT: retq
%res = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 0, i32 undef, i32 0, i32 undef>
%res1 = shufflevector <4 x i32> %res, <4 x i32> undef, <4 x i32> <i32 0, i32 2, i32 undef, i32 3>
;
; AVX2OR512VL-LABEL: shuffle_v4i32_0142:
; AVX2OR512VL: # %bb.0:
-; AVX2OR512VL-NEXT: vpbroadcastq %xmm1, %xmm1
-; AVX2OR512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,2,2]
-; AVX2OR512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3]
+; AVX2OR512VL-NEXT: vbroadcastss %xmm1, %xmm1
+; AVX2OR512VL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,1,2,2]
+; AVX2OR512VL-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3]
; AVX2OR512VL-NEXT: retq
%shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 2>
ret <4 x i32> %shuffle
;
; AVX2-LABEL: combine_nested_undef_test12:
; AVX2: # %bb.0:
-; AVX2-NEXT: vpbroadcastq %xmm0, %xmm0
+; AVX2-NEXT: vbroadcastss %xmm0, %xmm0
; AVX2-NEXT: retq
%1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 0, i32 0, i32 2, i32 4>
%2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 1, i32 4, i32 0, i32 4>