int M0 = SV0->getMaskElt(i);
int M1 = SV1->getMaskElt(i);
- // Both shuffle indexes are undef. Propagate Undef.
- if (M0 < 0 && M1 < 0) {
+ // Determine if either index is pointing to a zero vector.
+ bool M0Zero = M0 < 0 || (ZeroN00 == (M0 < NumElts));
+ bool M1Zero = M1 < 0 || (ZeroN10 == (M1 < NumElts));
+
+ // If one element is zero and the other side is undef, keep undef.
+ // This also handles the case that both are undef.
+ if ((M0Zero && M1 < 0) || (M1Zero && M0 < 0)) {
Mask[i] = -1;
continue;
}
- // Determine if either index is pointing to a zero vector.
- bool M0Zero = M0 >= 0 && (ZeroN00 == (M0 < NumElts));
- bool M1Zero = M1 >= 0 && (ZeroN10 == (M1 < NumElts));
+ // Make sure only one of the elements is zero.
if (M0Zero == M1Zero) {
CanFold = false;
break;
}
+ assert((M0 >= 0 || M1 >= 0) && "Undef index!");
+
// We have a zero and non-zero element. If the non-zero came from
// SV0 make the index a LHS index. If it came from SV1, make it
// a RHS index. We need to mod by NumElts because we don't care
define <4 x i32> @test2e(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test2e:
; CHECK: # BB#0:
-; CHECK-NEXT: pxor %xmm2, %xmm2
-; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5,6,7]
-; CHECK-NEXT: movq {{.*#+}} xmm1 = xmm1[0],zero
-; CHECK-NEXT: por %xmm1, %xmm0
+; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
; CHECK-NEXT: retq
%shuf1 = shufflevector <4 x i32> %a, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>, <4 x i32><i32 undef, i32 4, i32 2, i32 3>
%shuf2 = shufflevector <4 x i32> %b, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>, <4 x i32><i32 0, i32 1, i32 4, i32 4>
define <4 x i32> @test2f(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test2f:
; CHECK: # BB#0:
-; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,3]
-; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7]
+; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
; CHECK-NEXT: retq
%shuf1 = shufflevector <4 x i32> %a, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>, <4 x i32><i32 4, i32 4, i32 2, i32 3>
%shuf2 = shufflevector <4 x i32> %b, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>, <4 x i32><i32 undef, i32 1, i32 4, i32 4>