/// It makes no assumptions about whether this is the *best* lowering, it simply
/// uses it.
static SDValue lowerShuffleWithSHUFPS(const SDLoc &DL, MVT VT,
- ArrayRef<int> Mask, SDValue V1,
+ ArrayRef<int> OriginalMask, SDValue V1,
SDValue V2, SelectionDAG &DAG) {
SDValue LowV = V1, HighV = V2;
- int NewMask[4] = {Mask[0], Mask[1], Mask[2], Mask[3]};
+ SmallVector<int, 4> Mask(OriginalMask.begin(), OriginalMask.end());
+ SmallVector<int, 4> NewMask = Mask;
int NumV2Elements = count_if(Mask, [](int M) { return M >= 4; });
NewMask[V2Index] = 0; // We shifted the V2 element into V2[0].
}
} else if (NumV2Elements == 2) {
+ // If we are likely to fold V1 but not V2, then commute the shuffle.
+ if (MayFoldLoad(V1) && !MayFoldLoad(V2)) {
+ ShuffleVectorSDNode::commuteMask(Mask);
+ NewMask = Mask;
+ std::swap(V1, V2);
+ std::swap(LowV, HighV);
+ }
+
if (Mask[0] < 4 && Mask[1] < 4) {
// Handle the easy case where we have V1 in the low lanes and V2 in the
// high lanes.
define <4 x float> @shuffle_mem_v4f32_0624(<4 x float> %a0, <4 x float>* %a1) {
; SSE-LABEL: shuffle_mem_v4f32_0624:
; SSE: # %bb.0:
-; SSE-NEXT: movaps (%rdi), %xmm1
-; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2],xmm0[2,0]
-; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2,1,3]
-; SSE-NEXT: movaps %xmm1, %xmm0
+; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],mem[0,2]
+; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0,3,1]
; SSE-NEXT: retq
;
; AVX1OR2-LABEL: shuffle_mem_v4f32_0624:
; AVX1OR2: # %bb.0:
-; AVX1OR2-NEXT: vmovaps (%rdi), %xmm1
-; AVX1OR2-NEXT: vshufps {{.*#+}} xmm0 = xmm1[0,2],xmm0[2,0]
-; AVX1OR2-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,1,3]
+; AVX1OR2-NEXT: vshufps {{.*#+}} xmm0 = xmm0[2,0],mem[0,2]
+; AVX1OR2-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,0,3,1]
; AVX1OR2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_mem_v4f32_0624: