[x86] commute blendvb with constant condition op to allow load folding

author Sanjay Patel <spatel@rotateright.com>

Fri, 26 Oct 2018 14:58:13 +0000 (14:58 +0000)

committer Sanjay Patel <spatel@rotateright.com>

Fri, 26 Oct 2018 14:58:13 +0000 (14:58 +0000)
author Sanjay Patel <spatel@rotateright.com>
Fri, 26 Oct 2018 14:58:13 +0000 (14:58 +0000)
committer Sanjay Patel <spatel@rotateright.com>
Fri, 26 Oct 2018 14:58:13 +0000 (14:58 +0000)
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp

index 0426c80..dd37010 100644 (file)
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -10068,6 +10068,15 @@ static SDValue lowerVectorShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1,
      // type.
      MVT BlendVT = MVT::getVectorVT(MVT::i8, VT.getSizeInBits() / 8);
  
+    // x86 allows load folding with blendvb from the 2nd source operand. But
+    // we are still using LLVM select here (see comment below), so that's V1.
+    // If V2 can be load-folded and V1 cannot be load-folded, then commute to
+    // allow that load-folding possibility.
+    if (!ISD::isNormalLoad(V1.getNode()) && ISD::isNormalLoad(V2.getNode())) {
+      ShuffleVectorSDNode::commuteMask(Mask);
+      std::swap(V1, V2);
+    }
+
      // Compute the VSELECT mask. Note that VSELECT is really confusing in the
      // mix of LLVM's code generator and the x86 backend. We tell the code
      // generator that boolean values in the elements of an x86 vector register
diff --git a/test/CodeGen/X86/vector-shuffle-128-v16.ll b/test/CodeGen/X86/vector-shuffle-128-v16.ll

index d241005..bf34c03 100644 (file)
--- a/test/CodeGen/X86/vector-shuffle-128-v16.ll
+++ b/test/CodeGen/X86/vector-shuffle-128-v16.ll
@@ -601,17 +601,15 @@ define <16 x i8> @load_fold_pblendvb(<16 x i8>* %px, <16 x i8> %y) {
  ; SSE41-LABEL: load_fold_pblendvb:
  ; SSE41:       # %bb.0:
  ; SSE41-NEXT:    movdqa %xmm0, %xmm1
-; SSE41-NEXT:    movdqa (%rdi), %xmm2
-; SSE41-NEXT:    movaps {{.*#+}} xmm0 = [255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255]
-; SSE41-NEXT:    pblendvb %xmm0, %xmm1, %xmm2
-; SSE41-NEXT:    movdqa %xmm2, %xmm0
+; SSE41-NEXT:    movaps {{.*#+}} xmm0 = [0,0,255,0,255,255,255,0,0,0,255,0,255,255,255,0]
+; SSE41-NEXT:    pblendvb %xmm0, (%rdi), %xmm1
+; SSE41-NEXT:    movdqa %xmm1, %xmm0
  ; SSE41-NEXT:    retq
  ;
  ; AVX1OR2-LABEL: load_fold_pblendvb:
  ; AVX1OR2:       # %bb.0:
-; AVX1OR2-NEXT:    vmovdqa (%rdi), %xmm1
-; AVX1OR2-NEXT:    vmovdqa {{.*#+}} xmm2 = [255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255]
-; AVX1OR2-NEXT:    vpblendvb %xmm2, %xmm0, %xmm1, %xmm0
+; AVX1OR2-NEXT:    vmovdqa {{.*#+}} xmm1 = [0,0,255,0,255,255,255,0,0,0,255,0,255,255,255,0]
+; AVX1OR2-NEXT:    vpblendvb %xmm1, (%rdi), %xmm0, %xmm0
  ; AVX1OR2-NEXT:    retq
  ;
  ; AVX512VL-LABEL: load_fold_pblendvb:
diff --git a/test/CodeGen/X86/vector-shuffle-256-v32.ll b/test/CodeGen/X86/vector-shuffle-256-v32.ll

index 8189be0..c4759ab 100644 (file)
--- a/test/CodeGen/X86/vector-shuffle-256-v32.ll
+++ b/test/CodeGen/X86/vector-shuffle-256-v32.ll
@@ -1656,9 +1656,8 @@ define <32 x i8> @load_fold_pblendvb(<32 x i8>* %px, <32 x i8> %y) {
  ;
  ; AVX2-LABEL: load_fold_pblendvb:
  ; AVX2:       # %bb.0:
-; AVX2-NEXT:    vmovdqa (%rdi), %ymm1
-; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255]
-; AVX2-NEXT:    vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
+; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,0,255,0,255,255,255,0,0,0,255,0,255,255,255,0,0,0,255,0,255,255,255,0,0,0,255,0,255,255,255,0]
+; AVX2-NEXT:    vpblendvb %ymm1, (%rdi), %ymm0, %ymm0
  ; AVX2-NEXT:    retq
  ;
  ; AVX512VL-LABEL: load_fold_pblendvb:
author	Sanjay Patel <spatel@rotateright.com>
	Fri, 26 Oct 2018 14:58:13 +0000 (14:58 +0000)
committer	Sanjay Patel <spatel@rotateright.com>
	Fri, 26 Oct 2018 14:58:13 +0000 (14:58 +0000)
lib/Target/X86/X86ISelLowering.cpp		patch | blob | history
test/CodeGen/X86/vector-shuffle-128-v16.ll		patch | blob | history
test/CodeGen/X86/vector-shuffle-256-v32.ll		patch | blob | history