[X86][SSE] Added support for lowering to ADDSUBPS/ADDSUBPD with commuted inputs

author Simon Pilgrim <llvm-dev@redking.me.uk>

Sun, 29 Nov 2015 16:41:04 +0000 (16:41 +0000)

committer Simon Pilgrim <llvm-dev@redking.me.uk>

Sun, 29 Nov 2015 16:41:04 +0000 (16:41 +0000)
author Simon Pilgrim <llvm-dev@redking.me.uk>
Sun, 29 Nov 2015 16:41:04 +0000 (16:41 +0000)
committer Simon Pilgrim <llvm-dev@redking.me.uk>
Sun, 29 Nov 2015 16:41:04 +0000 (16:41 +0000)
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp

index 29742df..3904d27 100644 (file)
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -23141,14 +23141,19 @@ static SDValue combineShuffleToAddSub(SDNode *N, SelectionDAG &DAG) {
      return SDValue();
  
    auto *SVN = cast<ShuffleVectorSDNode>(N);
-  ArrayRef<int> Mask = SVN->getMask();
+  SmallVector<int, 8> Mask;
+  for (int M : SVN->getMask())
+    Mask.push_back(M);
+
    SDValue V1 = N->getOperand(0);
    SDValue V2 = N->getOperand(1);
  
-  // We require the first shuffle operand to be the SUB node, and the second to
-  // be the ADD node.
-  // FIXME: We should support the commuted patterns.
-  if (V1->getOpcode() != ISD::FSUB || V2->getOpcode() != ISD::FADD)
+  // We require the first shuffle operand to be the FSUB node, and the second to
+  // be the FADD node; if they are swapped, commute the operands and the mask.
+  if (V1.getOpcode() == ISD::FADD && V2.getOpcode() == ISD::FSUB) {
+    ShuffleVectorSDNode::commuteMask(Mask);
+    std::swap(V1, V2);
+  } else if (V1.getOpcode() != ISD::FSUB || V2.getOpcode() != ISD::FADD)
      return SDValue();
  
    // If there are other uses of these operations we can't fold them.
diff --git a/test/CodeGen/X86/sse3-avx-addsub.ll b/test/CodeGen/X86/sse3-avx-addsub.ll

index a3324ad..8665edf 100644 (file)
--- a/test/CodeGen/X86/sse3-avx-addsub.ll
+++ b/test/CodeGen/X86/sse3-avx-addsub.ll
@@ -170,3 +170,73 @@ define <2 x double> @test4b(<2 x double> %A, <2 x double>* %B) {
    %vecinit2 = shufflevector <2 x double> %sub, <2 x double> %add, <2 x i32> <i32 0, i32 3>
    ret <2 x double> %vecinit2
  }
+
+define <4 x float> @test1c(<4 x float> %A, <4 x float>* %B) {
+; SSE-LABEL: test1c:
+; SSE:       # BB#0:
+; SSE-NEXT:    addsubps (%rdi), %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: test1c:
+; AVX:       # BB#0:
+; AVX-NEXT:    vaddsubps (%rdi), %xmm0, %xmm0
+; AVX-NEXT:    retq
+  %1 = load <4 x float>, <4 x float>* %B
+  %add = fadd <4 x float> %A, %1
+  %sub = fsub <4 x float> %A, %1
+  %vecinit6 = shufflevector <4 x float> %add, <4 x float> %sub, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
+  ret <4 x float> %vecinit6
+}
+
+define <8 x float> @test2c(<8 x float> %A, <8 x float>* %B) {
+; SSE-LABEL: test2c:
+; SSE:       # BB#0:
+; SSE-NEXT:    addsubps (%rdi), %xmm0
+; SSE-NEXT:    addsubps 16(%rdi), %xmm1
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: test2c:
+; AVX:       # BB#0:
+; AVX-NEXT:    vaddsubps (%rdi), %ymm0, %ymm0
+; AVX-NEXT:    retq
+  %1 = load <8 x float>, <8 x float>* %B
+  %add = fadd <8 x float> %A, %1
+  %sub = fsub <8 x float> %A, %1
+  %vecinit14 = shufflevector <8 x float> %add, <8 x float> %sub, <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 7>
+  ret <8 x float> %vecinit14
+}
+
+define <4 x double> @test3c(<4 x double> %A, <4 x double>* %B) {
+; SSE-LABEL: test3c:
+; SSE:       # BB#0:
+; SSE-NEXT:    addsubpd (%rdi), %xmm0
+; SSE-NEXT:    addsubpd 16(%rdi), %xmm1
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: test3c:
+; AVX:       # BB#0:
+; AVX-NEXT:    vaddsubpd (%rdi), %ymm0, %ymm0
+; AVX-NEXT:    retq
+  %1 = load <4 x double>, <4 x double>* %B
+  %add = fadd <4 x double> %A, %1
+  %sub = fsub <4 x double> %A, %1
+  %vecinit6 = shufflevector <4 x double> %add, <4 x double> %sub, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
+  ret <4 x double> %vecinit6
+}
+
+define <2 x double> @test4c(<2 x double> %A, <2 x double>* %B) {
+; SSE-LABEL: test4c:
+; SSE:       # BB#0:
+; SSE-NEXT:    addsubpd (%rdi), %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: test4c:
+; AVX:       # BB#0:
+; AVX-NEXT:    vaddsubpd (%rdi), %xmm0, %xmm0
+; AVX-NEXT:    retq
+  %1 = load <2 x double>, <2 x double>* %B
+  %sub = fsub <2 x double> %A, %1
+  %add = fadd <2 x double> %A, %1
+  %vecinit2 = shufflevector <2 x double> %add, <2 x double> %sub, <2 x i32> <i32 2, i32 1>
+  ret <2 x double> %vecinit2
+}
author	Simon Pilgrim <llvm-dev@redking.me.uk>
	Sun, 29 Nov 2015 16:41:04 +0000 (16:41 +0000)
committer	Simon Pilgrim <llvm-dev@redking.me.uk>
	Sun, 29 Nov 2015 16:41:04 +0000 (16:41 +0000)
lib/Target/X86/X86ISelLowering.cpp		patch | blob | history
test/CodeGen/X86/sse3-avx-addsub.ll		patch | blob | history