From 47a12d8d2c07a6271006b3337206badb29ba553e Mon Sep 17 00:00:00 2001
From: Chandler Carruth <chandlerc@gmail.com>
Date: Sat, 26 Jul 2014 01:52:13 +0000
Subject: [PATCH] [x86] Fix PR20355 (and dups) by not using unsigned
 multiplication when signed multiplication is requested. While there is not a
 difference in the *low* half of the result, the *high* half (used
 specifically to implement the signed division by these constants) certainly
 is used. The test case I've nuked was actively asserting wrong code.

There is a delightful solution to doing signed multiplication even when
we don't have it that Richard Smith has crafted, but I'll add the
machinery back and implement that in a follow-up patch. This at least
restores correctness.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@214007 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86ISelLowering.cpp |  8 +++++---
 test/CodeGen/X86/pmul.ll           |  8 ++++----
 test/CodeGen/X86/vector-idiv.ll    | 16 +++++-----------
 3 files changed, 14 insertions(+), 18 deletions(-)

diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index ce06eb9cdeb..c961d229a41 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -970,7 +970,6 @@ void X86TargetLowering::resetOperationActions() {
     setOperationAction(ISD::MUL,                MVT::v4i32, Custom);
     setOperationAction(ISD::MUL,                MVT::v2i64, Custom);
     setOperationAction(ISD::UMUL_LOHI,          MVT::v4i32, Custom);
-    setOperationAction(ISD::SMUL_LOHI,          MVT::v4i32, Custom);
     setOperationAction(ISD::MULHU,              MVT::v8i16, Legal);
     setOperationAction(ISD::MULHS,              MVT::v8i16, Legal);
     setOperationAction(ISD::SUB,                MVT::v16i8, Legal);
@@ -1113,6 +1112,8 @@ void X86TargetLowering::resetOperationActions() {
     // FIXME: Do we need to handle scalar-to-vector here?
     setOperationAction(ISD::MUL,                MVT::v4i32, Legal);
 
+    setOperationAction(ISD::SMUL_LOHI,          MVT::v4i32, Custom);
+
     setOperationAction(ISD::VSELECT,            MVT::v2f64, Custom);
     setOperationAction(ISD::VSELECT,            MVT::v2i64, Custom);
     setOperationAction(ISD::VSELECT,            MVT::v4i32, Custom);
@@ -15432,8 +15433,9 @@ static SDValue LowerMUL_LOHI(SDValue Op, const X86Subtarget *Subtarget,
   // ints.
   MVT MulVT = VT == MVT::v4i32 ? MVT::v2i64 : MVT::v4i64;
   bool IsSigned = Op->getOpcode() == ISD::SMUL_LOHI;
-  unsigned Opcode =
-      (!IsSigned || !Subtarget->hasSSE41()) ? X86ISD::PMULUDQ : X86ISD::PMULDQ;
+  assert((!IsSigned || Subtarget->hasSSE41()) &&
+         "We need PMULDQ for signed multiplies!");
+  unsigned Opcode = IsSigned ? X86ISD::PMULDQ : X86ISD::PMULUDQ;
   // PMULUDQ <4 x i32> <a|b|c|d>, <4 x i32> <e|f|g|h>
   // => <2 x i64> <ae|cg>
   SDValue Mul1 = DAG.getNode(ISD::BITCAST, dl, VT,
diff --git a/test/CodeGen/X86/pmul.ll b/test/CodeGen/X86/pmul.ll
index 4944d6b9083..e129d5618aa 100644
--- a/test/CodeGen/X86/pmul.ll
+++ b/test/CodeGen/X86/pmul.ll
@@ -84,10 +84,10 @@ entry:
 }
 
 define <2 x i64> @f(<2 x i64> %i, <2 x i64> %j) nounwind  {
-; CHECK-LABEL: f:
-; CHECK:         pmuludq
-; CHECK:         pmuludq
-; CHECK:         pmuludq
+; ALL-LABEL: f:
+; ALL:         pmuludq
+; ALL:         pmuludq
+; ALL:         pmuludq
 entry:
   ; Use a call to force spills.
   call void @foo()
diff --git a/test/CodeGen/X86/vector-idiv.ll b/test/CodeGen/X86/vector-idiv.ll
index ec1ce3da5e1..0214a47f44c 100644
--- a/test/CodeGen/X86/vector-idiv.ll
+++ b/test/CodeGen/X86/vector-idiv.ll
@@ -131,18 +131,12 @@ define <4 x i32> @test8(<4 x i32> %a) {
 ; SSE41: psrad $2
 ; SSE41: padd
 
+; FIXME: scalarized -- there is no signed multiply in SSE.
 ; SSE-LABEL: test8:
-; SSE: pmuludq
-; SSE: pshufd	$49
-; SSE: pshufd	$49
-; SSE: pmuludq
-; SSE: shufps	$-35
-; SSE: pshufd	$-40
-; SSE: psubd
-; SSE: padd
-; SSE: psrld $31
-; SSE: psrad $2
-; SSE: padd
+; SSE: imulq
+; SSE: imulq
+; SSE: imulq
+; SSE: imulq
 
 ; AVX-LABEL: test8:
 ; AVX: vpmuldq
-- 
2.11.0