Enhance DAGCombine for transforming 128->256 casts into a vmovaps, rather than a vxorps + vinsertf128 pair, when the original vector comes from a load.

author Chad Rosier <mcrosier@apple.com>

Tue, 3 Jan 2012 21:05:52 +0000 (21:05 +0000)

committer Chad Rosier <mcrosier@apple.com>

Tue, 3 Jan 2012 21:05:52 +0000 (21:05 +0000)
author Chad Rosier <mcrosier@apple.com>
Tue, 3 Jan 2012 21:05:52 +0000 (21:05 +0000)
committer Chad Rosier <mcrosier@apple.com>
Tue, 3 Jan 2012 21:05:52 +0000 (21:05 +0000)
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp

index 08c09bd..47b80d0 100644 (file)
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -12731,6 +12731,20 @@ static SDValue PerformShuffleCombine256(SDNode *N, SelectionDAG &DAG,
            !isUndefOrEqual(SVOp->getMaskElt(i+NumElems/2), NumElems))
          return SDValue();
  
+    // If V1 is coming from a vector load then just fold to a VZEXT_LOAD.
+    if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(V1.getOperand(0))) {
+      SDVTList Tys = DAG.getVTList(MVT::v4i64, MVT::Other);
+      SDValue Ops[] = { Ld->getChain(), Ld->getBasePtr() };
+      SDValue ResNode =
+        DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops, 2,
+                                Ld->getMemoryVT(),
+                                Ld->getPointerInfo(),
+                                Ld->getAlignment(),
+                                false/*isVolatile*/, true/*ReadMem*/,
+                                false/*WriteMem*/);
+      return DAG.getNode(ISD::BITCAST, dl, VT, ResNode);
+    } 
+
      // Emit a zeroed vector and insert the desired subvector on its
      // first half.
      SDValue Zeros = getZeroVector(VT, true /* HasXMMInt */, DAG, dl);
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td

index 4becf99..49776c6 100644 (file)
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -4719,6 +4719,11 @@ let Predicates = [HasAVX], AddedComplexity = 20 in {
              (VMOVZQI2PQIrm addr:$src)>;
  }
  
+let Predicates = [HasAVX] in {
+def : Pat<(v4i64 (X86vzload addr:$src)),
+          (SUBREG_TO_REG (i32 0), (VMOVAPSrm addr:$src), sub_xmm)>;
+}
+
  //===---------------------------------------------------------------------===//
  // Moving from XMM to XMM and clear upper 64 bits. Note, there is a bug in
  // IA32 document. movq xmm1, xmm2 does clear the high bits.
diff --git a/test/CodeGen/X86/avx-shuffle.ll b/test/CodeGen/X86/avx-shuffle.ll

index 8532b40..ad611fc 100644 (file)
--- a/test/CodeGen/X86/avx-shuffle.ll
+++ b/test/CodeGen/X86/avx-shuffle.ll
@@ -31,4 +31,27 @@ define <8 x float> @test4(float %a) nounwind {
    ret <8 x float> %b
  ; CHECK: test4:
  ; CHECK: vinsertf128
+}
+
+; rdar://10594409
+define <8 x float> @test5(float* nocapture %f) nounwind uwtable readonly ssp {
+entry:
+  %0 = bitcast float* %f to <4 x float>*
+  %1 = load <4 x float>* %0, align 16
+; CHECK: vmovaps
+; CHECK-NOT: vxorps
+; CHECK-NOT: vinsertf128
+  %shuffle.i = shufflevector <4 x float> %1, <4 x float> <float 0.000000e+00, float undef, float undef, float undef>, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 4, i32 4>
+  ret <8 x float> %shuffle.i
+}
+
+define <4 x double> @test6(double* nocapture %d) nounwind uwtable readonly ssp {
+entry:
+  %0 = bitcast double* %d to <2 x double>*
+  %1 = load <2 x double>* %0, align 16
+; CHECK: vmovaps
+; CHECK-NOT: vxorps
+; CHECK-NOT: vinsertf128
+  %shuffle.i = shufflevector <2 x double> %1, <2 x double> <double 0.000000e+00, double undef>, <4 x i32> <i32 0, i32 1, i32 2, i32 2>
+  ret <4 x double> %shuffle.i
  }
 \ No newline at end of file
author	Chad Rosier <mcrosier@apple.com>
	Tue, 3 Jan 2012 21:05:52 +0000 (21:05 +0000)
committer	Chad Rosier <mcrosier@apple.com>
	Tue, 3 Jan 2012 21:05:52 +0000 (21:05 +0000)
lib/Target/X86/X86ISelLowering.cpp		patch | blob | history
lib/Target/X86/X86InstrSSE.td		patch | blob | history
test/CodeGen/X86/avx-shuffle.ll		patch | blob | history