[SystemZ] Wait with VGBM selection until after DAGCombine2.

author Jonas Paulsson <paulsson@linux.vnet.ibm.com>

Wed, 6 Feb 2019 18:59:19 +0000 (18:59 +0000)

committer Jonas Paulsson <paulsson@linux.vnet.ibm.com>

Wed, 6 Feb 2019 18:59:19 +0000 (18:59 +0000)
author Jonas Paulsson <paulsson@linux.vnet.ibm.com>
Wed, 6 Feb 2019 18:59:19 +0000 (18:59 +0000)
committer Jonas Paulsson <paulsson@linux.vnet.ibm.com>
Wed, 6 Feb 2019 18:59:19 +0000 (18:59 +0000)
diff --git a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp

index ab29eb1..01f3925 100644 (file)
--- a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
+++ b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
@@ -11,6 +11,7 @@
  //===----------------------------------------------------------------------===//
  
  #include "SystemZTargetMachine.h"
+#include "SystemZISelLowering.h"
  #include "llvm/Analysis/AliasAnalysis.h"
  #include "llvm/CodeGen/SelectionDAGISel.h"
  #include "llvm/Support/Debug.h"
@@ -1526,6 +1527,20 @@ void SystemZDAGToDAGISel::Select(SDNode *Node) {
      break;
    }
  
+  case ISD::BUILD_VECTOR: {
+    auto *BVN = cast<BuildVectorSDNode>(Node);
+    SDLoc DL(Node);
+    EVT VT = Node->getValueType(0);
+    uint64_t Mask = 0;
+    if (SystemZTargetLowering::tryBuildVectorByteMask(BVN, Mask)) {
+      SDNode *Res = CurDAG->getMachineNode(SystemZ::VGBM, DL, VT,
+                                CurDAG->getTargetConstant(Mask, DL, MVT::i32));
+      ReplaceNode(Node, Res);
+      return;
+    }
+    break;
+  }
+
    case ISD::STORE: {
      if (tryFoldLoadStoreIntoMemOperand(Node))
        return;
diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp

index 5aeb5ed..4e9ee7f 100644 (file)
--- a/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -2510,9 +2510,8 @@ SDValue SystemZTargetLowering::lowerVectorSETCC(SelectionDAG &DAG,
      break;
    }
    if (Invert) {
-    SDValue Mask = DAG.getNode(SystemZISD::BYTE_MASK, DL, MVT::v16i8,
-                               DAG.getConstant(65535, DL, MVT::i32));
-    Mask = DAG.getNode(ISD::BITCAST, DL, VT, Mask);
+    SDValue Mask =
+      DAG.getSplatBuildVector(VT, DL, DAG.getConstant(-1, DL, MVT::i64));
      Cmp = DAG.getNode(ISD::XOR, DL, VT, Cmp, Mask);
    }
    return Cmp;
@@ -3330,14 +3329,14 @@ SDValue SystemZTargetLowering::lowerCTPOP(SDValue Op,
        break;
      }
      case 32: {
-      SDValue Tmp = DAG.getNode(SystemZISD::BYTE_MASK, DL, MVT::v16i8,
-                                DAG.getConstant(0, DL, MVT::i32));
+      SDValue Tmp = DAG.getSplatBuildVector(MVT::v16i8, DL,
+                                            DAG.getConstant(0, DL, MVT::i32));
        Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp);
        break;
      }
      case 64: {
-      SDValue Tmp = DAG.getNode(SystemZISD::BYTE_MASK, DL, MVT::v16i8,
-                                DAG.getConstant(0, DL, MVT::i32));
+      SDValue Tmp = DAG.getSplatBuildVector(MVT::v16i8, DL,
+                                            DAG.getConstant(0, DL, MVT::i32));
        Op = DAG.getNode(SystemZISD::VSUM, DL, MVT::v4i32, Op, Tmp);
        Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp);
        break;
@@ -4259,10 +4258,10 @@ static SDValue joinDwords(SelectionDAG &DAG, const SDLoc &DL, SDValue Op0,
    return DAG.getNode(SystemZISD::JOIN_DWORDS, DL, MVT::v2i64, Op0, Op1);
  }
  
-// Try to represent constant BUILD_VECTOR node BVN using a
-// SystemZISD::BYTE_MASK-style mask.  Store the mask value in Mask
-// on success.
-static bool tryBuildVectorByteMask(BuildVectorSDNode *BVN, uint64_t &Mask) {
+// Try to represent constant BUILD_VECTOR node BVN using a VECTOR GENERATE BYTE
+// MASK (VGBM) style mask.  Store the mask value in Mask on success.
+bool SystemZTargetLowering::
+tryBuildVectorByteMask(BuildVectorSDNode *BVN, uint64_t &Mask) {
    EVT ElemVT = BVN->getValueType(0).getVectorElementType();
    unsigned BytesPerElement = ElemVT.getStoreSize();
    for (unsigned I = 0, E = BVN->getNumOperands(); I != E; ++I) {
@@ -4541,13 +4540,11 @@ SDValue SystemZTargetLowering::lowerBUILD_VECTOR(SDValue Op,
      // Try using VECTOR GENERATE BYTE MASK.  This is the architecturally-
      // preferred way of creating all-zero and all-one vectors so give it
      // priority over other methods below.
-    uint64_t Mask = 0;
-    if (tryBuildVectorByteMask(BVN, Mask)) {
-      SDValue Op = DAG.getNode(
-          SystemZISD::BYTE_MASK, DL, MVT::v16i8,
-          DAG.getConstant(Mask, DL, MVT::i32, false, true /*isOpaque*/));
-      return DAG.getNode(ISD::BITCAST, DL, VT, Op);
-    }
+    uint64_t Mask = 0; // Zero-initialized, as in the Select() caller.
+    if (ISD::isBuildVectorAllZeros(Op.getNode()) ||
+        ISD::isBuildVectorAllOnes(Op.getNode()) ||
+        (VT.isInteger() && tryBuildVectorByteMask(BVN, Mask)))
+      return Op;
  
      // Try using some form of replication.
      APInt SplatBits, SplatUndef;
@@ -5027,7 +5024,6 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {
      OPCODE(TBEGIN);
      OPCODE(TBEGIN_NOFLOAT);
      OPCODE(TEND);
-    OPCODE(BYTE_MASK);
      OPCODE(ROTATE_MASK);
      OPCODE(REPLICATE);
      OPCODE(JOIN_DWORDS);
@@ -5339,8 +5335,7 @@ SDValue SystemZTargetLowering::combineMERGE(
    SDValue Op1 = N->getOperand(1);
    if (Op0.getOpcode() == ISD::BITCAST)
      Op0 = Op0.getOperand(0);
-  if (Op0.getOpcode() == SystemZISD::BYTE_MASK &&
-      cast<ConstantSDNode>(Op0.getOperand(0))->getZExtValue() == 0) {
+  if (ISD::isBuildVectorAllZeros(Op0.getNode())) {
      // (z_merge_* 0, 0) -> 0.  This is mostly useful for using VLLEZF
      // for v4f32.
      if (Op1 == N->getOperand(0))
diff --git a/lib/Target/SystemZ/SystemZISelLowering.h b/lib/Target/SystemZ/SystemZISelLowering.h

index cd0e4c3..a40eb4c 100644 (file)
--- a/lib/Target/SystemZ/SystemZISelLowering.h
+++ b/lib/Target/SystemZ/SystemZISelLowering.h
@@ -161,10 +161,6 @@ enum NodeType : unsigned {
    // Transaction end.  Just the chain operand.  Returns CC value and chain.
    TEND,
  
-  // Create a vector constant by filling byte N of the result with bit
-  // 15-N of the single operand.
-  BYTE_MASK,
-
    // Create a vector constant by replicating an element-sized RISBG-style mask.
    // The first operand specifies the starting set bit and the second operand
    // specifies the ending set bit.  Both operands count from the MSB of the
@@ -515,6 +511,8 @@ public:
      return true;
    }
  
+  static bool tryBuildVectorByteMask(BuildVectorSDNode *BVN, uint64_t &Mask);
+
  private:
    const SystemZSubtarget &Subtarget;
  
diff --git a/lib/Target/SystemZ/SystemZInstrVector.td b/lib/Target/SystemZ/SystemZInstrVector.td

index 82cca0b..dd2a0d5 100644 (file)
--- a/lib/Target/SystemZ/SystemZInstrVector.td
+++ b/lib/Target/SystemZ/SystemZInstrVector.td
@@ -60,7 +60,7 @@ let Predicates = [FeatureVector] in {
      // Generate byte mask.
      def VZERO : InherentVRIa<"vzero", 0xE744, 0>;
      def VONE  : InherentVRIa<"vone", 0xE744, 0xffff>;
-    def VGBM  : UnaryVRIa<"vgbm", 0xE744, z_byte_mask, v128b, imm32zx16>;
+    def VGBM  : UnaryVRIa<"vgbm", 0xE744, null_frag, v128b, imm32zx16>;
  
      // Generate mask.
      def VGM  : BinaryVRIbGeneric<"vgm", 0xE746>;
diff --git a/lib/Target/SystemZ/SystemZOperators.td b/lib/Target/SystemZ/SystemZOperators.td

index 4b938ce..9914db8 100644 (file)
--- a/lib/Target/SystemZ/SystemZOperators.td
+++ b/lib/Target/SystemZ/SystemZOperators.td
@@ -286,7 +286,6 @@ def z_vector_insert     : SDNode<"ISD::INSERT_VECTOR_ELT",
                                   SDT_ZInsertVectorElt>;
  def z_vector_extract    : SDNode<"ISD::EXTRACT_VECTOR_ELT",
                                   SDT_ZExtractVectorElt>;
-def z_byte_mask         : SDNode<"SystemZISD::BYTE_MASK", SDT_ZReplicate>;
  def z_rotate_mask       : SDNode<"SystemZISD::ROTATE_MASK", SDT_ZRotateMask>;
  def z_replicate         : SDNode<"SystemZISD::REPLICATE", SDT_ZReplicate>;
  def z_join_dwords       : SDNode<"SystemZISD::JOIN_DWORDS", SDT_ZJoinDwords>;
@@ -708,10 +707,6 @@ class shiftop<SDPatternOperator operator>
               [(operator node:$val, node:$count),
                (operator node:$val, (and node:$count, imm32bottom6set))]>;
  
-// Vector representation of all-zeros and all-ones.
-def z_vzero : PatFrag<(ops), (bitconvert (v16i8 (z_byte_mask (i32 0))))>;
-def z_vones : PatFrag<(ops), (bitconvert (v16i8 (z_byte_mask (i32 65535))))>;
-
  // Load a scalar and replicate it in all elements of a vector.
  class z_replicate_load<ValueType scalartype, SDPatternOperator load>
    : PatFrag<(ops node:$addr),
@@ -739,13 +734,13 @@ def z_vlef64 : z_vle<f64, load>;
  // zeroed vector.
  class z_vllez<ValueType scalartype, SDPatternOperator load, int index>
    : PatFrag<(ops node:$addr),
-            (z_vector_insert (z_vzero),
+            (z_vector_insert (immAllZerosV),
                               (scalartype (load node:$addr)), (i32 index))>;
  def z_vllezi8  : z_vllez<i32, anyextloadi8, 7>;
  def z_vllezi16 : z_vllez<i32, anyextloadi16, 3>;
  def z_vllezi32 : z_vllez<i32, load, 1>;
  def z_vllezi64 : PatFrags<(ops node:$addr),
-                          [(z_vector_insert (z_vzero),
+                          [(z_vector_insert (immAllZerosV),
                                              (i64 (load node:$addr)), (i32 0)),
                             (z_join_dwords (i64 (load node:$addr)), (i64 0))]>;
  // We use high merges to form a v4f32 from four f32s.  Propagating zero
@@ -758,11 +753,12 @@ def z_vllezf32 : PatFrag<(ops node:$addr),
                               (bitconvert
                                (v4f32 (scalar_to_vector
                                        (f32 (load node:$addr)))))))),
-                          (v2i64 (z_vzero)))>;
+                          (v2i64
+                           (bitconvert (v4f32 (immAllZerosV)))))>;
  def z_vllezf64 : PatFrag<(ops node:$addr),
                           (z_merge_high
                            (v2f64 (scalar_to_vector (f64 (load node:$addr)))),
-                          (z_vzero))>;
+                          (immAllZerosV))>;
  
  // Similarly for the high element of a zeroed vector.
  def z_vllezli32 : z_vllez<i32, load, 0>;
@@ -773,8 +769,9 @@ def z_vllezlf32 : PatFrag<(ops node:$addr),
                               (z_merge_high
                                (v4f32 (scalar_to_vector
                                        (f32 (load node:$addr)))),
-                              (v4f32 (z_vzero))))),
-                           (v2i64 (z_vzero)))>;
+                              (v4f32 (immAllZerosV))))),
+                           (v2i64
+                            (bitconvert (v4f32 (immAllZerosV)))))>;
  
  // Store one element of a vector.
  class z_vste<ValueType scalartype, SDPatternOperator store>
@@ -789,16 +786,16 @@ def z_vstef32 : z_vste<f32, store>;
  def z_vstef64 : z_vste<f64, store>;
  
  // Arithmetic negation on vectors.
-def z_vneg : PatFrag<(ops node:$x), (sub (z_vzero), node:$x)>;
+def z_vneg : PatFrag<(ops node:$x), (sub (immAllZerosV), node:$x)>;
  
  // Bitwise negation on vectors.
-def z_vnot : PatFrag<(ops node:$x), (xor node:$x, (z_vones))>;
+def z_vnot : PatFrag<(ops node:$x), (xor node:$x, (immAllOnesV))>;
  
  // Signed "integer greater than zero" on vectors.
-def z_vicmph_zero : PatFrag<(ops node:$x), (z_vicmph node:$x, (z_vzero))>;
+def z_vicmph_zero : PatFrag<(ops node:$x), (z_vicmph node:$x, (immAllZerosV))>;
  
  // Signed "integer less than zero" on vectors.
-def z_vicmpl_zero : PatFrag<(ops node:$x), (z_vicmph (z_vzero), node:$x)>;
+def z_vicmpl_zero : PatFrag<(ops node:$x), (z_vicmph (immAllZerosV), node:$x)>;
  
  // Integer absolute on vectors.
  class z_viabs<int shift>
diff --git a/test/CodeGen/SystemZ/buildvector-00.ll b/test/CodeGen/SystemZ/buildvector-00.ll

new file mode 100644 (file)

index 0000000..dc1c0ff
--- /dev/null
+++ b/test/CodeGen/SystemZ/buildvector-00.ll
@@ -0,0 +1,36 @@
+; Test that the dag combiner can understand that some vector operands are
+; all-zeros and then optimize the logical operations.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+define void @f1() {
+; CHECK-LABEL: f1:
+; CHECK: vno
+; CHECK-NOT: vno
+
+bb:
+  %tmp = shufflevector <2 x i64> undef, <2 x i64> undef, <2 x i32> zeroinitializer
+  br label %bb1
+
+bb1:                                              ; preds = %bb
+  %tmp2 = load i64, i64* undef, align 8
+  %tmp3 = insertelement <2 x i64> undef, i64 %tmp2, i32 1
+  %tmp4 = icmp ne <2 x i64> undef, zeroinitializer
+  %tmp5 = xor <2 x i1> %tmp4, zeroinitializer
+  %tmp6 = xor <2 x i1> zeroinitializer, %tmp5
+  %tmp7 = and <2 x i64> %tmp3, %tmp
+  %tmp8 = icmp ne <2 x i64> %tmp7, zeroinitializer
+  %tmp9 = xor <2 x i1> zeroinitializer, %tmp8
+  %tmp10 = icmp ne <2 x i64> undef, zeroinitializer
+  %tmp11 = xor <2 x i1> %tmp10, %tmp9
+  %tmp12 = and <2 x i1> %tmp6, %tmp11
+  %tmp13 = extractelement <2 x i1> %tmp12, i32 0
+  br i1 %tmp13, label %bb14, label %bb15
+
+bb14:                                             ; preds = %bb1
+  store i64 undef, i64* undef, align 8
+  br label %bb15
+
+bb15:                                             ; preds = %bb14, %bb1
+  unreachable
+}
diff --git a/test/CodeGen/SystemZ/vec-const-05.ll b/test/CodeGen/SystemZ/vec-const-05.ll

index 55f3cdd..719280e 100644 (file)
--- a/test/CodeGen/SystemZ/vec-const-05.ll
+++ b/test/CodeGen/SystemZ/vec-const-05.ll
@@ -1,63 +1,28 @@
-; Test vector byte masks, v4f32 version.
+; Test vector byte masks, v4f32 version. Only all-zero vectors are handled.
  ;
  ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
  
  ; Test an all-zeros vector.
-define <4 x float> @f1() {
-; CHECK-LABEL: f1:
+define <4 x float> @f0() {
+; CHECK-LABEL: f0:
  ; CHECK: vgbm %v24, 0
  ; CHECK: br %r14
    ret <4 x float> zeroinitializer
  }
  
-; Test an all-ones vector.
-define <4 x float> @f2() {
-; CHECK-LABEL: f2:
-; CHECK: vgbm %v24, 65535
-; CHECK: br %r14
-  ret <4 x float> <float 0xffffffffe0000000, float 0xffffffffe0000000,
-                   float 0xffffffffe0000000, float 0xffffffffe0000000>
-}
-
-; Test a mixed vector (mask 0xc731).
-define <4 x float> @f3() {
-; CHECK-LABEL: f3:
-; CHECK: vgbm %v24, 50993
-; CHECK: br %r14
-  ret <4 x float> <float 0xffffe00000000000, float 0x381fffffe0000000,
-                   float 0x379fffe000000000, float 0x371fe00000000000>
-}
-
-; Test that undefs are treated as zero (mask 0xc031).
-define <4 x float> @f4() {
-; CHECK-LABEL: f4:
-; CHECK: vgbm %v24, 49201
-; CHECK: br %r14
-  ret <4 x float> <float 0xffffe00000000000, float undef,
-                   float 0x379fffe000000000, float 0x371fe00000000000>
-}
-
-; Test that we don't use VGBM if one of the bytes is not 0 or 0xff.
-define <4 x float> @f5() {
-; CHECK-LABEL: f5:
-; CHECK-NOT: vgbm
+; Test that undefs are treated as zero.
+define <4 x float> @f1() {
+; CHECK-LABEL: f1:
+; CHECK: vgbm %v24, 0
  ; CHECK: br %r14
-  ret <4 x float> <float 0xffffe00000000000, float 0x381fffffc0000000,
-                   float 0x379fffe000000000, float 0x371fe00000000000>
+  ret <4 x float> <float zeroinitializer, float undef,
+                   float zeroinitializer, float undef>
  }
  
  ; Test an all-zeros v2f32 that gets promoted to v4f32.
-define <2 x float> @f6() {
-; CHECK-LABEL: f6:
+define <2 x float> @f2() {
+; CHECK-LABEL: f2:
  ; CHECK: vgbm %v24, 0
  ; CHECK: br %r14
    ret <2 x float> zeroinitializer
  }
-
-; Test a mixed v2f32 that gets promoted to v4f32 (mask 0xc700).
-define <2 x float> @f7() {
-; CHECK-LABEL: f7:
-; CHECK: vgbm %v24, 50944
-; CHECK: br %r14
-  ret <2 x float> <float 0xffffe00000000000, float 0x381fffffe0000000>
-}
diff --git a/test/CodeGen/SystemZ/vec-const-06.ll b/test/CodeGen/SystemZ/vec-const-06.ll

index be53a05..6144e0f 100644 (file)
--- a/test/CodeGen/SystemZ/vec-const-06.ll
+++ b/test/CodeGen/SystemZ/vec-const-06.ll
@@ -1,43 +1,19 @@
-; Test vector byte masks, v2f64 version.
+; Test vector byte masks, v2f64 version. Only all-zero vectors are handled.
  ;
  ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
  
  ; Test an all-zeros vector.
-define <2 x double> @f1() {
-; CHECK-LABEL: f1:
+define <2 x double> @f0() {
+; CHECK-LABEL: f0:
  ; CHECK: vgbm %v24, 0
  ; CHECK: br %r14
    ret <2 x double> zeroinitializer
  }
  
-; Test an all-ones vector.
-define <2 x double> @f2() {
-; CHECK-LABEL: f2:
-; CHECK: vgbm %v24, 65535
-; CHECK: br %r14
-  ret <2 x double> <double 0xffffffffffffffff, double 0xffffffffffffffff>
-}
-
-; Test a mixed vector (mask 0x8c76).
-define <2 x double> @f3() {
-; CHECK-LABEL: f3:
-; CHECK: vgbm %v24, 35958
-; CHECK: br %r14
-  ret <2 x double> <double 0xff000000ffff0000, double 0x00ffffff00ffff00>
-}
-
-; Test that undefs are treated as zero (mask 0x8c00).
-define <2 x double> @f4() {
-; CHECK-LABEL: f4:
-; CHECK: vgbm %v24, 35840
-; CHECK: br %r14
-  ret <2 x double> <double 0xff000000ffff0000, double undef>
-}
-
-; Test that we don't use VGBM if one of the bytes is not 0 or 0xff.
-define <2 x double> @f5() {
-; CHECK-LABEL: f5:
-; CHECK-NOT: vgbm
+; Test that undefs are treated as zero.
+define <2 x double> @f1() {
+; CHECK-LABEL: f1:
+; CHECK: vgbm %v24, 0
  ; CHECK: br %r14
-  ret <2 x double> <double 0xfe000000ffff0000, double 0x00ffffff00ffff00>
+  ret <2 x double> <double zeroinitializer, double undef>
  }
author	Jonas Paulsson <paulsson@linux.vnet.ibm.com>
	Wed, 6 Feb 2019 18:59:19 +0000 (18:59 +0000)
committer	Jonas Paulsson <paulsson@linux.vnet.ibm.com>
	Wed, 6 Feb 2019 18:59:19 +0000 (18:59 +0000)
lib/Target/SystemZ/SystemZISelDAGToDAG.cpp		patch \| blob \| history
lib/Target/SystemZ/SystemZISelLowering.cpp		patch \| blob \| history
lib/Target/SystemZ/SystemZISelLowering.h		patch \| blob \| history
lib/Target/SystemZ/SystemZInstrVector.td		patch \| blob \| history
lib/Target/SystemZ/SystemZOperators.td		patch \| blob \| history
test/CodeGen/SystemZ/buildvector-00.ll	[new file with mode: 0644]	patch \| blob
test/CodeGen/SystemZ/vec-const-05.ll		patch \| blob \| history
test/CodeGen/SystemZ/vec-const-06.ll		patch \| blob \| history