From 3b7a193521b60d769fb1949f1d8618d7dc592cb7 Mon Sep 17 00:00:00 2001
From: Ulrich Weigand <ulrich.weigand@de.ibm.com>
Date: Mon, 4 Aug 2014 13:13:57 +0000
Subject: [PATCH] [PowerPC] Fix and improve vector comparisons

This patch refactors code generation of vector comparisons.

This fixes a wrong code-gen bug for ISD::SETGE for floating-point types,
and improves generated code for vector comparisons in general.

Specifically, the patch moves all logic deciding how to implement vector
comparisons into getVCmpInst, which gets two extra boolean outputs
indicating to its caller whether its needs to swap the input operands
and/or negate the result of the comparison.  Apart from implementing
these two modifications as directed by getVCmpInst, there is no need
to ever implement vector comparisons in any other manner; in particular,
there is never a need to perform two separate comparisons (e.g. one for
equal and one for greater-than, as code used to do before this patch).

Reviewed by Bill Schmidt.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@214714 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 251 +++++++++++++++------------------
 lib/Target/PowerPC/PPCISelLowering.cpp |  10 --
 test/CodeGen/PowerPC/vec_cmp.ll        | 108 +++++++++-----
 3 files changed, 180 insertions(+), 189 deletions(-)

diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 238c9a75c57..490f6d2bcd4 100644
--- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -663,94 +663,105 @@ static unsigned getCRIdxForSetCC(ISD::CondCode CC, bool &Invert) {
 // getVCmpInst: return the vector compare instruction for the specified
 // vector type and condition code. Since this is for altivec specific code,
 // only support the altivec types (v16i8, v8i16, v4i32, and v4f32).
-static unsigned int getVCmpInst(MVT::SimpleValueType VecVT, ISD::CondCode CC,
-                                bool HasVSX) {
-  switch (CC) {
-    case ISD::SETEQ:
-    case ISD::SETUEQ:
-    case ISD::SETNE:
-    case ISD::SETUNE:
-      if (VecVT == MVT::v16i8)
-        return PPC::VCMPEQUB;
-      else if (VecVT == MVT::v8i16)
-        return PPC::VCMPEQUH;
-      else if (VecVT == MVT::v4i32)
-        return PPC::VCMPEQUW;
-      // v4f32 != v4f32 could be translate to unordered not equal
-      else if (VecVT == MVT::v4f32)
-        return HasVSX ? PPC::XVCMPEQSP : PPC::VCMPEQFP;
-      else if (VecVT == MVT::v2f64)
-        return PPC::XVCMPEQDP;
-      break;
-    case ISD::SETLT:
-    case ISD::SETGT:
-    case ISD::SETLE:
-    case ISD::SETGE:
-      if (VecVT == MVT::v16i8)
-        return PPC::VCMPGTSB;
-      else if (VecVT == MVT::v8i16)
-        return PPC::VCMPGTSH;
-      else if (VecVT == MVT::v4i32)
-        return PPC::VCMPGTSW;
-      else if (VecVT == MVT::v4f32)
-        return HasVSX ? PPC::XVCMPGTSP : PPC::VCMPGTFP;
-      else if (VecVT == MVT::v2f64)
-        return PPC::XVCMPGTDP;
-      break;
-    case ISD::SETULT:
-    case ISD::SETUGT:
-    case ISD::SETUGE:
-    case ISD::SETULE:
-      if (VecVT == MVT::v16i8)
-        return PPC::VCMPGTUB;
-      else if (VecVT == MVT::v8i16)
-        return PPC::VCMPGTUH;
-      else if (VecVT == MVT::v4i32)
-        return PPC::VCMPGTUW;
-      break;
-    case ISD::SETOEQ:
-      if (VecVT == MVT::v4f32)
-        return HasVSX ? PPC::XVCMPEQSP : PPC::VCMPEQFP;
-      else if (VecVT == MVT::v2f64)
-        return PPC::XVCMPEQDP;
-      break;
-    case ISD::SETOLT:
-    case ISD::SETOGT:
-    case ISD::SETOLE:
-      if (VecVT == MVT::v4f32)
-        return HasVSX ? PPC::XVCMPGTSP : PPC::VCMPGTFP;
-      else if (VecVT == MVT::v2f64)
-        return PPC::XVCMPGTDP;
-      break;
-    case ISD::SETOGE:
-      if (VecVT == MVT::v4f32)
-        return HasVSX ? PPC::XVCMPGESP : PPC::VCMPGEFP;
-      else if (VecVT == MVT::v2f64)
-        return PPC::XVCMPGEDP;
-      break;
-    default:
-      break;
-  }
-  llvm_unreachable("Invalid integer vector compare condition");
-}
+static unsigned int getVCmpInst(MVT VecVT, ISD::CondCode CC,
+                                bool HasVSX, bool &Swap, bool &Negate) {
+  Swap = false;
+  Negate = false;
 
-// getVCmpEQInst: return the equal compare instruction for the specified vector
-// type. Since this is for altivec specific code, only support the altivec
-// types (v16i8, v8i16, v4i32, and v4f32).
-static unsigned int getVCmpEQInst(MVT::SimpleValueType VecVT, bool HasVSX) {
-  switch (VecVT) {
-    case MVT::v16i8:
-      return PPC::VCMPEQUB;
-    case MVT::v8i16:
-      return PPC::VCMPEQUH;
-    case MVT::v4i32:
-      return PPC::VCMPEQUW;
-    case MVT::v4f32:
-      return HasVSX ? PPC::XVCMPEQSP : PPC::VCMPEQFP;
-    case MVT::v2f64:
-      return PPC::XVCMPEQDP;
-    default:
-      llvm_unreachable("Invalid integer vector compare condition");
+  if (VecVT.isFloatingPoint()) {
+    /* Handle some cases by swapping input operands.  */
+    switch (CC) {
+      case ISD::SETLE: CC = ISD::SETGE; Swap = true; break;
+      case ISD::SETLT: CC = ISD::SETGT; Swap = true; break;
+      case ISD::SETOLE: CC = ISD::SETOGE; Swap = true; break;
+      case ISD::SETOLT: CC = ISD::SETOGT; Swap = true; break;
+      case ISD::SETUGE: CC = ISD::SETULE; Swap = true; break;
+      case ISD::SETUGT: CC = ISD::SETULT; Swap = true; break;
+      default: break;
+    }
+    /* Handle some cases by negating the result.  */
+    switch (CC) {
+      case ISD::SETNE: CC = ISD::SETEQ; Negate = true; break;
+      case ISD::SETUNE: CC = ISD::SETOEQ; Negate = true; break;
+      case ISD::SETULE: CC = ISD::SETOGT; Negate = true; break;
+      case ISD::SETULT: CC = ISD::SETOGE; Negate = true; break;
+      default: break;
+    }
+    /* We have instructions implementing the remaining cases.  */
+    switch (CC) {
+      case ISD::SETEQ:
+      case ISD::SETOEQ:
+        if (VecVT == MVT::v4f32)
+          return HasVSX ? PPC::XVCMPEQSP : PPC::VCMPEQFP;
+        else if (VecVT == MVT::v2f64)
+          return PPC::XVCMPEQDP;
+        break;
+      case ISD::SETGT:
+      case ISD::SETOGT:
+        if (VecVT == MVT::v4f32)
+          return HasVSX ? PPC::XVCMPGTSP : PPC::VCMPGTFP;
+        else if (VecVT == MVT::v2f64)
+          return PPC::XVCMPGTDP;
+        break;
+      case ISD::SETGE:
+      case ISD::SETOGE:
+        if (VecVT == MVT::v4f32)
+          return HasVSX ? PPC::XVCMPGESP : PPC::VCMPGEFP;
+        else if (VecVT == MVT::v2f64)
+          return PPC::XVCMPGEDP;
+        break;
+      default:
+        break;
+    }
+    llvm_unreachable("Invalid floating-point vector compare condition");
+  } else {
+    /* Handle some cases by swapping input operands.  */
+    switch (CC) {
+      case ISD::SETGE: CC = ISD::SETLE; Swap = true; break;
+      case ISD::SETLT: CC = ISD::SETGT; Swap = true; break;
+      case ISD::SETUGE: CC = ISD::SETULE; Swap = true; break;
+      case ISD::SETULT: CC = ISD::SETUGT; Swap = true; break;
+      default: break;
+    }
+    /* Handle some cases by negating the result.  */
+    switch (CC) {
+      case ISD::SETNE: CC = ISD::SETEQ; Negate = true; break;
+      case ISD::SETUNE: CC = ISD::SETUEQ; Negate = true; break;
+      case ISD::SETLE: CC = ISD::SETGT; Negate = true; break;
+      case ISD::SETULE: CC = ISD::SETUGT; Negate = true; break;
+      default: break;
+    }
+    /* We have instructions implementing the remaining cases.  */
+    switch (CC) {
+      case ISD::SETEQ:
+      case ISD::SETUEQ:
+        if (VecVT == MVT::v16i8)
+          return PPC::VCMPEQUB;
+        else if (VecVT == MVT::v8i16)
+          return PPC::VCMPEQUH;
+        else if (VecVT == MVT::v4i32)
+          return PPC::VCMPEQUW;
+        break;
+      case ISD::SETGT:
+        if (VecVT == MVT::v16i8)
+          return PPC::VCMPGTSB;
+        else if (VecVT == MVT::v8i16)
+          return PPC::VCMPGTSH;
+        else if (VecVT == MVT::v4i32)
+          return PPC::VCMPGTSW;
+        break;
+      case ISD::SETUGT:
+        if (VecVT == MVT::v16i8)
+          return PPC::VCMPGTUB;
+        else if (VecVT == MVT::v8i16)
+          return PPC::VCMPGTUH;
+        else if (VecVT == MVT::v4i32)
+          return PPC::VCMPGTUW;
+        break;
+      default:
+        break;
+    }
+    llvm_unreachable("Invalid integer vector compare condition");
   }
 }
 
@@ -842,60 +853,20 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) {
   // vector compare operations return the same type as the operands.
   if (LHS.getValueType().isVector()) {
     EVT VecVT = LHS.getValueType();
-    MVT::SimpleValueType VT = VecVT.getSimpleVT().SimpleTy;
-    unsigned int VCmpInst = getVCmpInst(VT, CC, PPCSubTarget->hasVSX());
-
-    switch (CC) {
-      case ISD::SETEQ:
-      case ISD::SETOEQ:
-      case ISD::SETUEQ:
-        return CurDAG->SelectNodeTo(N, VCmpInst, VecVT, LHS, RHS);
-      case ISD::SETNE:
-      case ISD::SETONE:
-      case ISD::SETUNE: {
-        SDValue VCmp(CurDAG->getMachineNode(VCmpInst, dl, VecVT, LHS, RHS), 0);
-        return CurDAG->SelectNodeTo(N, PPCSubTarget->hasVSX() ? PPC::XXLNOR :
-                                                               PPC::VNOR,
-                                    VecVT, VCmp, VCmp);
-      } 
-      case ISD::SETLT:
-      case ISD::SETOLT:
-      case ISD::SETULT:
-        return CurDAG->SelectNodeTo(N, VCmpInst, VecVT, RHS, LHS);
-      case ISD::SETGT:
-      case ISD::SETOGT:
-      case ISD::SETUGT:
-        return CurDAG->SelectNodeTo(N, VCmpInst, VecVT, LHS, RHS);
-      case ISD::SETGE:
-      case ISD::SETOGE:
-      case ISD::SETUGE: {
-        // Small optimization: Altivec provides a 'Vector Compare Greater Than
-        // or Equal To' instruction (vcmpgefp), so in this case there is no
-        // need for extra logic for the equal compare.
-        if (VecVT.getSimpleVT().isFloatingPoint()) {
-          return CurDAG->SelectNodeTo(N, VCmpInst, VecVT, LHS, RHS);
-        } else {
-          SDValue VCmpGT(CurDAG->getMachineNode(VCmpInst, dl, VecVT, LHS, RHS), 0);
-          unsigned int VCmpEQInst = getVCmpEQInst(VT, PPCSubTarget->hasVSX());
-          SDValue VCmpEQ(CurDAG->getMachineNode(VCmpEQInst, dl, VecVT, LHS, RHS), 0);
-          return CurDAG->SelectNodeTo(N, PPCSubTarget->hasVSX() ? PPC::XXLOR :
-                                                                 PPC::VOR,
-                                      VecVT, VCmpGT, VCmpEQ);
-        }
-      }
-      case ISD::SETLE:
-      case ISD::SETOLE:
-      case ISD::SETULE: {
-        SDValue VCmpLE(CurDAG->getMachineNode(VCmpInst, dl, VecVT, RHS, LHS), 0);
-        unsigned int VCmpEQInst = getVCmpEQInst(VT, PPCSubTarget->hasVSX());
-        SDValue VCmpEQ(CurDAG->getMachineNode(VCmpEQInst, dl, VecVT, LHS, RHS), 0);
-        return CurDAG->SelectNodeTo(N, PPCSubTarget->hasVSX() ? PPC::XXLOR :
-                                                               PPC::VOR,
-                                    VecVT, VCmpLE, VCmpEQ);
-      }
-      default:
-        llvm_unreachable("Invalid vector compare type: should be expanded by legalize");
+    bool Swap, Negate;
+    unsigned int VCmpInst = getVCmpInst(VecVT.getSimpleVT(), CC,
+                                        PPCSubTarget->hasVSX(), Swap, Negate);
+    if (Swap)
+      std::swap(LHS, RHS);
+
+    if (Negate) {
+      SDValue VCmp(CurDAG->getMachineNode(VCmpInst, dl, VecVT, LHS, RHS), 0);
+      return CurDAG->SelectNodeTo(N, PPCSubTarget->hasVSX() ? PPC::XXLNOR :
+                                                              PPC::VNOR,
+                                  VecVT, VCmp, VCmp);
     }
+
+    return CurDAG->SelectNodeTo(N, VCmpInst, VecVT, LHS, RHS);
   }
 
   if (PPCSubTarget->useCRBits())
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index 9ba2f9194a4..a0f72568ad1 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -526,11 +526,6 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
     // Altivec does not contain unordered floating-point compare instructions
     setCondCodeAction(ISD::SETUO, MVT::v4f32, Expand);
     setCondCodeAction(ISD::SETUEQ, MVT::v4f32, Expand);
-    setCondCodeAction(ISD::SETUGT, MVT::v4f32, Expand);
-    setCondCodeAction(ISD::SETUGE, MVT::v4f32, Expand);
-    setCondCodeAction(ISD::SETULT, MVT::v4f32, Expand);
-    setCondCodeAction(ISD::SETULE, MVT::v4f32, Expand);
-
     setCondCodeAction(ISD::SETO,   MVT::v4f32, Expand);
     setCondCodeAction(ISD::SETONE, MVT::v4f32, Expand);
 
@@ -561,11 +556,6 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
       // Share the Altivec comparison restrictions.
       setCondCodeAction(ISD::SETUO, MVT::v2f64, Expand);
       setCondCodeAction(ISD::SETUEQ, MVT::v2f64, Expand);
-      setCondCodeAction(ISD::SETUGT, MVT::v2f64, Expand);
-      setCondCodeAction(ISD::SETUGE, MVT::v2f64, Expand);
-      setCondCodeAction(ISD::SETULT, MVT::v2f64, Expand);
-      setCondCodeAction(ISD::SETULE, MVT::v2f64, Expand);
-
       setCondCodeAction(ISD::SETO,   MVT::v2f64, Expand);
       setCondCodeAction(ISD::SETONE, MVT::v2f64, Expand);
 
diff --git a/test/CodeGen/PowerPC/vec_cmp.ll b/test/CodeGen/PowerPC/vec_cmp.ll
index 2733089fcb1..516b2dd58b9 100644
--- a/test/CodeGen/PowerPC/vec_cmp.ll
+++ b/test/CodeGen/PowerPC/vec_cmp.ll
@@ -63,9 +63,8 @@ entry:
   ret <16 x i8> %sext
 }
 ; CHECK-LABEL:      v16si8_cmp_le:
-; CHECK:      vcmpequb [[RCMPEQ:[0-9]+]], 2, 3
-; CHECK-NEXT: vcmpgtsb [[RCMPLE:[0-9]+]], 3, 2
-; CHECK-NEXT: vor      2, [[RCMPLE]], [[RCMPEQ]]
+; CHECK:      vcmpgtsb [[RET:[0-9]+]], 2, 3
+; CHECK-NEXT: vnor     2, [[RET]], [[RET]]
 
 define <16 x i8> @v16ui8_cmp_le(<16 x i8> %x, <16 x i8> %y) nounwind readnone {
 entry:
@@ -74,9 +73,8 @@ entry:
   ret <16 x i8> %sext
 }
 ; CHECK-LABEL:      v16ui8_cmp_le:
-; CHECK:      vcmpequb [[RCMPEQ:[0-9]+]], 2, 3
-; CHECK-NEXT: vcmpgtub [[RCMPLE:[0-9]+]], 3, 2
-; CHECK-NEXT: vor      2, [[RCMPLE]], [[RCMPEQ]]
+; CHECK:      vcmpgtub [[RET:[0-9]+]], 2, 3
+; CHECK-NEXT: vnor     2, [[RET]], [[RET]]
 
 define <16 x i8> @v16si8_cmp_lt(<16 x i8> %x, <16 x i8> %y) nounwind readnone {
 entry:
@@ -121,9 +119,8 @@ entry:
   ret <16 x i8> %sext
 }
 ; CHECK-LABEL:      v16si8_cmp_ge:
-; CHECK:      vcmpequb [[RCMPEQ:[0-9]+]], 2, 3
-; CHECK-NEXT: vcmpgtsb [[RCMPGT:[0-9]+]], 2, 3
-; CHECK-NEXT: vor      2, [[RCMPGT]], [[RCMPEQ]]
+; CHECK:      vcmpgtsb [[RET:[0-9]+]], 3, 2
+; CHECK-NEXT: vnor     2, [[RET]], [[RET]]
 
 define <16 x i8> @v16ui8_cmp_ge(<16 x i8> %x, <16 x i8> %y) nounwind readnone {
 entry:
@@ -132,9 +129,8 @@ entry:
   ret <16 x i8> %sext
 }
 ; CHECK-LABEL:      v16ui8_cmp_ge:
-; CHECK:      vcmpequb [[RCMPEQ:[0-9]+]], 2, 3
-; CHECK-NEXT: vcmpgtub [[RCMPGT:[0-9]+]], 2, 3
-; CHECK-NEXT: vor      2, [[RCMPGT]], [[RCMPEQ]]
+; CHECK:      vcmpgtub [[RET:[0-9]+]], 3, 2
+; CHECK-NEXT: vnor     2, [[RET]], [[RET]]
 
 
 define <32 x i8> @v32si8_cmp(<32 x i8> %x, <32 x i8> %y) nounwind readnone {
@@ -193,9 +189,8 @@ entry:
   ret <8 x i16> %sext
 }
 ; CHECK-LABEL:      v8si16_cmp_le:
-; CHECK:      vcmpequh [[RCMPEQ:[0-9]+]], 2, 3
-; CHECK-NEXT: vcmpgtsh [[RCMPLE:[0-9]+]], 3, 2
-; CHECK-NEXT: vor      2, [[RCMPLE]], [[RCMPEQ]]
+; CHECK:      vcmpgtsh [[RET:[0-9]+]], 2, 3
+; CHECK-NEXT: vnor     2, [[RET]], [[RET]]
 
 define <8 x i16> @v8ui16_cmp_le(<8 x i16> %x, <8 x i16> %y) nounwind readnone {
 entry:
@@ -204,9 +199,8 @@ entry:
   ret <8 x i16> %sext
 }
 ; CHECK-LABEL:      v8ui16_cmp_le:
-; CHECK:      vcmpequh [[RCMPEQ:[0-9]+]], 2, 3
-; CHECK-NEXT: vcmpgtuh [[RCMPLE:[0-9]+]], 3, 2
-; CHECK-NEXT: vor      2, [[RCMPLE]], [[RCMPEQ]]
+; CHECK:      vcmpgtuh [[RET:[0-9]+]], 2, 3
+; CHECK-NEXT: vnor     2, [[RET]], [[RET]]
 
 define <8 x i16> @v8si16_cmp_lt(<8 x i16> %x, <8 x i16> %y) nounwind readnone {
 entry:
@@ -251,9 +245,8 @@ entry:
   ret <8 x i16> %sext
 }
 ; CHECK-LABEL:      v8si16_cmp_ge:
-; CHECK:      vcmpequh [[RCMPEQ:[0-9]+]], 2, 3
-; CHECK-NEXT: vcmpgtsh [[RCMPGT:[0-9]+]], 2, 3
-; CHECK-NEXT: vor      2, [[RCMPGT]], [[RCMPEQ]]
+; CHECK:      vcmpgtsh [[RET:[0-9]+]], 3, 2
+; CHECK-NEXT: vnor     2, [[RET]], [[RET]]
 
 define <8 x i16> @v8ui16_cmp_ge(<8 x i16> %x, <8 x i16> %y) nounwind readnone {
 entry:
@@ -262,9 +255,8 @@ entry:
   ret <8 x i16> %sext
 }
 ; CHECK-LABEL:      v8ui16_cmp_ge:
-; CHECK:      vcmpequh [[RCMPEQ:[0-9]+]], 2, 3
-; CHECK-NEXT: vcmpgtuh [[RCMPGT:[0-9]+]], 2, 3
-; CHECK-NEXT: vor      2, [[RCMPGT]], [[RCMPEQ]]
+; CHECK:      vcmpgtuh [[RET:[0-9]+]], 3, 2
+; CHECK-NEXT: vnor     2, [[RET]], [[RET]]
 
 
 define <16 x i16> @v16si16_cmp(<16 x i16> %x, <16 x i16> %y) nounwind readnone {
@@ -326,9 +318,8 @@ entry:
   ret <4 x i32> %sext
 }
 ; CHECK-LABEL:      v4si32_cmp_le:
-; CHECK:      vcmpequw [[RCMPEQ:[0-9]+]], 2, 3
-; CHECK-NEXT: vcmpgtsw [[RCMPLE:[0-9]+]], 3, 2
-; CHECK-NEXT: vor      2, [[RCMPLE]], [[RCMPEQ]]
+; CHECK:      vcmpgtsw [[RET:[0-9]+]], 2, 3
+; CHECK-NEXT: vnor     2, [[RET]], [[RET]]
 
 define <4 x i32> @v4ui32_cmp_le(<4 x i32> %x, <4 x i32> %y) nounwind readnone {
 entry:
@@ -337,9 +328,8 @@ entry:
   ret <4 x i32> %sext
 }
 ; CHECK-LABEL:      v4ui32_cmp_le:
-; CHECK:      vcmpequw [[RCMPEQ:[0-9]+]], 2, 3
-; CHECK-NEXT: vcmpgtuw [[RCMPLE:[0-9]+]], 3, 2
-; CHECK-NEXT: vor      2, [[RCMPLE]], [[RCMPEQ]]
+; CHECK:      vcmpgtuw [[RET:[0-9]+]], 2, 3
+; CHECK-NEXT: vnor     2, [[RET]], [[RET]]
 
 define <4 x i32> @v4si32_cmp_lt(<4 x i32> %x, <4 x i32> %y) nounwind readnone {
 entry:
@@ -384,9 +374,8 @@ entry:
   ret <4 x i32> %sext
 }
 ; CHECK-LABEL:      v4si32_cmp_ge:
-; CHECK:      vcmpequw [[RCMPEQ:[0-9]+]], 2, 3
-; CHECK-NEXT: vcmpgtsw [[RCMPGT:[0-9]+]], 2, 3
-; CHECK-NEXT: vor      2, [[RCMPGT]], [[RCMPEQ]]
+; CHECK:      vcmpgtsw [[RET:[0-9]+]], 3, 2
+; CHECK-NEXT: vnor     2, [[RET]], [[RET]]
 
 define <4 x i32> @v4ui32_cmp_ge(<4 x i32> %x, <4 x i32> %y) nounwind readnone {
 entry:
@@ -395,9 +384,8 @@ entry:
   ret <4 x i32> %sext
 }
 ; CHECK-LABEL:      v4ui32_cmp_ge:
-; CHECK:      vcmpequw [[RCMPEQ:[0-9]+]], 2, 3
-; CHECK-NEXT: vcmpgtuw [[RCMPGT:[0-9]+]], 2, 3
-; CHECK-NEXT: vor      2, [[RCMPGT]], [[RCMPEQ]]
+; CHECK:      vcmpgtuw [[RET:[0-9]+]], 3, 2
+; CHECK-NEXT: vnor     2, [[RET]], [[RET]]
 
 
 define <8 x i32> @v8si32_cmp(<8 x i32> %x, <8 x i32> %y) nounwind readnone {
@@ -480,9 +468,7 @@ entry:
   ret <4 x float> %0
 }
 ; CHECK-LABEL:      v4f32_cmp_le:
-; CHECK:      vcmpeqfp [[RCMPEQ:[0-9]+]], 2, 3
-; CHECK-NEXT: vcmpgtfp [[RCMPLE:[0-9]+]], 3, 2
-; CHECK-NEXT: vor      2, [[RCMPLE]], [[RCMPEQ]]
+; CHECK: vcmpgefp 2, 3, 2
 
 define <4 x float> @v4f32_cmp_lt(<4 x float> %x, <4 x float> %y) nounwind readnone {
 entry:
@@ -514,6 +500,50 @@ entry:
 ; CHECK-LABEL: v4f32_cmp_gt:
 ; CHECK: vcmpgtfp 2, 2, 3
 
+define <4 x float> @v4f32_cmp_ule(<4 x float> %x, <4 x float> %y) nounwind readnone {
+entry:
+  %cmp = fcmp ule <4 x float> %x, %y
+  %sext = sext <4 x i1> %cmp to <4 x i32>
+  %0 = bitcast <4 x i32> %sext to <4 x float>
+  ret <4 x float> %0
+}
+; CHECK-LABEL: v4f32_cmp_ule:
+; CHECK:      vcmpgtfp [[RET:[0-9]+]], 2, 3
+; CHECK-NEXT: vnor     2, [[RET]], [[RET]]
+
+define <4 x float> @v4f32_cmp_ult(<4 x float> %x, <4 x float> %y) nounwind readnone {
+entry:
+  %cmp = fcmp ult <4 x float> %x, %y
+  %sext = sext <4 x i1> %cmp to <4 x i32>
+  %0 = bitcast <4 x i32> %sext to <4 x float>
+  ret <4 x float> %0
+}
+; CHECK-LABEL: v4f32_cmp_ult:
+; CHECK:      vcmpgefp [[RET:[0-9]+]], 2, 3
+; CHECK-NEXT: vnor     2, [[RET]], [[RET]]
+
+define <4 x float> @v4f32_cmp_uge(<4 x float> %x, <4 x float> %y) nounwind readnone {
+entry:
+  %cmp = fcmp uge <4 x float> %x, %y
+  %sext = sext <4 x i1> %cmp to <4 x i32>
+  %0 = bitcast <4 x i32> %sext to <4 x float>
+  ret <4 x float> %0
+}
+; CHECK-LABEL: v4f32_cmp_uge:
+; CHECK:      vcmpgtfp [[RET:[0-9]+]], 3, 2
+; CHECK-NEXT: vnor     2, [[RET]], [[RET]]
+
+define <4 x float> @v4f32_cmp_ugt(<4 x float> %x, <4 x float> %y) nounwind readnone {
+entry:
+  %cmp = fcmp ugt <4 x float> %x, %y
+  %sext = sext <4 x i1> %cmp to <4 x i32>
+  %0 = bitcast <4 x i32> %sext to <4 x float>
+  ret <4 x float> %0
+}
+; CHECK-LABEL: v4f32_cmp_ugt:
+; CHECK:      vcmpgefp [[RET:[0-9]+]], 3, 2
+; CHECK-NEXT: vnor     2, [[RET]], [[RET]]
+
 
 define <8 x float> @v8f32_cmp(<8 x float> %x, <8 x float> %y) nounwind readnone {
 entry:
-- 
2.11.0