From bb47fd04c9b1616c0371eb2c488c5f0f665c25f8 Mon Sep 17 00:00:00 2001
From: Daniel Sanders <daniel.sanders@imgtec.com>
Date: Tue, 12 Nov 2013 10:45:18 +0000
Subject: [PATCH] [mips][msa] Added support for matching bclr, and bclri from
 normal IR (i.e. not intrinsics)

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@194471 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/Mips/MSA.txt                |   5 ++
 lib/Target/Mips/MipsAsmPrinter.cpp     |   9 +++
 lib/Target/Mips/MipsAsmPrinter.h       |   1 +
 lib/Target/Mips/MipsISelDAGToDAG.cpp   |   5 ++
 lib/Target/Mips/MipsISelDAGToDAG.h     |   3 +
 lib/Target/Mips/MipsMSAInstrInfo.td    | 103 ++++++++++++++++---------
 lib/Target/Mips/MipsSEISelDAGToDAG.cpp |  21 +++++
 lib/Target/Mips/MipsSEISelDAGToDAG.h   |   3 +
 lib/Target/Mips/MipsSEISelLowering.cpp |  49 ++++++++++++
 test/CodeGen/Mips/msa/bitwise.ll       | 137 +++++++++++++++++++++++++++++++++
 test/CodeGen/Mips/msa/i5-b.ll          |   3 +-
 11 files changed, 300 insertions(+), 39 deletions(-)

diff --git a/lib/Target/Mips/MSA.txt b/lib/Target/Mips/MSA.txt
index 270a723048e..d1c41932fcb 100644
--- a/lib/Target/Mips/MSA.txt
+++ b/lib/Target/Mips/MSA.txt
@@ -14,6 +14,11 @@ This section describes any quirks of instruction selection for MSA. For
 example, two instructions might be equally valid for some given IR and one is
 chosen in preference to the other.
 
+bclri.b:
+        It is not possible to emit bclri.b since andi.b covers exactly the
+        same cases. andi.b should use fractionally less power than bclri.b in
+        most hardware implementations so it is used in preference to bclri.b.
+
 vshf.w:
         It is not possible to emit vshf.w when the shuffle description is
         constant since shf.w covers exactly the same cases. shf.w is used
diff --git a/lib/Target/Mips/MipsAsmPrinter.cpp b/lib/Target/Mips/MipsAsmPrinter.cpp
index 9fb24a2fc6c..3bef2fa0050 100644
--- a/lib/Target/Mips/MipsAsmPrinter.cpp
+++ b/lib/Target/Mips/MipsAsmPrinter.cpp
@@ -569,6 +569,15 @@ void MipsAsmPrinter::printUnsignedImm(const MachineInstr *MI, int opNum,
     printOperand(MI, opNum, O);
 }
 
+void MipsAsmPrinter::printUnsignedImm8(const MachineInstr *MI, int opNum,
+                                       raw_ostream &O) {
+  const MachineOperand &MO = MI->getOperand(opNum);
+  if (MO.isImm())
+    O << (unsigned short int)(unsigned char)MO.getImm();
+  else
+    printOperand(MI, opNum, O);
+}
+
 void MipsAsmPrinter::
 printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &O) {
   // Load/Store memory operands -- imm($reg)
diff --git a/lib/Target/Mips/MipsAsmPrinter.h b/lib/Target/Mips/MipsAsmPrinter.h
index 44b8b068dfa..11c6acd208d 100644
--- a/lib/Target/Mips/MipsAsmPrinter.h
+++ b/lib/Target/Mips/MipsAsmPrinter.h
@@ -96,6 +96,7 @@ public:
                              raw_ostream &O);
   void printOperand(const MachineInstr *MI, int opNum, raw_ostream &O);
   void printUnsignedImm(const MachineInstr *MI, int opNum, raw_ostream &O);
+  void printUnsignedImm8(const MachineInstr *MI, int opNum, raw_ostream &O);
   void printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &O);
   void printMemOperandEA(const MachineInstr *MI, int opNum, raw_ostream &O);
   void printFCCOperand(const MachineInstr *MI, int opNum, raw_ostream &O,
diff --git a/lib/Target/Mips/MipsISelDAGToDAG.cpp b/lib/Target/Mips/MipsISelDAGToDAG.cpp
index d0a41e755ee..c417bd59341 100644
--- a/lib/Target/Mips/MipsISelDAGToDAG.cpp
+++ b/lib/Target/Mips/MipsISelDAGToDAG.cpp
@@ -149,6 +149,11 @@ bool MipsDAGToDAGISel::selectVSplatUimmPow2(SDValue N, SDValue &Imm) const {
   return false;
 }
 
+bool MipsDAGToDAGISel::selectVSplatUimmInvPow2(SDValue N, SDValue &Imm) const {
+  llvm_unreachable("Unimplemented function.");
+  return false;
+}
+
 bool MipsDAGToDAGISel::selectVSplatMaskL(SDValue N, SDValue &Imm) const {
   llvm_unreachable("Unimplemented function.");
   return false;
diff --git a/lib/Target/Mips/MipsISelDAGToDAG.h b/lib/Target/Mips/MipsISelDAGToDAG.h
index 6bc96446d36..a4d9da532b2 100644
--- a/lib/Target/Mips/MipsISelDAGToDAG.h
+++ b/lib/Target/Mips/MipsISelDAGToDAG.h
@@ -96,6 +96,9 @@ private:
   virtual bool selectVSplatSimm5(SDValue N, SDValue &Imm) const;
   /// \brief Select constant vector splats whose value is a power of 2.
   virtual bool selectVSplatUimmPow2(SDValue N, SDValue &Imm) const;
+  /// \brief Select constant vector splats whose value is the inverse of a
+  /// power of 2.
+  virtual bool selectVSplatUimmInvPow2(SDValue N, SDValue &Imm) const;
   /// \brief Select constant vector splats whose value is a run of set bits
   /// ending at the most significant bit
   virtual bool selectVSplatMaskL(SDValue N, SDValue &Imm) const;
diff --git a/lib/Target/Mips/MipsMSAInstrInfo.td b/lib/Target/Mips/MipsMSAInstrInfo.td
index 028070097fe..f3c1e955b2b 100644
--- a/lib/Target/Mips/MipsMSAInstrInfo.td
+++ b/lib/Target/Mips/MipsMSAInstrInfo.td
@@ -66,15 +66,15 @@ def uimm2 : Operand<i32> {
 }
 
 def uimm3 : Operand<i32> {
-  let PrintMethod = "printUnsignedImm";
+  let PrintMethod = "printUnsignedImm8";
 }
 
 def uimm4 : Operand<i32> {
-  let PrintMethod = "printUnsignedImm";
+  let PrintMethod = "printUnsignedImm8";
 }
 
 def uimm8 : Operand<i32> {
-  let PrintMethod = "printUnsignedImm";
+  let PrintMethod = "printUnsignedImm8";
 }
 
 def simm5 : Operand<i32>;
@@ -90,23 +90,23 @@ def vsplat_uimm2 : Operand<vAny> {
 }
 
 def vsplat_uimm3 : Operand<vAny> {
-  let PrintMethod = "printUnsignedImm";
+  let PrintMethod = "printUnsignedImm8";
 }
 
 def vsplat_uimm4 : Operand<vAny> {
-  let PrintMethod = "printUnsignedImm";
+  let PrintMethod = "printUnsignedImm8";
 }
 
 def vsplat_uimm5 : Operand<vAny> {
-  let PrintMethod = "printUnsignedImm";
+  let PrintMethod = "printUnsignedImm8";
 }
 
 def vsplat_uimm6 : Operand<vAny> {
-  let PrintMethod = "printUnsignedImm";
+  let PrintMethod = "printUnsignedImm8";
 }
 
 def vsplat_uimm8 : Operand<vAny> {
-  let PrintMethod = "printUnsignedImm";
+  let PrintMethod = "printUnsignedImm8";
 }
 
 def vsplat_simm5 : Operand<vAny>;
@@ -315,6 +315,11 @@ def vsplati64_simm5 : SplatComplexPattern<vsplat_simm5, v2i64, 1,
 def vsplat_uimm_pow2 : ComplexPattern<vAny, 1, "selectVSplatUimmPow2",
                                       [build_vector, bitconvert]>;
 
+// Any build_vector that is a constant splat with a value that is the bitwise
+// inverse of an exact power of 2
+def vsplat_uimm_inv_pow2 : ComplexPattern<vAny, 1, "selectVSplatUimmInvPow2",
+                                          [build_vector, bitconvert]>;
+
 // Any build_vector that is a constant splat with only a consecutive sequence
 // of left-most bits set.
 def vsplat_maskl_bits : SplatComplexPattern<vsplat_uimm8, vAny, 1,
@@ -328,7 +333,7 @@ def vsplat_maskr_bits : SplatComplexPattern<vsplat_uimm8, vAny, 1,
                                             [build_vector, bitconvert]>;
 
 // Any build_vector that is a constant splat with a value that equals 1
-// FIXME: This should be a ComplexPattern but we can't use them because the
+// FIXME: These should be a ComplexPattern but we can't use them because the
 //        ISel generator requires the uses to have a name, but providing a name
 //        causes other errors ("used in pattern but not operand list")
 def vsplat_imm_eq_1 : PatLeaf<(build_vector), [{
@@ -348,6 +353,20 @@ def vsplati64_imm_eq_1 : PatLeaf<(bitconvert (v4i32 (build_vector))), [{
          Imm.getBitWidth() == EltTy.getSizeInBits() && Imm == 1;
 }]>;
 
+def vbclr_b : PatFrag<(ops node:$ws, node:$wt),
+                      (and node:$ws, (xor (shl vsplat_imm_eq_1, node:$wt),
+                                          immAllOnesV))>;
+def vbclr_h : PatFrag<(ops node:$ws, node:$wt),
+                      (and node:$ws, (xor (shl vsplat_imm_eq_1, node:$wt),
+                                          immAllOnesV))>;
+def vbclr_w : PatFrag<(ops node:$ws, node:$wt),
+                      (and node:$ws, (xor (shl vsplat_imm_eq_1, node:$wt),
+                                          immAllOnesV))>;
+def vbclr_d : PatFrag<(ops node:$ws, node:$wt),
+                      (and node:$ws, (xor (shl (v2i64 vsplati64_imm_eq_1),
+                                               node:$wt),
+                                          (bitconvert (v4i32 immAllOnesV))))>;
+
 def vbneg_b : PatFrag<(ops node:$ws, node:$wt),
                       (xor node:$ws, (shl vsplat_imm_eq_1, node:$wt))>;
 def vbneg_h : PatFrag<(ops node:$ws, node:$wt),
@@ -366,7 +385,7 @@ def vbset_w : PatFrag<(ops node:$ws, node:$wt),
                       (or node:$ws, (shl vsplat_imm_eq_1, node:$wt))>;
 def vbset_d : PatFrag<(ops node:$ws, node:$wt),
                       (or node:$ws, (shl (v2i64 vsplati64_imm_eq_1),
-                                    node:$wt))>;
+                                         node:$wt))>;
 
 def fms : PatFrag<(ops node:$wd, node:$ws, node:$wt),
                   (fsub node:$wd, (fmul node:$ws, node:$wt))>;
@@ -1093,42 +1112,46 @@ class XORI_B_ENC : MSA_I8_FMT<0b11, 0b000000>;
 
 // Instruction desc.
 class MSA_BIT_B_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
-                          RegisterOperand ROWD, RegisterOperand ROWS = ROWD,
+                          ComplexPattern Imm, RegisterOperand ROWD,
+                          RegisterOperand ROWS = ROWD,
                           InstrItinClass itin = NoItinerary> {
   dag OutOperandList = (outs ROWD:$wd);
   dag InOperandList = (ins ROWS:$ws, vsplat_uimm3:$m);
   string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $m");
-  list<dag> Pattern = [(set ROWD:$wd, (OpNode ROWS:$ws, vsplat_uimm_pow2:$m))];
+  list<dag> Pattern = [(set ROWD:$wd, (OpNode ROWS:$ws, Imm:$m))];
   InstrItinClass Itinerary = itin;
 }
 
 class MSA_BIT_H_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
-                          RegisterOperand ROWD, RegisterOperand ROWS = ROWD,
+                          ComplexPattern Imm, RegisterOperand ROWD,
+                          RegisterOperand ROWS = ROWD,
                           InstrItinClass itin = NoItinerary> {
   dag OutOperandList = (outs ROWD:$wd);
   dag InOperandList = (ins ROWS:$ws, vsplat_uimm4:$m);
   string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $m");
-  list<dag> Pattern = [(set ROWD:$wd, (OpNode ROWS:$ws, vsplat_uimm_pow2:$m))];
+  list<dag> Pattern = [(set ROWD:$wd, (OpNode ROWS:$ws, Imm:$m))];
   InstrItinClass Itinerary = itin;
 }
 
 class MSA_BIT_W_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
-                          RegisterOperand ROWD, RegisterOperand ROWS = ROWD,
+                          ComplexPattern Imm, RegisterOperand ROWD,
+                          RegisterOperand ROWS = ROWD,
                           InstrItinClass itin = NoItinerary> {
   dag OutOperandList = (outs ROWD:$wd);
   dag InOperandList = (ins ROWS:$ws, vsplat_uimm5:$m);
   string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $m");
-  list<dag> Pattern = [(set ROWD:$wd, (OpNode ROWS:$ws, vsplat_uimm_pow2:$m))];
+  list<dag> Pattern = [(set ROWD:$wd, (OpNode ROWS:$ws, Imm:$m))];
   InstrItinClass Itinerary = itin;
 }
 
 class MSA_BIT_D_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
-                          RegisterOperand ROWD, RegisterOperand ROWS = ROWD,
+                          ComplexPattern Imm, RegisterOperand ROWD,
+                          RegisterOperand ROWS = ROWD,
                           InstrItinClass itin = NoItinerary> {
   dag OutOperandList = (outs ROWD:$wd);
   dag InOperandList = (ins ROWS:$ws, vsplat_uimm6:$m);
   string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $m");
-  list<dag> Pattern = [(set ROWD:$wd, (OpNode ROWS:$ws, vsplat_uimm_pow2:$m))];
+  list<dag> Pattern = [(set ROWD:$wd, (OpNode ROWS:$ws, Imm:$m))];
   InstrItinClass Itinerary = itin;
 }
 
@@ -1602,19 +1625,19 @@ class AVER_U_W_DESC : MSA_3R_DESC_BASE<"aver_u.w", int_mips_aver_u_w,
 class AVER_U_D_DESC : MSA_3R_DESC_BASE<"aver_u.d", int_mips_aver_u_d,
                                        MSA128DOpnd>, IsCommutable;
 
-class BCLR_B_DESC : MSA_3R_DESC_BASE<"bclr.b", int_mips_bclr_b, MSA128BOpnd>;
-class BCLR_H_DESC : MSA_3R_DESC_BASE<"bclr.h", int_mips_bclr_h, MSA128HOpnd>;
-class BCLR_W_DESC : MSA_3R_DESC_BASE<"bclr.w", int_mips_bclr_w, MSA128WOpnd>;
-class BCLR_D_DESC : MSA_3R_DESC_BASE<"bclr.d", int_mips_bclr_d, MSA128DOpnd>;
+class BCLR_B_DESC : MSA_3R_DESC_BASE<"bclr.b", vbclr_b, MSA128BOpnd>;
+class BCLR_H_DESC : MSA_3R_DESC_BASE<"bclr.h", vbclr_h, MSA128HOpnd>;
+class BCLR_W_DESC : MSA_3R_DESC_BASE<"bclr.w", vbclr_w, MSA128WOpnd>;
+class BCLR_D_DESC : MSA_3R_DESC_BASE<"bclr.d", vbclr_d, MSA128DOpnd>;
 
-class BCLRI_B_DESC : MSA_BIT_B_X_DESC_BASE<"bclri.b", int_mips_bclri_b,
-                                           MSA128BOpnd>;
-class BCLRI_H_DESC : MSA_BIT_H_X_DESC_BASE<"bclri.h", int_mips_bclri_h,
-                                           MSA128HOpnd>;
-class BCLRI_W_DESC : MSA_BIT_W_X_DESC_BASE<"bclri.w", int_mips_bclri_w,
-                                           MSA128WOpnd>;
-class BCLRI_D_DESC : MSA_BIT_D_X_DESC_BASE<"bclri.d", int_mips_bclri_d,
-                                           MSA128DOpnd>;
+class BCLRI_B_DESC : MSA_BIT_B_DESC_BASE<"bclri.b", and, vsplat_uimm_inv_pow2,
+                                         MSA128BOpnd>;
+class BCLRI_H_DESC : MSA_BIT_H_DESC_BASE<"bclri.h", and, vsplat_uimm_inv_pow2,
+                                         MSA128HOpnd>;
+class BCLRI_W_DESC : MSA_BIT_W_DESC_BASE<"bclri.w", and, vsplat_uimm_inv_pow2,
+                                         MSA128WOpnd>;
+class BCLRI_D_DESC : MSA_BIT_D_DESC_BASE<"bclri.d", and, vsplat_uimm_inv_pow2,
+                                         MSA128DOpnd>;
 
 class BINSL_B_DESC : MSA_3R_BINSX_DESC_BASE<"binsl.b", int_mips_binsl_b,
                                             MSA128BOpnd>;
@@ -1697,10 +1720,10 @@ class BNEG_H_DESC : MSA_3R_DESC_BASE<"bneg.h", vbneg_h, MSA128HOpnd>;
 class BNEG_W_DESC : MSA_3R_DESC_BASE<"bneg.w", vbneg_w, MSA128WOpnd>;
 class BNEG_D_DESC : MSA_3R_DESC_BASE<"bneg.d", vbneg_d, MSA128DOpnd>;
 
-class BNEGI_B_DESC : MSA_BIT_B_DESC_BASE<"bnegi.b", xor, MSA128BOpnd>;
-class BNEGI_H_DESC : MSA_BIT_H_DESC_BASE<"bnegi.h", xor, MSA128HOpnd>;
-class BNEGI_W_DESC : MSA_BIT_W_DESC_BASE<"bnegi.w", xor, MSA128WOpnd>;
-class BNEGI_D_DESC : MSA_BIT_D_DESC_BASE<"bnegi.d", xor, MSA128DOpnd>;
+class BNEGI_B_DESC : MSA_BIT_B_DESC_BASE<"bnegi.b", xor, vsplat_uimm_pow2, MSA128BOpnd>;
+class BNEGI_H_DESC : MSA_BIT_H_DESC_BASE<"bnegi.h", xor, vsplat_uimm_pow2, MSA128HOpnd>;
+class BNEGI_W_DESC : MSA_BIT_W_DESC_BASE<"bnegi.w", xor, vsplat_uimm_pow2, MSA128WOpnd>;
+class BNEGI_D_DESC : MSA_BIT_D_DESC_BASE<"bnegi.d", xor, vsplat_uimm_pow2, MSA128DOpnd>;
 
 class BNZ_B_DESC : MSA_CBRANCH_DESC_BASE<"bnz.b", MSA128BOpnd>;
 class BNZ_H_DESC : MSA_CBRANCH_DESC_BASE<"bnz.h", MSA128HOpnd>;
@@ -1738,10 +1761,14 @@ class BSET_H_DESC : MSA_3R_DESC_BASE<"bset.h", vbset_h, MSA128HOpnd>;
 class BSET_W_DESC : MSA_3R_DESC_BASE<"bset.w", vbset_w, MSA128WOpnd>;
 class BSET_D_DESC : MSA_3R_DESC_BASE<"bset.d", vbset_d, MSA128DOpnd>;
 
-class BSETI_B_DESC : MSA_BIT_B_DESC_BASE<"bseti.b", or, MSA128BOpnd>;
-class BSETI_H_DESC : MSA_BIT_H_DESC_BASE<"bseti.h", or, MSA128HOpnd>;
-class BSETI_W_DESC : MSA_BIT_W_DESC_BASE<"bseti.w", or, MSA128WOpnd>;
-class BSETI_D_DESC : MSA_BIT_D_DESC_BASE<"bseti.d", or, MSA128DOpnd>;
+class BSETI_B_DESC : MSA_BIT_B_DESC_BASE<"bseti.b", or, vsplat_uimm_pow2,
+                                         MSA128BOpnd>;
+class BSETI_H_DESC : MSA_BIT_H_DESC_BASE<"bseti.h", or, vsplat_uimm_pow2,
+                                         MSA128HOpnd>;
+class BSETI_W_DESC : MSA_BIT_W_DESC_BASE<"bseti.w", or, vsplat_uimm_pow2,
+                                         MSA128WOpnd>;
+class BSETI_D_DESC : MSA_BIT_D_DESC_BASE<"bseti.d", or, vsplat_uimm_pow2,
+                                         MSA128DOpnd>;
 
 class BZ_B_DESC : MSA_CBRANCH_DESC_BASE<"bz.b", MSA128BOpnd>;
 class BZ_H_DESC : MSA_CBRANCH_DESC_BASE<"bz.h", MSA128HOpnd>;
diff --git a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
index 93e7bdf38a6..463c4e90c25 100644
--- a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
+++ b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
@@ -579,6 +579,27 @@ bool MipsSEDAGToDAGISel::selectVSplatMaskR(SDValue N, SDValue &Imm) const {
   return false;
 }
 
+bool MipsSEDAGToDAGISel::selectVSplatUimmInvPow2(SDValue N,
+                                                 SDValue &Imm) const {
+  APInt ImmValue;
+  EVT EltTy = N->getValueType(0).getVectorElementType();
+
+  if (N->getOpcode() == ISD::BITCAST)
+    N = N->getOperand(0);
+
+  if (selectVSplat(N.getNode(), ImmValue) &&
+      ImmValue.getBitWidth() == EltTy.getSizeInBits()) {
+    int32_t Log2 = (~ImmValue).exactLogBase2();
+
+    if (Log2 != -1) {
+      Imm = CurDAG->getTargetConstant(Log2, EltTy);
+      return true;
+    }
+  }
+
+  return false;
+}
+
 std::pair<bool, SDNode*> MipsSEDAGToDAGISel::selectNode(SDNode *Node) {
   unsigned Opcode = Node->getOpcode();
   SDLoc DL(Node);
diff --git a/lib/Target/Mips/MipsSEISelDAGToDAG.h b/lib/Target/Mips/MipsSEISelDAGToDAG.h
index 851fba0e19c..dc52064c983 100644
--- a/lib/Target/Mips/MipsSEISelDAGToDAG.h
+++ b/lib/Target/Mips/MipsSEISelDAGToDAG.h
@@ -81,6 +81,9 @@ private:
   virtual bool selectVSplatSimm5(SDValue N, SDValue &Imm) const;
   /// \brief Select constant vector splats whose value is a power of 2.
   virtual bool selectVSplatUimmPow2(SDValue N, SDValue &Imm) const;
+  /// \brief Select constant vector splats whose value is the inverse of a
+  /// power of 2.
+  virtual bool selectVSplatUimmInvPow2(SDValue N, SDValue &Imm) const;
   /// \brief Select constant vector splats whose value is a run of set bits
   /// ending at the most significant bit
   virtual bool selectVSplatMaskL(SDValue N, SDValue &Imm) const;
diff --git a/lib/Target/Mips/MipsSEISelLowering.cpp b/lib/Target/Mips/MipsSEISelLowering.cpp
index 2dcc69c557e..4b2bdcd95ef 100644
--- a/lib/Target/Mips/MipsSEISelLowering.cpp
+++ b/lib/Target/Mips/MipsSEISelLowering.cpp
@@ -1321,6 +1321,45 @@ static SDValue lowerMSABinaryBitImmIntr(SDValue Op, SelectionDAG &DAG,
   return DAG.getNode(Opc, DL, VecTy, Op->getOperand(1), Exp2Imm);
 }
 
+static SDValue lowerMSABitClear(SDValue Op, SelectionDAG &DAG) {
+  EVT ResTy = Op->getValueType(0);
+  EVT ViaVecTy = ResTy == MVT::v2i64 ? MVT::v4i32 : ResTy;
+  SDLoc DL(Op);
+  SDValue One = lowerMSASplatImm(DL, ResTy, DAG.getConstant(1, MVT::i32), DAG);
+  SDValue Bit = DAG.getNode(ISD::SHL, DL, ResTy, One, Op->getOperand(2));
+
+  SDValue AllOnes = DAG.getConstant(-1, MVT::i32);
+  SDValue AllOnesOperands[16] = { AllOnes, AllOnes, AllOnes, AllOnes,
+                                  AllOnes, AllOnes, AllOnes, AllOnes,
+                                  AllOnes, AllOnes, AllOnes, AllOnes,
+                                  AllOnes, AllOnes, AllOnes, AllOnes };
+  AllOnes = DAG.getNode(ISD::BUILD_VECTOR, DL, ViaVecTy, AllOnesOperands,
+                        ViaVecTy.getVectorNumElements());
+  if (ResTy != ViaVecTy)
+    AllOnes = DAG.getNode(ISD::BITCAST, DL, ResTy, AllOnes);
+
+  Bit = DAG.getNode(ISD::XOR, DL, ResTy, Bit, AllOnes);
+
+  return DAG.getNode(ISD::AND, DL, ResTy, Op->getOperand(1), Bit);
+}
+
+static SDValue lowerMSABitClearImm(SDValue Op, SelectionDAG &DAG) {
+  SDLoc DL(Op);
+  EVT ResTy = Op->getValueType(0);
+  unsigned ResTyNumElements = ResTy.getVectorNumElements();
+  SDValue SHAmount = Op->getOperand(2);
+  EVT ImmTy = SHAmount->getValueType(0);
+  SDValue Bit =
+      DAG.getNode(ISD::SHL, DL, ImmTy, DAG.getConstant(1, ImmTy), SHAmount);
+  SDValue BitMask = DAG.getNOT(DL, Bit, ImmTy);
+
+  assert(ResTyNumElements <= 16);
+
+  BitMask = lowerMSASplatImm(DL, ResTy, BitMask, DAG);
+
+  return DAG.getNode(ISD::AND, DL, ResTy, Op->getOperand(1), BitMask);
+}
+
 SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
                                                       SelectionDAG &DAG) const {
   SDLoc DL(Op);
@@ -1378,6 +1417,16 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
   case Intrinsic::mips_andi_b:
     return DAG.getNode(ISD::AND, DL, Op->getValueType(0), Op->getOperand(1),
                        lowerMSASplatImm(Op, 2, DAG));
+  case Intrinsic::mips_bclr_b:
+  case Intrinsic::mips_bclr_h:
+  case Intrinsic::mips_bclr_w:
+  case Intrinsic::mips_bclr_d:
+    return lowerMSABitClear(Op, DAG);
+  case Intrinsic::mips_bclri_b:
+  case Intrinsic::mips_bclri_h:
+  case Intrinsic::mips_bclri_w:
+  case Intrinsic::mips_bclri_d:
+    return lowerMSABitClearImm(Op, DAG);
   case Intrinsic::mips_binsli_b:
   case Intrinsic::mips_binsli_h:
   case Intrinsic::mips_binsli_w:
diff --git a/test/CodeGen/Mips/msa/bitwise.ll b/test/CodeGen/Mips/msa/bitwise.ll
index 2104921ebe1..5831a08d919 100644
--- a/test/CodeGen/Mips/msa/bitwise.ll
+++ b/test/CodeGen/Mips/msa/bitwise.ll
@@ -1243,6 +1243,78 @@ define void @binsr_v2i64_i(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind
   ; CHECK: .size binsr_v2i64_i
 }
 
+define void @bclr_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
+  ; CHECK: bclr_v16i8:
+
+  %1 = load <16 x i8>* %a
+  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <16 x i8>* %b
+  ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
+  %3 = shl <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>, %2
+  %4 = xor <16 x i8> %3, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+  %5 = and <16 x i8> %1, %4
+  ; CHECK-DAG: bclr.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <16 x i8> %5, <16 x i8>* %c
+  ; CHECK-DAG: st.b [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size bclr_v16i8
+}
+
+define void @bclr_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
+  ; CHECK: bclr_v8i16:
+
+  %1 = load <8 x i16>* %a
+  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <8 x i16>* %b
+  ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
+  %3 = shl <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, %2
+  %4 = xor <8 x i16> %3, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+  %5 = and <8 x i16> %1, %4
+  ; CHECK-DAG: bclr.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <8 x i16> %5, <8 x i16>* %c
+  ; CHECK-DAG: st.h [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size bclr_v8i16
+}
+
+define void @bclr_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
+  ; CHECK: bclr_v4i32:
+
+  %1 = load <4 x i32>* %a
+  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <4 x i32>* %b
+  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
+  %3 = shl <4 x i32> <i32 1, i32 1, i32 1, i32 1>, %2
+  %4 = xor <4 x i32> %3, <i32 -1, i32 -1, i32 -1, i32 -1>
+  %5 = and <4 x i32> %1, %4
+  ; CHECK-DAG: bclr.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <4 x i32> %5, <4 x i32>* %c
+  ; CHECK-DAG: st.w [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size bclr_v4i32
+}
+
+define void @bclr_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
+  ; CHECK: bclr_v2i64:
+
+  %1 = load <2 x i64>* %a
+  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
+  %2 = load <2 x i64>* %b
+  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
+  %3 = shl <2 x i64> <i64 1, i64 1>, %2
+  %4 = xor <2 x i64> %3, <i64 -1, i64 -1>
+  %5 = and <2 x i64> %1, %4
+  ; CHECK-DAG: bclr.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
+  store <2 x i64> %5, <2 x i64>* %c
+  ; CHECK-DAG: st.d [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size bclr_v2i64
+}
+
 define void @bset_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
   ; CHECK: bset_v16i8:
 
@@ -1379,6 +1451,71 @@ define void @bneg_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
   ; CHECK: .size bneg_v2i64
 }
 
+define void @bclri_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind {
+  ; CHECK: bclri_v16i8:
+
+  %1 = load <16 x i8>* %a
+  ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
+  %2 = xor <16 x i8> <i8  8, i8  8, i8  8, i8  8, i8  8, i8  8, i8  8, i8  8, i8  8, i8  8, i8  8, i8  8, i8  8, i8  8, i8  8, i8  8>,
+                     <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+  %3 = and <16 x i8> %1, %2
+  ; bclri.b and andi.b are exactly equivalent.
+  ; CHECK-DAG: andi.b [[R3:\$w[0-9]+]], [[R1]], 247
+  store <16 x i8> %3, <16 x i8>* %c
+  ; CHECK-DAG: st.b [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size bclri_v16i8
+}
+
+define void @bclri_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind {
+  ; CHECK: bclri_v8i16:
+
+  %1 = load <8 x i16>* %a
+  ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
+  %2 = xor <8 x i16> <i16  8, i16  8, i16  8, i16  8, i16  8, i16  8, i16  8, i16  8>,
+                     <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+  %3 = and <8 x i16> %1, %2
+  ; CHECK-DAG: bclri.h [[R3:\$w[0-9]+]], [[R1]], 3
+  store <8 x i16> %3, <8 x i16>* %c
+  ; CHECK-DAG: st.h [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size bclri_v8i16
+}
+
+define void @bclri_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind {
+  ; CHECK: bclri_v4i32:
+
+  %1 = load <4 x i32>* %a
+  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
+  %2 = xor <4 x i32> <i32  8, i32  8, i32  8, i32  8>,
+                     <i32 -1, i32 -1, i32 -1, i32 -1>
+  %3 = and <4 x i32> %1, %2
+  ; CHECK-DAG: bclri.w [[R3:\$w[0-9]+]], [[R1]], 3
+  store <4 x i32> %3, <4 x i32>* %c
+  ; CHECK-DAG: st.w [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size bclri_v4i32
+}
+
+define void @bclri_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind {
+  ; CHECK: bclri_v2i64:
+
+  %1 = load <2 x i64>* %a
+  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
+  %2 = xor <2 x i64> <i64  8, i64  8>,
+                     <i64 -1, i64 -1>
+  %3 = and <2 x i64> %1, %2
+  ; CHECK-DAG: bclri.d [[R3:\$w[0-9]+]], [[R1]], 3
+  store <2 x i64> %3, <2 x i64>* %c
+  ; CHECK-DAG: st.d [[R3]], 0($4)
+
+  ret void
+  ; CHECK: .size bclri_v2i64
+}
+
 define void @bseti_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind {
   ; CHECK: bseti_v16i8:
 
diff --git a/test/CodeGen/Mips/msa/i5-b.ll b/test/CodeGen/Mips/msa/i5-b.ll
index 14f2066ded7..d1df6e9db6d 100644
--- a/test/CodeGen/Mips/msa/i5-b.ll
+++ b/test/CodeGen/Mips/msa/i5-b.ll
@@ -18,7 +18,8 @@ declare <16 x i8> @llvm.mips.bclri.b(<16 x i8>, i32) nounwind
 
 ; CHECK: llvm_mips_bclri_b_test:
 ; CHECK: ld.b
-; CHECK: bclri.b
+; andi.b is equivalent to bclri.b
+; CHECK: andi.b {{\$w[0-9]}}, {{\$w[0-9]}}, 127
 ; CHECK: st.b
 ; CHECK: .size llvm_mips_bclri_b_test
 ;
-- 
2.11.0