From 93d995719e2459a6e9ccdb2c93a8ede8fa88c899 Mon Sep 17 00:00:00 2001 From: Daniel Sanders Date: Tue, 24 Sep 2013 14:20:00 +0000 Subject: [PATCH] [mips][msa] Added support for matching shf from normal IR (i.e. not intrinsics) git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191302 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp | 5 ++ lib/Target/Mips/InstPrinter/MipsInstPrinter.h | 1 + lib/Target/Mips/MipsISelLowering.cpp | 1 + lib/Target/Mips/MipsISelLowering.h | 1 + lib/Target/Mips/MipsMSAInstrInfo.td | 19 ++++++- lib/Target/Mips/MipsSEISelLowering.cpp | 74 +++++++++++++++++++++++++ test/CodeGen/Mips/msa/shuffle.ll | 59 +++++++++++++++++--- 7 files changed, 149 insertions(+), 11 deletions(-) diff --git a/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp b/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp index 26321c99243..2612e3ba1ce 100644 --- a/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp +++ b/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp @@ -211,6 +211,11 @@ printFCCOperand(const MCInst *MI, int opNum, raw_ostream &O) { O << MipsFCCToString((Mips::CondCode)MO.getImm()); } +void MipsInstPrinter:: +printSHFMask(const MCInst *MI, int opNum, raw_ostream &O) { + llvm_unreachable("TODO"); +} + bool MipsInstPrinter::printAlias(const char *Str, const MCInst &MI, unsigned OpNo, raw_ostream &OS) { OS << "\t" << Str << "\t"; diff --git a/lib/Target/Mips/InstPrinter/MipsInstPrinter.h b/lib/Target/Mips/InstPrinter/MipsInstPrinter.h index 1253ab0c48a..11590ad8241 100644 --- a/lib/Target/Mips/InstPrinter/MipsInstPrinter.h +++ b/lib/Target/Mips/InstPrinter/MipsInstPrinter.h @@ -96,6 +96,7 @@ private: void printMemOperand(const MCInst *MI, int opNum, raw_ostream &O); void printMemOperandEA(const MCInst *MI, int opNum, raw_ostream &O); void printFCCOperand(const MCInst *MI, int opNum, raw_ostream &O); + void printSHFMask(const MCInst *MI, int opNum, raw_ostream &O); bool printAlias(const char *Str, const MCInst &MI, unsigned OpNo, raw_ostream &OS); diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index 04f90833d82..90d2cc6f290 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -225,6 +225,7 @@ const char *MipsTargetLowering::getTargetNodeName(unsigned Opcode) const { case MipsISD::VEXTRACT_ZEXT_ELT: return "MipsISD::VEXTRACT_ZEXT_ELT"; case MipsISD::VNOR: return "MipsISD::VNOR"; case MipsISD::VSHF: return "MipsISD::VSHF"; + case MipsISD::SHF: return "MipsISD::SHF"; default: return NULL; } } diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h index 0cb67b8d440..9e9e9959183 100644 --- a/lib/Target/Mips/MipsISelLowering.h +++ b/lib/Target/Mips/MipsISelLowering.h @@ -174,6 +174,7 @@ namespace llvm { // Vector Shuffle with mask as an operand VSHF, // Generic shuffle + SHF, // 4-element set shuffle. // Combined (XOR (OR $a, $b), -1) VNOR, diff --git a/lib/Target/Mips/MipsMSAInstrInfo.td b/lib/Target/Mips/MipsMSAInstrInfo.td index 4909743e349..5592ac5690b 100644 --- a/lib/Target/Mips/MipsMSAInstrInfo.td +++ b/lib/Target/Mips/MipsMSAInstrInfo.td @@ -23,6 +23,8 @@ def SDT_VFSetCC : SDTypeProfile<1, 3, [SDTCisInt<0>, def SDT_VSHF : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisVec<0>, SDTCisInt<1>, SDTCisVec<1>, SDTCisSameAs<0, 2>, SDTCisSameAs<2, 3>]>; +def SDT_SHF : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisVec<0>, + SDTCisVT<1, i32>, SDTCisSameAs<0, 2>]>; def MipsVAllNonZero : SDNode<"MipsISD::VALL_NONZERO", SDT_MipsVecCond>; def MipsVAnyNonZero : SDNode<"MipsISD::VANY_NONZERO", SDT_MipsVecCond>; @@ -39,6 +41,7 @@ def MipsVUMin : SDNode<"MipsISD::VUMIN", SDTIntBinOp, def MipsVNOR : SDNode<"MipsISD::VNOR", SDTIntBinOp, [SDNPCommutative, SDNPAssociative]>; def MipsVSHF : SDNode<"MipsISD::VSHF", SDT_VSHF>; +def MipsSHF : SDNode<"MipsISD::SHF", SDT_SHF>; def vsetcc : SDNode<"ISD::SETCC", SDT_VSetCC>; def vfsetcc : SDNode<"ISD::SETCC", SDT_VFSetCC>; @@ -1074,6 +1077,16 @@ class MSA_I8_X_DESC_BASE { + dag OutOperandList = (outs RCWD:$wd); + dag InOperandList = (ins RCWS:$ws, uimm8:$u8); + string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $u8"); + list Pattern = [(set RCWD:$wd, (MipsSHF immZExt8:$u8, RCWS:$ws))]; + InstrItinClass Itinerary = itin; +} + class MSA_I10_LDI_DESC_BASE { dag OutOperandList = (outs RCWD:$wd); @@ -2066,9 +2079,9 @@ class SAT_U_H_DESC : MSA_BIT_H_DESC_BASE<"sat_u.h", int_mips_sat_u_h, MSA128H>; class SAT_U_W_DESC : MSA_BIT_W_DESC_BASE<"sat_u.w", int_mips_sat_u_w, MSA128W>; class SAT_U_D_DESC : MSA_BIT_D_DESC_BASE<"sat_u.d", int_mips_sat_u_d, MSA128D>; -class SHF_B_DESC : MSA_I8_X_DESC_BASE<"shf.b", int_mips_shf_b, MSA128B>; -class SHF_H_DESC : MSA_I8_X_DESC_BASE<"shf.h", int_mips_shf_h, MSA128H>; -class SHF_W_DESC : MSA_I8_X_DESC_BASE<"shf.w", int_mips_shf_w, MSA128W>; +class SHF_B_DESC : MSA_I8_SHF_DESC_BASE<"shf.b", MSA128B>; +class SHF_H_DESC : MSA_I8_SHF_DESC_BASE<"shf.h", MSA128H>; +class SHF_W_DESC : MSA_I8_SHF_DESC_BASE<"shf.w", MSA128W>; class SLD_B_DESC : MSA_3R_DESC_BASE<"sld.b", int_mips_sld_b, MSA128B>; class SLD_H_DESC : MSA_3R_DESC_BASE<"sld.h", int_mips_sld_h, MSA128H>; diff --git a/lib/Target/Mips/MipsSEISelLowering.cpp b/lib/Target/Mips/MipsSEISelLowering.cpp index 929e91eb60e..b79e5321b69 100644 --- a/lib/Target/Mips/MipsSEISelLowering.cpp +++ b/lib/Target/Mips/MipsSEISelLowering.cpp @@ -1428,6 +1428,11 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, case Intrinsic::mips_pcnt_w: case Intrinsic::mips_pcnt_d: return lowerMSAUnaryIntr(Op, DAG, ISD::CTPOP); + case Intrinsic::mips_shf_b: + case Intrinsic::mips_shf_h: + case Intrinsic::mips_shf_w: + return DAG.getNode(MipsISD::SHF, SDLoc(Op), Op->getValueType(0), + Op->getOperand(2), Op->getOperand(1)); case Intrinsic::mips_sll_b: case Intrinsic::mips_sll_h: case Intrinsic::mips_sll_w: @@ -1735,6 +1740,72 @@ SDValue MipsSETargetLowering::lowerBUILD_VECTOR(SDValue Op, return SDValue(); } +// Lower VECTOR_SHUFFLE into SHF (if possible). +// +// SHF splits the vector into blocks of four elements, then shuffles these +// elements according to a <4 x i2> constant (encoded as an integer immediate). +// +// It is therefore possible to lower into SHF when the mask takes the form: +// +// When undef's appear they are treated as if they were whatever value is +// necessary in order to fit the above form. +// +// For example: +// %2 = shufflevector <8 x i16> %0, <8 x i16> undef, +// <8 x i32> +// is lowered to: +// (SHF_H $w0, $w1, 27) +// where the 27 comes from: +// 3 + (2 << 2) + (1 << 4) + (0 << 6) +static SDValue lowerVECTOR_SHUFFLE_SHF(SDValue Op, EVT ResTy, + SmallVector Indices, + SelectionDAG &DAG) { + int SHFIndices[4] = { -1, -1, -1, -1 }; + + if (Indices.size() < 4) + return SDValue(); + + for (unsigned i = 0; i < 4; ++i) { + for (unsigned j = i; j < Indices.size(); j += 4) { + int Idx = Indices[j]; + + // Convert from vector index to 4-element subvector index + // If an index refers to an element outside of the subvector then give up + if (Idx != -1) { + Idx -= 4 * (j / 4); + if (Idx < 0 || Idx >= 4) + return SDValue(); + } + + // If the mask has an undef, replace it with the current index. + // Note that it might still be undef if the current index is also undef + if (SHFIndices[i] == -1) + SHFIndices[i] = Idx; + + // Check that non-undef values are the same as in the mask. If they + // aren't then give up + if (!(Idx == -1 || Idx == SHFIndices[i])) + return SDValue(); + } + } + + // Calculate the immediate. Replace any remaining undefs with zero + APInt Imm(32, 0); + for (int i = 3; i >= 0; --i) { + int Idx = SHFIndices[i]; + + if (Idx == -1) + Idx = 0; + + Imm <<= 2; + Imm |= Idx & 0x3; + } + + return DAG.getNode(MipsISD::SHF, SDLoc(Op), ResTy, + DAG.getConstant(Imm, MVT::i32), Op->getOperand(0)); +} + // Lower VECTOR_SHUFFLE into VSHF. // // This mostly consists of converting the shuffle indices in Indices into a @@ -1802,6 +1873,9 @@ SDValue MipsSETargetLowering::lowerVECTOR_SHUFFLE(SDValue Op, for (int i = 0; i < ResTyNumElts; ++i) Indices.push_back(Node->getMaskElt(i)); + SDValue Result = lowerVECTOR_SHUFFLE_SHF(Op, ResTy, Indices, DAG); + if (Result.getNode()) + return Result; return lowerVECTOR_SHUFFLE_VSHF(Op, ResTy, Indices, DAG); } diff --git a/test/CodeGen/Mips/msa/shuffle.ll b/test/CodeGen/Mips/msa/shuffle.ll index 35a5cf8658c..9854234c719 100644 --- a/test/CodeGen/Mips/msa/shuffle.ll +++ b/test/CodeGen/Mips/msa/shuffle.ll @@ -156,14 +156,16 @@ define void @vshf_v8i16_4(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind ; CHECK: .size vshf_v8i16_4 } +; Note: v4i32 only has one 4-element set so it's impossible to get a vshf.w +; instruction when using a single vector. + define void @vshf_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind { ; CHECK: vshf_v4i32_0: %1 = load <4 x i32>* %a ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> - ; CHECK-DAG: ld.w [[R3:\$w[0-9]+]], %lo - ; CHECK-DAG: vshf.w [[R3:\$w[0-9]+]], [[R1]], [[R1]] + ; CHECK-DAG: shf.w [[R3:\$w[0-9]+]], [[R1]], 27 store <4 x i32> %2, <4 x i32>* %c ; CHECK-DAG: st.w [[R3]], 0($4) @@ -177,8 +179,7 @@ define void @vshf_v4i32_1(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind %1 = load <4 x i32>* %a ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> - ; CHECK-DAG: ldi.w [[R3:\$w[0-9]+]], 1 - ; CHECK-DAG: vshf.w [[R3:\$w[0-9]+]], [[R1]], [[R1]] + ; CHECK-DAG: shf.w [[R3:\$w[0-9]+]], [[R1]], 85 store <4 x i32> %2, <4 x i32>* %c ; CHECK-DAG: st.w [[R3]], 0($4) @@ -193,8 +194,7 @@ define void @vshf_v4i32_2(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind %2 = load <4 x i32>* %b ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6) %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> - ; CHECK-DAG: ld.w [[R3:\$w[0-9]+]], %lo - ; CHECK-DAG: vshf.w [[R3:\$w[0-9]+]], [[R2]], [[R2]] + ; CHECK-DAG: shf.w [[R3:\$w[0-9]+]], [[R2]], 36 store <4 x i32> %3, <4 x i32>* %c ; CHECK-DAG: st.w [[R3]], 0($4) @@ -225,8 +225,7 @@ define void @vshf_v4i32_4(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind %1 = load <4 x i32>* %a ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) %2 = shufflevector <4 x i32> %1, <4 x i32> %1, <4 x i32> - ; CHECK-DAG: ldi.w [[R3:\$w[0-9]+]], 1 - ; CHECK-DAG: vshf.w [[R3:\$w[0-9]+]], [[R1]], [[R1]] + ; CHECK-DAG: shf.w [[R3:\$w[0-9]+]], [[R1]], 85 store <4 x i32> %2, <4 x i32>* %c ; CHECK-DAG: st.w [[R3]], 0($4) @@ -311,3 +310,47 @@ define void @vshf_v2i64_4(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind ret void ; CHECK: .size vshf_v2i64_4 } + +define void @shf_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind { + ; CHECK: shf_v16i8_0: + + %1 = load <16 x i8>* %a + ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5) + %2 = shufflevector <16 x i8> %1, <16 x i8> undef, <16 x i32> + ; CHECK-DAG: shf.b [[R3:\$w[0-9]+]], [[R1]], 45 + store <16 x i8> %2, <16 x i8>* %c + ; CHECK-DAG: st.b [[R3]], 0($4) + + ret void + ; CHECK: .size shf_v16i8_0 +} + +define void @shf_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind { + ; CHECK: shf_v8i16_0: + + %1 = load <8 x i16>* %a + ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5) + %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> + ; CHECK-DAG: shf.h [[R3:\$w[0-9]+]], [[R1]], 27 + store <8 x i16> %2, <8 x i16>* %c + ; CHECK-DAG: st.h [[R3]], 0($4) + + ret void + ; CHECK: .size shf_v8i16_0 +} + +define void @shf_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind { + ; CHECK: shf_v4i32_0: + + %1 = load <4 x i32>* %a + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) + %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> + ; CHECK-DAG: shf.w [[R3:\$w[0-9]+]], [[R1]], 27 + store <4 x i32> %2, <4 x i32>* %c + ; CHECK-DAG: st.w [[R3]], 0($4) + + ret void + ; CHECK: .size shf_v4i32_0 +} + +; shf.d does not exist -- 2.11.0