case ARMISD::VCGTU: return "ARMISD::VCGTU";
case ARMISD::VTST: return "ARMISD::VTST";
- case ARMISD::VSHL: return "ARMISD::VSHL";
- case ARMISD::VSHRs: return "ARMISD::VSHRs";
- case ARMISD::VSHRu: return "ARMISD::VSHRu";
- case ARMISD::VRSHRs: return "ARMISD::VRSHRs";
- case ARMISD::VRSHRu: return "ARMISD::VRSHRu";
- case ARMISD::VRSHRN: return "ARMISD::VRSHRN";
- case ARMISD::VQSHLs: return "ARMISD::VQSHLs";
- case ARMISD::VQSHLu: return "ARMISD::VQSHLu";
- case ARMISD::VQSHLsu: return "ARMISD::VQSHLsu";
- case ARMISD::VQSHRNs: return "ARMISD::VQSHRNs";
- case ARMISD::VQSHRNu: return "ARMISD::VQSHRNu";
- case ARMISD::VQSHRNsu: return "ARMISD::VQSHRNsu";
- case ARMISD::VQRSHRNs: return "ARMISD::VQRSHRNs";
- case ARMISD::VQRSHRNu: return "ARMISD::VQRSHRNu";
- case ARMISD::VQRSHRNsu: return "ARMISD::VQRSHRNsu";
- case ARMISD::VSLI: return "ARMISD::VSLI";
- case ARMISD::VSRI: return "ARMISD::VSRI";
+ case ARMISD::VSHLs: return "ARMISD::VSHLs";
+ case ARMISD::VSHLu: return "ARMISD::VSHLu";
+ case ARMISD::VSHLIMM: return "ARMISD::VSHLIMM";
+ case ARMISD::VSHRsIMM: return "ARMISD::VSHRsIMM";
+ case ARMISD::VSHRuIMM: return "ARMISD::VSHRuIMM";
+ case ARMISD::VRSHRsIMM: return "ARMISD::VRSHRsIMM";
+ case ARMISD::VRSHRuIMM: return "ARMISD::VRSHRuIMM";
+ case ARMISD::VRSHRNIMM: return "ARMISD::VRSHRNIMM";
+ case ARMISD::VQSHLsIMM: return "ARMISD::VQSHLsIMM";
+ case ARMISD::VQSHLuIMM: return "ARMISD::VQSHLuIMM";
+ case ARMISD::VQSHLsuIMM: return "ARMISD::VQSHLsuIMM";
+ case ARMISD::VQSHRNsIMM: return "ARMISD::VQSHRNsIMM";
+ case ARMISD::VQSHRNuIMM: return "ARMISD::VQSHRNuIMM";
+ case ARMISD::VQSHRNsuIMM: return "ARMISD::VQSHRNsuIMM";
+ case ARMISD::VQRSHRNsIMM: return "ARMISD::VQRSHRNsIMM";
+ case ARMISD::VQRSHRNuIMM: return "ARMISD::VQRSHRNuIMM";
+ case ARMISD::VQRSHRNsuIMM: return "ARMISD::VQRSHRNsuIMM";
+ case ARMISD::VSLIIMM: return "ARMISD::VSLIIMM";
+ case ARMISD::VSRIIMM: return "ARMISD::VSRIIMM";
case ARMISD::VGETLANEu: return "ARMISD::VGETLANEu";
case ARMISD::VGETLANEs: return "ARMISD::VGETLANEs";
case ARMISD::VMOVIMM: return "ARMISD::VMOVIMM";
DAG.getTargetConstant(EncodedVal, dl, MVT::i32));
EVT OpVT = (VT == MVT::f32) ? MVT::v2i32 : MVT::v1i64;
if (VT == MVT::f64)
- Mask = DAG.getNode(ARMISD::VSHL, dl, OpVT,
+ Mask = DAG.getNode(ARMISD::VSHLIMM, dl, OpVT,
DAG.getNode(ISD::BITCAST, dl, OpVT, Mask),
DAG.getConstant(32, dl, MVT::i32));
else /*if (VT == MVT::f32)*/
if (SrcVT == MVT::f32) {
Tmp1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp1);
if (VT == MVT::f64)
- Tmp1 = DAG.getNode(ARMISD::VSHL, dl, OpVT,
+ Tmp1 = DAG.getNode(ARMISD::VSHLIMM, dl, OpVT,
DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1),
DAG.getConstant(32, dl, MVT::i32));
} else if (VT == MVT::f32)
- Tmp1 = DAG.getNode(ARMISD::VSHRu, dl, MVT::v1i64,
+ Tmp1 = DAG.getNode(ARMISD::VSHRuIMM, dl, MVT::v1i64,
DAG.getNode(ISD::BITCAST, dl, MVT::v1i64, Tmp1),
DAG.getConstant(32, dl, MVT::i32));
Tmp0 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp0);
return Res;
}
+/// getVShiftImm - Check if this is a valid build_vector for the immediate
+/// operand of a vector shift operation, where all the elements of the
+/// build_vector must have the same constant integer value.
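+/// For example, (v4i16 (build_vector 8, 8, 8, 8)) sets Cnt to 8.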
+static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) {
+ // Ignore bit_converts.
+ while (Op.getOpcode() == ISD::BITCAST)
+ Op = Op.getOperand(0);
+ BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
+ APInt SplatBits, SplatUndef;
+ unsigned SplatBitSize;
+ bool HasAnyUndefs;
+ if (!BVN ||
+ !BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs,
+ ElementBits) ||
+ SplatBitSize > ElementBits)
+ return false;
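+  // Use the sign-extended splat value: right-shift intrinsics encode their
+  // counts as negative constants.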
+ Cnt = SplatBits.getSExtValue();
+ return true;
+}
+
+/// isVShiftLImm - Check if this is a valid build_vector for the immediate
+/// operand of a vector shift left operation. That value must be in the range:
+/// 0 <= Value < ElementBits for a left shift; or
+/// 0 <= Value <= ElementBits for a long left shift.
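+/// For example, for v4i16 a plain left shift accepts counts 0-15, while a
+/// long left shift (vshll) also accepts 16.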
+static bool isVShiftLImm(SDValue Op, EVT VT, bool isLong, int64_t &Cnt) {
+ assert(VT.isVector() && "vector shift count is not a vector type");
+ int64_t ElementBits = VT.getScalarSizeInBits();
+ if (!getVShiftImm(Op, ElementBits, Cnt))
+ return false;
+ return (Cnt >= 0 && (isLong ? Cnt - 1 : Cnt) < ElementBits);
+}
+
+/// isVShiftRImm - Check if this is a valid build_vector for the immediate
+/// operand of a vector shift right operation. For a shift opcode, the value
+/// is positive, but for an intrinsic the count must be negative. The
+/// absolute value must be in the range:
+/// 1 <= |Value| <= ElementBits for a right shift; or
+/// 1 <= |Value| <= ElementBits/2 for a narrow right shift.
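+/// For example, for v4i16 a shift opcode may use a count of up to 16, which
+/// an intrinsic instead encodes as a splat of -16.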
+static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, bool isIntrinsic,
+ int64_t &Cnt) {
+ assert(VT.isVector() && "vector shift count is not a vector type");
+ int64_t ElementBits = VT.getScalarSizeInBits();
+ if (!getVShiftImm(Op, ElementBits, Cnt))
+ return false;
+ if (!isIntrinsic)
+ return (Cnt >= 1 && Cnt <= (isNarrow ? ElementBits / 2 : ElementBits));
+ if (Cnt >= -(isNarrow ? ElementBits / 2 : ElementBits) && Cnt <= -1) {
+ Cnt = -Cnt;
+ return true;
+ }
+ return false;
+}
+
static SDValue LowerShift(SDNode *N, SelectionDAG &DAG,
const ARMSubtarget *ST) {
EVT VT = N->getValueType(0);
SDLoc dl(N);
+ int64_t Cnt;
if (!VT.isVector())
return SDValue();
- // Lower vector shifts on NEON to use VSHL.
- assert(ST->hasNEON() && "unexpected vector shift");
+  // We essentially have two forms here: shift by an immediate and shift by a
+  // vector register. We cannot easily match shift by an immediate in tablegen
+  // so we do that here and generate a VSHLIMM/VSHRsIMM/VSHRuIMM. For shifting
+  // by a vector we don't have a VSHR, only a VSHL (which can be signed or
+  // unsigned, and where a negative shift indicates a shift right).
+ if (N->getOpcode() == ISD::SHL) {
+ if (isVShiftLImm(N->getOperand(1), VT, false, Cnt))
+ return DAG.getNode(ARMISD::VSHLIMM, dl, VT, N->getOperand(0),
+ DAG.getConstant(Cnt, dl, MVT::i32));
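+    // Non-constant counts use the register form; a left shift behaves the
+    // same for signed and unsigned values, so VSHLu covers both.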
+ return DAG.getNode(ARMISD::VSHLu, dl, VT, N->getOperand(0),
+ N->getOperand(1));
+ }
- // Left shifts translate directly to the vshiftu intrinsic.
- if (N->getOpcode() == ISD::SHL)
- return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
- DAG.getConstant(Intrinsic::arm_neon_vshiftu, dl,
- MVT::i32),
- N->getOperand(0), N->getOperand(1));
+ assert((N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) &&
+ "unexpected vector shift opcode");
- assert((N->getOpcode() == ISD::SRA ||
- N->getOpcode() == ISD::SRL) && "unexpected vector shift opcode");
+ if (isVShiftRImm(N->getOperand(1), VT, false, false, Cnt)) {
+ unsigned VShiftOpc =
+ (N->getOpcode() == ISD::SRA ? ARMISD::VSHRsIMM : ARMISD::VSHRuIMM);
+ return DAG.getNode(VShiftOpc, dl, VT, N->getOperand(0),
+ DAG.getConstant(Cnt, dl, MVT::i32));
+ }
- // NEON uses the same intrinsics for both left and right shifts. For
- // right shifts, the shift amounts are negative, so negate the vector of
- // shift amounts.
+  // Other right shifts have no dedicated operation; lower them to a shift
+  // left by a negated count.
EVT ShiftVT = N->getOperand(1).getValueType();
- SDValue NegatedCount = DAG.getNode(ISD::SUB, dl, ShiftVT,
- getZeroVector(ShiftVT, DAG, dl),
- N->getOperand(1));
- Intrinsic::ID vshiftInt = (N->getOpcode() == ISD::SRA ?
- Intrinsic::arm_neon_vshifts :
- Intrinsic::arm_neon_vshiftu);
- return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
- DAG.getConstant(vshiftInt, dl, MVT::i32),
- N->getOperand(0), NegatedCount);
+ SDValue NegatedCount = DAG.getNode(
+ ISD::SUB, dl, ShiftVT, getZeroVector(ShiftVT, DAG, dl), N->getOperand(1));
+ unsigned VShiftOpc =
+ (N->getOpcode() == ISD::SRA ? ARMISD::VSHLs : ARMISD::VSHLu);
+ return DAG.getNode(VShiftOpc, dl, VT, N->getOperand(0), NegatedCount);
}
static SDValue Expand64BitShift(SDNode *N, SelectionDAG &DAG,
ConvInput, DAG.getConstant(C, dl, MVT::i32));
}
-/// getVShiftImm - Check if this is a valid build_vector for the immediate
-/// operand of a vector shift operation, where all the elements of the
-/// build_vector must have the same constant integer value.
-static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) {
- // Ignore bit_converts.
- while (Op.getOpcode() == ISD::BITCAST)
- Op = Op.getOperand(0);
- BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
- APInt SplatBits, SplatUndef;
- unsigned SplatBitSize;
- bool HasAnyUndefs;
- if (! BVN || ! BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize,
- HasAnyUndefs, ElementBits) ||
- SplatBitSize > ElementBits)
- return false;
- Cnt = SplatBits.getSExtValue();
- return true;
-}
-
-/// isVShiftLImm - Check if this is a valid build_vector for the immediate
-/// operand of a vector shift left operation. That value must be in the range:
-/// 0 <= Value < ElementBits for a left shift; or
-/// 0 <= Value <= ElementBits for a long left shift.
-static bool isVShiftLImm(SDValue Op, EVT VT, bool isLong, int64_t &Cnt) {
- assert(VT.isVector() && "vector shift count is not a vector type");
- int64_t ElementBits = VT.getScalarSizeInBits();
- if (! getVShiftImm(Op, ElementBits, Cnt))
- return false;
- return (Cnt >= 0 && (isLong ? Cnt-1 : Cnt) < ElementBits);
-}
-
-/// isVShiftRImm - Check if this is a valid build_vector for the immediate
-/// operand of a vector shift right operation. For a shift opcode, the value
-/// is positive, but for an intrinsic the value count must be negative. The
-/// absolute value must be in the range:
-/// 1 <= |Value| <= ElementBits for a right shift; or
-/// 1 <= |Value| <= ElementBits/2 for a narrow right shift.
-static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, bool isIntrinsic,
- int64_t &Cnt) {
- assert(VT.isVector() && "vector shift count is not a vector type");
- int64_t ElementBits = VT.getScalarSizeInBits();
- if (! getVShiftImm(Op, ElementBits, Cnt))
- return false;
- if (!isIntrinsic)
- return (Cnt >= 1 && Cnt <= (isNarrow ? ElementBits/2 : ElementBits));
- if (Cnt >= -(isNarrow ? ElementBits/2 : ElementBits) && Cnt <= -1) {
- Cnt = -Cnt;
- return true;
- }
- return false;
-}
-
/// PerformIntrinsicCombine - ARM-specific DAG combining for intrinsics.
static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) {
unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
case Intrinsic::arm_neon_vshifts:
case Intrinsic::arm_neon_vshiftu:
if (isVShiftLImm(N->getOperand(2), VT, false, Cnt)) {
- VShiftOpc = ARMISD::VSHL;
+ VShiftOpc = ARMISD::VSHLIMM;
break;
}
if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt)) {
- VShiftOpc = (IntNo == Intrinsic::arm_neon_vshifts ?
- ARMISD::VSHRs : ARMISD::VSHRu);
+ VShiftOpc = (IntNo == Intrinsic::arm_neon_vshifts ? ARMISD::VSHRsIMM
+ : ARMISD::VSHRuIMM);
break;
}
return SDValue();
// Opcode already set above.
break;
case Intrinsic::arm_neon_vrshifts:
- VShiftOpc = ARMISD::VRSHRs; break;
+ VShiftOpc = ARMISD::VRSHRsIMM;
+ break;
case Intrinsic::arm_neon_vrshiftu:
- VShiftOpc = ARMISD::VRSHRu; break;
+ VShiftOpc = ARMISD::VRSHRuIMM;
+ break;
case Intrinsic::arm_neon_vrshiftn:
- VShiftOpc = ARMISD::VRSHRN; break;
+ VShiftOpc = ARMISD::VRSHRNIMM;
+ break;
case Intrinsic::arm_neon_vqshifts:
- VShiftOpc = ARMISD::VQSHLs; break;
+ VShiftOpc = ARMISD::VQSHLsIMM;
+ break;
case Intrinsic::arm_neon_vqshiftu:
- VShiftOpc = ARMISD::VQSHLu; break;
+ VShiftOpc = ARMISD::VQSHLuIMM;
+ break;
case Intrinsic::arm_neon_vqshiftsu:
- VShiftOpc = ARMISD::VQSHLsu; break;
+ VShiftOpc = ARMISD::VQSHLsuIMM;
+ break;
case Intrinsic::arm_neon_vqshiftns:
- VShiftOpc = ARMISD::VQSHRNs; break;
+ VShiftOpc = ARMISD::VQSHRNsIMM;
+ break;
case Intrinsic::arm_neon_vqshiftnu:
- VShiftOpc = ARMISD::VQSHRNu; break;
+ VShiftOpc = ARMISD::VQSHRNuIMM;
+ break;
case Intrinsic::arm_neon_vqshiftnsu:
- VShiftOpc = ARMISD::VQSHRNsu; break;
+ VShiftOpc = ARMISD::VQSHRNsuIMM;
+ break;
case Intrinsic::arm_neon_vqrshiftns:
- VShiftOpc = ARMISD::VQRSHRNs; break;
+ VShiftOpc = ARMISD::VQRSHRNsIMM;
+ break;
case Intrinsic::arm_neon_vqrshiftnu:
- VShiftOpc = ARMISD::VQRSHRNu; break;
+ VShiftOpc = ARMISD::VQRSHRNuIMM;
+ break;
case Intrinsic::arm_neon_vqrshiftnsu:
- VShiftOpc = ARMISD::VQRSHRNsu; break;
+ VShiftOpc = ARMISD::VQRSHRNsuIMM;
+ break;
}
SDLoc dl(N);
unsigned VShiftOpc = 0;
if (isVShiftLImm(N->getOperand(3), VT, false, Cnt))
- VShiftOpc = ARMISD::VSLI;
+ VShiftOpc = ARMISD::VSLIIMM;
else if (isVShiftRImm(N->getOperand(3), VT, false, true, Cnt))
- VShiftOpc = ARMISD::VSRI;
+ VShiftOpc = ARMISD::VSRIIMM;
else {
llvm_unreachable("invalid shift count for vsli/vsri intrinsic");
}
case ISD::SHL:
if (isVShiftLImm(N->getOperand(1), VT, false, Cnt)) {
SDLoc dl(N);
- return DAG.getNode(ARMISD::VSHL, dl, VT, N->getOperand(0),
+ return DAG.getNode(ARMISD::VSHLIMM, dl, VT, N->getOperand(0),
DAG.getConstant(Cnt, dl, MVT::i32));
}
break;
case ISD::SRA:
case ISD::SRL:
if (isVShiftRImm(N->getOperand(1), VT, false, false, Cnt)) {
- unsigned VShiftOpc = (N->getOpcode() == ISD::SRA ?
- ARMISD::VSHRs : ARMISD::VSHRu);
+ unsigned VShiftOpc =
+ (N->getOpcode() == ISD::SRA ? ARMISD::VSHRsIMM : ARMISD::VSHRuIMM);
SDLoc dl(N);
return DAG.getNode(VShiftOpc, dl, VT, N->getOperand(0),
DAG.getConstant(Cnt, dl, MVT::i32));
SDNode *U = *ExtVal->use_begin();
if ((U->getOpcode() == ISD::ADD || U->getOpcode() == ISD::SUB ||
- U->getOpcode() == ISD::SHL || U->getOpcode() == ARMISD::VSHL))
+ U->getOpcode() == ISD::SHL || U->getOpcode() == ARMISD::VSHLIMM))
return false;
return true;
VCGTU, // Vector compare unsigned greater than.
VTST, // Vector test bits.
+    // Vector shift by vector (a negative lane amount shifts right):
+ VSHLs, // ...left/right by signed
+ VSHLu, // ...left/right by unsigned
+
// Vector shift by immediate:
- VSHL, // ...left
- VSHRs, // ...right (signed)
- VSHRu, // ...right (unsigned)
+ VSHLIMM, // ...left
+ VSHRsIMM, // ...right (signed)
+ VSHRuIMM, // ...right (unsigned)
// Vector rounding shift by immediate:
- VRSHRs, // ...right (signed)
- VRSHRu, // ...right (unsigned)
- VRSHRN, // ...right narrow
+ VRSHRsIMM, // ...right (signed)
+ VRSHRuIMM, // ...right (unsigned)
+ VRSHRNIMM, // ...right narrow
// Vector saturating shift by immediate:
- VQSHLs, // ...left (signed)
- VQSHLu, // ...left (unsigned)
- VQSHLsu, // ...left (signed to unsigned)
- VQSHRNs, // ...right narrow (signed)
- VQSHRNu, // ...right narrow (unsigned)
- VQSHRNsu, // ...right narrow (signed to unsigned)
+ VQSHLsIMM, // ...left (signed)
+ VQSHLuIMM, // ...left (unsigned)
+ VQSHLsuIMM, // ...left (signed to unsigned)
+ VQSHRNsIMM, // ...right narrow (signed)
+ VQSHRNuIMM, // ...right narrow (unsigned)
+ VQSHRNsuIMM, // ...right narrow (signed to unsigned)
// Vector saturating rounding shift by immediate:
- VQRSHRNs, // ...right narrow (signed)
- VQRSHRNu, // ...right narrow (unsigned)
- VQRSHRNsu, // ...right narrow (signed to unsigned)
+ VQRSHRNsIMM, // ...right narrow (signed)
+ VQRSHRNuIMM, // ...right narrow (unsigned)
+ VQRSHRNsuIMM, // ...right narrow (signed to unsigned)
// Vector shift and insert:
- VSLI, // ...left
- VSRI, // ...right
+ VSLIIMM, // ...left
+ VSRIIMM, // ...right
// Vector get lane (VMOV scalar to ARM core register)
// (These are used for 8- and 16-bit element types only.)
def NEONvcgtu : SDNode<"ARMISD::VCGTU", SDTARMVCMP>;
def NEONvtst : SDNode<"ARMISD::VTST", SDTARMVCMP>;
+// Vector Shifts
+def SDTARMVSH : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
+                                     SDTCisSameAs<0, 2>]>;
+
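+// In these nodes the second operand holds per-lane shift amounts; a negative
+// amount shifts right.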
+def NEONvshls : SDNode<"ARMISD::VSHLs", SDTARMVSH>;
+def NEONvshlu : SDNode<"ARMISD::VSHLu", SDTARMVSH>;
+
// Types for vector shift by immediates. The "SHX" version is for long and
// narrow operations where the source and destination vectors have different
// types. The "SHINS" version is for shift and insert operations.
-def SDTARMVSH : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
- SDTCisVT<2, i32>]>;
-def SDTARMVSHX : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>,
- SDTCisVT<2, i32>]>;
-def SDTARMVSHINS : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
- SDTCisSameAs<0, 2>, SDTCisVT<3, i32>]>;
-
-def NEONvshl : SDNode<"ARMISD::VSHL", SDTARMVSH>;
-def NEONvshrs : SDNode<"ARMISD::VSHRs", SDTARMVSH>;
-def NEONvshru : SDNode<"ARMISD::VSHRu", SDTARMVSH>;
-def NEONvshrn : SDNode<"ARMISD::VSHRN", SDTARMVSHX>;
-
-def NEONvrshrs : SDNode<"ARMISD::VRSHRs", SDTARMVSH>;
-def NEONvrshru : SDNode<"ARMISD::VRSHRu", SDTARMVSH>;
-def NEONvrshrn : SDNode<"ARMISD::VRSHRN", SDTARMVSHX>;
-
-def NEONvqshls : SDNode<"ARMISD::VQSHLs", SDTARMVSH>;
-def NEONvqshlu : SDNode<"ARMISD::VQSHLu", SDTARMVSH>;
-def NEONvqshlsu : SDNode<"ARMISD::VQSHLsu", SDTARMVSH>;
-def NEONvqshrns : SDNode<"ARMISD::VQSHRNs", SDTARMVSHX>;
-def NEONvqshrnu : SDNode<"ARMISD::VQSHRNu", SDTARMVSHX>;
-def NEONvqshrnsu : SDNode<"ARMISD::VQSHRNsu", SDTARMVSHX>;
-
-def NEONvqrshrns : SDNode<"ARMISD::VQRSHRNs", SDTARMVSHX>;
-def NEONvqrshrnu : SDNode<"ARMISD::VQRSHRNu", SDTARMVSHX>;
-def NEONvqrshrnsu : SDNode<"ARMISD::VQRSHRNsu", SDTARMVSHX>;
-
-def NEONvsli : SDNode<"ARMISD::VSLI", SDTARMVSHINS>;
-def NEONvsri : SDNode<"ARMISD::VSRI", SDTARMVSHINS>;
+def SDTARMVSHIMM : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
+ SDTCisVT<2, i32>]>;
+def SDTARMVSHXIMM : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>,
+ SDTCisVT<2, i32>]>;
+def SDTARMVSHINSIMM : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
+ SDTCisSameAs<0, 2>, SDTCisVT<3, i32>]>;
+
+def NEONvshlImm : SDNode<"ARMISD::VSHLIMM", SDTARMVSHIMM>;
+def NEONvshrsImm : SDNode<"ARMISD::VSHRsIMM", SDTARMVSHIMM>;
+def NEONvshruImm : SDNode<"ARMISD::VSHRuIMM", SDTARMVSHIMM>;
+def NEONvshrnImm : SDNode<"ARMISD::VSHRNIMM", SDTARMVSHXIMM>;
+
+def NEONvrshrsImm : SDNode<"ARMISD::VRSHRsIMM", SDTARMVSHIMM>;
+def NEONvrshruImm : SDNode<"ARMISD::VRSHRuIMM", SDTARMVSHIMM>;
+def NEONvrshrnImm : SDNode<"ARMISD::VRSHRNIMM", SDTARMVSHXIMM>;
+
+def NEONvqshlsImm : SDNode<"ARMISD::VQSHLsIMM", SDTARMVSHIMM>;
+def NEONvqshluImm : SDNode<"ARMISD::VQSHLuIMM", SDTARMVSHIMM>;
+def NEONvqshlsuImm : SDNode<"ARMISD::VQSHLsuIMM", SDTARMVSHIMM>;
+def NEONvqshrnsImm : SDNode<"ARMISD::VQSHRNsIMM", SDTARMVSHXIMM>;
+def NEONvqshrnuImm : SDNode<"ARMISD::VQSHRNuIMM", SDTARMVSHXIMM>;
+def NEONvqshrnsuImm : SDNode<"ARMISD::VQSHRNsuIMM", SDTARMVSHXIMM>;
+
+def NEONvqrshrnsImm : SDNode<"ARMISD::VQRSHRNsIMM", SDTARMVSHXIMM>;
+def NEONvqrshrnuImm : SDNode<"ARMISD::VQRSHRNuIMM", SDTARMVSHXIMM>;
+def NEONvqrshrnsuImm : SDNode<"ARMISD::VQRSHRNsuIMM", SDTARMVSHXIMM>;
+
+def NEONvsliImm : SDNode<"ARMISD::VSLIIMM", SDTARMVSHINSIMM>;
+def NEONvsriImm : SDNode<"ARMISD::VSRIIMM", SDTARMVSHINSIMM>;
def SDTARMVORRIMM : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
SDTCisVT<2, i32>]>;
string OpcodeStr> {
// 64-bit vector types.
def v8i8 : N2VDShIns<op24, op23, op11_8, 0, op4, i32imm,
- N2RegVShLFrm, OpcodeStr, "8", v8i8, NEONvsli> {
+ N2RegVShLFrm, OpcodeStr, "8", v8i8, NEONvsliImm> {
let Inst{21-19} = 0b001; // imm6 = 001xxx
}
def v4i16 : N2VDShIns<op24, op23, op11_8, 0, op4, i32imm,
- N2RegVShLFrm, OpcodeStr, "16", v4i16, NEONvsli> {
+ N2RegVShLFrm, OpcodeStr, "16", v4i16, NEONvsliImm> {
let Inst{21-20} = 0b01; // imm6 = 01xxxx
}
def v2i32 : N2VDShIns<op24, op23, op11_8, 0, op4, i32imm,
- N2RegVShLFrm, OpcodeStr, "32", v2i32, NEONvsli> {
+ N2RegVShLFrm, OpcodeStr, "32", v2i32, NEONvsliImm> {
let Inst{21} = 0b1; // imm6 = 1xxxxx
}
def v1i64 : N2VDShIns<op24, op23, op11_8, 1, op4, i32imm,
- N2RegVShLFrm, OpcodeStr, "64", v1i64, NEONvsli>;
+ N2RegVShLFrm, OpcodeStr, "64", v1i64, NEONvsliImm>;
// imm6 = xxxxxx
// 128-bit vector types.
def v16i8 : N2VQShIns<op24, op23, op11_8, 0, op4, i32imm,
- N2RegVShLFrm, OpcodeStr, "8", v16i8, NEONvsli> {
+ N2RegVShLFrm, OpcodeStr, "8", v16i8, NEONvsliImm> {
let Inst{21-19} = 0b001; // imm6 = 001xxx
}
def v8i16 : N2VQShIns<op24, op23, op11_8, 0, op4, i32imm,
- N2RegVShLFrm, OpcodeStr, "16", v8i16, NEONvsli> {
+ N2RegVShLFrm, OpcodeStr, "16", v8i16, NEONvsliImm> {
let Inst{21-20} = 0b01; // imm6 = 01xxxx
}
def v4i32 : N2VQShIns<op24, op23, op11_8, 0, op4, i32imm,
- N2RegVShLFrm, OpcodeStr, "32", v4i32, NEONvsli> {
+ N2RegVShLFrm, OpcodeStr, "32", v4i32, NEONvsliImm> {
let Inst{21} = 0b1; // imm6 = 1xxxxx
}
def v2i64 : N2VQShIns<op24, op23, op11_8, 1, op4, i32imm,
- N2RegVShLFrm, OpcodeStr, "64", v2i64, NEONvsli>;
+ N2RegVShLFrm, OpcodeStr, "64", v2i64, NEONvsliImm>;
// imm6 = xxxxxx
}
multiclass N2VShInsR_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
string OpcodeStr> {
// 64-bit vector types.
def v8i8 : N2VDShIns<op24, op23, op11_8, 0, op4, shr_imm8,
- N2RegVShRFrm, OpcodeStr, "8", v8i8, NEONvsri> {
+ N2RegVShRFrm, OpcodeStr, "8", v8i8, NEONvsriImm> {
let Inst{21-19} = 0b001; // imm6 = 001xxx
}
def v4i16 : N2VDShIns<op24, op23, op11_8, 0, op4, shr_imm16,
- N2RegVShRFrm, OpcodeStr, "16", v4i16, NEONvsri> {
+ N2RegVShRFrm, OpcodeStr, "16", v4i16, NEONvsriImm> {
let Inst{21-20} = 0b01; // imm6 = 01xxxx
}
def v2i32 : N2VDShIns<op24, op23, op11_8, 0, op4, shr_imm32,
- N2RegVShRFrm, OpcodeStr, "32", v2i32, NEONvsri> {
+ N2RegVShRFrm, OpcodeStr, "32", v2i32, NEONvsriImm> {
let Inst{21} = 0b1; // imm6 = 1xxxxx
}
def v1i64 : N2VDShIns<op24, op23, op11_8, 1, op4, shr_imm64,
- N2RegVShRFrm, OpcodeStr, "64", v1i64, NEONvsri>;
+ N2RegVShRFrm, OpcodeStr, "64", v1i64, NEONvsriImm>;
// imm6 = xxxxxx
// 128-bit vector types.
def v16i8 : N2VQShIns<op24, op23, op11_8, 0, op4, shr_imm8,
- N2RegVShRFrm, OpcodeStr, "8", v16i8, NEONvsri> {
+ N2RegVShRFrm, OpcodeStr, "8", v16i8, NEONvsriImm> {
let Inst{21-19} = 0b001; // imm6 = 001xxx
}
def v8i16 : N2VQShIns<op24, op23, op11_8, 0, op4, shr_imm16,
- N2RegVShRFrm, OpcodeStr, "16", v8i16, NEONvsri> {
+ N2RegVShRFrm, OpcodeStr, "16", v8i16, NEONvsriImm> {
let Inst{21-20} = 0b01; // imm6 = 01xxxx
}
def v4i32 : N2VQShIns<op24, op23, op11_8, 0, op4, shr_imm32,
- N2RegVShRFrm, OpcodeStr, "32", v4i32, NEONvsri> {
+ N2RegVShRFrm, OpcodeStr, "32", v4i32, NEONvsriImm> {
let Inst{21} = 0b1; // imm6 = 1xxxxx
}
def v2i64 : N2VQShIns<op24, op23, op11_8, 1, op4, shr_imm64,
- N2RegVShRFrm, OpcodeStr, "64", v2i64, NEONvsri>;
+ N2RegVShRFrm, OpcodeStr, "64", v2i64, NEONvsriImm>;
// imm6 = xxxxxx
}
int_arm_neon_vraddhn, 1>;
let Predicates = [HasNEON] in {
-def : Pat<(v8i8 (trunc (NEONvshru (add (v8i16 QPR:$Vn), QPR:$Vm), 8))),
+def : Pat<(v8i8 (trunc (NEONvshruImm (add (v8i16 QPR:$Vn), QPR:$Vm), 8))),
(VADDHNv8i8 QPR:$Vn, QPR:$Vm)>;
-def : Pat<(v4i16 (trunc (NEONvshru (add (v4i32 QPR:$Vn), QPR:$Vm), 16))),
+def : Pat<(v4i16 (trunc (NEONvshruImm (add (v4i32 QPR:$Vn), QPR:$Vm), 16))),
(VADDHNv4i16 QPR:$Vn, QPR:$Vm)>;
-def : Pat<(v2i32 (trunc (NEONvshru (add (v2i64 QPR:$Vn), QPR:$Vm), 32))),
+def : Pat<(v2i32 (trunc (NEONvshruImm (add (v2i64 QPR:$Vn), QPR:$Vm), 32))),
(VADDHNv2i32 QPR:$Vn, QPR:$Vm)>;
}
int_arm_neon_vrsubhn, 0>;
let Predicates = [HasNEON] in {
-def : Pat<(v8i8 (trunc (NEONvshru (sub (v8i16 QPR:$Vn), QPR:$Vm), 8))),
+def : Pat<(v8i8 (trunc (NEONvshruImm (sub (v8i16 QPR:$Vn), QPR:$Vm), 8))),
(VSUBHNv8i8 QPR:$Vn, QPR:$Vm)>;
-def : Pat<(v4i16 (trunc (NEONvshru (sub (v4i32 QPR:$Vn), QPR:$Vm), 16))),
+def : Pat<(v4i16 (trunc (NEONvshruImm (sub (v4i32 QPR:$Vn), QPR:$Vm), 16))),
(VSUBHNv4i16 QPR:$Vn, QPR:$Vm)>;
-def : Pat<(v2i32 (trunc (NEONvshru (sub (v2i64 QPR:$Vn), QPR:$Vm), 32))),
+def : Pat<(v2i32 (trunc (NEONvshruImm (sub (v2i64 QPR:$Vn), QPR:$Vm), 32))),
(VSUBHNv2i32 QPR:$Vn, QPR:$Vm)>;
}
def abd_shr :
PatFrag<(ops node:$in1, node:$in2, node:$shift),
-            (NEONvshrs (sub (zext node:$in1),
-                            (zext node:$in2)), (i32 $shift))>;
+            (NEONvshrsImm (sub (zext node:$in1),
+                               (zext node:$in2)), (i32 $shift))>;
let Predicates = [HasNEON] in {
IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, IIC_VSHLiQ,
"vshl", "u", int_arm_neon_vshiftu>;
+let Predicates = [HasNEON] in {
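+// Map the shift-by-vector ISD nodes directly onto VSHL (register).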
+def : Pat<(v8i8 (NEONvshls (v8i8 DPR:$Dn), (v8i8 DPR:$Dm))),
+ (VSHLsv8i8 DPR:$Dn, DPR:$Dm)>;
+def : Pat<(v4i16 (NEONvshls (v4i16 DPR:$Dn), (v4i16 DPR:$Dm))),
+ (VSHLsv4i16 DPR:$Dn, DPR:$Dm)>;
+def : Pat<(v2i32 (NEONvshls (v2i32 DPR:$Dn), (v2i32 DPR:$Dm))),
+ (VSHLsv2i32 DPR:$Dn, DPR:$Dm)>;
+def : Pat<(v1i64 (NEONvshls (v1i64 DPR:$Dn), (v1i64 DPR:$Dm))),
+ (VSHLsv1i64 DPR:$Dn, DPR:$Dm)>;
+def : Pat<(v16i8 (NEONvshls (v16i8 QPR:$Dn), (v16i8 QPR:$Dm))),
+ (VSHLsv16i8 QPR:$Dn, QPR:$Dm)>;
+def : Pat<(v8i16 (NEONvshls (v8i16 QPR:$Dn), (v8i16 QPR:$Dm))),
+ (VSHLsv8i16 QPR:$Dn, QPR:$Dm)>;
+def : Pat<(v4i32 (NEONvshls (v4i32 QPR:$Dn), (v4i32 QPR:$Dm))),
+ (VSHLsv4i32 QPR:$Dn, QPR:$Dm)>;
+def : Pat<(v2i64 (NEONvshls (v2i64 QPR:$Dn), (v2i64 QPR:$Dm))),
+ (VSHLsv2i64 QPR:$Dn, QPR:$Dm)>;
+
+def : Pat<(v8i8 (NEONvshlu (v8i8 DPR:$Dn), (v8i8 DPR:$Dm))),
+ (VSHLuv8i8 DPR:$Dn, DPR:$Dm)>;
+def : Pat<(v4i16 (NEONvshlu (v4i16 DPR:$Dn), (v4i16 DPR:$Dm))),
+ (VSHLuv4i16 DPR:$Dn, DPR:$Dm)>;
+def : Pat<(v2i32 (NEONvshlu (v2i32 DPR:$Dn), (v2i32 DPR:$Dm))),
+ (VSHLuv2i32 DPR:$Dn, DPR:$Dm)>;
+def : Pat<(v1i64 (NEONvshlu (v1i64 DPR:$Dn), (v1i64 DPR:$Dm))),
+ (VSHLuv1i64 DPR:$Dn, DPR:$Dm)>;
+def : Pat<(v16i8 (NEONvshlu (v16i8 QPR:$Dn), (v16i8 QPR:$Dm))),
+ (VSHLuv16i8 QPR:$Dn, QPR:$Dm)>;
+def : Pat<(v8i16 (NEONvshlu (v8i16 QPR:$Dn), (v8i16 QPR:$Dm))),
+ (VSHLuv8i16 QPR:$Dn, QPR:$Dm)>;
+def : Pat<(v4i32 (NEONvshlu (v4i32 QPR:$Dn), (v4i32 QPR:$Dm))),
+ (VSHLuv4i32 QPR:$Dn, QPR:$Dm)>;
+def : Pat<(v2i64 (NEONvshlu (v2i64 QPR:$Dn), (v2i64 QPR:$Dm))),
+ (VSHLuv2i64 QPR:$Dn, QPR:$Dm)>;
+
+}
+
// VSHL : Vector Shift Left (Immediate)
-defm VSHLi : N2VShL_QHSD<0, 1, 0b0101, 1, IIC_VSHLiD, "vshl", "i", NEONvshl>;
+defm VSHLi : N2VShL_QHSD<0, 1, 0b0101, 1, IIC_VSHLiD, "vshl", "i", NEONvshlImm>;
// VSHR : Vector Shift Right (Immediate)
defm VSHRs : N2VShR_QHSD<0, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "s", "VSHRs",
- NEONvshrs>;
+ NEONvshrsImm>;
defm VSHRu : N2VShR_QHSD<1, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "u", "VSHRu",
- NEONvshru>;
+ NEONvshruImm>;
// VSHLL : Vector Shift Left Long
defm VSHLLs : N2VLSh_QHS<0, 1, 0b1010, 0, 0, 1, "vshll", "s",
- PatFrag<(ops node:$LHS, node:$RHS), (NEONvshl (sext node:$LHS), node:$RHS)>>;
+ PatFrag<(ops node:$LHS, node:$RHS), (NEONvshlImm (sext node:$LHS), node:$RHS)>>;
defm VSHLLu : N2VLSh_QHS<1, 1, 0b1010, 0, 0, 1, "vshll", "u",
- PatFrag<(ops node:$LHS, node:$RHS), (NEONvshl (zext node:$LHS), node:$RHS)>>;
+ PatFrag<(ops node:$LHS, node:$RHS), (NEONvshlImm (zext node:$LHS), node:$RHS)>>;
// VSHLL : Vector Shift Left Long (with maximum shift count)
class N2VLShMax<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7,
v2i64, v2i32, imm32>;
let Predicates = [HasNEON] in {
-def : Pat<(v8i16 (NEONvshl (zext (v8i8 DPR:$Rn)), (i32 8))),
+def : Pat<(v8i16 (NEONvshlImm (zext (v8i8 DPR:$Rn)), (i32 8))),
(VSHLLi8 DPR:$Rn, 8)>;
-def : Pat<(v4i32 (NEONvshl (zext (v4i16 DPR:$Rn)), (i32 16))),
+def : Pat<(v4i32 (NEONvshlImm (zext (v4i16 DPR:$Rn)), (i32 16))),
(VSHLLi16 DPR:$Rn, 16)>;
-def : Pat<(v2i64 (NEONvshl (zext (v2i32 DPR:$Rn)), (i32 32))),
+def : Pat<(v2i64 (NEONvshlImm (zext (v2i32 DPR:$Rn)), (i32 32))),
(VSHLLi32 DPR:$Rn, 32)>;
-def : Pat<(v8i16 (NEONvshl (sext (v8i8 DPR:$Rn)), (i32 8))),
+def : Pat<(v8i16 (NEONvshlImm (sext (v8i8 DPR:$Rn)), (i32 8))),
(VSHLLi8 DPR:$Rn, 8)>;
-def : Pat<(v4i32 (NEONvshl (sext (v4i16 DPR:$Rn)), (i32 16))),
+def : Pat<(v4i32 (NEONvshlImm (sext (v4i16 DPR:$Rn)), (i32 16))),
(VSHLLi16 DPR:$Rn, 16)>;
-def : Pat<(v2i64 (NEONvshl (sext (v2i32 DPR:$Rn)), (i32 32))),
+def : Pat<(v2i64 (NEONvshlImm (sext (v2i32 DPR:$Rn)), (i32 32))),
(VSHLLi32 DPR:$Rn, 32)>;
-def : Pat<(v8i16 (NEONvshl (anyext (v8i8 DPR:$Rn)), (i32 8))),
+def : Pat<(v8i16 (NEONvshlImm (anyext (v8i8 DPR:$Rn)), (i32 8))),
(VSHLLi8 DPR:$Rn, 8)>;
-def : Pat<(v4i32 (NEONvshl (anyext (v4i16 DPR:$Rn)), (i32 16))),
+def : Pat<(v4i32 (NEONvshlImm (anyext (v4i16 DPR:$Rn)), (i32 16))),
(VSHLLi16 DPR:$Rn, 16)>;
-def : Pat<(v2i64 (NEONvshl (anyext (v2i32 DPR:$Rn)), (i32 32))),
+def : Pat<(v2i64 (NEONvshlImm (anyext (v2i32 DPR:$Rn)), (i32 32))),
(VSHLLi32 DPR:$Rn, 32)>;
}
// VSHRN : Vector Shift Right and Narrow
defm VSHRN : N2VNSh_HSD<0,1,0b1000,0,0,1, IIC_VSHLiD, "vshrn", "i",
PatFrag<(ops node:$Rn, node:$amt),
- (trunc (NEONvshrs node:$Rn, node:$amt))>>;
+ (trunc (NEONvshrsImm node:$Rn, node:$amt))>>;
let Predicates = [HasNEON] in {
-def : Pat<(v8i8 (trunc (NEONvshru (v8i16 QPR:$Vn), shr_imm8:$amt))),
+def : Pat<(v8i8 (trunc (NEONvshruImm (v8i16 QPR:$Vn), shr_imm8:$amt))),
(VSHRNv8i8 QPR:$Vn, shr_imm8:$amt)>;
-def : Pat<(v4i16 (trunc (NEONvshru (v4i32 QPR:$Vn), shr_imm16:$amt))),
+def : Pat<(v4i16 (trunc (NEONvshruImm (v4i32 QPR:$Vn), shr_imm16:$amt))),
(VSHRNv4i16 QPR:$Vn, shr_imm16:$amt)>;
-def : Pat<(v2i32 (trunc (NEONvshru (v2i64 QPR:$Vn), shr_imm32:$amt))),
+def : Pat<(v2i32 (trunc (NEONvshruImm (v2i64 QPR:$Vn), shr_imm32:$amt))),
(VSHRNv2i32 QPR:$Vn, shr_imm32:$amt)>;
}
"vrshl", "u", int_arm_neon_vrshiftu>;
// VRSHR : Vector Rounding Shift Right
defm VRSHRs : N2VShR_QHSD<0,1,0b0010,1, IIC_VSHLi4D, "vrshr", "s", "VRSHRs",
- NEONvrshrs>;
+ NEONvrshrsImm>;
defm VRSHRu : N2VShR_QHSD<1,1,0b0010,1, IIC_VSHLi4D, "vrshr", "u", "VRSHRu",
- NEONvrshru>;
+ NEONvrshruImm>;
// VRSHRN : Vector Rounding Shift Right and Narrow
defm VRSHRN : N2VNSh_HSD<0, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vrshrn", "i",
- NEONvrshrn>;
+ NEONvrshrnImm>;
// VQSHL : Vector Saturating Shift
defm VQSHLs : N3VInt_QHSDSh<0, 0, 0b0100, 1, N3RegVShFrm,
IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
"vqshl", "u", int_arm_neon_vqshiftu>;
// VQSHL : Vector Saturating Shift Left (Immediate)
-defm VQSHLsi : N2VShL_QHSD<0,1,0b0111,1, IIC_VSHLi4D, "vqshl", "s",NEONvqshls>;
-defm VQSHLui : N2VShL_QHSD<1,1,0b0111,1, IIC_VSHLi4D, "vqshl", "u",NEONvqshlu>;
+defm VQSHLsi : N2VShL_QHSD<0,1,0b0111,1, IIC_VSHLi4D, "vqshl", "s",
+                           NEONvqshlsImm>;
+defm VQSHLui : N2VShL_QHSD<1,1,0b0111,1, IIC_VSHLi4D, "vqshl", "u",
+                           NEONvqshluImm>;
// VQSHLU : Vector Saturating Shift Left (Immediate, Unsigned)
-defm VQSHLsu : N2VShL_QHSD<1,1,0b0110,1, IIC_VSHLi4D,"vqshlu","s",NEONvqshlsu>;
+defm VQSHLsu : N2VShL_QHSD<1,1,0b0110,1, IIC_VSHLi4D, "vqshlu", "s",
+                           NEONvqshlsuImm>;
// VQSHRN : Vector Saturating Shift Right and Narrow
defm VQSHRNs : N2VNSh_HSD<0, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn", "s",
- NEONvqshrns>;
+ NEONvqshrnsImm>;
defm VQSHRNu : N2VNSh_HSD<1, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn", "u",
- NEONvqshrnu>;
+ NEONvqshrnuImm>;
// VQSHRUN : Vector Saturating Shift Right and Narrow (Unsigned)
defm VQSHRUN : N2VNSh_HSD<1, 1, 0b1000, 0, 0, 1, IIC_VSHLi4D, "vqshrun", "s",
- NEONvqshrnsu>;
+ NEONvqshrnsuImm>;
// VQRSHL : Vector Saturating Rounding Shift
defm VQRSHLs : N3VInt_QHSDSh<0, 0, 0b0101, 1, N3RegVShFrm,
// VQRSHRN : Vector Saturating Rounding Shift Right and Narrow
defm VQRSHRNs : N2VNSh_HSD<0, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D, "vqrshrn", "s",
- NEONvqrshrns>;
+ NEONvqrshrnsImm>;
defm VQRSHRNu : N2VNSh_HSD<1, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D, "vqrshrn", "u",
- NEONvqrshrnu>;
+ NEONvqrshrnuImm>;
// VQRSHRUN : Vector Saturating Rounding Shift Right and Narrow (Unsigned)
defm VQRSHRUN : N2VNSh_HSD<1, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vqrshrun", "s",
- NEONvqrshrnsu>;
+ NEONvqrshrnsuImm>;
// VSRA : Vector Shift Right and Accumulate
-defm VSRAs : N2VShAdd_QHSD<0, 1, 0b0001, 1, "vsra", "s", NEONvshrs>;
-defm VSRAu : N2VShAdd_QHSD<1, 1, 0b0001, 1, "vsra", "u", NEONvshru>;
+defm VSRAs : N2VShAdd_QHSD<0, 1, 0b0001, 1, "vsra", "s", NEONvshrsImm>;
+defm VSRAu : N2VShAdd_QHSD<1, 1, 0b0001, 1, "vsra", "u", NEONvshruImm>;
// VRSRA : Vector Rounding Shift Right and Accumulate
-defm VRSRAs : N2VShAdd_QHSD<0, 1, 0b0011, 1, "vrsra", "s", NEONvrshrs>;
-defm VRSRAu : N2VShAdd_QHSD<1, 1, 0b0011, 1, "vrsra", "u", NEONvrshru>;
+defm VRSRAs : N2VShAdd_QHSD<0, 1, 0b0011, 1, "vrsra", "s", NEONvrshrsImm>;
+defm VRSRAu : N2VShAdd_QHSD<1, 1, 0b0011, 1, "vrsra", "u", NEONvrshruImm>;
// VSLI : Vector Shift Left and Insert
defm VSLI : N2VShInsL_QHSD<1, 1, 0b0101, 1, "vsli">;
define void @addCombineToVPADDL_s8(<16 x i8> *%cbcr, <4 x i16> *%X) nounwind ssp {
; CHECK-LABEL: addCombineToVPADDL_s8:
; CHECK: @ %bb.0:
-; CHECK-NEXT: vmov.i16 d16, #0x8
-; CHECK-NEXT: vld1.64 {d18, d19}, [r0]
-; CHECK-NEXT: vext.8 d17, d18, d16, #1
-; CHECK-NEXT: vneg.s16 d16, d16
-; CHECK-NEXT: vshl.i16 d18, d18, #8
-; CHECK-NEXT: vshl.i16 d17, d17, #8
-; CHECK-NEXT: vshl.s16 d18, d18, d16
-; CHECK-NEXT: vshl.s16 d16, d17, d16
-; CHECK-NEXT: vadd.i16 d16, d16, d18
-; CHECK-NEXT: vstr d16, [r1]
-; CHECK-NEXT: mov pc, lr
+; CHECK-NEXT: vld1.64 {d16, d17}, [r0]
+; CHECK-NEXT: vext.8 d18, d16, d16, #1
+; CHECK-NEXT: vshl.i16 d16, d16, #8
+; CHECK-NEXT: vshl.i16 d18, d18, #8
+; CHECK-NEXT: vshr.s16 d17, d18, #8
+; CHECK-NEXT: vsra.s16 d17, d16, #8
+; CHECK-NEXT: vstr d17, [r1]
+; CHECK-NEXT: mov pc, lr
%tmp = load <16 x i8>, <16 x i8>* %cbcr
%tmp1 = shufflevector <16 x i8> %tmp, <16 x i8> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
%tmp3 = shufflevector <16 x i8> %tmp, <16 x i8> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
; And <2 x i8> to <2 x i32>
define <2 x i8> @fromExtendingExtractVectorElt_2i8(<8 x i8> %in) {
; CHECK-LABEL: fromExtendingExtractVectorElt_2i8:
-; CHECK: vadd.i32
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vmov d16, r0, r1
+; CHECK-NEXT: vmov.u8 r1, d16[1]
+; CHECK-NEXT: vmov.u8 r0, d16[0]
+; CHECK-NEXT: vmov.u8 r2, d16[2]
+; CHECK-NEXT: vmov.u8 r3, d16[3]
+; CHECK-NEXT: vmov.32 d17[0], r1
+; CHECK-NEXT: vmov.32 d16[0], r0
+; CHECK-NEXT: vmov.32 d17[1], r3
+; CHECK-NEXT: vmov.32 d16[1], r2
+; CHECK-NEXT: vadd.i32 d16, d17, d16
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: mov pc, lr
%tmp1 = shufflevector <8 x i8> %in, <8 x i8> undef, <2 x i32> <i32 0, i32 2>
%tmp2 = shufflevector <8 x i8> %in, <8 x i8> undef, <2 x i32> <i32 1, i32 3>
%x = add <2 x i8> %tmp2, %tmp1
define <2 x i16> @fromExtendingExtractVectorElt_2i16(<8 x i16> %in) {
; CHECK-LABEL: fromExtendingExtractVectorElt_2i16:
-; CHECK: vadd.i32
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vmov d16, r0, r1
+; CHECK-NEXT: vmov.u16 r0, d16[0]
+; CHECK-NEXT: vmov.u16 r1, d16[1]
+; CHECK-NEXT: vmov.u16 r3, d16[3]
+; CHECK-NEXT: vmov.u16 r2, d16[2]
+; CHECK-NEXT: vmov.32 d16[0], r0
+; CHECK-NEXT: vmov.32 d17[0], r1
+; CHECK-NEXT: vmov.32 d16[1], r2
+; CHECK-NEXT: vmov.32 d17[1], r3
+; CHECK-NEXT: vadd.i32 d16, d17, d16
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: mov pc, lr
%tmp1 = shufflevector <8 x i16> %in, <8 x i16> undef, <2 x i32> <i32 0, i32 2>
%tmp2 = shufflevector <8 x i16> %in, <8 x i16> undef, <2 x i32> <i32 1, i32 3>
%x = add <2 x i16> %tmp2, %tmp1
; CHECK-NEXT: vld1.64 {d18, d19}, [lr]
; CHECK-NEXT: vcgt.u32 q8, q9, q8
; CHECK-NEXT: vld1.32 {d18[0]}, [r12:32]
-; CHECK-NEXT: vmov.i8 d19, #0x7
-; CHECK-NEXT: vmovl.u8 q10, d18
+; CHECK-NEXT: vmovl.u8 q9, d18
; CHECK-NEXT: vmovn.i32 d16, q8
-; CHECK-NEXT: vneg.s8 d17, d19
-; CHECK-NEXT: vmov d18, r2, r3
-; CHECK-NEXT: vuzp.8 d16, d20
+; CHECK-NEXT: vmov d17, r2, r3
+; CHECK-NEXT: vuzp.8 d16, d18
+; CHECK-NEXT: vmov d18, r0, r1
; CHECK-NEXT: vshl.i8 d16, d16, #7
-; CHECK-NEXT: vshl.s8 d16, d16, d17
-; CHECK-NEXT: vmov d17, r0, r1
-; CHECK-NEXT: vbsl d16, d17, d18
+; CHECK-NEXT: vshr.s8 d16, d16, #7
+; CHECK-NEXT: vbsl d16, d18, d17
; CHECK-NEXT: vmov r0, r1, d16
; CHECK-NEXT: pop {r11, lr}
; CHECK-NEXT: mov pc, lr
; CHECK-NEXT: add r12, sp, #16
; CHECK-NEXT: vld1.64 {d18, d19}, [r12]
; CHECK-NEXT: vcgt.u32 q8, q9, q8
-; CHECK-NEXT: vmov.i8 d18, #0x7
+; CHECK-NEXT: vmov d18, r0, r1
; CHECK-NEXT: vmovn.i32 d16, q8
; CHECK-NEXT: vuzp.8 d16, d17
-; CHECK-NEXT: vneg.s8 d17, d18
+; CHECK-NEXT: vmov d17, r2, r3
; CHECK-NEXT: vshl.i8 d16, d16, #7
-; CHECK-NEXT: vmov d18, r2, r3
-; CHECK-NEXT: vshl.s8 d16, d16, d17
-; CHECK-NEXT: vmov d17, r0, r1
-; CHECK-NEXT: vbsl d16, d17, d18
+; CHECK-NEXT: vshr.s8 d16, d16, #7
+; CHECK-NEXT: vbsl d16, d18, d17
; CHECK-NEXT: vmov r0, r1, d16
; CHECK-NEXT: mov pc, lr
<4 x i32> %cmp0, <4 x i32> %cmp1, <4 x i8> *%cmp2_ptr) {
; CHECK-NEXT: vld1.64 {d18, d19}, [r12]
; CHECK-NEXT: vcgt.u32 q8, q9, q8
; CHECK-NEXT: vldr d18, .LCPI22_0
-; CHECK-NEXT: vmov.i8 d19, #0x7
; CHECK-NEXT: vmovn.i32 d16, q8
; CHECK-NEXT: vtbl.8 d16, {d16}, d18
-; CHECK-NEXT: vneg.s8 d17, d19
-; CHECK-NEXT: vmov d18, r2, r3
+; CHECK-NEXT: vmov d17, r2, r3
+; CHECK-NEXT: vmov d18, r0, r1
; CHECK-NEXT: vshl.i8 d16, d16, #7
-; CHECK-NEXT: vshl.s8 d16, d16, d17
-; CHECK-NEXT: vmov d17, r0, r1
-; CHECK-NEXT: vbsl d16, d17, d18
+; CHECK-NEXT: vshr.s8 d16, d16, #7
+; CHECK-NEXT: vbsl d16, d18, d17
; CHECK-NEXT: vmov r0, r1, d16
; CHECK-NEXT: mov pc, lr
; CHECK-NEXT: .p2align 3
; CHECK-NEXT: vcgt.u32 q8, q9, q8
; CHECK-NEXT: vmovn.i32 d19, q10
; CHECK-NEXT: vmov.u8 lr, d23[3]
-; CHECK-NEXT: vldr d20, .LCPI23_0
; CHECK-NEXT: vmovn.i32 d18, q8
; CHECK-NEXT: vmovn.i16 d22, q9
-; CHECK-NEXT: vmov.i8 q9, #0x7
-; CHECK-NEXT: vneg.s8 q9, q9
+; CHECK-NEXT: vldr d18, .LCPI23_0
; CHECK-NEXT: vmov.8 d17[0], lr
-; CHECK-NEXT: vtbl.8 d16, {d22, d23}, d20
+; CHECK-NEXT: vtbl.8 d16, {d22, d23}, d18
+; CHECK-NEXT: vmov d19, r2, r3
; CHECK-NEXT: vld1.8 {d17[1]}, [r4]
; CHECK-NEXT: add r4, sp, #8
+; CHECK-NEXT: vmov d18, r0, r1
; CHECK-NEXT: vshl.i8 q8, q8, #7
; CHECK-NEXT: vld1.64 {d20, d21}, [r4]
-; CHECK-NEXT: vshl.s8 q8, q8, q9
-; CHECK-NEXT: vmov d19, r2, r3
-; CHECK-NEXT: vmov d18, r0, r1
+; CHECK-NEXT: vshr.s8 q8, q8, #7
; CHECK-NEXT: vbsl q8, q9, q10
; CHECK-NEXT: vmov r0, r1, d16
; CHECK-NEXT: vmov r2, r3, d17