// Intrinsic bound to the GCC builtin __builtin_ia32_pshufb128: one v16i8
// result from two v16i8 operands, flagged IntrNoMem.
def int_x86_ssse3_pshuf_b_128 : GCCBuiltin<"__builtin_ia32_pshufb128">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty,
llvm_v16i8_ty], [IntrNoMem]>;
- def int_x86_sse2_pshuf_d :
- Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i8_ty],
- [IntrNoMem]>;
- def int_x86_sse2_pshufl_w :
- Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i8_ty],
- [IntrNoMem]>;
- def int_x86_sse2_pshufh_w :
- Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i8_ty],
- [IntrNoMem]>;
// Intrinsic bound to the GCC builtin __builtin_ia32_pshufw: one x86mmx result
// from an x86mmx operand plus an i8 operand, flagged IntrNoMem.
def int_x86_sse_pshuf_w : GCCBuiltin<"__builtin_ia32_pshufw">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_i8_ty],
[IntrNoMem]>;
Name.startswith("x86.avx2.vbroadcast") ||
Name.startswith("x86.avx2.pbroadcast") ||
Name.startswith("x86.avx.vpermil.") ||
+ Name.startswith("x86.sse2.pshuf") ||
Name.startswith("x86.sse41.pmovsx") ||
Name.startswith("x86.sse41.pmovzx") ||
Name.startswith("x86.avx2.pmovsx") ||
Rep = Builder.CreateShuffleVector(Op0, UndefV, Idxs);
} else if (Name == "llvm.stackprotectorcheck") {
Rep = nullptr;
- } else if (Name.startswith("llvm.x86.avx.vpermil.")) {
+ } else if (Name.startswith("llvm.x86.avx.vpermil.") ||
+ Name == "llvm.x86.sse2.pshuf.d") {
Value *Op0 = CI->getArgOperand(0);
unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
VectorType *VecTy = cast<VectorType>(CI->getType());
Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask);
Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
+ } else if (Name == "llvm.x86.sse2.pshufl.w") { // rewrite the intrinsic call as a shufflevector
+ Value *Op0 = CI->getArgOperand(0); // vector being shuffled
+ unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue(); // control value; cast<> expects a ConstantInt here
+ unsigned NumElts = CI->getType()->getVectorNumElements();
+ // Fill the mask in groups of 8 elements; l is the first index of the current group.
+ SmallVector<uint32_t, 16> Idxs(NumElts);
+ for (unsigned l = 0; l != NumElts; l += 8) { // exits only when NumElts is a multiple of 8
+ for (unsigned i = 0; i != 4; ++i)
+ Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l; // lower 4: source chosen by the i-th 2-bit field of Imm
+ for (unsigned i = 4; i != 8; ++i)
+ Idxs[i + l] = i + l; // upper 4: identity (element stays in place)
+ }
+ // Op0 is passed as both shuffle inputs; Rep receives the resulting shufflevector.
+ Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
+ } else if (Name == "llvm.x86.sse2.pshufh.w") { // rewrite the intrinsic call as a shufflevector
+ Value *Op0 = CI->getArgOperand(0); // vector being shuffled
+ unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue(); // control value; cast<> expects a ConstantInt here
+ unsigned NumElts = CI->getType()->getVectorNumElements();
+ // Fill the mask in groups of 8 elements; l is the first index of the current group.
+ SmallVector<uint32_t, 16> Idxs(NumElts);
+ for (unsigned l = 0; l != NumElts; l += 8) { // exits only when NumElts is a multiple of 8
+ for (unsigned i = 0; i != 4; ++i)
+ Idxs[i + l] = i + l; // lower 4: identity (element stays in place)
+ for (unsigned i = 0; i != 4; ++i)
+ Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l; // upper 4: i-th 2-bit field of Imm selects within the upper 4
+ }
+ // Op0 is passed as both shuffle inputs; Rep receives the resulting shufflevector.
+ Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
} else {
llvm_unreachable("Unknown function for CallInst upgrade.");
}
X86_INTRINSIC_DATA(sse2_pmulhu_w, INTR_TYPE_2OP, ISD::MULHU, 0),
X86_INTRINSIC_DATA(sse2_pmulu_dq, INTR_TYPE_2OP, X86ISD::PMULUDQ, 0),
X86_INTRINSIC_DATA(sse2_psad_bw, INTR_TYPE_2OP, X86ISD::PSADBW, 0),
- X86_INTRINSIC_DATA(sse2_pshuf_d, INTR_TYPE_2OP, X86ISD::PSHUFD, 0),
- X86_INTRINSIC_DATA(sse2_pshufh_w, INTR_TYPE_2OP, X86ISD::PSHUFHW, 0),
- X86_INTRINSIC_DATA(sse2_pshufl_w, INTR_TYPE_2OP, X86ISD::PSHUFLW, 0),
X86_INTRINSIC_DATA(sse2_psll_d, INTR_TYPE_2OP, X86ISD::VSHL, 0),
X86_INTRINSIC_DATA(sse2_psll_q, INTR_TYPE_2OP, X86ISD::VSHL, 0),
X86_INTRINSIC_DATA(sse2_psll_w, INTR_TYPE_2OP, X86ISD::VSHL, 0),
}
declare void @llvm.x86.sse2.storeu.pd(i8*, <2 x double>) nounwind
+define <4 x i32> @test_x86_sse2_pshuf_d(<4 x i32> %a) { ; a call to the pshuf.d intrinsic should be emitted as pshufd
+; CHECK-LABEL: test_x86_sse2_pshuf_d:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,2,1,0]
+; CHECK-NEXT: retl
+entry:
+ %res = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %a, i8 27) nounwind readnone ; 27 = 0b00011011, 2-bit fields from low to high: 3,2,1,0
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32>, i8) nounwind readnone
+define <8 x i16> @test_x86_sse2_pshufl_w(<8 x i16> %a) { ; a call to the pshufl.w intrinsic should be emitted as pshuflw
+; CHECK-LABEL: test_x86_sse2_pshufl_w:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
+; CHECK-NEXT: retl
+entry:
+ %res = call <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16> %a, i8 27) nounwind readnone ; 27 = 0b00011011: low four lanes become 3,2,1,0; lanes 4-7 unchanged
+ ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16>, i8) nounwind readnone
+
+define <8 x i16> @test_x86_sse2_pshufh_w(<8 x i16> %a) { ; a call to the pshufh.w intrinsic should be emitted as pshufhw
+; CHECK-LABEL: test_x86_sse2_pshufh_w:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
+; CHECK-NEXT: retl
+entry:
+ %res = call <8 x i16> @llvm.x86.sse2.pshufh.w(<8 x i16> %a, i8 27) nounwind readnone ; 27 = 0b00011011: lanes 0-3 unchanged; high four lanes become 7,6,5,4
+ ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.sse2.pshufh.w(<8 x i16>, i8) nounwind readnone
ret void
}
declare void @llvm.x86.sse2.pause() nounwind
-
-define <4 x i32> @test_x86_sse2_pshuf_d(<4 x i32> %a) {
-; SSE-LABEL: test_x86_sse2_pshuf_d:
-; SSE: ## BB#0: ## %entry
-; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,2,1,0]
-; SSE-NEXT: retl
-;
-; KNL-LABEL: test_x86_sse2_pshuf_d:
-; KNL: ## BB#0: ## %entry
-; KNL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,2,1,0]
-; KNL-NEXT: retl
-entry:
- %res = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %a, i8 27) nounwind readnone
- ret <4 x i32> %res
-}
-declare <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32>, i8) nounwind readnone
-
-define <8 x i16> @test_x86_sse2_pshufl_w(<8 x i16> %a) {
-; SSE-LABEL: test_x86_sse2_pshufl_w:
-; SSE: ## BB#0: ## %entry
-; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
-; SSE-NEXT: retl
-;
-; KNL-LABEL: test_x86_sse2_pshufl_w:
-; KNL: ## BB#0: ## %entry
-; KNL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
-; KNL-NEXT: retl
-entry:
- %res = call <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16> %a, i8 27) nounwind readnone
- ret <8 x i16> %res
-}
-declare <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16>, i8) nounwind readnone
-
-define <8 x i16> @test_x86_sse2_pshufh_w(<8 x i16> %a) {
-; SSE-LABEL: test_x86_sse2_pshufh_w:
-; SSE: ## BB#0: ## %entry
-; SSE-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
-; SSE-NEXT: retl
-;
-; KNL-LABEL: test_x86_sse2_pshufh_w:
-; KNL: ## BB#0: ## %entry
-; KNL-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
-; KNL-NEXT: retl
-entry:
- %res = call <8 x i16> @llvm.x86.sse2.pshufh.w(<8 x i16> %a, i8 27) nounwind readnone
- ret <8 x i16> %res
-}
-declare <8 x i16> @llvm.x86.sse2.pshufh.w(<8 x i16>, i8) nounwind readnone
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE-NEXT: retq
;
-; AVX-LABEL: combine_pshufd6:
-; AVX: # BB#0: # %entry
-; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
-; AVX-NEXT: retq
+; AVX1-LABEL: combine_pshufd6:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: combine_pshufd6:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vbroadcastss %xmm0, %xmm0
+; AVX2-NEXT: retq
entry:
%b = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %a, i8 0)
%c = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %b, i8 8)