From: Craig Topper Date: Sun, 12 Jun 2016 14:11:32 +0000 (+0000) Subject: [X86] Remove sse2 pshufd/pshuflw/pshufhw intrinsics and upgrade them to shufflevector. X-Git-Tag: android-x86-7.1-r4~31880 X-Git-Url: http://git.osdn.net/view?a=commitdiff_plain;h=b2cfb64e72d3d9cfd666fe68574e89cb87997fdc;p=android-x86%2Fexternal-llvm.git [X86] Remove sse2 pshufd/pshuflw/pshufhw intrinsics and upgrade them to shufflevector. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@272510 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/include/llvm/IR/IntrinsicsX86.td b/include/llvm/IR/IntrinsicsX86.td index c89d99e0a0b..b140a80387e 100644 --- a/include/llvm/IR/IntrinsicsX86.td +++ b/include/llvm/IR/IntrinsicsX86.td @@ -662,15 +662,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_ssse3_pshuf_b_128 : GCCBuiltin<"__builtin_ia32_pshufb128">, Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; - def int_x86_sse2_pshuf_d : - Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i8_ty], - [IntrNoMem]>; - def int_x86_sse2_pshufl_w : - Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i8_ty], - [IntrNoMem]>; - def int_x86_sse2_pshufh_w : - Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i8_ty], - [IntrNoMem]>; def int_x86_sse_pshuf_w : GCCBuiltin<"__builtin_ia32_pshufw">, Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_i8_ty], [IntrNoMem]>; diff --git a/lib/IR/AutoUpgrade.cpp b/lib/IR/AutoUpgrade.cpp index 45f4d3930a7..86f827e1506 100644 --- a/lib/IR/AutoUpgrade.cpp +++ b/lib/IR/AutoUpgrade.cpp @@ -177,6 +177,7 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) { Name.startswith("x86.avx2.vbroadcast") || Name.startswith("x86.avx2.pbroadcast") || Name.startswith("x86.avx.vpermil.") || + Name.startswith("x86.sse2.pshuf") || Name.startswith("x86.sse41.pmovsx") || Name.startswith("x86.sse41.pmovzx") || Name.startswith("x86.avx2.pmovsx") || @@ -880,7 +881,8 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, 
Function *NewFn) { Rep = Builder.CreateShuffleVector(Op0, UndefV, Idxs); } else if (Name == "llvm.stackprotectorcheck") { Rep = nullptr; - } else if (Name.startswith("llvm.x86.avx.vpermil.")) { + } else if (Name.startswith("llvm.x86.avx.vpermil.") || + Name == "llvm.x86.sse2.pshuf.d") { Value *Op0 = CI->getArgOperand(0); unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue(); VectorType *VecTy = cast<VectorType>(CI->getType()); @@ -897,6 +899,34 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask); Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs); + } else if (Name == "llvm.x86.sse2.pshufl.w") { + Value *Op0 = CI->getArgOperand(0); + unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue(); + unsigned NumElts = CI->getType()->getVectorNumElements(); + + SmallVector Idxs(NumElts); + for (unsigned l = 0; l != NumElts; l += 8) { + for (unsigned i = 0; i != 4; ++i) + Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l; + for (unsigned i = 4; i != 8; ++i) + Idxs[i + l] = i + l; + } + + Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs); + } else if (Name == "llvm.x86.sse2.pshufh.w") { + Value *Op0 = CI->getArgOperand(0); + unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue(); + unsigned NumElts = CI->getType()->getVectorNumElements(); + + SmallVector Idxs(NumElts); + for (unsigned l = 0; l != NumElts; l += 8) { + for (unsigned i = 0; i != 4; ++i) + Idxs[i + l] = i + l; + for (unsigned i = 0; i != 4; ++i) + Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l; + } + + Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs); } else { llvm_unreachable("Unknown function for CallInst upgrade."); } diff --git a/lib/Target/X86/X86IntrinsicsInfo.h b/lib/Target/X86/X86IntrinsicsInfo.h index 54d97df5b82..d732606c87d 100644 --- a/lib/Target/X86/X86IntrinsicsInfo.h +++ b/lib/Target/X86/X86IntrinsicsInfo.h @@ -2133,9 +2133,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(sse2_pmulhu_w, 
INTR_TYPE_2OP, ISD::MULHU, 0), X86_INTRINSIC_DATA(sse2_pmulu_dq, INTR_TYPE_2OP, X86ISD::PMULUDQ, 0), X86_INTRINSIC_DATA(sse2_psad_bw, INTR_TYPE_2OP, X86ISD::PSADBW, 0), - X86_INTRINSIC_DATA(sse2_pshuf_d, INTR_TYPE_2OP, X86ISD::PSHUFD, 0), - X86_INTRINSIC_DATA(sse2_pshufh_w, INTR_TYPE_2OP, X86ISD::PSHUFHW, 0), - X86_INTRINSIC_DATA(sse2_pshufl_w, INTR_TYPE_2OP, X86ISD::PSHUFLW, 0), X86_INTRINSIC_DATA(sse2_psll_d, INTR_TYPE_2OP, X86ISD::VSHL, 0), X86_INTRINSIC_DATA(sse2_psll_q, INTR_TYPE_2OP, X86ISD::VSHL, 0), X86_INTRINSIC_DATA(sse2_psll_w, INTR_TYPE_2OP, X86ISD::VSHL, 0), diff --git a/test/CodeGen/X86/sse2-intrinsics-x86-upgrade.ll b/test/CodeGen/X86/sse2-intrinsics-x86-upgrade.ll index 878420f553b..f3573e93ac2 100644 --- a/test/CodeGen/X86/sse2-intrinsics-x86-upgrade.ll +++ b/test/CodeGen/X86/sse2-intrinsics-x86-upgrade.ll @@ -138,4 +138,35 @@ define void @test_x86_sse2_storeu_pd(i8* %a0, <2 x double> %a1) { } declare void @llvm.x86.sse2.storeu.pd(i8*, <2 x double>) nounwind +define <4 x i32> @test_x86_sse2_pshuf_d(<4 x i32> %a) { +; CHECK-LABEL: test_x86_sse2_pshuf_d: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,2,1,0] +; CHECK-NEXT: retl +entry: + %res = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %a, i8 27) nounwind readnone + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32>, i8) nounwind readnone +define <8 x i16> @test_x86_sse2_pshufl_w(<8 x i16> %a) { +; CHECK-LABEL: test_x86_sse2_pshufl_w: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7] +; CHECK-NEXT: retl +entry: + %res = call <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16> %a, i8 27) nounwind readnone + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16>, i8) nounwind readnone + +define <8 x i16> @test_x86_sse2_pshufh_w(<8 x i16> %a) { +; CHECK-LABEL: test_x86_sse2_pshufh_w: +; CHECK: ## BB#0: ## %entry +; CHECK-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4] +; CHECK-NEXT: retl 
+entry: + %res = call <8 x i16> @llvm.x86.sse2.pshufh.w(<8 x i16> %a, i8 27) nounwind readnone + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.sse2.pshufh.w(<8 x i16>, i8) nounwind readnone diff --git a/test/CodeGen/X86/sse2-intrinsics-x86.ll b/test/CodeGen/X86/sse2-intrinsics-x86.ll index 88c4f6a169f..9b595fc44cb 100644 --- a/test/CodeGen/X86/sse2-intrinsics-x86.ll +++ b/test/CodeGen/X86/sse2-intrinsics-x86.ll @@ -1266,51 +1266,3 @@ define void @test_x86_sse2_pause() { ret void } declare void @llvm.x86.sse2.pause() nounwind - -define <4 x i32> @test_x86_sse2_pshuf_d(<4 x i32> %a) { -; SSE-LABEL: test_x86_sse2_pshuf_d: -; SSE: ## BB#0: ## %entry -; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,2,1,0] -; SSE-NEXT: retl -; -; KNL-LABEL: test_x86_sse2_pshuf_d: -; KNL: ## BB#0: ## %entry -; KNL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,2,1,0] -; KNL-NEXT: retl -entry: - %res = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %a, i8 27) nounwind readnone - ret <4 x i32> %res -} -declare <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32>, i8) nounwind readnone - -define <8 x i16> @test_x86_sse2_pshufl_w(<8 x i16> %a) { -; SSE-LABEL: test_x86_sse2_pshufl_w: -; SSE: ## BB#0: ## %entry -; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7] -; SSE-NEXT: retl -; -; KNL-LABEL: test_x86_sse2_pshufl_w: -; KNL: ## BB#0: ## %entry -; KNL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7] -; KNL-NEXT: retl -entry: - %res = call <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16> %a, i8 27) nounwind readnone - ret <8 x i16> %res -} -declare <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16>, i8) nounwind readnone - -define <8 x i16> @test_x86_sse2_pshufh_w(<8 x i16> %a) { -; SSE-LABEL: test_x86_sse2_pshufh_w: -; SSE: ## BB#0: ## %entry -; SSE-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4] -; SSE-NEXT: retl -; -; KNL-LABEL: test_x86_sse2_pshufh_w: -; KNL: ## BB#0: ## %entry -; KNL-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4] -; KNL-NEXT: retl -entry: - %res = call <8 x i16> 
@llvm.x86.sse2.pshufh.w(<8 x i16> %a, i8 27) nounwind readnone - ret <8 x i16> %res -} -declare <8 x i16> @llvm.x86.sse2.pshufh.w(<8 x i16>, i8) nounwind readnone diff --git a/test/CodeGen/X86/vector-shuffle-combining.ll b/test/CodeGen/X86/vector-shuffle-combining.ll index a2a29e5b894..aa7363669a0 100644 --- a/test/CodeGen/X86/vector-shuffle-combining.ll +++ b/test/CodeGen/X86/vector-shuffle-combining.ll @@ -96,10 +96,15 @@ define <4 x i32> @combine_pshufd6(<4 x i32> %a) { ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] ; SSE-NEXT: retq ; -; AVX-LABEL: combine_pshufd6: -; AVX: # BB#0: # %entry -; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] -; AVX-NEXT: retq +; AVX1-LABEL: combine_pshufd6: +; AVX1: # BB#0: # %entry +; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] +; AVX1-NEXT: retq +; +; AVX2-LABEL: combine_pshufd6: +; AVX2: # BB#0: # %entry +; AVX2-NEXT: vbroadcastss %xmm0, %xmm0 +; AVX2-NEXT: retq entry: %b = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %a, i8 0) %c = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %b, i8 8)