OSDN Git Service

[X86] Remove int_x86_sse2_psll_dq_bs and int_x86_sse2_psrl_dq_bs intrinsics. The...
authorCraig Topper <craig.topper@gmail.com>
Fri, 13 Feb 2015 06:07:24 +0000 (06:07 +0000)
committerCraig Topper <craig.topper@gmail.com>
Fri, 13 Feb 2015 06:07:24 +0000 (06:07 +0000)
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@229069 91177308-0d34-0410-b5e6-96231b3b80d8

include/llvm/IR/IntrinsicsX86.td
lib/IR/AutoUpgrade.cpp
lib/Target/X86/X86InstrSSE.td
test/CodeGen/X86/avx-intrinsics-x86.ll
test/CodeGen/X86/sse2-intrinsics-x86.ll

index b5230dc..b602224 100644 (file)
@@ -460,12 +460,6 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
   def int_x86_sse2_psrl_dq :
               Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty,
                          llvm_i32_ty], [IntrNoMem]>;
-  def int_x86_sse2_psll_dq_bs : GCCBuiltin<"__builtin_ia32_pslldqi128_byteshift">,
-              Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty,
-                         llvm_i32_ty], [IntrNoMem]>;
-  def int_x86_sse2_psrl_dq_bs : GCCBuiltin<"__builtin_ia32_psrldqi128_byteshift">,
-              Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty,
-                         llvm_i32_ty], [IntrNoMem]>;
 }
 
 // Conversion ops
index 7f70fce..6d53d11 100644 (file)
@@ -163,6 +163,8 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
         Name == "x86.avx.vbroadcast.ss" ||
         Name == "x86.avx.vbroadcast.ss.256" ||
         Name == "x86.avx.vbroadcast.sd.256" ||
+        Name == "x86.sse2.psll.dq.bs" ||
+        Name == "x86.sse2.psrl.dq.bs" ||
         (Name.startswith("x86.xop.vpcom") && F->arg_size() == 2)) {
       NewFn = nullptr;
       return true;
@@ -487,6 +489,43 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
       for (unsigned I = 0; I < EltNum; ++I)
         Rep = Builder.CreateInsertElement(Rep, Load,
                                           ConstantInt::get(I32Ty, I));
+    } else if (Name == "llvm.x86.sse2.psll.dq.bs") {
+      Value *Op0 = ConstantVector::getSplat(16, Builder.getInt8(0));
+      Value *Op1 = Builder.CreateBitCast(CI->getArgOperand(0),
+                                         VectorType::get(Type::getInt8Ty(C),16),
+                                         "cast");
+
+      unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
+
+      if (Shift < 16) {
+        SmallVector<Constant*, 16> Idxs;
+        for (unsigned i = 16; i != 32; ++i)
+          Idxs.push_back(Builder.getInt32(i - Shift));
+
+        Op0 = Builder.CreateShuffleVector(Op0, Op1, ConstantVector::get(Idxs));
+      }
+
+      Rep = Builder.CreateBitCast(Op0,
+                                  VectorType::get(Type::getInt64Ty(C), 2),
+                                  "cast");
+    } else if (Name == "llvm.x86.sse2.psrl.dq.bs") {
+      Value *Op0 = Builder.CreateBitCast(CI->getArgOperand(0),
+                                         VectorType::get(Type::getInt8Ty(C),16),
+                                         "cast");
+      Value *Op1 = ConstantVector::getSplat(16, Builder.getInt8(0));
+
+      unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
+
+      if (Shift < 16) {
+        SmallVector<Constant*, 16> Idxs;
+        for (unsigned i = 0; i != 16; ++i)
+          Idxs.push_back(Builder.getInt32(i + Shift));
+
+        Op1 = Builder.CreateShuffleVector(Op0, Op1, ConstantVector::get(Idxs));
+      }
+      Rep = Builder.CreateBitCast(Op1,
+                                  VectorType::get(Type::getInt64Ty(C), 2),
+                                  "cast");
     } else {
       bool PD128 = false, PD256 = false, PS128 = false, PS256 = false;
       if (Name == "llvm.x86.avx.vpermil.pd.256")
index 3e9eafb..0d6a9f0 100644 (file)
@@ -4367,20 +4367,16 @@ defm VPSRAD : PDI_binop_rmi<0xE2, 0x72, MRM4r, "vpsrad", X86vsra, X86vsrai,
                             VR128, v4i32, v4i32, bc_v4i32, loadv2i64,
                             SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
 
-let ExeDomain = SSEPackedInt, SchedRW = [WriteVecShift] in {
+let ExeDomain = SSEPackedInt, SchedRW = [WriteVecShift], hasSideEffects = 0 in {
   // 128-bit logical shifts.
   def VPSLLDQri : PDIi8<0x73, MRM7r,
                     (outs VR128:$dst), (ins VR128:$src1, i32u8imm:$src2),
                     "vpslldq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-                    [(set VR128:$dst,
-                      (int_x86_sse2_psll_dq_bs VR128:$src1, imm:$src2))]>,
-                    VEX_4V;
+                    []>, VEX_4V;
   def VPSRLDQri : PDIi8<0x73, MRM3r,
                     (outs VR128:$dst), (ins VR128:$src1, i32u8imm:$src2),
                     "vpsrldq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-                    [(set VR128:$dst,
-                      (int_x86_sse2_psrl_dq_bs VR128:$src1, imm:$src2))]>,
-                    VEX_4V;
+                    []>, VEX_4V;
   // PSRADQri doesn't exist in SSE[1-3].
 }
 } // Predicates = [HasAVX]
@@ -4459,20 +4455,16 @@ defm PSRAD : PDI_binop_rmi<0xE2, 0x72, MRM4r, "psrad", X86vsra, X86vsrai,
                            VR128, v4i32, v4i32, bc_v4i32, memopv2i64,
                            SSE_INTSHIFT_ITINS_P>;
 
-let ExeDomain = SSEPackedInt, SchedRW = [WriteVecShift] in {
+let ExeDomain = SSEPackedInt, SchedRW = [WriteVecShift], hasSideEffects = 0 in {
   // 128-bit logical shifts.
   def PSLLDQri : PDIi8<0x73, MRM7r,
                        (outs VR128:$dst), (ins VR128:$src1, i32u8imm:$src2),
                        "pslldq\t{$src2, $dst|$dst, $src2}",
-                       [(set VR128:$dst,
-                         (int_x86_sse2_psll_dq_bs VR128:$src1, imm:$src2))],
-                         IIC_SSE_INTSHDQ_P_RI>;
+                       [], IIC_SSE_INTSHDQ_P_RI>;
   def PSRLDQri : PDIi8<0x73, MRM3r,
                        (outs VR128:$dst), (ins VR128:$src1, i32u8imm:$src2),
                        "psrldq\t{$src2, $dst|$dst, $src2}",
-                       [(set VR128:$dst,
-                         (int_x86_sse2_psrl_dq_bs VR128:$src1, imm:$src2))],
-                         IIC_SSE_INTSHDQ_P_RI>;
+                       [], IIC_SSE_INTSHDQ_P_RI>;
   // PSRADQri doesn't exist in SSE[1-3].
 }
 } // Constraints = "$src1 = $dst"
index bb9354c..2696f3b 100644 (file)
@@ -465,14 +465,6 @@ define <2 x i64> @test_x86_sse2_psll_dq(<2 x i64> %a0) {
 declare <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64>, i32) nounwind readnone
 
 
-define <2 x i64> @test_x86_sse2_psll_dq_bs(<2 x i64> %a0) {
-  ; CHECK: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8]
-  %res = call <2 x i64> @llvm.x86.sse2.psll.dq.bs(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
-  ret <2 x i64> %res
-}
-declare <2 x i64> @llvm.x86.sse2.psll.dq.bs(<2 x i64>, i32) nounwind readnone
-
-
 define <2 x i64> @test_x86_sse2_psll_q(<2 x i64> %a0, <2 x i64> %a1) {
   ; CHECK: vpsllq
   %res = call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
@@ -561,14 +553,6 @@ define <2 x i64> @test_x86_sse2_psrl_dq(<2 x i64> %a0) {
 declare <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64>, i32) nounwind readnone
 
 
-define <2 x i64> @test_x86_sse2_psrl_dq_bs(<2 x i64> %a0) {
-  ; CHECK: vpsrldq {{.*#+}} xmm0 = xmm0[7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero
-  %res = call <2 x i64> @llvm.x86.sse2.psrl.dq.bs(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
-  ret <2 x i64> %res
-}
-declare <2 x i64> @llvm.x86.sse2.psrl.dq.bs(<2 x i64>, i32) nounwind readnone
-
-
 define <2 x i64> @test_x86_sse2_psrl_q(<2 x i64> %a0, <2 x i64> %a1) {
   ; CHECK: vpsrlq
   %res = call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
index ddb0421..99357e7 100644 (file)
@@ -418,14 +418,6 @@ define <2 x i64> @test_x86_sse2_psll_dq(<2 x i64> %a0) {
 declare <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64>, i32) nounwind readnone
 
 
-define <2 x i64> @test_x86_sse2_psll_dq_bs(<2 x i64> %a0) {
-  ; CHECK: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8]
-  %res = call <2 x i64> @llvm.x86.sse2.psll.dq.bs(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
-  ret <2 x i64> %res
-}
-declare <2 x i64> @llvm.x86.sse2.psll.dq.bs(<2 x i64>, i32) nounwind readnone
-
-
 define <2 x i64> @test_x86_sse2_psll_q(<2 x i64> %a0, <2 x i64> %a1) {
   ; CHECK: psllq
   %res = call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
@@ -514,14 +506,6 @@ define <2 x i64> @test_x86_sse2_psrl_dq(<2 x i64> %a0) {
 declare <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64>, i32) nounwind readnone
 
 
-define <2 x i64> @test_x86_sse2_psrl_dq_bs(<2 x i64> %a0) {
-  ; CHECK: psrldq {{.*#+}} xmm0 = xmm0[7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero
-  %res = call <2 x i64> @llvm.x86.sse2.psrl.dq.bs(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
-  ret <2 x i64> %res
-}
-declare <2 x i64> @llvm.x86.sse2.psrl.dq.bs(<2 x i64>, i32) nounwind readnone
-
-
 define <2 x i64> @test_x86_sse2_psrl_q(<2 x i64> %a0, <2 x i64> %a1) {
   ; CHECK: psrlq
   %res = call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]