OSDN Git Service

Add new X86 AVX2 VBROADCAST instructions.
authorCraig Topper <craig.topper@gmail.com>
Thu, 3 Nov 2011 07:35:53 +0000 (07:35 +0000)
committerCraig Topper <craig.topper@gmail.com>
Thu, 3 Nov 2011 07:35:53 +0000 (07:35 +0000)
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143612 91177308-0d34-0410-b5e6-96231b3b80d8

include/llvm/IntrinsicsX86.td
lib/Target/X86/X86InstrSSE.td
test/CodeGen/X86/avx-intrinsics-x86.ll
test/CodeGen/X86/avx2-intrinsics-x86.ll

index 04a119f..09f958c 100644 (file)
@@ -1281,13 +1281,13 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
 
 // Vector load with broadcast
 let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
-  def int_x86_avx_vbroadcastss :
+  def int_x86_avx_vbroadcast_ss :
         GCCBuiltin<"__builtin_ia32_vbroadcastss">,
         Intrinsic<[llvm_v4f32_ty], [llvm_ptr_ty], [IntrReadMem]>;
   def int_x86_avx_vbroadcast_sd_256 :
         GCCBuiltin<"__builtin_ia32_vbroadcastsd256">,
         Intrinsic<[llvm_v4f64_ty], [llvm_ptr_ty], [IntrReadMem]>;
-  def int_x86_avx_vbroadcastss_256 :
+  def int_x86_avx_vbroadcast_ss_256 :
         GCCBuiltin<"__builtin_ia32_vbroadcastss256">,
         Intrinsic<[llvm_v8f32_ty], [llvm_ptr_ty], [IntrReadMem]>;
   def int_x86_avx_vbroadcastf128_pd_256 :
@@ -1672,6 +1672,15 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
 
 // Vector load with broadcast
 let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
+  def int_x86_avx2_vbroadcast_ss_ps :
+        GCCBuiltin<"__builtin_ia32_vbroadcastss_ps">,
+        Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrReadMem]>;
+  def int_x86_avx2_vbroadcast_sd_pd_256 :
+        GCCBuiltin<"__builtin_ia32_vbroadcastsd_pd256">,
+        Intrinsic<[llvm_v4f64_ty], [llvm_v2f64_ty], [IntrReadMem]>;
+  def int_x86_avx2_vbroadcast_ss_ps_256 :
+        GCCBuiltin<"__builtin_ia32_vbroadcastss_ps256">,
+        Intrinsic<[llvm_v8f32_ty], [llvm_v4f32_ty], [IntrReadMem]>;
   def int_x86_avx2_vbroadcasti128 :
         GCCBuiltin<"__builtin_ia32_vbroadcastsi256">,
         Intrinsic<[llvm_v4i64_ty], [llvm_ptr_ty], [IntrReadMem]>;
index 77a9031..8f7e27b 100644 (file)
@@ -7083,35 +7083,48 @@ class avx_broadcast<bits<8> opc, string OpcodeStr, RegisterClass RC,
         !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
         [(set RC:$dst, (Int addr:$src))]>, VEX;
 
-def VBROADCASTSS   : avx_broadcast<0x18, "vbroadcastss", VR128, f32mem,
-                                   int_x86_avx_vbroadcastss>;
-def VBROADCASTSSY  : avx_broadcast<0x18, "vbroadcastss", VR256, f32mem,
-                                   int_x86_avx_vbroadcastss_256>;
-def VBROADCASTSD   : avx_broadcast<0x19, "vbroadcastsd", VR256, f64mem,
-                                   int_x86_avx_vbroadcast_sd_256>;
+class avx_broadcast_reg<bits<8> opc, string OpcodeStr, RegisterClass RC,
+                        Intrinsic Int> :
+  AVX8I<opc, MRMSrcReg, (outs RC:$dst), (ins VR128:$src),
+        !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+        [(set RC:$dst, (Int VR128:$src))]>, VEX;
+
+def VBROADCASTSSrm  : avx_broadcast<0x18, "vbroadcastss", VR128, f32mem,
+                                    int_x86_avx_vbroadcast_ss>;
+def VBROADCASTSSYrm : avx_broadcast<0x18, "vbroadcastss", VR256, f32mem,
+                                    int_x86_avx_vbroadcast_ss_256>;
+def VBROADCASTSDrm  : avx_broadcast<0x19, "vbroadcastsd", VR256, f64mem,
+                                    int_x86_avx_vbroadcast_sd_256>;
 def VBROADCASTF128 : avx_broadcast<0x1A, "vbroadcastf128", VR256, f128mem,
                                    int_x86_avx_vbroadcastf128_pd_256>;
 
-let Predicates = [HasAVX2] in
+let Predicates = [HasAVX2] in {
 def VBROADCASTI128 : avx_broadcast<0x5A, "vbroadcasti128", VR256, i128mem,
                                    int_x86_avx2_vbroadcasti128>;
+def VBROADCASTSSrr  : avx_broadcast_reg<0x18, "vbroadcastss", VR128,
+                                        int_x86_avx2_vbroadcast_ss_ps>;
+def VBROADCASTSSYrr : avx_broadcast_reg<0x18, "vbroadcastss", VR256,
+                                        int_x86_avx2_vbroadcast_ss_ps_256>;
+def VBROADCASTSDrr  : avx_broadcast_reg<0x19, "vbroadcastsd", VR256,
+                                        int_x86_avx2_vbroadcast_sd_pd_256>;
+}
 
 def : Pat<(int_x86_avx_vbroadcastf128_ps_256 addr:$src),
           (VBROADCASTF128 addr:$src)>;
 
 def : Pat<(v8i32 (X86VBroadcast (loadi32 addr:$src))),
-          (VBROADCASTSSY addr:$src)>;
+          (VBROADCASTSSYrm addr:$src)>;
 def : Pat<(v4i64 (X86VBroadcast (loadi64 addr:$src))),
-          (VBROADCASTSD addr:$src)>;
+          (VBROADCASTSDrm addr:$src)>;
 def : Pat<(v8f32 (X86VBroadcast (loadf32 addr:$src))),
-          (VBROADCASTSSY addr:$src)>;
+          (VBROADCASTSSYrm addr:$src)>;
 def : Pat<(v4f64 (X86VBroadcast (loadf64 addr:$src))),
-          (VBROADCASTSD addr:$src)>;
+          (VBROADCASTSDrm addr:$src)>;
 
 def : Pat<(v4f32 (X86VBroadcast (loadf32 addr:$src))),
-          (VBROADCASTSS addr:$src)>;
+          (VBROADCASTSSrm addr:$src)>;
 def : Pat<(v4i32 (X86VBroadcast (loadi32 addr:$src))),
-          (VBROADCASTSS addr:$src)>;
+          (VBROADCASTSSrm addr:$src)>;
 
 //===----------------------------------------------------------------------===//
 // VINSERTF128 - Insert packed floating-point values
index 2aad672..276209e 100644 (file)
@@ -2292,20 +2292,20 @@ define <8 x float> @test_x86_avx_vbroadcastf128_ps_256(i8* %a0) {
 declare <8 x float> @llvm.x86.avx.vbroadcastf128.ps.256(i8*) nounwind readonly
 
 
-define <4 x float> @test_x86_avx_vbroadcastss(i8* %a0) {
+define <4 x float> @test_x86_avx_vbroadcast_ss(i8* %a0) {
   ; CHECK: vbroadcastss
-  %res = call <4 x float> @llvm.x86.avx.vbroadcastss(i8* %a0) ; <<4 x float>> [#uses=1]
+  %res = call <4 x float> @llvm.x86.avx.vbroadcast.ss(i8* %a0) ; <<4 x float>> [#uses=1]
   ret <4 x float> %res
 }
-declare <4 x float> @llvm.x86.avx.vbroadcastss(i8*) nounwind readonly
+declare <4 x float> @llvm.x86.avx.vbroadcast.ss(i8*) nounwind readonly
 
 
-define <8 x float> @test_x86_avx_vbroadcastss_256(i8* %a0) {
+define <8 x float> @test_x86_avx_vbroadcast_ss_256(i8* %a0) {
   ; CHECK: vbroadcastss
-  %res = call <8 x float> @llvm.x86.avx.vbroadcastss.256(i8* %a0) ; <<8 x float>> [#uses=1]
+  %res = call <8 x float> @llvm.x86.avx.vbroadcast.ss.256(i8* %a0) ; <<8 x float>> [#uses=1]
   ret <8 x float> %res
 }
-declare <8 x float> @llvm.x86.avx.vbroadcastss.256(i8*) nounwind readonly
+declare <8 x float> @llvm.x86.avx.vbroadcast.ss.256(i8*) nounwind readonly
 
 
 define <2 x double> @test_x86_avx_vextractf128_pd_256(<4 x double> %a0) {
index 8aef255..81ffdea 100644 (file)
@@ -743,3 +743,26 @@ define <4 x i64> @test_x86_avx2_vbroadcasti128(i8* %a0) {
   ret <4 x i64> %res
 }
 declare <4 x i64> @llvm.x86.avx2.vbroadcasti128(i8*) nounwind readonly
+
+define <4 x double> @test_x86_avx2_vbroadcast_sd_pd_256(<2 x double> %a0) {
+  ; CHECK: vbroadcastsd
+  %res = call <4 x double> @llvm.x86.avx2.vbroadcast.sd.pd.256(<2 x double> %a0) ; <<4 x double>> [#uses=1]
+  ret <4 x double> %res
+}
+declare <4 x double> @llvm.x86.avx2.vbroadcast.sd.pd.256(<2 x double>) nounwind readonly
+
+
+define <4 x float> @test_x86_avx2_vbroadcast_ss_ps(<4 x float> %a0) {
+  ; CHECK: vbroadcastss
+  %res = call <4 x float> @llvm.x86.avx2.vbroadcast.ss.ps(<4 x float> %a0) ; <<4 x float>> [#uses=1]
+  ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.avx2.vbroadcast.ss.ps(<4 x float>) nounwind readonly
+
+
+define <8 x float> @test_x86_avx2_vbroadcast_ss_ps_256(<4 x float> %a0) {
+  ; CHECK: vbroadcastss
+  %res = call <8 x float> @llvm.x86.avx2.vbroadcast.ss.ps.256(<4 x float> %a0) ; <<8 x float>> [#uses=1]
+  ret <8 x float> %res
+}
+declare <8 x float> @llvm.x86.avx2.vbroadcast.ss.ps.256(<4 x float>) nounwind readonly