OSDN Git Service

[X86] Add broadcast patterns that allow a scalar_to_vector between the broadcast...
authorCraig Topper <craig.topper@intel.com>
Tue, 10 Oct 2017 22:40:31 +0000 (22:40 +0000)
committerCraig Topper <craig.topper@intel.com>
Tue, 10 Oct 2017 22:40:31 +0000 (22:40 +0000)
We already have these patterns for AVX512VL, but not AVX1 or 2.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@315382 91177308-0d34-0410-b5e6-96231b3b80d8

lib/Target/X86/X86InstrSSE.td
test/CodeGen/X86/avx2-vbroadcast.ll

index c2f0864..3bbe310 100644 (file)
@@ -7381,6 +7381,15 @@ let ExeDomain = SSEPackedDouble, Predicates = [HasAVX2, NoVLX] in
 def VBROADCASTSDYrr  : avx2_broadcast_rr<0x19, "vbroadcastsd", VR256,
                                          v4f64, v2f64, WriteFShuffle256>, VEX_L;
 
+let Predicates = [HasAVX, NoVLX] in {
+  def : Pat<(v4f32 (X86VBroadcast (v4f32 (scalar_to_vector (loadf32 addr:$src))))),
+            (VBROADCASTSSrm addr:$src)>;
+  def : Pat<(v8f32 (X86VBroadcast (v4f32 (scalar_to_vector (loadf32 addr:$src))))),
+            (VBROADCASTSSYrm addr:$src)>;
+  def : Pat<(v4f64 (X86VBroadcast (v2f64 (scalar_to_vector (loadf64 addr:$src))))),
+            (VBROADCASTSDYrm addr:$src)>;
+}
+
 //===----------------------------------------------------------------------===//
 // VBROADCAST*128 - Load from memory and broadcast 128-bit vector to both
 //                  halves of a 256-bit vector.
@@ -7861,6 +7870,15 @@ let Predicates = [HasAVX2, NoVLX] in {
             (VPBROADCASTQrm addr:$src)>;
   def : Pat<(v4i64 (X86VBroadcast (v4i64 (X86vzload addr:$src)))),
             (VPBROADCASTQYrm addr:$src)>;
+
+  def : Pat<(v4i32 (X86VBroadcast (v4i32 (scalar_to_vector (loadi32 addr:$src))))),
+            (VPBROADCASTDrm addr:$src)>;
+  def : Pat<(v8i32 (X86VBroadcast (v4i32 (scalar_to_vector (loadi32 addr:$src))))),
+            (VPBROADCASTDYrm addr:$src)>;
+  def : Pat<(v2i64 (X86VBroadcast (v2i64 (scalar_to_vector (loadi64 addr:$src))))),
+            (VPBROADCASTQrm addr:$src)>;
+  def : Pat<(v4i64 (X86VBroadcast (v2i64 (scalar_to_vector (loadi64 addr:$src))))),
+            (VPBROADCASTQYrm addr:$src)>;
 }
 let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
   // loadi16 is tricky to fold, because !isTypeDesirableForOp, justifiably.
index 90e1232..08a1160 100644 (file)
@@ -273,8 +273,7 @@ define <16 x i16> @broadcast_mem_v4i16_v16i16(<4 x i16>* %ptr) {
 ;
 ; X64-AVX2-LABEL: broadcast_mem_v4i16_v16i16:
 ; X64-AVX2:       ## BB#0:
-; X64-AVX2-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
-; X64-AVX2-NEXT:    vbroadcastsd %xmm0, %ymm0
+; X64-AVX2-NEXT:    vbroadcastsd (%rdi), %ymm0
 ; X64-AVX2-NEXT:    retq
 ;
 ; X32-AVX512VL-LABEL: broadcast_mem_v4i16_v16i16: