Allow more cases of undef shuffle indices and add tests for them.

author Bob Wilson <bob.wilson@apple.com>

Tue, 17 Aug 2010 05:54:34 +0000 (05:54 +0000)

committer Bob Wilson <bob.wilson@apple.com>

Tue, 17 Aug 2010 05:54:34 +0000 (05:54 +0000)
author Bob Wilson <bob.wilson@apple.com>
Tue, 17 Aug 2010 05:54:34 +0000 (05:54 +0000)
committer Bob Wilson <bob.wilson@apple.com>
Tue, 17 Aug 2010 05:54:34 +0000 (05:54 +0000)
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp

index b05c5dd..c2bd471 100644 (file)
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -3148,6 +3148,11 @@ static bool isVEXTMask(const SmallVectorImpl<int> &M, EVT VT,
                         bool &ReverseVEXT, unsigned &Imm) {
    unsigned NumElts = VT.getVectorNumElements();
    ReverseVEXT = false;
+
+  // Assume that the first shuffle index is not UNDEF.  Fail if it is.
+  if (M[0] < 0)
+    return false;
+
    Imm = M[0];
  
    // If this is a VEXT shuffle, the immediate value is the index of the first
@@ -3163,6 +3168,7 @@ static bool isVEXTMask(const SmallVectorImpl<int> &M, EVT VT,
        ReverseVEXT = true;
      }
  
+    if (M[i] < 0) continue; // ignore UNDEF indices
      if (ExpectedElt != static_cast<unsigned>(M[i]))
        return false;
    }
@@ -3188,13 +3194,16 @@ static bool isVREVMask(const SmallVectorImpl<int> &M, EVT VT,
  
    unsigned NumElts = VT.getVectorNumElements();
    unsigned BlockElts = M[0] + 1;
+  // If the first shuffle index is UNDEF, be optimistic.
+  if (M[0] < 0)
+    BlockElts = BlockSize / EltSz;
  
    if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz)
      return false;
  
    for (unsigned i = 0; i < NumElts; ++i) {
-    if ((unsigned) M[i] !=
-        (i - i%BlockElts) + (BlockElts - 1 - i%BlockElts))
+    if (M[i] < 0) continue; // ignore UNDEF indices
+    if ((unsigned) M[i] != (i - i%BlockElts) + (BlockElts - 1 - i%BlockElts))
        return false;
    }
  
@@ -3210,8 +3219,8 @@ static bool isVTRNMask(const SmallVectorImpl<int> &M, EVT VT,
    unsigned NumElts = VT.getVectorNumElements();
    WhichResult = (M[0] == 0 ? 0 : 1);
    for (unsigned i = 0; i < NumElts; i += 2) {
-    if ((unsigned) M[i] != i + WhichResult ||
-        (unsigned) M[i+1] != i + NumElts + WhichResult)
+    if ((M[i] >= 0 && (unsigned) M[i] != i + WhichResult) ||
+        (M[i+1] >= 0 && (unsigned) M[i+1] != i + NumElts + WhichResult))
        return false;
    }
    return true;
@@ -3229,9 +3238,8 @@ static bool isVTRN_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT,
    unsigned NumElts = VT.getVectorNumElements();
    WhichResult = (M[0] == 0 ? 0 : 1);
    for (unsigned i = 0; i < NumElts; i += 2) {
-    if (M[i] < 0) continue;
-    if ((unsigned) M[i] != i + WhichResult ||
-        (unsigned) M[i+1] != i + WhichResult)
+    if ((M[i] >= 0 && (unsigned) M[i] != i + WhichResult) ||
+        (M[i+1] >= 0 && (unsigned) M[i+1] != i + WhichResult))
        return false;
    }
    return true;
@@ -3246,6 +3254,7 @@ static bool isVUZPMask(const SmallVectorImpl<int> &M, EVT VT,
    unsigned NumElts = VT.getVectorNumElements();
    WhichResult = (M[0] == 0 ? 0 : 1);
    for (unsigned i = 0; i != NumElts; ++i) {
+    if (M[i] < 0) continue; // ignore UNDEF indices
      if ((unsigned) M[i] != 2 * i + WhichResult)
        return false;
    }
@@ -3271,7 +3280,8 @@ static bool isVUZP_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT,
    for (unsigned j = 0; j != 2; ++j) {
      unsigned Idx = WhichResult;
      for (unsigned i = 0; i != Half; ++i) {
-      if ((unsigned) M[i + j * Half] != Idx)
+      int MIdx = M[i + j * Half];
+      if (MIdx >= 0 && (unsigned) MIdx != Idx)
          return false;
        Idx += 2;
      }
@@ -3294,8 +3304,8 @@ static bool isVZIPMask(const SmallVectorImpl<int> &M, EVT VT,
    WhichResult = (M[0] == 0 ? 0 : 1);
    unsigned Idx = WhichResult * NumElts / 2;
    for (unsigned i = 0; i != NumElts; i += 2) {
-    if ((unsigned) M[i] != Idx ||
-        (unsigned) M[i+1] != Idx + NumElts)
+    if ((M[i] >= 0 && (unsigned) M[i] != Idx) ||
+        (M[i+1] >= 0 && (unsigned) M[i+1] != Idx + NumElts))
        return false;
      Idx += 1;
    }
@@ -3320,8 +3330,8 @@ static bool isVZIP_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT,
    WhichResult = (M[0] == 0 ? 0 : 1);
    unsigned Idx = WhichResult * NumElts / 2;
    for (unsigned i = 0; i != NumElts; i += 2) {
-    if ((unsigned) M[i] != Idx ||
-        (unsigned) M[i+1] != Idx)
+    if ((M[i] >= 0 && (unsigned) M[i] != Idx) ||
+        (M[i+1] >= 0 && (unsigned) M[i+1] != Idx))
        return false;
      Idx += 1;
    }
diff --git a/test/CodeGen/ARM/vext.ll b/test/CodeGen/ARM/vext.ll

index c11a67c..e460a84 100644 (file)
--- a/test/CodeGen/ARM/vext.ll
+++ b/test/CodeGen/ARM/vext.ll
@@ -54,3 +54,23 @@ define <4 x i32> @test_vextq32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
         ret <4 x i32> %tmp3
  }
  
+; Undef shuffle indices should not prevent matching to VEXT:
+
+define <8 x i8> @test_vextd_undef(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: test_vextd_undef:
+;CHECK: vext
+       %tmp1 = load <8 x i8>* %A
+       %tmp2 = load <8 x i8>* %B
+       %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 3, i32 undef, i32 undef, i32 6, i32 7, i32 8, i32 9, i32 10>
+       ret <8 x i8> %tmp3
+}
+
+define <16 x i8> @test_vextRq_undef(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: test_vextRq_undef:
+;CHECK: vext
+       %tmp1 = load <16 x i8>* %A
+       %tmp2 = load <16 x i8>* %B
+       %tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 23, i32 24, i32 25, i32 26, i32 undef, i32 undef, i32 29, i32 30, i32 31, i32 0, i32 1, i32 2, i32 3, i32 4, i32 undef, i32 6>
+       ret <16 x i8> %tmp3
+}
+
diff --git a/test/CodeGen/ARM/vrev.ll b/test/CodeGen/ARM/vrev.ll

index deed554..e1fe64b 100644 (file)
--- a/test/CodeGen/ARM/vrev.ll
+++ b/test/CodeGen/ARM/vrev.ll
@@ -111,3 +111,21 @@ define <16 x i8> @test_vrev16Q8(<16 x i8>* %A) nounwind {
         %tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
         ret <16 x i8> %tmp2
  }
+
+; Undef shuffle indices should not prevent matching to VREV:
+
+define <8 x i8> @test_vrev64D8_undef(<8 x i8>* %A) nounwind {
+;CHECK: test_vrev64D8_undef:
+;CHECK: vrev64.8
+       %tmp1 = load <8 x i8>* %A
+       %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> <i32 7, i32 undef, i32 undef, i32 4, i32 3, i32 2, i32 1, i32 0>
+       ret <8 x i8> %tmp2
+}
+
+define <8 x i16> @test_vrev32Q16_undef(<8 x i16>* %A) nounwind {
+;CHECK: test_vrev32Q16_undef:
+;CHECK: vrev32.16
+       %tmp1 = load <8 x i16>* %A
+       %tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <8 x i32> <i32 undef, i32 0, i32 undef, i32 2, i32 5, i32 4, i32 7, i32 undef>
+       ret <8 x i16> %tmp2
+}
diff --git a/test/CodeGen/ARM/vtrn.ll b/test/CodeGen/ARM/vtrn.ll

index 10bb10a..b1c2f93 100644 (file)
--- a/test/CodeGen/ARM/vtrn.ll
+++ b/test/CodeGen/ARM/vtrn.ll
@@ -95,3 +95,30 @@ define <4 x float> @vtrnQf(<4 x float>* %A, <4 x float>* %B) nounwind {
          %tmp5 = fadd <4 x float> %tmp3, %tmp4
         ret <4 x float> %tmp5
  }
+
+; Undef shuffle indices should not prevent matching to VTRN:
+
+define <8 x i8> @vtrni8_undef(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vtrni8_undef:
+;CHECK: vtrn.8
+;CHECK-NEXT: vadd.i8
+       %tmp1 = load <8 x i8>* %A
+       %tmp2 = load <8 x i8>* %B
+       %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 undef, i32 2, i32 10, i32 undef, i32 12, i32 6, i32 14>
+       %tmp4 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 undef, i32 undef, i32 15>
+        %tmp5 = add <8 x i8> %tmp3, %tmp4
+       ret <8 x i8> %tmp5
+}
+
+define <8 x i16> @vtrnQi16_undef(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vtrnQi16_undef:
+;CHECK: vtrn.16
+;CHECK-NEXT: vadd.i16
+       %tmp1 = load <8 x i16>* %A
+       %tmp2 = load <8 x i16>* %B
+       %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 0, i32 8, i32 undef, i32 undef, i32 4, i32 12, i32 6, i32 14>
+       %tmp4 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 1, i32 undef, i32 3, i32 11, i32 5, i32 13, i32 undef, i32 undef>
+        %tmp5 = add <8 x i16> %tmp3, %tmp4
+       ret <8 x i16> %tmp5
+}
+
diff --git a/test/CodeGen/ARM/vuzp.ll b/test/CodeGen/ARM/vuzp.ll

index 6cef188..9130f62 100644 (file)
--- a/test/CodeGen/ARM/vuzp.ll
+++ b/test/CodeGen/ARM/vuzp.ll
@@ -73,3 +73,30 @@ define <4 x float> @vuzpQf(<4 x float>* %A, <4 x float>* %B) nounwind {
          %tmp5 = fadd <4 x float> %tmp3, %tmp4
         ret <4 x float> %tmp5
  }
+
+; Undef shuffle indices should not prevent matching to VUZP:
+
+define <8 x i8> @vuzpi8_undef(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vuzpi8_undef:
+;CHECK: vuzp.8
+;CHECK-NEXT: vadd.i8
+       %tmp1 = load <8 x i8>* %A
+       %tmp2 = load <8 x i8>* %B
+       %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 2, i32 undef, i32 undef, i32 8, i32 10, i32 12, i32 14>
+       %tmp4 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 undef, i32 undef, i32 13, i32 15>
+        %tmp5 = add <8 x i8> %tmp3, %tmp4
+       ret <8 x i8> %tmp5
+}
+
+define <8 x i16> @vuzpQi16_undef(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vuzpQi16_undef:
+;CHECK: vuzp.16
+;CHECK-NEXT: vadd.i16
+       %tmp1 = load <8 x i16>* %A
+       %tmp2 = load <8 x i16>* %B
+       %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 0, i32 undef, i32 4, i32 undef, i32 8, i32 10, i32 12, i32 14>
+       %tmp4 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 1, i32 3, i32 5, i32 undef, i32 undef, i32 11, i32 13, i32 15>
+        %tmp5 = add <8 x i16> %tmp3, %tmp4
+       ret <8 x i16> %tmp5
+}
+
diff --git a/test/CodeGen/ARM/vzip.ll b/test/CodeGen/ARM/vzip.ll

index a9ecdca..926970a 100644 (file)
--- a/test/CodeGen/ARM/vzip.ll
+++ b/test/CodeGen/ARM/vzip.ll
@@ -73,3 +73,30 @@ define <4 x float> @vzipQf(<4 x float>* %A, <4 x float>* %B) nounwind {
          %tmp5 = fadd <4 x float> %tmp3, %tmp4
         ret <4 x float> %tmp5
  }
+
+; Undef shuffle indices should not prevent matching to VZIP:
+
+define <8 x i8> @vzipi8_undef(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vzipi8_undef:
+;CHECK: vzip.8
+;CHECK-NEXT: vadd.i8
+       %tmp1 = load <8 x i8>* %A
+       %tmp2 = load <8 x i8>* %B
+       %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 undef, i32 1, i32 9, i32 undef, i32 10, i32 3, i32 11>
+       %tmp4 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 undef, i32 undef, i32 15>
+        %tmp5 = add <8 x i8> %tmp3, %tmp4
+       ret <8 x i8> %tmp5
+}
+
+define <16 x i8> @vzipQi8_undef(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vzipQi8_undef:
+;CHECK: vzip.8
+;CHECK-NEXT: vadd.i8
+       %tmp1 = load <16 x i8>* %A
+       %tmp2 = load <16 x i8>* %B
+       %tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 0, i32 16, i32 1, i32 undef, i32 undef, i32 undef, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
+       %tmp4 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 8, i32 24, i32 9, i32 undef, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 undef, i32 14, i32 30, i32 undef, i32 31>
+        %tmp5 = add <16 x i8> %tmp3, %tmp4
+       ret <16 x i8> %tmp5
+}
+
author	Bob Wilson <bob.wilson@apple.com>
	Tue, 17 Aug 2010 05:54:34 +0000 (05:54 +0000)
committer	Bob Wilson <bob.wilson@apple.com>
	Tue, 17 Aug 2010 05:54:34 +0000 (05:54 +0000)
lib/Target/ARM/ARMISelLowering.cpp		patch | blob | history
test/CodeGen/ARM/vext.ll		patch | blob | history
test/CodeGen/ARM/vrev.ll		patch | blob | history
test/CodeGen/ARM/vtrn.ll		patch | blob | history
test/CodeGen/ARM/vuzp.ll		patch | blob | history
test/CodeGen/ARM/vzip.ll		patch | blob | history