OSDN Git Service

Instcombile optimization: extractelement(cast) -> cast(extractelement)
authorNadav Rotem <nadav.rotem@intel.com>
Thu, 31 Mar 2011 22:57:29 +0000 (22:57 +0000)
committerNadav Rotem <nadav.rotem@intel.com>
Thu, 31 Mar 2011 22:57:29 +0000 (22:57 +0000)
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@128683 91177308-0d34-0410-b5e6-96231b3b80d8

lib/Transforms/InstCombine/InstCombineVectorOps.cpp
test/Transforms/InstCombine/ExtractCast.ll [new file with mode: 0644]

index 5caa12d..ad6a8d0 100644 (file)
@@ -230,8 +230,16 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
                                           ConstantInt::get(Int32Ty,
                                                            SrcIdx, false));
       }
+    } else if (CastInst *CI = dyn_cast<CastInst>(I)) {
+      // Canonicalize extractelement(cast) -> cast(extractelement)
+      // bitcasts can change the number of vector elements and they cost nothing
+      if (CI->hasOneUse() && EI.hasOneUse() &&
+          (CI->getOpcode() != Instruction::BitCast)) {
+        Value *EE = Builder->CreateExtractElement(CI->getOperand(0),
+                                                  EI.getIndexOperand());
+        return CastInst::Create(CI->getOpcode(), EE, EI.getType());
+      }
     }
-    // FIXME: Canonicalize extractelement(bitcast) -> bitcast(extractelement)
   }
   return 0;
 }
diff --git a/test/Transforms/InstCombine/ExtractCast.ll b/test/Transforms/InstCombine/ExtractCast.ll
new file mode 100644 (file)
index 0000000..5ebbefd
--- /dev/null
@@ -0,0 +1,27 @@
+; RUN: opt < %s -instcombine -S -o - | FileCheck %s
+
+; CHECK: @a
+define i32 @a(<4 x i64> %I) {
+entry:
+; CHECK-NOT: trunc <4 x i64>
+        %J = trunc <4 x i64> %I to <4 x i32>
+        %K = extractelement <4 x i32> %J, i32 3
+; CHECK: extractelement <4 x i64>
+; CHECK: trunc i64
+; CHECK: ret
+        ret i32 %K
+}
+
+
+; CHECK: @b
+define i32 @b(<4 x float> %I) {
+entry:
+; CHECK-NOT: fptosi <4 x float>
+        %J = fptosi <4 x float> %I to <4 x i32>
+        %K = extractelement <4 x i32> %J, i32 3
+; CHECK: extractelement <4 x float>
+; CHECK: fptosi float
+; CHECK: ret
+        ret i32 %K
+}
+