From 153162f4b6391efdcc005cff348db703b135c0ec Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Tue, 19 Apr 2016 12:56:46 +0000
Subject: [PATCH] [InstCombine][X86] Regenerate SSE combine tests as part of
 setup for D17490

Regenerated with utils/update_test_checks.py

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@266731 91177308-0d34-0410-b5e6-96231b3b80d8
---
 test/Transforms/InstCombine/x86-f16c.ll  |  37 ++--
 test/Transforms/InstCombine/x86-sse.ll   | 299 +++++++++++++++++--------------
 test/Transforms/InstCombine/x86-sse2.ll  | 249 +++++++++++++------------
 test/Transforms/InstCombine/x86-sse41.ll |  77 ++++----
 test/Transforms/InstCombine/x86-sse4a.ll | 228 +++++++++++++----------
 test/Transforms/InstCombine/x86-xop.ll   | 159 ++++++++--------
 6 files changed, 581 insertions(+), 468 deletions(-)
diff --git a/test/Transforms/InstCombine/x86-f16c.ll b/test/Transforms/InstCombine/x86-f16c.ll
index e10b339907e..6b5b6cb26ed 100644
--- a/test/Transforms/InstCombine/x86-f16c.ll
+++ b/test/Transforms/InstCombine/x86-f16c.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt < %s -instcombine -S | FileCheck %s
 
 declare <4 x float> @llvm.x86.vcvtph2ps.128(<8 x i16>)
@@ -9,9 +10,10 @@ declare <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16>)
 
 ; Only bottom 4 elements required.
 define <4 x float> @demand_vcvtph2ps_128(<8 x i16> %A) {
-; CHECK-LABEL: @demand_vcvtph2ps_128
-; CHECK-NEXT: %1 = tail call <4 x float> @llvm.x86.vcvtph2ps.128(<8 x i16> %A)
-; CHECK-NEXT: ret <4 x float> %1
+; CHECK-LABEL: @demand_vcvtph2ps_128(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.vcvtph2ps.128(<8 x i16> %A)
+; CHECK-NEXT:    ret <4 x float> [[TMP1]]
+;
   %1 = shufflevector <8 x i16> %A, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
   %2 = tail call <4 x float> @llvm.x86.vcvtph2ps.128(<8 x i16> %1)
   ret <4 x float> %2
@@ -19,10 +21,11 @@ define <4 x float> @demand_vcvtph2ps_128(<8 x i16> %A) {
 
 ; All 8 elements required.
 define <8 x float> @demand_vcvtph2ps_256(<8 x i16> %A) {
-; CHECK-LABEL: @demand_vcvtph2ps_256
-; CHECK-NEXT: %1 = shufflevector <8 x i16> %A, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT: %2 = tail call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> %1)
-; CHECK-NEXT: ret <8 x float> %2
+; CHECK-LABEL: @demand_vcvtph2ps_256(
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i16> %A, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> [[TMP1]])
+; CHECK-NEXT:    ret <8 x float> [[TMP2]]
+;
   %1 = shufflevector <8 x i16> %A, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
   %2 = tail call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> %1)
   ret <8 x float> %2
@@ -33,29 +36,33 @@ define <8 x float> @demand_vcvtph2ps_256(<8 x i16> %A) {
 ;
 
 define <4 x float> @fold_vcvtph2ps_128() {
-; CHECK-LABEL: @fold_vcvtph2ps_128
-; CHECK-NEXT: ret <4 x float> <float 0.000000e+00, float 5.000000e-01, float 1.000000e+00, float -0.000000e+00>
+; CHECK-LABEL: @fold_vcvtph2ps_128(
+; CHECK-NEXT:    ret <4 x float> <float 0.000000e+00, float 5.000000e-01, float 1.000000e+00, float -0.000000e+00>
+;
   %1 = tail call <4 x float> @llvm.x86.vcvtph2ps.128(<8 x i16> <i16 0, i16 14336, i16 15360, i16 32768, i16 16384, i16 31743, i16 48128, i16 49152>)
   ret <4 x float> %1
 }
 
 define <8 x float> @fold_vcvtph2ps_256() {
-; CHECK-LABEL: @fold_vcvtph2ps_256
-; CHECK-NEXT: ret <8 x float> <float 0.000000e+00, float 5.000000e-01, float 1.000000e+00, float -0.000000e+00, float 2.000000e+00, float 6.550400e+04, float -1.000000e+00, float -2.000000e+00>
+; CHECK-LABEL: @fold_vcvtph2ps_256(
+; CHECK-NEXT:    ret <8 x float> <float 0.000000e+00, float 5.000000e-01, float 1.000000e+00, float -0.000000e+00, float 2.000000e+00, float 6.550400e+04, float -1.000000e+00, float -2.000000e+00>
+;
   %1 = tail call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> <i16 0, i16 14336, i16 15360, i16 32768, i16 16384, i16 31743, i16 48128, i16 49152>)
   ret <8 x float> %1
 }
 
 define <4 x float> @fold_vcvtph2ps_128_zero() {
-; CHECK-LABEL: @fold_vcvtph2ps_128_zero
-; CHECK-NEXT: ret <4 x float> zeroinitializer
+; CHECK-LABEL: @fold_vcvtph2ps_128_zero(
+; CHECK-NEXT:    ret <4 x float> zeroinitializer
+;
   %1 = tail call <4 x float> @llvm.x86.vcvtph2ps.128(<8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>)
   ret <4 x float> %1
 }
 
 define <8 x float> @fold_vcvtph2ps_256_zero() {
-; CHECK-LABEL: @fold_vcvtph2ps_256_zero
-; CHECK-NEXT: ret <8 x float> zeroinitializer
+; CHECK-LABEL: @fold_vcvtph2ps_256_zero(
+; CHECK-NEXT:    ret <8 x float> zeroinitializer
+;
   %1 = tail call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>)
   ret <8 x float> %1
 }
diff --git a/test/Transforms/InstCombine/x86-sse.ll b/test/Transforms/InstCombine/x86-sse.ll
index c38aae914fa..08509dbd717 100644
--- a/test/Transforms/InstCombine/x86-sse.ll
+++ b/test/Transforms/InstCombine/x86-sse.ll
@@ -1,15 +1,17 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt < %s -instcombine -S | FileCheck %s
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 
 define float @test_rcp_ss_0(float %a) {
-; CHECK-LABEL: @test_rcp_ss_0
-; CHECK-NEXT: %1 = insertelement <4 x float> undef, float %a, i32 0
-; CHECK-NEXT: %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
-; CHECK-NEXT: %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
-; CHECK-NEXT: %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
-; CHECK-NEXT: %5 = tail call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %4)
-; CHECK-NEXT: %6 = extractelement <4 x float> %5, i32 0
-; CHECK-NEXT: ret float %6
+; CHECK-LABEL: @test_rcp_ss_0(
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> [[TMP1]], float 1.000000e+00, i32 1
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x float> [[TMP2]], float 2.000000e+00, i32 2
+; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x float> [[TMP3]], float 3.000000e+00, i32 3
+; CHECK-NEXT:    [[TMP5:%.*]] = tail call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> [[TMP4]])
+; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x float> [[TMP5]], i32 0
+; CHECK-NEXT:    ret float [[TMP6]]
+;
   %1 = insertelement <4 x float> undef, float %a, i32 0
   %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
   %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
@@ -20,14 +22,15 @@ define float @test_rcp_ss_0(float %a) {
 }
 
 define float @test_sqrt_ss_0(float %a) {
-; CHECK-LABEL: @test_sqrt_ss_0
-; CHECK-NEXT: %1 = insertelement <4 x float> undef, float %a, i32 0
-; CHECK-NEXT: %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
-; CHECK-NEXT: %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
-; CHECK-NEXT: %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
-; CHECK-NEXT: %5 = tail call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %4)
-; CHECK-NEXT: %6 = extractelement <4 x float> %5, i32 0
-; CHECK-NEXT: ret float %6
+; CHECK-LABEL: @test_sqrt_ss_0(
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> [[TMP1]], float 1.000000e+00, i32 1
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x float> [[TMP2]], float 2.000000e+00, i32 2
+; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x float> [[TMP3]], float 3.000000e+00, i32 3
+; CHECK-NEXT:    [[TMP5:%.*]] = tail call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> [[TMP4]])
+; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x float> [[TMP5]], i32 0
+; CHECK-NEXT:    ret float [[TMP6]]
+;
   %1 = insertelement <4 x float> undef, float %a, i32 0
   %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
   %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
@@ -38,14 +41,15 @@ define float @test_sqrt_ss_0(float %a) {
 }
 
 define float @test_rsqrt_ss_0(float %a) {
-; CHECK-LABEL: @test_rsqrt_ss_0
-; CHECK-NEXT: %1 = insertelement <4 x float> undef, float %a, i32 0
-; CHECK-NEXT: %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
-; CHECK-NEXT: %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
-; CHECK-NEXT: %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
-; CHECK-NEXT: %5 = tail call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %4)
-; CHECK-NEXT: %6 = extractelement <4 x float> %5, i32 0
-; CHECK-NEXT: ret float %6
+; CHECK-LABEL: @test_rsqrt_ss_0(
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> [[TMP1]], float 1.000000e+00, i32 1
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x float> [[TMP2]], float 2.000000e+00, i32 2
+; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x float> [[TMP3]], float 3.000000e+00, i32 3
+; CHECK-NEXT:    [[TMP5:%.*]] = tail call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> [[TMP4]])
+; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x float> [[TMP5]], i32 0
+; CHECK-NEXT:    ret float [[TMP6]]
+;
   %1 = insertelement <4 x float> undef, float %a, i32 0
   %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
   %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
@@ -56,18 +60,19 @@ define float @test_rsqrt_ss_0(float %a) {
 }
 
 define float @test_add_ss_0(float %a, float %b) {
-; CHECK-LABEL: @test_add_ss_0
-; CHECK-NEXT: %1 = insertelement <4 x float> undef, float %a, i32 0
-; CHECK-NEXT: %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
-; CHECK-NEXT: %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
-; CHECK-NEXT: %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
-; CHECK-NEXT: %5 = insertelement <4 x float> undef, float %b, i32 0
-; CHECK-NEXT: %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
-; CHECK-NEXT: %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
-; CHECK-NEXT: %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
-; CHECK-NEXT: %9 = tail call <4 x float> @llvm.x86.sse.add.ss(<4 x float> %4, <4 x float> %8)
-; CHECK-NEXT: %r = extractelement <4 x float> %9, i32 0
-; CHECK-NEXT: ret float %r
+; CHECK-LABEL: @test_add_ss_0(
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> [[TMP1]], float 1.000000e+00, i32 1
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x float> [[TMP2]], float 2.000000e+00, i32 2
+; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x float> [[TMP3]], float 3.000000e+00, i32 3
+; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <4 x float> undef, float %b, i32 0
+; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <4 x float> [[TMP5]], float 4.000000e+00, i32 1
+; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <4 x float> [[TMP6]], float 5.000000e+00, i32 2
+; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <4 x float> [[TMP7]], float 6.000000e+00, i32 3
+; CHECK-NEXT:    [[TMP9:%.*]] = tail call <4 x float> @llvm.x86.sse.add.ss(<4 x float> [[TMP4]], <4 x float> [[TMP8]])
+; CHECK-NEXT:    [[R:%.*]] = extractelement <4 x float> [[TMP9]], i32 0
+; CHECK-NEXT:    ret float [[R]]
+;
   %1 = insertelement <4 x float> undef, float %a, i32 0
   %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
   %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
@@ -82,9 +87,10 @@ define float @test_add_ss_0(float %a, float %b) {
 }
 
 define float @test_sub_ss_0(float %a, float %b) {
-; CHECK-LABEL: @test_sub_ss_0
-; CHECK-NEXT: %1 = fsub float %a, %b
-; CHECK-NEXT: ret float %1
+; CHECK-LABEL: @test_sub_ss_0(
+; CHECK-NEXT:    [[TMP1:%.*]] = fsub float %a, %b
+; CHECK-NEXT:    ret float [[TMP1]]
+;
   %1 = insertelement <4 x float> undef, float %a, i32 0
   %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
   %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
@@ -99,9 +105,10 @@ define float @test_sub_ss_0(float %a, float %b) {
 }
 
 define float @test_mul_ss_0(float %a, float %b) {
-; CHECK-LABEL: @test_mul_ss_0
-; CHECK-NEXT: %1 = fmul float %a, %b
-; CHECK-NEXT: ret float %1
+; CHECK-LABEL: @test_mul_ss_0(
+; CHECK-NEXT:    [[TMP1:%.*]] = fmul float %a, %b
+; CHECK-NEXT:    ret float [[TMP1]]
+;
   %1 = insertelement <4 x float> undef, float %a, i32 0
   %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
   %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
@@ -116,18 +123,19 @@ define float @test_mul_ss_0(float %a, float %b) {
 }
 
 define float @test_div_ss_0(float %a, float %b) {
-; CHECK-LABEL: @test_div_ss_0
-; CHECK-NEXT: %1 = insertelement <4 x float> undef, float %a, i32 0
-; CHECK-NEXT: %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
-; CHECK-NEXT: %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
-; CHECK-NEXT: %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
-; CHECK-NEXT: %5 = insertelement <4 x float> undef, float %b, i32 0
-; CHECK-NEXT: %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
-; CHECK-NEXT: %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
-; CHECK-NEXT: %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
-; CHECK-NEXT: %9 = tail call <4 x float> @llvm.x86.sse.div.ss(<4 x float> %4, <4 x float> %8)
-; CHECK-NEXT: %r = extractelement <4 x float> %9, i32 0
-; CHECK-NEXT: ret float %r
+; CHECK-LABEL: @test_div_ss_0(
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> [[TMP1]], float 1.000000e+00, i32 1
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x float> [[TMP2]], float 2.000000e+00, i32 2
+; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x float> [[TMP3]], float 3.000000e+00, i32 3
+; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <4 x float> undef, float %b, i32 0
+; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <4 x float> [[TMP5]], float 4.000000e+00, i32 1
+; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <4 x float> [[TMP6]], float 5.000000e+00, i32 2
+; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <4 x float> [[TMP7]], float 6.000000e+00, i32 3
+; CHECK-NEXT:    [[TMP9:%.*]] = tail call <4 x float> @llvm.x86.sse.div.ss(<4 x float> [[TMP4]], <4 x float> [[TMP8]])
+; CHECK-NEXT:    [[R:%.*]] = extractelement <4 x float> [[TMP9]], i32 0
+; CHECK-NEXT:    ret float [[R]]
+;
   %1 = insertelement <4 x float> undef, float %a, i32 0
   %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
   %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
@@ -142,12 +150,13 @@ define float @test_div_ss_0(float %a, float %b) {
 }
 
 define float @test_min_ss_0(float %a, float %b) {
-; CHECK-LABEL: @test_min_ss_0
-; CHECK-NEXT: %1 = insertelement <4 x float> undef, float %a, i32 0
-; CHECK-NEXT: %2 = insertelement <4 x float> undef, float %b, i32 0
-; CHECK-NEXT: %3 = tail call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %1, <4 x float> %2)
-; CHECK-NEXT: %4 = extractelement <4 x float> %3, i32 0
-; CHECK-NEXT: ret float %4
+; CHECK-LABEL: @test_min_ss_0(
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call <4 x float> @llvm.x86.sse.min.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x float> [[TMP3]], i32 0
+; CHECK-NEXT:    ret float [[TMP4]]
+;
   %1 = insertelement <4 x float> undef, float %a, i32 0
   %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
   %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
@@ -162,12 +171,13 @@ define float @test_min_ss_0(float %a, float %b) {
 }
 
 define float @test_max_ss_0(float %a, float %b) {
-; CHECK-LABEL: @test_max_ss_0
-; CHECK-NEXT: %1 = insertelement <4 x float> undef, float %a, i32 0
-; CHECK-NEXT: %2 = insertelement <4 x float> undef, float %b, i32 0
-; CHECK-NEXT: %3 = tail call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %1, <4 x float> %2)
-; CHECK-NEXT: %4 = extractelement <4 x float> %3, i32 0
-; CHECK-NEXT: ret float %4
+; CHECK-LABEL: @test_max_ss_0(
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call <4 x float> @llvm.x86.sse.max.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x float> [[TMP3]], i32 0
+; CHECK-NEXT:    ret float [[TMP4]]
+;
   %1 = insertelement <4 x float> undef, float %a, i32 0
   %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
   %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
@@ -182,18 +192,19 @@ define float @test_max_ss_0(float %a, float %b) {
 }
 
 define float @test_cmp_ss_0(float %a, float %b) {
-; CHECK-LABEL: @test_cmp_ss_0
-; CHECK-NEXT: %1 = insertelement <4 x float> undef, float %a, i32 0
-; CHECK-NEXT: %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
-; CHECK-NEXT: %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
-; CHECK-NEXT: %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
-; CHECK-NEXT: %5 = insertelement <4 x float> undef, float %b, i32 0
-; CHECK-NEXT: %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
-; CHECK-NEXT: %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
-; CHECK-NEXT: %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
-; CHECK-NEXT: %9 = tail call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %4, <4 x float> %8, i8 0)
-; CHECK-NEXT: %r = extractelement <4 x float> %9, i32 0
-; CHECK-NEXT: ret float %r
+; CHECK-LABEL: @test_cmp_ss_0(
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> [[TMP1]], float 1.000000e+00, i32 1
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x float> [[TMP2]], float 2.000000e+00, i32 2
+; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x float> [[TMP3]], float 3.000000e+00, i32 3
+; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <4 x float> undef, float %b, i32 0
+; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <4 x float> [[TMP5]], float 4.000000e+00, i32 1
+; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <4 x float> [[TMP6]], float 5.000000e+00, i32 2
+; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <4 x float> [[TMP7]], float 6.000000e+00, i32 3
+; CHECK-NEXT:    [[TMP9:%.*]] = tail call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> [[TMP4]], <4 x float> [[TMP8]], i8 0)
+; CHECK-NEXT:    [[R:%.*]] = extractelement <4 x float> [[TMP9]], i32 0
+; CHECK-NEXT:    ret float [[R]]
+;
   %1 = insertelement <4 x float> undef, float %a, i32 0
   %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
   %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
@@ -208,11 +219,12 @@ define float @test_cmp_ss_0(float %a, float %b) {
 }
 
 define i32 @test_comieq_ss_0(float %a, float %b) {
-; CHECK-LABEL: @test_comieq_ss_0
-; CHECK-NEXT: %1 = insertelement <4 x float> undef, float %a, i32 0
-; CHECK-NEXT: %2 = insertelement <4 x float> undef, float %b, i32 0
-; CHECK-NEXT: %3 = tail call i32 @llvm.x86.sse.comieq.ss(<4 x float> %1, <4 x float> %2)
-; CHECK-NEXT: ret i32 %3
+; CHECK-LABEL: @test_comieq_ss_0(
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.comieq.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
+; CHECK-NEXT:    ret i32 [[TMP3]]
+;
   %1 = insertelement <4 x float> undef, float %a, i32 0
   %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
   %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
@@ -226,11 +238,12 @@ define i32 @test_comieq_ss_0(float %a, float %b) {
 }
 
 define i32 @test_comige_ss_0(float %a, float %b) {
-; CHECK-LABEL: @test_comige_ss_0
-; CHECK-NEXT: %1 = insertelement <4 x float> undef, float %a, i32 0
-; CHECK-NEXT: %2 = insertelement <4 x float> undef, float %b, i32 0
-; CHECK-NEXT: %3 = tail call i32 @llvm.x86.sse.comige.ss(<4 x float> %1, <4 x float> %2)
-; CHECK-NEXT: ret i32 %3
+; CHECK-LABEL: @test_comige_ss_0(
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.comige.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
+; CHECK-NEXT:    ret i32 [[TMP3]]
+;
   %1 = insertelement <4 x float> undef, float %a, i32 0
   %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
   %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
@@ -244,11 +257,12 @@ define i32 @test_comige_ss_0(float %a, float %b) {
 }
 
 define i32 @test_comigt_ss_0(float %a, float %b) {
-; CHECK-LABEL: @test_comigt_ss_0
-; CHECK-NEXT: %1 = insertelement <4 x float> undef, float %a, i32 0
-; CHECK-NEXT: %2 = insertelement <4 x float> undef, float %b, i32 0
-; CHECK-NEXT: %3 = tail call i32 @llvm.x86.sse.comigt.ss(<4 x float> %1, <4 x float> %2)
-; CHECK-NEXT: ret i32 %3
+; CHECK-LABEL: @test_comigt_ss_0(
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.comigt.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
+; CHECK-NEXT:    ret i32 [[TMP3]]
+;
   %1 = insertelement <4 x float> undef, float %a, i32 0
   %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
   %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
@@ -262,11 +276,12 @@ define i32 @test_comigt_ss_0(float %a, float %b) {
 }
 
 define i32 @test_comile_ss_0(float %a, float %b) {
-; CHECK-LABEL: @test_comile_ss_0
-; CHECK-NEXT: %1 = insertelement <4 x float> undef, float %a, i32 0
-; CHECK-NEXT: %2 = insertelement <4 x float> undef, float %b, i32 0
-; CHECK-NEXT: %3 = tail call i32 @llvm.x86.sse.comile.ss(<4 x float> %1, <4 x float> %2)
-; CHECK-NEXT: ret i32 %3
+; CHECK-LABEL: @test_comile_ss_0(
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.comile.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
+; CHECK-NEXT:    ret i32 [[TMP3]]
+;
   %1 = insertelement <4 x float> undef, float %a, i32 0
   %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
   %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
@@ -280,11 +295,12 @@ define i32 @test_comile_ss_0(float %a, float %b) {
 }
 
 define i32 @test_comilt_ss_0(float %a, float %b) {
-; CHECK-LABEL: @test_comilt_ss_0
-; CHECK-NEXT: %1 = insertelement <4 x float> undef, float %a, i32 0
-; CHECK-NEXT: %2 = insertelement <4 x float> undef, float %b, i32 0
-; CHECK-NEXT: %3 = tail call i32 @llvm.x86.sse.comilt.ss(<4 x float> %1, <4 x float> %2)
-; CHECK-NEXT: ret i32 %3
+; CHECK-LABEL: @test_comilt_ss_0(
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.comilt.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
+; CHECK-NEXT:    ret i32 [[TMP3]]
+;
   %1 = insertelement <4 x float> undef, float %a, i32 0
   %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
   %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
@@ -298,11 +314,12 @@ define i32 @test_comilt_ss_0(float %a, float %b) {
 }
 
 define i32 @test_comineq_ss_0(float %a, float %b) {
-; CHECK-LABEL: @test_comineq_ss_0
-; CHECK-NEXT: %1 = insertelement <4 x float> undef, float %a, i32 0
-; CHECK-NEXT: %2 = insertelement <4 x float> undef, float %b, i32 0
-; CHECK-NEXT: %3 = tail call i32 @llvm.x86.sse.comineq.ss(<4 x float> %1, <4 x float> %2)
-; CHECK-NEXT: ret i32 %3
+; CHECK-LABEL: @test_comineq_ss_0(
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.comineq.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
+; CHECK-NEXT:    ret i32 [[TMP3]]
+;
   %1 = insertelement <4 x float> undef, float %a, i32 0
   %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
   %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
@@ -316,11 +333,12 @@ define i32 @test_comineq_ss_0(float %a, float %b) {
 }
 
 define i32 @test_ucomieq_ss_0(float %a, float %b) {
-; CHECK-LABEL: @test_ucomieq_ss_0
-; CHECK-NEXT: %1 = insertelement <4 x float> undef, float %a, i32 0
-; CHECK-NEXT: %2 = insertelement <4 x float> undef, float %b, i32 0
-; CHECK-NEXT: %3 = tail call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> %1, <4 x float> %2)
-; CHECK-NEXT: ret i32 %3
+; CHECK-LABEL: @test_ucomieq_ss_0(
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
+; CHECK-NEXT:    ret i32 [[TMP3]]
+;
   %1 = insertelement <4 x float> undef, float %a, i32 0
   %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
   %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
@@ -334,11 +352,12 @@ define i32 @test_ucomieq_ss_0(float %a, float %b) {
 }
 
 define i32 @test_ucomige_ss_0(float %a, float %b) {
-; CHECK-LABEL: @test_ucomige_ss_0
-; CHECK-NEXT: %1 = insertelement <4 x float> undef, float %a, i32 0
-; CHECK-NEXT: %2 = insertelement <4 x float> undef, float %b, i32 0
-; CHECK-NEXT: %3 = tail call i32 @llvm.x86.sse.ucomige.ss(<4 x float> %1, <4 x float> %2)
-; CHECK-NEXT: ret i32 %3
+; CHECK-LABEL: @test_ucomige_ss_0(
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.ucomige.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
+; CHECK-NEXT:    ret i32 [[TMP3]]
+;
   %1 = insertelement <4 x float> undef, float %a, i32 0
   %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
   %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
@@ -352,11 +371,12 @@ define i32 @test_ucomige_ss_0(float %a, float %b) {
 }
 
 define i32 @test_ucomigt_ss_0(float %a, float %b) {
-; CHECK-LABEL: @test_ucomigt_ss_0
-; CHECK-NEXT: %1 = insertelement <4 x float> undef, float %a, i32 0
-; CHECK-NEXT: %2 = insertelement <4 x float> undef, float %b, i32 0
-; CHECK-NEXT: %3 = tail call i32 @llvm.x86.sse.ucomigt.ss(<4 x float> %1, <4 x float> %2)
-; CHECK-NEXT: ret i32 %3
+; CHECK-LABEL: @test_ucomigt_ss_0(
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.ucomigt.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
+; CHECK-NEXT:    ret i32 [[TMP3]]
+;
   %1 = insertelement <4 x float> undef, float %a, i32 0
   %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
   %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
@@ -370,11 +390,12 @@ define i32 @test_ucomigt_ss_0(float %a, float %b) {
 }
 
 define i32 @test_ucomile_ss_0(float %a, float %b) {
-; CHECK-LABEL: @test_ucomile_ss_0
-; CHECK-NEXT: %1 = insertelement <4 x float> undef, float %a, i32 0
-; CHECK-NEXT: %2 = insertelement <4 x float> undef, float %b, i32 0
-; CHECK-NEXT: %3 = tail call i32 @llvm.x86.sse.ucomile.ss(<4 x float> %1, <4 x float> %2)
-; CHECK-NEXT: ret i32 %3
+; CHECK-LABEL: @test_ucomile_ss_0(
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.ucomile.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
+; CHECK-NEXT:    ret i32 [[TMP3]]
+;
   %1 = insertelement <4 x float> undef, float %a, i32 0
   %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
   %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
@@ -388,11 +409,12 @@ define i32 @test_ucomile_ss_0(float %a, float %b) {
 }
 
 define i32 @test_ucomilt_ss_0(float %a, float %b) {
-; CHECK-LABEL: @test_ucomilt_ss_0
-; CHECK-NEXT: %1 = insertelement <4 x float> undef, float %a, i32 0
-; CHECK-NEXT: %2 = insertelement <4 x float> undef, float %b, i32 0
-; CHECK-NEXT: %3 = tail call i32 @llvm.x86.sse.ucomilt.ss(<4 x float> %1, <4 x float> %2)
-; CHECK-NEXT: ret i32 %3
+; CHECK-LABEL: @test_ucomilt_ss_0(
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.ucomilt.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
+; CHECK-NEXT:    ret i32 [[TMP3]]
+;
   %1 = insertelement <4 x float> undef, float %a, i32 0
   %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
   %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
@@ -406,11 +428,12 @@ define i32 @test_ucomilt_ss_0(float %a, float %b) {
 }
 
 define i32 @test_ucomineq_ss_0(float %a, float %b) {
-; CHECK-LABEL: @test_ucomineq_ss_0
-; CHECK-NEXT: %1 = insertelement <4 x float> undef, float %a, i32 0
-; CHECK-NEXT: %2 = insertelement <4 x float> undef, float %b, i32 0
-; CHECK-NEXT: %3 = tail call i32 @llvm.x86.sse.ucomineq.ss(<4 x float> %1, <4 x float> %2)
-; CHECK-NEXT: ret i32 %3
+; CHECK-LABEL: @test_ucomineq_ss_0(
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.ucomineq.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
+; CHECK-NEXT:    ret i32 [[TMP3]]
+;
   %1 = insertelement <4 x float> undef, float %a, i32 0
   %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
   %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
diff --git a/test/Transforms/InstCombine/x86-sse2.ll b/test/Transforms/InstCombine/x86-sse2.ll
index 132239e05a7..3a4b49c4a23 100644
--- a/test/Transforms/InstCombine/x86-sse2.ll
+++ b/test/Transforms/InstCombine/x86-sse2.ll
@@ -1,13 +1,15 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt < %s -instcombine -S | FileCheck %s
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 
 define double @test_sqrt_sd_0(double %a) {
-; CHECK-LABEL: @test_sqrt_sd_0
-; CHECK-NEXT: %1 = insertelement <2 x double> undef, double %a, i32 0
-; CHECK-NEXT: %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
-; CHECK-NEXT: %3 = tail call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %2)
-; CHECK-NEXT: %4 = extractelement <2 x double> %3, i32 0
-; CHECK-NEXT: ret double %4
+; CHECK-LABEL: @test_sqrt_sd_0(
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double 1.000000e+00, i32 1
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> [[TMP2]])
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x double> [[TMP3]], i32 0
+; CHECK-NEXT:    ret double [[TMP4]]
+;
   %1 = insertelement <2 x double> undef, double %a, i32 0
   %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
   %3 = tail call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %2)
@@ -16,108 +18,115 @@ define double @test_sqrt_sd_0(double %a) {
 }
 
 define double @test_add_sd_0(double %a, double %b) {
-; CHECK-LABEL: @test_add_sd_0
-; CHECK-NEXT: %1 = insertelement <2 x double> undef, double %a, i32 0
-; CHECK-NEXT: %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
-; CHECK-NEXT: %3 = insertelement <2 x double> undef, double %b, i32 0
-; CHECK-NEXT: %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
-; CHECK-NEXT: %5 = tail call <2 x double> @llvm.x86.sse2.add.sd(<2 x double> %2, <2 x double> %4)
-; CHECK-NEXT: %6 = extractelement <2 x double> %5, i32 0
-; CHECK-NEXT: ret double %6
+; CHECK-LABEL: @test_add_sd_0(
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double 1.000000e+00, i32 1
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x double> undef, double %b, i32 0
+; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double 2.000000e+00, i32 1
+; CHECK-NEXT:    [[TMP5:%.*]] = tail call <2 x double> @llvm.x86.sse2.add.sd(<2 x double> [[TMP2]], <2 x double> [[TMP4]])
+; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x double> [[TMP5]], i32 0
+; CHECK-NEXT:    ret double [[TMP6]]
+;
   %1 = insertelement <2 x double> undef, double %a, i32 0
   %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
   %3 = insertelement <2 x double> undef, double %b, i32 0
   %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
   %5 = tail call <2 x double> @llvm.x86.sse2.add.sd(<2 x double> %2, <2 x double> %4)
   %6 = extractelement <2 x double> %5, i32 0
-  ret double %6 
+  ret double %6
 }
 
 define double @test_sub_sd_0(double %a, double %b) {
-; CHECK-LABEL: @test_sub_sd_0
-; CHECK-NEXT: %1 = fsub double %a, %b
-; CHECK-NEXT: ret double %1
+; CHECK-LABEL: @test_sub_sd_0(
+; CHECK-NEXT:    [[TMP1:%.*]] = fsub double %a, %b
+; CHECK-NEXT:    ret double [[TMP1]]
+;
   %1 = insertelement <2 x double> undef, double %a, i32 0
   %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
   %3 = insertelement <2 x double> undef, double %b, i32 0
   %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
   %5 = tail call <2 x double> @llvm.x86.sse2.sub.sd(<2 x double> %2, <2 x double> %4)
   %6 = extractelement <2 x double> %5, i32 0
-  ret double %6 
+  ret double %6
 }
 
 define double @test_mul_sd_0(double %a, double %b) {
-; CHECK-LABEL: @test_mul_sd_0
-; CHECK-NEXT: %1 = fmul double %a, %b
-; CHECK-NEXT: ret double %1
+; CHECK-LABEL: @test_mul_sd_0(
+; CHECK-NEXT:    [[TMP1:%.*]] = fmul double %a, %b
+; CHECK-NEXT:    ret double [[TMP1]]
+;
   %1 = insertelement <2 x double> undef, double %a, i32 0
   %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
   %3 = insertelement <2 x double> undef, double %b, i32 0
   %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
   %5 = tail call <2 x double> @llvm.x86.sse2.mul.sd(<2 x double> %2, <2 x double> %4)
   %6 = extractelement <2 x double> %5, i32 0
-  ret double %6 
+  ret double %6
 }
 
 define double @test_div_sd_0(double %a, double %b) {
-; CHECK-LABEL: @test_div_sd_0
-; CHECK-NEXT: %1 = insertelement <2 x double> undef, double %a, i32 0
-; CHECK-NEXT: %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
-; CHECK-NEXT: %3 = insertelement <2 x double> undef, double %b, i32 0
-; CHECK-NEXT: %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
-; CHECK-NEXT: %5 = tail call <2 x double> @llvm.x86.sse2.div.sd(<2 x double> %2, <2 x double> %4)
-; CHECK-NEXT: %6 = extractelement <2 x double> %5, i32 0
-; CHECK-NEXT: ret double %6
+; CHECK-LABEL: @test_div_sd_0(
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double 1.000000e+00, i32 1
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x double> undef, double %b, i32 0
+; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double 2.000000e+00, i32 1
+; CHECK-NEXT:    [[TMP5:%.*]] = tail call <2 x double> @llvm.x86.sse2.div.sd(<2 x double> [[TMP2]], <2 x double> [[TMP4]])
+; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x double> [[TMP5]], i32 0
+; CHECK-NEXT:    ret double [[TMP6]]
+;
   %1 = insertelement <2 x double> undef, double %a, i32 0
   %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
   %3 = insertelement <2 x double> undef, double %b, i32 0
   %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
   %5 = tail call <2 x double> @llvm.x86.sse2.div.sd(<2 x double> %2, <2 x double> %4)
   %6 = extractelement <2 x double> %5, i32 0
-  ret double %6 
+  ret double %6
 }
 
 define double @test_min_sd_0(double %a, double %b) {
-; CHECK-LABEL: @test_min_sd_0
-; CHECK-NEXT: %1 = insertelement <2 x double> undef, double %a, i32 0
-; CHECK-NEXT: %2 = insertelement <2 x double> undef, double %b, i32 0
-; CHECK-NEXT: %3 = tail call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %1, <2 x double> %2)
-; CHECK-NEXT: %4 = extractelement <2 x double> %3, i32 0
-; CHECK-NEXT: ret double %4
+; CHECK-LABEL: @test_min_sd_0(
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> undef, double %b, i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]])
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x double> [[TMP3]], i32 0
+; CHECK-NEXT:    ret double [[TMP4]]
+;
   %1 = insertelement <2 x double> undef, double %a, i32 0
   %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
   %3 = insertelement <2 x double> undef, double %b, i32 0
   %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
   %5 = tail call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %2, <2 x double> %4)
   %6 = extractelement <2 x double> %5, i32 0
-  ret double %6 
+  ret double %6
 }
 
 define double @test_max_sd_0(double %a, double %b) {
-; CHECK-LABEL: @test_max_sd_0
-; CHECK-NEXT: %1 = insertelement <2 x double> undef, double %a, i32 0
-; CHECK-NEXT: %2 = insertelement <2 x double> undef, double %b, i32 0
-; CHECK-NEXT: %3 = tail call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %1, <2 x double> %2)
-; CHECK-NEXT: %4 = extractelement <2 x double> %3, i32 0
-; CHECK-NEXT: ret double %4
+; CHECK-LABEL: @test_max_sd_0(
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> undef, double %b, i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]])
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x double> [[TMP3]], i32 0
+; CHECK-NEXT:    ret double [[TMP4]]
+;
   %1 = insertelement <2 x double> undef, double %a, i32 0
   %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
   %3 = insertelement <2 x double> undef, double %b, i32 0
   %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
   %5 = tail call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %2, <2 x double> %4)
   %6 = extractelement <2 x double> %5, i32 0
-  ret double %6 
+  ret double %6
 }
 
 define double @test_cmp_sd_0(double %a, double %b) {
-; CHECK-LABEL: @test_cmp_sd_0
-; CHECK-NEXT: %1 = insertelement <2 x double> undef, double %a, i32 0
-; CHECK-NEXT: %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
-; CHECK-NEXT: %3 = insertelement <2 x double> undef, double %b, i32 0
-; CHECK-NEXT: %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
-; CHECK-NEXT: %5 = tail call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %2, <2 x double> %4, i8 0)
-; CHECK-NEXT: %6 = extractelement <2 x double> %5, i32 0
-; CHECK-NEXT: ret double %6
+; CHECK-LABEL: @test_cmp_sd_0(
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double 1.000000e+00, i32 1
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x double> undef, double %b, i32 0
+; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double 2.000000e+00, i32 1
+; CHECK-NEXT:    [[TMP5:%.*]] = tail call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> [[TMP2]], <2 x double> [[TMP4]], i8 0)
+; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x double> [[TMP5]], i32 0
+; CHECK-NEXT:    ret double [[TMP6]]
+;
   %1 = insertelement <2 x double> undef, double %a, i32 0
   %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
   %3 = insertelement <2 x double> undef, double %b, i32 0
@@ -128,11 +137,12 @@ define double @test_cmp_sd_0(double %a, double %b) {
 }
 
 define i32 @test_comieq_sd_0(double %a, double %b) {
-; CHECK-LABEL: @test_comieq_sd_0
-; CHECK-NEXT: %1 = insertelement <2 x double> undef, double %a, i32 0
-; CHECK-NEXT: %2 = insertelement <2 x double> undef, double %b, i32 0
-; CHECK-NEXT: %3 = tail call i32 @llvm.x86.sse2.comieq.sd(<2 x double> %1, <2 x double> %2)
-; CHECK-NEXT: ret i32 %3
+; CHECK-LABEL: @test_comieq_sd_0(
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> undef, double %b, i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse2.comieq.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]])
+; CHECK-NEXT:    ret i32 [[TMP3]]
+;
   %1 = insertelement <2 x double> undef, double %a, i32 0
   %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
   %3 = insertelement <2 x double> undef, double %b, i32 0
@@ -142,11 +152,12 @@ define i32 @test_comieq_sd_0(double %a, double %b) {
 }
 
 define i32 @test_comige_sd_0(double %a, double %b) {
-; CHECK-LABEL: @test_comige_sd_0
-; CHECK-NEXT: %1 = insertelement <2 x double> undef, double %a, i32 0
-; CHECK-NEXT: %2 = insertelement <2 x double> undef, double %b, i32 0
-; CHECK-NEXT: %3 = tail call i32 @llvm.x86.sse2.comige.sd(<2 x double> %1, <2 x double> %2)
-; CHECK-NEXT: ret i32 %3
+; CHECK-LABEL: @test_comige_sd_0(
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> undef, double %b, i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse2.comige.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]])
+; CHECK-NEXT:    ret i32 [[TMP3]]
+;
   %1 = insertelement <2 x double> undef, double %a, i32 0
   %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
   %3 = insertelement <2 x double> undef, double %b, i32 0
@@ -156,11 +167,12 @@ define i32 @test_comige_sd_0(double %a, double %b) {
 }
 
 define i32 @test_comigt_sd_0(double %a, double %b) {
-; CHECK-LABEL: @test_comigt_sd_0
-; CHECK-NEXT: %1 = insertelement <2 x double> undef, double %a, i32 0
-; CHECK-NEXT: %2 = insertelement <2 x double> undef, double %b, i32 0
-; CHECK-NEXT: %3 = tail call i32 @llvm.x86.sse2.comigt.sd(<2 x double> %1, <2 x double> %2)
-; CHECK-NEXT: ret i32 %3
+; CHECK-LABEL: @test_comigt_sd_0(
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> undef, double %b, i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse2.comigt.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]])
+; CHECK-NEXT:    ret i32 [[TMP3]]
+;
   %1 = insertelement <2 x double> undef, double %a, i32 0
   %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
   %3 = insertelement <2 x double> undef, double %b, i32 0
@@ -170,11 +182,12 @@ define i32 @test_comigt_sd_0(double %a, double %b) {
 }
 
 define i32 @test_comile_sd_0(double %a, double %b) {
-; CHECK-LABEL: @test_comile_sd_0
-; CHECK-NEXT: %1 = insertelement <2 x double> undef, double %a, i32 0
-; CHECK-NEXT: %2 = insertelement <2 x double> undef, double %b, i32 0
-; CHECK-NEXT: %3 = tail call i32 @llvm.x86.sse2.comile.sd(<2 x double> %1, <2 x double> %2)
-; CHECK-NEXT: ret i32 %3
+; CHECK-LABEL: @test_comile_sd_0(
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> undef, double %b, i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse2.comile.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]])
+; CHECK-NEXT:    ret i32 [[TMP3]]
+;
   %1 = insertelement <2 x double> undef, double %a, i32 0
   %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
   %3 = insertelement <2 x double> undef, double %b, i32 0
@@ -184,11 +197,12 @@ define i32 @test_comile_sd_0(double %a, double %b) {
 }
 
 define i32 @test_comilt_sd_0(double %a, double %b) {
-; CHECK-LABEL: @test_comilt_sd_0
-; CHECK-NEXT: %1 = insertelement <2 x double> undef, double %a, i32 0
-; CHECK-NEXT: %2 = insertelement <2 x double> undef, double %b, i32 0
-; CHECK-NEXT: %3 = tail call i32 @llvm.x86.sse2.comilt.sd(<2 x double> %1, <2 x double> %2)
-; CHECK-NEXT: ret i32 %3
+; CHECK-LABEL: @test_comilt_sd_0(
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> undef, double %b, i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse2.comilt.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]])
+; CHECK-NEXT:    ret i32 [[TMP3]]
+;
   %1 = insertelement <2 x double> undef, double %a, i32 0
   %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
   %3 = insertelement <2 x double> undef, double %b, i32 0
@@ -198,11 +212,12 @@ define i32 @test_comilt_sd_0(double %a, double %b) {
 }
 
 define i32 @test_comineq_sd_0(double %a, double %b) {
-; CHECK-LABEL: @test_comineq_sd_0
-; CHECK-NEXT: %1 = insertelement <2 x double> undef, double %a, i32 0
-; CHECK-NEXT: %2 = insertelement <2 x double> undef, double %b, i32 0
-; CHECK-NEXT: %3 = tail call i32 @llvm.x86.sse2.comineq.sd(<2 x double> %1, <2 x double> %2)
-; CHECK-NEXT: ret i32 %3
+; CHECK-LABEL: @test_comineq_sd_0(
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> undef, double %b, i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse2.comineq.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]])
+; CHECK-NEXT:    ret i32 [[TMP3]]
+;
   %1 = insertelement <2 x double> undef, double %a, i32 0
   %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
   %3 = insertelement <2 x double> undef, double %b, i32 0
@@ -212,11 +227,12 @@ define i32 @test_comineq_sd_0(double %a, double %b) {
 }
 
 define i32 @test_ucomieq_sd_0(double %a, double %b) {
-; CHECK-LABEL: @test_ucomieq_sd_0
-; CHECK-NEXT: %1 = insertelement <2 x double> undef, double %a, i32 0
-; CHECK-NEXT: %2 = insertelement <2 x double> undef, double %b, i32 0
-; CHECK-NEXT: %3 = tail call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> %1, <2 x double> %2)
-; CHECK-NEXT: ret i32 %3
+; CHECK-LABEL: @test_ucomieq_sd_0(
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> undef, double %b, i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]])
+; CHECK-NEXT:    ret i32 [[TMP3]]
+;
   %1 = insertelement <2 x double> undef, double %a, i32 0
   %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
   %3 = insertelement <2 x double> undef, double %b, i32 0
@@ -226,11 +242,12 @@ define i32 @test_ucomieq_sd_0(double %a, double %b) {
 }
 
 define i32 @test_ucomige_sd_0(double %a, double %b) {
-; CHECK-LABEL: @test_ucomige_sd_0
-; CHECK-NEXT: %1 = insertelement <2 x double> undef, double %a, i32 0
-; CHECK-NEXT: %2 = insertelement <2 x double> undef, double %b, i32 0
-; CHECK-NEXT: %3 = tail call i32 @llvm.x86.sse2.ucomige.sd(<2 x double> %1, <2 x double> %2)
-; CHECK-NEXT: ret i32 %3
+; CHECK-LABEL: @test_ucomige_sd_0(
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> undef, double %b, i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse2.ucomige.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]])
+; CHECK-NEXT:    ret i32 [[TMP3]]
+;
   %1 = insertelement <2 x double> undef, double %a, i32 0
   %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
   %3 = insertelement <2 x double> undef, double %b, i32 0
@@ -240,11 +257,12 @@ define i32 @test_ucomige_sd_0(double %a, double %b) {
 }
 
 define i32 @test_ucomigt_sd_0(double %a, double %b) {
-; CHECK-LABEL: @test_ucomigt_sd_0
-; CHECK-NEXT: %1 = insertelement <2 x double> undef, double %a, i32 0
-; CHECK-NEXT: %2 = insertelement <2 x double> undef, double %b, i32 0
-; CHECK-NEXT: %3 = tail call i32 @llvm.x86.sse2.ucomigt.sd(<2 x double> %1, <2 x double> %2)
-; CHECK-NEXT: ret i32 %3
+; CHECK-LABEL: @test_ucomigt_sd_0(
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> undef, double %b, i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse2.ucomigt.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]])
+; CHECK-NEXT:    ret i32 [[TMP3]]
+;
   %1 = insertelement <2 x double> undef, double %a, i32 0
   %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
   %3 = insertelement <2 x double> undef, double %b, i32 0
@@ -254,11 +272,12 @@ define i32 @test_ucomigt_sd_0(double %a, double %b) {
 }
 
 define i32 @test_ucomile_sd_0(double %a, double %b) {
-; CHECK-LABEL: @test_ucomile_sd_0
-; CHECK-NEXT: %1 = insertelement <2 x double> undef, double %a, i32 0
-; CHECK-NEXT: %2 = insertelement <2 x double> undef, double %b, i32 0
-; CHECK-NEXT: %3 = tail call i32 @llvm.x86.sse2.ucomile.sd(<2 x double> %1, <2 x double> %2)
-; CHECK-NEXT: ret i32 %3
+; CHECK-LABEL: @test_ucomile_sd_0(
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> undef, double %b, i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse2.ucomile.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]])
+; CHECK-NEXT:    ret i32 [[TMP3]]
+;
   %1 = insertelement <2 x double> undef, double %a, i32 0
   %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
   %3 = insertelement <2 x double> undef, double %b, i32 0
@@ -268,11 +287,12 @@ define i32 @test_ucomile_sd_0(double %a, double %b) {
 }
 
 define i32 @test_ucomilt_sd_0(double %a, double %b) {
-; CHECK-LABEL: @test_ucomilt_sd_0
-; CHECK-NEXT: %1 = insertelement <2 x double> undef, double %a, i32 0
-; CHECK-NEXT: %2 = insertelement <2 x double> undef, double %b, i32 0
-; CHECK-NEXT: %3 = tail call i32 @llvm.x86.sse2.ucomilt.sd(<2 x double> %1, <2 x double> %2)
-; CHECK-NEXT: ret i32 %3
+; CHECK-LABEL: @test_ucomilt_sd_0(
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> undef, double %b, i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse2.ucomilt.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]])
+; CHECK-NEXT:    ret i32 [[TMP3]]
+;
   %1 = insertelement <2 x double> undef, double %a, i32 0
   %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
   %3 = insertelement <2 x double> undef, double %b, i32 0
@@ -282,11 +302,12 @@ define i32 @test_ucomilt_sd_0(double %a, double %b) {
 }
 
 define i32 @test_ucomineq_sd_0(double %a, double %b) {
-; CHECK-LABEL: @test_ucomineq_sd_0
-; CHECK-NEXT: %1 = insertelement <2 x double> undef, double %a, i32 0
-; CHECK-NEXT: %2 = insertelement <2 x double> undef, double %b, i32 0
-; CHECK-NEXT: %3 = tail call i32 @llvm.x86.sse2.ucomineq.sd(<2 x double> %1, <2 x double> %2)
-; CHECK-NEXT: ret i32 %3
+; CHECK-LABEL: @test_ucomineq_sd_0(
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> undef, double %b, i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse2.ucomineq.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]])
+; CHECK-NEXT:    ret i32 [[TMP3]]
+;
   %1 = insertelement <2 x double> undef, double %a, i32 0
   %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
   %3 = insertelement <2 x double> undef, double %b, i32 0
diff --git a/test/Transforms/InstCombine/x86-sse41.ll b/test/Transforms/InstCombine/x86-sse41.ll
index 20c147d7e53..c109410e854 100644
--- a/test/Transforms/InstCombine/x86-sse41.ll
+++ b/test/Transforms/InstCombine/x86-sse41.ll
@@ -1,12 +1,14 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt < %s -instcombine -S | FileCheck %s
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 
 define <2 x double> @test_round_sd(<2 x double> %a, <2 x double> %b) {
-; CHECK-LABEL: @test_round_sd
-; CHECK-NEXT: %1 = insertelement <2 x double> %a, double 1.000000e+00, i32 0
-; CHECK-NEXT: %2 = insertelement <2 x double> %b, double 2.000000e+00, i32 1
-; CHECK-NEXT: %3 = tail call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %1, <2 x double> %2, i32 10)
-; CHECK-NEXT: ret <2 x double> %3
+; CHECK-LABEL: @test_round_sd(
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> %a, double 1.000000e+00, i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> %b, double 2.000000e+00, i32 1
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]], i32 10)
+; CHECK-NEXT:    ret <2 x double> [[TMP3]]
+;
   %1 = insertelement <2 x double> %a, double 1.000000e+00, i32 0
   %2 = insertelement <2 x double> %b, double 2.000000e+00, i32 1
   %3 = tail call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %1, <2 x double> %2, i32 10)
@@ -14,33 +16,35 @@ define <2 x double> @test_round_sd(<2 x double> %a, <2 x double> %b) {
 }
 
 define double @test_round_sd_0(double %a, double %b) {
-; CHECK-LABEL: @test_round_sd_0
-; CHECK-NEXT: %1 = insertelement <2 x double> undef, double %a, i32 0
-; CHECK-NEXT: %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
-; CHECK-NEXT: %3 = insertelement <2 x double> undef, double %b, i32 0
-; CHECK-NEXT: %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
-; CHECK-NEXT: %5 = tail call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %2, <2 x double> %4, i32 10)
-; CHECK-NEXT: %6 = extractelement <2 x double> %5, i32 0
-; CHECK-NEXT: ret double %6
+; CHECK-LABEL: @test_round_sd_0(
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double 1.000000e+00, i32 1
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x double> undef, double %b, i32 0
+; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double 2.000000e+00, i32 1
+; CHECK-NEXT:    [[TMP5:%.*]] = tail call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> [[TMP2]], <2 x double> [[TMP4]], i32 10)
+; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x double> [[TMP5]], i32 0
+; CHECK-NEXT:    ret double [[TMP6]]
+;
   %1 = insertelement <2 x double> undef, double %a, i32 0
   %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
   %3 = insertelement <2 x double> undef, double %b, i32 0
   %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
   %5 = tail call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %2, <2 x double> %4, i32 10)
   %6 = extractelement <2 x double> %5, i32 0
-  ret double %6 
+  ret double %6
 }
 
 define <4 x float> @test_round_ss(<4 x float> %a, <4 x float> %b) {
-; CHECK-LABEL: @test_round
-; CHECK-NEXT: %1 = insertelement <4 x float> %a, float 1.000000e+00, i32 1
-; CHECK-NEXT: %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
-; CHECK-NEXT: %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
-; CHECK-NEXT: %4 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
-; CHECK-NEXT: %5 = insertelement <4 x float> %4, float 2.000000e+00, i32 2
-; CHECK-NEXT: %6 = insertelement <4 x float> %5, float 3.000000e+00, i32 3
-; CHECK-NEXT: %7 = tail call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %3, <4 x float> %6, i32 10)
-; CHECK-NEXT: ret <4 x float> %7
+; CHECK-LABEL: @test_round_ss(
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> %a, float 1.000000e+00, i32 1
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> [[TMP1]], float 2.000000e+00, i32 2
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x float> [[TMP2]], float 3.000000e+00, i32 3
+; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x float> %b, float 1.000000e+00, i32 1
+; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <4 x float> [[TMP4]], float 2.000000e+00, i32 2
+; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <4 x float> [[TMP5]], float 3.000000e+00, i32 3
+; CHECK-NEXT:    [[TMP7:%.*]] = tail call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> [[TMP3]], <4 x float> [[TMP6]], i32 10)
+; CHECK-NEXT:    ret <4 x float> [[TMP7]]
+;
   %1 = insertelement <4 x float> %a, float 1.000000e+00, i32 1
   %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
   %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
@@ -52,18 +56,19 @@ define <4 x float> @test_round_ss(<4 x float> %a, <4 x float> %b) {
 }
 
 define float @test_round_ss_0(float %a, float %b) {
-; CHECK-LABEL: @test_round_ss_0
-; CHECK-NEXT: %1 = insertelement <4 x float> undef, float %a, i32 0
-; CHECK-NEXT: %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
-; CHECK-NEXT: %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
-; CHECK-NEXT: %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
-; CHECK-NEXT: %5 = insertelement <4 x float> undef, float %b, i32 0
-; CHECK-NEXT: %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
-; CHECK-NEXT: %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
-; CHECK-NEXT: %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
-; CHECK-NEXT: %9 = tail call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %4, <4 x float> %8, i32 10)
-; CHECK-NEXT: %r = extractelement <4 x float> %9, i32 0
-; CHECK-NEXT: ret float %r
+; CHECK-LABEL: @test_round_ss_0(
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> [[TMP1]], float 1.000000e+00, i32 1
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x float> [[TMP2]], float 2.000000e+00, i32 2
+; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x float> [[TMP3]], float 3.000000e+00, i32 3
+; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <4 x float> undef, float %b, i32 0
+; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <4 x float> [[TMP5]], float 4.000000e+00, i32 1
+; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <4 x float> [[TMP6]], float 5.000000e+00, i32 2
+; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <4 x float> [[TMP7]], float 6.000000e+00, i32 3
+; CHECK-NEXT:    [[TMP9:%.*]] = tail call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> [[TMP4]], <4 x float> [[TMP8]], i32 10)
+; CHECK-NEXT:    [[R:%.*]] = extractelement <4 x float> [[TMP9]], i32 0
+; CHECK-NEXT:    ret float [[R]]
+;
   %1 = insertelement <4 x float> undef, float %a, i32 0
   %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
   %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
@@ -78,4 +83,4 @@ define float @test_round_ss_0(float %a, float %b) {
 }
 
 declare <2 x double> @llvm.x86.sse41.round.sd(<2 x double>, <2 x double>, i32) nounwind readnone
-declare <4 x float> @llvm.x86.sse41.round.ss(<4 x float>, <4 x float>, i32) nounwind readnone
\ No newline at end of file
+declare <4 x float> @llvm.x86.sse41.round.ss(<4 x float>, <4 x float>, i32) nounwind readnone
diff --git a/test/Transforms/InstCombine/x86-sse4a.ll b/test/Transforms/InstCombine/x86-sse4a.ll
index 815d26bd225..53353abefb3 100644
--- a/test/Transforms/InstCombine/x86-sse4a.ll
+++ b/test/Transforms/InstCombine/x86-sse4a.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt < %s -instcombine -S | FileCheck %s
 
 ;
@@ -5,45 +6,51 @@
 ;
 
 define <2 x i64> @test_extrq_call(<2 x i64> %x, <16 x i8> %y) {
-; CHECK-LABEL: @test_extrq_call
-; CHECK-NEXT: %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> %y)
-; CHECK-NEXT: ret <2 x i64> %1
+; CHECK-LABEL: @test_extrq_call(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> %y) #1
+; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
+;
   %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> %y) nounwind
   ret <2 x i64> %1
 }
 
 define <2 x i64> @test_extrq_zero_arg0(<2 x i64> %x, <16 x i8> %y) {
-; CHECK-LABEL: @test_extrq_zero_arg0
-; CHECK-NEXT: ret <2 x i64> <i64 0, i64 undef>
+; CHECK-LABEL: @test_extrq_zero_arg0(
+; CHECK-NEXT:    ret <2 x i64> <i64 0, i64 undef>
+;
   %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> zeroinitializer, <16 x i8> %y) nounwind
   ret <2 x i64> %1
 }
 
 define <2 x i64> @test_extrq_zero_arg1(<2 x i64> %x, <16 x i8> %y) {
-; CHECK-LABEL: @test_extrq_zero_arg1
-; CHECK-NEXT: ret <2 x i64> %x
+; CHECK-LABEL: @test_extrq_zero_arg1(
+; CHECK-NEXT:    ret <2 x i64> %x
+;
   %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> zeroinitializer) nounwind
   ret <2 x i64> %1
 }
 
 define <2 x i64> @test_extrq_to_extqi(<2 x i64> %x, <16 x i8> %y) {
-; CHECK-LABEL: @test_extrq_to_extqi
-; CHECK-NEXT: %1 = call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %x, i8 8, i8 15)
-; CHECK-NEXT: ret <2 x i64> %1
+; CHECK-LABEL: @test_extrq_to_extqi(
+; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %x, i8 8, i8 15)
+; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
+;
   %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> <i8 8, i8 15, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>) nounwind
   ret <2 x i64> %1
 }
 
 define <2 x i64> @test_extrq_constant(<2 x i64> %x, <16 x i8> %y) {
-; CHECK-LABEL: @test_extrq_constant
-; CHECK-NEXT: ret <2 x i64> <i64 255, i64 undef>
+; CHECK-LABEL: @test_extrq_constant(
+; CHECK-NEXT:    ret <2 x i64> <i64 255, i64 undef>
+;
   %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> <i64 -1, i64 55>, <16 x i8> <i8 8, i8 15, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>) nounwind
   ret <2 x i64> %1
 }
 
 define <2 x i64> @test_extrq_constant_undef(<2 x i64> %x, <16 x i8> %y) {
-; CHECK-LABEL: @test_extrq_constant_undef
-; CHECK-NEXT: ret <2 x i64> <i64 65535, i64 undef>
+; CHECK-LABEL: @test_extrq_constant_undef(
+; CHECK-NEXT:    ret <2 x i64> <i64 65535, i64 undef>
+;
   %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> <i64 -1, i64 undef>, <16 x i8> <i8 16, i8 15, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>) nounwind
   ret <2 x i64> %1
 }
@@ -53,57 +60,64 @@ define <2 x i64> @test_extrq_constant_undef(<2 x i64> %x, <16 x i8> %y) {
 ;
 
 define <2 x i64> @test_extrqi_call(<2 x i64> %x) {
-; CHECK-LABEL: @test_extrqi_call
-; CHECK-NEXT: %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %x, i8 8, i8 23)
-; CHECK-NEXT: ret <2 x i64> %1
+; CHECK-LABEL: @test_extrqi_call(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %x, i8 8, i8 23)
+; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
+;
   %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %x, i8 8, i8 23)
   ret <2 x i64> %1
 }
 
 define <2 x i64> @test_extrqi_shuffle_1zuu(<2 x i64> %x) {
-; CHECK-LABEL: @test_extrqi_shuffle_1zuu
-; CHECK-NEXT: %1 = bitcast <2 x i64> %x to <16 x i8>
-; CHECK-NEXT: %2 = shufflevector <16 x i8> %1, <16 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 0, i8 0, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 20, i32 21, i32 22, i32 23, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-; CHECK-NEXT: %3 = bitcast <16 x i8> %2 to <2 x i64>
-; CHECK-NEXT: ret <2 x i64> %3
+; CHECK-LABEL: @test_extrqi_shuffle_1zuu(
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> %x to <16 x i8>
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 0, i8 0, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 20, i32 21, i32 22, i32 23, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64>
+; CHECK-NEXT:    ret <2 x i64> [[TMP3]]
+;
   %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %x, i8 32, i8 32)
   ret <2 x i64> %1
 }
 
 define <2 x i64> @test_extrqi_shuffle_2zzzzzzzuuuuuuuu(<2 x i64> %x) {
-; CHECK-LABEL: @test_extrqi_shuffle_2zzzzzzzuuuuuuuu
-; CHECK-NEXT: %1 = bitcast <2 x i64> %x to <16 x i8>
-; CHECK-NEXT: %2 = shufflevector <16 x i8> %1, <16 x i8> <i8 undef, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <16 x i32> <i32 2, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-; CHECK-NEXT: %3 = bitcast <16 x i8> %2 to <2 x i64>
-; CHECK-NEXT: ret <2 x i64> %3
+; CHECK-LABEL: @test_extrqi_shuffle_2zzzzzzzuuuuuuuu(
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> %x to <16 x i8>
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> <i8 undef, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <16 x i32> <i32 2, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64>
+; CHECK-NEXT:    ret <2 x i64> [[TMP3]]
+;
   %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %x, i8 8, i8 16)
   ret <2 x i64> %1
 }
 
 define <2 x i64> @test_extrqi_undef(<2 x i64> %x) {
-; CHECK-LABEL: @test_extrqi_undef
-; CHECK-NEXT: ret <2 x i64> undef
+; CHECK-LABEL: @test_extrqi_undef(
+; CHECK-NEXT:    ret <2 x i64> undef
+;
   %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> zeroinitializer, i8 32, i8 33)
   ret <2 x i64> %1
 }
 
 define <2 x i64> @test_extrqi_zero(<2 x i64> %x) {
-; CHECK-LABEL: @test_extrqi_zero
-; CHECK-NEXT: ret <2 x i64> <i64 0, i64 undef>
+; CHECK-LABEL: @test_extrqi_zero(
+; CHECK-NEXT:    ret <2 x i64> <i64 0, i64 undef>
+;
   %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> zeroinitializer, i8 3, i8 18)
   ret <2 x i64> %1
 }
 
 define <2 x i64> @test_extrqi_constant(<2 x i64> %x) {
-; CHECK-LABEL: @test_extrqi_constant
-; CHECK-NEXT: ret <2 x i64> <i64 7, i64 undef>
+; CHECK-LABEL: @test_extrqi_constant(
+; CHECK-NEXT:    ret <2 x i64> <i64 7, i64 undef>
+;
   %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> <i64 -1, i64 55>, i8 3, i8 18)
   ret <2 x i64> %1
 }
 
 define <2 x i64> @test_extrqi_constant_undef(<2 x i64> %x) {
-; CHECK-LABEL: @test_extrqi_constant_undef
-; CHECK-NEXT: ret <2 x i64> <i64 15, i64 undef>
+; CHECK-LABEL: @test_extrqi_constant_undef(
+; CHECK-NEXT:    ret <2 x i64> <i64 15, i64 undef>
+;
   %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> <i64 -1, i64 undef>, i8 4, i8 18)
   ret <2 x i64> %1
 }
@@ -113,31 +127,35 @@ define <2 x i64> @test_extrqi_constant_undef(<2 x i64> %x) {
 ;
 
 define <2 x i64> @test_insertq_call(<2 x i64> %x, <2 x i64> %y) {
-; CHECK-LABEL: @test_insertq_call
-; CHECK-NEXT: %1 = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> %x, <2 x i64> %y)
-; CHECK-NEXT: ret <2 x i64> %1
+; CHECK-LABEL: @test_insertq_call(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> %x, <2 x i64> %y) #1
+; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
+;
   %1 = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> %x, <2 x i64> %y) nounwind
   ret <2 x i64> %1
 }
 
 define <2 x i64> @test_insertq_to_insertqi(<2 x i64> %x, <2 x i64> %y) {
-; CHECK-LABEL: @test_insertq_to_insertqi
-; CHECK-NEXT: %1 = call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %x, <2 x i64> <i64 8, i64 undef>, i8 18, i8 2)
-; CHECK-NEXT: ret <2 x i64> %1
+; CHECK-LABEL: @test_insertq_to_insertqi(
+; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %x, <2 x i64> <i64 8, i64 undef>, i8 18, i8 2)
+; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
+;
   %1 = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> %x, <2 x i64> <i64 8, i64 658>) nounwind
   ret <2 x i64> %1
 }
 
 define <2 x i64> @test_insertq_constant(<2 x i64> %x, <2 x i64> %y) {
-; CHECK-LABEL: @test_insertq_constant
-; CHECK-NEXT: ret <2 x i64> <i64 32, i64 undef>
+; CHECK-LABEL: @test_insertq_constant(
+; CHECK-NEXT:    ret <2 x i64> <i64 32, i64 undef>
+;
   %1 = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> <i64 0, i64 0>, <2 x i64> <i64 8, i64 658>) nounwind
   ret <2 x i64> %1
 }
 
 define <2 x i64> @test_insertq_constant_undef(<2 x i64> %x, <2 x i64> %y) {
-; CHECK-LABEL: @test_insertq_constant_undef
-; CHECK-NEXT: ret <2 x i64> <i64 33, i64 undef>
+; CHECK-LABEL: @test_insertq_constant_undef(
+; CHECK-NEXT:    ret <2 x i64> <i64 33, i64 undef>
+;
   %1 = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> <i64 1, i64 undef>, <2 x i64> <i64 8, i64 658>) nounwind
   ret <2 x i64> %1
 }
@@ -147,9 +165,10 @@ define <2 x i64> @test_insertq_constant_undef(<2 x i64> %x, <2 x i64> %y) {
 ;
 
 define <16 x i8> @test_insertqi_shuffle_04uu(<16 x i8> %v, <16 x i8> %i) {
-; CHECK-LABEL: @test_insertqi_shuffle_04uu
-; CHECK-NEXT: %1 = shufflevector <16 x i8> %v, <16 x i8> %i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 18, i32 19, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-; CHECK-NEXT: ret <16 x i8> %1
+; CHECK-LABEL: @test_insertqi_shuffle_04uu(
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i8> %v, <16 x i8> %i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 18, i32 19, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT:    ret <16 x i8> [[TMP1]]
+;
   %1 = bitcast <16 x i8> %v to <2 x i64>
   %2 = bitcast <16 x i8> %i to <2 x i64>
   %3 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %2, i8 32, i8 32)
@@ -158,9 +177,10 @@ define <16 x i8> @test_insertqi_shuffle_04uu(<16 x i8> %v, <16 x i8> %i) {
 }
 
 define <16 x i8> @test_insertqi_shuffle_8123uuuu(<16 x i8> %v, <16 x i8> %i) {
-; CHECK-LABEL: @test_insertqi_shuffle_8123uuuu
-; CHECK-NEXT: %1 = shufflevector <16 x i8> %v, <16 x i8> %i, <16 x i32> <i32 16, i32 17, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-; CHECK-NEXT: ret <16 x i8> %1
+; CHECK-LABEL: @test_insertqi_shuffle_8123uuuu(
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i8> %v, <16 x i8> %i, <16 x i32> <i32 16, i32 17, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT:    ret <16 x i8> [[TMP1]]
+;
   %1 = bitcast <16 x i8> %v to <2 x i64>
   %2 = bitcast <16 x i8> %i to <2 x i64>
   %3 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %2, i8 16, i8 0)
@@ -169,8 +189,9 @@ define <16 x i8> @test_insertqi_shuffle_8123uuuu(<16 x i8> %v, <16 x i8> %i) {
 }
 
 define <2 x i64> @test_insertqi_constant(<2 x i64> %v, <2 x i64> %i) {
-; CHECK-LABEL: @test_insertqi_constant
-; CHECK-NEXT: ret <2 x i64> <i64 -131055, i64 undef>
+; CHECK-LABEL: @test_insertqi_constant(
+; CHECK-NEXT:    ret <2 x i64> <i64 -131055, i64 undef>
+;
   %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> <i64 -1, i64 -1>, <2 x i64> <i64 8, i64 0>, i8 16, i8 1)
   ret <2 x i64> %1
 }
@@ -179,36 +200,41 @@ define <2 x i64> @test_insertqi_constant(<2 x i64> %v, <2 x i64> %i) {
 ; the result are undefined, and we copy the bottom 64 bits from the
 ; second arg
 define <2 x i64> @testInsert64Bits(<2 x i64> %v, <2 x i64> %i) {
-; CHECK-LABEL: @testInsert64Bits
-; CHECK-NEXT: ret <2 x i64> %i
+; CHECK-LABEL: @testInsert64Bits(
+; CHECK-NEXT:    ret <2 x i64> %i
+;
   %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 64, i8 0)
   ret <2 x i64> %1
 }
 
 define <2 x i64> @testZeroLength(<2 x i64> %v, <2 x i64> %i) {
-; CHECK-LABEL: @testZeroLength
-; CHECK-NEXT: ret <2 x i64> %i
+; CHECK-LABEL: @testZeroLength(
+; CHECK-NEXT:    ret <2 x i64> %i
+;
   %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 0, i8 0)
   ret <2 x i64> %1
 }
 
 define <2 x i64> @testUndefinedInsertq_1(<2 x i64> %v, <2 x i64> %i) {
-; CHECK-LABEL: @testUndefinedInsertq_1
-; CHECK-NEXT: ret <2 x i64> undef
+; CHECK-LABEL: @testUndefinedInsertq_1(
+; CHECK-NEXT:    ret <2 x i64> undef
+;
   %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 0, i8 16)
   ret <2 x i64> %1
 }
 
 define <2 x i64> @testUndefinedInsertq_2(<2 x i64> %v, <2 x i64> %i) {
-; CHECK-LABEL: @testUndefinedInsertq_2
-; CHECK-NEXT: ret <2 x i64> undef
+; CHECK-LABEL: @testUndefinedInsertq_2(
+; CHECK-NEXT:    ret <2 x i64> undef
+;
   %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 48, i8 32)
   ret <2 x i64> %1
 }
 
 define <2 x i64> @testUndefinedInsertq_3(<2 x i64> %v, <2 x i64> %i) {
-; CHECK-LABEL: @testUndefinedInsertq_3
-; CHECK-NEXT: ret <2 x i64> undef
+; CHECK-LABEL: @testUndefinedInsertq_3(
+; CHECK-NEXT:    ret <2 x i64> undef
+;
   %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 64, i8 16)
   ret <2 x i64> %1
 }
@@ -218,27 +244,30 @@ define <2 x i64> @testUndefinedInsertq_3(<2 x i64> %v, <2 x i64> %i) {
 ;
 
 define <2 x i64> @test_extrq_arg0(<2 x i64> %x, <16 x i8> %y) {
-; CHECK-LABEL: @test_extrq_arg0
-; CHECK-NEXT: %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> %y)
-; CHECK-NEXT: ret <2 x i64> %1
+; CHECK-LABEL: @test_extrq_arg0(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> %y) #1
+; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
+;
   %1 = shufflevector <2 x i64> %x, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
   %2 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %1, <16 x i8> %y) nounwind
   ret <2 x i64> %2
 }
 
 define <2 x i64> @test_extrq_arg1(<2 x i64> %x, <16 x i8> %y) {
-; CHECK-LABEL: @test_extrq_arg1
-; CHECK-NEXT: %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> %y)
-; CHECK-NEXT: ret <2 x i64> %1
+; CHECK-LABEL: @test_extrq_arg1(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> %y) #1
+; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
+;
   %1 = shufflevector <16 x i8> %y, <16 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
   %2 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> %1) nounwind
   ret <2 x i64> %2
 }
 
 define <2 x i64> @test_extrq_args01(<2 x i64> %x, <16 x i8> %y) {
-; CHECK-LABEL: @test_extrq_args01
-; CHECK-NEXT: %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> %y)
-; CHECK-NEXT: ret <2 x i64> %1
+; CHECK-LABEL: @test_extrq_args01(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> %y) #1
+; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
+;
   %1 = shufflevector <2 x i64> %x, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
   %2 = shufflevector <16 x i8> %y, <16 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
   %3 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %1, <16 x i8> %2) nounwind
@@ -246,69 +275,77 @@ define <2 x i64> @test_extrq_args01(<2 x i64> %x, <16 x i8> %y) {
 }
 
 define <2 x i64> @test_extrq_ret(<2 x i64> %x, <16 x i8> %y) {
-; CHECK-LABEL: @test_extrq_ret
-; CHECK-NEXT: ret <2 x i64> undef
+; CHECK-LABEL: @test_extrq_ret(
+; CHECK-NEXT:    ret <2 x i64> undef
+;
   %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> %y) nounwind
   %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
   ret <2 x i64> %2
 }
 
 define <2 x i64> @test_extrqi_arg0(<2 x i64> %x) {
-; CHECK-LABEL: @test_extrqi_arg0
-; CHECK-NEXT: %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %x, i8 3, i8 2)
-; CHECK-NEXT: ret <2 x i64> %1
+; CHECK-LABEL: @test_extrqi_arg0(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %x, i8 3, i8 2)
+; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
+;
   %1 = shufflevector <2 x i64> %x, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
   %2 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %1, i8 3, i8 2)
   ret <2 x i64> %2
 }
 
 define <2 x i64> @test_extrqi_ret(<2 x i64> %x) {
-; CHECK-LABEL: @test_extrqi_ret
-; CHECK-NEXT: ret <2 x i64> undef
+; CHECK-LABEL: @test_extrqi_ret(
+; CHECK-NEXT:    ret <2 x i64> undef
+;
   %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %x, i8 3, i8 2) nounwind
   %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
   ret <2 x i64> %2
 }
 
 define <2 x i64> @test_insertq_arg0(<2 x i64> %x, <2 x i64> %y) {
-; CHECK-LABEL: @test_insertq_arg0
-; CHECK-NEXT: %1 = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> %x, <2 x i64> %y)
-; CHECK-NEXT: ret <2 x i64> %1
+; CHECK-LABEL: @test_insertq_arg0(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> %x, <2 x i64> %y) #1
+; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
+;
   %1 = shufflevector <2 x i64> %x, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
   %2 = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> %1, <2 x i64> %y) nounwind
   ret <2 x i64> %2
 }
 
 define <2 x i64> @test_insertq_ret(<2 x i64> %x, <2 x i64> %y) {
-; CHECK-LABEL: @test_insertq_ret
-; CHECK-NEXT: ret <2 x i64> undef
+; CHECK-LABEL: @test_insertq_ret(
+; CHECK-NEXT:    ret <2 x i64> undef
+;
   %1 = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> %x, <2 x i64> %y) nounwind
   %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
   ret <2 x i64> %2
 }
 
 define <2 x i64> @test_insertqi_arg0(<2 x i64> %x, <2 x i64> %y) {
-; CHECK-LABEL: @test_insertqi_arg0
-; CHECK-NEXT: %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %x, <2 x i64> %y, i8 3, i8 2)
-; CHECK-NEXT: ret <2 x i64> %1
+; CHECK-LABEL: @test_insertqi_arg0(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %x, <2 x i64> %y, i8 3, i8 2) #1
+; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
+;
   %1 = shufflevector <2 x i64> %x, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
   %2 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %y, i8 3, i8 2) nounwind
   ret <2 x i64> %2
 }
 
 define <2 x i64> @test_insertqi_arg1(<2 x i64> %x, <2 x i64> %y) {
-; CHECK-LABEL: @test_insertqi_arg1
-; CHECK-NEXT: %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %x, <2 x i64> %y, i8 3, i8 2)
-; CHECK-NEXT: ret <2 x i64> %1
+; CHECK-LABEL: @test_insertqi_arg1(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %x, <2 x i64> %y, i8 3, i8 2) #1
+; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
+;
   %1 = shufflevector <2 x i64> %y, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
   %2 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %x, <2 x i64> %1, i8 3, i8 2) nounwind
   ret <2 x i64> %2
 }
 
 define <2 x i64> @test_insertqi_args01(<2 x i64> %x, <2 x i64> %y) {
-; CHECK-LABEL: @test_insertqi_args01
-; CHECK-NEXT: %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %x, <2 x i64> %y, i8 3, i8 2)
-; CHECK-NEXT: ret <2 x i64> %1
+; CHECK-LABEL: @test_insertqi_args01(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %x, <2 x i64> %y, i8 3, i8 2) #1
+; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
+;
   %1 = shufflevector <2 x i64> %x, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
   %2 = shufflevector <2 x i64> %y, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
   %3 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %2, i8 3, i8 2) nounwind
@@ -316,8 +353,9 @@ define <2 x i64> @test_insertqi_args01(<2 x i64> %x, <2 x i64> %y) {
 }
 
 define <2 x i64> @test_insertqi_ret(<2 x i64> %x, <2 x i64> %y) {
-; CHECK-LABEL: @test_insertqi_ret
-; CHECK-NEXT: ret <2 x i64> undef
+; CHECK-LABEL: @test_insertqi_ret(
+; CHECK-NEXT:    ret <2 x i64> undef
+;
   %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %x, <2 x i64> %y, i8 3, i8 2) nounwind
   %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
   ret <2 x i64> %2
diff --git a/test/Transforms/InstCombine/x86-xop.ll b/test/Transforms/InstCombine/x86-xop.ll
index 345ac982226..90b0a841eed 100644
--- a/test/Transforms/InstCombine/x86-xop.ll
+++ b/test/Transforms/InstCombine/x86-xop.ll
@@ -1,12 +1,14 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt < %s -instcombine -S | FileCheck %s
 
 define double @test_vfrcz_sd_0(double %a) {
-; CHECK-LABEL: @test_vfrcz_sd_0
-; CHECK-NEXT: %1 = insertelement <2 x double> undef, double %a, i32 0
-; CHECK-NEXT: %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
-; CHECK-NEXT: %3 = tail call <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double> %2)
-; CHECK-NEXT: %4 = extractelement <2 x double> %3, i32 0
-; CHECK-NEXT: ret double %4
+; CHECK-LABEL: @test_vfrcz_sd_0(
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double 1.000000e+00, i32 1
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double> [[TMP2]])
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x double> [[TMP3]], i32 0
+; CHECK-NEXT:    ret double [[TMP4]]
+;
   %1 = insertelement <2 x double> undef, double %a, i32 0
   %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
   %3 = tail call <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double> %2)
@@ -15,14 +17,15 @@ define double @test_vfrcz_sd_0(double %a) {
 }
 
 define float @test_vfrcz_ss_0(float %a) {
-; CHECK-LABEL: @test_vfrcz_ss_0
-; CHECK-NEXT: %1 = insertelement <4 x float> undef, float %a, i32 0
-; CHECK-NEXT: %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
-; CHECK-NEXT: %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
-; CHECK-NEXT: %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
-; CHECK-NEXT: %5 = tail call <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float> %4)
-; CHECK-NEXT: %6 = extractelement <4 x float> %5, i32 0
-; CHECK-NEXT: ret float %6
+; CHECK-LABEL: @test_vfrcz_ss_0(
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> [[TMP1]], float 1.000000e+00, i32 1
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x float> [[TMP2]], float 2.000000e+00, i32 2
+; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x float> [[TMP3]], float 3.000000e+00, i32 3
+; CHECK-NEXT:    [[TMP5:%.*]] = tail call <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float> [[TMP4]])
+; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x float> [[TMP5]], i32 0
+; CHECK-NEXT:    ret float [[TMP6]]
+;
   %1 = insertelement <4 x float> undef, float %a, i32 0
   %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
   %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
@@ -33,137 +36,153 @@ define float @test_vfrcz_ss_0(float %a) {
 }
 
 define <2 x i64> @cmp_slt_v2i64(<2 x i64> %a, <2 x i64> %b) {
-; CHECK-LABEL: @cmp_slt_v2i64
-; CHECK-NEXT: %1 = icmp slt <2 x i64> %a, %b
-; CHECK-NEXT: %2 = sext <2 x i1> %1 to <2 x i64>
-; CHECK-NEXT: ret <2 x i64> %2
+; CHECK-LABEL: @cmp_slt_v2i64(
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt <2 x i64> %a, %b
+; CHECK-NEXT:    [[TMP2:%.*]] = sext <2 x i1> [[TMP1]] to <2 x i64>
+; CHECK-NEXT:    ret <2 x i64> [[TMP2]]
+;
   %1 = tail call <2 x i64> @llvm.x86.xop.vpcomltq(<2 x i64> %a, <2 x i64> %b)
   ret <2 x i64> %1
 }
 
 define <2 x i64> @cmp_ult_v2i64(<2 x i64> %a, <2 x i64> %b) {
-; CHECK-LABEL: @cmp_ult_v2i64
-; CHECK-NEXT: %1 = icmp ult <2 x i64> %a, %b
-; CHECK-NEXT: %2 = sext <2 x i1> %1 to <2 x i64>
-; CHECK-NEXT: ret <2 x i64> %2
+; CHECK-LABEL: @cmp_ult_v2i64(
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult <2 x i64> %a, %b
+; CHECK-NEXT:    [[TMP2:%.*]] = sext <2 x i1> [[TMP1]] to <2 x i64>
+; CHECK-NEXT:    ret <2 x i64> [[TMP2]]
+;
   %1 = tail call <2 x i64> @llvm.x86.xop.vpcomltuq(<2 x i64> %a, <2 x i64> %b)
   ret <2 x i64> %1
 }
 
 define <2 x i64> @cmp_sle_v2i64(<2 x i64> %a, <2 x i64> %b) {
-; CHECK-LABEL: @cmp_sle_v2i64
-; CHECK-NEXT: %1 = icmp sle <2 x i64> %a, %b
-; CHECK-NEXT: %2 = sext <2 x i1> %1 to <2 x i64>
-; CHECK-NEXT: ret <2 x i64> %2
+; CHECK-LABEL: @cmp_sle_v2i64(
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sle <2 x i64> %a, %b
+; CHECK-NEXT:    [[TMP2:%.*]] = sext <2 x i1> [[TMP1]] to <2 x i64>
+; CHECK-NEXT:    ret <2 x i64> [[TMP2]]
+;
   %1 = tail call <2 x i64> @llvm.x86.xop.vpcomleq(<2 x i64> %a, <2 x i64> %b)
   ret <2 x i64> %1
 }
 
 define <2 x i64> @cmp_ule_v2i64(<2 x i64> %a, <2 x i64> %b) {
-; CHECK-LABEL: @cmp_ule_v2i64
-; CHECK-NEXT: %1 = icmp ule <2 x i64> %a, %b
-; CHECK-NEXT: %2 = sext <2 x i1> %1 to <2 x i64>
-; CHECK-NEXT: ret <2 x i64> %2
+; CHECK-LABEL: @cmp_ule_v2i64(
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ule <2 x i64> %a, %b
+; CHECK-NEXT:    [[TMP2:%.*]] = sext <2 x i1> [[TMP1]] to <2 x i64>
+; CHECK-NEXT:    ret <2 x i64> [[TMP2]]
+;
   %1 = tail call <2 x i64> @llvm.x86.xop.vpcomleuq(<2 x i64> %a, <2 x i64> %b)
   ret <2 x i64> %1
 }
 
 define <4 x i32> @cmp_sgt_v4i32(<4 x i32> %a, <4 x i32> %b) {
-; CHECK-LABEL: @cmp_sgt_v4i32
-; CHECK-NEXT: %1 = icmp sgt <4 x i32> %a, %b
-; CHECK-NEXT: %2 = sext <4 x i1> %1 to <4 x i32>
-; CHECK-NEXT: ret <4 x i32> %2
+; CHECK-LABEL: @cmp_sgt_v4i32(
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt <4 x i32> %a, %b
+; CHECK-NEXT:    [[TMP2:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i32>
+; CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+;
   %1 = tail call <4 x i32> @llvm.x86.xop.vpcomgtd(<4 x i32> %a, <4 x i32> %b)
   ret <4 x i32> %1
 }
 
 define <4 x i32> @cmp_ugt_v4i32(<4 x i32> %a, <4 x i32> %b) {
-; CHECK-LABEL: @cmp_ugt_v4i32
-; CHECK-NEXT: %1 = icmp ugt <4 x i32> %a, %b
-; CHECK-NEXT: %2 = sext <4 x i1> %1 to <4 x i32>
-; CHECK-NEXT: ret <4 x i32> %2
+; CHECK-LABEL: @cmp_ugt_v4i32(
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ugt <4 x i32> %a, %b
+; CHECK-NEXT:    [[TMP2:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i32>
+; CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+;
   %1 = tail call <4 x i32> @llvm.x86.xop.vpcomgtud(<4 x i32> %a, <4 x i32> %b)
   ret <4 x i32> %1
 }
 
 define <4 x i32> @cmp_sge_v4i32(<4 x i32> %a, <4 x i32> %b) {
-; CHECK-LABEL: @cmp_sge_v4i32
-; CHECK-NEXT: %1 = icmp sge <4 x i32> %a, %b
-; CHECK-NEXT: %2 = sext <4 x i1> %1 to <4 x i32>
-; CHECK-NEXT: ret <4 x i32> %2
+; CHECK-LABEL: @cmp_sge_v4i32(
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sge <4 x i32> %a, %b
+; CHECK-NEXT:    [[TMP2:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i32>
+; CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+;
   %1 = tail call <4 x i32> @llvm.x86.xop.vpcomged(<4 x i32> %a, <4 x i32> %b)
   ret <4 x i32> %1
 }
 
 define <4 x i32> @cmp_uge_v4i32(<4 x i32> %a, <4 x i32> %b) {
-; CHECK-LABEL: @cmp_uge_v4i32
-; CHECK-NEXT: %1 = icmp uge <4 x i32> %a, %b
-; CHECK-NEXT: %2 = sext <4 x i1> %1 to <4 x i32>
-; CHECK-NEXT: ret <4 x i32> %2
+; CHECK-LABEL: @cmp_uge_v4i32(
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp uge <4 x i32> %a, %b
+; CHECK-NEXT:    [[TMP2:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i32>
+; CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+;
   %1 = tail call <4 x i32> @llvm.x86.xop.vpcomgeud(<4 x i32> %a, <4 x i32> %b)
   ret <4 x i32> %1
 }
 
 define <8 x i16> @cmp_seq_v8i16(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-LABEL: @cmp_seq_v8i16
-; CHECK-NEXT: %1 = icmp eq <8 x i16> %a, %b
-; CHECK-NEXT: %2 = sext <8 x i1> %1 to <8 x i16>
-; CHECK-NEXT: ret <8 x i16> %2
+; CHECK-LABEL: @cmp_seq_v8i16(
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq <8 x i16> %a, %b
+; CHECK-NEXT:    [[TMP2:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i16>
+; CHECK-NEXT:    ret <8 x i16> [[TMP2]]
+;
   %1 = tail call <8 x i16> @llvm.x86.xop.vpcomeqw(<8 x i16> %a, <8 x i16> %b)
   ret <8 x i16> %1
 }
 
 define <8 x i16> @cmp_ueq_v8i16(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-LABEL: @cmp_ueq_v8i16
-; CHECK-NEXT: %1 = icmp eq <8 x i16> %a, %b
-; CHECK-NEXT: %2 = sext <8 x i1> %1 to <8 x i16>
-; CHECK-NEXT: ret <8 x i16> %2
+; CHECK-LABEL: @cmp_ueq_v8i16(
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq <8 x i16> %a, %b
+; CHECK-NEXT:    [[TMP2:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i16>
+; CHECK-NEXT:    ret <8 x i16> [[TMP2]]
+;
   %1 = tail call <8 x i16> @llvm.x86.xop.vpcomequw(<8 x i16> %a, <8 x i16> %b)
   ret <8 x i16> %1
 }
 
 define <8 x i16> @cmp_sne_v8i16(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-LABEL: @cmp_sne_v8i16
-; CHECK-NEXT: %1 = icmp ne <8 x i16> %a, %b
-; CHECK-NEXT: %2 = sext <8 x i1> %1 to <8 x i16>
-; CHECK-NEXT: ret <8 x i16> %2
+; CHECK-LABEL: @cmp_sne_v8i16(
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne <8 x i16> %a, %b
+; CHECK-NEXT:    [[TMP2:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i16>
+; CHECK-NEXT:    ret <8 x i16> [[TMP2]]
+;
   %1 = tail call <8 x i16> @llvm.x86.xop.vpcomnew(<8 x i16> %a, <8 x i16> %b)
   ret <8 x i16> %1
 }
 
 define <8 x i16> @cmp_une_v8i16(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-LABEL: @cmp_une_v8i16
-; CHECK-NEXT: %1 = icmp ne <8 x i16> %a, %b
-; CHECK-NEXT: %2 = sext <8 x i1> %1 to <8 x i16>
-; CHECK-NEXT: ret <8 x i16> %2
+; CHECK-LABEL: @cmp_une_v8i16(
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne <8 x i16> %a, %b
+; CHECK-NEXT:    [[TMP2:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i16>
+; CHECK-NEXT:    ret <8 x i16> [[TMP2]]
+;
   %1 = tail call <8 x i16> @llvm.x86.xop.vpcomneuw(<8 x i16> %a, <8 x i16> %b)
   ret <8 x i16> %1
 }
 
 define <16 x i8> @cmp_strue_v16i8(<16 x i8> %a, <16 x i8> %b) {
-; CHECK-LABEL: @cmp_strue_v16i8
-; CHECK-NEXT: ret <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+; CHECK-LABEL: @cmp_strue_v16i8(
+; CHECK-NEXT:    ret <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+;
   %1 = tail call <16 x i8> @llvm.x86.xop.vpcomtrueb(<16 x i8> %a, <16 x i8> %b)
   ret <16 x i8> %1
 }
 
 define <16 x i8> @cmp_utrue_v16i8(<16 x i8> %a, <16 x i8> %b) {
-; CHECK-LABEL: @cmp_utrue_v16i8
-; CHECK-NEXT: ret <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+; CHECK-LABEL: @cmp_utrue_v16i8(
+; CHECK-NEXT:    ret <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+;
   %1 = tail call <16 x i8> @llvm.x86.xop.vpcomtrueub(<16 x i8> %a, <16 x i8> %b)
   ret <16 x i8> %1
 }
 
 define <16 x i8> @cmp_sfalse_v16i8(<16 x i8> %a, <16 x i8> %b) {
-; CHECK-LABEL: @cmp_sfalse_v16i8
-; CHECK-NEXT: ret <16 x i8> zeroinitializer
+; CHECK-LABEL: @cmp_sfalse_v16i8(
+; CHECK-NEXT:    ret <16 x i8> zeroinitializer
+;
   %1 = tail call <16 x i8> @llvm.x86.xop.vpcomfalseb(<16 x i8> %a, <16 x i8> %b)
   ret <16 x i8> %1
 }
 
 define <16 x i8> @cmp_ufalse_v16i8(<16 x i8> %a, <16 x i8> %b) {
-; CHECK-LABEL: @cmp_ufalse_v16i8
-; CHECK-NEXT: ret <16 x i8> zeroinitializer
+; CHECK-LABEL: @cmp_ufalse_v16i8(
+; CHECK-NEXT:    ret <16 x i8> zeroinitializer
+;
   %1 = tail call <16 x i8> @llvm.x86.xop.vpcomfalseub(<16 x i8> %a, <16 x i8> %b)
   ret <16 x i8> %1
 }
-- 
2.11.0