From 153162f4b6391efdcc005cff348db703b135c0ec Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 19 Apr 2016 12:56:46 +0000 Subject: [PATCH] [InstCombine][X86] Regenerate SSE combine tests as part of setup for D17490 Regenerated with utils/update_test_checks.py git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@266731 91177308-0d34-0410-b5e6-96231b3b80d8 --- test/Transforms/InstCombine/x86-f16c.ll | 37 ++-- test/Transforms/InstCombine/x86-sse.ll | 299 +++++++++++++++++-------------- test/Transforms/InstCombine/x86-sse2.ll | 249 +++++++++++++------------ test/Transforms/InstCombine/x86-sse41.ll | 77 ++++---- test/Transforms/InstCombine/x86-sse4a.ll | 228 +++++++++++++---------- test/Transforms/InstCombine/x86-xop.ll | 159 ++++++++-------- 6 files changed, 581 insertions(+), 468 deletions(-) diff --git a/test/Transforms/InstCombine/x86-f16c.ll b/test/Transforms/InstCombine/x86-f16c.ll index e10b339907e..6b5b6cb26ed 100644 --- a/test/Transforms/InstCombine/x86-f16c.ll +++ b/test/Transforms/InstCombine/x86-f16c.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -instcombine -S | FileCheck %s declare <4 x float> @llvm.x86.vcvtph2ps.128(<8 x i16>) @@ -9,9 +10,10 @@ declare <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16>) ; Only bottom 4 elements required. define <4 x float> @demand_vcvtph2ps_128(<8 x i16> %A) { -; CHECK-LABEL: @demand_vcvtph2ps_128 -; CHECK-NEXT: %1 = tail call <4 x float> @llvm.x86.vcvtph2ps.128(<8 x i16> %A) -; CHECK-NEXT: ret <4 x float> %1 +; CHECK-LABEL: @demand_vcvtph2ps_128( +; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.vcvtph2ps.128(<8 x i16> %A) +; CHECK-NEXT: ret <4 x float> [[TMP1]] +; %1 = shufflevector <8 x i16> %A, <8 x i16> undef, <8 x i32> %2 = tail call <4 x float> @llvm.x86.vcvtph2ps.128(<8 x i16> %1) ret <4 x float> %2 @@ -19,10 +21,11 @@ define <4 x float> @demand_vcvtph2ps_128(<8 x i16> %A) { ; All 8 elements required. define <8 x float> @demand_vcvtph2ps_256(<8 x i16> %A) { -; CHECK-LABEL: @demand_vcvtph2ps_256 -; CHECK-NEXT: %1 = shufflevector <8 x i16> %A, <8 x i16> undef, <8 x i32> -; CHECK-NEXT: %2 = tail call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> %1) -; CHECK-NEXT: ret <8 x float> %2 +; CHECK-LABEL: @demand_vcvtph2ps_256( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> %A, <8 x i16> undef, <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> [[TMP1]]) +; CHECK-NEXT: ret <8 x float> [[TMP2]] +; %1 = shufflevector <8 x i16> %A, <8 x i16> undef, <8 x i32> %2 = tail call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> %1) ret <8 x float> %2 @@ -33,29 +36,33 @@ define <8 x float> @demand_vcvtph2ps_256(<8 x i16> %A) { ; define <4 x float> @fold_vcvtph2ps_128() { -; CHECK-LABEL: @fold_vcvtph2ps_128 -; CHECK-NEXT: ret <4 x float> +; CHECK-LABEL: @fold_vcvtph2ps_128( +; CHECK-NEXT: ret <4 x float> +; %1 = tail call <4 x float> @llvm.x86.vcvtph2ps.128(<8 x i16> ) ret <4 x float> %1 } define <8 x float> @fold_vcvtph2ps_256() { -; CHECK-LABEL: @fold_vcvtph2ps_256 -; CHECK-NEXT: ret <8 x float> +; CHECK-LABEL: @fold_vcvtph2ps_256( +; CHECK-NEXT: ret <8 x float> +; %1 = tail call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> ) ret <8 x float> %1 } define <4 x float> @fold_vcvtph2ps_128_zero() { -; CHECK-LABEL: @fold_vcvtph2ps_128_zero -; CHECK-NEXT: ret <4 x float> zeroinitializer +; CHECK-LABEL: @fold_vcvtph2ps_128_zero( +; CHECK-NEXT: ret <4 x float> zeroinitializer +; %1 = tail call <4 x float> @llvm.x86.vcvtph2ps.128(<8 x i16> ) ret <4 x float> %1 } define <8 x float> @fold_vcvtph2ps_256_zero() { -; CHECK-LABEL: @fold_vcvtph2ps_256_zero -; CHECK-NEXT: ret <8 x float> zeroinitializer +; CHECK-LABEL: @fold_vcvtph2ps_256_zero( +; CHECK-NEXT: ret <8 x float> zeroinitializer +; %1 = tail call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> ) ret <8 x float> %1 } diff --git a/test/Transforms/InstCombine/x86-sse.ll b/test/Transforms/InstCombine/x86-sse.ll index c38aae914fa..08509dbd717 100644 --- a/test/Transforms/InstCombine/x86-sse.ll +++ b/test/Transforms/InstCombine/x86-sse.ll @@ -1,15 +1,17 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -instcombine -S | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" define float @test_rcp_ss_0(float %a) { -; CHECK-LABEL: @test_rcp_ss_0 -; CHECK-NEXT: %1 = insertelement <4 x float> undef, float %a, i32 0 -; CHECK-NEXT: %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1 -; CHECK-NEXT: %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2 -; CHECK-NEXT: %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3 -; CHECK-NEXT: %5 = tail call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %4) -; CHECK-NEXT: %6 = extractelement <4 x float> %5, i32 0 -; CHECK-NEXT: ret float %6 +; CHECK-LABEL: @test_rcp_ss_0( +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> [[TMP1]], float 1.000000e+00, i32 1 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x float> [[TMP2]], float 2.000000e+00, i32 2 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x float> [[TMP3]], float 3.000000e+00, i32 3 +; CHECK-NEXT: [[TMP5:%.*]] = tail call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> [[TMP4]]) +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[TMP5]], i32 0 +; CHECK-NEXT: ret float [[TMP6]] +; %1 = insertelement <4 x float> undef, float %a, i32 0 %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1 %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2 @@ -20,14 +22,15 @@ define float @test_rcp_ss_0(float %a) { } define float @test_sqrt_ss_0(float %a) { -; CHECK-LABEL: @test_sqrt_ss_0 -; CHECK-NEXT: %1 = insertelement <4 x float> undef, float %a, i32 0 -; CHECK-NEXT: %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1 -; CHECK-NEXT: %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2 -; CHECK-NEXT: %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3 -; CHECK-NEXT: %5 = tail call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %4) -; CHECK-NEXT: %6 = extractelement <4 x float> %5, i32 0 -; CHECK-NEXT: ret float %6 +; CHECK-LABEL: @test_sqrt_ss_0( +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> [[TMP1]], float 1.000000e+00, i32 1 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x float> [[TMP2]], float 2.000000e+00, i32 2 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x float> [[TMP3]], float 3.000000e+00, i32 3 +; CHECK-NEXT: [[TMP5:%.*]] = tail call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> [[TMP4]]) +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[TMP5]], i32 0 +; CHECK-NEXT: ret float [[TMP6]] +; %1 = insertelement <4 x float> undef, float %a, i32 0 %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1 %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2 @@ -38,14 +41,15 @@ define float @test_sqrt_ss_0(float %a) { } define float @test_rsqrt_ss_0(float %a) { -; CHECK-LABEL: @test_rsqrt_ss_0 -; CHECK-NEXT: %1 = insertelement <4 x float> undef, float %a, i32 0 -; CHECK-NEXT: %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1 -; CHECK-NEXT: %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2 -; CHECK-NEXT: %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3 -; CHECK-NEXT: %5 = tail call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %4) -; CHECK-NEXT: %6 = extractelement <4 x float> %5, i32 0 -; CHECK-NEXT: ret float %6 +; CHECK-LABEL: @test_rsqrt_ss_0( +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> [[TMP1]], float 1.000000e+00, i32 1 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x float> [[TMP2]], float 2.000000e+00, i32 2 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x float> [[TMP3]], float 3.000000e+00, i32 3 +; CHECK-NEXT: [[TMP5:%.*]] = tail call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> [[TMP4]]) +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[TMP5]], i32 0 +; CHECK-NEXT: ret float [[TMP6]] +; %1 = insertelement <4 x float> undef, float %a, i32 0 %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1 %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2 @@ -56,18 +60,19 @@ define float @test_rsqrt_ss_0(float %a) { } define float @test_add_ss_0(float %a, float %b) { -; CHECK-LABEL: @test_add_ss_0 -; CHECK-NEXT: %1 = insertelement <4 x float> undef, float %a, i32 0 -; CHECK-NEXT: %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1 -; CHECK-NEXT: %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2 -; CHECK-NEXT: %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3 -; CHECK-NEXT: %5 = insertelement <4 x float> undef, float %b, i32 0 -; CHECK-NEXT: %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1 -; CHECK-NEXT: %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2 -; CHECK-NEXT: %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3 -; CHECK-NEXT: %9 = tail call <4 x float> @llvm.x86.sse.add.ss(<4 x float> %4, <4 x float> %8) -; CHECK-NEXT: %r = extractelement <4 x float> %9, i32 0 -; CHECK-NEXT: ret float %r +; CHECK-LABEL: @test_add_ss_0( +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> [[TMP1]], float 1.000000e+00, i32 1 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x float> [[TMP2]], float 2.000000e+00, i32 2 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x float> [[TMP3]], float 3.000000e+00, i32 3 +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x float> undef, float %b, i32 0 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x float> [[TMP5]], float 4.000000e+00, i32 1 +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x float> [[TMP6]], float 5.000000e+00, i32 2 +; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x float> [[TMP7]], float 6.000000e+00, i32 3 +; CHECK-NEXT: [[TMP9:%.*]] = tail call <4 x float> @llvm.x86.sse.add.ss(<4 x float> [[TMP4]], <4 x float> [[TMP8]]) +; CHECK-NEXT: [[R:%.*]] = extractelement <4 x float> [[TMP9]], i32 0 +; CHECK-NEXT: ret float [[R]] +; %1 = insertelement <4 x float> undef, float %a, i32 0 %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1 %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2 @@ -82,9 +87,10 @@ define float @test_add_ss_0(float %a, float %b) { } define float @test_sub_ss_0(float %a, float %b) { -; CHECK-LABEL: @test_sub_ss_0 -; CHECK-NEXT: %1 = fsub float %a, %b -; CHECK-NEXT: ret float %1 +; CHECK-LABEL: @test_sub_ss_0( +; CHECK-NEXT: [[TMP1:%.*]] = fsub float %a, %b +; CHECK-NEXT: ret float [[TMP1]] +; %1 = insertelement <4 x float> undef, float %a, i32 0 %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1 %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2 @@ -99,9 +105,10 @@ define float @test_sub_ss_0(float %a, float %b) { } define float @test_mul_ss_0(float %a, float %b) { -; CHECK-LABEL: @test_mul_ss_0 -; CHECK-NEXT: %1 = fmul float %a, %b -; CHECK-NEXT: ret float %1 +; CHECK-LABEL: @test_mul_ss_0( +; CHECK-NEXT: [[TMP1:%.*]] = fmul float %a, %b +; CHECK-NEXT: ret float [[TMP1]] +; %1 = insertelement <4 x float> undef, float %a, i32 0 %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1 %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2 @@ -116,18 +123,19 @@ define float @test_mul_ss_0(float %a, float %b) { } define float @test_div_ss_0(float %a, float %b) { -; CHECK-LABEL: @test_div_ss_0 -; CHECK-NEXT: %1 = insertelement <4 x float> undef, float %a, i32 0 -; CHECK-NEXT: %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1 -; CHECK-NEXT: %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2 -; CHECK-NEXT: %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3 -; CHECK-NEXT: %5 = insertelement <4 x float> undef, float %b, i32 0 -; CHECK-NEXT: %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1 -; CHECK-NEXT: %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2 -; CHECK-NEXT: %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3 -; CHECK-NEXT: %9 = tail call <4 x float> @llvm.x86.sse.div.ss(<4 x float> %4, <4 x float> %8) -; CHECK-NEXT: %r = extractelement <4 x float> %9, i32 0 -; CHECK-NEXT: ret float %r +; CHECK-LABEL: @test_div_ss_0( +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> [[TMP1]], float 1.000000e+00, i32 1 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x float> [[TMP2]], float 2.000000e+00, i32 2 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x float> [[TMP3]], float 3.000000e+00, i32 3 +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x float> undef, float %b, i32 0 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x float> [[TMP5]], float 4.000000e+00, i32 1 +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x float> [[TMP6]], float 5.000000e+00, i32 2 +; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x float> [[TMP7]], float 6.000000e+00, i32 3 +; CHECK-NEXT: [[TMP9:%.*]] = tail call <4 x float> @llvm.x86.sse.div.ss(<4 x float> [[TMP4]], <4 x float> [[TMP8]]) +; CHECK-NEXT: [[R:%.*]] = extractelement <4 x float> [[TMP9]], i32 0 +; CHECK-NEXT: ret float [[R]] +; %1 = insertelement <4 x float> undef, float %a, i32 0 %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1 %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2 @@ -142,12 +150,13 @@ define float @test_div_ss_0(float %a, float %b) { } define float @test_min_ss_0(float %a, float %b) { -; CHECK-LABEL: @test_min_ss_0 -; CHECK-NEXT: %1 = insertelement <4 x float> undef, float %a, i32 0 -; CHECK-NEXT: %2 = insertelement <4 x float> undef, float %b, i32 0 -; CHECK-NEXT: %3 = tail call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %1, <4 x float> %2) -; CHECK-NEXT: %4 = extractelement <4 x float> %3, i32 0 -; CHECK-NEXT: ret float %4 +; CHECK-LABEL: @test_min_ss_0( +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x float> @llvm.x86.sse.min.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]]) +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[TMP3]], i32 0 +; CHECK-NEXT: ret float [[TMP4]] +; %1 = insertelement <4 x float> undef, float %a, i32 0 %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1 %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2 @@ -162,12 +171,13 @@ define float @test_min_ss_0(float %a, float %b) { } define float @test_max_ss_0(float %a, float %b) { -; CHECK-LABEL: @test_max_ss_0 -; CHECK-NEXT: %1 = insertelement <4 x float> undef, float %a, i32 0 -; CHECK-NEXT: %2 = insertelement <4 x float> undef, float %b, i32 0 -; CHECK-NEXT: %3 = tail call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %1, <4 x float> %2) -; CHECK-NEXT: %4 = extractelement <4 x float> %3, i32 0 -; CHECK-NEXT: ret float %4 +; CHECK-LABEL: @test_max_ss_0( +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x float> @llvm.x86.sse.max.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]]) +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[TMP3]], i32 0 +; CHECK-NEXT: ret float [[TMP4]] +; %1 = insertelement <4 x float> undef, float %a, i32 0 %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1 %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2 @@ -182,18 +192,19 @@ define float @test_max_ss_0(float %a, float %b) { } define float @test_cmp_ss_0(float %a, float %b) { -; CHECK-LABEL: @test_cmp_ss_0 -; CHECK-NEXT: %1 = insertelement <4 x float> undef, float %a, i32 0 -; CHECK-NEXT: %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1 -; CHECK-NEXT: %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2 -; CHECK-NEXT: %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3 -; CHECK-NEXT: %5 = insertelement <4 x float> undef, float %b, i32 0 -; CHECK-NEXT: %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1 -; CHECK-NEXT: %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2 -; CHECK-NEXT: %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3 -; CHECK-NEXT: %9 = tail call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %4, <4 x float> %8, i8 0) -; CHECK-NEXT: %r = extractelement <4 x float> %9, i32 0 -; CHECK-NEXT: ret float %r +; CHECK-LABEL: @test_cmp_ss_0( +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> [[TMP1]], float 1.000000e+00, i32 1 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x float> [[TMP2]], float 2.000000e+00, i32 2 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x float> [[TMP3]], float 3.000000e+00, i32 3 +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x float> undef, float %b, i32 0 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x float> [[TMP5]], float 4.000000e+00, i32 1 +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x float> [[TMP6]], float 5.000000e+00, i32 2 +; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x float> [[TMP7]], float 6.000000e+00, i32 3 +; CHECK-NEXT: [[TMP9:%.*]] = tail call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> [[TMP4]], <4 x float> [[TMP8]], i8 0) +; CHECK-NEXT: [[R:%.*]] = extractelement <4 x float> [[TMP9]], i32 0 +; CHECK-NEXT: ret float [[R]] +; %1 = insertelement <4 x float> undef, float %a, i32 0 %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1 %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2 @@ -208,11 +219,12 @@ define float @test_cmp_ss_0(float %a, float %b) { } define i32 @test_comieq_ss_0(float %a, float %b) { -; CHECK-LABEL: @test_comieq_ss_0 -; CHECK-NEXT: %1 = insertelement <4 x float> undef, float %a, i32 0 -; CHECK-NEXT: %2 = insertelement <4 x float> undef, float %b, i32 0 -; CHECK-NEXT: %3 = tail call i32 @llvm.x86.sse.comieq.ss(<4 x float> %1, <4 x float> %2) -; CHECK-NEXT: ret i32 %3 +; CHECK-LABEL: @test_comieq_ss_0( +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.comieq.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]]) +; CHECK-NEXT: ret i32 [[TMP3]] +; %1 = insertelement <4 x float> undef, float %a, i32 0 %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1 %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2 @@ -226,11 +238,12 @@ define i32 @test_comieq_ss_0(float %a, float %b) { } define i32 @test_comige_ss_0(float %a, float %b) { -; CHECK-LABEL: @test_comige_ss_0 -; CHECK-NEXT: %1 = insertelement <4 x float> undef, float %a, i32 0 -; CHECK-NEXT: %2 = insertelement <4 x float> undef, float %b, i32 0 -; CHECK-NEXT: %3 = tail call i32 @llvm.x86.sse.comige.ss(<4 x float> %1, <4 x float> %2) -; CHECK-NEXT: ret i32 %3 +; CHECK-LABEL: @test_comige_ss_0( +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.comige.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]]) +; CHECK-NEXT: ret i32 [[TMP3]] +; %1 = insertelement <4 x float> undef, float %a, i32 0 %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1 %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2 @@ -244,11 +257,12 @@ define i32 @test_comige_ss_0(float %a, float %b) { } define i32 @test_comigt_ss_0(float %a, float %b) { -; CHECK-LABEL: @test_comigt_ss_0 -; CHECK-NEXT: %1 = insertelement <4 x float> undef, float %a, i32 0 -; CHECK-NEXT: %2 = insertelement <4 x float> undef, float %b, i32 0 -; CHECK-NEXT: %3 = tail call i32 @llvm.x86.sse.comigt.ss(<4 x float> %1, <4 x float> %2) -; CHECK-NEXT: ret i32 %3 +; CHECK-LABEL: @test_comigt_ss_0( +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.comigt.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]]) +; CHECK-NEXT: ret i32 [[TMP3]] +; %1 = insertelement <4 x float> undef, float %a, i32 0 %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1 %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2 @@ -262,11 +276,12 @@ define i32 @test_comigt_ss_0(float %a, float %b) { } define i32 @test_comile_ss_0(float %a, float %b) { -; CHECK-LABEL: @test_comile_ss_0 -; CHECK-NEXT: %1 = insertelement <4 x float> undef, float %a, i32 0 -; CHECK-NEXT: %2 = insertelement <4 x float> undef, float %b, i32 0 -; CHECK-NEXT: %3 = tail call i32 @llvm.x86.sse.comile.ss(<4 x float> %1, <4 x float> %2) -; CHECK-NEXT: ret i32 %3 +; CHECK-LABEL: @test_comile_ss_0( +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.comile.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]]) +; CHECK-NEXT: ret i32 [[TMP3]] +; %1 = insertelement <4 x float> undef, float %a, i32 0 %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1 %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2 @@ -280,11 +295,12 @@ define i32 @test_comile_ss_0(float %a, float %b) { } define i32 @test_comilt_ss_0(float %a, float %b) { -; CHECK-LABEL: @test_comilt_ss_0 -; CHECK-NEXT: %1 = insertelement <4 x float> undef, float %a, i32 0 -; CHECK-NEXT: %2 = insertelement <4 x float> undef, float %b, i32 0 -; CHECK-NEXT: %3 = tail call i32 @llvm.x86.sse.comilt.ss(<4 x float> %1, <4 x float> %2) -; CHECK-NEXT: ret i32 %3 +; CHECK-LABEL: @test_comilt_ss_0( +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.comilt.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]]) +; CHECK-NEXT: ret i32 [[TMP3]] +; %1 = insertelement <4 x float> undef, float %a, i32 0 %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1 %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2 @@ -298,11 +314,12 @@ define i32 @test_comilt_ss_0(float %a, float %b) { } define i32 @test_comineq_ss_0(float %a, float %b) { -; CHECK-LABEL: @test_comineq_ss_0 -; CHECK-NEXT: %1 = insertelement <4 x float> undef, float %a, i32 0 -; CHECK-NEXT: %2 = insertelement <4 x float> undef, float %b, i32 0 -; CHECK-NEXT: %3 = tail call i32 @llvm.x86.sse.comineq.ss(<4 x float> %1, <4 x float> %2) -; CHECK-NEXT: ret i32 %3 +; CHECK-LABEL: @test_comineq_ss_0( +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.comineq.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]]) +; CHECK-NEXT: ret i32 [[TMP3]] +; %1 = insertelement <4 x float> undef, float %a, i32 0 %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1 %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2 @@ -316,11 +333,12 @@ define i32 @test_comineq_ss_0(float %a, float %b) { } define i32 @test_ucomieq_ss_0(float %a, float %b) { -; CHECK-LABEL: @test_ucomieq_ss_0 -; CHECK-NEXT: %1 = insertelement <4 x float> undef, float %a, i32 0 -; CHECK-NEXT: %2 = insertelement <4 x float> undef, float %b, i32 0 -; CHECK-NEXT: %3 = tail call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> %1, <4 x float> %2) -; CHECK-NEXT: ret i32 %3 +; CHECK-LABEL: @test_ucomieq_ss_0( +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]]) +; CHECK-NEXT: ret i32 [[TMP3]] +; %1 = insertelement <4 x float> undef, float %a, i32 0 %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1 %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2 @@ -334,11 +352,12 @@ define i32 @test_ucomieq_ss_0(float %a, float %b) { } define i32 @test_ucomige_ss_0(float %a, float %b) { -; CHECK-LABEL: @test_ucomige_ss_0 -; CHECK-NEXT: %1 = insertelement <4 x float> undef, float %a, i32 0 -; CHECK-NEXT: %2 = insertelement <4 x float> undef, float %b, i32 0 -; CHECK-NEXT: %3 = tail call i32 @llvm.x86.sse.ucomige.ss(<4 x float> %1, <4 x float> %2) -; CHECK-NEXT: ret i32 %3 +; CHECK-LABEL: @test_ucomige_ss_0( +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.ucomige.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]]) +; CHECK-NEXT: ret i32 [[TMP3]] +; %1 = insertelement <4 x float> undef, float %a, i32 0 %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1 %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2 @@ -352,11 +371,12 @@ define i32 @test_ucomige_ss_0(float %a, float %b) { } define i32 @test_ucomigt_ss_0(float %a, float %b) { -; CHECK-LABEL: @test_ucomigt_ss_0 -; CHECK-NEXT: %1 = insertelement <4 x float> undef, float %a, i32 0 -; CHECK-NEXT: %2 = insertelement <4 x float> undef, float %b, i32 0 -; CHECK-NEXT: %3 = tail call i32 @llvm.x86.sse.ucomigt.ss(<4 x float> %1, <4 x float> %2) -; CHECK-NEXT: ret i32 %3 +; CHECK-LABEL: @test_ucomigt_ss_0( +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.ucomigt.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]]) +; CHECK-NEXT: ret i32 [[TMP3]] +; %1 = insertelement <4 x float> undef, float %a, i32 0 %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1 %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2 @@ -370,11 +390,12 @@ define i32 @test_ucomigt_ss_0(float %a, float %b) { } define i32 @test_ucomile_ss_0(float %a, float %b) { -; CHECK-LABEL: @test_ucomile_ss_0 -; CHECK-NEXT: %1 = insertelement <4 x float> undef, float %a, i32 0 -; CHECK-NEXT: %2 = insertelement <4 x float> undef, float %b, i32 0 -; CHECK-NEXT: %3 = tail call i32 @llvm.x86.sse.ucomile.ss(<4 x float> %1, <4 x float> %2) -; CHECK-NEXT: ret i32 %3 +; CHECK-LABEL: @test_ucomile_ss_0( +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.ucomile.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]]) +; CHECK-NEXT: ret i32 [[TMP3]] +; %1 = insertelement <4 x float> undef, float %a, i32 0 %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1 %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2 @@ -388,11 +409,12 @@ define i32 @test_ucomile_ss_0(float %a, float %b) { } define i32 @test_ucomilt_ss_0(float %a, float %b) { -; CHECK-LABEL: @test_ucomilt_ss_0 -; CHECK-NEXT: %1 = insertelement <4 x float> undef, float %a, i32 0 -; CHECK-NEXT: %2 = insertelement <4 x float> undef, float %b, i32 0 -; CHECK-NEXT: %3 = tail call i32 @llvm.x86.sse.ucomilt.ss(<4 x float> %1, <4 x float> %2) -; CHECK-NEXT: ret i32 %3 +; CHECK-LABEL: @test_ucomilt_ss_0( +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.ucomilt.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]]) +; CHECK-NEXT: ret i32 [[TMP3]] +; %1 = insertelement <4 x float> undef, float %a, i32 0 %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1 %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2 @@ -406,11 +428,12 @@ define i32 @test_ucomilt_ss_0(float %a, float %b) { } define i32 @test_ucomineq_ss_0(float %a, float %b) { -; CHECK-LABEL: @test_ucomineq_ss_0 -; CHECK-NEXT: %1 = insertelement <4 x float> undef, float %a, i32 0 -; CHECK-NEXT: %2 = insertelement <4 x float> undef, float %b, i32 0 -; CHECK-NEXT: %3 = tail call i32 @llvm.x86.sse.ucomineq.ss(<4 x float> %1, <4 x float> %2) -; CHECK-NEXT: ret i32 %3 +; CHECK-LABEL: @test_ucomineq_ss_0( +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.ucomineq.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]]) +; CHECK-NEXT: ret i32 [[TMP3]] +; %1 = insertelement <4 x float> undef, float %a, i32 0 %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1 %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2 diff --git a/test/Transforms/InstCombine/x86-sse2.ll b/test/Transforms/InstCombine/x86-sse2.ll index 132239e05a7..3a4b49c4a23 100644 --- a/test/Transforms/InstCombine/x86-sse2.ll +++ b/test/Transforms/InstCombine/x86-sse2.ll @@ -1,13 +1,15 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -instcombine -S | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" define double @test_sqrt_sd_0(double %a) { -; CHECK-LABEL: @test_sqrt_sd_0 -; CHECK-NEXT: %1 = insertelement <2 x double> undef, double %a, i32 0 -; CHECK-NEXT: %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1 -; CHECK-NEXT: %3 = tail call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %2) -; CHECK-NEXT: %4 = extractelement <2 x double> %3, i32 0 -; CHECK-NEXT: ret double %4 +; CHECK-LABEL: @test_sqrt_sd_0( +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double 1.000000e+00, i32 1 +; CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> [[TMP2]]) +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x double> [[TMP3]], i32 0 +; CHECK-NEXT: ret double [[TMP4]] +; %1 = insertelement <2 x double> undef, double %a, i32 0 %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1 %3 = tail call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %2) @@ -16,108 +18,115 @@ define double @test_sqrt_sd_0(double %a) { } define double @test_add_sd_0(double %a, double %b) { -; CHECK-LABEL: @test_add_sd_0 -; CHECK-NEXT: %1 = insertelement <2 x double> undef, double %a, i32 0 -; CHECK-NEXT: %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1 -; CHECK-NEXT: %3 = insertelement <2 x double> undef, double %b, i32 0 -; CHECK-NEXT: %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1 -; CHECK-NEXT: %5 = tail call <2 x double> @llvm.x86.sse2.add.sd(<2 x double> %2, <2 x double> %4) -; CHECK-NEXT: %6 = extractelement <2 x double> %5, i32 0 -; CHECK-NEXT: ret double %6 +; CHECK-LABEL: @test_add_sd_0( +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double 1.000000e+00, i32 1 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> undef, double %b, i32 0 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double 2.000000e+00, i32 1 +; CHECK-NEXT: [[TMP5:%.*]] = tail call <2 x double> @llvm.x86.sse2.add.sd(<2 x double> [[TMP2]], <2 x double> [[TMP4]]) +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[TMP5]], i32 0 +; CHECK-NEXT: ret double [[TMP6]] +; %1 = insertelement <2 x double> undef, double %a, i32 0 %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1 %3 = insertelement <2 x double> undef, double %b, i32 0 %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1 %5 = tail call <2 x double> @llvm.x86.sse2.add.sd(<2 x double> %2, <2 x double> %4) %6 = extractelement <2 x double> %5, i32 0 - ret double %6 + ret double %6 } define double @test_sub_sd_0(double %a, double %b) { -; CHECK-LABEL: @test_sub_sd_0 -; CHECK-NEXT: %1 = fsub double %a, %b -; CHECK-NEXT: ret double %1 +; CHECK-LABEL: @test_sub_sd_0( +; CHECK-NEXT: [[TMP1:%.*]] = fsub double %a, %b +; CHECK-NEXT: ret double [[TMP1]] +; %1 = insertelement <2 x double> undef, double %a, i32 0 %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1 %3 = insertelement <2 x double> undef, double %b, i32 0 %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1 %5 = tail call <2 x double> @llvm.x86.sse2.sub.sd(<2 x double> %2, <2 x double> %4) %6 = extractelement <2 x double> %5, i32 0 - ret double %6 + ret double %6 } define double @test_mul_sd_0(double %a, double %b) { -; CHECK-LABEL: @test_mul_sd_0 -; CHECK-NEXT: %1 = fmul double %a, %b -; CHECK-NEXT: ret double %1 +; CHECK-LABEL: @test_mul_sd_0( +; CHECK-NEXT: [[TMP1:%.*]] = fmul double %a, %b +; CHECK-NEXT: ret double [[TMP1]] +; %1 = insertelement <2 x double> undef, double %a, i32 0 %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1 %3 = insertelement <2 x double> undef, double %b, i32 0 %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1 %5 = tail call <2 x double> @llvm.x86.sse2.mul.sd(<2 x double> %2, <2 x double> %4) %6 = extractelement <2 x double> %5, i32 0 - ret double %6 + ret double %6 } define double @test_div_sd_0(double %a, double %b) { -; CHECK-LABEL: @test_div_sd_0 -; CHECK-NEXT: %1 = insertelement <2 x double> undef, double %a, i32 0 -; CHECK-NEXT: %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1 -; CHECK-NEXT: %3 = insertelement <2 x double> undef, double %b, i32 0 -; CHECK-NEXT: %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1 -; CHECK-NEXT: %5 = tail call <2 x double> @llvm.x86.sse2.div.sd(<2 x double> %2, <2 x double> %4) -; CHECK-NEXT: %6 = extractelement <2 x double> %5, i32 0 -; CHECK-NEXT: ret double %6 +; CHECK-LABEL: @test_div_sd_0( +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double 1.000000e+00, i32 1 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> undef, double %b, i32 0 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double 2.000000e+00, i32 1 +; CHECK-NEXT: [[TMP5:%.*]] = tail call <2 x double> @llvm.x86.sse2.div.sd(<2 x double> [[TMP2]], <2 x double> [[TMP4]]) +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[TMP5]], i32 0 +; CHECK-NEXT: ret double [[TMP6]] +; %1 = insertelement <2 x double> undef, double %a, i32 0 %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1 %3 = insertelement <2 x double> undef, double %b, i32 0 %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1 %5 = tail call <2 x double> @llvm.x86.sse2.div.sd(<2 x double> %2, <2 x double> %4) %6 = extractelement <2 x double> %5, i32 0 - ret double %6 + ret double %6 } define double @test_min_sd_0(double %a, double %b) { -; CHECK-LABEL: @test_min_sd_0 -; CHECK-NEXT: %1 = insertelement <2 x double> undef, double %a, i32 0 -; CHECK-NEXT: %2 = insertelement <2 x double> undef, double %b, i32 0 -; CHECK-NEXT: %3 = tail call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %1, <2 x double> %2) -; CHECK-NEXT: %4 = extractelement <2 x double> %3, i32 0 -; CHECK-NEXT: ret double %4 +; CHECK-LABEL: @test_min_sd_0( +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> undef, double %b, i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]]) +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x double> [[TMP3]], i32 0 +; CHECK-NEXT: ret double [[TMP4]] +; %1 = insertelement <2 x double> undef, double %a, i32 0 %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1 %3 = insertelement <2 x double> undef, double %b, i32 0 %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1 %5 = tail call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %2, <2 x double> %4) %6 = extractelement <2 x double> %5, i32 0 - ret double %6 + ret double %6 } define double @test_max_sd_0(double %a, double %b) { -; CHECK-LABEL: @test_max_sd_0 -; CHECK-NEXT: %1 = insertelement <2 x double> undef, double %a, i32 0 -; CHECK-NEXT: %2 = insertelement <2 x double> undef, double %b, i32 0 -; CHECK-NEXT: %3 = tail call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %1, <2 x double> %2) -; CHECK-NEXT: %4 = extractelement <2 x double> %3, i32 0 -; CHECK-NEXT: ret double %4 +; CHECK-LABEL: @test_max_sd_0( +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> undef, double %b, i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]]) +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x double> [[TMP3]], i32 0 +; CHECK-NEXT: ret double [[TMP4]] +; %1 = insertelement <2 x double> undef, double %a, i32 0 %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1 %3 = insertelement <2 x double> undef, double %b, i32 0 %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1 %5 = tail call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %2, <2 x double> %4) %6 = extractelement <2 x double> %5, i32 0 - ret double %6 + ret double %6 } define double @test_cmp_sd_0(double %a, double %b) { -; CHECK-LABEL: @test_cmp_sd_0 -; CHECK-NEXT: %1 = insertelement <2 x double> undef, double %a, i32 0 -; CHECK-NEXT: %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1 -; CHECK-NEXT: %3 = insertelement <2 x double> undef, double %b, i32 0 -; CHECK-NEXT: %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1 -; CHECK-NEXT: %5 = tail call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %2, <2 x double> %4, i8 0) -; CHECK-NEXT: %6 = extractelement <2 x double> %5, i32 0 -; CHECK-NEXT: ret double %6 +; CHECK-LABEL: @test_cmp_sd_0( +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double 1.000000e+00, i32 1 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> undef, double %b, i32 0 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double 2.000000e+00, i32 1 +; CHECK-NEXT: [[TMP5:%.*]] = tail call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> [[TMP2]], <2 x double> [[TMP4]], i8 0) +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[TMP5]], i32 0 +; CHECK-NEXT: ret double [[TMP6]] +; %1 = insertelement <2 x double> undef, double %a, i32 0 %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1 %3 = insertelement <2 x double> undef, double %b, i32 0 @@ -128,11 +137,12 @@ define double @test_cmp_sd_0(double %a, double %b) { } define i32 @test_comieq_sd_0(double %a, double %b) { -; CHECK-LABEL: @test_comieq_sd_0 -; CHECK-NEXT: %1 = insertelement <2 x double> undef, double %a, i32 0 -; CHECK-NEXT: %2 = insertelement <2 x double> undef, double %b, i32 0 -; CHECK-NEXT: %3 = tail call i32 @llvm.x86.sse2.comieq.sd(<2 x double> %1, <2 x double> %2) -; CHECK-NEXT: ret i32 %3 +; CHECK-LABEL: @test_comieq_sd_0( +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> undef, double %b, i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse2.comieq.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]]) +; CHECK-NEXT: ret i32 [[TMP3]] +; %1 = insertelement <2 x double> undef, double %a, i32 0 %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1 %3 = insertelement <2 x double> undef, double %b, i32 0 @@ -142,11 +152,12 @@ define i32 @test_comieq_sd_0(double %a, double %b) { } define i32 @test_comige_sd_0(double %a, double %b) { -; CHECK-LABEL: @test_comige_sd_0 -; CHECK-NEXT: %1 = insertelement <2 x double> undef, double %a, i32 0 -; CHECK-NEXT: %2 = insertelement <2 x double> undef, double %b, i32 0 -; CHECK-NEXT: %3 = tail call i32 @llvm.x86.sse2.comige.sd(<2 x double> %1, <2 x double> %2) -; CHECK-NEXT: ret i32 %3 +; CHECK-LABEL: @test_comige_sd_0( +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> undef, double %b, i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse2.comige.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]]) +; CHECK-NEXT: ret i32 [[TMP3]] +; %1 = insertelement <2 x double> undef, double %a, i32 0 %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1 %3 = insertelement <2 x double> undef, double %b, i32 0 @@ -156,11 +167,12 @@ define i32 @test_comige_sd_0(double %a, double %b) { } define i32 @test_comigt_sd_0(double %a, double %b) { -; CHECK-LABEL: @test_comigt_sd_0 -; CHECK-NEXT: %1 = insertelement <2 x double> undef, double %a, i32 0 -; CHECK-NEXT: %2 = insertelement <2 x double> undef, double %b, i32 0 -; CHECK-NEXT: %3 = tail call i32 @llvm.x86.sse2.comigt.sd(<2 x double> %1, <2 x double> %2) -; CHECK-NEXT: ret i32 %3 +; CHECK-LABEL: @test_comigt_sd_0( +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> undef, double %b, i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse2.comigt.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]]) +; CHECK-NEXT: ret i32 [[TMP3]] +; %1 = insertelement <2 x double> undef, double %a, i32 0 %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1 %3 = insertelement <2 x double> undef, double %b, i32 0 @@ -170,11 +182,12 @@ define i32 @test_comigt_sd_0(double %a, double %b) { } define i32 @test_comile_sd_0(double %a, double %b) { -; CHECK-LABEL: @test_comile_sd_0 -; CHECK-NEXT: %1 = insertelement <2 x double> undef, double %a, i32 0 -; CHECK-NEXT: %2 = insertelement <2 x double> undef, double %b, i32 0 -; CHECK-NEXT: %3 = tail call i32 @llvm.x86.sse2.comile.sd(<2 x double> %1, <2 x double> %2) -; CHECK-NEXT: ret i32 %3 +; CHECK-LABEL: @test_comile_sd_0( +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> undef, double %b, i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse2.comile.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]]) +; CHECK-NEXT: ret i32 [[TMP3]] +; %1 = insertelement <2 x double> undef, double %a, i32 0 %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1 %3 = insertelement <2 x double> undef, double %b, i32 0 @@ -184,11 +197,12 @@ define i32 @test_comile_sd_0(double %a, double %b) { } define i32 @test_comilt_sd_0(double %a, double %b) { -; CHECK-LABEL: @test_comilt_sd_0 -; CHECK-NEXT: %1 = insertelement <2 x double> undef, double %a, i32 0 -; CHECK-NEXT: %2 = insertelement <2 x double> undef, double %b, i32 0 -; CHECK-NEXT: %3 = tail call i32 @llvm.x86.sse2.comilt.sd(<2 x double> %1, <2 x double> %2) -; CHECK-NEXT: ret i32 %3 +; CHECK-LABEL: @test_comilt_sd_0( +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> undef, double %b, i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse2.comilt.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]]) +; CHECK-NEXT: ret i32 [[TMP3]] +; %1 = insertelement <2 x double> undef, double %a, i32 0 %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1 %3 = insertelement <2 x double> undef, double %b, i32 0 @@ -198,11 +212,12 @@ define i32 @test_comilt_sd_0(double %a, double %b) { } define i32 @test_comineq_sd_0(double %a, double %b) { -; CHECK-LABEL: @test_comineq_sd_0 -; CHECK-NEXT: %1 = insertelement <2 x double> undef, double %a, i32 0 -; CHECK-NEXT: %2 = insertelement <2 x double> undef, double %b, i32 0 -; CHECK-NEXT: %3 = tail call i32 @llvm.x86.sse2.comineq.sd(<2 x double> %1, <2 x double> %2) -; CHECK-NEXT: ret i32 %3 +; CHECK-LABEL: @test_comineq_sd_0( +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> undef, double %b, i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse2.comineq.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]]) +; CHECK-NEXT: ret i32 [[TMP3]] +; %1 = insertelement <2 x double> undef, double %a, i32 0 %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1 %3 = insertelement <2 x double> undef, double %b, i32 0 @@ -212,11 +227,12 @@ define i32 @test_comineq_sd_0(double %a, double %b) { } define i32 @test_ucomieq_sd_0(double %a, double %b) { -; CHECK-LABEL: @test_ucomieq_sd_0 -; CHECK-NEXT: %1 = insertelement <2 x double> undef, double %a, i32 0 -; CHECK-NEXT: %2 = insertelement <2 x double> undef, double %b, i32 0 -; CHECK-NEXT: %3 = tail call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> %1, <2 x double> %2) -; CHECK-NEXT: ret i32 %3 +; CHECK-LABEL: @test_ucomieq_sd_0( +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> undef, double %b, i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]]) +; CHECK-NEXT: ret i32 [[TMP3]] +; %1 = insertelement <2 x double> undef, double %a, i32 0 %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1 %3 = insertelement <2 x double> undef, double %b, i32 0 @@ -226,11 +242,12 @@ define i32 @test_ucomieq_sd_0(double %a, double %b) { } define i32 @test_ucomige_sd_0(double %a, double %b) { -; CHECK-LABEL: @test_ucomige_sd_0 -; CHECK-NEXT: %1 = insertelement <2 x double> undef, double %a, i32 0 -; CHECK-NEXT: %2 = insertelement <2 x double> undef, double %b, i32 0 -; CHECK-NEXT: %3 = tail call i32 @llvm.x86.sse2.ucomige.sd(<2 x double> %1, <2 x double> %2) -; CHECK-NEXT: ret i32 %3 +; CHECK-LABEL: @test_ucomige_sd_0( +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> undef, double %b, i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse2.ucomige.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]]) +; CHECK-NEXT: ret i32 [[TMP3]] +; %1 = insertelement <2 x double> undef, double %a, i32 0 %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1 %3 = insertelement <2 x double> undef, double %b, i32 0 @@ -240,11 +257,12 @@ define i32 @test_ucomige_sd_0(double %a, double %b) { } define i32 @test_ucomigt_sd_0(double %a, double %b) { -; CHECK-LABEL: @test_ucomigt_sd_0 -; CHECK-NEXT: %1 = insertelement <2 x double> undef, double %a, i32 0 -; CHECK-NEXT: %2 = insertelement <2 x double> undef, double %b, i32 0 -; CHECK-NEXT: %3 = tail call i32 @llvm.x86.sse2.ucomigt.sd(<2 x double> %1, <2 x double> %2) -; CHECK-NEXT: ret i32 %3 +; CHECK-LABEL: @test_ucomigt_sd_0( +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> undef, double %b, i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse2.ucomigt.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]]) +; CHECK-NEXT: ret i32 [[TMP3]] +; %1 = insertelement <2 x double> undef, double %a, i32 0 %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1 %3 = insertelement <2 x double> undef, double %b, i32 0 @@ -254,11 +272,12 @@ define i32 @test_ucomigt_sd_0(double %a, double %b) { } define i32 @test_ucomile_sd_0(double %a, double %b) { -; CHECK-LABEL: @test_ucomile_sd_0 -; CHECK-NEXT: %1 = insertelement <2 x double> undef, double %a, i32 0 -; CHECK-NEXT: %2 = insertelement <2 x double> undef, double %b, i32 0 -; CHECK-NEXT: %3 = tail call i32 @llvm.x86.sse2.ucomile.sd(<2 x double> %1, <2 x double> %2) -; CHECK-NEXT: ret i32 %3 +; CHECK-LABEL: @test_ucomile_sd_0( +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> undef, double %b, i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse2.ucomile.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]]) +; CHECK-NEXT: ret i32 [[TMP3]] +; %1 = insertelement <2 x double> undef, double %a, i32 0 %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1 %3 = insertelement <2 x double> undef, double %b, i32 0 @@ -268,11 +287,12 @@ define i32 @test_ucomile_sd_0(double %a, double %b) { } define i32 @test_ucomilt_sd_0(double %a, double %b) { -; CHECK-LABEL: @test_ucomilt_sd_0 -; CHECK-NEXT: %1 = insertelement <2 x double> undef, double %a, i32 0 -; CHECK-NEXT: %2 = insertelement <2 x double> undef, double %b, i32 0 -; CHECK-NEXT: %3 = tail call i32 @llvm.x86.sse2.ucomilt.sd(<2 x double> %1, <2 x double> %2) -; CHECK-NEXT: ret i32 %3 +; CHECK-LABEL: @test_ucomilt_sd_0( +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> undef, double %b, i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse2.ucomilt.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]]) +; CHECK-NEXT: ret i32 [[TMP3]] +; %1 = insertelement <2 x double> undef, double %a, i32 0 %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1 %3 = insertelement <2 x double> undef, double %b, i32 0 @@ -282,11 +302,12 @@ define i32 @test_ucomilt_sd_0(double %a, double %b) { } define i32 @test_ucomineq_sd_0(double %a, double %b) { -; CHECK-LABEL: @test_ucomineq_sd_0 -; CHECK-NEXT: %1 = insertelement <2 x double> undef, double %a, i32 0 -; CHECK-NEXT: %2 = insertelement <2 x double> undef, double %b, i32 0 -; CHECK-NEXT: %3 = tail call i32 @llvm.x86.sse2.ucomineq.sd(<2 x double> %1, <2 x double> %2) -; CHECK-NEXT: ret i32 %3 +; CHECK-LABEL: @test_ucomineq_sd_0( +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> undef, double %b, i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse2.ucomineq.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]]) +; CHECK-NEXT: ret i32 [[TMP3]] +; %1 = insertelement <2 x double> undef, double %a, i32 0 %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1 %3 = insertelement <2 x double> undef, double %b, i32 0 diff --git a/test/Transforms/InstCombine/x86-sse41.ll b/test/Transforms/InstCombine/x86-sse41.ll index 20c147d7e53..c109410e854 100644 --- a/test/Transforms/InstCombine/x86-sse41.ll +++ b/test/Transforms/InstCombine/x86-sse41.ll @@ -1,12 +1,14 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -instcombine -S | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" define <2 x double> @test_round_sd(<2 x double> %a, <2 x double> %b) { -; CHECK-LABEL: @test_round_sd -; CHECK-NEXT: %1 = insertelement <2 x double> %a, double 1.000000e+00, i32 0 -; CHECK-NEXT: %2 = insertelement <2 x double> %b, double 2.000000e+00, i32 1 -; CHECK-NEXT: %3 = tail call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %1, <2 x double> %2, i32 10) -; CHECK-NEXT: ret <2 x double> %3 +; CHECK-LABEL: @test_round_sd( +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> %a, double 1.000000e+00, i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> %b, double 2.000000e+00, i32 1 +; CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]], i32 10) +; CHECK-NEXT: ret <2 x double> [[TMP3]] +; %1 = insertelement <2 x double> %a, double 1.000000e+00, i32 0 %2 = insertelement <2 x double> %b, double 2.000000e+00, i32 1 %3 = tail call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %1, <2 x double> %2, i32 10) @@ -14,33 +16,35 @@ define <2 x double> @test_round_sd(<2 x double> %a, <2 x double> %b) { } define double @test_round_sd_0(double %a, double %b) { -; CHECK-LABEL: @test_round_sd_0 -; CHECK-NEXT: %1 = insertelement <2 x double> undef, double %a, i32 0 -; CHECK-NEXT: %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1 -; CHECK-NEXT: %3 = insertelement <2 x double> undef, double %b, i32 0 -; CHECK-NEXT: %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1 -; CHECK-NEXT: %5 = tail call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %2, <2 x double> %4, i32 10) -; CHECK-NEXT: %6 = extractelement <2 x double> %5, i32 0 -; CHECK-NEXT: ret double %6 +; CHECK-LABEL: @test_round_sd_0( +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double 1.000000e+00, i32 1 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> undef, double %b, i32 0 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double 2.000000e+00, i32 1 +; CHECK-NEXT: [[TMP5:%.*]] = tail call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> [[TMP2]], <2 x double> [[TMP4]], i32 10) +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[TMP5]], i32 0 +; CHECK-NEXT: ret double [[TMP6]] +; %1 = insertelement <2 x double> undef, double %a, i32 0 %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1 %3 = insertelement <2 x double> undef, double %b, i32 0 %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1 %5 = tail call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %2, <2 x double> %4, i32 10) %6 = extractelement <2 x double> %5, i32 0 - ret double %6 + ret double %6 } define <4 x float> @test_round_ss(<4 x float> %a, <4 x float> %b) { -; CHECK-LABEL: @test_round -; CHECK-NEXT: %1 = insertelement <4 x float> %a, float 1.000000e+00, i32 1 -; CHECK-NEXT: %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2 -; CHECK-NEXT: %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3 -; CHECK-NEXT: %4 = insertelement <4 x float> %b, float 1.000000e+00, i32 1 -; CHECK-NEXT: %5 = insertelement <4 x float> %4, float 2.000000e+00, i32 2 -; CHECK-NEXT: %6 = insertelement <4 x float> %5, float 3.000000e+00, i32 3 -; CHECK-NEXT: %7 = tail call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %3, <4 x float> %6, i32 10) -; CHECK-NEXT: ret <4 x float> %7 +; CHECK-LABEL: @test_round_ss( +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> %a, float 1.000000e+00, i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> [[TMP1]], float 2.000000e+00, i32 2 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x float> [[TMP2]], float 3.000000e+00, i32 3 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x float> %b, float 1.000000e+00, i32 1 +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x float> [[TMP4]], float 2.000000e+00, i32 2 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x float> [[TMP5]], float 3.000000e+00, i32 3 +; CHECK-NEXT: [[TMP7:%.*]] = tail call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> [[TMP3]], <4 x float> [[TMP6]], i32 10) +; CHECK-NEXT: ret <4 x float> [[TMP7]] +; %1 = insertelement <4 x float> %a, float 1.000000e+00, i32 1 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2 %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3 @@ -52,18 +56,19 @@ define <4 x float> @test_round_ss(<4 x float> %a, <4 x float> %b) { } define float @test_round_ss_0(float %a, float %b) { -; CHECK-LABEL: @test_round_ss_0 -; CHECK-NEXT: %1 = insertelement <4 x float> undef, float %a, i32 0 -; CHECK-NEXT: %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1 -; CHECK-NEXT: %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2 -; CHECK-NEXT: %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3 -; CHECK-NEXT: %5 = insertelement <4 x float> undef, float %b, i32 0 -; CHECK-NEXT: %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1 -; CHECK-NEXT: %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2 -; CHECK-NEXT: %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3 -; CHECK-NEXT: %9 = tail call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %4, <4 x float> %8, i32 10) -; CHECK-NEXT: %r = extractelement <4 x float> %9, i32 0 -; CHECK-NEXT: ret float %r +; CHECK-LABEL: @test_round_ss_0( +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> [[TMP1]], float 1.000000e+00, i32 1 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x float> [[TMP2]], float 2.000000e+00, i32 2 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x float> [[TMP3]], float 3.000000e+00, i32 3 +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x float> undef, float %b, i32 0 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x float> [[TMP5]], float 4.000000e+00, i32 1 +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x float> [[TMP6]], float 5.000000e+00, i32 2 +; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x float> [[TMP7]], float 6.000000e+00, i32 3 +; CHECK-NEXT: [[TMP9:%.*]] = tail call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> [[TMP4]], <4 x float> [[TMP8]], i32 10) +; CHECK-NEXT: [[R:%.*]] = extractelement <4 x float> [[TMP9]], i32 0 +; CHECK-NEXT: ret float [[R]] +; %1 = insertelement <4 x float> undef, float %a, i32 0 %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1 %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2 @@ -78,4 +83,4 @@ define float @test_round_ss_0(float %a, float %b) { } declare <2 x double> @llvm.x86.sse41.round.sd(<2 x double>, <2 x double>, i32) nounwind readnone -declare <4 x float> @llvm.x86.sse41.round.ss(<4 x float>, <4 x float>, i32) nounwind readnone \ No newline at end of file +declare <4 x float> @llvm.x86.sse41.round.ss(<4 x float>, <4 x float>, i32) nounwind readnone diff --git a/test/Transforms/InstCombine/x86-sse4a.ll b/test/Transforms/InstCombine/x86-sse4a.ll index 815d26bd225..53353abefb3 100644 --- a/test/Transforms/InstCombine/x86-sse4a.ll +++ b/test/Transforms/InstCombine/x86-sse4a.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -instcombine -S | FileCheck %s ; @@ -5,45 +6,51 @@ ; define <2 x i64> @test_extrq_call(<2 x i64> %x, <16 x i8> %y) { -; CHECK-LABEL: @test_extrq_call -; CHECK-NEXT: %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> %y) -; CHECK-NEXT: ret <2 x i64> %1 +; CHECK-LABEL: @test_extrq_call( +; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> %y) #1 +; CHECK-NEXT: ret <2 x i64> [[TMP1]] +; %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> %y) nounwind ret <2 x i64> %1 } define <2 x i64> @test_extrq_zero_arg0(<2 x i64> %x, <16 x i8> %y) { -; CHECK-LABEL: @test_extrq_zero_arg0 -; CHECK-NEXT: ret <2 x i64> +; CHECK-LABEL: @test_extrq_zero_arg0( +; CHECK-NEXT: ret <2 x i64> +; %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> zeroinitializer, <16 x i8> %y) nounwind ret <2 x i64> %1 } define <2 x i64> @test_extrq_zero_arg1(<2 x i64> %x, <16 x i8> %y) { -; CHECK-LABEL: @test_extrq_zero_arg1 -; CHECK-NEXT: ret <2 x i64> %x +; CHECK-LABEL: @test_extrq_zero_arg1( +; CHECK-NEXT: ret <2 x i64> %x +; %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> zeroinitializer) nounwind ret <2 x i64> %1 } define <2 x i64> @test_extrq_to_extqi(<2 x i64> %x, <16 x i8> %y) { -; CHECK-LABEL: @test_extrq_to_extqi -; CHECK-NEXT: %1 = call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %x, i8 8, i8 15) -; CHECK-NEXT: ret <2 x i64> %1 +; CHECK-LABEL: @test_extrq_to_extqi( +; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %x, i8 8, i8 15) +; CHECK-NEXT: ret <2 x i64> [[TMP1]] +; %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> ) nounwind ret <2 x i64> %1 } define <2 x i64> @test_extrq_constant(<2 x i64> %x, <16 x i8> %y) { -; CHECK-LABEL: @test_extrq_constant -; CHECK-NEXT: ret <2 x i64> +; CHECK-LABEL: @test_extrq_constant( +; CHECK-NEXT: ret <2 x i64> +; %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> , <16 x i8> ) nounwind ret <2 x i64> %1 } define <2 x i64> @test_extrq_constant_undef(<2 x i64> %x, <16 x i8> %y) { -; CHECK-LABEL: @test_extrq_constant_undef -; CHECK-NEXT: ret <2 x i64> +; CHECK-LABEL: @test_extrq_constant_undef( +; CHECK-NEXT: ret <2 x i64> +; %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> , <16 x i8> ) nounwind ret <2 x i64> %1 } @@ -53,57 +60,64 @@ define <2 x i64> @test_extrq_constant_undef(<2 x i64> %x, <16 x i8> %y) { ; define <2 x i64> @test_extrqi_call(<2 x i64> %x) { -; CHECK-LABEL: @test_extrqi_call -; CHECK-NEXT: %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %x, i8 8, i8 23) -; CHECK-NEXT: ret <2 x i64> %1 +; CHECK-LABEL: @test_extrqi_call( +; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %x, i8 8, i8 23) +; CHECK-NEXT: ret <2 x i64> [[TMP1]] +; %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %x, i8 8, i8 23) ret <2 x i64> %1 } define <2 x i64> @test_extrqi_shuffle_1zuu(<2 x i64> %x) { -; CHECK-LABEL: @test_extrqi_shuffle_1zuu -; CHECK-NEXT: %1 = bitcast <2 x i64> %x to <16 x i8> -; CHECK-NEXT: %2 = shufflevector <16 x i8> %1, <16 x i8> , <16 x i32> -; CHECK-NEXT: %3 = bitcast <16 x i8> %2 to <2 x i64> -; CHECK-NEXT: ret <2 x i64> %3 +; CHECK-LABEL: @test_extrqi_shuffle_1zuu( +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> %x to <16 x i8> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> , <16 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64> +; CHECK-NEXT: ret <2 x i64> [[TMP3]] +; %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %x, i8 32, i8 32) ret <2 x i64> %1 } define <2 x i64> @test_extrqi_shuffle_2zzzzzzzuuuuuuuu(<2 x i64> %x) { -; CHECK-LABEL: @test_extrqi_shuffle_2zzzzzzzuuuuuuuu -; CHECK-NEXT: %1 = bitcast <2 x i64> %x to <16 x i8> -; CHECK-NEXT: %2 = shufflevector <16 x i8> %1, <16 x i8> , <16 x i32> -; CHECK-NEXT: %3 = bitcast <16 x i8> %2 to <2 x i64> -; CHECK-NEXT: ret <2 x i64> %3 +; CHECK-LABEL: @test_extrqi_shuffle_2zzzzzzzuuuuuuuu( +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> %x to <16 x i8> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> , <16 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64> +; CHECK-NEXT: ret <2 x i64> [[TMP3]] +; %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %x, i8 8, i8 16) ret <2 x i64> %1 } define <2 x i64> @test_extrqi_undef(<2 x i64> %x) { -; CHECK-LABEL: @test_extrqi_undef -; CHECK-NEXT: ret <2 x i64> undef +; CHECK-LABEL: @test_extrqi_undef( +; CHECK-NEXT: ret <2 x i64> undef +; %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> zeroinitializer, i8 32, i8 33) ret <2 x i64> %1 } define <2 x i64> @test_extrqi_zero(<2 x i64> %x) { -; CHECK-LABEL: @test_extrqi_zero -; CHECK-NEXT: ret <2 x i64> +; CHECK-LABEL: @test_extrqi_zero( +; CHECK-NEXT: ret <2 x i64> +; %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> zeroinitializer, i8 3, i8 18) ret <2 x i64> %1 } define <2 x i64> @test_extrqi_constant(<2 x i64> %x) { -; CHECK-LABEL: @test_extrqi_constant -; CHECK-NEXT: ret <2 x i64> +; CHECK-LABEL: @test_extrqi_constant( +; CHECK-NEXT: ret <2 x i64> +; %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> , i8 3, i8 18) ret <2 x i64> %1 } define <2 x i64> @test_extrqi_constant_undef(<2 x i64> %x) { -; CHECK-LABEL: @test_extrqi_constant_undef -; CHECK-NEXT: ret <2 x i64> +; CHECK-LABEL: @test_extrqi_constant_undef( +; CHECK-NEXT: ret <2 x i64> +; %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> , i8 4, i8 18) ret <2 x i64> %1 } @@ -113,31 +127,35 @@ define <2 x i64> @test_extrqi_constant_undef(<2 x i64> %x) { ; define <2 x i64> @test_insertq_call(<2 x i64> %x, <2 x i64> %y) { -; CHECK-LABEL: @test_insertq_call -; CHECK-NEXT: %1 = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> %x, <2 x i64> %y) -; CHECK-NEXT: ret <2 x i64> %1 +; CHECK-LABEL: @test_insertq_call( +; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> %x, <2 x i64> %y) #1 +; CHECK-NEXT: ret <2 x i64> [[TMP1]] +; %1 = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> %x, <2 x i64> %y) nounwind ret <2 x i64> %1 } define <2 x i64> @test_insertq_to_insertqi(<2 x i64> %x, <2 x i64> %y) { -; CHECK-LABEL: @test_insertq_to_insertqi -; CHECK-NEXT: %1 = call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %x, <2 x i64> , i8 18, i8 2) -; CHECK-NEXT: ret <2 x i64> %1 +; CHECK-LABEL: @test_insertq_to_insertqi( +; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %x, <2 x i64> , i8 18, i8 2) +; CHECK-NEXT: ret <2 x i64> [[TMP1]] +; %1 = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> %x, <2 x i64> ) nounwind ret <2 x i64> %1 } define <2 x i64> @test_insertq_constant(<2 x i64> %x, <2 x i64> %y) { -; CHECK-LABEL: @test_insertq_constant -; CHECK-NEXT: ret <2 x i64> +; CHECK-LABEL: @test_insertq_constant( +; CHECK-NEXT: ret <2 x i64> +; %1 = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> , <2 x i64> ) nounwind ret <2 x i64> %1 } define <2 x i64> @test_insertq_constant_undef(<2 x i64> %x, <2 x i64> %y) { -; CHECK-LABEL: @test_insertq_constant_undef -; CHECK-NEXT: ret <2 x i64> +; CHECK-LABEL: @test_insertq_constant_undef( +; CHECK-NEXT: ret <2 x i64> +; %1 = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> , <2 x i64> ) nounwind ret <2 x i64> %1 } @@ -147,9 +165,10 @@ define <2 x i64> @test_insertq_constant_undef(<2 x i64> %x, <2 x i64> %y) { ; define <16 x i8> @test_insertqi_shuffle_04uu(<16 x i8> %v, <16 x i8> %i) { -; CHECK-LABEL: @test_insertqi_shuffle_04uu -; CHECK-NEXT: %1 = shufflevector <16 x i8> %v, <16 x i8> %i, <16 x i32> -; CHECK-NEXT: ret <16 x i8> %1 +; CHECK-LABEL: @test_insertqi_shuffle_04uu( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> %v, <16 x i8> %i, <16 x i32> +; CHECK-NEXT: ret <16 x i8> [[TMP1]] +; %1 = bitcast <16 x i8> %v to <2 x i64> %2 = bitcast <16 x i8> %i to <2 x i64> %3 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %2, i8 32, i8 32) @@ -158,9 +177,10 @@ define <16 x i8> @test_insertqi_shuffle_04uu(<16 x i8> %v, <16 x i8> %i) { } define <16 x i8> @test_insertqi_shuffle_8123uuuu(<16 x i8> %v, <16 x i8> %i) { -; CHECK-LABEL: @test_insertqi_shuffle_8123uuuu -; CHECK-NEXT: %1 = shufflevector <16 x i8> %v, <16 x i8> %i, <16 x i32> -; CHECK-NEXT: ret <16 x i8> %1 +; CHECK-LABEL: @test_insertqi_shuffle_8123uuuu( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> %v, <16 x i8> %i, <16 x i32> +; CHECK-NEXT: ret <16 x i8> [[TMP1]] +; %1 = bitcast <16 x i8> %v to <2 x i64> %2 = bitcast <16 x i8> %i to <2 x i64> %3 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %2, i8 16, i8 0) @@ -169,8 +189,9 @@ define <16 x i8> @test_insertqi_shuffle_8123uuuu(<16 x i8> %v, <16 x i8> %i) { } define <2 x i64> @test_insertqi_constant(<2 x i64> %v, <2 x i64> %i) { -; CHECK-LABEL: @test_insertqi_constant -; CHECK-NEXT: ret <2 x i64> +; CHECK-LABEL: @test_insertqi_constant( +; CHECK-NEXT: ret <2 x i64> +; %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> , <2 x i64> , i8 16, i8 1) ret <2 x i64> %1 } @@ -179,36 +200,41 @@ define <2 x i64> @test_insertqi_constant(<2 x i64> %v, <2 x i64> %i) { ; the result are undefined, and we copy the bottom 64 bits from the ; second arg define <2 x i64> @testInsert64Bits(<2 x i64> %v, <2 x i64> %i) { -; CHECK-LABEL: @testInsert64Bits -; CHECK-NEXT: ret <2 x i64> %i +; CHECK-LABEL: @testInsert64Bits( +; CHECK-NEXT: ret <2 x i64> %i +; %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 64, i8 0) ret <2 x i64> %1 } define <2 x i64> @testZeroLength(<2 x i64> %v, <2 x i64> %i) { -; CHECK-LABEL: @testZeroLength -; CHECK-NEXT: ret <2 x i64> %i +; CHECK-LABEL: @testZeroLength( +; CHECK-NEXT: ret <2 x i64> %i +; %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 0, i8 0) ret <2 x i64> %1 } define <2 x i64> @testUndefinedInsertq_1(<2 x i64> %v, <2 x i64> %i) { -; CHECK-LABEL: @testUndefinedInsertq_1 -; CHECK-NEXT: ret <2 x i64> undef +; CHECK-LABEL: @testUndefinedInsertq_1( +; CHECK-NEXT: ret <2 x i64> undef +; %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 0, i8 16) ret <2 x i64> %1 } define <2 x i64> @testUndefinedInsertq_2(<2 x i64> %v, <2 x i64> %i) { -; CHECK-LABEL: @testUndefinedInsertq_2 -; CHECK-NEXT: ret <2 x i64> undef +; CHECK-LABEL: @testUndefinedInsertq_2( +; CHECK-NEXT: ret <2 x i64> undef +; %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 48, i8 32) ret <2 x i64> %1 } define <2 x i64> @testUndefinedInsertq_3(<2 x i64> %v, <2 x i64> %i) { -; CHECK-LABEL: @testUndefinedInsertq_3 -; CHECK-NEXT: ret <2 x i64> undef +; CHECK-LABEL: @testUndefinedInsertq_3( +; CHECK-NEXT: ret <2 x i64> undef +; %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 64, i8 16) ret <2 x i64> %1 } @@ -218,27 +244,30 @@ define <2 x i64> @testUndefinedInsertq_3(<2 x i64> %v, <2 x i64> %i) { ; define <2 x i64> @test_extrq_arg0(<2 x i64> %x, <16 x i8> %y) { -; CHECK-LABEL: @test_extrq_arg0 -; CHECK-NEXT: %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> %y) -; CHECK-NEXT: ret <2 x i64> %1 +; CHECK-LABEL: @test_extrq_arg0( +; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> %y) #1 +; CHECK-NEXT: ret <2 x i64> [[TMP1]] +; %1 = shufflevector <2 x i64> %x, <2 x i64> undef, <2 x i32> %2 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %1, <16 x i8> %y) nounwind ret <2 x i64> %2 } define <2 x i64> @test_extrq_arg1(<2 x i64> %x, <16 x i8> %y) { -; CHECK-LABEL: @test_extrq_arg1 -; CHECK-NEXT: %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> %y) -; CHECK-NEXT: ret <2 x i64> %1 +; CHECK-LABEL: @test_extrq_arg1( +; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> %y) #1 +; CHECK-NEXT: ret <2 x i64> [[TMP1]] +; %1 = shufflevector <16 x i8> %y, <16 x i8> undef, <16 x i32> %2 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> %1) nounwind ret <2 x i64> %2 } define <2 x i64> @test_extrq_args01(<2 x i64> %x, <16 x i8> %y) { -; CHECK-LABEL: @test_extrq_args01 -; CHECK-NEXT: %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> %y) -; CHECK-NEXT: ret <2 x i64> %1 +; CHECK-LABEL: @test_extrq_args01( +; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> %y) #1 +; CHECK-NEXT: ret <2 x i64> [[TMP1]] +; %1 = shufflevector <2 x i64> %x, <2 x i64> undef, <2 x i32> %2 = shufflevector <16 x i8> %y, <16 x i8> undef, <16 x i32> %3 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %1, <16 x i8> %2) nounwind @@ -246,69 +275,77 @@ define <2 x i64> @test_extrq_args01(<2 x i64> %x, <16 x i8> %y) { } define <2 x i64> @test_extrq_ret(<2 x i64> %x, <16 x i8> %y) { -; CHECK-LABEL: @test_extrq_ret -; CHECK-NEXT: ret <2 x i64> undef +; CHECK-LABEL: @test_extrq_ret( +; CHECK-NEXT: ret <2 x i64> undef +; %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> %y) nounwind %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> ret <2 x i64> %2 } define <2 x i64> @test_extrqi_arg0(<2 x i64> %x) { -; CHECK-LABEL: @test_extrqi_arg0 -; CHECK-NEXT: %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %x, i8 3, i8 2) -; CHECK-NEXT: ret <2 x i64> %1 +; CHECK-LABEL: @test_extrqi_arg0( +; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %x, i8 3, i8 2) +; CHECK-NEXT: ret <2 x i64> [[TMP1]] +; %1 = shufflevector <2 x i64> %x, <2 x i64> undef, <2 x i32> %2 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %1, i8 3, i8 2) ret <2 x i64> %2 } define <2 x i64> @test_extrqi_ret(<2 x i64> %x) { -; CHECK-LABEL: @test_extrqi_ret -; CHECK-NEXT: ret <2 x i64> undef +; CHECK-LABEL: @test_extrqi_ret( +; CHECK-NEXT: ret <2 x i64> undef +; %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %x, i8 3, i8 2) nounwind %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> ret <2 x i64> %2 } define <2 x i64> @test_insertq_arg0(<2 x i64> %x, <2 x i64> %y) { -; CHECK-LABEL: @test_insertq_arg0 -; CHECK-NEXT: %1 = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> %x, <2 x i64> %y) -; CHECK-NEXT: ret <2 x i64> %1 +; CHECK-LABEL: @test_insertq_arg0( +; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> %x, <2 x i64> %y) #1 +; CHECK-NEXT: ret <2 x i64> [[TMP1]] +; %1 = shufflevector <2 x i64> %x, <2 x i64> undef, <2 x i32> %2 = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> %1, <2 x i64> %y) nounwind ret <2 x i64> %2 } define <2 x i64> @test_insertq_ret(<2 x i64> %x, <2 x i64> %y) { -; CHECK-LABEL: @test_insertq_ret -; CHECK-NEXT: ret <2 x i64> undef +; CHECK-LABEL: @test_insertq_ret( +; CHECK-NEXT: ret <2 x i64> undef +; %1 = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> %x, <2 x i64> %y) nounwind %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> ret <2 x i64> %2 } define <2 x i64> @test_insertqi_arg0(<2 x i64> %x, <2 x i64> %y) { -; CHECK-LABEL: @test_insertqi_arg0 -; CHECK-NEXT: %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %x, <2 x i64> %y, i8 3, i8 2) -; CHECK-NEXT: ret <2 x i64> %1 +; CHECK-LABEL: @test_insertqi_arg0( +; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %x, <2 x i64> %y, i8 3, i8 2) #1 +; CHECK-NEXT: ret <2 x i64> [[TMP1]] +; %1 = shufflevector <2 x i64> %x, <2 x i64> undef, <2 x i32> %2 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %y, i8 3, i8 2) nounwind ret <2 x i64> %2 } define <2 x i64> @test_insertqi_arg1(<2 x i64> %x, <2 x i64> %y) { -; CHECK-LABEL: @test_insertqi_arg1 -; CHECK-NEXT: %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %x, <2 x i64> %y, i8 3, i8 2) -; CHECK-NEXT: ret <2 x i64> %1 +; CHECK-LABEL: @test_insertqi_arg1( +; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %x, <2 x i64> %y, i8 3, i8 2) #1 +; CHECK-NEXT: ret <2 x i64> [[TMP1]] +; %1 = shufflevector <2 x i64> %y, <2 x i64> undef, <2 x i32> %2 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %x, <2 x i64> %1, i8 3, i8 2) nounwind ret <2 x i64> %2 } define <2 x i64> @test_insertqi_args01(<2 x i64> %x, <2 x i64> %y) { -; CHECK-LABEL: @test_insertqi_args01 -; CHECK-NEXT: %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %x, <2 x i64> %y, i8 3, i8 2) -; CHECK-NEXT: ret <2 x i64> %1 +; CHECK-LABEL: @test_insertqi_args01( +; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %x, <2 x i64> %y, i8 3, i8 2) #1 +; CHECK-NEXT: ret <2 x i64> [[TMP1]] +; %1 = shufflevector <2 x i64> %x, <2 x i64> undef, <2 x i32> %2 = shufflevector <2 x i64> %y, <2 x i64> undef, <2 x i32> %3 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %2, i8 3, i8 2) nounwind @@ -316,8 +353,9 @@ define <2 x i64> @test_insertqi_args01(<2 x i64> %x, <2 x i64> %y) { } define <2 x i64> @test_insertqi_ret(<2 x i64> %x, <2 x i64> %y) { -; CHECK-LABEL: @test_insertqi_ret -; CHECK-NEXT: ret <2 x i64> undef +; CHECK-LABEL: @test_insertqi_ret( +; CHECK-NEXT: ret <2 x i64> undef +; %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %x, <2 x i64> %y, i8 3, i8 2) nounwind %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> ret <2 x i64> %2 diff --git a/test/Transforms/InstCombine/x86-xop.ll b/test/Transforms/InstCombine/x86-xop.ll index 345ac982226..90b0a841eed 100644 --- a/test/Transforms/InstCombine/x86-xop.ll +++ b/test/Transforms/InstCombine/x86-xop.ll @@ -1,12 +1,14 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -instcombine -S | FileCheck %s define double @test_vfrcz_sd_0(double %a) { -; CHECK-LABEL: @test_vfrcz_sd_0 -; CHECK-NEXT: %1 = insertelement <2 x double> undef, double %a, i32 0 -; CHECK-NEXT: %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1 -; CHECK-NEXT: %3 = tail call <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double> %2) -; CHECK-NEXT: %4 = extractelement <2 x double> %3, i32 0 -; CHECK-NEXT: ret double %4 +; CHECK-LABEL: @test_vfrcz_sd_0( +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double 1.000000e+00, i32 1 +; CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double> [[TMP2]]) +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x double> [[TMP3]], i32 0 +; CHECK-NEXT: ret double [[TMP4]] +; %1 = insertelement <2 x double> undef, double %a, i32 0 %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1 %3 = tail call <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double> %2) @@ -15,14 +17,15 @@ define double @test_vfrcz_sd_0(double %a) { } define float @test_vfrcz_ss_0(float %a) { -; CHECK-LABEL: @test_vfrcz_ss_0 -; CHECK-NEXT: %1 = insertelement <4 x float> undef, float %a, i32 0 -; CHECK-NEXT: %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1 -; CHECK-NEXT: %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2 -; CHECK-NEXT: %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3 -; CHECK-NEXT: %5 = tail call <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float> %4) -; CHECK-NEXT: %6 = extractelement <4 x float> %5, i32 0 -; CHECK-NEXT: ret float %6 +; CHECK-LABEL: @test_vfrcz_ss_0( +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> [[TMP1]], float 1.000000e+00, i32 1 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x float> [[TMP2]], float 2.000000e+00, i32 2 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x float> [[TMP3]], float 3.000000e+00, i32 3 +; CHECK-NEXT: [[TMP5:%.*]] = tail call <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float> [[TMP4]]) +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[TMP5]], i32 0 +; CHECK-NEXT: ret float [[TMP6]] +; %1 = insertelement <4 x float> undef, float %a, i32 0 %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1 %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2 @@ -33,137 +36,153 @@ define float @test_vfrcz_ss_0(float %a) { } define <2 x i64> @cmp_slt_v2i64(<2 x i64> %a, <2 x i64> %b) { -; CHECK-LABEL: @cmp_slt_v2i64 -; CHECK-NEXT: %1 = icmp slt <2 x i64> %a, %b -; CHECK-NEXT: %2 = sext <2 x i1> %1 to <2 x i64> -; CHECK-NEXT: ret <2 x i64> %2 +; CHECK-LABEL: @cmp_slt_v2i64( +; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <2 x i64> %a, %b +; CHECK-NEXT: [[TMP2:%.*]] = sext <2 x i1> [[TMP1]] to <2 x i64> +; CHECK-NEXT: ret <2 x i64> [[TMP2]] +; %1 = tail call <2 x i64> @llvm.x86.xop.vpcomltq(<2 x i64> %a, <2 x i64> %b) ret <2 x i64> %1 } define <2 x i64> @cmp_ult_v2i64(<2 x i64> %a, <2 x i64> %b) { -; CHECK-LABEL: @cmp_ult_v2i64 -; CHECK-NEXT: %1 = icmp ult <2 x i64> %a, %b -; CHECK-NEXT: %2 = sext <2 x i1> %1 to <2 x i64> -; CHECK-NEXT: ret <2 x i64> %2 +; CHECK-LABEL: @cmp_ult_v2i64( +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult <2 x i64> %a, %b +; CHECK-NEXT: [[TMP2:%.*]] = sext <2 x i1> [[TMP1]] to <2 x i64> +; CHECK-NEXT: ret <2 x i64> [[TMP2]] +; %1 = tail call <2 x i64> @llvm.x86.xop.vpcomltuq(<2 x i64> %a, <2 x i64> %b) ret <2 x i64> %1 } define <2 x i64> @cmp_sle_v2i64(<2 x i64> %a, <2 x i64> %b) { -; CHECK-LABEL: @cmp_sle_v2i64 -; CHECK-NEXT: %1 = icmp sle <2 x i64> %a, %b -; CHECK-NEXT: %2 = sext <2 x i1> %1 to <2 x i64> -; CHECK-NEXT: ret <2 x i64> %2 +; CHECK-LABEL: @cmp_sle_v2i64( +; CHECK-NEXT: [[TMP1:%.*]] = icmp sle <2 x i64> %a, %b +; CHECK-NEXT: [[TMP2:%.*]] = sext <2 x i1> [[TMP1]] to <2 x i64> +; CHECK-NEXT: ret <2 x i64> [[TMP2]] +; %1 = tail call <2 x i64> @llvm.x86.xop.vpcomleq(<2 x i64> %a, <2 x i64> %b) ret <2 x i64> %1 } define <2 x i64> @cmp_ule_v2i64(<2 x i64> %a, <2 x i64> %b) { -; CHECK-LABEL: @cmp_ule_v2i64 -; CHECK-NEXT: %1 = icmp ule <2 x i64> %a, %b -; CHECK-NEXT: %2 = sext <2 x i1> %1 to <2 x i64> -; CHECK-NEXT: ret <2 x i64> %2 +; CHECK-LABEL: @cmp_ule_v2i64( +; CHECK-NEXT: [[TMP1:%.*]] = icmp ule <2 x i64> %a, %b +; CHECK-NEXT: [[TMP2:%.*]] = sext <2 x i1> [[TMP1]] to <2 x i64> +; CHECK-NEXT: ret <2 x i64> [[TMP2]] +; %1 = tail call <2 x i64> @llvm.x86.xop.vpcomleuq(<2 x i64> %a, <2 x i64> %b) ret <2 x i64> %1 } define <4 x i32> @cmp_sgt_v4i32(<4 x i32> %a, <4 x i32> %b) { -; CHECK-LABEL: @cmp_sgt_v4i32 -; CHECK-NEXT: %1 = icmp sgt <4 x i32> %a, %b -; CHECK-NEXT: %2 = sext <4 x i1> %1 to <4 x i32> -; CHECK-NEXT: ret <4 x i32> %2 +; CHECK-LABEL: @cmp_sgt_v4i32( +; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <4 x i32> %a, %b +; CHECK-NEXT: [[TMP2:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i32> +; CHECK-NEXT: ret <4 x i32> [[TMP2]] +; %1 = tail call <4 x i32> @llvm.x86.xop.vpcomgtd(<4 x i32> %a, <4 x i32> %b) ret <4 x i32> %1 } define <4 x i32> @cmp_ugt_v4i32(<4 x i32> %a, <4 x i32> %b) { -; CHECK-LABEL: @cmp_ugt_v4i32 -; CHECK-NEXT: %1 = icmp ugt <4 x i32> %a, %b -; CHECK-NEXT: %2 = sext <4 x i1> %1 to <4 x i32> -; CHECK-NEXT: ret <4 x i32> %2 +; CHECK-LABEL: @cmp_ugt_v4i32( +; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt <4 x i32> %a, %b +; CHECK-NEXT: [[TMP2:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i32> +; CHECK-NEXT: ret <4 x i32> [[TMP2]] +; %1 = tail call <4 x i32> @llvm.x86.xop.vpcomgtud(<4 x i32> %a, <4 x i32> %b) ret <4 x i32> %1 } define <4 x i32> @cmp_sge_v4i32(<4 x i32> %a, <4 x i32> %b) { -; CHECK-LABEL: @cmp_sge_v4i32 -; CHECK-NEXT: %1 = icmp sge <4 x i32> %a, %b -; CHECK-NEXT: %2 = sext <4 x i1> %1 to <4 x i32> -; CHECK-NEXT: ret <4 x i32> %2 +; CHECK-LABEL: @cmp_sge_v4i32( +; CHECK-NEXT: [[TMP1:%.*]] = icmp sge <4 x i32> %a, %b +; CHECK-NEXT: [[TMP2:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i32> +; CHECK-NEXT: ret <4 x i32> [[TMP2]] +; %1 = tail call <4 x i32> @llvm.x86.xop.vpcomged(<4 x i32> %a, <4 x i32> %b) ret <4 x i32> %1 } define <4 x i32> @cmp_uge_v4i32(<4 x i32> %a, <4 x i32> %b) { -; CHECK-LABEL: @cmp_uge_v4i32 -; CHECK-NEXT: %1 = icmp uge <4 x i32> %a, %b -; CHECK-NEXT: %2 = sext <4 x i1> %1 to <4 x i32> -; CHECK-NEXT: ret <4 x i32> %2 +; CHECK-LABEL: @cmp_uge_v4i32( +; CHECK-NEXT: [[TMP1:%.*]] = icmp uge <4 x i32> %a, %b +; CHECK-NEXT: [[TMP2:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i32> +; CHECK-NEXT: ret <4 x i32> [[TMP2]] +; %1 = tail call <4 x i32> @llvm.x86.xop.vpcomgeud(<4 x i32> %a, <4 x i32> %b) ret <4 x i32> %1 } define <8 x i16> @cmp_seq_v8i16(<8 x i16> %a, <8 x i16> %b) { -; CHECK-LABEL: @cmp_seq_v8i16 -; CHECK-NEXT: %1 = icmp eq <8 x i16> %a, %b -; CHECK-NEXT: %2 = sext <8 x i1> %1 to <8 x i16> -; CHECK-NEXT: ret <8 x i16> %2 +; CHECK-LABEL: @cmp_seq_v8i16( +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <8 x i16> %a, %b +; CHECK-NEXT: [[TMP2:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i16> +; CHECK-NEXT: ret <8 x i16> [[TMP2]] +; %1 = tail call <8 x i16> @llvm.x86.xop.vpcomeqw(<8 x i16> %a, <8 x i16> %b) ret <8 x i16> %1 } define <8 x i16> @cmp_ueq_v8i16(<8 x i16> %a, <8 x i16> %b) { -; CHECK-LABEL: @cmp_ueq_v8i16 -; CHECK-NEXT: %1 = icmp eq <8 x i16> %a, %b -; CHECK-NEXT: %2 = sext <8 x i1> %1 to <8 x i16> -; CHECK-NEXT: ret <8 x i16> %2 +; CHECK-LABEL: @cmp_ueq_v8i16( +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <8 x i16> %a, %b +; CHECK-NEXT: [[TMP2:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i16> +; CHECK-NEXT: ret <8 x i16> [[TMP2]] +; %1 = tail call <8 x i16> @llvm.x86.xop.vpcomequw(<8 x i16> %a, <8 x i16> %b) ret <8 x i16> %1 } define <8 x i16> @cmp_sne_v8i16(<8 x i16> %a, <8 x i16> %b) { -; CHECK-LABEL: @cmp_sne_v8i16 -; CHECK-NEXT: %1 = icmp ne <8 x i16> %a, %b -; CHECK-NEXT: %2 = sext <8 x i1> %1 to <8 x i16> -; CHECK-NEXT: ret <8 x i16> %2 +; CHECK-LABEL: @cmp_sne_v8i16( +; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <8 x i16> %a, %b +; CHECK-NEXT: [[TMP2:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i16> +; CHECK-NEXT: ret <8 x i16> [[TMP2]] +; %1 = tail call <8 x i16> @llvm.x86.xop.vpcomnew(<8 x i16> %a, <8 x i16> %b) ret <8 x i16> %1 } define <8 x i16> @cmp_une_v8i16(<8 x i16> %a, <8 x i16> %b) { -; CHECK-LABEL: @cmp_une_v8i16 -; CHECK-NEXT: %1 = icmp ne <8 x i16> %a, %b -; CHECK-NEXT: %2 = sext <8 x i1> %1 to <8 x i16> -; CHECK-NEXT: ret <8 x i16> %2 +; CHECK-LABEL: @cmp_une_v8i16( +; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <8 x i16> %a, %b +; CHECK-NEXT: [[TMP2:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i16> +; CHECK-NEXT: ret <8 x i16> [[TMP2]] +; %1 = tail call <8 x i16> @llvm.x86.xop.vpcomneuw(<8 x i16> %a, <8 x i16> %b) ret <8 x i16> %1 } define <16 x i8> @cmp_strue_v16i8(<16 x i8> %a, <16 x i8> %b) { -; CHECK-LABEL: @cmp_strue_v16i8 -; CHECK-NEXT: ret <16 x i8> +; CHECK-LABEL: @cmp_strue_v16i8( +; CHECK-NEXT: ret <16 x i8> +; %1 = tail call <16 x i8> @llvm.x86.xop.vpcomtrueb(<16 x i8> %a, <16 x i8> %b) ret <16 x i8> %1 } define <16 x i8> @cmp_utrue_v16i8(<16 x i8> %a, <16 x i8> %b) { -; CHECK-LABEL: @cmp_utrue_v16i8 -; CHECK-NEXT: ret <16 x i8> +; CHECK-LABEL: @cmp_utrue_v16i8( +; CHECK-NEXT: ret <16 x i8> +; %1 = tail call <16 x i8> @llvm.x86.xop.vpcomtrueub(<16 x i8> %a, <16 x i8> %b) ret <16 x i8> %1 } define <16 x i8> @cmp_sfalse_v16i8(<16 x i8> %a, <16 x i8> %b) { -; CHECK-LABEL: @cmp_sfalse_v16i8 -; CHECK-NEXT: ret <16 x i8> zeroinitializer +; CHECK-LABEL: @cmp_sfalse_v16i8( +; CHECK-NEXT: ret <16 x i8> zeroinitializer +; %1 = tail call <16 x i8> @llvm.x86.xop.vpcomfalseb(<16 x i8> %a, <16 x i8> %b) ret <16 x i8> %1 } define <16 x i8> @cmp_ufalse_v16i8(<16 x i8> %a, <16 x i8> %b) { -; CHECK-LABEL: @cmp_ufalse_v16i8 -; CHECK-NEXT: ret <16 x i8> zeroinitializer +; CHECK-LABEL: @cmp_ufalse_v16i8( +; CHECK-NEXT: ret <16 x i8> zeroinitializer +; %1 = tail call <16 x i8> @llvm.x86.xop.vpcomfalseub(<16 x i8> %a, <16 x i8> %b) ret <16 x i8> %1 } -- 2.11.0