From 85ecb4f64c3a6a8b7836ff908ad943f6daf91751 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Mon, 24 Jun 2019 16:58:19 +0000 Subject: [PATCH] [AArch64] Regenerate 2velem tests. NFCI. Prep work for an upcoming patch git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@364204 91177308-0d34-0410-b5e6-96231b3b80d8 --- test/CodeGen/AArch64/arm64-neon-2velem.ll | 2101 ++++++++++++++++++++++------- 1 file changed, 1622 insertions(+), 479 deletions(-) diff --git a/test/CodeGen/AArch64/arm64-neon-2velem.ll b/test/CodeGen/AArch64/arm64-neon-2velem.ll index 60d9bb5fd36..d11ed170d3c 100644 --- a/test/CodeGen/AArch64/arm64-neon-2velem.ll +++ b/test/CodeGen/AArch64/arm64-neon-2velem.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s --check-prefixes=CHECK,GENERIC ; The instruction latencies of Exynos-M1 trigger the transform we see under the Exynos check. ; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon -fp-contract=fast -mcpu=exynos-m1 | FileCheck %s --check-prefixes=CHECK,EXYNOSM1 @@ -47,7 +48,10 @@ declare <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16>, <4 x i16>) define <4 x i16> @test_vmla_lane_s16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %v) { ; CHECK-LABEL: test_vmla_lane_s16: -; CHECK: mla {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-NEXT: mla v0.4h, v1.4h, v2.h[3] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> %mul = mul <4 x i16> %shuffle, %b @@ -57,7 +61,10 @@ entry: define <8 x i16> @test_vmlaq_lane_s16(<8 x i16> %a, <8 x i16> %b, <4 x i16> %v) { ; CHECK-LABEL: test_vmlaq_lane_s16: -; CHECK: mla {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-NEXT: mla v0.8h, v1.8h, v2.h[3] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> %mul = mul <8 x i16> %shuffle, %b @@ -67,7 +74,10 @@ entry: define <2 x i32> @test_vmla_lane_s32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %v) { ; CHECK-LABEL: test_vmla_lane_s32: -; CHECK: mla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-NEXT: mla v0.2s, v1.2s, v2.s[1] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> %mul = mul <2 x i32> %shuffle, %b @@ -77,7 +87,10 @@ entry: define <4 x i32> @test_vmlaq_lane_s32(<4 x i32> %a, <4 x i32> %b, <2 x i32> %v) { ; CHECK-LABEL: test_vmlaq_lane_s32: -; CHECK: mla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-NEXT: mla v0.4s, v1.4s, v2.s[1] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> %mul = mul <4 x i32> %shuffle, %b @@ -87,7 +100,9 @@ entry: define <4 x i16> @test_vmla_laneq_s16(<4 x i16> %a, <4 x i16> %b, <8 x i16> %v) { ; CHECK-LABEL: test_vmla_laneq_s16: -; CHECK: mla {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mla v0.4h, v1.4h, v2.h[7] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> %mul = mul <4 x i16> %shuffle, %b @@ -97,7 +112,9 @@ entry: define <8 x i16> @test_vmlaq_laneq_s16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %v) { ; CHECK-LABEL: test_vmlaq_laneq_s16: -; CHECK: mla {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mla v0.8h, v1.8h, v2.h[7] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> %mul = mul <8 x i16> %shuffle, %b @@ -107,7 +124,9 @@ entry: define <2 x i32> @test_vmla_laneq_s32(<2 x i32> %a, <2 x i32> %b, <4 x i32> %v) { ; CHECK-LABEL: test_vmla_laneq_s32: -; CHECK: mla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mla v0.2s, v1.2s, v2.s[3] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> %mul = mul <2 x i32> %shuffle, %b @@ -117,7 +136,9 @@ entry: define <4 x i32> @test_vmlaq_laneq_s32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %v) { ; CHECK-LABEL: test_vmlaq_laneq_s32: -; CHECK: mla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mla v0.4s, v1.4s, v2.s[3] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> %mul = mul <4 x i32> %shuffle, %b @@ -127,7 +148,10 @@ entry: define <4 x i16> @test_vmls_lane_s16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %v) { ; CHECK-LABEL: test_vmls_lane_s16: -; CHECK: mls {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-NEXT: mls v0.4h, v1.4h, v2.h[3] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> %mul = mul <4 x i16> %shuffle, %b @@ -137,7 +161,10 @@ entry: define <8 x i16> @test_vmlsq_lane_s16(<8 x i16> %a, <8 x i16> %b, <4 x i16> %v) { ; CHECK-LABEL: test_vmlsq_lane_s16: -; CHECK: mls {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-NEXT: mls v0.8h, v1.8h, v2.h[3] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> %mul = mul <8 x i16> %shuffle, %b @@ -147,7 +174,10 @@ entry: define <2 x i32> @test_vmls_lane_s32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %v) { ; CHECK-LABEL: test_vmls_lane_s32: -; CHECK: mls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-NEXT: mls v0.2s, v1.2s, v2.s[1] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> %mul = mul <2 x i32> %shuffle, %b @@ -157,7 +187,10 @@ entry: define <4 x i32> @test_vmlsq_lane_s32(<4 x i32> %a, <4 x i32> %b, <2 x i32> %v) { ; CHECK-LABEL: test_vmlsq_lane_s32: -; CHECK: mls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-NEXT: mls v0.4s, v1.4s, v2.s[1] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> %mul = mul <4 x i32> %shuffle, %b @@ -167,7 +200,9 @@ entry: define <4 x i16> @test_vmls_laneq_s16(<4 x i16> %a, <4 x i16> %b, <8 x i16> %v) { ; CHECK-LABEL: test_vmls_laneq_s16: -; CHECK: mls {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mls v0.4h, v1.4h, v2.h[7] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> %mul = mul <4 x i16> %shuffle, %b @@ -177,7 +212,9 @@ entry: define <8 x i16> @test_vmlsq_laneq_s16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %v) { ; CHECK-LABEL: test_vmlsq_laneq_s16: -; CHECK: mls {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mls v0.8h, v1.8h, v2.h[7] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> %mul = mul <8 x i16> %shuffle, %b @@ -187,7 +224,9 @@ entry: define <2 x i32> @test_vmls_laneq_s32(<2 x i32> %a, <2 x i32> %b, <4 x i32> %v) { ; CHECK-LABEL: test_vmls_laneq_s32: -; CHECK: mls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mls v0.2s, v1.2s, v2.s[3] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> %mul = mul <2 x i32> %shuffle, %b @@ -197,7 +236,9 @@ entry: define <4 x i32> @test_vmlsq_laneq_s32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %v) { ; CHECK-LABEL: test_vmlsq_laneq_s32: -; CHECK: mls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mls v0.4s, v1.4s, v2.s[3] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> %mul = mul <4 x i32> %shuffle, %b @@ -207,7 +248,10 @@ entry: define <4 x i16> @test_vmul_lane_s16(<4 x i16> %a, <4 x i16> %v) { ; CHECK-LABEL: test_vmul_lane_s16: -; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-NEXT: mul v0.4h, v0.4h, v1.h[3] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> %mul = mul <4 x i16> %shuffle, %a @@ -216,7 +260,10 @@ entry: define <8 x i16> @test_vmulq_lane_s16(<8 x i16> %a, <4 x i16> %v) { ; CHECK-LABEL: test_vmulq_lane_s16: -; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-NEXT: mul v0.8h, v0.8h, v1.h[3] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> %mul = mul <8 x i16> %shuffle, %a @@ -225,7 +272,10 @@ entry: define <2 x i32> @test_vmul_lane_s32(<2 x i32> %a, <2 x i32> %v) { ; CHECK-LABEL: test_vmul_lane_s32: -; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-NEXT: mul v0.2s, v0.2s, v1.s[1] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> %mul = mul <2 x i32> %shuffle, %a @@ -234,7 +284,10 @@ entry: define <4 x i32> @test_vmulq_lane_s32(<4 x i32> %a, <2 x i32> %v) { ; CHECK-LABEL: test_vmulq_lane_s32: -; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-NEXT: mul v0.4s, v0.4s, v1.s[1] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> %mul = mul <4 x i32> %shuffle, %a @@ -243,7 +296,10 @@ entry: define <4 x i16> @test_vmul_lane_u16(<4 x i16> %a, <4 x i16> %v) { ; CHECK-LABEL: test_vmul_lane_u16: -; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-NEXT: mul v0.4h, v0.4h, v1.h[3] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> %mul = mul <4 x i16> %shuffle, %a @@ -252,7 +308,10 @@ entry: define <8 x i16> @test_vmulq_lane_u16(<8 x i16> %a, <4 x i16> %v) { ; CHECK-LABEL: test_vmulq_lane_u16: -; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-NEXT: mul v0.8h, v0.8h, v1.h[3] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> %mul = mul <8 x i16> %shuffle, %a @@ -261,7 +320,10 @@ entry: define <2 x i32> @test_vmul_lane_u32(<2 x i32> %a, <2 x i32> %v) { ; CHECK-LABEL: test_vmul_lane_u32: -; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-NEXT: mul v0.2s, v0.2s, v1.s[1] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> %mul = mul <2 x i32> %shuffle, %a @@ -270,7 +332,10 @@ entry: define <4 x i32> @test_vmulq_lane_u32(<4 x i32> %a, <2 x i32> %v) { ; CHECK-LABEL: test_vmulq_lane_u32: -; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-NEXT: mul v0.4s, v0.4s, v1.s[1] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> %mul = mul <4 x i32> %shuffle, %a @@ -279,7 +344,9 @@ entry: define <4 x i16> @test_vmul_laneq_s16(<4 x i16> %a, <8 x i16> %v) { ; CHECK-LABEL: test_vmul_laneq_s16: -; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mul v0.4h, v0.4h, v1.h[7] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> %mul = mul <4 x i16> %shuffle, %a @@ -288,7 +355,9 @@ entry: define <8 x i16> @test_vmulq_laneq_s16(<8 x i16> %a, <8 x i16> %v) { ; CHECK-LABEL: test_vmulq_laneq_s16: -; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mul v0.8h, v0.8h, v1.h[7] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> %mul = mul <8 x i16> %shuffle, %a @@ -297,7 +366,9 @@ entry: define <2 x i32> @test_vmul_laneq_s32(<2 x i32> %a, <4 x i32> %v) { ; CHECK-LABEL: test_vmul_laneq_s32: -; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mul v0.2s, v0.2s, v1.s[3] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> %mul = mul <2 x i32> %shuffle, %a @@ -306,7 +377,9 @@ entry: define <4 x i32> @test_vmulq_laneq_s32(<4 x i32> %a, <4 x i32> %v) { ; CHECK-LABEL: test_vmulq_laneq_s32: -; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mul v0.4s, v0.4s, v1.s[3] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> %mul = mul <4 x i32> %shuffle, %a @@ -315,7 +388,9 @@ entry: define <4 x i16> @test_vmul_laneq_u16(<4 x i16> %a, <8 x i16> %v) { ; CHECK-LABEL: test_vmul_laneq_u16: -; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mul v0.4h, v0.4h, v1.h[7] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> %mul = mul <4 x i16> %shuffle, %a @@ -324,7 +399,9 @@ entry: define <8 x i16> @test_vmulq_laneq_u16(<8 x i16> %a, <8 x i16> %v) { ; CHECK-LABEL: test_vmulq_laneq_u16: -; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mul v0.8h, v0.8h, v1.h[7] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> %mul = mul <8 x i16> %shuffle, %a @@ -333,7 +410,9 @@ entry: define <2 x i32> @test_vmul_laneq_u32(<2 x i32> %a, <4 x i32> %v) { ; CHECK-LABEL: test_vmul_laneq_u32: -; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mul v0.2s, v0.2s, v1.s[3] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> %mul = mul <2 x i32> %shuffle, %a @@ -342,7 +421,9 @@ entry: define <4 x i32> @test_vmulq_laneq_u32(<4 x i32> %a, <4 x i32> %v) { ; CHECK-LABEL: test_vmulq_laneq_u32: -; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mul v0.4s, v0.4s, v1.s[3] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> %mul = mul <4 x i32> %shuffle, %a @@ -350,11 +431,24 @@ entry: } define <2 x float> @test_vfma_lane_f32(<2 x float> %a, <2 x float> %b, <2 x float> %v) { -; CHECK-LABEL: test_vfma_lane_f32: -; GENERIC: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] -; EXYNOSM1: dup [[V:v[0-9]+]].2s, {{v[0-9]+}}.s[1] -; EXYNOSM1: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, [[V]].2s -; EXYNOSM3: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] +; GENERIC-LABEL: test_vfma_lane_f32: +; GENERIC: // %bb.0: // %entry +; GENERIC-NEXT: // kill: def $d2 killed $d2 def $q2 +; GENERIC-NEXT: fmla v0.2s, v1.2s, v2.s[1] +; GENERIC-NEXT: ret +; +; EXYNOSM1-LABEL: test_vfma_lane_f32: +; EXYNOSM1: // %bb.0: // %entry +; EXYNOSM1-NEXT: // kill: def $d2 killed $d2 def $q2 +; EXYNOSM1-NEXT: dup v2.2s, v2.s[1] +; EXYNOSM1-NEXT: fmla v0.2s, v1.2s, v2.2s +; EXYNOSM1-NEXT: ret +; +; EXYNOSM3-LABEL: test_vfma_lane_f32: +; EXYNOSM3: // %bb.0: // %entry +; EXYNOSM3-NEXT: // kill: def $d2 killed $d2 def $q2 +; EXYNOSM3-NEXT: fmla v0.2s, v1.2s, v2.s[1] +; EXYNOSM3-NEXT: ret entry: %lane = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a) @@ -364,11 +458,24 @@ entry: declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>) define <4 x float> @test_vfmaq_lane_f32(<4 x float> %a, <4 x float> %b, <2 x float> %v) { -; CHECK-LABEL: test_vfmaq_lane_f32: -; GENERIC: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] -; EXYNOSM1: dup [[V:v[0-9]+]].4s, {{v[0-9]+}}.s[1] -; EXYNOSM1: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, [[V]].4s -; EXYNOSM3: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] +; GENERIC-LABEL: test_vfmaq_lane_f32: +; GENERIC: // %bb.0: // %entry +; GENERIC-NEXT: // kill: def $d2 killed $d2 def $q2 +; GENERIC-NEXT: fmla v0.4s, v1.4s, v2.s[1] +; GENERIC-NEXT: ret +; +; EXYNOSM1-LABEL: test_vfmaq_lane_f32: +; EXYNOSM1: // %bb.0: // %entry +; EXYNOSM1-NEXT: // kill: def $d2 killed $d2 def $q2 +; EXYNOSM1-NEXT: dup v2.4s, v2.s[1] +; EXYNOSM1-NEXT: fmla v0.4s, v1.4s, v2.4s +; EXYNOSM1-NEXT: ret +; +; EXYNOSM3-LABEL: test_vfmaq_lane_f32: +; EXYNOSM3: // %bb.0: // %entry +; EXYNOSM3-NEXT: // kill: def $d2 killed $d2 def $q2 +; EXYNOSM3-NEXT: fmla v0.4s, v1.4s, v2.s[1] +; EXYNOSM3-NEXT: ret entry: %lane = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a) @@ -378,11 +485,21 @@ entry: declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>) define <2 x float> @test_vfma_laneq_f32(<2 x float> %a, <2 x float> %b, <4 x float> %v) { -; CHECK-LABEL: test_vfma_laneq_f32: -; GENERIC: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] -; EXYNOSM1: dup [[V:v[0-9]+]].2s, {{v[0-9]+}}.s[3] -; EXYNOSM1: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, [[V]].2s -; EXYNOSM3: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] +; GENERIC-LABEL: test_vfma_laneq_f32: +; GENERIC: // %bb.0: // %entry +; GENERIC-NEXT: fmla v0.2s, v1.2s, v2.s[3] +; GENERIC-NEXT: ret +; +; EXYNOSM1-LABEL: test_vfma_laneq_f32: +; EXYNOSM1: // %bb.0: // %entry +; EXYNOSM1-NEXT: dup v2.2s, v2.s[3] +; EXYNOSM1-NEXT: fmla v0.2s, v1.2s, v2.2s +; EXYNOSM1-NEXT: ret +; +; EXYNOSM3-LABEL: test_vfma_laneq_f32: +; EXYNOSM3: // %bb.0: // %entry +; EXYNOSM3-NEXT: fmla v0.2s, v1.2s, v2.s[3] +; EXYNOSM3-NEXT: ret entry: %lane = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a) @@ -390,11 +507,21 @@ entry: } define <4 x float> @test_vfmaq_laneq_f32(<4 x float> %a, <4 x float> %b, <4 x float> %v) { -; CHECK-LABEL: test_vfmaq_laneq_f32: -; GENERIC: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] -; EXYNOSM1: dup [[V:v[0-9]+]].4s, {{v[0-9]+}}.s[3] -; EXYNOSM1: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, [[V]].4s -; EXYNOSM3: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] +; GENERIC-LABEL: test_vfmaq_laneq_f32: +; GENERIC: // %bb.0: // %entry +; GENERIC-NEXT: fmla v0.4s, v1.4s, v2.s[3] +; GENERIC-NEXT: ret +; +; EXYNOSM1-LABEL: test_vfmaq_laneq_f32: +; EXYNOSM1: // %bb.0: // %entry +; EXYNOSM1-NEXT: dup v2.4s, v2.s[3] +; EXYNOSM1-NEXT: fmla v0.4s, v1.4s, v2.4s +; EXYNOSM1-NEXT: ret +; +; EXYNOSM3-LABEL: test_vfmaq_laneq_f32: +; EXYNOSM3: // %bb.0: // %entry +; EXYNOSM3-NEXT: fmla v0.4s, v1.4s, v2.s[3] +; EXYNOSM3-NEXT: ret entry: %lane = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a) @@ -402,11 +529,24 @@ entry: } define <2 x float> @test_vfms_lane_f32(<2 x float> %a, <2 x float> %b, <2 x float> %v) { -; CHECK-LABEL: test_vfms_lane_f32: -; GENERIC: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] -; EXYNOSM1: dup [[V:v[0-9]+]].2s, {{v[0-9]+}}.s[1] -; EXYNOSM1: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, [[V]].2s -; EXYNOSM3: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] +; GENERIC-LABEL: test_vfms_lane_f32: +; GENERIC: // %bb.0: // %entry +; GENERIC-NEXT: // kill: def $d2 killed $d2 def $q2 +; GENERIC-NEXT: fmls v0.2s, v1.2s, v2.s[1] +; GENERIC-NEXT: ret +; +; EXYNOSM1-LABEL: test_vfms_lane_f32: +; EXYNOSM1: // %bb.0: // %entry +; EXYNOSM1-NEXT: // kill: def $d2 killed $d2 def $q2 +; EXYNOSM1-NEXT: dup v2.2s, v2.s[1] +; EXYNOSM1-NEXT: fmls v0.2s, v1.2s, v2.2s +; EXYNOSM1-NEXT: ret +; +; EXYNOSM3-LABEL: test_vfms_lane_f32: +; EXYNOSM3: // %bb.0: // %entry +; EXYNOSM3-NEXT: // kill: def $d2 killed $d2 def $q2 +; EXYNOSM3-NEXT: fmls v0.2s, v1.2s, v2.s[1] +; EXYNOSM3-NEXT: ret entry: %sub = fsub <2 x float> , %v %lane = shufflevector <2 x float> %sub, <2 x float> undef, <2 x i32> @@ -415,11 +555,24 @@ entry: } define <4 x float> @test_vfmsq_lane_f32(<4 x float> %a, <4 x float> %b, <2 x float> %v) { -; CHECK-LABEL: test_vfmsq_lane_f32: -; GENERIC: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] -; EXYNOSM1: dup [[V:v[0-9]+]].4s, {{v[0-9]+}}.s[1] -; EXYNOSM1: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, [[V]].4s -; EXYNOSM3: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] +; GENERIC-LABEL: test_vfmsq_lane_f32: +; GENERIC: // %bb.0: // %entry +; GENERIC-NEXT: // kill: def $d2 killed $d2 def $q2 +; GENERIC-NEXT: fmls v0.4s, v1.4s, v2.s[1] +; GENERIC-NEXT: ret +; +; EXYNOSM1-LABEL: test_vfmsq_lane_f32: +; EXYNOSM1: // %bb.0: // %entry +; EXYNOSM1-NEXT: // kill: def $d2 killed $d2 def $q2 +; EXYNOSM1-NEXT: dup v2.4s, v2.s[1] +; EXYNOSM1-NEXT: fmls v0.4s, v1.4s, v2.4s +; EXYNOSM1-NEXT: ret +; +; EXYNOSM3-LABEL: test_vfmsq_lane_f32: +; EXYNOSM3: // %bb.0: // %entry +; EXYNOSM3-NEXT: // kill: def $d2 killed $d2 def $q2 +; EXYNOSM3-NEXT: fmls v0.4s, v1.4s, v2.s[1] +; EXYNOSM3-NEXT: ret entry: %sub = fsub <2 x float> , %v %lane = shufflevector <2 x float> %sub, <2 x float> undef, <4 x i32> @@ -428,11 +581,21 @@ entry: } define <2 x float> @test_vfms_laneq_f32(<2 x float> %a, <2 x float> %b, <4 x float> %v) { -; CHECK-LABEL: test_vfms_laneq_f32: -; GENERIC: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] -; EXYNOSM1: dup [[V:v[0-9]+]].2s, {{v[0-9]+}}.s[3] -; EXYNOSM1: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, [[V]].2s -; EXYNOSM3: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] +; GENERIC-LABEL: test_vfms_laneq_f32: +; GENERIC: // %bb.0: // %entry +; GENERIC-NEXT: fmls v0.2s, v1.2s, v2.s[3] +; GENERIC-NEXT: ret +; +; EXYNOSM1-LABEL: test_vfms_laneq_f32: +; EXYNOSM1: // %bb.0: // %entry +; EXYNOSM1-NEXT: dup v2.2s, v2.s[3] +; EXYNOSM1-NEXT: fmls v0.2s, v1.2s, v2.2s +; EXYNOSM1-NEXT: ret +; +; EXYNOSM3-LABEL: test_vfms_laneq_f32: +; EXYNOSM3: // %bb.0: // %entry +; EXYNOSM3-NEXT: fmls v0.2s, v1.2s, v2.s[3] +; EXYNOSM3-NEXT: ret entry: %sub = fsub <4 x float> , %v %lane = shufflevector <4 x float> %sub, <4 x float> undef, <2 x i32> @@ -441,11 +604,21 @@ entry: } define <4 x float> @test_vfmsq_laneq_f32(<4 x float> %a, <4 x float> %b, <4 x float> %v) { -; CHECK-LABEL: test_vfmsq_laneq_f32: -; GENERIC: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] -; EXYNOSM1: dup [[V:v[0-9]+]].4s, {{v[0-9]+}}.s[3] -; EXYNOSM1: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, [[V]].4s -; EXYNOSM3: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] +; GENERIC-LABEL: test_vfmsq_laneq_f32: +; GENERIC: // %bb.0: // %entry +; GENERIC-NEXT: fmls v0.4s, v1.4s, v2.s[3] +; GENERIC-NEXT: ret +; +; EXYNOSM1-LABEL: test_vfmsq_laneq_f32: +; EXYNOSM1: // %bb.0: // %entry +; EXYNOSM1-NEXT: dup v2.4s, v2.s[3] +; EXYNOSM1-NEXT: fmls v0.4s, v1.4s, v2.4s +; EXYNOSM1-NEXT: ret +; +; EXYNOSM3-LABEL: test_vfmsq_laneq_f32: +; EXYNOSM3: // %bb.0: // %entry +; EXYNOSM3-NEXT: fmls v0.4s, v1.4s, v2.s[3] +; EXYNOSM3-NEXT: ret entry: %sub = fsub <4 x float> , %v %lane = shufflevector <4 x float> %sub, <4 x float> undef, <4 x i32> @@ -454,11 +627,24 @@ entry: } define <2 x double> @test_vfmaq_lane_f64(<2 x double> %a, <2 x double> %b, <1 x double> %v) { -; CHECK-LABEL: test_vfmaq_lane_f64: -; GENERIC: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] -; EXYNOSM1: dup [[V:v[0-9]+]].2d, {{v[0-9]+}}.d[0] -; EXYNOSM1: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, [[V]].2d -; EXYNOSM3: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] +; GENERIC-LABEL: test_vfmaq_lane_f64: +; GENERIC: // %bb.0: // %entry +; GENERIC-NEXT: // kill: def $d2 killed $d2 def $q2 +; GENERIC-NEXT: fmla v0.2d, v1.2d, v2.d[0] +; GENERIC-NEXT: ret +; +; EXYNOSM1-LABEL: test_vfmaq_lane_f64: +; EXYNOSM1: // %bb.0: // %entry +; EXYNOSM1-NEXT: // kill: def $d2 killed $d2 def $q2 +; EXYNOSM1-NEXT: dup v2.2d, v2.d[0] +; EXYNOSM1-NEXT: fmla v0.2d, v1.2d, v2.2d +; EXYNOSM1-NEXT: ret +; +; EXYNOSM3-LABEL: test_vfmaq_lane_f64: +; EXYNOSM3: // %bb.0: // %entry +; EXYNOSM3-NEXT: // kill: def $d2 killed $d2 def $q2 +; EXYNOSM3-NEXT: fmla v0.2d, v1.2d, v2.d[0] +; EXYNOSM3-NEXT: ret entry: %lane = shufflevector <1 x double> %v, <1 x double> undef, <2 x i32> zeroinitializer %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %lane, <2 x double> %b, <2 x double> %a) @@ -468,11 +654,21 @@ entry: declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>) define <2 x double> @test_vfmaq_laneq_f64(<2 x double> %a, <2 x double> %b, <2 x double> %v) { -; CHECK-LABEL: test_vfmaq_laneq_f64: -; GENERIC: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[1] -; EXYNOSM1: dup [[V:v[0-9]+]].2d, {{v[0-9]+}}.d[1] -; EXYNOSM1: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, [[V]].2d -; EXYNOSM3: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[1] +; GENERIC-LABEL: test_vfmaq_laneq_f64: +; GENERIC: // %bb.0: // %entry +; GENERIC-NEXT: fmla v0.2d, v1.2d, v2.d[1] +; GENERIC-NEXT: ret +; +; EXYNOSM1-LABEL: test_vfmaq_laneq_f64: +; EXYNOSM1: // %bb.0: // %entry +; EXYNOSM1-NEXT: dup v2.2d, v2.d[1] +; EXYNOSM1-NEXT: fmla v0.2d, v1.2d, v2.2d +; EXYNOSM1-NEXT: ret +; +; EXYNOSM3-LABEL: test_vfmaq_laneq_f64: +; EXYNOSM3: // %bb.0: // %entry +; EXYNOSM3-NEXT: fmla v0.2d, v1.2d, v2.d[1] +; EXYNOSM3-NEXT: ret entry: %lane = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %lane, <2 x double> %b, <2 x double> %a) @@ -480,11 +676,24 @@ entry: } define <2 x double> @test_vfmsq_lane_f64(<2 x double> %a, <2 x double> %b, <1 x double> %v) { -; CHECK-LABEL: test_vfmsq_lane_f64: -; GENERIC: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] -; EXYNOSM1: dup [[V:v[0-9]+]].2d, {{v[0-9]+}}.d[0] -; EXYNOSM1: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, [[V]].2d -; EXYNOSM3: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] +; GENERIC-LABEL: test_vfmsq_lane_f64: +; GENERIC: // %bb.0: // %entry +; GENERIC-NEXT: // kill: def $d2 killed $d2 def $q2 +; GENERIC-NEXT: fmls v0.2d, v1.2d, v2.d[0] +; GENERIC-NEXT: ret +; +; EXYNOSM1-LABEL: test_vfmsq_lane_f64: +; EXYNOSM1: // %bb.0: // %entry +; EXYNOSM1-NEXT: // kill: def $d2 killed $d2 def $q2 +; EXYNOSM1-NEXT: dup v2.2d, v2.d[0] +; EXYNOSM1-NEXT: fmls v0.2d, v1.2d, v2.2d +; EXYNOSM1-NEXT: ret +; +; EXYNOSM3-LABEL: test_vfmsq_lane_f64: +; EXYNOSM3: // %bb.0: // %entry +; EXYNOSM3-NEXT: // kill: def $d2 killed $d2 def $q2 +; EXYNOSM3-NEXT: fmls v0.2d, v1.2d, v2.d[0] +; EXYNOSM3-NEXT: ret entry: %sub = fsub <1 x double> , %v %lane = shufflevector <1 x double> %sub, <1 x double> undef, <2 x i32> zeroinitializer @@ -493,11 +702,21 @@ entry: } define <2 x double> @test_vfmsq_laneq_f64(<2 x double> %a, <2 x double> %b, <2 x double> %v) { -; CHECK-LABEL: test_vfmsq_laneq_f64: -; GENERIC: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[1] -; EXYNOSM1: dup [[V:v[0-9]+]].2d, {{v[0-9]+}}.d[1] -; EXYNOSM1: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, [[V]].2d -; EXYNOSM3: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[1] +; GENERIC-LABEL: test_vfmsq_laneq_f64: +; GENERIC: // %bb.0: // %entry +; GENERIC-NEXT: fmls v0.2d, v1.2d, v2.d[1] +; GENERIC-NEXT: ret +; +; EXYNOSM1-LABEL: test_vfmsq_laneq_f64: +; EXYNOSM1: // %bb.0: // %entry +; EXYNOSM1-NEXT: dup v2.2d, v2.d[1] +; EXYNOSM1-NEXT: fmls v0.2d, v1.2d, v2.2d +; EXYNOSM1-NEXT: ret +; +; EXYNOSM3-LABEL: test_vfmsq_laneq_f64: +; EXYNOSM3: // %bb.0: // %entry +; EXYNOSM3-NEXT: fmls v0.2d, v1.2d, v2.d[1] +; EXYNOSM3-NEXT: ret entry: %sub = fsub <2 x double> , %v %lane = shufflevector <2 x double> %sub, <2 x double> undef, <2 x i32> @@ -506,8 +725,10 @@ entry: } define float @test_vfmas_laneq_f32(float %a, float %b, <4 x float> %v) { -; CHECK-LABEL: test_vfmas_laneq_f32 -; CHECK: fmla {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3] +; CHECK-LABEL: test_vfmas_laneq_f32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fmla s0, s1, v2.s[3] +; CHECK-NEXT: ret entry: %extract = extractelement <4 x float> %v, i32 3 %0 = tail call float @llvm.fma.f32(float %b, float %extract, float %a) @@ -517,8 +738,10 @@ entry: declare float @llvm.fma.f32(float, float, float) define double @test_vfmsd_lane_f64(double %a, double %b, <1 x double> %v) { -; CHECK-LABEL: test_vfmsd_lane_f64 -; CHECK: fmsub {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} +; CHECK-LABEL: test_vfmsd_lane_f64: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fmsub d0, d1, d2, d0 +; CHECK-NEXT: ret entry: %extract.rhs = extractelement <1 x double> %v, i32 0 %extract = fsub double -0.000000e+00, %extract.rhs @@ -529,8 +752,11 @@ entry: declare double @llvm.fma.f64(double, double, double) define float @test_vfmss_lane_f32(float %a, float %b, <2 x float> %v) { -; CHECK-LABEL: test_vfmss_lane_f32 -; CHECK: fmls {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1] +; CHECK-LABEL: test_vfmss_lane_f32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-NEXT: fmls s0, s1, v2.s[1] +; CHECK-NEXT: ret entry: %extract.rhs = extractelement <2 x float> %v, i32 1 %extract = fsub float -0.000000e+00, %extract.rhs @@ -539,8 +765,10 @@ entry: } define float @test_vfmss_laneq_f32(float %a, float %b, <4 x float> %v) { -; CHECK-LABEL: test_vfmss_laneq_f32 -; CHECK: fmls {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3] +; CHECK-LABEL: test_vfmss_laneq_f32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fmls s0, s1, v2.s[3] +; CHECK-NEXT: ret entry: %extract.rhs = extractelement <4 x float> %v, i32 3 %extract = fsub float -0.000000e+00, %extract.rhs @@ -549,8 +777,10 @@ entry: } define double @test_vfmsd_laneq_f64(double %a, double %b, <2 x double> %v) { -; CHECK-LABEL: test_vfmsd_laneq_f64 -; CHECK: fmls {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1] +; CHECK-LABEL: test_vfmsd_laneq_f64: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fmls d0, d1, v2.d[1] +; CHECK-NEXT: ret entry: %extract.rhs = extractelement <2 x double> %v, i32 1 %extract = fsub double -0.000000e+00, %extract.rhs @@ -559,9 +789,10 @@ entry: } define double @test_vfmsd_lane_f64_0(double %a, double %b, <1 x double> %v) { -; CHECK-LABEL: test_vfmsd_lane_f64_0 -; CHECK: fmsub {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} -; CHECK-NEXT: ret +; CHECK-LABEL: test_vfmsd_lane_f64_0: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fmsub d0, d1, d2, d0 +; CHECK-NEXT: ret entry: %tmp0 = fsub <1 x double> , %v %tmp1 = extractelement <1 x double> %tmp0, i32 0 @@ -570,8 +801,11 @@ entry: } define float @test_vfmss_lane_f32_0(float %a, float %b, <2 x float> %v) { -; CHECK-LABEL: test_vfmss_lane_f32_0 -; CHECK: fmls {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1] +; CHECK-LABEL: test_vfmss_lane_f32_0: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-NEXT: fmls s0, s1, v2.s[1] +; CHECK-NEXT: ret entry: %tmp0 = fsub <2 x float> , %v %tmp1 = extractelement <2 x float> %tmp0, i32 1 @@ -580,8 +814,10 @@ entry: } define float @test_vfmss_laneq_f32_0(float %a, float %b, <4 x float> %v) { -; CHECK-LABEL: test_vfmss_laneq_f32_0 -; CHECK: fmls {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3] +; CHECK-LABEL: test_vfmss_laneq_f32_0: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fmls s0, s1, v2.s[3] +; CHECK-NEXT: ret entry: %tmp0 = fsub <4 x float>, %v %tmp1 = extractelement <4 x float> %tmp0, i32 3 @@ -590,8 +826,10 @@ entry: } define double @test_vfmsd_laneq_f64_0(double %a, double %b, <2 x double> %v) { -; CHECK-LABEL: test_vfmsd_laneq_f64_0 -; CHECK: fmls {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1] +; CHECK-LABEL: test_vfmsd_laneq_f64_0: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fmls d0, d1, v2.d[1] +; CHECK-NEXT: ret entry: %tmp0 = fsub <2 x double>, %v %tmp1 = extractelement <2 x double> %tmp0, i32 1 @@ -601,7 +839,10 @@ entry: define <4 x i32> @test_vmlal_lane_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) { ; CHECK-LABEL: test_vmlal_lane_s16: -; CHECK: mlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-NEXT: smlal v0.4s, v1.4h, v2.h[3] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) @@ -611,7 +852,10 @@ entry: define <2 x i64> @test_vmlal_lane_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) { ; CHECK-LABEL: test_vmlal_lane_s32: -; CHECK: mlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-NEXT: smlal v0.2d, v1.2s, v2.s[1] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) @@ -621,7 +865,9 @@ entry: define <4 x i32> @test_vmlal_laneq_s16(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) { ; CHECK-LABEL: test_vmlal_laneq_s16: -; CHECK: mlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: smlal v0.4s, v1.4h, v2.h[7] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) @@ -631,7 +877,9 @@ entry: define <2 x i64> @test_vmlal_laneq_s32(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) { ; CHECK-LABEL: test_vmlal_laneq_s32: -; CHECK: mlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: smlal v0.2d, v1.2s, v2.s[3] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) @@ -641,7 +889,10 @@ entry: define <4 x i32> @test_vmlal_high_lane_s16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) { ; CHECK-LABEL: test_vmlal_high_lane_s16: -; CHECK: mlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-NEXT: smlal2 v0.4s, v1.8h, v2.h[3] +; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> @@ -652,7 +903,10 @@ entry: define <2 x i64> @test_vmlal_high_lane_s32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) { ; CHECK-LABEL: test_vmlal_high_lane_s32: -; CHECK: mlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-NEXT: smlal2 v0.2d, v1.4s, v2.s[1] +; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> @@ -663,7 +917,9 @@ entry: define <4 x i32> @test_vmlal_high_laneq_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) { ; CHECK-LABEL: test_vmlal_high_laneq_s16: -; CHECK: mlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: smlal2 v0.4s, v1.8h, v2.h[7] +; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> @@ -674,7 +930,9 @@ entry: define <2 x i64> @test_vmlal_high_laneq_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) { ; CHECK-LABEL: test_vmlal_high_laneq_s32: -; CHECK: mlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: smlal2 v0.2d, v1.4s, v2.s[3] +; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> @@ -685,7 +943,10 @@ entry: define <4 x i32> @test_vmlsl_lane_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) { ; CHECK-LABEL: test_vmlsl_lane_s16: -; CHECK: mlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-NEXT: smlsl v0.4s, v1.4h, v2.h[3] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) @@ -695,7 +956,10 @@ entry: define <2 x i64> @test_vmlsl_lane_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) { ; CHECK-LABEL: test_vmlsl_lane_s32: -; CHECK: mlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-NEXT: smlsl v0.2d, v1.2s, v2.s[1] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) @@ -705,7 +969,9 @@ entry: define <4 x i32> @test_vmlsl_laneq_s16(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) { ; CHECK-LABEL: test_vmlsl_laneq_s16: -; CHECK: mlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: smlsl v0.4s, v1.4h, v2.h[7] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) @@ -715,7 +981,9 @@ entry: define <2 x i64> @test_vmlsl_laneq_s32(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) { ; CHECK-LABEL: test_vmlsl_laneq_s32: -; CHECK: mlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: smlsl v0.2d, v1.2s, v2.s[3] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) @@ -725,7 +993,10 @@ entry: define <4 x i32> @test_vmlsl_high_lane_s16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) { ; CHECK-LABEL: test_vmlsl_high_lane_s16: -; CHECK: mlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-NEXT: smlsl2 v0.4s, v1.8h, v2.h[3] +; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> @@ -736,7 +1007,10 @@ entry: define <2 x i64> @test_vmlsl_high_lane_s32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) { ; CHECK-LABEL: test_vmlsl_high_lane_s32: -; CHECK: mlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-NEXT: smlsl2 v0.2d, v1.4s, v2.s[1] +; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> @@ -747,7 +1021,9 @@ entry: define <4 x i32> @test_vmlsl_high_laneq_s16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) { ; CHECK-LABEL: test_vmlsl_high_laneq_s16: -; CHECK: mlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: smlsl2 v0.4s, v1.8h, v2.h[7] +; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> @@ -758,7 +1034,9 @@ entry: define <2 x i64> @test_vmlsl_high_laneq_s32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) { ; CHECK-LABEL: test_vmlsl_high_laneq_s32: -; CHECK: mlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: smlsl2 v0.2d, v1.4s, v2.s[3] +; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> @@ -769,7 +1047,10 @@ entry: define <4 x i32> @test_vmlal_lane_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) { ; CHECK-LABEL: test_vmlal_lane_u16: -; CHECK: mlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-NEXT: umlal v0.4s, v1.4h, v2.h[3] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) @@ -779,7 +1060,10 @@ entry: define <2 x i64> @test_vmlal_lane_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) { ; CHECK-LABEL: test_vmlal_lane_u32: -; CHECK: mlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-NEXT: umlal v0.2d, v1.2s, v2.s[1] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) @@ -789,7 +1073,9 @@ entry: define <4 x i32> @test_vmlal_laneq_u16(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) { ; CHECK-LABEL: test_vmlal_laneq_u16: -; CHECK: mlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: umlal v0.4s, v1.4h, v2.h[7] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) @@ -799,7 +1085,9 @@ entry: define <2 x i64> @test_vmlal_laneq_u32(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) { ; CHECK-LABEL: test_vmlal_laneq_u32: -; CHECK: mlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: umlal v0.2d, v1.2s, v2.s[3] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) @@ -809,7 +1097,10 @@ entry: define <4 x i32> @test_vmlal_high_lane_u16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) { ; CHECK-LABEL: test_vmlal_high_lane_u16: -; CHECK: mlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-NEXT: umlal2 v0.4s, v1.8h, v2.h[3] +; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> @@ -820,7 +1111,10 @@ entry: define <2 x i64> @test_vmlal_high_lane_u32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) { ; CHECK-LABEL: test_vmlal_high_lane_u32: -; CHECK: mlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-NEXT: umlal2 v0.2d, v1.4s, v2.s[1] +; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> @@ -831,7 +1125,9 @@ entry: define <4 x i32> @test_vmlal_high_laneq_u16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) { ; CHECK-LABEL: test_vmlal_high_laneq_u16: -; CHECK: mlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: umlal2 v0.4s, v1.8h, v2.h[7] +; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> @@ -842,7 +1138,9 @@ entry: define <2 x i64> @test_vmlal_high_laneq_u32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) { ; CHECK-LABEL: test_vmlal_high_laneq_u32: -; CHECK: mlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: umlal2 v0.2d, v1.4s, v2.s[3] +; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> @@ -853,7 +1151,10 @@ entry: define <4 x i32> @test_vmlsl_lane_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) { ; CHECK-LABEL: test_vmlsl_lane_u16: -; CHECK: mlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-NEXT: umlsl v0.4s, v1.4h, v2.h[3] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) @@ -863,7 +1164,10 @@ entry: define <2 x i64> @test_vmlsl_lane_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) { ; CHECK-LABEL: test_vmlsl_lane_u32: -; CHECK: mlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-NEXT: umlsl v0.2d, v1.2s, v2.s[1] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) @@ -873,7 +1177,9 @@ entry: define <4 x i32> @test_vmlsl_laneq_u16(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) { ; CHECK-LABEL: test_vmlsl_laneq_u16: -; CHECK: mlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: umlsl v0.4s, v1.4h, v2.h[7] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) @@ -883,7 +1189,9 @@ entry: define <2 x i64> @test_vmlsl_laneq_u32(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) { ; CHECK-LABEL: test_vmlsl_laneq_u32: -; CHECK: mlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: umlsl v0.2d, v1.2s, v2.s[3] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) @@ -893,7 +1201,10 @@ entry: define <4 x i32> @test_vmlsl_high_lane_u16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) { ; CHECK-LABEL: test_vmlsl_high_lane_u16: -; CHECK: mlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-NEXT: umlsl2 v0.4s, v1.8h, v2.h[3] +; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> @@ -904,7 +1215,10 @@ entry: define <2 x i64> @test_vmlsl_high_lane_u32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) { ; CHECK-LABEL: test_vmlsl_high_lane_u32: -; CHECK: mlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-NEXT: umlsl2 v0.2d, v1.4s, v2.s[1] +; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> @@ -915,7 +1229,9 @@ entry: define <4 x i32> @test_vmlsl_high_laneq_u16(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) { ; CHECK-LABEL: test_vmlsl_high_laneq_u16: -; CHECK: mlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: umlsl2 v0.4s, v1.8h, v2.h[7] +; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> @@ -926,7 +1242,9 @@ entry: define <2 x i64> @test_vmlsl_high_laneq_u32(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) { ; CHECK-LABEL: test_vmlsl_high_laneq_u32: -; CHECK: mlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: umlsl2 v0.2d, v1.4s, v2.s[3] +; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> @@ -937,7 +1255,10 @@ entry: define <4 x i32> @test_vmull_lane_s16(<4 x i16> %a, <4 x i16> %v) { ; CHECK-LABEL: test_vmull_lane_s16: -; CHECK: mull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-NEXT: smull v0.4s, v0.4h, v1.h[3] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) @@ -946,7 +1267,10 @@ entry: define <2 x i64> @test_vmull_lane_s32(<2 x i32> %a, <2 x i32> %v) { ; CHECK-LABEL: test_vmull_lane_s32: -; CHECK: mull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-NEXT: smull v0.2d, v0.2s, v1.s[1] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) @@ -955,7 +1279,10 @@ entry: define <4 x i32> @test_vmull_lane_u16(<4 x i16> %a, <4 x i16> %v) { ; CHECK-LABEL: test_vmull_lane_u16: -; CHECK: mull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-NEXT: umull v0.4s, v0.4h, v1.h[3] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) @@ -964,7 +1291,10 @@ entry: define <2 x i64> @test_vmull_lane_u32(<2 x i32> %a, <2 x i32> %v) { ; CHECK-LABEL: test_vmull_lane_u32: -; CHECK: mull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-NEXT: umull v0.2d, v0.2s, v1.s[1] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) @@ -973,7 +1303,10 @@ entry: define <4 x i32> @test_vmull_high_lane_s16(<8 x i16> %a, <4 x i16> %v) { ; CHECK-LABEL: test_vmull_high_lane_s16: -; CHECK: mull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-NEXT: smull2 v0.4s, v0.8h, v1.h[3] +; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> @@ -983,7 +1316,10 @@ entry: define <2 x i64> @test_vmull_high_lane_s32(<4 x i32> %a, <2 x i32> %v) { ; CHECK-LABEL: test_vmull_high_lane_s32: -; CHECK: mull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-NEXT: smull2 v0.2d, v0.4s, v1.s[1] +; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> @@ -993,7 +1329,10 @@ entry: define <4 x i32> @test_vmull_high_lane_u16(<8 x i16> %a, <4 x i16> %v) { ; CHECK-LABEL: test_vmull_high_lane_u16: -; CHECK: mull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-NEXT: umull2 v0.4s, v0.8h, v1.h[3] +; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> @@ -1003,7 +1342,10 @@ entry: define <2 x i64> @test_vmull_high_lane_u32(<4 x i32> %a, <2 x i32> %v) { ; CHECK-LABEL: test_vmull_high_lane_u32: -; CHECK: mull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-NEXT: umull2 v0.2d, v0.4s, v1.s[1] +; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> @@ -1013,7 +1355,9 @@ entry: define <4 x i32> @test_vmull_laneq_s16(<4 x i16> %a, <8 x i16> %v) { ; CHECK-LABEL: test_vmull_laneq_s16: -; CHECK: mull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: smull v0.4s, v0.4h, v1.h[7] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) @@ -1022,7 +1366,9 @@ entry: define <2 x i64> @test_vmull_laneq_s32(<2 x i32> %a, <4 x i32> %v) { ; CHECK-LABEL: test_vmull_laneq_s32: -; CHECK: mull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: smull v0.2d, v0.2s, v1.s[3] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) @@ -1031,7 +1377,9 @@ entry: define <4 x i32> @test_vmull_laneq_u16(<4 x i16> %a, <8 x i16> %v) { ; CHECK-LABEL: test_vmull_laneq_u16: -; CHECK: mull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: umull v0.4s, v0.4h, v1.h[7] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) @@ -1040,7 +1388,9 @@ entry: define <2 x i64> @test_vmull_laneq_u32(<2 x i32> %a, <4 x i32> %v) { ; CHECK-LABEL: test_vmull_laneq_u32: -; CHECK: mull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: umull v0.2d, v0.2s, v1.s[3] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) @@ -1049,7 +1399,9 @@ entry: define <4 x i32> @test_vmull_high_laneq_s16(<8 x i16> %a, <8 x i16> %v) { ; CHECK-LABEL: test_vmull_high_laneq_s16: -; CHECK: mull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: smull2 v0.4s, v0.8h, v1.h[7] +; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> @@ -1059,7 +1411,9 @@ entry: define <2 x i64> @test_vmull_high_laneq_s32(<4 x i32> %a, <4 x i32> %v) { ; CHECK-LABEL: test_vmull_high_laneq_s32: -; CHECK: mull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: smull2 v0.2d, v0.4s, v1.s[3] +; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> @@ -1069,7 +1423,9 @@ entry: define <4 x i32> @test_vmull_high_laneq_u16(<8 x i16> %a, <8 x i16> %v) { ; CHECK-LABEL: test_vmull_high_laneq_u16: -; CHECK: mull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: umull2 v0.4s, v0.8h, v1.h[7] +; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> @@ -1079,7 +1435,9 @@ entry: define <2 x i64> @test_vmull_high_laneq_u32(<4 x i32> %a, <4 x i32> %v) { ; CHECK-LABEL: test_vmull_high_laneq_u32: -; CHECK: mull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: umull2 v0.2d, v0.4s, v1.s[3] +; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> @@ -1089,7 +1447,10 @@ entry: define <4 x i32> @test_vqdmlal_lane_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) { ; CHECK-LABEL: test_vqdmlal_lane_s16: -; CHECK: qdmlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-NEXT: sqdmlal v0.4s, v1.4h, v2.h[3] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> %vqdmlal2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) @@ -1099,7 +1460,10 @@ entry: define <2 x i64> @test_vqdmlal_lane_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) { ; CHECK-LABEL: test_vqdmlal_lane_s32: -; CHECK: qdmlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-NEXT: sqdmlal v0.2d, v1.2s, v2.s[1] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> %vqdmlal2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) @@ -1109,7 +1473,10 @@ entry: define <4 x i32> @test_vqdmlal_high_lane_s16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) { ; CHECK-LABEL: test_vqdmlal_high_lane_s16: -; CHECK: qdmlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-NEXT: sqdmlal2 v0.4s, v1.8h, v2.h[3] +; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> @@ -1120,7 +1487,10 @@ entry: define <2 x i64> @test_vqdmlal_high_lane_s32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) { ; CHECK-LABEL: test_vqdmlal_high_lane_s32: -; CHECK: qdmlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-NEXT: sqdmlal2 v0.2d, v1.4s, v2.s[1] +; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> @@ -1131,7 +1501,10 @@ entry: define <4 x i32> @test_vqdmlsl_lane_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) { ; CHECK-LABEL: test_vqdmlsl_lane_s16: -; CHECK: qdmlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-NEXT: sqdmlsl v0.4s, v1.4h, v2.h[3] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> %vqdmlsl2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) @@ -1141,7 +1514,10 @@ entry: define <2 x i64> @test_vqdmlsl_lane_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) { ; CHECK-LABEL: test_vqdmlsl_lane_s32: -; CHECK: qdmlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-NEXT: sqdmlsl v0.2d, v1.2s, v2.s[1] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> %vqdmlsl2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) @@ -1151,7 +1527,10 @@ entry: define <4 x i32> @test_vqdmlsl_high_lane_s16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) { ; CHECK-LABEL: test_vqdmlsl_high_lane_s16: -; CHECK: qdmlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-NEXT: sqdmlsl2 v0.4s, v1.8h, v2.h[3] +; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> @@ -1162,7 +1541,10 @@ entry: define <2 x i64> @test_vqdmlsl_high_lane_s32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) { ; CHECK-LABEL: test_vqdmlsl_high_lane_s32: -; CHECK: qdmlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-NEXT: sqdmlsl2 v0.2d, v1.4s, v2.s[1] +; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> @@ -1173,7 +1555,10 @@ entry: define <4 x i32> @test_vqdmull_lane_s16(<4 x i16> %a, <4 x i16> %v) { ; CHECK-LABEL: test_vqdmull_lane_s16: -; CHECK: qdmull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-NEXT: sqdmull v0.4s, v0.4h, v1.h[3] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> %vqdmull2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) @@ -1182,7 +1567,10 @@ entry: define <2 x i64> @test_vqdmull_lane_s32(<2 x i32> %a, <2 x i32> %v) { ; CHECK-LABEL: test_vqdmull_lane_s32: -; CHECK: qdmull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-NEXT: sqdmull v0.2d, v0.2s, v1.s[1] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> %vqdmull2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) @@ -1191,7 +1579,9 @@ entry: define <4 x i32> @test_vqdmull_laneq_s16(<4 x i16> %a, <8 x i16> %v) { ; CHECK-LABEL: test_vqdmull_laneq_s16: -; CHECK: qdmull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: sqdmull v0.4s, v0.4h, v1.h[3] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> %vqdmull2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) @@ -1200,7 +1590,9 @@ entry: define <2 x i64> @test_vqdmull_laneq_s32(<2 x i32> %a, <4 x i32> %v) { ; CHECK-LABEL: test_vqdmull_laneq_s32: -; CHECK: qdmull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: sqdmull v0.2d, v0.2s, v1.s[3] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> %vqdmull2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) @@ -1209,7 +1601,10 @@ entry: define <4 x i32> @test_vqdmull_high_lane_s16(<8 x i16> %a, <4 x i16> %v) { ; CHECK-LABEL: test_vqdmull_high_lane_s16: -; CHECK: qdmull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-NEXT: sqdmull2 v0.4s, v0.8h, v1.h[3] +; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> @@ -1219,7 +1614,10 @@ entry: define <2 x i64> @test_vqdmull_high_lane_s32(<4 x i32> %a, <2 x i32> %v) { ; CHECK-LABEL: test_vqdmull_high_lane_s32: -; CHECK: qdmull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-NEXT: sqdmull2 v0.2d, v0.4s, v1.s[1] +; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> @@ -1229,7 +1627,9 @@ entry: define <4 x i32> @test_vqdmull_high_laneq_s16(<8 x i16> %a, <8 x i16> %v) { ; CHECK-LABEL: test_vqdmull_high_laneq_s16: -; CHECK: qdmull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: sqdmull2 v0.4s, v0.8h, v1.h[7] +; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> @@ -1239,7 +1639,9 @@ entry: define <2 x i64> @test_vqdmull_high_laneq_s32(<4 x i32> %a, <4 x i32> %v) { ; CHECK-LABEL: test_vqdmull_high_laneq_s32: -; CHECK: qdmull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: sqdmull2 v0.2d, v0.4s, v1.s[3] +; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> @@ -1249,7 +1651,10 @@ entry: define <4 x i16> @test_vqdmulh_lane_s16(<4 x i16> %a, <4 x i16> %v) { ; CHECK-LABEL: test_vqdmulh_lane_s16: -; CHECK: qdmulh {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-NEXT: sqdmulh v0.4h, v0.4h, v1.h[3] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> %vqdmulh2.i = tail call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> %a, <4 x i16> %shuffle) @@ -1258,7 +1663,10 @@ entry: define <8 x i16> @test_vqdmulhq_lane_s16(<8 x i16> %a, <4 x i16> %v) { ; CHECK-LABEL: test_vqdmulhq_lane_s16: -; CHECK: qdmulh {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-NEXT: sqdmulh v0.8h, v0.8h, v1.h[3] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> %vqdmulh2.i = tail call <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16> %a, <8 x i16> %shuffle) @@ -1267,7 +1675,10 @@ entry: define <2 x i32> @test_vqdmulh_lane_s32(<2 x i32> %a, <2 x i32> %v) { ; CHECK-LABEL: test_vqdmulh_lane_s32: -; CHECK: qdmulh {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-NEXT: sqdmulh v0.2s, v0.2s, v1.s[1] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> %vqdmulh2.i = tail call <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32> %a, <2 x i32> %shuffle) @@ -1276,7 +1687,10 @@ entry: define <4 x i32> @test_vqdmulhq_lane_s32(<4 x i32> %a, <2 x i32> %v) { ; CHECK-LABEL: test_vqdmulhq_lane_s32: -; CHECK: qdmulh {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-NEXT: sqdmulh v0.4s, v0.4s, v1.s[1] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> %vqdmulh2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32> %a, <4 x i32> %shuffle) @@ -1285,7 +1699,10 @@ entry: define <4 x i16> @test_vqrdmulh_lane_s16(<4 x i16> %a, <4 x i16> %v) { ; CHECK-LABEL: test_vqrdmulh_lane_s16: -; CHECK: qrdmulh {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-NEXT: sqrdmulh v0.4h, v0.4h, v1.h[3] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> %vqrdmulh2.i = tail call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %a, <4 x i16> %shuffle) @@ -1294,7 +1711,10 @@ entry: define <8 x i16> @test_vqrdmulhq_lane_s16(<8 x i16> %a, <4 x i16> %v) { ; CHECK-LABEL: test_vqrdmulhq_lane_s16: -; CHECK: qrdmulh {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-NEXT: sqrdmulh v0.8h, v0.8h, v1.h[3] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> %vqrdmulh2.i = tail call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> %a, <8 x i16> %shuffle) @@ -1303,7 +1723,10 @@ entry: define <2 x i32> @test_vqrdmulh_lane_s32(<2 x i32> %a, <2 x i32> %v) { ; CHECK-LABEL: test_vqrdmulh_lane_s32: -; CHECK: qrdmulh {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-NEXT: sqrdmulh v0.2s, v0.2s, v1.s[1] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> %vqrdmulh2.i = tail call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> %a, <2 x i32> %shuffle) @@ -1312,7 +1735,10 @@ entry: define <4 x i32> @test_vqrdmulhq_lane_s32(<4 x i32> %a, <2 x i32> %v) { ; CHECK-LABEL: test_vqrdmulhq_lane_s32: -; CHECK: qrdmulh {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-NEXT: sqrdmulh v0.4s, v0.4s, v1.s[1] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> %vqrdmulh2.i = tail call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %a, <4 x i32> %shuffle) @@ -1320,11 +1746,24 @@ entry: } define <2 x float> @test_vmul_lane_f32(<2 x float> %a, <2 x float> %v) { -; CHECK-LABEL: test_vmul_lane_f32: -; GENERIC: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] -; EXYNOSM1: dup [[V:v[0-9]+]].2s, {{v[0-9]+}}.s[1] -; EXYNOSM1: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, [[V]].2s -; EXYNOSM3: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] +; GENERIC-LABEL: test_vmul_lane_f32: +; GENERIC: // %bb.0: // %entry +; GENERIC-NEXT: // kill: def $d1 killed $d1 def $q1 +; GENERIC-NEXT: fmul v0.2s, v0.2s, v1.s[1] +; GENERIC-NEXT: ret +; +; EXYNOSM1-LABEL: test_vmul_lane_f32: +; EXYNOSM1: // %bb.0: // %entry +; EXYNOSM1-NEXT: // kill: def $d1 killed $d1 def $q1 +; EXYNOSM1-NEXT: dup v1.2s, v1.s[1] +; EXYNOSM1-NEXT: fmul v0.2s, v0.2s, v1.2s +; EXYNOSM1-NEXT: ret +; +; EXYNOSM3-LABEL: test_vmul_lane_f32: +; EXYNOSM3: // %bb.0: // %entry +; EXYNOSM3-NEXT: // kill: def $d1 killed $d1 def $q1 +; EXYNOSM3-NEXT: fmul v0.2s, v0.2s, v1.s[1] +; EXYNOSM3-NEXT: ret entry: %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> %mul = fmul <2 x float> %shuffle, %a @@ -1333,7 +1772,9 @@ entry: define <1 x double> @test_vmul_lane_f64(<1 x double> %a, <1 x double> %v) { ; CHECK-LABEL: test_vmul_lane_f64: -; CHECK: fmul {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fmul d0, d0, d1 +; CHECK-NEXT: ret entry: %0 = bitcast <1 x double> %a to <8 x i8> %1 = bitcast <8 x i8> %0 to double @@ -1344,11 +1785,24 @@ entry: } define <4 x float> @test_vmulq_lane_f32(<4 x float> %a, <2 x float> %v) { -; CHECK-LABEL: test_vmulq_lane_f32: -; GENERIC: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] -; EXYNOSM1: dup [[V:v[0-9]+]].4s, {{v[0-9]+}}.s[1] -; EXYNOSM1: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, [[V]].4s -; EXYNOSM3: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] +; GENERIC-LABEL: test_vmulq_lane_f32: +; GENERIC: // %bb.0: // %entry +; GENERIC-NEXT: // kill: def $d1 killed $d1 def $q1 +; GENERIC-NEXT: fmul v0.4s, v0.4s, v1.s[1] +; GENERIC-NEXT: ret +; +; EXYNOSM1-LABEL: test_vmulq_lane_f32: +; EXYNOSM1: // %bb.0: // %entry +; EXYNOSM1-NEXT: // kill: def $d1 killed $d1 def $q1 +; EXYNOSM1-NEXT: dup v1.4s, v1.s[1] +; EXYNOSM1-NEXT: fmul v0.4s, v0.4s, v1.4s +; EXYNOSM1-NEXT: ret +; +; EXYNOSM3-LABEL: test_vmulq_lane_f32: +; EXYNOSM3: // %bb.0: // %entry +; EXYNOSM3-NEXT: // kill: def $d1 killed $d1 def $q1 +; EXYNOSM3-NEXT: fmul v0.4s, v0.4s, v1.s[1] +; EXYNOSM3-NEXT: ret entry: %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> %mul = fmul <4 x float> %shuffle, %a @@ -1356,11 +1810,24 @@ entry: } define <2 x double> @test_vmulq_lane_f64(<2 x double> %a, <1 x double> %v) { -; CHECK-LABEL: test_vmulq_lane_f64: -; GENERIC: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] -; EXYNOSM1: dup [[V:v[0-9]+]].2d, {{v[0-9]+}}.d[0] -; EXYNOSM1: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d -; EXYNOSM3: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] +; GENERIC-LABEL: test_vmulq_lane_f64: +; GENERIC: // %bb.0: // %entry +; GENERIC-NEXT: // kill: def $d1 killed $d1 def $q1 +; GENERIC-NEXT: fmul v0.2d, v0.2d, v1.d[0] +; GENERIC-NEXT: ret +; +; EXYNOSM1-LABEL: test_vmulq_lane_f64: +; EXYNOSM1: // %bb.0: // %entry +; EXYNOSM1-NEXT: // kill: def $d1 killed $d1 def $q1 +; EXYNOSM1-NEXT: dup v1.2d, v1.d[0] +; EXYNOSM1-NEXT: fmul v0.2d, v0.2d, v1.2d +; EXYNOSM1-NEXT: ret +; +; EXYNOSM3-LABEL: test_vmulq_lane_f64: +; EXYNOSM3: // %bb.0: // %entry +; EXYNOSM3-NEXT: // kill: def $d1 killed $d1 def $q1 +; EXYNOSM3-NEXT: fmul v0.2d, v0.2d, v1.d[0] +; EXYNOSM3-NEXT: ret entry: %shuffle = shufflevector <1 x double> %v, <1 x double> undef, <2 x i32> zeroinitializer %mul = fmul <2 x double> %shuffle, %a @@ -1368,11 +1835,21 @@ entry: } define <2 x float> @test_vmul_laneq_f32(<2 x float> %a, <4 x float> %v) { -; CHECK-LABEL: test_vmul_laneq_f32: -; GENERIC: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] -; EXYNOSM1: dup [[V:v[0-9]+]].2s, {{v[0-9]+}}.s[3] -; EXYNOSM1: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, [[V]].2s -; EXYNOSM3: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] +; GENERIC-LABEL: test_vmul_laneq_f32: +; GENERIC: // %bb.0: // %entry +; GENERIC-NEXT: fmul v0.2s, v0.2s, v1.s[3] +; GENERIC-NEXT: ret +; +; EXYNOSM1-LABEL: test_vmul_laneq_f32: +; EXYNOSM1: // %bb.0: // %entry +; EXYNOSM1-NEXT: dup v1.2s, v1.s[3] +; EXYNOSM1-NEXT: fmul v0.2s, v0.2s, v1.2s +; EXYNOSM1-NEXT: ret +; +; EXYNOSM3-LABEL: test_vmul_laneq_f32: +; EXYNOSM3: // %bb.0: // %entry +; EXYNOSM3-NEXT: fmul v0.2s, v0.2s, v1.s[3] +; EXYNOSM3-NEXT: ret entry: %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> %mul = fmul <2 x float> %shuffle, %a @@ -1381,7 +1858,9 @@ entry: define <1 x double> @test_vmul_laneq_f64(<1 x double> %a, <2 x double> %v) { ; CHECK-LABEL: test_vmul_laneq_f64: -; CHECK: fmul {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fmul d0, d0, v1.d[1] +; CHECK-NEXT: ret entry: %0 = bitcast <1 x double> %a to <8 x i8> %1 = bitcast <8 x i8> %0 to double @@ -1392,11 +1871,21 @@ entry: } define <4 x float> @test_vmulq_laneq_f32(<4 x float> %a, <4 x float> %v) { -; CHECK-LABEL: test_vmulq_laneq_f32: -; GENERIC: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] -; EXYNOSM1: dup [[V:v[0-9]+]].4s, {{v[0-9]+}}.s[3] -; EXYNOSM1: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, [[V]].4s -; EXYNOSM3: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] +; GENERIC-LABEL: test_vmulq_laneq_f32: +; GENERIC: // %bb.0: // %entry +; GENERIC-NEXT: fmul v0.4s, v0.4s, v1.s[3] +; GENERIC-NEXT: ret +; +; EXYNOSM1-LABEL: test_vmulq_laneq_f32: +; EXYNOSM1: // %bb.0: // %entry +; EXYNOSM1-NEXT: dup v1.4s, v1.s[3] +; EXYNOSM1-NEXT: fmul v0.4s, v0.4s, v1.4s +; EXYNOSM1-NEXT: ret +; +; EXYNOSM3-LABEL: test_vmulq_laneq_f32: +; EXYNOSM3: // %bb.0: // %entry +; EXYNOSM3-NEXT: fmul v0.4s, v0.4s, v1.s[3] +; EXYNOSM3-NEXT: ret entry: %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> %mul = fmul <4 x float> %shuffle, %a @@ -1404,11 +1893,21 @@ entry: } define <2 x double> @test_vmulq_laneq_f64(<2 x double> %a, <2 x double> %v) { -; CHECK-LABEL: test_vmulq_laneq_f64: -; GENERIC: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[1] -; EXYNOSM1: dup [[V:v[0-9]+]].2d, {{v[0-9]+}}.d[1] -; EXYNOSM1: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, [[V]].2d -; EXYNOSM3: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[1] +; GENERIC-LABEL: test_vmulq_laneq_f64: +; GENERIC: // %bb.0: // %entry +; GENERIC-NEXT: fmul v0.2d, v0.2d, v1.d[1] +; GENERIC-NEXT: ret +; +; EXYNOSM1-LABEL: test_vmulq_laneq_f64: +; EXYNOSM1: // %bb.0: // %entry +; EXYNOSM1-NEXT: dup v1.2d, v1.d[1] +; EXYNOSM1-NEXT: fmul v0.2d, v0.2d, v1.2d +; EXYNOSM1-NEXT: ret +; +; EXYNOSM3-LABEL: test_vmulq_laneq_f64: +; EXYNOSM3: // %bb.0: // %entry +; EXYNOSM3-NEXT: fmul v0.2d, v0.2d, v1.d[1] +; EXYNOSM3-NEXT: ret entry: %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> %mul = fmul <2 x double> %shuffle, %a @@ -1416,11 +1915,24 @@ entry: } define <2 x float> @test_vmulx_lane_f32(<2 x float> %a, <2 x float> %v) { -; CHECK-LABEL: test_vmulx_lane_f32: -; GENERIC: mulx {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] -; EXYNOSM1: dup [[V:v[0-9]+]].2s, {{v[0-9]+}}.s[1] -; EXYNOSM1: mulx {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, [[V]].2s -; EXYNOSM3: mulx {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] +; GENERIC-LABEL: test_vmulx_lane_f32: +; GENERIC: // %bb.0: // %entry +; GENERIC-NEXT: // kill: def $d1 killed $d1 def $q1 +; GENERIC-NEXT: fmulx v0.2s, v0.2s, v1.s[1] +; GENERIC-NEXT: ret +; +; EXYNOSM1-LABEL: test_vmulx_lane_f32: +; EXYNOSM1: // %bb.0: // %entry +; EXYNOSM1-NEXT: // kill: def $d1 killed $d1 def $q1 +; EXYNOSM1-NEXT: dup v1.2s, v1.s[1] +; EXYNOSM1-NEXT: fmulx v0.2s, v0.2s, v1.2s +; EXYNOSM1-NEXT: ret +; +; EXYNOSM3-LABEL: test_vmulx_lane_f32: +; EXYNOSM3: // %bb.0: // %entry +; EXYNOSM3-NEXT: // kill: def $d1 killed $d1 def $q1 +; EXYNOSM3-NEXT: fmulx v0.2s, v0.2s, v1.s[1] +; EXYNOSM3-NEXT: ret entry: %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> %vmulx2.i = tail call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> %a, <2 x float> %shuffle) @@ -1428,11 +1940,24 @@ entry: } define <4 x float> @test_vmulxq_lane_f32(<4 x float> %a, <2 x float> %v) { -; CHECK-LABEL: test_vmulxq_lane_f32: -; GENERIC: mulx {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] -; EXYNOSM1: dup [[V:v[0-9]+]].4s, {{v[0-9]+}}.s[1] -; EXYNOSM1: mulx {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, [[V]].4s -; EXYNOSM3: mulx {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] +; GENERIC-LABEL: test_vmulxq_lane_f32: +; GENERIC: // %bb.0: // %entry +; GENERIC-NEXT: // kill: def $d1 killed $d1 def $q1 +; GENERIC-NEXT: fmulx v0.4s, v0.4s, v1.s[1] +; GENERIC-NEXT: ret +; +; EXYNOSM1-LABEL: test_vmulxq_lane_f32: +; EXYNOSM1: // %bb.0: // %entry +; EXYNOSM1-NEXT: // kill: def $d1 killed $d1 def $q1 +; EXYNOSM1-NEXT: dup v1.4s, v1.s[1] +; EXYNOSM1-NEXT: fmulx v0.4s, v0.4s, v1.4s +; EXYNOSM1-NEXT: ret +; +; EXYNOSM3-LABEL: test_vmulxq_lane_f32: +; EXYNOSM3: // %bb.0: // %entry +; EXYNOSM3-NEXT: // kill: def $d1 killed $d1 def $q1 +; EXYNOSM3-NEXT: fmulx v0.4s, v0.4s, v1.s[1] +; EXYNOSM3-NEXT: ret entry: %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> %vmulx2.i = tail call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> %a, <4 x float> %shuffle) @@ -1440,11 +1965,24 @@ entry: } define <2 x double> @test_vmulxq_lane_f64(<2 x double> %a, <1 x double> %v) { -; CHECK-LABEL: test_vmulxq_lane_f64: -; GENERIC: mulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] -; EXYNOSM1: dup [[V:v[0-9]+]].2d, {{v[0-9]+}}.d[0] -; EXYNOSM1: mulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, [[V]].2d -; EXYNOSM3: mulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] +; GENERIC-LABEL: test_vmulxq_lane_f64: +; GENERIC: // %bb.0: // %entry +; GENERIC-NEXT: // kill: def $d1 killed $d1 def $q1 +; GENERIC-NEXT: fmulx v0.2d, v0.2d, v1.d[0] +; GENERIC-NEXT: ret +; +; EXYNOSM1-LABEL: test_vmulxq_lane_f64: +; EXYNOSM1: // %bb.0: // %entry +; EXYNOSM1-NEXT: // kill: def $d1 killed $d1 def $q1 +; EXYNOSM1-NEXT: dup v1.2d, v1.d[0] +; EXYNOSM1-NEXT: fmulx v0.2d, v0.2d, v1.2d +; EXYNOSM1-NEXT: ret +; +; EXYNOSM3-LABEL: test_vmulxq_lane_f64: +; EXYNOSM3: // %bb.0: // %entry +; EXYNOSM3-NEXT: // kill: def $d1 killed $d1 def $q1 +; EXYNOSM3-NEXT: fmulx v0.2d, v0.2d, v1.d[0] +; EXYNOSM3-NEXT: ret entry: %shuffle = shufflevector <1 x double> %v, <1 x double> undef, <2 x i32> zeroinitializer %vmulx2.i = tail call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> %a, <2 x double> %shuffle) @@ -1452,11 +1990,21 @@ entry: } define <2 x float> @test_vmulx_laneq_f32(<2 x float> %a, <4 x float> %v) { -; CHECK-LABEL: test_vmulx_laneq_f32: -; GENERIC: mulx {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] -; EXYNOSM1: dup [[V:v[0-9]+]].2s, {{v[0-9]+}}.s[3] -; EXYNOSM1: mulx {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, [[V]].2s -; EXYNOSM3: mulx {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] +; GENERIC-LABEL: test_vmulx_laneq_f32: +; GENERIC: // %bb.0: // %entry +; GENERIC-NEXT: fmulx v0.2s, v0.2s, v1.s[3] +; GENERIC-NEXT: ret +; +; EXYNOSM1-LABEL: test_vmulx_laneq_f32: +; EXYNOSM1: // %bb.0: // %entry +; EXYNOSM1-NEXT: dup v1.2s, v1.s[3] +; EXYNOSM1-NEXT: fmulx v0.2s, v0.2s, v1.2s +; EXYNOSM1-NEXT: ret +; +; EXYNOSM3-LABEL: test_vmulx_laneq_f32: +; EXYNOSM3: // %bb.0: // %entry +; EXYNOSM3-NEXT: fmulx v0.2s, v0.2s, v1.s[3] +; EXYNOSM3-NEXT: ret entry: %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> %vmulx2.i = tail call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> %a, <2 x float> %shuffle) @@ -1464,11 +2012,21 @@ entry: } define <4 x float> @test_vmulxq_laneq_f32(<4 x float> %a, <4 x float> %v) { -; CHECK-LABEL: test_vmulxq_laneq_f32: -; GENERIC: mulx {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] -; EXYNOSM1: dup [[V:v[0-9]+]].4s, {{v[0-9]+}}.s[3] -; EXYNOSM1: mulx {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, [[V]].4s -; EXYNOSM3: mulx {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] +; GENERIC-LABEL: test_vmulxq_laneq_f32: +; GENERIC: // %bb.0: // %entry +; GENERIC-NEXT: fmulx v0.4s, v0.4s, v1.s[3] +; GENERIC-NEXT: ret +; +; EXYNOSM1-LABEL: test_vmulxq_laneq_f32: +; EXYNOSM1: // %bb.0: // %entry +; EXYNOSM1-NEXT: dup v1.4s, v1.s[3] +; EXYNOSM1-NEXT: fmulx v0.4s, v0.4s, v1.4s +; EXYNOSM1-NEXT: ret +; +; EXYNOSM3-LABEL: test_vmulxq_laneq_f32: +; EXYNOSM3: // %bb.0: // %entry +; EXYNOSM3-NEXT: fmulx v0.4s, v0.4s, v1.s[3] +; EXYNOSM3-NEXT: ret entry: %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> %vmulx2.i = tail call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> %a, <4 x float> %shuffle) @@ -1476,11 +2034,21 @@ entry: } define <2 x double> @test_vmulxq_laneq_f64(<2 x double> %a, <2 x double> %v) { -; CHECK-LABEL: test_vmulxq_laneq_f64: -; GENERIC: mulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[1] -; EXYNOSM1: dup [[V:v[0-9]+]].2d, {{v[0-9]+}}.d[1] -; EXYNOSM1: mulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, [[V]].2d -; EXYNOSM3: mulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[1] +; GENERIC-LABEL: test_vmulxq_laneq_f64: +; GENERIC: // %bb.0: // %entry +; GENERIC-NEXT: fmulx v0.2d, v0.2d, v1.d[1] +; GENERIC-NEXT: ret +; +; EXYNOSM1-LABEL: test_vmulxq_laneq_f64: +; EXYNOSM1: // %bb.0: // %entry +; EXYNOSM1-NEXT: dup v1.2d, v1.d[1] +; EXYNOSM1-NEXT: fmulx v0.2d, v0.2d, v1.2d +; EXYNOSM1-NEXT: ret +; +; EXYNOSM3-LABEL: test_vmulxq_laneq_f64: +; EXYNOSM3: // %bb.0: // %entry +; EXYNOSM3-NEXT: fmulx v0.2d, v0.2d, v1.d[1] +; EXYNOSM3-NEXT: ret entry: %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> %vmulx2.i = tail call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> %a, <2 x double> %shuffle) @@ -1489,7 +2057,10 @@ entry: define <4 x i16> @test_vmla_lane_s16_0(<4 x i16> %a, <4 x i16> %b, <4 x i16> %v) { ; CHECK-LABEL: test_vmla_lane_s16_0: -; CHECK: mla {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-NEXT: mla v0.4h, v1.4h, v2.h[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer %mul = mul <4 x i16> %shuffle, %b @@ -1499,7 +2070,10 @@ entry: define <8 x i16> @test_vmlaq_lane_s16_0(<8 x i16> %a, <8 x i16> %b, <4 x i16> %v) { ; CHECK-LABEL: test_vmlaq_lane_s16_0: -; CHECK: mla {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-NEXT: mla v0.8h, v1.8h, v2.h[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> zeroinitializer %mul = mul <8 x i16> %shuffle, %b @@ -1509,7 +2083,10 @@ entry: define <2 x i32> @test_vmla_lane_s32_0(<2 x i32> %a, <2 x i32> %b, <2 x i32> %v) { ; CHECK-LABEL: test_vmla_lane_s32_0: -; CHECK: mla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-NEXT: mla v0.2s, v1.2s, v2.s[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer %mul = mul <2 x i32> %shuffle, %b @@ -1519,7 +2096,10 @@ entry: define <4 x i32> @test_vmlaq_lane_s32_0(<4 x i32> %a, <4 x i32> %b, <2 x i32> %v) { ; CHECK-LABEL: test_vmlaq_lane_s32_0: -; CHECK: mla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-NEXT: mla v0.4s, v1.4s, v2.s[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> zeroinitializer %mul = mul <4 x i32> %shuffle, %b @@ -1529,7 +2109,9 @@ entry: define <4 x i16> @test_vmla_laneq_s16_0(<4 x i16> %a, <4 x i16> %b, <8 x i16> %v) { ; CHECK-LABEL: test_vmla_laneq_s16_0: -; CHECK: mla {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mla v0.4h, v1.4h, v2.h[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer %mul = mul <4 x i16> %shuffle, %b @@ -1539,7 +2121,9 @@ entry: define <8 x i16> @test_vmlaq_laneq_s16_0(<8 x i16> %a, <8 x i16> %b, <8 x i16> %v) { ; CHECK-LABEL: test_vmlaq_laneq_s16_0: -; CHECK: mla {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mla v0.8h, v1.8h, v2.h[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> zeroinitializer %mul = mul <8 x i16> %shuffle, %b @@ -1549,7 +2133,9 @@ entry: define <2 x i32> @test_vmla_laneq_s32_0(<2 x i32> %a, <2 x i32> %b, <4 x i32> %v) { ; CHECK-LABEL: test_vmla_laneq_s32_0: -; CHECK: mla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mla v0.2s, v1.2s, v2.s[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer %mul = mul <2 x i32> %shuffle, %b @@ -1559,7 +2145,9 @@ entry: define <4 x i32> @test_vmlaq_laneq_s32_0(<4 x i32> %a, <4 x i32> %b, <4 x i32> %v) { ; CHECK-LABEL: test_vmlaq_laneq_s32_0: -; CHECK: mla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mla v0.4s, v1.4s, v2.s[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> zeroinitializer %mul = mul <4 x i32> %shuffle, %b @@ -1569,7 +2157,10 @@ entry: define <4 x i16> @test_vmls_lane_s16_0(<4 x i16> %a, <4 x i16> %b, <4 x i16> %v) { ; CHECK-LABEL: test_vmls_lane_s16_0: -; CHECK: mls {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-NEXT: mls v0.4h, v1.4h, v2.h[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer %mul = mul <4 x i16> %shuffle, %b @@ -1579,7 +2170,10 @@ entry: define <8 x i16> @test_vmlsq_lane_s16_0(<8 x i16> %a, <8 x i16> %b, <4 x i16> %v) { ; CHECK-LABEL: test_vmlsq_lane_s16_0: -; CHECK: mls {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-NEXT: mls v0.8h, v1.8h, v2.h[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> zeroinitializer %mul = mul <8 x i16> %shuffle, %b @@ -1589,7 +2183,10 @@ entry: define <2 x i32> @test_vmls_lane_s32_0(<2 x i32> %a, <2 x i32> %b, <2 x i32> %v) { ; CHECK-LABEL: test_vmls_lane_s32_0: -; CHECK: mls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-NEXT: mls v0.2s, v1.2s, v2.s[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer %mul = mul <2 x i32> %shuffle, %b @@ -1599,7 +2196,10 @@ entry: define <4 x i32> @test_vmlsq_lane_s32_0(<4 x i32> %a, <4 x i32> %b, <2 x i32> %v) { ; CHECK-LABEL: test_vmlsq_lane_s32_0: -; CHECK: mls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-NEXT: mls v0.4s, v1.4s, v2.s[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> zeroinitializer %mul = mul <4 x i32> %shuffle, %b @@ -1609,7 +2209,9 @@ entry: define <4 x i16> @test_vmls_laneq_s16_0(<4 x i16> %a, <4 x i16> %b, <8 x i16> %v) { ; CHECK-LABEL: test_vmls_laneq_s16_0: -; CHECK: mls {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mls v0.4h, v1.4h, v2.h[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer %mul = mul <4 x i16> %shuffle, %b @@ -1619,7 +2221,9 @@ entry: define <8 x i16> @test_vmlsq_laneq_s16_0(<8 x i16> %a, <8 x i16> %b, <8 x i16> %v) { ; CHECK-LABEL: test_vmlsq_laneq_s16_0: -; CHECK: mls {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mls v0.8h, v1.8h, v2.h[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> zeroinitializer %mul = mul <8 x i16> %shuffle, %b @@ -1629,7 +2233,9 @@ entry: define <2 x i32> @test_vmls_laneq_s32_0(<2 x i32> %a, <2 x i32> %b, <4 x i32> %v) { ; CHECK-LABEL: test_vmls_laneq_s32_0: -; CHECK: mls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mls v0.2s, v1.2s, v2.s[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer %mul = mul <2 x i32> %shuffle, %b @@ -1639,7 +2245,9 @@ entry: define <4 x i32> @test_vmlsq_laneq_s32_0(<4 x i32> %a, <4 x i32> %b, <4 x i32> %v) { ; CHECK-LABEL: test_vmlsq_laneq_s32_0: -; CHECK: mls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mls v0.4s, v1.4s, v2.s[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> zeroinitializer %mul = mul <4 x i32> %shuffle, %b @@ -1649,7 +2257,10 @@ entry: define <4 x i16> @test_vmul_lane_s16_0(<4 x i16> %a, <4 x i16> %v) { ; CHECK-LABEL: test_vmul_lane_s16_0: -; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-NEXT: mul v0.4h, v0.4h, v1.h[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer %mul = mul <4 x i16> %shuffle, %a @@ -1658,7 +2269,10 @@ entry: define <8 x i16> @test_vmulq_lane_s16_0(<8 x i16> %a, <4 x i16> %v) { ; CHECK-LABEL: test_vmulq_lane_s16_0: -; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-NEXT: mul v0.8h, v0.8h, v1.h[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> zeroinitializer %mul = mul <8 x i16> %shuffle, %a @@ -1667,7 +2281,10 @@ entry: define <2 x i32> @test_vmul_lane_s32_0(<2 x i32> %a, <2 x i32> %v) { ; CHECK-LABEL: test_vmul_lane_s32_0: -; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-NEXT: mul v0.2s, v0.2s, v1.s[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer %mul = mul <2 x i32> %shuffle, %a @@ -1676,7 +2293,10 @@ entry: define <4 x i32> @test_vmulq_lane_s32_0(<4 x i32> %a, <2 x i32> %v) { ; CHECK-LABEL: test_vmulq_lane_s32_0: -; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-NEXT: mul v0.4s, v0.4s, v1.s[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> zeroinitializer %mul = mul <4 x i32> %shuffle, %a @@ -1685,7 +2305,10 @@ entry: define <4 x i16> @test_vmul_lane_u16_0(<4 x i16> %a, <4 x i16> %v) { ; CHECK-LABEL: test_vmul_lane_u16_0: -; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-NEXT: mul v0.4h, v0.4h, v1.h[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer %mul = mul <4 x i16> %shuffle, %a @@ -1694,7 +2317,10 @@ entry: define <8 x i16> @test_vmulq_lane_u16_0(<8 x i16> %a, <4 x i16> %v) { ; CHECK-LABEL: test_vmulq_lane_u16_0: -; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-NEXT: mul v0.8h, v0.8h, v1.h[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> zeroinitializer %mul = mul <8 x i16> %shuffle, %a @@ -1703,7 +2329,10 @@ entry: define <2 x i32> @test_vmul_lane_u32_0(<2 x i32> %a, <2 x i32> %v) { ; CHECK-LABEL: test_vmul_lane_u32_0: -; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-NEXT: mul v0.2s, v0.2s, v1.s[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer %mul = mul <2 x i32> %shuffle, %a @@ -1712,7 +2341,10 @@ entry: define <4 x i32> @test_vmulq_lane_u32_0(<4 x i32> %a, <2 x i32> %v) { ; CHECK-LABEL: test_vmulq_lane_u32_0: -; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-NEXT: mul v0.4s, v0.4s, v1.s[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> zeroinitializer %mul = mul <4 x i32> %shuffle, %a @@ -1721,7 +2353,9 @@ entry: define <4 x i16> @test_vmul_laneq_s16_0(<4 x i16> %a, <8 x i16> %v) { ; CHECK-LABEL: test_vmul_laneq_s16_0: -; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mul v0.4h, v0.4h, v1.h[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer %mul = mul <4 x i16> %shuffle, %a @@ -1730,7 +2364,9 @@ entry: define <8 x i16> @test_vmulq_laneq_s16_0(<8 x i16> %a, <8 x i16> %v) { ; CHECK-LABEL: test_vmulq_laneq_s16_0: -; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mul v0.8h, v0.8h, v1.h[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> zeroinitializer %mul = mul <8 x i16> %shuffle, %a @@ -1739,7 +2375,9 @@ entry: define <2 x i32> @test_vmul_laneq_s32_0(<2 x i32> %a, <4 x i32> %v) { ; CHECK-LABEL: test_vmul_laneq_s32_0: -; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mul v0.2s, v0.2s, v1.s[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer %mul = mul <2 x i32> %shuffle, %a @@ -1748,7 +2386,9 @@ entry: define <4 x i32> @test_vmulq_laneq_s32_0(<4 x i32> %a, <4 x i32> %v) { ; CHECK-LABEL: test_vmulq_laneq_s32_0: -; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mul v0.4s, v0.4s, v1.s[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> zeroinitializer %mul = mul <4 x i32> %shuffle, %a @@ -1757,7 +2397,9 @@ entry: define <4 x i16> @test_vmul_laneq_u16_0(<4 x i16> %a, <8 x i16> %v) { ; CHECK-LABEL: test_vmul_laneq_u16_0: -; CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mul v0.4h, v0.4h, v1.h[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer %mul = mul <4 x i16> %shuffle, %a @@ -1766,7 +2408,9 @@ entry: define <8 x i16> @test_vmulq_laneq_u16_0(<8 x i16> %a, <8 x i16> %v) { ; CHECK-LABEL: test_vmulq_laneq_u16_0: -; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mul v0.8h, v0.8h, v1.h[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> zeroinitializer %mul = mul <8 x i16> %shuffle, %a @@ -1775,7 +2419,9 @@ entry: define <2 x i32> @test_vmul_laneq_u32_0(<2 x i32> %a, <4 x i32> %v) { ; CHECK-LABEL: test_vmul_laneq_u32_0: -; CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mul v0.2s, v0.2s, v1.s[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer %mul = mul <2 x i32> %shuffle, %a @@ -1784,7 +2430,9 @@ entry: define <4 x i32> @test_vmulq_laneq_u32_0(<4 x i32> %a, <4 x i32> %v) { ; CHECK-LABEL: test_vmulq_laneq_u32_0: -; CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mul v0.4s, v0.4s, v1.s[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> zeroinitializer %mul = mul <4 x i32> %shuffle, %a @@ -1792,11 +2440,24 @@ entry: } define <2 x float> @test_vfma_lane_f32_0(<2 x float> %a, <2 x float> %b, <2 x float> %v) { -; CHECK-LABEL: test_vfma_lane_f32_0: -; GENERIC: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] -; EXYNOSM1: dup [[V:v[0-9]+]].2s, {{v[0-9]+}}.s[0] -; EXYNOSM1: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, [[V]].2s -; EXYNOSM3: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +; GENERIC-LABEL: test_vfma_lane_f32_0: +; GENERIC: // %bb.0: // %entry +; GENERIC-NEXT: // kill: def $d2 killed $d2 def $q2 +; GENERIC-NEXT: fmla v0.2s, v1.2s, v2.s[0] +; GENERIC-NEXT: ret +; +; EXYNOSM1-LABEL: test_vfma_lane_f32_0: +; EXYNOSM1: // %bb.0: // %entry +; EXYNOSM1-NEXT: // kill: def $d2 killed $d2 def $q2 +; EXYNOSM1-NEXT: dup v2.2s, v2.s[0] +; EXYNOSM1-NEXT: fmla v0.2s, v1.2s, v2.2s +; EXYNOSM1-NEXT: ret +; +; EXYNOSM3-LABEL: test_vfma_lane_f32_0: +; EXYNOSM3: // %bb.0: // %entry +; EXYNOSM3-NEXT: // kill: def $d2 killed $d2 def $q2 +; EXYNOSM3-NEXT: fmla v0.2s, v1.2s, v2.s[0] +; EXYNOSM3-NEXT: ret entry: %lane = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> zeroinitializer %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a) @@ -1804,11 +2465,24 @@ entry: } define <4 x float> @test_vfmaq_lane_f32_0(<4 x float> %a, <4 x float> %b, <2 x float> %v) { -; CHECK-LABEL: test_vfmaq_lane_f32_0: -; GENERIC: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] -; EXYNOSM1: dup [[V:v[0-9]+]].4s, {{v[0-9]+}}.s[0] -; EXYNOSM1: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, [[V]].4s -; EXYNOSM3: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +; GENERIC-LABEL: test_vfmaq_lane_f32_0: +; GENERIC: // %bb.0: // %entry +; GENERIC-NEXT: // kill: def $d2 killed $d2 def $q2 +; GENERIC-NEXT: fmla v0.4s, v1.4s, v2.s[0] +; GENERIC-NEXT: ret +; +; EXYNOSM1-LABEL: test_vfmaq_lane_f32_0: +; EXYNOSM1: // %bb.0: // %entry +; EXYNOSM1-NEXT: // kill: def $d2 killed $d2 def $q2 +; EXYNOSM1-NEXT: dup v2.4s, v2.s[0] +; EXYNOSM1-NEXT: fmla v0.4s, v1.4s, v2.4s +; EXYNOSM1-NEXT: ret +; +; EXYNOSM3-LABEL: test_vfmaq_lane_f32_0: +; EXYNOSM3: // %bb.0: // %entry +; EXYNOSM3-NEXT: // kill: def $d2 killed $d2 def $q2 +; EXYNOSM3-NEXT: fmla v0.4s, v1.4s, v2.s[0] +; EXYNOSM3-NEXT: ret entry: %lane = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> zeroinitializer %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a) @@ -1816,11 +2490,21 @@ entry: } define <2 x float> @test_vfma_laneq_f32_0(<2 x float> %a, <2 x float> %b, <4 x float> %v) { -; CHECK-LABEL: test_vfma_laneq_f32_0: -; GENERIC: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] -; EXYNOSM1: dup [[V:v[0-9]+]].2s, {{v[0-9]+}}.s[0] -; EXYNOSM1: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, [[V]].2s -; EXYNOSM3: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +; GENERIC-LABEL: test_vfma_laneq_f32_0: +; GENERIC: // %bb.0: // %entry +; GENERIC-NEXT: fmla v0.2s, v1.2s, v2.s[0] +; GENERIC-NEXT: ret +; +; EXYNOSM1-LABEL: test_vfma_laneq_f32_0: +; EXYNOSM1: // %bb.0: // %entry +; EXYNOSM1-NEXT: dup v2.2s, v2.s[0] +; EXYNOSM1-NEXT: fmla v0.2s, v1.2s, v2.2s +; EXYNOSM1-NEXT: ret +; +; EXYNOSM3-LABEL: test_vfma_laneq_f32_0: +; EXYNOSM3: // %bb.0: // %entry +; EXYNOSM3-NEXT: fmla v0.2s, v1.2s, v2.s[0] +; EXYNOSM3-NEXT: ret entry: %lane = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> zeroinitializer %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a) @@ -1828,11 +2512,21 @@ entry: } define <4 x float> @test_vfmaq_laneq_f32_0(<4 x float> %a, <4 x float> %b, <4 x float> %v) { -; CHECK-LABEL: test_vfmaq_laneq_f32_0: -; GENERIC: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] -; EXYNOSM1: dup [[V:v[0-9]+]].4s, {{v[0-9]+}}.s[0] -; EXYNOSM1: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, [[V]].4s -; EXYNOSM3: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +; GENERIC-LABEL: test_vfmaq_laneq_f32_0: +; GENERIC: // %bb.0: // %entry +; GENERIC-NEXT: fmla v0.4s, v1.4s, v2.s[0] +; GENERIC-NEXT: ret +; +; EXYNOSM1-LABEL: test_vfmaq_laneq_f32_0: +; EXYNOSM1: // %bb.0: // %entry +; EXYNOSM1-NEXT: dup v2.4s, v2.s[0] +; EXYNOSM1-NEXT: fmla v0.4s, v1.4s, v2.4s +; EXYNOSM1-NEXT: ret +; +; EXYNOSM3-LABEL: test_vfmaq_laneq_f32_0: +; EXYNOSM3: // %bb.0: // %entry +; EXYNOSM3-NEXT: fmla v0.4s, v1.4s, v2.s[0] +; EXYNOSM3-NEXT: ret entry: %lane = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> zeroinitializer %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a) @@ -1840,11 +2534,24 @@ entry: } define <2 x float> @test_vfms_lane_f32_0(<2 x float> %a, <2 x float> %b, <2 x float> %v) { -; CHECK-LABEL: test_vfms_lane_f32_0: -; GENERIC: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] -; EXYNOSM1: dup [[V:v[0-9]+]].2s, {{v[0-9]+}}.s[0] -; EXYNOSM1: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, [[V]].2s -; EXYNOSM3: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +; GENERIC-LABEL: test_vfms_lane_f32_0: +; GENERIC: // %bb.0: // %entry +; GENERIC-NEXT: // kill: def $d2 killed $d2 def $q2 +; GENERIC-NEXT: fmls v0.2s, v1.2s, v2.s[0] +; GENERIC-NEXT: ret +; +; EXYNOSM1-LABEL: test_vfms_lane_f32_0: +; EXYNOSM1: // %bb.0: // %entry +; EXYNOSM1-NEXT: // kill: def $d2 killed $d2 def $q2 +; EXYNOSM1-NEXT: dup v2.2s, v2.s[0] +; EXYNOSM1-NEXT: fmls v0.2s, v1.2s, v2.2s +; EXYNOSM1-NEXT: ret +; +; EXYNOSM3-LABEL: test_vfms_lane_f32_0: +; EXYNOSM3: // %bb.0: // %entry +; EXYNOSM3-NEXT: // kill: def $d2 killed $d2 def $q2 +; EXYNOSM3-NEXT: fmls v0.2s, v1.2s, v2.s[0] +; EXYNOSM3-NEXT: ret entry: %sub = fsub <2 x float> , %v %lane = shufflevector <2 x float> %sub, <2 x float> undef, <2 x i32> zeroinitializer @@ -1853,11 +2560,24 @@ entry: } define <4 x float> @test_vfmsq_lane_f32_0(<4 x float> %a, <4 x float> %b, <2 x float> %v) { -; CHECK-LABEL: test_vfmsq_lane_f32_0: -; GENERIC: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] -; EXYNOSM1: dup [[V:v[0-9]+]].4s, {{v[0-9]+}}.s[0] -; EXYNOSM1: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, [[V]].4s -; EXYNOSM3: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +; GENERIC-LABEL: test_vfmsq_lane_f32_0: +; GENERIC: // %bb.0: // %entry +; GENERIC-NEXT: // kill: def $d2 killed $d2 def $q2 +; GENERIC-NEXT: fmls v0.4s, v1.4s, v2.s[0] +; GENERIC-NEXT: ret +; +; EXYNOSM1-LABEL: test_vfmsq_lane_f32_0: +; EXYNOSM1: // %bb.0: // %entry +; EXYNOSM1-NEXT: // kill: def $d2 killed $d2 def $q2 +; EXYNOSM1-NEXT: dup v2.4s, v2.s[0] +; EXYNOSM1-NEXT: fmls v0.4s, v1.4s, v2.4s +; EXYNOSM1-NEXT: ret +; +; EXYNOSM3-LABEL: test_vfmsq_lane_f32_0: +; EXYNOSM3: // %bb.0: // %entry +; EXYNOSM3-NEXT: // kill: def $d2 killed $d2 def $q2 +; EXYNOSM3-NEXT: fmls v0.4s, v1.4s, v2.s[0] +; EXYNOSM3-NEXT: ret entry: %sub = fsub <2 x float> , %v %lane = shufflevector <2 x float> %sub, <2 x float> undef, <4 x i32> zeroinitializer @@ -1866,11 +2586,21 @@ entry: } define <2 x float> @test_vfms_laneq_f32_0(<2 x float> %a, <2 x float> %b, <4 x float> %v) { -; CHECK-LABEL: test_vfms_laneq_f32_0: -; GENERIC: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] -; EXYNOSM1: dup [[V:v[0-9]+]].2s, {{v[0-9]+}}.s[0] -; EXYNOSM1: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, [[V]].2s -; EXYNOSM3: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +; GENERIC-LABEL: test_vfms_laneq_f32_0: +; GENERIC: // %bb.0: // %entry +; GENERIC-NEXT: fmls v0.2s, v1.2s, v2.s[0] +; GENERIC-NEXT: ret +; +; EXYNOSM1-LABEL: test_vfms_laneq_f32_0: +; EXYNOSM1: // %bb.0: // %entry +; EXYNOSM1-NEXT: dup v2.2s, v2.s[0] +; EXYNOSM1-NEXT: fmls v0.2s, v1.2s, v2.2s +; EXYNOSM1-NEXT: ret +; +; EXYNOSM3-LABEL: test_vfms_laneq_f32_0: +; EXYNOSM3: // %bb.0: // %entry +; EXYNOSM3-NEXT: fmls v0.2s, v1.2s, v2.s[0] +; EXYNOSM3-NEXT: ret entry: %sub = fsub <4 x float> , %v %lane = shufflevector <4 x float> %sub, <4 x float> undef, <2 x i32> zeroinitializer @@ -1879,11 +2609,21 @@ entry: } define <4 x float> @test_vfmsq_laneq_f32_0(<4 x float> %a, <4 x float> %b, <4 x float> %v) { -; CHECK-LABEL: test_vfmsq_laneq_f32_0: -; GENERIC: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] -; EXYNOSM1: dup [[V:v[0-9]+]].4s, {{v[0-9]+}}.s[0] -; EXYNOSM1: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, [[V]].4s -; EXYNOSM3: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +; GENERIC-LABEL: test_vfmsq_laneq_f32_0: +; GENERIC: // %bb.0: // %entry +; GENERIC-NEXT: fmls v0.4s, v1.4s, v2.s[0] +; GENERIC-NEXT: ret +; +; EXYNOSM1-LABEL: test_vfmsq_laneq_f32_0: +; EXYNOSM1: // %bb.0: // %entry +; EXYNOSM1-NEXT: dup v2.4s, v2.s[0] +; EXYNOSM1-NEXT: fmls v0.4s, v1.4s, v2.4s +; EXYNOSM1-NEXT: ret +; +; EXYNOSM3-LABEL: test_vfmsq_laneq_f32_0: +; EXYNOSM3: // %bb.0: // %entry +; EXYNOSM3-NEXT: fmls v0.4s, v1.4s, v2.s[0] +; EXYNOSM3-NEXT: ret entry: %sub = fsub <4 x float> , %v %lane = shufflevector <4 x float> %sub, <4 x float> undef, <4 x i32> zeroinitializer @@ -1892,11 +2632,21 @@ entry: } define <2 x double> @test_vfmaq_laneq_f64_0(<2 x double> %a, <2 x double> %b, <2 x double> %v) { -; CHECK-LABEL: test_vfmaq_laneq_f64_0: -; GENERIC: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] -; EXYNOSM1: dup [[V:v[0-9]+]].2d, {{v[0-9]+}}.d[0] -; EXYNOSM1: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, [[V]].2d -; EXYNOSM3: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] +; GENERIC-LABEL: test_vfmaq_laneq_f64_0: +; GENERIC: // %bb.0: // %entry +; GENERIC-NEXT: fmla v0.2d, v1.2d, v2.d[0] +; GENERIC-NEXT: ret +; +; EXYNOSM1-LABEL: test_vfmaq_laneq_f64_0: +; EXYNOSM1: // %bb.0: // %entry +; EXYNOSM1-NEXT: dup v2.2d, v2.d[0] +; EXYNOSM1-NEXT: fmla v0.2d, v1.2d, v2.2d +; EXYNOSM1-NEXT: ret +; +; EXYNOSM3-LABEL: test_vfmaq_laneq_f64_0: +; EXYNOSM3: // %bb.0: // %entry +; EXYNOSM3-NEXT: fmla v0.2d, v1.2d, v2.d[0] +; EXYNOSM3-NEXT: ret entry: %lane = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> zeroinitializer %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %lane, <2 x double> %b, <2 x double> %a) @@ -1904,11 +2654,21 @@ entry: } define <2 x double> @test_vfmsq_laneq_f64_0(<2 x double> %a, <2 x double> %b, <2 x double> %v) { -; CHECK-LABEL: test_vfmsq_laneq_f64_0: -; GENERIC: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] -; EXYNOSM1: dup [[V:v[0-9]+]].2d, {{v[0-9]+}}.d[0] -; EXYNOSM1: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, [[V]].2d -; EXYNOSM3: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] +; GENERIC-LABEL: test_vfmsq_laneq_f64_0: +; GENERIC: // %bb.0: // %entry +; GENERIC-NEXT: fmls v0.2d, v1.2d, v2.d[0] +; GENERIC-NEXT: ret +; +; EXYNOSM1-LABEL: test_vfmsq_laneq_f64_0: +; EXYNOSM1: // %bb.0: // %entry +; EXYNOSM1-NEXT: dup v2.2d, v2.d[0] +; EXYNOSM1-NEXT: fmls v0.2d, v1.2d, v2.2d +; EXYNOSM1-NEXT: ret +; +; EXYNOSM3-LABEL: test_vfmsq_laneq_f64_0: +; EXYNOSM3: // %bb.0: // %entry +; EXYNOSM3-NEXT: fmls v0.2d, v1.2d, v2.d[0] +; EXYNOSM3-NEXT: ret entry: %sub = fsub <2 x double> , %v %lane = shufflevector <2 x double> %sub, <2 x double> undef, <2 x i32> zeroinitializer @@ -1918,7 +2678,10 @@ entry: define <4 x i32> @test_vmlal_lane_s16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) { ; CHECK-LABEL: test_vmlal_lane_s16_0: -; CHECK: mlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-NEXT: smlal v0.4s, v1.4h, v2.h[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) @@ -1928,7 +2691,10 @@ entry: define <2 x i64> @test_vmlal_lane_s32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) { ; CHECK-LABEL: test_vmlal_lane_s32_0: -; CHECK: mlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-NEXT: smlal v0.2d, v1.2s, v2.s[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) @@ -1938,7 +2704,9 @@ entry: define <4 x i32> @test_vmlal_laneq_s16_0(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) { ; CHECK-LABEL: test_vmlal_laneq_s16_0: -; CHECK: mlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: smlal v0.4s, v1.4h, v2.h[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) @@ -1948,7 +2716,9 @@ entry: define <2 x i64> @test_vmlal_laneq_s32_0(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) { ; CHECK-LABEL: test_vmlal_laneq_s32_0: -; CHECK: mlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: smlal v0.2d, v1.2s, v2.s[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) @@ -1958,7 +2728,10 @@ entry: define <4 x i32> @test_vmlal_high_lane_s16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) { ; CHECK-LABEL: test_vmlal_high_lane_s16_0: -; CHECK: mlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-NEXT: smlal2 v0.4s, v1.8h, v2.h[0] +; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer @@ -1969,7 +2742,10 @@ entry: define <2 x i64> @test_vmlal_high_lane_s32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) { ; CHECK-LABEL: test_vmlal_high_lane_s32_0: -; CHECK: mlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-NEXT: smlal2 v0.2d, v1.4s, v2.s[0] +; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer @@ -1980,7 +2756,9 @@ entry: define <4 x i32> @test_vmlal_high_laneq_s16_0(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) { ; CHECK-LABEL: test_vmlal_high_laneq_s16_0: -; CHECK: mlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: smlal2 v0.4s, v1.8h, v2.h[0] +; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer @@ -1991,7 +2769,9 @@ entry: define <2 x i64> @test_vmlal_high_laneq_s32_0(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) { ; CHECK-LABEL: test_vmlal_high_laneq_s32_0: -; CHECK: mlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: smlal2 v0.2d, v1.4s, v2.s[0] +; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer @@ -2002,7 +2782,10 @@ entry: define <4 x i32> @test_vmlsl_lane_s16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) { ; CHECK-LABEL: test_vmlsl_lane_s16_0: -; CHECK: mlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-NEXT: smlsl v0.4s, v1.4h, v2.h[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) @@ -2012,7 +2795,10 @@ entry: define <2 x i64> @test_vmlsl_lane_s32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) { ; CHECK-LABEL: test_vmlsl_lane_s32_0: -; CHECK: mlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-NEXT: smlsl v0.2d, v1.2s, v2.s[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) @@ -2022,7 +2808,9 @@ entry: define <4 x i32> @test_vmlsl_laneq_s16_0(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) { ; CHECK-LABEL: test_vmlsl_laneq_s16_0: -; CHECK: mlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: smlsl v0.4s, v1.4h, v2.h[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) @@ -2032,7 +2820,9 @@ entry: define <2 x i64> @test_vmlsl_laneq_s32_0(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) { ; CHECK-LABEL: test_vmlsl_laneq_s32_0: -; CHECK: mlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: smlsl v0.2d, v1.2s, v2.s[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) @@ -2042,7 +2832,10 @@ entry: define <4 x i32> @test_vmlsl_high_lane_s16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) { ; CHECK-LABEL: test_vmlsl_high_lane_s16_0: -; CHECK: mlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-NEXT: smlsl2 v0.4s, v1.8h, v2.h[0] +; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer @@ -2053,7 +2846,10 @@ entry: define <2 x i64> @test_vmlsl_high_lane_s32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) { ; CHECK-LABEL: test_vmlsl_high_lane_s32_0: -; CHECK: mlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-NEXT: smlsl2 v0.2d, v1.4s, v2.s[0] +; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer @@ -2064,7 +2860,9 @@ entry: define <4 x i32> @test_vmlsl_high_laneq_s16_0(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) { ; CHECK-LABEL: test_vmlsl_high_laneq_s16_0: -; CHECK: mlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: smlsl2 v0.4s, v1.8h, v2.h[0] +; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer @@ -2075,7 +2873,9 @@ entry: define <2 x i64> @test_vmlsl_high_laneq_s32_0(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) { ; CHECK-LABEL: test_vmlsl_high_laneq_s32_0: -; CHECK: mlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: smlsl2 v0.2d, v1.4s, v2.s[0] +; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer @@ -2086,7 +2886,10 @@ entry: define <4 x i32> @test_vmlal_lane_u16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) { ; CHECK-LABEL: test_vmlal_lane_u16_0: -; CHECK: mlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-NEXT: umlal v0.4s, v1.4h, v2.h[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) @@ -2096,7 +2899,10 @@ entry: define <2 x i64> @test_vmlal_lane_u32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) { ; CHECK-LABEL: test_vmlal_lane_u32_0: -; CHECK: mlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-NEXT: umlal v0.2d, v1.2s, v2.s[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) @@ -2106,7 +2912,9 @@ entry: define <4 x i32> @test_vmlal_laneq_u16_0(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) { ; CHECK-LABEL: test_vmlal_laneq_u16_0: -; CHECK: mlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: umlal v0.4s, v1.4h, v2.h[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) @@ -2116,7 +2924,9 @@ entry: define <2 x i64> @test_vmlal_laneq_u32_0(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) { ; CHECK-LABEL: test_vmlal_laneq_u32_0: -; CHECK: mlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: umlal v0.2d, v1.2s, v2.s[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) @@ -2126,7 +2936,10 @@ entry: define <4 x i32> @test_vmlal_high_lane_u16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) { ; CHECK-LABEL: test_vmlal_high_lane_u16_0: -; CHECK: mlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-NEXT: umlal2 v0.4s, v1.8h, v2.h[0] +; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer @@ -2137,7 +2950,10 @@ entry: define <2 x i64> @test_vmlal_high_lane_u32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) { ; CHECK-LABEL: test_vmlal_high_lane_u32_0: -; CHECK: mlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-NEXT: umlal2 v0.2d, v1.4s, v2.s[0] +; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer @@ -2148,7 +2964,9 @@ entry: define <4 x i32> @test_vmlal_high_laneq_u16_0(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) { ; CHECK-LABEL: test_vmlal_high_laneq_u16_0: -; CHECK: mlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: umlal2 v0.4s, v1.8h, v2.h[0] +; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer @@ -2159,7 +2977,9 @@ entry: define <2 x i64> @test_vmlal_high_laneq_u32_0(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) { ; CHECK-LABEL: test_vmlal_high_laneq_u32_0: -; CHECK: mlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: umlal2 v0.2d, v1.4s, v2.s[0] +; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer @@ -2170,7 +2990,10 @@ entry: define <4 x i32> @test_vmlsl_lane_u16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) { ; CHECK-LABEL: test_vmlsl_lane_u16_0: -; CHECK: mlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-NEXT: umlsl v0.4s, v1.4h, v2.h[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) @@ -2180,7 +3003,10 @@ entry: define <2 x i64> @test_vmlsl_lane_u32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) { ; CHECK-LABEL: test_vmlsl_lane_u32_0: -; CHECK: mlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-NEXT: umlsl v0.2d, v1.2s, v2.s[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) @@ -2190,7 +3016,9 @@ entry: define <4 x i32> @test_vmlsl_laneq_u16_0(<4 x i32> %a, <4 x i16> %b, <8 x i16> %v) { ; CHECK-LABEL: test_vmlsl_laneq_u16_0: -; CHECK: mlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: umlsl v0.4s, v1.4h, v2.h[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) @@ -2200,7 +3028,9 @@ entry: define <2 x i64> @test_vmlsl_laneq_u32_0(<2 x i64> %a, <2 x i32> %b, <4 x i32> %v) { ; CHECK-LABEL: test_vmlsl_laneq_u32_0: -; CHECK: mlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: umlsl v0.2d, v1.2s, v2.s[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) @@ -2210,7 +3040,10 @@ entry: define <4 x i32> @test_vmlsl_high_lane_u16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) { ; CHECK-LABEL: test_vmlsl_high_lane_u16_0: -; CHECK: mlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-NEXT: umlsl2 v0.4s, v1.8h, v2.h[0] +; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer @@ -2221,7 +3054,10 @@ entry: define <2 x i64> @test_vmlsl_high_lane_u32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) { ; CHECK-LABEL: test_vmlsl_high_lane_u32_0: -; CHECK: mlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-NEXT: umlsl2 v0.2d, v1.4s, v2.s[0] +; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer @@ -2232,7 +3068,9 @@ entry: define <4 x i32> @test_vmlsl_high_laneq_u16_0(<4 x i32> %a, <8 x i16> %b, <8 x i16> %v) { ; CHECK-LABEL: test_vmlsl_high_laneq_u16_0: -; CHECK: mlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: umlsl2 v0.4s, v1.8h, v2.h[0] +; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer @@ -2243,7 +3081,9 @@ entry: define <2 x i64> @test_vmlsl_high_laneq_u32_0(<2 x i64> %a, <4 x i32> %b, <4 x i32> %v) { ; CHECK-LABEL: test_vmlsl_high_laneq_u32_0: -; CHECK: mlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: umlsl2 v0.2d, v1.4s, v2.s[0] +; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer @@ -2254,7 +3094,10 @@ entry: define <4 x i32> @test_vmull_lane_s16_0(<4 x i16> %a, <4 x i16> %v) { ; CHECK-LABEL: test_vmull_lane_s16_0: -; CHECK: mull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-NEXT: smull v0.4s, v0.4h, v1.h[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) @@ -2263,7 +3106,10 @@ entry: define <2 x i64> @test_vmull_lane_s32_0(<2 x i32> %a, <2 x i32> %v) { ; CHECK-LABEL: test_vmull_lane_s32_0: -; CHECK: mull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-NEXT: smull v0.2d, v0.2s, v1.s[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) @@ -2272,7 +3118,10 @@ entry: define <4 x i32> @test_vmull_lane_u16_0(<4 x i16> %a, <4 x i16> %v) { ; CHECK-LABEL: test_vmull_lane_u16_0: -; CHECK: mull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-NEXT: umull v0.4s, v0.4h, v1.h[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) @@ -2281,7 +3130,10 @@ entry: define <2 x i64> @test_vmull_lane_u32_0(<2 x i32> %a, <2 x i32> %v) { ; CHECK-LABEL: test_vmull_lane_u32_0: -; CHECK: mull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-NEXT: umull v0.2d, v0.2s, v1.s[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) @@ -2290,7 +3142,10 @@ entry: define <4 x i32> @test_vmull_high_lane_s16_0(<8 x i16> %a, <4 x i16> %v) { ; CHECK-LABEL: test_vmull_high_lane_s16_0: -; CHECK: mull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-NEXT: smull2 v0.4s, v0.8h, v1.h[0] +; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer @@ -2300,7 +3155,10 @@ entry: define <2 x i64> @test_vmull_high_lane_s32_0(<4 x i32> %a, <2 x i32> %v) { ; CHECK-LABEL: test_vmull_high_lane_s32_0: -; CHECK: mull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-NEXT: smull2 v0.2d, v0.4s, v1.s[0] +; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer @@ -2310,7 +3168,10 @@ entry: define <4 x i32> @test_vmull_high_lane_u16_0(<8 x i16> %a, <4 x i16> %v) { ; CHECK-LABEL: test_vmull_high_lane_u16_0: -; CHECK: mull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-NEXT: umull2 v0.4s, v0.8h, v1.h[0] +; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer @@ -2320,7 +3181,10 @@ entry: define <2 x i64> @test_vmull_high_lane_u32_0(<4 x i32> %a, <2 x i32> %v) { ; CHECK-LABEL: test_vmull_high_lane_u32_0: -; CHECK: mull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-NEXT: umull2 v0.2d, v0.4s, v1.s[0] +; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer @@ -2330,7 +3194,9 @@ entry: define <4 x i32> @test_vmull_laneq_s16_0(<4 x i16> %a, <8 x i16> %v) { ; CHECK-LABEL: test_vmull_laneq_s16_0: -; CHECK: mull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: smull v0.4s, v0.4h, v1.h[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) @@ -2339,7 +3205,9 @@ entry: define <2 x i64> @test_vmull_laneq_s32_0(<2 x i32> %a, <4 x i32> %v) { ; CHECK-LABEL: test_vmull_laneq_s32_0: -; CHECK: mull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: smull v0.2d, v0.2s, v1.s[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) @@ -2348,7 +3216,9 @@ entry: define <4 x i32> @test_vmull_laneq_u16_0(<4 x i16> %a, <8 x i16> %v) { ; CHECK-LABEL: test_vmull_laneq_u16_0: -; CHECK: mull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: umull v0.4s, v0.4h, v1.h[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) @@ -2357,7 +3227,9 @@ entry: define <2 x i64> @test_vmull_laneq_u32_0(<2 x i32> %a, <4 x i32> %v) { ; CHECK-LABEL: test_vmull_laneq_u32_0: -; CHECK: mull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: umull v0.2d, v0.2s, v1.s[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) @@ -2366,7 +3238,9 @@ entry: define <4 x i32> @test_vmull_high_laneq_s16_0(<8 x i16> %a, <8 x i16> %v) { ; CHECK-LABEL: test_vmull_high_laneq_s16_0: -; CHECK: mull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: smull2 v0.4s, v0.8h, v1.h[0] +; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer @@ -2376,7 +3250,9 @@ entry: define <2 x i64> @test_vmull_high_laneq_s32_0(<4 x i32> %a, <4 x i32> %v) { ; CHECK-LABEL: test_vmull_high_laneq_s32_0: -; CHECK: mull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: smull2 v0.2d, v0.4s, v1.s[0] +; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer @@ -2386,7 +3262,9 @@ entry: define <4 x i32> @test_vmull_high_laneq_u16_0(<8 x i16> %a, <8 x i16> %v) { ; CHECK-LABEL: test_vmull_high_laneq_u16_0: -; CHECK: mull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: umull2 v0.4s, v0.8h, v1.h[0] +; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer @@ -2396,7 +3274,9 @@ entry: define <2 x i64> @test_vmull_high_laneq_u32_0(<4 x i32> %a, <4 x i32> %v) { ; CHECK-LABEL: test_vmull_high_laneq_u32_0: -; CHECK: mull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: umull2 v0.2d, v0.4s, v1.s[0] +; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer @@ -2406,7 +3286,10 @@ entry: define <4 x i32> @test_vqdmlal_lane_s16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) { ; CHECK-LABEL: test_vqdmlal_lane_s16_0: -; CHECK: qdmlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-NEXT: sqdmlal v0.4s, v1.4h, v2.h[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer %vqdmlal2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) @@ -2416,7 +3299,10 @@ entry: define <2 x i64> @test_vqdmlal_lane_s32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) { ; CHECK-LABEL: test_vqdmlal_lane_s32_0: -; CHECK: qdmlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-NEXT: sqdmlal v0.2d, v1.2s, v2.s[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer %vqdmlal2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) @@ -2426,7 +3312,10 @@ entry: define <4 x i32> @test_vqdmlal_high_lane_s16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) { ; CHECK-LABEL: test_vqdmlal_high_lane_s16_0: -; CHECK: qdmlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-NEXT: sqdmlal2 v0.4s, v1.8h, v2.h[0] +; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer @@ -2437,7 +3326,10 @@ entry: define <2 x i64> @test_vqdmlal_high_lane_s32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) { ; CHECK-LABEL: test_vqdmlal_high_lane_s32_0: -; CHECK: qdmlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-NEXT: sqdmlal2 v0.2d, v1.4s, v2.s[0] +; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer @@ -2448,7 +3340,10 @@ entry: define <4 x i32> @test_vqdmlsl_lane_s16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) { ; CHECK-LABEL: test_vqdmlsl_lane_s16_0: -; CHECK: qdmlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-NEXT: sqdmlsl v0.4s, v1.4h, v2.h[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer %vqdmlsl2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> %shuffle) @@ -2458,7 +3353,10 @@ entry: define <2 x i64> @test_vqdmlsl_lane_s32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) { ; CHECK-LABEL: test_vqdmlsl_lane_s32_0: -; CHECK: qdmlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-NEXT: sqdmlsl v0.2d, v1.2s, v2.s[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer %vqdmlsl2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> %shuffle) @@ -2468,7 +3366,10 @@ entry: define <4 x i32> @test_vqdmlsl_high_lane_s16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) { ; CHECK-LABEL: test_vqdmlsl_high_lane_s16_0: -; CHECK: qdmlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-NEXT: sqdmlsl2 v0.4s, v1.8h, v2.h[0] +; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer @@ -2479,7 +3380,10 @@ entry: define <2 x i64> @test_vqdmlsl_high_lane_s32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) { ; CHECK-LABEL: test_vqdmlsl_high_lane_s32_0: -; CHECK: qdmlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-NEXT: sqdmlsl2 v0.2d, v1.4s, v2.s[0] +; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer @@ -2490,7 +3394,10 @@ entry: define <4 x i32> @test_vqdmull_lane_s16_0(<4 x i16> %a, <4 x i16> %v) { ; CHECK-LABEL: test_vqdmull_lane_s16_0: -; CHECK: qdmull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-NEXT: sqdmull v0.4s, v0.4h, v1.h[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer %vqdmull2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) @@ -2499,7 +3406,10 @@ entry: define <2 x i64> @test_vqdmull_lane_s32_0(<2 x i32> %a, <2 x i32> %v) { ; CHECK-LABEL: test_vqdmull_lane_s32_0: -; CHECK: qdmull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-NEXT: sqdmull v0.2d, v0.2s, v1.s[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer %vqdmull2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) @@ -2508,7 +3418,9 @@ entry: define <4 x i32> @test_vqdmull_laneq_s16_0(<4 x i16> %a, <8 x i16> %v) { ; CHECK-LABEL: test_vqdmull_laneq_s16_0: -; CHECK: qdmull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: sqdmull v0.4s, v0.4h, v1.h[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer %vqdmull2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) @@ -2517,7 +3429,9 @@ entry: define <2 x i64> @test_vqdmull_laneq_s32_0(<2 x i32> %a, <4 x i32> %v) { ; CHECK-LABEL: test_vqdmull_laneq_s32_0: -; CHECK: qdmull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: sqdmull v0.2d, v0.2s, v1.s[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer %vqdmull2.i = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) @@ -2526,7 +3440,10 @@ entry: define <4 x i32> @test_vqdmull_high_lane_s16_0(<8 x i16> %a, <4 x i16> %v) { ; CHECK-LABEL: test_vqdmull_high_lane_s16_0: -; CHECK: qdmull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-NEXT: sqdmull2 v0.4s, v0.8h, v1.h[0] +; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer @@ -2536,7 +3453,10 @@ entry: define <2 x i64> @test_vqdmull_high_lane_s32_0(<4 x i32> %a, <2 x i32> %v) { ; CHECK-LABEL: test_vqdmull_high_lane_s32_0: -; CHECK: qdmull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-NEXT: sqdmull2 v0.2d, v0.4s, v1.s[0] +; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer @@ -2546,7 +3466,9 @@ entry: define <4 x i32> @test_vqdmull_high_laneq_s16_0(<8 x i16> %a, <8 x i16> %v) { ; CHECK-LABEL: test_vqdmull_high_laneq_s16_0: -; CHECK: qdmull2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: sqdmull2 v0.4s, v0.8h, v1.h[0] +; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> zeroinitializer @@ -2556,7 +3478,9 @@ entry: define <2 x i64> @test_vqdmull_high_laneq_s32_0(<4 x i32> %a, <4 x i32> %v) { ; CHECK-LABEL: test_vqdmull_high_laneq_s32_0: -; CHECK: qdmull2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: sqdmull2 v0.2d, v0.4s, v1.s[0] +; CHECK-NEXT: ret entry: %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> %shuffle = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> zeroinitializer @@ -2566,7 +3490,10 @@ entry: define <4 x i16> @test_vqdmulh_lane_s16_0(<4 x i16> %a, <4 x i16> %v) { ; CHECK-LABEL: test_vqdmulh_lane_s16_0: -; CHECK: qdmulh {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-NEXT: sqdmulh v0.4h, v0.4h, v1.h[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer %vqdmulh2.i = tail call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> %a, <4 x i16> %shuffle) @@ -2575,7 +3502,10 @@ entry: define <8 x i16> @test_vqdmulhq_lane_s16_0(<8 x i16> %a, <4 x i16> %v) { ; CHECK-LABEL: test_vqdmulhq_lane_s16_0: -; CHECK: qdmulh {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-NEXT: sqdmulh v0.8h, v0.8h, v1.h[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> zeroinitializer %vqdmulh2.i = tail call <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16> %a, <8 x i16> %shuffle) @@ -2584,7 +3514,10 @@ entry: define <2 x i32> @test_vqdmulh_lane_s32_0(<2 x i32> %a, <2 x i32> %v) { ; CHECK-LABEL: test_vqdmulh_lane_s32_0: -; CHECK: qdmulh {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-NEXT: sqdmulh v0.2s, v0.2s, v1.s[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer %vqdmulh2.i = tail call <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32> %a, <2 x i32> %shuffle) @@ -2593,7 +3526,10 @@ entry: define <4 x i32> @test_vqdmulhq_lane_s32_0(<4 x i32> %a, <2 x i32> %v) { ; CHECK-LABEL: test_vqdmulhq_lane_s32_0: -; CHECK: qdmulh {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-NEXT: sqdmulh v0.4s, v0.4s, v1.s[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> zeroinitializer %vqdmulh2.i = tail call <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32> %a, <4 x i32> %shuffle) @@ -2602,7 +3538,10 @@ entry: define <4 x i16> @test_vqrdmulh_lane_s16_0(<4 x i16> %a, <4 x i16> %v) { ; CHECK-LABEL: test_vqrdmulh_lane_s16_0: -; CHECK: qrdmulh {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-NEXT: sqrdmulh v0.4h, v0.4h, v1.h[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> zeroinitializer %vqrdmulh2.i = tail call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %a, <4 x i16> %shuffle) @@ -2611,7 +3550,10 @@ entry: define <8 x i16> @test_vqrdmulhq_lane_s16_0(<8 x i16> %a, <4 x i16> %v) { ; CHECK-LABEL: test_vqrdmulhq_lane_s16_0: -; CHECK: qrdmulh {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-NEXT: sqrdmulh v0.8h, v0.8h, v1.h[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> zeroinitializer %vqrdmulh2.i = tail call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> %a, <8 x i16> %shuffle) @@ -2620,7 +3562,10 @@ entry: define <2 x i32> @test_vqrdmulh_lane_s32_0(<2 x i32> %a, <2 x i32> %v) { ; CHECK-LABEL: test_vqrdmulh_lane_s32_0: -; CHECK: qrdmulh {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-NEXT: sqrdmulh v0.2s, v0.2s, v1.s[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> zeroinitializer %vqrdmulh2.i = tail call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> %a, <2 x i32> %shuffle) @@ -2629,7 +3574,10 @@ entry: define <4 x i32> @test_vqrdmulhq_lane_s32_0(<4 x i32> %a, <2 x i32> %v) { ; CHECK-LABEL: test_vqrdmulhq_lane_s32_0: -; CHECK: qrdmulh {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-NEXT: sqrdmulh v0.4s, v0.4s, v1.s[0] +; CHECK-NEXT: ret entry: %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> zeroinitializer %vqrdmulh2.i = tail call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %a, <4 x i32> %shuffle) @@ -2637,11 +3585,24 @@ entry: } define <2 x float> @test_vmul_lane_f32_0(<2 x float> %a, <2 x float> %v) { -; CHECK-LABEL: test_vmul_lane_f32_0: -; GENERIC: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] -; EXYNOSM1: dup [[V:v[0-9]+]].2s, {{v[0-9]+}}.s[0] -; EXYNOSM1: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, [[V]].2s -; EXYNOSM3: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +; GENERIC-LABEL: test_vmul_lane_f32_0: +; GENERIC: // %bb.0: // %entry +; GENERIC-NEXT: // kill: def $d1 killed $d1 def $q1 +; GENERIC-NEXT: fmul v0.2s, v0.2s, v1.s[0] +; GENERIC-NEXT: ret +; +; EXYNOSM1-LABEL: test_vmul_lane_f32_0: +; EXYNOSM1: // %bb.0: // %entry +; EXYNOSM1-NEXT: // kill: def $d1 killed $d1 def $q1 +; EXYNOSM1-NEXT: dup v1.2s, v1.s[0] +; EXYNOSM1-NEXT: fmul v0.2s, v0.2s, v1.2s +; EXYNOSM1-NEXT: ret +; +; EXYNOSM3-LABEL: test_vmul_lane_f32_0: +; EXYNOSM3: // %bb.0: // %entry +; EXYNOSM3-NEXT: // kill: def $d1 killed $d1 def $q1 +; EXYNOSM3-NEXT: fmul v0.2s, v0.2s, v1.s[0] +; EXYNOSM3-NEXT: ret entry: %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> zeroinitializer %mul = fmul <2 x float> %shuffle, %a @@ -2649,11 +3610,24 @@ entry: } define <4 x float> @test_vmulq_lane_f32_0(<4 x float> %a, <2 x float> %v) { -; CHECK-LABEL: test_vmulq_lane_f32_0: -; GENERIC: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] -; EXYNOSM1: dup [[V:v[0-9]+]].4s, {{v[0-9]+}}.s[0] -; EXYNOSM1: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, [[V]].4s -; EXYNOSM3: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +; GENERIC-LABEL: test_vmulq_lane_f32_0: +; GENERIC: // %bb.0: // %entry +; GENERIC-NEXT: // kill: def $d1 killed $d1 def $q1 +; GENERIC-NEXT: fmul v0.4s, v0.4s, v1.s[0] +; GENERIC-NEXT: ret +; +; EXYNOSM1-LABEL: test_vmulq_lane_f32_0: +; EXYNOSM1: // %bb.0: // %entry +; EXYNOSM1-NEXT: // kill: def $d1 killed $d1 def $q1 +; EXYNOSM1-NEXT: dup v1.4s, v1.s[0] +; EXYNOSM1-NEXT: fmul v0.4s, v0.4s, v1.4s +; EXYNOSM1-NEXT: ret +; +; EXYNOSM3-LABEL: test_vmulq_lane_f32_0: +; EXYNOSM3: // %bb.0: // %entry +; EXYNOSM3-NEXT: // kill: def $d1 killed $d1 def $q1 +; EXYNOSM3-NEXT: fmul v0.4s, v0.4s, v1.s[0] +; EXYNOSM3-NEXT: ret entry: %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> zeroinitializer %mul = fmul <4 x float> %shuffle, %a @@ -2661,11 +3635,21 @@ entry: } define <2 x float> @test_vmul_laneq_f32_0(<2 x float> %a, <4 x float> %v) { -; CHECK-LABEL: test_vmul_laneq_f32_0: -; GENERIC: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] -; EXYNOSM1: dup [[V:v[0-9]+]].2s, {{v[0-9]+}}.s[0] -; EXYNOSM1: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, [[V]].2s -; EXYNOSM3: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +; GENERIC-LABEL: test_vmul_laneq_f32_0: +; GENERIC: // %bb.0: // %entry +; GENERIC-NEXT: fmul v0.2s, v0.2s, v1.s[0] +; GENERIC-NEXT: ret +; +; EXYNOSM1-LABEL: test_vmul_laneq_f32_0: +; EXYNOSM1: // %bb.0: // %entry +; EXYNOSM1-NEXT: dup v1.2s, v1.s[0] +; EXYNOSM1-NEXT: fmul v0.2s, v0.2s, v1.2s +; EXYNOSM1-NEXT: ret +; +; EXYNOSM3-LABEL: test_vmul_laneq_f32_0: +; EXYNOSM3: // %bb.0: // %entry +; EXYNOSM3-NEXT: fmul v0.2s, v0.2s, v1.s[0] +; EXYNOSM3-NEXT: ret entry: %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> zeroinitializer %mul = fmul <2 x float> %shuffle, %a @@ -2674,7 +3658,9 @@ entry: define <1 x double> @test_vmul_laneq_f64_0(<1 x double> %a, <2 x double> %v) { ; CHECK-LABEL: test_vmul_laneq_f64_0: -; CHECK: fmul {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[0] +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fmul d0, d0, v1.d[0] +; CHECK-NEXT: ret entry: %0 = bitcast <1 x double> %a to <8 x i8> %1 = bitcast <8 x i8> %0 to double @@ -2685,11 +3671,21 @@ entry: } define <4 x float> @test_vmulq_laneq_f32_0(<4 x float> %a, <4 x float> %v) { -; CHECK-LABEL: test_vmulq_laneq_f32_0: -; GENERIC: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] -; EXYNOSM1: dup [[V:v[0-9]+]].4s, {{v[0-9]+}}.s[0] -; EXYNOSM1: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, [[V]].4s -; EXYNOSM3: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +; GENERIC-LABEL: test_vmulq_laneq_f32_0: +; GENERIC: // %bb.0: // %entry +; GENERIC-NEXT: fmul v0.4s, v0.4s, v1.s[0] +; GENERIC-NEXT: ret +; +; EXYNOSM1-LABEL: test_vmulq_laneq_f32_0: +; EXYNOSM1: // %bb.0: // %entry +; EXYNOSM1-NEXT: dup v1.4s, v1.s[0] +; EXYNOSM1-NEXT: fmul v0.4s, v0.4s, v1.4s +; EXYNOSM1-NEXT: ret +; +; EXYNOSM3-LABEL: test_vmulq_laneq_f32_0: +; EXYNOSM3: // %bb.0: // %entry +; EXYNOSM3-NEXT: fmul v0.4s, v0.4s, v1.s[0] +; EXYNOSM3-NEXT: ret entry: %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> zeroinitializer %mul = fmul <4 x float> %shuffle, %a @@ -2697,11 +3693,21 @@ entry: } define <2 x double> @test_vmulq_laneq_f64_0(<2 x double> %a, <2 x double> %v) { -; CHECK-LABEL: test_vmulq_laneq_f64_0: -; GENERIC: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] -; EXYNOSM1: dup [[V:v[0-9]+]].2d, {{v[0-9]+}}.d[0] -; EXYNOSM1: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, [[V]].2d -; EXYNOSM3: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] +; GENERIC-LABEL: test_vmulq_laneq_f64_0: +; GENERIC: // %bb.0: // %entry +; GENERIC-NEXT: fmul v0.2d, v0.2d, v1.d[0] +; GENERIC-NEXT: ret +; +; EXYNOSM1-LABEL: test_vmulq_laneq_f64_0: +; EXYNOSM1: // %bb.0: // %entry +; EXYNOSM1-NEXT: dup v1.2d, v1.d[0] +; EXYNOSM1-NEXT: fmul v0.2d, v0.2d, v1.2d +; EXYNOSM1-NEXT: ret +; +; EXYNOSM3-LABEL: test_vmulq_laneq_f64_0: +; EXYNOSM3: // %bb.0: // %entry +; EXYNOSM3-NEXT: fmul v0.2d, v0.2d, v1.d[0] +; EXYNOSM3-NEXT: ret entry: %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> zeroinitializer %mul = fmul <2 x double> %shuffle, %a @@ -2709,11 +3715,24 @@ entry: } define <2 x float> @test_vmulx_lane_f32_0(<2 x float> %a, <2 x float> %v) { -; CHECK-LABEL: test_vmulx_lane_f32_0: -; GENERIC: mulx {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] -; EXYNOSM1: dup [[V:v[0-9]+]].2s, {{v[0-9]+}}.s[0] -; EXYNOSM1: mulx {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, [[V]].2s -; EXYNOSM3: mulx {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +; GENERIC-LABEL: test_vmulx_lane_f32_0: +; GENERIC: // %bb.0: // %entry +; GENERIC-NEXT: // kill: def $d1 killed $d1 def $q1 +; GENERIC-NEXT: fmulx v0.2s, v0.2s, v1.s[0] +; GENERIC-NEXT: ret +; +; EXYNOSM1-LABEL: test_vmulx_lane_f32_0: +; EXYNOSM1: // %bb.0: // %entry +; EXYNOSM1-NEXT: // kill: def $d1 killed $d1 def $q1 +; EXYNOSM1-NEXT: dup v1.2s, v1.s[0] +; EXYNOSM1-NEXT: fmulx v0.2s, v0.2s, v1.2s +; EXYNOSM1-NEXT: ret +; +; EXYNOSM3-LABEL: test_vmulx_lane_f32_0: +; EXYNOSM3: // %bb.0: // %entry +; EXYNOSM3-NEXT: // kill: def $d1 killed $d1 def $q1 +; EXYNOSM3-NEXT: fmulx v0.2s, v0.2s, v1.s[0] +; EXYNOSM3-NEXT: ret entry: %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> zeroinitializer %vmulx2.i = tail call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> %a, <2 x float> %shuffle) @@ -2721,11 +3740,24 @@ entry: } define <4 x float> @test_vmulxq_lane_f32_0(<4 x float> %a, <2 x float> %v) { -; CHECK-LABEL: test_vmulxq_lane_f32_0: -; GENERIC: mulx {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] -; EXYNOSM1: dup [[V:v[0-9]+]].4s, {{v[0-9]+}}.s[0] -; EXYNOSM1: mulx {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, [[V]].4s -; EXYNOSM3: mulx {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +; GENERIC-LABEL: test_vmulxq_lane_f32_0: +; GENERIC: // %bb.0: // %entry +; GENERIC-NEXT: // kill: def $d1 killed $d1 def $q1 +; GENERIC-NEXT: fmulx v0.4s, v0.4s, v1.s[0] +; GENERIC-NEXT: ret +; +; EXYNOSM1-LABEL: test_vmulxq_lane_f32_0: +; EXYNOSM1: // %bb.0: // %entry +; EXYNOSM1-NEXT: // kill: def $d1 killed $d1 def $q1 +; EXYNOSM1-NEXT: dup v1.4s, v1.s[0] +; EXYNOSM1-NEXT: fmulx v0.4s, v0.4s, v1.4s +; EXYNOSM1-NEXT: ret +; +; EXYNOSM3-LABEL: test_vmulxq_lane_f32_0: +; EXYNOSM3: // %bb.0: // %entry +; EXYNOSM3-NEXT: // kill: def $d1 killed $d1 def $q1 +; EXYNOSM3-NEXT: fmulx v0.4s, v0.4s, v1.s[0] +; EXYNOSM3-NEXT: ret entry: %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> zeroinitializer %vmulx2.i = tail call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> %a, <4 x float> %shuffle) @@ -2733,11 +3765,24 @@ entry: } define <2 x double> @test_vmulxq_lane_f64_0(<2 x double> %a, <1 x double> %v) { -; CHECK-LABEL: test_vmulxq_lane_f64_0: -; GENERIC: mulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] -; EXYNOSM1: dup [[V:v[0-9]+]].2d, {{v[0-9]+}}.d[0] -; EXYNOSM1: mulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, [[V]].2d -; EXYNOSM3: mulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] +; GENERIC-LABEL: test_vmulxq_lane_f64_0: +; GENERIC: // %bb.0: // %entry +; GENERIC-NEXT: // kill: def $d1 killed $d1 def $q1 +; GENERIC-NEXT: fmulx v0.2d, v0.2d, v1.d[0] +; GENERIC-NEXT: ret +; +; EXYNOSM1-LABEL: test_vmulxq_lane_f64_0: +; EXYNOSM1: // %bb.0: // %entry +; EXYNOSM1-NEXT: // kill: def $d1 killed $d1 def $q1 +; EXYNOSM1-NEXT: dup v1.2d, v1.d[0] +; EXYNOSM1-NEXT: fmulx v0.2d, v0.2d, v1.2d +; EXYNOSM1-NEXT: ret +; +; EXYNOSM3-LABEL: test_vmulxq_lane_f64_0: +; EXYNOSM3: // %bb.0: // %entry +; EXYNOSM3-NEXT: // kill: def $d1 killed $d1 def $q1 +; EXYNOSM3-NEXT: fmulx v0.2d, v0.2d, v1.d[0] +; EXYNOSM3-NEXT: ret entry: %shuffle = shufflevector <1 x double> %v, <1 x double> undef, <2 x i32> zeroinitializer %vmulx2.i = tail call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> %a, <2 x double> %shuffle) @@ -2745,11 +3790,21 @@ entry: } define <2 x float> @test_vmulx_laneq_f32_0(<2 x float> %a, <4 x float> %v) { -; CHECK-LABEL: test_vmulx_laneq_f32_0: -; GENERIC: mulx {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] -; EXYNOSM1: dup [[V:v[0-9]+]].2s, {{v[0-9]+}}.s[0] -; EXYNOSM1: mulx {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, [[V]].2s -; EXYNOSM3: mulx {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +; GENERIC-LABEL: test_vmulx_laneq_f32_0: +; GENERIC: // %bb.0: // %entry +; GENERIC-NEXT: fmulx v0.2s, v0.2s, v1.s[0] +; GENERIC-NEXT: ret +; +; EXYNOSM1-LABEL: test_vmulx_laneq_f32_0: +; EXYNOSM1: // %bb.0: // %entry +; EXYNOSM1-NEXT: dup v1.2s, v1.s[0] +; EXYNOSM1-NEXT: fmulx v0.2s, v0.2s, v1.2s +; EXYNOSM1-NEXT: ret +; +; EXYNOSM3-LABEL: test_vmulx_laneq_f32_0: +; EXYNOSM3: // %bb.0: // %entry +; EXYNOSM3-NEXT: fmulx v0.2s, v0.2s, v1.s[0] +; EXYNOSM3-NEXT: ret entry: %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> zeroinitializer %vmulx2.i = tail call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> %a, <2 x float> %shuffle) @@ -2757,11 +3812,21 @@ entry: } define <4 x float> @test_vmulxq_laneq_f32_0(<4 x float> %a, <4 x float> %v) { -; CHECK-LABEL: test_vmulxq_laneq_f32_0: -; GENERIC: mulx {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] -; EXYNOSM1: dup [[V:v[0-9]+]].4s, {{v[0-9]+}}.s[0] -; EXYNOSM1: mulx {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, [[V]].4s -; EXYNOSM3: mulx {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +; GENERIC-LABEL: test_vmulxq_laneq_f32_0: +; GENERIC: // %bb.0: // %entry +; GENERIC-NEXT: fmulx v0.4s, v0.4s, v1.s[0] +; GENERIC-NEXT: ret +; +; EXYNOSM1-LABEL: test_vmulxq_laneq_f32_0: +; EXYNOSM1: // %bb.0: // %entry +; EXYNOSM1-NEXT: dup v1.4s, v1.s[0] +; EXYNOSM1-NEXT: fmulx v0.4s, v0.4s, v1.4s +; EXYNOSM1-NEXT: ret +; +; EXYNOSM3-LABEL: test_vmulxq_laneq_f32_0: +; EXYNOSM3: // %bb.0: // %entry +; EXYNOSM3-NEXT: fmulx v0.4s, v0.4s, v1.s[0] +; EXYNOSM3-NEXT: ret entry: %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> zeroinitializer %vmulx2.i = tail call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> %a, <4 x float> %shuffle) @@ -2769,11 +3834,21 @@ entry: } define <2 x double> @test_vmulxq_laneq_f64_0(<2 x double> %a, <2 x double> %v) { -; CHECK-LABEL: test_vmulxq_laneq_f64_0: -; GENERIC: mulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] -; EXYNOSM1: dup [[V:v[0-9]+]].2d, {{v[0-9]+}}.d[0] -; EXYNOSM1: mulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, [[V]].2d -; EXYNOSM3: mulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] +; GENERIC-LABEL: test_vmulxq_laneq_f64_0: +; GENERIC: // %bb.0: // %entry +; GENERIC-NEXT: fmulx v0.2d, v0.2d, v1.d[0] +; GENERIC-NEXT: ret +; +; EXYNOSM1-LABEL: test_vmulxq_laneq_f64_0: +; EXYNOSM1: // %bb.0: // %entry +; EXYNOSM1-NEXT: dup v1.2d, v1.d[0] +; EXYNOSM1-NEXT: fmulx v0.2d, v0.2d, v1.2d +; EXYNOSM1-NEXT: ret +; +; EXYNOSM3-LABEL: test_vmulxq_laneq_f64_0: +; EXYNOSM3: // %bb.0: // %entry +; EXYNOSM3-NEXT: fmulx v0.2d, v0.2d, v1.d[0] +; EXYNOSM3-NEXT: ret entry: %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> zeroinitializer %vmulx2.i = tail call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> %a, <2 x double> %shuffle) @@ -2781,14 +3856,24 @@ entry: } define <4 x float> @optimize_dup(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %v) { -; CHECK-LABEL: optimize_dup: -; GENERIC: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] -; GENERIC: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] -; EXYNOSM1: dup [[V:v[0-9]+]].4s, {{v[0-9]+}}.s[3] -; EXYNOSM1: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, [[V]].4s -; EXYNOSM1: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, [[V]].4s -; EXYNOSM3: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] -; EXYNOSM3: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] +; GENERIC-LABEL: optimize_dup: +; GENERIC: // %bb.0: // %entry +; GENERIC-NEXT: fmla v0.4s, v1.4s, v3.s[3] +; GENERIC-NEXT: fmls v0.4s, v2.4s, v3.s[3] +; GENERIC-NEXT: ret +; +; EXYNOSM1-LABEL: optimize_dup: +; EXYNOSM1: // %bb.0: // %entry +; EXYNOSM1-NEXT: dup v3.4s, v3.s[3] +; EXYNOSM1-NEXT: fmla v0.4s, v1.4s, v3.4s +; EXYNOSM1-NEXT: fmls v0.4s, v2.4s, v3.4s +; EXYNOSM1-NEXT: ret +; +; EXYNOSM3-LABEL: optimize_dup: +; EXYNOSM3: // %bb.0: // %entry +; EXYNOSM3-NEXT: fmla v0.4s, v1.4s, v3.s[3] +; EXYNOSM3-NEXT: fmls v0.4s, v2.4s, v3.s[3] +; EXYNOSM3-NEXT: ret entry: %lane1 = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane1, <4 x float> %b, <4 x float> %a) @@ -2799,15 +3884,25 @@ entry: } define <4 x float> @no_optimize_dup(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %v) { -; CHECK-LABEL: no_optimize_dup: -; GENERIC: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] -; GENERIC: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] -; EXYNOSM1: dup [[V:v[0-9]+]].4s, {{v[0-9]+}}.s[3] -; EXYNOSM1: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, [[V]].4s -; EXYNOSM1: dup [[W:v[0-9]+]].4s, {{v[0-9]+}}.s[1] -; EXYNOSM1: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, [[W]].4s -; EXYNOSM3: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] -; EXYNOSM3: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] +; GENERIC-LABEL: no_optimize_dup: +; GENERIC: // %bb.0: // %entry +; GENERIC-NEXT: fmla v0.4s, v1.4s, v3.s[3] +; GENERIC-NEXT: fmls v0.4s, v2.4s, v3.s[1] +; GENERIC-NEXT: ret +; +; EXYNOSM1-LABEL: no_optimize_dup: +; EXYNOSM1: // %bb.0: // %entry +; EXYNOSM1-NEXT: dup v4.4s, v3.s[3] +; EXYNOSM1-NEXT: fmla v0.4s, v1.4s, v4.4s +; EXYNOSM1-NEXT: dup v1.4s, v3.s[1] +; EXYNOSM1-NEXT: fmls v0.4s, v2.4s, v1.4s +; EXYNOSM1-NEXT: ret +; +; EXYNOSM3-LABEL: no_optimize_dup: +; EXYNOSM3: // %bb.0: // %entry +; EXYNOSM3-NEXT: fmla v0.4s, v1.4s, v3.s[3] +; EXYNOSM3-NEXT: fmls v0.4s, v2.4s, v3.s[1] +; EXYNOSM3-NEXT: ret entry: %lane1 = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane1, <4 x float> %b, <4 x float> %a) @@ -2818,8 +3913,24 @@ entry: } define <2 x float> @test_vfma_lane_simdinstr_opt_pass_caching_a57(<2 x float> %a, <2 x float> %b, <2 x float> %v) "target-cpu"="cortex-a57" { -; CHECK-LABEL: test_vfma_lane_simdinstr_opt_pass_caching_a57: -; GENERIC: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] +; GENERIC-LABEL: test_vfma_lane_simdinstr_opt_pass_caching_a57: +; GENERIC: // %bb.0: // %entry +; GENERIC-NEXT: // kill: def $d2 killed $d2 def $q2 +; GENERIC-NEXT: fmla v0.2s, v1.2s, v2.s[1] +; GENERIC-NEXT: ret +; +; EXYNOSM1-LABEL: test_vfma_lane_simdinstr_opt_pass_caching_a57: +; EXYNOSM1: // %bb.0: // %entry +; EXYNOSM1-NEXT: // kill: def $d2 killed $d2 def $q2 +; EXYNOSM1-NEXT: dup v2.2s, v2.s[1] +; EXYNOSM1-NEXT: fmla v0.2s, v1.2s, v2.2s +; EXYNOSM1-NEXT: ret +; +; EXYNOSM3-LABEL: test_vfma_lane_simdinstr_opt_pass_caching_a57: +; EXYNOSM3: // %bb.0: // %entry +; EXYNOSM3-NEXT: // kill: def $d2 killed $d2 def $q2 +; EXYNOSM3-NEXT: fmla v0.2s, v1.2s, v2.s[1] +; EXYNOSM3-NEXT: ret entry: %lane = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a) @@ -2827,9 +3938,25 @@ entry: } define <2 x float> @test_vfma_lane_simdinstr_opt_pass_caching_m1(<2 x float> %a, <2 x float> %b, <2 x float> %v) "target-cpu"="exynos-m1" { -; CHECK-LABEL: test_vfma_lane_simdinstr_opt_pass_caching_m1: -; GENERIC: dup [[V:v[0-9]+]].2s, {{v[0-9]+}}.s[1] -; GENERIC: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, [[V]].2s +; GENERIC-LABEL: test_vfma_lane_simdinstr_opt_pass_caching_m1: +; GENERIC: // %bb.0: // %entry +; GENERIC-NEXT: // kill: def $d2 killed $d2 def $q2 +; GENERIC-NEXT: dup v2.2s, v2.s[1] +; GENERIC-NEXT: fmla v0.2s, v1.2s, v2.2s +; GENERIC-NEXT: ret +; +; EXYNOSM1-LABEL: test_vfma_lane_simdinstr_opt_pass_caching_m1: +; EXYNOSM1: // %bb.0: // %entry +; EXYNOSM1-NEXT: // kill: def $d2 killed $d2 def $q2 +; EXYNOSM1-NEXT: dup v2.2s, v2.s[1] +; EXYNOSM1-NEXT: fmla v0.2s, v1.2s, v2.2s +; EXYNOSM1-NEXT: ret +; +; EXYNOSM3-LABEL: test_vfma_lane_simdinstr_opt_pass_caching_m1: +; EXYNOSM3: // %bb.0: // %entry +; EXYNOSM3-NEXT: // kill: def $d2 killed $d2 def $q2 +; EXYNOSM3-NEXT: fmla v0.2s, v1.2s, v2.s[1] +; EXYNOSM3-NEXT: ret entry: %lane = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a) @@ -2837,8 +3964,24 @@ entry: } define <2 x float> @test_vfma_lane_simdinstr_opt_pass_caching_m3(<2 x float> %a, <2 x float> %b, <2 x float> %v) "target-cpu"="exynos-m3" { -; CHECK-LABEL: test_vfma_lane_simdinstr_opt_pass_caching_m3: -; GENERIC: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] +; GENERIC-LABEL: test_vfma_lane_simdinstr_opt_pass_caching_m3: +; GENERIC: // %bb.0: // %entry +; GENERIC-NEXT: // kill: def $d2 killed $d2 def $q2 +; GENERIC-NEXT: fmla v0.2s, v1.2s, v2.s[1] +; GENERIC-NEXT: ret +; +; EXYNOSM1-LABEL: test_vfma_lane_simdinstr_opt_pass_caching_m3: +; EXYNOSM1: // %bb.0: // %entry +; EXYNOSM1-NEXT: // kill: def $d2 killed $d2 def $q2 +; EXYNOSM1-NEXT: dup v2.2s, v2.s[1] +; EXYNOSM1-NEXT: fmla v0.2s, v1.2s, v2.2s +; EXYNOSM1-NEXT: ret +; +; EXYNOSM3-LABEL: test_vfma_lane_simdinstr_opt_pass_caching_m3: +; EXYNOSM3: // %bb.0: // %entry +; EXYNOSM3-NEXT: // kill: def $d2 killed $d2 def $q2 +; EXYNOSM3-NEXT: fmla v0.2s, v1.2s, v2.s[1] +; EXYNOSM3-NEXT: ret entry: %lane = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a) -- 2.11.0