From ca6fd009ad7905f39ce66f16164c652337e14c61 Mon Sep 17 00:00:00 2001
From: Jakob Stoklund Olesen
Date: Thu, 31 Mar 2011 22:14:03 +0000
Subject: [PATCH] Fix ARM tests to be register allocator independent.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@128680 91177308-0d34-0410-b5e6-96231b3b80d8
---
 test/CodeGen/ARM/2009-10-27-double-align.ll |  4 +++-
 test/CodeGen/ARM/arguments.ll               |  2 +-
 test/CodeGen/ARM/arm-and-tst-peephole.ll    |  6 +++---
 test/CodeGen/ARM/arm-returnaddr.ll          |  9 ++++++---
 test/CodeGen/ARM/fnmscs.ll                  | 17 +++++++++--------
 test/CodeGen/ARM/indirectbr.ll              |  8 ++++----
 test/CodeGen/ARM/ldrd.ll                    | 12 +++++++-----
 test/CodeGen/ARM/memcpy-inline.ll           | 14 +++++++++-----
 test/CodeGen/ARM/peephole-bitcast.ll        |  5 ++++-
 test/CodeGen/ARM/reg_sequence.ll            | 22 ++++++++++++----------
 test/CodeGen/ARM/str_pre-2.ll               |  5 ++++-
 test/CodeGen/ARM/thumb1-varalloc.ll         |  6 ++++--
 test/CodeGen/ARM/vcgt.ll                    |  7 ++++---
 test/CodeGen/ARM/vfp.ll                     |  5 +++--
 test/CodeGen/ARM/vld1.ll                    |  7 ++++---
 test/CodeGen/ARM/vld3.ll                    |  7 ++++---
 test/CodeGen/ARM/vldlane.ll                 |  3 ++-
 17 files changed, 83 insertions(+), 56 deletions(-)

diff --git a/test/CodeGen/ARM/2009-10-27-double-align.ll b/test/CodeGen/ARM/2009-10-27-double-align.ll
index c31b116c55b..10ca722f9e9 100644
--- a/test/CodeGen/ARM/2009-10-27-double-align.ll
+++ b/test/CodeGen/ARM/2009-10-27-double-align.ll
@@ -1,4 +1,6 @@
-; RUN: llc < %s -mtriple=arm-linux-gnueabi | FileCheck %s
+; RUN: llc < %s -mtriple=arm-linux-gnueabi -regalloc=linearscan | FileCheck %s
+
+; This test depends on linear scan's reserved register coalescing.
 
 @.str = private constant [1 x i8] zeroinitializer, align 1
 
diff --git a/test/CodeGen/ARM/arguments.ll b/test/CodeGen/ARM/arguments.ll
index c7fcb9755d9..a8b42e63b71 100644
--- a/test/CodeGen/ARM/arguments.ll
+++ b/test/CodeGen/ARM/arguments.ll
@@ -14,7 +14,7 @@ define i32 @f1(i32 %a, i64 %b) {
 define i32 @f2() nounwind optsize {
 ; ELF: f2:
 ; ELF: mov [[REGISTER:(r[0-9]+)]], #128
-; ELF: str [[REGISTER]], [sp]
+; ELF: str [[REGISTER]], [
 ; DARWIN: f2:
 ; DARWIN: mov r3, #128
 entry:
diff --git a/test/CodeGen/ARM/arm-and-tst-peephole.ll b/test/CodeGen/ARM/arm-and-tst-peephole.ll
index 50c638b7393..444dce7bf67 100644
--- a/test/CodeGen/ARM/arm-and-tst-peephole.ll
+++ b/test/CodeGen/ARM/arm-and-tst-peephole.ll
@@ -26,9 +26,9 @@ tailrecurse: ; preds = %sw.bb, %entry
 ; ARM: ands r12, r12, #3
 ; ARM-NEXT: beq
 
-; THUMB: movs r5, #3
-; THUMB-NEXT: ands r5, r4
-; THUMB-NEXT: cmp r5, #0
+; THUMB: movs r[[R0:[0-9]+]], #3
+; THUMB-NEXT: ands r[[R0]], r
+; THUMB-NEXT: cmp r[[R0]], #0
 ; THUMB-NEXT: beq
 
 ; T2: ands r12, r12, #3
diff --git a/test/CodeGen/ARM/arm-returnaddr.ll b/test/CodeGen/ARM/arm-returnaddr.ll
index 382a1833460..ec2ebc9aaeb 100644
--- a/test/CodeGen/ARM/arm-returnaddr.ll
+++ b/test/CodeGen/ARM/arm-returnaddr.ll
@@ -1,8 +1,11 @@
-; RUN: llc < %s -mtriple=arm-apple-darwin | FileCheck %s
-; RUN: llc < %s -mtriple=thumbv6-apple-darwin | FileCheck %s
+; RUN: llc < %s -mtriple=arm-apple-darwin -regalloc=linearscan | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv6-apple-darwin -regalloc=linearscan | FileCheck %s
 ; rdar://8015977
 ; rdar://8020118
 
+; This test needs the reserved register r7 to be coalesced into the ldr.
+; So far, only linear scan can do that.
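+;
+; The allocator-independence idiom used throughout this patch: rather than
+; hard-coding a register the old allocator happened to pick, bind whatever
+; register appears, e.g. r[[R0:[0-9]+]], and require the same register
+; again later as [[R0]]. A minimal sketch (illustrative only, not one of
+; this test's CHECK lines):
+;   ; CHECK: ldr r[[R0:[0-9]+]], [r7]
+;   ; CHECK: ldr r0, [r[[R0]]]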
+
 define i8* @rt0(i32 %x) nounwind readnone {
 entry:
 ; CHECK: rt0:
@@ -16,7 +19,7 @@ define i8* @rt2() nounwind readnone {
 entry:
 ; CHECK: rt2:
 ; CHECK: {r7, lr}
-; CHECK: ldr r0, [r7]
+; CHECK: ldr r[[R0:[0-9]+]], [r7]
 ; CHECK: ldr r0, [r0]
 ; CHECK: ldr r0, [r0, #4]
 %0 = tail call i8* @llvm.returnaddress(i32 2)
diff --git a/test/CodeGen/ARM/fnmscs.ll b/test/CodeGen/ARM/fnmscs.ll
index 76c806761f7..9facf20fee7 100644
--- a/test/CodeGen/ARM/fnmscs.ll
+++ b/test/CodeGen/ARM/fnmscs.ll
@@ -1,6 +1,7 @@
 ; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2
 ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -check-prefix=NEON
 ; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=A8
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 -regalloc=basic | FileCheck %s -check-prefix=A8
 
 define float @t1(float %acc, float %a, float %b) nounwind {
 entry:
@@ -11,8 +12,8 @@ entry:
 ; NEON: vnmla.f32
 
 ; A8: t1:
-; A8: vnmul.f32 s0, s{{[01]}}, s{{[01]}}
-; A8: vsub.f32 d0, d0, d1
+; A8: vnmul.f32 s{{[0-9]}}, s{{[0-9]}}, s{{[0-9]}}
+; A8: vsub.f32 d{{[0-9]}}, d{{[0-9]}}, d{{[0-9]}}
 %0 = fmul float %a, %b
 %1 = fsub float -0.0, %0
 %2 = fsub float %1, %acc
@@ -28,8 +29,8 @@ entry:
 ; NEON: vnmla.f32
 
 ; A8: t2:
-; A8: vnmul.f32 s0, s{{[01]}}, s{{[01]}}
-; A8: vsub.f32 d0, d0, d1
+; A8: vnmul.f32 s{{[0123]}}, s{{[0123]}}, s{{[0123]}}
+; A8: vsub.f32 d{{[0-9]}}, d{{[0-9]}}, d{{[0-9]}}
 %0 = fmul float %a, %b
 %1 = fmul float -1.0, %0
 %2 = fsub float %1, %acc
@@ -45,8 +46,8 @@ entry:
 ; NEON: vnmla.f64
 
 ; A8: t3:
-; A8: vnmul.f64 d16, d1{{[67]}}, d1{{[67]}}
-; A8: vsub.f64 d16, d16, d17
+; A8: vnmul.f64 d1{{[67]}}, d1{{[67]}}, d1{{[67]}}
+; A8: vsub.f64 d1{{[67]}}, d1{{[67]}}, d1{{[67]}}
 %0 = fmul double %a, %b
 %1 = fsub double -0.0, %0
 %2 = fsub double %1, %acc
@@ -62,8 +63,8 @@ entry:
 ; NEON: vnmla.f64
 
 ; A8: t4:
-; A8: vnmul.f64 d16, d1{{[67]}}, d1{{[67]}}
-; A8: vsub.f64 d16, d16, d17
+; A8: vnmul.f64 d1{{[67]}}, d1{{[67]}}, d1{{[67]}}
+; A8: vsub.f64 d1{{[67]}}, d1{{[67]}}, d1{{[67]}}
 %0 = fmul double %a, %b
 %1 = fmul double -1.0, %0
 %2 = fsub double %1, %acc
diff --git a/test/CodeGen/ARM/indirectbr.ll b/test/CodeGen/ARM/indirectbr.ll
index 0aac9d16ec6..19dad3adfe6 100644
--- a/test/CodeGen/ARM/indirectbr.ll
+++ b/test/CodeGen/ARM/indirectbr.ll
@@ -14,15 +14,15 @@ entry:
 %1 = icmp eq i8* %0, null ; <i1> [#uses=1]
 ; indirect branch gets duplicated here
 ; ARM: bx
-; THUMB: mov pc, r1
-; THUMB2: mov pc, r2
+; THUMB: mov pc,
+; THUMB2: mov pc,
 br i1 %1, label %bb3, label %bb2
 
 bb2: ; preds = %entry, %bb3
 %gotovar.4.0 = phi i8* [ %gotovar.4.0.pre, %bb3 ], [ %0, %entry ] ; <i8*> [#uses=1]
 ; ARM: bx
-; THUMB: mov pc, r1
-; THUMB2: mov pc, r2
+; THUMB: mov pc,
+; THUMB2: mov pc,
 indirectbr i8* %gotovar.4.0, [label %L5, label %L4, label %L3, label %L2, label %L1]
 
 bb3: ; preds = %entry
diff --git a/test/CodeGen/ARM/ldrd.ll b/test/CodeGen/ARM/ldrd.ll
index 895562a1d31..187006a3be9 100644
--- a/test/CodeGen/ARM/ldrd.ll
+++ b/test/CodeGen/ARM/ldrd.ll
@@ -1,19 +1,21 @@
-; RUN: llc < %s -mtriple=armv6-apple-darwin | FileCheck %s -check-prefix=V6
+; RUN: llc < %s -mtriple=armv6-apple-darwin -regalloc=linearscan | FileCheck %s -check-prefix=V6
 ; RUN: llc < %s -mtriple=armv5-apple-darwin | FileCheck %s -check-prefix=V5
 ; RUN: llc < %s -mtriple=armv6-eabi | FileCheck %s -check-prefix=EABI
 ; rdar://r6949835
 
+; Magic ARM pair hinting works best with linearscan.
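+;
+; LDRD needs its two destinations in a consecutive even/odd register pair
+; (r2/r3 qualifies, r1/r2 does not), and only linear scan's pairing hint
+; reliably produces such a pair. A rough sketch of the two outputs the
+; checks accept, with rA/rB standing for whatever registers other
+; allocators pick:
+;   ldrd r2, [r2]      ; one paired 64-bit load when the hint wins (V6)
+;   ldr  rA, [r2]      ; otherwise two plain loads (V5/EABI)
+;   ldr  rB, [r2, #4]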
+
 @b = external global i64*
 
 define i64 @t(i64 %a) nounwind readonly {
 entry:
 ;V6: ldrd r2, [r2]
 
-;V5: ldr r3, [r2]
-;V5: ldr r2, [r2, #4]
+;V5: ldr r{{[0-9]+}}, [r2]
+;V5: ldr r{{[0-9]+}}, [r2, #4]
 
-;EABI: ldr r3, [r2]
-;EABI: ldr r2, [r2, #4]
+;EABI: ldr r{{[0-9]+}}, [r2]
+;EABI: ldr r{{[0-9]+}}, [r2, #4]
 
 %0 = load i64** @b, align 4
 %1 = load i64* %0, align 4
diff --git a/test/CodeGen/ARM/memcpy-inline.ll b/test/CodeGen/ARM/memcpy-inline.ll
index ed20c32dc0d..e8a2a3b7d5b 100644
--- a/test/CodeGen/ARM/memcpy-inline.ll
+++ b/test/CodeGen/ARM/memcpy-inline.ll
@@ -1,9 +1,13 @@
-; RUN: llc < %s -mtriple=arm-apple-darwin | grep ldmia
-; RUN: llc < %s -mtriple=arm-apple-darwin | grep stmia
-; RUN: llc < %s -mtriple=arm-apple-darwin | grep ldrb
-; RUN: llc < %s -mtriple=arm-apple-darwin | grep ldrh
+; RUN: llc < %s -mtriple=arm-apple-darwin -regalloc=linearscan -disable-post-ra | FileCheck %s
+; RUN: llc < %s -mtriple=arm-apple-darwin -regalloc=basic -disable-post-ra | FileCheck %s
 
- %struct.x = type { i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8 }
+; The ARM magic hinting works best with linear scan.
+; CHECK: ldmia
+; CHECK: stmia
+; CHECK: ldrh
+; CHECK: ldrb
+
+%struct.x = type { i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8 }
 
 @src = external global %struct.x
 @dst = external global %struct.x
diff --git a/test/CodeGen/ARM/peephole-bitcast.ll b/test/CodeGen/ARM/peephole-bitcast.ll
index 8d95d75b1d0..e670a5be3bc 100644
--- a/test/CodeGen/ARM/peephole-bitcast.ll
+++ b/test/CodeGen/ARM/peephole-bitcast.ll
@@ -1,8 +1,11 @@
-; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 -regalloc=linearscan | FileCheck %s
 
 ; vmov s0, r0 + vmov r0, s0 should have been optimized away.
 ; rdar://9104514
 
+; Peephole leaves a dead vmovsr instruction behind, and depends on linear scan
+; to remove it.
+
 define void @t(float %x) nounwind ssp {
 entry:
 ; CHECK: t:
diff --git a/test/CodeGen/ARM/reg_sequence.ll b/test/CodeGen/ARM/reg_sequence.ll
index 53214fd4c30..d350937c683 100644
--- a/test/CodeGen/ARM/reg_sequence.ll
+++ b/test/CodeGen/ARM/reg_sequence.ll
@@ -1,4 +1,5 @@
 ; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 -regalloc=basic | FileCheck %s
 ; Implementing vld / vst as REG_SEQUENCE eliminates the extra vmov's.
 
 %struct.int16x8_t = type { <8 x i16> }
@@ -123,9 +124,9 @@ return1:
 return2:
 ; CHECK: %return2
 ; CHECK: vadd.i32
-; CHECK: vmov q9, q11
+; CHECK: vmov {{q[0-9]+}}, {{q[0-9]+}}
 ; CHECK-NOT: vmov
-; CHECK: vst2.32 {d16, d17, d18, d19}
+; CHECK: vst2.32 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}
 %tmp100 = extractvalue %struct.__neon_int32x4x2_t %tmp2, 0 ; <<4 x i32>> [#uses=1]
 %tmp101 = extractvalue %struct.__neon_int32x4x2_t %tmp5, 1 ; <<4 x i32>> [#uses=1]
 %tmp102 = add <4 x i32> %tmp100, %tmp101 ; <<4 x i32>> [#uses=1]
@@ -137,9 +138,10 @@ return2:
 define <8 x i16> @t5(i16* %A, <8 x i16>* %B) nounwind {
 ; CHECK: t5:
 ; CHECK: vldmia
-; CHECK: vmov q9, q8
+; How can FileCheck match Q and D registers? We need a Lisp interpreter.
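+; (Each Q register aliases a D pair: q8 = {d16, d17}, q9 = {d18, d19}.
+; A captured q[[N]] would have to imply d[[2N]] and d[[2N+1]] in the vld2
+; check below, arithmetic that FileCheck's string variables cannot express,
+; so the loosened patterns match the Q and D registers independently.)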
+; CHECK: vmov {{q[0-9]+}}, {{q[0-9]+}}
 ; CHECK-NOT: vmov
-; CHECK: vld2.16 {d16[1], d18[1]}, [r0]
+; CHECK: vld2.16 {d{{[0-9]+}}[1], d{{[0-9]+}}[1]}, [r0]
 ; CHECK-NOT: vmov
 ; CHECK: vadd.i16
 %tmp0 = bitcast i16* %A to i8* ; <i8*> [#uses=1]
@@ -154,8 +156,8 @@ define <8 x i16> @t5(i16* %A, <8 x i16>* %B) nounwind {
 define <8 x i8> @t6(i8* %A, <8 x i8>* %B) nounwind {
 ; CHECK: t6:
 ; CHECK: vldr.64
-; CHECK: vmov d17, d16
-; CHECK-NEXT: vld2.8 {d16[1], d17[1]}
+; CHECK: vmov d[[D0:[0-9]+]], d[[D1:[0-9]+]]
+; CHECK-NEXT: vld2.8 {d[[D1]][1], d[[D0]][1]}
 %tmp1 = load <8 x i8>* %B ; <<8 x i8>> [#uses=2]
 %tmp2 = call %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 1) ; <%struct.__neon_int8x8x2_t> [#uses=2]
 %tmp3 = extractvalue %struct.__neon_int8x8x2_t %tmp2, 0 ; <<8 x i8>> [#uses=1]
@@ -169,10 +171,10 @@ entry:
 ; CHECK: t7:
 ; CHECK: vld2.32
 ; CHECK: vst2.32
-; CHECK: vld1.32 {d16, d17},
-; CHECK: vmov q9, q8
+; CHECK: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}},
+; CHECK: vmov q[[Q0:[0-9]+]], q[[Q1:[0-9]+]]
 ; CHECK-NOT: vmov
-; CHECK: vuzp.32 q8, q9
+; CHECK: vuzp.32 q[[Q1]], q[[Q0]]
 ; CHECK: vst1.32
 %0 = bitcast i32* %iptr to i8* ; <i8*> [#uses=2]
 %1 = tail call %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2.v4i32(i8* %0, i32 1) ; <%struct.__neon_int32x4x2_t> [#uses=2]
@@ -271,7 +273,7 @@ define arm_aapcs_vfpcc i32 @t10() nounwind {
 entry:
 ; CHECK: t10:
 ; CHECK: vmul.f32 q8, q8, d0[0]
-; CHECK: vmov.i32 q9, #0x3F000000
+; CHECK: vmov.i32 q[[Q0:[0-9]+]], #0x3F000000
 ; CHECK: vadd.f32 q8, q8, q8
 %0 = shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>> [#uses=1]
 %1 = insertelement <4 x float> %0, float undef, i32 1 ; <<4 x float>> [#uses=1]
diff --git a/test/CodeGen/ARM/str_pre-2.ll b/test/CodeGen/ARM/str_pre-2.ll
index 465c7e676c5..b24f75a6e2b 100644
--- a/test/CodeGen/ARM/str_pre-2.ll
+++ b/test/CodeGen/ARM/str_pre-2.ll
@@ -1,4 +1,7 @@
-; RUN: llc < %s -mtriple=armv6-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=armv6-linux-gnu -regalloc=linearscan | FileCheck %s
+; RUN: llc < %s -mtriple=armv6-linux-gnu -regalloc=basic | FileCheck %s
+
+; The greedy register allocator uses a single CSR here, invalidating the test.
 
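+; (CSR = callee-saved register. The CHECK lines, not visible in this hunk,
+; presumably match pre-indexed stores of callee-saved registers in the
+; prologue; a prologue that uses only one CSR has a different shape, so the
+; RUN lines pin the two allocators that still produce the expected stores.)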
 @b = external global i64*
 
diff --git a/test/CodeGen/ARM/thumb1-varalloc.ll b/test/CodeGen/ARM/thumb1-varalloc.ll
index 25093fee225..aa88ae0c1a8 100644
--- a/test/CodeGen/ARM/thumb1-varalloc.ll
+++ b/test/CodeGen/ARM/thumb1-varalloc.ll
@@ -1,4 +1,5 @@
 ; RUN: llc < %s -mtriple=thumbv6-apple-darwin | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv6-apple-darwin -regalloc=basic | FileCheck %s
 ; rdar://8819685
 
 @__bar = external hidden global i8*
@@ -12,12 +13,13 @@ entry:
 %0 = load i8** @__bar, align 4
 %1 = icmp eq i8* %0, null
 br i1 %1, label %bb1, label %bb3
+; CHECK: bne
 
 bb1:
 store i32 1026, i32* %size, align 4
 %2 = alloca [1026 x i8], align 1
-; CHECK: mov r0, sp
-; CHECK: adds r4, r0, r4
+; CHECK: mov [[R0:r[0-9]+]], sp
+; CHECK: adds {{r[0-9]+}}, [[R0]], {{r[0-9]+}}
 %3 = getelementptr inbounds [1026 x i8]* %2, i32 0, i32 0
 %4 = call i32 @_called_func(i8* %3, i32* %size) nounwind
 %5 = icmp eq i32 %4, 0
diff --git a/test/CodeGen/ARM/vcgt.ll b/test/CodeGen/ARM/vcgt.ll
index c3c4cb35630..2243bac91fb 100644
--- a/test/CodeGen/ARM/vcgt.ll
+++ b/test/CodeGen/ARM/vcgt.ll
@@ -1,4 +1,5 @@
 ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+; RUN: llc < %s -march=arm -mattr=+neon -regalloc=basic | FileCheck %s
 
 define <8 x i8> @vcgts8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK: vcgts8:
@@ -161,9 +162,9 @@ define <4 x i32> @vacgtQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
 ; rdar://7923010
 define <4 x i32> @vcgt_zext(<4 x float>* %A, <4 x float>* %B) nounwind {
 ;CHECK: vcgt_zext:
-;CHECK: vmov.i32 q10, #0x1
-;CHECK: vcgt.f32 q8
-;CHECK: vand q8, q8, q10
+;CHECK: vmov.i32 [[Q0:q[0-9]+]], #0x1
+;CHECK: vcgt.f32 [[Q1:q[0-9]+]]
+;CHECK: vand [[Q2:q[0-9]+]], [[Q1]], [[Q0]]
 %tmp1 = load <4 x float>* %A
 %tmp2 = load <4 x float>* %B
 %tmp3 = fcmp ogt <4 x float> %tmp1, %tmp2
diff --git a/test/CodeGen/ARM/vfp.ll b/test/CodeGen/ARM/vfp.ll
index 44a44afe9af..390457fc21b 100644
--- a/test/CodeGen/ARM/vfp.ll
+++ b/test/CodeGen/ARM/vfp.ll
@@ -1,4 +1,5 @@
-; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s
+; RUN: llc < %s -march=arm -mattr=+vfp2 -disable-post-ra | FileCheck %s
+; RUN: llc < %s -march=arm -mattr=+vfp2 -disable-post-ra -regalloc=basic | FileCheck %s
 
 define void @test(float* %P, double* %D) {
 %A = load float* %P ; <float> [#uses=1]
@@ -39,10 +40,10 @@ define void @test_add(float* %P, double* %D) {
 define void @test_ext_round(float* %P, double* %D) {
 ;CHECK: test_ext_round:
 %a = load float* %P ; <float> [#uses=1]
+;CHECK: vcvt.f32.f64
 ;CHECK: vcvt.f64.f32
 %b = fpext float %a to double ; <double> [#uses=1]
 %A = load double* %D ; <double> [#uses=1]
-;CHECK: vcvt.f32.f64
 %B = fptrunc double %A to float ; <float> [#uses=1]
 store double %b, double* %D
 store float %B, float* %P
diff --git a/test/CodeGen/ARM/vld1.ll b/test/CodeGen/ARM/vld1.ll
index c886125a2fb..02e543cccd1 100644
--- a/test/CodeGen/ARM/vld1.ll
+++ b/test/CodeGen/ARM/vld1.ll
@@ -1,4 +1,5 @@
 ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+; RUN: llc < %s -march=arm -mattr=+neon -regalloc=basic | FileCheck %s
 
 define <8 x i8> @vld1i8(i8* %A) nounwind {
 ;CHECK: vld1i8:
@@ -19,7 +20,7 @@ define <4 x i16> @vld1i16(i16* %A) nounwind {
 ;Check for a post-increment updating load.
 define <4 x i16> @vld1i16_update(i16** %ptr) nounwind {
 ;CHECK: vld1i16_update:
-;CHECK: vld1.16 {d16}, [r1]!
+;CHECK: vld1.16 {d16}, [{{r[0-9]+}}]!
 %A = load i16** %ptr
 %tmp0 = bitcast i16* %A to i8*
 %tmp1 = call <4 x i16> @llvm.arm.neon.vld1.v4i16(i8* %tmp0, i32 1)
@@ -39,7 +40,7 @@ define <2 x i32> @vld1i32(i32* %A) nounwind {
 ;Check for a post-increment updating load with register increment.
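+; (A sketch of the two post-increment forms these update tests exercise,
+; with rN/rM standing for whatever registers get allocated:
+;   vld1.32 {d16}, [rN]!      ; rN += number of bytes loaded
+;   vld1.32 {d16}, [rN], rM   ; rN += rM
+; The register-increment form is why the _update functions take an extra
+; i32 %inc operand.)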
 define <2 x i32> @vld1i32_update(i32** %ptr, i32 %inc) nounwind {
 ;CHECK: vld1i32_update:
-;CHECK: vld1.32 {d16}, [r2], r1
+;CHECK: vld1.32 {d16}, [{{r[0-9]+}}], {{r[0-9]+}}
 %A = load i32** %ptr
 %tmp0 = bitcast i32* %A to i8*
 %tmp1 = call <2 x i32> @llvm.arm.neon.vld1.v2i32(i8* %tmp0, i32 1)
@@ -75,7 +76,7 @@ define <16 x i8> @vld1Qi8(i8* %A) nounwind {
 ;Check for a post-increment updating load.
 define <16 x i8> @vld1Qi8_update(i8** %ptr) nounwind {
 ;CHECK: vld1Qi8_update:
-;CHECK: vld1.8 {d16, d17}, [r1, :64]!
+;CHECK: vld1.8 {d16, d17}, [{{r[0-9]+}}, :64]!
 %A = load i8** %ptr
 %tmp1 = call <16 x i8> @llvm.arm.neon.vld1.v16i8(i8* %A, i32 8)
 %tmp2 = getelementptr i8* %A, i32 16
diff --git a/test/CodeGen/ARM/vld3.ll b/test/CodeGen/ARM/vld3.ll
index dde530f6df1..b495319830b 100644
--- a/test/CodeGen/ARM/vld3.ll
+++ b/test/CodeGen/ARM/vld3.ll
@@ -1,4 +1,5 @@
 ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+; RUN: llc < %s -march=arm -mattr=+neon -regalloc=basic | FileCheck %s
 
 %struct.__neon_int8x8x3_t = type { <8 x i8>, <8 x i8>, <8 x i8> }
 %struct.__neon_int16x4x3_t = type { <4 x i16>, <4 x i16>, <4 x i16> }
@@ -36,7 +37,7 @@ define <4 x i16> @vld3i16(i16* %A) nounwind {
 ;Check for a post-increment updating load with register increment.
 define <4 x i16> @vld3i16_update(i16** %ptr, i32 %inc) nounwind {
 ;CHECK: vld3i16_update:
-;CHECK: vld3.16 {d16, d17, d18}, [r2], r1
+;CHECK: vld3.16 {d16, d17, d18}, [{{r[0-9]+}}], {{r[0-9]+}}
 %A = load i16** %ptr
 %tmp0 = bitcast i16* %A to i8*
 %tmp1 = call %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3.v4i16(i8* %tmp0, i32 1)
@@ -121,8 +122,8 @@ define <4 x i32> @vld3Qi32(i32* %A) nounwind {
 ;Check for a post-increment updating load.
 define <4 x i32> @vld3Qi32_update(i32** %ptr) nounwind {
 ;CHECK: vld3Qi32_update:
-;CHECK: vld3.32 {d16, d18, d20}, [r1]!
-;CHECK: vld3.32 {d17, d19, d21}, [r1]!
+;CHECK: vld3.32 {d16, d18, d20}, [r[[R:[0-9]+]]]!
+;CHECK: vld3.32 {d17, d19, d21}, [r[[R]]]!
 %A = load i32** %ptr
 %tmp0 = bitcast i32* %A to i8*
 %tmp1 = call %struct.__neon_int32x4x3_t @llvm.arm.neon.vld3.v4i32(i8* %tmp0, i32 1)
diff --git a/test/CodeGen/ARM/vldlane.ll b/test/CodeGen/ARM/vldlane.ll
index 770ed071ac1..68dd503bcc4 100644
--- a/test/CodeGen/ARM/vldlane.ll
+++ b/test/CodeGen/ARM/vldlane.ll
@@ -1,4 +1,5 @@
 ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+; RUN: llc < %s -march=arm -mattr=+neon -regalloc=basic | FileCheck %s
 
 define <8 x i8> @vld1lanei8(i8* %A, <8 x i8>* %B) nounwind {
 ;CHECK: vld1lanei8:
@@ -279,7 +280,7 @@ define <8 x i16> @vld3laneQi16(i16* %A, <8 x i16>* %B) nounwind {
 ;Check for a post-increment updating load with register increment.
 define <8 x i16> @vld3laneQi16_update(i16** %ptr, <8 x i16>* %B, i32 %inc) nounwind {
 ;CHECK: vld3laneQi16_update:
-;CHECK: vld3.16 {d16[1], d18[1], d20[1]}, [r2], r1
+;CHECK: vld3.16 {d16[1], d18[1], d20[1]}, [{{r[0-9]+}}], {{r[0-9]+}}
 %A = load i16** %ptr
 %tmp0 = bitcast i16* %A to i8*
 %tmp1 = load <8 x i16>* %B
-- 
2.11.0