leaveBasicBlock(MBB);
}
- // Clear the LiveOuts vectors. Should we also collapse any remaining
- // DomainValues?
- for (LiveOutMap::const_iterator i = LiveOuts.begin(), e = LiveOuts.end();
- i != e; ++i)
- delete[] i->second;
+ // Clear the LiveOuts vectors and collapse any remaining DomainValues.
+ for (ReversePostOrderTraversal<MachineBasicBlock*>::rpo_iterator
+ MBBI = RPOT.begin(), MBBE = RPOT.end(); MBBI != MBBE; ++MBBI) {
+ LiveOutMap::const_iterator FI = LiveOuts.find(*MBBI);
+ if (FI == LiveOuts.end())
+ continue;
+ assert(FI->second && "Null entry");
+ // The DomainValue is collapsed when the last reference is killed.
+ LiveRegs = FI->second;
+ for (unsigned i = 0, e = NumRegs; i != e; ++i)
+ if (LiveRegs[i])
+ Kill(i);
+ delete[] LiveRegs;
+ }
LiveOuts.clear();
Avail.clear();
Allocator.DestroyAll();
define void @test_x86_sse2_movnt_dq(i8* %a0, <2 x i64> %a1) {
+ ; CHECK: test_x86_sse2_movnt_dq
; CHECK: movl
; CHECK: vmovntdq
- call void @llvm.x86.sse2.movnt.dq(i8* %a0, <2 x i64> %a1)
+ ; add operation forces the execution domain.
+ ; The add gives the stored value an integer-domain def, so the store is
+ ; expected to be selected as vmovntdq (see the CHECK lines above).
+ %a2 = add <2 x i64> %a1, <i64 1, i64 1>
+ call void @llvm.x86.sse2.movnt.dq(i8* %a0, <2 x i64> %a2)
ret void
}
declare void @llvm.x86.sse2.movnt.dq(i8*, <2 x i64>) nounwind
define void @test_x86_sse2_movnt_pd(i8* %a0, <2 x double> %a1) {
+ ; CHECK: test_x86_sse2_movnt_pd
; CHECK: movl
; CHECK: vmovntpd
- call void @llvm.x86.sse2.movnt.pd(i8* %a0, <2 x double> %a1)
+ ; fadd operation forces the execution domain.
+ %a2 = fadd <2 x double> %a1, <double 0x0, double 0x4200000000000000>
+ call void @llvm.x86.sse2.movnt.pd(i8* %a0, <2 x double> %a2)
ret void
}
declare void @llvm.x86.sse2.movnt.pd(i8*, <2 x double>) nounwind
define <2 x double> @test_x86_sse2_mul_sd(<2 x double> %a0, <2 x double> %a1) {
+ ; CHECK: test_x86_sse2_mul_sd
; CHECK: vmulsd
%res = call <2 x double> @llvm.x86.sse2.mul.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
ret <2 x double> %res
define void @test_x86_sse2_storel_dq(i8* %a0, <4 x i32> %a1) {
+ ; CHECK: test_x86_sse2_storel_dq
; CHECK: movl
; CHECK: vmovq
call void @llvm.x86.sse2.storel.dq(i8* %a0, <4 x i32> %a1)
define void @test_x86_sse2_storeu_dq(i8* %a0, <16 x i8> %a1) {
+ ; CHECK: test_x86_sse2_storeu_dq
; CHECK: movl
; CHECK: vmovdqu
call void @llvm.x86.sse2.storeu.dq(i8* %a0, <16 x i8> %a1)
define void @test_x86_sse2_storeu_pd(i8* %a0, <2 x double> %a1) {
+ ; CHECK: test_x86_sse2_storeu_pd
; CHECK: movl
; CHECK: vmovupd
+ ; fadd operation forces the execution domain.
+ %a2 = fadd <2 x double> %a1, <double 0x0, double 0x4200000000000000>
+ call void @llvm.x86.sse2.storeu.pd(i8* %a0, <2 x double> %a2)
ret void
}
declare void @llvm.x86.sse2.storeu.pd(i8*, <2 x double>) nounwind
define <2 x double> @test_x86_sse2_sub_sd(<2 x double> %a0, <2 x double> %a1) {
+ ; CHECK: test_x86_sse2_sub_sd
; CHECK: vsubsd
%res = call <2 x double> @llvm.x86.sse2.sub.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
ret <2 x double> %res
; CHECK: vpandn %xmm
define <2 x i64> @vpandn(<2 x i64> %a, <2 x i64> %b) nounwind uwtable readnone ssp {
entry:
- %y = xor <2 x i64> %a, <i64 -1, i64 -1>
+ ; Force the execution domain with an add.
+ %a2 = add <2 x i64> %a, <i64 1, i64 1>
+ %y = xor <2 x i64> %a2, <i64 -1, i64 -1>
; Note: only the xor input changed to %a2; the 'and' below still uses the
; original %a, so the and-with-inverted-operand shape is unchanged.
%x = and <2 x i64> %a, %y
ret <2 x i64> %x
}
; CHECK: vpand %xmm
define <2 x i64> @vpand(<2 x i64> %a, <2 x i64> %b) nounwind uwtable readnone ssp {
entry:
- %x = and <2 x i64> %a, %b
+ ; Force the execution domain with an add.
+ ; The integer add feeding the 'and' is what the CHECK above relies on to
+ ; get vpand rather than a floating-point-domain logical op.
+ %a2 = add <2 x i64> %a, <i64 1, i64 1>
+ %x = and <2 x i64> %a2, %b
ret <2 x i64> %x
}
define void @f(<4 x float> %A, i8* %B, <2 x double> %C, i32 %D, <2 x i64> %E) {
; CHECK: movntps
%cast = bitcast i8* %B to <4 x float>*
- store <4 x float> %A, <4 x float>* %cast, align 16, !nontemporal !0
+ %A2 = fadd <4 x float> %A, <float 0x0, float 0x0, float 0x0, float 0x4200000000000000>
+ store <4 x float> %A2, <4 x float>* %cast, align 16, !nontemporal !0
; CHECK: movntdq
%cast1 = bitcast i8* %B to <2 x i64>*
- store <2 x i64> %E, <2 x i64>* %cast1, align 16, !nontemporal !0
+ %E2 = add <2 x i64> %E, <i64 1, i64 2>
+ store <2 x i64> %E2, <2 x i64>* %cast1, align 16, !nontemporal !0
; CHECK: movntpd
%cast2 = bitcast i8* %B to <2 x double>*
- store <2 x double> %C, <2 x double>* %cast2, align 16, !nontemporal !0
+ %C2 = fadd <2 x double> %C, <double 0x0, double 0x4200000000000000>
+ store <2 x double> %C2, <2 x double>* %cast2, align 16, !nontemporal !0
; CHECK: movnti
%cast3 = bitcast i8* %B to i32*
store i32 %D, i32* %cast3, align 16, !nontemporal !0
; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s
; CHECK-NOT: movapd
; CHECK: movaps
-; CHECK-NOT: movaps
-; CHECK: movapd
+; CHECK-NOT: movapd
+; CHECK: movaps
; CHECK-NOT: movap
define void @foo(<4 x float>* %p, <4 x float> %x) nounwind {
ret void
}
-; FIXME: The -mattr=+sse2,-sse41 disable the ExecutionDepsFix pass causing the
-; mixed domains here.
+ ; Without domain-forcing instructions, the backend falls back to the
+ ; preferred PS domain.
; CHECK: vsel_i64
; CHECK: xorps
-; CHECK: pand
+; CHECK: andps
; CHECK: andnps
; CHECK: orps
; CHECK: ret
ret void
}
-; FIXME: The -mattr=+sse2,-sse41 disable the ExecutionDepsFix pass causing the
-; mixed domains here.
+ ; Without domain-forcing instructions, the backend falls back to the
+ ; preferred PS domain.
; CHECK: vsel_double
; CHECK: xorps
-; CHECK: pand
+; CHECK: andps
; CHECK: andnps
; CHECK: orps
; CHECK: ret
-
define void@vsel_double(<4 x double>* %v1, <4 x double>* %v2) {
%A = load <4 x double>* %v1
%B = load <4 x double>* %v2
%tmp7 = insertelement <2 x double> %tmp, double %b, i32 1 ; <<2 x double>> [#uses=1]
ret <2 x double> %tmp7
; CHECK: test11:
-; CHECK: movapd 4(%esp), %xmm0
+; CHECK: movaps 4(%esp), %xmm0
}
define void @test12() nounwind {
-; RUN: llc < %s -march=x86 -mcpu=core2 -o %t
-; RUN: grep movq %t | count 1
-; RUN: grep pshufd %t | count 1
-; RUN: grep movupd %t | count 1
-; RUN: grep pshufhw %t | count 1
+; RUN: llc < %s -march=x86 -mcpu=core2 | FileCheck %s
+; CHECK: test_v4sf
+; CHECK: movq 8(%esp)
+; CHECK: pshufd $80
define void @test_v4sf(<4 x float>* %P, float %X, float %Y) nounwind {
%tmp = insertelement <4 x float> zeroinitializer, float %X, i32 0 ; <<4 x float>> [#uses=1]
%tmp2 = insertelement <4 x float> %tmp, float %X, i32 1 ; <<4 x float>> [#uses=1]
ret void
}
+; CHECK: test_v2sd
+; CHECK: movups 8(%esp)
+; CHECK: movaps
define void @test_v2sd(<2 x double>* %P, double %X, double %Y) nounwind {
%tmp = insertelement <2 x double> zeroinitializer, double %X, i32 0 ; <<2 x double>> [#uses=1]
%tmp2 = insertelement <2 x double> %tmp, double %Y, i32 1 ; <<2 x double>> [#uses=1]
ret void
}
+; CHECK: test_v8i16
+; CHECK: pshufhw $-58
+; CHECK: movdqa
define void @test_v8i16(<2 x i64>* %res, <2 x i64>* %A) nounwind {
%tmp = load <2 x i64>* %A ; <<2 x i64>> [#uses=1]
%tmp.upgrd.1 = bitcast <2 x i64> %tmp to <8 x i16> ; <<8 x i16>> [#uses=8]