Kill and collapse outstanding DomainValues.

author Jakob Stoklund Olesen <stoklund@2pi.dk>

Mon, 7 Nov 2011 23:08:21 +0000 (23:08 +0000)

committer Stephen Hines <srhines@google.com>

Mon, 14 Nov 2011 17:11:57 +0000 (09:11 -0800)
author Jakob Stoklund Olesen <stoklund@2pi.dk>
Mon, 7 Nov 2011 23:08:21 +0000 (23:08 +0000)
committer Stephen Hines <srhines@google.com>
Mon, 14 Nov 2011 17:11:57 +0000 (09:11 -0800)
diff --git a/lib/CodeGen/ExecutionDepsFix.cpp b/lib/CodeGen/ExecutionDepsFix.cpp

index 3d6f256..bd77f65 100644 (file)
--- a/lib/CodeGen/ExecutionDepsFix.cpp
+++ b/lib/CodeGen/ExecutionDepsFix.cpp
@@ -510,11 +510,20 @@ bool ExeDepsFix::runOnMachineFunction(MachineFunction &mf) {
      leaveBasicBlock(MBB);
    }
  
-  // Clear the LiveOuts vectors. Should we also collapse any remaining
-  // DomainValues?
-  for (LiveOutMap::const_iterator i = LiveOuts.begin(), e = LiveOuts.end();
-         i != e; ++i)
-    delete[] i->second;
+  // Clear the LiveOuts vectors and collapse any remaining DomainValues.
+  for (ReversePostOrderTraversal<MachineBasicBlock*>::rpo_iterator
+         MBBI = RPOT.begin(), MBBE = RPOT.end(); MBBI != MBBE; ++MBBI) {
+    LiveOutMap::const_iterator FI = LiveOuts.find(*MBBI);
+    if (FI == LiveOuts.end())
+      continue;
+    assert(FI->second && "Null entry");
+    // The DomainValue is collapsed when the last reference is killed.
+    LiveRegs = FI->second;
+    for (unsigned i = 0, e = NumRegs; i != e; ++i)
+      if (LiveRegs[i])
+        Kill(i);
+    delete[] LiveRegs;
+  }
    LiveOuts.clear();
    Avail.clear();
    Allocator.DestroyAll();
diff --git a/test/CodeGen/X86/avx-intrinsics-x86.ll b/test/CodeGen/X86/avx-intrinsics-x86.ll

index 276209e..3fa1d95 100644 (file)
--- a/test/CodeGen/X86/avx-intrinsics-x86.ll
+++ b/test/CodeGen/X86/avx-intrinsics-x86.ll
@@ -315,24 +315,31 @@ declare i32 @llvm.x86.sse2.movmsk.pd(<2 x double>) nounwind readnone
  
  
  define void @test_x86_sse2_movnt_dq(i8* %a0, <2 x i64> %a1) {
+  ; CHECK: test_x86_sse2_movnt_dq
    ; CHECK: movl
    ; CHECK: vmovntdq
-  call void @llvm.x86.sse2.movnt.dq(i8* %a0, <2 x i64> %a1)
+  ; add operation forces the execution domain.
+  %a2 = add <2 x i64> %a1, <i64 1, i64 1>
+  call void @llvm.x86.sse2.movnt.dq(i8* %a0, <2 x i64> %a2)
    ret void
  }
  declare void @llvm.x86.sse2.movnt.dq(i8*, <2 x i64>) nounwind
  
  
  define void @test_x86_sse2_movnt_pd(i8* %a0, <2 x double> %a1) {
+  ; CHECK: test_x86_sse2_movnt_pd
    ; CHECK: movl
    ; CHECK: vmovntpd
-  call void @llvm.x86.sse2.movnt.pd(i8* %a0, <2 x double> %a1)
+  ; fadd operation forces the execution domain.
+  %a2 = fadd <2 x double> %a1, <double 0x0, double 0x4200000000000000>
+  call void @llvm.x86.sse2.movnt.pd(i8* %a0, <2 x double> %a2)
    ret void
  }
  declare void @llvm.x86.sse2.movnt.pd(i8*, <2 x double>) nounwind
  
  
  define <2 x double> @test_x86_sse2_mul_sd(<2 x double> %a0, <2 x double> %a1) {
+  ; CHECK: test_x86_sse2_mul_sd
    ; CHECK: vmulsd
    %res = call <2 x double> @llvm.x86.sse2.mul.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
    ret <2 x double> %res
@@ -749,6 +756,7 @@ declare <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double>) nounwind readnone
  
  
  define void @test_x86_sse2_storel_dq(i8* %a0, <4 x i32> %a1) {
+  ; CHECK: test_x86_sse2_storel_dq
    ; CHECK: movl
    ; CHECK: vmovq
    call void @llvm.x86.sse2.storel.dq(i8* %a0, <4 x i32> %a1)
@@ -758,6 +766,7 @@ declare void @llvm.x86.sse2.storel.dq(i8*, <4 x i32>) nounwind
  
  
  define void @test_x86_sse2_storeu_dq(i8* %a0, <16 x i8> %a1) {
+  ; CHECK: test_x86_sse2_storeu_dq
    ; CHECK: movl
    ; CHECK: vmovdqu
    call void @llvm.x86.sse2.storeu.dq(i8* %a0, <16 x i8> %a1)
@@ -767,15 +776,18 @@ declare void @llvm.x86.sse2.storeu.dq(i8*, <16 x i8>) nounwind
  
  
  define void @test_x86_sse2_storeu_pd(i8* %a0, <2 x double> %a1) {
+  ; CHECK: test_x86_sse2_storeu_pd
    ; CHECK: movl
    ; CHECK: vmovupd
-  call void @llvm.x86.sse2.storeu.pd(i8* %a0, <2 x double> %a1)
+  %a2 = fadd <2 x double> %a1, <double 0x0, double 0x4200000000000000>
+  call void @llvm.x86.sse2.storeu.pd(i8* %a0, <2 x double> %a2)
    ret void
  }
  declare void @llvm.x86.sse2.storeu.pd(i8*, <2 x double>) nounwind
  
  
  define <2 x double> @test_x86_sse2_sub_sd(<2 x double> %a0, <2 x double> %a1) {
+  ; CHECK: test_x86_sse2_sub_sd
    ; CHECK: vsubsd
    %res = call <2 x double> @llvm.x86.sse2.sub.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
    ret <2 x double> %res
diff --git a/test/CodeGen/X86/avx-logic.ll b/test/CodeGen/X86/avx-logic.ll

index 518c09c..cd37135 100644 (file)
--- a/test/CodeGen/X86/avx-logic.ll
+++ b/test/CodeGen/X86/avx-logic.ll
@@ -165,7 +165,9 @@ entry:
  ; CHECK: vpandn  %xmm
  define <2 x i64> @vpandn(<2 x i64> %a, <2 x i64> %b) nounwind uwtable readnone ssp {
  entry:
-  %y = xor <2 x i64> %a, <i64 -1, i64 -1>
+  ; Force the execution domain with an add.
+  %a2 = add <2 x i64> %a, <i64 1, i64 1>
+  %y = xor <2 x i64> %a2, <i64 -1, i64 -1>
    %x = and <2 x i64> %a, %y
    ret <2 x i64> %x
  }
@@ -173,7 +175,9 @@ entry:
  ; CHECK: vpand %xmm
  define <2 x i64> @vpand(<2 x i64> %a, <2 x i64> %b) nounwind uwtable readnone ssp {
  entry:
-  %x = and <2 x i64> %a, %b
+  ; Force the execution domain with an add.
+  %a2 = add <2 x i64> %a, <i64 1, i64 1>
+  %x = and <2 x i64> %a2, %b
    ret <2 x i64> %x
  }
  
diff --git a/test/CodeGen/X86/nontemporal.ll b/test/CodeGen/X86/nontemporal.ll

index 1d09535..ae04435 100644 (file)
--- a/test/CodeGen/X86/nontemporal.ll
+++ b/test/CodeGen/X86/nontemporal.ll
@@ -3,13 +3,16 @@
  define void @f(<4 x float> %A, i8* %B, <2 x double> %C, i32 %D, <2 x i64> %E) {
  ; CHECK: movntps
    %cast = bitcast i8* %B to <4 x float>*
-  store <4 x float> %A, <4 x float>* %cast, align 16, !nontemporal !0
+  %A2 = fadd <4 x float> %A, <float 0x0, float 0x0, float 0x0, float 0x4200000000000000>
+  store <4 x float> %A2, <4 x float>* %cast, align 16, !nontemporal !0
  ; CHECK: movntdq
    %cast1 = bitcast i8* %B to <2 x i64>*
-  store <2 x i64> %E, <2 x i64>* %cast1, align 16, !nontemporal !0
+  %E2 = add <2 x i64> %E, <i64 1, i64 2>
+  store <2 x i64> %E2, <2 x i64>* %cast1, align 16, !nontemporal !0
  ; CHECK: movntpd
    %cast2 = bitcast i8* %B to <2 x double>*
-  store <2 x double> %C, <2 x double>* %cast2, align 16, !nontemporal !0
+  %C2 = fadd <2 x double> %C, <double 0x0, double 0x4200000000000000>
+  store <2 x double> %C2, <2 x double>* %cast2, align 16, !nontemporal !0
  ; CHECK: movnti
    %cast3 = bitcast i8* %B to i32*
    store i32 %D, i32* %cast3, align 16, !nontemporal !0
diff --git a/test/CodeGen/X86/sse-align-3.ll b/test/CodeGen/X86/sse-align-3.ll

index 04f2161..b6b0471 100644 (file)
--- a/test/CodeGen/X86/sse-align-3.ll
+++ b/test/CodeGen/X86/sse-align-3.ll
@@ -1,8 +1,8 @@
  ; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s
  ; CHECK-NOT:     movapd
  ; CHECK:     movaps
-; CHECK-NOT:     movaps
-; CHECK:     movapd
+; CHECK-NOT:     movapd
+; CHECK:     movaps
  ; CHECK-NOT:     movap
  
  define void @foo(<4 x float>* %p, <4 x float> %x) nounwind {
diff --git a/test/CodeGen/X86/sse2-blend.ll b/test/CodeGen/X86/sse2-blend.ll

index 4ff1d03..2f4317b 100644 (file)
--- a/test/CodeGen/X86/sse2-blend.ll
+++ b/test/CodeGen/X86/sse2-blend.ll
@@ -26,11 +26,10 @@ define void@vsel_i32(<4 x i32>* %v1, <4 x i32>* %v2) {
    ret void
  }
  
-; FIXME: The -mattr=+sse2,-sse41 disable the ExecutionDepsFix pass causing the
-; mixed domains here.
+; Without forcing instructions, fall back to the preferred PS domain.
  ; CHECK: vsel_i64
  ; CHECK: xorps
-; CHECK: pand
+; CHECK: andps
  ; CHECK: andnps
  ; CHECK: orps
  ; CHECK: ret
@@ -43,16 +42,14 @@ define void@vsel_i64(<4 x i64>* %v1, <4 x i64>* %v2) {
    ret void
  }
  
-; FIXME: The -mattr=+sse2,-sse41 disable the ExecutionDepsFix pass causing the
-; mixed domains here.
+; Without forcing instructions, fall back to the preferred PS domain.
  ; CHECK: vsel_double
  ; CHECK: xorps
-; CHECK: pand
+; CHECK: andps
  ; CHECK: andnps
  ; CHECK: orps
  ; CHECK: ret
  
-
  define void@vsel_double(<4 x double>* %v1, <4 x double>* %v2) {
    %A = load <4 x double>* %v1
    %B = load <4 x double>* %v2
diff --git a/test/CodeGen/X86/sse2.ll b/test/CodeGen/X86/sse2.ll

index d520d5c..1d74af2 100644 (file)
--- a/test/CodeGen/X86/sse2.ll
+++ b/test/CodeGen/X86/sse2.ll
@@ -144,7 +144,7 @@ define <2 x double> @test11(double %a, double %b) nounwind {
         %tmp7 = insertelement <2 x double> %tmp, double %b, i32 1               ; <<2 x double>> [#uses=1]
         ret <2 x double> %tmp7
  ; CHECK: test11:
-; CHECK: movapd        4(%esp), %xmm0
+; CHECK: movaps        4(%esp), %xmm0
  }
  
  define void @test12() nounwind {
diff --git a/test/CodeGen/X86/vec_shuffle.ll b/test/CodeGen/X86/vec_shuffle.ll

index 2a48de2..d20b3e7 100644 (file)
--- a/test/CodeGen/X86/vec_shuffle.ll
+++ b/test/CodeGen/X86/vec_shuffle.ll
@@ -1,9 +1,8 @@
-; RUN: llc < %s -march=x86 -mcpu=core2 -o %t
-; RUN: grep movq    %t | count 1
-; RUN: grep pshufd  %t | count 1
-; RUN: grep movupd  %t | count 1
-; RUN: grep pshufhw %t | count 1
+; RUN: llc < %s -march=x86 -mcpu=core2 | FileCheck %s
  
+; CHECK: test_v4sf
+; CHECK: movq 8(%esp)
+; CHECK: pshufd $80
  define void @test_v4sf(<4 x float>* %P, float %X, float %Y) nounwind {
         %tmp = insertelement <4 x float> zeroinitializer, float %X, i32 0               ; <<4 x float>> [#uses=1]
         %tmp2 = insertelement <4 x float> %tmp, float %X, i32 1         ; <<4 x float>> [#uses=1]
@@ -13,6 +12,9 @@ define void @test_v4sf(<4 x float>* %P, float %X, float %Y) nounwind {
         ret void
  }
  
+; CHECK: test_v2sd
+; CHECK: movups        8(%esp)
+; CHECK: movaps
  define void @test_v2sd(<2 x double>* %P, double %X, double %Y) nounwind {
         %tmp = insertelement <2 x double> zeroinitializer, double %X, i32 0             ; <<2 x double>> [#uses=1]
         %tmp2 = insertelement <2 x double> %tmp, double %Y, i32 1               ; <<2 x double>> [#uses=1]
@@ -20,6 +22,9 @@ define void @test_v2sd(<2 x double>* %P, double %X, double %Y) nounwind {
         ret void
  }
  
+; CHECK: test_v8i16
+; CHECK: pshufhw $-58
+; CHECK: movdqa
  define void @test_v8i16(<2 x i64>* %res, <2 x i64>* %A) nounwind {
         %tmp = load <2 x i64>* %A               ; <<2 x i64>> [#uses=1]
         %tmp.upgrd.1 = bitcast <2 x i64> %tmp to <8 x i16>              ; <<8 x i16>> [#uses=8]
author	Jakob Stoklund Olesen <stoklund@2pi.dk>
	Mon, 7 Nov 2011 23:08:21 +0000 (23:08 +0000)
committer	Stephen Hines <srhines@google.com>
	Mon, 14 Nov 2011 17:11:57 +0000 (09:11 -0800)
lib/CodeGen/ExecutionDepsFix.cpp		patch \| blob \| history
test/CodeGen/X86/avx-intrinsics-x86.ll		patch \| blob \| history
test/CodeGen/X86/avx-logic.ll		patch \| blob \| history
test/CodeGen/X86/nontemporal.ll		patch \| blob \| history
test/CodeGen/X86/sse-align-3.ll		patch \| blob \| history
test/CodeGen/X86/sse2-blend.ll		patch \| blob \| history
test/CodeGen/X86/sse2.ll		patch \| blob \| history
test/CodeGen/X86/vec_shuffle.ll		patch \| blob \| history