[MachineLICM][X86][AMDGPU] Fix subtle bug in the updating of PhysRegClobbers in post-RA LICM

author Craig Topper <craig.topper@intel.com>

Wed, 5 Dec 2018 03:41:26 +0000 (03:41 +0000)

committer Craig Topper <craig.topper@intel.com>

Wed, 5 Dec 2018 03:41:26 +0000 (03:41 +0000)
author Craig Topper <craig.topper@intel.com>
Wed, 5 Dec 2018 03:41:26 +0000 (03:41 +0000)
committer Craig Topper <craig.topper@intel.com>
Wed, 5 Dec 2018 03:41:26 +0000 (03:41 +0000)
diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp

index 2c9b596..58fd1f2 100644 (file)
--- a/lib/CodeGen/MachineLICM.cpp
+++ b/lib/CodeGen/MachineLICM.cpp
@@ -463,8 +463,12 @@ void MachineLICMBase::ProcessMI(MachineInstr *MI,
      for (MCRegAliasIterator AS(Reg, TRI, true); AS.isValid(); ++AS) {
        if (PhysRegDefs.test(*AS))
          PhysRegClobbers.set(*AS);
-      PhysRegDefs.set(*AS);
      }
+    // Need a second loop because MCRegAliasIterator can visit the same
+    // register twice.
+    for (MCRegAliasIterator AS(Reg, TRI, true); AS.isValid(); ++AS)
+      PhysRegDefs.set(*AS);
+
      if (PhysRegClobbers.test(Reg))
        // MI defined register is seen defined by another instruction in
        // the loop, it cannot be a LICM candidate.
diff --git a/test/CodeGen/AMDGPU/branch-relaxation.ll b/test/CodeGen/AMDGPU/branch-relaxation.ll

index 03c3c50..45ed056 100644 (file)
--- a/test/CodeGen/AMDGPU/branch-relaxation.ll
+++ b/test/CodeGen/AMDGPU/branch-relaxation.ll
@@ -444,7 +444,7 @@ endif:
  ; GCN-NEXT: s_xor_b64 exec, exec, [[TEMP_MASK1]]
  ; GCN-NEXT: ; mask branch [[RET:BB[0-9]+_[0-9]+]]
  
-; GCN: [[LOOP_BODY:BB[0-9]+_[0-9]+]]: ; %loop
+; GCN: [[LOOP_BODY:BB[0-9]+_[0-9]+]]: ; %loop{{$}}
  ; GCN: ;;#ASMSTART
  ; GCN: v_nop_e64
  ; GCN: v_nop_e64
@@ -453,7 +453,7 @@ endif:
  ; GCN: v_nop_e64
  ; GCN: v_nop_e64
  ; GCN: ;;#ASMEND
-; GCN: s_cbranch_execz [[RET]]
+; GCN: s_cbranch_vccz [[RET]]
  
  ; GCN-NEXT: [[LONGBB:BB[0-9]+_[0-9]+]]: ; %loop
  ; GCN-NEXT: ; in Loop: Header=[[LOOP_BODY]] Depth=1
diff --git a/test/CodeGen/AMDGPU/infinite-loop.ll b/test/CodeGen/AMDGPU/infinite-loop.ll

index e265f5c..75ad58d 100644 (file)
--- a/test/CodeGen/AMDGPU/infinite-loop.ll
+++ b/test/CodeGen/AMDGPU/infinite-loop.ll
@@ -32,11 +32,11 @@ loop:
  ; SI: s_cbranch_execz [[RET:BB[0-9]+_[0-9]+]]
  
  ; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x3e7
-; SI: [[LOOP:BB[0-9]+_[0-9]+]]:  ; %loop
  ; SI: s_and_b64 vcc, exec, -1
+; SI: [[LOOP:BB[0-9]+_[0-9]+]]:  ; %loop
  ; SI: s_waitcnt lgkmcnt(0)
  ; SI: buffer_store_dword [[REG]]
-; SI: s_cbranch_execnz [[LOOP]]
+; SI: s_cbranch_vccnz [[LOOP]]
  
  ; SI: [[RET]]:  ; %UnifiedReturnBlock
  ; SI: s_endpgm
diff --git a/test/CodeGen/X86/atomic_mi.ll b/test/CodeGen/X86/atomic_mi.ll

index 308e138..353580b 100644 (file)
--- a/test/CodeGen/X86/atomic_mi.ll
+++ b/test/CodeGen/X86/atomic_mi.ll
@@ -93,11 +93,11 @@ define void @store_atomic_imm_64(i64* %p) {
  ; X32-NEXT:    movl {{[0-9]+}}(%esp), %esi
  ; X32-NEXT:    movl (%esi), %eax
  ; X32-NEXT:    movl 4(%esi), %edx
+; X32-NEXT:    xorl %ecx, %ecx
+; X32-NEXT:    movl $42, %ebx
  ; X32-NEXT:    .p2align 4, 0x90
  ; X32-NEXT:  .LBB3_1: # %atomicrmw.start
  ; X32-NEXT:    # =>This Inner Loop Header: Depth=1
-; X32-NEXT:    xorl %ecx, %ecx
-; X32-NEXT:    movl $42, %ebx
  ; X32-NEXT:    lock cmpxchg8b (%esi)
  ; X32-NEXT:    jne .LBB3_1
  ; X32-NEXT:  # %bb.2: # %atomicrmw.end
@@ -132,11 +132,11 @@ define void @store_atomic_imm_64_big(i64* %p) {
  ; X32-NEXT:    movl {{[0-9]+}}(%esp), %esi
  ; X32-NEXT:    movl (%esi), %eax
  ; X32-NEXT:    movl 4(%esi), %edx
+; X32-NEXT:    movl $23, %ecx
+; X32-NEXT:    movl $1215752192, %ebx # imm = 0x4876E800
  ; X32-NEXT:    .p2align 4, 0x90
  ; X32-NEXT:  .LBB4_1: # %atomicrmw.start
  ; X32-NEXT:    # =>This Inner Loop Header: Depth=1
-; X32-NEXT:    movl $23, %ecx
-; X32-NEXT:    movl $1215752192, %ebx # imm = 0x4876E800
  ; X32-NEXT:    lock cmpxchg8b (%esi)
  ; X32-NEXT:    jne .LBB4_1
  ; X32-NEXT:  # %bb.2: # %atomicrmw.end
@@ -753,10 +753,10 @@ define void @and_64i(i64* %p) {
  ; X32-NEXT:    andl $2, %ebx
  ; X32-NEXT:    movl (%esi), %eax
  ; X32-NEXT:    movl 4(%esi), %edx
+; X32-NEXT:    xorl %ecx, %ecx
  ; X32-NEXT:    .p2align 4, 0x90
  ; X32-NEXT:  .LBB31_1: # %atomicrmw.start
  ; X32-NEXT:    # =>This Inner Loop Header: Depth=1
-; X32-NEXT:    xorl %ecx, %ecx
  ; X32-NEXT:    lock cmpxchg8b (%esi)
  ; X32-NEXT:    jne .LBB31_1
  ; X32-NEXT:  # %bb.2: # %atomicrmw.end
diff --git a/test/CodeGen/X86/x86-shrink-wrapping.ll b/test/CodeGen/X86/x86-shrink-wrapping.ll

index dc4af10..3d65eed 100644 (file)
--- a/test/CodeGen/X86/x86-shrink-wrapping.ll
+++ b/test/CodeGen/X86/x86-shrink-wrapping.ll
@@ -126,7 +126,7 @@ for.preheader:
  for.body:                                         ; preds = %entry, %for.body
    %i.05 = phi i32 [ %inc, %for.body ], [ 0, %for.preheader ]
    %sum.04 = phi i32 [ %add, %for.body ], [ 0, %for.preheader ]
-  %call = tail call i32 asm "movl $$1, $0", "=r,~{ebx}"()
+  %call = tail call i32 asm sideeffect "movl $$1, $0", "=r,~{ebx}"()
    %add = add nsw i32 %call, %sum.04
    %inc = add nuw nsw i32 %i.05, 1
    %exitcond = icmp eq i32 %inc, 10
@@ -178,7 +178,7 @@ for.preheader:
  for.body:                                         ; preds = %for.body, %entry
    %i.04 = phi i32 [ 0, %for.preheader ], [ %inc, %for.body ]
    %sum.03 = phi i32 [ 0, %for.preheader ], [ %add, %for.body ]
-  %call = tail call i32 asm "movl $$1, $0", "=r,~{ebx}"()
+  %call = tail call i32 asm sideeffect "movl $$1, $0", "=r,~{ebx}"()
    %add = add nsw i32 %call, %sum.03
    %inc = add nuw nsw i32 %i.04, 1
    %exitcond = icmp eq i32 %inc, 10
@@ -248,7 +248,7 @@ for.preheader:
  for.body:                                         ; preds = %entry, %for.body
    %i.05 = phi i32 [ %inc, %for.body ], [ 0, %for.preheader ]
    %sum.04 = phi i32 [ %add, %for.body ], [ 0, %for.preheader ]
-  %call = tail call i32 asm "movl $$1, $0", "=r,~{ebx}"()
+  %call = tail call i32 asm sideeffect "movl $$1, $0", "=r,~{ebx}"()
    %add = add nsw i32 %call, %sum.04
    %inc = add nuw nsw i32 %i.05, 1
    %exitcond = icmp eq i32 %inc, 10
@@ -324,7 +324,7 @@ if.then:                                          ; preds = %entry
  for.body:                                         ; preds = %for.body, %if.then
    %i.05 = phi i32 [ 0, %if.then ], [ %inc, %for.body ]
    %sum.04 = phi i32 [ 0, %if.then ], [ %add, %for.body ]
-  %call = tail call i32 asm "movl $$1, $0", "=r,~{ebx}"()
+  %call = tail call i32 asm sideeffect "movl $$1, $0", "=r,~{ebx}"()
    %add = add nsw i32 %call, %sum.04
    %inc = add nuw nsw i32 %i.05, 1
    %exitcond = icmp eq i32 %inc, 10
diff --git a/test/CodeGen/X86/x86-win64-shrink-wrapping.ll b/test/CodeGen/X86/x86-win64-shrink-wrapping.ll

index 5d9b2ba..9142267 100644 (file)
--- a/test/CodeGen/X86/x86-win64-shrink-wrapping.ll
+++ b/test/CodeGen/X86/x86-win64-shrink-wrapping.ll
@@ -100,7 +100,7 @@ for.preheader:                                    ; preds = %entry
  for.body:                                         ; preds = %for.body, %for.preheader
    %i.05 = phi i32 [ %inc, %for.body ], [ 0, %for.preheader ]
    %sum.04 = phi i32 [ %add, %for.body ], [ 0, %for.preheader ]
-  %call = tail call i32 asm "movl $$1, $0", "=r,~{ebx}"()
+  %call = tail call i32 asm sideeffect "movl $$1, $0", "=r,~{ebx}"()
    %add = add nsw i32 %call, %sum.04
    %inc = add nuw nsw i32 %i.05, 1
    %exitcond = icmp eq i32 %inc, 10
author	Craig Topper <craig.topper@intel.com>
	Wed, 5 Dec 2018 03:41:26 +0000 (03:41 +0000)
committer	Craig Topper <craig.topper@intel.com>
	Wed, 5 Dec 2018 03:41:26 +0000 (03:41 +0000)
lib/CodeGen/MachineLICM.cpp		patch | blob | history
test/CodeGen/AMDGPU/branch-relaxation.ll		patch | blob | history
test/CodeGen/AMDGPU/infinite-loop.ll		patch | blob | history
test/CodeGen/X86/atomic_mi.ll		patch | blob | history
test/CodeGen/X86/x86-shrink-wrapping.ll		patch | blob | history
test/CodeGen/X86/x86-win64-shrink-wrapping.ll		patch | blob | history