From 23084bdf814dacaabd1a7e1fc8b10d7d07873c86 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Wed, 5 Dec 2018 03:41:26 +0000 Subject: [PATCH] [MachineLICM][X86][AMDGPU] Fix subtle bug in the updating of PhysRegClobbers in post-RA LICM It looks like MCRegAliasIterator can visit the same physical register twice. When this happens in this code in LICM we end up setting the PhysRegDef and then later in the same loop visit the register again. Now we see that PhysRegDef is set from the earlier iteration so we now set PhysRegClobber. This patch splits the loop so we have one that uses the previous value of PhysRegDef to update PhysRegClobber and a second loop that updates PhysRegDef. The X86 atomic test is an improvement. I had to add sideeffect to the two shrink wrapping tests to prevent hoisting from occurring. I'm not sure about the AMDGPU tests. It looks like the branch instruction changed at the end of the loops. And in the branch-relaxation test I think there is now an "and vcc, exec, -1" instruction that wasn't there before. 
Differential Revision: https://reviews.llvm.org/D55102 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@348330 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/MachineLICM.cpp | 6 +++++- test/CodeGen/AMDGPU/branch-relaxation.ll | 4 ++-- test/CodeGen/AMDGPU/infinite-loop.ll | 4 ++-- test/CodeGen/X86/atomic_mi.ll | 10 +++++----- test/CodeGen/X86/x86-shrink-wrapping.ll | 8 ++++---- test/CodeGen/X86/x86-win64-shrink-wrapping.ll | 2 +- 6 files changed, 19 insertions(+), 15 deletions(-) diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp index 2c9b5963537..58fd1f23842 100644 --- a/lib/CodeGen/MachineLICM.cpp +++ b/lib/CodeGen/MachineLICM.cpp @@ -463,8 +463,12 @@ void MachineLICMBase::ProcessMI(MachineInstr *MI, for (MCRegAliasIterator AS(Reg, TRI, true); AS.isValid(); ++AS) { if (PhysRegDefs.test(*AS)) PhysRegClobbers.set(*AS); - PhysRegDefs.set(*AS); } + // Need a second loop because MCRegAliasIterator can visit the same + // register twice. + for (MCRegAliasIterator AS(Reg, TRI, true); AS.isValid(); ++AS) + PhysRegDefs.set(*AS); + if (PhysRegClobbers.test(Reg)) // MI defined register is seen defined by another instruction in // the loop, it cannot be a LICM candidate. 
diff --git a/test/CodeGen/AMDGPU/branch-relaxation.ll b/test/CodeGen/AMDGPU/branch-relaxation.ll index 03c3c5031b3..45ed056567c 100644 --- a/test/CodeGen/AMDGPU/branch-relaxation.ll +++ b/test/CodeGen/AMDGPU/branch-relaxation.ll @@ -444,7 +444,7 @@ endif: ; GCN-NEXT: s_xor_b64 exec, exec, [[TEMP_MASK1]] ; GCN-NEXT: ; mask branch [[RET:BB[0-9]+_[0-9]+]] -; GCN: [[LOOP_BODY:BB[0-9]+_[0-9]+]]: ; %loop +; GCN: [[LOOP_BODY:BB[0-9]+_[0-9]+]]: ; %loop{{$}} ; GCN: ;;#ASMSTART ; GCN: v_nop_e64 ; GCN: v_nop_e64 @@ -453,7 +453,7 @@ endif: ; GCN: v_nop_e64 ; GCN: v_nop_e64 ; GCN: ;;#ASMEND -; GCN: s_cbranch_execz [[RET]] +; GCN: s_cbranch_vccz [[RET]] ; GCN-NEXT: [[LONGBB:BB[0-9]+_[0-9]+]]: ; %loop ; GCN-NEXT: ; in Loop: Header=[[LOOP_BODY]] Depth=1 diff --git a/test/CodeGen/AMDGPU/infinite-loop.ll b/test/CodeGen/AMDGPU/infinite-loop.ll index e265f5ca4ea..75ad58df43b 100644 --- a/test/CodeGen/AMDGPU/infinite-loop.ll +++ b/test/CodeGen/AMDGPU/infinite-loop.ll @@ -32,11 +32,11 @@ loop: ; SI: s_cbranch_execz [[RET:BB[0-9]+_[0-9]+]] ; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x3e7 -; SI: [[LOOP:BB[0-9]+_[0-9]+]]: ; %loop ; SI: s_and_b64 vcc, exec, -1 +; SI: [[LOOP:BB[0-9]+_[0-9]+]]: ; %loop ; SI: s_waitcnt lgkmcnt(0) ; SI: buffer_store_dword [[REG]] -; SI: s_cbranch_execnz [[LOOP]] +; SI: s_cbranch_vccnz [[LOOP]] ; SI: [[RET]]: ; %UnifiedReturnBlock ; SI: s_endpgm diff --git a/test/CodeGen/X86/atomic_mi.ll b/test/CodeGen/X86/atomic_mi.ll index 308e138acd8..353580bbf7e 100644 --- a/test/CodeGen/X86/atomic_mi.ll +++ b/test/CodeGen/X86/atomic_mi.ll @@ -93,11 +93,11 @@ define void @store_atomic_imm_64(i64* %p) { ; X32-NEXT: movl {{[0-9]+}}(%esp), %esi ; X32-NEXT: movl (%esi), %eax ; X32-NEXT: movl 4(%esi), %edx +; X32-NEXT: xorl %ecx, %ecx +; X32-NEXT: movl $42, %ebx ; X32-NEXT: .p2align 4, 0x90 ; X32-NEXT: .LBB3_1: # %atomicrmw.start ; X32-NEXT: # =>This Inner Loop Header: Depth=1 -; X32-NEXT: xorl %ecx, %ecx -; X32-NEXT: movl $42, %ebx ; X32-NEXT: lock cmpxchg8b (%esi) ; X32-NEXT: jne 
.LBB3_1 ; X32-NEXT: # %bb.2: # %atomicrmw.end @@ -132,11 +132,11 @@ define void @store_atomic_imm_64_big(i64* %p) { ; X32-NEXT: movl {{[0-9]+}}(%esp), %esi ; X32-NEXT: movl (%esi), %eax ; X32-NEXT: movl 4(%esi), %edx +; X32-NEXT: movl $23, %ecx +; X32-NEXT: movl $1215752192, %ebx # imm = 0x4876E800 ; X32-NEXT: .p2align 4, 0x90 ; X32-NEXT: .LBB4_1: # %atomicrmw.start ; X32-NEXT: # =>This Inner Loop Header: Depth=1 -; X32-NEXT: movl $23, %ecx -; X32-NEXT: movl $1215752192, %ebx # imm = 0x4876E800 ; X32-NEXT: lock cmpxchg8b (%esi) ; X32-NEXT: jne .LBB4_1 ; X32-NEXT: # %bb.2: # %atomicrmw.end @@ -753,10 +753,10 @@ define void @and_64i(i64* %p) { ; X32-NEXT: andl $2, %ebx ; X32-NEXT: movl (%esi), %eax ; X32-NEXT: movl 4(%esi), %edx +; X32-NEXT: xorl %ecx, %ecx ; X32-NEXT: .p2align 4, 0x90 ; X32-NEXT: .LBB31_1: # %atomicrmw.start ; X32-NEXT: # =>This Inner Loop Header: Depth=1 -; X32-NEXT: xorl %ecx, %ecx ; X32-NEXT: lock cmpxchg8b (%esi) ; X32-NEXT: jne .LBB31_1 ; X32-NEXT: # %bb.2: # %atomicrmw.end diff --git a/test/CodeGen/X86/x86-shrink-wrapping.ll b/test/CodeGen/X86/x86-shrink-wrapping.ll index dc4af10e8a9..3d65eed6ebb 100644 --- a/test/CodeGen/X86/x86-shrink-wrapping.ll +++ b/test/CodeGen/X86/x86-shrink-wrapping.ll @@ -126,7 +126,7 @@ for.preheader: for.body: ; preds = %entry, %for.body %i.05 = phi i32 [ %inc, %for.body ], [ 0, %for.preheader ] %sum.04 = phi i32 [ %add, %for.body ], [ 0, %for.preheader ] - %call = tail call i32 asm "movl $$1, $0", "=r,~{ebx}"() + %call = tail call i32 asm sideeffect "movl $$1, $0", "=r,~{ebx}"() %add = add nsw i32 %call, %sum.04 %inc = add nuw nsw i32 %i.05, 1 %exitcond = icmp eq i32 %inc, 10 @@ -178,7 +178,7 @@ for.preheader: for.body: ; preds = %for.body, %entry %i.04 = phi i32 [ 0, %for.preheader ], [ %inc, %for.body ] %sum.03 = phi i32 [ 0, %for.preheader ], [ %add, %for.body ] - %call = tail call i32 asm "movl $$1, $0", "=r,~{ebx}"() + %call = tail call i32 asm sideeffect "movl $$1, $0", "=r,~{ebx}"() %add = add nsw i32 %call, 
%sum.03 %inc = add nuw nsw i32 %i.04, 1 %exitcond = icmp eq i32 %inc, 10 @@ -248,7 +248,7 @@ for.preheader: for.body: ; preds = %entry, %for.body %i.05 = phi i32 [ %inc, %for.body ], [ 0, %for.preheader ] %sum.04 = phi i32 [ %add, %for.body ], [ 0, %for.preheader ] - %call = tail call i32 asm "movl $$1, $0", "=r,~{ebx}"() + %call = tail call i32 asm sideeffect "movl $$1, $0", "=r,~{ebx}"() %add = add nsw i32 %call, %sum.04 %inc = add nuw nsw i32 %i.05, 1 %exitcond = icmp eq i32 %inc, 10 @@ -324,7 +324,7 @@ if.then: ; preds = %entry for.body: ; preds = %for.body, %if.then %i.05 = phi i32 [ 0, %if.then ], [ %inc, %for.body ] %sum.04 = phi i32 [ 0, %if.then ], [ %add, %for.body ] - %call = tail call i32 asm "movl $$1, $0", "=r,~{ebx}"() + %call = tail call i32 asm sideeffect "movl $$1, $0", "=r,~{ebx}"() %add = add nsw i32 %call, %sum.04 %inc = add nuw nsw i32 %i.05, 1 %exitcond = icmp eq i32 %inc, 10 diff --git a/test/CodeGen/X86/x86-win64-shrink-wrapping.ll b/test/CodeGen/X86/x86-win64-shrink-wrapping.ll index 5d9b2ba3267..9142267d157 100644 --- a/test/CodeGen/X86/x86-win64-shrink-wrapping.ll +++ b/test/CodeGen/X86/x86-win64-shrink-wrapping.ll @@ -100,7 +100,7 @@ for.preheader: ; preds = %entry for.body: ; preds = %for.body, %for.preheader %i.05 = phi i32 [ %inc, %for.body ], [ 0, %for.preheader ] %sum.04 = phi i32 [ %add, %for.body ], [ 0, %for.preheader ] - %call = tail call i32 asm "movl $$1, $0", "=r,~{ebx}"() + %call = tail call i32 asm sideeffect "movl $$1, $0", "=r,~{ebx}"() %add = add nsw i32 %call, %sum.04 %inc = add nuw nsw i32 %i.05, 1 %exitcond = icmp eq i32 %inc, 10 -- 2.11.0