AMDGPU: Fix return of non-void-returning shaders

author Nicolai Haehnle <nhaehnle@gmail.com>

Wed, 6 Jul 2016 08:35:17 +0000 (08:35 +0000)

committer Nicolai Haehnle <nhaehnle@gmail.com>

Wed, 6 Jul 2016 08:35:17 +0000 (08:35 +0000)
author Nicolai Haehnle <nhaehnle@gmail.com>
Wed, 6 Jul 2016 08:35:17 +0000 (08:35 +0000)
committer Nicolai Haehnle <nhaehnle@gmail.com>
Wed, 6 Jul 2016 08:35:17 +0000 (08:35 +0000)
diff --git a/lib/Target/AMDGPU/SILowerControlFlow.cpp b/lib/Target/AMDGPU/SILowerControlFlow.cpp

index ae23a96..f65d7d8 100644 (file)
--- a/lib/Target/AMDGPU/SILowerControlFlow.cpp
+++ b/lib/Target/AMDGPU/SILowerControlFlow.cpp
@@ -729,14 +729,13 @@ bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) {
  
            break;
  
-        case AMDGPU::S_ENDPGM: {
-          if (MF.getInfo<SIMachineFunctionInfo>()->returnsVoid())
-            break;
+        case AMDGPU::SI_RETURN: {
+          assert(!MF.getInfo<SIMachineFunctionInfo>()->returnsVoid());
  
            // Graphics shaders returning non-void shouldn't contain S_ENDPGM,
            // because external bytecode will be appended at the end.
            if (BI != --MF.end() || I != MBB.getFirstTerminator()) {
-            // S_ENDPGM is not the last instruction. Add an empty block at
+            // SI_RETURN is not the last instruction. Add an empty block at
              // the end and jump there.
              if (!EmptyMBBAtEnd) {
                EmptyMBBAtEnd = MF.CreateMachineBasicBlock();
@@ -746,9 +745,8 @@ bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) {
              MBB.addSuccessor(EmptyMBBAtEnd);
              BuildMI(*BI, I, MI.getDebugLoc(), TII->get(AMDGPU::S_BRANCH))
                      .addMBB(EmptyMBBAtEnd);
+            I->eraseFromParent();
            }
-
-          I->eraseFromParent();
            break;
          }
        }
diff --git a/test/CodeGen/AMDGPU/ret_jump.ll b/test/CodeGen/AMDGPU/ret_jump.ll

index 1e1a757..f7380cd 100644 (file)
--- a/test/CodeGen/AMDGPU/ret_jump.ll
+++ b/test/CodeGen/AMDGPU/ret_jump.ll
@@ -12,10 +12,11 @@
  ; GCN-NEXT: ; mask branch [[UNREACHABLE_BB:BB[0-9]+_[0-9]+]]
  
  ; GCN: [[RET_BB]]:
-; GCN-NEXT: ; return
+; GCN-NEXT: s_branch [[FINAL_BB:BB[0-9]+_[0-9]+]]
  
  ; GCN-NEXT: [[UNREACHABLE_BB]]:
  ; GCN-NEXT: s_or_b64 exec, exec, [[XOR_EXEC]]
+; GCN-NEXT: [[FINAL_BB]]:
  ; GCN-NEXT: .Lfunc_end0
  define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <8 x i32>] addrspace(2)* byval, i32 addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 {
  main_body:
author	Nicolai Haehnle <nhaehnle@gmail.com>
	Wed, 6 Jul 2016 08:35:17 +0000 (08:35 +0000)
committer	Nicolai Haehnle <nhaehnle@gmail.com>
	Wed, 6 Jul 2016 08:35:17 +0000 (08:35 +0000)
lib/Target/AMDGPU/SILowerControlFlow.cpp		patch | blob | history
test/CodeGen/AMDGPU/ret_jump.ll		patch | blob | history