AMDGPU: Add amdgpu-ps-wqm-outputs function attributes

author Nicolai Haehnle <nhaehnle@gmail.com>

Tue, 7 Jun 2016 21:37:17 +0000 (21:37 +0000)

committer Nicolai Haehnle <nhaehnle@gmail.com>

Tue, 7 Jun 2016 21:37:17 +0000 (21:37 +0000)
author Nicolai Haehnle <nhaehnle@gmail.com>
Tue, 7 Jun 2016 21:37:17 +0000 (21:37 +0000)
committer Nicolai Haehnle <nhaehnle@gmail.com>
Tue, 7 Jun 2016 21:37:17 +0000 (21:37 +0000)
diff --git a/lib/Target/AMDGPU/SIWholeQuadMode.cpp b/lib/Target/AMDGPU/SIWholeQuadMode.cpp

index dd133d3..7979685 100644 (file)
--- a/lib/Target/AMDGPU/SIWholeQuadMode.cpp
+++ b/lib/Target/AMDGPU/SIWholeQuadMode.cpp
@@ -154,6 +154,7 @@ FunctionPass *llvm::createSIWholeQuadModePass() {
  char SIWholeQuadMode::scanInstructions(MachineFunction &MF,
                                         std::vector<WorkItem> &Worklist) {
    char GlobalFlags = 0;
+  bool WQMOutputs = MF.getFunction()->hasFnAttribute("amdgpu-ps-wqm-outputs");
  
    for (auto BI = MF.begin(), BE = MF.end(); BI != BE; ++BI) {
      MachineBasicBlock &MBB = *BI;
@@ -161,7 +162,7 @@ char SIWholeQuadMode::scanInstructions(MachineFunction &MF,
      for (auto II = MBB.begin(), IE = MBB.end(); II != IE; ++II) {
        MachineInstr &MI = *II;
        unsigned Opcode = MI.getOpcode();
-      char Flags;
+      char Flags = 0;
  
        if (TII->isWQM(Opcode) || TII->isDS(Opcode)) {
          Flags = StateWQM;
@@ -175,15 +176,39 @@ char SIWholeQuadMode::scanInstructions(MachineFunction &MF,
              ExecExports.push_back(&MI);
          } else if (Opcode == AMDGPU::SI_PS_LIVE) {
            LiveMaskQueries.push_back(&MI);
+        } else if (WQMOutputs) {
+          // The function is in machine SSA form, which means that physical
+          // VGPRs correspond to shader inputs and outputs. Inputs are
+          // only used, outputs are only defined.
+          for (const MachineOperand &MO : MI.defs()) {
+            if (!MO.isReg())
+              continue;
+
+            unsigned Reg = MO.getReg();
+
+            if (!TRI->isVirtualRegister(Reg) &&
+                TRI->hasVGPRs(TRI->getPhysRegClass(Reg))) {
+              Flags = StateWQM;
+              break;
+            }
+          }
          }
  
-        continue;
+        if (!Flags)
+          continue;
        }
  
        Instructions[&MI].Needs = Flags;
        Worklist.push_back(&MI);
        GlobalFlags |= Flags;
      }
+
+    if (WQMOutputs && MBB.succ_empty()) {
+      // This is a prolog shader. Make sure we go back to exact mode at the end.
+      Blocks[&MBB].OutNeeds = StateExact;
+      Worklist.push_back(&MBB);
+      GlobalFlags |= StateExact;
+    }
    }
  
    return GlobalFlags;
diff --git a/test/CodeGen/AMDGPU/wqm.ll b/test/CodeGen/AMDGPU/wqm.ll

index 30915a8..4eab0ae 100644 (file)
--- a/test/CodeGen/AMDGPU/wqm.ll
+++ b/test/CodeGen/AMDGPU/wqm.ll
@@ -332,6 +332,19 @@ main_body:
    ret <4 x float> %tex
  }
  
+; Check prolog shaders.
+;
+; CHECK-LABEL: {{^}}test_prolog_1:
+; CHECK: s_mov_b64 [[ORIG:s\[[0-9]+:[0-9]+\]]], exec
+; CHECK: s_wqm_b64 exec, exec
+; CHECK: v_add_f32_e32 v0,
+; CHECK: s_and_b64 exec, exec, [[ORIG]]
+define amdgpu_ps float @test_prolog_1(float %a, float %b) #4 {
+main_body:
+  %s = fadd float %a, %b
+  ret float %s
+}
+
  declare void @llvm.amdgcn.image.store.v4i32(<4 x float>, <4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #1
  
  declare <4 x float> @llvm.amdgcn.image.load.v4i32(<4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #2
@@ -345,3 +358,4 @@ declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float
  attributes #1 = { nounwind }
  attributes #2 = { nounwind readonly }
  attributes #3 = { nounwind readnone }
+attributes #4 = { "amdgpu-ps-wqm-outputs" }
author	Nicolai Haehnle <nhaehnle@gmail.com>
	Tue, 7 Jun 2016 21:37:17 +0000 (21:37 +0000)
committer	Nicolai Haehnle <nhaehnle@gmail.com>
	Tue, 7 Jun 2016 21:37:17 +0000 (21:37 +0000)
lib/Target/AMDGPU/SIWholeQuadMode.cpp		patch | blob | history
test/CodeGen/AMDGPU/wqm.ll		patch | blob | history