Update LLVM for 3.5 rebase (r209712).

[android-x86/external-llvm.git] / lib / Target / R600 / SILowerControlFlow.cpp
diff --git a/lib/Target/R600/SILowerControlFlow.cpp b/lib/Target/R600/SILowerControlFlow.cpp

index c2e8f02..6601f2a 100644 (file)
--- a/lib/Target/R600/SILowerControlFlow.cpp
+++ b/lib/Target/R600/SILowerControlFlow.cpp
@@ -55,6 +55,7 @@
  #include "llvm/CodeGen/MachineFunctionPass.h"
  #include "llvm/CodeGen/MachineInstrBuilder.h"
  #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/Constants.h"
  
  using namespace llvm;
  
@@ -66,8 +67,8 @@ private:
    static const unsigned SkipThreshold = 12;
  
    static char ID;
-  const TargetRegisterInfo *TRI;
-  const TargetInstrInfo *TII;
+  const SIRegisterInfo *TRI;
+  const SIInstrInfo *TII;
  
    bool shouldSkip(MachineBasicBlock *From, MachineBasicBlock *To);
  
@@ -91,11 +92,11 @@ private:
  
  public:
    SILowerControlFlowPass(TargetMachine &tm) :
-    MachineFunctionPass(ID), TRI(0), TII(0) { }
+    MachineFunctionPass(ID), TRI(nullptr), TII(nullptr) { }
  
-  virtual bool runOnMachineFunction(MachineFunction &MF);
+  bool runOnMachineFunction(MachineFunction &MF) override;
  
-  const char *getPassName() const {
+  const char *getPassName() const override {
      return "SI Lower control flow instructions";
    }
  
@@ -145,7 +146,9 @@ void SILowerControlFlowPass::SkipIfDead(MachineInstr &MI) {
    MachineBasicBlock &MBB = *MI.getParent();
    DebugLoc DL = MI.getDebugLoc();
  
-  if (!shouldSkip(&MBB, &MBB.getParent()->back()))
+  if (MBB.getParent()->getInfo<SIMachineFunctionInfo>()->ShaderType !=
+      ShaderType::PIXEL ||
+      !shouldSkip(&MBB, &MBB.getParent()->back()))
      return;
  
    MachineBasicBlock::iterator Insert = &MI;
@@ -283,27 +286,36 @@ void SILowerControlFlowPass::EndCf(MachineInstr &MI) {
  }
  
  void SILowerControlFlowPass::Branch(MachineInstr &MI) {
-  MachineBasicBlock *Next = MI.getParent()->getNextNode();
-  MachineBasicBlock *Target = MI.getOperand(0).getMBB();
-  if (Target == Next)
+  if (MI.getOperand(0).getMBB() == MI.getParent()->getNextNode())
      MI.eraseFromParent();
-  else
-    assert(0);
+
+  // If the target isn't the next block, this is probably an infinite loop.
  }
  
  void SILowerControlFlowPass::Kill(MachineInstr &MI) {
-
    MachineBasicBlock &MBB = *MI.getParent();
    DebugLoc DL = MI.getDebugLoc();
+  const MachineOperand &Op = MI.getOperand(0);
  
-  // Kill is only allowed in pixel shaders
+  // Kill is only allowed in pixel / geometry shaders
    assert(MBB.getParent()->getInfo<SIMachineFunctionInfo>()->ShaderType ==
-         ShaderType::PIXEL);
-
-  // Clear this pixel from the exec mask if the operand is negative
-  BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_CMPX_LE_F32_e32), AMDGPU::VCC)
-          .addImm(0)
-          .addOperand(MI.getOperand(0));
+         ShaderType::PIXEL ||
+         MBB.getParent()->getInfo<SIMachineFunctionInfo>()->ShaderType ==
+         ShaderType::GEOMETRY);
+
+  // Clear this thread from the exec mask if the operand is negative
+  if ((Op.isImm() || Op.isFPImm())) {
+    // Constant operand: clear the exec mask if it is negative, else do nothing
+    if (Op.isImm() ? (Op.getImm() & 0x80000000) :
+        Op.getFPImm()->isNegative()) {
+      BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B64), AMDGPU::EXEC)
+              .addImm(0);
+    }
+  } else {
+    BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_CMPX_LE_F32_e32), AMDGPU::VCC)
+           .addImm(0)
+           .addOperand(Op);
+  }
  
    MI.eraseFromParent();
  }
@@ -333,12 +345,13 @@ void SILowerControlFlowPass::LoadM0(MachineInstr &MI, MachineInstr *MovRel) {
            .addReg(AMDGPU::EXEC);
  
    // Read the next variant into VCC (lower 32 bits) <- also loop target
-  BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32_e32), AMDGPU::VCC)
+  BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32),
+          AMDGPU::VCC_LO)
            .addReg(Idx);
  
    // Move index from VCC into M0
    BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0)
-          .addReg(AMDGPU::VCC);
+          .addReg(AMDGPU::VCC_LO);
  
    // Compare the just read M0 value to all possible Idx values
    BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_CMP_EQ_U32_e32), AMDGPU::VCC)
@@ -377,10 +390,13 @@ void SILowerControlFlowPass::IndirectSrc(MachineInstr &MI) {
    unsigned Dst = MI.getOperand(0).getReg();
    unsigned Vec = MI.getOperand(2).getReg();
    unsigned Off = MI.getOperand(4).getImm();
+  unsigned SubReg = TRI->getSubReg(Vec, AMDGPU::sub0);
+  if (!SubReg)
+    SubReg = Vec;
  
-  MachineInstr *MovRel = 
+  MachineInstr *MovRel =
      BuildMI(*MBB.getParent(), DL, TII->get(AMDGPU::V_MOVRELS_B32_e32), Dst)
-            .addReg(TRI->getSubReg(Vec, AMDGPU::sub0) + Off)
+            .addReg(SubReg + Off)
              .addReg(AMDGPU::M0, RegState::Implicit)
              .addReg(Vec, RegState::Implicit);
  
@@ -395,10 +411,13 @@ void SILowerControlFlowPass::IndirectDst(MachineInstr &MI) {
    unsigned Dst = MI.getOperand(0).getReg();
    unsigned Off = MI.getOperand(4).getImm();
    unsigned Val = MI.getOperand(5).getReg();
+  unsigned SubReg = TRI->getSubReg(Dst, AMDGPU::sub0);
+  if (!SubReg)
+    SubReg = Dst;
  
    MachineInstr *MovRel = 
      BuildMI(*MBB.getParent(), DL, TII->get(AMDGPU::V_MOVRELD_B32_e32))
-            .addReg(TRI->getSubReg(Dst, AMDGPU::sub0) + Off, RegState::Define)
+            .addReg(SubReg + Off, RegState::Define)
              .addReg(Val)
              .addReg(AMDGPU::M0, RegState::Implicit)
              .addReg(Dst, RegState::Implicit);
@@ -407,8 +426,9 @@ void SILowerControlFlowPass::IndirectDst(MachineInstr &MI) {
  }
  
  bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
-  TII = MF.getTarget().getInstrInfo();
-  TRI = MF.getTarget().getRegisterInfo();
+  TII = static_cast<const SIInstrInfo*>(MF.getTarget().getInstrInfo());
+  TRI = static_cast<const SIRegisterInfo*>(MF.getTarget().getRegisterInfo());
+  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  
    bool HaveKill = false;
    bool NeedM0 = false;
@@ -419,11 +439,16 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
         BI != BE; ++BI) {
  
      MachineBasicBlock &MBB = *BI;
-    for (MachineBasicBlock::iterator I = MBB.begin(), Next = llvm::next(I);
-         I != MBB.end(); I = Next) {
+    MachineBasicBlock::iterator I, Next;
+    for (I = MBB.begin(); I != MBB.end(); I = Next) {
+      Next = std::next(I);
  
-      Next = llvm::next(I);
        MachineInstr &MI = *I;
+      if (TII->isDS(MI.getOpcode())) {
+        NeedM0 = true;
+        NeedWQM = true;
+      }
+
        switch (MI.getOpcode()) {
          default: break;
          case AMDGPU::SI_IF:
@@ -476,6 +501,7 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
            IndirectSrc(MI);
            break;
  
+        case AMDGPU::SI_INDIRECT_DST_V1:
          case AMDGPU::SI_INDIRECT_DST_V2:
          case AMDGPU::SI_INDIRECT_DST_V4:
          case AMDGPU::SI_INDIRECT_DST_V8:
@@ -483,13 +509,6 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
            IndirectDst(MI);
            break;
  
-        case AMDGPU::DS_READ_B32:
-          NeedWQM = true;
-          // Fall through
-        case AMDGPU::DS_WRITE_B32:
-          NeedM0 = true;
-          break;
-
          case AMDGPU::V_INTERP_P1_F32:
          case AMDGPU::V_INTERP_P2_F32:
          case AMDGPU::V_INTERP_MOV_F32:
@@ -508,7 +527,7 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
              AMDGPU::M0).addImm(0xffffffff);
    }
  
-  if (NeedWQM) {
+  if (NeedWQM && MFI->ShaderType == ShaderType::PIXEL) {
      MachineBasicBlock &MBB = MF.front();
      BuildMI(MBB, MBB.getFirstNonPHI(), DebugLoc(), TII->get(AMDGPU::S_WQM_B64),
              AMDGPU::EXEC).addReg(AMDGPU::EXEC);