nv50/ir: support different unordered_set implementations

[android-x86/external-mesa.git] / src / gallium / drivers / nouveau / codegen / nv50_ir_lowering_nvc0.cpp
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp

index dc1eb5e..bef1359 100644 (file)
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
@@ -24,6 +24,7 @@
  #include "codegen/nv50_ir_build_util.h"
  
  #include "codegen/nv50_ir_target_nvc0.h"
+#include "codegen/nv50_ir_lowering_nvc0.h"
  
  #include <limits>
  
@@ -39,20 +40,6 @@ namespace nv50_ir {
     ((QOP_##q << 6) | (QOP_##r << 4) |           \
      (QOP_##s << 2) | (QOP_##t << 0))
  
-class NVC0LegalizeSSA : public Pass
-{
-private:
-   virtual bool visit(BasicBlock *);
-   virtual bool visit(Function *);
-
-   // we want to insert calls to the builtin library only after optimization
-   void handleDIV(Instruction *); // integer division, modulus
-   void handleRCPRSQ(Instruction *); // double precision float recip/rsqrt
-
-private:
-   BuildUtil bld;
-};
-
  void
  NVC0LegalizeSSA::handleDIV(Instruction *i)
  {
@@ -86,6 +73,25 @@ NVC0LegalizeSSA::handleRCPRSQ(Instruction *i)
     // TODO
  }
  
+void
+NVC0LegalizeSSA::handleFTZ(Instruction *i)
+{
+   // Sets i->ftz on eligible ops; only F32-source instructions belong here.
+   assert(i->sType == TYPE_F32);
+
+   // dnz already flushes denorms (and NaNs) to zero, so ftz would be redundant.
+   if (i->dnz)
+      return;
+
+   // Only the arithmetic, compare and convert op classes get the flag.
+   OpClass cls = prog->getTarget()->getOpClass(i->op);
+   if (cls != OPCLASS_ARITH && cls != OPCLASS_COMPARE &&
+       cls != OPCLASS_CONVERT)
+      return;
+
+   i->ftz = true;
+}
+
  bool
  NVC0LegalizeSSA::visit(Function *fn)
  {
@@ -99,8 +105,11 @@ NVC0LegalizeSSA::visit(BasicBlock *bb)
     Instruction *next;
     for (Instruction *i = bb->getEntry(); i; i = next) {
        next = i->next;
-      if (i->dType == TYPE_F32)
+      if (i->sType == TYPE_F32) {
+         if (prog->getType() != Program::TYPE_COMPUTE)
+            handleFTZ(i);
           continue;
+      }
        switch (i->op) {
        case OP_DIV:
        case OP_MOD:
@@ -118,49 +127,6 @@ NVC0LegalizeSSA::visit(BasicBlock *bb)
     return true;
  }
  
-class NVC0LegalizePostRA : public Pass
-{
-public:
-   NVC0LegalizePostRA(const Program *);
-
-private:
-   virtual bool visit(Function *);
-   virtual bool visit(BasicBlock *);
-
-   void replaceZero(Instruction *);
-   bool tryReplaceContWithBra(BasicBlock *);
-   void propagateJoin(BasicBlock *);
-
-   struct TexUse
-   {
-      TexUse(Instruction *use, const Instruction *tex)
-         : insn(use), tex(tex), level(-1) { }
-      Instruction *insn;
-      const Instruction *tex; // or split / mov
-      int level;
-   };
-   struct Limits
-   {
-      Limits() { }
-      Limits(int min, int max) : min(min), max(max) { }
-      int min, max;
-   };
-   bool insertTextureBarriers(Function *);
-   inline bool insnDominatedBy(const Instruction *, const Instruction *) const;
-   void findFirstUses(const Instruction *tex, const Instruction *def,
-                      std::list<TexUse>&);
-   void findOverwritingDefs(const Instruction *tex, Instruction *insn,
-                            const BasicBlock *term,
-                            std::list<TexUse>&);
-   void addTexUse(std::list<TexUse>&, Instruction *, const Instruction *);
-   const Instruction *recurseDef(const Instruction *);
-
-private:
-   LValue *rZero;
-   LValue *carry;
-   const bool needTexBar;
-};
-
  NVC0LegalizePostRA::NVC0LegalizePostRA(const Program *prog)
     : rZero(NULL),
       carry(NULL),
@@ -179,7 +145,7 @@ NVC0LegalizePostRA::insnDominatedBy(const Instruction *later,
  
  void
  NVC0LegalizePostRA::addTexUse(std::list<TexUse> &uses,
-                              Instruction *usei, const Instruction *insn)
+                              Instruction *usei, const Instruction *texi)
  {
     bool add = true;
     for (std::list<TexUse>::iterator it = uses.begin();
@@ -194,7 +160,7 @@ NVC0LegalizePostRA::addTexUse(std::list<TexUse> &uses,
           ++it;
     }
     if (add)
-      uses.push_back(TexUse(usei, insn));
+      uses.push_back(TexUse(usei, texi));
  }
  
  void
@@ -206,7 +172,8 @@ NVC0LegalizePostRA::findOverwritingDefs(const Instruction *texi,
     while (insn->op == OP_MOV && insn->getDef(0)->equals(insn->getSrc(0)))
        insn = insn->getSrc(0)->getUniqueInsn();
  
-   if (!insn || !insn->bb->reachableBy(texi->bb, term))
+   // NOTE: the tex itself is, of course, not an overwriting definition
+   if (insn == texi || !insn->bb->reachableBy(texi->bb, term))
        return;
  
     switch (insn->op) {
@@ -230,17 +197,36 @@ NVC0LegalizePostRA::findOverwritingDefs(const Instruction *texi,
  }
  
  void
-NVC0LegalizePostRA::findFirstUses(const Instruction *texi,
-                                  const Instruction *insn,
-                                  std::list<TexUse> &uses)
+NVC0LegalizePostRA::findFirstUses(
+      const Instruction *texi,
+      const Instruction *insn,
+      std::list<TexUse> &uses,
+      unordered_set<const Instruction *>& visited)
  {
     for (int d = 0; insn->defExists(d); ++d) {
        Value *v = insn->getDef(d);
        for (Value::UseIterator u = v->uses.begin(); u != v->uses.end(); ++u) {
           Instruction *usei = (*u)->getInsn();
  
+         // NOTE: In case of a loop that overwrites a value but never uses
+         // it, it can happen that we have a cycle of uses that consists only
+         // of phis and no-op moves and will thus cause an infinite loop here
+         // since these are not considered actual uses.
+         // The most obvious (and perhaps the only) way to prevent this is to
+         // remember which instructions we've already visited.
+
+         if (visited.find(usei) != visited.end())
+            continue;
+
+         visited.insert(usei);
+
           if (usei->op == OP_PHI || usei->op == OP_UNION) {
-            // need a barrier before WAW cases
+            // need a barrier before WAW cases, like:
+            //   %r0 = tex
+            //   if ...
+            //     texbar <- is required or tex might replace x again
+            //     %r1 = x <- overwriting def
+            //   %r2 = phi %r0, %r1
              for (int s = 0; usei->srcExists(s); ++s) {
                 Instruction *defi = usei->getSrc(s)->getUniqueInsn();
                 if (defi && &usei->src(s) != *u)
@@ -253,13 +239,13 @@ NVC0LegalizePostRA::findFirstUses(const Instruction *texi,
               usei->op == OP_PHI ||
               usei->op == OP_UNION) {
              // these uses don't manifest in the machine code
-            findFirstUses(texi, usei, uses);
+            findFirstUses(texi, usei, uses, visited);
           } else
           if (usei->op == OP_MOV && usei->getDef(0)->equals(usei->getSrc(0)) &&
               usei->subOp != NV50_IR_SUBOP_MOV_FINAL) {
-            findFirstUses(texi, usei, uses);
+            findFirstUses(texi, usei, uses, visited);
           } else {
-            addTexUse(uses, usei, insn);
+            addTexUse(uses, usei, texi);
           }
        }
     }
@@ -313,8 +299,10 @@ NVC0LegalizePostRA::insertTextureBarriers(Function *fn)
     uses = new std::list<TexUse>[texes.size()];
     if (!uses)
        return false;
-   for (size_t i = 0; i < texes.size(); ++i)
-      findFirstUses(texes[i], texes[i], uses[i]);
+   for (size_t i = 0; i < texes.size(); ++i) {
+      unordered_set<const Instruction *> visited;
+      findFirstUses(texes[i], texes[i], uses[i], visited);
+   }
  
     // determine the barrier level at each use
     for (size_t i = 0; i < texes.size(); ++i) {
@@ -349,7 +337,6 @@ NVC0LegalizePostRA::insertTextureBarriers(Function *fn)
        }
     }
     delete[] uses;
-   uses = NULL;
  
     // insert the barriers
     for (size_t i = 0; i < useVec.size(); ++i) {
@@ -370,11 +357,8 @@ NVC0LegalizePostRA::insertTextureBarriers(Function *fn)
        }
     }
  
-   if (fn->getProgram()->optLevel < 3) {
-      if (uses)
-         delete[] uses;
+   if (fn->getProgram()->optLevel < 3)
        return true;
-   }
  
     std::vector<Limits> limitT, limitB, limitS; // entry, exit, single
  
@@ -459,8 +443,6 @@ NVC0LegalizePostRA::insertTextureBarriers(Function *fn)
              prev = i;
        }
     }
-   if (uses)
-      delete[] uses;
     return true;
  }
  
@@ -550,6 +532,7 @@ NVC0LegalizePostRA::visit(BasicBlock *bb)
              i->setDef(0, NULL);
           if (i->src(0).getFile() == FILE_IMMEDIATE)
              i->setSrc(0, rZero); // initial value must be 0
+         replaceZero(i);
        } else
        if (i->isNop()) {
           bb->remove(i);
@@ -576,53 +559,6 @@ NVC0LegalizePostRA::visit(BasicBlock *bb)
     return true;
  }
  
-class NVC0LoweringPass : public Pass
-{
-public:
-   NVC0LoweringPass(Program *);
-
-private:
-   virtual bool visit(Function *);
-   virtual bool visit(BasicBlock *);
-   virtual bool visit(Instruction *);
-
-   bool handleRDSV(Instruction *);
-   bool handleWRSV(Instruction *);
-   bool handleEXPORT(Instruction *);
-   bool handleOUT(Instruction *);
-   bool handleDIV(Instruction *);
-   bool handleMOD(Instruction *);
-   bool handleSQRT(Instruction *);
-   bool handlePOW(Instruction *);
-   bool handleTEX(TexInstruction *);
-   bool handleTXD(TexInstruction *);
-   bool handleTXQ(TexInstruction *);
-   bool handleManualTXD(TexInstruction *);
-   bool handleTXLQ(TexInstruction *);
-   bool handleATOM(Instruction *);
-   bool handleCasExch(Instruction *, bool needCctl);
-   void handleSurfaceOpNVE4(TexInstruction *);
-
-   void checkPredicate(Instruction *);
-
-   void readTessCoord(LValue *dst, int c);
-
-   Value *loadResInfo32(Value *ptr, uint32_t off);
-   Value *loadMsInfo32(Value *ptr, uint32_t off);
-   Value *loadTexHandle(Value *ptr, unsigned int slot);
-
-   void adjustCoordinatesMS(TexInstruction *);
-   void processSurfaceCoordsNVE4(TexInstruction *);
-
-private:
-   const Target *const targ;
-
-   BuildUtil bld;
-
-   Symbol *gMemBase;
-   LValue *gpEmitAddress;
-};
-
  NVC0LoweringPass::NVC0LoweringPass(Program *prog) : targ(prog->getTarget())
  {
     bld.setProgram(prog);
@@ -669,11 +605,59 @@ NVC0LoweringPass::handleTEX(TexInstruction *i)
     const int lyr = arg - (i->tex.target.isMS() ? 2 : 1);
     const int chipset = prog->getTarget()->getChipset();
  
+   // Arguments to the TEX instruction are a little insane. Even though the
+   // encoding is identical between SM20 and SM30, the arguments mean
+   // different things between Fermi and Kepler+. A lot of arguments are
+   // optional based on flags passed to the instruction. This summarizes the
+   // order of things.
+   //
+   // Fermi:
+   //  array/indirect
+   //  coords
+   //  sample
+   //  lod bias
+   //  depth compare
+   //  offsets:
+   //    - tg4: 8 bits each, either 2 (1 offset reg) or 8 (2 offset reg)
+   //    - other: 4 bits each, single reg
+   //
+   // Kepler+:
+   //  indirect handle
+   //  array (+ offsets for txd in upper 16 bits)
+   //  coords
+   //  sample
+   //  lod bias
+   //  depth compare
+   //  offsets (same as fermi, except txd which takes it with array)
+   //
+   // Maxwell (tex):
+   //  array
+   //  coords
+   //  indirect handle
+   //  sample
+   //  lod bias
+   //  depth compare
+   //  offsets
+   //
+   // Maxwell (txd):
+   //  indirect handle
+   //  coords
+   //  array + offsets
+   //  derivatives
+
     if (chipset >= NVISA_GK104_CHIPSET) {
        if (i->tex.rIndirectSrc >= 0 || i->tex.sIndirectSrc >= 0) {
-         WARN("indirect TEX not implemented\n");
-      }
-      if (i->tex.r == i->tex.s) {
+         // XXX this ignores tsc, and assumes a 1:1 mapping
+         assert(i->tex.rIndirectSrc >= 0);
+         Value *hnd = loadTexHandle(
+               bld.mkOp2v(OP_SHL, TYPE_U32, bld.getSSA(),
+                          i->getIndirectR(), bld.mkImm(2)),
+               i->tex.r);
+         i->tex.r = 0xff;
+         i->tex.s = 0x1f;
+         i->setIndirectR(hnd);
+         i->setIndirectS(NULL);
+      } else if (i->tex.r == i->tex.s) {
           i->tex.r += prog->driver->io.texBindBase / 4;
           i->tex.s  = 0; // only a single cX[] value possible here
        } else {
@@ -693,23 +677,51 @@ NVC0LoweringPass::handleTEX(TexInstruction *i)
           const int sat = (i->op == OP_TXF) ? 1 : 0;
           DataType sTy = (i->op == OP_TXF) ? TYPE_U32 : TYPE_F32;
           bld.mkCvt(OP_CVT, TYPE_U16, layer, sTy, src)->saturate = sat;
-         for (int s = dim; s >= 1; --s)
-            i->setSrc(s, i->getSrc(s - 1));
-         i->setSrc(0, layer);
+         if (i->op != OP_TXD || chipset < NVISA_GM107_CHIPSET) {
+            for (int s = dim; s >= 1; --s)
+               i->setSrc(s, i->getSrc(s - 1));
+            i->setSrc(0, layer);
+         } else {
+            i->setSrc(dim, layer);
+         }
+      }
+      // Move the indirect reference to the first place
+      if (i->tex.rIndirectSrc >= 0 && (
+                i->op == OP_TXD || chipset < NVISA_GM107_CHIPSET)) {
+         Value *hnd = i->getIndirectR();
+
+         i->setIndirectR(NULL);
+         i->moveSources(0, 1);
+         i->setSrc(0, hnd);
+         i->tex.rIndirectSrc = 0;
+         i->tex.sIndirectSrc = -1;
        }
     } else
     // (nvc0) generate and move the tsc/tic/array source to the front
     if (i->tex.target.isArray() || i->tex.rIndirectSrc >= 0 || i->tex.sIndirectSrc >= 0) {
        LValue *src = new_LValue(func, FILE_GPR); // 0xttxsaaaa
  
+      Value *ticRel = i->getIndirectR();
+      Value *tscRel = i->getIndirectS();
+
+      if (ticRel) {
+         i->setSrc(i->tex.rIndirectSrc, NULL);
+         if (i->tex.r)
+            ticRel = bld.mkOp2v(OP_ADD, TYPE_U32, bld.getScratch(),
+                                ticRel, bld.mkImm(i->tex.r));
+      }
+      if (tscRel) {
+         i->setSrc(i->tex.sIndirectSrc, NULL);
+         if (i->tex.s)
+            tscRel = bld.mkOp2v(OP_ADD, TYPE_U32, bld.getScratch(),
+                                tscRel, bld.mkImm(i->tex.s));
+      }
+
        Value *arrayIndex = i->tex.target.isArray() ? i->getSrc(lyr) : NULL;
        for (int s = dim; s >= 1; --s)
           i->setSrc(s, i->getSrc(s - 1));
        i->setSrc(0, arrayIndex);
  
-      Value *ticRel = i->getIndirectR();
-      Value *tscRel = i->getIndirectS();
-
        if (arrayIndex) {
           int sat = (i->op == OP_TXF) ? 1 : 0;
           DataType sTy = (i->op == OP_TXF) ? TYPE_U32 : TYPE_F32;
@@ -718,14 +730,10 @@ NVC0LoweringPass::handleTEX(TexInstruction *i)
           bld.loadImm(src, 0);
        }
  
-      if (ticRel) {
-         i->setSrc(i->tex.rIndirectSrc, NULL);
+      if (ticRel)
           bld.mkOp3(OP_INSBF, TYPE_U32, src, ticRel, bld.mkImm(0x0917), src);
-      }
-      if (tscRel) {
-         i->setSrc(i->tex.sIndirectSrc, NULL);
+      if (tscRel)
           bld.mkOp3(OP_INSBF, TYPE_U32, src, tscRel, bld.mkImm(0x0710), src);
-      }
  
        i->setSrc(0, src);
     }
@@ -737,14 +745,18 @@ NVC0LoweringPass::handleTEX(TexInstruction *i)
     assert(chipset >= NVISA_GK104_CHIPSET ||
            !i->tex.useOffsets || !i->tex.target.isMS());
  
-   // offset is last source (lod 1st, dc 2nd)
+   // offset is between lod and dc
     if (i->tex.useOffsets) {
        int n, c;
        int s = i->srcCount(0xff, true);
-      if (i->srcExists(s)) // move potential predicate out of the way
-         i->moveSources(s, 1);
-      if (i->tex.useOffsets == 4 && i->srcExists(s + 1))
-         i->moveSources(s + 1, 1);
+      if (i->op != OP_TXD || chipset < NVISA_GK104_CHIPSET) {
+         if (i->tex.target.isShadow())
+            s--;
+         if (i->srcExists(s)) // move potential predicate out of the way
+            i->moveSources(s, 1);
+         if (i->tex.useOffsets == 4 && i->srcExists(s + 1))
+            i->moveSources(s + 1, 1);
+      }
        if (i->op == OP_TXG) {
           // Either there is 1 offset, which goes into the 2 low bytes of the
           // first source, or there are 4 offsets, which go into 2 sources (8
@@ -770,10 +782,28 @@ NVC0LoweringPass::handleTEX(TexInstruction *i)
           assert(i->tex.useOffsets == 1);
           for (c = 0; c < 3; ++c) {
              ImmediateValue val;
-            assert(i->offset[0][c].getImmediate(val));
+            if (!i->offset[0][c].getImmediate(val))
+               assert(!"non-immediate offset passed to non-TXG");
              imm |= (val.reg.data.u32 & 0xf) << (c * 4);
           }
-         i->setSrc(s, bld.loadImm(NULL, imm));
+         if (i->op == OP_TXD && chipset >= NVISA_GK104_CHIPSET) {
+            // The offset goes into the upper 16 bits of the array index. So
+            // create it if it's not already there, and INSBF it if it already
+            // is.
+            s = (i->tex.rIndirectSrc >= 0) ? 1 : 0;
+            if (chipset >= NVISA_GM107_CHIPSET)
+               s += dim;
+            if (i->tex.target.isArray()) {
+               bld.mkOp3(OP_INSBF, TYPE_U32, i->getSrc(s),
+                         bld.loadImm(NULL, imm), bld.mkImm(0xc10),
+                         i->getSrc(s));
+            } else {
+               i->moveSources(s, 1);
+               i->setSrc(s, bld.loadImm(NULL, imm << 16));
+            }
+         } else {
+            i->setSrc(s, bld.loadImm(NULL, imm));
+         }
        }
     }
  
@@ -812,6 +842,7 @@ NVC0LoweringPass::handleManualTXD(TexInstruction *i)
     Value *zero = bld.loadImm(bld.getSSA(), 0);
     int l, c;
     const int dim = i->tex.target.getDim();
+   const int array = i->tex.target.isArray();
  
     i->op = OP_TEX; // no need to clone dPdx/dPdy later
  
@@ -822,7 +853,7 @@ NVC0LoweringPass::handleManualTXD(TexInstruction *i)
     for (l = 0; l < 4; ++l) {
        // mov coordinates from lane l to all lanes
        for (c = 0; c < dim; ++c)
-         bld.mkQuadop(0x00, crd[c], l, i->getSrc(c), zero);
+         bld.mkQuadop(0x00, crd[c], l, i->getSrc(c + array), zero);
        // add dPdx from lane l to lanes dx
        for (c = 0; c < dim; ++c)
           bld.mkQuadop(qOps[l][0], crd[c], l, i->dPdx[c].get(), crd[c]);
@@ -832,7 +863,7 @@ NVC0LoweringPass::handleManualTXD(TexInstruction *i)
        // texture
        bld.insert(tex = cloneForward(func, i));
        for (c = 0; c < dim; ++c)
-         tex->setSrc(c, crd[c]);
+         tex->setSrc(c + array, crd[c]);
        // save results
        for (c = 0; i->defExists(c); ++c) {
           Instruction *mov;
@@ -858,19 +889,38 @@ bool
  NVC0LoweringPass::handleTXD(TexInstruction *txd)
  {
     int dim = txd->tex.target.getDim();
-   int arg = txd->tex.target.getArgCount();
+   unsigned arg = txd->tex.target.getArgCount();
+   unsigned expected_args = arg;
+   const int chipset = prog->getTarget()->getChipset();
+
+   if (chipset >= NVISA_GK104_CHIPSET) {
+      if (!txd->tex.target.isArray() && txd->tex.useOffsets)
+         expected_args++;
+      if (txd->tex.rIndirectSrc >= 0 || txd->tex.sIndirectSrc >= 0)
+         expected_args++;
+   } else {
+      if (txd->tex.useOffsets)
+         expected_args++;
+      if (!txd->tex.target.isArray() && (
+                txd->tex.rIndirectSrc >= 0 || txd->tex.sIndirectSrc >= 0))
+         expected_args++;
+   }
+
+   if (expected_args > 4 ||
+       dim > 2 ||
+       txd->tex.target.isShadow() ||
+       txd->tex.target.isCube())
+      txd->op = OP_TEX;
  
     handleTEX(txd);
     while (txd->srcExists(arg))
        ++arg;
  
     txd->tex.derivAll = true;
-   if (dim > 2 ||
-       txd->tex.target.isCube() ||
-       arg > 4 ||
-       txd->tex.target.isShadow())
+   if (txd->op == OP_TEX)
        return handleManualTXD(txd);
  
+   assert(arg == expected_args);
     for (int c = 0; c < dim; ++c) {
        txd->setSrc(arg + c * 2 + 0, txd->dPdx[c]);
        txd->setSrc(arg + c * 2 + 1, txd->dPdy[c]);
@@ -1418,7 +1468,14 @@ NVC0LoweringPass::handleRDSV(Instruction *i)
     switch (sv) {
     case SV_POSITION:
        assert(prog->getType() == Program::TYPE_FRAGMENT);
-      bld.mkInterp(NV50_IR_INTERP_LINEAR, i->getDef(0), addr, NULL);
+      if (i->srcExists(1)) {
+         // Pass offset through to the interpolation logic
+         ld = bld.mkInterp(NV50_IR_INTERP_LINEAR | NV50_IR_INTERP_OFFSET,
+                           i->getDef(0), addr, NULL);
+         ld->setSrc(1, i->getSrc(1));
+      } else {
+         bld.mkInterp(NV50_IR_INTERP_LINEAR, i->getDef(0), addr, NULL);
+      }
        break;
     case SV_FACE:
     {
@@ -1564,14 +1621,21 @@ NVC0LoweringPass::handleEXPORT(Instruction *i)
  bool
  NVC0LoweringPass::handleOUT(Instruction *i)
  {
-   if (i->op == OP_RESTART && i->prev && i->prev->op == OP_EMIT) {
+   Instruction *prev = i->prev;
+   ImmediateValue stream, prevStream;
+
+   // Only merge if the stream ids match. Also, note that the previous
+   // instruction would have already been lowered, so we take arg1 from it.
+   if (i->op == OP_RESTART && prev && prev->op == OP_EMIT &&
+       i->src(0).getImmediate(stream) &&
+       prev->src(1).getImmediate(prevStream) &&
+       stream.reg.data.u32 == prevStream.reg.data.u32) {
        i->prev->subOp = NV50_IR_SUBOP_EMIT_RESTART;
        delete_Instruction(prog, i);
     } else {
        assert(gpEmitAddress);
        i->setDef(0, gpEmitAddress);
-      if (i->srcExists(0))
-         i->setSrc(1, i->getSrc(0));
+      i->setSrc(1, i->getSrc(0));
        i->setSrc(0, gpEmitAddress);
     }
     return true;
@@ -1656,10 +1720,25 @@ NVC0LoweringPass::visit(Instruction *i)
              Value *ptr = bld.mkOp2v(OP_SHL, TYPE_U32, bld.getSSA(),
                                      i->getIndirect(0, 0), bld.mkImm(4));
              i->setIndirect(0, 0, ptr);
+            i->op = OP_VFETCH;
           } else {
              i->op = OP_VFETCH;
              assert(prog->getType() != Program::TYPE_FRAGMENT); // INTERP
           }
+      } else if (i->src(0).getFile() == FILE_MEMORY_CONST) {
+         if (i->src(0).isIndirect(1)) {
+            Value *ptr;
+            if (i->src(0).isIndirect(0))
+               ptr = bld.mkOp3v(OP_INSBF, TYPE_U32, bld.getSSA(),
+                                i->getIndirect(0, 1), bld.mkImm(0x1010),
+                                i->getIndirect(0, 0));
+            else
+               ptr = bld.mkOp2v(OP_SHL, TYPE_U32, bld.getSSA(),
+                                i->getIndirect(0, 1), bld.mkImm(16));
+            i->setIndirect(0, 1, NULL);
+            i->setIndirect(0, 0, ptr);
+            i->subOp = NV50_IR_SUBOP_LDC_IS;
+         }
        }
        break;
     case OP_ATOM: