OSDN Git Service

nv50/ir: import SM4 converter
author Christoph Bumiller <e0425955@student.tuwien.ac.at>
Tue, 18 Oct 2011 16:36:57 +0000 (18:36 +0200)
committer Christoph Bumiller <e0425955@student.tuwien.ac.at>
Fri, 21 Oct 2011 21:00:40 +0000 (23:00 +0200)
src/gallium/drivers/nv50/codegen/nv50_ir_build_util.cpp
src/gallium/drivers/nv50/codegen/nv50_ir_build_util.h
src/gallium/drivers/nv50/codegen/nv50_ir_from_sm4.cpp [new file with mode: 0644]
src/gallium/drivers/nv50/codegen/nv50_ir_from_sm4.h [new file with mode: 0644]

index f9c18fa..53a73b4 100644 (file)
@@ -207,7 +207,7 @@ BuildUtil::mkCvt(operation op,
    return insn;
 }
 
-Instruction *
+CmpInstruction *
 BuildUtil::mkCmp(operation op, CondCode cc, DataType ty, Value *dst,
                  Value *src0, Value *src1, Value *src2)
 {
index dbc6723..0bb853a 100644 (file)
@@ -68,9 +68,9 @@ public:
                         Value *attrRel, Value *primRel);
 
    Instruction *mkCvt(operation, DataType, Value *, DataType, Value *);
-   Instruction *mkCmp(operation, CondCode, DataType,
-                      Value *,
-                      Value *, Value *, Value * = NULL);
+   CmpInstruction *mkCmp(operation, CondCode, DataType,
+                        Value *,
+                        Value *, Value *, Value * = NULL);
    Instruction *mkTex(operation, TexTarget, uint8_t tic, uint8_t tsc,
                       Value **def, Value **src);
    Instruction *mkQuadop(uint8_t qop, Value *, uint8_t l, Value *, Value *);
diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_from_sm4.cpp b/src/gallium/drivers/nv50/codegen/nv50_ir_from_sm4.cpp
new file mode 100644 (file)
index 0000000..7ed00e3
--- /dev/null
@@ -0,0 +1,2326 @@
+
+#include "nv50_ir.h"
+#include "nv50_ir_target.h"
+#include "nv50_ir_build_util.h"
+
+#include "nv50_ir_from_sm4.h"
+
+// WTF: pass-through is implicit ??? check ReadWriteMask
+
+namespace tgsi {
+
+// Map a semantic name (TGSI_SEMANTIC_* / NV50_SEMANTIC_*) to the matching
+// nv50_ir system value.
+//
+// This table mirrors the one in Converter::tgsiSemantic(), including the
+// INVOCATIONID entry, so that a name produced there maps back to the system
+// value it came from.  Names with no system value counterpart yield
+// SV_UNDEFINED.
+static nv50_ir::SVSemantic irSemantic(unsigned sn)
+{
+   switch (sn) {
+   case TGSI_SEMANTIC_POSITION:      return nv50_ir::SV_POSITION;
+   case TGSI_SEMANTIC_FACE:          return nv50_ir::SV_FACE;
+   case NV50_SEMANTIC_LAYER:         return nv50_ir::SV_LAYER;
+   case NV50_SEMANTIC_VIEWPORTINDEX: return nv50_ir::SV_VIEWPORT_INDEX;
+   case TGSI_SEMANTIC_PSIZE:         return nv50_ir::SV_POINT_SIZE;
+   case NV50_SEMANTIC_CLIPDISTANCE:  return nv50_ir::SV_CLIP_DISTANCE;
+   case NV50_SEMANTIC_VERTEXID:      return nv50_ir::SV_VERTEX_ID;
+   case TGSI_SEMANTIC_INSTANCEID:    return nv50_ir::SV_INSTANCE_ID;
+   case TGSI_SEMANTIC_PRIMID:        return nv50_ir::SV_PRIMITIVE_ID;
+   case NV50_SEMANTIC_TESSFACTOR:    return nv50_ir::SV_TESS_FACTOR;
+   case NV50_SEMANTIC_TESSCOORD:     return nv50_ir::SV_TESS_COORD;
+   case NV50_SEMANTIC_INVOCATIONID:  return nv50_ir::SV_INVOCATION_ID;
+   default:
+      return nv50_ir::SV_UNDEFINED;
+   }
+}
+
+} // namespace tgsi
+
+namespace {
+
+using namespace nv50_ir;
+
+// Number of slots in the per-sampler shadow[] and per-resource
+// resourceType[] tables of Converter.
+#define NV50_IR_MAX_RESOURCES 64
+
+// Converter from an SM4 program (member 'sm4') to an nv50_ir Program
+// (member 'prog'), built on the BuildUtil instruction helpers it inherits.
+// The public interface is the constructor, the destructor and run(); all
+// other members are internal state and translation helpers.
+class Converter : public BuildUtil
+{
+public:
+   Converter(Program *, struct nv50_ir_prog_info *);
+   ~Converter();
+
+private:
+   DataArray tData32;
+   DataArray tData64;
+   unsigned int nrRegVals; // accumulated DCL_TEMPS counts (inspectDeclaration)
+
+   DataArray *lData;
+   unsigned int nrArrays; // number of DCL_INDEXABLE_TEMP declarations seen
+   unsigned int arrayVol;
+
+   DataArray oData;
+
+   // per-input interpolation mode, recorded from DCL_INPUT_PS* declarations
+   uint8_t interpMode[PIPE_MAX_SHADER_INPUTS];
+
+   // outputs for each phase
+   struct nv50_ir_varying out[3][PIPE_MAX_SHADER_OUTPUTS];
+
+   int phase;
+   int subPhaseCnt[2];
+   int subPhase;
+   unsigned int phaseStart;
+   unsigned int phaseInstance;
+   unsigned int *phaseInstCnt[2];
+   bool unrollPhase;
+   bool phaseInstanceUsed;
+   int phaseEnded; // (phase + 1) if $phase ended
+
+   bool finalized;
+
+   Value *srcPtr[3][3]; // for indirect addressing, save pointer values
+   Value *dstPtr[3];
+   Value *vtxBase[3]; // base address of vertex in a primitive (TP/GP)
+
+   Value *domainPt[3]; // pre-fetched TessCoord
+
+   unsigned int nDstOpnds;
+
+   // NOTE(review): presumably these track nested control flow and calls
+   // while instructions are translated -- confirm against the handlers.
+   Stack condBBs;
+   Stack joinBBs;
+   Stack loopBBs;
+   Stack breakBBs;
+   Stack entryBBs;
+   Stack leaveBBs;
+   Stack retIPs;
+
+   // per-sampler flag taken from DCL_SAMPLER
+   bool shadow[NV50_IR_MAX_RESOURCES];
+   // per-resource target from DCL_RESOURCE:
+   // [idx][0] as converted for SAMPLE, [idx][1] as converted for SAMPLE_C
+   TexTarget resourceType[NV50_IR_MAX_RESOURCES][2];
+
+   struct nv50_ir_prog_info& info;
+
+   Value *fragCoord[4];
+
+public:
+   // NOTE(review): presumably performs the whole conversion and returns
+   // false on failure -- the definition is not visible in this part.
+   bool run();
+
+private:
+   bool handleInstruction(unsigned int pos);
+   bool inspectInstruction(unsigned int pos);
+   bool handleDeclaration(const sm4_dcl& dcl);
+   bool inspectDeclaration(const sm4_dcl& dcl);
+   bool parseSignature();
+
+   bool haveNextPhase(unsigned int pos) const;
+
+   void allocateValues();
+   void exportOutputs();
+
+   void emitTex(Value *dst0[4], TexInstruction *, const uint8_t swizzle[4]);
+   void handleLOAD(Value *dst0[4]);
+   void handleSAMPLE(operation, Value *dst0[4]);
+   void handleQUERY(Value *dst0[4], enum TexQuery query);
+   void handleDP(Value *dst0[4], int dim);
+
+   Symbol *iSym(int i, int c);
+   Symbol *oSym(int i, int c);
+
+   Value *src(int i, int c);
+   Value *src(const sm4_op&, int c, int i);
+   Value *dst(int i, int c);
+   Value *dst(const sm4_op&, int c, int i);
+   void saveDst(int i, int c, Value *value);
+   void saveDst(const sm4_op&, int c, Value *value, int i);
+   void saveFragDepth(operation op, Value *value);
+
+   Value *interpolate(const sm4_op&, int c, int i);
+
+   Value *getSrcPtr(int s, int dim, int shl);
+   Value *getDstPtr(int d, int dim, int shl);
+   Value *getVtxPtr(int s);
+
+   bool checkDstSrcAliasing() const;
+   void insertConvergenceOps(BasicBlock *conv, BasicBlock *fork);
+   void finalizeShader();
+
+   // Pure SM4 -> nv50_ir / gallium enum translation tables.
+   operation cvtOpcode(enum sm4_opcode op) const;
+   unsigned int getDstOpndCount(enum sm4_opcode opcode) const;
+
+   DataType inferSrcType(enum sm4_opcode op) const;
+   DataType inferDstType(enum sm4_opcode op) const;
+
+   unsigned g3dPrim(const unsigned prim, unsigned *patchSize = NULL) const;
+   CondCode cvtCondCode(enum sm4_opcode op) const;
+   RoundMode cvtRoundingMode(enum sm4_opcode op) const;
+   TexTarget cvtTexTarget(enum sm4_target,
+                           enum sm4_opcode, operation *) const;
+   SVSemantic cvtSemantic(enum sm4_sv, uint8_t &index) const;
+   uint8_t cvtInterpMode(enum sm4_interpolation) const;
+
+   unsigned tgsiSemantic(SVSemantic, int index);
+   // Append (sn, si) to info.sv[] unless that pair is already recorded.
+   void recordSV(unsigned sn, unsigned si, unsigned mask, bool input);
+
+private:
+   sm4_insn *insn;
+   DataType dTy, sTy;
+
+   const struct sm4_program& sm4;
+   Program *prog;
+};
+
+#define PRIM_CASE(a, b) \
+   case D3D_PRIMITIVE_TOPOLOGY_##a: return PIPE_PRIM_##b;
+
+// Translate a D3D primitive topology into the matching pipe primitive type.
+//
+// prim:      D3D_PRIMITIVE_TOPOLOGY_* value
+// patchSize: optional; written only for N_CONTROL_POINT_PATCHLIST
+//            topologies, where it receives N (1..32)
+//
+// Returns the PIPE_PRIM_* value, NV50_PRIM_PATCHES for patch lists, and
+// PIPE_PRIM_POINTS for UNDEFINED or unknown topologies.
+unsigned
+Converter::g3dPrim(const unsigned prim, unsigned *patchSize) const
+{
+   switch (prim) {
+   PRIM_CASE(UNDEFINED, POINTS);
+   PRIM_CASE(POINTLIST, POINTS);
+   PRIM_CASE(LINELIST, LINES);
+   PRIM_CASE(LINESTRIP, LINE_STRIP);
+   PRIM_CASE(TRIANGLELIST, TRIANGLES);
+   PRIM_CASE(TRIANGLESTRIP, TRIANGLE_STRIP);
+   PRIM_CASE(LINELIST_ADJ, LINES_ADJACENCY);
+   PRIM_CASE(LINESTRIP_ADJ, LINE_STRIP_ADJACENCY);
+   PRIM_CASE(TRIANGLELIST_ADJ, TRIANGLES_ADJACENCY);
+   // a strip with adjacency is not a list with adjacency
+   PRIM_CASE(TRIANGLESTRIP_ADJ, TRIANGLE_STRIP_ADJACENCY);
+   default:
+      // everything outside the patch list range is treated as points
+      if (prim < D3D_PRIMITIVE_TOPOLOGY_1_CONTROL_POINT_PATCHLIST ||
+          prim > D3D_PRIMITIVE_TOPOLOGY_32_CONTROL_POINT_PATCHLIST)
+         return PIPE_PRIM_POINTS;
+      if (patchSize)
+         *patchSize =
+            prim - D3D_PRIMITIVE_TOPOLOGY_1_CONTROL_POINT_PATCHLIST + 1;
+      return NV50_PRIM_PATCHES;
+   }
+}
+
+#define IPM_CASE(n, a, b) \
+   case SM4_INTERPOLATION_##n: return NV50_IR_INTERP_##a | NV50_IR_INTERP_##b
+
+// Translate an SM4 interpolation qualifier into NV50_IR_INTERP_* flags
+// (base mode, optionally combined with CENTROID or OFFSET).
+// Asserts on an unknown qualifier and returns 0 in that case.
+// The cases are spelled out here; IPM_CASE stays defined above.
+uint8_t
+Converter::cvtInterpMode(enum sm4_interpolation mode) const
+{
+   uint8_t ipa = 0;
+
+   switch (mode) {
+   case SM4_INTERPOLATION_CONSTANT:
+      ipa = NV50_IR_INTERP_FLAT;
+      break;
+   case SM4_INTERPOLATION_LINEAR:
+      ipa = NV50_IR_INTERP_PERSPECTIVE;
+      break;
+   case SM4_INTERPOLATION_LINEAR_CENTROID:
+      ipa = NV50_IR_INTERP_PERSPECTIVE | NV50_IR_INTERP_CENTROID;
+      break;
+   case SM4_INTERPOLATION_LINEAR_NOPERSPECTIVE:
+   case SM4_INTERPOLATION_UNDEFINED:
+      ipa = NV50_IR_INTERP_LINEAR;
+      break;
+   case SM4_INTERPOLATION_LINEAR_NOPERSPECTIVE_CENTROID:
+      ipa = NV50_IR_INTERP_LINEAR | NV50_IR_INTERP_CENTROID;
+      break;
+   case SM4_INTERPOLATION_LINEAR_SAMPLE:
+      ipa = NV50_IR_INTERP_PERSPECTIVE | NV50_IR_INTERP_OFFSET;
+      break;
+   case SM4_INTERPOLATION_LINEAR_NOPERSPECTIVE_SAMPLE:
+      ipa = NV50_IR_INTERP_LINEAR | NV50_IR_INTERP_OFFSET;
+      break;
+   default:
+      assert(!"invalid interpolation mode");
+      break;
+   }
+   return ipa;
+}
+
+// Set the linear / flat / centroid flags of a varying from an
+// NV50_IR_INTERP_* mode byte.  Flags are only ever set, never cleared.
+static void
+setVaryingInterpMode(struct nv50_ir_varying *var, uint8_t mode)
+{
+   const int baseMode = mode & NV50_IR_INTERP_MODE_MASK;
+
+   if (baseMode == NV50_IR_INTERP_LINEAR)
+      var->linear = 1;
+   else if (baseMode == NV50_IR_INTERP_FLAT)
+      var->flat = 1;
+   // any other base mode leaves both flags untouched
+
+   if (mode & NV50_IR_INTERP_CENTROID)
+      var->centroid = 1;
+}
+
+// Rounding mode for the SM4 ROUND_* opcodes; every other opcode gets
+// ROUND_N.
+RoundMode
+Converter::cvtRoundingMode(enum sm4_opcode op) const
+{
+   if (op == SM4_OPCODE_ROUND_NE)
+      return ROUND_NI;
+   if (op == SM4_OPCODE_ROUND_NI)
+      return ROUND_MI;
+   if (op == SM4_OPCODE_ROUND_PI)
+      return ROUND_PI;
+   if (op == SM4_OPCODE_ROUND_Z)
+      return ROUND_ZI;
+   return ROUND_N;
+}
+
+// Condition code for the SM4 comparison opcodes (float, double, signed and
+// unsigned variants share a code).  Non-comparison opcodes give CC_ALWAYS.
+CondCode
+Converter::cvtCondCode(enum sm4_opcode op) const
+{
+   CondCode cc = CC_ALWAYS;
+
+   switch (op) {
+   case SM4_OPCODE_EQ:
+   case SM4_OPCODE_IEQ:
+   case SM4_OPCODE_DEQ:
+      cc = CC_EQ;
+      break;
+   case SM4_OPCODE_GE:
+   case SM4_OPCODE_IGE:
+   case SM4_OPCODE_UGE:
+   case SM4_OPCODE_DGE:
+      cc = CC_GE;
+      break;
+   case SM4_OPCODE_LT:
+   case SM4_OPCODE_ILT:
+   case SM4_OPCODE_ULT:
+   case SM4_OPCODE_DLT:
+      cc = CC_LT;
+      break;
+   case SM4_OPCODE_NE:
+   case SM4_OPCODE_INE:
+   case SM4_OPCODE_DNE:
+      cc = CC_NEU;
+      break;
+   default:
+      break;
+   }
+   return cc;
+}
+
+// Source operand type implied by an SM4 opcode.  Opcodes not listed here
+// are treated as operating on 32-bit floats.
+DataType
+Converter::inferSrcType(enum sm4_opcode op) const
+{
+   DataType ty = TYPE_F32;
+
+   switch (op) {
+   // signed integer
+   case SM4_OPCODE_IADD:
+   case SM4_OPCODE_IMAD:
+   case SM4_OPCODE_IMUL:
+   case SM4_OPCODE_IMAX:
+   case SM4_OPCODE_IMIN:
+   case SM4_OPCODE_INEG:
+   case SM4_OPCODE_ISHL:
+   case SM4_OPCODE_ISHR:
+   case SM4_OPCODE_IEQ:
+   case SM4_OPCODE_INE:
+   case SM4_OPCODE_IGE:
+   case SM4_OPCODE_ILT:
+   case SM4_OPCODE_ITOF:
+   case SM4_OPCODE_ATOMIC_IADD:
+   case SM4_OPCODE_ATOMIC_IMAX:
+   case SM4_OPCODE_ATOMIC_IMIN:
+      ty = TYPE_S32;
+      break;
+   // unsigned integer / bitwise
+   case SM4_OPCODE_AND:
+   case SM4_OPCODE_OR:
+   case SM4_OPCODE_XOR:
+   case SM4_OPCODE_NOT:
+   case SM4_OPCODE_UDIV:
+   case SM4_OPCODE_UMUL:
+   case SM4_OPCODE_UMAD:
+   case SM4_OPCODE_UMAX:
+   case SM4_OPCODE_UMIN:
+   case SM4_OPCODE_USHR:
+   case SM4_OPCODE_ULT:
+   case SM4_OPCODE_UGE:
+   case SM4_OPCODE_UTOF:
+   case SM4_OPCODE_UADDC:
+   case SM4_OPCODE_USUBB:
+   case SM4_OPCODE_ATOMIC_AND:
+   case SM4_OPCODE_ATOMIC_OR:
+   case SM4_OPCODE_ATOMIC_XOR:
+   case SM4_OPCODE_ATOMIC_UMAX:
+   case SM4_OPCODE_ATOMIC_UMIN:
+      ty = TYPE_U32;
+      break;
+   // double precision
+   case SM4_OPCODE_DADD:
+   case SM4_OPCODE_DMUL:
+   case SM4_OPCODE_DMAX:
+   case SM4_OPCODE_DMIN:
+   case SM4_OPCODE_DEQ:
+   case SM4_OPCODE_DNE:
+   case SM4_OPCODE_DGE:
+   case SM4_OPCODE_DLT:
+   case SM4_OPCODE_DMOV:
+   case SM4_OPCODE_DMOVC:
+   case SM4_OPCODE_DTOF:
+      ty = TYPE_F64;
+      break;
+   // half precision
+   case SM4_OPCODE_F16TOF32:
+      ty = TYPE_F16;
+      break;
+   default:
+      break;
+   }
+   return ty;
+}
+
+// Destination type implied by an SM4 opcode.  Only conversions and float
+// comparisons differ from the source type; everything else defers to
+// inferSrcType().
+DataType
+Converter::inferDstType(enum sm4_opcode op) const
+{
+   DataType ty;
+
+   switch (op) {
+   case SM4_OPCODE_FTOI:
+      ty = TYPE_S32;
+      break;
+   case SM4_OPCODE_FTOU:
+   case SM4_OPCODE_EQ:
+   case SM4_OPCODE_NE:
+   case SM4_OPCODE_GE:
+   case SM4_OPCODE_LT:
+      ty = TYPE_U32;
+      break;
+   case SM4_OPCODE_FTOD:
+      ty = TYPE_F64;
+      break;
+   case SM4_OPCODE_F32TOF16:
+      ty = TYPE_F16;
+      break;
+   case SM4_OPCODE_ITOF:
+   case SM4_OPCODE_UTOF:
+   case SM4_OPCODE_DTOF:
+      ty = TYPE_F32;
+      break;
+   default:
+      ty = inferSrcType(op);
+      break;
+   }
+   return ty;
+}
+
+// Map an SM4 opcode to the nv50_ir operation it is translated with.
+// Cases are grouped by resulting operation.  Opcodes that are not listed,
+// and the switch/case bookkeeping opcodes, map to OP_NOP.
+operation
+Converter::cvtOpcode(enum sm4_opcode op) const
+{
+   switch (op) {
+   // arithmetic
+   case SM4_OPCODE_ADD:
+   case SM4_OPCODE_IADD:
+   case SM4_OPCODE_DADD:
+   case SM4_OPCODE_UADDC:
+   case SM4_OPCODE_ATOMIC_IADD:
+      return OP_ADD;
+   case SM4_OPCODE_USUBB:
+      return OP_SUB;
+   case SM4_OPCODE_MUL:
+   case SM4_OPCODE_IMUL:
+   case SM4_OPCODE_UMUL:
+   case SM4_OPCODE_DMUL:
+      return OP_MUL;
+   case SM4_OPCODE_MAD:
+   case SM4_OPCODE_IMAD:
+   case SM4_OPCODE_UMAD:
+   case SM4_OPCODE_DP2:
+   case SM4_OPCODE_DP3:
+   case SM4_OPCODE_DP4:
+      return OP_MAD;
+   case SM4_OPCODE_DIV:
+   case SM4_OPCODE_UDIV:
+      return OP_DIV;
+   case SM4_OPCODE_MIN:
+   case SM4_OPCODE_IMIN:
+   case SM4_OPCODE_UMIN:
+   case SM4_OPCODE_DMIN:
+   case SM4_OPCODE_ATOMIC_IMIN:
+   case SM4_OPCODE_ATOMIC_UMIN:
+      return OP_MIN;
+   case SM4_OPCODE_MAX:
+   case SM4_OPCODE_IMAX:
+   case SM4_OPCODE_UMAX:
+   case SM4_OPCODE_DMAX:
+   case SM4_OPCODE_ATOMIC_IMAX:
+   case SM4_OPCODE_ATOMIC_UMAX:
+      return OP_MAX;
+   case SM4_OPCODE_INEG:
+      return OP_NEG;
+
+   // bitwise / shifts
+   case SM4_OPCODE_AND:
+   case SM4_OPCODE_ATOMIC_AND:
+      return OP_AND;
+   case SM4_OPCODE_OR:
+   case SM4_OPCODE_ATOMIC_OR:
+      return OP_OR;
+   case SM4_OPCODE_XOR:
+   case SM4_OPCODE_ATOMIC_XOR:
+      return OP_XOR;
+   case SM4_OPCODE_NOT:
+      return OP_NOT;
+   case SM4_OPCODE_ISHL:
+      return OP_SHL;
+   case SM4_OPCODE_ISHR:
+   case SM4_OPCODE_USHR:
+      return OP_SHR;
+   case SM4_OPCODE_COUNTBITS:
+      return OP_POPCNT;
+
+   // comparisons
+   case SM4_OPCODE_EQ:
+   case SM4_OPCODE_NE:
+   case SM4_OPCODE_GE:
+   case SM4_OPCODE_LT:
+   case SM4_OPCODE_IEQ:
+   case SM4_OPCODE_INE:
+   case SM4_OPCODE_IGE:
+   case SM4_OPCODE_ILT:
+   case SM4_OPCODE_UGE:
+   case SM4_OPCODE_ULT:
+   case SM4_OPCODE_DEQ:
+   case SM4_OPCODE_DNE:
+   case SM4_OPCODE_DGE:
+   case SM4_OPCODE_DLT:
+      return OP_SET;
+
+   // moves
+   case SM4_OPCODE_MOV:
+   case SM4_OPCODE_MOVC:
+   case SM4_OPCODE_DMOV:
+   case SM4_OPCODE_DMOVC:
+      return OP_MOV;
+
+   // conversions and rounding
+   case SM4_OPCODE_FRC:
+   case SM4_OPCODE_FTOI:
+   case SM4_OPCODE_FTOU:
+   case SM4_OPCODE_FTOD:
+   case SM4_OPCODE_ITOF:
+   case SM4_OPCODE_UTOF:
+   case SM4_OPCODE_DTOF:
+   case SM4_OPCODE_F32TOF16:
+   case SM4_OPCODE_F16TOF32:
+   case SM4_OPCODE_ROUND_NE:
+      return OP_CVT;
+   case SM4_OPCODE_ROUND_NI:
+      return OP_FLOOR;
+   case SM4_OPCODE_ROUND_PI:
+      return OP_CEIL;
+   case SM4_OPCODE_ROUND_Z:
+      return OP_TRUNC;
+
+   // transcendentals
+   case SM4_OPCODE_EXP:
+      return OP_EX2;
+   case SM4_OPCODE_LOG:
+      return OP_LG2;
+   case SM4_OPCODE_RCP:
+      return OP_RCP;
+   case SM4_OPCODE_RSQ:
+      return OP_RSQ;
+   case SM4_OPCODE_SQRT:
+      return OP_SQRT;
+   case SM4_OPCODE_SINCOS:
+      return OP_PRESIN;
+
+   // derivatives
+   case SM4_OPCODE_DERIV_RTX:
+   case SM4_OPCODE_DERIV_RTX_COARSE:
+   case SM4_OPCODE_DERIV_RTX_FINE:
+      return OP_DFDX;
+   case SM4_OPCODE_DERIV_RTY:
+   case SM4_OPCODE_DERIV_RTY_COARSE:
+   case SM4_OPCODE_DERIV_RTY_FINE:
+      return OP_DFDY;
+
+   // texturing and resource queries
+   case SM4_OPCODE_SAMPLE:
+   case SM4_OPCODE_SAMPLE_C:
+   case SM4_OPCODE_SAMPLE_C_LZ:
+      return OP_TEX;
+   case SM4_OPCODE_SAMPLE_L:
+      return OP_TXL;
+   case SM4_OPCODE_SAMPLE_D:
+      return OP_TXD;
+   case SM4_OPCODE_SAMPLE_B:
+      return OP_TXB;
+   case SM4_OPCODE_LD:
+   case SM4_OPCODE_LD_MS:
+      return OP_TXF;
+   case SM4_OPCODE_GATHER4:
+   case SM4_OPCODE_GATHER4_C:
+   case SM4_OPCODE_GATHER4_PO:
+   case SM4_OPCODE_GATHER4_PO_C:
+      return OP_TXG;
+   case SM4_OPCODE_RESINFO:
+   case SM4_OPCODE_BUFINFO:
+      return OP_TXQ;
+   case SM4_OPCODE_SAMPLE_POS:
+   case SM4_OPCODE_SAMPLE_INFO:
+      return OP_PIXLD;
+
+   // control flow
+   case SM4_OPCODE_IF:
+   case SM4_OPCODE_ELSE:
+   case SM4_OPCODE_ENDIF:
+      return OP_BRA;
+   case SM4_OPCODE_BREAK:
+   case SM4_OPCODE_BREAKC:
+      return OP_BREAK;
+   case SM4_OPCODE_CONTINUE:
+   case SM4_OPCODE_CONTINUEC:
+      return OP_CONT;
+   case SM4_OPCODE_LOOP:
+      return OP_PRECONT;
+   case SM4_OPCODE_ENDLOOP:
+      return OP_PREBREAK;
+   case SM4_OPCODE_CALL:
+   case SM4_OPCODE_CALLC:
+   case SM4_OPCODE_INTERFACE_CALL:
+      return OP_CALL;
+   case SM4_OPCODE_RET:
+   case SM4_OPCODE_RETC:
+      return OP_RET;
+   case SM4_OPCODE_DISCARD:
+      return OP_DISCARD;
+
+   // geometry shader output
+   case SM4_OPCODE_EMIT:
+   case SM4_OPCODE_EMIT_STREAM:
+   case SM4_OPCODE_EMITTHENCUT:
+   case SM4_OPCODE_EMITTHENCUT_STREAM:
+      return OP_EMIT;
+   case SM4_OPCODE_CUT:
+   case SM4_OPCODE_CUT_STREAM:
+      return OP_RESTART;
+
+   // memory
+   case SM4_OPCODE_ATOMIC_CMP_STORE:
+      return OP_STORE;
+   case SM4_OPCODE_SYNC:
+      return OP_MEMBAR;
+
+   // no direct equivalent
+   case SM4_OPCODE_NOP:
+   case SM4_OPCODE_SWITCH:
+   case SM4_OPCODE_CASE:
+   case SM4_OPCODE_DEFAULT:
+   case SM4_OPCODE_ENDSWITCH:
+   default:
+      return OP_NOP;
+   }
+}
+
+// Number of destination operands of an SM4 opcode: 2 for the dual-result
+// opcodes, 0 for control flow / emit / sync / phase markers, 1 otherwise.
+unsigned int
+Converter::getDstOpndCount(enum sm4_opcode opcode) const
+{
+   unsigned int count = 1;
+
+   switch (opcode) {
+   // two results
+   case SM4_OPCODE_SINCOS:
+   case SM4_OPCODE_UDIV:
+   case SM4_OPCODE_IMUL:
+   case SM4_OPCODE_UMUL:
+      count = 2;
+      break;
+   // no result
+   case SM4_OPCODE_IF:
+   case SM4_OPCODE_ELSE:
+   case SM4_OPCODE_ENDIF:
+   case SM4_OPCODE_LOOP:
+   case SM4_OPCODE_ENDLOOP:
+   case SM4_OPCODE_BREAK:
+   case SM4_OPCODE_BREAKC:
+   case SM4_OPCODE_CONTINUE:
+   case SM4_OPCODE_CONTINUEC:
+   case SM4_OPCODE_SWITCH:
+   case SM4_OPCODE_CASE:
+   case SM4_OPCODE_CALL:
+   case SM4_OPCODE_CALLC:
+   case SM4_OPCODE_RET:
+   case SM4_OPCODE_RETC:
+   case SM4_OPCODE_DISCARD:
+   case SM4_OPCODE_EMIT:
+   case SM4_OPCODE_EMIT_STREAM:
+   case SM4_OPCODE_CUT:
+   case SM4_OPCODE_CUT_STREAM:
+   case SM4_OPCODE_EMITTHENCUT:
+   case SM4_OPCODE_EMITTHENCUT_STREAM:
+   case SM4_OPCODE_SYNC:
+   case SM4_OPCODE_HS_DECLS:
+   case SM4_OPCODE_HS_CONTROL_POINT_PHASE:
+   case SM4_OPCODE_HS_FORK_PHASE:
+   case SM4_OPCODE_HS_JOIN_PHASE:
+      count = 0;
+      break;
+   default:
+      break;
+   }
+   return count;
+}
+
+#define TARG_CASE_1(a, b) case SM4_TARGET_##a: return TEX_TARGET_##b;
+#define TARG_CASE_2(a, b) case SM4_TARGET_##a: \
+   return dc ? TEX_TARGET_##b##_SHADOW : TEX_TARGET_##b
+
+// Translate an SM4 resource target into a TexTarget.
+//
+// targ: SM4 resource target
+// op:   SM4 opcode accessing the resource; depth-compare opcodes select
+//       the _SHADOW variant of targets that have one
+// opr:  optional; receives OP_LOAD for raw buffers, OP_SULD for
+//       structured buffers and OP_TEX for every other target
+//
+// Asserts on an unknown target and falls back to the 2D target.
+TexTarget
+Converter::cvtTexTarget(enum sm4_target targ,
+                        enum sm4_opcode op, operation *opr) const
+{
+   bool dc; // depth compare; referenced by TARG_CASE_2
+
+   switch (op) {
+   case SM4_OPCODE_SAMPLE_C:
+   case SM4_OPCODE_SAMPLE_C_LZ:
+   case SM4_OPCODE_GATHER4_C:
+   case SM4_OPCODE_GATHER4_PO_C:
+      dc = true;
+      break;
+   default:
+      dc = false;
+      break;
+   }
+
+   if (opr) {
+      if (targ == SM4_TARGET_RAW_BUFFER)
+         *opr = OP_LOAD;
+      else if (targ == SM4_TARGET_STRUCTURED_BUFFER)
+         *opr = OP_SULD;
+      else
+         *opr = OP_TEX;
+   }
+
+   switch (targ) {
+   // targets with a shadow variant
+   TARG_CASE_2(TEXTURE1D,         1D);
+   TARG_CASE_2(TEXTURE2D,         2D);
+   TARG_CASE_2(TEXTURECUBE,       CUBE);
+   TARG_CASE_2(TEXTURE1DARRAY,    1D_ARRAY);
+   TARG_CASE_2(TEXTURE2DARRAY,    2D_ARRAY);
+   TARG_CASE_2(TEXTURECUBEARRAY,  CUBE_ARRAY);
+   // targets without one
+   TARG_CASE_1(UNKNOWN,           2D);
+   TARG_CASE_1(TEXTURE2DMS,       2D_MS);
+   TARG_CASE_1(TEXTURE2DMSARRAY,  2D_MS_ARRAY);
+   TARG_CASE_1(TEXTURE3D,         3D);
+   TARG_CASE_1(BUFFER,            BUFFER);
+   TARG_CASE_1(RAW_BUFFER,        BUFFER);
+   TARG_CASE_1(STRUCTURED_BUFFER, BUFFER);
+   default:
+      assert(!"invalid SM4 texture target");
+      return dc ? TEX_TARGET_2D_SHADOW : TEX_TARGET_2D;
+   }
+}
+
+// Index of a tessellation factor system value: edge / detail factors get
+// 0..3, inside / density factors get 4..5.  Any other system value gives 0.
+static inline uint32_t
+getSVIndex(enum sm4_sv sv)
+{
+   uint32_t index = 0;
+
+   switch (sv) {
+   case SM4_SV_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR:
+   case SM4_SV_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR:
+   case SM4_SV_FINAL_LINE_DETAIL_TESSFACTOR:
+      index = 0;
+      break;
+   case SM4_SV_FINAL_QUAD_V_EQ_0_EDGE_TESSFACTOR:
+   case SM4_SV_FINAL_TRI_V_EQ_0_EDGE_TESSFACTOR:
+      index = 1;
+      break;
+   case SM4_SV_FINAL_QUAD_U_EQ_1_EDGE_TESSFACTOR:
+   case SM4_SV_FINAL_TRI_W_EQ_0_EDGE_TESSFACTOR:
+      index = 2;
+      break;
+   case SM4_SV_FINAL_QUAD_V_EQ_1_EDGE_TESSFACTOR:
+      index = 3;
+      break;
+   case SM4_SV_FINAL_QUAD_U_INSIDE_TESSFACTOR:
+   case SM4_SV_FINAL_TRI_INSIDE_TESSFACTOR:
+   case SM4_SV_FINAL_LINE_DENSITY_TESSFACTOR:
+      index = 4;
+      break;
+   case SM4_SV_FINAL_QUAD_V_INSIDE_TESSFACTOR:
+      index = 5;
+      break;
+   default:
+      break;
+   }
+   return index;
+}
+
+// Translate an SM4 system value into an nv50_ir SVSemantic.
+//
+// sv:  SM4 system value
+// idx: always written; 0 except for tessellation factors, where it
+//      receives getSVIndex(sv)
+//
+// Asserts on an unknown system value and returns SV_UNDEFINED.
+SVSemantic
+Converter::cvtSemantic(enum sm4_sv sv, uint8_t &idx) const
+{
+   SVSemantic sem = SV_UNDEFINED;
+
+   idx = 0;
+
+   switch (sv) {
+   case SM4_SV_UNDEFINED:
+      break;
+   case SM4_SV_POSITION:
+      sem = SV_POSITION;
+      break;
+   case SM4_SV_CLIP_DISTANCE:
+   case SM4_SV_CULL_DISTANCE: // XXX: distinction
+      sem = SV_CLIP_DISTANCE;
+      break;
+   case SM4_SV_RENDER_TARGET_ARRAY_INDEX:
+      sem = SV_LAYER;
+      break;
+   case SM4_SV_VIEWPORT_ARRAY_INDEX:
+      sem = SV_VIEWPORT_INDEX;
+      break;
+   case SM4_SV_VERTEX_ID:
+      sem = SV_VERTEX_ID;
+      break;
+   case SM4_SV_PRIMITIVE_ID:
+      sem = SV_PRIMITIVE_ID;
+      break;
+   case SM4_SV_INSTANCE_ID:
+      sem = SV_INSTANCE_ID;
+      break;
+   case SM4_SV_IS_FRONT_FACE:
+      sem = SV_FACE;
+      break;
+   case SM4_SV_SAMPLE_INDEX:
+      sem = SV_SAMPLE_INDEX;
+      break;
+
+   // all tessellation factors share one semantic, told apart by idx
+   case SM4_SV_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR:
+   case SM4_SV_FINAL_QUAD_V_EQ_0_EDGE_TESSFACTOR:
+   case SM4_SV_FINAL_QUAD_U_EQ_1_EDGE_TESSFACTOR:
+   case SM4_SV_FINAL_QUAD_V_EQ_1_EDGE_TESSFACTOR:
+   case SM4_SV_FINAL_QUAD_U_INSIDE_TESSFACTOR:
+   case SM4_SV_FINAL_QUAD_V_INSIDE_TESSFACTOR:
+   case SM4_SV_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR:
+   case SM4_SV_FINAL_TRI_V_EQ_0_EDGE_TESSFACTOR:
+   case SM4_SV_FINAL_TRI_W_EQ_0_EDGE_TESSFACTOR:
+   case SM4_SV_FINAL_TRI_INSIDE_TESSFACTOR:
+   case SM4_SV_FINAL_LINE_DETAIL_TESSFACTOR:
+   case SM4_SV_FINAL_LINE_DENSITY_TESSFACTOR:
+      idx = getSVIndex(sv);
+      sem = SV_TESS_FACTOR;
+      break;
+
+   default:
+      assert(!"invalid SM4 system value");
+      break;
+   }
+   return sem;
+}
+
+// Map an nv50_ir system value to its TGSI_SEMANTIC_* / NV50_SEMANTIC_*
+// name.  System values without a dedicated name give
+// TGSI_SEMANTIC_GENERIC.  The index argument is not used.
+unsigned
+Converter::tgsiSemantic(SVSemantic sv, int index)
+{
+   unsigned sn = TGSI_SEMANTIC_GENERIC;
+
+   switch (sv) {
+   case SV_POSITION:       sn = TGSI_SEMANTIC_POSITION;      break;
+   case SV_FACE:           sn = TGSI_SEMANTIC_FACE;          break;
+   case SV_LAYER:          sn = NV50_SEMANTIC_LAYER;         break;
+   case SV_VIEWPORT_INDEX: sn = NV50_SEMANTIC_VIEWPORTINDEX; break;
+   case SV_POINT_SIZE:     sn = TGSI_SEMANTIC_PSIZE;         break;
+   case SV_CLIP_DISTANCE:  sn = NV50_SEMANTIC_CLIPDISTANCE;  break;
+   case SV_VERTEX_ID:      sn = NV50_SEMANTIC_VERTEXID;      break;
+   case SV_INSTANCE_ID:    sn = TGSI_SEMANTIC_INSTANCEID;    break;
+   case SV_PRIMITIVE_ID:   sn = TGSI_SEMANTIC_PRIMID;        break;
+   case SV_TESS_FACTOR:    sn = NV50_SEMANTIC_TESSFACTOR;    break;
+   case SV_TESS_COORD:     sn = NV50_SEMANTIC_TESSCOORD;     break;
+   case SV_INVOCATION_ID:  sn = NV50_SEMANTIC_INVOCATIONID;  break;
+   default:
+      break;
+   }
+   return sn;
+}
+
+// Append a system value (semantic name sn, semantic index si) to info.sv[]
+// unless an entry with the same name and index already exists.  An
+// existing entry is left untouched; mask and input are not merged into it.
+void
+Converter::recordSV(unsigned sn, unsigned si, unsigned mask, bool input)
+{
+   const unsigned int count = info.numSysVals;
+
+   for (unsigned int k = 0; k < count; ++k) {
+      if (info.sv[k].sn == sn && info.sv[k].si == si)
+         return; // already recorded
+   }
+
+   // not found: fill the next free slot
+   info.sv[count].sn = sn;
+   info.sv[count].si = si;
+   info.sv[count].mask = mask;
+   info.sv[count].input = input ? 1 : 0;
+   info.numSysVals = count + 1;
+}
+
+// Fill info.in[] and info.out[] from the input, output and patch-constant
+// signatures of the SM4 program, and set info.numInputs, info.numOutputs
+// and info.numPatchConstants.
+//
+// Patch constants are stored after the regular inputs (tessellation
+// evaluation programs) or after the regular outputs (all other program
+// types) and are added to the respective count at the end.
+// Registers that appear several times get their masks merged; the semantic
+// is assigned on the first occurrence only.
+//
+// Always returns true; unsupported semantics only trigger an assert.
+bool
+Converter::parseSignature()
+{
+   const bool isTessEval =
+      prog->getType() == Program::TYPE_TESSELLATION_EVAL;
+   struct nv50_ir_varying *var;
+   struct nv50_ir_varying *patch;
+   unsigned int i, r, n;
+
+   info.numInputs = 0;
+   info.numOutputs = 0;
+   info.numPatchConstants = 0;
+
+   // ---- inputs ----
+   n = 0; // next generic semantic index
+   for (i = 0; i < sm4.num_params_in; ++i) {
+      r = sm4.params_in[i].Register;
+      var = &info.in[r];
+
+      // mask might be uninitialized ... then enable all components
+      if (sm4.params_in[i].ReadWriteMask)
+         var->mask |= sm4.params_in[i].ReadWriteMask;
+      else
+         var->mask = 0xf;
+      var->id = r;
+      if (var->regular) // already assigned semantic name/index
+         continue;
+      var->regular = 1;
+      var->patch = 0;
+
+      info.numInputs = MAX2(info.numInputs, r + 1);
+
+      switch (sm4.params_in[i].SystemValueType) {
+      case D3D_NAME_UNDEFINED:
+         var->sn = TGSI_SEMANTIC_GENERIC;
+         var->si = n++;
+         break;
+      case D3D_NAME_POSITION:
+         var->sn = TGSI_SEMANTIC_POSITION;
+         break;
+      case D3D_NAME_VERTEX_ID:
+         var->sn = NV50_SEMANTIC_VERTEXID;
+         break;
+      case D3D_NAME_PRIMITIVE_ID:
+         var->sn = TGSI_SEMANTIC_PRIMID;
+         // no corresponding output
+         recordSV(TGSI_SEMANTIC_PRIMID, 0, 1, true);
+         break;
+      case D3D_NAME_INSTANCE_ID:
+         var->sn = TGSI_SEMANTIC_INSTANCEID;
+         break;
+      case D3D_NAME_IS_FRONT_FACE:
+         var->sn = TGSI_SEMANTIC_FACE;
+         // no corresponding output
+         recordSV(TGSI_SEMANTIC_FACE, 0, 1, true);
+         break;
+      default:
+         assert(!"invalid/unsupported input linkage semantic");
+         break;
+      }
+   }
+
+   // ---- outputs ----
+   n = 0;
+   for (i = 0; i < sm4.num_params_out; ++i) {
+      r = sm4.params_out[i].Register;
+      var = &info.out[r];
+
+      // the complement of the signature's mask is accumulated here
+      var->mask |= ~sm4.params_out[i].ReadWriteMask;
+      var->id = r;
+      if (var->regular) // already assigned semantic name/index
+         continue;
+      var->regular = 1;
+      var->patch = 0;
+
+      info.numOutputs = MAX2(info.numOutputs, r + 1);
+
+      switch (sm4.params_out[i].SystemValueType) {
+      case D3D_NAME_UNDEFINED:
+         if (prog->getType() == Program::TYPE_FRAGMENT) {
+            var->sn = TGSI_SEMANTIC_COLOR;
+            var->si = info.prop.fp.numColourResults++;
+         } else {
+            var->sn = TGSI_SEMANTIC_GENERIC;
+            var->si = n++;
+         }
+         break;
+      case D3D_NAME_POSITION:
+      case D3D_NAME_DEPTH:
+      case D3D_NAME_DEPTH_GREATER_EQUAL:
+      case D3D_NAME_DEPTH_LESS_EQUAL:
+         var->sn = TGSI_SEMANTIC_POSITION;
+         info.io.fragDepth = r;
+         break;
+      case D3D_NAME_CULL_DISTANCE:
+      case D3D_NAME_CLIP_DISTANCE:
+         var->sn = NV50_SEMANTIC_CLIPDISTANCE;
+         var->si = sm4.params_out[i].SemanticIndex;
+         break;
+      case D3D_NAME_RENDER_TARGET_ARRAY_INDEX:
+         var->sn = NV50_SEMANTIC_LAYER;
+         break;
+      case D3D_NAME_VIEWPORT_ARRAY_INDEX:
+         var->sn = NV50_SEMANTIC_VIEWPORTINDEX;
+         break;
+      case D3D_NAME_PRIMITIVE_ID:
+         var->sn = TGSI_SEMANTIC_PRIMID;
+         break;
+      case D3D_NAME_TARGET:
+         var->sn = TGSI_SEMANTIC_COLOR;
+         var->si = sm4.params_out[i].SemanticIndex;
+         break;
+      case D3D_NAME_COVERAGE:
+         var->sn = NV50_SEMANTIC_SAMPLEMASK;
+         info.io.sampleMask = r;
+         break;
+      case D3D_NAME_SAMPLE_INDEX:
+      default:
+         assert(!"invalid/unsupported output linkage semantic");
+         break;
+      }
+   }
+
+   // ---- patch constants ----
+   // stored directly behind the regular inputs resp. outputs
+   patch = isTessEval ? &info.in[info.numInputs]
+                      : &info.out[info.numOutputs];
+
+   n = 0;
+   for (i = 0; i < sm4.num_params_patch; ++i) {
+      r = sm4.params_patch[i].Register;
+      var = &patch[r];
+
+      var->mask |= sm4.params_patch[i].Mask;
+      var->id = r;
+      if (var->regular) // already visited
+         continue;
+      var->regular = 1;
+      var->patch = 1;
+
+      info.numPatchConstants = MAX2(info.numPatchConstants, r + 1);
+
+      switch (sm4.params_patch[i].SystemValueType) {
+      case D3D_NAME_UNDEFINED:
+         var->sn = TGSI_SEMANTIC_GENERIC;
+         var->si = n++;
+         break;
+      case D3D_NAME_FINAL_QUAD_EDGE_TESSFACTOR:
+      case D3D_NAME_FINAL_TRI_EDGE_TESSFACTOR:
+      case D3D_NAME_FINAL_LINE_DETAIL_TESSFACTOR:
+         var->sn = NV50_SEMANTIC_TESSFACTOR;
+         var->si = sm4.params_patch[i].SemanticIndex;
+         break;
+      case D3D_NAME_FINAL_QUAD_INSIDE_TESSFACTOR:
+      case D3D_NAME_FINAL_TRI_INSIDE_TESSFACTOR:
+      case D3D_NAME_FINAL_LINE_DENSITY_TESSFACTOR:
+         // inside / density factors follow the (up to 4) edge factors
+         var->sn = NV50_SEMANTIC_TESSFACTOR;
+         var->si = sm4.params_patch[i].SemanticIndex + 4;
+         break;
+      default:
+         assert(!"invalid patch-constant linkage semantic");
+         break;
+      }
+   }
+
+   if (isTessEval)
+      info.numInputs += info.numPatchConstants;
+   else
+      info.numOutputs += info.numPatchConstants;
+
+   return true;
+}
+
+bool
+Converter::inspectDeclaration(const sm4_dcl& dcl)
+{
+   int idx = -1;
+   enum sm4_interpolation ipa_mode;
+
+   if (dcl.op.get() && dcl.op->is_index_simple(0))
+      idx = dcl.op->indices[0].disp;
+
+   switch (dcl.opcode) {
+   case SM4_OPCODE_DCL_SAMPLER:
+      assert(idx >= 0);
+      shadow[idx] = dcl.dcl_sampler.shadow;
+      break;
+   case SM4_OPCODE_DCL_RESOURCE:
+   {
+      enum sm4_target targ = (enum sm4_target)dcl.dcl_resource.target;
+
+      assert(idx >= 0 && idx < NV50_IR_MAX_RESOURCES);
+      resourceType[idx][0] = cvtTexTarget(targ, SM4_OPCODE_SAMPLE, NULL);
+      resourceType[idx][1] = cvtTexTarget(targ, SM4_OPCODE_SAMPLE_C, NULL);
+   }
+      break;
+   case SM4_OPCODE_DCL_CONSTANT_BUFFER:
+      // nothing to do
+      break;
+   case SM4_OPCODE_CUSTOMDATA:
+      info.immd.bufSize = dcl.num * 4;
+      info.immd.buf = (uint32_t *)MALLOC(info.immd.bufSize);
+      memcpy(info.immd.buf, dcl.data, info.immd.bufSize);
+      break;
+   case SM4_OPCODE_DCL_INDEX_RANGE:
+      // XXX: ?
+      break;
+   case SM4_OPCODE_DCL_INPUT_PS_SGV:
+   case SM4_OPCODE_DCL_INPUT_PS_SIV:
+   case SM4_OPCODE_DCL_INPUT_PS:
+   {
+      assert(idx >= 0 && idx < info.numInputs);
+      ipa_mode = (enum sm4_interpolation)dcl.dcl_input_ps.interpolation;
+      interpMode[idx] = cvtInterpMode(ipa_mode);
+      setVaryingInterpMode(&info.in[idx], interpMode[idx]);
+   }
+      break;
+   case SM4_OPCODE_DCL_INPUT_SGV:
+   case SM4_OPCODE_DCL_INPUT_SIV:
+   case SM4_OPCODE_DCL_INPUT:
+      if (dcl.op->file == SM4_FILE_INPUT_DOMAIN_POINT) {
+         idx = info.numInputs++;
+         info.in[idx].sn = NV50_SEMANTIC_TESSCOORD;
+         info.in[idx].mask = dcl.op->mask;
+      }
+      // rest handled in parseSignature
+      break;
+   case SM4_OPCODE_DCL_OUTPUT_SGV:
+   case SM4_OPCODE_DCL_OUTPUT_SIV:
+      switch (dcl.sv) {
+      case SM4_SV_POSITION:
+         assert(prog->getType() != Program::TYPE_FRAGMENT);
+         break;
+      case SM4_SV_CULL_DISTANCE: // XXX: order ?
+         info.io.cullDistanceMask |= 1 << info.io.clipDistanceCount;
+        // fall through
+      case SM4_SV_CLIP_DISTANCE:
+         info.io.clipDistanceCount++;
+         break;
+      default:
+         break;
+      }
+      switch (dcl.op->file) {
+      case SM4_FILE_OUTPUT_DEPTH_LESS_EQUAL:
+      case SM4_FILE_OUTPUT_DEPTH_GREATER_EQUAL:
+      case SM4_FILE_OUTPUT_DEPTH:
+         if (info.io.fragDepth < 0xff)
+            break;
+         idx = info.io.fragDepth = info.numOutputs++;
+         info.out[idx].sn = TGSI_SEMANTIC_POSITION;
+         break;
+      case SM4_FILE_OUTPUT_COVERAGE_MASK:
+         if (info.io.sampleMask < 0xff)
+            break;
+         idx = info.io.sampleMask = info.numOutputs++;
+         info.out[idx].sn = NV50_SEMANTIC_SAMPLEMASK;
+         break;
+      default:
+         break;
+      }
+      break;
+   case SM4_OPCODE_DCL_OUTPUT:
+      // handled in parseSignature
+      break;
+   case SM4_OPCODE_DCL_TEMPS:
+      nrRegVals += dcl.num;
+      break;
+   case SM4_OPCODE_DCL_INDEXABLE_TEMP:
+      nrArrays++;
+      break;
+   case SM4_OPCODE_DCL_GLOBAL_FLAGS:
+      if (prog->getType() == Program::TYPE_FRAGMENT)
+         info.prop.fp.earlyFragTests = dcl.dcl_global_flags.early_depth_stencil;
+      break;
+
+   case SM4_OPCODE_DCL_FUNCTION_BODY:
+      break;
+   case SM4_OPCODE_DCL_FUNCTION_TABLE:
+      break;
+   case SM4_OPCODE_DCL_INTERFACE:
+      break;
+
+      // GP
+   case SM4_OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY:
+      info.prop.gp.outputPrim = g3dPrim(
+         dcl.dcl_gs_output_primitive_topology.primitive_topology);
+      break;
+   case SM4_OPCODE_DCL_GS_INPUT_PRIMITIVE:
+      info.prop.gp.inputPrim = g3dPrim(dcl.dcl_gs_input_primitive.primitive);
+      break;
+   case SM4_OPCODE_DCL_MAX_OUTPUT_VERTEX_COUNT:
+      info.prop.gp.maxVertices = dcl.num;
+      break;
+   case SM4_OPCODE_DCL_GS_INSTANCE_COUNT:
+      info.prop.gp.instanceCount = dcl.num;
+      break;
+   case SM4_OPCODE_DCL_STREAM:
+      break;
+
+      // TCP/TEP
+   case SM4_OPCODE_DCL_INPUT_CONTROL_POINT_COUNT:
+      info.prop.tp.inputPatchSize =
+         dcl.dcl_input_control_point_count.control_points;
+      break;
+   case SM4_OPCODE_DCL_OUTPUT_CONTROL_POINT_COUNT:
+      info.prop.tp.outputPatchSize =
+         dcl.dcl_output_control_point_count.control_points;
+      break;
+   case SM4_OPCODE_DCL_TESS_DOMAIN:
+      switch (dcl.dcl_tess_domain.domain) {
+      case D3D_TESSELLATOR_DOMAIN_ISOLINE:
+         info.prop.tp.domain = PIPE_PRIM_LINES;
+         break;
+      case D3D_TESSELLATOR_DOMAIN_TRI:
+         info.prop.tp.domain = PIPE_PRIM_TRIANGLES;
+         break;
+      case D3D_TESSELLATOR_DOMAIN_QUAD:
+         info.prop.tp.domain = PIPE_PRIM_QUADS;
+         break;
+      case D3D_TESSELLATOR_DOMAIN_UNDEFINED:
+      default:
+         info.prop.tp.domain = PIPE_PRIM_MAX;
+         break;
+      }
+      break;
+   case SM4_OPCODE_DCL_TESS_PARTITIONING:
+      switch (dcl.dcl_tess_partitioning.partitioning) {
+      case D3D_TESSELLATOR_PARTITIONING_FRACTIONAL_ODD:
+         info.prop.tp.partitioning = NV50_TESS_PART_FRACT_ODD;
+         break;
+      case D3D_TESSELLATOR_PARTITIONING_FRACTIONAL_EVEN:
+         info.prop.tp.partitioning = NV50_TESS_PART_FRACT_EVEN;
+         break;
+      case D3D_TESSELLATOR_PARTITIONING_POW2:
+         info.prop.tp.partitioning = NV50_TESS_PART_POW2;
+         break;
+      case D3D_TESSELLATOR_PARTITIONING_INTEGER:
+      case D3D_TESSELLATOR_PARTITIONING_UNDEFINED:
+      default:
+         info.prop.tp.partitioning = NV50_TESS_PART_INTEGER;
+         break;
+      }
+      break;
+   case SM4_OPCODE_DCL_TESS_OUTPUT_PRIMITIVE:
+      switch (dcl.dcl_tess_output_primitive.primitive) {
+      case D3D_TESSELLATOR_OUTPUT_LINE:
+         info.prop.tp.outputPrim = PIPE_PRIM_LINES;
+         break;
+      case D3D_TESSELLATOR_OUTPUT_TRIANGLE_CW:
+         info.prop.tp.outputPrim = PIPE_PRIM_TRIANGLES;
+         info.prop.tp.winding = +1;
+         break;
+      case D3D_TESSELLATOR_OUTPUT_TRIANGLE_CCW:
+         info.prop.tp.outputPrim = PIPE_PRIM_TRIANGLES;
+         info.prop.tp.winding = -1;
+         break;
+      case D3D_TESSELLATOR_OUTPUT_POINT:
+         info.prop.tp.outputPrim = PIPE_PRIM_POINTS;
+         break;
+      case D3D_TESSELLATOR_OUTPUT_UNDEFINED:
+      default:
+         info.prop.tp.outputPrim = PIPE_PRIM_MAX;
+         break;
+      }
+      break;
+
+   case SM4_OPCODE_HS_FORK_PHASE:
+      ++subPhaseCnt[0];
+      phase = 1;
+      break;
+   case SM4_OPCODE_HS_JOIN_PHASE:
+      phase = 2;
+      ++subPhaseCnt[1];
+      break;
+   case SM4_OPCODE_DCL_HS_FORK_PHASE_INSTANCE_COUNT:
+   case SM4_OPCODE_DCL_HS_JOIN_PHASE_INSTANCE_COUNT:
+   case SM4_OPCODE_DCL_HS_MAX_TESSFACTOR:
+      break;
+
+      // weird stuff
+   case SM4_OPCODE_DCL_THREAD_GROUP:
+   case SM4_OPCODE_DCL_UNORDERED_ACCESS_VIEW_TYPED:
+   case SM4_OPCODE_DCL_UNORDERED_ACCESS_VIEW_RAW:
+   case SM4_OPCODE_DCL_UNORDERED_ACCESS_VIEW_STRUCTURED:
+   case SM4_OPCODE_DCL_THREAD_GROUP_SHARED_MEMORY_RAW:
+   case SM4_OPCODE_DCL_THREAD_GROUP_SHARED_MEMORY_STRUCTURED:
+   case SM4_OPCODE_DCL_RESOURCE_RAW:
+   case SM4_OPCODE_DCL_RESOURCE_STRUCTURED:
+      ERROR("unhandled declaration\n");
+      abort();
+      return false;
+
+   default:
+      assert(!"invalid SM4 declaration");
+      return false;
+   }
+   return true;
+}
+
+// Create the value arrays used during conversion, sized from the counts
+// currently stored in nrArrays, nrRegVals and info.numOutputs.
+void
+Converter::allocateValues()
+{
+   // one DataArray per indexable temp array, each parented to this converter
+   lData = new DataArray[nrArrays];
+   for (unsigned int a = 0; a < nrArrays; ++a) {
+      DataArray &array = lData[a];
+      array.setParent(this);
+   }
+
+   // the same nrRegVals registers, set up once with 4x4 and once with 2x8
+   // NOTE(review): presumably (components, bytes per component) - confirm
+   tData32.setup(0, nrRegVals, 4, 4, FILE_GPR);
+   tData64.setup(0, nrRegVals, 2, 8, FILE_GPR);
+
+   // only fragment programs get an output array here
+   const bool isFragProg = (prog->getType() == Program::TYPE_FRAGMENT);
+   if (isFragProg)
+      oData.setup(0, info.numOutputs, 4, 4, FILE_GPR);
+}
+
+bool Converter::handleDeclaration(const sm4_dcl& dcl)
+{
+   switch (dcl.opcode) {
+   case SM4_OPCODE_DCL_INDEXABLE_TEMP:
+      lData[nrArrays++].setup(arrayVol,
+                              dcl.indexable_temp.num, dcl.indexable_temp.comps,
+                              4, FILE_MEMORY_LOCAL);
+      arrayVol += dcl.indexable_temp.num * dcl.indexable_temp.comps * 4;
+      break;
+   case SM4_OPCODE_HS_FORK_PHASE:
+      if (subPhaseCnt[0])
+         phaseInstCnt[0][subPhaseCnt[0]] = phaseInstCnt[0][subPhaseCnt[0] - 1];
+      ++subPhaseCnt[0];
+      break;
+   case SM4_OPCODE_HS_JOIN_PHASE:
+      if (subPhaseCnt[1])
+         phaseInstCnt[1][subPhaseCnt[1]] = phaseInstCnt[1][subPhaseCnt[1] - 1];
+      ++subPhaseCnt[1];
+      break;
+   case SM4_OPCODE_DCL_HS_FORK_PHASE_INSTANCE_COUNT:
+      phaseInstCnt[0][subPhaseCnt[0] - 1] = dcl.num;
+      break;
+   case SM4_OPCODE_DCL_HS_JOIN_PHASE_INSTANCE_COUNT:
+      phaseInstCnt[1][subPhaseCnt[1] - 1] = dcl.num;
+      break;
+
+   default:
+      break; // already handled in inspection
+   }
+
+   return true;
+}
+
+// Return the symbol for component c of input i: a shader input symbol for
+// regular inputs, a system value symbol otherwise.
+Symbol *
+Converter::iSym(int i, int c)
+{
+   if (!info.in[i].regular)
+      return mkSysVal(tgsi::irSemantic(info.in[i].sn), info.in[i].si);
+
+   // the address is the component's slot times 4
+   return mkSymbol(FILE_SHADER_INPUT, 0, sTy, info.in[i].slot[c] * 4);
+}
+
+// Return the symbol for component c of output i: a shader output symbol for
+// regular outputs, a system value symbol otherwise.
+Symbol *
+Converter::oSym(int i, int c)
+{
+   if (!info.out[i].regular)
+      return mkSysVal(tgsi::irSemantic(info.out[i].sn), info.out[i].si);
+
+   // the address is the component's slot times 4
+   return mkSymbol(FILE_SHADER_OUTPUT, 0, dTy, info.out[i].slot[c] * 4);
+}
+
+// Return the indirect address value for index dimension dim of source s,
+// shifted left by shl bits when shl is non-zero.  The result is cached in
+// srcPtr[s][dim]; NULL is returned if the index has no relative register.
+Value *
+Converter::getSrcPtr(int s, int dim, int shl)
+{
+   if (srcPtr[s][dim])
+      return srcPtr[s][dim]; // already computed
+
+   sm4_op *op = insn->ops[s + nDstOpnds]->indices[dim].reg.get();
+   if (!op)
+      return NULL;
+
+   Value *ptr = src(*op, 0, s);
+   if (shl)
+      ptr = mkOp2v(OP_SHL, TYPE_U32, getSSA(), ptr, mkImm(shl));
+
+   srcPtr[s][dim] = ptr;
+   return srcPtr[s][dim];
+}
+
+// Return the indirect address value for index dimension dim of destination
+// d (which must be 0), shifted left by shl bits when shl is non-zero.  The
+// result is cached in dstPtr[dim]; NULL is returned if the index has no
+// relative register.
+Value *
+Converter::getDstPtr(int d, int dim, int shl)
+{
+   assert(d == 0);
+
+   if (!dstPtr[dim]) {
+      sm4_op *op = insn->ops[d]->indices[dim].reg.get();
+      if (!op)
+         return NULL;
+
+      Value *ptr = src(*op, 0, d);
+      if (shl)
+         ptr = mkOp2v(OP_SHL, TYPE_U32, getSSA(), ptr, mkImm(shl));
+
+      dstPtr[dim] = ptr;
+   }
+   return dstPtr[dim];
+}
+
+// Return the OP_PFETCH result for source s (s < 3), built from the
+// displacement and relative address of the operand's first index.  The
+// result is cached in vtxBase[s].
+Value *
+Converter::getVtxPtr(int s)
+{
+   assert(s < 3);
+
+   if (vtxBase[s])
+      return vtxBase[s]; // already computed
+
+   sm4_op *op = insn->ops[s + nDstOpnds].get();
+   if (!op)
+      return NULL;
+   const int idx = op->indices[0].disp;
+
+   // relative part of index 0 (unshifted); may be NULL
+   Value *rel = getSrcPtr(s, 0, 0);
+
+   vtxBase[s] = mkOp2v(OP_PFETCH, TYPE_U32, getSSA(), mkImm(idx), rel);
+   return vtxBase[s];
+}
+
+// Load component c of the i-th source operand of the current instruction.
+// Source operands follow the nDstOpnds destination operands in insn->ops.
+Value *
+Converter::src(int i, int c)
+{
+   const sm4_op &opnd = *insn->ops[i + nDstOpnds];
+
+   return src(opnd, c, i);
+}
+
+// Get the value to write for component c of the i-th destination operand of
+// the current instruction.
+Value *
+Converter::dst(int i, int c)
+{
+   const sm4_op &opnd = *insn->ops[i];
+
+   return dst(opnd, c, i);
+}
+
+// Store value into component c of the i-th destination operand of the
+// current instruction, emitting an OP_SAT on the value first if the
+// instruction has its sat flag set.
+void
+Converter::saveDst(int i, int c, Value *value)
+{
+   if (insn->insn.sat)
+      mkOp1(OP_SAT, dTy, value, value);
+
+   const sm4_op &opnd = *insn->ops[i];
+   saveDst(opnd, c, value, i);
+}
+
+Value *
+Converter::interpolate(const sm4_op& op, int c, int i)
+{
+   int idx = op.indices[0].disp;
+   int swz = op.swizzle[c];
+   operation opr =
+      (info.in[idx].linear || info.in[idx].flat) ? OP_LINTERP : OP_PINTERP;
+
+   Value *ptr = getSrcPtr(i, 0, 4);
+
+   Instruction *insn = new_Instruction(func, opr, TYPE_F32);
+
+   insn->setDef(0, getScratch());
+   insn->setSrc(0, iSym(idx, swz));
+   if (opr == OP_PINTERP)
+      insn->setSrc(1, fragCoord[3]);
+   if (ptr)
+      insn->setIndirect(0, 0, ptr);
+
+   insn->setInterpolate(interpMode[idx]);
+
+   bb->insertTail(insn);
+   return insn->getDef(0);
+}
+
+Value *
+Converter::src(const sm4_op& op, int c, int s)
+{
+   const int size = typeSizeof(sTy);
+
+   Instruction *ld;
+   Value *res, *ptr, *vtx;
+   int idx, dim, off;
+   const int swz = op.swizzle[c];
+
+   switch (op.file) {
+   case SM4_FILE_IMMEDIATE32:
+      res = loadImm(NULL, (uint32_t)op.imm_values[swz].u32);
+      break;
+   case SM4_FILE_IMMEDIATE64:
+      assert(c < 2);
+      res = loadImm(NULL, op.imm_values[swz].u64);
+      break;
+   case SM4_FILE_TEMP:
+      assert(op.is_index_simple(0));
+      idx = op.indices[0].disp;
+      if (size == 8)
+         res = tData64.load(idx, swz, NULL);
+      else
+         res = tData32.load(idx, swz, NULL);
+      break;
+   case SM4_FILE_INPUT:
+   case SM4_FILE_INPUT_CONTROL_POINT:
+   case SM4_FILE_INPUT_PATCH_CONSTANT:
+      if (prog->getType() == Program::TYPE_FRAGMENT)
+         return interpolate(op, c, s);
+
+      idx = 0;
+      if (op.file == SM4_FILE_INPUT_PATCH_CONSTANT)
+         idx = info.numInputs - info.numPatchConstants;
+
+      if (op.num_indices == 2) {
+         vtx = getVtxPtr(s);
+         ptr = getSrcPtr(s, 1, 4);
+         idx += op.indices[1].disp;
+         res = getSSA();
+         ld = mkOp1(OP_VFETCH, TYPE_U32, res, iSym(idx, swz));
+         ld->setIndirect(0, 0, ptr);
+         ld->setIndirect(0, 1, vtx);
+      } else {
+         idx += op.indices[0].disp;
+         res = mkLoad(sTy, iSym(idx, swz), getSrcPtr(s, 0, 4));
+      }
+      if (op.file == SM4_FILE_INPUT_PATCH_CONSTANT)
+         res->defs->getInsn()->perPatch = 1;
+      break;
+   case SM4_FILE_CONSTANT_BUFFER:
+      assert(op.num_indices == 2);
+      assert(op.is_index_simple(0));
+
+      ptr = getSrcPtr(s, 1, 4);
+      dim = op.indices[0].disp;
+      off = (op.indices[1].disp * 4 + swz) * (sTy == TYPE_F64 ? 8 : 4);
+
+      res = mkLoad(sTy, mkSymbol(FILE_MEMORY_CONST, dim, sTy, off), ptr);
+      break;
+   case SM4_FILE_IMMEDIATE_CONSTANT_BUFFER:
+      ptr = getSrcPtr(s, 0, 4);
+      off = (op.indices[0].disp * 4 + swz) * 4;
+      res = mkLoad(sTy, mkSymbol(FILE_MEMORY_CONST, 14, sTy, off), ptr);
+      break;
+   case SM4_FILE_INDEXABLE_TEMP:
+   {
+      assert(op.is_index_simple(0));
+      int a = op.indices[0].disp;
+      idx = op.indices[1].disp;
+      res = lData[a].load(idx, swz, getSrcPtr(s, 1, 4));
+   }
+      break;
+   case SM4_FILE_INPUT_PRIMITIVEID:
+      recordSV(TGSI_SEMANTIC_PRIMID, 0, 1, true);
+      res = mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_PRIMITIVE_ID, 0));
+      break;
+   case SM4_FILE_INPUT_GS_INSTANCE_ID:
+   case SM4_FILE_OUTPUT_CONTROL_POINT_ID:
+      recordSV(NV50_SEMANTIC_INVOCATIONID, 0, 1, true);
+      res = mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_INVOCATION_ID, 0));
+      break;
+   case SM4_FILE_CYCLE_COUNTER:
+      res =
+         mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_CLOCK, swz ? 1 : 0));
+      break;
+   case SM4_FILE_INPUT_FORK_INSTANCE_ID:
+   case SM4_FILE_INPUT_JOIN_INSTANCE_ID:
+   {
+      phaseInstanceUsed = true;
+      if (unrollPhase)
+         return loadImm(NULL, phaseInstance);
+      const unsigned int cnt = phaseInstCnt[phase - 1][subPhase];
+      res = getScratch();
+      res = mkOp1v(OP_RDSV, TYPE_U32, res, mkSysVal(SV_INVOCATION_ID, 0));
+      res = mkOp2v(OP_MIN, TYPE_U32, res, res, loadImm(NULL, cnt - 1));
+   }
+      break;
+   case SM4_FILE_INPUT_DOMAIN_POINT:
+      assert(swz < 3);
+      res = domainPt[swz];
+      break;
+   case SM4_FILE_THREAD_GROUP_SHARED_MEMORY:
+      off = (op.indices[0].disp * 4 + swz) * (sTy == TYPE_F64 ? 8 : 4);
+      ptr = getSrcPtr(s, 0, 4);
+      res = mkLoad(sTy, mkSymbol(FILE_MEMORY_SHARED, 0, sTy, off), ptr);
+      break;
+   case SM4_FILE_RESOURCE:
+   case SM4_FILE_SAMPLER:
+   case SM4_FILE_UNORDERED_ACCESS_VIEW:
+      return NULL;
+   case SM4_FILE_INPUT_THREAD_ID:
+      res = mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_TID, swz));
+      break;
+   case SM4_FILE_INPUT_THREAD_GROUP_ID:
+      res = mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_CTAID, swz));
+      break;
+   case SM4_FILE_FUNCTION_INPUT:
+   case SM4_FILE_INPUT_THREAD_ID_IN_GROUP:
+      assert(!"unhandled source file");
+      return NULL;
+   default:
+      assert(!"invalid source file");
+      return NULL;
+   }
+
+   if (op.abs)
+      res = mkOp1v(OP_ABS, sTy, getSSA(res->reg.size), res);
+   if (op.neg)
+      res = mkOp1v(OP_NEG, sTy, getSSA(res->reg.size), res);
+   return res;
+}
+
+Value *
+Converter::dst(const sm4_op &op, int c, int i)
+{
+   switch (op.file) {
+   case SM4_FILE_TEMP:
+      return tData32.acquire(op.indices[0].disp, c);
+   case SM4_FILE_INDEXABLE_TEMP:
+      return getScratch();
+   case SM4_FILE_OUTPUT:
+      if (prog->getType() == Program::TYPE_FRAGMENT)
+         return oData.acquire(op.indices[0].disp, c);
+      return getScratch();
+   case SM4_FILE_NULL:
+      return NULL;
+   case SM4_FILE_OUTPUT_DEPTH:
+   case SM4_FILE_OUTPUT_DEPTH_GREATER_EQUAL:
+   case SM4_FILE_OUTPUT_DEPTH_LESS_EQUAL:
+   case SM4_FILE_OUTPUT_COVERAGE_MASK:
+      return getScratch();
+   case SM4_FILE_IMMEDIATE32:
+   case SM4_FILE_IMMEDIATE64:
+   case SM4_FILE_CONSTANT_BUFFER:
+   case SM4_FILE_RESOURCE:
+   case SM4_FILE_SAMPLER:
+   case SM4_FILE_UNORDERED_ACCESS_VIEW:
+      assert(!"invalid destination file");
+      return NULL;
+   default:
+      assert(!"invalid file");
+      return NULL;
+   }
+}
+
+// Store value as fragment depth (component 2 of output info.io.fragDepth).
+// If op is OP_MIN or OP_MAX, the value is first combined with component 2 of
+// the SV_POSITION system value using that operation.
+void
+Converter::saveFragDepth(operation op, Value *value)
+{
+   const bool combineWithZ = (op == OP_MIN || op == OP_MAX);
+
+   if (combineWithZ) {
+      Value *posZ =
+         mkOp1v(OP_RDSV, TYPE_F32, getSSA(), mkSysVal(SV_POSITION, 2));
+      value = mkOp2v(op, TYPE_F32, getSSA(), value, posZ);
+   }
+   oData.store(info.io.fragDepth, 2, NULL, value);
+}
+
+void
+Converter::saveDst(const sm4_op &op, int c, Value *value, int s)
+{
+   Symbol *sym;
+   Instruction *st;
+   int a, idx;
+
+   switch (op.file) {
+   case SM4_FILE_TEMP:
+      idx = op.indices[0].disp;
+      tData32.store(idx, c, NULL, value);
+      break;
+   case SM4_FILE_INDEXABLE_TEMP:
+      a = op.indices[0].disp;
+      idx = op.indices[1].disp;
+      // FIXME: shift is wrong, depends in lData
+      lData[a].store(idx, c, getDstPtr(s, 1, 4), value);
+      break;
+   case SM4_FILE_OUTPUT:
+      assert(op.num_indices == 1);
+      idx = op.indices[0].disp;
+      if (prog->getType() == Program::TYPE_FRAGMENT) {
+         oData.store(idx, c, NULL, value);
+      } else {
+         if (phase)
+            idx += info.numOutputs - info.numPatchConstants;
+         const int shl = (info.out[idx].sn == NV50_SEMANTIC_TESSFACTOR) ? 2 : 4;
+         sym = oSym(idx, c);
+         if (sym->reg.file == FILE_SHADER_OUTPUT)
+            st = mkStore(OP_EXPORT, dTy, sym, getDstPtr(s, 0, shl), value);
+         else
+            st = mkStore(OP_WRSV, dTy, sym, getDstPtr(s, 0, 2), value);
+         st->perPatch = phase ? 1 : 0;
+      }
+      break;
+   case SM4_FILE_OUTPUT_DEPTH_GREATER_EQUAL:
+      saveFragDepth(OP_MAX, value);
+      break;
+   case SM4_FILE_OUTPUT_DEPTH_LESS_EQUAL:
+      saveFragDepth(OP_MIN, value);
+      break;
+   case SM4_FILE_OUTPUT_DEPTH:
+      saveFragDepth(OP_NOP, value);
+      break;
+   case SM4_FILE_OUTPUT_COVERAGE_MASK:
+      oData.store(info.io.sampleMask, 0, NULL, value);
+      break;
+   case SM4_FILE_IMMEDIATE32:
+   case SM4_FILE_IMMEDIATE64:
+   case SM4_FILE_INPUT:
+   case SM4_FILE_CONSTANT_BUFFER:
+   case SM4_FILE_RESOURCE:
+   case SM4_FILE_SAMPLER:
+      assert(!"invalid destination file");
+      return;
+   default:
+      assert(!"invalid file");
+      return;
+   }
+}
+
+void
+Converter::emitTex(Value *dst0[4], TexInstruction *tex, const uint8_t swz[4])
+{
+   Value *res[4] = { NULL, NULL, NULL, NULL };
+   unsigned int c, d;
+
+   for (c = 0; c < 4; ++c)
+      if (dst0[c])
+         tex->tex.mask |= 1 << swz[c];
+   for (d = 0, c = 0; c < 4; ++c)
+      if (tex->tex.mask & (1 << c))
+         tex->setDef(d++, (res[c] = getScratch()));
+
+   bb->insertTail(tex);
+
+   if (insn->opcode == SM4_OPCODE_RESINFO) {
+      if (tex->tex.target.getDim() == 1) {
+        res[2] = loadImm(NULL, 0);
+         if (!tex->tex.target.isArray())
+            res[1] = res[2];
+      } else
+      if (tex->tex.target.getDim() == 2 && !tex->tex.target.isArray()) {
+         res[2] = loadImm(NULL, 0);
+      }
+      for (c = 0; c < 4; ++c) {
+         if (!dst0[c])
+            continue;
+         Value *src = res[swz[c]];
+         assert(src);
+         switch (insn->insn.resinfo_return_type) {
+         case 0:
+            mkCvt(OP_CVT, TYPE_F32, dst0[c], TYPE_U32, src);
+            break;
+         case 1:
+            mkCvt(OP_CVT, TYPE_F32, dst0[c], TYPE_U32, src);
+            if (swz[c] < tex->tex.target.getDim())
+               mkOp1(OP_RCP, TYPE_F32, dst0[c], dst0[c]);
+            break;
+         default:
+            mkMov(dst0[c], src);
+            break;
+         }
+      }
+   } else {
+      for (c = 0; c < 4; ++c)
+         if (dst0[c])
+            mkMov(dst0[c], res[swz[c]]);
+   }
+}
+
+void
+Converter::handleQUERY(Value *dst0[4], enum TexQuery query)
+{
+   TexInstruction *texi = new_TexInstruction(func, OP_TXQ);
+   texi->tex.query = query;
+
+   assert(insn->ops[2]->file == SM4_FILE_RESOURCE); // TODO: UAVs
+
+   const int rOp = (query == TXQ_DIMS) ? 2 : 1;
+   const int sOp = (query == TXQ_DIMS) ? 0 : 1;
+
+   const int tR = insn->ops[rOp]->indices[0].disp;
+
+   texi->setTexture(resourceType[tR][0], tR, 0);
+
+   texi->setSrc(0, src(sOp, 0)); // mip level or sample index
+
+   emitTex(dst0, texi, insn->ops[rOp]->swizzle);
+}
+
+void
+Converter::handleLOAD(Value *dst0[4])
+{
+   TexInstruction *texi = new_TexInstruction(func, OP_TXF);
+   unsigned int c;
+
+   const int tR = insn->ops[2]->indices[0].disp;
+
+   texi->setTexture(resourceType[tR][0], tR, 0);
+
+   for (c = 0; c < texi->tex.target.getArgCount(); ++c)
+      texi->setSrc(c, src(0, c));
+
+   if (texi->tex.target == TEX_TARGET_BUFFER) {
+      texi->tex.levelZero = true;
+   } else {
+      texi->setSrc(c++, src(0, 3));
+      for (c = 0; c < 3; ++c) {
+         texi->tex.offset[0][c] = insn->sample_offset[c];
+        if (texi->tex.offset[0][c])
+            texi->tex.useOffsets = 1;
+      }
+   }
+
+   emitTex(dst0, texi, insn->ops[2]->swizzle);
+}
+
+// order of nv50 ir sources: x y z/layer lod/bias dc
+void
+Converter::handleSAMPLE(operation opr, Value *dst0[4])
+{
+   TexInstruction *texi = new_TexInstruction(func, opr);
+   unsigned int c, s;
+   Value *arg[4], *src0[4];
+   Value *val;
+   Value *lod = NULL, *dc = NULL;
+
+   const int tR = insn->ops[2]->indices[0].disp;
+   const int tS = insn->ops[3]->indices[0].disp;
+
+   TexInstruction::Target tgt = resourceType[tR][shadow[tS] ? 1 : 0];
+
+   for (c = 0; c < tgt.getArgCount(); ++c)
+      arg[c] = src0[c] = src(0, c);
+
+   if (insn->opcode == SM4_OPCODE_SAMPLE_L ||
+       insn->opcode == SM4_OPCODE_SAMPLE_B) {
+      lod = src(3, 0);
+   } else
+   if (insn->opcode == SM4_OPCODE_SAMPLE_C ||
+       insn->opcode == SM4_OPCODE_SAMPLE_C_LZ) {
+      dc = src(3, 0);
+      if (insn->opcode == SM4_OPCODE_SAMPLE_C_LZ)
+         texi->tex.levelZero = true;
+   } else
+   if (insn->opcode == SM4_OPCODE_SAMPLE_D) {
+      for (c = 0; c < tgt.getDim(); ++c) {
+         texi->dPdx[c] = src(3, c);
+         texi->dPdy[c] = src(4, c);
+      }
+   }
+
+   if (tgt.isCube()) {
+      for (c = 0; c < 3; ++c)
+         src0[c] = mkOp1v(OP_ABS, TYPE_F32, getSSA(), arg[c]);
+      val = getScratch();
+      mkOp2(OP_MAX, TYPE_F32, val, src0[0], src0[1]);
+      mkOp2(OP_MAX, TYPE_F32, val, src0[2], val);
+      mkOp1(OP_RCP, TYPE_F32, val, val);
+      for (c = 0; c < 3; ++c)
+         src0[c] = mkOp2v(OP_MUL, TYPE_F32, getSSA(), arg[c], val);
+   }
+
+   for (s = 0; s < tgt.getArgCount(); ++s)
+      texi->setSrc(s, src0[s]);
+   if (lod)
+      texi->setSrc(s++, lod);
+   if (dc)
+      texi->setSrc(s++, dc);
+
+   for (c = 0; c < 3; ++c) {
+      texi->tex.offset[0][c] = insn->sample_offset[c];
+      if (texi->tex.offset[0][c])
+         texi->tex.useOffsets = 1;
+   }
+
+   texi->setTexture(tgt, tR, tS);
+
+   emitTex(dst0, texi, insn->ops[2]->swizzle);
+}
+
+void
+Converter::handleDP(Value *dst0[4], int dim)
+{
+   Value *src0 = src(0, 0), *src1 = src(1, 0);
+   Value *dotp = getScratch();
+
+   assert(dim > 0);
+
+   mkOp2(OP_MUL, TYPE_F32, dotp, src0, src1);
+   for (int c = 1; c < dim; ++c)
+      mkOp3(OP_MAD, TYPE_F32, dotp, src(0, c), src(1, c), dotp);
+
+   for (int c = 0; c < 4; ++c)
+      dst0[c] = dotp;
+}
+
+// Put a fixed OP_JOIN at the head of block conv and an OP_JOINAT targeting
+// conv before the exit instruction of block fork (also recorded in
+// fork->joinAt).
+void
+Converter::insertConvergenceOps(BasicBlock *conv, BasicBlock *fork)
+{
+   FlowInstruction *joinInsn = new_FlowInstruction(func, OP_JOIN, NULL);
+   joinInsn->fixed = 1;
+   conv->insertHead(joinInsn);
+
+   FlowInstruction *joinAtInsn = new_FlowInstruction(func, OP_JOINAT, conv);
+   fork->joinAt = joinAtInsn;
+   fork->insertBefore(fork->getExit(), fork->joinAt);
+}
+
+// Terminate the shader: continue in the epilogue block popped from leaveBBs,
+// export the outputs there for fragment programs, and append OP_EXIT.
+// Does nothing after the first call.
+void
+Converter::finalizeShader()
+{
+   if (!finalized) {
+      BasicBlock *epilogue =
+         reinterpret_cast<BasicBlock *>(leaveBBs.pop().u.p);
+      entryBBs.pop();
+
+      finalized = true;
+
+      // link the current block to the epilogue and continue emitting there
+      bb->cfg.attach(&epilogue->cfg, Graph::Edge::TREE);
+      setPosition(epilogue, true);
+
+      if (prog->getType() == Program::TYPE_FRAGMENT)
+         exportOutputs();
+
+      Instruction *exitInsn = mkOp(OP_EXIT, TYPE_NONE, NULL);
+      exitInsn->terminator = 1;
+   }
+}
+
+// Loop header: runs the following statement for each chan in 0..3 whose bit
+// is set in the write mask of destination operand 0 of the member insn.
+// chan must be an assignable integer variable.
+#define FOR_EACH_DST0_ENABLED_CHANNEL32(chan)         \
+   for ((chan) = 0; (chan) < 4; ++(chan))             \
+      if (insn->ops[0].get()->mask & (1 << (chan)))
+
+// Same as above, but only iterates over chan 0..1.
+#define FOR_EACH_DST0_ENABLED_CHANNEL64(chan)         \
+   for ((chan) = 0; (chan) < 2; ++(chan))             \
+      if (insn->ops[0].get()->mask & (1 << (chan)))
+
+bool
+Converter::checkDstSrcAliasing() const
+{
+   for (unsigned int d = 0; d < nDstOpnds; ++d) {
+      for (unsigned int s = nDstOpnds; s < insn->num_ops; ++s) {
+         if (insn->ops[d]->file != insn->ops[s]->file)
+            continue;
+         int i = insn->ops[s]->num_indices - 1;
+         if (i != insn->ops[d]->num_indices - 1)
+            continue;
+         if (insn->ops[d]->is_index_simple(i) &&
+             insn->ops[s]->is_index_simple(i) &&
+             insn->ops[d]->indices[i].disp == insn->ops[s]->indices[i].disp)
+            return true;
+      }
+   }
+   return false;
+}
+
+bool
+Converter::handleInstruction(unsigned int pos)
+{
+   Value *dst0[4], *rDst0[4];
+   Value *dst1[4], *rDst1[4];
+   int c, nc;
+
+   insn = sm4.insns[pos];
+   enum sm4_opcode opcode = static_cast<sm4_opcode>(insn->opcode);
+
+   operation op = cvtOpcode(opcode);
+
+   sTy = inferSrcType(opcode);
+   dTy = inferDstType(opcode);
+
+   nc = dTy == TYPE_F64 ? 2 : 4;
+
+   nDstOpnds = getDstOpndCount(opcode);
+
+   bool useScratchDst = checkDstSrcAliasing();
+
+   INFO("SM4_OPCODE_##%u, aliasing = %u\n", insn->opcode, useScratchDst);
+
+   if (nDstOpnds >= 1) {
+      for (c = 0; c < nc; ++c)
+         rDst0[c] = dst0[c] =
+            insn->ops[0].get()->mask & (1 << c) ? dst(0, c) : NULL;
+      if (useScratchDst)
+         for (c = 0; c < nc; ++c)
+            dst0[c] = rDst0[c] ? getScratch() : NULL;
+   }
+
+   if (nDstOpnds >= 2) {
+      for (c = 0; c < nc; ++c)
+         rDst1[c] = dst1[c] =
+            insn->ops[1].get()->mask & (1 << c) ? dst(1, c) : NULL;
+      if (useScratchDst)
+         for (c = 0; c < nc; ++c)
+            dst1[c] = rDst1[c] ? getScratch() : NULL;
+   }
+
+   switch (insn->opcode) {
+   case SM4_OPCODE_ADD:
+   case SM4_OPCODE_AND:
+   case SM4_OPCODE_DIV:
+   case SM4_OPCODE_IADD:
+   case SM4_OPCODE_IMAX:
+   case SM4_OPCODE_IMIN:
+   case SM4_OPCODE_MIN:
+   case SM4_OPCODE_MAX:
+   case SM4_OPCODE_MUL:
+   case SM4_OPCODE_OR:
+   case SM4_OPCODE_UMAX:
+   case SM4_OPCODE_UMIN:
+   case SM4_OPCODE_XOR:
+      FOR_EACH_DST0_ENABLED_CHANNEL32(c) {
+         Instruction *insn = mkOp2(op, dTy, dst0[c], src(0, c), src(1, c));
+         if (dTy == TYPE_F32)
+            insn->ftz = 1;
+      }
+      break;
+
+   case SM4_OPCODE_ISHL:
+   case SM4_OPCODE_ISHR:
+   case SM4_OPCODE_USHR:
+      FOR_EACH_DST0_ENABLED_CHANNEL32(c) {
+         Instruction *insn = mkOp2(op, dTy, dst0[c], src(0, c), src(1, c));
+         insn->subOp = NV50_IR_SUBOP_SHIFT_WRAP;
+      }
+      break;
+
+   case SM4_OPCODE_IMAD:
+   case SM4_OPCODE_MAD:
+   case SM4_OPCODE_UMAD:
+      FOR_EACH_DST0_ENABLED_CHANNEL32(c) {
+         mkOp3(OP_MAD, dTy, dst0[c], src(0, c), src(1, c), src(2, c));
+      }
+      break;
+
+   case SM4_OPCODE_DADD:
+   case SM4_OPCODE_DMAX:
+   case SM4_OPCODE_DMIN:
+   case SM4_OPCODE_DMUL:
+      FOR_EACH_DST0_ENABLED_CHANNEL64(c) {
+         mkOp2(op, dTy, dst0[c], src(0, c), src(1, c));
+      }
+      break;
+
+   case SM4_OPCODE_UDIV:
+      for (c = 0; c < 4; ++c) {
+         Value *dvn, *dvs;
+         if (dst0[c] || dst1[c]) {
+            dvn = src(0, c);
+            dvs = src(1, c);
+         }
+         if (dst0[c])
+            mkOp2(OP_DIV, TYPE_U32, dst0[c], dvn, dvs);
+         if (dst1[c])
+            mkOp2(OP_MOD, TYPE_U32, dst1[c], dvn, dvs);
+      }
+      break;
+
+   case SM4_OPCODE_IMUL:
+   case SM4_OPCODE_UMUL:
+      for (c = 0; c < 4; ++c) {
+         Value *a, *b;
+         if (dst0[c] || dst1[c]) {
+            a = src(0, c);
+            b = src(1, c);
+         }
+         if (dst0[c])
+            mkOp2(OP_MUL, dTy, dst0[c], a, b)->subOp =
+               NV50_IR_SUBOP_MUL_HIGH;
+         if (dst1[c])
+            mkOp2(OP_MUL, dTy, dst1[c], a, b);
+      }
+      break;
+
+   case SM4_OPCODE_DP2:
+      handleDP(dst0, 2);
+      break;
+   case SM4_OPCODE_DP3:
+      handleDP(dst0, 3);
+      break;
+   case SM4_OPCODE_DP4:
+      handleDP(dst0, 4);
+      break;
+
+   case SM4_OPCODE_DERIV_RTX:
+   case SM4_OPCODE_DERIV_RTX_COARSE:
+   case SM4_OPCODE_DERIV_RTX_FINE:
+   case SM4_OPCODE_DERIV_RTY:
+   case SM4_OPCODE_DERIV_RTY_COARSE:
+   case SM4_OPCODE_DERIV_RTY_FINE:
+   case SM4_OPCODE_MOV:
+   case SM4_OPCODE_INEG:
+   case SM4_OPCODE_NOT:
+   case SM4_OPCODE_SQRT:
+   case SM4_OPCODE_COUNTBITS:
+   case SM4_OPCODE_EXP:
+   case SM4_OPCODE_LOG:
+   case SM4_OPCODE_RCP:
+      FOR_EACH_DST0_ENABLED_CHANNEL32(c) {
+         mkOp1(op, dTy, dst0[c], src(0, c));
+      }
+      break;
+
+   case SM4_OPCODE_FRC:
+      FOR_EACH_DST0_ENABLED_CHANNEL32(c) {
+         Value *val = getScratch();
+         Value *src0 = src(0, c);
+         mkOp1(OP_FLOOR, TYPE_F32, val, src0);
+         mkOp2(OP_SUB, TYPE_F32, dst0[c], src0, val);
+      }
+      break;
+
+   case SM4_OPCODE_MOVC:
+      FOR_EACH_DST0_ENABLED_CHANNEL32(c)
+         mkCmp(OP_SLCT, CC_NE, TYPE_U32, dst0[c], src(1, c), src(2, c),
+               src(0, c));
+      break;
+
+   case SM4_OPCODE_ROUND_NE:
+   case SM4_OPCODE_ROUND_NI:
+   case SM4_OPCODE_ROUND_PI:
+   case SM4_OPCODE_ROUND_Z:
+      FOR_EACH_DST0_ENABLED_CHANNEL32(c) {
+         Instruction *rnd = mkOp1(op, dTy, dst0[c], src(0, c));
+         rnd->ftz = 1;
+         rnd->rnd = cvtRoundingMode(opcode);
+      }
+      break;
+
+   case SM4_OPCODE_RSQ:
+      FOR_EACH_DST0_ENABLED_CHANNEL32(c)
+         mkOp1(op, dTy, dst0[c], src(0, c));
+      break;
+
+   case SM4_OPCODE_SINCOS:
+      for (c = 0; c < 4; ++c) {
+         if (!dst0[c] && !dst1[c])
+            continue;
+         Value *val = mkOp1v(OP_PRESIN, TYPE_F32, getScratch(), src(0, c));
+         if (dst0[c])
+            mkOp1(OP_SIN, TYPE_F32, dst0[c], val);
+         if (dst1[c])
+            mkOp1(OP_COS, TYPE_F32, dst1[c], val);
+      }
+      break;
+
+   case SM4_OPCODE_EQ:
+   case SM4_OPCODE_GE:
+   case SM4_OPCODE_IEQ:
+   case SM4_OPCODE_IGE:
+   case SM4_OPCODE_ILT:
+   case SM4_OPCODE_LT:
+   case SM4_OPCODE_NE:
+   case SM4_OPCODE_INE:
+   case SM4_OPCODE_ULT:
+   case SM4_OPCODE_UGE:
+   case SM4_OPCODE_DEQ:
+   case SM4_OPCODE_DGE:
+   case SM4_OPCODE_DLT:
+   case SM4_OPCODE_DNE:
+   {
+      CondCode cc = cvtCondCode(opcode);
+      FOR_EACH_DST0_ENABLED_CHANNEL32(c) {
+         CmpInstruction *set;
+         set = mkCmp(op, cc, sTy, dst0[c], src(0, c), src(1, c), NULL);
+         set->setType(dTy, sTy);
+         if (sTy == TYPE_F32)
+            set->ftz = 1;
+      }
+   }
+      break;
+
+   case SM4_OPCODE_FTOI:
+   case SM4_OPCODE_FTOU:
+      FOR_EACH_DST0_ENABLED_CHANNEL32(c)
+         mkCvt(op, dTy, dst0[c], sTy, src(0, c))->rnd = ROUND_Z;
+      break;
+   case SM4_OPCODE_ITOF:
+   case SM4_OPCODE_UTOF:
+   case SM4_OPCODE_F32TOF16:
+   case SM4_OPCODE_F16TOF32:
+   case SM4_OPCODE_DTOF:
+   case SM4_OPCODE_FTOD:
+      FOR_EACH_DST0_ENABLED_CHANNEL32(c)
+         mkCvt(op, dTy, dst0[c], sTy, src(0, c));
+      break;
+
+   case SM4_OPCODE_CUT:
+   case SM4_OPCODE_CUT_STREAM:
+      mkOp1(OP_RESTART, TYPE_U32, NULL, mkImm(0))->fixed = 1;
+      break;
+   case SM4_OPCODE_EMIT:
+   case SM4_OPCODE_EMIT_STREAM:
+      mkOp1(OP_EMIT, TYPE_U32, NULL, mkImm(0))->fixed = 1;
+      break;
+   case SM4_OPCODE_EMITTHENCUT:
+   case SM4_OPCODE_EMITTHENCUT_STREAM:
+   {
+      Instruction *cut = mkOp1(OP_EMIT, TYPE_U32, NULL,  mkImm(0));
+      cut->fixed = 1;
+      cut->subOp = NV50_IR_SUBOP_EMIT_RESTART;
+   }
+      break;
+
+   case SM4_OPCODE_DISCARD:
+      info.prop.fp.usesDiscard = TRUE;
+      mkOp(OP_DISCARD, TYPE_NONE, NULL)->setPredicate(
+         insn->insn.test_nz ? CC_P : CC_NOT_P, src(0, 0));
+      break;
+
+   case SM4_OPCODE_CALL:
+   case SM4_OPCODE_CALLC:
+      assert(!"CALL/CALLC not implemented");
+      break;
+
+   case SM4_OPCODE_RET:
+      // XXX: the following doesn't work with subroutines / early ret
+      if (!haveNextPhase(pos))
+         finalizeShader();
+      else
+         phaseEnded = phase + 1;
+      break;
+
+   case SM4_OPCODE_IF:
+   {
+      BasicBlock *ifClause = new BasicBlock(func);
+
+      bb->cfg.attach(&ifClause->cfg, Graph::Edge::TREE);
+      condBBs.push(bb);
+      joinBBs.push(bb);
+
+      mkFlow(OP_BRA, NULL, insn->insn.test_nz ? CC_NOT_P : CC_P, src(0, 0));
+
+      setPosition(ifClause, true);
+   }
+      break;
+   case SM4_OPCODE_ELSE:
+   {
+      BasicBlock *elseClause = new BasicBlock(func);
+      BasicBlock *forkPoint = reinterpret_cast<BasicBlock *>(condBBs.pop().u.p);
+
+      forkPoint->cfg.attach(&elseClause->cfg, Graph::Edge::TREE);
+      condBBs.push(bb);
+
+      forkPoint->getExit()->asFlow()->target.bb = elseClause;
+      if (!bb->isTerminated())
+         mkFlow(OP_BRA, NULL, CC_ALWAYS, NULL);
+
+      setPosition(elseClause, true);
+   }
+      break;
+   case SM4_OPCODE_ENDIF:
+   {
+      BasicBlock *convPoint = new BasicBlock(func);
+      BasicBlock *lastBB = reinterpret_cast<BasicBlock *>(condBBs.pop().u.p);
+      BasicBlock *forkPoint = reinterpret_cast<BasicBlock *>(joinBBs.pop().u.p);
+
+      if (!bb->isTerminated()) {
+         // we only want join if none of the clauses ended with CONT/BREAK/RET
+         if (lastBB->getExit()->op == OP_BRA && joinBBs.getSize() < 6)
+            insertConvergenceOps(convPoint, forkPoint);
+         mkFlow(OP_BRA, convPoint, CC_ALWAYS, NULL);
+         bb->cfg.attach(&convPoint->cfg, Graph::Edge::FORWARD);
+      }
+
+      if (lastBB->getExit()->op == OP_BRA) {
+         lastBB->cfg.attach(&convPoint->cfg, Graph::Edge::FORWARD);
+         lastBB->getExit()->asFlow()->target.bb = convPoint;
+      }
+      setPosition(convPoint, true);
+   }
+      break;
+
+   case SM4_OPCODE_SWITCH:
+   case SM4_OPCODE_CASE:
+   case SM4_OPCODE_ENDSWITCH:
+      assert(!"SWITCH/CASE/ENDSWITCH not implemented");
+      break;
+
+   case SM4_OPCODE_LOOP:
+   {
+      BasicBlock *loopHeader = new BasicBlock(func);
+      BasicBlock *loopBreak = new BasicBlock(func);
+
+      loopBBs.push(loopHeader);
+      breakBBs.push(loopBreak);
+      if (loopBBs.getSize() > func->loopNestingBound)
+         func->loopNestingBound++;
+
+      mkFlow(OP_PREBREAK, loopBreak, CC_ALWAYS, NULL);
+
+      bb->cfg.attach(&loopHeader->cfg, Graph::Edge::TREE);
+      setPosition(loopHeader, true);
+      mkFlow(OP_PRECONT, loopHeader, CC_ALWAYS, NULL);
+   }
+      break;
+   case SM4_OPCODE_ENDLOOP:
+   {
+      BasicBlock *loopBB = reinterpret_cast<BasicBlock *>(loopBBs.pop().u.p);
+
+      if (!bb->isTerminated()) {
+         mkFlow(OP_CONT, loopBB, CC_ALWAYS, NULL);
+         bb->cfg.attach(&loopBB->cfg, Graph::Edge::BACK);
+      }
+      setPosition(reinterpret_cast<BasicBlock *>(breakBBs.pop().u.p), true);
+   }
+      break;
+   case SM4_OPCODE_BREAK:
+   {
+      if (bb->isTerminated())
+         break;
+      BasicBlock *breakBB = reinterpret_cast<BasicBlock *>(breakBBs.peek().u.p);
+      mkFlow(OP_BREAK, breakBB, CC_ALWAYS, NULL);
+      bb->cfg.attach(&breakBB->cfg, Graph::Edge::CROSS);
+   }
+      break;
+   case SM4_OPCODE_BREAKC:
+   {
+      BasicBlock *nextBB = new BasicBlock(func);
+      BasicBlock *breakBB = reinterpret_cast<BasicBlock *>(breakBBs.peek().u.p);
+      CondCode cc = insn->insn.test_nz ? CC_P : CC_NOT_P;
+      mkFlow(OP_BREAK, breakBB, cc, src(0, 0));
+      bb->cfg.attach(&breakBB->cfg, Graph::Edge::CROSS);
+      bb->cfg.attach(&nextBB->cfg, Graph::Edge::FORWARD);
+      setPosition(nextBB, true);
+   }
+      break;
+   case SM4_OPCODE_CONTINUE:
+   {
+      if (bb->isTerminated())
+         break;
+      BasicBlock *contBB = reinterpret_cast<BasicBlock *>(loopBBs.peek().u.p);
+      mkFlow(OP_CONT, contBB, CC_ALWAYS, NULL);
+      contBB->explicitCont = true;
+      bb->cfg.attach(&contBB->cfg, Graph::Edge::BACK);
+   }
+      break;
+   case SM4_OPCODE_CONTINUEC:
+   {
+      BasicBlock *nextBB = new BasicBlock(func);
+      BasicBlock *contBB = reinterpret_cast<BasicBlock *>(loopBBs.peek().u.p);
+      mkFlow(OP_CONT, contBB, insn->insn.test_nz ? CC_P : CC_NOT_P, src(0, 0));
+      bb->cfg.attach(&contBB->cfg, Graph::Edge::BACK);
+      bb->cfg.attach(&nextBB->cfg, Graph::Edge::FORWARD);
+      setPosition(nextBB, true);
+   }
+      break;
+
+   case SM4_OPCODE_SAMPLE:
+   case SM4_OPCODE_SAMPLE_C:
+   case SM4_OPCODE_SAMPLE_C_LZ:
+   case SM4_OPCODE_SAMPLE_L:
+   case SM4_OPCODE_SAMPLE_D:
+   case SM4_OPCODE_SAMPLE_B:
+      handleSAMPLE(op, dst0);
+      break;
+   case SM4_OPCODE_LD:
+   case SM4_OPCODE_LD_MS:
+      handleLOAD(dst0);
+      break;
+
+   case SM4_OPCODE_GATHER4:
+      assert(!"GATHER4 not implemented\n");
+      break;
+
+   case SM4_OPCODE_RESINFO:
+      handleQUERY(dst0, TXQ_DIMS);
+      break;
+   case SM4_OPCODE_SAMPLE_POS:
+      handleQUERY(dst0, TXQ_SAMPLE_POSITION);
+      break;
+
+   case SM4_OPCODE_NOP:
+      mkOp(OP_NOP, TYPE_NONE, NULL);
+      break;
+
+   case SM4_OPCODE_HS_DECLS:
+      // XXX: any significance ?
+      break;
+   case SM4_OPCODE_HS_CONTROL_POINT_PHASE:
+      phase = 0;
+      break;
+   case SM4_OPCODE_HS_FORK_PHASE:
+      if (phase != 1)
+         subPhase = 0;
+      phase = 1;
+      phaseInstance = (phaseStart == pos) ? (phaseInstance + 1) : 0;
+      phaseStart = pos;
+      if (info.prop.tp.outputPatchSize < phaseInstCnt[0][subPhase])
+         unrollPhase = true;
+      break;
+   case SM4_OPCODE_HS_JOIN_PHASE:
+      if (phase != 2)
+         subPhase = 0;
+      phase = 2;
+      phaseInstance = (phaseStart == pos) ? (phaseInstance + 1) : 0;
+      phaseStart = pos;
+      if (info.prop.tp.outputPatchSize < phaseInstCnt[1][subPhase])
+         unrollPhase = true;
+      break;
+
+   default:
+      ERROR("SM4_OPCODE_#%u illegal / not supported\n", insn->opcode);
+      abort();
+      return false;
+   }
+
+   for (c = 0; c < nc; ++c) {
+      if (nDstOpnds >= 1 && rDst0[c]) {
+         if (dst0[c] != rDst0[c])
+            mkMov(rDst0[c], dst0[c]);
+         saveDst(0, c, rDst0[c]);
+      }
+      if (nDstOpnds >= 2 && rDst1[c]) {
+         if (dst1[c] != rDst1[c])
+            mkMov(rDst1[c], dst1[c]);
+         saveDst(1, c, rDst1[c]);
+      }
+   }
+
+   memset(srcPtr, 0, sizeof(srcPtr));
+   memset(dstPtr, 0, sizeof(dstPtr));
+   memset(vtxBase, 0, sizeof(vtxBase));
+   return true;
+}
+
+void
+Converter::exportOutputs()
+{
+   for (int i = 0; i < info.numOutputs; ++i) {
+      for (int c = 0; c < 4; ++c) {
+         if (!oData.exists(i, c))
+            continue;
+         Symbol *sym = mkSymbol(FILE_SHADER_OUTPUT, 0, TYPE_F32,
+                                info.out[i].slot[c] * 4);
+         Value *val = oData.load(i, c, NULL);
+         if (val)
+            mkStore(OP_EXPORT, TYPE_F32, sym, NULL, val);
+      }
+   }
+}
+
+Converter::Converter(Program *p, struct nv50_ir_prog_info *s)
+   : tData32(this),
+     tData64(this),
+     oData(this),
+     info(*s),
+     sm4(*reinterpret_cast<const sm4_program *>(s->bin.source)),
+     prog(p)
+{
+   memset(srcPtr, 0, sizeof(srcPtr));
+   memset(dstPtr, 0, sizeof(dstPtr));
+   memset(vtxBase, 0, sizeof(vtxBase));
+
+   memset(interpMode, 0, sizeof(interpMode));
+
+   nrRegVals = nrArrays = arrayVol = 0;
+
+   for (phase = 3; phase > 0; --phase)
+      for (unsigned int i = 0; i < PIPE_MAX_SHADER_OUTPUTS; ++i)
+         out[phase - 1][i].sn = TGSI_SEMANTIC_COUNT;
+
+   unrollPhase = false;
+   phaseStart = 0;
+   subPhaseCnt[0] = subPhaseCnt[1] = 0;
+}
+
+Converter::~Converter()
+{
+   if (lData)
+      delete[] lData;
+
+   if (subPhaseCnt[0])
+      delete[] phaseInstCnt[0];
+   if (subPhaseCnt[1])
+      delete[] phaseInstCnt[1];
+}
+
+bool
+Converter::haveNextPhase(unsigned int pos) const
+{
+   // Look at the instruction following 'pos': report true only if there is
+   // one and it opens a hull shader fork or join phase.
+   const unsigned int next = pos + 1;
+
+   if (next >= sm4.insns.size())
+      return false;
+   if (sm4.insns[next]->opcode == SM4_OPCODE_HS_FORK_PHASE)
+      return true;
+   return sm4.insns[next]->opcode == SM4_OPCODE_HS_JOIN_PHASE;
+}
+
+bool
+Converter::run()
+{  /* Translate the whole SM4 program held in 'sm4' into IR in prog->main.
+    *
+    * Order of work: signature parsing, two passes over the declarations,
+    * slot assignment, creation of the entry/exit blocks, a prologue that
+    * depends on the program type, one handleInstruction() call per
+    * instruction (hull shader phases may be replayed, see the loop at the
+    * end) and finally finalizeShader().
+    * Every return path visible here returns true. */
+   parseSignature();
+   /* First declaration pass. NOTE(review): inspectDeclaration() presumably
+    * counts the sub-phases into subPhaseCnt[], which sizes the arrays
+    * allocated right below -- confirm in inspectDeclaration(). */
+   for (unsigned int pos = 0; pos < sm4.dcls.size(); ++pos)
+      inspectDeclaration(*sm4.dcls[pos]);
+   /* One counter per sub-phase: index 0 belongs to the fork phase
+    * (phase 1), index 1 to the join phase (phase 2). */
+   phaseInstCnt[0] = new unsigned int [subPhaseCnt[0]];
+   phaseInstCnt[1] = new unsigned int [subPhaseCnt[1]];
+   for (int i = 0; i < subPhaseCnt[0]; ++i)
+      phaseInstCnt[0][i] = -1; // stored into an unsigned int, i.e. the maximum value
+   for (int i = 0; i < subPhaseCnt[1]; ++i)
+      phaseInstCnt[1][i] = -1;
+   // re-increased in handleDeclaration:
+   subPhaseCnt[0] = subPhaseCnt[1] = 0;
+   // Second declaration pass; allocateValues() runs first and nrArrays restarts from 0.
+   allocateValues();
+   nrArrays = 0;
+   for (unsigned int pos = 0; pos < sm4.dcls.size(); ++pos)
+      handleDeclaration(*sm4.dcls[pos]);
+
+   info.assignSlots(&info);
+   // A program with neither declarations nor instructions gets no basic blocks at all.
+   if (sm4.dcls.size() == 0 && sm4.insns.size() == 0)
+      return true;
+   // Create the entry and exit blocks of the main function and start emitting at the entry.
+   BasicBlock *entry = new BasicBlock(prog->main);
+   BasicBlock *leave = new BasicBlock(prog->main);
+
+   prog->main->setEntry(entry);
+   prog->main->setExit(leave);
+
+   setPosition(entry, true);
+
+   entryBBs.push(entry);
+   leaveBBs.push(leave);
+   /* Prologue that depends on the program type.
+    * Fragment: read component 3 of SV_POSITION and replace it by its
+    * reciprocal (OP_RCP writes back into fragCoord[3]).
+    * Tessellation evaluation: read SV_TESS_COORD, 3 components for a
+    * triangle domain and 2 otherwise; in the 2-component case domainPt[2]
+    * is set to the immediate 0.0f. */
+   if (prog->getType() == Program::TYPE_FRAGMENT) {
+      Symbol *sv = mkSysVal(SV_POSITION, 3);
+      fragCoord[3] = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), sv);
+      mkOp1(OP_RCP, TYPE_F32, fragCoord[3], fragCoord[3]);
+   } else
+   if (prog->getType() == Program::TYPE_TESSELLATION_EVAL) {
+      const int n = (info.prop.tp.domain == PIPE_PRIM_TRIANGLES) ? 3 : 2;
+      int c;
+      for (c = 0; c < n; ++c)
+         domainPt[c] =
+            mkOp1v(OP_RDSV, TYPE_F32, getSSA(), mkSysVal(SV_TESS_COORD, c));
+      if (c == 2)
+         domainPt[2] = loadImm(NULL, 0.0f);
+   }
+   /* Main translation loop: every instruction goes through
+    * handleInstruction(). NOTE(review): its bool result is ignored here.
+    *
+    * Extra work is only done once a fork/join phase has ended, i.e. when
+    * phase != 0 and phaseEnded >= 2 (SM4_OPCODE_RET sets phaseEnded to
+    * phase + 1 when another fork/join phase follows). At that point the
+    * loop either advances to the next sub-phase, or -- if unrollPhase and
+    * phaseInstanceUsed are both set -- rewinds 'pos' so that the phase is
+    * translated again; re-handling the phase opcode at phaseStart
+    * increments phaseInstance. The replay stops once phaseInstance has
+    * reached phaseInstCnt[phase - 1][subPhase] - 1.
+    * NOTE(review): phaseInstanceUsed is presumably set when the phase body
+    * reads its instance id -- confirm where it is written. */
+   finalized = false;
+   phaseEnded = 0;
+   phase = 0;
+   subPhase = 0;
+   for (unsigned int pos = 0; pos < sm4.insns.size(); ++pos) {
+      handleInstruction(pos);
+      if (likely(phase == 0) || (phaseEnded < 2))
+         continue; // no fork/join phase has just ended
+      phaseEnded = 0;
+      if (!unrollPhase || !phaseInstanceUsed) {
+         ++subPhase; // no replay for this phase
+         continue;
+      }
+      phaseInstanceUsed = false;
+      if (phaseInstance < (phaseInstCnt[phase - 1][subPhase] - 1))
+         pos = phaseStart - 1; // the loop's ++pos resumes at phaseStart
+      else
+         ++subPhase;
+   }
+   finalizeShader();
+
+   return true;
+}
+
+} // anonymous namespace
+
+namespace nv50_ir {
+
+bool
+Program::makeFromSM4(struct nv50_ir_prog_info *info)
+{
+   // Run a temporary SM4 converter on this program and hand its result
+   // back to the caller.
+   Converter converter(this, info);
+   const bool converted = converter.run();
+   return converted;
+}
+
+} // namespace nv50_ir
diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_from_sm4.h b/src/gallium/drivers/nv50/codegen/nv50_ir_from_sm4.h
new file mode 100644 (file)
index 0000000..3c7b55a
--- /dev/null
@@ -0,0 +1,183 @@
+
+#ifndef __NV50_IR_FROM_SM4_H__
+#define __NV50_IR_FROM_SM4_H__
+
+typedef enum D3D_PRIMITIVE_TOPOLOGY { /* Primitive topology codes.
+    * NOTE(review): names and values presumably mirror the Direct3D enum of
+    * the same name -- confirm against the SDK headers. */
+    D3D_PRIMITIVE_TOPOLOGY_UNDEFINED = 0,
+    D3D_PRIMITIVE_TOPOLOGY_POINTLIST = 1,
+    D3D_PRIMITIVE_TOPOLOGY_LINELIST = 2,
+    D3D_PRIMITIVE_TOPOLOGY_LINESTRIP = 3,
+    D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST = 4,
+    D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP = 5, /* 6..9 are not defined here */
+    D3D_PRIMITIVE_TOPOLOGY_LINELIST_ADJ = 10,
+    D3D_PRIMITIVE_TOPOLOGY_LINESTRIP_ADJ = 11,
+    D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST_ADJ = 12,
+    D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP_ADJ = 13, /* 14..32 are not defined here */
+    D3D_PRIMITIVE_TOPOLOGY_1_CONTROL_POINT_PATCHLIST = 33, /* patch list with N control points: 32 + N */
+    D3D_PRIMITIVE_TOPOLOGY_2_CONTROL_POINT_PATCHLIST = 34,
+    D3D_PRIMITIVE_TOPOLOGY_3_CONTROL_POINT_PATCHLIST = 35,
+    D3D_PRIMITIVE_TOPOLOGY_4_CONTROL_POINT_PATCHLIST = 36,
+    D3D_PRIMITIVE_TOPOLOGY_5_CONTROL_POINT_PATCHLIST = 37,
+    D3D_PRIMITIVE_TOPOLOGY_6_CONTROL_POINT_PATCHLIST = 38,
+    D3D_PRIMITIVE_TOPOLOGY_7_CONTROL_POINT_PATCHLIST = 39,
+    D3D_PRIMITIVE_TOPOLOGY_8_CONTROL_POINT_PATCHLIST = 40,
+    D3D_PRIMITIVE_TOPOLOGY_9_CONTROL_POINT_PATCHLIST = 41,
+    D3D_PRIMITIVE_TOPOLOGY_10_CONTROL_POINT_PATCHLIST = 42,
+    D3D_PRIMITIVE_TOPOLOGY_11_CONTROL_POINT_PATCHLIST = 43,
+    D3D_PRIMITIVE_TOPOLOGY_12_CONTROL_POINT_PATCHLIST = 44,
+    D3D_PRIMITIVE_TOPOLOGY_13_CONTROL_POINT_PATCHLIST = 45,
+    D3D_PRIMITIVE_TOPOLOGY_14_CONTROL_POINT_PATCHLIST = 46,
+    D3D_PRIMITIVE_TOPOLOGY_15_CONTROL_POINT_PATCHLIST = 47,
+    D3D_PRIMITIVE_TOPOLOGY_16_CONTROL_POINT_PATCHLIST = 48,
+    D3D_PRIMITIVE_TOPOLOGY_17_CONTROL_POINT_PATCHLIST = 49,
+    D3D_PRIMITIVE_TOPOLOGY_18_CONTROL_POINT_PATCHLIST = 50,
+    D3D_PRIMITIVE_TOPOLOGY_19_CONTROL_POINT_PATCHLIST = 51,
+    D3D_PRIMITIVE_TOPOLOGY_20_CONTROL_POINT_PATCHLIST = 52,
+    D3D_PRIMITIVE_TOPOLOGY_21_CONTROL_POINT_PATCHLIST = 53,
+    D3D_PRIMITIVE_TOPOLOGY_22_CONTROL_POINT_PATCHLIST = 54,
+    D3D_PRIMITIVE_TOPOLOGY_23_CONTROL_POINT_PATCHLIST = 55,
+    D3D_PRIMITIVE_TOPOLOGY_24_CONTROL_POINT_PATCHLIST = 56,
+    D3D_PRIMITIVE_TOPOLOGY_25_CONTROL_POINT_PATCHLIST = 57,
+    D3D_PRIMITIVE_TOPOLOGY_26_CONTROL_POINT_PATCHLIST = 58,
+    D3D_PRIMITIVE_TOPOLOGY_27_CONTROL_POINT_PATCHLIST = 59,
+    D3D_PRIMITIVE_TOPOLOGY_28_CONTROL_POINT_PATCHLIST = 60,
+    D3D_PRIMITIVE_TOPOLOGY_29_CONTROL_POINT_PATCHLIST = 61,
+    D3D_PRIMITIVE_TOPOLOGY_30_CONTROL_POINT_PATCHLIST = 62,
+    D3D_PRIMITIVE_TOPOLOGY_31_CONTROL_POINT_PATCHLIST = 63,
+    D3D_PRIMITIVE_TOPOLOGY_32_CONTROL_POINT_PATCHLIST = 64, /* NOTE(review): trailing comma, rejected by strict C89/C++98 */
+} D3D_PRIMITIVE_TOPOLOGY;
+
+typedef enum D3D_RESOURCE_RETURN_TYPE { /* Resource return type codes.
+    * The D3D10_ and D3D11_ prefixed enumerators further down are aliases
+    * carrying the same values as the D3D_ ones. */
+    D3D_RETURN_TYPE_UNORM = 1, /* there is no enumerator with value 0 */
+    D3D_RETURN_TYPE_SNORM = 2,
+    D3D_RETURN_TYPE_SINT = 3,
+    D3D_RETURN_TYPE_UINT = 4,
+    D3D_RETURN_TYPE_FLOAT = 5,
+    D3D_RETURN_TYPE_MIXED = 6,
+    D3D_RETURN_TYPE_DOUBLE = 7,
+    D3D_RETURN_TYPE_CONTINUED = 8,
+    D3D10_RETURN_TYPE_UNORM = 1, /* D3D10_ aliases stop at MIXED (6) */
+    D3D10_RETURN_TYPE_SNORM = 2,
+    D3D10_RETURN_TYPE_SINT = 3,
+    D3D10_RETURN_TYPE_UINT = 4,
+    D3D10_RETURN_TYPE_FLOAT = 5,
+    D3D10_RETURN_TYPE_MIXED = 6,
+    D3D11_RETURN_TYPE_UNORM = 1, /* D3D11_ aliases cover all eight values */
+    D3D11_RETURN_TYPE_SNORM = 2,
+    D3D11_RETURN_TYPE_SINT = 3,
+    D3D11_RETURN_TYPE_UINT = 4,
+    D3D11_RETURN_TYPE_FLOAT = 5,
+    D3D11_RETURN_TYPE_MIXED = 6,
+    D3D11_RETURN_TYPE_DOUBLE = 7,
+    D3D11_RETURN_TYPE_CONTINUED = 8
+} D3D_RESOURCE_RETURN_TYPE;
+
+typedef enum D3D_REGISTER_COMPONENT_TYPE { /* Data type codes of a register
+    * component; used for the ComponentType field of
+    * D3D11_SIGNATURE_PARAMETER_DESC in this header. */
+    D3D_REGISTER_COMPONENT_UNKNOWN = 0,
+    D3D_REGISTER_COMPONENT_UINT32 = 1,
+    D3D_REGISTER_COMPONENT_SINT32 = 2,
+    D3D_REGISTER_COMPONENT_FLOAT32 = 3,
+    D3D10_REGISTER_COMPONENT_UNKNOWN = 0, /* D3D10_ names alias the values above */
+    D3D10_REGISTER_COMPONENT_UINT32 = 1,
+    D3D10_REGISTER_COMPONENT_SINT32 = 2,
+    D3D10_REGISTER_COMPONENT_FLOAT32 = 3
+} D3D_REGISTER_COMPONENT_TYPE;
+
+typedef enum D3D_TESSELLATOR_DOMAIN { /* Tessellator domain codes */
+    D3D_TESSELLATOR_DOMAIN_UNDEFINED = 0,
+    D3D_TESSELLATOR_DOMAIN_ISOLINE = 1,
+    D3D_TESSELLATOR_DOMAIN_TRI = 2,
+    D3D_TESSELLATOR_DOMAIN_QUAD = 3,
+    D3D11_TESSELLATOR_DOMAIN_UNDEFINED = 0, /* D3D11_ names alias the values above */
+    D3D11_TESSELLATOR_DOMAIN_ISOLINE = 1,
+    D3D11_TESSELLATOR_DOMAIN_TRI = 2,
+    D3D11_TESSELLATOR_DOMAIN_QUAD = 3
+} D3D_TESSELLATOR_DOMAIN;
+
+typedef enum D3D_TESSELLATOR_PARTITIONING { /* Tessellator partitioning mode codes */
+    D3D_TESSELLATOR_PARTITIONING_UNDEFINED = 0,
+    D3D_TESSELLATOR_PARTITIONING_INTEGER = 1,
+    D3D_TESSELLATOR_PARTITIONING_POW2 = 2,
+    D3D_TESSELLATOR_PARTITIONING_FRACTIONAL_ODD = 3,
+    D3D_TESSELLATOR_PARTITIONING_FRACTIONAL_EVEN = 4,
+    D3D11_TESSELLATOR_PARTITIONING_UNDEFINED = 0, /* D3D11_ names alias the values above */
+    D3D11_TESSELLATOR_PARTITIONING_INTEGER = 1,
+    D3D11_TESSELLATOR_PARTITIONING_POW2 = 2,
+    D3D11_TESSELLATOR_PARTITIONING_FRACTIONAL_ODD = 3,
+    D3D11_TESSELLATOR_PARTITIONING_FRACTIONAL_EVEN = 4
+} D3D_TESSELLATOR_PARTITIONING;
+
+typedef enum D3D_TESSELLATOR_OUTPUT_PRIMITIVE { /* Tessellator output primitive codes */
+    D3D_TESSELLATOR_OUTPUT_UNDEFINED = 0,
+    D3D_TESSELLATOR_OUTPUT_POINT = 1,
+    D3D_TESSELLATOR_OUTPUT_LINE = 2,
+    D3D_TESSELLATOR_OUTPUT_TRIANGLE_CW = 3,
+    D3D_TESSELLATOR_OUTPUT_TRIANGLE_CCW = 4,
+    D3D11_TESSELLATOR_OUTPUT_UNDEFINED = 0, /* D3D11_ names alias the values above */
+    D3D11_TESSELLATOR_OUTPUT_POINT = 1,
+    D3D11_TESSELLATOR_OUTPUT_LINE = 2,
+    D3D11_TESSELLATOR_OUTPUT_TRIANGLE_CW = 3,
+    D3D11_TESSELLATOR_OUTPUT_TRIANGLE_CCW = 4
+} D3D_TESSELLATOR_OUTPUT_PRIMITIVE;
+
+typedef enum D3D_NAME { /* System-value name codes; used for the
+    * SystemValueType field of D3D11_SIGNATURE_PARAMETER_DESC in this header.
+    * The D3D10_ and D3D11_ prefixed enumerators in the second half repeat
+    * the values of the D3D_ ones. */
+    D3D_NAME_UNDEFINED = 0,
+    D3D_NAME_POSITION = 1,
+    D3D_NAME_CLIP_DISTANCE = 2,
+    D3D_NAME_CULL_DISTANCE = 3,
+    D3D_NAME_RENDER_TARGET_ARRAY_INDEX = 4,
+    D3D_NAME_VIEWPORT_ARRAY_INDEX = 5,
+    D3D_NAME_VERTEX_ID = 6,
+    D3D_NAME_PRIMITIVE_ID = 7,
+    D3D_NAME_INSTANCE_ID = 8,
+    D3D_NAME_IS_FRONT_FACE = 9,
+    D3D_NAME_SAMPLE_INDEX = 10,
+    D3D_NAME_FINAL_QUAD_EDGE_TESSFACTOR = 11,
+    D3D_NAME_FINAL_QUAD_INSIDE_TESSFACTOR = 12,
+    D3D_NAME_FINAL_TRI_EDGE_TESSFACTOR = 13,
+    D3D_NAME_FINAL_TRI_INSIDE_TESSFACTOR = 14,
+    D3D_NAME_FINAL_LINE_DETAIL_TESSFACTOR = 15,
+    D3D_NAME_FINAL_LINE_DENSITY_TESSFACTOR = 16, /* 17..63 are not defined here */
+    D3D_NAME_TARGET = 64,
+    D3D_NAME_DEPTH = 65,
+    D3D_NAME_COVERAGE = 66,
+    D3D_NAME_DEPTH_GREATER_EQUAL = 67,
+    D3D_NAME_DEPTH_LESS_EQUAL = 68,
+    D3D10_NAME_UNDEFINED = 0, /* aliases start here: D3D10_ for 0..10 */
+    D3D10_NAME_POSITION = 1,
+    D3D10_NAME_CLIP_DISTANCE = 2,
+    D3D10_NAME_CULL_DISTANCE = 3,
+    D3D10_NAME_RENDER_TARGET_ARRAY_INDEX = 4,
+    D3D10_NAME_VIEWPORT_ARRAY_INDEX = 5,
+    D3D10_NAME_VERTEX_ID = 6,
+    D3D10_NAME_PRIMITIVE_ID = 7,
+    D3D10_NAME_INSTANCE_ID = 8,
+    D3D10_NAME_IS_FRONT_FACE = 9,
+    D3D10_NAME_SAMPLE_INDEX = 10,
+    D3D11_NAME_FINAL_QUAD_EDGE_TESSFACTOR = 11, /* D3D11_ for the tess factors 11..16 */
+    D3D11_NAME_FINAL_QUAD_INSIDE_TESSFACTOR = 12,
+    D3D11_NAME_FINAL_TRI_EDGE_TESSFACTOR = 13,
+    D3D11_NAME_FINAL_TRI_INSIDE_TESSFACTOR = 14,
+    D3D11_NAME_FINAL_LINE_DETAIL_TESSFACTOR = 15,
+    D3D11_NAME_FINAL_LINE_DENSITY_TESSFACTOR = 16,
+    D3D10_NAME_TARGET = 64, /* D3D10_ for 64..66 */
+    D3D10_NAME_DEPTH = 65,
+    D3D10_NAME_COVERAGE = 66,
+    D3D11_NAME_DEPTH_GREATER_EQUAL = 67, /* D3D11_ for 67..68 */
+    D3D11_NAME_DEPTH_LESS_EQUAL = 68
+} D3D_NAME;
+
+typedef struct _D3D11_SIGNATURE_PARAMETER_DESC { /* Describes one parameter
+    * of a shader signature. NOTE(review): layout presumably copied from the
+    * Direct3D 11 shader reflection API -- confirm against its documentation
+    * before relying on field semantics. */
+    const char* SemanticName; /* semantic name string */
+    unsigned int SemanticIndex; /* index that goes with SemanticName */
+    unsigned int Register; /* register number of the parameter */
+    D3D_NAME SystemValueType; /* one of the D3D_NAME codes declared above */
+    D3D_REGISTER_COMPONENT_TYPE ComponentType; /* one of the D3D_REGISTER_COMPONENT_* codes */
+    unsigned char Mask; /* component mask -- NOTE(review): presumably one bit per component; confirm */
+    unsigned char ReadWriteMask; /* NOTE(review): exact semantics not visible here; see the D3D11 reflection docs */
+    unsigned int Stream; /* stream number */
+} D3D11_SIGNATURE_PARAMETER_DESC;
+
+#include "../../../state_trackers/d3d1x/d3d1xshader/include/sm4.h"
+
+#endif // __NV50_IR_FROM_SM4_H__