OSDN Git Service

gl_VertexID implementation
[android-x86/external-swiftshader.git] / src / Shader / VertexProgram.cpp
index efae39c..c9ed8aa 100644 (file)
@@ -1,13 +1,16 @@
-// SwiftShader Software Renderer
+// Copyright 2016 The SwiftShader Authors. All Rights Reserved.
 //
-// Copyright(c) 2005-2013 TransGaming Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
 //
-// All rights reserved. No part of this software may be copied, distributed, transmitted,
-// transcribed, stored in a retrieval system, translated into any human or computer
-// language by any means, or disclosed to third parties without the explicit written
-// agreement of TransGaming Inc. Without such an agreement, no rights or licenses, express
-// or implied, including but not limited to any patent rights, are granted to you.
+//    http://www.apache.org/licenses/LICENSE-2.0
 //
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
 
 #include "VertexProgram.hpp"
 
@@ -47,7 +50,7 @@ namespace sw
                        enableContinue = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
                }
 
-               if(shader->instanceIdDeclared)
+               if(shader->isInstanceIdDeclared())
                {
                        instanceID = *Pointer<Int>(data + OFFSET(DrawData,instanceID));
                }
@@ -61,7 +64,7 @@ namespace sw
                }
        }
 
-       void VertexProgram::pipeline()
+       void VertexProgram::pipeline(UInt& index)
        {
                for(int i = 0; i < VERTEX_TEXTURE_IMAGE_UNITS; i++)
                {
@@ -70,7 +73,7 @@ namespace sw
 
                if(!state.preTransformed)
                {
-                       program();
+                       program(index);
                }
                else
                {
@@ -78,7 +81,7 @@ namespace sw
                }
        }
 
-       void VertexProgram::program()
+       void VertexProgram::program(UInt& index)
        {
        //      shader->print("VertexShader-%0.8X.txt", state.shaderID);
 
@@ -92,6 +95,21 @@ namespace sw
                        enableLeave = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
                }
 
+               if(shader->isVertexIdDeclared())
+               {
+                       if(state.textureSampling)
+                       {
+                               vertexID = Int4(index);
+                       }
+                       else
+                       {
+                               vertexID = Insert(vertexID, As<Int>(index), 0);
+                               vertexID = Insert(vertexID, As<Int>(index + 1), 1);
+                               vertexID = Insert(vertexID, As<Int>(index + 2), 2);
+                               vertexID = Insert(vertexID, As<Int>(index + 3), 3);
+                       }
+               }
+
                // Create all call site return blocks up front
                for(size_t i = 0; i < shader->getLength(); i++)
                {
@@ -136,11 +154,11 @@ namespace sw
                        Vector4f s3;
                        Vector4f s4;
 
-                       if(src0.type != Shader::PARAMETER_VOID) s0 = fetchRegisterF(src0);
-                       if(src1.type != Shader::PARAMETER_VOID) s1 = fetchRegisterF(src1);
-                       if(src2.type != Shader::PARAMETER_VOID) s2 = fetchRegisterF(src2);
-                       if(src3.type != Shader::PARAMETER_VOID) s3 = fetchRegisterF(src3);
-                       if(src4.type != Shader::PARAMETER_VOID) s4 = fetchRegisterF(src4);
+                       if(src0.type != Shader::PARAMETER_VOID) s0 = fetchRegister(src0);
+                       if(src1.type != Shader::PARAMETER_VOID) s1 = fetchRegister(src1);
+                       if(src2.type != Shader::PARAMETER_VOID) s2 = fetchRegister(src2);
+                       if(src3.type != Shader::PARAMETER_VOID) s3 = fetchRegister(src3);
+                       if(src4.type != Shader::PARAMETER_VOID) s4 = fetchRegister(src4);
 
                        switch(opcode)
                        {
@@ -157,6 +175,7 @@ namespace sw
                        case Shader::OPCODE_DEFB:                                       break;
                        case Shader::OPCODE_NOP:                                        break;
                        case Shader::OPCODE_ABS:        abs(d, s0);                     break;
+                       case Shader::OPCODE_IABS:       iabs(d, s0);                    break;
                        case Shader::OPCODE_ADD:        add(d, s0, s1);                 break;
                        case Shader::OPCODE_IADD:       iadd(d, s0, s1);                break;
                        case Shader::OPCODE_CRS:        crs(d, s0, s1);                 break;
@@ -202,6 +221,12 @@ namespace sw
                        case Shader::OPCODE_FLOATBITSTOUINT:
                        case Shader::OPCODE_INTBITSTOFLOAT:
                        case Shader::OPCODE_UINTBITSTOFLOAT: d = s0;                    break;
+                       case Shader::OPCODE_PACKSNORM2x16:   packSnorm2x16(d, s0);      break;
+                       case Shader::OPCODE_PACKUNORM2x16:   packUnorm2x16(d, s0);      break;
+                       case Shader::OPCODE_PACKHALF2x16:    packHalf2x16(d, s0);       break;
+                       case Shader::OPCODE_UNPACKSNORM2x16: unpackSnorm2x16(d, s0);    break;
+                       case Shader::OPCODE_UNPACKUNORM2x16: unpackUnorm2x16(d, s0);    break;
+                       case Shader::OPCODE_UNPACKHALF2x16:  unpackHalf2x16(d, s0);     break;
                        case Shader::OPCODE_M3X2:       M3X2(d, s0, src1);              break;
                        case Shader::OPCODE_M3X3:       M3X3(d, s0, src1);              break;
                        case Shader::OPCODE_M3X4:       M3X4(d, s0, src1);              break;
@@ -256,6 +281,7 @@ namespace sw
                        case Shader::OPCODE_DIST4:      dist4(d.x, s0, s1, pp);         break;
                        case Shader::OPCODE_SGE:        step(d, s1, s0);                break;
                        case Shader::OPCODE_SGN:        sgn(d, s0);                     break;
+                       case Shader::OPCODE_ISGN:       isgn(d, s0);                    break;
                        case Shader::OPCODE_SINCOS:     sincos(d, s0, pp);              break;
                        case Shader::OPCODE_COS:        cos(d, s0, pp);                 break;
                        case Shader::OPCODE_SIN:        sin(d, s0, pp);                 break;
@@ -285,12 +311,14 @@ namespace sw
                        case Shader::OPCODE_ENDLOOP:    ENDLOOP();                      break;
                        case Shader::OPCODE_ENDREP:     ENDREP();                       break;
                        case Shader::OPCODE_ENDWHILE:   ENDWHILE();                     break;
+                       case Shader::OPCODE_ENDSWITCH:  ENDSWITCH();                    break;
                        case Shader::OPCODE_IF:         IF(src0);                       break;
                        case Shader::OPCODE_IFC:        IFC(s0, s1, control);           break;
                        case Shader::OPCODE_LABEL:      LABEL(dst.index);               break;
                        case Shader::OPCODE_LOOP:       LOOP(src1);                     break;
                        case Shader::OPCODE_REP:        REP(src0);                      break;
                        case Shader::OPCODE_WHILE:      WHILE(src0);                    break;
+                       case Shader::OPCODE_SWITCH:     SWITCH();                       break;
                        case Shader::OPCODE_RET:        RET();                          break;
                        case Shader::OPCODE_LEAVE:      LEAVE();                        break;
                        case Shader::OPCODE_CMP:        cmp(d, s0, s1, control);        break;
@@ -301,18 +329,18 @@ namespace sw
                        case Shader::OPCODE_INSERT:     insert(d, s0, s1.x, s2.x);      break;
                        case Shader::OPCODE_ALL:        all(d.x, s0);                   break;
                        case Shader::OPCODE_ANY:        any(d.x, s0);                   break;
-                       case Shader::OPCODE_NOT:        not(d, s0);                     break;
-                       case Shader::OPCODE_OR:         or(d, s0, s1);                  break;
-                       case Shader::OPCODE_XOR:        xor(d, s0, s1);                 break;
-                       case Shader::OPCODE_AND:        and(d, s0, s1);                 break;
+                       case Shader::OPCODE_NOT:        bitwise_not(d, s0);             break;
+                       case Shader::OPCODE_OR:         bitwise_or(d, s0, s1);          break;
+                       case Shader::OPCODE_XOR:        bitwise_xor(d, s0, s1);         break;
+                       case Shader::OPCODE_AND:        bitwise_and(d, s0, s1);         break;
                        case Shader::OPCODE_EQ:         equal(d, s0, s1);               break;
                        case Shader::OPCODE_NE:         notEqual(d, s0, s1);            break;
                        case Shader::OPCODE_TEXLDL:     TEXLDL(d, s0, src1);            break;
                        case Shader::OPCODE_TEX:        TEX(d, s0, src1);               break;
-                       case Shader::OPCODE_TEXOFFSET:  TEXOFFSET(d, s0, src1, s2, s3); break;
+                       case Shader::OPCODE_TEXOFFSET:  TEXOFFSET(d, s0, src1, s2);     break;
                        case Shader::OPCODE_TEXLDLOFFSET: TEXLDL(d, s0, src1, s2);      break;
-                       case Shader::OPCODE_TEXELFETCH: TEXELFETCH(d, s0, src1, s2);    break;
-                       case Shader::OPCODE_TEXELFETCHOFFSET: TEXELFETCH(d, s0, src1, s2, s3); break;
+                       case Shader::OPCODE_TEXELFETCH: TEXELFETCH(d, s0, src1);        break;
+                       case Shader::OPCODE_TEXELFETCHOFFSET: TEXELFETCH(d, s0, src1, s2); break;
                        case Shader::OPCODE_TEXGRAD:    TEXGRAD(d, s0, src1, s2, s3);   break;
                        case Shader::OPCODE_TEXGRADOFFSET: TEXGRAD(d, s0, src1, s2, s3, s4); break;
                        case Shader::OPCODE_TEXSIZE:    TEXSIZE(d, s0.x, src1);         break;
@@ -397,10 +425,10 @@ namespace sw
                                                }
                                                break;
                                        case Shader::PARAMETER_ATTROUT:
-                                               if(dst.x) pDst.x = o[D0 + dst.index].x;
-                                               if(dst.y) pDst.y = o[D0 + dst.index].y;
-                                               if(dst.z) pDst.z = o[D0 + dst.index].z;
-                                               if(dst.w) pDst.w = o[D0 + dst.index].w;
+                                               if(dst.x) pDst.x = o[C0 + dst.index].x;
+                                               if(dst.y) pDst.y = o[C0 + dst.index].y;
+                                               if(dst.z) pDst.z = o[C0 + dst.index].z;
+                                               if(dst.w) pDst.w = o[C0 + dst.index].w;
                                                break;
                                        case Shader::PARAMETER_TEXCRDOUT:
                                //      case Shader::PARAMETER_OUTPUT:
@@ -528,10 +556,10 @@ namespace sw
                                        }
                                        break;
                                case Shader::PARAMETER_ATTROUT:
-                                       if(dst.x) o[D0 + dst.index].x = d.x;
-                                       if(dst.y) o[D0 + dst.index].y = d.y;
-                                       if(dst.z) o[D0 + dst.index].z = d.z;
-                                       if(dst.w) o[D0 + dst.index].w = d.w;
+                                       if(dst.x) o[C0 + dst.index].x = d.x;
+                                       if(dst.y) o[C0 + dst.index].y = d.y;
+                                       if(dst.z) o[C0 + dst.index].z = d.z;
+                                       if(dst.w) o[C0 + dst.index].w = d.w;
                                        break;
                                case Shader::PARAMETER_TEXCRDOUT:
                        //      case Shader::PARAMETER_OUTPUT:
@@ -581,9 +609,9 @@ namespace sw
        {
                if(shader)
                {
-                       for(int i = 0; i < 12; i++)
+                       for(int i = 0; i < MAX_VERTEX_OUTPUTS; i++)
                        {
-                               unsigned char usage = shader->output[i][0].usage;
+                               unsigned char usage = shader->getOutput(i, 0).usage;
 
                                switch(usage)
                                {
@@ -627,10 +655,10 @@ namespace sw
 
                        for(int i = 0; i < 2; i++)
                        {
-                               o[D0 + i].x = v[Color0 + i].x;
-                               o[D0 + i].y = v[Color0 + i].y;
-                               o[D0 + i].z = v[Color0 + i].z;
-                               o[D0 + i].w = v[Color0 + i].w;
+                               o[C0 + i].x = v[Color0 + i].x;
+                               o[C0 + i].y = v[Color0 + i].y;
+                               o[C0 + i].z = v[Color0 + i].z;
+                               o[C0 + i].w = v[Color0 + i].w;
                        }
 
                        for(int i = 0; i < 8; i++)
@@ -645,7 +673,7 @@ namespace sw
                }
        }
 
-       Vector4f VertexProgram::fetchRegisterF(const Src &src, unsigned int offset)
+       Vector4f VertexProgram::fetchRegister(const Src &src, unsigned int offset)
        {
                Vector4f reg;
                unsigned int i = src.index + offset;
@@ -666,7 +694,7 @@ namespace sw
                        reg = readConstant(src, offset);
                        break;
                case Shader::PARAMETER_INPUT:
-            if(src.rel.type == Shader::PARAMETER_VOID)
+                       if(src.rel.type == Shader::PARAMETER_VOID)
                        {
                                reg = v[i];
                        }
@@ -674,7 +702,7 @@ namespace sw
                        {
                                reg = v[i + relativeAddress(src, src.bufferIndex)];
                        }
-            break;
+                       break;
                case Shader::PARAMETER_VOID: return r[0];   // Dummy
                case Shader::PARAMETER_FLOAT4LITERAL:
                        reg.x = Float4(src.value[0]);
@@ -698,7 +726,7 @@ namespace sw
                        }
                        return reg;
                case Shader::PARAMETER_OUTPUT:
-            if(src.rel.type == Shader::PARAMETER_VOID)
+                       if(src.rel.type == Shader::PARAMETER_VOID)
                        {
                                reg = o[i];
                        }
@@ -708,7 +736,15 @@ namespace sw
                        }
                        break;
                case Shader::PARAMETER_MISCTYPE:
-                       reg.x = As<Float>(Int(instanceID));
+                       if(src.index == Shader::InstanceIDIndex)
+                       {
+                               reg.x = As<Float>(instanceID);
+                       }
+                       else if(src.index == Shader::VertexIDIndex)
+                       {
+                               reg.x = As<Float4>(vertexID);
+                       }
+                       else ASSERT(false);
                        return reg;
                default:
                        ASSERT(false);
@@ -828,7 +864,7 @@ namespace sw
                        if(src.rel.deterministic)
                        {
                                Int a = relativeAddress(src, src.bufferIndex);
-                       
+
                                c.x = c.y = c.z = c.w = *Pointer<Float4>(uniformAddress(src.bufferIndex, i, a));
 
                                c.x = c.x.xxxx;
@@ -843,11 +879,22 @@ namespace sw
 
                                switch(src.rel.type)
                                {
-                               case Shader::PARAMETER_ADDR:   a = a0[component]; break;
-                               case Shader::PARAMETER_TEMP:   a = r[src.rel.index][component]; break;
-                               case Shader::PARAMETER_INPUT:  a = v[src.rel.index][component]; break;
-                               case Shader::PARAMETER_OUTPUT: a = o[src.rel.index][component]; break;
-                               case Shader::PARAMETER_CONST:  a = *Pointer<Float>(uniformAddress(src.bufferIndex, src.rel.index) + component * sizeof(float)); break;
+                               case Shader::PARAMETER_ADDR:     a = a0[component]; break;
+                               case Shader::PARAMETER_TEMP:     a = r[src.rel.index][component]; break;
+                               case Shader::PARAMETER_INPUT:    a = v[src.rel.index][component]; break;
+                               case Shader::PARAMETER_OUTPUT:   a = o[src.rel.index][component]; break;
+                               case Shader::PARAMETER_CONST:    a = *Pointer<Float>(uniformAddress(src.bufferIndex, src.rel.index) + component * sizeof(float)); break;
+                               case Shader::PARAMETER_MISCTYPE:
+                                       if(src.rel.index == Shader::InstanceIDIndex)
+                                       {
+                                               a = As<Float4>(Int4(instanceID)); break;
+                                       }
+                                       else if(src.rel.index == Shader::VertexIDIndex)
+                                       {
+                                               a = As<Float4>(vertexID); break;
+                                       }
+                                       else ASSERT(false);
+                                       break;
                                default: ASSERT(false);
                                }
 
@@ -928,8 +975,8 @@ namespace sw
 
        void VertexProgram::M3X2(Vector4f &dst, Vector4f &src0, Src &src1)
        {
-               Vector4f row0 = fetchRegisterF(src1, 0);
-               Vector4f row1 = fetchRegisterF(src1, 1);
+               Vector4f row0 = fetchRegister(src1, 0);
+               Vector4f row1 = fetchRegister(src1, 1);
 
                dst.x = dot3(src0, row0);
                dst.y = dot3(src0, row1);
@@ -937,9 +984,9 @@ namespace sw
 
        void VertexProgram::M3X3(Vector4f &dst, Vector4f &src0, Src &src1)
        {
-               Vector4f row0 = fetchRegisterF(src1, 0);
-               Vector4f row1 = fetchRegisterF(src1, 1);
-               Vector4f row2 = fetchRegisterF(src1, 2);
+               Vector4f row0 = fetchRegister(src1, 0);
+               Vector4f row1 = fetchRegister(src1, 1);
+               Vector4f row2 = fetchRegister(src1, 2);
 
                dst.x = dot3(src0, row0);
                dst.y = dot3(src0, row1);
@@ -948,10 +995,10 @@ namespace sw
 
        void VertexProgram::M3X4(Vector4f &dst, Vector4f &src0, Src &src1)
        {
-               Vector4f row0 = fetchRegisterF(src1, 0);
-               Vector4f row1 = fetchRegisterF(src1, 1);
-               Vector4f row2 = fetchRegisterF(src1, 2);
-               Vector4f row3 = fetchRegisterF(src1, 3);
+               Vector4f row0 = fetchRegister(src1, 0);
+               Vector4f row1 = fetchRegister(src1, 1);
+               Vector4f row2 = fetchRegister(src1, 2);
+               Vector4f row3 = fetchRegister(src1, 3);
 
                dst.x = dot3(src0, row0);
                dst.y = dot3(src0, row1);
@@ -961,9 +1008,9 @@ namespace sw
 
        void VertexProgram::M4X3(Vector4f &dst, Vector4f &src0, Src &src1)
        {
-               Vector4f row0 = fetchRegisterF(src1, 0);
-               Vector4f row1 = fetchRegisterF(src1, 1);
-               Vector4f row2 = fetchRegisterF(src1, 2);
+               Vector4f row0 = fetchRegister(src1, 0);
+               Vector4f row1 = fetchRegister(src1, 1);
+               Vector4f row2 = fetchRegister(src1, 2);
 
                dst.x = dot4(src0, row0);
                dst.y = dot4(src0, row1);
@@ -972,10 +1019,10 @@ namespace sw
 
        void VertexProgram::M4X4(Vector4f &dst, Vector4f &src0, Src &src1)
        {
-               Vector4f row0 = fetchRegisterF(src1, 0);
-               Vector4f row1 = fetchRegisterF(src1, 1);
-               Vector4f row2 = fetchRegisterF(src1, 2);
-               Vector4f row3 = fetchRegisterF(src1, 3);
+               Vector4f row0 = fetchRegister(src1, 0);
+               Vector4f row1 = fetchRegister(src1, 1);
+               Vector4f row2 = fetchRegister(src1, 2);
+               Vector4f row3 = fetchRegister(src1, 3);
 
                dst.x = dot4(src0, row0);
                dst.y = dot4(src0, row1);
@@ -985,8 +1032,8 @@ namespace sw
 
        void VertexProgram::BREAK()
        {
-               llvm::BasicBlock *deadBlock = Nucleus::createBasicBlock();
-               llvm::BasicBlock *endBlock = loopRepEndBlock[loopRepDepth - 1];
+               BasicBlock *deadBlock = Nucleus::createBasicBlock();
+               BasicBlock *endBlock = loopRepEndBlock[loopRepDepth - 1];
 
                if(breakDepth == 0)
                {
@@ -1041,8 +1088,8 @@ namespace sw
        {
                condition &= enableStack[enableIndex];
 
-               llvm::BasicBlock *continueBlock = Nucleus::createBasicBlock();
-               llvm::BasicBlock *endBlock = loopRepEndBlock[loopRepDepth - 1];
+               BasicBlock *continueBlock = Nucleus::createBasicBlock();
+               BasicBlock *endBlock = loopRepEndBlock[loopRepDepth - 1];
 
                enableBreak = enableBreak & ~condition;
                Bool allBreak = SignMask(enableBreak) == 0x0;
@@ -1161,8 +1208,8 @@ namespace sw
        {
                ifDepth--;
 
-               llvm::BasicBlock *falseBlock = ifFalseBlock[ifDepth];
-               llvm::BasicBlock *endBlock = Nucleus::createBasicBlock();
+               BasicBlock *falseBlock = ifFalseBlock[ifDepth];
+               BasicBlock *endBlock = Nucleus::createBasicBlock();
 
                if(isConditionalIf[ifDepth])
                {
@@ -1188,7 +1235,7 @@ namespace sw
        {
                ifDepth--;
 
-               llvm::BasicBlock *endBlock = ifFalseBlock[ifDepth];
+               BasicBlock *endBlock = ifFalseBlock[ifDepth];
 
                Nucleus::createBr(endBlock);
                Nucleus::setInsertBlock(endBlock);
@@ -1206,8 +1253,8 @@ namespace sw
 
                aL[loopDepth] = aL[loopDepth] + increment[loopDepth];   // FIXME: +=
 
-               llvm::BasicBlock *testBlock = loopRepTestBlock[loopRepDepth];
-               llvm::BasicBlock *endBlock = loopRepEndBlock[loopRepDepth];
+               BasicBlock *testBlock = loopRepTestBlock[loopRepDepth];
+               BasicBlock *endBlock = loopRepEndBlock[loopRepDepth];
 
                Nucleus::createBr(testBlock);
                Nucleus::setInsertBlock(endBlock);
@@ -1220,8 +1267,8 @@ namespace sw
        {
                loopRepDepth--;
 
-               llvm::BasicBlock *testBlock = loopRepTestBlock[loopRepDepth];
-               llvm::BasicBlock *endBlock = loopRepEndBlock[loopRepDepth];
+               BasicBlock *testBlock = loopRepTestBlock[loopRepDepth];
+               BasicBlock *endBlock = loopRepEndBlock[loopRepDepth];
 
                Nucleus::createBr(testBlock);
                Nucleus::setInsertBlock(endBlock);
@@ -1234,8 +1281,8 @@ namespace sw
        {
                loopRepDepth--;
 
-               llvm::BasicBlock *testBlock = loopRepTestBlock[loopRepDepth];
-               llvm::BasicBlock *endBlock = loopRepEndBlock[loopRepDepth];
+               BasicBlock *testBlock = loopRepTestBlock[loopRepDepth];
+               BasicBlock *endBlock = loopRepEndBlock[loopRepDepth];
 
                Nucleus::createBr(testBlock);
                Nucleus::setInsertBlock(endBlock);
@@ -1245,6 +1292,19 @@ namespace sw
                whileTest = false;
        }
 
+       void VertexProgram::ENDSWITCH()
+       {
+               loopRepDepth--;
+
+               BasicBlock *endBlock = loopRepEndBlock[loopRepDepth];
+
+               Nucleus::createBr(loopRepEndBlock[loopRepDepth]);
+               Nucleus::setInsertBlock(endBlock);
+
+               enableIndex--;
+               enableBreak = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
+       }
+
        void VertexProgram::IF(const Src &src)
        {
                if(src.type == Shader::PARAMETER_CONSTBOOL)
@@ -1257,7 +1317,7 @@ namespace sw
                }
                else
                {
-                       Int4 condition = As<Int4>(fetchRegisterF(src).x);
+                       Int4 condition = As<Int4>(fetchRegister(src).x);
                        IF(condition);
                }
        }
@@ -1273,8 +1333,8 @@ namespace sw
                        condition = !condition;
                }
 
-               llvm::BasicBlock *trueBlock = Nucleus::createBasicBlock();
-               llvm::BasicBlock *falseBlock = Nucleus::createBasicBlock();
+               BasicBlock *trueBlock = Nucleus::createBasicBlock();
+               BasicBlock *falseBlock = Nucleus::createBasicBlock();
 
                branch(condition, trueBlock, falseBlock);
 
@@ -1322,8 +1382,8 @@ namespace sw
                enableIndex++;
                enableStack[enableIndex] = condition;
 
-               llvm::BasicBlock *trueBlock = Nucleus::createBasicBlock();
-               llvm::BasicBlock *falseBlock = Nucleus::createBasicBlock();
+               BasicBlock *trueBlock = Nucleus::createBasicBlock();
+               BasicBlock *falseBlock = Nucleus::createBasicBlock();
 
                Bool notAllFalse = SignMask(condition) != 0;
 
@@ -1361,9 +1421,9 @@ namespace sw
                        increment[loopDepth] = 1;
                }
 
-               llvm::BasicBlock *loopBlock = Nucleus::createBasicBlock();
-               llvm::BasicBlock *testBlock = Nucleus::createBasicBlock();
-               llvm::BasicBlock *endBlock = Nucleus::createBasicBlock();
+               BasicBlock *loopBlock = Nucleus::createBasicBlock();
+               BasicBlock *testBlock = Nucleus::createBasicBlock();
+               BasicBlock *endBlock = Nucleus::createBasicBlock();
 
                loopRepTestBlock[loopRepDepth] = testBlock;
                loopRepEndBlock[loopRepDepth] = endBlock;
@@ -1388,9 +1448,9 @@ namespace sw
                iteration[loopDepth] = *Pointer<Int>(data + OFFSET(DrawData,vs.i[integerRegister.index][0]));
                aL[loopDepth] = aL[loopDepth - 1];
 
-               llvm::BasicBlock *loopBlock = Nucleus::createBasicBlock();
-               llvm::BasicBlock *testBlock = Nucleus::createBasicBlock();
-               llvm::BasicBlock *endBlock = Nucleus::createBasicBlock();
+               BasicBlock *loopBlock = Nucleus::createBasicBlock();
+               BasicBlock *testBlock = Nucleus::createBasicBlock();
+               BasicBlock *endBlock = Nucleus::createBasicBlock();
 
                loopRepTestBlock[loopRepDepth] = testBlock;
                loopRepEndBlock[loopRepDepth] = endBlock;
@@ -1412,9 +1472,9 @@ namespace sw
        {
                enableIndex++;
 
-               llvm::BasicBlock *loopBlock = Nucleus::createBasicBlock();
-               llvm::BasicBlock *testBlock = Nucleus::createBasicBlock();
-               llvm::BasicBlock *endBlock = Nucleus::createBasicBlock();
+               BasicBlock *loopBlock = Nucleus::createBasicBlock();
+               BasicBlock *testBlock = Nucleus::createBasicBlock();
+               BasicBlock *endBlock = Nucleus::createBasicBlock();
 
                loopRepTestBlock[loopRepDepth] = testBlock;
                loopRepEndBlock[loopRepDepth] = endBlock;
@@ -1427,9 +1487,10 @@ namespace sw
                Nucleus::setInsertBlock(testBlock);
                enableContinue = restoreContinue;
 
-               const Vector4f &src = fetchRegisterF(temporaryRegister);
+               const Vector4f &src = fetchRegister(temporaryRegister);
                Int4 condition = As<Int4>(src.x);
                condition &= enableStack[enableIndex - 1];
+               if(shader->containsLeaveInstruction()) condition &= enableLeave;
                enableStack[enableIndex] = condition;
 
                Bool notAllFalse = SignMask(condition) != 0;
@@ -1444,6 +1505,20 @@ namespace sw
                breakDepth = 0;
        }
 
+       void VertexProgram::SWITCH()
+       {
+               enableIndex++;
+               enableStack[enableIndex] = Int4(0xFFFFFFFF);
+
+               BasicBlock *endBlock = Nucleus::createBasicBlock();
+
+               loopRepTestBlock[loopRepDepth] = nullptr;
+               loopRepEndBlock[loopRepDepth] = endBlock;
+
+               loopRepDepth++;
+               breakDepth = 0;
+       }
+
        void VertexProgram::RET()
        {
                if(currentLabel == -1)
@@ -1453,19 +1528,19 @@ namespace sw
                }
                else
                {
-                       llvm::BasicBlock *unreachableBlock = Nucleus::createBasicBlock();
+                       BasicBlock *unreachableBlock = Nucleus::createBasicBlock();
 
                        if(callRetBlock[currentLabel].size() > 1)   // Pop the return destination from the call stack
                        {
                                // FIXME: Encapsulate
                                UInt index = callStack[--stackIndex];
 
-                               llvm::Value *value = index.loadValue();
-                               llvm::Value *switchInst = Nucleus::createSwitch(value, unreachableBlock, (int)callRetBlock[currentLabel].size());
+                               Value *value = index.loadValue();
+                               SwitchCases *switchCases = Nucleus::createSwitch(value, unreachableBlock, (int)callRetBlock[currentLabel].size());
 
                                for(unsigned int i = 0; i < callRetBlock[currentLabel].size(); i++)
                                {
-                                       Nucleus::addSwitchCase(switchInst, i, callRetBlock[currentLabel][i]);
+                                       Nucleus::addSwitchCase(switchCases, i, callRetBlock[currentLabel][i]);
                                }
                        }
                        else if(callRetBlock[currentLabel].size() == 1)   // Jump directly to the unique return destination
@@ -1492,92 +1567,82 @@ namespace sw
 
        void VertexProgram::TEXLDL(Vector4f &dst, Vector4f &src0, const Src &src1)
        {
-               Vector4f tmp;
-               sampleTexture(tmp, src1, src0.x, src0.y, src0.z, src0.w);
-
-               dst.x = tmp[(src1.swizzle >> 0) & 0x3];
-               dst.y = tmp[(src1.swizzle >> 2) & 0x3];
-               dst.z = tmp[(src1.swizzle >> 4) & 0x3];
-               dst.w = tmp[(src1.swizzle >> 6) & 0x3];
+               sampleTexture(dst, src1, src0, a0, a0, src0, Lod);   // Lod sampling with src0 as the coordinate vector; a0 fills both derivative slots and src0 the offset slot (presumably ignored for plain Lod -- confirm in the sampler). The src1 swizzle is applied inside sampleTexture.
        }
 
        void VertexProgram::TEX(Vector4f &dst, Vector4f &src0, const Src &src1)
        {
-               Float4 lod = Float4(0.0f);
-               Vector4f tmp;
-               sampleTexture(tmp, src1, src0.x, src0.y, src0.z, lod);
-
-               dst.x = tmp[(src1.swizzle >> 0) & 0x3];
-               dst.y = tmp[(src1.swizzle >> 2) & 0x3];
-               dst.z = tmp[(src1.swizzle >> 4) & 0x3];
-               dst.w = tmp[(src1.swizzle >> 6) & 0x3];
+               src0.w = Float(0.0f);   // Writes through the non-const src0 reference, so the caller's src0.w is overwritten; sampleTexture forwards uvwq.w as the fourth coordinate (presumably the LOD, forced to 0 here -- confirm).
+               sampleTexture(dst, src1, src0, a0, a0, src0, Lod);   // Same call shape as the three-argument TEXLDL: a0 in both derivative slots, src0 in the offset slot.
        }
 
-       void VertexProgram::TEXOFFSET(Vector4f &dst, Vector4f &src0, const Src& src1, Vector4f &src2, Vector4f &src3)
+       void VertexProgram::TEXOFFSET(Vector4f &dst, Vector4f &src0, const Src& src1, Vector4f &src2)   // Samples src1 at coordinates src0, passing src2 to sampleTexture as the offset argument.
        {
-               UNIMPLEMENTED();
+               src0.w = Float(0.0f);   // Writes through the non-const src0 reference; uvwq.w is forwarded as the fourth coordinate (presumably the LOD -- confirm).
+               sampleTexture(dst, src1, src0, a0, a0, src2, {Lod, Offset});   // a0 fills both derivative slots; the function argument is built from the Lod and Offset enumerators.
        }
 
-       void VertexProgram::TEXLDL(Vector4f &dst, Vector4f &src, const Src&, Vector4f &offset)
+       void VertexProgram::TEXLDL(Vector4f &dst, Vector4f &src0, const Src& src1, Vector4f &offset)   // Offset overload of TEXLDL: unlike TEX/TEXOFFSET it leaves src0.w untouched before sampling.
        {
-               UNIMPLEMENTED();
+               sampleTexture(dst, src1, src0, a0, a0, offset, {Lod, Offset});   // a0 fills both derivative slots; offset is forwarded in the offset slot.
        }
 
-       void VertexProgram::TEXELFETCH(Vector4f &dst, Vector4f &src0, const Src& src1, Vector4f &src2)
+       void VertexProgram::TEXELFETCH(Vector4f &dst, Vector4f &src0, const Src& src1)   // Fetch-mode sample of src1 using src0 as the coordinate vector.
        {
-               UNIMPLEMENTED();
+               sampleTexture(dst, src1, src0, src0, src0, src0, Fetch);   // src0 is also passed for the dsx, dsy and offset parameters (presumably ignored for a plain Fetch -- confirm in the sampler).
        }
 
-       void VertexProgram::TEXELFETCH(Vector4f &dst, Vector4f &src0, const Src& src1, Vector4f &src2, Vector4f &offset)
+       void VertexProgram::TEXELFETCH(Vector4f &dst, Vector4f &src0, const Src& src1, Vector4f &offset)   // Fetch-mode sample of src1 at src0 with offset forwarded in the offset slot.
        {
-               UNIMPLEMENTED();
+               sampleTexture(dst, src1, src0, src0, src0, offset, {Fetch, Offset});   // src0 also fills the dsx and dsy parameters (presumably unused for Fetch -- confirm in the sampler).
        }
 
        void VertexProgram::TEXGRAD(Vector4f &dst, Vector4f &src0, const Src& src1, Vector4f &src2, Vector4f &src3)
        {
-               UNIMPLEMENTED();
+               sampleTexture(dst, src1, src0, src2, src3, src0, Grad);   // src2 and src3 are forwarded as the dsx and dsy parameters; src0 doubles as the offset argument (presumably ignored without Offset -- confirm in the sampler).
        }
 
        void VertexProgram::TEXGRAD(Vector4f &dst, Vector4f &src0, const Src& src1, Vector4f &src2, Vector4f &src3, Vector4f &offset)
        {
-               UNIMPLEMENTED();
+               sampleTexture(dst, src1, src0, src2, src3, offset, {Grad, Offset});   // src2 and src3 are forwarded as the dsx and dsy parameters, offset in the offset slot.
        }
 
        void VertexProgram::TEXSIZE(Vector4f &dst, Float4 &lod, const Src &src1)
        {
-               Pointer<Byte> textureMipmap = data + OFFSET(DrawData, mipmap[16]) + src1.index * sizeof(Texture) + OFFSET(Texture, mipmap);
-               for(int i = 0; i < 4; ++i)
-               {
-                       Pointer<Byte> mipmap = textureMipmap + (As<Int>(Extract(lod, i)) + Int(1)) * sizeof(Mipmap);
-                       dst.x = Insert(dst.x, As<Float>(Int(*Pointer<Short>(mipmap + OFFSET(Mipmap, width)))), i);
-                       dst.y = Insert(dst.y, As<Float>(Int(*Pointer<Short>(mipmap + OFFSET(Mipmap, height)))), i);
-                       dst.z = Insert(dst.z, As<Float>(Int(*Pointer<Short>(mipmap + OFFSET(Mipmap, depth)))), i);
-               }
+               Pointer<Byte> texture = data + OFFSET(DrawData, mipmap[16]) + src1.index * sizeof(Texture);
+               sampler[src1.index]->textureSize(texture, dst, lod);
        }
 
-       void VertexProgram::sampleTexture(Vector4f &c, const Src &s, Float4 &u, Float4 &v, Float4 &w, Float4 &q)
+       void VertexProgram::sampleTexture(Vector4f &c, const Src &s, Vector4f &uvwq, Vector4f &dsx, Vector4f &dsy, Vector4f &offset, SamplerFunction function)   // Samples the texture selected by s into a temporary, then writes it to c reordered by s.swizzle.
        {
+               Vector4f tmp;   // Unswizzled sample result. NOTE(review): nothing writes tmp if no branch below runs a sampler, yet it is still read at the end -- confirm that cannot happen.
+
                if(s.type == Shader::PARAMETER_SAMPLER && s.rel.type == Shader::PARAMETER_VOID)
                {
-                       Pointer<Byte> texture = data + OFFSET(DrawData,mipmap[16]) + s.index * sizeof(Texture);
-                       sampler[s.index]->sampleTexture(texture, c, u, v, w, q, a0, a0, false, false, true);
+                       Pointer<Byte> texture = data + OFFSET(DrawData, mipmap[TEXTURE_IMAGE_UNITS]) + s.index * sizeof(Texture);   // Sampler index is known when the routine is built, so the texture record address is computed directly.
+                       sampler[s.index]->sampleTexture(texture, tmp, uvwq.x, uvwq.y, uvwq.z, uvwq.w, dsx, dsy, offset, function);
                }
                else
                {
-                       Int index = As<Int>(Float(fetchRegisterF(s).x.x));
+                       Int index = As<Int>(Float(fetchRegister(s).x.x));   // Sampler index read from the x component of register s (As<Int> presumably reinterprets the float bits rather than converting -- confirm).
 
-                       for(int i = 0; i < 16; i++)
+                       for(int i = 0; i < VERTEX_TEXTURE_IMAGE_UNITS; i++)   // Emits one guarded sampling sequence per sampler the shader reports using.
                        {
                                if(shader->usesSampler(i))
                                {
                                        If(index == i)
                                        {
-                                               Pointer<Byte> texture = data + OFFSET(DrawData,mipmap[16]) + i * sizeof(Texture);
-                                               sampler[i]->sampleTexture(texture, c, u, v, w, q, a0, a0, false, false, true);
+                                               Pointer<Byte> texture = data + OFFSET(DrawData, mipmap[TEXTURE_IMAGE_UNITS]) + i * sizeof(Texture);   // Same address computation as the direct case, with i in place of s.index.
+                                               sampler[i]->sampleTexture(texture, tmp, uvwq.x, uvwq.y, uvwq.z, uvwq.w, dsx, dsy, offset, function);
                                                // FIXME: When the sampler states are the same, we could use one sampler and just index the texture
                                        }
                                }
                        }
                }
+
+               c.x = tmp[(s.swizzle >> 0) & 0x3];   // s.swizzle holds one 2-bit source-component selector per destination component, lowest bits first.
+               c.y = tmp[(s.swizzle >> 2) & 0x3];
+               c.z = tmp[(s.swizzle >> 4) & 0x3];
+               c.w = tmp[(s.swizzle >> 6) & 0x3];
        }
 }