OSDN Git Service

Implement gather/scatter operations for shader register files.
author Nicolas Capens <capn@google.com>
Mon, 28 May 2018 17:18:59 +0000 (13:18 -0400)
committer Nicolas Capens <nicolascapens@google.com>
Tue, 29 May 2018 13:59:43 +0000 (13:59 +0000)
This allows addressing the registers with a vector of indices.

Also rename 'dynamic' register files to 'indirect addressable', to
disambiguate from 'dynamic indexing' at the shader level. Indexing with
a uniform does not require gather/scatter operations, but does require
indirect addressing.

Bug chromium:845103
Bug skia:7846

Change-Id: I3c42be33def66328688f2900c61c80246bf1e584
Reviewed-on: https://swiftshader-review.googlesource.com/18989
Tested-by: Nicolas Capens <nicolascapens@google.com>
Reviewed-by: Alexis Hétu <sugoi@google.com>
src/Shader/PixelProgram.hpp
src/Shader/PixelRoutine.cpp
src/Shader/PixelShader.cpp
src/Shader/Shader.cpp
src/Shader/Shader.hpp
src/Shader/ShaderCore.cpp
src/Shader/ShaderCore.hpp
src/Shader/VertexProgram.cpp
src/Shader/VertexRoutine.cpp
src/Shader/VertexShader.cpp

index 1f60bde..ef6c2c0 100644 (file)
@@ -24,7 +24,7 @@ namespace sw
        {
        public:
                PixelProgram(const PixelProcessor::State &state, const PixelShader *shader) :
-                       PixelRoutine(state, shader), r(shader->dynamicallyIndexedTemporaries),
+                       PixelRoutine(state, shader), r(shader->indirectAddressableTemporaries),
                        loopDepth(-1), ifDepth(0), loopRepDepth(0), currentLabel(-1), whileTest(false)
                {
                        for(int i = 0; i < 2048; ++i)
index 1c300b0..146e42d 100644 (file)
@@ -29,7 +29,8 @@ namespace sw
        extern bool exactColorRounding;
        extern bool forceClearRegisters;
 
-       PixelRoutine::PixelRoutine(const PixelProcessor::State &state, const PixelShader *shader) : QuadRasterizer(state, shader), v(shader && shader->dynamicallyIndexedInput)
+       PixelRoutine::PixelRoutine(const PixelProcessor::State &state, const PixelShader *shader)
+               : QuadRasterizer(state, shader), v(shader && shader->indirectAddressableInput)
        {
                if(!shader || shader->getShaderModel() < 0x0200 || forceClearRegisters)
                {
index 9e281d9..d24e7c2 100644 (file)
@@ -160,7 +160,7 @@ namespace sw
                analyzeDynamicBranching();
                analyzeSamplers();
                analyzeCallSites();
-               analyzeDynamicIndexing();
+               analyzeIndirectAddressing();
        }
 
        void PixelShader::analyzeZOverride()
index 6874051..36192c9 100644 (file)
@@ -1890,40 +1890,34 @@ namespace sw
                }
        }
 
-       void Shader::analyzeDynamicIndexing()
+       void Shader::analyzeIndirectAddressing()
        {
-               dynamicallyIndexedTemporaries = false;
-               dynamicallyIndexedInput = false;
-               dynamicallyIndexedOutput = false;
+               indirectAddressableTemporaries = false;
+               indirectAddressableInput = false;
+               indirectAddressableOutput = false;
 
                for(const auto &inst : instruction)
                {
-                       if(inst->dst.rel.type == PARAMETER_ADDR ||
-                          inst->dst.rel.type == PARAMETER_LOOP ||
-                          inst->dst.rel.type == PARAMETER_TEMP ||
-                          inst->dst.rel.type == PARAMETER_CONST)
+                       if(inst->dst.rel.type != PARAMETER_VOID)
                        {
                                switch(inst->dst.type)
                                {
-                               case PARAMETER_TEMP:   dynamicallyIndexedTemporaries = true; break;
-                               case PARAMETER_INPUT:  dynamicallyIndexedInput = true;       break;
-                               case PARAMETER_OUTPUT: dynamicallyIndexedOutput = true;      break;
+                               case PARAMETER_TEMP:   indirectAddressableTemporaries = true; break;
+                               case PARAMETER_INPUT:  indirectAddressableInput = true;       break;
+                               case PARAMETER_OUTPUT: indirectAddressableOutput = true;      break;
                                default: break;
                                }
                        }
 
                        for(int j = 0; j < 3; j++)
                        {
-                               if(inst->src[j].rel.type == PARAMETER_ADDR ||
-                                  inst->src[j].rel.type == PARAMETER_LOOP ||
-                                  inst->src[j].rel.type == PARAMETER_TEMP ||
-                                  inst->src[j].rel.type == PARAMETER_CONST)
+                               if(inst->src[j].rel.type != PARAMETER_VOID)
                                {
                                        switch(inst->src[j].type)
                                        {
-                                       case PARAMETER_TEMP:   dynamicallyIndexedTemporaries = true; break;
-                                       case PARAMETER_INPUT:  dynamicallyIndexedInput = true;       break;
-                                       case PARAMETER_OUTPUT: dynamicallyIndexedOutput = true;      break;
+                                       case PARAMETER_TEMP:   indirectAddressableTemporaries = true; break;
+                                       case PARAMETER_INPUT:  indirectAddressableInput = true;       break;
+                                       case PARAMETER_OUTPUT: indirectAddressableOutput = true;      break;
                                        default: break;
                                        }
                                }
index 6755cd4..6d431f5 100644 (file)
@@ -612,9 +612,9 @@ namespace sw
                unsigned int dirtyConstantsI;
                unsigned int dirtyConstantsB;
 
-               bool dynamicallyIndexedTemporaries;
-               bool dynamicallyIndexedInput;
-               bool dynamicallyIndexedOutput;
+               bool indirectAddressableTemporaries;
+               bool indirectAddressableInput;
+               bool indirectAddressableOutput;
 
        protected:
                void parse(const unsigned long *token);
@@ -627,7 +627,7 @@ namespace sw
                void analyzeDynamicBranching();
                void analyzeSamplers();
                void analyzeCallSites();
-               void analyzeDynamicIndexing();
+               void analyzeIndirectAddressing();
                void markFunctionAnalysis(unsigned int functionLabel, Analysis flag);
 
                ShaderType shaderType;
index 338605c..4ea3260 100644 (file)
@@ -560,6 +560,100 @@ namespace sw
                }
        }
 
+       const Vector4f RegisterFile::operator[](RValue<Int4> index)
+       {
+               ASSERT(indirectAddressable);
+
+               Int index0 = Extract(index, 0);
+               Int index1 = Extract(index, 1);
+               Int index2 = Extract(index, 2);
+               Int index3 = Extract(index, 3);
+
+               Vector4f r;
+
+               r.x.x = Extract(x[0][index0], 0);
+               r.x.y = Extract(x[0][index1], 1);
+               r.x.z = Extract(x[0][index2], 2);
+               r.x.w = Extract(x[0][index3], 3);
+
+               r.y.x = Extract(y[0][index0], 0);
+               r.y.y = Extract(y[0][index1], 1);
+               r.y.z = Extract(y[0][index2], 2);
+               r.y.w = Extract(y[0][index3], 3);
+
+               r.z.x = Extract(z[0][index0], 0);
+               r.z.y = Extract(z[0][index1], 1);
+               r.z.z = Extract(z[0][index2], 2);
+               r.z.w = Extract(z[0][index3], 3);
+
+               r.w.x = Extract(w[0][index0], 0);
+               r.w.y = Extract(w[0][index1], 1);
+               r.w.z = Extract(w[0][index2], 2);
+               r.w.w = Extract(w[0][index3], 3);
+
+               return r;
+       }
+
+       void RegisterFile::scatter_x(Int4 index, RValue<Float4> r)
+       {
+               ASSERT(indirectAddressable);
+
+               Int index0 = Extract(index, 0);
+               Int index1 = Extract(index, 1);
+               Int index2 = Extract(index, 2);
+               Int index3 = Extract(index, 3);
+
+               x[0][index0] = Insert(x[0][index0], Extract(r, 0), 0);
+               x[0][index1] = Insert(x[0][index1], Extract(r, 1), 1);
+               x[0][index2] = Insert(x[0][index2], Extract(r, 2), 2);
+               x[0][index3] = Insert(x[0][index3], Extract(r, 3), 3);
+       }
+
+       void RegisterFile::scatter_y(Int4 index, RValue<Float4> r)
+       {
+               ASSERT(indirectAddressable);
+
+               Int index0 = Extract(index, 0);
+               Int index1 = Extract(index, 1);
+               Int index2 = Extract(index, 2);
+               Int index3 = Extract(index, 3);
+
+               y[0][index0] = Insert(y[0][index0], Extract(r, 0), 0);
+               y[0][index1] = Insert(y[0][index1], Extract(r, 1), 1);
+               y[0][index2] = Insert(y[0][index2], Extract(r, 2), 2);
+               y[0][index3] = Insert(y[0][index3], Extract(r, 3), 3);
+       }
+
+       void RegisterFile::scatter_z(Int4 index, RValue<Float4> r)
+       {
+               ASSERT(indirectAddressable);
+
+               Int index0 = Extract(index, 0);
+               Int index1 = Extract(index, 1);
+               Int index2 = Extract(index, 2);
+               Int index3 = Extract(index, 3);
+
+               z[0][index0] = Insert(z[0][index0], Extract(r, 0), 0);
+               z[0][index1] = Insert(z[0][index1], Extract(r, 1), 1);
+               z[0][index2] = Insert(z[0][index2], Extract(r, 2), 2);
+               z[0][index3] = Insert(z[0][index3], Extract(r, 3), 3);
+       }
+
+       void RegisterFile::scatter_w(Int4 index, RValue<Float4> r)
+       {
+               ASSERT(indirectAddressable);
+
+               Int index0 = Extract(index, 0);
+               Int index1 = Extract(index, 1);
+               Int index2 = Extract(index, 2);
+               Int index3 = Extract(index, 3);
+
+               w[0][index0] = Insert(w[0][index0], Extract(r, 0), 0);
+               w[0][index1] = Insert(w[0][index1], Extract(r, 1), 1);
+               w[0][index2] = Insert(w[0][index2], Extract(r, 2), 2);
+               w[0][index3] = Insert(w[0][index3], Extract(r, 3), 3);
+       }
+
        void ShaderCore::mov(Vector4f &dst, const Vector4f &src, bool integerDestination)
        {
                if(integerDestination)
index 249e058..4dc109f 100644 (file)
@@ -147,31 +147,30 @@ namespace sw
                Reference<Float4> w;
        };
 
-       template<int S, bool D = false>
-       class RegisterArray
+       class RegisterFile
        {
        public:
-               RegisterArray(bool dynamic = D) : dynamic(dynamic)
+               RegisterFile(int size, bool indirectAddressable) : size(size), indirectAddressable(indirectAddressable)
                {
-                       if(dynamic)
+                       if(indirectAddressable)
                        {
-                               x = new Array<Float4>(S);
-                               y = new Array<Float4>(S);
-                               z = new Array<Float4>(S);
-                               w = new Array<Float4>(S);
+                               x = new Array<Float4>(size);
+                               y = new Array<Float4>(size);
+                               z = new Array<Float4>(size);
+                               w = new Array<Float4>(size);
                        }
                        else
                        {
-                               x = new Array<Float4>[S];
-                               y = new Array<Float4>[S];
-                               z = new Array<Float4>[S];
-                               w = new Array<Float4>[S];
+                               x = new Array<Float4>[size];
+                               y = new Array<Float4>[size];
+                               z = new Array<Float4>[size];
+                               w = new Array<Float4>[size];
                        }
                }
 
-               ~RegisterArray()
+               ~RegisterFile()
                {
-                       if(dynamic)
+                       if(indirectAddressable)
                        {
                                delete x;
                                delete y;
@@ -189,7 +188,7 @@ namespace sw
 
                Register operator[](int i)
                {
-                       if(dynamic)
+                       if(indirectAddressable)
                        {
                                return Register(x[0][i], y[0][i], z[0][i], w[0][i]);
                        }
@@ -201,19 +200,36 @@ namespace sw
 
                Register operator[](RValue<Int> i)
                {
-                       ASSERT(dynamic);
+                       ASSERT(indirectAddressable);
 
                        return Register(x[0][i], y[0][i], z[0][i], w[0][i]);
                }
 
-       private:
-               const bool dynamic;
+               const Vector4f operator[](RValue<Int4> i);   // Gather operation (read only).
+
+               void scatter_x(Int4 i, RValue<Float4> r);
+               void scatter_y(Int4 i, RValue<Float4> r);
+               void scatter_z(Int4 i, RValue<Float4> r);
+               void scatter_w(Int4 i, RValue<Float4> r);
+
+       protected:
+               const int size;
+               const bool indirectAddressable;
                Array<Float4> *x;
                Array<Float4> *y;
                Array<Float4> *z;
                Array<Float4> *w;
        };
 
+       template<int S, bool I = false>
+       class RegisterArray : public RegisterFile
+       {
+       public:
+               RegisterArray(bool indirectAddressable = I) : RegisterFile(S, indirectAddressable)
+               {
+               }
+       };
+
        class ShaderCore
        {
                typedef Shader::Control Control;
index 4f8ba1a..8dbd600 100644 (file)
@@ -24,7 +24,7 @@
 namespace sw
 {
        VertexProgram::VertexProgram(const VertexProcessor::State &state, const VertexShader *shader)
-               : VertexRoutine(state, shader), shader(shader), r(shader->dynamicallyIndexedTemporaries)
+               : VertexRoutine(state, shader), shader(shader), r(shader->indirectAddressableTemporaries)
        {
                ifDepth = 0;
                loopRepDepth = 0;
index 2d7c2c6..9b8d336 100644 (file)
@@ -27,8 +27,8 @@ namespace sw
        extern bool symmetricNormalizedDepth;   // [-1, 1] instead of [0, 1]
 
        VertexRoutine::VertexRoutine(const VertexProcessor::State &state, const VertexShader *shader)
-               : v(shader && shader->dynamicallyIndexedInput),
-                 o(shader && shader->dynamicallyIndexedOutput),
+               : v(shader && shader->indirectAddressableInput),
+                 o(shader && shader->indirectAddressableOutput),
                  state(state)
        {
        }
index 33c2241..8f1c4f8 100644 (file)
@@ -176,7 +176,7 @@ namespace sw
                setOutput(posReg, 4, sw::Shader::Semantic(sw::Shader::USAGE_POSITION, 0));
                positionRegister = posReg;
        }
-       
+
        void VertexShader::setPointSizeRegister(int ptSizeReg)
        {
                setOutput(ptSizeReg, 4, sw::Shader::Semantic(sw::Shader::USAGE_PSIZE, 0));
@@ -207,7 +207,7 @@ namespace sw
                analyzeDynamicBranching();
                analyzeSamplers();
                analyzeCallSites();
-               analyzeDynamicIndexing();
+               analyzeIndirectAddressing();
        }
 
        void VertexShader::analyzeInput()