From f1d728411726fd0bc5baf9746e838f609900836e Mon Sep 17 00:00:00 2001 From: Tim Rowley Date: Thu, 2 Mar 2017 16:41:02 -0600 Subject: [PATCH] swr: implement geometry shaders Reviewed-by: Bruce Cherniak --- src/gallium/drivers/swr/swr_context.cpp | 2 +- src/gallium/drivers/swr/swr_context.h | 26 +- src/gallium/drivers/swr/swr_draw.cpp | 50 +-- src/gallium/drivers/swr/swr_fence_work.cpp | 21 ++ src/gallium/drivers/swr/swr_fence_work.h | 3 + src/gallium/drivers/swr/swr_scratch.cpp | 1 + src/gallium/drivers/swr/swr_scratch.h | 1 + src/gallium/drivers/swr/swr_screen.cpp | 6 +- src/gallium/drivers/swr/swr_shader.cpp | 475 ++++++++++++++++++++++++++++- src/gallium/drivers/swr/swr_shader.h | 22 ++ src/gallium/drivers/swr/swr_state.cpp | 100 +++++- src/gallium/drivers/swr/swr_state.h | 50 +++ src/gallium/drivers/swr/swr_tex_sample.cpp | 6 + 13 files changed, 700 insertions(+), 63 deletions(-) diff --git a/src/gallium/drivers/swr/swr_context.cpp b/src/gallium/drivers/swr/swr_context.cpp index b89ce1ba621..1c98ac2de34 100644 --- a/src/gallium/drivers/swr/swr_context.cpp +++ b/src/gallium/drivers/swr/swr_context.cpp @@ -307,7 +307,7 @@ swr_blit(struct pipe_context *pipe, const struct pipe_blit_info *blit_info) util_blitter_save_vertex_buffer_slot(ctx->blitter, ctx->vertex_buffer); util_blitter_save_vertex_elements(ctx->blitter, (void *)ctx->velems); util_blitter_save_vertex_shader(ctx->blitter, (void *)ctx->vs); - /*util_blitter_save_geometry_shader(ctx->blitter, (void*)ctx->gs);*/ + util_blitter_save_geometry_shader(ctx->blitter, (void*)ctx->gs); util_blitter_save_so_targets( ctx->blitter, ctx->num_so_targets, diff --git a/src/gallium/drivers/swr/swr_context.h b/src/gallium/drivers/swr/swr_context.h index 04e11fe78a8..46ca6112f91 100644 --- a/src/gallium/drivers/swr/swr_context.h +++ b/src/gallium/drivers/swr/swr_context.h @@ -40,16 +40,17 @@ #define SWR_NEW_SAMPLER_VIEW (1 << 4) #define SWR_NEW_VS (1 << 5) #define SWR_NEW_FS (1 << 6) -#define SWR_NEW_VSCONSTANTS (1 << 7) 
-#define SWR_NEW_FSCONSTANTS (1 << 8) -#define SWR_NEW_VERTEX (1 << 9) -#define SWR_NEW_STIPPLE (1 << 10) -#define SWR_NEW_SCISSOR (1 << 11) -#define SWR_NEW_VIEWPORT (1 << 12) -#define SWR_NEW_FRAMEBUFFER (1 << 13) -#define SWR_NEW_CLIP (1 << 14) -#define SWR_NEW_SO (1 << 15) -#define SWR_NEW_ALL 0x0000ffff +#define SWR_NEW_GS (1 << 7) +#define SWR_NEW_VSCONSTANTS (1 << 8) +#define SWR_NEW_FSCONSTANTS (1 << 9) +#define SWR_NEW_GSCONSTANTS (1 << 10) +#define SWR_NEW_VERTEX (1 << 11) +#define SWR_NEW_STIPPLE (1 << 12) +#define SWR_NEW_SCISSOR (1 << 13) +#define SWR_NEW_VIEWPORT (1 << 14) +#define SWR_NEW_FRAMEBUFFER (1 << 15) +#define SWR_NEW_CLIP (1 << 16) +#define SWR_NEW_SO (1 << 17) namespace std { @@ -85,11 +86,15 @@ struct swr_draw_context { uint32_t num_constantsVS[PIPE_MAX_CONSTANT_BUFFERS]; const float *constantFS[PIPE_MAX_CONSTANT_BUFFERS]; uint32_t num_constantsFS[PIPE_MAX_CONSTANT_BUFFERS]; + const float *constantGS[PIPE_MAX_CONSTANT_BUFFERS]; + uint32_t num_constantsGS[PIPE_MAX_CONSTANT_BUFFERS]; swr_jit_texture texturesVS[PIPE_MAX_SHADER_SAMPLER_VIEWS]; swr_jit_sampler samplersVS[PIPE_MAX_SAMPLERS]; swr_jit_texture texturesFS[PIPE_MAX_SHADER_SAMPLER_VIEWS]; swr_jit_sampler samplersFS[PIPE_MAX_SAMPLERS]; + swr_jit_texture texturesGS[PIPE_MAX_SHADER_SAMPLER_VIEWS]; + swr_jit_sampler samplersGS[PIPE_MAX_SAMPLERS]; float userClipPlanes[PIPE_MAX_CLIP_PLANES][4]; @@ -112,6 +117,7 @@ struct swr_context { struct swr_vertex_shader *vs; struct swr_fragment_shader *fs; + struct swr_geometry_shader *gs; struct swr_vertex_element_state *velems; /** Other rendering state */ diff --git a/src/gallium/drivers/swr/swr_draw.cpp b/src/gallium/drivers/swr/swr_draw.cpp index f764efee19c..c43f4a5f64d 100644 --- a/src/gallium/drivers/swr/swr_draw.cpp +++ b/src/gallium/drivers/swr/swr_draw.cpp @@ -32,48 +32,6 @@ #include "util/u_prim.h" /* - * Convert mesa PIPE_PRIM_X to SWR enum PRIMITIVE_TOPOLOGY - */ -static INLINE enum PRIMITIVE_TOPOLOGY -swr_convert_prim_topology(const 
unsigned mode) -{ - switch (mode) { - case PIPE_PRIM_POINTS: - return TOP_POINT_LIST; - case PIPE_PRIM_LINES: - return TOP_LINE_LIST; - case PIPE_PRIM_LINE_LOOP: - return TOP_LINE_LOOP; - case PIPE_PRIM_LINE_STRIP: - return TOP_LINE_STRIP; - case PIPE_PRIM_TRIANGLES: - return TOP_TRIANGLE_LIST; - case PIPE_PRIM_TRIANGLE_STRIP: - return TOP_TRIANGLE_STRIP; - case PIPE_PRIM_TRIANGLE_FAN: - return TOP_TRIANGLE_FAN; - case PIPE_PRIM_QUADS: - return TOP_QUAD_LIST; - case PIPE_PRIM_QUAD_STRIP: - return TOP_QUAD_STRIP; - case PIPE_PRIM_POLYGON: - return TOP_TRIANGLE_FAN; /* XXX TOP_POLYGON; */ - case PIPE_PRIM_LINES_ADJACENCY: - return TOP_LINE_LIST_ADJ; - case PIPE_PRIM_LINE_STRIP_ADJACENCY: - return TOP_LISTSTRIP_ADJ; - case PIPE_PRIM_TRIANGLES_ADJACENCY: - return TOP_TRI_LIST_ADJ; - case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY: - return TOP_TRI_STRIP_ADJ; - default: - assert(0 && "Unknown topology"); - return TOP_UNKNOWN; - } -}; - - -/* * Draw vertex arrays, with optional indexing, optional instancing. 
*/ static void @@ -171,7 +129,13 @@ swr_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) feState.provokingVertex = {2, 1, 2}; } - switch (info->mode) { + enum pipe_prim_type topology; + if (ctx->gs) + topology = (pipe_prim_type)ctx->gs->info.base.properties[TGSI_PROPERTY_GS_OUTPUT_PRIM]; + else + topology = info->mode; + + switch (topology) { case PIPE_PRIM_TRIANGLE_FAN: feState.topologyProvokingVertex = feState.provokingVertex.triFan; break; diff --git a/src/gallium/drivers/swr/swr_fence_work.cpp b/src/gallium/drivers/swr/swr_fence_work.cpp index 1fd2a834afc..594ebaac859 100644 --- a/src/gallium/drivers/swr/swr_fence_work.cpp +++ b/src/gallium/drivers/swr/swr_fence_work.cpp @@ -99,6 +99,12 @@ swr_delete_fs_cb(struct swr_fence_work *work) delete work->free.swr_fs; } +static void +swr_delete_gs_cb(struct swr_fence_work *work) +{ + delete work->free.swr_gs; +} + bool swr_fence_work_free(struct pipe_fence_handle *fence, void *data, bool aligned_free) @@ -146,3 +152,18 @@ swr_fence_work_delete_fs(struct pipe_fence_handle *fence, return true; } + +bool +swr_fence_work_delete_gs(struct pipe_fence_handle *fence, + struct swr_geometry_shader *swr_gs) +{ + struct swr_fence_work *work = CALLOC_STRUCT(swr_fence_work); + if (!work) + return false; + work->callback = swr_delete_gs_cb; + work->free.swr_gs = swr_gs; + + swr_add_fence_work(fence, work); + + return true; +} diff --git a/src/gallium/drivers/swr/swr_fence_work.h b/src/gallium/drivers/swr/swr_fence_work.h index 12403605305..a1e72fd20aa 100644 --- a/src/gallium/drivers/swr/swr_fence_work.h +++ b/src/gallium/drivers/swr/swr_fence_work.h @@ -31,6 +31,7 @@ struct swr_fence_work { void *data; struct swr_vertex_shader *swr_vs; struct swr_fragment_shader *swr_fs; + struct swr_geometry_shader *swr_gs; } free; struct swr_fence_work *next; @@ -44,4 +45,6 @@ bool swr_fence_work_delete_vs(struct pipe_fence_handle *fence, struct swr_vertex_shader *swr_vs); bool swr_fence_work_delete_fs(struct 
pipe_fence_handle *fence, struct swr_fragment_shader *swr_vs); +bool swr_fence_work_delete_gs(struct pipe_fence_handle *fence, + struct swr_geometry_shader *swr_gs); #endif diff --git a/src/gallium/drivers/swr/swr_scratch.cpp b/src/gallium/drivers/swr/swr_scratch.cpp index 58d18d04fab..db095dea7e3 100644 --- a/src/gallium/drivers/swr/swr_scratch.cpp +++ b/src/gallium/drivers/swr/swr_scratch.cpp @@ -99,6 +99,7 @@ swr_destroy_scratch_buffers(struct swr_context *ctx) if (scratch) { AlignedFree(scratch->vs_constants.base); AlignedFree(scratch->fs_constants.base); + AlignedFree(scratch->gs_constants.base); AlignedFree(scratch->vertex_buffer.base); AlignedFree(scratch->index_buffer.base); FREE(scratch); diff --git a/src/gallium/drivers/swr/swr_scratch.h b/src/gallium/drivers/swr/swr_scratch.h index 74218d63644..79c9b7aad11 100644 --- a/src/gallium/drivers/swr/swr_scratch.h +++ b/src/gallium/drivers/swr/swr_scratch.h @@ -35,6 +35,7 @@ struct swr_scratch_space { struct swr_scratch_buffers { struct swr_scratch_space vs_constants; struct swr_scratch_space fs_constants; + struct swr_scratch_space gs_constants; struct swr_scratch_space vertex_buffer; struct swr_scratch_space index_buffer; }; diff --git a/src/gallium/drivers/swr/swr_screen.cpp b/src/gallium/drivers/swr/swr_screen.cpp index f41020b8e80..0488a5b7683 100644 --- a/src/gallium/drivers/swr/swr_screen.cpp +++ b/src/gallium/drivers/swr/swr_screen.cpp @@ -347,10 +347,12 @@ swr_get_shader_param(struct pipe_screen *screen, unsigned shader, enum pipe_shader_cap param) { - if (shader == PIPE_SHADER_VERTEX || shader == PIPE_SHADER_FRAGMENT) + if (shader == PIPE_SHADER_VERTEX || + shader == PIPE_SHADER_FRAGMENT || + shader == PIPE_SHADER_GEOMETRY) return gallivm_get_shader_param(param); - // Todo: geometry, tesselation, compute + // Todo: tesselation, compute return 0; } diff --git a/src/gallium/drivers/swr/swr_shader.cpp b/src/gallium/drivers/swr/swr_shader.cpp index 9169f6dc74e..09d8145a8f5 100644 --- 
a/src/gallium/drivers/swr/swr_shader.cpp +++ b/src/gallium/drivers/swr/swr_shader.cpp @@ -35,6 +35,7 @@ #include "tgsi/tgsi_strings.h" #include "util/u_format.h" +#include "util/u_prim.h" #include "gallivm/lp_bld_init.h" #include "gallivm/lp_bld_flow.h" #include "gallivm/lp_bld_struct.h" @@ -47,6 +48,7 @@ #include "swr_screen.h" using namespace SwrJit; +using namespace llvm; static unsigned locate_linkage(ubyte name, ubyte index, struct tgsi_shader_info *info); @@ -66,6 +68,11 @@ bool operator==(const swr_jit_fetch_key &lhs, const swr_jit_fetch_key &rhs) return !memcmp(&lhs, &rhs, sizeof(lhs)); } +bool operator==(const swr_jit_gs_key &lhs, const swr_jit_gs_key &rhs) +{ + return !memcmp(&lhs, &rhs, sizeof(lhs)); +} + static void swr_generate_sampler_key(const struct lp_tgsi_info &info, struct swr_context *ctx, @@ -137,11 +144,18 @@ swr_generate_fs_key(struct swr_jit_fs_key &key, key.nr_cbufs = ctx->framebuffer.nr_cbufs; key.light_twoside = ctx->rasterizer->light_twoside; key.sprite_coord_enable = ctx->rasterizer->sprite_coord_enable; + + struct tgsi_shader_info *pPrevShader; + if (ctx->gs) + pPrevShader = &ctx->gs->info.base; + else + pPrevShader = &ctx->vs->info.base; + memcpy(&key.vs_output_semantic_name, - &ctx->vs->info.base.output_semantic_name, + &pPrevShader->output_semantic_name, sizeof(key.vs_output_semantic_name)); memcpy(&key.vs_output_semantic_idx, - &ctx->vs->info.base.output_semantic_index, + &pPrevShader->output_semantic_index, sizeof(key.vs_output_semantic_idx)); swr_generate_sampler_key(swr_fs->info, ctx, PIPE_SHADER_FRAGMENT, key); @@ -171,6 +185,25 @@ swr_generate_fetch_key(struct swr_jit_fetch_key &key, key.fsState = velems->fsState; } +void +swr_generate_gs_key(struct swr_jit_gs_key &key, + struct swr_context *ctx, + swr_geometry_shader *swr_gs) +{ + memset(&key, 0, sizeof(key)); + + struct tgsi_shader_info *pPrevShader = &ctx->vs->info.base; + + memcpy(&key.vs_output_semantic_name, + &pPrevShader->output_semantic_name, + 
sizeof(key.vs_output_semantic_name)); + memcpy(&key.vs_output_semantic_idx, + &pPrevShader->output_semantic_index, + sizeof(key.vs_output_semantic_idx)); + + swr_generate_sampler_key(swr_gs->info, ctx, PIPE_SHADER_GEOMETRY, key); +} + struct BuilderSWR : public Builder { BuilderSWR(JitManager *pJitMgr, const char *pName) : Builder(pJitMgr) @@ -187,8 +220,433 @@ struct BuilderSWR : public Builder { struct gallivm_state *gallivm; PFN_VERTEX_FUNC CompileVS(struct swr_context *ctx, swr_jit_vs_key &key); PFN_PIXEL_KERNEL CompileFS(struct swr_context *ctx, swr_jit_fs_key &key); + PFN_GS_FUNC CompileGS(struct swr_context *ctx, swr_jit_gs_key &key); + + LLVMValueRef + swr_gs_llvm_fetch_input(const struct lp_build_tgsi_gs_iface *gs_iface, + struct lp_build_tgsi_context * bld_base, + boolean is_vindex_indirect, + LLVMValueRef vertex_index, + boolean is_aindex_indirect, + LLVMValueRef attrib_index, + LLVMValueRef swizzle_index); + void + swr_gs_llvm_emit_vertex(const struct lp_build_tgsi_gs_iface *gs_base, + struct lp_build_tgsi_context * bld_base, + LLVMValueRef (*outputs)[4], + LLVMValueRef emitted_vertices_vec); + + void + swr_gs_llvm_end_primitive(const struct lp_build_tgsi_gs_iface *gs_base, + struct lp_build_tgsi_context * bld_base, + LLVMValueRef verts_per_prim_vec, + LLVMValueRef emitted_prims_vec); + + void + swr_gs_llvm_epilogue(const struct lp_build_tgsi_gs_iface *gs_base, + struct lp_build_tgsi_context * bld_base, + LLVMValueRef total_emitted_vertices_vec, + LLVMValueRef emitted_prims_vec); + }; +struct swr_gs_llvm_iface { + struct lp_build_tgsi_gs_iface base; + struct tgsi_shader_info *info; + + BuilderSWR *pBuilder; + + Value *pGsCtx; + SWR_GS_STATE *pGsState; + uint32_t num_outputs; + uint32_t num_verts_per_prim; + + Value *pVtxAttribMap; +}; + +// trampoline functions so we can use the builder llvm construction methods +static LLVMValueRef +swr_gs_llvm_fetch_input(const struct lp_build_tgsi_gs_iface *gs_iface, + struct lp_build_tgsi_context * bld_base, + 
boolean is_vindex_indirect, + LLVMValueRef vertex_index, + boolean is_aindex_indirect, + LLVMValueRef attrib_index, + LLVMValueRef swizzle_index) +{ + swr_gs_llvm_iface *iface = (swr_gs_llvm_iface*)gs_iface; + + return iface->pBuilder->swr_gs_llvm_fetch_input(gs_iface, bld_base, + is_vindex_indirect, + vertex_index, + is_aindex_indirect, + attrib_index, + swizzle_index); +} + +static void +swr_gs_llvm_emit_vertex(const struct lp_build_tgsi_gs_iface *gs_base, + struct lp_build_tgsi_context * bld_base, + LLVMValueRef (*outputs)[4], + LLVMValueRef emitted_vertices_vec) +{ + swr_gs_llvm_iface *iface = (swr_gs_llvm_iface*)gs_base; + + iface->pBuilder->swr_gs_llvm_emit_vertex(gs_base, bld_base, + outputs, + emitted_vertices_vec); +} + +static void +swr_gs_llvm_end_primitive(const struct lp_build_tgsi_gs_iface *gs_base, + struct lp_build_tgsi_context * bld_base, + LLVMValueRef verts_per_prim_vec, + LLVMValueRef emitted_prims_vec) +{ + swr_gs_llvm_iface *iface = (swr_gs_llvm_iface*)gs_base; + + iface->pBuilder->swr_gs_llvm_end_primitive(gs_base, bld_base, + verts_per_prim_vec, + emitted_prims_vec); +} + +static void +swr_gs_llvm_epilogue(const struct lp_build_tgsi_gs_iface *gs_base, + struct lp_build_tgsi_context * bld_base, + LLVMValueRef total_emitted_vertices_vec, + LLVMValueRef emitted_prims_vec) +{ + swr_gs_llvm_iface *iface = (swr_gs_llvm_iface*)gs_base; + + iface->pBuilder->swr_gs_llvm_epilogue(gs_base, bld_base, + total_emitted_vertices_vec, + emitted_prims_vec); +} + +LLVMValueRef +BuilderSWR::swr_gs_llvm_fetch_input(const struct lp_build_tgsi_gs_iface *gs_iface, + struct lp_build_tgsi_context * bld_base, + boolean is_vindex_indirect, + LLVMValueRef vertex_index, + boolean is_aindex_indirect, + LLVMValueRef attrib_index, + LLVMValueRef swizzle_index) +{ + swr_gs_llvm_iface *iface = (swr_gs_llvm_iface*)gs_iface; + + IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder))); + + assert(is_vindex_indirect == false && is_aindex_indirect == false); + + Value 
*attrib = + LOAD(GEP(iface->pVtxAttribMap, {C(0), unwrap(attrib_index)})); + + Value *pInput = + LOAD(GEP(iface->pGsCtx, + {C(0), + C(SWR_GS_CONTEXT_vert), + unwrap(vertex_index), + C(0), + attrib, + unwrap(swizzle_index)})); + + return wrap(pInput); +} + +void +BuilderSWR::swr_gs_llvm_emit_vertex(const struct lp_build_tgsi_gs_iface *gs_base, + struct lp_build_tgsi_context * bld_base, + LLVMValueRef (*outputs)[4], + LLVMValueRef emitted_vertices_vec) +{ + swr_gs_llvm_iface *iface = (swr_gs_llvm_iface*)gs_base; + SWR_GS_STATE *pGS = iface->pGsState; + + IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder))); + + const uint32_t simdVertexStride = sizeof(simdvertex); + const uint32_t numSimdBatches = (pGS->maxNumVerts + 7) / 8; + const uint32_t inputPrimStride = numSimdBatches * simdVertexStride; + + Value *pStream = LOAD(iface->pGsCtx, { 0, SWR_GS_CONTEXT_pStream }); + Value *vMask = LOAD(iface->pGsCtx, { 0, SWR_GS_CONTEXT_mask }); + Value *vMask1 = TRUNC(vMask, VectorType::get(mInt1Ty, 8)); + + Value *vOffsets = C({ + inputPrimStride * 0, + inputPrimStride * 1, + inputPrimStride * 2, + inputPrimStride * 3, + inputPrimStride * 4, + inputPrimStride * 5, + inputPrimStride * 6, + inputPrimStride * 7 } ); + + Value *vVertexSlot = ASHR(unwrap(emitted_vertices_vec), 3); + Value *vSimdSlot = AND(unwrap(emitted_vertices_vec), 7); + + for (uint32_t attrib = 0; attrib < iface->num_outputs; ++attrib) { + uint32_t attribSlot = attrib; + if (iface->info->output_semantic_name[attrib] == TGSI_SEMANTIC_PSIZE) + attribSlot = VERTEX_POINT_SIZE_SLOT; + else if (iface->info->output_semantic_name[attrib] == TGSI_SEMANTIC_PRIMID) + attribSlot = VERTEX_PRIMID_SLOT; + + Value *vOffsetsAttrib = + ADD(vOffsets, MUL(vVertexSlot, VIMMED1((uint32_t)sizeof(simdvertex)))); + vOffsetsAttrib = + ADD(vOffsetsAttrib, VIMMED1((uint32_t)(attribSlot*sizeof(simdvector)))); + vOffsetsAttrib = + ADD(vOffsetsAttrib, MUL(vSimdSlot, VIMMED1((uint32_t)sizeof(float)))); + + for (uint32_t channel = 
0; channel < 4; ++channel) { + Value *vData = LOAD(unwrap(outputs[attrib][channel])); + Value *vPtrs = GEP(pStream, vOffsetsAttrib); + + vPtrs = BITCAST(vPtrs, + VectorType::get(PointerType::get(mFP32Ty, 0), 8)); + + MASKED_SCATTER(vData, vPtrs, 32, vMask1); + + vOffsetsAttrib = + ADD(vOffsetsAttrib, VIMMED1((uint32_t)sizeof(simdscalar))); + } + } +} + +void +BuilderSWR::swr_gs_llvm_end_primitive(const struct lp_build_tgsi_gs_iface *gs_base, + struct lp_build_tgsi_context * bld_base, + LLVMValueRef verts_per_prim_vec, + LLVMValueRef emitted_prims_vec) +{ + swr_gs_llvm_iface *iface = (swr_gs_llvm_iface*)gs_base; + SWR_GS_STATE *pGS = iface->pGsState; + + IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder))); + + Value *pCutBuffer = + LOAD(iface->pGsCtx, {0, SWR_GS_CONTEXT_pCutOrStreamIdBuffer}); + Value *vMask = LOAD(iface->pGsCtx, { 0, SWR_GS_CONTEXT_mask }); + Value *vMask1 = TRUNC(vMask, VectorType::get(mInt1Ty, 8)); + + uint32_t vertsPerPrim = iface->num_verts_per_prim; + + Value *vCount = + ADD(MUL(unwrap(emitted_prims_vec), VIMMED1(vertsPerPrim)), + unwrap(verts_per_prim_vec)); + + struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base); + vCount = LOAD(unwrap(bld->total_emitted_vertices_vec_ptr)); + + struct lp_exec_mask *exec_mask = &bld->exec_mask; + Value *mask = unwrap(lp_build_mask_value(bld->mask)); + if (exec_mask->has_mask) + mask = AND(mask, unwrap(exec_mask->exec_mask)); + + Value *cmpMask = VMASK(ICMP_NE(unwrap(verts_per_prim_vec), VIMMED1(0))); + mask = AND(mask, cmpMask); + vMask1 = TRUNC(mask, VectorType::get(mInt1Ty, 8)); + + const uint32_t cutPrimStride = + (pGS->maxNumVerts + JM()->mVWidth - 1) / JM()->mVWidth; + Value *vOffsets = C({ + (uint32_t)(cutPrimStride * 0), + (uint32_t)(cutPrimStride * 1), + (uint32_t)(cutPrimStride * 2), + (uint32_t)(cutPrimStride * 3), + (uint32_t)(cutPrimStride * 4), + (uint32_t)(cutPrimStride * 5), + (uint32_t)(cutPrimStride * 6), + (uint32_t)(cutPrimStride * 7) } ); + + vCount = 
SUB(vCount, VIMMED1(1)); + Value *vOffset = ADD(UDIV(vCount, VIMMED1(8)), vOffsets); + Value *vValue = SHL(VIMMED1(1), UREM(vCount, VIMMED1(8))); + + vValue = TRUNC(vValue, VectorType::get(mInt8Ty, 8)); + + Value *vPtrs = GEP(pCutBuffer, vOffset); + vPtrs = + BITCAST(vPtrs, VectorType::get(PointerType::get(mInt8Ty, 0), JM()->mVWidth)); + + Value *vGather = MASKED_GATHER(vPtrs, 32, vMask1); + vValue = OR(vGather, vValue); + MASKED_SCATTER(vValue, vPtrs, 32, vMask1); +} + +void +BuilderSWR::swr_gs_llvm_epilogue(const struct lp_build_tgsi_gs_iface *gs_base, + struct lp_build_tgsi_context * bld_base, + LLVMValueRef total_emitted_vertices_vec, + LLVMValueRef emitted_prims_vec) +{ + swr_gs_llvm_iface *iface = (swr_gs_llvm_iface*)gs_base; + SWR_GS_STATE *pGS = iface->pGsState; + + IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder))); + + STORE(unwrap(total_emitted_vertices_vec), iface->pGsCtx, {0, SWR_GS_CONTEXT_vertexCount}); +} + +PFN_GS_FUNC +BuilderSWR::CompileGS(struct swr_context *ctx, swr_jit_gs_key &key) +{ + SWR_GS_STATE *pGS = &ctx->gs->gsState; + struct tgsi_shader_info *info = &ctx->gs->info.base; + + pGS->gsEnable = true; + + pGS->numInputAttribs = info->num_inputs; + pGS->outputTopology = + swr_convert_prim_topology(info->properties[TGSI_PROPERTY_GS_OUTPUT_PRIM]); + pGS->maxNumVerts = info->properties[TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES]; + pGS->instanceCount = info->properties[TGSI_PROPERTY_GS_INVOCATIONS]; + + pGS->emitsRenderTargetArrayIndex = info->writes_layer; + pGS->emitsPrimitiveID = info->writes_primid; + pGS->emitsViewportArrayIndex = info->writes_viewport_index; + + // XXX: single stream for now... 
+ pGS->isSingleStream = true; + pGS->singleStreamID = 0; + + struct swr_geometry_shader *gs = ctx->gs; + + LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][TGSI_NUM_CHANNELS]; + LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][TGSI_NUM_CHANNELS]; + + memset(outputs, 0, sizeof(outputs)); + + AttrBuilder attrBuilder; + attrBuilder.addStackAlignmentAttr(JM()->mVWidth * sizeof(float)); + AttributeSet attrSet = AttributeSet::get( + JM()->mContext, AttributeSet::FunctionIndex, attrBuilder); + + std::vector<Type *> gsArgs{PointerType::get(Gen_swr_draw_context(JM()), 0), + PointerType::get(Gen_SWR_GS_CONTEXT(JM()), 0)}; + FunctionType *vsFuncType = + FunctionType::get(Type::getVoidTy(JM()->mContext), gsArgs, false); + + // create new geometry shader function + auto pFunction = Function::Create(vsFuncType, + GlobalValue::ExternalLinkage, + "GS", + JM()->mpCurrentModule); + pFunction->addAttributes(AttributeSet::FunctionIndex, attrSet); + + BasicBlock *block = BasicBlock::Create(JM()->mContext, "entry", pFunction); + IRB()->SetInsertPoint(block); + LLVMPositionBuilderAtEnd(gallivm->builder, wrap(block)); + + auto argitr = pFunction->arg_begin(); + Value *hPrivateData = &*argitr++; + hPrivateData->setName("hPrivateData"); + Value *pGsCtx = &*argitr++; + pGsCtx->setName("gsCtx"); + + Value *consts_ptr = + GEP(hPrivateData, {C(0), C(swr_draw_context_constantGS)}); + consts_ptr->setName("gs_constants"); + Value *const_sizes_ptr = + GEP(hPrivateData, {0, swr_draw_context_num_constantsGS}); + const_sizes_ptr->setName("num_gs_constants"); + + struct lp_build_sampler_soa *sampler = + swr_sampler_soa_create(key.sampler, PIPE_SHADER_GEOMETRY); + + struct lp_bld_tgsi_system_values system_values; + memset(&system_values, 0, sizeof(system_values)); + system_values.prim_id = wrap(LOAD(pGsCtx, {0, SWR_GS_CONTEXT_PrimitiveID})); + system_values.instance_id = wrap(LOAD(pGsCtx, {0, SWR_GS_CONTEXT_InstanceID})); + + std::vector<Constant *> mapConstants; + Value *vtxAttribMap = ALLOCA(ArrayType::get(mInt32Ty, 
PIPE_MAX_SHADER_INPUTS)); + for (unsigned slot = 0; slot < info->num_inputs; slot++) { + ubyte semantic_name = info->input_semantic_name[slot]; + ubyte semantic_idx = info->input_semantic_index[slot]; + + unsigned vs_slot = + locate_linkage(semantic_name, semantic_idx, &ctx->vs->info.base) + 1; + + STORE(C(vs_slot), vtxAttribMap, {0, slot}); + mapConstants.push_back(C(vs_slot)); + } + + struct lp_build_mask_context mask; + Value *mask_val = LOAD(pGsCtx, {0, SWR_GS_CONTEXT_mask}, "gsMask"); + lp_build_mask_begin(&mask, gallivm, + lp_type_float_vec(32, 32 * 8), wrap(mask_val)); + + // zero out cut buffer so we can load/modify/store bits + MEMSET(LOAD(pGsCtx, {0, SWR_GS_CONTEXT_pCutOrStreamIdBuffer}), + C((char)0), + pGS->instanceCount * ((pGS->maxNumVerts + 7) / 8) * JM()->mVWidth, + sizeof(float) * KNOB_SIMD_WIDTH); + + struct swr_gs_llvm_iface gs_iface; + gs_iface.base.fetch_input = ::swr_gs_llvm_fetch_input; + gs_iface.base.emit_vertex = ::swr_gs_llvm_emit_vertex; + gs_iface.base.end_primitive = ::swr_gs_llvm_end_primitive; + gs_iface.base.gs_epilogue = ::swr_gs_llvm_epilogue; + gs_iface.pBuilder = this; + gs_iface.pGsCtx = pGsCtx; + gs_iface.pGsState = pGS; + gs_iface.num_outputs = gs->info.base.num_outputs; + gs_iface.num_verts_per_prim = + u_vertices_per_prim((pipe_prim_type)info->properties[TGSI_PROPERTY_GS_OUTPUT_PRIM]); + gs_iface.info = info; + gs_iface.pVtxAttribMap = vtxAttribMap; + + lp_build_tgsi_soa(gallivm, + gs->pipe.tokens, + lp_type_float_vec(32, 32 * 8), + &mask, + wrap(consts_ptr), + wrap(const_sizes_ptr), + &system_values, + inputs, + outputs, + wrap(hPrivateData), // (sampler context) + NULL, // thread data + sampler, + &gs->info.base, + &gs_iface.base); + + lp_build_mask_end(&mask); + + sampler->destroy(sampler); + + IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder))); + + RET_VOID(); + + gallivm_verify_function(gallivm, wrap(pFunction)); + gallivm_compile_module(gallivm); + + PFN_GS_FUNC pFunc = + 
(PFN_GS_FUNC)gallivm_jit_function(gallivm, wrap(pFunction)); + + debug_printf("geom shader %p\n", pFunc); + assert(pFunc && "Error: GeomShader = NULL"); + + JM()->mIsModuleFinalized = true; + + return pFunc; +} + +PFN_GS_FUNC +swr_compile_gs(struct swr_context *ctx, swr_jit_gs_key &key) +{ + BuilderSWR builder( + reinterpret_cast<JitManager *>(swr_screen(ctx->pipe.screen)->hJitMgr), + "GS"); + PFN_GS_FUNC func = builder.CompileGS(ctx, key); + + ctx->gs->map.insert(std::make_pair(key, make_unique<VariantGS>(builder.gallivm, func))); + return func; +} + PFN_VERTEX_FUNC BuilderSWR::CompileVS(struct swr_context *ctx, swr_jit_vs_key &key) { @@ -396,6 +854,12 @@ BuilderSWR::CompileFS(struct swr_context *ctx, swr_jit_fs_key &key) { struct swr_fragment_shader *swr_fs = ctx->fs; + struct tgsi_shader_info *pPrevShader; + if (ctx->gs) + pPrevShader = &ctx->gs->info.base; + else + pPrevShader = &ctx->vs->info.base; + LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][TGSI_NUM_CHANNELS]; LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][TGSI_NUM_CHANNELS]; @@ -530,11 +994,12 @@ BuilderSWR::CompileFS(struct swr_context *ctx, swr_jit_fs_key &key) } unsigned linkedAttrib = - locate_linkage(semantic_name, semantic_idx, &ctx->vs->info.base); + locate_linkage(semantic_name, semantic_idx, pPrevShader); + if (semantic_name == TGSI_SEMANTIC_GENERIC && key.sprite_coord_enable & (1 << semantic_idx)) { /* we add an extra attrib to the backendState in swr_update_derived. 
*/ - linkedAttrib = ctx->vs->info.base.num_outputs - 1; + linkedAttrib = pPrevShader->num_outputs - 1; swr_fs->pointSpriteMask |= (1 << linkedAttrib); } else if (linkedAttrib == 0xFFFFFFFF) { inputs[attrib][0] = wrap(VIMMED1(0.0f)); @@ -558,7 +1023,7 @@ BuilderSWR::CompileFS(struct swr_context *ctx, swr_jit_fs_key &key) Value *offset = NULL; if (semantic_name == TGSI_SEMANTIC_COLOR && key.light_twoside) { bcolorAttrib = locate_linkage( - TGSI_SEMANTIC_BCOLOR, semantic_idx, &ctx->vs->info.base); + TGSI_SEMANTIC_BCOLOR, semantic_idx, pPrevShader); /* Neither front nor back colors were available. Nothing to load. */ if (bcolorAttrib == 0xFFFFFFFF && linkedAttrib == 0xFFFFFFFF) continue; diff --git a/src/gallium/drivers/swr/swr_shader.h b/src/gallium/drivers/swr/swr_shader.h index 266573f7eaf..c9df5b00917 100644 --- a/src/gallium/drivers/swr/swr_shader.h +++ b/src/gallium/drivers/swr/swr_shader.h @@ -25,8 +25,10 @@ struct swr_vertex_shader; struct swr_fragment_shader; +struct swr_geometry_shader; struct swr_jit_fs_key; struct swr_jit_vs_key; +struct swr_jit_gs_key; PFN_VERTEX_FUNC swr_compile_vs(struct swr_context *ctx, swr_jit_vs_key &key); @@ -34,6 +36,9 @@ swr_compile_vs(struct swr_context *ctx, swr_jit_vs_key &key); PFN_PIXEL_KERNEL swr_compile_fs(struct swr_context *ctx, swr_jit_fs_key &key); +PFN_GS_FUNC +swr_compile_gs(struct swr_context *ctx, swr_jit_gs_key &key); + void swr_generate_fs_key(struct swr_jit_fs_key &key, struct swr_context *ctx, swr_fragment_shader *swr_fs); @@ -45,6 +50,10 @@ void swr_generate_vs_key(struct swr_jit_vs_key &key, void swr_generate_fetch_key(struct swr_jit_fetch_key &key, struct swr_vertex_element_state *velems); +void swr_generate_gs_key(struct swr_jit_gs_key &key, + struct swr_context *ctx, + swr_geometry_shader *swr_gs); + struct swr_jit_sampler_key { unsigned nr_samplers; unsigned nr_sampler_views; @@ -67,6 +76,11 @@ struct swr_jit_fetch_key { FETCH_COMPILE_STATE fsState; }; +struct swr_jit_gs_key : swr_jit_sampler_key { + ubyte 
vs_output_semantic_name[PIPE_MAX_SHADER_OUTPUTS]; + ubyte vs_output_semantic_idx[PIPE_MAX_SHADER_OUTPUTS]; +}; + namespace std { template <> struct hash { @@ -89,8 +103,16 @@ template <> struct hash { return util_hash_crc32(&k, sizeof(k)); } }; + +template <> struct hash<swr_jit_gs_key> { + std::size_t operator()(const swr_jit_gs_key &k) const + { + return util_hash_crc32(&k, sizeof(k)); + } +}; }; bool operator==(const swr_jit_fs_key &lhs, const swr_jit_fs_key &rhs); bool operator==(const swr_jit_vs_key &lhs, const swr_jit_vs_key &rhs); bool operator==(const swr_jit_fetch_key &lhs, const swr_jit_fetch_key &rhs); +bool operator==(const swr_jit_gs_key &lhs, const swr_jit_gs_key &rhs); diff --git a/src/gallium/drivers/swr/swr_state.cpp b/src/gallium/drivers/swr/swr_state.cpp index e1f173476f6..b3896649848 100644 --- a/src/gallium/drivers/swr/swr_state.cpp +++ b/src/gallium/drivers/swr/swr_state.cpp @@ -416,6 +416,44 @@ swr_delete_fs_state(struct pipe_context *pipe, void *fs) swr_fence_work_delete_fs(screen->flush_fence, swr_fs); } +static void * +swr_create_gs_state(struct pipe_context *pipe, + const struct pipe_shader_state *gs) +{ + struct swr_geometry_shader *swr_gs = new swr_geometry_shader; + if (!swr_gs) + return NULL; + + swr_gs->pipe.tokens = tgsi_dup_tokens(gs->tokens); + + lp_build_tgsi_info(gs->tokens, &swr_gs->info); + + return swr_gs; +} + + +static void +swr_bind_gs_state(struct pipe_context *pipe, void *gs) +{ + struct swr_context *ctx = swr_context(pipe); + + if (ctx->gs == gs) + return; + + ctx->gs = (swr_geometry_shader *)gs; + ctx->dirty |= SWR_NEW_GS; +} + +static void +swr_delete_gs_state(struct pipe_context *pipe, void *gs) +{ + struct swr_geometry_shader *swr_gs = (swr_geometry_shader *)gs; + FREE((void *)swr_gs->pipe.tokens); + struct swr_screen *screen = swr_screen(pipe->screen); + + /* Defer deletion of gs state */ + swr_fence_work_delete_gs(screen->flush_fence, swr_gs); +} static void swr_set_constant_buffer(struct pipe_context *pipe, @@ -432,10 +470,12 @@ 
swr_set_constant_buffer(struct pipe_context *pipe, /* note: reference counting */ util_copy_constant_buffer(&ctx->constants[shader][index], cb); - if (shader == PIPE_SHADER_VERTEX || shader == PIPE_SHADER_GEOMETRY) { + if (shader == PIPE_SHADER_VERTEX) { ctx->dirty |= SWR_NEW_VSCONSTANTS; } else if (shader == PIPE_SHADER_FRAGMENT) { ctx->dirty |= SWR_NEW_FSCONSTANTS; + } else if (shader == PIPE_SHADER_GEOMETRY) { + ctx->dirty |= SWR_NEW_GSCONSTANTS; } if (cb && cb->user_buffer) { @@ -799,6 +839,11 @@ swr_update_constants(struct swr_context *ctx, enum pipe_shader_type shaderType) num_constants = pDC->num_constantsFS; scratch = &ctx->scratch->fs_constants; break; + case PIPE_SHADER_GEOMETRY: + constant = pDC->constantGS; + num_constants = pDC->num_constantsGS; + scratch = &ctx->scratch->gs_constants; + break; default: debug_printf("Unsupported shader type constants\n"); return; @@ -1234,6 +1279,47 @@ swr_update_derived(struct pipe_context *pipe, } } + /* GeometryShader */ + if (ctx->dirty & (SWR_NEW_GS | + SWR_NEW_VS | + SWR_NEW_SAMPLER | + SWR_NEW_SAMPLER_VIEW)) { + if (ctx->gs) { + swr_jit_gs_key key; + swr_generate_gs_key(key, ctx, ctx->gs); + auto search = ctx->gs->map.find(key); + PFN_GS_FUNC func; + if (search != ctx->gs->map.end()) { + func = search->second->shader; + } else { + func = swr_compile_gs(ctx, key); + } + SwrSetGsFunc(ctx->swrContext, func); + + /* JIT sampler state */ + if (ctx->dirty & SWR_NEW_SAMPLER) { + swr_update_sampler_state(ctx, + PIPE_SHADER_GEOMETRY, + key.nr_samplers, + ctx->swrDC.samplersGS); + } + + /* JIT sampler view state */ + if (ctx->dirty & (SWR_NEW_SAMPLER_VIEW | SWR_NEW_FRAMEBUFFER)) { + swr_update_texture_state(ctx, + PIPE_SHADER_GEOMETRY, + key.nr_sampler_views, + ctx->swrDC.texturesGS); + } + + SwrSetGsState(ctx->swrContext, &ctx->gs->gsState); + } else { + SWR_GS_STATE state = { 0 }; + SwrSetGsState(ctx->swrContext, &state); + SwrSetGsFunc(ctx->swrContext, NULL); + } + } + /* VertexShader */ if (ctx->dirty & (SWR_NEW_VS | 
SWR_NEW_RASTERIZER | // for clip planes @@ -1271,6 +1357,7 @@ swr_update_derived(struct pipe_context *pipe, /* FragmentShader */ if (ctx->dirty & (SWR_NEW_FS | SWR_NEW_VS | + SWR_NEW_GS | SWR_NEW_RASTERIZER | SWR_NEW_SAMPLER | SWR_NEW_SAMPLER_VIEW | @@ -1356,6 +1443,11 @@ swr_update_derived(struct pipe_context *pipe, swr_update_constants(ctx, PIPE_SHADER_FRAGMENT); } + /* GeometryShader Constants */ + if (ctx->dirty & SWR_NEW_GSCONSTANTS) { + swr_update_constants(ctx, PIPE_SHADER_GEOMETRY); + } + /* Depth/stencil state */ if (ctx->dirty & (SWR_NEW_DEPTH_STENCIL_ALPHA | SWR_NEW_FRAMEBUFFER)) { struct pipe_depth_state *depth = &(ctx->depth_stencil->depth); @@ -1557,7 +1649,7 @@ swr_update_derived(struct pipe_context *pipe, // set up backend state SWR_BACKEND_STATE backendState = {0}; backendState.numAttributes = - ctx->vs->info.base.num_outputs - 1 + + ((ctx->gs ? ctx->gs->info.base.num_outputs : ctx->vs->info.base.num_outputs) - 1) + (ctx->rasterizer->sprite_coord_enable ? 1 : 0); for (unsigned i = 0; i < backendState.numAttributes; i++) backendState.numComponents[i] = 4; @@ -1665,6 +1757,10 @@ swr_state_init(struct pipe_context *pipe) pipe->bind_fs_state = swr_bind_fs_state; pipe->delete_fs_state = swr_delete_fs_state; + pipe->create_gs_state = swr_create_gs_state; + pipe->bind_gs_state = swr_bind_gs_state; + pipe->delete_gs_state = swr_delete_gs_state; + pipe->set_constant_buffer = swr_set_constant_buffer; pipe->create_vertex_elements_state = swr_create_vertex_elements_state; diff --git a/src/gallium/drivers/swr/swr_state.h b/src/gallium/drivers/swr/swr_state.h index 202f16547f9..c89e303ba33 100644 --- a/src/gallium/drivers/swr/swr_state.h +++ b/src/gallium/drivers/swr/swr_state.h @@ -48,6 +48,7 @@ struct ShaderVariant { typedef ShaderVariant<PFN_VERTEX_FUNC> VariantVS; typedef ShaderVariant<PFN_PIXEL_KERNEL> VariantFS; +typedef ShaderVariant<PFN_GS_FUNC> VariantGS; /* skeleton */ struct swr_vertex_shader { @@ -67,6 +68,14 @@ struct swr_fragment_shader { std::unordered_map<swr_jit_fs_key, std::unique_ptr<VariantFS>> map; }; +struct swr_geometry_shader { 
+ struct pipe_shader_state pipe; + struct lp_tgsi_info info; + SWR_GS_STATE gsState; + + std::unordered_map<swr_jit_gs_key, std::unique_ptr<VariantGS>> map; +}; + /* Vertex element state */ struct swr_vertex_element_state { FETCH_COMPILE_STATE fsState; @@ -321,4 +330,45 @@ swr_convert_target_type(const enum pipe_texture_target target) return SURFACE_NULL; } } + +/* + * Convert mesa PIPE_PRIM_X to SWR enum PRIMITIVE_TOPOLOGY + */ +static INLINE enum PRIMITIVE_TOPOLOGY +swr_convert_prim_topology(const unsigned mode) +{ + switch (mode) { + case PIPE_PRIM_POINTS: + return TOP_POINT_LIST; + case PIPE_PRIM_LINES: + return TOP_LINE_LIST; + case PIPE_PRIM_LINE_LOOP: + return TOP_LINE_LOOP; + case PIPE_PRIM_LINE_STRIP: + return TOP_LINE_STRIP; + case PIPE_PRIM_TRIANGLES: + return TOP_TRIANGLE_LIST; + case PIPE_PRIM_TRIANGLE_STRIP: + return TOP_TRIANGLE_STRIP; + case PIPE_PRIM_TRIANGLE_FAN: + return TOP_TRIANGLE_FAN; + case PIPE_PRIM_QUADS: + return TOP_QUAD_LIST; + case PIPE_PRIM_QUAD_STRIP: + return TOP_QUAD_STRIP; + case PIPE_PRIM_POLYGON: + return TOP_TRIANGLE_FAN; /* XXX TOP_POLYGON; */ + case PIPE_PRIM_LINES_ADJACENCY: + return TOP_LINE_LIST_ADJ; + case PIPE_PRIM_LINE_STRIP_ADJACENCY: + return TOP_LISTSTRIP_ADJ; + case PIPE_PRIM_TRIANGLES_ADJACENCY: + return TOP_TRI_LIST_ADJ; + case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY: + return TOP_TRI_STRIP_ADJ; + default: + assert(0 && "Unknown topology"); + return TOP_UNKNOWN; + } +}; #endif diff --git a/src/gallium/drivers/swr/swr_tex_sample.cpp b/src/gallium/drivers/swr/swr_tex_sample.cpp index 6eb5ea67733..37ad39b950d 100644 --- a/src/gallium/drivers/swr/swr_tex_sample.cpp +++ b/src/gallium/drivers/swr/swr_tex_sample.cpp @@ -123,6 +123,9 @@ swr_texture_member(const struct lp_sampler_dynamic_state *base, case PIPE_SHADER_VERTEX: indices[1] = lp_build_const_int32(gallivm, swr_draw_context_texturesVS); break; + case PIPE_SHADER_GEOMETRY: + indices[1] = lp_build_const_int32(gallivm, swr_draw_context_texturesGS); + break; default: assert(0 && "unsupported shader type"); break; 
@@ -217,6 +220,9 @@ swr_sampler_member(const struct lp_sampler_dynamic_state *base, case PIPE_SHADER_VERTEX: indices[1] = lp_build_const_int32(gallivm, swr_draw_context_samplersVS); break; + case PIPE_SHADER_GEOMETRY: + indices[1] = lp_build_const_int32(gallivm, swr_draw_context_samplersGS); + break; default: assert(0 && "unsupported shader type"); break; -- 2.11.0