OSDN Git Service

Merge remote branch 'origin/master' into lp-binning
[android-x86/external-mesa.git] / src / gallium / drivers / llvmpipe / lp_state_fs.c
index 22683ff..3a669ba 100644 (file)
@@ -84,8 +84,8 @@
 #include "lp_screen.h"
 #include "lp_context.h"
 #include "lp_buffer.h"
+#include "lp_setup.h"
 #include "lp_state.h"
-#include "lp_quad.h"
 #include "lp_tex_sample.h"
 #include "lp_debug.h"
 
@@ -187,7 +187,93 @@ generate_depth(LLVMBuilderRef builder,
 
 
 /**
+ * Generate the code to do inside/outside triangle testing for the four
+ * pixels in a 2x2 quad.  A pixel passes only if all three edge compares
+ * pass; the resulting ivec4 (intended to be 0 or ~0 per lane) goes to *mask.
+ * \param i  quad index in [0,3]; selects step0/1/2_ptr[i] (asserted < 4)
+ */
+static void
+generate_tri_edge_mask(LLVMBuilderRef builder,
+                       unsigned i,
+                       LLVMValueRef *mask,      /* ivec4, out */
+                       LLVMValueRef c0,         /* int32 */
+                       LLVMValueRef c1,         /* int32 */
+                       LLVMValueRef c2,         /* int32 */
+                       LLVMValueRef step0_ptr,  /* ptr to ivec4 */
+                       LLVMValueRef step1_ptr,  /* ptr to ivec4 */
+                       LLVMValueRef step2_ptr)  /* ptr to ivec4 */
+{
+   /* Pseudocode for the IR emitted below:
+     c0_vec = splat(c0)
+     c1_vec = splat(c1)
+     c2_vec = splat(c2)
+     m0_vec = step0_ptr[i] > c0_vec
+     m1_vec = step1_ptr[i] > c1_vec
+     m2_vec = step2_ptr[i] > c2_vec
+     mask = m0_vec & m1_vec & m2_vec
+    */
+   struct lp_type i32_type;
+   LLVMTypeRef i32vec4_type;
+
+   LLVMValueRef index;
+   LLVMValueRef c0_vec, c1_vec, c2_vec;
+   LLVMValueRef step0_vec, step1_vec, step2_vec;
+   LLVMValueRef m0_vec, m1_vec, m2_vec;
+   LLVMValueRef m;
+
+   assert(i < 4);
+   
+   /* lp_type describing 4 x signed int32; passed to lp_build_compare below */
+   memset(&i32_type, 0, sizeof i32_type);
+   i32_type.floating = FALSE; /* values are integers */
+   i32_type.sign = TRUE;      /* values are signed */
+   i32_type.norm = FALSE;     /* values are not normalized */
+   i32_type.width = 32;       /* 32-bit int values */
+   i32_type.length = 4;       /* 4 elements per vector */
+
+   i32vec4_type = lp_build_int32_vec4_type();
+
+   /* c0_vec = {c0, c0, c0, c0}, and likewise for c1 and c2.
+    * Note that we emit this code four times (once per quad) but LLVM is
+    * expected to optimize away three instances of it.
+    */
+   c0_vec = lp_build_broadcast(builder, i32vec4_type, c0);
+   c1_vec = lp_build_broadcast(builder, i32vec4_type, c1);
+   c2_vec = lp_build_broadcast(builder, i32vec4_type, c2);
+
+   lp_build_name(c0_vec, "edgeconst0vec");
+   lp_build_name(c1_vec, "edgeconst1vec");
+   lp_build_name(c2_vec, "edgeconst2vec");
+
+   index = LLVMConstInt(LLVMInt32Type(), i, 0);
+   step0_vec = LLVMBuildLoad(builder, LLVMBuildGEP(builder, step0_ptr, &index, 1, ""), "");
+   step1_vec = LLVMBuildLoad(builder, LLVMBuildGEP(builder, step1_ptr, &index, 1, ""), "");
+   step2_vec = LLVMBuildLoad(builder, LLVMBuildGEP(builder, step2_ptr, &index, 1, ""), "");
+
+   lp_build_name(step0_vec, "step0vec");
+   lp_build_name(step1_vec, "step1vec");
+   lp_build_name(step2_vec, "step2vec");
+
+   m0_vec = lp_build_compare(builder, i32_type, PIPE_FUNC_GREATER, step0_vec, c0_vec);
+   m1_vec = lp_build_compare(builder, i32_type, PIPE_FUNC_GREATER, step1_vec, c1_vec);
+   m2_vec = lp_build_compare(builder, i32_type, PIPE_FUNC_GREATER, step2_vec, c2_vec);
+
+   m = LLVMBuildAnd(builder, m0_vec, m1_vec, "");
+   m = LLVMBuildAnd(builder, m, m2_vec, "");
+
+   lp_build_name(m, "inoutmaskvec");
+
+   *mask = m;
+
+   /*
+    * TODO: if mask == {0,0,0,0} skip the quad (no such early-out is emitted).
+    */
+}
+
+
+/**
  * Generate the fragment shader, depth/stencil test, and alpha tests.
+ * \param i  which quad in the tile, in range [0,3]
  */
 static void
 generate_fs(struct llvmpipe_context *lp,
@@ -201,7 +287,13 @@ generate_fs(struct llvmpipe_context *lp,
             struct lp_build_sampler_soa *sampler,
             LLVMValueRef *pmask,
             LLVMValueRef *color,
-            LLVMValueRef depth_ptr)
+            LLVMValueRef depth_ptr,
+            LLVMValueRef c0,
+            LLVMValueRef c1,
+            LLVMValueRef c2,
+            LLVMValueRef step0_ptr,
+            LLVMValueRef step1_ptr,
+            LLVMValueRef step2_ptr)
 {
    const struct tgsi_token *tokens = shader->base.tokens;
    LLVMTypeRef elem_type;
@@ -216,6 +308,8 @@ generate_fs(struct llvmpipe_context *lp,
    unsigned attrib;
    unsigned chan;
 
+   assert(i < 4);
+
    elem_type = lp_build_elem_type(type);
    vec_type = lp_build_vec_type(type);
    int_vec_type = lp_build_int_vec_type(type);
@@ -235,8 +329,14 @@ generate_fs(struct llvmpipe_context *lp,
    }
    lp_build_flow_scope_declare(flow, &z);
 
+   /* do triangle edge testing */
+   generate_tri_edge_mask(builder, i, pmask,
+                          c0, c1, c2, step0_ptr, step1_ptr, step2_ptr);
+
+   /* 'mask' will control execution based on quad's pixel alive/killed state */
    lp_build_mask_begin(&mask, flow, type, *pmask);
 
+
    early_depth_test =
       key->depth.enabled &&
       !key->alpha.enabled &&
@@ -369,6 +469,9 @@ generate_blend(const struct pipe_blend_state *blend,
 
 /**
  * Generate the runtime callable function for the whole fragment pipeline.
+ * Note that the function which we generate operates on a block of 16
+ * pixels at a time.  The block contains 2x2 quads.  Each quad contains
+ * 2x2 pixels.
  */
 static struct lp_fragment_shader_variant *
 generate_fragment(struct llvmpipe_context *lp,
@@ -384,17 +487,18 @@ generate_fragment(struct llvmpipe_context *lp,
    LLVMTypeRef fs_int_vec_type;
    LLVMTypeRef blend_vec_type;
    LLVMTypeRef blend_int_vec_type;
-   LLVMTypeRef arg_types[9];
+   LLVMTypeRef arg_types[14];
    LLVMTypeRef func_type;
+   LLVMTypeRef int32_vec4_type = lp_build_int32_vec4_type();
    LLVMValueRef context_ptr;
    LLVMValueRef x;
    LLVMValueRef y;
    LLVMValueRef a0_ptr;
    LLVMValueRef dadx_ptr;
    LLVMValueRef dady_ptr;
-   LLVMValueRef mask_ptr;
    LLVMValueRef color_ptr;
    LLVMValueRef depth_ptr;
+   LLVMValueRef c0, c1, c2, step0_ptr, step1_ptr, step2_ptr;
    LLVMBasicBlockRef block;
    LLVMBuilderRef builder;
    LLVMValueRef x0;
@@ -453,8 +557,8 @@ generate_fragment(struct llvmpipe_context *lp,
                          debug_dump_tex_mipfilter(key->sampler[i].min_mip_filter, TRUE));
             debug_printf("  .mag_img_filter = %s\n",
                          debug_dump_tex_filter(key->sampler[i].mag_img_filter, TRUE));
-            if(key->sampler[i].compare_mode)
-               debug_printf("  .compare_mode = %s\n", debug_dump_func(key->sampler[i].compare_func, TRUE));
+            if(key->sampler[i].compare_mode != PIPE_TEX_COMPARE_NONE)
+               debug_printf("  .compare_func = %s\n", debug_dump_func(key->sampler[i].compare_func, TRUE));
             debug_printf("  .normalized_coords = %u\n", key->sampler[i].normalized_coords);
             debug_printf("  .prefilter = %u\n", key->sampler[i].prefilter);
          }
@@ -476,8 +580,8 @@ generate_fragment(struct llvmpipe_context *lp,
    fs_type.sign = TRUE;     /* values are signed */
    fs_type.norm = FALSE;    /* values are not limited to [0,1] or [-1,1] */
    fs_type.width = 32;      /* 32-bit float */
-   fs_type.length = 4;      /* 4 element per vector */
-   num_fs = 4;
+   fs_type.length = 4;      /* 4 elements per vector */
+   num_fs = 4;              /* number of quads per block */
 
    memset(&blend_type, 0, sizeof blend_type);
    blend_type.floating = FALSE; /* values are integers */
@@ -504,9 +608,17 @@ generate_fragment(struct llvmpipe_context *lp,
    arg_types[3] = LLVMPointerType(fs_elem_type, 0);    /* a0 */
    arg_types[4] = LLVMPointerType(fs_elem_type, 0);    /* dadx */
    arg_types[5] = LLVMPointerType(fs_elem_type, 0);    /* dady */
-   arg_types[6] = LLVMPointerType(fs_int_vec_type, 0); /* mask */
-   arg_types[7] = LLVMPointerType(blend_vec_type, 0);  /* color */
-   arg_types[8] = LLVMPointerType(fs_int_vec_type, 0); /* depth */
+   arg_types[6] = LLVMPointerType(blend_vec_type, 0);  /* color */
+   arg_types[7] = LLVMPointerType(fs_int_vec_type, 0); /* depth */
+   arg_types[8] = LLVMInt32Type();                     /* c0 */
+   arg_types[9] = LLVMInt32Type();                    /* c1 */
+   arg_types[10] = LLVMInt32Type();                    /* c2 */
+   /* Note: the step arrays are built as int32[16] but we interpret
+    * them here as int32_vec4[4].
+    */
+   arg_types[11] = LLVMPointerType(int32_vec4_type, 0);/* step0 */
+   arg_types[12] = LLVMPointerType(int32_vec4_type, 0);/* step1 */
+   arg_types[13] = LLVMPointerType(int32_vec4_type, 0);/* step2 */
 
    func_type = LLVMFunctionType(LLVMVoidType(), arg_types, Elements(arg_types), 0);
 
@@ -522,9 +634,14 @@ generate_fragment(struct llvmpipe_context *lp,
    a0_ptr       = LLVMGetParam(variant->function, 3);
    dadx_ptr     = LLVMGetParam(variant->function, 4);
    dady_ptr     = LLVMGetParam(variant->function, 5);
-   mask_ptr     = LLVMGetParam(variant->function, 6);
-   color_ptr    = LLVMGetParam(variant->function, 7);
-   depth_ptr    = LLVMGetParam(variant->function, 8);
+   color_ptr    = LLVMGetParam(variant->function, 6);
+   depth_ptr    = LLVMGetParam(variant->function, 7);
+   c0           = LLVMGetParam(variant->function, 8);
+   c1           = LLVMGetParam(variant->function, 9);
+   c2           = LLVMGetParam(variant->function, 10);
+   step0_ptr    = LLVMGetParam(variant->function, 11);
+   step1_ptr    = LLVMGetParam(variant->function, 12);
+   step2_ptr    = LLVMGetParam(variant->function, 13);
 
    lp_build_name(context_ptr, "context");
    lp_build_name(x, "x");
@@ -532,9 +649,14 @@ generate_fragment(struct llvmpipe_context *lp,
    lp_build_name(a0_ptr, "a0");
    lp_build_name(dadx_ptr, "dadx");
    lp_build_name(dady_ptr, "dady");
-   lp_build_name(mask_ptr, "mask");
    lp_build_name(color_ptr, "color");
    lp_build_name(depth_ptr, "depth");
+   lp_build_name(c0, "c0");
+   lp_build_name(c1, "c1");
+   lp_build_name(c2, "c2");
+   lp_build_name(step0_ptr, "step0");
+   lp_build_name(step1_ptr, "step1");
+   lp_build_name(step2_ptr, "step2");
 
    /*
     * Function body
@@ -548,25 +670,20 @@ generate_fragment(struct llvmpipe_context *lp,
 
    lp_build_interp_soa_init(&interp, shader->base.tokens, builder, fs_type,
                             a0_ptr, dadx_ptr, dady_ptr,
-                            x0, y0, 2, 0);
+                            x0, y0);
 
-#if 0
-   /* C texture sampling */
-   sampler = lp_c_sampler_soa_create(context_ptr);
-#else
    /* code generated texture sampling */
    sampler = lp_llvm_sampler_soa_create(key->sampler, context_ptr);
-#endif
 
+   /* loop over quads in the block */
    for(i = 0; i < num_fs; ++i) {
       LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
       LLVMValueRef out_color[NUM_CHANNELS];
       LLVMValueRef depth_ptr_i;
 
       if(i != 0)
-         lp_build_interp_soa_update(&interp);
+         lp_build_interp_soa_update(&interp, i);
 
-      fs_mask[i] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, mask_ptr, &index, 1, ""), "");
       depth_ptr_i = LLVMBuildGEP(builder, depth_ptr, &index, 1, "");
 
       generate_fs(lp, shader, key,
@@ -576,9 +693,11 @@ generate_fragment(struct llvmpipe_context *lp,
                   i,
                   &interp,
                   sampler,
-                  &fs_mask[i],
+                  &fs_mask[i], /* output */
                   out_color,
-                  depth_ptr_i);
+                  depth_ptr_i,
+                  c0, c1, c2,
+                  step0_ptr, step1_ptr, step2_ptr);
 
       for(chan = 0; chan < NUM_CHANNELS; ++chan)
          fs_out_color[chan][i] = out_color[chan];
@@ -673,7 +792,12 @@ llvmpipe_bind_fs_state(struct pipe_context *pipe, void *fs)
 {
    struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
 
-   llvmpipe->fs = (struct lp_fragment_shader *) fs;
+   if (llvmpipe->fs == fs)
+      return;
+
+   draw_flush(llvmpipe->draw);
+
+   llvmpipe->fs = fs;
 
    llvmpipe->dirty |= LP_NEW_FS;
 }
@@ -688,6 +812,7 @@ llvmpipe_delete_fs_state(struct pipe_context *pipe, void *fs)
    struct lp_fragment_shader_variant *variant;
 
    assert(fs != llvmpipe->fs);
+   (void) llvmpipe;
 
    variant = shader->variants;
    while(variant) {
@@ -723,18 +848,17 @@ llvmpipe_set_constant_buffer(struct pipe_context *pipe,
    assert(shader < PIPE_SHADER_TYPES);
    assert(index == 0);
 
-   if(shader == PIPE_SHADER_VERTEX)
-      draw_flush(llvmpipe->draw);
+   if(llvmpipe->constants[shader].buffer == buffer)
+      return;
+
+   draw_flush(llvmpipe->draw);
 
    /* note: reference counting */
    pipe_buffer_reference(&llvmpipe->constants[shader].buffer, buffer);
 
-   if(shader == PIPE_SHADER_FRAGMENT) {
-      llvmpipe->jit_context.constants = data;
-   }
-
    if(shader == PIPE_SHADER_VERTEX) {
-      draw_set_mapped_constant_buffer(llvmpipe->draw, data, size);
+      draw_set_mapped_constant_buffer(llvmpipe->draw, PIPE_SHADER_VERTEX,
+                                      data, size);
    }
 
    llvmpipe->dirty |= LP_NEW_CONSTANTS;
@@ -813,4 +937,7 @@ llvmpipe_update_fs(struct llvmpipe_context *lp)
       variant = generate_fragment(lp, shader, &key);
 
    shader->current = variant;
+
+   lp_setup_set_fs_function(lp->setup, 
+                            shader->current->jit_function);
 }