OSDN Git Service

i965/fs: Use the LRP instruction for ir_triop_lrp when possible.
[android-x86/external-mesa.git] / src / mesa / drivers / dri / i965 / brw_shader.cpp
index 7539d54..2da5ed5 100644 (file)
@@ -75,6 +75,38 @@ brw_shader_precompile(struct gl_context *ctx, struct gl_shader_program *prog)
    return true;
 }
 
+static void /* file-local helper: builds a packing-builtin lowering mask, then runs the pass on ir */
+brw_lower_packing_builtins(struct brw_context *brw,      /* read here only for brw->intel.gen */
+                           gl_shader_type shader_type,   /* only compared against MESA_SHADER_FRAGMENT */
+                           exec_list *ir)                /* IR list handed to lower_packing_builtins() */
+{
+   int ops = LOWER_PACK_SNORM_2x16     /* these eight SNORM/UNORM 2x16 and 4x8 flags are always set */
+           | LOWER_UNPACK_SNORM_2x16
+           | LOWER_PACK_UNORM_2x16
+           | LOWER_UNPACK_UNORM_2x16
+           | LOWER_PACK_SNORM_4x8
+           | LOWER_UNPACK_SNORM_4x8
+           | LOWER_PACK_UNORM_4x8
+           | LOWER_UNPACK_UNORM_4x8;
+
+   if (brw->intel.gen >= 7) {
+      /* Gen7 introduced the f32to16 and f16to32 instructions, which can be
+       * used to execute packHalf2x16 and unpackHalf2x16. For AOS code, no
+       * lowering is needed, so non-fragment stages add no Half2x16 flag.
+       * For SOA code (presumably the fragment stage checked below), the
+       * Half2x16 ops must be scalarized: hence the *_TO_SPLIT flags. */
+      if (shader_type == MESA_SHADER_FRAGMENT) {
+         ops |= LOWER_PACK_HALF_2x16_TO_SPLIT
+             |  LOWER_UNPACK_HALF_2x16_TO_SPLIT;
+      }
+   } else { /* gen < 7: add the non-split Half2x16 lowering flags for every stage */
+      ops |= LOWER_PACK_HALF_2x16
+          |  LOWER_UNPACK_HALF_2x16;
+   }
+
+   lower_packing_builtins(ir, ops); /* single call with the accumulated flag mask */
+}
+
 GLboolean
 brw_link_shader(struct gl_context *ctx, struct gl_shader_program *shProg)
 {
@@ -113,13 +145,20 @@ brw_link_shader(struct gl_context *ctx, struct gl_shader_program *shProg)
       shader->ir = new(shader) exec_list;
       clone_ir_list(mem_ctx, shader->ir, shader->base.ir);
 
+      /* lower_packing_builtins() inserts arithmetic instructions, so it
+       * must precede lower_instructions().
+       */
+      brw_lower_packing_builtins(brw, (gl_shader_type) stage, shader->ir);
       do_mat_op_to_vec(shader->ir);
+      const int lrp_to_arith = (intel->gen < 6 || stage != MESA_SHADER_FRAGMENT)
+                                ? LRP_TO_ARITH : 0;
       lower_instructions(shader->ir,
                         MOD_TO_FRACT |
                         DIV_TO_MUL_RCP |
                         SUB_TO_ADD_NEG |
                         EXP_TO_EXP2 |
-                        LOG_TO_LOG2);
+                        LOG_TO_LOG2 |
+                         lrp_to_arith);
 
       /* Pre-gen6 HW can only nest if-statements 16 deep.  Beyond this,
        * if-statements need to be flattened.
@@ -253,10 +292,14 @@ brw_type_for_base_type(const struct glsl_type *type)
        * way to trip up if we don't.
        */
       return BRW_REGISTER_TYPE_UD;
-   default:
+   case GLSL_TYPE_VOID:
+   case GLSL_TYPE_ERROR:
+   case GLSL_TYPE_INTERFACE:
       assert(!"not reached");
-      return BRW_REGISTER_TYPE_F;
+      break;
    }
+
+   return BRW_REGISTER_TYPE_F;
 }
 
 uint32_t