OSDN Git Service

i965/fs: Use the LRP instruction for ir_triop_lrp when possible.
[android-x86/external-mesa.git] / src / mesa / drivers / dri / i965 / brw_fs.cpp
index c1ccd92..fe34701 100644 (file)
@@ -146,6 +146,13 @@ fs_inst::fs_inst(enum opcode opcode, fs_reg dst,
       return new(mem_ctx) fs_inst(BRW_OPCODE_##op, dst, src0, src1);    \
    }
 
+#define ALU3(op)                                                        \
+   fs_inst *                                                            \
+   fs_visitor::op(fs_reg dst, fs_reg src0, fs_reg src1, fs_reg src2)    \
+   {                                                                    \
+      return new(mem_ctx) fs_inst(BRW_OPCODE_##op, dst, src0, src1, src2);\
+   } /* ALU3(op): fs_visitor::op() returns a new 3-source BRW_OPCODE_##op */
+
 ALU1(NOT)
 ALU1(MOV)
 ALU1(FRC)
@@ -161,6 +168,7 @@ ALU2(XOR)
 ALU2(SHL)
 ALU2(SHR)
 ALU2(ASR)
+ALU3(LRP)
 
 /** Gen4 predicated IF. */
 fs_inst *
@@ -1710,8 +1718,6 @@ fs_visitor::setup_pull_constants()
                                  dst, index, offset);
         pull->ir = inst->ir;
         pull->annotation = inst->annotation;
-        pull->base_mrf = 14;
-        pull->mlen = 1;
 
         inst->insert_before(pull);
 
@@ -1931,6 +1937,7 @@ fs_visitor::register_coalesce()
 
       bool has_source_modifiers = (inst->src[0].abs ||
                                    inst->src[0].negate ||
+                                   inst->src[0].smear != -1 ||
                                    inst->src[0].file == UNIFORM);
 
       /* Found a move of a GRF to a GRF.  Let's see if we can coalesce
@@ -2062,11 +2069,6 @@ fs_visitor::compute_to_mrf()
             * into a compute-to-MRF.
             */
 
-            /* SENDs can only write to GRFs, so no compute-to-MRF. */
-           if (scan_inst->mlen) {
-              break;
-           }
-
            /* If it's predicated, it (probably) didn't populate all
             * the channels.  We might be able to rewrite everything
             * that writes that reg, but it would require smarter
@@ -2087,7 +2089,7 @@ fs_visitor::compute_to_mrf()
            if (scan_inst->mlen)
               break;
 
-           if (intel->gen >= 6) {
+           if (intel->gen == 6) {
               /* gen6 math instructions must have the destination be
                * GRF, so no compute-to-MRF for them.
                */
@@ -2447,6 +2449,66 @@ fs_visitor::insert_gen4_send_dependency_workarounds()
    }
 }
 
+/**
+ * Lowers each generic FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD: on gen7+ into a
+ * payload setup (MOV of g0, then FS_OPCODE_SET_GLOBAL_OFFSET) feeding the
+ * _GEN7 opcode; on earlier gens by assigning the load's MRF (14) and mlen.
+ *
+ * The expression style allows the CSE pass before this to optimize out
+ * repeated loads from the same offset, and gives the pre-register-allocation
+ * scheduling full flexibility, while the conversion to native instructions
+ * allows the post-register-allocation scheduler the best information
+ * possible.
+ */
+void
+fs_visitor::lower_uniform_pull_constant_loads()
+{
+   foreach_list(node, &this->instructions) {
+      fs_inst *inst = (fs_inst *)node;
+
+      if (inst->opcode != FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD)
+         continue;
+
+      if (intel->gen >= 7) {
+         fs_reg const_offset_reg = inst->src[1];
+         assert(const_offset_reg.file == IMM &&
+                const_offset_reg.type == BRW_REGISTER_TYPE_UD);
+         const_offset_reg.imm.u /= 16; /* TODO confirm: bytes -> 16B units? */
+         fs_reg payload = fs_reg(this, glsl_type::uint_type);
+         struct brw_reg g0 = retype(brw_vec8_grf(0, 0),
+                                    BRW_REGISTER_TYPE_UD);
+
+         fs_inst *setup1 = MOV(payload, fs_reg(g0));
+         setup1->force_writemask_all = true;
+         /* We don't need the second half of this vgrf to be filled with g1
+          * in the 16-wide case, but if we use force_uncompressed then live
+          * variable analysis won't consider this a def!
+          */
+
+         fs_inst *setup2 = new(mem_ctx) fs_inst(FS_OPCODE_SET_GLOBAL_OFFSET,
+                                                payload, payload,
+                                                const_offset_reg);
+
+         setup1->ir = inst->ir;
+         setup1->annotation = inst->annotation;
+         inst->insert_before(setup1);
+         setup2->ir = inst->ir;
+         setup2->annotation = inst->annotation;
+         inst->insert_before(setup2);
+         inst->opcode = FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD_GEN7;
+         inst->src[1] = payload; /* replaces the IMM offset operand */
+      } else {
+         /* Before register allocation, we didn't tell the scheduler about the
+          * MRF we use.  We know it's safe to use this MRF because nothing
+          * else does except for register spill/unspill, which generates and
+          * uses its MRF within a single IR instruction.
+          */
+         inst->base_mrf = 14;
+         inst->mlen = 1;
+      }
+   }
+}
+
 void
 fs_visitor::dump_instruction(fs_inst *inst)
 {
@@ -2460,7 +2522,20 @@ fs_visitor::dump_instruction(fs_inst *inst)
        opcode_descs[inst->opcode].name) {
       printf("%s", opcode_descs[inst->opcode].name);
    } else {
-      printf("op%d", inst->opcode);
+      switch (inst->opcode) {
+      case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD:
+         printf("uniform_pull_const");
+         break;
+      case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD_GEN7:
+         printf("uniform_pull_const_gen7");
+         break;
+      case FS_OPCODE_SET_GLOBAL_OFFSET:
+         printf("set_global_offset");
+         break;
+      default:
+         printf("op%d", inst->opcode);
+         break;
+      }
    }
    if (inst->saturate)
       printf(".sat");
@@ -2519,6 +2594,22 @@ fs_visitor::dump_instruction(fs_inst *inst)
       case BAD_FILE:
          printf("(null)");
          break;
+      case IMM:
+         switch (inst->src[i].type) {
+         case BRW_REGISTER_TYPE_F:
+            printf("%ff", inst->src[i].imm.f);
+            break;
+         case BRW_REGISTER_TYPE_D:
+            printf("%dd", inst->src[i].imm.i);
+            break;
+         case BRW_REGISTER_TYPE_UD:
+            printf("%uu", inst->src[i].imm.u);
+            break;
+         default:
+            printf("???");
+            break;
+         }
+         break;
       default:
          printf("???");
          break;
@@ -2719,6 +2810,8 @@ fs_visitor::run()
 
       schedule_instructions(false);
 
+      lower_uniform_pull_constant_loads();
+
       assign_curb_setup();
       assign_urb_setup();