return new(mem_ctx) fs_inst(BRW_OPCODE_##op, dst, src0, src1); \
}
+/* Defines fs_visitor::op(dst, src0, src1, src2): allocates a three-source
+ * fs_inst with opcode BRW_OPCODE_<op> out of mem_ctx and returns it.  The
+ * generated method only constructs the instruction; it does not add it to
+ * any instruction list.
+ */
+#define ALU3(op) \
+ fs_inst * \
+ fs_visitor::op(fs_reg dst, fs_reg src0, fs_reg src1, fs_reg src2) \
+ { \
+ return new(mem_ctx) fs_inst(BRW_OPCODE_##op, dst, src0, src1, src2);\
+ }
+
ALU1(NOT)
ALU1(MOV)
ALU1(FRC)
ALU2(SHL)
ALU2(SHR)
ALU2(ASR)
+ALU3(LRP)
/** Gen4 predicated IF. */
fs_inst *
dst, index, offset);
pull->ir = inst->ir;
pull->annotation = inst->annotation;
- pull->base_mrf = 14;
- pull->mlen = 1;
inst->insert_before(pull);
bool has_source_modifiers = (inst->src[0].abs ||
inst->src[0].negate ||
+ inst->src[0].smear != -1 ||
inst->src[0].file == UNIFORM);
/* Found a move of a GRF to a GRF. Let's see if we can coalesce
* into a compute-to-MRF.
*/
- /* SENDs can only write to GRFs, so no compute-to-MRF. */
- if (scan_inst->mlen) {
- break;
- }
-
/* If it's predicated, it (probably) didn't populate all
* the channels. We might be able to rewrite everything
* that writes that reg, but it would require smarter
if (scan_inst->mlen)
break;
- if (intel->gen >= 6) {
+ if (intel->gen == 6) {
/* gen6 math instructions must have the destination be
* GRF, so no compute-to-MRF for them.
*/
}
}
+/**
+ * Turns the generic expression-style uniform pull constant load instruction
+ * into a hardware-specific series of instructions for loading a pull
+ * constant.
+ *
+ * The expression style allows the CSE pass before this to optimize out
+ * repeated loads from the same offset, and gives the pre-register-allocation
+ * scheduling full flexibility, while the conversion to native instructions
+ * allows the post-register-allocation scheduler the best information
+ * possible.
+ *
+ * Walks this->instructions and rewrites every
+ * FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD in place.  On gen7+ two setup
+ * instructions that build a payload register are inserted ahead of the load
+ * and its opcode becomes FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD_GEN7; on
+ * earlier generations only the load's base_mrf and mlen fields are filled
+ * in.  All other instructions are left untouched.
+ */
+void
+fs_visitor::lower_uniform_pull_constant_loads()
+{
+ foreach_list(node, &this->instructions) {
+ fs_inst *inst = (fs_inst *)node;
+
+ /* Only the generic pull-constant load needs lowering. */
+ if (inst->opcode != FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD)
+ continue;
+
+ if (intel->gen >= 7) {
+ /* src[1] is required to be an unsigned immediate offset.  It is
+ * rescaled by 1/16 on a local copy before being handed to the setup
+ * instruction -- presumably converting a byte offset into 16-byte
+ * units; NOTE(review): confirm against the generator for
+ * FS_OPCODE_SET_GLOBAL_OFFSET.
+ */
+ fs_reg const_offset_reg = inst->src[1];
+ assert(const_offset_reg.file == IMM &&
+ const_offset_reg.type == BRW_REGISTER_TYPE_UD);
+ const_offset_reg.imm.u /= 16;
+ fs_reg payload = fs_reg(this, glsl_type::uint_type);
+ struct brw_reg g0 = retype(brw_vec8_grf(0, 0),
+ BRW_REGISTER_TYPE_UD);
+
+ /* First setup step: copy g0 into the freshly allocated payload
+ * register, writing all channels regardless of the execution mask.
+ */
+ fs_inst *setup1 = MOV(payload, fs_reg(g0));
+ setup1->force_writemask_all = true;
+ /* We don't need the second half of this vgrf to be filled with g1
+ * in the 16-wide case, but if we use force_uncompressed then live
+ * variable analysis won't consider this a def!
+ */
+
+ /* Second setup step: payload is both destination and first source,
+ * with the rescaled offset as the second source.
+ */
+ fs_inst *setup2 = new(mem_ctx) fs_inst(FS_OPCODE_SET_GLOBAL_OFFSET,
+ payload, payload,
+ const_offset_reg);
+
+ /* Both setup instructions inherit the load's IR pointer and
+ * annotation and are placed immediately before it, setup1 first.
+ */
+ setup1->ir = inst->ir;
+ setup1->annotation = inst->annotation;
+ inst->insert_before(setup1);
+ setup2->ir = inst->ir;
+ setup2->annotation = inst->annotation;
+ inst->insert_before(setup2);
+ /* The load itself now takes the payload register in place of the
+ * immediate offset.
+ */
+ inst->opcode = FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD_GEN7;
+ inst->src[1] = payload;
+ } else {
+ /* Before register allocation, we didn't tell the scheduler about the
+ * MRF we use. We know it's safe to use this MRF because nothing
+ * else does except for register spill/unspill, which generates and
+ * uses its MRF within a single IR instruction.
+ */
+ inst->base_mrf = 14;
+ inst->mlen = 1;
+ }
+ }
+}
+
void
fs_visitor::dump_instruction(fs_inst *inst)
{
opcode_descs[inst->opcode].name) {
printf("%s", opcode_descs[inst->opcode].name);
} else {
- printf("op%d", inst->opcode);
+ switch (inst->opcode) {
+ case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD:
+ printf("uniform_pull_const");
+ break;
+ case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD_GEN7:
+ printf("uniform_pull_const_gen7");
+ break;
+ case FS_OPCODE_SET_GLOBAL_OFFSET:
+ printf("set_global_offset");
+ break;
+ default:
+ printf("op%d", inst->opcode);
+ break;
+ }
}
if (inst->saturate)
printf(".sat");
case BAD_FILE:
printf("(null)");
break;
+ case IMM:
+ switch (inst->src[i].type) {
+ case BRW_REGISTER_TYPE_F:
+ printf("%ff", inst->src[i].imm.f);
+ break;
+ case BRW_REGISTER_TYPE_D:
+ printf("%dd", inst->src[i].imm.i);
+ break;
+ case BRW_REGISTER_TYPE_UD:
+ printf("%uu", inst->src[i].imm.u);
+ break;
+ default:
+ printf("???");
+ break;
+ }
+ break;
default:
printf("???");
break;
schedule_instructions(false);
+ lower_uniform_pull_constant_loads();
+
assign_curb_setup();
assign_urb_setup();