From 74e927bcafad0a994be5f88fbda4058bef08bc51 Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Thu, 18 Aug 2011 11:55:42 -0700 Subject: [PATCH] i965/fs: Split generate_math into gen4/gen6 and 1/2 operand variants. This mirrors the structure Eric used in the new VS backend, and seems simpler. In particular, the math1/math2 split will avoid having to figure out how many operands there are, as this is already known by the caller. Signed-off-by: Kenneth Graunke Reviewed-by: Eric Anholt --- src/mesa/drivers/dri/i965/brw_fs.h | 11 ++- src/mesa/drivers/dri/i965/brw_fs_emit.cpp | 132 ++++++++++++++++++------------ 2 files changed, 89 insertions(+), 54 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index 0bd518f7702..f6a57bacb33 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -487,7 +487,16 @@ public: void generate_linterp(fs_inst *inst, struct brw_reg dst, struct brw_reg *src); void generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src); - void generate_math(fs_inst *inst, struct brw_reg dst, struct brw_reg *src); + void generate_math1_gen6(fs_inst *inst, + struct brw_reg dst, + struct brw_reg src); + void generate_math2_gen6(fs_inst *inst, + struct brw_reg dst, + struct brw_reg src0, + struct brw_reg src1); + void generate_math_gen4(fs_inst *inst, + struct brw_reg dst, + struct brw_reg src); void generate_discard(fs_inst *inst); void generate_ddx(fs_inst *inst, struct brw_reg dst, struct brw_reg src); void generate_ddy(fs_inst *inst, struct brw_reg dst, struct brw_reg src); diff --git a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp index f742e84e1c6..8176a76a85e 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp @@ -143,69 +143,85 @@ fs_visitor::generate_linterp(fs_inst *inst, } void -fs_visitor::generate_math(fs_inst *inst, - struct brw_reg dst, struct brw_reg *src) +fs_visitor::generate_math1_gen6(fs_inst *inst, + struct brw_reg dst, + struct brw_reg src0) { int op = brw_math_function(inst->opcode); - if (intel->gen >= 6) { - assert(inst->mlen == 0); - - if (inst->opcode == SHADER_OPCODE_POW) { - brw_set_compression_control(p, BRW_COMPRESSION_NONE); - brw_math2(p, dst, op, src[0], src[1]); + assert(inst->mlen == 0); - if (c->dispatch_width == 16) { - brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF); - brw_math2(p, sechalf(dst), op, sechalf(src[0]), sechalf(src[1])); - brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); - } - } else { - brw_set_compression_control(p, BRW_COMPRESSION_NONE); - brw_math(p, dst, - op, - inst->saturate ? BRW_MATH_SATURATE_SATURATE : - BRW_MATH_SATURATE_NONE, - 0, src[0], - BRW_MATH_DATA_VECTOR, - BRW_MATH_PRECISION_FULL); - - if (c->dispatch_width == 16) { - brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF); - brw_math(p, sechalf(dst), - op, - inst->saturate ? BRW_MATH_SATURATE_SATURATE : - BRW_MATH_SATURATE_NONE, - 0, sechalf(src[0]), - BRW_MATH_DATA_VECTOR, - BRW_MATH_PRECISION_FULL); - brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); - } - } - } else /* gen <= 5 */{ - assert(inst->mlen >= 1); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_math(p, dst, + op, + inst->saturate ? BRW_MATH_SATURATE_SATURATE : + BRW_MATH_SATURATE_NONE, + 0, src0, + BRW_MATH_DATA_VECTOR, + BRW_MATH_PRECISION_FULL); - brw_set_compression_control(p, BRW_COMPRESSION_NONE); - brw_math(p, dst, + if (c->dispatch_width == 16) { + brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF); + brw_math(p, sechalf(dst), op, inst->saturate ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE, - inst->base_mrf, src[0], + 0, sechalf(src0), BRW_MATH_DATA_VECTOR, BRW_MATH_PRECISION_FULL); + brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); + } +} - if (c->dispatch_width == 16) { - brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF); - brw_math(p, sechalf(dst), - op, - inst->saturate ? BRW_MATH_SATURATE_SATURATE : - BRW_MATH_SATURATE_NONE, - inst->base_mrf + 1, sechalf(src[0]), - BRW_MATH_DATA_VECTOR, - BRW_MATH_PRECISION_FULL); +void +fs_visitor::generate_math2_gen6(fs_inst *inst, + struct brw_reg dst, + struct brw_reg src0, + struct brw_reg src1) +{ + int op = brw_math_function(inst->opcode); - brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); - } + assert(inst->mlen == 0); + + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_math2(p, dst, op, src0, src1); + + if (c->dispatch_width == 16) { + brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF); + brw_math2(p, sechalf(dst), op, sechalf(src0), sechalf(src1)); + brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); + } +} + +void +fs_visitor::generate_math_gen4(fs_inst *inst, + struct brw_reg dst, + struct brw_reg src) +{ + int op = brw_math_function(inst->opcode); + + assert(inst->mlen >= 1); + + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_math(p, dst, + op, + inst->saturate ? BRW_MATH_SATURATE_SATURATE : + BRW_MATH_SATURATE_NONE, + inst->base_mrf, src, + BRW_MATH_DATA_VECTOR, + BRW_MATH_PRECISION_FULL); + + if (c->dispatch_width == 16) { + brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF); + brw_math(p, sechalf(dst), + op, + inst->saturate ? BRW_MATH_SATURATE_SATURATE : + BRW_MATH_SATURATE_NONE, + inst->base_mrf + 1, sechalf(src), + BRW_MATH_DATA_VECTOR, + BRW_MATH_PRECISION_FULL); + + brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); } } @@ -770,10 +786,20 @@ fs_visitor::generate_code() case SHADER_OPCODE_SQRT: case SHADER_OPCODE_EXP2: case SHADER_OPCODE_LOG2: - case SHADER_OPCODE_POW: case SHADER_OPCODE_SIN: case SHADER_OPCODE_COS: - generate_math(inst, dst, src); + if (intel->gen >= 6) { + generate_math1_gen6(inst, dst, src[0]); + } else { + generate_math_gen4(inst, dst, src[0]); + } + break; + case SHADER_OPCODE_POW: + if (intel->gen >= 6) { + generate_math2_gen6(inst, dst, src[0], src[1]); + } else { + generate_math_gen4(inst, dst, src[0]); + } break; case FS_OPCODE_PIXEL_X: generate_pixel_xy(dst, true); -- 2.11.0