OSDN Git Service

i965/fs: Implement integer quotient and remainder math operations.
author Kenneth Graunke <kenneth@whitecape.org>
Thu, 29 Sep 2011 00:37:54 +0000 (17:37 -0700)
committer Kenneth Graunke <kenneth@whitecape.org>
Mon, 3 Oct 2011 00:01:09 +0000 (17:01 -0700)
Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Tested-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Eric Anholt <eric@anholt.net>
src/mesa/drivers/dri/i965/brw_defines.h
src/mesa/drivers/dri/i965/brw_fs.cpp
src/mesa/drivers/dri/i965/brw_fs.h
src/mesa/drivers/dri/i965/brw_fs_emit.cpp
src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp
src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
src/mesa/drivers/dri/i965/brw_shader.cpp

index 05a1337..a111630 100644 (file)
@@ -622,6 +622,8 @@ enum opcode {
    SHADER_OPCODE_EXP2,
    SHADER_OPCODE_LOG2,
    SHADER_OPCODE_POW,
+   SHADER_OPCODE_INT_QUOTIENT,
+   SHADER_OPCODE_INT_REMAINDER,
    SHADER_OPCODE_SIN,
    SHADER_OPCODE_COS,
    FS_OPCODE_DDX,
index 9a89f88..1d93a51 100644 (file)
@@ -152,6 +152,8 @@ fs_visitor::implied_mrf_writes(fs_inst *inst)
    case SHADER_OPCODE_COS:
       return 1 * c->dispatch_width / 8;
    case SHADER_OPCODE_POW:
+   case SHADER_OPCODE_INT_QUOTIENT:
+   case SHADER_OPCODE_INT_REMAINDER:
       return 2 * c->dispatch_width / 8;
    case FS_OPCODE_TEX:
    case FS_OPCODE_TXB:
@@ -576,7 +578,15 @@ fs_visitor::emit_math(enum opcode opcode, fs_reg dst, fs_reg src0, fs_reg src1)
    int base_mrf = 2;
    fs_inst *inst;
 
-   assert(opcode == SHADER_OPCODE_POW);
+   switch (opcode) {
+   case SHADER_OPCODE_POW:
+   case SHADER_OPCODE_INT_QUOTIENT:
+   case SHADER_OPCODE_INT_REMAINDER:
+      break;
+   default:
+      assert(!"not reached: unsupported binary math opcode.");
+      return NULL;
+   }
 
    if (intel->gen >= 6) {
       /* Can't do hstride == 0 args to gen6 math, so expand it out.
@@ -586,19 +596,21 @@ fs_visitor::emit_math(enum opcode opcode, fs_reg dst, fs_reg src0, fs_reg src1)
        */
       if (src0.file == UNIFORM || src0.abs || src0.negate) {
         fs_reg expanded = fs_reg(this, glsl_type::float_type);
+        expanded.type = src0.type;
         emit(BRW_OPCODE_MOV, expanded, src0);
         src0 = expanded;
       }
 
       if (src1.file == UNIFORM || src1.abs || src1.negate) {
         fs_reg expanded = fs_reg(this, glsl_type::float_type);
+        expanded.type = src1.type;
         emit(BRW_OPCODE_MOV, expanded, src1);
         src1 = expanded;
       }
 
       inst = emit(opcode, dst, src0, src1);
    } else {
-      emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + 1), src1);
+      emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + 1, src1.type), src1);
       inst = emit(opcode, dst, src0, reg_null_f);
 
       inst->base_mrf = base_mrf;
index f6a57ba..56181a3 100644 (file)
@@ -305,6 +305,8 @@ public:
              opcode == SHADER_OPCODE_LOG2 ||
              opcode == SHADER_OPCODE_SIN ||
              opcode == SHADER_OPCODE_COS ||
+             opcode == SHADER_OPCODE_INT_QUOTIENT ||
+             opcode == SHADER_OPCODE_INT_REMAINDER ||
              opcode == SHADER_OPCODE_POW);
    }
 
index 8176a76..4c158fe 100644 (file)
@@ -794,6 +794,8 @@ fs_visitor::generate_code()
            generate_math_gen4(inst, dst, src[0]);
         }
         break;
+      case SHADER_OPCODE_INT_QUOTIENT:
+      case SHADER_OPCODE_INT_REMAINDER:
       case SHADER_OPCODE_POW:
         if (intel->gen >= 6) {
            generate_math2_gen6(inst, dst, src[0], src[1]);
index 1f83ee2..910f329 100644 (file)
@@ -75,11 +75,13 @@ public:
       case SHADER_OPCODE_RSQ:
         this->latency = 2 * chans * math_latency;
         break;
+      case SHADER_OPCODE_INT_QUOTIENT:
       case SHADER_OPCODE_SQRT:
       case SHADER_OPCODE_LOG2:
         /* full precision log.  partial is 2. */
         this->latency = 3 * chans * math_latency;
         break;
+      case SHADER_OPCODE_INT_REMAINDER:
       case SHADER_OPCODE_EXP2:
         /* full precision.  partial is 3, same throughput. */
         this->latency = 4 * chans * math_latency;
index 3af5780..07ea84f 100644 (file)
@@ -294,10 +294,14 @@ fs_visitor::visit(ir_expression *ir)
       }
       break;
    case ir_binop_div:
-      assert(!"not reached: should be handled by ir_div_to_mul_rcp");
+      /* Floating point should be lowered by DIV_TO_MUL_RCP in the compiler. */
+      assert(ir->type->is_integer());
+      emit_math(SHADER_OPCODE_INT_QUOTIENT, this->result, op[0], op[1]);
       break;
    case ir_binop_mod:
-      assert(!"ir_binop_mod should have been converted to b * fract(a/b)");
+      /* Floating point should be lowered by MOD_TO_FRACT in the compiler. */
+      assert(ir->type->is_integer());
+      emit_math(SHADER_OPCODE_INT_REMAINDER, this->result, op[0], op[1]);
       break;
 
    case ir_binop_less:
index a6ed810..c938c75 100644 (file)
@@ -227,6 +227,10 @@ brw_math_function(enum opcode op)
       return BRW_MATH_FUNCTION_SIN;
    case SHADER_OPCODE_COS:
       return BRW_MATH_FUNCTION_COS;
+   case SHADER_OPCODE_INT_QUOTIENT:
+      return BRW_MATH_FUNCTION_INT_DIV_QUOTIENT;
+   case SHADER_OPCODE_INT_REMAINDER:
+      return BRW_MATH_FUNCTION_INT_DIV_REMAINDER;
    default:
       assert(!"not reached: unknown math function");
       return 0;