OSDN Git Service

i965: Emit better code for ir_unop_sign.
authorMatt Turner <mattst88@gmail.com>
Mon, 25 Nov 2013 06:44:32 +0000 (22:44 -0800)
committerMatt Turner <mattst88@gmail.com>
Thu, 5 Dec 2013 04:05:44 +0000 (20:05 -0800)
total instructions in shared programs: 1550449 -> 1550048 (-0.03%)
instructions in affected programs:     15207 -> 14806 (-2.64%)

Reviewed-by: Paul Berry <stereotype441@gmail.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Chris Forbes <chrisf@ijw.co.nz>
src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp

index 97776c8..f81bc8e 100644 (file)
@@ -382,18 +382,34 @@ fs_visitor::visit(ir_expression *ir)
       emit(MOV(this->result, op[0]));
       break;
    case ir_unop_sign:
-      temp = fs_reg(this, ir->type);
+      if (ir->type->is_float()) {
+         /* AND(val, 0x80000000) gives the sign bit.
+          *
+          * Predicated OR ORs 1.0 (0x3f800000) with the sign bit if val is not
+          * zero.
+          */
+         emit(CMP(reg_null_f, op[0], fs_reg(0.0f), BRW_CONDITIONAL_NZ));
 
-      emit(MOV(this->result, fs_reg(0.0f)));
+         op[0].type = BRW_REGISTER_TYPE_UD;
+         this->result.type = BRW_REGISTER_TYPE_UD;
+         emit(AND(this->result, op[0], fs_reg(0x80000000u)));
 
-      emit(CMP(reg_null_f, op[0], fs_reg(0.0f), BRW_CONDITIONAL_G));
-      inst = emit(MOV(this->result, fs_reg(1.0f)));
-      inst->predicate = BRW_PREDICATE_NORMAL;
+         inst = emit(OR(this->result, this->result, fs_reg(0x3f800000u)));
+         inst->predicate = BRW_PREDICATE_NORMAL;
 
-      emit(CMP(reg_null_f, op[0], fs_reg(0.0f), BRW_CONDITIONAL_L));
-      inst = emit(MOV(this->result, fs_reg(-1.0f)));
-      inst->predicate = BRW_PREDICATE_NORMAL;
+         this->result.type = BRW_REGISTER_TYPE_F;
+      } else {
+         /*  ASR(val, 31) -> negative val generates 0xffffffff (signed -1).
+          *               -> non-negative val generates 0x00000000.
+          *  Predicated OR sets 1 if val is positive.
+          */
+         emit(CMP(reg_null_d, op[0], fs_reg(0), BRW_CONDITIONAL_G));
+
+         emit(ASR(this->result, op[0], fs_reg(31)));
 
+         inst = emit(OR(this->result, this->result, fs_reg(1)));
+         inst->predicate = BRW_PREDICATE_NORMAL;
+      }
       break;
    case ir_unop_rcp:
       emit_math(SHADER_OPCODE_RCP, this->result, op[0]);
index aece780..6c5b5c6 100644 (file)
@@ -1259,16 +1259,34 @@ vec4_visitor::visit(ir_expression *ir)
       break;
 
    case ir_unop_sign:
-      emit(MOV(result_dst, src_reg(0.0f)));
+      if (ir->type->is_float()) {
+         /* AND(val, 0x80000000) gives the sign bit.
+          *
+          * Predicated OR ORs 1.0 (0x3f800000) with the sign bit if val is not
+          * zero.
+          */
+         emit(CMP(dst_null_f(), op[0], src_reg(0.0f), BRW_CONDITIONAL_NZ));
 
-      emit(CMP(dst_null_d(), op[0], src_reg(0.0f), BRW_CONDITIONAL_G));
-      inst = emit(MOV(result_dst, src_reg(1.0f)));
-      inst->predicate = BRW_PREDICATE_NORMAL;
+         op[0].type = BRW_REGISTER_TYPE_UD;
+         result_dst.type = BRW_REGISTER_TYPE_UD;
+         emit(AND(result_dst, op[0], src_reg(0x80000000u)));
 
-      emit(CMP(dst_null_d(), op[0], src_reg(0.0f), BRW_CONDITIONAL_L));
-      inst = emit(MOV(result_dst, src_reg(-1.0f)));
-      inst->predicate = BRW_PREDICATE_NORMAL;
+         inst = emit(OR(result_dst, src_reg(result_dst), src_reg(0x3f800000u)));
+         inst->predicate = BRW_PREDICATE_NORMAL;
+
+         this->result.type = BRW_REGISTER_TYPE_F;
+      } else {
+         /*  ASR(val, 31) -> negative val generates 0xffffffff (signed -1).
+          *               -> non-negative val generates 0x00000000.
+          *  Predicated OR sets 1 if val is positive.
+          */
+         emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_G));
+
+         emit(ASR(result_dst, op[0], src_reg(31)));
 
+         inst = emit(OR(result_dst, src_reg(result_dst), src_reg(1)));
+         inst->predicate = BRW_PREDICATE_NORMAL;
+      }
       break;
 
    case ir_unop_rcp: