nir: add amul instruction

author Rob Clark <robdclark@chromium.org>

Thu, 26 Sep 2019 17:32:00 +0000 (10:32 -0700)

committer Rob Clark <robdclark@chromium.org>

Fri, 18 Oct 2019 22:08:54 +0000 (15:08 -0700)
author Rob Clark <robdclark@chromium.org>
Thu, 26 Sep 2019 17:32:00 +0000 (10:32 -0700)
committer Rob Clark <robdclark@chromium.org>
Fri, 18 Oct 2019 22:08:54 +0000 (15:08 -0700)
diff --git a/src/compiler/glsl/gl_nir_lower_buffers.c b/src/compiler/glsl/gl_nir_lower_buffers.c

index c7b5858..59bb582 100644 (file)
--- a/src/compiler/glsl/gl_nir_lower_buffers.c
+++ b/src/compiler/glsl/gl_nir_lower_buffers.c
@@ -59,7 +59,7 @@ get_block_array_index(nir_builder *b, nir_deref_instr *deref,
        } else {
           nir_ssa_def *arr_index = nir_ssa_for_src(b, deref->arr.index, 1);
           arr_index = nir_umin(b, arr_index, nir_imm_int(b, arr_size - 1));
-         nir_ssa_def *arr_offset = nir_imul_imm(b, arr_index, array_elements);
+         nir_ssa_def *arr_offset = nir_amul_imm(b, arr_index, array_elements);
           if (nonconst_index)
              nonconst_index = nir_iadd(b, nonconst_index, arr_offset);
           else
diff --git a/src/compiler/nir/nir_builder.h b/src/compiler/nir/nir_builder.h

index cad0e13..5714ea0 100644 (file)
--- a/src/compiler/nir/nir_builder.h
+++ b/src/compiler/nir/nir_builder.h
@@ -646,7 +646,7 @@ nir_iadd_imm(nir_builder *build, nir_ssa_def *x, uint64_t y)
  }
  
  static inline nir_ssa_def *
-nir_imul_imm(nir_builder *build, nir_ssa_def *x, uint64_t y)
+_nir_mul_imm(nir_builder *build, nir_ssa_def *x, uint64_t y, bool amul)
  {
     assert(x->bit_size <= 64);
     if (x->bit_size < 64)
@@ -658,12 +658,26 @@ nir_imul_imm(nir_builder *build, nir_ssa_def *x, uint64_t y)
        return x;
     } else if (util_is_power_of_two_or_zero64(y)) {
        return nir_ishl(build, x, nir_imm_int(build, ffsll(y) - 1));
+   } else if (amul) {
+      return nir_amul(build, x, nir_imm_intN_t(build, y, x->bit_size));
     } else {
        return nir_imul(build, x, nir_imm_intN_t(build, y, x->bit_size));
     }
  }
  
  static inline nir_ssa_def *
+nir_imul_imm(nir_builder *build, nir_ssa_def *x, uint64_t y)
+{
+   return _nir_mul_imm(build, x, y, false);
+}
+
+static inline nir_ssa_def *
+nir_amul_imm(nir_builder *build, nir_ssa_def *x, uint64_t y)
+{
+   return _nir_mul_imm(build, x, y, true);
+}
+
+static inline nir_ssa_def *
  nir_fadd_imm(nir_builder *build, nir_ssa_def *x, double y)
  {
     return nir_fadd(build, x, nir_imm_floatN_t(build, y, x->bit_size));
diff --git a/src/compiler/nir/nir_deref.c b/src/compiler/nir/nir_deref.c

index f7a07c6..7ec4601 100644 (file)
--- a/src/compiler/nir/nir_deref.c
+++ b/src/compiler/nir/nir_deref.c
@@ -297,7 +297,7 @@ nir_build_deref_offset(nir_builder *b, nir_deref_instr *deref,
        if ((*p)->deref_type == nir_deref_type_array) {
           nir_ssa_def *index = nir_ssa_for_src(b, (*p)->arr.index, 1);
           int stride = type_get_array_stride((*p)->type, size_align);
-         offset = nir_iadd(b, offset, nir_imul_imm(b, index, stride));
+         offset = nir_iadd(b, offset, nir_amul_imm(b, index, stride));
        } else if ((*p)->deref_type == nir_deref_type_struct) {
           /* p starts at path[1], so this is safe */
           nir_deref_instr *parent = *(p - 1);
diff --git a/src/compiler/nir/nir_lower_io.c b/src/compiler/nir/nir_lower_io.c

index 8e20f5e..6f257d9 100644 (file)
--- a/src/compiler/nir/nir_lower_io.c
+++ b/src/compiler/nir/nir_lower_io.c
@@ -206,7 +206,7 @@ get_io_offset(nir_builder *b, nir_deref_instr *deref,
           unsigned size = type_size((*p)->type, bts);
  
           nir_ssa_def *mul =
-            nir_imul_imm(b, nir_ssa_for_src(b, (*p)->arr.index, 1), size);
+            nir_amul_imm(b, nir_ssa_for_src(b, (*p)->arr.index, 1), size);
  
           offset = nir_iadd(b, offset, mul);
        } else if ((*p)->deref_type == nir_deref_type_struct) {
@@ -1094,7 +1094,7 @@ nir_explicit_io_address_from_deref(nir_builder *b, nir_deref_instr *deref,
        nir_ssa_def *index = nir_ssa_for_src(b, deref->arr.index, 1);
        index = nir_i2i(b, index, base_addr->bit_size);
        return build_addr_iadd(b, base_addr, addr_format,
-                                nir_imul_imm(b, index, stride));
+                                nir_amul_imm(b, index, stride));
     }
  
     case nir_deref_type_ptr_as_array: {
@@ -1102,7 +1102,7 @@ nir_explicit_io_address_from_deref(nir_builder *b, nir_deref_instr *deref,
        index = nir_i2i(b, index, base_addr->bit_size);
        unsigned stride = nir_deref_instr_ptr_as_array_stride(deref);
        return build_addr_iadd(b, base_addr, addr_format,
-                                nir_imul_imm(b, index, stride));
+                                nir_amul_imm(b, index, stride));
     }
  
     case nir_deref_type_array_wildcard:
diff --git a/src/compiler/nir/nir_lower_io_to_vector.c b/src/compiler/nir/nir_lower_io_to_vector.c

index 84bd941..0ef4021 100644 (file)
--- a/src/compiler/nir/nir_lower_io_to_vector.c
+++ b/src/compiler/nir/nir_lower_io_to_vector.c
@@ -319,7 +319,7 @@ build_array_index(nir_builder *b, nir_deref_instr *deref, nir_ssa_def *base,
                                     deref->dest.ssa.bit_size);
        return nir_iadd(
           b, build_array_index(b, nir_deref_instr_parent(deref), base, vs_in),
-         nir_imul_imm(b, index, glsl_count_attribute_slots(deref->type, vs_in)));
+         nir_amul_imm(b, index, glsl_count_attribute_slots(deref->type, vs_in)));
     }
     default:
        unreachable("Invalid deref instruction type");
diff --git a/src/compiler/nir/nir_opcodes.py b/src/compiler/nir/nir_opcodes.py

index 4bb21ea..e74e07b 100644 (file)
--- a/src/compiler/nir/nir_opcodes.py
+++ b/src/compiler/nir/nir_opcodes.py
@@ -1047,6 +1047,18 @@ dst.z = src2.x;
  dst.w = src3.x;
  """)
  
+# An integer multiply instruction for address calculation.  This is
+# similar to imul, except that the results are undefined in case of
+# overflow.  Overflow is defined according to the size of the variable
+# being dereferenced.
+#
+# This relaxed definition, compared to imul, allows an optimization
+# pass to propagate bounds (i.e., from a load/store intrinsic) to the
+# sources, such that lower-precision integer multiplies can be used.
+# This is useful on hw that has 24b or perhaps 16b integer multiply
+# instructions.
+binop("amul", tint, _2src_commutative + associative, "src0 * src1")
+
  # ir3-specific instruction that maps directly to mul-add shift high mix,
  # (IMADSH_MIX16 i.e. ah * bl << 16 + c). It is used for lowering integer
  # multiplication (imul) on Freedreno backend..
diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py

index 2e4d792..7bac217 100644 (file)
--- a/src/compiler/nir/nir_opt_algebraic.py
+++ b/src/compiler/nir/nir_opt_algebraic.py
@@ -1112,6 +1112,11 @@ optimizations.extend([
  
     (('isign', a), ('imin', ('imax', a, -1), 1), 'options->lower_isign'),
     (('fsign', a), ('fsub', ('b2f', ('flt', 0.0, a)), ('b2f', ('flt', a, 0.0))), 'options->lower_fsign'),
+
+   # Address/offset calculations:
+   # for now, unconditionally convert amul to imul; this will
+   # change in the following patch
+   (('amul', a, b), ('imul', a, b)),
  ])
  
  # bit_size dependent lowerings
author	Rob Clark <robdclark@chromium.org>
	Thu, 26 Sep 2019 17:32:00 +0000 (10:32 -0700)
committer	Rob Clark <robdclark@chromium.org>
	Fri, 18 Oct 2019 22:08:54 +0000 (15:08 -0700)
src/compiler/glsl/gl_nir_lower_buffers.c		patch \| blob \| history
src/compiler/nir/nir_builder.h		patch \| blob \| history
src/compiler/nir/nir_deref.c		patch \| blob \| history
src/compiler/nir/nir_lower_io.c		patch \| blob \| history
src/compiler/nir/nir_lower_io_to_vector.c		patch \| blob \| history
src/compiler/nir/nir_opcodes.py		patch \| blob \| history
src/compiler/nir/nir_opt_algebraic.py		patch \| blob \| history