OSDN Git Service

nir: Add scoped_memory_barrier intrinsic
authorCaio Marcelo de Oliveira Filho <caio.oliveira@intel.com>
Thu, 18 Jul 2019 23:14:03 +0000 (16:14 -0700)
committerCaio Marcelo de Oliveira Filho <caio.oliveira@intel.com>
Thu, 24 Oct 2019 18:39:55 +0000 (11:39 -0700)
Add a NIR intrinsic that represents a memory barrier in the SPIR-V /
Vulkan Memory Model, with extra attributes that describe the barrier:

- Ordering: whether it is an Acquire or a Release;
- "Cache control": availability ("ensure this gets written to memory")
  and visibility ("ensure my cache is up to date when I'm reading");
- Variable modes: which memory types this barrier applies to;
- Scope: how far this barrier applies.

Note that unlike in SPIR-V, the "Storage Semantics" and the "Memory
Semantics" are split into two different attributes so we can use
variable modes for the former.

NIR passes that take barriers into consideration were also changed:

- nir_opt_copy_prop_vars: clean up the values for the modes of an
  ACQUIRE barrier.  The effect of copy propagation is to "pull up a
  load" (by not performing it), which is what ACQUIRE restricts.

- nir_opt_dead_write_vars and nir_opt_combine_stores: clean up the
  pending writes for the modes of a RELEASE barrier.  The effect of
  removing dead writes is to "push down a store", which is what RELEASE
  restricts.

- nir_opt_access: treat ACQUIRE and RELEASE as a full barrier for
  the modes.  This is conservative, but since this is a GL-specific
  pass, it doesn't make a difference for now.

v2: Fix the scoped barrier handling in copy propagation.  (Jason)
    Add scoped barrier handling to nir_opt_access and
    nir_opt_combine_stores.  (Rhys)

Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
src/compiler/nir/nir.h
src/compiler/nir/nir_intrinsics.py
src/compiler/nir/nir_opt_access.c
src/compiler/nir/nir_opt_combine_stores.c
src/compiler/nir/nir_opt_copy_prop_vars.c
src/compiler/nir/nir_opt_dead_write_vars.c
src/compiler/nir/nir_print.c

index 6496ad9..f5f9826 100644 (file)
@@ -1380,6 +1380,24 @@ nir_intrinsic_get_var(nir_intrinsic_instr *intrin, unsigned i)
    return nir_deref_instr_get_variable(nir_src_as_deref(intrin->src[i]));
 }
 
+typedef enum {
+   /* Memory ordering. */
+   NIR_MEMORY_ACQUIRE        = 1 << 0,
+   NIR_MEMORY_RELEASE        = 1 << 1,
+
+   /* Memory visibility operations. */
+   NIR_MEMORY_MAKE_AVAILABLE = 1 << 3,
+   NIR_MEMORY_MAKE_VISIBLE   = 1 << 4,
+} nir_memory_semantics;
+
+typedef enum {
+   NIR_SCOPE_DEVICE,
+   NIR_SCOPE_QUEUE_FAMILY,
+   NIR_SCOPE_WORKGROUP,
+   NIR_SCOPE_SUBGROUP,
+   NIR_SCOPE_INVOCATION,
+} nir_scope;
+
 /**
  * \name NIR intrinsics semantic flags
  *
@@ -1529,6 +1547,21 @@ typedef enum {
    /* Driver location for nir_load_patch_location_ir3 */
    NIR_INTRINSIC_DRIVER_LOCATION,
 
+   /**
+    * Mask of nir_memory_semantics, includes ordering and visibility.
+    */
+   NIR_INTRINSIC_MEMORY_SEMANTICS,
+
+   /**
+    * Mask of nir_variable_modes affected by the memory operation.
+    */
+   NIR_INTRINSIC_MEMORY_MODES,
+
+   /**
+    * Value of nir_scope.
+    */
+   NIR_INTRINSIC_MEMORY_SCOPE,
+
    NIR_INTRINSIC_NUM_INDEX_FLAGS,
 
 } nir_intrinsic_index_flag;
@@ -1638,6 +1671,9 @@ INTRINSIC_IDX_ACCESSORS(desc_type, DESC_TYPE, unsigned)
 INTRINSIC_IDX_ACCESSORS(type, TYPE, nir_alu_type)
 INTRINSIC_IDX_ACCESSORS(swizzle_mask, SWIZZLE_MASK, unsigned)
 INTRINSIC_IDX_ACCESSORS(driver_location, DRIVER_LOCATION, unsigned)
+INTRINSIC_IDX_ACCESSORS(memory_semantics, MEMORY_SEMANTICS, nir_memory_semantics)
+INTRINSIC_IDX_ACCESSORS(memory_modes, MEMORY_MODES, nir_variable_mode)
+INTRINSIC_IDX_ACCESSORS(memory_scope, MEMORY_SCOPE, nir_scope)
 
 static inline void
 nir_intrinsic_set_align(nir_intrinsic_instr *intrin,
index a648995..02c781e 100644 (file)
@@ -126,6 +126,12 @@ TYPE = "NIR_INTRINSIC_TYPE"
 SWIZZLE_MASK = "NIR_INTRINSIC_SWIZZLE_MASK"
 # Driver location of attribute
 DRIVER_LOCATION = "NIR_INTRINSIC_DRIVER_LOCATION"
+# Ordering and visibility of a memory operation
+MEMORY_SEMANTICS = "NIR_INTRINSIC_MEMORY_SEMANTICS"
+# Modes affected by a memory operation
+MEMORY_MODES = "NIR_INTRINSIC_MEMORY_MODES"
+# Scope of a memory operation
+MEMORY_SCOPE = "NIR_INTRINSIC_MEMORY_SCOPE"
 
 #
 # Possible flags:
@@ -206,6 +212,12 @@ intrinsic("is_helper_invocation", dest_comp=1, flags=[CAN_ELIMINATE])
 # intrinsic.
 barrier("memory_barrier")
 
+# Memory barrier with explicit scope.  Follows the semantics of SPIR-V
+# OpMemoryBarrier, used to implement the Vulkan Memory Model.  The storage
+# that the barrier applies to is represented using NIR variable modes.
+intrinsic("scoped_memory_barrier",
+          indices=[MEMORY_SEMANTICS, MEMORY_MODES, MEMORY_SCOPE])
+
 # Shader clock intrinsic with semantics analogous to the clock2x32ARB()
 # GLSL intrinsic.
 # The latter can be used as code motion barrier, which is currently not
index ec316a8..766f056 100644 (file)
@@ -134,6 +134,16 @@ gather_intrinsic(struct access_state *state, nir_intrinsic_instr *instr)
       state->image_barriers = true;
       break;
 
+   case nir_intrinsic_scoped_memory_barrier:
+      /* TODO: Could be more granular if we had nir_var_mem_image. */
+      if (nir_intrinsic_memory_modes(instr) & (nir_var_mem_ubo |
+                                               nir_var_mem_ssbo |
+                                               nir_var_uniform)) {
+         state->buffer_barriers = true;
+         state->image_barriers = true;
+      }
+      break;
+
    default:
       break;
    }
index 48b9cfa..b3e5cb3 100644 (file)
@@ -316,6 +316,13 @@ combine_stores_block(struct combine_stores_state *state, nir_block *block)
                                               nir_var_mem_shared);
          break;
 
+      case nir_intrinsic_scoped_memory_barrier:
+         if (nir_intrinsic_memory_semantics(intrin) & NIR_MEMORY_RELEASE) {
+            combine_stores_with_modes(state,
+                                      nir_intrinsic_memory_modes(intrin));
+         }
+         break;
+
       case nir_intrinsic_emit_vertex:
       case nir_intrinsic_emit_vertex_with_counter:
          combine_stores_with_modes(state, nir_var_shader_out);
index c65beb2..c4544ea 100644 (file)
@@ -171,6 +171,11 @@ gather_vars_written(struct copy_prop_var_state *state,
                               nir_var_mem_shared;
             break;
 
+         case nir_intrinsic_scoped_memory_barrier:
+            if (nir_intrinsic_memory_semantics(intrin) & NIR_MEMORY_ACQUIRE)
+               written->modes |= nir_intrinsic_memory_modes(intrin);
+            break;
+
          case nir_intrinsic_emit_vertex:
          case nir_intrinsic_emit_vertex_with_counter:
             written->modes = nir_var_shader_out;
@@ -802,6 +807,13 @@ copy_prop_vars_block(struct copy_prop_var_state *state,
                                          nir_var_mem_shared);
          break;
 
+      case nir_intrinsic_scoped_memory_barrier:
+         if (debug) dump_instr(instr);
+
+         if (nir_intrinsic_memory_semantics(intrin) & NIR_MEMORY_ACQUIRE)
+            apply_barrier_for_modes(copies, nir_intrinsic_memory_modes(intrin));
+         break;
+
       case nir_intrinsic_emit_vertex:
       case nir_intrinsic_emit_vertex_with_counter:
          if (debug) dump_instr(instr);
index d2062a0..201e084 100644 (file)
@@ -139,6 +139,14 @@ remove_dead_write_vars_local(void *mem_ctx, nir_block *block)
          break;
       }
 
+      case nir_intrinsic_scoped_memory_barrier: {
+         if (nir_intrinsic_memory_semantics(intrin) & NIR_MEMORY_RELEASE) {
+            clear_unused_for_modes(&unused_writes,
+                                   nir_intrinsic_memory_modes(intrin));
+         }
+         break;
+      }
+
       case nir_intrinsic_emit_vertex:
       case nir_intrinsic_emit_vertex_with_counter: {
          clear_unused_for_modes(&unused_writes, nir_var_shader_out);
index 8408fa2..ca9dab7 100644 (file)
@@ -801,6 +801,9 @@ print_intrinsic_instr(nir_intrinsic_instr *instr, print_state *state)
       [NIR_INTRINSIC_TYPE] = "type",
       [NIR_INTRINSIC_SWIZZLE_MASK] = "swizzle_mask",
       [NIR_INTRINSIC_DRIVER_LOCATION] = "driver_location",
+      [NIR_INTRINSIC_MEMORY_SEMANTICS] = "mem_semantics",
+      [NIR_INTRINSIC_MEMORY_MODES] = "mem_modes",
+      [NIR_INTRINSIC_MEMORY_SCOPE] = "mem_scope",
    };
    for (unsigned idx = 1; idx < NIR_INTRINSIC_NUM_INDEX_FLAGS; idx++) {
       if (!info->index_map[idx])
@@ -887,6 +890,42 @@ print_intrinsic_instr(nir_intrinsic_instr *instr, print_state *state)
          break;
       }
 
+      case NIR_INTRINSIC_MEMORY_SEMANTICS: {
+         nir_memory_semantics semantics = nir_intrinsic_memory_semantics(instr);
+         fprintf(fp, " mem_semantics=");
+         switch (semantics & (NIR_MEMORY_ACQUIRE | NIR_MEMORY_RELEASE)) {
+         case 0:                  fprintf(fp, "NONE");    break;
+         case NIR_MEMORY_ACQUIRE: fprintf(fp, "ACQ");     break;
+         case NIR_MEMORY_RELEASE: fprintf(fp, "REL");     break;
+         default:                 fprintf(fp, "ACQ|REL"); break;
+         }
+         if (semantics & (NIR_MEMORY_MAKE_AVAILABLE)) fprintf(fp, "|AVAILABLE");
+         if (semantics & (NIR_MEMORY_MAKE_VISIBLE))   fprintf(fp, "|VISIBLE");
+         break;
+      }
+
+      case NIR_INTRINSIC_MEMORY_MODES: {
+         fprintf(fp, " mem_modes=");
+         unsigned int modes = nir_intrinsic_memory_modes(instr);
+         while (modes) {
+            nir_variable_mode m = u_bit_scan(&modes);
+            fprintf(fp, "%s%s", get_variable_mode_str(1 << m, true), modes ? "|" : "");
+         }
+         break;
+      }
+
+      case NIR_INTRINSIC_MEMORY_SCOPE: {
+         fprintf(fp, " mem_scope=");
+         switch (nir_intrinsic_memory_scope(instr)) {
+         case NIR_SCOPE_DEVICE:       fprintf(fp, "DEVICE");       break;
+         case NIR_SCOPE_QUEUE_FAMILY: fprintf(fp, "QUEUE_FAMILY"); break;
+         case NIR_SCOPE_WORKGROUP:    fprintf(fp, "WORKGROUP");    break;
+         case NIR_SCOPE_SUBGROUP:     fprintf(fp, "SUBGROUP");     break;
+         case NIR_SCOPE_INVOCATION:   fprintf(fp, "INVOCATION");   break;
+         }
+         break;
+      }
+
       default: {
          unsigned off = info->index_map[idx] - 1;
          assert(index_name[idx]);  /* forgot to update index_name table? */