From 8ba0c025a4e0aba97ae596e2121416cf04c0c300 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Sun, 24 Apr 2011 20:20:16 -0700 Subject: [PATCH] i965/gen4: Move clip state to state streaming Reviewed-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_clip_state.c | 157 ++++++++++------------------- src/mesa/drivers/dri/i965/brw_context.h | 4 +- src/mesa/drivers/dri/i965/brw_misc_state.c | 4 +- src/mesa/drivers/dri/i965/brw_vtbl.c | 1 - 4 files changed, 60 insertions(+), 106 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_clip_state.c b/src/mesa/drivers/dri/i965/brw_clip_state.c index 60fd5fa7d9e..6015c8cbe9f 100644 --- a/src/mesa/drivers/dri/i965/brw_clip_state.c +++ b/src/mesa/drivers/dri/i965/brw_clip_state.c @@ -33,148 +33,101 @@ #include "brw_state.h" #include "brw_defines.h" -struct brw_clip_unit_key { - unsigned int total_grf; - unsigned int urb_entry_read_length; - unsigned int curb_entry_read_length; - unsigned int clip_mode; - - unsigned int curbe_offset; - - unsigned int nr_urb_entries, urb_size; - - GLboolean depth_clamp; -}; - static void -clip_unit_populate_key(struct brw_context *brw, struct brw_clip_unit_key *key) -{ - struct gl_context *ctx = &brw->intel.ctx; - memset(key, 0, sizeof(*key)); - - /* CACHE_NEW_CLIP_PROG */ - key->total_grf = brw->clip.prog_data->total_grf; - key->urb_entry_read_length = brw->clip.prog_data->urb_read_length; - key->curb_entry_read_length = brw->clip.prog_data->curb_read_length; - key->clip_mode = brw->clip.prog_data->clip_mode; - - /* BRW_NEW_CURBE_OFFSETS */ - key->curbe_offset = brw->curbe.clip_start; - - /* BRW_NEW_URB_FENCE */ - key->nr_urb_entries = brw->urb.nr_clip_entries; - key->urb_size = brw->urb.vsize; - - /* _NEW_TRANSOFORM */ - key->depth_clamp = ctx->Transform.DepthClamp; -} - -static drm_intel_bo * -clip_unit_create_from_key(struct brw_context *brw, - struct brw_clip_unit_key *key) +brw_prepare_clip_unit(struct brw_context *brw) { struct intel_context *intel = &brw->intel; - struct 
brw_clip_unit_state clip; - drm_intel_bo *bo; + struct gl_context *ctx = &intel->ctx; + struct brw_clip_unit_state *clip; - memset(&clip, 0, sizeof(clip)); + clip = brw_state_batch(brw, sizeof(*clip), 32, &brw->clip.state_offset); + memset(clip, 0, sizeof(*clip)); - clip.thread0.grf_reg_count = ALIGN(key->total_grf, 16) / 16 - 1; + /* CACHE_NEW_CLIP_PROG */ + clip->thread0.grf_reg_count = (ALIGN(brw->clip.prog_data->total_grf, 16) / + 16 - 1); /* reloc */ - clip.thread0.kernel_start_pointer = brw->clip.prog_bo->offset >> 6; + clip->thread0.kernel_start_pointer = brw->clip.prog_bo->offset >> 6; - clip.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754; - clip.thread1.single_program_flow = 1; + clip->thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754; + clip->thread1.single_program_flow = 1; - clip.thread3.urb_entry_read_length = key->urb_entry_read_length; - clip.thread3.const_urb_entry_read_length = key->curb_entry_read_length; - clip.thread3.const_urb_entry_read_offset = key->curbe_offset * 2; - clip.thread3.dispatch_grf_start_reg = 1; - clip.thread3.urb_entry_read_offset = 0; + clip->thread3.urb_entry_read_length = brw->clip.prog_data->urb_read_length; + clip->thread3.const_urb_entry_read_length = + brw->clip.prog_data->curb_read_length; + + /* BRW_NEW_CURBE_OFFSETS */ + clip->thread3.const_urb_entry_read_offset = brw->curbe.clip_start * 2; + clip->thread3.dispatch_grf_start_reg = 1; + clip->thread3.urb_entry_read_offset = 0; - clip.thread4.nr_urb_entries = key->nr_urb_entries; - clip.thread4.urb_entry_allocation_size = key->urb_size - 1; + /* BRW_NEW_URB_FENCE */ + clip->thread4.nr_urb_entries = brw->urb.nr_clip_entries; + clip->thread4.urb_entry_allocation_size = brw->urb.vsize - 1; /* If we have enough clip URB entries to run two threads, do so. */ - if (key->nr_urb_entries >= 10) { + if (brw->urb.nr_clip_entries >= 10) { /* Half of the URB entries go to each thread, and it has to be an * even number. 
*/ - assert(key->nr_urb_entries % 2 == 0); + assert(brw->urb.nr_clip_entries % 2 == 0); /* Although up to 16 concurrent Clip threads are allowed on Ironlake, * only 2 threads can output VUEs at a time. */ if (intel->gen == 5) - clip.thread4.max_threads = 16 - 1; + clip->thread4.max_threads = 16 - 1; else - clip.thread4.max_threads = 2 - 1; + clip->thread4.max_threads = 2 - 1; } else { - assert(key->nr_urb_entries >= 5); - clip.thread4.max_threads = 1 - 1; + assert(brw->urb.nr_clip_entries >= 5); + clip->thread4.max_threads = 1 - 1; } if (unlikely(INTEL_DEBUG & DEBUG_SINGLE_THREAD)) - clip.thread4.max_threads = 0; + clip->thread4.max_threads = 0; if (unlikely(INTEL_DEBUG & DEBUG_STATS)) - clip.thread4.stats_enable = 1; - - clip.clip5.userclip_enable_flags = 0x7f; - clip.clip5.userclip_must_clip = 1; - clip.clip5.guard_band_enable = 0; - if (!key->depth_clamp) - clip.clip5.viewport_z_clip_enable = 1; - clip.clip5.viewport_xy_clip_enable = 1; - clip.clip5.vertex_position_space = BRW_CLIP_NDCSPACE; - clip.clip5.api_mode = BRW_CLIP_API_OGL; - clip.clip5.clip_mode = key->clip_mode; + clip->thread4.stats_enable = 1; - if (intel->is_g4x) - clip.clip5.negative_w_clip_test = 1; + clip->clip5.userclip_enable_flags = 0x7f; + clip->clip5.userclip_must_clip = 1; + clip->clip5.guard_band_enable = 0; + /* _NEW_TRANSFORM */ + if (!ctx->Transform.DepthClamp) + clip->clip5.viewport_z_clip_enable = 1; + clip->clip5.viewport_xy_clip_enable = 1; + clip->clip5.vertex_position_space = BRW_CLIP_NDCSPACE; + clip->clip5.api_mode = BRW_CLIP_API_OGL; + clip->clip5.clip_mode = brw->clip.prog_data->clip_mode; - clip.clip6.clipper_viewport_state_ptr = 0; - clip.viewport_xmin = -1; - clip.viewport_xmax = 1; - clip.viewport_ymin = -1; - clip.viewport_ymax = 1; + if (intel->is_g4x) + clip->clip5.negative_w_clip_test = 1; - bo = brw_upload_cache(&brw->cache, BRW_CLIP_UNIT, - key, sizeof(*key), - &brw->clip.prog_bo, 1, - &clip, sizeof(clip)); + clip->clip6.clipper_viewport_state_ptr = 0; + 
clip->viewport_xmin = -1; + clip->viewport_xmax = 1; + clip->viewport_ymin = -1; + clip->viewport_ymax = 1; /* Emit clip program relocation */ assert(brw->clip.prog_bo); - drm_intel_bo_emit_reloc(bo, offsetof(struct brw_clip_unit_state, thread0), - brw->clip.prog_bo, clip.thread0.grf_reg_count << 1, + drm_intel_bo_emit_reloc(intel->batch.bo, + (brw->clip.state_offset + + offsetof(struct brw_clip_unit_state, thread0)), + brw->clip.prog_bo, clip->thread0.grf_reg_count << 1, I915_GEM_DOMAIN_INSTRUCTION, 0); - return bo; -} - -static void upload_clip_unit( struct brw_context *brw ) -{ - struct brw_clip_unit_key key; - - clip_unit_populate_key(brw, &key); - - drm_intel_bo_unreference(brw->clip.state_bo); - brw->clip.state_bo = brw_search_cache(&brw->cache, BRW_CLIP_UNIT, - &key, sizeof(key), - &brw->clip.prog_bo, 1, - NULL); - if (brw->clip.state_bo == NULL) { - brw->clip.state_bo = clip_unit_create_from_key(brw, &key); - } + brw->state.dirty.cache |= CACHE_NEW_CLIP_UNIT; } const struct brw_tracked_state brw_clip_unit = { .dirty = { .mesa = _NEW_TRANSFORM, - .brw = (BRW_NEW_CURBE_OFFSETS | + .brw = (BRW_NEW_BATCH | + BRW_NEW_CURBE_OFFSETS | BRW_NEW_URB_FENCE), .cache = CACHE_NEW_CLIP_PROG }, - .prepare = upload_clip_unit, + .prepare = brw_prepare_clip_unit, }; diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index a81d6157390..4b97bfb2ac5 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -655,7 +655,9 @@ struct brw_context struct brw_clip_prog_data *prog_data; drm_intel_bo *prog_bo; - drm_intel_bo *state_bo; + + /* Offset in the batch to the CLIP state on pre-gen6. */ + uint32_t state_offset; /* As of gen6, this is the offset in the batch to the CLIP VP, * instead of vp_bo. 
diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c index b61a7ad099e..3552cce62ad 100644 --- a/src/mesa/drivers/dri/i965/brw_misc_state.c +++ b/src/mesa/drivers/dri/i965/brw_misc_state.c @@ -149,7 +149,8 @@ static void upload_pipelined_state_pointers(struct brw_context *brw ) OUT_RELOC(brw->gs.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 1); else OUT_BATCH(0); - OUT_RELOC(brw->clip.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 1); + OUT_RELOC(brw->intel.batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, + brw->clip.state_offset | 1); OUT_RELOC(brw->intel.batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, brw->sf.state_offset); OUT_RELOC(brw->intel.batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, @@ -165,7 +166,6 @@ static void upload_pipelined_state_pointers(struct brw_context *brw ) static void prepare_psp_urb_cbs(struct brw_context *brw) { brw_add_validated_bo(brw, brw->gs.state_bo); - brw_add_validated_bo(brw, brw->clip.state_bo); } static void upload_psp_urb_cbs(struct brw_context *brw ) diff --git a/src/mesa/drivers/dri/i965/brw_vtbl.c b/src/mesa/drivers/dri/i965/brw_vtbl.c index 8d1497c8215..49d771133bf 100644 --- a/src/mesa/drivers/dri/i965/brw_vtbl.c +++ b/src/mesa/drivers/dri/i965/brw_vtbl.c @@ -80,7 +80,6 @@ static void brw_destroy_context( struct intel_context *intel ) dri_bo_release(&brw->gs.prog_bo); dri_bo_release(&brw->gs.state_bo); dri_bo_release(&brw->clip.prog_bo); - dri_bo_release(&brw->clip.state_bo); dri_bo_release(&brw->sf.prog_bo); dri_bo_release(&brw->wm.prog_bo); dri_bo_release(&brw->wm.const_bo); -- 2.11.0