* sizeof(float))
/** \} */
-
-/**
- * Compute masks to determine how much of draw_x and draw_y should be
- * performed using the fine adjustment of "depth coordinate offset X/Y"
- * (dw5 of 3DSTATE_DEPTH_BUFFER). See the emit_depthbuffer() function for
- * details.
- */
-void
-gen6_blorp_compute_tile_masks(const brw_blorp_params *params,
- uint32_t *tile_mask_x, uint32_t *tile_mask_y)
-{
- uint32_t depth_mask_x, depth_mask_y, hiz_mask_x, hiz_mask_y;
- intel_region_get_tile_masks(params->depth.mt->region,
- &depth_mask_x, &depth_mask_y);
- intel_region_get_tile_masks(params->depth.mt->hiz_mt->region,
- &hiz_mask_x, &hiz_mask_y);
-
- /* Each HiZ row represents 2 rows of pixels */
- hiz_mask_y = hiz_mask_y << 1 | 1;
-
- *tile_mask_x = depth_mask_x | hiz_mask_x;
- *tile_mask_y = depth_mask_y | hiz_mask_y;
-}
-
-
void
gen6_blorp_emit_batch_head(struct brw_context *brw,
const brw_blorp_params *params)
{
struct gl_context *ctx = &brw->intel.ctx;
- struct intel_context *intel = &brw->intel;
/* To ensure that the batch contains only the resolve, flush the batch
* before beginning and after finishing emitting the resolve packets.
- *
- * Ideally, we would not need to flush for the resolve op. But, I suspect
- * that it's unsafe for CMD_PIPELINE_SELECT to occur multiple times in
- * a single batch, and there is no safe way to ensure that other than by
- * fencing the resolve with flushes. Ideally, we would just detect if
- * a batch is in progress and do the right thing, but that would require
- * the ability to *safely* access brw_context::state::dirty::brw
- * outside of the brw_upload_state() codepath.
*/
intel_flush(ctx);
+}
- /* CMD_PIPELINE_SELECT
- *
- * Select the 3D pipeline, as opposed to the media pipeline.
- */
- {
- BEGIN_BATCH(1);
- OUT_BATCH(brw->CMD_PIPELINE_SELECT << 16);
- ADVANCE_BATCH();
- }
- gen6_emit_3dstate_multisample(brw, params->num_samples);
- gen6_emit_3dstate_sample_mask(brw, params->num_samples);
+/**
+ * CMD_STATE_BASE_ADDRESS
+ *
+ * From the Sandy Bridge PRM, Volume 1, Part 1, Table STATE_BASE_ADDRESS:
+ * The following commands must be reissued following any change to the
+ * base addresses:
+ * 3DSTATE_CC_POINTERS
+ * 3DSTATE_BINDING_TABLE_POINTERS
+ * 3DSTATE_SAMPLER_STATE_POINTERS
+ * 3DSTATE_VIEWPORT_STATE_POINTERS
+ * MEDIA_STATE_POINTERS
+ */
+void
+gen6_blorp_emit_state_base_address(struct brw_context *brw,
+ const brw_blorp_params *params)
+{
+ struct intel_context *intel = &brw->intel;
- /* CMD_STATE_BASE_ADDRESS
- *
- * From the Sandy Bridge PRM, Volume 1, Part 1, Table STATE_BASE_ADDRESS:
- * The following commands must be reissued following any change to the
- * base addresses:
- * 3DSTATE_CC_POINTERS
- * 3DSTATE_BINDING_TABLE_POINTERS
- * 3DSTATE_SAMPLER_STATE_POINTERS
- * 3DSTATE_VIEWPORT_STATE_POINTERS
- * MEDIA_STATE_POINTERS
- */
- {
- BEGIN_BATCH(10);
- OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (10 - 2));
- OUT_BATCH(1); /* GeneralStateBaseAddressModifyEnable */
- /* SurfaceStateBaseAddress */
- OUT_RELOC(intel->batch.bo, I915_GEM_DOMAIN_SAMPLER, 0, 1);
- /* DynamicStateBaseAddress */
- OUT_RELOC(intel->batch.bo, (I915_GEM_DOMAIN_RENDER |
- I915_GEM_DOMAIN_INSTRUCTION), 0, 1);
- OUT_BATCH(1); /* IndirectObjectBaseAddress */
- if (params->use_wm_prog) {
- OUT_RELOC(brw->cache.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
- 1); /* Instruction base address: shader kernels */
- } else {
- OUT_BATCH(1); /* InstructionBaseAddress */
- }
- OUT_BATCH(1); /* GeneralStateUpperBound */
- OUT_BATCH(1); /* DynamicStateUpperBound */
- OUT_BATCH(1); /* IndirectObjectUpperBound*/
- OUT_BATCH(1); /* InstructionAccessUpperBound */
- ADVANCE_BATCH();
+ BEGIN_BATCH(10);
+ OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (10 - 2));
+ OUT_BATCH(1); /* GeneralStateBaseAddressModifyEnable */
+ /* SurfaceStateBaseAddress */
+ OUT_RELOC(intel->batch.bo, I915_GEM_DOMAIN_SAMPLER, 0, 1);
+ /* DynamicStateBaseAddress */
+ OUT_RELOC(intel->batch.bo, (I915_GEM_DOMAIN_RENDER |
+ I915_GEM_DOMAIN_INSTRUCTION), 0, 1);
+ OUT_BATCH(1); /* IndirectObjectBaseAddress */
+ if (params->use_wm_prog) {
+ OUT_RELOC(brw->cache.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
+ 1); /* Instruction base address: shader kernels */
+ } else {
+ OUT_BATCH(1); /* InstructionBaseAddress */
}
+ OUT_BATCH(1); /* GeneralStateUpperBound */
+ /* Dynamic state upper bound. Although the documentation says that
+ * programming it to zero will cause it to be ignored, that is a lie.
+ * If this isn't programmed to a real bound, the sampler border color
+ * pointer is rejected, causing border color to mysteriously fail.
+ */
+ OUT_BATCH(0xfffff001);
+ OUT_BATCH(1); /* IndirectObjectUpperBound*/
+ OUT_BATCH(1); /* InstructionAccessUpperBound */
+ ADVANCE_BATCH();
}
+
void
gen6_blorp_emit_vertices(struct brw_context *brw,
const brw_blorp_params *params)
float *vertex_data;
const float vertices[GEN6_BLORP_VBO_SIZE] = {
- /* v0 */ 0, 0, 0, 0, params->x0, params->y1, 0, 1,
- /* v1 */ 0, 0, 0, 0, params->x1, params->y1, 0, 1,
- /* v2 */ 0, 0, 0, 0, params->x0, params->y0, 0, 1,
+ /* v0 */ 0, 0, 0, 0, (float) params->x0, (float) params->y1, 0, 1,
+ /* v1 */ 0, 0, 0, 0, (float) params->x1, (float) params->y1, 0, 1,
+ /* v2 */ 0, 0, 0, 0, (float) params->x0, (float) params->y0, 0, 1,
};
vertex_data = (float *) brw_state_batch(brw, AUB_TRACE_VERTEX_BUFFER,
memset(blend, 0, sizeof(*blend));
- // TODO: handle other formats.
blend->blend1.pre_blend_clamp_enable = 1;
blend->blend1.post_blend_clamp_enable = 1;
blend->blend1.clamp_range = BRW_RENDERTARGET_CLAMPRANGE_FORMAT;
- blend->blend1.write_disable_r = false;
- blend->blend1.write_disable_g = false;
- blend->blend1.write_disable_b = false;
- blend->blend1.write_disable_a = false;
+ blend->blend1.write_disable_r = params->color_write_disable[0];
+ blend->blend1.write_disable_g = params->color_write_disable[1];
+ blend->blend1.write_disable_b = params->color_write_disable[2];
+ blend->blend1.write_disable_a = params->color_write_disable[3];
+
+ /* When blitting from an XRGB source to an ARGB destination, we need to
+ * interpret the missing channel as 1.0. Blending can do that for us:
+ * we simply use the RGB values from the fragment shader ("source RGB"),
+ * but smash the alpha channel to 1.
+ */
+ if (params->src.mt &&
+ _mesa_get_format_bits(params->dst.mt->format, GL_ALPHA_BITS) > 0 &&
+ _mesa_get_format_bits(params->src.mt->format, GL_ALPHA_BITS) == 0) {
+ blend->blend0.blend_enable = 1;
+ blend->blend0.ia_blend_enable = 1;
+
+ blend->blend0.blend_func = BRW_BLENDFUNCTION_ADD;
+ blend->blend0.ia_blend_func = BRW_BLENDFUNCTION_ADD;
+
+ blend->blend0.source_blend_factor = BRW_BLENDFACTOR_SRC_COLOR;
+ blend->blend0.dest_blend_factor = BRW_BLENDFACTOR_ZERO;
+ blend->blend0.ia_source_blend_factor = BRW_BLENDFACTOR_ONE;
+ blend->blend0.ia_dest_blend_factor = BRW_BLENDFACTOR_ZERO;
+ }
return cc_blend_state_offset;
}
uint32_t read_domains, uint32_t write_domain)
{
uint32_t wm_surf_offset;
- uint32_t width, height;
- surface->get_miplevel_dims(&width, &height);
- if (surface->num_samples > 0) { /* TODO: seems clumsy */
+ uint32_t width = surface->width;
+ uint32_t height = surface->height;
+ if (surface->num_samples > 1) {
+ /* Since gen6 uses INTEL_MSAA_LAYOUT_IMS, width and height are measured
+ * in samples. But SURFACE_STATE wants them in pixels, so we need to
+ * divide them each by 2.
+ */
width /= 2;
height /= 2;
}
- if (surface->map_stencil_as_y_tiled) {
- width *= 2;
- height /= 2;
- }
struct intel_region *region = surface->mt->region;
-
- /* TODO: handle other formats */
- uint32_t format = surface->map_stencil_as_y_tiled
- ? BRW_SURFACEFORMAT_R8_UNORM : BRW_SURFACEFORMAT_B8G8R8A8_UNORM;
+ uint32_t tile_x, tile_y;
uint32_t *surf = (uint32_t *)
brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
surf[0] = (BRW_SURFACE_2D << BRW_SURFACE_TYPE_SHIFT |
BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
BRW_SURFACE_CUBEFACE_ENABLES |
- format << BRW_SURFACE_FORMAT_SHIFT);
+ surface->brw_surfaceformat << BRW_SURFACE_FORMAT_SHIFT);
/* reloc */
- surf[1] = region->bo->offset; /* No tile offsets needed */
+ surf[1] = (surface->compute_tile_offsets(&tile_x, &tile_y) +
+ region->bo->offset);
surf[2] = (0 << BRW_SURFACE_LOD_SHIFT |
(width - 1) << BRW_SURFACE_WIDTH_SHIFT |
uint32_t tiling = surface->map_stencil_as_y_tiled
? BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y
: brw_get_surface_tiling_bits(region->tiling);
- uint32_t pitch_bytes = region->pitch * region->cpp;
+ uint32_t pitch_bytes = region->pitch;
if (surface->map_stencil_as_y_tiled)
pitch_bytes *= 2;
surf[3] = (tiling |
surf[4] = brw_get_surface_num_multisamples(surface->num_samples);
- surf[5] = (0 << BRW_SURFACE_X_OFFSET_SHIFT |
- 0 << BRW_SURFACE_Y_OFFSET_SHIFT |
+ /* Note that the low bits of these fields are missing (X is stored in
+ * units of 4, Y in units of 2), so unaligned tile offsets cannot be encoded.
+ */
+ assert(tile_x % 4 == 0);
+ assert(tile_y % 2 == 0);
+ surf[5] = ((tile_x / 4) << BRW_SURFACE_X_OFFSET_SHIFT |
+ (tile_y / 2) << BRW_SURFACE_Y_OFFSET_SHIFT |
(surface->mt->align_h == 4 ?
BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0));
intel_emit_post_sync_nonzero_flush(intel);
}
+ /* Disable the push constant buffers. */
+ BEGIN_BATCH(5);
+ OUT_BATCH(_3DSTATE_CONSTANT_VS << 16 | (5 - 2));
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ ADVANCE_BATCH();
+
BEGIN_BATCH(6);
OUT_BATCH(_3DSTATE_VS << 16 | (6 - 2));
OUT_BATCH(0);
{
struct intel_context *intel = &brw->intel;
+ /* Disable all the constant buffers. */
+ BEGIN_BATCH(5);
+ OUT_BATCH(_3DSTATE_CONSTANT_GS << 16 | (5 - 2));
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ ADVANCE_BATCH();
+
BEGIN_BATCH(7);
OUT_BATCH(_3DSTATE_GS << 16 | (7 - 2));
OUT_BATCH(0);
1 << GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT |
0 << GEN6_SF_URB_ENTRY_READ_OFFSET_SHIFT);
OUT_BATCH(0); /* dw2 */
- OUT_BATCH(params->num_samples > 0 ? GEN6_SF_MSRAST_ON_PATTERN : 0);
+ OUT_BATCH(params->num_samples > 1 ? GEN6_SF_MSRAST_ON_PATTERN : 0);
for (int i = 0; i < 16; ++i)
OUT_BATCH(0);
ADVANCE_BATCH();
assert(0);
break;
}
- dw4 |= GEN6_WM_STATISTICS_ENABLE;
dw5 |= GEN6_WM_LINE_AA_WIDTH_1_0;
dw5 |= GEN6_WM_LINE_END_CAP_AA_WIDTH_0_5;
dw5 |= (brw->max_wm_threads - 1) << GEN6_WM_MAX_THREADS_SHIFT;
dw5 |= GEN6_WM_DISPATCH_ENABLE; /* We are rendering */
}
- if (params->num_samples > 0) {
+ if (params->num_samples > 1) {
dw6 |= GEN6_WM_MSRAST_ON_PATTERN;
- dw6 |= GEN6_WM_MSDISPMODE_PERPIXEL;
+ if (prog_data && prog_data->persample_msaa_dispatch)
+ dw6 |= GEN6_WM_MSDISPMODE_PERSAMPLE;
+ else
+ dw6 |= GEN6_WM_MSDISPMODE_PERPIXEL;
} else {
dw6 |= GEN6_WM_MSRAST_OFF_PIXEL;
dw6 |= GEN6_WM_MSDISPMODE_PERSAMPLE;
ADVANCE_BATCH();
}
+static void
+gen6_blorp_emit_constant_ps_disable(struct brw_context *brw,
+ const brw_blorp_params *params)
+{
+ struct intel_context *intel = &brw->intel;
+
+ /* Disable the push constant buffers. */
+ BEGIN_BATCH(5);
+ OUT_BATCH(_3DSTATE_CONSTANT_PS << 16 | (5 - 2));
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ ADVANCE_BATCH();
+}
/**
* 3DSTATE_BINDING_TABLE_POINTERS
const brw_blorp_params *params)
{
struct intel_context *intel = &brw->intel;
- uint32_t draw_x, draw_y;
+ struct gl_context *ctx = &intel->ctx;
+ uint32_t draw_x = params->depth.x_offset;
+ uint32_t draw_y = params->depth.y_offset;
uint32_t tile_mask_x, tile_mask_y;
- gen6_blorp_compute_tile_masks(params, &tile_mask_x, &tile_mask_y);
- params->depth.get_draw_offsets(&draw_x, &draw_y);
+ brw_get_depthstencil_tile_masks(params->depth.mt,
+ params->depth.level,
+ params->depth.layer,
+ NULL,
+ &tile_mask_x, &tile_mask_y);
/* 3DSTATE_DEPTH_BUFFER */
{
- uint32_t width, height;
- params->depth.get_miplevel_dims(&width, &height);
-
uint32_t tile_x = draw_x & tile_mask_x;
uint32_t tile_y = draw_y & tile_mask_y;
uint32_t offset =
intel_region_get_aligned_offset(params->depth.mt->region,
draw_x & ~tile_mask_x,
- draw_y & ~tile_mask_y);
+ draw_y & ~tile_mask_y, false);
/* According to the Sandy Bridge PRM, volume 2 part 1, pp326-327
* (3DSTATE_DEPTH_BUFFER dw5), in the documentation for "Depth
* tile_x and tile_y to 0. This is a temporary workaround until we come
* up with a better solution.
*/
+ WARN_ONCE((tile_x & 7) || (tile_y & 7),
+ "Depth/stencil buffer needs alignment to 8-pixel boundaries.\n"
+ "Truncating offset, bad rendering may occur.\n");
tile_x &= ~7;
tile_y &= ~7;
BEGIN_BATCH(7);
OUT_BATCH(_3DSTATE_DEPTH_BUFFER << 16 | (7 - 2));
- uint32_t pitch_bytes =
- params->depth.mt->region->pitch * params->depth.mt->region->cpp;
- OUT_BATCH((pitch_bytes - 1) |
+ OUT_BATCH((params->depth.mt->region->pitch - 1) |
params->depth_format << 18 |
1 << 21 | /* separate stencil enable */
1 << 22 | /* hiz enable */
I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
offset);
OUT_BATCH(BRW_SURFACE_MIPMAPLAYOUT_BELOW << 1 |
- (width + tile_x - 1) << 6 |
- (height + tile_y - 1) << 19);
+ (params->depth.width + tile_x - 1) << 6 |
+ (params->depth.height + tile_y - 1) << 19);
OUT_BATCH(0);
OUT_BATCH(tile_x |
tile_y << 16);
uint32_t hiz_offset =
intel_region_get_aligned_offset(hiz_region,
draw_x & ~tile_mask_x,
- (draw_y & ~tile_mask_y) / 2);
+ (draw_y & ~tile_mask_y) / 2, false);
BEGIN_BATCH(3);
OUT_BATCH((_3DSTATE_HIER_DEPTH_BUFFER << 16) | (3 - 2));
- OUT_BATCH(hiz_region->pitch * hiz_region->cpp - 1);
+ OUT_BATCH(hiz_region->pitch - 1);
OUT_RELOC(hiz_region->bo,
I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
hiz_offset);
uint32_t prog_offset = params->get_wm_prog(brw, &prog_data);
gen6_blorp_emit_batch_head(brw, params);
+ gen6_emit_3dstate_multisample(brw, params->num_samples);
+ gen6_emit_3dstate_sample_mask(brw, params->num_samples, 1.0, false, ~0u);
+ gen6_blorp_emit_state_base_address(brw, params);
gen6_blorp_emit_vertices(brw, params);
gen6_blorp_emit_urb_config(brw, params);
if (params->use_wm_prog) {
depthstencil_offset, cc_state_offset);
if (params->use_wm_prog) {
uint32_t wm_surf_offset_renderbuffer;
- uint32_t wm_surf_offset_texture;
+ uint32_t wm_surf_offset_texture = 0;
uint32_t sampler_offset;
wm_push_const_offset = gen6_blorp_emit_wm_constants(brw, params);
wm_surf_offset_renderbuffer =
gen6_blorp_emit_surface_state(brw, params, ¶ms->dst,
I915_GEM_DOMAIN_RENDER,
I915_GEM_DOMAIN_RENDER);
- wm_surf_offset_texture =
- gen6_blorp_emit_surface_state(brw, params, ¶ms->src,
- I915_GEM_DOMAIN_SAMPLER, 0);
+ if (params->src.mt) {
+ wm_surf_offset_texture =
+ gen6_blorp_emit_surface_state(brw, params, ¶ms->src,
+ I915_GEM_DOMAIN_SAMPLER, 0);
+ }
wm_bind_bo_offset =
gen6_blorp_emit_binding_table(brw, params,
wm_surf_offset_renderbuffer,
gen6_blorp_emit_sf_config(brw, params);
if (params->use_wm_prog)
gen6_blorp_emit_constant_ps(brw, params, wm_push_const_offset);
+ else
+ gen6_blorp_emit_constant_ps_disable(brw, params);
gen6_blorp_emit_wm_config(brw, params, prog_offset, prog_data);
if (params->use_wm_prog)
gen6_blorp_emit_binding_table_pointers(brw, params, wm_bind_bo_offset);
gen6_blorp_emit_clear_params(brw, params);
gen6_blorp_emit_drawing_rectangle(brw, params);
gen6_blorp_emit_primitive(brw, params);
-
- /* See comments above at first invocation of intel_flush() in
- * gen6_blorp_emit_batch_head().
- */
- intel_flush(ctx);
-
- /* Be safe. */
- brw->state.dirty.brw = ~0;
- brw->state.dirty.cache = ~0;
}