2 * Mesa 3-D graphics library
4 * Copyright (C) 2012-2013 LunarG, Inc.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
25 * Chia-I Wu <olv@lunarg.com>
28 #include "util/u_blitter.h"
29 #include "util/u_clear.h"
30 #include "util/u_pack_color.h"
31 #include "util/u_surface.h"
32 #include "intel_reg.h"
34 #include "ilo_context.h"
36 #include "ilo_resource.h"
37 #include "ilo_screen.h"
41 * From the Sandy Bridge PRM, volume 1 part 5, page 7:
43 * "The BLT engine is capable of transferring very large quantities of
44 * graphics data. Any graphics data read from and written to the
45 * destination is permitted to represent a number of pixels that occupies
46 * up to 65,536 scan lines and up to 32,768 bytes per scan line at the
47 * destination. The maximum number of pixels that may be represented per
48 * scan line's worth of graphics data depends on the color depth."
/* Limit on the bytes of one scan line that a single BLT may touch (see the PRM excerpt above). */
50 static const int gen6_max_bytes_per_scanline = 32768;
/* Limit on the number of scan lines that a single BLT may touch. */
51 static const int gen6_max_scanlines = 65536;
/*
 * Prepare the command parser for BLT work: select the BLT ring and call
 * ilo_cp_set_owner with a NULL owner and 0.
 * Every BLT path in this file calls this before emitting commands.
 */
54 ilo_blit_own_blt_ring(struct ilo_context *ilo)
56 ilo_cp_set_ring(ilo->cp, ILO_CP_RING_BLT);
57 ilo_cp_set_owner(ilo->cp, NULL, 0);
/*
 * Emit an MI_FLUSH_DW command through ilo->cp.
 *
 * The command is cmd_len (4) dwords long; the header dword is MI_FLUSH_DW
 * with cmd_len - 2 ORed in.
 * NOTE(review): only the header write is visible in this listing; the
 * payload dwords are presumably written on lines that are missing here.
 */
61 gen6_MI_FLUSH_DW(struct ilo_context *ilo)
63 const uint8_t cmd_len = 4;
64 struct ilo_cp *cp = ilo->cp;
66 ilo_cp_begin(cp, cmd_len);
67 ilo_cp_write(cp, MI_FLUSH_DW | (cmd_len - 2));
/*
 * Emit an MI_LOAD_REGISTER_IMM command (3 dwords) through ilo->cp.
 *
 * reg: written as the second dword; tex_copy_region passes BCS_SWCTRL
 * val: written as the third dword, the immediate value for that register
 */
75 gen6_MI_LOAD_REGISTER_IMM(struct ilo_context *ilo, uint32_t reg, uint32_t val)
77 const uint8_t cmd_len = 3;
78 struct ilo_cp *cp = ilo->cp;
80 ilo_cp_begin(cp, cmd_len);
/* header: opcode plus (cmd_len - 2) */
81 ilo_cp_write(cp, MI_LOAD_REGISTER_IMM | (cmd_len - 2));
82 ilo_cp_write(cp, reg);
83 ilo_cp_write(cp, val);
/*
 * Emit an XY_COLOR_BLT command (6 dwords) targeting dst_bo.
 *
 * dst_bo, dst_offset: destination buffer and offset, emitted as a
 *                     relocation with render read and write domains
 * dst_pitch:          destination pitch; shifted right by dst_pitch_shift
 *                     before it is emitted
 * dst_tiling:         selects the alignment required of offset and pitch
 * x1, y1, x2, y2:     rectangle corners, each pair packed as (y << 16 | x)
 * rop:                raster operation code
 * cpp:                bytes per pixel; must be 1, 2 or 4
 * write_alpha:        presumably gates XY_BLT_WRITE_ALPHA; the condition
 *                     line is missing from this listing, confirm
 *
 * The color dword written last comes from a parameter whose declaration
 * line is missing from this listing.
 */
88 gen6_XY_COLOR_BLT(struct ilo_context *ilo, struct intel_bo *dst_bo,
89 uint32_t dst_offset, int16_t dst_pitch,
90 enum intel_tiling_mode dst_tiling,
91 int16_t x1, int16_t y1, int16_t x2, int16_t y2,
93 uint8_t rop, int cpp, bool write_alpha)
95 const uint8_t cmd_len = 6;
96 struct ilo_cp *cp = ilo->cp;
97 int dst_align, dst_pitch_shift;
100 dw0 = XY_COLOR_BLT_CMD | (cmd_len - 2);
102 if (dst_tiling == INTEL_TILING_NONE) {
/* tiled destination: alignment of 128 for Y tiling, 512 for any other tiling */
109 dst_align = (dst_tiling == INTEL_TILING_Y) ? 128 : 512;
110 /* in dwords when tiled */
/* sanity checks: format size, strict BLT limits, and offset/pitch alignment */
114 assert(cpp == 4 || cpp == 2 || cpp == 1);
115 assert((x2 - x1) * cpp < gen6_max_bytes_per_scanline);
116 assert(y2 - y1 < gen6_max_scanlines);
117 assert(dst_offset % dst_align == 0 && dst_pitch % dst_align == 0);
120 dst_pitch >> dst_pitch_shift;
124 dw0 |= XY_BLT_WRITE_RGB;
126 dw0 |= XY_BLT_WRITE_ALPHA;
138 ilo_cp_begin(cp, cmd_len);
139 ilo_cp_write(cp, dw0);
140 ilo_cp_write(cp, dw1);
/*
 * NOTE(review): x and y are int16_t and get promoted to int here. A
 * negative x would sign-extend over the y half of the dword, and shifting
 * a negative y left is undefined in C. Callers presumably pass only
 * non-negative coordinates; confirm.
 */
141 ilo_cp_write(cp, y1 << 16 | x1);
142 ilo_cp_write(cp, y2 << 16 | x2);
143 ilo_cp_write_bo(cp, dst_offset, dst_bo,
144 INTEL_DOMAIN_RENDER, INTEL_DOMAIN_RENDER);
145 ilo_cp_write(cp, color);
/*
 * Emit a SRC_COPY_BLT command (6 dwords) copying from src_bo to dst_bo.
 * Unlike the XY variants in this file it takes no tiling mode and no
 * rectangle corners, only a width and a height.
 *
 * dst_bo, dst_offset, dst_pitch: destination buffer, offset and pitch
 * width, height:                 extent of the copy; buf_copy_region passes
 *                                a byte count as width with cpp == 1
 * src_bo, src_offset, src_pitch: source buffer, offset and pitch
 * rop:                           raster operation code
 * cpp:                           bytes per pixel; must be 1, 2 or 4
 * write_alpha, dir_rtl:          their uses are on lines missing from this
 *                                listing; presumably they gate
 *                                XY_BLT_WRITE_ALPHA and the copy direction
 */
150 gen6_SRC_COPY_BLT(struct ilo_context *ilo, struct intel_bo *dst_bo,
151 uint32_t dst_offset, int16_t dst_pitch,
152 uint16_t width, uint16_t height,
153 struct intel_bo *src_bo,
154 uint32_t src_offset, int16_t src_pitch,
155 uint8_t rop, int cpp, bool write_alpha, bool dir_rtl)
157 const uint8_t cmd_len = 6;
158 struct ilo_cp *cp = ilo->cp;
161 assert(cpp == 4 || cpp == 2 || cpp == 1);
/* both limits are strict: a width or height equal to the limit is rejected */
162 assert(width < gen6_max_bytes_per_scanline);
163 assert(height < gen6_max_scanlines);
164 /* offsets are naturally aligned and pitches are dword-aligned */
165 assert(dst_offset % cpp == 0 && dst_pitch % 4 == 0);
166 assert(src_offset % cpp == 0 && src_pitch % 4 == 0);
/* fallback opcode definition for headers that do not provide SRC_COPY_BLT_CMD */
168 #ifndef SRC_COPY_BLT_CMD
169 #define SRC_COPY_BLT_CMD (CMD_2D | (0x43 << 22))
171 dw0 = SRC_COPY_BLT_CMD | (cmd_len - 2);
/* NOTE(review): dst_pitch is int16_t; a negative pitch would sign-extend over the rop field. */
172 dw1 = rop << 16 | dst_pitch;
179 dw0 |= XY_BLT_WRITE_RGB;
181 dw0 |= XY_BLT_WRITE_ALPHA;
193 ilo_cp_begin(cp, cmd_len);
194 ilo_cp_write(cp, dw0);
195 ilo_cp_write(cp, dw1);
/*
 * NOTE(review): height is uint16_t and is promoted to int before the
 * shift, so a height of 32768 or more overflows a signed int here.
 */
196 ilo_cp_write(cp, height << 16 | width);
197 ilo_cp_write_bo(cp, dst_offset, dst_bo, INTEL_DOMAIN_RENDER,
198 INTEL_DOMAIN_RENDER);
199 ilo_cp_write(cp, src_pitch);
/* the source relocation passes 0 as its last argument, where the destination passes INTEL_DOMAIN_RENDER */
200 ilo_cp_write_bo(cp, src_offset, src_bo, INTEL_DOMAIN_RENDER, 0);
/*
 * Emit an XY_SRC_COPY_BLT command (8 dwords) copying a rectangle from
 * src_bo to dst_bo.
 *
 * dst_bo, dst_offset, dst_pitch, dst_tiling: destination buffer, offset,
 *                     pitch and tiling mode
 * x1, y1, x2, y2:     destination rectangle corners, packed as (y << 16 | x)
 * src_bo, src_offset, src_pitch, src_tiling: source buffer, offset, pitch
 *                     and tiling mode
 * src_x, src_y:       source origin, packed as (src_y << 16 | src_x)
 * rop:                raster operation code
 * cpp:                bytes per pixel; must be 1, 2 or 4
 * write_alpha:        presumably gates XY_BLT_WRITE_ALPHA; the condition
 *                     line is missing from this listing, confirm
 */
205 gen6_XY_SRC_COPY_BLT(struct ilo_context *ilo, struct intel_bo *dst_bo,
206 uint32_t dst_offset, int16_t dst_pitch,
207 enum intel_tiling_mode dst_tiling,
208 int16_t x1, int16_t y1, int16_t x2, int16_t y2,
209 struct intel_bo *src_bo,
210 uint32_t src_offset, int16_t src_pitch,
211 enum intel_tiling_mode src_tiling,
212 int16_t src_x, int16_t src_y,
213 uint8_t rop, int cpp, bool write_alpha)
215 const uint8_t cmd_len = 8;
216 struct ilo_cp *cp = ilo->cp;
217 int dst_align, dst_pitch_shift;
218 int src_align, src_pitch_shift;
221 dw0 = XY_SRC_COPY_BLT_CMD | (cmd_len - 2);
/* destination: alignment of 128 for Y tiling, 512 for any other tiling */
223 if (dst_tiling == INTEL_TILING_NONE) {
230 dst_align = (dst_tiling == INTEL_TILING_Y) ? 128 : 512;
231 /* in dwords when tiled */
/* source: same alignment rule as the destination */
235 if (src_tiling == INTEL_TILING_NONE) {
242 src_align = (src_tiling == INTEL_TILING_Y) ? 128 : 512;
243 /* in dwords when tiled */
/* sanity checks: format size, strict BLT limits, and offset/pitch alignment on both sides */
247 assert(cpp == 4 || cpp == 2 || cpp == 1);
248 assert((x2 - x1) * cpp < gen6_max_bytes_per_scanline);
249 assert(y2 - y1 < gen6_max_scanlines);
250 assert(dst_offset % dst_align == 0 && dst_pitch % dst_align == 0);
251 assert(src_offset % src_align == 0 && src_pitch % src_align == 0);
254 dst_pitch >> dst_pitch_shift;
258 dw0 |= XY_BLT_WRITE_RGB;
260 dw0 |= XY_BLT_WRITE_ALPHA;
272 ilo_cp_begin(cp, cmd_len);
273 ilo_cp_write(cp, dw0);
274 ilo_cp_write(cp, dw1);
/*
 * NOTE(review): the coordinates are int16_t and get promoted to int. A
 * negative x would sign-extend over the y half of the dword, and shifting
 * a negative y left is undefined in C. tex_copy_region range-checks x2,
 * y2, src_x and src_y against 32767 before calling.
 */
275 ilo_cp_write(cp, y1 << 16 | x1);
276 ilo_cp_write(cp, y2 << 16 | x2);
277 ilo_cp_write_bo(cp, dst_offset, dst_bo, INTEL_DOMAIN_RENDER,
278 INTEL_DOMAIN_RENDER);
279 ilo_cp_write(cp, src_y << 16 | src_x);
280 ilo_cp_write(cp, src_pitch >> src_pitch_shift);
281 ilo_cp_write_bo(cp, src_offset, src_bo, INTEL_DOMAIN_RENDER, 0);
/*
 * Copy a box between two textures with the BLT engine, emitting one
 * XY_SRC_COPY_BLT per depth slice of src_box.
 *
 * dst, dst_x, dst_y, dst_z: destination texture and origin
 * src, src_box:             source texture and the region to copy
 * (dst_level and src_level, used below, are declared on lines that are
 * missing from this listing.)
 *
 * Returns true only when the slice loop ran to completion, that is when
 * slice equals src_box->depth afterwards.
 */
286 tex_copy_region(struct ilo_context *ilo,
287 struct ilo_texture *dst,
289 unsigned dst_x, unsigned dst_y, unsigned dst_z,
290 struct ilo_texture *src,
292 const struct pipe_box *src_box)
294 const struct util_format_description *desc =
295 util_format_description(dst->bo_format);
296 const unsigned max_extent = 32767; /* INT16_MAX */
297 const uint8_t rop = 0xcc; /* SRCCOPY */
298 struct intel_bo *aper_check[3];
300 int cpp, xscale, slice;
302 /* no W-tiling support */
303 if (dst->separate_s8 || src->separate_s8)
/* the strides are passed on as int16_t pitches, so they must not exceed max_extent */
306 if (dst->bo_stride > max_extent || src->bo_stride > max_extent)
/* bytes per format block */
309 cpp = desc->block.bits / 8;
312 /* accommodate for larger cpp */
/* cpp is reduced to 4 or 2; xscale is how many such units make up one block */
317 cpp = (cpp % 4 == 0) ? 4 : 2;
318 xscale = (desc->block.bits / 8) / cpp;
321 ilo_blit_own_blt_ring(ilo);
323 /* make room if necessary */
324 aper_check[0] = ilo->cp->bo;
325 aper_check[1] = dst->bo;
326 aper_check[2] = src->bo;
327 if (ilo->winsys->check_aperture_space(ilo->winsys, aper_check, 3))
328 ilo_cp_flush(ilo->cp);
/* build the BCS_SWCTRL value: Y-tiled surfaces need their tiling bit set */
332 if (dst->tiling == INTEL_TILING_Y) {
333 swctrl |= BCS_SWCTRL_DST_Y << 16 |
337 if (src->tiling == INTEL_TILING_Y) {
338 swctrl |= BCS_SWCTRL_SRC_Y << 16 |
344 * Most clients expect BLT engine to be stateless. If we have to set
345 * BCS_SWCTRL to a non-default value, we have to set it back in the same
/*
 * Space needed: two MI_FLUSH_DW (4 dwords) plus MI_LOAD_REGISTER_IMM
 * (3 dwords) pairs, and 8 dwords of XY_SRC_COPY_BLT per slice.
 */
348 if (ilo_cp_space(ilo->cp) < (4 + 3) * 2 + src_box->depth * 8)
349 ilo_cp_flush(ilo->cp);
351 ilo_cp_assert_no_implicit_flush(ilo->cp, true);
354 * From the Ivy Bridge PRM, volume 1 part 4, page 133:
356 * "SW is required to flush the HW before changing the polarity of
357 * this bit (Tile Y Destination/Source)."
359 gen6_MI_FLUSH_DW(ilo);
360 gen6_MI_LOAD_REGISTER_IMM(ilo, BCS_SWCTRL, swctrl);
/* drop the Y-tiling bits so that the register write after the loop puts the default back */
362 swctrl &= ~(BCS_SWCTRL_DST_Y | BCS_SWCTRL_SRC_Y);
365 for (slice = 0; slice < src_box->depth; slice++) {
366 const struct ilo_texture_slice *dst_slice =
367 &dst->slice_offsets[dst_level][dst_z + slice];
368 const struct ilo_texture_slice *src_slice =
369 &src->slice_offsets[src_level][src_box->z + slice];
370 unsigned x1, y1, x2, y2, src_x, src_y;
372 x1 = (dst_slice->x + dst_x) * xscale;
373 y1 = dst_slice->y + dst_y;
/*
 * NOTE(review): x1 was already multiplied by xscale on the line that sets
 * it, so this multiplies the destination origin by xscale a second time.
 * With xscale > 1 the expected value looks like
 * x1 + src_box->width * xscale; confirm and fix.
 */
374 x2 = (x1 + src_box->width) * xscale;
375 y2 = y1 + src_box->height;
376 src_x = (src_slice->x + src_box->x) * xscale;
377 src_y = src_slice->y + src_box->y;
/* convert to units of format blocks; the far corner rounds up */
379 x1 /= desc->block.width;
380 y1 /= desc->block.height;
381 x2 = (x2 + desc->block.width - 1) / desc->block.width;
382 y2 = (y2 + desc->block.height - 1) / desc->block.height;
383 src_x /= desc->block.width;
384 src_y /= desc->block.height;
/*
 * Give up on coordinates that do not fit the int16_t command fields.
 * NOTE(review): this accepts a row of exactly
 * gen6_max_bytes_per_scanline bytes, while gen6_XY_SRC_COPY_BLT asserts
 * a strictly smaller value. Also, earlier slices have already been
 * emitted when this triggers on a later slice; confirm the caller copes.
 */
386 if (x2 > max_extent || y2 > max_extent ||
387 src_x > max_extent || src_y > max_extent ||
388 (x2 - x1) * cpp > gen6_max_bytes_per_scanline)
391 gen6_XY_SRC_COPY_BLT(ilo,
392 dst->bo, 0, dst->bo_stride, dst->tiling,
394 src->bo, 0, src->bo_stride, src->tiling,
395 src_x, src_y, rop, cpp, true);
/* flush again and write the BCS_SWCTRL value that had its Y-tiling bits cleared */
399 gen6_MI_FLUSH_DW(ilo);
400 gen6_MI_LOAD_REGISTER_IMM(ilo, BCS_SWCTRL, swctrl);
402 ilo_cp_assert_no_implicit_flush(ilo->cp, false);
405 return (slice == src_box->depth);
/*
 * Copy a byte range between two buffers with SRC_COPY_BLT, treating the
 * data as rows of single-byte pixels (cpp is passed as 1).
 *
 * dst, dst_offset: destination buffer and starting offset
 * src, src_offset: source buffer and starting offset
 * (The size parameter used below is declared on a line that is missing
 * from this listing, as are the declarations of offset and pitch.)
 *
 * The copy is split into chunks: each iteration emits one blit of
 * width x height bytes, then advances offset and reduces size.
 */
409 buf_copy_region(struct ilo_context *ilo,
410 struct ilo_buffer *dst, unsigned dst_offset,
411 struct ilo_buffer *src, unsigned src_offset,
414 const uint8_t rop = 0xcc; /* SRCCOPY */
416 struct intel_bo *aper_check[3];
418 ilo_blit_own_blt_ring(ilo);
420 /* make room if necessary */
421 aper_check[0] = ilo->cp->bo;
422 aper_check[1] = dst->bo;
423 aper_check[2] = src->bo;
424 if (ilo->winsys->check_aperture_space(ilo->winsys, aper_check, 3))
425 ilo_cp_flush(ilo->cp);
428 unsigned width, height;
/*
 * NOTE(review): a width of exactly gen6_max_bytes_per_scanline is not
 * split here, yet gen6_SRC_COPY_BLT asserts a strictly smaller width.
 * Using >= looks like the intended comparison; confirm.
 */
435 if (width > gen6_max_bytes_per_scanline) {
436 /* less than INT16_MAX and dword-aligned */
440 height = size / width;
/*
 * NOTE(review): gen6_SRC_COPY_BLT takes height as uint16_t and asserts
 * height < gen6_max_scanlines, so a height clamped to 65536 would be
 * truncated to 0 in the call while offset and size below still advance
 * by the full amount. Clamping to gen6_max_scanlines - 1 looks intended;
 * confirm.
 */
441 if (height > gen6_max_scanlines)
442 height = gen6_max_scanlines;
445 gen6_SRC_COPY_BLT(ilo,
446 dst->bo, dst_offset + offset, pitch,
448 src->bo, src_offset + offset, pitch,
449 rop, 1, true, false);
/* advance past the rows just copied */
451 offset += pitch * height;
452 size -= width * height;
/*
 * resource_copy_region hook installed by ilo_init_blit_functions.
 *
 * Texture-to-texture copies are handed to tex_copy_region and
 * buffer-to-buffer copies to buf_copy_region. util_resource_copy_region
 * is the remaining path; presumably it runs when the BLT path did not
 * succeed or the targets are mixed (the condition lines are missing from
 * this listing, confirm).
 */
459 ilo_resource_copy_region(struct pipe_context *pipe,
460 struct pipe_resource *dst,
462 unsigned dstx, unsigned dsty, unsigned dstz,
463 struct pipe_resource *src,
465 const struct pipe_box *src_box)
469 if (dst->target != PIPE_BUFFER && src->target != PIPE_BUFFER) {
470 success = tex_copy_region(ilo_context(pipe),
471 ilo_texture(dst), dst_level, dstx, dsty, dstz,
472 ilo_texture(src), src_level, src_box);
474 else if (dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER) {
/* for buffers only the x coordinate and the width of the box are meaningful */
475 const unsigned dst_offset = dstx;
476 const unsigned src_offset = src_box->x;
477 const unsigned size = src_box->width;
479 assert(dst_level == 0 && dsty == 0 && dstz == 0);
480 assert(src_level == 0 &&
483 src_box->height == 1 &&
484 src_box->depth == 1);
486 success = buf_copy_region(ilo_context(pipe),
487 ilo_buffer(dst), dst_offset, ilo_buffer(src), src_offset, size);
494 util_resource_copy_region(pipe, dst, dst_level,
495 dstx, dsty, dstz, src, src_level, src_box);
/*
 * Fill the rectangle (x1, y1)-(x2, y2) of a texture resource using
 * XY_COLOR_BLT on the BLT ring.
 *
 * res:            the resource; it is accessed as an ilo_texture
 * x1, y1, x2, y2: rectangle corners as int16_t. Callers that hold wider
 *                 values must make sure they fit.
 * (The color parameter passed to the command is declared on a line that
 * is missing from this listing.)
 *
 * Y-tiled textures and empty rectangles are rejected before any command
 * is emitted. The values returned are on lines missing from this listing;
 * ilo_clear_render_target tests the result in an if statement.
 */
500 blitter_xy_color_blt(struct pipe_context *pipe,
501 struct pipe_resource *res,
502 int16_t x1, int16_t y1,
503 int16_t x2, int16_t y2,
506 struct ilo_context *ilo = ilo_context(pipe);
507 struct ilo_texture *tex = ilo_texture(res);
508 const int cpp = util_format_get_blocksize(tex->bo_format);
509 const uint8_t rop = 0xf0; /* PATCOPY */
510 struct intel_bo *aper_check[2];
512 /* how to support Y-tiling? */
513 if (tex->tiling == INTEL_TILING_Y)
516 /* nothing to clear */
517 if (x1 >= x2 || y1 >= y2)
520 ilo_blit_own_blt_ring(ilo);
522 /* make room if necessary */
523 aper_check[0] = ilo->cp->bo;
524 aper_check[1] = tex->bo;
525 if (ilo->winsys->check_aperture_space(ilo->winsys, aper_check, 2))
526 ilo_cp_flush(ilo->cp);
/* offset 0 within the bo; write_alpha is passed as true */
528 gen6_XY_COLOR_BLT(ilo,
529 tex->bo, 0, tex->bo_stride, tex->tiling,
530 x1, y1, x2, y2, color, rop, cpp, true);
/*
 * Operation kinds passed to ilo_blitter_begin, which switches on them to
 * decide which extra state to save.
 * NOTE(review): ilo_blitter_begin also handles ILO_BLITTER_CLEAR and
 * ILO_BLITTER_BLIT; those enumerators are presumably on lines missing
 * from this listing.
 */
535 enum ilo_blitter_op {
537 ILO_BLITTER_CLEAR_SURFACE,
/*
 * Save the context state with util_blitter_save_* calls before a
 * util_blitter operation.
 *
 * op selects the additional state to save:
 *   ILO_BLITTER_CLEAR         - rasterizer
 *   ILO_BLITTER_CLEAR_SURFACE - framebuffer
 *   ILO_BLITTER_BLIT          - rasterizer, framebuffer, fragment sampler
 *                               states and fragment sampler views
 * NOTE(review): the switch header and break lines are missing from this
 * listing, so the absence of fall-through between cases is assumed.
 */
542 ilo_blitter_begin(struct ilo_context *ilo, enum ilo_blitter_op op)
544 /* as documented in util/u_blitter.h */
545 util_blitter_save_vertex_buffer_slot(ilo->blitter, ilo->vb.states);
546 util_blitter_save_vertex_elements(ilo->blitter, (void *) ilo->ve);
547 util_blitter_save_vertex_shader(ilo->blitter, ilo->vs);
548 util_blitter_save_geometry_shader(ilo->blitter, ilo->gs);
549 util_blitter_save_so_targets(ilo->blitter, ilo->so.count, ilo->so.states);
551 util_blitter_save_fragment_shader(ilo->blitter, ilo->fs);
552 util_blitter_save_depth_stencil_alpha(ilo->blitter, (void *) ilo->dsa);
553 util_blitter_save_blend(ilo->blitter, (void *) ilo->blend);
556 util_blitter_save_viewport(ilo->blitter, &ilo->viewport.viewport0);
557 util_blitter_save_stencil_ref(ilo->blitter, &ilo->stencil_ref);
558 util_blitter_save_sample_mask(ilo->blitter, ilo->sample_mask);
/* state saved only for particular operations */
561 case ILO_BLITTER_CLEAR:
562 util_blitter_save_rasterizer(ilo->blitter, (void *) ilo->rasterizer);
564 case ILO_BLITTER_CLEAR_SURFACE:
565 util_blitter_save_framebuffer(ilo->blitter, &ilo->fb.state);
567 case ILO_BLITTER_BLIT:
568 util_blitter_save_rasterizer(ilo->blitter, (void *) ilo->rasterizer);
569 util_blitter_save_framebuffer(ilo->blitter, &ilo->fb.state);
571 util_blitter_save_fragment_sampler_states(ilo->blitter,
572 ilo->sampler[PIPE_SHADER_FRAGMENT].count,
573 (void **) ilo->sampler[PIPE_SHADER_FRAGMENT].cso);
575 util_blitter_save_fragment_sampler_views(ilo->blitter,
576 ilo->view[PIPE_SHADER_FRAGMENT].count,
577 ilo->view[PIPE_SHADER_FRAGMENT].states);
579 /* disable render condition? */
/*
 * Counterpart of ilo_blitter_begin; every util_blitter operation in this
 * file is followed by a call to it.
 * NOTE(review): the body is not visible in this listing.
 */
587 ilo_blitter_end(struct ilo_context *ilo)
/*
 * clear hook installed by ilo_init_blit_functions: clears the buffers of
 * the current framebuffer state through util_blitter_clear.
 *
 * color: clear color forwarded unchanged to util_blitter_clear
 * (buffers, depth and stencil are forwarded as well; their declarations
 * are on lines missing from this listing.)
 */
592 ilo_clear(struct pipe_context *pipe,
594 const union pipe_color_union *color,
598 struct ilo_context *ilo = ilo_context(pipe);
600 /* TODO we should pause/resume some queries */
601 ilo_blitter_begin(ilo, ILO_BLITTER_CLEAR);
/* the format comes from the first color buffer when one is bound; the other operand of ?: is on a line missing here */
603 util_blitter_clear(ilo->blitter,
604 ilo->fb.state.width, ilo->fb.state.height,
605 ilo->fb.state.nr_cbufs, buffers,
606 (ilo->fb.state.nr_cbufs) ? ilo->fb.state.cbufs[0]->format :
608 color, depth, stencil);
610 ilo_blitter_end(ilo);
/*
 * clear_render_target hook installed by ilo_init_blit_functions.
 *
 * dst:            surface to clear
 * color:          clear color
 * dstx, dsty:     origin of the rectangle
 * width, height:  extent of the rectangle; clipped to the surface
 *
 * The rectangle is first offered to the BLT engine via
 * blitter_xy_color_blt; util_blitter_clear_render_target is the generic
 * path (presumably reached only when the BLT attempt fails, the lines in
 * between are missing from this listing).
 */
614 ilo_clear_render_target(struct pipe_context *pipe,
615 struct pipe_surface *dst,
616 const union pipe_color_union *color,
617 unsigned dstx, unsigned dsty,
618 unsigned width, unsigned height)
620 struct ilo_context *ilo = ilo_context(pipe);
621 union util_color packed;
/* empty rectangle, or origin outside the surface */
623 if (!width || !height || dstx >= dst->width || dsty >= dst->height)
/* clip the rectangle to the surface bounds */
626 if (dstx + width > dst->width)
627 width = dst->width - dstx;
628 if (dsty + height > dst->height)
629 height = dst->height - dsty;
631 util_pack_color(color->f, dst->format, &packed);
633 /* try HW blit first */
/*
 * NOTE(review): blitter_xy_color_blt takes int16_t coordinates, so the
 * unsigned sums passed here are narrowed; surfaces larger than 32767 in
 * either dimension would wrap. Confirm that such sizes cannot occur.
 */
634 if (blitter_xy_color_blt(pipe, dst->texture,
636 dstx + width, dsty + height,
640 ilo_blitter_begin(ilo, ILO_BLITTER_CLEAR_SURFACE);
641 util_blitter_clear_render_target(ilo->blitter,
642 dst, color, dstx, dsty, width, height);
643 ilo_blitter_end(ilo);
/*
 * clear_depth_stencil hook installed by ilo_init_blit_functions.
 * No BLT attempt is made here; the request always goes through
 * util_blitter_clear_depth_stencil, for the reason given in the comment
 * in the body.
 *
 * dst:            depth/stencil surface to clear
 * clear_flags:    forwarded unchanged to util_blitter_clear_depth_stencil
 * dstx, dsty, width, height: rectangle to clear, forwarded unchanged
 * (depth and stencil are forwarded as well; their declarations are on
 * lines missing from this listing.)
 */
647 ilo_clear_depth_stencil(struct pipe_context *pipe,
648 struct pipe_surface *dst,
649 unsigned clear_flags,
652 unsigned dstx, unsigned dsty,
653 unsigned width, unsigned height)
655 struct ilo_context *ilo = ilo_context(pipe);
658 * The PRM claims that HW blit supports Y-tiling since GEN6, but it does
659 * not tell us how to program it. Since depth buffers are always Y-tiled,
660 * HW blit will not work.
662 ilo_blitter_begin(ilo, ILO_BLITTER_CLEAR_SURFACE);
663 util_blitter_clear_depth_stencil(ilo->blitter,
664 dst, clear_flags, depth, stencil, dstx, dsty, width, height);
665 ilo_blitter_end(ilo);
/*
 * blit hook installed by ilo_init_blit_functions.
 *
 * Order of attempts:
 *   1. util_try_blit_via_copy_region
 *   2. util_blitter_blit with the request as given
 *   3. util_blitter_blit with PIPE_MASK_S removed from the mask, when the
 *      full request is not supported but the reduced one is
 * A warning is logged when stencil is dropped or when the generic blitter
 * cannot handle the request at all.
 */
669 ilo_blit(struct pipe_context *pipe, const struct pipe_blit_info *info)
671 struct ilo_context *ilo = ilo_context(pipe);
672 struct pipe_blit_info skip_stencil;
674 if (util_try_blit_via_copy_region(pipe, info))
677 if (!util_blitter_is_blit_supported(ilo->blitter, info)) {
678 /* try without stencil */
679 if (info->mask & PIPE_MASK_S) {
/* work on a local copy so that the blit info of the caller stays untouched */
680 skip_stencil = *info;
681 skip_stencil.mask = info->mask & ~PIPE_MASK_S;
683 if (util_blitter_is_blit_supported(ilo->blitter, &skip_stencil))
684 info = &skip_stencil;
/* info points at the local copy only when the stencil-free variant was accepted */
687 if (info == &skip_stencil) {
688 ilo_warn("ignore stencil buffer blitting\n");
691 ilo_warn("failed to blit with the generic blitter\n");
696 ilo_blitter_begin(ilo, ILO_BLITTER_BLIT);
697 util_blitter_blit(ilo->blitter, info);
698 ilo_blitter_end(ilo);
702 * Initialize blit-related functions.
/*
 * Install the copy, blit and clear entry points of this file into the
 * function table at ilo->base.
 */
705 ilo_init_blit_functions(struct ilo_context *ilo)
/* copy and blit hooks */
707 ilo->base.resource_copy_region = ilo_resource_copy_region;
708 ilo->base.blit = ilo_blit;
/* clear hooks */
710 ilo->base.clear = ilo_clear;
711 ilo->base.clear_render_target = ilo_clear_render_target;
712 ilo->base.clear_depth_stencil = ilo_clear_depth_stencil;