2 * Copyright © 2012 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
24 * Xiang Haihao <haihao.xiang@intel.com>
32 #include "intel_batchbuffer.h"
33 #include "intel_driver.h"
35 #include "i965_drv_video.h"
36 #include "i965_gpe_utils.h"
/* Emit PIPELINE_SELECT choosing the Media pipeline into 'batch'.
   NOTE(review): this listing is incomplete — the function's braces and the
   closing ADVANCE_BATCH are not visible; confirm against the full source. */
39 i965_gpe_select(VADriverContextP ctx,
40 struct i965_gpe_context *gpe_context,
41 struct intel_batchbuffer *batch)
43 BEGIN_BATCH(batch, 1);
44 OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA);
/* Emit a 10-dword Gen6 STATE_BASE_ADDRESS. Only the surface-state base is
   relocated (to the surface-state/binding-table BO); all other bases and
   upper bounds are left at 0 with the MODIFY bit set.
   NOTE(review): listing incomplete — the OUT_RELOC line introducing the BO
   arguments and the trailing ADVANCE_BATCH are not visible here. */
49 gen6_gpe_state_base_address(VADriverContextP ctx,
50 struct i965_gpe_context *gpe_context,
51 struct intel_batchbuffer *batch)
53 BEGIN_BATCH(batch, 10);
55 OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (10 - 2));
56 OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* General State Base Address */
58 gpe_context->surface_state_binding_table.bo,
59 I915_GEM_DOMAIN_INSTRUCTION,
61 BASE_ADDRESS_MODIFY); /* Surface state base address */
62 OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Dynamic State Base Address */
63 OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Indirect Object Base Address */
64 OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Instruction Base Address */
65 OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* General State Access Upper Bound */
66 OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Dynamic State Access Upper Bound */
67 OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Indirect Object Access Upper Bound */
68 OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Instruction Access Upper Bound */
/* Emit an 8-dword Gen6 MEDIA_VFE_STATE from the values cached in
   gpe_context->vfe_state (thread count, URB config, CURBE size) and the
   raw scoreboard dwords vfe_desc5/6/7.
   NOTE(review): listing incomplete — braces, some OUT_BATCH prefixes and
   ADVANCE_BATCH are not visible. */
74 gen6_gpe_vfe_state(VADriverContextP ctx,
75 struct i965_gpe_context *gpe_context,
76 struct intel_batchbuffer *batch)
79 BEGIN_BATCH(batch, 8);
81 OUT_BATCH(batch, CMD_MEDIA_VFE_STATE | (8 - 2));
82 OUT_BATCH(batch, 0); /* Scratch Space Base Pointer and Space */
84 gpe_context->vfe_state.max_num_threads << 16 | /* Maximum Number of Threads */
85 gpe_context->vfe_state.num_urb_entries << 8 | /* Number of URB Entries */
86 gpe_context->vfe_state.gpgpu_mode << 2); /* MEDIA Mode */
87 OUT_BATCH(batch, 0); /* Debug: Object ID */
89 gpe_context->vfe_state.urb_entry_size << 16 | /* URB Entry Allocation Size */
90 gpe_context->vfe_state.curbe_allocation_size); /* CURBE Allocation Size */
91 /* the vfe_desc5/6/7 will decide whether the scoreboard is used. */
92 OUT_BATCH(batch, gpe_context->vfe_desc5.dword);
93 OUT_BATCH(batch, gpe_context->vfe_desc6.dword);
94 OUT_BATCH(batch, gpe_context->vfe_desc7.dword);
/* Emit MEDIA_CURBE_LOAD pointing at the constant buffer (curbe.bo) with the
   configured length. The CURBE BO is referenced via a relocation.
   NOTE(review): listing incomplete — one OUT_BATCH (likely the reserved
   dword) and the braces are not visible. */
101 gen6_gpe_curbe_load(VADriverContextP ctx,
102 struct i965_gpe_context *gpe_context,
103 struct intel_batchbuffer *batch)
105 BEGIN_BATCH(batch, 4);
107 OUT_BATCH(batch, CMD_MEDIA_CURBE_LOAD | (4 - 2));
109 OUT_BATCH(batch, gpe_context->curbe.length);
110 OUT_RELOC(batch, gpe_context->curbe.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
112 ADVANCE_BATCH(batch);
/* Emit MEDIA_INTERFACE_DESCRIPTOR_LOAD for the interface descriptor table:
   total size = max_entries * entry_size, address via relocation of idrt.bo.
   NOTE(review): listing incomplete — a reserved dword and braces are not
   visible. */
116 gen6_gpe_idrt(VADriverContextP ctx,
117 struct i965_gpe_context *gpe_context,
118 struct intel_batchbuffer *batch)
120 BEGIN_BATCH(batch, 4);
122 OUT_BATCH(batch, CMD_MEDIA_INTERFACE_LOAD | (4 - 2));
124 OUT_BATCH(batch, gpe_context->idrt.max_entries * gpe_context->idrt.entry_size);
125 OUT_RELOC(batch, gpe_context->idrt.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
127 ADVANCE_BATCH(batch);
/* Copy up to MAX_GPE_KERNELS kernel descriptors into the context, then
   allocate one BO per kernel and upload its binary via dri_bo_subdata.
   Ownership: the context owns the per-kernel BOs; i965_gpe_context_destroy
   releases them.
   NOTE(review): listing incomplete — the dri_bo_alloc size/alignment
   arguments, loop braces and the 'int i' declaration are not visible. */
131 i965_gpe_load_kernels(VADriverContextP ctx,
132 struct i965_gpe_context *gpe_context,
133 struct i965_kernel *kernel_list,
134 unsigned int num_kernels)
136 struct i965_driver_data *i965 = i965_driver_data(ctx);
139 assert(num_kernels <= MAX_GPE_KERNELS);
140 memcpy(gpe_context->kernels, kernel_list, sizeof(*kernel_list) * num_kernels);
141 gpe_context->num_kernels = num_kernels;
143 for (i = 0; i < num_kernels; i++) {
144 struct i965_kernel *kernel = &gpe_context->kernels[i];
146 kernel->bo = dri_bo_alloc(i965->intel.bufmgr,
151 dri_bo_subdata(kernel->bo, 0, kernel->size, kernel->bin);
/* Release all BOs owned by a Gen6/7 GPE context (surface-state/binding
   table, IDRT, CURBE, and each loaded kernel) and NULL the pointers to
   guard against double-free. dri_bo_unreference(NULL) is assumed safe here,
   mirroring free(NULL). */
156 i965_gpe_context_destroy(struct i965_gpe_context *gpe_context)
160 dri_bo_unreference(gpe_context->surface_state_binding_table.bo);
161 gpe_context->surface_state_binding_table.bo = NULL;
163 dri_bo_unreference(gpe_context->idrt.bo);
164 gpe_context->idrt.bo = NULL;
166 dri_bo_unreference(gpe_context->curbe.bo);
167 gpe_context->curbe.bo = NULL;
169 for (i = 0; i < gpe_context->num_kernels; i++) {
170 struct i965_kernel *kernel = &gpe_context->kernels[i];
172 dri_bo_unreference(kernel->bo);
/* (Re)allocate the three BOs a Gen6/7 GPE context needs: surface-state &
   binding table, interface descriptor table, and CURBE. Each slot is
   unreferenced first so the function is safe to call on an already-
   initialized context.
   NOTE(review): listing incomplete — alignment arguments to dri_bo_alloc,
   the CURBE BO's name string, and any assert(bo) checks are not visible. */
178 i965_gpe_context_init(VADriverContextP ctx,
179 struct i965_gpe_context *gpe_context)
181 struct i965_driver_data *i965 = i965_driver_data(ctx);
184 dri_bo_unreference(gpe_context->surface_state_binding_table.bo);
185 bo = dri_bo_alloc(i965->intel.bufmgr,
186 "surface state & binding table",
187 gpe_context->surface_state_binding_table.length,
190 gpe_context->surface_state_binding_table.bo = bo;
192 dri_bo_unreference(gpe_context->idrt.bo);
193 bo = dri_bo_alloc(i965->intel.bufmgr,
194 "interface descriptor table",
195 gpe_context->idrt.entry_size * gpe_context->idrt.max_entries,
198 gpe_context->idrt.bo = bo;
200 dri_bo_unreference(gpe_context->curbe.bo);
201 bo = dri_bo_alloc(i965->intel.bufmgr,
203 gpe_context->curbe.length,
206 gpe_context->curbe.bo = bo;
/* Full Gen6 media pipeline prologue: MI flush, pipeline select, state base
   addresses, VFE state, CURBE load, then the interface descriptor load.
   Order matters — state base addresses must precede state that is addressed
   relative to them. */
210 gen6_gpe_pipeline_setup(VADriverContextP ctx,
211 struct i965_gpe_context *gpe_context,
212 struct intel_batchbuffer *batch)
214 intel_batchbuffer_emit_mi_flush(batch);
216 i965_gpe_select(ctx, gpe_context, batch);
217 gen6_gpe_state_base_address(ctx, gpe_context, batch);
218 gen6_gpe_vfe_state(ctx, gpe_context, batch);
219 gen6_gpe_curbe_load(ctx, gpe_context, batch);
220 gen6_gpe_idrt(ctx, gpe_context, batch);
/* Gen8 pipeline epilogue hook.
   NOTE(review): the body is not visible in this listing — it may be empty
   or contain trailing batch commands; confirm against the full source. */
224 gen8_gpe_pipeline_end(VADriverContextP ctx,
225 struct i965_gpe_context *gpe_context,
226 struct intel_batchbuffer *batch)
/* Translate an I915_TILING_* value into the tiled_surface/tile_walk bits of
   a Gen6-style SURFACE_STATE (dword 3).
   NOTE(review): listing incomplete — the switch statement, the X/Y case
   labels and break statements are not visible. */
232 i965_gpe_set_surface_tiling(struct i965_surface_state *ss, unsigned int tiling)
235 case I915_TILING_NONE:
236 ss->ss3.tiled_surface = 0;
237 ss->ss3.tile_walk = 0;
240 ss->ss3.tiled_surface = 1;
241 ss->ss3.tile_walk = I965_TILEWALK_XMAJOR;
244 ss->ss3.tiled_surface = 1;
245 ss->ss3.tile_walk = I965_TILEWALK_YMAJOR;
/* Same tiling translation as i965_gpe_set_surface_tiling, but for the
   media SURFACE_STATE variant (bits live in dword 2).
   NOTE(review): switch/case/break lines are missing from this listing. */
251 i965_gpe_set_surface2_tiling(struct i965_surface_state2 *ss, unsigned int tiling)
254 case I915_TILING_NONE:
255 ss->ss2.tiled_surface = 0;
256 ss->ss2.tile_walk = 0;
259 ss->ss2.tiled_surface = 1;
260 ss->ss2.tile_walk = I965_TILEWALK_XMAJOR;
263 ss->ss2.tiled_surface = 1;
264 ss->ss2.tile_walk = I965_TILEWALK_YMAJOR;
/* Gen7 variant of the tiling translation: tiled_surface/tile_walk bits sit
   in SURFACE_STATE dword 0.
   NOTE(review): switch/case/break lines are missing from this listing. */
270 gen7_gpe_set_surface_tiling(struct gen7_surface_state *ss, unsigned int tiling)
273 case I915_TILING_NONE:
274 ss->ss0.tiled_surface = 0;
275 ss->ss0.tile_walk = 0;
278 ss->ss0.tiled_surface = 1;
279 ss->ss0.tile_walk = I965_TILEWALK_XMAJOR;
282 ss->ss0.tiled_surface = 1;
283 ss->ss0.tile_walk = I965_TILEWALK_YMAJOR;
/* Gen7 media SURFACE_STATE tiling translation (bits in dword 2).
   NOTE(review): switch/case/break lines are missing from this listing. */
289 gen7_gpe_set_surface2_tiling(struct gen7_surface_state2 *ss, unsigned int tiling)
292 case I915_TILING_NONE:
293 ss->ss2.tiled_surface = 0;
294 ss->ss2.tile_walk = 0;
297 ss->ss2.tiled_surface = 1;
298 ss->ss2.tile_walk = I965_TILEWALK_XMAJOR;
301 ss->ss2.tiled_surface = 1;
302 ss->ss2.tile_walk = I965_TILEWALK_YMAJOR;
/* Gen8 SURFACE_STATE tiling translation (bits in dword 0).
   NOTE(review): switch/case/break lines are missing from this listing. */
308 gen8_gpe_set_surface_tiling(struct gen8_surface_state *ss, unsigned int tiling)
311 case I915_TILING_NONE:
312 ss->ss0.tiled_surface = 0;
313 ss->ss0.tile_walk = 0;
316 ss->ss0.tiled_surface = 1;
317 ss->ss0.tile_walk = I965_TILEWALK_XMAJOR;
320 ss->ss0.tiled_surface = 1;
321 ss->ss0.tile_walk = I965_TILEWALK_YMAJOR;
/* Gen8 media SURFACE_STATE tiling translation (bits in dword 2).
   NOTE(review): switch/case/break lines are missing from this listing. */
327 gen8_gpe_set_surface2_tiling(struct gen8_surface_state2 *ss, unsigned int tiling)
330 case I915_TILING_NONE:
331 ss->ss2.tiled_surface = 0;
332 ss->ss2.tile_walk = 0;
335 ss->ss2.tiled_surface = 1;
336 ss->ss2.tile_walk = I965_TILEWALK_XMAJOR;
339 ss->ss2.tiled_surface = 1;
340 ss->ss2.tile_walk = I965_TILEWALK_YMAJOR;
/* Fill a media SURFACE_STATE (surface_state2) for an NV12 VA surface:
   interleaved-chroma planar 4:2:0, width/height/pitch programmed as
   value-minus-one, UV plane located via x_cb_offset/y_cb_offset.
   Uses the non-relocated bo->offset as the base address — the caller must
   still emit a relocation for it (see i965_gpe_surface2_setup).
   NOTE(review): listing incomplete — declarations of w, h, w_pitch and the
   function braces are not visible. */
346 i965_gpe_set_surface2_state(VADriverContextP ctx,
347 struct object_surface *obj_surface,
348 struct i965_surface_state2 *ss)
351 unsigned int tiling, swizzle;
353 assert(obj_surface->bo);
354 assert(obj_surface->fourcc == VA_FOURCC_NV12);
356 dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
357 w = obj_surface->orig_width;
358 h = obj_surface->orig_height;
359 w_pitch = obj_surface->width;
361 memset(ss, 0, sizeof(*ss));
363 ss->ss0.surface_base_address = obj_surface->bo->offset;
365 ss->ss1.cbcr_pixel_offset_v_direction = 2;
366 ss->ss1.width = w - 1;
367 ss->ss1.height = h - 1;
369 ss->ss2.surface_format = MFX_SURFACE_PLANAR_420_8;
370 ss->ss2.interleave_chroma = 1;
371 ss->ss2.pitch = w_pitch - 1;
372 ss->ss2.half_pitch_for_chroma = 0;
373 i965_gpe_set_surface2_tiling(ss, tiling);
374 /* ss3: UV offset for interleave mode */
375 ss->ss3.x_offset_for_cb = obj_surface->x_cb_offset;
376 ss->ss3.y_offset_for_cb = obj_surface->y_cb_offset;
/* Write a surface_state2 into the mapped surface-state/binding-table BO at
   surface_state_offset, emit a read-only relocation for its base address
   (ss0), and store the state's offset into the binding-table slot.
   NOTE(review): listing incomplete — the dri_bo_map/assert/dri_bo_unmap
   lines and some relocation arguments are not visible. */
380 i965_gpe_surface2_setup(VADriverContextP ctx,
381 struct i965_gpe_context *gpe_context,
382 struct object_surface *obj_surface,
383 unsigned long binding_table_offset,
384 unsigned long surface_state_offset)
386 struct i965_surface_state2 *ss;
389 bo = gpe_context->surface_state_binding_table.bo;
393 ss = (struct i965_surface_state2 *)((char *)bo->virtual + surface_state_offset);
394 i965_gpe_set_surface2_state(ctx, obj_surface, ss);
395 dri_bo_emit_reloc(bo,
396 I915_GEM_DOMAIN_RENDER, 0,
398 surface_state_offset + offsetof(struct i965_surface_state2, ss0),
401 *((unsigned int *)((char *)bo->virtual + binding_table_offset)) = surface_state_offset;
/* Fill a Gen6 2D R8_UNORM SURFACE_STATE for media read/write messages over
   the luma plane. Width is programmed in dwords (w / 4 - 1) because media
   block messages address dword columns.
   NOTE(review): listing incomplete — declarations of w, h, w_pitch and the
   braces are not visible. */
406 i965_gpe_set_media_rw_surface_state(VADriverContextP ctx,
407 struct object_surface *obj_surface,
408 struct i965_surface_state *ss)
411 unsigned int tiling, swizzle;
413 dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
414 w = obj_surface->orig_width;
415 h = obj_surface->orig_height;
416 w_pitch = obj_surface->width;
418 memset(ss, 0, sizeof(*ss));
420 ss->ss0.surface_type = I965_SURFACE_2D;
421 ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
423 ss->ss1.base_addr = obj_surface->bo->offset;
425 ss->ss2.width = w / 4 - 1; /* in DWORDs for media read & write message */
426 ss->ss2.height = h - 1;
428 ss->ss3.pitch = w_pitch - 1;
429 i965_gpe_set_surface_tiling(ss, tiling);
/* Install a media R/W surface: map the state BO, fill the SURFACE_STATE,
   emit a relocation (write domain enabled only when write_enabled), and
   point the binding-table slot at it.
   NOTE(review): listing incomplete — the write_enabled parameter line,
   assert(bo->virtual), some reloc arguments and dri_bo_unmap are missing. */
433 i965_gpe_media_rw_surface_setup(VADriverContextP ctx,
434 struct i965_gpe_context *gpe_context,
435 struct object_surface *obj_surface,
436 unsigned long binding_table_offset,
437 unsigned long surface_state_offset,
440 struct i965_surface_state *ss;
443 bo = gpe_context->surface_state_binding_table.bo;
444 dri_bo_map(bo, True);
447 ss = (struct i965_surface_state *)((char *)bo->virtual + surface_state_offset);
448 i965_gpe_set_media_rw_surface_state(ctx, obj_surface, ss);
449 dri_bo_emit_reloc(bo,
450 I915_GEM_DOMAIN_RENDER, write_enabled ? I915_GEM_DOMAIN_RENDER : 0,
452 surface_state_offset + offsetof(struct i965_surface_state, ss1),
455 *((unsigned int *)((char *)bo->virtual + binding_table_offset)) = surface_state_offset;
/* Fill a Gen6 BUFFER SURFACE_STATE. num_entries is the element count; it is
   split minus-one across width (7 bits), height (13 bits) and depth (7 bits)
   per the Gen6 buffer-surface layout.
   NOTE(review): listing incomplete — the num_entries declaration and braces
   are not visible. */
460 i965_gpe_set_buffer_surface_state(VADriverContextP ctx,
461 struct i965_buffer_surface *buffer_surface,
462 struct i965_surface_state *ss)
466 assert(buffer_surface->bo);
467 num_entries = buffer_surface->num_blocks * buffer_surface->size_block / buffer_surface->pitch;
469 memset(ss, 0, sizeof(*ss));
471 ss->ss0.render_cache_read_mode = 1;
472 ss->ss0.surface_type = I965_SURFACE_BUFFER;
474 ss->ss1.base_addr = buffer_surface->bo->offset;
476 ss->ss2.width = ((num_entries - 1) & 0x7f);
477 ss->ss2.height = (((num_entries - 1) >> 7) & 0x1fff);
479 ss->ss3.depth = (((num_entries - 1) >> 20) & 0x7f);
480 ss->ss3.pitch = buffer_surface->pitch - 1;
/* Install a read/write buffer surface into the binding table (relocation
   has both read and write RENDER domains).
   Name keeps the historical "suface" typo — part of the public interface,
   do not rename without fixing all callers.
   NOTE(review): listing incomplete — dri_bo_map/unmap and some reloc
   arguments are not visible. */
484 i965_gpe_buffer_suface_setup(VADriverContextP ctx,
485 struct i965_gpe_context *gpe_context,
486 struct i965_buffer_surface *buffer_surface,
487 unsigned long binding_table_offset,
488 unsigned long surface_state_offset)
490 struct i965_surface_state *ss;
493 bo = gpe_context->surface_state_binding_table.bo;
497 ss = (struct i965_surface_state *)((char *)bo->virtual + surface_state_offset);
498 i965_gpe_set_buffer_surface_state(ctx, buffer_surface, ss);
499 dri_bo_emit_reloc(bo,
500 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
502 surface_state_offset + offsetof(struct i965_surface_state, ss1),
505 *((unsigned int *)((char *)bo->virtual + binding_table_offset)) = surface_state_offset;
/* Gen7 counterpart of i965_gpe_set_surface2_state: fill a media
   SURFACE_STATE for an NV12 surface (interleaved chroma, minus-one
   dimensions, UV offsets in ss3).
   NOTE(review): listing incomplete — declarations of w, h, w_pitch and the
   braces are not visible. */
510 gen7_gpe_set_surface2_state(VADriverContextP ctx,
511 struct object_surface *obj_surface,
512 struct gen7_surface_state2 *ss)
515 unsigned int tiling, swizzle;
517 assert(obj_surface->bo);
518 assert(obj_surface->fourcc == VA_FOURCC_NV12);
520 dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
521 w = obj_surface->orig_width;
522 h = obj_surface->orig_height;
523 w_pitch = obj_surface->width;
525 memset(ss, 0, sizeof(*ss));
527 ss->ss0.surface_base_address = obj_surface->bo->offset;
529 ss->ss1.cbcr_pixel_offset_v_direction = 2;
530 ss->ss1.width = w - 1;
531 ss->ss1.height = h - 1;
533 ss->ss2.surface_format = MFX_SURFACE_PLANAR_420_8;
534 ss->ss2.interleave_chroma = 1;
535 ss->ss2.pitch = w_pitch - 1;
536 ss->ss2.half_pitch_for_chroma = 0;
537 gen7_gpe_set_surface2_tiling(ss, tiling);
538 /* ss3: UV offset for interleave mode */
539 ss->ss3.x_offset_for_cb = obj_surface->x_cb_offset;
540 ss->ss3.y_offset_for_cb = obj_surface->y_cb_offset;
/* Gen7: write a surface_state2 into the state BO, relocate its base
   address (ss0, read-only), and record the offset in the binding table.
   NOTE(review): listing incomplete — map/unmap and some reloc arguments
   are not visible. */
544 gen7_gpe_surface2_setup(VADriverContextP ctx,
545 struct i965_gpe_context *gpe_context,
546 struct object_surface *obj_surface,
547 unsigned long binding_table_offset,
548 unsigned long surface_state_offset)
550 struct gen7_surface_state2 *ss;
553 bo = gpe_context->surface_state_binding_table.bo;
557 ss = (struct gen7_surface_state2 *)((char *)bo->virtual + surface_state_offset);
558 gen7_gpe_set_surface2_state(ctx, obj_surface, ss);
559 dri_bo_emit_reloc(bo,
560 I915_GEM_DOMAIN_RENDER, 0,
562 surface_state_offset + offsetof(struct gen7_surface_state2, ss0),
565 *((unsigned int *)((char *)bo->virtual + binding_table_offset)) = surface_state_offset;
/* Gen7 2D R8_UNORM SURFACE_STATE for media R/W messages over the luma
   plane; width is in dwords (w / 4 - 1).
   NOTE(review): listing incomplete — declarations of w, h, w_pitch and
   braces are not visible. */
570 gen7_gpe_set_media_rw_surface_state(VADriverContextP ctx,
571 struct object_surface *obj_surface,
572 struct gen7_surface_state *ss)
575 unsigned int tiling, swizzle;
577 dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
578 w = obj_surface->orig_width;
579 h = obj_surface->orig_height;
580 w_pitch = obj_surface->width;
582 memset(ss, 0, sizeof(*ss));
584 ss->ss0.surface_type = I965_SURFACE_2D;
585 ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
587 ss->ss1.base_addr = obj_surface->bo->offset;
589 ss->ss2.width = w / 4 - 1; /* in DWORDs for media read & write message */
590 ss->ss2.height = h - 1;
592 ss->ss3.pitch = w_pitch - 1;
593 gen7_gpe_set_surface_tiling(ss, tiling);
/* Haswell: SURFACE_STATE for the NV12 chroma (CbCr) plane. The plane starts
   at height*width bytes past the surface base (aligned luma size), and its
   height is half the aligned surface height.
   NOTE(review): listing incomplete — declarations of w, w_pitch,
   cbcr_offset and braces are not visible. */
597 gen75_gpe_set_media_chroma_surface_state(VADriverContextP ctx,
598 struct object_surface *obj_surface,
599 struct gen7_surface_state *ss)
602 unsigned int tiling, swizzle;
605 dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
606 w = obj_surface->orig_width;
607 w_pitch = obj_surface->width;
609 cbcr_offset = obj_surface->height * obj_surface->width;
610 memset(ss, 0, sizeof(*ss));
612 ss->ss0.surface_type = I965_SURFACE_2D;
613 ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
615 ss->ss1.base_addr = obj_surface->bo->offset + cbcr_offset;
617 ss->ss2.width = w / 4 - 1; /* in DWORDs for media read & write message */
618 ss->ss2.height = (obj_surface->height / 2) -1;
620 ss->ss3.pitch = w_pitch - 1;
621 gen7_gpe_set_surface_tiling(ss, tiling);
/* Gen7: install a media R/W luma surface — fill the state, relocate ss1
   (write domain only when write_enabled), update the binding table.
   NOTE(review): listing incomplete — the write_enabled parameter line,
   assert(bo->virtual), some reloc arguments and dri_bo_unmap are missing. */
625 gen7_gpe_media_rw_surface_setup(VADriverContextP ctx,
626 struct i965_gpe_context *gpe_context,
627 struct object_surface *obj_surface,
628 unsigned long binding_table_offset,
629 unsigned long surface_state_offset,
632 struct gen7_surface_state *ss;
635 bo = gpe_context->surface_state_binding_table.bo;
636 dri_bo_map(bo, True);
639 ss = (struct gen7_surface_state *)((char *)bo->virtual + surface_state_offset);
640 gen7_gpe_set_media_rw_surface_state(ctx, obj_surface, ss);
641 dri_bo_emit_reloc(bo,
642 I915_GEM_DOMAIN_RENDER, write_enabled ? I915_GEM_DOMAIN_RENDER : 0,
644 surface_state_offset + offsetof(struct gen7_surface_state, ss1),
647 *((unsigned int *)((char *)bo->virtual + binding_table_offset)) = surface_state_offset;
/* Haswell: install the NV12 chroma-plane surface. The relocation target is
   ss1 with delta cbcr_offset so the kernel reloc points at the CbCr plane.
   NOTE(review): listing incomplete — the write_enabled parameter line,
   cbcr_offset declaration, reloc delta argument and dri_bo_unmap are not
   visible. */
652 gen75_gpe_media_chroma_surface_setup(VADriverContextP ctx,
653 struct i965_gpe_context *gpe_context,
654 struct object_surface *obj_surface,
655 unsigned long binding_table_offset,
656 unsigned long surface_state_offset,
659 struct gen7_surface_state *ss;
663 assert(obj_surface->fourcc == VA_FOURCC_NV12);
664 bo = gpe_context->surface_state_binding_table.bo;
665 dri_bo_map(bo, True);
668 cbcr_offset = obj_surface->height * obj_surface->width;
669 ss = (struct gen7_surface_state *)((char *)bo->virtual + surface_state_offset);
670 gen75_gpe_set_media_chroma_surface_state(ctx, obj_surface, ss);
671 dri_bo_emit_reloc(bo,
672 I915_GEM_DOMAIN_RENDER, write_enabled ? I915_GEM_DOMAIN_RENDER : 0,
674 surface_state_offset + offsetof(struct gen7_surface_state, ss1),
677 *((unsigned int *)((char *)bo->virtual + binding_table_offset)) = surface_state_offset;
/* Gen7 BUFFER SURFACE_STATE. Field widths differ from Gen6: height is
   14 bits (mask 0x3fff) and depth starts at bit 21 with a 6-bit mask —
   see the Gen6 variant above for comparison.
   NOTE(review): listing incomplete — the num_entries declaration and
   braces are not visible. */
683 gen7_gpe_set_buffer_surface_state(VADriverContextP ctx,
684 struct i965_buffer_surface *buffer_surface,
685 struct gen7_surface_state *ss)
689 assert(buffer_surface->bo);
690 num_entries = buffer_surface->num_blocks * buffer_surface->size_block / buffer_surface->pitch;
692 memset(ss, 0, sizeof(*ss));
694 ss->ss0.surface_type = I965_SURFACE_BUFFER;
696 ss->ss1.base_addr = buffer_surface->bo->offset;
698 ss->ss2.width = ((num_entries - 1) & 0x7f);
699 ss->ss2.height = (((num_entries - 1) >> 7) & 0x3fff);
701 ss->ss3.depth = (((num_entries - 1) >> 21) & 0x3f);
702 ss->ss3.pitch = buffer_surface->pitch - 1;
/* Gen7: install a read/write buffer surface (both RENDER domains in the
   relocation) and record its offset in the binding table. Public name keeps
   the historical "suface" typo.
   NOTE(review): listing incomplete — map/unmap and some reloc arguments
   are not visible. */
706 gen7_gpe_buffer_suface_setup(VADriverContextP ctx,
707 struct i965_gpe_context *gpe_context,
708 struct i965_buffer_surface *buffer_surface,
709 unsigned long binding_table_offset,
710 unsigned long surface_state_offset)
712 struct gen7_surface_state *ss;
715 bo = gpe_context->surface_state_binding_table.bo;
719 ss = (struct gen7_surface_state *)((char *)bo->virtual + surface_state_offset);
720 gen7_gpe_set_buffer_surface_state(ctx, buffer_surface, ss);
721 dri_bo_emit_reloc(bo,
722 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
724 surface_state_offset + offsetof(struct gen7_surface_state, ss1),
727 *((unsigned int *)((char *)bo->virtual + binding_table_offset)) = surface_state_offset;
/* Gen8+ media SURFACE_STATE for an NV12 surface. Differences from the
   pre-Gen8 variants: 48-bit base address split across ss6 (low 32) and ss7
   (high bits of bo->offset64), and on Gen9 the surface MOCS is set to
   GEN9_CACHE_PTE.
   NOTE(review): listing incomplete — declarations of w, h, w_pitch and
   braces are not visible. */
732 gen8_gpe_set_surface2_state(VADriverContextP ctx,
733 struct object_surface *obj_surface,
734 struct gen8_surface_state2 *ss)
736 struct i965_driver_data *i965 = i965_driver_data(ctx);
738 unsigned int tiling, swizzle;
740 assert(obj_surface->bo);
741 assert(obj_surface->fourcc == VA_FOURCC_NV12);
743 dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
744 w = obj_surface->orig_width;
745 h = obj_surface->orig_height;
746 w_pitch = obj_surface->width;
748 memset(ss, 0, sizeof(*ss));
750 if (IS_GEN9(i965->intel.device_info))
751 ss->ss5.surface_object_mocs = GEN9_CACHE_PTE;
753 ss->ss6.base_addr = (uint32_t)obj_surface->bo->offset64;
754 ss->ss7.base_addr_high = (uint32_t)(obj_surface->bo->offset64 >> 32);
756 ss->ss1.cbcr_pixel_offset_v_direction = 2;
757 ss->ss1.width = w - 1;
758 ss->ss1.height = h - 1;
760 ss->ss2.surface_format = MFX_SURFACE_PLANAR_420_8;
761 ss->ss2.interleave_chroma = 1;
762 ss->ss2.pitch = w_pitch - 1;
763 ss->ss2.half_pitch_for_chroma = 0;
764 gen8_gpe_set_surface2_tiling(ss, tiling);
765 /* ss3: UV offset for interleave mode */
766 ss->ss3.x_offset_for_cb = obj_surface->x_cb_offset;
767 ss->ss3.y_offset_for_cb = obj_surface->y_cb_offset;
/* Gen8: install a surface_state2. The relocation now targets ss6 (the
   64-bit base-address pair) instead of ss0 as on earlier gens.
   NOTE(review): listing incomplete — map/unmap and some reloc arguments
   are not visible. */
771 gen8_gpe_surface2_setup(VADriverContextP ctx,
772 struct i965_gpe_context *gpe_context,
773 struct object_surface *obj_surface,
774 unsigned long binding_table_offset,
775 unsigned long surface_state_offset)
777 struct gen8_surface_state2 *ss;
780 bo = gpe_context->surface_state_binding_table.bo;
784 ss = (struct gen8_surface_state2 *)((char *)bo->virtual + surface_state_offset);
785 gen8_gpe_set_surface2_state(ctx, obj_surface, ss);
786 dri_bo_emit_reloc(bo,
787 I915_GEM_DOMAIN_RENDER, 0,
789 surface_state_offset + offsetof(struct gen8_surface_state2, ss6),
792 *((unsigned int *)((char *)bo->virtual + binding_table_offset)) = surface_state_offset;
/* Gen8+ 2D R8_UNORM SURFACE_STATE for media R/W messages: 64-bit base
   address in ss8/ss9, Gen9 MOCS set to GEN9_CACHE_PTE, width in dwords.
   NOTE(review): listing incomplete — declarations of w, h, w_pitch and
   braces are not visible. */
797 gen8_gpe_set_media_rw_surface_state(VADriverContextP ctx,
798 struct object_surface *obj_surface,
799 struct gen8_surface_state *ss)
801 struct i965_driver_data *i965 = i965_driver_data(ctx);
803 unsigned int tiling, swizzle;
805 dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
806 w = obj_surface->orig_width;
807 h = obj_surface->orig_height;
808 w_pitch = obj_surface->width;
810 memset(ss, 0, sizeof(*ss));
812 if (IS_GEN9(i965->intel.device_info))
813 ss->ss1.surface_mocs = GEN9_CACHE_PTE;
815 ss->ss0.surface_type = I965_SURFACE_2D;
816 ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
818 ss->ss8.base_addr = (uint32_t)obj_surface->bo->offset64;
819 ss->ss9.base_addr_high = (uint32_t)(obj_surface->bo->offset64 >> 32);
821 ss->ss2.width = w / 4 - 1; /* in DWORDs for media read & write message */
822 ss->ss2.height = h - 1;
824 ss->ss3.pitch = w_pitch - 1;
825 gen8_gpe_set_surface_tiling(ss, tiling);
/* Gen8+ chroma-plane (CbCr of NV12) SURFACE_STATE: base address is
   offset64 + cbcr_offset split into ss8/ss9; height is half the aligned
   surface height.
   NOTE(review): listing incomplete — declarations of w, w_pitch,
   cbcr_offset and braces are not visible. */
829 gen8_gpe_set_media_chroma_surface_state(VADriverContextP ctx,
830 struct object_surface *obj_surface,
831 struct gen8_surface_state *ss)
833 struct i965_driver_data *i965 = i965_driver_data(ctx);
835 unsigned int tiling, swizzle;
837 uint64_t base_offset;
839 dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
840 w = obj_surface->orig_width;
841 w_pitch = obj_surface->width;
843 cbcr_offset = obj_surface->height * obj_surface->width;
844 memset(ss, 0, sizeof(*ss));
846 if (IS_GEN9(i965->intel.device_info))
847 ss->ss1.surface_mocs = GEN9_CACHE_PTE;
849 ss->ss0.surface_type = I965_SURFACE_2D;
850 ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
852 base_offset = obj_surface->bo->offset64 + cbcr_offset;
853 ss->ss8.base_addr = (uint32_t) base_offset;
854 ss->ss9.base_addr_high = (uint32_t) (base_offset >> 32);
856 ss->ss2.width = w / 4 - 1; /* in DWORDs for media read & write message */
857 ss->ss2.height = (obj_surface->height / 2) -1;
859 ss->ss3.pitch = w_pitch - 1;
860 gen8_gpe_set_surface_tiling(ss, tiling);
/* Gen8: install a media R/W luma surface; the relocation targets ss8
   (64-bit address pair). Write domain only when write_enabled.
   NOTE(review): listing incomplete — the write_enabled parameter line,
   assert(bo->virtual), some reloc arguments and dri_bo_unmap are missing. */
864 gen8_gpe_media_rw_surface_setup(VADriverContextP ctx,
865 struct i965_gpe_context *gpe_context,
866 struct object_surface *obj_surface,
867 unsigned long binding_table_offset,
868 unsigned long surface_state_offset,
871 struct gen8_surface_state *ss;
874 bo = gpe_context->surface_state_binding_table.bo;
875 dri_bo_map(bo, True);
878 ss = (struct gen8_surface_state *)((char *)bo->virtual + surface_state_offset);
879 gen8_gpe_set_media_rw_surface_state(ctx, obj_surface, ss);
880 dri_bo_emit_reloc(bo,
881 I915_GEM_DOMAIN_RENDER, write_enabled ? I915_GEM_DOMAIN_RENDER : 0,
883 surface_state_offset + offsetof(struct gen8_surface_state, ss8),
886 *((unsigned int *)((char *)bo->virtual + binding_table_offset)) = surface_state_offset;
/* Gen8: install the NV12 chroma-plane surface; relocation targets ss8 with
   delta cbcr_offset so it points at the CbCr plane.
   NOTE(review): listing incomplete — the write_enabled parameter line,
   cbcr_offset declaration, reloc delta argument and dri_bo_unmap are not
   visible. */
891 gen8_gpe_media_chroma_surface_setup(VADriverContextP ctx,
892 struct i965_gpe_context *gpe_context,
893 struct object_surface *obj_surface,
894 unsigned long binding_table_offset,
895 unsigned long surface_state_offset,
898 struct gen8_surface_state *ss;
902 assert(obj_surface->fourcc == VA_FOURCC_NV12);
903 bo = gpe_context->surface_state_binding_table.bo;
904 dri_bo_map(bo, True);
907 cbcr_offset = obj_surface->height * obj_surface->width;
908 ss = (struct gen8_surface_state *)((char *)bo->virtual + surface_state_offset);
909 gen8_gpe_set_media_chroma_surface_state(ctx, obj_surface, ss);
910 dri_bo_emit_reloc(bo,
911 I915_GEM_DOMAIN_RENDER, write_enabled ? I915_GEM_DOMAIN_RENDER : 0,
913 surface_state_offset + offsetof(struct gen8_surface_state, ss8),
916 *((unsigned int *)((char *)bo->virtual + binding_table_offset)) = surface_state_offset;
/* Gen8+ BUFFER SURFACE_STATE: Gen7-style field split (14-bit height,
   depth from bit 21), 64-bit base address in ss8/ss9, Gen9 MOCS to
   GEN9_CACHE_PTE.
   NOTE(review): listing incomplete — the num_entries declaration and
   braces are not visible. */
922 gen8_gpe_set_buffer_surface_state(VADriverContextP ctx,
923 struct i965_buffer_surface *buffer_surface,
924 struct gen8_surface_state *ss)
926 struct i965_driver_data *i965 = i965_driver_data(ctx);
929 assert(buffer_surface->bo);
930 num_entries = buffer_surface->num_blocks * buffer_surface->size_block / buffer_surface->pitch;
932 memset(ss, 0, sizeof(*ss));
934 ss->ss0.surface_type = I965_SURFACE_BUFFER;
935 if (IS_GEN9(i965->intel.device_info))
936 ss->ss1.surface_mocs = GEN9_CACHE_PTE;
939 ss->ss8.base_addr = (uint32_t)buffer_surface->bo->offset64;
940 ss->ss9.base_addr_high = (uint32_t)(buffer_surface->bo->offset64 >> 32);
942 ss->ss2.width = ((num_entries - 1) & 0x7f);
943 ss->ss2.height = (((num_entries - 1) >> 7) & 0x3fff);
945 ss->ss3.depth = (((num_entries - 1) >> 21) & 0x3f);
946 ss->ss3.pitch = buffer_surface->pitch - 1;
/* Gen8: install a read/write buffer surface; relocation targets ss8
   (64-bit address pair). Public name keeps the historical "suface" typo.
   NOTE(review): listing incomplete — map/unmap and some reloc arguments
   are not visible. */
950 gen8_gpe_buffer_suface_setup(VADriverContextP ctx,
951 struct i965_gpe_context *gpe_context,
952 struct i965_buffer_surface *buffer_surface,
953 unsigned long binding_table_offset,
954 unsigned long surface_state_offset)
956 struct gen8_surface_state *ss;
959 bo = gpe_context->surface_state_binding_table.bo;
963 ss = (struct gen8_surface_state *)((char *)bo->virtual + surface_state_offset);
964 gen8_gpe_set_buffer_surface_state(ctx, buffer_surface, ss);
965 dri_bo_emit_reloc(bo,
966 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
968 surface_state_offset + offsetof(struct gen8_surface_state, ss8),
971 *((unsigned int *)((char *)bo->virtual + binding_table_offset)) = surface_state_offset;
/* Gen8+ 16-dword STATE_BASE_ADDRESS. Each optional base (dynamic state,
   indirect object, instruction) is emitted as a 64-bit relocation when the
   corresponding BO exists; otherwise the slot is padded (the else-branch
   OUT_BATCH pairs below). Upper bounds are clamped to 0xFFFFF000.
   NOTE(review): listing incomplete — the general-state address-high dword,
   DW3 MOCS dword, and the if/else braces are not visible. */
976 gen8_gpe_state_base_address(VADriverContextP ctx,
977 struct i965_gpe_context *gpe_context,
978 struct intel_batchbuffer *batch)
980 BEGIN_BATCH(batch, 16);
982 OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | 14);
984 OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY); //General State Base Address
988 /*DW4 Surface state base address */
989 OUT_RELOC64(batch, gpe_context->surface_state_binding_table.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */
991 /*DW6. Dynamic state base address */
992 if (gpe_context->dynamic_state.bo)
993 OUT_RELOC64(batch, gpe_context->dynamic_state.bo,
994 I915_GEM_DOMAIN_RENDER | I915_GEM_DOMAIN_SAMPLER,
995 0, BASE_ADDRESS_MODIFY);
997 OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1002 /*DW8. Indirect Object base address */
1003 if (gpe_context->indirect_state.bo)
1004 OUT_RELOC64(batch, gpe_context->indirect_state.bo,
1005 I915_GEM_DOMAIN_SAMPLER,
1006 0, BASE_ADDRESS_MODIFY);
1008 OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1009 OUT_BATCH(batch, 0);
1013 /*DW10. Instruct base address */
1014 if (gpe_context->instruction_state.bo)
1015 OUT_RELOC64(batch, gpe_context->instruction_state.bo,
1016 I915_GEM_DOMAIN_INSTRUCTION,
1017 0, BASE_ADDRESS_MODIFY);
1019 OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1020 OUT_BATCH(batch, 0);
1023 /* DW12. Size limitation */
1024 OUT_BATCH(batch, 0xFFFFF000 | BASE_ADDRESS_MODIFY); //General State Access Upper Bound
1025 OUT_BATCH(batch, 0xFFFFF000 | BASE_ADDRESS_MODIFY); //Dynamic State Access Upper Bound
1026 OUT_BATCH(batch, 0xFFFFF000 | BASE_ADDRESS_MODIFY); //Indirect Object Access Upper Bound
1027 OUT_BATCH(batch, 0xFFFFF000 | BASE_ADDRESS_MODIFY); //Instruction Access Upper Bound
1030 OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY); //LLC Coherent Base Address
1031 OUT_BATCH(batch, 0xFFFFF000 | BASE_ADDRESS_MODIFY ); //LLC Coherent Upper Bound
1034 ADVANCE_BATCH(batch);
/* Gen8+ 9-dword MEDIA_VFE_STATE (one dword longer than Gen6: the scratch
   pointer is 64-bit, hence two zero dwords). Remaining fields mirror the
   Gen6 variant.
   NOTE(review): listing incomplete — some OUT_BATCH prefixes and braces
   are not visible. */
1038 gen8_gpe_vfe_state(VADriverContextP ctx,
1039 struct i965_gpe_context *gpe_context,
1040 struct intel_batchbuffer *batch)
1043 BEGIN_BATCH(batch, 9);
1045 OUT_BATCH(batch, CMD_MEDIA_VFE_STATE | (9 - 2));
1046 /* Scratch Space Base Pointer and Space */
1047 OUT_BATCH(batch, 0);
1048 OUT_BATCH(batch, 0);
1051 gpe_context->vfe_state.max_num_threads << 16 | /* Maximum Number of Threads */
1052 gpe_context->vfe_state.num_urb_entries << 8 | /* Number of URB Entries */
1053 gpe_context->vfe_state.gpgpu_mode << 2); /* MEDIA Mode */
1054 OUT_BATCH(batch, 0); /* Debug: Object ID */
1056 gpe_context->vfe_state.urb_entry_size << 16 | /* URB Entry Allocation Size */
1057 gpe_context->vfe_state.curbe_allocation_size); /* CURBE Allocation Size */
1059 /* the vfe_desc5/6/7 will decide whether the scoreboard is used. */
1060 OUT_BATCH(batch, gpe_context->vfe_desc5.dword);
1061 OUT_BATCH(batch, gpe_context->vfe_desc6.dword);
1062 OUT_BATCH(batch, gpe_context->vfe_desc7.dword);
1064 ADVANCE_BATCH(batch);
/* Gen8+ MEDIA_CURBE_LOAD: no relocation needed — the CURBE lives inside
   the dynamic-state BO, addressed by curbe.offset relative to the dynamic
   state base. Length is rounded up to a 64-byte multiple. */
1070 gen8_gpe_curbe_load(VADriverContextP ctx,
1071 struct i965_gpe_context *gpe_context,
1072 struct intel_batchbuffer *batch)
1074 BEGIN_BATCH(batch, 4);
1076 OUT_BATCH(batch, CMD_MEDIA_CURBE_LOAD | (4 - 2));
1077 OUT_BATCH(batch, 0);
1078 OUT_BATCH(batch, ALIGN(gpe_context->curbe.length, 64));
1079 OUT_BATCH(batch, gpe_context->curbe.offset);
1081 ADVANCE_BATCH(batch);
/* Gen8+ interface-descriptor load preceded by MEDIA_STATE_FLUSH. Like the
   CURBE, the IDRT is addressed by an offset into the dynamic-state BO, so
   no relocation is emitted. */
1085 gen8_gpe_idrt(VADriverContextP ctx,
1086 struct i965_gpe_context *gpe_context,
1087 struct intel_batchbuffer *batch)
1089 BEGIN_BATCH(batch, 6);
1091 OUT_BATCH(batch, CMD_MEDIA_STATE_FLUSH);
1092 OUT_BATCH(batch, 0);
1094 OUT_BATCH(batch, CMD_MEDIA_INTERFACE_LOAD | (4 - 2));
1095 OUT_BATCH(batch, 0);
1096 OUT_BATCH(batch, gpe_context->idrt.max_entries * gpe_context->idrt.entry_size);
1097 OUT_BATCH(batch, gpe_context->idrt.offset);
1099 ADVANCE_BATCH(batch);
/* Full Gen8 media pipeline prologue, mirroring the Gen6 sequence: flush,
   pipeline select, state base addresses, VFE state, CURBE load, IDRT. */
1104 gen8_gpe_pipeline_setup(VADriverContextP ctx,
1105 struct i965_gpe_context *gpe_context,
1106 struct intel_batchbuffer *batch)
1108 intel_batchbuffer_emit_mi_flush(batch);
1110 i965_gpe_select(ctx, gpe_context, batch);
1111 gen8_gpe_state_base_address(ctx, gpe_context, batch);
1112 gen8_gpe_vfe_state(ctx, gpe_context, batch);
1113 gen8_gpe_curbe_load(ctx, gpe_context, batch);
1114 gen8_gpe_idrt(ctx, gpe_context, batch);
/* Gen8 context init: allocate the surface-state/binding-table BO, then a
   single dynamic-state BO sub-divided (64-byte aligned) into CURBE, IDRT
   and sampler-state regions. curbe.bo/idrt.bo/sampler.bo all alias the
   dynamic-state BO via extra references; their .offset fields locate each
   region, and dynamic_state.end_offset records the used size.
   NOTE(review): listing incomplete — the "surface state & binding table"
   name on the dynamic-state alloc (likely a copy-paste label), the bo_size
   argument, and the initial end_offset line after L630 are partially
   missing; confirm against the full source. */
1118 gen8_gpe_context_init(VADriverContextP ctx,
1119 struct i965_gpe_context *gpe_context)
1121 struct i965_driver_data *i965 = i965_driver_data(ctx);
1124 unsigned int start_offset, end_offset;
1126 dri_bo_unreference(gpe_context->surface_state_binding_table.bo);
1127 bo = dri_bo_alloc(i965->intel.bufmgr,
1128 "surface state & binding table",
1129 gpe_context->surface_state_binding_table.length,
1132 gpe_context->surface_state_binding_table.bo = bo;
1134 bo_size = gpe_context->idrt.max_entries * ALIGN(gpe_context->idrt.entry_size, 64) +
1135 ALIGN(gpe_context->curbe.length, 64) +
1136 gpe_context->sampler.max_entries * ALIGN(gpe_context->sampler.entry_size, 64);
1137 dri_bo_unreference(gpe_context->dynamic_state.bo);
1138 bo = dri_bo_alloc(i965->intel.bufmgr,
1139 "surface state & binding table",
1143 gpe_context->dynamic_state.bo = bo;
1144 gpe_context->dynamic_state.bo_size = bo_size;
1147 gpe_context->dynamic_state.end_offset = 0;
1149 /* Constant buffer offset */
1150 start_offset = ALIGN(end_offset, 64);
1151 dri_bo_unreference(gpe_context->curbe.bo);
1152 gpe_context->curbe.bo = bo;
1153 dri_bo_reference(gpe_context->curbe.bo);
1154 gpe_context->curbe.offset = start_offset;
1155 end_offset = start_offset + gpe_context->curbe.length;
1157 /* Interface descriptor offset */
1158 start_offset = ALIGN(end_offset, 64);
1159 dri_bo_unreference(gpe_context->idrt.bo);
1160 gpe_context->idrt.bo = bo;
1161 dri_bo_reference(gpe_context->idrt.bo);
1162 gpe_context->idrt.offset = start_offset;
1163 end_offset = start_offset + ALIGN(gpe_context->idrt.entry_size, 64) * gpe_context->idrt.max_entries;
1165 /* Sampler state offset */
1166 start_offset = ALIGN(end_offset, 64);
1167 dri_bo_unreference(gpe_context->sampler.bo);
1168 gpe_context->sampler.bo = bo;
1169 dri_bo_reference(gpe_context->sampler.bo);
1170 gpe_context->sampler.offset = start_offset;
1171 end_offset = start_offset + ALIGN(gpe_context->sampler.entry_size, 64) * gpe_context->sampler.max_entries;
1173 /* update the end offset of dynamic_state */
1174 gpe_context->dynamic_state.end_offset = end_offset;
1179 gen8_gpe_context_destroy(struct i965_gpe_context *gpe_context)
1181 dri_bo_unreference(gpe_context->surface_state_binding_table.bo);
1182 gpe_context->surface_state_binding_table.bo = NULL;
1184 dri_bo_unreference(gpe_context->instruction_state.bo);
1185 gpe_context->instruction_state.bo = NULL;
1187 dri_bo_unreference(gpe_context->dynamic_state.bo);
1188 gpe_context->dynamic_state.bo = NULL;
1190 dri_bo_unreference(gpe_context->indirect_state.bo);
1191 gpe_context->indirect_state.bo = NULL;
1193 dri_bo_unreference(gpe_context->curbe.bo);
1194 gpe_context->curbe.bo = NULL;
1196 dri_bo_unreference(gpe_context->idrt.bo);
1197 gpe_context->idrt.bo = NULL;
1199 dri_bo_unreference(gpe_context->sampler.bo);
1200 gpe_context->sampler.bo = NULL;
1205 gen8_gpe_load_kernels(VADriverContextP ctx,
1206 struct i965_gpe_context *gpe_context,
1207 struct i965_kernel *kernel_list,
1208 unsigned int num_kernels)
1210 struct i965_driver_data *i965 = i965_driver_data(ctx);
1211 int i, kernel_size = 0;
1212 unsigned int kernel_offset, end_offset;
1213 unsigned char *kernel_ptr;
1214 struct i965_kernel *kernel;
1216 assert(num_kernels <= MAX_GPE_KERNELS);
1217 memcpy(gpe_context->kernels, kernel_list, sizeof(*kernel_list) * num_kernels);
1218 gpe_context->num_kernels = num_kernels;
1220 for (i = 0; i < num_kernels; i++) {
1221 kernel = &gpe_context->kernels[i];
1223 kernel_size += ALIGN(kernel->size, 64);
1226 gpe_context->instruction_state.bo = dri_bo_alloc(i965->intel.bufmgr,
1230 if (gpe_context->instruction_state.bo == NULL) {
1231 WARN_ONCE("failure to allocate the buffer space for kernel shader\n");
1235 assert(gpe_context->instruction_state.bo);
1237 gpe_context->instruction_state.bo_size = kernel_size;
1238 gpe_context->instruction_state.end_offset = 0;
1241 dri_bo_map(gpe_context->instruction_state.bo, 1);
1242 kernel_ptr = (unsigned char *)(gpe_context->instruction_state.bo->virtual);
1243 for (i = 0; i < num_kernels; i++) {
1244 kernel_offset = ALIGN(end_offset, 64);
1245 kernel = &gpe_context->kernels[i];
1246 kernel->kernel_offset = kernel_offset;
1249 memcpy(kernel_ptr + kernel_offset, kernel->bin, kernel->size);
1251 end_offset = kernel_offset + kernel->size;
1255 gpe_context->instruction_state.end_offset = end_offset;
1257 dri_bo_unmap(gpe_context->instruction_state.bo);
1263 gen9_gpe_state_base_address(VADriverContextP ctx,
1264 struct i965_gpe_context *gpe_context,
1265 struct intel_batchbuffer *batch)
1267 struct i965_driver_data *i965 = i965_driver_data(ctx);
1268 BEGIN_BATCH(batch, 19);
1270 OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (19 - 2));
1272 OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY); //General State Base Address
1273 OUT_BATCH(batch, 0);
1274 OUT_BATCH(batch, 0);
1276 /*DW4 Surface state base address */
1277 OUT_RELOC64(batch, gpe_context->surface_state_binding_table.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY | (i965->intel.mocs_state << 4)); /* Surface state base address */
1279 /*DW6. Dynamic state base address */
1280 if (gpe_context->dynamic_state.bo)
1281 OUT_RELOC64(batch, gpe_context->dynamic_state.bo,
1282 I915_GEM_DOMAIN_RENDER | I915_GEM_DOMAIN_SAMPLER,
1283 I915_GEM_DOMAIN_RENDER,
1284 BASE_ADDRESS_MODIFY | (i965->intel.mocs_state << 4));
1286 OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1287 OUT_BATCH(batch, 0);
1291 /*DW8. Indirect Object base address */
1292 if (gpe_context->indirect_state.bo)
1293 OUT_RELOC64(batch, gpe_context->indirect_state.bo,
1294 I915_GEM_DOMAIN_SAMPLER,
1295 0, BASE_ADDRESS_MODIFY | (i965->intel.mocs_state << 4));
1297 OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1298 OUT_BATCH(batch, 0);
1302 /*DW10. Instruct base address */
1303 if (gpe_context->instruction_state.bo)
1304 OUT_RELOC64(batch, gpe_context->instruction_state.bo,
1305 I915_GEM_DOMAIN_INSTRUCTION,
1306 0, BASE_ADDRESS_MODIFY | (i965->intel.mocs_state << 4));
1308 OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1309 OUT_BATCH(batch, 0);
1313 /* DW12. Size limitation */
1314 OUT_BATCH(batch, 0xFFFFF000 | BASE_ADDRESS_MODIFY); //General State Access Upper Bound
1315 OUT_BATCH(batch, 0xFFFFF000 | BASE_ADDRESS_MODIFY); //Dynamic State Access Upper Bound
1316 OUT_BATCH(batch, 0xFFFFF000 | BASE_ADDRESS_MODIFY); //Indirect Object Access Upper Bound
1317 OUT_BATCH(batch, 0xFFFFF000 | BASE_ADDRESS_MODIFY); //Instruction Access Upper Bound
1319 /* the bindless surface state address */
1320 OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1321 OUT_BATCH(batch, 0);
1322 OUT_BATCH(batch, 0xFFFFF000);
1324 ADVANCE_BATCH(batch);
1328 gen9_gpe_select(VADriverContextP ctx,
1329 struct i965_gpe_context *gpe_context,
1330 struct intel_batchbuffer *batch)
1332 BEGIN_BATCH(batch, 1);
1333 OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA |
1334 GEN9_PIPELINE_SELECTION_MASK |
1335 GEN9_MEDIA_DOP_GATE_OFF |
1336 GEN9_MEDIA_DOP_GATE_MASK |
1337 GEN9_FORCE_MEDIA_AWAKE_ON |
1338 GEN9_FORCE_MEDIA_AWAKE_MASK);
1339 ADVANCE_BATCH(batch);
1343 gen9_gpe_pipeline_setup(VADriverContextP ctx,
1344 struct i965_gpe_context *gpe_context,
1345 struct intel_batchbuffer *batch)
1347 intel_batchbuffer_emit_mi_flush(batch);
1349 gen9_gpe_select(ctx, gpe_context, batch);
1350 gen9_gpe_state_base_address(ctx, gpe_context, batch);
1351 gen8_gpe_vfe_state(ctx, gpe_context, batch);
1352 gen8_gpe_curbe_load(ctx, gpe_context, batch);
1353 gen8_gpe_idrt(ctx, gpe_context, batch);
1357 gen9_gpe_pipeline_end(VADriverContextP ctx,
1358 struct i965_gpe_context *gpe_context,
1359 struct intel_batchbuffer *batch)
1361 BEGIN_BATCH(batch, 1);
1362 OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA |
1363 GEN9_PIPELINE_SELECTION_MASK |
1364 GEN9_MEDIA_DOP_GATE_ON |
1365 GEN9_MEDIA_DOP_GATE_MASK |
1366 GEN9_FORCE_MEDIA_AWAKE_OFF |
1367 GEN9_FORCE_MEDIA_AWAKE_MASK);
1368 ADVANCE_BATCH(batch);
1372 i965_allocate_gpe_resource(dri_bufmgr *bufmgr,
1373 struct i965_gpe_resource *res,
1381 res->bo = dri_bo_alloc(bufmgr, name, res->size, 4096);
1384 return (res->bo != NULL);
1388 i965_object_surface_to_2d_gpe_resource(struct i965_gpe_resource *res,
1389 struct object_surface *obj_surface)
1391 unsigned int swizzle;
1393 res->type = I965_GPE_RESOURCE_2D;
1394 res->width = obj_surface->orig_width;
1395 res->height = obj_surface->orig_height;
1396 res->pitch = obj_surface->width;
1397 res->size = obj_surface->size;
1398 res->cb_cr_pitch = obj_surface->cb_cr_pitch;
1399 res->x_cb_offset = obj_surface->x_cb_offset;
1400 res->y_cb_offset = obj_surface->y_cb_offset;
1401 res->bo = obj_surface->bo;
1404 dri_bo_reference(res->bo);
1405 dri_bo_get_tiling(obj_surface->bo, &res->tiling, &swizzle);
1409 i965_dri_object_to_buffer_gpe_resource(struct i965_gpe_resource *res,
1412 unsigned int swizzle;
1414 res->type = I965_GPE_RESOURCE_BUFFER;
1415 res->width = bo->size;
1417 res->pitch = res->width;
1418 res->size = res->pitch * res->width;
1422 dri_bo_reference(res->bo);
1423 dri_bo_get_tiling(res->bo, &res->tiling, &swizzle);
1427 i965_dri_object_to_2d_gpe_resource(struct i965_gpe_resource *res,
1430 unsigned int height,
1433 unsigned int swizzle;
1435 res->type = I965_GPE_RESOURCE_2D;
1437 res->height = height;
1439 res->size = res->pitch * res->width;
1443 dri_bo_reference(res->bo);
1444 dri_bo_get_tiling(res->bo, &res->tiling, &swizzle);
1448 i965_zero_gpe_resource(struct i965_gpe_resource *res)
1451 dri_bo_map(res->bo, 1);
1452 memset(res->bo->virtual, 0, res->size);
1453 dri_bo_unmap(res->bo);
1458 i965_free_gpe_resource(struct i965_gpe_resource *res)
1460 dri_bo_unreference(res->bo);
1466 i965_map_gpe_resource(struct i965_gpe_resource *res)
1471 ret = dri_bo_map(res->bo, 1);
1474 res->map = res->bo->virtual;
1484 i965_unmap_gpe_resource(struct i965_gpe_resource *res)
1486 if (res->bo && res->map)
1487 dri_bo_unmap(res->bo);
1493 gen8_gpe_mi_flush_dw(VADriverContextP ctx,
1494 struct intel_batchbuffer *batch,
1495 struct gpe_mi_flush_dw_parameter *params)
1497 int video_pipeline_cache_invalidate = 0;
1498 int post_sync_operation = MI_FLUSH_DW_NOWRITE;
1500 if (params->video_pipeline_cache_invalidate)
1501 video_pipeline_cache_invalidate = MI_FLUSH_DW_VIDEO_PIPELINE_CACHE_INVALIDATE;
1504 post_sync_operation = MI_FLUSH_DW_WRITE_QWORD;
1506 __OUT_BATCH(batch, (MI_FLUSH_DW2 |
1507 video_pipeline_cache_invalidate |
1508 post_sync_operation |
1509 (5 - 2))); /* Always use PPGTT */
1512 __OUT_RELOC64(batch,
1514 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
1517 __OUT_BATCH(batch, 0);
1518 __OUT_BATCH(batch, 0);
1521 __OUT_BATCH(batch, params->dw0);
1522 __OUT_BATCH(batch, params->dw1);
1526 gen8_gpe_mi_store_data_imm(VADriverContextP ctx,
1527 struct intel_batchbuffer *batch,
1528 struct gpe_mi_store_data_imm_parameter *params)
1530 if (params->is_qword) {
1531 __OUT_BATCH(batch, MI_STORE_DATA_IMM |
1533 (5 - 2)); /* Always use PPGTT */
1535 __OUT_BATCH(batch, MI_STORE_DATA_IMM | (4 - 2)); /* Always use PPGTT */
1538 __OUT_RELOC64(batch,
1540 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
1542 __OUT_BATCH(batch, params->dw0);
1544 if (params->is_qword)
1545 __OUT_BATCH(batch, params->dw1);
1549 gen8_gpe_mi_store_register_mem(VADriverContextP ctx,
1550 struct intel_batchbuffer *batch,
1551 struct gpe_mi_store_register_mem_parameter *params)
1553 __OUT_BATCH(batch, (MI_STORE_REGISTER_MEM | (4 - 2))); /* Always use PPGTT */
1554 __OUT_BATCH(batch, params->mmio_offset);
1555 __OUT_RELOC64(batch,
1557 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
1562 gen8_gpe_mi_load_register_mem(VADriverContextP ctx,
1563 struct intel_batchbuffer *batch,
1564 struct gpe_mi_load_register_mem_parameter *params)
1566 __OUT_BATCH(batch, (MI_LOAD_REGISTER_MEM | (4 - 2))); /* Always use PPGTT */
1567 __OUT_BATCH(batch, params->mmio_offset);
1568 __OUT_RELOC64(batch,
1570 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
1575 gen8_gpe_mi_load_register_imm(VADriverContextP ctx,
1576 struct intel_batchbuffer *batch,
1577 struct gpe_mi_load_register_imm_parameter *params)
1579 __OUT_BATCH(batch, (MI_LOAD_REGISTER_IMM | (3 - 2)));
1580 __OUT_BATCH(batch, params->mmio_offset);
1581 __OUT_BATCH(batch, params->data);
1585 gen8_gpe_mi_load_register_reg(VADriverContextP ctx,
1586 struct intel_batchbuffer *batch,
1587 struct gpe_mi_load_register_reg_parameter *params)
1589 __OUT_BATCH(batch, (MI_LOAD_REGISTER_REG | (3 - 2)));
1590 __OUT_BATCH(batch, params->src_mmio_offset);
1591 __OUT_BATCH(batch, params->dst_mmio_offset);
1595 gen9_gpe_mi_math(VADriverContextP ctx,
1596 struct intel_batchbuffer *batch,
1597 struct gpe_mi_math_parameter *params)
1599 __OUT_BATCH(batch, (MI_MATH | (params->num_instructions - 1)));
1600 intel_batchbuffer_data(batch, params->instruction_list, params->num_instructions * 4);
1604 gen9_gpe_mi_conditional_batch_buffer_end(VADriverContextP ctx,
1605 struct intel_batchbuffer *batch,
1606 struct gpe_mi_conditional_batch_buffer_end_parameter *params)
1608 int compare_mask_mode_enabled = MI_COMPARE_MASK_MODE_ENANBLED;
1610 if (params->compare_mask_mode_disabled)
1611 compare_mask_mode_enabled = 0;
1613 __OUT_BATCH(batch, (MI_CONDITIONAL_BATCH_BUFFER_END |
1615 compare_mask_mode_enabled |
1616 (4 - 2))); /* Always use PPGTT */
1617 __OUT_BATCH(batch, params->compare_data);
1618 __OUT_RELOC64(batch,
1620 I915_GEM_DOMAIN_RENDER | I915_GEM_DOMAIN_INSTRUCTION, 0,
1625 gen8_gpe_mi_batch_buffer_start(VADriverContextP ctx,
1626 struct intel_batchbuffer *batch,
1627 struct gpe_mi_batch_buffer_start_parameter *params)
1629 __OUT_BATCH(batch, (MI_BATCH_BUFFER_START |
1630 (!!params->is_second_level << 22) |
1631 (!params->use_global_gtt << 8) |
1633 __OUT_RELOC64(batch,
1635 I915_GEM_DOMAIN_RENDER | I915_GEM_DOMAIN_INSTRUCTION, 0,
1640 gen8_gpe_context_set_dynamic_buffer(VADriverContextP ctx,
1641 struct i965_gpe_context *gpe_context,
1642 struct gpe_dynamic_state_parameter *ds)
1644 if (!ds->bo || !gpe_context)
1647 dri_bo_unreference(gpe_context->dynamic_state.bo);
1648 gpe_context->dynamic_state.bo = ds->bo;
1649 dri_bo_reference(gpe_context->dynamic_state.bo);
1650 gpe_context->dynamic_state.bo_size = ds->bo_size;
1652 /* curbe buffer is a part of the dynamic buffer */
1653 dri_bo_unreference(gpe_context->curbe.bo);
1654 gpe_context->curbe.bo = ds->bo;
1655 dri_bo_reference(gpe_context->curbe.bo);
1656 gpe_context->curbe.offset = ds->curbe_offset;
1658 /* idrt buffer is a part of the dynamic buffer */
1659 dri_bo_unreference(gpe_context->idrt.bo);
1660 gpe_context->idrt.bo = ds->bo;
1661 dri_bo_reference(gpe_context->idrt.bo);
1662 gpe_context->idrt.offset = ds->idrt_offset;
1664 /* sampler buffer is a part of the dynamic buffer */
1665 dri_bo_unreference(gpe_context->sampler.bo);
1666 gpe_context->sampler.bo = ds->bo;
1667 dri_bo_reference(gpe_context->sampler.bo);
1668 gpe_context->sampler.offset = ds->sampler_offset;
1674 i965_gpe_context_map_curbe(struct i965_gpe_context *gpe_context)
1676 dri_bo_map(gpe_context->curbe.bo, 1);
1678 return (char *)gpe_context->curbe.bo->virtual + gpe_context->curbe.offset;
1682 i965_gpe_context_unmap_curbe(struct i965_gpe_context *gpe_context)
1684 dri_bo_unmap(gpe_context->curbe.bo);
1688 gen9_gpe_reset_binding_table(VADriverContextP ctx,
1689 struct i965_gpe_context *gpe_context)
1691 unsigned int *binding_table;
1692 unsigned int binding_table_offset = gpe_context->surface_state_binding_table.binding_table_offset;
1695 dri_bo_map(gpe_context->surface_state_binding_table.bo, 1);
1696 binding_table = (unsigned int*)((char *)gpe_context->surface_state_binding_table.bo->virtual + binding_table_offset);
1698 for (i = 0; i < gpe_context->surface_state_binding_table.max_entries; i++) {
1699 *(binding_table + i) = gpe_context->surface_state_binding_table.surface_state_offset + i * SURFACE_STATE_PADDED_SIZE_GEN9;
1702 dri_bo_unmap(gpe_context->surface_state_binding_table.bo);
1706 gen8_gpe_setup_interface_data(VADriverContextP ctx,
1707 struct i965_gpe_context *gpe_context)
1709 struct gen8_interface_descriptor_data *desc;
1712 unsigned char *desc_ptr;
1714 bo = gpe_context->idrt.bo;
1716 assert(bo->virtual);
1717 desc_ptr = (unsigned char *)bo->virtual + gpe_context->idrt.offset;
1718 desc = (struct gen8_interface_descriptor_data *)desc_ptr;
1720 for (i = 0; i < gpe_context->num_kernels; i++) {
1721 struct i965_kernel *kernel;
1723 kernel = &gpe_context->kernels[i];
1724 assert(sizeof(*desc) == 32);
1726 /*Setup the descritor table*/
1727 memset(desc, 0, sizeof(*desc));
1728 desc->desc0.kernel_start_pointer = kernel->kernel_offset >> 6;
1729 desc->desc3.sampler_count = 0;
1730 desc->desc3.sampler_state_pointer = (gpe_context->sampler.offset >> 5);
1731 desc->desc4.binding_table_entry_count = 0;
1732 desc->desc4.binding_table_pointer = (gpe_context->surface_state_binding_table.binding_table_offset >> 5);
1733 desc->desc5.constant_urb_entry_read_offset = 0;
1734 desc->desc5.constant_urb_entry_read_length = ALIGN(gpe_context->curbe.length, 32) >> 5; // in registers
1743 gen9_gpe_set_surface_tiling(struct gen9_surface_state *ss, unsigned int tiling)
1746 case I915_TILING_NONE:
1747 ss->ss0.tiled_surface = 0;
1748 ss->ss0.tile_walk = 0;
1751 ss->ss0.tiled_surface = 1;
1752 ss->ss0.tile_walk = I965_TILEWALK_XMAJOR;
1755 ss->ss0.tiled_surface = 1;
1756 ss->ss0.tile_walk = I965_TILEWALK_YMAJOR;
1762 gen9_gpe_set_surface2_tiling(struct gen9_surface_state2 *ss, unsigned int tiling)
1765 case I915_TILING_NONE:
1766 ss->ss2.tiled_surface = 0;
1767 ss->ss2.tile_walk = 0;
1770 ss->ss2.tiled_surface = 1;
1771 ss->ss2.tile_walk = I965_TILEWALK_XMAJOR;
1774 ss->ss2.tiled_surface = 1;
1775 ss->ss2.tile_walk = I965_TILEWALK_YMAJOR;
1781 gen9_gpe_set_2d_surface_state(struct gen9_surface_state *ss,
1782 unsigned int cacheability_control,
1783 unsigned int format,
1784 unsigned int tiling,
1786 unsigned int height,
1788 uint64_t base_offset,
1789 unsigned int y_offset)
1791 memset(ss, 0, sizeof(*ss));
1793 /* Always set 1(align 4 mode) */
1794 ss->ss0.vertical_alignment = 1;
1795 ss->ss0.horizontal_alignment = 1;
1797 ss->ss0.surface_format = format;
1798 ss->ss0.surface_type = I965_SURFACE_2D;
1800 ss->ss1.surface_mocs = cacheability_control;
1802 ss->ss2.width = width - 1;
1803 ss->ss2.height = height - 1;
1805 ss->ss3.pitch = pitch - 1;
1807 ss->ss5.y_offset = y_offset;
1809 ss->ss7.shader_chanel_select_a = HSW_SCS_ALPHA;
1810 ss->ss7.shader_chanel_select_b = HSW_SCS_BLUE;
1811 ss->ss7.shader_chanel_select_g = HSW_SCS_GREEN;
1812 ss->ss7.shader_chanel_select_r = HSW_SCS_RED;
1814 ss->ss8.base_addr = (uint32_t)base_offset;
1815 ss->ss9.base_addr_high = (uint32_t)(base_offset >> 32);
1817 gen9_gpe_set_surface_tiling(ss, tiling);
1820 /* This is only for NV12 format */
1822 gen9_gpe_set_adv_surface_state(struct gen9_surface_state2 *ss,
1823 unsigned int v_direction,
1824 unsigned int cacheability_control,
1825 unsigned int format,
1826 unsigned int tiling,
1828 unsigned int height,
1830 uint64_t base_offset,
1831 unsigned int y_cb_offset)
1833 memset(ss, 0, sizeof(*ss));
1835 ss->ss1.cbcr_pixel_offset_v_direction = v_direction;
1836 ss->ss1.width = width - 1;
1837 ss->ss1.height = height - 1;
1839 ss->ss2.surface_format = format;
1840 ss->ss2.interleave_chroma = 1;
1841 ss->ss2.pitch = pitch - 1;
1843 ss->ss3.y_offset_for_cb = y_cb_offset;
1845 ss->ss5.surface_object_mocs = cacheability_control;
1847 ss->ss6.base_addr = (uint32_t)base_offset;
1848 ss->ss7.base_addr_high = (uint32_t)(base_offset >> 32);
1850 gen9_gpe_set_surface2_tiling(ss, tiling);
1854 gen9_gpe_set_buffer2_surface_state(struct gen9_surface_state *ss,
1855 unsigned int cacheability_control,
1856 unsigned int format,
1859 uint64_t base_offset)
1861 memset(ss, 0, sizeof(*ss));
1863 ss->ss0.surface_format = format;
1864 ss->ss0.surface_type = I965_SURFACE_BUFFER;
1866 ss->ss1.surface_mocs = cacheability_control;
1868 ss->ss2.width = (size - 1) & 0x7F;
1869 ss->ss2.height = ((size - 1) & 0x1FFF80) >> 7;
1871 ss->ss3.depth = ((size - 1) & 0xFE00000) >> 21;
1872 ss->ss3.pitch = pitch - 1;
1874 ss->ss7.shader_chanel_select_a = HSW_SCS_ALPHA;
1875 ss->ss7.shader_chanel_select_b = HSW_SCS_BLUE;
1876 ss->ss7.shader_chanel_select_g = HSW_SCS_GREEN;
1877 ss->ss7.shader_chanel_select_r = HSW_SCS_RED;
1879 ss->ss8.base_addr = (uint32_t)base_offset;
1880 ss->ss9.base_addr_high = (uint32_t)(base_offset >> 32);
1884 gen9_gpe_context_add_surface(struct i965_gpe_context *gpe_context,
1885 struct i965_gpe_surface *gpe_surface,
1889 unsigned int tiling, swizzle, width, height, pitch, tile_alignment, y_offset = 0;
1890 unsigned int surface_state_offset = gpe_context->surface_state_binding_table.surface_state_offset +
1891 index * SURFACE_STATE_PADDED_SIZE_GEN9;
1892 unsigned int binding_table_offset = gpe_context->surface_state_binding_table.binding_table_offset +
1894 struct i965_gpe_resource *gpe_resource = gpe_surface->gpe_resource;
1896 dri_bo_get_tiling(gpe_resource->bo, &tiling, &swizzle);
1898 dri_bo_map(gpe_context->surface_state_binding_table.bo, 1);
1899 buf = (char *)gpe_context->surface_state_binding_table.bo->virtual;
1900 *((unsigned int *)(buf + binding_table_offset)) = surface_state_offset;
1902 if (gpe_surface->is_2d_surface && gpe_surface->is_override_offset) {
1903 struct gen9_surface_state *ss = (struct gen9_surface_state *)(buf + surface_state_offset);
1905 width = gpe_resource->width;
1906 height = gpe_resource->height;
1907 pitch = gpe_resource->pitch;
1909 if (gpe_surface->is_media_block_rw) {
1910 if (gpe_surface->is_16bpp)
1911 width = (ALIGN(width * 2, 4) >> 2);
1913 width = (ALIGN(width, 4) >> 2);
1917 gen9_gpe_set_2d_surface_state(ss,
1918 gpe_surface->cacheability_control,
1919 gpe_surface->format,
1921 width, height, pitch,
1922 gpe_resource->bo->offset64 + gpe_surface->offset,
1925 dri_bo_emit_reloc(gpe_context->surface_state_binding_table.bo,
1926 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
1927 gpe_surface->offset,
1928 surface_state_offset + offsetof(struct gen9_surface_state, ss8),
1930 } else if (gpe_surface->is_2d_surface && gpe_surface->is_uv_surface) {
1931 unsigned int cbcr_offset;
1932 struct gen9_surface_state *ss = (struct gen9_surface_state *)(buf + surface_state_offset);
1934 width = gpe_resource->width;
1935 height = gpe_resource->height / 2;
1936 pitch = gpe_resource->pitch;
1938 if (gpe_surface->is_media_block_rw) {
1939 if (gpe_surface->is_16bpp)
1940 width = (ALIGN(width * 2, 4) >> 2);
1942 width = (ALIGN(width, 4) >> 2);
1945 if (tiling == I915_TILING_Y) {
1946 tile_alignment = 32;
1947 } else if (tiling == I915_TILING_X) {
1952 y_offset = (gpe_resource->y_cb_offset % tile_alignment);
1953 cbcr_offset = ALIGN_FLOOR(gpe_resource->y_cb_offset, tile_alignment) * pitch;
1955 gen9_gpe_set_2d_surface_state(ss,
1956 gpe_surface->cacheability_control,
1957 I965_SURFACEFORMAT_R16_UINT,
1959 width, height, pitch,
1960 gpe_resource->bo->offset64 + cbcr_offset,
1963 dri_bo_emit_reloc(gpe_context->surface_state_binding_table.bo,
1964 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
1966 surface_state_offset + offsetof(struct gen9_surface_state, ss8),
1968 } else if (gpe_surface->is_2d_surface) {
1969 struct gen9_surface_state *ss = (struct gen9_surface_state *)(buf + surface_state_offset);
1971 width = gpe_resource->width;
1972 height = gpe_resource->height;
1973 pitch = gpe_resource->pitch;
1975 if (gpe_surface->is_media_block_rw) {
1976 if (gpe_surface->is_16bpp)
1977 width = (ALIGN(width * 2, 4) >> 2);
1979 width = (ALIGN(width, 4) >> 2);
1982 gen9_gpe_set_2d_surface_state(ss,
1983 gpe_surface->cacheability_control,
1984 gpe_surface->format,
1986 width, height, pitch,
1987 gpe_resource->bo->offset64,
1990 dri_bo_emit_reloc(gpe_context->surface_state_binding_table.bo,
1991 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
1993 surface_state_offset + offsetof(struct gen9_surface_state, ss8),
1995 } else if (gpe_surface->is_adv_surface) {
1996 struct gen9_surface_state2 *ss = (struct gen9_surface_state2 *)(buf + surface_state_offset);
1998 width = gpe_resource->width;
1999 height = gpe_resource->height;
2000 pitch = gpe_resource->pitch;
2002 gen9_gpe_set_adv_surface_state(ss,
2003 gpe_surface->v_direction,
2004 gpe_surface->cacheability_control,
2005 MFX_SURFACE_PLANAR_420_8,
2007 width, height, pitch,
2008 gpe_resource->bo->offset64,
2009 gpe_resource->y_cb_offset);
2011 dri_bo_emit_reloc(gpe_context->surface_state_binding_table.bo,
2012 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
2014 surface_state_offset + offsetof(struct gen9_surface_state2, ss6),
2017 struct gen9_surface_state *ss = (struct gen9_surface_state *)(buf + surface_state_offset);
2018 unsigned int format;
2020 assert(gpe_surface->is_buffer);
2022 if (gpe_surface->is_raw_buffer) {
2023 format = I965_SURFACEFORMAT_RAW;
2026 format = I965_SURFACEFORMAT_R32_UINT;
2027 pitch = sizeof(unsigned int);
2030 gen9_gpe_set_buffer2_surface_state(ss,
2031 gpe_surface->cacheability_control,
2035 gpe_resource->bo->offset64 + gpe_surface->offset);
2037 dri_bo_emit_reloc(gpe_context->surface_state_binding_table.bo,
2038 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
2039 gpe_surface->offset,
2040 surface_state_offset + offsetof(struct gen9_surface_state, ss8),
2044 dri_bo_unmap(gpe_context->surface_state_binding_table.bo);
2048 i965_gpe_allocate_2d_resource(dri_bufmgr *bufmgr,
2049 struct i965_gpe_resource *res,
2060 res->type = I965_GPE_RESOURCE_2D;
2062 res->height = height;
2065 bo_size = ALIGN(height, 16) * pitch;
2066 res->size = bo_size;
2068 res->bo = dri_bo_alloc(bufmgr, name, res->size, 4096);
2075 gen8_gpe_media_state_flush(VADriverContextP ctx,
2076 struct i965_gpe_context *gpe_context,
2077 struct intel_batchbuffer *batch)
2079 BEGIN_BATCH(batch, 2);
2081 OUT_BATCH(batch, CMD_MEDIA_STATE_FLUSH | (2 - 2));
2082 OUT_BATCH(batch, 0);
2084 ADVANCE_BATCH(batch);
2088 gen8_gpe_media_object(VADriverContextP ctx,
2089 struct i965_gpe_context *gpe_context,
2090 struct intel_batchbuffer *batch,
2091 struct gpe_media_object_parameter *param)
2093 int batch_size, subdata_size;
2097 if (param->pinline_data && param->inline_size) {
2098 subdata_size = ALIGN(param->inline_size, 4);
2099 batch_size += subdata_size / 4;
2101 BEGIN_BATCH(batch, batch_size);
2102 OUT_BATCH(batch, CMD_MEDIA_OBJECT | (batch_size - 2));
2103 OUT_BATCH(batch, param->interface_offset);
2104 OUT_BATCH(batch, param->use_scoreboard << 21);
2105 OUT_BATCH(batch, 0);
2106 OUT_BATCH(batch, (param->scoreboard_y << 16 |
2107 param->scoreboard_x));
2108 OUT_BATCH(batch, param->scoreboard_mask);
2111 intel_batchbuffer_data(batch, param->pinline_data, subdata_size);
2113 ADVANCE_BATCH(batch);
2117 gen8_gpe_media_object_walker(VADriverContextP ctx,
2118 struct i965_gpe_context *gpe_context,
2119 struct intel_batchbuffer *batch,
2120 struct gpe_media_object_walker_parameter *param)
2125 if (param->inline_size)
2126 walker_length += ALIGN(param->inline_size, 4) / 4;
2127 BEGIN_BATCH(batch, walker_length);
2128 OUT_BATCH(batch, CMD_MEDIA_OBJECT_WALKER | (walker_length - 2));
2129 OUT_BATCH(batch, param->interface_offset);
2130 OUT_BATCH(batch, param->use_scoreboard << 21);
2131 OUT_BATCH(batch, 0);
2132 OUT_BATCH(batch, 0);
2133 OUT_BATCH(batch, (param->group_id_loop_select << 8 |
2134 param->scoreboard_mask)); // DW5
2135 OUT_BATCH(batch, (param->color_count_minus1 << 24 |
2136 param->middle_loop_extra_steps << 16 |
2137 param->mid_loop_unit_y << 12 |
2138 param->mid_loop_unit_x << 8));
2139 OUT_BATCH(batch, ((param->global_loop_exec_count & 0x3ff) << 16 |
2140 (param->local_loop_exec_count & 0x3ff)));
2141 OUT_BATCH(batch, param->block_resolution.value);
2142 OUT_BATCH(batch, param->local_start.value);
2143 OUT_BATCH(batch, 0); // DW10
2144 OUT_BATCH(batch, param->local_outer_loop_stride.value);
2145 OUT_BATCH(batch, param->local_inner_loop_unit.value);
2146 OUT_BATCH(batch, param->global_resolution.value);
2147 OUT_BATCH(batch, param->global_start.value);
2148 OUT_BATCH(batch, param->global_outer_loop_stride.value);
2149 OUT_BATCH(batch, param->global_inner_loop_unit.value);
2151 if (param->pinline_data && param->inline_size)
2152 intel_batchbuffer_data(batch, param->pinline_data, ALIGN(param->inline_size, 4));
2154 ADVANCE_BATCH(batch);
2159 intel_vpp_init_media_object_walker_parameter(struct intel_vpp_kernel_walker_parameter *kernel_walker_param,
2160 struct gpe_media_object_walker_parameter *walker_param)
2162 memset(walker_param, 0, sizeof(*walker_param));
2164 walker_param->use_scoreboard = kernel_walker_param->use_scoreboard;
2166 walker_param->block_resolution.x = kernel_walker_param->resolution_x;
2167 walker_param->block_resolution.y = kernel_walker_param->resolution_y;
2169 walker_param->global_resolution.x = kernel_walker_param->resolution_x;
2170 walker_param->global_resolution.y = kernel_walker_param->resolution_y;
2172 walker_param->global_outer_loop_stride.x = kernel_walker_param->resolution_x;
2173 walker_param->global_outer_loop_stride.y = 0;
2175 walker_param->global_inner_loop_unit.x = 0;
2176 walker_param->global_inner_loop_unit.y = kernel_walker_param->resolution_y;
2178 walker_param->local_loop_exec_count = 0xFFFF; //MAX VALUE
2179 walker_param->global_loop_exec_count = 0xFFFF; //MAX VALUE
2181 if (kernel_walker_param->no_dependency) {
2182 /* The no_dependency is used for VPP */
2183 walker_param->scoreboard_mask = 0;
2184 walker_param->use_scoreboard = 0;
2185 // Raster scan walking pattern
2186 walker_param->local_outer_loop_stride.x = 0;
2187 walker_param->local_outer_loop_stride.y = 1;
2188 walker_param->local_inner_loop_unit.x = 1;
2189 walker_param->local_inner_loop_unit.y = 0;
2190 walker_param->local_end.x = kernel_walker_param->resolution_x - 1;
2191 walker_param->local_end.y = 0;
2193 walker_param->local_end.x = 0;
2194 walker_param->local_end.y = 0;
2197 walker_param->scoreboard_mask = 0x0F;
2198 walker_param->local_outer_loop_stride.x = 1;
2199 walker_param->local_outer_loop_stride.y = 0;
2200 walker_param->local_inner_loop_unit.x = -2;
2201 walker_param->local_inner_loop_unit.y = 1;
2206 gen8_gpe_reset_binding_table(VADriverContextP ctx, struct i965_gpe_context *gpe_context)
2208 unsigned int *binding_table;
2209 unsigned int binding_table_offset = gpe_context->surface_state_binding_table.binding_table_offset;
2212 dri_bo_map(gpe_context->surface_state_binding_table.bo, 1);
2213 binding_table = (unsigned int*)((char *)gpe_context->surface_state_binding_table.bo->virtual + binding_table_offset);
2215 for (i = 0; i < gpe_context->surface_state_binding_table.max_entries; i++) {
2216 *(binding_table + i) = gpe_context->surface_state_binding_table.surface_state_offset + i * SURFACE_STATE_PADDED_SIZE_GEN8;
2219 dri_bo_unmap(gpe_context->surface_state_binding_table.bo);
2223 gen8_gpe_set_2d_surface_state(struct gen8_surface_state *ss,
2224 unsigned int vert_line_stride_offset,
2225 unsigned int vert_line_stride,
2226 unsigned int cacheability_control,
2227 unsigned int format,
2228 unsigned int tiling,
2230 unsigned int height,
2232 unsigned int base_offset,
2233 unsigned int y_offset)
2235 memset(ss, 0, sizeof(*ss));
2237 ss->ss0.vert_line_stride_ofs = vert_line_stride_offset;
2238 ss->ss0.vert_line_stride = vert_line_stride;
2239 ss->ss0.surface_format = format;
2240 ss->ss0.surface_type = I965_SURFACE_2D;
2242 ss->ss1.surface_mocs = cacheability_control;
2244 ss->ss2.width = width - 1;
2245 ss->ss2.height = height - 1;
2247 ss->ss3.pitch = pitch - 1;
2249 ss->ss5.y_offset = y_offset;
2251 ss->ss7.shader_chanel_select_a = HSW_SCS_ALPHA;
2252 ss->ss7.shader_chanel_select_b = HSW_SCS_BLUE;
2253 ss->ss7.shader_chanel_select_g = HSW_SCS_GREEN;
2254 ss->ss7.shader_chanel_select_r = HSW_SCS_RED;
2256 ss->ss8.base_addr = base_offset;
2258 gen8_gpe_set_surface_tiling(ss, tiling);
2262 gen8_gpe_set_adv_surface_state(struct gen8_surface_state2 *ss,
2263 unsigned int v_direction,
2264 unsigned int cacheability_control,
2265 unsigned int format,
2266 unsigned int tiling,
2268 unsigned int height,
2270 unsigned int base_offset,
2271 unsigned int y_cb_offset)
2273 memset(ss, 0, sizeof(*ss));
2275 ss->ss1.cbcr_pixel_offset_v_direction = v_direction;
2276 ss->ss1.width = width - 1;
2277 ss->ss1.height = height - 1;
2279 ss->ss2.surface_format = format;
2280 ss->ss2.interleave_chroma = 1;
2281 ss->ss2.pitch = pitch - 1;
2283 ss->ss3.y_offset_for_cb = y_cb_offset;
2285 ss->ss5.surface_object_mocs = cacheability_control;
2287 ss->ss6.base_addr = base_offset;
2289 gen8_gpe_set_surface2_tiling(ss, tiling);
2293 gen8_gpe_set_buffer2_surface_state(struct gen8_surface_state *ss,
2294 unsigned int cacheability_control,
2295 unsigned int format,
2298 unsigned int base_offset)
2300 memset(ss, 0, sizeof(*ss));
2302 ss->ss0.surface_format = format;
2303 ss->ss0.surface_type = I965_SURFACE_BUFFER;
2305 ss->ss1.surface_mocs = cacheability_control;
2307 ss->ss2.width = (size - 1) & 0x7F;
2308 ss->ss2.height = ((size - 1) & 0x1FFF80) >> 7;
2310 ss->ss3.depth = ((size - 1) & 0xFE00000) >> 21;
2311 ss->ss3.pitch = pitch - 1;
2313 ss->ss7.shader_chanel_select_a = HSW_SCS_ALPHA;
2314 ss->ss7.shader_chanel_select_b = HSW_SCS_BLUE;
2315 ss->ss7.shader_chanel_select_g = HSW_SCS_GREEN;
2316 ss->ss7.shader_chanel_select_r = HSW_SCS_RED;
2318 ss->ss8.base_addr = base_offset;
2322 gen8_gpe_context_add_surface(struct i965_gpe_context *gpe_context,
2323 struct i965_gpe_surface *gpe_surface,
2327 unsigned int tiling, swizzle, width, height, pitch, tile_alignment, y_offset = 0;
2328 unsigned int surface_state_offset = gpe_context->surface_state_binding_table.surface_state_offset +
2329 index * SURFACE_STATE_PADDED_SIZE_GEN8;
2330 unsigned int binding_table_offset = gpe_context->surface_state_binding_table.binding_table_offset +
2332 struct i965_gpe_resource *gpe_resource = gpe_surface->gpe_resource;
2334 dri_bo_get_tiling(gpe_resource->bo, &tiling, &swizzle);
2336 dri_bo_map(gpe_context->surface_state_binding_table.bo, 1);
2337 buf = (char *)gpe_context->surface_state_binding_table.bo->virtual;
2338 *((unsigned int *)(buf + binding_table_offset)) = surface_state_offset;
2340 if (gpe_surface->is_2d_surface) {
2341 struct gen8_surface_state *ss = (struct gen8_surface_state *)(buf + surface_state_offset);
2342 unsigned int target_offset;
2344 width = gpe_resource->width;
2345 height = gpe_resource->height;
2346 pitch = gpe_resource->pitch;
2348 if (gpe_surface->is_override_offset) {
2350 target_offset = gpe_surface->offset;
2351 } else if (gpe_surface->is_uv_surface) {
2354 if (tiling == I915_TILING_Y) {
2355 tile_alignment = 32;
2356 } else if (tiling == I915_TILING_X) {
2361 y_offset = (gpe_resource->y_cb_offset % tile_alignment);
2362 target_offset = ALIGN_FLOOR(gpe_resource->y_cb_offset, tile_alignment) * pitch;
2368 if (gpe_surface->is_media_block_rw) {
2369 width = (ALIGN(width, 4) >> 2);
2372 gen8_gpe_set_2d_surface_state(ss,
2373 gpe_surface->vert_line_stride_offset,
2374 gpe_surface->vert_line_stride,
2375 gpe_surface->cacheability_control,
2376 gpe_surface->format,
2378 width, height, pitch,
2379 gpe_resource->bo->offset64 + target_offset,
2382 dri_bo_emit_reloc(gpe_context->surface_state_binding_table.bo,
2383 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
2385 surface_state_offset + offsetof(struct gen8_surface_state, ss8),
2387 } else if (gpe_surface->is_adv_surface) {
2388 struct gen8_surface_state2 *ss = (struct gen8_surface_state2 *)(buf + surface_state_offset);
2390 width = gpe_resource->width;
2391 height = gpe_resource->height;
2392 pitch = gpe_resource->pitch;
2394 gen8_gpe_set_adv_surface_state(ss,
2395 gpe_surface->v_direction,
2396 gpe_surface->cacheability_control,
2397 MFX_SURFACE_PLANAR_420_8,
2399 width, height, pitch,
2400 gpe_resource->bo->offset64,
2401 gpe_resource->y_cb_offset);
2403 dri_bo_emit_reloc(gpe_context->surface_state_binding_table.bo,
2404 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
2406 surface_state_offset + offsetof(struct gen8_surface_state2, ss6),
2409 struct gen8_surface_state *ss = (struct gen8_surface_state *)(buf + surface_state_offset);
2410 unsigned int format;
2412 assert(gpe_surface->is_buffer);
2414 if (gpe_surface->is_raw_buffer) {
2415 format = I965_SURFACEFORMAT_RAW;
2418 format = I965_SURFACEFORMAT_R32_UINT;
2419 pitch = sizeof(unsigned int);
2422 gen8_gpe_set_buffer2_surface_state(ss,
2423 gpe_surface->cacheability_control,
2427 gpe_resource->bo->offset64 + gpe_surface->offset);
2429 dri_bo_emit_reloc(gpe_context->surface_state_binding_table.bo,
2430 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
2431 gpe_surface->offset,
2432 surface_state_offset + offsetof(struct gen8_surface_state, ss8),
2436 dri_bo_unmap(gpe_context->surface_state_binding_table.bo);
2440 gen8_gpe_mi_conditional_batch_buffer_end(VADriverContextP ctx,
2441 struct intel_batchbuffer *batch,
2442 struct gpe_mi_conditional_batch_buffer_end_parameter *param)
2444 __OUT_BATCH(batch, (MI_CONDITIONAL_BATCH_BUFFER_END |
2446 (4 - 2))); /* Always use PPGTT */
2447 __OUT_BATCH(batch, param->compare_data);
2448 __OUT_RELOC64(batch,
2450 I915_GEM_DOMAIN_RENDER | I915_GEM_DOMAIN_INSTRUCTION, 0,
2456 gen8_gpe_pipe_control(VADriverContextP ctx,
2457 struct intel_batchbuffer *batch,
2458 struct gpe_pipe_control_parameter *param)
2460 int render_target_cache_flush_enable = CMD_PIPE_CONTROL_WC_FLUSH;
2461 int dc_flush_enable = 0;
2462 int state_cache_invalidation_enable = 0;
2463 int constant_cache_invalidation_enable = 0;
2464 int vf_cache_invalidation_enable = 0;
2465 int instruction_cache_invalidation_enable = 0;
2466 int post_sync_operation = CMD_PIPE_CONTROL_NOWRITE;
2467 int use_global_gtt = CMD_PIPE_CONTROL_GLOBAL_GTT_GEN8;
2468 int cs_stall_enable = !param->disable_cs_stall;
2470 switch (param->flush_mode) {
2471 case PIPE_CONTROL_FLUSH_WRITE_CACHE:
2472 render_target_cache_flush_enable = CMD_PIPE_CONTROL_WC_FLUSH;
2473 dc_flush_enable = CMD_PIPE_CONTROL_DC_FLUSH;
2476 case PIPE_CONTROL_FLUSH_READ_CACHE:
2477 render_target_cache_flush_enable = 0;
2478 state_cache_invalidation_enable = CMD_PIPE_CONTROL_SC_INVALIDATION_GEN8;
2479 constant_cache_invalidation_enable = CMD_PIPE_CONTROL_CC_INVALIDATION_GEN8;
2480 vf_cache_invalidation_enable = CMD_PIPE_CONTROL_VFC_INVALIDATION_GEN8;
2481 instruction_cache_invalidation_enable = CMD_PIPE_CONTROL_IS_FLUSH;
2484 case PIPE_CONTROL_FLUSH_NONE:
2486 render_target_cache_flush_enable = 0;
2491 post_sync_operation = CMD_PIPE_CONTROL_WRITE_QWORD;
2492 use_global_gtt = CMD_PIPE_CONTROL_LOCAL_PGTT_GEN8;
2494 post_sync_operation = CMD_PIPE_CONTROL_NOWRITE;
2495 render_target_cache_flush_enable = CMD_PIPE_CONTROL_WC_FLUSH;
2496 state_cache_invalidation_enable = CMD_PIPE_CONTROL_SC_INVALIDATION_GEN8;
2497 constant_cache_invalidation_enable = CMD_PIPE_CONTROL_CC_INVALIDATION_GEN8;
2498 vf_cache_invalidation_enable = CMD_PIPE_CONTROL_VFC_INVALIDATION_GEN8;
2499 instruction_cache_invalidation_enable = CMD_PIPE_CONTROL_IS_FLUSH;
2502 __OUT_BATCH(batch, CMD_PIPE_CONTROL | (6 - 2));
2503 __OUT_BATCH(batch, (render_target_cache_flush_enable |
2505 state_cache_invalidation_enable |
2506 constant_cache_invalidation_enable |
2507 vf_cache_invalidation_enable |
2508 instruction_cache_invalidation_enable |
2509 post_sync_operation |
2512 CMD_PIPE_CONTROL_FLUSH_ENABLE));
2515 __OUT_RELOC64(batch,
2517 I915_GEM_DOMAIN_RENDER | I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_RENDER,
2520 __OUT_BATCH(batch, 0);
2521 __OUT_BATCH(batch, 0);
2524 __OUT_BATCH(batch, param->dw0);
2525 __OUT_BATCH(batch, param->dw1);
2529 i965_gpe_table_init(VADriverContextP ctx)
2531 struct i965_driver_data *i965 = i965_driver_data(ctx);
2532 struct i965_gpe_table *gpe = &i965->gpe_table;
2534 if (IS_GEN8(i965->intel.device_info)) {
2535 gpe->context_init = gen8_gpe_context_init;
2536 gpe->context_destroy = gen8_gpe_context_destroy;
2537 gpe->context_add_surface = gen8_gpe_context_add_surface;
2538 gpe->reset_binding_table = gen8_gpe_reset_binding_table;
2539 gpe->load_kernels = gen8_gpe_load_kernels;
2540 gpe->setup_interface_data = gen8_gpe_setup_interface_data;
2541 gpe->set_dynamic_buffer = gen8_gpe_context_set_dynamic_buffer;
2542 gpe->media_object = gen8_gpe_media_object;
2543 gpe->media_object_walker = gen8_gpe_media_object_walker;
2544 gpe->media_state_flush = gen8_gpe_media_state_flush;
2545 gpe->pipe_control = gen8_gpe_pipe_control;
2546 gpe->pipeline_end = gen8_gpe_pipeline_end;
2547 gpe->pipeline_setup = gen8_gpe_pipeline_setup;
2548 gpe->mi_conditional_batch_buffer_end = gen8_gpe_mi_conditional_batch_buffer_end;
2549 gpe->mi_batch_buffer_start = gen8_gpe_mi_batch_buffer_start;
2550 gpe->mi_load_register_reg = gen8_gpe_mi_load_register_reg;
2551 gpe->mi_load_register_imm = gen8_gpe_mi_load_register_imm;
2552 gpe->mi_load_register_mem = gen8_gpe_mi_load_register_mem;
2553 gpe->mi_store_register_mem = gen8_gpe_mi_store_register_mem;
2554 gpe->mi_store_data_imm =gen8_gpe_mi_store_data_imm;
2555 gpe->mi_flush_dw = gen8_gpe_mi_flush_dw;
2556 } else if (IS_GEN9(i965->intel.device_info)) {
2557 gpe->context_init = gen8_gpe_context_init;
2558 gpe->context_destroy = gen8_gpe_context_destroy;
2559 gpe->context_add_surface = gen9_gpe_context_add_surface;
2560 gpe->reset_binding_table = gen9_gpe_reset_binding_table;
2561 gpe->load_kernels = gen8_gpe_load_kernels;
2562 gpe->setup_interface_data = gen8_gpe_setup_interface_data;
2563 gpe->set_dynamic_buffer = gen8_gpe_context_set_dynamic_buffer;
2564 gpe->media_object = gen8_gpe_media_object;
2565 gpe->media_object_walker = gen8_gpe_media_object_walker;
2566 gpe->media_state_flush = gen8_gpe_media_state_flush;
2567 gpe->pipe_control = gen8_gpe_pipe_control;
2568 gpe->pipeline_end = gen9_gpe_pipeline_end;
2569 gpe->pipeline_setup = gen9_gpe_pipeline_setup;
2570 gpe->mi_conditional_batch_buffer_end = gen9_gpe_mi_conditional_batch_buffer_end;
2571 gpe->mi_batch_buffer_start = gen8_gpe_mi_batch_buffer_start;
2572 gpe->mi_load_register_reg = gen8_gpe_mi_load_register_reg;
2573 gpe->mi_load_register_imm = gen8_gpe_mi_load_register_imm;
2574 gpe->mi_load_register_mem = gen8_gpe_mi_load_register_mem;
2575 gpe->mi_store_register_mem = gen8_gpe_mi_store_register_mem;
2576 gpe->mi_store_data_imm =gen8_gpe_mi_store_data_imm;
2577 gpe->mi_flush_dw = gen8_gpe_mi_flush_dw;
2579 // TODO: for other platforms
2586 i965_gpe_table_terminate(VADriverContextP ctx)