/*
 * Copyright © 2012 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Xiang Haihao <haihao.xiang@intel.com>
 */
#include <assert.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "intel_batchbuffer.h"
#include "intel_driver.h"

#include "i965_gpe_utils.h"
38 i965_gpe_select(VADriverContextP ctx,
39 struct i965_gpe_context *gpe_context,
40 struct intel_batchbuffer *batch)
42 BEGIN_BATCH(batch, 1);
43 OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA);
48 gen6_gpe_state_base_address(VADriverContextP ctx,
49 struct i965_gpe_context *gpe_context,
50 struct intel_batchbuffer *batch)
52 BEGIN_BATCH(batch, 10);
54 OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (10 - 2));
55 OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* General State Base Address */
57 gpe_context->surface_state_binding_table.bo,
58 I915_GEM_DOMAIN_INSTRUCTION,
60 BASE_ADDRESS_MODIFY); /* Surface state base address */
61 OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Dynamic State Base Address */
62 OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Indirect Object Base Address */
63 OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Instruction Base Address */
64 OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* General State Access Upper Bound */
65 OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Dynamic State Access Upper Bound */
66 OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Indirect Object Access Upper Bound */
67 OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Instruction Access Upper Bound */
73 gen6_gpe_vfe_state(VADriverContextP ctx,
74 struct i965_gpe_context *gpe_context,
75 struct intel_batchbuffer *batch)
78 BEGIN_BATCH(batch, 8);
80 OUT_BATCH(batch, CMD_MEDIA_VFE_STATE | (8 - 2));
81 OUT_BATCH(batch, 0); /* Scratch Space Base Pointer and Space */
83 gpe_context->vfe_state.max_num_threads << 16 | /* Maximum Number of Threads */
84 gpe_context->vfe_state.num_urb_entries << 8 | /* Number of URB Entries */
85 gpe_context->vfe_state.gpgpu_mode << 2); /* MEDIA Mode */
86 OUT_BATCH(batch, 0); /* Debug: Object ID */
88 gpe_context->vfe_state.urb_entry_size << 16 | /* URB Entry Allocation Size */
89 gpe_context->vfe_state.curbe_allocation_size); /* CURBE Allocation Size */
90 /* the vfe_desc5/6/7 will decide whether the scoreboard is used. */
91 OUT_BATCH(batch, gpe_context->vfe_desc5.dword);
92 OUT_BATCH(batch, gpe_context->vfe_desc6.dword);
93 OUT_BATCH(batch, gpe_context->vfe_desc7.dword);
100 gen6_gpe_curbe_load(VADriverContextP ctx,
101 struct i965_gpe_context *gpe_context,
102 struct intel_batchbuffer *batch)
104 BEGIN_BATCH(batch, 4);
106 OUT_BATCH(batch, CMD_MEDIA_CURBE_LOAD | (4 - 2));
108 OUT_BATCH(batch, gpe_context->curbe.length);
109 OUT_RELOC(batch, gpe_context->curbe.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
111 ADVANCE_BATCH(batch);
115 gen6_gpe_idrt(VADriverContextP ctx,
116 struct i965_gpe_context *gpe_context,
117 struct intel_batchbuffer *batch)
119 BEGIN_BATCH(batch, 4);
121 OUT_BATCH(batch, CMD_MEDIA_INTERFACE_LOAD | (4 - 2));
123 OUT_BATCH(batch, gpe_context->idrt.max_entries * gpe_context->idrt.entry_size);
124 OUT_RELOC(batch, gpe_context->idrt.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
126 ADVANCE_BATCH(batch);
130 i965_gpe_load_kernels(VADriverContextP ctx,
131 struct i965_gpe_context *gpe_context,
132 struct i965_kernel *kernel_list,
133 unsigned int num_kernels)
135 struct i965_driver_data *i965 = i965_driver_data(ctx);
138 assert(num_kernels <= MAX_GPE_KERNELS);
139 memcpy(gpe_context->kernels, kernel_list, sizeof(*kernel_list) * num_kernels);
140 gpe_context->num_kernels = num_kernels;
142 for (i = 0; i < num_kernels; i++) {
143 struct i965_kernel *kernel = &gpe_context->kernels[i];
145 kernel->bo = dri_bo_alloc(i965->intel.bufmgr,
150 dri_bo_subdata(kernel->bo, 0, kernel->size, kernel->bin);
155 i965_gpe_context_destroy(struct i965_gpe_context *gpe_context)
159 dri_bo_unreference(gpe_context->surface_state_binding_table.bo);
160 gpe_context->surface_state_binding_table.bo = NULL;
162 dri_bo_unreference(gpe_context->idrt.bo);
163 gpe_context->idrt.bo = NULL;
165 dri_bo_unreference(gpe_context->curbe.bo);
166 gpe_context->curbe.bo = NULL;
168 for (i = 0; i < gpe_context->num_kernels; i++) {
169 struct i965_kernel *kernel = &gpe_context->kernels[i];
171 dri_bo_unreference(kernel->bo);
177 i965_gpe_context_init(VADriverContextP ctx,
178 struct i965_gpe_context *gpe_context)
180 struct i965_driver_data *i965 = i965_driver_data(ctx);
183 dri_bo_unreference(gpe_context->surface_state_binding_table.bo);
184 bo = dri_bo_alloc(i965->intel.bufmgr,
185 "surface state & binding table",
186 gpe_context->surface_state_binding_table.length,
189 gpe_context->surface_state_binding_table.bo = bo;
191 dri_bo_unreference(gpe_context->idrt.bo);
192 bo = dri_bo_alloc(i965->intel.bufmgr,
193 "interface descriptor table",
194 gpe_context->idrt.entry_size * gpe_context->idrt.max_entries,
197 gpe_context->idrt.bo = bo;
199 dri_bo_unreference(gpe_context->curbe.bo);
200 bo = dri_bo_alloc(i965->intel.bufmgr,
202 gpe_context->curbe.length,
205 gpe_context->curbe.bo = bo;
209 gen6_gpe_pipeline_setup(VADriverContextP ctx,
210 struct i965_gpe_context *gpe_context,
211 struct intel_batchbuffer *batch)
213 intel_batchbuffer_emit_mi_flush(batch);
215 i965_gpe_select(ctx, gpe_context, batch);
216 gen6_gpe_state_base_address(ctx, gpe_context, batch);
217 gen6_gpe_vfe_state(ctx, gpe_context, batch);
218 gen6_gpe_curbe_load(ctx, gpe_context, batch);
219 gen6_gpe_idrt(ctx, gpe_context, batch);
223 i965_gpe_set_surface_tiling(struct i965_surface_state *ss, unsigned int tiling)
226 case I915_TILING_NONE:
227 ss->ss3.tiled_surface = 0;
228 ss->ss3.tile_walk = 0;
231 ss->ss3.tiled_surface = 1;
232 ss->ss3.tile_walk = I965_TILEWALK_XMAJOR;
235 ss->ss3.tiled_surface = 1;
236 ss->ss3.tile_walk = I965_TILEWALK_YMAJOR;
242 i965_gpe_set_surface2_tiling(struct i965_surface_state2 *ss, unsigned int tiling)
245 case I915_TILING_NONE:
246 ss->ss2.tiled_surface = 0;
247 ss->ss2.tile_walk = 0;
250 ss->ss2.tiled_surface = 1;
251 ss->ss2.tile_walk = I965_TILEWALK_XMAJOR;
254 ss->ss2.tiled_surface = 1;
255 ss->ss2.tile_walk = I965_TILEWALK_YMAJOR;
261 gen7_gpe_set_surface_tiling(struct gen7_surface_state *ss, unsigned int tiling)
264 case I915_TILING_NONE:
265 ss->ss0.tiled_surface = 0;
266 ss->ss0.tile_walk = 0;
269 ss->ss0.tiled_surface = 1;
270 ss->ss0.tile_walk = I965_TILEWALK_XMAJOR;
273 ss->ss0.tiled_surface = 1;
274 ss->ss0.tile_walk = I965_TILEWALK_YMAJOR;
280 gen7_gpe_set_surface2_tiling(struct gen7_surface_state2 *ss, unsigned int tiling)
283 case I915_TILING_NONE:
284 ss->ss2.tiled_surface = 0;
285 ss->ss2.tile_walk = 0;
288 ss->ss2.tiled_surface = 1;
289 ss->ss2.tile_walk = I965_TILEWALK_XMAJOR;
292 ss->ss2.tiled_surface = 1;
293 ss->ss2.tile_walk = I965_TILEWALK_YMAJOR;
299 gen8_gpe_set_surface_tiling(struct gen8_surface_state *ss, unsigned int tiling)
302 case I915_TILING_NONE:
303 ss->ss0.tiled_surface = 0;
304 ss->ss0.tile_walk = 0;
307 ss->ss0.tiled_surface = 1;
308 ss->ss0.tile_walk = I965_TILEWALK_XMAJOR;
311 ss->ss0.tiled_surface = 1;
312 ss->ss0.tile_walk = I965_TILEWALK_YMAJOR;
318 gen8_gpe_set_surface2_tiling(struct gen8_surface_state2 *ss, unsigned int tiling)
321 case I915_TILING_NONE:
322 ss->ss2.tiled_surface = 0;
323 ss->ss2.tile_walk = 0;
326 ss->ss2.tiled_surface = 1;
327 ss->ss2.tile_walk = I965_TILEWALK_XMAJOR;
330 ss->ss2.tiled_surface = 1;
331 ss->ss2.tile_walk = I965_TILEWALK_YMAJOR;
337 i965_gpe_set_surface2_state(VADriverContextP ctx,
338 struct object_surface *obj_surface,
339 struct i965_surface_state2 *ss)
342 unsigned int tiling, swizzle;
344 assert(obj_surface->bo);
345 assert(obj_surface->fourcc == VA_FOURCC_NV12);
347 dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
348 w = obj_surface->orig_width;
349 h = obj_surface->orig_height;
350 w_pitch = obj_surface->width;
352 memset(ss, 0, sizeof(*ss));
354 ss->ss0.surface_base_address = obj_surface->bo->offset;
356 ss->ss1.cbcr_pixel_offset_v_direction = 2;
357 ss->ss1.width = w - 1;
358 ss->ss1.height = h - 1;
360 ss->ss2.surface_format = MFX_SURFACE_PLANAR_420_8;
361 ss->ss2.interleave_chroma = 1;
362 ss->ss2.pitch = w_pitch - 1;
363 ss->ss2.half_pitch_for_chroma = 0;
364 i965_gpe_set_surface2_tiling(ss, tiling);
365 /* ss3: UV offset for interleave mode */
366 ss->ss3.x_offset_for_cb = obj_surface->x_cb_offset;
367 ss->ss3.y_offset_for_cb = obj_surface->y_cb_offset;
371 i965_gpe_surface2_setup(VADriverContextP ctx,
372 struct i965_gpe_context *gpe_context,
373 struct object_surface *obj_surface,
374 unsigned long binding_table_offset,
375 unsigned long surface_state_offset)
377 struct i965_surface_state2 *ss;
380 bo = gpe_context->surface_state_binding_table.bo;
384 ss = (struct i965_surface_state2 *)((char *)bo->virtual + surface_state_offset);
385 i965_gpe_set_surface2_state(ctx, obj_surface, ss);
386 dri_bo_emit_reloc(bo,
387 I915_GEM_DOMAIN_RENDER, 0,
389 surface_state_offset + offsetof(struct i965_surface_state2, ss0),
392 *((unsigned int *)((char *)bo->virtual + binding_table_offset)) = surface_state_offset;
397 i965_gpe_set_media_rw_surface_state(VADriverContextP ctx,
398 struct object_surface *obj_surface,
399 struct i965_surface_state *ss)
402 unsigned int tiling, swizzle;
404 dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
405 w = obj_surface->orig_width;
406 h = obj_surface->orig_height;
407 w_pitch = obj_surface->width;
409 memset(ss, 0, sizeof(*ss));
411 ss->ss0.surface_type = I965_SURFACE_2D;
412 ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
414 ss->ss1.base_addr = obj_surface->bo->offset;
416 ss->ss2.width = w / 4 - 1; /* in DWORDs for media read & write message */
417 ss->ss2.height = h - 1;
419 ss->ss3.pitch = w_pitch - 1;
420 i965_gpe_set_surface_tiling(ss, tiling);
424 i965_gpe_media_rw_surface_setup(VADriverContextP ctx,
425 struct i965_gpe_context *gpe_context,
426 struct object_surface *obj_surface,
427 unsigned long binding_table_offset,
428 unsigned long surface_state_offset,
431 struct i965_surface_state *ss;
434 bo = gpe_context->surface_state_binding_table.bo;
435 dri_bo_map(bo, True);
438 ss = (struct i965_surface_state *)((char *)bo->virtual + surface_state_offset);
439 i965_gpe_set_media_rw_surface_state(ctx, obj_surface, ss);
440 dri_bo_emit_reloc(bo,
441 I915_GEM_DOMAIN_RENDER, write_enabled ? I915_GEM_DOMAIN_RENDER : 0,
443 surface_state_offset + offsetof(struct i965_surface_state, ss1),
446 *((unsigned int *)((char *)bo->virtual + binding_table_offset)) = surface_state_offset;
451 i965_gpe_set_buffer_surface_state(VADriverContextP ctx,
452 struct i965_buffer_surface *buffer_surface,
453 struct i965_surface_state *ss)
457 assert(buffer_surface->bo);
458 num_entries = buffer_surface->num_blocks * buffer_surface->size_block / buffer_surface->pitch;
460 memset(ss, 0, sizeof(*ss));
462 ss->ss0.render_cache_read_mode = 1;
463 ss->ss0.surface_type = I965_SURFACE_BUFFER;
465 ss->ss1.base_addr = buffer_surface->bo->offset;
467 ss->ss2.width = ((num_entries - 1) & 0x7f);
468 ss->ss2.height = (((num_entries - 1) >> 7) & 0x1fff);
470 ss->ss3.depth = (((num_entries - 1) >> 20) & 0x7f);
471 ss->ss3.pitch = buffer_surface->pitch - 1;
475 i965_gpe_buffer_suface_setup(VADriverContextP ctx,
476 struct i965_gpe_context *gpe_context,
477 struct i965_buffer_surface *buffer_surface,
478 unsigned long binding_table_offset,
479 unsigned long surface_state_offset)
481 struct i965_surface_state *ss;
484 bo = gpe_context->surface_state_binding_table.bo;
488 ss = (struct i965_surface_state *)((char *)bo->virtual + surface_state_offset);
489 i965_gpe_set_buffer_surface_state(ctx, buffer_surface, ss);
490 dri_bo_emit_reloc(bo,
491 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
493 surface_state_offset + offsetof(struct i965_surface_state, ss1),
496 *((unsigned int *)((char *)bo->virtual + binding_table_offset)) = surface_state_offset;
501 gen7_gpe_set_surface2_state(VADriverContextP ctx,
502 struct object_surface *obj_surface,
503 struct gen7_surface_state2 *ss)
506 unsigned int tiling, swizzle;
508 assert(obj_surface->bo);
509 assert(obj_surface->fourcc == VA_FOURCC_NV12);
511 dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
512 w = obj_surface->orig_width;
513 h = obj_surface->orig_height;
514 w_pitch = obj_surface->width;
516 memset(ss, 0, sizeof(*ss));
518 ss->ss0.surface_base_address = obj_surface->bo->offset;
520 ss->ss1.cbcr_pixel_offset_v_direction = 2;
521 ss->ss1.width = w - 1;
522 ss->ss1.height = h - 1;
524 ss->ss2.surface_format = MFX_SURFACE_PLANAR_420_8;
525 ss->ss2.interleave_chroma = 1;
526 ss->ss2.pitch = w_pitch - 1;
527 ss->ss2.half_pitch_for_chroma = 0;
528 gen7_gpe_set_surface2_tiling(ss, tiling);
529 /* ss3: UV offset for interleave mode */
530 ss->ss3.x_offset_for_cb = obj_surface->x_cb_offset;
531 ss->ss3.y_offset_for_cb = obj_surface->y_cb_offset;
535 gen7_gpe_surface2_setup(VADriverContextP ctx,
536 struct i965_gpe_context *gpe_context,
537 struct object_surface *obj_surface,
538 unsigned long binding_table_offset,
539 unsigned long surface_state_offset)
541 struct gen7_surface_state2 *ss;
544 bo = gpe_context->surface_state_binding_table.bo;
548 ss = (struct gen7_surface_state2 *)((char *)bo->virtual + surface_state_offset);
549 gen7_gpe_set_surface2_state(ctx, obj_surface, ss);
550 dri_bo_emit_reloc(bo,
551 I915_GEM_DOMAIN_RENDER, 0,
553 surface_state_offset + offsetof(struct gen7_surface_state2, ss0),
556 *((unsigned int *)((char *)bo->virtual + binding_table_offset)) = surface_state_offset;
561 gen7_gpe_set_media_rw_surface_state(VADriverContextP ctx,
562 struct object_surface *obj_surface,
563 struct gen7_surface_state *ss)
566 unsigned int tiling, swizzle;
568 dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
569 w = obj_surface->orig_width;
570 h = obj_surface->orig_height;
571 w_pitch = obj_surface->width;
573 memset(ss, 0, sizeof(*ss));
575 ss->ss0.surface_type = I965_SURFACE_2D;
576 ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
578 ss->ss1.base_addr = obj_surface->bo->offset;
580 ss->ss2.width = w / 4 - 1; /* in DWORDs for media read & write message */
581 ss->ss2.height = h - 1;
583 ss->ss3.pitch = w_pitch - 1;
584 gen7_gpe_set_surface_tiling(ss, tiling);
588 gen75_gpe_set_media_chroma_surface_state(VADriverContextP ctx,
589 struct object_surface *obj_surface,
590 struct gen7_surface_state *ss)
593 unsigned int tiling, swizzle;
596 dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
597 w = obj_surface->orig_width;
598 w_pitch = obj_surface->width;
600 cbcr_offset = obj_surface->height * obj_surface->width;
601 memset(ss, 0, sizeof(*ss));
603 ss->ss0.surface_type = I965_SURFACE_2D;
604 ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
606 ss->ss1.base_addr = obj_surface->bo->offset + cbcr_offset;
608 ss->ss2.width = w / 4 - 1; /* in DWORDs for media read & write message */
609 ss->ss2.height = (obj_surface->height / 2) -1;
611 ss->ss3.pitch = w_pitch - 1;
612 gen7_gpe_set_surface_tiling(ss, tiling);
616 gen7_gpe_media_rw_surface_setup(VADriverContextP ctx,
617 struct i965_gpe_context *gpe_context,
618 struct object_surface *obj_surface,
619 unsigned long binding_table_offset,
620 unsigned long surface_state_offset,
623 struct gen7_surface_state *ss;
626 bo = gpe_context->surface_state_binding_table.bo;
627 dri_bo_map(bo, True);
630 ss = (struct gen7_surface_state *)((char *)bo->virtual + surface_state_offset);
631 gen7_gpe_set_media_rw_surface_state(ctx, obj_surface, ss);
632 dri_bo_emit_reloc(bo,
633 I915_GEM_DOMAIN_RENDER, write_enabled ? I915_GEM_DOMAIN_RENDER : 0,
635 surface_state_offset + offsetof(struct gen7_surface_state, ss1),
638 *((unsigned int *)((char *)bo->virtual + binding_table_offset)) = surface_state_offset;
643 gen75_gpe_media_chroma_surface_setup(VADriverContextP ctx,
644 struct i965_gpe_context *gpe_context,
645 struct object_surface *obj_surface,
646 unsigned long binding_table_offset,
647 unsigned long surface_state_offset,
650 struct gen7_surface_state *ss;
654 assert(obj_surface->fourcc == VA_FOURCC_NV12);
655 bo = gpe_context->surface_state_binding_table.bo;
656 dri_bo_map(bo, True);
659 cbcr_offset = obj_surface->height * obj_surface->width;
660 ss = (struct gen7_surface_state *)((char *)bo->virtual + surface_state_offset);
661 gen75_gpe_set_media_chroma_surface_state(ctx, obj_surface, ss);
662 dri_bo_emit_reloc(bo,
663 I915_GEM_DOMAIN_RENDER, write_enabled ? I915_GEM_DOMAIN_RENDER : 0,
665 surface_state_offset + offsetof(struct gen7_surface_state, ss1),
668 *((unsigned int *)((char *)bo->virtual + binding_table_offset)) = surface_state_offset;
674 gen7_gpe_set_buffer_surface_state(VADriverContextP ctx,
675 struct i965_buffer_surface *buffer_surface,
676 struct gen7_surface_state *ss)
680 assert(buffer_surface->bo);
681 num_entries = buffer_surface->num_blocks * buffer_surface->size_block / buffer_surface->pitch;
683 memset(ss, 0, sizeof(*ss));
685 ss->ss0.surface_type = I965_SURFACE_BUFFER;
687 ss->ss1.base_addr = buffer_surface->bo->offset;
689 ss->ss2.width = ((num_entries - 1) & 0x7f);
690 ss->ss2.height = (((num_entries - 1) >> 7) & 0x3fff);
692 ss->ss3.depth = (((num_entries - 1) >> 21) & 0x3f);
693 ss->ss3.pitch = buffer_surface->pitch - 1;
697 gen7_gpe_buffer_suface_setup(VADriverContextP ctx,
698 struct i965_gpe_context *gpe_context,
699 struct i965_buffer_surface *buffer_surface,
700 unsigned long binding_table_offset,
701 unsigned long surface_state_offset)
703 struct gen7_surface_state *ss;
706 bo = gpe_context->surface_state_binding_table.bo;
710 ss = (struct gen7_surface_state *)((char *)bo->virtual + surface_state_offset);
711 gen7_gpe_set_buffer_surface_state(ctx, buffer_surface, ss);
712 dri_bo_emit_reloc(bo,
713 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
715 surface_state_offset + offsetof(struct gen7_surface_state, ss1),
718 *((unsigned int *)((char *)bo->virtual + binding_table_offset)) = surface_state_offset;
723 gen8_gpe_set_surface2_state(VADriverContextP ctx,
724 struct object_surface *obj_surface,
725 struct gen8_surface_state2 *ss)
727 struct i965_driver_data *i965 = i965_driver_data(ctx);
729 unsigned int tiling, swizzle;
731 assert(obj_surface->bo);
732 assert(obj_surface->fourcc == VA_FOURCC_NV12);
734 dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
735 w = obj_surface->orig_width;
736 h = obj_surface->orig_height;
737 w_pitch = obj_surface->width;
739 memset(ss, 0, sizeof(*ss));
741 if (IS_GEN9(i965->intel.device_info))
742 ss->ss5.surface_object_mocs = GEN9_CACHE_PTE;
744 ss->ss6.base_addr = (uint32_t)obj_surface->bo->offset64;
745 ss->ss7.base_addr_high = (uint32_t)(obj_surface->bo->offset64 >> 32);
747 ss->ss1.cbcr_pixel_offset_v_direction = 2;
748 ss->ss1.width = w - 1;
749 ss->ss1.height = h - 1;
751 ss->ss2.surface_format = MFX_SURFACE_PLANAR_420_8;
752 ss->ss2.interleave_chroma = 1;
753 ss->ss2.pitch = w_pitch - 1;
754 ss->ss2.half_pitch_for_chroma = 0;
755 gen8_gpe_set_surface2_tiling(ss, tiling);
756 /* ss3: UV offset for interleave mode */
757 ss->ss3.x_offset_for_cb = obj_surface->x_cb_offset;
758 ss->ss3.y_offset_for_cb = obj_surface->y_cb_offset;
762 gen8_gpe_surface2_setup(VADriverContextP ctx,
763 struct i965_gpe_context *gpe_context,
764 struct object_surface *obj_surface,
765 unsigned long binding_table_offset,
766 unsigned long surface_state_offset)
768 struct gen8_surface_state2 *ss;
771 bo = gpe_context->surface_state_binding_table.bo;
775 ss = (struct gen8_surface_state2 *)((char *)bo->virtual + surface_state_offset);
776 gen8_gpe_set_surface2_state(ctx, obj_surface, ss);
777 dri_bo_emit_reloc(bo,
778 I915_GEM_DOMAIN_RENDER, 0,
780 surface_state_offset + offsetof(struct gen8_surface_state2, ss6),
783 *((unsigned int *)((char *)bo->virtual + binding_table_offset)) = surface_state_offset;
788 gen8_gpe_set_media_rw_surface_state(VADriverContextP ctx,
789 struct object_surface *obj_surface,
790 struct gen8_surface_state *ss)
792 struct i965_driver_data *i965 = i965_driver_data(ctx);
794 unsigned int tiling, swizzle;
796 dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
797 w = obj_surface->orig_width;
798 h = obj_surface->orig_height;
799 w_pitch = obj_surface->width;
801 memset(ss, 0, sizeof(*ss));
803 if (IS_GEN9(i965->intel.device_info))
804 ss->ss1.surface_mocs = GEN9_CACHE_PTE;
806 ss->ss0.surface_type = I965_SURFACE_2D;
807 ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
809 ss->ss8.base_addr = (uint32_t)obj_surface->bo->offset64;
810 ss->ss9.base_addr_high = (uint32_t)(obj_surface->bo->offset64 >> 32);
812 ss->ss2.width = w / 4 - 1; /* in DWORDs for media read & write message */
813 ss->ss2.height = h - 1;
815 ss->ss3.pitch = w_pitch - 1;
816 gen8_gpe_set_surface_tiling(ss, tiling);
820 gen8_gpe_set_media_chroma_surface_state(VADriverContextP ctx,
821 struct object_surface *obj_surface,
822 struct gen8_surface_state *ss)
824 struct i965_driver_data *i965 = i965_driver_data(ctx);
826 unsigned int tiling, swizzle;
828 uint64_t base_offset;
830 dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
831 w = obj_surface->orig_width;
832 w_pitch = obj_surface->width;
834 cbcr_offset = obj_surface->height * obj_surface->width;
835 memset(ss, 0, sizeof(*ss));
837 if (IS_GEN9(i965->intel.device_info))
838 ss->ss1.surface_mocs = GEN9_CACHE_PTE;
840 ss->ss0.surface_type = I965_SURFACE_2D;
841 ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
843 base_offset = obj_surface->bo->offset64 + cbcr_offset;
844 ss->ss8.base_addr = (uint32_t) base_offset;
845 ss->ss9.base_addr_high = (uint32_t) (base_offset >> 32);
847 ss->ss2.width = w / 4 - 1; /* in DWORDs for media read & write message */
848 ss->ss2.height = (obj_surface->height / 2) -1;
850 ss->ss3.pitch = w_pitch - 1;
851 gen8_gpe_set_surface_tiling(ss, tiling);
855 gen8_gpe_media_rw_surface_setup(VADriverContextP ctx,
856 struct i965_gpe_context *gpe_context,
857 struct object_surface *obj_surface,
858 unsigned long binding_table_offset,
859 unsigned long surface_state_offset,
862 struct gen8_surface_state *ss;
865 bo = gpe_context->surface_state_binding_table.bo;
866 dri_bo_map(bo, True);
869 ss = (struct gen8_surface_state *)((char *)bo->virtual + surface_state_offset);
870 gen8_gpe_set_media_rw_surface_state(ctx, obj_surface, ss);
871 dri_bo_emit_reloc(bo,
872 I915_GEM_DOMAIN_RENDER, write_enabled ? I915_GEM_DOMAIN_RENDER : 0,
874 surface_state_offset + offsetof(struct gen8_surface_state, ss8),
877 *((unsigned int *)((char *)bo->virtual + binding_table_offset)) = surface_state_offset;
882 gen8_gpe_media_chroma_surface_setup(VADriverContextP ctx,
883 struct i965_gpe_context *gpe_context,
884 struct object_surface *obj_surface,
885 unsigned long binding_table_offset,
886 unsigned long surface_state_offset,
889 struct gen8_surface_state *ss;
893 assert(obj_surface->fourcc == VA_FOURCC_NV12);
894 bo = gpe_context->surface_state_binding_table.bo;
895 dri_bo_map(bo, True);
898 cbcr_offset = obj_surface->height * obj_surface->width;
899 ss = (struct gen8_surface_state *)((char *)bo->virtual + surface_state_offset);
900 gen8_gpe_set_media_chroma_surface_state(ctx, obj_surface, ss);
901 dri_bo_emit_reloc(bo,
902 I915_GEM_DOMAIN_RENDER, write_enabled ? I915_GEM_DOMAIN_RENDER : 0,
904 surface_state_offset + offsetof(struct gen8_surface_state, ss8),
907 *((unsigned int *)((char *)bo->virtual + binding_table_offset)) = surface_state_offset;
913 gen8_gpe_set_buffer_surface_state(VADriverContextP ctx,
914 struct i965_buffer_surface *buffer_surface,
915 struct gen8_surface_state *ss)
917 struct i965_driver_data *i965 = i965_driver_data(ctx);
920 assert(buffer_surface->bo);
921 num_entries = buffer_surface->num_blocks * buffer_surface->size_block / buffer_surface->pitch;
923 memset(ss, 0, sizeof(*ss));
925 ss->ss0.surface_type = I965_SURFACE_BUFFER;
926 if (IS_GEN9(i965->intel.device_info))
927 ss->ss1.surface_mocs = GEN9_CACHE_PTE;
930 ss->ss8.base_addr = (uint32_t)buffer_surface->bo->offset64;
931 ss->ss9.base_addr_high = (uint32_t)(buffer_surface->bo->offset64 >> 32);
933 ss->ss2.width = ((num_entries - 1) & 0x7f);
934 ss->ss2.height = (((num_entries - 1) >> 7) & 0x3fff);
936 ss->ss3.depth = (((num_entries - 1) >> 21) & 0x3f);
937 ss->ss3.pitch = buffer_surface->pitch - 1;
941 gen8_gpe_buffer_suface_setup(VADriverContextP ctx,
942 struct i965_gpe_context *gpe_context,
943 struct i965_buffer_surface *buffer_surface,
944 unsigned long binding_table_offset,
945 unsigned long surface_state_offset)
947 struct gen8_surface_state *ss;
950 bo = gpe_context->surface_state_binding_table.bo;
954 ss = (struct gen8_surface_state *)((char *)bo->virtual + surface_state_offset);
955 gen8_gpe_set_buffer_surface_state(ctx, buffer_surface, ss);
956 dri_bo_emit_reloc(bo,
957 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
959 surface_state_offset + offsetof(struct gen8_surface_state, ss8),
962 *((unsigned int *)((char *)bo->virtual + binding_table_offset)) = surface_state_offset;
967 gen8_gpe_state_base_address(VADriverContextP ctx,
968 struct i965_gpe_context *gpe_context,
969 struct intel_batchbuffer *batch)
971 BEGIN_BATCH(batch, 16);
973 OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | 14);
975 OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY); //General State Base Address
979 /*DW4 Surface state base address */
980 OUT_RELOC64(batch, gpe_context->surface_state_binding_table.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */
982 /*DW6. Dynamic state base address */
983 if (gpe_context->dynamic_state.bo)
984 OUT_RELOC64(batch, gpe_context->dynamic_state.bo,
985 I915_GEM_DOMAIN_RENDER | I915_GEM_DOMAIN_SAMPLER,
986 0, BASE_ADDRESS_MODIFY);
988 OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
993 /*DW8. Indirect Object base address */
994 if (gpe_context->indirect_state.bo)
995 OUT_RELOC64(batch, gpe_context->indirect_state.bo,
996 I915_GEM_DOMAIN_SAMPLER,
997 0, BASE_ADDRESS_MODIFY);
999 OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1000 OUT_BATCH(batch, 0);
1004 /*DW10. Instruct base address */
1005 if (gpe_context->instruction_state.bo)
1006 OUT_RELOC64(batch, gpe_context->instruction_state.bo,
1007 I915_GEM_DOMAIN_INSTRUCTION,
1008 0, BASE_ADDRESS_MODIFY);
1010 OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1011 OUT_BATCH(batch, 0);
1014 /* DW12. Size limitation */
1015 OUT_BATCH(batch, 0xFFFFF000 | BASE_ADDRESS_MODIFY); //General State Access Upper Bound
1016 OUT_BATCH(batch, 0xFFFFF000 | BASE_ADDRESS_MODIFY); //Dynamic State Access Upper Bound
1017 OUT_BATCH(batch, 0xFFFFF000 | BASE_ADDRESS_MODIFY); //Indirect Object Access Upper Bound
1018 OUT_BATCH(batch, 0xFFFFF000 | BASE_ADDRESS_MODIFY); //Instruction Access Upper Bound
1021 OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY); //LLC Coherent Base Address
1022 OUT_BATCH(batch, 0xFFFFF000 | BASE_ADDRESS_MODIFY ); //LLC Coherent Upper Bound
1025 ADVANCE_BATCH(batch);
1029 gen8_gpe_vfe_state(VADriverContextP ctx,
1030 struct i965_gpe_context *gpe_context,
1031 struct intel_batchbuffer *batch)
1034 BEGIN_BATCH(batch, 9);
1036 OUT_BATCH(batch, CMD_MEDIA_VFE_STATE | (9 - 2));
1037 /* Scratch Space Base Pointer and Space */
1038 OUT_BATCH(batch, 0);
1039 OUT_BATCH(batch, 0);
1042 gpe_context->vfe_state.max_num_threads << 16 | /* Maximum Number of Threads */
1043 gpe_context->vfe_state.num_urb_entries << 8 | /* Number of URB Entries */
1044 gpe_context->vfe_state.gpgpu_mode << 2); /* MEDIA Mode */
1045 OUT_BATCH(batch, 0); /* Debug: Object ID */
1047 gpe_context->vfe_state.urb_entry_size << 16 | /* URB Entry Allocation Size */
1048 gpe_context->vfe_state.curbe_allocation_size); /* CURBE Allocation Size */
1050 /* the vfe_desc5/6/7 will decide whether the scoreboard is used. */
1051 OUT_BATCH(batch, gpe_context->vfe_desc5.dword);
1052 OUT_BATCH(batch, gpe_context->vfe_desc6.dword);
1053 OUT_BATCH(batch, gpe_context->vfe_desc7.dword);
1055 ADVANCE_BATCH(batch);
1061 gen8_gpe_curbe_load(VADriverContextP ctx,
1062 struct i965_gpe_context *gpe_context,
1063 struct intel_batchbuffer *batch)
1065 BEGIN_BATCH(batch, 4);
1067 OUT_BATCH(batch, CMD_MEDIA_CURBE_LOAD | (4 - 2));
1068 OUT_BATCH(batch, 0);
1069 OUT_BATCH(batch, gpe_context->curbe.length);
1070 OUT_BATCH(batch, gpe_context->curbe.offset);
1072 ADVANCE_BATCH(batch);
1076 gen8_gpe_idrt(VADriverContextP ctx,
1077 struct i965_gpe_context *gpe_context,
1078 struct intel_batchbuffer *batch)
1080 BEGIN_BATCH(batch, 6);
1082 OUT_BATCH(batch, CMD_MEDIA_STATE_FLUSH);
1083 OUT_BATCH(batch, 0);
1085 OUT_BATCH(batch, CMD_MEDIA_INTERFACE_LOAD | (4 - 2));
1086 OUT_BATCH(batch, 0);
1087 OUT_BATCH(batch, gpe_context->idrt.max_entries * gpe_context->idrt.entry_size);
1088 OUT_BATCH(batch, gpe_context->idrt.offset);
1090 ADVANCE_BATCH(batch);
1095 gen8_gpe_pipeline_setup(VADriverContextP ctx,
1096 struct i965_gpe_context *gpe_context,
1097 struct intel_batchbuffer *batch)
1099 intel_batchbuffer_emit_mi_flush(batch);
1101 i965_gpe_select(ctx, gpe_context, batch);
1102 gen8_gpe_state_base_address(ctx, gpe_context, batch);
1103 gen8_gpe_vfe_state(ctx, gpe_context, batch);
1104 gen8_gpe_curbe_load(ctx, gpe_context, batch);
1105 gen8_gpe_idrt(ctx, gpe_context, batch);
1109 gen8_gpe_context_init(VADriverContextP ctx,
1110 struct i965_gpe_context *gpe_context)
1112 struct i965_driver_data *i965 = i965_driver_data(ctx);
1115 unsigned int start_offset, end_offset;
1117 dri_bo_unreference(gpe_context->surface_state_binding_table.bo);
1118 bo = dri_bo_alloc(i965->intel.bufmgr,
1119 "surface state & binding table",
1120 gpe_context->surface_state_binding_table.length,
1123 gpe_context->surface_state_binding_table.bo = bo;
1125 bo_size = gpe_context->idrt.max_entries * gpe_context->idrt.entry_size + gpe_context->curbe.length + gpe_context->sampler_size + 192;
1126 dri_bo_unreference(gpe_context->dynamic_state.bo);
1127 bo = dri_bo_alloc(i965->intel.bufmgr,
1128 "surface state & binding table",
1132 gpe_context->dynamic_state.bo = bo;
1133 gpe_context->dynamic_state.bo_size = bo_size;
1136 gpe_context->dynamic_state.end_offset = 0;
1138 /* Constant buffer offset */
1139 start_offset = ALIGN(end_offset, 64);
1140 dri_bo_unreference(gpe_context->curbe.bo);
1141 gpe_context->curbe.bo = bo;
1142 dri_bo_reference(gpe_context->curbe.bo);
1143 gpe_context->curbe.offset = start_offset;
1144 end_offset = start_offset + gpe_context->curbe.length;
1146 /* Interface descriptor offset */
1147 start_offset = ALIGN(end_offset, 64);
1148 dri_bo_unreference(gpe_context->idrt.bo);
1149 gpe_context->idrt.bo = bo;
1150 dri_bo_reference(gpe_context->idrt.bo);
1151 gpe_context->idrt.offset = start_offset;
1152 end_offset = start_offset + gpe_context->idrt.entry_size * gpe_context->idrt.max_entries;
1154 /* Sampler state offset */
1155 start_offset = ALIGN(end_offset, 64);
1156 gpe_context->sampler_offset = start_offset;
1157 end_offset = start_offset + gpe_context->sampler_size;
1159 /* update the end offset of dynamic_state */
1160 gpe_context->dynamic_state.end_offset = end_offset;
/* Release every buffer object owned by the GPE context and NULL each
 * pointer so a later destroy or re-init cannot double-unreference.
 * NOTE(review): braces/blank lines were elided by this numbered extraction. */
1165 gen8_gpe_context_destroy(struct i965_gpe_context *gpe_context)
1167 dri_bo_unreference(gpe_context->surface_state_binding_table.bo);
1168 gpe_context->surface_state_binding_table.bo = NULL;
1170 dri_bo_unreference(gpe_context->instruction_state.bo);
1171 gpe_context->instruction_state.bo = NULL;
1173 dri_bo_unreference(gpe_context->dynamic_state.bo);
1174 gpe_context->dynamic_state.bo = NULL;
1176 dri_bo_unreference(gpe_context->indirect_state.bo);
1177 gpe_context->indirect_state.bo = NULL;
/* curbe/idrt may alias dynamic_state.bo (see set_dynamic_buffer below);
 * unreference is still balanced because each took its own reference. */
1179 dri_bo_unreference(gpe_context->curbe.bo);
1180 gpe_context->curbe.bo = NULL;
1182 dri_bo_unreference(gpe_context->idrt.bo);
1183 gpe_context->idrt.bo = NULL;
/* Copy the kernel descriptors into the context, allocate one instruction
 * buffer large enough for all shader binaries (each 64-byte aligned), and
 * upload every kernel, recording its offset in kernel->kernel_offset. */
1188 gen8_gpe_load_kernels(VADriverContextP ctx,
1189 struct i965_gpe_context *gpe_context,
1190 struct i965_kernel *kernel_list,
1191 unsigned int num_kernels)
1193 struct i965_driver_data *i965 = i965_driver_data(ctx);
1194 int i, kernel_size = 0;
1195 unsigned int kernel_offset, end_offset;
1196 unsigned char *kernel_ptr;
1197 struct i965_kernel *kernel;
1199 assert(num_kernels <= MAX_GPE_KERNELS);
1200 memcpy(gpe_context->kernels, kernel_list, sizeof(*kernel_list) * num_kernels);
1201 gpe_context->num_kernels = num_kernels;
/* First pass: total size with per-kernel 64-byte alignment padding. */
1203 for (i = 0; i < num_kernels; i++) {
1204 kernel = &gpe_context->kernels[i];
1206 kernel_size += ALIGN(kernel->size, 64);
/* NOTE(review): the alloc's name/size/alignment argument lines (1210-1212)
 * were elided from this extraction — confirm against upstream. */
1209 gpe_context->instruction_state.bo = dri_bo_alloc(i965->intel.bufmgr,
1213 if (gpe_context->instruction_state.bo == NULL) {
1214 WARN_ONCE("failure to allocate the buffer space for kernel shader\n");
1218 assert(gpe_context->instruction_state.bo);
1220 gpe_context->instruction_state.bo_size = kernel_size;
1221 gpe_context->instruction_state.end_offset = 0;
1224 dri_bo_map(gpe_context->instruction_state.bo, 1);
1225 kernel_ptr = (unsigned char *)(gpe_context->instruction_state.bo->virtual);
/* Second pass: copy each binary at the next 64-byte boundary.
 * NOTE(review): the `end_offset = 0;` initializer appears to have been
 * elided (line gap 1221->1224..1227) — verify it exists upstream, else
 * the first ALIGN() below reads an uninitialized value. */
1226 for (i = 0; i < num_kernels; i++) {
1227 kernel_offset = ALIGN(end_offset, 64);
1228 kernel = &gpe_context->kernels[i];
1229 kernel->kernel_offset = kernel_offset;
1232 memcpy(kernel_ptr + kernel_offset, kernel->bin, kernel->size);
1234 end_offset = kernel_offset + kernel->size;
1238 gpe_context->instruction_state.end_offset = end_offset;
1240 dri_bo_unmap(gpe_context->instruction_state.bo);
/* Emit the 19-dword Gen9 STATE_BASE_ADDRESS command: programs surface,
 * dynamic, indirect and instruction base addresses (64-bit relocations),
 * their 4K-granular upper bounds, and the bindless surface state address.
 * NOTE(review): the `else` lines pairing with each `if (…bo)` were elided
 * by this extraction (gaps at 1266/1277/1288) — the zero OUT_BATCH pairs
 * below are the else branches upstream. */
1246 gen9_gpe_state_base_address(VADriverContextP ctx,
1247 struct i965_gpe_context *gpe_context,
1248 struct intel_batchbuffer *batch)
1250 BEGIN_BATCH(batch, 19);
1252 OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (19 - 2));
1254 OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY); //General State Base Address
1255 OUT_BATCH(batch, 0);
1256 OUT_BATCH(batch, 0);
1258 /*DW4 Surface state base address */
1259 OUT_RELOC64(batch, gpe_context->surface_state_binding_table.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */
1261 /*DW6. Dynamic state base address */
1262 if (gpe_context->dynamic_state.bo)
1263 OUT_RELOC64(batch, gpe_context->dynamic_state.bo,
1264 I915_GEM_DOMAIN_RENDER | I915_GEM_DOMAIN_SAMPLER,
1265 I915_GEM_DOMAIN_RENDER, BASE_ADDRESS_MODIFY);
1267 OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1268 OUT_BATCH(batch, 0);
1272 /*DW8. Indirect Object base address */
1273 if (gpe_context->indirect_state.bo)
1274 OUT_RELOC64(batch, gpe_context->indirect_state.bo,
1275 I915_GEM_DOMAIN_SAMPLER,
1276 0, BASE_ADDRESS_MODIFY);
1278 OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1279 OUT_BATCH(batch, 0);
1283 /*DW10. Instruct base address */
1284 if (gpe_context->instruction_state.bo)
1285 OUT_RELOC64(batch, gpe_context->instruction_state.bo,
1286 I915_GEM_DOMAIN_INSTRUCTION,
1287 0, BASE_ADDRESS_MODIFY);
1289 OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1290 OUT_BATCH(batch, 0);
1294 /* DW12. Size limitation */
1295 OUT_BATCH(batch, 0xFFFFF000 | BASE_ADDRESS_MODIFY); //General State Access Upper Bound
1296 OUT_BATCH(batch, 0xFFFFF000 | BASE_ADDRESS_MODIFY); //Dynamic State Access Upper Bound
1297 OUT_BATCH(batch, 0xFFFFF000 | BASE_ADDRESS_MODIFY); //Indirect Object Access Upper Bound
1298 OUT_BATCH(batch, 0xFFFFF000 | BASE_ADDRESS_MODIFY); //Instruction Access Upper Bound
1300 /* the bindless surface state address */
1301 OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
1302 OUT_BATCH(batch, 0);
1303 OUT_BATCH(batch, 0xFFFFF000);
1305 ADVANCE_BATCH(batch);
/* Select the media pipeline on Gen9: DOP clock gating off and media
 * engine forced awake, with the corresponding mask bits set so only
 * those fields are updated by the command. */
1309 gen9_gpe_select(VADriverContextP ctx,
1310 struct i965_gpe_context *gpe_context,
1311 struct intel_batchbuffer *batch)
1313 BEGIN_BATCH(batch, 1);
1314 OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA |
1315 GEN9_PIPELINE_SELECTION_MASK |
1316 GEN9_MEDIA_DOP_GATE_OFF |
1317 GEN9_MEDIA_DOP_GATE_MASK |
1318 GEN9_FORCE_MEDIA_AWAKE_ON |
1319 GEN9_FORCE_MEDIA_AWAKE_MASK);
1320 ADVANCE_BATCH(batch);
/* Standard Gen9 media pipeline preamble: flush caches, select the media
 * pipe, then program base addresses, VFE state, CURBE and the interface
 * descriptor table (the Gen8 emitters are reused for the latter three). */
1324 gen9_gpe_pipeline_setup(VADriverContextP ctx,
1325 struct i965_gpe_context *gpe_context,
1326 struct intel_batchbuffer *batch)
1328 intel_batchbuffer_emit_mi_flush(batch);
1330 gen9_gpe_select(ctx, gpe_context, batch);
1331 gen9_gpe_state_base_address(ctx, gpe_context, batch);
1332 gen8_gpe_vfe_state(ctx, gpe_context, batch);
1333 gen8_gpe_curbe_load(ctx, gpe_context, batch);
1334 gen8_gpe_idrt(ctx, gpe_context, batch);
/* Counterpart to gen9_gpe_select(): re-enable DOP clock gating and drop
 * the force-awake request once the media workload has been submitted. */
1338 gen9_gpe_pipeline_end(VADriverContextP ctx,
1339 struct i965_gpe_context *gpe_context,
1340 struct intel_batchbuffer *batch)
1342 BEGIN_BATCH(batch, 1);
1343 OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA |
1344 GEN9_PIPELINE_SELECTION_MASK |
1345 GEN9_MEDIA_DOP_GATE_ON |
1346 GEN9_MEDIA_DOP_GATE_MASK |
1347 GEN9_FORCE_MEDIA_AWAKE_OFF |
1348 GEN9_FORCE_MEDIA_AWAKE_MASK);
1349 ADVANCE_BATCH(batch);
/* Allocate a page-aligned buffer object for the resource; returns nonzero
 * on success. NOTE(review): the parameter lines setting res->size/name
 * (1355-1361) were elided from this extraction — res->size is expected to
 * be filled in before the dri_bo_alloc call; confirm upstream. */
1353 i965_allocate_gpe_resource(dri_bufmgr *bufmgr,
1354 struct i965_gpe_resource *res,
1362 res->bo = dri_bo_alloc(bufmgr, name, res->size, 4096);
1365 return (res->bo != NULL);
/* Wrap an existing VA object_surface as a 2D GPE resource: copies the
 * geometry/offsets, takes a bo reference (shared ownership with the
 * surface), and records the bo's tiling mode. */
1369 i965_object_surface_to_2d_gpe_resource(struct i965_gpe_resource *res,
1370 struct object_surface *obj_surface)
1372 unsigned int swizzle;
1374 res->type = I965_GPE_RESOURCE_2D;
1375 res->width = obj_surface->orig_width;
1376 res->height = obj_surface->orig_height;
/* obj_surface->width is the allocated row stride, hence used as pitch. */
1377 res->pitch = obj_surface->width;
1378 res->size = obj_surface->size;
1379 res->cb_cr_pitch = obj_surface->cb_cr_pitch;
1380 res->x_cb_offset = obj_surface->x_cb_offset;
1381 res->y_cb_offset = obj_surface->y_cb_offset;
1382 res->bo = obj_surface->bo;
1385 dri_bo_reference(res->bo);
1386 dri_bo_get_tiling(obj_surface->bo, &res->tiling, &swizzle);
/* Wrap an existing dri_bo as a 1D buffer GPE resource (takes a reference).
 * NOTE(review): `size = pitch * width` with pitch == width computes
 * width^2 for a flat buffer; upstream sets height = 1 (line 1397, elided
 * here) and size = pitch * height — verify this line against upstream. */
1390 i965_dri_object_to_buffer_gpe_resource(struct i965_gpe_resource *res,
1393 unsigned int swizzle;
1395 res->type = I965_GPE_RESOURCE_BUFFER;
1396 res->width = bo->size;
1398 res->pitch = res->width;
1399 res->size = res->pitch * res->width;
1403 dri_bo_reference(res->bo);
1404 dri_bo_get_tiling(res->bo, &res->tiling, &swizzle);
/* Wrap an existing dri_bo as a 2D GPE resource with caller-supplied
 * geometry (takes a reference, queries tiling).
 * NOTE(review): for a 2D surface `size = pitch * height` is expected;
 * `pitch * width` here looks transposed — confirm against upstream
 * (the pitch/width assignment lines 1409-1419 were partly elided). */
1408 i965_gpe_dri_object_to_2d_gpe_resource(struct i965_gpe_resource *res,
1411 unsigned int height,
1414 unsigned int swizzle;
1416 res->type = I965_GPE_RESOURCE_2D;
1418 res->height = height;
1420 res->size = res->pitch * res->width;
1424 dri_bo_reference(res->bo);
1425 dri_bo_get_tiling(res->bo, &res->tiling, &swizzle);
/* Map the resource's bo writable, clear all res->size bytes, unmap. */
1429 i965_zero_gpe_resource(struct i965_gpe_resource *res)
1432 dri_bo_map(res->bo, 1);
1433 memset(res->bo->virtual, 0, res->size);
1434 dri_bo_unmap(res->bo);
/* Drop this resource's bo reference; the bo is destroyed once the last
 * holder releases it. */
1439 i965_free_gpe_resource(struct i965_gpe_resource *res)
1441 dri_bo_unreference(res->bo);
/* Map the resource writable and cache the CPU pointer in res->map.
 * NOTE(review): the ret-check / return lines were elided here. */
1447 i965_map_gpe_resource(struct i965_gpe_resource *res)
1452 ret = dri_bo_map(res->bo, 1);
1455 res->map = res->bo->virtual;
/* Unmap the resource if it is currently mapped (bo and map both set). */
1465 i965_unmap_gpe_resource(struct i965_gpe_resource *res)
1467 if (res->bo && res->map)
1468 dri_bo_unmap(res->bo);
/* Emit a 5-dword MI_FLUSH_DW, optionally invalidating the video pipeline
 * cache and optionally post-sync-writing a qword (params->dw0/dw1) to
 * params->bo. NOTE(review): the `if (params->bo)` guard and its else
 * branch were elided by this extraction (gaps 1490->1493, 1495->1498). */
1474 gen9_gpe_mi_flush_dw(VADriverContextP ctx,
1475 struct intel_batchbuffer *batch,
1476 struct gpe_mi_flush_dw_parameter *params)
1478 int video_pipeline_cache_invalidate = 0;
1479 int post_sync_operation = MI_FLUSH_DW_NOWRITE;
1481 if (params->video_pipeline_cache_invalidate)
1482 video_pipeline_cache_invalidate = MI_FLUSH_DW_VIDEO_PIPELINE_CACHE_INVALIDATE;
1485 post_sync_operation = MI_FLUSH_DW_WRITE_QWORD;
1487 __OUT_BATCH(batch, (MI_FLUSH_DW2 |
1488 video_pipeline_cache_invalidate |
1489 post_sync_operation |
1490 (5 - 2))); /* Always use PPGTT */
1493 __OUT_RELOC64(batch,
1495 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
1498 __OUT_BATCH(batch, 0);
1499 __OUT_BATCH(batch, 0);
1502 __OUT_BATCH(batch, params->dw0);
1503 __OUT_BATCH(batch, params->dw1);
/* Emit MI_STORE_DATA_IMM writing dw0 (and dw1 when is_qword) to
 * params->bo at params->offset; command length is 5 dwords for a qword
 * store, 4 for a dword store. */
1507 gen9_gpe_mi_store_data_imm(VADriverContextP ctx,
1508 struct intel_batchbuffer *batch,
1509 struct gpe_mi_store_data_imm_parameter *params)
1511 if (params->is_qword) {
1512 __OUT_BATCH(batch, MI_STORE_DATA_IMM |
1514 (5 - 2)); /* Always use PPGTT */
1516 __OUT_BATCH(batch, MI_STORE_DATA_IMM | (4 - 2)); /* Always use PPGTT */
1519 __OUT_RELOC64(batch,
1521 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
1523 __OUT_BATCH(batch, params->dw0);
1525 if (params->is_qword)
1526 __OUT_BATCH(batch, params->dw1);
/* Emit MI_STORE_REGISTER_MEM: copy the MMIO register at
 * params->mmio_offset into params->bo (relocated 64-bit address). */
1530 gen9_gpe_mi_store_register_mem(VADriverContextP ctx,
1531 struct intel_batchbuffer *batch,
1532 struct gpe_mi_store_register_mem_parameter *params)
1534 __OUT_BATCH(batch, (MI_STORE_REGISTER_MEM | (4 - 2))); /* Always use PPGTT */
1535 __OUT_BATCH(batch, params->mmio_offset);
1536 __OUT_RELOC64(batch,
1538 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
/* Emit MI_LOAD_REGISTER_MEM: load the MMIO register at
 * params->mmio_offset from params->bo (relocated 64-bit address). */
1543 gen9_gpe_mi_load_register_mem(VADriverContextP ctx,
1544 struct intel_batchbuffer *batch,
1545 struct gpe_mi_load_register_mem_parameter *params)
1547 __OUT_BATCH(batch, (MI_LOAD_REGISTER_MEM | (4 - 2))); /* Always use PPGTT */
1548 __OUT_BATCH(batch, params->mmio_offset);
1549 __OUT_RELOC64(batch,
1551 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
/* Emit MI_LOAD_REGISTER_IMM: write the immediate params->data into the
 * MMIO register at params->mmio_offset (3-dword command). */
1556 gen9_gpe_mi_load_register_imm(VADriverContextP ctx,
1557 struct intel_batchbuffer *batch,
1558 struct gpe_mi_load_register_imm_parameter *params)
1560 __OUT_BATCH(batch, (MI_LOAD_REGISTER_IMM | (3 - 2)));
1561 __OUT_BATCH(batch, params->mmio_offset);
1562 __OUT_BATCH(batch, params->data);
/* Emit MI_LOAD_REGISTER_REG: copy src MMIO register to dst MMIO register. */
1566 gen9_gpe_mi_load_register_reg(VADriverContextP ctx,
1567 struct intel_batchbuffer *batch,
1568 struct gpe_mi_load_register_reg_parameter *params)
1570 __OUT_BATCH(batch, (MI_LOAD_REGISTER_REG | (3 - 2)));
1571 __OUT_BATCH(batch, params->src_mmio_offset);
1572 __OUT_BATCH(batch, params->dst_mmio_offset);
/* Emit MI_MATH followed by the caller's ALU instruction list
 * (num_instructions dwords, 4 bytes each); the command's length field
 * encodes num_instructions - 1 per the MI_MATH layout. */
1576 gen9_gpe_mi_math(VADriverContextP ctx,
1577 struct intel_batchbuffer *batch,
1578 struct gpe_mi_math_parameter *params)
1580 __OUT_BATCH(batch, (MI_MATH | (params->num_instructions - 1)));
1581 intel_batchbuffer_data(batch, params->instruction_list, params->num_instructions * 4);
/* Emit MI_CONDITIONAL_BATCH_BUFFER_END: terminate the batch if the qword
 * at params->bo/offset compares against params->compare_data. Mask-mode
 * compare is enabled by default and cleared when the caller sets
 * compare_mask_mode_disabled. (ENANBLED typo is the upstream macro name.) */
1585 gen9_gpe_mi_conditional_batch_buffer_end(VADriverContextP ctx,
1586 struct intel_batchbuffer *batch,
1587 struct gpe_mi_conditional_batch_buffer_end_parameter *params)
1589 int compare_mask_mode_enabled = MI_COMPARE_MASK_MODE_ENANBLED;
1591 if (params->compare_mask_mode_disabled)
1592 compare_mask_mode_enabled = 0;
1594 __OUT_BATCH(batch, (MI_CONDITIONAL_BATCH_BUFFER_END |
1596 compare_mask_mode_enabled |
1597 (4 - 2))); /* Always use PPGTT */
1598 __OUT_BATCH(batch, params->compare_data);
1599 __OUT_RELOC64(batch,
1601 I915_GEM_DOMAIN_RENDER | I915_GEM_DOMAIN_INSTRUCTION, 0,
/* Emit MI_BATCH_BUFFER_START chaining to params->bo: bit 22 selects
 * second-level batch, bit 8 selects PPGTT (set when use_global_gtt is
 * false). NOTE(review): the command length dword tail (line 1613) and
 * reloc offset args were elided by this extraction. */
1606 gen9_gpe_mi_batch_buffer_start(VADriverContextP ctx,
1607 struct intel_batchbuffer *batch,
1608 struct gpe_mi_batch_buffer_start_parameter *params)
1610 __OUT_BATCH(batch, (MI_BATCH_BUFFER_START |
1611 (!!params->is_second_level << 22) |
1612 (!params->use_global_gtt << 8) |
1614 __OUT_RELOC64(batch,
1616 I915_GEM_DOMAIN_RENDER | I915_GEM_DOMAIN_INSTRUCTION, 0,
/* Point the context's dynamic-state, curbe and idrt members at one shared
 * caller-provided bo (ds->bo), each at its own offset within that buffer.
 * Each member takes its own reference so destroy() stays balanced. */
1621 gen8_gpe_context_set_dynamic_buffer(VADriverContextP ctx,
1622 struct i965_gpe_context *gpe_context,
1623 struct gpe_dynamic_state_parameter *ds)
/* No-op on missing buffer or context (elided `return` follows upstream). */
1625 if (!ds->bo || !gpe_context)
1628 dri_bo_unreference(gpe_context->dynamic_state.bo);
1629 gpe_context->dynamic_state.bo = ds->bo;
1630 dri_bo_reference(gpe_context->dynamic_state.bo);
1631 gpe_context->dynamic_state.bo_size = ds->bo_size;
1633 /* curbe buffer is a part of the dynamic buffer */
1634 dri_bo_unreference(gpe_context->curbe.bo);
1635 gpe_context->curbe.bo = ds->bo;
1636 dri_bo_reference(gpe_context->curbe.bo);
1637 gpe_context->curbe.offset = ds->curbe_offset;
1639 /* idrt buffer is a part of the dynamic buffer */
1640 dri_bo_unreference(gpe_context->idrt.bo);
1641 gpe_context->idrt.bo = ds->bo;
1642 dri_bo_reference(gpe_context->idrt.bo);
1643 gpe_context->idrt.offset = ds->idrt_offset;
1645 gpe_context->sampler_offset = ds->sampler_offset;
/* Map the curbe bo writable and return a CPU pointer to the curbe region
 * (curbe may live at a nonzero offset inside a shared dynamic-state bo).
 * Caller must pair with gen8p_gpe_context_unmap_curbe(). */
1651 gen8p_gpe_context_map_curbe(struct i965_gpe_context *gpe_context)
1653 dri_bo_map(gpe_context->curbe.bo, 1);
1655 return (char *)gpe_context->curbe.bo->virtual + gpe_context->curbe.offset;
/* Unmap the curbe bo mapped by gen8p_gpe_context_map_curbe(). */
1659 gen8p_gpe_context_unmap_curbe(struct i965_gpe_context *gpe_context)
1661 dri_bo_unmap(gpe_context->curbe.bo);
/* Rewrite the whole binding table so entry i points at the i-th padded
 * Gen9 surface-state slot; subsequent add_surface() calls then only fill
 * the surface states themselves. */
1665 gen9_gpe_reset_binding_table(VADriverContextP ctx,
1666 struct i965_gpe_context *gpe_context)
1668 unsigned int *binding_table;
1669 unsigned int binding_table_offset = gpe_context->surface_state_binding_table.binding_table_offset;
1672 dri_bo_map(gpe_context->surface_state_binding_table.bo, 1);
1673 binding_table = (unsigned int*)((char *)gpe_context->surface_state_binding_table.bo->virtual + binding_table_offset);
1675 for (i = 0; i < gpe_context->surface_state_binding_table.max_entries; i++) {
1676 *(binding_table + i) = gpe_context->surface_state_binding_table.surface_state_offset + i * SURFACE_STATE_PADDED_SIZE_GEN9;
1679 dri_bo_unmap(gpe_context->surface_state_binding_table.bo);
/* Fill one Gen8 interface descriptor per loaded kernel inside the idrt
 * bo: kernel start pointer (64B units), sampler state pointer, binding
 * table pointer (32B units), and curbe read length in 256-bit registers.
 * NOTE(review): the dri_bo_map call for `bo` (line ~1692) was elided —
 * bo->virtual is dereferenced below, so the map must occur upstream. */
1683 gen8_gpe_setup_interface_data(VADriverContextP ctx,
1684 struct i965_gpe_context *gpe_context)
1686 struct gen8_interface_descriptor_data *desc;
1689 unsigned char *desc_ptr;
1691 bo = gpe_context->idrt.bo;
1693 assert(bo->virtual);
1694 desc_ptr = (unsigned char *)bo->virtual + gpe_context->idrt.offset;
1695 desc = (struct gen8_interface_descriptor_data *)desc_ptr;
1697 for (i = 0; i < gpe_context->num_kernels; i++) {
1698 struct i965_kernel *kernel;
1700 kernel = &gpe_context->kernels[i];
1701 assert(sizeof(*desc) == 32);
1703 /*Setup the descritor table*/
1704 memset(desc, 0, sizeof(*desc));
1705 desc->desc0.kernel_start_pointer = kernel->kernel_offset >> 6;
1706 desc->desc3.sampler_count = 0;
1707 desc->desc3.sampler_state_pointer = (gpe_context->sampler_offset >> 5);
1708 desc->desc4.binding_table_entry_count = 0;
1709 desc->desc4.binding_table_pointer = (gpe_context->surface_state_binding_table.binding_table_offset >> 5);
1710 desc->desc5.constant_urb_entry_read_offset = 0;
1711 desc->desc5.constant_urb_entry_read_length = ALIGN(gpe_context->curbe.length, 32) >> 5; // in registers
/* Translate an i915 tiling mode into the gen9_surface_state ss0
 * tiled_surface/tile_walk fields (linear, X-major, Y-major).
 * NOTE(review): the TILING_X/TILING_Y case labels were elided here. */
1720 gen9_gpe_set_surface_tiling(struct gen9_surface_state *ss, unsigned int tiling)
1723 case I915_TILING_NONE:
1724 ss->ss0.tiled_surface = 0;
1725 ss->ss0.tile_walk = 0;
1728 ss->ss0.tiled_surface = 1;
1729 ss->ss0.tile_walk = I965_TILEWALK_XMAJOR;
1732 ss->ss0.tiled_surface = 1;
1733 ss->ss0.tile_walk = I965_TILEWALK_YMAJOR;
/* Same tiling translation as gen9_gpe_set_surface_tiling() but for the
 * advanced/media gen9_surface_state2 layout (fields live in ss2). */
1739 gen9_gpe_set_surface2_tiling(struct gen9_surface_state2 *ss, unsigned int tiling)
1742 case I915_TILING_NONE:
1743 ss->ss2.tiled_surface = 0;
1744 ss->ss2.tile_walk = 0;
1747 ss->ss2.tiled_surface = 1;
1748 ss->ss2.tile_walk = I965_TILEWALK_XMAJOR;
1751 ss->ss2.tiled_surface = 1;
1752 ss->ss2.tile_walk = I965_TILEWALK_YMAJOR;
/* Fill a Gen9 2D SURFACE_STATE: format/MOCS/geometry (stored minus one),
 * identity RGBA channel select, split 64-bit base address, tiling bits.
 * y_offset handles surfaces that start mid-tile within the bo. */
1758 gen9_gpe_set_2d_surface_state(struct gen9_surface_state *ss,
1759 unsigned int cacheability_control,
1760 unsigned int format,
1761 unsigned int tiling,
1763 unsigned int height,
1765 uint64_t base_offset,
1766 unsigned int y_offset)
1768 memset(ss, 0, sizeof(*ss));
1770 /* Always set 1(align 4 mode) */
1771 ss->ss0.vertical_alignment = 1;
1772 ss->ss0.horizontal_alignment = 1;
1774 ss->ss0.surface_format = format;
1775 ss->ss0.surface_type = I965_SURFACE_2D;
1777 ss->ss1.surface_mocs = cacheability_control;
1779 ss->ss2.width = width - 1;
1780 ss->ss2.height = height - 1;
1782 ss->ss3.pitch = pitch - 1;
1784 ss->ss5.y_offset = y_offset;
1786 ss->ss7.shader_chanel_select_a = HSW_SCS_ALPHA;
1787 ss->ss7.shader_chanel_select_b = HSW_SCS_BLUE;
1788 ss->ss7.shader_chanel_select_g = HSW_SCS_GREEN;
1789 ss->ss7.shader_chanel_select_r = HSW_SCS_RED;
1791 ss->ss8.base_addr = (uint32_t)base_offset;
1792 ss->ss9.base_addr_high = (uint32_t)(base_offset >> 32);
1794 gen9_gpe_set_surface_tiling(ss, tiling);
1797 /* This is only for NV12 format */
/* Fill a Gen9 advanced (media) SURFACE_STATE2 for interleaved-chroma
 * NV12: geometry minus one, y_offset_for_cb locating the UV plane,
 * MOCS, split 64-bit base address and tiling. */
1799 gen9_gpe_set_adv_surface_state(struct gen9_surface_state2 *ss,
1800 unsigned int v_direction,
1801 unsigned int cacheability_control,
1802 unsigned int format,
1803 unsigned int tiling,
1805 unsigned int height,
1807 uint64_t base_offset,
1808 unsigned int y_cb_offset)
1810 memset(ss, 0, sizeof(*ss));
1812 ss->ss1.cbcr_pixel_offset_v_direction = v_direction;
1813 ss->ss1.width = width - 1;
1814 ss->ss1.height = height - 1;
1816 ss->ss2.surface_format = format;
1817 ss->ss2.interleave_chroma = 1;
1818 ss->ss2.pitch = pitch - 1;
1820 ss->ss3.y_offset_for_cb = y_cb_offset;
1822 ss->ss5.surface_object_mocs = cacheability_control;
1824 ss->ss6.base_addr = (uint32_t)base_offset;
1825 ss->ss7.base_addr_high = (uint32_t)(base_offset >> 32);
1827 gen9_gpe_set_surface2_tiling(ss, tiling);
/* Fill a Gen9 buffer SURFACE_STATE: the 27-bit (size - 1) element count
 * is split across width[6:0], height[20:7] and depth[26:21] per the
 * RENDER_SURFACE_STATE buffer layout; identity channel select; split
 * 64-bit base address. */
1831 gen9_gpe_set_buffer2_surface_state(struct gen9_surface_state *ss,
1832 unsigned int cacheability_control,
1833 unsigned int format,
1836 uint64_t base_offset)
1838 memset(ss, 0, sizeof(*ss));
1840 ss->ss0.surface_format = format;
1841 ss->ss0.surface_type = I965_SURFACE_BUFFER;
1843 ss->ss1.surface_mocs = cacheability_control;
1845 ss->ss2.width = (size - 1) & 0x7F;
1846 ss->ss2.height = ((size - 1) & 0x1FFF80) >> 7;
1848 ss->ss3.depth = ((size - 1) & 0xFE00000) >> 21;
1849 ss->ss3.pitch = pitch - 1;
1851 ss->ss7.shader_chanel_select_a = HSW_SCS_ALPHA;
1852 ss->ss7.shader_chanel_select_b = HSW_SCS_BLUE;
1853 ss->ss7.shader_chanel_select_g = HSW_SCS_GREEN;
1854 ss->ss7.shader_chanel_select_r = HSW_SCS_RED;
1856 ss->ss8.base_addr = (uint32_t)base_offset;
1857 ss->ss9.base_addr_high = (uint32_t)(base_offset >> 32);
/* Bind one GPE resource at binding-table slot `index`: writes the binding
 * table entry, fills the matching surface state (2D / UV-plane / advanced
 * NV12 / buffer variant chosen from gpe_surface flags), and emits a
 * relocation for the base-address dword pair so the kernel relocates it.
 * NOTE(review): numerous brace/else/argument lines were elided by this
 * numbered extraction (e.g. else-branches of the 16bpp tests, reloc bo
 * arguments); comments below describe only what is visible. */
1861 gen9_gpe_context_add_surface(struct i965_gpe_context *gpe_context,
1862 struct i965_gpe_surface *gpe_surface,
1866 unsigned int tiling, swizzle, width, height, pitch, tile_alignment, y_offset = 0;
1867 unsigned int surface_state_offset = gpe_context->surface_state_binding_table.surface_state_offset +
1868 index * SURFACE_STATE_PADDED_SIZE_GEN9;
1869 unsigned int binding_table_offset = gpe_context->surface_state_binding_table.binding_table_offset +
1871 struct i965_gpe_resource *gpe_resource = gpe_surface->gpe_resource;
1873 dri_bo_get_tiling(gpe_resource->bo, &tiling, &swizzle);
1875 dri_bo_map(gpe_context->surface_state_binding_table.bo, 1);
1876 buf = (char *)gpe_context->surface_state_binding_table.bo->virtual;
/* Binding table entry points at this slot's surface state. */
1877 *((unsigned int *)(buf + binding_table_offset)) = surface_state_offset;
/* Case 1: 2D surface with caller-supplied byte offset into the bo. */
1879 if (gpe_surface->is_2d_surface && gpe_surface->is_override_offset) {
1880 struct gen9_surface_state *ss = (struct gen9_surface_state *)(buf + surface_state_offset);
1882 width = gpe_resource->width;
1883 height = gpe_resource->height;
1884 pitch = gpe_resource->pitch;
/* Media block R/W addresses in dword units: pixels -> dwords. */
1886 if (gpe_surface->is_media_block_rw) {
1887 if (gpe_surface->is_16bpp)
1888 width = (ALIGN(width * 2, 4) >> 2);
1890 width = (ALIGN(width, 4) >> 2);
1894 gen9_gpe_set_2d_surface_state(ss,
1895 gpe_surface->cacheability_control,
1896 gpe_surface->format,
1898 width, height, pitch,
1899 gpe_resource->bo->offset64 + gpe_surface->offset,
1902 dri_bo_emit_reloc(gpe_context->surface_state_binding_table.bo,
1903 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
1904 gpe_surface->offset,
1905 surface_state_offset + offsetof(struct gen9_surface_state, ss8),
/* Case 2: UV (chroma) plane of an NV12 surface, exposed as R16_UINT. */
1907 } else if (gpe_surface->is_2d_surface && gpe_surface->is_uv_surface) {
1908 unsigned int cbcr_offset;
1909 struct gen9_surface_state *ss = (struct gen9_surface_state *)(buf + surface_state_offset);
1911 width = gpe_resource->width;
1912 height = gpe_resource->height / 2;
1913 pitch = gpe_resource->pitch;
1915 if (gpe_surface->is_media_block_rw) {
1916 if (gpe_surface->is_16bpp)
1917 width = (ALIGN(width * 2, 4) >> 2);
1919 width = (ALIGN(width, 4) >> 2);
/* Tile-row height differs per tiling; split the chroma offset into a
 * tile-aligned byte offset plus an intra-tile y_offset. */
1922 if (tiling == I915_TILING_Y) {
1923 tile_alignment = 32;
1924 } else if (tiling == I915_TILING_X) {
1929 y_offset = (gpe_resource->y_cb_offset % tile_alignment);
1930 cbcr_offset = ALIGN_FLOOR(gpe_resource->y_cb_offset, tile_alignment) * pitch;
1932 gen9_gpe_set_2d_surface_state(ss,
1933 gpe_surface->cacheability_control,
1934 I965_SURFACEFORMAT_R16_UINT,
1936 width, height, pitch,
1937 gpe_resource->bo->offset64 + cbcr_offset,
1940 dri_bo_emit_reloc(gpe_context->surface_state_binding_table.bo,
1941 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
1943 surface_state_offset + offsetof(struct gen9_surface_state, ss8),
/* Case 3: plain 2D surface at the bo's start. */
1945 } else if (gpe_surface->is_2d_surface) {
1946 struct gen9_surface_state *ss = (struct gen9_surface_state *)(buf + surface_state_offset);
1948 width = gpe_resource->width;
1949 height = gpe_resource->height;
1950 pitch = gpe_resource->pitch;
1952 if (gpe_surface->is_media_block_rw) {
1953 if (gpe_surface->is_16bpp)
1954 width = (ALIGN(width * 2, 4) >> 2);
1956 width = (ALIGN(width, 4) >> 2);
1959 gen9_gpe_set_2d_surface_state(ss,
1960 gpe_surface->cacheability_control,
1961 gpe_surface->format,
1963 width, height, pitch,
1964 gpe_resource->bo->offset64,
1967 dri_bo_emit_reloc(gpe_context->surface_state_binding_table.bo,
1968 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
1970 surface_state_offset + offsetof(struct gen9_surface_state, ss8),
/* Case 4: advanced (VME/media sampler) surface, planar 4:2:0 8-bit. */
1972 } else if (gpe_surface->is_adv_surface) {
1973 struct gen9_surface_state2 *ss = (struct gen9_surface_state2 *)(buf + surface_state_offset);
1975 width = gpe_resource->width;
1976 height = gpe_resource->height;
1977 pitch = gpe_resource->pitch;
1979 gen9_gpe_set_adv_surface_state(ss,
1980 gpe_surface->v_direction,
1981 gpe_surface->cacheability_control,
1982 MFX_SURFACE_PLANAR_420_8,
1984 width, height, pitch,
1985 gpe_resource->bo->offset64,
1986 gpe_resource->y_cb_offset);
/* Advanced state keeps its base address in ss6 (not ss8). */
1988 dri_bo_emit_reloc(gpe_context->surface_state_binding_table.bo,
1989 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
1991 surface_state_offset + offsetof(struct gen9_surface_state2, ss6),
/* Case 5 (fallthrough): 1D buffer — raw bytes or dword-typed. */
1994 struct gen9_surface_state *ss = (struct gen9_surface_state *)(buf + surface_state_offset);
1995 unsigned int format;
1997 assert(gpe_surface->is_buffer);
1999 if (gpe_surface->is_raw_buffer) {
2000 format = I965_SURFACEFORMAT_RAW;
2003 format = I965_SURFACEFORMAT_R32_UINT;
2004 pitch = sizeof(unsigned int);
2007 gen9_gpe_set_buffer2_surface_state(ss,
2008 gpe_surface->cacheability_control,
2012 gpe_resource->bo->offset64 + gpe_surface->offset);
2014 dri_bo_emit_reloc(gpe_context->surface_state_binding_table.bo,
2015 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
2016 gpe_surface->offset,
2017 surface_state_offset + offsetof(struct gen9_surface_state, ss8),
2021 dri_bo_unmap(gpe_context->surface_state_binding_table.bo);
/* Allocate a fresh 2D GPE resource: size is pitch times the height
 * rounded up to 16 rows, bo is page-aligned.
 * NOTE(review): width/pitch parameter+assignment lines (2027-2041) were
 * elided from this extraction. */
2025 i965_gpe_allocate_2d_resource(dri_bufmgr *bufmgr,
2026 struct i965_gpe_resource *res,
2037 res->type = I965_GPE_RESOURCE_2D;
2039 res->height = height;
2042 bo_size = ALIGN(height, 16) * pitch;
2043 res->size = bo_size;
2045 res->bo = dri_bo_alloc(bufmgr, name, res->size, 4096);
/* Emit a 2-dword MEDIA_STATE_FLUSH to drain outstanding media-pipeline
 * state before subsequent commands. */
2052 gen8_gpe_media_state_flush(VADriverContextP ctx,
2053 struct i965_gpe_context *gpe_context,
2054 struct intel_batchbuffer *batch)
2056 BEGIN_BATCH(batch, 2);
2058 OUT_BATCH(batch, CMD_MEDIA_STATE_FLUSH | (2 - 2));
2059 OUT_BATCH(batch, 0);
2061 ADVANCE_BATCH(batch);
/* Emit one MEDIA_OBJECT command: fixed 6-dword header (interface index,
 * scoreboard control, thread x/y position, scoreboard mask) plus the
 * caller's inline data padded to a dword multiple.
 * NOTE(review): the `batch_size = 6;` initializer line (~2072) was
 * elided — batch_size must start at the fixed header size upstream. */
2065 gen8_gpe_media_object(VADriverContextP ctx,
2066 struct i965_gpe_context *gpe_context,
2067 struct intel_batchbuffer *batch,
2068 struct gpe_media_object_parameter *param)
2070 int batch_size, subdata_size;
2074 if (param->pinline_data && param->inline_size) {
2075 subdata_size = ALIGN(param->inline_size, 4);
2076 batch_size += subdata_size / 4;
2078 BEGIN_BATCH(batch, batch_size);
2079 OUT_BATCH(batch, CMD_MEDIA_OBJECT | (batch_size - 2));
2080 OUT_BATCH(batch, param->interface_offset);
2081 OUT_BATCH(batch, param->use_scoreboard << 21);
2082 OUT_BATCH(batch, 0);
2083 OUT_BATCH(batch, (param->scoreboard_y << 16 |
2084 param->scoreboard_x));
2085 OUT_BATCH(batch, param->scoreboard_mask);
2088 intel_batchbuffer_data(batch, param->pinline_data, subdata_size);
2090 ADVANCE_BATCH(batch);
/* Emit MEDIA_OBJECT_WALKER: 17-dword header describing the hardware
 * thread-walking pattern (loop selects, exec counts, block/global
 * resolution, start positions and loop strides/units), optionally
 * followed by dword-padded inline data.
 * NOTE(review): the `walker_length = 17;` initializer (~2100) was elided
 * by this extraction — it must precede the += below upstream. */
2094 gen9_gpe_media_object_walker(VADriverContextP ctx,
2095 struct i965_gpe_context *gpe_context,
2096 struct intel_batchbuffer *batch,
2097 struct gpe_media_object_walker_parameter *param)
2102 if (param->inline_size)
2103 walker_length += ALIGN(param->inline_size, 4) / 4;
2104 BEGIN_BATCH(batch, walker_length);
2105 OUT_BATCH(batch, CMD_MEDIA_OBJECT_WALKER | (walker_length - 2));
2106 OUT_BATCH(batch, param->interface_offset);
2107 OUT_BATCH(batch, param->use_scoreboard << 21);
2108 OUT_BATCH(batch, 0);
2109 OUT_BATCH(batch, 0);
2110 OUT_BATCH(batch, (param->group_id_loop_select << 8 |
2111 param->scoreboard_mask)); // DW5
2112 OUT_BATCH(batch, (param->color_count_minus1 << 24 |
2113 param->middle_loop_extra_steps << 16 |
2114 param->mid_loop_unit_y << 12 |
2115 param->mid_loop_unit_x << 8));
2116 OUT_BATCH(batch, ((param->global_loop_exec_count & 0x3ff) << 16 |
2117 (param->local_loop_exec_count & 0x3ff)));
2118 OUT_BATCH(batch, param->block_resolution.value);
2119 OUT_BATCH(batch, param->local_start.value);
2120 OUT_BATCH(batch, 0); // DW10
2121 OUT_BATCH(batch, param->local_outer_loop_stride.value);
2122 OUT_BATCH(batch, param->local_inner_loop_unit.value);
2123 OUT_BATCH(batch, param->global_resolution.value);
2124 OUT_BATCH(batch, param->global_start.value);
2125 OUT_BATCH(batch, param->global_outer_loop_stride.value);
2126 OUT_BATCH(batch, param->global_inner_loop_unit.value);
2128 if (param->pinline_data && param->inline_size)
2129 intel_batchbuffer_data(batch, param->pinline_data, ALIGN(param->inline_size, 4));
2131 ADVANCE_BATCH(batch);
2136 intel_vpp_init_media_object_walker_parameter(struct intel_vpp_kernel_walker_parameter *kernel_walker_param,
2137 struct gpe_media_object_walker_parameter *walker_param)
2139 memset(walker_param, 0, sizeof(*walker_param));
2141 walker_param->use_scoreboard = kernel_walker_param->use_scoreboard;
2143 walker_param->block_resolution.x = kernel_walker_param->resolution_x;
2144 walker_param->block_resolution.y = kernel_walker_param->resolution_y;
2146 walker_param->global_resolution.x = kernel_walker_param->resolution_x;
2147 walker_param->global_resolution.y = kernel_walker_param->resolution_y;
2149 walker_param->global_outer_loop_stride.x = kernel_walker_param->resolution_x;
2150 walker_param->global_outer_loop_stride.y = 0;
2152 walker_param->global_inner_loop_unit.x = 0;
2153 walker_param->global_inner_loop_unit.y = kernel_walker_param->resolution_y;
2155 walker_param->local_loop_exec_count = 0xFFFF; //MAX VALUE
2156 walker_param->global_loop_exec_count = 0xFFFF; //MAX VALUE
2158 if (kernel_walker_param->no_dependency) {
2159 /* The no_dependency is used for VPP */
2160 walker_param->scoreboard_mask = 0;
2161 walker_param->use_scoreboard = 0;
2162 // Raster scan walking pattern
2163 walker_param->local_outer_loop_stride.x = 0;
2164 walker_param->local_outer_loop_stride.y = 1;
2165 walker_param->local_inner_loop_unit.x = 1;
2166 walker_param->local_inner_loop_unit.y = 0;
2167 walker_param->local_end.x = kernel_walker_param->resolution_x - 1;
2168 walker_param->local_end.y = 0;
2170 walker_param->local_end.x = 0;
2171 walker_param->local_end.y = 0;
2174 walker_param->scoreboard_mask = 0x0F;
2175 walker_param->local_outer_loop_stride.x = 1;
2176 walker_param->local_outer_loop_stride.y = 0;
2177 walker_param->local_inner_loop_unit.x = -2;
2178 walker_param->local_inner_loop_unit.y = 1;