2 * Copyright © 2011 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the
6 * "Software"), to deal in the Software without restriction, including
7 * without limitation the rights to use, copy, modify, merge, publish,
8 * distribute, sub license, and/or sell copies of the Software, and to
9 * permit persons to whom the Software is furnished to do so, subject to
10 * the following conditions:
12 * The above copyright notice and this permission notice (including the
13 * next paragraph) shall be included in all copies or substantial portions
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19 * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 * Li Xiaowei <xiaowei.a.li@intel.com>
33 #include "intel_batchbuffer.h"
34 #include "intel_driver.h"
36 #include "i965_structs.h"
37 #include "i965_defines.h"
38 #include "i965_drv_video.h"
39 #include "gen75_vpp_gpe.h"
/* Sizing/layout constants shared by the Gen7.5 and Gen8+ paths below. */
41 #define MAX_INTERFACE_DESC_GEN6 MAX_GPE_KERNELS
42 #define MAX_MEDIA_SURFACES_GEN6 34
/* Surface-state entries are packed first; the binding table (one dword per
 * surface) follows after all MAX_MEDIA_SURFACES_GEN6 padded state entries. */
44 #define SURFACE_STATE_OFFSET_GEN7(index) (SURFACE_STATE_PADDED_SIZE_GEN7 * (index))
45 #define BINDING_TABLE_OFFSET_GEN7(index) (SURFACE_STATE_OFFSET_GEN7(MAX_MEDIA_SURFACES_GEN6) + sizeof(unsigned int) * (index))
/* Same layout for Gen8+, with the Gen8 padded surface-state size. */
47 #define SURFACE_STATE_OFFSET_GEN8(index) (SURFACE_STATE_PADDED_SIZE_GEN8 * (index))
48 #define BINDING_TABLE_OFFSET_GEN8(index) (SURFACE_STATE_OFFSET_GEN8(MAX_MEDIA_SURFACES_GEN6) + sizeof(unsigned int) * (index))
/* CURBE (constant URB) sizing used when initializing the GPE context. */
50 #define CURBE_ALLOCATION_SIZE 37
51 #define CURBE_TOTAL_DATA_LENGTH (4 * 32)
52 #define CURBE_URB_ENTRY_LENGTH 4
54 /* Shaders information for sharpening */
/* Haswell (gen75) precompiled GEN ISA binaries for the three sharpening
 * passes; each .g75b file expands to rows of 4 dwords (one instruction). */
55 static const unsigned int gen75_gpe_sharpening_h_blur[][4] = {
56 #include "shaders/post_processing/gen75/sharpening_h_blur.g75b"
58 static const unsigned int gen75_gpe_sharpening_v_blur[][4] = {
59 #include "shaders/post_processing/gen75/sharpening_v_blur.g75b"
61 static const unsigned int gen75_gpe_sharpening_unmask[][4] = {
62 #include "shaders/post_processing/gen75/sharpening_unmask.g75b"
/* Kernel table selected by vpp_gpe_ctx->sub_shader_index
 * (0 = horizontal blur, 1 = vertical blur, 2 = unmask);
 * loaded once via gpe_load_kernels on the first frame. */
64 static struct i965_kernel gen75_vpp_sharpening_kernels[] = {
66 "vpp: sharpening(horizontal blur)",
68 gen75_gpe_sharpening_h_blur,
69 sizeof(gen75_gpe_sharpening_h_blur),
73 "vpp: sharpening(vertical blur)",
75 gen75_gpe_sharpening_v_blur,
76 sizeof(gen75_gpe_sharpening_v_blur),
80 "vpp: sharpening(unmask)",
82 gen75_gpe_sharpening_unmask,
83 sizeof(gen75_gpe_sharpening_unmask),
88 /* sharpening kernels for Broadwell */
/* Gen8 (Broadwell+) builds of the same three sharpening passes (.g8b). */
89 static const unsigned int gen8_gpe_sharpening_h_blur[][4] = {
90 #include "shaders/post_processing/gen8/sharpening_h_blur.g8b"
92 static const unsigned int gen8_gpe_sharpening_v_blur[][4] = {
93 #include "shaders/post_processing/gen8/sharpening_v_blur.g8b"
95 static const unsigned int gen8_gpe_sharpening_unmask[][4] = {
96 #include "shaders/post_processing/gen8/sharpening_unmask.g8b"
/* Gen8+ counterpart of gen75_vpp_sharpening_kernels; same index order
 * (0 = horizontal blur, 1 = vertical blur, 2 = unmask). */
99 static struct i965_kernel gen8_vpp_sharpening_kernels[] = {
101 "vpp: sharpening(horizontal blur)",
103 gen8_gpe_sharpening_h_blur,
104 sizeof(gen8_gpe_sharpening_h_blur),
108 "vpp: sharpening(vertical blur)",
110 gen8_gpe_sharpening_v_blur,
111 sizeof(gen8_gpe_sharpening_v_blur),
115 "vpp: sharpening(unmask)",
117 gen8_gpe_sharpening_unmask,
118 sizeof(gen8_gpe_sharpening_unmask),
/* Bind all surfaces for the Haswell path: every NV12 surface occupies two
 * consecutive binding-table slots (luma plane, then chroma plane), so the
 * input count is (current + forward refs + backward refs) * 2 and the loop
 * advances by 2.  Slots after the inputs hold the output surface and,
 * last, the kernel return buffer.  Always reports success. */
124 gen75_gpe_process_surfaces_setup(VADriverContextP ctx,
125 struct vpp_gpe_context *vpp_gpe_ctx)
127 struct object_surface *obj_surface;
129 unsigned char input_surface_sum = (1 + vpp_gpe_ctx->forward_surf_sum +
130 vpp_gpe_ctx->backward_surf_sum) * 2;
132 /* Binding input NV12 surfaces (Luma + Chroma)*/
133 for (i = 0; i < input_surface_sum; i += 2) {
134 obj_surface = vpp_gpe_ctx->surface_input_object[i / 2];
/* Luma plane at slot i ... */
136 gen7_gpe_media_rw_surface_setup(ctx,
137 &vpp_gpe_ctx->gpe_ctx,
139 BINDING_TABLE_OFFSET_GEN7(i),
140 SURFACE_STATE_OFFSET_GEN7(i),
/* ... chroma plane at slot i + 1. */
143 gen75_gpe_media_chroma_surface_setup(ctx,
144 &vpp_gpe_ctx->gpe_ctx,
146 BINDING_TABLE_OFFSET_GEN7(i + 1),
147 SURFACE_STATE_OFFSET_GEN7(i + 1),
151 /* Binding output NV12 surface(Luma + Chroma) */
152 obj_surface = vpp_gpe_ctx->surface_output_object;
154 gen7_gpe_media_rw_surface_setup(ctx,
155 &vpp_gpe_ctx->gpe_ctx,
157 BINDING_TABLE_OFFSET_GEN7(input_surface_sum),
158 SURFACE_STATE_OFFSET_GEN7(input_surface_sum),
160 gen75_gpe_media_chroma_surface_setup(ctx,
161 &vpp_gpe_ctx->gpe_ctx,
163 BINDING_TABLE_OFFSET_GEN7(input_surface_sum + 1),
164 SURFACE_STATE_OFFSET_GEN7(input_surface_sum + 1),
166 /* Bind kernel return buffer surface */
/* NOTE: "suface" is the (misspelled) name of the shared helper declared in
 * i965_gpe_utils.h; do not "fix" it here without renaming the helper. */
167 gen7_gpe_buffer_suface_setup(ctx,
168 &vpp_gpe_ctx->gpe_ctx,
169 &vpp_gpe_ctx->vpp_kernel_return,
170 BINDING_TABLE_OFFSET_GEN7((input_surface_sum + 2)),
171 SURFACE_STATE_OFFSET_GEN7(input_surface_sum + 2));
173 return VA_STATUS_SUCCESS;
/* Fill the interface descriptor remap table (IDRT) for the Haswell path:
 * one gen6_interface_descriptor_data entry per loaded sub-shader, each
 * pointing at its kernel start (64-byte aligned, hence >> 6) and at the
 * shared binding table (32-byte aligned, hence >> 5).  A relocation is
 * emitted so the kernel BO address in desc0 is patched at exec time. */
177 gen75_gpe_process_interface_setup(VADriverContextP ctx,
178 struct vpp_gpe_context *vpp_gpe_ctx)
180 struct gen6_interface_descriptor_data *desc;
181 dri_bo *bo = vpp_gpe_ctx->gpe_ctx.idrt.bo;
188 /*Setup the descriptor table*/
189 for (i = 0; i < vpp_gpe_ctx->sub_shader_sum; i++) {
190 struct i965_kernel *kernel = &vpp_gpe_ctx->gpe_ctx.kernels[i];
191 assert(sizeof(*desc) == 32);
192 memset(desc, 0, sizeof(*desc));
193 desc->desc0.kernel_start_pointer = (kernel->bo->offset >> 6);
194 desc->desc2.sampler_count = 0; /* FIXME: */
195 desc->desc2.sampler_state_pointer = 0;
196 desc->desc3.binding_table_entry_count = 6; /* FIXME: */
197 desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET_GEN7(0) >> 5);
198 desc->desc4.constant_urb_entry_read_offset = 0;
199 desc->desc4.constant_urb_entry_read_length = 0;
/* Patch desc0 with the kernel BO's GPU address when the batch runs. */
201 dri_bo_emit_reloc(bo,
202 I915_GEM_DOMAIN_INSTRUCTION, 0,
204 i * sizeof(*desc) + offsetof(struct gen6_interface_descriptor_data, desc0),
211 return VA_STATUS_SUCCESS;
/* Write the second-level batch buffer for the Haswell path: one
 * MEDIA_OBJECT command per thread, each carrying that thread's inline
 * parameter block copied from vpp_gpe_ctx->thread_param, terminated by
 * MI_BATCH_BUFFER_END.  The MEDIA_OBJECT length field is in dwords minus
 * two, hence (size / sizeof(int) + 6 - 2) for header + inline data. */
215 gen75_gpe_process_parameters_fill(VADriverContextP ctx,
216 struct vpp_gpe_context *vpp_gpe_ctx)
218 unsigned int *command_ptr;
219 unsigned int i, size = vpp_gpe_ctx->thread_param_size;
220 unsigned char* position = NULL;
222 /* Thread inline data setting*/
223 dri_bo_map(vpp_gpe_ctx->vpp_batchbuffer.bo, 1);
224 command_ptr = vpp_gpe_ctx->vpp_batchbuffer.bo->virtual;
226 for (i = 0; i < vpp_gpe_ctx->thread_num; i ++) {
227 *command_ptr++ = (CMD_MEDIA_OBJECT | (size / sizeof(int) + 6 - 2));
/* Interface descriptor index selects which sub-shader runs. */
228 *command_ptr++ = vpp_gpe_ctx->sub_shader_index;
234 /* copy thread inline data */
235 position = (unsigned char*)(vpp_gpe_ctx->thread_param + size * i);
236 memcpy(command_ptr, position, size);
237 command_ptr += size / sizeof(int);
241 *command_ptr++ = MI_BATCH_BUFFER_END;
243 dri_bo_unmap(vpp_gpe_ctx->vpp_batchbuffer.bo);
245 return VA_STATUS_SUCCESS;
/* Build the primary batch for Haswell: flush, program the GPE media
 * pipeline state, fill the second-level buffer with per-thread
 * MEDIA_OBJECTs, then chain to it with MI_BATCH_BUFFER_START
 * (bit 8 = second-level batch). */
249 gen75_gpe_process_pipeline_setup(VADriverContextP ctx,
250 struct vpp_gpe_context *vpp_gpe_ctx)
252 intel_batchbuffer_start_atomic(vpp_gpe_ctx->batch, 0x1000);
253 intel_batchbuffer_emit_mi_flush(vpp_gpe_ctx->batch);
255 gen6_gpe_pipeline_setup(ctx, &vpp_gpe_ctx->gpe_ctx, vpp_gpe_ctx->batch);
257 gen75_gpe_process_parameters_fill(ctx, vpp_gpe_ctx);
259 BEGIN_BATCH(vpp_gpe_ctx->batch, 2);
260 OUT_BATCH(vpp_gpe_ctx->batch, MI_BATCH_BUFFER_START | (1 << 8));
261 OUT_RELOC(vpp_gpe_ctx->batch,
262 vpp_gpe_ctx->vpp_batchbuffer.bo,
263 I915_GEM_DOMAIN_COMMAND, 0,
265 ADVANCE_BATCH(vpp_gpe_ctx->batch);
267 intel_batchbuffer_end_atomic(vpp_gpe_ctx->batch);
269 return VA_STATUS_SUCCESS;
/* (Re)allocate the per-run buffers for the Haswell path: a second-level
 * batch sized for thread_num MEDIA_OBJECTs (6 header dwords + inline data
 * each, +16 bytes slack for the batch-end), and a 16-byte-per-thread
 * kernel return buffer; then (re)initialize the GPE context.  The old BOs
 * are unreferenced first, so this is safe to call every frame. */
273 gen75_gpe_process_init(VADriverContextP ctx,
274 struct vpp_gpe_context *vpp_gpe_ctx)
276 struct i965_driver_data *i965 = i965_driver_data(ctx);
279 unsigned int batch_buf_size = vpp_gpe_ctx->thread_num *
280 (vpp_gpe_ctx->thread_param_size + 6 * sizeof(int)) + 16;
282 vpp_gpe_ctx->vpp_kernel_return.num_blocks = vpp_gpe_ctx->thread_num;
283 vpp_gpe_ctx->vpp_kernel_return.size_block = 16;
284 vpp_gpe_ctx->vpp_kernel_return.pitch = 1;
285 unsigned int kernel_return_size = vpp_gpe_ctx->vpp_kernel_return.num_blocks
286 * vpp_gpe_ctx->vpp_kernel_return.size_block;
288 dri_bo_unreference(vpp_gpe_ctx->vpp_batchbuffer.bo);
289 bo = dri_bo_alloc(i965->intel.bufmgr,
291 batch_buf_size, 0x1000);
292 vpp_gpe_ctx->vpp_batchbuffer.bo = bo;
294 dri_bo_unreference(vpp_gpe_ctx->vpp_kernel_return.bo);
295 bo = dri_bo_alloc(i965->intel.bufmgr,
296 "vpp kernel return buffer",
297 kernel_return_size, 0x1000);
298 vpp_gpe_ctx->vpp_kernel_return.bo = bo;
/* Per-generation init hook set in vpp_gpe_context_init(). */
300 vpp_gpe_ctx->gpe_context_init(ctx, &vpp_gpe_ctx->gpe_ctx);
302 return VA_STATUS_SUCCESS;
/* Haswell frame preparation: bind surfaces, build the interface
 * descriptor table, then program the media pipeline batch. */
306 gen75_gpe_process_prepare(VADriverContextP ctx,
307 struct vpp_gpe_context *vpp_gpe_ctx)
309 /*Setup all the memory object*/
310 gen75_gpe_process_surfaces_setup(ctx, vpp_gpe_ctx);
311 gen75_gpe_process_interface_setup(ctx, vpp_gpe_ctx);
312 //gen75_gpe_process_constant_setup(ctx, vpp_gpe_ctx);
314 /*Programming media pipeline*/
315 gen75_gpe_process_pipeline_setup(ctx, vpp_gpe_ctx);
317 return VA_STATUS_SUCCESS;
/* Submit the prepared batch to the kernel (flush = execbuffer). */
321 gen75_gpe_process_run(VADriverContextP ctx,
322 struct vpp_gpe_context *vpp_gpe_ctx)
324 intel_batchbuffer_flush(vpp_gpe_ctx->batch);
326 return VA_STATUS_SUCCESS;
/* Haswell entry point for one GPE pass: init buffers, prepare state,
 * submit.  Bails out early if any stage fails. */
330 gen75_gpe_process(VADriverContextP ctx,
331 struct vpp_gpe_context * vpp_gpe_ctx)
333 VAStatus va_status = VA_STATUS_SUCCESS;
335 va_status = gen75_gpe_process_init(ctx, vpp_gpe_ctx);
336 if (va_status != VA_STATUS_SUCCESS)
339 va_status = gen75_gpe_process_prepare(ctx, vpp_gpe_ctx);
340 if (va_status != VA_STATUS_SUCCESS)
343 va_status = gen75_gpe_process_run(ctx, vpp_gpe_ctx);
344 if (va_status != VA_STATUS_SUCCESS)
347 return VA_STATUS_SUCCESS;
/* Gen8+ twin of gen75_gpe_process_surfaces_setup: identical binding-table
 * layout (two slots per NV12 surface — luma then chroma; inputs, then
 * output, then kernel return buffer) using the Gen8 offset macros and
 * Gen8 surface-state helpers.  Always reports success. */
351 gen8_gpe_process_surfaces_setup(VADriverContextP ctx,
352 struct vpp_gpe_context *vpp_gpe_ctx)
354 struct object_surface *obj_surface;
356 unsigned char input_surface_sum = (1 + vpp_gpe_ctx->forward_surf_sum +
357 vpp_gpe_ctx->backward_surf_sum) * 2;
359 /* Binding input NV12 surfaces (Luma + Chroma)*/
360 for (i = 0; i < input_surface_sum; i += 2) {
361 obj_surface = vpp_gpe_ctx->surface_input_object[i / 2];
363 gen8_gpe_media_rw_surface_setup(ctx,
364 &vpp_gpe_ctx->gpe_ctx,
366 BINDING_TABLE_OFFSET_GEN8(i),
367 SURFACE_STATE_OFFSET_GEN8(i),
370 gen8_gpe_media_chroma_surface_setup(ctx,
371 &vpp_gpe_ctx->gpe_ctx,
373 BINDING_TABLE_OFFSET_GEN8(i + 1),
374 SURFACE_STATE_OFFSET_GEN8(i + 1),
378 /* Binding output NV12 surface(Luma + Chroma) */
379 obj_surface = vpp_gpe_ctx->surface_output_object;
381 gen8_gpe_media_rw_surface_setup(ctx,
382 &vpp_gpe_ctx->gpe_ctx,
384 BINDING_TABLE_OFFSET_GEN8(input_surface_sum),
385 SURFACE_STATE_OFFSET_GEN8(input_surface_sum),
387 gen8_gpe_media_chroma_surface_setup(ctx,
388 &vpp_gpe_ctx->gpe_ctx,
390 BINDING_TABLE_OFFSET_GEN8(input_surface_sum + 1),
391 SURFACE_STATE_OFFSET_GEN8(input_surface_sum + 1),
393 /* Bind kernel return buffer surface */
/* Shares the gen7 buffer-state helper (note historical "suface" name). */
394 gen7_gpe_buffer_suface_setup(ctx,
395 &vpp_gpe_ctx->gpe_ctx,
396 &vpp_gpe_ctx->vpp_kernel_return,
397 BINDING_TABLE_OFFSET_GEN8((input_surface_sum + 2)),
398 SURFACE_STATE_OFFSET_GEN8(input_surface_sum + 2));
400 return VA_STATUS_SUCCESS;
/* Fill the Gen8+ interface descriptor table: one
 * gen8_interface_descriptor_data per sub-shader.  Unlike the Haswell
 * path, Gen8 kernels live inside the context's dynamic state, so desc0
 * takes kernel_offset (>> 6 for 64-byte alignment) and no relocation on
 * a kernel BO is needed here. */
404 gen8_gpe_process_interface_setup(VADriverContextP ctx,
405 struct vpp_gpe_context *vpp_gpe_ctx)
407 struct gen8_interface_descriptor_data *desc;
408 dri_bo *bo = vpp_gpe_ctx->gpe_ctx.idrt.bo;
413 desc = (struct gen8_interface_descriptor_data *)(bo->virtual
414 + vpp_gpe_ctx->gpe_ctx.idrt.offset);
416 /*Setup the descriptor table*/
417 for (i = 0; i < vpp_gpe_ctx->sub_shader_sum; i++) {
418 struct i965_kernel *kernel;
419 kernel = &vpp_gpe_ctx->gpe_ctx.kernels[i];
420 assert(sizeof(*desc) == 32);
421 /*Fill one descriptor entry*/
422 memset(desc, 0, sizeof(*desc));
423 desc->desc0.kernel_start_pointer = kernel->kernel_offset >> 6;
424 desc->desc3.sampler_count = 0; /* FIXME: */
425 desc->desc3.sampler_state_pointer = 0;
426 desc->desc4.binding_table_entry_count = 6; /* FIXME: */
427 desc->desc4.binding_table_pointer = (BINDING_TABLE_OFFSET_GEN8(0) >> 5);
428 desc->desc5.constant_urb_entry_read_offset = 0;
429 desc->desc5.constant_urb_entry_read_length = 0;
436 return VA_STATUS_SUCCESS;
/* Gen8+ twin of gen75_gpe_process_parameters_fill: one MEDIA_OBJECT per
 * thread with inline data, but each object is followed by
 * CMD_MEDIA_STATE_FLUSH (required on Gen8+ between media objects), then
 * the buffer is terminated with MI_BATCH_BUFFER_END. */
440 gen8_gpe_process_parameters_fill(VADriverContextP ctx,
441 struct vpp_gpe_context *vpp_gpe_ctx)
443 unsigned int *command_ptr;
444 unsigned int i, size = vpp_gpe_ctx->thread_param_size;
445 unsigned char* position = NULL;
447 /* Thread inline data setting*/
448 dri_bo_map(vpp_gpe_ctx->vpp_batchbuffer.bo, 1);
449 command_ptr = vpp_gpe_ctx->vpp_batchbuffer.bo->virtual;
451 for (i = 0; i < vpp_gpe_ctx->thread_num; i ++) {
452 *command_ptr++ = (CMD_MEDIA_OBJECT | (size / sizeof(int) + 6 - 2));
/* Interface descriptor index selects which sub-shader runs. */
453 *command_ptr++ = vpp_gpe_ctx->sub_shader_index;
459 /* copy thread inline data */
460 position = (unsigned char*)(vpp_gpe_ctx->thread_param + size * i);
461 memcpy(command_ptr, position, size);
462 command_ptr += size / sizeof(int);
464 *command_ptr++ = CMD_MEDIA_STATE_FLUSH;
469 *command_ptr++ = MI_BATCH_BUFFER_END;
471 dri_bo_unmap(vpp_gpe_ctx->vpp_batchbuffer.bo);
473 return VA_STATUS_SUCCESS;
/* Build the primary batch for Gen8+: flush, program the Gen8 GPE
 * pipeline, fill the second-level buffer, then chain to it with a 3-dword
 * MI_BATCH_BUFFER_START (bit 8 = second-level batch, bit 0 set for the
 * Gen8 48-bit-address form; the extra OUT_BATCH(0) is the upper address
 * dword). */
477 gen8_gpe_process_pipeline_setup(VADriverContextP ctx,
478 struct vpp_gpe_context *vpp_gpe_ctx)
480 intel_batchbuffer_start_atomic(vpp_gpe_ctx->batch, 0x1000);
481 intel_batchbuffer_emit_mi_flush(vpp_gpe_ctx->batch);
483 gen8_gpe_pipeline_setup(ctx, &vpp_gpe_ctx->gpe_ctx, vpp_gpe_ctx->batch);
485 gen8_gpe_process_parameters_fill(ctx, vpp_gpe_ctx);
487 BEGIN_BATCH(vpp_gpe_ctx->batch, 3);
488 OUT_BATCH(vpp_gpe_ctx->batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0));
489 OUT_RELOC(vpp_gpe_ctx->batch,
490 vpp_gpe_ctx->vpp_batchbuffer.bo,
491 I915_GEM_DOMAIN_COMMAND, 0,
493 OUT_BATCH(vpp_gpe_ctx->batch, 0);
495 ADVANCE_BATCH(vpp_gpe_ctx->batch);
497 intel_batchbuffer_end_atomic(vpp_gpe_ctx->batch);
499 return VA_STATUS_SUCCESS;
/* Gen8+ twin of gen75_gpe_process_init: (re)allocate the second-level
 * batch (thread_num * (inline data + 6 header dwords) + 16 bytes slack)
 * and the 16-byte-per-thread kernel return buffer, then (re)initialize
 * the GPE context via the per-generation hook. */
503 gen8_gpe_process_init(VADriverContextP ctx,
504 struct vpp_gpe_context *vpp_gpe_ctx)
506 struct i965_driver_data *i965 = i965_driver_data(ctx);
509 unsigned int batch_buf_size = vpp_gpe_ctx->thread_num *
510 (vpp_gpe_ctx->thread_param_size + 6 * sizeof(int)) + 16;
512 vpp_gpe_ctx->vpp_kernel_return.num_blocks = vpp_gpe_ctx->thread_num;
513 vpp_gpe_ctx->vpp_kernel_return.size_block = 16;
514 vpp_gpe_ctx->vpp_kernel_return.pitch = 1;
516 unsigned int kernel_return_size = vpp_gpe_ctx->vpp_kernel_return.num_blocks
517 * vpp_gpe_ctx->vpp_kernel_return.size_block;
519 dri_bo_unreference(vpp_gpe_ctx->vpp_batchbuffer.bo);
520 bo = dri_bo_alloc(i965->intel.bufmgr,
522 batch_buf_size, 0x1000);
523 vpp_gpe_ctx->vpp_batchbuffer.bo = bo;
525 dri_bo_unreference(vpp_gpe_ctx->vpp_kernel_return.bo);
526 bo = dri_bo_alloc(i965->intel.bufmgr,
527 "vpp kernel return buffer",
528 kernel_return_size, 0x1000);
529 vpp_gpe_ctx->vpp_kernel_return.bo = bo;
531 vpp_gpe_ctx->gpe_context_init(ctx, &vpp_gpe_ctx->gpe_ctx);
533 return VA_STATUS_SUCCESS;
/* Gen8+ frame preparation: bind surfaces, build the interface
 * descriptor table, then program the media pipeline batch. */
537 gen8_gpe_process_prepare(VADriverContextP ctx,
538 struct vpp_gpe_context *vpp_gpe_ctx)
540 /*Setup all the memory object*/
541 gen8_gpe_process_surfaces_setup(ctx, vpp_gpe_ctx);
542 gen8_gpe_process_interface_setup(ctx, vpp_gpe_ctx);
543 //gen8_gpe_process_constant_setup(ctx, vpp_gpe_ctx);
545 /*Programming media pipeline*/
546 gen8_gpe_process_pipeline_setup(ctx, vpp_gpe_ctx);
548 return VA_STATUS_SUCCESS;
/* Submit the prepared batch to the kernel (flush = execbuffer). */
552 gen8_gpe_process_run(VADriverContextP ctx,
553 struct vpp_gpe_context *vpp_gpe_ctx)
555 intel_batchbuffer_flush(vpp_gpe_ctx->batch);
557 return VA_STATUS_SUCCESS;
/* Gen8+ entry point for one GPE pass: init buffers, prepare state,
 * submit.  Bails out early if any stage fails. */
561 gen8_gpe_process(VADriverContextP ctx,
562 struct vpp_gpe_context * vpp_gpe_ctx)
564 VAStatus va_status = VA_STATUS_SUCCESS;
566 va_status = gen8_gpe_process_init(ctx, vpp_gpe_ctx);
567 if (va_status != VA_STATUS_SUCCESS)
570 va_status = gen8_gpe_process_prepare(ctx, vpp_gpe_ctx);
571 if (va_status != VA_STATUS_SUCCESS)
574 va_status = gen8_gpe_process_run(ctx, vpp_gpe_ctx);
575 if (va_status != VA_STATUS_SUCCESS)
578 return VA_STATUS_SUCCESS;
/* Dispatch one GPE pass to the generation-specific implementation:
 * Haswell uses the gen75 path; Gen8/9/10 share the gen8 path.  Any other
 * hardware is unsupported. */
582 vpp_gpe_process(VADriverContextP ctx,
583 struct vpp_gpe_context * vpp_gpe_ctx)
585 struct i965_driver_data *i965 = i965_driver_data(ctx);
586 if (IS_HASWELL(i965->intel.device_info))
587 return gen75_gpe_process(ctx, vpp_gpe_ctx);
588 else if (IS_GEN8(i965->intel.device_info) ||
589 IS_GEN9(i965->intel.device_info) ||
590 IS_GEN10(i965->intel.device_info))
591 return gen8_gpe_process(ctx, vpp_gpe_ctx);
593 return VA_STATUS_ERROR_UNIMPLEMENTED;
/* Run the three-pass unsharp-mask sharpening filter:
 *   pass 0: horizontal blur  (input  -> output surface, one thread per
 *           16-pixel row band),
 *   pass 1: vertical blur    (output -> temp surface, one thread per
 *           16-pixel column band),
 *   pass 2: unmask           (original input + blurred temp -> original
 *           output, one thread per 4-pixel row band).
 * On the first frame the per-generation kernels are loaded and the NV12
 * temp surface is created.  The filter strength comes from the first
 * filter buffer in the pipeline parameters; l_amount/d_amount scale it
 * to 0..128.  Each pass packs per-thread parameters into a freshly
 * malloc'd thread_param array consumed by vpp_gpe_process(). */
597 vpp_gpe_process_sharpening(VADriverContextP ctx,
598 struct vpp_gpe_context * vpp_gpe_ctx)
600 VAStatus va_status = VA_STATUS_SUCCESS;
601 struct i965_driver_data *i965 = i965_driver_data(ctx);
602 struct object_surface *origin_in_obj_surface = vpp_gpe_ctx->surface_input_object[0];
603 struct object_surface *origin_out_obj_surface = vpp_gpe_ctx->surface_output_object;
605 VAProcPipelineParameterBuffer* pipe = vpp_gpe_ctx->pipeline_param;
606 VABufferID *filter_ids = (VABufferID*)pipe->filters ;
607 struct object_buffer *obj_buf = BUFFER((*(filter_ids + 0)));
609 assert(obj_buf && obj_buf->buffer_store && obj_buf->buffer_store->buffer);
612 !obj_buf->buffer_store ||
613 !obj_buf->buffer_store->buffer)
616 VAProcFilterParameterBuffer* filter =
617 (VAProcFilterParameterBuffer*)obj_buf-> buffer_store->buffer;
618 float sharpening_intensity = filter->value;
620 ThreadParameterSharpening thr_param;
621 unsigned int thr_param_size = sizeof(ThreadParameterSharpening);
/* One-time setup: load the generation's kernel set and create the
 * intermediate NV12 surface used between passes. */
625 if (vpp_gpe_ctx->is_first_frame) {
626 vpp_gpe_ctx->sub_shader_sum = 3;
627 struct i965_kernel * vpp_kernels;
628 if (IS_HASWELL(i965->intel.device_info))
629 vpp_kernels = gen75_vpp_sharpening_kernels;
630 else if (IS_GEN8(i965->intel.device_info) ||
631 IS_GEN9(i965->intel.device_info) ||
632 IS_GEN10(i965->intel.device_info))
633 vpp_kernels = gen8_vpp_sharpening_kernels;
635 return VA_STATUS_ERROR_UNIMPLEMENTED;
637 vpp_gpe_ctx->gpe_load_kernels(ctx,
638 &vpp_gpe_ctx->gpe_ctx,
640 vpp_gpe_ctx->sub_shader_sum);
643 if (vpp_gpe_ctx->surface_tmp == VA_INVALID_ID) {
644 va_status = i965_CreateSurfaces(ctx,
645 vpp_gpe_ctx->in_frame_w,
646 vpp_gpe_ctx->in_frame_h,
649 &vpp_gpe_ctx->surface_tmp);
650 assert(va_status == VA_STATUS_SUCCESS);
652 struct object_surface * obj_surf = SURFACE(vpp_gpe_ctx->surface_tmp);
656 i965_check_alloc_surface_bo(ctx, obj_surf, 1, VA_FOURCC_NV12,
658 vpp_gpe_ctx->surface_tmp_object = obj_surf;
/* Intensity is expected in [0.0, 1.0]; scaled to the 0..128 fixed-point
 * range the shaders consume. */
662 assert(sharpening_intensity >= 0.0 && sharpening_intensity <= 1.0);
663 thr_param.l_amount = (unsigned int)(sharpening_intensity * 128);
664 thr_param.d_amount = (unsigned int)(sharpening_intensity * 128);
666 thr_param.base.pic_width = vpp_gpe_ctx->in_frame_w;
667 thr_param.base.pic_height = vpp_gpe_ctx->in_frame_h;
669 /* Step 1: horizontal blur process */
670 vpp_gpe_ctx->forward_surf_sum = 0;
671 vpp_gpe_ctx->backward_surf_sum = 0;
673 vpp_gpe_ctx->thread_num = vpp_gpe_ctx->in_frame_h / 16;
674 vpp_gpe_ctx->thread_param_size = thr_param_size;
675 vpp_gpe_ctx->thread_param = (unsigned char*) malloc(vpp_gpe_ctx->thread_param_size
676 * vpp_gpe_ctx->thread_num);
677 pos = vpp_gpe_ctx->thread_param;
680 return VA_STATUS_ERROR_ALLOCATION_FAILED;
683 for (i = 0 ; i < vpp_gpe_ctx->thread_num; i++) {
684 thr_param.base.v_pos = 16 * i;
685 thr_param.base.h_pos = 0;
686 memcpy(pos, &thr_param, thr_param_size);
687 pos += thr_param_size;
690 vpp_gpe_ctx->sub_shader_index = 0;
691 va_status = vpp_gpe_process(ctx, vpp_gpe_ctx);
692 free(vpp_gpe_ctx->thread_param);
694 /* Step 2: vertical blur process */
/* Chain pass 1's output into pass 2; the temp surface receives the
 * fully-blurred picture. */
695 vpp_gpe_ctx->surface_input_object[0] = vpp_gpe_ctx->surface_output_object;
696 vpp_gpe_ctx->surface_output_object = vpp_gpe_ctx->surface_tmp_object;
697 vpp_gpe_ctx->forward_surf_sum = 0;
698 vpp_gpe_ctx->backward_surf_sum = 0;
700 vpp_gpe_ctx->thread_num = vpp_gpe_ctx->in_frame_w / 16;
701 vpp_gpe_ctx->thread_param_size = thr_param_size;
702 vpp_gpe_ctx->thread_param = (unsigned char*) malloc(vpp_gpe_ctx->thread_param_size
703 * vpp_gpe_ctx->thread_num);
704 pos = vpp_gpe_ctx->thread_param;
707 return VA_STATUS_ERROR_ALLOCATION_FAILED;
710 for (i = 0 ; i < vpp_gpe_ctx->thread_num; i++) {
711 thr_param.base.v_pos = 0;
712 thr_param.base.h_pos = 16 * i;
713 memcpy(pos, &thr_param, thr_param_size);
714 pos += thr_param_size;
717 vpp_gpe_ctx->sub_shader_index = 1;
718 vpp_gpe_process(ctx, vpp_gpe_ctx);
719 free(vpp_gpe_ctx->thread_param);
721 /* Step 3: apply the blur to original surface */
/* Unmask pass: original input plus the blurred temp (as a forward
 * reference) produce the sharpened result in the caller's output. */
722 vpp_gpe_ctx->surface_input_object[0] = origin_in_obj_surface;
723 vpp_gpe_ctx->surface_input_object[1] = vpp_gpe_ctx->surface_tmp_object;
724 vpp_gpe_ctx->surface_output_object = origin_out_obj_surface;
725 vpp_gpe_ctx->forward_surf_sum = 1;
726 vpp_gpe_ctx->backward_surf_sum = 0;
728 vpp_gpe_ctx->thread_num = vpp_gpe_ctx->in_frame_h / 4;
729 vpp_gpe_ctx->thread_param_size = thr_param_size;
730 vpp_gpe_ctx->thread_param = (unsigned char*) malloc(vpp_gpe_ctx->thread_param_size
731 * vpp_gpe_ctx->thread_num);
732 pos = vpp_gpe_ctx->thread_param;
735 return VA_STATUS_ERROR_ALLOCATION_FAILED;
738 for (i = 0 ; i < vpp_gpe_ctx->thread_num; i++) {
739 thr_param.base.v_pos = 4 * i;
740 thr_param.base.h_pos = 0;
741 memcpy(pos, &thr_param, thr_param_size);
742 pos += thr_param_size;
745 vpp_gpe_ctx->sub_shader_index = 2;
746 va_status = vpp_gpe_process(ctx, vpp_gpe_ctx);
747 free(vpp_gpe_ctx->thread_param);
/* Error path: invalid/missing filter buffer. */
752 return VA_STATUS_ERROR_INVALID_PARAMETER;
/* Public entry point for GPE-based video processing of one frame.
 * Validates the pipeline's filter buffers, gathers the current input plus
 * forward/backward reference surfaces into surface_input_object[] (at
 * most 4 references in total), records the input frame dimensions, and
 * dispatches to the filter implementation — currently only
 * VAProcFilterSharpening is supported.  Clears is_first_frame after a
 * successful run so per-context one-time setup is skipped next time. */
755 VAStatus vpp_gpe_process_picture(VADriverContextP ctx,
756 struct vpp_gpe_context * vpp_gpe_ctx)
758 VAStatus va_status = VA_STATUS_SUCCESS;
759 struct i965_driver_data *i965 = i965_driver_data(ctx);
760 VAProcPipelineParameterBuffer* pipe = vpp_gpe_ctx->pipeline_param;
761 VAProcFilterParameterBuffer* filter = NULL;
763 struct object_surface *obj_surface = NULL;
765 if (pipe->num_filters && !pipe->filters)
/* Find the first supported filter among the pipeline's filter buffers. */
768 for (i = 0; i < pipe->num_filters; i++) {
769 struct object_buffer *obj_buf = BUFFER(pipe->filters[i]);
771 assert(obj_buf && obj_buf->buffer_store && obj_buf->buffer_store->buffer);
774 !obj_buf->buffer_store ||
775 !obj_buf->buffer_store->buffer)
778 filter = (VAProcFilterParameterBuffer*)obj_buf-> buffer_store->buffer;
779 if (filter->type == VAProcFilterSharpening) {
784 assert(pipe->num_forward_references + pipe->num_backward_references <= 4);
785 vpp_gpe_ctx->surface_input_object[0] = vpp_gpe_ctx->surface_pipeline_input_object;
787 vpp_gpe_ctx->forward_surf_sum = 0;
788 vpp_gpe_ctx->backward_surf_sum = 0;
/* Forward references follow the current input in slots 1..N. */
790 for (i = 0; i < pipe->num_forward_references; i ++) {
791 obj_surface = SURFACE(pipe->forward_references[i]);
794 vpp_gpe_ctx->surface_input_object[i + 1] = obj_surface;
795 vpp_gpe_ctx->forward_surf_sum++;
/* Backward references are appended after the forward ones. */
798 for (i = 0; i < pipe->num_backward_references; i ++) {
799 obj_surface = SURFACE(pipe->backward_references[i]);
802 vpp_gpe_ctx->surface_input_object[vpp_gpe_ctx->forward_surf_sum + 1 + i ] = obj_surface;
803 vpp_gpe_ctx->backward_surf_sum++;
806 obj_surface = vpp_gpe_ctx->surface_input_object[0];
807 vpp_gpe_ctx->in_frame_w = obj_surface->orig_width;
808 vpp_gpe_ctx->in_frame_h = obj_surface->orig_height;
810 if (filter && filter->type == VAProcFilterSharpening) {
811 va_status = vpp_gpe_process_sharpening(ctx, vpp_gpe_ctx);
813 va_status = VA_STATUS_ERROR_ATTR_NOT_SUPPORTED;
816 vpp_gpe_ctx->is_first_frame = 0;
/* Error path: malformed pipeline/filter parameters. */
821 return VA_STATUS_ERROR_INVALID_PARAMETER;
/* Tear down a vpp_gpe_context: drop the batch/return BOs (pointers are
 * NULLed to guard against double-free), destroy the GPE context through
 * the per-generation hook, destroy the temp surface if one was created,
 * and free the batchbuffer. */
825 vpp_gpe_context_destroy(VADriverContextP ctx,
826 struct vpp_gpe_context *vpp_gpe_ctx)
828 dri_bo_unreference(vpp_gpe_ctx->vpp_batchbuffer.bo);
829 vpp_gpe_ctx->vpp_batchbuffer.bo = NULL;
831 dri_bo_unreference(vpp_gpe_ctx->vpp_kernel_return.bo);
832 vpp_gpe_ctx->vpp_kernel_return.bo = NULL;
834 vpp_gpe_ctx->gpe_context_destroy(&vpp_gpe_ctx->gpe_ctx);
836 if (vpp_gpe_ctx->surface_tmp != VA_INVALID_ID) {
837 assert(vpp_gpe_ctx->surface_tmp_object != NULL);
838 i965_DestroySurfaces(ctx, &vpp_gpe_ctx->surface_tmp, 1);
839 vpp_gpe_ctx->surface_tmp = VA_INVALID_ID;
840 vpp_gpe_ctx->surface_tmp_object = NULL;
843 if (vpp_gpe_ctx->batch)
844 intel_batchbuffer_free(vpp_gpe_ctx->batch);
849 struct vpp_gpe_context *
850 vpp_gpe_context_init(VADriverContextP ctx)
852 struct i965_driver_data *i965 = i965_driver_data(ctx);
853 struct vpp_gpe_context *vpp_gpe_ctx = calloc(1, sizeof(struct vpp_gpe_context));
855 struct i965_gpe_context *gpe_ctx = &(vpp_gpe_ctx->gpe_ctx);
857 assert(IS_HASWELL(i965->intel.device_info) ||
858 IS_GEN8(i965->intel.device_info) ||
859 IS_GEN9(i965->intel.device_info) ||
860 IS_GEN10(i965->intel.device_info));
862 vpp_gpe_ctx->surface_tmp = VA_INVALID_ID;
863 vpp_gpe_ctx->surface_tmp_object = NULL;
864 vpp_gpe_ctx->batch = intel_batchbuffer_new(&i965->intel, I915_EXEC_RENDER, 0);
865 vpp_gpe_ctx->is_first_frame = 1;
867 gpe_ctx->vfe_state.max_num_threads = 60 - 1;
868 gpe_ctx->vfe_state.num_urb_entries = 16;
869 gpe_ctx->vfe_state.gpgpu_mode = 0;
870 gpe_ctx->vfe_state.urb_entry_size = 59 - 1;
871 gpe_ctx->vfe_state.curbe_allocation_size = CURBE_ALLOCATION_SIZE - 1;
873 if (IS_HASWELL(i965->intel.device_info)) {
874 vpp_gpe_ctx->gpe_context_init = i965_gpe_context_init;
875 vpp_gpe_ctx->gpe_context_destroy = i965_gpe_context_destroy;
876 vpp_gpe_ctx->gpe_load_kernels = i965_gpe_load_kernels;
877 gpe_ctx->surface_state_binding_table.length =
878 (SURFACE_STATE_PADDED_SIZE_GEN7 + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;
880 gpe_ctx->curbe.length = CURBE_TOTAL_DATA_LENGTH;
881 gpe_ctx->idrt.max_entries = MAX_INTERFACE_DESC_GEN6;
882 gpe_ctx->idrt.entry_size = ALIGN(sizeof(struct gen6_interface_descriptor_data), 64);
884 } else if (IS_GEN8(i965->intel.device_info) ||
885 IS_GEN9(i965->intel.device_info) ||
886 IS_GEN10(i965->intel.device_info)) {
887 vpp_gpe_ctx->gpe_context_init = gen8_gpe_context_init;
888 vpp_gpe_ctx->gpe_context_destroy = gen8_gpe_context_destroy;
889 vpp_gpe_ctx->gpe_load_kernels = gen8_gpe_load_kernels;
890 gpe_ctx->surface_state_binding_table.length =
891 (SURFACE_STATE_PADDED_SIZE_GEN8 + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;
893 gpe_ctx->curbe.length = CURBE_TOTAL_DATA_LENGTH;
894 gpe_ctx->idrt.entry_size = ALIGN(sizeof(struct gen8_interface_descriptor_data), 64);
895 gpe_ctx->idrt.max_entries = MAX_INTERFACE_DESC_GEN6;