/*
 * Copyright © 2011 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *   Li Xiaowei <xiaowei.a.li@intel.com>
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>

#include "intel_batchbuffer.h"
#include "intel_driver.h"

#include "i965_structs.h"
#include "i965_defines.h"
#include "i965_drv_video.h"
#include "gen75_vpp_gpe.h"

#define MAX_INTERFACE_DESC_GEN6      MAX_GPE_KERNELS
#define MAX_MEDIA_SURFACES_GEN6      34

#define SURFACE_STATE_OFFSET_GEN7(index)   (SURFACE_STATE_PADDED_SIZE_GEN7 * (index))
#define BINDING_TABLE_OFFSET_GEN7(index)   (SURFACE_STATE_OFFSET_GEN7(MAX_MEDIA_SURFACES_GEN6) + sizeof(unsigned int) * (index))

#define SURFACE_STATE_OFFSET_GEN8(index)   (SURFACE_STATE_PADDED_SIZE_GEN8 * (index))
#define BINDING_TABLE_OFFSET_GEN8(index)   (SURFACE_STATE_OFFSET_GEN8(MAX_MEDIA_SURFACES_GEN6) + sizeof(unsigned int) * (index))

#define CURBE_ALLOCATION_SIZE   37
#define CURBE_TOTAL_DATA_LENGTH (4 * 32)
#define CURBE_URB_ENTRY_LENGTH  4
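
/*
 * Layout note: surface states and the binding table share one buffer.
 * The binding table begins right after MAX_MEDIA_SURFACES_GEN6 padded
 * surface-state entries, so BINDING_TABLE_OFFSET_GEN7/GEN8(i) addresses
 * the i-th dword-sized binding-table slot past the surface-state area.
 */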

/* Shaders information for sharpening */
static const unsigned int gen75_gpe_sharpening_h_blur[][4] = {
#include "shaders/post_processing/gen75/sharpening_h_blur.g75b"
};
static const unsigned int gen75_gpe_sharpening_v_blur[][4] = {
#include "shaders/post_processing/gen75/sharpening_v_blur.g75b"
};
static const unsigned int gen75_gpe_sharpening_unmask[][4] = {
#include "shaders/post_processing/gen75/sharpening_unmask.g75b"
};
static struct i965_kernel gen75_vpp_sharpening_kernels[] = {
    {
        "vpp: sharpening(horizontal blur)",
        VPP_GPE_SHARPENING,
        gen75_gpe_sharpening_h_blur,
        sizeof(gen75_gpe_sharpening_h_blur),
        NULL
    },
    {
        "vpp: sharpening(vertical blur)",
        VPP_GPE_SHARPENING,
        gen75_gpe_sharpening_v_blur,
        sizeof(gen75_gpe_sharpening_v_blur),
        NULL
    },
    {
        "vpp: sharpening(unmask)",
        VPP_GPE_SHARPENING,
        gen75_gpe_sharpening_unmask,
        sizeof(gen75_gpe_sharpening_unmask),
        NULL
    },
};

/* Sharpening kernels for Broadwell */
static const unsigned int gen8_gpe_sharpening_h_blur[][4] = {
#include "shaders/post_processing/gen8/sharpening_h_blur.g8b"
};
static const unsigned int gen8_gpe_sharpening_v_blur[][4] = {
#include "shaders/post_processing/gen8/sharpening_v_blur.g8b"
};
static const unsigned int gen8_gpe_sharpening_unmask[][4] = {
#include "shaders/post_processing/gen8/sharpening_unmask.g8b"
};
static struct i965_kernel gen8_vpp_sharpening_kernels[] = {
    {
        "vpp: sharpening(horizontal blur)",
        VPP_GPE_SHARPENING,
        gen8_gpe_sharpening_h_blur,
        sizeof(gen8_gpe_sharpening_h_blur),
        NULL
    },
    {
        "vpp: sharpening(vertical blur)",
        VPP_GPE_SHARPENING,
        gen8_gpe_sharpening_v_blur,
        sizeof(gen8_gpe_sharpening_v_blur),
        NULL
    },
    {
        "vpp: sharpening(unmask)",
        VPP_GPE_SHARPENING,
        gen8_gpe_sharpening_unmask,
        sizeof(gen8_gpe_sharpening_unmask),
        NULL
    },
};
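
/*
 * Note: the order of the kernels above is significant.  The sharpening
 * passes select a kernel through sub_shader_index (0 = horizontal blur,
 * 1 = vertical blur, 2 = unmask), which becomes the interface descriptor
 * offset of each MEDIA_OBJECT command.
 */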

static VAStatus
gen75_gpe_process_surfaces_setup(VADriverContextP ctx,
                                 struct vpp_gpe_context *vpp_gpe_ctx)
{
    struct object_surface *obj_surface;
    unsigned int i = 0;
    unsigned char input_surface_sum = (1 + vpp_gpe_ctx->forward_surf_sum +
                                       vpp_gpe_ctx->backward_surf_sum) * 2;

    /* Bind input NV12 surfaces (Luma + Chroma) */
    for (i = 0; i < input_surface_sum; i += 2) {
        obj_surface = vpp_gpe_ctx->surface_input_object[i / 2];
        assert(obj_surface);
        gen7_gpe_media_rw_surface_setup(ctx,
                                        &vpp_gpe_ctx->gpe_ctx,
                                        obj_surface,
                                        BINDING_TABLE_OFFSET_GEN7(i),
                                        SURFACE_STATE_OFFSET_GEN7(i));

        gen75_gpe_media_chroma_surface_setup(ctx,
                                             &vpp_gpe_ctx->gpe_ctx,
                                             obj_surface,
                                             BINDING_TABLE_OFFSET_GEN7(i + 1),
                                             SURFACE_STATE_OFFSET_GEN7(i + 1));
    }

    /* Bind output NV12 surface (Luma + Chroma) */
    obj_surface = vpp_gpe_ctx->surface_output_object;
    assert(obj_surface);
    gen7_gpe_media_rw_surface_setup(ctx,
                                    &vpp_gpe_ctx->gpe_ctx,
                                    obj_surface,
                                    BINDING_TABLE_OFFSET_GEN7(input_surface_sum),
                                    SURFACE_STATE_OFFSET_GEN7(input_surface_sum));
    gen75_gpe_media_chroma_surface_setup(ctx,
                                         &vpp_gpe_ctx->gpe_ctx,
                                         obj_surface,
                                         BINDING_TABLE_OFFSET_GEN7(input_surface_sum + 1),
                                         SURFACE_STATE_OFFSET_GEN7(input_surface_sum + 1));

    /* Bind the kernel return buffer surface */
    gen7_gpe_buffer_suface_setup(ctx,
                                 &vpp_gpe_ctx->gpe_ctx,
                                 &vpp_gpe_ctx->vpp_kernel_return,
                                 BINDING_TABLE_OFFSET_GEN7(input_surface_sum + 2),
                                 SURFACE_STATE_OFFSET_GEN7(input_surface_sum + 2));

    return VA_STATUS_SUCCESS;
}
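
/*
 * Write one interface descriptor per sub-shader into the IDRT.  All
 * descriptors share the same binding table; a MEDIA_OBJECT then selects
 * its kernel by interface descriptor index (sub_shader_index).
 */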

static VAStatus
gen75_gpe_process_interface_setup(VADriverContextP ctx,
                                  struct vpp_gpe_context *vpp_gpe_ctx)
{
    struct gen6_interface_descriptor_data *desc;
    dri_bo *bo = vpp_gpe_ctx->gpe_ctx.idrt.bo;
    int i;

    dri_bo_map(bo, 1);
    assert(bo->virtual);
    desc = bo->virtual;

    /* Set up the descriptor table */
    for (i = 0; i < vpp_gpe_ctx->sub_shader_sum; i++) {
        struct i965_kernel *kernel = &vpp_gpe_ctx->gpe_ctx.kernels[i];
        assert(sizeof(*desc) == 32);
        memset(desc, 0, sizeof(*desc));
        desc->desc0.kernel_start_pointer = (kernel->bo->offset >> 6);
        desc->desc2.sampler_count = 0; /* FIXME: */
        desc->desc2.sampler_state_pointer = 0;
        desc->desc3.binding_table_entry_count = 6; /* FIXME: */
        desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET_GEN7(0) >> 5);
        desc->desc4.constant_urb_entry_read_offset = 0;
        desc->desc4.constant_urb_entry_read_length = 0;

        dri_bo_emit_reloc(bo,
                          I915_GEM_DOMAIN_INSTRUCTION, 0,
                          0,
                          i * sizeof(*desc) + offsetof(struct gen6_interface_descriptor_data, desc0),
                          kernel->bo);
        desc++;
    }

    dri_bo_unmap(bo);

    return VA_STATUS_SUCCESS;
}
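
/*
 * Build the second-level batch: one MEDIA_OBJECT per thread, each a
 * 6-dword header (opcode/length, interface descriptor offset, four
 * zeroed control dwords) followed by the thread's parameters as
 * inline data.
 */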

static VAStatus
gen75_gpe_process_parameters_fill(VADriverContextP ctx,
                                  struct vpp_gpe_context *vpp_gpe_ctx)
{
    unsigned int *command_ptr;
    unsigned int i, size = vpp_gpe_ctx->thread_param_size;
    unsigned char *position = NULL;

    /* Thread inline data setting */
    dri_bo_map(vpp_gpe_ctx->vpp_batchbuffer.bo, 1);
    command_ptr = vpp_gpe_ctx->vpp_batchbuffer.bo->virtual;

    for (i = 0; i < vpp_gpe_ctx->thread_num; i++) {
        *command_ptr++ = (CMD_MEDIA_OBJECT | (size / sizeof(int) + 6 - 2));
        *command_ptr++ = vpp_gpe_ctx->sub_shader_index;
        *command_ptr++ = 0;
        *command_ptr++ = 0;
        *command_ptr++ = 0;
        *command_ptr++ = 0;

        /* copy thread inline data */
        position = (unsigned char *)(vpp_gpe_ctx->thread_param + size * i);
        memcpy(command_ptr, position, size);
        command_ptr += size / sizeof(int);
    }

    *command_ptr++ = 0;
    *command_ptr++ = MI_BATCH_BUFFER_END;

    dri_bo_unmap(vpp_gpe_ctx->vpp_batchbuffer.bo);

    return VA_STATUS_SUCCESS;
}
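
/*
 * Chain the MEDIA_OBJECT stream built above into the main batch via
 * MI_BATCH_BUFFER_START; the second-level buffer must therefore end
 * with the MI_BATCH_BUFFER_END written in parameters_fill().
 */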

static VAStatus
gen75_gpe_process_pipeline_setup(VADriverContextP ctx,
                                 struct vpp_gpe_context *vpp_gpe_ctx)
{
    intel_batchbuffer_start_atomic(vpp_gpe_ctx->batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(vpp_gpe_ctx->batch);

    gen6_gpe_pipeline_setup(ctx, &vpp_gpe_ctx->gpe_ctx, vpp_gpe_ctx->batch);

    gen75_gpe_process_parameters_fill(ctx, vpp_gpe_ctx);

    BEGIN_BATCH(vpp_gpe_ctx->batch, 2);
    OUT_BATCH(vpp_gpe_ctx->batch, MI_BATCH_BUFFER_START | (1 << 8));
    OUT_RELOC(vpp_gpe_ctx->batch,
              vpp_gpe_ctx->vpp_batchbuffer.bo,
              I915_GEM_DOMAIN_COMMAND, 0,
              0);
    ADVANCE_BATCH(vpp_gpe_ctx->batch);

    intel_batchbuffer_end_atomic(vpp_gpe_ctx->batch);

    return VA_STATUS_SUCCESS;
}
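
/*
 * Buffer sizing: each thread needs a 6-dword MEDIA_OBJECT header plus
 * thread_param_size bytes of inline data; the extra 16 bytes leave room
 * for the trailing padding dword and MI_BATCH_BUFFER_END.
 */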

static VAStatus
gen75_gpe_process_init(VADriverContextP ctx,
                       struct vpp_gpe_context *vpp_gpe_ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    dri_bo *bo;

    unsigned int batch_buf_size = vpp_gpe_ctx->thread_num *
        (vpp_gpe_ctx->thread_param_size + 6 * sizeof(int)) + 16;

    vpp_gpe_ctx->vpp_kernel_return.num_blocks = vpp_gpe_ctx->thread_num;
    vpp_gpe_ctx->vpp_kernel_return.size_block = 16;
    vpp_gpe_ctx->vpp_kernel_return.pitch = 1;
    unsigned int kernel_return_size = vpp_gpe_ctx->vpp_kernel_return.num_blocks
        * vpp_gpe_ctx->vpp_kernel_return.size_block;

    dri_bo_unreference(vpp_gpe_ctx->vpp_batchbuffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "vpp batch buffer",
                      batch_buf_size, 0x1000);
    vpp_gpe_ctx->vpp_batchbuffer.bo = bo;

    dri_bo_unreference(vpp_gpe_ctx->vpp_kernel_return.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "vpp kernel return buffer",
                      kernel_return_size, 0x1000);
    vpp_gpe_ctx->vpp_kernel_return.bo = bo;

    vpp_gpe_ctx->gpe_context_init(ctx, &vpp_gpe_ctx->gpe_ctx);

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen75_gpe_process_prepare(VADriverContextP ctx,
                          struct vpp_gpe_context *vpp_gpe_ctx)
{
    /* Set up all the memory objects */
    gen75_gpe_process_surfaces_setup(ctx, vpp_gpe_ctx);
    gen75_gpe_process_interface_setup(ctx, vpp_gpe_ctx);
    //gen75_gpe_process_constant_setup(ctx, vpp_gpe_ctx);

    /* Program the media pipeline */
    gen75_gpe_process_pipeline_setup(ctx, vpp_gpe_ctx);

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen75_gpe_process_run(VADriverContextP ctx,
                      struct vpp_gpe_context *vpp_gpe_ctx)
{
    intel_batchbuffer_flush(vpp_gpe_ctx->batch);

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen75_gpe_process(VADriverContextP ctx,
                  struct vpp_gpe_context *vpp_gpe_ctx)
{
    VAStatus va_status = VA_STATUS_SUCCESS;
    va_status = gen75_gpe_process_init(ctx, vpp_gpe_ctx);
    if (va_status != VA_STATUS_SUCCESS)
        return va_status;

    va_status = gen75_gpe_process_prepare(ctx, vpp_gpe_ctx);
    if (va_status != VA_STATUS_SUCCESS)
        return va_status;

    va_status = gen75_gpe_process_run(ctx, vpp_gpe_ctx);
    if (va_status != VA_STATUS_SUCCESS)
        return va_status;

    return VA_STATUS_SUCCESS;
}
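
/*
 * The GEN8/GEN9 path below mirrors the Haswell one, with GEN8 surface
 * state/binding table offsets, interface descriptors stored in the
 * dynamic state buffer, a MEDIA_STATE_FLUSH after each MEDIA_OBJECT,
 * and the longer GEN8 form of MI_BATCH_BUFFER_START.
 */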

static VAStatus
gen8_gpe_process_surfaces_setup(VADriverContextP ctx,
                                struct vpp_gpe_context *vpp_gpe_ctx)
{
    struct object_surface *obj_surface;
    unsigned int i = 0;
    unsigned char input_surface_sum = (1 + vpp_gpe_ctx->forward_surf_sum +
                                       vpp_gpe_ctx->backward_surf_sum) * 2;

    /* Bind input NV12 surfaces (Luma + Chroma) */
    for (i = 0; i < input_surface_sum; i += 2) {
        obj_surface = vpp_gpe_ctx->surface_input_object[i / 2];
        assert(obj_surface);
        gen8_gpe_media_rw_surface_setup(ctx,
                                        &vpp_gpe_ctx->gpe_ctx,
                                        obj_surface,
                                        BINDING_TABLE_OFFSET_GEN8(i),
                                        SURFACE_STATE_OFFSET_GEN8(i));

        gen8_gpe_media_chroma_surface_setup(ctx,
                                            &vpp_gpe_ctx->gpe_ctx,
                                            obj_surface,
                                            BINDING_TABLE_OFFSET_GEN8(i + 1),
                                            SURFACE_STATE_OFFSET_GEN8(i + 1));
    }

    /* Bind output NV12 surface (Luma + Chroma) */
    obj_surface = vpp_gpe_ctx->surface_output_object;
    assert(obj_surface);
    gen8_gpe_media_rw_surface_setup(ctx,
                                    &vpp_gpe_ctx->gpe_ctx,
                                    obj_surface,
                                    BINDING_TABLE_OFFSET_GEN8(input_surface_sum),
                                    SURFACE_STATE_OFFSET_GEN8(input_surface_sum));
    gen8_gpe_media_chroma_surface_setup(ctx,
                                        &vpp_gpe_ctx->gpe_ctx,
                                        obj_surface,
                                        BINDING_TABLE_OFFSET_GEN8(input_surface_sum + 1),
                                        SURFACE_STATE_OFFSET_GEN8(input_surface_sum + 1));

    /* Bind the kernel return buffer surface */
    gen7_gpe_buffer_suface_setup(ctx,
                                 &vpp_gpe_ctx->gpe_ctx,
                                 &vpp_gpe_ctx->vpp_kernel_return,
                                 BINDING_TABLE_OFFSET_GEN8(input_surface_sum + 2),
                                 SURFACE_STATE_OFFSET_GEN8(input_surface_sum + 2));

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen8_gpe_process_interface_setup(VADriverContextP ctx,
                                 struct vpp_gpe_context *vpp_gpe_ctx)
{
    struct gen8_interface_descriptor_data *desc;
    dri_bo *bo = vpp_gpe_ctx->gpe_ctx.dynamic_state.bo;
    int i;

    dri_bo_map(bo, 1);
    assert(bo->virtual);
    desc = (struct gen8_interface_descriptor_data *)(bo->virtual
                                                     + vpp_gpe_ctx->gpe_ctx.idrt_offset);

    /* Set up the descriptor table */
    for (i = 0; i < vpp_gpe_ctx->sub_shader_sum; i++) {
        struct i965_kernel *kernel;
        kernel = &vpp_gpe_ctx->gpe_ctx.kernels[i];
        assert(sizeof(*desc) == 32);
        memset(desc, 0, sizeof(*desc));
        desc->desc0.kernel_start_pointer = kernel->kernel_offset >> 6;
        desc->desc3.sampler_count = 0; /* FIXME: */
        desc->desc3.sampler_state_pointer = 0;
        desc->desc4.binding_table_entry_count = 6; /* FIXME: */
        desc->desc4.binding_table_pointer = (BINDING_TABLE_OFFSET_GEN8(0) >> 5);
        desc->desc5.constant_urb_entry_read_offset = 0;
        desc->desc5.constant_urb_entry_read_length = 0;

        desc++;
    }

    dri_bo_unmap(bo);

    return VA_STATUS_SUCCESS;
}
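
/*
 * Same per-thread MEDIA_OBJECT layout as the Haswell version, except
 * that each object is followed by a two-dword MEDIA_STATE_FLUSH,
 * apparently required by the GEN8 media pipeline between objects.
 */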

static VAStatus
gen8_gpe_process_parameters_fill(VADriverContextP ctx,
                                 struct vpp_gpe_context *vpp_gpe_ctx)
{
    unsigned int *command_ptr;
    unsigned int i, size = vpp_gpe_ctx->thread_param_size;
    unsigned char *position = NULL;

    /* Thread inline data setting */
    dri_bo_map(vpp_gpe_ctx->vpp_batchbuffer.bo, 1);
    command_ptr = vpp_gpe_ctx->vpp_batchbuffer.bo->virtual;

    for (i = 0; i < vpp_gpe_ctx->thread_num; i++) {
        *command_ptr++ = (CMD_MEDIA_OBJECT | (size / sizeof(int) + 6 - 2));
        *command_ptr++ = vpp_gpe_ctx->sub_shader_index;
        *command_ptr++ = 0;
        *command_ptr++ = 0;
        *command_ptr++ = 0;
        *command_ptr++ = 0;

        /* copy thread inline data */
        position = (unsigned char *)(vpp_gpe_ctx->thread_param + size * i);
        memcpy(command_ptr, position, size);
        command_ptr += size / sizeof(int);

        *command_ptr++ = CMD_MEDIA_STATE_FLUSH;
        *command_ptr++ = 0;
    }

    *command_ptr++ = 0;
    *command_ptr++ = MI_BATCH_BUFFER_END;

    dri_bo_unmap(vpp_gpe_ctx->vpp_batchbuffer.bo);

    return VA_STATUS_SUCCESS;
}
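
/*
 * GEN8 MI_BATCH_BUFFER_START carries a 48-bit address, so the command is
 * one dword longer than on Haswell; the OUT_BATCH(0) below reserves the
 * upper address dword, filled in by the kernel's relocation handling.
 */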

static VAStatus
gen8_gpe_process_pipeline_setup(VADriverContextP ctx,
                                struct vpp_gpe_context *vpp_gpe_ctx)
{
    intel_batchbuffer_start_atomic(vpp_gpe_ctx->batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(vpp_gpe_ctx->batch);

    gen8_gpe_pipeline_setup(ctx, &vpp_gpe_ctx->gpe_ctx, vpp_gpe_ctx->batch);

    gen8_gpe_process_parameters_fill(ctx, vpp_gpe_ctx);

    BEGIN_BATCH(vpp_gpe_ctx->batch, 3);
    OUT_BATCH(vpp_gpe_ctx->batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0));
    OUT_RELOC(vpp_gpe_ctx->batch,
              vpp_gpe_ctx->vpp_batchbuffer.bo,
              I915_GEM_DOMAIN_COMMAND, 0,
              0);
    OUT_BATCH(vpp_gpe_ctx->batch, 0);

    ADVANCE_BATCH(vpp_gpe_ctx->batch);

    intel_batchbuffer_end_atomic(vpp_gpe_ctx->batch);

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen8_gpe_process_init(VADriverContextP ctx,
                      struct vpp_gpe_context *vpp_gpe_ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    dri_bo *bo;

    unsigned int batch_buf_size = vpp_gpe_ctx->thread_num *
        (vpp_gpe_ctx->thread_param_size + 6 * sizeof(int)) + 16;

    vpp_gpe_ctx->vpp_kernel_return.num_blocks = vpp_gpe_ctx->thread_num;
    vpp_gpe_ctx->vpp_kernel_return.size_block = 16;
    vpp_gpe_ctx->vpp_kernel_return.pitch = 1;

    unsigned int kernel_return_size = vpp_gpe_ctx->vpp_kernel_return.num_blocks
        * vpp_gpe_ctx->vpp_kernel_return.size_block;

    dri_bo_unreference(vpp_gpe_ctx->vpp_batchbuffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "vpp batch buffer",
                      batch_buf_size, 0x1000);
    vpp_gpe_ctx->vpp_batchbuffer.bo = bo;

    dri_bo_unreference(vpp_gpe_ctx->vpp_kernel_return.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "vpp kernel return buffer",
                      kernel_return_size, 0x1000);
    vpp_gpe_ctx->vpp_kernel_return.bo = bo;

    vpp_gpe_ctx->gpe_context_init(ctx, &vpp_gpe_ctx->gpe_ctx);

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen8_gpe_process_prepare(VADriverContextP ctx,
                         struct vpp_gpe_context *vpp_gpe_ctx)
{
    /* Set up all the memory objects */
    gen8_gpe_process_surfaces_setup(ctx, vpp_gpe_ctx);
    gen8_gpe_process_interface_setup(ctx, vpp_gpe_ctx);
    //gen8_gpe_process_constant_setup(ctx, vpp_gpe_ctx);

    /* Program the media pipeline */
    gen8_gpe_process_pipeline_setup(ctx, vpp_gpe_ctx);

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen8_gpe_process_run(VADriverContextP ctx,
                     struct vpp_gpe_context *vpp_gpe_ctx)
{
    intel_batchbuffer_flush(vpp_gpe_ctx->batch);

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen8_gpe_process(VADriverContextP ctx,
                 struct vpp_gpe_context *vpp_gpe_ctx)
{
    VAStatus va_status = VA_STATUS_SUCCESS;
    va_status = gen8_gpe_process_init(ctx, vpp_gpe_ctx);
    if (va_status != VA_STATUS_SUCCESS)
        return va_status;

    va_status = gen8_gpe_process_prepare(ctx, vpp_gpe_ctx);
    if (va_status != VA_STATUS_SUCCESS)
        return va_status;

    va_status = gen8_gpe_process_run(ctx, vpp_gpe_ctx);
    if (va_status != VA_STATUS_SUCCESS)
        return va_status;

    return VA_STATUS_SUCCESS;
}

static VAStatus
vpp_gpe_process(VADriverContextP ctx,
                struct vpp_gpe_context *vpp_gpe_ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);

    if (IS_HASWELL(i965->intel.device_info))
        return gen75_gpe_process(ctx, vpp_gpe_ctx);
    else if (IS_GEN8(i965->intel.device_info) ||
             IS_GEN9(i965->intel.device_info))
        return gen8_gpe_process(ctx, vpp_gpe_ctx);

    return VA_STATUS_ERROR_UNIMPLEMENTED;
}
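
/*
 * Sharpening as a three-pass unsharp mask: (1) horizontal blur of the
 * source, (2) vertical blur of that result into a temporary surface,
 * (3) an "unmask" pass combining the blurred copy with the original to
 * boost edges.  Each pass repacks the surface list and the per-thread
 * parameters, then reruns the GPE pipeline with a different sub-shader.
 */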

static VAStatus
vpp_gpe_process_sharpening(VADriverContextP ctx,
                           struct vpp_gpe_context *vpp_gpe_ctx)
{
    VAStatus va_status = VA_STATUS_SUCCESS;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct object_surface *origin_in_obj_surface = vpp_gpe_ctx->surface_input_object[0];
    struct object_surface *origin_out_obj_surface = vpp_gpe_ctx->surface_output_object;

    VAProcPipelineParameterBuffer *pipe = vpp_gpe_ctx->pipeline_param;
    VABufferID *filter_ids = (VABufferID *)pipe->filters;
    struct object_buffer *obj_buf = BUFFER(*(filter_ids + 0));

    assert(obj_buf && obj_buf->buffer_store && obj_buf->buffer_store->buffer);

    if (!obj_buf ||
        !obj_buf->buffer_store ||
        !obj_buf->buffer_store->buffer)
        goto error;

    VAProcFilterParameterBuffer *filter =
        (VAProcFilterParameterBuffer *)obj_buf->buffer_store->buffer;
    float sharpening_intensity = filter->value;

    ThreadParameterSharpening thr_param;
    unsigned int thr_param_size = sizeof(ThreadParameterSharpening);
    unsigned int i;
    unsigned char *pos;

    if (vpp_gpe_ctx->is_first_frame) {
        vpp_gpe_ctx->sub_shader_sum = 3;
        struct i965_kernel *vpp_kernels;
        if (IS_HASWELL(i965->intel.device_info))
            vpp_kernels = gen75_vpp_sharpening_kernels;
        else if (IS_GEN8(i965->intel.device_info) ||
                 IS_GEN9(i965->intel.device_info)) // TODO: build the sharpening kernel for GEN9
            vpp_kernels = gen8_vpp_sharpening_kernels;
        else
            return VA_STATUS_ERROR_UNIMPLEMENTED;

        vpp_gpe_ctx->gpe_load_kernels(ctx,
                                      &vpp_gpe_ctx->gpe_ctx,
                                      vpp_kernels,
                                      vpp_gpe_ctx->sub_shader_sum);
    }

    if (vpp_gpe_ctx->surface_tmp == VA_INVALID_ID) {
        va_status = i965_CreateSurfaces(ctx,
                                        vpp_gpe_ctx->in_frame_w,
                                        vpp_gpe_ctx->in_frame_h,
                                        VA_RT_FORMAT_YUV420,
                                        1,
                                        &vpp_gpe_ctx->surface_tmp);
        assert(va_status == VA_STATUS_SUCCESS);

        struct object_surface *obj_surf = SURFACE(vpp_gpe_ctx->surface_tmp);
        assert(obj_surf);

        if (obj_surf) {
            i965_check_alloc_surface_bo(ctx, obj_surf, 1, VA_FOURCC_NV12,
                                        SUBSAMPLE_YUV420);
            vpp_gpe_ctx->surface_tmp_object = obj_surf;
        }
    }

    assert(sharpening_intensity >= 0.0 && sharpening_intensity <= 1.0);
    thr_param.l_amount = (unsigned int)(sharpening_intensity * 128);
    thr_param.d_amount = (unsigned int)(sharpening_intensity * 128);

    thr_param.base.pic_width = vpp_gpe_ctx->in_frame_w;
    thr_param.base.pic_height = vpp_gpe_ctx->in_frame_h;

    /* Step 1: horizontal blur process */
    vpp_gpe_ctx->forward_surf_sum = 0;
    vpp_gpe_ctx->backward_surf_sum = 0;

    vpp_gpe_ctx->thread_num = vpp_gpe_ctx->in_frame_h / 16;
    vpp_gpe_ctx->thread_param_size = thr_param_size;
    vpp_gpe_ctx->thread_param = (unsigned char *)malloc(vpp_gpe_ctx->thread_param_size
                                                        * vpp_gpe_ctx->thread_num);
    pos = vpp_gpe_ctx->thread_param;

    if (!pos) {
        return VA_STATUS_ERROR_ALLOCATION_FAILED;
    }

    for (i = 0; i < vpp_gpe_ctx->thread_num; i++) {
        thr_param.base.v_pos = 16 * i;
        thr_param.base.h_pos = 0;
        memcpy(pos, &thr_param, thr_param_size);
        pos += thr_param_size;
    }

    vpp_gpe_ctx->sub_shader_index = 0;
    va_status = vpp_gpe_process(ctx, vpp_gpe_ctx);
    free(vpp_gpe_ctx->thread_param);

    /* Step 2: vertical blur process */
    vpp_gpe_ctx->surface_input_object[0] = vpp_gpe_ctx->surface_output_object;
    vpp_gpe_ctx->surface_output_object = vpp_gpe_ctx->surface_tmp_object;
    vpp_gpe_ctx->forward_surf_sum = 0;
    vpp_gpe_ctx->backward_surf_sum = 0;

    vpp_gpe_ctx->thread_num = vpp_gpe_ctx->in_frame_w / 16;
    vpp_gpe_ctx->thread_param_size = thr_param_size;
    vpp_gpe_ctx->thread_param = (unsigned char *)malloc(vpp_gpe_ctx->thread_param_size
                                                        * vpp_gpe_ctx->thread_num);
    pos = vpp_gpe_ctx->thread_param;

    if (!pos) {
        return VA_STATUS_ERROR_ALLOCATION_FAILED;
    }

    for (i = 0; i < vpp_gpe_ctx->thread_num; i++) {
        thr_param.base.v_pos = 0;
        thr_param.base.h_pos = 16 * i;
        memcpy(pos, &thr_param, thr_param_size);
        pos += thr_param_size;
    }

    vpp_gpe_ctx->sub_shader_index = 1;
    vpp_gpe_process(ctx, vpp_gpe_ctx);
    free(vpp_gpe_ctx->thread_param);

    /* Step 3: apply the blur to the original surface */
    vpp_gpe_ctx->surface_input_object[0] = origin_in_obj_surface;
    vpp_gpe_ctx->surface_input_object[1] = vpp_gpe_ctx->surface_tmp_object;
    vpp_gpe_ctx->surface_output_object = origin_out_obj_surface;
    vpp_gpe_ctx->forward_surf_sum = 1;
    vpp_gpe_ctx->backward_surf_sum = 0;

    vpp_gpe_ctx->thread_num = vpp_gpe_ctx->in_frame_h / 4;
    vpp_gpe_ctx->thread_param_size = thr_param_size;
    vpp_gpe_ctx->thread_param = (unsigned char *)malloc(vpp_gpe_ctx->thread_param_size
                                                        * vpp_gpe_ctx->thread_num);
    pos = vpp_gpe_ctx->thread_param;

    if (!pos) {
        return VA_STATUS_ERROR_ALLOCATION_FAILED;
    }

    for (i = 0; i < vpp_gpe_ctx->thread_num; i++) {
        thr_param.base.v_pos = 4 * i;
        thr_param.base.h_pos = 0;
        memcpy(pos, &thr_param, thr_param_size);
        pos += thr_param_size;
    }

    vpp_gpe_ctx->sub_shader_index = 2;
    va_status = vpp_gpe_process(ctx, vpp_gpe_ctx);
    free(vpp_gpe_ctx->thread_param);

    return va_status;

error:
    return VA_STATUS_ERROR_INVALID_PARAMETER;
}
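
/*
 * Entry point for one VPP pass: validate the filter parameter buffers,
 * pack the pipeline input plus up to 4 forward/backward references into
 * surface_input_object[], then dispatch the only supported filter
 * (sharpening).
 */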

VAStatus vpp_gpe_process_picture(VADriverContextP ctx,
                                 struct vpp_gpe_context *vpp_gpe_ctx)
{
    VAStatus va_status = VA_STATUS_SUCCESS;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    VAProcPipelineParameterBuffer *pipe = vpp_gpe_ctx->pipeline_param;
    VAProcFilterParameterBuffer *filter = NULL;
    unsigned int i = 0;
    struct object_surface *obj_surface = NULL;

    if (pipe->num_filters && !pipe->filters)
        goto error;

    for (i = 0; i < pipe->num_filters; i++) {
        struct object_buffer *obj_buf = BUFFER(pipe->filters[i]);

        assert(obj_buf && obj_buf->buffer_store && obj_buf->buffer_store->buffer);

        if (!obj_buf ||
            !obj_buf->buffer_store ||
            !obj_buf->buffer_store->buffer)
            goto error;

        filter = (VAProcFilterParameterBuffer *)obj_buf->buffer_store->buffer;
        if (filter->type == VAProcFilterSharpening) {
            break;
        }
    }

    assert(pipe->num_forward_references + pipe->num_backward_references <= 4);
    vpp_gpe_ctx->surface_input_object[0] = vpp_gpe_ctx->surface_pipeline_input_object;

    vpp_gpe_ctx->forward_surf_sum = 0;
    vpp_gpe_ctx->backward_surf_sum = 0;

    for (i = 0; i < pipe->num_forward_references; i++) {
        obj_surface = SURFACE(pipe->forward_references[i]);
        assert(obj_surface);
        vpp_gpe_ctx->surface_input_object[i + 1] = obj_surface;
        vpp_gpe_ctx->forward_surf_sum++;
    }

    for (i = 0; i < pipe->num_backward_references; i++) {
        obj_surface = SURFACE(pipe->backward_references[i]);
        assert(obj_surface);
        vpp_gpe_ctx->surface_input_object[vpp_gpe_ctx->forward_surf_sum + 1 + i] = obj_surface;
        vpp_gpe_ctx->backward_surf_sum++;
    }

    obj_surface = vpp_gpe_ctx->surface_input_object[0];
    vpp_gpe_ctx->in_frame_w = obj_surface->orig_width;
    vpp_gpe_ctx->in_frame_h = obj_surface->orig_height;

    if (filter && filter->type == VAProcFilterSharpening) {
        va_status = vpp_gpe_process_sharpening(ctx, vpp_gpe_ctx);
    } else {
        va_status = VA_STATUS_ERROR_ATTR_NOT_SUPPORTED;
    }

    vpp_gpe_ctx->is_first_frame = 0;

    return va_status;

error:
    return VA_STATUS_ERROR_INVALID_PARAMETER;
}

void
vpp_gpe_context_destroy(VADriverContextP ctx,
                        struct vpp_gpe_context *vpp_gpe_ctx)
{
    dri_bo_unreference(vpp_gpe_ctx->vpp_batchbuffer.bo);
    vpp_gpe_ctx->vpp_batchbuffer.bo = NULL;

    dri_bo_unreference(vpp_gpe_ctx->vpp_kernel_return.bo);
    vpp_gpe_ctx->vpp_kernel_return.bo = NULL;

    vpp_gpe_ctx->gpe_context_destroy(&vpp_gpe_ctx->gpe_ctx);

    if (vpp_gpe_ctx->surface_tmp != VA_INVALID_ID) {
        assert(vpp_gpe_ctx->surface_tmp_object != NULL);
        i965_DestroySurfaces(ctx, &vpp_gpe_ctx->surface_tmp, 1);
        vpp_gpe_ctx->surface_tmp = VA_INVALID_ID;
        vpp_gpe_ctx->surface_tmp_object = NULL;
    }

    if (vpp_gpe_ctx->batch)
        intel_batchbuffer_free(vpp_gpe_ctx->batch);

    free(vpp_gpe_ctx);
}
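
/*
 * The VFE state programmed below uses fixed values tuned for these media
 * kernels; the "- 1" terms reflect the minus-one encoding that the
 * MEDIA_VFE_STATE fields appear to use for thread count, URB entry size,
 * and CURBE allocation size.
 */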

struct vpp_gpe_context *
vpp_gpe_context_init(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct vpp_gpe_context *vpp_gpe_ctx = calloc(1, sizeof(struct vpp_gpe_context));
    assert(vpp_gpe_ctx);
    struct i965_gpe_context *gpe_ctx = &(vpp_gpe_ctx->gpe_ctx);

    assert(IS_HASWELL(i965->intel.device_info) ||
           IS_GEN8(i965->intel.device_info) ||
           IS_GEN9(i965->intel.device_info));

    vpp_gpe_ctx->surface_tmp = VA_INVALID_ID;
    vpp_gpe_ctx->surface_tmp_object = NULL;
    vpp_gpe_ctx->batch = intel_batchbuffer_new(&i965->intel, I915_EXEC_RENDER, 0);
    vpp_gpe_ctx->is_first_frame = 1;

    gpe_ctx->vfe_state.max_num_threads = 60 - 1;
    gpe_ctx->vfe_state.num_urb_entries = 16;
    gpe_ctx->vfe_state.gpgpu_mode = 0;
    gpe_ctx->vfe_state.urb_entry_size = 59 - 1;
    gpe_ctx->vfe_state.curbe_allocation_size = CURBE_ALLOCATION_SIZE - 1;

    if (IS_HASWELL(i965->intel.device_info)) {
        vpp_gpe_ctx->gpe_context_init = i965_gpe_context_init;
        vpp_gpe_ctx->gpe_context_destroy = i965_gpe_context_destroy;
        vpp_gpe_ctx->gpe_load_kernels = i965_gpe_load_kernels;
        gpe_ctx->surface_state_binding_table.length =
            (SURFACE_STATE_PADDED_SIZE_GEN7 + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;

        gpe_ctx->curbe.length = CURBE_TOTAL_DATA_LENGTH;
        gpe_ctx->idrt.max_entries = MAX_INTERFACE_DESC_GEN6;
        gpe_ctx->idrt.entry_size = sizeof(struct gen6_interface_descriptor_data);
    } else if (IS_GEN8(i965->intel.device_info) ||
               IS_GEN9(i965->intel.device_info)) {
        vpp_gpe_ctx->gpe_context_init = gen8_gpe_context_init;
        vpp_gpe_ctx->gpe_context_destroy = gen8_gpe_context_destroy;
        vpp_gpe_ctx->gpe_load_kernels = gen8_gpe_load_kernels;
        gpe_ctx->surface_state_binding_table.length =
            (SURFACE_STATE_PADDED_SIZE_GEN8 + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;

        gpe_ctx->curbe_size = CURBE_TOTAL_DATA_LENGTH;
        gpe_ctx->idrt_size = sizeof(struct gen8_interface_descriptor_data) * MAX_INTERFACE_DESC_GEN6;