2 * Copyright © 2016 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the
6 * "Software"), to deal in the Software without restriction, including
7 * without limitation the rights to use, copy, modify, merge, publish,
8 * distribute, sub license, and/or sell copies of the Software, and to
9 * permit persons to whom the Software is furnished to do so, subject to
10 * the following conditions:
12 * The above copyright notice and this permission notice (including the
13 * next paragraph) shall be included in all copies or substantial portions
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19 * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * Zhao, Yakui <yakui.zhao@intel.com>
37 #include "intel_batchbuffer.h"
38 #include "intel_driver.h"
40 #include "i965_defines.h"
41 #include "i965_drv_video.h"
42 #include "i965_encoder.h"
43 #include "gen9_vp9_encapi.h"
44 #include "gen9_vp9_encoder.h"
45 #include "gen9_vp9_encoder_kernels.h"
46 #include "vp9_probs.h"
47 #include "gen9_vp9_const_def.h"
49 #define MAX_VP9_ENCODER_FRAMERATE 60
50 #define MAX_VP9_ENCODER_SURFACES 64
52 #define MAX_URB_SIZE 4096 /* In register */
53 #define NUM_KERNELS_PER_GPE_CONTEXT 1
55 #define VP9_BRC_KBPS 1000
57 #define BRC_KERNEL_CBR 0x0010
58 #define BRC_KERNEL_VBR 0x0020
59 #define BRC_KERNEL_AVBR 0x0040
60 #define BRC_KERNEL_CQL 0x0080
62 #define VP9_PIC_STATE_BUFFER_SIZE 192
/* One entry of the kernel table embedded at the start of the kernel binary
 * blob.  kernel_start_pointer is the kernel's offset into the blob in
 * 64-byte units (it is shifted left by 6 before use — see
 * intel_vp9_get_kernel_header_and_size).  The bit-field order and widths
 * mirror the blob layout and must not be changed. */
typedef struct _intel_kernel_header_ {
    uint32_t reserved : 6;
    uint32_t kernel_start_pointer : 26;
} intel_kernel_header;
/* Header table at the start of the VP9 encoder kernel blob: one
 * intel_kernel_header per kernel, in the exact order the kernels are packed.
 * intel_vp9_get_kernel_header_and_size indexes into this table with pointer
 * arithmetic (entry + krnstate_idx) and uses the address one past
 * VP9BRC_Update as the end sentinel, so the member order must match the
 * binary layout exactly. */
typedef struct _intel_vp9_kernel_header {
    intel_kernel_header PLY_DSCALE;              /* scaling kernel (4x/2x) */
    intel_kernel_header VP9_ME_P;                /* motion estimation */
    intel_kernel_header VP9_Enc_I_32x32;         /* MBENC group starts here */
    intel_kernel_header VP9_Enc_I_16x16;
    intel_kernel_header VP9_Enc_P;
    intel_kernel_header VP9_Enc_TX;
    intel_kernel_header VP9_DYS;                 /* dynamic scaling */
    intel_kernel_header VP9BRC_Intra_Distortion; /* BRC group starts here */
    intel_kernel_header VP9BRC_Init;
    intel_kernel_header VP9BRC_Reset;
    intel_kernel_header VP9BRC_Update;           /* last entry: end sentinel base */
} intel_vp9_kernel_header;
85 #define DYS_1X_FLAG 0x01
86 #define DYS_4X_FLAG 0x02
87 #define DYS_16X_FLAG 0x04
/* Expected frame geometry handed to the per-surface allocation helpers
 * (gen9_vp9_init_check_surfaces / gen9_vp9_check_dys_surfaces).
 * Fix: the struct previously declared only frame_height, while every caller
 * also reads surface_param->frame_width — restore the width member. */
struct vp9_surface_param {
    uint32_t frame_width;   /* in pixels */
    uint32_t frame_height;  /* in pixels */
};
/* Convert a two's-complement integer to sign|magnitude form: the sign flag
 * sits at bit (sign_bit_pos - 1) and the magnitude is truncated to the bits
 * below it.  Used when packing signed fields into hardware command words. */
static uint32_t intel_convert_sign_mag(int val, int sign_bit_pos)
{
    uint32_t mag_mask = (1 << (sign_bit_pos - 1)) - 1;
    uint32_t ret_val = 0;

    if (val < 0) {
        val = -val;
        ret_val = ((1 << (sign_bit_pos - 1)) | (val & mag_mask));
    } else {
        ret_val = val & mag_mask;
    }

    return ret_val;
}
/* Locate one kernel inside the packed VP9 kernel binary.
 *
 * pvbinary/binary_size describe the blob, whose first bytes are an
 * intel_vp9_kernel_header table.  'operation' selects the first table entry
 * of the matching kernel group, and krnstate_idx selects a kernel within
 * that group.  On success, ret_kernel->bin points into the blob and
 * ret_kernel->size is the distance to the next kernel (or end of blob). */
intel_vp9_get_kernel_header_and_size(
    INTEL_VP9_ENC_OPERATION operation,
    struct i965_kernel *ret_kernel)
    typedef uint32_t BIN_PTR[4];
    intel_vp9_kernel_header *pkh_table;
    intel_kernel_header *pcurr_header, *pinvalid_entry, *pnext_header;
    if (!pvbinary || !ret_kernel)
    bin_start = (char *)pvbinary;
    pkh_table = (intel_vp9_kernel_header *)pvbinary;
    /* one past the last valid table entry — used as the end sentinel */
    pinvalid_entry = &(pkh_table->VP9BRC_Update) + 1;
    /* default: the requested kernel runs to the end of the blob */
    next_krnoffset = binary_size;
    /* map the operation to the first header of its kernel group */
    if ((operation == INTEL_VP9_ENC_SCALING4X) || (operation == INTEL_VP9_ENC_SCALING2X)) {
        pcurr_header = &pkh_table->PLY_DSCALE;
    } else if (operation == INTEL_VP9_ENC_ME) {
        pcurr_header = &pkh_table->VP9_ME_P;
    } else if (operation == INTEL_VP9_ENC_MBENC) {
        pcurr_header = &pkh_table->VP9_Enc_I_32x32;
    } else if (operation == INTEL_VP9_ENC_DYS) {
        pcurr_header = &pkh_table->VP9_DYS;
    } else if (operation == INTEL_VP9_ENC_BRC) {
        pcurr_header = &pkh_table->VP9BRC_Intra_Distortion;
    /* select the kernel within the group; kernel_start_pointer is stored
     * in 64-byte units, hence the << 6 */
    pcurr_header += krnstate_idx;
    ret_kernel->bin = (const BIN_PTR *)(bin_start + (pcurr_header->kernel_start_pointer << 6));
    /* size = next kernel's start offset minus this kernel's start offset */
    pnext_header = (pcurr_header + 1);
    if (pnext_header < pinvalid_entry) {
        next_krnoffset = pnext_header->kernel_start_pointer << 6;
    ret_kernel->size = next_krnoffset - (pcurr_header->kernel_start_pointer << 6);
/* free_private_data callback attached to object_surface: destroys all the
 * auxiliary surfaces (4x/16x scaled, and the dynamic-scaling set) held in
 * the per-surface gen9_surface_vp9 private data. */
gen9_free_surfaces_vp9(void **data)
    struct gen9_surface_vp9 *vp9_surface;
    /* 4x-scaled surface */
    if (vp9_surface->scaled_4x_surface_obj) {
        i965_DestroySurfaces(vp9_surface->ctx, &vp9_surface->scaled_4x_surface_id, 1);
        vp9_surface->scaled_4x_surface_id = VA_INVALID_SURFACE;
        vp9_surface->scaled_4x_surface_obj = NULL;
    /* 16x-scaled surface */
    if (vp9_surface->scaled_16x_surface_obj) {
        i965_DestroySurfaces(vp9_surface->ctx, &vp9_surface->scaled_16x_surface_id, 1);
        vp9_surface->scaled_16x_surface_id = VA_INVALID_SURFACE;
        vp9_surface->scaled_16x_surface_obj = NULL;
    /* dynamic-scaling 4x surface */
    if (vp9_surface->dys_4x_surface_obj) {
        i965_DestroySurfaces(vp9_surface->ctx, &vp9_surface->dys_4x_surface_id, 1);
        vp9_surface->dys_4x_surface_id = VA_INVALID_SURFACE;
        vp9_surface->dys_4x_surface_obj = NULL;
    /* dynamic-scaling 16x surface */
    if (vp9_surface->dys_16x_surface_obj) {
        i965_DestroySurfaces(vp9_surface->ctx, &vp9_surface->dys_16x_surface_id, 1);
        vp9_surface->dys_16x_surface_id = VA_INVALID_SURFACE;
        vp9_surface->dys_16x_surface_obj = NULL;
    /* dynamic-scaling full-resolution surface */
    if (vp9_surface->dys_surface_obj) {
        i965_DestroySurfaces(vp9_surface->ctx, &vp9_surface->dys_surface_id, 1);
        vp9_surface->dys_surface_id = VA_INVALID_SURFACE;
        vp9_surface->dys_surface_obj = NULL;
204 gen9_vp9_init_check_surfaces(VADriverContextP ctx,
205 struct object_surface *obj_surface,
206 struct vp9_surface_param *surface_param)
208 struct i965_driver_data *i965 = i965_driver_data(ctx);
209 struct gen9_surface_vp9 *vp9_surface;
210 int downscaled_width_4x, downscaled_height_4x;
211 int downscaled_width_16x, downscaled_height_16x;
213 if (!obj_surface || !obj_surface->bo)
214 return VA_STATUS_ERROR_INVALID_SURFACE;
216 if (obj_surface->private_data &&
217 obj_surface->free_private_data != gen9_free_surfaces_vp9) {
218 obj_surface->free_private_data(&obj_surface->private_data);
219 obj_surface->private_data = NULL;
222 if (obj_surface->private_data) {
223 /* if the frame width/height is already the same as the expected,
224 * it is unncessary to reallocate it.
226 vp9_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
227 if (vp9_surface->frame_width >= surface_param->frame_width ||
228 vp9_surface->frame_height >= surface_param->frame_height)
229 return VA_STATUS_SUCCESS;
231 obj_surface->free_private_data(&obj_surface->private_data);
232 obj_surface->private_data = NULL;
236 vp9_surface = calloc(1, sizeof(struct gen9_surface_vp9));
239 return VA_STATUS_ERROR_ALLOCATION_FAILED;
241 vp9_surface->ctx = ctx;
242 obj_surface->private_data = vp9_surface;
243 obj_surface->free_private_data = gen9_free_surfaces_vp9;
245 vp9_surface->frame_width = surface_param->frame_width;
246 vp9_surface->frame_height = surface_param->frame_height;
248 downscaled_width_4x = ALIGN(surface_param->frame_width / 4, 16);
249 downscaled_height_4x = ALIGN(surface_param->frame_height / 4, 16);
251 i965_CreateSurfaces(ctx,
253 downscaled_height_4x,
256 &vp9_surface->scaled_4x_surface_id);
258 vp9_surface->scaled_4x_surface_obj = SURFACE(vp9_surface->scaled_4x_surface_id);
260 if (!vp9_surface->scaled_4x_surface_obj) {
261 return VA_STATUS_ERROR_ALLOCATION_FAILED;
264 i965_check_alloc_surface_bo(ctx, vp9_surface->scaled_4x_surface_obj, 1,
265 VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
267 downscaled_width_16x = ALIGN(surface_param->frame_width / 16, 16);
268 downscaled_height_16x = ALIGN(surface_param->frame_height / 16, 16);
269 i965_CreateSurfaces(ctx,
270 downscaled_width_16x,
271 downscaled_height_16x,
274 &vp9_surface->scaled_16x_surface_id);
275 vp9_surface->scaled_16x_surface_obj = SURFACE(vp9_surface->scaled_16x_surface_id);
277 if (!vp9_surface->scaled_16x_surface_obj) {
278 return VA_STATUS_ERROR_ALLOCATION_FAILED;
281 i965_check_alloc_surface_bo(ctx, vp9_surface->scaled_16x_surface_obj, 1,
282 VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
284 return VA_STATUS_SUCCESS;
288 gen9_vp9_check_dys_surfaces(VADriverContextP ctx,
289 struct object_surface *obj_surface,
290 struct vp9_surface_param *surface_param)
292 struct i965_driver_data *i965 = i965_driver_data(ctx);
293 struct gen9_surface_vp9 *vp9_surface;
294 int dys_width_4x, dys_height_4x;
295 int dys_width_16x, dys_height_16x;
297 /* As this is handled after the surface checking, it is unnecessary
298 * to check the surface bo and vp9_priv_surface again
301 vp9_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
304 return VA_STATUS_ERROR_INVALID_SURFACE;
306 /* if the frame_width/height of dys_surface is the same as
307 * the expected, it is unnecessary to allocate it again
309 if (vp9_surface->dys_frame_width == surface_param->frame_width &&
310 vp9_surface->dys_frame_width == surface_param->frame_width)
311 return VA_STATUS_SUCCESS;
313 if (vp9_surface->dys_4x_surface_obj) {
314 i965_DestroySurfaces(vp9_surface->ctx, &vp9_surface->dys_4x_surface_id, 1);
315 vp9_surface->dys_4x_surface_id = VA_INVALID_SURFACE;
316 vp9_surface->dys_4x_surface_obj = NULL;
319 if (vp9_surface->dys_16x_surface_obj) {
320 i965_DestroySurfaces(vp9_surface->ctx, &vp9_surface->dys_16x_surface_id, 1);
321 vp9_surface->dys_16x_surface_id = VA_INVALID_SURFACE;
322 vp9_surface->dys_16x_surface_obj = NULL;
325 if (vp9_surface->dys_surface_obj) {
326 i965_DestroySurfaces(vp9_surface->ctx, &vp9_surface->dys_surface_id, 1);
327 vp9_surface->dys_surface_id = VA_INVALID_SURFACE;
328 vp9_surface->dys_surface_obj = NULL;
331 vp9_surface->dys_frame_width = surface_param->frame_width;
332 vp9_surface->dys_frame_height = surface_param->frame_height;
334 i965_CreateSurfaces(ctx,
335 surface_param->frame_width,
336 surface_param->frame_height,
339 &vp9_surface->dys_surface_id);
340 vp9_surface->dys_surface_obj = SURFACE(vp9_surface->dys_surface_id);
342 if (!vp9_surface->dys_surface_obj) {
343 return VA_STATUS_ERROR_ALLOCATION_FAILED;
346 i965_check_alloc_surface_bo(ctx, vp9_surface->dys_surface_obj, 1,
347 VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
349 dys_width_4x = ALIGN(surface_param->frame_width / 4, 16);
350 dys_height_4x = ALIGN(surface_param->frame_width / 4, 16);
352 i965_CreateSurfaces(ctx,
357 &vp9_surface->dys_4x_surface_id);
359 vp9_surface->dys_4x_surface_obj = SURFACE(vp9_surface->dys_4x_surface_id);
361 if (!vp9_surface->dys_4x_surface_obj) {
362 return VA_STATUS_ERROR_ALLOCATION_FAILED;
365 i965_check_alloc_surface_bo(ctx, vp9_surface->dys_4x_surface_obj, 1,
366 VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
368 dys_width_16x = ALIGN(surface_param->frame_width / 16, 16);
369 dys_height_16x = ALIGN(surface_param->frame_width / 16, 16);
370 i965_CreateSurfaces(ctx,
375 &vp9_surface->dys_16x_surface_id);
376 vp9_surface->dys_16x_surface_obj = SURFACE(vp9_surface->dys_16x_surface_id);
378 if (!vp9_surface->dys_16x_surface_obj) {
379 return VA_STATUS_ERROR_ALLOCATION_FAILED;
382 i965_check_alloc_surface_bo(ctx, vp9_surface->dys_16x_surface_obj, 1,
383 VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
385 return VA_STATUS_SUCCESS;
/* (Re)allocate every GPE buffer used by the VP9 VME/BRC pipeline.
 *
 * The BRC-related buffers at the top are fixed-size and are re-created from
 * the input parameters on every call.  The remaining buffers are sized from
 * the current frame geometry; if the previously allocated geometry
 * (res_width/res_height) already covers the new frame, they are kept as-is.
 * Returns VA_STATUS_SUCCESS or an allocation/parameter error. */
gen9_vp9_allocate_resources(VADriverContextP ctx,
                            struct encode_state *encode_state,
                            struct intel_encoder_context *encoder_context,
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
    struct gen9_vp9_state *vp9_state;
    int allocate_flag, i;
    uint32_t frame_width_in_sb, frame_height_in_sb, frame_sb_num;
    unsigned int width, height;
    vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
    if (!vp9_state || !vp9_state->pic_param)
        return VA_STATUS_ERROR_INVALID_PARAMETER;
    /* The BRC-related buffers do not depend on the frame size, so they
     * are (re)allocated up front from the input parameters. */
    i965_free_gpe_resource(&vme_context->res_brc_history_buffer);
    i965_free_gpe_resource(&vme_context->res_brc_const_data_buffer);
    i965_free_gpe_resource(&vme_context->res_brc_mbenc_curbe_write_buffer);
    i965_free_gpe_resource(&vme_context->res_pic_state_brc_read_buffer);
    i965_free_gpe_resource(&vme_context->res_pic_state_brc_write_hfw_read_buffer);
    i965_free_gpe_resource(&vme_context->res_pic_state_hfw_write_buffer);
    i965_free_gpe_resource(&vme_context->res_seg_state_brc_read_buffer);
    i965_free_gpe_resource(&vme_context->res_seg_state_brc_write_buffer);
    i965_free_gpe_resource(&vme_context->res_brc_bitstream_size_buffer);
    i965_free_gpe_resource(&vme_context->res_brc_hfw_data_buffer);
    i965_free_gpe_resource(&vme_context->res_brc_mmdk_pak_buffer);
    /* NOTE(review): each `goto failed_allocation` below is expected to sit
     * under an `if (!allocate_flag)` guard — confirm against the full file. */
    allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
                                               &vme_context->res_brc_history_buffer,
                                               VP9_BRC_HISTORY_BUFFER_SIZE,
                                               "Brc History buffer");
        goto failed_allocation;
    allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
                                               &vme_context->res_brc_const_data_buffer,
                                               VP9_BRC_CONSTANTSURFACE_SIZE,
                                               "Brc Constant buffer");
        goto failed_allocation;
    /* CURBE + interface descriptors written by the BRC kernel for MBENC */
    res_size = ALIGN(sizeof(vp9_mbenc_curbe_data), 64) + 128 +
               ALIGN(sizeof(struct gen8_interface_descriptor_data), 64) * NUM_VP9_MBENC;
    allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
                                               &vme_context->res_brc_mbenc_curbe_write_buffer,
        goto failed_allocation;
    /* four pic-state entries per buffer */
    res_size = VP9_PIC_STATE_BUFFER_SIZE * 4;
    allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
                                               &vme_context->res_pic_state_brc_read_buffer,
                                               "Pic State Brc_read");
        goto failed_allocation;
    res_size = VP9_PIC_STATE_BUFFER_SIZE * 4;
    allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
                                               &vme_context->res_pic_state_brc_write_hfw_read_buffer,
                                               "Pic State Brc_write Hfw_Read");
        goto failed_allocation;
    res_size = VP9_PIC_STATE_BUFFER_SIZE * 4;
    allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
                                               &vme_context->res_pic_state_hfw_write_buffer,
                                               "Pic State Hfw Write");
        goto failed_allocation;
    res_size = VP9_SEGMENT_STATE_BUFFER_SIZE;
    allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
                                               &vme_context->res_seg_state_brc_read_buffer,
                                               "Segment state brc_read");
        goto failed_allocation;
    res_size = VP9_SEGMENT_STATE_BUFFER_SIZE;
    allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
                                               &vme_context->res_seg_state_brc_write_buffer,
                                               "Segment state brc_write");
        goto failed_allocation;
    res_size = VP9_BRC_BITSTREAM_SIZE_BUFFER_SIZE;
    allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
                                               &vme_context->res_brc_bitstream_size_buffer,
                                               "Brc bitstream buffer");
        goto failed_allocation;
    res_size = VP9_HFW_BRC_DATA_BUFFER_SIZE;
    allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
                                               &vme_context->res_brc_hfw_data_buffer,
        goto failed_allocation;
    res_size = VP9_BRC_MMDK_PAK_BUFFER_SIZE;
    allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
                                               &vme_context->res_brc_mmdk_pak_buffer,
        goto failed_allocation;
    /* If the already-allocated geometry covers the expected frame size,
     * the frame-size-dependent buffers below can be kept as-is. */
    if (vp9_state->res_width >= vp9_state->frame_width &&
        vp9_state->res_height >= vp9_state->frame_height) {
        return VA_STATUS_SUCCESS;
    /* frame geometry in 64x64 super-blocks */
    frame_width_in_sb = ALIGN(vp9_state->frame_width, 64) / 64;
    frame_height_in_sb = ALIGN(vp9_state->frame_height, 64) / 64;
    frame_sb_num = frame_width_in_sb * frame_height_in_sb;
    i965_free_gpe_resource(&vme_context->res_hvd_line_buffer);
    res_size = frame_width_in_sb * 64;
    allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
                                               &vme_context->res_hvd_line_buffer,
                                               "VP9 hvd line line");
        goto failed_allocation;
    i965_free_gpe_resource(&vme_context->res_hvd_tile_line_buffer);
    allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
                                               &vme_context->res_hvd_tile_line_buffer,
                                               "VP9 hvd tile_line line");
        goto failed_allocation;
    i965_free_gpe_resource(&vme_context->res_deblocking_filter_line_buffer);
    res_size = frame_width_in_sb * 18 * 64;
    allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
                                               &vme_context->res_deblocking_filter_line_buffer,
                                               "VP9 deblocking filter line");
        goto failed_allocation;
    i965_free_gpe_resource(&vme_context->res_deblocking_filter_tile_line_buffer);
    res_size = frame_width_in_sb * 18 * 64;
    allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
                                               &vme_context->res_deblocking_filter_tile_line_buffer,
                                               "VP9 deblocking tile line");
        goto failed_allocation;
    i965_free_gpe_resource(&vme_context->res_deblocking_filter_tile_col_buffer);
    res_size = frame_height_in_sb * 17 * 64;
    allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
                                               &vme_context->res_deblocking_filter_tile_col_buffer,
                                               "VP9 deblocking tile col");
        goto failed_allocation;
    i965_free_gpe_resource(&vme_context->res_metadata_line_buffer);
    res_size = frame_width_in_sb * 5 * 64;
    allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
                                               &vme_context->res_metadata_line_buffer,
                                               "VP9 metadata line");
        goto failed_allocation;
    i965_free_gpe_resource(&vme_context->res_metadata_tile_line_buffer);
    res_size = frame_width_in_sb * 5 * 64;
    allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
                                               &vme_context->res_metadata_tile_line_buffer,
                                               "VP9 metadata tile line");
        goto failed_allocation;
    i965_free_gpe_resource(&vme_context->res_metadata_tile_col_buffer);
    res_size = frame_height_in_sb * 5 * 64;
    allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
                                               &vme_context->res_metadata_tile_col_buffer,
                                               "VP9 metadata tile col");
        goto failed_allocation;
    i965_free_gpe_resource(&vme_context->res_prob_buffer);
    allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
                                               &vme_context->res_prob_buffer,
        goto failed_allocation;
    i965_free_gpe_resource(&vme_context->res_segmentid_buffer);
    res_size = frame_sb_num * 64;
    allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
                                               &vme_context->res_segmentid_buffer,
        goto failed_allocation;
    i965_zero_gpe_resource(&vme_context->res_segmentid_buffer);
    i965_free_gpe_resource(&vme_context->res_prob_delta_buffer);
    allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
                                               &vme_context->res_prob_delta_buffer,
        goto failed_allocation;
    /* NOTE(review): the segmentid buffer is zeroed a second time here and
     * res_prob_delta_buffer is freed/allocated again immediately below —
     * this looks like a duplicated block (no leak, since the first
     * allocation is freed, but the first alloc is wasted).  Confirm and
     * drop one copy. */
    i965_zero_gpe_resource(&vme_context->res_segmentid_buffer);
    i965_free_gpe_resource(&vme_context->res_prob_delta_buffer);
    allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
                                               &vme_context->res_prob_delta_buffer,
        goto failed_allocation;
    i965_free_gpe_resource(&vme_context->res_compressed_input_buffer);
    allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
                                               &vme_context->res_compressed_input_buffer,
                                               "VP9 compressed_input buffer");
        goto failed_allocation;
    i965_free_gpe_resource(&vme_context->res_prob_counter_buffer);
    allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
                                               &vme_context->res_prob_counter_buffer,
        goto failed_allocation;
    i965_free_gpe_resource(&vme_context->res_tile_record_streamout_buffer);
    res_size = frame_sb_num * 64;
    allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
                                               &vme_context->res_tile_record_streamout_buffer,
                                               "VP9 tile record stream_out");
        goto failed_allocation;
    i965_free_gpe_resource(&vme_context->res_cu_stat_streamout_buffer);
    res_size = frame_sb_num * 64;
    allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
                                               &vme_context->res_cu_stat_streamout_buffer,
                                               "VP9 CU stat stream_out");
        goto failed_allocation;
    /* 2D surfaces for the 4x HME motion-vector data and distortion */
    width = vp9_state->downscaled_width_4x_in_mb * 32;
    height = vp9_state->downscaled_height_4x_in_mb * 16;
    i965_free_gpe_resource(&vme_context->s4x_memv_data_buffer);
    allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
                                                  &vme_context->s4x_memv_data_buffer,
        goto failed_allocation;
    width = vp9_state->downscaled_width_4x_in_mb * 8;
    height = vp9_state->downscaled_height_4x_in_mb * 16;
    i965_free_gpe_resource(&vme_context->s4x_memv_distortion_buffer);
    allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
                                                  &vme_context->s4x_memv_distortion_buffer,
                                                  "VP9 4x MEMV distorion");
        goto failed_allocation;
    /* 16x HME motion-vector data */
    width = ALIGN(vp9_state->downscaled_width_16x_in_mb * 32, 64);
    height = vp9_state->downscaled_height_16x_in_mb * 16;
    i965_free_gpe_resource(&vme_context->s16x_memv_data_buffer);
    allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
                                                  &vme_context->s16x_memv_data_buffer,
                                                  "VP9 16x MEMV data");
        goto failed_allocation;
    /* MBENC 16x16 inter-mode output */
    width = vp9_state->frame_width_in_mb * 16;
    height = vp9_state->frame_height_in_mb * 8;
    i965_free_gpe_resource(&vme_context->res_output_16x16_inter_modes);
    allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
                                                  &vme_context->res_output_16x16_inter_modes,
                                                  "VP9 output inter_mode");
        goto failed_allocation;
    /* double-buffered mode-decision and temporal-MV surfaces */
    res_size = vp9_state->frame_width_in_mb * vp9_state->frame_height_in_mb *
    for (i = 0; i < 2; i++) {
        i965_free_gpe_resource(&vme_context->res_mode_decision[i]);
        allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
                                                   &vme_context->res_mode_decision[i],
                                                   "VP9 mode decision");
            goto failed_allocation;
    res_size = frame_sb_num * 9 * 64;
    for (i = 0; i < 2; i++) {
        i965_free_gpe_resource(&vme_context->res_mv_temporal_buffer[i]);
        allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
                                                   &vme_context->res_mv_temporal_buffer[i],
            goto failed_allocation;
    /* MB code surface: per-SB records after the mb_data_offset header area */
    vp9_state->mb_data_offset = ALIGN(frame_sb_num * 16, 4096) + 4096;
    res_size = vp9_state->mb_data_offset + frame_sb_num * 64 * 64 + 1000;
    i965_free_gpe_resource(&vme_context->res_mb_code_surface);
    allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
                                               &vme_context->res_mb_code_surface,
                                               ALIGN(res_size, 4096),
                                               "VP9 mb_code surface");
        goto failed_allocation;
    i965_free_gpe_resource(&vme_context->res_pak_uncompressed_input_buffer);
    allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
                                               &vme_context->res_pak_uncompressed_input_buffer,
                                               ALIGN(res_size, 4096),
                                               "VP9 pak_uncompressed_input");
        goto failed_allocation;
    if (!vme_context->frame_header_data) {
        /* allocate 512 bytes for generating the uncompressed header */
        vme_context->frame_header_data = calloc(1, 512);
    /* remember the geometry the frame-size-dependent buffers were sized for */
    vp9_state->res_width = vp9_state->frame_width;
    vp9_state->res_height = vp9_state->frame_height;
    return VA_STATUS_SUCCESS;
    /* failure path (failed_allocation label) */
    return VA_STATUS_ERROR_ALLOCATION_FAILED;
/* Release every GPE buffer owned by the VP9 VME context.  Mirrors the
 * allocations in gen9_vp9_allocate_resources; the BRC buffers are only
 * freed when BRC was enabled for this context. */
gen9_vp9_free_resources(struct gen9_encoder_context_vp9 *vme_context)
    struct gen9_vp9_state *vp9_state = (struct gen9_vp9_state *) vme_context->enc_priv_state;
    /* BRC-related buffers */
    if (vp9_state->brc_enabled) {
        i965_free_gpe_resource(&vme_context->res_brc_history_buffer);
        i965_free_gpe_resource(&vme_context->res_brc_const_data_buffer);
        i965_free_gpe_resource(&vme_context->res_brc_mbenc_curbe_write_buffer);
        i965_free_gpe_resource(&vme_context->res_pic_state_brc_read_buffer);
        i965_free_gpe_resource(&vme_context->res_pic_state_brc_write_hfw_read_buffer);
        i965_free_gpe_resource(&vme_context->res_pic_state_hfw_write_buffer);
        i965_free_gpe_resource(&vme_context->res_seg_state_brc_read_buffer);
        i965_free_gpe_resource(&vme_context->res_seg_state_brc_write_buffer);
        i965_free_gpe_resource(&vme_context->res_brc_bitstream_size_buffer);
        i965_free_gpe_resource(&vme_context->res_brc_hfw_data_buffer);
        i965_free_gpe_resource(&vme_context->res_brc_mmdk_pak_buffer);
    /* frame-size-dependent buffers */
    i965_free_gpe_resource(&vme_context->res_hvd_line_buffer);
    i965_free_gpe_resource(&vme_context->res_hvd_tile_line_buffer);
    i965_free_gpe_resource(&vme_context->res_deblocking_filter_line_buffer);
    i965_free_gpe_resource(&vme_context->res_deblocking_filter_tile_line_buffer);
    i965_free_gpe_resource(&vme_context->res_deblocking_filter_tile_col_buffer);
    i965_free_gpe_resource(&vme_context->res_metadata_line_buffer);
    i965_free_gpe_resource(&vme_context->res_metadata_tile_line_buffer);
    i965_free_gpe_resource(&vme_context->res_metadata_tile_col_buffer);
    i965_free_gpe_resource(&vme_context->res_prob_buffer);
    i965_free_gpe_resource(&vme_context->res_segmentid_buffer);
    i965_free_gpe_resource(&vme_context->res_prob_delta_buffer);
    i965_free_gpe_resource(&vme_context->res_prob_counter_buffer);
    i965_free_gpe_resource(&vme_context->res_tile_record_streamout_buffer);
    i965_free_gpe_resource(&vme_context->res_cu_stat_streamout_buffer);
    i965_free_gpe_resource(&vme_context->s4x_memv_data_buffer);
    i965_free_gpe_resource(&vme_context->s4x_memv_distortion_buffer);
    i965_free_gpe_resource(&vme_context->s16x_memv_data_buffer);
    i965_free_gpe_resource(&vme_context->res_output_16x16_inter_modes);
    /* double-buffered surfaces */
    for (i = 0; i < 2; i++) {
        i965_free_gpe_resource(&vme_context->res_mode_decision[i]);
    for (i = 0; i < 2; i++) {
        i965_free_gpe_resource(&vme_context->res_mv_temporal_buffer[i]);
    i965_free_gpe_resource(&vme_context->res_compressed_input_buffer);
    i965_free_gpe_resource(&vme_context->res_mb_code_surface);
    i965_free_gpe_resource(&vme_context->res_pak_uncompressed_input_buffer);
    /* CPU-side scratch for the uncompressed frame header */
    if (vme_context->frame_header_data) {
        free(vme_context->frame_header_data);
        vme_context->frame_header_data = NULL;
/* Translate the simplified per-kernel walker settings into a full
 * MEDIA_OBJECT_WALKER parameter block: raster scan when there is no
 * inter-block dependency, otherwise a scoreboarded dependency pattern
 * (a 45-degree zigzag variant when walker_degree is VP9_45Z_DEGREE). */
gen9_init_media_object_walker_parameter(struct intel_encoder_context *encoder_context,
                                        struct gpe_encoder_kernel_walker_parameter *kernel_walker_param,
                                        struct gpe_media_object_walker_parameter *walker_param)
    memset(walker_param, 0, sizeof(*walker_param));
    walker_param->use_scoreboard = kernel_walker_param->use_scoreboard;
    walker_param->block_resolution.x = kernel_walker_param->resolution_x;
    walker_param->block_resolution.y = kernel_walker_param->resolution_y;
    walker_param->global_resolution.x = kernel_walker_param->resolution_x;
    walker_param->global_resolution.y = kernel_walker_param->resolution_y;
    /* one outer pass over the whole frame */
    walker_param->global_outer_loop_stride.x = kernel_walker_param->resolution_x;
    walker_param->global_outer_loop_stride.y = 0;
    walker_param->global_inner_loop_unit.x = 0;
    walker_param->global_inner_loop_unit.y = kernel_walker_param->resolution_y;
    walker_param->local_loop_exec_count = 0xFFFF; //MAX VALUE
    walker_param->global_loop_exec_count = 0xFFFF; //MAX VALUE
    if (kernel_walker_param->no_dependency) {
        /* no scoreboard needed: plain raster-scan walking pattern */
        walker_param->scoreboard_mask = 0;
        walker_param->use_scoreboard = 0;
        // Raster scan walking pattern
        walker_param->local_outer_loop_stride.x = 0;
        walker_param->local_outer_loop_stride.y = 1;
        walker_param->local_inner_loop_unit.x = 1;
        walker_param->local_inner_loop_unit.y = 0;
        walker_param->local_end.x = kernel_walker_param->resolution_x - 1;
        walker_param->local_end.y = 0;
        /* dependency path (else branch): scoreboarded walking */
        walker_param->local_end.x = 0;
        walker_param->local_end.y = 0;
        if (kernel_walker_param->walker_degree == VP9_45Z_DEGREE) {
            /* 45-degree zigzag pattern with a 4-entry dependency scoreboard */
            walker_param->scoreboard_mask = 0x0F;
            walker_param->global_loop_exec_count = 0x3FF;
            walker_param->local_loop_exec_count = 0x3FF;
            /* two interleaved columns: half-width, double-height grid */
            walker_param->global_resolution.x = (unsigned int)(kernel_walker_param->resolution_x / 2.f) + 1;
            walker_param->global_resolution.y = 2 * kernel_walker_param->resolution_y;
            walker_param->global_start.x = 0;
            walker_param->global_start.y = 0;
            walker_param->global_outer_loop_stride.x = walker_param->global_resolution.x;
            walker_param->global_outer_loop_stride.y = 0;
            walker_param->global_inner_loop_unit.x = 0;
            walker_param->global_inner_loop_unit.y = walker_param->global_resolution.y;
            walker_param->block_resolution.x = walker_param->global_resolution.x;
            walker_param->block_resolution.y = walker_param->global_resolution.y;
            walker_param->local_start.x = 0;
            walker_param->local_start.y = 0;
            walker_param->local_outer_loop_stride.x = 1;
            walker_param->local_outer_loop_stride.y = 0;
            walker_param->local_inner_loop_unit.x = -1;
            walker_param->local_inner_loop_unit.y = 4;
            walker_param->middle_loop_extra_steps = 3;
            walker_param->mid_loop_unit_x = 0;
            walker_param->mid_loop_unit_y = 1;
            /* default dependency pattern (e.g. 26-degree style) */
            walker_param->scoreboard_mask = 0x0F;
            walker_param->local_outer_loop_stride.x = 1;
            walker_param->local_outer_loop_stride.y = 0;
            walker_param->local_inner_loop_unit.x = -2;
            walker_param->local_inner_loop_unit.y = 1;
/* Submit a single MEDIA_OBJECT for the given GPE context inside an atomic
 * batch, first stamping the media function id into the status buffer so
 * completion/progress can be tracked.
 * NOTE(review): 'media_function' is a parameter not visible in this view of
 * the signature — confirm it precedes 'param'. */
gen9_run_kernel_media_object(VADriverContextP ctx,
                             struct intel_encoder_context *encoder_context,
                             struct i965_gpe_context *gpe_context,
                             struct gpe_media_object_parameter *param)
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct vp9_encode_status_buffer_internal *status_buffer;
    struct gen9_vp9_state *vp9_state;
    struct gpe_mi_store_data_imm_parameter mi_store_data_imm;
    vp9_state = (struct gen9_vp9_state *)(encoder_context->enc_priv_state);
    if (!vp9_state || !batch)
    intel_batchbuffer_start_atomic(batch, 0x1000);
    /* record which media function is executing into the status buffer */
    status_buffer = &(vp9_state->status_buffer);
    memset(&mi_store_data_imm, 0, sizeof(mi_store_data_imm));
    mi_store_data_imm.bo = status_buffer->bo;
    mi_store_data_imm.offset = status_buffer->media_index_offset;
    mi_store_data_imm.dw0 = media_function;
    gen8_gpe_mi_store_data_imm(ctx, batch, &mi_store_data_imm);
    /* flush, set up the media pipeline, dispatch the object, then flush
     * media state and tear the pipeline down */
    intel_batchbuffer_emit_mi_flush(batch);
    gen9_gpe_pipeline_setup(ctx, gpe_context, batch);
    gen8_gpe_media_object(ctx, gpe_context, batch, param);
    gen8_gpe_media_state_flush(ctx, gpe_context, batch);
    gen9_gpe_pipeline_end(ctx, gpe_context, batch);
    intel_batchbuffer_end_atomic(batch);
    intel_batchbuffer_flush(batch);
/* Submit a MEDIA_OBJECT_WALKER for the given GPE context inside an atomic
 * batch, stamping the media function id into the status buffer for
 * progress tracking.  Walker counterpart of gen9_run_kernel_media_object
 * (note: here the MI flush is emitted before the status-buffer stamp,
 * rather than after as in the single-object variant).
 * NOTE(review): 'media_function' is a parameter not visible in this view of
 * the signature — confirm it precedes 'param'. */
gen9_run_kernel_media_object_walker(VADriverContextP ctx,
                                    struct intel_encoder_context *encoder_context,
                                    struct i965_gpe_context *gpe_context,
                                    struct gpe_media_object_walker_parameter *param)
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct vp9_encode_status_buffer_internal *status_buffer;
    struct gen9_vp9_state *vp9_state;
    struct gpe_mi_store_data_imm_parameter mi_store_data_imm;
    vp9_state = (struct gen9_vp9_state *)(encoder_context->enc_priv_state);
    if (!vp9_state || !batch)
    intel_batchbuffer_start_atomic(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    /* record which media function is executing into the status buffer */
    status_buffer = &(vp9_state->status_buffer);
    memset(&mi_store_data_imm, 0, sizeof(mi_store_data_imm));
    mi_store_data_imm.bo = status_buffer->bo;
    mi_store_data_imm.offset = status_buffer->media_index_offset;
    mi_store_data_imm.dw0 = media_function;
    gen8_gpe_mi_store_data_imm(ctx, batch, &mi_store_data_imm);
    /* set up the media pipeline, dispatch the walker, flush media state,
     * then tear the pipeline down */
    gen9_gpe_pipeline_setup(ctx, gpe_context, batch);
    gen8_gpe_media_object_walker(ctx, gpe_context, batch, param);
    gen8_gpe_media_state_flush(ctx, gpe_context, batch);
    gen9_gpe_pipeline_end(ctx, gpe_context, batch);
    intel_batchbuffer_end_atomic(batch);
    intel_batchbuffer_flush(batch);
/*
 * gen9_vp9_set_curbe_brc - program the CURBE (constant buffer) for the
 * VP9 BRC (bit-rate control) kernels.
 *
 * Maps the CURBE of @gpe_context, zeroes it, fills the common fields
 * (frame size, frame type, ref flags), then branches on
 * @param->media_state_type: BRC init/reset programs rate-control targets
 * and deviation thresholds; BRC update programs per-frame/segment QP
 * deltas and buffer-fullness state; the I-frame distortion state enables
 * intra-mode estimation. Finally the binding-table indices are written
 * and the CURBE is unmapped.
 *
 * NOTE(review): the embedded original line numbers in this listing are
 * non-contiguous, so some lines (else branches, closing braces, NULL
 * checks, '*' continuations of the KBPS multiplications) are not visible
 * here. Comments below describe only the visible code — confirm against
 * the upstream file before relying on them.
 */
986 void gen9_vp9_set_curbe_brc(VADriverContextP ctx,
987 struct encode_state *encode_state,
988 struct i965_gpe_context *gpe_context,
989 struct intel_encoder_context *encoder_context,
990 struct gen9_vp9_brc_curbe_param *param)
992 VAEncSequenceParameterBufferVP9 *seq_param;
993 VAEncPictureParameterBufferVP9 *pic_param;
994 VAEncMiscParameterTypeVP9PerSegmantParam *segment_param;
995 vp9_brc_curbe_data *cmd;
996 double dbps_ratio, dInputBitsPerFrame;
997 struct gen9_vp9_state *vp9_state;
999 vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
1001 pic_param = param->ppic_param;
1002 seq_param = param->pseq_param;
1003 segment_param = param->psegment_param;
1005 cmd = i965_gpe_context_map_curbe(gpe_context);
1010 memset(cmd, 0, sizeof(vp9_brc_curbe_data));
/* Without dynamic scaling the source dimensions are used; the _dst
 * branch below presumably sits in the elided else arm. */
1012 if (!vp9_state->dys_enabled) {
1013 cmd->dw0.frame_width = pic_param->frame_width_src;
1014 cmd->dw0.frame_height = pic_param->frame_height_src;
1016 cmd->dw0.frame_width = pic_param->frame_width_dst;
1017 cmd->dw0.frame_height = pic_param->frame_height_dst;
1020 cmd->dw1.frame_type = vp9_state->picture_coding_type;
1021 cmd->dw1.segmentation_enable = 0;
1022 cmd->dw1.ref_frame_flags = vp9_state->ref_frame_flag;
1023 cmd->dw1.num_tlevels = 1;
1025 switch (param->media_state_type) {
1026 case VP9_MEDIA_STATE_BRC_INIT_RESET: {
1027 cmd->dw3.max_level_ratiot0 = 0;
1028 cmd->dw3.max_level_ratiot1 = 0;
1029 cmd->dw3.max_level_ratiot2 = 0;
1030 cmd->dw3.max_level_ratiot3 = 0;
1032 cmd->dw4.profile_level_max_frame = seq_param->max_frame_width *
1033 seq_param->max_frame_height;
1034 cmd->dw5.init_buf_fullness = vp9_state->init_vbv_buffer_fullness_in_bit;
1035 cmd->dw6.buf_size = vp9_state->vbv_buffer_size_in_bit;
/* Bit rates are rounded up to whole kbps units; the trailing
 * "* VP9_BRC_KBPS" factor appears to be on elided continuation lines. */
1036 cmd->dw7.target_bit_rate = (vp9_state->target_bit_rate + VP9_BRC_KBPS - 1) / VP9_BRC_KBPS *
1038 cmd->dw8.max_bit_rate = (vp9_state->max_bit_rate + VP9_BRC_KBPS - 1) / VP9_BRC_KBPS *
1040 cmd->dw9.min_bit_rate = (vp9_state->min_bit_rate + VP9_BRC_KBPS - 1) / VP9_BRC_KBPS *
1042 cmd->dw10.frame_ratem = vp9_state->framerate.num;
1043 cmd->dw11.frame_rated = vp9_state->framerate.den;
1045 cmd->dw14.avbr_accuracy = 30;
1046 cmd->dw14.avbr_convergence = 150;
/* Select the BRC kernel flavor from the VA-API rate-control mode.
 * CBR pins max to target and min to 0; the CQL case below is
 * presumably the final else arm (lines elided). */
1048 if (encoder_context->rate_control_mode == VA_RC_CBR) {
1049 cmd->dw12.brc_flag = BRC_KERNEL_CBR;
1050 cmd->dw8.max_bit_rate = cmd->dw7.target_bit_rate;
1051 cmd->dw9.min_bit_rate = 0;
1052 } else if (encoder_context->rate_control_mode == VA_RC_VBR) {
1053 cmd->dw12.brc_flag = BRC_KERNEL_VBR;
1055 cmd->dw12.brc_flag = BRC_KERNEL_CQL;
1056 cmd->dw16.cq_level = 30;
1058 cmd->dw12.gopp = seq_param->intra_period - 1;
1060 cmd->dw13.init_frame_width = pic_param->frame_width_src;
1061 cmd->dw13.init_frame_height = pic_param->frame_height_src;
1063 cmd->dw15.min_qp = 1;
1064 cmd->dw15.max_qp = 255;
1066 cmd->dw16.cq_level = 30;
1068 cmd->dw17.enable_dynamic_scaling = vp9_state->dys_in_use;
1069 cmd->dw17.brc_overshoot_cbr_pct = 150;
/* Estimate bits per frame and the ratio of that to 1/30th of the VBV
 * buffer; clamp to [0.1, 3.5] (clamp assignments are on elided lines). */
1071 dInputBitsPerFrame = (double)cmd->dw8.max_bit_rate * (double)vp9_state->framerate.den / (double)vp9_state->framerate.num;
1072 dbps_ratio = dInputBitsPerFrame / ((double)vp9_state->vbv_buffer_size_in_bit / 30.0);
1073 if (dbps_ratio < 0.1)
1075 if (dbps_ratio > 3.5)
/* Export init/reset results to the caller-provided state pointers. */
1078 *param->pbrc_init_reset_buf_size_in_bits = cmd->dw6.buf_size;
1079 *param->pbrc_init_reset_input_bits_per_frame = dInputBitsPerFrame;
1080 *param->pbrc_init_current_target_buf_full_in_bits = cmd->dw6.buf_size >> 1;
/* Deviation thresholds: negative (underflow) thresholds stored as
 * unsigned via the cast; scaled by powers of dbps_ratio. */
1082 cmd->dw18.pframe_deviation_threshold0 = (uint32_t)(-50 * pow(0.90, dbps_ratio));
1083 cmd->dw18.pframe_deviation_threshold1 = (uint32_t)(-50 * pow(0.66, dbps_ratio));
1084 cmd->dw18.pframe_deviation_threshold2 = (uint32_t)(-50 * pow(0.46, dbps_ratio));
1085 cmd->dw18.pframe_deviation_threshold3 = (uint32_t)(-50 * pow(0.3, dbps_ratio));
1086 cmd->dw19.pframe_deviation_threshold4 = (uint32_t)(50 * pow(0.3, dbps_ratio));
1087 cmd->dw19.pframe_deviation_threshold5 = (uint32_t)(50 * pow(0.46, dbps_ratio));
1088 cmd->dw19.pframe_deviation_threshold6 = (uint32_t)(50 * pow(0.7, dbps_ratio));
1089 cmd->dw19.pframe_deviation_threshold7 = (uint32_t)(50 * pow(0.9, dbps_ratio));
1091 cmd->dw20.vbr_deviation_threshold0 = (uint32_t)(-50 * pow(0.9, dbps_ratio));
1092 cmd->dw20.vbr_deviation_threshold1 = (uint32_t)(-50 * pow(0.7, dbps_ratio));
1093 cmd->dw20.vbr_deviation_threshold2 = (uint32_t)(-50 * pow(0.5, dbps_ratio));
1094 cmd->dw20.vbr_deviation_threshold3 = (uint32_t)(-50 * pow(0.3, dbps_ratio));
1095 cmd->dw21.vbr_deviation_threshold4 = (uint32_t)(100 * pow(0.4, dbps_ratio));
1096 cmd->dw21.vbr_deviation_threshold5 = (uint32_t)(100 * pow(0.5, dbps_ratio));
1097 cmd->dw21.vbr_deviation_threshold6 = (uint32_t)(100 * pow(0.75, dbps_ratio));
1098 cmd->dw21.vbr_deviation_threshold7 = (uint32_t)(100 * pow(0.9, dbps_ratio));
1100 cmd->dw22.kframe_deviation_threshold0 = (uint32_t)(-50 * pow(0.8, dbps_ratio));
1101 cmd->dw22.kframe_deviation_threshold1 = (uint32_t)(-50 * pow(0.6, dbps_ratio));
1102 cmd->dw22.kframe_deviation_threshold2 = (uint32_t)(-50 * pow(0.34, dbps_ratio));
1103 cmd->dw22.kframe_deviation_threshold3 = (uint32_t)(-50 * pow(0.2, dbps_ratio));
1104 cmd->dw23.kframe_deviation_threshold4 = (uint32_t)(50 * pow(0.2, dbps_ratio));
1105 cmd->dw23.kframe_deviation_threshold5 = (uint32_t)(50 * pow(0.4, dbps_ratio));
1106 cmd->dw23.kframe_deviation_threshold6 = (uint32_t)(50 * pow(0.66, dbps_ratio));
1107 cmd->dw23.kframe_deviation_threshold7 = (uint32_t)(50 * pow(0.9, dbps_ratio));
1111 case VP9_MEDIA_STATE_BRC_UPDATE: {
1112 cmd->dw15.min_qp = 1;
1113 cmd->dw15.max_qp = 255;
1115 cmd->dw25.frame_number = param->frame_number;
1117 // Used in dynamic scaling. set to zero for now
1118 cmd->dw27.hrd_buffer_fullness_upper_limit = 0;
1119 cmd->dw28.hrd_buffer_fullness_lower_limit = 0;
/* Per-segment QP deltas are only meaningful when segmentation is on. */
1121 if (pic_param->pic_flags.bits.segmentation_enabled) {
1122 cmd->dw32.seg_delta_qp0 = segment_param->seg_data[0].segment_qindex_delta;
1123 cmd->dw32.seg_delta_qp1 = segment_param->seg_data[1].segment_qindex_delta;
1124 cmd->dw32.seg_delta_qp2 = segment_param->seg_data[2].segment_qindex_delta;
1125 cmd->dw32.seg_delta_qp3 = segment_param->seg_data[3].segment_qindex_delta;
1127 cmd->dw33.seg_delta_qp4 = segment_param->seg_data[4].segment_qindex_delta;
1128 cmd->dw33.seg_delta_qp5 = segment_param->seg_data[5].segment_qindex_delta;
1129 cmd->dw33.seg_delta_qp6 = segment_param->seg_data[6].segment_qindex_delta;
1130 cmd->dw33.seg_delta_qp7 = segment_param->seg_data[7].segment_qindex_delta;
1133 //cmd->dw34.temporal_id = pPicParams->temporal_idi;
1134 cmd->dw34.temporal_id = 0;
1135 cmd->dw34.multi_ref_qp_check = param->multi_ref_qp_check;
1137 cmd->dw35.max_num_pak_passes = param->brc_num_pak_passes;
1138 cmd->dw35.sync_async = 0;
1139 cmd->dw35.mbrc = param->mbbrc_enabled;
/* Wrap the running buffer-fullness target when it exceeds the buffer
 * size, and flag the overflow for the kernel. */
1140 if (*param->pbrc_init_current_target_buf_full_in_bits >
1141 ((double)(*param->pbrc_init_reset_buf_size_in_bits))) {
1142 *param->pbrc_init_current_target_buf_full_in_bits -=
1143 (double)(*param->pbrc_init_reset_buf_size_in_bits);
1144 cmd->dw35.overflow = 1;
1146 cmd->dw35.overflow = 0;
1148 cmd->dw24.target_size = (uint32_t)(*param->pbrc_init_current_target_buf_full_in_bits);
1150 cmd->dw36.segmentation = pic_param->pic_flags.bits.segmentation_enabled;
/* Advance the target fullness by one frame's worth of input bits. */
1152 *param->pbrc_init_current_target_buf_full_in_bits += *param->pbrc_init_reset_input_bits_per_frame;
1154 cmd->dw38.qdelta_ydc = pic_param->luma_dc_qindex_delta;
1155 cmd->dw38.qdelta_uvdc = pic_param->chroma_dc_qindex_delta;
1156 cmd->dw38.qdelta_uvac = pic_param->chroma_ac_qindex_delta;
1160 case VP9_MEDIA_STATE_ENC_I_FRAME_DIST:
1161 cmd->dw2.intra_mode_disable = 0;
/* Binding-table indices consumed by the BRC kernel. */
1167 cmd->dw48.brc_y4x_input_bti = VP9_BTI_BRC_SRCY4X_G9;
1168 cmd->dw49.brc_vme_coarse_intra_input_bti = VP9_BTI_BRC_VME_COARSE_INTRA_G9;
1169 cmd->dw50.brc_history_buffer_bti = VP9_BTI_BRC_HISTORY_G9;
1170 cmd->dw51.brc_const_data_input_bti = VP9_BTI_BRC_CONSTANT_DATA_G9;
1171 cmd->dw52.brc_distortion_bti = VP9_BTI_BRC_DISTORTION_G9;
1172 cmd->dw53.brc_mmdk_pak_output_bti = VP9_BTI_BRC_MMDK_PAK_OUTPUT_G9;
1173 cmd->dw54.brc_enccurbe_input_bti = VP9_BTI_BRC_MBENC_CURBE_INPUT_G9;
1174 cmd->dw55.brc_enccurbe_output_bti = VP9_BTI_BRC_MBENC_CURBE_OUTPUT_G9;
1175 cmd->dw56.brc_pic_state_input_bti = VP9_BTI_BRC_PIC_STATE_INPUT_G9;
1176 cmd->dw57.brc_pic_state_output_bti = VP9_BTI_BRC_PIC_STATE_OUTPUT_G9;
1177 cmd->dw58.brc_seg_state_input_bti = VP9_BTI_BRC_SEGMENT_STATE_INPUT_G9;
1178 cmd->dw59.brc_seg_state_output_bti = VP9_BTI_BRC_SEGMENT_STATE_OUTPUT_G9;
1179 cmd->dw60.brc_bitstream_size_data_bti = VP9_BTI_BRC_BITSTREAM_SIZE_G9;
1180 cmd->dw61.brc_hfw_data_output_bti = VP9_BTI_BRC_HFW_DATA_G9;
1182 i965_gpe_context_unmap_curbe(gpe_context);
/*
 * gen9_brc_init_reset_add_surfaces_vp9 - bind the surfaces needed by the
 * BRC init/reset kernel: the BRC history buffer and the 4x MEMV
 * distortion 2D surface.
 *
 * NOTE(review): several argument lines of the i965_add_* calls are
 * elided in this listing (non-contiguous embedded line numbers) —
 * confirm the full argument lists against the upstream file.
 */
1187 gen9_brc_init_reset_add_surfaces_vp9(VADriverContextP ctx,
1188 struct encode_state *encode_state,
1189 struct intel_encoder_context *encoder_context,
1190 struct i965_gpe_context *gpe_context)
1192 struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
/* BRC history buffer (raw buffer surface). */
1194 i965_add_buffer_gpe_surface(ctx,
1196 &vme_context->res_brc_history_buffer,
1198 vme_context->res_brc_history_buffer.size,
1200 VP9_BTI_BRC_HISTORY_G9);
/* 4x-downscaled motion-estimation distortion surface (R8_UNORM 2D). */
1202 i965_add_buffer_2d_gpe_surface(ctx,
1204 &vme_context->s4x_memv_distortion_buffer,
1206 I965_SURFACEFORMAT_R8_UNORM,
1207 VP9_BTI_BRC_DISTORTION_G9);
1210 /* BRC init/reset kernel launch */
/*
 * gen9_vp9_brc_init_reset_kernel - run the BRC init (first frame) or BRC
 * reset (subsequent reconfiguration) kernel.
 *
 * Selects VP9_BRC_INIT or VP9_BRC_RESET based on vp9_state->brc_inited,
 * fills the BRC CURBE through the pfn_set_curbe_brc hook, binds the
 * init/reset surfaces, and dispatches a single media object.
 *
 * Returns VA_STATUS_SUCCESS, or VA_STATUS_ERROR_INVALID_PARAMETER when
 * the encoder state or picture parameters are missing.
 *
 * NOTE(review): some lines are elided in this listing (non-contiguous
 * embedded line numbers); confirm against the upstream file.
 */
1212 gen9_vp9_brc_init_reset_kernel(VADriverContextP ctx,
1213 struct encode_state *encode_state,
1214 struct intel_encoder_context *encoder_context)
1216 struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
1217 struct vp9_brc_context *brc_context = &vme_context->brc_context;
1218 struct gpe_media_object_parameter media_object_param;
1219 struct i965_gpe_context *gpe_context;
1220 int gpe_index = VP9_BRC_INIT;
1221 int media_function = VP9_MEDIA_STATE_BRC_INIT_RESET;
1222 struct gen9_vp9_brc_curbe_param brc_initreset_curbe;
1223 VAEncPictureParameterBufferVP9 *pic_param;
1224 struct gen9_vp9_state *vp9_state;
1226 vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
1228 if (!vp9_state || !vp9_state->pic_param)
1229 return VA_STATUS_ERROR_INVALID_PARAMETER;
1231 pic_param = vp9_state->pic_param;
/* After the first successful init, subsequent calls perform a reset. */
1233 if (vp9_state->brc_inited)
1234 gpe_index = VP9_BRC_RESET;
1236 gpe_context = &brc_context->gpe_contexts[gpe_index];
1238 gen8_gpe_context_init(ctx, gpe_context);
1239 gen9_gpe_reset_binding_table(ctx, gpe_context);
/* Describe this frame to the CURBE-filling hook; the pbrc_* pointers
 * let the hook write BRC state back into vp9_state. */
1241 brc_initreset_curbe.media_state_type = media_function;
1242 brc_initreset_curbe.curr_frame = pic_param->reconstructed_frame;
1243 brc_initreset_curbe.ppic_param = vp9_state->pic_param;
1244 brc_initreset_curbe.pseq_param = vp9_state->seq_param;
1245 brc_initreset_curbe.psegment_param = vp9_state->segment_param;
1246 brc_initreset_curbe.frame_width = vp9_state->frame_width;
1247 brc_initreset_curbe.frame_height = vp9_state->frame_height;
1248 brc_initreset_curbe.pbrc_init_current_target_buf_full_in_bits =
1249 &vp9_state->brc_init_current_target_buf_full_in_bits;
1250 brc_initreset_curbe.pbrc_init_reset_buf_size_in_bits =
1251 &vp9_state->brc_init_reset_buf_size_in_bits;
1252 brc_initreset_curbe.pbrc_init_reset_input_bits_per_frame =
1253 &vp9_state->brc_init_reset_input_bits_per_frame;
1254 brc_initreset_curbe.picture_coding_type = vp9_state->picture_coding_type;
1255 brc_initreset_curbe.initbrc = !vp9_state->brc_inited;
1256 brc_initreset_curbe.mbbrc_enabled = 0;
1257 brc_initreset_curbe.ref_frame_flag = vp9_state->ref_frame_flag;
1259 vme_context->pfn_set_curbe_brc(ctx, encode_state,
1262 &brc_initreset_curbe);
1264 gen9_brc_init_reset_add_surfaces_vp9(ctx, encode_state, encoder_context, gpe_context);
1265 gen8_gpe_setup_interface_data(ctx, gpe_context);
/* Single media object, no walker parameters needed. */
1267 memset(&media_object_param, 0, sizeof(media_object_param));
1268 gen9_run_kernel_media_object(ctx, encoder_context, gpe_context, media_function, &media_object_param);
1270 return VA_STATUS_SUCCESS;
/*
 * gen9_brc_intra_dist_add_surfaces_vp9 - bind the surfaces for the BRC
 * intra-distortion kernel: the 4x-scaled reconstructed source (both as a
 * plain 2D surface and as a VME/adv surface) and the 4x MEMV distortion
 * output.
 *
 * NOTE(review): some argument lines of the i965_add_* calls are elided
 * in this listing — confirm against the upstream file.
 */
1274 gen9_brc_intra_dist_add_surfaces_vp9(VADriverContextP ctx,
1275 struct encode_state *encode_state,
1276 struct intel_encoder_context *encoder_context,
1277 struct i965_gpe_context *gpe_context)
1279 struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
1281 struct object_surface *obj_surface;
1282 struct gen9_surface_vp9 *vp9_priv_surface;
1284 /* sScaled4xSurface surface */
1285 obj_surface = encode_state->reconstructed_object;
1287 vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
/* Use the 4x-downscaled companion of the reconstructed surface. */
1289 obj_surface = vp9_priv_surface->scaled_4x_surface_obj;
1290 i965_add_2d_gpe_surface(ctx, gpe_context,
1293 I965_SURFACEFORMAT_R8_UNORM,
1294 VP9_BTI_BRC_SRCY4X_G9
/* Same surface again, bound as a VME (adv) surface for coarse intra. */
1297 i965_add_adv_gpe_surface(ctx, gpe_context,
1299 VP9_BTI_BRC_VME_COARSE_INTRA_G9);
1301 i965_add_buffer_2d_gpe_surface(ctx,
1303 &vme_context->s4x_memv_distortion_buffer,
1305 I965_SURFACEFORMAT_R8_UNORM,
1306 VP9_BTI_BRC_DISTORTION_G9);
1311 /* BRC intra-distortion kernel launch */
/*
 * gen9_vp9_brc_intra_dist_kernel - run the I-frame distortion estimation
 * kernel used by BRC.
 *
 * Fills the BRC CURBE for VP9_MEDIA_STATE_ENC_I_FRAME_DIST, zeroes the
 * distortion output buffer, binds the intra-dist surfaces, and walks the
 * kernel over the 4x-downscaled frame (one thread per MB, no
 * dependencies).
 *
 * Returns VA_STATUS_SUCCESS, or VA_STATUS_ERROR_INVALID_PARAMETER when
 * the encoder state or picture parameters are missing.
 *
 * NOTE(review): some lines are elided in this listing (non-contiguous
 * embedded line numbers); confirm against the upstream file.
 */
1313 gen9_vp9_brc_intra_dist_kernel(VADriverContextP ctx,
1314 struct encode_state *encode_state,
1315 struct intel_encoder_context *encoder_context)
1317 struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
1318 struct vp9_brc_context *brc_context = &vme_context->brc_context;
1319 struct i965_gpe_context *gpe_context;
1320 int gpe_index = VP9_BRC_INTRA_DIST;
1321 int media_function = VP9_MEDIA_STATE_ENC_I_FRAME_DIST;
1322 struct gen9_vp9_brc_curbe_param brc_intra_dist_curbe;
1323 VAEncPictureParameterBufferVP9 *pic_param;
1324 struct gen9_vp9_state *vp9_state;
1325 struct gpe_media_object_walker_parameter media_object_walker_param;
1326 struct gpe_encoder_kernel_walker_parameter kernel_walker_param;
1328 vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
1330 if (!vp9_state || !vp9_state->pic_param)
1331 return VA_STATUS_ERROR_INVALID_PARAMETER;
1333 pic_param = vp9_state->pic_param;
1335 gpe_context = &brc_context->gpe_contexts[gpe_index];
1337 gen8_gpe_context_init(ctx, gpe_context);
1338 gen9_gpe_reset_binding_table(ctx, gpe_context);
/* Same curbe-param layout as init/reset; only media_state_type differs. */
1340 brc_intra_dist_curbe.media_state_type = media_function;
1341 brc_intra_dist_curbe.curr_frame = pic_param->reconstructed_frame;
1342 brc_intra_dist_curbe.ppic_param = vp9_state->pic_param;
1343 brc_intra_dist_curbe.pseq_param = vp9_state->seq_param;
1344 brc_intra_dist_curbe.psegment_param = vp9_state->segment_param;
1345 brc_intra_dist_curbe.frame_width = vp9_state->frame_width;
1346 brc_intra_dist_curbe.frame_height = vp9_state->frame_height;
1347 brc_intra_dist_curbe.pbrc_init_current_target_buf_full_in_bits =
1348 &vp9_state->brc_init_current_target_buf_full_in_bits;
1349 brc_intra_dist_curbe.pbrc_init_reset_buf_size_in_bits =
1350 &vp9_state->brc_init_reset_buf_size_in_bits;
1351 brc_intra_dist_curbe.pbrc_init_reset_input_bits_per_frame =
1352 &vp9_state->brc_init_reset_input_bits_per_frame;
1353 brc_intra_dist_curbe.picture_coding_type = vp9_state->picture_coding_type;
1354 brc_intra_dist_curbe.initbrc = !vp9_state->brc_inited;
1355 brc_intra_dist_curbe.mbbrc_enabled = 0;
1356 brc_intra_dist_curbe.ref_frame_flag = vp9_state->ref_frame_flag;
1358 vme_context->pfn_set_curbe_brc(ctx, encode_state,
1361 &brc_intra_dist_curbe);
1363 /* zero distortion buffer */
1364 i965_zero_gpe_resource(&vme_context->s4x_memv_distortion_buffer);
1366 gen9_brc_intra_dist_add_surfaces_vp9(ctx, encode_state, encoder_context, gpe_context);
1367 gen8_gpe_setup_interface_data(ctx, gpe_context);
/* Walk over the 4x-downscaled frame in MB units, no inter-thread deps. */
1369 memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
1370 kernel_walker_param.resolution_x = vp9_state->downscaled_width_4x_in_mb;
1371 kernel_walker_param.resolution_y = vp9_state->downscaled_height_4x_in_mb;
1372 kernel_walker_param.no_dependency = 1;
1374 gen9_init_media_object_walker_parameter(encoder_context, &kernel_walker_param, &media_object_walker_param);
1376 gen9_run_kernel_media_object_walker(ctx, encoder_context,
1379 &media_object_walker_param);
1381 return VA_STATUS_SUCCESS;
/*
 * intel_vp9enc_construct_picstate_batchbuf - build the HCP_VP9_PIC_STATE
 * second-level batch buffer consumed by the PAK hardware.
 *
 * Writes one VP9_PIC_STATE_BUFFER_SIZE-byte copy of the command per BRC
 * pass (four passes) into @gpe_resource, encoding frame geometry, loop
 * filter, segmentation flags, QP deltas (sign/magnitude converted), and
 * reference-frame scaling factors, and terminates each copy with
 * MI_BATCH_BUFFER_END.
 *
 * NOTE(review): this listing has elided lines (non-contiguous embedded
 * line numbers): declarations of pdata/i/j, several closing braces, some
 * cmd_value initializations, and a few *cmd_ptr++ continuations are not
 * visible. Comments below describe only the visible code — confirm
 * against the upstream file.
 */
1385 intel_vp9enc_construct_picstate_batchbuf(VADriverContextP ctx,
1386 struct encode_state *encode_state,
1387 struct intel_encoder_context *encoder_context,
1388 struct i965_gpe_resource *gpe_resource)
1390 struct gen9_vp9_state *vp9_state;
1391 VAEncPictureParameterBufferVP9 *pic_param;
1392 int frame_width_minus1, frame_height_minus1;
1393 int is_lossless = 0;
1394 int is_intra_only = 0;
1395 unsigned int last_frame_type;
1396 unsigned int ref_flags;
1397 unsigned int use_prev_frame_mvs, adapt_flag;
1398 struct gen9_surface_vp9 *vp9_surface = NULL;
1399 struct object_surface *obj_surface = NULL;
1400 uint32_t scale_h = 0;
1401 uint32_t scale_w = 0;
1405 unsigned int *cmd_ptr, cmd_value, tmp;
1407 pdata = i965_map_gpe_resource(gpe_resource);
1408 vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
1410 if (!vp9_state || !vp9_state->pic_param || !pdata)
1413 pic_param = vp9_state->pic_param;
/* HW expects dimension-minus-one, 8-pixel aligned. */
1414 frame_width_minus1 = ALIGN(pic_param->frame_width_dst, 8) - 1;
1415 frame_height_minus1 = ALIGN(pic_param->frame_height_dst, 8) - 1;
/* Lossless mode: base qindex 0 with all QP deltas zero (the
 * is_lossless assignment appears to be on an elided line). */
1416 if ((pic_param->luma_ac_qindex == 0) &&
1417 (pic_param->luma_dc_qindex_delta == 0) &&
1418 (pic_param->chroma_ac_qindex_delta == 0) &&
1419 (pic_param->chroma_dc_qindex_delta == 0))
/* frame_type != 0 means inter frame; intra_only applies to those. */
1422 if (pic_param->pic_flags.bits.frame_type)
1423 is_intra_only = pic_param->pic_flags.bits.intra_only;
1425 last_frame_type = vp9_state->vp9_last_frame.frame_type;
1427 use_prev_frame_mvs = 0;
1428 if (pic_param->pic_flags.bits.frame_type == HCP_VP9_KEY_FRAME) {
1429 last_frame_type = 0;
1432 ref_flags = ((pic_param->ref_flags.bits.ref_arf_sign_bias << 9) |
1433 (pic_param->ref_flags.bits.ref_gf_sign_bias << 8) |
1434 (pic_param->ref_flags.bits.ref_last_sign_bias << 7)
/* Previous-frame MVs are reusable only when the last frame was a shown,
 * same-sized, non-intra inter frame and error resilience is off. */
1436 if (!pic_param->pic_flags.bits.error_resilient_mode &&
1437 (pic_param->frame_width_dst == vp9_state->vp9_last_frame.frame_width) &&
1438 (pic_param->frame_height_dst == vp9_state->vp9_last_frame.frame_height) &&
1439 !pic_param->pic_flags.bits.intra_only &&
1440 vp9_state->vp9_last_frame.show_frame &&
1441 ((vp9_state->vp9_last_frame.frame_type == HCP_VP9_INTER_FRAME) &&
1442 !vp9_state->vp9_last_frame.intra_only)
1444 use_prev_frame_mvs = 1;
/* adapt_flag is presumably set in the elided body of this condition. */
1447 if (!pic_param->pic_flags.bits.error_resilient_mode &&
1448 !pic_param->pic_flags.bits.frame_parallel_decoding_mode)
/* Emit one PIC_STATE copy per BRC/PAK pass. */
1451 for (i = 0; i < 4; i++) {
1452 uint32_t non_first_pass;
1457 cmd_ptr = (unsigned int *)(pdata + i * VP9_PIC_STATE_BUFFER_SIZE);
1459 *cmd_ptr++ = (HCP_VP9_PIC_STATE | (33 - 2));
1460 *cmd_ptr++ = (frame_height_minus1 << 16 |
1461 frame_width_minus1);
1463 *cmd_ptr++ = (0 << 31 | /* disable segment_in */
1464 0 << 30 | /* disable segment_out */
1465 is_lossless << 29 | /* loseless */
1466 (pic_param->pic_flags.bits.segmentation_enabled && pic_param->pic_flags.bits.segmentation_temporal_update) << 28 | /* temporal update */
1467 (pic_param->pic_flags.bits.segmentation_enabled && pic_param->pic_flags.bits.segmentation_update_map) << 27 | /* map update */
1468 (pic_param->pic_flags.bits.segmentation_enabled << 26) |
1469 (pic_param->sharpness_level << 23) |
1470 (pic_param->filter_level << 17) |
1471 (pic_param->pic_flags.bits.frame_parallel_decoding_mode << 16) |
1472 (pic_param->pic_flags.bits.error_resilient_mode << 15) |
1473 (pic_param->pic_flags.bits.refresh_frame_context << 14) |
1474 (last_frame_type << 13) |
1475 (vp9_state->tx_mode == TX_MODE_SELECT) << 12 |
1476 (pic_param->pic_flags.bits.comp_prediction_mode == REFERENCE_MODE_SELECT) << 11 |
1477 (use_prev_frame_mvs) << 10 |
1479 (pic_param->pic_flags.bits.mcomp_filter_type << 4) |
1480 (pic_param->pic_flags.bits.allow_high_precision_mv << 3) |
1481 (is_intra_only << 2) |
1483 (pic_param->pic_flags.bits.frame_type) << 0);
1485 *cmd_ptr++ = ((0 << 28) | /* VP9Profile0 */
1486 (0 << 24) | /* 8-bit depth */
1487 (0 << 22) | /* only 420 format */
1488 (0 << 0) | /* sse statistics */
1489 (pic_param->log2_tile_rows << 8) |
1490 (pic_param->log2_tile_columns << 0));
/* Inter frame: per-reference horizontal/vertical scale factors in
 * Q14 relative to the destination size. */
1493 if (pic_param->pic_flags.bits.frame_type &&
1494 !pic_param->pic_flags.bits.intra_only) {
1495 for (j = 0; j < 3; j++) {
1496 obj_surface = encode_state->reference_objects[j];
1499 if (obj_surface && obj_surface->private_data) {
1500 vp9_surface = obj_surface->private_data;
1501 scale_w = (vp9_surface->frame_width << 14) / pic_param->frame_width_dst;
1502 scale_h = (vp9_surface->frame_height << 14) / pic_param->frame_height_dst;
1503 *cmd_ptr++ = (scale_w << 16 |
/* Per-reference frame dimensions (minus one). */
1514 for (j = 0; j < 3; j++) {
1515 obj_surface = encode_state->reference_objects[j];
1518 if (obj_surface && obj_surface->private_data) {
1519 vp9_surface = obj_surface->private_data;
1520 *cmd_ptr++ = (vp9_surface->frame_height - 1) << 16 |
1521 (vp9_surface->frame_width - 1);
1528 *cmd_ptr++ = (1 << 1);
1532 *cmd_ptr++ = ((1 << 25) | /* header insertation for VP9 */
1533 (0 << 24) | /* tail insertation */
1534 (pic_param->luma_ac_qindex << 16) |
1535 0 /* compressed header bin count */);
/* QP deltas are encoded as 5-bit sign/magnitude fields. */
1538 tmp = intel_convert_sign_mag(pic_param->luma_dc_qindex_delta, 5);
1539 cmd_value = (tmp << 16);
1540 tmp = intel_convert_sign_mag(pic_param->chroma_dc_qindex_delta, 5);
1541 cmd_value |= (tmp << 8);
1542 tmp = intel_convert_sign_mag(pic_param->chroma_ac_qindex_delta, 5);
1544 *cmd_ptr++ = cmd_value;
/* Loop-filter reference deltas, 7-bit sign/magnitude, four per dword. */
1546 tmp = intel_convert_sign_mag(pic_param->ref_lf_delta[0], 7);
1548 tmp = intel_convert_sign_mag(pic_param->ref_lf_delta[1], 7);
1549 cmd_value |= (tmp << 8);
1550 tmp = intel_convert_sign_mag(pic_param->ref_lf_delta[2], 7);
1551 cmd_value |= (tmp << 16);
1552 tmp = intel_convert_sign_mag(pic_param->ref_lf_delta[3], 7);
1553 cmd_value |= (tmp << 24);
1554 *cmd_ptr++ = cmd_value;
/* Loop-filter mode deltas, two per dword. */
1557 tmp = intel_convert_sign_mag(pic_param->mode_lf_delta[0], 7);
1559 tmp = intel_convert_sign_mag(pic_param->mode_lf_delta[1], 7);
1560 cmd_value |= (tmp << 8);
1561 *cmd_ptr++ = cmd_value;
/* Bit offsets of re-writable header fields, so BRC can patch them. */
1564 *cmd_ptr++ = vp9_state->frame_header.bit_offset_ref_lf_delta |
1565 (vp9_state->frame_header.bit_offset_mode_lf_delta << 16);
1566 *cmd_ptr++ = vp9_state->frame_header.bit_offset_qindex |
1567 (vp9_state->frame_header.bit_offset_lf_level << 16);
1570 *cmd_ptr++ = (1 << 26 | (1 << 25) |
1571 non_first_pass << 16);
1573 *cmd_ptr++ = (1 << 31) | (256);
1576 *cmd_ptr++ = (0 << 31) | 1;
1578 /* dw22-dw24. Frame_delta_qindex_range */
1583 /* dw25-26. frame_delta_lf_range */
1587 /* dw27. frame_delta_lf_min */
1596 *cmd_ptr++ = (0 << 30) | 1;
1598 *cmd_ptr++ = vp9_state->frame_header.bit_offset_first_partition_size;
/* Terminate this pass's second-level batch. */
1601 *cmd_ptr++ = MI_BATCH_BUFFER_END;
1604 i965_unmap_gpe_resource(gpe_resource);
/*
 * gen9_brc_update_add_surfaces_vp9 - bind the 12 surfaces consumed and
 * produced by the BRC update kernel, in binding-table order: history,
 * constant data, distortion, MMDK/PAK output, the MbEnc CURBE (read and
 * written in place so BRC can patch MbEnc parameters), PIC_STATE
 * read/write, SEGMENT_STATE read/write, bitstream size, and HFW data.
 *
 * NOTE(review): several argument lines of the i965_add_*/gen9_add_*
 * calls are elided in this listing — confirm against the upstream file.
 */
1608 gen9_brc_update_add_surfaces_vp9(VADriverContextP ctx,
1609 struct encode_state *encode_state,
1610 struct intel_encoder_context *encoder_context,
1611 struct i965_gpe_context *brc_gpe_context,
1612 struct i965_gpe_context *mbenc_gpe_context)
1614 struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
1616 /* 0. BRC history buffer */
1617 i965_add_buffer_gpe_surface(ctx,
1619 &vme_context->res_brc_history_buffer,
1621 vme_context->res_brc_history_buffer.size,
1623 VP9_BTI_BRC_HISTORY_G9);
1625 /* 1. Constant data buffer */
1626 i965_add_buffer_gpe_surface(ctx,
1628 &vme_context->res_brc_const_data_buffer,
1630 vme_context->res_brc_const_data_buffer.size,
1632 VP9_BTI_BRC_CONSTANT_DATA_G9);
1634 /* 2. Distortion 2D surface buffer */
1635 i965_add_buffer_2d_gpe_surface(ctx,
1637 &vme_context->s4x_memv_distortion_buffer,
1639 I965_SURFACEFORMAT_R8_UNORM,
1640 VP9_BTI_BRC_DISTORTION_G9);
/* 3. MMDK PAK output buffer */
1643 i965_add_buffer_gpe_surface(ctx,
1645 &vme_context->res_brc_mmdk_pak_buffer,
1647 vme_context->res_brc_mmdk_pak_buffer.size,
1649 VP9_BTI_BRC_MMDK_PAK_OUTPUT_G9);
1650 /* 4. Mbenc curbe input buffer */
1651 gen9_add_dri_buffer_gpe_surface(ctx,
1653 mbenc_gpe_context->curbe.bo,
1655 ALIGN(mbenc_gpe_context->curbe.length, 64),
1656 mbenc_gpe_context->curbe.offset,
1657 VP9_BTI_BRC_MBENC_CURBE_INPUT_G9);
1658 /* 5. Mbenc curbe output buffer (same BO: BRC patches it in place) */
1659 gen9_add_dri_buffer_gpe_surface(ctx,
1661 mbenc_gpe_context->curbe.bo,
1663 ALIGN(mbenc_gpe_context->curbe.length, 64),
1664 mbenc_gpe_context->curbe.offset,
1665 VP9_BTI_BRC_MBENC_CURBE_OUTPUT_G9);
1667 /* 6. BRC_PIC_STATE read buffer */
1668 i965_add_buffer_gpe_surface(ctx, brc_gpe_context,
1669 &vme_context->res_pic_state_brc_read_buffer,
1671 vme_context->res_pic_state_brc_read_buffer.size,
1673 VP9_BTI_BRC_PIC_STATE_INPUT_G9);
1675 /* 7. BRC_PIC_STATE write buffer */
1676 i965_add_buffer_gpe_surface(ctx, brc_gpe_context,
1677 &vme_context->res_pic_state_brc_write_hfw_read_buffer,
1679 vme_context->res_pic_state_brc_write_hfw_read_buffer.size,
1681 VP9_BTI_BRC_PIC_STATE_OUTPUT_G9);
1683 /* 8. SEGMENT_STATE read buffer */
1684 i965_add_buffer_gpe_surface(ctx, brc_gpe_context,
1685 &vme_context->res_seg_state_brc_read_buffer,
1687 vme_context->res_seg_state_brc_read_buffer.size,
1689 VP9_BTI_BRC_SEGMENT_STATE_INPUT_G9);
1691 /* 9. SEGMENT_STATE write buffer */
1692 i965_add_buffer_gpe_surface(ctx, brc_gpe_context,
1693 &vme_context->res_seg_state_brc_write_buffer,
1695 vme_context->res_seg_state_brc_write_buffer.size,
1697 VP9_BTI_BRC_SEGMENT_STATE_OUTPUT_G9);
1699 /* 10. Bitstream size buffer */
1700 i965_add_buffer_gpe_surface(ctx, brc_gpe_context,
1701 &vme_context->res_brc_bitstream_size_buffer,
1703 vme_context->res_brc_bitstream_size_buffer.size,
1705 VP9_BTI_BRC_BITSTREAM_SIZE_G9);
/* 11. HFW data output buffer */
1707 i965_add_buffer_gpe_surface(ctx, brc_gpe_context,
1708 &vme_context->res_brc_hfw_data_buffer,
1710 vme_context->res_brc_hfw_data_buffer.size,
1712 VP9_BTI_BRC_HFW_DATA_G9);
/*
 * gen9_vp9_brc_update_kernel - run the per-frame BRC update kernel.
 *
 * First fills the MbEnc CURBE (inter or key-frame variant, chosen from
 * picture_coding_type) so BRC can patch it, then fills the BRC update
 * CURBE, uploads the frame-type-specific BRC constant table, rebuilds
 * the PIC_STATE read buffer with filter_level cleared (BRC determines
 * the final level), binds the update surfaces, and dispatches a single
 * media object.
 *
 * Returns VA_STATUS_SUCCESS, VA_STATUS_ERROR_INVALID_PARAMETER on
 * missing state, or VA_STATUS_ERROR_OPERATION_FAILED when mapping the
 * constant-data buffer fails.
 *
 * NOTE(review): some lines are elided in this listing; in particular
 * the declaration of `mbenc_function` (used below) is not visible —
 * confirm against the upstream file.
 */
1718 gen9_vp9_brc_update_kernel(VADriverContextP ctx,
1719 struct encode_state *encode_state,
1720 struct intel_encoder_context *encoder_context)
1722 struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
1723 struct vp9_brc_context *brc_context = &vme_context->brc_context;
1724 struct i965_gpe_context *brc_gpe_context, *mbenc_gpe_context;
1725 int mbenc_index, gpe_index = VP9_BRC_UPDATE;
1726 int media_function = VP9_MEDIA_STATE_BRC_UPDATE;
1728 struct gen9_vp9_brc_curbe_param brc_update_curbe_param;
1729 VAEncPictureParameterBufferVP9 *pic_param;
1730 struct gen9_vp9_state *vp9_state;
1731 struct gen9_vp9_mbenc_curbe_param mbenc_curbe_param;
1732 struct gpe_media_object_parameter media_object_param;
1734 vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
1735 if (!vp9_state || !vp9_state->pic_param)
1736 return VA_STATUS_ERROR_INVALID_PARAMETER;
1738 pic_param = vp9_state->pic_param;
1739 // Setup VP9 MbEnc Curbe
/* Non-zero picture_coding_type means an inter (P) frame here; the
 * key-frame branch is presumably the elided else arm. */
1740 if (vp9_state->picture_coding_type) {
1741 mbenc_function = VP9_MEDIA_STATE_MBENC_P;
1742 mbenc_index = VP9_MBENC_IDX_INTER;
1744 mbenc_function = VP9_MEDIA_STATE_MBENC_I_32x32;
1745 mbenc_index = VP9_MBENC_IDX_KEY_32x32;
1748 mbenc_gpe_context = &(vme_context->mbenc_context.gpe_contexts[mbenc_index]);
1750 memset(&mbenc_curbe_param, 0, sizeof(mbenc_curbe_param));
1752 mbenc_curbe_param.ppic_param = vp9_state->pic_param;
1753 mbenc_curbe_param.pseq_param = vp9_state->seq_param;
1754 mbenc_curbe_param.psegment_param = vp9_state->segment_param;
1755 //mbenc_curbe_param.ppRefList = &(vp9_state->pRefList[0]);
1756 mbenc_curbe_param.last_ref_obj = vp9_state->last_ref_obj;
1757 mbenc_curbe_param.golden_ref_obj = vp9_state->golden_ref_obj;
1758 mbenc_curbe_param.alt_ref_obj = vp9_state->alt_ref_obj;
1759 mbenc_curbe_param.frame_width_in_mb = ALIGN(vp9_state->frame_width, 16) / 16;
1760 mbenc_curbe_param.frame_height_in_mb = ALIGN(vp9_state->frame_height, 16) / 16;
1761 mbenc_curbe_param.hme_enabled = vp9_state->hme_enabled;
1762 mbenc_curbe_param.ref_frame_flag = vp9_state->ref_frame_flag;
1763 mbenc_curbe_param.multi_ref_qp_check = vp9_state->multi_ref_qp_check;
1764 mbenc_curbe_param.picture_coding_type = vp9_state->picture_coding_type;
1765 mbenc_curbe_param.media_state_type = mbenc_function;
1767 vme_context->pfn_set_curbe_mbenc(ctx, encode_state,
1770 &mbenc_curbe_param);
/* Record that the MbEnc CURBE was already filled here, so the MbEnc
 * stage does not overwrite BRC's patched values. */
1772 vp9_state->mbenc_curbe_set_in_brc_update = true;
1774 brc_gpe_context = &brc_context->gpe_contexts[gpe_index];
1776 gen8_gpe_context_init(ctx, brc_gpe_context);
1777 gen9_gpe_reset_binding_table(ctx, brc_gpe_context);
1779 memset(&brc_update_curbe_param, 0, sizeof(brc_update_curbe_param));
1781 // Setup BRC Update Curbe
1782 brc_update_curbe_param.media_state_type = media_function;
1783 brc_update_curbe_param.curr_frame = pic_param->reconstructed_frame;
1784 brc_update_curbe_param.ppic_param = vp9_state->pic_param;
1785 brc_update_curbe_param.pseq_param = vp9_state->seq_param;
1786 brc_update_curbe_param.psegment_param = vp9_state->segment_param;
1787 brc_update_curbe_param.picture_coding_type = vp9_state->picture_coding_type;
1788 brc_update_curbe_param.frame_width_in_mb = ALIGN(vp9_state->frame_width, 16) / 16;
1789 brc_update_curbe_param.frame_height_in_mb = ALIGN(vp9_state->frame_height, 16) / 16;
1790 brc_update_curbe_param.hme_enabled = vp9_state->hme_enabled;
1791 brc_update_curbe_param.b_used_ref = 1;
1792 brc_update_curbe_param.frame_number = vp9_state->frame_number;
1793 brc_update_curbe_param.ref_frame_flag = vp9_state->ref_frame_flag;
1794 brc_update_curbe_param.mbbrc_enabled = 0;
1795 brc_update_curbe_param.multi_ref_qp_check = vp9_state->multi_ref_qp_check;
1796 brc_update_curbe_param.brc_num_pak_passes = vp9_state->num_pak_passes;
1798 brc_update_curbe_param.pbrc_init_current_target_buf_full_in_bits =
1799 &vp9_state->brc_init_current_target_buf_full_in_bits;
1800 brc_update_curbe_param.pbrc_init_reset_buf_size_in_bits =
1801 &vp9_state->brc_init_reset_buf_size_in_bits;
1802 brc_update_curbe_param.pbrc_init_reset_input_bits_per_frame =
1803 &vp9_state->brc_init_reset_input_bits_per_frame;
1805 vme_context->pfn_set_curbe_brc(ctx, encode_state,
1808 &brc_update_curbe_param);
1811 // Check if the constant data surface is present
1812 if (vp9_state->brc_constant_buffer_supported) {
1813 char *brc_const_buffer;
1814 brc_const_buffer = i965_map_gpe_resource(&vme_context->res_brc_const_data_buffer);
1816 if (!brc_const_buffer)
1817 return VA_STATUS_ERROR_OPERATION_FAILED;
/* Upload the P-frame or I-frame constant table to match frame type
 * (the else keyword sits on an elided line). */
1819 if (vp9_state->picture_coding_type)
1820 memcpy(brc_const_buffer, vp9_brc_const_data_p_g9,
1821 sizeof(vp9_brc_const_data_p_g9));
1823 memcpy(brc_const_buffer, vp9_brc_const_data_i_g9,
1824 sizeof(vp9_brc_const_data_i_g9));
1826 i965_unmap_gpe_resource(&vme_context->res_brc_const_data_buffer);
1829 if (pic_param->pic_flags.bits.segmentation_enabled) {
1830 //reallocate the vme_state->mb_segment_map_surface
1831 /* this will be added later */
1835 pic_param->filter_level = 0;
1836 // clear the filter level value in picParams before programming pic state, as this value will be determined and updated by BRC.
1837 intel_vp9enc_construct_picstate_batchbuf(ctx, encode_state,
1838 encoder_context, &vme_context->res_pic_state_brc_read_buffer);
1841 gen9_brc_update_add_surfaces_vp9(ctx, encode_state,
1846 gen8_gpe_setup_interface_data(ctx, brc_gpe_context);
1847 memset(&media_object_param, 0, sizeof(media_object_param));
1848 gen9_run_kernel_media_object(ctx, encoder_context,
1851 &media_object_param);
1852 return VA_STATUS_SUCCESS;
/*
 * gen9_vp9_set_curbe_me - program the CURBE for the HME (hierarchical
 * motion estimation) kernel, 4x or 16x level.
 *
 * Chooses the ME mode from param->b16xme_enabled/use_16x_me, maps and
 * zeroes the CURBE, fills search parameters and the scaled picture
 * dimensions, copies the diamond IME search-path table at offset 64,
 * writes the binding-table indices, and unmaps the CURBE.
 *
 * NOTE(review): lines are elided in this listing (non-contiguous
 * embedded line numbers); in particular scale_factor is only assigned
 * for the ME16X_BEFORE_ME4X case visibly — the remaining assignments
 * are presumably on elided lines. Confirm against the upstream file.
 */
1856 void gen9_vp9_set_curbe_me(VADriverContextP ctx,
1857 struct encode_state *encode_state,
1858 struct i965_gpe_context *gpe_context,
1859 struct intel_encoder_context *encoder_context,
1860 struct gen9_vp9_me_curbe_param *param)
1862 vp9_me_curbe_data *me_cmd;
1863 int enc_media_state;
1865 unsigned int width, height;
1866 uint32_t l0_ref_frames;
1867 uint32_t scale_factor;
/* Determine the ME pass ordering: 16x first, then 4x refinement, or
 * 4x only when 16x HME is disabled. */
1869 if (param->b16xme_enabled) {
1870 if (param->use_16x_me)
1871 me_mode = VP9_ENC_ME16X_BEFORE_ME4X;
1873 me_mode = VP9_ENC_ME4X_AFTER_ME16X;
1875 me_mode = VP9_ENC_ME4X_ONLY;
1878 if (me_mode == VP9_ENC_ME16X_BEFORE_ME4X)
1883 if (param->use_16x_me)
1884 enc_media_state = VP9_MEDIA_STATE_16X_ME;
1886 enc_media_state = VP9_MEDIA_STATE_4X_ME;
1888 me_cmd = i965_gpe_context_map_curbe(gpe_context);
1893 memset(me_cmd, 0, sizeof(vp9_me_curbe_data));
/* Fixed search configuration for the VME ME kernel. */
1895 me_cmd->dw1.max_num_mvs = 0x10;
1896 me_cmd->dw1.bi_weight = 0x00;
1898 me_cmd->dw2.max_num_su = 0x39;
1899 me_cmd->dw2.max_len_sp = 0x39;
1901 me_cmd->dw3.sub_mb_part_mask = 0x77;
1902 me_cmd->dw3.inter_sad = 0x00;
1903 me_cmd->dw3.intra_sad = 0x00;
1904 me_cmd->dw3.bme_disable_fbr = 0x01;
1905 me_cmd->dw3.sub_pel_mode = 0x03;
/* Downscale the frame dimensions for this ME level. */
1907 width = param->frame_width / scale_factor;
1908 height = param->frame_height / scale_factor;
1910 me_cmd->dw4.picture_width = ALIGN(width, 16) / 16;
1911 me_cmd->dw4.picture_height_minus1 = ALIGN(height, 16) / 16 - 1;
1913 me_cmd->dw5.ref_width = 0x30;
1914 me_cmd->dw5.ref_height = 0x28;
/* Only the 4x pass writes distortion output for BRC. */
1916 if (enc_media_state == VP9_MEDIA_STATE_4X_ME)
1917 me_cmd->dw6.write_distortions = 0x01;
/* 4x-after-16x refines MVs produced by the earlier 16x pass. */
1919 me_cmd->dw6.use_mv_from_prev_step = me_mode == VP9_ENC_ME4X_AFTER_ME16X ? 1 : 0;
1920 me_cmd->dw6.super_combine_dist = 0x5;
1921 me_cmd->dw6.max_vmvr = 0x7fc;
/* Count active L0 references from the LAST/GOLDEN/ALT flag bits. */
1923 l0_ref_frames = (param->ref_frame_flag & 0x01) +
1924 !!(param->ref_frame_flag & 0x02) +
1925 !!(param->ref_frame_flag & 0x04);
1926 me_cmd->dw13.num_ref_idx_l0_minus1 = (l0_ref_frames > 0) ? l0_ref_frames - 1 : 0;
1927 me_cmd->dw13.num_ref_idx_l1_minus1 = 0;
1929 me_cmd->dw14.l0_ref_pic_polarity_bits = 0;
1930 me_cmd->dw14.l1_ref_pic_polarity_bits = 0;
1932 me_cmd->dw15.mv_shift_factor = 0x02;
/* IME search path delta table lives at byte offset 64 of the CURBE. */
1935 memcpy((void *)((char *)me_cmd + 64),
1936 vp9_diamond_ime_search_path_delta,
1937 sizeof(vp9_diamond_ime_search_path_delta));
/* Binding-table indices for the ME kernel surfaces. */
1941 me_cmd->dw32._4x_memv_output_data_surf_index = VP9_BTI_ME_MV_DATA_SURFACE;
1942 me_cmd->dw33._16x_32x_memv_input_data_surf_index = VP9_BTI_16XME_MV_DATA_SURFACE;
1943 me_cmd->dw34._4x_me_output_dist_surf_index = VP9_BTI_ME_DISTORTION_SURFACE;
1944 me_cmd->dw35._4x_me_output_brc_dist_surf_index = VP9_BTI_ME_BRC_DISTORTION_SURFACE;
1945 me_cmd->dw36.vme_fwd_inter_pred_surf_index = VP9_BTI_ME_CURR_PIC_L0;
1946 me_cmd->dw37.vme_bdw_inter_pred_surf_index = VP9_BTI_ME_CURR_PIC_L1;
1948 i965_gpe_context_unmap_curbe(gpe_context);
/* Populate the GPE binding table for the VP9 HME kernel (4x or 16x pass):
 * the MV-data output buffer, the distortion outputs (4x pass only), and the
 * downscaled current/reference pictures used as VME inputs.
 * When DYS (dynamic scaling) is active and a reference's stored resolution
 * differs from the current frame, the DYS-rescaled copy is bound instead. */
1952 gen9_vp9_send_me_surface(VADriverContextP ctx,
1953 struct encode_state *encode_state,
1954 struct i965_gpe_context *gpe_context,
1955 struct intel_encoder_context *encoder_context,
1956 struct gen9_vp9_me_surface_param *param)
/* i965 is required by the SURFACE() lookup macro below */
1958 struct i965_driver_data *i965 = i965_driver_data(ctx);
1959 struct object_surface *obj_surface;
1960 struct gen9_surface_vp9 *vp9_priv_surface;
1961 struct object_surface *input_surface;
1962 struct i965_gpe_resource *gpe_resource;
1965 obj_surface = SURFACE(param->curr_pic);
/* guard: the current picture must carry its VP9 private surface data */
1967 if (!obj_surface || !obj_surface->private_data)
1970 vp9_priv_surface = obj_surface->private_data;
/* MV-data output: 16x buffer for the 16x pass, 4x buffer otherwise */
1971 if (param->use_16x_me) {
1972 gpe_resource = param->pres_16x_memv_data_buffer;
1974 gpe_resource = param->pres_4x_memv_data_buffer;
1977 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
1980 I965_SURFACEFORMAT_R8_UNORM,
1981 VP9_BTI_ME_MV_DATA_SURFACE);
/* the 4x pass additionally reads the 16x pass MV predictors */
1983 if (param->b16xme_enabled) {
1984 gpe_resource = param->pres_16x_memv_data_buffer;
1985 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
1988 I965_SURFACEFORMAT_R8_UNORM,
1989 VP9_BTI_16XME_MV_DATA_SURFACE);
/* distortion outputs (BRC + regular) are produced by the 4x pass only */
1992 if (!param->use_16x_me) {
1993 gpe_resource = param->pres_me_brc_distortion_buffer;
1995 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
1998 I965_SURFACEFORMAT_R8_UNORM,
1999 VP9_BTI_ME_BRC_DISTORTION_SURFACE);
2001 gpe_resource = param->pres_me_distortion_buffer;
2003 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
2006 I965_SURFACEFORMAT_R8_UNORM,
2007 VP9_BTI_ME_DISTORTION_SURFACE);
/* current picture: pick the downscaled copy matching this pass */
2010 if (param->use_16x_me)
2011 input_surface = vp9_priv_surface->scaled_16x_surface_obj;
2013 input_surface = vp9_priv_surface->scaled_4x_surface_obj;
2015 i965_add_adv_gpe_surface(ctx, gpe_context,
2017 VP9_BTI_ME_CURR_PIC_L0);
/* references are bound at consecutive BTIs after the current picture */
2019 ref_bti = VP9_BTI_ME_CURR_PIC_L0 + 1;
2022 if (param->last_ref_pic) {
2023 obj_surface = param->last_ref_pic;
2024 vp9_priv_surface = obj_surface->private_data;
2026 if (param->use_16x_me)
2027 input_surface = vp9_priv_surface->scaled_16x_surface_obj;
2029 input_surface = vp9_priv_surface->scaled_4x_surface_obj;
/* resolution mismatch under DYS: use the DYS-rescaled downscaled copy */
2031 if (param->dys_enabled &&
2032 ((vp9_priv_surface->frame_width != param->frame_width) ||
2033 (vp9_priv_surface->frame_height != param->frame_height))) {
2034 if (param->use_16x_me)
2035 input_surface = vp9_priv_surface->dys_16x_surface_obj;
2037 input_surface = vp9_priv_surface->dys_4x_surface_obj;
/* NOTE(review): the reference appears to be bound twice (presumably to
 * consecutive ref_bti slots) — confirm against the kernel's BTI layout */
2039 i965_add_adv_gpe_surface(ctx, gpe_context,
2042 i965_add_adv_gpe_surface(ctx, gpe_context,
/* golden reference: same selection logic as the last reference */
2048 if (param->golden_ref_pic) {
2049 obj_surface = param->golden_ref_pic;
2050 vp9_priv_surface = obj_surface->private_data;
2052 if (param->use_16x_me)
2053 input_surface = vp9_priv_surface->scaled_16x_surface_obj;
2055 input_surface = vp9_priv_surface->scaled_4x_surface_obj;
2057 if (param->dys_enabled &&
2058 ((vp9_priv_surface->frame_width != param->frame_width) ||
2059 (vp9_priv_surface->frame_height != param->frame_height))) {
2060 if (param->use_16x_me)
2061 input_surface = vp9_priv_surface->dys_16x_surface_obj;
2063 input_surface = vp9_priv_surface->dys_4x_surface_obj;
2066 i965_add_adv_gpe_surface(ctx, gpe_context,
2069 i965_add_adv_gpe_surface(ctx, gpe_context,
/* alternate reference: same selection logic again */
2075 if (param->alt_ref_pic) {
2076 obj_surface = param->alt_ref_pic;
2077 vp9_priv_surface = obj_surface->private_data;
2079 if (param->use_16x_me)
2080 input_surface = vp9_priv_surface->scaled_16x_surface_obj;
2082 input_surface = vp9_priv_surface->scaled_4x_surface_obj;
2084 if (param->dys_enabled &&
2085 ((vp9_priv_surface->frame_width != param->frame_width) ||
2086 (vp9_priv_surface->frame_height != param->frame_height))) {
2087 if (param->use_16x_me)
2088 input_surface = vp9_priv_surface->dys_16x_surface_obj;
2090 input_surface = vp9_priv_surface->dys_4x_surface_obj;
2092 i965_add_adv_gpe_surface(ctx, gpe_context,
2095 i965_add_adv_gpe_surface(ctx, gpe_context,
/* Fill a gen9_vp9_me_surface_param from the encoder state and delegate the
 * actual binding-table setup to vme_context->pfn_send_me_surface.
 * The 16x/4x downscaled dimensions are selected by the use_16x_me flag. */
2105 void gen9_me_add_surfaces_vp9(VADriverContextP ctx,
2106 struct encode_state *encode_state,
2107 struct intel_encoder_context *encoder_context,
2108 struct i965_gpe_context *gpe_context,
2111 struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
2112 struct gen9_vp9_me_surface_param me_surface_param;
2113 struct gen9_vp9_state *vp9_state;
2115 vp9_state = (struct gen9_vp9_state *)(encoder_context->enc_priv_state);
2117 /* sScaled4xSurface surface */
2118 memset(&me_surface_param, 0, sizeof(me_surface_param));
2119 me_surface_param.last_ref_pic = vp9_state->last_ref_obj;
2120 me_surface_param.golden_ref_pic = vp9_state->golden_ref_obj;
2121 me_surface_param.alt_ref_pic = vp9_state->alt_ref_obj;
2122 me_surface_param.curr_pic = vp9_state->curr_frame;
2123 me_surface_param.pres_4x_memv_data_buffer = &vme_context->s4x_memv_data_buffer;
2124 me_surface_param.pres_16x_memv_data_buffer = &vme_context->s16x_memv_data_buffer;
/* NOTE(review): both distortion params alias s4x_memv_distortion_buffer —
 * confirm BRC distortion is not meant to use a dedicated resource */
2125 me_surface_param.pres_me_distortion_buffer = &vme_context->s4x_memv_distortion_buffer;
2126 me_surface_param.pres_me_brc_distortion_buffer = &vme_context->s4x_memv_distortion_buffer;
/* pass-specific walker dimensions: 16x pass vs 4x pass */
2129 me_surface_param.downscaled_width_in_mb = vp9_state->downscaled_width_16x_in_mb;
2130 me_surface_param.downscaled_height_in_mb = vp9_state->downscaled_height_16x_in_mb;
2132 me_surface_param.downscaled_width_in_mb = vp9_state->downscaled_width_4x_in_mb;
2133 me_surface_param.downscaled_height_in_mb = vp9_state->downscaled_height_4x_in_mb;
2135 me_surface_param.frame_width = vp9_state->frame_width;
2136 me_surface_param.frame_height = vp9_state->frame_height;
2138 me_surface_param.use_16x_me = use_16x_me;
2139 me_surface_param.b16xme_enabled = vp9_state->b16xme_enabled;
2140 me_surface_param.dys_enabled = vp9_state->dys_in_use;
2142 vme_context->pfn_send_me_surface(ctx, encode_state,
/* Run one HME pass (4x or 16x) for the current VP9 frame:
 * set up CURBE and surfaces, then dispatch the ME kernel through the
 * media-object walker. Returns VA_STATUS_SUCCESS, or
 * VA_STATUS_ERROR_INVALID_PARAMETER when encoder state is missing. */
2150 gen9_vp9_me_kernel(VADriverContextP ctx,
2151 struct encode_state *encode_state,
2152 struct intel_encoder_context *encoder_context,
2155 struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
2156 struct i965_gpe_context *gpe_context;
2158 struct gen9_vp9_me_curbe_param me_curbe_param;
2159 struct gen9_vp9_state *vp9_state;
2160 struct gpe_media_object_walker_parameter media_object_walker_param;
2161 struct gpe_encoder_kernel_walker_parameter kernel_walker_param;
2163 vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
2164 if (!vp9_state || !vp9_state->pic_param)
2165 return VA_STATUS_ERROR_INVALID_PARAMETER;
/* select the media state id matching the pass (used for dispatch/debug) */
2168 media_function = VP9_MEDIA_STATE_16X_ME;
2170 media_function = VP9_MEDIA_STATE_4X_ME;
2172 gpe_context = &(vme_context->me_context.gpe_context);
2174 gen8_gpe_context_init(ctx, gpe_context);
2175 gen9_gpe_reset_binding_table(ctx, gpe_context);
/* build the CURBE parameters for this pass */
2177 memset(&me_curbe_param, 0, sizeof(me_curbe_param));
2178 me_curbe_param.ppic_param = vp9_state->pic_param;
2179 me_curbe_param.pseq_param = vp9_state->seq_param;
2180 me_curbe_param.frame_width = vp9_state->frame_width;
2181 me_curbe_param.frame_height = vp9_state->frame_height;
2182 me_curbe_param.ref_frame_flag = vp9_state->ref_frame_flag;
2183 me_curbe_param.use_16x_me = use_16x_me;
2184 me_curbe_param.b16xme_enabled = vp9_state->b16xme_enabled;
2185 vme_context->pfn_set_curbe_me(ctx, encode_state,
2190 gen9_me_add_surfaces_vp9(ctx, encode_state,
2195 gen8_gpe_setup_interface_data(ctx, gpe_context);
/* one walker thread per downscaled macroblock, no thread dependencies */
2197 memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
2199 kernel_walker_param.resolution_x = vp9_state->downscaled_width_16x_in_mb;
2200 kernel_walker_param.resolution_y = vp9_state->downscaled_height_16x_in_mb;
2202 kernel_walker_param.resolution_x = vp9_state->downscaled_width_4x_in_mb;
2203 kernel_walker_param.resolution_y = vp9_state->downscaled_height_4x_in_mb;
2205 kernel_walker_param.no_dependency = 1;
2207 gen9_init_media_object_walker_parameter(encoder_context, &kernel_walker_param, &media_object_walker_param);
2209 gen9_run_kernel_media_object_walker(ctx, encoder_context,
2212 &media_object_walker_param);
2214 return VA_STATUS_SUCCESS;
/* Fill the CURBE for the CM-based 4x scaling kernel: input dimensions,
 * fixed input/output binding-table indices, and (optionally) the MB
 * statistics output surface when variance/pixel-average output is on. */
2218 gen9_vp9_set_curbe_scaling_cm(VADriverContextP ctx,
2219 struct encode_state *encode_state,
2220 struct i965_gpe_context *gpe_context,
2221 struct intel_encoder_context *encoder_context,
2222 struct gen9_vp9_scaling_curbe_param *curbe_param)
2224 vp9_scaling4x_curbe_data_cm *curbe_cmd;
2226 curbe_cmd = i965_gpe_context_map_curbe(gpe_context);
2231 memset(curbe_cmd, 0, sizeof(vp9_scaling4x_curbe_data_cm));
2233 curbe_cmd->dw0.input_picture_width = curbe_param->input_picture_width;
2234 curbe_cmd->dw0.input_picture_height = curbe_param->input_picture_height;
/* fixed BTIs wired to the surfaces bound in gen9_vp9_send_scaling_surface */
2236 curbe_cmd->dw1.input_y_bti = VP9_BTI_SCALING_FRAME_SRC_Y;
2237 curbe_cmd->dw2.output_y_bti = VP9_BTI_SCALING_FRAME_DST_Y;
/* all MB statistics outputs disabled by default */
2240 curbe_cmd->dw6.enable_mb_variance_output = 0;
2241 curbe_cmd->dw6.enable_mb_pixel_average_output = 0;
2242 curbe_cmd->dw6.enable_blk8x8_stat_output = 0;
2244 if (curbe_param->mb_variance_output_enabled ||
2245 curbe_param->mb_pixel_average_output_enabled) {
2246 curbe_cmd->dw10.mbv_proc_stat_bti = VP9_BTI_SCALING_FRAME_MBVPROCSTATS_DST_CM;
2249 i965_gpe_context_unmap_curbe(gpe_context);
/* Bind the input and output surfaces for the scaling kernel using the BTI
 * slots recorded in the scaling-context BTI table. The output surface
 * format is chosen by the caller via the *_surf_fmt flags. */
2254 gen9_vp9_send_scaling_surface(VADriverContextP ctx,
2255 struct encode_state *encode_state,
2256 struct i965_gpe_context *gpe_context,
2257 struct intel_encoder_context *encoder_context,
2258 struct gen9_vp9_scaling_surface_param *scaling_surface_param)
2260 vp9_bti_scaling_offset *scaling_bti;
2261 unsigned int surface_format;
2263 scaling_bti = scaling_surface_param->p_scaling_bti;
/* 32-bit unorm takes precedence over 16-bit; plain R8 otherwise */
2265 if (scaling_surface_param->scaling_out_use_32unorm_surf_fmt)
2266 surface_format = I965_SURFACEFORMAT_R32_UNORM;
2267 else if (scaling_surface_param->scaling_out_use_16unorm_surf_fmt)
2268 surface_format = I965_SURFACEFORMAT_R16_UNORM;
2270 surface_format = I965_SURFACEFORMAT_R8_UNORM;
2272 i965_add_2d_gpe_surface(ctx, gpe_context,
2273 scaling_surface_param->input_surface,
2274 0, 1, surface_format,
2275 scaling_bti->scaling_frame_src_y);
2277 i965_add_2d_gpe_surface(ctx, gpe_context,
2278 scaling_surface_param->output_surface,
2279 0, 1, surface_format,
2280 scaling_bti->scaling_frame_dst_y);
/* Run one downscaling pass for the current frame:
 *  - 4x pass: source (or its DYS-rescaled copy) -> scaled_4x surface
 *  - 16x pass: scaled_4x surface -> scaled_16x surface
 * Sets up CURBE + surfaces and dispatches via the media-object walker.
 * Returns VA_STATUS_SUCCESS, or VA_STATUS_ERROR_INVALID_PARAMETER when
 * encoder state is missing. */
2287 gen9_vp9_scaling_kernel(VADriverContextP ctx,
2288 struct encode_state *encode_state,
2289 struct intel_encoder_context *encoder_context,
2290 int use_16x_scaling)
2292 struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
2293 struct i965_gpe_context *gpe_context;
2295 struct gen9_vp9_scaling_curbe_param scaling_curbe_param;
2296 struct gen9_vp9_scaling_surface_param scaling_surface_param;
2297 struct gen9_vp9_state *vp9_state;
2298 VAEncPictureParameterBufferVP9 *pic_param;
2299 struct gpe_media_object_walker_parameter media_object_walker_param;
2300 struct gpe_encoder_kernel_walker_parameter kernel_walker_param;
2301 struct object_surface *obj_surface;
2302 struct object_surface *input_surface, *output_surface;
2303 struct gen9_surface_vp9 *vp9_priv_surface;
2304 unsigned int downscaled_width_in_mb, downscaled_height_in_mb;
2305 unsigned int input_frame_width, input_frame_height;
2306 unsigned int output_frame_width, output_frame_height;
2308 vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
2309 if (!vp9_state || !vp9_state->pic_param)
2310 return VA_STATUS_ERROR_INVALID_PARAMETER;
2312 pic_param = vp9_state->pic_param;
2314 if (use_16x_scaling)
2315 media_function = VP9_MEDIA_STATE_16X_SCALING;
2317 media_function = VP9_MEDIA_STATE_4X_SCALING;
2319 gpe_context = &(vme_context->scaling_context.gpe_contexts[0]);
2321 gen8_gpe_context_init(ctx, gpe_context);
2322 gen9_gpe_reset_binding_table(ctx, gpe_context);
/* scaled surfaces live in the reconstructed object's private data */
2324 obj_surface = encode_state->reconstructed_object;
2325 vp9_priv_surface = obj_surface->private_data;
2327 if (use_16x_scaling) {
/* 16x pass: 4x-scaled surface feeds the 16x-scaled output */
2328 downscaled_width_in_mb = vp9_state->downscaled_width_16x_in_mb;
2329 downscaled_height_in_mb = vp9_state->downscaled_height_16x_in_mb;
2331 input_surface = vp9_priv_surface->scaled_4x_surface_obj;
2332 input_frame_width = vp9_state->frame_width_4x;
2333 input_frame_height = vp9_state->frame_height_4x;
2335 output_surface = vp9_priv_surface->scaled_16x_surface_obj;
2336 output_frame_width = vp9_state->frame_width_16x;
2337 output_frame_height = vp9_state->frame_height_16x;
/* 4x pass: original input (or its DYS copy under dynamic scaling) */
2339 downscaled_width_in_mb = vp9_state->downscaled_width_4x_in_mb;
2340 downscaled_height_in_mb = vp9_state->downscaled_height_4x_in_mb;
2342 if (vp9_state->dys_in_use &&
2343 ((pic_param->frame_width_src != pic_param->frame_width_dst) ||
2344 (pic_param->frame_height_src != pic_param->frame_height_dst)))
2345 input_surface = vp9_priv_surface->dys_surface_obj;
2347 input_surface = encode_state->input_yuv_object;
2349 input_frame_width = vp9_state->frame_width;
2350 input_frame_height = vp9_state->frame_height;
2352 output_surface = vp9_priv_surface->scaled_4x_surface_obj;
2353 output_frame_width = vp9_state->frame_width_4x;
2354 output_frame_height = vp9_state->frame_height_4x;
2357 memset(&scaling_curbe_param, 0, sizeof(scaling_curbe_param));
2359 scaling_curbe_param.input_picture_width = input_frame_width;
2360 scaling_curbe_param.input_picture_height = input_frame_height;
2362 scaling_curbe_param.use_16x_scaling = use_16x_scaling;
2363 scaling_curbe_param.use_32x_scaling = 0;
/* MB variance is only collected on the 4x pass, and only when the
 * adaptive transform decision requires it */
2365 if (use_16x_scaling)
2366 scaling_curbe_param.mb_variance_output_enabled = 0;
2368 scaling_curbe_param.mb_variance_output_enabled = vp9_state->adaptive_transform_decision_enabled;
2370 scaling_curbe_param.blk8x8_stat_enabled = 0;
2372 vme_context->pfn_set_curbe_scaling(ctx, encode_state,
2375 &scaling_curbe_param);
2377 memset(&scaling_surface_param, 0, sizeof(scaling_surface_param));
2378 scaling_surface_param.p_scaling_bti = (void *)(&vme_context->scaling_context.scaling_4x_bti);
2379 scaling_surface_param.input_surface = input_surface;
2380 scaling_surface_param.input_frame_width = input_frame_width;
2381 scaling_surface_param.input_frame_height = input_frame_height;
2383 scaling_surface_param.output_surface = output_surface;
2384 scaling_surface_param.output_frame_width = output_frame_width;
2385 scaling_surface_param.output_frame_height = output_frame_height;
2386 scaling_surface_param.scaling_out_use_16unorm_surf_fmt = 0;
2387 scaling_surface_param.scaling_out_use_32unorm_surf_fmt = 1;
2389 vme_context->pfn_send_scaling_surface(ctx, encode_state,
2392 &scaling_surface_param);
2394 gen8_gpe_setup_interface_data(ctx, gpe_context);
2396 memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
2397 /* the scaling is based on 8x8 blk level */
2398 kernel_walker_param.resolution_x = downscaled_width_in_mb * 2;
2399 kernel_walker_param.resolution_y = downscaled_height_in_mb * 2;
2400 kernel_walker_param.no_dependency = 1;
2402 gen9_init_media_object_walker_parameter(encoder_context, &kernel_walker_param, &media_object_walker_param);
2404 gen9_run_kernel_media_object_walker(ctx, encoder_context,
2407 &media_object_walker_param);
2409 return VA_STATUS_SUCCESS;
/* Program the SAMPLER_8x8 AVS (adaptive video scaler) state used by the DYS
 * kernel: sharpness/edge-detection tuning plus the 8-tap filter coefficient
 * tables copied from gen9_vp9_avs_coeffs. The state is written directly
 * into the mapped sampler BO at the context's sampler offset. */
2413 gen9_vp9_dys_set_sampler_state(struct i965_gpe_context *gpe_context)
2415 struct gen9_sampler_8x8_avs *sampler_cmd;
2420 dri_bo_map(gpe_context->sampler.bo, 1);
/* bail out if the mapping failed */
2422 if (!gpe_context->sampler.bo->virtual)
2425 sampler_cmd = (struct gen9_sampler_8x8_avs *)
2426 (gpe_context->sampler.bo->virtual + gpe_context->sampler.offset);
2428 memset(sampler_cmd, 0, sizeof(struct gen9_sampler_8x8_avs));
/* edge-detection / sharpening tuning values (hardware-specific constants) */
2430 sampler_cmd->dw0.r3c_coefficient = 15;
2431 sampler_cmd->dw0.r3x_coefficient = 6;
2432 sampler_cmd->dw0.strong_edge_threshold = 8;
2433 sampler_cmd->dw0.weak_edge_threshold = 1;
2434 sampler_cmd->dw0.gain_factor = 32;
2436 sampler_cmd->dw2.r5c_coefficient = 3;
2437 sampler_cmd->dw2.r5cx_coefficient = 8;
2438 sampler_cmd->dw2.r5x_coefficient = 9;
2439 sampler_cmd->dw2.strong_edge_weight = 6;
2440 sampler_cmd->dw2.regular_weight = 3;
2441 sampler_cmd->dw2.non_edge_weight = 2;
2442 sampler_cmd->dw2.global_noise_estimation = 255;
2444 sampler_cmd->dw3.enable_8tap_adaptive_filter = 0;
2445 sampler_cmd->dw3.cos_alpha = 79;
2446 sampler_cmd->dw3.sin_alpha = 101;
2448 sampler_cmd->dw5.diamond_du = 0;
2449 sampler_cmd->dw5.hs_margin = 3;
2450 sampler_cmd->dw5.diamond_alpha = 100;
2452 sampler_cmd->dw7.inv_margin_vyl = 3300;
2454 sampler_cmd->dw8.inv_margin_vyu = 1600;
2456 sampler_cmd->dw10.y_slope2 = 24;
2457 sampler_cmd->dw10.s0l = 1792;
2459 sampler_cmd->dw12.y_slope1 = 24;
2461 sampler_cmd->dw14.s0u = 256;
2463 sampler_cmd->dw15.s2u = 1792;
2464 sampler_cmd->dw15.s1u = 0;
/* first bank of AVS filter coefficients (phases 0-16) */
2466 memcpy(sampler_cmd->coefficients,
2467 &gen9_vp9_avs_coeffs[0],
2468 17 * sizeof(struct gen8_sampler_8x8_avs_coefficients));
2470 sampler_cmd->dw152.default_sharpness_level = 255;
2471 sampler_cmd->dw152.max_derivative_4_pixels = 7;
2472 sampler_cmd->dw152.max_derivative_8_pixels = 20;
2473 sampler_cmd->dw152.transition_area_with_4_pixels = 4;
2474 sampler_cmd->dw152.transition_area_with_8_pixels = 5;
/* adaptive filtering bypassed in both directions */
2476 sampler_cmd->dw153.bypass_x_adaptive_filtering = 1;
2477 sampler_cmd->dw153.bypass_y_adaptive_filtering = 1;
2478 sampler_cmd->dw153.adaptive_filter_for_all_channel = 0;
/* remaining coefficient bank, starting past the first 17 entries */
2480 memcpy(sampler_cmd->extra_coefficients,
2481 &gen9_vp9_avs_coeffs[17 * 8],
2482 15 * sizeof(struct gen8_sampler_8x8_avs_coefficients));
2484 dri_bo_unmap(gpe_context->sampler.bo);
/* Fill the CURBE for the DYS (dynamic scaling) kernel: input/output frame
 * dimensions, the per-pixel sampling step (delta_u/delta_v) and the fixed
 * binding-table indices for the NV12 input and Y output surfaces. */
2488 gen9_vp9_set_curbe_dys(VADriverContextP ctx,
2489 struct encode_state *encode_state,
2490 struct i965_gpe_context *gpe_context,
2491 struct intel_encoder_context *encoder_context,
2492 struct gen9_vp9_dys_curbe_param *curbe_param)
2494 vp9_dys_curbe_data *curbe_cmd;
2496 curbe_cmd = i965_gpe_context_map_curbe(gpe_context);
2501 memset(curbe_cmd, 0, sizeof(vp9_dys_curbe_data));
2503 curbe_cmd->dw0.input_frame_width = curbe_param->input_width;
2504 curbe_cmd->dw0.input_frame_height = curbe_param->input_height;
2506 curbe_cmd->dw1.output_frame_width = curbe_param->output_width;
2507 curbe_cmd->dw1.output_frame_height = curbe_param->output_height;
/* normalized sampling step across the output frame (AVS sampler input) */
2509 curbe_cmd->dw2.delta_u = 1.0f / curbe_param->output_width;
2510 curbe_cmd->dw3.delta_v = 1.0f / curbe_param->output_height;
2512 curbe_cmd->dw16.input_frame_nv12_bti = VP9_BTI_DYS_INPUT_NV12;
2513 curbe_cmd->dw17.output_frame_y_bti = VP9_BTI_DYS_OUTPUT_Y;
2514 curbe_cmd->dw18.avs_sample_idx = 0;
2516 i965_gpe_context_unmap_curbe(gpe_context);
/* Bind the DYS kernel surfaces: the NV12 input via an adv (sampler) surface
 * state, and the output frame as two 2D writes — the Y plane as R8 and the
 * interleaved UV plane as R16_UINT. */
2520 gen9_vp9_send_dys_surface(VADriverContextP ctx,
2521 struct encode_state *encode_state,
2522 struct i965_gpe_context *gpe_context,
2523 struct intel_encoder_context *encoder_context,
2524 struct gen9_vp9_dys_surface_param *surface_param)
2527 if (surface_param->input_frame)
2528 i965_add_adv_gpe_surface(ctx,
2530 surface_param->input_frame,
2531 VP9_BTI_DYS_INPUT_NV12);
2533 if (surface_param->output_frame) {
/* luma plane */
2534 i965_add_2d_gpe_surface(ctx,
2536 surface_param->output_frame,
2539 I965_SURFACEFORMAT_R8_UNORM,
2540 VP9_BTI_DYS_OUTPUT_Y);
/* chroma plane: one R16 texel holds a UV pair */
2542 i965_add_2d_gpe_surface(ctx,
2544 surface_param->output_frame,
2547 I965_SURFACEFORMAT_R16_UINT,
2548 VP9_BTI_DYS_OUTPUT_UV);
/* Dispatch one DYS (dynamic scaling) kernel run that rescales
 * dys_kernel_param->input_surface to output_surface at the requested
 * output resolution; one walker thread per 16x16 output block. */
2555 gen9_vp9_dys_kernel(VADriverContextP ctx,
2556 struct encode_state *encode_state,
2557 struct intel_encoder_context *encoder_context,
2558 gen9_vp9_dys_kernel_param *dys_kernel_param)
2560 struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
2561 struct i965_gpe_context *gpe_context;
2563 struct gen9_vp9_dys_curbe_param curbe_param;
2564 struct gen9_vp9_dys_surface_param surface_param;
2565 struct gpe_media_object_walker_parameter media_object_walker_param;
2566 struct gpe_encoder_kernel_walker_parameter kernel_walker_param;
2567 unsigned int resolution_x, resolution_y;
2569 media_function = VP9_MEDIA_STATE_DYS;
2570 gpe_context = &vme_context->dys_context.gpe_context;
2572 //gen8_gpe_context_init(ctx, gpe_context);
2573 gen9_gpe_reset_binding_table(ctx, gpe_context);
2575 /* sampler state is configured only when initializing the GPE context */
/* CURBE: source and destination dimensions for the scaler */
2577 memset(&curbe_param, 0, sizeof(curbe_param));
2578 curbe_param.input_width = dys_kernel_param->input_width;
2579 curbe_param.input_height = dys_kernel_param->input_height;
2580 curbe_param.output_width = dys_kernel_param->output_width;
2581 curbe_param.output_height = dys_kernel_param->output_height;
2582 vme_context->pfn_set_curbe_dys(ctx, encode_state,
2587 // Add surface states
2588 memset(&surface_param, 0, sizeof(surface_param));
2589 surface_param.input_frame = dys_kernel_param->input_surface;
2590 surface_param.output_frame = dys_kernel_param->output_surface;
2591 surface_param.vert_line_stride = 0;
2592 surface_param.vert_line_stride_offset = 0;
2594 vme_context->pfn_send_dys_surface(ctx,
/* walker grid covers the output in 16x16 blocks */
2600 resolution_x = ALIGN(dys_kernel_param->output_width, 16) / 16;
2601 resolution_y = ALIGN(dys_kernel_param->output_height, 16) / 16;
2603 gen8_gpe_setup_interface_data(ctx, gpe_context);
2605 memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
2606 kernel_walker_param.resolution_x = resolution_x;
2607 kernel_walker_param.resolution_y = resolution_y;
2608 kernel_walker_param.no_dependency = 1;
2610 gen9_init_media_object_walker_parameter(encoder_context, &kernel_walker_param, &media_object_walker_param);
2612 gen9_run_kernel_media_object_walker(ctx, encoder_context,
2615 &media_object_walker_param);
2617 return VA_STATUS_SUCCESS;
2621 gen9_vp9_run_dys_refframes(VADriverContextP ctx,
2622 struct encode_state *encode_state,
2623 struct intel_encoder_context *encoder_context)
2625 struct gen9_vp9_state *vp9_state;
2626 VAEncPictureParameterBufferVP9 *pic_param;
2627 gen9_vp9_dys_kernel_param dys_kernel_param;
2628 struct object_surface *obj_surface;
2629 struct object_surface *input_surface, *output_surface;
2630 struct gen9_surface_vp9 *vp9_priv_surface;
2632 vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
2634 if (!vp9_state || !vp9_state->pic_param)
2635 return VA_STATUS_ERROR_INVALID_PARAMETER;
2637 pic_param = vp9_state->pic_param;
2639 if ((pic_param->frame_width_src != pic_param->frame_width_dst) ||
2640 (pic_param->frame_height_src != pic_param->frame_height_dst)) {
2641 input_surface = encode_state->input_yuv_object;
2642 obj_surface = encode_state->reconstructed_object;
2643 vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
2644 output_surface = vp9_priv_surface->dys_surface_obj;
2646 memset(&dys_kernel_param, 0, sizeof(dys_kernel_param));
2647 dys_kernel_param.input_width = pic_param->frame_width_src;
2648 dys_kernel_param.input_height = pic_param->frame_height_src;
2649 dys_kernel_param.input_surface = input_surface;
2650 dys_kernel_param.output_width = pic_param->frame_width_dst;
2651 dys_kernel_param.output_height = pic_param->frame_height_dst;
2652 dys_kernel_param.output_surface = output_surface;
2653 gen9_vp9_dys_kernel(ctx, encode_state,
2658 if ((vp9_state->dys_ref_frame_flag & VP9_LAST_REF) &&
2659 vp9_state->last_ref_obj) {
2660 obj_surface = vp9_state->last_ref_obj;
2661 vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
2663 input_surface = obj_surface;
2664 output_surface = vp9_priv_surface->dys_surface_obj;
2666 dys_kernel_param.input_width = vp9_priv_surface->frame_width;
2667 dys_kernel_param.input_height = vp9_priv_surface->frame_height;
2668 dys_kernel_param.input_surface = input_surface;
2670 dys_kernel_param.output_width = pic_param->frame_width_dst;
2671 dys_kernel_param.output_height = pic_param->frame_height_dst;
2672 dys_kernel_param.output_surface = output_surface;
2674 gen9_vp9_dys_kernel(ctx, encode_state,
2678 if (vp9_state->hme_enabled) {
2679 dys_kernel_param.input_width = ALIGN((vp9_priv_surface->frame_width / 4), 16);
2680 dys_kernel_param.input_width = ALIGN((vp9_priv_surface->frame_height / 4), 16);
2681 dys_kernel_param.input_surface = vp9_priv_surface->scaled_4x_surface_obj;
2683 dys_kernel_param.output_width = vp9_state->frame_width_4x;
2684 dys_kernel_param.output_height = vp9_state->frame_height_4x;
2685 dys_kernel_param.output_surface = vp9_priv_surface->dys_4x_surface_obj;
2687 gen9_vp9_dys_kernel(ctx, encode_state,
2691 /* Does it really need to do the 16x HME if the
2692 * resolution is different?
2693 * Maybe it should be restricted
2695 if (vp9_state->b16xme_enabled) {
2696 dys_kernel_param.input_width = ALIGN((vp9_priv_surface->frame_width / 16), 16);
2697 dys_kernel_param.input_height = ALIGN((vp9_priv_surface->frame_height / 16), 16);
2698 dys_kernel_param.input_surface = vp9_priv_surface->scaled_16x_surface_obj;
2700 dys_kernel_param.output_width = vp9_state->frame_width_16x;
2701 dys_kernel_param.output_height = vp9_state->frame_height_16x;
2702 dys_kernel_param.output_surface = vp9_priv_surface->dys_16x_surface_obj;
2704 gen9_vp9_dys_kernel(ctx, encode_state,
2711 if ((vp9_state->dys_ref_frame_flag & VP9_GOLDEN_REF) &&
2712 vp9_state->golden_ref_obj) {
2713 obj_surface = vp9_state->golden_ref_obj;
2714 vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
2716 input_surface = obj_surface;
2717 output_surface = vp9_priv_surface->dys_surface_obj;
2719 dys_kernel_param.input_width = vp9_priv_surface->frame_width;
2720 dys_kernel_param.input_height = vp9_priv_surface->frame_height;
2721 dys_kernel_param.input_surface = input_surface;
2723 dys_kernel_param.output_width = pic_param->frame_width_dst;
2724 dys_kernel_param.output_height = pic_param->frame_height_dst;
2725 dys_kernel_param.output_surface = output_surface;
2727 gen9_vp9_dys_kernel(ctx, encode_state,
2731 if (vp9_state->hme_enabled) {
2732 dys_kernel_param.input_width = ALIGN((vp9_priv_surface->frame_width / 4), 16);
2733 dys_kernel_param.input_width = ALIGN((vp9_priv_surface->frame_height / 4), 16);
2734 dys_kernel_param.input_surface = vp9_priv_surface->scaled_4x_surface_obj;
2736 dys_kernel_param.output_width = vp9_state->frame_width_4x;
2737 dys_kernel_param.output_height = vp9_state->frame_height_4x;
2738 dys_kernel_param.output_surface = vp9_priv_surface->dys_4x_surface_obj;
2740 gen9_vp9_dys_kernel(ctx, encode_state,
2744 /* Does it really need to do the 16x HME if the
2745 * resolution is different?
2746 * Maybe it should be restricted
2748 if (vp9_state->b16xme_enabled) {
2749 dys_kernel_param.input_width = ALIGN((vp9_priv_surface->frame_width / 16), 16);
2750 dys_kernel_param.input_height = ALIGN((vp9_priv_surface->frame_height / 16), 16);
2751 dys_kernel_param.input_surface = vp9_priv_surface->scaled_16x_surface_obj;
2753 dys_kernel_param.output_width = vp9_state->frame_width_16x;
2754 dys_kernel_param.output_height = vp9_state->frame_height_16x;
2755 dys_kernel_param.output_surface = vp9_priv_surface->dys_16x_surface_obj;
2757 gen9_vp9_dys_kernel(ctx, encode_state,
2764 if ((vp9_state->dys_ref_frame_flag & VP9_ALT_REF) &&
2765 vp9_state->alt_ref_obj) {
2766 obj_surface = vp9_state->alt_ref_obj;
2767 vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
2769 input_surface = obj_surface;
2770 output_surface = vp9_priv_surface->dys_surface_obj;
2772 dys_kernel_param.input_width = vp9_priv_surface->frame_width;
2773 dys_kernel_param.input_height = vp9_priv_surface->frame_height;
2774 dys_kernel_param.input_surface = input_surface;
2776 dys_kernel_param.output_width = pic_param->frame_width_dst;
2777 dys_kernel_param.output_height = pic_param->frame_height_dst;
2778 dys_kernel_param.output_surface = output_surface;
2780 gen9_vp9_dys_kernel(ctx, encode_state,
2784 if (vp9_state->hme_enabled) {
2785 dys_kernel_param.input_width = ALIGN((vp9_priv_surface->frame_width / 4), 16);
2786 dys_kernel_param.input_width = ALIGN((vp9_priv_surface->frame_height / 4), 16);
2787 dys_kernel_param.input_surface = vp9_priv_surface->scaled_4x_surface_obj;
2789 dys_kernel_param.output_width = vp9_state->frame_width_4x;
2790 dys_kernel_param.output_height = vp9_state->frame_height_4x;
2791 dys_kernel_param.output_surface = vp9_priv_surface->dys_4x_surface_obj;
2793 gen9_vp9_dys_kernel(ctx, encode_state,
2797 /* Does it really need to do the 16x HME if the
2798 * resolution is different?
2799 * Maybe it should be restricted
2801 if (vp9_state->b16xme_enabled) {
2802 dys_kernel_param.input_width = ALIGN((vp9_priv_surface->frame_width / 16), 16);
2803 dys_kernel_param.input_height = ALIGN((vp9_priv_surface->frame_height / 16), 16);
2804 dys_kernel_param.input_surface = vp9_priv_surface->scaled_16x_surface_obj;
2806 dys_kernel_param.output_width = vp9_state->frame_width_16x;
2807 dys_kernel_param.output_height = vp9_state->frame_height_16x;
2808 dys_kernel_param.output_surface = vp9_priv_surface->dys_16x_surface_obj;
2810 gen9_vp9_dys_kernel(ctx, encode_state,
2817 return VA_STATUS_SUCCESS;
2821 gen9_vp9_set_curbe_mbenc(VADriverContextP ctx,
2822 struct encode_state *encode_state,
2823 struct i965_gpe_context *gpe_context,
2824 struct intel_encoder_context *encoder_context,
2825 struct gen9_vp9_mbenc_curbe_param *curbe_param)
2827 struct gen9_vp9_state *vp9_state;
2828 VAEncMiscParameterTypeVP9PerSegmantParam *seg_param, tmp_seg_param;
2829 vp9_mbenc_curbe_data *curbe_cmd;
2830 VAEncPictureParameterBufferVP9 *pic_param;
2831 int i, segment_count;
2833 struct object_surface *obj_surface;
2834 struct gen9_surface_vp9 *vp9_priv_surface;
2836 vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
2838 if (!vp9_state || !vp9_state->pic_param)
2841 pic_param = curbe_param->ppic_param;
2842 seg_param = curbe_param->psegment_param;
2845 memset(&tmp_seg_param, 0, sizeof(tmp_seg_param));
2846 seg_param = &tmp_seg_param;
2849 curbe_cmd = i965_gpe_context_map_curbe(gpe_context);
2854 memset(curbe_cmd, 0, sizeof(vp9_mbenc_curbe_data));
2856 if (vp9_state->dys_in_use) {
2857 curbe_cmd->dw0.frame_width = pic_param->frame_width_dst;
2858 curbe_cmd->dw0.frame_height = pic_param->frame_height_dst;
2860 curbe_cmd->dw0.frame_width = pic_param->frame_width_src;
2861 curbe_cmd->dw0.frame_height = pic_param->frame_height_src;
2864 curbe_cmd->dw1.frame_type = curbe_param->picture_coding_type;
2866 curbe_cmd->dw1.segmentation_enable = pic_param->pic_flags.bits.segmentation_enabled;
2867 if (pic_param->pic_flags.bits.segmentation_enabled)
2872 curbe_cmd->dw1.ref_frame_flags = curbe_param->ref_frame_flag;
2874 //right now set them to normal settings
2875 if (curbe_param->picture_coding_type) {
2876 switch (vp9_state->target_usage) {
2877 case INTEL_ENC_VP9_TU_QUALITY:
2878 curbe_cmd->dw1.min_16for32_check = 0x00;
2879 curbe_cmd->dw2.multi_pred = 0x02;
2880 curbe_cmd->dw2.len_sp = 0x39;
2881 curbe_cmd->dw2.search_x = 0x30;
2882 curbe_cmd->dw2.search_y = 0x28;
2883 curbe_cmd->dw3.min_ref_for32_check = 0x01;
2884 curbe_cmd->dw4.skip16_threshold = 0x000A;
2885 curbe_cmd->dw4.disable_mr_threshold = 0x000C;
2887 memcpy(&curbe_cmd->dw16,
2888 vp9_diamond_ime_search_path_delta,
2889 14 * sizeof(unsigned int));
2891 case INTEL_ENC_VP9_TU_PERFORMANCE:
2892 curbe_cmd->dw1.min_16for32_check = 0x02;
2893 curbe_cmd->dw2.multi_pred = 0x00;
2894 curbe_cmd->dw2.len_sp = 0x10;
2895 curbe_cmd->dw2.search_x = 0x20;
2896 curbe_cmd->dw2.search_y = 0x20;
2897 curbe_cmd->dw3.min_ref_for32_check = 0x03;
2898 curbe_cmd->dw4.skip16_threshold = 0x0014;
2899 curbe_cmd->dw4.disable_mr_threshold = 0x0016;
2901 memcpy(&curbe_cmd->dw16,
2902 vp9_fullspiral_ime_search_path_delta,
2903 14 * sizeof(unsigned int));
2906 default: // normal settings
2907 curbe_cmd->dw1.min_16for32_check = 0x01;
2908 curbe_cmd->dw2.multi_pred = 0x00;
2909 curbe_cmd->dw2.len_sp = 0x19;
2910 curbe_cmd->dw2.search_x = 0x30;
2911 curbe_cmd->dw2.search_y = 0x28;
2912 curbe_cmd->dw3.min_ref_for32_check = 0x02;
2913 curbe_cmd->dw4.skip16_threshold = 0x000F;
2914 curbe_cmd->dw4.disable_mr_threshold = 0x0011;
2916 memcpy(&curbe_cmd->dw16,
2917 vp9_diamond_ime_search_path_delta,
2918 14 * sizeof(unsigned int));
2922 curbe_cmd->dw3.hme_enabled = curbe_param->hme_enabled;
2923 curbe_cmd->dw3.multi_ref_qp_check = curbe_param->multi_ref_qp_check;
2924 // co-located predictor must be disabled when dynamic scaling is enabled
2925 curbe_cmd->dw3.disable_temp_pred = vp9_state->dys_in_use;
2928 curbe_cmd->dw5.inter_round = 0;
2929 curbe_cmd->dw5.intra_round = 4;
2930 curbe_cmd->dw5.frame_qpindex = pic_param->luma_ac_qindex;
2932 for (i = 0; i < segment_count; i++) {
2933 seg_qindex = pic_param->luma_ac_qindex + pic_param->luma_dc_qindex_delta
2934 + seg_param->seg_data[i].segment_qindex_delta;
2936 seg_qindex = CLAMP(0, 255, seg_qindex);
2938 if (curbe_param->picture_coding_type)
2939 memcpy(&curbe_cmd->segments[i],
2940 &intel_vp9_costlut_p[seg_qindex * 16],
2941 16 * sizeof(unsigned int));
2943 memcpy(&curbe_cmd->segments[i],
2944 &intel_vp9_costlut_key[seg_qindex * 16],
2945 16 * sizeof(unsigned int));
2948 if (curbe_param->picture_coding_type) {
2949 if (curbe_cmd->dw3.multi_ref_qp_check) {
2950 if (curbe_param->ref_frame_flag & 0x01) {
2951 obj_surface = curbe_param->last_ref_obj;
2952 vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
2953 curbe_cmd->dw8.last_ref_qp = vp9_quant_dc[vp9_priv_surface->qp_value];
2956 if (curbe_param->ref_frame_flag & 0x02) {
2957 obj_surface = curbe_param->golden_ref_obj;
2958 vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
2959 curbe_cmd->dw8.golden_ref_qp = vp9_quant_dc[vp9_priv_surface->qp_value];
2962 if (curbe_param->ref_frame_flag & 0x04) {
2963 obj_surface = curbe_param->alt_ref_obj;
2964 vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
2965 curbe_cmd->dw9.alt_ref_qp = vp9_quant_dc[vp9_priv_surface->qp_value];
2969 curbe_cmd->dw160.enc_curr_y_surf_bti = VP9_BTI_MBENC_CURR_Y_G9;
2970 curbe_cmd->dw162.enc_curr_nv12_surf_bti = VP9_BTI_MBENC_CURR_NV12_G9;
2971 curbe_cmd->dw166.segmentation_map_bti = VP9_BTI_MBENC_SEGMENTATION_MAP_G9;
2972 curbe_cmd->dw172.mode_decision_bti = VP9_BTI_MBENC_MODE_DECISION_G9;
2973 curbe_cmd->dw167.tx_curbe_bti = VP9_BTI_MBENC_TX_CURBE_G9;
2974 curbe_cmd->dw168.hme_mvdata_bti = VP9_BTI_MBENC_HME_MV_DATA_G9;
2975 curbe_cmd->dw169.hme_distortion_bti = VP9_BTI_MBENC_HME_DISTORTION_G9;
2976 curbe_cmd->dw171.mode_decision_prev_bti = VP9_BTI_MBENC_MODE_DECISION_PREV_G9;
2977 curbe_cmd->dw172.mode_decision_bti = VP9_BTI_MBENC_MODE_DECISION_G9;
2978 curbe_cmd->dw173.output_16x16_inter_modes_bti = VP9_BTI_MBENC_OUT_16x16_INTER_MODES_G9;
2979 curbe_cmd->dw174.cu_record_bti = VP9_BTI_MBENC_CU_RECORDS_G9;
2980 curbe_cmd->dw175.pak_data_bti = VP9_BTI_MBENC_PAK_DATA_G9;
2982 i965_gpe_context_unmap_curbe(gpe_context);
/*
 * gen9_vp9_send_mbenc_surface:
 *
 * Populate the binding table of @gpe_context with the surfaces needed by
 * the MBEnc media function selected through mbenc_param->media_state_type
 * (I 32x32, I 16x16, P, or TX).
 *
 * All variants bind the current frame's luma/chroma planes, the optional
 * segmentation map and the mode-decision buffer; the P variant also binds
 * the LAST/GOLDEN/ALT reference frames (VME views) and the 4x HME MV /
 * distortion buffers; the I-16x16 and P variants additionally expose the
 * TX kernel's CURBE buffer so it can be patched, and the TX variant binds
 * the PAK data / CU record outputs.
 *
 * NOTE(review): several argument lines and closing braces are not visible
 * in this chunk; comments below describe only what the visible code shows.
 */
gen9_vp9_send_mbenc_surface(VADriverContextP ctx,
                            struct encode_state *encode_state,
                            struct i965_gpe_context *gpe_context,
                            struct intel_encoder_context *encoder_context,
                            struct gen9_vp9_mbenc_surface_param *mbenc_param)
    struct gen9_vp9_state *vp9_state;
    unsigned int res_size;
    unsigned int frame_width_in_sb, frame_height_in_sb;
    struct object_surface *obj_surface, *tmp_input;
    struct gen9_surface_vp9 *vp9_priv_surface;

    vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;

    /* Nothing can be bound without a valid encoder state / pic param */
    if (!vp9_state || !vp9_state->pic_param)

    /* Frame size in 64x64 super-blocks; used to size PAK/CU buffers below */
    frame_width_in_sb = ALIGN(mbenc_param->frame_width, 64) / 64;
    frame_height_in_sb = ALIGN(mbenc_param->frame_height, 64) / 64;
    media_function = mbenc_param->media_state_type;

    switch (media_function) {
    case VP9_MEDIA_STATE_MBENC_I_32x32: {
        obj_surface = mbenc_param->curr_frame_obj;

        /* Current frame: luma plane */
        i965_add_2d_gpe_surface(ctx,
                                I965_SURFACEFORMAT_R8_UNORM,
                                VP9_BTI_MBENC_CURR_Y_G9);

        /* Current frame: interleaved chroma plane */
        i965_add_2d_gpe_surface(ctx,
                                I965_SURFACEFORMAT_R16_UINT,
                                VP9_BTI_MBENC_CURR_UV_G9);

        /* Per-block segment-id map, only when segmentation is enabled */
        if (mbenc_param->segmentation_enabled) {
            i965_add_buffer_2d_gpe_surface(ctx,
                                           mbenc_param->pres_segmentation_map,
                                           I965_SURFACEFORMAT_R8_UNORM,
                                           VP9_BTI_MBENC_SEGMENTATION_MAP_G9);

        /* Mode-decision output: 16 dwords per macroblock */
        res_size = 16 * mbenc_param->frame_width_in_mb *
                   mbenc_param->frame_height_in_mb * sizeof(unsigned int);
        i965_add_buffer_gpe_surface(ctx,
                                    mbenc_param->pres_mode_decision,
                                    VP9_BTI_MBENC_MODE_DECISION_G9);

    case VP9_MEDIA_STATE_MBENC_I_16x16: {
        obj_surface = mbenc_param->curr_frame_obj;

        i965_add_2d_gpe_surface(ctx,
                                I965_SURFACEFORMAT_R8_UNORM,
                                VP9_BTI_MBENC_CURR_Y_G9);

        i965_add_2d_gpe_surface(ctx,
                                I965_SURFACEFORMAT_R16_UINT,
                                VP9_BTI_MBENC_CURR_UV_G9);

        /* VME (media-sampler) view of the current frame */
        i965_add_adv_gpe_surface(ctx, gpe_context,
                                 VP9_BTI_MBENC_CURR_NV12_G9);

        if (mbenc_param->segmentation_enabled) {
            i965_add_buffer_2d_gpe_surface(ctx,
                                           mbenc_param->pres_segmentation_map,
                                           I965_SURFACEFORMAT_R8_UNORM,
                                           VP9_BTI_MBENC_SEGMENTATION_MAP_G9);

        res_size = 16 * mbenc_param->frame_width_in_mb *
                   mbenc_param->frame_height_in_mb * sizeof(unsigned int);
        i965_add_buffer_gpe_surface(ctx,
                                    mbenc_param->pres_mode_decision,
                                    VP9_BTI_MBENC_MODE_DECISION_G9);

        /* Expose the TX kernel's CURBE buffer so this kernel can update it
         * in place before the TX pass runs */
        gen9_add_dri_buffer_gpe_surface(ctx,
                                        mbenc_param->gpe_context_tx->curbe.bo,
                                        ALIGN(res_size, 64),
                                        mbenc_param->gpe_context_tx->curbe.offset,
                                        VP9_BTI_MBENC_TX_CURBE_G9);

    case VP9_MEDIA_STATE_MBENC_P: {
        obj_surface = mbenc_param->curr_frame_obj;

        i965_add_2d_gpe_surface(ctx,
                                I965_SURFACEFORMAT_R8_UNORM,
                                VP9_BTI_MBENC_CURR_Y_G9);

        i965_add_2d_gpe_surface(ctx, gpe_context,
                                I965_SURFACEFORMAT_R16_UINT,
                                VP9_BTI_MBENC_CURR_UV_G9);

        i965_add_adv_gpe_surface(ctx, gpe_context,
                                 VP9_BTI_MBENC_CURR_NV12_G9);

        /* LAST reference: use the dynamically-scaled copy when dys is active
         * and the reference size differs from the current coded size */
        if (mbenc_param->last_ref_obj) {
            obj_surface = mbenc_param->last_ref_obj;
            vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);

            if (vp9_state->dys_in_use &&
                ((vp9_priv_surface->frame_width != vp9_state->frame_width) ||
                 (vp9_priv_surface->frame_height != vp9_state->frame_height)))
                tmp_input = vp9_priv_surface->dys_surface_obj;
                tmp_input = obj_surface;

            /* Bound twice at consecutive BTIs — presumably two VME views of
             * the same reference; confirm against the kernel's surface map */
            i965_add_adv_gpe_surface(ctx, gpe_context,
                                     VP9_BTI_MBENC_LAST_NV12_G9);

            i965_add_adv_gpe_surface(ctx, gpe_context,
                                     VP9_BTI_MBENC_LAST_NV12_G9 + 1);

        /* GOLDEN reference: same dys substitution as LAST */
        if (mbenc_param->golden_ref_obj) {
            obj_surface = mbenc_param->golden_ref_obj;
            vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);

            if (vp9_state->dys_in_use &&
                ((vp9_priv_surface->frame_width != vp9_state->frame_width) ||
                 (vp9_priv_surface->frame_height != vp9_state->frame_height)))
                tmp_input = vp9_priv_surface->dys_surface_obj;
                tmp_input = obj_surface;

            i965_add_adv_gpe_surface(ctx, gpe_context,
                                     VP9_BTI_MBENC_GOLD_NV12_G9);

            i965_add_adv_gpe_surface(ctx, gpe_context,
                                     VP9_BTI_MBENC_GOLD_NV12_G9 + 1);

        /* ALT reference: same dys substitution as LAST */
        if (mbenc_param->alt_ref_obj) {
            obj_surface = mbenc_param->alt_ref_obj;
            vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);

            if (vp9_state->dys_in_use &&
                ((vp9_priv_surface->frame_width != vp9_state->frame_width) ||
                 (vp9_priv_surface->frame_height != vp9_state->frame_height)))
                tmp_input = vp9_priv_surface->dys_surface_obj;
                tmp_input = obj_surface;

            i965_add_adv_gpe_surface(ctx, gpe_context,
                                     VP9_BTI_MBENC_ALTREF_NV12_G9);

            i965_add_adv_gpe_surface(ctx, gpe_context,
                                     VP9_BTI_MBENC_ALTREF_NV12_G9 + 1);

        /* HME results from the 4x downscaled pass feed the inter search */
        if (mbenc_param->hme_enabled) {
            i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
                                           mbenc_param->ps4x_memv_data_buffer,
                                           I965_SURFACEFORMAT_R8_UNORM,
                                           VP9_BTI_MBENC_HME_MV_DATA_G9);

            i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
                                           mbenc_param->ps4x_memv_distortion_buffer,
                                           I965_SURFACEFORMAT_R8_UNORM,
                                           VP9_BTI_MBENC_HME_DISTORTION_G9);

        if (mbenc_param->segmentation_enabled) {
            i965_add_buffer_2d_gpe_surface(ctx,
                                           mbenc_param->pres_segmentation_map,
                                           I965_SURFACEFORMAT_R8_UNORM,
                                           VP9_BTI_MBENC_SEGMENTATION_MAP_G9);

        /* Previous and current mode-decision buffers (double-buffered by
         * curr_mode_decision_index in the caller) */
        res_size = 16 * mbenc_param->frame_width_in_mb *
                   mbenc_param->frame_height_in_mb * sizeof(unsigned int);
        i965_add_buffer_gpe_surface(ctx,
                                    mbenc_param->pres_mode_decision_prev,
                                    VP9_BTI_MBENC_MODE_DECISION_PREV_G9);

        i965_add_buffer_gpe_surface(ctx,
                                    mbenc_param->pres_mode_decision,
                                    VP9_BTI_MBENC_MODE_DECISION_G9);

        i965_add_buffer_2d_gpe_surface(ctx,
                                       mbenc_param->pres_output_16x16_inter_modes,
                                       I965_SURFACEFORMAT_R8_UNORM,
                                       VP9_BTI_MBENC_OUT_16x16_INTER_MODES_G9);

        /* TX kernel CURBE, as in the I-16x16 case */
        gen9_add_dri_buffer_gpe_surface(ctx,
                                        mbenc_param->gpe_context_tx->curbe.bo,
                                        ALIGN(res_size, 64),
                                        mbenc_param->gpe_context_tx->curbe.offset,
                                        VP9_BTI_MBENC_TX_CURBE_G9);

    case VP9_MEDIA_STATE_MBENC_TX: {
        obj_surface = mbenc_param->curr_frame_obj;

        i965_add_2d_gpe_surface(ctx,
                                I965_SURFACEFORMAT_R8_UNORM,
                                VP9_BTI_MBENC_CURR_Y_G9);

        i965_add_2d_gpe_surface(ctx,
                                I965_SURFACEFORMAT_R16_UINT,
                                VP9_BTI_MBENC_CURR_UV_G9);

        if (mbenc_param->segmentation_enabled) {
            i965_add_buffer_2d_gpe_surface(ctx,
                                           mbenc_param->pres_segmentation_map,
                                           I965_SURFACEFORMAT_R8_UNORM,
                                           VP9_BTI_MBENC_SEGMENTATION_MAP_G9);

        res_size = 16 * mbenc_param->frame_width_in_mb *
                   mbenc_param->frame_height_in_mb * sizeof(unsigned int);
        i965_add_buffer_gpe_surface(ctx,
                                    mbenc_param->pres_mode_decision,
                                    VP9_BTI_MBENC_MODE_DECISION_G9);

        /* PAK data: 4 dwords per 64x64 super-block */
        res_size = frame_width_in_sb * frame_height_in_sb * 4 * sizeof(unsigned int);
        i965_add_buffer_gpe_surface(ctx,
                                    mbenc_param->pres_mb_code_surface,
                                    VP9_BTI_MBENC_PAK_DATA_G9);

        /* CU records: 64 CUs x 16 dwords per super-block, written at
         * mb_data_offset within the same mb-code surface */
        res_size = frame_width_in_sb * frame_height_in_sb *
                   64 * 16 * sizeof(unsigned int);

        i965_add_buffer_gpe_surface(ctx,
                                    mbenc_param->pres_mb_code_surface,
                                    mbenc_param->mb_data_offset,
                                    VP9_BTI_MBENC_CU_RECORDS_G9);
3322 gen9_vp9_mbenc_kernel(VADriverContextP ctx,
3323 struct encode_state *encode_state,
3324 struct intel_encoder_context *encoder_context,
3327 struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
3328 struct i965_gpe_context *gpe_context, *tx_gpe_context;
3329 struct gpe_media_object_walker_parameter media_object_walker_param;
3330 struct gpe_encoder_kernel_walker_parameter kernel_walker_param;
3331 unsigned int resolution_x, resolution_y;
3332 struct gen9_vp9_state *vp9_state;
3333 VAEncPictureParameterBufferVP9 *pic_param;
3334 struct gen9_vp9_mbenc_curbe_param curbe_param;
3335 struct gen9_vp9_mbenc_surface_param surface_param;
3336 VAStatus va_status = VA_STATUS_SUCCESS;
3337 int mbenc_gpe_index = 0;
3338 struct object_surface *obj_surface;
3339 struct gen9_surface_vp9 *vp9_priv_surface;
3341 vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
3343 if (!vp9_state || !vp9_state->pic_param)
3344 return VA_STATUS_ERROR_ENCODING_ERROR;
3346 pic_param = vp9_state->pic_param;
3348 switch (media_function) {
3349 case VP9_MEDIA_STATE_MBENC_I_32x32:
3350 mbenc_gpe_index = VP9_MBENC_IDX_KEY_32x32;
3353 case VP9_MEDIA_STATE_MBENC_I_16x16:
3354 mbenc_gpe_index = VP9_MBENC_IDX_KEY_16x16;
3357 case VP9_MEDIA_STATE_MBENC_P:
3358 mbenc_gpe_index = VP9_MBENC_IDX_INTER;
3361 case VP9_MEDIA_STATE_MBENC_TX:
3362 mbenc_gpe_index = VP9_MBENC_IDX_TX;
3366 va_status = VA_STATUS_ERROR_OPERATION_FAILED;
3370 gpe_context = &(vme_context->mbenc_context.gpe_contexts[mbenc_gpe_index]);
3371 tx_gpe_context = &(vme_context->mbenc_context.gpe_contexts[VP9_MBENC_IDX_TX]);
3373 gen9_gpe_reset_binding_table(ctx, gpe_context);
3376 if (!vp9_state->mbenc_curbe_set_in_brc_update) {
3377 if (media_function == VP9_MEDIA_STATE_MBENC_I_32x32 ||
3378 media_function == VP9_MEDIA_STATE_MBENC_P) {
3379 memset(&curbe_param, 0, sizeof(curbe_param));
3380 curbe_param.ppic_param = vp9_state->pic_param;
3381 curbe_param.pseq_param = vp9_state->seq_param;
3382 curbe_param.psegment_param = vp9_state->segment_param;
3383 curbe_param.frame_width_in_mb = vp9_state->frame_width_in_mb;
3384 curbe_param.frame_height_in_mb = vp9_state->frame_height_in_mb;
3385 curbe_param.last_ref_obj = vp9_state->last_ref_obj;
3386 curbe_param.golden_ref_obj = vp9_state->golden_ref_obj;
3387 curbe_param.alt_ref_obj = vp9_state->alt_ref_obj;
3388 curbe_param.hme_enabled = vp9_state->hme_enabled;
3389 curbe_param.ref_frame_flag = vp9_state->ref_frame_flag;
3390 curbe_param.picture_coding_type = vp9_state->picture_coding_type;
3391 curbe_param.media_state_type = media_function;
3392 curbe_param.mbenc_curbe_set_in_brc_update = vp9_state->mbenc_curbe_set_in_brc_update;
3394 vme_context->pfn_set_curbe_mbenc(ctx,
3402 memset(&surface_param, 0, sizeof(surface_param));
3403 surface_param.media_state_type = media_function;
3404 surface_param.picture_coding_type = vp9_state->picture_coding_type;
3405 surface_param.frame_width = vp9_state->frame_width;
3406 surface_param.frame_height = vp9_state->frame_height;
3407 surface_param.frame_width_in_mb = vp9_state->frame_width_in_mb;
3408 surface_param.frame_height_in_mb = vp9_state->frame_height_in_mb;
3409 surface_param.hme_enabled = vp9_state->hme_enabled;
3410 surface_param.segmentation_enabled = pic_param->pic_flags.bits.segmentation_enabled;
3411 surface_param.pres_segmentation_map = &vme_context->mb_segment_map_surface;
3412 surface_param.ps4x_memv_data_buffer = &vme_context->s4x_memv_data_buffer;
3413 surface_param.ps4x_memv_distortion_buffer = &vme_context->s4x_memv_distortion_buffer;
3414 surface_param.pres_mode_decision =
3415 &vme_context->res_mode_decision[vp9_state->curr_mode_decision_index];
3416 surface_param.pres_mode_decision_prev =
3417 &vme_context->res_mode_decision[!vp9_state->curr_mode_decision_index];
3418 surface_param.pres_output_16x16_inter_modes = &vme_context->res_output_16x16_inter_modes;
3419 surface_param.pres_mbenc_curbe_buffer = NULL;
3420 surface_param.last_ref_obj = vp9_state->last_ref_obj;
3421 surface_param.golden_ref_obj = vp9_state->golden_ref_obj;
3422 surface_param.alt_ref_obj = vp9_state->alt_ref_obj;
3423 surface_param.pres_mb_code_surface = &vme_context->res_mb_code_surface;
3424 surface_param.gpe_context_tx = tx_gpe_context;
3425 surface_param.mb_data_offset = vp9_state->mb_data_offset;
3427 obj_surface = encode_state->reconstructed_object;
3428 vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
3429 if (vp9_state->dys_in_use &&
3430 (pic_param->frame_width_src != pic_param->frame_height_dst ||
3431 pic_param->frame_height_src != pic_param->frame_height_dst)) {
3432 obj_surface = vp9_priv_surface->dys_surface_obj;
3434 obj_surface = encode_state->input_yuv_object;
3436 surface_param.curr_frame_obj = obj_surface;
3438 vme_context->pfn_send_mbenc_surface(ctx,
3444 if (media_function == VP9_MEDIA_STATE_MBENC_I_32x32) {
3445 resolution_x = ALIGN(vp9_state->frame_width, 32) / 32;
3446 resolution_y = ALIGN(vp9_state->frame_height, 32) / 32;
3448 resolution_x = ALIGN(vp9_state->frame_width, 16) / 16;
3449 resolution_y = ALIGN(vp9_state->frame_height, 16) / 16;
3452 memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
3453 kernel_walker_param.resolution_x = resolution_x;
3454 kernel_walker_param.resolution_y = resolution_y;
3456 if (media_function == VP9_MEDIA_STATE_MBENC_P ||
3457 media_function == VP9_MEDIA_STATE_MBENC_I_16x16) {
3458 kernel_walker_param.use_scoreboard = 1;
3459 kernel_walker_param.no_dependency = 0;
3460 kernel_walker_param.walker_degree = VP9_45Z_DEGREE;
3462 kernel_walker_param.use_scoreboard = 0;
3463 kernel_walker_param.no_dependency = 1;
3466 gen8_gpe_setup_interface_data(ctx, gpe_context);
3468 gen9_init_media_object_walker_parameter(encoder_context, &kernel_walker_param, &media_object_walker_param);
3470 gen9_run_kernel_media_object_walker(ctx, encoder_context,
3473 &media_object_walker_param);
/*
 * gen9_init_gpe_context_vp9:
 *
 * Initialize the sizing fields of a GPE context (CURBE, sampler, IDRT,
 * binding table / surface state area, and VFE state) for one VP9 encoder
 * kernel, based on the per-kernel parameters in @kernel_param.
 */
gen9_init_gpe_context_vp9(VADriverContextP ctx,
                          struct i965_gpe_context *gpe_context,
                          struct vp9_encoder_kernel_parameter *kernel_param)
    struct i965_driver_data *i965 = i965_driver_data(ctx);

    gpe_context->curbe.length = kernel_param->curbe_size; // in bytes

    /* No sampler state unless the kernel asks for one */
    gpe_context->sampler.entry_size = 0;
    gpe_context->sampler.max_entries = 0;

    if (kernel_param->sampler_size) {
        gpe_context->sampler.entry_size = ALIGN(kernel_param->sampler_size, 64);
        gpe_context->sampler.max_entries = 1;

    /* One interface descriptor per kernel in this context */
    gpe_context->idrt.entry_size = ALIGN(sizeof(struct gen8_interface_descriptor_data), 64); // 8 dws, 1 register
    gpe_context->idrt.max_entries = NUM_KERNELS_PER_GPE_CONTEXT;

    /* Binding table (4 bytes per entry) followed by padded surface states */
    gpe_context->surface_state_binding_table.max_entries = MAX_VP9_ENCODER_SURFACES;
    gpe_context->surface_state_binding_table.binding_table_offset = 0;
    gpe_context->surface_state_binding_table.surface_state_offset = ALIGN(MAX_VP9_ENCODER_SURFACES * 4, 64);
    gpe_context->surface_state_binding_table.length = ALIGN(MAX_VP9_ENCODER_SURFACES * 4, 64) + ALIGN(MAX_VP9_ENCODER_SURFACES * SURFACE_STATE_PADDED_SIZE_GEN9, 64);

    /* Thread count scales with the EU count reported by the kernel driver;
     * fall back to a conservative fixed value when it is unknown */
    if (i965->intel.eu_total > 0)
        gpe_context->vfe_state.max_num_threads = 6 * i965->intel.eu_total;
        gpe_context->vfe_state.max_num_threads = 112; // 16 EU * 7 threads

    gpe_context->vfe_state.curbe_allocation_size = MAX(1, ALIGN(gpe_context->curbe.length, 32) >> 5); // in registers
    gpe_context->vfe_state.urb_entry_size = MAX(1, ALIGN(kernel_param->inline_data_size, 32) >> 5); // in registers
    /* Remaining URB space after CURBE and interface descriptors, divided
     * into URB entries; clamped to the hardware-legal range [1, 127] */
    gpe_context->vfe_state.num_urb_entries = (MAX_URB_SIZE -
                                              gpe_context->vfe_state.curbe_allocation_size -
                                              ((gpe_context->idrt.entry_size >> 5) *
                                               gpe_context->idrt.max_entries)) / gpe_context->vfe_state.urb_entry_size;
    gpe_context->vfe_state.num_urb_entries = CLAMP(1, 127, gpe_context->vfe_state.num_urb_entries);
    gpe_context->vfe_state.gpgpu_mode = 0;
3518 gen9_init_vfe_scoreboard_vp9(struct i965_gpe_context *gpe_context,
3519 struct vp9_encoder_scoreboard_parameter *scoreboard_param)
3521 gpe_context->vfe_desc5.scoreboard0.mask = scoreboard_param->mask;
3522 gpe_context->vfe_desc5.scoreboard0.type = scoreboard_param->type;
3523 gpe_context->vfe_desc5.scoreboard0.enable = scoreboard_param->enable;
3525 if (scoreboard_param->walkpat_flag) {
3526 gpe_context->vfe_desc5.scoreboard0.mask = 0x0F;
3527 gpe_context->vfe_desc5.scoreboard0.type = 1;
3529 gpe_context->vfe_desc6.scoreboard1.delta_x0 = 0;
3530 gpe_context->vfe_desc6.scoreboard1.delta_y0 = -1;
3532 gpe_context->vfe_desc6.scoreboard1.delta_x1 = 0;
3533 gpe_context->vfe_desc6.scoreboard1.delta_y1 = -2;
3535 gpe_context->vfe_desc6.scoreboard1.delta_x2 = -1;
3536 gpe_context->vfe_desc6.scoreboard1.delta_y2 = 3;
3538 gpe_context->vfe_desc6.scoreboard1.delta_x3 = -1;
3539 gpe_context->vfe_desc6.scoreboard1.delta_y3 = 1;
3542 gpe_context->vfe_desc6.scoreboard1.delta_x0 = -1;
3543 gpe_context->vfe_desc6.scoreboard1.delta_y0 = 0;
3546 gpe_context->vfe_desc6.scoreboard1.delta_x1 = 0;
3547 gpe_context->vfe_desc6.scoreboard1.delta_y1 = -1;
3550 gpe_context->vfe_desc6.scoreboard1.delta_x2 = 1;
3551 gpe_context->vfe_desc6.scoreboard1.delta_y2 = -1;
3554 gpe_context->vfe_desc6.scoreboard1.delta_x3 = -1;
3555 gpe_context->vfe_desc6.scoreboard1.delta_y3 = -1;
3558 gpe_context->vfe_desc7.scoreboard2.delta_x4 = -1;
3559 gpe_context->vfe_desc7.scoreboard2.delta_y4 = 1;
3562 gpe_context->vfe_desc7.scoreboard2.delta_x5 = 0;
3563 gpe_context->vfe_desc7.scoreboard2.delta_y5 = -2;
3566 gpe_context->vfe_desc7.scoreboard2.delta_x6 = 1;
3567 gpe_context->vfe_desc7.scoreboard2.delta_y6 = -2;
3570 gpe_context->vfe_desc7.scoreboard2.delta_x6 = -1;
3571 gpe_context->vfe_desc7.scoreboard2.delta_y6 = -2;
3575 #define VP9_VME_REF_WIN 48
/*
 * gen9_encode_vp9_check_parameter:
 *
 * Validate the per-frame VA buffers and derive the per-frame encoder state:
 * coded-buffer / surface sanity checks, reference-frame selection and
 * de-duplication, segmentation and sequence parameter capture, BRC rate
 * parameters, scaled frame dimensions, dynamic-scaling detection and
 * HME/16xME enabling.
 *
 * Returns VA_STATUS_SUCCESS or the VA error describing the first invalid
 * input found.
 */
gen9_encode_vp9_check_parameter(VADriverContextP ctx,
                                struct encode_state *encode_state,
                                struct intel_encoder_context *encoder_context)
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct gen9_vp9_state *vp9_state;
    VAEncPictureParameterBufferVP9 *pic_param;
    VAEncMiscParameterTypeVP9PerSegmantParam *seg_param;
    VAEncSequenceParameterBufferVP9 *seq_param;
    struct object_surface *obj_surface;
    struct object_buffer *obj_buffer;
    struct gen9_surface_vp9 *vp9_priv_surface;

    vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;

    /* The picture parameter buffer is mandatory */
    if (!encode_state->pic_param_ext ||
        !encode_state->pic_param_ext->buffer) {
        return VA_STATUS_ERROR_INVALID_PARAMETER;

    pic_param = (VAEncPictureParameterBufferVP9 *)encode_state->pic_param_ext->buffer;

    /* The coded buffer must exist and be backed by a real BO */
    obj_buffer = BUFFER(pic_param->coded_buf);
        !obj_buffer->buffer_store ||
        !obj_buffer->buffer_store->bo)
        return VA_STATUS_ERROR_INVALID_PARAMETER;

    encode_state->coded_buf_object = obj_buffer;

    /* The encode status report is written into the coded buffer's BO */
    vp9_state->status_buffer.bo = obj_buffer->buffer_store->bo;

    encode_state->reconstructed_object = SURFACE(pic_param->reconstructed_frame);

    if (!encode_state->reconstructed_object ||
        !encode_state->input_yuv_object)
        return VA_STATUS_ERROR_INVALID_PARAMETER;

    vp9_state->curr_frame = pic_param->reconstructed_frame;
    vp9_state->ref_frame_flag = 0;
    if (pic_param->pic_flags.bits.frame_type == KEY_FRAME ||
        pic_param->pic_flags.bits.intra_only) {
        /* this will be regarded as I-frame type */
        vp9_state->picture_coding_type = 0;
        vp9_state->last_ref_obj = NULL;
        vp9_state->golden_ref_obj = NULL;
        vp9_state->alt_ref_obj = NULL;
        /* Inter frame: collect the enabled references from both ref lists */
        vp9_state->picture_coding_type = 1;
        vp9_state->ref_frame_flag = pic_param->ref_flags.bits.ref_frame_ctrl_l0 |
                                    pic_param->ref_flags.bits.ref_frame_ctrl_l1;

        /* LAST reference: drop it when the surface is missing or was never
         * encoded by us (no private data) */
        obj_surface = SURFACE(pic_param->reference_frames[pic_param->ref_flags.bits.ref_last_idx]);
        vp9_state->last_ref_obj = obj_surface;
            !obj_surface->private_data) {
            vp9_state->last_ref_obj = NULL;
            vp9_state->ref_frame_flag &= ~(VP9_LAST_REF);

        /* GOLDEN reference: same validation */
        obj_surface = SURFACE(pic_param->reference_frames[pic_param->ref_flags.bits.ref_gf_idx]);
        vp9_state->golden_ref_obj = obj_surface;
            !obj_surface->private_data) {
            vp9_state->golden_ref_obj = NULL;
            vp9_state->ref_frame_flag &= ~(VP9_GOLDEN_REF);

        /* ALT reference: same validation */
        obj_surface = SURFACE(pic_param->reference_frames[pic_param->ref_flags.bits.ref_arf_idx]);
        vp9_state->alt_ref_obj = obj_surface;
            !obj_surface->private_data) {
            vp9_state->alt_ref_obj = NULL;
            vp9_state->ref_frame_flag &= ~(VP9_ALT_REF);

        /* remove the duplicated flag and ref frame list */
        if (vp9_state->ref_frame_flag & VP9_LAST_REF) {
            if (pic_param->reference_frames[pic_param->ref_flags.bits.ref_last_idx] ==
                pic_param->reference_frames[pic_param->ref_flags.bits.ref_gf_idx]) {
                vp9_state->ref_frame_flag &= ~(VP9_GOLDEN_REF);
                vp9_state->golden_ref_obj = NULL;

            if (pic_param->reference_frames[pic_param->ref_flags.bits.ref_last_idx] ==
                pic_param->reference_frames[pic_param->ref_flags.bits.ref_arf_idx]) {
                vp9_state->ref_frame_flag &= ~(VP9_ALT_REF);
                vp9_state->alt_ref_obj = NULL;

        if (vp9_state->ref_frame_flag & VP9_GOLDEN_REF) {
            if (pic_param->reference_frames[pic_param->ref_flags.bits.ref_gf_idx] ==
                pic_param->reference_frames[pic_param->ref_flags.bits.ref_arf_idx]) {
                vp9_state->ref_frame_flag &= ~(VP9_ALT_REF);
                vp9_state->alt_ref_obj = NULL;

        /* An inter frame with no usable reference cannot be encoded */
        if (vp9_state->ref_frame_flag == 0)
            return VA_STATUS_ERROR_INVALID_PARAMETER;

    /* Per-segment QP/LF deltas are delivered through the q_matrix buffer */
    if (pic_param->pic_flags.bits.segmentation_enabled) {
        if (!encode_state->q_matrix ||
            !encode_state->q_matrix->buffer) {
            return VA_STATUS_ERROR_INVALID_PARAMETER;
        seg_param = (VAEncMiscParameterTypeVP9PerSegmantParam *)
            encode_state->q_matrix->buffer;

    /* Sequence parameters are optional; fall back to a driver-internal
     * placeholder when the application did not supply them */
    if (encode_state->seq_param_ext &&
        encode_state->seq_param_ext->buffer)
        seq_param = (VAEncSequenceParameterBufferVP9 *)encode_state->seq_param_ext->buffer;
        seq_param = &vp9_state->bogus_seq_param;

    vp9_state->pic_param = pic_param;
    vp9_state->segment_param = seg_param;
    vp9_state->seq_param = seq_param;

    /* The recon surface must be large enough for the coded resolution */
    obj_surface = encode_state->reconstructed_object;
    if (pic_param->frame_width_dst > obj_surface->orig_width ||
        pic_param->frame_height_dst > obj_surface->orig_height)
        return VA_STATUS_ERROR_INVALID_SURFACE;

    /* src != dst needs dynamic scaling support */
    if (!vp9_state->dys_enabled &&
        ((pic_param->frame_width_src != pic_param->frame_width_dst) ||
         (pic_param->frame_height_src != pic_param->frame_height_dst)))
        return VA_STATUS_ERROR_UNIMPLEMENTED;

    if (vp9_state->brc_enabled) {
        /* NOTE(review): picture_coding_type is 0/1 while KEY_FRAME is a
         * frame-type constant; this works only if KEY_FRAME == 0 — confirm */
        if (vp9_state->first_frame || vp9_state->picture_coding_type == KEY_FRAME) {
            vp9_state->brc_reset = encoder_context->brc.need_reset || vp9_state->first_frame;

            if (!encoder_context->brc.framerate[0].num || !encoder_context->brc.framerate[0].den ||
                !encoder_context->brc.bits_per_second[0])
                return VA_STATUS_ERROR_INVALID_PARAMETER;

            vp9_state->gop_size = encoder_context->brc.gop_size;
            vp9_state->framerate = encoder_context->brc.framerate[0];
            /* Cap the frame rate at the encoder maximum */
            if ((vp9_state->framerate.num / vp9_state->framerate.den) > MAX_VP9_ENCODER_FRAMERATE) {
                vp9_state->framerate.num = MAX_VP9_ENCODER_FRAMERATE * vp9_state->framerate.den;
                i965_log_info(ctx, "gen9_encode_vp9_check_parameter: Too high frame rate(num: %d, den: %d), max supported is %d fps.\n",
                              vp9_state->framerate.num, vp9_state->framerate.den, MAX_VP9_ENCODER_FRAMERATE);

            /* CBR (or missing target percentage): target == max == min.
             * Otherwise derive target/min from the VBR target percentage. */
            if (encoder_context->rate_control_mode == VA_RC_CBR ||
                !encoder_context->brc.target_percentage[0]) {
                vp9_state->target_bit_rate = encoder_context->brc.bits_per_second[0];
                vp9_state->max_bit_rate = vp9_state->target_bit_rate;
                vp9_state->min_bit_rate = vp9_state->target_bit_rate;
                vp9_state->max_bit_rate = encoder_context->brc.bits_per_second[0];
                vp9_state->target_bit_rate = vp9_state->max_bit_rate * encoder_context->brc.target_percentage[0] / 100;
                if (2 * vp9_state->target_bit_rate < vp9_state->max_bit_rate)
                    vp9_state->min_bit_rate = 0;
                    vp9_state->min_bit_rate = 2 * vp9_state->target_bit_rate - vp9_state->max_bit_rate;

            /* HRD buffer size: explicit, else derived from the BRC window,
             * else one second's worth of max bitrate */
            if (encoder_context->brc.hrd_buffer_size)
                vp9_state->vbv_buffer_size_in_bit = encoder_context->brc.hrd_buffer_size;
            else if (encoder_context->brc.window_size)
                vp9_state->vbv_buffer_size_in_bit = (uint64_t)vp9_state->max_bit_rate * encoder_context->brc.window_size / 1000;
                vp9_state->vbv_buffer_size_in_bit = vp9_state->max_bit_rate;
            if (encoder_context->brc.hrd_initial_buffer_fullness)
                vp9_state->init_vbv_buffer_fullness_in_bit = encoder_context->brc.hrd_initial_buffer_fullness;
                vp9_state->init_vbv_buffer_fullness_in_bit = vp9_state->vbv_buffer_size_in_bit / 2;

    /* Coded (destination) resolution and the 4x/16x downscaled variants
     * used by the HME search passes */
    vp9_state->frame_width = pic_param->frame_width_dst;
    vp9_state->frame_height = pic_param->frame_height_dst;

    vp9_state->frame_width_4x = ALIGN(vp9_state->frame_width / 4, 16);
    vp9_state->frame_height_4x = ALIGN(vp9_state->frame_height / 4, 16);

    vp9_state->frame_width_16x = ALIGN(vp9_state->frame_width / 16, 16);
    vp9_state->frame_height_16x = ALIGN(vp9_state->frame_height / 16, 16);

    vp9_state->frame_width_in_mb = ALIGN(vp9_state->frame_width, 16) / 16;
    vp9_state->frame_height_in_mb = ALIGN(vp9_state->frame_height, 16) / 16;

    vp9_state->downscaled_width_4x_in_mb = vp9_state->frame_width_4x / 16;
    vp9_state->downscaled_height_4x_in_mb = vp9_state->frame_height_4x / 16;
    vp9_state->downscaled_width_16x_in_mb = vp9_state->frame_width_16x / 16;
    vp9_state->downscaled_height_16x_in_mb = vp9_state->frame_height_16x / 16;

    /* Dynamic scaling is needed when src and dst sizes differ... */
    vp9_state->dys_in_use = 0;
    if (pic_param->frame_width_src != pic_param->frame_width_dst ||
        pic_param->frame_height_src != pic_param->frame_height_dst)
        vp9_state->dys_in_use = 1;
    vp9_state->dys_ref_frame_flag = 0;
    /* check the dys setting. The dys is supported by default. */
    /* ...or when any enabled reference was coded at a different size */
    if (pic_param->pic_flags.bits.frame_type != KEY_FRAME &&
        !pic_param->pic_flags.bits.intra_only) {
        vp9_state->dys_ref_frame_flag = vp9_state->ref_frame_flag;

        if ((vp9_state->ref_frame_flag & VP9_LAST_REF) &&
            vp9_state->last_ref_obj) {
            obj_surface = vp9_state->last_ref_obj;
            vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);

            if (vp9_state->frame_width == vp9_priv_surface->frame_width &&
                vp9_state->frame_height == vp9_priv_surface->frame_height)
                vp9_state->dys_ref_frame_flag &= ~(VP9_LAST_REF);

        if ((vp9_state->ref_frame_flag & VP9_GOLDEN_REF) &&
            vp9_state->golden_ref_obj) {
            obj_surface = vp9_state->golden_ref_obj;
            vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);

            if (vp9_state->frame_width == vp9_priv_surface->frame_width &&
                vp9_state->frame_height == vp9_priv_surface->frame_height)
                vp9_state->dys_ref_frame_flag &= ~(VP9_GOLDEN_REF);

        if ((vp9_state->ref_frame_flag & VP9_ALT_REF) &&
            vp9_state->alt_ref_obj) {
            obj_surface = vp9_state->alt_ref_obj;
            vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);

            if (vp9_state->frame_width == vp9_priv_surface->frame_width &&
                vp9_state->frame_height == vp9_priv_surface->frame_height)
                vp9_state->dys_ref_frame_flag &= ~(VP9_ALT_REF);

        if (vp9_state->dys_ref_frame_flag)
            vp9_state->dys_in_use = 1;

    if (vp9_state->hme_supported) {
        vp9_state->hme_enabled = 1;
        vp9_state->hme_enabled = 0;

    if (vp9_state->b16xme_supported) {
        vp9_state->b16xme_enabled = 1;
        vp9_state->b16xme_enabled = 0;

    /* disable HME/16xME if the size is too small */
    if (vp9_state->frame_width_4x <= VP9_VME_REF_WIN ||
        vp9_state->frame_height_4x <= VP9_VME_REF_WIN) {
        vp9_state->hme_enabled = 0;
        vp9_state->b16xme_enabled = 0;

    if (vp9_state->frame_width_16x < VP9_VME_REF_WIN ||
        vp9_state->frame_height_16x < VP9_VME_REF_WIN)
        vp9_state->b16xme_enabled = 0;

    /* HME is meaningless without references.
     * NOTE(review): this test uses HCP_VP9_KEY_FRAME while the earlier
     * frame-type test uses KEY_FRAME — presumably both are 0; confirm and
     * unify the constant. */
    if (pic_param->pic_flags.bits.frame_type == HCP_VP9_KEY_FRAME ||
        pic_param->pic_flags.bits.intra_only) {
        vp9_state->hme_enabled = 0;
        vp9_state->b16xme_enabled = 0;

    /* BRC distortion surface is only produced for key frames when the
     * hardware/kernels support it */
    vp9_state->mbenc_keyframe_dist_enabled = 0;
    if ((vp9_state->picture_coding_type == KEY_FRAME) &&
        vp9_state->brc_distortion_buffer_supported)
        vp9_state->mbenc_keyframe_dist_enabled = 1;

    return VA_STATUS_SUCCESS;
3856 gen9_vme_gpe_kernel_prepare_vp9(VADriverContextP ctx,
3857 struct encode_state *encode_state,
3858 struct intel_encoder_context *encoder_context)
3860 struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
3861 struct vp9_surface_param surface_param;
3862 struct gen9_vp9_state *vp9_state;
3863 VAEncPictureParameterBufferVP9 *pic_param;
3864 struct object_surface *obj_surface;
3865 struct gen9_surface_vp9 *vp9_surface;
3866 int driver_header_flag = 0;
3869 vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
3871 if (!vp9_state || !vp9_state->pic_param)
3872 return VA_STATUS_ERROR_INVALID_PARAMETER;
3874 pic_param = vp9_state->pic_param;
3876 /* this is to check whether the driver should generate the uncompressed header */
3877 driver_header_flag = 1;
3878 if (encode_state->packed_header_data_ext &&
3879 encode_state->packed_header_data_ext[0] &&
3880 pic_param->bit_offset_first_partition_size) {
3881 VAEncPackedHeaderParameterBuffer *param = NULL;
3883 param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_params_ext[0]->buffer;
3885 if (param->type == VAEncPackedHeaderRawData) {
3887 unsigned int length_in_bits;
3889 header_data = (char *)encode_state->packed_header_data_ext[0]->buffer;
3890 length_in_bits = param->bit_length;
3891 driver_header_flag = 0;
3893 vp9_state->frame_header.bit_offset_first_partition_size =
3894 pic_param->bit_offset_first_partition_size;
3895 vp9_state->header_length = ALIGN(length_in_bits, 8) >> 3;
3896 vp9_state->alias_insert_data = header_data;
3898 vp9_state->frame_header.bit_offset_ref_lf_delta = pic_param->bit_offset_ref_lf_delta;
3899 vp9_state->frame_header.bit_offset_mode_lf_delta = pic_param->bit_offset_mode_lf_delta;
3900 vp9_state->frame_header.bit_offset_lf_level = pic_param->bit_offset_lf_level;
3901 vp9_state->frame_header.bit_offset_qindex = pic_param->bit_offset_qindex;
3902 vp9_state->frame_header.bit_offset_segmentation = pic_param->bit_offset_segmentation;
3903 vp9_state->frame_header.bit_size_segmentation = pic_param->bit_size_segmentation;
3907 if (driver_header_flag) {
3908 memset(&vp9_state->frame_header, 0, sizeof(vp9_state->frame_header));
3909 intel_write_uncompressed_header(encode_state,
3910 VAProfileVP9Profile0,
3911 vme_context->frame_header_data,
3912 &vp9_state->header_length,
3913 &vp9_state->frame_header);
3914 vp9_state->alias_insert_data = vme_context->frame_header_data;
3917 va_status = i965_check_alloc_surface_bo(ctx, encode_state->input_yuv_object,
3918 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
3919 if (va_status != VA_STATUS_SUCCESS)
3922 va_status = i965_check_alloc_surface_bo(ctx, encode_state->reconstructed_object,
3923 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
3925 if (va_status != VA_STATUS_SUCCESS)
3928 surface_param.frame_width = vp9_state->frame_width;
3929 surface_param.frame_height = vp9_state->frame_height;
3930 va_status = gen9_vp9_init_check_surfaces(ctx,
3931 encode_state->reconstructed_object,
3935 vp9_surface = (struct gen9_surface_vp9*)encode_state->reconstructed_object;
3937 vp9_surface->qp_value = pic_param->luma_ac_qindex + pic_param->luma_dc_qindex_delta;
3939 if (vp9_state->dys_in_use &&
3940 (pic_param->frame_width_src != pic_param->frame_width_dst ||
3941 pic_param->frame_height_src != pic_param->frame_height_dst)) {
3942 surface_param.frame_width = pic_param->frame_width_dst;
3943 surface_param.frame_height = pic_param->frame_height_dst;
3944 va_status = gen9_vp9_check_dys_surfaces(ctx,
3945 encode_state->reconstructed_object,
3952 if (vp9_state->dys_ref_frame_flag) {
3953 if ((vp9_state->dys_ref_frame_flag & VP9_LAST_REF) &&
3954 vp9_state->last_ref_obj) {
3955 obj_surface = vp9_state->last_ref_obj;
3956 surface_param.frame_width = vp9_state->frame_width;
3957 surface_param.frame_height = vp9_state->frame_height;
3958 va_status = gen9_vp9_check_dys_surfaces(ctx,
3965 if ((vp9_state->dys_ref_frame_flag & VP9_GOLDEN_REF) &&
3966 vp9_state->golden_ref_obj) {
3967 obj_surface = vp9_state->golden_ref_obj;
3968 surface_param.frame_width = vp9_state->frame_width;
3969 surface_param.frame_height = vp9_state->frame_height;
3970 va_status = gen9_vp9_check_dys_surfaces(ctx,
3977 if ((vp9_state->dys_ref_frame_flag & VP9_ALT_REF) &&
3978 vp9_state->alt_ref_obj) {
3979 obj_surface = vp9_state->alt_ref_obj;
3980 surface_param.frame_width = vp9_state->frame_width;
3981 surface_param.frame_height = vp9_state->frame_height;
3982 va_status = gen9_vp9_check_dys_surfaces(ctx,
3991 if (va_status != VA_STATUS_SUCCESS)
3993 /* check the corresponding ref_frame_flag && dys_ref_frame_flag */
3995 return VA_STATUS_SUCCESS;
/*
 * Per-frame (re)initialization of the GPE contexts used by the VME kernels.
 *
 * All NUM_VP9_MBENC MBEnc GPE contexts are re-initialized and then bound to a
 * single shared dynamic-state bo: curbe at offset 0, sampler area after it,
 * and one interface descriptor per MBEnc variant. The DYS context is also
 * re-initialized and its AVS sampler state programmed.
 *
 * NOTE(review): braces, the dri_bo_alloc() argument tail and any error
 * handling are elided from this view — comments describe only visible code.
 */
3999 gen9_vme_gpe_kernel_init_vp9(VADriverContextP ctx,
4000 struct encode_state *encode_state,
4001 struct intel_encoder_context *encoder_context)
4003 struct i965_driver_data *i965 = i965_driver_data(ctx);
4004 struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
4005 struct vp9_mbenc_context *mbenc_context = &vme_context->mbenc_context;
4006 struct vp9_dys_context *dys_context = &vme_context->dys_context;
4007 struct gpe_dynamic_state_parameter ds_param;
4011 * BRC will update MBEnc curbe data buffer, so initialize GPE context for
4014 for (i = 0; i < NUM_VP9_MBENC; i++) {
4015 gen8_gpe_context_init(ctx, &mbenc_context->gpe_contexts[i]);
4019 * VP9_MBENC_XXX uses the same dynamic state buffer as they share the same
/* Shared bo layout: aligned curbe + 128-byte sampler pad + one aligned IDRT
 * entry per MBEnc kernel variant. */
4022 ds_param.bo_size = ALIGN(sizeof(vp9_mbenc_curbe_data), 64) + 128 +
4023 ALIGN(sizeof(struct gen8_interface_descriptor_data), 64) * NUM_VP9_MBENC;
4024 mbenc_context->mbenc_bo_dys = dri_bo_alloc(i965->intel.bufmgr,
4028 mbenc_context->mbenc_bo_size = ds_param.bo_size;
4030 ds_param.bo = mbenc_context->mbenc_bo_dys;
4031 ds_param.curbe_offset = 0;
4032 ds_param.sampler_offset = ALIGN(sizeof(vp9_mbenc_curbe_data), 64);
/* Point every MBEnc GPE context at its own interface-descriptor slot inside
 * the shared dynamic-state bo. */
4033 for (i = 0; i < NUM_VP9_MBENC; i++) {
4034 ds_param.idrt_offset = ds_param.sampler_offset + 128 +
4035 ALIGN(sizeof(struct gen8_interface_descriptor_data), 64) * i;
4037 gen8_gpe_context_set_dynamic_buffer(ctx,
4038 &mbenc_context->gpe_contexts[i],
/* DYS (dynamic scaling) kernel needs its AVS sampler state programmed. */
4042 gen8_gpe_context_init(ctx, &dys_context->gpe_context);
4043 gen9_vp9_dys_set_sampler_state(&dys_context->gpe_context);
4045 return VA_STATUS_SUCCESS;
/*
 * Per-frame teardown counterpart of gen9_vme_gpe_kernel_init_vp9():
 * drops the reference on the shared MBEnc/DYS dynamic-state bo allocated
 * there and clears the pointer so a stale bo is never reused.
 */
4049 gen9_vme_gpe_kernel_final_vp9(VADriverContextP ctx,
4050 struct encode_state *encode_state,
4051 struct intel_encoder_context *encoder_context)
4053 struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
4054 struct vp9_mbenc_context *mbenc_context = &vme_context->mbenc_context;
4056 dri_bo_unreference(mbenc_context->mbenc_bo_dys);
4057 mbenc_context->mbenc_bo_dys = NULL;
4059 return VA_STATUS_SUCCESS;
/*
 * Dispatches the full VME kernel sequence for one frame:
 *   1. DYS (dynamic scaling) of reference frames when in use;
 *   2. BRC init/reset kernel when BRC is enabled and not yet initialized
 *      (or a reset was requested);
 *   3. on key frames, clear both mode-decision ping-pong buffers;
 *   4. 4x (and optionally 16x) downscaling when HME is supported;
 *   5. HME kernels (16x first when enabled, then 4x) for inter frames;
 *   6. BRC intra-distortion (optional) and BRC update kernels;
 *   7. MBEnc: I_32x32 + I_16x16 for key frames, otherwise P, and finally
 *      the TX phase;
 *   8. flip the mode-decision ping-pong index and latch BRC state.
 *
 * NOTE(review): kernel-call argument tails and several closing braces are
 * elided from this view; comments describe only the visible statements.
 */
4063 gen9_vme_gpe_kernel_run_vp9(VADriverContextP ctx,
4064 struct encode_state *encode_state,
4065 struct intel_encoder_context *encoder_context)
4067 struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
4068 struct gen9_vp9_state *vp9_state;
4071 vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
4073 if (!vp9_state || !vp9_state->pic_param)
4074 return VA_STATUS_ERROR_INVALID_PARAMETER;
4076 if (vp9_state->dys_in_use) {
4077 gen9_vp9_run_dys_refframes(ctx, encode_state, encoder_context);
/* (Re)program BRC state on first use or after an explicit reset request. */
4080 if (vp9_state->brc_enabled && (vp9_state->brc_reset || !vp9_state->brc_inited)) {
4081 gen9_vp9_brc_init_reset_kernel(ctx, encode_state, encoder_context);
/* Key frames must not inherit inter-frame mode decisions: zero both halves
 * of the ping-pong mode-decision resource. */
4084 if (vp9_state->picture_coding_type == KEY_FRAME) {
4085 for (i = 0; i < 2; i++)
4086 i965_zero_gpe_resource(&vme_context->res_mode_decision[i]);
4089 if (vp9_state->hme_supported) {
4090 gen9_vp9_scaling_kernel(ctx, encode_state,
4093 if (vp9_state->b16xme_supported) {
4094 gen9_vp9_scaling_kernel(ctx, encode_state,
/* picture_coding_type != 0 means inter frame; ME only applies there. */
4100 if (vp9_state->picture_coding_type && vp9_state->hme_enabled) {
4101 if (vp9_state->b16xme_enabled)
4102 gen9_vp9_me_kernel(ctx, encode_state,
4106 gen9_vp9_me_kernel(ctx, encode_state,
4111 if (vp9_state->brc_enabled) {
4112 if (vp9_state->mbenc_keyframe_dist_enabled)
4113 gen9_vp9_brc_intra_dist_kernel(ctx,
4117 gen9_vp9_brc_update_kernel(ctx, encode_state,
4121 if (vp9_state->picture_coding_type == KEY_FRAME) {
4122 gen9_vp9_mbenc_kernel(ctx, encode_state,
4124 VP9_MEDIA_STATE_MBENC_I_32x32);
4125 gen9_vp9_mbenc_kernel(ctx, encode_state,
4127 VP9_MEDIA_STATE_MBENC_I_16x16);
4129 gen9_vp9_mbenc_kernel(ctx, encode_state,
4131 VP9_MEDIA_STATE_MBENC_P);
/* TX-decision phase always runs after the I/P MBEnc pass. */
4134 gen9_vp9_mbenc_kernel(ctx, encode_state,
4136 VP9_MEDIA_STATE_MBENC_TX);
/* Swap the mode-decision ping-pong buffer for the next frame. */
4138 vp9_state->curr_mode_decision_index ^= 1;
4139 if (vp9_state->brc_enabled) {
4140 vp9_state->brc_inited = 1;
4141 vp9_state->brc_reset = 0;
4144 return VA_STATUS_SUCCESS;
/*
 * Top-level VME entry point for one VP9 frame: validate parameters,
 * (re)allocate resources (BRC buffers only on first call), then run the
 * prepare -> init -> run -> final kernel sequence. Each step's VAStatus is
 * checked; failures propagate to the caller (the exact error paths are
 * elided from this view).
 */
4148 gen9_vme_pipeline_vp9(VADriverContextP ctx,
4150 struct encode_state *encode_state,
4151 struct intel_encoder_context *encoder_context)
4154 struct gen9_vp9_state *vp9_state;
4156 vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
4159 return VA_STATUS_ERROR_INVALID_CONTEXT;
4161 va_status = gen9_encode_vp9_check_parameter(ctx, encode_state, encoder_context);
4162 if (va_status != VA_STATUS_SUCCESS)
/* brc_allocated gates the one-time allocation of the BRC resources. */
4165 va_status = gen9_vp9_allocate_resources(ctx, encode_state,
4167 !vp9_state->brc_allocated);
4169 if (va_status != VA_STATUS_SUCCESS)
4171 vp9_state->brc_allocated = 1;
4173 va_status = gen9_vme_gpe_kernel_prepare_vp9(ctx, encode_state, encoder_context);
4175 if (va_status != VA_STATUS_SUCCESS)
4178 va_status = gen9_vme_gpe_kernel_init_vp9(ctx, encode_state, encoder_context);
4179 if (va_status != VA_STATUS_SUCCESS)
4182 va_status = gen9_vme_gpe_kernel_run_vp9(ctx, encode_state, encoder_context);
4183 if (va_status != VA_STATUS_SUCCESS)
4186 gen9_vme_gpe_kernel_final_vp9(ctx, encode_state, encoder_context);
4188 return VA_STATUS_SUCCESS;
/* Destroy every GPE context owned by the BRC kernel group. */
4192 gen9_vme_brc_context_destroy_vp9(struct vp9_brc_context *brc_context)
4196 for (i = 0; i < NUM_VP9_BRC; i++)
4197 gen8_gpe_context_destroy(&brc_context->gpe_contexts[i]);
/* Destroy every GPE context owned by the scaling kernel group (4x/2x). */
4201 gen9_vme_scaling_context_destroy_vp9(struct vp9_scaling_context *scaling_context)
4205 for (i = 0; i < NUM_VP9_SCALING; i++)
4206 gen8_gpe_context_destroy(&scaling_context->gpe_contexts[i]);
/* Destroy the single GPE context used by the ME (motion estimation) kernel. */
4210 gen9_vme_me_context_destroy_vp9(struct vp9_me_context *me_context)
4212 gen8_gpe_context_destroy(&me_context->gpe_context);
/*
 * Destroy all MBEnc GPE contexts and release the shared dynamic-state bo
 * (dri_bo_unreference(NULL) is a safe no-op if it was already released by
 * gen9_vme_gpe_kernel_final_vp9()).
 */
4216 gen9_vme_mbenc_context_destroy_vp9(struct vp9_mbenc_context *mbenc_context)
4220 for (i = 0; i < NUM_VP9_MBENC; i++)
4221 gen8_gpe_context_destroy(&mbenc_context->gpe_contexts[i]);
4222 dri_bo_unreference(mbenc_context->mbenc_bo_dys);
4223 mbenc_context->mbenc_bo_size = 0;
/* Destroy the GPE context used by the DYS (dynamic scaling) kernel. */
4227 gen9_vme_dys_context_destroy_vp9(struct vp9_dys_context *dys_context)
4229 gen8_gpe_context_destroy(&dys_context->gpe_context);
/*
 * Tear down all VME kernel sub-contexts and their shared resources.
 * Resource free happens first; the per-kernel destroy helpers above then
 * release each group's GPE contexts.
 */
4233 gen9_vme_kernel_context_destroy_vp9(struct gen9_encoder_context_vp9 *vme_context)
4235 gen9_vp9_free_resources(vme_context);
4236 gen9_vme_scaling_context_destroy_vp9(&vme_context->scaling_context);
4237 gen9_vme_me_context_destroy_vp9(&vme_context->me_context);
4238 gen9_vme_mbenc_context_destroy_vp9(&vme_context->mbenc_context);
4239 gen9_vme_brc_context_destroy_vp9(&vme_context->brc_context);
4240 gen9_vme_dys_context_destroy_vp9(&vme_context->dys_context);
/*
 * VME context destructor, registered with the encoder framework (hence the
 * void* signature). Frees the kernel contexts; the free() of the context
 * itself is elided from this view — presumably follows, TODO confirm.
 */
4246 gen9_vme_context_destroy_vp9(void *context)
4248 struct gen9_encoder_context_vp9 *vme_context = context;
4253 gen9_vme_kernel_context_destroy_vp9(vme_context);
/*
 * One-time initialization of the two scaling GPE contexts:
 *   gpe_contexts[0] — 4x downscale kernel (CM, with inline data);
 *   gpe_contexts[1] — 2x downscale kernel (CM, no inline data).
 * For each: size the curbe/inline/sampler areas, program the VFE
 * scoreboard, set the binding-table indices, then look up and load the
 * kernel binary from media_vp9_kernels.
 *
 * NOTE(review): the kernel-lookup/load argument tails are elided from this
 * view; comments describe only visible statements.
 */
4261 gen9_vme_scaling_context_init_vp9(VADriverContextP ctx,
4262 struct gen9_encoder_context_vp9 *vme_context,
4263 struct vp9_scaling_context *scaling_context)
4265 struct i965_gpe_context *gpe_context = NULL;
4266 struct vp9_encoder_kernel_parameter kernel_param;
4267 struct vp9_encoder_scoreboard_parameter scoreboard_param;
4268 struct i965_kernel scale_kernel;
4270 kernel_param.curbe_size = sizeof(vp9_scaling4x_curbe_data_cm);
4271 kernel_param.inline_data_size = sizeof(vp9_scaling4x_inline_data_cm);
4272 kernel_param.sampler_size = 0;
4274 memset(&scoreboard_param, 0, sizeof(scoreboard_param));
4275 scoreboard_param.mask = 0xFF;
4276 scoreboard_param.enable = vme_context->use_hw_scoreboard;
4277 scoreboard_param.type = vme_context->use_hw_non_stalling_scoreboard;
4278 scoreboard_param.walkpat_flag = 0;
/* Context 0: 4x scaling. */
4280 gpe_context = &scaling_context->gpe_contexts[0];
4281 gen9_init_gpe_context_vp9(ctx, gpe_context, &kernel_param);
4282 gen9_init_vfe_scoreboard_vp9(gpe_context, &scoreboard_param);
4284 scaling_context->scaling_4x_bti.scaling_frame_src_y = VP9_BTI_SCALING_FRAME_SRC_Y;
4285 scaling_context->scaling_4x_bti.scaling_frame_dst_y = VP9_BTI_SCALING_FRAME_DST_Y;
4286 scaling_context->scaling_4x_bti.scaling_frame_mbv_proc_stat_dst =
4287 VP9_BTI_SCALING_FRAME_MBVPROCSTATS_DST_CM;
4289 memset(&scale_kernel, 0, sizeof(scale_kernel));
4291 intel_vp9_get_kernel_header_and_size((void *)media_vp9_kernels,
4292 sizeof(media_vp9_kernels),
4293 INTEL_VP9_ENC_SCALING4X,
4297 gen8_gpe_load_kernels(ctx,
/* Context 1: 2x scaling — same scoreboard setup, no inline data. */
4302 kernel_param.curbe_size = sizeof(vp9_scaling2x_curbe_data_cm);
4303 kernel_param.inline_data_size = 0;
4304 kernel_param.sampler_size = 0;
4306 gpe_context = &scaling_context->gpe_contexts[1];
4307 gen9_init_gpe_context_vp9(ctx, gpe_context, &kernel_param);
4308 gen9_init_vfe_scoreboard_vp9(gpe_context, &scoreboard_param);
4310 memset(&scale_kernel, 0, sizeof(scale_kernel));
4312 intel_vp9_get_kernel_header_and_size((void *)media_vp9_kernels,
4313 sizeof(media_vp9_kernels),
4314 INTEL_VP9_ENC_SCALING2X,
4318 gen8_gpe_load_kernels(ctx,
4323 scaling_context->scaling_2x_bti.scaling_frame_src_y = VP9_BTI_SCALING_FRAME_SRC_Y;
4324 scaling_context->scaling_2x_bti.scaling_frame_dst_y = VP9_BTI_SCALING_FRAME_DST_Y;
/*
 * One-time initialization of the ME (HME) kernel GPE context: size the
 * curbe, program the VFE scoreboard, then look up and load the ME kernel
 * binary from media_vp9_kernels. The kernel index argument and the load
 * argument tail are elided from this view.
 */
4329 gen9_vme_me_context_init_vp9(VADriverContextP ctx,
4330 struct gen9_encoder_context_vp9 *vme_context,
4331 struct vp9_me_context *me_context)
4333 struct i965_gpe_context *gpe_context = NULL;
4334 struct vp9_encoder_kernel_parameter kernel_param;
4335 struct vp9_encoder_scoreboard_parameter scoreboard_param;
4336 struct i965_kernel scale_kernel;
4338 kernel_param.curbe_size = sizeof(vp9_me_curbe_data);
4339 kernel_param.inline_data_size = 0;
4340 kernel_param.sampler_size = 0;
4342 memset(&scoreboard_param, 0, sizeof(scoreboard_param));
4343 scoreboard_param.mask = 0xFF;
4344 scoreboard_param.enable = vme_context->use_hw_scoreboard;
4345 scoreboard_param.type = vme_context->use_hw_non_stalling_scoreboard;
4346 scoreboard_param.walkpat_flag = 0;
4348 gpe_context = &me_context->gpe_context;
4349 gen9_init_gpe_context_vp9(ctx, gpe_context, &kernel_param);
4350 gen9_init_vfe_scoreboard_vp9(gpe_context, &scoreboard_param);
4352 memset(&scale_kernel, 0, sizeof(scale_kernel));
4354 intel_vp9_get_kernel_header_and_size((void *)media_vp9_kernels,
4355 sizeof(media_vp9_kernels),
4360 gen8_gpe_load_kernels(ctx,
/*
 * One-time initialization of all NUM_VP9_MBENC GPE contexts. Each variant
 * shares the same curbe size and scoreboard settings, except that the
 * KEY_16x16 and INTER variants use walkpat_flag = 1 (a different media-walker
 * pattern); every context loads the same MBEnc kernel binary.
 *
 * NOTE(review): the loop's closing brace and the kernel-load argument tail
 * are elided from this view.
 */
4369 gen9_vme_mbenc_context_init_vp9(VADriverContextP ctx,
4370 struct gen9_encoder_context_vp9 *vme_context,
4371 struct vp9_mbenc_context *mbenc_context)
4373 struct i965_gpe_context *gpe_context = NULL;
4374 struct vp9_encoder_kernel_parameter kernel_param;
4375 struct vp9_encoder_scoreboard_parameter scoreboard_param;
4377 struct i965_kernel scale_kernel;
4379 kernel_param.curbe_size = sizeof(vp9_mbenc_curbe_data);
4380 kernel_param.inline_data_size = 0;
4381 kernel_param.sampler_size = 0;
4383 memset(&scoreboard_param, 0, sizeof(scoreboard_param));
4384 scoreboard_param.mask = 0xFF;
4385 scoreboard_param.enable = vme_context->use_hw_scoreboard;
4386 scoreboard_param.type = vme_context->use_hw_non_stalling_scoreboard;
4388 for (i = 0; i < NUM_VP9_MBENC; i++) {
4389 gpe_context = &mbenc_context->gpe_contexts[i];
/* KEY_16x16 and INTER use a different walker pattern than the others. */
4391 if ((i == VP9_MBENC_IDX_KEY_16x16) ||
4392 (i == VP9_MBENC_IDX_INTER)) {
4393 scoreboard_param.walkpat_flag = 1;
4395 scoreboard_param.walkpat_flag = 0;
4397 gen9_init_gpe_context_vp9(ctx, gpe_context, &kernel_param);
4398 gen9_init_vfe_scoreboard_vp9(gpe_context, &scoreboard_param);
4400 memset(&scale_kernel, 0, sizeof(scale_kernel));
4402 intel_vp9_get_kernel_header_and_size((void *)media_vp9_kernels,
4403 sizeof(media_vp9_kernels),
4404 INTEL_VP9_ENC_MBENC,
4408 gen8_gpe_load_kernels(ctx,
/*
 * One-time initialization of all NUM_VP9_BRC GPE contexts. All BRC kernel
 * variants share the same curbe size and scoreboard settings; each loads its
 * own kernel binary from media_vp9_kernels (the per-iteration kernel index
 * argument is elided from this view).
 */
4416 gen9_vme_brc_context_init_vp9(VADriverContextP ctx,
4417 struct gen9_encoder_context_vp9 *vme_context,
4418 struct vp9_brc_context *brc_context)
4420 struct i965_gpe_context *gpe_context = NULL;
4421 struct vp9_encoder_kernel_parameter kernel_param;
4422 struct vp9_encoder_scoreboard_parameter scoreboard_param;
4424 struct i965_kernel scale_kernel;
4426 kernel_param.curbe_size = sizeof(vp9_brc_curbe_data);
4427 kernel_param.inline_data_size = 0;
4428 kernel_param.sampler_size = 0;
4430 memset(&scoreboard_param, 0, sizeof(scoreboard_param));
4431 scoreboard_param.mask = 0xFF;
4432 scoreboard_param.enable = vme_context->use_hw_scoreboard;
4433 scoreboard_param.type = vme_context->use_hw_non_stalling_scoreboard;
4435 for (i = 0; i < NUM_VP9_BRC; i++) {
4436 gpe_context = &brc_context->gpe_contexts[i];
4437 gen9_init_gpe_context_vp9(ctx, gpe_context, &kernel_param);
4438 gen9_init_vfe_scoreboard_vp9(gpe_context, &scoreboard_param);
4440 memset(&scale_kernel, 0, sizeof(scale_kernel));
4442 intel_vp9_get_kernel_header_and_size((void *)media_vp9_kernels,
4443 sizeof(media_vp9_kernels),
4448 gen8_gpe_load_kernels(ctx,
/*
 * One-time initialization of the DYS (dynamic scaling) kernel GPE context.
 * Unlike the other kernels, DYS requires an 8x8 AVS sampler, so its
 * sampler_size is non-zero. The kernel index argument and the load argument
 * tail are elided from this view.
 */
4456 gen9_vme_dys_context_init_vp9(VADriverContextP ctx,
4457 struct gen9_encoder_context_vp9 *vme_context,
4458 struct vp9_dys_context *dys_context)
4460 struct i965_gpe_context *gpe_context = NULL;
4461 struct vp9_encoder_kernel_parameter kernel_param;
4462 struct vp9_encoder_scoreboard_parameter scoreboard_param;
4463 struct i965_kernel scale_kernel;
4465 kernel_param.curbe_size = sizeof(vp9_dys_curbe_data);
4466 kernel_param.inline_data_size = 0;
4467 kernel_param.sampler_size = sizeof(struct gen9_sampler_8x8_avs);
4469 memset(&scoreboard_param, 0, sizeof(scoreboard_param));
4470 scoreboard_param.mask = 0xFF;
4471 scoreboard_param.enable = vme_context->use_hw_scoreboard;
4472 scoreboard_param.type = vme_context->use_hw_non_stalling_scoreboard;
4473 scoreboard_param.walkpat_flag = 0;
4475 gpe_context = &dys_context->gpe_context;
4476 gen9_init_gpe_context_vp9(ctx, gpe_context, &kernel_param);
4477 gen9_init_vfe_scoreboard_vp9(gpe_context, &scoreboard_param);
4479 memset(&scale_kernel, 0, sizeof(scale_kernel));
4481 intel_vp9_get_kernel_header_and_size((void *)media_vp9_kernels,
4482 sizeof(media_vp9_kernels),
4487 gen8_gpe_load_kernels(ctx,
/*
 * Initialize every kernel sub-context (scaling, ME, MBEnc, DYS, BRC) and
 * install the per-kernel curbe/surface callback table on the VME context.
 */
4496 gen9_vme_kernels_context_init_vp9(VADriverContextP ctx,
4497 struct intel_encoder_context *encoder_context,
4498 struct gen9_encoder_context_vp9 *vme_context)
4500 gen9_vme_scaling_context_init_vp9(ctx, vme_context, &vme_context->scaling_context);
4501 gen9_vme_me_context_init_vp9(ctx, vme_context, &vme_context->me_context);
4502 gen9_vme_mbenc_context_init_vp9(ctx, vme_context, &vme_context->mbenc_context);
4503 gen9_vme_dys_context_init_vp9(ctx, vme_context, &vme_context->dys_context);
4504 gen9_vme_brc_context_init_vp9(ctx, vme_context, &vme_context->brc_context);
/* Callback table: curbe setup and surface binding per kernel type. */
4506 vme_context->pfn_set_curbe_brc = gen9_vp9_set_curbe_brc;
4507 vme_context->pfn_set_curbe_me = gen9_vp9_set_curbe_me;
4508 vme_context->pfn_send_me_surface = gen9_vp9_send_me_surface;
4509 vme_context->pfn_send_scaling_surface = gen9_vp9_send_scaling_surface;
4511 vme_context->pfn_set_curbe_scaling = gen9_vp9_set_curbe_scaling_cm;
4513 vme_context->pfn_send_dys_surface = gen9_vp9_send_dys_surface;
4514 vme_context->pfn_set_curbe_dys = gen9_vp9_set_curbe_dys;
4515 vme_context->pfn_set_curbe_mbenc = gen9_vp9_set_curbe_mbenc;
4516 vme_context->pfn_send_mbenc_surface = gen9_vp9_send_mbenc_surface;
/*
 * Write one boolean-coded syntax element into the PAK compressed-header
 * input buffer. Each struct vp9_compressed_element packs two elements
 * ("a" and "b" halves), so element `index` lands in slot index>>1; the
 * visible code shows both the b_* and a_* field writes — the selection
 * between the two halves (presumably index & 1) is elided from this view.
 * prob_select is set only for the hardware's special probability 252.
 * NOTE(review): parameter lines (index/prob/value) are elided — names
 * inferred from usage, TODO confirm against the full file.
 */
4521 void gen9_vp9_write_compressed_element(char *buffer,
4526 struct vp9_compressed_element *base_element, *vp9_element;
4527 base_element = (struct vp9_compressed_element *)buffer;
4529 vp9_element = base_element + (index >> 1);
4531 vp9_element->b_valid = 1;
4532 vp9_element->b_probdiff_select = 1;
4533 vp9_element->b_prob_select = (prob == 252) ? 1 : 0;
4534 vp9_element->b_bin = value;
4536 vp9_element->a_valid = 1;
4537 vp9_element->a_probdiff_select = 1;
4538 vp9_element->a_prob_select = (prob == 252) ? 1 : 0;
4539 vp9_element->a_bin = value;
/*
 * Refresh the encoder's frame-context bookkeeping and rebuild the PAK
 * compressed-header input buffer for the current frame.
 *
 * Frame-context handling follows the VP9 spec: key frames, intra-only
 * frames and error-resilient frames reset the current context to defaults;
 * reset_frame_context == 3 resets all four stored contexts,
 * reset_frame_context == 2 resets only the one selected by
 * frame_context_idx. Otherwise frame_ctx_idx tracks the picture parameter.
 *
 * The compressed header elements (tx_mode, coeff-update flags, reference
 * mode) are then emitted via gen9_vp9_write_compressed_element(). Many
 * call-argument tails and closing braces are elided from this view, so
 * comments describe only the visible control flow.
 */
4544 intel_vp9enc_refresh_frame_internal_buffers(VADriverContextP ctx,
4545 struct intel_encoder_context *encoder_context)
4547 struct gen9_encoder_context_vp9 *pak_context = encoder_context->mfc_context;
4548 VAEncPictureParameterBufferVP9 *pic_param;
4549 struct gen9_vp9_state *vp9_state;
4553 vp9_state = (struct gen9_vp9_state *)(encoder_context->enc_priv_state);
4555 if (!pak_context || !vp9_state || !vp9_state->pic_param)
4558 pic_param = vp9_state->pic_param;
4559 if ((pic_param->pic_flags.bits.frame_type == HCP_VP9_KEY_FRAME) ||
4560 (pic_param->pic_flags.bits.intra_only) ||
4561 pic_param->pic_flags.bits.error_resilient_mode) {
4562 /* reset current frame_context */
4563 intel_init_default_vp9_probs(&vp9_state->vp9_current_fc);
4564 if ((pic_param->pic_flags.bits.frame_type == HCP_VP9_KEY_FRAME) ||
4565 pic_param->pic_flags.bits.error_resilient_mode ||
4566 (pic_param->pic_flags.bits.reset_frame_context == 3)) {
4567 for (i = 0; i < 4; i++)
4568 memcpy(&vp9_state->vp9_frame_ctx[i],
4569 &vp9_state->vp9_current_fc,
4570 sizeof(FRAME_CONTEXT));
4571 } else if (pic_param->pic_flags.bits.reset_frame_context == 2) {
4572 i = pic_param->pic_flags.bits.frame_context_idx;
4573 memcpy(&vp9_state->vp9_frame_ctx[i],
4574 &vp9_state->vp9_current_fc, sizeof(FRAME_CONTEXT));
4576 /* reset the frame_ctx_idx = 0 */
4577 vp9_state->frame_ctx_idx = 0;
4579 vp9_state->frame_ctx_idx = pic_param->pic_flags.bits.frame_context_idx;
/* Rebuild the compressed-header input buffer from scratch. */
4582 i965_zero_gpe_resource(&pak_context->res_compressed_input_buffer);
4583 buffer = i965_map_gpe_resource(&pak_context->res_compressed_input_buffer);
/* Lossless-like case: all qindex deltas zero — no tx-mode signalling
 * beyond the fixed elements written below. */
4589 if ((pic_param->luma_ac_qindex == 0) &&
4590 (pic_param->luma_dc_qindex_delta == 0) &&
4591 (pic_param->chroma_ac_qindex_delta == 0) &&
4592 (pic_param->chroma_dc_qindex_delta == 0)) {
4594 /* nothing is needed */
4595 gen9_vp9_write_compressed_element(buffer,
4597 gen9_vp9_write_compressed_element(buffer,
4599 gen9_vp9_write_compressed_element(buffer,
/* tx_mode is coded as up to two bits plus a select flag, matching the
 * VP9 uncompressed/compressed header layout. */
4602 if (vp9_state->tx_mode == TX_MODE_SELECT) {
4603 gen9_vp9_write_compressed_element(buffer,
4605 gen9_vp9_write_compressed_element(buffer,
4607 gen9_vp9_write_compressed_element(buffer,
4609 } else if (vp9_state->tx_mode == ALLOW_32X32) {
4610 gen9_vp9_write_compressed_element(buffer,
4612 gen9_vp9_write_compressed_element(buffer,
4614 gen9_vp9_write_compressed_element(buffer,
4617 unsigned int tx_mode;
4619 tx_mode = vp9_state->tx_mode;
4620 gen9_vp9_write_compressed_element(buffer,
4621 0, 128, ((tx_mode) & 2));
4622 gen9_vp9_write_compressed_element(buffer,
4623 1, 128, ((tx_mode) & 1));
4624 gen9_vp9_write_compressed_element(buffer,
4628 if (vp9_state->tx_mode == TX_MODE_SELECT) {
4630 gen9_vp9_write_compressed_element(buffer,
4633 gen9_vp9_write_compressed_element(buffer,
4636 gen9_vp9_write_compressed_element(buffer,
4640 /*Setup all the input&output object*/
4643 /* update the coeff_update flag */
4644 gen9_vp9_write_compressed_element(buffer,
4646 gen9_vp9_write_compressed_element(buffer,
4648 gen9_vp9_write_compressed_element(buffer,
4650 gen9_vp9_write_compressed_element(buffer,
/* Inter frames only: signal the reference/compound prediction mode.
 * Compound prediction is allowed only when the three reference sign
 * biases are not all equal. */
4655 if (pic_param->pic_flags.bits.frame_type && !pic_param->pic_flags.bits.intra_only) {
4656 bool allow_comp = !(
4657 (pic_param->ref_flags.bits.ref_last_sign_bias && pic_param->ref_flags.bits.ref_gf_sign_bias && pic_param->ref_flags.bits.ref_arf_sign_bias) ||
4658 (!pic_param->ref_flags.bits.ref_last_sign_bias && !pic_param->ref_flags.bits.ref_gf_sign_bias && !pic_param->ref_flags.bits.ref_arf_sign_bias)
4662 if (pic_param->pic_flags.bits.comp_prediction_mode == REFERENCE_MODE_SELECT) {
4663 gen9_vp9_write_compressed_element(buffer,
4665 gen9_vp9_write_compressed_element(buffer,
4667 } else if (pic_param->pic_flags.bits.comp_prediction_mode == COMPOUND_REFERENCE) {
4668 gen9_vp9_write_compressed_element(buffer,
4670 gen9_vp9_write_compressed_element(buffer,
4674 gen9_vp9_write_compressed_element(buffer,
4676 gen9_vp9_write_compressed_element(buffer,
4682 i965_unmap_gpe_resource(&pak_context->res_compressed_input_buffer);
/*
 * Emit the 6-DW HCP_PIPE_MODE_SELECT command selecting the VP9 codec in
 * encode mode, with stream-out gated by the caller-supplied parameter and
 * Pic Status / Error Report disabled. DW4 bit 6 is set as in the original;
 * its exact meaning is hardware-specific (see the Gen9 PRM) — not asserted
 * here.
 */
4687 gen9_pak_vp9_pipe_mode_select(VADriverContextP ctx,
4688 struct encode_state *encode_state,
4689 struct intel_encoder_context *encoder_context,
4690 struct gen9_hcpe_pipe_mode_select_param *pipe_mode_param)
4692 struct intel_batchbuffer *batch = encoder_context->base.batch;
4694 BEGIN_BCS_BATCH(batch, 6);
4696 OUT_BCS_BATCH(batch, HCP_PIPE_MODE_SELECT | (6 - 2));
4697 OUT_BCS_BATCH(batch,
4698 (pipe_mode_param->stream_out << 12) |
4699 (pipe_mode_param->codec_mode << 5) |
4700 (0 << 3) | /* disable Pic Status / Error Report */
4701 (pipe_mode_param->stream_out << 2) |
4702 HCP_CODEC_SELECT_ENCODE);
4703 OUT_BCS_BATCH(batch, 0);
4704 OUT_BCS_BATCH(batch, 0);
4705 OUT_BCS_BATCH(batch, (1 << 6));
4706 OUT_BCS_BATCH(batch, 0);
4708 ADVANCE_BCS_BATCH(batch);
/*
 * Emit a 3-DW HCP_SURFACE_STATE command from the caller-filled hcp_state:
 * DW1 packs surface id and (pitch - 1); DW2 packs surface format and the
 * Y/Cb plane offset. A NULL-check on hcp_state is presumably on the elided
 * line 4719 — TODO confirm.
 */
4712 gen9_vp9_add_surface_state(VADriverContextP ctx,
4713 struct encode_state *encode_state,
4714 struct intel_encoder_context *encoder_context,
4715 hcp_surface_state *hcp_state)
4717 struct intel_batchbuffer *batch = encoder_context->base.batch;
4721 BEGIN_BCS_BATCH(batch, 3);
4722 OUT_BCS_BATCH(batch, HCP_SURFACE_STATE | (3 - 2));
4723 OUT_BCS_BATCH(batch,
4724 (hcp_state->dw1.surface_id << 28) |
4725 (hcp_state->dw1.surface_pitch - 1)
4727 OUT_BCS_BATCH(batch,
4728 (hcp_state->dw2.surface_format << 28) |
4729 (hcp_state->dw2.y_cb_offset)
4731 ADVANCE_BCS_BATCH(batch);
/*
 * Emit the 104-DW HCP_PIPE_BUF_ADDR_STATE command binding every buffer the
 * VP9 PAK hardware touches: reconstructed surface, deblocking/metadata
 * row-store buffers, current and collocated MV temporal buffers, reference
 * frames (first three slots only for VP9), source input, probability and
 * segment-id buffers, and HVD row-store buffers. Unused HEVC-only slots
 * (SAO, ILDB, stream-out) are written as zeros.
 *
 * NOTE(review): the OUT_BCS_RELOC64() macro heads are elided from this view
 * (only their argument lines are visible); each relocation is followed by a
 * MOCS attribute DW. Comments below describe the visible layout only.
 */
4735 gen9_pak_vp9_pipe_buf_addr_state(VADriverContextP ctx,
4736 struct encode_state *encode_state,
4737 struct intel_encoder_context *encoder_context)
4739 struct i965_driver_data *i965 = i965_driver_data(ctx);
4740 struct intel_batchbuffer *batch = encoder_context->base.batch;
4741 struct gen9_encoder_context_vp9 *pak_context = encoder_context->mfc_context;
4742 struct gen9_vp9_state *vp9_state;
4744 struct object_surface *obj_surface;
4746 vp9_state = (struct gen9_vp9_state *)(encoder_context->enc_priv_state);
4748 if (!vp9_state || !vp9_state->pic_param)
4752 BEGIN_BCS_BATCH(batch, 104);
4754 OUT_BCS_BATCH(batch, HCP_PIPE_BUF_ADDR_STATE | (104 - 2));
4756 obj_surface = encode_state->reconstructed_object;
4758 /* reconstructed obj_surface is already checked. So this is skipped */
4759 /* DW 1..3 decoded surface */
4762 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
4764 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
4766 /* DW 4..6 deblocking line */
4768 pak_context->res_deblocking_filter_line_buffer.bo,
4769 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
4771 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
4773 /* DW 7..9 deblocking tile line */
4775 pak_context->res_deblocking_filter_tile_line_buffer.bo,
4776 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
4778 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
4780 /* DW 10..12 deblocking tile col */
4782 pak_context->res_deblocking_filter_tile_col_buffer.bo,
4783 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
4785 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
4787 /* DW 13..15 metadata line */
4789 pak_context->res_metadata_line_buffer.bo,
4790 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
4792 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
4794 /* DW 16..18 metadata tile line */
4796 pak_context->res_metadata_tile_line_buffer.bo,
4797 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
4799 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
4801 /* DW 19..21 metadata tile col */
4803 pak_context->res_metadata_tile_col_buffer.bo,
4804 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
4806 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
4808 /* DW 22..30 SAO is not used for VP9 */
4809 OUT_BCS_BATCH(batch, 0);
4810 OUT_BCS_BATCH(batch, 0);
4811 OUT_BCS_BATCH(batch, 0);
4812 OUT_BCS_BATCH(batch, 0);
4813 OUT_BCS_BATCH(batch, 0);
4814 OUT_BCS_BATCH(batch, 0);
4815 OUT_BCS_BATCH(batch, 0);
4816 OUT_BCS_BATCH(batch, 0);
4817 OUT_BCS_BATCH(batch, 0);
4819 /* DW 31..33 Current Motion vector temporal buffer */
4821 pak_context->res_mv_temporal_buffer[vp9_state->curr_mv_temporal_index].bo,
4822 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
4824 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
4826 /* DW 34..36 Not used */
4827 OUT_BCS_BATCH(batch, 0);
4828 OUT_BCS_BATCH(batch, 0);
4829 OUT_BCS_BATCH(batch, 0);
4831 /* Only the first three reference_frame is used for VP9 */
4832 /* DW 37..52 for reference_frame */
/* picture_coding_type != 0 => inter frame: bind up to 3 references,
 * zero-fill the remaining HEVC reference slots. */
4834 if (vp9_state->picture_coding_type) {
4835 for (i = 0; i < 3; i++) {
4837 if (pak_context->reference_surfaces[i].bo) {
4839 pak_context->reference_surfaces[i].bo,
4840 I915_GEM_DOMAIN_INSTRUCTION, 0,
4843 OUT_BCS_BATCH(batch, 0);
4844 OUT_BCS_BATCH(batch, 0);
4849 for (; i < 8; i++) {
4850 OUT_BCS_BATCH(batch, 0);
4851 OUT_BCS_BATCH(batch, 0);
4854 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
4856 /* DW 54..56 for source input */
4858 pak_context->uncompressed_picture_source.bo,
4859 I915_GEM_DOMAIN_INSTRUCTION, 0,
4861 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
4863 /* DW 57..59 StreamOut is not used */
4864 OUT_BCS_BATCH(batch, 0);
4865 OUT_BCS_BATCH(batch, 0);
4866 OUT_BCS_BATCH(batch, 0);
4868 /* DW 60..62. Not used for encoder */
4869 OUT_BCS_BATCH(batch, 0);
4870 OUT_BCS_BATCH(batch, 0);
4871 OUT_BCS_BATCH(batch, 0);
4873 /* DW 63..65. ILDB Not used for encoder */
4874 OUT_BCS_BATCH(batch, 0);
4875 OUT_BCS_BATCH(batch, 0);
4876 OUT_BCS_BATCH(batch, 0);
4878 /* DW 66..81 For the collocated motion vector temporal buffer */
/* The previous frame's MV buffer is the other half of the ping-pong pair. */
4879 if (vp9_state->picture_coding_type) {
4880 int prev_index = vp9_state->curr_mv_temporal_index ^ 0x01;
4882 pak_context->res_mv_temporal_buffer[prev_index].bo,
4883 I915_GEM_DOMAIN_INSTRUCTION, 0,
4886 OUT_BCS_BATCH(batch, 0);
4887 OUT_BCS_BATCH(batch, 0);
4890 for (i = 1; i < 8; i++) {
4891 OUT_BCS_BATCH(batch, 0);
4892 OUT_BCS_BATCH(batch, 0);
4894 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
4896 /* DW 83..85 VP9 prob buffer */
4898 pak_context->res_prob_buffer.bo,
4899 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
4902 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
4904 /* DW 86..88 Segment id buffer */
4905 if (pak_context->res_segmentid_buffer.bo) {
4907 pak_context->res_segmentid_buffer.bo,
4908 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
4911 OUT_BCS_BATCH(batch, 0);
4912 OUT_BCS_BATCH(batch, 0);
4914 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
4916 /* DW 89..91 HVD line rowstore buffer */
4918 pak_context->res_hvd_line_buffer.bo,
4919 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
4921 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
4923 /* DW 92..94 HVD tile line rowstore buffer */
4925 pak_context->res_hvd_tile_line_buffer.bo,
4926 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
4928 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
4930 /* DW 95..97 SAO streamout. Not used for VP9 */
4931 OUT_BCS_BATCH(batch, 0);
4932 OUT_BCS_BATCH(batch, 0);
4933 OUT_BCS_BATCH(batch, 0);
4935 /* reserved for KBL. 98..100 */
4936 OUT_BCS_BATCH(batch, 0);
4937 OUT_BCS_BATCH(batch, 0);
4938 OUT_BCS_BATCH(batch, 0);
4941 OUT_BCS_BATCH(batch, 0);
4942 OUT_BCS_BATCH(batch, 0);
4943 OUT_BCS_BATCH(batch, 0);
4945 ADVANCE_BCS_BATCH(batch);
/*
 * Emit the 29-DW HCP_IND_OBJ_BASE_ADDR_STATE command binding the PAK's
 * indirect objects: the CU record surface (read at mb_data_offset), the
 * PAK-BSE output bitstream (with upper bound), the compressed-header input,
 * the probability counter/delta buffers and the tile/CU record stream-outs.
 * The indirect bitstream object base (DW1..5) is unused here and zeroed.
 *
 * NOTE(review): the OUT_BCS_RELOC64() macro heads are elided from this view
 * (only their argument lines are visible).
 */
4949 gen9_pak_vp9_ind_obj_base_addr_state(VADriverContextP ctx,
4950 struct encode_state *encode_state,
4951 struct intel_encoder_context *encoder_context)
4953 struct i965_driver_data *i965 = i965_driver_data(ctx);
4954 struct intel_batchbuffer *batch = encoder_context->base.batch;
4955 struct gen9_encoder_context_vp9 *pak_context = encoder_context->mfc_context;
4956 struct gen9_vp9_state *vp9_state;
4958 vp9_state = (struct gen9_vp9_state *)(encoder_context->enc_priv_state);
4961 BEGIN_BCS_BATCH(batch, 29);
4963 OUT_BCS_BATCH(batch, HCP_IND_OBJ_BASE_ADDR_STATE | (29 - 2));
4965 /* indirect bitstream object base */
4966 OUT_BCS_BATCH(batch, 0);
4967 OUT_BCS_BATCH(batch, 0);
4968 OUT_BCS_BATCH(batch, 0);
4969 /* the upper bound of indirect bitstream object */
4970 OUT_BCS_BATCH(batch, 0);
4971 OUT_BCS_BATCH(batch, 0);
4973 /* DW 6: Indirect CU object base address */
4975 pak_context->res_mb_code_surface.bo,
4976 I915_GEM_DOMAIN_INSTRUCTION, 0, /* No write domain */
4977 vp9_state->mb_data_offset);
4978 /* default attribute */
4979 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
4981 /* DW 9..11, PAK-BSE */
4983 pak_context->indirect_pak_bse_object.bo,
4984 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
4985 pak_context->indirect_pak_bse_object.offset);
4986 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
4988 /* DW 12..13 upper bound */
4990 pak_context->indirect_pak_bse_object.bo,
4991 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
4992 pak_context->indirect_pak_bse_object.end_offset);
4994 /* DW 14..16 compressed header buffer */
4996 pak_context->res_compressed_input_buffer.bo,
4997 I915_GEM_DOMAIN_INSTRUCTION, 0,
4999 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
5001 /* DW 17..19 prob counter streamout */
5003 pak_context->res_prob_counter_buffer.bo,
5004 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
5006 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
5008 /* DW 20..22 prob delta streamin */
5010 pak_context->res_prob_delta_buffer.bo,
5011 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
5013 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
5015 /* DW 23..25 Tile record streamout */
5017 pak_context->res_tile_record_streamout_buffer.bo,
5018 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
5020 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
5022 /* DW 26..28 CU record streamout */
5024 pak_context->res_cu_stat_streamout_buffer.bo,
5025 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
5027 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
5029 ADVANCE_BCS_BATCH(batch);
/*
 * Emit one 8-DW HCP_VP9_SEGMENT_STATE command for segment `seg_id`.
 * The segment_reference value is taken from seg_param but forced (the
 * forcing assignment sits on an elided line — presumably to 0 for key /
 * intra-only frames, TODO confirm). DW7 packs sign-magnitude conversions
 * of the per-segment qindex delta (9 bits) and loop-filter level delta
 * (7 bits, shifted to bits 16+).
 */
5033 gen9_pak_vp9_segment_state(VADriverContextP ctx,
5034 struct encode_state *encode_state,
5035 struct intel_encoder_context *encoder_context,
5036 VAEncSegParamVP9 *seg_param, uint8_t seg_id)
5038 struct intel_batchbuffer *batch = encoder_context->base.batch;
5039 uint32_t batch_value, tmp;
5040 VAEncPictureParameterBufferVP9 *pic_param;
5042 if (!encode_state->pic_param_ext ||
5043 !encode_state->pic_param_ext->buffer) {
5047 pic_param = (VAEncPictureParameterBufferVP9 *)encode_state->pic_param_ext->buffer;
5049 batch_value = seg_param->seg_flags.bits.segment_reference;
5050 if (pic_param->pic_flags.bits.frame_type == HCP_VP9_KEY_FRAME ||
5051 pic_param->pic_flags.bits.intra_only)
5054 BEGIN_BCS_BATCH(batch, 8);
5056 OUT_BCS_BATCH(batch, HCP_VP9_SEGMENT_STATE | (8 - 2));
5057 OUT_BCS_BATCH(batch, seg_id << 0); /* DW 1 - SegmentID */
5058 OUT_BCS_BATCH(batch,
5059 (seg_param->seg_flags.bits.segment_reference_enabled << 3) |
5060 (batch_value << 1) |
5061 (seg_param->seg_flags.bits.segment_reference_skipped << 0)
5064 /* DW 3..6 is not used for encoder */
5065 OUT_BCS_BATCH(batch, 0);
5066 OUT_BCS_BATCH(batch, 0);
5067 OUT_BCS_BATCH(batch, 0);
5068 OUT_BCS_BATCH(batch, 0);
/* DW7: sign-magnitude qindex delta (low bits) | lf-level delta << 16. */
5071 tmp = intel_convert_sign_mag(seg_param->segment_qindex_delta, 9);
5073 tmp = intel_convert_sign_mag(seg_param->segment_lf_level_delta, 7);
5074 batch_value |= (tmp << 16);
5075 OUT_BCS_BATCH(batch, batch_value);
5077 ADVANCE_BCS_BATCH(batch);
/*
 * Build a second-level batch buffer containing a single HCP_INSERT_PAK_OBJECT
 * command that carries the pre-packed uncompressed VP9 frame header
 * (vp9_state->alias_insert_data, vp9_state->header_length bytes), terminated
 * with MI_NOOP + MI_BATCH_BUFFER_END.
 *
 * obj_batch_buffer: GPE resource that receives the commands; it is mapped,
 * written, and unmapped here.  Returns early (visible guard below) when the
 * PAK context, the private state, or the picture parameter is missing.
 */
5082 intel_vp9enc_construct_pak_insertobj_batchbuffer(VADriverContextP ctx,
5083 struct intel_encoder_context *encoder_context,
5084 struct i965_gpe_resource *obj_batch_buffer)
5086 struct gen9_encoder_context_vp9 *pak_context = encoder_context->mfc_context;
5087 struct gen9_vp9_state *vp9_state;
5088 int uncompressed_header_length;
5089 unsigned int *cmd_ptr;
5090 unsigned int dw_length, bits_in_last_dw;
5092 vp9_state = (struct gen9_vp9_state *)(encoder_context->enc_priv_state);
5094 if (!pak_context || !vp9_state || !vp9_state->pic_param)
5097 uncompressed_header_length = vp9_state->header_length;
5098 cmd_ptr = i965_map_gpe_resource(obj_batch_buffer);
/* Valid bit count in the final DW: remainder bytes * 8, with an exact
 * multiple of 4 bytes meaning the whole last DW (32 bits) is used. */
5103 bits_in_last_dw = uncompressed_header_length % 4;
5104 bits_in_last_dw *= 8;
5106 if (bits_in_last_dw == 0)
5107 bits_in_last_dw = 32;
5109 /* get the DWORD length of the inserted_data */
5110 dw_length = ALIGN(uncompressed_header_length, 4) / 4;
5111 *cmd_ptr++ = HCP_INSERT_PAK_OBJECT | dw_length;
5113 *cmd_ptr++ = ((0 << 31) | /* indirect payload */
5114 (0 << 16) | /* the start offset in first DW */
5116 (bits_in_last_dw << 8) | /* bits_in_last_dw */
5117 (0 << 4) | /* skip emulation byte count. 0 for VP9 */
5118 (0 << 3) | /* emulation flag. 0 for VP9 */
5119 (1 << 2) | /* last header flag. */
/* Inline payload: the pre-built header bitstream, DW-aligned. */
5121 memcpy(cmd_ptr, vp9_state->alias_insert_data, dw_length * sizeof(unsigned int));
5123 cmd_ptr += dw_length;
/* Terminate the second-level batch so HW stops after the insert object. */
5125 *cmd_ptr++ = MI_NOOP;
5126 *cmd_ptr++ = MI_BATCH_BUFFER_END;
5127 i965_unmap_gpe_resource(obj_batch_buffer);
/*
 * Program the picture-level PAK commands for one VP9 PAK pass.
 *
 * Per pass this: (pass 0 only) rebuilds the uncompressed-header insert-object
 * batch and, when BRC is off, the pic-state batch; refreshes internal frame
 * buffers; seeds the probability buffer from the current frame context
 * (overlaying key-frame partition/uv-mode tables for key/intra-only frames);
 * on BRC re-pak passes inserts a conditional batch-buffer-end keyed on the
 * image status mask; then emits pipe-mode-select, the decoded/source/
 * reference HCP surface states, pipe/indirect base addresses, the pic-state
 * second-level batch, per-segment HCP_VP9_SEGMENT_STATE commands, the
 * uncompressed-header second-level batch, and finally the MB-code (PAK
 * object) second-level batch.
 *
 * NOTE(review): several lines are missing from this view (e.g. the
 * declaration of prob_ptr, segment_count, the else-branches around the DYS
 * checks, and the final surface-state/conditional-end argument lists), so the
 * comments describe only what is visible.
 */
5131 gen9_vp9_pak_picture_level(VADriverContextP ctx,
5132 struct encode_state *encode_state,
5133 struct intel_encoder_context *encoder_context)
5135 struct intel_batchbuffer *batch = encoder_context->base.batch;
5136 struct gen9_encoder_context_vp9 *pak_context = encoder_context->mfc_context;
5137 struct object_surface *obj_surface;
5138 VAEncPictureParameterBufferVP9 *pic_param;
5139 VAEncMiscParameterTypeVP9PerSegmantParam *seg_param, tmp_seg_param;
5140 struct gen9_vp9_state *vp9_state;
5141 struct gen9_surface_vp9 *vp9_priv_surface;
5143 struct gen9_hcpe_pipe_mode_select_param mode_param;
5144 hcp_surface_state hcp_surface;
5145 struct gpe_mi_batch_buffer_start_parameter second_level_batch;
5148 vp9_state = (struct gen9_vp9_state *)(encoder_context->enc_priv_state);
5150 if (!pak_context || !vp9_state || !vp9_state->pic_param)
5153 pic_param = vp9_state->pic_param;
5154 seg_param = vp9_state->segment_param;
/* First pass of this frame: (re)build the header insert-object batch. */
5156 if (vp9_state->curr_pak_pass == 0) {
5157 intel_vp9enc_construct_pak_insertobj_batchbuffer(ctx, encoder_context,
5158 &pak_context->res_pak_uncompressed_input_buffer);
5160 // Check if driver already programmed pic state as part of BRC update kernel programming.
5161 if (!vp9_state->brc_enabled) {
5162 intel_vp9enc_construct_picstate_batchbuf(ctx, encode_state,
5163 encoder_context, &pak_context->res_pic_state_brc_write_hfw_read_buffer);
5167 if (vp9_state->curr_pak_pass == 0) {
5168 intel_vp9enc_refresh_frame_internal_buffers(ctx, encoder_context);
5172 /* copy the frame_context[frame_idx] into curr_frame_context */
5173 memcpy(&vp9_state->vp9_current_fc,
5174 &(vp9_state->vp9_frame_ctx[vp9_state->frame_ctx_idx]),
5175 sizeof(FRAME_CONTEXT));
5179 prob_ptr = i965_map_gpe_resource(&pak_context->res_prob_buffer);
5184 /* copy the current fc to vp9_prob buffer */
5185 memcpy(prob_ptr, &vp9_state->vp9_current_fc, sizeof(FRAME_CONTEXT));
/* Key / intra-only frames use the fixed key-frame probability tables for
 * partition and UV intra mode instead of the adapted ones. */
5186 if ((pic_param->pic_flags.bits.frame_type == HCP_VP9_KEY_FRAME) ||
5187 pic_param->pic_flags.bits.intra_only) {
5188 FRAME_CONTEXT *frame_ptr = (FRAME_CONTEXT *)prob_ptr;
5190 memcpy(frame_ptr->partition_prob, vp9_kf_partition_probs,
5191 sizeof(vp9_kf_partition_probs));
5192 memcpy(frame_ptr->uv_mode_prob, vp9_kf_uv_mode_prob,
5193 sizeof(vp9_kf_uv_mode_prob));
5195 i965_unmap_gpe_resource(&pak_context->res_prob_buffer);
/* Re-pak passes under BRC: skip the rest of this batch if the previous
 * pass already met the target (conditional end on the image status mask). */
5199 if (vp9_state->brc_enabled && vp9_state->curr_pak_pass) {
5200 /* read image status and insert the conditional end cmd */
5201 /* image ctrl/status is already accessed */
5202 struct gpe_mi_conditional_batch_buffer_end_parameter mi_cond_end;
5203 struct vp9_encode_status_buffer_internal *status_buffer;
5205 status_buffer = &vp9_state->status_buffer;
5206 memset(&mi_cond_end, 0, sizeof(mi_cond_end));
5207 mi_cond_end.offset = status_buffer->image_status_mask_offset;
5208 mi_cond_end.bo = status_buffer->bo;
5209 mi_cond_end.compare_data = 0;
5210 mi_cond_end.compare_mask_mode_disabled = 1;
5211 gen9_gpe_mi_conditional_batch_buffer_end(ctx, batch,
/* Pipe mode select: codec_mode 1, no stream-out. */
5215 mode_param.codec_mode = 1;
5216 mode_param.stream_out = 0;
5217 gen9_pak_vp9_pipe_mode_select(ctx, encode_state, encoder_context, &mode_param);
5219 /* reconstructed surface */
5220 memset(&hcp_surface, 0, sizeof(hcp_surface));
5221 obj_surface = encode_state->reconstructed_object;
5222 hcp_surface.dw1.surface_id = 0;
5223 hcp_surface.dw1.surface_pitch = obj_surface->width;
5224 hcp_surface.dw2.surface_format = SURFACE_FORMAT_PLANAR_420_8;
5225 hcp_surface.dw2.y_cb_offset = obj_surface->y_cb_offset;
5226 gen9_vp9_add_surface_state(ctx, encode_state, encoder_context,
/* Source surface (surface_id 1): use the dynamically-scaled surface when
 * DYS is active and src/dst dimensions differ, else the raw input YUV. */
5230 if (vp9_state->dys_in_use &&
5231 ((pic_param->frame_width_src != pic_param->frame_width_dst) ||
5232 (pic_param->frame_height_src != pic_param->frame_height_dst))) {
5233 vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
5234 obj_surface = vp9_priv_surface->dys_surface_obj;
5236 obj_surface = encode_state->input_yuv_object;
5239 hcp_surface.dw1.surface_id = 1;
5240 hcp_surface.dw1.surface_pitch = obj_surface->width;
5241 hcp_surface.dw2.surface_format = SURFACE_FORMAT_PLANAR_420_8;
5242 hcp_surface.dw2.y_cb_offset = obj_surface->y_cb_offset;
5243 gen9_vp9_add_surface_state(ctx, encode_state, encoder_context,
/* Inter frames: add surface states for the last/golden/alt references
 * (surface_ids 2/3/4) when the corresponding objects exist. */
5246 if (vp9_state->picture_coding_type) {
5247 /* Add surface for last */
5248 if (vp9_state->last_ref_obj) {
5249 obj_surface = vp9_state->last_ref_obj;
5250 hcp_surface.dw1.surface_id = 2;
5251 hcp_surface.dw1.surface_pitch = obj_surface->width;
5252 hcp_surface.dw2.surface_format = SURFACE_FORMAT_PLANAR_420_8;
5253 hcp_surface.dw2.y_cb_offset = obj_surface->y_cb_offset;
5254 gen9_vp9_add_surface_state(ctx, encode_state, encoder_context,
5257 if (vp9_state->golden_ref_obj) {
5258 obj_surface = vp9_state->golden_ref_obj;
5259 hcp_surface.dw1.surface_id = 3;
5260 hcp_surface.dw1.surface_pitch = obj_surface->width;
5261 hcp_surface.dw2.surface_format = SURFACE_FORMAT_PLANAR_420_8;
5262 hcp_surface.dw2.y_cb_offset = obj_surface->y_cb_offset;
5263 gen9_vp9_add_surface_state(ctx, encode_state, encoder_context,
5266 if (vp9_state->alt_ref_obj) {
5267 obj_surface = vp9_state->alt_ref_obj;
5268 hcp_surface.dw1.surface_id = 4;
5269 hcp_surface.dw1.surface_pitch = obj_surface->width;
5270 hcp_surface.dw2.surface_format = SURFACE_FORMAT_PLANAR_420_8;
5271 hcp_surface.dw2.y_cb_offset = obj_surface->y_cb_offset;
5272 gen9_vp9_add_surface_state(ctx, encode_state, encoder_context,
5277 gen9_pak_vp9_pipe_buf_addr_state(ctx, encode_state, encoder_context);
5279 gen9_pak_vp9_ind_obj_base_addr_state(ctx, encode_state, encoder_context);
5281 // Using picstate zero with updated QP and LF deltas by HuC for repak, irrespective of how many Pak passes were run in multi-pass mode.
5282 memset(&second_level_batch, 0, sizeof(second_level_batch));
/* Each pass uses its own pic-state slot (VP9_PIC_STATE_BUFFER_SIZE apart). */
5284 if (vp9_state->curr_pak_pass == 0) {
5285 second_level_batch.offset = 0;
5287 second_level_batch.offset = vp9_state->curr_pak_pass * VP9_PIC_STATE_BUFFER_SIZE;
5289 second_level_batch.is_second_level = 1;
5290 second_level_batch.bo = pak_context->res_pic_state_brc_write_hfw_read_buffer.bo;
5292 gen8_gpe_mi_batch_buffer_start(ctx, batch, &second_level_batch);
/* Segment states: when segmentation is disabled a zeroed temporary is used
 * so at least one HCP_VP9_SEGMENT_STATE is still programmed. */
5294 if (pic_param->pic_flags.bits.segmentation_enabled &&
5299 memset(&tmp_seg_param, 0, sizeof(tmp_seg_param));
5300 seg_param = &tmp_seg_param;
5302 for (i = 0; i < segment_count; i++) {
5303 gen9_pak_vp9_segment_state(ctx, encode_state,
5305 &seg_param->seg_data[i], i);
5308 /* Insert the uncompressed header buffer */
5309 second_level_batch.is_second_level = 1;
5310 second_level_batch.offset = 0;
5311 second_level_batch.bo = pak_context->res_pak_uncompressed_input_buffer.bo;
5313 gen8_gpe_mi_batch_buffer_start(ctx, batch, &second_level_batch);
/* Chain to the MB-code surface (PAK object commands produced by ENC). */
5316 second_level_batch.is_second_level = 1;
5317 second_level_batch.offset = 0;
5318 second_level_batch.bo = pak_context->res_mb_code_surface.bo;
5319 gen8_gpe_mi_batch_buffer_start(ctx, batch, &second_level_batch);
/*
 * Append MMIO read-back commands after the PAK pass: store the bitstream
 * byte-count register and the HCP image status mask/ctrl registers into the
 * internal status buffer (and the BRC bitstream-size buffer at offsets 0/4),
 * bracketed by MI_FLUSH_DW so the PAK writes land before the reads.
 *
 * The MMIO offsets come from status_buffer (vp9_bs_frame_reg_offset etc.,
 * initialized in gen9_vp9_vme_context_init).
 */
5325 gen9_vp9_read_mfc_status(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
5327 struct intel_batchbuffer *batch = encoder_context->base.batch;
5328 struct gen9_encoder_context_vp9 *pak_context = encoder_context->mfc_context;
5329 struct gpe_mi_store_register_mem_parameter mi_store_reg_mem_param;
5330 struct gpe_mi_flush_dw_parameter mi_flush_dw_param;
5331 //struct gpe_mi_copy_mem_parameter mi_copy_mem_param;
5332 struct vp9_encode_status_buffer_internal *status_buffer;
5333 struct gen9_vp9_state *vp9_state;
5335 vp9_state = (struct gen9_vp9_state *)(encoder_context->enc_priv_state);
5336 if (!vp9_state || !pak_context || !batch)
5339 status_buffer = &(vp9_state->status_buffer);
/* Flush pending PAK writes before sampling the status registers. */
5341 memset(&mi_flush_dw_param, 0, sizeof(mi_flush_dw_param));
5342 gen8_gpe_mi_flush_dw(ctx, batch, &mi_flush_dw_param);
/* Bitstream byte count -> status buffer (for coded-size reporting). */
5344 memset(&mi_store_reg_mem_param, 0, sizeof(mi_store_reg_mem_param));
5345 mi_store_reg_mem_param.bo = status_buffer->bo;
5346 mi_store_reg_mem_param.offset = status_buffer->bs_byte_count_offset;
5347 mi_store_reg_mem_param.mmio_offset = status_buffer->vp9_bs_frame_reg_offset;
5348 gen8_gpe_mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);
/* Same byte count also goes to the BRC bitstream-size buffer, DW 0. */
5350 mi_store_reg_mem_param.bo = pak_context->res_brc_bitstream_size_buffer.bo;
5351 mi_store_reg_mem_param.offset = 0;
5352 mi_store_reg_mem_param.mmio_offset = status_buffer->vp9_bs_frame_reg_offset;
5353 gen8_gpe_mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);
5355 /* Read HCP Image status */
5356 mi_store_reg_mem_param.bo = status_buffer->bo;
5357 mi_store_reg_mem_param.offset = status_buffer->image_status_mask_offset;
5358 mi_store_reg_mem_param.mmio_offset =
5359 status_buffer->vp9_image_mask_reg_offset;
5360 gen8_gpe_mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);
5362 mi_store_reg_mem_param.bo = status_buffer->bo;
5363 mi_store_reg_mem_param.offset = status_buffer->image_status_ctrl_offset;
5364 mi_store_reg_mem_param.mmio_offset =
5365 status_buffer->vp9_image_ctrl_reg_offset;
5366 gen8_gpe_mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);
/* Image ctrl also mirrored into the BRC bitstream-size buffer, DW 1. */
5368 mi_store_reg_mem_param.bo = pak_context->res_brc_bitstream_size_buffer.bo;
5369 mi_store_reg_mem_param.offset = 4;
5370 mi_store_reg_mem_param.mmio_offset =
5371 status_buffer->vp9_image_ctrl_reg_offset;
5372 gen8_gpe_mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);
/* Final flush so the stored values are visible to later consumers. */
5374 gen8_gpe_mi_flush_dw(ctx, batch, &mi_flush_dw_param);
/*
 * Resolve and reference all BOs the PAK stage needs for this frame:
 * the reconstructed surface, up to three reference surfaces (inter frames
 * only), the input YUV (or the DYS-scaled surface when dynamic scaling is
 * active), and the coded buffer used as the indirect PAK-BSE object.
 * Also resets the coded-buffer status header so a later sync can detect
 * completion.  Returns VA_STATUS_SUCCESS or an error code.
 *
 * Ownership: every BO stored into pak_context here is dri_bo_reference()d;
 * gen9_vp9_pak_context_destroy() drops those references.
 *
 * NOTE(review): some lines are missing from this view (e.g. the `int i`
 * declaration, the dri_bo_map/unmap around the coded-buffer header write,
 * and parts of the NULL-state guard), so comments cover visible code only.
 */
5380 gen9_vp9_pak_pipeline_prepare(VADriverContextP ctx,
5381 struct encode_state *encode_state,
5382 struct intel_encoder_context *encoder_context)
5384 struct gen9_encoder_context_vp9 *pak_context = encoder_context->mfc_context;
5385 struct object_surface *obj_surface;
5386 struct object_buffer *obj_buffer;
5387 struct i965_coded_buffer_segment *coded_buffer_segment;
5388 VAEncPictureParameterBufferVP9 *pic_param;
5389 struct gen9_vp9_state *vp9_state;
5393 vp9_state = (struct gen9_vp9_state *)(encoder_context->enc_priv_state);
5395 !vp9_state->pic_param)
5396 return VA_STATUS_ERROR_INVALID_PARAMETER;
5398 pic_param = vp9_state->pic_param;
5400 /* reconstructed surface */
5401 obj_surface = encode_state->reconstructed_object;
5402 i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
5404 dri_bo_unreference(pak_context->reconstructed_object.bo);
5406 pak_context->reconstructed_object.bo = obj_surface->bo;
5407 dri_bo_reference(pak_context->reconstructed_object.bo);
5409 /* set vp9 reference frames */
/* Drop last frame's reference BOs before installing the new set. */
5410 for (i = 0; i < ARRAY_ELEMS(pak_context->reference_surfaces); i++) {
5411 if (pak_context->reference_surfaces[i].bo)
5412 dri_bo_unreference(pak_context->reference_surfaces[i].bo);
5413 pak_context->reference_surfaces[i].bo = NULL;
5416 /* Three reference frames are enough for VP9 */
/* frame_type != 0 means non-key frame; intra-only frames skip refs too. */
5417 if (pic_param->pic_flags.bits.frame_type &&
5418 !pic_param->pic_flags.bits.intra_only) {
5419 for (i = 0; i < 3; i++) {
5420 obj_surface = encode_state->reference_objects[i];
5421 if (obj_surface && obj_surface->bo) {
5422 pak_context->reference_surfaces[i].bo = obj_surface->bo;
5423 dri_bo_reference(obj_surface->bo);
5428 /* input YUV surface */
5429 dri_bo_unreference(pak_context->uncompressed_picture_source.bo);
5430 pak_context->uncompressed_picture_source.bo = NULL;
5431 obj_surface = encode_state->reconstructed_object;
/* With DYS and differing src/dst dimensions, PAK reads the scaled copy
 * attached to the reconstructed surface's private data. */
5432 if (vp9_state->dys_in_use &&
5433 ((pic_param->frame_width_src != pic_param->frame_width_dst) ||
5434 (pic_param->frame_height_src != pic_param->frame_height_dst))) {
5435 struct gen9_surface_vp9 *vp9_priv_surface =
5436 (struct gen9_surface_vp9 *)(obj_surface->private_data);
5437 obj_surface = vp9_priv_surface->dys_surface_obj;
5439 obj_surface = encode_state->input_yuv_object;
5441 pak_context->uncompressed_picture_source.bo = obj_surface->bo;
5442 dri_bo_reference(pak_context->uncompressed_picture_source.bo);
/* Coded buffer: bitstream payload starts after the status header and is
 * bounded 0x1000 below the buffer end (aligned down to a 4K boundary). */
5445 dri_bo_unreference(pak_context->indirect_pak_bse_object.bo);
5446 pak_context->indirect_pak_bse_object.bo = NULL;
5447 obj_buffer = encode_state->coded_buf_object;
5448 bo = obj_buffer->buffer_store->bo;
5449 pak_context->indirect_pak_bse_object.offset = I965_CODEDBUFFER_HEADER_SIZE;
5450 pak_context->indirect_pak_bse_object.end_offset = ALIGN((obj_buffer->size_element - 0x1000), 0x1000);
5451 pak_context->indirect_pak_bse_object.bo = bo;
5452 dri_bo_reference(pak_context->indirect_pak_bse_object.bo);
5454 /* set the internal flag to 0 to indicate the coded size is unknown */
5456 coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual;
5457 coded_buffer_segment->mapped = 0;
5458 coded_buffer_segment->codec = encoder_context->codec;
5459 coded_buffer_segment->status_support = 1;
5462 return VA_STATUS_SUCCESS;
/*
 * BRC-prepare hook for the PAK stage (installed as
 * encoder_context->mfc_brc_prepare).
 * NOTE(review): the function body is not visible in this view — presumably a
 * no-op/success stub since BRC programming happens in the VME path; confirm
 * against the full source.
 */
5466 gen9_vp9_pak_brc_prepare(struct encode_state *encode_state,
5467 struct intel_encoder_context *encoder_context)
/*
 * Release the BO references held by the PAK side of the shared VME/PAK
 * context: reconstructed surface, input YUV source, indirect PAK-BSE coded
 * buffer, and the 8 reference-surface slots.  The context structure itself
 * is NOT freed here — VME and PAK share one allocation, so the VME destroy
 * path owns the free (see the comment at the end).
 */
5472 gen9_vp9_pak_context_destroy(void *context)
5474 struct gen9_encoder_context_vp9 *pak_context = context;
5477 dri_bo_unreference(pak_context->reconstructed_object.bo);
5478 pak_context->reconstructed_object.bo = NULL;
5480 dri_bo_unreference(pak_context->uncompressed_picture_source.bo);
5481 pak_context->uncompressed_picture_source.bo = NULL;
5483 dri_bo_unreference(pak_context->indirect_pak_bse_object.bo);
5484 pak_context->indirect_pak_bse_object.bo = NULL;
5486 for (i = 0; i < 8; i++) {
5487 dri_bo_unreference(pak_context->reference_surfaces[i].bo);
5488 pak_context->reference_surfaces[i].bo = NULL;
5491 /* vme & pak same the same structure, so don't free the context here */
/*
 * Top-level PAK entry point for one VP9 frame (installed as
 * encoder_context->mfc_pipeline).
 *
 * Prepares per-frame BOs, starts an atomic BCS batch (on BSD ring 0 when a
 * second BSD ring exists), pads with 64 MI_NOOPs, then runs
 * gen9_vp9_pak_picture_level() + gen9_vp9_read_mfc_status() once per PAK
 * pass (num_pak_passes, multi-pass when BRC is on).  After flushing, it
 * snapshots this frame's parameters into vp9_last_frame and advances the
 * per-frame bookkeeping (frame_number, MV temporal buffer index, first_frame).
 *
 * NOTE(review): a few lines are hidden in this view (e.g. `int i`,
 * va_status declaration, and the early-return body after the prepare call).
 */
5495 gen9_vp9_pak_pipeline(VADriverContextP ctx,
5497 struct encode_state *encode_state,
5498 struct intel_encoder_context *encoder_context)
5500 struct i965_driver_data *i965 = i965_driver_data(ctx);
5501 struct intel_batchbuffer *batch = encoder_context->base.batch;
5502 struct gen9_encoder_context_vp9 *pak_context = encoder_context->mfc_context;
5504 struct gen9_vp9_state *vp9_state;
5505 VAEncPictureParameterBufferVP9 *pic_param;
5508 vp9_state = (struct gen9_vp9_state *)(encoder_context->enc_priv_state);
5510 if (!vp9_state || !vp9_state->pic_param || !pak_context)
5511 return VA_STATUS_ERROR_INVALID_PARAMETER;
5513 va_status = gen9_vp9_pak_pipeline_prepare(ctx, encode_state, encoder_context);
5515 if (va_status != VA_STATUS_SUCCESS)
/* Pin PAK to BSD ring 0 on dual-BSD parts; plain BCS start otherwise. */
5518 if (i965->intel.has_bsd2)
5519 intel_batchbuffer_start_atomic_bcs_override(batch, 0x1000, BSD_RING0);
5521 intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
5523 intel_batchbuffer_emit_mi_flush(batch);
/* 64 NOPs of padding at the head of the batch. */
5525 BEGIN_BCS_BATCH(batch, 64);
5526 for (i = 0; i < 64; i++)
5527 OUT_BCS_BATCH(batch, MI_NOOP);
5529 ADVANCE_BCS_BATCH(batch);
5531 for (vp9_state->curr_pak_pass = 0;
5532 vp9_state->curr_pak_pass < vp9_state->num_pak_passes;
5533 vp9_state->curr_pak_pass++) {
5535 if (vp9_state->curr_pak_pass == 0) {
5536 /* Initialize the VP9 Image Ctrl reg for the first pass */
5537 struct gpe_mi_load_register_imm_parameter mi_load_reg_imm;
5538 struct vp9_encode_status_buffer_internal *status_buffer;
5540 status_buffer = &(vp9_state->status_buffer);
5541 memset(&mi_load_reg_imm, 0, sizeof(mi_load_reg_imm));
5542 mi_load_reg_imm.mmio_offset = status_buffer->vp9_image_ctrl_reg_offset;
5543 mi_load_reg_imm.data = 0;
5544 gen8_gpe_mi_load_register_imm(ctx, batch, &mi_load_reg_imm);
5546 gen9_vp9_pak_picture_level(ctx, encode_state, encoder_context);
5547 gen9_vp9_read_mfc_status(ctx, encoder_context);
5550 intel_batchbuffer_end_atomic(batch);
5551 intel_batchbuffer_flush(batch);
/* Remember this frame's properties for next-frame decisions (e.g. ref
 * management and context selection). */
5553 pic_param = vp9_state->pic_param;
5554 vp9_state->vp9_last_frame.frame_width = pic_param->frame_width_dst;
5555 vp9_state->vp9_last_frame.frame_height = pic_param->frame_height_dst;
5556 vp9_state->vp9_last_frame.frame_type = pic_param->pic_flags.bits.frame_type;
5557 vp9_state->vp9_last_frame.show_frame = pic_param->pic_flags.bits.show_frame;
5558 vp9_state->vp9_last_frame.refresh_frame_context = pic_param->pic_flags.bits.refresh_frame_context;
5559 vp9_state->vp9_last_frame.frame_context_idx = pic_param->pic_flags.bits.frame_context_idx;
5560 vp9_state->vp9_last_frame.intra_only = pic_param->pic_flags.bits.intra_only;
5561 vp9_state->frame_number++;
/* Ping-pong between the two MV temporal buffers. */
5562 vp9_state->curr_mv_temporal_index ^= 1;
5563 vp9_state->first_frame = 0;
5565 return VA_STATUS_SUCCESS;
/*
 * Allocate and initialize the shared VME context and the per-encoder VP9
 * private state: feature flags (HME, HW scoreboard, DYS), default tuning
 * (TX_MODE_SELECT, normal target usage, 1 PAK pass — raised to 4 when a
 * BRC rate-control mode is selected), and the status-buffer byte offsets
 * plus the HCP MMIO register offsets used by the status read-back path.
 * Finally wires the VME kernels and pipeline hooks into encoder_context.
 *
 * NOTE(review): the error-cleanup body after the calloc checks and the
 * function's return statement are not visible in this view.
 */
5569 gen9_vp9_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
5571 struct gen9_encoder_context_vp9 *vme_context = NULL;
5572 struct gen9_vp9_state *vp9_state = NULL;
5574 vme_context = calloc(1, sizeof(struct gen9_encoder_context_vp9));
5575 vp9_state = calloc(1, sizeof(struct gen9_vp9_state));
5577 if (!vme_context || !vp9_state) {
/* The private state is shared between the encoder context and the VME
 * context (and, later, the PAK side which reuses vme_context). */
5585 encoder_context->enc_priv_state = vp9_state;
5586 vme_context->enc_priv_state = vp9_state;
5588 /* Initialize the features that are supported by VP9 */
5589 vme_context->hme_supported = 1;
5590 vme_context->use_hw_scoreboard = 1;
5591 vme_context->use_hw_non_stalling_scoreboard = 1;
5593 vp9_state->tx_mode = TX_MODE_SELECT;
5594 vp9_state->multi_ref_qp_check = 0;
5595 vp9_state->target_usage = INTEL_ENC_VP9_TU_NORMAL;
5596 vp9_state->num_pak_passes = 1;
5597 vp9_state->hme_supported = vme_context->hme_supported;
5598 vp9_state->b16xme_supported = 1;
/* Any RC mode other than NONE/CQP enables BRC and multi-pass PAK. */
5600 if (encoder_context->rate_control_mode != VA_RC_NONE &&
5601 encoder_context->rate_control_mode != VA_RC_CQP) {
5602 vp9_state->brc_enabled = 1;
5603 vp9_state->brc_distortion_buffer_supported = 1;
5604 vp9_state->brc_constant_buffer_supported = 1;
5605 vp9_state->num_pak_passes = 4;
5607 vp9_state->dys_enabled = 1; /* this is supported by default */
5608 vp9_state->first_frame = 1;
5610 /* the definition of status buffer offset for VP9 */
5612 struct vp9_encode_status_buffer_internal *status_buffer;
/* Status fields live in the coded-buffer segment's codec_private_data. */
5613 uint32_t base_offset = offsetof(struct i965_coded_buffer_segment, codec_private_data);
5615 status_buffer = &vp9_state->status_buffer;
5616 memset(status_buffer, 0,
5617 sizeof(struct vp9_encode_status_buffer_internal));
5619 status_buffer->bs_byte_count_offset = base_offset + offsetof(struct vp9_encode_status, bs_byte_count);
5620 status_buffer->image_status_mask_offset = base_offset + offsetof(struct vp9_encode_status, image_status_mask);
5621 status_buffer->image_status_ctrl_offset = base_offset + offsetof(struct vp9_encode_status, image_status_ctrl);
5622 status_buffer->media_index_offset = base_offset + offsetof(struct vp9_encode_status, media_index);
/* HCP VP9 MMIO register offsets read back by gen9_vp9_read_mfc_status(). */
5624 status_buffer->vp9_bs_frame_reg_offset = 0x1E9E0;
5625 status_buffer->vp9_image_mask_reg_offset = 0x1E9F0;
5626 status_buffer->vp9_image_ctrl_reg_offset = 0x1E9F4;
5629 gen9_vme_kernels_context_init_vp9(ctx, encoder_context, vme_context);
5631 encoder_context->vme_context = vme_context;
5632 encoder_context->vme_pipeline = gen9_vme_pipeline_vp9;
5633 encoder_context->vme_context_destroy = gen9_vme_context_destroy_vp9;
/*
 * Status-query hook (installed as encoder_context->get_status): copy the
 * PAK-reported bitstream byte count out of the coded buffer's private status
 * area into the VACodedBufferSegment size field.
 *
 * Returns VA_STATUS_ERROR_INVALID_BUFFER on NULL arguments, otherwise
 * VA_STATUS_SUCCESS.
 */
5639 gen9_vp9_get_coded_status(VADriverContextP ctx,
5640 struct intel_encoder_context *encoder_context,
5641 struct i965_coded_buffer_segment *coded_buf_seg)
5643 struct vp9_encode_status *vp9_encode_status;
5645 if (!encoder_context || !coded_buf_seg)
5646 return VA_STATUS_ERROR_INVALID_BUFFER;
/* bs_byte_count was written by MI_STORE_REGISTER_MEM in
 * gen9_vp9_read_mfc_status(). */
5648 vp9_encode_status = (struct vp9_encode_status *)coded_buf_seg->codec_private_data;
5649 coded_buf_seg->base.size = vp9_encode_status->bs_byte_count;
5651 /* One VACodedBufferSegment for VP9 will be added later.
5652 * It will be linked to the next element of coded_buf_seg->base.next
5655 return VA_STATUS_SUCCESS;
5659 gen9_vp9_pak_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
5661 /* VME & PAK share the same context */
5662 struct gen9_encoder_context_vp9 *pak_context = encoder_context->vme_context;
5667 encoder_context->mfc_context = pak_context;
5668 encoder_context->mfc_context_destroy = gen9_vp9_pak_context_destroy;
5669 encoder_context->mfc_pipeline = gen9_vp9_pak_pipeline;
5670 encoder_context->mfc_brc_prepare = gen9_vp9_pak_brc_prepare;
5671 encoder_context->get_status = gen9_vp9_get_coded_status;