2 * Copyright © 2016 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the
6 * "Software"), to deal in the Software without restriction, including
7 * without limitation the rights to use, copy, modify, merge, publish,
8 * distribute, sub license, and/or sell copies of the Software, and to
9 * permit persons to whom the Software is furnished to do so, subject to
10 * the following conditions:
12 * The above copyright notice and this permission notice (including the
13 * next paragraph) shall be included in all copies or substantial portions
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19 * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * Zhao, Yakui <yakui.zhao@intel.com>
37 #include "intel_batchbuffer.h"
38 #include "intel_driver.h"
40 #include "i965_defines.h"
41 #include "i965_drv_video.h"
42 #include "i965_encoder.h"
43 #include "gen9_vp9_encapi.h"
44 #include "gen9_vp9_encoder.h"
45 #include "gen9_vp9_encoder_kernels.h"
46 #include "vp9_probs.h"
47 #include "gen9_vp9_const_def.h"
49 #define MAX_VP9_ENCODER_SURFACES 64
51 #define MAX_URB_SIZE 4096 /* In register */
52 #define NUM_KERNELS_PER_GPE_CONTEXT 1
54 #define VP9_BRC_KBPS 1000
56 #define BRC_KERNEL_CBR 0x0010
57 #define BRC_KERNEL_VBR 0x0020
58 #define BRC_KERNEL_AVBR 0x0040
59 #define BRC_KERNEL_CQL 0x0080
61 #define DEFAULT_MOCS 0x02
62 #define VP9_PIC_STATE_BUFFER_SIZE 192
/* One entry of the kernel-binary header table: the kernel's start offset
 * expressed in 64-byte units (consumers compute "kernel_start_pointer << 6",
 * see intel_vp9_get_kernel_header_and_size below). */
64 typedef struct _intel_kernel_header_
66 uint32_t reserved : 6;
67 uint32_t kernel_start_pointer : 26;
68 } intel_kernel_header;
/* Header table found at the start of the VP9 encoder kernel binary.
 * NOTE: the member order mirrors the on-disk table layout — do not reorder.
 * VP9BRC_Update must stay last: the lookup code uses
 * "&VP9BRC_Update + 1" as the end-of-table sentinel. */
70 typedef struct _intel_vp9_kernel_header {
72 intel_kernel_header PLY_DSCALE;
73 intel_kernel_header VP9_ME_P;
74 intel_kernel_header VP9_Enc_I_32x32;
75 intel_kernel_header VP9_Enc_I_16x16;
76 intel_kernel_header VP9_Enc_P;
77 intel_kernel_header VP9_Enc_TX;
78 intel_kernel_header VP9_DYS;
80 intel_kernel_header VP9BRC_Intra_Distortion;
81 intel_kernel_header VP9BRC_Init;
82 intel_kernel_header VP9BRC_Reset;
83 intel_kernel_header VP9BRC_Update;
84 } intel_vp9_kernel_header;
86 #define DYS_1X_FLAG 0x01
87 #define DYS_4X_FLAG 0x02
88 #define DYS_16X_FLAG 0x04
/* Requested frame geometry used when (re)allocating per-surface VP9 data.
 * NOTE(review): callers also read surface_param->frame_width, so a
 * frame_width member is expected here as well — confirm against the
 * full definition (lines elided in this listing). */
90 struct vp9_surface_param {
92 uint32_t frame_height;
/* Convert a two's-complement int to sign-magnitude form with the sign flag
 * at bit (sign_bit_pos - 1). The first assignment (sign bit OR'ed in) is
 * presumably the negative-value branch, the second the non-negative branch
 * — the surrounding if/else lines are elided in this listing; confirm. */
95 static uint32_t intel_convert_sign_mag(int val, int sign_bit_pos)
101 ret_val = ((1 << (sign_bit_pos - 1)) | (val & ((1 << (sign_bit_pos - 1)) - 1)));
105 ret_val = val & ((1 << (sign_bit_pos - 1)) - 1);
/* Locate one kernel inside the combined VP9 kernel binary.
 *
 * pvbinary/binary_size describe the whole blob; operation selects the
 * first header of the requested kernel group and krnstate_idx indexes
 * within that group. On success ret_kernel->bin/size point into the blob
 * (no copy is made — the caller must keep pvbinary alive).
 */
111 intel_vp9_get_kernel_header_and_size(
114 INTEL_VP9_ENC_OPERATION operation,
116 struct i965_kernel *ret_kernel)
118 typedef uint32_t BIN_PTR[4];
121 intel_vp9_kernel_header *pkh_table;
122 intel_kernel_header *pcurr_header, *pinvalid_entry, *pnext_header;
125 if (!pvbinary || !ret_kernel)
128 bin_start = (char *)pvbinary;
129 pkh_table = (intel_vp9_kernel_header *)pvbinary;
/* one-past-the-last header entry; relies on VP9BRC_Update being last */
130 pinvalid_entry = &(pkh_table->VP9BRC_Update) + 1;
/* default end offset: the last kernel runs to the end of the blob */
131 next_krnoffset = binary_size;
133 if ((operation == INTEL_VP9_ENC_SCALING4X) || (operation == INTEL_VP9_ENC_SCALING2X))
135 pcurr_header = &pkh_table->PLY_DSCALE;
137 else if (operation == INTEL_VP9_ENC_ME)
139 pcurr_header = &pkh_table->VP9_ME_P;
141 else if (operation == INTEL_VP9_ENC_MBENC)
143 pcurr_header = &pkh_table->VP9_Enc_I_32x32;
145 else if (operation == INTEL_VP9_ENC_DYS)
147 pcurr_header = &pkh_table->VP9_DYS;
149 else if (operation == INTEL_VP9_ENC_BRC)
151 pcurr_header = &pkh_table->VP9BRC_Intra_Distortion;
/* step to the krnstate_idx-th kernel of the selected group */
158 pcurr_header += krnstate_idx;
/* offsets are stored in 64-byte units, hence the << 6 */
159 ret_kernel->bin = (const BIN_PTR *)(bin_start + (pcurr_header->kernel_start_pointer << 6));
161 pnext_header = (pcurr_header + 1);
162 if (pnext_header < pinvalid_entry)
164 next_krnoffset = pnext_header->kernel_start_pointer << 6;
/* size = distance to the next kernel (or to end of blob) */
166 ret_kernel->size = next_krnoffset - (pcurr_header->kernel_start_pointer << 6);
/* free_private_data callback for VP9-encoded surfaces: destroys every
 * auxiliary (scaled/DYS) VA surface attached to the gen9_surface_vp9
 * block. Each destroy is guarded and the id/obj pair is reset so the
 * callback is safe against partially-initialized private data.
 * (The trailing free of the gen9_surface_vp9 block itself is elided in
 * this listing.) */
173 gen9_free_surfaces_vp9(void **data)
175 struct gen9_surface_vp9 *vp9_surface;
182 if (vp9_surface->scaled_4x_surface_obj) {
183 i965_DestroySurfaces(vp9_surface->ctx, &vp9_surface->scaled_4x_surface_id, 1);
184 vp9_surface->scaled_4x_surface_id = VA_INVALID_SURFACE;
185 vp9_surface->scaled_4x_surface_obj = NULL;
188 if (vp9_surface->scaled_16x_surface_obj) {
189 i965_DestroySurfaces(vp9_surface->ctx, &vp9_surface->scaled_16x_surface_id, 1);
190 vp9_surface->scaled_16x_surface_id = VA_INVALID_SURFACE;
191 vp9_surface->scaled_16x_surface_obj = NULL;
194 if (vp9_surface->dys_4x_surface_obj) {
195 i965_DestroySurfaces(vp9_surface->ctx, &vp9_surface->dys_4x_surface_id, 1);
196 vp9_surface->dys_4x_surface_id = VA_INVALID_SURFACE;
197 vp9_surface->dys_4x_surface_obj = NULL;
200 if (vp9_surface->dys_16x_surface_obj) {
201 i965_DestroySurfaces(vp9_surface->ctx, &vp9_surface->dys_16x_surface_id, 1);
202 vp9_surface->dys_16x_surface_id = VA_INVALID_SURFACE;
203 vp9_surface->dys_16x_surface_obj = NULL;
206 if (vp9_surface->dys_surface_obj) {
207 i965_DestroySurfaces(vp9_surface->ctx, &vp9_surface->dys_surface_id, 1);
208 vp9_surface->dys_surface_id = VA_INVALID_SURFACE;
209 vp9_surface->dys_surface_obj = NULL;
220 gen9_vp9_init_check_surfaces(VADriverContextP ctx,
221 struct object_surface *obj_surface,
222 struct vp9_surface_param *surface_param)
224 struct i965_driver_data *i965 = i965_driver_data(ctx);
225 struct gen9_surface_vp9 *vp9_surface;
226 int downscaled_width_4x, downscaled_height_4x;
227 int downscaled_width_16x, downscaled_height_16x;
229 if (!obj_surface || !obj_surface->bo)
230 return VA_STATUS_ERROR_INVALID_SURFACE;
232 if (obj_surface->private_data &&
233 obj_surface->free_private_data != gen9_free_surfaces_vp9) {
234 obj_surface->free_private_data(&obj_surface->private_data);
235 obj_surface->private_data = NULL;
238 if (obj_surface->private_data) {
239 /* if the frame width/height is already the same as the expected,
240 * it is unncessary to reallocate it.
242 vp9_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
243 if (vp9_surface->frame_width >= surface_param->frame_width ||
244 vp9_surface->frame_height >= surface_param->frame_height)
245 return VA_STATUS_SUCCESS;
247 obj_surface->free_private_data(&obj_surface->private_data);
248 obj_surface->private_data = NULL;
252 vp9_surface = calloc(1, sizeof(struct gen9_surface_vp9));
255 return VA_STATUS_ERROR_ALLOCATION_FAILED;
257 vp9_surface->ctx = ctx;
258 obj_surface->private_data = vp9_surface;
259 obj_surface->free_private_data = gen9_free_surfaces_vp9;
261 vp9_surface->frame_width = surface_param->frame_width;
262 vp9_surface->frame_height = surface_param->frame_height;
264 downscaled_width_4x = ALIGN(surface_param->frame_width / 4, 16);
265 downscaled_height_4x = ALIGN(surface_param->frame_height / 4, 16);
267 i965_CreateSurfaces(ctx,
269 downscaled_height_4x,
272 &vp9_surface->scaled_4x_surface_id);
274 vp9_surface->scaled_4x_surface_obj = SURFACE(vp9_surface->scaled_4x_surface_id);
276 if (!vp9_surface->scaled_4x_surface_obj) {
277 return VA_STATUS_ERROR_ALLOCATION_FAILED;
280 i965_check_alloc_surface_bo(ctx, vp9_surface->scaled_4x_surface_obj, 1,
281 VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
283 downscaled_width_16x = ALIGN(surface_param->frame_width / 16, 16);
284 downscaled_height_16x = ALIGN(surface_param->frame_height / 16, 16);
285 i965_CreateSurfaces(ctx,
286 downscaled_width_16x,
287 downscaled_height_16x,
290 &vp9_surface->scaled_16x_surface_id);
291 vp9_surface->scaled_16x_surface_obj = SURFACE(vp9_surface->scaled_16x_surface_id);
293 if (!vp9_surface->scaled_16x_surface_obj) {
294 return VA_STATUS_ERROR_ALLOCATION_FAILED;
297 i965_check_alloc_surface_bo(ctx, vp9_surface->scaled_16x_surface_obj, 1,
298 VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
300 return VA_STATUS_SUCCESS;
304 gen9_vp9_check_dys_surfaces(VADriverContextP ctx,
305 struct object_surface *obj_surface,
306 struct vp9_surface_param *surface_param)
308 struct i965_driver_data *i965 = i965_driver_data(ctx);
309 struct gen9_surface_vp9 *vp9_surface;
310 int dys_width_4x, dys_height_4x;
311 int dys_width_16x, dys_height_16x;
313 /* As this is handled after the surface checking, it is unnecessary
314 * to check the surface bo and vp9_priv_surface again
317 vp9_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
320 return VA_STATUS_ERROR_INVALID_SURFACE;
322 /* if the frame_width/height of dys_surface is the same as
323 * the expected, it is unnecessary to allocate it again
325 if (vp9_surface->dys_frame_width == surface_param->frame_width &&
326 vp9_surface->dys_frame_width == surface_param->frame_width)
327 return VA_STATUS_SUCCESS;
329 if (vp9_surface->dys_4x_surface_obj) {
330 i965_DestroySurfaces(vp9_surface->ctx, &vp9_surface->dys_4x_surface_id, 1);
331 vp9_surface->dys_4x_surface_id = VA_INVALID_SURFACE;
332 vp9_surface->dys_4x_surface_obj = NULL;
335 if (vp9_surface->dys_16x_surface_obj) {
336 i965_DestroySurfaces(vp9_surface->ctx, &vp9_surface->dys_16x_surface_id, 1);
337 vp9_surface->dys_16x_surface_id = VA_INVALID_SURFACE;
338 vp9_surface->dys_16x_surface_obj = NULL;
341 if (vp9_surface->dys_surface_obj) {
342 i965_DestroySurfaces(vp9_surface->ctx, &vp9_surface->dys_surface_id, 1);
343 vp9_surface->dys_surface_id = VA_INVALID_SURFACE;
344 vp9_surface->dys_surface_obj = NULL;
347 vp9_surface->dys_frame_width = surface_param->frame_width;
348 vp9_surface->dys_frame_height = surface_param->frame_height;
350 i965_CreateSurfaces(ctx,
351 surface_param->frame_width,
352 surface_param->frame_height,
355 &vp9_surface->dys_surface_id);
356 vp9_surface->dys_surface_obj = SURFACE(vp9_surface->dys_surface_id);
358 if (!vp9_surface->dys_surface_obj) {
359 return VA_STATUS_ERROR_ALLOCATION_FAILED;
362 i965_check_alloc_surface_bo(ctx, vp9_surface->dys_surface_obj, 1,
363 VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
365 dys_width_4x = ALIGN(surface_param->frame_width / 4, 16);
366 dys_height_4x = ALIGN(surface_param->frame_width / 4, 16);
368 i965_CreateSurfaces(ctx,
373 &vp9_surface->dys_4x_surface_id);
375 vp9_surface->dys_4x_surface_obj = SURFACE(vp9_surface->dys_4x_surface_id);
377 if (!vp9_surface->dys_4x_surface_obj) {
378 return VA_STATUS_ERROR_ALLOCATION_FAILED;
381 i965_check_alloc_surface_bo(ctx, vp9_surface->dys_4x_surface_obj, 1,
382 VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
384 dys_width_16x = ALIGN(surface_param->frame_width / 16, 16);
385 dys_height_16x = ALIGN(surface_param->frame_width / 16, 16);
386 i965_CreateSurfaces(ctx,
391 &vp9_surface->dys_16x_surface_id);
392 vp9_surface->dys_16x_surface_obj = SURFACE(vp9_surface->dys_16x_surface_id);
394 if (!vp9_surface->dys_16x_surface_obj) {
395 return VA_STATUS_ERROR_ALLOCATION_FAILED;
398 i965_check_alloc_surface_bo(ctx, vp9_surface->dys_16x_surface_obj, 1,
399 VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
401 return VA_STATUS_SUCCESS;
/* Allocate every GPE buffer the VP9 encode pipeline needs.
 *
 * First the BRC-related buffers are unconditionally freed and reallocated
 * (they are sized from fixed constants, not the frame size). Then, unless
 * the previously-allocated geometry already covers frame_width/height,
 * all frame-size-dependent buffers (PAK row stores, deblocking/metadata
 * line buffers, probability/segment buffers, ME MV/distortion 2D surfaces,
 * mode-decision and MV temporal buffers, mb_code surface) are reallocated.
 *
 * Any allocation failure jumps to the failed_allocation label and returns
 * VA_STATUS_ERROR_ALLOCATION_FAILED (the guard "if (!allocate_flag)" lines
 * are elided in this listing).
 */
405 gen9_vp9_allocate_resources(VADriverContextP ctx,
406 struct encode_state *encode_state,
407 struct intel_encoder_context *encoder_context,
410 struct i965_driver_data *i965 = i965_driver_data(ctx);
411 struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
412 struct gen9_vp9_state *vp9_state;
413 int allocate_flag, i;
415 uint32_t frame_width_in_sb, frame_height_in_sb, frame_sb_num;
416 unsigned int width, height;
418 vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
420 if (!vp9_state || !vp9_state->pic_param)
421 return VA_STATUS_ERROR_INVALID_PARAMETER;
423 /* the buffer related with BRC is not changed. So it is allocated
424 * based on the input parameter
427 i965_free_gpe_resource(&vme_context->res_brc_history_buffer);
428 i965_free_gpe_resource(&vme_context->res_brc_const_data_buffer);
429 i965_free_gpe_resource(&vme_context->res_brc_mbenc_curbe_write_buffer);
430 i965_free_gpe_resource(&vme_context->res_pic_state_brc_read_buffer);
431 i965_free_gpe_resource(&vme_context->res_pic_state_brc_write_hfw_read_buffer);
432 i965_free_gpe_resource(&vme_context->res_pic_state_hfw_write_buffer);
433 i965_free_gpe_resource(&vme_context->res_seg_state_brc_read_buffer);
434 i965_free_gpe_resource(&vme_context->res_seg_state_brc_write_buffer);
435 i965_free_gpe_resource(&vme_context->res_brc_bitstream_size_buffer);
436 i965_free_gpe_resource(&vme_context->res_brc_hfw_data_buffer);
437 i965_free_gpe_resource(&vme_context->res_brc_mmdk_pak_buffer);
439 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
440 &vme_context->res_brc_history_buffer,
441 VP9_BRC_HISTORY_BUFFER_SIZE,
442 "Brc History buffer");
444 goto failed_allocation;
445 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
446 &vme_context->res_brc_const_data_buffer,
447 VP9_BRC_CONSTANTSURFACE_SIZE,
448 "Brc Constant buffer");
450 goto failed_allocation;
/* CURBE + interface descriptors written by the BRC kernel for MBENC */
452 res_size = ALIGN(sizeof(vp9_mbenc_curbe_data), 64) + 128 +
453 ALIGN(sizeof(struct gen8_interface_descriptor_data), 64) * NUM_VP9_MBENC;
454 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
455 &vme_context->res_brc_mbenc_curbe_write_buffer,
459 goto failed_allocation;
/* 4 copies of the VP9 PIC_STATE command for BRC read/write passes */
461 res_size = VP9_PIC_STATE_BUFFER_SIZE * 4;
462 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
463 &vme_context->res_pic_state_brc_read_buffer,
465 "Pic State Brc_read");
467 goto failed_allocation;
469 res_size = VP9_PIC_STATE_BUFFER_SIZE * 4;
470 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
471 &vme_context->res_pic_state_brc_write_hfw_read_buffer,
473 "Pic State Brc_write Hfw_Read");
475 goto failed_allocation;
477 res_size = VP9_PIC_STATE_BUFFER_SIZE * 4;
478 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
479 &vme_context->res_pic_state_hfw_write_buffer,
481 "Pic State Hfw Write");
483 goto failed_allocation;
485 res_size = VP9_SEGMENT_STATE_BUFFER_SIZE;
486 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
487 &vme_context->res_seg_state_brc_read_buffer,
489 "Segment state brc_read");
491 goto failed_allocation;
493 res_size = VP9_SEGMENT_STATE_BUFFER_SIZE;
494 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
495 &vme_context->res_seg_state_brc_write_buffer,
497 "Segment state brc_write");
499 goto failed_allocation;
501 res_size = VP9_BRC_BITSTREAM_SIZE_BUFFER_SIZE;
502 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
503 &vme_context->res_brc_bitstream_size_buffer,
505 "Brc bitstream buffer");
507 goto failed_allocation;
509 res_size = VP9_HFW_BRC_DATA_BUFFER_SIZE;
510 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
511 &vme_context->res_brc_hfw_data_buffer,
515 goto failed_allocation;
517 res_size = VP9_BRC_MMDK_PAK_BUFFER_SIZE;
518 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
519 &vme_context->res_brc_mmdk_pak_buffer,
523 goto failed_allocation;
526 /* If the width/height of allocated buffer is greater than the expected,
527 * it is unnecessary to allocate it again
529 if (vp9_state->res_width >= vp9_state->frame_width &&
530 vp9_state->res_height >= vp9_state->frame_height) {
532 return VA_STATUS_SUCCESS;
/* everything below is sized in 64x64 super-blocks */
534 frame_width_in_sb = ALIGN(vp9_state->frame_width, 64) / 64;
535 frame_height_in_sb = ALIGN(vp9_state->frame_height, 64) / 64;
536 frame_sb_num = frame_width_in_sb * frame_height_in_sb;
538 i965_free_gpe_resource(&vme_context->res_hvd_line_buffer);
539 res_size = frame_width_in_sb * 64;
540 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
541 &vme_context->res_hvd_line_buffer,
543 "VP9 hvd line line");
545 goto failed_allocation;
547 i965_free_gpe_resource(&vme_context->res_hvd_tile_line_buffer);
548 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
549 &vme_context->res_hvd_tile_line_buffer,
551 "VP9 hvd tile_line line");
553 goto failed_allocation;
555 i965_free_gpe_resource(&vme_context->res_deblocking_filter_line_buffer);
556 res_size = frame_width_in_sb * 18 * 64;
557 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
558 &vme_context->res_deblocking_filter_line_buffer,
560 "VP9 deblocking filter line");
562 goto failed_allocation;
564 i965_free_gpe_resource(&vme_context->res_deblocking_filter_tile_line_buffer);
565 res_size = frame_width_in_sb * 18 * 64;
566 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
567 &vme_context->res_deblocking_filter_tile_line_buffer,
569 "VP9 deblocking tile line");
571 goto failed_allocation;
573 i965_free_gpe_resource(&vme_context->res_deblocking_filter_tile_col_buffer);
574 res_size = frame_height_in_sb * 17 * 64;
575 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
576 &vme_context->res_deblocking_filter_tile_col_buffer,
578 "VP9 deblocking tile col");
580 goto failed_allocation;
582 i965_free_gpe_resource(&vme_context->res_metadata_line_buffer);
583 res_size = frame_width_in_sb * 5 * 64;
584 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
585 &vme_context->res_metadata_line_buffer,
587 "VP9 metadata line");
589 goto failed_allocation;
591 i965_free_gpe_resource(&vme_context->res_metadata_tile_line_buffer);
592 res_size = frame_width_in_sb * 5 * 64;
593 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
594 &vme_context->res_metadata_tile_line_buffer,
596 "VP9 metadata tile line");
598 goto failed_allocation;
600 i965_free_gpe_resource(&vme_context->res_metadata_tile_col_buffer);
601 res_size = frame_height_in_sb * 5 * 64;
602 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
603 &vme_context->res_metadata_tile_col_buffer,
605 "VP9 metadata tile col");
607 goto failed_allocation;
609 i965_free_gpe_resource(&vme_context->res_prob_buffer);
611 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
612 &vme_context->res_prob_buffer,
616 goto failed_allocation;
618 i965_free_gpe_resource(&vme_context->res_segmentid_buffer);
619 res_size = frame_sb_num * 64;
620 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
621 &vme_context->res_segmentid_buffer,
625 goto failed_allocation;
627 i965_zero_gpe_resource(&vme_context->res_segmentid_buffer);
629 i965_free_gpe_resource(&vme_context->res_prob_delta_buffer);
631 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
632 &vme_context->res_prob_delta_buffer,
636 goto failed_allocation;
/* NOTE(review): the next four statements zero res_segmentid_buffer a
 * second time and then free/reallocate res_prob_delta_buffer again —
 * this looks like accidental duplication of the block above (possibly a
 * mangled res_compressed_input/prob_counter section). Verify against
 * the unabridged source before removing. */
638 i965_zero_gpe_resource(&vme_context->res_segmentid_buffer);
640 i965_free_gpe_resource(&vme_context->res_prob_delta_buffer);
642 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
643 &vme_context->res_prob_delta_buffer,
647 goto failed_allocation;
649 i965_free_gpe_resource(&vme_context->res_compressed_input_buffer);
651 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
652 &vme_context->res_compressed_input_buffer,
654 "VP9 compressed_input buffer");
656 goto failed_allocation;
658 i965_free_gpe_resource(&vme_context->res_prob_counter_buffer);
660 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
661 &vme_context->res_prob_counter_buffer,
665 goto failed_allocation;
667 i965_free_gpe_resource(&vme_context->res_tile_record_streamout_buffer);
668 res_size = frame_sb_num * 64;
669 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
670 &vme_context->res_tile_record_streamout_buffer,
672 "VP9 tile record stream_out");
674 goto failed_allocation;
676 i965_free_gpe_resource(&vme_context->res_cu_stat_streamout_buffer);
677 res_size = frame_sb_num * 64;
678 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
679 &vme_context->res_cu_stat_streamout_buffer,
681 "VP9 CU stat stream_out");
683 goto failed_allocation;
/* 2D surfaces for the 4x HME motion-vector data and distortion output */
685 width = vp9_state->downscaled_width_4x_in_mb * 32;
686 height = vp9_state->downscaled_height_4x_in_mb * 16;
687 i965_free_gpe_resource(&vme_context->s4x_memv_data_buffer);
688 allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
689 &vme_context->s4x_memv_data_buffer,
694 goto failed_allocation;
696 width = vp9_state->downscaled_width_4x_in_mb * 8;
697 height = vp9_state->downscaled_height_4x_in_mb * 16;
698 i965_free_gpe_resource(&vme_context->s4x_memv_distortion_buffer);
699 allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
700 &vme_context->s4x_memv_distortion_buffer,
703 "VP9 4x MEMV distorion");
705 goto failed_allocation;
707 width = ALIGN(vp9_state->downscaled_width_16x_in_mb * 32, 64);
708 height = vp9_state->downscaled_height_16x_in_mb * 16;
709 i965_free_gpe_resource(&vme_context->s16x_memv_data_buffer);
710 allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
711 &vme_context->s16x_memv_data_buffer,
714 "VP9 16x MEMV data");
716 goto failed_allocation;
718 width = vp9_state->frame_width_in_mb * 16;
719 height = vp9_state->frame_height_in_mb * 8;
720 i965_free_gpe_resource(&vme_context->res_output_16x16_inter_modes);
721 allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
722 &vme_context->res_output_16x16_inter_modes,
725 "VP9 output inter_mode");
727 goto failed_allocation;
/* double-buffered (current/previous frame) mode-decision output */
729 res_size = vp9_state->frame_width_in_mb * vp9_state->frame_height_in_mb *
731 for (i = 0; i < 2; i++) {
732 i965_free_gpe_resource(&vme_context->res_mode_decision[i]);
733 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
734 &vme_context->res_mode_decision[i],
736 "VP9 mode decision");
738 goto failed_allocation;
/* double-buffered temporal MV buffers, 9 cache lines per super-block */
742 res_size = frame_sb_num * 9 * 64;
743 for (i = 0; i < 2; i++) {
744 i965_free_gpe_resource(&vme_context->res_mv_temporal_buffer[i]);
745 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
746 &vme_context->res_mv_temporal_buffer[i],
750 goto failed_allocation;
/* mb_code surface: per-SB records at mb_data_offset, preceded by a
 * 4096-aligned header area */
753 vp9_state->mb_data_offset = ALIGN(frame_sb_num * 16, 4096) + 4096;
754 res_size = vp9_state->mb_data_offset + frame_sb_num * 64 * 64 + 1000;
755 i965_free_gpe_resource(&vme_context->res_mb_code_surface);
756 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
757 &vme_context->res_mb_code_surface,
758 ALIGN(res_size, 4096),
759 "VP9 mb_code surface");
761 goto failed_allocation;
764 i965_free_gpe_resource(&vme_context->res_pak_uncompressed_input_buffer);
765 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
766 &vme_context->res_pak_uncompressed_input_buffer,
767 ALIGN(res_size, 4096),
768 "VP9 pak_uncompressed_input");
770 goto failed_allocation;
772 if (!vme_context->frame_header_data) {
773 /* allocate 512 bytes for generating the uncompressed header */
774 vme_context->frame_header_data = calloc(1, 512);
/* remember the geometry these buffers were sized for (reuse check above) */
777 vp9_state->res_width = vp9_state->frame_width;
778 vp9_state->res_height = vp9_state->frame_height;
780 return VA_STATUS_SUCCESS;
783 return VA_STATUS_ERROR_ALLOCATION_FAILED;
/* Release every GPE buffer allocated by gen9_vp9_allocate_resources.
 * BRC buffers are only freed when BRC was enabled (they are only
 * allocated in that case); everything else is freed unconditionally
 * (i965_free_gpe_resource tolerates already-freed resources). */
787 gen9_vp9_free_resources(struct gen9_encoder_context_vp9 *vme_context)
790 struct gen9_vp9_state *vp9_state = (struct gen9_vp9_state *) vme_context->enc_priv_state;
792 if (vp9_state->brc_enabled) {
793 i965_free_gpe_resource(&vme_context->res_brc_history_buffer);
794 i965_free_gpe_resource(&vme_context->res_brc_const_data_buffer);
795 i965_free_gpe_resource(&vme_context->res_brc_mbenc_curbe_write_buffer);
796 i965_free_gpe_resource(&vme_context->res_pic_state_brc_read_buffer);
797 i965_free_gpe_resource(&vme_context->res_pic_state_brc_write_hfw_read_buffer);
798 i965_free_gpe_resource(&vme_context->res_pic_state_hfw_write_buffer);
799 i965_free_gpe_resource(&vme_context->res_seg_state_brc_read_buffer);
800 i965_free_gpe_resource(&vme_context->res_seg_state_brc_write_buffer);
801 i965_free_gpe_resource(&vme_context->res_brc_bitstream_size_buffer);
802 i965_free_gpe_resource(&vme_context->res_brc_hfw_data_buffer);
803 i965_free_gpe_resource(&vme_context->res_brc_mmdk_pak_buffer);
806 i965_free_gpe_resource(&vme_context->res_hvd_line_buffer);
807 i965_free_gpe_resource(&vme_context->res_hvd_tile_line_buffer);
808 i965_free_gpe_resource(&vme_context->res_deblocking_filter_line_buffer);
809 i965_free_gpe_resource(&vme_context->res_deblocking_filter_tile_line_buffer);
810 i965_free_gpe_resource(&vme_context->res_deblocking_filter_tile_col_buffer);
811 i965_free_gpe_resource(&vme_context->res_metadata_line_buffer);
812 i965_free_gpe_resource(&vme_context->res_metadata_tile_line_buffer);
813 i965_free_gpe_resource(&vme_context->res_metadata_tile_col_buffer);
814 i965_free_gpe_resource(&vme_context->res_prob_buffer);
815 i965_free_gpe_resource(&vme_context->res_segmentid_buffer);
816 i965_free_gpe_resource(&vme_context->res_prob_delta_buffer);
817 i965_free_gpe_resource(&vme_context->res_prob_counter_buffer);
818 i965_free_gpe_resource(&vme_context->res_tile_record_streamout_buffer);
819 i965_free_gpe_resource(&vme_context->res_cu_stat_streamout_buffer);
820 i965_free_gpe_resource(&vme_context->s4x_memv_data_buffer);
821 i965_free_gpe_resource(&vme_context->s4x_memv_distortion_buffer);
822 i965_free_gpe_resource(&vme_context->s16x_memv_data_buffer);
823 i965_free_gpe_resource(&vme_context->res_output_16x16_inter_modes);
824 for (i = 0; i < 2; i++) {
825 i965_free_gpe_resource(&vme_context->res_mode_decision[i]);
828 for (i = 0; i < 2; i++) {
829 i965_free_gpe_resource(&vme_context->res_mv_temporal_buffer[i]);
832 i965_free_gpe_resource(&vme_context->res_compressed_input_buffer);
833 i965_free_gpe_resource(&vme_context->res_mb_code_surface);
834 i965_free_gpe_resource(&vme_context->res_pak_uncompressed_input_buffer);
/* CPU-side scratch for the uncompressed frame header */
836 if (vme_context->frame_header_data) {
837 free(vme_context->frame_header_data);
838 vme_context->frame_header_data = NULL;
/* Translate the encoder's simple kernel walker description into the full
 * hardware MEDIA_OBJECT_WALKER parameter block.
 *
 * Three walking patterns are produced:
 *   - no_dependency: plain raster scan, scoreboard disabled;
 *   - VP9_45Z_DEGREE: 45-degree zig-zag wavefront (doubled-Y trick with
 *     half-width global resolution) for diagonal dependencies;
 *   - otherwise (else-branch lines elided here): 26-degree-style wavefront
 *     with scoreboard mask 0x0F.
 */
844 gen9_init_media_object_walker_parameter(struct intel_encoder_context *encoder_context,
845 struct vp9_encoder_kernel_walker_parameter *kernel_walker_param,
846 struct gpe_media_object_walker_parameter *walker_param)
848 memset(walker_param, 0, sizeof(*walker_param));
850 walker_param->use_scoreboard = kernel_walker_param->use_scoreboard;
852 walker_param->block_resolution.x = kernel_walker_param->resolution_x;
853 walker_param->block_resolution.y = kernel_walker_param->resolution_y;
855 walker_param->global_resolution.x = kernel_walker_param->resolution_x;
856 walker_param->global_resolution.y = kernel_walker_param->resolution_y;
858 walker_param->global_outer_loop_stride.x = kernel_walker_param->resolution_x;
859 walker_param->global_outer_loop_stride.y = 0;
861 walker_param->global_inner_loop_unit.x = 0;
862 walker_param->global_inner_loop_unit.y = kernel_walker_param->resolution_y;
864 walker_param->local_loop_exec_count = 0xFFFF; //MAX VALUE
865 walker_param->global_loop_exec_count = 0xFFFF; //MAX VALUE
867 if (kernel_walker_param->no_dependency) {
868 walker_param->scoreboard_mask = 0;
869 walker_param->use_scoreboard = 0;
870 // Raster scan walking pattern
871 walker_param->local_outer_loop_stride.x = 0;
872 walker_param->local_outer_loop_stride.y = 1;
873 walker_param->local_inner_loop_unit.x = 1;
874 walker_param->local_inner_loop_unit.y = 0;
875 walker_param->local_end.x = kernel_walker_param->resolution_x - 1;
876 walker_param->local_end.y = 0;
878 walker_param->local_end.x = 0;
879 walker_param->local_end.y = 0;
881 if (kernel_walker_param->walker_degree == VP9_45Z_DEGREE) {
/* 45-degree zig-zag: watch 4 scoreboard neighbours */
883 walker_param->scoreboard_mask = 0x0F;
885 walker_param->global_loop_exec_count = 0x3FF;
886 walker_param->local_loop_exec_count = 0x3FF;
/* halve X / double Y so the diagonal wavefront fits the walker model */
888 walker_param->global_resolution.x = (unsigned int)(kernel_walker_param->resolution_x / 2.f) + 1;
889 walker_param->global_resolution.y = 2 * kernel_walker_param->resolution_y;
891 walker_param->global_start.x = 0;
892 walker_param->global_start.y = 0;
894 walker_param->global_outer_loop_stride.x = walker_param->global_resolution.x;
895 walker_param->global_outer_loop_stride.y = 0;
897 walker_param->global_inner_loop_unit.x = 0;
898 walker_param->global_inner_loop_unit.y = walker_param->global_resolution.y;
900 walker_param->block_resolution.x = walker_param->global_resolution.x;
901 walker_param->block_resolution.y = walker_param->global_resolution.y;
903 walker_param->local_start.x = 0;
904 walker_param->local_start.y = 0;
906 walker_param->local_outer_loop_stride.x = 1;
907 walker_param->local_outer_loop_stride.y = 0;
909 walker_param->local_inner_loop_unit.x = -1;
910 walker_param->local_inner_loop_unit.y = 4;
912 walker_param->middle_loop_extra_steps = 3;
913 walker_param->mid_loop_unit_x = 0;
914 walker_param->mid_loop_unit_y = 1;
/* default wavefront pattern (enclosing else lines elided in listing) */
917 walker_param->scoreboard_mask = 0x0F;
918 walker_param->local_outer_loop_stride.x = 1;
919 walker_param->local_outer_loop_stride.y = 0;
920 walker_param->local_inner_loop_unit.x = -2;
921 walker_param->local_inner_loop_unit.y = 1;
/* Bind a VA surface (or its UV plane) to a binding-table slot of
 * gpe_context as a 2D surface. A transient gpe_resource wrapper is built
 * around the surface bo and released right after the binding is emitted
 * (the surface-state itself keeps its own bo reference). */
927 gen9_add_2d_gpe_surface(VADriverContextP ctx,
928 struct i965_gpe_context *gpe_context,
929 struct object_surface *obj_surface,
931 int is_media_block_rw,
935 struct i965_gpe_resource gpe_resource;
936 struct i965_gpe_surface gpe_surface;
938 memset(&gpe_surface, 0, sizeof(gpe_surface));
940 i965_object_surface_to_2d_gpe_resource(&gpe_resource, obj_surface);
941 gpe_surface.gpe_resource = &gpe_resource;
942 gpe_surface.is_2d_surface = 1;
943 gpe_surface.is_uv_surface = !!is_uv_surface;
944 gpe_surface.is_media_block_rw = !!is_media_block_rw;
946 gpe_surface.cacheability_control = DEFAULT_MOCS;
947 gpe_surface.format = format;
949 gen9_gpe_context_add_surface(gpe_context, &gpe_surface, index);
950 i965_free_gpe_resource(&gpe_resource);
/* Bind a VA surface as an AVS/VME "advanced" surface (used by the media
 * sampler); v_direction = 2 selects frame (progressive) access. */
954 gen9_add_adv_gpe_surface(VADriverContextP ctx,
955 struct i965_gpe_context *gpe_context,
956 struct object_surface *obj_surface,
959 struct i965_gpe_resource gpe_resource;
960 struct i965_gpe_surface gpe_surface;
962 memset(&gpe_surface, 0, sizeof(gpe_surface));
964 i965_object_surface_to_2d_gpe_resource(&gpe_resource, obj_surface);
965 gpe_surface.gpe_resource = &gpe_resource;
966 gpe_surface.is_adv_surface = 1;
967 gpe_surface.cacheability_control = DEFAULT_MOCS;
968 gpe_surface.v_direction = 2;
970 gen9_gpe_context_add_surface(gpe_context, &gpe_surface, index);
971 i965_free_gpe_resource(&gpe_resource);
/* Bind a linear GPE buffer (optionally as a RAW/byte-addressed buffer)
 * covering [offset, offset+size) to a binding-table slot. The caller
 * retains ownership of gpe_buffer. */
975 gen9_add_buffer_gpe_surface(VADriverContextP ctx,
976 struct i965_gpe_context *gpe_context,
977 struct i965_gpe_resource *gpe_buffer,
983 struct i965_gpe_surface gpe_surface;
985 memset(&gpe_surface, 0, sizeof(gpe_surface));
987 gpe_surface.gpe_resource = gpe_buffer;
988 gpe_surface.is_buffer = 1;
989 gpe_surface.is_raw_buffer = !!is_raw_buffer;
990 gpe_surface.cacheability_control = DEFAULT_MOCS;
991 gpe_surface.size = size;
992 gpe_surface.offset = offset;
994 gen9_gpe_context_add_surface(gpe_context, &gpe_surface, index);
/* Bind a GPE buffer that is laid out as a 2D surface (width/height/pitch
 * come from the resource itself) to a binding-table slot. */
998 gen9_add_buffer_2d_gpe_surface(VADriverContextP ctx,
999 struct i965_gpe_context *gpe_context,
1000 struct i965_gpe_resource *gpe_buffer,
1001 int is_media_block_rw,
1002 unsigned int format,
1005 struct i965_gpe_surface gpe_surface;
1007 memset(&gpe_surface, 0, sizeof(gpe_surface));
1009 gpe_surface.gpe_resource = gpe_buffer;
1010 gpe_surface.is_2d_surface = 1;
1011 gpe_surface.is_media_block_rw = !!is_media_block_rw;
1012 gpe_surface.cacheability_control = DEFAULT_MOCS;
1013 gpe_surface.format = format;
1015 gen9_gpe_context_add_surface(gpe_context, &gpe_surface, index);
/* Wrap a raw dri bo in a temporary gpe_resource and bind it as a linear
 * buffer via gen9_add_buffer_gpe_surface (argument forwarding lines are
 * elided in this listing); the wrapper is released afterwards. */
1019 gen9_add_dri_buffer_gpe_surface(VADriverContextP ctx,
1020 struct i965_gpe_context *gpe_context,
1024 unsigned int offset,
1027 struct i965_gpe_resource gpe_resource;
1029 i965_dri_object_to_buffer_gpe_resource(&gpe_resource, bo);
1030 gen9_add_buffer_gpe_surface(ctx,
1038 i965_free_gpe_resource(&gpe_resource);
/* Wrap a raw dri bo as a width x height (pitch-strided) 2D gpe_resource
 * and bind it via gen9_add_buffer_2d_gpe_surface (argument forwarding
 * lines elided in this listing); the wrapper is released afterwards. */
1043 gen9_add_dri_buffer_2d_gpe_surface(VADriverContextP ctx,
1044 struct i965_gpe_context *gpe_context,
1047 unsigned int height,
1049 int is_media_block_rw,
1050 unsigned int format,
1053 struct i965_gpe_resource gpe_resource;
1055 i965_dri_object_to_2d_gpe_resource(&gpe_resource, bo, width, height, pitch);
1056 gen9_add_buffer_2d_gpe_surface(ctx,
1063 i965_free_gpe_resource(&gpe_resource);
/* Submit one MEDIA_OBJECT kernel dispatch inside an atomic batch:
 * first MI_STORE_DATA_IMM writes media_function into the status buffer's
 * media_index slot (so the status-report path can tell which kernel a
 * hang/progress belongs to), then the GPGPU pipeline is set up, the
 * media object emitted, flushed, torn down, and the batch submitted. */
1068 gen9_run_kernel_media_object(VADriverContextP ctx,
1069 struct intel_encoder_context *encoder_context,
1070 struct i965_gpe_context *gpe_context,
1072 struct gpe_media_object_parameter *param)
1074 struct intel_batchbuffer *batch = encoder_context->base.batch;
1075 struct vp9_encode_status_buffer_internal *status_buffer;
1076 struct gen9_vp9_state *vp9_state;
1077 struct gpe_mi_store_data_imm_parameter mi_store_data_imm;
1079 vp9_state = (struct gen9_vp9_state *)(encoder_context->enc_priv_state);
1080 if (!vp9_state || !batch)
1083 intel_batchbuffer_start_atomic(batch, 0x1000);
/* tag the status buffer with the kernel id being run */
1085 status_buffer = &(vp9_state->status_buffer);
1086 memset(&mi_store_data_imm, 0, sizeof(mi_store_data_imm));
1087 mi_store_data_imm.bo = status_buffer->bo;
1088 mi_store_data_imm.offset = status_buffer->media_index_offset;
1089 mi_store_data_imm.dw0 = media_function;
1090 gen8_gpe_mi_store_data_imm(ctx, batch, &mi_store_data_imm);
1092 intel_batchbuffer_emit_mi_flush(batch);
1093 gen9_gpe_pipeline_setup(ctx, gpe_context, batch);
1094 gen8_gpe_media_object(ctx, gpe_context, batch, param);
1095 gen8_gpe_media_state_flush(ctx, gpe_context, batch);
1097 gen9_gpe_pipeline_end(ctx, gpe_context, batch);
1099 intel_batchbuffer_end_atomic(batch);
1101 intel_batchbuffer_flush(batch);
/*
 * Dispatch one GPE kernel as a MEDIA_OBJECT_WALKER (hardware-generated
 * thread grid) — the walker counterpart of gen9_run_kernel_media_object().
 *
 * As in the single-object path, the media_function id is first written to
 * the status buffer via MI_STORE_DATA_IMM for status reporting; note the
 * MI flush is emitted before the store here (order differs from the
 * media-object variant).  Everything is wrapped in an atomic batch section
 * and flushed at the end.
 *
 * NOTE(review): early-return body and closing brace are elided from view.
 */
1105 gen9_run_kernel_media_object_walker(VADriverContextP ctx,
1106 struct intel_encoder_context *encoder_context,
1107 struct i965_gpe_context *gpe_context,
1109 struct gpe_media_object_walker_parameter *param)
1111 struct intel_batchbuffer *batch = encoder_context->base.batch;
1112 struct vp9_encode_status_buffer_internal *status_buffer;
1113 struct gen9_vp9_state *vp9_state;
1114 struct gpe_mi_store_data_imm_parameter mi_store_data_imm;
1116 vp9_state = (struct gen9_vp9_state *)(encoder_context->enc_priv_state);
1117 if (!vp9_state || !batch)
1120 intel_batchbuffer_start_atomic(batch, 0x1000);
1122 intel_batchbuffer_emit_mi_flush(batch);
/* Record which media function is executing into the status buffer. */
1124 status_buffer = &(vp9_state->status_buffer);
1125 memset(&mi_store_data_imm, 0, sizeof(mi_store_data_imm));
1126 mi_store_data_imm.bo = status_buffer->bo;
1127 mi_store_data_imm.offset = status_buffer->media_index_offset;
1128 mi_store_data_imm.dw0 = media_function;
1129 gen8_gpe_mi_store_data_imm(ctx, batch, &mi_store_data_imm);
1131 gen9_gpe_pipeline_setup(ctx, gpe_context, batch);
1132 gen8_gpe_media_object_walker(ctx, gpe_context, batch, param);
1133 gen8_gpe_media_state_flush(ctx, gpe_context, batch);
1135 gen9_gpe_pipeline_end(ctx, gpe_context, batch);
1137 intel_batchbuffer_end_atomic(batch);
1139 intel_batchbuffer_flush(batch);
/*
 * Fill the CURBE (constant buffer) for the VP9 BRC kernels.
 *
 * The same curbe layout (vp9_brc_curbe_data) serves several BRC media
 * states; param->media_state_type selects which fields are programmed:
 *   - VP9_MEDIA_STATE_BRC_INIT_RESET: rate-control targets, HRD buffer
 *     sizes, frame rate and the PAK deviation threshold tables.
 *   - VP9_MEDIA_STATE_BRC_UPDATE: per-frame state (frame number, segment
 *     QP deltas, target size / overflow accounting).
 *   - VP9_MEDIA_STATE_ENC_I_FRAME_DIST: intra distortion estimation setup.
 * The common binding-table indices are written at the end for all states.
 *
 * NOTE(review): original line numbers are non-contiguous — braces, `break`
 * statements, else-branches and the `* VP9_BRC_KBPS` continuations of the
 * dw7/dw8/dw9 expressions are elided from this view; comments below only
 * describe what is visible.
 */
1143 void gen9_vp9_set_curbe_brc(VADriverContextP ctx,
1144 struct encode_state *encode_state,
1145 struct i965_gpe_context *gpe_context,
1146 struct intel_encoder_context *encoder_context,
1147 struct gen9_vp9_brc_curbe_param *param)
1149 VAEncSequenceParameterBufferVP9 *seq_param;
1150 VAEncPictureParameterBufferVP9 *pic_param;
1151 VAEncMiscParameterTypeVP9PerSegmantParam *segment_param;
1152 vp9_brc_curbe_data *cmd;
1153 double dbps_ratio, dInputBitsPerFrame;
1154 struct gen9_vp9_state *vp9_state;
1156 vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
1158 pic_param = param->ppic_param;
1159 seq_param = param->pseq_param;
1160 segment_param = param->psegment_param;
1162 cmd = i965_gpe_context_map_curbe(gpe_context);
1167 memset(cmd, 0, sizeof(vp9_brc_curbe_data));
/* Without dynamic scaling the source resolution is used; otherwise the
 * (scaled) destination resolution. */
1169 if (!vp9_state->dys_enabled)
1171 cmd->dw0.frame_width = pic_param->frame_width_src;
1172 cmd->dw0.frame_height = pic_param->frame_height_src;
1176 cmd->dw0.frame_width = pic_param->frame_width_dst;
1177 cmd->dw0.frame_height = pic_param->frame_height_dst;
1180 cmd->dw1.frame_type = vp9_state->picture_coding_type;
1181 cmd->dw1.segmentation_enable = 0;
1182 cmd->dw1.ref_frame_flags = vp9_state->ref_frame_flag;
1183 cmd->dw1.num_tlevels = 1; /* single temporal level */
1185 switch(param->media_state_type)
1187 case VP9_MEDIA_STATE_BRC_INIT_RESET:
1189 cmd->dw3.max_level_ratiot0 = 0;
1190 cmd->dw3.max_level_ratiot1 = 0;
1191 cmd->dw3.max_level_ratiot2 = 0;
1192 cmd->dw3.max_level_ratiot3 = 0;
1194 cmd->dw4.profile_level_max_frame = seq_param->max_frame_width *
1195 seq_param->max_frame_height;
1196 cmd->dw5.init_buf_fullness = vp9_state->init_vbv_buffer_fullness_in_bit;
1197 cmd->dw6.buf_size = vp9_state->vbv_buffer_size_in_bit;
/* Bit rates are rounded up to whole kbps units (VP9_BRC_KBPS);
 * the trailing "* VP9_BRC_KBPS" continuation lines are elided here. */
1198 cmd->dw7.target_bit_rate = (vp9_state->target_bit_rate + VP9_BRC_KBPS - 1) / VP9_BRC_KBPS *
1200 cmd->dw8.max_bit_rate = (vp9_state->max_bit_rate + VP9_BRC_KBPS - 1) / VP9_BRC_KBPS *
1202 cmd->dw9.min_bit_rate = (vp9_state->min_bit_rate + VP9_BRC_KBPS - 1) / VP9_BRC_KBPS *
1204 cmd->dw10.frame_ratem = vp9_state->framerate.num;
1205 cmd->dw11.frame_rated = vp9_state->framerate.den;
1207 cmd->dw14.avbr_accuracy = 30;
1208 cmd->dw14.avbr_convergence = 150;
/* Select the BRC kernel flavor from the VA rate-control mode.  CBR pins
 * max to target and min to 0; the final else arm (CQL) is partly elided. */
1210 if (encoder_context->rate_control_mode == VA_RC_CBR)
1212 cmd->dw12.brc_flag = BRC_KERNEL_CBR;
1213 cmd->dw8.max_bit_rate = cmd->dw7.target_bit_rate;
1214 cmd->dw9.min_bit_rate = 0;
1216 else if (encoder_context->rate_control_mode == VA_RC_VBR)
1218 cmd->dw12.brc_flag = BRC_KERNEL_VBR;
1222 cmd->dw12.brc_flag = BRC_KERNEL_CQL;
1223 cmd->dw16.cq_level = 30;
1225 cmd->dw12.gopp = seq_param->intra_period - 1; /* P frames per GOP */
1227 cmd->dw13.init_frame_width = pic_param->frame_width_src;
1228 cmd->dw13.init_frame_height = pic_param->frame_height_src;
1230 cmd->dw15.min_qp = 0;
1231 cmd->dw15.max_qp = 255; /* full VP9 qindex range */
1233 cmd->dw16.cq_level = 30;
1235 cmd->dw17.enable_dynamic_scaling = vp9_state->dys_in_use;
1236 cmd->dw17.brc_overshoot_cbr_pct = 150;
/* dbps_ratio: average frame bits relative to 1/30 of the VBV buffer,
 * clamped (clamp bodies elided) and used to scale the PAK deviation
 * threshold tables below. */
1238 dInputBitsPerFrame = (double)cmd->dw8.max_bit_rate * (double)vp9_state->framerate.den / (double)vp9_state->framerate.num;
1239 dbps_ratio = dInputBitsPerFrame / ((double)vp9_state->vbv_buffer_size_in_bit / 30.0);
1240 if (dbps_ratio < 0.1)
1242 if (dbps_ratio > 3.5)
/* Export the buffer size and per-frame input bits for later BRC updates. */
1245 *param->pbrc_init_reset_buf_size_in_bits = cmd->dw6.buf_size;
1246 *param->pbrc_init_reset_input_bits_per_frame = dInputBitsPerFrame;
/* P-frame deviation thresholds (negative under-run / positive over-run). */
1248 cmd->dw18.pframe_deviation_threshold0 = (uint32_t)(-50 * pow(0.90, dbps_ratio));
1249 cmd->dw18.pframe_deviation_threshold1 = (uint32_t)(-50 * pow(0.66, dbps_ratio));
1250 cmd->dw18.pframe_deviation_threshold2 = (uint32_t)(-50 * pow(0.46, dbps_ratio));
1251 cmd->dw18.pframe_deviation_threshold3 = (uint32_t)(-50 * pow(0.3, dbps_ratio));
1252 cmd->dw19.pframe_deviation_threshold4 = (uint32_t)(50 * pow(0.3, dbps_ratio));
1253 cmd->dw19.pframe_deviation_threshold5 = (uint32_t)(50 * pow(0.46, dbps_ratio));
1254 cmd->dw19.pframe_deviation_threshold6 = (uint32_t)(50 * pow(0.7, dbps_ratio));
1255 cmd->dw19.pframe_deviation_threshold7 = (uint32_t)(50 * pow(0.9, dbps_ratio));
/* VBR deviation thresholds. */
1257 cmd->dw20.vbr_deviation_threshold0 = (uint32_t)(-50 * pow(0.9, dbps_ratio));
1258 cmd->dw20.vbr_deviation_threshold1 = (uint32_t)(-50 * pow(0.7, dbps_ratio));
1259 cmd->dw20.vbr_deviation_threshold2 = (uint32_t)(-50 * pow(0.5, dbps_ratio));
1260 cmd->dw20.vbr_deviation_threshold3 = (uint32_t)(-50 * pow(0.3, dbps_ratio));
1261 cmd->dw21.vbr_deviation_threshold4 = (uint32_t)(100 * pow(0.4, dbps_ratio));
1262 cmd->dw21.vbr_deviation_threshold5 = (uint32_t)(100 * pow(0.5, dbps_ratio));
1263 cmd->dw21.vbr_deviation_threshold6 = (uint32_t)(100 * pow(0.75, dbps_ratio));
1264 cmd->dw21.vbr_deviation_threshold7 = (uint32_t)(100 * pow(0.9, dbps_ratio));
/* Key-frame deviation thresholds. */
1266 cmd->dw22.kframe_deviation_threshold0 = (uint32_t)(-50 * pow(0.8, dbps_ratio));
1267 cmd->dw22.kframe_deviation_threshold1 = (uint32_t)(-50 * pow(0.6, dbps_ratio));
1268 cmd->dw22.kframe_deviation_threshold2 = (uint32_t)(-50 * pow(0.34, dbps_ratio));
1269 cmd->dw22.kframe_deviation_threshold3 = (uint32_t)(-50 * pow(0.2, dbps_ratio));
1270 cmd->dw23.kframe_deviation_threshold4 = (uint32_t)(50 * pow(0.2, dbps_ratio));
1271 cmd->dw23.kframe_deviation_threshold5 = (uint32_t)(50 * pow(0.4, dbps_ratio));
1272 cmd->dw23.kframe_deviation_threshold6 = (uint32_t)(50 * pow(0.66, dbps_ratio));
1273 cmd->dw23.kframe_deviation_threshold7 = (uint32_t)(50 * pow(0.9, dbps_ratio));
1277 case VP9_MEDIA_STATE_BRC_UPDATE:
1279 cmd->dw15.min_qp = 0;
1280 cmd->dw15.max_qp = 255;
1282 cmd->dw25.frame_number = param->frame_number;
1284 // Used in dynamic scaling. set to zero for now
1285 cmd->dw27.hrd_buffer_fullness_upper_limit = 0;
1286 cmd->dw28.hrd_buffer_fullness_lower_limit = 0;
/* Per-segment qindex deltas; segment_param is presumably non-NULL whenever
 * segmentation is enabled — TODO(review) confirm against callers. */
1288 if (pic_param->pic_flags.bits.segmentation_enabled) {
1289 cmd->dw32.seg_delta_qp0 = segment_param->seg_data[0].segment_qindex_delta;
1290 cmd->dw32.seg_delta_qp1 = segment_param->seg_data[1].segment_qindex_delta;
1291 cmd->dw32.seg_delta_qp2 = segment_param->seg_data[2].segment_qindex_delta;
1292 cmd->dw32.seg_delta_qp3 = segment_param->seg_data[3].segment_qindex_delta;
1294 cmd->dw33.seg_delta_qp4 = segment_param->seg_data[4].segment_qindex_delta;
1295 cmd->dw33.seg_delta_qp5 = segment_param->seg_data[5].segment_qindex_delta;
1296 cmd->dw33.seg_delta_qp6 = segment_param->seg_data[6].segment_qindex_delta;
1297 cmd->dw33.seg_delta_qp7 = segment_param->seg_data[7].segment_qindex_delta;
1300 //cmd->dw34.temporal_id = pPicParams->temporal_idi;
1301 cmd->dw34.temporal_id = 0;
1302 cmd->dw34.multi_ref_qp_check = param->multi_ref_qp_check;
1304 cmd->dw35.max_num_pak_passes = param->brc_num_pak_passes;
1305 cmd->dw35.sync_async = 0;
1306 cmd->dw35.mbrc = param->mbbrc_enabled;
/* Wrap the running target-buffer-fullness accumulator when it exceeds the
 * VBV buffer size recorded at init/reset time. */
1307 if (*param->pbrc_init_current_target_buf_full_in_bits >
1308 ((double)(*param->pbrc_init_reset_buf_size_in_bits))) {
1309 *param->pbrc_init_current_target_buf_full_in_bits -=
1310 (double)(*param->pbrc_init_reset_buf_size_in_bits);
1311 cmd->dw35.overflow = 1;
1314 cmd->dw35.overflow = 0;
1316 cmd->dw24.target_size = (uint32_t)(*param->pbrc_init_current_target_buf_full_in_bits);
1318 cmd->dw36.segmentation = pic_param->pic_flags.bits.segmentation_enabled;
/* Advance the accumulator by one frame's worth of input bits. */
1320 *param->pbrc_init_current_target_buf_full_in_bits += *param->pbrc_init_reset_input_bits_per_frame;
1322 cmd->dw38.qdelta_ydc = pic_param->luma_dc_qindex_delta;
1323 cmd->dw38.qdelta_uvdc = pic_param->chroma_dc_qindex_delta;
1324 cmd->dw38.qdelta_uvac = pic_param->chroma_ac_qindex_delta;
1328 case VP9_MEDIA_STATE_ENC_I_FRAME_DIST:
1329 cmd->dw2.intra_mode_disable = 0;
/* Binding-table indices shared by all BRC media states. */
1335 cmd->dw48.brc_y4x_input_bti = VP9_BTI_BRC_SRCY4X_G9;
1336 cmd->dw49.brc_vme_coarse_intra_input_bti = VP9_BTI_BRC_VME_COARSE_INTRA_G9;
1337 cmd->dw50.brc_history_buffer_bti = VP9_BTI_BRC_HISTORY_G9;
1338 cmd->dw51.brc_const_data_input_bti = VP9_BTI_BRC_CONSTANT_DATA_G9;
1339 cmd->dw52.brc_distortion_bti = VP9_BTI_BRC_DISTORTION_G9;
1340 cmd->dw53.brc_mmdk_pak_output_bti = VP9_BTI_BRC_MMDK_PAK_OUTPUT_G9;
1341 cmd->dw54.brc_enccurbe_input_bti = VP9_BTI_BRC_MBENC_CURBE_INPUT_G9;
1342 cmd->dw55.brc_enccurbe_output_bti = VP9_BTI_BRC_MBENC_CURBE_OUTPUT_G9;
1343 cmd->dw56.brc_pic_state_input_bti = VP9_BTI_BRC_PIC_STATE_INPUT_G9;
1344 cmd->dw57.brc_pic_state_output_bti = VP9_BTI_BRC_PIC_STATE_OUTPUT_G9;
1345 cmd->dw58.brc_seg_state_input_bti = VP9_BTI_BRC_SEGMENT_STATE_INPUT_G9;
1346 cmd->dw59.brc_seg_state_output_bti = VP9_BTI_BRC_SEGMENT_STATE_OUTPUT_G9;
1347 cmd->dw60.brc_bitstream_size_data_bti = VP9_BTI_BRC_BITSTREAM_SIZE_G9;
1348 cmd->dw61.brc_hfw_data_output_bti = VP9_BTI_BRC_HFW_DATA_G9;
1350 i965_gpe_context_unmap_curbe(gpe_context);
/*
 * Bind the surfaces needed by the BRC init/reset kernel: the BRC history
 * buffer (raw buffer) and the 4x-downscaled HME distortion surface (2D).
 *
 * NOTE(review): some argument lines of the add-surface calls are elided
 * from this view (non-contiguous original line numbers).
 */
1355 gen9_brc_init_reset_add_surfaces_vp9(VADriverContextP ctx,
1356 struct encode_state *encode_state,
1357 struct intel_encoder_context *encoder_context,
1358 struct i965_gpe_context *gpe_context)
1360 struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
1362 gen9_add_buffer_gpe_surface(ctx,
1364 &vme_context->res_brc_history_buffer,
1366 vme_context->res_brc_history_buffer.size,
1368 VP9_BTI_BRC_HISTORY_G9);
1370 gen9_add_buffer_2d_gpe_surface(ctx,
1372 &vme_context->s4x_memv_distortion_buffer,
1374 I965_SURFACEFORMAT_R8_UNORM,
1375 VP9_BTI_BRC_DISTORTION_G9);
1378 /* Functions related to BRC (bit-rate control) */
/*
 * Run the BRC init (first frame) or BRC reset (subsequent re-init) kernel.
 *
 * Selects the VP9_BRC_INIT or VP9_BRC_RESET GPE context depending on
 * whether BRC has already been initialized, fills the BRC curbe via the
 * vtable hook pfn_set_curbe_brc, binds the init/reset surfaces and
 * dispatches a single MEDIA_OBJECT.
 *
 * Returns VA_STATUS_SUCCESS, or VA_STATUS_ERROR_INVALID_PARAMETER when the
 * encoder state or picture parameters are missing.
 *
 * NOTE(review): brc_initreset_curbe is populated field-by-field without a
 * prior memset; fields not visibly assigned here may be uninitialized —
 * elided lines may cover this, confirm against the full file.
 */
1380 gen9_vp9_brc_init_reset_kernel(VADriverContextP ctx,
1381 struct encode_state *encode_state,
1382 struct intel_encoder_context *encoder_context)
1384 struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
1385 struct vp9_brc_context *brc_context = &vme_context->brc_context;
1386 struct gpe_media_object_parameter media_object_param;
1387 struct i965_gpe_context *gpe_context;
1388 int gpe_index = VP9_BRC_INIT;
1389 int media_function = VP9_MEDIA_STATE_BRC_INIT_RESET;
1390 struct gen9_vp9_brc_curbe_param brc_initreset_curbe;
1391 VAEncPictureParameterBufferVP9 *pic_param;
1392 struct gen9_vp9_state *vp9_state;
1394 vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
1396 if (!vp9_state || !vp9_state->pic_param)
1397 return VA_STATUS_ERROR_INVALID_PARAMETER;
1399 pic_param = vp9_state->pic_param;
/* After the first successful init, later calls perform a reset instead. */
1401 if (vp9_state->brc_inited)
1402 gpe_index = VP9_BRC_RESET;
1404 gpe_context = &brc_context->gpe_contexts[gpe_index];
1406 gen8_gpe_context_init(ctx, gpe_context);
1407 gen9_gpe_reset_binding_table(ctx, gpe_context);
1409 brc_initreset_curbe.media_state_type = media_function;
1410 brc_initreset_curbe.curr_frame = pic_param->reconstructed_frame;
1411 brc_initreset_curbe.ppic_param = vp9_state->pic_param;
1412 brc_initreset_curbe.pseq_param = vp9_state->seq_param;
1413 brc_initreset_curbe.psegment_param = vp9_state->segment_param;
1414 brc_initreset_curbe.frame_width = vp9_state->frame_width;
1415 brc_initreset_curbe.frame_height = vp9_state->frame_height;
/* These pointers let the curbe-setup helper write back the VBV buffer
 * size and per-frame bit budget into the persistent encoder state. */
1416 brc_initreset_curbe.pbrc_init_current_target_buf_full_in_bits =
1417 &vp9_state->brc_init_current_target_buf_full_in_bits;
1418 brc_initreset_curbe.pbrc_init_reset_buf_size_in_bits =
1419 &vp9_state->brc_init_reset_buf_size_in_bits;
1420 brc_initreset_curbe.pbrc_init_reset_input_bits_per_frame =
1421 &vp9_state->brc_init_reset_input_bits_per_frame;
1422 brc_initreset_curbe.picture_coding_type = vp9_state->picture_coding_type;
1423 brc_initreset_curbe.initbrc = !vp9_state->brc_inited;
1424 brc_initreset_curbe.mbbrc_enabled = 0;
1425 brc_initreset_curbe.ref_frame_flag = vp9_state->ref_frame_flag;
1427 vme_context->pfn_set_curbe_brc(ctx, encode_state,
1430 &brc_initreset_curbe);
1432 gen9_brc_init_reset_add_surfaces_vp9(ctx, encode_state, encoder_context, gpe_context);
1433 gen8_gpe_setup_interface_data(ctx, gpe_context);
/* Single-thread dispatch: an all-zero media object parameter suffices. */
1435 memset(&media_object_param, 0, sizeof(media_object_param));
1436 gen9_run_kernel_media_object(ctx, encoder_context, gpe_context, media_function, &media_object_param);
1438 return VA_STATUS_SUCCESS;
/*
 * Bind surfaces for the BRC intra-distortion kernel: the 4x-scaled source
 * luma (2D), the same surface as a VME/adv surface for coarse intra
 * search, and the distortion output buffer (2D).
 *
 * NOTE(review): several argument lines are elided from this view.
 */
1442 gen9_brc_intra_dist_add_surfaces_vp9(VADriverContextP ctx,
1443 struct encode_state *encode_state,
1444 struct intel_encoder_context *encoder_context,
1445 struct i965_gpe_context *gpe_context)
1447 struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
1449 struct object_surface *obj_surface;
1450 struct gen9_vp9_surface *vp9_priv_surface;
1452 /* sScaled4xSurface surface */
1453 obj_surface = encode_state->reconstructed_object;
/* The reconstructed surface's private data carries the 4x-downscaled
 * companion surface produced by the scaling kernel. */
1455 vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
1457 obj_surface = vp9_priv_surface->scaled_4x_surface_obj;
1458 gen9_add_2d_gpe_surface(ctx, gpe_context,
1461 I965_SURFACEFORMAT_R8_UNORM,
1462 VP9_BTI_BRC_SRCY4X_G9
1465 gen9_add_adv_gpe_surface(ctx, gpe_context,
1467 VP9_BTI_BRC_VME_COARSE_INTRA_G9);
1469 gen9_add_buffer_2d_gpe_surface(ctx,
1471 &vme_context->s4x_memv_distortion_buffer,
1473 I965_SURFACEFORMAT_R8_UNORM,
1474 VP9_BTI_BRC_DISTORTION_G9);
1479 /* Functions related to BRC (bit-rate control) */
/*
 * Run the BRC intra-distortion estimation kernel over the 4x-downscaled
 * frame.  Fills the BRC curbe for VP9_MEDIA_STATE_ENC_I_FRAME_DIST, zeroes
 * the distortion output buffer, binds the surfaces, and dispatches a
 * MEDIA_OBJECT_WALKER sized by the 4x-downscaled macroblock resolution.
 *
 * Returns VA_STATUS_SUCCESS, or VA_STATUS_ERROR_INVALID_PARAMETER when the
 * encoder state or picture parameters are missing.
 *
 * NOTE(review): brc_intra_dist_curbe is populated without a prior memset;
 * elided lines may cover this — confirm against the full file.
 */
1481 gen9_vp9_brc_intra_dist_kernel(VADriverContextP ctx,
1482 struct encode_state *encode_state,
1483 struct intel_encoder_context *encoder_context)
1485 struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
1486 struct vp9_brc_context *brc_context = &vme_context->brc_context;
1487 struct i965_gpe_context *gpe_context;
1488 int gpe_index = VP9_BRC_INTRA_DIST;
1489 int media_function = VP9_MEDIA_STATE_ENC_I_FRAME_DIST;
1490 struct gen9_vp9_brc_curbe_param brc_intra_dist_curbe;
1491 VAEncPictureParameterBufferVP9 *pic_param;
1492 struct gen9_vp9_state *vp9_state;
1493 struct gpe_media_object_walker_parameter media_object_walker_param;
1494 struct vp9_encoder_kernel_walker_parameter kernel_walker_param;
1496 vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
1498 if (!vp9_state || !vp9_state->pic_param)
1499 return VA_STATUS_ERROR_INVALID_PARAMETER;
1501 pic_param = vp9_state->pic_param;
1503 gpe_context = &brc_context->gpe_contexts[gpe_index];
1505 gen8_gpe_context_init(ctx, gpe_context);
1506 gen9_gpe_reset_binding_table(ctx, gpe_context);
1508 brc_intra_dist_curbe.media_state_type = media_function;
1509 brc_intra_dist_curbe.curr_frame = pic_param->reconstructed_frame;
1510 brc_intra_dist_curbe.ppic_param = vp9_state->pic_param;
1511 brc_intra_dist_curbe.pseq_param = vp9_state->seq_param;
1512 brc_intra_dist_curbe.psegment_param = vp9_state->segment_param;
1513 brc_intra_dist_curbe.frame_width = vp9_state->frame_width;
1514 brc_intra_dist_curbe.frame_height = vp9_state->frame_height;
1515 brc_intra_dist_curbe.pbrc_init_current_target_buf_full_in_bits =
1516 &vp9_state->brc_init_current_target_buf_full_in_bits;
1517 brc_intra_dist_curbe.pbrc_init_reset_buf_size_in_bits =
1518 &vp9_state->brc_init_reset_buf_size_in_bits;
1519 brc_intra_dist_curbe.pbrc_init_reset_input_bits_per_frame =
1520 &vp9_state->brc_init_reset_input_bits_per_frame;
1521 brc_intra_dist_curbe.picture_coding_type = vp9_state->picture_coding_type;
1522 brc_intra_dist_curbe.initbrc = !vp9_state->brc_inited;
1523 brc_intra_dist_curbe.mbbrc_enabled = 0;
1524 brc_intra_dist_curbe.ref_frame_flag = vp9_state->ref_frame_flag;
1526 vme_context->pfn_set_curbe_brc(ctx, encode_state,
1529 &brc_intra_dist_curbe);
1531 /* zero distortion buffer */
1532 i965_zero_gpe_resource(&vme_context->s4x_memv_distortion_buffer);
1534 gen9_brc_intra_dist_add_surfaces_vp9(ctx, encode_state, encoder_context, gpe_context);
1535 gen8_gpe_setup_interface_data(ctx, gpe_context);
/* One walker thread per 4x-downscaled MB; no inter-thread dependency. */
1537 memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
1538 kernel_walker_param.resolution_x = vme_context->downscaled_width_in_mb4x;
1539 kernel_walker_param.resolution_y = vme_context->downscaled_height_in_mb4x;
1540 kernel_walker_param.no_dependency = 1;
1542 gen9_init_media_object_walker_parameter(encoder_context, &kernel_walker_param, &media_object_walker_param);
1544 gen9_run_kernel_media_object_walker(ctx, encoder_context,
1547 &media_object_walker_param);
1549 return VA_STATUS_SUCCESS;
/*
 * Construct the HCP_VP9_PIC_STATE command batch consumed by the PAK stage.
 *
 * Writes four copies of the pic-state command sequence (one per potential
 * PAK pass, VP9_PIC_STATE_BUFFER_SIZE bytes apart) into gpe_resource, each
 * terminated with MI_BATCH_BUFFER_END.  Fields are derived from the VA
 * picture parameters and the previous frame's state (last_frame_type,
 * use_prev_frame_mvs, reference scaling factors).
 *
 * NOTE(review): original line numbers are non-contiguous — declarations of
 * `pdata`, `i`, `j`, the early-return after the NULL check, several
 * else-branches, loop bodies and some command DWORDs are elided from this
 * view; comments describe only the visible lines.
 */
1553 intel_vp9enc_construct_picstate_batchbuf(VADriverContextP ctx,
1554 struct encode_state *encode_state,
1555 struct intel_encoder_context *encoder_context,
1556 struct i965_gpe_resource *gpe_resource)
1558 struct gen9_vp9_state *vp9_state;
1559 VAEncPictureParameterBufferVP9 *pic_param;
1560 int frame_width_minus1, frame_height_minus1;
1561 int is_lossless = 0;
1562 int is_intra_only = 0;
1563 unsigned int last_frame_type;
1564 unsigned int ref_flags;
1565 unsigned int use_prev_frame_mvs, adapt_flag;
1566 struct gen9_surface_vp9 *vp9_surface = NULL;
1567 struct object_surface *obj_surface = NULL;
1568 uint32_t scale_h = 0;
1569 uint32_t scale_w = 0;
1573 unsigned int *cmd_ptr, cmd_value, tmp;
1575 pdata = i965_map_gpe_resource(gpe_resource);
1576 vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
1578 if (!vp9_state || !vp9_state->pic_param || !pdata)
1581 pic_param = vp9_state->pic_param;
/* Hardware programs (dimension - 1); dimensions are 8-aligned first. */
1582 frame_width_minus1 = ALIGN(pic_param->frame_width_dst, 8) - 1;
1583 frame_height_minus1 = ALIGN(pic_param->frame_height_dst, 8) - 1;
/* Lossless mode: base qindex 0 with all qindex deltas 0 (assignment of
 * is_lossless is on an elided line). */
1584 if ((pic_param->luma_ac_qindex == 0) &&
1585 (pic_param->luma_dc_qindex_delta == 0) &&
1586 (pic_param->chroma_ac_qindex_delta == 0) &&
1587 (pic_param->chroma_dc_qindex_delta == 0))
1590 if (pic_param->pic_flags.bits.frame_type)
1591 is_intra_only = pic_param->pic_flags.bits.intra_only;
1593 last_frame_type = vp9_state->vp9_last_frame.frame_type;
1595 use_prev_frame_mvs = 0;
1596 if (pic_param->pic_flags.bits.frame_type == HCP_VP9_KEY_FRAME) {
1597 last_frame_type = 0;
/* Pack the three reference sign-bias bits; remaining bits are set on
 * elided lines. */
1600 ref_flags = ((pic_param->ref_flags.bits.ref_arf_sign_bias << 9) |
1601 (pic_param->ref_flags.bits.ref_gf_sign_bias << 8) |
1602 (pic_param->ref_flags.bits.ref_last_sign_bias << 7)
/* Previous-frame MVs are usable only when the last frame was a shown
 * inter frame of identical size and error resilience is off. */
1604 if (!pic_param->pic_flags.bits.error_resilient_mode &&
1605 (pic_param->frame_width_dst == vp9_state->vp9_last_frame.frame_width) &&
1606 (pic_param->frame_height_dst == vp9_state->vp9_last_frame.frame_height) &&
1607 !pic_param->pic_flags.bits.intra_only &&
1608 vp9_state->vp9_last_frame.show_frame &&
1609 ((vp9_state->vp9_last_frame.frame_type == HCP_VP9_INTER_FRAME) &&
1610 !vp9_state->vp9_last_frame.intra_only)
1612 use_prev_frame_mvs = 1;
/* adapt_flag assignment is on an elided line. */
1615 if (!pic_param->pic_flags.bits.error_resilient_mode &&
1616 !pic_param->pic_flags.bits.frame_parallel_decoding_mode)
/* Emit one pic-state command block per PAK pass (up to 4 passes). */
1619 for (i = 0; i < 4; i++) {
1620 uint32_t non_first_pass;
1625 cmd_ptr =(unsigned int *)(pdata + i * VP9_PIC_STATE_BUFFER_SIZE);
1627 *cmd_ptr++ = (HCP_VP9_PIC_STATE | (33 - 2)); /* opcode | (dword length - 2) */
1628 *cmd_ptr++ = (frame_height_minus1 << 16 |
1629 frame_width_minus1);
1631 *cmd_ptr++ = ( 0 << 31 | /* disable segment_in */
1632 0 << 30 | /* disable segment_out */
1633 is_lossless << 29 | /* loseless */
1634 (pic_param->pic_flags.bits.segmentation_enabled && pic_param->pic_flags.bits.segmentation_temporal_update) << 28 | /* temporal update */
1635 (pic_param->pic_flags.bits.segmentation_enabled && pic_param->pic_flags.bits.segmentation_update_map) << 27 | /* temporal update */
1636 (pic_param->pic_flags.bits.segmentation_enabled << 26) |
1637 (pic_param->sharpness_level << 23) |
1638 (pic_param->filter_level << 17) |
1639 (pic_param->pic_flags.bits.frame_parallel_decoding_mode << 16) |
1640 (pic_param->pic_flags.bits.error_resilient_mode << 15) |
1641 (pic_param->pic_flags.bits.refresh_frame_context << 14) |
1642 (last_frame_type << 13) |
1643 (vp9_state->tx_mode == TX_MODE_SELECT) << 12 |
1644 (pic_param->pic_flags.bits.comp_prediction_mode == REFERENCE_MODE_SELECT) << 11 |
1645 (use_prev_frame_mvs) << 10 |
1647 (pic_param->pic_flags.bits.mcomp_filter_type << 4) |
1648 (pic_param->pic_flags.bits.allow_high_precision_mv << 3) |
1649 (is_intra_only << 2) |
1651 (pic_param->pic_flags.bits.frame_type) << 0);
1653 *cmd_ptr++ =((0 << 28) | /* VP9Profile0 */
1654 (0 << 24) | /* 8-bit depth */
1655 (0 << 22) | /* only 420 format */
1656 (0 << 0) | /* sse statistics */
1657 (pic_param->log2_tile_rows << 8) |
1658 (pic_param->log2_tile_columns << 0));
/* Inter frames: program per-reference scale factors (14-bit fixed point)
 * and reference frame sizes; else-branches for missing refs are elided. */
1661 if (pic_param->pic_flags.bits.frame_type &&
1662 !pic_param->pic_flags.bits.intra_only) {
1663 for (j = 0; j < 3; j++) {
1664 obj_surface = encode_state->reference_objects[j];
1667 if (obj_surface && obj_surface->private_data) {
1668 vp9_surface = obj_surface->private_data;
1669 scale_w = (vp9_surface->frame_width << 14) / pic_param->frame_width_dst;
1670 scale_h = (vp9_surface->frame_height << 14) / pic_param->frame_height_dst;
1671 *cmd_ptr++ = (scale_w << 16 |
1682 for(j = 0; j < 3; j++) {
1683 obj_surface = encode_state->reference_objects[j];
1686 if (obj_surface && obj_surface->private_data) {
1687 vp9_surface = obj_surface->private_data;
1688 *cmd_ptr++ = (vp9_surface->frame_height - 1) << 16 |
1689 (vp9_surface->frame_width - 1);
1696 *cmd_ptr++ = (1 << 1);
1700 *cmd_ptr++ = ((1 << 25) | /* header insertation for VP9 */
1701 (0 << 24) | /* tail insertation */
1702 (pic_param->luma_ac_qindex << 16) |
1703 0 /* compressed header bin count */);
/* qindex deltas are encoded as 5-bit sign/magnitude fields. */
1706 tmp = intel_convert_sign_mag(pic_param->luma_dc_qindex_delta, 5);
1707 cmd_value = (tmp << 16);
1708 tmp = intel_convert_sign_mag(pic_param->chroma_dc_qindex_delta, 5);
1709 cmd_value |= (tmp << 8);
1710 tmp = intel_convert_sign_mag(pic_param->chroma_ac_qindex_delta, 5);
1712 *cmd_ptr++ = cmd_value;
/* Loop-filter reference deltas, 7-bit sign/magnitude, four per DWORD. */
1714 tmp = intel_convert_sign_mag(pic_param->ref_lf_delta[0], 7);
1716 tmp = intel_convert_sign_mag(pic_param->ref_lf_delta[1], 7);
1717 cmd_value |= (tmp << 8);
1718 tmp = intel_convert_sign_mag(pic_param->ref_lf_delta[2], 7);
1719 cmd_value |= (tmp << 16);
1720 tmp = intel_convert_sign_mag(pic_param->ref_lf_delta[3], 7);
1721 cmd_value |= (tmp << 24);
1722 *cmd_ptr++ = cmd_value;
/* Loop-filter mode deltas. */
1725 tmp = intel_convert_sign_mag(pic_param->mode_lf_delta[0], 7);
1727 tmp = intel_convert_sign_mag(pic_param->mode_lf_delta[1], 7);
1728 cmd_value |= (tmp << 8);
1729 *cmd_ptr++ = cmd_value;
/* Bit offsets into the uncompressed frame header, used by PAK to patch
 * syntax elements after rate control adjusts them. */
1732 *cmd_ptr++ = vp9_state->frame_header.bit_offset_ref_lf_delta |
1733 (vp9_state->frame_header.bit_offset_mode_lf_delta << 16);
1734 *cmd_ptr++ = vp9_state->frame_header.bit_offset_qindex |
1735 (vp9_state->frame_header.bit_offset_lf_level << 16);
1738 *cmd_ptr++ = (1 << 26 | (1 << 25) |
1739 non_first_pass << 16);
1741 *cmd_ptr++ = (1 << 31) | (256);
1744 *cmd_ptr++ = (0 << 31) | 1;
1746 /* dw22-dw24. Frame_delta_qindex_range */
1751 /* dw25-26. frame_delta_lf_range */
1755 /* dw27. frame_delta_lf_min */
1764 *cmd_ptr++ = (0 << 30) | 1;
1766 *cmd_ptr++ = vp9_state->frame_header.bit_offset_first_partition_size;
1769 *cmd_ptr++ = MI_BATCH_BUFFER_END; /* terminate this pass's batch */
1772 i965_unmap_gpe_resource(gpe_resource);
/*
 * Bind all input/output surfaces for the BRC update kernel.  The numbered
 * comments below match the kernel's binding-table layout; note that both
 * the MBEnc curbe input and output slots point at the same curbe bo — the
 * BRC kernel reads and patches the MBEnc constants in place.
 *
 * NOTE(review): some argument lines (gpe-context argument, flags) of the
 * add-surface calls are elided from this view.
 */
1776 gen9_brc_update_add_surfaces_vp9(VADriverContextP ctx,
1777 struct encode_state *encode_state,
1778 struct intel_encoder_context *encoder_context,
1779 struct i965_gpe_context *brc_gpe_context,
1780 struct i965_gpe_context *mbenc_gpe_context)
1782 struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
1784 /* 0. BRC history buffer */
1785 gen9_add_buffer_gpe_surface(ctx,
1787 &vme_context->res_brc_history_buffer,
1789 vme_context->res_brc_history_buffer.size,
1791 VP9_BTI_BRC_HISTORY_G9);
1793 /* 1. Constant data buffer */
1794 gen9_add_buffer_gpe_surface(ctx,
1796 &vme_context->res_brc_const_data_buffer,
1798 vme_context->res_brc_const_data_buffer.size,
1800 VP9_BTI_BRC_CONSTANT_DATA_G9);
1802 /* 2. Distortion 2D surface buffer */
1803 gen9_add_buffer_2d_gpe_surface(ctx,
1805 &vme_context->s4x_memv_distortion_buffer,
1807 I965_SURFACEFORMAT_R8_UNORM,
1808 VP9_BTI_BRC_DISTORTION_G9);
/* 3. MMDK PAK output buffer */
1811 gen9_add_buffer_gpe_surface(ctx,
1813 &vme_context->res_brc_mmdk_pak_buffer,
1815 vme_context->res_brc_mmdk_pak_buffer.size,
1817 VP9_BTI_BRC_MMDK_PAK_OUTPUT_G9);
1818 /* 4. Mbenc curbe input buffer */
1819 gen9_add_dri_buffer_gpe_surface(ctx,
1821 mbenc_gpe_context->curbe.bo,
1823 ALIGN(mbenc_gpe_context->curbe.length, 64),
1824 mbenc_gpe_context->curbe.offset,
1825 VP9_BTI_BRC_MBENC_CURBE_INPUT_G9);
1826 /* 5. Mbenc curbe output buffer */
1827 gen9_add_dri_buffer_gpe_surface(ctx,
1829 mbenc_gpe_context->curbe.bo,
1831 ALIGN(mbenc_gpe_context->curbe.length, 64),
1832 mbenc_gpe_context->curbe.offset,
1833 VP9_BTI_BRC_MBENC_CURBE_OUTPUT_G9);
1835 /* 6. BRC_PIC_STATE read buffer */
1836 gen9_add_buffer_gpe_surface(ctx, brc_gpe_context,
1837 &vme_context->res_pic_state_brc_read_buffer,
1839 vme_context->res_pic_state_brc_read_buffer.size,
1841 VP9_BTI_BRC_PIC_STATE_INPUT_G9);
1843 /* 7. BRC_PIC_STATE write buffer */
1844 gen9_add_buffer_gpe_surface(ctx, brc_gpe_context,
1845 &vme_context->res_pic_state_brc_write_hfw_read_buffer,
1847 vme_context->res_pic_state_brc_write_hfw_read_buffer.size,
1849 VP9_BTI_BRC_PIC_STATE_OUTPUT_G9);
1851 /* 8. SEGMENT_STATE read buffer */
1852 gen9_add_buffer_gpe_surface(ctx, brc_gpe_context,
1853 &vme_context->res_seg_state_brc_read_buffer,
1855 vme_context->res_seg_state_brc_read_buffer.size,
1857 VP9_BTI_BRC_SEGMENT_STATE_INPUT_G9);
1859 /* 9. SEGMENT_STATE write buffer */
1860 gen9_add_buffer_gpe_surface(ctx, brc_gpe_context,
1861 &vme_context->res_seg_state_brc_write_buffer,
1863 vme_context->res_seg_state_brc_write_buffer.size,
1865 VP9_BTI_BRC_SEGMENT_STATE_OUTPUT_G9);
1867 /* 10. Bitstream size buffer */
1868 gen9_add_buffer_gpe_surface(ctx, brc_gpe_context,
1869 &vme_context->res_brc_bitstream_size_buffer,
1871 vme_context->res_brc_bitstream_size_buffer.size,
1873 VP9_BTI_BRC_BITSTREAM_SIZE_G9);
/* 11. HFW data buffer */
1875 gen9_add_buffer_gpe_surface(ctx, brc_gpe_context,
1876 &vme_context->res_brc_hfw_data_buffer,
1878 vme_context->res_brc_hfw_data_buffer.size,
1880 VP9_BTI_BRC_HFW_DATA_G9);
/*
 * Run the per-frame BRC update kernel.
 *
 * Sequence: (1) pre-fill the MBEnc curbe (inter or key-frame-32x32 flavor
 * depending on picture_coding_type) so the BRC kernel can read and patch
 * it via the curbe input/output surfaces; (2) fill the BRC update curbe;
 * (3) upload the frame-type-specific BRC constant table; (4) rebuild the
 * BRC-read PIC_STATE batch with filter_level cleared (BRC decides the
 * final value); (5) bind surfaces and dispatch a single MEDIA_OBJECT.
 *
 * Returns VA_STATUS_SUCCESS, VA_STATUS_ERROR_INVALID_PARAMETER on missing
 * state, or VA_STATUS_ERROR_OPERATION_FAILED if the constant buffer cannot
 * be mapped.
 *
 * NOTE(review): original line numbers are non-contiguous — the declaration
 * of `mbenc_function`, else-branch braces and some call-argument lines are
 * elided from this view.
 */
1886 gen9_vp9_brc_update_kernel(VADriverContextP ctx,
1887 struct encode_state *encode_state,
1888 struct intel_encoder_context *encoder_context)
1890 struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
1891 struct vp9_brc_context *brc_context = &vme_context->brc_context;
1892 struct i965_gpe_context *brc_gpe_context, *mbenc_gpe_context;
1893 int mbenc_index, gpe_index = VP9_BRC_UPDATE;
1894 int media_function = VP9_MEDIA_STATE_BRC_UPDATE;
1896 struct gen9_vp9_brc_curbe_param brc_update_curbe_param;
1897 VAEncPictureParameterBufferVP9 *pic_param;
1898 struct gen9_vp9_state *vp9_state;
1899 struct gen9_vp9_mbenc_curbe_param mbenc_curbe_param;
1900 struct gpe_media_object_parameter media_object_param;
1902 vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
1903 if (!vp9_state || !vp9_state->pic_param)
1904 return VA_STATUS_ERROR_INVALID_PARAMETER;
1906 pic_param = vp9_state->pic_param;
1907 // Setup VP9 MbEnc Curbe
/* Non-zero picture_coding_type means inter frame here. */
1908 if (vp9_state->picture_coding_type) {
1909 mbenc_function = VP9_MEDIA_STATE_MBENC_P;
1910 mbenc_index = VP9_MBENC_IDX_INTER;
1912 mbenc_function = VP9_MEDIA_STATE_MBENC_I_32x32;
1913 mbenc_index = VP9_MBENC_IDX_KEY_32x32;
1916 mbenc_gpe_context = &(vme_context->mbenc_context.gpe_contexts[mbenc_index]);
1918 memset(&mbenc_curbe_param, 0, sizeof(mbenc_curbe_param));
1920 mbenc_curbe_param.ppic_param = vp9_state->pic_param;
1921 mbenc_curbe_param.pseq_param = vp9_state->seq_param;
1922 mbenc_curbe_param.psegment_param = vp9_state->segment_param;
1923 //mbenc_curbe_param.ppRefList = &(vp9_state->pRefList[0]);
1924 mbenc_curbe_param.last_ref_obj = vp9_state->last_ref_obj;
1925 mbenc_curbe_param.golden_ref_obj = vp9_state->golden_ref_obj;
1926 mbenc_curbe_param.alt_ref_obj = vp9_state->alt_ref_obj;
1927 mbenc_curbe_param.frame_width_in_mb = ALIGN(vp9_state->frame_width, 16) / 16;
1928 mbenc_curbe_param.frame_height_in_mb = ALIGN(vp9_state->frame_height, 16) / 16;
1929 mbenc_curbe_param.hme_enabled = vp9_state->hme_enabled;
1930 mbenc_curbe_param.ref_frame_flag = vp9_state->ref_frame_flag;
1931 mbenc_curbe_param.multi_ref_qp_check = vp9_state->multi_ref_qp_check;
1932 mbenc_curbe_param.picture_coding_type = vp9_state->picture_coding_type;
1933 mbenc_curbe_param.media_state_type = mbenc_function;
1935 vme_context->pfn_set_curbe_mbenc(ctx, encode_state,
1938 &mbenc_curbe_param);
/* Remember that MBEnc's curbe is already prepared so the MBEnc stage
 * does not redo it this frame. */
1940 vp9_state->mbenc_curbe_set_in_brc_update = true;
1942 brc_gpe_context = &brc_context->gpe_contexts[gpe_index];
1944 gen8_gpe_context_init(ctx, brc_gpe_context);
1945 gen9_gpe_reset_binding_table(ctx, brc_gpe_context);
1947 memset(&brc_update_curbe_param, 0, sizeof(brc_update_curbe_param));
1949 // Setup BRC Update Curbe
1950 brc_update_curbe_param.media_state_type = media_function;
1951 brc_update_curbe_param.curr_frame = pic_param->reconstructed_frame;
1952 brc_update_curbe_param.ppic_param = vp9_state->pic_param;
1953 brc_update_curbe_param.pseq_param = vp9_state->seq_param;
1954 brc_update_curbe_param.psegment_param = vp9_state->segment_param;
1955 brc_update_curbe_param.picture_coding_type = vp9_state->picture_coding_type;
1956 brc_update_curbe_param.frame_width_in_mb = ALIGN(vp9_state->frame_width, 16) / 16;
1957 brc_update_curbe_param.frame_height_in_mb = ALIGN(vp9_state->frame_height, 16) / 16;
1958 brc_update_curbe_param.hme_enabled = vp9_state->hme_enabled;
1959 brc_update_curbe_param.b_used_ref = 1;
1960 brc_update_curbe_param.frame_number = vp9_state->frame_number;
1961 brc_update_curbe_param.ref_frame_flag = vp9_state->ref_frame_flag;
1962 brc_update_curbe_param.mbbrc_enabled = 0;
1963 brc_update_curbe_param.multi_ref_qp_check = vp9_state->multi_ref_qp_check;
1964 brc_update_curbe_param.brc_num_pak_passes = vp9_state->num_pak_passes;
1966 brc_update_curbe_param.pbrc_init_current_target_buf_full_in_bits =
1967 &vp9_state->brc_init_current_target_buf_full_in_bits;
1968 brc_update_curbe_param.pbrc_init_reset_buf_size_in_bits =
1969 &vp9_state->brc_init_reset_buf_size_in_bits;
1970 brc_update_curbe_param.pbrc_init_reset_input_bits_per_frame =
1971 &vp9_state->brc_init_reset_input_bits_per_frame;
1973 vme_context->pfn_set_curbe_brc(ctx, encode_state,
1976 &brc_update_curbe_param);
1979 // Check if the constant data surface is present
1980 if (vp9_state->brc_constant_buffer_supported)
1982 char *brc_const_buffer;
1983 brc_const_buffer = i965_map_gpe_resource(&vme_context->res_brc_const_data_buffer);
1985 if (!brc_const_buffer)
1986 return VA_STATUS_ERROR_OPERATION_FAILED;
/* Pick the P-frame or I-frame constant table by coding type. */
1988 if (vp9_state->picture_coding_type)
1989 memcpy(brc_const_buffer, vp9_brc_const_data_p_g9,
1990 sizeof(vp9_brc_const_data_p_g9));
1992 memcpy(brc_const_buffer, vp9_brc_const_data_i_g9,
1993 sizeof(vp9_brc_const_data_i_g9));
1995 i965_unmap_gpe_resource(&vme_context->res_brc_const_data_buffer);
1998 if (pic_param->pic_flags.bits.segmentation_enabled)
2000 //reallocate the vme_state->mb_segment_map_surface
2001 /* this will be added later */
2005 pic_param->filter_level = 0;
2006 // clear the filter level value in picParams ebfore programming pic state, as this value will be determined and updated by BRC.
2007 intel_vp9enc_construct_picstate_batchbuf(ctx, encode_state,
2008 encoder_context, &vme_context->res_pic_state_brc_read_buffer);
2011 gen9_brc_update_add_surfaces_vp9(ctx, encode_state,
2016 gen8_gpe_setup_interface_data(ctx, brc_gpe_context);
2017 memset(&media_object_param, 0, sizeof(media_object_param));
2018 gen9_run_kernel_media_object(ctx, encoder_context,
2021 &media_object_param);
2022 return VA_STATUS_SUCCESS;
/* gen9_vp9_set_curbe_me:
 * Program the CURBE (constant buffer) for the VME-based HME motion
 * estimation kernel, for either the 4x or the 16x downscaled pass.
 * NOTE(review): this chunk is a lossy extraction -- braces, `else`
 * keywords, the `me_mode` declaration, the scale_factor assignments and
 * the NULL check on the mapped CURBE live on lines not visible here; the
 * comments below describe only what the visible lines demonstrate.
 */
2026 void gen9_vp9_set_curbe_me(VADriverContextP ctx,
2027 struct encode_state *encode_state,
2028 struct i965_gpe_context *gpe_context,
2029 struct intel_encoder_context *encoder_context,
2030 struct gen9_vp9_me_curbe_param *param)
2032 vp9_me_curbe_data *me_cmd;
2033 int enc_media_state;
2035 unsigned int width, height;
2036 uint32_t l0_ref_frames;
2037 uint32_t scale_factor;
/* Derive the ME pass ordering from which downscaled passes are enabled. */
2039 if (param->b16xme_enabled) {
2040 if (param->use_16x_me)
2041 me_mode = VP9_ENC_ME16X_BEFORE_ME4X;
2043 me_mode = VP9_ENC_ME4X_AFTER_ME16X;
2045 me_mode = VP9_ENC_ME4X_ONLY;
/* scale_factor (16 or 4) is presumably set on hidden lines here -- it is
 * consumed below with no other visible assignment. TODO confirm. */
2048 if (me_mode == VP9_ENC_ME16X_BEFORE_ME4X)
2053 if (param->use_16x_me)
2054 enc_media_state = VP9_MEDIA_STATE_16X_ME;
2056 enc_media_state = VP9_MEDIA_STATE_4X_ME;
2058 me_cmd = i965_gpe_context_map_curbe(gpe_context);
2063 memset(me_cmd, 0, sizeof(vp9_me_curbe_data));
/* Fixed VME search configuration: SU count, search-path length,
 * sub-macroblock partitions, quarter-pel (sub_pel_mode = 3). */
2065 me_cmd->dw1.max_num_mvs = 0x10;
2066 me_cmd->dw1.bi_weight = 0x00;
2068 me_cmd->dw2.max_num_su = 0x39;
2069 me_cmd->dw2.max_len_sp = 0x39;
2071 me_cmd->dw3.sub_mb_part_mask = 0x77;
2072 me_cmd->dw3.inter_sad = 0x00;
2073 me_cmd->dw3.intra_sad = 0x00;
2074 me_cmd->dw3.bme_disable_fbr = 0x01;
2075 me_cmd->dw3.sub_pel_mode = 0x03;
/* Dimensions of the downscaled picture this ME pass operates on. */
2077 width = param->frame_width / scale_factor;
2078 height = param->frame_height / scale_factor;
2080 me_cmd->dw4.picture_width = ALIGN(width, 16) / 16;
2081 me_cmd->dw4.picture_height_minus1 = ALIGN(height, 16) / 16 - 1;
2083 me_cmd->dw5.ref_width = 0x30;
2084 me_cmd->dw5.ref_height = 0x28;
/* Only the 4x pass writes distortion output; the 16x pass feeds MVs
 * forward to the 4x pass instead. */
2086 if (enc_media_state == VP9_MEDIA_STATE_4X_ME)
2087 me_cmd->dw6.write_distortions = 0x01;
2089 me_cmd->dw6.use_mv_from_prev_step = me_mode == VP9_ENC_ME4X_AFTER_ME16X ? 1 : 0;
2090 me_cmd->dw6.super_combine_dist = 0x5;
2091 me_cmd->dw6.max_vmvr = 0x7fc;
/* Count L0 references from the LAST/GOLDEN/ALT bits of ref_frame_flag. */
2093 l0_ref_frames = (param->ref_frame_flag & 0x01) +
2094 !!(param->ref_frame_flag & 0x02) +
2095 !!(param->ref_frame_flag & 0x04);
2096 me_cmd->dw13.num_ref_idx_l0_minus1 = (l0_ref_frames > 0) ? l0_ref_frames - 1 : 0;
2097 me_cmd->dw13.num_ref_idx_l1_minus1 = 0;
2099 me_cmd->dw14.l0_ref_pic_polarity_bits = 0;
2100 me_cmd->dw14.l1_ref_pic_polarity_bits = 0;
2102 me_cmd->dw15.mv_shift_factor = 0x02;
/* Copy the diamond IME search-path table right after DW0..DW15 (64 bytes). */
2105 memcpy((void *)((char *)me_cmd + 64),
2106 vp9_diamond_ime_search_path_delta,
2107 sizeof(vp9_diamond_ime_search_path_delta));
/* Binding-table indices for the kernel's input/output surfaces. */
2111 me_cmd->dw32._4x_memv_output_data_surf_index = VP9_BTI_ME_MV_DATA_SURFACE;
2112 me_cmd->dw33._16x_32x_memv_input_data_surf_index = VP9_BTI_16XME_MV_DATA_SURFACE;
2113 me_cmd->dw34._4x_me_output_dist_surf_index = VP9_BTI_ME_DISTORTION_SURFACE;
2114 me_cmd->dw35._4x_me_output_brc_dist_surf_index = VP9_BTI_ME_BRC_DISTORTION_SURFACE;
2115 me_cmd->dw36.vme_fwd_inter_pred_surf_index = VP9_BTI_ME_CURR_PIC_L0;
2116 me_cmd->dw37.vme_bdw_inter_pred_surf_index = VP9_BTI_ME_CURR_PIC_L1;
2118 i965_gpe_context_unmap_curbe(gpe_context);
/* gen9_vp9_send_me_surface:
 * Bind all input/output surfaces for one HME kernel run: the MV data
 * buffer, distortion buffers (4x pass only), the downscaled current
 * picture, and the downscaled last/golden/alt reference pictures.
 * NOTE(review): lossy extraction -- several argument lines of the
 * gen9_add_buffer_2d_gpe_surface / gen9_add_adv_gpe_surface calls,
 * `else` keywords and braces are on hidden lines.
 */
2122 gen9_vp9_send_me_surface(VADriverContextP ctx,
2123 struct encode_state *encode_state,
2124 struct i965_gpe_context *gpe_context,
2125 struct intel_encoder_context *encoder_context,
2126 struct gen9_vp9_me_surface_param *param)
2128 struct i965_driver_data *i965 = i965_driver_data(ctx);
2129 struct object_surface *obj_surface;
2130 struct gen9_surface_vp9 *vp9_priv_surface;
2131 struct object_surface *input_surface;
2132 struct i965_gpe_resource *gpe_resource;
2135 obj_surface = SURFACE(param->curr_pic);
/* Bail out if the current picture has no VP9 private surface data. */
2137 if (!obj_surface || !obj_surface->private_data)
2140 vp9_priv_surface = obj_surface->private_data;
/* Output MV buffer: 16x or 4x, depending on which pass runs. */
2141 if (param->use_16x_me)
2143 gpe_resource = param->pres_16x_memv_data_buffer;
2147 gpe_resource = param->pres_4x_memv_data_buffer;
2150 gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
2153 I965_SURFACEFORMAT_R8_UNORM,
2154 VP9_BTI_ME_MV_DATA_SURFACE);
/* The 4x pass also reads the MVs produced by the earlier 16x pass. */
2156 if (param->b16xme_enabled) {
2157 gpe_resource = param->pres_16x_memv_data_buffer;
2158 gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
2161 I965_SURFACEFORMAT_R8_UNORM,
2162 VP9_BTI_16XME_MV_DATA_SURFACE);
/* Distortion outputs exist only for the 4x pass. */
2165 if (!param->use_16x_me) {
2166 gpe_resource = param->pres_me_brc_distortion_buffer;
2168 gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
2171 I965_SURFACEFORMAT_R8_UNORM,
2172 VP9_BTI_ME_BRC_DISTORTION_SURFACE);
2174 gpe_resource = param->pres_me_distortion_buffer;
2176 gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
2179 I965_SURFACEFORMAT_R8_UNORM,
2180 VP9_BTI_ME_DISTORTION_SURFACE);
/* Current picture at the matching downscale for VME. */
2183 if (param->use_16x_me)
2184 input_surface = vp9_priv_surface->scaled_16x_surface_obj;
2186 input_surface = vp9_priv_surface->scaled_4x_surface_obj;
2188 gen9_add_adv_gpe_surface(ctx, gpe_context,
2190 VP9_BTI_ME_CURR_PIC_L0);
2192 ref_bti = VP9_BTI_ME_CURR_PIC_L0 + 1;
/* Last reference: use the DYS-rescaled pyramid when dynamic scaling
 * changed this reference's resolution. */
2195 if (param->last_ref_pic) {
2196 obj_surface = param->last_ref_pic;
2197 vp9_priv_surface = obj_surface->private_data;
2199 if (param->use_16x_me)
2200 input_surface = vp9_priv_surface->scaled_16x_surface_obj;
2202 input_surface = vp9_priv_surface->scaled_4x_surface_obj;
2204 if (param->dys_enabled &&
2205 ((vp9_priv_surface->frame_width != param->frame_width) ||
2206 (vp9_priv_surface->frame_height != param->frame_height))) {
2207 if (param->use_16x_me)
2208 input_surface = vp9_priv_surface->dys_16x_surface_obj;
2210 input_surface = vp9_priv_surface->dys_4x_surface_obj;
2212 gen9_add_adv_gpe_surface(ctx, gpe_context,
2215 gen9_add_adv_gpe_surface(ctx, gpe_context,
/* Golden reference: same selection rules as the last reference. */
2221 if (param->golden_ref_pic) {
2222 obj_surface = param->golden_ref_pic;
2223 vp9_priv_surface = obj_surface->private_data;
2225 if (param->use_16x_me)
2226 input_surface = vp9_priv_surface->scaled_16x_surface_obj;
2228 input_surface = vp9_priv_surface->scaled_4x_surface_obj;
2230 if (param->dys_enabled &&
2231 ((vp9_priv_surface->frame_width != param->frame_width) ||
2232 (vp9_priv_surface->frame_height != param->frame_height))) {
2233 if (param->use_16x_me)
2234 input_surface = vp9_priv_surface->dys_16x_surface_obj;
2236 input_surface = vp9_priv_surface->dys_4x_surface_obj;
2239 gen9_add_adv_gpe_surface(ctx, gpe_context,
2242 gen9_add_adv_gpe_surface(ctx, gpe_context,
/* Alt reference: same selection rules again. */
2248 if (param->alt_ref_pic) {
2249 obj_surface = param->alt_ref_pic;
2250 vp9_priv_surface = obj_surface->private_data;
2252 if (param->use_16x_me)
2253 input_surface = vp9_priv_surface->scaled_16x_surface_obj;
2255 input_surface = vp9_priv_surface->scaled_4x_surface_obj;
2257 if (param->dys_enabled &&
2258 ((vp9_priv_surface->frame_width != param->frame_width) ||
2259 (vp9_priv_surface->frame_height != param->frame_height))) {
2260 if (param->use_16x_me)
2261 input_surface = vp9_priv_surface->dys_16x_surface_obj;
2263 input_surface = vp9_priv_surface->dys_4x_surface_obj;
2265 gen9_add_adv_gpe_surface(ctx, gpe_context,
2268 gen9_add_adv_gpe_surface(ctx, gpe_context,
/* gen9_me_add_surfaces_vp9:
 * Build the gen9_vp9_me_surface_param from the encoder state and hand it
 * to pfn_send_me_surface, which binds the surfaces for the HME kernel.
 * NOTE(review): lossy extraction -- the trailing `use_16x_me` parameter
 * line and the if/else around the 16x-vs-4x downscaled dimensions are on
 * hidden lines.
 */
2278 void gen9_me_add_surfaces_vp9(VADriverContextP ctx,
2279 struct encode_state *encode_state,
2280 struct intel_encoder_context *encoder_context,
2281 struct i965_gpe_context *gpe_context,
2284 struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
2285 struct gen9_vp9_me_surface_param me_surface_param;
2286 struct gen9_vp9_state *vp9_state;
2288 vp9_state = (struct gen9_vp9_state *)(encoder_context->enc_priv_state);
2290 /* sScaled4xSurface surface */
2291 memset(&me_surface_param, 0, sizeof(me_surface_param));
2292 me_surface_param.last_ref_pic = vp9_state->last_ref_obj;
2293 me_surface_param.golden_ref_pic = vp9_state->golden_ref_obj;
2294 me_surface_param.alt_ref_pic = vp9_state->alt_ref_obj;
2295 me_surface_param.curr_pic = vp9_state->curr_frame;
2296 me_surface_param.pres_4x_memv_data_buffer = &vme_context->s4x_memv_data_buffer;
2297 me_surface_param.pres_16x_memv_data_buffer = &vme_context->s16x_memv_data_buffer;
/* Both the distortion and the BRC-distortion surface point at the same
 * s4x_memv_distortion_buffer resource. */
2298 me_surface_param.pres_me_distortion_buffer = &vme_context->s4x_memv_distortion_buffer;
2299 me_surface_param.pres_me_brc_distortion_buffer = &vme_context->s4x_memv_distortion_buffer;
/* 16x dims when use_16x_me (hidden if/else), 4x dims otherwise. */
2302 me_surface_param.downscaled_width_in_mb = vp9_state->downscaled_width_16x_in_mb;
2303 me_surface_param.downscaled_height_in_mb = vp9_state->downscaled_height_16x_in_mb;
2305 me_surface_param.downscaled_width_in_mb = vp9_state->downscaled_width_4x_in_mb;
2306 me_surface_param.downscaled_height_in_mb = vp9_state->downscaled_height_4x_in_mb;
2308 me_surface_param.frame_width = vp9_state->frame_width;
2309 me_surface_param.frame_height = vp9_state->frame_height;
2311 me_surface_param.use_16x_me = use_16x_me;
2312 me_surface_param.b16xme_enabled = vp9_state->b16xme_enabled;
2313 me_surface_param.dys_enabled = vp9_state->dys_in_use;
2315 vme_context->pfn_send_me_surface(ctx, encode_state,
/* gen9_vp9_me_kernel:
 * Run one HME pass (4x or 16x): reset the GPE context, program the
 * CURBE, bind surfaces, then launch the kernel via the media-object
 * walker over the downscaled MB grid.
 * NOTE(review): lossy extraction -- the `use_16x_me` parameter line,
 * some call-argument lines and the if/else braces are hidden.
 * Returns VA_STATUS_SUCCESS or VA_STATUS_ERROR_INVALID_PARAMETER.
 */
2323 gen9_vp9_me_kernel(VADriverContextP ctx,
2324 struct encode_state *encode_state,
2325 struct intel_encoder_context *encoder_context,
2328 struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
2329 struct i965_gpe_context *gpe_context;
2331 struct gen9_vp9_me_curbe_param me_curbe_param;
2332 struct gen9_vp9_state *vp9_state;
2333 struct gpe_media_object_walker_parameter media_object_walker_param;
2334 struct vp9_encoder_kernel_walker_parameter kernel_walker_param;
2336 vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
2337 if (!vp9_state || !vp9_state->pic_param)
2338 return VA_STATUS_ERROR_INVALID_PARAMETER;
/* Pick the media-state id for the 16x or 4x pass (hidden if/else). */
2341 media_function = VP9_MEDIA_STATE_16X_ME;
2343 media_function = VP9_MEDIA_STATE_4X_ME;
2345 gpe_context = &(vme_context->me_context.gpe_context);
2347 gen8_gpe_context_init(ctx, gpe_context);
2348 gen9_gpe_reset_binding_table(ctx, gpe_context);
/* Program the ME CURBE for this pass. */
2350 memset(&me_curbe_param, 0, sizeof(me_curbe_param));
2351 me_curbe_param.ppic_param = vp9_state->pic_param;
2352 me_curbe_param.pseq_param = vp9_state->seq_param;
2353 me_curbe_param.frame_width = vp9_state->frame_width;
2354 me_curbe_param.frame_height = vp9_state->frame_height;
2355 me_curbe_param.ref_frame_flag = vp9_state->ref_frame_flag;
2356 me_curbe_param.use_16x_me = use_16x_me;
2357 me_curbe_param.b16xme_enabled = vp9_state->b16xme_enabled;
2358 vme_context->pfn_set_curbe_me(ctx, encode_state,
2363 gen9_me_add_surfaces_vp9(ctx, encode_state,
2368 gen8_gpe_setup_interface_data(ctx, gpe_context);
/* Walk one thread per MB of the downscaled picture; no thread deps. */
2370 memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
2372 kernel_walker_param.resolution_x = vp9_state->downscaled_width_16x_in_mb;
2373 kernel_walker_param.resolution_y = vp9_state->downscaled_height_16x_in_mb;
2375 kernel_walker_param.resolution_x = vp9_state->downscaled_width_4x_in_mb;
2376 kernel_walker_param.resolution_y = vp9_state->downscaled_height_4x_in_mb;
2378 kernel_walker_param.no_dependency = 1;
2380 gen9_init_media_object_walker_parameter(encoder_context, &kernel_walker_param, &media_object_walker_param);
2382 gen9_run_kernel_media_object_walker(ctx, encoder_context,
2385 &media_object_walker_param);
2387 return VA_STATUS_SUCCESS;
/* gen9_vp9_set_curbe_scaling_cm:
 * Program the CURBE for the CM 4x scaling kernel: input dimensions, the
 * Y in/out binding-table indices, and the optional MB-statistics output.
 * NOTE(review): lossy extraction -- the NULL check after map_curbe and
 * the lines that set the dw6 enable bits inside the if-branch are on
 * hidden lines.
 */
2391 gen9_vp9_set_curbe_scaling_cm(VADriverContextP ctx,
2392 struct encode_state *encode_state,
2393 struct i965_gpe_context *gpe_context,
2394 struct intel_encoder_context *encoder_context,
2395 struct gen9_vp9_scaling_curbe_param *curbe_param)
2397 vp9_scaling4x_curbe_data_cm *curbe_cmd;
2399 curbe_cmd = i965_gpe_context_map_curbe(gpe_context);
2404 memset(curbe_cmd, 0, sizeof(vp9_scaling4x_curbe_data_cm));
2406 curbe_cmd->dw0.input_picture_width = curbe_param->input_picture_width;
2407 curbe_cmd->dw0.input_picture_height = curbe_param->input_picture_height;
2409 curbe_cmd->dw1.input_y_bti = VP9_BTI_SCALING_FRAME_SRC_Y;
2410 curbe_cmd->dw2.output_y_bti = VP9_BTI_SCALING_FRAME_DST_Y;
/* Statistics outputs default to off; enabled below when requested. */
2413 curbe_cmd->dw6.enable_mb_variance_output = 0;
2414 curbe_cmd->dw6.enable_mb_pixel_average_output = 0;
2415 curbe_cmd->dw6.enable_blk8x8_stat_output = 0;
2417 if (curbe_param->mb_variance_output_enabled ||
2418 curbe_param->mb_pixel_average_output_enabled)
2420 curbe_cmd->dw10.mbv_proc_stat_bti = VP9_BTI_SCALING_FRAME_MBVPROCSTATS_DST_CM;
2423 i965_gpe_context_unmap_curbe(gpe_context);
2428 gen9_vp9_send_scaling_surface(VADriverContextP ctx,
2429 struct encode_state *encode_state,
2430 struct i965_gpe_context *gpe_context,
2431 struct intel_encoder_context *encoder_context,
2432 struct gen9_vp9_scaling_surface_param *scaling_surface_param)
2434 vp9_bti_scaling_offset *scaling_bti;
2435 unsigned int surface_format;
2437 scaling_bti = scaling_surface_param->p_scaling_bti;
2439 if (scaling_surface_param->scaling_out_use_32unorm_surf_fmt)
2440 surface_format = I965_SURFACEFORMAT_R32_UNORM;
2441 else if (scaling_surface_param->scaling_out_use_16unorm_surf_fmt)
2442 surface_format = I965_SURFACEFORMAT_R16_UNORM;
2444 surface_format = I965_SURFACEFORMAT_R8_UNORM;
2446 gen9_add_2d_gpe_surface(ctx, gpe_context,
2447 scaling_surface_param->input_surface,
2448 0, 1, surface_format,
2449 scaling_bti->scaling_frame_src_y);
2451 gen9_add_2d_gpe_surface(ctx, gpe_context,
2452 scaling_surface_param->output_surface,
2453 0, 1, surface_format,
2454 scaling_bti->scaling_frame_dst_y);
/* gen9_vp9_scaling_kernel:
 * Run one downscaling pass. 4x pass: full-size input (or the DYS-scaled
 * input when dynamic scaling resized it) -> 4x surface. 16x pass: 4x
 * surface -> 16x surface. Launches the scaling kernel over the 8x8
 * block grid with the media-object walker.
 * NOTE(review): lossy extraction -- braces, `else` keywords and some
 * call-argument lines are hidden between the numbered lines.
 * Returns VA_STATUS_SUCCESS or VA_STATUS_ERROR_INVALID_PARAMETER.
 */
2461 gen9_vp9_scaling_kernel(VADriverContextP ctx,
2462 struct encode_state *encode_state,
2463 struct intel_encoder_context *encoder_context,
2464 int use_16x_scaling)
2466 struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
2467 struct i965_gpe_context *gpe_context;
2469 struct gen9_vp9_scaling_curbe_param scaling_curbe_param;
2470 struct gen9_vp9_scaling_surface_param scaling_surface_param;
2471 struct gen9_vp9_state *vp9_state;
2472 VAEncPictureParameterBufferVP9 *pic_param;
2473 struct gpe_media_object_walker_parameter media_object_walker_param;
2474 struct vp9_encoder_kernel_walker_parameter kernel_walker_param;
2475 struct object_surface *obj_surface;
2476 struct object_surface *input_surface, *output_surface;
2477 struct gen9_surface_vp9 *vp9_priv_surface;
2478 unsigned int downscaled_width_in_mb, downscaled_height_in_mb;
2479 unsigned int input_frame_width, input_frame_height;
2480 unsigned int output_frame_width, output_frame_height;
2482 vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
2483 if (!vp9_state || !vp9_state->pic_param)
2484 return VA_STATUS_ERROR_INVALID_PARAMETER;
2486 pic_param = vp9_state->pic_param;
2488 if (use_16x_scaling)
2489 media_function = VP9_MEDIA_STATE_16X_SCALING;
2491 media_function = VP9_MEDIA_STATE_4X_SCALING;
2493 gpe_context = &(vme_context->scaling_context.gpe_contexts[0]);
2495 gen8_gpe_context_init(ctx, gpe_context);
2496 gen9_gpe_reset_binding_table(ctx, gpe_context);
/* The scaled surfaces hang off the reconstructed frame's private data. */
2498 obj_surface = encode_state->reconstructed_object;
2499 vp9_priv_surface = obj_surface->private_data;
/* 16x pass consumes the 4x output; 4x pass consumes the input YUV
 * (or its DYS-scaled version, selected below). */
2501 if (use_16x_scaling)
2503 downscaled_width_in_mb = vp9_state->downscaled_width_16x_in_mb;
2504 downscaled_height_in_mb = vp9_state->downscaled_height_16x_in_mb;
2506 input_surface = vp9_priv_surface->scaled_4x_surface_obj;
2507 input_frame_width = vp9_state->frame_width_4x;
2508 input_frame_height = vp9_state->frame_height_4x;
2510 output_surface = vp9_priv_surface->scaled_16x_surface_obj;
2511 output_frame_width = vp9_state->frame_width_16x;
2512 output_frame_height = vp9_state->frame_height_16x;
2514 downscaled_width_in_mb = vp9_state->downscaled_width_4x_in_mb;
2515 downscaled_height_in_mb = vp9_state->downscaled_height_4x_in_mb;
/* When dynamic scaling resized the frame, feed the DYS-scaled input. */
2517 if (vp9_state->dys_in_use &&
2518 ((pic_param->frame_width_src != pic_param->frame_width_dst) ||
2519 (pic_param->frame_height_src != pic_param->frame_height_dst)))
2520 input_surface = vp9_priv_surface->dys_surface_obj;
2522 input_surface = encode_state->input_yuv_object;
2524 input_frame_width = vp9_state->frame_width;
2525 input_frame_height = vp9_state->frame_height;
2527 output_surface = vp9_priv_surface->scaled_4x_surface_obj;
2528 output_frame_width = vp9_state->frame_width_4x;
2529 output_frame_height = vp9_state->frame_height_4x;
2532 memset(&scaling_curbe_param, 0, sizeof(scaling_curbe_param));
2534 scaling_curbe_param.input_picture_width = input_frame_width;
2535 scaling_curbe_param.input_picture_height = input_frame_height;
2537 scaling_curbe_param.use_16x_scaling = use_16x_scaling;
2538 scaling_curbe_param.use_32x_scaling = 0;
/* MB variance stats are only produced by the 4x pass, and only when the
 * adaptive-transform decision needs them. */
2540 if (use_16x_scaling)
2541 scaling_curbe_param.mb_variance_output_enabled = 0;
2543 scaling_curbe_param.mb_variance_output_enabled = vp9_state->adaptive_transform_decision_enabled;
2545 scaling_curbe_param.blk8x8_stat_enabled = 0;
2547 vme_context->pfn_set_curbe_scaling(ctx, encode_state,
2550 &scaling_curbe_param);
2552 memset(&scaling_surface_param, 0, sizeof(scaling_surface_param));
2553 scaling_surface_param.p_scaling_bti = (void *)(&vme_context->scaling_context.scaling_4x_bti);
2554 scaling_surface_param.input_surface = input_surface;
2555 scaling_surface_param.input_frame_width = input_frame_width;
2556 scaling_surface_param.input_frame_height = input_frame_height;
2558 scaling_surface_param.output_surface = output_surface;
2559 scaling_surface_param.output_frame_width = output_frame_width;
2560 scaling_surface_param.output_frame_height = output_frame_height;
2561 scaling_surface_param.scaling_out_use_16unorm_surf_fmt = 0;
2562 scaling_surface_param.scaling_out_use_32unorm_surf_fmt = 1;
2564 vme_context->pfn_send_scaling_surface(ctx, encode_state,
2567 &scaling_surface_param);
2569 gen8_gpe_setup_interface_data(ctx, gpe_context);
2571 memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
2572 /* the scaling is based on 8x8 blk level */
2573 kernel_walker_param.resolution_x = downscaled_width_in_mb * 2;
2574 kernel_walker_param.resolution_y = downscaled_height_in_mb * 2;
2575 kernel_walker_param.no_dependency = 1;
2577 gen9_init_media_object_walker_parameter(encoder_context, &kernel_walker_param, &media_object_walker_param);
2579 gen9_run_kernel_media_object_walker(ctx, encoder_context,
2582 &media_object_walker_param);
2584 return VA_STATUS_SUCCESS;
/* gen9_vp9_dys_set_sampler_state:
 * Program the gen9 8x8 AVS (adaptive video scaler) sampler state used by
 * the DYS kernel: edge-detection thresholds/weights, chroma slopes, the
 * AVS filter coefficient tables, and the sharpness/adaptive-filter
 * controls. The constants are fixed hardware tuning values.
 * NOTE(review): lossy extraction -- the early-return path after the
 * virtual-pointer check is on hidden lines.
 */
2588 gen9_vp9_dys_set_sampler_state(struct i965_gpe_context *gpe_context)
2590 struct gen9_sampler_8x8_avs *sampler_cmd;
/* Map the sampler state BO for CPU writes (1 = writable). */
2595 dri_bo_map(gpe_context->sampler.bo, 1);
2597 if (!gpe_context->sampler.bo->virtual)
2600 sampler_cmd = (struct gen9_sampler_8x8_avs *)
2601 (gpe_context->sampler.bo->virtual + gpe_context->sampler.offset);
2603 memset(sampler_cmd, 0, sizeof(struct gen9_sampler_8x8_avs));
/* Edge/noise detection tuning. */
2605 sampler_cmd->dw0.r3c_coefficient = 15;
2606 sampler_cmd->dw0.r3x_coefficient = 6;
2607 sampler_cmd->dw0.strong_edge_threshold = 8;
2608 sampler_cmd->dw0.weak_edge_threshold = 1;
2609 sampler_cmd->dw0.gain_factor = 32;
2611 sampler_cmd->dw2.r5c_coefficient = 3;
2612 sampler_cmd->dw2.r5cx_coefficient = 8;
2613 sampler_cmd->dw2.r5x_coefficient = 9;
2614 sampler_cmd->dw2.strong_edge_weight = 6;
2615 sampler_cmd->dw2.regular_weight = 3;
2616 sampler_cmd->dw2.non_edge_weight = 2;
2617 sampler_cmd->dw2.global_noise_estimation = 255;
2619 sampler_cmd->dw3.enable_8tap_adaptive_filter = 0;
2620 sampler_cmd->dw3.cos_alpha = 79;
2621 sampler_cmd->dw3.sin_alpha = 101;
2623 sampler_cmd->dw5.diamond_du = 0;
2624 sampler_cmd->dw5.hs_margin = 3;
2625 sampler_cmd->dw5.diamond_alpha = 100;
2627 sampler_cmd->dw7.inv_margin_vyl = 3300;
2629 sampler_cmd->dw8.inv_margin_vyu = 1600;
2631 sampler_cmd->dw10.y_slope2 = 24;
2632 sampler_cmd->dw10.s0l = 1792;
2634 sampler_cmd->dw12.y_slope1 = 24;
2636 sampler_cmd->dw14.s0u = 256;
2638 sampler_cmd->dw15.s2u = 1792;
2639 sampler_cmd->dw15.s1u = 0;
/* First 17 banks of AVS filter coefficients. */
2641 memcpy(sampler_cmd->coefficients,
2642 &gen9_vp9_avs_coeffs[0],
2643 17 * sizeof(struct gen8_sampler_8x8_avs_coefficients));
2645 sampler_cmd->dw152.default_sharpness_level = 255;
2646 sampler_cmd->dw152.max_derivative_4_pixels = 7;
2647 sampler_cmd->dw152.max_derivative_8_pixels = 20;
2648 sampler_cmd->dw152.transition_area_with_4_pixels = 4;
2649 sampler_cmd->dw152.transition_area_with_8_pixels = 5;
2651 sampler_cmd->dw153.bypass_x_adaptive_filtering = 1;
2652 sampler_cmd->dw153.bypass_y_adaptive_filtering = 1;
2653 sampler_cmd->dw153.adaptive_filter_for_all_channel = 0;
/* Remaining 15 coefficient banks (offset 17*8 into the table). */
2655 memcpy(sampler_cmd->extra_coefficients,
2656 &gen9_vp9_avs_coeffs[17 * 8],
2657 15 * sizeof(struct gen8_sampler_8x8_avs_coefficients));
2659 dri_bo_unmap(gpe_context->sampler.bo);
2663 gen9_vp9_set_curbe_dys(VADriverContextP ctx,
2664 struct encode_state *encode_state,
2665 struct i965_gpe_context *gpe_context,
2666 struct intel_encoder_context *encoder_context,
2667 struct gen9_vp9_dys_curbe_param *curbe_param)
2669 vp9_dys_curbe_data *curbe_cmd;
2671 curbe_cmd = i965_gpe_context_map_curbe(gpe_context);
2676 memset(curbe_cmd, 0, sizeof(vp9_dys_curbe_data));
2678 curbe_cmd->dw0.input_frame_width = curbe_param->input_width;
2679 curbe_cmd->dw0.input_frame_height = curbe_param->input_height;
2681 curbe_cmd->dw1.output_frame_width = curbe_param->output_width;
2682 curbe_cmd->dw1.output_frame_height = curbe_param->output_height;
2684 curbe_cmd->dw2.delta_u = 1.0f / curbe_param->output_width;
2685 curbe_cmd->dw3.delta_v = 1.0f / curbe_param->output_height;
2687 curbe_cmd->dw16.input_frame_nv12_bti = VP9_BTI_DYS_INPUT_NV12;
2688 curbe_cmd->dw17.output_frame_y_bti = VP9_BTI_DYS_OUTPUT_Y;
2689 curbe_cmd->dw18.avs_sample_idx = 0;
2691 i965_gpe_context_unmap_curbe(gpe_context);
/* gen9_vp9_send_dys_surface:
 * Bind the DYS kernel surfaces: the NV12 input frame as an AVS (adv)
 * surface, and the output frame twice -- Y plane as R8_UNORM and UV
 * plane as R16_UINT -- at their respective binding-table slots.
 * NOTE(review): lossy extraction -- the gpe_context argument line and
 * the plane/flag argument lines of these calls are hidden.
 */
2695 gen9_vp9_send_dys_surface(VADriverContextP ctx,
2696 struct encode_state *encode_state,
2697 struct i965_gpe_context *gpe_context,
2698 struct intel_encoder_context *encoder_context,
2699 struct gen9_vp9_dys_surface_param *surface_param)
2702 if (surface_param->input_frame)
2703 gen9_add_adv_gpe_surface(ctx,
2705 surface_param->input_frame,
2706 VP9_BTI_DYS_INPUT_NV12);
2708 if (surface_param->output_frame) {
/* Luma plane of the scaled output. */
2709 gen9_add_2d_gpe_surface(ctx,
2711 surface_param->output_frame,
2714 I965_SURFACEFORMAT_R8_UNORM,
2715 VP9_BTI_DYS_OUTPUT_Y);
/* Interleaved chroma plane, addressed as 16-bit texels. */
2717 gen9_add_2d_gpe_surface(ctx,
2719 surface_param->output_frame,
2722 I965_SURFACEFORMAT_R16_UINT,
2723 VP9_BTI_DYS_OUTPUT_UV);
/* gen9_vp9_dys_kernel:
 * Run the DYS (dynamic scaling) kernel once, scaling one input surface
 * to one output surface at the sizes in dys_kernel_param. The walker is
 * dispatched per 16x16 block of the output picture.
 * NOTE(review): lossy extraction -- several call-argument lines are
 * hidden between the numbered lines.
 * Returns VA_STATUS_SUCCESS.
 */
2730 gen9_vp9_dys_kernel(VADriverContextP ctx,
2731 struct encode_state *encode_state,
2732 struct intel_encoder_context *encoder_context,
2733 gen9_vp9_dys_kernel_param *dys_kernel_param)
2735 struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
2736 struct i965_gpe_context *gpe_context;
2738 struct gen9_vp9_dys_curbe_param curbe_param;
2739 struct gen9_vp9_dys_surface_param surface_param;
2740 struct gpe_media_object_walker_parameter media_object_walker_param;
2741 struct vp9_encoder_kernel_walker_parameter kernel_walker_param;
2742 unsigned int resolution_x, resolution_y;
2744 media_function = VP9_MEDIA_STATE_DYS;
2745 gpe_context = &vme_context->dys_context.gpe_context;
2747 //gen8_gpe_context_init(ctx, gpe_context);
2748 gen9_gpe_reset_binding_table(ctx, gpe_context);
2750 /* sampler state is configured only when initializing the GPE context */
/* Program the DYS CURBE with the in/out dimensions for this run. */
2752 memset(&curbe_param, 0, sizeof(curbe_param));
2753 curbe_param.input_width = dys_kernel_param->input_width;
2754 curbe_param.input_height = dys_kernel_param->input_height;
2755 curbe_param.output_width = dys_kernel_param->output_width;
2756 curbe_param.output_height = dys_kernel_param->output_height;
2757 vme_context->pfn_set_curbe_dys(ctx, encode_state,
2762 // Add surface states
2763 memset(&surface_param, 0, sizeof(surface_param));
2764 surface_param.input_frame = dys_kernel_param->input_surface;
2765 surface_param.output_frame = dys_kernel_param->output_surface;
2766 surface_param.vert_line_stride = 0;
2767 surface_param.vert_line_stride_offset = 0;
2769 vme_context->pfn_send_dys_surface(ctx,
/* One walker unit per 16x16 block of the output picture. */
2775 resolution_x = ALIGN(dys_kernel_param->output_width, 16) / 16;
2776 resolution_y = ALIGN(dys_kernel_param->output_height, 16) / 16;
2778 gen8_gpe_setup_interface_data(ctx, gpe_context);
2780 memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
2781 kernel_walker_param.resolution_x = resolution_x;
2782 kernel_walker_param.resolution_y = resolution_y;
2783 kernel_walker_param.no_dependency = 1;
2785 gen9_init_media_object_walker_parameter(encoder_context, &kernel_walker_param, &media_object_walker_param);
2787 gen9_run_kernel_media_object_walker(ctx, encoder_context,
2790 &media_object_walker_param);
2792 return VA_STATUS_SUCCESS;
2796 gen9_vp9_run_dys_refframes(VADriverContextP ctx,
2797 struct encode_state *encode_state,
2798 struct intel_encoder_context *encoder_context)
2800 struct gen9_vp9_state *vp9_state;
2801 VAEncPictureParameterBufferVP9 *pic_param;
2802 gen9_vp9_dys_kernel_param dys_kernel_param;
2803 struct object_surface *obj_surface;
2804 struct object_surface *input_surface, *output_surface;
2805 struct gen9_surface_vp9 *vp9_priv_surface;
2807 vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
2809 if (!vp9_state || !vp9_state->pic_param)
2810 return VA_STATUS_ERROR_INVALID_PARAMETER;
2812 pic_param = vp9_state->pic_param;
2814 if ((pic_param->frame_width_src != pic_param->frame_width_dst) ||
2815 (pic_param->frame_height_src != pic_param->frame_height_dst)) {
2816 input_surface = encode_state->input_yuv_object;
2817 obj_surface = encode_state->reconstructed_object;
2818 vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
2819 output_surface = vp9_priv_surface->dys_surface_obj;
2821 memset(&dys_kernel_param, 0, sizeof(dys_kernel_param));
2822 dys_kernel_param.input_width = pic_param->frame_width_src;
2823 dys_kernel_param.input_height = pic_param->frame_height_src;
2824 dys_kernel_param.input_surface = input_surface;
2825 dys_kernel_param.output_width = pic_param->frame_width_dst;
2826 dys_kernel_param.output_height = pic_param->frame_height_dst;
2827 dys_kernel_param.output_surface = output_surface;
2828 gen9_vp9_dys_kernel(ctx, encode_state,
2833 if ((vp9_state->dys_ref_frame_flag & VP9_LAST_REF) &&
2834 vp9_state->last_ref_obj) {
2835 obj_surface = vp9_state->last_ref_obj;
2836 vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
2838 input_surface = obj_surface;
2839 output_surface = vp9_priv_surface->dys_surface_obj;
2841 dys_kernel_param.input_width = vp9_priv_surface->frame_width;
2842 dys_kernel_param.input_height = vp9_priv_surface->frame_height;
2843 dys_kernel_param.input_surface = input_surface;
2845 dys_kernel_param.output_width = pic_param->frame_width_dst;
2846 dys_kernel_param.output_height = pic_param->frame_height_dst;
2847 dys_kernel_param.output_surface = output_surface;
2849 gen9_vp9_dys_kernel(ctx, encode_state,
2853 if (vp9_state->hme_enabled) {
2854 dys_kernel_param.input_width = ALIGN((vp9_priv_surface->frame_width / 4), 16);
2855 dys_kernel_param.input_width = ALIGN((vp9_priv_surface->frame_height / 4), 16);
2856 dys_kernel_param.input_surface = vp9_priv_surface->scaled_4x_surface_obj;
2858 dys_kernel_param.output_width = vp9_state->frame_width_4x;
2859 dys_kernel_param.output_height = vp9_state->frame_height_4x;
2860 dys_kernel_param.output_surface = vp9_priv_surface->dys_4x_surface_obj;
2862 gen9_vp9_dys_kernel(ctx, encode_state,
2866 /* Does it really need to do the 16x HME if the
2867 * resolution is different?
2868 * Maybe it should be restricted
2870 if (vp9_state->b16xme_enabled) {
2871 dys_kernel_param.input_width = ALIGN((vp9_priv_surface->frame_width / 16), 16);
2872 dys_kernel_param.input_height = ALIGN((vp9_priv_surface->frame_height / 16), 16);
2873 dys_kernel_param.input_surface = vp9_priv_surface->scaled_16x_surface_obj;
2875 dys_kernel_param.output_width = vp9_state->frame_width_16x;
2876 dys_kernel_param.output_height = vp9_state->frame_height_16x;
2877 dys_kernel_param.output_surface = vp9_priv_surface->dys_16x_surface_obj;
2879 gen9_vp9_dys_kernel(ctx, encode_state,
2886 if ((vp9_state->dys_ref_frame_flag & VP9_GOLDEN_REF) &&
2887 vp9_state->golden_ref_obj) {
2888 obj_surface = vp9_state->golden_ref_obj;
2889 vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
2891 input_surface = obj_surface;
2892 output_surface = vp9_priv_surface->dys_surface_obj;
2894 dys_kernel_param.input_width = vp9_priv_surface->frame_width;
2895 dys_kernel_param.input_height = vp9_priv_surface->frame_height;
2896 dys_kernel_param.input_surface = input_surface;
2898 dys_kernel_param.output_width = pic_param->frame_width_dst;
2899 dys_kernel_param.output_height = pic_param->frame_height_dst;
2900 dys_kernel_param.output_surface = output_surface;
2902 gen9_vp9_dys_kernel(ctx, encode_state,
2906 if (vp9_state->hme_enabled) {
2907 dys_kernel_param.input_width = ALIGN((vp9_priv_surface->frame_width / 4), 16);
2908 dys_kernel_param.input_width = ALIGN((vp9_priv_surface->frame_height / 4), 16);
2909 dys_kernel_param.input_surface = vp9_priv_surface->scaled_4x_surface_obj;
2911 dys_kernel_param.output_width = vp9_state->frame_width_4x;
2912 dys_kernel_param.output_height = vp9_state->frame_height_4x;
2913 dys_kernel_param.output_surface = vp9_priv_surface->dys_4x_surface_obj;
2915 gen9_vp9_dys_kernel(ctx, encode_state,
2919 /* Does it really need to do the 16x HME if the
2920 * resolution is different?
2921 * Maybe it should be restricted
2923 if (vp9_state->b16xme_enabled) {
2924 dys_kernel_param.input_width = ALIGN((vp9_priv_surface->frame_width / 16), 16);
2925 dys_kernel_param.input_height = ALIGN((vp9_priv_surface->frame_height / 16), 16);
2926 dys_kernel_param.input_surface = vp9_priv_surface->scaled_16x_surface_obj;
2928 dys_kernel_param.output_width = vp9_state->frame_width_16x;
2929 dys_kernel_param.output_height = vp9_state->frame_height_16x;
2930 dys_kernel_param.output_surface = vp9_priv_surface->dys_16x_surface_obj;
2932 gen9_vp9_dys_kernel(ctx, encode_state,
2939 if ((vp9_state->dys_ref_frame_flag & VP9_ALT_REF) &&
2940 vp9_state->alt_ref_obj) {
2941 obj_surface = vp9_state->alt_ref_obj;
2942 vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
2944 input_surface = obj_surface;
2945 output_surface = vp9_priv_surface->dys_surface_obj;
2947 dys_kernel_param.input_width = vp9_priv_surface->frame_width;
2948 dys_kernel_param.input_height = vp9_priv_surface->frame_height;
2949 dys_kernel_param.input_surface = input_surface;
2951 dys_kernel_param.output_width = pic_param->frame_width_dst;
2952 dys_kernel_param.output_height = pic_param->frame_height_dst;
2953 dys_kernel_param.output_surface = output_surface;
2955 gen9_vp9_dys_kernel(ctx, encode_state,
2959 if (vp9_state->hme_enabled) {
2960 dys_kernel_param.input_width = ALIGN((vp9_priv_surface->frame_width / 4), 16);
2961 dys_kernel_param.input_width = ALIGN((vp9_priv_surface->frame_height / 4), 16);
2962 dys_kernel_param.input_surface = vp9_priv_surface->scaled_4x_surface_obj;
2964 dys_kernel_param.output_width = vp9_state->frame_width_4x;
2965 dys_kernel_param.output_height = vp9_state->frame_height_4x;
2966 dys_kernel_param.output_surface = vp9_priv_surface->dys_4x_surface_obj;
2968 gen9_vp9_dys_kernel(ctx, encode_state,
2972 /* Does it really need to do the 16x HME if the
2973 * resolution is different?
2974 * Maybe it should be restricted
2976 if (vp9_state->b16xme_enabled) {
2977 dys_kernel_param.input_width = ALIGN((vp9_priv_surface->frame_width / 16), 16);
2978 dys_kernel_param.input_height = ALIGN((vp9_priv_surface->frame_height / 16), 16);
2979 dys_kernel_param.input_surface = vp9_priv_surface->scaled_16x_surface_obj;
2981 dys_kernel_param.output_width = vp9_state->frame_width_16x;
2982 dys_kernel_param.output_height = vp9_state->frame_height_16x;
2983 dys_kernel_param.output_surface = vp9_priv_surface->dys_16x_surface_obj;
2985 gen9_vp9_dys_kernel(ctx, encode_state,
2992 return VA_STATUS_SUCCESS;
/*
 * Fill the CURBE (kernel constant buffer) for the VP9 MBEnc kernels.
 *
 * Programs the per-frame geometry, frame type, the motion-search tuning
 * matching the selected target usage (quality / normal / performance),
 * the per-segment mode-cost lookup tables, the reference-frame DC
 * quantizer values, and the binding-table indices (BTIs) of every
 * surface the kernel reads or writes.  The curbe of @gpe_context is
 * mapped, rewritten from scratch and unmapped before returning.
 */
gen9_vp9_set_curbe_mbenc(VADriverContextP ctx,
                         struct encode_state *encode_state,
                         struct i965_gpe_context *gpe_context,
                         struct intel_encoder_context *encoder_context,
                         struct gen9_vp9_mbenc_curbe_param *curbe_param)
    struct gen9_vp9_state *vp9_state;
    VAEncMiscParameterTypeVP9PerSegmantParam *seg_param, tmp_seg_param;
    vp9_mbenc_curbe_data *curbe_cmd;
    VAEncPictureParameterBufferVP9 *pic_param;
    int i, segment_count;
    struct object_surface *obj_surface;
    struct gen9_surface_vp9 *vp9_priv_surface;

    vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;

    /* nothing to program until a picture parameter has been parsed */
    if (!vp9_state || !vp9_state->pic_param)

    pic_param = curbe_param->ppic_param;
    seg_param = curbe_param->psegment_param;

    /* zero-filled stand-in segment table -- NOTE(review): presumably
     * used only when the application supplied no per-segment data;
     * the guard is not visible in this listing, confirm upstream */
    memset(&tmp_seg_param, 0, sizeof(tmp_seg_param));
    seg_param = &tmp_seg_param;

    curbe_cmd = i965_gpe_context_map_curbe(gpe_context);

    memset(curbe_cmd, 0, sizeof(vp9_mbenc_curbe_data));

    /* with dynamic scaling the kernel works on the scaled (dst) size,
     * otherwise on the source size */
    if (vp9_state->dys_in_use)
        curbe_cmd->dw0.frame_width = pic_param->frame_width_dst;
        curbe_cmd->dw0.frame_height = pic_param->frame_height_dst;
        curbe_cmd->dw0.frame_width = pic_param->frame_width_src;
        curbe_cmd->dw0.frame_height = pic_param->frame_height_src;

    curbe_cmd->dw1.frame_type = curbe_param->picture_coding_type;

    curbe_cmd->dw1.segmentation_enable = pic_param->pic_flags.bits.segmentation_enabled;
    if (pic_param->pic_flags.bits.segmentation_enabled)

    curbe_cmd->dw1.ref_frame_flags = curbe_param->ref_frame_flag;

    /* motion-estimation tuning: search window, search-path length and
     * skip/multi-ref thresholds per target usage */
    //right now set them to normal settings
    if (curbe_param->picture_coding_type)
    switch (vp9_state->target_usage)
    case INTEL_ENC_VP9_TU_QUALITY:
        curbe_cmd->dw1.min_16for32_check = 0x00;
        curbe_cmd->dw2.multi_pred = 0x02;
        curbe_cmd->dw2.len_sp = 0x39;
        curbe_cmd->dw2.search_x = 0x30;
        curbe_cmd->dw2.search_y = 0x28;
        curbe_cmd->dw3.min_ref_for32_check = 0x01;
        curbe_cmd->dw4.skip16_threshold = 0x000A;
        curbe_cmd->dw4.disable_mr_threshold = 0x000C;

        /* diamond-shaped IME search path (14 dwords) */
        memcpy(&curbe_cmd->dw16,
               vp9_diamond_ime_search_path_delta,
               14 * sizeof(unsigned int));

    case INTEL_ENC_VP9_TU_PERFORMANCE:
        curbe_cmd->dw1.min_16for32_check = 0x02;
        curbe_cmd->dw2.multi_pred = 0x00;
        curbe_cmd->dw2.len_sp = 0x10;
        curbe_cmd->dw2.search_x = 0x20;
        curbe_cmd->dw2.search_y = 0x20;
        curbe_cmd->dw3.min_ref_for32_check = 0x03;
        curbe_cmd->dw4.skip16_threshold = 0x0014;
        curbe_cmd->dw4.disable_mr_threshold = 0x0016;

        /* shorter full-spiral IME search path for speed */
        memcpy(&curbe_cmd->dw16,
               vp9_fullspiral_ime_search_path_delta,
               14 * sizeof(unsigned int));

    default: // normal settings
        curbe_cmd->dw1.min_16for32_check = 0x01;
        curbe_cmd->dw2.multi_pred = 0x00;
        curbe_cmd->dw2.len_sp = 0x19;
        curbe_cmd->dw2.search_x = 0x30;
        curbe_cmd->dw2.search_y = 0x28;
        curbe_cmd->dw3.min_ref_for32_check = 0x02;
        curbe_cmd->dw4.skip16_threshold = 0x000F;
        curbe_cmd->dw4.disable_mr_threshold = 0x0011;

        memcpy(&curbe_cmd->dw16,
               vp9_diamond_ime_search_path_delta,
               14 * sizeof(unsigned int));

    curbe_cmd->dw3.hme_enabled = curbe_param->hme_enabled;
    curbe_cmd->dw3.multi_ref_qp_check = curbe_param->multi_ref_qp_check;
    // co-located predictor must be disabled when dynamic scaling is enabled
    curbe_cmd->dw3.disable_temp_pred = vp9_state->dys_in_use;

    curbe_cmd->dw5.inter_round = 0;
    curbe_cmd->dw5.intra_round = 4;
    curbe_cmd->dw5.frame_qpindex = pic_param->luma_ac_qindex;

    /* one mode-cost LUT per segment, selected by the segment's effective
     * AC qindex (base + DC delta + per-segment delta, clamped 0..255) */
    for (i = 0; i < segment_count; i++)
        seg_qindex = pic_param->luma_ac_qindex + pic_param->luma_dc_qindex_delta
                     + seg_param->seg_data[i].segment_qindex_delta;

        seg_qindex = CLAMP(0, 255, seg_qindex);

        /* inter frames use the P cost table, key frames the key table */
        if (curbe_param->picture_coding_type)
            memcpy(&curbe_cmd->segments[i],
                   &intel_vp9_costlut_p[seg_qindex * 16],
                   16 * sizeof(unsigned int));
            memcpy(&curbe_cmd->segments[i],
                   &intel_vp9_costlut_key[seg_qindex * 16],
                   16 * sizeof(unsigned int));

    /* the multi-ref QP check needs the DC quantizer of every reference
     * that is enabled in ref_frame_flag (bit0 last, bit1 golden,
     * bit2 altref) */
    if (curbe_param->picture_coding_type)
        if (curbe_cmd->dw3.multi_ref_qp_check)
            if (curbe_param->ref_frame_flag & 0x01)
                obj_surface = curbe_param->last_ref_obj;
                vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
                curbe_cmd->dw8.last_ref_qp = vp9_quant_dc[vp9_priv_surface->qp_value];

            if (curbe_param->ref_frame_flag & 0x02)
                obj_surface = curbe_param->golden_ref_obj;
                vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
                curbe_cmd->dw8.golden_ref_qp = vp9_quant_dc[vp9_priv_surface->qp_value];

            if (curbe_param->ref_frame_flag & 0x04)
                obj_surface = curbe_param->alt_ref_obj;
                vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
                curbe_cmd->dw9.alt_ref_qp = vp9_quant_dc[vp9_priv_surface->qp_value];

    /* binding-table indices of the kernel's surfaces */
    curbe_cmd->dw160.enc_curr_y_surf_bti = VP9_BTI_MBENC_CURR_Y_G9;
    curbe_cmd->dw162.enc_curr_nv12_surf_bti = VP9_BTI_MBENC_CURR_NV12_G9;
    curbe_cmd->dw166.segmentation_map_bti = VP9_BTI_MBENC_SEGMENTATION_MAP_G9;
    /* NOTE(review): dw172.mode_decision_bti is assigned again below with
     * the same value -- one of the two assignments is redundant */
    curbe_cmd->dw172.mode_decision_bti = VP9_BTI_MBENC_MODE_DECISION_G9;
    curbe_cmd->dw167.tx_curbe_bti = VP9_BTI_MBENC_TX_CURBE_G9;
    curbe_cmd->dw168.hme_mvdata_bti = VP9_BTI_MBENC_HME_MV_DATA_G9;
    curbe_cmd->dw169.hme_distortion_bti = VP9_BTI_MBENC_HME_DISTORTION_G9;
    curbe_cmd->dw171.mode_decision_prev_bti = VP9_BTI_MBENC_MODE_DECISION_PREV_G9;
    curbe_cmd->dw172.mode_decision_bti = VP9_BTI_MBENC_MODE_DECISION_G9;
    curbe_cmd->dw173.output_16x16_inter_modes_bti = VP9_BTI_MBENC_OUT_16x16_INTER_MODES_G9;
    curbe_cmd->dw174.cu_record_bti = VP9_BTI_MBENC_CU_RECORDS_G9;
    curbe_cmd->dw175.pak_data_bti = VP9_BTI_MBENC_PAK_DATA_G9;

    i965_gpe_context_unmap_curbe(gpe_context);
/*
 * Bind all surfaces the selected MBEnc pass needs to the GPE context.
 *
 * Each VP9_MEDIA_STATE_MBENC_* pass (intra 32x32, intra 16x16, inter,
 * TX) binds the current frame planes plus its auxiliary buffers
 * (segmentation map, mode-decision buffers, HME MV/distortion, PAK
 * data and CU records) at the binding-table indices the kernel expects.
 */
gen9_vp9_send_mbenc_surface(VADriverContextP ctx,
                            struct encode_state *encode_state,
                            struct i965_gpe_context *gpe_context,
                            struct intel_encoder_context *encoder_context,
                            struct gen9_vp9_mbenc_surface_param *mbenc_param)
    struct gen9_vp9_state *vp9_state;
    unsigned int res_size;
    unsigned int frame_width_in_sb, frame_height_in_sb;
    struct object_surface *obj_surface, *tmp_input;
    struct gen9_surface_vp9 *vp9_priv_surface;

    vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;

    if (!vp9_state || !vp9_state->pic_param)

    /* frame size in 64x64 super blocks */
    frame_width_in_sb = ALIGN(mbenc_param->frame_width, 64) / 64;
    frame_height_in_sb = ALIGN(mbenc_param->frame_height, 64) / 64;
    media_function = mbenc_param->media_state_type;

    switch (media_function)
    case VP9_MEDIA_STATE_MBENC_I_32x32:
        obj_surface = mbenc_param->curr_frame_obj;

        /* current frame: luma plane */
        gen9_add_2d_gpe_surface(ctx,
                                I965_SURFACEFORMAT_R8_UNORM,
                                VP9_BTI_MBENC_CURR_Y_G9);

        /* current frame: interleaved chroma plane */
        gen9_add_2d_gpe_surface(ctx,
                                I965_SURFACEFORMAT_R16_UINT,
                                VP9_BTI_MBENC_CURR_UV_G9);

        if (mbenc_param->segmentation_enabled)
            gen9_add_buffer_2d_gpe_surface(ctx,
                                           mbenc_param->pres_segmentation_map,
                                           I965_SURFACEFORMAT_R8_UNORM,
                                           VP9_BTI_MBENC_SEGMENTATION_MAP_G9);

        /* mode-decision output: 16 dwords per 16x16 block */
        res_size = 16 * mbenc_param->frame_width_in_mb *
                   mbenc_param->frame_height_in_mb * sizeof(unsigned int);
        gen9_add_buffer_gpe_surface(ctx,
                                    mbenc_param->pres_mode_decision,
                                    VP9_BTI_MBENC_MODE_DECISION_G9);

    case VP9_MEDIA_STATE_MBENC_I_16x16:
        obj_surface = mbenc_param->curr_frame_obj;

        gen9_add_2d_gpe_surface(ctx,
                                I965_SURFACEFORMAT_R8_UNORM,
                                VP9_BTI_MBENC_CURR_Y_G9);

        gen9_add_2d_gpe_surface(ctx,
                                I965_SURFACEFORMAT_R16_UINT,
                                VP9_BTI_MBENC_CURR_UV_G9);

        /* current frame again as a VME (adv) surface */
        gen9_add_adv_gpe_surface(ctx, gpe_context,
                                 VP9_BTI_MBENC_CURR_NV12_G9);

        if (mbenc_param->segmentation_enabled)
            gen9_add_buffer_2d_gpe_surface(ctx,
                                           mbenc_param->pres_segmentation_map,
                                           I965_SURFACEFORMAT_R8_UNORM,
                                           VP9_BTI_MBENC_SEGMENTATION_MAP_G9);

        res_size = 16 * mbenc_param->frame_width_in_mb *
                   mbenc_param->frame_height_in_mb * sizeof(unsigned int);
        gen9_add_buffer_gpe_surface(ctx,
                                    mbenc_param->pres_mode_decision,
                                    VP9_BTI_MBENC_MODE_DECISION_G9);

        /* the TX kernel's curbe BO, updated in place by this pass */
        gen9_add_dri_buffer_gpe_surface(ctx,
                                        mbenc_param->gpe_context_tx->curbe.bo,
                                        ALIGN(res_size, 64),
                                        mbenc_param->gpe_context_tx->curbe.offset,
                                        VP9_BTI_MBENC_TX_CURBE_G9);

    case VP9_MEDIA_STATE_MBENC_P:
        obj_surface = mbenc_param->curr_frame_obj;

        gen9_add_2d_gpe_surface(ctx,
                                I965_SURFACEFORMAT_R8_UNORM,
                                VP9_BTI_MBENC_CURR_Y_G9);

        gen9_add_2d_gpe_surface(ctx, gpe_context,
                                I965_SURFACEFORMAT_R16_UINT,
                                VP9_BTI_MBENC_CURR_UV_G9);

        gen9_add_adv_gpe_surface(ctx, gpe_context,
                                 VP9_BTI_MBENC_CURR_NV12_G9);

        /* LAST reference: substitute the dynamically-scaled copy when
         * its size differs from the current frame; bound at two
         * consecutive BTIs as the VME kernel expects */
        if (mbenc_param->last_ref_obj)
            obj_surface = mbenc_param->last_ref_obj;
            vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);

            if (vp9_state->dys_in_use &&
                ((vp9_priv_surface->frame_width != vp9_state->frame_width) ||
                 (vp9_priv_surface->frame_height != vp9_state->frame_height)))
                tmp_input = vp9_priv_surface->dys_surface_obj;
                tmp_input = obj_surface;

            gen9_add_adv_gpe_surface(ctx, gpe_context,
                                     VP9_BTI_MBENC_LAST_NV12_G9);

            gen9_add_adv_gpe_surface(ctx, gpe_context,
                                     VP9_BTI_MBENC_LAST_NV12_G9 + 1);

        /* GOLDEN reference: same dynamic-scaling substitution */
        if (mbenc_param->golden_ref_obj)
            obj_surface = mbenc_param->golden_ref_obj;
            vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);

            if (vp9_state->dys_in_use &&
                ((vp9_priv_surface->frame_width != vp9_state->frame_width) ||
                 (vp9_priv_surface->frame_height != vp9_state->frame_height)))
                tmp_input = vp9_priv_surface->dys_surface_obj;
                tmp_input = obj_surface;

            gen9_add_adv_gpe_surface(ctx, gpe_context,
                                     VP9_BTI_MBENC_GOLD_NV12_G9);

            gen9_add_adv_gpe_surface(ctx, gpe_context,
                                     VP9_BTI_MBENC_GOLD_NV12_G9 + 1);

        /* ALT reference: same dynamic-scaling substitution */
        if (mbenc_param->alt_ref_obj)
            obj_surface = mbenc_param->alt_ref_obj;
            vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);

            if (vp9_state->dys_in_use &&
                ((vp9_priv_surface->frame_width != vp9_state->frame_width) ||
                 (vp9_priv_surface->frame_height != vp9_state->frame_height)))
                tmp_input = vp9_priv_surface->dys_surface_obj;
                tmp_input = obj_surface;

            gen9_add_adv_gpe_surface(ctx, gpe_context,
                                     VP9_BTI_MBENC_ALTREF_NV12_G9);

            gen9_add_adv_gpe_surface(ctx, gpe_context,
                                     VP9_BTI_MBENC_ALTREF_NV12_G9 + 1);

        /* MV and distortion output of the 4x HME pass */
        if (mbenc_param->hme_enabled)
            gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
                                           mbenc_param->ps4x_memv_data_buffer,
                                           I965_SURFACEFORMAT_R8_UNORM,
                                           VP9_BTI_MBENC_HME_MV_DATA_G9);

            gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
                                           mbenc_param->ps4x_memv_distortion_buffer,
                                           I965_SURFACEFORMAT_R8_UNORM,
                                           VP9_BTI_MBENC_HME_DISTORTION_G9);

        if (mbenc_param->segmentation_enabled)
            gen9_add_buffer_2d_gpe_surface(ctx,
                                           mbenc_param->pres_segmentation_map,
                                           I965_SURFACEFORMAT_R8_UNORM,
                                           VP9_BTI_MBENC_SEGMENTATION_MAP_G9);

        /* double-buffered mode decision: previous frame in, current out */
        res_size = 16 * mbenc_param->frame_width_in_mb *
                   mbenc_param->frame_height_in_mb * sizeof(unsigned int);
        gen9_add_buffer_gpe_surface(ctx,
                                    mbenc_param->pres_mode_decision_prev,
                                    VP9_BTI_MBENC_MODE_DECISION_PREV_G9);

        gen9_add_buffer_gpe_surface(ctx,
                                    mbenc_param->pres_mode_decision,
                                    VP9_BTI_MBENC_MODE_DECISION_G9);

        gen9_add_buffer_2d_gpe_surface(ctx,
                                       mbenc_param->pres_output_16x16_inter_modes,
                                       I965_SURFACEFORMAT_R8_UNORM,
                                       VP9_BTI_MBENC_OUT_16x16_INTER_MODES_G9);

        /* the TX kernel's curbe BO, updated in place by this pass */
        gen9_add_dri_buffer_gpe_surface(ctx,
                                        mbenc_param->gpe_context_tx->curbe.bo,
                                        ALIGN(res_size, 64),
                                        mbenc_param->gpe_context_tx->curbe.offset,
                                        VP9_BTI_MBENC_TX_CURBE_G9);

    case VP9_MEDIA_STATE_MBENC_TX:
        obj_surface = mbenc_param->curr_frame_obj;

        gen9_add_2d_gpe_surface(ctx,
                                I965_SURFACEFORMAT_R8_UNORM,
                                VP9_BTI_MBENC_CURR_Y_G9);

        gen9_add_2d_gpe_surface(ctx,
                                I965_SURFACEFORMAT_R16_UINT,
                                VP9_BTI_MBENC_CURR_UV_G9);

        if (mbenc_param->segmentation_enabled)
            gen9_add_buffer_2d_gpe_surface(ctx,
                                           mbenc_param->pres_segmentation_map,
                                           I965_SURFACEFORMAT_R8_UNORM,
                                           VP9_BTI_MBENC_SEGMENTATION_MAP_G9);

        res_size = 16 * mbenc_param->frame_width_in_mb *
                   mbenc_param->frame_height_in_mb * sizeof(unsigned int);
        gen9_add_buffer_gpe_surface(ctx,
                                    mbenc_param->pres_mode_decision,
                                    VP9_BTI_MBENC_MODE_DECISION_G9);

        /* PAK data: 4 dwords per super block */
        res_size = frame_width_in_sb * frame_height_in_sb * 4 * sizeof(unsigned int);
        gen9_add_buffer_gpe_surface(ctx,
                                    mbenc_param->pres_mb_code_surface,
                                    VP9_BTI_MBENC_PAK_DATA_G9);

        /* CU records: 64 x 16 dwords per super block, bound at
         * mb_data_offset within the mb-code surface */
        res_size = frame_width_in_sb * frame_height_in_sb *
                   64 * 16 * sizeof(unsigned int);

        gen9_add_buffer_gpe_surface(ctx,
                                    mbenc_param->pres_mb_code_surface,
                                    mbenc_param->mb_data_offset,
                                    VP9_BTI_MBENC_CU_RECORDS_G9);
3521 gen9_vp9_mbenc_kernel(VADriverContextP ctx,
3522 struct encode_state *encode_state,
3523 struct intel_encoder_context *encoder_context,
3526 struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
3527 struct i965_gpe_context *gpe_context, *tx_gpe_context;
3528 struct gpe_media_object_walker_parameter media_object_walker_param;
3529 struct vp9_encoder_kernel_walker_parameter kernel_walker_param;
3530 unsigned int resolution_x, resolution_y;
3531 struct gen9_vp9_state *vp9_state;
3532 VAEncPictureParameterBufferVP9 *pic_param;
3533 struct gen9_vp9_mbenc_curbe_param curbe_param;
3534 struct gen9_vp9_mbenc_surface_param surface_param;
3535 VAStatus va_status = VA_STATUS_SUCCESS;
3536 int mbenc_gpe_index = 0;
3537 struct object_surface *obj_surface;
3538 struct gen9_surface_vp9 *vp9_priv_surface;
3540 vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
3542 if (!vp9_state || !vp9_state->pic_param)
3543 return VA_STATUS_ERROR_ENCODING_ERROR;
3545 pic_param = vp9_state->pic_param;
3547 switch (media_function)
3549 case VP9_MEDIA_STATE_MBENC_I_32x32:
3550 mbenc_gpe_index = VP9_MBENC_IDX_KEY_32x32;
3553 case VP9_MEDIA_STATE_MBENC_I_16x16:
3554 mbenc_gpe_index = VP9_MBENC_IDX_KEY_16x16;
3557 case VP9_MEDIA_STATE_MBENC_P:
3558 mbenc_gpe_index = VP9_MBENC_IDX_INTER;
3561 case VP9_MEDIA_STATE_MBENC_TX:
3562 mbenc_gpe_index = VP9_MBENC_IDX_TX;
3566 va_status = VA_STATUS_ERROR_OPERATION_FAILED;
3570 gpe_context = &(vme_context->mbenc_context.gpe_contexts[mbenc_gpe_index]);
3571 tx_gpe_context = &(vme_context->mbenc_context.gpe_contexts[VP9_MBENC_IDX_TX]);
3573 gen9_gpe_reset_binding_table(ctx, gpe_context);
3576 if (!vp9_state->mbenc_curbe_set_in_brc_update)
3578 if(media_function == VP9_MEDIA_STATE_MBENC_I_32x32 ||
3579 media_function == VP9_MEDIA_STATE_MBENC_P)
3581 memset(&curbe_param, 0, sizeof(curbe_param));
3582 curbe_param.ppic_param = vp9_state->pic_param;
3583 curbe_param.pseq_param = vp9_state->seq_param;
3584 curbe_param.psegment_param = vp9_state->segment_param;
3585 curbe_param.frame_width_in_mb = vp9_state->frame_width_in_mb;
3586 curbe_param.frame_height_in_mb = vp9_state->frame_height_in_mb;
3587 curbe_param.last_ref_obj = vp9_state->last_ref_obj;
3588 curbe_param.golden_ref_obj = vp9_state->golden_ref_obj;
3589 curbe_param.alt_ref_obj = vp9_state->alt_ref_obj;
3590 curbe_param.hme_enabled = vp9_state->hme_enabled;
3591 curbe_param.ref_frame_flag = vp9_state->ref_frame_flag;
3592 curbe_param.picture_coding_type = vp9_state->picture_coding_type;
3593 curbe_param.media_state_type = media_function;
3594 curbe_param.mbenc_curbe_set_in_brc_update = vp9_state->mbenc_curbe_set_in_brc_update;
3596 vme_context->pfn_set_curbe_mbenc(ctx,
3604 memset(&surface_param, 0, sizeof(surface_param));
3605 surface_param.media_state_type = media_function;
3606 surface_param.picture_coding_type = vp9_state->picture_coding_type;
3607 surface_param.frame_width = vp9_state->frame_width;
3608 surface_param.frame_height = vp9_state->frame_height;
3609 surface_param.frame_width_in_mb = vp9_state->frame_width_in_mb;
3610 surface_param.frame_height_in_mb = vp9_state->frame_height_in_mb;
3611 surface_param.hme_enabled = vp9_state->hme_enabled;
3612 surface_param.segmentation_enabled = pic_param->pic_flags.bits.segmentation_enabled;
3613 surface_param.pres_segmentation_map = &vme_context->mb_segment_map_surface;
3614 surface_param.ps4x_memv_data_buffer = &vme_context->s4x_memv_data_buffer;
3615 surface_param.ps4x_memv_distortion_buffer = &vme_context->s4x_memv_distortion_buffer;
3616 surface_param.pres_mode_decision =
3617 &vme_context->res_mode_decision[vp9_state->curr_mode_decision_index];
3618 surface_param.pres_mode_decision_prev =
3619 &vme_context->res_mode_decision[!vp9_state->curr_mode_decision_index];
3620 surface_param.pres_output_16x16_inter_modes = &vme_context->res_output_16x16_inter_modes;
3621 surface_param.pres_mbenc_curbe_buffer = NULL;
3622 surface_param.last_ref_obj = vp9_state->last_ref_obj;
3623 surface_param.golden_ref_obj = vp9_state->golden_ref_obj;
3624 surface_param.alt_ref_obj = vp9_state->alt_ref_obj;
3625 surface_param.pres_mb_code_surface = &vme_context->res_mb_code_surface;
3626 surface_param.gpe_context_tx = tx_gpe_context;
3627 surface_param.mb_data_offset = vp9_state->mb_data_offset;
3629 obj_surface = encode_state->reconstructed_object;
3630 vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
3631 if (vp9_state->dys_in_use &&
3632 (pic_param->frame_width_src != pic_param->frame_height_dst ||
3633 pic_param->frame_height_src != pic_param->frame_height_dst)) {
3634 obj_surface = vp9_priv_surface->dys_surface_obj;
3636 obj_surface = encode_state->input_yuv_object;
3638 surface_param.curr_frame_obj = obj_surface;
3640 vme_context->pfn_send_mbenc_surface(ctx,
3646 if (media_function == VP9_MEDIA_STATE_MBENC_I_32x32) {
3647 resolution_x = ALIGN(vp9_state->frame_width, 32) / 32;
3648 resolution_y = ALIGN(vp9_state->frame_height, 32) / 32;
3650 resolution_x = ALIGN(vp9_state->frame_width, 16) / 16;
3651 resolution_y = ALIGN(vp9_state->frame_height, 16) / 16;
3654 memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
3655 kernel_walker_param.resolution_x = resolution_x;
3656 kernel_walker_param.resolution_y = resolution_y;
3658 if (media_function == VP9_MEDIA_STATE_MBENC_P ||
3659 media_function == VP9_MEDIA_STATE_MBENC_I_16x16) {
3660 kernel_walker_param.use_scoreboard = 1;
3661 kernel_walker_param.no_dependency = 0;
3662 kernel_walker_param.walker_degree = VP9_45Z_DEGREE;
3664 kernel_walker_param.use_scoreboard = 0;
3665 kernel_walker_param.no_dependency = 1;
3668 gen8_gpe_setup_interface_data(ctx, gpe_context);
3670 gen9_init_media_object_walker_parameter(encoder_context, &kernel_walker_param, &media_object_walker_param);
3672 gen9_run_kernel_media_object_walker(ctx, encoder_context,
3675 &media_object_walker_param);
3680 gen9_init_gpe_context_vp9(VADriverContextP ctx,
3681 struct i965_gpe_context *gpe_context,
3682 struct vp9_encoder_kernel_parameter *kernel_param)
3684 struct i965_driver_data *i965 = i965_driver_data(ctx);
3686 gpe_context->curbe.length = kernel_param->curbe_size; // in bytes
3688 gpe_context->sampler.entry_size = 0;
3689 gpe_context->sampler.max_entries = 0;
3691 if (kernel_param->sampler_size) {
3692 gpe_context->sampler.entry_size = ALIGN(kernel_param->sampler_size, 64);
3693 gpe_context->sampler.max_entries = 1;
3696 gpe_context->idrt.entry_size = ALIGN(sizeof(struct gen8_interface_descriptor_data), 64); // 8 dws, 1 register
3697 gpe_context->idrt.max_entries = NUM_KERNELS_PER_GPE_CONTEXT;
3699 gpe_context->surface_state_binding_table.max_entries = MAX_VP9_ENCODER_SURFACES;
3700 gpe_context->surface_state_binding_table.binding_table_offset = 0;
3701 gpe_context->surface_state_binding_table.surface_state_offset = ALIGN(MAX_VP9_ENCODER_SURFACES * 4, 64);
3702 gpe_context->surface_state_binding_table.length = ALIGN(MAX_VP9_ENCODER_SURFACES * 4, 64) + ALIGN(MAX_VP9_ENCODER_SURFACES * SURFACE_STATE_PADDED_SIZE_GEN9, 64);
3704 if (i965->intel.eu_total > 0)
3705 gpe_context->vfe_state.max_num_threads = 6 * i965->intel.eu_total;
3707 gpe_context->vfe_state.max_num_threads = 112; // 16 EU * 7 threads
3709 gpe_context->vfe_state.curbe_allocation_size = MAX(1, ALIGN(gpe_context->curbe.length, 32) >> 5); // in registers
3710 gpe_context->vfe_state.urb_entry_size = MAX(1, ALIGN(kernel_param->inline_data_size, 32) >> 5); // in registers
3711 gpe_context->vfe_state.num_urb_entries = (MAX_URB_SIZE -
3712 gpe_context->vfe_state.curbe_allocation_size -
3713 ((gpe_context->idrt.entry_size >> 5) *
3714 gpe_context->idrt.max_entries)) / gpe_context->vfe_state.urb_entry_size;
3715 gpe_context->vfe_state.num_urb_entries = CLAMP(1, 127, gpe_context->vfe_state.num_urb_entries);
3716 gpe_context->vfe_state.gpgpu_mode = 0;
3720 gen9_init_vfe_scoreboard_vp9(struct i965_gpe_context *gpe_context,
3721 struct vp9_encoder_scoreboard_parameter *scoreboard_param)
3723 gpe_context->vfe_desc5.scoreboard0.mask = scoreboard_param->mask;
3724 gpe_context->vfe_desc5.scoreboard0.type = scoreboard_param->type;
3725 gpe_context->vfe_desc5.scoreboard0.enable = scoreboard_param->enable;
3727 if (scoreboard_param->walkpat_flag) {
3728 gpe_context->vfe_desc5.scoreboard0.mask = 0x0F;
3729 gpe_context->vfe_desc5.scoreboard0.type = 1;
3731 gpe_context->vfe_desc6.scoreboard1.delta_x0 = 0x0;
3732 gpe_context->vfe_desc6.scoreboard1.delta_y0 = 0xF;
3734 gpe_context->vfe_desc6.scoreboard1.delta_x1 = 0x0;
3735 gpe_context->vfe_desc6.scoreboard1.delta_y1 = 0xE;
3737 gpe_context->vfe_desc6.scoreboard1.delta_x2 = 0xF;
3738 gpe_context->vfe_desc6.scoreboard1.delta_y2 = 0x3;
3740 gpe_context->vfe_desc6.scoreboard1.delta_x3 = 0xF;
3741 gpe_context->vfe_desc6.scoreboard1.delta_y3 = 0x1;
3744 gpe_context->vfe_desc6.scoreboard1.delta_x0 = 0xF;
3745 gpe_context->vfe_desc6.scoreboard1.delta_y0 = 0x0;
3748 gpe_context->vfe_desc6.scoreboard1.delta_x1 = 0x0;
3749 gpe_context->vfe_desc6.scoreboard1.delta_y1 = 0xF;
3752 gpe_context->vfe_desc6.scoreboard1.delta_x2 = 0x1;
3753 gpe_context->vfe_desc6.scoreboard1.delta_y2 = 0xF;
3756 gpe_context->vfe_desc6.scoreboard1.delta_x3 = 0xF;
3757 gpe_context->vfe_desc6.scoreboard1.delta_y3 = 0xF;
3760 gpe_context->vfe_desc7.scoreboard2.delta_x4 = 0xF;
3761 gpe_context->vfe_desc7.scoreboard2.delta_y4 = 0x1;
3764 gpe_context->vfe_desc7.scoreboard2.delta_x5 = 0x0;
3765 gpe_context->vfe_desc7.scoreboard2.delta_y5 = 0xE;
3768 gpe_context->vfe_desc7.scoreboard2.delta_x6 = 0x1;
3769 gpe_context->vfe_desc7.scoreboard2.delta_y6 = 0xE;
3772 gpe_context->vfe_desc7.scoreboard2.delta_x6 = 0xF;
3773 gpe_context->vfe_desc7.scoreboard2.delta_y6 = 0xE;
3777 #define VP9_MI_BLOCK_MASK 0x07
3778 #define VP9_VME_REF_WIN 48
/*
 * Validate the per-frame VP9 encode parameters and derive the frame
 * level encoder state.
 *
 * Checks the picture parameter, coded buffer and surfaces, resolves the
 * reference list (dropping invalid or duplicated references), latches
 * the BRC settings, computes the downscaled dimensions used by the HME
 * passes, and decides whether dynamic scaling (dys), 4x HME and 16x ME
 * are used for this frame.
 *
 * Returns VA_STATUS_SUCCESS, or a VA error code when a mandatory
 * parameter is missing or inconsistent.
 */
gen9_encode_vp9_check_parameter(VADriverContextP ctx,
                                struct encode_state *encode_state,
                                struct intel_encoder_context *encoder_context)
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct gen9_vp9_state *vp9_state;
    VAEncPictureParameterBufferVP9 *pic_param;
    VAEncMiscParameterTypeVP9PerSegmantParam *seg_param;
    VAEncSequenceParameterBufferVP9 *seq_param;
    struct object_surface *obj_surface;
    struct object_buffer *obj_buffer;
    struct gen9_surface_vp9 *vp9_priv_surface;

    vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;

    if (!encode_state->pic_param_ext ||
        !encode_state->pic_param_ext->buffer) {
        return VA_STATUS_ERROR_INVALID_PARAMETER;

    pic_param = (VAEncPictureParameterBufferVP9 *)encode_state->pic_param_ext->buffer;

    /* VP9 frame dimensions must be a multiple of the 8-pel MI block */
    if (pic_param->frame_width_src & VP9_MI_BLOCK_MASK ||
        pic_param->frame_height_src & VP9_MI_BLOCK_MASK ||
        pic_param->frame_width_dst & VP9_MI_BLOCK_MASK ||
        pic_param->frame_height_dst & VP9_MI_BLOCK_MASK)
        return VA_STATUS_ERROR_INVALID_PARAMETER;

    obj_buffer = BUFFER(pic_param->coded_buf);
        !obj_buffer->buffer_store ||
        !obj_buffer->buffer_store->bo)
        return VA_STATUS_ERROR_INVALID_PARAMETER;

    encode_state->coded_buf_object = obj_buffer;
    /* the encode status is read back from the coded buffer BO */
    vp9_state->status_buffer.bo = obj_buffer->buffer_store->bo;

    encode_state->reconstructed_object = SURFACE(pic_param->reconstructed_frame);

    if (!encode_state->reconstructed_object ||
        !encode_state->input_yuv_object)
        return VA_STATUS_ERROR_INVALID_PARAMETER;

    vp9_state->curr_frame = pic_param->reconstructed_frame;
    vp9_state->ref_frame_flag = 0;
    if (pic_param->pic_flags.bits.frame_type == KEY_FRAME ||
        pic_param->pic_flags.bits.intra_only) {
        /* this will be regarded as I-frame type */
        vp9_state->picture_coding_type = 0;
        vp9_state->last_ref_obj = NULL;
        vp9_state->golden_ref_obj = NULL;
        vp9_state->alt_ref_obj = NULL;
        vp9_state->picture_coding_type = 1;
        vp9_state->ref_frame_flag = pic_param->ref_flags.bits.ref_frame_ctrl_l0 |
                                    pic_param->ref_flags.bits.ref_frame_ctrl_l1;

        /* drop each reference whose surface lacks encoder private data */
        obj_surface = SURFACE(pic_param->reference_frames[pic_param->ref_flags.bits.ref_last_idx]);
        vp9_state->last_ref_obj = obj_surface;
            !obj_surface->private_data) {
            vp9_state->last_ref_obj = NULL;
            vp9_state->ref_frame_flag &= ~(VP9_LAST_REF);

        obj_surface = SURFACE(pic_param->reference_frames[pic_param->ref_flags.bits.ref_gf_idx]);
        vp9_state->golden_ref_obj = obj_surface;
            !obj_surface->private_data) {
            vp9_state->golden_ref_obj = NULL;
            vp9_state->ref_frame_flag &= ~(VP9_GOLDEN_REF);

        obj_surface = SURFACE(pic_param->reference_frames[pic_param->ref_flags.bits.ref_arf_idx]);
        vp9_state->alt_ref_obj = obj_surface;
            !obj_surface->private_data) {
            vp9_state->alt_ref_obj = NULL;
            vp9_state->ref_frame_flag &= ~(VP9_ALT_REF);

        /* remove the duplicated flag and ref frame list */
        if (vp9_state->ref_frame_flag & VP9_LAST_REF) {
            if (pic_param->reference_frames[pic_param->ref_flags.bits.ref_last_idx] ==
                pic_param->reference_frames[pic_param->ref_flags.bits.ref_gf_idx]) {
                vp9_state->ref_frame_flag &= ~(VP9_GOLDEN_REF);
                vp9_state->golden_ref_obj = NULL;

            if (pic_param->reference_frames[pic_param->ref_flags.bits.ref_last_idx] ==
                pic_param->reference_frames[pic_param->ref_flags.bits.ref_arf_idx]) {
                vp9_state->ref_frame_flag &= ~(VP9_ALT_REF);
                vp9_state->alt_ref_obj = NULL;

        if (vp9_state->ref_frame_flag & VP9_GOLDEN_REF) {
            if (pic_param->reference_frames[pic_param->ref_flags.bits.ref_gf_idx] ==
                pic_param->reference_frames[pic_param->ref_flags.bits.ref_arf_idx]) {
                vp9_state->ref_frame_flag &= ~(VP9_ALT_REF);
                vp9_state->alt_ref_obj = NULL;

        /* an inter frame needs at least one usable reference */
        if (vp9_state->ref_frame_flag == 0)
            return VA_STATUS_ERROR_INVALID_PARAMETER;

    /* per-segment qindex/filter deltas arrive via the q_matrix buffer */
    if (pic_param->pic_flags.bits.segmentation_enabled) {
        if (!encode_state->q_matrix ||
            !encode_state->q_matrix->buffer) {
            return VA_STATUS_ERROR_INVALID_PARAMETER;
        seg_param = (VAEncMiscParameterTypeVP9PerSegmantParam *)
                    encode_state->q_matrix->buffer;

    /* the sequence parameter is optional; fall back to a placeholder */
    if (encode_state->seq_param_ext &&
        encode_state->seq_param_ext->buffer)
        seq_param = (VAEncSequenceParameterBufferVP9 *)encode_state->seq_param_ext->buffer;
        seq_param = &vp9_state->bogus_seq_param;

    vp9_state->pic_param = pic_param;
    vp9_state->segment_param = seg_param;
    vp9_state->seq_param = seq_param;

    /* the reconstructed surface must be large enough for the dst size */
    obj_surface = encode_state->reconstructed_object;
    if (pic_param->frame_width_dst > obj_surface->orig_width ||
        pic_param->frame_height_dst > obj_surface->orig_height)
        return VA_STATUS_ERROR_INVALID_SURFACE;

    /* src != dst dimensions require dynamic-scaling support */
    if (!vp9_state->dys_enabled &&
        ((pic_param->frame_width_src != pic_param->frame_width_dst) ||
         (pic_param->frame_height_src != pic_param->frame_height_dst)))
        return VA_STATUS_ERROR_UNIMPLEMENTED;

    if (vp9_state->brc_enabled) {
        /* (re)latch BRC settings on the first frame and on key frames */
        if (vp9_state->first_frame || vp9_state->picture_coding_type == KEY_FRAME) {
            vp9_state->brc_reset = encoder_context->brc.need_reset || vp9_state->first_frame;

            if (!encoder_context->brc.framerate[0].num || !encoder_context->brc.framerate[0].den ||
                !encoder_context->brc.bits_per_second[0])
                return VA_STATUS_ERROR_INVALID_PARAMETER;

            vp9_state->gop_size = encoder_context->brc.gop_size;
            vp9_state->framerate = encoder_context->brc.framerate[0];

            if (encoder_context->rate_control_mode == VA_RC_CBR ||
                !encoder_context->brc.target_percentage[0]) {
                /* CBR: min = target = max */
                vp9_state->target_bit_rate = encoder_context->brc.bits_per_second[0];
                vp9_state->max_bit_rate = vp9_state->target_bit_rate;
                vp9_state->min_bit_rate = vp9_state->target_bit_rate;
                /* VBR: target is a percentage of max; min mirrors the
                 * target about max, floored at zero */
                vp9_state->max_bit_rate = encoder_context->brc.bits_per_second[0];
                vp9_state->target_bit_rate = vp9_state->max_bit_rate * encoder_context->brc.target_percentage[0] / 100;
                if (2 * vp9_state->target_bit_rate < vp9_state->max_bit_rate)
                    vp9_state->min_bit_rate = 0;
                    vp9_state->min_bit_rate = 2 * vp9_state->target_bit_rate - vp9_state->max_bit_rate;

            /* HRD buffer: explicit size, else derived from the BRC
             * window, else one second's worth of max bitrate */
            if (encoder_context->brc.hrd_buffer_size)
                vp9_state->vbv_buffer_size_in_bit = encoder_context->brc.hrd_buffer_size;
            else if (encoder_context->brc.window_size)
                vp9_state->vbv_buffer_size_in_bit = (uint64_t)vp9_state->max_bit_rate * encoder_context->brc.window_size / 1000;
                vp9_state->vbv_buffer_size_in_bit = vp9_state->max_bit_rate;
            if (encoder_context->brc.hrd_initial_buffer_fullness)
                vp9_state->init_vbv_buffer_fullness_in_bit = encoder_context->brc.hrd_initial_buffer_fullness;
                vp9_state->init_vbv_buffer_fullness_in_bit = vp9_state->vbv_buffer_size_in_bit / 2;

    vp9_state->frame_width = pic_param->frame_width_dst;
    vp9_state->frame_height = pic_param->frame_height_dst;

    /* 4x / 16x downscaled dimensions used by the HME/16xME passes */
    vp9_state->frame_width_4x = ALIGN(vp9_state->frame_width / 4, 16);
    vp9_state->frame_height_4x = ALIGN(vp9_state->frame_height / 4, 16);

    vp9_state->frame_width_16x = ALIGN(vp9_state->frame_width / 16, 16);
    vp9_state->frame_height_16x = ALIGN(vp9_state->frame_height / 16, 16);

    vp9_state->frame_width_in_mb = ALIGN(vp9_state->frame_width, 16) / 16;
    vp9_state->frame_height_in_mb = ALIGN(vp9_state->frame_height, 16) / 16;

    vp9_state->downscaled_width_4x_in_mb = vp9_state->frame_width_4x / 16;
    vp9_state->downscaled_height_4x_in_mb = vp9_state->frame_height_4x / 16;
    vp9_state->downscaled_width_16x_in_mb = vp9_state->frame_width_16x / 16;
    vp9_state->downscaled_height_16x_in_mb = vp9_state->frame_height_16x / 16;

    vp9_state->dys_in_use = 0;
    if(pic_param->frame_width_src != pic_param->frame_width_dst ||
       pic_param->frame_height_src != pic_param->frame_height_dst)
        vp9_state->dys_in_use = 1;
    vp9_state->dys_ref_frame_flag = 0;
    /* check the dys setting. The dys is supported by default. */
    if (pic_param->pic_flags.bits.frame_type != KEY_FRAME &&
        !pic_param->pic_flags.bits.intra_only) {
        /* a reference only needs dynamic scaling when its stored size
         * differs from the current frame's size */
        vp9_state->dys_ref_frame_flag = vp9_state->ref_frame_flag;

        if ((vp9_state->ref_frame_flag & VP9_LAST_REF) &&
            vp9_state->last_ref_obj) {
            obj_surface = vp9_state->last_ref_obj;
            vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);

            if (vp9_state->frame_width == vp9_priv_surface->frame_width &&
                vp9_state->frame_height == vp9_priv_surface->frame_height)
                vp9_state->dys_ref_frame_flag &= ~(VP9_LAST_REF);

        if ((vp9_state->ref_frame_flag & VP9_GOLDEN_REF) &&
            vp9_state->golden_ref_obj) {
            obj_surface = vp9_state->golden_ref_obj;
            vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);

            if (vp9_state->frame_width == vp9_priv_surface->frame_width &&
                vp9_state->frame_height == vp9_priv_surface->frame_height)
                vp9_state->dys_ref_frame_flag &= ~(VP9_GOLDEN_REF);

        if ((vp9_state->ref_frame_flag & VP9_ALT_REF) &&
            vp9_state->alt_ref_obj) {
            obj_surface = vp9_state->alt_ref_obj;
            vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);

            if (vp9_state->frame_width == vp9_priv_surface->frame_width &&
                vp9_state->frame_height == vp9_priv_surface->frame_height)
                vp9_state->dys_ref_frame_flag &= ~(VP9_ALT_REF);

        if (vp9_state->dys_ref_frame_flag)
            vp9_state->dys_in_use = 1;

    if (vp9_state->hme_supported) {
        vp9_state->hme_enabled = 1;
        vp9_state->hme_enabled = 0;

    if (vp9_state->b16xme_supported) {
        vp9_state->b16xme_enabled = 1;
        vp9_state->b16xme_enabled = 0;

    /* disable HME/16xME if the size is too small */
    if (vp9_state->frame_width_4x <= VP9_VME_REF_WIN ||
        vp9_state->frame_height_4x <= VP9_VME_REF_WIN) {
        vp9_state->hme_enabled = 0;
        vp9_state->b16xme_enabled = 0;

    if (vp9_state->frame_width_16x < VP9_VME_REF_WIN ||
        vp9_state->frame_height_16x < VP9_VME_REF_WIN)
        vp9_state->b16xme_enabled = 0;

    /* NOTE(review): the other frame-type checks in this function use
     * KEY_FRAME; confirm HCP_VP9_KEY_FRAME shares the same value */
    if (pic_param->pic_flags.bits.frame_type == HCP_VP9_KEY_FRAME ||
        pic_param->pic_flags.bits.intra_only) {
        vp9_state->hme_enabled = 0;
        vp9_state->b16xme_enabled = 0;

    /* BRC distortion surface is only produced for key frames */
    vp9_state->mbenc_keyframe_dist_enabled = 0;
    if ((vp9_state->picture_coding_type == KEY_FRAME) &&
        vp9_state->brc_distortion_buffer_supported)
        vp9_state->mbenc_keyframe_dist_enabled = 1;

    return VA_STATUS_SUCCESS;
/*
 * Per-frame preparation before the VP9 VME (ENC) kernels run.
 *
 * - Decides whether the application-supplied packed raw-data header or a
 *   driver-generated uncompressed header is used, and records the header
 *   bit offsets the PAK stage needs.
 * - Validates/allocates the NV12 BOs for the input and reconstructed
 *   surfaces, then initializes the private VP9 data on the recon surface.
 * - (Re)checks dynamic-scaling (DYS) surfaces for the reconstructed frame
 *   and for each reference flagged in dys_ref_frame_flag.
 *
 * Returns VA_STATUS_SUCCESS, or an error when the state/params are invalid.
 *
 * NOTE(review): this chunk was extracted with original line numbers fused
 * into each line and some lines elided (see the numbering gaps, e.g. the
 * error-handling after the i965_check_alloc_surface_bo calls); the code
 * text is deliberately left untouched.
 */
4060 gen9_vme_gpe_kernel_prepare_vp9(VADriverContextP ctx,
4061     struct encode_state *encode_state,
4062     struct intel_encoder_context *encoder_context)
4064     struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
4065     struct vp9_surface_param surface_param;
4066     struct gen9_vp9_state *vp9_state;
4067     VAEncPictureParameterBufferVP9 *pic_param;
4068     struct object_surface *obj_surface;
4069     struct gen9_surface_vp9 *vp9_surface;
4070     int driver_header_flag = 0;
4073     vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
4075     if (!vp9_state || !vp9_state->pic_param)
4076         return VA_STATUS_ERROR_INVALID_PARAMETER;
4078     pic_param = vp9_state->pic_param;
4080     /* this is to check whether the driver should generate the uncompressed header */
4081     driver_header_flag = 1;
4082     if (encode_state->packed_header_data_ext &&
4083         encode_state->packed_header_data_ext[0] &&
4084         pic_param->bit_offset_first_partition_size) {
4085         VAEncPackedHeaderParameterBuffer *param = NULL;
4087         param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_params_ext[0]->buffer;
4089         if (param->type == VAEncPackedHeaderRawData) {
4091             unsigned int length_in_bits;
4093             header_data = (char *)encode_state->packed_header_data_ext[0]->buffer;
4094             length_in_bits = param->bit_length;
4095             driver_header_flag = 0;
4097             vp9_state->frame_header.bit_offset_first_partition_size =
4098                 pic_param->bit_offset_first_partition_size;
4099             vp9_state->header_length = ALIGN(length_in_bits, 8) >> 3;
4100             vp9_state->alias_insert_data = header_data;
4102             vp9_state->frame_header.bit_offset_ref_lf_delta = pic_param->bit_offset_ref_lf_delta;
4103             vp9_state->frame_header.bit_offset_mode_lf_delta = pic_param->bit_offset_mode_lf_delta;
4104             vp9_state->frame_header.bit_offset_lf_level = pic_param->bit_offset_lf_level;
4105             vp9_state->frame_header.bit_offset_qindex = pic_param->bit_offset_qindex;
4106             vp9_state->frame_header.bit_offset_segmentation = pic_param->bit_offset_segmentation;
4107             vp9_state->frame_header.bit_size_segmentation = pic_param->bit_size_segmentation;
/* No usable raw-data packed header: the driver builds the uncompressed header itself. */
4111     if (driver_header_flag) {
4112         memset(&vp9_state->frame_header, 0, sizeof(vp9_state->frame_header));
4113         intel_write_uncompressed_header(encode_state,
4114                                         VAProfileVP9Profile0,
4115                                         vme_context->frame_header_data,
4116                                         &vp9_state->header_length,
4117                                         &vp9_state->frame_header);
4118         vp9_state->alias_insert_data = vme_context->frame_header_data;
4121     va_status = i965_check_alloc_surface_bo(ctx, encode_state->input_yuv_object,
4122                                             1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
4123     if (va_status != VA_STATUS_SUCCESS)
4126     va_status = i965_check_alloc_surface_bo(ctx, encode_state->reconstructed_object,
4127                                             1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
4129     if (va_status != VA_STATUS_SUCCESS)
4132     surface_param.frame_width = vp9_state->frame_width;
4133     surface_param.frame_height = vp9_state->frame_height;
4134     va_status = gen9_vp9_init_check_surfaces(ctx,
4135                                              encode_state->reconstructed_object,
/* NOTE(review): the cast below treats reconstructed_object directly as a
 * gen9_surface_vp9; elided lines may have fetched it via private_data — confirm. */
4139     vp9_surface = (struct gen9_surface_vp9*)encode_state->reconstructed_object;
4141     vp9_surface->qp_value = pic_param->luma_ac_qindex + pic_param->luma_dc_qindex_delta;
4143     if (vp9_state->dys_in_use &&
4144         (pic_param->frame_width_src != pic_param->frame_width_dst ||
4145          pic_param->frame_height_src != pic_param->frame_height_dst)) {
4146         surface_param.frame_width = pic_param->frame_width_dst;
4147         surface_param.frame_height = pic_param->frame_height_dst;
4148         va_status = gen9_vp9_check_dys_surfaces(ctx,
4149                                                 encode_state->reconstructed_object,
/* DYS references: (re)allocate a scaled surface for each reference whose
 * stored size differs from the current frame (flags set in check_parameter). */
4156     if (vp9_state->dys_ref_frame_flag) {
4157         if ((vp9_state->dys_ref_frame_flag & VP9_LAST_REF) &&
4158             vp9_state->last_ref_obj) {
4159             obj_surface = vp9_state->last_ref_obj;
4160             surface_param.frame_width = vp9_state->frame_width;
4161             surface_param.frame_height = vp9_state->frame_height;
4162             va_status = gen9_vp9_check_dys_surfaces(ctx,
4169         if ((vp9_state->dys_ref_frame_flag & VP9_GOLDEN_REF) &&
4170             vp9_state->golden_ref_obj) {
4171             obj_surface = vp9_state->golden_ref_obj;
4172             surface_param.frame_width = vp9_state->frame_width;
4173             surface_param.frame_height = vp9_state->frame_height;
4174             va_status = gen9_vp9_check_dys_surfaces(ctx,
4181         if ((vp9_state->dys_ref_frame_flag & VP9_ALT_REF) &&
4182             vp9_state->alt_ref_obj) {
4183             obj_surface = vp9_state->alt_ref_obj;
4184             surface_param.frame_width = vp9_state->frame_width;
4185             surface_param.frame_height = vp9_state->frame_height;
4186             va_status = gen9_vp9_check_dys_surfaces(ctx,
4195     if (va_status != VA_STATUS_SUCCESS)
4197     /* check the corresponding ref_frame_flag && dys_ref_frame_flag */
4199     return VA_STATUS_SUCCESS;
/*
 * Per-frame GPE context setup for the MBEnc and DYS kernels.
 *
 * All NUM_VP9_MBENC contexts share one dynamic-state BO (mbenc_bo_dys):
 * a common curbe region at offset 0, a sampler region, and one
 * interface-descriptor slot per context — so that BRC can patch the MBEnc
 * curbe in place.  Finally the DYS GPE context and its AVS sampler state
 * are initialized.
 *
 * NOTE(review): lossy extraction — original line numbers fused into each
 * line, some lines elided (e.g. dri_bo_alloc arguments); code untouched.
 */
4203 gen9_vme_gpe_kernel_init_vp9(VADriverContextP ctx,
4204     struct encode_state *encode_state,
4205     struct intel_encoder_context *encoder_context)
4207     struct i965_driver_data *i965 = i965_driver_data(ctx);
4208     struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
4209     struct vp9_mbenc_context *mbenc_context = &vme_context->mbenc_context;
4210     struct vp9_dys_context *dys_context = &vme_context->dys_context;
4211     struct gpe_dynamic_state_parameter ds_param;
4215     * BRC will update MBEnc curbe data buffer, so initialize GPE context for
4218     for (i = 0; i < NUM_VP9_MBENC; i++) {
4219         gen8_gpe_context_init(ctx, &mbenc_context->gpe_contexts[i]);
4223     * VP9_MBENC_XXX uses the same dynamic state buffer as they share the same
/* Layout: [curbe | 128-byte pad/sampler | NUM_VP9_MBENC descriptor slots]. */
4226     ds_param.bo_size = ALIGN(sizeof(vp9_mbenc_curbe_data), 64) + 128 +
4227         ALIGN(sizeof(struct gen8_interface_descriptor_data), 64) * NUM_VP9_MBENC;
4228     mbenc_context->mbenc_bo_dys = dri_bo_alloc(i965->intel.bufmgr,
4232     mbenc_context->mbenc_bo_size = ds_param.bo_size;
4234     ds_param.bo = mbenc_context->mbenc_bo_dys;
4235     ds_param.curbe_offset = 0;
4236     ds_param.sampler_offset = ALIGN(sizeof(vp9_mbenc_curbe_data), 64);
4237     for (i = 0; i < NUM_VP9_MBENC; i++) {
4238         ds_param.idrt_offset = ds_param.sampler_offset + 128 +
4239             ALIGN(sizeof(struct gen8_interface_descriptor_data), 64) * i;
4241         gen8_gpe_context_set_dynamic_buffer(ctx,
4242                                             &mbenc_context->gpe_contexts[i],
4246     gen8_gpe_context_init(ctx, &dys_context->gpe_context);
4247     gen9_vp9_dys_set_sampler_state(&dys_context->gpe_context);
4249     return VA_STATUS_SUCCESS;
/*
 * Per-frame teardown after the VME kernels: release the shared MBEnc
 * dynamic-state BO allocated in gen9_vme_gpe_kernel_init_vp9 and clear the
 * pointer so a stale handle cannot be reused.
 */
4253 gen9_vme_gpe_kernel_final_vp9(VADriverContextP ctx,
4254     struct encode_state *encode_state,
4255     struct intel_encoder_context *encoder_context)
4257     struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
4258     struct vp9_mbenc_context *mbenc_context = &vme_context->mbenc_context;
4260     dri_bo_unreference(mbenc_context->mbenc_bo_dys);
4261     mbenc_context->mbenc_bo_dys = NULL;
4263     return VA_STATUS_SUCCESS;
/*
 * Dispatches the VME kernel pipeline for one frame, in order:
 *   DYS (if dynamic scaling is in use) -> BRC init/reset (first frame or
 *   reset) -> 4x/16x scaling -> 16x then 4x HME (inter frames only) ->
 *   BRC intra-distortion + BRC update (when BRC is on) -> MBEnc passes
 *   (I_32x32 + I_16x16 for key frames, P otherwise — the elided else) ->
 *   MBEnc TX, then flips the ping-pong mode-decision index and latches
 *   the BRC inited/reset flags.
 *
 * NOTE(review): lossy extraction (fused line numbers, elided lines such as
 * the kernel-type arguments and else branches); code left untouched.
 */
4267 gen9_vme_gpe_kernel_run_vp9(VADriverContextP ctx,
4268     struct encode_state *encode_state,
4269     struct intel_encoder_context *encoder_context)
4271     struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
4272     struct gen9_vp9_state *vp9_state;
4275     vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
4277     if (!vp9_state || !vp9_state->pic_param)
4278         return VA_STATUS_ERROR_INVALID_PARAMETER;
4280     if (vp9_state->dys_in_use) {
4281         gen9_vp9_run_dys_refframes(ctx, encode_state, encoder_context);
4284     if (vp9_state->brc_enabled && (vp9_state->brc_reset || !vp9_state->brc_inited)) {
4285         gen9_vp9_brc_init_reset_kernel(ctx, encode_state, encoder_context);
/* Key frame: clear both ping-pong mode-decision buffers before MBEnc. */
4288     if (vp9_state->picture_coding_type == KEY_FRAME) {
4289         for (i = 0; i < 2; i++)
4290             i965_zero_gpe_resource(&vme_context->res_mode_decision[i]);
4293     if (vp9_state->hme_supported) {
4294         gen9_vp9_scaling_kernel(ctx, encode_state,
4297         if (vp9_state->b16xme_supported) {
4298             gen9_vp9_scaling_kernel(ctx, encode_state,
/* picture_coding_type non-zero means inter frame: run 16x ME before 4x ME. */
4304     if (vp9_state->picture_coding_type && vp9_state->hme_enabled) {
4305         if (vp9_state->b16xme_enabled)
4306             gen9_vp9_me_kernel(ctx, encode_state,
4310         gen9_vp9_me_kernel(ctx, encode_state,
4315     if (vp9_state->brc_enabled) {
4316         if (vp9_state->mbenc_keyframe_dist_enabled)
4317             gen9_vp9_brc_intra_dist_kernel(ctx,
4321         gen9_vp9_brc_update_kernel(ctx, encode_state,
4325     if (vp9_state->picture_coding_type == KEY_FRAME) {
4326         gen9_vp9_mbenc_kernel(ctx, encode_state,
4328                               VP9_MEDIA_STATE_MBENC_I_32x32);
4329         gen9_vp9_mbenc_kernel(ctx, encode_state,
4331                               VP9_MEDIA_STATE_MBENC_I_16x16);
4333         gen9_vp9_mbenc_kernel(ctx, encode_state,
4335                               VP9_MEDIA_STATE_MBENC_P);
4338     gen9_vp9_mbenc_kernel(ctx, encode_state,
4340                           VP9_MEDIA_STATE_MBENC_TX);
4342     vp9_state->curr_mode_decision_index ^= 1;
4343     if (vp9_state->brc_enabled) {
4344         vp9_state->brc_inited = 1;
4345         vp9_state->brc_reset = 0;
4348     return VA_STATUS_SUCCESS;
/*
 * Top-level VME (ENC) pipeline entry point for one VP9 frame:
 * check parameters -> allocate resources (BRC buffers only once) ->
 * prepare -> init -> run -> final.  Any failing step aborts with its
 * VAStatus (the goto/label lines are elided in this extraction).
 */
4352 gen9_vme_pipeline_vp9(VADriverContextP ctx,
4354     struct encode_state *encode_state,
4355     struct intel_encoder_context *encoder_context)
4358     struct gen9_vp9_state *vp9_state;
4360     vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
4363         return VA_STATUS_ERROR_INVALID_CONTEXT;
4365     va_status = gen9_encode_vp9_check_parameter(ctx, encode_state, encoder_context);
4366     if (va_status != VA_STATUS_SUCCESS)
/* brc_allocated gates the one-time allocation of BRC-related resources. */
4369     va_status = gen9_vp9_allocate_resources(ctx, encode_state,
4371                                             !vp9_state->brc_allocated);
4373     if (va_status != VA_STATUS_SUCCESS)
4375     vp9_state->brc_allocated = 1;
4377     va_status = gen9_vme_gpe_kernel_prepare_vp9(ctx, encode_state, encoder_context);
4379     if (va_status != VA_STATUS_SUCCESS)
4382     va_status = gen9_vme_gpe_kernel_init_vp9(ctx, encode_state, encoder_context);
4383     if (va_status != VA_STATUS_SUCCESS)
4386     va_status = gen9_vme_gpe_kernel_run_vp9(ctx, encode_state, encoder_context);
4387     if (va_status != VA_STATUS_SUCCESS)
4390     gen9_vme_gpe_kernel_final_vp9(ctx, encode_state, encoder_context);
4392     return VA_STATUS_SUCCESS;
/* Destroy all NUM_VP9_BRC GPE contexts owned by the BRC context. */
4396 gen9_vme_brc_context_destroy_vp9(struct vp9_brc_context *brc_context)
4400     for (i = 0; i < NUM_VP9_BRC; i++)
4401         gen8_gpe_context_destroy(&brc_context->gpe_contexts[i]);
/* Destroy all NUM_VP9_SCALING GPE contexts owned by the scaling context. */
4405 gen9_vme_scaling_context_destroy_vp9(struct vp9_scaling_context *scaling_context)
4409     for (i = 0; i < NUM_VP9_SCALING; i++)
4410         gen8_gpe_context_destroy(&scaling_context->gpe_contexts[i]);
/* Destroy the single GPE context owned by the ME context. */
4414 gen9_vme_me_context_destroy_vp9(struct vp9_me_context *me_context)
4416     gen8_gpe_context_destroy(&me_context->gpe_context);
/*
 * Destroy all MBEnc GPE contexts and drop the shared dynamic-state BO.
 * NOTE(review): unlike gen9_vme_gpe_kernel_final_vp9, mbenc_bo_dys is not
 * set to NULL after the unreference here — a later stray unreference would
 * double-release; worth confirming no path reuses it after destroy.
 */
4420 gen9_vme_mbenc_context_destroy_vp9(struct vp9_mbenc_context *mbenc_context)
4424     for (i = 0; i < NUM_VP9_MBENC; i++)
4425         gen8_gpe_context_destroy(&mbenc_context->gpe_contexts[i]);
4426     dri_bo_unreference(mbenc_context->mbenc_bo_dys);
4427     mbenc_context->mbenc_bo_size = 0;
/* Destroy the single GPE context owned by the DYS (dynamic scaling) context. */
4431 gen9_vme_dys_context_destroy_vp9(struct vp9_dys_context *dys_context)
4433     gen8_gpe_context_destroy(&dys_context->gpe_context);
/*
 * Tear down everything the VME context owns: shared GPE resources first,
 * then each per-kernel sub-context (scaling, ME, MBEnc, BRC, DYS).
 */
4437 gen9_vme_kernel_context_destroy_vp9(struct gen9_encoder_context_vp9 *vme_context)
4439     gen9_vp9_free_resources(vme_context);
4440     gen9_vme_scaling_context_destroy_vp9(&vme_context->scaling_context);
4441     gen9_vme_me_context_destroy_vp9(&vme_context->me_context);
4442     gen9_vme_mbenc_context_destroy_vp9(&vme_context->mbenc_context);
4443     gen9_vme_brc_context_destroy_vp9(&vme_context->brc_context);
4444     gen9_vme_dys_context_destroy_vp9(&vme_context->dys_context);
/*
 * Opaque destroy callback registered for the VME context.
 * NOTE(review): the trailing lines are elided in this extraction —
 * presumably the context itself is freed after the kernel-context
 * teardown; confirm against the full file.
 */
4450 gen9_vme_context_destroy_vp9(void *context)
4452     struct gen9_encoder_context_vp9 *vme_context = context;
4457     gen9_vme_kernel_context_destroy_vp9(vme_context);
/*
 * One-time setup of the two scaling GPE contexts:
 *   gpe_contexts[0] = 4x down-scaler (curbe + inline data, Cm kernel),
 *   gpe_contexts[1] = 2x down-scaler (curbe only).
 * Each context gets VFE scoreboard state and its kernel loaded from the
 * media_vp9_kernels blob; binding-table indices for src/dst/stat surfaces
 * are recorded on the scaling context.
 *
 * NOTE(review): lossy extraction — kernel-load argument lines are elided;
 * code text left untouched.
 */
4465 gen9_vme_scaling_context_init_vp9(VADriverContextP ctx,
4466     struct gen9_encoder_context_vp9 *vme_context,
4467     struct vp9_scaling_context *scaling_context)
4469     struct i965_gpe_context *gpe_context = NULL;
4470     struct vp9_encoder_kernel_parameter kernel_param;
4471     struct vp9_encoder_scoreboard_parameter scoreboard_param;
4472     struct i965_kernel scale_kernel;
4474     kernel_param.curbe_size = sizeof(vp9_scaling4x_curbe_data_cm);
4475     kernel_param.inline_data_size = sizeof(vp9_scaling4x_inline_data_cm);
4476     kernel_param.sampler_size = 0;
4478     memset(&scoreboard_param, 0, sizeof(scoreboard_param));
4479     scoreboard_param.mask = 0xFF;
4480     scoreboard_param.enable = vme_context->use_hw_scoreboard;
4481     scoreboard_param.type = vme_context->use_hw_non_stalling_scoreboard;
4482     scoreboard_param.walkpat_flag = 0;
4484     gpe_context = &scaling_context->gpe_contexts[0];
4485     gen9_init_gpe_context_vp9(ctx, gpe_context, &kernel_param);
4486     gen9_init_vfe_scoreboard_vp9(gpe_context, &scoreboard_param);
4488     scaling_context->scaling_4x_bti.scaling_frame_src_y = VP9_BTI_SCALING_FRAME_SRC_Y;
4489     scaling_context->scaling_4x_bti.scaling_frame_dst_y = VP9_BTI_SCALING_FRAME_DST_Y;
4490     scaling_context->scaling_4x_bti.scaling_frame_mbv_proc_stat_dst =
4491         VP9_BTI_SCALING_FRAME_MBVPROCSTATS_DST_CM;
4493     memset(&scale_kernel, 0, sizeof(scale_kernel));
4495     intel_vp9_get_kernel_header_and_size((void *)media_vp9_kernels,
4496                                          sizeof(media_vp9_kernels),
4497                                          INTEL_VP9_ENC_SCALING4X,
4501     gen8_gpe_load_kernels(ctx,
/* Second context: 2x scaling — no inline data for this kernel. */
4506     kernel_param.curbe_size = sizeof(vp9_scaling2x_curbe_data_cm);
4507     kernel_param.inline_data_size = 0;
4508     kernel_param.sampler_size = 0;
4510     gpe_context = &scaling_context->gpe_contexts[1];
4511     gen9_init_gpe_context_vp9(ctx, gpe_context, &kernel_param);
4512     gen9_init_vfe_scoreboard_vp9(gpe_context, &scoreboard_param);
4514     memset(&scale_kernel, 0, sizeof(scale_kernel));
4516     intel_vp9_get_kernel_header_and_size((void *)media_vp9_kernels,
4517                                          sizeof(media_vp9_kernels),
4518                                          INTEL_VP9_ENC_SCALING2X,
4522     gen8_gpe_load_kernels(ctx,
4527     scaling_context->scaling_2x_bti.scaling_frame_src_y = VP9_BTI_SCALING_FRAME_SRC_Y;
4528     scaling_context->scaling_2x_bti.scaling_frame_dst_y = VP9_BTI_SCALING_FRAME_DST_Y;
/*
 * One-time setup of the HME (motion estimation) GPE context: curbe-only
 * kernel, VFE scoreboard configured from the shared vme_context flags,
 * and the ME kernel loaded from the media_vp9_kernels blob (the kernel-id
 * and load-argument lines are elided in this extraction).
 */
4533 gen9_vme_me_context_init_vp9(VADriverContextP ctx,
4534     struct gen9_encoder_context_vp9 *vme_context,
4535     struct vp9_me_context *me_context)
4537     struct i965_gpe_context *gpe_context = NULL;
4538     struct vp9_encoder_kernel_parameter kernel_param;
4539     struct vp9_encoder_scoreboard_parameter scoreboard_param;
4540     struct i965_kernel scale_kernel;
4542     kernel_param.curbe_size = sizeof(vp9_me_curbe_data);
4543     kernel_param.inline_data_size = 0;
4544     kernel_param.sampler_size = 0;
4546     memset(&scoreboard_param, 0, sizeof(scoreboard_param));
4547     scoreboard_param.mask = 0xFF;
4548     scoreboard_param.enable = vme_context->use_hw_scoreboard;
4549     scoreboard_param.type = vme_context->use_hw_non_stalling_scoreboard;
4550     scoreboard_param.walkpat_flag = 0;
4552     gpe_context = &me_context->gpe_context;
4553     gen9_init_gpe_context_vp9(ctx, gpe_context, &kernel_param);
4554     gen9_init_vfe_scoreboard_vp9(gpe_context, &scoreboard_param);
4556     memset(&scale_kernel, 0, sizeof(scale_kernel));
4558     intel_vp9_get_kernel_header_and_size((void *)media_vp9_kernels,
4559                                          sizeof(media_vp9_kernels),
4564     gen8_gpe_load_kernels(ctx,
/*
 * One-time setup of all NUM_VP9_MBENC GPE contexts.  Each context shares
 * the same curbe layout and scoreboard settings; only the walk pattern
 * differs (45-degree pattern for the key-frame 16x16 and inter passes,
 * per the walkpat_flag below).  The MBEnc kernel is loaded into every
 * context from the media_vp9_kernels blob.
 *
 * NOTE(review): lossy extraction — the `int i;` declaration and the
 * else/closing-brace lines are elided; code text left untouched.
 */
4573 gen9_vme_mbenc_context_init_vp9(VADriverContextP ctx,
4574     struct gen9_encoder_context_vp9 *vme_context,
4575     struct vp9_mbenc_context *mbenc_context)
4577     struct i965_gpe_context *gpe_context = NULL;
4578     struct vp9_encoder_kernel_parameter kernel_param;
4579     struct vp9_encoder_scoreboard_parameter scoreboard_param;
4581     struct i965_kernel scale_kernel;
4583     kernel_param.curbe_size = sizeof(vp9_mbenc_curbe_data);
4584     kernel_param.inline_data_size = 0;
4585     kernel_param.sampler_size = 0;
4587     memset(&scoreboard_param, 0, sizeof(scoreboard_param));
4588     scoreboard_param.mask = 0xFF;
4589     scoreboard_param.enable = vme_context->use_hw_scoreboard;
4590     scoreboard_param.type = vme_context->use_hw_non_stalling_scoreboard;
4592     for (i = 0; i < NUM_VP9_MBENC; i++) {
4593         gpe_context = &mbenc_context->gpe_contexts[i];
4595         if ((i == VP9_MBENC_IDX_KEY_16x16) ||
4596             (i == VP9_MBENC_IDX_INTER)) {
4597             scoreboard_param.walkpat_flag = 1;
4599             scoreboard_param.walkpat_flag = 0;
4601         gen9_init_gpe_context_vp9(ctx, gpe_context, &kernel_param);
4602         gen9_init_vfe_scoreboard_vp9(gpe_context, &scoreboard_param);
4604         memset(&scale_kernel, 0, sizeof(scale_kernel));
4606         intel_vp9_get_kernel_header_and_size((void *)media_vp9_kernels,
4607                                              sizeof(media_vp9_kernels),
4608                                              INTEL_VP9_ENC_MBENC,
4612         gen8_gpe_load_kernels(ctx,
/*
 * One-time setup of the NUM_VP9_BRC GPE contexts.  All BRC kernels share
 * the same curbe layout and scoreboard settings; each loop iteration
 * initializes one context and loads its kernel from the media_vp9_kernels
 * blob (the per-kernel id and load-argument lines are elided in this
 * extraction, as is the `int i;` declaration).
 */
4620 gen9_vme_brc_context_init_vp9(VADriverContextP ctx,
4621     struct gen9_encoder_context_vp9 *vme_context,
4622     struct vp9_brc_context *brc_context)
4624     struct i965_gpe_context *gpe_context = NULL;
4625     struct vp9_encoder_kernel_parameter kernel_param;
4626     struct vp9_encoder_scoreboard_parameter scoreboard_param;
4628     struct i965_kernel scale_kernel;
4630     kernel_param.curbe_size = sizeof(vp9_brc_curbe_data);
4631     kernel_param.inline_data_size = 0;
4632     kernel_param.sampler_size = 0;
4634     memset(&scoreboard_param, 0, sizeof(scoreboard_param));
4635     scoreboard_param.mask = 0xFF;
4636     scoreboard_param.enable = vme_context->use_hw_scoreboard;
4637     scoreboard_param.type = vme_context->use_hw_non_stalling_scoreboard;
4639     for (i = 0; i < NUM_VP9_BRC; i++) {
4640         gpe_context = &brc_context->gpe_contexts[i];
4641         gen9_init_gpe_context_vp9(ctx, gpe_context, &kernel_param);
4642         gen9_init_vfe_scoreboard_vp9(gpe_context, &scoreboard_param);
4644         memset(&scale_kernel, 0, sizeof(scale_kernel));
4646         intel_vp9_get_kernel_header_and_size((void *)media_vp9_kernels,
4647                                              sizeof(media_vp9_kernels),
4652         gen8_gpe_load_kernels(ctx,
/*
 * One-time setup of the DYS (dynamic scaling) GPE context.  Unlike the
 * other kernels this one needs sampler state (gen9 8x8 AVS) for the
 * scaling filter; scoreboard settings mirror the shared vme_context
 * flags and the DYS kernel is loaded from the media_vp9_kernels blob.
 */
4660 gen9_vme_dys_context_init_vp9(VADriverContextP ctx,
4661     struct gen9_encoder_context_vp9 *vme_context,
4662     struct vp9_dys_context *dys_context)
4664     struct i965_gpe_context *gpe_context = NULL;
4665     struct vp9_encoder_kernel_parameter kernel_param;
4666     struct vp9_encoder_scoreboard_parameter scoreboard_param;
4667     struct i965_kernel scale_kernel;
4669     kernel_param.curbe_size = sizeof(vp9_dys_curbe_data);
4670     kernel_param.inline_data_size = 0;
4671     kernel_param.sampler_size = sizeof(struct gen9_sampler_8x8_avs);
4673     memset(&scoreboard_param, 0, sizeof(scoreboard_param));
4674     scoreboard_param.mask = 0xFF;
4675     scoreboard_param.enable = vme_context->use_hw_scoreboard;
4676     scoreboard_param.type = vme_context->use_hw_non_stalling_scoreboard;
4677     scoreboard_param.walkpat_flag = 0;
4679     gpe_context = &dys_context->gpe_context;
4680     gen9_init_gpe_context_vp9(ctx, gpe_context, &kernel_param);
4681     gen9_init_vfe_scoreboard_vp9(gpe_context, &scoreboard_param);
4683     memset(&scale_kernel, 0, sizeof(scale_kernel));
4685     intel_vp9_get_kernel_header_and_size((void *)media_vp9_kernels,
4686                                          sizeof(media_vp9_kernels),
4691     gen8_gpe_load_kernels(ctx,
/*
 * Master init for the VME side: initializes every per-kernel sub-context
 * (scaling, ME, MBEnc, DYS, BRC) and installs the function-pointer table
 * used by the generic kernel dispatch code (curbe setup + surface setup
 * callbacks for each kernel type).
 */
4700 gen9_vme_kernels_context_init_vp9(VADriverContextP ctx,
4701     struct intel_encoder_context *encoder_context,
4702     struct gen9_encoder_context_vp9 *vme_context)
4704     gen9_vme_scaling_context_init_vp9(ctx, vme_context, &vme_context->scaling_context);
4705     gen9_vme_me_context_init_vp9(ctx, vme_context, &vme_context->me_context);
4706     gen9_vme_mbenc_context_init_vp9(ctx, vme_context, &vme_context->mbenc_context);
4707     gen9_vme_dys_context_init_vp9(ctx, vme_context, &vme_context->dys_context);
4708     gen9_vme_brc_context_init_vp9(ctx, vme_context, &vme_context->brc_context);
4710     vme_context->pfn_set_curbe_brc = gen9_vp9_set_curbe_brc;
4711     vme_context->pfn_set_curbe_me = gen9_vp9_set_curbe_me;
4712     vme_context->pfn_send_me_surface = gen9_vp9_send_me_surface;
4713     vme_context->pfn_send_scaling_surface = gen9_vp9_send_scaling_surface;
4715     vme_context->pfn_set_curbe_scaling = gen9_vp9_set_curbe_scaling_cm;
4717     vme_context->pfn_send_dys_surface = gen9_vp9_send_dys_surface;
4718     vme_context->pfn_set_curbe_dys = gen9_vp9_set_curbe_dys;
4719     vme_context->pfn_set_curbe_mbenc = gen9_vp9_set_curbe_mbenc;
4720     vme_context->pfn_send_mbenc_surface = gen9_vp9_send_mbenc_surface;
/*
 * Writes one bool-coded bin into the compressed-header input buffer that
 * feeds the HCP PAK.  Each vp9_compressed_element packs two bins (an "a"
 * slot and a "b" slot); index>>1 selects the element and the elided
 * even/odd test on `index` selects which slot is filled.  A probability
 * of 252 flips the prob_select bit.
 *
 * NOTE(review): the remaining parameter lines (presumably index, prob,
 * value) and the slot-selecting if/else are elided in this extraction;
 * code text left untouched.
 */
4725 void gen9_vp9_write_compressed_element(char *buffer,
4730     struct vp9_compressed_element *base_element, *vp9_element;
4731     base_element = (struct vp9_compressed_element *)buffer;
4733     vp9_element = base_element + (index >> 1);
4735         vp9_element->b_valid = 1;
4736         vp9_element->b_probdiff_select = 1;
4737         vp9_element->b_prob_select = (prob == 252) ? 1: 0;
4738         vp9_element->b_bin = value;
4740         vp9_element->a_valid = 1;
4741         vp9_element->a_probdiff_select = 1;
4742         vp9_element->a_prob_select = (prob == 252) ? 1: 0;
4743         vp9_element->a_bin = value;
/*
 * Refreshes the per-frame internal coding state before PAK:
 * - Resets the probability frame context on key/intra-only/error-resilient
 *   frames, propagating defaults into the saved contexts per the VP9 spec's
 *   reset_frame_context rules (3 = reset all, 2 = reset current only), and
 *   selects frame_ctx_idx.
 * - Rebuilds the compressed-header input buffer (res_compressed_input_buffer)
 *   bin by bin: lossless flag path, tx_mode signalling (TX_MODE_SELECT /
 *   ALLOW_32X32 / smaller), coeff-update flags, and — for inter frames —
 *   the comp-prediction mode and reference bits derived from the sign-bias
 *   flags.
 *
 * NOTE(review): key-frame test here compares against HCP_VP9_KEY_FRAME while
 * other code in this file compares against KEY_FRAME — confirm the two
 * constants agree.  Lossy extraction: many argument lines of the
 * gen9_vp9_write_compressed_element calls are elided; code left untouched.
 */
4748 intel_vp9enc_refresh_frame_internal_buffers(VADriverContextP ctx,
4749     struct intel_encoder_context *encoder_context)
4751     struct gen9_encoder_context_vp9 *pak_context = encoder_context->mfc_context;
4752     VAEncPictureParameterBufferVP9 *pic_param;
4753     struct gen9_vp9_state *vp9_state;
4757     vp9_state = (struct gen9_vp9_state *)(encoder_context->enc_priv_state);
4759     if (!pak_context || !vp9_state || !vp9_state->pic_param)
4762     pic_param = vp9_state->pic_param;
4763     if ((pic_param->pic_flags.bits.frame_type == HCP_VP9_KEY_FRAME) ||
4764         (pic_param->pic_flags.bits.intra_only) ||
4765         pic_param->pic_flags.bits.error_resilient_mode) {
4766         /* reset current frame_context */
4767         intel_init_default_vp9_probs(&vp9_state->vp9_current_fc);
4768         if ((pic_param->pic_flags.bits.frame_type == HCP_VP9_KEY_FRAME) ||
4769             pic_param->pic_flags.bits.error_resilient_mode ||
4770             (pic_param->pic_flags.bits.reset_frame_context == 3)) {
4771             for (i = 0; i < 4; i++)
4772                 memcpy(&vp9_state->vp9_frame_ctx[i],
4773                        &vp9_state->vp9_current_fc,
4774                        sizeof(FRAME_CONTEXT));
4775         } else if (pic_param->pic_flags.bits.reset_frame_context == 2) {
4776             i = pic_param->pic_flags.bits.frame_context_idx;
4777             memcpy(&vp9_state->vp9_frame_ctx[i],
4778                    &vp9_state->vp9_current_fc, sizeof(FRAME_CONTEXT));
4780         /* reset the frame_ctx_idx = 0 */
4781         vp9_state->frame_ctx_idx = 0;
4783         vp9_state->frame_ctx_idx = pic_param->pic_flags.bits.frame_context_idx;
4786     i965_zero_gpe_resource(&pak_context->res_compressed_input_buffer);
4787     buffer = i965_map_gpe_resource(&pak_context->res_compressed_input_buffer);
/* Lossless frame (all qindex contributions zero): no tx_mode bins needed. */
4793     if ((pic_param->luma_ac_qindex == 0) &&
4794         (pic_param->luma_dc_qindex_delta == 0) &&
4795         (pic_param->chroma_ac_qindex_delta == 0) &&
4796         (pic_param->chroma_dc_qindex_delta == 0)) {
4798         /* nothing is needed */
4799         gen9_vp9_write_compressed_element(buffer,
4801         gen9_vp9_write_compressed_element(buffer,
4803         gen9_vp9_write_compressed_element(buffer,
4806         if (vp9_state->tx_mode == TX_MODE_SELECT) {
4807             gen9_vp9_write_compressed_element(buffer,
4809             gen9_vp9_write_compressed_element(buffer,
4811             gen9_vp9_write_compressed_element(buffer,
4813         } else if (vp9_state->tx_mode == ALLOW_32X32) {
4814             gen9_vp9_write_compressed_element(buffer,
4816             gen9_vp9_write_compressed_element(buffer,
4818             gen9_vp9_write_compressed_element(buffer,
4821             unsigned int tx_mode;
4823             tx_mode = vp9_state->tx_mode;
4824             gen9_vp9_write_compressed_element(buffer,
4825                                               0, 128, ((tx_mode) & 2));
4826             gen9_vp9_write_compressed_element(buffer,
4827                                               1, 128, ((tx_mode) & 1));
4828             gen9_vp9_write_compressed_element(buffer,
4832         if (vp9_state->tx_mode == TX_MODE_SELECT) {
4834             gen9_vp9_write_compressed_element(buffer,
4837             gen9_vp9_write_compressed_element(buffer,
4840             gen9_vp9_write_compressed_element(buffer,
4844     /*Setup all the input&output object*/
4847     /* update the coeff_update flag */
4848     gen9_vp9_write_compressed_element(buffer,
4850     gen9_vp9_write_compressed_element(buffer,
4852     gen9_vp9_write_compressed_element(buffer,
4854     gen9_vp9_write_compressed_element(buffer,
/* Inter frame: decide compound prediction from the three sign-bias flags. */
4859     if (pic_param->pic_flags.bits.frame_type && !pic_param->pic_flags.bits.intra_only)
4861         bool allow_comp = !(
4862             (pic_param->ref_flags.bits.ref_last_sign_bias && pic_param->ref_flags.bits.ref_gf_sign_bias && pic_param->ref_flags.bits.ref_arf_sign_bias) ||
4863             (!pic_param->ref_flags.bits.ref_last_sign_bias && !pic_param->ref_flags.bits.ref_gf_sign_bias && !pic_param->ref_flags.bits.ref_arf_sign_bias)
4868         if (pic_param->pic_flags.bits.comp_prediction_mode == REFERENCE_MODE_SELECT) {
4869             gen9_vp9_write_compressed_element(buffer,
4871             gen9_vp9_write_compressed_element(buffer,
4874         else if (pic_param->pic_flags.bits.comp_prediction_mode == COMPOUND_REFERENCE) {
4875             gen9_vp9_write_compressed_element(buffer,
4877             gen9_vp9_write_compressed_element(buffer,
4882             gen9_vp9_write_compressed_element(buffer,
4884             gen9_vp9_write_compressed_element(buffer,
4890     i965_unmap_gpe_resource(&pak_context->res_compressed_input_buffer);
/*
 * Emits HCP_PIPE_MODE_SELECT (6 DWs) into the BCS batch: selects encode
 * mode, the VP9 codec mode, and stream-out per pipe_mode_param; the
 * Pic Status / Error Report interrupt is left disabled.  DW4 bit 6 is set
 * as required for this pipeline configuration (hardware-defined field —
 * see the HCP command reference).
 */
4895 gen9_pak_vp9_pipe_mode_select(VADriverContextP ctx,
4896     struct encode_state *encode_state,
4897     struct intel_encoder_context *encoder_context,
4898     struct gen9_hcpe_pipe_mode_select_param *pipe_mode_param)
4900     struct intel_batchbuffer *batch = encoder_context->base.batch;
4902     BEGIN_BCS_BATCH(batch, 6);
4904     OUT_BCS_BATCH(batch, HCP_PIPE_MODE_SELECT | (6 - 2));
4905     OUT_BCS_BATCH(batch,
4906                   (pipe_mode_param->stream_out << 12) |
4907                   (pipe_mode_param->codec_mode << 5) |
4908                   (0 << 3) | /* disable Pic Status / Error Report */
4909                   (pipe_mode_param->stream_out << 2) |
4910                   HCP_CODEC_SELECT_ENCODE);
4911     OUT_BCS_BATCH(batch, 0);
4912     OUT_BCS_BATCH(batch, 0);
4913     OUT_BCS_BATCH(batch, (1 << 6));
4914     OUT_BCS_BATCH(batch, 0);
4916     ADVANCE_BCS_BATCH(batch);
/*
 * Emits one HCP_SURFACE_STATE command (3 DWs) describing a surface to the
 * HCP: DW1 carries the surface id and (pitch - 1); DW2 the surface format
 * and the Y/Cb plane offset.  The caller fills hcp_state; a NULL-check on
 * hcp_state presumably sits in the elided lines (4926..4928) — confirm.
 */
4920 gen9_vp9_add_surface_state(VADriverContextP ctx,
4921     struct encode_state *encode_state,
4922     struct intel_encoder_context *encoder_context,
4923     hcp_surface_state *hcp_state)
4925     struct intel_batchbuffer *batch = encoder_context->base.batch;
4929     BEGIN_BCS_BATCH(batch, 3);
4930     OUT_BCS_BATCH(batch, HCP_SURFACE_STATE | (3 - 2));
4931     OUT_BCS_BATCH(batch,
4932                   (hcp_state->dw1.surface_id << 28) |
4933                   (hcp_state->dw1.surface_pitch - 1)
4935     OUT_BCS_BATCH(batch,
4936                   (hcp_state->dw2.surface_format << 28) |
4937                   (hcp_state->dw2.y_cb_offset)
4939     ADVANCE_BCS_BATCH(batch);
/*
 * Emits HCP_PIPE_BUF_ADDR_STATE (104 DWs) wiring every buffer the HCP PAK
 * needs for a VP9 frame: reconstructed surface, deblocking/metadata
 * rowstore buffers, the ping-pong motion-vector temporal buffers, up to
 * three reference surfaces (inter frames only), the source picture, the
 * probability / segment-id buffers and the HVD rowstore buffers.  Unused
 * HEVC-only slots (SAO, ILDB) are zero-filled.
 *
 * NOTE(review): lossy extraction — the OUT_BCS_RELOC64/OUT_BUFFER macro
 * lines and several offsets are elided (numbering gaps); code untouched.
 */
4943 gen9_pak_vp9_pipe_buf_addr_state(VADriverContextP ctx,
4944     struct encode_state *encode_state,
4945     struct intel_encoder_context *encoder_context)
4947     struct intel_batchbuffer *batch = encoder_context->base.batch;
4948     struct gen9_encoder_context_vp9 *pak_context = encoder_context->mfc_context;
4949     struct gen9_vp9_state *vp9_state;
4951     struct object_surface *obj_surface;
4953     vp9_state = (struct gen9_vp9_state *)(encoder_context->enc_priv_state);
4955     if (!vp9_state || !vp9_state->pic_param)
4959     BEGIN_BCS_BATCH(batch, 104);
4961     OUT_BCS_BATCH(batch, HCP_PIPE_BUF_ADDR_STATE | (104 - 2));
4963     obj_surface = encode_state->reconstructed_object;
4965     /* reconstructed obj_surface is already checked. So this is skipped */
4966     /* DW 1..3 decoded surface */
4969                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
4971     OUT_BCS_BATCH(batch, 0);
4973     /* DW 4..6 deblocking line */
4975                      pak_context->res_deblocking_filter_line_buffer.bo,
4976                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
4978     OUT_BCS_BATCH(batch, 0);
4980     /* DW 7..9 deblocking tile line */
4982                      pak_context->res_deblocking_filter_tile_line_buffer.bo,
4983                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
4985     OUT_BCS_BATCH(batch, 0);
4987     /* DW 10..12 deblocking tile col */
4989                      pak_context->res_deblocking_filter_tile_col_buffer.bo,
4990                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
4992     OUT_BCS_BATCH(batch, 0);
4994     /* DW 13..15 metadata line */
4996                      pak_context->res_metadata_line_buffer.bo,
4997                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
4999     OUT_BCS_BATCH(batch, 0);
5001     /* DW 16..18 metadata tile line */
5003                      pak_context->res_metadata_tile_line_buffer.bo,
5004                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
5006     OUT_BCS_BATCH(batch, 0);
5008     /* DW 19..21 metadata tile col */
5010                      pak_context->res_metadata_tile_col_buffer.bo,
5011                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
5013     OUT_BCS_BATCH(batch, 0);
5015     /* DW 22..30 SAO is not used for VP9 */
5016     OUT_BCS_BATCH(batch, 0);
5017     OUT_BCS_BATCH(batch, 0);
5018     OUT_BCS_BATCH(batch, 0);
5019     OUT_BCS_BATCH(batch, 0);
5020     OUT_BCS_BATCH(batch, 0);
5021     OUT_BCS_BATCH(batch, 0);
5022     OUT_BCS_BATCH(batch, 0);
5023     OUT_BCS_BATCH(batch, 0);
5024     OUT_BCS_BATCH(batch, 0);
5026     /* DW 31..33 Current Motion vector temporal buffer */
5028                      pak_context->res_mv_temporal_buffer[vp9_state->curr_mv_temporal_index].bo,
5029                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
5031     OUT_BCS_BATCH(batch, 0);
5033     /* DW 34..36 Not used */
5034     OUT_BCS_BATCH(batch, 0);
5035     OUT_BCS_BATCH(batch, 0);
5036     OUT_BCS_BATCH(batch, 0);
5038     /* Only the first three reference_frame is used for VP9 */
5039     /* DW 37..52 for reference_frame */
5041     if (vp9_state->picture_coding_type) {
5042         for (i = 0; i < 3; i++) {
5044             if (pak_context->reference_surfaces[i].bo) {
5046                              pak_context->reference_surfaces[i].bo,
5047                              I915_GEM_DOMAIN_INSTRUCTION, 0,
5050                 OUT_BCS_BATCH(batch, 0);
5051                 OUT_BCS_BATCH(batch, 0);
5056     for (; i < 8; i++) {
5057         OUT_BCS_BATCH(batch, 0);
5058         OUT_BCS_BATCH(batch, 0);
5061     OUT_BCS_BATCH(batch, 0);
5063     /* DW 54..56 for source input */
5065                      pak_context->uncompressed_picture_source.bo,
5066                      I915_GEM_DOMAIN_INSTRUCTION, 0,
5068     OUT_BCS_BATCH(batch, 0);
5070     /* DW 57..59 StreamOut is not used */
5071     OUT_BCS_BATCH(batch, 0);
5072     OUT_BCS_BATCH(batch, 0);
5073     OUT_BCS_BATCH(batch, 0);
5075     /* DW 60..62. Not used for encoder */
5076     OUT_BCS_BATCH(batch, 0);
5077     OUT_BCS_BATCH(batch, 0);
5078     OUT_BCS_BATCH(batch, 0);
5080     /* DW 63..65. ILDB Not used for encoder */
5081     OUT_BCS_BATCH(batch, 0);
5082     OUT_BCS_BATCH(batch, 0);
5083     OUT_BCS_BATCH(batch, 0);
5085     /* DW 66..81 For the collocated motion vector temporal buffer */
5086     if (vp9_state->picture_coding_type) {
/* Previous frame's MV buffer is the other half of the ping-pong pair. */
5087         int prev_index = vp9_state->curr_mv_temporal_index ^ 0x01;
5089                          pak_context->res_mv_temporal_buffer[prev_index].bo,
5090                          I915_GEM_DOMAIN_INSTRUCTION, 0,
5093         OUT_BCS_BATCH(batch, 0);
5094         OUT_BCS_BATCH(batch, 0);
5097     for (i = 1; i < 8; i++) {
5098         OUT_BCS_BATCH(batch, 0);
5099         OUT_BCS_BATCH(batch, 0);
5101     OUT_BCS_BATCH(batch, 0);
5103     /* DW 83..85 VP9 prob buffer */
5105                      pak_context->res_prob_buffer.bo,
5106                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
5109     OUT_BCS_BATCH(batch, 0);
5111     /* DW 86..88 Segment id buffer */
5112     if (pak_context->res_segmentid_buffer.bo) {
5114                          pak_context->res_segmentid_buffer.bo,
5115                          I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
5118         OUT_BCS_BATCH(batch, 0);
5119         OUT_BCS_BATCH(batch, 0);
5121     OUT_BCS_BATCH(batch, 0);
5123     /* DW 89..91 HVD line rowstore buffer */
5125                      pak_context->res_hvd_line_buffer.bo,
5126                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
5128     OUT_BCS_BATCH(batch, 0);
5130     /* DW 92..94 HVD tile line rowstore buffer */
5132                      pak_context->res_hvd_tile_line_buffer.bo,
5133                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
5135     OUT_BCS_BATCH(batch, 0);
5137     /* DW 95..97 SAO streamout. Not used for VP9 */
5138     OUT_BCS_BATCH(batch, 0);
5139     OUT_BCS_BATCH(batch, 0);
5140     OUT_BCS_BATCH(batch, 0);
5142     /* reserved for KBL. 98..100 */
5143     OUT_BCS_BATCH(batch, 0);
5144     OUT_BCS_BATCH(batch, 0);
5145     OUT_BCS_BATCH(batch, 0);
5148     OUT_BCS_BATCH(batch, 0);
5149     OUT_BCS_BATCH(batch, 0);
5150     OUT_BCS_BATCH(batch, 0);
5152     ADVANCE_BCS_BATCH(batch);
/*
 * Emits HCP_IND_OBJ_BASE_ADDR_STATE (29 DWs) for the VP9 PAK: the indirect
 * CU object (MB code surface at mb_data_offset), the PAK-BSE output
 * bitstream buffer with its upper bound, the compressed-header input, the
 * probability counter streamout, the probability delta streamin, and the
 * tile/CU record streamout buffers.  The indirect bitstream object slots
 * (DW1..5) are unused and zeroed.
 *
 * NOTE(review): lossy extraction — the OUT_BUFFER/RELOC macro lines are
 * elided (numbering gaps); code text left untouched.
 */
5156 gen9_pak_vp9_ind_obj_base_addr_state(VADriverContextP ctx,
5157     struct encode_state *encode_state,
5158     struct intel_encoder_context *encoder_context)
5160     struct intel_batchbuffer *batch = encoder_context->base.batch;
5161     struct gen9_encoder_context_vp9 *pak_context = encoder_context->mfc_context;
5162     struct gen9_vp9_state *vp9_state;
5164     vp9_state = (struct gen9_vp9_state *)(encoder_context->enc_priv_state);
5167     BEGIN_BCS_BATCH(batch, 29);
5169     OUT_BCS_BATCH(batch, HCP_IND_OBJ_BASE_ADDR_STATE | (29 - 2));
5171     /* indirect bitstream object base */
5172     OUT_BCS_BATCH(batch, 0);
5173     OUT_BCS_BATCH(batch, 0);
5174     OUT_BCS_BATCH(batch, 0);
5175     /* the upper bound of indirect bitstream object */
5176     OUT_BCS_BATCH(batch, 0);
5177     OUT_BCS_BATCH(batch, 0);
5179     /* DW 6: Indirect CU object base address */
5181                      pak_context->res_mb_code_surface.bo,
5182                      I915_GEM_DOMAIN_INSTRUCTION, 0, /* No write domain */
5183                      vp9_state->mb_data_offset);
5184     /* default attribute */
5185     OUT_BCS_BATCH(batch, 0);
5187     /* DW 9..11, PAK-BSE */
5189                      pak_context->indirect_pak_bse_object.bo,
5190                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
5191                      pak_context->indirect_pak_bse_object.offset);
5192     OUT_BCS_BATCH(batch, 0);
5194     /* DW 12..13 upper bound */
5196                      pak_context->indirect_pak_bse_object.bo,
5197                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
5198                      pak_context->indirect_pak_bse_object.end_offset);
5200     /* DW 14..16 compressed header buffer */
5202                      pak_context->res_compressed_input_buffer.bo,
5203                      I915_GEM_DOMAIN_INSTRUCTION, 0,
5205     OUT_BCS_BATCH(batch, 0);
5207     /* DW 17..19 prob counter streamout */
5209                      pak_context->res_prob_counter_buffer.bo,
5210                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
5212     OUT_BCS_BATCH(batch, 0);
5214     /* DW 20..22 prob delta streamin */
5216                      pak_context->res_prob_delta_buffer.bo,
5217                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
5219     OUT_BCS_BATCH(batch, 0);
5221     /* DW 23..25 Tile record streamout */
5223                      pak_context->res_tile_record_streamout_buffer.bo,
5224                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
5226     OUT_BCS_BATCH(batch, 0);
5228     /* DW 26..28 CU record streamout */
5230                      pak_context->res_cu_stat_streamout_buffer.bo,
5231                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
5233     OUT_BCS_BATCH(batch, 0);
5235     ADVANCE_BCS_BATCH(batch);
/*
 * Emit one 8-DW HCP_VP9_SEGMENT_STATE command for segment `seg_id`.
 *
 * DW1 carries the segment id; DW2 packs the reference-control bits
 * (reference_enabled << 3 | segment_reference << 1 | reference_skipped).
 * The segment_reference value is read into batch_value first, because on
 * key frames / intra-only frames it must not be used as-is (the branch at
 * 5256 gates that case — its body is elided in this excerpt).
 * DW3..6 are decoder-only fields and are zeroed for the encoder.
 * The final DW combines the QIndex delta (9-bit sign-magnitude) and the
 * loop-filter level delta (7-bit sign-magnitude, shifted to bits 16+)
 * via intel_convert_sign_mag().
 *
 * NOTE(review): the early-return body for a missing pic_param_ext, the
 * key-frame branch body, and the DW between 5277 and 5279 that folds the
 * qindex delta into batch_value are elided here — verify in full source.
 */
5239 gen9_pak_vp9_segment_state(VADriverContextP ctx,
5240 struct encode_state *encode_state,
5241 struct intel_encoder_context *encoder_context,
5242 VAEncSegParamVP9 *seg_param, uint8_t seg_id)
5244 struct intel_batchbuffer *batch = encoder_context->base.batch;
5245 uint32_t batch_value, tmp;
5246 VAEncPictureParameterBufferVP9 *pic_param;
/* Guard: a picture parameter buffer is mandatory for segment state */
5248 if (!encode_state->pic_param_ext ||
5249 !encode_state->pic_param_ext->buffer) {
5253 pic_param = (VAEncPictureParameterBufferVP9 *)encode_state->pic_param_ext->buffer;
5255 batch_value = seg_param->seg_flags.bits.segment_reference;
5256 if (pic_param->pic_flags.bits.frame_type == HCP_VP9_KEY_FRAME ||
5257 pic_param->pic_flags.bits.intra_only)
5260 BEGIN_BCS_BATCH(batch, 8);
5262 OUT_BCS_BATCH(batch, HCP_VP9_SEGMENT_STATE | (8 - 2));
5263 OUT_BCS_BATCH(batch, seg_id << 0); /* DW 1 - SegmentID */
5264 OUT_BCS_BATCH(batch,
5265 (seg_param->seg_flags.bits.segment_reference_enabled << 3) |
5266 (batch_value << 1) |
5267 (seg_param->seg_flags.bits.segment_reference_skipped <<0)
5270 /* DW 3..6 is not used for encoder */
5271 OUT_BCS_BATCH(batch, 0);
5272 OUT_BCS_BATCH(batch, 0);
5273 OUT_BCS_BATCH(batch, 0);
5274 OUT_BCS_BATCH(batch, 0);
/* Final DW: sign-magnitude qindex delta (low bits) + LF delta (<<16) */
5277 tmp = intel_convert_sign_mag(seg_param->segment_qindex_delta, 9);
5279 tmp = intel_convert_sign_mag(seg_param->segment_lf_level_delta, 7);
5280 batch_value |= (tmp << 16);
5281 OUT_BCS_BATCH(batch, batch_value);
5283 ADVANCE_BCS_BATCH(batch);
/*
 * Build the second-level batch buffer that inserts the VP9 uncompressed
 * frame header into the bitstream via HCP_INSERT_PAK_OBJECT.
 *
 * Layout written into obj_batch_buffer (CPU-mapped GPE resource):
 *   [0]            HCP_INSERT_PAK_OBJECT | dw_length
 *   [1]            control DW: bits_in_last_dw << 8, last-header flag,
 *                  no emulation-byte handling (VP9 never needs it)
 *   [2..dw_len+1]  the header payload copied from alias_insert_data
 *   then MI_NOOP and MI_BATCH_BUFFER_END terminators.
 *
 * bits_in_last_dw = (header_length % 4) * 8, with 0 mapped to 32 so a
 * DWORD-aligned header reports a full final DW.
 *
 * NOTE(review): the early return when pak_context/vp9_state/pic_param is
 * NULL and the closing brace are elided in this excerpt.
 */
5288 intel_vp9enc_construct_pak_insertobj_batchbuffer(VADriverContextP ctx,
5289 struct intel_encoder_context *encoder_context,
5290 struct i965_gpe_resource *obj_batch_buffer)
5292 struct gen9_encoder_context_vp9 *pak_context = encoder_context->mfc_context;
5293 struct gen9_vp9_state *vp9_state;
5294 int uncompressed_header_length;
5295 unsigned int *cmd_ptr;
5296 unsigned int dw_length, bits_in_last_dw;
5298 vp9_state = (struct gen9_vp9_state *)(encoder_context->enc_priv_state);
5300 if (!pak_context || !vp9_state || !vp9_state->pic_param)
5303 uncompressed_header_length = vp9_state->header_length;
5304 cmd_ptr = i965_map_gpe_resource(obj_batch_buffer);
/* Valid bits in the final payload DWORD (in bits, not bytes) */
5309 bits_in_last_dw = uncompressed_header_length % 4;
5310 bits_in_last_dw *= 8;
5312 if (bits_in_last_dw == 0)
5313 bits_in_last_dw = 32;
5315 /* get the DWORD length of the inserted_data */
5316 dw_length = ALIGN(uncompressed_header_length, 4) / 4;
5317 *cmd_ptr++ = HCP_INSERT_PAK_OBJECT | dw_length;
5319 *cmd_ptr++ = ((0 << 31) | /* indirect payload */
5320 (0 << 16) | /* the start offset in first DW */
5322 (bits_in_last_dw << 8) | /* bits_in_last_dw */
5323 (0 << 4) | /* skip emulation byte count. 0 for VP9 */
5324 (0 << 3) | /* emulation flag. 0 for VP9 */
5325 (1 << 2) | /* last header flag. */
/* Copy the pre-built uncompressed header bytes as the command payload */
5327 memcpy(cmd_ptr, vp9_state->alias_insert_data, dw_length * sizeof(unsigned int));
5329 cmd_ptr += dw_length;
/* Terminate the second-level batch so the CS returns to the primary */
5331 *cmd_ptr++ = MI_NOOP;
5332 *cmd_ptr++ = MI_BATCH_BUFFER_END;
5333 i965_unmap_gpe_resource(obj_batch_buffer);
/*
 * Program one PAK pass for the current VP9 frame.
 *
 * High-level sequence (all on the BCS/HCP pipe):
 *   1. On pass 0 only: build the header-insertion batch, the pic-state
 *      batch (unless BRC already wrote it), and refresh per-frame
 *      internal buffers.
 *   2. Seed res_prob_buffer from the selected frame context, overriding
 *      partition/uv-mode probs with the key-frame tables for key or
 *      intra-only frames.
 *   3. On BRC re-pak passes, arm an MI_CONDITIONAL_BATCH_BUFFER_END
 *      against the stored image-status mask so the pass is skipped when
 *      the previous pass already met the size target.
 *   4. Emit pipe-mode-select, the five HCP surface states (recon, raw
 *      input — possibly the dynamically-scaled surface — and the
 *      last/golden/alt reference surfaces for inter frames), then
 *      pipe-buf-addr and ind-obj-base-addr state.
 *   5. Chain the second-level pic-state batch (per-pass offset of
 *      VP9_PIC_STATE_BUFFER_SIZE bytes), emit segment states, then chain
 *      the uncompressed-header batch and the CU-record (PAK object)
 *      batch from res_mb_code_surface.
 *
 * NOTE(review): this excerpt is elided — prob_ptr's declaration,
 * several else-branches/closing braces, the segment_count computation
 * around 5503, and trailing argument lines of some calls are not
 * visible; confirm against the full source before modifying.
 */
5337 gen9_vp9_pak_picture_level(VADriverContextP ctx,
5338 struct encode_state *encode_state,
5339 struct intel_encoder_context *encoder_context)
5341 struct intel_batchbuffer *batch = encoder_context->base.batch;
5342 struct gen9_encoder_context_vp9 *pak_context = encoder_context->mfc_context;
5343 struct object_surface *obj_surface;
5344 VAEncPictureParameterBufferVP9 *pic_param;
5345 VAEncMiscParameterTypeVP9PerSegmantParam *seg_param, tmp_seg_param;
5346 struct gen9_vp9_state *vp9_state;
5347 struct gen9_surface_vp9 *vp9_priv_surface;
5349 struct gen9_hcpe_pipe_mode_select_param mode_param;
5350 hcp_surface_state hcp_surface;
5351 struct gpe_mi_batch_buffer_start_parameter second_level_batch;
5354 vp9_state = (struct gen9_vp9_state *)(encoder_context->enc_priv_state);
5356 if (!pak_context || !vp9_state || !vp9_state->pic_param)
5359 pic_param = vp9_state->pic_param;
5360 seg_param = vp9_state->segment_param;
/* Pass-0-only setup: header insertion batch + (non-BRC) pic state */
5362 if (vp9_state->curr_pak_pass == 0)
5364 intel_vp9enc_construct_pak_insertobj_batchbuffer(ctx, encoder_context,
5365 &pak_context->res_pak_uncompressed_input_buffer);
5367 // Check if driver already programmed pic state as part of BRC update kernel programming.
5368 if (!vp9_state->brc_enabled)
5370 intel_vp9enc_construct_picstate_batchbuf(ctx, encode_state,
5371 encoder_context, &pak_context->res_pic_state_brc_write_hfw_read_buffer);
5375 if (vp9_state->curr_pak_pass == 0)
5377 intel_vp9enc_refresh_frame_internal_buffers(ctx, encoder_context);
5381 /* copy the frame_context[frame_idx] into curr_frame_context */
5382 memcpy(&vp9_state->vp9_current_fc,
5383 &(vp9_state->vp9_frame_ctx[vp9_state->frame_ctx_idx]),
5384 sizeof(FRAME_CONTEXT));
5388 prob_ptr = i965_map_gpe_resource(&pak_context->res_prob_buffer);
5393 /* copy the current fc to vp9_prob buffer */
5394 memcpy(prob_ptr, &vp9_state->vp9_current_fc, sizeof(FRAME_CONTEXT));
/* Key/intra-only frames use the fixed key-frame probability tables */
5395 if ((pic_param->pic_flags.bits.frame_type == HCP_VP9_KEY_FRAME) ||
5396 pic_param->pic_flags.bits.intra_only) {
5397 FRAME_CONTEXT *frame_ptr = (FRAME_CONTEXT *)prob_ptr;
5399 memcpy(frame_ptr->partition_prob, vp9_kf_partition_probs,
5400 sizeof(vp9_kf_partition_probs));
5401 memcpy(frame_ptr->uv_mode_prob, vp9_kf_uv_mode_prob,
5402 sizeof(vp9_kf_uv_mode_prob));
5404 i965_unmap_gpe_resource(&pak_context->res_prob_buffer);
/* Re-pak passes: bail out early on HW if previous pass hit the target */
5408 if (vp9_state->brc_enabled && vp9_state->curr_pak_pass) {
5409 /* read image status and insert the conditional end cmd */
5410 /* image ctrl/status is already accessed */
5411 struct gpe_mi_conditional_batch_buffer_end_parameter mi_cond_end;
5412 struct vp9_encode_status_buffer_internal *status_buffer;
5414 status_buffer = &vp9_state->status_buffer;
5415 memset(&mi_cond_end, 0, sizeof(mi_cond_end));
5416 mi_cond_end.offset = status_buffer->image_status_mask_offset;
5417 mi_cond_end.bo = status_buffer->bo;
5418 mi_cond_end.compare_data = 0;
5419 mi_cond_end.compare_mask_mode_disabled = 1;
5420 gen9_gpe_mi_conditional_batch_buffer_end(ctx, batch,
/* codec_mode 1 selects VP9 encode on the HCP pipe; no streamout */
5424 mode_param.codec_mode = 1;
5425 mode_param.stream_out = 0;
5426 gen9_pak_vp9_pipe_mode_select(ctx, encode_state, encoder_context, &mode_param);
5428 /* reconstructed surface */
5429 memset(&hcp_surface, 0, sizeof(hcp_surface));
5430 obj_surface = encode_state->reconstructed_object;
5431 hcp_surface.dw1.surface_id = 0;
5432 hcp_surface.dw1.surface_pitch = obj_surface->width;
5433 hcp_surface.dw2.surface_format = SURFACE_FORMAT_PLANAR_420_8;
5434 hcp_surface.dw2.y_cb_offset = obj_surface->y_cb_offset;
5435 gen9_vp9_add_surface_state(ctx, encode_state, encoder_context,
/* Source surface: use the dynamically-scaled (dys) copy when src and
 * dst dimensions differ, otherwise the raw input YUV */
5439 if (vp9_state->dys_in_use &&
5440 ((pic_param->frame_width_src != pic_param->frame_width_dst) ||
5441 (pic_param->frame_height_src != pic_param->frame_height_dst))) {
5442 vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
5443 obj_surface = vp9_priv_surface->dys_surface_obj;
5445 obj_surface = encode_state->input_yuv_object;
5448 hcp_surface.dw1.surface_id = 1;
5449 hcp_surface.dw1.surface_pitch = obj_surface->width;
5450 hcp_surface.dw2.surface_format = SURFACE_FORMAT_PLANAR_420_8;
5451 hcp_surface.dw2.y_cb_offset = obj_surface->y_cb_offset;
5452 gen9_vp9_add_surface_state(ctx, encode_state, encoder_context,
/* Inter frames: surface ids 2/3/4 = last/golden/alt references */
5455 if (vp9_state->picture_coding_type) {
5456 /* Add surface for last */
5457 if (vp9_state->last_ref_obj) {
5458 obj_surface = vp9_state->last_ref_obj;
5459 hcp_surface.dw1.surface_id = 2;
5460 hcp_surface.dw1.surface_pitch = obj_surface->width;
5461 hcp_surface.dw2.surface_format = SURFACE_FORMAT_PLANAR_420_8;
5462 hcp_surface.dw2.y_cb_offset = obj_surface->y_cb_offset;
5463 gen9_vp9_add_surface_state(ctx, encode_state, encoder_context,
5466 if (vp9_state->golden_ref_obj) {
5467 obj_surface = vp9_state->golden_ref_obj;
5468 hcp_surface.dw1.surface_id = 3;
5469 hcp_surface.dw1.surface_pitch = obj_surface->width;
5470 hcp_surface.dw2.surface_format = SURFACE_FORMAT_PLANAR_420_8;
5471 hcp_surface.dw2.y_cb_offset = obj_surface->y_cb_offset;
5472 gen9_vp9_add_surface_state(ctx, encode_state, encoder_context,
5475 if (vp9_state->alt_ref_obj) {
5476 obj_surface = vp9_state->alt_ref_obj;
5477 hcp_surface.dw1.surface_id = 4;
5478 hcp_surface.dw1.surface_pitch = obj_surface->width;
5479 hcp_surface.dw2.surface_format = SURFACE_FORMAT_PLANAR_420_8;
5480 hcp_surface.dw2.y_cb_offset = obj_surface->y_cb_offset;
5481 gen9_vp9_add_surface_state(ctx, encode_state, encoder_context,
5486 gen9_pak_vp9_pipe_buf_addr_state(ctx, encode_state, encoder_context);
5488 gen9_pak_vp9_ind_obj_base_addr_state(ctx, encode_state, encoder_context);
5490 // Using picstate zero with updated QP and LF deltas by HuC for repak, irrespective of how many Pak passes were run in multi-pass mode.
5491 memset(&second_level_batch, 0, sizeof(second_level_batch));
5493 if (vp9_state->curr_pak_pass == 0) {
5494 second_level_batch.offset = 0;
/* Each PAK pass has its own pic-state slot in the shared buffer */
5496 second_level_batch.offset = vp9_state->curr_pak_pass * VP9_PIC_STATE_BUFFER_SIZE;
5498 second_level_batch.is_second_level = 1;
5499 second_level_batch.bo = pak_context->res_pic_state_brc_write_hfw_read_buffer.bo;
5501 gen8_gpe_mi_batch_buffer_start(ctx, batch, &second_level_batch);
/* Segment states: fall back to a zeroed tmp_seg_param when the app did
 * not supply per-segment data (branch body partly elided here) */
5503 if (pic_param->pic_flags.bits.segmentation_enabled &&
5508 memset(&tmp_seg_param, 0, sizeof(tmp_seg_param));
5509 seg_param = &tmp_seg_param;
5511 for (i = 0; i < segment_count; i++)
5513 gen9_pak_vp9_segment_state(ctx, encode_state,
5515 &seg_param->seg_data[i], i);
5518 /* Insert the uncompressed header buffer */
5519 second_level_batch.is_second_level = 1;
5520 second_level_batch.offset = 0;
5521 second_level_batch.bo = pak_context->res_pak_uncompressed_input_buffer.bo;
5523 gen8_gpe_mi_batch_buffer_start(ctx, batch, &second_level_batch);
/* Finally chain the CU-record PAK object batch produced by the VME */
5526 second_level_batch.is_second_level = 1;
5527 second_level_batch.offset = 0;
5528 second_level_batch.bo = pak_context->res_mb_code_surface.bo;
5529 gen8_gpe_mi_batch_buffer_start(ctx, batch, &second_level_batch);
/*
 * Snapshot the PAK hardware status registers after a pass.
 *
 * Flushes the ring, then uses MI_STORE_REGISTER_MEM to copy:
 *  - the bitstream byte-count register (vp9_bs_frame_reg_offset) into
 *    both the per-frame status BO and offset 0 of the BRC bitstream-size
 *    buffer (so the BRC kernel can read it);
 *  - the HCP image-status mask and control registers into the status BO;
 *  - the image-status control register again into offset 4 of the BRC
 *    bitstream-size buffer.
 * A final MI_FLUSH_DW makes the stores visible before later commands.
 *
 * NOTE(review): early-return body on NULL inputs and the closing brace
 * are elided in this excerpt.
 */
5535 gen9_vp9_read_mfc_status(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
5537 struct intel_batchbuffer *batch = encoder_context->base.batch;
5538 struct gen9_encoder_context_vp9 *pak_context = encoder_context->mfc_context;
5539 struct gpe_mi_store_register_mem_parameter mi_store_reg_mem_param;
5540 struct gpe_mi_flush_dw_parameter mi_flush_dw_param;
5541 //struct gpe_mi_copy_mem_parameter mi_copy_mem_param;
5542 struct vp9_encode_status_buffer_internal *status_buffer;
5543 struct gen9_vp9_state *vp9_state;
5545 vp9_state = (struct gen9_vp9_state *)(encoder_context->enc_priv_state);
5546 if (!vp9_state || !pak_context || !batch)
5549 status_buffer = &(vp9_state->status_buffer);
/* Ensure PAK writes have landed before sampling the MMIO registers */
5551 memset(&mi_flush_dw_param, 0, sizeof(mi_flush_dw_param));
5552 gen8_gpe_mi_flush_dw(ctx, batch, &mi_flush_dw_param);
/* Bitstream byte count -> application-visible status BO */
5554 memset(&mi_store_reg_mem_param, 0, sizeof(mi_store_reg_mem_param));
5555 mi_store_reg_mem_param.bo = status_buffer->bo;
5556 mi_store_reg_mem_param.offset = status_buffer->bs_byte_count_offset;
5557 mi_store_reg_mem_param.mmio_offset = status_buffer->vp9_bs_frame_reg_offset;
5558 gen8_gpe_mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);
/* Same register -> BRC buffer DW0 for the rate-control kernel */
5560 mi_store_reg_mem_param.bo = pak_context->res_brc_bitstream_size_buffer.bo;
5561 mi_store_reg_mem_param.offset = 0;
5562 mi_store_reg_mem_param.mmio_offset = status_buffer->vp9_bs_frame_reg_offset;
5563 gen8_gpe_mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);
5565 /* Read HCP Image status */
5566 mi_store_reg_mem_param.bo = status_buffer->bo;
5567 mi_store_reg_mem_param.offset = status_buffer->image_status_mask_offset;
5568 mi_store_reg_mem_param.mmio_offset =
5569 status_buffer->vp9_image_mask_reg_offset;
5570 gen8_gpe_mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);
5572 mi_store_reg_mem_param.bo = status_buffer->bo;
5573 mi_store_reg_mem_param.offset = status_buffer->image_status_ctrl_offset;
5574 mi_store_reg_mem_param.mmio_offset =
5575 status_buffer->vp9_image_ctrl_reg_offset;
5576 gen8_gpe_mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);
/* Image-status control -> BRC buffer DW1 */
5578 mi_store_reg_mem_param.bo = pak_context->res_brc_bitstream_size_buffer.bo;
5579 mi_store_reg_mem_param.offset = 4;
5580 mi_store_reg_mem_param.mmio_offset =
5581 status_buffer->vp9_image_ctrl_reg_offset;
5582 gen8_gpe_mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);
5584 gen8_gpe_mi_flush_dw(ctx, batch, &mi_flush_dw_param);
/*
 * Resolve the BOs needed by the PAK stage for the current frame and take
 * references on them:
 *  - reconstructed surface (allocated NV12 if not yet backed);
 *  - up to three reference surfaces for inter, non-intra-only frames
 *    (previous entries are unreferenced and cleared first);
 *  - uncompressed source (the dys-scaled surface when dynamic scaling
 *    changed the frame dimensions, otherwise the input YUV);
 *  - the coded buffer, used as the PAK-BSE output: payload starts at
 *    I965_CODEDBUFFER_HEADER_SIZE and is bounded by the buffer size
 *    rounded down by one 4K page.
 * Also resets the coded-buffer segment header (mapped=0, codec,
 * status_support=1) so a later status query reads fresh data.
 *
 * Returns VA_STATUS_SUCCESS, or VA_STATUS_ERROR_INVALID_PARAMETER when
 * the per-context VP9 state/pic_param is missing.
 *
 * NOTE(review): declarations of i/bo, the dri_bo map/unmap around the
 * coded_buffer_segment write, and several closing braces are elided in
 * this excerpt.
 */
5590 gen9_vp9_pak_pipeline_prepare(VADriverContextP ctx,
5591 struct encode_state *encode_state,
5592 struct intel_encoder_context *encoder_context)
5594 struct gen9_encoder_context_vp9 *pak_context = encoder_context->mfc_context;
5595 struct object_surface *obj_surface;
5596 struct object_buffer *obj_buffer;
5597 struct i965_coded_buffer_segment *coded_buffer_segment;
5598 VAEncPictureParameterBufferVP9 *pic_param;
5599 struct gen9_vp9_state *vp9_state;
5603 vp9_state = (struct gen9_vp9_state *)(encoder_context->enc_priv_state);
5605 !vp9_state->pic_param)
5606 return VA_STATUS_ERROR_INVALID_PARAMETER;
5608 pic_param = vp9_state->pic_param;
5610 /* reconstructed surface */
5611 obj_surface = encode_state->reconstructed_object;
5612 i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
5614 pak_context->reconstructed_object.bo = obj_surface->bo;
5615 dri_bo_reference(pak_context->reconstructed_object.bo);
5617 /* set vp9 reference frames */
/* Drop references taken for the previous frame before repopulating */
5618 for (i = 0; i < ARRAY_ELEMS(pak_context->reference_surfaces); i++) {
5619 if (pak_context->reference_surfaces[i].bo)
5620 dri_bo_unreference(pak_context->reference_surfaces[i].bo);
5621 pak_context->reference_surfaces[i].bo = NULL;
5624 /* Three reference frames are enough for VP9 */
5625 if (pic_param->pic_flags.bits.frame_type &&
5626 !pic_param->pic_flags.bits.intra_only) {
5627 for (i = 0; i < 3; i++) {
5628 obj_surface = encode_state->reference_objects[i];
5629 if (obj_surface && obj_surface->bo) {
5630 pak_context->reference_surfaces[i].bo = obj_surface->bo;
5631 dri_bo_reference(obj_surface->bo);
5636 /* input YUV surface */
5637 dri_bo_unreference(pak_context->uncompressed_picture_source.bo);
5638 pak_context->uncompressed_picture_source.bo = NULL;
5639 obj_surface = encode_state->reconstructed_object;
/* When dynamic scaling resized the frame, encode from the dys copy */
5640 if (vp9_state->dys_in_use &&
5641 ((pic_param->frame_width_src != pic_param->frame_width_dst) ||
5642 (pic_param->frame_height_src != pic_param->frame_height_dst))) {
5643 struct gen9_surface_vp9 *vp9_priv_surface =
5644 (struct gen9_surface_vp9 *)(obj_surface->private_data);
5645 obj_surface = vp9_priv_surface->dys_surface_obj;
5647 obj_surface = encode_state->input_yuv_object;
5649 pak_context->uncompressed_picture_source.bo = obj_surface->bo;
5650 dri_bo_reference(pak_context->uncompressed_picture_source.bo);
/* Coded buffer doubles as the PAK-BSE indirect output object */
5653 dri_bo_unreference(pak_context->indirect_pak_bse_object.bo);
5654 pak_context->indirect_pak_bse_object.bo = NULL;
5655 obj_buffer = encode_state->coded_buf_object;
5656 bo = obj_buffer->buffer_store->bo;
5657 pak_context->indirect_pak_bse_object.offset = I965_CODEDBUFFER_HEADER_SIZE;
5658 pak_context->indirect_pak_bse_object.end_offset = ALIGN((obj_buffer->size_element - 0x1000), 0x1000);
5659 pak_context->indirect_pak_bse_object.bo = bo;
5660 dri_bo_reference(pak_context->indirect_pak_bse_object.bo);
5662 /* set the internal flag to 0 to indicate the coded size is unknown */
5664 coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual;
5665 coded_buffer_segment->mapped = 0;
5666 coded_buffer_segment->codec = encoder_context->codec;
5667 coded_buffer_segment->status_support = 1;
5670 return VA_STATUS_SUCCESS;
/*
 * mfc_brc_prepare hook for VP9.
 * Body not visible in this excerpt — presumably a no-op returning
 * VA_STATUS_SUCCESS, since BRC setup happens in the VME/BRC kernels;
 * TODO confirm against full source.
 */
5674 gen9_vp9_pak_brc_prepare(struct encode_state *encode_state,
5675 struct intel_encoder_context *encoder_context)
/*
 * mfc_context_destroy hook: drop the BO references held by the PAK side
 * (reconstructed surface, source surface, coded-buffer BSE object and
 * the 8 reference-surface slots) and NULL the pointers. The context
 * struct itself is NOT freed here because VME and PAK share one
 * gen9_encoder_context_vp9 — the VME destroy path owns the free.
 * (Loop index declaration and closing brace elided in this excerpt.)
 */
5680 gen9_vp9_pak_context_destroy(void *context)
5682 struct gen9_encoder_context_vp9 *pak_context = context;
5685 dri_bo_unreference(pak_context->reconstructed_object.bo);
5686 pak_context->reconstructed_object.bo = NULL;
5688 dri_bo_unreference(pak_context->uncompressed_picture_source.bo);
5689 pak_context->uncompressed_picture_source.bo = NULL;
5691 dri_bo_unreference(pak_context->indirect_pak_bse_object.bo);
5692 pak_context->indirect_pak_bse_object.bo = NULL;
5694 for (i = 0; i < 8; i++){
5695 dri_bo_unreference(pak_context->reference_surfaces[i].bo);
5696 pak_context->reference_surfaces[i].bo = NULL;
5699 /* vme & pak same the same structure, so don't free the context here */
/*
 * Top-level PAK entry (mfc_pipeline hook): run all PAK passes for one
 * frame on the BCS ring and update persistent per-stream state.
 *
 * Flow: prepare BOs -> start an atomic BCS batch (BSD ring 0 when a
 * second BSD exists) -> MI flush + 64 MI_NOOP pad -> for each pass
 * [0, num_pak_passes): on pass 0 zero the VP9 image-control register
 * via MI_LOAD_REGISTER_IMM, then emit picture-level commands and the
 * status readback -> flush and submit. Afterwards, record this frame's
 * geometry/flags in vp9_last_frame, bump frame_number, flip the MV
 * temporal buffer index (ping-pong with the prev_index ^ 0x01 read in
 * pipe_buf_addr_state), and clear first_frame.
 *
 * Returns VA_STATUS_SUCCESS on completion or
 * VA_STATUS_ERROR_INVALID_PARAMETER / the prepare-stage error.
 * NOTE(review): declarations of i/va_status, a few closing braces and
 * the early-return body after the prepare call are elided here.
 */
5703 gen9_vp9_pak_pipeline(VADriverContextP ctx,
5705 struct encode_state *encode_state,
5706 struct intel_encoder_context *encoder_context)
5708 struct i965_driver_data *i965 = i965_driver_data(ctx);
5709 struct intel_batchbuffer *batch = encoder_context->base.batch;
5710 struct gen9_encoder_context_vp9 *pak_context = encoder_context->mfc_context;
5712 struct gen9_vp9_state *vp9_state;
5713 VAEncPictureParameterBufferVP9 *pic_param;
5716 vp9_state = (struct gen9_vp9_state *)(encoder_context->enc_priv_state);
5718 if (!vp9_state || !vp9_state->pic_param || !pak_context)
5719 return VA_STATUS_ERROR_INVALID_PARAMETER;
5721 va_status = gen9_vp9_pak_pipeline_prepare(ctx, encode_state, encoder_context);
5723 if (va_status != VA_STATUS_SUCCESS)
/* Pin to BSD ring 0 on dual-BSD parts so status MMIO reads match */
5726 if (i965->intel.has_bsd2)
5727 intel_batchbuffer_start_atomic_bcs_override(batch, 0x1000, BSD_RING0);
5729 intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
5731 intel_batchbuffer_emit_mi_flush(batch);
/* 64-DW NOOP pad at the head of the frame's command sequence */
5733 BEGIN_BCS_BATCH(batch, 64);
5734 for (i = 0; i < 64; i++)
5735 OUT_BCS_BATCH(batch, MI_NOOP);
5737 ADVANCE_BCS_BATCH(batch);
5739 for (vp9_state->curr_pak_pass = 0;
5740 vp9_state->curr_pak_pass < vp9_state->num_pak_passes;
5741 vp9_state->curr_pak_pass++) {
5743 if (vp9_state->curr_pak_pass == 0) {
5744 /* Initialize the VP9 Image Ctrl reg for the first pass */
5745 struct gpe_mi_load_register_imm_parameter mi_load_reg_imm;
5746 struct vp9_encode_status_buffer_internal *status_buffer;
5748 status_buffer = &(vp9_state->status_buffer);
5749 memset(&mi_load_reg_imm, 0, sizeof(mi_load_reg_imm));
5750 mi_load_reg_imm.mmio_offset = status_buffer->vp9_image_ctrl_reg_offset;
5751 mi_load_reg_imm.data = 0;
5752 gen8_gpe_mi_load_register_imm(ctx, batch, &mi_load_reg_imm);
5754 gen9_vp9_pak_picture_level(ctx, encode_state, encoder_context);
5755 gen9_vp9_read_mfc_status(ctx, encoder_context);
5758 intel_batchbuffer_end_atomic(batch);
5759 intel_batchbuffer_flush(batch);
/* Persist this frame's parameters for next-frame decisions */
5761 pic_param = vp9_state->pic_param;
5762 vp9_state->vp9_last_frame.frame_width = pic_param->frame_width_dst;
5763 vp9_state->vp9_last_frame.frame_height = pic_param->frame_height_dst;
5764 vp9_state->vp9_last_frame.frame_type = pic_param->pic_flags.bits.frame_type;
5765 vp9_state->vp9_last_frame.show_frame = pic_param->pic_flags.bits.show_frame;
5766 vp9_state->vp9_last_frame.refresh_frame_context = pic_param->pic_flags.bits.refresh_frame_context;
5767 vp9_state->vp9_last_frame.frame_context_idx = pic_param->pic_flags.bits.frame_context_idx;
5768 vp9_state->vp9_last_frame.intra_only = pic_param->pic_flags.bits.intra_only;
5769 vp9_state->frame_number++;
/* Ping-pong the MV temporal buffer between consecutive frames */
5770 vp9_state->curr_mv_temporal_index ^= 1;
5771 vp9_state->first_frame = 0;
5773 return VA_STATUS_SUCCESS;
/*
 * Allocate and initialize the shared VME/PAK context and the persistent
 * per-stream VP9 encoder state.
 *
 * Defaults: HME + 16x ME enabled, HW scoreboard (non-stalling),
 * TX_MODE_SELECT, normal target usage, dynamic scaling on, 1 PAK pass.
 * Any rate-control mode other than NONE/CQP enables BRC and bumps the
 * pass count to 4 (multi-pass re-pak). The status-buffer block maps the
 * vp9_encode_status fields into the coded buffer's codec_private_data
 * area and records the VP9 bitstream-count and image mask/ctrl MMIO
 * register offsets (0x1E9E0/0x1E9F0/0x1E9F4) read back after each pass.
 * Finally wires the VME kernels and the vme_pipeline/destroy hooks into
 * encoder_context.
 *
 * NOTE(review): the allocation-failure cleanup body after 5785 and the
 * final return are elided in this excerpt.
 */
5777 gen9_vp9_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
5779 struct gen9_encoder_context_vp9 *vme_context = NULL;
5780 struct gen9_vp9_state *vp9_state = NULL;
5782 vme_context = calloc(1, sizeof(struct gen9_encoder_context_vp9));
5783 vp9_state = calloc(1, sizeof(struct gen9_vp9_state));
5785 if (!vme_context || !vp9_state) {
/* One vp9_state is shared by the VME and PAK halves of the encoder */
5793 encoder_context->enc_priv_state = vp9_state;
5794 vme_context->enc_priv_state = vp9_state;
5796 /* Initialize the features that are supported by VP9 */
5797 vme_context->hme_supported = 1;
5798 vme_context->use_hw_scoreboard = 1;
5799 vme_context->use_hw_non_stalling_scoreboard = 1;
5801 vp9_state->tx_mode = TX_MODE_SELECT;
5802 vp9_state->multi_ref_qp_check = 0;
5803 vp9_state->target_usage = INTEL_ENC_VP9_TU_NORMAL;
5804 vp9_state->num_pak_passes = 1;
5805 vp9_state->hme_supported = vme_context->hme_supported;
5806 vp9_state->b16xme_supported = 1;
/* BRC on for everything except no-RC and constant-QP modes */
5808 if (encoder_context->rate_control_mode != VA_RC_NONE &&
5809 encoder_context->rate_control_mode != VA_RC_CQP) {
5810 vp9_state->brc_enabled = 1;
5811 vp9_state->brc_distortion_buffer_supported = 1;
5812 vp9_state->brc_constant_buffer_supported = 1;
5813 vp9_state->num_pak_passes = 4;
5815 vp9_state->dys_enabled = 1; /* this is supported by default */
5816 vp9_state->first_frame = 1;
5818 /* the definition of status buffer offset for VP9 */
5820 struct vp9_encode_status_buffer_internal *status_buffer;
5821 uint32_t base_offset = offsetof(struct i965_coded_buffer_segment, codec_private_data);
5823 status_buffer = &vp9_state->status_buffer;
5824 memset(status_buffer, 0,
5825 sizeof(struct vp9_encode_status_buffer_internal));
5827 status_buffer->bs_byte_count_offset = base_offset + offsetof(struct vp9_encode_status, bs_byte_count);
5828 status_buffer->image_status_mask_offset = base_offset + offsetof(struct vp9_encode_status, image_status_mask);
5829 status_buffer->image_status_ctrl_offset = base_offset + offsetof(struct vp9_encode_status, image_status_ctrl);
5830 status_buffer->media_index_offset = base_offset + offsetof(struct vp9_encode_status, media_index);
/* MMIO offsets of the HCP VP9 status registers sampled after PAK */
5832 status_buffer->vp9_bs_frame_reg_offset = 0x1E9E0;
5833 status_buffer->vp9_image_mask_reg_offset = 0x1E9F0;
5834 status_buffer->vp9_image_ctrl_reg_offset = 0x1E9F4;
5837 gen9_vme_kernels_context_init_vp9(ctx, encoder_context, vme_context);
5839 encoder_context->vme_context = vme_context;
5840 encoder_context->vme_pipeline = gen9_vme_pipeline_vp9;
5841 encoder_context->vme_context_destroy = gen9_vme_context_destroy_vp9;
/*
 * get_status hook: publish the encoded frame size to the application.
 * Reads the vp9_encode_status block stored in the coded-buffer
 * segment's codec_private_data (filled by the MI_STORE_REGISTER_MEM
 * readback in gen9_vp9_read_mfc_status) and copies bs_byte_count into
 * the VACodedBufferSegment size field.
 *
 * Returns VA_STATUS_ERROR_INVALID_BUFFER on NULL arguments, otherwise
 * VA_STATUS_SUCCESS. (Closing brace elided in this excerpt.)
 */
5847 gen9_vp9_get_coded_status(VADriverContextP ctx,
5848 struct intel_encoder_context *encoder_context,
5849 struct i965_coded_buffer_segment *coded_buf_seg)
5851 struct vp9_encode_status *vp9_encode_status;
5853 if (!encoder_context || !coded_buf_seg)
5854 return VA_STATUS_ERROR_INVALID_BUFFER;
5856 vp9_encode_status = (struct vp9_encode_status *)coded_buf_seg->codec_private_data;
5857 coded_buf_seg->base.size = vp9_encode_status->bs_byte_count;
5859 /* One VACodedBufferSegment for VP9 will be added later.
5860 * It will be linked to the next element of coded_buf_seg->base.next
5863 return VA_STATUS_SUCCESS;
5867 gen9_vp9_pak_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
5869 /* VME & PAK share the same context */
5870 struct gen9_encoder_context_vp9 *pak_context = encoder_context->vme_context;
5875 encoder_context->mfc_context = pak_context;
5876 encoder_context->mfc_context_destroy = gen9_vp9_pak_context_destroy;
5877 encoder_context->mfc_pipeline = gen9_vp9_pak_pipeline;
5878 encoder_context->mfc_brc_prepare = gen9_vp9_pak_brc_prepare;
5879 encoder_context->get_status = gen9_vp9_get_coded_status;