2 * Copyright © 2016 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the
6 * "Software"), to deal in the Software without restriction, including
7 * without limitation the rights to use, copy, modify, merge, publish,
8 * distribute, sub license, and/or sell copies of the Software, and to
9 * permit persons to whom the Software is furnished to do so, subject to
10 * the following conditions:
12 * The above copyright notice and this permission notice (including the
13 * next paragraph) shall be included in all copies or substantial portions
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19 * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * Zhao, Yakui <yakui.zhao@intel.com>
37 #include "intel_batchbuffer.h"
38 #include "intel_driver.h"
40 #include "i965_defines.h"
41 #include "i965_drv_video.h"
42 #include "i965_encoder.h"
43 #include "gen9_vp9_encapi.h"
44 #include "gen9_vp9_encoder.h"
45 #include "gen9_vp9_encoder_kernels.h"
46 #include "vp9_probs.h"
47 #include "gen9_vp9_const_def.h"
49 #define MAX_VP9_ENCODER_SURFACES 64
51 #define MAX_URB_SIZE 4096 /* In register */
52 #define NUM_KERNELS_PER_GPE_CONTEXT 1
54 #define VP9_BRC_KBPS 1000
56 #define BRC_KERNEL_CBR 0x0010
57 #define BRC_KERNEL_VBR 0x0020
58 #define BRC_KERNEL_AVBR 0x0040
59 #define BRC_KERNEL_CQL 0x0080
61 #define VP9_PIC_STATE_BUFFER_SIZE 192
/* One entry of the kernel directory embedded at the start of the VP9
 * encoder kernel blob.  kernel_start_pointer is the kernel's offset within
 * the blob in 64-byte units (consumers shift it left by 6 to get bytes).
 */
typedef struct _intel_kernel_header_ {
    uint32_t reserved              : 6;
    uint32_t kernel_start_pointer  : 26;  /* byte offset >> 6 */
} intel_kernel_header;
68 typedef struct _intel_vp9_kernel_header {
70 intel_kernel_header PLY_DSCALE;
71 intel_kernel_header VP9_ME_P;
72 intel_kernel_header VP9_Enc_I_32x32;
73 intel_kernel_header VP9_Enc_I_16x16;
74 intel_kernel_header VP9_Enc_P;
75 intel_kernel_header VP9_Enc_TX;
76 intel_kernel_header VP9_DYS;
78 intel_kernel_header VP9BRC_Intra_Distortion;
79 intel_kernel_header VP9BRC_Init;
80 intel_kernel_header VP9BRC_Reset;
81 intel_kernel_header VP9BRC_Update;
82 } intel_vp9_kernel_header;
84 #define DYS_1X_FLAG 0x01
85 #define DYS_4X_FLAG 0x02
86 #define DYS_16X_FLAG 0x04
/* Requested pixel dimensions for an input surface; used to decide whether
 * the cached per-surface scaling/DYS buffers must be (re)allocated.
 * NOTE(review): frame_width was elided in this view of the file but is
 * read via surface_param->frame_width by the checking helpers — confirm
 * the member list against the original file.
 */
struct vp9_surface_param {
    uint32_t frame_width;
    uint32_t frame_height;
};
/* Convert a signed integer into sign-magnitude form with the sign stored
 * in bit (sign_bit_pos - 1) and the magnitude masked to the bits below it.
 * Returns the packed sign-magnitude value.
 */
static uint32_t intel_convert_sign_mag(int val, int sign_bit_pos)
{
    uint32_t ret_val = 0;

    if (val < 0) {
        /* Negative: set the sign bit and store the magnitude below it. */
        val = -val;
        ret_val = ((1 << (sign_bit_pos - 1)) | (val & ((1 << (sign_bit_pos - 1)) - 1)));
    } else {
        ret_val = val & ((1 << (sign_bit_pos - 1)) - 1);
    }
    return ret_val;
}
106 intel_vp9_get_kernel_header_and_size(
109 INTEL_VP9_ENC_OPERATION operation,
111 struct i965_kernel *ret_kernel)
113 typedef uint32_t BIN_PTR[4];
116 intel_vp9_kernel_header *pkh_table;
117 intel_kernel_header *pcurr_header, *pinvalid_entry, *pnext_header;
120 if (!pvbinary || !ret_kernel)
123 bin_start = (char *)pvbinary;
124 pkh_table = (intel_vp9_kernel_header *)pvbinary;
125 pinvalid_entry = &(pkh_table->VP9BRC_Update) + 1;
126 next_krnoffset = binary_size;
128 if ((operation == INTEL_VP9_ENC_SCALING4X) || (operation == INTEL_VP9_ENC_SCALING2X)) {
129 pcurr_header = &pkh_table->PLY_DSCALE;
130 } else if (operation == INTEL_VP9_ENC_ME) {
131 pcurr_header = &pkh_table->VP9_ME_P;
132 } else if (operation == INTEL_VP9_ENC_MBENC) {
133 pcurr_header = &pkh_table->VP9_Enc_I_32x32;
134 } else if (operation == INTEL_VP9_ENC_DYS) {
135 pcurr_header = &pkh_table->VP9_DYS;
136 } else if (operation == INTEL_VP9_ENC_BRC) {
137 pcurr_header = &pkh_table->VP9BRC_Intra_Distortion;
142 pcurr_header += krnstate_idx;
143 ret_kernel->bin = (const BIN_PTR *)(bin_start + (pcurr_header->kernel_start_pointer << 6));
145 pnext_header = (pcurr_header + 1);
146 if (pnext_header < pinvalid_entry) {
147 next_krnoffset = pnext_header->kernel_start_pointer << 6;
149 ret_kernel->size = next_krnoffset - (pcurr_header->kernel_start_pointer << 6);
156 gen9_free_surfaces_vp9(void **data)
158 struct gen9_surface_vp9 *vp9_surface;
165 if (vp9_surface->scaled_4x_surface_obj) {
166 i965_DestroySurfaces(vp9_surface->ctx, &vp9_surface->scaled_4x_surface_id, 1);
167 vp9_surface->scaled_4x_surface_id = VA_INVALID_SURFACE;
168 vp9_surface->scaled_4x_surface_obj = NULL;
171 if (vp9_surface->scaled_16x_surface_obj) {
172 i965_DestroySurfaces(vp9_surface->ctx, &vp9_surface->scaled_16x_surface_id, 1);
173 vp9_surface->scaled_16x_surface_id = VA_INVALID_SURFACE;
174 vp9_surface->scaled_16x_surface_obj = NULL;
177 if (vp9_surface->dys_4x_surface_obj) {
178 i965_DestroySurfaces(vp9_surface->ctx, &vp9_surface->dys_4x_surface_id, 1);
179 vp9_surface->dys_4x_surface_id = VA_INVALID_SURFACE;
180 vp9_surface->dys_4x_surface_obj = NULL;
183 if (vp9_surface->dys_16x_surface_obj) {
184 i965_DestroySurfaces(vp9_surface->ctx, &vp9_surface->dys_16x_surface_id, 1);
185 vp9_surface->dys_16x_surface_id = VA_INVALID_SURFACE;
186 vp9_surface->dys_16x_surface_obj = NULL;
189 if (vp9_surface->dys_surface_obj) {
190 i965_DestroySurfaces(vp9_surface->ctx, &vp9_surface->dys_surface_id, 1);
191 vp9_surface->dys_surface_id = VA_INVALID_SURFACE;
192 vp9_surface->dys_surface_obj = NULL;
203 gen9_vp9_init_check_surfaces(VADriverContextP ctx,
204 struct object_surface *obj_surface,
205 struct vp9_surface_param *surface_param)
207 struct i965_driver_data *i965 = i965_driver_data(ctx);
208 struct gen9_surface_vp9 *vp9_surface;
209 int downscaled_width_4x, downscaled_height_4x;
210 int downscaled_width_16x, downscaled_height_16x;
212 if (!obj_surface || !obj_surface->bo)
213 return VA_STATUS_ERROR_INVALID_SURFACE;
215 if (obj_surface->private_data &&
216 obj_surface->free_private_data != gen9_free_surfaces_vp9) {
217 obj_surface->free_private_data(&obj_surface->private_data);
218 obj_surface->private_data = NULL;
221 if (obj_surface->private_data) {
222 /* if the frame width/height is already the same as the expected,
223 * it is unncessary to reallocate it.
225 vp9_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
226 if (vp9_surface->frame_width >= surface_param->frame_width ||
227 vp9_surface->frame_height >= surface_param->frame_height)
228 return VA_STATUS_SUCCESS;
230 obj_surface->free_private_data(&obj_surface->private_data);
231 obj_surface->private_data = NULL;
235 vp9_surface = calloc(1, sizeof(struct gen9_surface_vp9));
238 return VA_STATUS_ERROR_ALLOCATION_FAILED;
240 vp9_surface->ctx = ctx;
241 obj_surface->private_data = vp9_surface;
242 obj_surface->free_private_data = gen9_free_surfaces_vp9;
244 vp9_surface->frame_width = surface_param->frame_width;
245 vp9_surface->frame_height = surface_param->frame_height;
247 downscaled_width_4x = ALIGN(surface_param->frame_width / 4, 16);
248 downscaled_height_4x = ALIGN(surface_param->frame_height / 4, 16);
250 i965_CreateSurfaces(ctx,
252 downscaled_height_4x,
255 &vp9_surface->scaled_4x_surface_id);
257 vp9_surface->scaled_4x_surface_obj = SURFACE(vp9_surface->scaled_4x_surface_id);
259 if (!vp9_surface->scaled_4x_surface_obj) {
260 return VA_STATUS_ERROR_ALLOCATION_FAILED;
263 i965_check_alloc_surface_bo(ctx, vp9_surface->scaled_4x_surface_obj, 1,
264 VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
266 downscaled_width_16x = ALIGN(surface_param->frame_width / 16, 16);
267 downscaled_height_16x = ALIGN(surface_param->frame_height / 16, 16);
268 i965_CreateSurfaces(ctx,
269 downscaled_width_16x,
270 downscaled_height_16x,
273 &vp9_surface->scaled_16x_surface_id);
274 vp9_surface->scaled_16x_surface_obj = SURFACE(vp9_surface->scaled_16x_surface_id);
276 if (!vp9_surface->scaled_16x_surface_obj) {
277 return VA_STATUS_ERROR_ALLOCATION_FAILED;
280 i965_check_alloc_surface_bo(ctx, vp9_surface->scaled_16x_surface_obj, 1,
281 VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
283 return VA_STATUS_SUCCESS;
287 gen9_vp9_check_dys_surfaces(VADriverContextP ctx,
288 struct object_surface *obj_surface,
289 struct vp9_surface_param *surface_param)
291 struct i965_driver_data *i965 = i965_driver_data(ctx);
292 struct gen9_surface_vp9 *vp9_surface;
293 int dys_width_4x, dys_height_4x;
294 int dys_width_16x, dys_height_16x;
296 /* As this is handled after the surface checking, it is unnecessary
297 * to check the surface bo and vp9_priv_surface again
300 vp9_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
303 return VA_STATUS_ERROR_INVALID_SURFACE;
305 /* if the frame_width/height of dys_surface is the same as
306 * the expected, it is unnecessary to allocate it again
308 if (vp9_surface->dys_frame_width == surface_param->frame_width &&
309 vp9_surface->dys_frame_width == surface_param->frame_width)
310 return VA_STATUS_SUCCESS;
312 if (vp9_surface->dys_4x_surface_obj) {
313 i965_DestroySurfaces(vp9_surface->ctx, &vp9_surface->dys_4x_surface_id, 1);
314 vp9_surface->dys_4x_surface_id = VA_INVALID_SURFACE;
315 vp9_surface->dys_4x_surface_obj = NULL;
318 if (vp9_surface->dys_16x_surface_obj) {
319 i965_DestroySurfaces(vp9_surface->ctx, &vp9_surface->dys_16x_surface_id, 1);
320 vp9_surface->dys_16x_surface_id = VA_INVALID_SURFACE;
321 vp9_surface->dys_16x_surface_obj = NULL;
324 if (vp9_surface->dys_surface_obj) {
325 i965_DestroySurfaces(vp9_surface->ctx, &vp9_surface->dys_surface_id, 1);
326 vp9_surface->dys_surface_id = VA_INVALID_SURFACE;
327 vp9_surface->dys_surface_obj = NULL;
330 vp9_surface->dys_frame_width = surface_param->frame_width;
331 vp9_surface->dys_frame_height = surface_param->frame_height;
333 i965_CreateSurfaces(ctx,
334 surface_param->frame_width,
335 surface_param->frame_height,
338 &vp9_surface->dys_surface_id);
339 vp9_surface->dys_surface_obj = SURFACE(vp9_surface->dys_surface_id);
341 if (!vp9_surface->dys_surface_obj) {
342 return VA_STATUS_ERROR_ALLOCATION_FAILED;
345 i965_check_alloc_surface_bo(ctx, vp9_surface->dys_surface_obj, 1,
346 VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
348 dys_width_4x = ALIGN(surface_param->frame_width / 4, 16);
349 dys_height_4x = ALIGN(surface_param->frame_width / 4, 16);
351 i965_CreateSurfaces(ctx,
356 &vp9_surface->dys_4x_surface_id);
358 vp9_surface->dys_4x_surface_obj = SURFACE(vp9_surface->dys_4x_surface_id);
360 if (!vp9_surface->dys_4x_surface_obj) {
361 return VA_STATUS_ERROR_ALLOCATION_FAILED;
364 i965_check_alloc_surface_bo(ctx, vp9_surface->dys_4x_surface_obj, 1,
365 VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
367 dys_width_16x = ALIGN(surface_param->frame_width / 16, 16);
368 dys_height_16x = ALIGN(surface_param->frame_width / 16, 16);
369 i965_CreateSurfaces(ctx,
374 &vp9_surface->dys_16x_surface_id);
375 vp9_surface->dys_16x_surface_obj = SURFACE(vp9_surface->dys_16x_surface_id);
377 if (!vp9_surface->dys_16x_surface_obj) {
378 return VA_STATUS_ERROR_ALLOCATION_FAILED;
381 i965_check_alloc_surface_bo(ctx, vp9_surface->dys_16x_surface_obj, 1,
382 VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
384 return VA_STATUS_SUCCESS;
/* Allocate (or re-allocate) every GPE buffer the VP9 encoder uses:
 * the BRC state buffers first, then the per-resolution PAK/ME buffers,
 * which are skipped when the previously allocated resolution already
 * covers the current frame size.  Returns VA_STATUS_SUCCESS, or a
 * VA_STATUS_ERROR_* code on bad parameters / allocation failure.
 *
 * NOTE(review): this view of the file elides several lines (the return
 * type line, some res_size assignments, the "if (!allocate_flag)" checks
 * before each "goto failed_allocation", closing braces, and the
 * "failed_allocation:" label itself); comments below only annotate what
 * is visible — confirm the elided lines against the original file.
 */
388 gen9_vp9_allocate_resources(VADriverContextP ctx,
389 struct encode_state *encode_state,
390 struct intel_encoder_context *encoder_context,
393 struct i965_driver_data *i965 = i965_driver_data(ctx);
394 struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
395 struct gen9_vp9_state *vp9_state;
396 int allocate_flag, i;
398 uint32_t frame_width_in_sb, frame_height_in_sb, frame_sb_num;
399 unsigned int width, height;
401 vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
403 if (!vp9_state || !vp9_state->pic_param)
404 return VA_STATUS_ERROR_INVALID_PARAMETER;
406 /* the buffer related with BRC is not changed. So it is allocated
407 * based on the input parameter
 */
/* --- BRC buffers: freed and re-created unconditionally here --- */
410 i965_free_gpe_resource(&vme_context->res_brc_history_buffer);
411 i965_free_gpe_resource(&vme_context->res_brc_const_data_buffer);
412 i965_free_gpe_resource(&vme_context->res_brc_mbenc_curbe_write_buffer);
413 i965_free_gpe_resource(&vme_context->res_pic_state_brc_read_buffer);
414 i965_free_gpe_resource(&vme_context->res_pic_state_brc_write_hfw_read_buffer);
415 i965_free_gpe_resource(&vme_context->res_pic_state_hfw_write_buffer);
416 i965_free_gpe_resource(&vme_context->res_seg_state_brc_read_buffer);
417 i965_free_gpe_resource(&vme_context->res_seg_state_brc_write_buffer);
418 i965_free_gpe_resource(&vme_context->res_brc_bitstream_size_buffer);
419 i965_free_gpe_resource(&vme_context->res_brc_hfw_data_buffer);
420 i965_free_gpe_resource(&vme_context->res_brc_mmdk_pak_buffer);
422 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
423 &vme_context->res_brc_history_buffer,
424 VP9_BRC_HISTORY_BUFFER_SIZE,
425 "Brc History buffer");
427 goto failed_allocation;
428 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
429 &vme_context->res_brc_const_data_buffer,
430 VP9_BRC_CONSTANTSURFACE_SIZE,
431 "Brc Constant buffer");
433 goto failed_allocation;
435 res_size = ALIGN(sizeof(vp9_mbenc_curbe_data), 64) + 128 +
436 ALIGN(sizeof(struct gen8_interface_descriptor_data), 64) * NUM_VP9_MBENC;
437 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
438 &vme_context->res_brc_mbenc_curbe_write_buffer,
442 goto failed_allocation;
444 res_size = VP9_PIC_STATE_BUFFER_SIZE * 4;
445 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
446 &vme_context->res_pic_state_brc_read_buffer,
448 "Pic State Brc_read");
450 goto failed_allocation;
452 res_size = VP9_PIC_STATE_BUFFER_SIZE * 4;
453 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
454 &vme_context->res_pic_state_brc_write_hfw_read_buffer,
456 "Pic State Brc_write Hfw_Read");
458 goto failed_allocation;
460 res_size = VP9_PIC_STATE_BUFFER_SIZE * 4;
461 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
462 &vme_context->res_pic_state_hfw_write_buffer,
464 "Pic State Hfw Write");
466 goto failed_allocation;
468 res_size = VP9_SEGMENT_STATE_BUFFER_SIZE;
469 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
470 &vme_context->res_seg_state_brc_read_buffer,
472 "Segment state brc_read");
474 goto failed_allocation;
476 res_size = VP9_SEGMENT_STATE_BUFFER_SIZE;
477 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
478 &vme_context->res_seg_state_brc_write_buffer,
480 "Segment state brc_write");
482 goto failed_allocation;
484 res_size = VP9_BRC_BITSTREAM_SIZE_BUFFER_SIZE;
485 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
486 &vme_context->res_brc_bitstream_size_buffer,
488 "Brc bitstream buffer");
490 goto failed_allocation;
492 res_size = VP9_HFW_BRC_DATA_BUFFER_SIZE;
493 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
494 &vme_context->res_brc_hfw_data_buffer,
498 goto failed_allocation;
500 res_size = VP9_BRC_MMDK_PAK_BUFFER_SIZE;
501 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
502 &vme_context->res_brc_mmdk_pak_buffer,
506 goto failed_allocation;
509 /* If the width/height of allocated buffer is greater than the expected,
510 * it is unnecessary to allocate it again
 */
512 if (vp9_state->res_width >= vp9_state->frame_width &&
513 vp9_state->res_height >= vp9_state->frame_height) {
515 return VA_STATUS_SUCCESS;
/* --- Per-resolution buffers, sized in 64x64 super-blocks --- */
517 frame_width_in_sb = ALIGN(vp9_state->frame_width, 64) / 64;
518 frame_height_in_sb = ALIGN(vp9_state->frame_height, 64) / 64;
519 frame_sb_num = frame_width_in_sb * frame_height_in_sb;
521 i965_free_gpe_resource(&vme_context->res_hvd_line_buffer);
522 res_size = frame_width_in_sb * 64;
523 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
524 &vme_context->res_hvd_line_buffer,
526 "VP9 hvd line line");
528 goto failed_allocation;
530 i965_free_gpe_resource(&vme_context->res_hvd_tile_line_buffer);
531 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
532 &vme_context->res_hvd_tile_line_buffer,
534 "VP9 hvd tile_line line");
536 goto failed_allocation;
538 i965_free_gpe_resource(&vme_context->res_deblocking_filter_line_buffer);
539 res_size = frame_width_in_sb * 18 * 64;
540 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
541 &vme_context->res_deblocking_filter_line_buffer,
543 "VP9 deblocking filter line");
545 goto failed_allocation;
547 i965_free_gpe_resource(&vme_context->res_deblocking_filter_tile_line_buffer);
548 res_size = frame_width_in_sb * 18 * 64;
549 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
550 &vme_context->res_deblocking_filter_tile_line_buffer,
552 "VP9 deblocking tile line");
554 goto failed_allocation;
556 i965_free_gpe_resource(&vme_context->res_deblocking_filter_tile_col_buffer);
557 res_size = frame_height_in_sb * 17 * 64;
558 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
559 &vme_context->res_deblocking_filter_tile_col_buffer,
561 "VP9 deblocking tile col");
563 goto failed_allocation;
565 i965_free_gpe_resource(&vme_context->res_metadata_line_buffer);
566 res_size = frame_width_in_sb * 5 * 64;
567 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
568 &vme_context->res_metadata_line_buffer,
570 "VP9 metadata line");
572 goto failed_allocation;
574 i965_free_gpe_resource(&vme_context->res_metadata_tile_line_buffer);
575 res_size = frame_width_in_sb * 5 * 64;
576 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
577 &vme_context->res_metadata_tile_line_buffer,
579 "VP9 metadata tile line");
581 goto failed_allocation;
583 i965_free_gpe_resource(&vme_context->res_metadata_tile_col_buffer);
584 res_size = frame_height_in_sb * 5 * 64;
585 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
586 &vme_context->res_metadata_tile_col_buffer,
588 "VP9 metadata tile col");
590 goto failed_allocation;
592 i965_free_gpe_resource(&vme_context->res_prob_buffer);
594 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
595 &vme_context->res_prob_buffer,
599 goto failed_allocation;
601 i965_free_gpe_resource(&vme_context->res_segmentid_buffer);
602 res_size = frame_sb_num * 64;
603 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
604 &vme_context->res_segmentid_buffer,
608 goto failed_allocation;
610 i965_zero_gpe_resource(&vme_context->res_segmentid_buffer);
612 i965_free_gpe_resource(&vme_context->res_prob_delta_buffer);
614 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
615 &vme_context->res_prob_delta_buffer,
619 goto failed_allocation;
/* NOTE(review): the next three statements re-zero res_segmentid_buffer
 * (already zeroed above) and then free/re-allocate res_prob_delta_buffer
 * a second time — this looks like a copy-paste duplication; the second
 * zero was presumably meant for res_prob_delta_buffer.  Harmless but
 * wasteful — confirm and deduplicate in the original file. */
621 i965_zero_gpe_resource(&vme_context->res_segmentid_buffer);
623 i965_free_gpe_resource(&vme_context->res_prob_delta_buffer);
625 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
626 &vme_context->res_prob_delta_buffer,
630 goto failed_allocation;
632 i965_free_gpe_resource(&vme_context->res_compressed_input_buffer);
634 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
635 &vme_context->res_compressed_input_buffer,
637 "VP9 compressed_input buffer");
639 goto failed_allocation;
641 i965_free_gpe_resource(&vme_context->res_prob_counter_buffer);
643 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
644 &vme_context->res_prob_counter_buffer,
648 goto failed_allocation;
650 i965_free_gpe_resource(&vme_context->res_tile_record_streamout_buffer);
651 res_size = frame_sb_num * 64;
652 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
653 &vme_context->res_tile_record_streamout_buffer,
655 "VP9 tile record stream_out");
657 goto failed_allocation;
659 i965_free_gpe_resource(&vme_context->res_cu_stat_streamout_buffer);
660 res_size = frame_sb_num * 64;
661 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
662 &vme_context->res_cu_stat_streamout_buffer,
664 "VP9 CU stat stream_out");
666 goto failed_allocation;
/* --- 2D surfaces for the 4x/16x HME (MEMV) stages --- */
668 width = vp9_state->downscaled_width_4x_in_mb * 32;
669 height = vp9_state->downscaled_height_4x_in_mb * 16;
670 i965_free_gpe_resource(&vme_context->s4x_memv_data_buffer);
671 allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
672 &vme_context->s4x_memv_data_buffer,
677 goto failed_allocation;
679 width = vp9_state->downscaled_width_4x_in_mb * 8;
680 height = vp9_state->downscaled_height_4x_in_mb * 16;
681 i965_free_gpe_resource(&vme_context->s4x_memv_distortion_buffer);
682 allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
683 &vme_context->s4x_memv_distortion_buffer,
686 "VP9 4x MEMV distorion");
688 goto failed_allocation;
690 width = ALIGN(vp9_state->downscaled_width_16x_in_mb * 32, 64);
691 height = vp9_state->downscaled_height_16x_in_mb * 16;
692 i965_free_gpe_resource(&vme_context->s16x_memv_data_buffer);
693 allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
694 &vme_context->s16x_memv_data_buffer,
697 "VP9 16x MEMV data");
699 goto failed_allocation;
701 width = vp9_state->frame_width_in_mb * 16;
702 height = vp9_state->frame_height_in_mb * 8;
703 i965_free_gpe_resource(&vme_context->res_output_16x16_inter_modes);
704 allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
705 &vme_context->res_output_16x16_inter_modes,
708 "VP9 output inter_mode");
710 goto failed_allocation;
712 res_size = vp9_state->frame_width_in_mb * vp9_state->frame_height_in_mb *
714 for (i = 0; i < 2; i++) {
715 i965_free_gpe_resource(&vme_context->res_mode_decision[i]);
716 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
717 &vme_context->res_mode_decision[i],
719 "VP9 mode decision");
721 goto failed_allocation;
725 res_size = frame_sb_num * 9 * 64;
726 for (i = 0; i < 2; i++) {
727 i965_free_gpe_resource(&vme_context->res_mv_temporal_buffer[i]);
728 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
729 &vme_context->res_mv_temporal_buffer[i],
733 goto failed_allocation;
/* mb_code surface: PAK object data starts at mb_data_offset. */
736 vp9_state->mb_data_offset = ALIGN(frame_sb_num * 16, 4096) + 4096;
737 res_size = vp9_state->mb_data_offset + frame_sb_num * 64 * 64 + 1000;
738 i965_free_gpe_resource(&vme_context->res_mb_code_surface);
739 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
740 &vme_context->res_mb_code_surface,
741 ALIGN(res_size, 4096),
742 "VP9 mb_code surface");
744 goto failed_allocation;
747 i965_free_gpe_resource(&vme_context->res_pak_uncompressed_input_buffer);
748 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
749 &vme_context->res_pak_uncompressed_input_buffer,
750 ALIGN(res_size, 4096),
751 "VP9 pak_uncompressed_input");
753 goto failed_allocation;
755 if (!vme_context->frame_header_data) {
756 /* allocate 512 bytes for generating the uncompressed header */
757 vme_context->frame_header_data = calloc(1, 512);
/* Record the resolution this allocation covers for the reuse check above. */
760 vp9_state->res_width = vp9_state->frame_width;
761 vp9_state->res_height = vp9_state->frame_height;
763 return VA_STATUS_SUCCESS;
/* NOTE(review): the "failed_allocation:" label line is elided in this view. */
766 return VA_STATUS_ERROR_ALLOCATION_FAILED;
770 gen9_vp9_free_resources(struct gen9_encoder_context_vp9 *vme_context)
773 struct gen9_vp9_state *vp9_state = (struct gen9_vp9_state *) vme_context->enc_priv_state;
775 if (vp9_state->brc_enabled) {
776 i965_free_gpe_resource(&vme_context->res_brc_history_buffer);
777 i965_free_gpe_resource(&vme_context->res_brc_const_data_buffer);
778 i965_free_gpe_resource(&vme_context->res_brc_mbenc_curbe_write_buffer);
779 i965_free_gpe_resource(&vme_context->res_pic_state_brc_read_buffer);
780 i965_free_gpe_resource(&vme_context->res_pic_state_brc_write_hfw_read_buffer);
781 i965_free_gpe_resource(&vme_context->res_pic_state_hfw_write_buffer);
782 i965_free_gpe_resource(&vme_context->res_seg_state_brc_read_buffer);
783 i965_free_gpe_resource(&vme_context->res_seg_state_brc_write_buffer);
784 i965_free_gpe_resource(&vme_context->res_brc_bitstream_size_buffer);
785 i965_free_gpe_resource(&vme_context->res_brc_hfw_data_buffer);
786 i965_free_gpe_resource(&vme_context->res_brc_mmdk_pak_buffer);
789 i965_free_gpe_resource(&vme_context->res_hvd_line_buffer);
790 i965_free_gpe_resource(&vme_context->res_hvd_tile_line_buffer);
791 i965_free_gpe_resource(&vme_context->res_deblocking_filter_line_buffer);
792 i965_free_gpe_resource(&vme_context->res_deblocking_filter_tile_line_buffer);
793 i965_free_gpe_resource(&vme_context->res_deblocking_filter_tile_col_buffer);
794 i965_free_gpe_resource(&vme_context->res_metadata_line_buffer);
795 i965_free_gpe_resource(&vme_context->res_metadata_tile_line_buffer);
796 i965_free_gpe_resource(&vme_context->res_metadata_tile_col_buffer);
797 i965_free_gpe_resource(&vme_context->res_prob_buffer);
798 i965_free_gpe_resource(&vme_context->res_segmentid_buffer);
799 i965_free_gpe_resource(&vme_context->res_prob_delta_buffer);
800 i965_free_gpe_resource(&vme_context->res_prob_counter_buffer);
801 i965_free_gpe_resource(&vme_context->res_tile_record_streamout_buffer);
802 i965_free_gpe_resource(&vme_context->res_cu_stat_streamout_buffer);
803 i965_free_gpe_resource(&vme_context->s4x_memv_data_buffer);
804 i965_free_gpe_resource(&vme_context->s4x_memv_distortion_buffer);
805 i965_free_gpe_resource(&vme_context->s16x_memv_data_buffer);
806 i965_free_gpe_resource(&vme_context->res_output_16x16_inter_modes);
807 for (i = 0; i < 2; i++) {
808 i965_free_gpe_resource(&vme_context->res_mode_decision[i]);
811 for (i = 0; i < 2; i++) {
812 i965_free_gpe_resource(&vme_context->res_mv_temporal_buffer[i]);
815 i965_free_gpe_resource(&vme_context->res_compressed_input_buffer);
816 i965_free_gpe_resource(&vme_context->res_mb_code_surface);
817 i965_free_gpe_resource(&vme_context->res_pak_uncompressed_input_buffer);
819 if (vme_context->frame_header_data) {
820 free(vme_context->frame_header_data);
821 vme_context->frame_header_data = NULL;
827 gen9_init_media_object_walker_parameter(struct intel_encoder_context *encoder_context,
828 struct gpe_encoder_kernel_walker_parameter *kernel_walker_param,
829 struct gpe_media_object_walker_parameter *walker_param)
831 memset(walker_param, 0, sizeof(*walker_param));
833 walker_param->use_scoreboard = kernel_walker_param->use_scoreboard;
835 walker_param->block_resolution.x = kernel_walker_param->resolution_x;
836 walker_param->block_resolution.y = kernel_walker_param->resolution_y;
838 walker_param->global_resolution.x = kernel_walker_param->resolution_x;
839 walker_param->global_resolution.y = kernel_walker_param->resolution_y;
841 walker_param->global_outer_loop_stride.x = kernel_walker_param->resolution_x;
842 walker_param->global_outer_loop_stride.y = 0;
844 walker_param->global_inner_loop_unit.x = 0;
845 walker_param->global_inner_loop_unit.y = kernel_walker_param->resolution_y;
847 walker_param->local_loop_exec_count = 0xFFFF; //MAX VALUE
848 walker_param->global_loop_exec_count = 0xFFFF; //MAX VALUE
850 if (kernel_walker_param->no_dependency) {
851 walker_param->scoreboard_mask = 0;
852 walker_param->use_scoreboard = 0;
853 // Raster scan walking pattern
854 walker_param->local_outer_loop_stride.x = 0;
855 walker_param->local_outer_loop_stride.y = 1;
856 walker_param->local_inner_loop_unit.x = 1;
857 walker_param->local_inner_loop_unit.y = 0;
858 walker_param->local_end.x = kernel_walker_param->resolution_x - 1;
859 walker_param->local_end.y = 0;
861 walker_param->local_end.x = 0;
862 walker_param->local_end.y = 0;
864 if (kernel_walker_param->walker_degree == VP9_45Z_DEGREE) {
866 walker_param->scoreboard_mask = 0x0F;
868 walker_param->global_loop_exec_count = 0x3FF;
869 walker_param->local_loop_exec_count = 0x3FF;
871 walker_param->global_resolution.x = (unsigned int)(kernel_walker_param->resolution_x / 2.f) + 1;
872 walker_param->global_resolution.y = 2 * kernel_walker_param->resolution_y;
874 walker_param->global_start.x = 0;
875 walker_param->global_start.y = 0;
877 walker_param->global_outer_loop_stride.x = walker_param->global_resolution.x;
878 walker_param->global_outer_loop_stride.y = 0;
880 walker_param->global_inner_loop_unit.x = 0;
881 walker_param->global_inner_loop_unit.y = walker_param->global_resolution.y;
883 walker_param->block_resolution.x = walker_param->global_resolution.x;
884 walker_param->block_resolution.y = walker_param->global_resolution.y;
886 walker_param->local_start.x = 0;
887 walker_param->local_start.y = 0;
889 walker_param->local_outer_loop_stride.x = 1;
890 walker_param->local_outer_loop_stride.y = 0;
892 walker_param->local_inner_loop_unit.x = -1;
893 walker_param->local_inner_loop_unit.y = 4;
895 walker_param->middle_loop_extra_steps = 3;
896 walker_param->mid_loop_unit_x = 0;
897 walker_param->mid_loop_unit_y = 1;
900 walker_param->scoreboard_mask = 0x0F;
901 walker_param->local_outer_loop_stride.x = 1;
902 walker_param->local_outer_loop_stride.y = 0;
903 walker_param->local_inner_loop_unit.x = -2;
904 walker_param->local_inner_loop_unit.y = 1;
910 gen9_run_kernel_media_object(VADriverContextP ctx,
911 struct intel_encoder_context *encoder_context,
912 struct i965_gpe_context *gpe_context,
914 struct gpe_media_object_parameter *param)
916 struct intel_batchbuffer *batch = encoder_context->base.batch;
917 struct vp9_encode_status_buffer_internal *status_buffer;
918 struct gen9_vp9_state *vp9_state;
919 struct gpe_mi_store_data_imm_parameter mi_store_data_imm;
921 vp9_state = (struct gen9_vp9_state *)(encoder_context->enc_priv_state);
922 if (!vp9_state || !batch)
925 intel_batchbuffer_start_atomic(batch, 0x1000);
927 status_buffer = &(vp9_state->status_buffer);
928 memset(&mi_store_data_imm, 0, sizeof(mi_store_data_imm));
929 mi_store_data_imm.bo = status_buffer->bo;
930 mi_store_data_imm.offset = status_buffer->media_index_offset;
931 mi_store_data_imm.dw0 = media_function;
932 gen8_gpe_mi_store_data_imm(ctx, batch, &mi_store_data_imm);
934 intel_batchbuffer_emit_mi_flush(batch);
935 gen9_gpe_pipeline_setup(ctx, gpe_context, batch);
936 gen8_gpe_media_object(ctx, gpe_context, batch, param);
937 gen8_gpe_media_state_flush(ctx, gpe_context, batch);
939 gen9_gpe_pipeline_end(ctx, gpe_context, batch);
941 intel_batchbuffer_end_atomic(batch);
943 intel_batchbuffer_flush(batch);
947 gen9_run_kernel_media_object_walker(VADriverContextP ctx,
948 struct intel_encoder_context *encoder_context,
949 struct i965_gpe_context *gpe_context,
951 struct gpe_media_object_walker_parameter *param)
953 struct intel_batchbuffer *batch = encoder_context->base.batch;
954 struct vp9_encode_status_buffer_internal *status_buffer;
955 struct gen9_vp9_state *vp9_state;
956 struct gpe_mi_store_data_imm_parameter mi_store_data_imm;
958 vp9_state = (struct gen9_vp9_state *)(encoder_context->enc_priv_state);
959 if (!vp9_state || !batch)
962 intel_batchbuffer_start_atomic(batch, 0x1000);
964 intel_batchbuffer_emit_mi_flush(batch);
966 status_buffer = &(vp9_state->status_buffer);
967 memset(&mi_store_data_imm, 0, sizeof(mi_store_data_imm));
968 mi_store_data_imm.bo = status_buffer->bo;
969 mi_store_data_imm.offset = status_buffer->media_index_offset;
970 mi_store_data_imm.dw0 = media_function;
971 gen8_gpe_mi_store_data_imm(ctx, batch, &mi_store_data_imm);
973 gen9_gpe_pipeline_setup(ctx, gpe_context, batch);
974 gen8_gpe_media_object_walker(ctx, gpe_context, batch, param);
975 gen8_gpe_media_state_flush(ctx, gpe_context, batch);
977 gen9_gpe_pipeline_end(ctx, gpe_context, batch);
979 intel_batchbuffer_end_atomic(batch);
981 intel_batchbuffer_flush(batch);
/*
 * gen9_vp9_set_curbe_brc - program the CURBE (constant URB entry) payload
 * consumed by the gen9 VP9 BRC kernels, for whichever media state the
 * caller requests (init/reset, per-frame update, or I-frame distortion).
 *
 * ctx             - VA driver context
 * encode_state    - current encode state (unused directly here)
 * gpe_context     - GPE context whose CURBE buffer is mapped and filled
 * encoder_context - encoder context holding enc_priv_state and RC mode
 * param           - BRC curbe parameters (pic/seq/segment params, media
 *                   state type, and pointers to BRC bookkeeping values
 *                   that this function both reads and writes)
 *
 * NOTE(review): the embedded original line numbers show gaps, so some
 * statements (returns, braces, `else` keywords, expression
 * continuations such as the `* VP9_BRC_KBPS` tails of the bit-rate
 * computations) were lost in extraction; the surviving lines are kept
 * byte-identical below.
 */
985 void gen9_vp9_set_curbe_brc(VADriverContextP ctx,
986 struct encode_state *encode_state,
987 struct i965_gpe_context *gpe_context,
988 struct intel_encoder_context *encoder_context,
989 struct gen9_vp9_brc_curbe_param *param)
991 VAEncSequenceParameterBufferVP9 *seq_param;
992 VAEncPictureParameterBufferVP9 *pic_param;
993 VAEncMiscParameterTypeVP9PerSegmantParam *segment_param;
994 vp9_brc_curbe_data *cmd;
995 double dbps_ratio, dInputBitsPerFrame;
996 struct gen9_vp9_state *vp9_state;
998 vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
1000 pic_param = param->ppic_param;
1001 seq_param = param->pseq_param;
1002 segment_param = param->psegment_param;
/* Map the CURBE buffer and zero it before programming any field. */
1004 cmd = i965_gpe_context_map_curbe(gpe_context);
1009 memset(cmd, 0, sizeof(vp9_brc_curbe_data));
/*
 * Frame dimensions: source size when dynamic scaling is disabled,
 * destination size otherwise (the `else` line was lost in extraction).
 */
1011 if (!vp9_state->dys_enabled) {
1012 cmd->dw0.frame_width = pic_param->frame_width_src;
1013 cmd->dw0.frame_height = pic_param->frame_height_src;
1015 cmd->dw0.frame_width = pic_param->frame_width_dst;
1016 cmd->dw0.frame_height = pic_param->frame_height_dst;
1019 cmd->dw1.frame_type = vp9_state->picture_coding_type;
1020 cmd->dw1.segmentation_enable = 0;
1021 cmd->dw1.ref_frame_flags = vp9_state->ref_frame_flag;
1022 cmd->dw1.num_tlevels = 1;
/* The remaining DWs depend on which BRC kernel will consume this CURBE. */
1024 switch (param->media_state_type) {
1025 case VP9_MEDIA_STATE_BRC_INIT_RESET: {
1026 cmd->dw3.max_level_ratiot0 = 0;
1027 cmd->dw3.max_level_ratiot1 = 0;
1028 cmd->dw3.max_level_ratiot2 = 0;
1029 cmd->dw3.max_level_ratiot3 = 0;
1031 cmd->dw4.profile_level_max_frame = seq_param->max_frame_width *
1032 seq_param->max_frame_height;
1033 cmd->dw5.init_buf_fullness = vp9_state->init_vbv_buffer_fullness_in_bit;
1034 cmd->dw6.buf_size = vp9_state->vbv_buffer_size_in_bit;
/*
 * Bit rates rounded up to whole VP9_BRC_KBPS units; the trailing
 * `* VP9_BRC_KBPS` continuation of each expression was lost in
 * extraction (original lines 1036/1038/1040).
 */
1035 cmd->dw7.target_bit_rate = (vp9_state->target_bit_rate + VP9_BRC_KBPS - 1) / VP9_BRC_KBPS *
1037 cmd->dw8.max_bit_rate = (vp9_state->max_bit_rate + VP9_BRC_KBPS - 1) / VP9_BRC_KBPS *
1039 cmd->dw9.min_bit_rate = (vp9_state->min_bit_rate + VP9_BRC_KBPS - 1) / VP9_BRC_KBPS *
1041 cmd->dw10.frame_ratem = vp9_state->framerate.num;
1042 cmd->dw11.frame_rated = vp9_state->framerate.den;
1044 cmd->dw14.avbr_accuracy = 30;
1045 cmd->dw14.avbr_convergence = 150;
/*
 * Select the BRC kernel flag from the VA rate-control mode.  CBR pins
 * max to target and min to 0; the final branch (CQL, presumably the
 * CQP/default fallback — its `else` line was lost) sets a fixed CQ
 * level of 30.
 */
1047 if (encoder_context->rate_control_mode == VA_RC_CBR) {
1048 cmd->dw12.brc_flag = BRC_KERNEL_CBR;
1049 cmd->dw8.max_bit_rate = cmd->dw7.target_bit_rate;
1050 cmd->dw9.min_bit_rate = 0;
1051 } else if (encoder_context->rate_control_mode == VA_RC_VBR) {
1052 cmd->dw12.brc_flag = BRC_KERNEL_VBR;
1054 cmd->dw12.brc_flag = BRC_KERNEL_CQL;
1055 cmd->dw16.cq_level = 30;
/* GOP P-frame count: intra period minus the key frame. */
1057 cmd->dw12.gopp = seq_param->intra_period - 1;
1059 cmd->dw13.init_frame_width = pic_param->frame_width_src;
1060 cmd->dw13.init_frame_height = pic_param->frame_height_src;
1062 cmd->dw15.min_qp = 1;
1063 cmd->dw15.max_qp = 255;
1065 cmd->dw16.cq_level = 30;
1067 cmd->dw17.enable_dynamic_scaling = vp9_state->dys_in_use;
1068 cmd->dw17.brc_overshoot_cbr_pct = 150;
/*
 * Derive bits-per-frame and its ratio against 1/30th of the VBV
 * buffer; the ratio is clamped (the clamp assignments on original
 * lines 1073/1075 were lost) and then used as the exponent for the
 * frame-deviation threshold tables below.
 */
1070 dInputBitsPerFrame = (double)cmd->dw8.max_bit_rate * (double)vp9_state->framerate.den / (double)vp9_state->framerate.num;
1071 dbps_ratio = dInputBitsPerFrame / ((double)vp9_state->vbv_buffer_size_in_bit / 30.0);
1072 if (dbps_ratio < 0.1)
1074 if (dbps_ratio > 3.5)
/* Report initial BRC bookkeeping back to the caller through param. */
1077 *param->pbrc_init_reset_buf_size_in_bits = cmd->dw6.buf_size;
1078 *param->pbrc_init_reset_input_bits_per_frame = dInputBitsPerFrame;
1079 *param->pbrc_init_current_target_buf_full_in_bits = cmd->dw6.buf_size >> 1;
/*
 * Deviation threshold tables for P-frames, VBR and key frames.
 * Negative magnitudes are intentionally stored into unsigned bit
 * fields (two's-complement truncation).
 */
1081 cmd->dw18.pframe_deviation_threshold0 = (uint32_t)(-50 * pow(0.90, dbps_ratio));
1082 cmd->dw18.pframe_deviation_threshold1 = (uint32_t)(-50 * pow(0.66, dbps_ratio));
1083 cmd->dw18.pframe_deviation_threshold2 = (uint32_t)(-50 * pow(0.46, dbps_ratio));
1084 cmd->dw18.pframe_deviation_threshold3 = (uint32_t)(-50 * pow(0.3, dbps_ratio));
1085 cmd->dw19.pframe_deviation_threshold4 = (uint32_t)(50 * pow(0.3, dbps_ratio));
1086 cmd->dw19.pframe_deviation_threshold5 = (uint32_t)(50 * pow(0.46, dbps_ratio));
1087 cmd->dw19.pframe_deviation_threshold6 = (uint32_t)(50 * pow(0.7, dbps_ratio));
1088 cmd->dw19.pframe_deviation_threshold7 = (uint32_t)(50 * pow(0.9, dbps_ratio));
1090 cmd->dw20.vbr_deviation_threshold0 = (uint32_t)(-50 * pow(0.9, dbps_ratio));
1091 cmd->dw20.vbr_deviation_threshold1 = (uint32_t)(-50 * pow(0.7, dbps_ratio));
1092 cmd->dw20.vbr_deviation_threshold2 = (uint32_t)(-50 * pow(0.5, dbps_ratio));
1093 cmd->dw20.vbr_deviation_threshold3 = (uint32_t)(-50 * pow(0.3, dbps_ratio));
1094 cmd->dw21.vbr_deviation_threshold4 = (uint32_t)(100 * pow(0.4, dbps_ratio));
1095 cmd->dw21.vbr_deviation_threshold5 = (uint32_t)(100 * pow(0.5, dbps_ratio));
1096 cmd->dw21.vbr_deviation_threshold6 = (uint32_t)(100 * pow(0.75, dbps_ratio));
1097 cmd->dw21.vbr_deviation_threshold7 = (uint32_t)(100 * pow(0.9, dbps_ratio));
1099 cmd->dw22.kframe_deviation_threshold0 = (uint32_t)(-50 * pow(0.8, dbps_ratio));
1100 cmd->dw22.kframe_deviation_threshold1 = (uint32_t)(-50 * pow(0.6, dbps_ratio));
1101 cmd->dw22.kframe_deviation_threshold2 = (uint32_t)(-50 * pow(0.34, dbps_ratio));
1102 cmd->dw22.kframe_deviation_threshold3 = (uint32_t)(-50 * pow(0.2, dbps_ratio));
1103 cmd->dw23.kframe_deviation_threshold4 = (uint32_t)(50 * pow(0.2, dbps_ratio));
1104 cmd->dw23.kframe_deviation_threshold5 = (uint32_t)(50 * pow(0.4, dbps_ratio));
1105 cmd->dw23.kframe_deviation_threshold6 = (uint32_t)(50 * pow(0.66, dbps_ratio));
1106 cmd->dw23.kframe_deviation_threshold7 = (uint32_t)(50 * pow(0.9, dbps_ratio));
/* Per-frame BRC update pass. */
1110 case VP9_MEDIA_STATE_BRC_UPDATE: {
1111 cmd->dw15.min_qp = 1;
1112 cmd->dw15.max_qp = 255;
1114 cmd->dw25.frame_number = param->frame_number;
1116 // Used in dynamic scaling. set to zero for now
1117 cmd->dw27.hrd_buffer_fullness_upper_limit = 0;
1118 cmd->dw28.hrd_buffer_fullness_lower_limit = 0;
/* Per-segment QP deltas, only meaningful when segmentation is on. */
1120 if (pic_param->pic_flags.bits.segmentation_enabled) {
1121 cmd->dw32.seg_delta_qp0 = segment_param->seg_data[0].segment_qindex_delta;
1122 cmd->dw32.seg_delta_qp1 = segment_param->seg_data[1].segment_qindex_delta;
1123 cmd->dw32.seg_delta_qp2 = segment_param->seg_data[2].segment_qindex_delta;
1124 cmd->dw32.seg_delta_qp3 = segment_param->seg_data[3].segment_qindex_delta;
1126 cmd->dw33.seg_delta_qp4 = segment_param->seg_data[4].segment_qindex_delta;
1127 cmd->dw33.seg_delta_qp5 = segment_param->seg_data[5].segment_qindex_delta;
1128 cmd->dw33.seg_delta_qp6 = segment_param->seg_data[6].segment_qindex_delta;
1129 cmd->dw33.seg_delta_qp7 = segment_param->seg_data[7].segment_qindex_delta;
1132 //cmd->dw34.temporal_id = pPicParams->temporal_idi;
1133 cmd->dw34.temporal_id = 0;
1134 cmd->dw34.multi_ref_qp_check = param->multi_ref_qp_check;
1136 cmd->dw35.max_num_pak_passes = param->brc_num_pak_passes;
1137 cmd->dw35.sync_async = 0;
1138 cmd->dw35.mbrc = param->mbbrc_enabled;
/*
 * Track target buffer fullness across frames; wrap back into the
 * buffer range and flag the overflow to the kernel (the `else` line
 * before the `overflow = 0` assignment was lost in extraction).
 */
1139 if (*param->pbrc_init_current_target_buf_full_in_bits >
1140 ((double)(*param->pbrc_init_reset_buf_size_in_bits))) {
1141 *param->pbrc_init_current_target_buf_full_in_bits -=
1142 (double)(*param->pbrc_init_reset_buf_size_in_bits);
1143 cmd->dw35.overflow = 1;
1145 cmd->dw35.overflow = 0;
1147 cmd->dw24.target_size = (uint32_t)(*param->pbrc_init_current_target_buf_full_in_bits);
1149 cmd->dw36.segmentation = pic_param->pic_flags.bits.segmentation_enabled;
/* Advance fullness by one frame's worth of input bits. */
1151 *param->pbrc_init_current_target_buf_full_in_bits += *param->pbrc_init_reset_input_bits_per_frame;
1153 cmd->dw38.qdelta_ydc = pic_param->luma_dc_qindex_delta;
1154 cmd->dw38.qdelta_uvdc = pic_param->chroma_dc_qindex_delta;
1155 cmd->dw38.qdelta_uvac = pic_param->chroma_ac_qindex_delta;
/* I-frame distortion pass: intra mode stays enabled. */
1159 case VP9_MEDIA_STATE_ENC_I_FRAME_DIST:
1160 cmd->dw2.intra_mode_disable = 0;
/* Binding-table indices shared by all BRC media states. */
1166 cmd->dw48.brc_y4x_input_bti = VP9_BTI_BRC_SRCY4X_G9;
1167 cmd->dw49.brc_vme_coarse_intra_input_bti = VP9_BTI_BRC_VME_COARSE_INTRA_G9;
1168 cmd->dw50.brc_history_buffer_bti = VP9_BTI_BRC_HISTORY_G9;
1169 cmd->dw51.brc_const_data_input_bti = VP9_BTI_BRC_CONSTANT_DATA_G9;
1170 cmd->dw52.brc_distortion_bti = VP9_BTI_BRC_DISTORTION_G9;
1171 cmd->dw53.brc_mmdk_pak_output_bti = VP9_BTI_BRC_MMDK_PAK_OUTPUT_G9;
1172 cmd->dw54.brc_enccurbe_input_bti = VP9_BTI_BRC_MBENC_CURBE_INPUT_G9;
1173 cmd->dw55.brc_enccurbe_output_bti = VP9_BTI_BRC_MBENC_CURBE_OUTPUT_G9;
1174 cmd->dw56.brc_pic_state_input_bti = VP9_BTI_BRC_PIC_STATE_INPUT_G9;
1175 cmd->dw57.brc_pic_state_output_bti = VP9_BTI_BRC_PIC_STATE_OUTPUT_G9;
1176 cmd->dw58.brc_seg_state_input_bti = VP9_BTI_BRC_SEGMENT_STATE_INPUT_G9;
1177 cmd->dw59.brc_seg_state_output_bti = VP9_BTI_BRC_SEGMENT_STATE_OUTPUT_G9;
1178 cmd->dw60.brc_bitstream_size_data_bti = VP9_BTI_BRC_BITSTREAM_SIZE_G9;
1179 cmd->dw61.brc_hfw_data_output_bti = VP9_BTI_BRC_HFW_DATA_G9;
1181 i965_gpe_context_unmap_curbe(gpe_context);
/*
 * gen9_brc_init_reset_add_surfaces_vp9 - bind the surfaces needed by the
 * BRC init/reset kernel: the BRC history buffer and the 4x MEMV
 * distortion 2D surface, at their G9 binding-table indices.
 * NOTE(review): intermediate argument lines (gpe_context, flag/offset
 * parameters — original lines 1194/1196/1198 etc.) were lost in
 * extraction; the surviving lines are kept byte-identical.
 */
1186 gen9_brc_init_reset_add_surfaces_vp9(VADriverContextP ctx,
1187 struct encode_state *encode_state,
1188 struct intel_encoder_context *encoder_context,
1189 struct i965_gpe_context *gpe_context)
1191 struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
/* BRC history buffer. */
1193 i965_add_buffer_gpe_surface(ctx,
1195 &vme_context->res_brc_history_buffer,
1197 vme_context->res_brc_history_buffer.size,
1199 VP9_BTI_BRC_HISTORY_G9);
/* 4x MEMV distortion surface. */
1201 i965_add_buffer_2d_gpe_surface(ctx,
1203 &vme_context->s4x_memv_distortion_buffer,
1205 I965_SURFACEFORMAT_R8_UNORM,
1206 VP9_BTI_BRC_DISTORTION_G9);
1209 /* Functions related to BRC (bit-rate control) */
/*
 * gen9_vp9_brc_init_reset_kernel - run the BRC init (first frame) or BRC
 * reset (subsequent re-init) kernel: pick the GPE context, fill the BRC
 * CURBE via the pfn_set_curbe_brc hook, bind surfaces, and submit a
 * single media object.
 * Returns VA_STATUS_SUCCESS, or VA_STATUS_ERROR_INVALID_PARAMETER when
 * the encoder state or picture parameters are missing.
 * NOTE(review): gaps in the embedded original line numbers indicate
 * lost lines (braces, argument continuations); code kept byte-identical.
 */
1211 gen9_vp9_brc_init_reset_kernel(VADriverContextP ctx,
1212 struct encode_state *encode_state,
1213 struct intel_encoder_context *encoder_context)
1215 struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
1216 struct vp9_brc_context *brc_context = &vme_context->brc_context;
1217 struct gpe_media_object_parameter media_object_param;
1218 struct i965_gpe_context *gpe_context;
1219 int gpe_index = VP9_BRC_INIT;
1220 int media_function = VP9_MEDIA_STATE_BRC_INIT_RESET;
1221 struct gen9_vp9_brc_curbe_param brc_initreset_curbe;
1222 VAEncPictureParameterBufferVP9 *pic_param;
1223 struct gen9_vp9_state *vp9_state;
1225 vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
1227 if (!vp9_state || !vp9_state->pic_param)
1228 return VA_STATUS_ERROR_INVALID_PARAMETER;
1230 pic_param = vp9_state->pic_param;
/* After the first successful init, subsequent calls use the RESET kernel. */
1232 if (vp9_state->brc_inited)
1233 gpe_index = VP9_BRC_RESET;
1235 gpe_context = &brc_context->gpe_contexts[gpe_index];
1237 gen8_gpe_context_init(ctx, gpe_context);
1238 gen9_gpe_reset_binding_table(ctx, gpe_context);
/* Populate the curbe parameter struct handed to pfn_set_curbe_brc. */
1240 brc_initreset_curbe.media_state_type = media_function;
1241 brc_initreset_curbe.curr_frame = pic_param->reconstructed_frame;
1242 brc_initreset_curbe.ppic_param = vp9_state->pic_param;
1243 brc_initreset_curbe.pseq_param = vp9_state->seq_param;
1244 brc_initreset_curbe.psegment_param = vp9_state->segment_param;
1245 brc_initreset_curbe.frame_width = vp9_state->frame_width;
1246 brc_initreset_curbe.frame_height = vp9_state->frame_height;
1247 brc_initreset_curbe.pbrc_init_current_target_buf_full_in_bits =
1248 &vp9_state->brc_init_current_target_buf_full_in_bits;
1249 brc_initreset_curbe.pbrc_init_reset_buf_size_in_bits =
1250 &vp9_state->brc_init_reset_buf_size_in_bits;
1251 brc_initreset_curbe.pbrc_init_reset_input_bits_per_frame =
1252 &vp9_state->brc_init_reset_input_bits_per_frame;
1253 brc_initreset_curbe.picture_coding_type = vp9_state->picture_coding_type;
1254 brc_initreset_curbe.initbrc = !vp9_state->brc_inited;
1255 brc_initreset_curbe.mbbrc_enabled = 0;
1256 brc_initreset_curbe.ref_frame_flag = vp9_state->ref_frame_flag;
1258 vme_context->pfn_set_curbe_brc(ctx, encode_state,
1261 &brc_initreset_curbe);
/* Bind surfaces, finalize interface descriptors, then submit. */
1263 gen9_brc_init_reset_add_surfaces_vp9(ctx, encode_state, encoder_context, gpe_context);
1264 gen8_gpe_setup_interface_data(ctx, gpe_context);
1266 memset(&media_object_param, 0, sizeof(media_object_param));
1267 gen9_run_kernel_media_object(ctx, encoder_context, gpe_context, media_function, &media_object_param);
1269 return VA_STATUS_SUCCESS;
/*
 * gen9_brc_intra_dist_add_surfaces_vp9 - bind the surfaces needed by the
 * BRC intra-distortion kernel: the 4x-scaled source luma (as both a 2D
 * and an ADV/VME surface) and the 4x MEMV distortion output surface.
 * NOTE(review): several argument lines were lost in extraction (gaps in
 * the embedded numbering); the surviving lines are kept byte-identical.
 */
1273 gen9_brc_intra_dist_add_surfaces_vp9(VADriverContextP ctx,
1274 struct encode_state *encode_state,
1275 struct intel_encoder_context *encoder_context,
1276 struct i965_gpe_context *gpe_context)
1278 struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
1280 struct object_surface *obj_surface;
1281 struct gen9_surface_vp9 *vp9_priv_surface;
/* Fetch the 4x downscaled surface hanging off the reconstructed frame. */
1283 /* sScaled4xSurface surface */
1284 obj_surface = encode_state->reconstructed_object;
1286 vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
1288 obj_surface = vp9_priv_surface->scaled_4x_surface_obj;
1289 i965_add_2d_gpe_surface(ctx, gpe_context,
1292 I965_SURFACEFORMAT_R8_UNORM,
1293 VP9_BTI_BRC_SRCY4X_G9
/* Same scaled surface bound again as a VME (adv) surface for coarse intra. */
1296 i965_add_adv_gpe_surface(ctx, gpe_context,
1298 VP9_BTI_BRC_VME_COARSE_INTRA_G9);
/* Distortion output written by the kernel. */
1300 i965_add_buffer_2d_gpe_surface(ctx,
1302 &vme_context->s4x_memv_distortion_buffer,
1304 I965_SURFACEFORMAT_R8_UNORM,
1305 VP9_BTI_BRC_DISTORTION_G9);
1310 /* Functions related to BRC (bit-rate control) */
/*
 * gen9_vp9_brc_intra_dist_kernel - run the BRC intra-distortion kernel
 * over the 4x-downscaled frame using a media-object walker (one thread
 * per downscaled MB, no dependencies).  The distortion buffer is zeroed
 * before the kernel writes it.
 * Returns VA_STATUS_SUCCESS, or VA_STATUS_ERROR_INVALID_PARAMETER when
 * the encoder state or picture parameters are missing.
 * NOTE(review): gaps in the embedded numbering indicate lost lines;
 * code kept byte-identical.
 */
1312 gen9_vp9_brc_intra_dist_kernel(VADriverContextP ctx,
1313 struct encode_state *encode_state,
1314 struct intel_encoder_context *encoder_context)
1316 struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
1317 struct vp9_brc_context *brc_context = &vme_context->brc_context;
1318 struct i965_gpe_context *gpe_context;
1319 int gpe_index = VP9_BRC_INTRA_DIST;
1320 int media_function = VP9_MEDIA_STATE_ENC_I_FRAME_DIST;
1321 struct gen9_vp9_brc_curbe_param brc_intra_dist_curbe;
1322 VAEncPictureParameterBufferVP9 *pic_param;
1323 struct gen9_vp9_state *vp9_state;
1324 struct gpe_media_object_walker_parameter media_object_walker_param;
1325 struct gpe_encoder_kernel_walker_parameter kernel_walker_param;
1327 vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
1329 if (!vp9_state || !vp9_state->pic_param)
1330 return VA_STATUS_ERROR_INVALID_PARAMETER;
1332 pic_param = vp9_state->pic_param;
1334 gpe_context = &brc_context->gpe_contexts[gpe_index];
1336 gen8_gpe_context_init(ctx, gpe_context);
1337 gen9_gpe_reset_binding_table(ctx, gpe_context);
/* Fill the shared BRC curbe parameter struct for the I-frame-dist state. */
1339 brc_intra_dist_curbe.media_state_type = media_function;
1340 brc_intra_dist_curbe.curr_frame = pic_param->reconstructed_frame;
1341 brc_intra_dist_curbe.ppic_param = vp9_state->pic_param;
1342 brc_intra_dist_curbe.pseq_param = vp9_state->seq_param;
1343 brc_intra_dist_curbe.psegment_param = vp9_state->segment_param;
1344 brc_intra_dist_curbe.frame_width = vp9_state->frame_width;
1345 brc_intra_dist_curbe.frame_height = vp9_state->frame_height;
1346 brc_intra_dist_curbe.pbrc_init_current_target_buf_full_in_bits =
1347 &vp9_state->brc_init_current_target_buf_full_in_bits;
1348 brc_intra_dist_curbe.pbrc_init_reset_buf_size_in_bits =
1349 &vp9_state->brc_init_reset_buf_size_in_bits;
1350 brc_intra_dist_curbe.pbrc_init_reset_input_bits_per_frame =
1351 &vp9_state->brc_init_reset_input_bits_per_frame;
1352 brc_intra_dist_curbe.picture_coding_type = vp9_state->picture_coding_type;
1353 brc_intra_dist_curbe.initbrc = !vp9_state->brc_inited;
1354 brc_intra_dist_curbe.mbbrc_enabled = 0;
1355 brc_intra_dist_curbe.ref_frame_flag = vp9_state->ref_frame_flag;
1357 vme_context->pfn_set_curbe_brc(ctx, encode_state,
1360 &brc_intra_dist_curbe);
1362 /* zero distortion buffer */
1363 i965_zero_gpe_resource(&vme_context->s4x_memv_distortion_buffer);
1365 gen9_brc_intra_dist_add_surfaces_vp9(ctx, encode_state, encoder_context, gpe_context);
1366 gen8_gpe_setup_interface_data(ctx, gpe_context);
/* Walker over the 4x-downscaled MB grid, no inter-thread dependencies. */
1368 memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
1369 kernel_walker_param.resolution_x = vp9_state->downscaled_width_4x_in_mb;
1370 kernel_walker_param.resolution_y = vp9_state->downscaled_height_4x_in_mb;
1371 kernel_walker_param.no_dependency = 1;
1373 gen9_init_media_object_walker_parameter(encoder_context, &kernel_walker_param, &media_object_walker_param);
1375 gen9_run_kernel_media_object_walker(ctx, encoder_context,
1378 &media_object_walker_param);
1380 return VA_STATUS_SUCCESS;
/*
 * intel_vp9enc_construct_picstate_batchbuf - build four HCP_VP9_PIC_STATE
 * command sequences (one per potential PAK pass, stride
 * VP9_PIC_STATE_BUFFER_SIZE) into the mapped gpe_resource, each
 * terminated with MI_BATCH_BUFFER_END.
 *
 * NOTE(review): several declarations visible only by their use (pdata,
 * i, j — original lines ~1401-1403) and assorted statements (returns,
 * braces, `else` branches, expression continuations) were lost in
 * extraction; the surviving lines are kept byte-identical below.
 */
1384 intel_vp9enc_construct_picstate_batchbuf(VADriverContextP ctx,
1385 struct encode_state *encode_state,
1386 struct intel_encoder_context *encoder_context,
1387 struct i965_gpe_resource *gpe_resource)
1389 struct gen9_vp9_state *vp9_state;
1390 VAEncPictureParameterBufferVP9 *pic_param;
1391 int frame_width_minus1, frame_height_minus1;
1392 int is_lossless = 0;
1393 int is_intra_only = 0;
1394 unsigned int last_frame_type;
1395 unsigned int ref_flags;
1396 unsigned int use_prev_frame_mvs, adapt_flag;
1397 struct gen9_surface_vp9 *vp9_surface = NULL;
1398 struct object_surface *obj_surface = NULL;
1399 uint32_t scale_h = 0;
1400 uint32_t scale_w = 0;
1404 unsigned int *cmd_ptr, cmd_value, tmp;
1406 pdata = i965_map_gpe_resource(gpe_resource);
1407 vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
1409 if (!vp9_state || !vp9_state->pic_param || !pdata)
1412 pic_param = vp9_state->pic_param;
/* Dimensions are 8-aligned then decremented for the minus-1 HW fields. */
1413 frame_width_minus1 = ALIGN(pic_param->frame_width_dst, 8) - 1;
1414 frame_height_minus1 = ALIGN(pic_param->frame_height_dst, 8) - 1;
/*
 * All-zero qindex/deltas means lossless coding (the `is_lossless = 1`
 * assignment line was lost in extraction).
 */
1415 if ((pic_param->luma_ac_qindex == 0) &&
1416 (pic_param->luma_dc_qindex_delta == 0) &&
1417 (pic_param->chroma_ac_qindex_delta == 0) &&
1418 (pic_param->chroma_dc_qindex_delta == 0))
/* intra_only is only meaningful for non-key frames (frame_type != 0). */
1421 if (pic_param->pic_flags.bits.frame_type)
1422 is_intra_only = pic_param->pic_flags.bits.intra_only;
1424 last_frame_type = vp9_state->vp9_last_frame.frame_type;
1426 use_prev_frame_mvs = 0;
1427 if (pic_param->pic_flags.bits.frame_type == HCP_VP9_KEY_FRAME) {
1428 last_frame_type = 0;
/* Pack the three reference sign-bias bits into one flags word. */
1431 ref_flags = ((pic_param->ref_flags.bits.ref_arf_sign_bias << 9) |
1432 (pic_param->ref_flags.bits.ref_gf_sign_bias << 8) |
1433 (pic_param->ref_flags.bits.ref_last_sign_bias << 7)
/*
 * Previous-frame MVs are usable only when the last frame was a shown,
 * non-intra inter frame of identical size and error resilience is off.
 */
1435 if (!pic_param->pic_flags.bits.error_resilient_mode &&
1436 (pic_param->frame_width_dst == vp9_state->vp9_last_frame.frame_width) &&
1437 (pic_param->frame_height_dst == vp9_state->vp9_last_frame.frame_height) &&
1438 !pic_param->pic_flags.bits.intra_only &&
1439 vp9_state->vp9_last_frame.show_frame &&
1440 ((vp9_state->vp9_last_frame.frame_type == HCP_VP9_INTER_FRAME) &&
1441 !vp9_state->vp9_last_frame.intra_only)
1443 use_prev_frame_mvs = 1;
/* adapt_flag is presumably set in the lost lines after this condition. */
1446 if (!pic_param->pic_flags.bits.error_resilient_mode &&
1447 !pic_param->pic_flags.bits.frame_parallel_decoding_mode)
/* Emit one HCP_VP9_PIC_STATE per PAK pass (4 slots). */
1450 for (i = 0; i < 4; i++) {
1451 uint32_t non_first_pass;
1456 cmd_ptr = (unsigned int *)(pdata + i * VP9_PIC_STATE_BUFFER_SIZE);
1458 *cmd_ptr++ = (HCP_VP9_PIC_STATE | (33 - 2));
1459 *cmd_ptr++ = (frame_height_minus1 << 16 |
1460 frame_width_minus1);
/* DW2: frame-level control bits. */
1462 *cmd_ptr++ = (0 << 31 | /* disable segment_in */
1463 0 << 30 | /* disable segment_out */
1464 is_lossless << 29 | /* loseless */
1465 (pic_param->pic_flags.bits.segmentation_enabled && pic_param->pic_flags.bits.segmentation_temporal_update) << 28 | /* temporal update */
1466 (pic_param->pic_flags.bits.segmentation_enabled && pic_param->pic_flags.bits.segmentation_update_map) << 27 | /* temporal update */
1467 (pic_param->pic_flags.bits.segmentation_enabled << 26) |
1468 (pic_param->sharpness_level << 23) |
1469 (pic_param->filter_level << 17) |
1470 (pic_param->pic_flags.bits.frame_parallel_decoding_mode << 16) |
1471 (pic_param->pic_flags.bits.error_resilient_mode << 15) |
1472 (pic_param->pic_flags.bits.refresh_frame_context << 14) |
1473 (last_frame_type << 13) |
1474 (vp9_state->tx_mode == TX_MODE_SELECT) << 12 |
1475 (pic_param->pic_flags.bits.comp_prediction_mode == REFERENCE_MODE_SELECT) << 11 |
1476 (use_prev_frame_mvs) << 10 |
1478 (pic_param->pic_flags.bits.mcomp_filter_type << 4) |
1479 (pic_param->pic_flags.bits.allow_high_precision_mv << 3) |
1480 (is_intra_only << 2) |
1482 (pic_param->pic_flags.bits.frame_type) << 0);
/* DW3: profile/bit-depth/chroma format and tile layout. */
1484 *cmd_ptr++ = ((0 << 28) | /* VP9Profile0 */
1485 (0 << 24) | /* 8-bit depth */
1486 (0 << 22) | /* only 420 format */
1487 (0 << 0) | /* sse statistics */
1488 (pic_param->log2_tile_rows << 8) |
1489 (pic_param->log2_tile_columns << 0));
/*
 * Reference scale factors in 16.14 fixed point for each of the three
 * VP9 references of an inter frame; the `else`/fallback emission for
 * missing references was lost in extraction.
 */
1492 if (pic_param->pic_flags.bits.frame_type &&
1493 !pic_param->pic_flags.bits.intra_only) {
1494 for (j = 0; j < 3; j++) {
1495 obj_surface = encode_state->reference_objects[j];
1498 if (obj_surface && obj_surface->private_data) {
1499 vp9_surface = obj_surface->private_data;
1500 scale_w = (vp9_surface->frame_width << 14) / pic_param->frame_width_dst;
1501 scale_h = (vp9_surface->frame_height << 14) / pic_param->frame_height_dst;
1502 *cmd_ptr++ = (scale_w << 16 |
/* Reference frame minus-1 dimensions. */
1513 for (j = 0; j < 3; j++) {
1514 obj_surface = encode_state->reference_objects[j];
1517 if (obj_surface && obj_surface->private_data) {
1518 vp9_surface = obj_surface->private_data;
1519 *cmd_ptr++ = (vp9_surface->frame_height - 1) << 16 |
1520 (vp9_surface->frame_width - 1);
1527 *cmd_ptr++ = (1 << 1);
/* Uncompressed-header insertion plus the base luma AC qindex. */
1531 *cmd_ptr++ = ((1 << 25) | /* header insertation for VP9 */
1532 (0 << 24) | /* tail insertation */
1533 (pic_param->luma_ac_qindex << 16) |
1534 0 /* compressed header bin count */);
/* QP deltas in 5-bit sign-magnitude form (luma DC, chroma DC, chroma AC). */
1537 tmp = intel_convert_sign_mag(pic_param->luma_dc_qindex_delta, 5);
1538 cmd_value = (tmp << 16);
1539 tmp = intel_convert_sign_mag(pic_param->chroma_dc_qindex_delta, 5);
1540 cmd_value |= (tmp << 8);
1541 tmp = intel_convert_sign_mag(pic_param->chroma_ac_qindex_delta, 5);
1543 *cmd_ptr++ = cmd_value;
/* Loop-filter reference deltas, 7-bit sign-magnitude, packed four per DW. */
1545 tmp = intel_convert_sign_mag(pic_param->ref_lf_delta[0], 7);
1547 tmp = intel_convert_sign_mag(pic_param->ref_lf_delta[1], 7);
1548 cmd_value |= (tmp << 8);
1549 tmp = intel_convert_sign_mag(pic_param->ref_lf_delta[2], 7);
1550 cmd_value |= (tmp << 16);
1551 tmp = intel_convert_sign_mag(pic_param->ref_lf_delta[3], 7);
1552 cmd_value |= (tmp << 24);
1553 *cmd_ptr++ = cmd_value;
/* Loop-filter mode deltas. */
1556 tmp = intel_convert_sign_mag(pic_param->mode_lf_delta[0], 7);
1558 tmp = intel_convert_sign_mag(pic_param->mode_lf_delta[1], 7);
1559 cmd_value |= (tmp << 8);
1560 *cmd_ptr++ = cmd_value;
/* Bit offsets into the frame header where PAK patches values. */
1563 *cmd_ptr++ = vp9_state->frame_header.bit_offset_ref_lf_delta |
1564 (vp9_state->frame_header.bit_offset_mode_lf_delta << 16);
1565 *cmd_ptr++ = vp9_state->frame_header.bit_offset_qindex |
1566 (vp9_state->frame_header.bit_offset_lf_level << 16);
1569 *cmd_ptr++ = (1 << 26 | (1 << 25) |
1570 non_first_pass << 16);
1572 *cmd_ptr++ = (1 << 31) | (256);
1575 *cmd_ptr++ = (0 << 31) | 1;
1577 /* dw22-dw24. Frame_delta_qindex_range */
1582 /* dw25-26. frame_delta_lf_range */
1586 /* dw27. frame_delta_lf_min */
1595 *cmd_ptr++ = (0 << 30) | 1;
1597 *cmd_ptr++ = vp9_state->frame_header.bit_offset_first_partition_size;
/* Terminate this pass's command sequence. */
1600 *cmd_ptr++ = MI_BATCH_BUFFER_END;
1603 i965_unmap_gpe_resource(gpe_resource);
/*
 * gen9_brc_update_add_surfaces_vp9 - bind the eleven surfaces consumed
 * and produced by the BRC update kernel, in binding-table order:
 * history, constant data, distortion, MMDK/PAK output, the MbEnc CURBE
 * (bound twice, as kernel input and output so BRC can rewrite it),
 * PIC_STATE read/write, SEGMENT_STATE read/write, bitstream size, and
 * the HFW data buffer.
 * NOTE(review): intermediate argument lines (gpe_context, flags,
 * offsets) were lost in extraction; surviving lines byte-identical.
 */
1607 gen9_brc_update_add_surfaces_vp9(VADriverContextP ctx,
1608 struct encode_state *encode_state,
1609 struct intel_encoder_context *encoder_context,
1610 struct i965_gpe_context *brc_gpe_context,
1611 struct i965_gpe_context *mbenc_gpe_context)
1613 struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
1615 /* 0. BRC history buffer */
1616 i965_add_buffer_gpe_surface(ctx,
1618 &vme_context->res_brc_history_buffer,
1620 vme_context->res_brc_history_buffer.size,
1622 VP9_BTI_BRC_HISTORY_G9);
1624 /* 1. Constant data buffer */
1625 i965_add_buffer_gpe_surface(ctx,
1627 &vme_context->res_brc_const_data_buffer,
1629 vme_context->res_brc_const_data_buffer.size,
1631 VP9_BTI_BRC_CONSTANT_DATA_G9);
1633 /* 2. Distortion 2D surface buffer */
1634 i965_add_buffer_2d_gpe_surface(ctx,
1636 &vme_context->s4x_memv_distortion_buffer,
1638 I965_SURFACEFORMAT_R8_UNORM,
1639 VP9_BTI_BRC_DISTORTION_G9);
/* 3. MMDK PAK output buffer */
1642 i965_add_buffer_gpe_surface(ctx,
1644 &vme_context->res_brc_mmdk_pak_buffer,
1646 vme_context->res_brc_mmdk_pak_buffer.size,
1648 VP9_BTI_BRC_MMDK_PAK_OUTPUT_G9);
1649 /* 4. Mbenc curbe input buffer */
1650 gen9_add_dri_buffer_gpe_surface(ctx,
1652 mbenc_gpe_context->curbe.bo,
1654 ALIGN(mbenc_gpe_context->curbe.length, 64),
1655 mbenc_gpe_context->curbe.offset,
1656 VP9_BTI_BRC_MBENC_CURBE_INPUT_G9);
1657 /* 5. Mbenc curbe output buffer */
1658 gen9_add_dri_buffer_gpe_surface(ctx,
1660 mbenc_gpe_context->curbe.bo,
1662 ALIGN(mbenc_gpe_context->curbe.length, 64),
1663 mbenc_gpe_context->curbe.offset,
1664 VP9_BTI_BRC_MBENC_CURBE_OUTPUT_G9);
1666 /* 6. BRC_PIC_STATE read buffer */
1667 i965_add_buffer_gpe_surface(ctx, brc_gpe_context,
1668 &vme_context->res_pic_state_brc_read_buffer,
1670 vme_context->res_pic_state_brc_read_buffer.size,
1672 VP9_BTI_BRC_PIC_STATE_INPUT_G9);
1674 /* 7. BRC_PIC_STATE write buffer */
1675 i965_add_buffer_gpe_surface(ctx, brc_gpe_context,
1676 &vme_context->res_pic_state_brc_write_hfw_read_buffer,
1678 vme_context->res_pic_state_brc_write_hfw_read_buffer.size,
1680 VP9_BTI_BRC_PIC_STATE_OUTPUT_G9);
1682 /* 8. SEGMENT_STATE read buffer */
1683 i965_add_buffer_gpe_surface(ctx, brc_gpe_context,
1684 &vme_context->res_seg_state_brc_read_buffer,
1686 vme_context->res_seg_state_brc_read_buffer.size,
1688 VP9_BTI_BRC_SEGMENT_STATE_INPUT_G9);
1690 /* 9. SEGMENT_STATE write buffer */
1691 i965_add_buffer_gpe_surface(ctx, brc_gpe_context,
1692 &vme_context->res_seg_state_brc_write_buffer,
1694 vme_context->res_seg_state_brc_write_buffer.size,
1696 VP9_BTI_BRC_SEGMENT_STATE_OUTPUT_G9);
1698 /* 10. Bitstream size buffer */
1699 i965_add_buffer_gpe_surface(ctx, brc_gpe_context,
1700 &vme_context->res_brc_bitstream_size_buffer,
1702 vme_context->res_brc_bitstream_size_buffer.size,
1704 VP9_BTI_BRC_BITSTREAM_SIZE_G9);
/* 11. HFW data buffer */
1706 i965_add_buffer_gpe_surface(ctx, brc_gpe_context,
1707 &vme_context->res_brc_hfw_data_buffer,
1709 vme_context->res_brc_hfw_data_buffer.size,
1711 VP9_BTI_BRC_HFW_DATA_G9);
/*
 * gen9_vp9_brc_update_kernel - run the per-frame BRC update kernel.
 * First fills the MbEnc CURBE (so BRC can patch it in place), then the
 * BRC update CURBE, uploads the I/P BRC constant table when supported,
 * rebuilds the PIC_STATE read buffer with filter_level cleared (BRC
 * will decide it), binds surfaces, and submits a single media object.
 * Returns VA_STATUS_SUCCESS, VA_STATUS_ERROR_INVALID_PARAMETER on
 * missing state, or VA_STATUS_ERROR_OPERATION_FAILED if the constant
 * buffer cannot be mapped.
 * NOTE(review): the `mbenc_function` declaration (original line ~1726)
 * and assorted braces/`else` lines were lost in extraction; surviving
 * lines kept byte-identical.
 */
1717 gen9_vp9_brc_update_kernel(VADriverContextP ctx,
1718 struct encode_state *encode_state,
1719 struct intel_encoder_context *encoder_context)
1721 struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
1722 struct vp9_brc_context *brc_context = &vme_context->brc_context;
1723 struct i965_gpe_context *brc_gpe_context, *mbenc_gpe_context;
1724 int mbenc_index, gpe_index = VP9_BRC_UPDATE;
1725 int media_function = VP9_MEDIA_STATE_BRC_UPDATE;
1727 struct gen9_vp9_brc_curbe_param brc_update_curbe_param;
1728 VAEncPictureParameterBufferVP9 *pic_param;
1729 struct gen9_vp9_state *vp9_state;
1730 struct gen9_vp9_mbenc_curbe_param mbenc_curbe_param;
1731 struct gpe_media_object_parameter media_object_param;
1733 vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
1734 if (!vp9_state || !vp9_state->pic_param)
1735 return VA_STATUS_ERROR_INVALID_PARAMETER;
1737 pic_param = vp9_state->pic_param;
/* Non-zero picture_coding_type selects the inter MbEnc kernel. */
1738 // Setup VP9 MbEnc Curbe
1739 if (vp9_state->picture_coding_type) {
1740 mbenc_function = VP9_MEDIA_STATE_MBENC_P;
1741 mbenc_index = VP9_MBENC_IDX_INTER;
1743 mbenc_function = VP9_MEDIA_STATE_MBENC_I_32x32;
1744 mbenc_index = VP9_MBENC_IDX_KEY_32x32;
1747 mbenc_gpe_context = &(vme_context->mbenc_context.gpe_contexts[mbenc_index]);
1749 memset(&mbenc_curbe_param, 0, sizeof(mbenc_curbe_param));
/* Fill the MbEnc curbe parameters; BRC will later rewrite that CURBE. */
1751 mbenc_curbe_param.ppic_param = vp9_state->pic_param;
1752 mbenc_curbe_param.pseq_param = vp9_state->seq_param;
1753 mbenc_curbe_param.psegment_param = vp9_state->segment_param;
1754 //mbenc_curbe_param.ppRefList = &(vp9_state->pRefList[0]);
1755 mbenc_curbe_param.last_ref_obj = vp9_state->last_ref_obj;
1756 mbenc_curbe_param.golden_ref_obj = vp9_state->golden_ref_obj;
1757 mbenc_curbe_param.alt_ref_obj = vp9_state->alt_ref_obj;
1758 mbenc_curbe_param.frame_width_in_mb = ALIGN(vp9_state->frame_width, 16) / 16;
1759 mbenc_curbe_param.frame_height_in_mb = ALIGN(vp9_state->frame_height, 16) / 16;
1760 mbenc_curbe_param.hme_enabled = vp9_state->hme_enabled;
1761 mbenc_curbe_param.ref_frame_flag = vp9_state->ref_frame_flag;
1762 mbenc_curbe_param.multi_ref_qp_check = vp9_state->multi_ref_qp_check;
1763 mbenc_curbe_param.picture_coding_type = vp9_state->picture_coding_type;
1764 mbenc_curbe_param.media_state_type = mbenc_function;
1766 vme_context->pfn_set_curbe_mbenc(ctx, encode_state,
1769 &mbenc_curbe_param);
/* Remember the MbEnc CURBE was already programmed here. */
1771 vp9_state->mbenc_curbe_set_in_brc_update = true;
1773 brc_gpe_context = &brc_context->gpe_contexts[gpe_index];
1775 gen8_gpe_context_init(ctx, brc_gpe_context);
1776 gen9_gpe_reset_binding_table(ctx, brc_gpe_context);
1778 memset(&brc_update_curbe_param, 0, sizeof(brc_update_curbe_param));
1780 // Setup BRC Update Curbe
1781 brc_update_curbe_param.media_state_type = media_function;
1782 brc_update_curbe_param.curr_frame = pic_param->reconstructed_frame;
1783 brc_update_curbe_param.ppic_param = vp9_state->pic_param;
1784 brc_update_curbe_param.pseq_param = vp9_state->seq_param;
1785 brc_update_curbe_param.psegment_param = vp9_state->segment_param;
1786 brc_update_curbe_param.picture_coding_type = vp9_state->picture_coding_type;
1787 brc_update_curbe_param.frame_width_in_mb = ALIGN(vp9_state->frame_width, 16) / 16;
1788 brc_update_curbe_param.frame_height_in_mb = ALIGN(vp9_state->frame_height, 16) / 16;
1789 brc_update_curbe_param.hme_enabled = vp9_state->hme_enabled;
1790 brc_update_curbe_param.b_used_ref = 1;
1791 brc_update_curbe_param.frame_number = vp9_state->frame_number;
1792 brc_update_curbe_param.ref_frame_flag = vp9_state->ref_frame_flag;
1793 brc_update_curbe_param.mbbrc_enabled = 0;
1794 brc_update_curbe_param.multi_ref_qp_check = vp9_state->multi_ref_qp_check;
1795 brc_update_curbe_param.brc_num_pak_passes = vp9_state->num_pak_passes;
1797 brc_update_curbe_param.pbrc_init_current_target_buf_full_in_bits =
1798 &vp9_state->brc_init_current_target_buf_full_in_bits;
1799 brc_update_curbe_param.pbrc_init_reset_buf_size_in_bits =
1800 &vp9_state->brc_init_reset_buf_size_in_bits;
1801 brc_update_curbe_param.pbrc_init_reset_input_bits_per_frame =
1802 &vp9_state->brc_init_reset_input_bits_per_frame;
1804 vme_context->pfn_set_curbe_brc(ctx, encode_state,
1807 &brc_update_curbe_param);
/* Upload the I- or P-frame BRC constant table when the HW supports it. */
1810 // Check if the constant data surface is present
1811 if (vp9_state->brc_constant_buffer_supported) {
1812 char *brc_const_buffer;
1813 brc_const_buffer = i965_map_gpe_resource(&vme_context->res_brc_const_data_buffer);
1815 if (!brc_const_buffer)
1816 return VA_STATUS_ERROR_OPERATION_FAILED;
1818 if (vp9_state->picture_coding_type)
1819 memcpy(brc_const_buffer, vp9_brc_const_data_p_g9,
1820 sizeof(vp9_brc_const_data_p_g9));
1822 memcpy(brc_const_buffer, vp9_brc_const_data_i_g9,
1823 sizeof(vp9_brc_const_data_i_g9));
1825 i965_unmap_gpe_resource(&vme_context->res_brc_const_data_buffer);
1828 if (pic_param->pic_flags.bits.segmentation_enabled) {
1829 //reallocate the vme_state->mb_segment_map_surface
1830 /* this will be added later */
1834 pic_param->filter_level = 0;
1835 // clear the filter level value in picParams ebfore programming pic state, as this value will be determined and updated by BRC.
1836 intel_vp9enc_construct_picstate_batchbuf(ctx, encode_state,
1837 encoder_context, &vme_context->res_pic_state_brc_read_buffer);
1840 gen9_brc_update_add_surfaces_vp9(ctx, encode_state,
1845 gen8_gpe_setup_interface_data(ctx, brc_gpe_context);
1846 memset(&media_object_param, 0, sizeof(media_object_param));
1847 gen9_run_kernel_media_object(ctx, encoder_context,
1850 &media_object_param);
1851 return VA_STATUS_SUCCESS;
/*
 * gen9_vp9_set_curbe_me - program the CURBE for the gen9 VP9 motion
 * estimation kernel (either the 16x or the 4x HME pass), including VME
 * search controls, picture dimensions at the pass's scale factor, L0
 * reference count, the diamond IME search path table, and the ME
 * binding-table indices.
 * NOTE(review): the `me_mode` declaration (original line ~1863) and the
 * scale_factor assignments following the me_mode check (original lines
 * ~1878-1881) were lost in extraction; surviving lines byte-identical.
 */
1855 void gen9_vp9_set_curbe_me(VADriverContextP ctx,
1856 struct encode_state *encode_state,
1857 struct i965_gpe_context *gpe_context,
1858 struct intel_encoder_context *encoder_context,
1859 struct gen9_vp9_me_curbe_param *param)
1861 vp9_me_curbe_data *me_cmd;
1862 int enc_media_state;
1864 unsigned int width, height;
1865 uint32_t l0_ref_frames;
1866 uint32_t scale_factor;
/* Decide which HME stage this invocation represents. */
1868 if (param->b16xme_enabled) {
1869 if (param->use_16x_me)
1870 me_mode = VP9_ENC_ME16X_BEFORE_ME4X;
1872 me_mode = VP9_ENC_ME4X_AFTER_ME16X;
1874 me_mode = VP9_ENC_ME4X_ONLY;
/* scale_factor (16 or 4) is presumably set in the lost lines here. */
1877 if (me_mode == VP9_ENC_ME16X_BEFORE_ME4X)
1882 if (param->use_16x_me)
1883 enc_media_state = VP9_MEDIA_STATE_16X_ME;
1885 enc_media_state = VP9_MEDIA_STATE_4X_ME;
1887 me_cmd = i965_gpe_context_map_curbe(gpe_context);
1892 memset(me_cmd, 0, sizeof(vp9_me_curbe_data));
/* VME search configuration. */
1894 me_cmd->dw1.max_num_mvs = 0x10;
1895 me_cmd->dw1.bi_weight = 0x00;
1897 me_cmd->dw2.max_num_su = 0x39;
1898 me_cmd->dw2.max_len_sp = 0x39;
1900 me_cmd->dw3.sub_mb_part_mask = 0x77;
1901 me_cmd->dw3.inter_sad = 0x00;
1902 me_cmd->dw3.intra_sad = 0x00;
1903 me_cmd->dw3.bme_disable_fbr = 0x01;
1904 me_cmd->dw3.sub_pel_mode = 0x03;
/* Picture dimensions at this pass's downscale factor, in MB units. */
1906 width = param->frame_width / scale_factor;
1907 height = param->frame_height / scale_factor;
1909 me_cmd->dw4.picture_width = ALIGN(width, 16) / 16;
1910 me_cmd->dw4.picture_height_minus1 = ALIGN(height, 16) / 16 - 1;
1912 me_cmd->dw5.ref_width = 0x30;
1913 me_cmd->dw5.ref_height = 0x28;
/* Only the 4x pass writes distortion output. */
1915 if (enc_media_state == VP9_MEDIA_STATE_4X_ME)
1916 me_cmd->dw6.write_distortions = 0x01;
/* The 4x pass after a 16x pass seeds its search from the 16x MVs. */
1918 me_cmd->dw6.use_mv_from_prev_step = me_mode == VP9_ENC_ME4X_AFTER_ME16X ? 1 : 0;
1919 me_cmd->dw6.super_combine_dist = 0x5;
1920 me_cmd->dw6.max_vmvr = 0x7fc;
/* Count the enabled L0 references from the last/golden/alt flag bits. */
1922 l0_ref_frames = (param->ref_frame_flag & 0x01) +
1923 !!(param->ref_frame_flag & 0x02) +
1924 !!(param->ref_frame_flag & 0x04);
1925 me_cmd->dw13.num_ref_idx_l0_minus1 = (l0_ref_frames > 0) ? l0_ref_frames - 1 : 0;
1926 me_cmd->dw13.num_ref_idx_l1_minus1 = 0;
1928 me_cmd->dw14.l0_ref_pic_polarity_bits = 0;
1929 me_cmd->dw14.l1_ref_pic_polarity_bits = 0;
1931 me_cmd->dw15.mv_shift_factor = 0x02;
/* Copy the diamond IME search path table at byte offset 64 of the CURBE. */
1934 memcpy((void *)((char *)me_cmd + 64),
1935 vp9_diamond_ime_search_path_delta,
1936 sizeof(vp9_diamond_ime_search_path_delta));
/* ME kernel binding-table indices. */
1940 me_cmd->dw32._4x_memv_output_data_surf_index = VP9_BTI_ME_MV_DATA_SURFACE;
1941 me_cmd->dw33._16x_32x_memv_input_data_surf_index = VP9_BTI_16XME_MV_DATA_SURFACE;
1942 me_cmd->dw34._4x_me_output_dist_surf_index = VP9_BTI_ME_DISTORTION_SURFACE;
1943 me_cmd->dw35._4x_me_output_brc_dist_surf_index = VP9_BTI_ME_BRC_DISTORTION_SURFACE;
1944 me_cmd->dw36.vme_fwd_inter_pred_surf_index = VP9_BTI_ME_CURR_PIC_L0;
1945 me_cmd->dw37.vme_bdw_inter_pred_surf_index = VP9_BTI_ME_CURR_PIC_L1;
1947 i965_gpe_context_unmap_curbe(gpe_context);
/*
 * gen9_vp9_send_me_surface - bind every surface the VP9 HME kernel reads
 * or writes into the GPE binding table.
 *
 * Binds: the MV-data output buffer (16x or 4x variant depending on the
 * pass), the 16x MV input buffer when 16x ME ran first, the distortion
 * and BRC-distortion outputs (4x pass only), the scaled current picture,
 * and the scaled last/golden/alt reference pictures that are present.
 *
 * param->curr_pic must resolve to a surface carrying gen9_surface_vp9
 * private data (the scaled/DYS surface objects live there).
 */
gen9_vp9_send_me_surface(VADriverContextP ctx,
                         struct encode_state *encode_state,
                         struct i965_gpe_context *gpe_context,
                         struct intel_encoder_context *encoder_context,
                         struct gen9_vp9_me_surface_param *param)
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct object_surface *obj_surface;
    struct gen9_surface_vp9 *vp9_priv_surface;
    struct object_surface *input_surface;
    struct i965_gpe_resource *gpe_resource;

    obj_surface = SURFACE(param->curr_pic);

    /* Bail out when the current picture lacks the VP9 private data. */
    if (!obj_surface || !obj_surface->private_data)

    vp9_priv_surface = obj_surface->private_data;
    /* MV-data output: the 16x pass writes the 16x buffer, the 4x pass the
     * 4x buffer; both are bound at the same BTI slot. */
    if (param->use_16x_me) {
        gpe_resource = param->pres_16x_memv_data_buffer;
        gpe_resource = param->pres_4x_memv_data_buffer;

    i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
                                   I965_SURFACEFORMAT_R8_UNORM,
                                   VP9_BTI_ME_MV_DATA_SURFACE);

    /* When 16x ME is enabled, the 4x pass also reads the 16x MV result. */
    if (param->b16xme_enabled) {
        gpe_resource = param->pres_16x_memv_data_buffer;
        i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
                                       I965_SURFACEFORMAT_R8_UNORM,
                                       VP9_BTI_16XME_MV_DATA_SURFACE);

    /* Distortion outputs are produced only by the 4x ME pass. */
    if (!param->use_16x_me) {
        gpe_resource = param->pres_me_brc_distortion_buffer;

        i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
                                       I965_SURFACEFORMAT_R8_UNORM,
                                       VP9_BTI_ME_BRC_DISTORTION_SURFACE);

        gpe_resource = param->pres_me_distortion_buffer;

        i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
                                       I965_SURFACEFORMAT_R8_UNORM,
                                       VP9_BTI_ME_DISTORTION_SURFACE);

    /* Current picture: scaled-16x for the 16x pass, scaled-4x otherwise. */
    if (param->use_16x_me)
        input_surface = vp9_priv_surface->scaled_16x_surface_obj;
        input_surface = vp9_priv_surface->scaled_4x_surface_obj;

    i965_add_adv_gpe_surface(ctx, gpe_context,
                             VP9_BTI_ME_CURR_PIC_L0);

    /* References are bound at consecutive slots after the current pic. */
    ref_bti = VP9_BTI_ME_CURR_PIC_L0 + 1;

    /* Last reference frame, if available. */
    if (param->last_ref_pic) {
        obj_surface = param->last_ref_pic;
        vp9_priv_surface = obj_surface->private_data;

        if (param->use_16x_me)
            input_surface = vp9_priv_surface->scaled_16x_surface_obj;
            input_surface = vp9_priv_surface->scaled_4x_surface_obj;

        /* If dynamic scaling resized this reference relative to the
         * current frame, use its DYS-rescaled copy instead. */
        if (param->dys_enabled &&
            ((vp9_priv_surface->frame_width != param->frame_width) ||
             (vp9_priv_surface->frame_height != param->frame_height))) {
            if (param->use_16x_me)
                input_surface = vp9_priv_surface->dys_16x_surface_obj;
                input_surface = vp9_priv_surface->dys_4x_surface_obj;

        i965_add_adv_gpe_surface(ctx, gpe_context,

        i965_add_adv_gpe_surface(ctx, gpe_context,

    /* Golden reference frame, if available (same selection logic). */
    if (param->golden_ref_pic) {
        obj_surface = param->golden_ref_pic;
        vp9_priv_surface = obj_surface->private_data;

        if (param->use_16x_me)
            input_surface = vp9_priv_surface->scaled_16x_surface_obj;
            input_surface = vp9_priv_surface->scaled_4x_surface_obj;

        if (param->dys_enabled &&
            ((vp9_priv_surface->frame_width != param->frame_width) ||
             (vp9_priv_surface->frame_height != param->frame_height))) {
            if (param->use_16x_me)
                input_surface = vp9_priv_surface->dys_16x_surface_obj;
                input_surface = vp9_priv_surface->dys_4x_surface_obj;

        i965_add_adv_gpe_surface(ctx, gpe_context,

        i965_add_adv_gpe_surface(ctx, gpe_context,

    /* Alternate reference frame, if available (same selection logic). */
    if (param->alt_ref_pic) {
        obj_surface = param->alt_ref_pic;
        vp9_priv_surface = obj_surface->private_data;

        if (param->use_16x_me)
            input_surface = vp9_priv_surface->scaled_16x_surface_obj;
            input_surface = vp9_priv_surface->scaled_4x_surface_obj;

        if (param->dys_enabled &&
            ((vp9_priv_surface->frame_width != param->frame_width) ||
             (vp9_priv_surface->frame_height != param->frame_height))) {
            if (param->use_16x_me)
                input_surface = vp9_priv_surface->dys_16x_surface_obj;
                input_surface = vp9_priv_surface->dys_4x_surface_obj;

        i965_add_adv_gpe_surface(ctx, gpe_context,

        i965_add_adv_gpe_surface(ctx, gpe_context,
/*
 * gen9_me_add_surfaces_vp9 - collect the ME surface parameters from the
 * per-encoder VP9 state and forward them to pfn_send_me_surface, which
 * binds them into the GPE binding table.
 */
void gen9_me_add_surfaces_vp9(VADriverContextP ctx,
                              struct encode_state *encode_state,
                              struct intel_encoder_context *encoder_context,
                              struct i965_gpe_context *gpe_context,
    struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
    struct gen9_vp9_me_surface_param me_surface_param;
    struct gen9_vp9_state *vp9_state;

    vp9_state = (struct gen9_vp9_state *)(encoder_context->enc_priv_state);

    /* sScaled4xSurface surface */
    memset(&me_surface_param, 0, sizeof(me_surface_param));
    me_surface_param.last_ref_pic = vp9_state->last_ref_obj;
    me_surface_param.golden_ref_pic = vp9_state->golden_ref_obj;
    me_surface_param.alt_ref_pic = vp9_state->alt_ref_obj;
    me_surface_param.curr_pic = vp9_state->curr_frame;
    me_surface_param.pres_4x_memv_data_buffer = &vme_context->s4x_memv_data_buffer;
    me_surface_param.pres_16x_memv_data_buffer = &vme_context->s16x_memv_data_buffer;
    /* BRC distortion intentionally shares the 4x ME distortion buffer. */
    me_surface_param.pres_me_distortion_buffer = &vme_context->s4x_memv_distortion_buffer;
    me_surface_param.pres_me_brc_distortion_buffer = &vme_context->s4x_memv_distortion_buffer;
        /* 16x pass: walker resolution comes from the 16x-downscaled size. */
        me_surface_param.downscaled_width_in_mb = vp9_state->downscaled_width_16x_in_mb;
        me_surface_param.downscaled_height_in_mb = vp9_state->downscaled_height_16x_in_mb;
        me_surface_param.downscaled_width_in_mb = vp9_state->downscaled_width_4x_in_mb;
        me_surface_param.downscaled_height_in_mb = vp9_state->downscaled_height_4x_in_mb;
    me_surface_param.frame_width = vp9_state->frame_width;
    me_surface_param.frame_height = vp9_state->frame_height;
    me_surface_param.use_16x_me = use_16x_me;
    me_surface_param.b16xme_enabled = vp9_state->b16xme_enabled;
    me_surface_param.dys_enabled = vp9_state->dys_in_use;

    vme_context->pfn_send_me_surface(ctx, encode_state,
/*
 * gen9_vp9_me_kernel - run one HME (hierarchical motion estimation) pass.
 *
 * Initializes the ME GPE context, fills the CURBE via pfn_set_curbe_me,
 * binds the surfaces, then launches the kernel with a media-object walker
 * sized by the 16x- or 4x-downscaled frame dimensions in macroblocks.
 *
 * Returns VA_STATUS_SUCCESS, or VA_STATUS_ERROR_INVALID_PARAMETER when
 * the VP9 private state / picture parameters are missing.
 */
gen9_vp9_me_kernel(VADriverContextP ctx,
                   struct encode_state *encode_state,
                   struct intel_encoder_context *encoder_context,
    struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
    struct i965_gpe_context *gpe_context;
    struct gen9_vp9_me_curbe_param me_curbe_param;
    struct gen9_vp9_state *vp9_state;
    struct gpe_media_object_walker_parameter media_object_walker_param;
    struct gpe_encoder_kernel_walker_parameter kernel_walker_param;

    vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
    if (!vp9_state || !vp9_state->pic_param)
        return VA_STATUS_ERROR_INVALID_PARAMETER;

        media_function = VP9_MEDIA_STATE_16X_ME;
        media_function = VP9_MEDIA_STATE_4X_ME;

    gpe_context = &(vme_context->me_context.gpe_context);

    gen8_gpe_context_init(ctx, gpe_context);
    gen9_gpe_reset_binding_table(ctx, gpe_context);

    /* Fill the ME CURBE from the current picture/sequence parameters. */
    memset(&me_curbe_param, 0, sizeof(me_curbe_param));
    me_curbe_param.ppic_param = vp9_state->pic_param;
    me_curbe_param.pseq_param = vp9_state->seq_param;
    me_curbe_param.frame_width = vp9_state->frame_width;
    me_curbe_param.frame_height = vp9_state->frame_height;
    me_curbe_param.ref_frame_flag = vp9_state->ref_frame_flag;
    me_curbe_param.use_16x_me = use_16x_me;
    me_curbe_param.b16xme_enabled = vp9_state->b16xme_enabled;
    vme_context->pfn_set_curbe_me(ctx, encode_state,

    gen9_me_add_surfaces_vp9(ctx, encode_state,

    gen8_gpe_setup_interface_data(ctx, gpe_context);

    /* Walker resolution: one thread per MB of the downscaled frame. */
    memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
        kernel_walker_param.resolution_x = vp9_state->downscaled_width_16x_in_mb;
        kernel_walker_param.resolution_y = vp9_state->downscaled_height_16x_in_mb;
        kernel_walker_param.resolution_x = vp9_state->downscaled_width_4x_in_mb;
        kernel_walker_param.resolution_y = vp9_state->downscaled_height_4x_in_mb;
    kernel_walker_param.no_dependency = 1;

    gen9_init_media_object_walker_parameter(encoder_context, &kernel_walker_param, &media_object_walker_param);

    gen9_run_kernel_media_object_walker(ctx, encoder_context,
                                        &media_object_walker_param);

    return VA_STATUS_SUCCESS;
/*
 * gen9_vp9_set_curbe_scaling_cm - fill the CURBE for the CM 4x scaling
 * kernel: source dimensions, input/output binding-table indices, and the
 * optional MB-statistics output surface.
 */
gen9_vp9_set_curbe_scaling_cm(VADriverContextP ctx,
                              struct encode_state *encode_state,
                              struct i965_gpe_context *gpe_context,
                              struct intel_encoder_context *encoder_context,
                              struct gen9_vp9_scaling_curbe_param *curbe_param)
    vp9_scaling4x_curbe_data_cm *curbe_cmd;

    curbe_cmd = i965_gpe_context_map_curbe(gpe_context);

    memset(curbe_cmd, 0, sizeof(vp9_scaling4x_curbe_data_cm));

    curbe_cmd->dw0.input_picture_width = curbe_param->input_picture_width;
    curbe_cmd->dw0.input_picture_height = curbe_param->input_picture_height;

    curbe_cmd->dw1.input_y_bti = VP9_BTI_SCALING_FRAME_SRC_Y;
    curbe_cmd->dw2.output_y_bti = VP9_BTI_SCALING_FRAME_DST_Y;

    /* Per-MB statistics outputs are disabled by default. */
    curbe_cmd->dw6.enable_mb_variance_output = 0;
    curbe_cmd->dw6.enable_mb_pixel_average_output = 0;
    curbe_cmd->dw6.enable_blk8x8_stat_output = 0;

    /* Only bind the stats surface when some statistic was requested. */
    if (curbe_param->mb_variance_output_enabled ||
        curbe_param->mb_pixel_average_output_enabled) {
        curbe_cmd->dw10.mbv_proc_stat_bti = VP9_BTI_SCALING_FRAME_MBVPROCSTATS_DST_CM;

    i965_gpe_context_unmap_curbe(gpe_context);
/*
 * gen9_vp9_send_scaling_surface - bind the scaling kernel's input and
 * output frames at the BTI slots recorded in p_scaling_bti.
 *
 * The surface format is selected by the requested output packing:
 * R32_UNORM, R16_UNORM, or R8_UNORM (default).
 */
gen9_vp9_send_scaling_surface(VADriverContextP ctx,
                              struct encode_state *encode_state,
                              struct i965_gpe_context *gpe_context,
                              struct intel_encoder_context *encoder_context,
                              struct gen9_vp9_scaling_surface_param *scaling_surface_param)
    vp9_bti_scaling_offset *scaling_bti;
    unsigned int surface_format;

    scaling_bti = scaling_surface_param->p_scaling_bti;

    if (scaling_surface_param->scaling_out_use_32unorm_surf_fmt)
        surface_format = I965_SURFACEFORMAT_R32_UNORM;
    else if (scaling_surface_param->scaling_out_use_16unorm_surf_fmt)
        surface_format = I965_SURFACEFORMAT_R16_UNORM;
        surface_format = I965_SURFACEFORMAT_R8_UNORM;

    i965_add_2d_gpe_surface(ctx, gpe_context,
                            scaling_surface_param->input_surface,
                            0, 1, surface_format,
                            scaling_bti->scaling_frame_src_y);

    i965_add_2d_gpe_surface(ctx, gpe_context,
                            scaling_surface_param->output_surface,
                            0, 1, surface_format,
                            scaling_bti->scaling_frame_dst_y);
/*
 * gen9_vp9_scaling_kernel - run one downscaling pass for HME.
 *
 * 4x pass: source frame (or its DYS-rescaled copy when dynamic scaling
 * changed the resolution) -> scaled_4x surface.
 * 16x pass: scaled_4x surface -> scaled_16x surface.
 *
 * Returns VA_STATUS_SUCCESS, or VA_STATUS_ERROR_INVALID_PARAMETER when
 * the VP9 private state / picture parameters are missing.
 */
gen9_vp9_scaling_kernel(VADriverContextP ctx,
                        struct encode_state *encode_state,
                        struct intel_encoder_context *encoder_context,
                        int use_16x_scaling)
    struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
    struct i965_gpe_context *gpe_context;
    struct gen9_vp9_scaling_curbe_param scaling_curbe_param;
    struct gen9_vp9_scaling_surface_param scaling_surface_param;
    struct gen9_vp9_state *vp9_state;
    VAEncPictureParameterBufferVP9 *pic_param;
    struct gpe_media_object_walker_parameter media_object_walker_param;
    struct gpe_encoder_kernel_walker_parameter kernel_walker_param;
    struct object_surface *obj_surface;
    struct object_surface *input_surface, *output_surface;
    struct gen9_surface_vp9 *vp9_priv_surface;
    unsigned int downscaled_width_in_mb, downscaled_height_in_mb;
    unsigned int input_frame_width, input_frame_height;
    unsigned int output_frame_width, output_frame_height;

    vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
    if (!vp9_state || !vp9_state->pic_param)
        return VA_STATUS_ERROR_INVALID_PARAMETER;

    pic_param = vp9_state->pic_param;

    if (use_16x_scaling)
        media_function = VP9_MEDIA_STATE_16X_SCALING;
        media_function = VP9_MEDIA_STATE_4X_SCALING;

    gpe_context = &(vme_context->scaling_context.gpe_contexts[0]);

    gen8_gpe_context_init(ctx, gpe_context);
    gen9_gpe_reset_binding_table(ctx, gpe_context);

    /* Scaled surfaces hang off the reconstructed frame's private data. */
    obj_surface = encode_state->reconstructed_object;
    vp9_priv_surface = obj_surface->private_data;

    if (use_16x_scaling) {
        /* 16x pass: 4x-scaled frame in, 16x-scaled frame out. */
        downscaled_width_in_mb = vp9_state->downscaled_width_16x_in_mb;
        downscaled_height_in_mb = vp9_state->downscaled_height_16x_in_mb;

        input_surface = vp9_priv_surface->scaled_4x_surface_obj;
        input_frame_width = vp9_state->frame_width_4x;
        input_frame_height = vp9_state->frame_height_4x;

        output_surface = vp9_priv_surface->scaled_16x_surface_obj;
        output_frame_width = vp9_state->frame_width_16x;
        output_frame_height = vp9_state->frame_height_16x;
        downscaled_width_in_mb = vp9_state->downscaled_width_4x_in_mb;
        downscaled_height_in_mb = vp9_state->downscaled_height_4x_in_mb;

        /* 4x pass: when dynamic scaling changed src vs dst resolution,
         * read from the DYS-rescaled frame instead of the raw input. */
        if (vp9_state->dys_in_use &&
            ((pic_param->frame_width_src != pic_param->frame_width_dst) ||
             (pic_param->frame_height_src != pic_param->frame_height_dst)))
            input_surface = vp9_priv_surface->dys_surface_obj;
            input_surface = encode_state->input_yuv_object;

        input_frame_width = vp9_state->frame_width;
        input_frame_height = vp9_state->frame_height;

        output_surface = vp9_priv_surface->scaled_4x_surface_obj;
        output_frame_width = vp9_state->frame_width_4x;
        output_frame_height = vp9_state->frame_height_4x;

    memset(&scaling_curbe_param, 0, sizeof(scaling_curbe_param));

    scaling_curbe_param.input_picture_width = input_frame_width;
    scaling_curbe_param.input_picture_height = input_frame_height;

    scaling_curbe_param.use_16x_scaling = use_16x_scaling;
    scaling_curbe_param.use_32x_scaling = 0;

    /* MB variance is only needed at 4x (adaptive transform decision). */
    if (use_16x_scaling)
        scaling_curbe_param.mb_variance_output_enabled = 0;
        scaling_curbe_param.mb_variance_output_enabled = vp9_state->adaptive_transform_decision_enabled;

    scaling_curbe_param.blk8x8_stat_enabled = 0;

    vme_context->pfn_set_curbe_scaling(ctx, encode_state,
                                       &scaling_curbe_param);

    memset(&scaling_surface_param, 0, sizeof(scaling_surface_param));
    scaling_surface_param.p_scaling_bti = (void *)(&vme_context->scaling_context.scaling_4x_bti);
    scaling_surface_param.input_surface = input_surface;
    scaling_surface_param.input_frame_width = input_frame_width;
    scaling_surface_param.input_frame_height = input_frame_height;

    scaling_surface_param.output_surface = output_surface;
    scaling_surface_param.output_frame_width = output_frame_width;
    scaling_surface_param.output_frame_height = output_frame_height;
    scaling_surface_param.scaling_out_use_16unorm_surf_fmt = 0;
    scaling_surface_param.scaling_out_use_32unorm_surf_fmt = 1;

    vme_context->pfn_send_scaling_surface(ctx, encode_state,
                                          &scaling_surface_param);

    gen8_gpe_setup_interface_data(ctx, gpe_context);

    memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
    /* the scaling is based on 8x8 blk level */
    kernel_walker_param.resolution_x = downscaled_width_in_mb * 2;
    kernel_walker_param.resolution_y = downscaled_height_in_mb * 2;
    kernel_walker_param.no_dependency = 1;

    gen9_init_media_object_walker_parameter(encoder_context, &kernel_walker_param, &media_object_walker_param);

    gen9_run_kernel_media_object_walker(ctx, encoder_context,
                                        &media_object_walker_param);

    return VA_STATUS_SUCCESS;
/*
 * gen9_vp9_dys_set_sampler_state - program the 8x8 AVS sampler used by
 * the dynamic-scaling (DYS) kernel.
 *
 * Maps the sampler buffer object, writes the fixed edge-detection /
 * sharpness parameters and the AVS filter coefficient tables from
 * gen9_vp9_avs_coeffs, then unmaps. The constants are fixed tuning
 * values; they are not derived from per-frame state.
 */
gen9_vp9_dys_set_sampler_state(struct i965_gpe_context *gpe_context)
    struct gen9_sampler_8x8_avs *sampler_cmd;

    /* Map writable; the virtual pointer is checked before use below. */
    dri_bo_map(gpe_context->sampler.bo, 1);

    if (!gpe_context->sampler.bo->virtual)

    sampler_cmd = (struct gen9_sampler_8x8_avs *)
                  (gpe_context->sampler.bo->virtual + gpe_context->sampler.offset);

    memset(sampler_cmd, 0, sizeof(struct gen9_sampler_8x8_avs));

    sampler_cmd->dw0.r3c_coefficient = 15;
    sampler_cmd->dw0.r3x_coefficient = 6;
    sampler_cmd->dw0.strong_edge_threshold = 8;
    sampler_cmd->dw0.weak_edge_threshold = 1;
    sampler_cmd->dw0.gain_factor = 32;

    sampler_cmd->dw2.r5c_coefficient = 3;
    sampler_cmd->dw2.r5cx_coefficient = 8;
    sampler_cmd->dw2.r5x_coefficient = 9;
    sampler_cmd->dw2.strong_edge_weight = 6;
    sampler_cmd->dw2.regular_weight = 3;
    sampler_cmd->dw2.non_edge_weight = 2;
    sampler_cmd->dw2.global_noise_estimation = 255;

    sampler_cmd->dw3.enable_8tap_adaptive_filter = 0;
    sampler_cmd->dw3.cos_alpha = 79;
    sampler_cmd->dw3.sin_alpha = 101;

    sampler_cmd->dw5.diamond_du = 0;
    sampler_cmd->dw5.hs_margin = 3;
    sampler_cmd->dw5.diamond_alpha = 100;

    sampler_cmd->dw7.inv_margin_vyl = 3300;

    sampler_cmd->dw8.inv_margin_vyu = 1600;

    sampler_cmd->dw10.y_slope2 = 24;
    sampler_cmd->dw10.s0l = 1792;

    sampler_cmd->dw12.y_slope1 = 24;

    sampler_cmd->dw14.s0u = 256;

    sampler_cmd->dw15.s2u = 1792;
    sampler_cmd->dw15.s1u = 0;

    /* First 17 AVS coefficient entries. */
    memcpy(sampler_cmd->coefficients,
           &gen9_vp9_avs_coeffs[0],
           17 * sizeof(struct gen8_sampler_8x8_avs_coefficients));

    sampler_cmd->dw152.default_sharpness_level = 255;
    sampler_cmd->dw152.max_derivative_4_pixels = 7;
    sampler_cmd->dw152.max_derivative_8_pixels = 20;
    sampler_cmd->dw152.transition_area_with_4_pixels = 4;
    sampler_cmd->dw152.transition_area_with_8_pixels = 5;

    sampler_cmd->dw153.bypass_x_adaptive_filtering = 1;
    sampler_cmd->dw153.bypass_y_adaptive_filtering = 1;
    sampler_cmd->dw153.adaptive_filter_for_all_channel = 0;

    /* Remaining 15 entries; offset 17 * 8 — presumably 8 dwords per
     * coefficient entry in gen9_vp9_avs_coeffs — TODO(review): confirm
     * against the table's declaration. */
    memcpy(sampler_cmd->extra_coefficients,
           &gen9_vp9_avs_coeffs[17 * 8],
           15 * sizeof(struct gen8_sampler_8x8_avs_coefficients));

    dri_bo_unmap(gpe_context->sampler.bo);
/*
 * gen9_vp9_set_curbe_dys - fill the CURBE for the dynamic-scaling kernel:
 * input/output dimensions, the per-pixel sampling steps, and the
 * binding-table indices for the NV12 input and Y output planes.
 */
gen9_vp9_set_curbe_dys(VADriverContextP ctx,
                       struct encode_state *encode_state,
                       struct i965_gpe_context *gpe_context,
                       struct intel_encoder_context *encoder_context,
                       struct gen9_vp9_dys_curbe_param *curbe_param)
    vp9_dys_curbe_data *curbe_cmd;

    curbe_cmd = i965_gpe_context_map_curbe(gpe_context);

    memset(curbe_cmd, 0, sizeof(vp9_dys_curbe_data));

    curbe_cmd->dw0.input_frame_width = curbe_param->input_width;
    curbe_cmd->dw0.input_frame_height = curbe_param->input_height;

    curbe_cmd->dw1.output_frame_width = curbe_param->output_width;
    curbe_cmd->dw1.output_frame_height = curbe_param->output_height;

    /* Normalized sampling step per output pixel (assumes non-zero
     * output dimensions — the caller supplies real frame sizes). */
    curbe_cmd->dw2.delta_u = 1.0f / curbe_param->output_width;
    curbe_cmd->dw3.delta_v = 1.0f / curbe_param->output_height;

    curbe_cmd->dw16.input_frame_nv12_bti = VP9_BTI_DYS_INPUT_NV12;
    curbe_cmd->dw17.output_frame_y_bti = VP9_BTI_DYS_OUTPUT_Y;
    curbe_cmd->dw18.avs_sample_idx = 0;

    i965_gpe_context_unmap_curbe(gpe_context);
/*
 * gen9_vp9_send_dys_surface - bind the DYS kernel surfaces: the NV12
 * input frame (sampled via the AVS sampler), and the output frame's
 * Y plane (R8_UNORM) and UV plane (R16_UINT) as separate 2D surfaces.
 */
gen9_vp9_send_dys_surface(VADriverContextP ctx,
                          struct encode_state *encode_state,
                          struct i965_gpe_context *gpe_context,
                          struct intel_encoder_context *encoder_context,
                          struct gen9_vp9_dys_surface_param *surface_param)
    if (surface_param->input_frame)
        i965_add_adv_gpe_surface(ctx,
                                 surface_param->input_frame,
                                 VP9_BTI_DYS_INPUT_NV12);

    if (surface_param->output_frame) {
        /* Y plane */
        i965_add_2d_gpe_surface(ctx,
                                surface_param->output_frame,
                                I965_SURFACEFORMAT_R8_UNORM,
                                VP9_BTI_DYS_OUTPUT_Y);

        /* Interleaved UV plane, addressed as 16-bit texels. */
        i965_add_2d_gpe_surface(ctx,
                                surface_param->output_frame,
                                I965_SURFACEFORMAT_R16_UINT,
                                VP9_BTI_DYS_OUTPUT_UV);
/*
 * gen9_vp9_dys_kernel - run the dynamic-scaling kernel once, resampling
 * dys_kernel_param->input_surface (input_width x input_height) into
 * output_surface (output_width x output_height).
 *
 * The walker is sized by the output frame in 16x16 blocks. Always
 * returns VA_STATUS_SUCCESS.
 */
gen9_vp9_dys_kernel(VADriverContextP ctx,
                    struct encode_state *encode_state,
                    struct intel_encoder_context *encoder_context,
                    gen9_vp9_dys_kernel_param *dys_kernel_param)
    struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
    struct i965_gpe_context *gpe_context;
    struct gen9_vp9_dys_curbe_param curbe_param;
    struct gen9_vp9_dys_surface_param surface_param;
    struct gpe_media_object_walker_parameter media_object_walker_param;
    struct gpe_encoder_kernel_walker_parameter kernel_walker_param;
    unsigned int resolution_x, resolution_y;

    media_function = VP9_MEDIA_STATE_DYS;
    gpe_context = &vme_context->dys_context.gpe_context;

    /* Context init deliberately skipped here (done once elsewhere). */
    //gen8_gpe_context_init(ctx, gpe_context);
    gen9_gpe_reset_binding_table(ctx, gpe_context);

    /* sampler state is configured only when initializing the GPE context */

    memset(&curbe_param, 0, sizeof(curbe_param));
    curbe_param.input_width = dys_kernel_param->input_width;
    curbe_param.input_height = dys_kernel_param->input_height;
    curbe_param.output_width = dys_kernel_param->output_width;
    curbe_param.output_height = dys_kernel_param->output_height;
    vme_context->pfn_set_curbe_dys(ctx, encode_state,

    // Add surface states
    memset(&surface_param, 0, sizeof(surface_param));
    surface_param.input_frame = dys_kernel_param->input_surface;
    surface_param.output_frame = dys_kernel_param->output_surface;
    surface_param.vert_line_stride = 0;
    surface_param.vert_line_stride_offset = 0;

    vme_context->pfn_send_dys_surface(ctx,

    /* One walker thread per 16x16 block of the output frame. */
    resolution_x = ALIGN(dys_kernel_param->output_width, 16) / 16;
    resolution_y = ALIGN(dys_kernel_param->output_height, 16) / 16;

    gen8_gpe_setup_interface_data(ctx, gpe_context);

    memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
    kernel_walker_param.resolution_x = resolution_x;
    kernel_walker_param.resolution_y = resolution_y;
    kernel_walker_param.no_dependency = 1;

    gen9_init_media_object_walker_parameter(encoder_context, &kernel_walker_param, &media_object_walker_param);

    gen9_run_kernel_media_object_walker(ctx, encoder_context,
                                        &media_object_walker_param);

    return VA_STATUS_SUCCESS;
2620 gen9_vp9_run_dys_refframes(VADriverContextP ctx,
2621 struct encode_state *encode_state,
2622 struct intel_encoder_context *encoder_context)
2624 struct gen9_vp9_state *vp9_state;
2625 VAEncPictureParameterBufferVP9 *pic_param;
2626 gen9_vp9_dys_kernel_param dys_kernel_param;
2627 struct object_surface *obj_surface;
2628 struct object_surface *input_surface, *output_surface;
2629 struct gen9_surface_vp9 *vp9_priv_surface;
2631 vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
2633 if (!vp9_state || !vp9_state->pic_param)
2634 return VA_STATUS_ERROR_INVALID_PARAMETER;
2636 pic_param = vp9_state->pic_param;
2638 if ((pic_param->frame_width_src != pic_param->frame_width_dst) ||
2639 (pic_param->frame_height_src != pic_param->frame_height_dst)) {
2640 input_surface = encode_state->input_yuv_object;
2641 obj_surface = encode_state->reconstructed_object;
2642 vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
2643 output_surface = vp9_priv_surface->dys_surface_obj;
2645 memset(&dys_kernel_param, 0, sizeof(dys_kernel_param));
2646 dys_kernel_param.input_width = pic_param->frame_width_src;
2647 dys_kernel_param.input_height = pic_param->frame_height_src;
2648 dys_kernel_param.input_surface = input_surface;
2649 dys_kernel_param.output_width = pic_param->frame_width_dst;
2650 dys_kernel_param.output_height = pic_param->frame_height_dst;
2651 dys_kernel_param.output_surface = output_surface;
2652 gen9_vp9_dys_kernel(ctx, encode_state,
2657 if ((vp9_state->dys_ref_frame_flag & VP9_LAST_REF) &&
2658 vp9_state->last_ref_obj) {
2659 obj_surface = vp9_state->last_ref_obj;
2660 vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
2662 input_surface = obj_surface;
2663 output_surface = vp9_priv_surface->dys_surface_obj;
2665 dys_kernel_param.input_width = vp9_priv_surface->frame_width;
2666 dys_kernel_param.input_height = vp9_priv_surface->frame_height;
2667 dys_kernel_param.input_surface = input_surface;
2669 dys_kernel_param.output_width = pic_param->frame_width_dst;
2670 dys_kernel_param.output_height = pic_param->frame_height_dst;
2671 dys_kernel_param.output_surface = output_surface;
2673 gen9_vp9_dys_kernel(ctx, encode_state,
2677 if (vp9_state->hme_enabled) {
2678 dys_kernel_param.input_width = ALIGN((vp9_priv_surface->frame_width / 4), 16);
2679 dys_kernel_param.input_width = ALIGN((vp9_priv_surface->frame_height / 4), 16);
2680 dys_kernel_param.input_surface = vp9_priv_surface->scaled_4x_surface_obj;
2682 dys_kernel_param.output_width = vp9_state->frame_width_4x;
2683 dys_kernel_param.output_height = vp9_state->frame_height_4x;
2684 dys_kernel_param.output_surface = vp9_priv_surface->dys_4x_surface_obj;
2686 gen9_vp9_dys_kernel(ctx, encode_state,
2690 /* Does it really need to do the 16x HME if the
2691 * resolution is different?
2692 * Maybe it should be restricted
2694 if (vp9_state->b16xme_enabled) {
2695 dys_kernel_param.input_width = ALIGN((vp9_priv_surface->frame_width / 16), 16);
2696 dys_kernel_param.input_height = ALIGN((vp9_priv_surface->frame_height / 16), 16);
2697 dys_kernel_param.input_surface = vp9_priv_surface->scaled_16x_surface_obj;
2699 dys_kernel_param.output_width = vp9_state->frame_width_16x;
2700 dys_kernel_param.output_height = vp9_state->frame_height_16x;
2701 dys_kernel_param.output_surface = vp9_priv_surface->dys_16x_surface_obj;
2703 gen9_vp9_dys_kernel(ctx, encode_state,
2710 if ((vp9_state->dys_ref_frame_flag & VP9_GOLDEN_REF) &&
2711 vp9_state->golden_ref_obj) {
2712 obj_surface = vp9_state->golden_ref_obj;
2713 vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
2715 input_surface = obj_surface;
2716 output_surface = vp9_priv_surface->dys_surface_obj;
2718 dys_kernel_param.input_width = vp9_priv_surface->frame_width;
2719 dys_kernel_param.input_height = vp9_priv_surface->frame_height;
2720 dys_kernel_param.input_surface = input_surface;
2722 dys_kernel_param.output_width = pic_param->frame_width_dst;
2723 dys_kernel_param.output_height = pic_param->frame_height_dst;
2724 dys_kernel_param.output_surface = output_surface;
2726 gen9_vp9_dys_kernel(ctx, encode_state,
2730 if (vp9_state->hme_enabled) {
2731 dys_kernel_param.input_width = ALIGN((vp9_priv_surface->frame_width / 4), 16);
2732 dys_kernel_param.input_width = ALIGN((vp9_priv_surface->frame_height / 4), 16);
2733 dys_kernel_param.input_surface = vp9_priv_surface->scaled_4x_surface_obj;
2735 dys_kernel_param.output_width = vp9_state->frame_width_4x;
2736 dys_kernel_param.output_height = vp9_state->frame_height_4x;
2737 dys_kernel_param.output_surface = vp9_priv_surface->dys_4x_surface_obj;
2739 gen9_vp9_dys_kernel(ctx, encode_state,
2743 /* Does it really need to do the 16x HME if the
2744 * resolution is different?
2745 * Maybe it should be restricted
2747 if (vp9_state->b16xme_enabled) {
2748 dys_kernel_param.input_width = ALIGN((vp9_priv_surface->frame_width / 16), 16);
2749 dys_kernel_param.input_height = ALIGN((vp9_priv_surface->frame_height / 16), 16);
2750 dys_kernel_param.input_surface = vp9_priv_surface->scaled_16x_surface_obj;
2752 dys_kernel_param.output_width = vp9_state->frame_width_16x;
2753 dys_kernel_param.output_height = vp9_state->frame_height_16x;
2754 dys_kernel_param.output_surface = vp9_priv_surface->dys_16x_surface_obj;
2756 gen9_vp9_dys_kernel(ctx, encode_state,
2763 if ((vp9_state->dys_ref_frame_flag & VP9_ALT_REF) &&
2764 vp9_state->alt_ref_obj) {
2765 obj_surface = vp9_state->alt_ref_obj;
2766 vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
2768 input_surface = obj_surface;
2769 output_surface = vp9_priv_surface->dys_surface_obj;
2771 dys_kernel_param.input_width = vp9_priv_surface->frame_width;
2772 dys_kernel_param.input_height = vp9_priv_surface->frame_height;
2773 dys_kernel_param.input_surface = input_surface;
2775 dys_kernel_param.output_width = pic_param->frame_width_dst;
2776 dys_kernel_param.output_height = pic_param->frame_height_dst;
2777 dys_kernel_param.output_surface = output_surface;
2779 gen9_vp9_dys_kernel(ctx, encode_state,
2783 if (vp9_state->hme_enabled) {
2784 dys_kernel_param.input_width = ALIGN((vp9_priv_surface->frame_width / 4), 16);
2785 dys_kernel_param.input_width = ALIGN((vp9_priv_surface->frame_height / 4), 16);
2786 dys_kernel_param.input_surface = vp9_priv_surface->scaled_4x_surface_obj;
2788 dys_kernel_param.output_width = vp9_state->frame_width_4x;
2789 dys_kernel_param.output_height = vp9_state->frame_height_4x;
2790 dys_kernel_param.output_surface = vp9_priv_surface->dys_4x_surface_obj;
2792 gen9_vp9_dys_kernel(ctx, encode_state,
2796 /* Does it really need to do the 16x HME if the
2797 * resolution is different?
2798 * Maybe it should be restricted
2800 if (vp9_state->b16xme_enabled) {
2801 dys_kernel_param.input_width = ALIGN((vp9_priv_surface->frame_width / 16), 16);
2802 dys_kernel_param.input_height = ALIGN((vp9_priv_surface->frame_height / 16), 16);
2803 dys_kernel_param.input_surface = vp9_priv_surface->scaled_16x_surface_obj;
2805 dys_kernel_param.output_width = vp9_state->frame_width_16x;
2806 dys_kernel_param.output_height = vp9_state->frame_height_16x;
2807 dys_kernel_param.output_surface = vp9_priv_surface->dys_16x_surface_obj;
2809 gen9_vp9_dys_kernel(ctx, encode_state,
2816 return VA_STATUS_SUCCESS;
2820 gen9_vp9_set_curbe_mbenc(VADriverContextP ctx,
2821 struct encode_state *encode_state,
2822 struct i965_gpe_context *gpe_context,
2823 struct intel_encoder_context *encoder_context,
2824 struct gen9_vp9_mbenc_curbe_param *curbe_param)
2826 struct gen9_vp9_state *vp9_state;
2827 VAEncMiscParameterTypeVP9PerSegmantParam *seg_param, tmp_seg_param;
2828 vp9_mbenc_curbe_data *curbe_cmd;
2829 VAEncPictureParameterBufferVP9 *pic_param;
2830 int i, segment_count;
2832 struct object_surface *obj_surface;
2833 struct gen9_surface_vp9 *vp9_priv_surface;
2835 vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
2837 if (!vp9_state || !vp9_state->pic_param)
2840 pic_param = curbe_param->ppic_param;
2841 seg_param = curbe_param->psegment_param;
2844 memset(&tmp_seg_param, 0, sizeof(tmp_seg_param));
2845 seg_param = &tmp_seg_param;
2848 curbe_cmd = i965_gpe_context_map_curbe(gpe_context);
2853 memset(curbe_cmd, 0, sizeof(vp9_mbenc_curbe_data));
2855 if (vp9_state->dys_in_use) {
2856 curbe_cmd->dw0.frame_width = pic_param->frame_width_dst;
2857 curbe_cmd->dw0.frame_height = pic_param->frame_height_dst;
2859 curbe_cmd->dw0.frame_width = pic_param->frame_width_src;
2860 curbe_cmd->dw0.frame_height = pic_param->frame_height_src;
2863 curbe_cmd->dw1.frame_type = curbe_param->picture_coding_type;
2865 curbe_cmd->dw1.segmentation_enable = pic_param->pic_flags.bits.segmentation_enabled;
2866 if (pic_param->pic_flags.bits.segmentation_enabled)
2871 curbe_cmd->dw1.ref_frame_flags = curbe_param->ref_frame_flag;
2873 //right now set them to normal settings
2874 if (curbe_param->picture_coding_type) {
2875 switch (vp9_state->target_usage) {
2876 case INTEL_ENC_VP9_TU_QUALITY:
2877 curbe_cmd->dw1.min_16for32_check = 0x00;
2878 curbe_cmd->dw2.multi_pred = 0x02;
2879 curbe_cmd->dw2.len_sp = 0x39;
2880 curbe_cmd->dw2.search_x = 0x30;
2881 curbe_cmd->dw2.search_y = 0x28;
2882 curbe_cmd->dw3.min_ref_for32_check = 0x01;
2883 curbe_cmd->dw4.skip16_threshold = 0x000A;
2884 curbe_cmd->dw4.disable_mr_threshold = 0x000C;
2886 memcpy(&curbe_cmd->dw16,
2887 vp9_diamond_ime_search_path_delta,
2888 14 * sizeof(unsigned int));
2890 case INTEL_ENC_VP9_TU_PERFORMANCE:
2891 curbe_cmd->dw1.min_16for32_check = 0x02;
2892 curbe_cmd->dw2.multi_pred = 0x00;
2893 curbe_cmd->dw2.len_sp = 0x10;
2894 curbe_cmd->dw2.search_x = 0x20;
2895 curbe_cmd->dw2.search_y = 0x20;
2896 curbe_cmd->dw3.min_ref_for32_check = 0x03;
2897 curbe_cmd->dw4.skip16_threshold = 0x0014;
2898 curbe_cmd->dw4.disable_mr_threshold = 0x0016;
2900 memcpy(&curbe_cmd->dw16,
2901 vp9_fullspiral_ime_search_path_delta,
2902 14 * sizeof(unsigned int));
2905 default: // normal settings
2906 curbe_cmd->dw1.min_16for32_check = 0x01;
2907 curbe_cmd->dw2.multi_pred = 0x00;
2908 curbe_cmd->dw2.len_sp = 0x19;
2909 curbe_cmd->dw2.search_x = 0x30;
2910 curbe_cmd->dw2.search_y = 0x28;
2911 curbe_cmd->dw3.min_ref_for32_check = 0x02;
2912 curbe_cmd->dw4.skip16_threshold = 0x000F;
2913 curbe_cmd->dw4.disable_mr_threshold = 0x0011;
2915 memcpy(&curbe_cmd->dw16,
2916 vp9_diamond_ime_search_path_delta,
2917 14 * sizeof(unsigned int));
2921 curbe_cmd->dw3.hme_enabled = curbe_param->hme_enabled;
2922 curbe_cmd->dw3.multi_ref_qp_check = curbe_param->multi_ref_qp_check;
2923 // co-located predictor must be disabled when dynamic scaling is enabled
2924 curbe_cmd->dw3.disable_temp_pred = vp9_state->dys_in_use;
2927 curbe_cmd->dw5.inter_round = 0;
2928 curbe_cmd->dw5.intra_round = 4;
2929 curbe_cmd->dw5.frame_qpindex = pic_param->luma_ac_qindex;
2931 for (i = 0; i < segment_count; i++) {
2932 seg_qindex = pic_param->luma_ac_qindex + pic_param->luma_dc_qindex_delta
2933 + seg_param->seg_data[i].segment_qindex_delta;
2935 seg_qindex = CLAMP(0, 255, seg_qindex);
2937 if (curbe_param->picture_coding_type)
2938 memcpy(&curbe_cmd->segments[i],
2939 &intel_vp9_costlut_p[seg_qindex * 16],
2940 16 * sizeof(unsigned int));
2942 memcpy(&curbe_cmd->segments[i],
2943 &intel_vp9_costlut_key[seg_qindex * 16],
2944 16 * sizeof(unsigned int));
2947 if (curbe_param->picture_coding_type) {
2948 if (curbe_cmd->dw3.multi_ref_qp_check) {
2949 if (curbe_param->ref_frame_flag & 0x01) {
2950 obj_surface = curbe_param->last_ref_obj;
2951 vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
2952 curbe_cmd->dw8.last_ref_qp = vp9_quant_dc[vp9_priv_surface->qp_value];
2955 if (curbe_param->ref_frame_flag & 0x02) {
2956 obj_surface = curbe_param->golden_ref_obj;
2957 vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
2958 curbe_cmd->dw8.golden_ref_qp = vp9_quant_dc[vp9_priv_surface->qp_value];
2961 if (curbe_param->ref_frame_flag & 0x04) {
2962 obj_surface = curbe_param->alt_ref_obj;
2963 vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
2964 curbe_cmd->dw9.alt_ref_qp = vp9_quant_dc[vp9_priv_surface->qp_value];
2968 curbe_cmd->dw160.enc_curr_y_surf_bti = VP9_BTI_MBENC_CURR_Y_G9;
2969 curbe_cmd->dw162.enc_curr_nv12_surf_bti = VP9_BTI_MBENC_CURR_NV12_G9;
2970 curbe_cmd->dw166.segmentation_map_bti = VP9_BTI_MBENC_SEGMENTATION_MAP_G9;
2971 curbe_cmd->dw172.mode_decision_bti = VP9_BTI_MBENC_MODE_DECISION_G9;
2972 curbe_cmd->dw167.tx_curbe_bti = VP9_BTI_MBENC_TX_CURBE_G9;
2973 curbe_cmd->dw168.hme_mvdata_bti = VP9_BTI_MBENC_HME_MV_DATA_G9;
2974 curbe_cmd->dw169.hme_distortion_bti = VP9_BTI_MBENC_HME_DISTORTION_G9;
2975 curbe_cmd->dw171.mode_decision_prev_bti = VP9_BTI_MBENC_MODE_DECISION_PREV_G9;
2976 curbe_cmd->dw172.mode_decision_bti = VP9_BTI_MBENC_MODE_DECISION_G9;
2977 curbe_cmd->dw173.output_16x16_inter_modes_bti = VP9_BTI_MBENC_OUT_16x16_INTER_MODES_G9;
2978 curbe_cmd->dw174.cu_record_bti = VP9_BTI_MBENC_CU_RECORDS_G9;
2979 curbe_cmd->dw175.pak_data_bti = VP9_BTI_MBENC_PAK_DATA_G9;
2981 i965_gpe_context_unmap_curbe(gpe_context);
/*
 * Attach the surfaces required by one MBEnc kernel pass to the GPE
 * context's binding table.  The surface set depends on
 * mbenc_param->media_state_type: I 32x32, I 16x16, P (inter) or TX.
 */
2986 gen9_vp9_send_mbenc_surface(VADriverContextP ctx,
2987 struct encode_state *encode_state,
2988 struct i965_gpe_context *gpe_context,
2989 struct intel_encoder_context *encoder_context,
2990 struct gen9_vp9_mbenc_surface_param *mbenc_param)
2992 struct gen9_vp9_state *vp9_state;
2993 unsigned int res_size;
2994 unsigned int frame_width_in_sb, frame_height_in_sb;
2995 struct object_surface *obj_surface, *tmp_input;
2996 struct gen9_surface_vp9 *vp9_priv_surface;
2999 vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
3001 if (!vp9_state || !vp9_state->pic_param)
/* VP9 superblocks are 64x64 pixels. */
3004 frame_width_in_sb = ALIGN(mbenc_param->frame_width, 64) / 64;
3005 frame_height_in_sb = ALIGN(mbenc_param->frame_height, 64) / 64;
3006 media_function = mbenc_param->media_state_type;
3008 switch (media_function) {
3009 case VP9_MEDIA_STATE_MBENC_I_32x32: {
/* Key-frame 32x32 pass: current luma as R8, interleaved UV as R16. */
3010 obj_surface = mbenc_param->curr_frame_obj;
3012 i965_add_2d_gpe_surface(ctx,
3017 I965_SURFACEFORMAT_R8_UNORM,
3018 VP9_BTI_MBENC_CURR_Y_G9);
3020 i965_add_2d_gpe_surface(ctx,
3025 I965_SURFACEFORMAT_R16_UINT,
3026 VP9_BTI_MBENC_CURR_UV_G9);
3029 if (mbenc_param->segmentation_enabled) {
3030 i965_add_buffer_2d_gpe_surface(ctx,
3032 mbenc_param->pres_segmentation_map,
3034 I965_SURFACEFORMAT_R8_UNORM,
3035 VP9_BTI_MBENC_SEGMENTATION_MAP_G9);
/* Mode-decision output: 16 dwords per 16x16 macroblock. */
3039 res_size = 16 * mbenc_param->frame_width_in_mb *
3040 mbenc_param->frame_height_in_mb * sizeof(unsigned int);
3041 i965_add_buffer_gpe_surface(ctx,
3043 mbenc_param->pres_mode_decision,
3047 VP9_BTI_MBENC_MODE_DECISION_G9);
3051 case VP9_MEDIA_STATE_MBENC_I_16x16: {
3052 obj_surface = mbenc_param->curr_frame_obj;
3054 i965_add_2d_gpe_surface(ctx,
3059 I965_SURFACEFORMAT_R8_UNORM,
3060 VP9_BTI_MBENC_CURR_Y_G9);
3062 i965_add_2d_gpe_surface(ctx,
3067 I965_SURFACEFORMAT_R16_UINT,
3068 VP9_BTI_MBENC_CURR_UV_G9);
/* The 16x16 pass also needs the NV12 frame as a VME (adv) surface. */
3070 i965_add_adv_gpe_surface(ctx, gpe_context,
3072 VP9_BTI_MBENC_CURR_NV12_G9);
3074 if (mbenc_param->segmentation_enabled) {
3075 i965_add_buffer_2d_gpe_surface(ctx,
3077 mbenc_param->pres_segmentation_map,
3079 I965_SURFACEFORMAT_R8_UNORM,
3080 VP9_BTI_MBENC_SEGMENTATION_MAP_G9);
3084 res_size = 16 * mbenc_param->frame_width_in_mb *
3085 mbenc_param->frame_height_in_mb * sizeof(unsigned int);
3086 i965_add_buffer_gpe_surface(ctx,
3088 mbenc_param->pres_mode_decision,
3092 VP9_BTI_MBENC_MODE_DECISION_G9);
/* Expose the TX kernel's CURBE buffer so this pass can fill it in. */
3096 gen9_add_dri_buffer_gpe_surface(ctx,
3098 mbenc_param->gpe_context_tx->curbe.bo,
3100 ALIGN(res_size, 64),
3101 mbenc_param->gpe_context_tx->curbe.offset,
3102 VP9_BTI_MBENC_TX_CURBE_G9);
3106 case VP9_MEDIA_STATE_MBENC_P: {
3107 obj_surface = mbenc_param->curr_frame_obj;
3109 i965_add_2d_gpe_surface(ctx,
3114 I965_SURFACEFORMAT_R8_UNORM,
3115 VP9_BTI_MBENC_CURR_Y_G9);
3117 i965_add_2d_gpe_surface(ctx, gpe_context,
3121 I965_SURFACEFORMAT_R16_UINT,
3122 VP9_BTI_MBENC_CURR_UV_G9);
3124 i965_add_adv_gpe_surface(ctx, gpe_context,
3126 VP9_BTI_MBENC_CURR_NV12_G9);
/*
 * For each present reference: if dynamic scaling produced a resized
 * copy (dimensions differ from the current frame), bind the scaled
 * surface instead of the original.  Each reference occupies two
 * consecutive binding-table slots (BTI and BTI + 1) — presumably for
 * dual VME access; confirm against the kernel's surface layout.
 */
3128 if (mbenc_param->last_ref_obj) {
3129 obj_surface = mbenc_param->last_ref_obj;
3130 vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
3132 if (vp9_state->dys_in_use &&
3133 ((vp9_priv_surface->frame_width != vp9_state->frame_width) ||
3134 (vp9_priv_surface->frame_height != vp9_state->frame_height)))
3135 tmp_input = vp9_priv_surface->dys_surface_obj;
3137 tmp_input = obj_surface;
3139 i965_add_adv_gpe_surface(ctx, gpe_context,
3141 VP9_BTI_MBENC_LAST_NV12_G9);
3143 i965_add_adv_gpe_surface(ctx, gpe_context,
3145 VP9_BTI_MBENC_LAST_NV12_G9 + 1);
3149 if (mbenc_param->golden_ref_obj) {
3150 obj_surface = mbenc_param->golden_ref_obj;
3151 vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
3153 if (vp9_state->dys_in_use &&
3154 ((vp9_priv_surface->frame_width != vp9_state->frame_width) ||
3155 (vp9_priv_surface->frame_height != vp9_state->frame_height)))
3156 tmp_input = vp9_priv_surface->dys_surface_obj;
3158 tmp_input = obj_surface;
3160 i965_add_adv_gpe_surface(ctx, gpe_context,
3162 VP9_BTI_MBENC_GOLD_NV12_G9);
3164 i965_add_adv_gpe_surface(ctx, gpe_context,
3166 VP9_BTI_MBENC_GOLD_NV12_G9 + 1);
3170 if (mbenc_param->alt_ref_obj) {
3171 obj_surface = mbenc_param->alt_ref_obj;
3172 vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
3174 if (vp9_state->dys_in_use &&
3175 ((vp9_priv_surface->frame_width != vp9_state->frame_width) ||
3176 (vp9_priv_surface->frame_height != vp9_state->frame_height)))
3177 tmp_input = vp9_priv_surface->dys_surface_obj;
3179 tmp_input = obj_surface;
3181 i965_add_adv_gpe_surface(ctx, gpe_context,
3183 VP9_BTI_MBENC_ALTREF_NV12_G9);
3185 i965_add_adv_gpe_surface(ctx, gpe_context,
3187 VP9_BTI_MBENC_ALTREF_NV12_G9 + 1);
/* 4x-downscaled HME motion-vector and distortion inputs, if HME ran. */
3191 if (mbenc_param->hme_enabled) {
3192 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
3193 mbenc_param->ps4x_memv_data_buffer,
3195 I965_SURFACEFORMAT_R8_UNORM,
3196 VP9_BTI_MBENC_HME_MV_DATA_G9);
3198 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
3199 mbenc_param->ps4x_memv_distortion_buffer,
3201 I965_SURFACEFORMAT_R8_UNORM,
3202 VP9_BTI_MBENC_HME_DISTORTION_G9);
3205 if (mbenc_param->segmentation_enabled) {
3206 i965_add_buffer_2d_gpe_surface(ctx,
3208 mbenc_param->pres_segmentation_map,
3210 I965_SURFACEFORMAT_R8_UNORM,
3211 VP9_BTI_MBENC_SEGMENTATION_MAP_G9);
/* Previous frame's mode decisions are read; current frame's written. */
3215 res_size = 16 * mbenc_param->frame_width_in_mb *
3216 mbenc_param->frame_height_in_mb * sizeof(unsigned int);
3217 i965_add_buffer_gpe_surface(ctx,
3219 mbenc_param->pres_mode_decision_prev,
3223 VP9_BTI_MBENC_MODE_DECISION_PREV_G9);
3225 i965_add_buffer_gpe_surface(ctx,
3227 mbenc_param->pres_mode_decision,
3231 VP9_BTI_MBENC_MODE_DECISION_G9);
3233 i965_add_buffer_2d_gpe_surface(ctx,
3235 mbenc_param->pres_output_16x16_inter_modes,
3237 I965_SURFACEFORMAT_R8_UNORM,
3238 VP9_BTI_MBENC_OUT_16x16_INTER_MODES_G9);
3242 gen9_add_dri_buffer_gpe_surface(ctx,
3244 mbenc_param->gpe_context_tx->curbe.bo,
3246 ALIGN(res_size, 64),
3247 mbenc_param->gpe_context_tx->curbe.offset,
3248 VP9_BTI_MBENC_TX_CURBE_G9);
3253 case VP9_MEDIA_STATE_MBENC_TX: {
3254 obj_surface = mbenc_param->curr_frame_obj;
3256 i965_add_2d_gpe_surface(ctx,
3261 I965_SURFACEFORMAT_R8_UNORM,
3262 VP9_BTI_MBENC_CURR_Y_G9);
3264 i965_add_2d_gpe_surface(ctx,
3269 I965_SURFACEFORMAT_R16_UINT,
3270 VP9_BTI_MBENC_CURR_UV_G9);
3272 if (mbenc_param->segmentation_enabled) {
3273 i965_add_buffer_2d_gpe_surface(ctx,
3275 mbenc_param->pres_segmentation_map,
3277 I965_SURFACEFORMAT_R8_UNORM,
3278 VP9_BTI_MBENC_SEGMENTATION_MAP_G9);
3282 res_size = 16 * mbenc_param->frame_width_in_mb *
3283 mbenc_param->frame_height_in_mb * sizeof(unsigned int);
3284 i965_add_buffer_gpe_surface(ctx,
3286 mbenc_param->pres_mode_decision,
3290 VP9_BTI_MBENC_MODE_DECISION_G9);
/* PAK data: 4 dwords per superblock, at the head of the MB-code BO. */
3292 res_size = frame_width_in_sb * frame_height_in_sb * 4 * sizeof(unsigned int);
3293 i965_add_buffer_gpe_surface(ctx,
3295 mbenc_param->pres_mb_code_surface,
3299 VP9_BTI_MBENC_PAK_DATA_G9);
/* CU records: 64 * 16 dwords per superblock, at mb_data_offset into
 * the same MB-code BO. */
3302 res_size = frame_width_in_sb * frame_height_in_sb *
3303 64 * 16 * sizeof(unsigned int);
3305 i965_add_buffer_gpe_surface(ctx,
3307 mbenc_param->pres_mb_code_surface,
3310 mbenc_param->mb_data_offset,
3311 VP9_BTI_MBENC_CU_RECORDS_G9);
/*
 * Run one MBEnc kernel pass: select the GPE context matching the
 * requested media state, fill its CURBE (unless the BRC-update kernel
 * already did), bind surfaces, and launch a media-object walker sized
 * to the pass's block granularity.
 */
3321 gen9_vp9_mbenc_kernel(VADriverContextP ctx,
3322 struct encode_state *encode_state,
3323 struct intel_encoder_context *encoder_context,
3326 struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
3327 struct i965_gpe_context *gpe_context, *tx_gpe_context;
3328 struct gpe_media_object_walker_parameter media_object_walker_param;
3329 struct gpe_encoder_kernel_walker_parameter kernel_walker_param;
3330 unsigned int resolution_x, resolution_y;
3331 struct gen9_vp9_state *vp9_state;
3332 VAEncPictureParameterBufferVP9 *pic_param;
3333 struct gen9_vp9_mbenc_curbe_param curbe_param;
3334 struct gen9_vp9_mbenc_surface_param surface_param;
3335 VAStatus va_status = VA_STATUS_SUCCESS;
3336 int mbenc_gpe_index = 0;
3337 struct object_surface *obj_surface;
3338 struct gen9_surface_vp9 *vp9_priv_surface;
3340 vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
3342 if (!vp9_state || !vp9_state->pic_param)
3343 return VA_STATUS_ERROR_ENCODING_ERROR;
3345 pic_param = vp9_state->pic_param;
/* Map the requested media state to the matching kernel/GPE context. */
3347 switch (media_function) {
3348 case VP9_MEDIA_STATE_MBENC_I_32x32:
3349 mbenc_gpe_index = VP9_MBENC_IDX_KEY_32x32;
3352 case VP9_MEDIA_STATE_MBENC_I_16x16:
3353 mbenc_gpe_index = VP9_MBENC_IDX_KEY_16x16;
3356 case VP9_MEDIA_STATE_MBENC_P:
3357 mbenc_gpe_index = VP9_MBENC_IDX_INTER;
3360 case VP9_MEDIA_STATE_MBENC_TX:
3361 mbenc_gpe_index = VP9_MBENC_IDX_TX;
3365 va_status = VA_STATUS_ERROR_OPERATION_FAILED;
3369 gpe_context = &(vme_context->mbenc_context.gpe_contexts[mbenc_gpe_index]);
/* The TX context is always needed so its CURBE can be bound as a
 * writable surface by the I-16x16 and P passes. */
3370 tx_gpe_context = &(vme_context->mbenc_context.gpe_contexts[VP9_MBENC_IDX_TX]);
3372 gen9_gpe_reset_binding_table(ctx, gpe_context);
/* Skip CURBE setup when the BRC-update kernel already wrote it. */
3375 if (!vp9_state->mbenc_curbe_set_in_brc_update) {
3376 if (media_function == VP9_MEDIA_STATE_MBENC_I_32x32 ||
3377 media_function == VP9_MEDIA_STATE_MBENC_P) {
3378 memset(&curbe_param, 0, sizeof(curbe_param));
3379 curbe_param.ppic_param = vp9_state->pic_param;
3380 curbe_param.pseq_param = vp9_state->seq_param;
3381 curbe_param.psegment_param = vp9_state->segment_param;
3382 curbe_param.frame_width_in_mb = vp9_state->frame_width_in_mb;
3383 curbe_param.frame_height_in_mb = vp9_state->frame_height_in_mb;
3384 curbe_param.last_ref_obj = vp9_state->last_ref_obj;
3385 curbe_param.golden_ref_obj = vp9_state->golden_ref_obj;
3386 curbe_param.alt_ref_obj = vp9_state->alt_ref_obj;
3387 curbe_param.hme_enabled = vp9_state->hme_enabled;
3388 curbe_param.ref_frame_flag = vp9_state->ref_frame_flag;
3389 curbe_param.picture_coding_type = vp9_state->picture_coding_type;
3390 curbe_param.media_state_type = media_function;
3391 curbe_param.mbenc_curbe_set_in_brc_update = vp9_state->mbenc_curbe_set_in_brc_update;
3393 vme_context->pfn_set_curbe_mbenc(ctx,
3401 memset(&surface_param, 0, sizeof(surface_param));
3402 surface_param.media_state_type = media_function;
3403 surface_param.picture_coding_type = vp9_state->picture_coding_type;
3404 surface_param.frame_width = vp9_state->frame_width;
3405 surface_param.frame_height = vp9_state->frame_height;
3406 surface_param.frame_width_in_mb = vp9_state->frame_width_in_mb;
3407 surface_param.frame_height_in_mb = vp9_state->frame_height_in_mb;
3408 surface_param.hme_enabled = vp9_state->hme_enabled;
3409 surface_param.segmentation_enabled = pic_param->pic_flags.bits.segmentation_enabled;
3410 surface_param.pres_segmentation_map = &vme_context->mb_segment_map_surface;
3411 surface_param.ps4x_memv_data_buffer = &vme_context->s4x_memv_data_buffer;
3412 surface_param.ps4x_memv_distortion_buffer = &vme_context->s4x_memv_distortion_buffer;
/* Mode-decision buffers are double-buffered; the index flips so the
 * previous frame's decisions remain readable. */
3413 surface_param.pres_mode_decision =
3414 &vme_context->res_mode_decision[vp9_state->curr_mode_decision_index];
3415 surface_param.pres_mode_decision_prev =
3416 &vme_context->res_mode_decision[!vp9_state->curr_mode_decision_index];
3417 surface_param.pres_output_16x16_inter_modes = &vme_context->res_output_16x16_inter_modes;
3418 surface_param.pres_mbenc_curbe_buffer = NULL;
3419 surface_param.last_ref_obj = vp9_state->last_ref_obj;
3420 surface_param.golden_ref_obj = vp9_state->golden_ref_obj;
3421 surface_param.alt_ref_obj = vp9_state->alt_ref_obj;
3422 surface_param.pres_mb_code_surface = &vme_context->res_mb_code_surface;
3423 surface_param.gpe_context_tx = tx_gpe_context;
3424 surface_param.mb_data_offset = vp9_state->mb_data_offset;
3426 obj_surface = encode_state->reconstructed_object;
3427 vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
/* NOTE(review): frame_width_src is compared against frame_height_dst
 * here — this looks like a typo for frame_width_dst (the equivalent
 * dys test in gen9_vme_gpe_kernel_prepare_vp9 compares
 * frame_width_src with frame_width_dst).  Confirm and fix upstream. */
3428 if (vp9_state->dys_in_use &&
3429 (pic_param->frame_width_src != pic_param->frame_height_dst ||
3430 pic_param->frame_height_src != pic_param->frame_height_dst)) {
3431 obj_surface = vp9_priv_surface->dys_surface_obj;
3433 obj_surface = encode_state->input_yuv_object;
3435 surface_param.curr_frame_obj = obj_surface;
3437 vme_context->pfn_send_mbenc_surface(ctx,
/* The I-32x32 kernel walks 32x32 blocks; all others walk 16x16 MBs. */
3443 if (media_function == VP9_MEDIA_STATE_MBENC_I_32x32) {
3444 resolution_x = ALIGN(vp9_state->frame_width, 32) / 32;
3445 resolution_y = ALIGN(vp9_state->frame_height, 32) / 32;
3447 resolution_x = ALIGN(vp9_state->frame_width, 16) / 16;
3448 resolution_y = ALIGN(vp9_state->frame_height, 16) / 16;
3451 memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
3452 kernel_walker_param.resolution_x = resolution_x;
3453 kernel_walker_param.resolution_y = resolution_y;
/* P and I-16x16 have inter-block dependencies, so they walk with a
 * 45-degree scoreboard pattern; the others run fully independent. */
3455 if (media_function == VP9_MEDIA_STATE_MBENC_P ||
3456 media_function == VP9_MEDIA_STATE_MBENC_I_16x16) {
3457 kernel_walker_param.use_scoreboard = 1;
3458 kernel_walker_param.no_dependency = 0;
3459 kernel_walker_param.walker_degree = VP9_45Z_DEGREE;
3461 kernel_walker_param.use_scoreboard = 0;
3462 kernel_walker_param.no_dependency = 1;
3465 gen8_gpe_setup_interface_data(ctx, gpe_context);
3467 gen9_init_media_object_walker_parameter(encoder_context, &kernel_walker_param, &media_object_walker_param);
3469 gen9_run_kernel_media_object_walker(ctx, encoder_context,
3472 &media_object_walker_param);
/*
 * Size the fixed parts of a GPE context for a VP9 encoder kernel:
 * CURBE, sampler, interface-descriptor table, binding table and
 * VFE/URB allocation, derived from the kernel's parameter block.
 */
3477 gen9_init_gpe_context_vp9(VADriverContextP ctx,
3478 struct i965_gpe_context *gpe_context,
3479 struct vp9_encoder_kernel_parameter *kernel_param)
3481 struct i965_driver_data *i965 = i965_driver_data(ctx);
3483 gpe_context->curbe.length = kernel_param->curbe_size; // in bytes
/* No sampler by default; allocate one 64-byte-aligned entry if the
 * kernel declares a sampler. */
3485 gpe_context->sampler.entry_size = 0;
3486 gpe_context->sampler.max_entries = 0;
3488 if (kernel_param->sampler_size) {
3489 gpe_context->sampler.entry_size = ALIGN(kernel_param->sampler_size, 64);
3490 gpe_context->sampler.max_entries = 1;
3493 gpe_context->idrt.entry_size = ALIGN(sizeof(struct gen8_interface_descriptor_data), 64); // 8 dws, 1 register
3494 gpe_context->idrt.max_entries = NUM_KERNELS_PER_GPE_CONTEXT;
/* Binding table (4 bytes per entry) followed by the padded surface
 * states, both 64-byte aligned. */
3496 gpe_context->surface_state_binding_table.max_entries = MAX_VP9_ENCODER_SURFACES;
3497 gpe_context->surface_state_binding_table.binding_table_offset = 0;
3498 gpe_context->surface_state_binding_table.surface_state_offset = ALIGN(MAX_VP9_ENCODER_SURFACES * 4, 64);
3499 gpe_context->surface_state_binding_table.length = ALIGN(MAX_VP9_ENCODER_SURFACES * 4, 64) + ALIGN(MAX_VP9_ENCODER_SURFACES * SURFACE_STATE_PADDED_SIZE_GEN9, 64);
/* 6 threads per EU when the EU count is known; otherwise assume a
 * 16-EU part at 7 threads/EU. */
3501 if (i965->intel.eu_total > 0)
3502 gpe_context->vfe_state.max_num_threads = 6 * i965->intel.eu_total;
3504 gpe_context->vfe_state.max_num_threads = 112; // 16 EU * 7 threads
/* URB entries = what remains of MAX_URB_SIZE after the CURBE and the
 * interface descriptors, divided by the per-entry size; the hardware
 * field holds at most 127 and must be at least 1. */
3506 gpe_context->vfe_state.curbe_allocation_size = MAX(1, ALIGN(gpe_context->curbe.length, 32) >> 5); // in registers
3507 gpe_context->vfe_state.urb_entry_size = MAX(1, ALIGN(kernel_param->inline_data_size, 32) >> 5); // in registers
3508 gpe_context->vfe_state.num_urb_entries = (MAX_URB_SIZE -
3509 gpe_context->vfe_state.curbe_allocation_size -
3510 ((gpe_context->idrt.entry_size >> 5) *
3511 gpe_context->idrt.max_entries)) / gpe_context->vfe_state.urb_entry_size;
3512 gpe_context->vfe_state.num_urb_entries = CLAMP(1, 127, gpe_context->vfe_state.num_urb_entries);
3513 gpe_context->vfe_state.gpgpu_mode = 0;
/*
 * Program the VFE scoreboard (dependency) descriptors of a GPE
 * context.  Each delta pair is a 4-bit two's-complement offset to a
 * neighbouring thread this thread must wait on (0xF == -1, 0xE == -2).
 * When walkpat_flag is set, a fixed 4-dependency pattern overrides
 * the caller-supplied mask/type.
 */
3517 gen9_init_vfe_scoreboard_vp9(struct i965_gpe_context *gpe_context,
3518 struct vp9_encoder_scoreboard_parameter *scoreboard_param)
3520 gpe_context->vfe_desc5.scoreboard0.mask = scoreboard_param->mask;
3521 gpe_context->vfe_desc5.scoreboard0.type = scoreboard_param->type;
3522 gpe_context->vfe_desc5.scoreboard0.enable = scoreboard_param->enable;
3524 if (scoreboard_param->walkpat_flag) {
/* Walking-pattern mode: four dependencies, non-stalling type. */
3525 gpe_context->vfe_desc5.scoreboard0.mask = 0x0F;
3526 gpe_context->vfe_desc5.scoreboard0.type = 1;
3528 gpe_context->vfe_desc6.scoreboard1.delta_x0 = 0x0;
3529 gpe_context->vfe_desc6.scoreboard1.delta_y0 = 0xF;
3531 gpe_context->vfe_desc6.scoreboard1.delta_x1 = 0x0;
3532 gpe_context->vfe_desc6.scoreboard1.delta_y1 = 0xE;
3534 gpe_context->vfe_desc6.scoreboard1.delta_x2 = 0xF;
3535 gpe_context->vfe_desc6.scoreboard1.delta_y2 = 0x3;
3537 gpe_context->vfe_desc6.scoreboard1.delta_x3 = 0xF;
3538 gpe_context->vfe_desc6.scoreboard1.delta_y3 = 0x1;
/* Default dependency set: left, above, above-right neighbours etc. */
3541 gpe_context->vfe_desc6.scoreboard1.delta_x0 = 0xF;
3542 gpe_context->vfe_desc6.scoreboard1.delta_y0 = 0x0;
3545 gpe_context->vfe_desc6.scoreboard1.delta_x1 = 0x0;
3546 gpe_context->vfe_desc6.scoreboard1.delta_y1 = 0xF;
3549 gpe_context->vfe_desc6.scoreboard1.delta_x2 = 0x1;
3550 gpe_context->vfe_desc6.scoreboard1.delta_y2 = 0xF;
3553 gpe_context->vfe_desc6.scoreboard1.delta_x3 = 0xF;
3554 gpe_context->vfe_desc6.scoreboard1.delta_y3 = 0xF;
3557 gpe_context->vfe_desc7.scoreboard2.delta_x4 = 0xF;
3558 gpe_context->vfe_desc7.scoreboard2.delta_y4 = 0x1;
3561 gpe_context->vfe_desc7.scoreboard2.delta_x5 = 0x0;
3562 gpe_context->vfe_desc7.scoreboard2.delta_y5 = 0xE;
3565 gpe_context->vfe_desc7.scoreboard2.delta_x6 = 0x1;
3566 gpe_context->vfe_desc7.scoreboard2.delta_y6 = 0xE;
3569 gpe_context->vfe_desc7.scoreboard2.delta_x6 = 0xF;
3570 gpe_context->vfe_desc7.scoreboard2.delta_y6 = 0xE;
3574 #define VP9_VME_REF_WIN 48
/*
 * Validate the per-frame encode parameters and derive the per-frame
 * driver state: resolve coded buffer and reconstructed/reference
 * surfaces, de-duplicate reference flags, pick up segmentation and
 * sequence parameters, refresh BRC settings on keyframes, compute
 * frame/downscale geometry, and decide dynamic-scaling and HME usage.
 */
3577 gen9_encode_vp9_check_parameter(VADriverContextP ctx,
3578 struct encode_state *encode_state,
3579 struct intel_encoder_context *encoder_context)
3581 struct i965_driver_data *i965 = i965_driver_data(ctx);
3582 struct gen9_vp9_state *vp9_state;
3583 VAEncPictureParameterBufferVP9 *pic_param;
3584 VAEncMiscParameterTypeVP9PerSegmantParam *seg_param;
3585 VAEncSequenceParameterBufferVP9 *seq_param;
3586 struct object_surface *obj_surface;
3587 struct object_buffer *obj_buffer;
3588 struct gen9_surface_vp9 *vp9_priv_surface;
3590 vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
3592 if (!encode_state->pic_param_ext ||
3593 !encode_state->pic_param_ext->buffer) {
3594 return VA_STATUS_ERROR_INVALID_PARAMETER;
3596 pic_param = (VAEncPictureParameterBufferVP9 *)encode_state->pic_param_ext->buffer;
/* The coded buffer's BO doubles as the status-report buffer. */
3598 obj_buffer = BUFFER(pic_param->coded_buf);
3601 !obj_buffer->buffer_store ||
3602 !obj_buffer->buffer_store->bo)
3603 return VA_STATUS_ERROR_INVALID_PARAMETER;
3605 encode_state->coded_buf_object = obj_buffer;
3607 vp9_state->status_buffer.bo = obj_buffer->buffer_store->bo;
3609 encode_state->reconstructed_object = SURFACE(pic_param->reconstructed_frame);
3611 if (!encode_state->reconstructed_object ||
3612 !encode_state->input_yuv_object)
3613 return VA_STATUS_ERROR_INVALID_PARAMETER;
3615 vp9_state->curr_frame = pic_param->reconstructed_frame;
3616 vp9_state->ref_frame_flag = 0;
3617 if (pic_param->pic_flags.bits.frame_type == KEY_FRAME ||
3618 pic_param->pic_flags.bits.intra_only) {
3619 /* this will be regarded as I-frame type */
3620 vp9_state->picture_coding_type = 0;
3621 vp9_state->last_ref_obj = NULL;
3622 vp9_state->golden_ref_obj = NULL;
3623 vp9_state->alt_ref_obj = NULL;
3625 vp9_state->picture_coding_type = 1;
3626 vp9_state->ref_frame_flag = pic_param->ref_flags.bits.ref_frame_ctrl_l0 |
3627 pic_param->ref_flags.bits.ref_frame_ctrl_l1;
/* Resolve each reference; drop its flag if the surface is missing or
 * has never been encoded (no private data). */
3629 obj_surface = SURFACE(pic_param->reference_frames[pic_param->ref_flags.bits.ref_last_idx]);
3630 vp9_state->last_ref_obj = obj_surface;
3633 !obj_surface->private_data) {
3634 vp9_state->last_ref_obj = NULL;
3635 vp9_state->ref_frame_flag &= ~(VP9_LAST_REF);
3638 obj_surface = SURFACE(pic_param->reference_frames[pic_param->ref_flags.bits.ref_gf_idx]);
3639 vp9_state->golden_ref_obj = obj_surface;
3642 !obj_surface->private_data) {
3643 vp9_state->golden_ref_obj = NULL;
3644 vp9_state->ref_frame_flag &= ~(VP9_GOLDEN_REF);
3647 obj_surface = SURFACE(pic_param->reference_frames[pic_param->ref_flags.bits.ref_arf_idx]);
3648 vp9_state->alt_ref_obj = obj_surface;
3651 !obj_surface->private_data) {
3652 vp9_state->alt_ref_obj = NULL;
3653 vp9_state->ref_frame_flag &= ~(VP9_ALT_REF);
3656 /* remove the duplicated flag and ref frame list */
3657 if (vp9_state->ref_frame_flag & VP9_LAST_REF) {
3658 if (pic_param->reference_frames[pic_param->ref_flags.bits.ref_last_idx] ==
3659 pic_param->reference_frames[pic_param->ref_flags.bits.ref_gf_idx]) {
3660 vp9_state->ref_frame_flag &= ~(VP9_GOLDEN_REF);
3661 vp9_state->golden_ref_obj = NULL;
3664 if (pic_param->reference_frames[pic_param->ref_flags.bits.ref_last_idx] ==
3665 pic_param->reference_frames[pic_param->ref_flags.bits.ref_arf_idx]) {
3666 vp9_state->ref_frame_flag &= ~(VP9_ALT_REF);
3667 vp9_state->alt_ref_obj = NULL;
3671 if (vp9_state->ref_frame_flag & VP9_GOLDEN_REF) {
3672 if (pic_param->reference_frames[pic_param->ref_flags.bits.ref_gf_idx] ==
3673 pic_param->reference_frames[pic_param->ref_flags.bits.ref_arf_idx]) {
3674 vp9_state->ref_frame_flag &= ~(VP9_ALT_REF);
3675 vp9_state->alt_ref_obj = NULL;
/* An inter frame with no usable references cannot be encoded. */
3679 if (vp9_state->ref_frame_flag == 0)
3680 return VA_STATUS_ERROR_INVALID_PARAMETER;
/* Per-segment parameters arrive via the q_matrix buffer. */
3684 if (pic_param->pic_flags.bits.segmentation_enabled) {
3685 if (!encode_state->q_matrix ||
3686 !encode_state->q_matrix->buffer) {
3687 return VA_STATUS_ERROR_INVALID_PARAMETER;
3689 seg_param = (VAEncMiscParameterTypeVP9PerSegmantParam *)
3690 encode_state->q_matrix->buffer;
/* Fall back to an internal dummy SPS when the app supplied none. */
3694 if (encode_state->seq_param_ext &&
3695 encode_state->seq_param_ext->buffer)
3696 seq_param = (VAEncSequenceParameterBufferVP9 *)encode_state->seq_param_ext->buffer;
3699 seq_param = &vp9_state->bogus_seq_param;
3702 vp9_state->pic_param = pic_param;
3703 vp9_state->segment_param = seg_param;
3704 vp9_state->seq_param = seq_param;
3706 obj_surface = encode_state->reconstructed_object;
3707 if (pic_param->frame_width_dst > obj_surface->orig_width ||
3708 pic_param->frame_height_dst > obj_surface->orig_height)
3709 return VA_STATUS_ERROR_INVALID_SURFACE;
/* src != dst dimensions require dynamic-scaling support. */
3711 if (!vp9_state->dys_enabled &&
3712 ((pic_param->frame_width_src != pic_param->frame_width_dst) ||
3713 (pic_param->frame_height_src != pic_param->frame_height_dst)))
3714 return VA_STATUS_ERROR_UNIMPLEMENTED;
/* Refresh BRC settings on the first frame and on keyframes.
 * NOTE(review): picture_coding_type is the driver's 0/1 intra-inter
 * flag set above, while KEY_FRAME is a frame-type constant; this
 * comparison only behaves as intended if KEY_FRAME == 0 — confirm. */
3716 if (vp9_state->brc_enabled) {
3717 if (vp9_state->first_frame || vp9_state->picture_coding_type == KEY_FRAME) {
3718 vp9_state->brc_reset = encoder_context->brc.need_reset || vp9_state->first_frame;
3720 if (!encoder_context->brc.framerate[0].num || !encoder_context->brc.framerate[0].den ||
3721 !encoder_context->brc.bits_per_second[0])
3722 return VA_STATUS_ERROR_INVALID_PARAMETER;
3724 vp9_state->gop_size = encoder_context->brc.gop_size;
3725 vp9_state->framerate = encoder_context->brc.framerate[0];
/* CBR (or no target percentage): min = target = max.  Otherwise
 * target = max * percentage, min mirrored about the target (floored
 * at 0 when the target is below half of max). */
3727 if (encoder_context->rate_control_mode == VA_RC_CBR ||
3728 !encoder_context->brc.target_percentage[0]) {
3729 vp9_state->target_bit_rate = encoder_context->brc.bits_per_second[0];
3730 vp9_state->max_bit_rate = vp9_state->target_bit_rate;
3731 vp9_state->min_bit_rate = vp9_state->target_bit_rate;
3733 vp9_state->max_bit_rate = encoder_context->brc.bits_per_second[0];
3734 vp9_state->target_bit_rate = vp9_state->max_bit_rate * encoder_context->brc.target_percentage[0] / 100;
3735 if (2 * vp9_state->target_bit_rate < vp9_state->max_bit_rate)
3736 vp9_state->min_bit_rate = 0;
3738 vp9_state->min_bit_rate = 2 * vp9_state->target_bit_rate - vp9_state->max_bit_rate;
/* HRD buffer: explicit size, else derived from the window (ms), else
 * one second's worth of max bitrate; fullness defaults to half. */
3741 if (encoder_context->brc.hrd_buffer_size)
3742 vp9_state->vbv_buffer_size_in_bit = encoder_context->brc.hrd_buffer_size;
3743 else if (encoder_context->brc.window_size)
3744 vp9_state->vbv_buffer_size_in_bit = (uint64_t)vp9_state->max_bit_rate * encoder_context->brc.window_size / 1000;
3746 vp9_state->vbv_buffer_size_in_bit = vp9_state->max_bit_rate;
3747 if (encoder_context->brc.hrd_initial_buffer_fullness)
3748 vp9_state->init_vbv_buffer_fullness_in_bit = encoder_context->brc.hrd_initial_buffer_fullness;
3750 vp9_state->init_vbv_buffer_fullness_in_bit = vp9_state->vbv_buffer_size_in_bit / 2;
/* Frame geometry: encoded size plus 4x/16x downscaled HME sizes. */
3754 vp9_state->frame_width = pic_param->frame_width_dst;
3755 vp9_state->frame_height = pic_param->frame_height_dst;
3757 vp9_state->frame_width_4x = ALIGN(vp9_state->frame_width / 4, 16);
3758 vp9_state->frame_height_4x = ALIGN(vp9_state->frame_height / 4, 16);
3760 vp9_state->frame_width_16x = ALIGN(vp9_state->frame_width / 16, 16);
3761 vp9_state->frame_height_16x = ALIGN(vp9_state->frame_height / 16, 16);
3763 vp9_state->frame_width_in_mb = ALIGN(vp9_state->frame_width, 16) / 16;
3764 vp9_state->frame_height_in_mb = ALIGN(vp9_state->frame_height, 16) / 16;
3766 vp9_state->downscaled_width_4x_in_mb = vp9_state->frame_width_4x / 16;
3767 vp9_state->downscaled_height_4x_in_mb = vp9_state->frame_height_4x / 16;
3768 vp9_state->downscaled_width_16x_in_mb = vp9_state->frame_width_16x / 16;
3769 vp9_state->downscaled_height_16x_in_mb = vp9_state->frame_height_16x / 16;
/* Dynamic scaling is needed when the source is resized into the
 * destination, or when any reference has different dimensions. */
3771 vp9_state->dys_in_use = 0;
3772 if (pic_param->frame_width_src != pic_param->frame_width_dst ||
3773 pic_param->frame_height_src != pic_param->frame_height_dst)
3774 vp9_state->dys_in_use = 1;
3775 vp9_state->dys_ref_frame_flag = 0;
3776 /* check the dys setting. The dys is supported by default. */
3777 if (pic_param->pic_flags.bits.frame_type != KEY_FRAME &&
3778 !pic_param->pic_flags.bits.intra_only) {
3779 vp9_state->dys_ref_frame_flag = vp9_state->ref_frame_flag;
3781 if ((vp9_state->ref_frame_flag & VP9_LAST_REF) &&
3782 vp9_state->last_ref_obj) {
3783 obj_surface = vp9_state->last_ref_obj;
3784 vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
3786 if (vp9_state->frame_width == vp9_priv_surface->frame_width &&
3787 vp9_state->frame_height == vp9_priv_surface->frame_height)
3788 vp9_state->dys_ref_frame_flag &= ~(VP9_LAST_REF);
3790 if ((vp9_state->ref_frame_flag & VP9_GOLDEN_REF) &&
3791 vp9_state->golden_ref_obj) {
3792 obj_surface = vp9_state->golden_ref_obj;
3793 vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
3795 if (vp9_state->frame_width == vp9_priv_surface->frame_width &&
3796 vp9_state->frame_height == vp9_priv_surface->frame_height)
3797 vp9_state->dys_ref_frame_flag &= ~(VP9_GOLDEN_REF);
3799 if ((vp9_state->ref_frame_flag & VP9_ALT_REF) &&
3800 vp9_state->alt_ref_obj) {
3801 obj_surface = vp9_state->alt_ref_obj;
3802 vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
3804 if (vp9_state->frame_width == vp9_priv_surface->frame_width &&
3805 vp9_state->frame_height == vp9_priv_surface->frame_height)
3806 vp9_state->dys_ref_frame_flag &= ~(VP9_ALT_REF);
3808 if (vp9_state->dys_ref_frame_flag)
3809 vp9_state->dys_in_use = 1;
3812 if (vp9_state->hme_supported) {
3813 vp9_state->hme_enabled = 1;
3815 vp9_state->hme_enabled = 0;
3818 if (vp9_state->b16xme_supported) {
3819 vp9_state->b16xme_enabled = 1;
3821 vp9_state->b16xme_enabled = 0;
3824 /* disable HME/16xME if the size is too small */
3825 if (vp9_state->frame_width_4x <= VP9_VME_REF_WIN ||
3826 vp9_state->frame_height_4x <= VP9_VME_REF_WIN) {
3827 vp9_state->hme_enabled = 0;
3828 vp9_state->b16xme_enabled = 0;
3831 if (vp9_state->frame_width_16x < VP9_VME_REF_WIN ||
3832 vp9_state->frame_height_16x < VP9_VME_REF_WIN)
3833 vp9_state->b16xme_enabled = 0;
/* HME is meaningless without references.
 * NOTE(review): this uses HCP_VP9_KEY_FRAME while the test near the
 * top of this function uses KEY_FRAME for the same field — presumably
 * the constants share a value; unify them for consistency. */
3835 if (pic_param->pic_flags.bits.frame_type == HCP_VP9_KEY_FRAME ||
3836 pic_param->pic_flags.bits.intra_only) {
3837 vp9_state->hme_enabled = 0;
3838 vp9_state->b16xme_enabled = 0;
/* NOTE(review): same picture_coding_type-vs-KEY_FRAME comparison as
 * in the BRC block above; relies on KEY_FRAME == 0 — confirm. */
3841 vp9_state->mbenc_keyframe_dist_enabled = 0;
3842 if ((vp9_state->picture_coding_type == KEY_FRAME) &&
3843 vp9_state->brc_distortion_buffer_supported)
3844 vp9_state->mbenc_keyframe_dist_enabled = 1;
3846 return VA_STATUS_SUCCESS;
3850 gen9_vme_gpe_kernel_prepare_vp9(VADriverContextP ctx,
3851 struct encode_state *encode_state,
3852 struct intel_encoder_context *encoder_context)
3854 struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
3855 struct vp9_surface_param surface_param;
3856 struct gen9_vp9_state *vp9_state;
3857 VAEncPictureParameterBufferVP9 *pic_param;
3858 struct object_surface *obj_surface;
3859 struct gen9_surface_vp9 *vp9_surface;
3860 int driver_header_flag = 0;
3863 vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
3865 if (!vp9_state || !vp9_state->pic_param)
3866 return VA_STATUS_ERROR_INVALID_PARAMETER;
3868 pic_param = vp9_state->pic_param;
3870 /* this is to check whether the driver should generate the uncompressed header */
3871 driver_header_flag = 1;
3872 if (encode_state->packed_header_data_ext &&
3873 encode_state->packed_header_data_ext[0] &&
3874 pic_param->bit_offset_first_partition_size) {
3875 VAEncPackedHeaderParameterBuffer *param = NULL;
3877 param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_params_ext[0]->buffer;
3879 if (param->type == VAEncPackedHeaderRawData) {
3881 unsigned int length_in_bits;
3883 header_data = (char *)encode_state->packed_header_data_ext[0]->buffer;
3884 length_in_bits = param->bit_length;
3885 driver_header_flag = 0;
3887 vp9_state->frame_header.bit_offset_first_partition_size =
3888 pic_param->bit_offset_first_partition_size;
3889 vp9_state->header_length = ALIGN(length_in_bits, 8) >> 3;
3890 vp9_state->alias_insert_data = header_data;
3892 vp9_state->frame_header.bit_offset_ref_lf_delta = pic_param->bit_offset_ref_lf_delta;
3893 vp9_state->frame_header.bit_offset_mode_lf_delta = pic_param->bit_offset_mode_lf_delta;
3894 vp9_state->frame_header.bit_offset_lf_level = pic_param->bit_offset_lf_level;
3895 vp9_state->frame_header.bit_offset_qindex = pic_param->bit_offset_qindex;
3896 vp9_state->frame_header.bit_offset_segmentation = pic_param->bit_offset_segmentation;
3897 vp9_state->frame_header.bit_size_segmentation = pic_param->bit_size_segmentation;
3901 if (driver_header_flag) {
3902 memset(&vp9_state->frame_header, 0, sizeof(vp9_state->frame_header));
3903 intel_write_uncompressed_header(encode_state,
3904 VAProfileVP9Profile0,
3905 vme_context->frame_header_data,
3906 &vp9_state->header_length,
3907 &vp9_state->frame_header);
3908 vp9_state->alias_insert_data = vme_context->frame_header_data;
3911 va_status = i965_check_alloc_surface_bo(ctx, encode_state->input_yuv_object,
3912 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
3913 if (va_status != VA_STATUS_SUCCESS)
3916 va_status = i965_check_alloc_surface_bo(ctx, encode_state->reconstructed_object,
3917 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
3919 if (va_status != VA_STATUS_SUCCESS)
3922 surface_param.frame_width = vp9_state->frame_width;
3923 surface_param.frame_height = vp9_state->frame_height;
3924 va_status = gen9_vp9_init_check_surfaces(ctx,
3925 encode_state->reconstructed_object,
3929 vp9_surface = (struct gen9_surface_vp9*)encode_state->reconstructed_object;
3931 vp9_surface->qp_value = pic_param->luma_ac_qindex + pic_param->luma_dc_qindex_delta;
3933 if (vp9_state->dys_in_use &&
3934 (pic_param->frame_width_src != pic_param->frame_width_dst ||
3935 pic_param->frame_height_src != pic_param->frame_height_dst)) {
3936 surface_param.frame_width = pic_param->frame_width_dst;
3937 surface_param.frame_height = pic_param->frame_height_dst;
3938 va_status = gen9_vp9_check_dys_surfaces(ctx,
3939 encode_state->reconstructed_object,
3946 if (vp9_state->dys_ref_frame_flag) {
3947 if ((vp9_state->dys_ref_frame_flag & VP9_LAST_REF) &&
3948 vp9_state->last_ref_obj) {
3949 obj_surface = vp9_state->last_ref_obj;
3950 surface_param.frame_width = vp9_state->frame_width;
3951 surface_param.frame_height = vp9_state->frame_height;
3952 va_status = gen9_vp9_check_dys_surfaces(ctx,
3959 if ((vp9_state->dys_ref_frame_flag & VP9_GOLDEN_REF) &&
3960 vp9_state->golden_ref_obj) {
3961 obj_surface = vp9_state->golden_ref_obj;
3962 surface_param.frame_width = vp9_state->frame_width;
3963 surface_param.frame_height = vp9_state->frame_height;
3964 va_status = gen9_vp9_check_dys_surfaces(ctx,
3971 if ((vp9_state->dys_ref_frame_flag & VP9_ALT_REF) &&
3972 vp9_state->alt_ref_obj) {
3973 obj_surface = vp9_state->alt_ref_obj;
3974 surface_param.frame_width = vp9_state->frame_width;
3975 surface_param.frame_height = vp9_state->frame_height;
3976 va_status = gen9_vp9_check_dys_surfaces(ctx,
3985 if (va_status != VA_STATUS_SUCCESS)
3987 /* check the corresponding ref_frame_flag && dys_ref_frame_flag */
3989 return VA_STATUS_SUCCESS;
/* Initialize the GPE contexts used by the VP9 VME stage (MBEnc + DYS) and
 * allocate one shared dynamic-state BO for all MBEnc kernel instances.
 * All NUM_VP9_MBENC contexts point into the same BO with distinct IDRT
 * offsets so BRC can patch a single curbe buffer.
 * NOTE(review): this listing embeds original line numbers and is missing
 * several lines (opening brace, `int i;`, dri_bo_alloc() size/name args,
 * the set_dynamic_buffer ds_param argument) — confirm against the full file. */
3993 gen9_vme_gpe_kernel_init_vp9(VADriverContextP ctx,
3994 struct encode_state *encode_state,
3995 struct intel_encoder_context *encoder_context)
3997 struct i965_driver_data *i965 = i965_driver_data(ctx);
3998 struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
3999 struct vp9_mbenc_context *mbenc_context = &vme_context->mbenc_context;
4000 struct vp9_dys_context *dys_context = &vme_context->dys_context;
4001 struct gpe_dynamic_state_parameter ds_param;
/* BRC rewrites the MBEnc curbe in place, so every MBEnc GPE context must be
 * initialized before the dynamic-state buffer is shared among them. */
4005 * BRC will update MBEnc curbe data buffer, so initialize GPE context for
4008 for (i = 0; i < NUM_VP9_MBENC; i++) {
4009 gen8_gpe_context_init(ctx, &mbenc_context->gpe_contexts[i]);
4013 * VP9_MBENC_XXX uses the same dynamic state buffer as they share the same
/* BO layout: curbe (64-aligned) + 128 bytes of sampler/pad + one 64-aligned
 * interface descriptor per MBEnc kernel. */
4016 ds_param.bo_size = ALIGN(sizeof(vp9_mbenc_curbe_data), 64) + 128 +
4017 ALIGN(sizeof(struct gen8_interface_descriptor_data), 64) * NUM_VP9_MBENC;
4018 mbenc_context->mbenc_bo_dys = dri_bo_alloc(i965->intel.bufmgr,
4022 mbenc_context->mbenc_bo_size = ds_param.bo_size;
4024 ds_param.bo = mbenc_context->mbenc_bo_dys;
4025 ds_param.curbe_offset = 0;
4026 ds_param.sampler_offset = ALIGN(sizeof(vp9_mbenc_curbe_data), 64);
4027 for (i = 0; i < NUM_VP9_MBENC; i++) {
/* Each context gets its own interface descriptor slot inside the shared BO. */
4028 ds_param.idrt_offset = ds_param.sampler_offset + 128 +
4029 ALIGN(sizeof(struct gen8_interface_descriptor_data), 64) * i;
4031 gen8_gpe_context_set_dynamic_buffer(ctx,
4032 &mbenc_context->gpe_contexts[i],
/* DYS (dynamic scaling) kernel needs an AVS sampler state as well. */
4036 gen8_gpe_context_init(ctx, &dys_context->gpe_context);
4037 gen9_vp9_dys_set_sampler_state(&dys_context->gpe_context);
4039 return VA_STATUS_SUCCESS;
/* Per-frame VME teardown: drop the reference on the shared MBEnc/DYS
 * dynamic-state BO allocated by gen9_vme_gpe_kernel_init_vp9().
 * encode_state is unused here; kept for the common pipeline signature. */
4043 gen9_vme_gpe_kernel_final_vp9(VADriverContextP ctx,
4044 struct encode_state *encode_state,
4045 struct intel_encoder_context *encoder_context)
4047 struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
4048 struct vp9_mbenc_context *mbenc_context = &vme_context->mbenc_context;
/* NULL the pointer after unref to guard against double-unreference later. */
4050 dri_bo_unreference(mbenc_context->mbenc_bo_dys);
4051 mbenc_context->mbenc_bo_dys = NULL;
4053 return VA_STATUS_SUCCESS;
/* Dispatch the VP9 VME kernels for one frame in pipeline order:
 *   DYS (if scaled refs in use) -> BRC init/reset -> 4x/16x scaling ->
 *   16x/4x HME -> BRC intra-dist + update -> MBEnc (I 32x32 + I 16x16 for
 *   key frames, P otherwise) -> MBEnc TX pass.
 * Finally flips the ping-pong mode-decision index and latches BRC state.
 * NOTE(review): listing has original line numbers embedded and missing
 * lines (e.g. `int i;`, several kernel-call arguments, closing braces) —
 * comments describe only the visible calls. */
4057 gen9_vme_gpe_kernel_run_vp9(VADriverContextP ctx,
4058 struct encode_state *encode_state,
4059 struct intel_encoder_context *encoder_context)
4061 struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
4062 struct gen9_vp9_state *vp9_state;
4065 vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
4067 if (!vp9_state || !vp9_state->pic_param)
4068 return VA_STATUS_ERROR_INVALID_PARAMETER;
/* Dynamic scaling of reference frames, when source/dest sizes differ. */
4070 if (vp9_state->dys_in_use) {
4071 gen9_vp9_run_dys_refframes(ctx, encode_state, encoder_context);
/* BRC history must be (re)initialized on first frame or an explicit reset. */
4074 if (vp9_state->brc_enabled && (vp9_state->brc_reset || !vp9_state->brc_inited)) {
4075 gen9_vp9_brc_init_reset_kernel(ctx, encode_state, encoder_context);
/* Key frames start from clean mode-decision buffers (both ping-pong halves). */
4078 if (vp9_state->picture_coding_type == KEY_FRAME) {
4079 for (i = 0; i < 2; i++)
4080 i965_zero_gpe_resource(&vme_context->res_mode_decision[i]);
4083 if (vp9_state->hme_supported) {
4084 gen9_vp9_scaling_kernel(ctx, encode_state,
4087 if (vp9_state->b16xme_supported) {
4088 gen9_vp9_scaling_kernel(ctx, encode_state,
/* HME only runs for inter frames (picture_coding_type != KEY_FRAME == 0);
 * the 16x pass, when enabled, precedes the 4x pass. */
4094 if (vp9_state->picture_coding_type && vp9_state->hme_enabled) {
4095 if (vp9_state->b16xme_enabled)
4096 gen9_vp9_me_kernel(ctx, encode_state,
4100 gen9_vp9_me_kernel(ctx, encode_state,
4105 if (vp9_state->brc_enabled) {
4106 if (vp9_state->mbenc_keyframe_dist_enabled)
4107 gen9_vp9_brc_intra_dist_kernel(ctx,
4111 gen9_vp9_brc_update_kernel(ctx, encode_state,
4115 if (vp9_state->picture_coding_type == KEY_FRAME) {
4116 gen9_vp9_mbenc_kernel(ctx, encode_state,
4118 VP9_MEDIA_STATE_MBENC_I_32x32);
4119 gen9_vp9_mbenc_kernel(ctx, encode_state,
4121 VP9_MEDIA_STATE_MBENC_I_16x16);
4123 gen9_vp9_mbenc_kernel(ctx, encode_state,
4125 VP9_MEDIA_STATE_MBENC_P);
/* TX-decision pass runs for every frame type. */
4128 gen9_vp9_mbenc_kernel(ctx, encode_state,
4130 VP9_MEDIA_STATE_MBENC_TX);
/* Swap ping-pong mode-decision buffer for the next frame. */
4132 vp9_state->curr_mode_decision_index ^= 1;
4133 if (vp9_state->brc_enabled) {
4134 vp9_state->brc_inited = 1;
4135 vp9_state->brc_reset = 0;
4138 return VA_STATUS_SUCCESS;
/* Top-level VME entry for one VP9 frame: validate parameters, (lazily)
 * allocate BRC resources, prepare/init/run the GPE kernels, then release
 * per-frame state.  Returns VA_STATUS_SUCCESS or the first failing status.
 * NOTE(review): missing lines hide the `goto`/`return` taken on each error
 * check and some allocate_resources arguments — confirm in the full file. */
4142 gen9_vme_pipeline_vp9(VADriverContextP ctx,
4144 struct encode_state *encode_state,
4145 struct intel_encoder_context *encoder_context)
4148 struct gen9_vp9_state *vp9_state;
4150 vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
4153 return VA_STATUS_ERROR_INVALID_CONTEXT;
4155 va_status = gen9_encode_vp9_check_parameter(ctx, encode_state, encoder_context);
4156 if (va_status != VA_STATUS_SUCCESS)
/* BRC resources are allocated only once; flag latches after first success. */
4159 va_status = gen9_vp9_allocate_resources(ctx, encode_state,
4161 !vp9_state->brc_allocated);
4163 if (va_status != VA_STATUS_SUCCESS)
4165 vp9_state->brc_allocated = 1;
4167 va_status = gen9_vme_gpe_kernel_prepare_vp9(ctx, encode_state, encoder_context);
4169 if (va_status != VA_STATUS_SUCCESS)
4172 va_status = gen9_vme_gpe_kernel_init_vp9(ctx, encode_state, encoder_context);
4173 if (va_status != VA_STATUS_SUCCESS)
4176 va_status = gen9_vme_gpe_kernel_run_vp9(ctx, encode_state, encoder_context);
4177 if (va_status != VA_STATUS_SUCCESS)
/* Teardown of per-frame state; its status is intentionally ignored. */
4180 gen9_vme_gpe_kernel_final_vp9(ctx, encode_state, encoder_context);
4182 return VA_STATUS_SUCCESS;
/* Destroy every BRC GPE context owned by brc_context. */
4186 gen9_vme_brc_context_destroy_vp9(struct vp9_brc_context *brc_context)
4190 for (i = 0; i < NUM_VP9_BRC; i++)
4191 gen8_gpe_context_destroy(&brc_context->gpe_contexts[i]);
/* Destroy the scaling (4x and 2x) GPE contexts. */
4195 gen9_vme_scaling_context_destroy_vp9(struct vp9_scaling_context *scaling_context)
4199 for (i = 0; i < NUM_VP9_SCALING; i++)
4200 gen8_gpe_context_destroy(&scaling_context->gpe_contexts[i]);
/* Destroy the single HME GPE context. */
4204 gen9_vme_me_context_destroy_vp9(struct vp9_me_context *me_context)
4206 gen8_gpe_context_destroy(&me_context->gpe_context);
/* Destroy all MBEnc GPE contexts and drop the shared dynamic-state BO.
 * dri_bo_unreference(NULL) is a no-op, so this is safe if the BO was
 * already released by gen9_vme_gpe_kernel_final_vp9(). */
4210 gen9_vme_mbenc_context_destroy_vp9(struct vp9_mbenc_context *mbenc_context)
4214 for (i = 0; i < NUM_VP9_MBENC; i++)
4215 gen8_gpe_context_destroy(&mbenc_context->gpe_contexts[i]);
4216 dri_bo_unreference(mbenc_context->mbenc_bo_dys);
4217 mbenc_context->mbenc_bo_size = 0;
/* Destroy the dynamic-scaling (DYS) GPE context. */
4221 gen9_vme_dys_context_destroy_vp9(struct vp9_dys_context *dys_context)
4223 gen8_gpe_context_destroy(&dys_context->gpe_context);
/* Tear down the whole VME side: free shared GPE resources first, then
 * destroy each kernel context (scaling, ME, MBEnc, BRC, DYS). */
4227 gen9_vme_kernel_context_destroy_vp9(struct gen9_encoder_context_vp9 *vme_context)
4229 gen9_vp9_free_resources(vme_context);
4230 gen9_vme_scaling_context_destroy_vp9(&vme_context->scaling_context);
4231 gen9_vme_me_context_destroy_vp9(&vme_context->me_context);
4232 gen9_vme_mbenc_context_destroy_vp9(&vme_context->mbenc_context);
4233 gen9_vme_brc_context_destroy_vp9(&vme_context->brc_context);
4234 gen9_vme_dys_context_destroy_vp9(&vme_context->dys_context);
/* Opaque-pointer destructor registered with the encoder framework.
 * NOTE(review): lines 4243-4246 are missing from this listing (likely a
 * NULL check and/or free of the context itself) — confirm in the full file. */
4240 gen9_vme_context_destroy_vp9(void *context)
4242 struct gen9_encoder_context_vp9 *vme_context = context;
4247 gen9_vme_kernel_context_destroy_vp9(vme_context);
/* Create the two scaling GPE contexts (index 0 = 4x downscale with inline
 * data and MB-stats output, index 1 = 2x downscale), load their kernels
 * from media_vp9_kernels, and record the binding-table indices used by the
 * surface-setup callbacks.
 * NOTE(review): kernel-header output args and gen8_gpe_load_kernels()
 * arguments are on lines missing from this listing. */
4255 gen9_vme_scaling_context_init_vp9(VADriverContextP ctx,
4256 struct gen9_encoder_context_vp9 *vme_context,
4257 struct vp9_scaling_context *scaling_context)
4259 struct i965_gpe_context *gpe_context = NULL;
4260 struct vp9_encoder_kernel_parameter kernel_param;
4261 struct vp9_encoder_scoreboard_parameter scoreboard_param;
4262 struct i965_kernel scale_kernel;
/* 4x scaling kernel: curbe + inline data, no sampler. */
4264 kernel_param.curbe_size = sizeof(vp9_scaling4x_curbe_data_cm);
4265 kernel_param.inline_data_size = sizeof(vp9_scaling4x_inline_data_cm);
4266 kernel_param.sampler_size = 0;
4268 memset(&scoreboard_param, 0, sizeof(scoreboard_param));
4269 scoreboard_param.mask = 0xFF;
4270 scoreboard_param.enable = vme_context->use_hw_scoreboard;
4271 scoreboard_param.type = vme_context->use_hw_non_stalling_scoreboard;
4272 scoreboard_param.walkpat_flag = 0;
4274 gpe_context = &scaling_context->gpe_contexts[0];
4275 gen9_init_gpe_context_vp9(ctx, gpe_context, &kernel_param);
4276 gen9_init_vfe_scoreboard_vp9(gpe_context, &scoreboard_param);
4278 scaling_context->scaling_4x_bti.scaling_frame_src_y = VP9_BTI_SCALING_FRAME_SRC_Y;
4279 scaling_context->scaling_4x_bti.scaling_frame_dst_y = VP9_BTI_SCALING_FRAME_DST_Y;
4280 scaling_context->scaling_4x_bti.scaling_frame_mbv_proc_stat_dst =
4281 VP9_BTI_SCALING_FRAME_MBVPROCSTATS_DST_CM;
4283 memset(&scale_kernel, 0, sizeof(scale_kernel));
4285 intel_vp9_get_kernel_header_and_size((void *)media_vp9_kernels,
4286 sizeof(media_vp9_kernels),
4287 INTEL_VP9_ENC_SCALING4X,
4291 gen8_gpe_load_kernels(ctx,
/* 2x scaling kernel: curbe only, no inline data or sampler. */
4296 kernel_param.curbe_size = sizeof(vp9_scaling2x_curbe_data_cm);
4297 kernel_param.inline_data_size = 0;
4298 kernel_param.sampler_size = 0;
4300 gpe_context = &scaling_context->gpe_contexts[1];
4301 gen9_init_gpe_context_vp9(ctx, gpe_context, &kernel_param);
4302 gen9_init_vfe_scoreboard_vp9(gpe_context, &scoreboard_param);
4304 memset(&scale_kernel, 0, sizeof(scale_kernel));
4306 intel_vp9_get_kernel_header_and_size((void *)media_vp9_kernels,
4307 sizeof(media_vp9_kernels),
4308 INTEL_VP9_ENC_SCALING2X,
4312 gen8_gpe_load_kernels(ctx,
4317 scaling_context->scaling_2x_bti.scaling_frame_src_y = VP9_BTI_SCALING_FRAME_SRC_Y;
4318 scaling_context->scaling_2x_bti.scaling_frame_dst_y = VP9_BTI_SCALING_FRAME_DST_Y;
/* Create the HME GPE context (single kernel, curbe only) and load the ME
 * kernel from media_vp9_kernels.
 * NOTE(review): the kernel index argument (line 4350-ish) and the load
 * arguments are missing from this listing. */
4323 gen9_vme_me_context_init_vp9(VADriverContextP ctx,
4324 struct gen9_encoder_context_vp9 *vme_context,
4325 struct vp9_me_context *me_context)
4327 struct i965_gpe_context *gpe_context = NULL;
4328 struct vp9_encoder_kernel_parameter kernel_param;
4329 struct vp9_encoder_scoreboard_parameter scoreboard_param;
4330 struct i965_kernel scale_kernel;
4332 kernel_param.curbe_size = sizeof(vp9_me_curbe_data);
4333 kernel_param.inline_data_size = 0;
4334 kernel_param.sampler_size = 0;
4336 memset(&scoreboard_param, 0, sizeof(scoreboard_param));
4337 scoreboard_param.mask = 0xFF;
4338 scoreboard_param.enable = vme_context->use_hw_scoreboard;
4339 scoreboard_param.type = vme_context->use_hw_non_stalling_scoreboard;
4340 scoreboard_param.walkpat_flag = 0;
4342 gpe_context = &me_context->gpe_context;
4343 gen9_init_gpe_context_vp9(ctx, gpe_context, &kernel_param);
4344 gen9_init_vfe_scoreboard_vp9(gpe_context, &scoreboard_param);
4346 memset(&scale_kernel, 0, sizeof(scale_kernel));
4348 intel_vp9_get_kernel_header_and_size((void *)media_vp9_kernels,
4349 sizeof(media_vp9_kernels),
4354 gen8_gpe_load_kernels(ctx,
/* Create one GPE context per MBEnc kernel instance, sharing a single curbe
 * layout.  The KEY_16x16 and INTER variants use a different media-walker
 * pattern (walkpat_flag = 1); all instances load the same MBENC kernel.
 * NOTE(review): `int i;`, the else-branch brace, and the load-kernels
 * arguments fall on lines missing from this listing. */
4363 gen9_vme_mbenc_context_init_vp9(VADriverContextP ctx,
4364 struct gen9_encoder_context_vp9 *vme_context,
4365 struct vp9_mbenc_context *mbenc_context)
4367 struct i965_gpe_context *gpe_context = NULL;
4368 struct vp9_encoder_kernel_parameter kernel_param;
4369 struct vp9_encoder_scoreboard_parameter scoreboard_param;
4371 struct i965_kernel scale_kernel;
4373 kernel_param.curbe_size = sizeof(vp9_mbenc_curbe_data);
4374 kernel_param.inline_data_size = 0;
4375 kernel_param.sampler_size = 0;
4377 memset(&scoreboard_param, 0, sizeof(scoreboard_param));
4378 scoreboard_param.mask = 0xFF;
4379 scoreboard_param.enable = vme_context->use_hw_scoreboard;
4380 scoreboard_param.type = vme_context->use_hw_non_stalling_scoreboard;
4382 for (i = 0; i < NUM_VP9_MBENC; i++) {
4383 gpe_context = &mbenc_context->gpe_contexts[i];
/* 45-degree walk pattern only for the 16x16-key and inter passes. */
4385 if ((i == VP9_MBENC_IDX_KEY_16x16) ||
4386 (i == VP9_MBENC_IDX_INTER)) {
4387 scoreboard_param.walkpat_flag = 1;
4389 scoreboard_param.walkpat_flag = 0;
4391 gen9_init_gpe_context_vp9(ctx, gpe_context, &kernel_param);
4392 gen9_init_vfe_scoreboard_vp9(gpe_context, &scoreboard_param);
4394 memset(&scale_kernel, 0, sizeof(scale_kernel));
4396 intel_vp9_get_kernel_header_and_size((void *)media_vp9_kernels,
4397 sizeof(media_vp9_kernels),
4398 INTEL_VP9_ENC_MBENC,
4402 gen8_gpe_load_kernels(ctx,
/* Create one GPE context per BRC kernel (init/reset/update/…) with a shared
 * curbe layout and load each BRC kernel from media_vp9_kernels.
 * NOTE(review): `int i;`, the per-iteration kernel index, and the
 * load-kernels arguments are on lines missing from this listing. */
4410 gen9_vme_brc_context_init_vp9(VADriverContextP ctx,
4411 struct gen9_encoder_context_vp9 *vme_context,
4412 struct vp9_brc_context *brc_context)
4414 struct i965_gpe_context *gpe_context = NULL;
4415 struct vp9_encoder_kernel_parameter kernel_param;
4416 struct vp9_encoder_scoreboard_parameter scoreboard_param;
4418 struct i965_kernel scale_kernel;
4420 kernel_param.curbe_size = sizeof(vp9_brc_curbe_data);
4421 kernel_param.inline_data_size = 0;
4422 kernel_param.sampler_size = 0;
4424 memset(&scoreboard_param, 0, sizeof(scoreboard_param));
4425 scoreboard_param.mask = 0xFF;
4426 scoreboard_param.enable = vme_context->use_hw_scoreboard;
4427 scoreboard_param.type = vme_context->use_hw_non_stalling_scoreboard;
4429 for (i = 0; i < NUM_VP9_BRC; i++) {
4430 gpe_context = &brc_context->gpe_contexts[i];
4431 gen9_init_gpe_context_vp9(ctx, gpe_context, &kernel_param);
4432 gen9_init_vfe_scoreboard_vp9(gpe_context, &scoreboard_param);
4434 memset(&scale_kernel, 0, sizeof(scale_kernel));
4436 intel_vp9_get_kernel_header_and_size((void *)media_vp9_kernels,
4437 sizeof(media_vp9_kernels),
4442 gen8_gpe_load_kernels(ctx,
/* Create the dynamic-scaling (DYS) GPE context.  Unlike the other kernels
 * it needs sampler state (gen9 8x8 AVS) for the scaling filter.
 * NOTE(review): the kernel index and load-kernels arguments fall on lines
 * missing from this listing. */
4450 gen9_vme_dys_context_init_vp9(VADriverContextP ctx,
4451 struct gen9_encoder_context_vp9 *vme_context,
4452 struct vp9_dys_context *dys_context)
4454 struct i965_gpe_context *gpe_context = NULL;
4455 struct vp9_encoder_kernel_parameter kernel_param;
4456 struct vp9_encoder_scoreboard_parameter scoreboard_param;
4457 struct i965_kernel scale_kernel;
4459 kernel_param.curbe_size = sizeof(vp9_dys_curbe_data);
4460 kernel_param.inline_data_size = 0;
4461 kernel_param.sampler_size = sizeof(struct gen9_sampler_8x8_avs);
4463 memset(&scoreboard_param, 0, sizeof(scoreboard_param));
4464 scoreboard_param.mask = 0xFF;
4465 scoreboard_param.enable = vme_context->use_hw_scoreboard;
4466 scoreboard_param.type = vme_context->use_hw_non_stalling_scoreboard;
4467 scoreboard_param.walkpat_flag = 0;
4469 gpe_context = &dys_context->gpe_context;
4470 gen9_init_gpe_context_vp9(ctx, gpe_context, &kernel_param);
4471 gen9_init_vfe_scoreboard_vp9(gpe_context, &scoreboard_param);
4473 memset(&scale_kernel, 0, sizeof(scale_kernel));
4475 intel_vp9_get_kernel_header_and_size((void *)media_vp9_kernels,
4476 sizeof(media_vp9_kernels),
4481 gen8_gpe_load_kernels(ctx,
/* Initialize all VME kernel contexts (scaling, ME, MBEnc, DYS, BRC) and
 * wire up the per-kernel curbe/surface callback table on vme_context. */
4490 gen9_vme_kernels_context_init_vp9(VADriverContextP ctx,
4491 struct intel_encoder_context *encoder_context,
4492 struct gen9_encoder_context_vp9 *vme_context)
4494 gen9_vme_scaling_context_init_vp9(ctx, vme_context, &vme_context->scaling_context);
4495 gen9_vme_me_context_init_vp9(ctx, vme_context, &vme_context->me_context);
4496 gen9_vme_mbenc_context_init_vp9(ctx, vme_context, &vme_context->mbenc_context);
4497 gen9_vme_dys_context_init_vp9(ctx, vme_context, &vme_context->dys_context);
4498 gen9_vme_brc_context_init_vp9(ctx, vme_context, &vme_context->brc_context);
/* Function-pointer hooks consumed by the generic kernel-dispatch code. */
4500 vme_context->pfn_set_curbe_brc = gen9_vp9_set_curbe_brc;
4501 vme_context->pfn_set_curbe_me = gen9_vp9_set_curbe_me;
4502 vme_context->pfn_send_me_surface = gen9_vp9_send_me_surface;
4503 vme_context->pfn_send_scaling_surface = gen9_vp9_send_scaling_surface;
4505 vme_context->pfn_set_curbe_scaling = gen9_vp9_set_curbe_scaling_cm;
4507 vme_context->pfn_send_dys_surface = gen9_vp9_send_dys_surface;
4508 vme_context->pfn_set_curbe_dys = gen9_vp9_set_curbe_dys;
4509 vme_context->pfn_set_curbe_mbenc = gen9_vp9_set_curbe_mbenc;
4510 vme_context->pfn_send_mbenc_surface = gen9_vp9_send_mbenc_surface;
/* Write one boolean element of the VP9 compressed header into the PAK
 * compressed-input buffer.  Two elements pack into each
 * vp9_compressed_element; `index` selects the slot and its low bit selects
 * the a/b half.  prob == 252 flips the prob-select bit.
 * NOTE(review): the parameter list (index/prob/value) and the if/else that
 * chooses the a- vs b-half are on lines missing from this listing —
 * presumably (index & 1) picks the half; confirm in the full file. */
4515 void gen9_vp9_write_compressed_element(char *buffer,
4520 struct vp9_compressed_element *base_element, *vp9_element;
4521 base_element = (struct vp9_compressed_element *)buffer;
4523 vp9_element = base_element + (index >> 1);
4525 vp9_element->b_valid = 1;
4526 vp9_element->b_probdiff_select = 1;
4527 vp9_element->b_prob_select = (prob == 252) ? 1 : 0;
4528 vp9_element->b_bin = value;
4530 vp9_element->a_valid = 1;
4531 vp9_element->a_probdiff_select = 1;
4532 vp9_element->a_prob_select = (prob == 252) ? 1 : 0;
4533 vp9_element->a_bin = value;
/* Per-frame refresh of the encoder's internal probability state and of the
 * compressed-header input buffer consumed by the HCP PAK:
 *   1. Reset frame contexts on key / intra-only / error-resilient frames,
 *      honoring reset_frame_context (3 = reset all four, 2 = reset the one
 *      indexed by frame_context_idx).
 *   2. Build the compressed-header element stream: quantizer deltas,
 *      tx_mode signaling, coeff-update flags, and (inter only) reference
 *      mode bits derived from the three sign-bias flags.
 * NOTE(review): many element-writer argument lines are missing from this
 * listing; the comments describe only the visible structure. */
4538 intel_vp9enc_refresh_frame_internal_buffers(VADriverContextP ctx,
4539 struct intel_encoder_context *encoder_context)
4541 struct gen9_encoder_context_vp9 *pak_context = encoder_context->mfc_context;
4542 VAEncPictureParameterBufferVP9 *pic_param;
4543 struct gen9_vp9_state *vp9_state;
4547 vp9_state = (struct gen9_vp9_state *)(encoder_context->enc_priv_state);
4549 if (!pak_context || !vp9_state || !vp9_state->pic_param)
4552 pic_param = vp9_state->pic_param;
4553 if ((pic_param->pic_flags.bits.frame_type == HCP_VP9_KEY_FRAME) ||
4554 (pic_param->pic_flags.bits.intra_only) ||
4555 pic_param->pic_flags.bits.error_resilient_mode) {
4556 /* reset current frame_context */
4557 intel_init_default_vp9_probs(&vp9_state->vp9_current_fc);
4558 if ((pic_param->pic_flags.bits.frame_type == HCP_VP9_KEY_FRAME) ||
4559 pic_param->pic_flags.bits.error_resilient_mode ||
4560 (pic_param->pic_flags.bits.reset_frame_context == 3)) {
/* reset_frame_context == 3: all four stored contexts go back to defaults. */
4561 for (i = 0; i < 4; i++)
4562 memcpy(&vp9_state->vp9_frame_ctx[i],
4563 &vp9_state->vp9_current_fc,
4564 sizeof(FRAME_CONTEXT));
4565 } else if (pic_param->pic_flags.bits.reset_frame_context == 2) {
/* reset_frame_context == 2: only the selected context is reset. */
4566 i = pic_param->pic_flags.bits.frame_context_idx;
4567 memcpy(&vp9_state->vp9_frame_ctx[i],
4568 &vp9_state->vp9_current_fc, sizeof(FRAME_CONTEXT));
4570 /* reset the frame_ctx_idx = 0 */
4571 vp9_state->frame_ctx_idx = 0;
4573 vp9_state->frame_ctx_idx = pic_param->pic_flags.bits.frame_context_idx;
/* Rebuild the compressed-header input from scratch for this frame. */
4576 i965_zero_gpe_resource(&pak_context->res_compressed_input_buffer);
4577 buffer = i965_map_gpe_resource(&pak_context->res_compressed_input_buffer);
/* Lossless-style case: all quantizer deltas zero — minimal signaling. */
4583 if ((pic_param->luma_ac_qindex == 0) &&
4584 (pic_param->luma_dc_qindex_delta == 0) &&
4585 (pic_param->chroma_ac_qindex_delta == 0) &&
4586 (pic_param->chroma_dc_qindex_delta == 0)) {
4588 /* nothing is needed */
4589 gen9_vp9_write_compressed_element(buffer,
4591 gen9_vp9_write_compressed_element(buffer,
4593 gen9_vp9_write_compressed_element(buffer,
/* tx_mode signaling per VP9 spec: SELECT and ALLOW_32X32 use fixed
 * patterns, otherwise the two mode bits are written explicitly. */
4596 if (vp9_state->tx_mode == TX_MODE_SELECT) {
4597 gen9_vp9_write_compressed_element(buffer,
4599 gen9_vp9_write_compressed_element(buffer,
4601 gen9_vp9_write_compressed_element(buffer,
4603 } else if (vp9_state->tx_mode == ALLOW_32X32) {
4604 gen9_vp9_write_compressed_element(buffer,
4606 gen9_vp9_write_compressed_element(buffer,
4608 gen9_vp9_write_compressed_element(buffer,
4611 unsigned int tx_mode;
4613 tx_mode = vp9_state->tx_mode;
4614 gen9_vp9_write_compressed_element(buffer,
4615 0, 128, ((tx_mode) & 2));
4616 gen9_vp9_write_compressed_element(buffer,
4617 1, 128, ((tx_mode) & 1));
4618 gen9_vp9_write_compressed_element(buffer,
4622 if (vp9_state->tx_mode == TX_MODE_SELECT) {
4624 gen9_vp9_write_compressed_element(buffer,
4627 gen9_vp9_write_compressed_element(buffer,
4630 gen9_vp9_write_compressed_element(buffer,
4634 /*Setup all the input&output object*/
4637 /* update the coeff_update flag */
4638 gen9_vp9_write_compressed_element(buffer,
4640 gen9_vp9_write_compressed_element(buffer,
4642 gen9_vp9_write_compressed_element(buffer,
4644 gen9_vp9_write_compressed_element(buffer,
/* Inter (non-intra-only) frames: signal compound-prediction availability.
 * Compound prediction is only allowed when the three reference sign-bias
 * flags are not all equal. */
4649 if (pic_param->pic_flags.bits.frame_type && !pic_param->pic_flags.bits.intra_only) {
4650 bool allow_comp = !(
4651 (pic_param->ref_flags.bits.ref_last_sign_bias && pic_param->ref_flags.bits.ref_gf_sign_bias && pic_param->ref_flags.bits.ref_arf_sign_bias) ||
4652 (!pic_param->ref_flags.bits.ref_last_sign_bias && !pic_param->ref_flags.bits.ref_gf_sign_bias && !pic_param->ref_flags.bits.ref_arf_sign_bias)
4656 if (pic_param->pic_flags.bits.comp_prediction_mode == REFERENCE_MODE_SELECT) {
4657 gen9_vp9_write_compressed_element(buffer,
4659 gen9_vp9_write_compressed_element(buffer,
4661 } else if (pic_param->pic_flags.bits.comp_prediction_mode == COMPOUND_REFERENCE) {
4662 gen9_vp9_write_compressed_element(buffer,
4664 gen9_vp9_write_compressed_element(buffer,
4668 gen9_vp9_write_compressed_element(buffer,
4670 gen9_vp9_write_compressed_element(buffer,
4676 i965_unmap_gpe_resource(&pak_context->res_compressed_input_buffer);
/* Emit HCP_PIPE_MODE_SELECT (6 DWs) into the BCS batch, selecting encode
 * mode with the requested codec and stream-out configuration. */
4681 gen9_pak_vp9_pipe_mode_select(VADriverContextP ctx,
4682 struct encode_state *encode_state,
4683 struct intel_encoder_context *encoder_context,
4684 struct gen9_hcpe_pipe_mode_select_param *pipe_mode_param)
4686 struct intel_batchbuffer *batch = encoder_context->base.batch;
4688 BEGIN_BCS_BATCH(batch, 6);
4690 OUT_BCS_BATCH(batch, HCP_PIPE_MODE_SELECT | (6 - 2));
4691 OUT_BCS_BATCH(batch,
4692 (pipe_mode_param->stream_out << 12) |
4693 (pipe_mode_param->codec_mode << 5) |
4694 (0 << 3) | /* disable Pic Status / Error Report */
4695 (pipe_mode_param->stream_out << 2) |
4696 HCP_CODEC_SELECT_ENCODE);
4697 OUT_BCS_BATCH(batch, 0);
4698 OUT_BCS_BATCH(batch, 0);
4699 OUT_BCS_BATCH(batch, (1 << 6)); /* DW4: bit 6 set — NOTE(review): meaning not shown here; confirm against the HCP PRM */
4700 OUT_BCS_BATCH(batch, 0);
4702 ADVANCE_BCS_BATCH(batch);
/* Emit a 3-DW HCP_SURFACE_STATE from the caller-filled hcp_surface_state:
 * DW1 = surface id + (pitch - 1), DW2 = format + Y/Cb plane offset.
 * NOTE(review): a NULL/validity check may exist on the missing lines
 * 4712-4714. */
4706 gen9_vp9_add_surface_state(VADriverContextP ctx,
4707 struct encode_state *encode_state,
4708 struct intel_encoder_context *encoder_context,
4709 hcp_surface_state *hcp_state)
4711 struct intel_batchbuffer *batch = encoder_context->base.batch;
4715 BEGIN_BCS_BATCH(batch, 3);
4716 OUT_BCS_BATCH(batch, HCP_SURFACE_STATE | (3 - 2));
4717 OUT_BCS_BATCH(batch,
4718 (hcp_state->dw1.surface_id << 28) |
4719 (hcp_state->dw1.surface_pitch - 1)
4721 OUT_BCS_BATCH(batch,
4722 (hcp_state->dw2.surface_format << 28) |
4723 (hcp_state->dw2.y_cb_offset)
4725 ADVANCE_BCS_BATCH(batch);
/* Emit the 104-DW HCP_PIPE_BUF_ADDR_STATE programming every buffer address
 * the VP9 PAK needs: reconstructed surface, deblocking/metadata rowstore
 * buffers, current and collocated MV temporal buffers (ping-pong via
 * curr_mv_temporal_index), up to 3 reference frames, source picture, prob
 * buffer, segment-id buffer, and HVD rowstore buffers.  Unused HEVC-only
 * slots (SAO, ILDB, stream-out) are zeroed.
 * NOTE(review): the OUT_BCS_RELOC64/OUT_BUFFER macro invocation lines are
 * largely missing from this listing — only their argument continuation
 * lines are visible; confirm the exact relocation macros in the full file. */
4729 gen9_pak_vp9_pipe_buf_addr_state(VADriverContextP ctx,
4730 struct encode_state *encode_state,
4731 struct intel_encoder_context *encoder_context)
4733 struct i965_driver_data *i965 = i965_driver_data(ctx);
4734 struct intel_batchbuffer *batch = encoder_context->base.batch;
4735 struct gen9_encoder_context_vp9 *pak_context = encoder_context->mfc_context;
4736 struct gen9_vp9_state *vp9_state;
4738 struct object_surface *obj_surface;
4740 vp9_state = (struct gen9_vp9_state *)(encoder_context->enc_priv_state);
4742 if (!vp9_state || !vp9_state->pic_param)
4746 BEGIN_BCS_BATCH(batch, 104);
4748 OUT_BCS_BATCH(batch, HCP_PIPE_BUF_ADDR_STATE | (104 - 2));
4750 obj_surface = encode_state->reconstructed_object;
4752 /* reconstructed obj_surface is already checked. So this is skipped */
4753 /* DW 1..3 decoded surface */
4756 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
4758 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
4760 /* DW 4..6 deblocking line */
4762 pak_context->res_deblocking_filter_line_buffer.bo,
4763 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
4765 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
4767 /* DW 7..9 deblocking tile line */
4769 pak_context->res_deblocking_filter_tile_line_buffer.bo,
4770 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
4772 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
4774 /* DW 10..12 deblocking tile col */
4776 pak_context->res_deblocking_filter_tile_col_buffer.bo,
4777 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
4779 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
4781 /* DW 13..15 metadata line */
4783 pak_context->res_metadata_line_buffer.bo,
4784 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
4786 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
4788 /* DW 16..18 metadata tile line */
4790 pak_context->res_metadata_tile_line_buffer.bo,
4791 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
4793 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
4795 /* DW 19..21 metadata tile col */
4797 pak_context->res_metadata_tile_col_buffer.bo,
4798 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
4800 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
4802 /* DW 22..30 SAO is not used for VP9 */
4803 OUT_BCS_BATCH(batch, 0);
4804 OUT_BCS_BATCH(batch, 0);
4805 OUT_BCS_BATCH(batch, 0);
4806 OUT_BCS_BATCH(batch, 0);
4807 OUT_BCS_BATCH(batch, 0);
4808 OUT_BCS_BATCH(batch, 0);
4809 OUT_BCS_BATCH(batch, 0);
4810 OUT_BCS_BATCH(batch, 0);
4811 OUT_BCS_BATCH(batch, 0);
4813 /* DW 31..33 Current Motion vector temporal buffer */
4815 pak_context->res_mv_temporal_buffer[vp9_state->curr_mv_temporal_index].bo,
4816 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
4818 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
4820 /* DW 34..36 Not used */
4821 OUT_BCS_BATCH(batch, 0);
4822 OUT_BCS_BATCH(batch, 0);
4823 OUT_BCS_BATCH(batch, 0);
4825 /* Only the first three reference_frame is used for VP9 */
4826 /* DW 37..52 for reference_frame */
4828 if (vp9_state->picture_coding_type) {
4829 for (i = 0; i < 3; i++) {
4831 if (pak_context->reference_surfaces[i].bo) {
4833 pak_context->reference_surfaces[i].bo,
4834 I915_GEM_DOMAIN_INSTRUCTION, 0,
4837 OUT_BCS_BATCH(batch, 0);
4838 OUT_BCS_BATCH(batch, 0);
/* Remaining reference slots (up to 8 address pairs) are zero-filled. */
4843 for (; i < 8; i++) {
4844 OUT_BCS_BATCH(batch, 0);
4845 OUT_BCS_BATCH(batch, 0);
4848 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
4850 /* DW 54..56 for source input */
4852 pak_context->uncompressed_picture_source.bo,
4853 I915_GEM_DOMAIN_INSTRUCTION, 0,
4855 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
4857 /* DW 57..59 StreamOut is not used */
4858 OUT_BCS_BATCH(batch, 0);
4859 OUT_BCS_BATCH(batch, 0);
4860 OUT_BCS_BATCH(batch, 0);
4862 /* DW 60..62. Not used for encoder */
4863 OUT_BCS_BATCH(batch, 0);
4864 OUT_BCS_BATCH(batch, 0);
4865 OUT_BCS_BATCH(batch, 0);
4867 /* DW 63..65. ILDB Not used for encoder */
4868 OUT_BCS_BATCH(batch, 0);
4869 OUT_BCS_BATCH(batch, 0);
4870 OUT_BCS_BATCH(batch, 0);
4872 /* DW 66..81 For the collocated motion vector temporal buffer */
4873 if (vp9_state->picture_coding_type) {
/* Previous frame's MV buffer is the other half of the ping-pong pair. */
4874 int prev_index = vp9_state->curr_mv_temporal_index ^ 0x01;
4876 pak_context->res_mv_temporal_buffer[prev_index].bo,
4877 I915_GEM_DOMAIN_INSTRUCTION, 0,
4880 OUT_BCS_BATCH(batch, 0);
4881 OUT_BCS_BATCH(batch, 0);
4884 for (i = 1; i < 8; i++) {
4885 OUT_BCS_BATCH(batch, 0);
4886 OUT_BCS_BATCH(batch, 0);
4888 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
4890 /* DW 83..85 VP9 prob buffer */
4892 pak_context->res_prob_buffer.bo,
4893 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
4896 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
4898 /* DW 86..88 Segment id buffer */
4899 if (pak_context->res_segmentid_buffer.bo) {
4901 pak_context->res_segmentid_buffer.bo,
4902 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
4905 OUT_BCS_BATCH(batch, 0);
4906 OUT_BCS_BATCH(batch, 0);
4908 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
4910 /* DW 89..91 HVD line rowstore buffer */
4912 pak_context->res_hvd_line_buffer.bo,
4913 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
4915 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
4917 /* DW 92..94 HVD tile line rowstore buffer */
4919 pak_context->res_hvd_tile_line_buffer.bo,
4920 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
4922 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
4924 /* DW 95..97 SAO streamout. Not used for VP9 */
4925 OUT_BCS_BATCH(batch, 0);
4926 OUT_BCS_BATCH(batch, 0);
4927 OUT_BCS_BATCH(batch, 0);
4929 /* reserved for KBL. 98..100 */
4930 OUT_BCS_BATCH(batch, 0);
4931 OUT_BCS_BATCH(batch, 0);
4932 OUT_BCS_BATCH(batch, 0);
4935 OUT_BCS_BATCH(batch, 0);
4936 OUT_BCS_BATCH(batch, 0);
4937 OUT_BCS_BATCH(batch, 0);
4939 ADVANCE_BCS_BATCH(batch);
/* Emit the 29-DW HCP_IND_OBJ_BASE_ADDR_STATE: indirect CU object (MB code
 * at mb_data_offset), PAK-BSE output bitstream base and upper bound,
 * compressed-header input, prob counter stream-out, prob delta stream-in,
 * and tile/CU record stream-out buffers.  Bitstream object base DWs are
 * zero because the PAK-BSE addresses are programmed explicitly below.
 * NOTE(review): the relocation macro invocation lines are missing from
 * this listing; only argument continuations are visible. */
4943 gen9_pak_vp9_ind_obj_base_addr_state(VADriverContextP ctx,
4944 struct encode_state *encode_state,
4945 struct intel_encoder_context *encoder_context)
4947 struct i965_driver_data *i965 = i965_driver_data(ctx);
4948 struct intel_batchbuffer *batch = encoder_context->base.batch;
4949 struct gen9_encoder_context_vp9 *pak_context = encoder_context->mfc_context;
4950 struct gen9_vp9_state *vp9_state;
4952 vp9_state = (struct gen9_vp9_state *)(encoder_context->enc_priv_state);
4955 BEGIN_BCS_BATCH(batch, 29);
4957 OUT_BCS_BATCH(batch, HCP_IND_OBJ_BASE_ADDR_STATE | (29 - 2));
4959 /* indirect bitstream object base */
4960 OUT_BCS_BATCH(batch, 0);
4961 OUT_BCS_BATCH(batch, 0);
4962 OUT_BCS_BATCH(batch, 0);
4963 /* the upper bound of indirect bitstream object */
4964 OUT_BCS_BATCH(batch, 0);
4965 OUT_BCS_BATCH(batch, 0);
4967 /* DW 6: Indirect CU object base address */
4969 pak_context->res_mb_code_surface.bo,
4970 I915_GEM_DOMAIN_INSTRUCTION, 0, /* No write domain */
4971 vp9_state->mb_data_offset);
4972 /* default attribute */
4973 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
4975 /* DW 9..11, PAK-BSE */
4977 pak_context->indirect_pak_bse_object.bo,
4978 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
4979 pak_context->indirect_pak_bse_object.offset);
4980 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
4982 /* DW 12..13 upper bound */
4984 pak_context->indirect_pak_bse_object.bo,
4985 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
4986 pak_context->indirect_pak_bse_object.end_offset);
4988 /* DW 14..16 compressed header buffer */
4990 pak_context->res_compressed_input_buffer.bo,
4991 I915_GEM_DOMAIN_INSTRUCTION, 0,
4993 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
4995 /* DW 17..19 prob counter streamout */
4997 pak_context->res_prob_counter_buffer.bo,
4998 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
5000 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
5002 /* DW 20..22 prob delta streamin */
5004 pak_context->res_prob_delta_buffer.bo,
5005 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
5007 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
5009 /* DW 23..25 Tile record streamout */
5011 pak_context->res_tile_record_streamout_buffer.bo,
5012 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
5014 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
5016 /* DW 26..28 CU record streamout */
5018 pak_context->res_cu_stat_streamout_buffer.bo,
5019 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
5021 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
5023 ADVANCE_BCS_BATCH(batch);
/* Emit an 8-DW HCP_VP9_SEGMENT_STATE for one segment: segment id, the
 * reference/skip flag bits, and the quantizer / loop-filter level deltas
 * encoded in sign-magnitude form (9 and 7 bits respectively).
 * For key / intra-only frames the segment_reference value is forced
 * (the assignment on the missing line after the condition — likely 0;
 * confirm in the full file). */
5027 gen9_pak_vp9_segment_state(VADriverContextP ctx,
5028 struct encode_state *encode_state,
5029 struct intel_encoder_context *encoder_context,
5030 VAEncSegParamVP9 *seg_param, uint8_t seg_id)
5032 struct intel_batchbuffer *batch = encoder_context->base.batch;
5033 uint32_t batch_value, tmp;
5034 VAEncPictureParameterBufferVP9 *pic_param;
5036 if (!encode_state->pic_param_ext ||
5037 !encode_state->pic_param_ext->buffer) {
5041 pic_param = (VAEncPictureParameterBufferVP9 *)encode_state->pic_param_ext->buffer;
5043 batch_value = seg_param->seg_flags.bits.segment_reference;
5044 if (pic_param->pic_flags.bits.frame_type == HCP_VP9_KEY_FRAME ||
5045 pic_param->pic_flags.bits.intra_only)
5048 BEGIN_BCS_BATCH(batch, 8);
5050 OUT_BCS_BATCH(batch, HCP_VP9_SEGMENT_STATE | (8 - 2));
5051 OUT_BCS_BATCH(batch, seg_id << 0); /* DW 1 - SegmentID */
5052 OUT_BCS_BATCH(batch,
5053 (seg_param->seg_flags.bits.segment_reference_enabled << 3) |
5054 (batch_value << 1) |
5055 (seg_param->seg_flags.bits.segment_reference_skipped << 0)
5058 /* DW 3..6 is not used for encoder */
5059 OUT_BCS_BATCH(batch, 0);
5060 OUT_BCS_BATCH(batch, 0);
5061 OUT_BCS_BATCH(batch, 0);
5062 OUT_BCS_BATCH(batch, 0);
/* DW 7: qindex delta (low 16 bits) and LF level delta (bits 16+),
 * both converted to sign-magnitude representation. */
5065 tmp = intel_convert_sign_mag(seg_param->segment_qindex_delta, 9);
5067 tmp = intel_convert_sign_mag(seg_param->segment_lf_level_delta, 7);
5068 batch_value |= (tmp << 16);
5069 OUT_BCS_BATCH(batch, batch_value);
5071 ADVANCE_BCS_BATCH(batch);
/* Build a second-level batch buffer containing an HCP_INSERT_PAK_OBJECT
 * command whose inline payload is the pre-built VP9 uncompressed frame
 * header (vp9_state->alias_insert_data), terminated with MI_NOOP and
 * MI_BATCH_BUFFER_END.  The result is later launched from the main BCS
 * batch via MI_BATCH_BUFFER_START (see gen9_vp9_pak_picture_level). */
5076 intel_vp9enc_construct_pak_insertobj_batchbuffer(VADriverContextP ctx,
5077 struct intel_encoder_context *encoder_context,
5078 struct i965_gpe_resource *obj_batch_buffer)
5080 struct gen9_encoder_context_vp9 *pak_context = encoder_context->mfc_context;
5081 struct gen9_vp9_state *vp9_state;
5082 int uncompressed_header_length;
5083 unsigned int *cmd_ptr;
5084 unsigned int dw_length, bits_in_last_dw;
5086 vp9_state = (struct gen9_vp9_state *)(encoder_context->enc_priv_state);
/* Guard against an uninitialized encode session (elided early return). */
5088 if (!pak_context || !vp9_state || !vp9_state->pic_param)
5091 uncompressed_header_length = vp9_state->header_length;
5092 cmd_ptr = i965_map_gpe_resource(obj_batch_buffer);
/* Number of valid bits in the final DWORD of the header payload;
 * a multiple-of-4 header means the whole last DWORD (32 bits) is valid. */
5097 bits_in_last_dw = uncompressed_header_length % 4;
5098 bits_in_last_dw *= 8;
5100 if (bits_in_last_dw == 0)
5101 bits_in_last_dw = 32;
5103 /* get the DWORD length of the inserted_data */
5104 dw_length = ALIGN(uncompressed_header_length, 4) / 4;
5105 *cmd_ptr++ = HCP_INSERT_PAK_OBJECT | dw_length;
5107 *cmd_ptr++ = ((0 << 31) | /* indirect payload */
5108 (0 << 16) | /* the start offset in first DW */
5110 (bits_in_last_dw << 8) | /* bits_in_last_dw */
5111 (0 << 4) | /* skip emulation byte count. 0 for VP9 */
5112 (0 << 3) | /* emulation flag. 0 for VP9 */
5113 (1 << 2) | /* last header flag. */
/* Copy the pre-serialized header bytes as the inline payload. */
5115 memcpy(cmd_ptr, vp9_state->alias_insert_data, dw_length * sizeof(unsigned int));
5117 cmd_ptr += dw_length;
/* Pad and terminate so the HW stops parsing this second-level batch. */
5119 *cmd_ptr++ = MI_NOOP;
5120 *cmd_ptr++ = MI_BATCH_BUFFER_END;
5121 i965_unmap_gpe_resource(obj_batch_buffer);
/* Program the full per-picture PAK command sequence for one pass:
 *   1. (pass 0 only) build the header insert-object batch and, for CQP,
 *      the picture-state batch; refresh internal probability buffers.
 *   2. Upload the current frame context (key-frame tables for intra frames)
 *      into the probability buffer read by the HCP.
 *   3. On BRC re-pak passes, emit a conditional batch-buffer-end keyed on
 *      the image status so extra passes are skipped when not needed.
 *   4. Emit pipe-mode-select, the recon/raw/reference HCP surface states,
 *      pipe-buf-addr and ind-obj-base-addr states.
 *   5. Chain in the pic-state, segment-state, uncompressed-header and
 *      MB-code second-level batches.
 * NOTE(review): many lines are elided in this listing (error returns,
 * else-branches, trailing call arguments); comments reflect the visible
 * code only. */
5125 gen9_vp9_pak_picture_level(VADriverContextP ctx,
5126 struct encode_state *encode_state,
5127 struct intel_encoder_context *encoder_context)
5129 struct intel_batchbuffer *batch = encoder_context->base.batch;
5130 struct gen9_encoder_context_vp9 *pak_context = encoder_context->mfc_context;
5131 struct object_surface *obj_surface;
5132 VAEncPictureParameterBufferVP9 *pic_param;
5133 VAEncMiscParameterTypeVP9PerSegmantParam *seg_param, tmp_seg_param;
5134 struct gen9_vp9_state *vp9_state;
5135 struct gen9_surface_vp9 *vp9_priv_surface;
5137 struct gen9_hcpe_pipe_mode_select_param mode_param;
5138 hcp_surface_state hcp_surface;
5139 struct gpe_mi_batch_buffer_start_parameter second_level_batch;
5142 vp9_state = (struct gen9_vp9_state *)(encoder_context->enc_priv_state);
/* Guard: encode session must be fully initialized (elided early return). */
5144 if (!pak_context || !vp9_state || !vp9_state->pic_param)
5147 pic_param = vp9_state->pic_param;
5148 seg_param = vp9_state->segment_param;
/* --- Pass-0-only setup ------------------------------------------------ */
5150 if (vp9_state->curr_pak_pass == 0) {
5151 intel_vp9enc_construct_pak_insertobj_batchbuffer(ctx, encoder_context,
5152 &pak_context->res_pak_uncompressed_input_buffer);
5154 // Check if driver already programmed pic state as part of BRC update kernel programming.
5155 if (!vp9_state->brc_enabled) {
5156 intel_vp9enc_construct_picstate_batchbuf(ctx, encode_state,
5157 encoder_context, &pak_context->res_pic_state_brc_write_hfw_read_buffer);
5161 if (vp9_state->curr_pak_pass == 0) {
5162 intel_vp9enc_refresh_frame_internal_buffers(ctx, encoder_context);
5166 /* copy the frame_context[frame_idx] into curr_frame_context */
5167 memcpy(&vp9_state->vp9_current_fc,
5168 &(vp9_state->vp9_frame_ctx[vp9_state->frame_ctx_idx]),
5169 sizeof(FRAME_CONTEXT));
/* --- Probability buffer upload (declaration of prob_ptr is elided) ---- */
5173 prob_ptr = i965_map_gpe_resource(&pak_context->res_prob_buffer);
5178 /* copy the current fc to vp9_prob buffer */
5179 memcpy(prob_ptr, &vp9_state->vp9_current_fc, sizeof(FRAME_CONTEXT));
5180 if ((pic_param->pic_flags.bits.frame_type == HCP_VP9_KEY_FRAME) ||
5181 pic_param->pic_flags.bits.intra_only) {
5182 FRAME_CONTEXT *frame_ptr = (FRAME_CONTEXT *)prob_ptr;
/* Key/intra frames use the fixed key-frame partition and UV mode tables. */
5184 memcpy(frame_ptr->partition_prob, vp9_kf_partition_probs,
5185 sizeof(vp9_kf_partition_probs));
5186 memcpy(frame_ptr->uv_mode_prob, vp9_kf_uv_mode_prob,
5187 sizeof(vp9_kf_uv_mode_prob));
5189 i965_unmap_gpe_resource(&pak_context->res_prob_buffer);
/* --- BRC re-pak: skip this pass if the previous one already hit target - */
5193 if (vp9_state->brc_enabled && vp9_state->curr_pak_pass) {
5194 /* read image status and insert the conditional end cmd */
5195 /* image ctrl/status is already accessed */
5196 struct gpe_mi_conditional_batch_buffer_end_parameter mi_cond_end;
5197 struct vp9_encode_status_buffer_internal *status_buffer;
5199 status_buffer = &vp9_state->status_buffer;
5200 memset(&mi_cond_end, 0, sizeof(mi_cond_end));
5201 mi_cond_end.offset = status_buffer->image_status_mask_offset;
5202 mi_cond_end.bo = status_buffer->bo;
5203 mi_cond_end.compare_data = 0;
5204 mi_cond_end.compare_mask_mode_disabled = 1;
5205 gen9_gpe_mi_conditional_batch_buffer_end(ctx, batch,
/* --- HCP pipe mode select: codec_mode 1 selects VP9 encode here ------- */
5209 mode_param.codec_mode = 1;
5210 mode_param.stream_out = 0;
5211 gen9_pak_vp9_pipe_mode_select(ctx, encode_state, encoder_context, &mode_param);
5213 /* reconstructed surface */
5214 memset(&hcp_surface, 0, sizeof(hcp_surface));
5215 obj_surface = encode_state->reconstructed_object;
5216 hcp_surface.dw1.surface_id = 0;
5217 hcp_surface.dw1.surface_pitch = obj_surface->width;
5218 hcp_surface.dw2.surface_format = SURFACE_FORMAT_PLANAR_420_8;
5219 hcp_surface.dw2.y_cb_offset = obj_surface->y_cb_offset;
5220 gen9_vp9_add_surface_state(ctx, encode_state, encoder_context,
/* Source surface: use the dynamically-scaled surface when DYS resized the
 * input, otherwise the raw input YUV (else-branch line 5230 below). */
5224 if (vp9_state->dys_in_use &&
5225 ((pic_param->frame_width_src != pic_param->frame_width_dst) ||
5226 (pic_param->frame_height_src != pic_param->frame_height_dst))) {
5227 vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
5228 obj_surface = vp9_priv_surface->dys_surface_obj;
5230 obj_surface = encode_state->input_yuv_object;
5233 hcp_surface.dw1.surface_id = 1;
5234 hcp_surface.dw1.surface_pitch = obj_surface->width;
5235 hcp_surface.dw2.surface_format = SURFACE_FORMAT_PLANAR_420_8;
5236 hcp_surface.dw2.y_cb_offset = obj_surface->y_cb_offset;
5237 gen9_vp9_add_surface_state(ctx, encode_state, encoder_context,
/* Inter frames: program surface states for LAST/GOLDEN/ALT references
 * (surface ids 2/3/4) when the corresponding reference exists. */
5240 if (vp9_state->picture_coding_type) {
5241 /* Add surface for last */
5242 if (vp9_state->last_ref_obj) {
5243 obj_surface = vp9_state->last_ref_obj;
5244 hcp_surface.dw1.surface_id = 2;
5245 hcp_surface.dw1.surface_pitch = obj_surface->width;
5246 hcp_surface.dw2.surface_format = SURFACE_FORMAT_PLANAR_420_8;
5247 hcp_surface.dw2.y_cb_offset = obj_surface->y_cb_offset;
5248 gen9_vp9_add_surface_state(ctx, encode_state, encoder_context,
5251 if (vp9_state->golden_ref_obj) {
5252 obj_surface = vp9_state->golden_ref_obj;
5253 hcp_surface.dw1.surface_id = 3;
5254 hcp_surface.dw1.surface_pitch = obj_surface->width;
5255 hcp_surface.dw2.surface_format = SURFACE_FORMAT_PLANAR_420_8;
5256 hcp_surface.dw2.y_cb_offset = obj_surface->y_cb_offset;
5257 gen9_vp9_add_surface_state(ctx, encode_state, encoder_context,
5260 if (vp9_state->alt_ref_obj) {
5261 obj_surface = vp9_state->alt_ref_obj;
5262 hcp_surface.dw1.surface_id = 4;
5263 hcp_surface.dw1.surface_pitch = obj_surface->width;
5264 hcp_surface.dw2.surface_format = SURFACE_FORMAT_PLANAR_420_8;
5265 hcp_surface.dw2.y_cb_offset = obj_surface->y_cb_offset;
5266 gen9_vp9_add_surface_state(ctx, encode_state, encoder_context,
/* Buffer and indirect-object base addresses for the HCP pipe. */
5271 gen9_pak_vp9_pipe_buf_addr_state(ctx, encode_state, encoder_context);
5273 gen9_pak_vp9_ind_obj_base_addr_state(ctx, encode_state, encoder_context);
5275 // Using picstate zero with updated QP and LF deltas by HuC for repak, irrespective of how many Pak passes were run in multi-pass mode.
5276 memset(&second_level_batch, 0, sizeof(second_level_batch));
/* Pic-state batch: each pass has its own VP9_PIC_STATE_BUFFER_SIZE slot. */
5278 if (vp9_state->curr_pak_pass == 0) {
5279 second_level_batch.offset = 0;
5281 second_level_batch.offset = vp9_state->curr_pak_pass * VP9_PIC_STATE_BUFFER_SIZE;
5283 second_level_batch.is_second_level = 1;
5284 second_level_batch.bo = pak_context->res_pic_state_brc_write_hfw_read_buffer.bo;
5286 gen8_gpe_mi_batch_buffer_start(ctx, batch, &second_level_batch);
/* Segment states: real per-segment params when segmentation is enabled,
 * otherwise a zeroed fallback (segment_count setup is elided). */
5288 if (pic_param->pic_flags.bits.segmentation_enabled &&
5293 memset(&tmp_seg_param, 0, sizeof(tmp_seg_param));
5294 seg_param = &tmp_seg_param;
5296 for (i = 0; i < segment_count; i++) {
5297 gen9_pak_vp9_segment_state(ctx, encode_state,
5299 &seg_param->seg_data[i], i);
5302 /* Insert the uncompressed header buffer */
5303 second_level_batch.is_second_level = 1;
5304 second_level_batch.offset = 0;
5305 second_level_batch.bo = pak_context->res_pak_uncompressed_input_buffer.bo;
5307 gen8_gpe_mi_batch_buffer_start(ctx, batch, &second_level_batch);
/* Finally chain the MB-code (PAK object) second-level batch. */
5310 second_level_batch.is_second_level = 1;
5311 second_level_batch.offset = 0;
5312 second_level_batch.bo = pak_context->res_mb_code_surface.bo;
5313 gen8_gpe_mi_batch_buffer_start(ctx, batch, &second_level_batch);
/* After PAK, snapshot the HCP status MMIO registers into memory:
 *   - bitstream byte count  -> status buffer and BRC bitstream-size buffer
 *   - image status mask/ctrl -> status buffer (consumed by the conditional
 *     batch-buffer-end of later re-pak passes) and BRC buffer (DW1).
 * MI_FLUSH_DW fences before and after the register reads. */
5319 gen9_vp9_read_mfc_status(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
5321 struct intel_batchbuffer *batch = encoder_context->base.batch;
5322 struct gen9_encoder_context_vp9 *pak_context = encoder_context->mfc_context;
5323 struct gpe_mi_store_register_mem_parameter mi_store_reg_mem_param;
5324 struct gpe_mi_flush_dw_parameter mi_flush_dw_param;
5325 //struct gpe_mi_copy_mem_parameter mi_copy_mem_param;
5326 struct vp9_encode_status_buffer_internal *status_buffer;
5327 struct gen9_vp9_state *vp9_state;
5329 vp9_state = (struct gen9_vp9_state *)(encoder_context->enc_priv_state);
/* Guard (elided early return on invalid state). */
5330 if (!vp9_state || !pak_context || !batch)
5333 status_buffer = &(vp9_state->status_buffer);
/* Fence so the PAK results are visible before the MMIO reads. */
5335 memset(&mi_flush_dw_param, 0, sizeof(mi_flush_dw_param));
5336 gen8_gpe_mi_flush_dw(ctx, batch, &mi_flush_dw_param);
/* Bitstream byte count -> app-visible status buffer. */
5338 memset(&mi_store_reg_mem_param, 0, sizeof(mi_store_reg_mem_param));
5339 mi_store_reg_mem_param.bo = status_buffer->bo;
5340 mi_store_reg_mem_param.offset = status_buffer->bs_byte_count_offset;
5341 mi_store_reg_mem_param.mmio_offset = status_buffer->vp9_bs_frame_reg_offset;
5342 gen8_gpe_mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);
/* Same register duplicated into the BRC bitstream-size buffer (DW0). */
5344 mi_store_reg_mem_param.bo = pak_context->res_brc_bitstream_size_buffer.bo;
5345 mi_store_reg_mem_param.offset = 0;
5346 mi_store_reg_mem_param.mmio_offset = status_buffer->vp9_bs_frame_reg_offset;
5347 gen8_gpe_mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);
5349 /* Read HCP Image status */
5350 mi_store_reg_mem_param.bo = status_buffer->bo;
5351 mi_store_reg_mem_param.offset = status_buffer->image_status_mask_offset;
5352 mi_store_reg_mem_param.mmio_offset =
5353 status_buffer->vp9_image_mask_reg_offset;
5354 gen8_gpe_mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);
5356 mi_store_reg_mem_param.bo = status_buffer->bo;
5357 mi_store_reg_mem_param.offset = status_buffer->image_status_ctrl_offset;
5358 mi_store_reg_mem_param.mmio_offset =
5359 status_buffer->vp9_image_ctrl_reg_offset;
5360 gen8_gpe_mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);
/* Image ctrl also lands in the BRC buffer at byte offset 4 (DW1). */
5362 mi_store_reg_mem_param.bo = pak_context->res_brc_bitstream_size_buffer.bo;
5363 mi_store_reg_mem_param.offset = 4;
5364 mi_store_reg_mem_param.mmio_offset =
5365 status_buffer->vp9_image_ctrl_reg_offset;
5366 gen8_gpe_mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);
5368 gen8_gpe_mi_flush_dw(ctx, batch, &mi_flush_dw_param);
/* Resolve and reference all BOs the PAK stage needs for this frame:
 * reconstructed surface, up to three VP9 reference surfaces, the input
 * (or DYS-scaled) YUV surface, and the application's coded buffer used
 * as the indirect bitstream output.  Also resets the coded-buffer status
 * header.  Returns VA_STATUS_SUCCESS or an error code.
 * NOTE(review): loop-variable and `bo` declarations plus some map/unmap
 * lines are elided in this listing. */
5374 gen9_vp9_pak_pipeline_prepare(VADriverContextP ctx,
5375 struct encode_state *encode_state,
5376 struct intel_encoder_context *encoder_context)
5378 struct gen9_encoder_context_vp9 *pak_context = encoder_context->mfc_context;
5379 struct object_surface *obj_surface;
5380 struct object_buffer *obj_buffer;
5381 struct i965_coded_buffer_segment *coded_buffer_segment;
5382 VAEncPictureParameterBufferVP9 *pic_param;
5383 struct gen9_vp9_state *vp9_state;
5387 vp9_state = (struct gen9_vp9_state *)(encoder_context->enc_priv_state);
5389 !vp9_state->pic_param)
5390 return VA_STATUS_ERROR_INVALID_PARAMETER;
5392 pic_param = vp9_state->pic_param;
5394 /* reconstructed surface */
5395 obj_surface = encode_state->reconstructed_object;
/* Lazily allocate the recon BO as NV12 4:2:0 if not present yet. */
5396 i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
5398 dri_bo_unreference(pak_context->reconstructed_object.bo);
5400 pak_context->reconstructed_object.bo = obj_surface->bo;
5401 dri_bo_reference(pak_context->reconstructed_object.bo);
5403 /* set vp9 reference frames */
/* Drop references held from the previous frame before repopulating. */
5404 for (i = 0; i < ARRAY_ELEMS(pak_context->reference_surfaces); i++) {
5405 if (pak_context->reference_surfaces[i].bo)
5406 dri_bo_unreference(pak_context->reference_surfaces[i].bo);
5407 pak_context->reference_surfaces[i].bo = NULL;
5410 /* Three reference frames are enough for VP9 */
/* Only inter (non-key, non-intra-only) frames carry references. */
5411 if (pic_param->pic_flags.bits.frame_type &&
5412 !pic_param->pic_flags.bits.intra_only) {
5413 for (i = 0; i < 3; i++) {
5414 obj_surface = encode_state->reference_objects[i];
5415 if (obj_surface && obj_surface->bo) {
5416 pak_context->reference_surfaces[i].bo = obj_surface->bo;
5417 dri_bo_reference(obj_surface->bo);
5422 /* input YUV surface */
5423 dri_bo_unreference(pak_context->uncompressed_picture_source.bo);
5424 pak_context->uncompressed_picture_source.bo = NULL;
5425 obj_surface = encode_state->reconstructed_object;
/* When dynamic scaling resized the frame, encode from the DYS surface
 * attached to the recon; otherwise from the raw input (elided else). */
5426 if (vp9_state->dys_in_use &&
5427 ((pic_param->frame_width_src != pic_param->frame_width_dst) ||
5428 (pic_param->frame_height_src != pic_param->frame_height_dst))) {
5429 struct gen9_surface_vp9 *vp9_priv_surface =
5430 (struct gen9_surface_vp9 *)(obj_surface->private_data);
5431 obj_surface = vp9_priv_surface->dys_surface_obj;
5433 obj_surface = encode_state->input_yuv_object;
5435 pak_context->uncompressed_picture_source.bo = obj_surface->bo;
5436 dri_bo_reference(pak_context->uncompressed_picture_source.bo);
/* Coded buffer: the bitstream is written after the driver-private header,
 * leaving the last 0x1000-aligned region free (status/padding). */
5439 dri_bo_unreference(pak_context->indirect_pak_bse_object.bo);
5440 pak_context->indirect_pak_bse_object.bo = NULL;
5441 obj_buffer = encode_state->coded_buf_object;
5442 bo = obj_buffer->buffer_store->bo;
5443 pak_context->indirect_pak_bse_object.offset = I965_CODEDBUFFER_HEADER_SIZE;
5444 pak_context->indirect_pak_bse_object.end_offset = ALIGN((obj_buffer->size_element - 0x1000), 0x1000);
5445 pak_context->indirect_pak_bse_object.bo = bo;
5446 dri_bo_reference(pak_context->indirect_pak_bse_object.bo);
5448 /* set the internal flag to 0 to indicate the coded size is unknown */
/* bo->virtual implies the BO is mapped here (map call is elided). */
5450 coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual;
5451 coded_buffer_segment->mapped = 0;
5452 coded_buffer_segment->codec = encoder_context->codec;
5453 coded_buffer_segment->status_support = 1;
5456 return VA_STATUS_SUCCESS;
/* BRC-prepare hook for the PAK stage.  The visible body is empty/elided —
 * presumably a no-op because BRC setup happens in the VME/BRC kernels for
 * VP9; confirm against the full source. */
5460 gen9_vp9_pak_brc_prepare(struct encode_state *encode_state,
5461 struct intel_encoder_context *encoder_context)
/* Release the BO references held by the PAK side of the shared VME/PAK
 * context: recon surface, input source, coded-buffer object, and the
 * reference-surface slots.  The context struct itself is owned (and
 * freed) by the VME side — see the note at the end. */
5466 gen9_vp9_pak_context_destroy(void *context)
5468 struct gen9_encoder_context_vp9 *pak_context = context;
5471 dri_bo_unreference(pak_context->reconstructed_object.bo);
5472 pak_context->reconstructed_object.bo = NULL;
5474 dri_bo_unreference(pak_context->uncompressed_picture_source.bo);
5475 pak_context->uncompressed_picture_source.bo = NULL;
5477 dri_bo_unreference(pak_context->indirect_pak_bse_object.bo);
5478 pak_context->indirect_pak_bse_object.bo = NULL;
/* NOTE(review): prepare() sizes this loop with ARRAY_ELEMS(); the literal
 * 8 here assumes reference_surfaces has 8 entries — confirm. */
5480 for (i = 0; i < 8; i++) {
5481 dri_bo_unreference(pak_context->reference_surfaces[i].bo);
5482 pak_context->reference_surfaces[i].bo = NULL;
5485 /* vme & pak same the same structure, so don't free the context here */
/* Top-level PAK entry: prepare resources, then run num_pak_passes PAK
 * passes on the BCS ring (one picture-level programming plus a status
 * readback per pass), flush, and record per-frame bookkeeping used by
 * the next frame (last-frame info, MV buffer ping-pong, frame counter).
 * NOTE(review): some lines (e.g. declarations of `i`/`va_status`, error
 * jump targets) are elided in this listing. */
5489 gen9_vp9_pak_pipeline(VADriverContextP ctx,
5491 struct encode_state *encode_state,
5492 struct intel_encoder_context *encoder_context)
5494 struct i965_driver_data *i965 = i965_driver_data(ctx);
5495 struct intel_batchbuffer *batch = encoder_context->base.batch;
5496 struct gen9_encoder_context_vp9 *pak_context = encoder_context->mfc_context;
5498 struct gen9_vp9_state *vp9_state;
5499 VAEncPictureParameterBufferVP9 *pic_param;
5502 vp9_state = (struct gen9_vp9_state *)(encoder_context->enc_priv_state);
5504 if (!vp9_state || !vp9_state->pic_param || !pak_context)
5505 return VA_STATUS_ERROR_INVALID_PARAMETER;
5507 va_status = gen9_vp9_pak_pipeline_prepare(ctx, encode_state, encoder_context);
5509 if (va_status != VA_STATUS_SUCCESS)
/* Pin to BSD ring 0 on dual-BSD hardware; plain BCS otherwise (elided else). */
5512 if (i965->intel.has_bsd2)
5513 intel_batchbuffer_start_atomic_bcs_override(batch, 0x1000, BSD_RING0);
5515 intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
5517 intel_batchbuffer_emit_mi_flush(batch);
/* Reserve a 64-DWORD NOOP block at the start of the batch. */
5519 BEGIN_BCS_BATCH(batch, 64);
5520 for (i = 0; i < 64; i++)
5521 OUT_BCS_BATCH(batch, MI_NOOP);
5523 ADVANCE_BCS_BATCH(batch);
/* Multi-pass PAK loop: re-pak passes may terminate early via the
 * conditional batch-buffer-end programmed in pak_picture_level(). */
5525 for (vp9_state->curr_pak_pass = 0;
5526 vp9_state->curr_pak_pass < vp9_state->num_pak_passes;
5527 vp9_state->curr_pak_pass++) {
5529 if (vp9_state->curr_pak_pass == 0) {
5530 /* Initialize the VP9 Image Ctrl reg for the first pass */
5531 struct gpe_mi_load_register_imm_parameter mi_load_reg_imm;
5532 struct vp9_encode_status_buffer_internal *status_buffer;
5534 status_buffer = &(vp9_state->status_buffer);
5535 memset(&mi_load_reg_imm, 0, sizeof(mi_load_reg_imm));
5536 mi_load_reg_imm.mmio_offset = status_buffer->vp9_image_ctrl_reg_offset;
5537 mi_load_reg_imm.data = 0;
5538 gen8_gpe_mi_load_register_imm(ctx, batch, &mi_load_reg_imm);
5540 gen9_vp9_pak_picture_level(ctx, encode_state, encoder_context);
5541 gen9_vp9_read_mfc_status(ctx, encoder_context);
5544 intel_batchbuffer_end_atomic(batch);
5545 intel_batchbuffer_flush(batch);
/* Remember this frame's properties for next-frame decisions (e.g. header
 * construction and reference refresh logic). */
5547 pic_param = vp9_state->pic_param;
5548 vp9_state->vp9_last_frame.frame_width = pic_param->frame_width_dst;
5549 vp9_state->vp9_last_frame.frame_height = pic_param->frame_height_dst;
5550 vp9_state->vp9_last_frame.frame_type = pic_param->pic_flags.bits.frame_type;
5551 vp9_state->vp9_last_frame.show_frame = pic_param->pic_flags.bits.show_frame;
5552 vp9_state->vp9_last_frame.refresh_frame_context = pic_param->pic_flags.bits.refresh_frame_context;
5553 vp9_state->vp9_last_frame.frame_context_idx = pic_param->pic_flags.bits.frame_context_idx;
5554 vp9_state->vp9_last_frame.intra_only = pic_param->pic_flags.bits.intra_only;
5555 vp9_state->frame_number++;
/* Ping-pong between the two temporal MV buffers. */
5556 vp9_state->curr_mv_temporal_index ^= 1;
5557 vp9_state->first_frame = 0;
5559 return VA_STATUS_SUCCESS;
/* Allocate and initialize the VME context plus the shared per-encoder VP9
 * state: default feature flags (HME, scoreboard, DYS), rate-control setup
 * (4 PAK passes when BRC is active, 1 for CQP/NONE), the status-buffer
 * byte offsets inside the coded buffer, the HCP status MMIO register
 * offsets, and the VME kernel/pipeline hooks on encoder_context.
 * NOTE(review): the allocation-failure cleanup path (after line 5571)
 * and the trailing return are elided in this listing. */
5563 gen9_vp9_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
5565 struct gen9_encoder_context_vp9 *vme_context = NULL;
5566 struct gen9_vp9_state *vp9_state = NULL;
5568 vme_context = calloc(1, sizeof(struct gen9_encoder_context_vp9));
5569 vp9_state = calloc(1, sizeof(struct gen9_vp9_state));
5571 if (!vme_context || !vp9_state) {
/* The same state object is shared by VME and (later) PAK. */
5579 encoder_context->enc_priv_state = vp9_state;
5580 vme_context->enc_priv_state = vp9_state;
5582 /* Initialize the features that are supported by VP9 */
5583 vme_context->hme_supported = 1;
5584 vme_context->use_hw_scoreboard = 1;
5585 vme_context->use_hw_non_stalling_scoreboard = 1;
5587 vp9_state->tx_mode = TX_MODE_SELECT;
5588 vp9_state->multi_ref_qp_check = 0;
5589 vp9_state->target_usage = INTEL_ENC_VP9_TU_NORMAL;
5590 vp9_state->num_pak_passes = 1;
5591 vp9_state->hme_supported = vme_context->hme_supported;
5592 vp9_state->b16xme_supported = 1;
/* Any RC mode other than NONE/CQP enables BRC with up to 4 re-pak passes. */
5594 if (encoder_context->rate_control_mode != VA_RC_NONE &&
5595 encoder_context->rate_control_mode != VA_RC_CQP) {
5596 vp9_state->brc_enabled = 1;
5597 vp9_state->brc_distortion_buffer_supported = 1;
5598 vp9_state->brc_constant_buffer_supported = 1;
5599 vp9_state->num_pak_passes = 4;
5601 vp9_state->dys_enabled = 1; /* this is supported by default */
5602 vp9_state->first_frame = 1;
5604 /* the definition of status buffer offset for VP9 */
5606 struct vp9_encode_status_buffer_internal *status_buffer;
/* Status words live in the codec-private area of the coded buffer header. */
5607 uint32_t base_offset = offsetof(struct i965_coded_buffer_segment, codec_private_data);
5609 status_buffer = &vp9_state->status_buffer;
5610 memset(status_buffer, 0,
5611 sizeof(struct vp9_encode_status_buffer_internal));
5613 status_buffer->bs_byte_count_offset = base_offset + offsetof(struct vp9_encode_status, bs_byte_count);
5614 status_buffer->image_status_mask_offset = base_offset + offsetof(struct vp9_encode_status, image_status_mask);
5615 status_buffer->image_status_ctrl_offset = base_offset + offsetof(struct vp9_encode_status, image_status_ctrl);
5616 status_buffer->media_index_offset = base_offset + offsetof(struct vp9_encode_status, media_index);
/* HCP VP9 status/control MMIO register offsets (read via MI_STORE_REGISTER_MEM). */
5618 status_buffer->vp9_bs_frame_reg_offset = 0x1E9E0;
5619 status_buffer->vp9_image_mask_reg_offset = 0x1E9F0;
5620 status_buffer->vp9_image_ctrl_reg_offset = 0x1E9F4;
5623 gen9_vme_kernels_context_init_vp9(ctx, encoder_context, vme_context);
5625 encoder_context->vme_context = vme_context;
5626 encoder_context->vme_pipeline = gen9_vme_pipeline_vp9;
5627 encoder_context->vme_context_destroy = gen9_vme_context_destroy_vp9;
/* get_status hook: publish the PAK-written bitstream byte count (stored by
 * gen9_vp9_read_mfc_status into the codec-private status area) as the size
 * of the coded buffer segment returned to the application. */
5633 gen9_vp9_get_coded_status(VADriverContextP ctx,
5634 struct intel_encoder_context *encoder_context,
5635 struct i965_coded_buffer_segment *coded_buf_seg)
5637 struct vp9_encode_status *vp9_encode_status;
5639 if (!encoder_context || !coded_buf_seg)
5640 return VA_STATUS_ERROR_INVALID_BUFFER;
5642 vp9_encode_status = (struct vp9_encode_status *)coded_buf_seg->codec_private_data;
5643 coded_buf_seg->base.size = vp9_encode_status->bs_byte_count;
5645 /* One VACodedBufferSegment for VP9 will be added later.
5646 * It will be linked to the next element of coded_buf_seg->base.next
5649 return VA_STATUS_SUCCESS;
5653 gen9_vp9_pak_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
5655 /* VME & PAK share the same context */
5656 struct gen9_encoder_context_vp9 *pak_context = encoder_context->vme_context;
5661 encoder_context->mfc_context = pak_context;
5662 encoder_context->mfc_context_destroy = gen9_vp9_pak_context_destroy;
5663 encoder_context->mfc_pipeline = gen9_vp9_pak_pipeline;
5664 encoder_context->mfc_brc_prepare = gen9_vp9_pak_brc_prepare;
5665 encoder_context->get_status = gen9_vp9_get_coded_status;