2 * Copyright © 2016 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the
6 * "Software"), to deal in the Software without restriction, including
7 * without limitation the rights to use, copy, modify, merge, publish,
8 * distribute, sub license, and/or sell copies of the Software, and to
9 * permit persons to whom the Software is furnished to do so, subject to
10 * the following conditions:
12 * The above copyright notice and this permission notice (including the
13 * next paragraph) shall be included in all copies or substantial portions
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19 * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * Zhao, Yakui <yakui.zhao@intel.com>
37 #include "intel_batchbuffer.h"
38 #include "intel_driver.h"
40 #include "i965_defines.h"
41 #include "i965_drv_video.h"
42 #include "i965_encoder.h"
43 #include "gen9_vp9_encapi.h"
44 #include "gen9_vp9_encoder.h"
45 #include "gen9_vp9_encoder_kernels.h"
46 #include "vp9_probs.h"
47 #include "gen9_vp9_const_def.h"
/* Upper bound on binding-table surfaces used by a single VP9 GPE kernel. */
#define MAX_VP9_ENCODER_SURFACES        64

#define MAX_URB_SIZE                    4096 /* In register */
#define NUM_KERNELS_PER_GPE_CONTEXT     1

/* Bitrates handed to the BRC kernel are expressed in kbps units. */
#define VP9_BRC_KBPS                    1000

/* Rate-control mode flags written into the BRC curbe (dw12.brc_flag). */
#define BRC_KERNEL_CBR                  0x0010
#define BRC_KERNEL_VBR                  0x0020
#define BRC_KERNEL_AVBR                 0x0040
#define BRC_KERNEL_CQL                  0x0080

/* Byte size of one copy of the VP9 PIC_STATE command buffer. */
#define VP9_PIC_STATE_BUFFER_SIZE       192
/* One entry of the kernel-binary header table. kernel_start_pointer holds
 * the start offset of the kernel blob in 64-byte units (callers shift it
 * left by 6 to obtain the byte offset). The two bitfields pack into a
 * single 32-bit word. */
typedef struct _intel_kernel_header_ {
    uint32_t reserved             : 6;
    uint32_t kernel_start_pointer : 26;
} intel_kernel_header;
/* Header table found at the start of the combined VP9 kernel binary.
 * Entry order mirrors the order of the kernel blobs in the binary;
 * VP9BRC_Update must stay the LAST entry because
 * intel_vp9_get_kernel_header_and_size() uses the address one past it as
 * the end sentinel when computing kernel sizes.
 * NOTE(review): comparable kernel-header tables in this driver begin with a
 * kernel-count field — confirm this layout against the kernel binary. */
typedef struct _intel_vp9_kernel_header {
    /* Downscaling / motion estimation */
    intel_kernel_header PLY_DSCALE;
    intel_kernel_header VP9_ME_P;
    /* MBENC kernels */
    intel_kernel_header VP9_Enc_I_32x32;
    intel_kernel_header VP9_Enc_I_16x16;
    intel_kernel_header VP9_Enc_P;
    intel_kernel_header VP9_Enc_TX;
    /* Dynamic scaling */
    intel_kernel_header VP9_DYS;
    /* Bit-rate control kernels */
    intel_kernel_header VP9BRC_Intra_Distortion;
    intel_kernel_header VP9BRC_Init;
    intel_kernel_header VP9BRC_Reset;
    intel_kernel_header VP9BRC_Update;
} intel_vp9_kernel_header;
/* Dynamic-scaling (DYS) stage selection flags: which of the 1x/4x/16x
 * outputs a pass must produce. */
#define DYS_1X_FLAG     0x01
#define DYS_4X_FLAG     0x02
#define DYS_16X_FLAG    0x04
/* Requested frame dimensions in pixels, used when validating/allocating the
 * per-surface scaled and dynamic-scaling surfaces. */
struct vp9_surface_param {
    uint32_t frame_width;
    uint32_t frame_height;
};
/* Convert a two's-complement integer into sign-magnitude form with the sign
 * flag at bit (sign_bit_pos - 1): negative values have the sign bit set and
 * the magnitude masked into the low (sign_bit_pos - 1) bits; non-negative
 * values are just masked. Magnitudes wider than the field are silently
 * truncated by the mask. */
static uint32_t intel_convert_sign_mag(int val, int sign_bit_pos)
{
    uint32_t ret_val = 0;

    if (val < 0) {
        val = -val;
        ret_val = ((1 << (sign_bit_pos - 1)) | (val & ((1 << (sign_bit_pos - 1)) - 1)));
    } else {
        ret_val = val & ((1 << (sign_bit_pos - 1)) - 1);
    }
    return ret_val;
}
/* Locate one kernel blob inside the monolithic VP9 kernel binary and fill
 * ret_kernel->bin / ret_kernel->size. kernel_start_pointer values are stored
 * in 64-byte units, hence the "<< 6" conversions below. The size of the
 * selected kernel is the distance to the next table entry's start, or to the
 * end of the whole binary for the last entry. */
intel_vp9_get_kernel_header_and_size(
    INTEL_VP9_ENC_OPERATION operation,
    struct i965_kernel *ret_kernel)
    typedef uint32_t BIN_PTR[4];

    intel_vp9_kernel_header *pkh_table;
    intel_kernel_header *pcurr_header, *pinvalid_entry, *pnext_header;

    if (!pvbinary || !ret_kernel)

    bin_start = (char *)pvbinary;
    pkh_table = (intel_vp9_kernel_header *)pvbinary;
    /* one past the last table entry: sentinel meaning "no next kernel" */
    pinvalid_entry = &(pkh_table->VP9BRC_Update) + 1;
    /* default upper bound for the kernel end: end of the whole binary */
    next_krnoffset = binary_size;

    /* select the first header entry of the requested operation group */
    if ((operation == INTEL_VP9_ENC_SCALING4X) || (operation == INTEL_VP9_ENC_SCALING2X))
        pcurr_header = &pkh_table->PLY_DSCALE;
    else if (operation == INTEL_VP9_ENC_ME)
        pcurr_header = &pkh_table->VP9_ME_P;
    else if (operation == INTEL_VP9_ENC_MBENC)
        pcurr_header = &pkh_table->VP9_Enc_I_32x32;
    else if (operation == INTEL_VP9_ENC_DYS)
        pcurr_header = &pkh_table->VP9_DYS;
    else if (operation == INTEL_VP9_ENC_BRC)
        pcurr_header = &pkh_table->VP9BRC_Intra_Distortion;

    /* step to the requested kernel index within the selected group */
    pcurr_header += krnstate_idx;
    ret_kernel->bin = (const BIN_PTR *)(bin_start + (pcurr_header->kernel_start_pointer << 6));

    pnext_header = (pcurr_header + 1);
    if (pnext_header < pinvalid_entry)
        next_krnoffset = pnext_header->kernel_start_pointer << 6;

    /* size = distance from this kernel's start to the next one (or EOF) */
    ret_kernel->size = next_krnoffset - (pcurr_header->kernel_start_pointer << 6);
/* free_private_data callback attached to VP9-encoded surfaces: destroys the
 * auxiliary HME scaled (4x/16x) and dynamic-scaling (DYS full/4x/16x)
 * surfaces referenced by the surface's private gen9_surface_vp9 data, and
 * resets the cached ids/objects so the pointers cannot dangle. */
gen9_free_surfaces_vp9(void **data)
    struct gen9_surface_vp9 *vp9_surface;

    /* 4x-downscaled HME surface */
    if (vp9_surface->scaled_4x_surface_obj) {
        i965_DestroySurfaces(vp9_surface->ctx, &vp9_surface->scaled_4x_surface_id, 1);
        vp9_surface->scaled_4x_surface_id = VA_INVALID_SURFACE;
        vp9_surface->scaled_4x_surface_obj = NULL;

    /* 16x-downscaled HME surface */
    if (vp9_surface->scaled_16x_surface_obj) {
        i965_DestroySurfaces(vp9_surface->ctx, &vp9_surface->scaled_16x_surface_id, 1);
        vp9_surface->scaled_16x_surface_id = VA_INVALID_SURFACE;
        vp9_surface->scaled_16x_surface_obj = NULL;

    /* 4x dynamic-scaling surface */
    if (vp9_surface->dys_4x_surface_obj) {
        i965_DestroySurfaces(vp9_surface->ctx, &vp9_surface->dys_4x_surface_id, 1);
        vp9_surface->dys_4x_surface_id = VA_INVALID_SURFACE;
        vp9_surface->dys_4x_surface_obj = NULL;

    /* 16x dynamic-scaling surface */
    if (vp9_surface->dys_16x_surface_obj) {
        i965_DestroySurfaces(vp9_surface->ctx, &vp9_surface->dys_16x_surface_id, 1);
        vp9_surface->dys_16x_surface_id = VA_INVALID_SURFACE;
        vp9_surface->dys_16x_surface_obj = NULL;

    /* full-resolution dynamic-scaling surface */
    if (vp9_surface->dys_surface_obj) {
        i965_DestroySurfaces(vp9_surface->ctx, &vp9_surface->dys_surface_id, 1);
        vp9_surface->dys_surface_id = VA_INVALID_SURFACE;
        vp9_surface->dys_surface_obj = NULL;
219 gen9_vp9_init_check_surfaces(VADriverContextP ctx,
220 struct object_surface *obj_surface,
221 struct vp9_surface_param *surface_param)
223 struct i965_driver_data *i965 = i965_driver_data(ctx);
224 struct gen9_surface_vp9 *vp9_surface;
225 int downscaled_width_4x, downscaled_height_4x;
226 int downscaled_width_16x, downscaled_height_16x;
228 if (!obj_surface || !obj_surface->bo)
229 return VA_STATUS_ERROR_INVALID_SURFACE;
231 if (obj_surface->private_data &&
232 obj_surface->free_private_data != gen9_free_surfaces_vp9) {
233 obj_surface->free_private_data(&obj_surface->private_data);
234 obj_surface->private_data = NULL;
237 if (obj_surface->private_data) {
238 /* if the frame width/height is already the same as the expected,
239 * it is unncessary to reallocate it.
241 vp9_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
242 if (vp9_surface->frame_width >= surface_param->frame_width ||
243 vp9_surface->frame_height >= surface_param->frame_height)
244 return VA_STATUS_SUCCESS;
246 obj_surface->free_private_data(&obj_surface->private_data);
247 obj_surface->private_data = NULL;
251 vp9_surface = calloc(1, sizeof(struct gen9_surface_vp9));
254 return VA_STATUS_ERROR_ALLOCATION_FAILED;
256 vp9_surface->ctx = ctx;
257 obj_surface->private_data = vp9_surface;
258 obj_surface->free_private_data = gen9_free_surfaces_vp9;
260 vp9_surface->frame_width = surface_param->frame_width;
261 vp9_surface->frame_height = surface_param->frame_height;
263 downscaled_width_4x = ALIGN(surface_param->frame_width / 4, 16);
264 downscaled_height_4x = ALIGN(surface_param->frame_height / 4, 16);
266 i965_CreateSurfaces(ctx,
268 downscaled_height_4x,
271 &vp9_surface->scaled_4x_surface_id);
273 vp9_surface->scaled_4x_surface_obj = SURFACE(vp9_surface->scaled_4x_surface_id);
275 if (!vp9_surface->scaled_4x_surface_obj) {
276 return VA_STATUS_ERROR_ALLOCATION_FAILED;
279 i965_check_alloc_surface_bo(ctx, vp9_surface->scaled_4x_surface_obj, 1,
280 VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
282 downscaled_width_16x = ALIGN(surface_param->frame_width / 16, 16);
283 downscaled_height_16x = ALIGN(surface_param->frame_height / 16, 16);
284 i965_CreateSurfaces(ctx,
285 downscaled_width_16x,
286 downscaled_height_16x,
289 &vp9_surface->scaled_16x_surface_id);
290 vp9_surface->scaled_16x_surface_obj = SURFACE(vp9_surface->scaled_16x_surface_id);
292 if (!vp9_surface->scaled_16x_surface_obj) {
293 return VA_STATUS_ERROR_ALLOCATION_FAILED;
296 i965_check_alloc_surface_bo(ctx, vp9_surface->scaled_16x_surface_obj, 1,
297 VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
299 return VA_STATUS_SUCCESS;
303 gen9_vp9_check_dys_surfaces(VADriverContextP ctx,
304 struct object_surface *obj_surface,
305 struct vp9_surface_param *surface_param)
307 struct i965_driver_data *i965 = i965_driver_data(ctx);
308 struct gen9_surface_vp9 *vp9_surface;
309 int dys_width_4x, dys_height_4x;
310 int dys_width_16x, dys_height_16x;
312 /* As this is handled after the surface checking, it is unnecessary
313 * to check the surface bo and vp9_priv_surface again
316 vp9_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
319 return VA_STATUS_ERROR_INVALID_SURFACE;
321 /* if the frame_width/height of dys_surface is the same as
322 * the expected, it is unnecessary to allocate it again
324 if (vp9_surface->dys_frame_width == surface_param->frame_width &&
325 vp9_surface->dys_frame_width == surface_param->frame_width)
326 return VA_STATUS_SUCCESS;
328 if (vp9_surface->dys_4x_surface_obj) {
329 i965_DestroySurfaces(vp9_surface->ctx, &vp9_surface->dys_4x_surface_id, 1);
330 vp9_surface->dys_4x_surface_id = VA_INVALID_SURFACE;
331 vp9_surface->dys_4x_surface_obj = NULL;
334 if (vp9_surface->dys_16x_surface_obj) {
335 i965_DestroySurfaces(vp9_surface->ctx, &vp9_surface->dys_16x_surface_id, 1);
336 vp9_surface->dys_16x_surface_id = VA_INVALID_SURFACE;
337 vp9_surface->dys_16x_surface_obj = NULL;
340 if (vp9_surface->dys_surface_obj) {
341 i965_DestroySurfaces(vp9_surface->ctx, &vp9_surface->dys_surface_id, 1);
342 vp9_surface->dys_surface_id = VA_INVALID_SURFACE;
343 vp9_surface->dys_surface_obj = NULL;
346 vp9_surface->dys_frame_width = surface_param->frame_width;
347 vp9_surface->dys_frame_height = surface_param->frame_height;
349 i965_CreateSurfaces(ctx,
350 surface_param->frame_width,
351 surface_param->frame_height,
354 &vp9_surface->dys_surface_id);
355 vp9_surface->dys_surface_obj = SURFACE(vp9_surface->dys_surface_id);
357 if (!vp9_surface->dys_surface_obj) {
358 return VA_STATUS_ERROR_ALLOCATION_FAILED;
361 i965_check_alloc_surface_bo(ctx, vp9_surface->dys_surface_obj, 1,
362 VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
364 dys_width_4x = ALIGN(surface_param->frame_width / 4, 16);
365 dys_height_4x = ALIGN(surface_param->frame_width / 4, 16);
367 i965_CreateSurfaces(ctx,
372 &vp9_surface->dys_4x_surface_id);
374 vp9_surface->dys_4x_surface_obj = SURFACE(vp9_surface->dys_4x_surface_id);
376 if (!vp9_surface->dys_4x_surface_obj) {
377 return VA_STATUS_ERROR_ALLOCATION_FAILED;
380 i965_check_alloc_surface_bo(ctx, vp9_surface->dys_4x_surface_obj, 1,
381 VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
383 dys_width_16x = ALIGN(surface_param->frame_width / 16, 16);
384 dys_height_16x = ALIGN(surface_param->frame_width / 16, 16);
385 i965_CreateSurfaces(ctx,
390 &vp9_surface->dys_16x_surface_id);
391 vp9_surface->dys_16x_surface_obj = SURFACE(vp9_surface->dys_16x_surface_id);
393 if (!vp9_surface->dys_16x_surface_obj) {
394 return VA_STATUS_ERROR_ALLOCATION_FAILED;
397 i965_check_alloc_surface_bo(ctx, vp9_surface->dys_16x_surface_obj, 1,
398 VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
400 return VA_STATUS_SUCCESS;
/*
 * Allocate (or re-allocate) every GPE buffer the VP9 encoder needs.
 * The BRC buffers are sized from compile-time constants and are refreshed on
 * every call; the resolution-dependent buffers (sized from the frame
 * dimensions in 64x64 super-blocks) are only re-allocated when the cached
 * res_width/res_height no longer cover the current frame.
 * Returns VA_STATUS_SUCCESS, VA_STATUS_ERROR_INVALID_PARAMETER, or
 * VA_STATUS_ERROR_ALLOCATION_FAILED.
 */
gen9_vp9_allocate_resources(VADriverContextP ctx,
                            struct encode_state *encode_state,
                            struct intel_encoder_context *encoder_context,
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
    struct gen9_vp9_state *vp9_state;
    int allocate_flag, i;
    uint32_t frame_width_in_sb, frame_height_in_sb, frame_sb_num;
    unsigned int width, height;

    vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;

    if (!vp9_state || !vp9_state->pic_param)
        return VA_STATUS_ERROR_INVALID_PARAMETER;

    /* the buffer related with BRC is not changed. So it is allocated
     * based on the input parameter */
    i965_free_gpe_resource(&vme_context->res_brc_history_buffer);
    i965_free_gpe_resource(&vme_context->res_brc_const_data_buffer);
    i965_free_gpe_resource(&vme_context->res_brc_mbenc_curbe_write_buffer);
    i965_free_gpe_resource(&vme_context->res_pic_state_brc_read_buffer);
    i965_free_gpe_resource(&vme_context->res_pic_state_brc_write_hfw_read_buffer);
    i965_free_gpe_resource(&vme_context->res_pic_state_hfw_write_buffer);
    i965_free_gpe_resource(&vme_context->res_seg_state_brc_read_buffer);
    i965_free_gpe_resource(&vme_context->res_seg_state_brc_write_buffer);
    i965_free_gpe_resource(&vme_context->res_brc_bitstream_size_buffer);
    i965_free_gpe_resource(&vme_context->res_brc_hfw_data_buffer);
    i965_free_gpe_resource(&vme_context->res_brc_mmdk_pak_buffer);

    allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
                                               &vme_context->res_brc_history_buffer,
                                               VP9_BRC_HISTORY_BUFFER_SIZE,
                                               "Brc History buffer");
        goto failed_allocation;

    allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
                                               &vme_context->res_brc_const_data_buffer,
                                               VP9_BRC_CONSTANTSURFACE_SIZE,
                                               "Brc Constant buffer");
        goto failed_allocation;

    /* curbe copy + interface descriptors written back by the BRC kernel */
    res_size = ALIGN(sizeof(vp9_mbenc_curbe_data), 64) + 128 +
               ALIGN(sizeof(struct gen8_interface_descriptor_data), 64) * NUM_VP9_MBENC;
    allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
                                               &vme_context->res_brc_mbenc_curbe_write_buffer,
        goto failed_allocation;

    res_size = VP9_PIC_STATE_BUFFER_SIZE * 4;
    allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
                                               &vme_context->res_pic_state_brc_read_buffer,
                                               "Pic State Brc_read");
        goto failed_allocation;

    res_size = VP9_PIC_STATE_BUFFER_SIZE * 4;
    allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
                                               &vme_context->res_pic_state_brc_write_hfw_read_buffer,
                                               "Pic State Brc_write Hfw_Read");
        goto failed_allocation;

    res_size = VP9_PIC_STATE_BUFFER_SIZE * 4;
    allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
                                               &vme_context->res_pic_state_hfw_write_buffer,
                                               "Pic State Hfw Write");
        goto failed_allocation;

    res_size = VP9_SEGMENT_STATE_BUFFER_SIZE;
    allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
                                               &vme_context->res_seg_state_brc_read_buffer,
                                               "Segment state brc_read");
        goto failed_allocation;

    res_size = VP9_SEGMENT_STATE_BUFFER_SIZE;
    allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
                                               &vme_context->res_seg_state_brc_write_buffer,
                                               "Segment state brc_write");
        goto failed_allocation;

    res_size = VP9_BRC_BITSTREAM_SIZE_BUFFER_SIZE;
    allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
                                               &vme_context->res_brc_bitstream_size_buffer,
                                               "Brc bitstream buffer");
        goto failed_allocation;

    res_size = VP9_HFW_BRC_DATA_BUFFER_SIZE;
    allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
                                               &vme_context->res_brc_hfw_data_buffer,
        goto failed_allocation;

    res_size = VP9_BRC_MMDK_PAK_BUFFER_SIZE;
    allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
                                               &vme_context->res_brc_mmdk_pak_buffer,
        goto failed_allocation;

    /* If the width/height of allocated buffer is greater than the expected,
     * it is unnecessary to allocate it again */
    if (vp9_state->res_width >= vp9_state->frame_width &&
        vp9_state->res_height >= vp9_state->frame_height) {
        return VA_STATUS_SUCCESS;

    /* frame dimensions in 64x64 super-blocks */
    frame_width_in_sb = ALIGN(vp9_state->frame_width, 64) / 64;
    frame_height_in_sb = ALIGN(vp9_state->frame_height, 64) / 64;
    frame_sb_num = frame_width_in_sb * frame_height_in_sb;

    i965_free_gpe_resource(&vme_context->res_hvd_line_buffer);
    res_size = frame_width_in_sb * 64;
    allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
                                               &vme_context->res_hvd_line_buffer,
                                               "VP9 hvd line line");
        goto failed_allocation;

    i965_free_gpe_resource(&vme_context->res_hvd_tile_line_buffer);
    allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
                                               &vme_context->res_hvd_tile_line_buffer,
                                               "VP9 hvd tile_line line");
        goto failed_allocation;

    i965_free_gpe_resource(&vme_context->res_deblocking_filter_line_buffer);
    res_size = frame_width_in_sb * 18 * 64;
    allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
                                               &vme_context->res_deblocking_filter_line_buffer,
                                               "VP9 deblocking filter line");
        goto failed_allocation;

    i965_free_gpe_resource(&vme_context->res_deblocking_filter_tile_line_buffer);
    res_size = frame_width_in_sb * 18 * 64;
    allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
                                               &vme_context->res_deblocking_filter_tile_line_buffer,
                                               "VP9 deblocking tile line");
        goto failed_allocation;

    i965_free_gpe_resource(&vme_context->res_deblocking_filter_tile_col_buffer);
    res_size = frame_height_in_sb * 17 * 64;
    allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
                                               &vme_context->res_deblocking_filter_tile_col_buffer,
                                               "VP9 deblocking tile col");
        goto failed_allocation;

    i965_free_gpe_resource(&vme_context->res_metadata_line_buffer);
    res_size = frame_width_in_sb * 5 * 64;
    allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
                                               &vme_context->res_metadata_line_buffer,
                                               "VP9 metadata line");
        goto failed_allocation;

    i965_free_gpe_resource(&vme_context->res_metadata_tile_line_buffer);
    res_size = frame_width_in_sb * 5 * 64;
    allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
                                               &vme_context->res_metadata_tile_line_buffer,
                                               "VP9 metadata tile line");
        goto failed_allocation;

    i965_free_gpe_resource(&vme_context->res_metadata_tile_col_buffer);
    res_size = frame_height_in_sb * 5 * 64;
    allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
                                               &vme_context->res_metadata_tile_col_buffer,
                                               "VP9 metadata tile col");
        goto failed_allocation;

    i965_free_gpe_resource(&vme_context->res_prob_buffer);
    allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
                                               &vme_context->res_prob_buffer,
        goto failed_allocation;

    i965_free_gpe_resource(&vme_context->res_segmentid_buffer);
    res_size = frame_sb_num * 64;
    allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
                                               &vme_context->res_segmentid_buffer,
        goto failed_allocation;

    i965_zero_gpe_resource(&vme_context->res_segmentid_buffer);

    i965_free_gpe_resource(&vme_context->res_prob_delta_buffer);
    allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
                                               &vme_context->res_prob_delta_buffer,
        goto failed_allocation;

    /* NOTE(review): the two statements below repeat the segment-id zeroing
     * and the prob-delta free/alloc performed immediately above — this looks
     * like an accidental duplication; confirm before removing. */
    i965_zero_gpe_resource(&vme_context->res_segmentid_buffer);

    i965_free_gpe_resource(&vme_context->res_prob_delta_buffer);
    allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
                                               &vme_context->res_prob_delta_buffer,
        goto failed_allocation;

    i965_free_gpe_resource(&vme_context->res_compressed_input_buffer);
    allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
                                               &vme_context->res_compressed_input_buffer,
                                               "VP9 compressed_input buffer");
        goto failed_allocation;

    i965_free_gpe_resource(&vme_context->res_prob_counter_buffer);
    allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
                                               &vme_context->res_prob_counter_buffer,
        goto failed_allocation;

    i965_free_gpe_resource(&vme_context->res_tile_record_streamout_buffer);
    res_size = frame_sb_num * 64;
    allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
                                               &vme_context->res_tile_record_streamout_buffer,
                                               "VP9 tile record stream_out");
        goto failed_allocation;

    i965_free_gpe_resource(&vme_context->res_cu_stat_streamout_buffer);
    res_size = frame_sb_num * 64;
    allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
                                               &vme_context->res_cu_stat_streamout_buffer,
                                               "VP9 CU stat stream_out");
        goto failed_allocation;

    /* 2D surfaces for the HME motion-vector data / distortion */
    width = vp9_state->downscaled_width_4x_in_mb * 32;
    height = vp9_state->downscaled_height_4x_in_mb * 16;
    i965_free_gpe_resource(&vme_context->s4x_memv_data_buffer);
    allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
                                                  &vme_context->s4x_memv_data_buffer,
        goto failed_allocation;

    width = vp9_state->downscaled_width_4x_in_mb * 8;
    height = vp9_state->downscaled_height_4x_in_mb * 16;
    i965_free_gpe_resource(&vme_context->s4x_memv_distortion_buffer);
    allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
                                                  &vme_context->s4x_memv_distortion_buffer,
                                                  "VP9 4x MEMV distorion");
        goto failed_allocation;

    width = ALIGN(vp9_state->downscaled_width_16x_in_mb * 32, 64);
    height = vp9_state->downscaled_height_16x_in_mb * 16;
    i965_free_gpe_resource(&vme_context->s16x_memv_data_buffer);
    allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
                                                  &vme_context->s16x_memv_data_buffer,
                                                  "VP9 16x MEMV data");
        goto failed_allocation;

    width = vp9_state->frame_width_in_mb * 16;
    height = vp9_state->frame_height_in_mb * 8;
    i965_free_gpe_resource(&vme_context->res_output_16x16_inter_modes);
    allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
                                                  &vme_context->res_output_16x16_inter_modes,
                                                  "VP9 output inter_mode");
        goto failed_allocation;

    /* double-buffered mode-decision output (ping-pong across frames) */
    res_size = vp9_state->frame_width_in_mb * vp9_state->frame_height_in_mb *
    for (i = 0; i < 2; i++) {
        i965_free_gpe_resource(&vme_context->res_mode_decision[i]);
        allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
                                                   &vme_context->res_mode_decision[i],
                                                   "VP9 mode decision");
            goto failed_allocation;

    /* double-buffered temporal MV buffer */
    res_size = frame_sb_num * 9 * 64;
    for (i = 0; i < 2; i++) {
        i965_free_gpe_resource(&vme_context->res_mv_temporal_buffer[i]);
        allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
                                                   &vme_context->res_mv_temporal_buffer[i],
            goto failed_allocation;

    vp9_state->mb_data_offset = ALIGN(frame_sb_num * 16, 4096) + 4096;
    res_size = vp9_state->mb_data_offset + frame_sb_num * 64 * 64 + 1000;
    i965_free_gpe_resource(&vme_context->res_mb_code_surface);
    allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
                                               &vme_context->res_mb_code_surface,
                                               ALIGN(res_size, 4096),
                                               "VP9 mb_code surface");
        goto failed_allocation;

    i965_free_gpe_resource(&vme_context->res_pak_uncompressed_input_buffer);
    allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
                                               &vme_context->res_pak_uncompressed_input_buffer,
                                               ALIGN(res_size, 4096),
                                               "VP9 pak_uncompressed_input");
        goto failed_allocation;

    if (!vme_context->frame_header_data) {
        /* allocate 512 bytes for generating the uncompressed header */
        vme_context->frame_header_data = calloc(1, 512);

    /* remember the covered resolution so later calls can skip re-allocation */
    vp9_state->res_width = vp9_state->frame_width;
    vp9_state->res_height = vp9_state->frame_height;

    return VA_STATUS_SUCCESS;

    /* reached via the failed_allocation gotos above */
    return VA_STATUS_ERROR_ALLOCATION_FAILED;
/* Release every GPE buffer owned by the VP9 encoder context, mirroring
 * gen9_vp9_allocate_resources(). BRC buffers are only released when BRC was
 * enabled (they are only allocated in that case). */
gen9_vp9_free_resources(struct gen9_encoder_context_vp9 *vme_context)
    struct gen9_vp9_state *vp9_state = (struct gen9_vp9_state *) vme_context->enc_priv_state;

    if (vp9_state->brc_enabled) {
        i965_free_gpe_resource(&vme_context->res_brc_history_buffer);
        i965_free_gpe_resource(&vme_context->res_brc_const_data_buffer);
        i965_free_gpe_resource(&vme_context->res_brc_mbenc_curbe_write_buffer);
        i965_free_gpe_resource(&vme_context->res_pic_state_brc_read_buffer);
        i965_free_gpe_resource(&vme_context->res_pic_state_brc_write_hfw_read_buffer);
        i965_free_gpe_resource(&vme_context->res_pic_state_hfw_write_buffer);
        i965_free_gpe_resource(&vme_context->res_seg_state_brc_read_buffer);
        i965_free_gpe_resource(&vme_context->res_seg_state_brc_write_buffer);
        i965_free_gpe_resource(&vme_context->res_brc_bitstream_size_buffer);
        i965_free_gpe_resource(&vme_context->res_brc_hfw_data_buffer);
        i965_free_gpe_resource(&vme_context->res_brc_mmdk_pak_buffer);

    /* resolution-dependent buffers */
    i965_free_gpe_resource(&vme_context->res_hvd_line_buffer);
    i965_free_gpe_resource(&vme_context->res_hvd_tile_line_buffer);
    i965_free_gpe_resource(&vme_context->res_deblocking_filter_line_buffer);
    i965_free_gpe_resource(&vme_context->res_deblocking_filter_tile_line_buffer);
    i965_free_gpe_resource(&vme_context->res_deblocking_filter_tile_col_buffer);
    i965_free_gpe_resource(&vme_context->res_metadata_line_buffer);
    i965_free_gpe_resource(&vme_context->res_metadata_tile_line_buffer);
    i965_free_gpe_resource(&vme_context->res_metadata_tile_col_buffer);
    i965_free_gpe_resource(&vme_context->res_prob_buffer);
    i965_free_gpe_resource(&vme_context->res_segmentid_buffer);
    i965_free_gpe_resource(&vme_context->res_prob_delta_buffer);
    i965_free_gpe_resource(&vme_context->res_prob_counter_buffer);
    i965_free_gpe_resource(&vme_context->res_tile_record_streamout_buffer);
    i965_free_gpe_resource(&vme_context->res_cu_stat_streamout_buffer);
    i965_free_gpe_resource(&vme_context->s4x_memv_data_buffer);
    i965_free_gpe_resource(&vme_context->s4x_memv_distortion_buffer);
    i965_free_gpe_resource(&vme_context->s16x_memv_data_buffer);
    i965_free_gpe_resource(&vme_context->res_output_16x16_inter_modes);
    for (i = 0; i < 2; i++) {
        i965_free_gpe_resource(&vme_context->res_mode_decision[i]);

    for (i = 0; i < 2; i++) {
        i965_free_gpe_resource(&vme_context->res_mv_temporal_buffer[i]);

    i965_free_gpe_resource(&vme_context->res_compressed_input_buffer);
    i965_free_gpe_resource(&vme_context->res_mb_code_surface);
    i965_free_gpe_resource(&vme_context->res_pak_uncompressed_input_buffer);

    /* CPU-side scratch for the uncompressed frame header */
    if (vme_context->frame_header_data) {
        free(vme_context->frame_header_data);
        vme_context->frame_header_data = NULL;
/* Translate the simplified per-kernel walker description into a full
 * MEDIA_OBJECT_WALKER configuration: a plain raster scan when the kernel
 * has no inter-block dependency, otherwise a wavefront walk (45Z-degree or
 * the default 26-degree pattern) with the scoreboard enabled. */
gen9_init_media_object_walker_parameter(struct intel_encoder_context *encoder_context,
                                        struct gpe_encoder_kernel_walker_parameter *kernel_walker_param,
                                        struct gpe_media_object_walker_parameter *walker_param)
    memset(walker_param, 0, sizeof(*walker_param));

    walker_param->use_scoreboard = kernel_walker_param->use_scoreboard;

    walker_param->block_resolution.x = kernel_walker_param->resolution_x;
    walker_param->block_resolution.y = kernel_walker_param->resolution_y;

    walker_param->global_resolution.x = kernel_walker_param->resolution_x;
    walker_param->global_resolution.y = kernel_walker_param->resolution_y;

    walker_param->global_outer_loop_stride.x = kernel_walker_param->resolution_x;
    walker_param->global_outer_loop_stride.y = 0;

    walker_param->global_inner_loop_unit.x = 0;
    walker_param->global_inner_loop_unit.y = kernel_walker_param->resolution_y;

    walker_param->local_loop_exec_count = 0xFFFF; //MAX VALUE
    walker_param->global_loop_exec_count = 0xFFFF; //MAX VALUE

    if (kernel_walker_param->no_dependency) {
        /* no scoreboard needed when blocks are independent */
        walker_param->scoreboard_mask = 0;
        walker_param->use_scoreboard = 0;
        // Raster scan walking pattern
        walker_param->local_outer_loop_stride.x = 0;
        walker_param->local_outer_loop_stride.y = 1;
        walker_param->local_inner_loop_unit.x = 1;
        walker_param->local_inner_loop_unit.y = 0;
        walker_param->local_end.x = kernel_walker_param->resolution_x - 1;
        walker_param->local_end.y = 0;
        /* dependent walk: wavefront configuration follows */
        walker_param->local_end.x = 0;
        walker_param->local_end.y = 0;
        if (kernel_walker_param->walker_degree == VP9_45Z_DEGREE) {
            /* 45-degree zig-zag pattern: 4 scoreboard dependencies */
            walker_param->scoreboard_mask = 0x0F;

            walker_param->global_loop_exec_count = 0x3FF;
            walker_param->local_loop_exec_count = 0x3FF;

            /* double the vertical resolution, halve the horizontal one */
            walker_param->global_resolution.x = (unsigned int)(kernel_walker_param->resolution_x / 2.f) + 1;
            walker_param->global_resolution.y = 2 * kernel_walker_param->resolution_y;

            walker_param->global_start.x = 0;
            walker_param->global_start.y = 0;

            walker_param->global_outer_loop_stride.x = walker_param->global_resolution.x;
            walker_param->global_outer_loop_stride.y = 0;

            walker_param->global_inner_loop_unit.x = 0;
            walker_param->global_inner_loop_unit.y = walker_param->global_resolution.y;

            walker_param->block_resolution.x = walker_param->global_resolution.x;
            walker_param->block_resolution.y = walker_param->global_resolution.y;

            walker_param->local_start.x = 0;
            walker_param->local_start.y = 0;

            walker_param->local_outer_loop_stride.x = 1;
            walker_param->local_outer_loop_stride.y = 0;

            walker_param->local_inner_loop_unit.x = -1;
            walker_param->local_inner_loop_unit.y = 4;

            walker_param->middle_loop_extra_steps = 3;
            walker_param->mid_loop_unit_x = 0;
            walker_param->mid_loop_unit_y = 1;

            /* default (26-degree) wavefront pattern */
            walker_param->scoreboard_mask = 0x0F;
            walker_param->local_outer_loop_stride.x = 1;
            walker_param->local_outer_loop_stride.y = 0;
            walker_param->local_inner_loop_unit.x = -2;
            walker_param->local_inner_loop_unit.y = 1;
/* Submit one MEDIA_OBJECT dispatch of the given GPE kernel inside an atomic
 * batch: first records the media function id into the status buffer (so the
 * status query can tell which kernel ran last), then sets up the GPE
 * pipeline, emits the media object, flushes and submits the batch. */
gen9_run_kernel_media_object(VADriverContextP ctx,
                             struct intel_encoder_context *encoder_context,
                             struct i965_gpe_context *gpe_context,
                             struct gpe_media_object_parameter *param)
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct vp9_encode_status_buffer_internal *status_buffer;
    struct gen9_vp9_state *vp9_state;
    struct gpe_mi_store_data_imm_parameter mi_store_data_imm;

    vp9_state = (struct gen9_vp9_state *)(encoder_context->enc_priv_state);
    if (!vp9_state || !batch)

    intel_batchbuffer_start_atomic(batch, 0x1000);

    /* tag the status buffer with the media function being dispatched */
    status_buffer = &(vp9_state->status_buffer);
    memset(&mi_store_data_imm, 0, sizeof(mi_store_data_imm));
    mi_store_data_imm.bo = status_buffer->bo;
    mi_store_data_imm.offset = status_buffer->media_index_offset;
    mi_store_data_imm.dw0 = media_function;
    gen8_gpe_mi_store_data_imm(ctx, batch, &mi_store_data_imm);

    intel_batchbuffer_emit_mi_flush(batch);
    gen9_gpe_pipeline_setup(ctx, gpe_context, batch);
    gen8_gpe_media_object(ctx, gpe_context, batch, param);
    gen8_gpe_media_state_flush(ctx, gpe_context, batch);

    gen9_gpe_pipeline_end(ctx, gpe_context, batch);

    intel_batchbuffer_end_atomic(batch);

    intel_batchbuffer_flush(batch);
/* Same as gen9_run_kernel_media_object() but dispatches the kernel with a
 * MEDIA_OBJECT_WALKER command (hardware thread-space walker) instead of a
 * single MEDIA_OBJECT. Also tags the status buffer with the media function
 * id before submission. */
gen9_run_kernel_media_object_walker(VADriverContextP ctx,
                                    struct intel_encoder_context *encoder_context,
                                    struct i965_gpe_context *gpe_context,
                                    struct gpe_media_object_walker_parameter *param)
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct vp9_encode_status_buffer_internal *status_buffer;
    struct gen9_vp9_state *vp9_state;
    struct gpe_mi_store_data_imm_parameter mi_store_data_imm;

    vp9_state = (struct gen9_vp9_state *)(encoder_context->enc_priv_state);
    if (!vp9_state || !batch)

    intel_batchbuffer_start_atomic(batch, 0x1000);

    intel_batchbuffer_emit_mi_flush(batch);

    /* tag the status buffer with the media function being dispatched */
    status_buffer = &(vp9_state->status_buffer);
    memset(&mi_store_data_imm, 0, sizeof(mi_store_data_imm));
    mi_store_data_imm.bo = status_buffer->bo;
    mi_store_data_imm.offset = status_buffer->media_index_offset;
    mi_store_data_imm.dw0 = media_function;
    gen8_gpe_mi_store_data_imm(ctx, batch, &mi_store_data_imm);

    gen9_gpe_pipeline_setup(ctx, gpe_context, batch);
    gen8_gpe_media_object_walker(ctx, gpe_context, batch, param);
    gen8_gpe_media_state_flush(ctx, gpe_context, batch);

    gen9_gpe_pipeline_end(ctx, gpe_context, batch);

    intel_batchbuffer_end_atomic(batch);

    intel_batchbuffer_flush(batch);
/*
 * gen9_vp9_set_curbe_brc:
 * Populate the CURBE (kernel constant buffer) for the VP9 BRC kernels.
 * Which groups of DWORDs are programmed depends on
 * param->media_state_type (BRC INIT/RESET vs. BRC UPDATE vs. the
 * I-frame distortion pass); the binding-table indices written at the
 * end are common to all BRC media states.
 */
1001 void gen9_vp9_set_curbe_brc(VADriverContextP ctx,
1002 struct encode_state *encode_state,
1003 struct i965_gpe_context *gpe_context,
1004 struct intel_encoder_context *encoder_context,
1005 struct gen9_vp9_brc_curbe_param *param)
1007 VAEncSequenceParameterBufferVP9 *seq_param;
1008 VAEncPictureParameterBufferVP9 *pic_param;
1009 VAEncMiscParameterTypeVP9PerSegmantParam *segment_param;
1010 vp9_brc_curbe_data *cmd;
1011 double dbps_ratio, dInputBitsPerFrame;
1012 struct gen9_vp9_state *vp9_state;
1014 vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
1016 pic_param = param->ppic_param;
1017 seq_param = param->pseq_param;
1018 segment_param = param->psegment_param;
1020 cmd = i965_gpe_context_map_curbe(gpe_context);
1025 memset(cmd, 0, sizeof(vp9_brc_curbe_data));
/* Without dynamic scaling the BRC works on the source dimensions;
 * otherwise on the (scaled) destination dimensions. */
1027 if (!vp9_state->dys_enabled)
1029 cmd->dw0.frame_width = pic_param->frame_width_src;
1030 cmd->dw0.frame_height = pic_param->frame_height_src;
1034 cmd->dw0.frame_width = pic_param->frame_width_dst;
1035 cmd->dw0.frame_height = pic_param->frame_height_dst;
1038 cmd->dw1.frame_type = vp9_state->picture_coding_type;
1039 cmd->dw1.segmentation_enable = 0;
1040 cmd->dw1.ref_frame_flags = vp9_state->ref_frame_flag;
1041 cmd->dw1.num_tlevels = 1;
1043 switch(param->media_state_type)
/* --- One-time (or reset) initialization of the BRC state --- */
1045 case VP9_MEDIA_STATE_BRC_INIT_RESET:
1047 cmd->dw3.max_level_ratiot0 = 0;
1048 cmd->dw3.max_level_ratiot1 = 0;
1049 cmd->dw3.max_level_ratiot2 = 0;
1050 cmd->dw3.max_level_ratiot3 = 0;
1052 cmd->dw4.profile_level_max_frame = seq_param->max_frame_width *
1053 seq_param->max_frame_height;
1054 cmd->dw5.init_buf_fullness = vp9_state->init_vbv_buffer_fullness_in_bit;
1055 cmd->dw6.buf_size = vp9_state->vbv_buffer_size_in_bit;
/* Bit rates are rounded up to whole VP9_BRC_KBPS (1000 bps) units. */
1056 cmd->dw7.target_bit_rate = (vp9_state->target_bit_rate + VP9_BRC_KBPS - 1) / VP9_BRC_KBPS *
1058 cmd->dw8.max_bit_rate = (vp9_state->max_bit_rate + VP9_BRC_KBPS - 1) / VP9_BRC_KBPS *
1060 cmd->dw9.min_bit_rate = (vp9_state->min_bit_rate + VP9_BRC_KBPS - 1) / VP9_BRC_KBPS *
1062 cmd->dw10.frame_ratem = vp9_state->framerate.num;
1063 cmd->dw11.frame_rated = vp9_state->framerate.den;
1065 cmd->dw14.avbr_accuracy = 30;
1066 cmd->dw14.avbr_convergence = 150;
/* Select the BRC flavor; CBR forces max==target and min==0. */
1068 if (encoder_context->rate_control_mode == VA_RC_CBR)
1070 cmd->dw12.brc_flag = BRC_KERNEL_CBR;
1071 cmd->dw8.max_bit_rate = cmd->dw7.target_bit_rate;
1072 cmd->dw9.min_bit_rate = 0;
1074 else if (encoder_context->rate_control_mode == VA_RC_VBR)
1076 cmd->dw12.brc_flag = BRC_KERNEL_VBR;
1080 cmd->dw12.brc_flag = BRC_KERNEL_CQL;
1081 cmd->dw16.cq_level = 30;
1083 cmd->dw12.gopp = seq_param->intra_period - 1;
1085 cmd->dw13.init_frame_width = pic_param->frame_width_src;
1086 cmd->dw13.init_frame_height = pic_param->frame_height_src;
1088 cmd->dw15.min_qp = 0;
1089 cmd->dw15.max_qp = 255;
1091 cmd->dw16.cq_level = 30;
1093 cmd->dw17.enable_dynamic_scaling = vp9_state->dys_in_use;
1094 cmd->dw17.brc_overshoot_cbr_pct = 150;
/* Bits-per-frame / buffer ratio, clamped (clamp bounds on elided lines). */
1096 dInputBitsPerFrame = (double)cmd->dw8.max_bit_rate * (double)vp9_state->framerate.den / (double)vp9_state->framerate.num;
1097 dbps_ratio = dInputBitsPerFrame / ((double)vp9_state->vbv_buffer_size_in_bit / 30.0);
1098 if (dbps_ratio < 0.1)
1100 if (dbps_ratio > 3.5)
/* Export init values so the UPDATE pass can track buffer fullness. */
1103 *param->pbrc_init_reset_buf_size_in_bits = cmd->dw6.buf_size;
1104 *param->pbrc_init_reset_input_bits_per_frame = dInputBitsPerFrame;
/* Deviation-threshold tables (P/VBR/key frame) derived from dbps_ratio;
 * negative values are stored as two's complement in the uint32 fields. */
1106 cmd->dw18.pframe_deviation_threshold0 = (uint32_t)(-50 * pow(0.90, dbps_ratio));
1107 cmd->dw18.pframe_deviation_threshold1 = (uint32_t)(-50 * pow(0.66, dbps_ratio));
1108 cmd->dw18.pframe_deviation_threshold2 = (uint32_t)(-50 * pow(0.46, dbps_ratio));
1109 cmd->dw18.pframe_deviation_threshold3 = (uint32_t)(-50 * pow(0.3, dbps_ratio));
1110 cmd->dw19.pframe_deviation_threshold4 = (uint32_t)(50 * pow(0.3, dbps_ratio));
1111 cmd->dw19.pframe_deviation_threshold5 = (uint32_t)(50 * pow(0.46, dbps_ratio));
1112 cmd->dw19.pframe_deviation_threshold6 = (uint32_t)(50 * pow(0.7, dbps_ratio));
1113 cmd->dw19.pframe_deviation_threshold7 = (uint32_t)(50 * pow(0.9, dbps_ratio));
1115 cmd->dw20.vbr_deviation_threshold0 = (uint32_t)(-50 * pow(0.9, dbps_ratio));
1116 cmd->dw20.vbr_deviation_threshold1 = (uint32_t)(-50 * pow(0.7, dbps_ratio));
1117 cmd->dw20.vbr_deviation_threshold2 = (uint32_t)(-50 * pow(0.5, dbps_ratio));
1118 cmd->dw20.vbr_deviation_threshold3 = (uint32_t)(-50 * pow(0.3, dbps_ratio));
1119 cmd->dw21.vbr_deviation_threshold4 = (uint32_t)(100 * pow(0.4, dbps_ratio));
1120 cmd->dw21.vbr_deviation_threshold5 = (uint32_t)(100 * pow(0.5, dbps_ratio));
1121 cmd->dw21.vbr_deviation_threshold6 = (uint32_t)(100 * pow(0.75, dbps_ratio));
1122 cmd->dw21.vbr_deviation_threshold7 = (uint32_t)(100 * pow(0.9, dbps_ratio));
1124 cmd->dw22.kframe_deviation_threshold0 = (uint32_t)(-50 * pow(0.8, dbps_ratio));
1125 cmd->dw22.kframe_deviation_threshold1 = (uint32_t)(-50 * pow(0.6, dbps_ratio));
1126 cmd->dw22.kframe_deviation_threshold2 = (uint32_t)(-50 * pow(0.34, dbps_ratio));
1127 cmd->dw22.kframe_deviation_threshold3 = (uint32_t)(-50 * pow(0.2, dbps_ratio));
1128 cmd->dw23.kframe_deviation_threshold4 = (uint32_t)(50 * pow(0.2, dbps_ratio));
1129 cmd->dw23.kframe_deviation_threshold5 = (uint32_t)(50 * pow(0.4, dbps_ratio));
1130 cmd->dw23.kframe_deviation_threshold6 = (uint32_t)(50 * pow(0.66, dbps_ratio));
1131 cmd->dw23.kframe_deviation_threshold7 = (uint32_t)(50 * pow(0.9, dbps_ratio));
/* --- Per-frame BRC update --- */
1135 case VP9_MEDIA_STATE_BRC_UPDATE:
1137 cmd->dw15.min_qp = 0;
1138 cmd->dw15.max_qp = 255;
1140 cmd->dw25.frame_number = param->frame_number;
1142 // Used in dynamic scaling. set to zero for now
1143 cmd->dw27.hrd_buffer_fullness_upper_limit = 0;
1144 cmd->dw28.hrd_buffer_fullness_lower_limit = 0;
/* Per-segment QP deltas, only meaningful with segmentation on. */
1146 if (pic_param->pic_flags.bits.segmentation_enabled) {
1147 cmd->dw32.seg_delta_qp0 = segment_param->seg_data[0].segment_qindex_delta;
1148 cmd->dw32.seg_delta_qp1 = segment_param->seg_data[1].segment_qindex_delta;
1149 cmd->dw32.seg_delta_qp2 = segment_param->seg_data[2].segment_qindex_delta;
1150 cmd->dw32.seg_delta_qp3 = segment_param->seg_data[3].segment_qindex_delta;
1152 cmd->dw33.seg_delta_qp4 = segment_param->seg_data[4].segment_qindex_delta;
1153 cmd->dw33.seg_delta_qp5 = segment_param->seg_data[5].segment_qindex_delta;
1154 cmd->dw33.seg_delta_qp6 = segment_param->seg_data[6].segment_qindex_delta;
1155 cmd->dw33.seg_delta_qp7 = segment_param->seg_data[7].segment_qindex_delta;
1158 //cmd->dw34.temporal_id = pPicParams->temporal_idi;
1159 cmd->dw34.temporal_id = 0;
1160 cmd->dw34.multi_ref_qp_check = param->multi_ref_qp_check;
1162 cmd->dw35.max_num_pak_passes = param->brc_num_pak_passes;
1163 cmd->dw35.sync_async = 0;
1164 cmd->dw35.mbrc = param->mbbrc_enabled;
/* Wrap the running target-buffer fullness when it overflows the VBV
 * buffer size and flag the overflow to the kernel. */
1165 if (*param->pbrc_init_current_target_buf_full_in_bits >
1166 ((double)(*param->pbrc_init_reset_buf_size_in_bits))) {
1167 *param->pbrc_init_current_target_buf_full_in_bits -=
1168 (double)(*param->pbrc_init_reset_buf_size_in_bits);
1169 cmd->dw35.overflow = 1;
1172 cmd->dw35.overflow = 0;
1174 cmd->dw24.target_size = (uint32_t)(*param->pbrc_init_current_target_buf_full_in_bits);
1176 cmd->dw36.segmentation = pic_param->pic_flags.bits.segmentation_enabled;
/* Advance the fullness tracker by one frame's worth of bits. */
1178 *param->pbrc_init_current_target_buf_full_in_bits += *param->pbrc_init_reset_input_bits_per_frame;
1180 cmd->dw38.qdelta_ydc = pic_param->luma_dc_qindex_delta;
1181 cmd->dw38.qdelta_uvdc = pic_param->chroma_dc_qindex_delta;
1182 cmd->dw38.qdelta_uvac = pic_param->chroma_ac_qindex_delta;
/* --- I-frame distortion pass --- */
1186 case VP9_MEDIA_STATE_ENC_I_FRAME_DIST:
1187 cmd->dw2.intra_mode_disable = 0;
/* Binding-table indices shared by all BRC media states. */
1193 cmd->dw48.brc_y4x_input_bti = VP9_BTI_BRC_SRCY4X_G9;
1194 cmd->dw49.brc_vme_coarse_intra_input_bti = VP9_BTI_BRC_VME_COARSE_INTRA_G9;
1195 cmd->dw50.brc_history_buffer_bti = VP9_BTI_BRC_HISTORY_G9;
1196 cmd->dw51.brc_const_data_input_bti = VP9_BTI_BRC_CONSTANT_DATA_G9;
1197 cmd->dw52.brc_distortion_bti = VP9_BTI_BRC_DISTORTION_G9;
1198 cmd->dw53.brc_mmdk_pak_output_bti = VP9_BTI_BRC_MMDK_PAK_OUTPUT_G9;
1199 cmd->dw54.brc_enccurbe_input_bti = VP9_BTI_BRC_MBENC_CURBE_INPUT_G9;
1200 cmd->dw55.brc_enccurbe_output_bti = VP9_BTI_BRC_MBENC_CURBE_OUTPUT_G9;
1201 cmd->dw56.brc_pic_state_input_bti = VP9_BTI_BRC_PIC_STATE_INPUT_G9;
1202 cmd->dw57.brc_pic_state_output_bti = VP9_BTI_BRC_PIC_STATE_OUTPUT_G9;
1203 cmd->dw58.brc_seg_state_input_bti = VP9_BTI_BRC_SEGMENT_STATE_INPUT_G9;
1204 cmd->dw59.brc_seg_state_output_bti = VP9_BTI_BRC_SEGMENT_STATE_OUTPUT_G9;
1205 cmd->dw60.brc_bitstream_size_data_bti = VP9_BTI_BRC_BITSTREAM_SIZE_G9;
1206 cmd->dw61.brc_hfw_data_output_bti = VP9_BTI_BRC_HFW_DATA_G9;
1208 i965_gpe_context_unmap_curbe(gpe_context);
/*
 * Bind the surfaces needed by the BRC INIT/RESET kernel:
 * the BRC history buffer and the 4x MEMV distortion surface.
 */
1213 gen9_brc_init_reset_add_surfaces_vp9(VADriverContextP ctx,
1214 struct encode_state *encode_state,
1215 struct intel_encoder_context *encoder_context,
1216 struct i965_gpe_context *gpe_context)
1218 struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
/* BRC history buffer (persists BRC state across frames). */
1220 gen9_add_buffer_gpe_surface(ctx,
1222 &vme_context->res_brc_history_buffer,
1224 vme_context->res_brc_history_buffer.size,
1226 VP9_BTI_BRC_HISTORY_G9);
/* 4x-downscaled ME distortion surface. */
1228 gen9_add_buffer_2d_gpe_surface(ctx,
1230 &vme_context->s4x_memv_distortion_buffer,
1232 I965_SURFACEFORMAT_R8_UNORM,
1233 VP9_BTI_BRC_DISTORTION_G9);
1236 /* Functions related to BRC (bit-rate control) */
/*
 * Run the BRC INIT (first frame) or BRC RESET (subsequent re-init)
 * kernel as a single media object.  Returns a VA_STATUS_* code.
 */
1238 gen9_vp9_brc_init_reset_kernel(VADriverContextP ctx,
1239 struct encode_state *encode_state,
1240 struct intel_encoder_context *encoder_context)
1242 struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
1243 struct vp9_brc_context *brc_context = &vme_context->brc_context;
1244 struct gpe_media_object_parameter media_object_param;
1245 struct i965_gpe_context *gpe_context;
1246 int gpe_index = VP9_BRC_INIT;
1247 int media_function = VP9_MEDIA_STATE_BRC_INIT_RESET;
1248 struct gen9_vp9_brc_curbe_param brc_initreset_curbe;
1249 VAEncPictureParameterBufferVP9 *pic_param;
1250 struct gen9_vp9_state *vp9_state;
1252 vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
1254 if (!vp9_state || !vp9_state->pic_param)
1255 return VA_STATUS_ERROR_INVALID_PARAMETER;
1257 pic_param = vp9_state->pic_param;
/* After the first INIT, later invocations use the RESET kernel. */
1259 if (vp9_state->brc_inited)
1260 gpe_index = VP9_BRC_RESET;
1262 gpe_context = &brc_context->gpe_contexts[gpe_index];
1264 gen8_gpe_context_init(ctx, gpe_context);
1265 gen9_gpe_reset_binding_table(ctx, gpe_context);
/* Fill the curbe parameter block; pointers reference vp9_state fields
 * so the kernel's outputs feed back into the per-context BRC state. */
1267 brc_initreset_curbe.media_state_type = media_function;
1268 brc_initreset_curbe.curr_frame = pic_param->reconstructed_frame;
1269 brc_initreset_curbe.ppic_param = vp9_state->pic_param;
1270 brc_initreset_curbe.pseq_param = vp9_state->seq_param;
1271 brc_initreset_curbe.psegment_param = vp9_state->segment_param;
1272 brc_initreset_curbe.frame_width = vp9_state->frame_width;
1273 brc_initreset_curbe.frame_height = vp9_state->frame_height;
1274 brc_initreset_curbe.pbrc_init_current_target_buf_full_in_bits =
1275 &vp9_state->brc_init_current_target_buf_full_in_bits;
1276 brc_initreset_curbe.pbrc_init_reset_buf_size_in_bits =
1277 &vp9_state->brc_init_reset_buf_size_in_bits;
1278 brc_initreset_curbe.pbrc_init_reset_input_bits_per_frame =
1279 &vp9_state->brc_init_reset_input_bits_per_frame;
1280 brc_initreset_curbe.picture_coding_type = vp9_state->picture_coding_type;
1281 brc_initreset_curbe.initbrc = !vp9_state->brc_inited;
1282 brc_initreset_curbe.mbbrc_enabled = 0;
1283 brc_initreset_curbe.ref_frame_flag = vp9_state->ref_frame_flag;
1285 vme_context->pfn_set_curbe_brc(ctx, encode_state,
1288 &brc_initreset_curbe);
1290 gen9_brc_init_reset_add_surfaces_vp9(ctx, encode_state, encoder_context, gpe_context);
1291 gen8_gpe_setup_interface_data(ctx, gpe_context);
/* Single media object; no walker needed for INIT/RESET. */
1293 memset(&media_object_param, 0, sizeof(media_object_param));
1294 gen9_run_kernel_media_object(ctx, encoder_context, gpe_context, media_function, &media_object_param);
1296 return VA_STATUS_SUCCESS;
/*
 * Bind the surfaces for the BRC intra-distortion kernel: the 4x-scaled
 * source (2D and VME views) and the 4x MEMV distortion output.
 */
1300 gen9_brc_intra_dist_add_surfaces_vp9(VADriverContextP ctx,
1301 struct encode_state *encode_state,
1302 struct intel_encoder_context *encoder_context,
1303 struct i965_gpe_context *gpe_context)
1305 struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
1307 struct object_surface *obj_surface;
1308 struct gen9_surface_vp9 *vp9_priv_surface;
1310 /* sScaled4xSurface surface */
1311 obj_surface = encode_state->reconstructed_object;
1313 vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
/* The 4x-downscaled copy lives in the reconstructed surface's
 * private data; bind it both as a plain 2D surface and as a VME
 * (advanced) surface for coarse intra search. */
1315 obj_surface = vp9_priv_surface->scaled_4x_surface_obj;
1316 gen9_add_2d_gpe_surface(ctx, gpe_context,
1319 I965_SURFACEFORMAT_R8_UNORM,
1320 VP9_BTI_BRC_SRCY4X_G9
1323 gen9_add_adv_gpe_surface(ctx, gpe_context,
1325 VP9_BTI_BRC_VME_COARSE_INTRA_G9);
/* Distortion output surface. */
1327 gen9_add_buffer_2d_gpe_surface(ctx,
1329 &vme_context->s4x_memv_distortion_buffer,
1331 I965_SURFACEFORMAT_R8_UNORM,
1332 VP9_BTI_BRC_DISTORTION_G9);
1337 /* Functions related to BRC (bit-rate control) */
/*
 * Run the BRC intra-distortion kernel over the 4x-downscaled frame
 * using a media-object walker.  Returns a VA_STATUS_* code.
 */
1339 gen9_vp9_brc_intra_dist_kernel(VADriverContextP ctx,
1340 struct encode_state *encode_state,
1341 struct intel_encoder_context *encoder_context)
1343 struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
1344 struct vp9_brc_context *brc_context = &vme_context->brc_context;
1345 struct i965_gpe_context *gpe_context;
1346 int gpe_index = VP9_BRC_INTRA_DIST;
1347 int media_function = VP9_MEDIA_STATE_ENC_I_FRAME_DIST;
1348 struct gen9_vp9_brc_curbe_param brc_intra_dist_curbe;
1349 VAEncPictureParameterBufferVP9 *pic_param;
1350 struct gen9_vp9_state *vp9_state;
1351 struct gpe_media_object_walker_parameter media_object_walker_param;
1352 struct gpe_encoder_kernel_walker_parameter kernel_walker_param;
1354 vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
1356 if (!vp9_state || !vp9_state->pic_param)
1357 return VA_STATUS_ERROR_INVALID_PARAMETER;
1359 pic_param = vp9_state->pic_param;
1361 gpe_context = &brc_context->gpe_contexts[gpe_index];
1363 gen8_gpe_context_init(ctx, gpe_context);
1364 gen9_gpe_reset_binding_table(ctx, gpe_context);
/* Same curbe layout as the other BRC passes; only the media state
 * type distinguishes the intra-distortion case. */
1366 brc_intra_dist_curbe.media_state_type = media_function;
1367 brc_intra_dist_curbe.curr_frame = pic_param->reconstructed_frame;
1368 brc_intra_dist_curbe.ppic_param = vp9_state->pic_param;
1369 brc_intra_dist_curbe.pseq_param = vp9_state->seq_param;
1370 brc_intra_dist_curbe.psegment_param = vp9_state->segment_param;
1371 brc_intra_dist_curbe.frame_width = vp9_state->frame_width;
1372 brc_intra_dist_curbe.frame_height = vp9_state->frame_height;
1373 brc_intra_dist_curbe.pbrc_init_current_target_buf_full_in_bits =
1374 &vp9_state->brc_init_current_target_buf_full_in_bits;
1375 brc_intra_dist_curbe.pbrc_init_reset_buf_size_in_bits =
1376 &vp9_state->brc_init_reset_buf_size_in_bits;
1377 brc_intra_dist_curbe.pbrc_init_reset_input_bits_per_frame =
1378 &vp9_state->brc_init_reset_input_bits_per_frame;
1379 brc_intra_dist_curbe.picture_coding_type = vp9_state->picture_coding_type;
1380 brc_intra_dist_curbe.initbrc = !vp9_state->brc_inited;
1381 brc_intra_dist_curbe.mbbrc_enabled = 0;
1382 brc_intra_dist_curbe.ref_frame_flag = vp9_state->ref_frame_flag;
1384 vme_context->pfn_set_curbe_brc(ctx, encode_state,
1387 &brc_intra_dist_curbe);
1389 /* zero distortion buffer */
1390 i965_zero_gpe_resource(&vme_context->s4x_memv_distortion_buffer);
1392 gen9_brc_intra_dist_add_surfaces_vp9(ctx, encode_state, encoder_context, gpe_context);
1393 gen8_gpe_setup_interface_data(ctx, gpe_context);
/* Walk the 4x-downscaled frame in MB units; threads are independent. */
1395 memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
1396 kernel_walker_param.resolution_x = vme_context->downscaled_width_in_mb4x;
1397 kernel_walker_param.resolution_y = vme_context->downscaled_height_in_mb4x;
1398 kernel_walker_param.no_dependency = 1;
1400 gen9_init_media_object_walker_parameter(encoder_context, &kernel_walker_param, &media_object_walker_param);
1402 gen9_run_kernel_media_object_walker(ctx, encoder_context,
1405 &media_object_walker_param);
1407 return VA_STATUS_SUCCESS;
/*
 * Build the second-level batch buffer holding the HCP_VP9_PIC_STATE
 * command for each of the (up to 4) PAK passes, written into
 * gpe_resource.  Each pass occupies VP9_PIC_STATE_BUFFER_SIZE bytes
 * and ends with MI_BATCH_BUFFER_END.
 */
1411 intel_vp9enc_construct_picstate_batchbuf(VADriverContextP ctx,
1412 struct encode_state *encode_state,
1413 struct intel_encoder_context *encoder_context,
1414 struct i965_gpe_resource *gpe_resource)
1416 struct gen9_vp9_state *vp9_state;
1417 VAEncPictureParameterBufferVP9 *pic_param;
1418 int frame_width_minus1, frame_height_minus1;
1419 int is_lossless = 0;
1420 int is_intra_only = 0;
1421 unsigned int last_frame_type;
1422 unsigned int ref_flags;
1423 unsigned int use_prev_frame_mvs, adapt_flag;
1424 struct gen9_surface_vp9 *vp9_surface = NULL;
1425 struct object_surface *obj_surface = NULL;
1426 uint32_t scale_h = 0;
1427 uint32_t scale_w = 0;
1431 unsigned int *cmd_ptr, cmd_value, tmp;
1433 pdata = i965_map_gpe_resource(gpe_resource);
1434 vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
1436 if (!vp9_state || !vp9_state->pic_param || !pdata)
1439 pic_param = vp9_state->pic_param;
/* Hardware takes dimension-minus-one, 8-aligned. */
1440 frame_width_minus1 = ALIGN(pic_param->frame_width_dst, 8) - 1;
1441 frame_height_minus1 = ALIGN(pic_param->frame_height_dst, 8) - 1;
/* Lossless mode: base qindex and all qindex deltas are zero. */
1442 if ((pic_param->luma_ac_qindex == 0) &&
1443 (pic_param->luma_dc_qindex_delta == 0) &&
1444 (pic_param->chroma_ac_qindex_delta == 0) &&
1445 (pic_param->chroma_dc_qindex_delta == 0))
1448 if (pic_param->pic_flags.bits.frame_type)
1449 is_intra_only = pic_param->pic_flags.bits.intra_only;
1451 last_frame_type = vp9_state->vp9_last_frame.frame_type;
1453 use_prev_frame_mvs = 0;
1454 if (pic_param->pic_flags.bits.frame_type == HCP_VP9_KEY_FRAME) {
1455 last_frame_type = 0;
1458 ref_flags = ((pic_param->ref_flags.bits.ref_arf_sign_bias << 9) |
1459 (pic_param->ref_flags.bits.ref_gf_sign_bias << 8) |
1460 (pic_param->ref_flags.bits.ref_last_sign_bias << 7)
/* Previous-frame MVs can be reused only when the last frame was a
 * shown inter frame of identical size and error resilience is off
 * (per the VP9 spec conditions). */
1462 if (!pic_param->pic_flags.bits.error_resilient_mode &&
1463 (pic_param->frame_width_dst == vp9_state->vp9_last_frame.frame_width) &&
1464 (pic_param->frame_height_dst == vp9_state->vp9_last_frame.frame_height) &&
1465 !pic_param->pic_flags.bits.intra_only &&
1466 vp9_state->vp9_last_frame.show_frame &&
1467 ((vp9_state->vp9_last_frame.frame_type == HCP_VP9_INTER_FRAME) &&
1468 !vp9_state->vp9_last_frame.intra_only)
1470 use_prev_frame_mvs = 1;
1473 if (!pic_param->pic_flags.bits.error_resilient_mode &&
1474 !pic_param->pic_flags.bits.frame_parallel_decoding_mode)
/* One HCP_VP9_PIC_STATE per potential PAK pass. */
1477 for (i = 0; i < 4; i++) {
1478 uint32_t non_first_pass;
1483 cmd_ptr =(unsigned int *)(pdata + i * VP9_PIC_STATE_BUFFER_SIZE);
1485 *cmd_ptr++ = (HCP_VP9_PIC_STATE | (33 - 2));
1486 *cmd_ptr++ = (frame_height_minus1 << 16 |
1487 frame_width_minus1);
/* DW2: frame-level control flags. */
1489 *cmd_ptr++ = ( 0 << 31 | /* disable segment_in */
1490 0 << 30 | /* disable segment_out */
1491 is_lossless << 29 | /* lossless */
1492 (pic_param->pic_flags.bits.segmentation_enabled && pic_param->pic_flags.bits.segmentation_temporal_update) << 28 | /* temporal update */
1493 (pic_param->pic_flags.bits.segmentation_enabled && pic_param->pic_flags.bits.segmentation_update_map) << 27 | /* update map */
1494 (pic_param->pic_flags.bits.segmentation_enabled << 26) |
1495 (pic_param->sharpness_level << 23) |
1496 (pic_param->filter_level << 17) |
1497 (pic_param->pic_flags.bits.frame_parallel_decoding_mode << 16) |
1498 (pic_param->pic_flags.bits.error_resilient_mode << 15) |
1499 (pic_param->pic_flags.bits.refresh_frame_context << 14) |
1500 (last_frame_type << 13) |
1501 (vp9_state->tx_mode == TX_MODE_SELECT) << 12 |
1502 (pic_param->pic_flags.bits.comp_prediction_mode == REFERENCE_MODE_SELECT) << 11 |
1503 (use_prev_frame_mvs) << 10 |
1505 (pic_param->pic_flags.bits.mcomp_filter_type << 4) |
1506 (pic_param->pic_flags.bits.allow_high_precision_mv << 3) |
1507 (is_intra_only << 2) |
1509 (pic_param->pic_flags.bits.frame_type) << 0);
/* DW3: profile/bit-depth/chroma format and tile layout. */
1511 *cmd_ptr++ =((0 << 28) | /* VP9Profile0 */
1512 (0 << 24) | /* 8-bit depth */
1513 (0 << 22) | /* only 420 format */
1514 (0 << 0) | /* sse statistics */
1515 (pic_param->log2_tile_rows << 8) |
1516 (pic_param->log2_tile_columns << 0));
/* Reference scaling factors (14-bit fixed point) for inter frames. */
1519 if (pic_param->pic_flags.bits.frame_type &&
1520 !pic_param->pic_flags.bits.intra_only) {
1521 for (j = 0; j < 3; j++) {
1522 obj_surface = encode_state->reference_objects[j];
1525 if (obj_surface && obj_surface->private_data) {
1526 vp9_surface = obj_surface->private_data;
1527 scale_w = (vp9_surface->frame_width << 14) / pic_param->frame_width_dst;
1528 scale_h = (vp9_surface->frame_height << 14) / pic_param->frame_height_dst;
1529 *cmd_ptr++ = (scale_w << 16 |
/* Reference frame dimensions (minus one). */
1540 for(j = 0; j < 3; j++) {
1541 obj_surface = encode_state->reference_objects[j];
1544 if (obj_surface && obj_surface->private_data) {
1545 vp9_surface = obj_surface->private_data;
1546 *cmd_ptr++ = (vp9_surface->frame_height - 1) << 16 |
1547 (vp9_surface->frame_width - 1);
1554 *cmd_ptr++ = (1 << 1);
1558 *cmd_ptr++ = ((1 << 25) | /* header insertion for VP9 */
1559 (0 << 24) | /* tail insertion */
1560 (pic_param->luma_ac_qindex << 16) |
1561 0 /* compressed header bin count */);
/* Qindex deltas packed as sign-magnitude values. */
1564 tmp = intel_convert_sign_mag(pic_param->luma_dc_qindex_delta, 5);
1565 cmd_value = (tmp << 16);
1566 tmp = intel_convert_sign_mag(pic_param->chroma_dc_qindex_delta, 5);
1567 cmd_value |= (tmp << 8);
1568 tmp = intel_convert_sign_mag(pic_param->chroma_ac_qindex_delta, 5);
1570 *cmd_ptr++ = cmd_value;
/* Per-reference loop-filter deltas (sign-magnitude, 7 bits). */
1572 tmp = intel_convert_sign_mag(pic_param->ref_lf_delta[0], 7);
1574 tmp = intel_convert_sign_mag(pic_param->ref_lf_delta[1], 7);
1575 cmd_value |= (tmp << 8);
1576 tmp = intel_convert_sign_mag(pic_param->ref_lf_delta[2], 7);
1577 cmd_value |= (tmp << 16);
1578 tmp = intel_convert_sign_mag(pic_param->ref_lf_delta[3], 7);
1579 cmd_value |= (tmp << 24);
1580 *cmd_ptr++ = cmd_value;
/* Per-mode loop-filter deltas. */
1583 tmp = intel_convert_sign_mag(pic_param->mode_lf_delta[0], 7);
1585 tmp = intel_convert_sign_mag(pic_param->mode_lf_delta[1], 7);
1586 cmd_value |= (tmp << 8);
1587 *cmd_ptr++ = cmd_value;
/* Bit offsets into the uncompressed frame header so the hardware can
 * patch qindex / loop-filter fields after BRC adjusts them. */
1590 *cmd_ptr++ = vp9_state->frame_header.bit_offset_ref_lf_delta |
1591 (vp9_state->frame_header.bit_offset_mode_lf_delta << 16);
1592 *cmd_ptr++ = vp9_state->frame_header.bit_offset_qindex |
1593 (vp9_state->frame_header.bit_offset_lf_level << 16);
1596 *cmd_ptr++ = (1 << 26 | (1 << 25) |
1597 non_first_pass << 16);
1599 *cmd_ptr++ = (1 << 31) | (256);
1602 *cmd_ptr++ = (0 << 31) | 1;
1604 /* dw22-dw24. Frame_delta_qindex_range */
1609 /* dw25-26. frame_delta_lf_range */
1613 /* dw27. frame_delta_lf_min */
1622 *cmd_ptr++ = (0 << 30) | 1;
1624 *cmd_ptr++ = vp9_state->frame_header.bit_offset_first_partition_size;
/* Terminate this pass's second-level batch. */
1627 *cmd_ptr++ = MI_BATCH_BUFFER_END;
1630 i965_unmap_gpe_resource(gpe_resource);
/*
 * Bind all input/output surfaces for the BRC UPDATE kernel.  The BRC
 * kernel rewrites the MBENC curbe in place, so the MBENC gpe context's
 * curbe BO is bound both as input and output.
 */
1634 gen9_brc_update_add_surfaces_vp9(VADriverContextP ctx,
1635 struct encode_state *encode_state,
1636 struct intel_encoder_context *encoder_context,
1637 struct i965_gpe_context *brc_gpe_context,
1638 struct i965_gpe_context *mbenc_gpe_context)
1640 struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
1642 /* 0. BRC history buffer */
1643 gen9_add_buffer_gpe_surface(ctx,
1645 &vme_context->res_brc_history_buffer,
1647 vme_context->res_brc_history_buffer.size,
1649 VP9_BTI_BRC_HISTORY_G9);
1651 /* 1. Constant data buffer */
1652 gen9_add_buffer_gpe_surface(ctx,
1654 &vme_context->res_brc_const_data_buffer,
1656 vme_context->res_brc_const_data_buffer.size,
1658 VP9_BTI_BRC_CONSTANT_DATA_G9);
1660 /* 2. Distortion 2D surface buffer */
1661 gen9_add_buffer_2d_gpe_surface(ctx,
1663 &vme_context->s4x_memv_distortion_buffer,
1665 I965_SURFACEFORMAT_R8_UNORM,
1666 VP9_BTI_BRC_DISTORTION_G9);
/* 3. MMDK PAK output buffer */
1669 gen9_add_buffer_gpe_surface(ctx,
1671 &vme_context->res_brc_mmdk_pak_buffer,
1673 vme_context->res_brc_mmdk_pak_buffer.size,
1675 VP9_BTI_BRC_MMDK_PAK_OUTPUT_G9);
1676 /* 4. Mbenc curbe input buffer */
1677 gen9_add_dri_buffer_gpe_surface(ctx,
1679 mbenc_gpe_context->curbe.bo,
1681 ALIGN(mbenc_gpe_context->curbe.length, 64),
1682 mbenc_gpe_context->curbe.offset,
1683 VP9_BTI_BRC_MBENC_CURBE_INPUT_G9);
1684 /* 5. Mbenc curbe output buffer */
1685 gen9_add_dri_buffer_gpe_surface(ctx,
1687 mbenc_gpe_context->curbe.bo,
1689 ALIGN(mbenc_gpe_context->curbe.length, 64),
1690 mbenc_gpe_context->curbe.offset,
1691 VP9_BTI_BRC_MBENC_CURBE_OUTPUT_G9);
1693 /* 6. BRC_PIC_STATE read buffer */
1694 gen9_add_buffer_gpe_surface(ctx, brc_gpe_context,
1695 &vme_context->res_pic_state_brc_read_buffer,
1697 vme_context->res_pic_state_brc_read_buffer.size,
1699 VP9_BTI_BRC_PIC_STATE_INPUT_G9);
1701 /* 7. BRC_PIC_STATE write buffer */
1702 gen9_add_buffer_gpe_surface(ctx, brc_gpe_context,
1703 &vme_context->res_pic_state_brc_write_hfw_read_buffer,
1705 vme_context->res_pic_state_brc_write_hfw_read_buffer.size,
1707 VP9_BTI_BRC_PIC_STATE_OUTPUT_G9);
1709 /* 8. SEGMENT_STATE read buffer */
1710 gen9_add_buffer_gpe_surface(ctx, brc_gpe_context,
1711 &vme_context->res_seg_state_brc_read_buffer,
1713 vme_context->res_seg_state_brc_read_buffer.size,
1715 VP9_BTI_BRC_SEGMENT_STATE_INPUT_G9);
1717 /* 9. SEGMENT_STATE write buffer */
1718 gen9_add_buffer_gpe_surface(ctx, brc_gpe_context,
1719 &vme_context->res_seg_state_brc_write_buffer,
1721 vme_context->res_seg_state_brc_write_buffer.size,
1723 VP9_BTI_BRC_SEGMENT_STATE_OUTPUT_G9);
1725 /* 10. Bitstream size buffer */
1726 gen9_add_buffer_gpe_surface(ctx, brc_gpe_context,
1727 &vme_context->res_brc_bitstream_size_buffer,
1729 vme_context->res_brc_bitstream_size_buffer.size,
1731 VP9_BTI_BRC_BITSTREAM_SIZE_G9);
/* 11. HFW data output buffer */
1733 gen9_add_buffer_gpe_surface(ctx, brc_gpe_context,
1734 &vme_context->res_brc_hfw_data_buffer,
1736 vme_context->res_brc_hfw_data_buffer.size,
1738 VP9_BTI_BRC_HFW_DATA_G9);
/*
 * Run the per-frame BRC UPDATE kernel.  First fills the MBENC curbe
 * (which the BRC kernel then patches), then sets up the BRC curbe,
 * constant data, and pic-state read buffer, and dispatches a single
 * media object.  Returns a VA_STATUS_* code.
 */
1744 gen9_vp9_brc_update_kernel(VADriverContextP ctx,
1745 struct encode_state *encode_state,
1746 struct intel_encoder_context *encoder_context)
1748 struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
1749 struct vp9_brc_context *brc_context = &vme_context->brc_context;
1750 struct i965_gpe_context *brc_gpe_context, *mbenc_gpe_context;
1751 int mbenc_index, gpe_index = VP9_BRC_UPDATE;
1752 int media_function = VP9_MEDIA_STATE_BRC_UPDATE;
1754 struct gen9_vp9_brc_curbe_param brc_update_curbe_param;
1755 VAEncPictureParameterBufferVP9 *pic_param;
1756 struct gen9_vp9_state *vp9_state;
1757 struct gen9_vp9_mbenc_curbe_param mbenc_curbe_param;
1758 struct gpe_media_object_parameter media_object_param;
1760 vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
1761 if (!vp9_state || !vp9_state->pic_param)
1762 return VA_STATUS_ERROR_INVALID_PARAMETER;
1764 pic_param = vp9_state->pic_param;
1765 // Setup VP9 MbEnc Curbe
/* Non-zero coding type == inter frame; choose the matching MBENC
 * kernel and media state. */
1766 if (vp9_state->picture_coding_type) {
1767 mbenc_function = VP9_MEDIA_STATE_MBENC_P;
1768 mbenc_index = VP9_MBENC_IDX_INTER;
1770 mbenc_function = VP9_MEDIA_STATE_MBENC_I_32x32;
1771 mbenc_index = VP9_MBENC_IDX_KEY_32x32;
1774 mbenc_gpe_context = &(vme_context->mbenc_context.gpe_contexts[mbenc_index]);
1776 memset(&mbenc_curbe_param, 0, sizeof(mbenc_curbe_param));
1778 mbenc_curbe_param.ppic_param = vp9_state->pic_param;
1779 mbenc_curbe_param.pseq_param = vp9_state->seq_param;
1780 mbenc_curbe_param.psegment_param = vp9_state->segment_param;
1781 //mbenc_curbe_param.ppRefList = &(vp9_state->pRefList[0]);
1782 mbenc_curbe_param.last_ref_obj = vp9_state->last_ref_obj;
1783 mbenc_curbe_param.golden_ref_obj = vp9_state->golden_ref_obj;
1784 mbenc_curbe_param.alt_ref_obj = vp9_state->alt_ref_obj;
1785 mbenc_curbe_param.frame_width_in_mb = ALIGN(vp9_state->frame_width, 16) / 16;
1786 mbenc_curbe_param.frame_height_in_mb = ALIGN(vp9_state->frame_height, 16) / 16;
1787 mbenc_curbe_param.hme_enabled = vp9_state->hme_enabled;
1788 mbenc_curbe_param.ref_frame_flag = vp9_state->ref_frame_flag;
1789 mbenc_curbe_param.multi_ref_qp_check = vp9_state->multi_ref_qp_check;
1790 mbenc_curbe_param.picture_coding_type = vp9_state->picture_coding_type;
1791 mbenc_curbe_param.media_state_type = mbenc_function;
1793 vme_context->pfn_set_curbe_mbenc(ctx, encode_state,
1796 &mbenc_curbe_param);
/* Tell the MBENC stage its curbe is already programmed. */
1798 vp9_state->mbenc_curbe_set_in_brc_update = true;
1800 brc_gpe_context = &brc_context->gpe_contexts[gpe_index];
1802 gen8_gpe_context_init(ctx, brc_gpe_context);
1803 gen9_gpe_reset_binding_table(ctx, brc_gpe_context);
1805 memset(&brc_update_curbe_param, 0, sizeof(brc_update_curbe_param));
1807 // Setup BRC Update Curbe
1808 brc_update_curbe_param.media_state_type = media_function;
1809 brc_update_curbe_param.curr_frame = pic_param->reconstructed_frame;
1810 brc_update_curbe_param.ppic_param = vp9_state->pic_param;
1811 brc_update_curbe_param.pseq_param = vp9_state->seq_param;
1812 brc_update_curbe_param.psegment_param = vp9_state->segment_param;
1813 brc_update_curbe_param.picture_coding_type = vp9_state->picture_coding_type;
1814 brc_update_curbe_param.frame_width_in_mb = ALIGN(vp9_state->frame_width, 16) / 16;
1815 brc_update_curbe_param.frame_height_in_mb = ALIGN(vp9_state->frame_height, 16) / 16;
1816 brc_update_curbe_param.hme_enabled = vp9_state->hme_enabled;
1817 brc_update_curbe_param.b_used_ref = 1;
1818 brc_update_curbe_param.frame_number = vp9_state->frame_number;
1819 brc_update_curbe_param.ref_frame_flag = vp9_state->ref_frame_flag;
1820 brc_update_curbe_param.mbbrc_enabled = 0;
1821 brc_update_curbe_param.multi_ref_qp_check = vp9_state->multi_ref_qp_check;
1822 brc_update_curbe_param.brc_num_pak_passes = vp9_state->num_pak_passes;
1824 brc_update_curbe_param.pbrc_init_current_target_buf_full_in_bits =
1825 &vp9_state->brc_init_current_target_buf_full_in_bits;
1826 brc_update_curbe_param.pbrc_init_reset_buf_size_in_bits =
1827 &vp9_state->brc_init_reset_buf_size_in_bits;
1828 brc_update_curbe_param.pbrc_init_reset_input_bits_per_frame =
1829 &vp9_state->brc_init_reset_input_bits_per_frame;
1831 vme_context->pfn_set_curbe_brc(ctx, encode_state,
1834 &brc_update_curbe_param);
1837 // Check if the constant data surface is present
/* Copy the per-frame-type BRC constant table into the GPE resource. */
1838 if (vp9_state->brc_constant_buffer_supported)
1840 char *brc_const_buffer;
1841 brc_const_buffer = i965_map_gpe_resource(&vme_context->res_brc_const_data_buffer);
1843 if (!brc_const_buffer)
1844 return VA_STATUS_ERROR_OPERATION_FAILED;
1846 if (vp9_state->picture_coding_type)
1847 memcpy(brc_const_buffer, vp9_brc_const_data_p_g9,
1848 sizeof(vp9_brc_const_data_p_g9));
1850 memcpy(brc_const_buffer, vp9_brc_const_data_i_g9,
1851 sizeof(vp9_brc_const_data_i_g9));
1853 i965_unmap_gpe_resource(&vme_context->res_brc_const_data_buffer);
1856 if (pic_param->pic_flags.bits.segmentation_enabled)
1858 //reallocate the vme_state->mb_segment_map_surface
1859 /* this will be added later */
1863 pic_param->filter_level = 0;
1864 // clear the filter level value in picParams before programming pic state, as this value will be determined and updated by BRC.
1865 intel_vp9enc_construct_picstate_batchbuf(ctx, encode_state,
1866 encoder_context, &vme_context->res_pic_state_brc_read_buffer);
1869 gen9_brc_update_add_surfaces_vp9(ctx, encode_state,
1874 gen8_gpe_setup_interface_data(ctx, brc_gpe_context);
1875 memset(&media_object_param, 0, sizeof(media_object_param));
1876 gen9_run_kernel_media_object(ctx, encoder_context,
1879 &media_object_param);
1880 return VA_STATUS_SUCCESS;
/*
 * gen9_vp9_set_curbe_me:
 * Populate the CURBE for the 4x/16x hierarchical motion-estimation
 * kernel.  The ME mode (16x-before-4x, 4x-after-16x, or 4x-only)
 * selects the scale factor and whether MVs from the previous step
 * are consumed.
 */
1884 void gen9_vp9_set_curbe_me(VADriverContextP ctx,
1885 struct encode_state *encode_state,
1886 struct i965_gpe_context *gpe_context,
1887 struct intel_encoder_context *encoder_context,
1888 struct gen9_vp9_me_curbe_param *param)
1890 vp9_me_curbe_data *me_cmd;
1891 int enc_media_state;
1893 unsigned int width, height;
1894 uint32_t l0_ref_frames;
1895 uint32_t scale_factor;
1897 if (param->b16xme_enabled) {
1898 if (param->use_16x_me)
1899 me_mode = VP9_ENC_ME16X_BEFORE_ME4X;
1901 me_mode = VP9_ENC_ME4X_AFTER_ME16X;
1903 me_mode = VP9_ENC_ME4X_ONLY;
/* scale_factor is assigned on elided lines (16 for 16x ME, 4 for 4x). */
1906 if (me_mode == VP9_ENC_ME16X_BEFORE_ME4X)
1911 if (param->use_16x_me)
1912 enc_media_state = VP9_MEDIA_STATE_16X_ME;
1914 enc_media_state = VP9_MEDIA_STATE_4X_ME;
1916 me_cmd = i965_gpe_context_map_curbe(gpe_context);
1921 memset(me_cmd, 0, sizeof(vp9_me_curbe_data));
/* VME search controls: search-unit counts, SAD type, sub-pel mode. */
1923 me_cmd->dw1.max_num_mvs = 0x10;
1924 me_cmd->dw1.bi_weight = 0x00;
1926 me_cmd->dw2.max_num_su = 0x39;
1927 me_cmd->dw2.max_len_sp = 0x39;
1929 me_cmd->dw3.sub_mb_part_mask = 0x77;
1930 me_cmd->dw3.inter_sad = 0x00;
1931 me_cmd->dw3.intra_sad = 0x00;
1932 me_cmd->dw3.bme_disable_fbr = 0x01;
1933 me_cmd->dw3.sub_pel_mode = 0x03;
/* Downscaled picture dimensions for the selected ME layer. */
1935 width = param->frame_width / scale_factor;
1936 height = param->frame_height / scale_factor;
1938 me_cmd->dw4.picture_width = ALIGN(width, 16) / 16;
1939 me_cmd->dw4.picture_height_minus1 = ALIGN(height, 16) / 16 - 1;
1941 me_cmd->dw5.ref_width = 0x30;
1942 me_cmd->dw5.ref_height = 0x28;
/* Only the 4x pass produces distortion output for BRC. */
1944 if (enc_media_state == VP9_MEDIA_STATE_4X_ME)
1945 me_cmd->dw6.write_distortions = 0x01;
1947 me_cmd->dw6.use_mv_from_prev_step = me_mode == VP9_ENC_ME4X_AFTER_ME16X ? 1 : 0;
1948 me_cmd->dw6.super_combine_dist = 0x5;
1949 me_cmd->dw6.max_vmvr = 0x7fc;
/* Count enabled L0 references from the LAST/GOLDEN/ALT flag bits. */
1951 l0_ref_frames = (param->ref_frame_flag & 0x01) +
1952 !!(param->ref_frame_flag & 0x02) +
1953 !!(param->ref_frame_flag & 0x04);
1954 me_cmd->dw13.num_ref_idx_l0_minus1 = (l0_ref_frames > 0) ? l0_ref_frames - 1 : 0;
1955 me_cmd->dw13.num_ref_idx_l1_minus1 = 0;
1957 me_cmd->dw14.l0_ref_pic_polarity_bits = 0;
1958 me_cmd->dw14.l1_ref_pic_polarity_bits = 0;
1960 me_cmd->dw15.mv_shift_factor = 0x02;
/* Diamond IME search-path table starts at byte 64 of the curbe. */
1963 memcpy((void *)((char *)me_cmd + 64),
1964 vp9_diamond_ime_search_path_delta,
1965 sizeof(vp9_diamond_ime_search_path_delta));
/* Surface binding-table indices. */
1969 me_cmd->dw32._4x_memv_output_data_surf_index = VP9_BTI_ME_MV_DATA_SURFACE;
1970 me_cmd->dw33._16x_32x_memv_input_data_surf_index = VP9_BTI_16XME_MV_DATA_SURFACE;
1971 me_cmd->dw34._4x_me_output_dist_surf_index = VP9_BTI_ME_DISTORTION_SURFACE;
1972 me_cmd->dw35._4x_me_output_brc_dist_surf_index = VP9_BTI_ME_BRC_DISTORTION_SURFACE;
1973 me_cmd->dw36.vme_fwd_inter_pred_surf_index = VP9_BTI_ME_CURR_PIC_L0;
1974 me_cmd->dw37.vme_bdw_inter_pred_surf_index = VP9_BTI_ME_CURR_PIC_L1;
1976 i965_gpe_context_unmap_curbe(gpe_context);
/*
 * Bind every surface the VP9 HME kernel reads/writes for one ME pass:
 * the MV-data output buffer for this pass, the 16x MV input (when 16x ME
 * is enabled), the ME/BRC distortion outputs (4x pass only), the scaled
 * current picture, and the scaled last/golden/alt reference pictures.
 * When DYS is active and a reference's stored resolution differs from the
 * current frame, the DYS-rescaled copy of that reference is bound instead.
 *
 * NOTE(review): several continuation lines of the gen9_add_*_gpe_surface
 * calls (resource/width/height arguments) are not visible in this chunk;
 * the argument lists below are partial as extracted.
 */
gen9_vp9_send_me_surface(VADriverContextP ctx,
                         struct encode_state *encode_state,
                         struct i965_gpe_context *gpe_context,
                         struct intel_encoder_context *encoder_context,
                         struct gen9_vp9_me_surface_param *param)
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct object_surface *obj_surface;
    struct gen9_surface_vp9 *vp9_priv_surface;
    struct object_surface *input_surface;
    struct i965_gpe_resource *gpe_resource;

    /* The current picture must carry the per-surface VP9 private data
     * (scaled/DYS surfaces hang off it). */
    obj_surface = SURFACE(param->curr_pic);
    if (!obj_surface || !obj_surface->private_data)
    vp9_priv_surface = obj_surface->private_data;

    /* MV data output: pick the buffer matching this ME pass (16x or 4x). */
    if (param->use_16x_me)
        gpe_resource = param->pres_16x_memv_data_buffer;
        gpe_resource = param->pres_4x_memv_data_buffer;
    gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
                                   I965_SURFACEFORMAT_R8_UNORM,
                                   VP9_BTI_ME_MV_DATA_SURFACE);

    /* 16x MV data is also bound as an input so the 4x pass can refine it. */
    if (param->b16xme_enabled) {
        gpe_resource = param->pres_16x_memv_data_buffer;
        gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
                                       I965_SURFACEFORMAT_R8_UNORM,
                                       VP9_BTI_16XME_MV_DATA_SURFACE);

    /* Distortion outputs are produced only by the 4x (final) ME pass. */
    if (!param->use_16x_me) {
        gpe_resource = param->pres_me_brc_distortion_buffer;
        gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
                                       I965_SURFACEFORMAT_R8_UNORM,
                                       VP9_BTI_ME_BRC_DISTORTION_SURFACE);
        gpe_resource = param->pres_me_distortion_buffer;
        gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
                                       I965_SURFACEFORMAT_R8_UNORM,
                                       VP9_BTI_ME_DISTORTION_SURFACE);

    /* Current picture: the downscaled surface matching this pass. */
    if (param->use_16x_me)
        input_surface = vp9_priv_surface->scaled_16x_surface_obj;
        input_surface = vp9_priv_surface->scaled_4x_surface_obj;
    gen9_add_adv_gpe_surface(ctx, gpe_context,
                             VP9_BTI_ME_CURR_PIC_L0);

    /* References are bound at consecutive BTIs after the current picture. */
    ref_bti = VP9_BTI_ME_CURR_PIC_L0 + 1;

    /* Last reference frame. */
    if (param->last_ref_pic) {
        obj_surface = param->last_ref_pic;
        vp9_priv_surface = obj_surface->private_data;

        if (param->use_16x_me)
            input_surface = vp9_priv_surface->scaled_16x_surface_obj;
            input_surface = vp9_priv_surface->scaled_4x_surface_obj;

        /* If DYS rescaled this reference, bind the rescaled copy so its
         * resolution matches the current frame. */
        if (param->dys_enabled &&
            ((vp9_priv_surface->frame_width != param->frame_width) ||
             (vp9_priv_surface->frame_height != param->frame_height))) {
            if (param->use_16x_me)
                input_surface = vp9_priv_surface->dys_16x_surface_obj;
                input_surface = vp9_priv_surface->dys_4x_surface_obj;
        gen9_add_adv_gpe_surface(ctx, gpe_context,
        gen9_add_adv_gpe_surface(ctx, gpe_context,

    /* Golden reference frame — same selection logic as above. */
    if (param->golden_ref_pic) {
        obj_surface = param->golden_ref_pic;
        vp9_priv_surface = obj_surface->private_data;

        if (param->use_16x_me)
            input_surface = vp9_priv_surface->scaled_16x_surface_obj;
            input_surface = vp9_priv_surface->scaled_4x_surface_obj;

        if (param->dys_enabled &&
            ((vp9_priv_surface->frame_width != param->frame_width) ||
             (vp9_priv_surface->frame_height != param->frame_height))) {
            if (param->use_16x_me)
                input_surface = vp9_priv_surface->dys_16x_surface_obj;
                input_surface = vp9_priv_surface->dys_4x_surface_obj;

        gen9_add_adv_gpe_surface(ctx, gpe_context,
        gen9_add_adv_gpe_surface(ctx, gpe_context,

    /* Alternate reference frame — same selection logic as above. */
    if (param->alt_ref_pic) {
        obj_surface = param->alt_ref_pic;
        vp9_priv_surface = obj_surface->private_data;

        if (param->use_16x_me)
            input_surface = vp9_priv_surface->scaled_16x_surface_obj;
            input_surface = vp9_priv_surface->scaled_4x_surface_obj;

        if (param->dys_enabled &&
            ((vp9_priv_surface->frame_width != param->frame_width) ||
             (vp9_priv_surface->frame_height != param->frame_height))) {
            if (param->use_16x_me)
                input_surface = vp9_priv_surface->dys_16x_surface_obj;
                input_surface = vp9_priv_surface->dys_4x_surface_obj;
        gen9_add_adv_gpe_surface(ctx, gpe_context,
        gen9_add_adv_gpe_surface(ctx, gpe_context,
/*
 * Fill a gen9_vp9_me_surface_param from the encoder state and hand it to
 * the per-platform surface-binding hook (pfn_send_me_surface) for the
 * requested ME pass.
 *
 * NOTE(review): the trailing 'int use_16x_me' parameter line is not
 * visible in this chunk but the variable is used below — confirm against
 * the full file.
 */
void gen9_me_add_surfaces_vp9(VADriverContextP ctx,
                              struct encode_state *encode_state,
                              struct intel_encoder_context *encoder_context,
                              struct i965_gpe_context *gpe_context,
    struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
    struct gen9_vp9_me_surface_param me_surface_param;
    struct gen9_vp9_state *vp9_state;

    vp9_state = (struct gen9_vp9_state *)(encoder_context->enc_priv_state);

    /* sScaled4xSurface surface */
    memset(&me_surface_param, 0, sizeof(me_surface_param));
    me_surface_param.last_ref_pic = vp9_state->last_ref_obj;
    me_surface_param.golden_ref_pic = vp9_state->golden_ref_obj;
    me_surface_param.alt_ref_pic = vp9_state->alt_ref_obj;
    me_surface_param.curr_pic = vp9_state->curr_frame;
    me_surface_param.pres_4x_memv_data_buffer = &vme_context->s4x_memv_data_buffer;
    me_surface_param.pres_16x_memv_data_buffer = &vme_context->s16x_memv_data_buffer;
    /* Note: BRC distortion deliberately aliases the same 4x distortion
     * buffer as the ME distortion surface. */
    me_surface_param.pres_me_distortion_buffer = &vme_context->s4x_memv_distortion_buffer;
    me_surface_param.pres_me_brc_distortion_buffer = &vme_context->s4x_memv_distortion_buffer;

    /* Downscaled dimensions for the selected pass (16x vs 4x); the
     * if/else selector lines are not visible in this extraction. */
    me_surface_param.downscaled_width_in_mb = vp9_state->downscaled_width_16x_in_mb;
    me_surface_param.downscaled_height_in_mb = vp9_state->downscaled_height_16x_in_mb;
    me_surface_param.downscaled_width_in_mb = vp9_state->downscaled_width_4x_in_mb;
    me_surface_param.downscaled_height_in_mb = vp9_state->downscaled_height_4x_in_mb;

    me_surface_param.frame_width = vp9_state->frame_width;
    me_surface_param.frame_height = vp9_state->frame_height;

    me_surface_param.use_16x_me = use_16x_me;
    me_surface_param.b16xme_enabled = vp9_state->b16xme_enabled;
    me_surface_param.dys_enabled = vp9_state->dys_in_use;

    vme_context->pfn_send_me_surface(ctx, encode_state,
/*
 * Run one HME pass (16x or 4x) for the current VP9 frame: program the
 * CURBE, bind the surfaces, set up the interface descriptor, and launch
 * the ME kernel through the media-object walker with no thread
 * dependencies (each MB is independent).
 *
 * Returns VA_STATUS_SUCCESS, or VA_STATUS_ERROR_INVALID_PARAMETER when
 * the encoder state / picture parameters are missing.
 */
gen9_vp9_me_kernel(VADriverContextP ctx,
                   struct encode_state *encode_state,
                   struct intel_encoder_context *encoder_context,
    struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
    struct i965_gpe_context *gpe_context;
    struct gen9_vp9_me_curbe_param me_curbe_param;
    struct gen9_vp9_state *vp9_state;
    struct gpe_media_object_walker_parameter media_object_walker_param;
    struct gpe_encoder_kernel_walker_parameter kernel_walker_param;

    vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
    if (!vp9_state || !vp9_state->pic_param)
        return VA_STATUS_ERROR_INVALID_PARAMETER;

    /* Media state for status reporting; selector line(s) not visible here. */
    media_function = VP9_MEDIA_STATE_16X_ME;
    media_function = VP9_MEDIA_STATE_4X_ME;

    gpe_context = &(vme_context->me_context.gpe_context);

    gen8_gpe_context_init(ctx, gpe_context);
    gen9_gpe_reset_binding_table(ctx, gpe_context);

    /* CURBE: frame geometry plus the ME pass configuration. */
    memset(&me_curbe_param, 0, sizeof(me_curbe_param));
    me_curbe_param.ppic_param = vp9_state->pic_param;
    me_curbe_param.pseq_param = vp9_state->seq_param;
    me_curbe_param.frame_width = vp9_state->frame_width;
    me_curbe_param.frame_height = vp9_state->frame_height;
    me_curbe_param.ref_frame_flag = vp9_state->ref_frame_flag;
    me_curbe_param.use_16x_me = use_16x_me;
    me_curbe_param.b16xme_enabled = vp9_state->b16xme_enabled;
    vme_context->pfn_set_curbe_me(ctx, encode_state,

    gen9_me_add_surfaces_vp9(ctx, encode_state,

    gen8_gpe_setup_interface_data(ctx, gpe_context);

    /* Walker resolution = downscaled frame in MB units for this pass;
     * the 16x/4x selector lines are not visible in this extraction. */
    memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
    kernel_walker_param.resolution_x = vp9_state->downscaled_width_16x_in_mb;
    kernel_walker_param.resolution_y = vp9_state->downscaled_height_16x_in_mb;
    kernel_walker_param.resolution_x = vp9_state->downscaled_width_4x_in_mb;
    kernel_walker_param.resolution_y = vp9_state->downscaled_height_4x_in_mb;
    kernel_walker_param.no_dependency = 1;

    gen9_init_media_object_walker_parameter(encoder_context, &kernel_walker_param, &media_object_walker_param);

    gen9_run_kernel_media_object_walker(ctx, encoder_context,
                                        &media_object_walker_param);

    return VA_STATUS_SUCCESS;
/*
 * Program the CURBE for the CM 4x scaling kernel: input picture size,
 * the fixed source/destination binding-table indices, and (optionally)
 * the MB-statistics output used by adaptive transform decision.
 */
gen9_vp9_set_curbe_scaling_cm(VADriverContextP ctx,
                              struct encode_state *encode_state,
                              struct i965_gpe_context *gpe_context,
                              struct intel_encoder_context *encoder_context,
                              struct gen9_vp9_scaling_curbe_param *curbe_param)
    vp9_scaling4x_curbe_data_cm *curbe_cmd;

    curbe_cmd = i965_gpe_context_map_curbe(gpe_context);

    memset(curbe_cmd, 0, sizeof(vp9_scaling4x_curbe_data_cm));

    curbe_cmd->dw0.input_picture_width = curbe_param->input_picture_width;
    curbe_cmd->dw0.input_picture_height = curbe_param->input_picture_height;

    /* Fixed binding-table slots for the scaling kernel. */
    curbe_cmd->dw1.input_y_bti = VP9_BTI_SCALING_FRAME_SRC_Y;
    curbe_cmd->dw2.output_y_bti = VP9_BTI_SCALING_FRAME_DST_Y;

    /* Statistics outputs default to off. */
    curbe_cmd->dw6.enable_mb_variance_output = 0;
    curbe_cmd->dw6.enable_mb_pixel_average_output = 0;
    curbe_cmd->dw6.enable_blk8x8_stat_output = 0;

    /* When any MB statistic is requested, point the kernel at the
     * MBVProcStats destination surface. */
    if (curbe_param->mb_variance_output_enabled ||
        curbe_param->mb_pixel_average_output_enabled)
        curbe_cmd->dw10.mbv_proc_stat_bti = VP9_BTI_SCALING_FRAME_MBVPROCSTATS_DST_CM;

    i965_gpe_context_unmap_curbe(gpe_context);
/*
 * Bind the source and destination 2D surfaces for the scaling kernel.
 * The surface format encodes how many horizontal pixels one element
 * covers: R32_UNORM packs 4 pixels, R16_UNORM packs 2, R8_UNORM is 1:1.
 */
gen9_vp9_send_scaling_surface(VADriverContextP ctx,
                              struct encode_state *encode_state,
                              struct i965_gpe_context *gpe_context,
                              struct intel_encoder_context *encoder_context,
                              struct gen9_vp9_scaling_surface_param *scaling_surface_param)
    vp9_bti_scaling_offset *scaling_bti;
    unsigned int surface_format;

    scaling_bti = scaling_surface_param->p_scaling_bti;

    /* Choose the element format requested by the caller. */
    if (scaling_surface_param->scaling_out_use_32unorm_surf_fmt)
        surface_format = I965_SURFACEFORMAT_R32_UNORM;
    else if (scaling_surface_param->scaling_out_use_16unorm_surf_fmt)
        surface_format = I965_SURFACEFORMAT_R16_UNORM;
        surface_format = I965_SURFACEFORMAT_R8_UNORM;

    gen9_add_2d_gpe_surface(ctx, gpe_context,
                            scaling_surface_param->input_surface,
                            0, 1, surface_format,
                            scaling_bti->scaling_frame_src_y);

    gen9_add_2d_gpe_surface(ctx, gpe_context,
                            scaling_surface_param->output_surface,
                            0, 1, surface_format,
                            scaling_bti->scaling_frame_dst_y);
/*
 * Run one downscaling pass for HME:
 *  - 4x pass:  input is the (possibly DYS-rescaled) source frame,
 *              output is the 4x scaled surface;
 *  - 16x pass: input is the 4x scaled surface, output is 16x.
 * Programs the CURBE, binds src/dst surfaces, and launches the scaling
 * kernel via the media-object walker at 8x8-block granularity.
 *
 * Returns VA_STATUS_SUCCESS, or VA_STATUS_ERROR_INVALID_PARAMETER when
 * the encoder state / picture parameters are missing.
 */
gen9_vp9_scaling_kernel(VADriverContextP ctx,
                        struct encode_state *encode_state,
                        struct intel_encoder_context *encoder_context,
                        int use_16x_scaling)
    struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
    struct i965_gpe_context *gpe_context;
    struct gen9_vp9_scaling_curbe_param scaling_curbe_param;
    struct gen9_vp9_scaling_surface_param scaling_surface_param;
    struct gen9_vp9_state *vp9_state;
    VAEncPictureParameterBufferVP9 *pic_param;
    struct gpe_media_object_walker_parameter media_object_walker_param;
    struct gpe_encoder_kernel_walker_parameter kernel_walker_param;
    struct object_surface *obj_surface;
    struct object_surface *input_surface, *output_surface;
    struct gen9_surface_vp9 *vp9_priv_surface;
    unsigned int downscaled_width_in_mb, downscaled_height_in_mb;
    unsigned int input_frame_width, input_frame_height;
    unsigned int output_frame_width, output_frame_height;

    vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
    if (!vp9_state || !vp9_state->pic_param)
        return VA_STATUS_ERROR_INVALID_PARAMETER;

    pic_param = vp9_state->pic_param;

    if (use_16x_scaling)
        media_function = VP9_MEDIA_STATE_16X_SCALING;
        media_function = VP9_MEDIA_STATE_4X_SCALING;

    gpe_context = &(vme_context->scaling_context.gpe_contexts[0]);

    gen8_gpe_context_init(ctx, gpe_context);
    gen9_gpe_reset_binding_table(ctx, gpe_context);

    /* Scaled surfaces hang off the reconstructed object's private data. */
    obj_surface = encode_state->reconstructed_object;
    vp9_priv_surface = obj_surface->private_data;

    if (use_16x_scaling)
        /* 16x pass: 4x surface -> 16x surface. */
        downscaled_width_in_mb = vp9_state->downscaled_width_16x_in_mb;
        downscaled_height_in_mb = vp9_state->downscaled_height_16x_in_mb;

        input_surface = vp9_priv_surface->scaled_4x_surface_obj;
        input_frame_width = vp9_state->frame_width_4x;
        input_frame_height = vp9_state->frame_height_4x;

        output_surface = vp9_priv_surface->scaled_16x_surface_obj;
        output_frame_width = vp9_state->frame_width_16x;
        output_frame_height = vp9_state->frame_height_16x;
        /* 4x pass: source frame -> 4x surface. */
        downscaled_width_in_mb = vp9_state->downscaled_width_4x_in_mb;
        downscaled_height_in_mb = vp9_state->downscaled_height_4x_in_mb;

        /* With dynamic scaling active and src != dst resolution, scale
         * from the DYS-rescaled frame instead of the raw input. */
        if (vp9_state->dys_in_use &&
            ((pic_param->frame_width_src != pic_param->frame_width_dst) ||
             (pic_param->frame_height_src != pic_param->frame_height_dst)))
            input_surface = vp9_priv_surface->dys_surface_obj;
            input_surface = encode_state->input_yuv_object;

        input_frame_width = vp9_state->frame_width;
        input_frame_height = vp9_state->frame_height;

        output_surface = vp9_priv_surface->scaled_4x_surface_obj;
        output_frame_width = vp9_state->frame_width_4x;
        output_frame_height = vp9_state->frame_height_4x;

    memset(&scaling_curbe_param, 0, sizeof(scaling_curbe_param));

    scaling_curbe_param.input_picture_width = input_frame_width;
    scaling_curbe_param.input_picture_height = input_frame_height;

    scaling_curbe_param.use_16x_scaling = use_16x_scaling;
    scaling_curbe_param.use_32x_scaling = 0;

    /* MB variance is only produced by the 4x pass, and only when the
     * adaptive transform decision needs it. */
    if (use_16x_scaling)
        scaling_curbe_param.mb_variance_output_enabled = 0;
        scaling_curbe_param.mb_variance_output_enabled = vp9_state->adaptive_transform_decision_enabled;

    scaling_curbe_param.blk8x8_stat_enabled = 0;

    vme_context->pfn_set_curbe_scaling(ctx, encode_state,
                                       &scaling_curbe_param);

    memset(&scaling_surface_param, 0, sizeof(scaling_surface_param));
    scaling_surface_param.p_scaling_bti = (void *)(&vme_context->scaling_context.scaling_4x_bti);
    scaling_surface_param.input_surface = input_surface;
    scaling_surface_param.input_frame_width = input_frame_width;
    scaling_surface_param.input_frame_height = input_frame_height;

    scaling_surface_param.output_surface = output_surface;
    scaling_surface_param.output_frame_width = output_frame_width;
    scaling_surface_param.output_frame_height = output_frame_height;
    /* R32_UNORM output: one element carries 4 horizontal pixels. */
    scaling_surface_param.scaling_out_use_16unorm_surf_fmt = 0;
    scaling_surface_param.scaling_out_use_32unorm_surf_fmt = 1;

    vme_context->pfn_send_scaling_surface(ctx, encode_state,
                                          &scaling_surface_param);

    gen8_gpe_setup_interface_data(ctx, gpe_context);

    memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
    /* the scaling is based on 8x8 blk level */
    kernel_walker_param.resolution_x = downscaled_width_in_mb * 2;
    kernel_walker_param.resolution_y = downscaled_height_in_mb * 2;
    kernel_walker_param.no_dependency = 1;

    gen9_init_media_object_walker_parameter(encoder_context, &kernel_walker_param, &media_object_walker_param);

    gen9_run_kernel_media_object_walker(ctx, encoder_context,
                                        &media_object_walker_param);

    return VA_STATUS_SUCCESS;
/*
 * Program the 8x8 AVS (Adaptive Video Scaler) sampler state used by the
 * DYS kernel: sharpening/edge-detection tuning values plus the 17+15
 * polyphase filter coefficient tables from gen9_vp9_avs_coeffs.
 * The numeric values are hardware tuning constants; do not change them
 * without re-validating scaling quality.
 */
gen9_vp9_dys_set_sampler_state(struct i965_gpe_context *gpe_context)
    struct gen9_sampler_8x8_avs *sampler_cmd;

    /* Map writable (second arg 1 = write enable). */
    dri_bo_map(gpe_context->sampler.bo, 1);

    if (!gpe_context->sampler.bo->virtual)

    sampler_cmd = (struct gen9_sampler_8x8_avs *)
        (gpe_context->sampler.bo->virtual + gpe_context->sampler.offset);

    memset(sampler_cmd, 0, sizeof(struct gen9_sampler_8x8_avs));

    /* Edge detection / sharpening controls. */
    sampler_cmd->dw0.r3c_coefficient = 15;
    sampler_cmd->dw0.r3x_coefficient = 6;
    sampler_cmd->dw0.strong_edge_threshold = 8;
    sampler_cmd->dw0.weak_edge_threshold = 1;
    sampler_cmd->dw0.gain_factor = 32;

    sampler_cmd->dw2.r5c_coefficient = 3;
    sampler_cmd->dw2.r5cx_coefficient = 8;
    sampler_cmd->dw2.r5x_coefficient = 9;
    sampler_cmd->dw2.strong_edge_weight = 6;
    sampler_cmd->dw2.regular_weight = 3;
    sampler_cmd->dw2.non_edge_weight = 2;
    sampler_cmd->dw2.global_noise_estimation = 255;

    sampler_cmd->dw3.enable_8tap_adaptive_filter = 0;
    sampler_cmd->dw3.cos_alpha = 79;
    sampler_cmd->dw3.sin_alpha = 101;

    sampler_cmd->dw5.diamond_du = 0;
    sampler_cmd->dw5.hs_margin = 3;
    sampler_cmd->dw5.diamond_alpha = 100;

    /* Chroma key / piecewise-linear mapping margins and slopes. */
    sampler_cmd->dw7.inv_margin_vyl = 3300;

    sampler_cmd->dw8.inv_margin_vyu = 1600;

    sampler_cmd->dw10.y_slope2 = 24;
    sampler_cmd->dw10.s0l = 1792;

    sampler_cmd->dw12.y_slope1 = 24;

    sampler_cmd->dw14.s0u = 256;

    sampler_cmd->dw15.s2u = 1792;
    sampler_cmd->dw15.s1u = 0;

    /* First 17 entries of the AVS coefficient table. */
    memcpy(sampler_cmd->coefficients,
           &gen9_vp9_avs_coeffs[0],
           17 * sizeof(struct gen8_sampler_8x8_avs_coefficients));

    sampler_cmd->dw152.default_sharpness_level = 255;
    sampler_cmd->dw152.max_derivative_4_pixels = 7;
    sampler_cmd->dw152.max_derivative_8_pixels = 20;
    sampler_cmd->dw152.transition_area_with_4_pixels = 4;
    sampler_cmd->dw152.transition_area_with_8_pixels = 5;

    sampler_cmd->dw153.bypass_x_adaptive_filtering = 1;
    sampler_cmd->dw153.bypass_y_adaptive_filtering = 1;
    sampler_cmd->dw153.adaptive_filter_for_all_channel = 0;

    /* Remaining 15 coefficient entries, stored after the first 17*8. */
    memcpy(sampler_cmd->extra_coefficients,
           &gen9_vp9_avs_coeffs[17 * 8],
           15 * sizeof(struct gen8_sampler_8x8_avs_coefficients));

    dri_bo_unmap(gpe_context->sampler.bo);
/*
 * Program the CURBE for the DYS (dynamic scaling) kernel: input/output
 * frame dimensions, normalized sampling steps, and the fixed binding
 * table indices for the NV12 input and Y output.
 */
gen9_vp9_set_curbe_dys(VADriverContextP ctx,
                       struct encode_state *encode_state,
                       struct i965_gpe_context *gpe_context,
                       struct intel_encoder_context *encoder_context,
                       struct gen9_vp9_dys_curbe_param *curbe_param)
    vp9_dys_curbe_data *curbe_cmd;

    curbe_cmd = i965_gpe_context_map_curbe(gpe_context);

    memset(curbe_cmd, 0, sizeof(vp9_dys_curbe_data));

    curbe_cmd->dw0.input_frame_width = curbe_param->input_width;
    curbe_cmd->dw0.input_frame_height = curbe_param->input_height;

    curbe_cmd->dw1.output_frame_width = curbe_param->output_width;
    curbe_cmd->dw1.output_frame_height = curbe_param->output_height;

    /* Normalized per-output-pixel sampling step in each direction. */
    curbe_cmd->dw2.delta_u = 1.0f / curbe_param->output_width;
    curbe_cmd->dw3.delta_v = 1.0f / curbe_param->output_height;

    curbe_cmd->dw16.input_frame_nv12_bti = VP9_BTI_DYS_INPUT_NV12;
    curbe_cmd->dw17.output_frame_y_bti = VP9_BTI_DYS_OUTPUT_Y;
    /* Sampler 0 = the AVS state set by gen9_vp9_dys_set_sampler_state. */
    curbe_cmd->dw18.avs_sample_idx = 0;

    i965_gpe_context_unmap_curbe(gpe_context);
/*
 * Bind the DYS kernel surfaces: the NV12 input as a sampler (adv)
 * surface, and the output frame twice — the Y plane as R8_UNORM and the
 * interleaved UV plane as R16_UINT (one element per UV pair).
 *
 * NOTE(review): some argument lines of these calls are missing from this
 * extraction (plane/offset arguments).
 */
gen9_vp9_send_dys_surface(VADriverContextP ctx,
                          struct encode_state *encode_state,
                          struct i965_gpe_context *gpe_context,
                          struct intel_encoder_context *encoder_context,
                          struct gen9_vp9_dys_surface_param *surface_param)
    if (surface_param->input_frame)
        gen9_add_adv_gpe_surface(ctx,
                                 surface_param->input_frame,
                                 VP9_BTI_DYS_INPUT_NV12);

    if (surface_param->output_frame) {
        /* Y plane. */
        gen9_add_2d_gpe_surface(ctx,
                                surface_param->output_frame,
                                I965_SURFACEFORMAT_R8_UNORM,
                                VP9_BTI_DYS_OUTPUT_Y);

        /* Interleaved UV plane. */
        gen9_add_2d_gpe_surface(ctx,
                                surface_param->output_frame,
                                I965_SURFACEFORMAT_R16_UINT,
                                VP9_BTI_DYS_OUTPUT_UV);
/*
 * Launch one DYS (dynamic scaling) kernel invocation that rescales
 * dys_kernel_param->input_surface to output_surface at the requested
 * dimensions.  CURBE and surfaces are programmed through the
 * per-platform hooks; the walker runs one thread per 16x16 output block
 * with no inter-thread dependency.
 */
gen9_vp9_dys_kernel(VADriverContextP ctx,
                    struct encode_state *encode_state,
                    struct intel_encoder_context *encoder_context,
                    gen9_vp9_dys_kernel_param *dys_kernel_param)
    struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
    struct i965_gpe_context *gpe_context;
    struct gen9_vp9_dys_curbe_param curbe_param;
    struct gen9_vp9_dys_surface_param surface_param;
    struct gpe_media_object_walker_parameter media_object_walker_param;
    struct gpe_encoder_kernel_walker_parameter kernel_walker_param;
    unsigned int resolution_x, resolution_y;

    media_function = VP9_MEDIA_STATE_DYS;
    gpe_context = &vme_context->dys_context.gpe_context;

    //gen8_gpe_context_init(ctx, gpe_context);
    gen9_gpe_reset_binding_table(ctx, gpe_context);

    /* sampler state is configured only when initializing the GPE context */

    memset(&curbe_param, 0, sizeof(curbe_param));
    curbe_param.input_width = dys_kernel_param->input_width;
    curbe_param.input_height = dys_kernel_param->input_height;
    curbe_param.output_width = dys_kernel_param->output_width;
    curbe_param.output_height = dys_kernel_param->output_height;
    vme_context->pfn_set_curbe_dys(ctx, encode_state,

    // Add surface states
    memset(&surface_param, 0, sizeof(surface_param));
    surface_param.input_frame = dys_kernel_param->input_surface;
    surface_param.output_frame = dys_kernel_param->output_surface;
    surface_param.vert_line_stride = 0;
    surface_param.vert_line_stride_offset = 0;

    vme_context->pfn_send_dys_surface(ctx,

    /* One walker thread per 16x16 block of the scaled output. */
    resolution_x = ALIGN(dys_kernel_param->output_width, 16) / 16;
    resolution_y = ALIGN(dys_kernel_param->output_height, 16) / 16;

    gen8_gpe_setup_interface_data(ctx, gpe_context);

    memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
    kernel_walker_param.resolution_x = resolution_x;
    kernel_walker_param.resolution_y = resolution_y;
    kernel_walker_param.no_dependency = 1;

    gen9_init_media_object_walker_parameter(encoder_context, &kernel_walker_param, &media_object_walker_param);

    gen9_run_kernel_media_object_walker(ctx, encoder_context,
                                        &media_object_walker_param);

    return VA_STATUS_SUCCESS;
2654 gen9_vp9_run_dys_refframes(VADriverContextP ctx,
2655 struct encode_state *encode_state,
2656 struct intel_encoder_context *encoder_context)
2658 struct gen9_vp9_state *vp9_state;
2659 VAEncPictureParameterBufferVP9 *pic_param;
2660 gen9_vp9_dys_kernel_param dys_kernel_param;
2661 struct object_surface *obj_surface;
2662 struct object_surface *input_surface, *output_surface;
2663 struct gen9_surface_vp9 *vp9_priv_surface;
2665 vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
2667 if (!vp9_state || !vp9_state->pic_param)
2668 return VA_STATUS_ERROR_INVALID_PARAMETER;
2670 pic_param = vp9_state->pic_param;
2672 if ((pic_param->frame_width_src != pic_param->frame_width_dst) ||
2673 (pic_param->frame_height_src != pic_param->frame_height_dst)) {
2674 input_surface = encode_state->input_yuv_object;
2675 obj_surface = encode_state->reconstructed_object;
2676 vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
2677 output_surface = vp9_priv_surface->dys_surface_obj;
2679 memset(&dys_kernel_param, 0, sizeof(dys_kernel_param));
2680 dys_kernel_param.input_width = pic_param->frame_width_src;
2681 dys_kernel_param.input_height = pic_param->frame_height_src;
2682 dys_kernel_param.input_surface = input_surface;
2683 dys_kernel_param.output_width = pic_param->frame_width_dst;
2684 dys_kernel_param.output_height = pic_param->frame_height_dst;
2685 dys_kernel_param.output_surface = output_surface;
2686 gen9_vp9_dys_kernel(ctx, encode_state,
2691 if ((vp9_state->dys_ref_frame_flag & VP9_LAST_REF) &&
2692 vp9_state->last_ref_obj) {
2693 obj_surface = vp9_state->last_ref_obj;
2694 vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
2696 input_surface = obj_surface;
2697 output_surface = vp9_priv_surface->dys_surface_obj;
2699 dys_kernel_param.input_width = vp9_priv_surface->frame_width;
2700 dys_kernel_param.input_height = vp9_priv_surface->frame_height;
2701 dys_kernel_param.input_surface = input_surface;
2703 dys_kernel_param.output_width = pic_param->frame_width_dst;
2704 dys_kernel_param.output_height = pic_param->frame_height_dst;
2705 dys_kernel_param.output_surface = output_surface;
2707 gen9_vp9_dys_kernel(ctx, encode_state,
2711 if (vp9_state->hme_enabled) {
2712 dys_kernel_param.input_width = ALIGN((vp9_priv_surface->frame_width / 4), 16);
2713 dys_kernel_param.input_width = ALIGN((vp9_priv_surface->frame_height / 4), 16);
2714 dys_kernel_param.input_surface = vp9_priv_surface->scaled_4x_surface_obj;
2716 dys_kernel_param.output_width = vp9_state->frame_width_4x;
2717 dys_kernel_param.output_height = vp9_state->frame_height_4x;
2718 dys_kernel_param.output_surface = vp9_priv_surface->dys_4x_surface_obj;
2720 gen9_vp9_dys_kernel(ctx, encode_state,
2724 /* Does it really need to do the 16x HME if the
2725 * resolution is different?
2726 * Maybe it should be restricted
2728 if (vp9_state->b16xme_enabled) {
2729 dys_kernel_param.input_width = ALIGN((vp9_priv_surface->frame_width / 16), 16);
2730 dys_kernel_param.input_height = ALIGN((vp9_priv_surface->frame_height / 16), 16);
2731 dys_kernel_param.input_surface = vp9_priv_surface->scaled_16x_surface_obj;
2733 dys_kernel_param.output_width = vp9_state->frame_width_16x;
2734 dys_kernel_param.output_height = vp9_state->frame_height_16x;
2735 dys_kernel_param.output_surface = vp9_priv_surface->dys_16x_surface_obj;
2737 gen9_vp9_dys_kernel(ctx, encode_state,
2744 if ((vp9_state->dys_ref_frame_flag & VP9_GOLDEN_REF) &&
2745 vp9_state->golden_ref_obj) {
2746 obj_surface = vp9_state->golden_ref_obj;
2747 vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
2749 input_surface = obj_surface;
2750 output_surface = vp9_priv_surface->dys_surface_obj;
2752 dys_kernel_param.input_width = vp9_priv_surface->frame_width;
2753 dys_kernel_param.input_height = vp9_priv_surface->frame_height;
2754 dys_kernel_param.input_surface = input_surface;
2756 dys_kernel_param.output_width = pic_param->frame_width_dst;
2757 dys_kernel_param.output_height = pic_param->frame_height_dst;
2758 dys_kernel_param.output_surface = output_surface;
2760 gen9_vp9_dys_kernel(ctx, encode_state,
2764 if (vp9_state->hme_enabled) {
2765 dys_kernel_param.input_width = ALIGN((vp9_priv_surface->frame_width / 4), 16);
2766 dys_kernel_param.input_width = ALIGN((vp9_priv_surface->frame_height / 4), 16);
2767 dys_kernel_param.input_surface = vp9_priv_surface->scaled_4x_surface_obj;
2769 dys_kernel_param.output_width = vp9_state->frame_width_4x;
2770 dys_kernel_param.output_height = vp9_state->frame_height_4x;
2771 dys_kernel_param.output_surface = vp9_priv_surface->dys_4x_surface_obj;
2773 gen9_vp9_dys_kernel(ctx, encode_state,
2777 /* Does it really need to do the 16x HME if the
2778 * resolution is different?
2779 * Maybe it should be restricted
2781 if (vp9_state->b16xme_enabled) {
2782 dys_kernel_param.input_width = ALIGN((vp9_priv_surface->frame_width / 16), 16);
2783 dys_kernel_param.input_height = ALIGN((vp9_priv_surface->frame_height / 16), 16);
2784 dys_kernel_param.input_surface = vp9_priv_surface->scaled_16x_surface_obj;
2786 dys_kernel_param.output_width = vp9_state->frame_width_16x;
2787 dys_kernel_param.output_height = vp9_state->frame_height_16x;
2788 dys_kernel_param.output_surface = vp9_priv_surface->dys_16x_surface_obj;
2790 gen9_vp9_dys_kernel(ctx, encode_state,
2797 if ((vp9_state->dys_ref_frame_flag & VP9_ALT_REF) &&
2798 vp9_state->alt_ref_obj) {
2799 obj_surface = vp9_state->alt_ref_obj;
2800 vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
2802 input_surface = obj_surface;
2803 output_surface = vp9_priv_surface->dys_surface_obj;
2805 dys_kernel_param.input_width = vp9_priv_surface->frame_width;
2806 dys_kernel_param.input_height = vp9_priv_surface->frame_height;
2807 dys_kernel_param.input_surface = input_surface;
2809 dys_kernel_param.output_width = pic_param->frame_width_dst;
2810 dys_kernel_param.output_height = pic_param->frame_height_dst;
2811 dys_kernel_param.output_surface = output_surface;
2813 gen9_vp9_dys_kernel(ctx, encode_state,
2817 if (vp9_state->hme_enabled) {
2818 dys_kernel_param.input_width = ALIGN((vp9_priv_surface->frame_width / 4), 16);
2819 dys_kernel_param.input_width = ALIGN((vp9_priv_surface->frame_height / 4), 16);
2820 dys_kernel_param.input_surface = vp9_priv_surface->scaled_4x_surface_obj;
2822 dys_kernel_param.output_width = vp9_state->frame_width_4x;
2823 dys_kernel_param.output_height = vp9_state->frame_height_4x;
2824 dys_kernel_param.output_surface = vp9_priv_surface->dys_4x_surface_obj;
2826 gen9_vp9_dys_kernel(ctx, encode_state,
2830 /* Does it really need to do the 16x HME if the
2831 * resolution is different?
2832 * Maybe it should be restricted
2834 if (vp9_state->b16xme_enabled) {
2835 dys_kernel_param.input_width = ALIGN((vp9_priv_surface->frame_width / 16), 16);
2836 dys_kernel_param.input_height = ALIGN((vp9_priv_surface->frame_height / 16), 16);
2837 dys_kernel_param.input_surface = vp9_priv_surface->scaled_16x_surface_obj;
2839 dys_kernel_param.output_width = vp9_state->frame_width_16x;
2840 dys_kernel_param.output_height = vp9_state->frame_height_16x;
2841 dys_kernel_param.output_surface = vp9_priv_surface->dys_16x_surface_obj;
2843 gen9_vp9_dys_kernel(ctx, encode_state,
2850 return VA_STATUS_SUCCESS;
2854 gen9_vp9_set_curbe_mbenc(VADriverContextP ctx,
2855 struct encode_state *encode_state,
2856 struct i965_gpe_context *gpe_context,
2857 struct intel_encoder_context *encoder_context,
2858 struct gen9_vp9_mbenc_curbe_param *curbe_param)
2860 struct gen9_vp9_state *vp9_state;
2861 VAEncMiscParameterTypeVP9PerSegmantParam *seg_param, tmp_seg_param;
2862 vp9_mbenc_curbe_data *curbe_cmd;
2863 VAEncPictureParameterBufferVP9 *pic_param;
2864 int i, segment_count;
2866 struct object_surface *obj_surface;
2867 struct gen9_surface_vp9 *vp9_priv_surface;
2869 vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
2871 if (!vp9_state || !vp9_state->pic_param)
2874 pic_param = curbe_param->ppic_param;
2875 seg_param = curbe_param->psegment_param;
2878 memset(&tmp_seg_param, 0, sizeof(tmp_seg_param));
2879 seg_param = &tmp_seg_param;
2882 curbe_cmd = i965_gpe_context_map_curbe(gpe_context);
2887 memset(curbe_cmd, 0, sizeof(vp9_mbenc_curbe_data));
2889 if (vp9_state->dys_in_use)
2891 curbe_cmd->dw0.frame_width = pic_param->frame_width_dst;
2892 curbe_cmd->dw0.frame_height = pic_param->frame_height_dst;
2896 curbe_cmd->dw0.frame_width = pic_param->frame_width_src;
2897 curbe_cmd->dw0.frame_height = pic_param->frame_height_src;
2900 curbe_cmd->dw1.frame_type = curbe_param->picture_coding_type;
2902 curbe_cmd->dw1.segmentation_enable = pic_param->pic_flags.bits.segmentation_enabled;
2903 if (pic_param->pic_flags.bits.segmentation_enabled)
2908 curbe_cmd->dw1.ref_frame_flags = curbe_param->ref_frame_flag;
2910 //right now set them to normal settings
2911 if (curbe_param->picture_coding_type)
2913 switch (vp9_state->target_usage)
2915 case INTEL_ENC_VP9_TU_QUALITY:
2916 curbe_cmd->dw1.min_16for32_check = 0x00;
2917 curbe_cmd->dw2.multi_pred = 0x02;
2918 curbe_cmd->dw2.len_sp = 0x39;
2919 curbe_cmd->dw2.search_x = 0x30;
2920 curbe_cmd->dw2.search_y = 0x28;
2921 curbe_cmd->dw3.min_ref_for32_check = 0x01;
2922 curbe_cmd->dw4.skip16_threshold = 0x000A;
2923 curbe_cmd->dw4.disable_mr_threshold = 0x000C;
2925 memcpy(&curbe_cmd->dw16,
2926 vp9_diamond_ime_search_path_delta,
2927 14 * sizeof(unsigned int));
2929 case INTEL_ENC_VP9_TU_PERFORMANCE:
2930 curbe_cmd->dw1.min_16for32_check = 0x02;
2931 curbe_cmd->dw2.multi_pred = 0x00;
2932 curbe_cmd->dw2.len_sp = 0x10;
2933 curbe_cmd->dw2.search_x = 0x20;
2934 curbe_cmd->dw2.search_y = 0x20;
2935 curbe_cmd->dw3.min_ref_for32_check = 0x03;
2936 curbe_cmd->dw4.skip16_threshold = 0x0014;
2937 curbe_cmd->dw4.disable_mr_threshold = 0x0016;
2939 memcpy(&curbe_cmd->dw16,
2940 vp9_fullspiral_ime_search_path_delta,
2941 14 * sizeof(unsigned int));
2944 default: // normal settings
2945 curbe_cmd->dw1.min_16for32_check = 0x01;
2946 curbe_cmd->dw2.multi_pred = 0x00;
2947 curbe_cmd->dw2.len_sp = 0x19;
2948 curbe_cmd->dw2.search_x = 0x30;
2949 curbe_cmd->dw2.search_y = 0x28;
2950 curbe_cmd->dw3.min_ref_for32_check = 0x02;
2951 curbe_cmd->dw4.skip16_threshold = 0x000F;
2952 curbe_cmd->dw4.disable_mr_threshold = 0x0011;
2954 memcpy(&curbe_cmd->dw16,
2955 vp9_diamond_ime_search_path_delta,
2956 14 * sizeof(unsigned int));
2960 curbe_cmd->dw3.hme_enabled = curbe_param->hme_enabled;
2961 curbe_cmd->dw3.multi_ref_qp_check = curbe_param->multi_ref_qp_check;
2962 // co-located predictor must be disabled when dynamic scaling is enabled
2963 curbe_cmd->dw3.disable_temp_pred = vp9_state->dys_in_use;
2966 curbe_cmd->dw5.inter_round = 0;
2967 curbe_cmd->dw5.intra_round = 4;
2968 curbe_cmd->dw5.frame_qpindex = pic_param->luma_ac_qindex;
2970 for (i = 0; i < segment_count; i++)
2972 seg_qindex = pic_param->luma_ac_qindex + pic_param->luma_dc_qindex_delta
2973 + seg_param->seg_data[i].segment_qindex_delta;
2975 seg_qindex = CLAMP(0, 255, seg_qindex);
2977 if (curbe_param->picture_coding_type)
2978 memcpy(&curbe_cmd->segments[i],
2979 &intel_vp9_costlut_p[seg_qindex * 16],
2980 16 * sizeof(unsigned int));
2982 memcpy(&curbe_cmd->segments[i],
2983 &intel_vp9_costlut_key[seg_qindex * 16],
2984 16 * sizeof(unsigned int));
2987 if (curbe_param->picture_coding_type)
2989 if (curbe_cmd->dw3.multi_ref_qp_check)
2991 if (curbe_param->ref_frame_flag & 0x01)
2993 obj_surface = curbe_param->last_ref_obj;
2994 vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
2995 curbe_cmd->dw8.last_ref_qp = vp9_quant_dc[vp9_priv_surface->qp_value];
2998 if (curbe_param->ref_frame_flag & 0x02)
3000 obj_surface = curbe_param->golden_ref_obj;
3001 vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
3002 curbe_cmd->dw8.golden_ref_qp = vp9_quant_dc[vp9_priv_surface->qp_value];
3005 if (curbe_param->ref_frame_flag & 0x04)
3007 obj_surface = curbe_param->alt_ref_obj;
3008 vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
3009 curbe_cmd->dw9.alt_ref_qp = vp9_quant_dc[vp9_priv_surface->qp_value];
3013 curbe_cmd->dw160.enc_curr_y_surf_bti = VP9_BTI_MBENC_CURR_Y_G9;
3014 curbe_cmd->dw162.enc_curr_nv12_surf_bti = VP9_BTI_MBENC_CURR_NV12_G9;
3015 curbe_cmd->dw166.segmentation_map_bti = VP9_BTI_MBENC_SEGMENTATION_MAP_G9;
3016 curbe_cmd->dw172.mode_decision_bti = VP9_BTI_MBENC_MODE_DECISION_G9;
3017 curbe_cmd->dw167.tx_curbe_bti = VP9_BTI_MBENC_TX_CURBE_G9;
3018 curbe_cmd->dw168.hme_mvdata_bti = VP9_BTI_MBENC_HME_MV_DATA_G9;
3019 curbe_cmd->dw169.hme_distortion_bti = VP9_BTI_MBENC_HME_DISTORTION_G9;
3020 curbe_cmd->dw171.mode_decision_prev_bti = VP9_BTI_MBENC_MODE_DECISION_PREV_G9;
3021 curbe_cmd->dw172.mode_decision_bti = VP9_BTI_MBENC_MODE_DECISION_G9;
3022 curbe_cmd->dw173.output_16x16_inter_modes_bti = VP9_BTI_MBENC_OUT_16x16_INTER_MODES_G9;
3023 curbe_cmd->dw174.cu_record_bti = VP9_BTI_MBENC_CU_RECORDS_G9;
3024 curbe_cmd->dw175.pak_data_bti = VP9_BTI_MBENC_PAK_DATA_G9;
3026 i965_gpe_context_unmap_curbe(gpe_context);
/*
 * gen9_vp9_send_mbenc_surface:
 * Binds the input, reference and output surfaces needed by the selected
 * MBENC media kernel into the GPE context's binding table, just before
 * the kernel is dispatched.  The surface list depends on
 * mbenc_param->media_state_type: each case below binds only what that
 * kernel variant (I_32x32, I_16x16, P, TX) reads and writes.
 * NOTE(review): several structural lines (braces/breaks and some call
 * arguments) are not visible in this view; the per-case grouping below
 * follows the visible binding-table indices.
 */
3031 gen9_vp9_send_mbenc_surface(VADriverContextP ctx,
3032 struct encode_state *encode_state,
3033 struct i965_gpe_context *gpe_context,
3034 struct intel_encoder_context *encoder_context,
3035 struct gen9_vp9_mbenc_surface_param *mbenc_param)
3037 struct gen9_vp9_state *vp9_state;
3038 unsigned int res_size;
3039 unsigned int frame_width_in_sb, frame_height_in_sb;
3040 struct object_surface *obj_surface, *tmp_input;
3041 struct gen9_surface_vp9 *vp9_priv_surface;
3044 vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
3046 if (!vp9_state || !vp9_state->pic_param)
/* Super-block (64x64) grid dimensions, used to size PAK/CU-record buffers. */
3049 frame_width_in_sb = ALIGN(mbenc_param->frame_width, 64) / 64;
3050 frame_height_in_sb = ALIGN(mbenc_param->frame_height, 64) / 64;
3051 media_function = mbenc_param->media_state_type;
3053 switch (media_function)
/* Key-frame 32x32 analysis: current Y/UV planes, optional segmentation
 * map, and the mode-decision buffer (16 dwords per MB). */
3055 case VP9_MEDIA_STATE_MBENC_I_32x32:
3057 obj_surface = mbenc_param->curr_frame_obj;
3059 gen9_add_2d_gpe_surface(ctx,
3064 I965_SURFACEFORMAT_R8_UNORM,
3065 VP9_BTI_MBENC_CURR_Y_G9);
3067 gen9_add_2d_gpe_surface(ctx,
3072 I965_SURFACEFORMAT_R16_UINT,
3073 VP9_BTI_MBENC_CURR_UV_G9);
3076 if (mbenc_param->segmentation_enabled)
3078 gen9_add_buffer_2d_gpe_surface(ctx,
3080 mbenc_param->pres_segmentation_map,
3082 I965_SURFACEFORMAT_R8_UNORM,
3083 VP9_BTI_MBENC_SEGMENTATION_MAP_G9);
3087 res_size = 16 * mbenc_param->frame_width_in_mb *
3088 mbenc_param->frame_height_in_mb * sizeof(unsigned int);
3089 gen9_add_buffer_gpe_surface(ctx,
3091 mbenc_param->pres_mode_decision,
3095 VP9_BTI_MBENC_MODE_DECISION_G9);
/* Key-frame 16x16 refinement: same planes as I_32x32 plus a VME (adv)
 * view of the source and the TX kernel's CURBE buffer. */
3099 case VP9_MEDIA_STATE_MBENC_I_16x16:
3101 obj_surface = mbenc_param->curr_frame_obj;
3103 gen9_add_2d_gpe_surface(ctx,
3108 I965_SURFACEFORMAT_R8_UNORM,
3109 VP9_BTI_MBENC_CURR_Y_G9);
3111 gen9_add_2d_gpe_surface(ctx,
3116 I965_SURFACEFORMAT_R16_UINT,
3117 VP9_BTI_MBENC_CURR_UV_G9);
3119 gen9_add_adv_gpe_surface(ctx, gpe_context,
3121 VP9_BTI_MBENC_CURR_NV12_G9);
3123 if (mbenc_param->segmentation_enabled)
3125 gen9_add_buffer_2d_gpe_surface(ctx,
3127 mbenc_param->pres_segmentation_map,
3129 I965_SURFACEFORMAT_R8_UNORM,
3130 VP9_BTI_MBENC_SEGMENTATION_MAP_G9);
3134 res_size = 16 * mbenc_param->frame_width_in_mb *
3135 mbenc_param->frame_height_in_mb * sizeof(unsigned int);
3136 gen9_add_buffer_gpe_surface(ctx,
3138 mbenc_param->pres_mode_decision,
3142 VP9_BTI_MBENC_MODE_DECISION_G9);
/* Expose the TX kernel's CURBE bo so this kernel can patch it in place. */
3146 gen9_add_dri_buffer_gpe_surface(ctx,
3148 mbenc_param->gpe_context_tx->curbe.bo,
3150 ALIGN(res_size, 64),
3151 mbenc_param->gpe_context_tx->curbe.offset,
3152 VP9_BTI_MBENC_TX_CURBE_G9);
/* Inter (P) kernel: source planes plus up to three reference frames
 * (last/golden/alt), optional HME MV/distortion inputs, the ping-pong
 * mode-decision pair, and the 16x16 inter-mode output. */
3156 case VP9_MEDIA_STATE_MBENC_P:
3158 obj_surface = mbenc_param->curr_frame_obj;
3160 gen9_add_2d_gpe_surface(ctx,
3165 I965_SURFACEFORMAT_R8_UNORM,
3166 VP9_BTI_MBENC_CURR_Y_G9);
3168 gen9_add_2d_gpe_surface(ctx, gpe_context,
3172 I965_SURFACEFORMAT_R16_UINT,
3173 VP9_BTI_MBENC_CURR_UV_G9);
3175 gen9_add_adv_gpe_surface(ctx, gpe_context,
3177 VP9_BTI_MBENC_CURR_NV12_G9);
3179 if (mbenc_param->last_ref_obj)
3181 obj_surface = mbenc_param->last_ref_obj;
3182 vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
/* When dynamic scaling resized this reference, bind the rescaled
 * surface instead of the original. */
3184 if (vp9_state->dys_in_use &&
3185 ((vp9_priv_surface->frame_width != vp9_state->frame_width) ||
3186 (vp9_priv_surface->frame_height != vp9_state->frame_height)))
3187 tmp_input = vp9_priv_surface->dys_surface_obj;
3189 tmp_input = obj_surface;
/* Each reference occupies two consecutive binding-table slots. */
3191 gen9_add_adv_gpe_surface(ctx, gpe_context,
3193 VP9_BTI_MBENC_LAST_NV12_G9);
3195 gen9_add_adv_gpe_surface(ctx, gpe_context,
3197 VP9_BTI_MBENC_LAST_NV12_G9 + 1);
3201 if (mbenc_param->golden_ref_obj)
3203 obj_surface = mbenc_param->golden_ref_obj;
3204 vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
3206 if (vp9_state->dys_in_use &&
3207 ((vp9_priv_surface->frame_width != vp9_state->frame_width) ||
3208 (vp9_priv_surface->frame_height != vp9_state->frame_height)))
3209 tmp_input = vp9_priv_surface->dys_surface_obj;
3211 tmp_input = obj_surface;
3213 gen9_add_adv_gpe_surface(ctx, gpe_context,
3215 VP9_BTI_MBENC_GOLD_NV12_G9);
3217 gen9_add_adv_gpe_surface(ctx, gpe_context,
3219 VP9_BTI_MBENC_GOLD_NV12_G9 + 1);
3223 if (mbenc_param->alt_ref_obj)
3225 obj_surface = mbenc_param->alt_ref_obj;
3226 vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
3228 if (vp9_state->dys_in_use &&
3229 ((vp9_priv_surface->frame_width != vp9_state->frame_width) ||
3230 (vp9_priv_surface->frame_height != vp9_state->frame_height)))
3231 tmp_input = vp9_priv_surface->dys_surface_obj;
3233 tmp_input = obj_surface;
3235 gen9_add_adv_gpe_surface(ctx, gpe_context,
3237 VP9_BTI_MBENC_ALTREF_NV12_G9);
3239 gen9_add_adv_gpe_surface(ctx, gpe_context,
3241 VP9_BTI_MBENC_ALTREF_NV12_G9 + 1);
/* HME outputs from the 4x-downscaled ME pass feed the full-res search. */
3245 if (mbenc_param->hme_enabled)
3247 gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
3248 mbenc_param->ps4x_memv_data_buffer,
3250 I965_SURFACEFORMAT_R8_UNORM,
3251 VP9_BTI_MBENC_HME_MV_DATA_G9);
3253 gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
3254 mbenc_param->ps4x_memv_distortion_buffer,
3256 I965_SURFACEFORMAT_R8_UNORM,
3257 VP9_BTI_MBENC_HME_DISTORTION_G9);
3260 if (mbenc_param->segmentation_enabled)
3262 gen9_add_buffer_2d_gpe_surface(ctx,
3264 mbenc_param->pres_segmentation_map,
3266 I965_SURFACEFORMAT_R8_UNORM,
3267 VP9_BTI_MBENC_SEGMENTATION_MAP_G9);
/* Previous-frame mode decisions (read) and current ones (write) come
 * from the double-buffered res_mode_decision pair. */
3271 res_size = 16 * mbenc_param->frame_width_in_mb *
3272 mbenc_param->frame_height_in_mb * sizeof(unsigned int);
3273 gen9_add_buffer_gpe_surface(ctx,
3275 mbenc_param->pres_mode_decision_prev,
3279 VP9_BTI_MBENC_MODE_DECISION_PREV_G9);
3281 gen9_add_buffer_gpe_surface(ctx,
3283 mbenc_param->pres_mode_decision,
3287 VP9_BTI_MBENC_MODE_DECISION_G9);
3289 gen9_add_buffer_2d_gpe_surface(ctx,
3291 mbenc_param->pres_output_16x16_inter_modes,
3293 I965_SURFACEFORMAT_R8_UNORM,
3294 VP9_BTI_MBENC_OUT_16x16_INTER_MODES_G9);
3298 gen9_add_dri_buffer_gpe_surface(ctx,
3300 mbenc_param->gpe_context_tx->curbe.bo,
3302 ALIGN(res_size, 64),
3303 mbenc_param->gpe_context_tx->curbe.offset,
3304 VP9_BTI_MBENC_TX_CURBE_G9);
/* TX kernel: consumes the mode decisions and produces PAK data plus
 * per-superblock CU records for the bitstream packer. */
3309 case VP9_MEDIA_STATE_MBENC_TX:
3311 obj_surface = mbenc_param->curr_frame_obj;
3313 gen9_add_2d_gpe_surface(ctx,
3318 I965_SURFACEFORMAT_R8_UNORM,
3319 VP9_BTI_MBENC_CURR_Y_G9);
3321 gen9_add_2d_gpe_surface(ctx,
3326 I965_SURFACEFORMAT_R16_UINT,
3327 VP9_BTI_MBENC_CURR_UV_G9);
3329 if (mbenc_param->segmentation_enabled)
3331 gen9_add_buffer_2d_gpe_surface(ctx,
3333 mbenc_param->pres_segmentation_map,
3335 I965_SURFACEFORMAT_R8_UNORM,
3336 VP9_BTI_MBENC_SEGMENTATION_MAP_G9);
3340 res_size = 16 * mbenc_param->frame_width_in_mb *
3341 mbenc_param->frame_height_in_mb * sizeof(unsigned int);
3342 gen9_add_buffer_gpe_surface(ctx,
3344 mbenc_param->pres_mode_decision,
3348 VP9_BTI_MBENC_MODE_DECISION_G9);
/* 4 dwords of PAK data per super-block. */
3350 res_size = frame_width_in_sb * frame_height_in_sb * 4 * sizeof(unsigned int);
3351 gen9_add_buffer_gpe_surface(ctx,
3353 mbenc_param->pres_mb_code_surface,
3357 VP9_BTI_MBENC_PAK_DATA_G9);
/* 64 CU records of 16 dwords per super-block, written past
 * mb_data_offset in the same mb-code surface. */
3360 res_size = frame_width_in_sb * frame_height_in_sb *
3361 64 * 16 * sizeof(unsigned int);
3363 gen9_add_buffer_gpe_surface(ctx,
3365 mbenc_param->pres_mb_code_surface,
3368 mbenc_param->mb_data_offset,
3369 VP9_BTI_MBENC_CU_RECORDS_G9);
/*
 * gen9_vp9_mbenc_kernel:
 * Selects and launches one MBENC media kernel (I_32x32 / I_16x16 / P / TX):
 * picks the GPE context for the requested media_function, fills the CURBE
 * (unless the BRC-update kernel already did), binds all surfaces, then runs
 * the kernel through the media-object walker.
 * Returns VA_STATUS_SUCCESS or an error code on bad state/media_function.
 *
 * Fix: the dynamic-scaling (DYS) check below compared frame_width_src
 * against frame_height_dst — width vs. height — so a frame whose width
 * alone was rescaled could use the wrong source surface.  It now mirrors
 * the width-to-width / height-to-height comparison used by the identical
 * DYS check in gen9_vme_gpe_kernel_prepare_vp9.
 */
3379 gen9_vp9_mbenc_kernel(VADriverContextP ctx,
3380 struct encode_state *encode_state,
3381 struct intel_encoder_context *encoder_context,
3384 struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
3385 struct i965_gpe_context *gpe_context, *tx_gpe_context;
3386 struct gpe_media_object_walker_parameter media_object_walker_param;
3387 struct gpe_encoder_kernel_walker_parameter kernel_walker_param;
3388 unsigned int resolution_x, resolution_y;
3389 struct gen9_vp9_state *vp9_state;
3390 VAEncPictureParameterBufferVP9 *pic_param;
3391 struct gen9_vp9_mbenc_curbe_param curbe_param;
3392 struct gen9_vp9_mbenc_surface_param surface_param;
3393 VAStatus va_status = VA_STATUS_SUCCESS;
3394 int mbenc_gpe_index = 0;
3395 struct object_surface *obj_surface;
3396 struct gen9_surface_vp9 *vp9_priv_surface;
3398 vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
3400 if (!vp9_state || !vp9_state->pic_param)
3401 return VA_STATUS_ERROR_ENCODING_ERROR;
3403 pic_param = vp9_state->pic_param;
/* Map the media state onto the matching pre-created GPE context index. */
3405 switch (media_function)
3407 case VP9_MEDIA_STATE_MBENC_I_32x32:
3408 mbenc_gpe_index = VP9_MBENC_IDX_KEY_32x32;
3411 case VP9_MEDIA_STATE_MBENC_I_16x16:
3412 mbenc_gpe_index = VP9_MBENC_IDX_KEY_16x16;
3415 case VP9_MEDIA_STATE_MBENC_P:
3416 mbenc_gpe_index = VP9_MBENC_IDX_INTER;
3419 case VP9_MEDIA_STATE_MBENC_TX:
3420 mbenc_gpe_index = VP9_MBENC_IDX_TX;
3424 va_status = VA_STATUS_ERROR_OPERATION_FAILED;
3428 gpe_context = &(vme_context->mbenc_context.gpe_contexts[mbenc_gpe_index]);
/* The TX context is always needed: its CURBE bo is patched by I_16x16/P. */
3429 tx_gpe_context = &(vme_context->mbenc_context.gpe_contexts[VP9_MBENC_IDX_TX]);
3431 gen9_gpe_reset_binding_table(ctx, gpe_context);
/* Skip CURBE setup when the BRC-update kernel already programmed it. */
3434 if (!vp9_state->mbenc_curbe_set_in_brc_update)
3436 if(media_function == VP9_MEDIA_STATE_MBENC_I_32x32 ||
3437 media_function == VP9_MEDIA_STATE_MBENC_P)
3439 memset(&curbe_param, 0, sizeof(curbe_param));
3440 curbe_param.ppic_param = vp9_state->pic_param;
3441 curbe_param.pseq_param = vp9_state->seq_param;
3442 curbe_param.psegment_param = vp9_state->segment_param;
3443 curbe_param.frame_width_in_mb = vp9_state->frame_width_in_mb;
3444 curbe_param.frame_height_in_mb = vp9_state->frame_height_in_mb;
3445 curbe_param.last_ref_obj = vp9_state->last_ref_obj;
3446 curbe_param.golden_ref_obj = vp9_state->golden_ref_obj;
3447 curbe_param.alt_ref_obj = vp9_state->alt_ref_obj;
3448 curbe_param.hme_enabled = vp9_state->hme_enabled;
3449 curbe_param.ref_frame_flag = vp9_state->ref_frame_flag;
3450 curbe_param.picture_coding_type = vp9_state->picture_coding_type;
3451 curbe_param.media_state_type = media_function;
3452 curbe_param.mbenc_curbe_set_in_brc_update = vp9_state->mbenc_curbe_set_in_brc_update;
3454 vme_context->pfn_set_curbe_mbenc(ctx,
/* Gather every surface the selected kernel may touch; the send helper
 * filters by media_state_type. */
3462 memset(&surface_param, 0, sizeof(surface_param));
3463 surface_param.media_state_type = media_function;
3464 surface_param.picture_coding_type = vp9_state->picture_coding_type;
3465 surface_param.frame_width = vp9_state->frame_width;
3466 surface_param.frame_height = vp9_state->frame_height;
3467 surface_param.frame_width_in_mb = vp9_state->frame_width_in_mb;
3468 surface_param.frame_height_in_mb = vp9_state->frame_height_in_mb;
3469 surface_param.hme_enabled = vp9_state->hme_enabled;
3470 surface_param.segmentation_enabled = pic_param->pic_flags.bits.segmentation_enabled;
3471 surface_param.pres_segmentation_map = &vme_context->mb_segment_map_surface;
3472 surface_param.ps4x_memv_data_buffer = &vme_context->s4x_memv_data_buffer;
3473 surface_param.ps4x_memv_distortion_buffer = &vme_context->s4x_memv_distortion_buffer;
3474 surface_param.pres_mode_decision =
3475 &vme_context->res_mode_decision[vp9_state->curr_mode_decision_index];
3476 surface_param.pres_mode_decision_prev =
3477 &vme_context->res_mode_decision[!vp9_state->curr_mode_decision_index];
3478 surface_param.pres_output_16x16_inter_modes = &vme_context->res_output_16x16_inter_modes;
3479 surface_param.pres_mbenc_curbe_buffer = NULL;
3480 surface_param.last_ref_obj = vp9_state->last_ref_obj;
3481 surface_param.golden_ref_obj = vp9_state->golden_ref_obj;
3482 surface_param.alt_ref_obj = vp9_state->alt_ref_obj;
3483 surface_param.pres_mb_code_surface = &vme_context->res_mb_code_surface;
3484 surface_param.gpe_context_tx = tx_gpe_context;
3485 surface_param.mb_data_offset = vp9_state->mb_data_offset;
3487 obj_surface = encode_state->reconstructed_object;
3488 vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
/* With dynamic scaling active and src != dst dimensions, the kernel must
 * read the rescaled source, not the application-provided one.
 * (Was: frame_width_src != frame_height_dst — a width/height mix-up.) */
3489 if (vp9_state->dys_in_use &&
3490 (pic_param->frame_width_src != pic_param->frame_width_dst ||
3491 pic_param->frame_height_src != pic_param->frame_height_dst)) {
3492 obj_surface = vp9_priv_surface->dys_surface_obj;
3494 obj_surface = encode_state->input_yuv_object;
3496 surface_param.curr_frame_obj = obj_surface;
3498 vme_context->pfn_send_mbenc_surface(ctx,
/* Walker resolution: I_32x32 walks 32x32 blocks, everything else 16x16. */
3504 if (media_function == VP9_MEDIA_STATE_MBENC_I_32x32) {
3505 resolution_x = ALIGN(vp9_state->frame_width, 32) / 32;
3506 resolution_y = ALIGN(vp9_state->frame_height, 32) / 32;
3508 resolution_x = ALIGN(vp9_state->frame_width, 16) / 16;
3509 resolution_y = ALIGN(vp9_state->frame_height, 16) / 16;
3512 memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
3513 kernel_walker_param.resolution_x = resolution_x;
3514 kernel_walker_param.resolution_y = resolution_y;
/* P and I_16x16 have inter-block dependencies: use the 45Z scoreboard
 * walking pattern; the other kernels run fully independent blocks. */
3516 if (media_function == VP9_MEDIA_STATE_MBENC_P ||
3517 media_function == VP9_MEDIA_STATE_MBENC_I_16x16) {
3518 kernel_walker_param.use_scoreboard = 1;
3519 kernel_walker_param.no_dependency = 0;
3520 kernel_walker_param.walker_degree = VP9_45Z_DEGREE;
3522 kernel_walker_param.use_scoreboard = 0;
3523 kernel_walker_param.no_dependency = 1;
3526 gen8_gpe_setup_interface_data(ctx, gpe_context);
3528 gen9_init_media_object_walker_parameter(encoder_context, &kernel_walker_param, &media_object_walker_param);
3530 gen9_run_kernel_media_object_walker(ctx, encoder_context,
3533 &media_object_walker_param);
/*
 * gen9_init_gpe_context_vp9:
 * Populates a GPE context with the sizing needed to run one VP9 encoder
 * kernel: CURBE length, sampler and interface-descriptor (IDRT) entries,
 * binding-table/surface-state layout, and the VFE thread/URB budget
 * derived from the per-kernel parameters and the detected EU count.
 */
3538 gen9_init_gpe_context_vp9(VADriverContextP ctx,
3539 struct i965_gpe_context *gpe_context,
3540 struct vp9_encoder_kernel_parameter *kernel_param)
3542 struct i965_driver_data *i965 = i965_driver_data(ctx);
3544 gpe_context->curbe.length = kernel_param->curbe_size; // in bytes
/* Samplers are optional; allocate a single 64-byte-aligned entry only
 * when the kernel actually declares a sampler size. */
3546 gpe_context->sampler.entry_size = 0;
3547 gpe_context->sampler.max_entries = 0;
3549 if (kernel_param->sampler_size) {
3550 gpe_context->sampler.entry_size = ALIGN(kernel_param->sampler_size, 64);
3551 gpe_context->sampler.max_entries = 1;
3554 gpe_context->idrt.entry_size = ALIGN(sizeof(struct gen8_interface_descriptor_data), 64); // 8 dws, 1 register
3555 gpe_context->idrt.max_entries = NUM_KERNELS_PER_GPE_CONTEXT;
/* Binding table (4 bytes per entry) first, then the padded surface
 * states; both sub-regions 64-byte aligned. */
3557 gpe_context->surface_state_binding_table.max_entries = MAX_VP9_ENCODER_SURFACES;
3558 gpe_context->surface_state_binding_table.binding_table_offset = 0;
3559 gpe_context->surface_state_binding_table.surface_state_offset = ALIGN(MAX_VP9_ENCODER_SURFACES * 4, 64);
3560 gpe_context->surface_state_binding_table.length = ALIGN(MAX_VP9_ENCODER_SURFACES * 4, 64) + ALIGN(MAX_VP9_ENCODER_SURFACES * SURFACE_STATE_PADDED_SIZE_GEN9, 64);
/* 6 threads per EU when the EU count is known; otherwise assume a
 * 16-EU part at 7 threads each. */
3562 if (i965->intel.eu_total > 0)
3563 gpe_context->vfe_state.max_num_threads = 6 * i965->intel.eu_total;
3565 gpe_context->vfe_state.max_num_threads = 112; // 16 EU * 7 threads
3567 gpe_context->vfe_state.curbe_allocation_size = MAX(1, ALIGN(gpe_context->curbe.length, 32) >> 5); // in registers
3568 gpe_context->vfe_state.urb_entry_size = MAX(1, ALIGN(kernel_param->inline_data_size, 32) >> 5); // in registers
/* Whatever URB space remains after CURBE and interface descriptors is
 * divided into URB entries, clamped to the hardware range [1, 127]. */
3569 gpe_context->vfe_state.num_urb_entries = (MAX_URB_SIZE -
3570 gpe_context->vfe_state.curbe_allocation_size -
3571 ((gpe_context->idrt.entry_size >> 5) *
3572 gpe_context->idrt.max_entries)) / gpe_context->vfe_state.urb_entry_size;
3573 gpe_context->vfe_state.num_urb_entries = CLAMP(1, 127, gpe_context->vfe_state.num_urb_entries);
3574 gpe_context->vfe_state.gpgpu_mode = 0;
/*
 * gen9_init_vfe_scoreboard_vp9:
 * Programs the VFE hardware scoreboard (dependency tracking between
 * media threads) in the GPE context.  The caller's mask/type/enable are
 * applied first; when walkpat_flag is set they are overridden with the
 * fixed 4-dependency 26-degree walking pattern, otherwise a different
 * delta table is used.  Deltas are 4-bit two's-complement relative
 * block offsets (0xF == -1, 0xE == -2).
 * NOTE(review): the if/else boundary between the two delta tables is
 * partially elided in this view — verify against the full file.
 */
3578 gen9_init_vfe_scoreboard_vp9(struct i965_gpe_context *gpe_context,
3579 struct vp9_encoder_scoreboard_parameter *scoreboard_param)
3581 gpe_context->vfe_desc5.scoreboard0.mask = scoreboard_param->mask;
3582 gpe_context->vfe_desc5.scoreboard0.type = scoreboard_param->type;
3583 gpe_context->vfe_desc5.scoreboard0.enable = scoreboard_param->enable;
3585 if (scoreboard_param->walkpat_flag) {
/* Walking-pattern mode: 4 dependencies (mask 0x0F), non-stalling type. */
3586 gpe_context->vfe_desc5.scoreboard0.mask = 0x0F;
3587 gpe_context->vfe_desc5.scoreboard0.type = 1;
3589 gpe_context->vfe_desc6.scoreboard1.delta_x0 = 0x0;
3590 gpe_context->vfe_desc6.scoreboard1.delta_y0 = 0xF;
3592 gpe_context->vfe_desc6.scoreboard1.delta_x1 = 0x0;
3593 gpe_context->vfe_desc6.scoreboard1.delta_y1 = 0xE;
3595 gpe_context->vfe_desc6.scoreboard1.delta_x2 = 0xF;
3596 gpe_context->vfe_desc6.scoreboard1.delta_y2 = 0x3;
3598 gpe_context->vfe_desc6.scoreboard1.delta_x3 = 0xF;
3599 gpe_context->vfe_desc6.scoreboard1.delta_y3 = 0x1;
/* Default table: left, above, above-right, above-left neighbors. */
3602 gpe_context->vfe_desc6.scoreboard1.delta_x0 = 0xF;
3603 gpe_context->vfe_desc6.scoreboard1.delta_y0 = 0x0;
3606 gpe_context->vfe_desc6.scoreboard1.delta_x1 = 0x0;
3607 gpe_context->vfe_desc6.scoreboard1.delta_y1 = 0xF;
3610 gpe_context->vfe_desc6.scoreboard1.delta_x2 = 0x1;
3611 gpe_context->vfe_desc6.scoreboard1.delta_y2 = 0xF;
3614 gpe_context->vfe_desc6.scoreboard1.delta_x3 = 0xF;
3615 gpe_context->vfe_desc6.scoreboard1.delta_y3 = 0xF;
3618 gpe_context->vfe_desc7.scoreboard2.delta_x4 = 0xF;
3619 gpe_context->vfe_desc7.scoreboard2.delta_y4 = 0x1;
3622 gpe_context->vfe_desc7.scoreboard2.delta_x5 = 0x0;
3623 gpe_context->vfe_desc7.scoreboard2.delta_y5 = 0xE;
3626 gpe_context->vfe_desc7.scoreboard2.delta_x6 = 0x1;
3627 gpe_context->vfe_desc7.scoreboard2.delta_y6 = 0xE;
3630 gpe_context->vfe_desc7.scoreboard2.delta_x6 = 0xF;
3631 gpe_context->vfe_desc7.scoreboard2.delta_y6 = 0xE;
3635 #define VP9_MI_BLOCK_MASK 0x07
3636 #define VP9_VME_REF_WIN 48
/*
 * gen9_encode_vp9_check_parameter:
 * Validates the per-frame encode parameters and derives the per-frame
 * encoder state: coded buffer, reconstructed/reference surfaces,
 * reference flags (with duplicate-reference pruning), BRC rates,
 * scaled dimensions, dynamic-scaling (DYS) usage, and HME enabling.
 * Returns VA_STATUS_SUCCESS or a VA error on invalid input.
 */
3639 gen9_encode_vp9_check_parameter(VADriverContextP ctx,
3640 struct encode_state *encode_state,
3641 struct intel_encoder_context *encoder_context)
3643 struct i965_driver_data *i965 = i965_driver_data(ctx);
3644 struct gen9_vp9_state *vp9_state;
3645 VAEncPictureParameterBufferVP9 *pic_param;
3646 VAEncMiscParameterTypeVP9PerSegmantParam *seg_param;
3647 VAEncSequenceParameterBufferVP9 *seq_param;
3648 struct object_surface *obj_surface;
3649 struct object_buffer *obj_buffer;
3650 struct gen9_surface_vp9 *vp9_priv_surface;
3652 vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
3654 if (!encode_state->pic_param_ext ||
3655 !encode_state->pic_param_ext->buffer) {
3656 return VA_STATUS_ERROR_INVALID_PARAMETER;
3658 pic_param = (VAEncPictureParameterBufferVP9 *)encode_state->pic_param_ext->buffer;
/* All dimensions must be multiples of the 8-pixel MI block. */
3660 if (pic_param->frame_width_src & VP9_MI_BLOCK_MASK ||
3661 pic_param->frame_height_src & VP9_MI_BLOCK_MASK ||
3662 pic_param->frame_width_dst & VP9_MI_BLOCK_MASK ||
3663 pic_param->frame_height_dst & VP9_MI_BLOCK_MASK)
3664 return VA_STATUS_ERROR_INVALID_PARAMETER;
3666 obj_buffer = BUFFER(pic_param->coded_buf);
3669 !obj_buffer->buffer_store ||
3670 !obj_buffer->buffer_store->bo)
3671 return VA_STATUS_ERROR_INVALID_PARAMETER;
3673 encode_state->coded_buf_object = obj_buffer;
/* The status report is written into the coded buffer's bo. */
3675 vp9_state->status_buffer.bo = obj_buffer->buffer_store->bo;
3677 encode_state->reconstructed_object = SURFACE(pic_param->reconstructed_frame);
3679 if (!encode_state->reconstructed_object ||
3680 !encode_state->input_yuv_object)
3681 return VA_STATUS_ERROR_INVALID_PARAMETER;
3683 vp9_state->curr_frame = pic_param->reconstructed_frame;
3684 vp9_state->ref_frame_flag = 0;
3685 if (pic_param->pic_flags.bits.frame_type == KEY_FRAME ||
3686 pic_param->pic_flags.bits.intra_only) {
3687 /* this will be regarded as I-frame type */
3688 vp9_state->picture_coding_type = 0;
3689 vp9_state->last_ref_obj = NULL;
3690 vp9_state->golden_ref_obj = NULL;
3691 vp9_state->alt_ref_obj = NULL;
3693 vp9_state->picture_coding_type = 1;
/* Inter frame: resolve last/golden/alt references; each unusable
 * reference drops its bit from ref_frame_flag. */
3694 vp9_state->ref_frame_flag = pic_param->ref_flags.bits.ref_frame_ctrl_l0 |
3695 pic_param->ref_flags.bits.ref_frame_ctrl_l1;
3697 obj_surface = SURFACE(pic_param->reference_frames[pic_param->ref_flags.bits.ref_last_idx]);
3698 vp9_state->last_ref_obj = obj_surface;
3701 !obj_surface->private_data) {
3702 vp9_state->last_ref_obj = NULL;
3703 vp9_state->ref_frame_flag &= ~(VP9_LAST_REF);
3706 obj_surface = SURFACE(pic_param->reference_frames[pic_param->ref_flags.bits.ref_gf_idx]);
3707 vp9_state->golden_ref_obj = obj_surface;
3710 !obj_surface->private_data) {
3711 vp9_state->golden_ref_obj = NULL;
3712 vp9_state->ref_frame_flag &= ~(VP9_GOLDEN_REF);
3715 obj_surface = SURFACE(pic_param->reference_frames[pic_param->ref_flags.bits.ref_arf_idx]);
3716 vp9_state->alt_ref_obj = obj_surface;
3719 !obj_surface->private_data) {
3720 vp9_state->alt_ref_obj = NULL;
3721 vp9_state->ref_frame_flag &= ~(VP9_ALT_REF);
3724 /* remove the duplicated flag and ref frame list */
3725 if (vp9_state->ref_frame_flag & VP9_LAST_REF) {
3726 if (pic_param->reference_frames[pic_param->ref_flags.bits.ref_last_idx] ==
3727 pic_param->reference_frames[pic_param->ref_flags.bits.ref_gf_idx]) {
3728 vp9_state->ref_frame_flag &= ~(VP9_GOLDEN_REF);
3729 vp9_state->golden_ref_obj = NULL;
3732 if (pic_param->reference_frames[pic_param->ref_flags.bits.ref_last_idx] ==
3733 pic_param->reference_frames[pic_param->ref_flags.bits.ref_arf_idx]) {
3734 vp9_state->ref_frame_flag &= ~(VP9_ALT_REF);
3735 vp9_state->alt_ref_obj = NULL;
3739 if (vp9_state->ref_frame_flag & VP9_GOLDEN_REF) {
3740 if (pic_param->reference_frames[pic_param->ref_flags.bits.ref_gf_idx] ==
3741 pic_param->reference_frames[pic_param->ref_flags.bits.ref_arf_idx]) {
3742 vp9_state->ref_frame_flag &= ~(VP9_ALT_REF);
3743 vp9_state->alt_ref_obj = NULL;
/* An inter frame with no usable reference cannot be encoded. */
3747 if (vp9_state->ref_frame_flag == 0)
3748 return VA_STATUS_ERROR_INVALID_PARAMETER;
/* Segmentation requires the per-segment QP deltas, carried in the
 * q_matrix buffer. */
3752 if (pic_param->pic_flags.bits.segmentation_enabled) {
3753 if (!encode_state->q_matrix ||
3754 !encode_state->q_matrix->buffer) {
3755 return VA_STATUS_ERROR_INVALID_PARAMETER;
3757 seg_param = (VAEncMiscParameterTypeVP9PerSegmantParam *)
3758 encode_state->q_matrix->buffer;
/* A sequence parameter buffer is optional; fall back to a placeholder. */
3762 if (encode_state->seq_param_ext &&
3763 encode_state->seq_param_ext->buffer)
3764 seq_param = (VAEncSequenceParameterBufferVP9 *)encode_state->seq_param_ext->buffer;
3767 seq_param = &vp9_state->bogus_seq_param;
3770 vp9_state->pic_param = pic_param;
3771 vp9_state->segment_param = seg_param;
3772 vp9_state->seq_param = seq_param;
3774 obj_surface = encode_state->reconstructed_object;
3775 if (pic_param->frame_width_dst > obj_surface->orig_width ||
3776 pic_param->frame_height_dst > obj_surface->orig_height)
3777 return VA_STATUS_ERROR_INVALID_SURFACE;
3779 if (!vp9_state->dys_enabled &&
3780 ((pic_param->frame_width_src != pic_param->frame_width_dst) ||
3781 (pic_param->frame_height_src != pic_param->frame_height_dst)))
3782 return VA_STATUS_ERROR_UNIMPLEMENTED;
/* BRC setup: rates are (re)derived on the first frame or a key frame. */
3784 if (vp9_state->brc_enabled) {
3785 if (vp9_state->first_frame || vp9_state->picture_coding_type == KEY_FRAME) {
3786 vp9_state->brc_reset = encoder_context->brc.need_reset || vp9_state->first_frame;
3788 if (!encoder_context->brc.framerate[0].num || !encoder_context->brc.framerate[0].den ||
3789 !encoder_context->brc.bits_per_second[0])
3790 return VA_STATUS_ERROR_INVALID_PARAMETER;
3792 vp9_state->gop_size = encoder_context->brc.gop_size;
3793 vp9_state->framerate = encoder_context->brc.framerate[0];
/* CBR (or no target percentage): target == max == min bitrate. */
3795 if (encoder_context->rate_control_mode == VA_RC_CBR ||
3796 !encoder_context->brc.target_percentage[0]) {
3797 vp9_state->target_bit_rate = encoder_context->brc.bits_per_second[0];
3798 vp9_state->max_bit_rate = vp9_state->target_bit_rate;
3799 vp9_state->min_bit_rate = vp9_state->target_bit_rate;
/* VBR: target is a percentage of max; min mirrors target about max,
 * floored at zero. */
3801 vp9_state->max_bit_rate = encoder_context->brc.bits_per_second[0];
3802 vp9_state->target_bit_rate = vp9_state->max_bit_rate * encoder_context->brc.target_percentage[0] / 100;
3803 if (2 * vp9_state->target_bit_rate < vp9_state->max_bit_rate)
3804 vp9_state->min_bit_rate = 0;
3806 vp9_state->min_bit_rate = 2 * vp9_state->target_bit_rate - vp9_state->max_bit_rate;
/* HRD buffer: explicit size, else derived from the BRC window, else
 * one second's worth of max bitrate. */
3809 if (encoder_context->brc.hrd_buffer_size)
3810 vp9_state->vbv_buffer_size_in_bit = encoder_context->brc.hrd_buffer_size;
3811 else if (encoder_context->brc.window_size)
3812 vp9_state->vbv_buffer_size_in_bit = (uint64_t)vp9_state->max_bit_rate * encoder_context->brc.window_size / 1000;
3814 vp9_state->vbv_buffer_size_in_bit = vp9_state->max_bit_rate;
3815 if (encoder_context->brc.hrd_initial_buffer_fullness)
3816 vp9_state->init_vbv_buffer_fullness_in_bit = encoder_context->brc.hrd_initial_buffer_fullness;
3818 vp9_state->init_vbv_buffer_fullness_in_bit = vp9_state->vbv_buffer_size_in_bit / 2;
/* Derived dimensions: the encode runs at the dst size; 4x/16x scaled
 * sizes feed the HME pyramid. */
3822 vp9_state->frame_width = pic_param->frame_width_dst;
3823 vp9_state->frame_height = pic_param->frame_height_dst;
3825 vp9_state->frame_width_4x = ALIGN(vp9_state->frame_width / 4, 16);
3826 vp9_state->frame_height_4x = ALIGN(vp9_state->frame_height / 4, 16);
3828 vp9_state->frame_width_16x = ALIGN(vp9_state->frame_width / 16, 16);
3829 vp9_state->frame_height_16x = ALIGN(vp9_state->frame_height / 16, 16);
3831 vp9_state->frame_width_in_mb = ALIGN(vp9_state->frame_width, 16) / 16;
3832 vp9_state->frame_height_in_mb = ALIGN(vp9_state->frame_height, 16) / 16;
3834 vp9_state->downscaled_width_4x_in_mb = vp9_state->frame_width_4x / 16;
3835 vp9_state->downscaled_height_4x_in_mb = vp9_state->frame_height_4x / 16;
3836 vp9_state->downscaled_width_16x_in_mb = vp9_state->frame_width_16x / 16;
3837 vp9_state->downscaled_height_16x_in_mb = vp9_state->frame_height_16x / 16;
/* Dynamic scaling is needed if the source is encoded at a different
 * size, or if any reference frame's size differs from this frame's. */
3839 vp9_state->dys_in_use = 0;
3840 if(pic_param->frame_width_src != pic_param->frame_width_dst ||
3841 pic_param->frame_height_src != pic_param->frame_height_dst)
3842 vp9_state->dys_in_use = 1;
3843 vp9_state->dys_ref_frame_flag = 0;
3844 /* check the dys setting. The dys is supported by default. */
3845 if (pic_param->pic_flags.bits.frame_type != KEY_FRAME &&
3846 !pic_param->pic_flags.bits.intra_only) {
3847 vp9_state->dys_ref_frame_flag = vp9_state->ref_frame_flag;
3849 if ((vp9_state->ref_frame_flag & VP9_LAST_REF) &&
3850 vp9_state->last_ref_obj) {
3851 obj_surface = vp9_state->last_ref_obj;
3852 vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
3854 if (vp9_state->frame_width == vp9_priv_surface->frame_width &&
3855 vp9_state->frame_height == vp9_priv_surface->frame_height)
3856 vp9_state->dys_ref_frame_flag &= ~(VP9_LAST_REF);
3858 if ((vp9_state->ref_frame_flag & VP9_GOLDEN_REF) &&
3859 vp9_state->golden_ref_obj) {
3860 obj_surface = vp9_state->golden_ref_obj;
3861 vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
3863 if (vp9_state->frame_width == vp9_priv_surface->frame_width &&
3864 vp9_state->frame_height == vp9_priv_surface->frame_height)
3865 vp9_state->dys_ref_frame_flag &= ~(VP9_GOLDEN_REF);
3867 if ((vp9_state->ref_frame_flag & VP9_ALT_REF) &&
3868 vp9_state->alt_ref_obj) {
3869 obj_surface = vp9_state->alt_ref_obj;
3870 vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
3872 if (vp9_state->frame_width == vp9_priv_surface->frame_width &&
3873 vp9_state->frame_height == vp9_priv_surface->frame_height)
3874 vp9_state->dys_ref_frame_flag &= ~(VP9_ALT_REF);
3876 if (vp9_state->dys_ref_frame_flag)
3877 vp9_state->dys_in_use = 1;
3880 if (vp9_state->hme_supported) {
3881 vp9_state->hme_enabled = 1;
3883 vp9_state->hme_enabled = 0;
3886 if (vp9_state->b16xme_supported) {
3887 vp9_state->b16xme_enabled = 1;
3889 vp9_state->b16xme_enabled = 0;
3892 /* disable HME/16xME if the size is too small */
3893 if (vp9_state->frame_width_4x <= VP9_VME_REF_WIN ||
3894 vp9_state->frame_height_4x <= VP9_VME_REF_WIN) {
3895 vp9_state->hme_enabled = 0;
3896 vp9_state->b16xme_enabled = 0;
3899 if (vp9_state->frame_width_16x < VP9_VME_REF_WIN ||
3900 vp9_state->frame_height_16x < VP9_VME_REF_WIN)
3901 vp9_state->b16xme_enabled = 0;
/* NOTE(review): this test uses HCP_VP9_KEY_FRAME while the frame-type
 * checks above use KEY_FRAME — presumably both are 0 so the checks
 * agree; confirm against the defines in i965_defines.h. */
3903 if (pic_param->pic_flags.bits.frame_type == HCP_VP9_KEY_FRAME ||
3904 pic_param->pic_flags.bits.intra_only) {
3905 vp9_state->hme_enabled = 0;
3906 vp9_state->b16xme_enabled = 0;
/* NOTE(review): picture_coding_type is 0 (I) / 1 (P); comparing it to
 * KEY_FRAME relies on KEY_FRAME == 0 — verify. */
3909 vp9_state->mbenc_keyframe_dist_enabled = 0;
3910 if ((vp9_state->picture_coding_type == KEY_FRAME) &&
3911 vp9_state->brc_distortion_buffer_supported)
3912 vp9_state->mbenc_keyframe_dist_enabled = 1;
3914 return VA_STATUS_SUCCESS;
3918 gen9_vme_gpe_kernel_prepare_vp9(VADriverContextP ctx,
3919 struct encode_state *encode_state,
3920 struct intel_encoder_context *encoder_context)
3922 struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
3923 struct vp9_surface_param surface_param;
3924 struct gen9_vp9_state *vp9_state;
3925 VAEncPictureParameterBufferVP9 *pic_param;
3926 struct object_surface *obj_surface;
3927 struct gen9_surface_vp9 *vp9_surface;
3928 int driver_header_flag = 0;
3931 vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
3933 if (!vp9_state || !vp9_state->pic_param)
3934 return VA_STATUS_ERROR_INVALID_PARAMETER;
3936 pic_param = vp9_state->pic_param;
3938 /* this is to check whether the driver should generate the uncompressed header */
3939 driver_header_flag = 1;
3940 if (encode_state->packed_header_data_ext &&
3941 encode_state->packed_header_data_ext[0] &&
3942 pic_param->bit_offset_first_partition_size) {
3943 VAEncPackedHeaderParameterBuffer *param = NULL;
3945 param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_params_ext[0]->buffer;
3947 if (param->type == VAEncPackedHeaderRawData) {
3949 unsigned int length_in_bits;
3951 header_data = (char *)encode_state->packed_header_data_ext[0]->buffer;
3952 length_in_bits = param->bit_length;
3953 driver_header_flag = 0;
3955 vp9_state->frame_header.bit_offset_first_partition_size =
3956 pic_param->bit_offset_first_partition_size;
3957 vp9_state->header_length = ALIGN(length_in_bits, 8) >> 3;
3958 vp9_state->alias_insert_data = header_data;
3960 vp9_state->frame_header.bit_offset_ref_lf_delta = pic_param->bit_offset_ref_lf_delta;
3961 vp9_state->frame_header.bit_offset_mode_lf_delta = pic_param->bit_offset_mode_lf_delta;
3962 vp9_state->frame_header.bit_offset_lf_level = pic_param->bit_offset_lf_level;
3963 vp9_state->frame_header.bit_offset_qindex = pic_param->bit_offset_qindex;
3964 vp9_state->frame_header.bit_offset_segmentation = pic_param->bit_offset_segmentation;
3965 vp9_state->frame_header.bit_size_segmentation = pic_param->bit_size_segmentation;
3969 if (driver_header_flag) {
3970 memset(&vp9_state->frame_header, 0, sizeof(vp9_state->frame_header));
3971 intel_write_uncompressed_header(encode_state,
3972 VAProfileVP9Profile0,
3973 vme_context->frame_header_data,
3974 &vp9_state->header_length,
3975 &vp9_state->frame_header);
3976 vp9_state->alias_insert_data = vme_context->frame_header_data;
3979 va_status = i965_check_alloc_surface_bo(ctx, encode_state->input_yuv_object,
3980 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
3981 if (va_status != VA_STATUS_SUCCESS)
3984 va_status = i965_check_alloc_surface_bo(ctx, encode_state->reconstructed_object,
3985 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
3987 if (va_status != VA_STATUS_SUCCESS)
3990 surface_param.frame_width = vp9_state->frame_width;
3991 surface_param.frame_height = vp9_state->frame_height;
3992 va_status = gen9_vp9_init_check_surfaces(ctx,
3993 encode_state->reconstructed_object,
3997 vp9_surface = (struct gen9_surface_vp9*)encode_state->reconstructed_object;
3999 vp9_surface->qp_value = pic_param->luma_ac_qindex + pic_param->luma_dc_qindex_delta;
4001 if (vp9_state->dys_in_use &&
4002 (pic_param->frame_width_src != pic_param->frame_width_dst ||
4003 pic_param->frame_height_src != pic_param->frame_height_dst)) {
4004 surface_param.frame_width = pic_param->frame_width_dst;
4005 surface_param.frame_height = pic_param->frame_height_dst;
4006 va_status = gen9_vp9_check_dys_surfaces(ctx,
4007 encode_state->reconstructed_object,
4014 if (vp9_state->dys_ref_frame_flag) {
4015 if ((vp9_state->dys_ref_frame_flag & VP9_LAST_REF) &&
4016 vp9_state->last_ref_obj) {
4017 obj_surface = vp9_state->last_ref_obj;
4018 surface_param.frame_width = vp9_state->frame_width;
4019 surface_param.frame_height = vp9_state->frame_height;
4020 va_status = gen9_vp9_check_dys_surfaces(ctx,
4027 if ((vp9_state->dys_ref_frame_flag & VP9_GOLDEN_REF) &&
4028 vp9_state->golden_ref_obj) {
4029 obj_surface = vp9_state->golden_ref_obj;
4030 surface_param.frame_width = vp9_state->frame_width;
4031 surface_param.frame_height = vp9_state->frame_height;
4032 va_status = gen9_vp9_check_dys_surfaces(ctx,
4039 if ((vp9_state->dys_ref_frame_flag & VP9_ALT_REF) &&
4040 vp9_state->alt_ref_obj) {
4041 obj_surface = vp9_state->alt_ref_obj;
4042 surface_param.frame_width = vp9_state->frame_width;
4043 surface_param.frame_height = vp9_state->frame_height;
4044 va_status = gen9_vp9_check_dys_surfaces(ctx,
4053 if (va_status != VA_STATUS_SUCCESS)
4055 /* check the corresponding ref_frame_flag && dys_ref_frame_flag */
4057 return VA_STATUS_SUCCESS;
/*
 * Per-frame GPE kernel setup: (re)initialize every MBEnc GPE context and
 * allocate one shared dynamic-state BO (curbe + sampler pad + one IDRT per
 * MBEnc kernel) so BRC can patch the MBEnc curbe in place, then initialize
 * the DYS (dynamic-scaling) GPE context and its AVS sampler state.
 * Returns VA_STATUS_SUCCESS.
 * NOTE(review): this chunk has lines elided by extraction (blank lines,
 * braces, some dri_bo_alloc arguments) — embedded numbering skips; do not
 * treat the visible text as compilable on its own.
 */
4061 gen9_vme_gpe_kernel_init_vp9(VADriverContextP ctx,
4062 struct encode_state *encode_state,
4063 struct intel_encoder_context *encoder_context)
4065 struct i965_driver_data *i965 = i965_driver_data(ctx);
4066 struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
4067 struct vp9_mbenc_context *mbenc_context = &vme_context->mbenc_context;
4068 struct vp9_dys_context *dys_context = &vme_context->dys_context;
4069 struct gpe_dynamic_state_parameter ds_param;
4073 * BRC will update MBEnc curbe data buffer, so initialize GPE context for
4076 for (i = 0; i < NUM_VP9_MBENC; i++) {
4077 gen8_gpe_context_init(ctx, &mbenc_context->gpe_contexts[i]);
4081 * VP9_MBENC_XXX uses the same dynamic state buffer as they share the same
/* Size = aligned curbe + 128-byte pad + one aligned IDRT per MBEnc kernel */
4084 ds_param.bo_size = ALIGN(sizeof(vp9_mbenc_curbe_data), 64) + 128 +
4085 ALIGN(sizeof(struct gen8_interface_descriptor_data), 64) * NUM_VP9_MBENC;
4086 mbenc_context->mbenc_bo_dys = dri_bo_alloc(i965->intel.bufmgr,
4090 mbenc_context->mbenc_bo_size = ds_param.bo_size;
/* Point every MBEnc GPE context at its own IDRT slot inside the shared BO */
4092 ds_param.bo = mbenc_context->mbenc_bo_dys;
4093 ds_param.curbe_offset = 0;
4094 ds_param.sampler_offset = ALIGN(sizeof(vp9_mbenc_curbe_data), 64);
4095 for (i = 0; i < NUM_VP9_MBENC; i++) {
4096 ds_param.idrt_offset = ds_param.sampler_offset + 128 +
4097 ALIGN(sizeof(struct gen8_interface_descriptor_data), 64) * i;
4099 gen8_gpe_context_set_dynamic_buffer(ctx,
4100 &mbenc_context->gpe_contexts[i],
4104 gen8_gpe_context_init(ctx, &dys_context->gpe_context);
4105 gen9_vp9_dys_set_sampler_state(&dys_context->gpe_context);
4107 return VA_STATUS_SUCCESS;
/*
 * Per-frame GPE teardown: drop the reference on the shared MBEnc/DYS
 * dynamic-state BO allocated in gen9_vme_gpe_kernel_init_vp9 and clear the
 * pointer so a stale handle is never reused. Returns VA_STATUS_SUCCESS.
 */
4111 gen9_vme_gpe_kernel_final_vp9(VADriverContextP ctx,
4112 struct encode_state *encode_state,
4113 struct intel_encoder_context *encoder_context)
4115 struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
4116 struct vp9_mbenc_context *mbenc_context = &vme_context->mbenc_context;
4118 dri_bo_unreference(mbenc_context->mbenc_bo_dys);
4119 mbenc_context->mbenc_bo_dys = NULL;
4121 return VA_STATUS_SUCCESS;
/*
 * Run the whole VME kernel sequence for one frame, in dependency order:
 * optional DYS of reference frames, BRC init/reset (first frame or on reset),
 * 4x/16x scaling for HME, ME (16x then 4x when enabled, P frames only),
 * BRC intra-distortion + BRC update, then MBEnc (I 32x32 + I 16x16 for key
 * frames, P otherwise) followed by the TX/refine MBEnc pass. Finally flips
 * the ping-pong mode-decision index and latches brc_inited/brc_reset.
 * Returns VA_STATUS_ERROR_INVALID_PARAMETER when state/pic_param is missing,
 * VA_STATUS_SUCCESS otherwise.
 * NOTE(review): several call-argument lines and closing braces are elided in
 * this extraction (embedded numbering skips) — e.g. the kernel-index
 * arguments of the scaling/ME calls are not visible here.
 */
4125 gen9_vme_gpe_kernel_run_vp9(VADriverContextP ctx,
4126 struct encode_state *encode_state,
4127 struct intel_encoder_context *encoder_context)
4129 struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
4130 struct gen9_vp9_state *vp9_state;
4133 vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
4135 if (!vp9_state || !vp9_state->pic_param)
4136 return VA_STATUS_ERROR_INVALID_PARAMETER;
4138 if (vp9_state->dys_in_use) {
4139 gen9_vp9_run_dys_refframes(ctx, encode_state, encoder_context);
4142 if (vp9_state->brc_enabled && (vp9_state->brc_reset || !vp9_state->brc_inited)) {
4143 gen9_vp9_brc_init_reset_kernel(ctx, encode_state, encoder_context);
/* Key frame: clear both ping-pong mode-decision buffers before MBEnc */
4146 if (vp9_state->picture_coding_type == KEY_FRAME) {
4147 for (i = 0; i < 2; i++)
4148 i965_zero_gpe_resource(&vme_context->res_mode_decision[i]);
4151 if (vp9_state->hme_supported) {
4152 gen9_vp9_scaling_kernel(ctx, encode_state,
4155 if (vp9_state->b16xme_supported) {
4156 gen9_vp9_scaling_kernel(ctx, encode_state,
/* HME only runs for inter frames (picture_coding_type != KEY_FRAME) */
4162 if (vp9_state->picture_coding_type && vp9_state->hme_enabled) {
4163 if (vp9_state->b16xme_enabled)
4164 gen9_vp9_me_kernel(ctx, encode_state,
4168 gen9_vp9_me_kernel(ctx, encode_state,
4173 if (vp9_state->brc_enabled) {
4174 if (vp9_state->mbenc_keyframe_dist_enabled)
4175 gen9_vp9_brc_intra_dist_kernel(ctx,
4179 gen9_vp9_brc_update_kernel(ctx, encode_state,
4183 if (vp9_state->picture_coding_type == KEY_FRAME) {
4184 gen9_vp9_mbenc_kernel(ctx, encode_state,
4186 VP9_MEDIA_STATE_MBENC_I_32x32);
4187 gen9_vp9_mbenc_kernel(ctx, encode_state,
4189 VP9_MEDIA_STATE_MBENC_I_16x16);
4191 gen9_vp9_mbenc_kernel(ctx, encode_state,
4193 VP9_MEDIA_STATE_MBENC_P);
4196 gen9_vp9_mbenc_kernel(ctx, encode_state,
4198 VP9_MEDIA_STATE_MBENC_TX);
/* Ping-pong between the two mode-decision buffers for the next frame */
4200 vp9_state->curr_mode_decision_index ^= 1;
4201 if (vp9_state->brc_enabled) {
4202 vp9_state->brc_inited = 1;
4203 vp9_state->brc_reset = 0;
4206 return VA_STATUS_SUCCESS;
/*
 * Top-level VME pipeline entry for one VP9 frame: validate parameters,
 * allocate (once) the per-context resources, prepare/init the GPE kernels,
 * run them, then release per-frame kernel state. Each stage's VAStatus is
 * checked; error-return statements after the checks are elided in this
 * extraction (embedded numbering skips) — presumably each failure returns
 * va_status, TODO confirm against the full file.
 */
4210 gen9_vme_pipeline_vp9(VADriverContextP ctx,
4212 struct encode_state *encode_state,
4213 struct intel_encoder_context *encoder_context)
4216 struct gen9_vp9_state *vp9_state;
4218 vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
4221 return VA_STATUS_ERROR_INVALID_CONTEXT;
4223 va_status = gen9_encode_vp9_check_parameter(ctx, encode_state, encoder_context);
4224 if (va_status != VA_STATUS_SUCCESS)
/* BRC resources are allocated only on the first frame (brc_allocated latch) */
4227 va_status = gen9_vp9_allocate_resources(ctx, encode_state,
4229 !vp9_state->brc_allocated);
4231 if (va_status != VA_STATUS_SUCCESS)
4233 vp9_state->brc_allocated = 1;
4235 va_status = gen9_vme_gpe_kernel_prepare_vp9(ctx, encode_state, encoder_context);
4237 if (va_status != VA_STATUS_SUCCESS)
4240 va_status = gen9_vme_gpe_kernel_init_vp9(ctx, encode_state, encoder_context);
4241 if (va_status != VA_STATUS_SUCCESS)
4244 va_status = gen9_vme_gpe_kernel_run_vp9(ctx, encode_state, encoder_context);
4245 if (va_status != VA_STATUS_SUCCESS)
4248 gen9_vme_gpe_kernel_final_vp9(ctx, encode_state, encoder_context);
4250 return VA_STATUS_SUCCESS;
/* Destroy all BRC GPE contexts held by the VP9 BRC context. */
4254 gen9_vme_brc_context_destroy_vp9(struct vp9_brc_context *brc_context)
4258 for (i = 0; i < NUM_VP9_BRC; i++)
4259 gen8_gpe_context_destroy(&brc_context->gpe_contexts[i]);
/* Destroy all scaling (4x/2x) GPE contexts held by the scaling context. */
4263 gen9_vme_scaling_context_destroy_vp9(struct vp9_scaling_context *scaling_context)
4267 for (i = 0; i < NUM_VP9_SCALING; i++)
4268 gen8_gpe_context_destroy(&scaling_context->gpe_contexts[i]);
/* Destroy the single ME GPE context. */
4272 gen9_vme_me_context_destroy_vp9(struct vp9_me_context *me_context)
4274 gen8_gpe_context_destroy(&me_context->gpe_context);
/*
 * Destroy all MBEnc GPE contexts and release the shared dynamic-state BO
 * (dri_bo_unreference(NULL) is a no-op, so this is safe if never allocated).
 */
4278 gen9_vme_mbenc_context_destroy_vp9(struct vp9_mbenc_context *mbenc_context)
4282 for (i = 0; i < NUM_VP9_MBENC; i++)
4283 gen8_gpe_context_destroy(&mbenc_context->gpe_contexts[i]);
4284 dri_bo_unreference(mbenc_context->mbenc_bo_dys);
4285 mbenc_context->mbenc_bo_size = 0;
/* Destroy the single DYS (dynamic scaling) GPE context. */
4289 gen9_vme_dys_context_destroy_vp9(struct vp9_dys_context *dys_context)
4291 gen8_gpe_context_destroy(&dys_context->gpe_context);
/*
 * Tear down the complete VME kernel context: free all GPE resources, then
 * destroy each sub-context (scaling, ME, MBEnc, BRC, DYS) in turn.
 */
4295 gen9_vme_kernel_context_destroy_vp9(struct gen9_encoder_context_vp9 *vme_context)
4297 gen9_vp9_free_resources(vme_context);
4298 gen9_vme_scaling_context_destroy_vp9(&vme_context->scaling_context);
4299 gen9_vme_me_context_destroy_vp9(&vme_context->me_context);
4300 gen9_vme_mbenc_context_destroy_vp9(&vme_context->mbenc_context);
4301 gen9_vme_brc_context_destroy_vp9(&vme_context->brc_context);
4302 gen9_vme_dys_context_destroy_vp9(&vme_context->dys_context);
/*
 * Opaque destroy hook registered with the encoder context; tears down the
 * kernel context. NOTE(review): the free() of vme_context itself is elided
 * in this extraction — confirm against the full file.
 */
4308 gen9_vme_context_destroy_vp9(void *context)
4310 struct gen9_encoder_context_vp9 *vme_context = context;
4315 gen9_vme_kernel_context_destroy_vp9(vme_context);
/*
 * One-time init of the two scaling GPE contexts: [0] = 4x downscale (CM
 * curbe + inline data, MBV-stat BTI), [1] = 2x downscale (CM curbe, no
 * inline data). For each: size the GPE context, program the VFE scoreboard,
 * look up the kernel blob in media_vp9_kernels and load it. The surface
 * binding-table indices used by the scaling kernels are recorded in
 * scaling_4x_bti / scaling_2x_bti.
 * NOTE(review): trailing arguments of the get-kernel/load-kernels calls are
 * elided in this extraction (embedded numbering skips).
 */
4323 gen9_vme_scaling_context_init_vp9(VADriverContextP ctx,
4324 struct gen9_encoder_context_vp9 *vme_context,
4325 struct vp9_scaling_context *scaling_context)
4327 struct i965_gpe_context *gpe_context = NULL;
4328 struct vp9_encoder_kernel_parameter kernel_param;
4329 struct vp9_encoder_scoreboard_parameter scoreboard_param;
4330 struct i965_kernel scale_kernel;
4332 kernel_param.curbe_size = sizeof(vp9_scaling4x_curbe_data_cm);
4333 kernel_param.inline_data_size = sizeof(vp9_scaling4x_inline_data_cm);
4334 kernel_param.sampler_size = 0;
4336 memset(&scoreboard_param, 0, sizeof(scoreboard_param));
4337 scoreboard_param.mask = 0xFF;
4338 scoreboard_param.enable = vme_context->use_hw_scoreboard;
4339 scoreboard_param.type = vme_context->use_hw_non_stalling_scoreboard;
4340 scoreboard_param.walkpat_flag = 0;
4342 gpe_context = &scaling_context->gpe_contexts[0];
4343 gen9_init_gpe_context_vp9(ctx, gpe_context, &kernel_param);
4344 gen9_init_vfe_scoreboard_vp9(gpe_context, &scoreboard_param);
4346 scaling_context->scaling_4x_bti.scaling_frame_src_y = VP9_BTI_SCALING_FRAME_SRC_Y;
4347 scaling_context->scaling_4x_bti.scaling_frame_dst_y = VP9_BTI_SCALING_FRAME_DST_Y;
4348 scaling_context->scaling_4x_bti.scaling_frame_mbv_proc_stat_dst =
4349 VP9_BTI_SCALING_FRAME_MBVPROCSTATS_DST_CM;
4351 memset(&scale_kernel, 0, sizeof(scale_kernel));
4353 intel_vp9_get_kernel_header_and_size((void *)media_vp9_kernels,
4354 sizeof(media_vp9_kernels),
4355 INTEL_VP9_ENC_SCALING4X,
4359 gen8_gpe_load_kernels(ctx,
/* Second pass: 2x scaling kernel — no inline data, no sampler */
4364 kernel_param.curbe_size = sizeof(vp9_scaling2x_curbe_data_cm);
4365 kernel_param.inline_data_size = 0;
4366 kernel_param.sampler_size = 0;
4368 gpe_context = &scaling_context->gpe_contexts[1];
4369 gen9_init_gpe_context_vp9(ctx, gpe_context, &kernel_param);
4370 gen9_init_vfe_scoreboard_vp9(gpe_context, &scoreboard_param);
4372 memset(&scale_kernel, 0, sizeof(scale_kernel));
4374 intel_vp9_get_kernel_header_and_size((void *)media_vp9_kernels,
4375 sizeof(media_vp9_kernels),
4376 INTEL_VP9_ENC_SCALING2X,
4380 gen8_gpe_load_kernels(ctx,
4385 scaling_context->scaling_2x_bti.scaling_frame_src_y = VP9_BTI_SCALING_FRAME_SRC_Y;
4386 scaling_context->scaling_2x_bti.scaling_frame_dst_y = VP9_BTI_SCALING_FRAME_DST_Y;
/*
 * One-time init of the ME GPE context: size it for the ME curbe (no inline
 * data, no sampler), program the VFE scoreboard, then look up and load the
 * ME kernel from media_vp9_kernels.
 * NOTE(review): the kernel-index argument and load-kernels arguments are
 * elided in this extraction (embedded numbering skips).
 */
4391 gen9_vme_me_context_init_vp9(VADriverContextP ctx,
4392 struct gen9_encoder_context_vp9 *vme_context,
4393 struct vp9_me_context *me_context)
4395 struct i965_gpe_context *gpe_context = NULL;
4396 struct vp9_encoder_kernel_parameter kernel_param;
4397 struct vp9_encoder_scoreboard_parameter scoreboard_param;
4398 struct i965_kernel scale_kernel;
4400 kernel_param.curbe_size = sizeof(vp9_me_curbe_data);
4401 kernel_param.inline_data_size = 0;
4402 kernel_param.sampler_size = 0;
4404 memset(&scoreboard_param, 0, sizeof(scoreboard_param));
4405 scoreboard_param.mask = 0xFF;
4406 scoreboard_param.enable = vme_context->use_hw_scoreboard;
4407 scoreboard_param.type = vme_context->use_hw_non_stalling_scoreboard;
4408 scoreboard_param.walkpat_flag = 0;
4410 gpe_context = &me_context->gpe_context;
4411 gen9_init_gpe_context_vp9(ctx, gpe_context, &kernel_param);
4412 gen9_init_vfe_scoreboard_vp9(gpe_context, &scoreboard_param);
4414 memset(&scale_kernel, 0, sizeof(scale_kernel));
4416 intel_vp9_get_kernel_header_and_size((void *)media_vp9_kernels,
4417 sizeof(media_vp9_kernels),
4422 gen8_gpe_load_kernels(ctx,
/*
 * One-time init of all NUM_VP9_MBENC GPE contexts. Each context is sized
 * for the MBEnc curbe; the 26-degree walking pattern (walkpat_flag = 1) is
 * enabled only for the key-frame 16x16 and inter kernels, the rest use the
 * default pattern. Every context loads the same INTEL_VP9_ENC_MBENC blob
 * (the per-kernel entry selection is in the elided call arguments).
 * NOTE(review): loop-closing braces and some call arguments are elided in
 * this extraction (embedded numbering skips).
 */
4431 gen9_vme_mbenc_context_init_vp9(VADriverContextP ctx,
4432 struct gen9_encoder_context_vp9 *vme_context,
4433 struct vp9_mbenc_context *mbenc_context)
4435 struct i965_gpe_context *gpe_context = NULL;
4436 struct vp9_encoder_kernel_parameter kernel_param;
4437 struct vp9_encoder_scoreboard_parameter scoreboard_param;
4439 struct i965_kernel scale_kernel;
4441 kernel_param.curbe_size = sizeof(vp9_mbenc_curbe_data);
4442 kernel_param.inline_data_size = 0;
4443 kernel_param.sampler_size = 0;
4445 memset(&scoreboard_param, 0, sizeof(scoreboard_param));
4446 scoreboard_param.mask = 0xFF;
4447 scoreboard_param.enable = vme_context->use_hw_scoreboard;
4448 scoreboard_param.type = vme_context->use_hw_non_stalling_scoreboard;
4450 for (i = 0; i < NUM_VP9_MBENC; i++) {
4451 gpe_context = &mbenc_context->gpe_contexts[i];
4453 if ((i == VP9_MBENC_IDX_KEY_16x16) ||
4454 (i == VP9_MBENC_IDX_INTER)) {
4455 scoreboard_param.walkpat_flag = 1;
4457 scoreboard_param.walkpat_flag = 0;
4459 gen9_init_gpe_context_vp9(ctx, gpe_context, &kernel_param);
4460 gen9_init_vfe_scoreboard_vp9(gpe_context, &scoreboard_param);
4462 memset(&scale_kernel, 0, sizeof(scale_kernel));
4464 intel_vp9_get_kernel_header_and_size((void *)media_vp9_kernels,
4465 sizeof(media_vp9_kernels),
4466 INTEL_VP9_ENC_MBENC,
4470 gen8_gpe_load_kernels(ctx,
/*
 * One-time init of all NUM_VP9_BRC GPE contexts (BRC init/reset/update/
 * intra-dist kernels): each sized for the BRC curbe, given the common
 * scoreboard settings, then loaded with its kernel from media_vp9_kernels.
 * NOTE(review): the kernel-index and load-kernels arguments plus loop braces
 * are elided in this extraction (embedded numbering skips).
 */
4478 gen9_vme_brc_context_init_vp9(VADriverContextP ctx,
4479 struct gen9_encoder_context_vp9 *vme_context,
4480 struct vp9_brc_context *brc_context)
4482 struct i965_gpe_context *gpe_context = NULL;
4483 struct vp9_encoder_kernel_parameter kernel_param;
4484 struct vp9_encoder_scoreboard_parameter scoreboard_param;
4486 struct i965_kernel scale_kernel;
4488 kernel_param.curbe_size = sizeof(vp9_brc_curbe_data);
4489 kernel_param.inline_data_size = 0;
4490 kernel_param.sampler_size = 0;
4492 memset(&scoreboard_param, 0, sizeof(scoreboard_param));
4493 scoreboard_param.mask = 0xFF;
4494 scoreboard_param.enable = vme_context->use_hw_scoreboard;
4495 scoreboard_param.type = vme_context->use_hw_non_stalling_scoreboard;
4497 for (i = 0; i < NUM_VP9_BRC; i++) {
4498 gpe_context = &brc_context->gpe_contexts[i];
4499 gen9_init_gpe_context_vp9(ctx, gpe_context, &kernel_param);
4500 gen9_init_vfe_scoreboard_vp9(gpe_context, &scoreboard_param);
4502 memset(&scale_kernel, 0, sizeof(scale_kernel));
4504 intel_vp9_get_kernel_header_and_size((void *)media_vp9_kernels,
4505 sizeof(media_vp9_kernels),
4510 gen8_gpe_load_kernels(ctx,
/*
 * One-time init of the DYS (dynamic scaling) GPE context. Unlike the other
 * kernels it needs sampler state (gen9 8x8 AVS) for the rescale filter; the
 * curbe is vp9_dys_curbe_data with no inline data. Scoreboard settings match
 * the other contexts, then the DYS kernel is looked up and loaded.
 * NOTE(review): the kernel-index and load-kernels arguments are elided in
 * this extraction (embedded numbering skips).
 */
4518 gen9_vme_dys_context_init_vp9(VADriverContextP ctx,
4519 struct gen9_encoder_context_vp9 *vme_context,
4520 struct vp9_dys_context *dys_context)
4522 struct i965_gpe_context *gpe_context = NULL;
4523 struct vp9_encoder_kernel_parameter kernel_param;
4524 struct vp9_encoder_scoreboard_parameter scoreboard_param;
4525 struct i965_kernel scale_kernel;
4527 kernel_param.curbe_size = sizeof(vp9_dys_curbe_data);
4528 kernel_param.inline_data_size = 0;
4529 kernel_param.sampler_size = sizeof(struct gen9_sampler_8x8_avs);
4531 memset(&scoreboard_param, 0, sizeof(scoreboard_param));
4532 scoreboard_param.mask = 0xFF;
4533 scoreboard_param.enable = vme_context->use_hw_scoreboard;
4534 scoreboard_param.type = vme_context->use_hw_non_stalling_scoreboard;
4535 scoreboard_param.walkpat_flag = 0;
4537 gpe_context = &dys_context->gpe_context;
4538 gen9_init_gpe_context_vp9(ctx, gpe_context, &kernel_param);
4539 gen9_init_vfe_scoreboard_vp9(gpe_context, &scoreboard_param);
4541 memset(&scale_kernel, 0, sizeof(scale_kernel));
4543 intel_vp9_get_kernel_header_and_size((void *)media_vp9_kernels,
4544 sizeof(media_vp9_kernels),
4549 gen8_gpe_load_kernels(ctx,
/*
 * Initialize every kernel sub-context (scaling, ME, MBEnc, DYS, BRC) and
 * install the function-pointer table used by the generic kernel drivers:
 * curbe setters and surface senders for BRC/ME/scaling/DYS/MBEnc.
 */
4558 gen9_vme_kernels_context_init_vp9(VADriverContextP ctx,
4559 struct intel_encoder_context *encoder_context,
4560 struct gen9_encoder_context_vp9 *vme_context)
4562 gen9_vme_scaling_context_init_vp9(ctx, vme_context, &vme_context->scaling_context);
4563 gen9_vme_me_context_init_vp9(ctx, vme_context, &vme_context->me_context);
4564 gen9_vme_mbenc_context_init_vp9(ctx, vme_context, &vme_context->mbenc_context);
4565 gen9_vme_dys_context_init_vp9(ctx, vme_context, &vme_context->dys_context);
4566 gen9_vme_brc_context_init_vp9(ctx, vme_context, &vme_context->brc_context);
4568 vme_context->pfn_set_curbe_brc = gen9_vp9_set_curbe_brc;
4569 vme_context->pfn_set_curbe_me = gen9_vp9_set_curbe_me;
4570 vme_context->pfn_send_me_surface = gen9_vp9_send_me_surface;
4571 vme_context->pfn_send_scaling_surface = gen9_vp9_send_scaling_surface;
4573 vme_context->pfn_set_curbe_scaling = gen9_vp9_set_curbe_scaling_cm;
4575 vme_context->pfn_send_dys_surface = gen9_vp9_send_dys_surface;
4576 vme_context->pfn_set_curbe_dys = gen9_vp9_set_curbe_dys;
4577 vme_context->pfn_set_curbe_mbenc = gen9_vp9_set_curbe_mbenc;
4578 vme_context->pfn_send_mbenc_surface = gen9_vp9_send_mbenc_surface;
/*
 * Write one boolean-coded element into the PAK compressed-header input
 * buffer. Two elements are packed per vp9_compressed_element slot (index
 * bit 0 selects the "a" or "b" half); each half records valid/probdiff-
 * select flags, a prob-select bit that is set only for prob == 252, and the
 * coded bin value. NOTE(review): the `index`/`prob`/`value` parameter lines
 * and the if/else around the two halves are elided in this extraction —
 * presumably the b_* branch is taken for odd (or even) index; confirm
 * against the full file.
 */
4583 void gen9_vp9_write_compressed_element(char *buffer,
4588 struct vp9_compressed_element *base_element, *vp9_element;
4589 base_element = (struct vp9_compressed_element *)buffer;
4591 vp9_element = base_element + (index >> 1);
4593 vp9_element->b_valid = 1;
4594 vp9_element->b_probdiff_select = 1;
4595 vp9_element->b_prob_select = (prob == 252) ? 1: 0;
4596 vp9_element->b_bin = value;
4598 vp9_element->a_valid = 1;
4599 vp9_element->a_probdiff_select = 1;
4600 vp9_element->a_prob_select = (prob == 252) ? 1: 0;
4601 vp9_element->a_bin = value;
/*
 * Refresh the driver-side VP9 frame-context state and rebuild the PAK
 * compressed-header input buffer for the current frame.
 *
 * Frame-context handling follows the VP9 spec: on key frame / intra-only /
 * error-resilient frames the current context is reset to defaults; with
 * reset_frame_context == 3 (or key/error-resilient) all four stored contexts
 * are overwritten, with == 2 only the context selected by frame_context_idx
 * is, and frame_ctx_idx is forced to 0; otherwise frame_ctx_idx tracks
 * pic_flags.frame_context_idx.
 *
 * The compressed-header elements written afterwards cover, in bitstream
 * order: the tx-mode signalling (full TX_MODE_SELECT / ALLOW_32X32 /
 * explicit 2-bit forms), tx-probability update flags, coefficient-update
 * flags, and — for inter frames — comp-prediction signalling derived from
 * the reference sign biases (REFERENCE_MODE_SELECT vs COMPOUND_REFERENCE).
 * NOTE(review): most element index/prob/value argument lines are elided in
 * this extraction (embedded numbering skips), as are the early-return and
 * map-failure paths.
 */
4606 intel_vp9enc_refresh_frame_internal_buffers(VADriverContextP ctx,
4607 struct intel_encoder_context *encoder_context)
4609 struct gen9_encoder_context_vp9 *pak_context = encoder_context->mfc_context;
4610 VAEncPictureParameterBufferVP9 *pic_param;
4611 struct gen9_vp9_state *vp9_state;
4615 vp9_state = (struct gen9_vp9_state *)(encoder_context->enc_priv_state);
4617 if (!pak_context || !vp9_state || !vp9_state->pic_param)
4620 pic_param = vp9_state->pic_param;
4621 if ((pic_param->pic_flags.bits.frame_type == HCP_VP9_KEY_FRAME) ||
4622 (pic_param->pic_flags.bits.intra_only) ||
4623 pic_param->pic_flags.bits.error_resilient_mode) {
4624 /* reset current frame_context */
4625 intel_init_default_vp9_probs(&vp9_state->vp9_current_fc);
4626 if ((pic_param->pic_flags.bits.frame_type == HCP_VP9_KEY_FRAME) ||
4627 pic_param->pic_flags.bits.error_resilient_mode ||
4628 (pic_param->pic_flags.bits.reset_frame_context == 3)) {
4629 for (i = 0; i < 4; i++)
4630 memcpy(&vp9_state->vp9_frame_ctx[i],
4631 &vp9_state->vp9_current_fc,
4632 sizeof(FRAME_CONTEXT));
4633 } else if (pic_param->pic_flags.bits.reset_frame_context == 2) {
4634 i = pic_param->pic_flags.bits.frame_context_idx;
4635 memcpy(&vp9_state->vp9_frame_ctx[i],
4636 &vp9_state->vp9_current_fc, sizeof(FRAME_CONTEXT));
4638 /* reset the frame_ctx_idx = 0 */
4639 vp9_state->frame_ctx_idx = 0;
4641 vp9_state->frame_ctx_idx = pic_param->pic_flags.bits.frame_context_idx;
/* Rebuild the compressed-header input from scratch for this frame */
4644 i965_zero_gpe_resource(&pak_context->res_compressed_input_buffer);
4645 buffer = i965_map_gpe_resource(&pak_context->res_compressed_input_buffer);
/* Lossless path: all q deltas zero means no quantized residual signalling */
4651 if ((pic_param->luma_ac_qindex == 0) &&
4652 (pic_param->luma_dc_qindex_delta == 0) &&
4653 (pic_param->chroma_ac_qindex_delta == 0) &&
4654 (pic_param->chroma_dc_qindex_delta == 0)) {
4656 /* nothing is needed */
4657 gen9_vp9_write_compressed_element(buffer,
4659 gen9_vp9_write_compressed_element(buffer,
4661 gen9_vp9_write_compressed_element(buffer,
4664 if (vp9_state->tx_mode == TX_MODE_SELECT) {
4665 gen9_vp9_write_compressed_element(buffer,
4667 gen9_vp9_write_compressed_element(buffer,
4669 gen9_vp9_write_compressed_element(buffer,
4671 } else if (vp9_state->tx_mode == ALLOW_32X32) {
4672 gen9_vp9_write_compressed_element(buffer,
4674 gen9_vp9_write_compressed_element(buffer,
4676 gen9_vp9_write_compressed_element(buffer,
4679 unsigned int tx_mode;
4681 tx_mode = vp9_state->tx_mode;
/* Explicit 2-bit tx_mode: high bit then low bit, each at prob 128 */
4682 gen9_vp9_write_compressed_element(buffer,
4683 0, 128, ((tx_mode) & 2));
4684 gen9_vp9_write_compressed_element(buffer,
4685 1, 128, ((tx_mode) & 1));
4686 gen9_vp9_write_compressed_element(buffer,
4690 if (vp9_state->tx_mode == TX_MODE_SELECT) {
4692 gen9_vp9_write_compressed_element(buffer,
4695 gen9_vp9_write_compressed_element(buffer,
4698 gen9_vp9_write_compressed_element(buffer,
4702 /*Setup all the input&output object*/
4705 /* update the coeff_update flag */
4706 gen9_vp9_write_compressed_element(buffer,
4708 gen9_vp9_write_compressed_element(buffer,
4710 gen9_vp9_write_compressed_element(buffer,
4712 gen9_vp9_write_compressed_element(buffer,
/* Inter frames only: compound prediction signalling */
4717 if (pic_param->pic_flags.bits.frame_type && !pic_param->pic_flags.bits.intra_only)
/* Compound allowed only when the three ref sign biases are not all equal */
4719 bool allow_comp = !(
4720 (pic_param->ref_flags.bits.ref_last_sign_bias && pic_param->ref_flags.bits.ref_gf_sign_bias && pic_param->ref_flags.bits.ref_arf_sign_bias) ||
4721 (!pic_param->ref_flags.bits.ref_last_sign_bias && !pic_param->ref_flags.bits.ref_gf_sign_bias && !pic_param->ref_flags.bits.ref_arf_sign_bias)
4726 if (pic_param->pic_flags.bits.comp_prediction_mode == REFERENCE_MODE_SELECT) {
4727 gen9_vp9_write_compressed_element(buffer,
4729 gen9_vp9_write_compressed_element(buffer,
4732 else if (pic_param->pic_flags.bits.comp_prediction_mode == COMPOUND_REFERENCE) {
4733 gen9_vp9_write_compressed_element(buffer,
4735 gen9_vp9_write_compressed_element(buffer,
4740 gen9_vp9_write_compressed_element(buffer,
4742 gen9_vp9_write_compressed_element(buffer,
4748 i965_unmap_gpe_resource(&pak_context->res_compressed_input_buffer);
/*
 * Emit the 6-dword HCP_PIPE_MODE_SELECT command on the BCS ring: select
 * encode mode, the caller-supplied codec mode, and stream-out enable bits
 * (Pic Status / Error Report is disabled). DW4 bit 6 is set per the command
 * layout; the remaining dwords are zero.
 */
4753 gen9_pak_vp9_pipe_mode_select(VADriverContextP ctx,
4754 struct encode_state *encode_state,
4755 struct intel_encoder_context *encoder_context,
4756 struct gen9_hcpe_pipe_mode_select_param *pipe_mode_param)
4758 struct intel_batchbuffer *batch = encoder_context->base.batch;
4760 BEGIN_BCS_BATCH(batch, 6);
4762 OUT_BCS_BATCH(batch, HCP_PIPE_MODE_SELECT | (6 - 2));
4763 OUT_BCS_BATCH(batch,
4764 (pipe_mode_param->stream_out << 12) |
4765 (pipe_mode_param->codec_mode << 5) |
4766 (0 << 3) | /* disable Pic Status / Error Report */
4767 (pipe_mode_param->stream_out << 2) |
4768 HCP_CODEC_SELECT_ENCODE);
4769 OUT_BCS_BATCH(batch, 0);
4770 OUT_BCS_BATCH(batch, 0);
4771 OUT_BCS_BATCH(batch, (1 << 6));
4772 OUT_BCS_BATCH(batch, 0);
4774 ADVANCE_BCS_BATCH(batch);
/*
 * Emit a 3-dword HCP_SURFACE_STATE command from a pre-filled hcp_surface_state:
 * DW1 packs surface id and (pitch - 1), DW2 packs surface format and the
 * Y/Cb plane offset.
 */
4778 gen9_vp9_add_surface_state(VADriverContextP ctx,
4779 struct encode_state *encode_state,
4780 struct intel_encoder_context *encoder_context,
4781 hcp_surface_state *hcp_state)
4783 struct intel_batchbuffer *batch = encoder_context->base.batch;
4787 BEGIN_BCS_BATCH(batch, 3);
4788 OUT_BCS_BATCH(batch, HCP_SURFACE_STATE | (3 - 2));
4789 OUT_BCS_BATCH(batch,
4790 (hcp_state->dw1.surface_id << 28) |
4791 (hcp_state->dw1.surface_pitch - 1)
4793 OUT_BCS_BATCH(batch,
4794 (hcp_state->dw2.surface_format << 28) |
4795 (hcp_state->dw2.y_cb_offset)
4797 ADVANCE_BCS_BATCH(batch);
/*
 * Emit the 104-dword HCP_PIPE_BUF_ADDR_STATE command that hands the HCP PAK
 * hardware every buffer it touches: reconstructed surface, deblocking and
 * metadata row-store buffers (line / tile-line / tile-col), the current and
 * collocated (previous) motion-vector temporal buffers — selected by the
 * ping-pong curr_mv_temporal_index — up to three reference surfaces for
 * inter frames, the uncompressed source, the probability buffer, the
 * segment-id buffer, and the HVD line/tile-line row-store buffers. Unused
 * slots (SAO, stream-out, ILDB, reserved) are written as zero. Each address
 * triple is followed by a MOCS attribute dword.
 * NOTE(review): the OUT_BCS_RELOC64/early-return lines are elided in this
 * extraction (embedded numbering skips) — only the argument tails and the
 * zero/MOCS dwords are visible.
 */
4801 gen9_pak_vp9_pipe_buf_addr_state(VADriverContextP ctx,
4802 struct encode_state *encode_state,
4803 struct intel_encoder_context *encoder_context)
4805 struct i965_driver_data *i965 = i965_driver_data(ctx);
4806 struct intel_batchbuffer *batch = encoder_context->base.batch;
4807 struct gen9_encoder_context_vp9 *pak_context = encoder_context->mfc_context;
4808 struct gen9_vp9_state *vp9_state;
4810 struct object_surface *obj_surface;
4812 vp9_state = (struct gen9_vp9_state *)(encoder_context->enc_priv_state);
4814 if (!vp9_state || !vp9_state->pic_param)
4818 BEGIN_BCS_BATCH(batch, 104);
4820 OUT_BCS_BATCH(batch, HCP_PIPE_BUF_ADDR_STATE | (104 - 2));
4822 obj_surface = encode_state->reconstructed_object;
4824 /* reconstructed obj_surface is already checked. So this is skipped */
4825 /* DW 1..3 decoded surface */
4828 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
4830 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
4832 /* DW 4..6 deblocking line */
4834 pak_context->res_deblocking_filter_line_buffer.bo,
4835 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
4837 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
4839 /* DW 7..9 deblocking tile line */
4841 pak_context->res_deblocking_filter_tile_line_buffer.bo,
4842 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
4844 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
4846 /* DW 10..12 deblocking tile col */
4848 pak_context->res_deblocking_filter_tile_col_buffer.bo,
4849 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
4851 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
4853 /* DW 13..15 metadata line */
4855 pak_context->res_metadata_line_buffer.bo,
4856 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
4858 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
4860 /* DW 16..18 metadata tile line */
4862 pak_context->res_metadata_tile_line_buffer.bo,
4863 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
4865 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
4867 /* DW 19..21 metadata tile col */
4869 pak_context->res_metadata_tile_col_buffer.bo,
4870 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
4872 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
4874 /* DW 22..30 SAO is not used for VP9 */
4875 OUT_BCS_BATCH(batch, 0);
4876 OUT_BCS_BATCH(batch, 0);
4877 OUT_BCS_BATCH(batch, 0);
4878 OUT_BCS_BATCH(batch, 0);
4879 OUT_BCS_BATCH(batch, 0);
4880 OUT_BCS_BATCH(batch, 0);
4881 OUT_BCS_BATCH(batch, 0);
4882 OUT_BCS_BATCH(batch, 0);
4883 OUT_BCS_BATCH(batch, 0);
4885 /* DW 31..33 Current Motion vector temporal buffer */
4887 pak_context->res_mv_temporal_buffer[vp9_state->curr_mv_temporal_index].bo,
4888 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
4890 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
4892 /* DW 34..36 Not used */
4893 OUT_BCS_BATCH(batch, 0);
4894 OUT_BCS_BATCH(batch, 0);
4895 OUT_BCS_BATCH(batch, 0);
4897 /* Only the first three reference_frame is used for VP9 */
4898 /* DW 37..52 for reference_frame */
4900 if (vp9_state->picture_coding_type) {
4901 for (i = 0; i < 3; i++) {
4903 if (pak_context->reference_surfaces[i].bo) {
4905 pak_context->reference_surfaces[i].bo,
4906 I915_GEM_DOMAIN_INSTRUCTION, 0,
4909 OUT_BCS_BATCH(batch, 0);
4910 OUT_BCS_BATCH(batch, 0);
4915 for (; i < 8; i++) {
4916 OUT_BCS_BATCH(batch, 0);
4917 OUT_BCS_BATCH(batch, 0);
4920 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
4922 /* DW 54..56 for source input */
4924 pak_context->uncompressed_picture_source.bo,
4925 I915_GEM_DOMAIN_INSTRUCTION, 0,
4927 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
4929 /* DW 57..59 StreamOut is not used */
4930 OUT_BCS_BATCH(batch, 0);
4931 OUT_BCS_BATCH(batch, 0);
4932 OUT_BCS_BATCH(batch, 0);
4934 /* DW 60..62. Not used for encoder */
4935 OUT_BCS_BATCH(batch, 0);
4936 OUT_BCS_BATCH(batch, 0);
4937 OUT_BCS_BATCH(batch, 0);
4939 /* DW 63..65. ILDB Not used for encoder */
4940 OUT_BCS_BATCH(batch, 0);
4941 OUT_BCS_BATCH(batch, 0);
4942 OUT_BCS_BATCH(batch, 0);
4944 /* DW 66..81 For the collocated motion vector temporal buffer */
4945 if (vp9_state->picture_coding_type) {
/* XOR selects the other half of the MV temporal ping-pong pair */
4946 int prev_index = vp9_state->curr_mv_temporal_index ^ 0x01;
4948 pak_context->res_mv_temporal_buffer[prev_index].bo,
4949 I915_GEM_DOMAIN_INSTRUCTION, 0,
4952 OUT_BCS_BATCH(batch, 0);
4953 OUT_BCS_BATCH(batch, 0);
4956 for (i = 1; i < 8; i++) {
4957 OUT_BCS_BATCH(batch, 0);
4958 OUT_BCS_BATCH(batch, 0);
4960 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
4962 /* DW 83..85 VP9 prob buffer */
4964 pak_context->res_prob_buffer.bo,
4965 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
4968 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
4970 /* DW 86..88 Segment id buffer */
4971 if (pak_context->res_segmentid_buffer.bo) {
4973 pak_context->res_segmentid_buffer.bo,
4974 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
4977 OUT_BCS_BATCH(batch, 0);
4978 OUT_BCS_BATCH(batch, 0);
4980 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
4982 /* DW 89..91 HVD line rowstore buffer */
4984 pak_context->res_hvd_line_buffer.bo,
4985 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
4987 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
4989 /* DW 92..94 HVD tile line rowstore buffer */
4991 pak_context->res_hvd_tile_line_buffer.bo,
4992 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
4994 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
4996 /* DW 95..97 SAO streamout. Not used for VP9 */
4997 OUT_BCS_BATCH(batch, 0);
4998 OUT_BCS_BATCH(batch, 0);
4999 OUT_BCS_BATCH(batch, 0);
5001 /* reserved for KBL. 98..100 */
5002 OUT_BCS_BATCH(batch, 0);
5003 OUT_BCS_BATCH(batch, 0);
5004 OUT_BCS_BATCH(batch, 0);
5007 OUT_BCS_BATCH(batch, 0);
5008 OUT_BCS_BATCH(batch, 0);
5009 OUT_BCS_BATCH(batch, 0);
5011 ADVANCE_BCS_BATCH(batch);
/*
 * Emit the 29-dword HCP_IND_OBJ_BASE_ADDR_STATE command: indirect bitstream
 * base is unused (zeros), the CU object base points into the MB-code surface
 * at mb_data_offset, PAK-BSE output points at the coded-buffer BO with its
 * start and end (upper-bound) offsets, followed by the compressed-header
 * input, probability-counter stream-out, probability-delta stream-in, tile
 * record stream-out and CU statistics stream-out buffers. Address triples
 * are each followed by a MOCS attribute dword.
 * NOTE(review): the OUT_BCS_RELOC64 invocation lines are elided in this
 * extraction (embedded numbering skips) — only the argument tails remain.
 */
5015 gen9_pak_vp9_ind_obj_base_addr_state(VADriverContextP ctx,
5016 struct encode_state *encode_state,
5017 struct intel_encoder_context *encoder_context)
5019 struct i965_driver_data *i965 = i965_driver_data(ctx);
5020 struct intel_batchbuffer *batch = encoder_context->base.batch;
5021 struct gen9_encoder_context_vp9 *pak_context = encoder_context->mfc_context;
5022 struct gen9_vp9_state *vp9_state;
5024 vp9_state = (struct gen9_vp9_state *)(encoder_context->enc_priv_state);
5027 BEGIN_BCS_BATCH(batch, 29);
5029 OUT_BCS_BATCH(batch, HCP_IND_OBJ_BASE_ADDR_STATE | (29 - 2));
5031 /* indirect bitstream object base */
5032 OUT_BCS_BATCH(batch, 0);
5033 OUT_BCS_BATCH(batch, 0);
5034 OUT_BCS_BATCH(batch, 0);
5035 /* the upper bound of indirect bitstream object */
5036 OUT_BCS_BATCH(batch, 0);
5037 OUT_BCS_BATCH(batch, 0);
5039 /* DW 6: Indirect CU object base address */
5041 pak_context->res_mb_code_surface.bo,
5042 I915_GEM_DOMAIN_INSTRUCTION, 0, /* No write domain */
5043 vp9_state->mb_data_offset);
5044 /* default attribute */
5045 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
5047 /* DW 9..11, PAK-BSE */
5049 pak_context->indirect_pak_bse_object.bo,
5050 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
5051 pak_context->indirect_pak_bse_object.offset);
5052 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
5054 /* DW 12..13 upper bound */
5056 pak_context->indirect_pak_bse_object.bo,
5057 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
5058 pak_context->indirect_pak_bse_object.end_offset);
5060 /* DW 14..16 compressed header buffer */
5062 pak_context->res_compressed_input_buffer.bo,
5063 I915_GEM_DOMAIN_INSTRUCTION, 0,
5065 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
5067 /* DW 17..19 prob counter streamout */
5069 pak_context->res_prob_counter_buffer.bo,
5070 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
5072 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
5074 /* DW 20..22 prob delta streamin */
5076 pak_context->res_prob_delta_buffer.bo,
5077 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
5079 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
5081 /* DW 23..25 Tile record streamout */
5083 pak_context->res_tile_record_streamout_buffer.bo,
5084 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
5086 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
5088 /* DW 26..28 CU record streamout */
5090 pak_context->res_cu_stat_streamout_buffer.bo,
5091 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
5093 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
5095 ADVANCE_BCS_BATCH(batch);
/*
 * Emit one 8-dword HCP_VP9_SEGMENT_STATE command for segment `seg_id`.
 * DW2 packs the segment-reference enable/value/skip flags; the reference
 * value is taken from seg_param but overridden (lines elided here) for key
 * or intra-only frames where inter references are invalid. DW7 packs the
 * sign-magnitude converted qindex delta (9 bits) and loop-filter level
 * delta (7 bits, shifted to bits 16+). DW3..6 are encoder-unused.
 * NOTE(review): the early-return on missing pic_param, the key-frame
 * batch_value override, and the DW7 low-half assignment are elided in this
 * extraction (embedded numbering skips).
 */
5099 gen9_pak_vp9_segment_state(VADriverContextP ctx,
5100 struct encode_state *encode_state,
5101 struct intel_encoder_context *encoder_context,
5102 VAEncSegParamVP9 *seg_param, uint8_t seg_id)
5104 struct intel_batchbuffer *batch = encoder_context->base.batch;
5105 uint32_t batch_value, tmp;
5106 VAEncPictureParameterBufferVP9 *pic_param;
5108 if (!encode_state->pic_param_ext ||
5109 !encode_state->pic_param_ext->buffer) {
5113 pic_param = (VAEncPictureParameterBufferVP9 *)encode_state->pic_param_ext->buffer;
5115 batch_value = seg_param->seg_flags.bits.segment_reference;
5116 if (pic_param->pic_flags.bits.frame_type == HCP_VP9_KEY_FRAME ||
5117 pic_param->pic_flags.bits.intra_only)
5120 BEGIN_BCS_BATCH(batch, 8);
5122 OUT_BCS_BATCH(batch, HCP_VP9_SEGMENT_STATE | (8 - 2));
5123 OUT_BCS_BATCH(batch, seg_id << 0); /* DW 1 - SegmentID */
5124 OUT_BCS_BATCH(batch,
5125 (seg_param->seg_flags.bits.segment_reference_enabled << 3) |
5126 (batch_value << 1) |
5127 (seg_param->seg_flags.bits.segment_reference_skipped <<0)
5130 /* DW 3..6 is not used for encoder */
5131 OUT_BCS_BATCH(batch, 0);
5132 OUT_BCS_BATCH(batch, 0);
5133 OUT_BCS_BATCH(batch, 0);
5134 OUT_BCS_BATCH(batch, 0);
5137 tmp = intel_convert_sign_mag(seg_param->segment_qindex_delta, 9);
5139 tmp = intel_convert_sign_mag(seg_param->segment_lf_level_delta, 7);
5140 batch_value |= (tmp << 16);
5141 OUT_BCS_BATCH(batch, batch_value);
5143 ADVANCE_BCS_BATCH(batch);
/*
 * Build a second-level batch buffer that inserts the already-packed VP9
 * uncompressed frame header into the bitstream: one HCP_INSERT_PAK_OBJECT
 * command followed by the header payload (from vp9_state->alias_insert_data),
 * then MI_NOOP + MI_BATCH_BUFFER_END so it can be chained from the main batch.
 *
 * NOTE(review): extraction gaps -- the return-type line, the early "return;"
 * after the NULL checks, a NULL check on cmd_ptr, and one term of the OR
 * expression (orig. lines 5181/5186) are missing from this listing.
 */
5148 intel_vp9enc_construct_pak_insertobj_batchbuffer(VADriverContextP ctx,
5149 struct intel_encoder_context *encoder_context,
5150 struct i965_gpe_resource *obj_batch_buffer)
5152 struct gen9_encoder_context_vp9 *pak_context = encoder_context->mfc_context;
5153 struct gen9_vp9_state *vp9_state;
5154 int uncompressed_header_length;
5155 unsigned int *cmd_ptr;
5156 unsigned int dw_length, bits_in_last_dw;
5158 vp9_state = (struct gen9_vp9_state *)(encoder_context->enc_priv_state);
5160 if (!pak_context || !vp9_state || !vp9_state->pic_param)
5163 uncompressed_header_length = vp9_state->header_length;
5164 cmd_ptr = i965_map_gpe_resource(obj_batch_buffer);
/* Number of valid bits in the final DWORD of the header payload;
 * a full final DWORD is reported as 32, not 0. */
5169 bits_in_last_dw = uncompressed_header_length % 4;
5170 bits_in_last_dw *= 8;
5172 if (bits_in_last_dw == 0)
5173 bits_in_last_dw = 32;
5175 /* get the DWORD length of the inserted_data */
5176 dw_length = ALIGN(uncompressed_header_length, 4) / 4;
5177 *cmd_ptr++ = HCP_INSERT_PAK_OBJECT | dw_length;
5179 *cmd_ptr++ = ((0 << 31) | /* indirect payload */
5180 (0 << 16) | /* the start offset in first DW */
5182 (bits_in_last_dw << 8) | /* bits_in_last_dw */
5183 (0 << 4) | /* skip emulation byte count. 0 for VP9 */
5184 (0 << 3) | /* emulation flag. 0 for VP9 */
5185 (1 << 2) | /* last header flag. */
/* Copy the pre-packed header bytes directly after the command header. */
5187 memcpy(cmd_ptr, vp9_state->alias_insert_data, dw_length * sizeof(unsigned int));
5189 cmd_ptr += dw_length;
/* Terminate the second-level batch. */
5191 *cmd_ptr++ = MI_NOOP;
5192 *cmd_ptr++ = MI_BATCH_BUFFER_END;
5193 i965_unmap_gpe_resource(obj_batch_buffer);
/*
 * Program the per-pass picture-level PAK commands for one VP9 frame:
 * header-insertion batch (pass 0), picture state (when BRC is off),
 * probability buffer setup, conditional-end for re-PAK passes, HCP pipe
 * mode select, HCP surface states for recon/src/reference surfaces, pipe
 * buffer and indirect object base addresses, then chained second-level
 * batches for pic state, segment states, the uncompressed header, and
 * the MB code (PAK object) buffer.
 *
 * NOTE(review): extraction gaps throughout -- return-type line, early
 * returns, several closing braces, `else` lines, loop/segment-count
 * setup (orig. 5364-5367), and call-argument continuation lines are
 * missing from this listing; the retained lines are kept byte-identical.
 */
5197 gen9_vp9_pak_picture_level(VADriverContextP ctx,
5198 struct encode_state *encode_state,
5199 struct intel_encoder_context *encoder_context)
5201 struct intel_batchbuffer *batch = encoder_context->base.batch;
5202 struct gen9_encoder_context_vp9 *pak_context = encoder_context->mfc_context;
5203 struct object_surface *obj_surface;
5204 VAEncPictureParameterBufferVP9 *pic_param;
5205 VAEncMiscParameterTypeVP9PerSegmantParam *seg_param, tmp_seg_param;
5206 struct gen9_vp9_state *vp9_state;
5207 struct gen9_surface_vp9 *vp9_priv_surface;
5209 struct gen9_hcpe_pipe_mode_select_param mode_param;
5210 hcp_surface_state hcp_surface;
5211 struct gpe_mi_batch_buffer_start_parameter second_level_batch;
5214 vp9_state = (struct gen9_vp9_state *)(encoder_context->enc_priv_state);
5216 if (!pak_context || !vp9_state || !vp9_state->pic_param)
5219 pic_param = vp9_state->pic_param;
5220 seg_param = vp9_state->segment_param;
/* Pass 0 only: build the second-level batch holding the packed
 * uncompressed frame header. */
5222 if (vp9_state->curr_pak_pass == 0)
5224 intel_vp9enc_construct_pak_insertobj_batchbuffer(ctx, encoder_context,
5225 &pak_context->res_pak_uncompressed_input_buffer);
5227 // Check if driver already programmed pic state as part of BRC update kernel programming.
5228 if (!vp9_state->brc_enabled)
5230 intel_vp9enc_construct_picstate_batchbuf(ctx, encode_state,
5231 encoder_context, &pak_context->res_pic_state_brc_write_hfw_read_buffer);
5235 if (vp9_state->curr_pak_pass == 0)
5237 intel_vp9enc_refresh_frame_internal_buffers(ctx, encoder_context);
5241 /* copy the frame_context[frame_idx] into curr_frame_context */
5242 memcpy(&vp9_state->vp9_current_fc,
5243 &(vp9_state->vp9_frame_ctx[vp9_state->frame_ctx_idx]),
5244 sizeof(FRAME_CONTEXT));
/* NOTE(review): the declaration of prob_ptr and its NULL check appear
 * to be on dropped lines (orig. 5245-5252). */
5248 prob_ptr = i965_map_gpe_resource(&pak_context->res_prob_buffer);
5253 /* copy the current fc to vp9_prob buffer */
5254 memcpy(prob_ptr, &vp9_state->vp9_current_fc, sizeof(FRAME_CONTEXT));
/* Key / intra-only frames use the fixed key-frame partition and UV mode
 * probability tables instead of the adapted ones. */
5255 if ((pic_param->pic_flags.bits.frame_type == HCP_VP9_KEY_FRAME) ||
5256 pic_param->pic_flags.bits.intra_only) {
5257 FRAME_CONTEXT *frame_ptr = (FRAME_CONTEXT *)prob_ptr;
5259 memcpy(frame_ptr->partition_prob, vp9_kf_partition_probs,
5260 sizeof(vp9_kf_partition_probs));
5261 memcpy(frame_ptr->uv_mode_prob, vp9_kf_uv_mode_prob,
5262 sizeof(vp9_kf_uv_mode_prob));
5264 i965_unmap_gpe_resource(&pak_context->res_prob_buffer);
/* Re-PAK passes under BRC: terminate this batch early if the HW image
 * status (stored by the previous pass) says no re-encode is needed. */
5268 if (vp9_state->brc_enabled && vp9_state->curr_pak_pass) {
5269 /* read image status and insert the conditional end cmd */
5270 /* image ctrl/status is already accessed */
5271 struct gpe_mi_conditional_batch_buffer_end_parameter mi_cond_end;
5272 struct vp9_encode_status_buffer_internal *status_buffer;
5274 status_buffer = &vp9_state->status_buffer;
5275 memset(&mi_cond_end, 0, sizeof(mi_cond_end));
5276 mi_cond_end.offset = status_buffer->image_status_mask_offset;
5277 mi_cond_end.bo = status_buffer->bo;
5278 mi_cond_end.compare_data = 0;
5279 mi_cond_end.compare_mask_mode_disabled = 1;
5280 gen9_gpe_mi_conditional_batch_buffer_end(ctx, batch,
/* codec_mode 1 selects encode on the HCP pipe; no streamout. */
5284 mode_param.codec_mode = 1;
5285 mode_param.stream_out = 0;
5286 gen9_pak_vp9_pipe_mode_select(ctx, encode_state, encoder_context, &mode_param);
5288 /* reconstructed surface */
5289 memset(&hcp_surface, 0, sizeof(hcp_surface));
5290 obj_surface = encode_state->reconstructed_object;
5291 hcp_surface.dw1.surface_id = 0;
5292 hcp_surface.dw1.surface_pitch = obj_surface->width;
5293 hcp_surface.dw2.surface_format = SURFACE_FORMAT_PLANAR_420_8;
5294 hcp_surface.dw2.y_cb_offset = obj_surface->y_cb_offset;
5295 gen9_vp9_add_surface_state(ctx, encode_state, encoder_context,
/* Source surface (surface_id 1): when dynamic scaling is active and the
 * src/dst dimensions differ, use the scaled surface instead of the raw
 * input YUV. */
5299 if (vp9_state->dys_in_use &&
5300 ((pic_param->frame_width_src != pic_param->frame_width_dst) ||
5301 (pic_param->frame_height_src != pic_param->frame_height_dst))) {
5302 vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
5303 obj_surface = vp9_priv_surface->dys_surface_obj;
5305 obj_surface = encode_state->input_yuv_object;
5308 hcp_surface.dw1.surface_id = 1;
5309 hcp_surface.dw1.surface_pitch = obj_surface->width;
5310 hcp_surface.dw2.surface_format = SURFACE_FORMAT_PLANAR_420_8;
5311 hcp_surface.dw2.y_cb_offset = obj_surface->y_cb_offset;
5312 gen9_vp9_add_surface_state(ctx, encode_state, encoder_context,
/* Inter frames: add surface states for LAST (2), GOLDEN (3) and
 * ALTREF (4) reference surfaces when present. */
5315 if (vp9_state->picture_coding_type) {
5316 /* Add surface for last */
5317 if (vp9_state->last_ref_obj) {
5318 obj_surface = vp9_state->last_ref_obj;
5319 hcp_surface.dw1.surface_id = 2;
5320 hcp_surface.dw1.surface_pitch = obj_surface->width;
5321 hcp_surface.dw2.surface_format = SURFACE_FORMAT_PLANAR_420_8;
5322 hcp_surface.dw2.y_cb_offset = obj_surface->y_cb_offset;
5323 gen9_vp9_add_surface_state(ctx, encode_state, encoder_context,
5326 if (vp9_state->golden_ref_obj) {
5327 obj_surface = vp9_state->golden_ref_obj;
5328 hcp_surface.dw1.surface_id = 3;
5329 hcp_surface.dw1.surface_pitch = obj_surface->width;
5330 hcp_surface.dw2.surface_format = SURFACE_FORMAT_PLANAR_420_8;
5331 hcp_surface.dw2.y_cb_offset = obj_surface->y_cb_offset;
5332 gen9_vp9_add_surface_state(ctx, encode_state, encoder_context,
5335 if (vp9_state->alt_ref_obj) {
5336 obj_surface = vp9_state->alt_ref_obj;
5337 hcp_surface.dw1.surface_id = 4;
5338 hcp_surface.dw1.surface_pitch = obj_surface->width;
5339 hcp_surface.dw2.surface_format = SURFACE_FORMAT_PLANAR_420_8;
5340 hcp_surface.dw2.y_cb_offset = obj_surface->y_cb_offset;
5341 gen9_vp9_add_surface_state(ctx, encode_state, encoder_context,
5346 gen9_pak_vp9_pipe_buf_addr_state(ctx, encode_state, encoder_context);
5348 gen9_pak_vp9_ind_obj_base_addr_state(ctx, encode_state, encoder_context);
5350 // Using picstate zero with updated QP and LF deltas by HuC for repak, irrespective of how many Pak passes were run in multi-pass mode.
5351 memset(&second_level_batch, 0, sizeof(second_level_batch));
/* Pass N > 0 reads the pic-state slice for that pass (each slice is
 * VP9_PIC_STATE_BUFFER_SIZE bytes). */
5353 if (vp9_state->curr_pak_pass == 0) {
5354 second_level_batch.offset = 0;
5356 second_level_batch.offset = vp9_state->curr_pak_pass * VP9_PIC_STATE_BUFFER_SIZE;
5358 second_level_batch.is_second_level = 1;
5359 second_level_batch.bo = pak_context->res_pic_state_brc_write_hfw_read_buffer.bo;
5361 gen8_gpe_mi_batch_buffer_start(ctx, batch, &second_level_batch);
/* Segment states: when segmentation is disabled (or no seg_param was
 * given), a zeroed tmp_seg_param is used instead.
 * NOTE(review): the segment_count setup lines are dropped from this
 * listing -- confirm the loop bound against the full source. */
5363 if (pic_param->pic_flags.bits.segmentation_enabled &&
5368 memset(&tmp_seg_param, 0, sizeof(tmp_seg_param));
5369 seg_param = &tmp_seg_param;
5371 for (i = 0; i < segment_count; i++)
5373 gen9_pak_vp9_segment_state(ctx, encode_state,
5375 &seg_param->seg_data[i], i);
5378 /* Insert the uncompressed header buffer */
5379 second_level_batch.is_second_level = 1;
5380 second_level_batch.offset = 0;
5381 second_level_batch.bo = pak_context->res_pak_uncompressed_input_buffer.bo;
5383 gen8_gpe_mi_batch_buffer_start(ctx, batch, &second_level_batch);
/* Finally chain the MB code (PAK object) buffer produced by the VME/MBENC
 * stage. */
5386 second_level_batch.is_second_level = 1;
5387 second_level_batch.offset = 0;
5388 second_level_batch.bo = pak_context->res_mb_code_surface.bo;
5389 gen8_gpe_mi_batch_buffer_start(ctx, batch, &second_level_batch);
/*
 * After a PAK pass, snapshot the HCP status MMIO registers into memory
 * with MI_STORE_REGISTER_MEM so the driver (and the BRC kernel) can read
 * them: the bitstream byte count goes to both the status buffer and the
 * BRC bitstream-size buffer, and the image status mask/control registers
 * go to the status buffer (mask is later used by the conditional
 * batch-buffer-end on re-PAK passes). Each group is fenced with
 * MI_FLUSH_DW.
 *
 * NOTE(review): the return-type line and early "return;" after the NULL
 * check are on lines dropped from this listing.
 */
5395 gen9_vp9_read_mfc_status(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
5397 struct intel_batchbuffer *batch = encoder_context->base.batch;
5398 struct gen9_encoder_context_vp9 *pak_context = encoder_context->mfc_context;
5399 struct gpe_mi_store_register_mem_parameter mi_store_reg_mem_param;
5400 struct gpe_mi_flush_dw_parameter mi_flush_dw_param;
5401 //struct gpe_mi_copy_mem_parameter mi_copy_mem_param;
5402 struct vp9_encode_status_buffer_internal *status_buffer;
5403 struct gen9_vp9_state *vp9_state;
5405 vp9_state = (struct gen9_vp9_state *)(encoder_context->enc_priv_state);
5406 if (!vp9_state || !pak_context || !batch)
5409 status_buffer = &(vp9_state->status_buffer);
5411 memset(&mi_flush_dw_param, 0, sizeof(mi_flush_dw_param));
5412 gen8_gpe_mi_flush_dw(ctx, batch, &mi_flush_dw_param);
/* Bitstream byte count -> status buffer. */
5414 memset(&mi_store_reg_mem_param, 0, sizeof(mi_store_reg_mem_param));
5415 mi_store_reg_mem_param.bo = status_buffer->bo;
5416 mi_store_reg_mem_param.offset = status_buffer->bs_byte_count_offset;
5417 mi_store_reg_mem_param.mmio_offset = status_buffer->vp9_bs_frame_reg_offset;
5418 gen8_gpe_mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);
/* Same byte count -> BRC bitstream-size buffer (DW 0), read by the BRC
 * update kernel. */
5420 mi_store_reg_mem_param.bo = pak_context->res_brc_bitstream_size_buffer.bo;
5421 mi_store_reg_mem_param.offset = 0;
5422 mi_store_reg_mem_param.mmio_offset = status_buffer->vp9_bs_frame_reg_offset;
5423 gen8_gpe_mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);
5425 /* Read HCP Image status */
5426 mi_store_reg_mem_param.bo = status_buffer->bo;
5427 mi_store_reg_mem_param.offset = status_buffer->image_status_mask_offset;
5428 mi_store_reg_mem_param.mmio_offset =
5429 status_buffer->vp9_image_mask_reg_offset;
5430 gen8_gpe_mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);
5432 mi_store_reg_mem_param.bo = status_buffer->bo;
5433 mi_store_reg_mem_param.offset = status_buffer->image_status_ctrl_offset;
5434 mi_store_reg_mem_param.mmio_offset =
5435 status_buffer->vp9_image_ctrl_reg_offset;
5436 gen8_gpe_mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);
/* Image status control -> BRC bitstream-size buffer (DW 1). */
5438 mi_store_reg_mem_param.bo = pak_context->res_brc_bitstream_size_buffer.bo;
5439 mi_store_reg_mem_param.offset = 4;
5440 mi_store_reg_mem_param.mmio_offset =
5441 status_buffer->vp9_image_ctrl_reg_offset;
5442 gen8_gpe_mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);
5444 gen8_gpe_mi_flush_dw(ctx, batch, &mi_flush_dw_param);
/*
 * Take references on all buffer objects the PAK stage will use for this
 * frame: the reconstructed surface, up to three reference surfaces
 * (inter frames only), the input YUV (or its dynamically-scaled copy),
 * and the coded buffer used as the indirect PAK-BSE target. Also resets
 * the coded-buffer segment header so the coded size reads as unknown.
 *
 * Returns VA_STATUS_SUCCESS, or VA_STATUS_ERROR_INVALID_PARAMETER when
 * the per-context state is missing.
 *
 * NOTE(review): extraction gaps -- the return-type line, declarations of
 * i/bo, part of the NULL-check condition (orig. 5464), several closing
 * braces, `else` lines, and what is presumably a dri_bo_map() of the
 * coded buffer before bo->virtual is dereferenced (orig. 5525) are
 * missing from this listing.
 */
5450 gen9_vp9_pak_pipeline_prepare(VADriverContextP ctx,
5451 struct encode_state *encode_state,
5452 struct intel_encoder_context *encoder_context)
5454 struct gen9_encoder_context_vp9 *pak_context = encoder_context->mfc_context;
5455 struct object_surface *obj_surface;
5456 struct object_buffer *obj_buffer;
5457 struct i965_coded_buffer_segment *coded_buffer_segment;
5458 VAEncPictureParameterBufferVP9 *pic_param;
5459 struct gen9_vp9_state *vp9_state;
5463 vp9_state = (struct gen9_vp9_state *)(encoder_context->enc_priv_state);
5465 !vp9_state->pic_param)
5466 return VA_STATUS_ERROR_INVALID_PARAMETER;
5468 pic_param = vp9_state->pic_param;
5470 /* reconstructed surface */
5471 obj_surface = encode_state->reconstructed_object;
5472 i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
5474 dri_bo_unreference(pak_context->reconstructed_object.bo);
5476 pak_context->reconstructed_object.bo = obj_surface->bo;
5477 dri_bo_reference(pak_context->reconstructed_object.bo);
5479 /* set vp9 reference frames */
/* Drop last frame's references before picking up the new ones. */
5480 for (i = 0; i < ARRAY_ELEMS(pak_context->reference_surfaces); i++) {
5481 if (pak_context->reference_surfaces[i].bo)
5482 dri_bo_unreference(pak_context->reference_surfaces[i].bo);
5483 pak_context->reference_surfaces[i].bo = NULL;
5486 /* Three reference frames are enough for VP9 */
/* Only non-key, non-intra-only frames carry references. */
5487 if (pic_param->pic_flags.bits.frame_type &&
5488 !pic_param->pic_flags.bits.intra_only) {
5489 for (i = 0; i < 3; i++) {
5490 obj_surface = encode_state->reference_objects[i];
5491 if (obj_surface && obj_surface->bo) {
5492 pak_context->reference_surfaces[i].bo = obj_surface->bo;
5493 dri_bo_reference(obj_surface->bo);
5498 /* input YUV surface */
5499 dri_bo_unreference(pak_context->uncompressed_picture_source.bo);
5500 pak_context->uncompressed_picture_source.bo = NULL;
5501 obj_surface = encode_state->reconstructed_object;
/* With dynamic scaling active, the PAK source is the scaled surface
 * hanging off the reconstructed object's private data. */
5502 if (vp9_state->dys_in_use &&
5503 ((pic_param->frame_width_src != pic_param->frame_width_dst) ||
5504 (pic_param->frame_height_src != pic_param->frame_height_dst))) {
5505 struct gen9_surface_vp9 *vp9_priv_surface =
5506 (struct gen9_surface_vp9 *)(obj_surface->private_data);
5507 obj_surface = vp9_priv_surface->dys_surface_obj;
5509 obj_surface = encode_state->input_yuv_object;
5511 pak_context->uncompressed_picture_source.bo = obj_surface->bo;
5512 dri_bo_reference(pak_context->uncompressed_picture_source.bo);
/* Coded buffer: the compressed bitstream lands after the driver's
 * header; end_offset reserves the trailing 4KB-aligned region. */
5515 dri_bo_unreference(pak_context->indirect_pak_bse_object.bo);
5516 pak_context->indirect_pak_bse_object.bo = NULL;
5517 obj_buffer = encode_state->coded_buf_object;
5518 bo = obj_buffer->buffer_store->bo;
5519 pak_context->indirect_pak_bse_object.offset = I965_CODEDBUFFER_HEADER_SIZE;
5520 pak_context->indirect_pak_bse_object.end_offset = ALIGN((obj_buffer->size_element - 0x1000), 0x1000);
5521 pak_context->indirect_pak_bse_object.bo = bo;
5522 dri_bo_reference(pak_context->indirect_pak_bse_object.bo);
5524 /* set the internal flag to 0 to indicate the coded size is unknown */
/* NOTE(review): a dri_bo_map(bo, 1) presumably sits on the dropped line
 * before bo->virtual is used -- confirm (and its matching unmap). */
5526 coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual;
5527 coded_buffer_segment->mapped = 0;
5528 coded_buffer_segment->codec = encoder_context->codec;
5529 coded_buffer_segment->status_support = 1;
5532 return VA_STATUS_SUCCESS;
/*
 * mfc_brc_prepare hook for VP9. No per-frame BRC preparation is needed
 * at the PAK stage, so this is a deliberate no-op (the body's braces are
 * on lines dropped from this listing).
 */
5536 gen9_vp9_pak_brc_prepare(struct encode_state *encode_state,
5537 struct intel_encoder_context *encoder_context)
/*
 * Release every buffer object the PAK context holds a reference on:
 * reconstructed surface, input YUV source, coded-buffer target, and the
 * eight reference-surface slots. The context structure itself is owned
 * by the VME side and is freed there, not here.
 *
 * NOTE(review): the return-type line, braces, and the declaration of i
 * are on lines dropped from this listing.
 */
5542 gen9_vp9_pak_context_destroy(void *context)
5544 struct gen9_encoder_context_vp9 *pak_context = context;
5547 dri_bo_unreference(pak_context->reconstructed_object.bo);
5548 pak_context->reconstructed_object.bo = NULL;
5550 dri_bo_unreference(pak_context->uncompressed_picture_source.bo);
5551 pak_context->uncompressed_picture_source.bo = NULL;
5553 dri_bo_unreference(pak_context->indirect_pak_bse_object.bo);
5554 pak_context->indirect_pak_bse_object.bo = NULL;
5556 for (i = 0; i < 8; i++){
5557 dri_bo_unreference(pak_context->reference_surfaces[i].bo);
5558 pak_context->reference_surfaces[i].bo = NULL;
5561 /* vme & pak share the same structure, so don't free the context here */
/*
 * Top-level PAK entry for one VP9 frame. Acquires the frame's buffer
 * objects, then runs num_pak_passes picture-level PAK passes inside one
 * atomic BCS batch (on BSD ring 0 when a second BSD engine exists),
 * reading back the HCP status registers after each pass. Afterwards it
 * snapshots this frame's parameters into vp9_last_frame for the next
 * frame's decisions and flips the MV temporal buffer index.
 *
 * NOTE(review): extraction gaps -- the profile parameter line (orig.
 * 5566), declarations of va_status/i, early return on prepare failure,
 * `else` lines, and closing braces are missing from this listing.
 */
5565 gen9_vp9_pak_pipeline(VADriverContextP ctx,
5567 struct encode_state *encode_state,
5568 struct intel_encoder_context *encoder_context)
5570 struct i965_driver_data *i965 = i965_driver_data(ctx);
5571 struct intel_batchbuffer *batch = encoder_context->base.batch;
5572 struct gen9_encoder_context_vp9 *pak_context = encoder_context->mfc_context;
5574 struct gen9_vp9_state *vp9_state;
5575 VAEncPictureParameterBufferVP9 *pic_param;
5578 vp9_state = (struct gen9_vp9_state *)(encoder_context->enc_priv_state);
5580 if (!vp9_state || !vp9_state->pic_param || !pak_context)
5581 return VA_STATUS_ERROR_INVALID_PARAMETER;
5583 va_status = gen9_vp9_pak_pipeline_prepare(ctx, encode_state, encoder_context);
5585 if (va_status != VA_STATUS_SUCCESS)
/* Pin multi-pass work to BSD ring 0 when two BSD engines are present,
 * so every pass runs on the same engine. */
5588 if (i965->intel.has_bsd2)
5589 intel_batchbuffer_start_atomic_bcs_override(batch, 0x1000, BSD_RING0);
5591 intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
5593 intel_batchbuffer_emit_mi_flush(batch);
/* Pad with 64 MI_NOOPs before the real commands. */
5595 BEGIN_BCS_BATCH(batch, 64);
5596 for (i = 0; i < 64; i++)
5597 OUT_BCS_BATCH(batch, MI_NOOP);
5599 ADVANCE_BCS_BATCH(batch);
5601 for (vp9_state->curr_pak_pass = 0;
5602 vp9_state->curr_pak_pass < vp9_state->num_pak_passes;
5603 vp9_state->curr_pak_pass++) {
5605 if (vp9_state->curr_pak_pass == 0) {
5606 /* Initialize the VP9 Image Ctrl reg for the first pass */
5607 struct gpe_mi_load_register_imm_parameter mi_load_reg_imm;
5608 struct vp9_encode_status_buffer_internal *status_buffer;
5610 status_buffer = &(vp9_state->status_buffer);
5611 memset(&mi_load_reg_imm, 0, sizeof(mi_load_reg_imm));
5612 mi_load_reg_imm.mmio_offset = status_buffer->vp9_image_ctrl_reg_offset;
5613 mi_load_reg_imm.data = 0;
5614 gen8_gpe_mi_load_register_imm(ctx, batch, &mi_load_reg_imm);
5616 gen9_vp9_pak_picture_level(ctx, encode_state, encoder_context);
5617 gen9_vp9_read_mfc_status(ctx, encoder_context);
5620 intel_batchbuffer_end_atomic(batch);
5621 intel_batchbuffer_flush(batch);
/* Record this frame's key parameters for next-frame decisions
 * (reference refresh, context reuse, first/last frame handling). */
5623 pic_param = vp9_state->pic_param;
5624 vp9_state->vp9_last_frame.frame_width = pic_param->frame_width_dst;
5625 vp9_state->vp9_last_frame.frame_height = pic_param->frame_height_dst;
5626 vp9_state->vp9_last_frame.frame_type = pic_param->pic_flags.bits.frame_type;
5627 vp9_state->vp9_last_frame.show_frame = pic_param->pic_flags.bits.show_frame;
5628 vp9_state->vp9_last_frame.refresh_frame_context = pic_param->pic_flags.bits.refresh_frame_context;
5629 vp9_state->vp9_last_frame.frame_context_idx = pic_param->pic_flags.bits.frame_context_idx;
5630 vp9_state->vp9_last_frame.intra_only = pic_param->pic_flags.bits.intra_only;
5631 vp9_state->frame_number++;
/* Ping-pong between the two MV temporal buffers each frame. */
5632 vp9_state->curr_mv_temporal_index ^= 1;
5633 vp9_state->first_frame = 0;
5635 return VA_STATUS_SUCCESS;
/*
 * Allocate and initialize the VME context and the shared per-encoder VP9
 * state: feature defaults (HME, scoreboard, TU, pass count), BRC
 * configuration derived from the requested rate-control mode, the status
 * buffer offsets inside the coded-buffer segment, and the HCP MMIO
 * register offsets used for status readback. Finally wires up the VME
 * kernels and pipeline hooks on encoder_context.
 *
 * NOTE(review): extraction gaps -- the return-type line, the cleanup
 * path of the allocation-failure branch (orig. 5648-5653), closing
 * braces, and the trailing return are missing from this listing.
 */
5639 gen9_vp9_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
5641 struct gen9_encoder_context_vp9 *vme_context = NULL;
5642 struct gen9_vp9_state *vp9_state = NULL;
5644 vme_context = calloc(1, sizeof(struct gen9_encoder_context_vp9));
5645 vp9_state = calloc(1, sizeof(struct gen9_vp9_state));
/* Allocation-failure handling is on dropped lines; presumably both
 * pointers are freed and an error is returned -- TODO confirm. */
5647 if (!vme_context || !vp9_state) {
/* The same vp9_state is shared between VME and PAK stages. */
5655 encoder_context->enc_priv_state = vp9_state;
5656 vme_context->enc_priv_state = vp9_state;
5658 /* Initialize the features that are supported by VP9 */
5659 vme_context->hme_supported = 1;
5660 vme_context->use_hw_scoreboard = 1;
5661 vme_context->use_hw_non_stalling_scoreboard = 1;
5663 vp9_state->tx_mode = TX_MODE_SELECT;
5664 vp9_state->multi_ref_qp_check = 0;
5665 vp9_state->target_usage = INTEL_ENC_VP9_TU_NORMAL;
5666 vp9_state->num_pak_passes = 1;
5667 vp9_state->hme_supported = vme_context->hme_supported;
5668 vp9_state->b16xme_supported = 1;
/* Any RC mode other than NONE/CQP enables BRC with up to 4 PAK passes. */
5670 if (encoder_context->rate_control_mode != VA_RC_NONE &&
5671 encoder_context->rate_control_mode != VA_RC_CQP) {
5672 vp9_state->brc_enabled = 1;
5673 vp9_state->brc_distortion_buffer_supported = 1;
5674 vp9_state->brc_constant_buffer_supported = 1;
5675 vp9_state->num_pak_passes = 4;
5677 vp9_state->dys_enabled = 1; /* this is supported by default */
5678 vp9_state->first_frame = 1;
5680 /* the definition of status buffer offset for VP9 */
5682 struct vp9_encode_status_buffer_internal *status_buffer;
/* Status fields live in the codec_private_data area of the coded-buffer
 * segment header. */
5683 uint32_t base_offset = offsetof(struct i965_coded_buffer_segment, codec_private_data);
5685 status_buffer = &vp9_state->status_buffer;
5686 memset(status_buffer, 0,
5687 sizeof(struct vp9_encode_status_buffer_internal));
5689 status_buffer->bs_byte_count_offset = base_offset + offsetof(struct vp9_encode_status, bs_byte_count);
5690 status_buffer->image_status_mask_offset = base_offset + offsetof(struct vp9_encode_status, image_status_mask);
5691 status_buffer->image_status_ctrl_offset = base_offset + offsetof(struct vp9_encode_status, image_status_ctrl);
5692 status_buffer->media_index_offset = base_offset + offsetof(struct vp9_encode_status, media_index);
/* HCP VP9 status MMIO registers: bitstream byte count, image status
 * mask and image status control. */
5694 status_buffer->vp9_bs_frame_reg_offset = 0x1E9E0;
5695 status_buffer->vp9_image_mask_reg_offset = 0x1E9F0;
5696 status_buffer->vp9_image_ctrl_reg_offset = 0x1E9F4;
5699 gen9_vme_kernels_context_init_vp9(ctx, encoder_context, vme_context);
5701 encoder_context->vme_context = vme_context;
5702 encoder_context->vme_pipeline = gen9_vme_pipeline_vp9;
5703 encoder_context->vme_context_destroy = gen9_vme_context_destroy_vp9;
/*
 * Publish the coded frame size: copy the bitstream byte count (written
 * by the MI_STORE_REGISTER_MEM readback in gen9_vp9_read_mfc_status)
 * from the codec-private status area into the VACodedBufferSegment the
 * application sees.
 *
 * Returns VA_STATUS_SUCCESS, or VA_STATUS_ERROR_INVALID_BUFFER on NULL
 * arguments. (Return-type line and braces are on lines dropped from
 * this listing.)
 */
5709 gen9_vp9_get_coded_status(VADriverContextP ctx,
5710 struct intel_encoder_context *encoder_context,
5711 struct i965_coded_buffer_segment *coded_buf_seg)
5713 struct vp9_encode_status *vp9_encode_status;
5715 if (!encoder_context || !coded_buf_seg)
5716 return VA_STATUS_ERROR_INVALID_BUFFER;
5718 vp9_encode_status = (struct vp9_encode_status *)coded_buf_seg->codec_private_data;
5719 coded_buf_seg->base.size = vp9_encode_status->bs_byte_count;
5721 /* One VACodedBufferSegment for VP9 will be added later.
5722 * It will be linked to the next element of coded_buf_seg->base.next
5725 return VA_STATUS_SUCCESS;
5729 gen9_vp9_pak_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
5731 /* VME & PAK share the same context */
5732 struct gen9_encoder_context_vp9 *pak_context = encoder_context->vme_context;
5737 encoder_context->mfc_context = pak_context;
5738 encoder_context->mfc_context_destroy = gen9_vp9_pak_context_destroy;
5739 encoder_context->mfc_pipeline = gen9_vp9_pak_pipeline;
5740 encoder_context->mfc_brc_prepare = gen9_vp9_pak_brc_prepare;
5741 encoder_context->get_status = gen9_vp9_get_coded_status;