2 * Copyright @ 2017 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the
6 * "Software"), to deal in the Software without restriction, including
7 * without limitation the rights to use, copy, modify, merge, publish,
8 * distribute, sub license, and/or sell copies of the Software, and to
9 * permit persons to whom the Software is furnished to do so, subject to
10 * the following conditions:
12 * The above copyright notice and this permission notice (including the
13 * next paragraph) shall be included in all copies or substantial portions
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19 * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 * Pengfei Qu <Pengfei.qu@intel.com>
37 #include "intel_batchbuffer.h"
38 #include "intel_driver.h"
40 #include "i965_defines.h"
41 #include "i965_structs.h"
42 #include "i965_drv_video.h"
43 #include "i965_encoder.h"
44 #include "i965_encoder_utils.h"
45 #include "intel_media.h"
47 #include "i965_gpe_utils.h"
48 #include "i965_encoder_common.h"
49 #include "i965_avc_encoder_common.h"
50 #include "gen9_avc_encoder_kernels.h"
51 #include "gen9_avc_encoder.h"
52 #include "gen9_avc_const_def.h"
/* Kernel/GPE configuration constants shared by the gen9 AVC encoder paths. */
54 #define MAX_URB_SIZE 4096 /* URB size in 32-byte registers */
55 #define NUM_KERNELS_PER_GPE_CONTEXT 1
56 #define MBENC_KERNEL_BASE GEN9_AVC_KERNEL_MBENC_QUALITY_I
57 #define GPE_RESOURCE_ALIGNMENT 4 /* log2 alignment: 4 means 16 (= 1 << 4) */
/*
 * Emit a two-/zero-filled buffer address pair into a BCS batch.
 * When `bo` is non-NULL a 64-bit relocation is emitted (writable when
 * `is_target` is set, via I915_GEM_DOMAIN_RENDER); otherwise two zero
 * dwords are emitted as a null address.
 * NOTE(review): some interior lines of this macro (the `if (bo)` test,
 * delta argument and closing `} while (0)`) are not visible in this
 * chunk — confirm against the full file before editing.
 */
59 #define OUT_BUFFER_2DW(batch, bo, is_target, delta) do { \
61         OUT_BCS_RELOC64(batch, \
63                         I915_GEM_DOMAIN_INSTRUCTION, \
64                         is_target ? I915_GEM_DOMAIN_RENDER : 0, \
67         OUT_BCS_BATCH(batch, 0); \
68         OUT_BCS_BATCH(batch, 0); \
/*
 * Like OUT_BUFFER_2DW but additionally emits one attribute dword
 * (e.g. MOCS/arbitration bits) after the address pair.
 */
72 #define OUT_BUFFER_3DW(batch, bo, is_target, delta, attr) do { \
73         OUT_BUFFER_2DW(batch, bo, is_target, delta); \
74         OUT_BCS_BATCH(batch, attr); \
/* Flat (all-16) quantization matrix: 64 bytes packed as 16 dwords of 0x10. */
77 static const uint32_t qm_flat[16] = {
78     0x10101010, 0x10101010, 0x10101010, 0x10101010,
79     0x10101010, 0x10101010, 0x10101010, 0x10101010,
80     0x10101010, 0x10101010, 0x10101010, 0x10101010,
81     0x10101010, 0x10101010, 0x10101010, 0x10101010
/* Flat forward-quantization matrix: 64 16-bit entries of 0x1000 packed as 32 dwords. */
84 static const uint32_t fqm_flat[32] = {
85     0x10001000, 0x10001000, 0x10001000, 0x10001000,
86     0x10001000, 0x10001000, 0x10001000, 0x10001000,
87     0x10001000, 0x10001000, 0x10001000, 0x10001000,
88     0x10001000, 0x10001000, 0x10001000, 0x10001000,
89     0x10001000, 0x10001000, 0x10001000, 0x10001000,
90     0x10001000, 0x10001000, 0x10001000, 0x10001000,
91     0x10001000, 0x10001000, 0x10001000, 0x10001000,
92     0x10001000, 0x10001000, 0x10001000, 0x10001000
/* Maps VA slice type index to kernel index; presumably {P, B, I} -> {1, 2, 0} — confirm against kernel table. */
95 static const unsigned int slice_type_kernel[3] = {1, 2, 0};
/* Default CURBE payload for the BRC init/reset kernel (initializer body elided in this chunk). */
97 static const gen9_avc_brc_init_reset_curbe_data gen9_avc_brc_init_reset_curbe_init_data = {
/* Default CURBE payload for the per-frame BRC update kernel (initializer body elided in this chunk). */
254 static const gen9_avc_frame_brc_update_curbe_data gen9_avc_frame_brc_update_curbe_init_data = {
/*
 * Refresh the encoder's generic BRC state from the misc parameters cached
 * in encoder_context->brc: bitrates (converted to kbps), rate mode specific
 * target/min rates, frame rate, HRD buffer sizes and up to 3 ROI regions
 * (pixel coordinates converted to macroblock units).  Sets brc_need_reset
 * when the effective target bitrate changes.
 */
412 gen9_avc_update_misc_parameters(VADriverContextP ctx,
413 struct encode_state *encode_state,
414 struct intel_encoder_context *encoder_context)
416 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
417 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
/* bits_per_second -> kbps, rounded up */
421 generic_state->max_bit_rate = (encoder_context->brc.bits_per_second[0] + 1000 - 1) / 1000;
423 generic_state->brc_need_reset = encoder_context->brc.need_reset;
425 if (generic_state->internal_rate_mode == VA_RC_CBR) {
/* CBR: min == target == max */
426 generic_state->min_bit_rate = generic_state->max_bit_rate;
427 generic_state->mb_brc_enabled = encoder_context->brc.mb_rate_control[0];
429 if (generic_state->target_bit_rate != generic_state->max_bit_rate) {
430 generic_state->target_bit_rate = generic_state->max_bit_rate;
431 generic_state->brc_need_reset = 1;
433 } else if (generic_state->internal_rate_mode == VA_RC_VBR) {
/* VBR: min derived from target_percentage; target = max * percentage */
434 generic_state->min_bit_rate = generic_state->max_bit_rate * (2 * encoder_context->brc.target_percentage[0] - 100) / 100;
435 generic_state->mb_brc_enabled = encoder_context->brc.mb_rate_control[0];
437 if (generic_state->target_bit_rate != generic_state->max_bit_rate * encoder_context->brc.target_percentage[0] / 100) {
438 generic_state->target_bit_rate = generic_state->max_bit_rate * encoder_context->brc.target_percentage[0] / 100;
439 generic_state->brc_need_reset = 1;
/* Frame-rate setup; 30 fps defaults are used for CQP (visible in the else branch below) */
444 if (generic_state->internal_rate_mode != VA_RC_CQP) {
445 generic_state->frames_per_100s = encoder_context->brc.framerate[0].num * 100 / encoder_context->brc.framerate[0].den ;
446 generic_state->frame_rate = encoder_context->brc.framerate[0].num / encoder_context->brc.framerate[0].den ;
447 generic_state->frames_per_window_size = (int)(encoder_context->brc.window_size * generic_state->frame_rate / 1000); // brc.windows size in ms as the unit
449 generic_state->frames_per_100s = 30 * 100;
450 generic_state->frame_rate = 30 ;
451 generic_state->frames_per_window_size = 30;
/* HRD (VBV) parameters only matter when rate control is active */
455 if (generic_state->internal_rate_mode != VA_RC_CQP) {
456 generic_state->vbv_buffer_size_in_bit = encoder_context->brc.hrd_buffer_size;//misc->buffer_size;
457 generic_state->init_vbv_buffer_fullness_in_bit = encoder_context->brc.hrd_initial_buffer_fullness;//misc->initial_buffer_fullness;
/* ROI: at most 3 regions are consumed */
461 generic_state->num_roi = MIN(encoder_context->brc.num_roi, 3);
462 if (generic_state->num_roi > 0) {
463 generic_state->max_delta_qp = encoder_context->brc.roi_max_delta_qp;
464 generic_state->min_delta_qp = encoder_context->brc.roi_min_delta_qp;
466 for (i = 0; i < generic_state->num_roi; i++) {
467 generic_state->roi[i].left = encoder_context->brc.roi[i].left;
468 generic_state->roi[i].right = encoder_context->brc.roi[i].right;
469 generic_state->roi[i].top = encoder_context->brc.roi[i].top;
470 generic_state->roi[i].bottom = encoder_context->brc.roi[i].bottom;
471 generic_state->roi[i].value = encoder_context->brc.roi[i].value;
/* convert pixel coordinates to 16x16 macroblock units */
473 generic_state->roi[i].left /= 16;
474 generic_state->roi[i].right /= 16;
475 generic_state->roi[i].top /= 16;
476 generic_state->roi[i].bottom /= 16;
/*
 * Locate one kernel inside the combined gen9 AVC kernel binary.
 * pvbinary/binary_size describe the blob; operation selects the kernel
 * family in the header table and krnstate_idx the entry within it.
 * On success ret_kernel->bin points into the blob (kernel_start_pointer
 * is in 64-byte units, hence the << 6) and ret_kernel->size is computed
 * from the next header entry's start offset (or the blob end for the
 * last kernel).  Returns early when pvbinary or ret_kernel is NULL.
 */
483 intel_avc_get_kernel_header_and_size(void *pvbinary,
485                                      INTEL_GENERIC_ENC_OPERATION operation,
487                                      struct i965_kernel *ret_kernel)
489     typedef uint32_t BIN_PTR[4];
492     gen9_avc_encoder_kernel_header *pkh_table;
493     kernel_header *pcurr_header, *pinvalid_entry, *pnext_header;
496     if (!pvbinary || !ret_kernel)
/* The header table sits at the front of the blob; one entry past
 * static_detection marks the end of valid entries. */
499     bin_start = (char *)pvbinary;
500     pkh_table = (gen9_avc_encoder_kernel_header *)pvbinary;
501     pinvalid_entry = &(pkh_table->static_detection) + 1;
502     next_krnoffset = binary_size;
504     if (operation == INTEL_GENERIC_ENC_SCALING4X) {
505         pcurr_header = &pkh_table->ply_dscale_ply;
506     } else if (operation == INTEL_GENERIC_ENC_SCALING2X) {
507         pcurr_header = &pkh_table->ply_2xdscale_ply;
508     } else if (operation == INTEL_GENERIC_ENC_ME) {
509         pcurr_header = &pkh_table->me_p;
510     } else if (operation == INTEL_GENERIC_ENC_BRC) {
511         pcurr_header = &pkh_table->frame_brc_init;
512     } else if (operation == INTEL_GENERIC_ENC_MBENC) {
513         pcurr_header = &pkh_table->mbenc_quality_I;
514     } else if (operation == INTEL_GENERIC_ENC_WP) {
515         pcurr_header = &pkh_table->wp;
516     } else if (operation == INTEL_GENERIC_ENC_SFD) {
517         pcurr_header = &pkh_table->static_detection;
/* Index within the selected family, then resolve start/size. */
522     pcurr_header += krnstate_idx;
523     ret_kernel->bin = (const BIN_PTR *)(bin_start + (pcurr_header->kernel_start_pointer << 6));
525     pnext_header = (pcurr_header + 1);
526     if (pnext_header < pinvalid_entry) {
527         next_krnoffset = pnext_header->kernel_start_pointer << 6;
529     ret_kernel->size = next_krnoffset - (pcurr_header->kernel_start_pointer << 6);
/*
 * Destructor for the per-surface AVC private data (installed as
 * obj_surface->free_private_data): destroys the 4x/16x/32x downscaled
 * VA surfaces, frees the MB-code / MV-data / ref-pic-select GPE
 * resources and unreferences the direct-MV top/bottom BOs.
 */
534 gen9_free_surfaces_avc(void **data)
536     struct gen9_surface_avc *avc_surface;
543     if (avc_surface->scaled_4x_surface_obj) {
544         i965_DestroySurfaces(avc_surface->ctx, &avc_surface->scaled_4x_surface_id, 1);
545         avc_surface->scaled_4x_surface_id = VA_INVALID_SURFACE;
546         avc_surface->scaled_4x_surface_obj = NULL;
549     if (avc_surface->scaled_16x_surface_obj) {
550         i965_DestroySurfaces(avc_surface->ctx, &avc_surface->scaled_16x_surface_id, 1);
551         avc_surface->scaled_16x_surface_id = VA_INVALID_SURFACE;
552         avc_surface->scaled_16x_surface_obj = NULL;
555     if (avc_surface->scaled_32x_surface_obj) {
556         i965_DestroySurfaces(avc_surface->ctx, &avc_surface->scaled_32x_surface_id, 1);
557         avc_surface->scaled_32x_surface_id = VA_INVALID_SURFACE;
558         avc_surface->scaled_32x_surface_obj = NULL;
561     i965_free_gpe_resource(&avc_surface->res_mb_code_surface);
562     i965_free_gpe_resource(&avc_surface->res_mv_data_surface);
563     i965_free_gpe_resource(&avc_surface->res_ref_pic_select_surface);
/* dri_bo_unreference accepts NULL, so no guard is needed here. */
565     dri_bo_unreference(avc_surface->dmv_top);
566     avc_surface->dmv_top = NULL;
567     dri_bo_unreference(avc_surface->dmv_bottom);
568     avc_surface->dmv_bottom = NULL;
/*
 * Lazily create the per-surface AVC private data for obj_surface:
 * 4x/16x (and optionally 32x) downscaled NV12 surfaces for HME,
 * MB-code and MV-data buffers sized from the frame's macroblock count,
 * an optional ref-pic-select 2D surface, and top/bottom direct-MV BOs.
 * Returns immediately with success when private data already exists.
 * Returns VA_STATUS_ERROR_INVALID_SURFACE / _ALLOCATION_FAILED on error.
 * NOTE(review): cleanup of partially-created resources on the early
 * ALLOCATION_FAILED returns appears to rely on free_private_data being
 * invoked later — confirm against the full file.
 */
578 gen9_avc_init_check_surfaces(VADriverContextP ctx,
579                              struct object_surface *obj_surface,
580                              struct intel_encoder_context *encoder_context,
581                              struct avc_surface_param *surface_param)
583     struct i965_driver_data *i965 = i965_driver_data(ctx);
584     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
585     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
586     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
588     struct gen9_surface_avc *avc_surface;
589     int downscaled_width_4x, downscaled_height_4x;
590     int downscaled_width_16x, downscaled_height_16x;
591     int downscaled_width_32x, downscaled_height_32x;
593     unsigned int frame_width_in_mbs = ALIGN(surface_param->frame_width, 16) / 16;
594     unsigned int frame_height_in_mbs = ALIGN(surface_param->frame_height, 16) / 16;
595     unsigned int frame_mb_nums = frame_width_in_mbs * frame_height_in_mbs;
596     int allocate_flag = 1;
599     if (!obj_surface || !obj_surface->bo)
600         return VA_STATUS_ERROR_INVALID_SURFACE;
/* Already initialized for this surface: nothing to do. */
602     if (obj_surface->private_data) {
603         return VA_STATUS_SUCCESS;
606     avc_surface = calloc(1, sizeof(struct gen9_surface_avc));
609         return VA_STATUS_ERROR_ALLOCATION_FAILED;
/* Install destructor so the driver frees everything with the surface. */
611     avc_surface->ctx = ctx;
612     obj_surface->private_data = avc_surface;
613     obj_surface->free_private_data = gen9_free_surfaces_avc;
/* 4x downscaled surface for HME */
615     downscaled_width_4x = generic_state->frame_width_4x;
616     downscaled_height_4x = generic_state->frame_height_4x;
618     i965_CreateSurfaces(ctx,
620                         downscaled_height_4x,
623                         &avc_surface->scaled_4x_surface_id);
625     avc_surface->scaled_4x_surface_obj = SURFACE(avc_surface->scaled_4x_surface_id);
627     if (!avc_surface->scaled_4x_surface_obj) {
628         return VA_STATUS_ERROR_ALLOCATION_FAILED;
631     i965_check_alloc_surface_bo(ctx, avc_surface->scaled_4x_surface_obj, 1,
632                                 VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
/* 16x downscaled surface */
634     downscaled_width_16x = generic_state->frame_width_16x;
635     downscaled_height_16x = generic_state->frame_height_16x;
636     i965_CreateSurfaces(ctx,
637                         downscaled_width_16x,
638                         downscaled_height_16x,
641                         &avc_surface->scaled_16x_surface_id);
642     avc_surface->scaled_16x_surface_obj = SURFACE(avc_surface->scaled_16x_surface_id);
644     if (!avc_surface->scaled_16x_surface_obj) {
645         return VA_STATUS_ERROR_ALLOCATION_FAILED;
648     i965_check_alloc_surface_bo(ctx, avc_surface->scaled_16x_surface_obj, 1,
649                                 VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
/* 32x downscaled surface, only when 32x ME is supported/enabled */
651     if (generic_state->b32xme_supported ||
652         generic_state->b32xme_enabled) {
653         downscaled_width_32x = generic_state->frame_width_32x;
654         downscaled_height_32x = generic_state->frame_height_32x;
655         i965_CreateSurfaces(ctx,
656                             downscaled_width_32x,
657                             downscaled_height_32x,
660                             &avc_surface->scaled_32x_surface_id);
661         avc_surface->scaled_32x_surface_obj = SURFACE(avc_surface->scaled_32x_surface_id);
663         if (!avc_surface->scaled_32x_surface_obj) {
664             return VA_STATUS_ERROR_ALLOCATION_FAILED;
667         i965_check_alloc_surface_bo(ctx, avc_surface->scaled_32x_surface_obj, 1,
668                                     VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
671     /*mb code and mv data for each frame*/
672     size = frame_mb_nums * 16 * 4;
673     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
674                                                &avc_surface->res_mb_code_surface,
678         goto failed_allocation;
/* MV data: 32 dwords per MB */
680     size = frame_mb_nums * 32 * 4;
681     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
682                                                &avc_surface->res_mv_data_surface,
686         goto failed_allocation;
689     if (avc_state->ref_pic_select_list_supported) {
690         width = ALIGN(frame_width_in_mbs * 8, 64);
691         height = frame_height_in_mbs ;
692         allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
693                                                       &avc_surface->res_ref_pic_select_surface,
696                                                       "Ref pic select list buffer");
698             goto failed_allocation;
/* Direct-MV buffers for top/bottom fields */
702     avc_surface->dmv_top =
703         dri_bo_alloc(i965->intel.bufmgr,
704                      "direct mv top Buffer",
707     avc_surface->dmv_bottom =
708         dri_bo_alloc(i965->intel.bufmgr,
709                      "direct mv bottom Buffer",
712     assert(avc_surface->dmv_top);
713     assert(avc_surface->dmv_bottom);
715     return VA_STATUS_SUCCESS;
718     return VA_STATUS_ERROR_ALLOCATION_FAILED;
/*
 * Fill the MBENC slice-map surface: one row per MB row, each entry
 * recording which slice a macroblock belongs to, terminated with
 * 0xFFFFFFFF.  Only runs when arbitrary_num_mbs_in_slice is enabled;
 * otherwise the default one-slice-per-frame map is kept.
 * NOTE(review): the per-MB store inside the inner loop is elided in
 * this chunk — confirm the write pattern against the full file.
 */
722 gen9_avc_generate_slice_map(VADriverContextP ctx,
723                             struct encode_state *encode_state,
724                             struct intel_encoder_context *encoder_context)
726     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
727     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
728     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
729     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
731     struct i965_gpe_resource *gpe_resource = NULL;
732     VAEncSliceParameterBufferH264 * slice_param = NULL;
733     unsigned int * data = NULL;
734     unsigned int * data_row = NULL;
/* Row pitch in dwords: (mbs-per-row + 1) entries, 64-byte aligned. */
736     unsigned int pitch = ALIGN((generic_state->frame_width_in_mbs + 1) * 4, 64) / 4;
738     if (!avc_state->arbitrary_num_mbs_in_slice)
741     gpe_resource = &(avc_ctx->res_mbenc_slice_map_surface);
742     assert(gpe_resource);
744     i965_zero_gpe_resource(gpe_resource);
746     data_row = (unsigned int *)i965_map_gpe_resource(gpe_resource);
750     for (i = 0; i < avc_state->slice_num; i++) {
751         slice_param = avc_state->slice_param[i];
752         for (j = 0; j < slice_param->num_macroblocks; j++) {
/* advance to the next row when a full MB row has been written */
754             if ((count > 0) && (count % generic_state->frame_width_in_mbs == 0)) {
/* sentinel terminating the map */
762     *data++ = 0xFFFFFFFF;
764     i965_unmap_gpe_resource(gpe_resource);
/*
 * Allocate (or re-allocate) every GPE surface/buffer the gen9 AVC VME
 * pipeline needs: the second-level image-state batch, MB status and
 * flatness-check surfaces, 4x/16x/32x HME MV/distortion buffers, the
 * one-time BRC buffer set (guarded by generic_state->brc_allocated),
 * external MB-QP, slice-map, SFD cost tables and output, weighted
 * prediction output surfaces and the MAD buffer.  Each allocation
 * frees any previous resource first, so the function is safe to call
 * again on reconfiguration.  Returns VA_STATUS_SUCCESS or
 * VA_STATUS_ERROR_ALLOCATION_FAILED (via the failed_allocation label).
 */
768 gen9_avc_allocate_resources(VADriverContextP ctx,
769 struct encode_state *encode_state,
770 struct intel_encoder_context *encoder_context)
772 struct i965_driver_data *i965 = i965_driver_data(ctx);
773 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
774 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
775 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
776 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
777 unsigned int size = 0;
778 unsigned int width = 0;
779 unsigned int height = 0;
780 unsigned char * data = NULL;
781 int allocate_flag = 1;
784 /*all the surface/buffer are allocated here*/
786 /*second level batch buffer for image state write when cqp etc*/
787 i965_free_gpe_resource(&avc_ctx->res_image_state_batch_buffer_2nd_level);
788 size = INTEL_AVC_IMAGE_STATE_CMD_SIZE ;
789 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
790 &avc_ctx->res_image_state_batch_buffer_2nd_level,
792 "second levle batch (image state write) buffer");
794 goto failed_allocation;
796 /* scaling related surface */
797 if (avc_state->mb_status_supported) {
/* 16 dwords per MB, rounded up to a 1KB boundary */
798 i965_free_gpe_resource(&avc_ctx->res_mb_status_buffer);
799 size = (generic_state->frame_width_in_mbs * generic_state->frame_height_in_mbs * 16 * 4 + 1023) & ~0x3ff;
800 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
801 &avc_ctx->res_mb_status_buffer,
803 "MB statistics output buffer");
805 goto failed_allocation;
806 i965_zero_gpe_resource(&avc_ctx->res_mb_status_buffer);
809 if (avc_state->flatness_check_supported) {
810 width = generic_state->frame_width_in_mbs * 4;
811 height = generic_state->frame_height_in_mbs * 4;
812 i965_free_gpe_resource(&avc_ctx->res_flatness_check_surface);
813 allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
814 &avc_ctx->res_flatness_check_surface,
817 "Flatness check buffer");
819 goto failed_allocation;
821 /* me related surface */
822 width = generic_state->downscaled_width_4x_in_mb * 8;
823 height = generic_state->downscaled_height_4x_in_mb * 4 * 10;
824 i965_free_gpe_resource(&avc_ctx->s4x_memv_distortion_buffer);
825 allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
826 &avc_ctx->s4x_memv_distortion_buffer,
829 "4x MEMV distortion buffer");
831 goto failed_allocation;
832 i965_zero_gpe_resource(&avc_ctx->s4x_memv_distortion_buffer);
834 width = (generic_state->downscaled_width_4x_in_mb + 7) / 8 * 64;
835 height = (generic_state->downscaled_height_4x_in_mb + 1) / 2 * 8;
836 i965_free_gpe_resource(&avc_ctx->s4x_memv_min_distortion_brc_buffer);
837 allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
838 &avc_ctx->s4x_memv_min_distortion_brc_buffer,
841 "4x MEMV min distortion brc buffer");
843 goto failed_allocation;
844 i965_zero_gpe_resource(&avc_ctx->s4x_memv_min_distortion_brc_buffer);
847 width = ALIGN(generic_state->downscaled_width_4x_in_mb * 32, 64);
848 height = generic_state->downscaled_height_4x_in_mb * 4 * 2 * 10;
849 i965_free_gpe_resource(&avc_ctx->s4x_memv_data_buffer);
850 allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
851 &avc_ctx->s4x_memv_data_buffer,
854 "4x MEMV data buffer");
856 goto failed_allocation;
857 i965_zero_gpe_resource(&avc_ctx->s4x_memv_data_buffer);
860 width = ALIGN(generic_state->downscaled_width_16x_in_mb * 32, 64);
861 height = generic_state->downscaled_height_16x_in_mb * 4 * 2 * 10 ;
862 i965_free_gpe_resource(&avc_ctx->s16x_memv_data_buffer);
863 allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
864 &avc_ctx->s16x_memv_data_buffer,
867 "16x MEMV data buffer");
869 goto failed_allocation;
870 i965_zero_gpe_resource(&avc_ctx->s16x_memv_data_buffer);
873 width = ALIGN(generic_state->downscaled_width_32x_in_mb * 32, 64);
874 height = generic_state->downscaled_height_32x_in_mb * 4 * 2 * 10 ;
875 i965_free_gpe_resource(&avc_ctx->s32x_memv_data_buffer);
876 allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
877 &avc_ctx->s32x_memv_data_buffer,
880 "32x MEMV data buffer");
882 goto failed_allocation;
883 i965_zero_gpe_resource(&avc_ctx->s32x_memv_data_buffer);
/* BRC buffers are allocated once per context, not per reconfigure */
886 if (!generic_state->brc_allocated) {
887 /*brc related surface */
888 i965_free_gpe_resource(&avc_ctx->res_brc_history_buffer);
890 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
891 &avc_ctx->res_brc_history_buffer,
893 "brc history buffer");
895 goto failed_allocation;
897 i965_free_gpe_resource(&avc_ctx->res_brc_pre_pak_statistics_output_buffer);
899 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
900 &avc_ctx->res_brc_pre_pak_statistics_output_buffer,
902 "brc pak statistic buffer");
904 goto failed_allocation;
/* 7 image-state command copies for read/write round-trips with the BRC kernel */
906 i965_free_gpe_resource(&avc_ctx->res_brc_image_state_read_buffer);
907 size = INTEL_AVC_IMAGE_STATE_CMD_SIZE * 7;
908 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
909 &avc_ctx->res_brc_image_state_read_buffer,
911 "brc image state read buffer");
913 goto failed_allocation;
915 i965_free_gpe_resource(&avc_ctx->res_brc_image_state_write_buffer);
916 size = INTEL_AVC_IMAGE_STATE_CMD_SIZE * 7;
917 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
918 &avc_ctx->res_brc_image_state_write_buffer,
920 "brc image state write buffer");
922 goto failed_allocation;
924 width = ALIGN(avc_state->brc_const_data_surface_width, 64);
925 height = avc_state->brc_const_data_surface_height;
926 i965_free_gpe_resource(&avc_ctx->res_brc_const_data_buffer);
927 allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
928 &avc_ctx->res_brc_const_data_buffer,
931 "brc const data buffer");
933 goto failed_allocation;
935 if (generic_state->brc_distortion_buffer_supported) {
/* NOTE(review): the first width/height pair is immediately overwritten
 * by the second — the first two assignments appear to be dead code. */
936 width = ALIGN(generic_state->downscaled_width_4x_in_mb * 8, 64);
937 height = ALIGN(generic_state->downscaled_height_4x_in_mb * 4, 8);
938 width = (generic_state->downscaled_width_4x_in_mb + 7) / 8 * 64;
939 height = (generic_state->downscaled_height_4x_in_mb + 1) / 2 * 8;
940 i965_free_gpe_resource(&avc_ctx->res_brc_dist_data_surface);
941 allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
942 &avc_ctx->res_brc_dist_data_surface,
945 "brc dist data buffer");
947 goto failed_allocation;
948 i965_zero_gpe_resource(&avc_ctx->res_brc_dist_data_surface);
951 if (generic_state->brc_roi_enable) {
952 width = ALIGN(generic_state->downscaled_width_4x_in_mb * 16, 64);
953 height = ALIGN(generic_state->downscaled_height_4x_in_mb * 4, 8);
954 i965_free_gpe_resource(&avc_ctx->res_mbbrc_roi_surface);
955 allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
956 &avc_ctx->res_mbbrc_roi_surface,
961 goto failed_allocation;
962 i965_zero_gpe_resource(&avc_ctx->res_mbbrc_roi_surface);
966 width = ALIGN(generic_state->downscaled_width_4x_in_mb * 4, 64);
967 height = ALIGN(generic_state->downscaled_height_4x_in_mb * 4, 8);
968 i965_free_gpe_resource(&avc_ctx->res_mbbrc_mb_qp_data_surface);
969 allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
970 &avc_ctx->res_mbbrc_mb_qp_data_surface,
973 "mbbrc mb qp buffer");
975 goto failed_allocation;
/* 16 dwords per QP value */
977 i965_free_gpe_resource(&avc_ctx->res_mbbrc_const_data_buffer);
978 size = 16 * AVC_QP_MAX * 4;
979 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
980 &avc_ctx->res_mbbrc_const_data_buffer,
982 "mbbrc const data buffer");
984 goto failed_allocation;
986 if (avc_state->decouple_mbenc_curbe_from_brc_enable) {
987 i965_free_gpe_resource(&avc_ctx->res_mbenc_brc_buffer);
988 size = avc_state->mbenc_brc_buffer_size;
989 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
990 &avc_ctx->res_mbenc_brc_buffer,
994 goto failed_allocation;
995 i965_zero_gpe_resource(&avc_ctx->res_mbenc_brc_buffer);
997 generic_state->brc_allocated = 1;
/* externally supplied per-MB QP map */
1001 if (avc_state->mb_qp_data_enable) {
1002 width = ALIGN(generic_state->downscaled_width_4x_in_mb * 4, 64);
1003 height = ALIGN(generic_state->downscaled_height_4x_in_mb * 4, 8);
1004 i965_free_gpe_resource(&avc_ctx->res_mb_qp_data_surface);
1005 allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
1006 &avc_ctx->res_mb_qp_data_surface,
1009 "external mb qp buffer");
1011 goto failed_allocation;
1014 /* mbenc related surface. it share most of surface with other kernels */
1015 if (avc_state->arbitrary_num_mbs_in_slice) {
1016 width = ALIGN((generic_state->frame_width_in_mbs + 1) * 4, 64);
1017 height = generic_state->frame_height_in_mbs ;
1018 i965_free_gpe_resource(&avc_ctx->res_mbenc_slice_map_surface);
1019 allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
1020 &avc_ctx->res_mbenc_slice_map_surface,
1023 "slice map buffer");
1025 goto failed_allocation;
1026 i965_zero_gpe_resource(&avc_ctx->res_mbenc_slice_map_surface);
1028 /*generate slice map,default one slice per frame.*/
1031 /* sfd related surface */
1032 if (avc_state->sfd_enable) {
1033 i965_free_gpe_resource(&avc_ctx->res_sfd_output_buffer);
1035 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
1036 &avc_ctx->res_sfd_output_buffer,
1038 "sfd output buffer");
1040 goto failed_allocation;
/* cost tables are filled from the static gen9 tables right after allocation */
1042 i965_free_gpe_resource(&avc_ctx->res_sfd_cost_table_p_frame_buffer);
1043 size = ALIGN(52, 64);
1044 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
1045 &avc_ctx->res_sfd_cost_table_p_frame_buffer,
1047 "sfd P frame cost table buffer");
1049 goto failed_allocation;
1050 data = i965_map_gpe_resource(&(avc_ctx->res_sfd_cost_table_p_frame_buffer));
1052 memcpy(data, gen9_avc_sfd_cost_table_p_frame, sizeof(unsigned char) * 52);
1053 i965_unmap_gpe_resource(&(avc_ctx->res_sfd_cost_table_p_frame_buffer));
1055 i965_free_gpe_resource(&avc_ctx->res_sfd_cost_table_b_frame_buffer);
1056 size = ALIGN(52, 64);
1057 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
1058 &avc_ctx->res_sfd_cost_table_b_frame_buffer,
1060 "sfd B frame cost table buffer");
1062 goto failed_allocation;
1063 data = i965_map_gpe_resource(&(avc_ctx->res_sfd_cost_table_b_frame_buffer));
1065 memcpy(data, gen9_avc_sfd_cost_table_b_frame, sizeof(unsigned char) * 52);
1066 i965_unmap_gpe_resource(&(avc_ctx->res_sfd_cost_table_b_frame_buffer));
1069 /* wp related surfaces */
1070 if (avc_state->weighted_prediction_supported) {
1071 for (i = 0; i < 2 ; i++) {
1072 if (avc_ctx->wp_output_pic_select_surface_obj[i]) {
1076 width = generic_state->frame_width_in_pixel;
1077 height = generic_state->frame_height_in_pixel ;
1078 i965_CreateSurfaces(ctx,
1081 VA_RT_FORMAT_YUV420,
1083 &avc_ctx->wp_output_pic_select_surface_id[i]);
1084 avc_ctx->wp_output_pic_select_surface_obj[i] = SURFACE(avc_ctx->wp_output_pic_select_surface_id[i]);
1086 if (!avc_ctx->wp_output_pic_select_surface_obj[i]) {
1087 goto failed_allocation;
1090 i965_check_alloc_surface_bo(ctx, avc_ctx->wp_output_pic_select_surface_obj[i], 1,
1091 VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
/* wrap the two WP output surfaces as 2D GPE resources */
1093 i965_free_gpe_resource(&avc_ctx->res_wp_output_pic_select_surface_list[0]);
1094 i965_object_surface_to_2d_gpe_resource_with_align(&avc_ctx->res_wp_output_pic_select_surface_list[0], avc_ctx->wp_output_pic_select_surface_obj[0], GPE_RESOURCE_ALIGNMENT);
1095 i965_free_gpe_resource(&avc_ctx->res_wp_output_pic_select_surface_list[1]);
1096 i965_object_surface_to_2d_gpe_resource_with_align(&avc_ctx->res_wp_output_pic_select_surface_list[1], avc_ctx->wp_output_pic_select_surface_obj[1], GPE_RESOURCE_ALIGNMENT);
1101 i965_free_gpe_resource(&avc_ctx->res_mad_data_buffer);
1103 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
1104 &avc_ctx->res_mad_data_buffer,
1105 ALIGN(size, 0x1000),
1108 goto failed_allocation;
1110 return VA_STATUS_SUCCESS;
1113 return VA_STATUS_ERROR_ALLOCATION_FAILED;
/*
 * Release every GPE resource and VA surface allocated by
 * gen9_avc_allocate_resources (mirror list, same order), then destroy
 * the two weighted-prediction output surfaces.
 */
1117 gen9_avc_free_resources(struct encoder_vme_mfc_context * vme_context)
1122     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
1123     VADriverContextP ctx = avc_ctx->ctx;
1126     /* free all the surface/buffer here*/
1127     i965_free_gpe_resource(&avc_ctx->res_image_state_batch_buffer_2nd_level);
1128     i965_free_gpe_resource(&avc_ctx->res_mb_status_buffer);
1129     i965_free_gpe_resource(&avc_ctx->res_flatness_check_surface);
1130     i965_free_gpe_resource(&avc_ctx->s4x_memv_distortion_buffer);
1131     i965_free_gpe_resource(&avc_ctx->s4x_memv_min_distortion_brc_buffer);
1132     i965_free_gpe_resource(&avc_ctx->s4x_memv_data_buffer);
1133     i965_free_gpe_resource(&avc_ctx->s16x_memv_data_buffer);
1134     i965_free_gpe_resource(&avc_ctx->s32x_memv_data_buffer);
1135     i965_free_gpe_resource(&avc_ctx->res_brc_history_buffer);
1136     i965_free_gpe_resource(&avc_ctx->res_brc_pre_pak_statistics_output_buffer);
1137     i965_free_gpe_resource(&avc_ctx->res_brc_image_state_read_buffer);
1138     i965_free_gpe_resource(&avc_ctx->res_brc_image_state_write_buffer);
1139     i965_free_gpe_resource(&avc_ctx->res_brc_const_data_buffer);
1140     i965_free_gpe_resource(&avc_ctx->res_brc_dist_data_surface);
1141     i965_free_gpe_resource(&avc_ctx->res_mbbrc_roi_surface);
1142     i965_free_gpe_resource(&avc_ctx->res_mbbrc_mb_qp_data_surface);
1143     i965_free_gpe_resource(&avc_ctx->res_mbenc_brc_buffer);
1144     i965_free_gpe_resource(&avc_ctx->res_mb_qp_data_surface);
1145     i965_free_gpe_resource(&avc_ctx->res_mbbrc_const_data_buffer);
1146     i965_free_gpe_resource(&avc_ctx->res_mbenc_slice_map_surface);
1147     i965_free_gpe_resource(&avc_ctx->res_sfd_output_buffer);
1148     i965_free_gpe_resource(&avc_ctx->res_sfd_cost_table_p_frame_buffer);
1149     i965_free_gpe_resource(&avc_ctx->res_sfd_cost_table_b_frame_buffer);
1150     i965_free_gpe_resource(&avc_ctx->res_wp_output_pic_select_surface_list[0]);
1151     i965_free_gpe_resource(&avc_ctx->res_wp_output_pic_select_surface_list[1]);
1152     i965_free_gpe_resource(&avc_ctx->res_mad_data_buffer);
1154     for (i = 0; i < 2 ; i++) {
1155         if (avc_ctx->wp_output_pic_select_surface_obj[i]) {
1156             i965_DestroySurfaces(ctx, &avc_ctx->wp_output_pic_select_surface_id[i], 1);
1157             avc_ctx->wp_output_pic_select_surface_id[i] = VA_INVALID_SURFACE;
1158             avc_ctx->wp_output_pic_select_surface_obj[i] = NULL;
/*
 * Submit one GPE kernel via a single MEDIA_OBJECT command: tag the
 * status buffer with the media function id (MI_STORE_DATA_IMM so the
 * status code can tell which kernel ran last), set up the pipeline,
 * emit the media object, flush media state and end the pipeline —
 * all inside one atomic batch which is flushed at the end.
 */
1165 gen9_avc_run_kernel_media_object(VADriverContextP ctx,
1166                                  struct intel_encoder_context *encoder_context,
1167                                  struct i965_gpe_context *gpe_context,
1169                                  struct gpe_media_object_parameter *param)
1171     struct i965_driver_data *i965 = i965_driver_data(ctx);
1172     struct i965_gpe_table *gpe = &i965->gpe_table;
1173     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
1174     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
1176     struct intel_batchbuffer *batch = encoder_context->base.batch;
1177     struct encoder_status_buffer_internal *status_buffer;
1178     struct gpe_mi_store_data_imm_parameter mi_store_data_imm;
1183     intel_batchbuffer_start_atomic(batch, 0x1000);
1184     intel_batchbuffer_emit_mi_flush(batch);
/* record which media function is being executed, for status readback */
1186     status_buffer = &(avc_ctx->status_buffer);
1187     memset(&mi_store_data_imm, 0, sizeof(mi_store_data_imm));
1188     mi_store_data_imm.bo = status_buffer->bo;
1189     mi_store_data_imm.offset = status_buffer->media_index_offset;
1190     mi_store_data_imm.dw0 = media_function;
1191     gpe->mi_store_data_imm(ctx, batch, &mi_store_data_imm);
1193     gpe->pipeline_setup(ctx, gpe_context, batch);
1194     gpe->media_object(ctx, gpe_context, batch, param);
1195     gpe->media_state_flush(ctx, gpe_context, batch);
1197     gpe->pipeline_end(ctx, gpe_context, batch);
1199     intel_batchbuffer_end_atomic(batch);
1201     intel_batchbuffer_flush(batch);
/*
 * Identical flow to gen9_avc_run_kernel_media_object, but dispatches
 * the kernel with MEDIA_OBJECT_WALKER (hardware thread walker) instead
 * of a single media object — used for per-MB kernels.
 */
1205 gen9_avc_run_kernel_media_object_walker(VADriverContextP ctx,
1206                                         struct intel_encoder_context *encoder_context,
1207                                         struct i965_gpe_context *gpe_context,
1209                                         struct gpe_media_object_walker_parameter *param)
1211     struct i965_driver_data *i965 = i965_driver_data(ctx);
1212     struct i965_gpe_table *gpe = &i965->gpe_table;
1213     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
1214     struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
1216     struct intel_batchbuffer *batch = encoder_context->base.batch;
1217     struct encoder_status_buffer_internal *status_buffer;
1218     struct gpe_mi_store_data_imm_parameter mi_store_data_imm;
1223     intel_batchbuffer_start_atomic(batch, 0x1000);
1225     intel_batchbuffer_emit_mi_flush(batch);
/* record which media function is being executed, for status readback */
1227     status_buffer = &(avc_ctx->status_buffer);
1228     memset(&mi_store_data_imm, 0, sizeof(mi_store_data_imm));
1229     mi_store_data_imm.bo = status_buffer->bo;
1230     mi_store_data_imm.offset = status_buffer->media_index_offset;
1231     mi_store_data_imm.dw0 = media_function;
1232     gpe->mi_store_data_imm(ctx, batch, &mi_store_data_imm);
1234     gpe->pipeline_setup(ctx, gpe_context, batch);
1235     gpe->media_object_walker(ctx, gpe_context, batch, param);
1236     gpe->media_state_flush(ctx, gpe_context, batch);
1238     gpe->pipeline_end(ctx, gpe_context, batch);
1240     intel_batchbuffer_end_atomic(batch);
1242     intel_batchbuffer_flush(batch);
/*
 * Configure a GPE context for an AVC encoder kernel: CURBE size,
 * optional sampler, interface descriptor table, binding table layout,
 * VFE thread count (6 threads/EU when the EU count is known) and URB
 * partitioning derived from the kernel's CURBE/inline-data sizes.
 */
1246 gen9_init_gpe_context_avc(VADriverContextP ctx,
1247                           struct i965_gpe_context *gpe_context,
1248                           struct encoder_kernel_parameter *kernel_param)
1250     struct i965_driver_data *i965 = i965_driver_data(ctx);
1252     gpe_context->curbe.length = kernel_param->curbe_size; // in bytes
1254     gpe_context->sampler.entry_size = 0;
1255     gpe_context->sampler.max_entries = 0;
1257     if (kernel_param->sampler_size) {
1258         gpe_context->sampler.entry_size = ALIGN(kernel_param->sampler_size, 64);
1259         gpe_context->sampler.max_entries = 1;
1262     gpe_context->idrt.entry_size = ALIGN(sizeof(struct gen8_interface_descriptor_data), 64); // 8 dws, 1 register
1263     gpe_context->idrt.max_entries = NUM_KERNELS_PER_GPE_CONTEXT;
/* binding table first, then 64-byte aligned surface states */
1265     gpe_context->surface_state_binding_table.max_entries = MAX_AVC_ENCODER_SURFACES;
1266     gpe_context->surface_state_binding_table.binding_table_offset = 0;
1267     gpe_context->surface_state_binding_table.surface_state_offset = ALIGN(MAX_AVC_ENCODER_SURFACES * 4, 64);
1268     gpe_context->surface_state_binding_table.length = ALIGN(MAX_AVC_ENCODER_SURFACES * 4, 64) + ALIGN(MAX_AVC_ENCODER_SURFACES * SURFACE_STATE_PADDED_SIZE_GEN9, 64);
1270     if (i965->intel.eu_total > 0)
1271         gpe_context->vfe_state.max_num_threads = 6 * i965->intel.eu_total;
1273         gpe_context->vfe_state.max_num_threads = 112; // 16 EU * 7 threads
/* URB budget: what remains after CURBE and IDRT, split into entries */
1275     gpe_context->vfe_state.curbe_allocation_size = MAX(1, ALIGN(gpe_context->curbe.length, 32) >> 5); // in registers
1276     gpe_context->vfe_state.urb_entry_size = MAX(1, ALIGN(kernel_param->inline_data_size, 32) >> 5); // in registers
1277     gpe_context->vfe_state.num_urb_entries = (MAX_URB_SIZE -
1278                                               gpe_context->vfe_state.curbe_allocation_size -
1279                                               ((gpe_context->idrt.entry_size >> 5) *
1280                                                gpe_context->idrt.max_entries)) / gpe_context->vfe_state.urb_entry_size;
1281     gpe_context->vfe_state.num_urb_entries = CLAMP(1, 127, gpe_context->vfe_state.num_urb_entries);
1282     gpe_context->vfe_state.gpgpu_mode = 0;
/*
 * Program the VFE scoreboard, which expresses inter-thread (per-MB)
 * dependencies for the hardware walker.  Caller supplies mask/type/enable;
 * when `walkpat_flag` is set a fixed 4-dependency pattern for the walking
 * pattern dispatch is forced instead.
 * NOTE(review): the extraction dropped the brace/else lines, so the exact
 * branch grouping of the delta sets below (including the two competing
 * delta_x6/delta_y6 assignments) must be confirmed against the full file.
 * The delta values presumably encode 4-bit two's-complement x/y offsets
 * (0xF == -1, 0xE == -2) per the Gen PRM — confirm.
 */
1286 gen9_init_vfe_scoreboard_avc(struct i965_gpe_context *gpe_context,
1287 struct encoder_scoreboard_parameter *scoreboard_param)
1289 gpe_context->vfe_desc5.scoreboard0.mask = scoreboard_param->mask;
1290 gpe_context->vfe_desc5.scoreboard0.type = scoreboard_param->type;
1291 gpe_context->vfe_desc5.scoreboard0.enable = scoreboard_param->enable;
/* Walking-pattern dispatch: force 4 active dependencies (mask 0x0F). */
1293 if (scoreboard_param->walkpat_flag) {
1294 gpe_context->vfe_desc5.scoreboard0.mask = 0x0F;
1295 gpe_context->vfe_desc5.scoreboard0.type = 1;
1297 gpe_context->vfe_desc6.scoreboard1.delta_x0 = 0x0;
1298 gpe_context->vfe_desc6.scoreboard1.delta_y0 = 0xF;
1300 gpe_context->vfe_desc6.scoreboard1.delta_x1 = 0x0;
1301 gpe_context->vfe_desc6.scoreboard1.delta_y1 = 0xE;
1303 gpe_context->vfe_desc6.scoreboard1.delta_x2 = 0xF;
1304 gpe_context->vfe_desc6.scoreboard1.delta_y2 = 0x3;
1306 gpe_context->vfe_desc6.scoreboard1.delta_x3 = 0xF;
1307 gpe_context->vfe_desc6.scoreboard1.delta_y3 = 0x1;
/* Default dependency set (left MB, and the row above). */
1310 gpe_context->vfe_desc6.scoreboard1.delta_x0 = 0xF;
1311 gpe_context->vfe_desc6.scoreboard1.delta_y0 = 0x0;
1314 gpe_context->vfe_desc6.scoreboard1.delta_x1 = 0x0;
1315 gpe_context->vfe_desc6.scoreboard1.delta_y1 = 0xF;
1318 gpe_context->vfe_desc6.scoreboard1.delta_x2 = 0x1;
1319 gpe_context->vfe_desc6.scoreboard1.delta_y2 = 0xF;
1322 gpe_context->vfe_desc6.scoreboard1.delta_x3 = 0xF;
1323 gpe_context->vfe_desc6.scoreboard1.delta_y3 = 0xF;
/* Extended dependencies (scoreboard2) for masks using more than 4 entries. */
1326 gpe_context->vfe_desc7.scoreboard2.delta_x4 = 0xF;
1327 gpe_context->vfe_desc7.scoreboard2.delta_y4 = 0x1;
1330 gpe_context->vfe_desc7.scoreboard2.delta_x5 = 0x0;
1331 gpe_context->vfe_desc7.scoreboard2.delta_y5 = 0xE;
1334 gpe_context->vfe_desc7.scoreboard2.delta_x6 = 0x1;
1335 gpe_context->vfe_desc7.scoreboard2.delta_y6 = 0xE;
/* Second delta_x6/y6 assignment — belongs to a branch not visible here. */
1338 gpe_context->vfe_desc7.scoreboard2.delta_x6 = 0xF;
1339 gpe_context->vfe_desc7.scoreboard2.delta_y6 = 0xE;
1343 VME pipeline related function
1347 scaling kernel related function
/*
 * Fill the CURBE for the Gen9 4x downscaling kernel: source dimensions,
 * input/output luma binding-table indices, and the optional per-MB
 * statistics outputs (flatness check / variance / pixel average).
 * NOTE(review): the trailing `void *param` parameter line and the usual
 * NULL check after map_curbe are not visible in this extraction — `param`
 * is clearly a parameter since it is cast below; confirm against the
 * full file.
 */
1350 gen9_avc_set_curbe_scaling4x(VADriverContextP ctx,
1351 struct encode_state *encode_state,
1352 struct i965_gpe_context *gpe_context,
1353 struct intel_encoder_context *encoder_context,
1356 gen9_avc_scaling4x_curbe_data *curbe_cmd;
1357 struct scaling_param *surface_param = (struct scaling_param *)param;
1359 curbe_cmd = i965_gpe_context_map_curbe(gpe_context);
1364 memset(curbe_cmd, 0, sizeof(gen9_avc_scaling4x_curbe_data));
1366 curbe_cmd->dw0.input_picture_width = surface_param->input_frame_width;
1367 curbe_cmd->dw0.input_picture_height = surface_param->input_frame_height;
/* Binding table indices for the source and downscaled destination luma. */
1369 curbe_cmd->dw1.input_y_bti = GEN9_AVC_SCALING_FRAME_SRC_Y_INDEX;
1370 curbe_cmd->dw2.output_y_bti = GEN9_AVC_SCALING_FRAME_DST_Y_INDEX;
1373 curbe_cmd->dw5.flatness_threshold = 128;
1374 curbe_cmd->dw6.enable_mb_flatness_check = surface_param->enable_mb_flatness_check;
1375 curbe_cmd->dw7.enable_mb_variance_output = surface_param->enable_mb_variance_output;
1376 curbe_cmd->dw8.enable_mb_pixel_average_output = surface_param->enable_mb_pixel_average_output;
/* The statistics destination BTI is only needed when at least one per-MB
 * statistic is being produced. */
1378 if (curbe_cmd->dw6.enable_mb_flatness_check ||
1379 curbe_cmd->dw7.enable_mb_variance_output ||
1380 curbe_cmd->dw8.enable_mb_pixel_average_output) {
1381 curbe_cmd->dw10.mbv_proc_stat_bti = GEN9_AVC_SCALING_FRAME_MBVPROCSTATS_DST_INDEX;
1384 i965_gpe_context_unmap_curbe(gpe_context);
/*
 * Gen9.5 (KBL/GLK) variant of the 4x scaling CURBE.  Differs from the Gen9
 * layout: all enable flags are packed into dw6, an 8x8-block statistics
 * output is available, and the flatness threshold is programmed only when
 * the flatness check is requested.
 * NOTE(review): as with the gen9 variant, the `void *param` parameter line
 * and the post-map NULL check are not visible in this extraction.
 */
1389 gen95_avc_set_curbe_scaling4x(VADriverContextP ctx,
1390 struct encode_state *encode_state,
1391 struct i965_gpe_context *gpe_context,
1392 struct intel_encoder_context *encoder_context,
1395 gen95_avc_scaling4x_curbe_data *curbe_cmd;
1396 struct scaling_param *surface_param = (struct scaling_param *)param;
1398 curbe_cmd = i965_gpe_context_map_curbe(gpe_context);
1403 memset(curbe_cmd, 0, sizeof(gen95_avc_scaling4x_curbe_data));
1405 curbe_cmd->dw0.input_picture_width = surface_param->input_frame_width;
1406 curbe_cmd->dw0.input_picture_height = surface_param->input_frame_height;
1408 curbe_cmd->dw1.input_y_bti_frame = GEN9_AVC_SCALING_FRAME_SRC_Y_INDEX;
1409 curbe_cmd->dw2.output_y_bti_frame = GEN9_AVC_SCALING_FRAME_DST_Y_INDEX;
/* Threshold only matters when the flatness check actually runs. */
1411 if (surface_param->enable_mb_flatness_check)
1412 curbe_cmd->dw5.flatness_threshold = 128;
1413 curbe_cmd->dw6.enable_mb_flatness_check = surface_param->enable_mb_flatness_check;
1414 curbe_cmd->dw6.enable_mb_variance_output = surface_param->enable_mb_variance_output;
1415 curbe_cmd->dw6.enable_mb_pixel_average_output = surface_param->enable_mb_pixel_average_output;
1416 curbe_cmd->dw6.enable_block8x8_statistics_output = surface_param->blk8x8_stat_enabled;
/* Statistics destination BTI is only bound when some statistic is enabled. */
1418 if (curbe_cmd->dw6.enable_mb_flatness_check ||
1419 curbe_cmd->dw6.enable_mb_variance_output ||
1420 curbe_cmd->dw6.enable_mb_pixel_average_output) {
1421 curbe_cmd->dw8.mbv_proc_stat_bti_frame = GEN9_AVC_SCALING_FRAME_MBVPROCSTATS_DST_INDEX;
1424 i965_gpe_context_unmap_curbe(gpe_context);
/*
 * Fill the CURBE for the 2x downscaling kernel (used for the 32x HME
 * level).  Only the frame dimensions and the input/output luma binding
 * table indices are needed — no per-MB statistics outputs here.
 * NOTE(review): the `void *param` parameter line and the post-map NULL
 * check are not visible in this extraction.
 */
1429 gen9_avc_set_curbe_scaling2x(VADriverContextP ctx,
1430 struct encode_state *encode_state,
1431 struct i965_gpe_context *gpe_context,
1432 struct intel_encoder_context *encoder_context,
1435 gen9_avc_scaling2x_curbe_data *curbe_cmd;
1436 struct scaling_param *surface_param = (struct scaling_param *)param;
1438 curbe_cmd = i965_gpe_context_map_curbe(gpe_context);
1443 memset(curbe_cmd, 0, sizeof(gen9_avc_scaling2x_curbe_data));
1445 curbe_cmd->dw0.input_picture_width = surface_param->input_frame_width;
1446 curbe_cmd->dw0.input_picture_height = surface_param->input_frame_height;
/* Note the 2x kernel's BTIs live in dw8/dw9 (vs dw1/dw2 for the 4x kernel). */
1448 curbe_cmd->dw8.input_y_bti = GEN9_AVC_SCALING_FRAME_SRC_Y_INDEX;
1449 curbe_cmd->dw9.output_y_bti = GEN9_AVC_SCALING_FRAME_DST_Y_INDEX;
1451 i965_gpe_context_unmap_curbe(gpe_context);
/*
 * Bind the surfaces for one scaling-kernel run: source luma, downscaled
 * destination luma, and (optionally) either the raw MB statistics buffer
 * or the flatness-check surface.  The destination single-channel UNORM
 * format width depends on which scaler variant the caller selected.
 * NOTE(review): the `void *param` parameter line and some argument lines
 * of the gen9_add_buffer_gpe_surface call are not visible here.
 */
1456 gen9_avc_send_surface_scaling(VADriverContextP ctx,
1457 struct encode_state *encode_state,
1458 struct i965_gpe_context *gpe_context,
1459 struct intel_encoder_context *encoder_context,
1462 struct scaling_param *surface_param = (struct scaling_param *)param;
1463 unsigned int surface_format;
1464 unsigned int res_size;
/* Destination format is chosen by the caller via the scaling_out flags. */
1466 if (surface_param->scaling_out_use_32unorm_surf_fmt)
1467 surface_format = I965_SURFACEFORMAT_R32_UNORM;
1468 else if (surface_param->scaling_out_use_16unorm_surf_fmt)
1469 surface_format = I965_SURFACEFORMAT_R16_UNORM;
1471 surface_format = I965_SURFACEFORMAT_R8_UNORM;
1473 gen9_add_2d_gpe_surface(ctx, gpe_context,
1474 surface_param->input_surface,
1475 0, 1, surface_format,
1476 GEN9_AVC_SCALING_FRAME_SRC_Y_INDEX);
1478 gen9_add_2d_gpe_surface(ctx, gpe_context,
1479 surface_param->output_surface,
1480 0, 1, surface_format,
1481 GEN9_AVC_SCALING_FRAME_DST_Y_INDEX);
1483 /*add buffer mv_proc_stat, here need change*/
1484 if (surface_param->mbv_proc_stat_enabled) {
/* 16 DWORDs of statistics per 16x16 macroblock of the input frame. */
1485 res_size = 16 * (surface_param->input_frame_width / 16) * (surface_param->input_frame_height / 16) * sizeof(unsigned int);
1487 gen9_add_buffer_gpe_surface(ctx,
1489 surface_param->pres_mbv_proc_stat_buffer,
1493 GEN9_AVC_SCALING_FRAME_MBVPROCSTATS_DST_INDEX);
/* Flatness-only mode reuses the same BTI but with a 2D R8 surface. */
1494 } else if (surface_param->enable_mb_flatness_check) {
1495 gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
1496 surface_param->pres_flatness_check_surface,
1498 I965_SURFACEFORMAT_R8_UNORM,
1499 GEN9_AVC_SCALING_FRAME_MBVPROCSTATS_DST_INDEX);
/*
 * Run one HME scaling pass.  The HME level selects the kernel and the
 * surface chain: 4x downscales the source YUV (with optional per-MB
 * statistics), 16x downscales the 4x output with the same 4x kernel,
 * and 32x downscales the 16x output with the dedicated 2x kernel.
 * After choosing surfaces, this fills the CURBE, binds surfaces, sets up
 * the interface descriptor and dispatches a media-object-walker grid
 * over the downscaled picture.
 * NOTE(review): the switch header on the HME level, the `kernel_idx`
 * declaration, the default case and several brace lines are not visible
 * in this extraction — the case labels below are assumed to belong to a
 * switch over the requested HME level; confirm against the full file.
 */
1506 gen9_avc_kernel_scaling(VADriverContextP ctx,
1507 struct encode_state *encode_state,
1508 struct intel_encoder_context *encoder_context,
1511 struct i965_driver_data *i965 = i965_driver_data(ctx);
1512 struct i965_gpe_table *gpe = &i965->gpe_table;
1513 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
1514 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
1515 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
1516 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
1517 struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
1519 struct i965_gpe_context *gpe_context;
1520 struct scaling_param surface_param;
1521 struct object_surface *obj_surface;
1522 struct gen9_surface_avc *avc_priv_surface;
1523 struct gpe_media_object_walker_parameter media_object_walker_param;
1524 struct gpe_encoder_kernel_walker_parameter kernel_walker_param;
1525 unsigned int downscaled_width_in_mb, downscaled_height_in_mb;
1526 int media_function = 0;
/* The downscaled pyramid surfaces hang off the reconstructed surface's
 * private data. */
1529 obj_surface = encode_state->reconstructed_object;
1530 avc_priv_surface = obj_surface->private_data;
1532 memset(&surface_param, 0, sizeof(struct scaling_param));
/* 4x: source YUV -> 4x surface; per-MB statistics may be produced here. */
1534 case INTEL_ENC_HME_4x : {
1535 media_function = INTEL_MEDIA_STATE_4X_SCALING;
1536 kernel_idx = GEN9_AVC_KERNEL_SCALING_4X_IDX;
1537 downscaled_width_in_mb = generic_state->downscaled_width_4x_in_mb;
1538 downscaled_height_in_mb = generic_state->downscaled_height_4x_in_mb;
1540 surface_param.input_surface = encode_state->input_yuv_object ;
1541 surface_param.input_frame_width = generic_state->frame_width_in_pixel ;
1542 surface_param.input_frame_height = generic_state->frame_height_in_pixel ;
1544 surface_param.output_surface = avc_priv_surface->scaled_4x_surface_obj ;
1545 surface_param.output_frame_width = generic_state->frame_width_4x ;
1546 surface_param.output_frame_height = generic_state->frame_height_4x ;
1548 surface_param.enable_mb_flatness_check = avc_state->flatness_check_enable;
1549 surface_param.enable_mb_variance_output = avc_state->mb_status_enable;
1550 surface_param.enable_mb_pixel_average_output = avc_state->mb_status_enable;
1552 surface_param.blk8x8_stat_enabled = 0 ;
1553 surface_param.use_4x_scaling = 1 ;
1554 surface_param.use_16x_scaling = 0 ;
1555 surface_param.use_32x_scaling = 0 ;
/* 16x: 4x surface -> 16x surface, same 4x kernel, no statistics. */
1558 case INTEL_ENC_HME_16x : {
1559 media_function = INTEL_MEDIA_STATE_16X_SCALING;
1560 kernel_idx = GEN9_AVC_KERNEL_SCALING_4X_IDX;
1561 downscaled_width_in_mb = generic_state->downscaled_width_16x_in_mb;
1562 downscaled_height_in_mb = generic_state->downscaled_height_16x_in_mb;
1564 surface_param.input_surface = avc_priv_surface->scaled_4x_surface_obj ;
1565 surface_param.input_frame_width = generic_state->frame_width_4x ;
1566 surface_param.input_frame_height = generic_state->frame_height_4x ;
1568 surface_param.output_surface = avc_priv_surface->scaled_16x_surface_obj ;
1569 surface_param.output_frame_width = generic_state->frame_width_16x ;
1570 surface_param.output_frame_height = generic_state->frame_height_16x ;
1572 surface_param.enable_mb_flatness_check = 0 ;
1573 surface_param.enable_mb_variance_output = 0 ;
1574 surface_param.enable_mb_pixel_average_output = 0 ;
1576 surface_param.blk8x8_stat_enabled = 0 ;
1577 surface_param.use_4x_scaling = 0 ;
1578 surface_param.use_16x_scaling = 1 ;
1579 surface_param.use_32x_scaling = 0 ;
/* 32x: 16x surface -> 32x surface via the dedicated 2x kernel. */
1583 case INTEL_ENC_HME_32x : {
1584 media_function = INTEL_MEDIA_STATE_32X_SCALING;
1585 kernel_idx = GEN9_AVC_KERNEL_SCALING_2X_IDX;
1586 downscaled_width_in_mb = generic_state->downscaled_width_32x_in_mb;
1587 downscaled_height_in_mb = generic_state->downscaled_height_32x_in_mb;
1589 surface_param.input_surface = avc_priv_surface->scaled_16x_surface_obj ;
1590 surface_param.input_frame_width = generic_state->frame_width_16x ;
1591 surface_param.input_frame_height = generic_state->frame_height_16x ;
1593 surface_param.output_surface = avc_priv_surface->scaled_32x_surface_obj ;
1594 surface_param.output_frame_width = generic_state->frame_width_32x ;
1595 surface_param.output_frame_height = generic_state->frame_height_32x ;
1597 surface_param.enable_mb_flatness_check = 0 ;
1598 surface_param.enable_mb_variance_output = 0 ;
1599 surface_param.enable_mb_pixel_average_output = 0 ;
1601 surface_param.blk8x8_stat_enabled = 0 ;
1602 surface_param.use_4x_scaling = 0 ;
1603 surface_param.use_16x_scaling = 0 ;
1604 surface_param.use_32x_scaling = 1 ;
1612 gpe_context = &(avc_ctx->context_scaling.gpe_contexts[kernel_idx]);
1614 gpe->context_init(ctx, gpe_context);
1615 gpe->reset_binding_table(ctx, gpe_context);
/* The 2x kernel (32x level) has its own CURBE layout; both 4x-based
 * levels share the 4x CURBE setter. */
1617 if (surface_param.use_32x_scaling) {
1618 generic_ctx->pfn_set_curbe_scaling2x(ctx, encode_state, gpe_context, encoder_context, &surface_param);
1620 generic_ctx->pfn_set_curbe_scaling4x(ctx, encode_state, gpe_context, encoder_context, &surface_param);
/* Destination surface format: R16_UNORM for the 2x kernel, R32_UNORM
 * otherwise (consumed by gen9_avc_send_surface_scaling). */
1623 if (surface_param.use_32x_scaling) {
1624 surface_param.scaling_out_use_16unorm_surf_fmt = 1 ;
1625 surface_param.scaling_out_use_32unorm_surf_fmt = 0 ;
1627 surface_param.scaling_out_use_16unorm_surf_fmt = 0 ;
1628 surface_param.scaling_out_use_32unorm_surf_fmt = 1 ;
/* Only the 4x pass emits statistics: either into the raw MB status
 * buffer (when supported) or into the flatness-check surface. */
1631 if (surface_param.use_4x_scaling) {
1632 if (avc_state->mb_status_supported) {
1633 surface_param.enable_mb_flatness_check = 0;
1634 surface_param.mbv_proc_stat_enabled = (surface_param.use_4x_scaling) ? (avc_state->mb_status_enable || avc_state->flatness_check_enable) : 0 ;
1635 surface_param.pres_mbv_proc_stat_buffer = &(avc_ctx->res_mb_status_buffer);
1638 surface_param.enable_mb_flatness_check = (surface_param.use_4x_scaling) ? avc_state->flatness_check_enable : 0;
1639 surface_param.mbv_proc_stat_enabled = 0 ;
1640 surface_param.pres_flatness_check_surface = &(avc_ctx->res_flatness_check_surface);
1644 generic_ctx->pfn_send_scaling_surface(ctx, encode_state, gpe_context, encoder_context, &surface_param);
1646 /* setup the interface data */
1647 gpe->setup_interface_data(ctx, gpe_context);
1649 memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
1650 if (surface_param.use_32x_scaling) {
1651 kernel_walker_param.resolution_x = downscaled_width_in_mb ;
1652 kernel_walker_param.resolution_y = downscaled_height_in_mb ;
1654 /* the scaling is based on 8x8 blk level */
1655 kernel_walker_param.resolution_x = downscaled_width_in_mb * 2;
1656 kernel_walker_param.resolution_y = downscaled_height_in_mb * 2;
/* Pure downscale — threads have no inter-MB dependencies. */
1658 kernel_walker_param.no_dependency = 1;
1660 i965_init_media_object_walker_parameter(&kernel_walker_param, &media_object_walker_param);
1662 gen9_avc_run_kernel_media_object_walker(ctx, encoder_context,
1665 &media_object_walker_param);
1667 return VA_STATUS_SUCCESS;
1671 frame/mb brc related function
/*
 * Build a default MFX_AVC_IMG_STATE command from the current sequence and
 * picture parameters.  The result is later patched per PAK pass (see
 * gen9_avc_set_image_state*): macroblock_stat_enable / non_first_pass_flag
 * are set up here for a first pass.
 */
1674 gen9_avc_init_mfx_avc_img_state(VADriverContextP ctx,
1675 struct encode_state *encode_state,
1676 struct intel_encoder_context *encoder_context,
1677 struct gen9_mfx_avc_img_state *pstate)
1679 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
1680 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
1681 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
1683 VAEncSequenceParameterBufferH264 *seq_param = avc_state->seq_param;
1684 VAEncPictureParameterBufferH264 *pic_param = avc_state->pic_param;
1686 memset(pstate, 0, sizeof(*pstate));
/* dw0: MI command header (type/pipeline/opcode/sub-opcodes + dword length). */
1688 pstate->dw0.dword_length = (sizeof(struct gen9_mfx_avc_img_state)) / 4 - 2;
1689 pstate->dw0.sub_opcode_b = 0;
1690 pstate->dw0.sub_opcode_a = 0;
1691 pstate->dw0.command_opcode = 1;
1692 pstate->dw0.pipeline = 2;
1693 pstate->dw0.command_type = 3;
1695 pstate->dw1.frame_size_in_mbs = generic_state->frame_width_in_mbs * generic_state->frame_height_in_mbs ;
1697 pstate->dw2.frame_width_in_mbs_minus1 = generic_state->frame_width_in_mbs - 1;
1698 pstate->dw2.frame_height_in_mbs_minus1 = generic_state->frame_height_in_mbs - 1;
1700 pstate->dw3.image_structure = 0;//frame is zero
1701 pstate->dw3.weighted_bipred_idc = pic_param->pic_fields.bits.weighted_bipred_idc;
1702 pstate->dw3.weighted_pred_flag = pic_param->pic_fields.bits.weighted_pred_flag;
1703 pstate->dw3.brc_domain_rate_control_enable = 0;//0,set for non-vdenc mode;
1704 pstate->dw3.chroma_qp_offset = pic_param->chroma_qp_index_offset;
1705 pstate->dw3.second_chroma_qp_offset = pic_param->second_chroma_qp_index_offset;
/* dw4: picture-coding flags lifted from the VA sequence/picture params;
 * progressive-frame encoding is assumed (field_picture_flag = 0). */
1707 pstate->dw4.field_picture_flag = 0;
1708 pstate->dw4.mbaff_mode_active = seq_param->seq_fields.bits.mb_adaptive_frame_field_flag;
1709 pstate->dw4.frame_mb_only_flag = seq_param->seq_fields.bits.frame_mbs_only_flag;
1710 pstate->dw4.transform_8x8_idct_mode_flag = pic_param->pic_fields.bits.transform_8x8_mode_flag;
1711 pstate->dw4.direct_8x8_interface_flag = seq_param->seq_fields.bits.direct_8x8_inference_flag;
1712 pstate->dw4.constrained_intra_prediction_flag = pic_param->pic_fields.bits.constrained_intra_pred_flag;
1713 pstate->dw4.entropy_coding_flag = pic_param->pic_fields.bits.entropy_coding_mode_flag;
1714 pstate->dw4.mb_mv_format_flag = 1;
1715 pstate->dw4.chroma_format_idc = seq_param->seq_fields.bits.chroma_format_idc;
1716 pstate->dw4.mv_unpacked_flag = 1;
1717 pstate->dw4.insert_test_flag = 0;
1718 pstate->dw4.load_slice_pointer_flag = 0;
1719 pstate->dw4.macroblock_stat_enable = 0; /* disable in the first pass */
1720 pstate->dw4.minimum_frame_size = 0;
/* dw5: multi-pass / conformance controls, defaulted for a first pass. */
1721 pstate->dw5.intra_mb_max_bit_flag = 1;
1722 pstate->dw5.inter_mb_max_bit_flag = 1;
1723 pstate->dw5.frame_size_over_flag = 1;
1724 pstate->dw5.frame_size_under_flag = 1;
1725 pstate->dw5.intra_mb_ipcm_flag = 1;
1726 pstate->dw5.mb_rate_ctrl_flag = 0;
1727 pstate->dw5.non_first_pass_flag = 0;
1728 pstate->dw5.aq_enable = pstate->dw5.aq_rounding = 0;
1729 pstate->dw5.aq_chroma_disable = 1;
/* Trellis quantization (AQ) is only usable with CABAC entropy coding. */
1730 if (pstate->dw4.entropy_coding_flag && (avc_state->tq_enable)) {
1731 pstate->dw5.aq_enable = avc_state->tq_enable;
1732 pstate->dw5.aq_rounding = avc_state->tq_rounding;
1734 pstate->dw5.aq_rounding = 0;
/* Per-MB bit-size conformance limits. */
1737 pstate->dw6.intra_mb_max_size = 2700;
1738 pstate->dw6.inter_mb_max_size = 4095;
1740 pstate->dw8.slice_delta_qp_max0 = 0;
1741 pstate->dw8.slice_delta_qp_max1 = 0;
1742 pstate->dw8.slice_delta_qp_max2 = 0;
1743 pstate->dw8.slice_delta_qp_max3 = 0;
1745 pstate->dw9.slice_delta_qp_min0 = 0;
1746 pstate->dw9.slice_delta_qp_min1 = 0;
1747 pstate->dw9.slice_delta_qp_min2 = 0;
1748 pstate->dw9.slice_delta_qp_min3 = 0;
/* Frame bitrate window: max is the 14-bit field maximum in the chosen unit. */
1750 pstate->dw10.frame_bitrate_min = 0;
1751 pstate->dw10.frame_bitrate_min_unit = 1;
1752 pstate->dw10.frame_bitrate_min_unit_mode = 1;
1753 pstate->dw10.frame_bitrate_max = (1 << 14) - 1;
1754 pstate->dw10.frame_bitrate_max_unit = 1;
1755 pstate->dw10.frame_bitrate_max_unit_mode = 1;
1757 pstate->dw11.frame_bitrate_min_delta = 0;
1758 pstate->dw11.frame_bitrate_max_delta = 0;
1760 pstate->dw12.vad_error_logic = 1;
1761 /* set parameters DW19/DW20 for slices */
/*
 * BRC path: write one MFX_AVC_IMG_STATE command per PAK pass into
 * `gpe_resource`, each immediately followed by MI_BATCH_BUFFER_END and
 * padded to INTEL_AVC_IMAGE_STATE_CMD_SIZE, so the BRC kernel can patch
 * and the PAK can chain into the per-pass command.
 * NOTE(review): the `pdata` declaration, the per-pass condition guarding
 * the two dw4/dw5 settings (presumably `i == 0` for the first pass), and
 * a map-failure guard are not visible in this extraction — confirm
 * against the full file.
 */
1764 void gen9_avc_set_image_state(VADriverContextP ctx,
1765 struct encode_state *encode_state,
1766 struct intel_encoder_context *encoder_context,
1767 struct i965_gpe_resource *gpe_resource)
1769 struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
1770 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
1773 unsigned int * data;
1774 struct gen9_mfx_avc_img_state cmd;
1776 pdata = i965_map_gpe_resource(gpe_resource);
1781 gen9_avc_init_mfx_avc_img_state(ctx, encode_state, encoder_context, &cmd);
1782 for (i = 0; i < generic_state->num_pak_passes; i++) {
/* First pass: no MB statistics, first-pass flag clear. */
1785 cmd.dw4.macroblock_stat_enable = 0;
1786 cmd.dw5.non_first_pass_flag = 0;
/* Subsequent passes consume MB statistics from the previous pass. */
1788 cmd.dw4.macroblock_stat_enable = 1;
1789 cmd.dw5.non_first_pass_flag = 1;
1790 cmd.dw5.intra_mb_ipcm_flag = 1;
1793 cmd.dw5.mb_rate_ctrl_flag = 0;
1794 memcpy(pdata, &cmd, sizeof(struct gen9_mfx_avc_img_state));
/* Terminate each per-pass command block so it can run as a second-level
 * batch. */
1795 data = (unsigned int *)(pdata + sizeof(struct gen9_mfx_avc_img_state));
1796 *data = MI_BATCH_BUFFER_END;
1798 pdata += INTEL_AVC_IMAGE_STATE_CMD_SIZE;
1800 i965_unmap_gpe_resource(gpe_resource);
/*
 * Non-BRC path: write a single MFX_AVC_IMG_STATE command (for the current
 * PAK pass only) plus MI_BATCH_BUFFER_END into `gpe_resource`.  Note that
 * unlike the BRC variant, non_first_pass_flag stays 0 on later passes here.
 * NOTE(review): the `pdata` declaration and a map-failure guard are not
 * visible in this extraction — confirm against the full file.
 */
1804 void gen9_avc_set_image_state_non_brc(VADriverContextP ctx,
1805 struct encode_state *encode_state,
1806 struct intel_encoder_context *encoder_context,
1807 struct i965_gpe_resource *gpe_resource)
1809 struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
1810 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
1813 unsigned int * data;
1814 struct gen9_mfx_avc_img_state cmd;
1816 pdata = i965_map_gpe_resource(gpe_resource);
1821 gen9_avc_init_mfx_avc_img_state(ctx, encode_state, encoder_context, &cmd);
/* First pass: no MB statistics. */
1823 if (generic_state->curr_pak_pass == 0) {
1824 cmd.dw4.macroblock_stat_enable = 0;
1825 cmd.dw5.non_first_pass_flag = 0;
/* Later passes: enable MB statistics input. */
1828 cmd.dw4.macroblock_stat_enable = 1;
1829 cmd.dw5.non_first_pass_flag = 0;
1830 cmd.dw5.intra_mb_ipcm_flag = 1;
1833 cmd.dw5.mb_rate_ctrl_flag = 0;
1834 memcpy(pdata, &cmd, sizeof(struct gen9_mfx_avc_img_state));
1835 data = (unsigned int *)(pdata + sizeof(struct gen9_mfx_avc_img_state));
1836 *data = MI_BATCH_BUFFER_END;
1838 i965_unmap_gpe_resource(gpe_resource);
/*
 * Build the Gen9.5 trellis-quantization lambda LUT (avc_state->lamda_value_lut)
 * for the current slice type.  Each 32-bit entry packs the intra lambda in
 * the high 16 bits and the inter lambda in the low 16 bits.  The sentinel
 * values in the static tables (0xfffa intra, 0xffef inter) are replaced with
 * 0xf000 + a rounding value: a fixed default for intra, and for inter a
 * per-slice-type rounding taken from the app-provided override or the preset
 * defaults (with the B-slice value depending on whether the picture is a
 * reference).
 * NOTE(review): the switch scaffolding, loop-variable declarations and the
 * clamping branches for values >= GEN95_AVC_MAX_LAMBDA are not visible in
 * this extraction — confirm against the full file.
 */
1843 gen95_avc_calc_lambda_table(VADriverContextP ctx,
1844 struct encode_state *encode_state,
1845 struct intel_encoder_context *encoder_context)
1847 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
1848 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
1849 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
1850 VAEncPictureParameterBufferH264 *pic_param = avc_state->pic_param;
1851 unsigned int value, inter, intra;
1852 unsigned int rounding_value = 0;
1853 unsigned int size = 0;
1856 unsigned int * lambda_table = (unsigned int *)(avc_state->lamda_value_lut);
/* Two 32-bit entries per QP value; `size` is already in bytes. */
1862 size = AVC_QP_MAX * 2 * sizeof(unsigned int);
1863 switch (generic_state->frame_type) {
1865 memcpy((unsigned char *)lambda_table, (unsigned char *)&gen95_avc_tq_lambda_i_frame[0][0], size * sizeof(unsigned char));
1868 memcpy((unsigned char *)lambda_table, (unsigned char *)&gen95_avc_tq_lambda_p_frame[0][0], size * sizeof(unsigned char));
1871 memcpy((unsigned char *)lambda_table, (unsigned char *)&gen95_avc_tq_lambda_b_frame[0][0], size * sizeof(unsigned char));
1878 for (i = 0; i < AVC_QP_MAX ; i++) {
1879 for (col = 0; col < 2; col++) {
1880 value = *(lambda_table + i * 2 + col);
/* High halfword: intra lambda. */
1881 intra = value >> 16;
1883 if (intra < GEN95_AVC_MAX_LAMBDA) {
/* Sentinel -> fixed intra trellis rounding. */
1884 if (intra == 0xfffa) {
1885 intra = 0xf000 + GEN95_AVC_DEFAULT_TRELLIS_QUANT_INTRA_ROUNDING;
1889 intra = intra << 16;
/* Low halfword: inter lambda. */
1890 inter = value & 0xffff;
1892 if (inter < GEN95_AVC_MAX_LAMBDA) {
1893 if (inter == 0xffef) {
/* Pick the inter rounding: app override if valid, else preset default. */
1894 if (generic_state->frame_type == SLICE_TYPE_P) {
1895 if (avc_state->rounding_inter_p == AVC_INVALID_ROUNDING_VALUE)
1896 rounding_value = gen9_avc_inter_rounding_p[generic_state->preset];
1898 rounding_value = avc_state->rounding_inter_p;
1899 } else if (generic_state->frame_type == SLICE_TYPE_B) {
1900 if (pic_param->pic_fields.bits.reference_pic_flag) {
1901 if (avc_state->rounding_inter_b_ref == AVC_INVALID_ROUNDING_VALUE)
1902 rounding_value = gen9_avc_inter_rounding_b_ref[generic_state->preset];
1904 rounding_value = avc_state->rounding_inter_b_ref;
1906 if (avc_state->rounding_inter_b == AVC_INVALID_ROUNDING_VALUE)
1907 rounding_value = gen9_avc_inter_rounding_b[generic_state->preset];
1909 rounding_value = avc_state->rounding_inter_b;
1913 inter = 0xf000 + rounding_value;
/* Repack the entry: intra (already shifted) | inter. */
1915 *(lambda_table + i * 2 + col) = intra + inter;
/*
 * Fill the BRC constant-data surface for the current frame.  The buffer is
 * a sequence of regions consumed by the BRC/MBEnc kernels:
 *   1. QP adjustment / distortion threshold / max-frame threshold /
 *      distortion-QP adjustment tables,
 *   2. skip-value thresholds for the current slice type (optionally
 *      overridden per-QP from the app-provided non-FTQ LUT),
 *   3. the reference-list QP map (0xff = unused entries),
 *   4. mode cost + MV cost tables (intra mode cost optionally replaced by
 *      the "old" table, FTQ skip thresholds optionally patched per QP),
 *   5. reference costs,
 *   6. intra scaling factors (adaptive or fixed),
 *   7. on KBL/GLK only: the Gen9.5 lambda table and ftq25 table.
 * NOTE(review): the `data += size;` advances between regions, the switch
 * scaffolding, the loop-variable declarations and NULL-surface guards are
 * not visible in this extraction — confirm against the full file.
 */
1921 gen9_avc_init_brc_const_data(VADriverContextP ctx,
1922 struct encode_state *encode_state,
1923 struct intel_encoder_context *encoder_context)
1925 struct i965_driver_data *i965 = i965_driver_data(ctx);
1926 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
1927 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
1928 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
1929 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
1931 struct i965_gpe_resource *gpe_resource = NULL;
1932 unsigned char * data = NULL;
1933 unsigned char * data_tmp = NULL;
1934 unsigned int size = 0;
1935 unsigned int table_idx = 0;
1936 unsigned int block_based_skip_enable = avc_state->block_based_skip_enable;
1939 struct object_surface *obj_surface;
1940 VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
1941 VASurfaceID surface_id;
1942 unsigned int transform_8x8_mode_flag = avc_state->transform_8x8_mode_enable;
1944 gpe_resource = &(avc_ctx->res_brc_const_data_buffer);
1945 assert(gpe_resource);
1947 i965_zero_gpe_resource(gpe_resource);
1949 data = i965_map_gpe_resource(gpe_resource);
/* Map the VA slice type onto the kernel's I/P/B table index. */
1952 table_idx = slice_type_kernel[generic_state->frame_type];
1954 /* Fill surface with QP Adjustment table, Distortion threshold table, MaxFrame threshold table, Distortion QP Adjustment Table*/
1955 size = sizeof(gen9_avc_qp_adjustment_dist_threshold_max_frame_threshold_dist_qp_adjustment_ipb);
1956 memcpy(data, gen9_avc_qp_adjustment_dist_threshold_max_frame_threshold_dist_qp_adjustment_ipb, size * sizeof(unsigned char));
1960 /* skip threshold table*/
1962 switch (generic_state->frame_type) {
1964 memcpy(data, gen9_avc_skip_value_p[block_based_skip_enable][transform_8x8_mode_flag], size * sizeof(unsigned char));
1967 memcpy(data, gen9_avc_skip_value_b[block_based_skip_enable][transform_8x8_mode_flag], size * sizeof(unsigned char));
1970 /*SLICE_TYPE_I,no change */
/* App-supplied non-FTQ skip thresholds override the odd bytes per QP. */
1974 if ((generic_state->frame_type != SLICE_TYPE_I) && avc_state->non_ftq_skip_threshold_lut_input_enable) {
1975 for (i = 0; i < AVC_QP_MAX ; i++) {
1976 *(data + 1 + (i * 2)) = (unsigned char)i965_avc_calc_skip_value(block_based_skip_enable, transform_8x8_mode_flag, avc_state->non_ftq_skip_threshold_lut[i]);
1981 /*fill the qp for ref list*/
/* Region layout: 32 bytes list0, 32 bytes unused, 32 bytes list1, 160 pad;
 * unused reference slots are marked 0xff. */
1982 size = 32 + 32 + 32 + 160;
1983 memset(data, 0xff, 32);
1984 memset(data + 32 + 32, 0xff, 32);
1985 switch (generic_state->frame_type) {
1986 case SLICE_TYPE_P: {
1987 for (i = 0 ; i < slice_param->num_ref_idx_l0_active_minus1 + 1; i++) {
1988 surface_id = slice_param->RefPicList0[i].picture_id;
1989 obj_surface = SURFACE(surface_id);
1992 *(data + i) = avc_state->list_ref_idx[0][i];//?
1996 case SLICE_TYPE_B: {
/* B slices fill list1 first (at offset 64), then rewind for list0. */
1997 data = data + 32 + 32;
1998 for (i = 0 ; i < slice_param->num_ref_idx_l1_active_minus1 + 1; i++) {
1999 surface_id = slice_param->RefPicList1[i].picture_id;
2000 obj_surface = SURFACE(surface_id);
2003 *(data + i) = avc_state->list_ref_idx[1][i];//?
2006 data = data - 32 - 32;
2008 for (i = 0 ; i < slice_param->num_ref_idx_l0_active_minus1 + 1; i++) {
2009 surface_id = slice_param->RefPicList0[i].picture_id;
2010 obj_surface = SURFACE(surface_id);
2013 *(data + i) = avc_state->list_ref_idx[0][i];//?
2018 /*SLICE_TYPE_I,no change */
2023 /*mv cost and mode cost*/
2025 memcpy(data, (unsigned char *)&gen9_avc_mode_mv_cost_table[table_idx][0][0], size * sizeof(unsigned char));
/* Legacy intra mode cost: patch dword 3 of each 32-byte per-QP record. */
2027 if (avc_state->old_mode_cost_enable) {
2029 for (i = 0; i < AVC_QP_MAX ; i++) {
2030 *(data_tmp + 3) = (unsigned int)gen9_avc_old_intra_mode_cost[i];
/* App-supplied FTQ skip thresholds fan out into bytes 24-25 and 27-31 of
 * each per-QP record (byte 26 is left untouched). */
2035 if (avc_state->ftq_skip_threshold_lut_input_enable) {
2036 for (i = 0; i < AVC_QP_MAX ; i++) {
2037 *(data + (i * 32) + 24) =
2038 *(data + (i * 32) + 25) =
2039 *(data + (i * 32) + 27) =
2040 *(data + (i * 32) + 28) =
2041 *(data + (i * 32) + 29) =
2042 *(data + (i * 32) + 30) =
2043 *(data + (i * 32) + 31) = avc_state->ftq_skip_threshold_lut[i];
2051 memcpy(data, (unsigned char *)&gen9_avc_ref_cost[table_idx][0], size * sizeof(unsigned char));
/* Intra scaling factors: adaptive variant when enabled. */
2056 if (avc_state->adaptive_intra_scaling_enable) {
2057 memcpy(data, (unsigned char *)gen9_avc_adaptive_intra_scaling_factor, size * sizeof(unsigned char));
2059 memcpy(data, (unsigned char *)gen9_avc_intra_scaling_factor, size * sizeof(unsigned char));
/* Gen9.5-only trailing tables (lambda + ftq25). */
2062 if (IS_KBL(i965->intel.device_info) ||
2063 IS_GLK(i965->intel.device_info)) {
2067 memcpy(data, (unsigned char *)gen95_avc_lambda_data, size * sizeof(unsigned char));
2071 memcpy(data, (unsigned char *)gen95_avc_ftq25, size * sizeof(unsigned char));
2074 i965_unmap_gpe_resource(gpe_resource);
/*
 * Legacy variant of gen9_avc_init_brc_const_data using the older gen75
 * QP-adjustment and mode/MV cost tables.  Same region sequence as the new
 * variant, except the reference-list QP region is skipped (left zeroed)
 * and there are no Gen9.5 trailing tables.
 * NOTE(review): the `data += size;` advances between regions, switch
 * scaffolding and loop declarations are not visible in this extraction —
 * confirm against the full file.  Also note `data`/`data_tmp` are declared
 * `unsigned int *` here (vs `unsigned char *` in the new variant), so the
 * byte-offset patching below presumably relies on casts on lines not
 * visible here.
 */
2078 gen9_avc_init_brc_const_data_old(VADriverContextP ctx,
2079 struct encode_state *encode_state,
2080 struct intel_encoder_context *encoder_context)
2082 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2083 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
2084 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
2085 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
2087 struct i965_gpe_resource *gpe_resource = NULL;
2088 unsigned int * data = NULL;
2089 unsigned int * data_tmp = NULL;
2090 unsigned int size = 0;
2091 unsigned int table_idx = 0;
2092 unsigned int block_based_skip_enable = avc_state->block_based_skip_enable;
2093 unsigned int transform_8x8_mode_flag = avc_state->transform_8x8_mode_enable;
2096 gpe_resource = &(avc_ctx->res_brc_const_data_buffer);
2097 assert(gpe_resource);
2099 i965_zero_gpe_resource(gpe_resource);
2101 data = i965_map_gpe_resource(gpe_resource);
/* Map the VA slice type onto the kernel's I/P/B table index. */
2104 table_idx = slice_type_kernel[generic_state->frame_type];
2106 /* Fill surface with QP Adjustment table, Distortion threshold table, MaxFrame threshold table, Distortion QP Adjustment Table*/
2107 size = sizeof(gen75_avc_qp_adjustment_dist_threshold_max_frame_threshold_dist_qp_adjustment_ipb);
2108 memcpy(data, gen75_avc_qp_adjustment_dist_threshold_max_frame_threshold_dist_qp_adjustment_ipb, size * sizeof(unsigned char));
2112 /* skip threshold table*/
2114 switch (generic_state->frame_type) {
2116 memcpy(data, gen9_avc_skip_value_p[block_based_skip_enable][transform_8x8_mode_flag], size * sizeof(unsigned char));
2119 memcpy(data, gen9_avc_skip_value_b[block_based_skip_enable][transform_8x8_mode_flag], size * sizeof(unsigned char));
2122 /*SLICE_TYPE_I,no change */
/* App-supplied non-FTQ skip thresholds override the odd bytes per QP. */
2126 if ((generic_state->frame_type != SLICE_TYPE_I) && avc_state->non_ftq_skip_threshold_lut_input_enable) {
2127 for (i = 0; i < AVC_QP_MAX ; i++) {
2128 *(data + 1 + (i * 2)) = (unsigned char)i965_avc_calc_skip_value(block_based_skip_enable, transform_8x8_mode_flag, avc_state->non_ftq_skip_threshold_lut[i]);
2133 /*fill the qp for ref list*/
2139 /*mv cost and mode cost*/
2141 memcpy(data, (unsigned char *)&gen75_avc_mode_mv_cost_table[table_idx][0][0], size * sizeof(unsigned char));
/* Legacy intra mode cost: patch dword 3 of each per-QP record. */
2143 if (avc_state->old_mode_cost_enable) {
2145 for (i = 0; i < AVC_QP_MAX ; i++) {
2146 *(data_tmp + 3) = (unsigned int)gen9_avc_old_intra_mode_cost[i];
/* App-supplied FTQ skip thresholds fan out into bytes 24-25 and 27-31 of
 * each per-QP record (byte 26 is left untouched). */
2151 if (avc_state->ftq_skip_threshold_lut_input_enable) {
2152 for (i = 0; i < AVC_QP_MAX ; i++) {
2153 *(data + (i * 32) + 24) =
2154 *(data + (i * 32) + 25) =
2155 *(data + (i * 32) + 27) =
2156 *(data + (i * 32) + 28) =
2157 *(data + (i * 32) + 29) =
2158 *(data + (i * 32) + 30) =
2159 *(data + (i * 32) + 31) = avc_state->ftq_skip_threshold_lut[i];
2167 memcpy(data, (unsigned char *)&gen9_avc_ref_cost[table_idx][0], size * sizeof(unsigned char));
2169 i965_unmap_gpe_resource(gpe_resource);
/* Fill the CURBE (constant buffer) consumed by the BRC init/reset GPU kernel:
 * rate-control targets, VBV buffer fullness, GOP structure and the
 * deviation-threshold tables that steer the bit-rate controller. */
2172 gen9_avc_set_curbe_brc_init_reset(VADriverContextP ctx,
2173 struct encode_state *encode_state,
2174 struct i965_gpe_context *gpe_context,
2175 struct intel_encoder_context *encoder_context,
2178 gen9_avc_brc_init_reset_curbe_data *cmd;
2179 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2180 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
2181 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
2182 double input_bits_per_frame = 0;
2183 double bps_ratio = 0;
2184 VAEncSequenceParameterBufferH264 * seq_param = avc_state->seq_param;
2185 struct avc_param common_param;
2187 cmd = i965_gpe_context_map_curbe(gpe_context);
/* Start from the static default curbe, then overwrite per-stream fields. */
2192 memcpy(cmd, &gen9_avc_brc_init_reset_curbe_init_data, sizeof(gen9_avc_brc_init_reset_curbe_data));
2194 memset(&common_param, 0, sizeof(common_param));
2195 common_param.frame_width_in_pixel = generic_state->frame_width_in_pixel;
2196 common_param.frame_height_in_pixel = generic_state->frame_height_in_pixel;
2197 common_param.frame_width_in_mbs = generic_state->frame_width_in_mbs;
2198 common_param.frame_height_in_mbs = generic_state->frame_height_in_mbs;
2199 common_param.frames_per_100s = generic_state->frames_per_100s;
2200 common_param.vbv_buffer_size_in_bit = generic_state->vbv_buffer_size_in_bit;
2201 common_param.target_bit_rate = generic_state->target_bit_rate;
2203 cmd->dw0.profile_level_max_frame = i965_avc_get_profile_level_max_frame(&common_param, seq_param->level_idc);
2204 cmd->dw1.init_buf_full_in_bits = generic_state->init_vbv_buffer_fullness_in_bit;
2205 cmd->dw2.buf_size_in_bits = generic_state->vbv_buffer_size_in_bit;
/* Bit rates are stored in kbps in generic_state; the kernel wants bps. */
2206 cmd->dw3.average_bit_rate = generic_state->target_bit_rate * 1000;
2207 cmd->dw4.max_bit_rate = generic_state->max_bit_rate * 1000;
2208 cmd->dw8.gop_p = (generic_state->gop_ref_distance) ? ((generic_state->gop_size - 1) / generic_state->gop_ref_distance) : 0;
2209 cmd->dw9.gop_b = (generic_state->gop_size - 1 - cmd->dw8.gop_p);
2210 cmd->dw9.frame_width_in_bytes = generic_state->frame_width_in_pixel;
2211 cmd->dw10.frame_height_in_bytes = generic_state->frame_height_in_pixel;
2212 cmd->dw12.no_slices = avc_state->slice_num;
2215 if (seq_param->vui_parameters_present_flag && generic_state->internal_rate_mode != INTEL_BRC_AVBR) {
2216 cmd->dw4.max_bit_rate = cmd->dw4.max_bit_rate; /* NOTE(review): self-assignment — presumably a placeholder for a VUI-derived clamp; confirm against reference driver */
2217 if (generic_state->internal_rate_mode == VA_RC_CBR) {
2218 cmd->dw3.average_bit_rate = cmd->dw4.max_bit_rate;
2223 cmd->dw6.frame_rate_m = generic_state->frames_per_100s;
2224 cmd->dw7.frame_rate_d = 100;
2225 cmd->dw8.brc_flag = 0;
/* Bit 0x8000 disables MB-level BRC in the kernel. */
2226 cmd->dw8.brc_flag |= (generic_state->mb_brc_enabled) ? 0 : 0x8000;
/* Rate-mode specific flags: CBR forces max == average. */
2229 if (generic_state->internal_rate_mode == VA_RC_CBR) {
2231 cmd->dw4.max_bit_rate = cmd->dw3.average_bit_rate;
2232 cmd->dw8.brc_flag = cmd->dw8.brc_flag | INTEL_ENCODE_BRCINIT_ISCBR;
2234 } else if (generic_state->internal_rate_mode == VA_RC_VBR) {
2236 if (cmd->dw4.max_bit_rate < cmd->dw3.average_bit_rate) {
2237 cmd->dw4.max_bit_rate = cmd->dw3.average_bit_rate << 1;
2239 cmd->dw8.brc_flag = cmd->dw8.brc_flag | INTEL_ENCODE_BRCINIT_ISVBR;
2241 } else if (generic_state->internal_rate_mode == INTEL_BRC_AVBR) {
2243 cmd->dw4.max_bit_rate = cmd->dw3.average_bit_rate;
2244 cmd->dw8.brc_flag = cmd->dw8.brc_flag | INTEL_ENCODE_BRCINIT_ISAVBR;
2247 // ignore icq/vcm/qvbr
2249 cmd->dw10.avbr_accuracy = generic_state->avbr_curracy;
2250 cmd->dw11.avbr_convergence = generic_state->avbr_convergence;
/* Average bits available per frame (frame_rate_m is frames per 100 s). */
2253 input_bits_per_frame = (double)(cmd->dw4.max_bit_rate) * (double)(cmd->dw7.frame_rate_d) / (double)(cmd->dw6.frame_rate_m);;
/* Derive VBV buffer size / initial fullness when the app did not set them. */
2255 if (cmd->dw2.buf_size_in_bits == 0) {
2256 cmd->dw2.buf_size_in_bits = (unsigned int)(input_bits_per_frame * 4);
2259 if (cmd->dw1.init_buf_full_in_bits == 0) {
2260 cmd->dw1.init_buf_full_in_bits = cmd->dw2.buf_size_in_bits * 7 / 8;
2262 if (cmd->dw1.init_buf_full_in_bits < (unsigned int)(input_bits_per_frame * 2)) {
2263 cmd->dw1.init_buf_full_in_bits = (unsigned int)(input_bits_per_frame * 2);
2265 if (cmd->dw1.init_buf_full_in_bits > cmd->dw2.buf_size_in_bits) {
2266 cmd->dw1.init_buf_full_in_bits = cmd->dw2.buf_size_in_bits;
/* AVBR overrides the derived buffer model entirely. */
2270 if (generic_state->internal_rate_mode == INTEL_BRC_AVBR) {
2271 cmd->dw2.buf_size_in_bits = 2 * generic_state->target_bit_rate * 1000;
2272 cmd->dw1.init_buf_full_in_bits = (unsigned int)(3 * cmd->dw2.buf_size_in_bits / 4);
/* bps_ratio scales the deviation thresholds; clamped to [0.1, 3.5]. */
2276 bps_ratio = input_bits_per_frame / (cmd->dw2.buf_size_in_bits / 30.0);
2277 bps_ratio = (bps_ratio < 0.1) ? 0.1 : (bps_ratio > 3.5) ? 3.5 : bps_ratio;
/* Deviation-threshold tables for P/B, VBR and I frames.
 * NOTE(review): casting a negative double to unsigned int is UB in ISO C;
 * the kernel appears to expect the two's-complement byte pattern — confirm. */
2280 cmd->dw16.deviation_threshold_0_pand_b = (unsigned int)(-50 * pow(0.90, bps_ratio));
2281 cmd->dw16.deviation_threshold_1_pand_b = (unsigned int)(-50 * pow(0.66, bps_ratio));
2282 cmd->dw16.deviation_threshold_2_pand_b = (unsigned int)(-50 * pow(0.46, bps_ratio));
2283 cmd->dw16.deviation_threshold_3_pand_b = (unsigned int)(-50 * pow(0.3, bps_ratio));
2284 cmd->dw17.deviation_threshold_4_pand_b = (unsigned int)(50 * pow(0.3, bps_ratio));
2285 cmd->dw17.deviation_threshold_5_pand_b = (unsigned int)(50 * pow(0.46, bps_ratio));
2286 cmd->dw17.deviation_threshold_6_pand_b = (unsigned int)(50 * pow(0.7, bps_ratio));
2287 cmd->dw17.deviation_threshold_7_pand_b = (unsigned int)(50 * pow(0.9, bps_ratio));
2288 cmd->dw18.deviation_threshold_0_vbr = (unsigned int)(-50 * pow(0.9, bps_ratio));
2289 cmd->dw18.deviation_threshold_1_vbr = (unsigned int)(-50 * pow(0.7, bps_ratio));
2290 cmd->dw18.deviation_threshold_2_vbr = (unsigned int)(-50 * pow(0.5, bps_ratio));
2291 cmd->dw18.deviation_threshold_3_vbr = (unsigned int)(-50 * pow(0.3, bps_ratio));
2292 cmd->dw19.deviation_threshold_4_vbr = (unsigned int)(100 * pow(0.4, bps_ratio));
2293 cmd->dw19.deviation_threshold_5_vbr = (unsigned int)(100 * pow(0.5, bps_ratio));
2294 cmd->dw19.deviation_threshold_6_vbr = (unsigned int)(100 * pow(0.75, bps_ratio));
2295 cmd->dw19.deviation_threshold_7_vbr = (unsigned int)(100 * pow(0.9, bps_ratio));
2296 cmd->dw20.deviation_threshold_0_i = (unsigned int)(-50 * pow(0.8, bps_ratio));
2297 cmd->dw20.deviation_threshold_1_i = (unsigned int)(-50 * pow(0.6, bps_ratio));
2298 cmd->dw20.deviation_threshold_2_i = (unsigned int)(-50 * pow(0.34, bps_ratio));
2299 cmd->dw20.deviation_threshold_3_i = (unsigned int)(-50 * pow(0.2, bps_ratio));
2300 cmd->dw21.deviation_threshold_4_i = (unsigned int)(50 * pow(0.2, bps_ratio));
2301 cmd->dw21.deviation_threshold_5_i = (unsigned int)(50 * pow(0.4, bps_ratio));
2302 cmd->dw21.deviation_threshold_6_i = (unsigned int)(50 * pow(0.66, bps_ratio));
2303 cmd->dw21.deviation_threshold_7_i = (unsigned int)(50 * pow(0.9, bps_ratio));
2305 cmd->dw22.sliding_window_size = generic_state->frames_per_window_size;
2307 i965_gpe_context_unmap_curbe(gpe_context);
/* Bind the two surfaces the BRC init/reset kernel reads/writes:
 * the BRC history buffer and the ME distortion 2D surface. */
2313 gen9_avc_send_surface_brc_init_reset(VADriverContextP ctx,
2314 struct encode_state *encode_state,
2315 struct i965_gpe_context *gpe_context,
2316 struct intel_encoder_context *encoder_context,
2319 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2320 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
/* BRC history buffer (raw buffer binding). */
2322 gen9_add_buffer_gpe_surface(ctx,
2324 &avc_ctx->res_brc_history_buffer,
2326 avc_ctx->res_brc_history_buffer.size,
2328 GEN9_AVC_BRC_INIT_RESET_HISTORY_INDEX);
/* Distortion data as a 2D R8 surface. */
2330 gen9_add_buffer_2d_gpe_surface(ctx,
2332 &avc_ctx->res_brc_dist_data_surface,
2334 I965_SURFACEFORMAT_R8_UNORM,
2335 GEN9_AVC_BRC_INIT_RESET_DISTORTION_INDEX);
/* Dispatch the BRC init (first frame) or BRC reset (subsequent re-init)
 * GPU kernel: set curbe, bind surfaces, then run a single media object.
 * Returns VA_STATUS_SUCCESS. */
2341 gen9_avc_kernel_brc_init_reset(VADriverContextP ctx,
2342 struct encode_state *encode_state,
2343 struct intel_encoder_context *encoder_context)
2345 struct i965_driver_data *i965 = i965_driver_data(ctx);
2346 struct i965_gpe_table *gpe = &i965->gpe_table;
2347 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2348 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
2349 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
2350 struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
2352 struct i965_gpe_context *gpe_context;
2353 struct gpe_media_object_parameter media_object_param;
2354 struct gpe_media_object_inline_data media_object_inline_data;
2355 int media_function = 0;
2356 int kernel_idx = GEN9_AVC_KERNEL_BRC_INIT;
2358 media_function = INTEL_MEDIA_STATE_BRC_INIT_RESET;
/* After the first init, further calls use the RESET kernel variant. */
2360 if (generic_state->brc_inited)
2361 kernel_idx = GEN9_AVC_KERNEL_BRC_RESET;
2363 gpe_context = &(avc_ctx->context_brc.gpe_contexts[kernel_idx]);
2365 gpe->context_init(ctx, gpe_context);
2366 gpe->reset_binding_table(ctx, gpe_context);
/* Program curbe and binding table through the per-generation hooks. */
2368 generic_ctx->pfn_set_curbe_brc_init_reset(ctx, encode_state, gpe_context, encoder_context, NULL);
2370 generic_ctx->pfn_send_brc_init_reset_surface(ctx, encode_state, gpe_context, encoder_context, NULL);
2372 gpe->setup_interface_data(ctx, gpe_context);
/* Single-threaded dispatch: one media object with zeroed inline data. */
2374 memset(&media_object_param, 0, sizeof(media_object_param));
2375 memset(&media_object_inline_data, 0, sizeof(media_object_inline_data));
2376 media_object_param.pinline_data = &media_object_inline_data;
2377 media_object_param.inline_size = sizeof(media_object_inline_data);
2379 gen9_avc_run_kernel_media_object(ctx, encoder_context,
2382 &media_object_param);
2384 return VA_STATUS_SUCCESS;
/* Fill the CURBE for the per-frame BRC update kernel: target size for this
 * frame, frame type, min/max QP limits, AVBR growth thresholds and ROI flag. */
2388 gen9_avc_set_curbe_brc_frame_update(VADriverContextP ctx,
2389 struct encode_state *encode_state,
2390 struct i965_gpe_context *gpe_context,
2391 struct intel_encoder_context *encoder_context,
2394 gen9_avc_frame_brc_update_curbe_data *cmd;
2395 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2396 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
2397 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
2398 struct object_surface *obj_surface;
2399 struct gen9_surface_avc *avc_priv_surface;
2400 struct avc_param common_param;
2401 VAEncSequenceParameterBufferH264 * seq_param = avc_state->seq_param;
2403 obj_surface = encode_state->reconstructed_object;
/* Bail out when there is no reconstructed surface to attach BRC state to. */
2405 if (!obj_surface || !obj_surface->private_data)
2407 avc_priv_surface = obj_surface->private_data;
2409 cmd = i965_gpe_context_map_curbe(gpe_context);
2414 memcpy(cmd, &gen9_avc_frame_brc_update_curbe_init_data, sizeof(gen9_avc_frame_brc_update_curbe_data));
/* Wrap the running target-buffer fullness when it exceeds the VBV size. */
2416 cmd->dw5.target_size_flag = 0 ;
2417 if (generic_state->brc_init_current_target_buf_full_in_bits > (double)generic_state->brc_init_reset_buf_size_in_bits) {
2419 generic_state->brc_init_current_target_buf_full_in_bits -= (double)generic_state->brc_init_reset_buf_size_in_bits;
2420 cmd->dw5.target_size_flag = 1 ;
/* Account for application-skipped frames ("enbale" [sic] — field name declared elsewhere). */
2423 if (generic_state->skip_frame_enbale) {
2424 cmd->dw6.num_skip_frames = generic_state->num_skip_frames ;
2425 cmd->dw7.size_skip_frames = generic_state->size_skip_frames;
2427 generic_state->brc_init_current_target_buf_full_in_bits += generic_state->brc_init_reset_input_bits_per_frame * generic_state->num_skip_frames;
2430 cmd->dw0.target_size = (unsigned int)generic_state->brc_init_current_target_buf_full_in_bits ;
2431 cmd->dw1.frame_number = generic_state->seq_frame_number ;
/* Header bytes already emitted, converted to bits ("herder" [sic] — field name declared elsewhere). */
2432 cmd->dw2.size_of_pic_headers = generic_state->herder_bytes_inserted << 3 ;
2433 cmd->dw5.cur_frame_type = generic_state->frame_type ;
2434 cmd->dw5.brc_flag = 0 ;
2435 cmd->dw5.brc_flag |= (avc_priv_surface->is_as_ref) ? INTEL_ENCODE_BRCUPDATE_IS_REFERENCE : 0 ;
2437 if (avc_state->multi_pre_enable) {
2438 cmd->dw5.brc_flag |= INTEL_ENCODE_BRCUPDATE_IS_ACTUALQP ;
2439 cmd->dw14.qp_index_of_cur_pic = avc_priv_surface->frame_idx ; //do not know this. use -1
2442 cmd->dw5.max_num_paks = generic_state->num_pak_passes ;
/* Per-frame-type QP clamping when the app enabled min/max QP. */
2443 if (avc_state->min_max_qp_enable) {
2444 switch (generic_state->frame_type) {
2446 cmd->dw6.minimum_qp = avc_state->min_qp_i ;
2447 cmd->dw6.maximum_qp = avc_state->max_qp_i ;
2450 cmd->dw6.minimum_qp = avc_state->min_qp_p ;
2451 cmd->dw6.maximum_qp = avc_state->max_qp_p ;
2454 cmd->dw6.minimum_qp = avc_state->min_qp_b ;
2455 cmd->dw6.maximum_qp = avc_state->max_qp_b ;
/* 0/0 means "no clamping" for the kernel. */
2459 cmd->dw6.minimum_qp = 0 ;
2460 cmd->dw6.maximum_qp = 0 ;
2462 cmd->dw6.enable_force_skip = avc_state->enable_force_skip ;
2463 cmd->dw6.enable_sliding_window = 0 ;
2465 generic_state->brc_init_current_target_buf_full_in_bits += generic_state->brc_init_reset_input_bits_per_frame;
/* AVBR only: growth-adjust frame boundaries and rate-ratio thresholds,
 * scaled by the configured convergence/accuracy. */
2467 if (generic_state->internal_rate_mode == INTEL_BRC_AVBR) {
2468 cmd->dw3.start_gadj_frame0 = (unsigned int)((10 * generic_state->avbr_convergence) / (double)150);
2469 cmd->dw3.start_gadj_frame1 = (unsigned int)((50 * generic_state->avbr_convergence) / (double)150);
2470 cmd->dw4.start_gadj_frame2 = (unsigned int)((100 * generic_state->avbr_convergence) / (double)150);
2471 cmd->dw4.start_gadj_frame3 = (unsigned int)((150 * generic_state->avbr_convergence) / (double)150);
2472 cmd->dw11.g_rate_ratio_threshold_0 = (unsigned int)((100 - (generic_state->avbr_curracy / (double)30) * (100 - 40)));
2473 cmd->dw11.g_rate_ratio_threshold_1 = (unsigned int)((100 - (generic_state->avbr_curracy / (double)30) * (100 - 75)));
2474 cmd->dw12.g_rate_ratio_threshold_2 = (unsigned int)((100 - (generic_state->avbr_curracy / (double)30) * (100 - 97)));
2475 cmd->dw12.g_rate_ratio_threshold_3 = (unsigned int)((100 + (generic_state->avbr_curracy / (double)30) * (103 - 100)));
2476 cmd->dw12.g_rate_ratio_threshold_4 = (unsigned int)((100 + (generic_state->avbr_curracy / (double)30) * (125 - 100)));
2477 cmd->dw12.g_rate_ratio_threshold_5 = (unsigned int)((100 + (generic_state->avbr_curracy / (double)30) * (160 - 100)));
2480 cmd->dw15.enable_roi = generic_state->brc_roi_enable ;
/* Recompute the per-level max frame size from the current stream geometry. */
2482 memset(&common_param, 0, sizeof(common_param));
2483 common_param.frame_width_in_pixel = generic_state->frame_width_in_pixel;
2484 common_param.frame_height_in_pixel = generic_state->frame_height_in_pixel;
2485 common_param.frame_width_in_mbs = generic_state->frame_width_in_mbs;
2486 common_param.frame_height_in_mbs = generic_state->frame_height_in_mbs;
2487 common_param.frames_per_100s = generic_state->frames_per_100s;
2488 common_param.vbv_buffer_size_in_bit = generic_state->vbv_buffer_size_in_bit;
2489 common_param.target_bit_rate = generic_state->target_bit_rate;
2491 cmd->dw19.user_max_frame = i965_avc_get_profile_level_max_frame(&common_param, seq_param->level_idc);
2492 i965_gpe_context_unmap_curbe(gpe_context);
/* Bind all surfaces for the frame-level BRC update kernel. Binding-table
 * indices differ between Gen9 (SKL/BXT) and Gen9.5 (KBL/GLK); is_g95
 * selects the right constant set. */
2498 gen9_avc_send_surface_brc_frame_update(VADriverContextP ctx,
2499 struct encode_state *encode_state,
2500 struct i965_gpe_context *gpe_context,
2501 struct intel_encoder_context *encoder_context,
2504 struct i965_driver_data *i965 = i965_driver_data(ctx);
2505 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2506 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
2507 struct brc_param * param = (struct brc_param *)param_brc ;
2508 struct i965_gpe_context * gpe_context_mbenc = param->gpe_context_mbenc;
2509 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
2510 unsigned char is_g95 = 0;
/* Platform check: SKL/BXT use the Gen9 table, KBL/GLK the Gen9.5 table. */
2512 if (IS_SKL(i965->intel.device_info) ||
2513 IS_BXT(i965->intel.device_info))
2515 else if (IS_KBL(i965->intel.device_info) ||
2516 IS_GLK(i965->intel.device_info))
2519 /* brc history buffer*/
2520 gen9_add_buffer_gpe_surface(ctx,
2522 &avc_ctx->res_brc_history_buffer,
2524 avc_ctx->res_brc_history_buffer.size,
2526 (is_g95 ? GEN95_AVC_FRAME_BRC_UPDATE_HISTORY_INDEX : GEN9_AVC_FRAME_BRC_UPDATE_HISTORY_INDEX));
2528 /* previous pak buffer*/
2529 gen9_add_buffer_gpe_surface(ctx,
2531 &avc_ctx->res_brc_pre_pak_statistics_output_buffer,
2533 avc_ctx->res_brc_pre_pak_statistics_output_buffer.size,
2535 (is_g95 ? GEN95_AVC_FRAME_BRC_UPDATE_PAK_STATISTICS_OUTPUT_INDEX : GEN9_AVC_FRAME_BRC_UPDATE_PAK_STATISTICS_OUTPUT_INDEX));
2537 /* image state command buffer read only*/
2538 gen9_add_buffer_gpe_surface(ctx,
2540 &avc_ctx->res_brc_image_state_read_buffer,
2542 avc_ctx->res_brc_image_state_read_buffer.size,
2544 (is_g95 ? GEN95_AVC_FRAME_BRC_UPDATE_IMAGE_STATE_READ_INDEX : GEN9_AVC_FRAME_BRC_UPDATE_IMAGE_STATE_READ_INDEX));
2546 /* image state command buffer write only*/
2547 gen9_add_buffer_gpe_surface(ctx,
2549 &avc_ctx->res_brc_image_state_write_buffer,
2551 avc_ctx->res_brc_image_state_write_buffer.size,
2553 (is_g95 ? GEN95_AVC_FRAME_BRC_UPDATE_IMAGE_STATE_WRITE_INDEX : GEN9_AVC_FRAME_BRC_UPDATE_IMAGE_STATE_WRITE_INDEX));
/* Gen9.5 path: BRC writes the MBEnc curbe into a dedicated buffer;
 * Gen9 path (else): BRC patches the MBEnc curbe in dynamic state directly. */
2555 if (avc_state->mbenc_brc_buffer_size > 0) {
2556 gen9_add_buffer_gpe_surface(ctx,
2558 &(avc_ctx->res_mbenc_brc_buffer),
2560 avc_ctx->res_mbenc_brc_buffer.size,
2562 GEN95_AVC_FRAME_BRC_UPDATE_MBENC_CURBE_WRITE_INDEX);
2564 /* Mbenc curbe input buffer */
2565 gen9_add_dri_buffer_gpe_surface(ctx,
2567 gpe_context_mbenc->dynamic_state.bo,
2569 ALIGN(gpe_context_mbenc->curbe.length, 64),
2570 gpe_context_mbenc->curbe.offset,
2571 GEN9_AVC_FRAME_BRC_UPDATE_MBENC_CURBE_READ_INDEX);
2572 /* Mbenc curbe output buffer */
2573 gen9_add_dri_buffer_gpe_surface(ctx,
2575 gpe_context_mbenc->dynamic_state.bo,
2577 ALIGN(gpe_context_mbenc->curbe.length, 64),
2578 gpe_context_mbenc->curbe.offset,
2579 GEN9_AVC_FRAME_BRC_UPDATE_MBENC_CURBE_WRITE_INDEX);
2582 /* AVC_ME Distortion 2D surface buffer,input/output. is it res_brc_dist_data_surface*/
2583 gen9_add_buffer_2d_gpe_surface(ctx,
2585 &avc_ctx->res_brc_dist_data_surface,
2587 I965_SURFACEFORMAT_R8_UNORM,
2588 (is_g95 ? GEN95_AVC_FRAME_BRC_UPDATE_DISTORTION_INDEX : GEN9_AVC_FRAME_BRC_UPDATE_DISTORTION_INDEX));
2590 /* BRC const data 2D surface buffer */
2591 gen9_add_buffer_2d_gpe_surface(ctx,
2593 &avc_ctx->res_brc_const_data_buffer,
2595 I965_SURFACEFORMAT_R8_UNORM,
2596 (is_g95 ? GEN95_AVC_FRAME_BRC_UPDATE_CONSTANT_DATA_INDEX : GEN9_AVC_FRAME_BRC_UPDATE_CONSTANT_DATA_INDEX));
2598 /* MB statistical data surface*/
2599 gen9_add_buffer_gpe_surface(ctx,
2601 &avc_ctx->res_mb_status_buffer,
2603 avc_ctx->res_mb_status_buffer.size,
2605 (is_g95 ? GEN95_AVC_FRAME_BRC_UPDATE_MB_STATUS_INDEX : GEN9_AVC_FRAME_BRC_UPDATE_MB_STATUS_INDEX));
/* Run the frame-level BRC update kernel. First programs the MBEnc curbe
 * (so BRC can patch it), then sets up and dispatches the BRC frame-update
 * media object. Returns VA_STATUS_SUCCESS. */
2611 gen9_avc_kernel_brc_frame_update(VADriverContextP ctx,
2612 struct encode_state *encode_state,
2613 struct intel_encoder_context *encoder_context)
2616 struct i965_driver_data *i965 = i965_driver_data(ctx);
2617 struct i965_gpe_table *gpe = &i965->gpe_table;
2618 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2619 struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
2620 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
2621 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
2622 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
2624 struct i965_gpe_context *gpe_context = NULL;
2625 struct gpe_media_object_parameter media_object_param;
2626 struct gpe_media_object_inline_data media_object_inline_data;
2627 int media_function = 0;
2629 unsigned int mb_const_data_buffer_in_use, mb_qp_buffer_in_use;
2630 unsigned int brc_enabled = 0;
2631 unsigned int roi_enable = (generic_state->num_roi > 0) ? 1 : 0;
/* Dirty-ROI is hard-disabled here via the trailing "&& (0)". */
2632 unsigned int dirty_roi_enable = ((generic_state->dirty_num_roi > 0) && (generic_state->frame_type == SLICE_TYPE_P) && (0));
2634 /* the following set the mbenc curbe*/
2635 struct mbenc_param curbe_mbenc_param ;
2636 struct brc_param curbe_brc_param ;
/* Decide which optional MBEnc input buffers this frame will consume. */
2638 mb_const_data_buffer_in_use =
2639 generic_state->mb_brc_enabled ||
2642 avc_state->mb_qp_data_enable ||
2643 avc_state->rolling_intra_refresh_enable;
2644 mb_qp_buffer_in_use =
2645 generic_state->mb_brc_enabled ||
2646 generic_state->brc_roi_enable ||
2647 avc_state->mb_qp_data_enable;
/* Pick the MBEnc kernel variant by quality/performance mode... */
2649 switch (generic_state->kernel_mode) {
2650 case INTEL_ENC_KERNEL_NORMAL : {
2651 kernel_idx = MBENC_KERNEL_BASE + GEN9_AVC_KERNEL_MBENC_NORMAL_I;
2654 case INTEL_ENC_KERNEL_PERFORMANCE : {
2655 kernel_idx = MBENC_KERNEL_BASE + GEN9_AVC_KERNEL_MBENC_PERFORMANCE_I;
2658 case INTEL_ENC_KERNEL_QUALITY : {
2659 kernel_idx = MBENC_KERNEL_BASE + GEN9_AVC_KERNEL_MBENC_QUALITY_I;
/* ...then offset by frame type (I/P/B). */
2667 if (generic_state->frame_type == SLICE_TYPE_P) {
2669 } else if (generic_state->frame_type == SLICE_TYPE_B) {
2673 gpe_context = &(avc_ctx->context_mbenc.gpe_contexts[kernel_idx]);
2674 gpe->context_init(ctx, gpe_context);
2676 memset(&curbe_mbenc_param, 0, sizeof(struct mbenc_param));
2678 curbe_mbenc_param.mb_const_data_buffer_in_use = mb_const_data_buffer_in_use;
2679 curbe_mbenc_param.mb_qp_buffer_in_use = mb_qp_buffer_in_use;
2680 curbe_mbenc_param.mbenc_i_frame_dist_in_use = 0;
2681 curbe_mbenc_param.brc_enabled = brc_enabled;
2682 curbe_mbenc_param.roi_enabled = roi_enable;
2684 /* set curbe mbenc*/
2685 generic_ctx->pfn_set_curbe_mbenc(ctx, encode_state, gpe_context, encoder_context, &curbe_mbenc_param);
2687 // gen95 sets the mbenc curbe outside BRC; gen9 does it here
2688 avc_state->mbenc_curbe_set_in_brc_update = !avc_state->decouple_mbenc_curbe_from_brc_enable;
2689 /*begin brc frame update*/
2690 memset(&curbe_brc_param, 0, sizeof(struct brc_param));
/* Hand the MBEnc gpe context to BRC so it can patch that curbe,
 * then switch gpe_context to the BRC frame-update kernel. */
2691 curbe_brc_param.gpe_context_mbenc = gpe_context;
2692 media_function = INTEL_MEDIA_STATE_BRC_UPDATE;
2693 kernel_idx = GEN9_AVC_KERNEL_BRC_FRAME_UPDATE;
2694 gpe_context = &(avc_ctx->context_brc.gpe_contexts[kernel_idx]);
2695 curbe_brc_param.gpe_context_brc_frame_update = gpe_context;
2697 gpe->context_init(ctx, gpe_context);
2698 gpe->reset_binding_table(ctx, gpe_context);
2699 /*brc copy ignored*/
2701 /* set curbe frame update*/
2702 generic_ctx->pfn_set_curbe_brc_frame_update(ctx, encode_state, gpe_context, encoder_context, &curbe_brc_param);
2704 /* load brc constant data (distinct from the mbenc mb-brc constant data) */
2705 if (avc_state->multi_pre_enable) {
2706 gen9_avc_init_brc_const_data(ctx, encode_state, encoder_context);
2708 gen9_avc_init_brc_const_data_old(ctx, encode_state, encoder_context);
2710 /* image state construct*/
2711 gen9_avc_set_image_state(ctx, encode_state, encoder_context, &(avc_ctx->res_brc_image_state_read_buffer));
2712 /* set surface frame mbenc*/
2713 generic_ctx->pfn_send_brc_frame_update_surface(ctx, encode_state, gpe_context, encoder_context, &curbe_brc_param);
2716 gpe->setup_interface_data(ctx, gpe_context);
/* Single media object dispatch with zeroed inline data. */
2718 memset(&media_object_param, 0, sizeof(media_object_param));
2719 memset(&media_object_inline_data, 0, sizeof(media_object_inline_data));
2720 media_object_param.pinline_data = &media_object_inline_data;
2721 media_object_param.inline_size = sizeof(media_object_inline_data);
2723 gen9_avc_run_kernel_media_object(ctx, encoder_context,
2726 &media_object_param);
2728 return VA_STATUS_SUCCESS;
/* Fill the (small) CURBE for the MB-level BRC update kernel:
 * current frame type and the ROI-enable flag. */
2732 gen9_avc_set_curbe_brc_mb_update(VADriverContextP ctx,
2733 struct encode_state *encode_state,
2734 struct i965_gpe_context *gpe_context,
2735 struct intel_encoder_context *encoder_context,
2738 gen9_avc_mb_brc_curbe_data *cmd;
2739 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2740 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
2742 cmd = i965_gpe_context_map_curbe(gpe_context);
/* This curbe has no static defaults; zero everything first. */
2747 memset(cmd, 0, sizeof(gen9_avc_mb_brc_curbe_data));
2749 cmd->dw0.cur_frame_type = generic_state->frame_type;
2750 if (generic_state->brc_roi_enable) {
2751 cmd->dw0.enable_roi = 1;
2753 cmd->dw0.enable_roi = 0;
2756 i965_gpe_context_unmap_curbe(gpe_context);
/* Bind the surfaces for the MB-level BRC update kernel: history buffer,
 * per-MB QP surface (when MB BRC is on), ROI surface (when ROI is on)
 * and the MB status buffer. */
2762 gen9_avc_send_surface_brc_mb_update(VADriverContextP ctx,
2763 struct encode_state *encode_state,
2764 struct i965_gpe_context *gpe_context,
2765 struct intel_encoder_context *encoder_context,
2768 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2769 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
2770 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
2772 /* brc history buffer*/
2773 gen9_add_buffer_gpe_surface(ctx,
2775 &avc_ctx->res_brc_history_buffer,
2777 avc_ctx->res_brc_history_buffer.size,
2779 GEN9_AVC_MB_BRC_UPDATE_HISTORY_INDEX)
/* Per-MB QP output surface, bound only when MB-level BRC is active. */;
2781 /* MB qp data buffer is it same as res_mbbrc_mb_qp_data_surface*/
2782 if (generic_state->mb_brc_enabled) {
2783 gen9_add_buffer_2d_gpe_surface(ctx,
2785 &avc_ctx->res_mbbrc_mb_qp_data_surface,
2787 I965_SURFACEFORMAT_R8_UNORM,
2788 GEN9_AVC_MB_BRC_UPDATE_MB_QP_INDEX);
2792 /* BRC roi feature*/
2793 if (generic_state->brc_roi_enable) {
2794 gen9_add_buffer_gpe_surface(ctx,
2796 &avc_ctx->res_mbbrc_roi_surface,
2798 avc_ctx->res_mbbrc_roi_surface.size,
2800 GEN9_AVC_MB_BRC_UPDATE_ROI_INDEX);
2804 /* MB statistical data surface*/
2805 gen9_add_buffer_gpe_surface(ctx,
2807 &avc_ctx->res_mb_status_buffer,
2809 avc_ctx->res_mb_status_buffer.size,
2811 GEN9_AVC_MB_BRC_UPDATE_MB_STATUS_INDEX);
/* Dispatch the MB-level BRC update kernel over the whole frame with a
 * dependency-free media-object walker (one thread per 2x2-MB region).
 * Returns VA_STATUS_SUCCESS. */
2817 gen9_avc_kernel_brc_mb_update(VADriverContextP ctx,
2818 struct encode_state *encode_state,
2819 struct intel_encoder_context *encoder_context)
2822 struct i965_driver_data *i965 = i965_driver_data(ctx);
2823 struct i965_gpe_table *gpe = &i965->gpe_table;
2824 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2825 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
2826 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
2827 struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
2829 struct i965_gpe_context *gpe_context;
2830 struct gpe_media_object_walker_parameter media_object_walker_param;
2831 struct gpe_encoder_kernel_walker_parameter kernel_walker_param;
2832 int media_function = 0;
2835 media_function = INTEL_MEDIA_STATE_MB_BRC_UPDATE;
2836 kernel_idx = GEN9_AVC_KERNEL_BRC_MB_UPDATE;
2837 gpe_context = &(avc_ctx->context_brc.gpe_contexts[kernel_idx]);
2839 gpe->context_init(ctx, gpe_context);
2840 gpe->reset_binding_table(ctx, gpe_context);
2842 /* set curbe brc mb update*/
2843 generic_ctx->pfn_set_curbe_brc_mb_update(ctx, encode_state, gpe_context, encoder_context, NULL);
2846 /* set surface brc mb update*/
2847 generic_ctx->pfn_send_brc_mb_update_surface(ctx, encode_state, gpe_context, encoder_context, NULL);
2850 gpe->setup_interface_data(ctx, gpe_context);
2852 memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
2853 /* the scaling is based on 8x8 blk level */
2854 kernel_walker_param.resolution_x = (generic_state->frame_width_in_mbs + 1) / 2;
2855 kernel_walker_param.resolution_y = (generic_state->frame_height_in_mbs + 1) / 2 ;
/* Threads have no spatial dependency, so the walker can run fully parallel. */
2856 kernel_walker_param.no_dependency = 1;
2858 i965_init_media_object_walker_parameter(&kernel_walker_param, &media_object_walker_param);
2860 gen9_avc_run_kernel_media_object_walker(ctx, encoder_context,
2863 &media_object_walker_param);
2865 return VA_STATUS_SUCCESS;
2869 mbenc kernel related functions, including the intra-distortion kernel
/* Derive the VME bi-prediction weight from the L0 distance scale factor.
 * Only a fixed set of weights {16, 21, 32, 43, 48} is supported by the
 * hardware; anything else falls back to the 32 (equal-weight) default. */
2872 gen9_avc_get_biweight(int dist_scale_factor_ref_id0_list0, unsigned short weighted_bipredidc)
2874 int biweight = 32; // default value
2876 /* based on kernel HLD*/
2877 if (weighted_bipredidc != INTEL_AVC_WP_MODE_IMPLICIT) {
/* Implicit weighting: weight = DistScaleFactor >> 2, rounded. */
2880 biweight = (dist_scale_factor_ref_id0_list0 + 2) >> 2;
2882 if (biweight != 16 && biweight != 21 &&
2883 biweight != 32 && biweight != 43 && biweight != 48) {
2884 biweight = 32; // If # of B-pics between two refs is more than 3. VME does not support it.
/* Compute avc_state->dist_scale_factor_list0[] for every active L0
 * reference: the H.264 implicit weighted-prediction DistScaleFactor
 * (tb/td POC-distance math, cf. spec clause 8.4.2.3.1), clamped to
 * [-1024, 1023]. Invalid reference entries terminate the loop early. */
2892 gen9_avc_get_dist_scale_factor(VADriverContextP ctx,
2893 struct encode_state *encode_state,
2894 struct intel_encoder_context *encoder_context)
2896 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2897 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
2898 VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
2899 VAEncPictureParameterBufferH264 *pic_param = avc_state->pic_param;
2901 int max_num_references;
2902 VAPictureH264 *curr_pic;
2903 VAPictureH264 *ref_pic_l0;
2904 VAPictureH264 *ref_pic_l1;
2913 max_num_references = pic_param->num_ref_idx_l0_active_minus1 + 1;
2915 memset(avc_state->dist_scale_factor_list0, 0, 32 * sizeof(unsigned int));
2916 curr_pic = &pic_param->CurrPic;
2917 for (i = 0; i < max_num_references; i++) {
2918 ref_pic_l0 = &(slice_param->RefPicList0[i]);
/* Skip the rest of the list as soon as an L0 entry is invalid. */
2920 if ((ref_pic_l0->flags & VA_PICTURE_H264_INVALID) ||
2921 (ref_pic_l0->picture_id == VA_INVALID_SURFACE))
2923 ref_pic_l1 = &(slice_param->RefPicList1[0]);
/* Fixed copy-paste bug: the original re-checked ref_pic_l0 here,
 * leaving ref_pic_l1 unvalidated before its POC is read below. */
2924 if ((ref_pic_l1->flags & VA_PICTURE_H264_INVALID) ||
2925 (ref_pic_l1->picture_id == VA_INVALID_SURFACE))
/* tb = POC(cur) - POC(L0 ref); td = POC(L1 ref) - POC(L0 ref). */
2928 poc0 = (curr_pic->TopFieldOrderCnt - ref_pic_l0->TopFieldOrderCnt);
2929 poc1 = (ref_pic_l1->TopFieldOrderCnt - ref_pic_l0->TopFieldOrderCnt);
2930 CLIP(poc0, -128, 127);
2931 CLIP(poc1, -128, 127);
/* DistScaleFactor = clip((tb * tx + 32) >> 6, -1024, 1023), tx = (16384 + |td/2|) / td. */
2938 tmp = (td / 2 > 0) ? (td / 2) : (-(td / 2));
2939 tx = (16384 + tmp) / td ;
2940 tmp = (tb * tx + 32) >> 6;
2941 CLIP(tmp, -1024, 1023);
2942 avc_state->dist_scale_factor_list0[i] = tmp;
/* Look up the QP that was used to encode a reference picture: pick the
 * surface from RefPicList0 or RefPicList1 at ref_frame_idx, then return
 * the qp_value stored in its private AVC surface state. */
2948 gen9_avc_get_qp_from_ref_list(VADriverContextP ctx,
2949 VAEncSliceParameterBufferH264 *slice_param,
2953 struct i965_driver_data *i965 = i965_driver_data(ctx);
2954 struct object_surface *obj_surface;
2955 struct gen9_surface_avc *avc_priv_surface;
2956 VASurfaceID surface_id;
2958 assert(slice_param);
/* L0 list lookup (bounded by the active reference count). */
2962 if (ref_frame_idx < slice_param->num_ref_idx_l0_active_minus1 + 1)
2963 surface_id = slice_param->RefPicList0[ref_frame_idx].picture_id;
/* L1 list lookup. */
2967 if (ref_frame_idx < slice_param->num_ref_idx_l1_active_minus1 + 1)
2968 surface_id = slice_param->RefPicList1[ref_frame_idx].picture_id;
2972 obj_surface = SURFACE(surface_id);
2973 if (obj_surface && obj_surface->private_data) {
2974 avc_priv_surface = obj_surface->private_data;
2975 return avc_priv_surface->qp_value;
/* Populate the MB-BRC constant-data surface: start from the per-slice-type
 * default table (16 dwords per QP, 52 QPs), then patch per-QP entries
 * (mode/MV costs, skip thresholds, intra scaling, trellis lambdas)
 * according to the enabled encoder features. */
2982 gen9_avc_load_mb_brc_const_data(VADriverContextP ctx,
2983 struct encode_state *encode_state,
2984 struct intel_encoder_context *encoder_context)
2986 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2987 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
2988 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
2989 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
2991 struct i965_gpe_resource *gpe_resource = NULL;
2992 unsigned int * data = NULL;
2993 unsigned int * data_tmp = NULL;
/* 16 dwords per QP entry x 52 QP values. */
2994 unsigned int size = 16 * 52;
2995 unsigned int table_idx = 0;
2996 unsigned int block_based_skip_enable = avc_state->block_based_skip_enable;
2997 unsigned int transform_8x8_mode_flag = avc_state->transform_8x8_mode_enable;
3000 gpe_resource = &(avc_ctx->res_mbbrc_const_data_buffer);
3001 assert(gpe_resource);
3002 data = i965_map_gpe_resource(gpe_resource);
3005 table_idx = slice_type_kernel[generic_state->frame_type];
/* Base table selected by slice type; feature patches are applied below. */
3007 memcpy(data, gen9_avc_mb_brc_const_data[table_idx][0], size * sizeof(unsigned int));
3011 switch (generic_state->frame_type) {
/* I slice: optionally substitute the legacy intra mode cost per QP. */
3013 for (i = 0; i < AVC_QP_MAX ; i++) {
3014 if (avc_state->old_mode_cost_enable)
3015 *data = (unsigned int)gen9_avc_old_intra_mode_cost[i];
/* P/B slices: patch skip costs/thresholds and intra scaling per QP. */
3021 for (i = 0; i < AVC_QP_MAX ; i++) {
3022 if (generic_state->frame_type == SLICE_TYPE_P) {
3023 if (avc_state->skip_bias_adjustment_enable)
3024 *(data + 3) = (unsigned int)gen9_avc_mv_cost_p_skip_adjustment[i];
3026 if (avc_state->non_ftq_skip_threshold_lut_input_enable) {
3027 *(data + 9) = (unsigned int)i965_avc_calc_skip_value(block_based_skip_enable, transform_8x8_mode_flag, avc_state->non_ftq_skip_threshold_lut[i]);
3028 } else if (generic_state->frame_type == SLICE_TYPE_P) {
3029 *(data + 9) = (unsigned int)gen9_avc_skip_value_p[block_based_skip_enable][transform_8x8_mode_flag][i];
3031 *(data + 9) = (unsigned int)gen9_avc_skip_value_b[block_based_skip_enable][transform_8x8_mode_flag][i];
3034 if (avc_state->adaptive_intra_scaling_enable) {
3035 *(data + 10) = (unsigned int)gen9_avc_adaptive_intra_scaling_factor[i];
3037 *(data + 10) = (unsigned int)gen9_avc_intra_scaling_factor[i];
/* All slice types: FTQ skip thresholds packed into byte lanes of
 * dwords 6/7, plus trellis lambda values in dwords 11/12. */
3049 for (i = 0; i < AVC_QP_MAX ; i++) {
3050 if (avc_state->ftq_skip_threshold_lut_input_enable) {
3051 *(data + 6) = (avc_state->ftq_skip_threshold_lut[i] |
3052 (avc_state->ftq_skip_threshold_lut[i] << 16) |
3053 (avc_state->ftq_skip_threshold_lut[i] << 24));
3054 *(data + 7) = (avc_state->ftq_skip_threshold_lut[i] |
3055 (avc_state->ftq_skip_threshold_lut[i] << 8) |
3056 (avc_state->ftq_skip_threshold_lut[i] << 16) |
3057 (avc_state->ftq_skip_threshold_lut[i] << 24));
3060 if (avc_state->kernel_trellis_enable) {
3061 *(data + 11) = (unsigned int)avc_state->lamda_value_lut[i][0];
3062 *(data + 12) = (unsigned int)avc_state->lamda_value_lut[i][1];
3068 i965_unmap_gpe_resource(gpe_resource);
/*
 * gen9_avc_set_curbe_mbenc:
 * Fill the CURBE (constant buffer) for the AVC MBEnc media kernel.
 * `cmd` is a union of two per-platform layouts: cmd.g9 (SKL/BXT,
 * gen9_avc_mbenc_curbe_data) and cmd.g95 (KBL/GLK,
 * gen95_avc_mbenc_curbe_data); fields shared between the two layouts are
 * written through cmd.g9, gen95-only fields through cmd.g95 (guarded by
 * is_g95 in elided lines).  The CURBE is seeded from a per-frame-type
 * static init table, then patched from slice/picture parameters, the
 * preset tables, HME state, multi-ref prediction, ROI, rolling intra
 * refresh and (gen95) trellis-quantization lambdas, and finally the
 * binding-table surface indices are written.
 *
 * NOTE(review): excerpt with elided lines (embedded source numbers jump);
 * `union { ... } cmd`, the final `void *param` parameter, switch-case
 * labels, else branches and is_g9/is_g95 guards live in the elided lines.
 */
3072 gen9_avc_set_curbe_mbenc(VADriverContextP ctx,
3073                          struct encode_state *encode_state,
3074                          struct i965_gpe_context *gpe_context,
3075                          struct intel_encoder_context *encoder_context,
3078     struct i965_driver_data *i965 = i965_driver_data(ctx);
3080     gen9_avc_mbenc_curbe_data *g9;
3081     gen95_avc_mbenc_curbe_data *g95;
3083     struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
3084     struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
3085     struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
3087     VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
3088     VAEncPictureParameterBufferH264 *pic_param = avc_state->pic_param;
3089     VASurfaceID surface_id;
3090     struct object_surface *obj_surface;
3092     struct mbenc_param * curbe_param = (struct mbenc_param *)param ;
3093     unsigned char qp = 0;
3094     unsigned char me_method = 0;
3095     unsigned int mbenc_i_frame_dist_in_use = curbe_param->mbenc_i_frame_dist_in_use;
3096     unsigned int table_idx = 0;
3097     unsigned char is_g9 = 0;
3098     unsigned char is_g95 = 0;
3099     unsigned int curbe_size = 0;
3101     unsigned int preset = generic_state->preset;
     /* --- Map CURBE and seed it from the per-platform init tables. --- */
3102     if (IS_SKL(i965->intel.device_info) ||
3103         IS_BXT(i965->intel.device_info)) {
3104         cmd.g9 = (gen9_avc_mbenc_curbe_data *)i965_gpe_context_map_curbe(gpe_context);
3108         curbe_size = sizeof(gen9_avc_mbenc_curbe_data);
3109         memset(cmd.g9, 0, curbe_size);
3111         if (mbenc_i_frame_dist_in_use) {
     /* I-frame-distortion mode (BRC init pass) uses its own init table. */
3112             memcpy(cmd.g9, gen9_avc_mbenc_curbe_i_frame_dist_init_data, curbe_size);
3115             switch (generic_state->frame_type) {
3117                 memcpy(cmd.g9, gen9_avc_mbenc_curbe_normal_i_frame_init_data, curbe_size);
3120                 memcpy(cmd.g9, gen9_avc_mbenc_curbe_normal_p_frame_init_data, curbe_size);
3123                 memcpy(cmd.g9, gen9_avc_mbenc_curbe_normal_b_frame_init_data, curbe_size);
3130     } else if (IS_KBL(i965->intel.device_info) ||
3131                IS_GLK(i965->intel.device_info)) {
3132         cmd.g95 = (gen95_avc_mbenc_curbe_data *)i965_gpe_context_map_curbe(gpe_context);
3136         curbe_size = sizeof(gen95_avc_mbenc_curbe_data);
     /* NOTE(review): zeroing via cmd.g9 in the g95 branch — harmless since
        cmd is a union (same pointer), but inconsistent; confirm intent. */
3137         memset(cmd.g9, 0, curbe_size);
3139         if (mbenc_i_frame_dist_in_use) {
3140             memcpy(cmd.g95, gen95_avc_mbenc_curbe_i_frame_dist_init_data, curbe_size);
3143             switch (generic_state->frame_type) {
3145                 memcpy(cmd.g95, gen95_avc_mbenc_curbe_normal_i_frame_init_data, curbe_size);
3148                 memcpy(cmd.g95, gen95_avc_mbenc_curbe_normal_p_frame_init_data, curbe_size);
3151                 memcpy(cmd.g95, gen95_avc_mbenc_curbe_normal_b_frame_init_data, curbe_size);
     /* --- Common patch-up (fields shared by g9/g95 layouts). --- */
3160     me_method = (generic_state->frame_type == SLICE_TYPE_B) ? gen9_avc_b_me_method[preset] : gen9_avc_p_me_method[preset];
     /* Slice QP = picture init QP + per-slice delta (CQP path). */
3161     qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
3163     cmd.g9->dw0.adaptive_enable = gen9_avc_enable_adaptive_search[preset];
3164     cmd.g9->dw37.adaptive_enable = gen9_avc_enable_adaptive_search[preset];
3165     cmd.g9->dw0.t8x8_flag_for_inter_enable = avc_state->transform_8x8_mode_enable;
3166     cmd.g9->dw37.t8x8_flag_for_inter_enable = avc_state->transform_8x8_mode_enable;
3168     cmd.g9->dw2.max_len_sp = gen9_avc_max_len_sp[preset];
3169     cmd.g9->dw38.max_len_sp = 0;
     /* gen95-only field (guard elided). */
3172         cmd.g95->dw1.extended_mv_cost_range = avc_state->extended_mv_cost_range_enable;
3174     cmd.g9->dw3.src_access = 0;
3175     cmd.g9->dw3.ref_access = 0;
     /* FTQ (fast transform quantization) enable, inter frames only. */
3177     if (avc_state->ftq_enable && (generic_state->frame_type != SLICE_TYPE_I)) {
3178         //disable ftq_override by now.
3179         if (avc_state->ftq_override) {
3180             cmd.g9->dw3.ftq_enable = avc_state->ftq_enable;
3183             // both gen9 and gen95 come here by now
3184             if (generic_state->frame_type == SLICE_TYPE_P) {
3185                 cmd.g9->dw3.ftq_enable = gen9_avc_max_ftq_based_skip[preset] & 0x01;
     /* (elided `} else {`) B-frame uses bit 1 of the preset table. */
3188                 cmd.g9->dw3.ftq_enable = (gen9_avc_max_ftq_based_skip[preset] >> 1) & 0x01;
3192         cmd.g9->dw3.ftq_enable = 0;
3195     if (avc_state->disable_sub_mb_partion)
3196         cmd.g9->dw3.sub_mb_part_mask = 0x7;
     /* Frame geometry: 4x-downscaled for I-frame-distortion mode, full size
        otherwise. */
3198     if (mbenc_i_frame_dist_in_use) {
3199         cmd.g9->dw2.pitch_width = generic_state->downscaled_width_4x_in_mb;
3200         cmd.g9->dw4.picture_height_minus1 = generic_state->downscaled_height_4x_in_mb - 1;
3201         cmd.g9->dw5.slice_mb_height = (avc_state->slice_height + 4 - 1) / 4;
3202         cmd.g9->dw6.batch_buffer_end = 0;
3203         cmd.g9->dw31.intra_compute_type = 1;
3206         cmd.g9->dw2.pitch_width = generic_state->frame_width_in_mbs;
3207         cmd.g9->dw4.picture_height_minus1 = generic_state->frame_height_in_mbs - 1;
3208         cmd.g9->dw5.slice_mb_height = (avc_state->arbitrary_num_mbs_in_slice) ? generic_state->frame_height_in_mbs : avc_state->slice_height;
     /* DW8..15: mode/MV cost table for this slice type and QP. */
3211     memcpy(&(cmd.g9->dw8), gen9_avc_mode_mv_cost_table[slice_type_kernel[generic_state->frame_type]][qp], 8 * sizeof(unsigned int));
3212     if ((generic_state->frame_type == SLICE_TYPE_I) && avc_state->old_mode_cost_enable) {
3213         //cmd.g9->dw8 = gen9_avc_old_intra_mode_cost[qp];
3214     } else if (avc_state->skip_bias_adjustment_enable) {
3215         /* Load different MvCost for P picture when SkipBiasAdjustment is enabled
3216         // No need to check for P picture as the flag is only enabled for P picture */
3217         cmd.g9->dw11.value = gen9_avc_mv_cost_p_skip_adjustment[qp];
     /* DW16..31: motion-estimation search path for the chosen ME method. */
3222     table_idx = (generic_state->frame_type == SLICE_TYPE_B) ? 1 : 0;
3223     memcpy(&(cmd.g9->dw16), table_enc_search_path[table_idx][me_method], 16 * sizeof(unsigned int));
3225     cmd.g9->dw4.enable_fbr_bypass = avc_state->fbr_bypass_enable;
3226     cmd.g9->dw4.enable_intra_cost_scaling_for_static_frame = avc_state->sfd_enable && generic_state->hme_enabled;
3227     cmd.g9->dw4.field_parity_flag = 0;//bottom field
3228     cmd.g9->dw4.enable_cur_fld_idr = 0;//field realted
3229     cmd.g9->dw4.contrained_intra_pred_flag = pic_param->pic_fields.bits.constrained_intra_pred_flag;
3230     cmd.g9->dw4.hme_enable = generic_state->hme_enabled;
3231     cmd.g9->dw4.picture_type = slice_type_kernel[generic_state->frame_type];
3232     cmd.g9->dw4.use_actual_ref_qp_value = generic_state->hme_enabled && (gen9_avc_mr_disable_qp_check[preset] == 0);
3235     cmd.g9->dw7.intra_part_mask = avc_state->transform_8x8_mode_enable ? 0 : 0x02;
3236     cmd.g9->dw7.src_field_polarity = 0;//field related
3238     /*ftq_skip_threshold_lut set,dw14 /15*/
3240     /*r5 disable NonFTQSkipThresholdLUT*/
3241     if (generic_state->frame_type == SLICE_TYPE_P) {
3242         cmd.g9->dw32.skip_val = gen9_avc_skip_value_p[avc_state->block_based_skip_enable][avc_state->transform_8x8_mode_enable][qp];
3244     } else if (generic_state->frame_type == SLICE_TYPE_B) {
3245         cmd.g9->dw32.skip_val = gen9_avc_skip_value_b[avc_state->block_based_skip_enable][avc_state->transform_8x8_mode_enable][qp];
3249     cmd.g9->dw13.qp_prime_y = qp;
3250     cmd.g9->dw13.qp_prime_cb = qp;
3251     cmd.g9->dw13.qp_prime_cr = qp;
3252     cmd.g9->dw13.target_size_in_word = 0xff;//hardcode for brc disable
     /* Multi-reference prediction: 128 disables, 1 enables the extra
        prediction lists (preset-table driven; case labels elided). */
3254     if ((generic_state->frame_type != SLICE_TYPE_I) && avc_state->multi_pre_enable) {
3255         switch (gen9_avc_multi_pred[preset]) {
3257             cmd.g9->dw32.mult_pred_l0_disable = 128;
3258             cmd.g9->dw32.mult_pred_l1_disable = 128;
3261             cmd.g9->dw32.mult_pred_l0_disable = (generic_state->frame_type == SLICE_TYPE_P) ? 1 : 128;
3262             cmd.g9->dw32.mult_pred_l1_disable = 128;
3265             cmd.g9->dw32.mult_pred_l0_disable = (generic_state->frame_type == SLICE_TYPE_B) ? 1 : 128;
3266             cmd.g9->dw32.mult_pred_l1_disable = (generic_state->frame_type == SLICE_TYPE_B) ? 1 : 128;
3269             cmd.g9->dw32.mult_pred_l0_disable = 1;
3270             cmd.g9->dw32.mult_pred_l1_disable = (generic_state->frame_type == SLICE_TYPE_B) ? 1 : 128;
     /* (elided `} else {`) multi-pred off */
3276         cmd.g9->dw32.mult_pred_l0_disable = 128;
3277         cmd.g9->dw32.mult_pred_l1_disable = 128;
3280     /*field setting for dw33 34, ignored*/
3282     if (avc_state->adaptive_transform_decision_enable) {
3283         if (generic_state->frame_type != SLICE_TYPE_I) {
3284             cmd.g9->dw34.enable_adaptive_tx_decision = 1;
     /* g95/g9 branches (guards elided): thresholds live at different DWs. */
3286                 cmd.g95->dw60.mb_texture_threshold = 1024;
3287                 cmd.g95->dw60.tx_decision_threshold = 128;
3293                 cmd.g9->dw58.mb_texture_threshold = 1024;
3294                 cmd.g9->dw58.tx_decision_threshold = 128;
3299     if (generic_state->frame_type == SLICE_TYPE_B) {
     /* NOTE(review): the same field is assigned twice on the next two
        lines; upstream sets list1_ref_id1_frm_field_parity on the second
        one — looks like a copy-paste slip, confirm against full file. */
3300         cmd.g9->dw34.list1_ref_id0_frm_field_parity = 0; //frame only
3301         cmd.g9->dw34.list1_ref_id0_frm_field_parity = 0;
3302         cmd.g9->dw34.b_direct_mode = slice_param->direct_spatial_mv_pred_flag;
3305     cmd.g9->dw34.b_original_bff = 0; //frame only
3306     cmd.g9->dw34.enable_mb_flatness_check_optimization = avc_state->flatness_check_enable;
3307     cmd.g9->dw34.roi_enable_flag = curbe_param->roi_enabled;
3308     cmd.g9->dw34.mad_enable_falg = avc_state->mad_enable;
3309     cmd.g9->dw34.mb_brc_enable = avc_state->mb_qp_data_enable || generic_state->mb_brc_enabled;
3310     cmd.g9->dw34.arbitray_num_mbs_per_slice = avc_state->arbitrary_num_mbs_in_slice;
     /* gen95-only trellis-quantization flags (guard elided). */
3312         cmd.g95->dw34.tq_enable = avc_state->tq_enable;
3313         cmd.g95->dw34.cqp_flag = !generic_state->brc_enabled;
3317     cmd.g9->dw34.force_non_skip_check = avc_state->mb_disable_skip_map_enable;
3319     if (cmd.g9->dw34.force_non_skip_check) {
3320         cmd.g9->dw34.disable_enc_skip_check = avc_state->skip_check_disable;
3325     cmd.g9->dw36.check_all_fractional_enable = avc_state->caf_enable;
3326     cmd.g9->dw38.ref_threshold = 400;
3327     cmd.g9->dw39.hme_ref_windows_comb_threshold = (generic_state->frame_type == SLICE_TYPE_B) ? gen9_avc_hme_b_combine_len[preset] : gen9_avc_hme_combine_len[preset];
3329     /* Default:2 used for MBBRC (MB QP Surface width and height are 4x downscaled picture in MB unit * 4 bytes)
3330        0 used for MBQP data surface (MB QP Surface width and height are same as the input picture size in MB unit * 1bytes)
3331        starting GEN9, BRC use split kernel, MB QP surface is same size as input picture */
3332     cmd.g9->dw47.mb_qp_read_factor = (avc_state->mb_qp_data_enable || generic_state->mb_brc_enabled) ? 0 : 2;
     /* I-frame-distortion mode: neutralize QP and intra penalties. */
3334     if (mbenc_i_frame_dist_in_use) {
3335         cmd.g9->dw13.qp_prime_y = 0;
3336         cmd.g9->dw13.qp_prime_cb = 0;
3337         cmd.g9->dw13.qp_prime_cr = 0;
3338         cmd.g9->dw33.intra_16x16_nondc_penalty = 0;
3339         cmd.g9->dw33.intra_8x8_nondc_penalty = 0;
3340         cmd.g9->dw33.intra_4x4_nondc_penalty = 0;
     /* Per-reference actual QP values (multi-ref quality check). */
3343     if (cmd.g9->dw4.use_actual_ref_qp_value) {
3344         cmd.g9->dw44.actual_qp_value_for_ref_id0_list0 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 0);
3345         cmd.g9->dw44.actual_qp_value_for_ref_id1_list0 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 1);
3346         cmd.g9->dw44.actual_qp_value_for_ref_id2_list0 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 2);
3347         cmd.g9->dw44.actual_qp_value_for_ref_id3_list0 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 3);
3348         cmd.g9->dw45.actual_qp_value_for_ref_id4_list0 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 4);
3349         cmd.g9->dw45.actual_qp_value_for_ref_id5_list0 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 5);
3350         cmd.g9->dw45.actual_qp_value_for_ref_id6_list0 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 6);
3351         cmd.g9->dw45.actual_qp_value_for_ref_id7_list0 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 7);
3352         cmd.g9->dw46.actual_qp_value_for_ref_id0_list1 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 1, 0);
3353         cmd.g9->dw46.actual_qp_value_for_ref_id1_list1 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 1, 1);
3356     table_idx = slice_type_kernel[generic_state->frame_type];
3357     cmd.g9->dw46.ref_cost = gen9_avc_ref_cost[table_idx][qp];
     /* --- Per-slice-type settings (I, then P, then elided-else B). --- */
3359     if (generic_state->frame_type == SLICE_TYPE_I) {
3360         cmd.g9->dw0.skip_mode_enable = 0;
3361         cmd.g9->dw37.skip_mode_enable = 0;
3362         cmd.g9->dw36.hme_combine_overlap = 0;
3363         cmd.g9->dw47.intra_cost_sf = 16;
3364         cmd.g9->dw34.enable_direct_bias_adjustment = 0;
3366             cmd.g9->dw34.enable_global_motion_bias_adjustment = 0;
3368     } else if (generic_state->frame_type == SLICE_TYPE_P) {
3369         cmd.g9->dw1.max_num_mvs = i965_avc_get_max_mv_per_2mb(avc_state->seq_param->level_idc) / 2;
3370         cmd.g9->dw3.bme_disable_fbr = 1;
3371         cmd.g9->dw5.ref_width = gen9_avc_search_x[preset];
3372         cmd.g9->dw5.ref_height = gen9_avc_search_y[preset];
3373         cmd.g9->dw7.non_skip_zmv_added = 1;
3374         cmd.g9->dw7.non_skip_mode_added = 1;
3375         cmd.g9->dw7.skip_center_mask = 1;
3376         cmd.g9->dw47.intra_cost_sf = (avc_state->adaptive_intra_scaling_enable) ? gen9_avc_adaptive_intra_scaling_factor[qp] : gen9_avc_intra_scaling_factor[qp];
3377         cmd.g9->dw47.max_vmv_r = i965_avc_get_max_mv_len(avc_state->seq_param->level_idc) * 4;//frame onlys
3378         cmd.g9->dw36.hme_combine_overlap = 1;
3379         cmd.g9->dw36.num_ref_idx_l0_minus_one = (avc_state->multi_pre_enable) ? slice_param->num_ref_idx_l0_active_minus1 : 0;
3380         cmd.g9->dw39.ref_width = gen9_avc_search_x[preset];
3381         cmd.g9->dw39.ref_height = gen9_avc_search_y[preset];
3382         cmd.g9->dw34.enable_direct_bias_adjustment = 0;
3383         cmd.g9->dw34.enable_global_motion_bias_adjustment = avc_state->global_motion_bias_adjustment_enable;
3384         if (is_g9 && avc_state->global_motion_bias_adjustment_enable)
3385             cmd.g9->dw59.hme_mv_cost_scaling_factor = avc_state->hme_mv_cost_scaling_factor;
     /* (elided `} else {`) B-slice settings. */
3388         cmd.g9->dw1.max_num_mvs = i965_avc_get_max_mv_per_2mb(avc_state->seq_param->level_idc) / 2;
3389         cmd.g9->dw1.bi_weight = avc_state->bi_weight;
3390         cmd.g9->dw3.search_ctrl = 7;
3391         cmd.g9->dw3.skip_type = 1;
3392         cmd.g9->dw5.ref_width = gen9_avc_b_search_x[preset];
3393         cmd.g9->dw5.ref_height = gen9_avc_b_search_y[preset];
3394         cmd.g9->dw7.skip_center_mask = 0xff;
3395         cmd.g9->dw47.intra_cost_sf = (avc_state->adaptive_intra_scaling_enable) ? gen9_avc_adaptive_intra_scaling_factor[qp] : gen9_avc_intra_scaling_factor[qp];
3396         cmd.g9->dw47.max_vmv_r = i965_avc_get_max_mv_len(avc_state->seq_param->level_idc) * 4;//frame only
3397         cmd.g9->dw36.hme_combine_overlap = 1;
     /* First backward reference (L1[0]) is required for B slices. */
3398         surface_id = slice_param->RefPicList1[0].picture_id;
3399         obj_surface = SURFACE(surface_id);
3401             WARN_ONCE("Invalid backward reference frame\n");
3404         cmd.g9->dw36.is_fwd_frame_short_term_ref = !!(slice_param->RefPicList1[0].flags & VA_PICTURE_H264_SHORT_TERM_REFERENCE);
3406         cmd.g9->dw36.num_ref_idx_l0_minus_one = (avc_state->multi_pre_enable) ? slice_param->num_ref_idx_l0_active_minus1 : 0;
3407         cmd.g9->dw36.num_ref_idx_l1_minus_one = (avc_state->multi_pre_enable) ? slice_param->num_ref_idx_l1_active_minus1 : 0;
3408         cmd.g9->dw39.ref_width = gen9_avc_b_search_x[preset];
3409         cmd.g9->dw39.ref_height = gen9_avc_b_search_y[preset];
     /* Temporal direct-mode distance scale factors for L0 refs 0..7. */
3410         cmd.g9->dw40.dist_scale_factor_ref_id0_list0 = avc_state->dist_scale_factor_list0[0];
3411         cmd.g9->dw40.dist_scale_factor_ref_id1_list0 = avc_state->dist_scale_factor_list0[1];
3412         cmd.g9->dw41.dist_scale_factor_ref_id2_list0 = avc_state->dist_scale_factor_list0[2];
3413         cmd.g9->dw41.dist_scale_factor_ref_id3_list0 = avc_state->dist_scale_factor_list0[3];
3414         cmd.g9->dw42.dist_scale_factor_ref_id4_list0 = avc_state->dist_scale_factor_list0[4];
3415         cmd.g9->dw42.dist_scale_factor_ref_id5_list0 = avc_state->dist_scale_factor_list0[5];
3416         cmd.g9->dw43.dist_scale_factor_ref_id6_list0 = avc_state->dist_scale_factor_list0[6];
3417         cmd.g9->dw43.dist_scale_factor_ref_id7_list0 = avc_state->dist_scale_factor_list0[7];
3419         cmd.g9->dw34.enable_direct_bias_adjustment = avc_state->direct_bias_adjustment_enable;
3420         if (cmd.g9->dw34.enable_direct_bias_adjustment) {
3421             cmd.g9->dw7.non_skip_zmv_added = 1;
3422             cmd.g9->dw7.non_skip_mode_added = 1;
3425         cmd.g9->dw34.enable_global_motion_bias_adjustment = avc_state->global_motion_bias_adjustment_enable;
3426         if (is_g9 && avc_state->global_motion_bias_adjustment_enable)
3427             cmd.g9->dw59.hme_mv_cost_scaling_factor = avc_state->hme_mv_cost_scaling_factor;
     /* Record the effective value back into encoder state (init tables may
        have set it). */
3431     avc_state->block_based_skip_enable = cmd.g9->dw3.block_based_skip_enable;
     /* --- Rolling intra refresh (currently forced off: counts zeroed). --- */
3433     if (avc_state->rolling_intra_refresh_enable) {
3434         /*by now disable it*/
3435         cmd.g9->dw34.widi_intra_refresh_en = avc_state->rolling_intra_refresh_enable;
3436         cmd.g9->dw32.mult_pred_l0_disable = 128;
3437         /* Pass the same IntraRefreshUnit to the kernel w/o the adjustment by -1, so as to have an overlap of one MB row or column of Intra macroblocks
3438            across one P frame to another P frame, as needed by the RollingI algo */
3440         cmd.g9->dw48.widi_intra_refresh_mb_num = 0;
3441         cmd.g9->dw48.widi_intra_refresh_unit_in_mb_minus1 = 0;
3442         cmd.g9->dw48.widi_intra_refresh_qp_delta = 0;
     /* gen95 path (guard elided): square rolling-I with BRC is disabled. */
3446             if (avc_state->rolling_intra_refresh_enable == INTEL_ROLLING_I_SQUARE && generic_state->brc_enabled) {
3447                 cmd.g95->dw4.enable_intra_refresh = 0;
3448                 cmd.g95->dw34.widi_intra_refresh_en = INTEL_ROLLING_I_DISABLED;
3449                 cmd.g95->dw48.widi_intra_refresh_mb_x = 0;
3450                 cmd.g95->dw61.widi_intra_refresh_mb_y = 0;
3452                 cmd.g95->dw4.enable_intra_refresh = 1;
3453                 cmd.g95->dw34.widi_intra_refresh_en = avc_state->rolling_intra_refresh_enable;
3454                 cmd.g95->dw48.widi_intra_refresh_mb_x = 0;
3455                 cmd.g95->dw61.widi_intra_refresh_mb_y = 0;
3456                 cmd.g9->dw48.widi_intra_refresh_unit_in_mb_minus1 = 0;
3457                 cmd.g9->dw48.widi_intra_refresh_qp_delta = 0;
3462         cmd.g9->dw34.widi_intra_refresh_en = 0;
3465     cmd.g9->dw34.enable_per_mb_static_check = avc_state->sfd_enable && generic_state->hme_enabled;
3466     cmd.g9->dw34.enable_adaptive_search_window_size = avc_state->adaptive_search_window_enable;
     /* --- ROI rectangles (up to 4) and, in CQP mode, their QP deltas. --- */
3468     /*roi set disable by now. 49-56*/
3469     if (curbe_param->roi_enabled) {
3470         cmd.g9->dw49.roi_1_x_left = generic_state->roi[0].left;
3471         cmd.g9->dw49.roi_1_y_top = generic_state->roi[0].top;
3472         cmd.g9->dw50.roi_1_x_right = generic_state->roi[0].right;
3473         cmd.g9->dw50.roi_1_y_bottom = generic_state->roi[0].bottom;
3475         cmd.g9->dw51.roi_2_x_left = generic_state->roi[1].left;
3476         cmd.g9->dw51.roi_2_y_top = generic_state->roi[1].top;
3477         cmd.g9->dw52.roi_2_x_right = generic_state->roi[1].right;
3478         cmd.g9->dw52.roi_2_y_bottom = generic_state->roi[1].bottom;
3480         cmd.g9->dw53.roi_3_x_left = generic_state->roi[2].left;
3481         cmd.g9->dw53.roi_3_y_top = generic_state->roi[2].top;
3482         cmd.g9->dw54.roi_3_x_right = generic_state->roi[2].right;
3483         cmd.g9->dw54.roi_3_y_bottom = generic_state->roi[2].bottom;
3485         cmd.g9->dw55.roi_4_x_left = generic_state->roi[3].left;
3486         cmd.g9->dw55.roi_4_y_top = generic_state->roi[3].top;
3487         cmd.g9->dw56.roi_4_x_right = generic_state->roi[3].right;
3488         cmd.g9->dw56.roi_4_y_bottom = generic_state->roi[3].bottom;
3490         if (!generic_state->brc_enabled) {
     /* CQP: clamp each ROI QP delta so qp+delta stays in [0, AVC_QP_MAX]. */
3492             tmp = generic_state->roi[0].value;
3493             CLIP(tmp, -qp, AVC_QP_MAX - qp);
3494             cmd.g9->dw57.roi_1_dqp_prime_y = tmp;
3495             tmp = generic_state->roi[1].value;
3496             CLIP(tmp, -qp, AVC_QP_MAX - qp);
3497             cmd.g9->dw57.roi_2_dqp_prime_y = tmp;
3498             tmp = generic_state->roi[2].value;
3499             CLIP(tmp, -qp, AVC_QP_MAX - qp);
3500             cmd.g9->dw57.roi_3_dqp_prime_y = tmp;
3501             tmp = generic_state->roi[3].value;
3502             CLIP(tmp, -qp, AVC_QP_MAX - qp);
3503             cmd.g9->dw57.roi_4_dqp_prime_y = tmp;
3505             cmd.g9->dw34.roi_enable_flag = 0;
     /* --- gen95 trellis-quantization lambdas, clamped to the HW maximum. --- */
3510         if (avc_state->tq_enable) {
3511             if (generic_state->frame_type == SLICE_TYPE_I) {
3512                 cmd.g95->dw58.value = gen95_avc_tq_lambda_i_frame[qp][0];
3513                 cmd.g95->dw59.value = gen95_avc_tq_lambda_i_frame[qp][1];
3515             } else if (generic_state->frame_type == SLICE_TYPE_P) {
3516                 cmd.g95->dw58.value = gen95_avc_tq_lambda_p_frame[qp][0];
3517                 cmd.g95->dw59.value = gen95_avc_tq_lambda_p_frame[qp][1];
3520                 cmd.g95->dw58.value = gen95_avc_tq_lambda_b_frame[qp][0];
3521                 cmd.g95->dw59.value = gen95_avc_tq_lambda_b_frame[qp][1];
3524             if (cmd.g95->dw58.lambda_8x8_inter > GEN95_AVC_MAX_LAMBDA)
3525                 cmd.g95->dw58.lambda_8x8_inter = 0xf000 + avc_state->rounding_value;
3527             if (cmd.g95->dw58.lambda_8x8_intra > GEN95_AVC_MAX_LAMBDA)
3528                 cmd.g95->dw58.lambda_8x8_intra = 0xf000 + GEN95_AVC_DEFAULT_TRELLIS_QUANT_INTRA_ROUNDING;
3530             if (cmd.g95->dw59.lambda_inter > GEN95_AVC_MAX_LAMBDA)
3531                 cmd.g95->dw59.lambda_inter = 0xf000 + avc_state->rounding_value;
3533             if (cmd.g95->dw59.lambda_intra > GEN95_AVC_MAX_LAMBDA)
3534                 cmd.g95->dw59.lambda_intra = 0xf000 + GEN95_AVC_DEFAULT_TRELLIS_QUANT_INTRA_ROUNDING;
     /* --- Binding-table surface indices: gen95 layout (dw66..87)... --- */
3539         cmd.g95->dw66.mb_data_surf_index = GEN9_AVC_MBENC_MFC_AVC_PAK_OBJ_INDEX;
3540         cmd.g95->dw67.mv_data_surf_index = GEN9_AVC_MBENC_IND_MV_DATA_INDEX;
3541         cmd.g95->dw68.i_dist_surf_index = GEN9_AVC_MBENC_BRC_DISTORTION_INDEX;
3542         cmd.g95->dw69.src_y_surf_index = GEN9_AVC_MBENC_CURR_Y_INDEX;
3543         cmd.g95->dw70.mb_specific_data_surf_index = GEN9_AVC_MBENC_MB_SPECIFIC_DATA_INDEX;
3544         cmd.g95->dw71.aux_vme_out_surf_index = GEN9_AVC_MBENC_AUX_VME_OUT_INDEX;
3545         cmd.g95->dw72.curr_ref_pic_sel_surf_index = GEN9_AVC_MBENC_REFPICSELECT_L0_INDEX;
3546         cmd.g95->dw73.hme_mv_pred_fwd_bwd_surf_index = GEN9_AVC_MBENC_MV_DATA_FROM_ME_INDEX;
3547         cmd.g95->dw74.hme_dist_surf_index = GEN9_AVC_MBENC_4XME_DISTORTION_INDEX;
3548         cmd.g95->dw75.slice_map_surf_index = GEN9_AVC_MBENC_SLICEMAP_DATA_INDEX;
3549         cmd.g95->dw76.fwd_frm_mb_data_surf_index = GEN9_AVC_MBENC_FWD_MB_DATA_INDEX;
3550         cmd.g95->dw77.fwd_frm_mv_surf_index = GEN9_AVC_MBENC_FWD_MV_DATA_INDEX;
3551         cmd.g95->dw78.mb_qp_buffer = GEN9_AVC_MBENC_MBQP_INDEX;
3552         cmd.g95->dw79.mb_brc_lut = GEN9_AVC_MBENC_MBBRC_CONST_DATA_INDEX;
3553         cmd.g95->dw80.vme_inter_prediction_surf_index = GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_0_INDEX;
3554         cmd.g95->dw81.vme_inter_prediction_mr_surf_index = GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_1_INDEX;
3555         cmd.g95->dw82.mb_stats_surf_index = GEN9_AVC_MBENC_MB_STATS_INDEX;
3556         cmd.g95->dw83.mad_surf_index = GEN9_AVC_MBENC_MAD_DATA_INDEX;
3557         cmd.g95->dw84.brc_curbe_surf_index = GEN95_AVC_MBENC_BRC_CURBE_DATA_INDEX;
3558         cmd.g95->dw85.force_non_skip_mb_map_surface = GEN95_AVC_MBENC_FORCE_NONSKIP_MB_MAP_INDEX;
3559         cmd.g95->dw86.widi_wa_surf_index = GEN95_AVC_MBENC_WIDI_WA_INDEX;
3560         cmd.g95->dw87.static_detection_cost_table_index = GEN95_AVC_MBENC_SFD_COST_TABLE_INDEX;
     /* --- ...or gen9 layout (dw64..85); branch guard elided. --- */
3564         cmd.g9->dw64.mb_data_surf_index = GEN9_AVC_MBENC_MFC_AVC_PAK_OBJ_INDEX;
3565         cmd.g9->dw65.mv_data_surf_index = GEN9_AVC_MBENC_IND_MV_DATA_INDEX;
3566         cmd.g9->dw66.i_dist_surf_index = GEN9_AVC_MBENC_BRC_DISTORTION_INDEX;
3567         cmd.g9->dw67.src_y_surf_index = GEN9_AVC_MBENC_CURR_Y_INDEX;
3568         cmd.g9->dw68.mb_specific_data_surf_index = GEN9_AVC_MBENC_MB_SPECIFIC_DATA_INDEX;
3569         cmd.g9->dw69.aux_vme_out_surf_index = GEN9_AVC_MBENC_AUX_VME_OUT_INDEX;
3570         cmd.g9->dw70.curr_ref_pic_sel_surf_index = GEN9_AVC_MBENC_REFPICSELECT_L0_INDEX;
3571         cmd.g9->dw71.hme_mv_pred_fwd_bwd_surf_index = GEN9_AVC_MBENC_MV_DATA_FROM_ME_INDEX;
3572         cmd.g9->dw72.hme_dist_surf_index = GEN9_AVC_MBENC_4XME_DISTORTION_INDEX;
3573         cmd.g9->dw73.slice_map_surf_index = GEN9_AVC_MBENC_SLICEMAP_DATA_INDEX;
3574         cmd.g9->dw74.fwd_frm_mb_data_surf_index = GEN9_AVC_MBENC_FWD_MB_DATA_INDEX;
3575         cmd.g9->dw75.fwd_frm_mv_surf_index = GEN9_AVC_MBENC_FWD_MV_DATA_INDEX;
3576         cmd.g9->dw76.mb_qp_buffer = GEN9_AVC_MBENC_MBQP_INDEX;
3577         cmd.g9->dw77.mb_brc_lut = GEN9_AVC_MBENC_MBBRC_CONST_DATA_INDEX;
3578         cmd.g9->dw78.vme_inter_prediction_surf_index = GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_0_INDEX;
3579         cmd.g9->dw79.vme_inter_prediction_mr_surf_index = GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_1_INDEX;
3580         cmd.g9->dw80.mb_stats_surf_index = GEN9_AVC_MBENC_MB_STATS_INDEX;
3581         cmd.g9->dw81.mad_surf_index = GEN9_AVC_MBENC_MAD_DATA_INDEX;
3582         cmd.g9->dw82.force_non_skip_mb_map_surface = GEN9_AVC_MBENC_FORCE_NONSKIP_MB_MAP_INDEX;
3583         cmd.g9->dw83.widi_wa_surf_index = GEN9_AVC_MBENC_WIDI_WA_INDEX;
3584         cmd.g9->dw84.brc_curbe_surf_index = GEN9_AVC_MBENC_BRC_CURBE_DATA_INDEX;
3585         cmd.g9->dw85.static_detection_cost_table_index = GEN9_AVC_MBENC_SFD_COST_TABLE_INDEX;
     /* Unmap the CURBE; the GPU reads it when the kernel is dispatched. */
3588     i965_gpe_context_unmap_curbe(gpe_context);
3594 gen9_avc_send_surface_mbenc(VADriverContextP ctx,
3595 struct encode_state *encode_state,
3596 struct i965_gpe_context *gpe_context,
3597 struct intel_encoder_context *encoder_context,
3600 struct i965_driver_data *i965 = i965_driver_data(ctx);
3601 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
3602 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
3603 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
3604 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
3605 struct object_surface *obj_surface;
3606 struct gen9_surface_avc *avc_priv_surface;
3607 struct i965_gpe_resource *gpe_resource;
3608 struct mbenc_param * param = (struct mbenc_param *)param_mbenc ;
3609 VASurfaceID surface_id;
3610 unsigned int mbenc_i_frame_dist_in_use = param->mbenc_i_frame_dist_in_use;
3611 unsigned int size = 0;
3612 unsigned int frame_mb_size = generic_state->frame_width_in_mbs *
3613 generic_state->frame_height_in_mbs;
3615 VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
3616 unsigned char is_g95 = 0;
3618 if (IS_SKL(i965->intel.device_info) ||
3619 IS_BXT(i965->intel.device_info))
3621 else if (IS_KBL(i965->intel.device_info) ||
3622 IS_GLK(i965->intel.device_info))
3625 obj_surface = encode_state->reconstructed_object;
3627 if (!obj_surface || !obj_surface->private_data)
3629 avc_priv_surface = obj_surface->private_data;
3631 /*pak obj command buffer output*/
3632 size = frame_mb_size * 16 * 4;
3633 gpe_resource = &avc_priv_surface->res_mb_code_surface;
3634 gen9_add_buffer_gpe_surface(ctx,
3640 GEN9_AVC_MBENC_MFC_AVC_PAK_OBJ_INDEX);
3642 /*mv data buffer output*/
3643 size = frame_mb_size * 32 * 4;
3644 gpe_resource = &avc_priv_surface->res_mv_data_surface;
3645 gen9_add_buffer_gpe_surface(ctx,
3651 GEN9_AVC_MBENC_IND_MV_DATA_INDEX);
3653 /*input current YUV surface, current input Y/UV object*/
3654 if (mbenc_i_frame_dist_in_use) {
3655 obj_surface = encode_state->reconstructed_object;
3656 if (!obj_surface || !obj_surface->private_data)
3658 avc_priv_surface = obj_surface->private_data;
3659 obj_surface = avc_priv_surface->scaled_4x_surface_obj;
3661 obj_surface = encode_state->input_yuv_object;
3663 gen9_add_2d_gpe_surface(ctx,
3668 I965_SURFACEFORMAT_R8_UNORM,
3669 GEN9_AVC_MBENC_CURR_Y_INDEX);
3671 gen9_add_2d_gpe_surface(ctx,
3676 I965_SURFACEFORMAT_R16_UINT,
3677 GEN9_AVC_MBENC_CURR_UV_INDEX);
3679 if (generic_state->hme_enabled) {
3681 gpe_resource = &(avc_ctx->s4x_memv_data_buffer);
3682 gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
3685 I965_SURFACEFORMAT_R8_UNORM,
3686 GEN9_AVC_MBENC_MV_DATA_FROM_ME_INDEX);
3687 /* memv distortion input*/
3688 gpe_resource = &(avc_ctx->s4x_memv_distortion_buffer);
3689 gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
3692 I965_SURFACEFORMAT_R8_UNORM,
3693 GEN9_AVC_MBENC_4XME_DISTORTION_INDEX);
3696 /*mbbrc const data_buffer*/
3697 if (param->mb_const_data_buffer_in_use) {
3698 size = 16 * AVC_QP_MAX * sizeof(unsigned int);
3699 gpe_resource = &avc_ctx->res_mbbrc_const_data_buffer;
3700 gen9_add_buffer_gpe_surface(ctx,
3706 GEN9_AVC_MBENC_MBBRC_CONST_DATA_INDEX);
3710 /*mb qp data_buffer*/
3711 if (param->mb_qp_buffer_in_use) {
3712 if (avc_state->mb_qp_data_enable)
3713 gpe_resource = &(avc_ctx->res_mb_qp_data_surface);
3715 gpe_resource = &(avc_ctx->res_mbbrc_mb_qp_data_surface);
3716 gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
3719 I965_SURFACEFORMAT_R8_UNORM,
3720 GEN9_AVC_MBENC_MBQP_INDEX);
3723 /*input current YUV surface, current input Y/UV object*/
3724 if (mbenc_i_frame_dist_in_use) {
3725 obj_surface = encode_state->reconstructed_object;
3726 if (!obj_surface || !obj_surface->private_data)
3728 avc_priv_surface = obj_surface->private_data;
3729 obj_surface = avc_priv_surface->scaled_4x_surface_obj;
3731 obj_surface = encode_state->input_yuv_object;
3733 gen9_add_adv_gpe_surface(ctx, gpe_context,
3735 GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_0_INDEX);
3736 /*input ref YUV surface*/
3737 for (i = 0; i < slice_param->num_ref_idx_l0_active_minus1 + 1; i++) {
3738 surface_id = slice_param->RefPicList0[i].picture_id;
3739 obj_surface = SURFACE(surface_id);
3740 if (!obj_surface || !obj_surface->private_data)
3743 gen9_add_adv_gpe_surface(ctx, gpe_context,
3745 GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_0_INDEX + i * 2 + 1);
3747 /*input current YUV surface, current input Y/UV object*/
3748 if (mbenc_i_frame_dist_in_use) {
3749 obj_surface = encode_state->reconstructed_object;
3750 if (!obj_surface || !obj_surface->private_data)
3752 avc_priv_surface = obj_surface->private_data;
3753 obj_surface = avc_priv_surface->scaled_4x_surface_obj;
3755 obj_surface = encode_state->input_yuv_object;
3757 gen9_add_adv_gpe_surface(ctx, gpe_context,
3759 GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_1_INDEX);
3761 for (i = 0; i < slice_param->num_ref_idx_l1_active_minus1 + 1; i++) {
3762 if (i > 0) break; // only one ref supported here for B frame
3763 surface_id = slice_param->RefPicList1[i].picture_id;
3764 obj_surface = SURFACE(surface_id);
3765 if (!obj_surface || !obj_surface->private_data)
3768 gen9_add_adv_gpe_surface(ctx, gpe_context,
3770 GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_1_INDEX + i * 2 + 1);
3771 gen9_add_adv_gpe_surface(ctx, gpe_context,
3773 GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_0_INDEX + i * 2 + 2);
3775 avc_priv_surface = obj_surface->private_data;
3776 /*pak obj command buffer output(mb code)*/
3777 size = frame_mb_size * 16 * 4;
3778 gpe_resource = &avc_priv_surface->res_mb_code_surface;
3779 gen9_add_buffer_gpe_surface(ctx,
3785 GEN9_AVC_MBENC_FWD_MB_DATA_INDEX);
3787 /*mv data buffer output*/
3788 size = frame_mb_size * 32 * 4;
3789 gpe_resource = &avc_priv_surface->res_mv_data_surface;
3790 gen9_add_buffer_gpe_surface(ctx,
3796 GEN9_AVC_MBENC_FWD_MV_DATA_INDEX);
3800 if (i < INTEL_AVC_MAX_BWD_REF_NUM) {
3801 gen9_add_adv_gpe_surface(ctx, gpe_context,
3803 GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_1_INDEX + i * 2 + 1 + INTEL_AVC_MAX_BWD_REF_NUM);
3808 /* BRC distortion data buffer for I frame*/
3809 if (mbenc_i_frame_dist_in_use) {
3810 gpe_resource = &(avc_ctx->res_brc_dist_data_surface);
3811 gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
3814 I965_SURFACEFORMAT_R8_UNORM,
3815 GEN9_AVC_MBENC_BRC_DISTORTION_INDEX);
3818 /* as ref frame ,update later RefPicSelect of Current Picture*/
3819 obj_surface = encode_state->reconstructed_object;
3820 avc_priv_surface = obj_surface->private_data;
3821 if (avc_state->ref_pic_select_list_supported && avc_priv_surface->is_as_ref) {
3822 gpe_resource = &(avc_priv_surface->res_ref_pic_select_surface);
3823 gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
3826 I965_SURFACEFORMAT_R8_UNORM,
3827 GEN9_AVC_MBENC_REFPICSELECT_L0_INDEX);
3831 if (param->mb_vproc_stats_enable) {
3832 /*mb status buffer input*/
3833 size = frame_mb_size * 16 * 4;
3834 gpe_resource = &(avc_ctx->res_mb_status_buffer);
3835 gen9_add_buffer_gpe_surface(ctx,
3841 GEN9_AVC_MBENC_MB_STATS_INDEX);
3843 } else if (avc_state->flatness_check_enable) {
3845 gpe_resource = &(avc_ctx->res_flatness_check_surface);
3846 gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
3849 I965_SURFACEFORMAT_R8_UNORM,
3850 GEN9_AVC_MBENC_MB_STATS_INDEX);
3853 if (param->mad_enable) {
3854 /*mad buffer input*/
3856 gpe_resource = &(avc_ctx->res_mad_data_buffer);
3857 gen9_add_buffer_gpe_surface(ctx,
3863 GEN9_AVC_MBENC_MAD_DATA_INDEX);
3864 i965_zero_gpe_resource(gpe_resource);
3867 /*brc updated mbenc curbe data buffer,it is ignored by gen9 and used in gen95*/
3868 if (avc_state->mbenc_brc_buffer_size > 0) {
3869 size = avc_state->mbenc_brc_buffer_size;
3870 gpe_resource = &(avc_ctx->res_mbenc_brc_buffer);
3871 gen9_add_buffer_gpe_surface(ctx,
3877 GEN95_AVC_MBENC_BRC_CURBE_DATA_INDEX);
3880 /*arbitrary num mbs in slice*/
3881 if (avc_state->arbitrary_num_mbs_in_slice) {
3882 /*slice surface input*/
3883 gpe_resource = &(avc_ctx->res_mbenc_slice_map_surface);
3884 gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
3887 I965_SURFACEFORMAT_R8_UNORM,
3888 GEN9_AVC_MBENC_SLICEMAP_DATA_INDEX);
3889 gen9_avc_generate_slice_map(ctx, encode_state, encoder_context);
3892 /* BRC distortion data buffer for I frame */
3893 if (!mbenc_i_frame_dist_in_use) {
3894 if (avc_state->mb_disable_skip_map_enable) {
3895 gpe_resource = &(avc_ctx->res_mb_disable_skip_map_surface);
3896 gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
3899 I965_SURFACEFORMAT_R8_UNORM,
3900 (is_g95 ? GEN95_AVC_MBENC_FORCE_NONSKIP_MB_MAP_INDEX : GEN9_AVC_MBENC_FORCE_NONSKIP_MB_MAP_INDEX));
3903 if (avc_state->sfd_enable && generic_state->hme_enabled) {
3904 if (generic_state->frame_type == SLICE_TYPE_P) {
3905 gpe_resource = &(avc_ctx->res_sfd_cost_table_p_frame_buffer);
3907 } else if (generic_state->frame_type == SLICE_TYPE_B) {
3908 gpe_resource = &(avc_ctx->res_sfd_cost_table_b_frame_buffer);
3911 if (generic_state->frame_type != SLICE_TYPE_I) {
3912 gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
3915 I965_SURFACEFORMAT_R8_UNORM,
3916 (is_g95 ? GEN95_AVC_MBENC_SFD_COST_TABLE_INDEX : GEN9_AVC_MBENC_SFD_COST_TABLE_INDEX));
3925 gen9_avc_kernel_mbenc(VADriverContextP ctx,
3926 struct encode_state *encode_state,
3927 struct intel_encoder_context *encoder_context,
3928 bool i_frame_dist_in_use)
3930 struct i965_driver_data *i965 = i965_driver_data(ctx);
3931 struct i965_gpe_table *gpe = &i965->gpe_table;
3932 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
3933 struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
3934 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
3935 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
3936 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
3938 struct i965_gpe_context *gpe_context;
3939 struct gpe_media_object_walker_parameter media_object_walker_param;
3940 struct gpe_encoder_kernel_walker_parameter kernel_walker_param;
3941 unsigned int downscaled_width_in_mb, downscaled_height_in_mb;
3942 int media_function = 0;
3944 unsigned int mb_const_data_buffer_in_use = 0;
3945 unsigned int mb_qp_buffer_in_use = 0;
3946 unsigned int brc_enabled = 0;
3947 unsigned int roi_enable = (generic_state->num_roi > 0) ? 1 : 0;
3948 unsigned int dirty_roi_enable = ((generic_state->dirty_num_roi > 0) && (generic_state->frame_type == SLICE_TYPE_P) && (0));
3949 struct mbenc_param param ;
3951 int mbenc_i_frame_dist_in_use = i_frame_dist_in_use;
3953 VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
3955 mb_const_data_buffer_in_use =
3956 generic_state->mb_brc_enabled ||
3959 avc_state->mb_qp_data_enable ||
3960 avc_state->rolling_intra_refresh_enable;
3961 mb_qp_buffer_in_use =
3962 generic_state->mb_brc_enabled ||
3963 generic_state->brc_roi_enable ||
3964 avc_state->mb_qp_data_enable;
3966 if (mbenc_i_frame_dist_in_use) {
3967 media_function = INTEL_MEDIA_STATE_ENC_I_FRAME_DIST;
3968 kernel_idx = GEN9_AVC_KERNEL_BRC_I_FRAME_DIST;
3969 downscaled_width_in_mb = generic_state->downscaled_width_4x_in_mb;
3970 downscaled_height_in_mb = generic_state->downscaled_height_4x_in_mb;
3974 gpe_context = &(avc_ctx->context_brc.gpe_contexts[kernel_idx]);
3976 switch (generic_state->kernel_mode) {
3977 case INTEL_ENC_KERNEL_NORMAL : {
3978 media_function = INTEL_MEDIA_STATE_ENC_NORMAL;
3979 kernel_idx = MBENC_KERNEL_BASE + GEN9_AVC_KERNEL_MBENC_NORMAL_I;
3982 case INTEL_ENC_KERNEL_PERFORMANCE : {
3983 media_function = INTEL_MEDIA_STATE_ENC_PERFORMANCE;
3984 kernel_idx = MBENC_KERNEL_BASE + GEN9_AVC_KERNEL_MBENC_PERFORMANCE_I;
3987 case INTEL_ENC_KERNEL_QUALITY : {
3988 media_function = INTEL_MEDIA_STATE_ENC_QUALITY;
3989 kernel_idx = MBENC_KERNEL_BASE + GEN9_AVC_KERNEL_MBENC_QUALITY_I;
3997 if (generic_state->frame_type == SLICE_TYPE_P) {
3999 } else if (generic_state->frame_type == SLICE_TYPE_B) {
4003 downscaled_width_in_mb = generic_state->frame_width_in_mbs;
4004 downscaled_height_in_mb = generic_state->frame_height_in_mbs;
4005 mad_enable = avc_state->mad_enable;
4006 brc_enabled = generic_state->brc_enabled;
4008 gpe_context = &(avc_ctx->context_mbenc.gpe_contexts[kernel_idx]);
4011 memset(¶m, 0, sizeof(struct mbenc_param));
4013 param.mb_const_data_buffer_in_use = mb_const_data_buffer_in_use;
4014 param.mb_qp_buffer_in_use = mb_qp_buffer_in_use;
4015 param.mbenc_i_frame_dist_in_use = mbenc_i_frame_dist_in_use;
4016 param.mad_enable = mad_enable;
4017 param.brc_enabled = brc_enabled;
4018 param.roi_enabled = roi_enable;
4020 if (avc_state->mb_status_supported) {
4021 param.mb_vproc_stats_enable = avc_state->flatness_check_enable || avc_state->adaptive_transform_decision_enable;
4024 if (!avc_state->mbenc_curbe_set_in_brc_update) {
4025 gpe->context_init(ctx, gpe_context);
4028 gpe->reset_binding_table(ctx, gpe_context);
4030 if (!avc_state->mbenc_curbe_set_in_brc_update) {
4032 generic_ctx->pfn_set_curbe_mbenc(ctx, encode_state, gpe_context, encoder_context, ¶m);
4035 /* MB brc const data buffer set up*/
4036 if (mb_const_data_buffer_in_use) {
4037 // caculate the lambda table, it is kernel controlled trellis quantization,gen95+
4038 if (avc_state->lambda_table_enable)
4039 gen95_avc_calc_lambda_table(ctx, encode_state, encoder_context);
4041 gen9_avc_load_mb_brc_const_data(ctx, encode_state, encoder_context);
4044 /*clear the mad buffer*/
4046 i965_zero_gpe_resource(&(avc_ctx->res_mad_data_buffer));
4049 generic_ctx->pfn_send_mbenc_surface(ctx, encode_state, gpe_context, encoder_context, ¶m);
4051 gpe->setup_interface_data(ctx, gpe_context);
4054 memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
4056 kernel_walker_param.use_scoreboard = 1;
4057 kernel_walker_param.resolution_x = downscaled_width_in_mb ;
4058 kernel_walker_param.resolution_y = downscaled_height_in_mb ;
4059 if (mbenc_i_frame_dist_in_use) {
4060 kernel_walker_param.no_dependency = 1;
4062 switch (generic_state->frame_type) {
4064 kernel_walker_param.walker_degree = WALKER_45_DEGREE;
4067 kernel_walker_param.walker_degree = WALKER_26_DEGREE;
4070 kernel_walker_param.walker_degree = WALKER_26_DEGREE;
4071 if (!slice_param->direct_spatial_mv_pred_flag) {
4072 kernel_walker_param.walker_degree = WALKER_45_DEGREE;
4078 kernel_walker_param.no_dependency = 0;
4081 i965_init_media_object_walker_parameter(&kernel_walker_param, &media_object_walker_param);
4083 gen9_avc_run_kernel_media_object_walker(ctx, encoder_context,
4086 &media_object_walker_param);
4087 return VA_STATUS_SUCCESS;
4091 ME kernel related functions
4094 gen9_avc_set_curbe_me(VADriverContextP ctx,
4095 struct encode_state *encode_state,
4096 struct i965_gpe_context *gpe_context,
4097 struct intel_encoder_context *encoder_context,
4100 gen9_avc_me_curbe_data *curbe_cmd;
4101 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
4102 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
4103 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
4105 VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
4107 struct me_param * curbe_param = (struct me_param *)param ;
4108 unsigned char use_mv_from_prev_step = 0;
4109 unsigned char write_distortions = 0;
4110 unsigned char qp_prime_y = 0;
4111 unsigned char me_method = gen9_avc_p_me_method[generic_state->preset];
4112 unsigned char seach_table_idx = 0;
4113 unsigned char mv_shift_factor = 0, prev_mv_read_pos_factor = 0;
4114 unsigned int downscaled_width_in_mb, downscaled_height_in_mb;
4115 unsigned int scale_factor = 0;
4117 qp_prime_y = avc_state->pic_param->pic_init_qp + slice_param->slice_qp_delta;
4118 switch (curbe_param->hme_type) {
4119 case INTEL_ENC_HME_4x : {
4120 use_mv_from_prev_step = (generic_state->b16xme_enabled) ? 1 : 0;
4121 write_distortions = 1;
4122 mv_shift_factor = 2;
4124 prev_mv_read_pos_factor = 0;
4127 case INTEL_ENC_HME_16x : {
4128 use_mv_from_prev_step = (generic_state->b32xme_enabled) ? 1 : 0;
4129 write_distortions = 0;
4130 mv_shift_factor = 2;
4132 prev_mv_read_pos_factor = 1;
4135 case INTEL_ENC_HME_32x : {
4136 use_mv_from_prev_step = 0;
4137 write_distortions = 0;
4138 mv_shift_factor = 1;
4140 prev_mv_read_pos_factor = 0;
4147 curbe_cmd = i965_gpe_context_map_curbe(gpe_context);
4152 downscaled_width_in_mb = ALIGN(generic_state->frame_width_in_pixel / scale_factor, 16) / 16;
4153 downscaled_height_in_mb = ALIGN(generic_state->frame_height_in_pixel / scale_factor, 16) / 16;
4155 memcpy(curbe_cmd, gen9_avc_me_curbe_init_data, sizeof(gen9_avc_me_curbe_data));
4157 curbe_cmd->dw3.sub_pel_mode = 3;
4158 if (avc_state->field_scaling_output_interleaved) {
4159 /*frame set to zero,field specified*/
4160 curbe_cmd->dw3.src_access = 0;
4161 curbe_cmd->dw3.ref_access = 0;
4162 curbe_cmd->dw7.src_field_polarity = 0;
4164 curbe_cmd->dw4.picture_height_minus1 = downscaled_height_in_mb - 1;
4165 curbe_cmd->dw4.picture_width = downscaled_width_in_mb;
4166 curbe_cmd->dw5.qp_prime_y = qp_prime_y;
4168 curbe_cmd->dw6.use_mv_from_prev_step = use_mv_from_prev_step;
4169 curbe_cmd->dw6.write_distortions = write_distortions;
4170 curbe_cmd->dw6.super_combine_dist = gen9_avc_super_combine_dist[generic_state->preset];
4171 curbe_cmd->dw6.max_vmvr = i965_avc_get_max_mv_len(avc_state->seq_param->level_idc) * 4;//frame only
4173 if (generic_state->frame_type == SLICE_TYPE_B) {
4174 curbe_cmd->dw1.bi_weight = 32;
4175 curbe_cmd->dw13.num_ref_idx_l1_minus1 = slice_param->num_ref_idx_l1_active_minus1;
4176 me_method = gen9_avc_b_me_method[generic_state->preset];
4177 seach_table_idx = 1;
4180 if (generic_state->frame_type == SLICE_TYPE_P ||
4181 generic_state->frame_type == SLICE_TYPE_B)
4182 curbe_cmd->dw13.num_ref_idx_l0_minus1 = slice_param->num_ref_idx_l0_active_minus1;
4184 curbe_cmd->dw13.ref_streamin_cost = 5;
4185 curbe_cmd->dw13.roi_enable = 0;
4187 curbe_cmd->dw15.prev_mv_read_pos_factor = prev_mv_read_pos_factor;
4188 curbe_cmd->dw15.mv_shift_factor = mv_shift_factor;
4190 memcpy(&curbe_cmd->dw16, table_enc_search_path[seach_table_idx][me_method], 14 * sizeof(int));
4192 curbe_cmd->dw32._4x_memv_output_data_surf_index = GEN9_AVC_ME_MV_DATA_SURFACE_INDEX;
4193 curbe_cmd->dw33._16x_32x_memv_input_data_surf_index = (curbe_param->hme_type == INTEL_ENC_HME_32x) ? GEN9_AVC_32XME_MV_DATA_SURFACE_INDEX : GEN9_AVC_16XME_MV_DATA_SURFACE_INDEX ;
4194 curbe_cmd->dw34._4x_me_output_dist_surf_index = GEN9_AVC_ME_DISTORTION_SURFACE_INDEX;
4195 curbe_cmd->dw35._4x_me_output_brc_dist_surf_index = GEN9_AVC_ME_BRC_DISTORTION_INDEX;
4196 curbe_cmd->dw36.vme_fwd_inter_pred_surf_index = GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX;
4197 curbe_cmd->dw37.vme_bdw_inter_pred_surf_index = GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX;
4198 curbe_cmd->dw38.reserved = GEN9_AVC_ME_VDENC_STREAMIN_INDEX;
4200 i965_gpe_context_unmap_curbe(gpe_context);
4205 gen9_avc_send_surface_me(VADriverContextP ctx,
4206 struct encode_state *encode_state,
4207 struct i965_gpe_context *gpe_context,
4208 struct intel_encoder_context *encoder_context,
4211 struct i965_driver_data *i965 = i965_driver_data(ctx);
4213 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
4214 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
4215 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
4216 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
4218 struct object_surface *obj_surface, *input_surface;
4219 struct gen9_surface_avc *avc_priv_surface;
4220 struct i965_gpe_resource *gpe_resource;
4221 struct me_param * curbe_param = (struct me_param *)param ;
4223 VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
4224 VASurfaceID surface_id;
4227 /* all scaled input surface stored in reconstructed_object*/
4228 obj_surface = encode_state->reconstructed_object;
4229 if (!obj_surface || !obj_surface->private_data)
4231 avc_priv_surface = obj_surface->private_data;
4234 switch (curbe_param->hme_type) {
4235 case INTEL_ENC_HME_4x : {
4237 gpe_resource = &avc_ctx->s4x_memv_data_buffer;
4238 gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
4241 I965_SURFACEFORMAT_R8_UNORM,
4242 GEN9_AVC_ME_MV_DATA_SURFACE_INDEX);
4245 if (generic_state->b16xme_enabled) {
4246 gpe_resource = &avc_ctx->s16x_memv_data_buffer;
4247 gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
4250 I965_SURFACEFORMAT_R8_UNORM,
4251 GEN9_AVC_16XME_MV_DATA_SURFACE_INDEX);
4253 /* brc distortion output*/
4254 gpe_resource = &avc_ctx->res_brc_dist_data_surface;
4255 gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
4258 I965_SURFACEFORMAT_R8_UNORM,
4259 GEN9_AVC_ME_BRC_DISTORTION_INDEX);
4260 /* memv distortion output*/
4261 gpe_resource = &avc_ctx->s4x_memv_distortion_buffer;
4262 gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
4265 I965_SURFACEFORMAT_R8_UNORM,
4266 GEN9_AVC_ME_DISTORTION_SURFACE_INDEX);
4267 /*input current down scaled YUV surface*/
4268 obj_surface = encode_state->reconstructed_object;
4269 avc_priv_surface = obj_surface->private_data;
4270 input_surface = avc_priv_surface->scaled_4x_surface_obj;
4271 gen9_add_adv_gpe_surface(ctx, gpe_context,
4273 GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX);
4274 /*input ref scaled YUV surface*/
4275 for (i = 0; i < slice_param->num_ref_idx_l0_active_minus1 + 1; i++) {
4276 surface_id = slice_param->RefPicList0[i].picture_id;
4277 obj_surface = SURFACE(surface_id);
4278 if (!obj_surface || !obj_surface->private_data)
4280 avc_priv_surface = obj_surface->private_data;
4282 input_surface = avc_priv_surface->scaled_4x_surface_obj;
4284 gen9_add_adv_gpe_surface(ctx, gpe_context,
4286 GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX + i * 2 + 1);
4289 obj_surface = encode_state->reconstructed_object;
4290 avc_priv_surface = obj_surface->private_data;
4291 input_surface = avc_priv_surface->scaled_4x_surface_obj;
4293 gen9_add_adv_gpe_surface(ctx, gpe_context,
4295 GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX);
4297 for (i = 0; i < slice_param->num_ref_idx_l1_active_minus1 + 1; i++) {
4298 surface_id = slice_param->RefPicList1[i].picture_id;
4299 obj_surface = SURFACE(surface_id);
4300 if (!obj_surface || !obj_surface->private_data)
4302 avc_priv_surface = obj_surface->private_data;
4304 input_surface = avc_priv_surface->scaled_4x_surface_obj;
4306 gen9_add_adv_gpe_surface(ctx, gpe_context,
4308 GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX + i * 2 + 1);
4313 case INTEL_ENC_HME_16x : {
4314 gpe_resource = &avc_ctx->s16x_memv_data_buffer;
4315 gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
4318 I965_SURFACEFORMAT_R8_UNORM,
4319 GEN9_AVC_ME_MV_DATA_SURFACE_INDEX);
4321 if (generic_state->b32xme_enabled) {
4322 gpe_resource = &avc_ctx->s32x_memv_data_buffer;
4323 gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
4326 I965_SURFACEFORMAT_R8_UNORM,
4327 GEN9_AVC_32XME_MV_DATA_SURFACE_INDEX);
4330 obj_surface = encode_state->reconstructed_object;
4331 avc_priv_surface = obj_surface->private_data;
4332 input_surface = avc_priv_surface->scaled_16x_surface_obj;
4333 gen9_add_adv_gpe_surface(ctx, gpe_context,
4335 GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX);
4337 for (i = 0; i < slice_param->num_ref_idx_l0_active_minus1 + 1; i++) {
4338 surface_id = slice_param->RefPicList0[i].picture_id;
4339 obj_surface = SURFACE(surface_id);
4340 if (!obj_surface || !obj_surface->private_data)
4342 avc_priv_surface = obj_surface->private_data;
4344 input_surface = avc_priv_surface->scaled_16x_surface_obj;
4346 gen9_add_adv_gpe_surface(ctx, gpe_context,
4348 GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX + i * 2 + 1);
4351 obj_surface = encode_state->reconstructed_object;
4352 avc_priv_surface = obj_surface->private_data;
4353 input_surface = avc_priv_surface->scaled_16x_surface_obj;
4355 gen9_add_adv_gpe_surface(ctx, gpe_context,
4357 GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX);
4359 for (i = 0; i < slice_param->num_ref_idx_l1_active_minus1 + 1; i++) {
4360 surface_id = slice_param->RefPicList1[i].picture_id;
4361 obj_surface = SURFACE(surface_id);
4362 if (!obj_surface || !obj_surface->private_data)
4364 avc_priv_surface = obj_surface->private_data;
4366 input_surface = avc_priv_surface->scaled_16x_surface_obj;
4368 gen9_add_adv_gpe_surface(ctx, gpe_context,
4370 GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX + i * 2 + 1);
4374 case INTEL_ENC_HME_32x : {
4375 gpe_resource = &avc_ctx->s32x_memv_data_buffer;
4376 gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
4379 I965_SURFACEFORMAT_R8_UNORM,
4380 GEN9_AVC_ME_MV_DATA_SURFACE_INDEX);
4382 obj_surface = encode_state->reconstructed_object;
4383 avc_priv_surface = obj_surface->private_data;
4384 input_surface = avc_priv_surface->scaled_32x_surface_obj;
4385 gen9_add_adv_gpe_surface(ctx, gpe_context,
4387 GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX);
4389 for (i = 0; i < slice_param->num_ref_idx_l0_active_minus1 + 1; i++) {
4390 surface_id = slice_param->RefPicList0[i].picture_id;
4391 obj_surface = SURFACE(surface_id);
4392 if (!obj_surface || !obj_surface->private_data)
4394 avc_priv_surface = obj_surface->private_data;
4396 input_surface = avc_priv_surface->scaled_32x_surface_obj;
4398 gen9_add_adv_gpe_surface(ctx, gpe_context,
4400 GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX + i * 2 + 1);
4403 obj_surface = encode_state->reconstructed_object;
4404 avc_priv_surface = obj_surface->private_data;
4405 input_surface = avc_priv_surface->scaled_32x_surface_obj;
4407 gen9_add_adv_gpe_surface(ctx, gpe_context,
4409 GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX);
4411 for (i = 0; i < slice_param->num_ref_idx_l1_active_minus1 + 1; i++) {
4412 surface_id = slice_param->RefPicList1[i].picture_id;
4413 obj_surface = SURFACE(surface_id);
4414 if (!obj_surface || !obj_surface->private_data)
4416 avc_priv_surface = obj_surface->private_data;
4418 input_surface = avc_priv_surface->scaled_32x_surface_obj;
4420 gen9_add_adv_gpe_surface(ctx, gpe_context,
4422 GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX + i * 2 + 1);
4433 gen9_avc_kernel_me(VADriverContextP ctx,
4434 struct encode_state *encode_state,
4435 struct intel_encoder_context *encoder_context,
4438 struct i965_driver_data *i965 = i965_driver_data(ctx);
4439 struct i965_gpe_table *gpe = &i965->gpe_table;
4440 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
4441 struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
4442 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
4443 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
4445 struct i965_gpe_context *gpe_context;
4446 struct gpe_media_object_walker_parameter media_object_walker_param;
4447 struct gpe_encoder_kernel_walker_parameter kernel_walker_param;
4448 unsigned int downscaled_width_in_mb, downscaled_height_in_mb;
4449 int media_function = 0;
4451 struct me_param param ;
4452 unsigned int scale_factor = 0;
4455 case INTEL_ENC_HME_4x : {
4456 media_function = INTEL_MEDIA_STATE_4X_ME;
4460 case INTEL_ENC_HME_16x : {
4461 media_function = INTEL_MEDIA_STATE_16X_ME;
4465 case INTEL_ENC_HME_32x : {
4466 media_function = INTEL_MEDIA_STATE_32X_ME;
4475 downscaled_width_in_mb = ALIGN(generic_state->frame_width_in_pixel / scale_factor, 16) / 16;
4476 downscaled_height_in_mb = ALIGN(generic_state->frame_height_in_pixel / scale_factor, 16) / 16;
4478 /* I frame should not come here.*/
4479 kernel_idx = (generic_state->frame_type == SLICE_TYPE_P) ? GEN9_AVC_KERNEL_ME_P_IDX : GEN9_AVC_KERNEL_ME_B_IDX;
4480 gpe_context = &(avc_ctx->context_me.gpe_contexts[kernel_idx]);
4482 gpe->context_init(ctx, gpe_context);
4483 gpe->reset_binding_table(ctx, gpe_context);
4486 memset(¶m, 0, sizeof(param));
4487 param.hme_type = hme_type;
4488 generic_ctx->pfn_set_curbe_me(ctx, encode_state, gpe_context, encoder_context, ¶m);
4491 generic_ctx->pfn_send_me_surface(ctx, encode_state, gpe_context, encoder_context, ¶m);
4493 gpe->setup_interface_data(ctx, gpe_context);
4495 memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
4496 /* the scaling is based on 8x8 blk level */
4497 kernel_walker_param.resolution_x = downscaled_width_in_mb ;
4498 kernel_walker_param.resolution_y = downscaled_height_in_mb ;
4499 kernel_walker_param.no_dependency = 1;
4501 i965_init_media_object_walker_parameter(&kernel_walker_param, &media_object_walker_param);
4503 gen9_avc_run_kernel_media_object_walker(ctx, encoder_context,
4506 &media_object_walker_param);
4508 return VA_STATUS_SUCCESS;
4515 gen9_avc_set_curbe_wp(VADriverContextP ctx,
4516 struct encode_state *encode_state,
4517 struct i965_gpe_context *gpe_context,
4518 struct intel_encoder_context *encoder_context,
4521 gen9_avc_wp_curbe_data *cmd;
4522 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
4523 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
4524 VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
4525 struct wp_param * curbe_param = (struct wp_param *)param;
4527 cmd = i965_gpe_context_map_curbe(gpe_context);
4531 memset(cmd, 0, sizeof(gen9_avc_wp_curbe_data));
4532 if (curbe_param->ref_list_idx) {
4533 cmd->dw0.default_weight = slice_param->luma_weight_l1[0];
4534 cmd->dw0.default_offset = slice_param->luma_offset_l1[0];
4536 cmd->dw0.default_weight = slice_param->luma_weight_l0[0];
4537 cmd->dw0.default_offset = slice_param->luma_offset_l0[0];
4540 cmd->dw49.input_surface = GEN9_AVC_WP_INPUT_REF_SURFACE_INDEX;
4541 cmd->dw50.output_surface = GEN9_AVC_WP_OUTPUT_SCALED_SURFACE_INDEX;
4543 i965_gpe_context_unmap_curbe(gpe_context);
4548 gen9_avc_send_surface_wp(VADriverContextP ctx,
4549 struct encode_state *encode_state,
4550 struct i965_gpe_context *gpe_context,
4551 struct intel_encoder_context *encoder_context,
4554 struct i965_driver_data *i965 = i965_driver_data(ctx);
4555 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
4556 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
4557 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
4558 struct wp_param * curbe_param = (struct wp_param *)param;
4559 struct object_surface *obj_surface;
4560 VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
4561 VASurfaceID surface_id;
4563 if (curbe_param->ref_list_idx) {
4564 surface_id = slice_param->RefPicList1[0].picture_id;
4565 obj_surface = SURFACE(surface_id);
4566 if (!obj_surface || !obj_surface->private_data)
4567 avc_state->weighted_ref_l1_enable = 0;
4569 avc_state->weighted_ref_l1_enable = 1;
4571 surface_id = slice_param->RefPicList0[0].picture_id;
4572 obj_surface = SURFACE(surface_id);
4573 if (!obj_surface || !obj_surface->private_data)
4574 avc_state->weighted_ref_l0_enable = 0;
4576 avc_state->weighted_ref_l0_enable = 1;
4579 obj_surface = encode_state->reference_objects[0];
4582 gen9_add_adv_gpe_surface(ctx, gpe_context,
4584 GEN9_AVC_WP_INPUT_REF_SURFACE_INDEX);
4586 obj_surface = avc_ctx->wp_output_pic_select_surface_obj[curbe_param->ref_list_idx];
4587 gen9_add_adv_gpe_surface(ctx, gpe_context,
4589 GEN9_AVC_WP_OUTPUT_SCALED_SURFACE_INDEX);
4594 gen9_avc_kernel_wp(VADriverContextP ctx,
4595 struct encode_state *encode_state,
4596 struct intel_encoder_context *encoder_context,
4597 unsigned int list1_in_use)
4599 struct i965_driver_data *i965 = i965_driver_data(ctx);
4600 struct i965_gpe_table *gpe = &i965->gpe_table;
4601 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
4602 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
4603 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
4604 struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
4606 struct i965_gpe_context *gpe_context;
4607 struct gpe_media_object_walker_parameter media_object_walker_param;
4608 struct gpe_encoder_kernel_walker_parameter kernel_walker_param;
4609 int media_function = INTEL_MEDIA_STATE_ENC_WP;
4610 struct wp_param param;
4612 gpe_context = &(avc_ctx->context_wp.gpe_contexts);
4614 gpe->context_init(ctx, gpe_context);
4615 gpe->reset_binding_table(ctx, gpe_context);
4617 memset(¶m, 0, sizeof(param));
4618 param.ref_list_idx = (list1_in_use == 1) ? 1 : 0;
4620 generic_ctx->pfn_set_curbe_wp(ctx, encode_state, gpe_context, encoder_context, ¶m);
4623 generic_ctx->pfn_send_wp_surface(ctx, encode_state, gpe_context, encoder_context, ¶m);
4625 gpe->setup_interface_data(ctx, gpe_context);
4627 memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
4628 /* the scaling is based on 8x8 blk level */
4629 kernel_walker_param.resolution_x = generic_state->frame_width_in_mbs;
4630 kernel_walker_param.resolution_y = generic_state->frame_height_in_mbs;
4631 kernel_walker_param.no_dependency = 1;
4633 i965_init_media_object_walker_parameter(&kernel_walker_param, &media_object_walker_param);
4635 gen9_avc_run_kernel_media_object_walker(ctx, encoder_context,
4638 &media_object_walker_param);
4640 return VA_STATUS_SUCCESS;
4645 SFD (static frame detection) related functions
4648 gen9_avc_set_curbe_sfd(VADriverContextP ctx,
4649 struct encode_state *encode_state,
4650 struct i965_gpe_context *gpe_context,
4651 struct intel_encoder_context *encoder_context,
4654 gen9_avc_sfd_curbe_data *cmd;
4655 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
4656 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
4657 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
4658 VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
4660 cmd = i965_gpe_context_map_curbe(gpe_context);
4664 memset(cmd, 0, sizeof(gen9_avc_sfd_curbe_data));
4666 cmd->dw0.enable_intra_cost_scaling_for_static_frame = 1 ;
4667 cmd->dw0.enable_adaptive_mv_stream_in = 0 ;
4668 cmd->dw0.stream_in_type = 7 ;
4669 cmd->dw0.slice_type = slice_type_kernel[generic_state->frame_type] ;
4670 cmd->dw0.brc_mode_enable = generic_state->brc_enabled ;
4671 cmd->dw0.vdenc_mode_disable = 1 ;
4673 cmd->dw1.hme_stream_in_ref_cost = 5 ;
4674 cmd->dw1.num_of_refs = slice_param->num_ref_idx_l0_active_minus1 ;
4675 cmd->dw1.qp_value = avc_state->pic_param->pic_init_qp + slice_param->slice_qp_delta ;
4677 cmd->dw2.frame_width_in_mbs = generic_state->frame_width_in_mbs ;
4678 cmd->dw2.frame_height_in_mbs = generic_state->frame_height_in_mbs ;
4680 cmd->dw3.large_mv_threshold = 128 ;
4681 cmd->dw4.total_large_mv_threshold = (generic_state->frame_width_in_mbs * generic_state->frame_height_in_mbs) / 100 ;
4682 cmd->dw5.zmv_threshold = 4 ;
4683 cmd->dw6.total_zmv_threshold = (generic_state->frame_width_in_mbs * generic_state->frame_height_in_mbs * avc_state->zero_mv_threshold) / 100 ; // zero_mv_threshold = 60;
4684 cmd->dw7.min_dist_threshold = 10 ;
4686 if (generic_state->frame_type == SLICE_TYPE_P) {
4687 memcpy(cmd->cost_table, gen9_avc_sfd_cost_table_p_frame, AVC_QP_MAX * sizeof(unsigned char));
4689 } else if (generic_state->frame_type == SLICE_TYPE_B) {
4690 memcpy(cmd->cost_table, gen9_avc_sfd_cost_table_b_frame, AVC_QP_MAX * sizeof(unsigned char));
4693 cmd->dw21.actual_width_in_mb = cmd->dw2.frame_width_in_mbs ;
4694 cmd->dw21.actual_height_in_mb = cmd->dw2.frame_height_in_mbs ;
4695 cmd->dw24.vdenc_input_image_state_index = GEN9_AVC_SFD_VDENC_INPUT_IMAGE_STATE_INDEX ;
4696 cmd->dw26.mv_data_surface_index = GEN9_AVC_SFD_MV_DATA_SURFACE_INDEX ;
4697 cmd->dw27.inter_distortion_surface_index = GEN9_AVC_SFD_INTER_DISTORTION_SURFACE_INDEX ;
4698 cmd->dw28.output_data_surface_index = GEN9_AVC_SFD_OUTPUT_DATA_SURFACE_INDEX ;
4699 cmd->dw29.vdenc_output_image_state_index = GEN9_AVC_SFD_VDENC_OUTPUT_IMAGE_STATE_INDEX ;
4701 i965_gpe_context_unmap_curbe(gpe_context);
4706 gen9_avc_send_surface_sfd(VADriverContextP ctx,
4707 struct encode_state *encode_state,
4708 struct i965_gpe_context *gpe_context,
4709 struct intel_encoder_context *encoder_context,
4712 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
4713 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
4714 struct i965_gpe_resource *gpe_resource;
4717 /*HME mv data surface memv output 4x*/
4718 gpe_resource = &avc_ctx->s4x_memv_data_buffer;
4719 gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
4722 I965_SURFACEFORMAT_R8_UNORM,
4723 GEN9_AVC_SFD_MV_DATA_SURFACE_INDEX);
4725 /* memv distortion */
4726 gpe_resource = &avc_ctx->s4x_memv_distortion_buffer;
4727 gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
4730 I965_SURFACEFORMAT_R8_UNORM,
4731 GEN9_AVC_SFD_INTER_DISTORTION_SURFACE_INDEX);
4734 gpe_resource = &avc_ctx->res_sfd_output_buffer;
4735 gen9_add_buffer_gpe_surface(ctx,
4741 GEN9_AVC_SFD_OUTPUT_DATA_SURFACE_INDEX);
4746 gen9_avc_kernel_sfd(VADriverContextP ctx,
4747 struct encode_state *encode_state,
4748 struct intel_encoder_context *encoder_context)
4750 struct i965_driver_data *i965 = i965_driver_data(ctx);
4751 struct i965_gpe_table *gpe = &i965->gpe_table;
4752 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
4753 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
4754 struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
4756 struct i965_gpe_context *gpe_context;
4757 struct gpe_media_object_parameter media_object_param;
4758 struct gpe_media_object_inline_data media_object_inline_data;
4759 int media_function = INTEL_MEDIA_STATE_STATIC_FRAME_DETECTION;
4760 gpe_context = &(avc_ctx->context_sfd.gpe_contexts);
4762 gpe->context_init(ctx, gpe_context);
4763 gpe->reset_binding_table(ctx, gpe_context);
4766 generic_ctx->pfn_set_curbe_sfd(ctx, encode_state, gpe_context, encoder_context, NULL);
4769 generic_ctx->pfn_send_sfd_surface(ctx, encode_state, gpe_context, encoder_context, NULL);
4771 gpe->setup_interface_data(ctx, gpe_context);
4773 memset(&media_object_param, 0, sizeof(media_object_param));
4774 memset(&media_object_inline_data, 0, sizeof(media_object_inline_data));
4775 media_object_param.pinline_data = &media_object_inline_data;
4776 media_object_param.inline_size = sizeof(media_object_inline_data);
4778 gen9_avc_run_kernel_media_object(ctx, encoder_context,
4781 &media_object_param);
4783 return VA_STATUS_SUCCESS;
4787 kernel related functions: init/destroy etc.
/*
 * Create the GPE contexts for the 4x and 2x scaling kernels and load the
 * kernel binaries.  The 4x-scaling CURBE layout differs between Gen9
 * (SKL/BXT) and Gen9.5 (KBL/GLK), hence the device check.
 *
 * NOTE(review): extract appears truncated -- the closing brace of the
 * KBL/GLK branch and the tail arguments of the
 * intel_avc_get_kernel_header_and_size()/load_kernels() calls are missing.
 */
4790 gen9_avc_kernel_init_scaling(VADriverContextP ctx,
4791 struct generic_encoder_context *generic_context,
4792 struct gen_avc_scaling_context *kernel_context)
4794 struct i965_driver_data *i965 = i965_driver_data(ctx);
4795 struct i965_gpe_table *gpe = &i965->gpe_table;
4796 struct i965_gpe_context *gpe_context = NULL;
4797 struct encoder_kernel_parameter kernel_param ;
4798 struct encoder_scoreboard_parameter scoreboard_param;
4799 struct i965_kernel common_kernel;
/* CURBE/inline sizes depend on hardware generation (gen9 vs gen9.5). */
4801 if (IS_SKL(i965->intel.device_info) ||
4802 IS_BXT(i965->intel.device_info)) {
4803 kernel_param.curbe_size = sizeof(gen9_avc_scaling4x_curbe_data);
4804 kernel_param.inline_data_size = sizeof(gen9_avc_scaling4x_curbe_data);
4805 } else if (IS_KBL(i965->intel.device_info) ||
4806 IS_GLK(i965->intel.device_info)) {
4807 kernel_param.curbe_size = sizeof(gen95_avc_scaling4x_curbe_data);
4808 kernel_param.inline_data_size = sizeof(gen95_avc_scaling4x_curbe_data);
4811 /* 4x scaling kernel*/
4812 kernel_param.sampler_size = 0;
/* Common HW-scoreboard setup shared by both scaling contexts. */
4814 memset(&scoreboard_param, 0, sizeof(scoreboard_param));
4815 scoreboard_param.mask = 0xFF;
4816 scoreboard_param.enable = generic_context->use_hw_scoreboard;
4817 scoreboard_param.type = generic_context->use_hw_non_stalling_scoreboard;
4818 scoreboard_param.walkpat_flag = 0;
4820 gpe_context = &kernel_context->gpe_contexts[GEN9_AVC_KERNEL_SCALING_4X_IDX];
4821 gen9_init_gpe_context_avc(ctx, gpe_context, &kernel_param);
4822 gen9_init_vfe_scoreboard_avc(gpe_context, &scoreboard_param);
4824 memset(&common_kernel, 0, sizeof(common_kernel));
/* Locate the 4x-scaling kernel inside the combined kernel blob. */
4826 intel_avc_get_kernel_header_and_size((void *)(generic_context->enc_kernel_ptr),
4827 generic_context->enc_kernel_size,
4828 INTEL_GENERIC_ENC_SCALING4X,
4832 gpe->load_kernels(ctx,
4837 /*2x scaling kernel*/
4838 kernel_param.curbe_size = sizeof(gen9_avc_scaling2x_curbe_data);
4839 kernel_param.inline_data_size = 0;
4840 kernel_param.sampler_size = 0;
4842 gpe_context = &kernel_context->gpe_contexts[GEN9_AVC_KERNEL_SCALING_2X_IDX];
4843 gen9_init_gpe_context_avc(ctx, gpe_context, &kernel_param);
4844 gen9_init_vfe_scoreboard_avc(gpe_context, &scoreboard_param);
4846 memset(&common_kernel, 0, sizeof(common_kernel));
4848 intel_avc_get_kernel_header_and_size((void *)(generic_context->enc_kernel_ptr),
4849 generic_context->enc_kernel_size,
4850 INTEL_GENERIC_ENC_SCALING2X,
4854 gpe->load_kernels(ctx,
/*
 * Create the GPE contexts for the ME (motion estimation) kernels and load
 * their binaries.  Two contexts are created (loop bound 2) -- presumably
 * the 4x-HME and 16x-HME variants; TODO confirm against the full source.
 *
 * NOTE(review): the declaration of loop index 'i' and the closing braces
 * are missing from this extract (lossy extraction).
 */
4862 gen9_avc_kernel_init_me(VADriverContextP ctx,
4863 struct generic_encoder_context *generic_context,
4864 struct gen_avc_me_context *kernel_context)
4866 struct i965_driver_data *i965 = i965_driver_data(ctx);
4867 struct i965_gpe_table *gpe = &i965->gpe_table;
4868 struct i965_gpe_context *gpe_context = NULL;
4869 struct encoder_kernel_parameter kernel_param ;
4870 struct encoder_scoreboard_parameter scoreboard_param;
4871 struct i965_kernel common_kernel;
/* Same ME CURBE layout on all supported gens (no device check here). */
4874 kernel_param.curbe_size = sizeof(gen9_avc_me_curbe_data);
4875 kernel_param.inline_data_size = 0;
4876 kernel_param.sampler_size = 0;
4878 memset(&scoreboard_param, 0, sizeof(scoreboard_param));
4879 scoreboard_param.mask = 0xFF;
4880 scoreboard_param.enable = generic_context->use_hw_scoreboard;
4881 scoreboard_param.type = generic_context->use_hw_non_stalling_scoreboard;
4882 scoreboard_param.walkpat_flag = 0;
/* One GPE context per ME kernel variant. */
4884 for (i = 0; i < 2; i++) {
4885 gpe_context = &kernel_context->gpe_contexts[i];
4886 gen9_init_gpe_context_avc(ctx, gpe_context, &kernel_param);
4887 gen9_init_vfe_scoreboard_avc(gpe_context, &scoreboard_param);
4889 memset(&common_kernel, 0, sizeof(common_kernel));
4891 intel_avc_get_kernel_header_and_size((void *)(generic_context->enc_kernel_ptr),
4892 generic_context->enc_kernel_size,
4893 INTEL_GENERIC_ENC_ME,
4897 gpe->load_kernels(ctx,
/*
 * Create the GPE contexts for all MBEnc kernel variants and load their
 * binaries.
 *
 * Fix: the KBL/GLK branch previously assigned
 * sizeof(gen9_avc_mbenc_curbe_data), identical to the SKL/BXT branch,
 * which made the device check a no-op.  Gen9.5 parts use their own MBEnc
 * CURBE layout, mirroring gen9_avc_kernel_init_scaling() above which
 * selects the gen95_* CURBE for KBL/GLK.
 *
 * NOTE(review): extract appears truncated -- the declaration of loop
 * index 'i', closing braces and the tail arguments of the
 * get_kernel_header/load_kernels calls are missing.
 */
4906 gen9_avc_kernel_init_mbenc(VADriverContextP ctx,
4907 struct generic_encoder_context *generic_context,
4908 struct gen_avc_mbenc_context *kernel_context)
4910 struct i965_driver_data *i965 = i965_driver_data(ctx);
4911 struct i965_gpe_table *gpe = &i965->gpe_table;
4912 struct i965_gpe_context *gpe_context = NULL;
4913 struct encoder_kernel_parameter kernel_param ;
4914 struct encoder_scoreboard_parameter scoreboard_param;
4915 struct i965_kernel common_kernel;
4917 unsigned int curbe_size = 0;
/* Select the MBEnc CURBE layout for this hardware generation. */
4919 if (IS_SKL(i965->intel.device_info) ||
4920 IS_BXT(i965->intel.device_info)) {
4921 curbe_size = sizeof(gen9_avc_mbenc_curbe_data);
4922 } else if (IS_KBL(i965->intel.device_info) ||
4923 IS_GLK(i965->intel.device_info)) {
4924 curbe_size = sizeof(gen95_avc_mbenc_curbe_data);
/* Unsupported device would leave curbe_size at 0 -- catch it early. */
4927 assert(curbe_size > 0);
4928 kernel_param.curbe_size = curbe_size;
4929 kernel_param.inline_data_size = 0;
4930 kernel_param.sampler_size = 0;
4932 memset(&scoreboard_param, 0, sizeof(scoreboard_param));
4933 scoreboard_param.mask = 0xFF;
4934 scoreboard_param.enable = generic_context->use_hw_scoreboard;
4935 scoreboard_param.type = generic_context->use_hw_non_stalling_scoreboard;
4936 scoreboard_param.walkpat_flag = 0;
/* One GPE context per MBEnc kernel variant (I/P/B x quality levels). */
4938 for (i = 0; i < NUM_GEN9_AVC_KERNEL_MBENC ; i++) {
4939 gpe_context = &kernel_context->gpe_contexts[i];
4940 gen9_init_gpe_context_avc(ctx, gpe_context, &kernel_param);
4941 gen9_init_vfe_scoreboard_avc(gpe_context, &scoreboard_param);
4943 memset(&common_kernel, 0, sizeof(common_kernel));
4945 intel_avc_get_kernel_header_and_size((void *)(generic_context->enc_kernel_ptr),
4946 generic_context->enc_kernel_size,
4947 INTEL_GENERIC_ENC_MBENC,
4951 gpe->load_kernels(ctx,
/*
 * Create the GPE contexts for all BRC (bit-rate control) kernels and load
 * their binaries.  Each BRC kernel has its own CURBE size, taken from the
 * brc_curbe_size[] table indexed by kernel position; init and reset share
 * the same init/reset CURBE layout, and the I-frame-distortion kernel
 * reuses the MBEnc CURBE.
 *
 * NOTE(review): extract appears truncated -- the declaration of loop
 * index 'i', closing braces and call tail arguments are missing.
 */
4960 gen9_avc_kernel_init_brc(VADriverContextP ctx,
4961 struct generic_encoder_context *generic_context,
4962 struct gen_avc_brc_context *kernel_context)
4964 struct i965_driver_data *i965 = i965_driver_data(ctx);
4965 struct i965_gpe_table *gpe = &i965->gpe_table;
4966 struct i965_gpe_context *gpe_context = NULL;
4967 struct encoder_kernel_parameter kernel_param ;
4968 struct encoder_scoreboard_parameter scoreboard_param;
4969 struct i965_kernel common_kernel;
/* Per-kernel CURBE sizes, in BRC kernel-index order. */
4972 static const int brc_curbe_size[NUM_GEN9_AVC_KERNEL_BRC] = {
4973 (sizeof(gen9_avc_brc_init_reset_curbe_data)),
4974 (sizeof(gen9_avc_frame_brc_update_curbe_data)),
4975 (sizeof(gen9_avc_brc_init_reset_curbe_data)),
4976 (sizeof(gen9_avc_mbenc_curbe_data)),
4978 (sizeof(gen9_avc_mb_brc_curbe_data))
4981 kernel_param.inline_data_size = 0;
4982 kernel_param.sampler_size = 0;
4984 memset(&scoreboard_param, 0, sizeof(scoreboard_param));
4985 scoreboard_param.mask = 0xFF;
4986 scoreboard_param.enable = generic_context->use_hw_scoreboard;
4987 scoreboard_param.type = generic_context->use_hw_non_stalling_scoreboard;
4988 scoreboard_param.walkpat_flag = 0;
/* curbe_size is set per-iteration from the table above. */
4990 for (i = 0; i < NUM_GEN9_AVC_KERNEL_BRC; i++) {
4991 kernel_param.curbe_size = brc_curbe_size[i];
4992 gpe_context = &kernel_context->gpe_contexts[i];
4993 gen9_init_gpe_context_avc(ctx, gpe_context, &kernel_param);
4994 gen9_init_vfe_scoreboard_avc(gpe_context, &scoreboard_param);
4996 memset(&common_kernel, 0, sizeof(common_kernel));
4998 intel_avc_get_kernel_header_and_size((void *)(generic_context->enc_kernel_ptr),
4999 generic_context->enc_kernel_size,
5000 INTEL_GENERIC_ENC_BRC,
5004 gpe->load_kernels(ctx,
/*
 * Create the GPE context for the WP (weighted prediction) kernel and load
 * its binary.  WP has a single GPE context (kernel_context->gpe_contexts
 * is a scalar here, not an array).
 *
 * NOTE(review): extract appears truncated -- closing braces and call tail
 * arguments are missing.
 */
5013 gen9_avc_kernel_init_wp(VADriverContextP ctx,
5014 struct generic_encoder_context *generic_context,
5015 struct gen_avc_wp_context *kernel_context)
5017 struct i965_driver_data *i965 = i965_driver_data(ctx);
5018 struct i965_gpe_table *gpe = &i965->gpe_table;
5019 struct i965_gpe_context *gpe_context = NULL;
5020 struct encoder_kernel_parameter kernel_param ;
5021 struct encoder_scoreboard_parameter scoreboard_param;
5022 struct i965_kernel common_kernel;
5024 kernel_param.curbe_size = sizeof(gen9_avc_wp_curbe_data);
5025 kernel_param.inline_data_size = 0;
5026 kernel_param.sampler_size = 0;
5028 memset(&scoreboard_param, 0, sizeof(scoreboard_param));
5029 scoreboard_param.mask = 0xFF;
5030 scoreboard_param.enable = generic_context->use_hw_scoreboard;
5031 scoreboard_param.type = generic_context->use_hw_non_stalling_scoreboard;
5032 scoreboard_param.walkpat_flag = 0;
5034 gpe_context = &kernel_context->gpe_contexts;
5035 gen9_init_gpe_context_avc(ctx, gpe_context, &kernel_param);
5036 gen9_init_vfe_scoreboard_avc(gpe_context, &scoreboard_param);
5038 memset(&common_kernel, 0, sizeof(common_kernel));
5040 intel_avc_get_kernel_header_and_size((void *)(generic_context->enc_kernel_ptr),
5041 generic_context->enc_kernel_size,
5042 INTEL_GENERIC_ENC_WP,
5046 gpe->load_kernels(ctx,
/*
 * Create the GPE context for the SFD (static frame detection) kernel and
 * load its binary.  Like WP, SFD has a single GPE context.
 *
 * NOTE(review): extract appears truncated -- closing braces and call tail
 * arguments are missing.
 */
5054 gen9_avc_kernel_init_sfd(VADriverContextP ctx,
5055 struct generic_encoder_context *generic_context,
5056 struct gen_avc_sfd_context *kernel_context)
5058 struct i965_driver_data *i965 = i965_driver_data(ctx);
5059 struct i965_gpe_table *gpe = &i965->gpe_table;
5060 struct i965_gpe_context *gpe_context = NULL;
5061 struct encoder_kernel_parameter kernel_param ;
5062 struct encoder_scoreboard_parameter scoreboard_param;
5063 struct i965_kernel common_kernel;
5065 kernel_param.curbe_size = sizeof(gen9_avc_sfd_curbe_data);
5066 kernel_param.inline_data_size = 0;
5067 kernel_param.sampler_size = 0;
5069 memset(&scoreboard_param, 0, sizeof(scoreboard_param));
5070 scoreboard_param.mask = 0xFF;
5071 scoreboard_param.enable = generic_context->use_hw_scoreboard;
5072 scoreboard_param.type = generic_context->use_hw_non_stalling_scoreboard;
5073 scoreboard_param.walkpat_flag = 0;
5075 gpe_context = &kernel_context->gpe_contexts;
5076 gen9_init_gpe_context_avc(ctx, gpe_context, &kernel_param);
5077 gen9_init_vfe_scoreboard_avc(gpe_context, &scoreboard_param);
5079 memset(&common_kernel, 0, sizeof(common_kernel));
5081 intel_avc_get_kernel_header_and_size((void *)(generic_context->enc_kernel_ptr),
5082 generic_context->enc_kernel_size,
5083 INTEL_GENERIC_ENC_SFD,
5087 gpe->load_kernels(ctx,
/*
 * Tear down everything gen9_avc_kernel_init() created: free the shared
 * GPE resources, then destroy every GPE context (scaling, BRC, ME, MBEnc
 * arrays; WP and SFD single contexts).
 *
 * NOTE(review): extract appears truncated -- the declaration of 'i' and
 * closing braces are missing.
 */
5095 gen9_avc_kernel_destroy(struct encoder_vme_mfc_context * vme_context)
5098 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
5099 struct i965_driver_data *i965 = i965_driver_data(avc_ctx->ctx);
5100 struct i965_gpe_table *gpe = &i965->gpe_table;
/* Buffers/surfaces first, then the contexts that referenced them. */
5104 gen9_avc_free_resources(vme_context);
5106 for (i = 0; i < NUM_GEN9_AVC_KERNEL_SCALING; i++)
5107 gpe->context_destroy(&avc_ctx->context_scaling.gpe_contexts[i]);
5109 for (i = 0; i < NUM_GEN9_AVC_KERNEL_BRC; i++)
5110 gpe->context_destroy(&avc_ctx->context_brc.gpe_contexts[i]);
5112 for (i = 0; i < NUM_GEN9_AVC_KERNEL_ME; i++)
5113 gpe->context_destroy(&avc_ctx->context_me.gpe_contexts[i]);
5115 for (i = 0; i < NUM_GEN9_AVC_KERNEL_MBENC; i++)
5116 gpe->context_destroy(&avc_ctx->context_mbenc.gpe_contexts[i]);
/* WP and SFD hold one context each (scalar members). */
5118 gpe->context_destroy(&avc_ctx->context_wp.gpe_contexts);
5120 gpe->context_destroy(&avc_ctx->context_sfd.gpe_contexts);
/*
 * Refresh per-frame encoder state from the incoming VA-API seq/pic/slice
 * parameter buffers: frame type, BRC setup, GOP info, frame/downscaled
 * dimensions, and HME/16xME/32xME enablement.
 *
 * NOTE(review): extract appears truncated throughout -- 'slice_index' is
 * never visibly initialized/incremented, 'profile' has no visible
 * declaration, and many closing braces/else branches are missing; verify
 * against upstream gen9_avc_encoder.c.
 */
5128 gen9_avc_update_parameters(VADriverContextP ctx,
5130 struct encode_state *encode_state,
5131 struct intel_encoder_context *encoder_context)
5133 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
5134 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
5135 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
5136 VAEncSequenceParameterBufferH264 *seq_param;
5137 VAEncSliceParameterBufferH264 *slice_param;
5138 int i, j, slice_index;
5139 unsigned int preset = generic_state->preset;
5141 /* seq/pic/slice parameter setting */
/* 16x/32x HME support is a per-preset capability table lookup. */
5142 generic_state->b16xme_supported = gen9_avc_super_hme[preset];
5143 generic_state->b32xme_supported = gen9_avc_ultra_hme[preset];
5145 avc_state->seq_param = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
5146 avc_state->pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
/* Flatten all slice parameter buffers into avc_state->slice_param[]. */
5148 avc_state->slice_num = 0;
5150 for (j = 0; j < encode_state->num_slice_params_ext; j++) {
5151 slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j]->buffer;
5152 for (i = 0; i < encode_state->slice_params_ext[j]->num_elements; i++) {
5153 avc_state->slice_param[slice_index] = slice_param;
5156 avc_state->slice_num++;
5160 /* how many slices support by now? 1 slice or multi slices, but row slice.not slice group. */
5161 seq_param = avc_state->seq_param;
5162 slice_param = avc_state->slice_param[0];
/* Derive the frame type from the first slice's slice_type. */
5164 generic_state->frame_type = avc_state->slice_param[0]->slice_type;
5166 if (slice_param->slice_type == SLICE_TYPE_I ||
5167 slice_param->slice_type == SLICE_TYPE_SI)
5168 generic_state->frame_type = SLICE_TYPE_I;
5169 else if (slice_param->slice_type == SLICE_TYPE_P)
5170 generic_state->frame_type = SLICE_TYPE_P;
5171 else if (slice_param->slice_type == SLICE_TYPE_B)
5172 generic_state->frame_type = SLICE_TYPE_B;
5173 if (profile == VAProfileH264High)
5174 avc_state->transform_8x8_mode_enable = 0;// workaround for high profile: ignore pic_param->pic_fields.bits.transform_8x8_mode_flag
5176 avc_state->transform_8x8_mode_enable = 0;
/* (Re)initialize BRC targets on first use or on an explicit reset. */
5179 if (generic_state->brc_enabled && (!generic_state->brc_inited || generic_state->brc_need_reset)) {
5180 generic_state->target_bit_rate = ALIGN(seq_param->bits_per_second, 1000) / 1000;
5181 generic_state->init_vbv_buffer_fullness_in_bit = seq_param->bits_per_second;
5182 generic_state->vbv_buffer_size_in_bit = (uint64_t)seq_param->bits_per_second << 1;
5183 generic_state->frames_per_100s = 3000; /* 30fps */
5186 generic_state->gop_size = seq_param->intra_period;
5187 generic_state->gop_ref_distance = seq_param->ip_period;
/* CBR: max == min == target by definition. */
5189 if (generic_state->internal_rate_mode == VA_RC_CBR) {
5190 generic_state->max_bit_rate = generic_state->target_bit_rate;
5191 generic_state->min_bit_rate = generic_state->target_bit_rate;
5194 if (generic_state->frame_type == SLICE_TYPE_I || generic_state->first_frame) {
5195 gen9_avc_update_misc_parameters(ctx, encode_state, encoder_context);
5198 generic_state->preset = encoder_context->quality_level;
5199 if (encoder_context->quality_level == INTEL_PRESET_UNKNOWN) {
5200 generic_state->preset = INTEL_PRESET_RT_SPEED;
5202 generic_state->kernel_mode = gen9_avc_kernel_mode[generic_state->preset];
5204 if (!generic_state->brc_inited) {
/* NOTE(review): stray double semicolon below -- harmless empty statement. */
5205 generic_state->brc_init_reset_input_bits_per_frame = ((double)(generic_state->max_bit_rate * 1000) * 100) / generic_state->frames_per_100s;;
5206 generic_state->brc_init_current_target_buf_full_in_bits = generic_state->init_vbv_buffer_fullness_in_bit;
5207 generic_state->brc_init_reset_buf_size_in_bits = generic_state->vbv_buffer_size_in_bit;
5208 generic_state->brc_target_size = generic_state->init_vbv_buffer_fullness_in_bit;
5212 generic_state->curr_pak_pass = 0;
5213 generic_state->num_pak_passes = MAX_AVC_PAK_PASS_NUM;
/* BRC is active only for CBR/VBR; everything else is single-pass. */
5215 if (generic_state->internal_rate_mode == VA_RC_CBR ||
5216 generic_state->internal_rate_mode == VA_RC_VBR)
5217 generic_state->brc_enabled = 1;
5219 generic_state->brc_enabled = 0;
/* BRC needs a complete set of rate parameters; bail out to CQP-like mode otherwise. */
5221 if (generic_state->brc_enabled &&
5222 (!generic_state->init_vbv_buffer_fullness_in_bit ||
5223 !generic_state->vbv_buffer_size_in_bit ||
5224 !generic_state->max_bit_rate ||
5225 !generic_state->target_bit_rate ||
5226 !generic_state->frames_per_100s)) {
5227 WARN_ONCE("Rate control parameter is required for BRC\n");
5228 generic_state->brc_enabled = 0;
5231 if (!generic_state->brc_enabled) {
5232 generic_state->target_bit_rate = 0;
5233 generic_state->max_bit_rate = 0;
5234 generic_state->min_bit_rate = 0;
5235 generic_state->init_vbv_buffer_fullness_in_bit = 0;
5236 generic_state->vbv_buffer_size_in_bit = 0;
5237 generic_state->num_pak_passes = 1;
5239 generic_state->num_pak_passes = MAX_AVC_PAK_PASS_NUM;
/* Frame geometry in MBs/pixels, plus the 4x/16x/32x downscale chains. */
5243 generic_state->frame_width_in_mbs = seq_param->picture_width_in_mbs;
5244 generic_state->frame_height_in_mbs = seq_param->picture_height_in_mbs;
5245 generic_state->frame_width_in_pixel = generic_state->frame_width_in_mbs * 16;
5246 generic_state->frame_height_in_pixel = generic_state->frame_height_in_mbs * 16;
5248 generic_state->frame_width_4x = ALIGN(generic_state->frame_width_in_pixel / 4, 16);
5249 generic_state->frame_height_4x = ALIGN(generic_state->frame_height_in_pixel / 4, 16);
5250 generic_state->downscaled_width_4x_in_mb = generic_state->frame_width_4x / 16 ;
5251 generic_state->downscaled_height_4x_in_mb = generic_state->frame_height_4x / 16;
5253 generic_state->frame_width_16x = ALIGN(generic_state->frame_width_in_pixel / 16, 16);
5254 generic_state->frame_height_16x = ALIGN(generic_state->frame_height_in_pixel / 16, 16);
5255 generic_state->downscaled_width_16x_in_mb = generic_state->frame_width_16x / 16 ;
5256 generic_state->downscaled_height_16x_in_mb = generic_state->frame_height_16x / 16;
5258 generic_state->frame_width_32x = ALIGN(generic_state->frame_width_in_pixel / 32, 16);
5259 generic_state->frame_height_32x = ALIGN(generic_state->frame_height_in_pixel / 32, 16);
5260 generic_state->downscaled_width_32x_in_mb = generic_state->frame_width_32x / 16 ;
5261 generic_state->downscaled_height_32x_in_mb = generic_state->frame_height_32x / 16;
/* Enable each HME tier only where the preset tables allow it. */
5263 if (generic_state->hme_supported) {
5264 generic_state->hme_enabled = 1;
5266 generic_state->hme_enabled = 0;
5269 if (generic_state->b16xme_supported) {
5270 generic_state->b16xme_enabled = 1;
5272 generic_state->b16xme_enabled = 0;
5275 if (generic_state->b32xme_supported) {
5276 generic_state->b32xme_enabled = 1;
5278 generic_state->b32xme_enabled = 0;
5280 /* disable HME/16xME if the size is too small */
5281 if (generic_state->frame_width_4x <= INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT) {
5282 generic_state->b32xme_supported = 0;
5283 generic_state->b32xme_enabled = 0;
5284 generic_state->b16xme_supported = 0;
5285 generic_state->b16xme_enabled = 0;
5286 generic_state->frame_width_4x = INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT;
5287 generic_state->downscaled_width_4x_in_mb = WIDTH_IN_MACROBLOCKS(INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT);
5289 if (generic_state->frame_height_4x <= INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT) {
5290 generic_state->b32xme_supported = 0;
5291 generic_state->b32xme_enabled = 0;
5292 generic_state->b16xme_supported = 0;
5293 generic_state->b16xme_enabled = 0;
5294 generic_state->frame_height_4x = INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT;
5295 generic_state->downscaled_height_4x_in_mb = WIDTH_IN_MACROBLOCKS(INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT);
5298 if (generic_state->frame_width_16x < INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT) {
5299 generic_state->b32xme_supported = 0;
5300 generic_state->b32xme_enabled = 0;
5301 generic_state->frame_width_16x = INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT;
5302 generic_state->downscaled_width_16x_in_mb = WIDTH_IN_MACROBLOCKS(INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT);
5304 if (generic_state->frame_height_16x < INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT) {
5305 generic_state->b32xme_supported = 0;
5306 generic_state->b32xme_enabled = 0;
5307 generic_state->frame_height_16x = INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT;
5308 generic_state->downscaled_height_16x_in_mb = WIDTH_IN_MACROBLOCKS(INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT);
5311 if (generic_state->frame_width_32x < INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT) {
5312 generic_state->frame_width_32x = INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT;
5313 generic_state->downscaled_width_32x_in_mb = WIDTH_IN_MACROBLOCKS(INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT);
5315 if (generic_state->frame_height_32x < INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT) {
5316 generic_state->frame_height_32x = INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT;
5317 generic_state->downscaled_height_32x_in_mb = WIDTH_IN_MACROBLOCKS(INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT);
/*
 * Validate and derive per-frame encoding decisions: internal rate-control
 * mode, BRC/mb-BRC enablement, CAF (adaptive fractional search), flatness
 * check, slice-height layout, HME gating for I frames, bi-weight for B
 * frames, skip-bias adjustment, and inter rounding values.
 *
 * NOTE(review): extract appears truncated -- the switch-case labels, the
 * declaration of 'i', and several closing braces/else branches are
 * missing; verify against upstream gen9_avc_encoder.c.
 */
5323 gen9_avc_encode_check_parameter(VADriverContextP ctx,
5324 struct encode_state *encode_state,
5325 struct intel_encoder_context *encoder_context)
5327 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
5328 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
5329 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
5330 unsigned int rate_control_mode = encoder_context->rate_control_mode;
5331 unsigned int preset = generic_state->preset;
5332 VAEncPictureParameterBufferH264 *pic_param = avc_state->pic_param ;
5333 VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
/* Effective QP of the first slice (pic init QP + slice delta). */
5335 int slice_qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
5337 generic_state->avbr_curracy = 30;
5338 generic_state->avbr_convergence = 150;
/* Map the VA rate-control bits to the internal mode (case labels lost in extract). */
5340 switch (rate_control_mode & 0x7f) {
5342 generic_state->internal_rate_mode = VA_RC_CBR;
5346 generic_state->internal_rate_mode = VA_RC_VBR;
5351 generic_state->internal_rate_mode = VA_RC_CQP;
5355 if (rate_control_mode != VA_RC_NONE &&
5356 rate_control_mode != VA_RC_CQP) {
5357 generic_state->brc_enabled = 1;
5358 generic_state->brc_distortion_buffer_supported = 1;
5359 generic_state->brc_constant_buffer_supported = 1;
5360 generic_state->num_pak_passes = MAX_AVC_PAK_PASS_NUM;
5363 /*check brc parameter*/
5364 if (generic_state->brc_enabled) {
5365 avc_state->mb_qp_data_enable = 0;
5368 /*set the brc init and reset accordingly*/
5369 if (generic_state->brc_need_reset &&
5370 (generic_state->brc_distortion_buffer_supported == 0 ||
5371 rate_control_mode == VA_RC_CQP)) {
5372 generic_state->brc_need_reset = 0;// not support by CQP
5375 if (generic_state->brc_need_reset && !avc_state->sfd_mb_enable) {
5376 avc_state->sfd_enable = 0;
/* Clamp the BRC sliding-window size to [1 frame .. 60 frames]. */
5379 if (generic_state->frames_per_window_size == 0) {
5380 generic_state->frames_per_window_size = (generic_state->frames_per_100s / 100 < 60) ? (generic_state->frames_per_100s / 100) : 60;
5381 } else if (generic_state->frames_per_window_size > 2 * generic_state->frames_per_100s / 100) {
5382 generic_state->frames_per_window_size = (generic_state->frames_per_100s / 100 < 60) ? (generic_state->frames_per_100s / 100) : 60;
5385 if (generic_state->brc_enabled) {
5386 generic_state->hme_enabled = generic_state->frame_type != SLICE_TYPE_I;
5387 if (avc_state->min_max_qp_enable) {
5388 generic_state->num_pak_passes = 1;
5390 generic_state->brc_roi_enable = (rate_control_mode != VA_RC_CQP) && (generic_state->num_roi > 0);// only !CQP
5391 generic_state->mb_brc_enabled = generic_state->mb_brc_enabled || generic_state->brc_roi_enable;
5393 generic_state->num_pak_passes = 1;// CQP only one pass
/* I-frame distortion surface is only used when BRC can consume it. */
5396 avc_state->mbenc_i_frame_dist_in_use = 0;
5397 avc_state->mbenc_i_frame_dist_in_use = (generic_state->brc_enabled) && (generic_state->brc_distortion_buffer_supported) && (generic_state->frame_type == SLICE_TYPE_I);
5399 /*ROI must enable mbbrc.*/
/* CAF: bit 0 of the preset table gates P frames, bit 1 gates B frames. */
5402 if (avc_state->caf_supported) {
5403 switch (generic_state->frame_type) {
5407 avc_state->caf_enable = gen9_avc_all_fractional[preset] & 0x01;
5410 avc_state->caf_enable = (gen9_avc_all_fractional[preset] >> 1) & 0x01;
5414 if (avc_state->caf_enable && avc_state->caf_disable_hd && gen9_avc_disable_all_fractional_check_for_high_res[preset]) {
5415 if (generic_state->frame_width_in_pixel >= 1280 && generic_state->frame_height_in_pixel >= 720)
5416 avc_state->caf_enable = 0;
5420 avc_state->adaptive_transform_decision_enable &= gen9_avc_enable_adaptive_tx_decision[preset & 0x7];
5422 /* Flatness check is enabled only if scaling will be performed and CAF is enabled. here only frame */
5423 if (avc_state->flatness_check_supported) {
5424 avc_state->flatness_check_enable = ((avc_state->caf_enable) && (generic_state->brc_enabled || generic_state->hme_supported)) ;
5426 avc_state->flatness_check_enable = 0;
5429 /* check mb_status_supported/enable */
5430 if (avc_state->adaptive_transform_decision_enable) {
5431 avc_state->mb_status_enable = 1;
5433 avc_state->mb_status_enable = 0;
5435 /*slice check,all the slices use the same slice height except the last slice*/
5436 avc_state->arbitrary_num_mbs_in_slice = 0;
5437 for (i = 0; i < avc_state->slice_num; i++) {
5438 if (avc_state->slice_param[i]->num_macroblocks % generic_state->frame_width_in_mbs > 0) {
5439 avc_state->arbitrary_num_mbs_in_slice = 1;
5440 avc_state->slice_height = 1; /* slice height will be ignored by kernel and here set it as default value */
5442 avc_state->slice_height = avc_state->slice_param[i]->num_macroblocks / generic_state->frame_width_in_mbs;
/* HME chains are meaningless for intra frames. */
5446 if (generic_state->frame_type == SLICE_TYPE_I) {
5447 generic_state->hme_enabled = 0;
5448 generic_state->b16xme_enabled = 0;
5449 generic_state->b32xme_enabled = 0;
5452 if (generic_state->frame_type == SLICE_TYPE_B) {
5453 gen9_avc_get_dist_scale_factor(ctx, encode_state, encoder_context);
5454 avc_state->bi_weight = gen9_avc_get_biweight(avc_state->dist_scale_factor_list0[0], pic_param->pic_fields.bits.weighted_bipred_idc);
5457 /* Determine if SkipBiasAdjustment should be enabled for P picture 1. No B frame 2. Qp >= 22 3. CQP mode */
5458 avc_state->skip_bias_adjustment_enable = avc_state->skip_bias_adjustment_supported && (generic_state->frame_type == SLICE_TYPE_P)
5459 && (generic_state->gop_ref_distance == 1) && (avc_state->pic_param->pic_init_qp + avc_state->slice_param[0]->slice_qp_delta >= 22) && !generic_state->brc_enabled;
5461 if (generic_state->kernel_mode == INTEL_ENC_KERNEL_QUALITY) {
5462 avc_state->tq_enable = 1;
5463 avc_state->tq_rounding = 6;
5464 if (generic_state->brc_enabled) {
5465 generic_state->mb_brc_enabled = 1;
5469 //check the inter rounding
/* 255 marks "pick automatically"; note the later comparisons use
 * AVC_INVALID_ROUNDING_VALUE -- presumably 255; TODO confirm. */
5470 avc_state->rounding_value = 0;
5471 avc_state->rounding_inter_p = 255;//default
5472 avc_state->rounding_inter_b = 255; //default
5473 avc_state->rounding_inter_b_ref = 255; //default
5475 if (generic_state->frame_type == SLICE_TYPE_P) {
5476 if (avc_state->rounding_inter_p == AVC_INVALID_ROUNDING_VALUE) {
5477 if (avc_state->adaptive_rounding_inter_enable && !(generic_state->brc_enabled)) {
5478 if (generic_state->gop_ref_distance == 1)
5479 avc_state->rounding_value = gen9_avc_adaptive_inter_rounding_p_without_b[slice_qp];
5481 avc_state->rounding_value = gen9_avc_adaptive_inter_rounding_p[slice_qp];
5483 avc_state->rounding_value = gen9_avc_inter_rounding_p[generic_state->preset];
5487 avc_state->rounding_value = avc_state->rounding_inter_p;
5489 } else if (generic_state->frame_type == SLICE_TYPE_B) {
5490 if (pic_param->pic_fields.bits.reference_pic_flag) {
5491 if (avc_state->rounding_inter_b_ref == AVC_INVALID_ROUNDING_VALUE)
5492 avc_state->rounding_value = gen9_avc_inter_rounding_b_ref[generic_state->preset];
5494 avc_state->rounding_value = avc_state->rounding_inter_b_ref;
5496 if (avc_state->rounding_inter_b == AVC_INVALID_ROUNDING_VALUE) {
5497 if (avc_state->adaptive_rounding_inter_enable && !(generic_state->brc_enabled))
5498 avc_state->rounding_value = gen9_avc_adaptive_inter_rounding_b[slice_qp];
5500 avc_state->rounding_value = gen9_avc_inter_rounding_b[generic_state->preset];
5502 avc_state->rounding_value = avc_state->rounding_inter_b;
5506 return VA_STATUS_SUCCESS;
5510 gen9_avc_vme_gpe_kernel_prepare(VADriverContextP ctx,
5511 struct encode_state *encode_state,
5512 struct intel_encoder_context *encoder_context)
5515 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
5516 struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
5517 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
5518 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
5519 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
5521 struct object_surface *obj_surface;
5522 struct object_buffer *obj_buffer;
5523 VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
5524 VAEncPictureParameterBufferH264 *pic_param = avc_state->pic_param;
5525 struct i965_coded_buffer_segment *coded_buffer_segment;
5527 struct gen9_surface_avc *avc_priv_surface;
5529 struct avc_surface_param surface_param;
5531 unsigned char * pdata;
5533 /* Setup current reconstruct frame */
5534 obj_surface = encode_state->reconstructed_object;
5535 va_status = i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
5537 if (va_status != VA_STATUS_SUCCESS)
5540 memset(&surface_param, 0, sizeof(surface_param));
5541 surface_param.frame_width = generic_state->frame_width_in_pixel;
5542 surface_param.frame_height = generic_state->frame_height_in_pixel;
5543 va_status = gen9_avc_init_check_surfaces(ctx,
5547 if (va_status != VA_STATUS_SUCCESS)
5550 /* init the member of avc_priv_surface,frame_store_id,qp_value*/
5551 avc_priv_surface = (struct gen9_surface_avc *)obj_surface->private_data;
5552 avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 2] = 0;
5553 avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 1] = 0;
5554 i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS - 2]);
5555 i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS - 1]);
5556 i965_dri_object_to_buffer_gpe_resource(&avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS - 2], avc_priv_surface->dmv_top);
5557 i965_dri_object_to_buffer_gpe_resource(&avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS - 1], avc_priv_surface->dmv_bottom);
5558 avc_priv_surface->qp_value = pic_param->pic_init_qp + slice_param->slice_qp_delta;
5559 avc_priv_surface->frame_store_id = 0;
5560 avc_priv_surface->frame_idx = pic_param->CurrPic.frame_idx;
5561 avc_priv_surface->top_field_order_cnt = pic_param->CurrPic.TopFieldOrderCnt;
5562 avc_priv_surface->is_as_ref = pic_param->pic_fields.bits.reference_pic_flag;
5563 avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 2] = avc_priv_surface->top_field_order_cnt;
5564 avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 1] = avc_priv_surface->top_field_order_cnt + 1;
5566 i965_free_gpe_resource(&generic_ctx->res_reconstructed_surface);
5567 i965_object_surface_to_2d_gpe_resource_with_align(&generic_ctx->res_reconstructed_surface, obj_surface, GPE_RESOURCE_ALIGNMENT);
5569 /* input YUV surface*/
5570 obj_surface = encode_state->input_yuv_object;
5571 va_status = i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
5573 if (va_status != VA_STATUS_SUCCESS)
5575 i965_free_gpe_resource(&generic_ctx->res_uncompressed_input_surface);
5576 i965_object_surface_to_2d_gpe_resource_with_align(&generic_ctx->res_uncompressed_input_surface, obj_surface, GPE_RESOURCE_ALIGNMENT);
5578 /* Reference surfaces */
5579 for (i = 0; i < ARRAY_ELEMS(avc_ctx->list_reference_res); i++) {
5580 i965_free_gpe_resource(&avc_ctx->list_reference_res[i]);
5581 i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i * 2]);
5582 i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i * 2 + 1]);
5583 obj_surface = encode_state->reference_objects[i];
5584 avc_state->top_field_poc[2 * i] = 0;
5585 avc_state->top_field_poc[2 * i + 1] = 0;
5587 if (obj_surface && obj_surface->bo) {
5588 i965_object_surface_to_2d_gpe_resource_with_align(&avc_ctx->list_reference_res[i], obj_surface, GPE_RESOURCE_ALIGNMENT);
5590 /* actually it should be handled when it is reconstructed surface*/
5591 va_status = gen9_avc_init_check_surfaces(ctx,
5592 obj_surface, encoder_context,
5594 if (va_status != VA_STATUS_SUCCESS)
5596 avc_priv_surface = (struct gen9_surface_avc *)obj_surface->private_data;
5597 i965_dri_object_to_buffer_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i * 2], avc_priv_surface->dmv_top);
5598 i965_dri_object_to_buffer_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i * 2 + 1], avc_priv_surface->dmv_bottom);
5599 avc_state->top_field_poc[2 * i] = avc_priv_surface->top_field_order_cnt;
5600 avc_state->top_field_poc[2 * i + 1] = avc_priv_surface->top_field_order_cnt + 1;
5601 avc_priv_surface->frame_store_id = i;
5607 /* Encoded bitstream ?*/
5608 obj_buffer = encode_state->coded_buf_object;
5609 bo = obj_buffer->buffer_store->bo;
5610 i965_free_gpe_resource(&generic_ctx->compressed_bitstream.res);
5611 i965_dri_object_to_buffer_gpe_resource(&generic_ctx->compressed_bitstream.res, bo);
5612 generic_ctx->compressed_bitstream.start_offset = I965_CODEDBUFFER_HEADER_SIZE;
5613 generic_ctx->compressed_bitstream.end_offset = ALIGN(obj_buffer->size_element - 0x1000, 0x1000);
5616 avc_ctx->status_buffer.bo = bo;
5618 /* set the internal flag to 0 to indicate the coded size is unknown */
5620 coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual;
5621 coded_buffer_segment->mapped = 0;
5622 coded_buffer_segment->codec = encoder_context->codec;
5623 coded_buffer_segment->status_support = 1;
5625 pdata = bo->virtual + avc_ctx->status_buffer.base_offset;
5626 memset(pdata, 0, avc_ctx->status_buffer.status_buffer_size);
5629 //frame id, it is the ref pic id in the reference_objects list.
5630 avc_state->num_refs[0] = 0;
5631 avc_state->num_refs[1] = 0;
5632 if (generic_state->frame_type == SLICE_TYPE_P) {
5633 avc_state->num_refs[0] = pic_param->num_ref_idx_l0_active_minus1 + 1;
5635 if (slice_param->num_ref_idx_active_override_flag)
5636 avc_state->num_refs[0] = slice_param->num_ref_idx_l0_active_minus1 + 1;
5637 } else if (generic_state->frame_type == SLICE_TYPE_B) {
5638 avc_state->num_refs[0] = pic_param->num_ref_idx_l0_active_minus1 + 1;
5639 avc_state->num_refs[1] = pic_param->num_ref_idx_l1_active_minus1 + 1;
5641 if (slice_param->num_ref_idx_active_override_flag) {
5642 avc_state->num_refs[0] = slice_param->num_ref_idx_l0_active_minus1 + 1;
5643 avc_state->num_refs[1] = slice_param->num_ref_idx_l1_active_minus1 + 1;
5647 if (avc_state->num_refs[0] > ARRAY_ELEMS(avc_state->list_ref_idx[0]))
5648 return VA_STATUS_ERROR_INVALID_VALUE;
5649 if (avc_state->num_refs[1] > ARRAY_ELEMS(avc_state->list_ref_idx[1]))
5650 return VA_STATUS_ERROR_INVALID_VALUE;
5652 for (i = 0; i < ARRAY_ELEMS(avc_state->list_ref_idx[0]); i++) {
5653 VAPictureH264 *va_pic;
5655 assert(ARRAY_ELEMS(slice_param->RefPicList0) == ARRAY_ELEMS(avc_state->list_ref_idx[0]));
5656 avc_state->list_ref_idx[0][i] = 0;
5658 if (i >= avc_state->num_refs[0])
5661 va_pic = &slice_param->RefPicList0[i];
5663 for (j = 0; j < ARRAY_ELEMS(encode_state->reference_objects); j++) {
5664 obj_surface = encode_state->reference_objects[j];
5668 obj_surface->base.id == va_pic->picture_id) {
5670 assert(obj_surface->base.id != VA_INVALID_SURFACE);
5671 avc_state->list_ref_idx[0][i] = j;
5677 for (i = 0; i < ARRAY_ELEMS(avc_state->list_ref_idx[1]); i++) {
5678 VAPictureH264 *va_pic;
5680 assert(ARRAY_ELEMS(slice_param->RefPicList1) == ARRAY_ELEMS(avc_state->list_ref_idx[1]));
5681 avc_state->list_ref_idx[1][i] = 0;
5683 if (i >= avc_state->num_refs[1])
5686 va_pic = &slice_param->RefPicList1[i];
5688 for (j = 0; j < ARRAY_ELEMS(encode_state->reference_objects); j++) {
5689 obj_surface = encode_state->reference_objects[j];
5693 obj_surface->base.id == va_pic->picture_id) {
5695 assert(obj_surface->base.id != VA_INVALID_SURFACE);
5696 avc_state->list_ref_idx[1][i] = j;
5703 return VA_STATUS_SUCCESS;
5707 gen9_avc_vme_gpe_kernel_init(VADriverContextP ctx,
5708 struct encode_state *encode_state,
5709 struct intel_encoder_context *encoder_context)
5711 return VA_STATUS_SUCCESS;
5715 gen9_avc_vme_gpe_kernel_final(VADriverContextP ctx,
5716 struct encode_state *encode_state,
5717 struct intel_encoder_context *encoder_context)
5720 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
5721 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
5722 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
5724 /*set this flag when all kernel is finished*/
5725 if (generic_state->brc_enabled) {
5726 generic_state->brc_inited = 1;
5727 generic_state->brc_need_reset = 0;
5728 avc_state->mbenc_curbe_set_in_brc_update = 0;
5730 return VA_STATUS_SUCCESS;
/*
 * Run all VME (ENC) GPE kernels for the current frame, in dependency order:
 * BRC init/reset -> downscaling -> HME (coarsest first) -> SFD ->
 * BRC frame/MB update -> weighted prediction -> MBEnc.
 * NOTE(review): the embedded source line numbers skip values, so several
 * lines (closing braces, else branches, and the declaration of
 * "sfd_in_use") are elided from this listing — verify control flow
 * against the unabridged file.
 */
5734 gen9_avc_vme_gpe_kernel_run(VADriverContextP ctx,
5735 struct encode_state *encode_state,
5736 struct intel_encoder_context *encoder_context)
5738 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
5739 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
5740 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
5742 VAEncPictureParameterBufferH264 *pic_param = avc_state->pic_param;
5743 VAEncSliceParameterBufferH264 *slice_param = avc_state->slice_param[0];
5746 /* BRC init/reset needs to be called before HME since it will reset the Brc Distortion surface*/
5747 if (generic_state->brc_enabled && (!generic_state->brc_inited || generic_state->brc_need_reset)) {
5748 gen9_avc_kernel_brc_init_reset(ctx, encode_state, encoder_context);
/* Downscale the input: 4x always when HME is supported; 16x/32x are
 * nested because each coarser level scales from the previous one. */
5752 if (generic_state->hme_supported) {
5753 gen9_avc_kernel_scaling(ctx, encode_state, encoder_context, INTEL_ENC_HME_4x);
5754 if (generic_state->b16xme_supported) {
5755 gen9_avc_kernel_scaling(ctx, encode_state, encoder_context, INTEL_ENC_HME_16x);
5756 if (generic_state->b32xme_supported) {
5757 gen9_avc_kernel_scaling(ctx, encode_state, encoder_context, INTEL_ENC_HME_32x);
/* HME runs coarsest-to-finest so each level can seed the next (32x -> 16x -> 4x). */
5763 if (generic_state->hme_enabled) {
5764 if (generic_state->b16xme_enabled) {
5765 if (generic_state->b32xme_enabled) {
5766 gen9_avc_kernel_me(ctx, encode_state, encoder_context, INTEL_ENC_HME_32x);
5768 gen9_avc_kernel_me(ctx, encode_state, encoder_context, INTEL_ENC_HME_16x);
5770 gen9_avc_kernel_me(ctx, encode_state, encoder_context, INTEL_ENC_HME_4x);
5773 /*call SFD kernel after HME in same command buffer*/
/* Static-frame detection only makes sense with HME results and when
 * per-MB SFD is not already enabled. */
5774 sfd_in_use = avc_state->sfd_enable && generic_state->hme_enabled;
5775 sfd_in_use = sfd_in_use && !avc_state->sfd_mb_enable;
5777 gen9_avc_kernel_sfd(ctx, encode_state, encoder_context);
5780 /* BRC and MbEnc are included in the same task phase*/
5781 if (generic_state->brc_enabled) {
5782 if (avc_state->mbenc_i_frame_dist_in_use) {
/* "true" selects the I-frame-distortion flavor of the MBEnc kernel. */
5783 gen9_avc_kernel_mbenc(ctx, encode_state, encoder_context, true);
5785 gen9_avc_kernel_brc_frame_update(ctx, encode_state, encoder_context);
5787 if (avc_state->brc_split_enable && generic_state->mb_brc_enabled) {
5788 gen9_avc_kernel_brc_mb_update(ctx, encode_state, encoder_context);
5792 /*weight prediction,disable by now */
5793 avc_state->weighted_ref_l0_enable = 0;
5794 avc_state->weighted_ref_l1_enable = 0;
/* Explicit weighted prediction: run the WP kernel per reference list when
 * the app set luma weights; otherwise fall back by clearing the pic flags. */
5795 if (avc_state->weighted_prediction_supported &&
5796 ((generic_state->frame_type == SLICE_TYPE_P && pic_param->pic_fields.bits.weighted_pred_flag) ||
5797 (generic_state->frame_type == SLICE_TYPE_B && pic_param->pic_fields.bits.weighted_bipred_idc == INTEL_AVC_WP_MODE_EXPLICIT))) {
5798 if (slice_param->luma_weight_l0_flag & 1) {
5799 gen9_avc_kernel_wp(ctx, encode_state, encoder_context, 0);
5801 } else if (!(slice_param->chroma_weight_l0_flag & 1)) {
5802 pic_param->pic_fields.bits.weighted_pred_flag = 0;// it should be handled in app
5805 if (generic_state->frame_type == SLICE_TYPE_B && pic_param->pic_fields.bits.weighted_bipred_idc == INTEL_AVC_WP_MODE_EXPLICIT) {
5806 if (slice_param->luma_weight_l1_flag & 1) {
5807 gen9_avc_kernel_wp(ctx, encode_state, encoder_context, 1);
5808 } else if (!((slice_param->luma_weight_l0_flag & 1) ||
5809 (slice_param->chroma_weight_l0_flag & 1) ||
5810 (slice_param->chroma_weight_l1_flag & 1))) {
5811 pic_param->pic_fields.bits.weighted_bipred_idc = INTEL_AVC_WP_MODE_DEFAULT;// it should be handled in app
/* Final full MBEnc pass ("false" = normal encode, not I-frame distortion). */
5817 gen9_avc_kernel_mbenc(ctx, encode_state, encoder_context, false);
5819 /*ignore the reset vertical line kernel*/
5821 return VA_STATUS_SUCCESS;
/*
 * Top-level VME (ENC) pipeline entry point for one frame:
 * update/check parameters, allocate GPE resources, then drive the
 * prepare -> init -> run -> final kernel stages.
 * NOTE(review): the "profile" argument used below implies an elided
 * "VAProfile profile" parameter line (original line 5826), and the bodies
 * of the error checks (the "return va_status;" statements) are also
 * elided from this listing — confirm against the unabridged file.
 */
5825 gen9_avc_vme_pipeline(VADriverContextP ctx,
5827 struct encode_state *encode_state,
5828 struct intel_encoder_context *encoder_context)
5832 gen9_avc_update_parameters(ctx, profile, encode_state, encoder_context);
5834 va_status = gen9_avc_encode_check_parameter(ctx, encode_state, encoder_context);
5835 if (va_status != VA_STATUS_SUCCESS)
5838 va_status = gen9_avc_allocate_resources(ctx, encode_state, encoder_context);
5839 if (va_status != VA_STATUS_SUCCESS)
5842 va_status = gen9_avc_vme_gpe_kernel_prepare(ctx, encode_state, encoder_context);
5843 if (va_status != VA_STATUS_SUCCESS)
5846 va_status = gen9_avc_vme_gpe_kernel_init(ctx, encode_state, encoder_context);
5847 if (va_status != VA_STATUS_SUCCESS)
5850 va_status = gen9_avc_vme_gpe_kernel_run(ctx, encode_state, encoder_context);
5851 if (va_status != VA_STATUS_SUCCESS)
/* The final stage only updates state flags; its return value is not checked. */
5854 gen9_avc_vme_gpe_kernel_final(ctx, encode_state, encoder_context);
5856 return VA_STATUS_SUCCESS;
/*
 * Tear down the VME context: destroy all kernel GPE contexts and free the
 * per-context state allocations.
 * NOTE(review): only "free(generic_state)" is visible here; the elided
 * lines 5877-5887 presumably free generic_ctx, avc_ctx, avc_state and the
 * vme_context itself — verify against the unabridged file before assuming
 * this function leaks.
 */
5860 gen9_avc_vme_context_destroy(void * context)
5862 struct encoder_vme_mfc_context *vme_context = (struct encoder_vme_mfc_context *)context;
5863 struct generic_encoder_context *generic_ctx;
5864 struct i965_avc_encoder_context *avc_ctx;
5865 struct generic_enc_codec_state *generic_state;
5866 struct avc_enc_state *avc_state;
5871 generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
5872 avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
5873 generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
5874 avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
/* Releases all kernel contexts / GPE resources owned by vme_context. */
5876 gen9_avc_kernel_destroy(vme_context);
5880 free(generic_state);
/*
 * One-time kernel setup: load/initialize every gen9 AVC GPE kernel context
 * (scaling, BRC, ME, MBEnc, WP, SFD) and wire the generic encoder
 * callbacks (curbe setters and surface senders) to the gen9 AVC
 * implementations. KBL/GLK use a different 4x-scaling curbe setter.
 */
5888 gen9_avc_kernel_init(VADriverContextP ctx,
5889 struct intel_encoder_context *encoder_context)
5891 struct i965_driver_data *i965 = i965_driver_data(ctx);
5892 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
5893 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
5894 struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
/* Load each kernel family into its own GPE context. */
5896 gen9_avc_kernel_init_scaling(ctx, generic_ctx, &avc_ctx->context_scaling);
5897 gen9_avc_kernel_init_brc(ctx, generic_ctx, &avc_ctx->context_brc);
5898 gen9_avc_kernel_init_me(ctx, generic_ctx, &avc_ctx->context_me);
5899 gen9_avc_kernel_init_mbenc(ctx, generic_ctx, &avc_ctx->context_mbenc);
5900 gen9_avc_kernel_init_wp(ctx, generic_ctx, &avc_ctx->context_wp);
5901 gen9_avc_kernel_init_sfd(ctx, generic_ctx, &avc_ctx->context_sfd);
/* Curbe (constant URB entry) setter callbacks. */
5904 generic_ctx->pfn_set_curbe_scaling2x = gen9_avc_set_curbe_scaling2x;
5905 generic_ctx->pfn_set_curbe_scaling4x = gen9_avc_set_curbe_scaling4x;
5906 generic_ctx->pfn_set_curbe_me = gen9_avc_set_curbe_me;
5907 generic_ctx->pfn_set_curbe_mbenc = gen9_avc_set_curbe_mbenc;
5908 generic_ctx->pfn_set_curbe_brc_init_reset = gen9_avc_set_curbe_brc_init_reset;
5909 generic_ctx->pfn_set_curbe_brc_frame_update = gen9_avc_set_curbe_brc_frame_update;
5910 generic_ctx->pfn_set_curbe_brc_mb_update = gen9_avc_set_curbe_brc_mb_update;
5911 generic_ctx->pfn_set_curbe_sfd = gen9_avc_set_curbe_sfd;
5912 generic_ctx->pfn_set_curbe_wp = gen9_avc_set_curbe_wp;
/* Surface-state binding callbacks. */
5914 generic_ctx->pfn_send_scaling_surface = gen9_avc_send_surface_scaling;
5915 generic_ctx->pfn_send_me_surface = gen9_avc_send_surface_me;
5916 generic_ctx->pfn_send_mbenc_surface = gen9_avc_send_surface_mbenc;
5917 generic_ctx->pfn_send_brc_init_reset_surface = gen9_avc_send_surface_brc_init_reset;
5918 generic_ctx->pfn_send_brc_frame_update_surface = gen9_avc_send_surface_brc_frame_update;
5919 generic_ctx->pfn_send_brc_mb_update_surface = gen9_avc_send_surface_brc_mb_update;
5920 generic_ctx->pfn_send_sfd_surface = gen9_avc_send_surface_sfd;
5921 generic_ctx->pfn_send_wp_surface = gen9_avc_send_surface_wp;
/* Platform-specific override: SKL/BXT keep the gen9 scaling4x curbe
 * (redundant with the default above), KBL/GLK use the gen9.5 variant. */
5923 if (IS_SKL(i965->intel.device_info) ||
5924 IS_BXT(i965->intel.device_info))
5925 generic_ctx->pfn_set_curbe_scaling4x = gen9_avc_set_curbe_scaling4x;
5926 else if (IS_KBL(i965->intel.device_info) ||
5927 IS_GLK(i965->intel.device_info))
5928 generic_ctx->pfn_set_curbe_scaling4x = gen95_avc_set_curbe_scaling4x;
5933 PAK pipeline related function
5936 intel_avc_enc_slice_type_fixup(int slice_type);
/*
 * Emit the MFX_PIPE_MODE_SELECT command (5 DWs) configuring the MFX engine
 * for AVC encode: long-format commands, encoding mode, and pre/post
 * deblocking outputs depending on which reconstruction buffer is in use.
 */
5939 gen9_mfc_avc_pipe_mode_select(VADriverContextP ctx,
5940 struct encode_state *encode_state,
5941 struct intel_encoder_context *encoder_context)
5943 struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
5944 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)pak_context->private_enc_ctx;
5945 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
5946 struct intel_batchbuffer *batch = encoder_context->base.batch;
5948 BEGIN_BCS_BATCH(batch, 5);
5950 OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
5951 OUT_BCS_BATCH(batch,
5953 (MFX_LONG_MODE << 17) | /* Must be long format for encoder */
5954 (MFD_MODE_VLD << 15) |
5955 (0 << 13) | /* Non-VDEnc mode is 0*/
/* Stream-out is needed on every PAK pass except the last one, so later
 * passes can re-encode from the streamed-out MB data. */
5956 ((generic_state->curr_pak_pass != (generic_state->num_pak_passes - 1)) << 10) | /* Stream-Out Enable */
5957 ((!!avc_ctx->res_post_deblocking_output.bo) << 9) | /* Post Deblocking Output */
5958 ((!!avc_ctx->res_pre_deblocking_output.bo) << 8) | /* Pre Deblocking Output */
5959 (0 << 7) | /* Scaled surface enable */
5960 (0 << 6) | /* Frame statistics stream out enable */
5961 (0 << 5) | /* not in stitch mode */
5962 (1 << 4) | /* encoding mode */
5963 (MFX_FORMAT_AVC << 0));
5964 OUT_BCS_BATCH(batch,
5965 (0 << 7) | /* expand NOA bus flag */
5966 (0 << 6) | /* disable slice-level clock gating */
5967 (0 << 5) | /* disable clock gating for NOA */
5968 (0 << 4) | /* terminate if AVC motion and POC table error occurs */
5969 (0 << 3) | /* terminate if AVC mbdata error occurs */
5970 (0 << 2) | /* terminate if AVC CABAC/CAVLC decode error occurs */
/* NOTE(review): the trailing bits of this DW (original lines 5971-5972)
 * are elided from this listing. */
5973 OUT_BCS_BATCH(batch, 0);
5974 OUT_BCS_BATCH(batch, 0);
5976 ADVANCE_BCS_BATCH(batch);
/*
 * Emit MFX_SURFACE_STATE (6 DWs) describing one planar 4:2:0 surface with
 * interleaved U/V (NV12), Y-major tiled, to the MFX engine.
 * NOTE(review): the "id" used in DW1 implies an elided surface-id
 * parameter line (around original line 5983) — confirm its type against
 * the unabridged file.
 */
5980 gen9_mfc_avc_surface_state(VADriverContextP ctx,
5981 struct intel_encoder_context *encoder_context,
5982 struct i965_gpe_resource *gpe_resource,
5985 struct intel_batchbuffer *batch = encoder_context->base.batch;
5987 BEGIN_BCS_BATCH(batch, 6);
5989 OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
5990 OUT_BCS_BATCH(batch, id);
5991 OUT_BCS_BATCH(batch,
5992 ((gpe_resource->height - 1) << 18) |
5993 ((gpe_resource->width - 1) << 4));
5994 OUT_BCS_BATCH(batch,
5995 (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
5996 (1 << 27) | /* must be 1 for interleave U/V, hardware requirement */
5997 ((gpe_resource->pitch - 1) << 3) | /* pitch */
5998 (0 << 2) | /* must be 0 for interleave U/V */
5999 (1 << 1) | /* must be tiled */
6000 (I965_TILEWALK_YMAJOR << 0)); /* tile walk, TILEWALK_YMAJOR */
6001 OUT_BCS_BATCH(batch,
6002 (0 << 16) | /* must be 0 for interleave U/V */
6003 (gpe_resource->y_cb_offset)); /* y offset for U(cb) */
/* For interleaved U/V the Cr offset equals the Cb offset, so DW5
 * intentionally repeats the same value as DW4. */
6004 OUT_BCS_BATCH(batch,
6005 (0 << 16) | /* must be 0 for interleave U/V */
6006 (gpe_resource->y_cb_offset)); /* y offset for U(cb) */
6008 ADVANCE_BCS_BATCH(batch);
/*
 * Emit MFX_PIPE_BUF_ADDR_STATE (65 DWs): all frame-level buffer addresses
 * the MFX engine needs — deblocking outputs, source picture, MB status,
 * row-store scratch buffers, and the 16 reference picture slots.
 */
6012 gen9_mfc_avc_pipe_buf_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
6014 struct i965_driver_data *i965 = i965_driver_data(ctx);
6015 struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
6016 struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)pak_context->generic_enc_ctx;
6017 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)pak_context->private_enc_ctx;
6018 struct intel_batchbuffer *batch = encoder_context->base.batch;
6021 BEGIN_BCS_BATCH(batch, 65);
6023 OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (65 - 2));
6025 /* the DW1-3 is for pre_deblocking */
6026 OUT_BUFFER_3DW(batch, avc_ctx->res_pre_deblocking_output.bo, 1, 0, i965->intel.mocs_state);
6028 /* the DW4-6 is for the post_deblocking */
6029 OUT_BUFFER_3DW(batch, avc_ctx->res_post_deblocking_output.bo, 1, 0, i965->intel.mocs_state);
6031 /* the DW7-9 is for the uncompressed_picture */
6032 OUT_BUFFER_3DW(batch, generic_ctx->res_uncompressed_input_surface.bo, 0, 0, i965->intel.mocs_state);
6034 /* the DW10-12 is for PAK information (write) */
6035 OUT_BUFFER_3DW(batch, avc_ctx->res_pak_mb_status_buffer.bo, 1, 0, i965->intel.mocs_state);//?
6037 /* the DW13-15 is for the intra_row_store_scratch */
6038 OUT_BUFFER_3DW(batch, avc_ctx->res_intra_row_store_scratch_buffer.bo, 1, 0, i965->intel.mocs_state);
6040 /* the DW16-18 is for the deblocking filter */
6041 OUT_BUFFER_3DW(batch, avc_ctx->res_deblocking_filter_row_store_scratch_buffer.bo, 1, 0, i965->intel.mocs_state);
6043 /* the DW 19-50 is for Reference pictures*/
/* 16 reference slots, 2 DWs (64-bit address) each. */
6044 for (i = 0; i < ARRAY_ELEMS(avc_ctx->list_reference_res); i++) {
6045 OUT_BUFFER_2DW(batch, avc_ctx->list_reference_res[i].bo, 0, 0);
6048 /* DW 51, reference picture attributes */
6049 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
6051 /* The DW 52-54 is for PAK information (read) */
6052 OUT_BUFFER_3DW(batch, avc_ctx->res_pak_mb_status_buffer.bo, 1, 0, i965->intel.mocs_state);
6054 /* the DW 55-57 is the ILDB buffer */
6055 OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
6057 /* the DW 58-60 is the second ILDB buffer */
6058 OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
6060 /* DW 61, memory compress enable & mode */
6061 OUT_BCS_BATCH(batch, 0);
6063 /* the DW 62-64 is the buffer */
6064 OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
6066 ADVANCE_BCS_BATCH(batch);
/*
 * Emit MFX_IND_OBJ_BASE_ADDR_STATE (26 DWs): indirect object base
 * addresses — the MV data buffer of the reconstructed surface and the
 * PAK-BSE output (compressed bitstream) range.
 * NOTE(review): the body of the early-exit check after line 6087 (the
 * "return" on a missing reconstructed surface) is elided in this listing.
 */
6070 gen9_mfc_avc_ind_obj_base_addr_state(VADriverContextP ctx,
6071 struct encode_state *encode_state,
6072 struct intel_encoder_context *encoder_context)
6074 struct i965_driver_data *i965 = i965_driver_data(ctx);
6075 struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
6076 struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)pak_context->generic_enc_ctx;
6077 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
6078 struct intel_batchbuffer *batch = encoder_context->base.batch;
6079 struct object_surface *obj_surface;
6080 struct gen9_surface_avc *avc_priv_surface;
6081 unsigned int size = 0;
6082 unsigned int w_mb = generic_state->frame_width_in_mbs;
6083 unsigned int h_mb = generic_state->frame_height_in_mbs;
6085 obj_surface = encode_state->reconstructed_object;
6087 if (!obj_surface || !obj_surface->private_data)
6089 avc_priv_surface = obj_surface->private_data;
6091 BEGIN_BCS_BATCH(batch, 26);
6093 OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2));
6094 /* The DW1-5 is for the MFX indirect bistream offset */
6095 OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
6096 OUT_BUFFER_2DW(batch, NULL, 0, 0);
6098 /* the DW6-10 is for MFX Indirect MV Object Base Address */
/* MV buffer size: 32 MVs per MB, 4 bytes each. */
6099 size = w_mb * h_mb * 32 * 4;
6100 OUT_BUFFER_3DW(batch,
6101 avc_priv_surface->res_mv_data_surface.bo,
6104 i965->intel.mocs_state);
/* Upper bound of the MV object, page-aligned. */
6105 OUT_BUFFER_2DW(batch,
6106 avc_priv_surface->res_mv_data_surface.bo,
6108 ALIGN(size, 0x1000));
6110 /* The DW11-15 is for MFX IT-COFF. Not used on encoder */
6111 OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
6112 OUT_BUFFER_2DW(batch, NULL, 0, 0);
6114 /* The DW16-20 is for MFX indirect DBLK. Not used on encoder */
6115 OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
6116 OUT_BUFFER_2DW(batch, NULL, 0, 0);
6118 /* The DW21-25 is for MFC Indirect PAK-BSE Object Base Address for Encoder
6119 * Note: an offset is specified in MFX_AVC_SLICE_STATE
6121 OUT_BUFFER_3DW(batch,
6122 generic_ctx->compressed_bitstream.res.bo,
6125 i965->intel.mocs_state);
6126 OUT_BUFFER_2DW(batch,
6127 generic_ctx->compressed_bitstream.res.bo,
6129 generic_ctx->compressed_bitstream.end_offset);
6131 ADVANCE_BCS_BATCH(batch);
/*
 * Emit MFX_BSP_BUF_BASE_ADDR_STATE (10 DWs): only the BSD/MPC row-store
 * scratch buffer is needed for encode; the MPR row-store and bitplane
 * buffers are decode-only and left NULL.
 */
6135 gen9_mfc_avc_bsp_buf_base_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
6137 struct i965_driver_data *i965 = i965_driver_data(ctx);
6138 struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
6139 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)pak_context->private_enc_ctx;
6140 struct intel_batchbuffer *batch = encoder_context->base.batch;
6142 BEGIN_BCS_BATCH(batch, 10);
6144 OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
6146 /* The DW1-3 is for bsd/mpc row store scratch buffer */
6147 OUT_BUFFER_3DW(batch, avc_ctx->res_bsd_mpc_row_store_scratch_buffer.bo, 1, 0, i965->intel.mocs_state);
6149 /* The DW4-6 is for MPR Row Store Scratch Buffer Base Address, ignore for encoder */
6150 OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
6152 /* The DW7-9 is for Bitplane Read Buffer Base Address, ignore for encoder */
6153 OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
6155 ADVANCE_BCS_BATCH(batch);
/*
 * Emit MFX_AVC_DIRECTMODE_STATE (71 DWs): the direct-mode MV buffers for
 * all reference slots plus the current frame, followed by the POC list.
 * NOTE(review): several lines are elided here (the reloc offset arguments
 * around lines 6180/6192, the else-branch braces, and the gap between the
 * reference loop and the attribute DW) — verify against the unabridged
 * file before changing emission order.
 */
6159 gen9_mfc_avc_directmode_state(VADriverContextP ctx,
6160 struct intel_encoder_context *encoder_context)
6162 struct i965_driver_data *i965 = i965_driver_data(ctx);
6163 struct intel_batchbuffer *batch = encoder_context->base.batch;
6164 struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
6165 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)pak_context->private_enc_ctx;
6166 struct avc_enc_state * avc_state = (struct avc_enc_state *)pak_context->private_enc_state;
6170 BEGIN_BCS_BATCH(batch, 71);
6172 OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));
6174 /* Reference frames and Current frames */
6175 /* the DW1-32 is for the direct MV for reference */
/* Two DWs (64-bit address) per reference slot; top/bottom pairs share a slot. */
6176 for (i = 0; i < NUM_MFC_AVC_DMV_BUFFERS - 2; i += 2) {
6177 if (avc_ctx->res_direct_mv_buffersr[i].bo != NULL) {
6178 OUT_BCS_RELOC64(batch, avc_ctx->res_direct_mv_buffersr[i].bo,
6179 I915_GEM_DOMAIN_INSTRUCTION, 0,
/* Slots with no MV buffer are padded with zero DWs. */
6182 OUT_BCS_BATCH(batch, 0);
6183 OUT_BCS_BATCH(batch, 0);
6187 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
6189 /* the DW34-36 is the MV for the current frame */
6190 OUT_BCS_RELOC64(batch, avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS - 2].bo,
6191 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
6194 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
/* POC list: 32 entries for the references, then the current frame's pair. */
6197 for (i = 0; i < 32; i++) {
6198 OUT_BCS_BATCH(batch, avc_state->top_field_poc[i]);
6200 OUT_BCS_BATCH(batch, avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 2]);
6201 OUT_BCS_BATCH(batch, avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 1]);
6203 ADVANCE_BCS_BATCH(batch);
/*
 * Emit one MFX_QM_STATE command (18 DWs) carrying a scaling-matrix payload
 * of up to 16 DWs, zero-padded to the full 64 bytes.
 * NOTE(review): the "qm_type" and "qm_length" parameter lines (original
 * lines 6208/6210) are elided from this listing; both are used below.
 */
6207 gen9_mfc_qm_state(VADriverContextP ctx,
6209 const unsigned int *qm,
6211 struct intel_encoder_context *encoder_context)
6213 struct intel_batchbuffer *batch = encoder_context->base.batch;
6214 unsigned int qm_buffer[16];
6216 assert(qm_length <= 16);
6217 assert(sizeof(*qm) == 4);
/* Copy the caller's matrix data and zero-fill the remainder. */
6218 memset(qm_buffer, 0, 16 * 4);
6219 memcpy(qm_buffer, qm, qm_length * 4);
6221 BEGIN_BCS_BATCH(batch, 18);
6222 OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
6223 OUT_BCS_BATCH(batch, qm_type << 0);
6224 intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
6225 ADVANCE_BCS_BATCH(batch);
/*
 * Program the four AVC scaling matrices (4x4 intra/inter, 8x8 intra/inter)
 * via MFX_QM_STATE. When neither SPS nor PPS signals scaling matrices, the
 * flat default table (qm_flat, defined elsewhere in this file) is used for
 * all four; otherwise the app-supplied VAIQMatrixBufferH264 lists are used.
 */
6229 gen9_mfc_avc_qm_state(VADriverContextP ctx,
6230 struct encode_state *encode_state,
6231 struct intel_encoder_context *encoder_context)
6233 struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
6234 struct avc_enc_state * avc_state = (struct avc_enc_state *)pak_context->private_enc_state;
6235 VAEncSequenceParameterBufferH264 *seq_param = avc_state->seq_param;
6236 VAEncPictureParameterBufferH264 *pic_param = avc_state->pic_param;
6239 const unsigned int *qm_4x4_intra;
6240 const unsigned int *qm_4x4_inter;
6241 const unsigned int *qm_8x8_intra;
6242 const unsigned int *qm_8x8_inter;
6244 if (!seq_param->seq_fields.bits.seq_scaling_matrix_present_flag
6245 && !pic_param->pic_fields.bits.pic_scaling_matrix_present_flag) {
6246 qm_4x4_intra = qm_4x4_inter = qm_8x8_intra = qm_8x8_inter = qm_flat;
6248 VAIQMatrixBufferH264 *qm;
6249 assert(encode_state->q_matrix && encode_state->q_matrix->buffer);
6250 qm = (VAIQMatrixBufferH264 *)encode_state->q_matrix->buffer;
/* List 0/3 are the intra/inter 4x4 groups; 8x8 lists 0/1 are intra/inter. */
6251 qm_4x4_intra = (unsigned int *)qm->ScalingList4x4[0];
6252 qm_4x4_inter = (unsigned int *)qm->ScalingList4x4[3];
6253 qm_8x8_intra = (unsigned int *)qm->ScalingList8x8[0];
6254 qm_8x8_inter = (unsigned int *)qm->ScalingList8x8[1];
/* 4x4 payloads are 12 DWs (3 lists x 16 bytes); 8x8 payloads are 16 DWs. */
6257 gen9_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm_4x4_intra, 12, encoder_context);
6258 gen9_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm_4x4_inter, 12, encoder_context);
6259 gen9_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm_8x8_intra, 16, encoder_context);
6260 gen9_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm_8x8_inter, 16, encoder_context);
/*
 * Emit one MFX_FQM_STATE command (34 DWs) carrying a forward-quantizer
 * matrix payload of up to 32 DWs, zero-padded to the full 128 bytes.
 * NOTE(review): the "fqm_type" and "fqm_length" parameter lines (original
 * lines 6265/6267) are elided from this listing; both are used below.
 */
6264 gen9_mfc_fqm_state(VADriverContextP ctx,
6266 const unsigned int *fqm,
6268 struct intel_encoder_context *encoder_context)
6270 struct intel_batchbuffer *batch = encoder_context->base.batch;
6271 unsigned int fqm_buffer[32];
6273 assert(fqm_length <= 32);
6274 assert(sizeof(*fqm) == 4);
/* Copy the caller's matrix data and zero-fill the remainder. */
6275 memset(fqm_buffer, 0, 32 * 4);
6276 memcpy(fqm_buffer, fqm, fqm_length * 4);
6278 BEGIN_BCS_BATCH(batch, 34);
6279 OUT_BCS_BATCH(batch, MFX_FQM_STATE | (34 - 2));
6280 OUT_BCS_BATCH(batch, fqm_type << 0);
6281 intel_batchbuffer_data(batch, fqm_buffer, 32 * 4);
6282 ADVANCE_BCS_BATCH(batch);
static void
gen9_mfc_fill_fqm(uint8_t *qm, uint16_t *fqm, int len)
{
    int row, col;

    /* Build the forward quantizer matrix: each entry is the 16.16
     * fixed-point reciprocal of the TRANSPOSED scaling-list entry
     * (note the swapped indices on qm vs fqm). */
    for (row = 0; row < len; row++) {
        for (col = 0; col < len; col++) {
            /* A zero scaling-list entry would divide by zero. */
            assert(qm[col * len + row]);
            fqm[row * len + col] = (1 << 16) / qm[col * len + row];
        }
    }
}
/*
 * Program the four AVC forward-quantizer matrices via MFX_FQM_STATE.
 * Flat default (fqm_flat) when no scaling matrices are signalled;
 * otherwise each FQM is derived from the app's scaling lists with
 * gen9_mfc_fill_fqm (reciprocal of the transposed list).
 * NOTE(review): the local "fqm" array declaration and the loop-variable
 * declaration (around original lines 6313-6318) are elided from this
 * listing — the calls below rely on them.
 */
6297 gen9_mfc_avc_fqm_state(VADriverContextP ctx,
6298 struct encode_state *encode_state,
6299 struct intel_encoder_context *encoder_context)
6301 struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
6302 struct avc_enc_state * avc_state = (struct avc_enc_state *)pak_context->private_enc_state;
6303 VAEncSequenceParameterBufferH264 *seq_param = avc_state->seq_param;
6304 VAEncPictureParameterBufferH264 *pic_param = avc_state->pic_param;
6306 if (!seq_param->seq_fields.bits.seq_scaling_matrix_present_flag
6307 && !pic_param->pic_fields.bits.pic_scaling_matrix_present_flag) {
/* 4x4 payloads: 24 DWs (3 lists x 16 uint16 entries); 8x8: 32 DWs. */
6308 gen9_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, fqm_flat, 24, encoder_context);
6309 gen9_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, fqm_flat, 24, encoder_context);
6310 gen9_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, fqm_flat, 32, encoder_context);
6311 gen9_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, fqm_flat, 32, encoder_context);
6315 VAIQMatrixBufferH264 *qm;
6316 assert(encode_state->q_matrix && encode_state->q_matrix->buffer);
6317 qm = (VAIQMatrixBufferH264 *)encode_state->q_matrix->buffer;
/* Lists 0-2 are the intra 4x4 group, 3-5 the inter group. */
6319 for (i = 0; i < 3; i++)
6320 gen9_mfc_fill_fqm(qm->ScalingList4x4[i], (uint16_t *)fqm + 16 * i, 4);
6321 gen9_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, fqm, 24, encoder_context);
6323 for (i = 3; i < 6; i++)
6324 gen9_mfc_fill_fqm(qm->ScalingList4x4[i], (uint16_t *)fqm + 16 * (i - 3), 4);
6325 gen9_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, fqm, 24, encoder_context);
6327 gen9_mfc_fill_fqm(qm->ScalingList8x8[0], (uint16_t *)fqm, 8);
6328 gen9_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, fqm, 32, encoder_context);
6330 gen9_mfc_fill_fqm(qm->ScalingList8x8[1], (uint16_t *)fqm, 8);
6331 gen9_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, fqm, 32, encoder_context);
6336 gen9_mfc_avc_insert_object(VADriverContextP ctx,
6337 struct intel_encoder_context *encoder_context,
6338 unsigned int *insert_data, int lenght_in_dws, int data_bits_in_last_dw,
6339 int skip_emul_byte_count, int is_last_header, int is_end_of_slice, int emulation_flag,
6340 int slice_header_indicator,
6341 struct intel_batchbuffer *batch)
6343 if (data_bits_in_last_dw == 0)
6344 data_bits_in_last_dw = 32;
6346 BEGIN_BCS_BATCH(batch, lenght_in_dws + 2);
6348 OUT_BCS_BATCH(batch, MFX_INSERT_OBJECT | (lenght_in_dws));
6349 OUT_BCS_BATCH(batch,
6350 (0 << 16) | /* always start at offset 0 */
6351 (slice_header_indicator << 14) |
6352 (data_bits_in_last_dw << 8) |
6353 (skip_emul_byte_count << 4) |
6354 (!!emulation_flag << 3) |
6355 ((!!is_last_header) << 2) |
6356 ((!!is_end_of_slice) << 1) |
6357 (0 << 0)); /* check this flag */
6358 intel_batchbuffer_data(batch, insert_data, lenght_in_dws * 4);
6360 ADVANCE_BCS_BATCH(batch);
/*
 * Scan the packed raw data attached to slice 0 and insert any Access Unit
 * Delimiter NAL it contains, so the AUD precedes all other headers in the
 * output stream.
 * NOTE(review): several argument lines of the gen9_mfc_avc_insert_object
 * call (original lines 6392-6407) are elided from this listing — the
 * flag values passed there cannot be confirmed from this view.
 */
6364 gen9_mfc_avc_insert_aud_packed_data(VADriverContextP ctx,
6365 struct encode_state *encode_state,
6366 struct intel_encoder_context *encoder_context,
6367 struct intel_batchbuffer *batch)
6369 VAEncPackedHeaderParameterBuffer *param = NULL;
6370 unsigned int length_in_bits;
6371 unsigned int *header_data = NULL;
6372 unsigned char *nal_type = NULL;
6373 int count, i, start_index;
6375 count = encode_state->slice_rawdata_count[0];
6376 start_index = (encode_state->slice_rawdata_index[0] & SLICE_PACKED_DATA_INDEX_MASK);
6378 for (i = 0; i < count; i++) {
6379 unsigned int skip_emul_byte_cnt;
6381 header_data = (unsigned int *)encode_state->packed_header_data_ext[start_index + i]->buffer;
6382 nal_type = (unsigned char *)header_data;
6384 param = (VAEncPackedHeaderParameterBuffer *)(encode_state->packed_header_params_ext[start_index + i]->buffer);
6386 length_in_bits = param->bit_length;
6388 skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
/* The NAL-type byte follows the start code; low 5 bits identify an AUD. */
6390 if ((*(nal_type + skip_emul_byte_cnt - 1) & 0x1f) == AVC_NAL_DELIMITER) {
6391 gen9_mfc_avc_insert_object(ctx,
6394 ALIGN(length_in_bits, 32) >> 5,
6395 length_in_bits & 0x1f,
6399 !param->has_emulation_bytes,
/*
 * Insert the packed header data for one slice: first all non-slice-header
 * raw packets (skipping AUDs, which were inserted earlier), then the slice
 * header itself — either the app-supplied packed slice header, or one
 * built by the driver via build_avc_slice_header() when none was passed.
 * NOTE(review): the "int slice_index" parameter line (original line 6411)
 * and several argument lines of the insert_object calls are elided from
 * this listing — confirm the flag values against the unabridged file.
 */
6408 gen9_mfc_avc_insert_slice_packed_data(VADriverContextP ctx,
6409 struct encode_state *encode_state,
6410 struct intel_encoder_context *encoder_context,
6412 struct intel_batchbuffer *batch)
6414 VAEncPackedHeaderParameterBuffer *param = NULL;
6415 unsigned int length_in_bits;
6416 unsigned int *header_data = NULL;
6417 int count, i, start_index;
6418 int slice_header_index;
6419 unsigned char *nal_type = NULL;
/* Index 0 means "no packed slice header supplied for this slice". */
6421 if (encode_state->slice_header_index[slice_index] == 0)
6422 slice_header_index = -1;
6424 slice_header_index = (encode_state->slice_header_index[slice_index] & SLICE_PACKED_DATA_INDEX_MASK);
6426 count = encode_state->slice_rawdata_count[slice_index];
6427 start_index = (encode_state->slice_rawdata_index[slice_index] & SLICE_PACKED_DATA_INDEX_MASK);
6429 for (i = 0; i < count; i++) {
6430 unsigned int skip_emul_byte_cnt;
6432 header_data = (unsigned int *)encode_state->packed_header_data_ext[start_index + i]->buffer;
6433 nal_type = (unsigned char *)header_data;
6435 param = (VAEncPackedHeaderParameterBuffer *)(encode_state->packed_header_params_ext[start_index + i]->buffer);
6437 length_in_bits = param->bit_length;
6439 skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
6441 /* skip the slice header packed data type as it is lastly inserted */
6442 if (param->type == VAEncPackedHeaderSlice || (*(nal_type + skip_emul_byte_cnt - 1) & 0x1f) == AVC_NAL_DELIMITER)
6445 /* as the slice header is still required, the last header flag is set to
6448 gen9_mfc_avc_insert_object(ctx,
6451 ALIGN(length_in_bits, 32) >> 5,
6452 length_in_bits & 0x1f,
6456 !param->has_emulation_bytes,
/* No packed slice header from the app: generate one in the driver. */
6461 if (slice_header_index == -1) {
6462 VAEncSequenceParameterBufferH264 *seq_param = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
6463 VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
6464 VAEncSliceParameterBufferH264 *slice_params = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer;
6465 unsigned char *slice_header = NULL;
6466 int slice_header_length_in_bits = 0;
6468 /* No slice header data is passed. And the driver needs to generate it */
6469 /* For the Normal H264 */
6470 slice_header_length_in_bits = build_avc_slice_header(seq_param,
6474 gen9_mfc_avc_insert_object(ctx,
6476 (unsigned int *)slice_header,
6477 ALIGN(slice_header_length_in_bits, 32) >> 5,
6478 slice_header_length_in_bits & 0x1f,
6479 5, /* first 5 bytes are start code + nal unit type */
/* App-supplied packed slice header path. */
6486 unsigned int skip_emul_byte_cnt;
6488 header_data = (unsigned int *)encode_state->packed_header_data_ext[slice_header_index]->buffer;
6490 param = (VAEncPackedHeaderParameterBuffer *)(encode_state->packed_header_params_ext[slice_header_index]->buffer);
6491 length_in_bits = param->bit_length;
6493 /* as the slice header is the last header data for one slice,
6494 * the last header flag is set to one.
6496 skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
6498 gen9_mfc_avc_insert_object(ctx,
6501 ALIGN(length_in_bits, 32) >> 5,
6502 length_in_bits & 0x1f,
6506 !param->has_emulation_bytes,
/* Insert the packed bitstream headers for one slice into the PAK batch.
 * For the first slice of a frame this emits (in order) the AUD packed data,
 * then the application-supplied SPS, PPS and SEI packed headers, each via
 * gen9_mfc_avc_insert_object().  The slice header itself is handled last
 * (by gen9_mfc_avc_insert_slice_packed_data).
 * NOTE(review): several statements are missing from this view of the file
 * (e.g. the remaining arguments of each insert_object call); comments below
 * describe only the visible code.  "inset" in the name is a historical typo
 * for "insert" that cannot be fixed without breaking callers. */
6515 gen9_mfc_avc_inset_headers(VADriverContextP ctx,
6516 struct encode_state *encode_state,
6517 struct intel_encoder_context *encoder_context,
6518 VAEncSliceParameterBufferH264 *slice_param,
6520 struct intel_batchbuffer *batch)
6522 struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
6523 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
6524 int idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_SPS);
6525 unsigned int internal_rate_mode = generic_state->internal_rate_mode;
6526 unsigned int skip_emul_byte_cnt;
/* Sequence-level headers are emitted only once, before the first slice. */
6528 if (slice_index == 0) {
6530 /* if AUD exists, insert it first */
6531 gen9_mfc_avc_insert_aud_packed_data(ctx, encode_state, encoder_context, batch);
/* SPS packed header, if the application supplied one. */
6533 if (encode_state->packed_header_data[idx]) {
6534 VAEncPackedHeaderParameterBuffer *param = NULL;
6535 unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
6536 unsigned int length_in_bits;
6538 assert(encode_state->packed_header_param[idx]);
6539 param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
6540 length_in_bits = param->bit_length;
/* Count of start-code/NAL bytes that must bypass emulation prevention. */
6542 skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
6543 gen9_mfc_avc_insert_object(ctx,
6546 ALIGN(length_in_bits, 32) >> 5,
6547 length_in_bits & 0x1f,
6551 !param->has_emulation_bytes,
/* PPS packed header, if present. */
6556 idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_PPS);
6558 if (encode_state->packed_header_data[idx]) {
6559 VAEncPackedHeaderParameterBuffer *param = NULL;
6560 unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
6561 unsigned int length_in_bits;
6563 assert(encode_state->packed_header_param[idx]);
6564 param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
6565 length_in_bits = param->bit_length;
6567 skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
6569 gen9_mfc_avc_insert_object(ctx,
6572 ALIGN(length_in_bits, 32) >> 5,
6573 length_in_bits & 0x1f,
6577 !param->has_emulation_bytes,
/* SEI packed header, if present. */
6582 idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_SEI);
6584 if (encode_state->packed_header_data[idx]) {
6585 VAEncPackedHeaderParameterBuffer *param = NULL;
6586 unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
6587 unsigned int length_in_bits;
6589 assert(encode_state->packed_header_param[idx]);
6590 param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
6591 length_in_bits = param->bit_length;
6593 skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
6594 gen9_mfc_avc_insert_object(ctx,
6597 ALIGN(length_in_bits, 32) >> 5,
6598 length_in_bits & 0x1f,
6602 !param->has_emulation_bytes,
/* NOTE(review): the branch condition above this else-if is not visible here. */
6605 } else if (internal_rate_mode == VA_RC_CBR) {
/* Finally emit the slice header packed data for this slice. */
6610 gen9_mfc_avc_insert_slice_packed_data(ctx,
/* Program the MFX_AVC_SLICE_STATE command (11 DWs) for one slice.
 * Derives slice geometry (MB positions of this slice and the next),
 * reference counts and weighted-prediction denominators from the picture
 * and slice parameters, plus BRC-related enables (rate-control counter,
 * RC panic, inter rounding).
 * NOTE(review): some statements are missing from this view (e.g. the
 * computation of max_qp_n/max_qp_p, grow/shrink and the correct[] values,
 * and a few OUT_BCS_BATCH argument lines); comments describe only what is
 * visible. */
6618 gen9_mfc_avc_slice_state(VADriverContextP ctx,
6619 struct encode_state *encode_state,
6620 struct intel_encoder_context *encoder_context,
6621 VAEncPictureParameterBufferH264 *pic_param,
6622 VAEncSliceParameterBufferH264 *slice_param,
6623 VAEncSliceParameterBufferH264 *next_slice_param,
6624 struct intel_batchbuffer *batch)
6626 struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
6627 struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)pak_context->generic_enc_ctx;
6628 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
6629 struct avc_enc_state * avc_state = (struct avc_enc_state *)pak_context->private_enc_state;
6630 unsigned int luma_log2_weight_denom = slice_param->luma_log2_weight_denom;
6631 unsigned int chroma_log2_weight_denom = slice_param->chroma_log2_weight_denom;
6632 unsigned char correct[6], grow, shrink;
6633 int slice_hor_pos, slice_ver_pos, next_slice_hor_pos, next_slice_ver_pos;
6634 int max_qp_n, max_qp_p;
6636 int weighted_pred_idc = 0;
6637 int num_ref_l0 = 0, num_ref_l1 = 0;
6638 int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
6639 int slice_qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
6640 unsigned int rc_panic_enable = 0;
6641 unsigned int rate_control_counter_enable = 0;
6642 unsigned int rounding_value = 0;
6643 unsigned int rounding_inter_enable = 0;
/* Convert the first MB address of this slice into (x, y) MB coordinates. */
6645 slice_hor_pos = slice_param->macroblock_address % generic_state->frame_width_in_mbs;
6646 slice_ver_pos = slice_param->macroblock_address / generic_state->frame_width_in_mbs;
/* The hardware also needs the start of the NEXT slice; for the last slice
 * that is the first MB row past the bottom of the frame. */
6648 if (next_slice_param) {
6649 next_slice_hor_pos = next_slice_param->macroblock_address % generic_state->frame_width_in_mbs;
6650 next_slice_ver_pos = next_slice_param->macroblock_address / generic_state->frame_width_in_mbs;
6652 next_slice_hor_pos = 0;
6653 next_slice_ver_pos = generic_state->frame_height_in_mbs;
/* Per-slice-type setup of weighting and reference list sizes. */
6656 if (slice_type == SLICE_TYPE_I) {
6657 luma_log2_weight_denom = 0;
6658 chroma_log2_weight_denom = 0;
6659 } else if (slice_type == SLICE_TYPE_P) {
6660 weighted_pred_idc = pic_param->pic_fields.bits.weighted_pred_flag;
6661 num_ref_l0 = pic_param->num_ref_idx_l0_active_minus1 + 1;
6662 rounding_inter_enable = avc_state->rounding_inter_enable;
6663 rounding_value = avc_state->rounding_value;
/* A slice-level override takes precedence over the picture-level count. */
6665 if (slice_param->num_ref_idx_active_override_flag)
6666 num_ref_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
6667 } else if (slice_type == SLICE_TYPE_B) {
6668 weighted_pred_idc = pic_param->pic_fields.bits.weighted_bipred_idc;
6669 num_ref_l0 = pic_param->num_ref_idx_l0_active_minus1 + 1;
6670 num_ref_l1 = pic_param->num_ref_idx_l1_active_minus1 + 1;
6671 rounding_inter_enable = avc_state->rounding_inter_enable;
6672 rounding_value = avc_state->rounding_value;
6674 if (slice_param->num_ref_idx_active_override_flag) {
6675 num_ref_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
6676 num_ref_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
/* Implicit weighted bi-prediction: fixed denominators per H.264 8.4.3. */
6679 if (weighted_pred_idc == 2) {
6680 /* 8.4.3 - Derivation process for prediction weights (8-279) */
6681 luma_log2_weight_denom = 5;
6682 chroma_log2_weight_denom = 5;
/* BRC: the per-MB rate counter only runs on re-encode (PAK pass > 0);
 * RC panic only on the final pass, and never for CQP or min/max-QP mode. */
6691 rate_control_counter_enable = (generic_state->brc_enabled && (generic_state->curr_pak_pass != 0));
6692 rc_panic_enable = (avc_state->rc_panic_enable &&
6693 (!avc_state->min_max_qp_enable) &&
6694 (encoder_context->rate_control_mode != VA_RC_CQP) &&
6695 (generic_state->curr_pak_pass == (generic_state->num_pak_passes - 1)));
6697 for (i = 0; i < 6; i++)
/* ---- MFX_AVC_SLICE_STATE: 11 DWs total ---- */
6700 BEGIN_BCS_BATCH(batch, 11);
6702 OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
6703 OUT_BCS_BATCH(batch, slice_type);
6704 OUT_BCS_BATCH(batch,
6705 (num_ref_l1 << 24) |
6706 (num_ref_l0 << 16) |
6707 (chroma_log2_weight_denom << 8) |
6708 (luma_log2_weight_denom << 0));
6709 OUT_BCS_BATCH(batch,
6710 (weighted_pred_idc << 30) |
6711 (((slice_type == SLICE_TYPE_B) ? slice_param->direct_spatial_mv_pred_flag : 0) << 29) |
6712 (slice_param->disable_deblocking_filter_idc << 27) |
6713 (slice_param->cabac_init_idc << 24) |
6715 ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
6716 ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
/* Start position of this slice (MB coordinates + linear MB address). */
6718 OUT_BCS_BATCH(batch,
6719 slice_ver_pos << 24 |
6720 slice_hor_pos << 16 |
6721 slice_param->macroblock_address);
/* Start position of the next slice (frame bottom for the last slice). */
6722 OUT_BCS_BATCH(batch,
6723 next_slice_ver_pos << 16 |
6724 next_slice_hor_pos);
6726 OUT_BCS_BATCH(batch,
6727 (rate_control_counter_enable << 31) |
6728 (1 << 30) | /* ResetRateControlCounter */
6729 (2 << 28) | /* Loose Rate Control */
6730 (0 << 24) | /* RC Stable Tolerance */
6731 (rc_panic_enable << 23) | /* RC Panic Enable */
6732 (1 << 22) | /* CBP mode */
6733 (0 << 21) | /* MB Type Direct Conversion, 0: Enable, 1: Disable */
6734 (0 << 20) | /* MB Type Skip Conversion, 0: Enable, 1: Disable */
6735 (!next_slice_param << 19) | /* Is Last Slice */
6736 (0 << 18) | /* BitstreamOutputFlag Compressed BitStream Output Disable Flag 0:enable 1:disable */
6737 (1 << 17) | /* HeaderPresentFlag */
6738 (1 << 16) | /* SliceData PresentFlag */
6739 (0 << 15) | /* TailPresentFlag */
6740 (1 << 13) | /* RBSP NAL TYPE */
6741 (1 << 12)); /* CabacZeroWordInsertionEnable */
/* Byte offset where the compressed bitstream for this frame begins. */
6743 OUT_BCS_BATCH(batch, generic_ctx->compressed_bitstream.start_offset);
6745 OUT_BCS_BATCH(batch,
6746 (max_qp_n << 24) | /*Target QP - 24 is lowest QP*/
6747 (max_qp_p << 16) | /*Target QP + 20 is highest QP*/
6750 OUT_BCS_BATCH(batch,
6751 (rounding_inter_enable << 31) |
6752 (rounding_value << 28) |
6755 (correct[5] << 20) |
6756 (correct[4] << 16) |
6757 (correct[3] << 12) |
6761 OUT_BCS_BATCH(batch, 0);
6763 ADVANCE_BCS_BATCH(batch);
/* Pack one reference picture's VA flags into the byte layout expected by
 * MFX_AVC_REF_IDX_STATE: bit 6 = long-term flag, bits 5..1 = frame store id
 * (plus a field-parity bit), bit 0 set only for a pure bottom-field ref.
 * NOTE(review): one OR-term of the return expression is missing from this
 * view of the file, so the bit layout description above is partial. */
6767 gen9_mfc_avc_get_ref_idx_state(VAPictureH264 *va_pic, unsigned int frame_store_id)
6769 unsigned int is_long_term =
6770 !!(va_pic->flags & VA_PICTURE_H264_LONG_TERM_REFERENCE);
6771 unsigned int is_top_field =
6772 !!(va_pic->flags & VA_PICTURE_H264_TOP_FIELD);
6773 unsigned int is_bottom_field =
6774 !!(va_pic->flags & VA_PICTURE_H264_BOTTOM_FIELD);
6776 return ((is_long_term << 6) |
6778 (frame_store_id << 1) |
/* bit 0: set only when the reference is a bottom field and not a top field */
6779 ((is_top_field ^ 1) & is_bottom_field));
/* Emit MFX_AVC_REF_IDX_STATE command(s) for the current slice.
 * Builds one packed DWORD per list (one byte per reference, 0x80 = unused)
 * for up to 4 L0 refs (P/B slices) and 4 L1 refs (B slices), then writes a
 * 10-DW command per active list.  The trailing seven DWs are padded with
 * 0x80808080 (all-invalid entries). */
6783 gen9_mfc_avc_ref_idx_state(VADriverContextP ctx,
6784 struct encode_state *encode_state,
6785 struct intel_encoder_context *encoder_context,
6786 VAEncSliceParameterBufferH264 *slice_param,
6787 struct intel_batchbuffer *batch)
6789 struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
6790 struct avc_enc_state * avc_state = (struct avc_enc_state *)pak_context->private_enc_state;
6791 VAPictureH264 *ref_pic;
6792 int i, slice_type, ref_idx_shift;
6793 unsigned int fwd_ref_entry;
6794 unsigned int bwd_ref_entry;
6796 /* max 4 ref frames are allowed for l0 and l1 */
6797 fwd_ref_entry = 0x80808080;
6798 slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
/* Fill the L0 entry bytes for inter slices. */
6800 if ((slice_type == SLICE_TYPE_P) ||
6801 (slice_type == SLICE_TYPE_B)) {
6802 for (i = 0; i < MIN(avc_state->num_refs[0], 4); i++) {
6803 ref_pic = &slice_param->RefPicList0[i];
6804 ref_idx_shift = i * 8;
/* Clear the 0x80 placeholder byte, then insert the packed ref state. */
6806 fwd_ref_entry &= ~(0xFF << ref_idx_shift);
6807 fwd_ref_entry += (gen9_mfc_avc_get_ref_idx_state(ref_pic, avc_state->list_ref_idx[0][i]) << ref_idx_shift);
/* L1 list is only meaningful for B slices. */
6811 bwd_ref_entry = 0x80808080;
6812 if (slice_type == SLICE_TYPE_B) {
6813 for (i = 0; i < MIN(avc_state->num_refs[1], 4); i++) {
6814 ref_pic = &slice_param->RefPicList1[i];
6815 ref_idx_shift = i * 8;
6817 bwd_ref_entry &= ~(0xFF << ref_idx_shift);
6818 bwd_ref_entry += (gen9_mfc_avc_get_ref_idx_state(ref_pic, avc_state->list_ref_idx[1][i]) << ref_idx_shift);
/* Emit the L0 command for P and B slices. */
6822 if ((slice_type == SLICE_TYPE_P) ||
6823 (slice_type == SLICE_TYPE_B)) {
6824 BEGIN_BCS_BATCH(batch, 10);
6825 OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8);
6826 OUT_BCS_BATCH(batch, 0); // L0
6827 OUT_BCS_BATCH(batch, fwd_ref_entry);
/* Remaining 7 DWs: all entries marked unused. */
6829 for (i = 0; i < 7; i++) {
6830 OUT_BCS_BATCH(batch, 0x80808080);
6833 ADVANCE_BCS_BATCH(batch);
/* Emit the L1 command for B slices. */
6836 if (slice_type == SLICE_TYPE_B) {
6837 BEGIN_BCS_BATCH(batch, 10);
6838 OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8);
6839 OUT_BCS_BATCH(batch, 1); //Select L1
6840 OUT_BCS_BATCH(batch, bwd_ref_entry); //max 4 reference allowed
6841 for (i = 0; i < 7; i++) {
6842 OUT_BCS_BATCH(batch, 0x80808080);
6844 ADVANCE_BCS_BATCH(batch);
/* Emit MFX_AVC_WEIGHTOFFSET_STATE commands when explicit weighted
 * prediction is enabled.  For each active list a 98-DW command carries 32
 * entries of 6 shorts each: luma weight/offset followed by Cb and Cr
 * weight/offset pairs.  P slices emit only L0; B slices (weighted_bipred_idc
 * == 1) emit L0 and then L1. */
6849 gen9_mfc_avc_weightoffset_state(VADriverContextP ctx,
6850 struct encode_state *encode_state,
6851 struct intel_encoder_context *encoder_context,
6852 VAEncPictureParameterBufferH264 *pic_param,
6853 VAEncSliceParameterBufferH264 *slice_param,
6854 struct intel_batchbuffer *batch)
6857 short weightoffsets[32 * 6];
6859 slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
/* P slice with explicit weighted prediction: L0 table only. */
6861 if (slice_type == SLICE_TYPE_P &&
6862 pic_param->pic_fields.bits.weighted_pred_flag == 1) {
6863 memset(weightoffsets, 0, 32 * 6 * sizeof(short));
6864 for (i = 0; i < 32; i++) {
6865 weightoffsets[i * 6 + 0] = slice_param->luma_weight_l0[i];
6866 weightoffsets[i * 6 + 1] = slice_param->luma_offset_l0[i];
6867 weightoffsets[i * 6 + 2] = slice_param->chroma_weight_l0[i][0];
6868 weightoffsets[i * 6 + 3] = slice_param->chroma_offset_l0[i][0];
6869 weightoffsets[i * 6 + 4] = slice_param->chroma_weight_l0[i][1];
6870 weightoffsets[i * 6 + 5] = slice_param->chroma_offset_l0[i][1];
6873 BEGIN_BCS_BATCH(batch, 98);
6874 OUT_BCS_BATCH(batch, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2));
6875 OUT_BCS_BATCH(batch, 0);
6876 intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));
6878 ADVANCE_BCS_BATCH(batch);
/* B slice with explicit weighted bi-prediction: L0 table then L1 table. */
6881 if (slice_type == SLICE_TYPE_B &&
6882 (pic_param->pic_fields.bits.weighted_bipred_idc == 1)) {
6883 memset(weightoffsets, 0, 32 * 6 * sizeof(short));
6884 for (i = 0; i < 32; i++) {
6885 weightoffsets[i * 6 + 0] = slice_param->luma_weight_l0[i];
6886 weightoffsets[i * 6 + 1] = slice_param->luma_offset_l0[i];
6887 weightoffsets[i * 6 + 2] = slice_param->chroma_weight_l0[i][0];
6888 weightoffsets[i * 6 + 3] = slice_param->chroma_offset_l0[i][0];
6889 weightoffsets[i * 6 + 4] = slice_param->chroma_weight_l0[i][1];
6890 weightoffsets[i * 6 + 5] = slice_param->chroma_offset_l0[i][1];
6893 BEGIN_BCS_BATCH(batch, 98);
6894 OUT_BCS_BATCH(batch, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2));
6895 OUT_BCS_BATCH(batch, 0);
6896 intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));
6897 ADVANCE_BCS_BATCH(batch);
6899 memset(weightoffsets, 0, 32 * 6 * sizeof(short));
6900 for (i = 0; i < 32; i++) {
6901 weightoffsets[i * 6 + 0] = slice_param->luma_weight_l1[i];
6902 weightoffsets[i * 6 + 1] = slice_param->luma_offset_l1[i];
6903 weightoffsets[i * 6 + 2] = slice_param->chroma_weight_l1[i][0];
6904 weightoffsets[i * 6 + 3] = slice_param->chroma_offset_l1[i][0];
6905 weightoffsets[i * 6 + 4] = slice_param->chroma_weight_l1[i][1];
6906 weightoffsets[i * 6 + 5] = slice_param->chroma_offset_l1[i][1];
6909 BEGIN_BCS_BATCH(batch, 98);
6910 OUT_BCS_BATCH(batch, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2));
/* DW1 = 1 selects reference list L1. */
6911 OUT_BCS_BATCH(batch, 1);
6912 intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));
6913 ADVANCE_BCS_BATCH(batch);
/* Encode one slice: on the first PAK pass, record this slice's commands
 * (ref idx, weight/offset, slice state, packed headers) into a reusable
 * second-level batch buffer; on every pass, chain that second-level batch
 * plus the slice's MB code buffer into the main batch via
 * MI_BATCH_BUFFER_START.  Later PAK passes replay the recorded offset. */
6918 gen9_mfc_avc_single_slice(VADriverContextP ctx,
6919 struct encode_state *encode_state,
6920 struct intel_encoder_context *encoder_context,
6921 VAEncSliceParameterBufferH264 *slice_param,
6922 VAEncSliceParameterBufferH264 *next_slice_param,
6925 struct i965_driver_data *i965 = i965_driver_data(ctx);
6926 struct i965_gpe_table *gpe = &i965->gpe_table;
6927 struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
6928 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)pak_context->private_enc_ctx;
6929 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
6930 struct avc_enc_state * avc_state = (struct avc_enc_state *)pak_context->private_enc_state;
6931 struct intel_batchbuffer *batch = encoder_context->base.batch;
6932 struct intel_batchbuffer *slice_batch = avc_ctx->pres_slice_batch_buffer_2nd_level;
6933 VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
6934 struct gpe_mi_batch_buffer_start_parameter second_level_batch;
6935 struct object_surface *obj_surface;
6936 struct gen9_surface_avc *avc_priv_surface;
6938 unsigned int slice_offset = 0;
/* First PAK pass: record this slice's command sequence once, remembering
 * its offset so later passes can replay it. */
6940 if (generic_state->curr_pak_pass == 0) {
6941 slice_offset = intel_batchbuffer_used_size(slice_batch);
6942 avc_state->slice_batch_offset[slice_index] = slice_offset;
6943 gen9_mfc_avc_ref_idx_state(ctx, encode_state, encoder_context, slice_param, slice_batch);
6944 gen9_mfc_avc_weightoffset_state(ctx,
6950 gen9_mfc_avc_slice_state(ctx,
6957 gen9_mfc_avc_inset_headers(ctx,
/* Terminate the recorded slice commands with MI_BATCH_BUFFER_END
 * (padded to an even DW count). */
6964 BEGIN_BCS_BATCH(slice_batch, 2);
6965 OUT_BCS_BATCH(slice_batch, 0);
6966 OUT_BCS_BATCH(slice_batch, MI_BATCH_BUFFER_END);
6967 ADVANCE_BCS_BATCH(slice_batch);
/* Subsequent passes: reuse the offset recorded on pass 0. */
6970 slice_offset = avc_state->slice_batch_offset[slice_index];
6972 /* insert the slice commands as a second-level batch */
6973 memset(&second_level_batch, 0, sizeof(second_level_batch));
6974 second_level_batch.is_second_level = 1; /* Must be the second level batch buffer */
6975 second_level_batch.offset = slice_offset;
6976 second_level_batch.bo = slice_batch->buffer;
6977 gpe->mi_batch_buffer_start(ctx, batch, &second_level_batch);
6979 /* insert the MB code as a second-level batch */
6980 obj_surface = encode_state->reconstructed_object;
6981 assert(obj_surface->private_data);
6982 avc_priv_surface = (struct gen9_surface_avc *)obj_surface->private_data;
6984 memset(&second_level_batch, 0, sizeof(second_level_batch));
6985 second_level_batch.is_second_level = 1; /* Must be the second level batch buffer */
/* 16 DWs (64 bytes) of MB code per macroblock. */
6986 second_level_batch.offset = slice_param->macroblock_address * 16 * 4;
6987 second_level_batch.bo = avc_priv_surface->res_mb_code_surface.bo;
6988 gpe->mi_batch_buffer_start(ctx, batch, &second_level_batch);
/* Slice-level PAK: walk every slice parameter buffer (and every element
 * inside each buffer), encode each slice via gen9_mfc_avc_single_slice(),
 * then flush the video pipeline.
 * NOTE(review): the arguments of the gen9_mfc_avc_single_slice() call and
 * the tail-insertion code are not visible in this view of the file. */
6993 gen9_avc_pak_slice_level(VADriverContextP ctx,
6994 struct encode_state *encode_state,
6995 struct intel_encoder_context *encoder_context)
6997 struct i965_driver_data *i965 = i965_driver_data(ctx);
6998 struct i965_gpe_table *gpe = &i965->gpe_table;
6999 struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
7000 struct avc_enc_state * avc_state = (struct avc_enc_state *)pak_context->private_enc_state;
7001 struct intel_batchbuffer *batch = encoder_context->base.batch;
7002 struct gpe_mi_flush_dw_parameter mi_flush_dw_params;
7003 VAEncSliceParameterBufferH264 *slice_param, *next_slice_param, *next_slice_group_param;
7005 int slice_index = 0;
7006 int is_frame_level = (avc_state->slice_num > 1) ? 0 : 1; /* check it for SKL,now single slice per frame */
7007 int has_tail = 0; /* check it later */
/* Outer loop: slice parameter buffers; inner loop: slices inside each. */
7009 for (j = 0; j < encode_state->num_slice_params_ext; j++) {
7010 slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j]->buffer;
/* The first slice of the NEXT buffer (NULL when this is the last one). */
7012 if (j == encode_state->num_slice_params_ext - 1)
7013 next_slice_group_param = NULL;
7015 next_slice_group_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j + 1]->buffer;
7017 for (i = 0; i < encode_state->slice_params_ext[j]->num_elements; i++) {
/* next_slice_param: the following element, or the next group's first. */
7018 if (i < encode_state->slice_params_ext[j]->num_elements - 1)
7019 next_slice_param = slice_param + 1;
7021 next_slice_param = next_slice_group_param;
7023 gen9_mfc_avc_single_slice(ctx,
7041 /* insert a tail if required */
/* Flush and invalidate the video pipeline cache after all slices. */
7044 memset(&mi_flush_dw_params, 0, sizeof(mi_flush_dw_params));
7045 mi_flush_dw_params.video_pipeline_cache_invalidate = 1;
7046 gpe->mi_flush_dw(ctx, batch, &mi_flush_dw_params);
/* Picture-level PAK: program the per-frame MFX state.  On BRC re-encode
 * passes, first emit a conditional batch-buffer-end so the pass is skipped
 * when the previous pass already met the size target.  Then program pipe
 * mode, surface states, buffer addresses, and the AVC image state (read
 * from the BRC-updated buffer when BRC is on, or freshly generated
 * otherwise), followed by QM/FQM/direct-mode state. */
7049 gen9_avc_pak_picture_level(VADriverContextP ctx,
7050 struct encode_state *encode_state,
7051 struct intel_encoder_context *encoder_context)
7053 struct i965_driver_data *i965 = i965_driver_data(ctx);
7054 struct i965_gpe_table *gpe = &i965->gpe_table;
7055 struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
7056 struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)pak_context->generic_enc_ctx;
7057 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)pak_context->private_enc_ctx;
7058 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
7059 struct gpe_mi_batch_buffer_start_parameter second_level_batch;
7060 struct intel_batchbuffer *batch = encoder_context->base.batch;
/* BRC re-encode pass: conditionally terminate the batch if the image
 * status mask says no further pass is needed. */
7062 if (generic_state->brc_enabled &&
7063 generic_state->curr_pak_pass) {
7064 struct gpe_mi_conditional_batch_buffer_end_parameter mi_conditional_batch_buffer_end_params;
7065 struct encoder_status_buffer_internal *status_buffer;
7066 status_buffer = &(avc_ctx->status_buffer);
7068 memset(&mi_conditional_batch_buffer_end_params, 0, sizeof(mi_conditional_batch_buffer_end_params));
7069 mi_conditional_batch_buffer_end_params.offset = status_buffer->image_status_mask_offset;
7070 mi_conditional_batch_buffer_end_params.bo = status_buffer->bo;
7071 mi_conditional_batch_buffer_end_params.compare_data = 0;
7072 mi_conditional_batch_buffer_end_params.compare_mask_mode_disabled = 0;
7073 gpe->mi_conditional_batch_buffer_end(ctx, batch, &mi_conditional_batch_buffer_end_params);
/* Common per-frame MFX programming (binding table indices 0 and 4 for the
 * reconstructed and raw input surfaces respectively). */
7076 gen9_mfc_avc_pipe_mode_select(ctx, encode_state, encoder_context);
7077 gen9_mfc_avc_surface_state(ctx, encoder_context, &(generic_ctx->res_reconstructed_surface), 0);
7078 gen9_mfc_avc_surface_state(ctx, encoder_context, &(generic_ctx->res_uncompressed_input_surface), 4);
7079 gen9_mfc_avc_pipe_buf_addr_state(ctx, encoder_context);
7080 gen9_mfc_avc_ind_obj_base_addr_state(ctx, encode_state, encoder_context);
7081 gen9_mfc_avc_bsp_buf_base_addr_state(ctx, encoder_context);
/* Image state: BRC mode replays the kernel-updated image state for the
 * current pass; non-BRC generates it fresh into a second-level batch. */
7083 if (generic_state->brc_enabled) {
7084 memset(&second_level_batch, 0, sizeof(second_level_batch));
7085 if (generic_state->curr_pak_pass == 0) {
7086 second_level_batch.offset = 0;
7088 second_level_batch.offset = generic_state->curr_pak_pass * INTEL_AVC_IMAGE_STATE_CMD_SIZE;
7090 second_level_batch.is_second_level = 1;
7091 second_level_batch.bo = avc_ctx->res_brc_image_state_read_buffer.bo;
7092 gpe->mi_batch_buffer_start(ctx, batch, &second_level_batch);
7094 /*generate a new image state */
7095 gen9_avc_set_image_state_non_brc(ctx, encode_state, encoder_context, &(avc_ctx->res_image_state_batch_buffer_2nd_level));
7096 memset(&second_level_batch, 0, sizeof(second_level_batch));
7097 second_level_batch.offset = 0;
7098 second_level_batch.is_second_level = 1;
7099 second_level_batch.bo = avc_ctx->res_image_state_batch_buffer_2nd_level.bo;
7100 gpe->mi_batch_buffer_start(ctx, batch, &second_level_batch);
7103 gen9_mfc_avc_qm_state(ctx, encode_state, encoder_context);
7104 gen9_mfc_avc_fqm_state(ctx, encode_state, encoder_context);
7105 gen9_mfc_avc_directmode_state(ctx, encoder_context);
/* After PAK completes, capture MFX status MMIO registers into the status
 * buffer and the BRC pre-PAK statistics buffer via MI_STORE_REGISTER_MEM,
 * bracketed by MI_FLUSH_DW pairs.  Stored values: bitstream byte count
 * (frame and frame-no-header), image status mask/control, and the number
 * of executed PAK passes. */
7110 gen9_avc_read_mfc_status(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
7112 struct i965_driver_data *i965 = i965_driver_data(ctx);
7113 struct i965_gpe_table *gpe = &i965->gpe_table;
7114 struct intel_batchbuffer *batch = encoder_context->base.batch;
7115 struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
7116 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)pak_context->private_enc_ctx;
7117 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
7119 struct gpe_mi_store_register_mem_parameter mi_store_reg_mem_param;
7120 struct gpe_mi_store_data_imm_parameter mi_store_data_imm_param;
7121 struct gpe_mi_flush_dw_parameter mi_flush_dw_param;
7122 struct encoder_status_buffer_internal *status_buffer;
7124 status_buffer = &(avc_ctx->status_buffer);
/* Flush before reading the MMIO registers so PAK output is complete. */
7126 memset(&mi_flush_dw_param, 0, sizeof(mi_flush_dw_param));
7127 gpe->mi_flush_dw(ctx, batch, &mi_flush_dw_param);
7129 /* read registers and store into status_buffer and pak_statistic info */
7130 memset(&mi_store_reg_mem_param, 0, sizeof(mi_store_reg_mem_param));
7131 mi_store_reg_mem_param.bo = status_buffer->bo;
7132 mi_store_reg_mem_param.offset = status_buffer->bs_byte_count_frame_offset;
7133 mi_store_reg_mem_param.mmio_offset = status_buffer->bs_byte_count_frame_reg_offset;
7134 gpe->mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);
7136 mi_store_reg_mem_param.bo = status_buffer->bo;
7137 mi_store_reg_mem_param.offset = status_buffer->image_status_mask_offset;
7138 mi_store_reg_mem_param.mmio_offset = status_buffer->image_status_mask_reg_offset;
7139 gpe->mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);
7141 /*update the status in the pak_statistic_surface */
/* DW0: frame bitstream byte count. */
7142 mi_store_reg_mem_param.bo = avc_ctx->res_brc_pre_pak_statistics_output_buffer.bo;
7143 mi_store_reg_mem_param.offset = 0;
7144 mi_store_reg_mem_param.mmio_offset = status_buffer->bs_byte_count_frame_reg_offset;
7145 gpe->mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);
/* DW1: frame bitstream byte count excluding headers. */
7147 mi_store_reg_mem_param.bo = avc_ctx->res_brc_pre_pak_statistics_output_buffer.bo;
7148 mi_store_reg_mem_param.offset = 4;
7149 mi_store_reg_mem_param.mmio_offset = status_buffer->bs_byte_count_frame_nh_reg_offset;
7150 gpe->mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);
/* DW2: number of PAK passes executed so far (current pass + 1). */
7152 memset(&mi_store_data_imm_param, 0, sizeof(mi_store_data_imm_param));
7153 mi_store_data_imm_param.bo = avc_ctx->res_brc_pre_pak_statistics_output_buffer.bo;
7154 mi_store_data_imm_param.offset = sizeof(unsigned int) * 2;
7155 mi_store_data_imm_param.dw0 = (generic_state->curr_pak_pass + 1);
7156 gpe->mi_store_data_imm(ctx, batch, &mi_store_data_imm_param);
/* DW4+pass: per-pass image status control register snapshot. */
7158 mi_store_reg_mem_param.bo = avc_ctx->res_brc_pre_pak_statistics_output_buffer.bo;
7159 mi_store_reg_mem_param.offset = sizeof(unsigned int) * (4 + generic_state->curr_pak_pass) ;
7160 mi_store_reg_mem_param.mmio_offset = status_buffer->image_status_ctrl_reg_offset;
7161 gpe->mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);
7163 memset(&mi_flush_dw_param, 0, sizeof(mi_flush_dw_param));
7164 gpe->mi_flush_dw(ctx, batch, &mi_flush_dw_param);
/* Map the VA rate-control mode requested by the application onto the
 * encoder's internal rate mode (CBR / VBR / CQP) and apply the default
 * quality level when the application left it unset.
 * NOTE(review): the switch case labels and break statements are missing
 * from this view of the file; only the assignments are visible. */
7170 gen9_avc_pak_brc_prepare(struct encode_state *encode_state,
7171 struct intel_encoder_context *encoder_context)
7173 struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
7174 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
7175 unsigned int rate_control_mode = encoder_context->rate_control_mode;
/* Only the low 7 bits carry the base rate-control mode. */
7177 switch (rate_control_mode & 0x7f) {
7179 generic_state->internal_rate_mode = VA_RC_CBR;
7183 generic_state->internal_rate_mode = VA_RC_VBR;//AVBR
7188 generic_state->internal_rate_mode = VA_RC_CQP;
/* quality_level == 0 means "unset": fall back to the AVC default. */
7192 if (encoder_context->quality_level == 0)
7193 encoder_context->quality_level = ENCODER_DEFAULT_QUALITY_AVC;
/* Prepare all surfaces and scratch buffers for the PAK stage of one frame:
 * - scan slice params to decide whether in-loop deblocking (ILDB) is on;
 * - bind the reconstructed surface (pre- or post-deblocking output), the
 *   raw input surface, and the reference surfaces plus their direct-MV
 *   buffers into GPE resources;
 * - (re)allocate the second-level slice batch buffer and the row-store /
 *   MB-status scratch buffers sized from the frame dimensions in MBs.
 * Returns VA_STATUS_SUCCESS, a surface-check error, or
 * VA_STATUS_ERROR_ALLOCATION_FAILED.
 * NOTE(review): some lines (sizes passed to the allocators, a few braces
 * and the allocate_flag checks' bodies) are missing from this view. */
7197 gen9_avc_pak_pipeline_prepare(VADriverContextP ctx,
7198 struct encode_state *encode_state,
7199 struct intel_encoder_context *encoder_context)
7202 struct i965_driver_data *i965 = i965_driver_data(ctx);
7203 struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
7204 struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)pak_context->generic_enc_ctx;
7205 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)pak_context->private_enc_ctx;
7206 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
7207 struct avc_enc_state * avc_state = (struct avc_enc_state *)pak_context->private_enc_state;
7209 struct object_surface *obj_surface;
7210 VAEncPictureParameterBufferH264 *pic_param = avc_state->pic_param;
7211 VAEncSliceParameterBufferH264 *slice_param = avc_state->slice_param[0];
7213 struct gen9_surface_avc *avc_priv_surface;
7214 int i, j, enable_avc_ildb = 0;
7215 unsigned int allocate_flag = 1;
7217 unsigned int w_mb = generic_state->frame_width_in_mbs;
7218 unsigned int h_mb = generic_state->frame_height_in_mbs;
7219 struct avc_surface_param surface_param;
7221 /* update the parameters and check the slice parameters; deblocking is
 * enabled as soon as any slice does not fully disable it (idc != 1) */
7222 for (j = 0; j < encode_state->num_slice_params_ext && enable_avc_ildb == 0; j++) {
7223 assert(encode_state->slice_params_ext && encode_state->slice_params_ext[j]->buffer);
7224 slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j]->buffer;
7226 for (i = 0; i < encode_state->slice_params_ext[j]->num_elements; i++) {
7227 assert((slice_param->slice_type == SLICE_TYPE_I) ||
7228 (slice_param->slice_type == SLICE_TYPE_SI) ||
7229 (slice_param->slice_type == SLICE_TYPE_P) ||
7230 (slice_param->slice_type == SLICE_TYPE_SP) ||
7231 (slice_param->slice_type == SLICE_TYPE_B));
7233 if (slice_param->disable_deblocking_filter_idc != 1) {
7234 enable_avc_ildb = 1;
7241 avc_state->enable_avc_ildb = enable_avc_ildb;
7243 /* set up all surfaces and buffers for PAK */
7244 /* Setup current reconstructed frame */
7245 obj_surface = encode_state->reconstructed_object;
7246 va_status = i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
7248 if (va_status != VA_STATUS_SUCCESS)
7251 memset(&surface_param, 0, sizeof(surface_param));
7252 surface_param.frame_width = generic_state->frame_width_in_pixel;
7253 surface_param.frame_height = generic_state->frame_height_in_pixel;
7254 va_status = gen9_avc_init_check_surfaces(ctx,
7255 obj_surface, encoder_context,
7257 if (va_status != VA_STATUS_SUCCESS)
7259 /* init the members of avc_priv_surface: frame_store_id, qp_value, ... */
/* The last two DMV buffer slots are reserved for the current frame's
 * top/bottom direct-MV buffers. */
7261 avc_priv_surface = (struct gen9_surface_avc *)obj_surface->private_data;
7262 avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 2] = 0;
7263 avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 1] = 0;
7264 i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS - 2]);
7265 i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS - 1]);
7266 i965_dri_object_to_buffer_gpe_resource(&avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS - 2], avc_priv_surface->dmv_top);
7267 i965_dri_object_to_buffer_gpe_resource(&avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS - 1], avc_priv_surface->dmv_bottom);
7268 avc_priv_surface->qp_value = pic_param->pic_init_qp + slice_param->slice_qp_delta;
7269 avc_priv_surface->frame_store_id = 0;
7270 avc_priv_surface->frame_idx = pic_param->CurrPic.frame_idx;
7271 avc_priv_surface->top_field_order_cnt = pic_param->CurrPic.TopFieldOrderCnt;
7272 avc_priv_surface->is_as_ref = pic_param->pic_fields.bits.reference_pic_flag;
7273 avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 2] = avc_priv_surface->top_field_order_cnt;
7274 avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 1] = avc_priv_surface->top_field_order_cnt + 1;
7276 i965_free_gpe_resource(&generic_ctx->res_reconstructed_surface);
7277 i965_free_gpe_resource(&avc_ctx->res_post_deblocking_output);
7278 i965_free_gpe_resource(&avc_ctx->res_pre_deblocking_output);
7279 i965_object_surface_to_2d_gpe_resource_with_align(&generic_ctx->res_reconstructed_surface, obj_surface, GPE_RESOURCE_ALIGNMENT);
/* With deblocking on, the hardware writes the post-deblocking output;
 * otherwise the pre-deblocking output is used. */
7282 if (avc_state->enable_avc_ildb) {
7283 i965_object_surface_to_2d_gpe_resource_with_align(&avc_ctx->res_post_deblocking_output, obj_surface, GPE_RESOURCE_ALIGNMENT);
7285 i965_object_surface_to_2d_gpe_resource_with_align(&avc_ctx->res_pre_deblocking_output, obj_surface, GPE_RESOURCE_ALIGNMENT);
7287 /* input YUV surface */
7288 obj_surface = encode_state->input_yuv_object;
7289 va_status = i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
7291 if (va_status != VA_STATUS_SUCCESS)
7293 i965_free_gpe_resource(&generic_ctx->res_uncompressed_input_surface);
7294 i965_object_surface_to_2d_gpe_resource_with_align(&generic_ctx->res_uncompressed_input_surface, obj_surface, GPE_RESOURCE_ALIGNMENT);
7296 /* Reference surfaces */
7297 for (i = 0; i < ARRAY_ELEMS(avc_ctx->list_reference_res); i++) {
7298 i965_free_gpe_resource(&avc_ctx->list_reference_res[i]);
7299 i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i * 2]);
7300 i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i * 2 + 1]);
7301 obj_surface = encode_state->reference_objects[i];
7302 avc_state->top_field_poc[2 * i] = 0;
7303 avc_state->top_field_poc[2 * i + 1] = 0;
7305 if (obj_surface && obj_surface->bo) {
7306 i965_object_surface_to_2d_gpe_resource_with_align(&avc_ctx->list_reference_res[i], obj_surface, GPE_RESOURCE_ALIGNMENT);
7308 /* actually it should be handled when it is a reconstructed surface */
7309 va_status = gen9_avc_init_check_surfaces(ctx,
7310 obj_surface, encoder_context,
7312 if (va_status != VA_STATUS_SUCCESS)
7314 avc_priv_surface = (struct gen9_surface_avc *)obj_surface->private_data;
7315 i965_dri_object_to_buffer_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i * 2], avc_priv_surface->dmv_top);
7316 i965_dri_object_to_buffer_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i * 2 + 1], avc_priv_surface->dmv_bottom);
7317 avc_priv_surface->frame_store_id = i;
7318 avc_state->top_field_poc[2 * i] = avc_priv_surface->top_field_order_cnt;
7319 avc_state->top_field_poc[2 * i + 1] = avc_priv_surface->top_field_order_cnt + 1;
/* Recreate the second-level slice batch buffer, sized per slice count. */
7325 if (avc_ctx->pres_slice_batch_buffer_2nd_level) {
7326 intel_batchbuffer_free(avc_ctx->pres_slice_batch_buffer_2nd_level);
7327 avc_ctx->pres_slice_batch_buffer_2nd_level = NULL;
7330 avc_ctx->pres_slice_batch_buffer_2nd_level =
7331 intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD,
7333 encode_state->num_slice_params_ext);
7334 if (!avc_ctx->pres_slice_batch_buffer_2nd_level)
7335 return VA_STATUS_ERROR_ALLOCATION_FAILED;
7337 for (i = 0; i < MAX_AVC_SLICE_NUM; i++) {
7338 avc_state->slice_batch_offset[i] = 0;
/* Row-store / scratch buffers, sized from the frame width/height in MBs. */
7343 i965_free_gpe_resource(&avc_ctx->res_intra_row_store_scratch_buffer);
7344 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
7345 &avc_ctx->res_intra_row_store_scratch_buffer,
7347 "PAK Intra row store scratch buffer");
7349 goto failed_allocation;
7351 size = w_mb * 4 * 64;
7352 i965_free_gpe_resource(&avc_ctx->res_deblocking_filter_row_store_scratch_buffer);
7353 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
7354 &avc_ctx->res_deblocking_filter_row_store_scratch_buffer,
7356 "PAK Deblocking filter row store scratch buffer");
7358 goto failed_allocation;
7360 size = w_mb * 2 * 64;
7361 i965_free_gpe_resource(&avc_ctx->res_bsd_mpc_row_store_scratch_buffer);
7362 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
7363 &avc_ctx->res_bsd_mpc_row_store_scratch_buffer,
7365 "PAK BSD/MPC row store scratch buffer");
7367 goto failed_allocation;
7369 size = w_mb * h_mb * 16;
7370 i965_free_gpe_resource(&avc_ctx->res_pak_mb_status_buffer);
7371 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
7372 &avc_ctx->res_pak_mb_status_buffer,
7374 "PAK MB status buffer");
7376 goto failed_allocation;
7378 return VA_STATUS_SUCCESS;
7381 return VA_STATUS_ERROR_ALLOCATION_FAILED;
/*
 * gen9_avc_encode_picture - run the PAK (bitstream packing) stage for one
 * AVC frame.
 *
 * Prepares the per-frame PAK resources, then executes num_pak_passes
 * passes on the BCS (video command streamer) ring; each pass emits the
 * picture- and slice-level MFX commands and reads back the MFC status
 * registers so a subsequent pass can adjust (multi-pass BRC).
 *
 * Returns VA_STATUS_SUCCESS on completion.
 *
 * NOTE(review): this chunk shows the file with lines elided (the embedded
 * original line numbers are non-contiguous — e.g. the `profile` parameter
 * and the `va_status` declaration are not visible here); comments describe
 * only the visible statements.
 */
7385 gen9_avc_encode_picture(VADriverContextP ctx,
7387 struct encode_state *encode_state,
7388 struct intel_encoder_context *encoder_context)
7391 struct i965_driver_data *i965 = i965_driver_data(ctx);
7392 struct i965_gpe_table *gpe = &i965->gpe_table;
7393 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
7394 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
7395 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
7396 struct intel_batchbuffer *batch = encoder_context->base.batch;
/* (Re)allocate / rebind the PAK surfaces and buffers for this frame. */
7398 va_status = gen9_avc_pak_pipeline_prepare(ctx, encode_state, encoder_context);
7400 if (va_status != VA_STATUS_SUCCESS)
/* Start an atomic batch on a BSD ring; on parts with two BSD engines,
 * explicitly pin to BSD_RING0. */
7403 if (i965->intel.has_bsd2)
7404 intel_batchbuffer_start_atomic_bcs_override(batch, 0x1000, BSD_RING0);
7406 intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
7407 intel_batchbuffer_emit_mi_flush(batch);
/* Multi-pass PAK loop; the pass count is decided by the BRC setup. */
7409 for (generic_state->curr_pak_pass = 0;
7410 generic_state->curr_pak_pass < generic_state->num_pak_passes;
7411 generic_state->curr_pak_pass++) {
7413 if (generic_state->curr_pak_pass == 0) {
7414 /* First pass only: clear the MFC image status/control register via
 * MI_LOAD_REGISTER_IMM so stale state cannot leak into this frame. */
7415 struct gpe_mi_load_register_imm_parameter mi_load_reg_imm;
7416 struct encoder_status_buffer_internal *status_buffer;
7418 status_buffer = &(avc_ctx->status_buffer);
7419 memset(&mi_load_reg_imm, 0, sizeof(mi_load_reg_imm));
7420 mi_load_reg_imm.mmio_offset = status_buffer->image_status_ctrl_reg_offset;
7421 mi_load_reg_imm.data = 0;
7422 gpe->mi_load_register_imm(ctx, batch, &mi_load_reg_imm);
/* Emit picture- then slice-level MFX commands, then snapshot the MFC
 * status registers into the status buffer for this pass. */
7424 gen9_avc_pak_picture_level(ctx, encode_state, encoder_context);
7425 gen9_avc_pak_slice_level(ctx, encode_state, encoder_context);
7426 gen9_avc_read_mfc_status(ctx, encoder_context);
/* The 2nd-level slice batch buffer is per-frame; release it now. */
7430 if (avc_ctx->pres_slice_batch_buffer_2nd_level) {
7431 intel_batchbuffer_free(avc_ctx->pres_slice_batch_buffer_2nd_level);
7432 avc_ctx->pres_slice_batch_buffer_2nd_level = NULL;
7435 intel_batchbuffer_end_atomic(batch);
7436 intel_batchbuffer_flush(batch);
/* Frame bookkeeping for the next call. */
7438 generic_state->seq_frame_number++;
7439 generic_state->total_frame_number++;
7440 generic_state->first_frame = 0;
7441 return VA_STATUS_SUCCESS;
/*
 * gen9_avc_pak_pipeline - PAK entry point: dispatch on the VA profile.
 *
 * All supported H.264 profiles funnel into gen9_avc_encode_picture();
 * any other profile yields VA_STATUS_ERROR_UNSUPPORTED_PROFILE.
 *
 * NOTE(review): the enclosing switch statement, the `profile` parameter
 * line and the final return are elided in this dump (non-contiguous
 * embedded line numbers); only the visible case labels are documented.
 */
7445 gen9_avc_pak_pipeline(VADriverContextP ctx,
7447 struct encode_state *encode_state,
7448 struct intel_encoder_context *encoder_context)
7453 case VAProfileH264ConstrainedBaseline:
7454 case VAProfileH264Main:
7455 case VAProfileH264High:
7456 case VAProfileH264MultiviewHigh:
7457 case VAProfileH264StereoHigh:
7458 vaStatus = gen9_avc_encode_picture(ctx, profile, encode_state, encoder_context);
/* Any non-H.264 profile is rejected. */
7462 vaStatus = VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
/*
 * gen9_avc_pak_context_destroy - release every GPE resource owned by the
 * PAK side of the shared VME/PAK context.
 *
 * Frees the reconstructed/pre/post-deblocking and input surfaces, the
 * compressed bitstream buffer, the PAK row-store scratch buffers, the MB
 * status buffer, all reference-frame resources and direct-MV buffers, and
 * the 2nd-level slice batch buffer.
 *
 * NOTE(review): guard/brace lines are elided in this dump; the structure
 * shown (loops over the reference and DMV arrays) is taken at face value.
 */
7470 gen9_avc_pak_context_destroy(void * context)
7472 struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)context;
7473 struct generic_encoder_context * generic_ctx;
7474 struct i965_avc_encoder_context * avc_ctx;
7480 generic_ctx = (struct generic_encoder_context *)pak_context->generic_enc_ctx;
7481 avc_ctx = (struct i965_avc_encoder_context *)pak_context->private_enc_ctx;
/* Per-frame surfaces shared with the VME stage. */
7484 i965_free_gpe_resource(&generic_ctx->res_reconstructed_surface);
7485 i965_free_gpe_resource(&avc_ctx->res_post_deblocking_output);
7486 i965_free_gpe_resource(&avc_ctx->res_pre_deblocking_output);
7487 i965_free_gpe_resource(&generic_ctx->res_uncompressed_input_surface);
/* Output bitstream and PAK scratch/status buffers. */
7489 i965_free_gpe_resource(&generic_ctx->compressed_bitstream.res);
7490 i965_free_gpe_resource(&avc_ctx->res_intra_row_store_scratch_buffer);
7491 i965_free_gpe_resource(&avc_ctx->res_deblocking_filter_row_store_scratch_buffer);
7492 i965_free_gpe_resource(&avc_ctx->res_bsd_mpc_row_store_scratch_buffer);
7493 i965_free_gpe_resource(&avc_ctx->res_pak_mb_status_buffer);
/* Reference surfaces and their paired direct-MV (top/bottom) buffers. */
7495 for (i = 0 ; i < MAX_MFC_AVC_REFERENCE_SURFACES; i++) {
7496 i965_free_gpe_resource(&avc_ctx->list_reference_res[i]);
7499 for (i = 0 ; i < NUM_MFC_AVC_DMV_BUFFERS; i++) {
7500 i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i]);
/* Per-frame 2nd-level slice batch buffer, if still alive. */
7503 if (avc_ctx->pres_slice_batch_buffer_2nd_level) {
7504 intel_batchbuffer_free(avc_ctx->pres_slice_batch_buffer_2nd_level);
7505 avc_ctx->pres_slice_batch_buffer_2nd_level = NULL;
/*
 * gen9_avc_get_coded_status - publish the encoded size of the frame.
 *
 * Reads the per-frame encoder_status record stashed in the coded buffer
 * segment's private data and copies its bitstream byte count into the
 * segment's reported size, which is what vaMapBuffer() clients see.
 *
 * Returns VA_STATUS_ERROR_INVALID_BUFFER when either pointer argument is
 * NULL, VA_STATUS_SUCCESS otherwise.
 */
7511 gen9_avc_get_coded_status(VADriverContextP ctx,
7512 struct intel_encoder_context *encoder_context,
7513 struct i965_coded_buffer_segment *coded_buf_seg)
7515 struct encoder_status *avc_encode_status;
7517 if (!encoder_context || !coded_buf_seg)
7518 return VA_STATUS_ERROR_INVALID_BUFFER;
/* codec_private_data holds the encoder_status written by the PAK
 * status-report path (see the offsets set up in vme_context_init). */
7520 avc_encode_status = (struct encoder_status *)coded_buf_seg->codec_private_data;
7521 coded_buf_seg->base.size = avc_encode_status->bs_byte_count_frame;
7523 return VA_STATUS_SUCCESS;
/*
 * gen9_avc_vme_context_init - allocate and initialize the shared VME/PAK
 * encoder context for Gen9-class hardware.
 *
 * Allocates the five context/state structures, selects the per-GPU kernel
 * binary blob (SKL/BXT vs KBL/GLK), seeds every field of the generic and
 * AVC-specific state with its default, lays out the status-report buffer
 * offsets, loads the encoder kernels and installs the VME pipeline hooks
 * on encoder_context.
 *
 * NOTE(review): this dump has elided lines (non-contiguous embedded line
 * numbers) — the closing success return and parts of the error-cleanup
 * path are not visible here; comments cover only the visible statements.
 */
7527 gen9_avc_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
7529 /* VME & PAK share the same context */
7530 struct i965_driver_data *i965 = i965_driver_data(ctx);
7531 struct encoder_vme_mfc_context * vme_context = NULL;
7532 struct generic_encoder_context * generic_ctx = NULL;
7533 struct i965_avc_encoder_context * avc_ctx = NULL;
7534 struct generic_enc_codec_state * generic_state = NULL;
7535 struct avc_enc_state * avc_state = NULL;
7536 struct encoder_status_buffer_internal *status_buffer;
/* Byte offset of the private status record inside a coded buffer segment;
 * used below to compute every per-field status offset. */
7537 uint32_t base_offset = offsetof(struct i965_coded_buffer_segment, codec_private_data);
/* Allocate all context/state structures; any failure unwinds via
 * allocate_structure_failed. */
7539 vme_context = calloc(1, sizeof(struct encoder_vme_mfc_context));
7540 generic_ctx = calloc(1, sizeof(struct generic_encoder_context));
7541 avc_ctx = calloc(1, sizeof(struct i965_avc_encoder_context));
7542 generic_state = calloc(1, sizeof(struct generic_enc_codec_state));
7543 avc_state = calloc(1, sizeof(struct avc_enc_state));
7545 if (!vme_context || !generic_ctx || !avc_ctx || !generic_state || !avc_state)
7546 goto allocate_structure_failed;
/* NOTE(review): these memsets are redundant — calloc already
 * zero-initializes; harmless but could be dropped. */
7548 memset(vme_context, 0, sizeof(struct encoder_vme_mfc_context));
7549 memset(generic_ctx, 0, sizeof(struct generic_encoder_context));
7550 memset(avc_ctx, 0, sizeof(struct i965_avc_encoder_context));
7551 memset(generic_state, 0, sizeof(struct generic_enc_codec_state));
7552 memset(avc_state, 0, sizeof(struct avc_enc_state));
/* Wire the sub-contexts into the shared VME/MFC context. */
7554 encoder_context->vme_context = vme_context;
7555 vme_context->generic_enc_ctx = generic_ctx;
7556 vme_context->private_enc_ctx = avc_ctx;
7557 vme_context->generic_enc_state = generic_state;
7558 vme_context->private_enc_state = avc_state;
/* Select the encoder kernel blob for this GPU generation; unsupported
 * devices bail out through the allocation-failure cleanup path. */
7560 if (IS_SKL(i965->intel.device_info) ||
7561 IS_BXT(i965->intel.device_info)) {
7562 generic_ctx->enc_kernel_ptr = (void *)skl_avc_encoder_kernels;
7563 generic_ctx->enc_kernel_size = sizeof(skl_avc_encoder_kernels);
7564 } else if (IS_KBL(i965->intel.device_info) ||
7565 IS_GLK(i965->intel.device_info)) {
7566 generic_ctx->enc_kernel_ptr = (void *)kbl_avc_encoder_kernels;
7567 generic_ctx->enc_kernel_size = sizeof(kbl_avc_encoder_kernels);
7569 goto allocate_structure_failed;
7571 /* initialize misc ? */
7573 generic_ctx->use_hw_scoreboard = 1;
7574 generic_ctx->use_hw_non_stalling_scoreboard = 1;
7576 /* initialize generic state */
7578 generic_state->kernel_mode = INTEL_ENC_KERNEL_NORMAL;
7579 generic_state->preset = INTEL_PRESET_RT_SPEED;
7580 generic_state->seq_frame_number = 0;
7581 generic_state->total_frame_number = 0;
7582 generic_state->frame_type = 0;
7583 generic_state->first_frame = 1;
/* Frame geometry; filled in per-sequence later (zero = not yet known). */
7585 generic_state->frame_width_in_pixel = 0;
7586 generic_state->frame_height_in_pixel = 0;
7587 generic_state->frame_width_in_mbs = 0;
7588 generic_state->frame_height_in_mbs = 0;
7589 generic_state->frame_width_4x = 0;
7590 generic_state->frame_height_4x = 0;
7591 generic_state->frame_width_16x = 0;
7592 generic_state->frame_height_16x = 0;
7593 generic_state->frame_width_32x = 0;
7594 generic_state->downscaled_width_4x_in_mb = 0;
7595 generic_state->downscaled_height_4x_in_mb = 0;
7596 generic_state->downscaled_width_16x_in_mb = 0;
7597 generic_state->downscaled_height_16x_in_mb = 0;
7598 generic_state->downscaled_width_32x_in_mb = 0;
7599 generic_state->downscaled_height_32x_in_mb = 0;
/* HME capability defaults: 4x and 16x supported, 32x not. */
7601 generic_state->hme_supported = 1;
7602 generic_state->b16xme_supported = 1;
7603 generic_state->b32xme_supported = 0;
7604 generic_state->hme_enabled = 0;
7605 generic_state->b16xme_enabled = 0;
7606 generic_state->b32xme_enabled = 0;
7607 generic_state->brc_distortion_buffer_supported = 1;
7608 generic_state->brc_constant_buffer_supported = 0;
/* BRC defaults; overridden below when a rate-control mode is active. */
7611 generic_state->frame_rate = 30;
7612 generic_state->brc_allocated = 0;
7613 generic_state->brc_inited = 0;
7614 generic_state->brc_need_reset = 0;
7615 generic_state->is_low_delay = 0;
7616 generic_state->brc_enabled = 0;//default
7617 generic_state->internal_rate_mode = 0;
7618 generic_state->curr_pak_pass = 0;
7619 generic_state->num_pak_passes = MAX_AVC_PAK_PASS_NUM;
7620 generic_state->is_first_pass = 1;
7621 generic_state->is_last_pass = 0;
7622 generic_state->mb_brc_enabled = 0; // per-MB BRC off by default
7623 generic_state->brc_roi_enable = 0;
7624 generic_state->brc_dirty_roi_enable = 0;
7625 generic_state->skip_frame_enbale = 0;
/* Bitrate / VBV parameters; populated from sequence parameters later. */
7627 generic_state->target_bit_rate = 0;
7628 generic_state->max_bit_rate = 0;
7629 generic_state->min_bit_rate = 0;
7630 generic_state->init_vbv_buffer_fullness_in_bit = 0;
7631 generic_state->vbv_buffer_size_in_bit = 0;
7632 generic_state->frames_per_100s = 0;
7633 generic_state->gop_size = 0;
7634 generic_state->gop_ref_distance = 0;
7635 generic_state->brc_target_size = 0;
7636 generic_state->brc_mode = 0;
7637 generic_state->brc_init_current_target_buf_full_in_bits = 0.0;
7638 generic_state->brc_init_reset_input_bits_per_frame = 0.0;
7639 generic_state->brc_init_reset_buf_size_in_bits = 0;
7640 generic_state->brc_init_previous_target_buf_full_in_bits = 0;
7641 generic_state->frames_per_window_size = 0;//default
7642 generic_state->target_percentage = 0;
7644 generic_state->avbr_curracy = 0;
7645 generic_state->avbr_convergence = 0;
7647 generic_state->num_skip_frames = 0;
7648 generic_state->size_skip_frames = 0;
7650 generic_state->num_roi = 0;
7651 generic_state->max_delta_qp = 0;
7652 generic_state->min_delta_qp = 0;
/* Any real rate-control mode (not NONE/CQP) turns BRC on. */
7654 if (encoder_context->rate_control_mode != VA_RC_NONE &&
7655 encoder_context->rate_control_mode != VA_RC_CQP) {
7656 generic_state->brc_enabled = 1;
7657 generic_state->brc_distortion_buffer_supported = 1;
7658 generic_state->brc_constant_buffer_supported = 1;
7659 generic_state->num_pak_passes = MAX_AVC_PAK_PASS_NUM;
7661 /*avc state initialization */
7662 avc_state->mad_enable = 0;
7663 avc_state->mb_disable_skip_map_enable = 0;
7664 avc_state->sfd_enable = 1;//default
7665 avc_state->sfd_mb_enable = 1;//set it true
7666 avc_state->adaptive_search_window_enable = 1;//default
7667 avc_state->mb_qp_data_enable = 0;
7668 avc_state->intra_refresh_i_enable = 0;
7669 avc_state->min_max_qp_enable = 0;
7670 avc_state->skip_bias_adjustment_enable = 0;//default; distinct from skip_bias_adjustment_supported below
7673 avc_state->non_ftq_skip_threshold_lut_input_enable = 0;
7674 avc_state->ftq_skip_threshold_lut_input_enable = 0;
7675 avc_state->ftq_override = 0;
7677 avc_state->direct_bias_adjustment_enable = 0;
7678 avc_state->global_motion_bias_adjustment_enable = 0;
7679 avc_state->disable_sub_mb_partion = 0;
7680 avc_state->arbitrary_num_mbs_in_slice = 0;
7681 avc_state->adaptive_transform_decision_enable = 0;//default
7682 avc_state->skip_check_disable = 0;
7683 avc_state->tq_enable = 0;
7684 avc_state->enable_avc_ildb = 0;
7685 avc_state->mbaff_flag = 0;
7686 avc_state->enable_force_skip = 1;//default
7687 avc_state->rc_panic_enable = 1;//default
7688 avc_state->suppress_recon_enable = 1;//default
7690 avc_state->ref_pic_select_list_supported = 1;
7691 avc_state->mb_brc_supported = 1;//default
7692 avc_state->multi_pre_enable = 1;//default
7693 avc_state->ftq_enable = 1;//default
7694 avc_state->caf_supported = 1; //default
7695 avc_state->caf_enable = 0;
7696 avc_state->caf_disable_hd = 1;//default
7697 avc_state->skip_bias_adjustment_supported = 1;//default
7699 avc_state->adaptive_intra_scaling_enable = 1;//default
7700 avc_state->old_mode_cost_enable = 0;//default
7701 avc_state->multi_ref_qp_enable = 1;//default
7702 avc_state->weighted_ref_l0_enable = 1;//default
7703 avc_state->weighted_ref_l1_enable = 1;//default
7704 avc_state->weighted_prediction_supported = 0;
7705 avc_state->brc_split_enable = 0;
7706 avc_state->slice_level_report_supported = 0;
7708 avc_state->fbr_bypass_enable = 1;//default
7709 avc_state->field_scaling_output_interleaved = 0;
7710 avc_state->mb_variance_output_enable = 0;
7711 avc_state->mb_pixel_average_output_enable = 0;
7712 avc_state->rolling_intra_refresh_enable = 0;// same as intra_refresh_i_enable? -- TODO confirm
7713 avc_state->mbenc_curbe_set_in_brc_update = 0;
7714 avc_state->rounding_inter_enable = 1; //default
7715 avc_state->adaptive_rounding_inter_enable = 1;//default
7717 avc_state->mbenc_i_frame_dist_in_use = 0;
7718 avc_state->mb_status_supported = 1; //set in initialization for gen9
7719 avc_state->mb_status_enable = 0;
7720 avc_state->mb_vproc_stats_enable = 0;
7721 avc_state->flatness_check_enable = 0;
7722 avc_state->flatness_check_supported = 1;//default
7723 avc_state->block_based_skip_enable = 0;
7724 avc_state->use_widi_mbenc_kernel = 0;
7725 avc_state->kernel_trellis_enable = 0;
7726 avc_state->generic_reserved = 0;
/* Rounding and QP clamps; INVALID means "derive at runtime". */
7728 avc_state->rounding_value = 0;
7729 avc_state->rounding_inter_p = AVC_INVALID_ROUNDING_VALUE;//default
7730 avc_state->rounding_inter_b = AVC_INVALID_ROUNDING_VALUE; //default
7731 avc_state->rounding_inter_b_ref = AVC_INVALID_ROUNDING_VALUE; //default
7732 avc_state->min_qp_i = INTEL_AVC_MIN_QP;
7733 avc_state->min_qp_p = INTEL_AVC_MIN_QP;
7734 avc_state->min_qp_b = INTEL_AVC_MIN_QP;
7735 avc_state->max_qp_i = INTEL_AVC_MAX_QP;
7736 avc_state->max_qp_p = INTEL_AVC_MAX_QP;
7737 avc_state->max_qp_b = INTEL_AVC_MAX_QP;
/* Zero the skip-threshold and lambda lookup tables. */
7739 memset(avc_state->non_ftq_skip_threshold_lut, 0, AVC_QP_MAX * sizeof(uint8_t));
7740 memset(avc_state->ftq_skip_threshold_lut, 0, AVC_QP_MAX * sizeof(uint8_t));
7741 memset(avc_state->lamda_value_lut, 0, AVC_QP_MAX * 2 * sizeof(uint32_t));
7743 avc_state->intra_refresh_qp_threshold = 0;
7744 avc_state->trellis_flag = 0;
7745 avc_state->hme_mv_cost_scaling_factor = 0;
7746 avc_state->slice_height = 1;
7747 avc_state->slice_num = 1;
7748 memset(avc_state->dist_scale_factor_list0, 0, 32 * sizeof(uint32_t));
7749 avc_state->bi_weight = 0;
7751 avc_state->lambda_table_enable = 0;
/* BRC constant-data surface size differs per generation; KBL/GLK also
 * enable the trellis/lambda-table/BRC-split features. */
7754 if (IS_SKL(i965->intel.device_info) ||
7755 IS_BXT(i965->intel.device_info)) {
7756 avc_state->brc_const_data_surface_width = 64;
7757 avc_state->brc_const_data_surface_height = 44;
7758 } else if (IS_KBL(i965->intel.device_info) ||
7759 IS_GLK(i965->intel.device_info)) {
7760 avc_state->brc_const_data_surface_width = 64;
7761 avc_state->brc_const_data_surface_height = 53;
7763 avc_state->decouple_mbenc_curbe_from_brc_enable = 1;
7764 avc_state->extended_mv_cost_range_enable = 0;
7765 avc_state->reserved_g95 = 0;
7766 avc_state->mbenc_brc_buffer_size = 128;
7767 avc_state->kernel_trellis_enable = 1;
7768 avc_state->lambda_table_enable = 1;
7769 avc_state->brc_split_enable = 1;
/* Reference-list bookkeeping starts empty. */
7772 avc_state->num_refs[0] = 0;
7773 avc_state->num_refs[1] = 0;
7774 memset(avc_state->list_ref_idx, 0, 32 * 2 * sizeof(uint32_t));
7775 memset(avc_state->top_field_poc, 0, NUM_MFC_AVC_DMV_BUFFERS * sizeof(int32_t));
7776 avc_state->tq_rounding = 0;
7777 avc_state->zero_mv_threshold = 0;
7778 avc_state->slice_second_levle_batch_buffer_in_use = 0;
7782 /* Status buffer layout: byte offsets of each status field inside the
 * coded buffer segment, plus the MMIO register each one is read from. */
7784 status_buffer = &avc_ctx->status_buffer;
7785 memset(status_buffer, 0, sizeof(struct encoder_status_buffer_internal));
7787 status_buffer->base_offset = base_offset;
7788 status_buffer->bs_byte_count_frame_offset = base_offset + offsetof(struct encoder_status, bs_byte_count_frame);
7789 status_buffer->bs_byte_count_frame_nh_offset = base_offset + offsetof(struct encoder_status, bs_byte_count_frame_nh);
7790 status_buffer->image_status_mask_offset = base_offset + offsetof(struct encoder_status, image_status_mask);
7791 status_buffer->image_status_ctrl_offset = base_offset + offsetof(struct encoder_status, image_status_ctrl);
7792 status_buffer->mfc_qp_status_count_offset = base_offset + offsetof(struct encoder_status, mfc_qp_status_count);
7793 status_buffer->media_index_offset = base_offset + offsetof(struct encoder_status, media_index);
7795 status_buffer->status_buffer_size = sizeof(struct encoder_status);
7796 status_buffer->bs_byte_count_frame_reg_offset = MFC_BITSTREAM_BYTECOUNT_FRAME_REG;
7797 status_buffer->bs_byte_count_frame_nh_reg_offset = MFC_BITSTREAM_BYTECOUNT_SLICE_REG;
7798 status_buffer->image_status_mask_reg_offset = MFC_IMAGE_STATUS_MASK_REG;
7799 status_buffer->image_status_ctrl_reg_offset = MFC_IMAGE_STATUS_CTRL_REG;
7800 status_buffer->mfc_qp_status_count_reg_offset = MFC_QP_STATUS_COUNT_REG;
/* Load the media kernels and install the VME pipeline entry points. */
7802 gen9_avc_kernel_init(ctx, encoder_context);
7803 encoder_context->vme_context = vme_context;
7804 encoder_context->vme_pipeline = gen9_avc_vme_pipeline;
7805 encoder_context->vme_context_destroy = gen9_avc_vme_context_destroy;
/* Error path: free whatever was allocated (free(NULL) is a no-op).
 * NOTE(review): sibling free() calls are elided from this view. */
7809 allocate_structure_failed:
7814 free(generic_state);
7820 gen9_avc_pak_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
7822 /* VME & PAK share the same context */
7823 struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
7828 encoder_context->mfc_context = pak_context;
7829 encoder_context->mfc_context_destroy = gen9_avc_pak_context_destroy;
7830 encoder_context->mfc_pipeline = gen9_avc_pak_pipeline;
7831 encoder_context->mfc_brc_prepare = gen9_avc_pak_brc_prepare;
7832 encoder_context->get_status = gen9_avc_get_coded_status;