2 * Copyright @ 2017 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the
6 * "Software"), to deal in the Software without restriction, including
7 * without limitation the rights to use, copy, modify, merge, publish,
8 * distribute, sub license, and/or sell copies of the Software, and to
9 * permit persons to whom the Software is furnished to do so, subject to
10 * the following conditions:
12 * The above copyright notice and this permission notice (including the
13 * next paragraph) shall be included in all copies or substantial portions
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19 * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 * Pengfei Qu <Pengfei.qu@intel.com>
26 * Sreerenj Balachandran <sreerenj.balachandran@intel.com>
38 #include "intel_batchbuffer.h"
39 #include "intel_driver.h"
41 #include "i965_defines.h"
42 #include "i965_structs.h"
43 #include "i965_drv_video.h"
44 #include "i965_encoder.h"
45 #include "i965_encoder_utils.h"
46 #include "intel_media.h"
48 #include "i965_gpe_utils.h"
49 #include "i965_encoder_common.h"
50 #include "i965_avc_encoder_common.h"
51 #include "i965_avc_encoder_kernels.h"
52 #include "i965_avc_encoder.h"
53 #include "i965_avc_const_def.h"
/* Limits and alignment constants for the GPE media pipeline. */
55 #define MAX_URB_SIZE 4096 /* In register */
56 #define NUM_KERNELS_PER_GPE_CONTEXT 1
57 #define MBENC_KERNEL_BASE GEN9_AVC_KERNEL_MBENC_QUALITY_I
58 #define GPE_RESOURCE_ALIGNMENT 4 /* log2 alignment: 4 means 16 = (1 << 4) */
/* Emit a buffer address as two dwords: a 64-bit relocation when a bo is
 * given, otherwise two zero dwords.  NOTE(review): several continuation
 * lines of this macro are missing from this extraction — the visible
 * lines are not the complete body. */
60 #define OUT_BUFFER_2DW(batch, bo, is_target, delta) do { \
62 OUT_BCS_RELOC64(batch, \
64 I915_GEM_DOMAIN_INSTRUCTION, \
65 is_target ? I915_GEM_DOMAIN_RENDER : 0, \
68 OUT_BCS_BATCH(batch, 0); \
69 OUT_BCS_BATCH(batch, 0); \
/* OUT_BUFFER_2DW plus a third dword carrying the surface attribute word. */
73 #define OUT_BUFFER_3DW(batch, bo, is_target, delta, attr) do { \
74 OUT_BUFFER_2DW(batch, bo, is_target, delta); \
75 OUT_BCS_BATCH(batch, attr); \
78 /* FEI specific buffer sizes per MB in bytes for gen9 */
79 #define FEI_AVC_MB_CODE_BUFFER_SIZE 64
80 #define FEI_AVC_MV_DATA_BUFFER_SIZE 128
81 #define FEI_AVC_MB_CONTROL_BUFFER_SIZE 16
82 #define FEI_AVC_MV_PREDICTOR_BUFFER_SIZE 40
83 #define FEI_AVC_DISTORTION_BUFFER_SIZE 48
84 #define FEI_AVC_QP_BUFFER_SIZE 1
85 #define PREENC_AVC_STATISTICS_BUFFER_SIZE 64
/* Selector values telling the scaling kernel which picture to downscale. */
87 #define SCALE_CUR_PIC 1
88 #define SCALE_PAST_REF_PIC 2
89 #define SCALE_FUTURE_REF_PIC 3
/* Flat quantization matrix: every byte is 16 (0x10), packed four bytes
 * per dword.  NOTE(review): the closing "};" of this initializer is not
 * visible in this extraction. */
91 static const uint32_t qm_flat[16] = {
92 0x10101010, 0x10101010, 0x10101010, 0x10101010,
93 0x10101010, 0x10101010, 0x10101010, 0x10101010,
94 0x10101010, 0x10101010, 0x10101010, 0x10101010,
95 0x10101010, 0x10101010, 0x10101010, 0x10101010
/* Flat forward quantization matrix: every 16-bit entry is 0x1000, packed
 * two entries per dword.  NOTE(review): the closing "};" of this
 * initializer is not visible in this extraction. */
98 static const uint32_t fqm_flat[32] = {
99 0x10001000, 0x10001000, 0x10001000, 0x10001000,
100 0x10001000, 0x10001000, 0x10001000, 0x10001000,
101 0x10001000, 0x10001000, 0x10001000, 0x10001000,
102 0x10001000, 0x10001000, 0x10001000, 0x10001000,
103 0x10001000, 0x10001000, 0x10001000, 0x10001000,
104 0x10001000, 0x10001000, 0x10001000, 0x10001000,
105 0x10001000, 0x10001000, 0x10001000, 0x10001000,
106 0x10001000, 0x10001000, 0x10001000, 0x10001000
109 static const unsigned int slice_type_kernel[3] = {1, 2, 0};
/* Default CURBE (constant URB entry) payloads for the BRC init/reset and
 * per-frame BRC update kernels (gen8 and gen9 layouts).  The initializer
 * bodies are entirely elided in this extraction — only the declaration
 * lines are visible. */
111 static const gen9_avc_brc_init_reset_curbe_data gen9_avc_brc_init_reset_curbe_init_data = {
268 static const gen8_avc_frame_brc_update_curbe_data gen8_avc_frame_brc_update_curbe_init_data = {
400 static const gen9_avc_frame_brc_update_curbe_data gen9_avc_frame_brc_update_curbe_init_data = {
/* Pull the latest rate-control parameters from encoder_context->brc into
 * the codec's generic_state: bitrates (converted to kbps), MB-level BRC
 * enable, frame rate, HRD buffer settings and up to 3 ROI regions
 * (converted from pixels to MB units via /16).  Sets brc_need_reset when
 * the effective target bitrate changed.  NOTE(review): braces and several
 * lines are missing from this extraction; the visible lines are not the
 * complete function body. */
558 gen9_avc_update_misc_parameters(VADriverContextP ctx,
559                                 struct encode_state *encode_state,
560                                 struct intel_encoder_context *encoder_context)
562 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
563 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
/* bits_per_second -> kbps, rounding up. */
567 generic_state->max_bit_rate = (encoder_context->brc.bits_per_second[0] + 1000 - 1) / 1000;
569 generic_state->brc_need_reset = encoder_context->brc.need_reset;
571 if (generic_state->internal_rate_mode == VA_RC_CBR) {
/* CBR: min == max == target. */
572 generic_state->min_bit_rate = generic_state->max_bit_rate;
573 generic_state->mb_brc_enabled = encoder_context->brc.mb_rate_control[0] == 1;
575 if (generic_state->target_bit_rate != generic_state->max_bit_rate) {
576 generic_state->target_bit_rate = generic_state->max_bit_rate;
577 generic_state->brc_need_reset = 1;
579 } else if (generic_state->internal_rate_mode == VA_RC_VBR) {
/* VBR: min/target derived from target_percentage of the max rate. */
580 generic_state->min_bit_rate = generic_state->max_bit_rate * (2 * encoder_context->brc.target_percentage[0] - 100) / 100;
581 generic_state->mb_brc_enabled = encoder_context->brc.mb_rate_control[0] == 1;
583 if (generic_state->target_bit_rate != generic_state->max_bit_rate * encoder_context->brc.target_percentage[0] / 100) {
584 generic_state->target_bit_rate = generic_state->max_bit_rate * encoder_context->brc.target_percentage[0] / 100;
585 generic_state->brc_need_reset = 1;
/* Frame-rate state: real values for rate-controlled modes, 30fps
 * defaults otherwise (the else branch's lines are partly elided). */
590 if (generic_state->internal_rate_mode != VA_RC_CQP) {
591 generic_state->frames_per_100s = encoder_context->brc.framerate[0].num * 100 / encoder_context->brc.framerate[0].den ;
592 generic_state->frame_rate = encoder_context->brc.framerate[0].num / encoder_context->brc.framerate[0].den ;
593 generic_state->frames_per_window_size = (int)(encoder_context->brc.window_size * generic_state->frame_rate / 1000); // brc.window size is in ms
595 generic_state->frames_per_100s = 30 * 100;
596 generic_state->frame_rate = 30 ;
597 generic_state->frames_per_window_size = 30;
/* HRD parameters only apply when BRC is active. */
601 if (generic_state->internal_rate_mode != VA_RC_CQP) {
602 generic_state->vbv_buffer_size_in_bit = encoder_context->brc.hrd_buffer_size;//misc->buffer_size;
603 generic_state->init_vbv_buffer_fullness_in_bit = encoder_context->brc.hrd_initial_buffer_fullness;//misc->initial_buffer_fullness;
/* ROI: copy up to 3 regions, then convert pixel coords to MB units. */
607 generic_state->num_roi = MIN(encoder_context->brc.num_roi, 3);
608 if (generic_state->num_roi > 0) {
609 generic_state->max_delta_qp = encoder_context->brc.roi_max_delta_qp;
610 generic_state->min_delta_qp = encoder_context->brc.roi_min_delta_qp;
612 for (i = 0; i < generic_state->num_roi; i++) {
613 generic_state->roi[i].left = encoder_context->brc.roi[i].left;
614 generic_state->roi[i].right = encoder_context->brc.roi[i].right;
615 generic_state->roi[i].top = encoder_context->brc.roi[i].top;
616 generic_state->roi[i].bottom = encoder_context->brc.roi[i].bottom;
617 generic_state->roi[i].value = encoder_context->brc.roi[i].value;
619 generic_state->roi[i].left /= 16;
620 generic_state->roi[i].right /= 16;
621 generic_state->roi[i].top /= 16;
622 generic_state->roi[i].bottom /= 16;
/* Locate one kernel inside the combined AVC encoder kernel blob.
 * The blob starts with a gen9_avc_encoder_kernel_header whose entries
 * hold 64-byte-aligned start offsets (hence the << 6 shifts).  Selects
 * the base entry for the requested operation, advances by krnstate_idx,
 * and fills ret_kernel->bin/size; the size of the last kernel is bounded
 * by binary_size.  NOTE(review): parameter lines (binary_size,
 * krnstate_idx), local declarations and error-return lines are missing
 * from this extraction. */
629 intel_avc_get_kernel_header_and_size(void *pvbinary,
631                                      INTEL_GENERIC_ENC_OPERATION operation,
633                                      struct i965_kernel *ret_kernel)
635 typedef uint32_t BIN_PTR[4];
638 gen9_avc_encoder_kernel_header *pkh_table;
639 kernel_header *pcurr_header, *pinvalid_entry, *pnext_header;
642 if (!pvbinary || !ret_kernel)
645 bin_start = (char *)pvbinary;
646 pkh_table = (gen9_avc_encoder_kernel_header *)pvbinary;
/* One-past-the-last header entry; used to detect the final kernel. */
647 pinvalid_entry = &(pkh_table->static_detection) + 1;
648 next_krnoffset = binary_size;
650 if (operation == INTEL_GENERIC_ENC_SCALING4X) {
651 pcurr_header = &pkh_table->ply_dscale_ply;
652 } else if (operation == INTEL_GENERIC_ENC_SCALING2X) {
653 pcurr_header = &pkh_table->ply_2xdscale_ply;
654 } else if (operation == INTEL_GENERIC_ENC_ME) {
655 pcurr_header = &pkh_table->me_p;
656 } else if (operation == INTEL_GENERIC_ENC_BRC) {
657 pcurr_header = &pkh_table->frame_brc_init;
658 } else if (operation == INTEL_GENERIC_ENC_MBENC) {
659 pcurr_header = &pkh_table->mbenc_quality_I;
660 } else if (operation == INTEL_GENERIC_ENC_WP) {
661 pcurr_header = &pkh_table->wp;
662 } else if (operation == INTEL_GENERIC_ENC_SFD) {
663 pcurr_header = &pkh_table->static_detection;
/* Offsets are stored in 64-byte units. */
668 pcurr_header += krnstate_idx;
669 ret_kernel->bin = (const BIN_PTR *)(bin_start + (pcurr_header->kernel_start_pointer << 6));
671 pnext_header = (pcurr_header + 1);
672 if (pnext_header < pinvalid_entry) {
673 next_krnoffset = pnext_header->kernel_start_pointer << 6;
675 ret_kernel->size = next_krnoffset - (pcurr_header->kernel_start_pointer << 6);
/* FEI variant of intel_avc_get_kernel_header_and_size: identical lookup
 * logic but against gen9_avc_fei_encoder_kernel_header, whose last entry
 * is 'wp'.  NOTE(review): parameter lines and error-return lines are
 * missing from this extraction. */
681 intel_avc_fei_get_kernel_header_and_size(
684     INTEL_GENERIC_ENC_OPERATION operation,
686     struct i965_kernel *ret_kernel)
688 typedef uint32_t BIN_PTR[4];
691 gen9_avc_fei_encoder_kernel_header *pkh_table;
692 kernel_header *pcurr_header, *pinvalid_entry, *pnext_header;
695 if (!pvbinary || !ret_kernel)
698 bin_start = (char *)pvbinary;
699 pkh_table = (gen9_avc_fei_encoder_kernel_header *)pvbinary;
/* One-past-the-last header entry of the FEI table. */
700 pinvalid_entry = &(pkh_table->wp) + 1;
701 next_krnoffset = binary_size;
703 if (operation == INTEL_GENERIC_ENC_SCALING4X) {
704 pcurr_header = &pkh_table->ply_dscale_ply;
705 } else if (operation == INTEL_GENERIC_ENC_ME) {
706 pcurr_header = &pkh_table->me_p;
707 } else if (operation == INTEL_GENERIC_ENC_MBENC) {
708 pcurr_header = &pkh_table->mbenc_i;
709 } else if (operation == INTEL_GENERIC_ENC_PREPROC) {
710 pcurr_header = &pkh_table->preproc;
/* Offsets are stored in 64-byte units. */
715 pcurr_header += krnstate_idx;
716 ret_kernel->bin = (const BIN_PTR *)(bin_start + (pcurr_header->kernel_start_pointer << 6));
718 pnext_header = (pcurr_header + 1);
719 if (pnext_header < pinvalid_entry) {
720 next_krnoffset = pnext_header->kernel_start_pointer << 6;
722 ret_kernel->size = next_krnoffset - (pcurr_header->kernel_start_pointer << 6);
/* Destructor for the per-surface private data (struct gen9_surface_avc)
 * attached via obj_surface->free_private_data: destroys the 4x/16x/32x
 * downscaled surfaces, frees all per-surface GPE resources (including
 * FEI buffers) and drops the direct-MV bo references.  NOTE(review):
 * the lines extracting avc_surface from *data and the final free of the
 * struct itself are missing from this extraction. */
728 gen9_free_surfaces_avc(void **data)
730 struct gen9_surface_avc *avc_surface;
737 if (avc_surface->scaled_4x_surface_obj) {
738 i965_DestroySurfaces(avc_surface->ctx, &avc_surface->scaled_4x_surface_id, 1);
739 avc_surface->scaled_4x_surface_id = VA_INVALID_SURFACE;
740 avc_surface->scaled_4x_surface_obj = NULL;
743 if (avc_surface->scaled_16x_surface_obj) {
744 i965_DestroySurfaces(avc_surface->ctx, &avc_surface->scaled_16x_surface_id, 1);
745 avc_surface->scaled_16x_surface_id = VA_INVALID_SURFACE;
746 avc_surface->scaled_16x_surface_obj = NULL;
749 if (avc_surface->scaled_32x_surface_obj) {
750 i965_DestroySurfaces(avc_surface->ctx, &avc_surface->scaled_32x_surface_id, 1);
751 avc_surface->scaled_32x_surface_id = VA_INVALID_SURFACE;
752 avc_surface->scaled_32x_surface_obj = NULL;
755 i965_free_gpe_resource(&avc_surface->res_mb_code_surface);
756 i965_free_gpe_resource(&avc_surface->res_mv_data_surface);
757 i965_free_gpe_resource(&avc_surface->res_ref_pic_select_surface);
759 /* FEI specific resources */
760 /* since the driver previously took an extra reference to the drm_bo
761  * in case the buffers were supplied by middleware, there shouldn't
762  * be any memory handling issue */
763 i965_free_gpe_resource(&avc_surface->res_fei_mb_cntrl_surface);
764 i965_free_gpe_resource(&avc_surface->res_fei_mv_predictor_surface);
765 i965_free_gpe_resource(&avc_surface->res_fei_vme_distortion_surface);
766 i965_free_gpe_resource(&avc_surface->res_fei_mb_qp_surface);
/* dri_bo_unreference(NULL) is a no-op, so these are safe unconditionally. */
768 dri_bo_unreference(avc_surface->dmv_top);
769 avc_surface->dmv_top = NULL;
770 dri_bo_unreference(avc_surface->dmv_bottom);
771 avc_surface->dmv_bottom = NULL;
/* Lazily create the per-surface private data for an input/reference
 * surface: allocates 4x and 16x (and, when 32x ME is supported/enabled,
 * 32x) downscaled NV12 surfaces, non-FEI MB-code and MV-data buffers,
 * the optional ref-pic-select surface and the top/bottom direct-MV bos.
 * Returns VA_STATUS_SUCCESS immediately when private_data already
 * exists.  NOTE(review): many lines are missing from this extraction
 * (i965_CreateSurfaces argument lines, allocate_flag checks, dmv size
 * arguments, the failed_allocation label); the visible lines are not the
 * complete function body. */
781 gen9_avc_init_check_surfaces(VADriverContextP ctx,
782                              struct object_surface *obj_surface,
783                              struct intel_encoder_context *encoder_context,
784                              struct avc_surface_param *surface_param)
786 struct i965_driver_data *i965 = i965_driver_data(ctx);
787 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
788 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
789 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
791 struct gen9_surface_avc *avc_surface;
792 int downscaled_width_4x, downscaled_height_4x;
793 int downscaled_width_16x, downscaled_height_16x;
794 int downscaled_width_32x, downscaled_height_32x;
796 unsigned int frame_width_in_mbs = ALIGN(surface_param->frame_width, 16) / 16;
797 unsigned int frame_height_in_mbs = ALIGN(surface_param->frame_height, 16) / 16;
798 unsigned int frame_mb_nums = frame_width_in_mbs * frame_height_in_mbs;
799 int allocate_flag = 1;
802 if (!obj_surface || !obj_surface->bo)
803     return VA_STATUS_ERROR_INVALID_SURFACE;
/* Already initialized for this surface — nothing to do. */
805 if (obj_surface->private_data) {
806     return VA_STATUS_SUCCESS;
809 avc_surface = calloc(1, sizeof(struct gen9_surface_avc));
812     return VA_STATUS_ERROR_ALLOCATION_FAILED;
/* Hand ownership of avc_surface to the object_surface; freed by
 * gen9_free_surfaces_avc. */
814 avc_surface->ctx = ctx;
815 obj_surface->private_data = avc_surface;
816 obj_surface->free_private_data = gen9_free_surfaces_avc;
818 downscaled_width_4x = generic_state->frame_width_4x;
819 downscaled_height_4x = generic_state->frame_height_4x;
821 i965_CreateSurfaces(ctx,
823                     downscaled_height_4x,
826                     &avc_surface->scaled_4x_surface_id);
828 avc_surface->scaled_4x_surface_obj = SURFACE(avc_surface->scaled_4x_surface_id);
830 if (!avc_surface->scaled_4x_surface_obj) {
831     return VA_STATUS_ERROR_ALLOCATION_FAILED;
834 i965_check_alloc_surface_bo(ctx, avc_surface->scaled_4x_surface_obj, 1,
835                             VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
837 downscaled_width_16x = generic_state->frame_width_16x;
838 downscaled_height_16x = generic_state->frame_height_16x;
839 i965_CreateSurfaces(ctx,
840                     downscaled_width_16x,
841                     downscaled_height_16x,
844                     &avc_surface->scaled_16x_surface_id);
845 avc_surface->scaled_16x_surface_obj = SURFACE(avc_surface->scaled_16x_surface_id);
847 if (!avc_surface->scaled_16x_surface_obj) {
848     return VA_STATUS_ERROR_ALLOCATION_FAILED;
851 i965_check_alloc_surface_bo(ctx, avc_surface->scaled_16x_surface_obj, 1,
852                             VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
/* 32x surface only when 32x ME is in play. */
854 if (generic_state->b32xme_supported ||
855     generic_state->b32xme_enabled) {
856     downscaled_width_32x = generic_state->frame_width_32x;
857     downscaled_height_32x = generic_state->frame_height_32x;
858     i965_CreateSurfaces(ctx,
859                         downscaled_width_32x,
860                         downscaled_height_32x,
863                         &avc_surface->scaled_32x_surface_id);
864     avc_surface->scaled_32x_surface_obj = SURFACE(avc_surface->scaled_32x_surface_id);
866     if (!avc_surface->scaled_32x_surface_obj) {
867         return VA_STATUS_ERROR_ALLOCATION_FAILED;
870     i965_check_alloc_surface_bo(ctx, avc_surface->scaled_32x_surface_obj, 1,
871                                 VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
874 /*mb code and mv data for each frame*/
/* For FEI these buffers come from the middleware instead. */
875 if (!encoder_context->fei_enabled) {
876     size = frame_mb_nums * 16 * 4;
877     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
878                                                &avc_surface->res_mb_code_surface,
882         goto failed_allocation;
884     size = frame_mb_nums * 32 * 4;
885     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
886                                                &avc_surface->res_mv_data_surface,
890         goto failed_allocation;
894 if (avc_state->ref_pic_select_list_supported) {
895     width = ALIGN(frame_width_in_mbs * 8, 64);
896     height = frame_height_in_mbs ;
897     allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
898                                                   &avc_surface->res_ref_pic_select_surface,
901                                                   "Ref pic select list buffer");
903         goto failed_allocation;
/* Direct-MV buffers for top/bottom fields; size arguments are elided in
 * this extraction. */
907 avc_surface->dmv_top =
908     dri_bo_alloc(i965->intel.bufmgr,
909                  "direct mv top Buffer",
912 avc_surface->dmv_bottom =
913     dri_bo_alloc(i965->intel.bufmgr,
914                  "direct mv bottom Buffer",
917 assert(avc_surface->dmv_top);
918 assert(avc_surface->dmv_bottom);
920 return VA_STATUS_SUCCESS;
923 return VA_STATUS_ERROR_ALLOCATION_FAILED;
/* Fill the MBEnc slice-map surface: for each configured slice, write one
 * entry per macroblock row so the MBEnc kernel can tell which slice each
 * MB belongs to.  Only runs when arbitrary_num_mbs_in_slice is set; the
 * row stride is (frame_width_in_mbs + 1) dwords aligned to 64 bytes, and
 * each row appears to be terminated with 0xFFFFFFFF.  NOTE(review):
 * numerous body lines (the row-advance logic, the per-MB store, loop
 * variable declarations) are missing from this extraction. */
927 gen9_avc_generate_slice_map(VADriverContextP ctx,
928                             struct encode_state *encode_state,
929                             struct intel_encoder_context *encoder_context)
931 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
932 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
933 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
934 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
936 struct i965_gpe_resource *gpe_resource = NULL;
937 VAEncSliceParameterBufferH264 * slice_param = NULL;
938 unsigned int * data = NULL;
939 unsigned int * data_row = NULL;
/* Pitch in dwords: byte pitch ALIGN(...,64) divided by 4. */
941 unsigned int pitch = ALIGN((generic_state->frame_width_in_mbs + 1) * 4, 64) / 4;
943 if (!avc_state->arbitrary_num_mbs_in_slice)
946 gpe_resource = &(avc_ctx->res_mbenc_slice_map_surface);
947 assert(gpe_resource);
949 i965_zero_gpe_resource(gpe_resource);
951 data_row = (unsigned int *)i965_map_gpe_resource(gpe_resource);
955 for (i = 0; i < avc_state->slice_num; i++) {
956     slice_param = avc_state->slice_param[i];
957     for (j = 0; j < slice_param->num_macroblocks; j++) {
959         if ((count > 0) && (count % generic_state->frame_width_in_mbs == 0)) {
/* Row terminator sentinel. */
967 *data++ = 0xFFFFFFFF;
969 i965_unmap_gpe_resource(gpe_resource);
/* Allocate every GPE surface/buffer the AVC VME pipeline needs: the
 * second-level image-state batch buffer, MB-status and flatness-check
 * surfaces, the 4x/16x/32x ME MV-data and distortion surfaces, the full
 * set of BRC buffers (history, PAK statistics, image-state read/write,
 * const data, distortion, ROI, MB-QP, MBEnc-BRC), the MBEnc slice-map,
 * SFD output plus P/B cost tables, weighted-prediction output surfaces
 * and the MAD buffer.  BRC buffers are allocated once and guarded by
 * generic_state->brc_allocated.  On any failure, jumps to
 * failed_allocation and returns VA_STATUS_ERROR_ALLOCATION_FAILED.
 * NOTE(review): many lines are missing from this extraction — most
 * "if (!allocate_flag)" checks, several size/width/height arguments and
 * the failed_allocation label itself are elided. */
973 gen9_avc_allocate_resources(VADriverContextP ctx,
974                             struct encode_state *encode_state,
975                             struct intel_encoder_context *encoder_context)
977 struct i965_driver_data *i965 = i965_driver_data(ctx);
978 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
979 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
980 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
981 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
982 unsigned int size = 0;
983 unsigned int width = 0;
984 unsigned int height = 0;
985 unsigned char * data = NULL;
986 int allocate_flag = 1;
989 /*all the surface/buffer are allocated here*/
991 /*second level batch buffer for image state write when cqp etc*/
992 i965_free_gpe_resource(&avc_ctx->res_image_state_batch_buffer_2nd_level);
993 size = INTEL_AVC_IMAGE_STATE_CMD_SIZE ;
994 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
995                                            &avc_ctx->res_image_state_batch_buffer_2nd_level,
/* NOTE(review): "levle" is a typo in this runtime debug string; left
 * as-is here since this is a documentation-only pass. */
997                                            "second levle batch (image state write) buffer");
999     goto failed_allocation;
1001 /* scaling related surface */
1002 if (avc_state->mb_status_supported) {
1003     i965_free_gpe_resource(&avc_ctx->res_mb_status_buffer);
1004     size = (generic_state->frame_width_in_mbs * generic_state->frame_height_in_mbs * 16 * 4 + 1023) & ~0x3ff;
1005     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
1006                                                &avc_ctx->res_mb_status_buffer,
1007                                                ALIGN(size, 0x1000),
1008                                                "MB statistics output buffer");
1010         goto failed_allocation;
1011     i965_zero_gpe_resource(&avc_ctx->res_mb_status_buffer);
1014 if (avc_state->flatness_check_supported) {
1015     width = generic_state->frame_width_in_mbs * 4;
1016     height = generic_state->frame_height_in_mbs * 4;
1017     i965_free_gpe_resource(&avc_ctx->res_flatness_check_surface);
1018     allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
1019                                                   &avc_ctx->res_flatness_check_surface,
1022                                                   "Flatness check buffer");
1024         goto failed_allocation;
1026 /* me related surface */
1027 width = generic_state->downscaled_width_4x_in_mb * 8;
1028 height = generic_state->downscaled_height_4x_in_mb * 4 * 10;
1029 i965_free_gpe_resource(&avc_ctx->s4x_memv_distortion_buffer);
1030 allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
1031                                               &avc_ctx->s4x_memv_distortion_buffer,
1034                                               "4x MEMV distortion buffer");
1036     goto failed_allocation;
1037 i965_zero_gpe_resource(&avc_ctx->s4x_memv_distortion_buffer);
1039 width = (generic_state->downscaled_width_4x_in_mb + 7) / 8 * 64;
1040 height = (generic_state->downscaled_height_4x_in_mb + 1) / 2 * 8;
1041 i965_free_gpe_resource(&avc_ctx->s4x_memv_min_distortion_brc_buffer);
1042 allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
1043                                               &avc_ctx->s4x_memv_min_distortion_brc_buffer,
1046                                               "4x MEMV min distortion brc buffer");
1048     goto failed_allocation;
1049 i965_zero_gpe_resource(&avc_ctx->s4x_memv_min_distortion_brc_buffer);
1052 width = ALIGN(generic_state->downscaled_width_4x_in_mb * 32, 64);
1053 height = generic_state->downscaled_height_4x_in_mb * 4 * 2 * 10;
1054 i965_free_gpe_resource(&avc_ctx->s4x_memv_data_buffer);
1055 allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
1056                                               &avc_ctx->s4x_memv_data_buffer,
1059                                               "4x MEMV data buffer");
1061     goto failed_allocation;
1062 i965_zero_gpe_resource(&avc_ctx->s4x_memv_data_buffer);
1065 width = ALIGN(generic_state->downscaled_width_16x_in_mb * 32, 64);
1066 height = generic_state->downscaled_height_16x_in_mb * 4 * 2 * 10 ;
1067 i965_free_gpe_resource(&avc_ctx->s16x_memv_data_buffer);
1068 allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
1069                                               &avc_ctx->s16x_memv_data_buffer,
1072                                               "16x MEMV data buffer");
1074     goto failed_allocation;
1075 i965_zero_gpe_resource(&avc_ctx->s16x_memv_data_buffer);
1078 width = ALIGN(generic_state->downscaled_width_32x_in_mb * 32, 64);
1079 height = generic_state->downscaled_height_32x_in_mb * 4 * 2 * 10 ;
1080 i965_free_gpe_resource(&avc_ctx->s32x_memv_data_buffer);
1081 allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
1082                                               &avc_ctx->s32x_memv_data_buffer,
1085                                               "32x MEMV data buffer");
1087     goto failed_allocation;
1088 i965_zero_gpe_resource(&avc_ctx->s32x_memv_data_buffer);
/* BRC buffers are allocated once per context, not per frame. */
1091 if (!generic_state->brc_allocated) {
1092     /*brc related surface */
1093     i965_free_gpe_resource(&avc_ctx->res_brc_history_buffer);
1095     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
1096                                                &avc_ctx->res_brc_history_buffer,
1097                                                ALIGN(size, 0x1000),
1098                                                "brc history buffer");
1100         goto failed_allocation;
1102     i965_free_gpe_resource(&avc_ctx->res_brc_pre_pak_statistics_output_buffer);
1104     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
1105                                                &avc_ctx->res_brc_pre_pak_statistics_output_buffer,
1106                                                ALIGN(size, 0x1000),
1107                                                "brc pak statistic buffer");
1109         goto failed_allocation;
1111     i965_free_gpe_resource(&avc_ctx->res_brc_image_state_read_buffer);
1112     size = INTEL_AVC_IMAGE_STATE_CMD_SIZE * 7;
1113     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
1114                                                &avc_ctx->res_brc_image_state_read_buffer,
1115                                                ALIGN(size, 0x1000),
1116                                                "brc image state read buffer");
1118         goto failed_allocation;
1120     i965_free_gpe_resource(&avc_ctx->res_brc_image_state_write_buffer);
1121     size = INTEL_AVC_IMAGE_STATE_CMD_SIZE * 7;
1122     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
1123                                                &avc_ctx->res_brc_image_state_write_buffer,
1124                                                ALIGN(size, 0x1000),
1125                                                "brc image state write buffer");
1127         goto failed_allocation;
1129     width = ALIGN(avc_state->brc_const_data_surface_width, 64);
1130     height = avc_state->brc_const_data_surface_height;
1131     i965_free_gpe_resource(&avc_ctx->res_brc_const_data_buffer);
1132     allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
1133                                                   &avc_ctx->res_brc_const_data_buffer,
1136                                                   "brc const data buffer");
1138         goto failed_allocation;
1139     i965_zero_gpe_resource(&avc_ctx->res_brc_const_data_buffer);
1141     if (generic_state->brc_distortion_buffer_supported) {
/* NOTE(review): width/height are computed twice here; the second pair
 * overwrites the first — looks intentional-but-sloppy upstream, verify. */
1142         width = ALIGN(generic_state->downscaled_width_4x_in_mb * 8, 64);
1143         height = ALIGN(generic_state->downscaled_height_4x_in_mb * 4, 8);
1144         width = (generic_state->downscaled_width_4x_in_mb + 7) / 8 * 64;
1145         height = (generic_state->downscaled_height_4x_in_mb + 1) / 2 * 8;
1146         i965_free_gpe_resource(&avc_ctx->res_brc_dist_data_surface);
1147         allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
1148                                                       &avc_ctx->res_brc_dist_data_surface,
1151                                                       "brc dist data buffer");
1153             goto failed_allocation;
1154         i965_zero_gpe_resource(&avc_ctx->res_brc_dist_data_surface);
1157     if (generic_state->brc_roi_enable) {
1158         width = ALIGN(generic_state->downscaled_width_4x_in_mb * 16, 64);
1159         height = ALIGN(generic_state->downscaled_height_4x_in_mb * 4, 8);
1160         i965_free_gpe_resource(&avc_ctx->res_mbbrc_roi_surface);
1161         allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
1162                                                       &avc_ctx->res_mbbrc_roi_surface,
1165                                                       "mbbrc roi buffer");
1167             goto failed_allocation;
1168         i965_zero_gpe_resource(&avc_ctx->res_mbbrc_roi_surface);
1172     width = ALIGN(generic_state->downscaled_width_4x_in_mb * 4, 64);
1173     height = ALIGN(generic_state->downscaled_height_4x_in_mb * 4, 8);
1174     i965_free_gpe_resource(&avc_ctx->res_mbbrc_mb_qp_data_surface);
1175     allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
1176                                                   &avc_ctx->res_mbbrc_mb_qp_data_surface,
1179                                                   "mbbrc mb qp buffer");
1181         goto failed_allocation;
1183     i965_free_gpe_resource(&avc_ctx->res_mbbrc_const_data_buffer);
1184     size = 16 * AVC_QP_MAX * 4;
1185     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
1186                                                &avc_ctx->res_mbbrc_const_data_buffer,
1187                                                ALIGN(size, 0x1000),
1188                                                "mbbrc const data buffer");
1190         goto failed_allocation;
1192     if (avc_state->decouple_mbenc_curbe_from_brc_enable) {
1193         i965_free_gpe_resource(&avc_ctx->res_mbenc_brc_buffer);
1194         size = avc_state->mbenc_brc_buffer_size;
1195         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
1196                                                    &avc_ctx->res_mbenc_brc_buffer,
1197                                                    ALIGN(size, 0x1000),
1198                                                    "mbenc brc buffer");
1200             goto failed_allocation;
1201         i965_zero_gpe_resource(&avc_ctx->res_mbenc_brc_buffer);
1203     generic_state->brc_allocated = 1;
1207 if (avc_state->mb_qp_data_enable) {
1208     width = ALIGN(generic_state->downscaled_width_4x_in_mb * 4, 64);
1209     height = ALIGN(generic_state->downscaled_height_4x_in_mb * 4, 8);
1210     i965_free_gpe_resource(&avc_ctx->res_mb_qp_data_surface);
1211     allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
1212                                                   &avc_ctx->res_mb_qp_data_surface,
1215                                                   "external mb qp buffer");
1217         goto failed_allocation;
1220 /* mbenc related surface. it share most of surface with other kernels */
1221 if (avc_state->arbitrary_num_mbs_in_slice) {
1222     width = ALIGN((generic_state->frame_width_in_mbs + 1) * 4, 64);
1223     height = generic_state->frame_height_in_mbs ;
1224     i965_free_gpe_resource(&avc_ctx->res_mbenc_slice_map_surface);
1225     allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
1226                                                   &avc_ctx->res_mbenc_slice_map_surface,
1229                                                   "slice map buffer");
1231         goto failed_allocation;
1232     i965_zero_gpe_resource(&avc_ctx->res_mbenc_slice_map_surface);
1234     /*generate slice map,default one slice per frame.*/
1237 /* sfd related surface */
1238 if (avc_state->sfd_enable) {
1239     i965_free_gpe_resource(&avc_ctx->res_sfd_output_buffer);
1241     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
1242                                                &avc_ctx->res_sfd_output_buffer,
1244                                                "sfd output buffer");
1246         goto failed_allocation;
1247     i965_zero_gpe_resource(&avc_ctx->res_sfd_output_buffer);
/* 52 entries presumably correspond to the AVC QP range 0..51. */
1249     i965_free_gpe_resource(&avc_ctx->res_sfd_cost_table_p_frame_buffer);
1250     size = ALIGN(52, 64);
1251     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
1252                                                &avc_ctx->res_sfd_cost_table_p_frame_buffer,
1254                                                "sfd P frame cost table buffer");
1256         goto failed_allocation;
1257     data = i965_map_gpe_resource(&(avc_ctx->res_sfd_cost_table_p_frame_buffer));
1259     memcpy(data, gen9_avc_sfd_cost_table_p_frame, sizeof(unsigned char) * 52);
1260     i965_unmap_gpe_resource(&(avc_ctx->res_sfd_cost_table_p_frame_buffer));
1262     i965_free_gpe_resource(&avc_ctx->res_sfd_cost_table_b_frame_buffer);
1263     size = ALIGN(52, 64);
1264     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
1265                                                &avc_ctx->res_sfd_cost_table_b_frame_buffer,
1267                                                "sfd B frame cost table buffer");
1269         goto failed_allocation;
1270     data = i965_map_gpe_resource(&(avc_ctx->res_sfd_cost_table_b_frame_buffer));
1272     memcpy(data, gen9_avc_sfd_cost_table_b_frame, sizeof(unsigned char) * 52);
1273     i965_unmap_gpe_resource(&(avc_ctx->res_sfd_cost_table_b_frame_buffer));
1276 /* wp related surfaces */
1277 if (avc_state->weighted_prediction_supported) {
1278     for (i = 0; i < 2 ; i++) {
1279         if (avc_ctx->wp_output_pic_select_surface_obj[i]) {
1283         width = generic_state->frame_width_in_pixel;
1284         height = generic_state->frame_height_in_pixel ;
1285         i965_CreateSurfaces(ctx,
1288                             VA_RT_FORMAT_YUV420,
1290                             &avc_ctx->wp_output_pic_select_surface_id[i]);
1291         avc_ctx->wp_output_pic_select_surface_obj[i] = SURFACE(avc_ctx->wp_output_pic_select_surface_id[i]);
1293         if (!avc_ctx->wp_output_pic_select_surface_obj[i]) {
1294             goto failed_allocation;
1297         i965_check_alloc_surface_bo(ctx, avc_ctx->wp_output_pic_select_surface_obj[i], 1,
1298                                     VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
1300     i965_free_gpe_resource(&avc_ctx->res_wp_output_pic_select_surface_list[0]);
1301     i965_object_surface_to_2d_gpe_resource_with_align(&avc_ctx->res_wp_output_pic_select_surface_list[0], avc_ctx->wp_output_pic_select_surface_obj[0], GPE_RESOURCE_ALIGNMENT);
1302     i965_free_gpe_resource(&avc_ctx->res_wp_output_pic_select_surface_list[1]);
1303     i965_object_surface_to_2d_gpe_resource_with_align(&avc_ctx->res_wp_output_pic_select_surface_list[1], avc_ctx->wp_output_pic_select_surface_obj[1], GPE_RESOURCE_ALIGNMENT);
1308 i965_free_gpe_resource(&avc_ctx->res_mad_data_buffer);
1310 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
1311                                            &avc_ctx->res_mad_data_buffer,
1312                                            ALIGN(size, 0x1000),
1315     goto failed_allocation;
1317 return VA_STATUS_SUCCESS;
1320 return VA_STATUS_ERROR_ALLOCATION_FAILED;
/* Release everything gen9_avc_allocate_resources created: every GPE
 * buffer/surface, the two weighted-prediction output VA surfaces, the
 * FEI preproc buffers and the three preenc downscaled surfaces.
 * i965_free_gpe_resource is assumed safe on never-allocated entries
 * (all calls are unconditional).  NOTE(review): lines are missing from
 * this extraction (loop variable declarations, closing braces). */
1324 gen9_avc_free_resources(struct encoder_vme_mfc_context * vme_context)
1329 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
1330 VADriverContextP ctx = avc_ctx->ctx;
1333 /* free all the surface/buffer here*/
1334 i965_free_gpe_resource(&avc_ctx->res_image_state_batch_buffer_2nd_level);
1335 i965_free_gpe_resource(&avc_ctx->res_mb_status_buffer);
1336 i965_free_gpe_resource(&avc_ctx->res_flatness_check_surface);
1337 i965_free_gpe_resource(&avc_ctx->s4x_memv_distortion_buffer);
1338 i965_free_gpe_resource(&avc_ctx->s4x_memv_min_distortion_brc_buffer);
1339 i965_free_gpe_resource(&avc_ctx->s4x_memv_data_buffer);
1340 i965_free_gpe_resource(&avc_ctx->s16x_memv_data_buffer);
1341 i965_free_gpe_resource(&avc_ctx->s32x_memv_data_buffer);
1342 i965_free_gpe_resource(&avc_ctx->res_brc_history_buffer);
1343 i965_free_gpe_resource(&avc_ctx->res_brc_pre_pak_statistics_output_buffer);
1344 i965_free_gpe_resource(&avc_ctx->res_brc_image_state_read_buffer);
1345 i965_free_gpe_resource(&avc_ctx->res_brc_image_state_write_buffer);
1346 i965_free_gpe_resource(&avc_ctx->res_brc_const_data_buffer);
1347 i965_free_gpe_resource(&avc_ctx->res_brc_dist_data_surface);
1348 i965_free_gpe_resource(&avc_ctx->res_mbbrc_roi_surface);
1349 i965_free_gpe_resource(&avc_ctx->res_mbbrc_mb_qp_data_surface);
1350 i965_free_gpe_resource(&avc_ctx->res_mbenc_brc_buffer);
1351 i965_free_gpe_resource(&avc_ctx->res_mb_qp_data_surface);
1352 i965_free_gpe_resource(&avc_ctx->res_mbbrc_const_data_buffer);
1353 i965_free_gpe_resource(&avc_ctx->res_mbenc_slice_map_surface);
1354 i965_free_gpe_resource(&avc_ctx->res_sfd_output_buffer);
1355 i965_free_gpe_resource(&avc_ctx->res_sfd_cost_table_p_frame_buffer);
1356 i965_free_gpe_resource(&avc_ctx->res_sfd_cost_table_b_frame_buffer);
1357 i965_free_gpe_resource(&avc_ctx->res_wp_output_pic_select_surface_list[0]);
1358 i965_free_gpe_resource(&avc_ctx->res_wp_output_pic_select_surface_list[1]);
1359 i965_free_gpe_resource(&avc_ctx->res_mad_data_buffer);
/* Destroy the two WP output VA surfaces and clear their handles. */
1361 for (i = 0; i < 2 ; i++) {
1362     if (avc_ctx->wp_output_pic_select_surface_obj[i]) {
1363         i965_DestroySurfaces(ctx, &avc_ctx->wp_output_pic_select_surface_id[i], 1);
1364         avc_ctx->wp_output_pic_select_surface_id[i] = VA_INVALID_SURFACE;
1365         avc_ctx->wp_output_pic_select_surface_obj[i] = NULL;
1369 /* free preenc resources */
1370 i965_free_gpe_resource(&avc_ctx->preproc_mv_predictor_buffer);
1371 i965_free_gpe_resource(&avc_ctx->preproc_mb_qp_buffer);
1372 i965_free_gpe_resource(&avc_ctx->preproc_mv_data_out_buffer);
1373 i965_free_gpe_resource(&avc_ctx->preproc_stat_data_out_buffer);
1375 i965_free_gpe_resource(&avc_ctx->preenc_past_ref_stat_data_out_buffer);
1376 i965_free_gpe_resource(&avc_ctx->preenc_future_ref_stat_data_out_buffer);
1378 i965_DestroySurfaces(ctx, &avc_ctx->preenc_scaled_4x_surface_id, 1);
1379 avc_ctx->preenc_scaled_4x_surface_id = VA_INVALID_SURFACE;
1380 avc_ctx->preenc_scaled_4x_surface_obj = NULL;
1382 i965_DestroySurfaces(ctx, &avc_ctx->preenc_past_ref_scaled_4x_surface_id, 1);
1383 avc_ctx->preenc_past_ref_scaled_4x_surface_id = VA_INVALID_SURFACE;
1384 avc_ctx->preenc_past_ref_scaled_4x_surface_obj = NULL;
1386 i965_DestroySurfaces(ctx, &avc_ctx->preenc_future_ref_scaled_4x_surface_id, 1);
1387 avc_ctx->preenc_future_ref_scaled_4x_surface_id = VA_INVALID_SURFACE;
1388 avc_ctx->preenc_future_ref_scaled_4x_surface_obj = NULL;
1392 gen9_avc_run_kernel_media_object(VADriverContextP ctx,
1393 struct intel_encoder_context *encoder_context,
1394 struct i965_gpe_context *gpe_context,
1396 struct gpe_media_object_parameter *param)
1398 struct i965_driver_data *i965 = i965_driver_data(ctx);
1399 struct i965_gpe_table *gpe = &i965->gpe_table;
1400 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
1401 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
1403 struct intel_batchbuffer *batch = encoder_context->base.batch;
1404 struct encoder_status_buffer_internal *status_buffer;
1405 struct gpe_mi_store_data_imm_parameter mi_store_data_imm;
1410 intel_batchbuffer_start_atomic(batch, 0x1000);
1411 intel_batchbuffer_emit_mi_flush(batch);
1413 status_buffer = &(avc_ctx->status_buffer);
1414 memset(&mi_store_data_imm, 0, sizeof(mi_store_data_imm));
1415 mi_store_data_imm.bo = status_buffer->bo;
1416 mi_store_data_imm.offset = status_buffer->media_index_offset;
1417 mi_store_data_imm.dw0 = media_function;
1418 gpe->mi_store_data_imm(ctx, batch, &mi_store_data_imm);
1420 gpe->pipeline_setup(ctx, gpe_context, batch);
1421 gpe->media_object(ctx, gpe_context, batch, param);
1422 gpe->media_state_flush(ctx, gpe_context, batch);
1424 gpe->pipeline_end(ctx, gpe_context, batch);
1426 intel_batchbuffer_end_atomic(batch);
1428 intel_batchbuffer_flush(batch);
1432 gen9_avc_run_kernel_media_object_walker(VADriverContextP ctx,
1433 struct intel_encoder_context *encoder_context,
1434 struct i965_gpe_context *gpe_context,
1436 struct gpe_media_object_walker_parameter *param)
1438 struct i965_driver_data *i965 = i965_driver_data(ctx);
1439 struct i965_gpe_table *gpe = &i965->gpe_table;
1440 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
1441 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
1443 struct intel_batchbuffer *batch = encoder_context->base.batch;
1444 struct encoder_status_buffer_internal *status_buffer;
1445 struct gpe_mi_store_data_imm_parameter mi_store_data_imm;
1450 intel_batchbuffer_start_atomic(batch, 0x1000);
1452 intel_batchbuffer_emit_mi_flush(batch);
1454 status_buffer = &(avc_ctx->status_buffer);
1455 memset(&mi_store_data_imm, 0, sizeof(mi_store_data_imm));
1456 mi_store_data_imm.bo = status_buffer->bo;
1457 mi_store_data_imm.offset = status_buffer->media_index_offset;
1458 mi_store_data_imm.dw0 = media_function;
1459 gpe->mi_store_data_imm(ctx, batch, &mi_store_data_imm);
1461 gpe->pipeline_setup(ctx, gpe_context, batch);
1462 gpe->media_object_walker(ctx, gpe_context, batch, param);
1463 gpe->media_state_flush(ctx, gpe_context, batch);
1465 gpe->pipeline_end(ctx, gpe_context, batch);
1467 intel_batchbuffer_end_atomic(batch);
1469 intel_batchbuffer_flush(batch);
1473 gen9_init_gpe_context_avc(VADriverContextP ctx,
1474 struct i965_gpe_context *gpe_context,
1475 struct encoder_kernel_parameter *kernel_param)
1477 struct i965_driver_data *i965 = i965_driver_data(ctx);
1479 gpe_context->curbe.length = kernel_param->curbe_size; // in bytes
1481 gpe_context->sampler.entry_size = 0;
1482 gpe_context->sampler.max_entries = 0;
1484 if (kernel_param->sampler_size) {
1485 gpe_context->sampler.entry_size = ALIGN(kernel_param->sampler_size, 64);
1486 gpe_context->sampler.max_entries = 1;
1489 gpe_context->idrt.entry_size = ALIGN(sizeof(struct gen8_interface_descriptor_data), 64); // 8 dws, 1 register
1490 gpe_context->idrt.max_entries = NUM_KERNELS_PER_GPE_CONTEXT;
1492 gpe_context->surface_state_binding_table.max_entries = MAX_AVC_ENCODER_SURFACES;
1493 gpe_context->surface_state_binding_table.binding_table_offset = 0;
1494 gpe_context->surface_state_binding_table.surface_state_offset = ALIGN(MAX_AVC_ENCODER_SURFACES * 4, 64);
1495 gpe_context->surface_state_binding_table.length = ALIGN(MAX_AVC_ENCODER_SURFACES * 4, 64) + ALIGN(MAX_AVC_ENCODER_SURFACES * SURFACE_STATE_PADDED_SIZE_GEN9, 64);
1497 if (i965->intel.eu_total > 0)
1498 gpe_context->vfe_state.max_num_threads = 6 * i965->intel.eu_total;
1500 gpe_context->vfe_state.max_num_threads = 112; // 16 EU * 7 threads
1502 gpe_context->vfe_state.curbe_allocation_size = MAX(1, ALIGN(gpe_context->curbe.length, 32) >> 5); // in registers
1503 gpe_context->vfe_state.urb_entry_size = MAX(1, ALIGN(kernel_param->inline_data_size, 32) >> 5); // in registers
1504 gpe_context->vfe_state.num_urb_entries = (MAX_URB_SIZE -
1505 gpe_context->vfe_state.curbe_allocation_size -
1506 ((gpe_context->idrt.entry_size >> 5) *
1507 gpe_context->idrt.max_entries)) / gpe_context->vfe_state.urb_entry_size;
1508 gpe_context->vfe_state.num_urb_entries = CLAMP(1, 127, gpe_context->vfe_state.num_urb_entries);
1509 gpe_context->vfe_state.gpgpu_mode = 0;
1513 gen9_init_vfe_scoreboard_avc(struct i965_gpe_context *gpe_context,
1514 struct encoder_scoreboard_parameter *scoreboard_param)
1516 gpe_context->vfe_desc5.scoreboard0.mask = scoreboard_param->mask;
1517 gpe_context->vfe_desc5.scoreboard0.type = scoreboard_param->type;
1518 gpe_context->vfe_desc5.scoreboard0.enable = scoreboard_param->enable;
1520 if (scoreboard_param->walkpat_flag) {
1521 gpe_context->vfe_desc5.scoreboard0.mask = 0x0F;
1522 gpe_context->vfe_desc5.scoreboard0.type = 1;
1524 gpe_context->vfe_desc6.scoreboard1.delta_x0 = 0;
1525 gpe_context->vfe_desc6.scoreboard1.delta_y0 = -1;
1527 gpe_context->vfe_desc6.scoreboard1.delta_x1 = 0;
1528 gpe_context->vfe_desc6.scoreboard1.delta_y1 = -2;
1530 gpe_context->vfe_desc6.scoreboard1.delta_x2 = -1;
1531 gpe_context->vfe_desc6.scoreboard1.delta_y2 = 3;
1533 gpe_context->vfe_desc6.scoreboard1.delta_x3 = -1;
1534 gpe_context->vfe_desc6.scoreboard1.delta_y3 = 1;
1537 gpe_context->vfe_desc6.scoreboard1.delta_x0 = -1;
1538 gpe_context->vfe_desc6.scoreboard1.delta_y0 = 0;
1541 gpe_context->vfe_desc6.scoreboard1.delta_x1 = 0;
1542 gpe_context->vfe_desc6.scoreboard1.delta_y1 = -1;
1545 gpe_context->vfe_desc6.scoreboard1.delta_x2 = 1;
1546 gpe_context->vfe_desc6.scoreboard1.delta_y2 = -1;
1549 gpe_context->vfe_desc6.scoreboard1.delta_x3 = -1;
1550 gpe_context->vfe_desc6.scoreboard1.delta_y3 = -1;
1553 gpe_context->vfe_desc7.scoreboard2.delta_x4 = -1;
1554 gpe_context->vfe_desc7.scoreboard2.delta_y4 = 1;
1557 gpe_context->vfe_desc7.scoreboard2.delta_x5 = 0;
1558 gpe_context->vfe_desc7.scoreboard2.delta_y5 = -2;
1561 gpe_context->vfe_desc7.scoreboard2.delta_x6 = 1;
1562 gpe_context->vfe_desc7.scoreboard2.delta_y6 = -2;
1565 gpe_context->vfe_desc7.scoreboard2.delta_x6 = -1;
1566 gpe_context->vfe_desc7.scoreboard2.delta_y6 = -2;
1570 VME pipeline related function
1574 scaling kernel related function
1577 gen9_avc_set_curbe_scaling4x(VADriverContextP ctx,
1578 struct encode_state *encode_state,
1579 struct i965_gpe_context *gpe_context,
1580 struct intel_encoder_context *encoder_context,
1583 gen9_avc_scaling4x_curbe_data *curbe_cmd;
1584 struct scaling_param *surface_param = (struct scaling_param *)param;
1586 curbe_cmd = i965_gpe_context_map_curbe(gpe_context);
1591 memset(curbe_cmd, 0, sizeof(gen9_avc_scaling4x_curbe_data));
1593 curbe_cmd->dw0.input_picture_width = surface_param->input_frame_width;
1594 curbe_cmd->dw0.input_picture_height = surface_param->input_frame_height;
1596 curbe_cmd->dw1.input_y_bti = GEN9_AVC_SCALING_FRAME_SRC_Y_INDEX;
1597 curbe_cmd->dw2.output_y_bti = GEN9_AVC_SCALING_FRAME_DST_Y_INDEX;
1600 curbe_cmd->dw5.flatness_threshold = 128;
1601 curbe_cmd->dw6.enable_mb_flatness_check = surface_param->enable_mb_flatness_check;
1602 curbe_cmd->dw7.enable_mb_variance_output = surface_param->enable_mb_variance_output;
1603 curbe_cmd->dw8.enable_mb_pixel_average_output = surface_param->enable_mb_pixel_average_output;
1605 if (curbe_cmd->dw6.enable_mb_flatness_check ||
1606 curbe_cmd->dw7.enable_mb_variance_output ||
1607 curbe_cmd->dw8.enable_mb_pixel_average_output) {
1608 curbe_cmd->dw10.mbv_proc_stat_bti = GEN9_AVC_SCALING_FRAME_MBVPROCSTATS_DST_INDEX;
1611 i965_gpe_context_unmap_curbe(gpe_context);
1616 gen95_avc_set_curbe_scaling4x(VADriverContextP ctx,
1617 struct encode_state *encode_state,
1618 struct i965_gpe_context *gpe_context,
1619 struct intel_encoder_context *encoder_context,
1622 gen95_avc_scaling4x_curbe_data *curbe_cmd;
1623 struct scaling_param *surface_param = (struct scaling_param *)param;
1625 curbe_cmd = i965_gpe_context_map_curbe(gpe_context);
1630 memset(curbe_cmd, 0, sizeof(gen95_avc_scaling4x_curbe_data));
1632 curbe_cmd->dw0.input_picture_width = surface_param->input_frame_width;
1633 curbe_cmd->dw0.input_picture_height = surface_param->input_frame_height;
1635 curbe_cmd->dw1.input_y_bti_frame = GEN9_AVC_SCALING_FRAME_SRC_Y_INDEX;
1636 curbe_cmd->dw2.output_y_bti_frame = GEN9_AVC_SCALING_FRAME_DST_Y_INDEX;
1638 if (surface_param->enable_mb_flatness_check)
1639 curbe_cmd->dw5.flatness_threshold = 128;
1640 curbe_cmd->dw6.enable_mb_flatness_check = surface_param->enable_mb_flatness_check;
1641 curbe_cmd->dw6.enable_mb_variance_output = surface_param->enable_mb_variance_output;
1642 curbe_cmd->dw6.enable_mb_pixel_average_output = surface_param->enable_mb_pixel_average_output;
1643 curbe_cmd->dw6.enable_block8x8_statistics_output = surface_param->blk8x8_stat_enabled;
1645 if (curbe_cmd->dw6.enable_mb_flatness_check ||
1646 curbe_cmd->dw6.enable_mb_variance_output ||
1647 curbe_cmd->dw6.enable_mb_pixel_average_output) {
1648 curbe_cmd->dw8.mbv_proc_stat_bti_frame = GEN9_AVC_SCALING_FRAME_MBVPROCSTATS_DST_INDEX;
1651 i965_gpe_context_unmap_curbe(gpe_context);
1656 gen9_avc_set_curbe_scaling2x(VADriverContextP ctx,
1657 struct encode_state *encode_state,
1658 struct i965_gpe_context *gpe_context,
1659 struct intel_encoder_context *encoder_context,
1662 gen9_avc_scaling2x_curbe_data *curbe_cmd;
1663 struct scaling_param *surface_param = (struct scaling_param *)param;
1665 curbe_cmd = i965_gpe_context_map_curbe(gpe_context);
1670 memset(curbe_cmd, 0, sizeof(gen9_avc_scaling2x_curbe_data));
1672 curbe_cmd->dw0.input_picture_width = surface_param->input_frame_width;
1673 curbe_cmd->dw0.input_picture_height = surface_param->input_frame_height;
1675 curbe_cmd->dw8.input_y_bti = GEN9_AVC_SCALING_FRAME_SRC_Y_INDEX;
1676 curbe_cmd->dw9.output_y_bti = GEN9_AVC_SCALING_FRAME_DST_Y_INDEX;
1678 i965_gpe_context_unmap_curbe(gpe_context);
1683 gen9_avc_send_surface_scaling(VADriverContextP ctx,
1684 struct encode_state *encode_state,
1685 struct i965_gpe_context *gpe_context,
1686 struct intel_encoder_context *encoder_context,
1689 struct scaling_param *surface_param = (struct scaling_param *)param;
1690 struct i965_driver_data *i965 = i965_driver_data(ctx);
1691 unsigned int surface_format;
1692 unsigned int res_size;
1694 if (surface_param->scaling_out_use_32unorm_surf_fmt)
1695 surface_format = I965_SURFACEFORMAT_R32_UNORM;
1696 else if (surface_param->scaling_out_use_16unorm_surf_fmt)
1697 surface_format = I965_SURFACEFORMAT_R16_UNORM;
1699 surface_format = I965_SURFACEFORMAT_R8_UNORM;
1701 i965_add_2d_gpe_surface(ctx, gpe_context,
1702 surface_param->input_surface,
1703 0, 1, surface_format,
1704 GEN9_AVC_SCALING_FRAME_SRC_Y_INDEX);
1706 i965_add_2d_gpe_surface(ctx, gpe_context,
1707 surface_param->output_surface,
1708 0, 1, surface_format,
1709 GEN9_AVC_SCALING_FRAME_DST_Y_INDEX);
1711 /*add buffer mv_proc_stat, here need change*/
1712 if (IS_GEN8(i965->intel.device_info)) {
1713 if (surface_param->mbv_proc_stat_enabled) {
1714 res_size = 16 * (surface_param->input_frame_width / 16) * (surface_param->input_frame_height / 16) * sizeof(unsigned int);
1716 i965_add_buffer_gpe_surface(ctx,
1718 surface_param->pres_mbv_proc_stat_buffer,
1722 GEN8_SCALING_FRAME_MBVPROCSTATS_DST_CM);
1724 if (surface_param->enable_mb_flatness_check) {
1725 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
1726 surface_param->pres_flatness_check_surface,
1728 I965_SURFACEFORMAT_R8_UNORM,
1729 GEN8_SCALING_FRAME_FLATNESS_DST_CM);
1732 if (surface_param->mbv_proc_stat_enabled) {
1733 res_size = 16 * (surface_param->input_frame_width / 16) * (surface_param->input_frame_height / 16) * sizeof(unsigned int);
1735 i965_add_buffer_gpe_surface(ctx,
1737 surface_param->pres_mbv_proc_stat_buffer,
1741 GEN9_AVC_SCALING_FRAME_MBVPROCSTATS_DST_INDEX);
1742 } else if (surface_param->enable_mb_flatness_check) {
1743 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
1744 surface_param->pres_flatness_check_surface,
1746 I965_SURFACEFORMAT_R8_UNORM,
1747 GEN9_AVC_SCALING_FRAME_MBVPROCSTATS_DST_INDEX);
1754 gen9_avc_kernel_scaling(VADriverContextP ctx,
1755 struct encode_state *encode_state,
1756 struct intel_encoder_context *encoder_context,
1759 struct i965_driver_data *i965 = i965_driver_data(ctx);
1760 struct i965_gpe_table *gpe = &i965->gpe_table;
1761 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
1762 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
1763 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
1764 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
1765 struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
1767 struct i965_gpe_context *gpe_context;
1768 struct scaling_param surface_param;
1769 struct object_surface *obj_surface;
1770 struct gen9_surface_avc *avc_priv_surface;
1771 struct gpe_media_object_walker_parameter media_object_walker_param;
1772 struct gpe_encoder_kernel_walker_parameter kernel_walker_param;
1773 unsigned int downscaled_width_in_mb, downscaled_height_in_mb;
1774 int media_function = 0;
1777 obj_surface = encode_state->reconstructed_object;
1778 avc_priv_surface = obj_surface->private_data;
1780 memset(&surface_param, 0, sizeof(struct scaling_param));
1782 case INTEL_ENC_HME_4x : {
1783 media_function = INTEL_MEDIA_STATE_4X_SCALING;
1784 kernel_idx = GEN9_AVC_KERNEL_SCALING_4X_IDX;
1785 downscaled_width_in_mb = generic_state->downscaled_width_4x_in_mb;
1786 downscaled_height_in_mb = generic_state->downscaled_height_4x_in_mb;
1788 surface_param.input_surface = encode_state->input_yuv_object ;
1789 surface_param.input_frame_width = generic_state->frame_width_in_pixel ;
1790 surface_param.input_frame_height = generic_state->frame_height_in_pixel ;
1792 surface_param.output_surface = avc_priv_surface->scaled_4x_surface_obj ;
1793 surface_param.output_frame_width = generic_state->frame_width_4x ;
1794 surface_param.output_frame_height = generic_state->frame_height_4x ;
1796 surface_param.enable_mb_flatness_check = avc_state->flatness_check_enable;
1797 surface_param.enable_mb_variance_output = avc_state->mb_status_enable;
1798 surface_param.enable_mb_pixel_average_output = avc_state->mb_status_enable;
1800 surface_param.blk8x8_stat_enabled = 0 ;
1801 surface_param.use_4x_scaling = 1 ;
1802 surface_param.use_16x_scaling = 0 ;
1803 surface_param.use_32x_scaling = 0 ;
1806 case INTEL_ENC_HME_16x : {
1807 media_function = INTEL_MEDIA_STATE_16X_SCALING;
1808 kernel_idx = GEN9_AVC_KERNEL_SCALING_4X_IDX;
1809 downscaled_width_in_mb = generic_state->downscaled_width_16x_in_mb;
1810 downscaled_height_in_mb = generic_state->downscaled_height_16x_in_mb;
1812 surface_param.input_surface = avc_priv_surface->scaled_4x_surface_obj ;
1813 surface_param.input_frame_width = generic_state->frame_width_4x ;
1814 surface_param.input_frame_height = generic_state->frame_height_4x ;
1816 surface_param.output_surface = avc_priv_surface->scaled_16x_surface_obj ;
1817 surface_param.output_frame_width = generic_state->frame_width_16x ;
1818 surface_param.output_frame_height = generic_state->frame_height_16x ;
1820 surface_param.enable_mb_flatness_check = 0 ;
1821 surface_param.enable_mb_variance_output = 0 ;
1822 surface_param.enable_mb_pixel_average_output = 0 ;
1824 surface_param.blk8x8_stat_enabled = 0 ;
1825 surface_param.use_4x_scaling = 0 ;
1826 surface_param.use_16x_scaling = 1 ;
1827 surface_param.use_32x_scaling = 0 ;
1831 case INTEL_ENC_HME_32x : {
1832 media_function = INTEL_MEDIA_STATE_32X_SCALING;
1833 kernel_idx = GEN9_AVC_KERNEL_SCALING_2X_IDX;
1834 downscaled_width_in_mb = generic_state->downscaled_width_32x_in_mb;
1835 downscaled_height_in_mb = generic_state->downscaled_height_32x_in_mb;
1837 surface_param.input_surface = avc_priv_surface->scaled_16x_surface_obj ;
1838 surface_param.input_frame_width = generic_state->frame_width_16x ;
1839 surface_param.input_frame_height = generic_state->frame_height_16x ;
1841 surface_param.output_surface = avc_priv_surface->scaled_32x_surface_obj ;
1842 surface_param.output_frame_width = generic_state->frame_width_32x ;
1843 surface_param.output_frame_height = generic_state->frame_height_32x ;
1845 surface_param.enable_mb_flatness_check = 0 ;
1846 surface_param.enable_mb_variance_output = 0 ;
1847 surface_param.enable_mb_pixel_average_output = 0 ;
1849 surface_param.blk8x8_stat_enabled = 0 ;
1850 surface_param.use_4x_scaling = 0 ;
1851 surface_param.use_16x_scaling = 0 ;
1852 surface_param.use_32x_scaling = 1 ;
1860 gpe_context = &(avc_ctx->context_scaling.gpe_contexts[kernel_idx]);
1862 gpe->context_init(ctx, gpe_context);
1863 gpe->reset_binding_table(ctx, gpe_context);
1865 if (surface_param.use_32x_scaling) {
1866 generic_ctx->pfn_set_curbe_scaling2x(ctx, encode_state, gpe_context, encoder_context, &surface_param);
1868 generic_ctx->pfn_set_curbe_scaling4x(ctx, encode_state, gpe_context, encoder_context, &surface_param);
1871 if (surface_param.use_32x_scaling) {
1872 surface_param.scaling_out_use_16unorm_surf_fmt = 1 ;
1873 surface_param.scaling_out_use_32unorm_surf_fmt = 0 ;
1875 surface_param.scaling_out_use_16unorm_surf_fmt = 0 ;
1876 surface_param.scaling_out_use_32unorm_surf_fmt = 1 ;
1879 if (surface_param.use_4x_scaling) {
1880 if (avc_state->mb_status_supported) {
1881 surface_param.enable_mb_flatness_check = 0;
1882 surface_param.mbv_proc_stat_enabled = (surface_param.use_4x_scaling) ? (avc_state->mb_status_enable || avc_state->flatness_check_enable) : 0 ;
1883 surface_param.pres_mbv_proc_stat_buffer = &(avc_ctx->res_mb_status_buffer);
1886 surface_param.enable_mb_flatness_check = (surface_param.use_4x_scaling) ? avc_state->flatness_check_enable : 0;
1887 surface_param.mbv_proc_stat_enabled = 0 ;
1888 surface_param.pres_flatness_check_surface = &(avc_ctx->res_flatness_check_surface);
1892 generic_ctx->pfn_send_scaling_surface(ctx, encode_state, gpe_context, encoder_context, &surface_param);
1894 /* setup the interface data */
1895 gpe->setup_interface_data(ctx, gpe_context);
1897 memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
1898 if (surface_param.use_32x_scaling) {
1899 kernel_walker_param.resolution_x = downscaled_width_in_mb ;
1900 kernel_walker_param.resolution_y = downscaled_height_in_mb ;
1902 /* the scaling is based on 8x8 blk level */
1903 kernel_walker_param.resolution_x = downscaled_width_in_mb * 2;
1904 kernel_walker_param.resolution_y = downscaled_height_in_mb * 2;
1906 kernel_walker_param.no_dependency = 1;
1908 i965_init_media_object_walker_parameter(&kernel_walker_param, &media_object_walker_param);
1910 gen9_avc_run_kernel_media_object_walker(ctx, encoder_context,
1913 &media_object_walker_param);
1915 return VA_STATUS_SUCCESS;
1919 frame/mb brc related function
1922 gen9_avc_init_mfx_avc_img_state(VADriverContextP ctx,
1923 struct encode_state *encode_state,
1924 struct intel_encoder_context *encoder_context,
1925 struct gen9_mfx_avc_img_state *pstate)
1927 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
1928 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
1929 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
1931 VAEncSequenceParameterBufferH264 *seq_param = avc_state->seq_param;
1932 VAEncPictureParameterBufferH264 *pic_param = avc_state->pic_param;
1934 memset(pstate, 0, sizeof(*pstate));
1936 pstate->dw0.dword_length = (sizeof(struct gen9_mfx_avc_img_state)) / 4 - 2;
1937 pstate->dw0.sub_opcode_b = 0;
1938 pstate->dw0.sub_opcode_a = 0;
1939 pstate->dw0.command_opcode = 1;
1940 pstate->dw0.pipeline = 2;
1941 pstate->dw0.command_type = 3;
1943 pstate->dw1.frame_size_in_mbs = generic_state->frame_width_in_mbs * generic_state->frame_height_in_mbs ;
1945 pstate->dw2.frame_width_in_mbs_minus1 = generic_state->frame_width_in_mbs - 1;
1946 pstate->dw2.frame_height_in_mbs_minus1 = generic_state->frame_height_in_mbs - 1;
1948 pstate->dw3.image_structure = 0;//frame is zero
1949 pstate->dw3.weighted_bipred_idc = pic_param->pic_fields.bits.weighted_bipred_idc;
1950 pstate->dw3.weighted_pred_flag = pic_param->pic_fields.bits.weighted_pred_flag;
1951 pstate->dw3.brc_domain_rate_control_enable = 0;//0,set for non-vdenc mode;
1952 pstate->dw3.chroma_qp_offset = pic_param->chroma_qp_index_offset;
1953 pstate->dw3.second_chroma_qp_offset = pic_param->second_chroma_qp_index_offset;
1955 pstate->dw4.field_picture_flag = 0;
1956 pstate->dw4.mbaff_mode_active = seq_param->seq_fields.bits.mb_adaptive_frame_field_flag;
1957 pstate->dw4.frame_mb_only_flag = seq_param->seq_fields.bits.frame_mbs_only_flag;
1958 pstate->dw4.transform_8x8_idct_mode_flag = pic_param->pic_fields.bits.transform_8x8_mode_flag;
1959 pstate->dw4.direct_8x8_interface_flag = seq_param->seq_fields.bits.direct_8x8_inference_flag;
1960 pstate->dw4.constrained_intra_prediction_flag = pic_param->pic_fields.bits.constrained_intra_pred_flag;
1961 pstate->dw4.entropy_coding_flag = pic_param->pic_fields.bits.entropy_coding_mode_flag;
1962 pstate->dw4.mb_mv_format_flag = 1;
1963 pstate->dw4.chroma_format_idc = seq_param->seq_fields.bits.chroma_format_idc;
1964 pstate->dw4.mv_unpacked_flag = 1;
1965 pstate->dw4.insert_test_flag = 0;
1966 pstate->dw4.load_slice_pointer_flag = 0;
1967 pstate->dw4.macroblock_stat_enable = 0; /* disable in the first pass */
1968 pstate->dw4.minimum_frame_size = 0;
1969 pstate->dw5.intra_mb_max_bit_flag = 1;
1970 pstate->dw5.inter_mb_max_bit_flag = 1;
1971 pstate->dw5.frame_size_over_flag = 1;
1972 pstate->dw5.frame_size_under_flag = 1;
1973 pstate->dw5.intra_mb_ipcm_flag = 1;
1974 pstate->dw5.mb_rate_ctrl_flag = 0;
1975 pstate->dw5.non_first_pass_flag = 0;
1976 pstate->dw5.aq_enable = pstate->dw5.aq_rounding = 0;
1977 pstate->dw5.aq_chroma_disable = 1;
1978 if (pstate->dw4.entropy_coding_flag && (avc_state->tq_enable)) {
1979 pstate->dw5.aq_enable = avc_state->tq_enable;
1980 pstate->dw5.aq_rounding = avc_state->tq_rounding;
1982 pstate->dw5.aq_rounding = 0;
1985 pstate->dw6.intra_mb_max_size = 2700;
1986 pstate->dw6.inter_mb_max_size = 4095;
1988 pstate->dw8.slice_delta_qp_max0 = 0;
1989 pstate->dw8.slice_delta_qp_max1 = 0;
1990 pstate->dw8.slice_delta_qp_max2 = 0;
1991 pstate->dw8.slice_delta_qp_max3 = 0;
1993 pstate->dw9.slice_delta_qp_min0 = 0;
1994 pstate->dw9.slice_delta_qp_min1 = 0;
1995 pstate->dw9.slice_delta_qp_min2 = 0;
1996 pstate->dw9.slice_delta_qp_min3 = 0;
1998 pstate->dw10.frame_bitrate_min = 0;
1999 pstate->dw10.frame_bitrate_min_unit = 1;
2000 pstate->dw10.frame_bitrate_min_unit_mode = 1;
2001 pstate->dw10.frame_bitrate_max = (1 << 14) - 1;
2002 pstate->dw10.frame_bitrate_max_unit = 1;
2003 pstate->dw10.frame_bitrate_max_unit_mode = 1;
2005 pstate->dw11.frame_bitrate_min_delta = 0;
2006 pstate->dw11.frame_bitrate_max_delta = 0;
2008 pstate->dw12.vad_error_logic = 1;
2009 /* set paramters DW19/DW20 for slices */
2013 gen8_avc_init_mfx_avc_img_state(VADriverContextP ctx,
2014 struct encode_state *encode_state,
2015 struct intel_encoder_context *encoder_context,
2016 struct gen8_mfx_avc_img_state *pstate)
2018 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2019 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
2020 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
2022 VAEncSequenceParameterBufferH264 *seq_param = avc_state->seq_param;
2023 VAEncPictureParameterBufferH264 *pic_param = avc_state->pic_param;
2025 memset(pstate, 0, sizeof(*pstate));
2027 pstate->dw0.dword_length = (sizeof(struct gen8_mfx_avc_img_state)) / 4 - 2;
2028 pstate->dw0.command_sub_opcode_b = 0;
2029 pstate->dw0.command_sub_opcode_a = 0;
2030 pstate->dw0.command_opcode = 1;
2031 pstate->dw0.command_pipeline = 2;
2032 pstate->dw0.command_type = 3;
2034 pstate->dw1.frame_size_in_mbs = generic_state->frame_width_in_mbs * generic_state->frame_height_in_mbs ;
2036 pstate->dw2.frame_width_in_mbs_minus1 = generic_state->frame_width_in_mbs - 1;
2037 pstate->dw2.frame_height_in_mbs_minus1 = generic_state->frame_height_in_mbs - 1;
2039 pstate->dw3.image_structure = 0;//frame is zero
2040 pstate->dw3.weighted_bipred_idc = pic_param->pic_fields.bits.weighted_bipred_idc;
2041 pstate->dw3.weighted_pred_flag = pic_param->pic_fields.bits.weighted_pred_flag;
2042 pstate->dw3.inter_mb_conf_flag = 0;
2043 pstate->dw3.intra_mb_conf_flag = 0;
2044 pstate->dw3.chroma_qp_offset = pic_param->chroma_qp_index_offset;
2045 pstate->dw3.second_chroma_qp_offset = pic_param->second_chroma_qp_index_offset;
2047 pstate->dw4.field_picture_flag = 0;
2048 pstate->dw4.mbaff_mode_active = seq_param->seq_fields.bits.mb_adaptive_frame_field_flag;
2049 pstate->dw4.frame_mb_only_flag = seq_param->seq_fields.bits.frame_mbs_only_flag;
2050 pstate->dw4.transform_8x8_idct_mode_flag = pic_param->pic_fields.bits.transform_8x8_mode_flag;
2051 pstate->dw4.direct_8x8_interface_flag = seq_param->seq_fields.bits.direct_8x8_inference_flag;
2052 pstate->dw4.constrained_intra_prediction_flag = pic_param->pic_fields.bits.constrained_intra_pred_flag;
2053 pstate->dw4.entropy_coding_flag = pic_param->pic_fields.bits.entropy_coding_mode_flag;
2054 pstate->dw4.mb_mv_format_flag = 1;
2055 pstate->dw4.chroma_format_idc = seq_param->seq_fields.bits.chroma_format_idc;
2056 pstate->dw4.mv_unpacked_flag = 1;
2057 pstate->dw4.insert_test_flag = 0;
2058 pstate->dw4.load_slice_pointer_flag = 0;
2059 pstate->dw4.macroblock_stat_enable = 0; /* disable in the first pass */
2060 pstate->dw4.minimum_frame_size = 0;
2061 pstate->dw5.intra_mb_max_bit_flag = 1;
2062 pstate->dw5.inter_mb_max_bit_flag = 1;
2063 pstate->dw5.frame_size_over_flag = 1;
2064 pstate->dw5.frame_size_under_flag = 1;
2065 pstate->dw5.intra_mb_ipcm_flag = 1;
2066 pstate->dw5.mb_rate_ctrl_flag = 0;
2067 pstate->dw5.non_first_pass_flag = 0;
2068 pstate->dw5.aq_enable = pstate->dw5.aq_rounding = 0;
2069 pstate->dw5.aq_chroma_disable = 1;
2070 if (pstate->dw4.entropy_coding_flag && (avc_state->tq_enable)) {
2071 pstate->dw5.aq_enable = avc_state->tq_enable;
2072 pstate->dw5.aq_rounding = avc_state->tq_rounding;
2074 pstate->dw5.aq_rounding = 0;
2077 pstate->dw6.intra_mb_max_size = 2700;
2078 pstate->dw6.inter_mb_max_size = 4095;
2080 pstate->dw8.slice_delta_qp_max0 = 0;
2081 pstate->dw8.slice_delta_qp_max1 = 0;
2082 pstate->dw8.slice_delta_qp_max2 = 0;
2083 pstate->dw8.slice_delta_qp_max3 = 0;
2085 pstate->dw9.slice_delta_qp_min0 = 0;
2086 pstate->dw9.slice_delta_qp_min1 = 0;
2087 pstate->dw9.slice_delta_qp_min2 = 0;
2088 pstate->dw9.slice_delta_qp_min3 = 0;
2090 pstate->dw10.frame_bitrate_min = 0;
2091 pstate->dw10.frame_bitrate_min_unit = 1;
2092 pstate->dw10.frame_bitrate_min_unit_mode = 1;
2093 pstate->dw10.frame_bitrate_max = (1 << 14) - 1;
2094 pstate->dw10.frame_bitrate_max_unit = 1;
2095 pstate->dw10.frame_bitrate_max_unit_mode = 1;
2097 pstate->dw11.frame_bitrate_min_delta = 0;
2098 pstate->dw11.frame_bitrate_max_delta = 0;
2099 /* set paramters DW19/DW20 for slices */
2101 void gen9_avc_set_image_state(VADriverContextP ctx,
2102 struct encode_state *encode_state,
2103 struct intel_encoder_context *encoder_context,
2104 struct i965_gpe_resource *gpe_resource)
2106 struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2107 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
2110 unsigned int * data;
2111 struct gen9_mfx_avc_img_state cmd;
2113 pdata = i965_map_gpe_resource(gpe_resource);
2118 gen9_avc_init_mfx_avc_img_state(ctx, encode_state, encoder_context, &cmd);
2119 for (i = 0; i < generic_state->num_pak_passes; i++) {
2122 cmd.dw4.macroblock_stat_enable = 0;
2123 cmd.dw5.non_first_pass_flag = 0;
2125 cmd.dw4.macroblock_stat_enable = 1;
2126 cmd.dw5.non_first_pass_flag = 1;
2127 cmd.dw5.intra_mb_ipcm_flag = 1;
2130 cmd.dw5.mb_rate_ctrl_flag = 0;
2131 memcpy(pdata, &cmd, sizeof(struct gen9_mfx_avc_img_state));
2132 data = (unsigned int *)(pdata + sizeof(struct gen9_mfx_avc_img_state));
2133 *data = MI_BATCH_BUFFER_END;
2135 pdata += INTEL_AVC_IMAGE_STATE_CMD_SIZE;
2137 i965_unmap_gpe_resource(gpe_resource);
2141 void gen8_avc_set_image_state(VADriverContextP ctx,
2142 struct encode_state *encode_state,
2143 struct intel_encoder_context *encoder_context,
2144 struct i965_gpe_resource *gpe_resource)
2146 struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2147 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
2150 unsigned int * data;
2151 struct gen8_mfx_avc_img_state cmd;
2153 pdata = i965_map_gpe_resource(gpe_resource);
2158 gen8_avc_init_mfx_avc_img_state(ctx, encode_state, encoder_context, &cmd);
2159 for (i = 0; i < generic_state->num_pak_passes; i++) {
2162 cmd.dw4.macroblock_stat_enable = 0;
2163 cmd.dw5.non_first_pass_flag = 0;
2165 cmd.dw4.macroblock_stat_enable = 1;
2166 cmd.dw5.non_first_pass_flag = 1;
2167 cmd.dw5.intra_mb_ipcm_flag = 1;
2168 cmd.dw3.inter_mb_conf_flag = 1;
2169 cmd.dw3.intra_mb_conf_flag = 1;
2171 cmd.dw5.mb_rate_ctrl_flag = 0;
2172 memcpy(pdata, &cmd, sizeof(struct gen8_mfx_avc_img_state));
2173 data = (unsigned int *)(pdata + sizeof(struct gen8_mfx_avc_img_state));
2174 *data = MI_BATCH_BUFFER_END;
2176 pdata += INTEL_AVC_IMAGE_STATE_CMD_SIZE;
2178 i965_unmap_gpe_resource(gpe_resource);
/*
 * Build a single (non-BRC) MFX_AVC_IMG_STATE second-level batch in
 * @gpe_resource: initialize the image state from the current encode state,
 * patch the per-pass flags for generic_state->curr_pak_pass, then append
 * MI_BATCH_BUFFER_END after the command.
 *
 * NOTE(review): this chunk has lines elided by extraction (opening/closing
 * braces, the else branch, and the declaration of `pdata` are not visible) --
 * verify all structure against the complete file before editing.
 */
2182 void gen9_avc_set_image_state_non_brc(VADriverContextP ctx,
2183 struct encode_state *encode_state,
2184 struct intel_encoder_context *encoder_context,
2185 struct i965_gpe_resource *gpe_resource)
2187 struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2188 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
2191 unsigned int * data;
2192 struct gen9_mfx_avc_img_state cmd;
2194 pdata = i965_map_gpe_resource(gpe_resource);
/* Fill `cmd` with the baseline image state for this frame. */
2199 gen9_avc_init_mfx_avc_img_state(ctx, encode_state, encoder_context, &cmd);
/* First PAK pass: no MB statistics, not a re-encode pass. */
2201 if (generic_state->curr_pak_pass == 0) {
2202 cmd.dw4.macroblock_stat_enable = 0;
2203 cmd.dw5.non_first_pass_flag = 0;
/* Subsequent passes (else branch elided above): enable MB stats and IPCM
 * for intra MBs; non_first_pass_flag is deliberately left 0 here, unlike
 * the multi-pass BRC variant -- TODO confirm against full file. */
2206 cmd.dw4.macroblock_stat_enable = 1;
2207 cmd.dw5.non_first_pass_flag = 0;
2208 cmd.dw5.intra_mb_ipcm_flag = 1;
/* Non-BRC path: per-MB rate control stays off. */
2211 cmd.dw5.mb_rate_ctrl_flag = 0;
2212 memcpy(pdata, &cmd, sizeof(struct gen9_mfx_avc_img_state));
/* Terminate the second-level batch right after the image state command. */
2213 data = (unsigned int *)(pdata + sizeof(struct gen9_mfx_avc_img_state));
2214 *data = MI_BATCH_BUFFER_END;
2216 i965_unmap_gpe_resource(gpe_resource);
/*
 * Gen9.5 (KBL/GLK/CNL) trellis-quantization lambda table setup: copy the
 * per-frame-type TQ lambda table into avc_state->lamda_value_lut, then
 * rewrite the sentinel entries (0xfffa intra / 0xffef inter) with the
 * configured rounding values (0xf000 | rounding).
 *
 * Each 32-bit table entry packs intra lambda in the high 16 bits and inter
 * lambda in the low 16 bits, indexed by [QP][col] with 2 columns per QP.
 *
 * NOTE(review): extraction dropped lines here (return type, case labels,
 * break statements, else branches, closing braces) -- verify against the
 * complete file.
 */
2221 gen95_avc_calc_lambda_table(VADriverContextP ctx,
2222 struct encode_state *encode_state,
2223 struct intel_encoder_context *encoder_context)
2225 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2226 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
2227 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
2228 VAEncPictureParameterBufferH264 *pic_param = avc_state->pic_param;
2229 unsigned int value, inter, intra;
2230 unsigned int rounding_value = 0;
2231 unsigned int size = 0;
2234 unsigned int * lambda_table = (unsigned int *)(avc_state->lamda_value_lut);
/* AVC_QP_MAX rows x 2 columns of 32-bit entries. */
2240 size = AVC_QP_MAX * 2 * sizeof(unsigned int);
/* Seed the LUT from the static per-frame-type table (case labels elided). */
2241 switch (generic_state->frame_type) {
2243 memcpy((unsigned char *)lambda_table, (unsigned char *)&gen95_avc_tq_lambda_i_frame[0][0], size * sizeof(unsigned char));
2246 memcpy((unsigned char *)lambda_table, (unsigned char *)&gen95_avc_tq_lambda_p_frame[0][0], size * sizeof(unsigned char));
2249 memcpy((unsigned char *)lambda_table, (unsigned char *)&gen95_avc_tq_lambda_b_frame[0][0], size * sizeof(unsigned char));
/* Patch sentinel entries in place. */
2256 for (i = 0; i < AVC_QP_MAX ; i++) {
2257 for (col = 0; col < 2; col++) {
2258 value = *(lambda_table + i * 2 + col);
2259 intra = value >> 16;
2261 if (intra < GEN95_AVC_MAX_LAMBDA) {
/* 0xfffa marks "use default intra trellis rounding". */
2262 if (intra == 0xfffa) {
2263 intra = 0xf000 + GEN95_AVC_DEFAULT_TRELLIS_QUANT_INTRA_ROUNDING;
2267 intra = intra << 16;
2268 inter = value & 0xffff;
2270 if (inter < GEN95_AVC_MAX_LAMBDA) {
/* 0xffef marks "use per-slice-type inter rounding": pick either the
 * preset default or the explicitly configured rounding value. */
2271 if (inter == 0xffef) {
2272 if (generic_state->frame_type == SLICE_TYPE_P) {
2273 if (avc_state->rounding_inter_p == AVC_INVALID_ROUNDING_VALUE)
2274 rounding_value = gen9_avc_inter_rounding_p[generic_state->preset];
2276 rounding_value = avc_state->rounding_inter_p;
2277 } else if (generic_state->frame_type == SLICE_TYPE_B) {
/* B frames used as references get their own rounding table. */
2278 if (pic_param->pic_fields.bits.reference_pic_flag) {
2279 if (avc_state->rounding_inter_b_ref == AVC_INVALID_ROUNDING_VALUE)
2280 rounding_value = gen9_avc_inter_rounding_b_ref[generic_state->preset];
2282 rounding_value = avc_state->rounding_inter_b_ref;
2284 if (avc_state->rounding_inter_b == AVC_INVALID_ROUNDING_VALUE)
2285 rounding_value = gen9_avc_inter_rounding_b[generic_state->preset];
2287 rounding_value = avc_state->rounding_inter_b;
2291 inter = 0xf000 + rounding_value;
/* Re-pack intra (high 16) + inter (low 16) back into the LUT. */
2293 *(lambda_table + i * 2 + col) = intra + inter;
/*
 * Populate the BRC constant-data surface (res_brc_const_data_buffer) used
 * by the BRC update kernel on Gen9+: QP adjustment / distortion thresholds,
 * skip thresholds, reference-list QPs, MV+mode costs, ref costs, intra
 * scaling factors, and (KBL/GLK/Gen10 only) lambda + FTQ25 tables.
 *
 * Layout is sequential: `data` is advanced past each table (the `data +=
 * size;` advancing lines were elided by extraction, as were case labels,
 * breaks and several braces -- verify against the complete file).
 */
2299 gen9_avc_init_brc_const_data(VADriverContextP ctx,
2300 struct encode_state *encode_state,
2301 struct intel_encoder_context *encoder_context)
2303 struct i965_driver_data *i965 = i965_driver_data(ctx);
2304 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2305 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
2306 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
2307 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
2309 struct i965_gpe_resource *gpe_resource = NULL;
2310 unsigned char * data = NULL;
2311 unsigned char * data_tmp = NULL;
2312 unsigned int size = 0;
2313 unsigned int table_idx = 0;
2314 unsigned int block_based_skip_enable = avc_state->block_based_skip_enable;
2317 struct object_surface *obj_surface;
2318 VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
2319 VASurfaceID surface_id;
2320 unsigned int transform_8x8_mode_flag = avc_state->transform_8x8_mode_enable;
2322 gpe_resource = &(avc_ctx->res_brc_const_data_buffer);
2323 assert(gpe_resource);
/* Start from a zeroed surface so elided/unwritten regions read as 0. */
2325 i965_zero_gpe_resource(gpe_resource);
2327 data = i965_map_gpe_resource(gpe_resource);
/* Map I/P/B frame type to the kernel table index. */
2330 table_idx = slice_type_kernel[generic_state->frame_type];
2332 /* Fill surface with QP Adjustment table, Distortion threshold table, MaxFrame threshold table, Distortion QP Adjustment Table*/
2333 size = sizeof(gen9_avc_qp_adjustment_dist_threshold_max_frame_threshold_dist_qp_adjustment_ipb);
2334 memcpy(data, gen9_avc_qp_adjustment_dist_threshold_max_frame_threshold_dist_qp_adjustment_ipb, size * sizeof(unsigned char));
2338 /* skip threshold table*/
2340 switch (generic_state->frame_type) {
2342 memcpy(data, gen9_avc_skip_value_p[block_based_skip_enable][transform_8x8_mode_flag], size * sizeof(unsigned char));
2345 memcpy(data, gen9_avc_skip_value_b[block_based_skip_enable][transform_8x8_mode_flag], size * sizeof(unsigned char));
2348 /*SLICE_TYPE_I,no change */
/* Optional per-QP override of the non-FTQ skip thresholds (odd bytes of
 * each 2-byte entry) supplied by the application. */
2352 if ((generic_state->frame_type != SLICE_TYPE_I) && avc_state->non_ftq_skip_threshold_lut_input_enable) {
2353 for (i = 0; i < AVC_QP_MAX ; i++) {
2354 *(data + 1 + (i * 2)) = (unsigned char)i965_avc_calc_skip_value(block_based_skip_enable, transform_8x8_mode_flag, avc_state->non_ftq_skip_threshold_lut[i]);
2359 /*fill the qp for ref list*/
/* Region layout: 32 bytes L0 + 32 pad + 32 bytes L1 + 160 reserved;
 * 0xff marks "no reference" in both lists. */
2360 size = 32 + 32 + 32 + 160;
2361 memset(data, 0xff, 32);
2362 memset(data + 32 + 32, 0xff, 32);
2363 switch (generic_state->frame_type) {
2364 case SLICE_TYPE_P: {
2365 for (i = 0 ; i < slice_param->num_ref_idx_l0_active_minus1 + 1; i++) {
2366 surface_id = slice_param->RefPicList0[i].picture_id;
2367 obj_surface = SURFACE(surface_id);
2370 *(data + i) = avc_state->list_ref_idx[0][i];//?
2374 case SLICE_TYPE_B: {
/* B: fill L1 entries at offset 64 first, then rewind for L0. */
2375 data = data + 32 + 32;
2376 for (i = 0 ; i < slice_param->num_ref_idx_l1_active_minus1 + 1; i++) {
2377 surface_id = slice_param->RefPicList1[i].picture_id;
2378 obj_surface = SURFACE(surface_id);
2381 *(data + i) = avc_state->list_ref_idx[1][i];//?
2384 data = data - 32 - 32;
2386 for (i = 0 ; i < slice_param->num_ref_idx_l0_active_minus1 + 1; i++) {
2387 surface_id = slice_param->RefPicList0[i].picture_id;
2388 obj_surface = SURFACE(surface_id);
2391 *(data + i) = avc_state->list_ref_idx[0][i];//?
2396 /*SLICE_TYPE_I,no change */
2401 /*mv cost and mode cost*/
2403 memcpy(data, (unsigned char *)&gen9_avc_mode_mv_cost_table[table_idx][0][0], size * sizeof(unsigned char));
/* Legacy intra mode cost: overwrite byte 3 of each 8-dword row
 * (data_tmp stepping lines elided by extraction). */
2405 if (avc_state->old_mode_cost_enable) {
2407 for (i = 0; i < AVC_QP_MAX ; i++) {
2408 *(data_tmp + 3) = (unsigned int)gen9_avc_old_intra_mode_cost[i];
/* Application-supplied FTQ skip thresholds: replicate into bytes
 * 24,25,27..31 of each 32-byte per-QP row. */
2413 if (avc_state->ftq_skip_threshold_lut_input_enable) {
2414 for (i = 0; i < AVC_QP_MAX ; i++) {
2415 *(data + (i * 32) + 24) =
2416 *(data + (i * 32) + 25) =
2417 *(data + (i * 32) + 27) =
2418 *(data + (i * 32) + 28) =
2419 *(data + (i * 32) + 29) =
2420 *(data + (i * 32) + 30) =
2421 *(data + (i * 32) + 31) = avc_state->ftq_skip_threshold_lut[i];
/* Reference cost table for the selected frame type. */
2429 memcpy(data, (unsigned char *)&gen9_avc_ref_cost[table_idx][0], size * sizeof(unsigned char));
/* Intra scaling factors: adaptive variant when enabled. */
2434 if (avc_state->adaptive_intra_scaling_enable) {
2435 memcpy(data, (unsigned char *)gen9_avc_adaptive_intra_scaling_factor, size * sizeof(unsigned char));
2437 memcpy(data, (unsigned char *)gen9_avc_intra_scaling_factor, size * sizeof(unsigned char));
/* Gen9.5-class parts additionally carry lambda + FTQ25 tables. */
2440 if (IS_KBL(i965->intel.device_info) ||
2441 IS_GEN10(i965->intel.device_info) ||
2442 IS_GLK(i965->intel.device_info)) {
2446 memcpy(data, (unsigned char *)gen95_avc_lambda_data, size * sizeof(unsigned char));
2450 memcpy(data, (unsigned char *)gen95_avc_ftq25, size * sizeof(unsigned char));
2453 i965_unmap_gpe_resource(gpe_resource);
/*
 * Legacy (pre-multi-pass-PAK) variant of gen9_avc_init_brc_const_data:
 * fills the same BRC constant-data surface but uses the gen75 QP-adjustment
 * and mode/MV cost tables, and skips the ref-list QP, intra scaling and
 * gen95 lambda/FTQ25 sections. Note `data` is declared `unsigned int *`
 * here (vs `unsigned char *` in the new variant) -- the byte offsets in
 * the skip/FTQ LUT writes therefore scale by 4; presumably intentional to
 * match the old kernel layout -- TODO confirm against the full file.
 *
 * NOTE(review): extraction elided case labels, breaks, `data` advancing
 * lines and several braces -- verify structure against the complete file.
 */
2457 gen9_avc_init_brc_const_data_old(VADriverContextP ctx,
2458 struct encode_state *encode_state,
2459 struct intel_encoder_context *encoder_context)
2461 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2462 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
2463 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
2464 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
2466 struct i965_gpe_resource *gpe_resource = NULL;
2467 unsigned int * data = NULL;
2468 unsigned int * data_tmp = NULL;
2469 unsigned int size = 0;
2470 unsigned int table_idx = 0;
2471 unsigned int block_based_skip_enable = avc_state->block_based_skip_enable;
2472 unsigned int transform_8x8_mode_flag = avc_state->transform_8x8_mode_enable;
2475 gpe_resource = &(avc_ctx->res_brc_const_data_buffer);
2476 assert(gpe_resource);
2478 i965_zero_gpe_resource(gpe_resource);
2480 data = i965_map_gpe_resource(gpe_resource);
2483 table_idx = slice_type_kernel[generic_state->frame_type];
2485 /* Fill surface with QP Adjustment table, Distortion threshold table, MaxFrame threshold table, Distortion QP Adjustment Table*/
2486 size = sizeof(gen75_avc_qp_adjustment_dist_threshold_max_frame_threshold_dist_qp_adjustment_ipb);
2487 memcpy(data, gen75_avc_qp_adjustment_dist_threshold_max_frame_threshold_dist_qp_adjustment_ipb, size * sizeof(unsigned char));
2491 /* skip threshold table*/
2493 switch (generic_state->frame_type) {
2495 memcpy(data, gen9_avc_skip_value_p[block_based_skip_enable][transform_8x8_mode_flag], size * sizeof(unsigned char));
2498 memcpy(data, gen9_avc_skip_value_b[block_based_skip_enable][transform_8x8_mode_flag], size * sizeof(unsigned char));
2501 /*SLICE_TYPE_I,no change */
/* Optional per-QP non-FTQ skip threshold override from the app. */
2505 if ((generic_state->frame_type != SLICE_TYPE_I) && avc_state->non_ftq_skip_threshold_lut_input_enable) {
2506 for (i = 0; i < AVC_QP_MAX ; i++) {
2507 *(data + 1 + (i * 2)) = (unsigned char)i965_avc_calc_skip_value(block_based_skip_enable, transform_8x8_mode_flag, avc_state->non_ftq_skip_threshold_lut[i]);
2512 /*fill the qp for ref list*/
2518 /*mv cost and mode cost*/
2520 memcpy(data, (unsigned char *)&gen75_avc_mode_mv_cost_table[table_idx][0][0], size * sizeof(unsigned char));
/* Legacy intra mode cost overwrite (data_tmp stepping elided). */
2522 if (avc_state->old_mode_cost_enable) {
2524 for (i = 0; i < AVC_QP_MAX ; i++) {
2525 *(data_tmp + 3) = (unsigned int)gen9_avc_old_intra_mode_cost[i];
/* Application-supplied FTQ skip thresholds, replicated per QP row. */
2530 if (avc_state->ftq_skip_threshold_lut_input_enable) {
2531 for (i = 0; i < AVC_QP_MAX ; i++) {
2532 *(data + (i * 32) + 24) =
2533 *(data + (i * 32) + 25) =
2534 *(data + (i * 32) + 27) =
2535 *(data + (i * 32) + 28) =
2536 *(data + (i * 32) + 29) =
2537 *(data + (i * 32) + 30) =
2538 *(data + (i * 32) + 31) = avc_state->ftq_skip_threshold_lut[i];
2546 memcpy(data, (unsigned char *)&gen9_avc_ref_cost[table_idx][0], size * sizeof(unsigned char));
2548 i965_unmap_gpe_resource(gpe_resource);
/*
 * Fill the CURBE for the BRC init/reset kernel: bitrate, VBV buffer,
 * frame-rate and GOP parameters, rate-control mode flags, and the PID-style
 * deviation thresholds derived from the bits-per-frame / buffer ratio.
 *
 * NOTE(review): extraction dropped lines (final parameter of the signature,
 * opening brace, null-check after map_curbe, several else/closing braces) --
 * verify against the complete file.
 */
2551 gen9_avc_set_curbe_brc_init_reset(VADriverContextP ctx,
2552 struct encode_state *encode_state,
2553 struct i965_gpe_context *gpe_context,
2554 struct intel_encoder_context *encoder_context,
2557 gen9_avc_brc_init_reset_curbe_data *cmd;
2558 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2559 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
2560 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
2561 double input_bits_per_frame = 0;
2562 double bps_ratio = 0;
2563 VAEncSequenceParameterBufferH264 * seq_param = avc_state->seq_param;
2564 struct avc_param common_param;
2566 cmd = i965_gpe_context_map_curbe(gpe_context);
/* Start from the static default CURBE, then override fields. */
2571 memcpy(cmd, &gen9_avc_brc_init_reset_curbe_init_data, sizeof(gen9_avc_brc_init_reset_curbe_data));
2573 memset(&common_param, 0, sizeof(common_param));
2574 common_param.frame_width_in_pixel = generic_state->frame_width_in_pixel;
2575 common_param.frame_height_in_pixel = generic_state->frame_height_in_pixel;
2576 common_param.frame_width_in_mbs = generic_state->frame_width_in_mbs;
2577 common_param.frame_height_in_mbs = generic_state->frame_height_in_mbs;
2578 common_param.frames_per_100s = generic_state->frames_per_100s;
2579 common_param.vbv_buffer_size_in_bit = generic_state->vbv_buffer_size_in_bit;
2580 common_param.target_bit_rate = generic_state->target_bit_rate;
/* Level-dependent max frame size from the AVC level tables. */
2582 cmd->dw0.profile_level_max_frame = i965_avc_get_profile_level_max_frame(&common_param, seq_param->level_idc);
2583 cmd->dw1.init_buf_full_in_bits = generic_state->init_vbv_buffer_fullness_in_bit;
2584 cmd->dw2.buf_size_in_bits = generic_state->vbv_buffer_size_in_bit;
/* Rates are configured in kbps; kernel expects bps. */
2585 cmd->dw3.average_bit_rate = generic_state->target_bit_rate * 1000;
2586 cmd->dw4.max_bit_rate = generic_state->max_bit_rate * 1000;
/* P count per GOP; remaining non-I frames are B. */
2587 cmd->dw8.gop_p = (generic_state->gop_ref_distance) ? ((generic_state->gop_size - 1) / generic_state->gop_ref_distance) : 0;
2588 cmd->dw9.gop_b = (generic_state->gop_size - 1 - cmd->dw8.gop_p);
2589 cmd->dw9.frame_width_in_bytes = generic_state->frame_width_in_pixel;
2590 cmd->dw10.frame_height_in_bytes = generic_state->frame_height_in_pixel;
2591 cmd->dw12.no_slices = avc_state->slice_num;
2594 if (seq_param->vui_parameters_present_flag && generic_state->internal_rate_mode != INTEL_BRC_AVBR) {
2595 if (generic_state->internal_rate_mode == VA_RC_CBR) {
2596 cmd->dw3.average_bit_rate = cmd->dw4.max_bit_rate;
/* Frame rate expressed as frames_per_100s / 100. */
2601 cmd->dw6.frame_rate_m = generic_state->frames_per_100s;
2602 cmd->dw7.frame_rate_d = 100;
2603 cmd->dw8.brc_flag = 0;
/* 0x8000 disables MB-level BRC in the kernel when mb_brc is off. */
2604 cmd->dw8.brc_flag |= (generic_state->mb_brc_enabled) ? 0 : 0x8000;
2607 if (generic_state->internal_rate_mode == VA_RC_CBR) {
/* CBR: max == average. */
2609 cmd->dw4.max_bit_rate = cmd->dw3.average_bit_rate;
2610 cmd->dw8.brc_flag = cmd->dw8.brc_flag | INTEL_ENCODE_BRCINIT_ISCBR;
2612 } else if (generic_state->internal_rate_mode == VA_RC_VBR) {
/* VBR: ensure max >= average (double average if inconsistent). */
2614 if (cmd->dw4.max_bit_rate < cmd->dw3.average_bit_rate) {
2615 cmd->dw4.max_bit_rate = cmd->dw3.average_bit_rate << 1;
2617 cmd->dw8.brc_flag = cmd->dw8.brc_flag | INTEL_ENCODE_BRCINIT_ISVBR;
2619 } else if (generic_state->internal_rate_mode == INTEL_BRC_AVBR) {
2621 cmd->dw4.max_bit_rate = cmd->dw3.average_bit_rate;
2622 cmd->dw8.brc_flag = cmd->dw8.brc_flag | INTEL_ENCODE_BRCINIT_ISAVBR;
2625 //igonre icq/vcm/qvbr
2627 cmd->dw10.avbr_accuracy = generic_state->avbr_curracy;
2628 cmd->dw11.avbr_convergence = generic_state->avbr_convergence;
/* Average bits per frame = max_rate * (1 / fps). */
2631 input_bits_per_frame = (double)(cmd->dw4.max_bit_rate) * (double)(cmd->dw7.frame_rate_d) / (double)(cmd->dw6.frame_rate_m);;
/* Derive sane VBV defaults when the app did not provide them:
 * buffer = 4 frames, initial fullness = 7/8 buffer, clamped to
 * [2 frames, buffer]. */
2633 if (cmd->dw2.buf_size_in_bits == 0) {
2634 cmd->dw2.buf_size_in_bits = (unsigned int)(input_bits_per_frame * 4);
2637 if (cmd->dw1.init_buf_full_in_bits == 0) {
2638 cmd->dw1.init_buf_full_in_bits = cmd->dw2.buf_size_in_bits * 7 / 8;
2640 if (cmd->dw1.init_buf_full_in_bits < (unsigned int)(input_bits_per_frame * 2)) {
2641 cmd->dw1.init_buf_full_in_bits = (unsigned int)(input_bits_per_frame * 2);
2643 if (cmd->dw1.init_buf_full_in_bits > cmd->dw2.buf_size_in_bits) {
2644 cmd->dw1.init_buf_full_in_bits = cmd->dw2.buf_size_in_bits;
/* AVBR overrides the buffer model entirely. */
2648 if (generic_state->internal_rate_mode == INTEL_BRC_AVBR) {
2649 cmd->dw2.buf_size_in_bits = 2 * generic_state->target_bit_rate * 1000;
2650 cmd->dw1.init_buf_full_in_bits = (unsigned int)(3 * cmd->dw2.buf_size_in_bits / 4);
/* Ratio of per-frame bits to 1/30 of the buffer, clamped to [0.1, 3.5];
 * feeds the pow() curves shaping the deviation thresholds below. */
2654 bps_ratio = input_bits_per_frame / (cmd->dw2.buf_size_in_bits / 30.0);
2655 bps_ratio = (bps_ratio < 0.1) ? 0.1 : (bps_ratio > 3.5) ? 3.5 : bps_ratio;
/* Negative thresholds are stored via unsigned wrap-around (two's
 * complement), as the kernel expects. */
2658 cmd->dw16.deviation_threshold_0_pand_b = (unsigned int)(-50 * pow(0.90, bps_ratio));
2659 cmd->dw16.deviation_threshold_1_pand_b = (unsigned int)(-50 * pow(0.66, bps_ratio));
2660 cmd->dw16.deviation_threshold_2_pand_b = (unsigned int)(-50 * pow(0.46, bps_ratio));
2661 cmd->dw16.deviation_threshold_3_pand_b = (unsigned int)(-50 * pow(0.3, bps_ratio));
2662 cmd->dw17.deviation_threshold_4_pand_b = (unsigned int)(50 * pow(0.3, bps_ratio));
2663 cmd->dw17.deviation_threshold_5_pand_b = (unsigned int)(50 * pow(0.46, bps_ratio));
2664 cmd->dw17.deviation_threshold_6_pand_b = (unsigned int)(50 * pow(0.7, bps_ratio));
2665 cmd->dw17.deviation_threshold_7_pand_b = (unsigned int)(50 * pow(0.9, bps_ratio));
2666 cmd->dw18.deviation_threshold_0_vbr = (unsigned int)(-50 * pow(0.9, bps_ratio));
2667 cmd->dw18.deviation_threshold_1_vbr = (unsigned int)(-50 * pow(0.7, bps_ratio));
2668 cmd->dw18.deviation_threshold_2_vbr = (unsigned int)(-50 * pow(0.5, bps_ratio));
2669 cmd->dw18.deviation_threshold_3_vbr = (unsigned int)(-50 * pow(0.3, bps_ratio));
2670 cmd->dw19.deviation_threshold_4_vbr = (unsigned int)(100 * pow(0.4, bps_ratio));
2671 cmd->dw19.deviation_threshold_5_vbr = (unsigned int)(100 * pow(0.5, bps_ratio));
2672 cmd->dw19.deviation_threshold_6_vbr = (unsigned int)(100 * pow(0.75, bps_ratio));
2673 cmd->dw19.deviation_threshold_7_vbr = (unsigned int)(100 * pow(0.9, bps_ratio));
2674 cmd->dw20.deviation_threshold_0_i = (unsigned int)(-50 * pow(0.8, bps_ratio));
2675 cmd->dw20.deviation_threshold_1_i = (unsigned int)(-50 * pow(0.6, bps_ratio));
2676 cmd->dw20.deviation_threshold_2_i = (unsigned int)(-50 * pow(0.34, bps_ratio));
2677 cmd->dw20.deviation_threshold_3_i = (unsigned int)(-50 * pow(0.2, bps_ratio));
2678 cmd->dw21.deviation_threshold_4_i = (unsigned int)(50 * pow(0.2, bps_ratio));
2679 cmd->dw21.deviation_threshold_5_i = (unsigned int)(50 * pow(0.4, bps_ratio));
2680 cmd->dw21.deviation_threshold_6_i = (unsigned int)(50 * pow(0.66, bps_ratio));
2681 cmd->dw21.deviation_threshold_7_i = (unsigned int)(50 * pow(0.9, bps_ratio));
2683 cmd->dw22.sliding_window_size = generic_state->frames_per_window_size;
2685 i965_gpe_context_unmap_curbe(gpe_context);
/*
 * Bind the two surfaces consumed by the BRC init/reset kernel: the BRC
 * history buffer and the ME distortion 2D surface.
 *
 * NOTE(review): several argument lines of these calls were elided by
 * extraction (gpe_context and flag arguments are not visible) -- verify
 * against the complete file.
 */
2691 gen9_avc_send_surface_brc_init_reset(VADriverContextP ctx,
2692 struct encode_state *encode_state,
2693 struct i965_gpe_context *gpe_context,
2694 struct intel_encoder_context *encoder_context,
2697 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2698 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
/* BRC history buffer (read/write state carried across frames). */
2700 i965_add_buffer_gpe_surface(ctx,
2702 &avc_ctx->res_brc_history_buffer,
2704 avc_ctx->res_brc_history_buffer.size,
2706 GEN9_AVC_BRC_INIT_RESET_HISTORY_INDEX);
/* ME distortion surface bound as a 2D R8 surface. */
2708 i965_add_buffer_2d_gpe_surface(ctx,
2710 &avc_ctx->res_brc_dist_data_surface,
2712 I965_SURFACEFORMAT_R8_UNORM,
2713 GEN9_AVC_BRC_INIT_RESET_DISTORTION_INDEX);
/*
 * Run the BRC init (first time) or BRC reset (subsequent) kernel as a
 * single MEDIA_OBJECT: pick the kernel by brc_inited, program CURBE and
 * surfaces via the generic_ctx callbacks, then dispatch.
 *
 * Returns VA_STATUS_SUCCESS (error paths, if any, are not visible in this
 * extracted chunk).
 */
2719 gen9_avc_kernel_brc_init_reset(VADriverContextP ctx,
2720 struct encode_state *encode_state,
2721 struct intel_encoder_context *encoder_context)
2723 struct i965_driver_data *i965 = i965_driver_data(ctx);
2724 struct i965_gpe_table *gpe = &i965->gpe_table;
2725 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2726 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
2727 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
2728 struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
2730 struct i965_gpe_context *gpe_context;
2731 struct gpe_media_object_parameter media_object_param;
2732 struct gpe_media_object_inline_data media_object_inline_data;
2733 int media_function = 0;
2734 int kernel_idx = GEN9_AVC_KERNEL_BRC_INIT;
2736 media_function = INTEL_MEDIA_STATE_BRC_INIT_RESET;
/* After the first init, later invocations use the RESET kernel. */
2738 if (generic_state->brc_inited)
2739 kernel_idx = GEN9_AVC_KERNEL_BRC_RESET;
2741 gpe_context = &(avc_ctx->context_brc.gpe_contexts[kernel_idx]);
2743 gpe->context_init(ctx, gpe_context);
2744 gpe->reset_binding_table(ctx, gpe_context);
/* CURBE and surface setup are indirected through generic_ctx hooks so
 * per-gen variants can override them. */
2746 generic_ctx->pfn_set_curbe_brc_init_reset(ctx, encode_state, gpe_context, encoder_context, NULL);
2748 generic_ctx->pfn_send_brc_init_reset_surface(ctx, encode_state, gpe_context, encoder_context, NULL);
2750 gpe->setup_interface_data(ctx, gpe_context);
/* Single media object, zeroed inline data. */
2752 memset(&media_object_param, 0, sizeof(media_object_param));
2753 memset(&media_object_inline_data, 0, sizeof(media_object_inline_data));
2754 media_object_param.pinline_data = &media_object_inline_data;
2755 media_object_param.inline_size = sizeof(media_object_inline_data);
2757 gen9_avc_run_kernel_media_object(ctx, encoder_context,
2760 &media_object_param);
2762 return VA_STATUS_SUCCESS;
/*
 * Fill the CURBE for the per-frame BRC update kernel: target buffer
 * fullness, frame counters, skip-frame accounting, min/max QP bounds per
 * frame type, AVBR growth/ratio thresholds, ROI enable, and the level
 * max-frame limit.
 *
 * NOTE(review): extraction dropped lines (last signature parameter,
 * braces, a null-check after map_curbe, else branches, a `return` for the
 * !obj_surface guard) -- verify against the complete file.
 */
2766 gen9_avc_set_curbe_brc_frame_update(VADriverContextP ctx,
2767 struct encode_state *encode_state,
2768 struct i965_gpe_context *gpe_context,
2769 struct intel_encoder_context *encoder_context,
2772 gen9_avc_frame_brc_update_curbe_data *cmd;
2773 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2774 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
2775 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
2776 struct object_surface *obj_surface;
2777 struct gen9_surface_avc *avc_priv_surface;
2778 struct avc_param common_param;
2779 VAEncSequenceParameterBufferH264 * seq_param = avc_state->seq_param;
2781 obj_surface = encode_state->reconstructed_object;
/* Bail out if the reconstructed surface is missing (consequent elided). */
2783 if (!obj_surface || !obj_surface->private_data)
2785 avc_priv_surface = obj_surface->private_data;
2787 cmd = i965_gpe_context_map_curbe(gpe_context);
/* Start from the static default CURBE, then override fields. */
2792 memcpy(cmd, &gen9_avc_frame_brc_update_curbe_init_data, sizeof(gen9_avc_frame_brc_update_curbe_data));
/* Wrap the running target fullness into the VBV buffer size and flag
 * the wrap for the kernel. */
2794 cmd->dw5.target_size_flag = 0 ;
2795 if (generic_state->brc_init_current_target_buf_full_in_bits > (double)generic_state->brc_init_reset_buf_size_in_bits) {
2797 generic_state->brc_init_current_target_buf_full_in_bits -= (double)generic_state->brc_init_reset_buf_size_in_bits;
2798 cmd->dw5.target_size_flag = 1 ;
/* Account for application-skipped frames in the buffer model. */
2801 if (generic_state->skip_frame_enbale) {
2802 cmd->dw6.num_skip_frames = generic_state->num_skip_frames ;
2803 cmd->dw7.size_skip_frames = generic_state->size_skip_frames;
2805 generic_state->brc_init_current_target_buf_full_in_bits += generic_state->brc_init_reset_input_bits_per_frame * generic_state->num_skip_frames;
2808 cmd->dw0.target_size = (unsigned int)generic_state->brc_init_current_target_buf_full_in_bits ;
2809 cmd->dw1.frame_number = generic_state->seq_frame_number ;
/* Header bytes already emitted, converted to bits. */
2810 cmd->dw2.size_of_pic_headers = generic_state->herder_bytes_inserted << 3 ;
2811 cmd->dw5.cur_frame_type = generic_state->frame_type ;
2812 cmd->dw5.brc_flag = 0 ;
2813 cmd->dw5.brc_flag |= (avc_priv_surface->is_as_ref) ? INTEL_ENCODE_BRCUPDATE_IS_REFERENCE : 0 ;
2815 if (avc_state->multi_pre_enable) {
2816 cmd->dw5.brc_flag |= INTEL_ENCODE_BRCUPDATE_IS_ACTUALQP ;
2817 cmd->dw14.qp_index_of_cur_pic = avc_priv_surface->frame_idx ; //do not know this. use -1
2820 cmd->dw5.max_num_paks = generic_state->num_pak_passes ;
/* Per-frame-type QP clamping, only when min/max QP is enabled
 * (case labels/breaks elided); otherwise both bounds are 0. */
2821 if (avc_state->min_max_qp_enable) {
2822 switch (generic_state->frame_type) {
2824 cmd->dw6.minimum_qp = avc_state->min_qp_i ;
2825 cmd->dw6.maximum_qp = avc_state->max_qp_i ;
2828 cmd->dw6.minimum_qp = avc_state->min_qp_p ;
2829 cmd->dw6.maximum_qp = avc_state->max_qp_p ;
2832 cmd->dw6.minimum_qp = avc_state->min_qp_b ;
2833 cmd->dw6.maximum_qp = avc_state->max_qp_b ;
2837 cmd->dw6.minimum_qp = 0 ;
2838 cmd->dw6.maximum_qp = 0 ;
2840 cmd->dw6.enable_force_skip = avc_state->enable_force_skip ;
2841 cmd->dw6.enable_sliding_window = 0 ;
/* Advance the target fullness by one frame's budget. */
2843 generic_state->brc_init_current_target_buf_full_in_bits += generic_state->brc_init_reset_input_bits_per_frame;
/* AVBR: growth-adjustment frame milestones and rate-ratio thresholds
 * scaled by convergence/accuracy. */
2845 if (generic_state->internal_rate_mode == INTEL_BRC_AVBR) {
2846 cmd->dw3.start_gadj_frame0 = (unsigned int)((10 * generic_state->avbr_convergence) / (double)150);
2847 cmd->dw3.start_gadj_frame1 = (unsigned int)((50 * generic_state->avbr_convergence) / (double)150);
2848 cmd->dw4.start_gadj_frame2 = (unsigned int)((100 * generic_state->avbr_convergence) / (double)150);
2849 cmd->dw4.start_gadj_frame3 = (unsigned int)((150 * generic_state->avbr_convergence) / (double)150);
2850 cmd->dw11.g_rate_ratio_threshold_0 = (unsigned int)((100 - (generic_state->avbr_curracy / (double)30) * (100 - 40)));
2851 cmd->dw11.g_rate_ratio_threshold_1 = (unsigned int)((100 - (generic_state->avbr_curracy / (double)30) * (100 - 75)));
2852 cmd->dw12.g_rate_ratio_threshold_2 = (unsigned int)((100 - (generic_state->avbr_curracy / (double)30) * (100 - 97)));
2853 cmd->dw12.g_rate_ratio_threshold_3 = (unsigned int)((100 + (generic_state->avbr_curracy / (double)30) * (103 - 100)));
2854 cmd->dw12.g_rate_ratio_threshold_4 = (unsigned int)((100 + (generic_state->avbr_curracy / (double)30) * (125 - 100)));
2855 cmd->dw12.g_rate_ratio_threshold_5 = (unsigned int)((100 + (generic_state->avbr_curracy / (double)30) * (160 - 100)));
2858 cmd->dw15.enable_roi = generic_state->brc_roi_enable ;
2860 memset(&common_param, 0, sizeof(common_param));
2861 common_param.frame_width_in_pixel = generic_state->frame_width_in_pixel;
2862 common_param.frame_height_in_pixel = generic_state->frame_height_in_pixel;
2863 common_param.frame_width_in_mbs = generic_state->frame_width_in_mbs;
2864 common_param.frame_height_in_mbs = generic_state->frame_height_in_mbs;
2865 common_param.frames_per_100s = generic_state->frames_per_100s;
2866 common_param.vbv_buffer_size_in_bit = generic_state->vbv_buffer_size_in_bit;
2867 common_param.target_bit_rate = generic_state->target_bit_rate;
/* Level-dependent max frame size, same helper as brc_init_reset. */
2869 cmd->dw19.user_max_frame = i965_avc_get_profile_level_max_frame(&common_param, seq_param->level_idc);
2870 i965_gpe_context_unmap_curbe(gpe_context);
/*
 * Bind all surfaces for the frame-level BRC update kernel. Binding-table
 * indices differ between Gen9 (SKL/BXT/Gen8 path) and Gen9.5
 * (KBL/GLK/Gen10), selected via is_g95. The MBEnc CURBE is exposed to the
 * kernel either through a dedicated buffer (mbenc_brc_buffer_size > 0,
 * Gen9.5 layout) or by aliasing the MBEnc context's dynamic-state bo as
 * read and write surfaces.
 *
 * NOTE(review): extraction elided the `is_g95 = 0/1` assignment lines,
 * the final signature parameter, and several call arguments -- verify
 * against the complete file.
 */
2876 gen9_avc_send_surface_brc_frame_update(VADriverContextP ctx,
2877 struct encode_state *encode_state,
2878 struct i965_gpe_context *gpe_context,
2879 struct intel_encoder_context *encoder_context,
2882 struct i965_driver_data *i965 = i965_driver_data(ctx);
2883 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
2884 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
2885 struct brc_param * param = (struct brc_param *)param_brc ;
2886 struct i965_gpe_context * gpe_context_mbenc = param->gpe_context_mbenc;
2887 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
2888 unsigned char is_g95 = 0;
/* Select binding-table layout by platform (assignments elided). */
2890 if (IS_SKL(i965->intel.device_info) ||
2891 IS_BXT(i965->intel.device_info) ||
2892 IS_GEN8(i965->intel.device_info))
2894 else if (IS_KBL(i965->intel.device_info) ||
2895 IS_GEN10(i965->intel.device_info) ||
2896 IS_GLK(i965->intel.device_info))
2899 /* brc history buffer*/
2900 i965_add_buffer_gpe_surface(ctx,
2902 &avc_ctx->res_brc_history_buffer,
2904 avc_ctx->res_brc_history_buffer.size,
2906 (is_g95 ? GEN95_AVC_FRAME_BRC_UPDATE_HISTORY_INDEX : GEN9_AVC_FRAME_BRC_UPDATE_HISTORY_INDEX));
2908 /* previous pak buffer*/
2909 i965_add_buffer_gpe_surface(ctx,
2911 &avc_ctx->res_brc_pre_pak_statistics_output_buffer,
2913 avc_ctx->res_brc_pre_pak_statistics_output_buffer.size,
2915 (is_g95 ? GEN95_AVC_FRAME_BRC_UPDATE_PAK_STATISTICS_OUTPUT_INDEX : GEN9_AVC_FRAME_BRC_UPDATE_PAK_STATISTICS_OUTPUT_INDEX));
2917 /* image state command buffer read only*/
2918 i965_add_buffer_gpe_surface(ctx,
2920 &avc_ctx->res_brc_image_state_read_buffer,
2922 avc_ctx->res_brc_image_state_read_buffer.size,
2924 (is_g95 ? GEN95_AVC_FRAME_BRC_UPDATE_IMAGE_STATE_READ_INDEX : GEN9_AVC_FRAME_BRC_UPDATE_IMAGE_STATE_READ_INDEX));
2926 /* image state command buffer write only*/
2927 i965_add_buffer_gpe_surface(ctx,
2929 &avc_ctx->res_brc_image_state_write_buffer,
2931 avc_ctx->res_brc_image_state_write_buffer.size,
2933 (is_g95 ? GEN95_AVC_FRAME_BRC_UPDATE_IMAGE_STATE_WRITE_INDEX : GEN9_AVC_FRAME_BRC_UPDATE_IMAGE_STATE_WRITE_INDEX));
/* Gen9.5 path: dedicated buffer for the BRC-updated MBEnc CURBE. */
2935 if (avc_state->mbenc_brc_buffer_size > 0) {
2936 i965_add_buffer_gpe_surface(ctx,
2938 &(avc_ctx->res_mbenc_brc_buffer),
2940 avc_ctx->res_mbenc_brc_buffer.size,
2942 GEN95_AVC_FRAME_BRC_UPDATE_MBENC_CURBE_WRITE_INDEX);
2944 /* Mbenc curbe input buffer */
2945 gen9_add_dri_buffer_gpe_surface(ctx,
2947 gpe_context_mbenc->dynamic_state.bo,
2949 ALIGN(gpe_context_mbenc->curbe.length, 64),
2950 gpe_context_mbenc->curbe.offset,
2951 GEN9_AVC_FRAME_BRC_UPDATE_MBENC_CURBE_READ_INDEX);
2952 /* Mbenc curbe output buffer */
/* Same bo/offset bound twice: the kernel reads and rewrites the MBEnc
 * CURBE in place. */
2953 gen9_add_dri_buffer_gpe_surface(ctx,
2955 gpe_context_mbenc->dynamic_state.bo,
2957 ALIGN(gpe_context_mbenc->curbe.length, 64),
2958 gpe_context_mbenc->curbe.offset,
2959 GEN9_AVC_FRAME_BRC_UPDATE_MBENC_CURBE_WRITE_INDEX);
2962 /* AVC_ME Distortion 2D surface buffer,input/output. is it res_brc_dist_data_surface*/
2963 i965_add_buffer_2d_gpe_surface(ctx,
2965 &avc_ctx->res_brc_dist_data_surface,
2967 I965_SURFACEFORMAT_R8_UNORM,
2968 (is_g95 ? GEN95_AVC_FRAME_BRC_UPDATE_DISTORTION_INDEX : GEN9_AVC_FRAME_BRC_UPDATE_DISTORTION_INDEX));
2970 /* BRC const data 2D surface buffer */
2971 i965_add_buffer_2d_gpe_surface(ctx,
2973 &avc_ctx->res_brc_const_data_buffer,
2975 I965_SURFACEFORMAT_R8_UNORM,
2976 (is_g95 ? GEN95_AVC_FRAME_BRC_UPDATE_CONSTANT_DATA_INDEX : GEN9_AVC_FRAME_BRC_UPDATE_CONSTANT_DATA_INDEX));
2978 /* MB statistical data surface*/
2979 if (!IS_GEN8(i965->intel.device_info)) {
2980 i965_add_buffer_gpe_surface(ctx,
2982 &avc_ctx->res_mb_status_buffer,
2984 avc_ctx->res_mb_status_buffer.size,
2986 (is_g95 ? GEN95_AVC_FRAME_BRC_UPDATE_MB_STATUS_INDEX : GEN9_AVC_FRAME_BRC_UPDATE_MB_STATUS_INDEX));
/* Gen8 fallback (else branch elided): MB QP surface bound at the MB
 * status slot -- NOTE(review): reusing MB_STATUS_INDEX here looks
 * intentional for Gen8 but verify against the full file. */
2988 i965_add_buffer_2d_gpe_surface(ctx,
2990 &avc_ctx->res_mbbrc_mb_qp_data_surface,
2992 I965_SURFACEFORMAT_R8_UNORM,
2993 GEN9_AVC_FRAME_BRC_UPDATE_MB_STATUS_INDEX);
/*
 * Run the frame-level BRC update kernel. Sequence: (1) pick and program
 * the MBEnc kernel's CURBE (the BRC kernel rewrites it with updated QPs),
 * (2) program the BRC-update CURBE and constant-data surface, (3) build
 * the image-state batch the kernel patches, (4) bind surfaces and dispatch
 * a single MEDIA_OBJECT.
 *
 * Returns VA_STATUS_SUCCESS (error paths not visible in this extracted
 * chunk; case labels, breaks, kernel_idx offsets for P/B and several
 * braces were elided -- verify against the complete file).
 */
2999 gen9_avc_kernel_brc_frame_update(VADriverContextP ctx,
3000 struct encode_state *encode_state,
3001 struct intel_encoder_context *encoder_context)
3004 struct i965_driver_data *i965 = i965_driver_data(ctx);
3005 struct i965_gpe_table *gpe = &i965->gpe_table;
3006 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
3007 struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
3008 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
3009 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
3010 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
3012 struct i965_gpe_context *gpe_context = NULL;
3013 struct gpe_media_object_parameter media_object_param;
3014 struct gpe_media_object_inline_data media_object_inline_data;
3015 int media_function = 0;
3017 unsigned int mb_const_data_buffer_in_use, mb_qp_buffer_in_use;
3018 unsigned int brc_enabled = 0;
3019 unsigned int roi_enable = (generic_state->num_roi > 0) ? 1 : 0;
/* Dirty-ROI path is compiled out via the trailing `(0)`. */
3020 unsigned int dirty_roi_enable = ((generic_state->dirty_num_roi > 0) && (generic_state->frame_type == SLICE_TYPE_P) && (0));
3022 /* the following set the mbenc curbe*/
3023 struct mbenc_param curbe_mbenc_param ;
3024 struct brc_param curbe_brc_param ;
3026 mb_const_data_buffer_in_use =
3027 generic_state->mb_brc_enabled ||
3030 avc_state->mb_qp_data_enable ||
3031 avc_state->rolling_intra_refresh_enable;
3032 mb_qp_buffer_in_use =
3033 generic_state->mb_brc_enabled ||
3034 generic_state->brc_roi_enable ||
3035 avc_state->mb_qp_data_enable;
/* Choose MBEnc kernel flavor (normal/performance/quality) for the CURBE
 * that the BRC kernel will rewrite. */
3037 switch (generic_state->kernel_mode) {
3038 case INTEL_ENC_KERNEL_NORMAL : {
3039 kernel_idx = MBENC_KERNEL_BASE + GEN9_AVC_KERNEL_MBENC_NORMAL_I;
3042 case INTEL_ENC_KERNEL_PERFORMANCE : {
3043 kernel_idx = MBENC_KERNEL_BASE + GEN9_AVC_KERNEL_MBENC_PERFORMANCE_I;
3046 case INTEL_ENC_KERNEL_QUALITY : {
3047 kernel_idx = MBENC_KERNEL_BASE + GEN9_AVC_KERNEL_MBENC_QUALITY_I;
/* Adjust from the I-frame kernel to the P/B variant (offset lines
 * elided by extraction). */
3055 if (generic_state->frame_type == SLICE_TYPE_P) {
3057 } else if (generic_state->frame_type == SLICE_TYPE_B) {
3061 gpe_context = &(avc_ctx->context_mbenc.gpe_contexts[kernel_idx]);
3062 gpe->context_init(ctx, gpe_context);
3064 memset(&curbe_mbenc_param, 0, sizeof(struct mbenc_param));
3066 curbe_mbenc_param.mb_const_data_buffer_in_use = mb_const_data_buffer_in_use;
3067 curbe_mbenc_param.mb_qp_buffer_in_use = mb_qp_buffer_in_use;
3068 curbe_mbenc_param.mbenc_i_frame_dist_in_use = 0;
3069 curbe_mbenc_param.brc_enabled = brc_enabled;
3070 curbe_mbenc_param.roi_enabled = roi_enable;
3072 /* set curbe mbenc*/
3073 generic_ctx->pfn_set_curbe_mbenc(ctx, encode_state, gpe_context, encoder_context, &curbe_mbenc_param);
3075 // gen95 set curbe out of the brc. gen9 do it here
3076 avc_state->mbenc_curbe_set_in_brc_update = !avc_state->decouple_mbenc_curbe_from_brc_enable;
3077 /*begin brc frame update*/
3078 memset(&curbe_brc_param, 0, sizeof(struct brc_param));
3079 curbe_brc_param.gpe_context_mbenc = gpe_context;
3080 media_function = INTEL_MEDIA_STATE_BRC_UPDATE;
3081 kernel_idx = GEN9_AVC_KERNEL_BRC_FRAME_UPDATE;
/* Switch gpe_context from the MBEnc context to the BRC-update context. */
3082 gpe_context = &(avc_ctx->context_brc.gpe_contexts[kernel_idx]);
3083 curbe_brc_param.gpe_context_brc_frame_update = gpe_context;
3085 gpe->context_init(ctx, gpe_context);
3086 gpe->reset_binding_table(ctx, gpe_context);
3087 /*brc copy ignored*/
3089 /* set curbe frame update*/
3090 generic_ctx->pfn_set_curbe_brc_frame_update(ctx, encode_state, gpe_context, encoder_context, &curbe_brc_param);
3092 /* load brc constant data, is it same as mbenc mb brc constant data? no.*/
3093 if (avc_state->multi_pre_enable) {
3094 gen9_avc_init_brc_const_data(ctx, encode_state, encoder_context);
3096 gen9_avc_init_brc_const_data_old(ctx, encode_state, encoder_context);
3098 /* image state construct*/
3099 if (IS_GEN8(i965->intel.device_info)) {
3100 gen8_avc_set_image_state(ctx, encode_state, encoder_context, &(avc_ctx->res_brc_image_state_read_buffer));
3102 gen9_avc_set_image_state(ctx, encode_state, encoder_context, &(avc_ctx->res_brc_image_state_read_buffer));
3104 /* set surface frame mbenc*/
3105 generic_ctx->pfn_send_brc_frame_update_surface(ctx, encode_state, gpe_context, encoder_context, &curbe_brc_param);
3108 gpe->setup_interface_data(ctx, gpe_context);
/* Single media object, zeroed inline data. */
3110 memset(&media_object_param, 0, sizeof(media_object_param));
3111 memset(&media_object_inline_data, 0, sizeof(media_object_inline_data));
3112 media_object_param.pinline_data = &media_object_inline_data;
3113 media_object_param.inline_size = sizeof(media_object_inline_data);
3115 gen9_avc_run_kernel_media_object(ctx, encoder_context,
3118 &media_object_param);
3120 return VA_STATUS_SUCCESS;
/* Fill the CURBE (constant/URB data) for the BRC per-MB update kernel.
 * Writes only DW0: the current frame type and the ROI-enable flag.
 * NOTE(review): this chunk is sampled -- the return-type line, the
 * NULL-check after mapping the CURBE, the `else` keyword and closing
 * braces are among the original lines missing here. */
3124 gen9_avc_set_curbe_brc_mb_update(VADriverContextP ctx,
3125 struct encode_state *encode_state,
3126 struct i965_gpe_context *gpe_context,
3127 struct intel_encoder_context *encoder_context,
3130 gen9_avc_mb_brc_curbe_data *cmd;
3131 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
3132 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
/* Map the kernel CURBE so the CPU can write it, then clear it. */
3134 cmd = i965_gpe_context_map_curbe(gpe_context);
3139 memset(cmd, 0, sizeof(gen9_avc_mb_brc_curbe_data));
/* DW0: frame type (I/P/B) and whether BRC ROI processing is active. */
3141 cmd->dw0.cur_frame_type = generic_state->frame_type;
3142 if (generic_state->brc_roi_enable) {
3143 cmd->dw0.enable_roi = 1;
3145 cmd->dw0.enable_roi = 0;
/* Unmap so the GPU sees the updated CURBE. */
3148 i965_gpe_context_unmap_curbe(gpe_context);
/* Bind the GPE surfaces consumed by the BRC per-MB update kernel:
 * history buffer, optional MB-QP output, optional ROI map, and the
 * MB status/statistics buffer. Each i965_add_buffer_*_gpe_surface()
 * call attaches one resource at a fixed binding-table index
 * (GEN9_AVC_MB_BRC_UPDATE_*_INDEX).
 * NOTE(review): sampled chunk -- some argument lines of the surface
 * calls (e.g. offset/flag parameters) and closing braces are missing. */
3154 gen9_avc_send_surface_brc_mb_update(VADriverContextP ctx,
3155 struct encode_state *encode_state,
3156 struct i965_gpe_context *gpe_context,
3157 struct intel_encoder_context *encoder_context,
3160 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
3161 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
3162 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
3164 /* brc history buffer*/
3165 i965_add_buffer_gpe_surface(ctx,
3167 &avc_ctx->res_brc_history_buffer,
3169 avc_ctx->res_brc_history_buffer.size,
3171 GEN9_AVC_MB_BRC_UPDATE_HISTORY_INDEX);
3173 /* MB qp data buffer is it same as res_mbbrc_mb_qp_data_surface*/
/* Only bound when per-MB BRC is on; 2D R8 surface holding per-MB QPs. */
3174 if (generic_state->mb_brc_enabled) {
3175 i965_add_buffer_2d_gpe_surface(ctx,
3177 &avc_ctx->res_mbbrc_mb_qp_data_surface,
3179 I965_SURFACEFORMAT_R8_UNORM,
3180 GEN9_AVC_MB_BRC_UPDATE_MB_QP_INDEX);
3184 /* BRC roi feature*/
3185 if (generic_state->brc_roi_enable) {
3186 i965_add_buffer_gpe_surface(ctx,
3188 &avc_ctx->res_mbbrc_roi_surface,
3190 avc_ctx->res_mbbrc_roi_surface.size,
3192 GEN9_AVC_MB_BRC_UPDATE_ROI_INDEX);
3196 /* MB statistical data surface*/
3197 i965_add_buffer_gpe_surface(ctx,
3199 &avc_ctx->res_mb_status_buffer,
3201 avc_ctx->res_mb_status_buffer.size,
3203 GEN9_AVC_MB_BRC_UPDATE_MB_STATUS_INDEX);
/* Run the BRC per-MB update kernel: pick the GPE context for
 * GEN9_AVC_KERNEL_BRC_MB_UPDATE, program its CURBE and surfaces via
 * the generic_ctx callbacks, then dispatch a media-object walker
 * whose resolution is the frame size in MBs halved in each dimension
 * (per the in-code comment, the kernel scales at 8x8-block level).
 * Returns VA_STATUS_SUCCESS.
 * NOTE(review): sampled chunk -- opening brace, some declarations
 * (e.g. kernel_idx) and closing braces are among the missing lines. */
3209 gen9_avc_kernel_brc_mb_update(VADriverContextP ctx,
3210 struct encode_state *encode_state,
3211 struct intel_encoder_context *encoder_context)
3214 struct i965_driver_data *i965 = i965_driver_data(ctx);
3215 struct i965_gpe_table *gpe = &i965->gpe_table;
3216 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
3217 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
3218 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
3219 struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
3221 struct i965_gpe_context *gpe_context;
3222 struct gpe_media_object_walker_parameter media_object_walker_param;
3223 struct gpe_encoder_kernel_walker_parameter kernel_walker_param;
3224 int media_function = 0;
/* Select the MB-BRC-update kernel and (re)initialize its GPE context. */
3227 media_function = INTEL_MEDIA_STATE_MB_BRC_UPDATE;
3228 kernel_idx = GEN9_AVC_KERNEL_BRC_MB_UPDATE;
3229 gpe_context = &(avc_ctx->context_brc.gpe_contexts[kernel_idx]);
3231 gpe->context_init(ctx, gpe_context);
3232 gpe->reset_binding_table(ctx, gpe_context);
3234 /* set curbe brc mb update*/
3235 generic_ctx->pfn_set_curbe_brc_mb_update(ctx, encode_state, gpe_context, encoder_context, NULL);
3238 /* set surface brc mb update*/
3239 generic_ctx->pfn_send_brc_mb_update_surface(ctx, encode_state, gpe_context, encoder_context, NULL);
3242 gpe->setup_interface_data(ctx, gpe_context);
3244 memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
3245 /* the scaling is based on 8x8 blk level */
/* Round up: (n + 1) / 2 handles odd MB counts. */
3246 kernel_walker_param.resolution_x = (generic_state->frame_width_in_mbs + 1) / 2;
3247 kernel_walker_param.resolution_y = (generic_state->frame_height_in_mbs + 1) / 2 ;
3248 kernel_walker_param.no_dependency = 1;
3250 i965_init_media_object_walker_parameter(&kernel_walker_param, &media_object_walker_param);
3252 gen9_avc_run_kernel_media_object_walker(ctx, encoder_context,
3255 &media_object_walker_param);
3257 return VA_STATUS_SUCCESS;
3261 mbenc kernel related functions, including the intra dist kernel
/* Compute the bi-prediction weight used by the VME for B frames.
 * For implicit weighted prediction the weight is derived from the
 * list0 distance scale factor as (dsf + 2) >> 2; any value outside the
 * hardware-supported set {16, 21, 32, 43, 48} falls back to the
 * default 32 (per the in-code note, this happens when more than 3
 * B frames sit between the two references).
 * NOTE(review): sampled chunk -- the return-type line, closing braces
 * and the `return biweight;` line are among the missing lines. */
3264 gen9_avc_get_biweight(int dist_scale_factor_ref_id0_list0, unsigned short weighted_bipredidc)
3266 int biweight = 32; // default value
3268 /* based on kernel HLD*/
3269 if (weighted_bipredidc != INTEL_AVC_WP_MODE_IMPLICIT) {
3272 biweight = (dist_scale_factor_ref_id0_list0 + 2) >> 2;
3274 if (biweight != 16 && biweight != 21 &&
3275 biweight != 32 && biweight != 43 && biweight != 48) {
3276 biweight = 32; // If # of B-pics between two refs is more than 3. VME does not support it.
/* Compute the implicit-weighted-prediction distance scale factors for
 * every active list0 reference and store them in
 * avc_state->dist_scale_factor_list0[].
 * The tb/td/tx math (tx = (16384 + |td/2|) / td, dsf = (tb*tx + 32) >> 6,
 * clipped to [-1024, 1023]) appears to follow the H.264 spec's implicit
 * weight derivation (8.4.2.3) using TopFieldOrderCnt only, i.e. frame
 * (progressive) coding -- TODO confirm against the spec.
 * NOTE(review): sampled chunk -- declarations of i/tb/td/tx/tmp/poc0/poc1,
 * the `continue`/early-exit statements after the invalid-picture checks,
 * and the tb/td assignment lines are among the missing lines. */
3284 gen9_avc_get_dist_scale_factor(VADriverContextP ctx,
3285 struct encode_state *encode_state,
3286 struct intel_encoder_context *encoder_context)
3288 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
3289 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
3290 VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
3291 VAEncPictureParameterBufferH264 *pic_param = avc_state->pic_param;
3293 int max_num_references;
3294 VAPictureH264 *curr_pic;
3295 VAPictureH264 *ref_pic_l0;
3296 VAPictureH264 *ref_pic_l1;
3305 max_num_references = pic_param->num_ref_idx_l0_active_minus1 + 1;
3307 memset(avc_state->dist_scale_factor_list0, 0, 32 * sizeof(unsigned int));
3308 curr_pic = &pic_param->CurrPic;
3309 for (i = 0; i < max_num_references; i++) {
3310 ref_pic_l0 = &(slice_param->RefPicList0[i]);
/* Skip invalid list0 entries. */
3312 if ((ref_pic_l0->flags & VA_PICTURE_H264_INVALID) ||
3313 (ref_pic_l0->picture_id == VA_INVALID_SURFACE))
/* NOTE(review): this second check re-tests ref_pic_l0, not the freshly
 * fetched ref_pic_l1 -- looks like a latent copy/paste bug; verify
 * against upstream intel-vaapi-driver. */
3315 ref_pic_l1 = &(slice_param->RefPicList1[0]);
3316 if ((ref_pic_l0->flags & VA_PICTURE_H264_INVALID) ||
3317 (ref_pic_l0->picture_id == VA_INVALID_SURFACE))
/* POC distances: current-to-ref0 (tb) and ref1-to-ref0 (td), each
 * clipped to [-128, 127]. */
3320 poc0 = (curr_pic->TopFieldOrderCnt - ref_pic_l0->TopFieldOrderCnt);
3321 poc1 = (ref_pic_l1->TopFieldOrderCnt - ref_pic_l0->TopFieldOrderCnt);
3322 CLIP(poc0, -128, 127);
3323 CLIP(poc1, -128, 127);
/* tmp = |td/2| used to round the 16384/td division toward nearest. */
3330 tmp = (td / 2 > 0) ? (td / 2) : (-(td / 2));
3331 tx = (16384 + tmp) / td ;
3332 tmp = (tb * tx + 32) >> 6;
3333 CLIP(tmp, -1024, 1023);
3334 avc_state->dist_scale_factor_list0[i] = tmp;
/* Look up the QP actually used to encode a reference picture, by
 * resolving the surface in RefPicList0/RefPicList1 at ref_frame_idx
 * and reading qp_value from its private gen9_surface_avc data.
 * NOTE(review): sampled chunk -- the parameters selecting the list and
 * index, the branch choosing list0 vs list1, and the fallback return
 * for out-of-range/unresolvable entries are among the missing lines. */
3340 gen9_avc_get_qp_from_ref_list(VADriverContextP ctx,
3341 VAEncSliceParameterBufferH264 *slice_param,
3345 struct i965_driver_data *i965 = i965_driver_data(ctx);
3346 struct object_surface *obj_surface;
3347 struct gen9_surface_avc *avc_priv_surface;
3348 VASurfaceID surface_id;
3350 assert(slice_param);
/* Bounds-check against the active reference count before indexing. */
3354 if (ref_frame_idx < slice_param->num_ref_idx_l0_active_minus1 + 1)
3355 surface_id = slice_param->RefPicList0[ref_frame_idx].picture_id;
3359 if (ref_frame_idx < slice_param->num_ref_idx_l1_active_minus1 + 1)
3360 surface_id = slice_param->RefPicList1[ref_frame_idx].picture_id;
/* Resolve the VA surface; only surfaces with encoder private data
 * carry a recorded QP. */
3364 obj_surface = SURFACE(surface_id);
3365 if (obj_surface && obj_surface->private_data) {
3366 avc_priv_surface = obj_surface->private_data;
3367 return avc_priv_surface->qp_value;
/* Populate the MB-BRC constant-data surface: copy the per-slice-type
 * base table (16 DWs x 52 QP entries) and then patch selected DWORDs
 * per QP according to the enabled encoder features (old intra mode
 * cost, P-skip bias, non-FTQ/FTQ skip thresholds, intra scaling,
 * kernel trellis lambdas).
 * NOTE(review): sampled chunk -- the loop's per-iteration `data`
 * advance (presumably data += 16), several case/break lines and
 * closing braces are among the missing lines; offsets below are DWORD
 * offsets within each 16-DW QP row. */
3374 gen9_avc_load_mb_brc_const_data(VADriverContextP ctx,
3375 struct encode_state *encode_state,
3376 struct intel_encoder_context *encoder_context)
3378 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
3379 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
3380 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
3381 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
3383 struct i965_gpe_resource *gpe_resource = NULL;
3384 unsigned int * data = NULL;
3385 unsigned int * data_tmp = NULL;
/* 16 DWORDs per QP entry x 52 QP values. */
3386 unsigned int size = 16 * 52;
3387 unsigned int table_idx = 0;
3388 unsigned int block_based_skip_enable = avc_state->block_based_skip_enable;
3389 unsigned int transform_8x8_mode_flag = avc_state->transform_8x8_mode_enable;
/* Map the constant-data buffer and copy the base table for this
 * slice type. */
3392 gpe_resource = &(avc_ctx->res_mbbrc_const_data_buffer);
3393 assert(gpe_resource);
3394 data = i965_map_gpe_resource(gpe_resource);
3397 table_idx = slice_type_kernel[generic_state->frame_type];
3399 memcpy(data, gen9_avc_mb_brc_const_data[table_idx][0], size * sizeof(unsigned int));
/* Feature-dependent per-QP patching of the copied table. */
3403 switch (generic_state->frame_type) {
3405 for (i = 0; i < AVC_QP_MAX ; i++) {
3406 if (avc_state->old_mode_cost_enable)
3407 *data = (unsigned int)gen9_avc_old_intra_mode_cost[i];
3413 for (i = 0; i < AVC_QP_MAX ; i++) {
3414 if (generic_state->frame_type == SLICE_TYPE_P) {
/* DW3: MV cost adjustment for P-skip bias. */
3415 if (avc_state->skip_bias_adjustment_enable)
3416 *(data + 3) = (unsigned int)gen9_avc_mv_cost_p_skip_adjustment[i];
/* DW9: non-FTQ skip threshold -- caller-supplied LUT wins over the
 * built-in P/B tables. */
3418 if (avc_state->non_ftq_skip_threshold_lut_input_enable) {
3419 *(data + 9) = (unsigned int)i965_avc_calc_skip_value(block_based_skip_enable, transform_8x8_mode_flag, avc_state->non_ftq_skip_threshold_lut[i]);
3420 } else if (generic_state->frame_type == SLICE_TYPE_P) {
3421 *(data + 9) = (unsigned int)gen9_avc_skip_value_p[block_based_skip_enable][transform_8x8_mode_flag][i];
3423 *(data + 9) = (unsigned int)gen9_avc_skip_value_b[block_based_skip_enable][transform_8x8_mode_flag][i];
/* DW10: intra scaling factor, adaptive variant if enabled. */
3426 if (avc_state->adaptive_intra_scaling_enable) {
3427 *(data + 10) = (unsigned int)gen9_avc_adaptive_intra_scaling_factor[i];
3429 *(data + 10) = (unsigned int)gen9_avc_intra_scaling_factor[i];
/* DW6/DW7: FTQ skip thresholds replicated into byte lanes when a
 * caller-supplied LUT is in use. */
3441 for (i = 0; i < AVC_QP_MAX ; i++) {
3442 if (avc_state->ftq_skip_threshold_lut_input_enable) {
3443 *(data + 6) = (avc_state->ftq_skip_threshold_lut[i] |
3444 (avc_state->ftq_skip_threshold_lut[i] << 16) |
3445 (avc_state->ftq_skip_threshold_lut[i] << 24));
3446 *(data + 7) = (avc_state->ftq_skip_threshold_lut[i] |
3447 (avc_state->ftq_skip_threshold_lut[i] << 8) |
3448 (avc_state->ftq_skip_threshold_lut[i] << 16) |
3449 (avc_state->ftq_skip_threshold_lut[i] << 24));
/* DW11/DW12: trellis lambda values when kernel trellis is enabled. */
3452 if (avc_state->kernel_trellis_enable) {
3453 *(data + 11) = (unsigned int)avc_state->lamda_value_lut[i][0];
3454 *(data + 12) = (unsigned int)avc_state->lamda_value_lut[i][1];
3460 i965_unmap_gpe_resource(gpe_resource);
3464 gen9_avc_set_curbe_mbenc(VADriverContextP ctx,
3465 struct encode_state *encode_state,
3466 struct i965_gpe_context *gpe_context,
3467 struct intel_encoder_context *encoder_context,
3470 struct i965_driver_data *i965 = i965_driver_data(ctx);
3472 gen9_avc_mbenc_curbe_data *g9;
3473 gen95_avc_mbenc_curbe_data *g95;
3475 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
3476 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
3477 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
3479 VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
3480 VAEncPictureParameterBufferH264 *pic_param = avc_state->pic_param;
3481 VASurfaceID surface_id;
3482 struct object_surface *obj_surface;
3484 struct mbenc_param * curbe_param = (struct mbenc_param *)param ;
3485 unsigned char qp = 0;
3486 unsigned char me_method = 0;
3487 unsigned int mbenc_i_frame_dist_in_use = curbe_param->mbenc_i_frame_dist_in_use;
3488 unsigned int table_idx = 0;
3489 unsigned char is_g9 = 0;
3490 unsigned char is_g95 = 0;
3491 unsigned int curbe_size = 0;
3493 unsigned int preset = generic_state->preset;
3494 if (IS_SKL(i965->intel.device_info) ||
3495 IS_BXT(i965->intel.device_info)) {
3496 cmd.g9 = (gen9_avc_mbenc_curbe_data *)i965_gpe_context_map_curbe(gpe_context);
3500 curbe_size = sizeof(gen9_avc_mbenc_curbe_data);
3501 memset(cmd.g9, 0, curbe_size);
3503 if (mbenc_i_frame_dist_in_use) {
3504 memcpy(cmd.g9, gen9_avc_mbenc_curbe_i_frame_dist_init_data, curbe_size);
3507 switch (generic_state->frame_type) {
3509 memcpy(cmd.g9, gen9_avc_mbenc_curbe_normal_i_frame_init_data, curbe_size);
3512 memcpy(cmd.g9, gen9_avc_mbenc_curbe_normal_p_frame_init_data, curbe_size);
3515 memcpy(cmd.g9, gen9_avc_mbenc_curbe_normal_b_frame_init_data, curbe_size);
3522 } else if (IS_KBL(i965->intel.device_info) ||
3523 IS_GEN10(i965->intel.device_info) ||
3524 IS_GLK(i965->intel.device_info)) {
3525 cmd.g95 = (gen95_avc_mbenc_curbe_data *)i965_gpe_context_map_curbe(gpe_context);
3529 curbe_size = sizeof(gen95_avc_mbenc_curbe_data);
3530 memset(cmd.g9, 0, curbe_size);
3532 if (mbenc_i_frame_dist_in_use) {
3533 memcpy(cmd.g95, gen95_avc_mbenc_curbe_i_frame_dist_init_data, curbe_size);
3536 switch (generic_state->frame_type) {
3538 memcpy(cmd.g95, gen95_avc_mbenc_curbe_normal_i_frame_init_data, curbe_size);
3541 memcpy(cmd.g95, gen95_avc_mbenc_curbe_normal_p_frame_init_data, curbe_size);
3544 memcpy(cmd.g95, gen95_avc_mbenc_curbe_normal_b_frame_init_data, curbe_size);
3552 /* Never get here, just silence a gcc warning */
3558 me_method = (generic_state->frame_type == SLICE_TYPE_B) ? gen9_avc_b_me_method[preset] : gen9_avc_p_me_method[preset];
3559 qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
3561 cmd.g9->dw0.adaptive_enable = gen9_avc_enable_adaptive_search[preset];
3562 cmd.g9->dw37.adaptive_enable = gen9_avc_enable_adaptive_search[preset];
3563 cmd.g9->dw0.t8x8_flag_for_inter_enable = avc_state->transform_8x8_mode_enable;
3564 cmd.g9->dw37.t8x8_flag_for_inter_enable = avc_state->transform_8x8_mode_enable;
3566 cmd.g9->dw2.max_len_sp = gen9_avc_max_len_sp[preset];
3567 cmd.g9->dw38.max_len_sp = 0;
3570 cmd.g95->dw1.extended_mv_cost_range = avc_state->extended_mv_cost_range_enable;
3572 cmd.g9->dw3.src_access = 0;
3573 cmd.g9->dw3.ref_access = 0;
3575 if (avc_state->ftq_enable && (generic_state->frame_type != SLICE_TYPE_I)) {
3576 //disable ftq_override by now.
3577 if (avc_state->ftq_override) {
3578 cmd.g9->dw3.ftq_enable = avc_state->ftq_enable;
3581 // both gen9 and gen95 come here by now
3582 if (generic_state->frame_type == SLICE_TYPE_P) {
3583 cmd.g9->dw3.ftq_enable = gen9_avc_max_ftq_based_skip[preset] & 0x01;
3586 cmd.g9->dw3.ftq_enable = (gen9_avc_max_ftq_based_skip[preset] >> 1) & 0x01;
3590 cmd.g9->dw3.ftq_enable = 0;
3593 if (avc_state->disable_sub_mb_partion)
3594 cmd.g9->dw3.sub_mb_part_mask = 0x7;
3596 if (mbenc_i_frame_dist_in_use) {
3597 cmd.g9->dw2.pitch_width = generic_state->downscaled_width_4x_in_mb;
3598 cmd.g9->dw4.picture_height_minus1 = generic_state->downscaled_height_4x_in_mb - 1;
3599 cmd.g9->dw5.slice_mb_height = (avc_state->slice_height + 4 - 1) / 4;
3600 cmd.g9->dw6.batch_buffer_end = 0;
3601 cmd.g9->dw31.intra_compute_type = 1;
3604 cmd.g9->dw2.pitch_width = generic_state->frame_width_in_mbs;
3605 cmd.g9->dw4.picture_height_minus1 = generic_state->frame_height_in_mbs - 1;
3606 cmd.g9->dw5.slice_mb_height = (avc_state->arbitrary_num_mbs_in_slice) ? generic_state->frame_height_in_mbs : avc_state->slice_height;
3609 memcpy(&(cmd.g9->dw8), gen9_avc_mode_mv_cost_table[slice_type_kernel[generic_state->frame_type]][qp], 8 * sizeof(unsigned int));
3610 if ((generic_state->frame_type == SLICE_TYPE_I) && avc_state->old_mode_cost_enable) {
3611 } else if (avc_state->skip_bias_adjustment_enable) {
3612 /* Load different MvCost for P picture when SkipBiasAdjustment is enabled
3613 // No need to check for P picture as the flag is only enabled for P picture */
3614 cmd.g9->dw11.value = gen9_avc_mv_cost_p_skip_adjustment[qp];
3619 table_idx = (generic_state->frame_type == SLICE_TYPE_B) ? 1 : 0;
3620 memcpy(&(cmd.g9->dw16), table_enc_search_path[table_idx][me_method], 16 * sizeof(unsigned int));
3622 cmd.g9->dw4.enable_fbr_bypass = avc_state->fbr_bypass_enable;
3623 cmd.g9->dw4.enable_intra_cost_scaling_for_static_frame = avc_state->sfd_enable && generic_state->hme_enabled;
3624 cmd.g9->dw4.field_parity_flag = 0;//bottom field
3625 cmd.g9->dw4.enable_cur_fld_idr = 0;//field realted
3626 cmd.g9->dw4.contrained_intra_pred_flag = pic_param->pic_fields.bits.constrained_intra_pred_flag;
3627 cmd.g9->dw4.hme_enable = generic_state->hme_enabled;
3628 cmd.g9->dw4.picture_type = slice_type_kernel[generic_state->frame_type];
3629 cmd.g9->dw4.use_actual_ref_qp_value = generic_state->hme_enabled && (gen9_avc_mr_disable_qp_check[preset] == 0);
3632 cmd.g9->dw7.intra_part_mask = avc_state->transform_8x8_mode_enable ? 0 : 0x02;
3633 cmd.g9->dw7.src_field_polarity = 0;//field related
3635 /*ftq_skip_threshold_lut set,dw14 /15*/
3637 /*r5 disable NonFTQSkipThresholdLUT*/
3638 if (generic_state->frame_type == SLICE_TYPE_P) {
3639 cmd.g9->dw32.skip_val = gen9_avc_skip_value_p[avc_state->block_based_skip_enable][avc_state->transform_8x8_mode_enable][qp];
3641 } else if (generic_state->frame_type == SLICE_TYPE_B) {
3642 cmd.g9->dw32.skip_val = gen9_avc_skip_value_b[avc_state->block_based_skip_enable][avc_state->transform_8x8_mode_enable][qp];
3646 cmd.g9->dw13.qp_prime_y = qp;
3647 cmd.g9->dw13.qp_prime_cb = qp;
3648 cmd.g9->dw13.qp_prime_cr = qp;
3649 cmd.g9->dw13.target_size_in_word = 0xff;//hardcode for brc disable
3651 if ((generic_state->frame_type != SLICE_TYPE_I) && avc_state->multi_pre_enable) {
3652 switch (gen9_avc_multi_pred[preset]) {
3654 cmd.g9->dw32.mult_pred_l0_disable = 128;
3655 cmd.g9->dw32.mult_pred_l1_disable = 128;
3658 cmd.g9->dw32.mult_pred_l0_disable = (generic_state->frame_type == SLICE_TYPE_P) ? 1 : 128;
3659 cmd.g9->dw32.mult_pred_l1_disable = 128;
3662 cmd.g9->dw32.mult_pred_l0_disable = (generic_state->frame_type == SLICE_TYPE_B) ? 1 : 128;
3663 cmd.g9->dw32.mult_pred_l1_disable = (generic_state->frame_type == SLICE_TYPE_B) ? 1 : 128;
3666 cmd.g9->dw32.mult_pred_l0_disable = 1;
3667 cmd.g9->dw32.mult_pred_l1_disable = (generic_state->frame_type == SLICE_TYPE_B) ? 1 : 128;
3673 cmd.g9->dw32.mult_pred_l0_disable = 128;
3674 cmd.g9->dw32.mult_pred_l1_disable = 128;
3677 /*field setting for dw33 34, ignored*/
3679 if (avc_state->adaptive_transform_decision_enable) {
3680 if (generic_state->frame_type != SLICE_TYPE_I) {
3682 cmd.g9->dw34.enable_adaptive_tx_decision = 1;
3683 cmd.g9->dw58.mb_texture_threshold = 1024;
3684 cmd.g9->dw58.tx_decision_threshold = 128;
3685 } else if (is_g95) {
3686 cmd.g95->dw34.enable_adaptive_tx_decision = 1;
3687 cmd.g95->dw60.mb_texture_threshold = 1024;
3688 cmd.g95->dw60.tx_decision_threshold = 128;
3694 if (generic_state->frame_type == SLICE_TYPE_B) {
3695 cmd.g9->dw34.list1_ref_id0_frm_field_parity = 0; //frame only
3696 cmd.g9->dw34.list1_ref_id0_frm_field_parity = 0;
3697 cmd.g9->dw34.b_direct_mode = slice_param->direct_spatial_mv_pred_flag;
3700 cmd.g9->dw34.b_original_bff = 0; //frame only
3701 cmd.g9->dw34.enable_mb_flatness_check_optimization = avc_state->flatness_check_enable;
3702 cmd.g9->dw34.roi_enable_flag = curbe_param->roi_enabled;
3703 cmd.g9->dw34.mad_enable_falg = avc_state->mad_enable;
3704 cmd.g9->dw34.mb_brc_enable = avc_state->mb_qp_data_enable || generic_state->mb_brc_enabled;
3705 cmd.g9->dw34.arbitray_num_mbs_per_slice = avc_state->arbitrary_num_mbs_in_slice;
3707 cmd.g95->dw34.tq_enable = avc_state->tq_enable;
3708 cmd.g95->dw34.cqp_flag = !generic_state->brc_enabled;
3712 cmd.g9->dw34.force_non_skip_check = avc_state->mb_disable_skip_map_enable;
3714 if (cmd.g9->dw34.force_non_skip_check) {
3715 cmd.g9->dw34.disable_enc_skip_check = avc_state->skip_check_disable;
3720 cmd.g9->dw36.check_all_fractional_enable = avc_state->caf_enable;
3721 cmd.g9->dw38.ref_threshold = 400;
3722 cmd.g9->dw39.hme_ref_windows_comb_threshold = (generic_state->frame_type == SLICE_TYPE_B) ? gen9_avc_hme_b_combine_len[preset] : gen9_avc_hme_combine_len[preset];
3724 /* Default:2 used for MBBRC (MB QP Surface width and height are 4x downscaled picture in MB unit * 4 bytes)
3725 0 used for MBQP data surface (MB QP Surface width and height are same as the input picture size in MB unit * 1bytes)
3726 starting GEN9, BRC use split kernel, MB QP surface is same size as input picture */
3727 cmd.g9->dw47.mb_qp_read_factor = (avc_state->mb_qp_data_enable || generic_state->mb_brc_enabled) ? 0 : 2;
3729 if (mbenc_i_frame_dist_in_use) {
3730 cmd.g9->dw13.qp_prime_y = 0;
3731 cmd.g9->dw13.qp_prime_cb = 0;
3732 cmd.g9->dw13.qp_prime_cr = 0;
3733 cmd.g9->dw33.intra_16x16_nondc_penalty = 0;
3734 cmd.g9->dw33.intra_8x8_nondc_penalty = 0;
3735 cmd.g9->dw33.intra_4x4_nondc_penalty = 0;
3738 if (cmd.g9->dw4.use_actual_ref_qp_value) {
3739 cmd.g9->dw44.actual_qp_value_for_ref_id0_list0 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 0);
3740 cmd.g9->dw44.actual_qp_value_for_ref_id1_list0 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 1);
3741 cmd.g9->dw44.actual_qp_value_for_ref_id2_list0 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 2);
3742 cmd.g9->dw44.actual_qp_value_for_ref_id3_list0 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 3);
3743 cmd.g9->dw45.actual_qp_value_for_ref_id4_list0 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 4);
3744 cmd.g9->dw45.actual_qp_value_for_ref_id5_list0 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 5);
3745 cmd.g9->dw45.actual_qp_value_for_ref_id6_list0 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 6);
3746 cmd.g9->dw45.actual_qp_value_for_ref_id7_list0 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 7);
3747 cmd.g9->dw46.actual_qp_value_for_ref_id0_list1 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 1, 0);
3748 cmd.g9->dw46.actual_qp_value_for_ref_id1_list1 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 1, 1);
3751 table_idx = slice_type_kernel[generic_state->frame_type];
3752 cmd.g9->dw46.ref_cost = gen9_avc_ref_cost[table_idx][qp];
3754 if (generic_state->frame_type == SLICE_TYPE_I) {
3755 cmd.g9->dw0.skip_mode_enable = 0;
3756 cmd.g9->dw37.skip_mode_enable = 0;
3757 cmd.g9->dw36.hme_combine_overlap = 0;
3758 cmd.g9->dw47.intra_cost_sf = 16;
3759 cmd.g9->dw34.enable_direct_bias_adjustment = 0;
3761 cmd.g9->dw34.enable_global_motion_bias_adjustment = 0;
3763 } else if (generic_state->frame_type == SLICE_TYPE_P) {
3764 cmd.g9->dw1.max_num_mvs = i965_avc_get_max_mv_per_2mb(avc_state->seq_param->level_idc) / 2;
3765 cmd.g9->dw3.bme_disable_fbr = 1;
3766 cmd.g9->dw5.ref_width = gen9_avc_search_x[preset];
3767 cmd.g9->dw5.ref_height = gen9_avc_search_y[preset];
3768 cmd.g9->dw7.non_skip_zmv_added = 1;
3769 cmd.g9->dw7.non_skip_mode_added = 1;
3770 cmd.g9->dw7.skip_center_mask = 1;
3771 cmd.g9->dw47.intra_cost_sf = (avc_state->adaptive_intra_scaling_enable) ? gen9_avc_adaptive_intra_scaling_factor[qp] : gen9_avc_intra_scaling_factor[qp];
3772 cmd.g9->dw47.max_vmv_r = i965_avc_get_max_mv_len(avc_state->seq_param->level_idc) * 4;//frame onlys
3773 cmd.g9->dw36.hme_combine_overlap = 1;
3774 cmd.g9->dw36.num_ref_idx_l0_minus_one = (avc_state->multi_pre_enable) ? slice_param->num_ref_idx_l0_active_minus1 : 0;
3775 cmd.g9->dw39.ref_width = gen9_avc_search_x[preset];
3776 cmd.g9->dw39.ref_height = gen9_avc_search_y[preset];
3777 cmd.g9->dw34.enable_direct_bias_adjustment = 0;
3778 cmd.g9->dw34.enable_global_motion_bias_adjustment = avc_state->global_motion_bias_adjustment_enable;
3779 if (is_g9 && avc_state->global_motion_bias_adjustment_enable)
3780 cmd.g9->dw59.hme_mv_cost_scaling_factor = avc_state->hme_mv_cost_scaling_factor;
3783 cmd.g9->dw1.max_num_mvs = i965_avc_get_max_mv_per_2mb(avc_state->seq_param->level_idc) / 2;
3784 cmd.g9->dw1.bi_weight = avc_state->bi_weight;
3785 cmd.g9->dw3.search_ctrl = 7;
3786 cmd.g9->dw3.skip_type = 1;
3787 cmd.g9->dw5.ref_width = gen9_avc_b_search_x[preset];
3788 cmd.g9->dw5.ref_height = gen9_avc_b_search_y[preset];
3789 cmd.g9->dw7.skip_center_mask = 0xff;
3790 cmd.g9->dw47.intra_cost_sf = (avc_state->adaptive_intra_scaling_enable) ? gen9_avc_adaptive_intra_scaling_factor[qp] : gen9_avc_intra_scaling_factor[qp];
3791 cmd.g9->dw47.max_vmv_r = i965_avc_get_max_mv_len(avc_state->seq_param->level_idc) * 4;//frame only
3792 cmd.g9->dw36.hme_combine_overlap = 1;
3793 surface_id = slice_param->RefPicList1[0].picture_id;
3794 obj_surface = SURFACE(surface_id);
3796 WARN_ONCE("Invalid backward reference frame\n");
3799 cmd.g9->dw36.is_fwd_frame_short_term_ref = !!(slice_param->RefPicList1[0].flags & VA_PICTURE_H264_SHORT_TERM_REFERENCE);
3801 cmd.g9->dw36.num_ref_idx_l0_minus_one = (avc_state->multi_pre_enable) ? slice_param->num_ref_idx_l0_active_minus1 : 0;
3802 cmd.g9->dw36.num_ref_idx_l1_minus_one = (avc_state->multi_pre_enable) ? slice_param->num_ref_idx_l1_active_minus1 : 0;
3803 cmd.g9->dw39.ref_width = gen9_avc_b_search_x[preset];
3804 cmd.g9->dw39.ref_height = gen9_avc_b_search_y[preset];
3805 cmd.g9->dw40.dist_scale_factor_ref_id0_list0 = avc_state->dist_scale_factor_list0[0];
3806 cmd.g9->dw40.dist_scale_factor_ref_id1_list0 = avc_state->dist_scale_factor_list0[1];
3807 cmd.g9->dw41.dist_scale_factor_ref_id2_list0 = avc_state->dist_scale_factor_list0[2];
3808 cmd.g9->dw41.dist_scale_factor_ref_id3_list0 = avc_state->dist_scale_factor_list0[3];
3809 cmd.g9->dw42.dist_scale_factor_ref_id4_list0 = avc_state->dist_scale_factor_list0[4];
3810 cmd.g9->dw42.dist_scale_factor_ref_id5_list0 = avc_state->dist_scale_factor_list0[5];
3811 cmd.g9->dw43.dist_scale_factor_ref_id6_list0 = avc_state->dist_scale_factor_list0[6];
3812 cmd.g9->dw43.dist_scale_factor_ref_id7_list0 = avc_state->dist_scale_factor_list0[7];
3814 cmd.g9->dw34.enable_direct_bias_adjustment = avc_state->direct_bias_adjustment_enable;
3815 if (cmd.g9->dw34.enable_direct_bias_adjustment) {
3816 cmd.g9->dw7.non_skip_zmv_added = 1;
3817 cmd.g9->dw7.non_skip_mode_added = 1;
3820 cmd.g9->dw34.enable_global_motion_bias_adjustment = avc_state->global_motion_bias_adjustment_enable;
3821 if (is_g9 && avc_state->global_motion_bias_adjustment_enable)
3822 cmd.g9->dw59.hme_mv_cost_scaling_factor = avc_state->hme_mv_cost_scaling_factor;
3826 avc_state->block_based_skip_enable = cmd.g9->dw3.block_based_skip_enable;
3828 if (avc_state->rolling_intra_refresh_enable) {
3829 /*by now disable it*/
3830 cmd.g9->dw34.widi_intra_refresh_en = avc_state->rolling_intra_refresh_enable;
3831 cmd.g9->dw32.mult_pred_l0_disable = 128;
3832 /* Pass the same IntraRefreshUnit to the kernel w/o the adjustment by -1, so as to have an overlap of one MB row or column of Intra macroblocks
3833 across one P frame to another P frame, as needed by the RollingI algo */
3835 cmd.g9->dw48.widi_intra_refresh_mb_num = 0;
3836 cmd.g9->dw48.widi_intra_refresh_unit_in_mb_minus1 = 0;
3837 cmd.g9->dw48.widi_intra_refresh_qp_delta = 0;
3841 if (avc_state->rolling_intra_refresh_enable == INTEL_ROLLING_I_SQUARE && generic_state->brc_enabled) {
3842 cmd.g95->dw4.enable_intra_refresh = 0;
3843 cmd.g95->dw34.widi_intra_refresh_en = INTEL_ROLLING_I_DISABLED;
3844 cmd.g95->dw48.widi_intra_refresh_mb_x = 0;
3845 cmd.g95->dw61.widi_intra_refresh_mb_y = 0;
3847 cmd.g95->dw4.enable_intra_refresh = 1;
3848 cmd.g95->dw34.widi_intra_refresh_en = avc_state->rolling_intra_refresh_enable;
3849 cmd.g95->dw48.widi_intra_refresh_mb_x = 0;
3850 cmd.g95->dw61.widi_intra_refresh_mb_y = 0;
3851 cmd.g9->dw48.widi_intra_refresh_unit_in_mb_minus1 = 0;
3852 cmd.g9->dw48.widi_intra_refresh_qp_delta = 0;
3857 cmd.g9->dw34.widi_intra_refresh_en = 0;
3860 cmd.g9->dw34.enable_per_mb_static_check = avc_state->sfd_enable && generic_state->hme_enabled;
3862 cmd.g9->dw34.enable_adaptive_search_window_size = avc_state->adaptive_search_window_enable;
3864 cmd.g95->dw34.enable_adaptive_search_window_size = avc_state->adaptive_search_window_enable;
3866 /*roi set disable by now. 49-56*/
3867 if (curbe_param->roi_enabled) {
3868 cmd.g9->dw49.roi_1_x_left = generic_state->roi[0].left;
3869 cmd.g9->dw49.roi_1_y_top = generic_state->roi[0].top;
3870 cmd.g9->dw50.roi_1_x_right = generic_state->roi[0].right;
3871 cmd.g9->dw50.roi_1_y_bottom = generic_state->roi[0].bottom;
3873 cmd.g9->dw51.roi_2_x_left = generic_state->roi[1].left;
3874 cmd.g9->dw51.roi_2_y_top = generic_state->roi[1].top;
3875 cmd.g9->dw52.roi_2_x_right = generic_state->roi[1].right;
3876 cmd.g9->dw52.roi_2_y_bottom = generic_state->roi[1].bottom;
3878 cmd.g9->dw53.roi_3_x_left = generic_state->roi[2].left;
3879 cmd.g9->dw53.roi_3_y_top = generic_state->roi[2].top;
3880 cmd.g9->dw54.roi_3_x_right = generic_state->roi[2].right;
3881 cmd.g9->dw54.roi_3_y_bottom = generic_state->roi[2].bottom;
3883 cmd.g9->dw55.roi_4_x_left = generic_state->roi[3].left;
3884 cmd.g9->dw55.roi_4_y_top = generic_state->roi[3].top;
3885 cmd.g9->dw56.roi_4_x_right = generic_state->roi[3].right;
3886 cmd.g9->dw56.roi_4_y_bottom = generic_state->roi[3].bottom;
3888 if (!generic_state->brc_enabled) {
3890 tmp = generic_state->roi[0].value;
3891 CLIP(tmp, -qp, AVC_QP_MAX - qp);
3892 cmd.g9->dw57.roi_1_dqp_prime_y = tmp;
3893 tmp = generic_state->roi[1].value;
3894 CLIP(tmp, -qp, AVC_QP_MAX - qp);
3895 cmd.g9->dw57.roi_2_dqp_prime_y = tmp;
3896 tmp = generic_state->roi[2].value;
3897 CLIP(tmp, -qp, AVC_QP_MAX - qp);
3898 cmd.g9->dw57.roi_3_dqp_prime_y = tmp;
3899 tmp = generic_state->roi[3].value;
3900 CLIP(tmp, -qp, AVC_QP_MAX - qp);
3901 cmd.g9->dw57.roi_4_dqp_prime_y = tmp;
3903 cmd.g9->dw34.roi_enable_flag = 0;
3908 if (avc_state->tq_enable) {
3909 if (generic_state->frame_type == SLICE_TYPE_I) {
3910 cmd.g95->dw58.value = gen95_avc_tq_lambda_i_frame[qp][0];
3911 cmd.g95->dw59.value = gen95_avc_tq_lambda_i_frame[qp][1];
3913 } else if (generic_state->frame_type == SLICE_TYPE_P) {
3914 cmd.g95->dw58.value = gen95_avc_tq_lambda_p_frame[qp][0];
3915 cmd.g95->dw59.value = gen95_avc_tq_lambda_p_frame[qp][1];
3918 cmd.g95->dw58.value = gen95_avc_tq_lambda_b_frame[qp][0];
3919 cmd.g95->dw59.value = gen95_avc_tq_lambda_b_frame[qp][1];
3922 if (cmd.g95->dw58.lambda_8x8_inter > GEN95_AVC_MAX_LAMBDA)
3923 cmd.g95->dw58.lambda_8x8_inter = 0xf000 + avc_state->rounding_value;
3925 if (cmd.g95->dw58.lambda_8x8_intra > GEN95_AVC_MAX_LAMBDA)
3926 cmd.g95->dw58.lambda_8x8_intra = 0xf000 + GEN95_AVC_DEFAULT_TRELLIS_QUANT_INTRA_ROUNDING;
3928 if (cmd.g95->dw59.lambda_inter > GEN95_AVC_MAX_LAMBDA)
3929 cmd.g95->dw59.lambda_inter = 0xf000 + avc_state->rounding_value;
3931 if (cmd.g95->dw59.lambda_intra > GEN95_AVC_MAX_LAMBDA)
3932 cmd.g95->dw59.lambda_intra = 0xf000 + GEN95_AVC_DEFAULT_TRELLIS_QUANT_INTRA_ROUNDING;
3937 cmd.g95->dw66.mb_data_surf_index = GEN9_AVC_MBENC_MFC_AVC_PAK_OBJ_INDEX;
3938 cmd.g95->dw67.mv_data_surf_index = GEN9_AVC_MBENC_IND_MV_DATA_INDEX;
3939 cmd.g95->dw68.i_dist_surf_index = GEN9_AVC_MBENC_BRC_DISTORTION_INDEX;
3940 cmd.g95->dw69.src_y_surf_index = GEN9_AVC_MBENC_CURR_Y_INDEX;
3941 cmd.g95->dw70.mb_specific_data_surf_index = GEN9_AVC_MBENC_MB_SPECIFIC_DATA_INDEX;
3942 cmd.g95->dw71.aux_vme_out_surf_index = GEN9_AVC_MBENC_AUX_VME_OUT_INDEX;
3943 cmd.g95->dw72.curr_ref_pic_sel_surf_index = GEN9_AVC_MBENC_REFPICSELECT_L0_INDEX;
3944 cmd.g95->dw73.hme_mv_pred_fwd_bwd_surf_index = GEN9_AVC_MBENC_MV_DATA_FROM_ME_INDEX;
3945 cmd.g95->dw74.hme_dist_surf_index = GEN9_AVC_MBENC_4XME_DISTORTION_INDEX;
3946 cmd.g95->dw75.slice_map_surf_index = GEN9_AVC_MBENC_SLICEMAP_DATA_INDEX;
3947 cmd.g95->dw76.fwd_frm_mb_data_surf_index = GEN9_AVC_MBENC_FWD_MB_DATA_INDEX;
3948 cmd.g95->dw77.fwd_frm_mv_surf_index = GEN9_AVC_MBENC_FWD_MV_DATA_INDEX;
3949 cmd.g95->dw78.mb_qp_buffer = GEN9_AVC_MBENC_MBQP_INDEX;
3950 cmd.g95->dw79.mb_brc_lut = GEN9_AVC_MBENC_MBBRC_CONST_DATA_INDEX;
3951 cmd.g95->dw80.vme_inter_prediction_surf_index = GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_0_INDEX;
3952 cmd.g95->dw81.vme_inter_prediction_mr_surf_index = GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_1_INDEX;
3953 cmd.g95->dw82.mb_stats_surf_index = GEN9_AVC_MBENC_MB_STATS_INDEX;
3954 cmd.g95->dw83.mad_surf_index = GEN9_AVC_MBENC_MAD_DATA_INDEX;
3955 cmd.g95->dw84.brc_curbe_surf_index = GEN95_AVC_MBENC_BRC_CURBE_DATA_INDEX;
3956 cmd.g95->dw85.force_non_skip_mb_map_surface = GEN95_AVC_MBENC_FORCE_NONSKIP_MB_MAP_INDEX;
3957 cmd.g95->dw86.widi_wa_surf_index = GEN95_AVC_MBENC_WIDI_WA_INDEX;
3958 cmd.g95->dw87.static_detection_cost_table_index = GEN95_AVC_MBENC_SFD_COST_TABLE_INDEX;
3962 cmd.g9->dw64.mb_data_surf_index = GEN9_AVC_MBENC_MFC_AVC_PAK_OBJ_INDEX;
3963 cmd.g9->dw65.mv_data_surf_index = GEN9_AVC_MBENC_IND_MV_DATA_INDEX;
3964 cmd.g9->dw66.i_dist_surf_index = GEN9_AVC_MBENC_BRC_DISTORTION_INDEX;
3965 cmd.g9->dw67.src_y_surf_index = GEN9_AVC_MBENC_CURR_Y_INDEX;
3966 cmd.g9->dw68.mb_specific_data_surf_index = GEN9_AVC_MBENC_MB_SPECIFIC_DATA_INDEX;
3967 cmd.g9->dw69.aux_vme_out_surf_index = GEN9_AVC_MBENC_AUX_VME_OUT_INDEX;
3968 cmd.g9->dw70.curr_ref_pic_sel_surf_index = GEN9_AVC_MBENC_REFPICSELECT_L0_INDEX;
3969 cmd.g9->dw71.hme_mv_pred_fwd_bwd_surf_index = GEN9_AVC_MBENC_MV_DATA_FROM_ME_INDEX;
3970 cmd.g9->dw72.hme_dist_surf_index = GEN9_AVC_MBENC_4XME_DISTORTION_INDEX;
3971 cmd.g9->dw73.slice_map_surf_index = GEN9_AVC_MBENC_SLICEMAP_DATA_INDEX;
3972 cmd.g9->dw74.fwd_frm_mb_data_surf_index = GEN9_AVC_MBENC_FWD_MB_DATA_INDEX;
3973 cmd.g9->dw75.fwd_frm_mv_surf_index = GEN9_AVC_MBENC_FWD_MV_DATA_INDEX;
3974 cmd.g9->dw76.mb_qp_buffer = GEN9_AVC_MBENC_MBQP_INDEX;
3975 cmd.g9->dw77.mb_brc_lut = GEN9_AVC_MBENC_MBBRC_CONST_DATA_INDEX;
3976 cmd.g9->dw78.vme_inter_prediction_surf_index = GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_0_INDEX;
3977 cmd.g9->dw79.vme_inter_prediction_mr_surf_index = GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_1_INDEX;
3978 cmd.g9->dw80.mb_stats_surf_index = GEN9_AVC_MBENC_MB_STATS_INDEX;
3979 cmd.g9->dw81.mad_surf_index = GEN9_AVC_MBENC_MAD_DATA_INDEX;
3980 cmd.g9->dw82.force_non_skip_mb_map_surface = GEN9_AVC_MBENC_FORCE_NONSKIP_MB_MAP_INDEX;
3981 cmd.g9->dw83.widi_wa_surf_index = GEN9_AVC_MBENC_WIDI_WA_INDEX;
3982 cmd.g9->dw84.brc_curbe_surf_index = GEN9_AVC_MBENC_BRC_CURBE_DATA_INDEX;
3983 cmd.g9->dw85.static_detection_cost_table_index = GEN9_AVC_MBENC_SFD_COST_TABLE_INDEX;
3986 i965_gpe_context_unmap_curbe(gpe_context);
/*
 * gen9_avc_fei_set_curbe_mbenc - fill the MbEnc kernel CURBE for the FEI
 * (Flexible Encoding Infrastructure) AVC encode path on Gen9.
 *
 * Maps the GPE context's CURBE buffer, seeds it with the per-frame-type
 * init table, then overrides individual DWORD bitfields from:
 *   - the FEI frame-control parameters (search window/path, ref window
 *     size, SAD measures, per-MB QP, MV predictors, MB input),
 *   - the current slice/picture parameters (QP, constrained intra pred),
 *   - driver feature state (FTQ, SFD, adaptive transform decision, ...).
 * The CURBE is unmapped before returning.
 *
 * NOTE(review): this listing preserves the original file's line numbers
 * and omits some lines (gaps in the numbering: braces, case labels, else
 * branches).  Verify any change against the complete source file.
 */
3992 gen9_avc_fei_set_curbe_mbenc(VADriverContextP ctx,
3993 struct encode_state *encode_state,
3994 struct i965_gpe_context *gpe_context,
3995 struct intel_encoder_context *encoder_context,
3998 struct i965_driver_data *i965 = i965_driver_data(ctx);
3999 gen9_avc_fei_mbenc_curbe_data *cmd;
4000 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
4001 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
4002 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
4003 VASurfaceID surface_id;
4004 struct object_surface *obj_surface;
4005 VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
4006 VAEncPictureParameterBufferH264 *pic_param = avc_state->pic_param;
4007 VAEncMiscParameterFEIFrameControlH264 *fei_param = avc_state->fei_framectl_param;
4009 struct mbenc_param * curbe_param = (struct mbenc_param *)param ;
4010 unsigned char qp = 0;
4011 unsigned char me_method = 0;
4012 unsigned int mbenc_i_frame_dist_in_use = curbe_param->mbenc_i_frame_dist_in_use;
4013 unsigned int table_idx = 0;
4014 int ref_width, ref_height, len_sp;
4015 int is_bframe = (generic_state->frame_type == SLICE_TYPE_B);
4016 int is_pframe = (generic_state->frame_type == SLICE_TYPE_P);
4017 unsigned int preset = generic_state->preset;
/* Effective slice QP: picture init QP plus the per-slice delta. */
4019 qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
4021 assert(gpe_context != NULL);
4022 cmd = (gen9_avc_fei_mbenc_curbe_data *)i965_gpe_context_map_curbe(gpe_context);
4023 memset(cmd, 0, sizeof(gen9_avc_fei_mbenc_curbe_data));
/* Seed the whole CURBE from the init table matching this invocation. */
4025 if (mbenc_i_frame_dist_in_use) {
4026 memcpy(cmd, gen9_avc_fei_mbenc_curbe_i_frame_dist_init_data, sizeof(gen9_avc_fei_mbenc_curbe_data));
4029 switch (generic_state->frame_type) {
4031 memcpy(cmd, gen9_avc_fei_mbenc_curbe_i_frame_init_data, sizeof(gen9_avc_fei_mbenc_curbe_data));
4034 memcpy(cmd, gen9_avc_fei_mbenc_curbe_p_frame_init_data, sizeof(gen9_avc_fei_mbenc_curbe_data));
4037 memcpy(cmd, gen9_avc_fei_mbenc_curbe_b_frame_init_data, sizeof(gen9_avc_fei_mbenc_curbe_data));
4044 /* 4 means full search, 6 means diamond search */
4045 me_method = (fei_param->search_window == 5) ||
4046 (fei_param->search_window == 8) ? 4 : 6;
4048 ref_width = fei_param->ref_width;
4049 ref_height = fei_param->ref_height;
4050 len_sp = fei_param->len_sp;
4051 /* If there is a search_window, discard user provided ref_width, ref_height
4052 * and search_path length */
/* NOTE(review): the case labels for search_window presets 1..8 are among
 * the lines omitted from this listing; only their descriptive comments
 * (Tiny/Small/Diamond/... below) survive. */
4053 switch (fei_param->search_window) {
4055 /* not use predefined search window, there should be a search_path input */
4056 if ((fei_param->search_path != 0) &&
4057 (fei_param->search_path != 1) &&
4058 (fei_param->search_path != 2)) {
4059 WARN_ONCE("Invalid input search_path for SearchWindow=0 \n");
4062 /* 4 means full search, 6 means diamond search */
4063 me_method = (fei_param->search_path == 1) ? 6 : 4;
4064 if (((ref_width * ref_height) > 2048) || (ref_width > 64) || (ref_height > 64)) {
4065 WARN_ONCE("Invalid input ref_width/ref_height in"
4066 "SearchWindow=0 case! \n");
4072 /* Tiny - 4 SUs 24x24 window */
4079 /* Small - 9 SUs 28x28 window */
4085 /* Diamond - 16 SUs 48x40 window */
4091 /* Large Diamond - 32 SUs 48x40 window */
4097 /* Exhaustive - 48 SUs 48x40 window */
4103 /* Diamond - 16 SUs 64x32 window */
4109 /* Large Diamond - 32 SUs 64x32 window */
4115 /* Exhaustive - 48 SUs 64x32 window */
4125 /* ref_width*ref_height = Max 64x32 one direction, Max 32x32 two directions */
/* B frames clamp both dimensions to 32 (bidirectional search halves the
 * usable window); P frames may use the full 64-wide window. */
4127 CLIP(ref_width, 4, 32);
4128 CLIP(ref_height, 4, 32);
4129 } else if (is_pframe) {
4130 CLIP(ref_width, 4, 64);
4131 CLIP(ref_height, 4, 32);
/* --- Common motion-estimation controls (mirrored in dw0/dw37, dw2/dw38) --- */
4134 cmd->dw0.adaptive_enable =
4135 cmd->dw37.adaptive_enable = fei_param->adaptive_search;
4136 cmd->dw0.t8x8_flag_for_inter_enable = cmd->dw37.t8x8_flag_for_inter_enable
4137 = avc_state->transform_8x8_mode_enable;
4138 cmd->dw2.max_len_sp = len_sp;
4139 cmd->dw38.max_len_sp = 0; // HLD mandates this field to be Zero
4140 cmd->dw2.max_num_su = cmd->dw38.max_num_su = 57;
4141 cmd->dw3.src_access =
4142 cmd->dw3.ref_access = 0; // change it to (is_frame ? 0: 1) when interlace is supported
/* FTQ (fast transform quantization skip) only applies to inter frames;
 * without an override the preset table supplies per-frame-type defaults. */
4144 if (avc_state->ftq_enable && (generic_state->frame_type != SLICE_TYPE_I)) {
4145 if (avc_state->ftq_override) {
4146 cmd->dw3.ft_enable = avc_state->ftq_enable;
4148 if (generic_state->frame_type == SLICE_TYPE_P) {
4149 cmd->dw3.ft_enable = gen9_avc_max_ftq_based_skip[preset] & 0x01;
4151 cmd->dw3.ft_enable = (gen9_avc_max_ftq_based_skip[preset] >> 1) & 0x01;
4155 cmd->dw3.ft_enable = 0;
4158 if (avc_state->disable_sub_mb_partion)
4159 cmd->dw3.sub_mb_part_mask = 0x7;
4161 if (mbenc_i_frame_dist_in_use) {
4162 /* Fixme: Not supported, no brc in fei */
/* I-frame-distortion pass runs on the 4x-downscaled picture. */
4164 cmd->dw2.pic_width = generic_state->downscaled_width_4x_in_mb;
4165 cmd->dw4.pic_height_minus1 = generic_state->downscaled_height_4x_in_mb - 1;
4166 cmd->dw5.slice_mb_height = (avc_state->slice_height + 4 - 1) / 4;
4167 cmd->dw6.batch_buffer_end = 0;
4168 cmd->dw31.intra_compute_type = 1;
4171 cmd->dw2.pic_width = generic_state->frame_width_in_mbs;
4172 cmd->dw4.pic_height_minus1 = generic_state->frame_height_in_mbs - 1;
4173 cmd->dw5.slice_mb_height = (avc_state->arbitrary_num_mbs_in_slice) ?
4174 generic_state->frame_height_in_mbs : avc_state->slice_height;
/* FEI-supplied per-frame controls override the preset defaults here. */
4175 cmd->dw3.sub_mb_part_mask = fei_param->sub_mb_part_mask;
4176 cmd->dw3.sub_pel_mode = fei_param->sub_pel_mode;
4177 cmd->dw3.inter_sad = fei_param->inter_sad;
4178 cmd->dw3.Intra_sad = fei_param->intra_sad;
4179 cmd->dw3.search_ctrl = (is_bframe) ? 7 : 0;
4180 cmd->dw4.enable_fbr_bypass = avc_state->fbr_bypass_enable;
4181 cmd->dw4.enable_intra_cost_scaling_for_static_frame =
4182 avc_state->sfd_enable && generic_state->hme_enabled;
4183 cmd->dw4.true_distortion_enable = fei_param->distortion_type == 0 ? 1 : 0;
4184 cmd->dw4.constrained_intra_pred_flag =
4185 pic_param->pic_fields.bits.constrained_intra_pred_flag;
4186 cmd->dw4.hme_enable = 0;
4187 cmd->dw4.picture_type = slice_type_kernel[generic_state->frame_type];
4188 cmd->dw4.use_actual_ref_qp_value =
4189 generic_state->hme_enabled && (gen9_avc_mr_disable_qp_check[preset] == 0);
4190 cmd->dw7.intra_part_mask = fei_param->intra_part_mask;
4191 cmd->dw7.src_field_polarity = 0;
/* Mode/MV cost table for this slice type & QP (8 DWORDs from dw8 on). */
4194 memcpy(&(cmd->dw8), gen9_avc_mode_mv_cost_table[slice_type_kernel[generic_state->frame_type]][qp], 8 * sizeof(unsigned int));
4195 if ((generic_state->frame_type == SLICE_TYPE_I) && avc_state->old_mode_cost_enable) {
4196 // cmd->dw8 = gen9_avc_old_intra_mode_cost[qp];
4197 } else if (avc_state->skip_bias_adjustment_enable) {
4198 // Load different MvCost for P picture when SkipBiasAdjustment is enabled
4199 // No need to check for P picture as the flag is only enabled for P picture
4200 cmd->dw11.value = gen9_avc_mv_cost_p_skip_adjustment[qp];
4204 /* search path tables */
4205 table_idx = (generic_state->frame_type == SLICE_TYPE_B) ? 1 : 0;
4206 memcpy(&(cmd->dw16), table_enc_search_path[table_idx][me_method], 16 * sizeof(unsigned int));
4208 //ftq_skip_threshold_lut set,dw14 /15
4210 //r5 disable NonFTQSkipThresholdLUT
4211 if (generic_state->frame_type == SLICE_TYPE_P) {
4212 cmd->dw32.skip_val = gen9_avc_skip_value_p[avc_state->block_based_skip_enable][avc_state->transform_8x8_mode_enable][qp];
4213 } else if (generic_state->frame_type == SLICE_TYPE_B) {
4214 cmd->dw32.skip_val =
4215 gen9_avc_skip_value_b[avc_state->block_based_skip_enable][avc_state->transform_8x8_mode_enable][qp];
4217 cmd->dw13.qp_prime_y = qp;
4218 cmd->dw13.qp_prime_cb = qp;
4219 cmd->dw13.qp_prime_cr = qp;
4220 cmd->dw13.target_size_in_word = 0xff; /* hardcoded for brc disable */
/* Multi-reference prediction: 0x01 enables, 0x80 disables a list. */
4222 if ((generic_state->frame_type != SLICE_TYPE_I) && avc_state->multi_pre_enable) {
4223 cmd->dw32.mult_pred_l0_disable = fei_param->multi_pred_l0 ? 0x01 : 0x80;
4224 cmd->dw32.mult_pred_l1_disable = ((generic_state->frame_type == SLICE_TYPE_B) && fei_param->multi_pred_l1) ? 0x01 : 0x80;
4227 cmd->dw32.mult_pred_l0_disable = 0x80;
4228 cmd->dw32.mult_pred_l1_disable = 0x80;
4230 /* no field pic setting, not supported */
4233 if (avc_state->adaptive_transform_decision_enable) {
4234 if (generic_state->frame_type != SLICE_TYPE_I) {
4235 cmd->dw34.enable_adaptive_tx_decision = 1;
4238 cmd->dw58.mb_texture_threshold = 1024;
4239 cmd->dw58.tx_decision_threshold = 128;
4241 if (generic_state->frame_type == SLICE_TYPE_B) {
4242 cmd->dw34.list1_ref_id0_frm_field_parity = 0; //frame only
4243 cmd->dw34.list1_ref_id1_frm_field_parity = 0;
4244 cmd->dw34.b_direct_mode = slice_param->direct_spatial_mv_pred_flag;
4246 cmd->dw34.enable_mb_flatness_check_optimization = avc_state->flatness_check_enable;
4247 cmd->dw34.roi_enable_flag = generic_state->brc_roi_enable;
4248 cmd->dw34.mad_enable_falg = avc_state->mad_enable;
4249 cmd->dw34.mb_brc_enable = avc_state->mb_qp_data_enable && generic_state->mb_brc_enabled;
4250 cmd->dw34.arbitray_num_mbs_per_slice = avc_state->arbitrary_num_mbs_in_slice;
4251 cmd->dw34.force_non_skip_check = avc_state->mb_disable_skip_map_enable;
4253 if (cmd->dw34.force_non_skip_check) {
4254 cmd->dw34.disable_enc_skip_check = avc_state->skip_check_disable;
4256 cmd->dw36.check_all_fractional_enable = avc_state->caf_enable;
4257 cmd->dw38.ref_threshold = 400;
4258 cmd->dw39.hme_ref_windows_comb_threshold = (generic_state->frame_type == SLICE_TYPE_B) ? gen9_avc_hme_b_combine_len[preset] : gen9_avc_hme_combine_len[preset];
4259 // Default:2 used for MBBRC (MB QP Surface width and height are 4x downscaled picture in MB unit * 4 bytes)
4260 // 0 used for MBQP data surface (MB QP Surface width and height are same as the input picture size in MB unit * 1bytes)
4261 // starting GEN9, BRC use split kernel, MB QP surface is same size as input picture
4262 cmd->dw47.mb_qp_read_factor = (avc_state->mb_qp_data_enable || generic_state->mb_brc_enabled) ? 0 : 2;
4263 if (mbenc_i_frame_dist_in_use) {
4264 cmd->dw13.qp_prime_y = 0;
4265 cmd->dw13.qp_prime_cb = 0;
4266 cmd->dw13.qp_prime_cr = 0;
4267 cmd->dw33.intra_16x16_nondc_penalty = 0;
4268 cmd->dw33.intra_8x8_nondc_penalty = 0;
4269 cmd->dw33.intra_4x4_nondc_penalty = 0;
/* Propagate the real QP of each active reference (list0 idx 0-7, list1 idx 0-1). */
4271 if (cmd->dw4.use_actual_ref_qp_value) {
4272 cmd->dw44.actual_qp_value_for_ref_id0_list0 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 0);
4273 cmd->dw44.actual_qp_value_for_ref_id1_list0 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 1);
4274 cmd->dw44.actual_qp_value_for_ref_id2_list0 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 2);
4275 cmd->dw44.actual_qp_value_for_ref_id3_list0 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 3);
4276 cmd->dw45.actual_qp_value_for_ref_id4_list0 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 4);
4277 cmd->dw45.actual_qp_value_for_ref_id5_list0 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 5);
4278 cmd->dw45.actual_qp_value_for_ref_id6_list0 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 6);
4279 cmd->dw45.actual_qp_value_for_ref_id7_list0 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 7);
4280 cmd->dw46.actual_qp_value_for_ref_id0_list1 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 1, 0);
4281 cmd->dw46.actual_qp_value_for_ref_id1_list1 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 1, 1);
4284 table_idx = slice_type_kernel[generic_state->frame_type];
4285 cmd->dw46.ref_cost = gen9_avc_ref_cost[table_idx][qp];
/* --- Per-slice-type overrides: I, then P, then B --- */
4287 if (generic_state->frame_type == SLICE_TYPE_I) {
4288 cmd->dw0.skip_mode_enable = 0;
4289 cmd->dw37.skip_mode_enable = 0;
4290 cmd->dw36.hme_combine_overlap = 0;
4291 cmd->dw36.check_all_fractional_enable = 0;
4292 cmd->dw47.intra_cost_sf = 16;/* not used, but recommended to set 16 by kernel team */
4293 cmd->dw34.enable_direct_bias_adjustment = 0;
4294 cmd->dw34.enable_global_motion_bias_adjustment = 0;
4296 } else if (generic_state->frame_type == SLICE_TYPE_P) {
4297 cmd->dw1.max_num_mvs = i965_avc_get_max_mv_per_2mb(avc_state->seq_param->level_idc) / 2;
4298 cmd->dw3.bme_disable_fbr = 1;
4299 cmd->dw5.ref_width = cmd->dw39.ref_width = ref_width;
4300 cmd->dw5.ref_height = cmd->dw39.ref_height = ref_height;
4301 cmd->dw7.non_skip_zmv_added = 1;
4302 cmd->dw7.non_skip_mode_added = 1;
4303 cmd->dw7.skip_center_mask = 1;
/* NOTE(review): P path indexes the scaling tables by preset, the B path
 * below indexes them by qp — confirm which is intended against the full
 * source before touching either. */
4305 cmd->dw47.intra_cost_sf =
4306 (avc_state->adaptive_intra_scaling_enable) ?
4307 gen9_avc_adaptive_intra_scaling_factor[preset] :
4308 gen9_avc_intra_scaling_factor[preset];
4310 cmd->dw47.max_vmv_r =
4311 i965_avc_get_max_mv_len(avc_state->seq_param->level_idc) * 4;//frame only
4313 cmd->dw36.hme_combine_overlap = 1;
4314 cmd->dw36.num_ref_idx_l0_minus_one = (avc_state->multi_pre_enable) ? slice_param->num_ref_idx_l0_active_minus1 : 0;
4315 cmd->dw36.check_all_fractional_enable = fei_param->repartition_check_enable;
4316 cmd->dw34.enable_direct_bias_adjustment = 0;
4317 cmd->dw34.enable_global_motion_bias_adjustment = avc_state->global_motion_bias_adjustment_enable;
4318 if (avc_state->global_motion_bias_adjustment_enable)
4319 cmd->dw59.hme_mv_cost_scaling_factor = avc_state->hme_mv_cost_scaling_factor;
4321 cmd->dw64.num_mv_predictors_l0 = fei_param->num_mv_predictors_l0;
4323 } else { /* B slice */
4325 cmd->dw1.max_num_mvs = i965_avc_get_max_mv_per_2mb(avc_state->seq_param->level_idc) / 2;
4326 cmd->dw1.bi_Weight = avc_state->bi_weight;
4327 cmd->dw3.search_ctrl = 7;
4328 cmd->dw3.skip_type = 1;
4329 cmd->dw5.ref_width = cmd->dw39.ref_width = ref_width;
4330 cmd->dw5.ref_height = cmd->dw39.ref_height = ref_height;
4331 cmd->dw7.skip_center_mask = 0xff;
4333 cmd->dw47.intra_cost_sf = avc_state->adaptive_intra_scaling_enable ?
4334 gen9_avc_adaptive_intra_scaling_factor[qp] :
4335 gen9_avc_intra_scaling_factor[qp];
4337 cmd->dw47.max_vmv_r =
4338 i965_avc_get_max_mv_len(avc_state->seq_param->level_idc) * 4;//frame only
4340 cmd->dw36.hme_combine_overlap = 1;
4342 //check is_fwd_frame_short_term_ref
/* Invalid backward ref: warn and bail out early (CURBE still unmapped). */
4343 surface_id = slice_param->RefPicList1[0].picture_id;
4344 obj_surface = SURFACE(surface_id);
4346 WARN_ONCE("Invalid backward reference frame\n");
4348 i965_gpe_context_unmap_curbe(gpe_context);
4351 cmd->dw36.is_fwd_frame_short_term_ref = !!(slice_param->RefPicList1[0].flags & VA_PICTURE_H264_SHORT_TERM_REFERENCE);
4353 cmd->dw36.num_ref_idx_l0_minus_one =
4354 (avc_state->multi_pre_enable) ? slice_param->num_ref_idx_l0_active_minus1
4356 cmd->dw36.num_ref_idx_l1_minus_one =
4357 (avc_state->multi_pre_enable) ? slice_param->num_ref_idx_l1_active_minus1
4359 cmd->dw36.check_all_fractional_enable = fei_param->repartition_check_enable;
/* Temporal distance scale factors for the 8 possible list0 references. */
4361 cmd->dw40.dist_scale_factor_ref_id0_list0 = avc_state->dist_scale_factor_list0[0];
4362 cmd->dw40.dist_scale_factor_ref_id1_list0 = avc_state->dist_scale_factor_list0[1];
4363 cmd->dw41.dist_scale_factor_ref_id2_list0 = avc_state->dist_scale_factor_list0[2];
4364 cmd->dw41.dist_scale_factor_ref_id3_list0 = avc_state->dist_scale_factor_list0[3];
4365 cmd->dw42.dist_scale_factor_ref_id4_list0 = avc_state->dist_scale_factor_list0[4];
4366 cmd->dw42.dist_scale_factor_ref_id5_list0 = avc_state->dist_scale_factor_list0[5];
4367 cmd->dw43.dist_scale_factor_ref_id6_list0 = avc_state->dist_scale_factor_list0[6];
4368 cmd->dw43.dist_scale_factor_ref_id7_list0 = avc_state->dist_scale_factor_list0[7];
4370 cmd->dw34.enable_direct_bias_adjustment = avc_state->direct_bias_adjustment_enable;
4371 if (cmd->dw34.enable_direct_bias_adjustment) {
4372 cmd->dw7.non_skip_mode_added = 1;
4373 cmd->dw7.non_skip_zmv_added = 1;
4376 cmd->dw34.enable_global_motion_bias_adjustment =
4377 avc_state->global_motion_bias_adjustment_enable;
4378 if (avc_state->global_motion_bias_adjustment_enable)
4379 cmd->dw59.hme_mv_cost_scaling_factor = avc_state->hme_mv_cost_scaling_factor;
4381 cmd->dw64.num_mv_predictors_l0 = fei_param->num_mv_predictors_l0;
4382 cmd->dw64.num_mv_predictors_l1 = fei_param->num_mv_predictors_l1;
4385 avc_state->block_based_skip_enable = cmd->dw3.block_based_skip_enable;
4387 if (avc_state->rolling_intra_refresh_enable) {
4389 cmd->dw34.widi_intra_refresh_en = avc_state->rolling_intra_refresh_enable;
4392 cmd->dw34.widi_intra_refresh_en = 0;
4394 cmd->dw34.enable_per_mb_static_check = avc_state->sfd_enable && generic_state->hme_enabled;
4395 cmd->dw34.enable_adaptive_search_window_size = avc_state->adaptive_search_window_enable;
4397 /* Fixme: Skipped ROI stuffs for now */
4399 /* r64: FEI specific fields */
4400 cmd->dw64.fei_enable = 1;
4401 cmd->dw64.multiple_mv_predictor_per_mb_enable = fei_param->mv_predictor_enable;
4402 if (fei_param->distortion != VA_INVALID_ID)
4403 cmd->dw64.vme_distortion_output_enable = 1;
4404 cmd->dw64.per_mb_qp_enable = fei_param->mb_qp;
4405 cmd->dw64.mb_input_enable = fei_param->mb_input;
4407 // FEI mode is disabled when external MVP is available
4408 if (fei_param->mv_predictor_enable)
4409 cmd->dw64.fei_mode = 0;
4411 cmd->dw64.fei_mode = 1;
/* Binding-table indices: one DWORD per surface the MbEnc kernel accesses;
 * must match the surfaces bound in the matching send_surface function. */
4413 cmd->dw80.mb_data_surf_index = GEN9_AVC_MBENC_MFC_AVC_PAK_OBJ_INDEX;
4414 cmd->dw81.mv_data_surf_index = GEN9_AVC_MBENC_IND_MV_DATA_INDEX;
4415 cmd->dw82.i_dist_surf_index = GEN9_AVC_MBENC_BRC_DISTORTION_INDEX;
4416 cmd->dw83.src_y_surf_index = GEN9_AVC_MBENC_CURR_Y_INDEX;
4417 cmd->dw84.mb_specific_data_surf_index = GEN9_AVC_MBENC_MB_SPECIFIC_DATA_INDEX;
4418 cmd->dw85.aux_vme_out_surf_index = GEN9_AVC_MBENC_AUX_VME_OUT_INDEX;
4419 cmd->dw86.curr_ref_pic_sel_surf_index = GEN9_AVC_MBENC_REFPICSELECT_L0_INDEX;
4420 cmd->dw87.hme_mv_pred_fwd_bwd_surf_index = GEN9_AVC_MBENC_MV_DATA_FROM_ME_INDEX;
4421 cmd->dw88.hme_dist_surf_index = GEN9_AVC_MBENC_4XME_DISTORTION_INDEX;
4422 cmd->dw89.slice_map_surf_index = GEN9_AVC_MBENC_SLICEMAP_DATA_INDEX;
4423 cmd->dw90.fwd_frm_mb_data_surf_index = GEN9_AVC_MBENC_FWD_MB_DATA_INDEX;
4424 cmd->dw91.fwd_frm_mv_surf_index = GEN9_AVC_MBENC_FWD_MV_DATA_INDEX;
4425 cmd->dw92.mb_qp_buffer = GEN9_AVC_MBENC_MBQP_INDEX;
4426 cmd->dw93.mb_brc_lut = GEN9_AVC_MBENC_MBBRC_CONST_DATA_INDEX;
4427 cmd->dw94.vme_inter_prediction_surf_index = GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_0_INDEX;
4428 cmd->dw95.vme_inter_prediction_mr_surf_index = GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_1_INDEX;
4429 cmd->dw96.mb_stats_surf_index = GEN9_AVC_MBENC_MB_STATS_INDEX;
4430 cmd->dw97.mad_surf_index = GEN9_AVC_MBENC_MAD_DATA_INDEX;
4431 cmd->dw98.force_non_skip_mb_map_surface = GEN9_AVC_MBENC_FORCE_NONSKIP_MB_MAP_INDEX;
4432 cmd->dw99.widi_wa_surf_index = GEN9_AVC_MBENC_WIDI_WA_INDEX;
4433 cmd->dw100.brc_curbe_surf_index = GEN9_AVC_MBENC_BRC_CURBE_DATA_INDEX;
4434 cmd->dw101.static_detection_cost_table_index = GEN9_AVC_MBENC_SFD_COST_TABLE_INDEX;
4435 cmd->dw102.fei_mv_predictor_surf_index = GEN9_AVC_MBENC_MV_PREDICTOR_INDEX;
4436 i965_gpe_context_unmap_curbe(gpe_context);
/*
 * gen9_avc_send_surface_mbenc - bind every surface the (non-FEI) MbEnc
 * kernel reads or writes into the GPE context's binding table.
 *
 * Covers: PAK object / MV data outputs, current and reference YUV
 * surfaces (VME), HME MV/distortion inputs, MBBRC constant and MB-QP
 * buffers, MB stats / flatness check, MAD, slice map, skip map and SFD
 * cost tables.  The binding-table slots used here must stay in sync with
 * the surf_index DWORDs written by the CURBE setup function.
 *
 * NOTE(review): this listing preserves the original file's line numbers
 * and omits some lines (gaps in the numbering: braces, middle arguments
 * of the multi-line i965_add_*_gpe_surface calls, the statements that
 * assign is_g95).  Verify any change against the complete source file.
 */
4442 gen9_avc_send_surface_mbenc(VADriverContextP ctx,
4443 struct encode_state *encode_state,
4444 struct i965_gpe_context *gpe_context,
4445 struct intel_encoder_context *encoder_context,
4448 struct i965_driver_data *i965 = i965_driver_data(ctx);
4449 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
4450 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
4451 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
4452 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
4453 struct object_surface *obj_surface;
4454 struct gen9_surface_avc *avc_priv_surface;
4455 struct i965_gpe_resource *gpe_resource;
4456 struct mbenc_param * param = (struct mbenc_param *)param_mbenc ;
4457 VASurfaceID surface_id;
4458 unsigned int mbenc_i_frame_dist_in_use = param->mbenc_i_frame_dist_in_use;
4459 unsigned int size = 0;
4460 unsigned int frame_mb_size = generic_state->frame_width_in_mbs *
4461 generic_state->frame_height_in_mbs;
4463 VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
4464 unsigned char is_g95 = 0;
/* Platform split: SKL/BXT use the Gen9 binding-table layout; KBL/GLK/
 * Gen10 use the Gen9.5 layout (assignments to is_g95 are on lines
 * omitted from this listing). */
4466 if (IS_SKL(i965->intel.device_info) ||
4467 IS_BXT(i965->intel.device_info))
4469 else if (IS_KBL(i965->intel.device_info) ||
4470 IS_GEN10(i965->intel.device_info) ||
4471 IS_GLK(i965->intel.device_info))
4474 obj_surface = encode_state->reconstructed_object;
4476 if (!obj_surface || !obj_surface->private_data)
4478 avc_priv_surface = obj_surface->private_data;
4480 /*pak obj command buffer output*/
4481 size = frame_mb_size * 16 * 4;
4482 gpe_resource = &avc_priv_surface->res_mb_code_surface;
4483 i965_add_buffer_gpe_surface(ctx,
4489 GEN9_AVC_MBENC_MFC_AVC_PAK_OBJ_INDEX);
4491 /*mv data buffer output*/
4492 size = frame_mb_size * 32 * 4;
4493 gpe_resource = &avc_priv_surface->res_mv_data_surface;
4494 i965_add_buffer_gpe_surface(ctx,
4500 GEN9_AVC_MBENC_IND_MV_DATA_INDEX);
4502 /*input current YUV surface, current input Y/UV object*/
/* I-frame-distortion pass reads the 4x-downscaled copy instead of the
 * full-resolution input picture. */
4503 if (mbenc_i_frame_dist_in_use) {
4504 obj_surface = encode_state->reconstructed_object;
4505 if (!obj_surface || !obj_surface->private_data)
4507 avc_priv_surface = obj_surface->private_data;
4508 obj_surface = avc_priv_surface->scaled_4x_surface_obj;
4510 obj_surface = encode_state->input_yuv_object;
4512 i965_add_2d_gpe_surface(ctx,
4517 I965_SURFACEFORMAT_R8_UNORM,
4518 GEN9_AVC_MBENC_CURR_Y_INDEX);
4520 i965_add_2d_gpe_surface(ctx,
4525 I965_SURFACEFORMAT_R16_UINT,
4526 GEN9_AVC_MBENC_CURR_UV_INDEX);
/* HME results: 4x ME motion vectors and distortion feed MbEnc. */
4528 if (generic_state->hme_enabled) {
4530 if (!IS_GEN8(i965->intel.device_info)) {
4531 gpe_resource = &(avc_ctx->s4x_memv_data_buffer);
4532 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
4535 I965_SURFACEFORMAT_R8_UNORM,
4536 GEN9_AVC_MBENC_MV_DATA_FROM_ME_INDEX);
4537 /* memv distortion input*/
4538 gpe_resource = &(avc_ctx->s4x_memv_distortion_buffer);
4539 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
4542 I965_SURFACEFORMAT_R8_UNORM,
4543 GEN9_AVC_MBENC_4XME_DISTORTION_INDEX);
4544 } else if (generic_state->frame_type != SLICE_TYPE_I) {
4545 gpe_resource = &(avc_ctx->s4x_memv_data_buffer);
4546 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
4549 I965_SURFACEFORMAT_R8_UNORM,
4550 GEN9_AVC_MBENC_MV_DATA_FROM_ME_INDEX);
4551 /* memv distortion input*/
4552 gpe_resource = &(avc_ctx->s4x_memv_distortion_buffer);
4553 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
4556 I965_SURFACEFORMAT_R8_UNORM,
4557 GEN9_AVC_MBENC_4XME_DISTORTION_INDEX);
4561 /*mbbrc const data_buffer*/
4562 if (param->mb_const_data_buffer_in_use) {
4563 size = 16 * AVC_QP_MAX * sizeof(unsigned int);
4564 gpe_resource = &avc_ctx->res_mbbrc_const_data_buffer;
4565 i965_add_buffer_gpe_surface(ctx,
4571 GEN9_AVC_MBENC_MBBRC_CONST_DATA_INDEX);
4575 /*mb qp data_buffer*/
/* App-supplied MB QP map takes priority over the BRC-generated one. */
4576 if (param->mb_qp_buffer_in_use) {
4577 if (avc_state->mb_qp_data_enable)
4578 gpe_resource = &(avc_ctx->res_mb_qp_data_surface);
4580 gpe_resource = &(avc_ctx->res_mbbrc_mb_qp_data_surface);
4581 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
4584 I965_SURFACEFORMAT_R8_UNORM,
4585 GEN9_AVC_MBENC_MBQP_INDEX);
4588 /*input current YUV surface, current input Y/UV object*/
/* Same current-picture selection again, this time bound as the VME
 * (adv) surface for list0 inter prediction. */
4589 if (mbenc_i_frame_dist_in_use) {
4590 obj_surface = encode_state->reconstructed_object;
4591 if (!obj_surface || !obj_surface->private_data)
4593 avc_priv_surface = obj_surface->private_data;
4594 obj_surface = avc_priv_surface->scaled_4x_surface_obj;
4596 obj_surface = encode_state->input_yuv_object;
4598 i965_add_adv_gpe_surface(ctx, gpe_context,
4600 GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_0_INDEX);
4601 /*input ref YUV surface*/
4602 for (i = 0; i < slice_param->num_ref_idx_l0_active_minus1 + 1; i++) {
4603 surface_id = slice_param->RefPicList0[i].picture_id;
4604 obj_surface = SURFACE(surface_id);
4605 if (!obj_surface || !obj_surface->private_data)
4608 i965_add_adv_gpe_surface(ctx, gpe_context,
4610 GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_0_INDEX + i * 2 + 1);
4612 /*input current YUV surface, current input Y/UV object*/
/* And once more as the list1 (backward prediction) VME surface. */
4613 if (mbenc_i_frame_dist_in_use) {
4614 obj_surface = encode_state->reconstructed_object;
4615 if (!obj_surface || !obj_surface->private_data)
4617 avc_priv_surface = obj_surface->private_data;
4618 obj_surface = avc_priv_surface->scaled_4x_surface_obj;
4620 obj_surface = encode_state->input_yuv_object;
4622 i965_add_adv_gpe_surface(ctx, gpe_context,
4624 GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_1_INDEX);
4626 for (i = 0; i < slice_param->num_ref_idx_l1_active_minus1 + 1; i++) {
4627 if (i > 0) break; // only one ref supported here for B frame
4628 surface_id = slice_param->RefPicList1[i].picture_id;
4629 obj_surface = SURFACE(surface_id);
4630 if (!obj_surface || !obj_surface->private_data)
4633 i965_add_adv_gpe_surface(ctx, gpe_context,
4635 GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_1_INDEX + i * 2 + 1);
4636 i965_add_adv_gpe_surface(ctx, gpe_context,
4638 GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_0_INDEX + i * 2 + 2);
/* The backward ref also supplies its MB code / MV data (for B direct). */
4640 avc_priv_surface = obj_surface->private_data;
4641 /*pak obj command buffer output(mb code)*/
4642 size = frame_mb_size * 16 * 4;
4643 gpe_resource = &avc_priv_surface->res_mb_code_surface;
4644 i965_add_buffer_gpe_surface(ctx,
4650 GEN9_AVC_MBENC_FWD_MB_DATA_INDEX);
4652 /*mv data buffer output*/
4653 size = frame_mb_size * 32 * 4;
4654 gpe_resource = &avc_priv_surface->res_mv_data_surface;
4655 i965_add_buffer_gpe_surface(ctx,
4661 GEN9_AVC_MBENC_FWD_MV_DATA_INDEX);
4665 if (i < INTEL_AVC_MAX_BWD_REF_NUM) {
4666 i965_add_adv_gpe_surface(ctx, gpe_context,
4668 GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_1_INDEX + i * 2 + 1 + INTEL_AVC_MAX_BWD_REF_NUM);
4673 /* BRC distortion data buffer for I frame*/
4674 if (mbenc_i_frame_dist_in_use) {
4675 gpe_resource = &(avc_ctx->res_brc_dist_data_surface);
4676 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
4679 I965_SURFACEFORMAT_R8_UNORM,
4680 GEN9_AVC_MBENC_BRC_DISTORTION_INDEX);
4683 /* as ref frame, update later RefPicSelect of Current Picture */
4684 obj_surface = encode_state->reconstructed_object;
4685 avc_priv_surface = obj_surface->private_data;
4686 if (avc_state->ref_pic_select_list_supported && avc_priv_surface->is_as_ref) {
4687 gpe_resource = &(avc_priv_surface->res_ref_pic_select_surface);
4688 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
4691 I965_SURFACEFORMAT_R8_UNORM,
4692 GEN9_AVC_MBENC_REFPICSELECT_L0_INDEX);
/* MB stats slot: VPP stats buffer when available, otherwise the
 * flatness-check surface (both map to GEN9_AVC_MBENC_MB_STATS_INDEX). */
4695 if (!IS_GEN8(i965->intel.device_info)) {
4696 if (param->mb_vproc_stats_enable) {
4697 /*mb status buffer input*/
4698 size = frame_mb_size * 16 * 4;
4699 gpe_resource = &(avc_ctx->res_mb_status_buffer);
4700 i965_add_buffer_gpe_surface(ctx,
4706 GEN9_AVC_MBENC_MB_STATS_INDEX);
4708 } else if (avc_state->flatness_check_enable) {
4709 gpe_resource = &(avc_ctx->res_flatness_check_surface);
4710 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
4713 I965_SURFACEFORMAT_R8_UNORM,
4714 GEN9_AVC_MBENC_MB_STATS_INDEX);
4716 } else if (avc_state->flatness_check_enable) {
4717 gpe_resource = &(avc_ctx->res_flatness_check_surface);
4718 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
4721 I965_SURFACEFORMAT_R8_UNORM,
4722 GEN9_AVC_MBENC_MB_STATS_INDEX);
4725 if (param->mad_enable) {
4726 /*mad buffer input*/
4728 gpe_resource = &(avc_ctx->res_mad_data_buffer);
4729 i965_add_buffer_gpe_surface(ctx,
4735 GEN9_AVC_MBENC_MAD_DATA_INDEX);
4736 i965_zero_gpe_resource(gpe_resource);
4739 /*brc updated mbenc curbe data buffer,it is ignored by gen9 and used in gen95*/
4740 if (avc_state->mbenc_brc_buffer_size > 0) {
4741 size = avc_state->mbenc_brc_buffer_size;
4742 gpe_resource = &(avc_ctx->res_mbenc_brc_buffer);
4743 i965_add_buffer_gpe_surface(ctx,
4749 GEN95_AVC_MBENC_BRC_CURBE_DATA_INDEX);
4752 /*arbitrary num mbs in slice*/
4753 if (avc_state->arbitrary_num_mbs_in_slice) {
4754 /*slice surface input*/
4755 gpe_resource = &(avc_ctx->res_mbenc_slice_map_surface);
4756 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
4759 I965_SURFACEFORMAT_R8_UNORM,
4760 GEN9_AVC_MBENC_SLICEMAP_DATA_INDEX);
4761 gen9_avc_generate_slice_map(ctx, encode_state, encoder_context);
4764 /* BRC distortion data buffer for I frame */
4765 if (!mbenc_i_frame_dist_in_use) {
4766 if (avc_state->mb_disable_skip_map_enable) {
4767 gpe_resource = &(avc_ctx->res_mb_disable_skip_map_surface);
4768 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
4771 I965_SURFACEFORMAT_R8_UNORM,
4772 (is_g95 ? GEN95_AVC_MBENC_FORCE_NONSKIP_MB_MAP_INDEX : GEN9_AVC_MBENC_FORCE_NONSKIP_MB_MAP_INDEX));
4774 if (IS_GEN8(i965->intel.device_info)) {
4775 if (avc_state->sfd_enable) {
4776 size = 128 / sizeof(unsigned long);
4777 gpe_resource = &(avc_ctx->res_sfd_output_buffer);
4778 i965_add_buffer_gpe_surface(ctx,
4784 GEN8_AVC_MBENC_STATIC_FRAME_DETECTION_OUTPUT_CM);
/* Static-frame-detection cost table: per-frame-type LUT, P/B only. */
4788 if (avc_state->sfd_enable && generic_state->hme_enabled) {
4789 if (generic_state->frame_type == SLICE_TYPE_P) {
4790 gpe_resource = &(avc_ctx->res_sfd_cost_table_p_frame_buffer);
4791 } else if (generic_state->frame_type == SLICE_TYPE_B) {
4792 gpe_resource = &(avc_ctx->res_sfd_cost_table_b_frame_buffer);
4794 if (generic_state->frame_type != SLICE_TYPE_I) {
4796 i965_add_buffer_gpe_surface(ctx,
4802 (is_g95 ? GEN95_AVC_MBENC_SFD_COST_TABLE_INDEX : GEN9_AVC_MBENC_SFD_COST_TABLE_INDEX));
/*
 * Bind all surfaces the FEI (Flexible Encoding Infrastructure) MBEnc kernel
 * reads and writes.  App-supplied VA buffers from
 * VAEncMiscParameterFEIFrameControlH264 (MB code, MV data, MB control,
 * MV predictor, per-MB QP, distortion) are wrapped as GPE resources when
 * present; otherwise driver-internal buffers are allocated.  The current and
 * reference YUV surfaces are also bound for VME.
 */
4813 gen9_avc_fei_send_surface_mbenc(VADriverContextP ctx,
4814 struct encode_state *encode_state,
4815 struct i965_gpe_context *gpe_context,
4816 struct intel_encoder_context *encoder_context,
4819 struct i965_driver_data *i965 = i965_driver_data(ctx);
4820 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
4821 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
4822 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
4823 VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
4824 VAEncMiscParameterFEIFrameControlH264 *fei_param = NULL;
4825 struct object_buffer *obj_buffer = NULL;
4826 struct buffer_store *buffer_store = NULL;
4827 struct object_surface *obj_surface = NULL;
4828 struct gen9_surface_avc *avc_priv_surface;
4829 struct i965_gpe_resource *gpe_resource;
4830 VASurfaceID surface_id;
4831 unsigned int size = 0;
4832 unsigned int frame_mb_nums;
4833 int i = 0, allocate_flag = 1;
4835 obj_surface = encode_state->reconstructed_object;
4836 if (!obj_surface || !obj_surface->private_data)
4838 avc_priv_surface = obj_surface->private_data;
4840 frame_mb_nums = generic_state->frame_width_in_mbs *
4841 generic_state->frame_height_in_mbs;
4842 fei_param = avc_state->fei_framectl_param;
4844 assert(fei_param != NULL);
4846 /* res_mb_code_surface for MB code */
4847 size = frame_mb_nums * FEI_AVC_MB_CODE_BUFFER_SIZE;
4848 if (avc_priv_surface->res_mb_code_surface.bo != NULL)
4849 i965_free_gpe_resource(&avc_priv_surface->res_mb_code_surface);
/* Wrap the app-provided MB-code VA buffer when given; otherwise allocate. */
4850 if (fei_param->mb_code_data != VA_INVALID_ID) {
4851 obj_buffer = BUFFER(fei_param->mb_code_data);
4852 assert(obj_buffer != NULL);
4853 buffer_store = obj_buffer->buffer_store;
4854 assert(size <= buffer_store->bo->size);
4855 i965_dri_object_to_buffer_gpe_resource(
4856 &avc_priv_surface->res_mb_code_surface,
4859 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
4860 &avc_priv_surface->res_mb_code_surface,
4861 ALIGN(size, 0x1000),
4863 assert(allocate_flag != 0);
4866 /* res_mv_data_surface for MV data */
4867 size = frame_mb_nums * FEI_AVC_MV_DATA_BUFFER_SIZE;
4868 if (avc_priv_surface->res_mv_data_surface.bo != NULL)
4869 i965_free_gpe_resource(&avc_priv_surface->res_mv_data_surface);
4870 if (fei_param->mv_data != VA_INVALID_ID) {
4871 obj_buffer = BUFFER(fei_param->mv_data);
4872 assert(obj_buffer != NULL);
4873 buffer_store = obj_buffer->buffer_store;
4874 assert(size <= buffer_store->bo->size);
4875 i965_dri_object_to_buffer_gpe_resource(
4876 &avc_priv_surface->res_mv_data_surface,
4879 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
4880 &avc_priv_surface->res_mv_data_surface,
4881 ALIGN(size, 0x1000),
4883 assert(allocate_flag != 0);
4886 /* fei mb control data surface */
4887 size = frame_mb_nums * FEI_AVC_MB_CONTROL_BUFFER_SIZE;
/* NOTE(review): bitwise '|' on the two flags; behaves like '||' for 0/1 values. */
4888 if (fei_param->mb_input | fei_param->mb_size_ctrl) {
4889 assert(fei_param->mb_ctrl != VA_INVALID_ID);
4890 obj_buffer = BUFFER(fei_param->mb_ctrl);
4891 assert(obj_buffer != NULL);
4892 buffer_store = obj_buffer->buffer_store;
4893 assert(size <= buffer_store->bo->size);
4894 if (avc_priv_surface->res_fei_mb_cntrl_surface.bo != NULL)
4895 i965_free_gpe_resource(&avc_priv_surface->res_fei_mb_cntrl_surface);
4896 i965_dri_object_to_buffer_gpe_resource(
4897 &avc_priv_surface->res_fei_mb_cntrl_surface,
4901 /* fei mv predictor surface*/
4902 size = frame_mb_nums * FEI_AVC_MV_PREDICTOR_BUFFER_SIZE;
4903 if (fei_param->mv_predictor_enable &&
4904 (fei_param->mv_predictor != VA_INVALID_ID)) {
4905 obj_buffer = BUFFER(fei_param->mv_predictor);
4906 assert(obj_buffer != NULL);
4907 buffer_store = obj_buffer->buffer_store;
4908 assert(size <= buffer_store->bo->size);
4909 if (avc_priv_surface->res_fei_mv_predictor_surface.bo != NULL)
4910 i965_free_gpe_resource(&avc_priv_surface->res_fei_mv_predictor_surface);
4911 i965_dri_object_to_buffer_gpe_resource(
4912 &avc_priv_surface->res_fei_mv_predictor_surface,
4915 if (fei_param->mv_predictor_enable)
4916 assert(fei_param->mv_predictor != VA_INVALID_ID);
4919 /* fei vme distortion */
4920 size = frame_mb_nums * FEI_AVC_DISTORTION_BUFFER_SIZE;
4921 if (avc_priv_surface->res_fei_vme_distortion_surface.bo != NULL)
4922 i965_free_gpe_resource(&avc_priv_surface->res_fei_vme_distortion_surface);
4923 if (fei_param->distortion != VA_INVALID_ID) {
4924 obj_buffer = BUFFER(fei_param->distortion);
4925 assert(obj_buffer != NULL);
4926 buffer_store = obj_buffer->buffer_store;
4927 assert(size <= buffer_store->bo->size);
4928 i965_dri_object_to_buffer_gpe_resource(
4929 &avc_priv_surface->res_fei_vme_distortion_surface,
4932 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
4933 &avc_priv_surface->res_fei_vme_distortion_surface,
4934 ALIGN(size, 0x1000),
4935 "fei vme distortion");
4936 assert(allocate_flag != 0);
4940 /* Fixme/Confirm: not sure why we need 3 byte padding here */
4941 size = frame_mb_nums * FEI_AVC_QP_BUFFER_SIZE + 3;
4942 if (fei_param->mb_qp && (fei_param->qp != VA_INVALID_ID)) {
4943 obj_buffer = BUFFER(fei_param->qp);
4944 assert(obj_buffer != NULL);
4945 buffer_store = obj_buffer->buffer_store;
/* Size check excludes the 3-byte padding added above. */
4946 assert((size - 3) <= buffer_store->bo->size);
4947 if (avc_priv_surface->res_fei_mb_qp_surface.bo != NULL)
4948 i965_free_gpe_resource(&avc_priv_surface->res_fei_mb_qp_surface);
4949 i965_dri_object_to_buffer_gpe_resource(
4950 &avc_priv_surface->res_fei_mb_qp_surface,
4953 if (fei_param->mb_qp)
4954 assert(fei_param->qp != VA_INVALID_ID);
4957 /*==== pak obj command buffer output ====*/
4958 size = frame_mb_nums * FEI_AVC_MB_CODE_BUFFER_SIZE;
4959 gpe_resource = &avc_priv_surface->res_mb_code_surface;
4960 i965_add_buffer_gpe_surface(ctx,
4966 GEN9_AVC_MBENC_MFC_AVC_PAK_OBJ_INDEX);
4969 /*=== mv data buffer output */
4970 size = frame_mb_nums * FEI_AVC_MV_DATA_BUFFER_SIZE;
4971 gpe_resource = &avc_priv_surface->res_mv_data_surface;
4972 i965_add_buffer_gpe_surface(ctx,
4978 GEN9_AVC_MBENC_IND_MV_DATA_INDEX);
4981 /* === current input Y (binding table offset = 3)=== */
4982 obj_surface = encode_state->input_yuv_object;
4983 i965_add_2d_gpe_surface(ctx,
4988 I965_SURFACEFORMAT_R8_UNORM,
4989 GEN9_AVC_MBENC_CURR_Y_INDEX);
4991 /* === current input UV === (binding table offset == 4)*/
4992 i965_add_2d_gpe_surface(ctx,
4997 I965_SURFACEFORMAT_R16_UINT,
4998 GEN9_AVC_MBENC_CURR_UV_INDEX);
5000 /* === input current YUV surface, (binding table offset == 15) === */
5001 i965_add_adv_gpe_surface(ctx, gpe_context,
5003 GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_0_INDEX);
5006 /*== input current YUV surface, (binding table offset == 32)*/
5007 i965_add_adv_gpe_surface(ctx, gpe_context,
5009 GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_1_INDEX);
5011 /* list 0 references */
5012 for (i = 0; i < slice_param->num_ref_idx_l0_active_minus1 + 1; i++) {
5014 surface_id = slice_param->RefPicList0[i].picture_id;
5015 obj_surface = SURFACE(surface_id);
5016 if (!obj_surface || !obj_surface->private_data)
5018 i965_add_adv_gpe_surface(ctx, gpe_context,
5020 GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_0_INDEX + i * 2 + 1);
5024 /* list 1 references */
5025 for (i = 0; i < slice_param->num_ref_idx_l1_active_minus1 + 1; i++) {
5026 if (i > 0) break; // only one ref supported here for B frame
5027 surface_id = slice_param->RefPicList1[i].picture_id;
5028 obj_surface = SURFACE(surface_id);
5029 if (!obj_surface || !obj_surface->private_data)
5032 i965_add_adv_gpe_surface(ctx, gpe_context,
5034 GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_0_INDEX + i * 2 + 2);
5036 avc_priv_surface = obj_surface->private_data;
5037 /* mb code of Backward reference frame */
5038 size = frame_mb_nums * FEI_AVC_MB_CODE_BUFFER_SIZE;
5039 gpe_resource = &avc_priv_surface->res_mb_code_surface;
5040 i965_add_buffer_gpe_surface(ctx,
5046 GEN9_AVC_MBENC_FWD_MB_DATA_INDEX);
5048 /* mv data of backward ref frame */
5049 size = frame_mb_nums * FEI_AVC_MV_DATA_BUFFER_SIZE;
5050 gpe_resource = &avc_priv_surface->res_mv_data_surface;
5051 i965_add_buffer_gpe_surface(ctx,
5057 GEN9_AVC_MBENC_FWD_MV_DATA_INDEX);
5061 if (i < INTEL_AVC_MAX_BWD_REF_NUM) {
5062 i965_add_adv_gpe_surface(ctx, gpe_context,
5064 GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_1_INDEX + i * 2 + 1);
5068 /* as ref frame ,update later RefPicSelect of Current Picture*/
5069 obj_surface = encode_state->reconstructed_object;
5070 avc_priv_surface = obj_surface->private_data;
5071 if (avc_state->ref_pic_select_list_supported && avc_priv_surface->is_as_ref) {
5072 gpe_resource = &(avc_priv_surface->res_ref_pic_select_surface);
5073 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
5076 I965_SURFACEFORMAT_R8_UNORM,
5077 GEN9_AVC_MBENC_REFPICSELECT_L0_INDEX);
5082 /* mb specific data, macroblock control parameters */
5083 if ((fei_param->mb_input | fei_param->mb_size_ctrl) &&
5084 (fei_param->mb_ctrl != VA_INVALID_ID)) {
5085 size = frame_mb_nums * FEI_AVC_MB_CONTROL_BUFFER_SIZE;
5086 gpe_resource = &avc_priv_surface->res_fei_mb_cntrl_surface;
5087 i965_add_buffer_gpe_surface(ctx,
5093 GEN9_AVC_MBENC_MB_SPECIFIC_DATA_INDEX);
5096 /* multi mv predictor surface */
5097 if (fei_param->mv_predictor_enable && (fei_param->mv_predictor != VA_INVALID_ID)) {
5098 size = frame_mb_nums * 48; //sizeof (VAEncMVPredictorH264Intel) == 40
5099 gpe_resource = &avc_priv_surface->res_fei_mv_predictor_surface;
5100 i965_add_buffer_gpe_surface(ctx,
5106 GEN9_AVC_MBENC_MV_PREDICTOR_INDEX);
5110 if (fei_param->mb_qp && (fei_param->qp != VA_INVALID_ID)) {
5111 size = frame_mb_nums + 3;
/* NOTE(review): trailing comma is the comma operator; a ';' was likely intended. */
5112 gpe_resource = &avc_priv_surface->res_fei_mb_qp_surface,
5113 i965_add_buffer_gpe_surface(ctx,
5119 GEN9_AVC_MBENC_MBQP_INDEX);
5123 /*=== FEI distortion surface ====*/
5124 size = frame_mb_nums * 48; //sizeof (VAEncFEIDistortionBufferH264Intel) == 48
5125 gpe_resource = &avc_priv_surface->res_fei_vme_distortion_surface;
5126 i965_add_buffer_gpe_surface(ctx,
5132 GEN9_AVC_MBENC_AUX_VME_OUT_INDEX);
/*
 * Run the MBEnc kernel (or the BRC I-frame-distortion variant when
 * i_frame_dist_in_use is set): pick the kernel/media function from the
 * kernel mode and frame type, fill the mbenc_param, set CURBE and surfaces
 * through the generic_ctx callbacks, then dispatch a media-object walker.
 * Returns VA_STATUS_SUCCESS.
 */
5138 gen9_avc_kernel_mbenc(VADriverContextP ctx,
5139 struct encode_state *encode_state,
5140 struct intel_encoder_context *encoder_context,
5141 bool i_frame_dist_in_use)
5143 struct i965_driver_data *i965 = i965_driver_data(ctx);
5144 struct i965_gpe_table *gpe = &i965->gpe_table;
5145 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
5146 struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
5147 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
5148 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
5149 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
5151 struct i965_gpe_context *gpe_context;
5152 struct gpe_media_object_walker_parameter media_object_walker_param;
5153 struct gpe_encoder_kernel_walker_parameter kernel_walker_param;
5154 unsigned int downscaled_width_in_mb, downscaled_height_in_mb;
5155 int media_function = 0;
5157 unsigned int mb_const_data_buffer_in_use = 0;
5158 unsigned int mb_qp_buffer_in_use = 0;
5159 unsigned int brc_enabled = 0;
5160 unsigned int roi_enable = (generic_state->num_roi > 0) ? 1 : 0;
/* NOTE: the trailing "&& (0)" disables dirty-ROI unconditionally. */
5161 unsigned int dirty_roi_enable = ((generic_state->dirty_num_roi > 0) && (generic_state->frame_type == SLICE_TYPE_P) && (0));
5162 struct mbenc_param param ;
5164 int mbenc_i_frame_dist_in_use = i_frame_dist_in_use;
5166 VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
5168 mb_const_data_buffer_in_use =
5169 generic_state->mb_brc_enabled ||
5172 avc_state->mb_qp_data_enable ||
5173 avc_state->rolling_intra_refresh_enable;
5174 mb_qp_buffer_in_use =
5175 generic_state->mb_brc_enabled ||
5176 generic_state->brc_roi_enable ||
5177 avc_state->mb_qp_data_enable;
/* I-frame-distortion mode runs on the 4x downscaled picture via the BRC context. */
5179 if (mbenc_i_frame_dist_in_use) {
5180 media_function = INTEL_MEDIA_STATE_ENC_I_FRAME_DIST;
5181 kernel_idx = GEN9_AVC_KERNEL_BRC_I_FRAME_DIST;
5182 downscaled_width_in_mb = generic_state->downscaled_width_4x_in_mb;
5183 downscaled_height_in_mb = generic_state->downscaled_height_4x_in_mb;
5187 gpe_context = &(avc_ctx->context_brc.gpe_contexts[kernel_idx]);
5189 switch (generic_state->kernel_mode) {
5190 case INTEL_ENC_KERNEL_NORMAL : {
5191 media_function = INTEL_MEDIA_STATE_ENC_NORMAL;
5192 kernel_idx = MBENC_KERNEL_BASE + GEN9_AVC_KERNEL_MBENC_NORMAL_I;
5195 case INTEL_ENC_KERNEL_PERFORMANCE : {
5196 media_function = INTEL_MEDIA_STATE_ENC_PERFORMANCE;
5197 kernel_idx = MBENC_KERNEL_BASE + GEN9_AVC_KERNEL_MBENC_PERFORMANCE_I;
5200 case INTEL_ENC_KERNEL_QUALITY : {
5201 media_function = INTEL_MEDIA_STATE_ENC_QUALITY;
5202 kernel_idx = MBENC_KERNEL_BASE + GEN9_AVC_KERNEL_MBENC_QUALITY_I;
/* FEI overrides the kernel-mode selection above. */
5210 if (encoder_context->fei_enabled) {
5211 media_function = INTEL_MEDIA_STATE_ENC_NORMAL;
5212 kernel_idx = MBENC_KERNEL_BASE + GEN9_AVC_FEI_KERNEL_I;
5215 if (generic_state->frame_type == SLICE_TYPE_P) {
5217 } else if (generic_state->frame_type == SLICE_TYPE_B) {
5221 downscaled_width_in_mb = generic_state->frame_width_in_mbs;
5222 downscaled_height_in_mb = generic_state->frame_height_in_mbs;
5223 mad_enable = avc_state->mad_enable;
5224 brc_enabled = generic_state->brc_enabled;
5226 gpe_context = &(avc_ctx->context_mbenc.gpe_contexts[kernel_idx]);
5229 memset(&param, 0, sizeof(struct mbenc_param));
5231 param.mb_const_data_buffer_in_use = mb_const_data_buffer_in_use;
5232 param.mb_qp_buffer_in_use = mb_qp_buffer_in_use;
5233 param.mbenc_i_frame_dist_in_use = mbenc_i_frame_dist_in_use;
5234 param.mad_enable = mad_enable;
5235 param.brc_enabled = brc_enabled;
5236 param.roi_enabled = roi_enable;
5238 if (avc_state->mb_status_supported) {
5239 param.mb_vproc_stats_enable = avc_state->flatness_check_enable || avc_state->adaptive_transform_decision_enable;
/* Skip CURBE/context setup when the BRC-update kernel already programmed it. */
5242 if (!avc_state->mbenc_curbe_set_in_brc_update) {
5243 gpe->context_init(ctx, gpe_context);
5246 gpe->reset_binding_table(ctx, gpe_context);
5248 if (!avc_state->mbenc_curbe_set_in_brc_update) {
5250 generic_ctx->pfn_set_curbe_mbenc(ctx, encode_state, gpe_context, encoder_context, &param);
5253 /* MB brc const data buffer set up*/
5254 if (mb_const_data_buffer_in_use) {
5255 // calculate the lambda table; it is kernel-controlled trellis quantization, gen95+
5256 if (avc_state->lambda_table_enable)
5257 gen95_avc_calc_lambda_table(ctx, encode_state, encoder_context);
5259 gen9_avc_load_mb_brc_const_data(ctx, encode_state, encoder_context);
5262 /*clear the mad buffer*/
5264 i965_zero_gpe_resource(&(avc_ctx->res_mad_data_buffer));
5267 generic_ctx->pfn_send_mbenc_surface(ctx, encode_state, gpe_context, encoder_context, &param);
5269 gpe->setup_interface_data(ctx, gpe_context);
5272 memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
5274 kernel_walker_param.use_scoreboard = 1;
5275 kernel_walker_param.resolution_x = downscaled_width_in_mb ;
5276 kernel_walker_param.resolution_y = downscaled_height_in_mb ;
5277 if (mbenc_i_frame_dist_in_use) {
5278 kernel_walker_param.no_dependency = 1;
/* Walker pattern depends on frame type; B frames with temporal direct use 45 degrees. */
5280 switch (generic_state->frame_type) {
5282 kernel_walker_param.walker_degree = WALKER_45_DEGREE;
5285 kernel_walker_param.walker_degree = WALKER_26_DEGREE;
5288 kernel_walker_param.walker_degree = WALKER_26_DEGREE;
5289 if (!slice_param->direct_spatial_mv_pred_flag) {
5290 kernel_walker_param.walker_degree = WALKER_45_DEGREE;
5296 kernel_walker_param.no_dependency = 0;
5299 i965_init_media_object_walker_parameter(&kernel_walker_param, &media_object_walker_param);
5301 gen9_avc_run_kernel_media_object_walker(ctx, encoder_context,
5304 &media_object_walker_param);
5305 return VA_STATUS_SUCCESS;
5309 me kernel related function
/*
 * Fill the CURBE for the HME (hierarchical motion estimation) kernel.
 * Per-level parameters (4x/16x/32x) are derived from curbe_param->hme_type;
 * search-path tables and ME method come from the encoder preset, with the
 * B-frame variants selected when the current frame is a B slice.
 */
5312 gen9_avc_set_curbe_me(VADriverContextP ctx,
5313 struct encode_state *encode_state,
5314 struct i965_gpe_context *gpe_context,
5315 struct intel_encoder_context *encoder_context,
5318 gen9_avc_me_curbe_data *curbe_cmd;
5319 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
5320 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
5321 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
5323 VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
5325 struct me_param * curbe_param = (struct me_param *)param ;
5326 unsigned char use_mv_from_prev_step = 0;
5327 unsigned char write_distortions = 0;
5328 unsigned char qp_prime_y = 0;
5329 unsigned char me_method = gen9_avc_p_me_method[generic_state->preset];
5330 unsigned char seach_table_idx = 0;
5331 unsigned char mv_shift_factor = 0, prev_mv_read_pos_factor = 0;
5332 unsigned int downscaled_width_in_mb, downscaled_height_in_mb;
5333 unsigned int scale_factor = 0;
5335 qp_prime_y = avc_state->pic_param->pic_init_qp + slice_param->slice_qp_delta;
/* Each HME level seeds from the next-coarser level's MVs when that level is enabled. */
5336 switch (curbe_param->hme_type) {
5337 case INTEL_ENC_HME_4x : {
5338 use_mv_from_prev_step = (generic_state->b16xme_enabled) ? 1 : 0;
5339 write_distortions = 1;
5340 mv_shift_factor = 2;
5342 prev_mv_read_pos_factor = 0;
5345 case INTEL_ENC_HME_16x : {
5346 use_mv_from_prev_step = (generic_state->b32xme_enabled) ? 1 : 0;
5347 write_distortions = 0;
5348 mv_shift_factor = 2;
5350 prev_mv_read_pos_factor = 1;
5353 case INTEL_ENC_HME_32x : {
5354 use_mv_from_prev_step = 0;
5355 write_distortions = 0;
5356 mv_shift_factor = 1;
5358 prev_mv_read_pos_factor = 0;
5365 curbe_cmd = i965_gpe_context_map_curbe(gpe_context);
5370 downscaled_width_in_mb = ALIGN(generic_state->frame_width_in_pixel / scale_factor, 16) / 16;
5371 downscaled_height_in_mb = ALIGN(generic_state->frame_height_in_pixel / scale_factor, 16) / 16;
5373 memcpy(curbe_cmd, gen9_avc_me_curbe_init_data, sizeof(gen9_avc_me_curbe_data));
5375 curbe_cmd->dw3.sub_pel_mode = 3;
5376 if (avc_state->field_scaling_output_interleaved) {
5377 /*frame set to zero,field specified*/
5378 curbe_cmd->dw3.src_access = 0;
5379 curbe_cmd->dw3.ref_access = 0;
5380 curbe_cmd->dw7.src_field_polarity = 0;
5382 curbe_cmd->dw4.picture_height_minus1 = downscaled_height_in_mb - 1;
5383 curbe_cmd->dw4.picture_width = downscaled_width_in_mb;
5384 curbe_cmd->dw5.qp_prime_y = qp_prime_y;
5386 curbe_cmd->dw6.use_mv_from_prev_step = use_mv_from_prev_step;
5387 curbe_cmd->dw6.write_distortions = write_distortions;
5388 curbe_cmd->dw6.super_combine_dist = gen9_avc_super_combine_dist[generic_state->preset];
5389 curbe_cmd->dw6.max_vmvr = i965_avc_get_max_mv_len(avc_state->seq_param->level_idc) * 4;//frame only
/* B frames switch to bidirectional weighting and the B-specific ME method table. */
5391 if (generic_state->frame_type == SLICE_TYPE_B) {
5392 curbe_cmd->dw1.bi_weight = 32;
5393 curbe_cmd->dw13.num_ref_idx_l1_minus1 = slice_param->num_ref_idx_l1_active_minus1;
5394 me_method = gen9_avc_b_me_method[generic_state->preset];
5395 seach_table_idx = 1;
5398 if (generic_state->frame_type == SLICE_TYPE_P ||
5399 generic_state->frame_type == SLICE_TYPE_B)
5400 curbe_cmd->dw13.num_ref_idx_l0_minus1 = slice_param->num_ref_idx_l0_active_minus1;
5402 curbe_cmd->dw13.ref_streamin_cost = 5;
5403 curbe_cmd->dw13.roi_enable = 0;
5405 curbe_cmd->dw15.prev_mv_read_pos_factor = prev_mv_read_pos_factor;
5406 curbe_cmd->dw15.mv_shift_factor = mv_shift_factor;
/* "seach_table_idx" (sic) selects the P- vs B-frame search-path table. */
5408 memcpy(&curbe_cmd->dw16, table_enc_search_path[seach_table_idx][me_method], 14 * sizeof(int));
5410 curbe_cmd->dw32._4x_memv_output_data_surf_index = GEN9_AVC_ME_MV_DATA_SURFACE_INDEX;
5411 curbe_cmd->dw33._16x_32x_memv_input_data_surf_index = (curbe_param->hme_type == INTEL_ENC_HME_32x) ? GEN9_AVC_32XME_MV_DATA_SURFACE_INDEX : GEN9_AVC_16XME_MV_DATA_SURFACE_INDEX ;
5412 curbe_cmd->dw34._4x_me_output_dist_surf_index = GEN9_AVC_ME_DISTORTION_SURFACE_INDEX;
5413 curbe_cmd->dw35._4x_me_output_brc_dist_surf_index = GEN9_AVC_ME_BRC_DISTORTION_INDEX;
5414 curbe_cmd->dw36.vme_fwd_inter_pred_surf_index = GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX;
5415 curbe_cmd->dw37.vme_bdw_inter_pred_surf_index = GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX;
5416 curbe_cmd->dw38.reserved = GEN9_AVC_ME_VDENC_STREAMIN_INDEX;
5418 i965_gpe_context_unmap_curbe(gpe_context);
/*
 * Bind the surfaces used by the HME kernel for the requested level
 * (4x/16x/32x): the MV-data output buffer for this level, the next-coarser
 * level's MV buffer as input when enabled, distortion outputs (4x only),
 * and the downscaled current/reference YUV surfaces for both L0 and L1.
 * All downscaled inputs live in the reconstructed_object's private data.
 */
5423 gen9_avc_send_surface_me(VADriverContextP ctx,
5424 struct encode_state *encode_state,
5425 struct i965_gpe_context *gpe_context,
5426 struct intel_encoder_context *encoder_context,
5429 struct i965_driver_data *i965 = i965_driver_data(ctx);
5431 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
5432 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
5433 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
5434 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
5436 struct object_surface *obj_surface, *input_surface;
5437 struct gen9_surface_avc *avc_priv_surface;
5438 struct i965_gpe_resource *gpe_resource;
5439 struct me_param * curbe_param = (struct me_param *)param ;
5441 VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
5442 VASurfaceID surface_id;
5445 /* all scaled input surface stored in reconstructed_object*/
5446 obj_surface = encode_state->reconstructed_object;
5447 if (!obj_surface || !obj_surface->private_data)
5449 avc_priv_surface = obj_surface->private_data;
5452 switch (curbe_param->hme_type) {
5453 case INTEL_ENC_HME_4x : {
/* 4x MV data output */
5455 gpe_resource = &avc_ctx->s4x_memv_data_buffer;
5456 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
5459 I965_SURFACEFORMAT_R8_UNORM,
5460 GEN9_AVC_ME_MV_DATA_SURFACE_INDEX);
/* 16x MVs feed the 4x pass when 16x HME is enabled. */
5463 if (generic_state->b16xme_enabled) {
5464 gpe_resource = &avc_ctx->s16x_memv_data_buffer;
5465 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
5468 I965_SURFACEFORMAT_R8_UNORM,
5469 GEN9_AVC_16XME_MV_DATA_SURFACE_INDEX);
5471 /* brc distortion output*/
5472 gpe_resource = &avc_ctx->res_brc_dist_data_surface;
5473 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
5476 I965_SURFACEFORMAT_R8_UNORM,
5477 GEN9_AVC_ME_BRC_DISTORTION_INDEX);
5478 /* memv distortion output*/
5479 gpe_resource = &avc_ctx->s4x_memv_distortion_buffer;
5480 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
5483 I965_SURFACEFORMAT_R8_UNORM,
5484 GEN9_AVC_ME_DISTORTION_SURFACE_INDEX);
5485 /*input current down scaled YUV surface*/
5486 obj_surface = encode_state->reconstructed_object;
5487 avc_priv_surface = obj_surface->private_data;
5488 input_surface = avc_priv_surface->scaled_4x_surface_obj;
5489 i965_add_adv_gpe_surface(ctx, gpe_context,
5491 GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX);
5492 /*input ref scaled YUV surface*/
5493 for (i = 0; i < slice_param->num_ref_idx_l0_active_minus1 + 1; i++) {
5494 surface_id = slice_param->RefPicList0[i].picture_id;
5495 obj_surface = SURFACE(surface_id);
5496 if (!obj_surface || !obj_surface->private_data)
5498 avc_priv_surface = obj_surface->private_data;
5500 input_surface = avc_priv_surface->scaled_4x_surface_obj;
5502 i965_add_adv_gpe_surface(ctx, gpe_context,
5504 GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX + i * 2 + 1);
/* Rebind the current downscaled surface as anchor for the L1 (backward) list. */
5507 obj_surface = encode_state->reconstructed_object;
5508 avc_priv_surface = obj_surface->private_data;
5509 input_surface = avc_priv_surface->scaled_4x_surface_obj;
5511 i965_add_adv_gpe_surface(ctx, gpe_context,
5513 GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX);
5515 for (i = 0; i < slice_param->num_ref_idx_l1_active_minus1 + 1; i++) {
5516 surface_id = slice_param->RefPicList1[i].picture_id;
5517 obj_surface = SURFACE(surface_id);
5518 if (!obj_surface || !obj_surface->private_data)
5520 avc_priv_surface = obj_surface->private_data;
5522 input_surface = avc_priv_surface->scaled_4x_surface_obj;
5524 i965_add_adv_gpe_surface(ctx, gpe_context,
5526 GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX + i * 2 + 1);
5531 case INTEL_ENC_HME_16x : {
5532 gpe_resource = &avc_ctx->s16x_memv_data_buffer;
5533 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
5536 I965_SURFACEFORMAT_R8_UNORM,
5537 GEN9_AVC_ME_MV_DATA_SURFACE_INDEX);
/* 32x MVs feed the 16x pass when 32x HME is enabled. */
5539 if (generic_state->b32xme_enabled) {
5540 gpe_resource = &avc_ctx->s32x_memv_data_buffer;
5541 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
5544 I965_SURFACEFORMAT_R8_UNORM,
5545 GEN9_AVC_32XME_MV_DATA_SURFACE_INDEX);
5548 obj_surface = encode_state->reconstructed_object;
5549 avc_priv_surface = obj_surface->private_data;
5550 input_surface = avc_priv_surface->scaled_16x_surface_obj;
5551 i965_add_adv_gpe_surface(ctx, gpe_context,
5553 GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX);
5555 for (i = 0; i < slice_param->num_ref_idx_l0_active_minus1 + 1; i++) {
5556 surface_id = slice_param->RefPicList0[i].picture_id;
5557 obj_surface = SURFACE(surface_id);
5558 if (!obj_surface || !obj_surface->private_data)
5560 avc_priv_surface = obj_surface->private_data;
5562 input_surface = avc_priv_surface->scaled_16x_surface_obj;
5564 i965_add_adv_gpe_surface(ctx, gpe_context,
5566 GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX + i * 2 + 1);
5569 obj_surface = encode_state->reconstructed_object;
5570 avc_priv_surface = obj_surface->private_data;
5571 input_surface = avc_priv_surface->scaled_16x_surface_obj;
5573 i965_add_adv_gpe_surface(ctx, gpe_context,
5575 GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX);
5577 for (i = 0; i < slice_param->num_ref_idx_l1_active_minus1 + 1; i++) {
5578 surface_id = slice_param->RefPicList1[i].picture_id;
5579 obj_surface = SURFACE(surface_id);
5580 if (!obj_surface || !obj_surface->private_data)
5582 avc_priv_surface = obj_surface->private_data;
5584 input_surface = avc_priv_surface->scaled_16x_surface_obj;
5586 i965_add_adv_gpe_surface(ctx, gpe_context,
5588 GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX + i * 2 + 1);
5592 case INTEL_ENC_HME_32x : {
/* 32x is the coarsest level: no coarser MV input exists. */
5593 gpe_resource = &avc_ctx->s32x_memv_data_buffer;
5594 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
5597 I965_SURFACEFORMAT_R8_UNORM,
5598 GEN9_AVC_ME_MV_DATA_SURFACE_INDEX);
5600 obj_surface = encode_state->reconstructed_object;
5601 avc_priv_surface = obj_surface->private_data;
5602 input_surface = avc_priv_surface->scaled_32x_surface_obj;
5603 i965_add_adv_gpe_surface(ctx, gpe_context,
5605 GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX);
5607 for (i = 0; i < slice_param->num_ref_idx_l0_active_minus1 + 1; i++) {
5608 surface_id = slice_param->RefPicList0[i].picture_id;
5609 obj_surface = SURFACE(surface_id);
5610 if (!obj_surface || !obj_surface->private_data)
5612 avc_priv_surface = obj_surface->private_data;
5614 input_surface = avc_priv_surface->scaled_32x_surface_obj;
5616 i965_add_adv_gpe_surface(ctx, gpe_context,
5618 GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX + i * 2 + 1);
5621 obj_surface = encode_state->reconstructed_object;
5622 avc_priv_surface = obj_surface->private_data;
5623 input_surface = avc_priv_surface->scaled_32x_surface_obj;
5625 i965_add_adv_gpe_surface(ctx, gpe_context,
5627 GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX);
5629 for (i = 0; i < slice_param->num_ref_idx_l1_active_minus1 + 1; i++) {
5630 surface_id = slice_param->RefPicList1[i].picture_id;
5631 obj_surface = SURFACE(surface_id);
5632 if (!obj_surface || !obj_surface->private_data)
5634 avc_priv_surface = obj_surface->private_data;
5636 input_surface = avc_priv_surface->scaled_32x_surface_obj;
5638 i965_add_adv_gpe_surface(ctx, gpe_context,
5640 GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX + i * 2 + 1);
/*
 * Dispatch one HME pass (4x, 16x or 32x) as a media-object walker:
 * choose the media function for the level, pick the P- or B-frame ME
 * kernel, set CURBE and surfaces via the generic_ctx callbacks, and run.
 * Returns VA_STATUS_SUCCESS.
 */
5651 gen9_avc_kernel_me(VADriverContextP ctx,
5652 struct encode_state *encode_state,
5653 struct intel_encoder_context *encoder_context,
5656 struct i965_driver_data *i965 = i965_driver_data(ctx);
5657 struct i965_gpe_table *gpe = &i965->gpe_table;
5658 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
5659 struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
5660 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
5661 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
5663 struct i965_gpe_context *gpe_context;
5664 struct gpe_media_object_walker_parameter media_object_walker_param;
5665 struct gpe_encoder_kernel_walker_parameter kernel_walker_param;
5666 unsigned int downscaled_width_in_mb, downscaled_height_in_mb;
5667 int media_function = 0;
5669 struct me_param param ;
5670 unsigned int scale_factor = 0;
5673 case INTEL_ENC_HME_4x : {
5674 media_function = INTEL_MEDIA_STATE_4X_ME;
5678 case INTEL_ENC_HME_16x : {
5679 media_function = INTEL_MEDIA_STATE_16X_ME;
5683 case INTEL_ENC_HME_32x : {
5684 media_function = INTEL_MEDIA_STATE_32X_ME;
5693 downscaled_width_in_mb = ALIGN(generic_state->frame_width_in_pixel / scale_factor, 16) / 16;
5694 downscaled_height_in_mb = ALIGN(generic_state->frame_height_in_pixel / scale_factor, 16) / 16;
5696 /* I frame should not come here.*/
5697 kernel_idx = (generic_state->frame_type == SLICE_TYPE_P) ? GEN9_AVC_KERNEL_ME_P_IDX : GEN9_AVC_KERNEL_ME_B_IDX;
5698 gpe_context = &(avc_ctx->context_me.gpe_contexts[kernel_idx]);
5700 gpe->context_init(ctx, gpe_context);
5701 gpe->reset_binding_table(ctx, gpe_context);
5704 memset(&param, 0, sizeof(param));
5705 param.hme_type = hme_type;
5706 generic_ctx->pfn_set_curbe_me(ctx, encode_state, gpe_context, encoder_context, &param);
5709 generic_ctx->pfn_send_me_surface(ctx, encode_state, gpe_context, encoder_context, &param);
5711 gpe->setup_interface_data(ctx, gpe_context);
5713 memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
5714 /* the scaling is based on 8x8 blk level */
5715 kernel_walker_param.resolution_x = downscaled_width_in_mb ;
5716 kernel_walker_param.resolution_y = downscaled_height_in_mb ;
5717 kernel_walker_param.no_dependency = 1;
5719 i965_init_media_object_walker_parameter(&kernel_walker_param, &media_object_walker_param);
5721 gen9_avc_run_kernel_media_object_walker(ctx, encoder_context,
5724 &media_object_walker_param);
5726 return VA_STATUS_SUCCESS;
/*
 * Fill the CURBE for the weighted-prediction (WP) scaling kernel: the
 * default luma weight/offset come from slice entry 0 of the reference list
 * selected by curbe_param->ref_list_idx (L1 when non-zero, else L0), plus
 * the fixed input/output binding-table indices.
 */
5733 gen9_avc_set_curbe_wp(VADriverContextP ctx,
5734 struct encode_state *encode_state,
5735 struct i965_gpe_context *gpe_context,
5736 struct intel_encoder_context *encoder_context,
5739 gen9_avc_wp_curbe_data *cmd;
5740 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
5741 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
5742 VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
5743 struct wp_param * curbe_param = (struct wp_param *)param;
5745 cmd = i965_gpe_context_map_curbe(gpe_context);
5749 memset(cmd, 0, sizeof(gen9_avc_wp_curbe_data));
5750 if (curbe_param->ref_list_idx) {
5751 cmd->dw0.default_weight = slice_param->luma_weight_l1[0];
5752 cmd->dw0.default_offset = slice_param->luma_offset_l1[0];
5754 cmd->dw0.default_weight = slice_param->luma_weight_l0[0];
5755 cmd->dw0.default_offset = slice_param->luma_offset_l0[0];
5758 cmd->dw49.input_surface = GEN9_AVC_WP_INPUT_REF_SURFACE_INDEX;
5759 cmd->dw50.output_surface = GEN9_AVC_WP_OUTPUT_SCALED_SURFACE_INDEX;
5761 i965_gpe_context_unmap_curbe(gpe_context);
/*
 * Bind input/output surfaces for the weighted-prediction kernel and record
 * whether WP is usable for the chosen reference list: weighted_ref_l0/l1
 * is cleared when entry 0 of the corresponding RefPicList has no backing
 * surface.  Output goes to the per-list wp_output_pic_select_surface_obj.
 */
5766 gen9_avc_send_surface_wp(VADriverContextP ctx,
5767 struct encode_state *encode_state,
5768 struct i965_gpe_context *gpe_context,
5769 struct intel_encoder_context *encoder_context,
5772 struct i965_driver_data *i965 = i965_driver_data(ctx);
5773 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
5774 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
5775 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
5776 struct wp_param * curbe_param = (struct wp_param *)param;
5777 struct object_surface *obj_surface;
5778 VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
5779 VASurfaceID surface_id;
/* ref_list_idx != 0 selects list 1 (backward); 0 selects list 0 (forward). */
5781 if (curbe_param->ref_list_idx) {
5782 surface_id = slice_param->RefPicList1[0].picture_id;
5783 obj_surface = SURFACE(surface_id);
5784 if (!obj_surface || !obj_surface->private_data)
5785 avc_state->weighted_ref_l1_enable = 0;
5787 avc_state->weighted_ref_l1_enable = 1;
5789 surface_id = slice_param->RefPicList0[0].picture_id;
5790 obj_surface = SURFACE(surface_id);
5791 if (!obj_surface || !obj_surface->private_data)
5792 avc_state->weighted_ref_l0_enable = 0;
5794 avc_state->weighted_ref_l0_enable = 1;
/* Fall back to the first reference object when the list entry had no surface. */
5797 obj_surface = encode_state->reference_objects[0];
5800 i965_add_adv_gpe_surface(ctx, gpe_context,
5802 GEN9_AVC_WP_INPUT_REF_SURFACE_INDEX);
5804 obj_surface = avc_ctx->wp_output_pic_select_surface_obj[curbe_param->ref_list_idx];
5805 i965_add_adv_gpe_surface(ctx, gpe_context,
5806 GEN9_AVC_WP_OUTPUT_SCALED_SURFACE_INDEX);
5812 gen9_avc_kernel_wp(VADriverContextP ctx,
5813 struct encode_state *encode_state,
5814 struct intel_encoder_context *encoder_context,
5815 unsigned int list1_in_use)
5817 struct i965_driver_data *i965 = i965_driver_data(ctx);
5818 struct i965_gpe_table *gpe = &i965->gpe_table;
5819 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
5820 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
5821 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
5822 struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
5824 struct i965_gpe_context *gpe_context;
5825 struct gpe_media_object_walker_parameter media_object_walker_param;
5826 struct gpe_encoder_kernel_walker_parameter kernel_walker_param;
5827 int media_function = INTEL_MEDIA_STATE_ENC_WP;
5828 struct wp_param param;
5830 gpe_context = &(avc_ctx->context_wp.gpe_contexts);
5832 gpe->context_init(ctx, gpe_context);
5833 gpe->reset_binding_table(ctx, gpe_context);
5835 memset(¶m, 0, sizeof(param));
5836 param.ref_list_idx = (list1_in_use == 1) ? 1 : 0;
5838 generic_ctx->pfn_set_curbe_wp(ctx, encode_state, gpe_context, encoder_context, ¶m);
5841 generic_ctx->pfn_send_wp_surface(ctx, encode_state, gpe_context, encoder_context, ¶m);
5843 gpe->setup_interface_data(ctx, gpe_context);
5845 memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
5846 /* the scaling is based on 8x8 blk level */
5847 kernel_walker_param.resolution_x = generic_state->frame_width_in_mbs;
5848 kernel_walker_param.resolution_y = generic_state->frame_height_in_mbs;
5849 kernel_walker_param.no_dependency = 1;
5851 i965_init_media_object_walker_parameter(&kernel_walker_param, &media_object_walker_param);
5853 gen9_avc_run_kernel_media_object_walker(ctx, encoder_context,
5856 &media_object_walker_param);
5858 return VA_STATUS_SUCCESS;
5863 SFD (static frame detection) related functions
/*
 * Fill the CURBE for the static-frame-detection (SFD) kernel.
 * Programs fixed thresholds (large-MV, zero-MV, min-distortion), per-frame
 * parameters (slice type, QP, frame size in MBs), a per-QP cost table for
 * P/B frames, and the kernel's surface BTI indices.
 * NOTE(review): listing is missing lines (braces, closing return);
 * comments describe only the visible statements.
 */
5866 gen9_avc_set_curbe_sfd(VADriverContextP ctx,
5867 struct encode_state *encode_state,
5868 struct i965_gpe_context *gpe_context,
5869 struct intel_encoder_context *encoder_context,
5872 gen9_avc_sfd_curbe_data *cmd;
5873 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
5874 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
5875 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
5876 VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
5878 cmd = i965_gpe_context_map_curbe(gpe_context);
5882 memset(cmd, 0, sizeof(gen9_avc_sfd_curbe_data));
/* Kernel mode flags: stream-in type 7, VDENC path disabled. */
5884 cmd->dw0.enable_intra_cost_scaling_for_static_frame = 1 ;
5885 cmd->dw0.enable_adaptive_mv_stream_in = 0 ;
5886 cmd->dw0.stream_in_type = 7 ;
5887 cmd->dw0.slice_type = slice_type_kernel[generic_state->frame_type] ;
5888 cmd->dw0.brc_mode_enable = generic_state->brc_enabled ;
5889 cmd->dw0.vdenc_mode_disable = 1 ;
5891 cmd->dw1.hme_stream_in_ref_cost = 5 ;
5892 cmd->dw1.num_of_refs = slice_param->num_ref_idx_l0_active_minus1 ;
/* Effective slice QP = pic_init_qp + slice_qp_delta. */
5893 cmd->dw1.qp_value = avc_state->pic_param->pic_init_qp + slice_param->slice_qp_delta ;
5895 cmd->dw2.frame_width_in_mbs = generic_state->frame_width_in_mbs ;
5896 cmd->dw2.frame_height_in_mbs = generic_state->frame_height_in_mbs ;
/* Detection thresholds: totals are a percentage of the frame's MB count. */
5898 cmd->dw3.large_mv_threshold = 128 ;
5899 cmd->dw4.total_large_mv_threshold = (generic_state->frame_width_in_mbs * generic_state->frame_height_in_mbs) / 100 ;
5900 cmd->dw5.zmv_threshold = 4 ;
5901 cmd->dw6.total_zmv_threshold = (generic_state->frame_width_in_mbs * generic_state->frame_height_in_mbs * avc_state->zero_mv_threshold) / 100 ; // zero_mv_threshold = 60;
5902 cmd->dw7.min_dist_threshold = 10 ;
/* Per-QP cost table depends on slice type; I frames get no table here. */
5904 if (generic_state->frame_type == SLICE_TYPE_P) {
5905 memcpy(cmd->cost_table, gen9_avc_sfd_cost_table_p_frame, AVC_QP_MAX * sizeof(unsigned char));
5907 } else if (generic_state->frame_type == SLICE_TYPE_B) {
5908 memcpy(cmd->cost_table, gen9_avc_sfd_cost_table_b_frame, AVC_QP_MAX * sizeof(unsigned char));
5911 cmd->dw21.actual_width_in_mb = cmd->dw2.frame_width_in_mbs ;
5912 cmd->dw21.actual_height_in_mb = cmd->dw2.frame_height_in_mbs ;
/* Binding-table indices for the SFD kernel's surfaces. */
5913 cmd->dw24.vdenc_input_image_state_index = GEN9_AVC_SFD_VDENC_INPUT_IMAGE_STATE_INDEX ;
5914 cmd->dw26.mv_data_surface_index = GEN9_AVC_SFD_MV_DATA_SURFACE_INDEX ;
5915 cmd->dw27.inter_distortion_surface_index = GEN9_AVC_SFD_INTER_DISTORTION_SURFACE_INDEX ;
5916 cmd->dw28.output_data_surface_index = GEN9_AVC_SFD_OUTPUT_DATA_SURFACE_INDEX ;
5917 cmd->dw29.vdenc_output_image_state_index = GEN9_AVC_SFD_VDENC_OUTPUT_IMAGE_STATE_INDEX ;
5919 i965_gpe_context_unmap_curbe(gpe_context);
/*
 * Bind the buffers used by the static-frame-detection (SFD) kernel:
 * 4x HME motion-vector data, 4x HME distortion, and the SFD output buffer.
 * NOTE(review): listing is missing lines (call arguments such as the
 * gpe_resource operand and sizes); comments describe only the visible code.
 */
5924 gen9_avc_send_surface_sfd(VADriverContextP ctx,
5925 struct encode_state *encode_state,
5926 struct i965_gpe_context *gpe_context,
5927 struct intel_encoder_context *encoder_context,
5930 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
5931 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
5932 struct i965_gpe_resource *gpe_resource;
5935 /*HME mv data surface memv output 4x*/
5936 gpe_resource = &avc_ctx->s4x_memv_data_buffer;
5937 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
5940 I965_SURFACEFORMAT_R8_UNORM,
5941 GEN9_AVC_SFD_MV_DATA_SURFACE_INDEX);
5943 /* memv distortion */
5944 gpe_resource = &avc_ctx->s4x_memv_distortion_buffer;
5945 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
5948 I965_SURFACEFORMAT_R8_UNORM,
5949 GEN9_AVC_SFD_INTER_DISTORTION_SURFACE_INDEX);
/* SFD output data buffer (plain buffer, not 2D). */
5952 gpe_resource = &avc_ctx->res_sfd_output_buffer;
5953 i965_add_buffer_gpe_surface(ctx,
5959 GEN9_AVC_SFD_OUTPUT_DATA_SURFACE_INDEX);
/*
 * Run the static-frame-detection (SFD) kernel.
 * Sets up CURBE and surfaces via the generic-context hooks, then dispatches
 * a single media object (no walker) with empty inline data.
 * Returns VA_STATUS_SUCCESS unconditionally.
 */
5964 gen9_avc_kernel_sfd(VADriverContextP ctx,
5965 struct encode_state *encode_state,
5966 struct intel_encoder_context *encoder_context)
5968 struct i965_driver_data *i965 = i965_driver_data(ctx);
5969 struct i965_gpe_table *gpe = &i965->gpe_table;
5970 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
5971 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
5972 struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
5974 struct i965_gpe_context *gpe_context;
5975 struct gpe_media_object_parameter media_object_param;
5976 struct gpe_media_object_inline_data media_object_inline_data;
5977 int media_function = INTEL_MEDIA_STATE_STATIC_FRAME_DETECTION;
5978 gpe_context = &(avc_ctx->context_sfd.gpe_contexts);
5980 gpe->context_init(ctx, gpe_context);
5981 gpe->reset_binding_table(ctx, gpe_context);
/* set curbe: SFD takes no per-call parameter (NULL). */
5984 generic_ctx->pfn_set_curbe_sfd(ctx, encode_state, gpe_context, encoder_context, NULL);
/* send surfaces */
5987 generic_ctx->pfn_send_sfd_surface(ctx, encode_state, gpe_context, encoder_context, NULL);
5989 gpe->setup_interface_data(ctx, gpe_context);
/* Single media object dispatch with zeroed inline data. */
5991 memset(&media_object_param, 0, sizeof(media_object_param));
5992 memset(&media_object_inline_data, 0, sizeof(media_object_inline_data));
5993 media_object_param.pinline_data = &media_object_inline_data;
5994 media_object_param.inline_size = sizeof(media_object_inline_data);
5996 gen9_avc_run_kernel_media_object(ctx, encoder_context,
5999 &media_object_param);
6001 return VA_STATUS_SUCCESS;
6004 /**************** PreEnc Scaling *************************************/
6005 /* function to run preenc scaling: gen9_avc_preenc_kernel_scaling()
6006 * function to set preenc scaling curbe is the same one used for avc encode
6007 == gen95_avc_set_curbe_scaling4x()
6008 * function to send buffer/surface resources is the same one used for avc encode
6009 == gen9_avc_send_surface_scaling()
/*
 * Run the 4x downscaling kernel for PreEnc on one of three inputs:
 * the current frame, the past reference, or the future reference
 * (selected by scale_surface_type).  Statistics (flatness/variance/pixel
 * average) are produced only when the caller has not disabled them.
 * Returns VA_STATUS_SUCCESS.
 * NOTE(review): listing is missing lines (case labels, else keywords,
 * braces); comments describe only the visible statements.
 */
6012 gen9_avc_preenc_kernel_scaling(VADriverContextP ctx,
6013 struct encode_state *encode_state,
6014 struct intel_encoder_context *encoder_context,
6016 int scale_surface_type)
6018 struct i965_driver_data *i965 = i965_driver_data(ctx);
6019 struct i965_gpe_table *gpe = &i965->gpe_table;
6020 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
6021 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
6022 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
6023 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
6024 struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
6025 VAStatsStatisticsParameterH264 *stat_param_h264 = NULL;
6026 VAStatsStatisticsParameter *stat_param = NULL;
6027 struct i965_gpe_context *gpe_context;
6028 struct scaling_param surface_param;
6029 struct object_surface *obj_surface = NULL;
6030 struct gpe_media_object_walker_parameter media_object_walker_param;
6031 struct gpe_encoder_kernel_walker_parameter kernel_walker_param;
6032 unsigned int downscaled_width_in_mb, downscaled_height_in_mb;
6033 int media_function = 0;
6035 int enable_statistics_output;
6037 stat_param_h264 = avc_state->stat_param;
6038 assert(stat_param_h264);
6039 stat_param = &stat_param_h264->stats_params;
6040 enable_statistics_output = !stat_param_h264->disable_statistics_output;
/* PreEnc always uses the 4x scaling kernel. */
6042 memset(&surface_param, 0, sizeof(struct scaling_param));
6043 media_function = INTEL_MEDIA_STATE_4X_SCALING;
6044 kernel_idx = GEN9_AVC_KERNEL_SCALING_4X_IDX;
6045 downscaled_width_in_mb = generic_state->downscaled_width_4x_in_mb;
6046 downscaled_height_in_mb = generic_state->downscaled_height_4x_in_mb;
6048 surface_param.input_frame_width = generic_state->frame_width_in_pixel;
6049 surface_param.input_frame_height = generic_state->frame_height_in_pixel;
6050 surface_param.output_frame_width = generic_state->frame_width_4x;
6051 surface_param.output_frame_height = generic_state->frame_height_4x;
6052 surface_param.use_4x_scaling = 1 ;
6053 surface_param.use_16x_scaling = 0 ;
6054 surface_param.use_32x_scaling = 0 ;
6055 surface_param.enable_mb_flatness_check = enable_statistics_output;
6056 surface_param.enable_mb_variance_output = enable_statistics_output;
6057 surface_param.enable_mb_pixel_average_output = enable_statistics_output;
6058 surface_param.blk8x8_stat_enabled = stat_param_h264->enable_8x8_statistics;
/* Select input/output surfaces and the statistics destination buffer. */
6060 switch (scale_surface_type) {
6063 surface_param.input_surface = encode_state->input_yuv_object ;
6064 surface_param.output_surface = avc_ctx->preenc_scaled_4x_surface_obj ;
6066 if (enable_statistics_output) {
6067 surface_param.pres_mbv_proc_stat_buffer =
6068 &avc_ctx->preproc_stat_data_out_buffer;
6069 surface_param.mbv_proc_stat_enabled = 1;
6071 surface_param.mbv_proc_stat_enabled = 0;
6072 surface_param.pres_mbv_proc_stat_buffer = NULL;
6076 case SCALE_PAST_REF_PIC:
6077 obj_surface = SURFACE(stat_param->past_references[0].picture_id);
6078 assert(obj_surface);
6079 surface_param.input_surface = obj_surface;
6080 surface_param.output_surface = avc_ctx->preenc_past_ref_scaled_4x_surface_obj;
/* Past-ref stats only when the app supplied a stat output buffer. */
6082 if (stat_param->past_ref_stat_buf) {
6083 surface_param.pres_mbv_proc_stat_buffer =
6084 &avc_ctx->preenc_past_ref_stat_data_out_buffer;
6085 surface_param.mbv_proc_stat_enabled = 1;
6087 surface_param.mbv_proc_stat_enabled = 0;
6088 surface_param.pres_mbv_proc_stat_buffer = NULL;
6092 case SCALE_FUTURE_REF_PIC:
6094 obj_surface = SURFACE(stat_param->future_references[0].picture_id);
6095 assert(obj_surface);
6096 surface_param.input_surface = obj_surface;
6097 surface_param.output_surface = avc_ctx->preenc_future_ref_scaled_4x_surface_obj;
6099 if (stat_param->future_ref_stat_buf) {
6100 surface_param.pres_mbv_proc_stat_buffer =
6101 &avc_ctx->preenc_future_ref_stat_data_out_buffer;
6102 surface_param.mbv_proc_stat_enabled = 1;
6104 surface_param.mbv_proc_stat_enabled = 0;
6105 surface_param.pres_mbv_proc_stat_buffer = NULL;
6112 gpe_context = &(avc_ctx->context_scaling.gpe_contexts[kernel_idx]);
6114 gpe->context_init(ctx, gpe_context);
6115 gpe->reset_binding_table(ctx, gpe_context);
6117 generic_ctx->pfn_set_curbe_scaling4x(ctx, encode_state, gpe_context, encoder_context, &surface_param);
/* Output surface format flags are consumed by the surface-send hook. */
6119 surface_param.scaling_out_use_16unorm_surf_fmt = 0 ;
6120 surface_param.scaling_out_use_32unorm_surf_fmt = 1 ;
6122 /* No need of explicit flatness_check surface allocation. The field mb_is_flat
6123 * VAStatsStatisticsH264 will be used to store the output. */
6124 surface_param.enable_mb_flatness_check = 0;
6125 generic_ctx->pfn_send_scaling_surface(ctx, encode_state, gpe_context, encoder_context, &surface_param);
6127 /* setup the interface data */
6128 gpe->setup_interface_data(ctx, gpe_context);
6130 memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
6131 /* the scaling is based on 8x8 blk level */
6132 kernel_walker_param.resolution_x = downscaled_width_in_mb * 2;
6133 kernel_walker_param.resolution_y = downscaled_height_in_mb * 2;
6134 kernel_walker_param.no_dependency = 1;
6136 i965_init_media_object_walker_parameter(&kernel_walker_param, &media_object_walker_param);
6138 gen9_avc_run_kernel_media_object_walker(ctx, encoder_context,
6141 &media_object_walker_param);
6143 return VA_STATUS_SUCCESS;
6146 /**************** PreEnc HME *************************************/
6147 /* function to run preenc hme is the same one we use in avc encode:
6148 == gen9_avc_kernel_me()
6149 * function to set preenc hme curbe: gen9_avc_preenc_set_curbe_me()
6150 * function to send hme buffer/surface: gen9_avc_preenc_send_surface_me()
/*
 * Fill the CURBE for the PreEnc HME (hierarchical motion estimation) kernel.
 * Starts from a preset init table, then overrides per-frame fields:
 * downscaled picture size, QP, search-path table, reference counts and the
 * output surface BTI indices.  Only the 4x HME level applies to PreEnc.
 * NOTE(review): listing is missing lines (switch default, braces, the
 * trailing parameter); comments describe only the visible statements.
 */
6153 gen9_avc_preenc_set_curbe_me(VADriverContextP ctx,
6154 struct encode_state *encode_state,
6155 struct i965_gpe_context *gpe_context,
6156 struct intel_encoder_context *encoder_context,
6159 gen9_avc_fei_me_curbe_data *curbe_cmd;
6160 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
6161 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
6162 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
6163 VAStatsStatisticsParameterH264 * stat_param_h264 = avc_state->stat_param;
6164 VAStatsStatisticsParameter *stat_param = &stat_param_h264->stats_params;
6166 struct me_param * curbe_param = (struct me_param *)param ;
6167 unsigned char use_mv_from_prev_step = 0;
6168 unsigned char write_distortions = 0;
6169 unsigned char me_method = gen9_avc_p_me_method[generic_state->preset];
6170 unsigned char seach_table_idx = 0;
6171 unsigned char mv_shift_factor = 0, prev_mv_read_pos_factor = 0;
6172 unsigned int downscaled_width_in_mb, downscaled_height_in_mb;
6173 unsigned int scale_factor = 0;
/* Per-HME-level factors; only the 4x case is visible in this listing.
 * NOTE(review): the case that assigns scale_factor is among the missing
 * lines — presumably scale_factor = 4 here; confirm against upstream. */
6175 switch (curbe_param->hme_type) {
6176 case INTEL_ENC_HME_4x:
6177 use_mv_from_prev_step = 0;
6178 write_distortions = 0;
6179 mv_shift_factor = 2;
6181 prev_mv_read_pos_factor = 0;
6188 curbe_cmd = i965_gpe_context_map_curbe(gpe_context);
/* Downscaled size in MBs, rounded up to a whole macroblock. */
6192 downscaled_width_in_mb = ALIGN(generic_state->frame_width_in_pixel / scale_factor, 16) / 16;
6193 downscaled_height_in_mb = ALIGN(generic_state->frame_height_in_pixel / scale_factor, 16) / 16;
6195 memcpy(curbe_cmd, gen9_avc_me_curbe_init_data, sizeof(gen9_avc_me_curbe_init_data));
6197 curbe_cmd->dw3.sub_pel_mode = stat_param_h264->sub_pel_mode;
6198 if (avc_state->field_scaling_output_interleaved) {
6199 /* frame set to zero, field specified */
6200 curbe_cmd->dw3.src_access = 0;
6201 curbe_cmd->dw3.ref_access = 0;
6202 curbe_cmd->dw7.src_field_polarity = 0;
6204 curbe_cmd->dw4.picture_height_minus1 = downscaled_height_in_mb - 1;
6205 curbe_cmd->dw4.picture_width = downscaled_width_in_mb;
6206 curbe_cmd->dw5.qp_prime_y = stat_param_h264->frame_qp;
6208 curbe_cmd->dw6.use_mv_from_prev_step = use_mv_from_prev_step;
6209 curbe_cmd->dw6.write_distortions = write_distortions;
6210 curbe_cmd->dw6.super_combine_dist = gen9_avc_super_combine_dist[generic_state->preset];
6211 curbe_cmd->dw6.max_vmvr = i965_avc_get_max_mv_len(INTEL_AVC_LEVEL_52) * 4;//frame only
/* B frames switch to the bidirectional ME method and search table. */
6213 if (generic_state->frame_type == SLICE_TYPE_B) {
6214 curbe_cmd->dw1.bi_weight = 32;
6215 curbe_cmd->dw13.num_ref_idx_l1_minus1 = stat_param->num_future_references - 1;
6216 me_method = gen9_avc_b_me_method[generic_state->preset];
6217 seach_table_idx = 1;
6220 if (generic_state->frame_type == SLICE_TYPE_P ||
6221 generic_state->frame_type == SLICE_TYPE_B)
6222 curbe_cmd->dw13.num_ref_idx_l0_minus1 = stat_param->num_past_references - 1;
6224 curbe_cmd->dw15.prev_mv_read_pos_factor = prev_mv_read_pos_factor;
6225 curbe_cmd->dw15.mv_shift_factor = mv_shift_factor;
/* Copy the 14-dword search path for the chosen method into dw16..dw29. */
6227 memcpy(&curbe_cmd->dw16, table_enc_search_path[seach_table_idx][me_method], 14 * sizeof(int));
/* Output/input surface BTI indices for the ME kernel. */
6229 curbe_cmd->dw32._4x_memv_output_data_surf_index = GEN9_AVC_ME_MV_DATA_SURFACE_INDEX;
6230 curbe_cmd->dw33._16x_32x_memv_input_data_surf_index = (curbe_param->hme_type == INTEL_ENC_HME_32x) ? GEN9_AVC_32XME_MV_DATA_SURFACE_INDEX : GEN9_AVC_16XME_MV_DATA_SURFACE_INDEX ;
6231 curbe_cmd->dw34._4x_me_output_dist_surf_index = GEN9_AVC_ME_DISTORTION_SURFACE_INDEX;
6232 curbe_cmd->dw35._4x_me_output_brc_dist_surf_index = GEN9_AVC_ME_BRC_DISTORTION_INDEX;
6233 curbe_cmd->dw36.vme_fwd_inter_pred_surf_index = GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX;
6234 curbe_cmd->dw37.vme_bdw_inter_pred_surf_index = GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX;
6235 curbe_cmd->dw38.reserved = 0;
6237 i965_gpe_context_unmap_curbe(gpe_context);
/*
 * Bind the buffers/surfaces for the PreEnc HME kernel (4x level only):
 * MV data output, distortion outputs, and VME current/reference pairs for
 * each past (forward) and future (backward) reference.
 * NOTE(review): listing is missing lines (call arguments, loop braces);
 * comments describe only the visible statements.
 */
6242 gen9_avc_preenc_send_surface_me(VADriverContextP ctx,
6243 struct encode_state *encode_state,
6244 struct i965_gpe_context *gpe_context,
6245 struct intel_encoder_context *encoder_context,
6248 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
6249 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
6250 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
6251 VAStatsStatisticsParameterH264 * stat_param_h264 = avc_state->stat_param;
6252 VAStatsStatisticsParameter *stat_param = &stat_param_h264->stats_params;
6253 struct object_surface *input_surface;
6254 struct i965_gpe_resource *gpe_resource;
6255 struct me_param * curbe_param = (struct me_param *)param ;
6258 /* PreEnc Only supports 4xme */
6259 assert(curbe_param->hme_type == INTEL_ENC_HME_4x);
6261 switch (curbe_param->hme_type) {
6262 case INTEL_ENC_HME_4x : {
/* 4x HME motion-vector data output buffer. */
6264 gpe_resource = &avc_ctx->s4x_memv_data_buffer;
6265 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
6268 I965_SURFACEFORMAT_R8_UNORM,
6269 GEN9_AVC_ME_MV_DATA_SURFACE_INDEX);
6271 /* memv distortion output*/
6272 gpe_resource = &avc_ctx->s4x_memv_distortion_buffer;
6273 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
6276 I965_SURFACEFORMAT_R8_UNORM,
6277 GEN9_AVC_ME_DISTORTION_SURFACE_INDEX);
6279 /* brc distortion output*/
6280 gpe_resource = &avc_ctx->res_brc_dist_data_surface;
6281 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
6284 I965_SURFACEFORMAT_R8_UNORM,
6285 GEN9_AVC_ME_BRC_DISTORTION_INDEX);
6287 /* input past ref scaled YUV surface*/
6288 for (i = 0; i < stat_param->num_past_references; i++) {
6289 /* input current down scaled YUV surface for forward ref */
6290 input_surface = avc_ctx->preenc_scaled_4x_surface_obj;
6291 i965_add_adv_gpe_surface(ctx, gpe_context,
6293 GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX);
/* Forward references occupy odd BTI slots after the current picture. */
6295 input_surface = avc_ctx->preenc_past_ref_scaled_4x_surface_obj;
6296 i965_add_adv_gpe_surface(ctx, gpe_context,
6298 GEN9_AVC_ME_CURR_FOR_FWD_REF_INDEX + i * 2 + 1);
6301 /* input future ref scaled YUV surface*/
6302 for (i = 0; i < stat_param->num_future_references; i++) {
6303 /* input current down scaled YUV surface for backward ref */
6304 input_surface = avc_ctx->preenc_scaled_4x_surface_obj;
6305 i965_add_adv_gpe_surface(ctx, gpe_context,
6307 GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX);
6309 input_surface = avc_ctx->preenc_future_ref_scaled_4x_surface_obj;
6310 i965_add_adv_gpe_surface(ctx, gpe_context,
6312 GEN9_AVC_ME_CURR_FOR_BWD_REF_INDEX + i * 2 + 1);
6323 /**************** PreEnc PreProc *************************************/
6324 /* function to run preenc preproc: gen9_avc_preenc_kernel_preproc()
6325 * function to set preenc preproc curbe: gen9_avc_preenc_set_curbe_preproc()
6326 * function to send preproc buffer/surface: gen9_avc_preenc_send_surface_preproc ()
/*
 * Fill the CURBE for the PreEnc PreProc kernel.
 * Starts from a per-frame-type init table (I/P/B), derives the ME method
 * and search window geometry from the app-supplied FEI statistics
 * parameters, clips ref_width/ref_height to the legal VME range, and
 * programs costs, search paths and surface BTI indices.
 * NOTE(review): listing is missing many lines (case labels and the
 * assignments inside the search_window switch, else keywords, braces);
 * comments describe only the visible statements.
 */
6329 gen9_avc_preenc_set_curbe_preproc(VADriverContextP ctx,
6330 struct encode_state *encode_state,
6331 struct i965_gpe_context *gpe_context,
6332 struct intel_encoder_context *encoder_context,
6335 gen9_avc_preproc_curbe_data *cmd;
6336 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
6337 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
6338 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
6339 VAStatsStatisticsParameterH264 *stat_param_h264 = avc_state->stat_param;
6340 VAStatsStatisticsParameter *stat_param = &stat_param_h264->stats_params;
6341 unsigned char me_method = 0;
6342 unsigned int table_idx = 0;
6343 int ref_width, ref_height, len_sp;
6344 int is_bframe = (generic_state->frame_type == SLICE_TYPE_B);
6345 int is_pframe = (generic_state->frame_type == SLICE_TYPE_P);
6346 unsigned int preset = generic_state->preset;
6348 cmd = (gen9_avc_preproc_curbe_data *)i965_gpe_context_map_curbe(gpe_context);
6351 memset(cmd, 0, sizeof(gen9_avc_preproc_curbe_data));
/* Seed the CURBE from the init table matching the frame type. */
6353 switch (generic_state->frame_type) {
6355 memcpy(cmd, gen9_avc_preenc_preproc_curbe_i_frame_init_data,
6356 sizeof(gen9_avc_preproc_curbe_data));
6359 memcpy(cmd, gen9_avc_preenc_preproc_curbe_p_frame_init_data,
6360 sizeof(gen9_avc_preproc_curbe_data));
6363 memcpy(cmd, gen9_avc_preenc_preproc_curbe_b_frame_init_data,
6364 sizeof(gen9_avc_preproc_curbe_data));
6369 /* 4 means full search, 6 means diamond search */
6370 me_method = (stat_param_h264->search_window == 5) ||
6371 (stat_param_h264->search_window == 8) ? 4 : 6;
6373 ref_width = stat_param_h264->ref_width;
6374 ref_height = stat_param_h264->ref_height;
6375 len_sp = stat_param_h264->len_sp;
6376 /* If there is a search_window, discard user provided ref_width, ref_height
6377 * and search_path length */
6378 switch (stat_param_h264->search_window) {
6380 /* not use predefined search window, there should be a search_path input */
6381 if ((stat_param_h264->search_path != 0) &&
6382 (stat_param_h264->search_path != 1) &&
6383 (stat_param_h264->search_path != 2)) {
6384 WARN_ONCE("Invalid input search_path for SearchWindow=0 \n");
6387 /* 4 means full search, 6 means diamond search */
6388 me_method = (stat_param_h264->search_path == 1) ? 6 : 4;
6389 if (((ref_width * ref_height) > 2048) || (ref_width > 64) || (ref_height > 64)) {
6390 WARN_ONCE("Invalid input ref_width/ref_height in"
6391 "SearchWindow=0 case! \n");
/* Predefined windows 1..8; the assignments for each case are among the
 * lines missing from this listing. */
6397 /* Tiny - 4 SUs 24x24 window */
6404 /* Small - 9 SUs 28x28 window */
6410 /* Diamond - 16 SUs 48x40 window */
6416 /* Large Diamond - 32 SUs 48x40 window */
6422 /* Exhaustive - 48 SUs 48x40 window */
6428 /* Diamond - 16 SUs 64x32 window */
6434 /* Large Diamond - 32 SUs 64x32 window */
6440 /* Exhaustive - 48 SUs 64x32 window */
6450 /* ref_width*ref_height = Max 64x32 one direction, Max 32x32 two directions */
6452 CLIP(ref_width, 4, 32);
6453 CLIP(ref_height, 4, 32);
6454 } else if (is_pframe) {
6455 CLIP(ref_width, 4, 64);
6456 CLIP(ref_height, 4, 32);
6459 cmd->dw0.adaptive_enable =
6460 cmd->dw37.adaptive_enable = stat_param_h264->adaptive_search;
6461 cmd->dw2.max_len_sp = len_sp;
6462 cmd->dw38.max_len_sp = 0; // HLD mandates this field to be Zero
6463 cmd->dw2.max_num_su = cmd->dw38.max_num_su = 57;
6464 cmd->dw3.src_access =
6465 cmd->dw3.ref_access = 0; // change it to (is_frame ? 0: 1) when interlace is supported
/* FTQ is only meaningful for inter frames and when globally enabled. */
6467 if (generic_state->frame_type != SLICE_TYPE_I && avc_state->ftq_enable)
6468 cmd->dw3.ft_enable = stat_param_h264->ft_enable;
6470 cmd->dw3.ft_enable = 0;
6472 cmd->dw2.pic_width = generic_state->frame_width_in_mbs;
6473 cmd->dw6.pic_height = cmd->dw5.slice_mb_height = generic_state->frame_height_in_mbs;
6474 cmd->dw3.sub_mb_part_mask = stat_param_h264->sub_mb_part_mask;
6475 cmd->dw3.sub_pel_mode = stat_param_h264->sub_pel_mode;
6476 cmd->dw3.inter_sad = stat_param_h264->inter_sad;
6477 cmd->dw3.intra_sad = stat_param_h264->intra_sad;
6478 cmd->dw4.hme_enable = generic_state->hme_enabled;
6479 cmd->dw4.frame_qp = stat_param_h264->frame_qp;
6480 cmd->dw4.per_mb_qp_enable = stat_param_h264->mb_qp;
/* NOTE(review): this enables multiple MV predictors only for I frames;
 * presumably intended to honor mv_predictor_ctrl on inter frames — the
 * ternary arms look inverted; confirm against upstream. */
6482 cmd->dw4.multiple_mv_predictor_per_mb_enable =
6483 (generic_state->frame_type != SLICE_TYPE_I) ? 0 : stat_param_h264->mv_predictor_ctrl;
6485 cmd->dw4.disable_mv_output = (generic_state->frame_type == SLICE_TYPE_I) ? 1 : stat_param_h264->disable_mv_output;
6486 cmd->dw4.disable_mb_stats = stat_param_h264->disable_statistics_output;
6488 cmd->dw4.fwd_ref_pic_enable = (stat_param->num_past_references > 0) ? 1 : 0;
6489 cmd->dw4.bwd_ref_pic_enable = (stat_param->num_future_references > 0) ? 1 : 0;
6491 cmd->dw7.intra_part_mask = stat_param_h264->intra_part_mask;
/* Mode/MV cost table for this slice type and QP. */
6494 memcpy(&(cmd->dw8), gen75_avc_mode_mv_cost_table[slice_type_kernel[generic_state->frame_type]][stat_param_h264->frame_qp], 8 * sizeof(unsigned int));
6496 /* reset all except sic_fwd_trans_coeff_threshold_* from dw8 to dw15 */
6497 memset(&(cmd->dw8), 0, 6 * (sizeof(unsigned int)));
6499 /* search path tables */
6500 table_idx = (generic_state->frame_type == SLICE_TYPE_B) ? 1 : 0;
6501 memcpy(&(cmd->dw16), table_enc_search_path[table_idx][me_method], 16 * sizeof(unsigned int));
6503 if (stat_param_h264->intra_part_mask == 0x07)
6504 cmd->dw31.intra_compute_type = 3;
6506 cmd->dw38.ref_threshold = 400;
6507 cmd->dw39.hme_ref_windows_comb_threshold = (generic_state->frame_type == SLICE_TYPE_B) ? gen9_avc_hme_b_combine_len[preset] : gen9_avc_hme_combine_len[preset];
/* Per-slice-type tuning of skip/search behavior. */
6509 if (generic_state->frame_type == SLICE_TYPE_I) {
6510 cmd->dw0.skip_mode_enable = cmd->dw37.skip_mode_enable = 0;
6511 cmd->dw36.hme_combine_overlap = 0;
6512 } else if (generic_state->frame_type == SLICE_TYPE_P) {
6513 cmd->dw1.max_num_mvs = i965_avc_get_max_mv_per_2mb(INTEL_AVC_LEVEL_52) / 2;
6514 cmd->dw3.bme_disable_fbr = 1;
6515 cmd->dw5.ref_width = cmd->dw39.ref_width = ref_width;
6516 cmd->dw5.ref_height = cmd->dw39.ref_height = ref_height;
6517 cmd->dw7.non_skip_zmv_added = 1;
6518 cmd->dw7.non_skip_mode_added = 1;
6519 cmd->dw7.skip_center_mask = 1;
6520 cmd->dw32.max_vmv_r =
6521 i965_avc_get_max_mv_len(INTEL_AVC_LEVEL_52) * 4;
6522 cmd->dw36.hme_combine_overlap = 1;
/* NOTE(review): BUG SUSPECTED — this branch re-tests SLICE_TYPE_P even
 * though its comment says "B slice", so the B-slice settings below are
 * unreachable (the first SLICE_TYPE_P branch above always wins).
 * Presumably the condition should be SLICE_TYPE_B (or a plain else);
 * confirm against the upstream source before changing. */
6524 } else if (generic_state->frame_type == SLICE_TYPE_P) { /* B slice */
6526 cmd->dw1.max_num_mvs = i965_avc_get_max_mv_per_2mb(INTEL_AVC_LEVEL_52) / 2;
6527 cmd->dw3.search_ctrl = 0;
6528 cmd->dw3.skip_type = 1;
6529 cmd->dw5.ref_width = cmd->dw39.ref_width = ref_width;
6530 cmd->dw5.ref_height = cmd->dw39.ref_height = ref_height;
6531 cmd->dw7.skip_center_mask = 0xff;
6532 cmd->dw32.max_vmv_r =
6533 i965_avc_get_max_mv_len(INTEL_AVC_LEVEL_52) * 4;
6534 cmd->dw36.hme_combine_overlap = 1;
/* Binding-table indices for the PreProc kernel's surfaces. */
6537 cmd->dw40.curr_pic_surf_index = GEN9_AVC_PREPROC_CURR_Y_INDEX;
6538 cmd->dw41.hme_mv_dat_surf_index = GEN9_AVC_PREPROC_HME_MV_DATA_INDEX;
6539 cmd->dw42.mv_predictor_surf_index = GEN9_AVC_PREPROC_MV_PREDICTOR_INDEX;
6540 cmd->dw43.mb_qp_surf_index = GEN9_AVC_PREPROC_MBQP_INDEX;
6541 cmd->dw44.mv_data_out_surf_index = GEN9_AVC_PREPROC_MV_DATA_INDEX;
6542 cmd->dw45.mb_stats_out_surf_index = GEN9_AVC_PREPROC_MB_STATS_INDEX;
6543 cmd->dw46.vme_inter_prediction_surf_index = GEN9_AVC_PREPROC_VME_CURR_PIC_IDX_0_INDEX;
6544 cmd->dw47.vme_Inter_prediction_mr_surf_index = GEN9_AVC_PREPROC_VME_CURR_PIC_IDX_1_INDEX;
6545 cmd->dw48.ftq_lut_surf_index = GEN9_AVC_PREPROC_FTQ_LUT_INDEX;
6547 i965_gpe_context_unmap_curbe(gpe_context);
/*
 * Bind all buffers/surfaces for the PreEnc PreProc kernel: source YUV
 * (Y and UV planes), optional HME MV data, optional MV predictor and
 * per-MB QP inputs, FTQ LUT, MV/statistics outputs, and the VME
 * current/forward/backward reference surfaces.
 * NOTE(review): listing is missing lines (surface/size call arguments,
 * else branches, braces); comments describe only the visible statements.
 */
6551 gen9_avc_preenc_send_surface_preproc(VADriverContextP ctx,
6552 struct encode_state *encode_state,
6553 struct i965_gpe_context *gpe_context,
6554 struct intel_encoder_context *encoder_context,
6557 struct i965_driver_data *i965 = i965_driver_data(ctx);
6558 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
6559 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
6560 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
6561 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
6562 struct object_surface *obj_surface;
6563 struct i965_gpe_resource *gpe_resource;
6564 VASurfaceID surface_id;
6565 VAStatsStatisticsParameterH264 *stat_param_h264 = avc_state->stat_param;
6566 VAStatsStatisticsParameter *stat_param = &stat_param_h264->stats_params;
6567 unsigned int size = 0, frame_mb_nums = 0;
6569 frame_mb_nums = generic_state->frame_width_in_mbs * generic_state->frame_height_in_mbs;
6571 /* input yuv surface, Y index */
6572 obj_surface = encode_state->input_yuv_object;
6573 i965_add_2d_gpe_surface(ctx,
6578 I965_SURFACEFORMAT_R8_UNORM,
6579 GEN9_AVC_PREPROC_CURR_Y_INDEX);
6581 /* input yuv surface, UV index */
6582 i965_add_2d_gpe_surface(ctx,
6587 I965_SURFACEFORMAT_R16_UINT,
6588 GEN9_AVC_MBENC_CURR_UV_INDEX);
6591 if (generic_state->hme_enabled) {
6592 /* HME mv data buffer */
6593 gpe_resource = &avc_ctx->s4x_memv_data_buffer;
6594 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
6597 I965_SURFACEFORMAT_R8_UNORM,
6598 GEN9_AVC_PREPROC_HME_MV_DATA_INDEX);
6601 /* mv predictor buffer */
6602 if (stat_param_h264->mv_predictor_ctrl) {
6603 size = frame_mb_nums * FEI_AVC_MV_PREDICTOR_BUFFER_SIZE;
6604 gpe_resource = &avc_ctx->preproc_mv_predictor_buffer;
6605 i965_add_buffer_gpe_surface(ctx,
6611 GEN9_AVC_PREPROC_MV_PREDICTOR_INDEX);
/* Per-MB QP input plus the FTQ lookup table (MBBRC constant data). */
6615 if (stat_param_h264->mb_qp) {
6616 size = frame_mb_nums * FEI_AVC_QP_BUFFER_SIZE;
6617 gpe_resource = &avc_ctx->preproc_mb_qp_buffer;
6618 i965_add_buffer_gpe_surface(ctx,
6624 GEN9_AVC_PREPROC_MBQP_INDEX);
6626 gpe_resource = &avc_ctx->res_mbbrc_const_data_buffer;
6627 size = 16 * AVC_QP_MAX * 4;
6628 i965_add_buffer_gpe_surface(ctx,
6634 GEN9_AVC_PREPROC_FTQ_LUT_INDEX);
6638 /* mv data output buffer */
6639 if (!stat_param_h264->disable_mv_output) {
6640 gpe_resource = &avc_ctx->preproc_mv_data_out_buffer;
6641 size = frame_mb_nums * FEI_AVC_MV_DATA_BUFFER_SIZE;
6642 i965_add_buffer_gpe_surface(ctx,
6648 GEN9_AVC_PREPROC_MV_DATA_INDEX);
6651 /* statistics output buffer */
6652 if (!stat_param_h264->disable_statistics_output) {
6653 gpe_resource = &avc_ctx->preproc_stat_data_out_buffer;
6654 size = frame_mb_nums * PREENC_AVC_STATISTICS_BUFFER_SIZE;
6655 i965_add_buffer_gpe_surface(ctx,
6661 GEN9_AVC_PREPROC_MB_STATS_INDEX);
/* VME current picture, bound twice to satisfy the kernel's BTI layout. */
6665 obj_surface = encode_state->input_yuv_object;
6666 i965_add_2d_gpe_surface(ctx,
6671 I965_SURFACEFORMAT_R8_UNORM,
6672 GEN9_AVC_PREPROC_VME_CURR_PIC_IDX_0_INDEX);
6674 /* vme cur pic y (repeating based on required BTI order for the media kernel) */
6675 obj_surface = encode_state->input_yuv_object;
6676 i965_add_2d_gpe_surface(ctx,
6681 I965_SURFACEFORMAT_R8_UNORM,
6682 GEN9_AVC_PREPROC_VME_CURR_PIC_IDX_1_INDEX);
6684 /* vme forward ref */
6685 /* Only supports one past ref */
6686 if (stat_param->num_past_references > 0) {
6687 surface_id = stat_param->past_references[0].picture_id;
6688 assert(surface_id != VA_INVALID_ID);
6689 obj_surface = SURFACE(surface_id);
6692 i965_add_adv_gpe_surface(ctx, gpe_context,
6694 GEN9_AVC_PREPROC_VME_FWD_PIC_IDX0_INDEX);
6698 /* vme future ref */
6699 /* Only supports one future ref */
6700 if (stat_param->num_future_references > 0) {
6701 surface_id = stat_param->future_references[0].picture_id;
6702 assert(surface_id != VA_INVALID_ID);
6703 obj_surface = SURFACE(surface_id);
6706 i965_add_adv_gpe_surface(ctx, gpe_context,
6708 GEN9_AVC_PREPROC_VME_BWD_PIC_IDX0_0_INDEX);
/* The same future ref is bound a second time at the second BWD slot. */
6710 surface_id = stat_param->future_references[0].picture_id;
6711 assert(surface_id != VA_INVALID_ID);
6712 obj_surface = SURFACE(surface_id);
6715 i965_add_adv_gpe_surface(ctx, gpe_context,
6717 GEN9_AVC_PREPROC_VME_BWD_PIC_IDX0_1_INDEX);
/*
 * Run the FEI PreEnc "preproc" media kernel for the current frame.
 *
 * Sequence (as visible here): init the GPE context and reset its binding
 * table, program CURBE and surface state through the generic-context
 * callbacks, upload the FTQ LUT when per-MB QP is in use, then dispatch
 * the kernel with a no-dependency media-object walker sized to the frame
 * in macroblocks.
 *
 * NOTE(review): this excerpt is non-contiguous (interior lines elided),
 * so some statements between the visible ones are not shown.
 *
 * Returns VA_STATUS_SUCCESS.
 */
6725 gen9_avc_preenc_kernel_preproc(VADriverContextP ctx,
6726 struct encode_state *encode_state,
6727 struct intel_encoder_context *encoder_context)
6729 struct i965_driver_data *i965 = i965_driver_data(ctx);
6730 struct i965_gpe_table *gpe = &i965->gpe_table;
6731 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
6732 struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
6733 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
6734 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
6735 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
6736 VAStatsStatisticsParameterH264 * stat_param_h264 = avc_state->stat_param;
6737 struct i965_gpe_context *gpe_context;
6738 struct gpe_media_object_walker_parameter media_object_walker_param;
6739 struct gpe_encoder_kernel_walker_parameter kernel_walker_param;
6740 int media_function = INTEL_MEDIA_STATE_PREPROC;
6741 struct i965_gpe_resource *gpe_resource = NULL;
6742 unsigned int * data = NULL;
/* 16 DWs per QP, 52 QP levels (AVC QP range) — size in DWs, not bytes. */
6743 unsigned int ftq_lut_table_size = 16 * 52; /* 16 DW per QP for each qp*/
6745 gpe_context = &(avc_ctx->context_preproc.gpe_contexts);
6746 gpe->context_init(ctx, gpe_context);
6747 gpe->reset_binding_table(ctx, gpe_context);
/* CURBE and surface setup are indirected through the generic context so
 * gen-specific implementations can be plugged in. */
6750 generic_ctx->pfn_set_curbe_preproc(ctx, encode_state, gpe_context, encoder_context, NULL);
6753 generic_ctx->pfn_send_preproc_surface(ctx, encode_state, gpe_context, encoder_context, NULL);
6755 gpe->setup_interface_data(ctx, gpe_context);
6757 /* Set up FtqLut Buffer if there is QP change within a frame */
6758 if (stat_param_h264->mb_qp) {
6759 gpe_resource = &(avc_ctx->res_mbbrc_const_data_buffer);
6760 assert(gpe_resource);
6761 data = i965_map_gpe_resource(gpe_resource);
/* NOTE(review): an unmap of this resource is presumably on an elided
 * line — confirm against the full source. */
6763 memcpy(data, gen9_avc_preenc_preproc_ftq_lut, ftq_lut_table_size * sizeof(unsigned int));
/* One walker thread per MB; no inter-thread dependency for preproc. */
6766 memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
6767 kernel_walker_param.resolution_x = generic_state->frame_width_in_mbs ;
6768 kernel_walker_param.resolution_y = generic_state->frame_height_in_mbs ;
6769 kernel_walker_param.no_dependency = 1;
6771 i965_init_media_object_walker_parameter(&kernel_walker_param, &media_object_walker_param);
6773 gen9_avc_run_kernel_media_object_walker(ctx, encoder_context,
6776 &media_object_walker_param);
6778 return VA_STATUS_SUCCESS;
/*
 * Fill the Gen8 MBEnc kernel CURBE for the current frame.
 *
 * Picks a per-frame-type init template, then overrides fields from the
 * driver state: search/FTQ/skip controls, QP, mode/MV cost tables,
 * multi-ref prediction flags, rolling-intra-refresh, ROI, and finally
 * the binding-table indices (dw65..dw86).
 *
 * NOTE(review): this excerpt is non-contiguous (case labels, braces and
 * some statements are elided), so control-flow shown here is partial.
 */
6783 gen8_avc_set_curbe_mbenc(VADriverContextP ctx,
6784 struct encode_state *encode_state,
6785 struct i965_gpe_context *gpe_context,
6786 struct intel_encoder_context *encoder_context,
6789 struct i965_driver_data *i965 = i965_driver_data(ctx);
6790 gen8_avc_mbenc_curbe_data *cmd;
6791 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
6792 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
6793 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
6795 VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
6796 VAEncPictureParameterBufferH264 *pic_param = avc_state->pic_param;
6797 VASurfaceID surface_id;
6798 struct object_surface *obj_surface;
6800 struct mbenc_param * curbe_param = (struct mbenc_param *)param ;
6801 unsigned char qp = 0;
6802 unsigned char me_method = 0;
6803 unsigned int mbenc_i_frame_dist_in_use = curbe_param->mbenc_i_frame_dist_in_use;
6804 unsigned int table_idx = 0;
6805 unsigned int curbe_size = 0;
6807 unsigned int preset = generic_state->preset;
/* Map the CURBE buffer; only the Gen8 layout is handled here. */
6808 if (IS_GEN8(i965->intel.device_info)) {
6809 cmd = (gen8_avc_mbenc_curbe_data *)i965_gpe_context_map_curbe(gpe_context);
6812 curbe_size = sizeof(gen8_avc_mbenc_curbe_data);
6813 memset(cmd, 0, curbe_size);
/* Seed the CURBE from a template: I-frame-distortion mode has its own
 * table, otherwise pick by frame type (case labels elided here). */
6815 if (mbenc_i_frame_dist_in_use) {
6816 memcpy(cmd, gen8_avc_mbenc_curbe_i_frame_dist_init_data, curbe_size);
6818 switch (generic_state->frame_type) {
6820 memcpy(cmd, gen8_avc_mbenc_curbe_normal_i_frame_init_data, curbe_size);
6823 memcpy(cmd, gen8_avc_mbenc_curbe_normal_p_frame_init_data, curbe_size);
6826 memcpy(cmd, gen8_avc_mbenc_curbe_normal_b_frame_init_data, curbe_size);
/* ME method comes from the preset tables; B frames use the B table. */
6838 me_method = (generic_state->frame_type == SLICE_TYPE_B) ? gen9_avc_b_me_method[preset] : gen9_avc_p_me_method[preset];
/* Effective slice QP = picture init QP + slice delta. */
6839 qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
6841 cmd->dw0.adaptive_enable = gen9_avc_enable_adaptive_search[preset];
6842 cmd->dw37.adaptive_enable = gen9_avc_enable_adaptive_search[preset];
6843 cmd->dw0.t8x8_flag_for_inter_enable = avc_state->transform_8x8_mode_enable;
6844 cmd->dw37.t8x8_flag_for_inter_enable = avc_state->transform_8x8_mode_enable;
6846 cmd->dw2.max_len_sp = gen9_avc_max_len_sp[preset];
6847 cmd->dw38.max_len_sp = 0;
/* Frame (progressive) access only. */
6849 cmd->dw3.src_access = 0;
6850 cmd->dw3.ref_access = 0;
/* FTQ (fast transform quantization) only applies to inter frames; the
 * preset table packs the P flag in bit 0 and the B flag in bit 1. */
6852 if (avc_state->ftq_enable && (generic_state->frame_type != SLICE_TYPE_I)) {
6853 //disable ftq_override by now.
6854 if (avc_state->ftq_override) {
6855 cmd->dw3.ftq_enable = avc_state->ftq_enable;
6858 if (generic_state->frame_type == SLICE_TYPE_P) {
6859 cmd->dw3.ftq_enable = gen9_avc_max_ftq_based_skip[preset] & 0x01;
6862 cmd->dw3.ftq_enable = (gen9_avc_max_ftq_based_skip[preset] >> 1) & 0x01;
6866 cmd->dw3.ftq_enable = 0;
6869 if (avc_state->disable_sub_mb_partion)
6870 cmd->dw3.sub_mb_part_mask = 0x7;
/* I-frame-distortion mode runs on the 4x-downscaled surface, so sizes
 * come from the downscaled dimensions; otherwise use full frame. */
6872 if (mbenc_i_frame_dist_in_use) {
6873 cmd->dw2.pitch_width = generic_state->downscaled_width_4x_in_mb;
6874 cmd->dw4.picture_height_minus1 = generic_state->downscaled_height_4x_in_mb - 1;
6875 cmd->dw5.slice_mb_height = (avc_state->slice_height + 4 - 1) / 4;
6876 cmd->dw6.batch_buffer_end = 0;
6877 cmd->dw31.intra_compute_type = 1;
6879 cmd->dw2.pitch_width = generic_state->frame_width_in_mbs;
6880 cmd->dw4.picture_height_minus1 = generic_state->frame_height_in_mbs - 1;
6881 cmd->dw5.slice_mb_height = (avc_state->arbitrary_num_mbs_in_slice) ? generic_state->frame_height_in_mbs : avc_state->slice_height;
/* Mode/MV cost table: 8 DWs starting at dw8, indexed by slice type+QP. */
6884 memcpy(&(cmd->dw8), gen9_avc_mode_mv_cost_table[slice_type_kernel[generic_state->frame_type]][qp], 8 * sizeof(unsigned int));
6885 if ((generic_state->frame_type == SLICE_TYPE_I) && avc_state->old_mode_cost_enable) {
6886 } else if (avc_state->skip_bias_adjustment_enable) {
6887 /* Load different MvCost for P picture when SkipBiasAdjustment is enabled
6888 // No need to check for P picture as the flag is only enabled for P picture */
6889 cmd->dw11.value = gen9_avc_mv_cost_p_skip_adjustment[qp];
/* Search path table: 16 DWs at dw16; index 1 selects the B-frame path. */
6892 table_idx = (generic_state->frame_type == SLICE_TYPE_B) ? 1 : 0;
6893 memcpy(&(cmd->dw16), table_enc_search_path[table_idx][me_method], 16 * sizeof(unsigned int));
6895 cmd->dw4.enable_intra_cost_scaling_for_static_frame = avc_state->sfd_enable && generic_state->hme_enabled;
6896 cmd->dw4.field_parity_flag = 0;//bottom field
6897 cmd->dw4.enable_cur_fld_idr = 0;//field realted
6898 cmd->dw4.contrained_intra_pred_flag = pic_param->pic_fields.bits.constrained_intra_pred_flag;
6899 cmd->dw4.hme_enable = generic_state->hme_enabled;
6900 cmd->dw4.picture_type = slice_type_kernel[generic_state->frame_type];
6901 cmd->dw4.use_actual_ref_qp_value = generic_state->hme_enabled && (gen9_avc_mr_disable_qp_check[preset] == 0);
6903 cmd->dw7.intra_part_mask = avc_state->transform_8x8_mode_enable ? 0 : 0x02;
6904 cmd->dw7.src_field_polarity = 0;//field related
6906 /*ftq_skip_threshold_lut set,dw14 /15*/
6908 /*r5 disable NonFTQSkipThresholdLUT*/
/* Skip value depends on block-based-skip, 8x8 transform and QP. */
6909 if (generic_state->frame_type == SLICE_TYPE_P) {
6910 cmd->dw32.skip_val = gen9_avc_skip_value_p[avc_state->block_based_skip_enable][avc_state->transform_8x8_mode_enable][qp];
6911 } else if (generic_state->frame_type == SLICE_TYPE_B) {
6912 cmd->dw32.skip_val = gen9_avc_skip_value_b[avc_state->block_based_skip_enable][avc_state->transform_8x8_mode_enable][qp];
6915 cmd->dw13.qp_prime_y = qp;
6916 cmd->dw13.qp_prime_cb = qp;
6917 cmd->dw13.qp_prime_cr = qp;
6918 cmd->dw13.target_size_in_word = 0xff;//hardcode for brc disable
/* Multi-reference prediction: 128 disables, 1 enables, per preset
 * (case labels elided in this excerpt). */
6920 if ((generic_state->frame_type != SLICE_TYPE_I) && avc_state->multi_pre_enable) {
6921 switch (gen9_avc_multi_pred[preset]) {
6923 cmd->dw32.mult_pred_l0_disable = 128;
6924 cmd->dw32.mult_pred_l1_disable = 128;
6927 cmd->dw32.mult_pred_l0_disable = (generic_state->frame_type == SLICE_TYPE_P) ? 1 : 128;
6928 cmd->dw32.mult_pred_l1_disable = 128;
6931 cmd->dw32.mult_pred_l0_disable = (generic_state->frame_type == SLICE_TYPE_B) ? 1 : 128;
6932 cmd->dw32.mult_pred_l1_disable = (generic_state->frame_type == SLICE_TYPE_B) ? 1 : 128;
6935 cmd->dw32.mult_pred_l0_disable = 1;
6936 cmd->dw32.mult_pred_l1_disable = (generic_state->frame_type == SLICE_TYPE_B) ? 1 : 128;
6941 cmd->dw32.mult_pred_l0_disable = 128;
6942 cmd->dw32.mult_pred_l1_disable = 128;
6945 if (generic_state->frame_type == SLICE_TYPE_B) {
/* NOTE(review): the same field is assigned twice below; the second
 * assignment was presumably meant for list1_ref_id1_* — confirm
 * against the gen9 variant of this function. */
6946 cmd->dw34.list1_ref_id0_frm_field_parity = 0; //frame only
6947 cmd->dw34.list1_ref_id0_frm_field_parity = 0;
6948 cmd->dw34.b_direct_mode = slice_param->direct_spatial_mv_pred_flag;
6951 cmd->dw34.b_original_bff = 0; //frame only
6952 cmd->dw34.enable_mb_flatness_check_optimization = avc_state->flatness_check_enable;
6953 cmd->dw34.roi_enable_flag = curbe_param->roi_enabled;
6954 cmd->dw34.mad_enable_falg = avc_state->mad_enable;
6955 cmd->dw34.mb_brc_enable = avc_state->mb_qp_data_enable || generic_state->mb_brc_enabled;
6956 cmd->dw34.arbitray_num_mbs_per_slice = avc_state->arbitrary_num_mbs_in_slice;
6957 cmd->dw34.force_non_skip_check = avc_state->mb_disable_skip_map_enable;
6959 if (cmd->dw34.force_non_skip_check) {
6960 cmd->dw34.disable_enc_skip_check = avc_state->skip_check_disable;
6963 cmd->dw36.check_all_fractional_enable = avc_state->caf_enable;
6964 cmd->dw38.ref_threshold = 400;
6965 cmd->dw39.hme_ref_windows_comb_threshold = (generic_state->frame_type == SLICE_TYPE_B) ? gen9_avc_hme_b_combine_len[preset] : gen9_avc_hme_combine_len[preset];
/* 0 = read per-MB QP from the MBQP surface, 2 = use CURBE QP. */
6966 cmd->dw47.mb_qp_read_factor = (avc_state->mb_qp_data_enable) ? 0 : 2;
/* I-frame-distortion mode zeroes QP and intra penalties. */
6968 if (mbenc_i_frame_dist_in_use) {
6969 cmd->dw13.qp_prime_y = 0;
6970 cmd->dw13.qp_prime_cb = 0;
6971 cmd->dw13.qp_prime_cr = 0;
6972 cmd->dw33.intra_16x16_nondc_penalty = 0;
6973 cmd->dw33.intra_8x8_nondc_penalty = 0;
6974 cmd->dw33.intra_4x4_nondc_penalty = 0;
/* Per-reference actual QP values, L0 refs 0..7 and L1 refs 0..1. */
6976 if (cmd->dw4.use_actual_ref_qp_value) {
6977 cmd->dw44.actual_qp_value_for_ref_id0_list0 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 0);
6978 cmd->dw44.actual_qp_value_for_ref_id1_list0 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 1);
6979 cmd->dw44.actual_qp_value_for_ref_id2_list0 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 2);
6980 cmd->dw44.actual_qp_value_for_ref_id3_list0 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 3);
6981 cmd->dw45.actual_qp_value_for_ref_id4_list0 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 4);
6982 cmd->dw45.actual_qp_value_for_ref_id5_list0 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 5);
6983 cmd->dw45.actual_qp_value_for_ref_id6_list0 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 6);
6984 cmd->dw45.actual_qp_value_for_ref_id7_list0 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 0, 7);
6985 cmd->dw46.actual_qp_value_for_ref_id0_list1 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 1, 0);
6986 cmd->dw46.actual_qp_value_for_ref_id1_list1 = gen9_avc_get_qp_from_ref_list(ctx, slice_param, 1, 1);
6989 table_idx = slice_type_kernel[generic_state->frame_type];
6990 cmd->dw46.ref_cost = gen8_avc_ref_cost[table_idx][qp];
/* Per-frame-type overrides: I / P / B (B branch follows the P else-if;
 * its opening line is elided in this excerpt). */
6991 if (generic_state->frame_type == SLICE_TYPE_I) {
6992 cmd->dw0.skip_mode_enable = 0;
6993 cmd->dw37.skip_mode_enable = 0;
6994 cmd->dw36.hme_combine_overlap = 0;
6995 cmd->dw47.intra_cost_sf = 16;
6996 cmd->dw34.enable_direct_bias_adjustment = 0;
6997 cmd->dw34.enable_global_motion_bias_adjustment = 0;
6999 } else if (generic_state->frame_type == SLICE_TYPE_P) {
7000 cmd->dw1.max_num_mvs = i965_avc_get_max_mv_per_2mb(avc_state->seq_param->level_idc) / 2;
7001 cmd->dw3.bme_disable_fbr = 1;
7002 cmd->dw5.ref_width = gen9_avc_search_x[preset];
7003 cmd->dw5.ref_height = gen9_avc_search_y[preset];
7004 cmd->dw7.non_skip_zmv_added = 1;
7005 cmd->dw7.non_skip_mode_added = 1;
7006 cmd->dw7.skip_center_mask = 1;
7007 cmd->dw47.intra_cost_sf = (avc_state->adaptive_intra_scaling_enable) ? gen9_avc_adaptive_intra_scaling_factor[qp] : gen9_avc_intra_scaling_factor[qp];
7008 cmd->dw47.max_vmv_r = i965_avc_get_max_mv_len(avc_state->seq_param->level_idc) * 4;//frame onlys
7009 cmd->dw36.hme_combine_overlap = 1;
7010 cmd->dw36.num_ref_idx_l0_minus_one = (avc_state->multi_pre_enable) ? slice_param->num_ref_idx_l0_active_minus1 : 0;
7011 cmd->dw39.ref_width = gen9_avc_search_x[preset];
7012 cmd->dw39.ref_height = gen9_avc_search_y[preset];
7013 cmd->dw34.enable_direct_bias_adjustment = 0;
7014 cmd->dw34.enable_global_motion_bias_adjustment = avc_state->global_motion_bias_adjustment_enable;
7015 if (avc_state->global_motion_bias_adjustment_enable)
7016 cmd->dw58.hme_mv_cost_scaling_factor = avc_state->hme_mv_cost_scaling_factor;
7018 cmd->dw1.max_num_mvs = i965_avc_get_max_mv_per_2mb(avc_state->seq_param->level_idc) / 2;
7019 cmd->dw1.bi_weight = avc_state->bi_weight;
7020 cmd->dw3.search_ctrl = 7;
7021 cmd->dw3.skip_type = 1;
7022 cmd->dw5.ref_width = gen9_avc_b_search_x[preset];
7023 cmd->dw5.ref_height = gen9_avc_b_search_y[preset];
7024 cmd->dw7.skip_center_mask = 0xff;
7025 cmd->dw47.intra_cost_sf = (avc_state->adaptive_intra_scaling_enable) ? gen9_avc_adaptive_intra_scaling_factor[qp] : gen9_avc_intra_scaling_factor[qp];
7026 cmd->dw47.max_vmv_r = i965_avc_get_max_mv_len(avc_state->seq_param->level_idc) * 4;//frame only
7027 cmd->dw36.hme_combine_overlap = 1;
/* Validate the first backward (L1) reference of the B frame. */
7028 surface_id = slice_param->RefPicList1[0].picture_id;
7029 obj_surface = SURFACE(surface_id);
7031 WARN_ONCE("Invalid backward reference frame\n");
7034 cmd->dw36.is_fwd_frame_short_term_ref = !!(slice_param->RefPicList1[0].flags & VA_PICTURE_H264_SHORT_TERM_REFERENCE);
7035 cmd->dw36.num_ref_idx_l0_minus_one = (avc_state->multi_pre_enable) ? slice_param->num_ref_idx_l0_active_minus1 : 0;
7036 cmd->dw36.num_ref_idx_l1_minus_one = (avc_state->multi_pre_enable) ? slice_param->num_ref_idx_l1_active_minus1 : 0;
7037 cmd->dw39.ref_width = gen9_avc_b_search_x[preset];
7038 cmd->dw39.ref_height = gen9_avc_b_search_y[preset];
/* B-frame temporal direct-mode distance scale factors for L0 refs. */
7039 cmd->dw40.dist_scale_factor_ref_id0_list0 = avc_state->dist_scale_factor_list0[0];
7040 cmd->dw40.dist_scale_factor_ref_id1_list0 = avc_state->dist_scale_factor_list0[1];
7041 cmd->dw41.dist_scale_factor_ref_id2_list0 = avc_state->dist_scale_factor_list0[2];
7042 cmd->dw41.dist_scale_factor_ref_id3_list0 = avc_state->dist_scale_factor_list0[3];
7043 cmd->dw42.dist_scale_factor_ref_id4_list0 = avc_state->dist_scale_factor_list0[4];
7044 cmd->dw42.dist_scale_factor_ref_id5_list0 = avc_state->dist_scale_factor_list0[5];
7045 cmd->dw43.dist_scale_factor_ref_id6_list0 = avc_state->dist_scale_factor_list0[6];
7046 cmd->dw43.dist_scale_factor_ref_id7_list0 = avc_state->dist_scale_factor_list0[7];
7047 cmd->dw34.enable_direct_bias_adjustment = avc_state->direct_bias_adjustment_enable;
7048 if (cmd->dw34.enable_direct_bias_adjustment) {
7049 cmd->dw7.non_skip_zmv_added = 1;
7050 cmd->dw7.non_skip_mode_added = 1;
7053 cmd->dw34.enable_global_motion_bias_adjustment = avc_state->global_motion_bias_adjustment_enable;
7054 if (avc_state->global_motion_bias_adjustment_enable)
7055 cmd->dw58.hme_mv_cost_scaling_factor = avc_state->hme_mv_cost_scaling_factor;
/* Record the effective block-based-skip flag back into driver state. */
7057 avc_state->block_based_skip_enable = cmd->dw3.block_based_skip_enable;
7059 if (avc_state->rolling_intra_refresh_enable) {
7060 /*by now disable it*/
7061 if (generic_state->brc_enabled) {
7062 cmd->dw4.enable_intra_refresh = false;
7063 cmd->dw34.widi_intra_refresh_en = avc_state->rolling_intra_refresh_enable;
7064 cmd->dw48.widi_intra_refresh_mbx = 0;
7065 cmd->dw58.widi_intra_refresh_mby = 0;
7067 cmd->dw4.enable_intra_refresh = true;
7068 cmd->dw34.widi_intra_refresh_en = avc_state->rolling_intra_refresh_enable;
7070 cmd->dw32.mult_pred_l0_disable = 128;
7071 /* Pass the same IntraRefreshUnit to the kernel w/o the adjustment by -1, so as to have an overlap of one MB row or column of Intra macroblocks
7072 across one P frame to another P frame, as needed by the RollingI algo */
7073 cmd->dw48.widi_intra_refresh_mbx = 0;
7074 cmd->dw48.widi_intra_refresh_unit_in_mb_minus1 = 0;
7075 cmd->dw48.widi_intra_refresh_qp_delta = 0;
7078 cmd->dw34.widi_intra_refresh_en = 0;
7081 /*roi set disable by now. 49-56*/
/* Up to 4 ROI rectangles (dw49..dw56) plus per-ROI QP deltas. */
7082 if (curbe_param->roi_enabled) {
7083 cmd->dw49.roi_1_x_left = generic_state->roi[0].left;
7084 cmd->dw49.roi_1_y_top = generic_state->roi[0].top;
7085 cmd->dw50.roi_1_x_right = generic_state->roi[0].right;
7086 cmd->dw50.roi_1_y_bottom = generic_state->roi[0].bottom;
7088 cmd->dw51.roi_2_x_left = generic_state->roi[1].left;
7089 cmd->dw51.roi_2_y_top = generic_state->roi[1].top;
7090 cmd->dw52.roi_2_x_right = generic_state->roi[1].right;
7091 cmd->dw52.roi_2_y_bottom = generic_state->roi[1].bottom;
7093 cmd->dw53.roi_3_x_left = generic_state->roi[2].left;
7094 cmd->dw53.roi_3_y_top = generic_state->roi[2].top;
7095 cmd->dw54.roi_3_x_right = generic_state->roi[2].right;
7096 cmd->dw54.roi_3_y_bottom = generic_state->roi[2].bottom;
7098 cmd->dw55.roi_4_x_left = generic_state->roi[3].left;
7099 cmd->dw55.roi_4_y_top = generic_state->roi[3].top;
7100 cmd->dw56.roi_4_x_right = generic_state->roi[3].right;
7101 cmd->dw56.roi_4_y_bottom = generic_state->roi[3].bottom;
7103 cmd->dw36.enable_cabac_work_around = 0;
/* With BRC off, apply ROI QP deltas directly, clamped to valid QP. */
7105 if (!generic_state->brc_enabled) {
7107 tmp = generic_state->roi[0].value;
7108 CLIP(tmp, -qp, AVC_QP_MAX - qp);
7109 cmd->dw57.roi_1_dqp_prime_y = tmp;
7110 tmp = generic_state->roi[1].value;
7111 CLIP(tmp, -qp, AVC_QP_MAX - qp);
7112 cmd->dw57.roi_2_dqp_prime_y = tmp;
7113 tmp = generic_state->roi[2].value;
7114 CLIP(tmp, -qp, AVC_QP_MAX - qp);
7115 cmd->dw57.roi_3_dqp_prime_y = tmp;
7116 tmp = generic_state->roi[3].value;
7117 CLIP(tmp, -qp, AVC_QP_MAX - qp);
7118 cmd->dw57.roi_4_dqp_prime_y = tmp;
7120 cmd->dw34.roi_enable_flag = 0;
/* Binding-table indices (dw65..dw86) — must match the surface-state
 * setup for this kernel. */
7124 cmd->dw65.mb_data_surf_index = GEN8_AVC_MBENC_MFC_AVC_PAK_OBJ_CM;
7125 cmd->dw66.mv_data_surf_index = GEN8_AVC_MBENC_IND_MV_DATA_CM;
7126 cmd->dw67.i_dist_surf_index = GEN8_AVC_MBENC_BRC_DISTORTION_CM;
7127 cmd->dw68.src_y_surf_index = GEN8_AVC_MBENC_CURR_Y_CM;
7128 cmd->dw69.mb_specific_data_surf_index = GEN8_AVC_MBENC_MB_SPECIFIC_DATA_CM;
7129 cmd->dw70.aux_vme_out_surf_index = GEN8_AVC_MBENC_AUX_VME_OUT_CM;
7130 cmd->dw71.curr_ref_pic_sel_surf_index = GEN8_AVC_MBENC_REFPICSELECT_L0_CM;
7131 cmd->dw72.hme_mv_pred_fwd_bwd_surf_index = GEN8_AVC_MBENC_MV_DATA_FROM_ME_CM;
7132 cmd->dw73.hme_dist_surf_index = GEN8_AVC_MBENC_4xME_DISTORTION_CM;
7133 cmd->dw74.slice_map_surf_index = GEN8_AVC_MBENC_SLICEMAP_DATA_CM;
7134 cmd->dw75.fwd_frm_mb_data_surf_index = GEN8_AVC_MBENC_FWD_MB_DATA_CM;
7135 cmd->dw76.fwd_frm_mv_surf_index = GEN8_AVC_MBENC_FWD_MV_DATA_CM;
7136 cmd->dw77.mb_qp_buffer = GEN8_AVC_MBENC_MBQP_CM;
7137 cmd->dw78.mb_brc_lut = GEN8_AVC_MBENC_MBBRC_CONST_DATA_CM;
7138 cmd->dw79.vme_inter_prediction_surf_index = GEN8_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_0_CM;
7139 cmd->dw80.vme_inter_prediction_mr_surf_index = GEN8_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_1_CM;
7140 cmd->dw81.flatness_chk_surf_index = GEN8_AVC_MBENC_FLATNESS_CHECK_CM;
7141 cmd->dw82.mad_surf_index = GEN8_AVC_MBENC_MAD_DATA_CM;
7142 cmd->dw83.force_non_skip_mb_map_surface = GEN8_AVC_MBENC_FORCE_NONSKIP_MB_MAP_CM;
7143 cmd->dw84.widi_wa_surf_index = GEN8_AVC_MBENC_WIDI_WA_DATA_CM;
7144 cmd->dw85.brc_curbe_surf_index = GEN8_AVC_MBENC_BRC_CURBE_DATA_CM;
7145 cmd->dw86.static_detection_cost_table_index = GEN8_AVC_MBENC_STATIC_FRAME_DETECTION_OUTPUT_CM;
7147 i965_gpe_context_unmap_curbe(gpe_context);
/*
 * Fill the Gen8 4x-downscaling kernel CURBE: source/destination
 * dimensions, binding-table indices, and the optional MB flatness /
 * variance / pixel-average output controls.
 *
 * NOTE(review): this excerpt has interior lines elided (e.g. the NULL
 * check after map_curbe and closing braces are not visible).
 */
7153 gen8_avc_set_curbe_scaling4x(VADriverContextP ctx,
7154 struct encode_state *encode_state,
7155 struct i965_gpe_context *gpe_context,
7156 struct intel_encoder_context *encoder_context,
7159 gen8_avc_scaling4x_curbe_data *curbe_cmd;
7160 struct scaling_param *surface_param = (struct scaling_param *)param;
7162 curbe_cmd = i965_gpe_context_map_curbe(gpe_context);
/* NOTE(review): pointer is gen8_avc_scaling4x_curbe_data* but memset
 * uses sizeof the gen9 struct — verify the two layouts are the same
 * size, otherwise this over/under-clears the CURBE. */
7167 memset(curbe_cmd, 0, sizeof(gen9_avc_scaling4x_curbe_data));
7169 curbe_cmd->dw0.input_picture_width = surface_param->input_frame_width;
7170 curbe_cmd->dw0.input_picture_height = surface_param->input_frame_height;
7172 curbe_cmd->dw1.input_y_bti = GEN8_SCALING_FRAME_SRC_Y_CM;
7173 curbe_cmd->dw2.output_y_bti = GEN8_SCALING_FRAME_DST_Y_CM;
7175 curbe_cmd->dw5.flatness_threshold = 0;
7176 if (surface_param->enable_mb_flatness_check) {
7177 curbe_cmd->dw5.flatness_threshold = 128;
7178 curbe_cmd->dw8.flatness_output_bti_top_field = 4;
7181 curbe_cmd->dw6.enable_mb_flatness_check = surface_param->enable_mb_flatness_check;
7182 curbe_cmd->dw6.enable_mb_variance_output = surface_param->enable_mb_variance_output;
7183 curbe_cmd->dw6.enable_mb_pixel_average_output = surface_param->enable_mb_pixel_average_output;
/* MBV stats outputs share one destination; program both field BTIs. */
7185 if (curbe_cmd->dw6.enable_mb_variance_output ||
7186 curbe_cmd->dw6.enable_mb_pixel_average_output) {
7187 curbe_cmd->dw10.mbv_proc_states_bti_top_field = GEN8_SCALING_FIELD_TOP_MBVPROCSTATS_DST_CM;
7188 curbe_cmd->dw11.mbv_proc_states_bti_bottom_field = GEN8_SCALING_FIELD_BOT_MBVPROCSTATS_DST_CM;
7191 i965_gpe_context_unmap_curbe(gpe_context);
/*
 * Fill the Gen8 HME (hierarchical motion estimation) kernel CURBE for
 * one HME level (4x / 16x / 32x).
 *
 * Per-level parameters (previous-step MV reuse, distortion write-out,
 * MV shift, read-position factor) are chosen from curbe_param->hme_type,
 * then the CURBE template is copied in and overridden with frame size,
 * QP, ref counts, the ME search path, and binding-table indices.
 *
 * NOTE(review): interior lines are elided in this excerpt — notably the
 * scale_factor assignments per HME level (it is 0-initialized here but
 * later used as a divisor, so it must be set on elided lines) and the
 * switch's break/default handling.
 */
7196 gen8_avc_set_curbe_me(VADriverContextP ctx,
7197 struct encode_state *encode_state,
7198 struct i965_gpe_context *gpe_context,
7199 struct intel_encoder_context *encoder_context,
7202 gen8_avc_me_curbe_data *curbe_cmd;
7203 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
7204 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
7205 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
7207 VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
7209 struct me_param * curbe_param = (struct me_param *)param ;
7210 unsigned char use_mv_from_prev_step = 0;
7211 unsigned char write_distortions = 0;
7212 unsigned char qp_prime_y = 0;
7213 unsigned char me_method = gen9_avc_p_me_method[generic_state->preset];
7214 unsigned char seach_table_idx = 0;
7215 unsigned char mv_shift_factor = 0, prev_mv_read_pos_factor = 0;
7216 unsigned int downscaled_width_in_mb, downscaled_height_in_mb;
7217 unsigned int scale_factor = 0;
7219 qp_prime_y = avc_state->pic_param->pic_init_qp + slice_param->slice_qp_delta;
/* Per-HME-level configuration; higher levels feed MVs to lower ones. */
7220 switch (curbe_param->hme_type) {
7221 case INTEL_ENC_HME_4x : {
/* 4x reuses 16x MVs when 16x ME is enabled and is the only level that
 * writes distortions. */
7222 use_mv_from_prev_step = (generic_state->b16xme_enabled) ? 1 : 0;
7223 write_distortions = 1;
7224 mv_shift_factor = 2;
7226 prev_mv_read_pos_factor = 0;
7229 case INTEL_ENC_HME_16x : {
7230 use_mv_from_prev_step = (generic_state->b32xme_enabled) ? 1 : 0;
7231 write_distortions = 0;
7232 mv_shift_factor = 2;
7234 prev_mv_read_pos_factor = 1;
7237 case INTEL_ENC_HME_32x : {
/* 32x is the top level: nothing above it to reuse MVs from. */
7238 use_mv_from_prev_step = 0;
7239 write_distortions = 0;
7240 mv_shift_factor = 1;
7242 prev_mv_read_pos_factor = 0;
7249 curbe_cmd = i965_gpe_context_map_curbe(gpe_context);
/* Downscaled frame size in MBs for this HME level. */
7254 downscaled_width_in_mb = ALIGN(generic_state->frame_width_in_pixel / scale_factor, 16) / 16;
7255 downscaled_height_in_mb = ALIGN(generic_state->frame_height_in_pixel / scale_factor, 16) / 16;
7257 memcpy(curbe_cmd, gen8_avc_me_curbe_init_data, sizeof(gen8_avc_me_curbe_data));
7259 curbe_cmd->dw3.sub_pel_mode = 3;
7260 if (avc_state->field_scaling_output_interleaved) {
7261 /*frame set to zero,field specified*/
7262 curbe_cmd->dw3.src_access = 0;
7263 curbe_cmd->dw3.ref_access = 0;
7264 curbe_cmd->dw7.src_field_polarity = 0;
7266 curbe_cmd->dw4.picture_height_minus1 = downscaled_height_in_mb - 1;
7267 curbe_cmd->dw4.picture_width = downscaled_width_in_mb;
7268 curbe_cmd->dw5.qp_prime_y = qp_prime_y;
7270 curbe_cmd->dw6.use_mv_from_prev_step = use_mv_from_prev_step;
7271 curbe_cmd->dw6.write_distortions = write_distortions;
7272 curbe_cmd->dw6.super_combine_dist = gen9_avc_super_combine_dist[generic_state->preset];
7273 curbe_cmd->dw6.max_vmvr = i965_avc_get_max_mv_len(avc_state->seq_param->level_idc) * 4;//frame only
/* B frames: equal bi-weight, L1 ref count, and the B search method. */
7275 if (generic_state->frame_type == SLICE_TYPE_B) {
7276 curbe_cmd->dw1.bi_weight = 32;
7277 curbe_cmd->dw13.num_ref_idx_l1_minus1 = slice_param->num_ref_idx_l1_active_minus1;
7278 me_method = gen9_avc_b_me_method[generic_state->preset];
7279 seach_table_idx = 1;
7282 if (generic_state->frame_type == SLICE_TYPE_P ||
7283 generic_state->frame_type == SLICE_TYPE_B)
7284 curbe_cmd->dw13.num_ref_idx_l0_minus1 = slice_param->num_ref_idx_l0_active_minus1;
7286 curbe_cmd->dw15.prev_mv_read_pos_factor = prev_mv_read_pos_factor;
7287 curbe_cmd->dw15.mv_shift_factor = mv_shift_factor;
/* ME search path: 14 DWs at dw16 (MBEnc copies 16 — layouts differ). */
7289 memcpy(&curbe_cmd->dw16, table_enc_search_path[seach_table_idx][me_method], 14 * sizeof(int));
/* Binding-table indices for this kernel's surfaces. */
7291 curbe_cmd->dw32._4x_memv_output_data_surf_index = GEN8_AVC_ME_MV_DATA_SURFACE_CM;
7292 curbe_cmd->dw33._16x_32x_memv_input_data_surf_index = (curbe_param->hme_type == INTEL_ENC_HME_32x) ? GEN8_AVC_32xME_MV_DATA_SURFACE_CM : GEN8_AVC_16xME_MV_DATA_SURFACE_CM ;
7293 curbe_cmd->dw34._4x_me_output_dist_surf_index = GEN8_AVC_ME_DISTORTION_SURFACE_CM;
7294 curbe_cmd->dw35._4x_me_output_brc_dist_surf_index = GEN8_AVC_ME_BRC_DISTORTION_CM;
7295 curbe_cmd->dw36.vme_fwd_inter_pred_surf_index = GEN8_AVC_ME_CURR_FOR_FWD_REF_CM;
7296 curbe_cmd->dw37.vme_bdw_inter_pred_surf_index = GEN8_AVC_ME_CURR_FOR_BWD_REF_CM;
7297 curbe_cmd->dw38.reserved = 0;
7299 i965_gpe_context_unmap_curbe(gpe_context);
/*
 * Fill the Gen8 BRC frame-update kernel CURBE.
 *
 * Copies the init template, then accounts the virtual-buffer fullness
 * (carry-over/overflow flag, skipped frames), records frame metadata
 * (type, header bits, BRC flags, PAK pass count), optional min/max QP
 * clamps per frame type, and AVBR gain-adjustment thresholds.
 *
 * NOTE(review): interior lines are elided (e.g. the early-return body
 * after the reconstructed-object check, switch case labels/breaks, and
 * whatever consumes `common_param` after it is filled in).
 */
7304 gen8_avc_set_curbe_brc_frame_update(VADriverContextP ctx,
7305 struct encode_state *encode_state,
7306 struct i965_gpe_context *gpe_context,
7307 struct intel_encoder_context *encoder_context,
7310 gen8_avc_frame_brc_update_curbe_data *cmd;
7311 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
7312 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
7313 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
7314 struct object_surface *obj_surface;
7315 struct gen9_surface_avc *avc_priv_surface;
7316 struct avc_param common_param;
7318 obj_surface = encode_state->reconstructed_object;
/* Bail out when there is no reconstructed surface to attach BRC state
 * to (the return statement is on an elided line). */
7320 if (!obj_surface || !obj_surface->private_data)
7322 avc_priv_surface = obj_surface->private_data;
7324 cmd = i965_gpe_context_map_curbe(gpe_context);
7329 memcpy(cmd, &gen8_avc_frame_brc_update_curbe_init_data, sizeof(gen8_avc_frame_brc_update_curbe_data));
/* Buffer-fullness accounting: flag and wrap when the running target
 * exceeds the (reset) buffer size. */
7331 cmd->dw5.target_size_flag = 0 ;
7332 if (generic_state->brc_init_current_target_buf_full_in_bits > (double)generic_state->brc_init_reset_buf_size_in_bits) {
7334 generic_state->brc_init_current_target_buf_full_in_bits -= (double)generic_state->brc_init_reset_buf_size_in_bits;
7335 cmd->dw5.target_size_flag = 1 ;
/* Note: field name "skip_frame_enbale" is a pre-existing typo in the
 * state struct; kept as-is. */
7338 if (generic_state->skip_frame_enbale) {
7339 cmd->dw6.num_skip_frames = generic_state->num_skip_frames ;
7340 cmd->dw7.size_skip_frames = generic_state->size_skip_frames;
/* Skipped frames still consume their share of the buffer budget. */
7342 generic_state->brc_init_current_target_buf_full_in_bits += generic_state->brc_init_reset_input_bits_per_frame * generic_state->num_skip_frames;
7345 cmd->dw0.target_size = (unsigned int)generic_state->brc_init_current_target_buf_full_in_bits ;
7346 cmd->dw1.frame_number = generic_state->seq_frame_number ;
/* Header bytes converted to bits (<< 3). */
7347 cmd->dw2.size_of_pic_headers = generic_state->herder_bytes_inserted << 3 ;
7348 cmd->dw5.cur_frame_type = generic_state->frame_type ;
7349 cmd->dw5.brc_flag = 0 ;
7350 cmd->dw5.brc_flag |= (avc_priv_surface->is_as_ref) ? INTEL_ENCODE_BRCUPDATE_IS_REFERENCE : 0 ;
7352 if (avc_state->multi_pre_enable) {
7353 cmd->dw5.brc_flag |= INTEL_ENCODE_BRCUPDATE_IS_ACTUALQP ;
7354 cmd->dw14.qp_index_of_cur_pic = avc_priv_surface->frame_idx ; //do not know this. use -1
7357 cmd->dw5.max_num_paks = generic_state->num_pak_passes ;
/* Optional per-frame-type QP clamping (case labels elided). */
7358 if (avc_state->min_max_qp_enable) {
7359 switch (generic_state->frame_type) {
7361 cmd->dw6.minimum_qp = avc_state->min_qp_i ;
7362 cmd->dw6.maximum_qp = avc_state->max_qp_i ;
7365 cmd->dw6.minimum_qp = avc_state->min_qp_p ;
7366 cmd->dw6.maximum_qp = avc_state->max_qp_p ;
7369 cmd->dw6.minimum_qp = avc_state->min_qp_b ;
7370 cmd->dw6.maximum_qp = avc_state->max_qp_b ;
7374 cmd->dw6.minimum_qp = 0 ;
7375 cmd->dw6.maximum_qp = 0 ;
/* Advance the running buffer-fullness target by one frame's budget. */
7378 generic_state->brc_init_current_target_buf_full_in_bits += generic_state->brc_init_reset_input_bits_per_frame;
/* AVBR: gain-adjust start frames scaled by convergence, and rate-ratio
 * thresholds widened/narrowed by accuracy ("avbr_curracy" is a
 * pre-existing typo in the state struct). */
7380 if (generic_state->internal_rate_mode == INTEL_BRC_AVBR) {
7381 cmd->dw3.start_gadj_frame0 = (unsigned int)((10 * generic_state->avbr_convergence) / (double)150);
7382 cmd->dw3.start_gadj_frame1 = (unsigned int)((50 * generic_state->avbr_convergence) / (double)150);
7383 cmd->dw4.start_gadj_frame2 = (unsigned int)((100 * generic_state->avbr_convergence) / (double)150);
7384 cmd->dw4.start_gadj_frame3 = (unsigned int)((150 * generic_state->avbr_convergence) / (double)150);
7385 cmd->dw11.g_rate_ratio_threshold_0 = (unsigned int)((100 - (generic_state->avbr_curracy / (double)30) * (100 - 40)));
7386 cmd->dw11.g_rate_ratio_threshold_1 = (unsigned int)((100 - (generic_state->avbr_curracy / (double)30) * (100 - 75)));
7387 cmd->dw12.g_rate_ratio_threshold_2 = (unsigned int)((100 - (generic_state->avbr_curracy / (double)30) * (100 - 97)));
7388 cmd->dw12.g_rate_ratio_threshold_3 = (unsigned int)((100 + (generic_state->avbr_curracy / (double)30) * (103 - 100)));
7389 cmd->dw12.g_rate_ratio_threshold_4 = (unsigned int)((100 + (generic_state->avbr_curracy / (double)30) * (125 - 100)));
7390 cmd->dw12.g_rate_ratio_threshold_5 = (unsigned int)((100 + (generic_state->avbr_curracy / (double)30) * (160 - 100)));
/* Common frame parameters — consumer of common_param is on elided
 * lines (presumably a helper deriving max-frame-size; confirm). */
7394 memset(&common_param, 0, sizeof(common_param));
7395 common_param.frame_width_in_pixel = generic_state->frame_width_in_pixel;
7396 common_param.frame_height_in_pixel = generic_state->frame_height_in_pixel;
7397 common_param.frame_width_in_mbs = generic_state->frame_width_in_mbs;
7398 common_param.frame_height_in_mbs = generic_state->frame_height_in_mbs;
7399 common_param.frames_per_100s = generic_state->frames_per_100s;
7400 common_param.vbv_buffer_size_in_bit = generic_state->vbv_buffer_size_in_bit;
7401 common_param.target_bit_rate = generic_state->target_bit_rate;
7403 i965_gpe_context_unmap_curbe(gpe_context);
7409 Kernel-related functions: initialization, destruction, etc.
/*
 * Initialize the down-scaling (DS) kernel GPE contexts.
 * Picks the scaling CURBE/inline-data layout matching the GPU generation
 * (gen8 / gen9 / gen95 variants), configures the HW scoreboard, then loads
 * the 4x scaling kernel and — for the non-PreEnc path — the 2x scaling
 * kernel as well (PreEnc uses only 4x scaling, see comment below).
 */
7412 gen9_avc_kernel_init_scaling(VADriverContextP ctx,
7413 struct generic_encoder_context *generic_context,
7414 struct gen_avc_scaling_context *kernel_context,
7417 struct i965_driver_data *i965 = i965_driver_data(ctx);
7418 struct i965_gpe_table *gpe = &i965->gpe_table;
7419 struct i965_gpe_context *gpe_context = NULL;
7420 struct encoder_kernel_parameter kernel_param ;
7421 struct encoder_scoreboard_parameter scoreboard_param;
7422 struct i965_kernel common_kernel;
7424 memset(&kernel_param, 0, sizeof(kernel_param));
/* CURBE layout selection per generation; SKL/BXT PreEnc borrows the gen95 layout. */
7425 if (IS_SKL(i965->intel.device_info) ||
7426 IS_BXT(i965->intel.device_info)) {
7427 if (!preenc_enabled) {
7428 kernel_param.curbe_size = sizeof(gen9_avc_scaling4x_curbe_data);
7429 kernel_param.inline_data_size = sizeof(gen9_avc_scaling4x_curbe_data);
7431 /* Skylake PreEnc using GEN95/gen10 DS kernel */
7432 kernel_param.curbe_size = sizeof(gen95_avc_scaling4x_curbe_data);
7433 kernel_param.inline_data_size = sizeof(gen95_avc_scaling4x_curbe_data);
7435 } else if (IS_KBL(i965->intel.device_info) ||
7436 IS_GEN10(i965->intel.device_info) ||
7437 IS_GLK(i965->intel.device_info)) {
7438 kernel_param.curbe_size = sizeof(gen95_avc_scaling4x_curbe_data);
7439 kernel_param.inline_data_size = sizeof(gen95_avc_scaling4x_curbe_data);
7440 } else if (IS_GEN8(i965->intel.device_info)) {
7441 kernel_param.curbe_size = sizeof(gen8_avc_scaling4x_curbe_data);
7442 kernel_param.inline_data_size = sizeof(gen8_avc_scaling4x_curbe_data);
7446 /* 4x scaling kernel*/
7447 kernel_param.sampler_size = 0;
7449 memset(&scoreboard_param, 0, sizeof(scoreboard_param));
7450 scoreboard_param.mask = 0xFF;
7451 scoreboard_param.enable = generic_context->use_hw_scoreboard;
7452 scoreboard_param.type = generic_context->use_hw_non_stalling_scoreboard;
7453 scoreboard_param.walkpat_flag = 0;
7455 gpe_context = &kernel_context->gpe_contexts[GEN9_AVC_KERNEL_SCALING_4X_IDX];
7456 gen9_init_gpe_context_avc(ctx, gpe_context, &kernel_param);
7457 gen9_init_vfe_scoreboard_avc(gpe_context, &scoreboard_param);
7459 memset(&common_kernel, 0, sizeof(common_kernel));
/* Locate the 4x scaling kernel binary inside the combined kernel blob. */
7461 generic_context->get_kernel_header_and_size((void *)(generic_context->enc_kernel_ptr),
7462 generic_context->enc_kernel_size,
7463 INTEL_GENERIC_ENC_SCALING4X,
7467 gpe->load_kernels(ctx,
7472 /* PreEnc using only the 4X scaling */
7476 /*2x scaling kernel*/
7477 kernel_param.curbe_size = sizeof(gen9_avc_scaling2x_curbe_data);
7478 kernel_param.inline_data_size = 0;
7479 kernel_param.sampler_size = 0;
7481 gpe_context = &kernel_context->gpe_contexts[GEN9_AVC_KERNEL_SCALING_2X_IDX];
7482 gen9_init_gpe_context_avc(ctx, gpe_context, &kernel_param);
7483 gen9_init_vfe_scoreboard_avc(gpe_context, &scoreboard_param);
7485 memset(&common_kernel, 0, sizeof(common_kernel));
7487 intel_avc_get_kernel_header_and_size((void *)(generic_context->enc_kernel_ptr),
7488 generic_context->enc_kernel_size,
7489 INTEL_GENERIC_ENC_SCALING2X,
7493 gpe->load_kernels(ctx,
/*
 * Initialize the ME (motion estimation / HME) kernel GPE contexts.
 * CURBE size depends on the generation and on whether PreEnc/FEI is
 * enabled (FEI uses the gen9 FEI ME CURBE layout).
 */
7501 gen9_avc_kernel_init_me(VADriverContextP ctx,
7502 struct generic_encoder_context *generic_context,
7503 struct gen_avc_me_context *kernel_context,
7506 struct i965_driver_data *i965 = i965_driver_data(ctx);
7507 struct i965_gpe_table *gpe = &i965->gpe_table;
7508 struct i965_gpe_context *gpe_context = NULL;
7509 struct encoder_kernel_parameter kernel_param ;
7510 struct encoder_scoreboard_parameter scoreboard_param;
7511 struct i965_kernel common_kernel;
7513 unsigned int curbe_size = 0;
7515 if (IS_GEN8(i965->intel.device_info)) {
7516 curbe_size = sizeof(gen8_avc_me_curbe_data);
7518 if (!preenc_enabled)
7519 curbe_size = sizeof(gen9_avc_me_curbe_data);
7521 curbe_size = sizeof(gen9_avc_fei_me_curbe_data);
7524 kernel_param.curbe_size = curbe_size;
7525 kernel_param.inline_data_size = 0;
7526 kernel_param.sampler_size = 0;
7528 memset(&scoreboard_param, 0, sizeof(scoreboard_param));
7529 scoreboard_param.mask = 0xFF;
7530 scoreboard_param.enable = generic_context->use_hw_scoreboard;
7531 scoreboard_param.type = generic_context->use_hw_non_stalling_scoreboard;
7532 scoreboard_param.walkpat_flag = 0;
7534 /* There are two HME kernel contexts: one for P frames and one for B frames. */
7535 for (i = 0; i < 2; i++) {
7536 gpe_context = &kernel_context->gpe_contexts[i];
7537 gen9_init_gpe_context_avc(ctx, gpe_context, &kernel_param);
7538 gen9_init_vfe_scoreboard_avc(gpe_context, &scoreboard_param);
7540 memset(&common_kernel, 0, sizeof(common_kernel));
7542 generic_context->get_kernel_header_and_size((void *)(generic_context->enc_kernel_ptr),
7543 generic_context->enc_kernel_size,
7544 INTEL_GENERIC_ENC_ME,
7548 gpe->load_kernels(ctx,
/*
 * Initialize the FEI PreProc kernel GPE context (single context).
 * Uses the FEI-specific kernel lookup helper
 * (intel_avc_fei_get_kernel_header_and_size).
 */
7557 gen9_avc_kernel_init_preproc(VADriverContextP ctx,
7558 struct generic_encoder_context *generic_context,
7559 struct gen_avc_preproc_context *kernel_context)
7561 struct i965_driver_data *i965 = i965_driver_data(ctx);
7562 struct i965_gpe_table *gpe = &i965->gpe_table;
7563 struct i965_gpe_context *gpe_context = NULL;
7564 struct encoder_kernel_parameter kernel_param ;
7565 struct encoder_scoreboard_parameter scoreboard_param;
7566 struct i965_kernel common_kernel;
7568 kernel_param.curbe_size = sizeof(gen9_avc_preproc_curbe_data);
7569 kernel_param.inline_data_size = 0;
7570 kernel_param.sampler_size = 0;
7572 memset(&scoreboard_param, 0, sizeof(scoreboard_param));
7573 scoreboard_param.mask = 0xFF;
7574 scoreboard_param.enable = generic_context->use_hw_scoreboard;
7575 scoreboard_param.type = generic_context->use_hw_non_stalling_scoreboard;
7576 scoreboard_param.walkpat_flag = 0;
7578 gpe_context = &kernel_context->gpe_contexts;
7579 gen9_init_gpe_context_avc(ctx, gpe_context, &kernel_param);
7580 gen9_init_vfe_scoreboard_avc(gpe_context, &scoreboard_param);
7582 memset(&common_kernel, 0, sizeof(common_kernel));
7584 intel_avc_fei_get_kernel_header_and_size((void *)(generic_context->enc_kernel_ptr),
7585 generic_context->enc_kernel_size,
7586 INTEL_GENERIC_ENC_PREPROC,
7590 gpe->load_kernels(ctx,
/*
 * Initialize the MbEnc kernel GPE contexts.
 * The CURBE layout and the number of MbEnc kernels depend on the GPU
 * generation; on SKL/BXT the FEI path uses a separate FEI CURBE layout
 * and kernel count.
 */
7598 gen9_avc_kernel_init_mbenc(VADriverContextP ctx,
7599 struct generic_encoder_context *generic_context,
7600 struct gen_avc_mbenc_context *kernel_context,
7603 struct i965_driver_data *i965 = i965_driver_data(ctx);
7604 struct i965_gpe_table *gpe = &i965->gpe_table;
7605 struct i965_gpe_context *gpe_context = NULL;
7606 struct encoder_kernel_parameter kernel_param ;
7607 struct encoder_scoreboard_parameter scoreboard_param;
7608 struct i965_kernel common_kernel;
7610 unsigned int curbe_size = 0;
7611 unsigned int num_mbenc_kernels = 0;
7613 if (IS_SKL(i965->intel.device_info) ||
7614 IS_BXT(i965->intel.device_info)) {
7616 curbe_size = sizeof(gen9_avc_mbenc_curbe_data);
7617 num_mbenc_kernels = NUM_GEN9_AVC_KERNEL_MBENC;
7619 curbe_size = sizeof(gen9_avc_fei_mbenc_curbe_data);
7620 num_mbenc_kernels = NUM_GEN9_AVC_FEI_KERNEL_MBENC;
7622 } else if (IS_KBL(i965->intel.device_info) ||
7623 IS_GEN10(i965->intel.device_info) ||
7624 IS_GLK(i965->intel.device_info)) {
7625 curbe_size = sizeof(gen95_avc_mbenc_curbe_data);
7626 num_mbenc_kernels = NUM_GEN9_AVC_KERNEL_MBENC;
7627 } else if (IS_GEN8(i965->intel.device_info)) {
7628 curbe_size = sizeof(gen8_avc_mbenc_curbe_data);
7629 num_mbenc_kernels = NUM_GEN9_AVC_KERNEL_MBENC;
/* Guard against running on an unhandled generation (curbe_size left 0). */
7632 assert(curbe_size > 0);
7633 kernel_param.curbe_size = curbe_size;
7634 kernel_param.inline_data_size = 0;
7635 kernel_param.sampler_size = 0;
7637 memset(&scoreboard_param, 0, sizeof(scoreboard_param));
7638 scoreboard_param.mask = 0xFF;
7639 scoreboard_param.enable = generic_context->use_hw_scoreboard;
7640 scoreboard_param.type = generic_context->use_hw_non_stalling_scoreboard;
7641 scoreboard_param.walkpat_flag = 0;
/* One GPE context per MbEnc kernel variant. */
7643 for (i = 0; i < num_mbenc_kernels ; i++) {
7644 gpe_context = &kernel_context->gpe_contexts[i];
7645 gen9_init_gpe_context_avc(ctx, gpe_context, &kernel_param);
7646 gen9_init_vfe_scoreboard_avc(gpe_context, &scoreboard_param);
7648 memset(&common_kernel, 0, sizeof(common_kernel));
7650 generic_context->get_kernel_header_and_size((void *)(generic_context->enc_kernel_ptr),
7651 generic_context->enc_kernel_size,
7652 INTEL_GENERIC_ENC_MBENC,
7656 gpe->load_kernels(ctx,
/*
 * Initialize the BRC (bit-rate control) kernel GPE contexts.
 * GEN8 has one fewer BRC kernel than GEN9+ (NUM_GEN9_AVC_KERNEL_BRC - 1);
 * per-kernel CURBE sizes come from the generation-specific tables below.
 */
7665 gen9_avc_kernel_init_brc(VADriverContextP ctx,
7666 struct generic_encoder_context *generic_context,
7667 struct gen_avc_brc_context *kernel_context)
7669 struct i965_driver_data *i965 = i965_driver_data(ctx);
7670 struct i965_gpe_table *gpe = &i965->gpe_table;
7671 struct i965_gpe_context *gpe_context = NULL;
7672 struct encoder_kernel_parameter kernel_param ;
7673 struct encoder_scoreboard_parameter scoreboard_param;
7674 struct i965_kernel common_kernel;
7675 int num_brc_init_kernels = 0;
7678 if (IS_GEN8(i965->intel.device_info)) {
7679 num_brc_init_kernels = NUM_GEN9_AVC_KERNEL_BRC - 1;
7681 num_brc_init_kernels = NUM_GEN9_AVC_KERNEL_BRC;
/* CURBE size per BRC kernel index (init/reset, frame update, reset, mbenc, ...). */
7684 const int gen8_brc_curbe_size[NUM_GEN9_AVC_KERNEL_BRC - 1] = {
7685 (sizeof(gen9_avc_brc_init_reset_curbe_data)),
7686 (sizeof(gen8_avc_frame_brc_update_curbe_data)),
7687 (sizeof(gen9_avc_brc_init_reset_curbe_data)),
7688 (sizeof(gen8_avc_mbenc_curbe_data)),
7691 const int gen9_brc_curbe_size[NUM_GEN9_AVC_KERNEL_BRC] = {
7692 (sizeof(gen9_avc_brc_init_reset_curbe_data)),
7693 (sizeof(gen9_avc_frame_brc_update_curbe_data)),
7694 (sizeof(gen9_avc_brc_init_reset_curbe_data)),
7695 ((IS_SKL(i965->intel.device_info) || IS_BXT(i965->intel.device_info)) ? sizeof(gen9_avc_mbenc_curbe_data) : sizeof(gen95_avc_mbenc_curbe_data)),
7697 (sizeof(gen9_avc_mb_brc_curbe_data))
7700 kernel_param.inline_data_size = 0;
7701 kernel_param.sampler_size = 0;
7703 memset(&scoreboard_param, 0, sizeof(scoreboard_param));
7704 scoreboard_param.mask = 0xFF;
7705 scoreboard_param.enable = generic_context->use_hw_scoreboard;
7706 scoreboard_param.type = generic_context->use_hw_non_stalling_scoreboard;
7707 scoreboard_param.walkpat_flag = 0;
7709 for (i = 0; i < num_brc_init_kernels; i++) {
7710 if (IS_GEN8(i965->intel.device_info)) {
7711 kernel_param.curbe_size = gen8_brc_curbe_size[i];
7713 kernel_param.curbe_size = gen9_brc_curbe_size[i];
7715 gpe_context = &kernel_context->gpe_contexts[i];
7716 gen9_init_gpe_context_avc(ctx, gpe_context, &kernel_param);
7717 gen9_init_vfe_scoreboard_avc(gpe_context, &scoreboard_param);
7719 memset(&common_kernel, 0, sizeof(common_kernel));
7721 intel_avc_get_kernel_header_and_size((void *)(generic_context->enc_kernel_ptr),
7722 generic_context->enc_kernel_size,
7723 INTEL_GENERIC_ENC_BRC,
7727 gpe->load_kernels(ctx,
/*
 * Initialize the WP (weighted prediction) kernel GPE context
 * (single context).
 */
7736 gen9_avc_kernel_init_wp(VADriverContextP ctx,
7737 struct generic_encoder_context *generic_context,
7738 struct gen_avc_wp_context *kernel_context)
7740 struct i965_driver_data *i965 = i965_driver_data(ctx);
7741 struct i965_gpe_table *gpe = &i965->gpe_table;
7742 struct i965_gpe_context *gpe_context = NULL;
7743 struct encoder_kernel_parameter kernel_param ;
7744 struct encoder_scoreboard_parameter scoreboard_param;
7745 struct i965_kernel common_kernel;
7747 kernel_param.curbe_size = sizeof(gen9_avc_wp_curbe_data);
7748 kernel_param.inline_data_size = 0;
7749 kernel_param.sampler_size = 0;
7751 memset(&scoreboard_param, 0, sizeof(scoreboard_param));
7752 scoreboard_param.mask = 0xFF;
7753 scoreboard_param.enable = generic_context->use_hw_scoreboard;
7754 scoreboard_param.type = generic_context->use_hw_non_stalling_scoreboard;
7755 scoreboard_param.walkpat_flag = 0;
7757 gpe_context = &kernel_context->gpe_contexts;
7758 gen9_init_gpe_context_avc(ctx, gpe_context, &kernel_param);
7759 gen9_init_vfe_scoreboard_avc(gpe_context, &scoreboard_param);
7761 memset(&common_kernel, 0, sizeof(common_kernel));
7763 intel_avc_get_kernel_header_and_size((void *)(generic_context->enc_kernel_ptr),
7764 generic_context->enc_kernel_size,
7765 INTEL_GENERIC_ENC_WP,
7769 gpe->load_kernels(ctx,
/*
 * Initialize the SFD (static frame detection) kernel GPE context
 * (single context).
 */
7777 gen9_avc_kernel_init_sfd(VADriverContextP ctx,
7778 struct generic_encoder_context *generic_context,
7779 struct gen_avc_sfd_context *kernel_context)
7781 struct i965_driver_data *i965 = i965_driver_data(ctx);
7782 struct i965_gpe_table *gpe = &i965->gpe_table;
7783 struct i965_gpe_context *gpe_context = NULL;
7784 struct encoder_kernel_parameter kernel_param ;
7785 struct encoder_scoreboard_parameter scoreboard_param;
7786 struct i965_kernel common_kernel;
7788 kernel_param.curbe_size = sizeof(gen9_avc_sfd_curbe_data);
7789 kernel_param.inline_data_size = 0;
7790 kernel_param.sampler_size = 0;
7792 memset(&scoreboard_param, 0, sizeof(scoreboard_param));
7793 scoreboard_param.mask = 0xFF;
7794 scoreboard_param.enable = generic_context->use_hw_scoreboard;
7795 scoreboard_param.type = generic_context->use_hw_non_stalling_scoreboard;
7796 scoreboard_param.walkpat_flag = 0;
7798 gpe_context = &kernel_context->gpe_contexts;
7799 gen9_init_gpe_context_avc(ctx, gpe_context, &kernel_param);
7800 gen9_init_vfe_scoreboard_avc(gpe_context, &scoreboard_param);
7802 memset(&common_kernel, 0, sizeof(common_kernel));
7804 intel_avc_get_kernel_header_and_size((void *)(generic_context->enc_kernel_ptr),
7805 generic_context->enc_kernel_size,
7806 INTEL_GENERIC_ENC_SFD,
7810 gpe->load_kernels(ctx,
/*
 * Tear down everything created by the kernel-init functions above:
 * free all VME resources, then destroy every GPE context (scaling, BRC,
 * ME, MbEnc arrays plus the single WP / SFD / PreProc contexts).
 */
7818 gen9_avc_kernel_destroy(struct encoder_vme_mfc_context * vme_context)
7821 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
7822 struct i965_driver_data *i965 = i965_driver_data(avc_ctx->ctx);
7823 struct i965_gpe_table *gpe = &i965->gpe_table;
7827 gen9_avc_free_resources(vme_context);
7829 for (i = 0; i < NUM_GEN9_AVC_KERNEL_SCALING; i++)
7830 gpe->context_destroy(&avc_ctx->context_scaling.gpe_contexts[i]);
7832 for (i = 0; i < NUM_GEN9_AVC_KERNEL_BRC; i++)
7833 gpe->context_destroy(&avc_ctx->context_brc.gpe_contexts[i]);
7835 for (i = 0; i < NUM_GEN9_AVC_KERNEL_ME; i++)
7836 gpe->context_destroy(&avc_ctx->context_me.gpe_contexts[i]);
7838 for (i = 0; i < NUM_GEN9_AVC_KERNEL_MBENC; i++)
7839 gpe->context_destroy(&avc_ctx->context_mbenc.gpe_contexts[i]);
7841 gpe->context_destroy(&avc_ctx->context_wp.gpe_contexts);
7843 gpe->context_destroy(&avc_ctx->context_sfd.gpe_contexts);
7845 gpe->context_destroy(&avc_ctx->context_preproc.gpe_contexts);
/*
 * Refresh the per-frame encoder state from the VA-API buffers:
 * latch seq/pic/slice parameters, derive the frame type from the first
 * slice, (re)initialize BRC rate-control state, compute frame/MB
 * dimensions and the 4x/16x/32x downscaled sizes, and enable/disable
 * the HME levels, clamping the downscaled surfaces to the minimum VME
 * size where necessary.
 */
7853 gen9_avc_update_parameters(VADriverContextP ctx,
7855 struct encode_state *encode_state,
7856 struct intel_encoder_context *encoder_context)
7858 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
7859 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
7860 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
7861 VAEncSequenceParameterBufferH264 *seq_param;
7862 VAEncSliceParameterBufferH264 *slice_param;
7863 VAEncMiscParameterBuffer *fei_misc_param;
7864 int i, j, slice_index;
7865 unsigned int preset = generic_state->preset;
7866 unsigned int fei_enabled = encoder_context->fei_enabled;
7868 /* seq/pic/slice parameter setting */
7869 generic_state->b16xme_supported = gen9_avc_super_hme[preset];
7870 generic_state->b32xme_supported = gen9_avc_ultra_hme[preset];
7872 avc_state->seq_param = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
7873 avc_state->pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
/* FEI frame-control misc parameter, if the application supplied one. */
7876 encode_state->misc_param[VAEncMiscParameterTypeFEIFrameControl][0] &&
7877 encode_state->misc_param[VAEncMiscParameterTypeFEIFrameControl][0]->buffer) {
7878 fei_misc_param = (VAEncMiscParameterBuffer*)encode_state->misc_param[VAEncMiscParameterTypeFEIFrameControl][0]->buffer;
7879 avc_state->fei_framectl_param =
7880 (VAEncMiscParameterFEIFrameControlH264 *)fei_misc_param->data;
/* Flatten all slice parameter buffers into avc_state->slice_param[]. */
7883 avc_state->slice_num = 0;
7885 for (j = 0; j < encode_state->num_slice_params_ext; j++) {
7886 slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j]->buffer;
7887 for (i = 0; i < encode_state->slice_params_ext[j]->num_elements; i++) {
7888 avc_state->slice_param[slice_index] = slice_param;
7891 avc_state->slice_num++;
7895 /* Only row-based slices (not slice groups) are supported; the frame type is taken from the first slice. */
7896 seq_param = avc_state->seq_param;
7897 slice_param = avc_state->slice_param[0];
7899 generic_state->frame_type = avc_state->slice_param[0]->slice_type;
7901 if (slice_param->slice_type == SLICE_TYPE_I ||
7902 slice_param->slice_type == SLICE_TYPE_SI)
7903 generic_state->frame_type = SLICE_TYPE_I;
7904 else if (slice_param->slice_type == SLICE_TYPE_P)
7905 generic_state->frame_type = SLICE_TYPE_P;
7906 else if (slice_param->slice_type == SLICE_TYPE_B)
7907 generic_state->frame_type = SLICE_TYPE_B;
7908 if (profile == VAProfileH264High)
7909 avc_state->transform_8x8_mode_enable = 0;//workaround for high profile: disable pic_param->pic_fields.bits.transform_8x8_mode_flag
7911 avc_state->transform_8x8_mode_enable = 0;
/* (Re)derive rate-control inputs when BRC starts up or is being reset. */
7914 if (generic_state->brc_enabled && (!generic_state->brc_inited || generic_state->brc_need_reset)) {
7915 generic_state->target_bit_rate = ALIGN(seq_param->bits_per_second, 1000) / 1000;
7916 generic_state->init_vbv_buffer_fullness_in_bit = seq_param->bits_per_second;
7917 generic_state->vbv_buffer_size_in_bit = (uint64_t)seq_param->bits_per_second << 1;
7918 generic_state->frames_per_100s = 3000; /* 30fps */
7921 generic_state->gop_size = seq_param->intra_period;
7922 generic_state->gop_ref_distance = seq_param->ip_period;
7924 if (generic_state->internal_rate_mode == VA_RC_CBR) {
7925 generic_state->max_bit_rate = generic_state->target_bit_rate;
7926 generic_state->min_bit_rate = generic_state->target_bit_rate;
7929 if (generic_state->frame_type == SLICE_TYPE_I || generic_state->first_frame) {
7930 gen9_avc_update_misc_parameters(ctx, encode_state, encoder_context);
/* Map the requested quality level onto a kernel preset. */
7933 generic_state->preset = encoder_context->quality_level;
7934 if (encoder_context->quality_level == INTEL_PRESET_UNKNOWN) {
7935 generic_state->preset = INTEL_PRESET_RT_SPEED;
7937 generic_state->kernel_mode = gen9_avc_kernel_mode[generic_state->preset];
7939 if (!generic_state->brc_inited) {
7940 generic_state->brc_init_reset_input_bits_per_frame = ((double)(generic_state->max_bit_rate * 1000) * 100) / generic_state->frames_per_100s;;
7941 generic_state->brc_init_current_target_buf_full_in_bits = generic_state->init_vbv_buffer_fullness_in_bit;
7942 generic_state->brc_init_reset_buf_size_in_bits = generic_state->vbv_buffer_size_in_bit;
7943 generic_state->brc_target_size = generic_state->init_vbv_buffer_fullness_in_bit;
7947 generic_state->curr_pak_pass = 0;
7948 generic_state->num_pak_passes = MAX_AVC_PAK_PASS_NUM;
7950 if (generic_state->internal_rate_mode == VA_RC_CBR ||
7951 generic_state->internal_rate_mode == VA_RC_VBR)
7952 generic_state->brc_enabled = 1;
7954 generic_state->brc_enabled = 0;
/* BRC needs every rate-control input; fall back to non-BRC otherwise. */
7956 if (generic_state->brc_enabled &&
7957 (!generic_state->init_vbv_buffer_fullness_in_bit ||
7958 !generic_state->vbv_buffer_size_in_bit ||
7959 !generic_state->max_bit_rate ||
7960 !generic_state->target_bit_rate ||
7961 !generic_state->frames_per_100s)) {
7962 WARN_ONCE("Rate control parameter is required for BRC\n");
7963 generic_state->brc_enabled = 0;
7966 if (!generic_state->brc_enabled) {
7967 generic_state->target_bit_rate = 0;
7968 generic_state->max_bit_rate = 0;
7969 generic_state->min_bit_rate = 0;
7970 generic_state->init_vbv_buffer_fullness_in_bit = 0;
7971 generic_state->vbv_buffer_size_in_bit = 0;
7972 generic_state->num_pak_passes = 1;
7974 generic_state->num_pak_passes = MAX_AVC_PAK_PASS_NUM;
/* Frame geometry and the 4x/16x/32x downscaled surface sizes (in pixels and MBs). */
7978 generic_state->frame_width_in_mbs = seq_param->picture_width_in_mbs;
7979 generic_state->frame_height_in_mbs = seq_param->picture_height_in_mbs;
7980 generic_state->frame_width_in_pixel = generic_state->frame_width_in_mbs * 16;
7981 generic_state->frame_height_in_pixel = generic_state->frame_height_in_mbs * 16;
7983 generic_state->frame_width_4x = ALIGN(generic_state->frame_width_in_pixel / 4, 16);
7984 generic_state->frame_height_4x = ALIGN(generic_state->frame_height_in_pixel / 4, 16);
7985 generic_state->downscaled_width_4x_in_mb = generic_state->frame_width_4x / 16 ;
7986 generic_state->downscaled_height_4x_in_mb = generic_state->frame_height_4x / 16;
7988 generic_state->frame_width_16x = ALIGN(generic_state->frame_width_in_pixel / 16, 16);
7989 generic_state->frame_height_16x = ALIGN(generic_state->frame_height_in_pixel / 16, 16);
7990 generic_state->downscaled_width_16x_in_mb = generic_state->frame_width_16x / 16 ;
7991 generic_state->downscaled_height_16x_in_mb = generic_state->frame_height_16x / 16;
7993 generic_state->frame_width_32x = ALIGN(generic_state->frame_width_in_pixel / 32, 16);
7994 generic_state->frame_height_32x = ALIGN(generic_state->frame_height_in_pixel / 32, 16);
7995 generic_state->downscaled_width_32x_in_mb = generic_state->frame_width_32x / 16 ;
7996 generic_state->downscaled_height_32x_in_mb = generic_state->frame_height_32x / 16;
7998 if (generic_state->hme_supported) {
7999 generic_state->hme_enabled = 1;
8001 generic_state->hme_enabled = 0;
8004 if (generic_state->b16xme_supported) {
8005 generic_state->b16xme_enabled = 1;
8007 generic_state->b16xme_enabled = 0;
8010 if (generic_state->b32xme_supported) {
8011 generic_state->b32xme_enabled = 1;
8013 generic_state->b32xme_enabled = 0;
8015 /* disable HME/16xME if the size is too small */
8016 if (generic_state->frame_width_4x <= INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT) {
8017 generic_state->b32xme_supported = 0;
8018 generic_state->b32xme_enabled = 0;
8019 generic_state->b16xme_supported = 0;
8020 generic_state->b16xme_enabled = 0;
8021 generic_state->frame_width_4x = INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT;
8022 generic_state->downscaled_width_4x_in_mb = WIDTH_IN_MACROBLOCKS(INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT);
8024 if (generic_state->frame_height_4x <= INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT) {
8025 generic_state->b32xme_supported = 0;
8026 generic_state->b32xme_enabled = 0;
8027 generic_state->b16xme_supported = 0;
8028 generic_state->b16xme_enabled = 0;
8029 generic_state->frame_height_4x = INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT;
8030 generic_state->downscaled_height_4x_in_mb = WIDTH_IN_MACROBLOCKS(INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT);
8033 if (generic_state->frame_width_16x < INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT) {
8034 generic_state->b32xme_supported = 0;
8035 generic_state->b32xme_enabled = 0;
8036 generic_state->frame_width_16x = INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT;
8037 generic_state->downscaled_width_16x_in_mb = WIDTH_IN_MACROBLOCKS(INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT);
8039 if (generic_state->frame_height_16x < INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT) {
8040 generic_state->b32xme_supported = 0;
8041 generic_state->b32xme_enabled = 0;
8042 generic_state->frame_height_16x = INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT;
8043 generic_state->downscaled_height_16x_in_mb = WIDTH_IN_MACROBLOCKS(INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT);
8046 if (generic_state->frame_width_32x < INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT) {
8047 generic_state->frame_width_32x = INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT;
8048 generic_state->downscaled_width_32x_in_mb = WIDTH_IN_MACROBLOCKS(INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT);
8050 if (generic_state->frame_height_32x < INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT) {
8051 generic_state->frame_height_32x = INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT;
8052 generic_state->downscaled_height_32x_in_mb = WIDTH_IN_MACROBLOCKS(INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT);
/*
 * Validate and finalize per-frame encoding decisions: map the VA rate
 * control mode to the internal mode, sanity-check BRC settings, and
 * derive feature flags (SFD, CAF, flatness check, MB status, slice
 * layout, bi-weights, skip-bias adjustment, TQ, inter rounding).
 */
8058 gen9_avc_encode_check_parameter(VADriverContextP ctx,
8059 struct encode_state *encode_state,
8060 struct intel_encoder_context *encoder_context)
8062 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
8063 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
8064 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
8065 unsigned int rate_control_mode = encoder_context->rate_control_mode;
8066 unsigned int preset = generic_state->preset;
8067 VAEncPictureParameterBufferH264 *pic_param = avc_state->pic_param ;
8068 VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
8070 int slice_qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
8072 generic_state->avbr_curracy = 30;
8073 generic_state->avbr_convergence = 150;
/* Map the VA rate-control mode bits onto the internal mode. */
8075 switch (rate_control_mode & 0x7f) {
8077 generic_state->internal_rate_mode = VA_RC_CBR;
8081 generic_state->internal_rate_mode = VA_RC_VBR;
8086 generic_state->internal_rate_mode = VA_RC_CQP;
8090 if (rate_control_mode != VA_RC_NONE &&
8091 rate_control_mode != VA_RC_CQP) {
8092 generic_state->brc_enabled = 1;
8093 generic_state->brc_distortion_buffer_supported = 1;
8094 generic_state->brc_constant_buffer_supported = 1;
8095 generic_state->num_pak_passes = MAX_AVC_PAK_PASS_NUM;
8098 /*check brc parameter*/
8099 if (generic_state->brc_enabled) {
8100 avc_state->mb_qp_data_enable = 0;
8103 /*set the brc init and reset accordingly*/
8104 if (generic_state->brc_need_reset &&
8105 (generic_state->brc_distortion_buffer_supported == 0 ||
8106 rate_control_mode == VA_RC_CQP)) {
8107 generic_state->brc_need_reset = 0;// not supported by CQP
/* SFD (static frame detection) is used only for non-I frames outside CBR/VBR. */
8109 if (generic_state->internal_rate_mode == VA_RC_CBR || generic_state->internal_rate_mode == VA_RC_VBR || generic_state->frame_type == SLICE_TYPE_I) {
8110 avc_state->sfd_enable = 0;
8112 avc_state->sfd_enable = 1;
8115 if (generic_state->frames_per_window_size == 0) {
8116 generic_state->frames_per_window_size = (generic_state->frames_per_100s / 100 < 60) ? (generic_state->frames_per_100s / 100) : 60;
8117 } else if (generic_state->frames_per_window_size > 2 * generic_state->frames_per_100s / 100) {
8118 generic_state->frames_per_window_size = (generic_state->frames_per_100s / 100 < 60) ? (generic_state->frames_per_100s / 100) : 60;
8121 if (generic_state->brc_enabled) {
8122 generic_state->hme_enabled = generic_state->frame_type != SLICE_TYPE_I;
8123 if (avc_state->min_max_qp_enable) {
8124 generic_state->num_pak_passes = 1;
8126 generic_state->brc_roi_enable = (rate_control_mode != VA_RC_CQP) && (generic_state->num_roi > 0);// only !CQP
8127 generic_state->mb_brc_enabled = generic_state->mb_brc_enabled || generic_state->brc_roi_enable;
8129 generic_state->num_pak_passes = 1;// CQP only one pass
/* I-frame distortion collection needs BRC plus the distortion buffer. */
8132 avc_state->mbenc_i_frame_dist_in_use = 0;
8133 avc_state->mbenc_i_frame_dist_in_use = (generic_state->brc_enabled) && (generic_state->brc_distortion_buffer_supported) && (generic_state->frame_type == SLICE_TYPE_I);
8135 /*ROI must enable mbbrc.*/
/* CAF (content adaptive fractional search) per frame type, from the preset table. */
8138 if (avc_state->caf_supported) {
8139 switch (generic_state->frame_type) {
8141 avc_state->caf_enable = 0;
8144 avc_state->caf_enable = gen9_avc_all_fractional[preset] & 0x01;
8147 avc_state->caf_enable = (gen9_avc_all_fractional[preset] >> 1) & 0x01;
8151 if (avc_state->caf_enable && avc_state->caf_disable_hd && gen9_avc_disable_all_fractional_check_for_high_res[preset]) {
8152 if (generic_state->frame_width_in_pixel >= 1280 && generic_state->frame_height_in_pixel >= 720)
8153 avc_state->caf_enable = 0;
8157 avc_state->adaptive_transform_decision_enable &= gen9_avc_enable_adaptive_tx_decision[preset & 0x7];
8159 /* Flatness check is enabled only if scaling will be performed and CAF is enabled. here only frame */
8160 if (avc_state->flatness_check_supported) {
8161 avc_state->flatness_check_enable = ((avc_state->caf_enable) && (generic_state->brc_enabled || generic_state->hme_supported)) ;
8163 avc_state->flatness_check_enable = 0;
8166 /* check mb_status_supported/enable */
8167 if (avc_state->adaptive_transform_decision_enable) {
8168 avc_state->mb_status_enable = 1;
8170 avc_state->mb_status_enable = 0;
8172 /*slice check,all the slices use the same slice height except the last slice*/
8173 avc_state->arbitrary_num_mbs_in_slice = 0;
8174 for (i = 0; i < avc_state->slice_num; i++) {
8175 if (avc_state->slice_param[i]->num_macroblocks % generic_state->frame_width_in_mbs > 0) {
8176 avc_state->arbitrary_num_mbs_in_slice = 1;
8177 avc_state->slice_height = 1; /* slice height will be ignored by kernel and here set it as default value */
8179 avc_state->slice_height = avc_state->slice_param[i]->num_macroblocks / generic_state->frame_width_in_mbs;
8183 if (avc_state->slice_num > 1)
8184 avc_state->arbitrary_num_mbs_in_slice = 1;
8186 if (generic_state->frame_type == SLICE_TYPE_I) {
8187 generic_state->hme_enabled = 0;
8188 generic_state->b16xme_enabled = 0;
8189 generic_state->b32xme_enabled = 0;
8192 if (generic_state->frame_type == SLICE_TYPE_B) {
8193 gen9_avc_get_dist_scale_factor(ctx, encode_state, encoder_context);
8194 avc_state->bi_weight = gen9_avc_get_biweight(avc_state->dist_scale_factor_list0[0], pic_param->pic_fields.bits.weighted_bipred_idc);
8197 /* Determine if SkipBiasAdjustment should be enabled for P picture 1. No B frame 2. Qp >= 22 3. CQP mode */
8198 avc_state->skip_bias_adjustment_enable = avc_state->skip_bias_adjustment_supported && (generic_state->frame_type == SLICE_TYPE_P)
8199 && (generic_state->gop_ref_distance == 1) && (avc_state->pic_param->pic_init_qp + avc_state->slice_param[0]->slice_qp_delta >= 22) && !generic_state->brc_enabled;
8201 if (generic_state->kernel_mode == INTEL_ENC_KERNEL_QUALITY) {
8202 avc_state->tq_enable = 1;
8203 avc_state->tq_rounding = 6;
8204 if (generic_state->brc_enabled) {
8205 generic_state->mb_brc_enabled = 1;
8209 //check the inter rounding
8210 avc_state->rounding_value = 0;
8211 avc_state->rounding_inter_p = 255;//default
8212 avc_state->rounding_inter_b = 255; //default
8213 avc_state->rounding_inter_b_ref = 255; //default
8215 if (generic_state->frame_type == SLICE_TYPE_P) {
8216 if (avc_state->rounding_inter_p == AVC_INVALID_ROUNDING_VALUE) {
8217 if (avc_state->adaptive_rounding_inter_enable && !(generic_state->brc_enabled)) {
8218 if (generic_state->gop_ref_distance == 1)
8219 avc_state->rounding_value = gen9_avc_adaptive_inter_rounding_p_without_b[slice_qp];
8221 avc_state->rounding_value = gen9_avc_adaptive_inter_rounding_p[slice_qp];
8223 avc_state->rounding_value = gen9_avc_inter_rounding_p[generic_state->preset];
8227 avc_state->rounding_value = avc_state->rounding_inter_p;
8229 } else if (generic_state->frame_type == SLICE_TYPE_B) {
8230 if (pic_param->pic_fields.bits.reference_pic_flag) {
8231 if (avc_state->rounding_inter_b_ref == AVC_INVALID_ROUNDING_VALUE)
8232 avc_state->rounding_value = gen9_avc_inter_rounding_b_ref[generic_state->preset];
8234 avc_state->rounding_value = avc_state->rounding_inter_b_ref;
8236 if (avc_state->rounding_inter_b == AVC_INVALID_ROUNDING_VALUE) {
8237 if (avc_state->adaptive_rounding_inter_enable && !(generic_state->brc_enabled))
8238 avc_state->rounding_value = gen9_avc_adaptive_inter_rounding_b[slice_qp];
8240 avc_state->rounding_value = gen9_avc_inter_rounding_b[generic_state->preset];
8242 avc_state->rounding_value = avc_state->rounding_inter_b;
8246 return VA_STATUS_SUCCESS;
/* Prepare all per-frame GPE resources before the VME kernel pass:
 * reconstructed / input / reference surfaces, direct-MV (DMV) buffers,
 * the coded-buffer status area, and the L0/L1 reference index tables.
 * Returns a VA_STATUS_* code.
 * NOTE(review): several error-handling branch bodies and closing braces
 * are not visible in this extract — confirm against the full file. */
8250 gen9_avc_vme_gpe_kernel_prepare(VADriverContextP ctx,
8251 struct encode_state *encode_state,
8252 struct intel_encoder_context *encoder_context)
8255 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
8256 struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
8257 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
8258 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
8259 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
8261 struct object_surface *obj_surface;
8262 struct object_buffer *obj_buffer;
8263 VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
8264 VAEncPictureParameterBufferH264 *pic_param = avc_state->pic_param;
8265 struct i965_coded_buffer_segment *coded_buffer_segment;
8267 struct gen9_surface_avc *avc_priv_surface;
8269 struct avc_surface_param surface_param;
8271 unsigned char * pdata;
8273 /* Setup current reconstruct frame */
8274 obj_surface = encode_state->reconstructed_object;
8275 va_status = i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
8277 if (va_status != VA_STATUS_SUCCESS)
/* Make sure the reconstructed surface has the AVC private data
 * (gen9_surface_avc) attached, sized for the current frame. */
8280 memset(&surface_param, 0, sizeof(surface_param));
8281 surface_param.frame_width = generic_state->frame_width_in_pixel;
8282 surface_param.frame_height = generic_state->frame_height_in_pixel;
8283 va_status = gen9_avc_init_check_surfaces(ctx,
8287 if (va_status != VA_STATUS_SUCCESS)
8290 /* init the member of avc_priv_surface,frame_store_id,qp_value*/
8291 avc_priv_surface = (struct gen9_surface_avc *)obj_surface->private_data;
/* The last two DMV slots (NUM_MFC_AVC_DMV_BUFFERS - 2 / - 1) are
 * rebound to the CURRENT frame's top/bottom MV buffers each frame. */
8292 avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 2] = 0;
8293 avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 1] = 0;
8294 i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS - 2]);
8295 i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS - 1]);
8296 i965_dri_object_to_buffer_gpe_resource(&avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS - 2], avc_priv_surface->dmv_top);
8297 i965_dri_object_to_buffer_gpe_resource(&avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS - 1], avc_priv_surface->dmv_bottom);
/* Frame-level QP = picture init QP + slice delta of the first slice. */
8298 avc_priv_surface->qp_value = pic_param->pic_init_qp + slice_param->slice_qp_delta;
8299 avc_priv_surface->frame_store_id = 0;
8300 avc_priv_surface->frame_idx = pic_param->CurrPic.frame_idx;
8301 avc_priv_surface->top_field_order_cnt = pic_param->CurrPic.TopFieldOrderCnt;
8302 avc_priv_surface->is_as_ref = pic_param->pic_fields.bits.reference_pic_flag;
8303 avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 2] = avc_priv_surface->top_field_order_cnt;
8304 avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 1] = avc_priv_surface->top_field_order_cnt + 1;
8306 i965_free_gpe_resource(&generic_ctx->res_reconstructed_surface);
8307 i965_object_surface_to_2d_gpe_resource_with_align(&generic_ctx->res_reconstructed_surface, obj_surface, GPE_RESOURCE_ALIGNMENT);
8309 /* input YUV surface*/
8310 obj_surface = encode_state->input_yuv_object;
8311 va_status = i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
8313 if (va_status != VA_STATUS_SUCCESS)
8315 i965_free_gpe_resource(&generic_ctx->res_uncompressed_input_surface);
8316 i965_object_surface_to_2d_gpe_resource_with_align(&generic_ctx->res_uncompressed_input_surface, obj_surface, GPE_RESOURCE_ALIGNMENT);
8318 /* Reference surfaces */
/* Rebind every reference slot: free the old GPE views, then, for each
 * valid reference object, attach its surface and per-field DMV buffers
 * (slots 2*i / 2*i+1) and record its top-field POC. */
8319 for (i = 0; i < ARRAY_ELEMS(avc_ctx->list_reference_res); i++) {
8320 i965_free_gpe_resource(&avc_ctx->list_reference_res[i]);
8321 i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i * 2]);
8322 i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i * 2 + 1]);
8323 obj_surface = encode_state->reference_objects[i];
8324 avc_state->top_field_poc[2 * i] = 0;
8325 avc_state->top_field_poc[2 * i + 1] = 0;
8327 if (obj_surface && obj_surface->bo) {
8328 i965_object_surface_to_2d_gpe_resource_with_align(&avc_ctx->list_reference_res[i], obj_surface, GPE_RESOURCE_ALIGNMENT);
8330 /* actually it should be handled when it is reconstructed surface*/
8331 va_status = gen9_avc_init_check_surfaces(ctx,
8332 obj_surface, encoder_context,
8334 if (va_status != VA_STATUS_SUCCESS)
8336 avc_priv_surface = (struct gen9_surface_avc *)obj_surface->private_data;
8337 i965_dri_object_to_buffer_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i * 2], avc_priv_surface->dmv_top);
8338 i965_dri_object_to_buffer_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i * 2 + 1], avc_priv_surface->dmv_bottom);
8339 avc_state->top_field_poc[2 * i] = avc_priv_surface->top_field_order_cnt;
8340 avc_state->top_field_poc[2 * i + 1] = avc_priv_surface->top_field_order_cnt + 1;
8341 avc_priv_surface->frame_store_id = i;
8347 /* Encoded bitstream ?*/
8348 obj_buffer = encode_state->coded_buf_object;
8349 bo = obj_buffer->buffer_store->bo;
8350 i965_free_gpe_resource(&generic_ctx->compressed_bitstream.res);
8351 i965_dri_object_to_buffer_gpe_resource(&generic_ctx->compressed_bitstream.res, bo);
8352 generic_ctx->compressed_bitstream.start_offset = I965_CODEDBUFFER_HEADER_SIZE;
/* Payload ends one 4KB page before the buffer end (tail reserved). */
8353 generic_ctx->compressed_bitstream.end_offset = ALIGN(obj_buffer->size_element - 0x1000, 0x1000);
/* The status buffer lives inside the same coded-buffer BO. */
8356 avc_ctx->status_buffer.bo = bo;
8358 /* set the internal flag to 0 to indicate the coded size is unknown */
8360 coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual;
8361 coded_buffer_segment->mapped = 0;
8362 coded_buffer_segment->codec = encoder_context->codec;
8363 coded_buffer_segment->status_support = 1;
8365 pdata = bo->virtual + avc_ctx->status_buffer.base_offset;
8366 memset(pdata, 0, avc_ctx->status_buffer.status_buffer_size);
8369 //frame id, it is the ref pic id in the reference_objects list.
/* Derive active reference counts from the picture parameters; the
 * slice-level override flag takes precedence when set. */
8370 avc_state->num_refs[0] = 0;
8371 avc_state->num_refs[1] = 0;
8372 if (generic_state->frame_type == SLICE_TYPE_P) {
8373 avc_state->num_refs[0] = pic_param->num_ref_idx_l0_active_minus1 + 1;
8375 if (slice_param->num_ref_idx_active_override_flag)
8376 avc_state->num_refs[0] = slice_param->num_ref_idx_l0_active_minus1 + 1;
8377 } else if (generic_state->frame_type == SLICE_TYPE_B) {
8378 avc_state->num_refs[0] = pic_param->num_ref_idx_l0_active_minus1 + 1;
8379 avc_state->num_refs[1] = pic_param->num_ref_idx_l1_active_minus1 + 1;
8381 if (slice_param->num_ref_idx_active_override_flag) {
8382 avc_state->num_refs[0] = slice_param->num_ref_idx_l0_active_minus1 + 1;
8383 avc_state->num_refs[1] = slice_param->num_ref_idx_l1_active_minus1 + 1;
/* Reject app-supplied ref counts that exceed the internal tables. */
8387 if (avc_state->num_refs[0] > ARRAY_ELEMS(avc_state->list_ref_idx[0]))
8388 return VA_STATUS_ERROR_INVALID_VALUE;
8389 if (avc_state->num_refs[1] > ARRAY_ELEMS(avc_state->list_ref_idx[1]))
8390 return VA_STATUS_ERROR_INVALID_VALUE;
/* Map each RefPicList0 picture_id to its index in
 * encode_state->reference_objects; unused entries stay 0. */
8392 for (i = 0; i < ARRAY_ELEMS(avc_state->list_ref_idx[0]); i++) {
8393 VAPictureH264 *va_pic;
8395 assert(ARRAY_ELEMS(slice_param->RefPicList0) == ARRAY_ELEMS(avc_state->list_ref_idx[0]));
8396 avc_state->list_ref_idx[0][i] = 0;
8398 if (i >= avc_state->num_refs[0])
8401 va_pic = &slice_param->RefPicList0[i];
8403 for (j = 0; j < ARRAY_ELEMS(encode_state->reference_objects); j++) {
8404 obj_surface = encode_state->reference_objects[j];
8408 obj_surface->base.id == va_pic->picture_id) {
8410 assert(obj_surface->base.id != VA_INVALID_SURFACE);
8411 avc_state->list_ref_idx[0][i] = j;
/* Same mapping for RefPicList1 (B-frames). */
8417 for (i = 0; i < ARRAY_ELEMS(avc_state->list_ref_idx[1]); i++) {
8418 VAPictureH264 *va_pic;
8420 assert(ARRAY_ELEMS(slice_param->RefPicList1) == ARRAY_ELEMS(avc_state->list_ref_idx[1]));
8421 avc_state->list_ref_idx[1][i] = 0;
8423 if (i >= avc_state->num_refs[1])
8426 va_pic = &slice_param->RefPicList1[i];
8428 for (j = 0; j < ARRAY_ELEMS(encode_state->reference_objects); j++) {
8429 obj_surface = encode_state->reference_objects[j];
8433 obj_surface->base.id == va_pic->picture_id) {
8435 assert(obj_surface->base.id != VA_INVALID_SURFACE);
8436 avc_state->list_ref_idx[1][i] = j;
8443 return VA_STATUS_SUCCESS;
/* Per-frame kernel init hook for the VME path. Intentionally a no-op:
 * all state is prepared in gen9_avc_vme_gpe_kernel_prepare(). */
8447 gen9_avc_vme_gpe_kernel_init(VADriverContextP ctx,
8448 struct encode_state *encode_state,
8449 struct intel_encoder_context *encoder_context)
8451 return VA_STATUS_SUCCESS;
/* Post-kernel bookkeeping for the VME pass: once all kernels of the
 * frame have been submitted, mark BRC as initialized and clear the
 * reset / curbe-set-in-BRC-update flags for the next frame. */
8455 gen9_avc_vme_gpe_kernel_final(VADriverContextP ctx,
8456 struct encode_state *encode_state,
8457 struct intel_encoder_context *encoder_context)
8460 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
8461 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
8462 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
8464 /*set this flag when all kernel is finished*/
8465 if (generic_state->brc_enabled) {
8466 generic_state->brc_inited = 1;
8467 generic_state->brc_need_reset = 0;
8468 avc_state->mbenc_curbe_set_in_brc_update = 0;
8470 return VA_STATUS_SUCCESS;
/* Dispatch the full VME kernel sequence for one frame, in order:
 * BRC init/reset, HME scaling (4x/16x/32x), ME (coarsest first),
 * optional SFD, BRC frame/MB update, optional weighted prediction,
 * and finally MbEnc. Returns VA_STATUS_SUCCESS. */
8474 gen9_avc_vme_gpe_kernel_run(VADriverContextP ctx,
8475 struct encode_state *encode_state,
8476 struct intel_encoder_context *encoder_context)
8478 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
8479 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
8480 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
8481 int fei_enabled = encoder_context->fei_enabled;
8483 VAEncPictureParameterBufferH264 *pic_param = avc_state->pic_param;
8484 VAEncSliceParameterBufferH264 *slice_param = avc_state->slice_param[0];
8487 /* BRC init/reset needs to be called before HME since it will reset the Brc Distortion surface*/
8488 if (!fei_enabled && generic_state->brc_enabled && (!generic_state->brc_inited || generic_state->brc_need_reset)) {
8489 gen9_avc_kernel_brc_init_reset(ctx, encode_state, encoder_context);
/* Downscale pyramid: 16x needs 4x, 32x needs 16x (nested checks). */
8493 if (generic_state->hme_supported) {
8494 gen9_avc_kernel_scaling(ctx, encode_state, encoder_context, INTEL_ENC_HME_4x);
8495 if (generic_state->b16xme_supported) {
8496 gen9_avc_kernel_scaling(ctx, encode_state, encoder_context, INTEL_ENC_HME_16x);
8497 if (generic_state->b32xme_supported) {
8498 gen9_avc_kernel_scaling(ctx, encode_state, encoder_context, INTEL_ENC_HME_32x);
/* ME runs coarsest-first so each level can seed the next finer one. */
8504 if (generic_state->hme_enabled) {
8505 if (generic_state->b16xme_enabled) {
8506 if (generic_state->b32xme_enabled) {
8507 gen9_avc_kernel_me(ctx, encode_state, encoder_context, INTEL_ENC_HME_32x);
8509 gen9_avc_kernel_me(ctx, encode_state, encoder_context, INTEL_ENC_HME_16x);
8511 gen9_avc_kernel_me(ctx, encode_state, encoder_context, INTEL_ENC_HME_4x);
8514 /*call SFD kernel after HME in same command buffer*/
/* Static-frame detection only in frame mode (not per-MB) with HME on. */
8515 sfd_in_use = avc_state->sfd_enable && generic_state->hme_enabled;
8516 sfd_in_use = sfd_in_use && !avc_state->sfd_mb_enable;
8518 gen9_avc_kernel_sfd(ctx, encode_state, encoder_context);
8521 /* BRC and MbEnc are included in the same task phase*/
8522 if (generic_state->brc_enabled) {
/* Optional I-frame-distortion MbEnc pre-pass feeds the BRC update. */
8523 if (avc_state->mbenc_i_frame_dist_in_use) {
8524 gen9_avc_kernel_mbenc(ctx, encode_state, encoder_context, true);
8526 gen9_avc_kernel_brc_frame_update(ctx, encode_state, encoder_context);
8528 if (avc_state->brc_split_enable && generic_state->mb_brc_enabled) {
8529 gen9_avc_kernel_brc_mb_update(ctx, encode_state, encoder_context);
8533 /*weight prediction,disable by now */
8534 avc_state->weighted_ref_l0_enable = 0;
8535 avc_state->weighted_ref_l1_enable = 0;
8536 if (avc_state->weighted_prediction_supported &&
8537 ((generic_state->frame_type == SLICE_TYPE_P && pic_param->pic_fields.bits.weighted_pred_flag) ||
8538 (generic_state->frame_type == SLICE_TYPE_B && pic_param->pic_fields.bits.weighted_bipred_idc == INTEL_AVC_WP_MODE_EXPLICIT))) {
8539 if (slice_param->luma_weight_l0_flag & 1) {
8540 gen9_avc_kernel_wp(ctx, encode_state, encoder_context, 0);
/* No usable L0 weights: fall back to non-weighted prediction. */
8542 } else if (!(slice_param->chroma_weight_l0_flag & 1)) {
8543 pic_param->pic_fields.bits.weighted_pred_flag = 0;// it should be handled in app
8546 if (generic_state->frame_type == SLICE_TYPE_B && pic_param->pic_fields.bits.weighted_bipred_idc == INTEL_AVC_WP_MODE_EXPLICIT) {
8547 if (slice_param->luma_weight_l1_flag & 1) {
8548 gen9_avc_kernel_wp(ctx, encode_state, encoder_context, 1);
8549 } else if (!((slice_param->luma_weight_l0_flag & 1) ||
8550 (slice_param->chroma_weight_l0_flag & 1) ||
8551 (slice_param->chroma_weight_l1_flag & 1))) {
8552 pic_param->pic_fields.bits.weighted_bipred_idc = INTEL_AVC_WP_MODE_DEFAULT;// it should be handled in app
/* Main MbEnc pass (not the I-frame-distortion variant). */
8558 gen9_avc_kernel_mbenc(ctx, encode_state, encoder_context, false);
8560 /*ignore the reset vertical line kernel*/
8562 return VA_STATUS_SUCCESS;
/* Top-level VME (ENC) pipeline entry: update/check parameters, allocate
 * resources, prepare + init + run the GPE kernels, then finalize.
 * Each stage short-circuits on a non-success VA status. */
8566 gen9_avc_vme_pipeline(VADriverContextP ctx,
8568 struct encode_state *encode_state,
8569 struct intel_encoder_context *encoder_context)
8573 gen9_avc_update_parameters(ctx, profile, encode_state, encoder_context);
8575 va_status = gen9_avc_encode_check_parameter(ctx, encode_state, encoder_context);
8576 if (va_status != VA_STATUS_SUCCESS)
8579 va_status = gen9_avc_allocate_resources(ctx, encode_state, encoder_context);
8580 if (va_status != VA_STATUS_SUCCESS)
8583 va_status = gen9_avc_vme_gpe_kernel_prepare(ctx, encode_state, encoder_context);
8584 if (va_status != VA_STATUS_SUCCESS)
8587 va_status = gen9_avc_vme_gpe_kernel_init(ctx, encode_state, encoder_context);
8588 if (va_status != VA_STATUS_SUCCESS)
8591 va_status = gen9_avc_vme_gpe_kernel_run(ctx, encode_state, encoder_context);
8592 if (va_status != VA_STATUS_SUCCESS)
/* Finalization never fails; its status is not checked. */
8595 gen9_avc_vme_gpe_kernel_final(ctx, encode_state, encoder_context);
8597 return VA_STATUS_SUCCESS;
8600 /* Update PreEnc specific parameters */
/* Validate and latch PreEnc (statistics) parameters for the frame:
 * infer the slice type from past/future ref counts, derive frame and
 * 4x-downscaled dimensions, and bind all app-provided buffers (MV
 * predictor, per-MB QP, MV/statistics outputs, past/future ref stat
 * buffers) to GPE resources after size checks.
 * Returns VA_STATUS_SUCCESS, VA_STATUS_ERROR_OPERATION_FAILED when
 * PreEnc is not active, or VA_STATUS_ERROR_INVALID_BUFFER on a bad
 * buffer (the error label is outside this extract). */
8602 gen9_avc_preenc_update_parameters(VADriverContextP ctx,
8604 struct encode_state *encode_state,
8605 struct intel_encoder_context *encoder_context)
8607 struct i965_driver_data *i965 = i965_driver_data(ctx);
8608 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
8609 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
8610 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
8611 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
8612 VAStatsStatisticsParameterH264 *stat_param_h264 = NULL;
8613 VAStatsStatisticsParameter *stat_param = NULL;
8614 struct object_buffer *obj_buffer = NULL;
8615 struct object_buffer *obj_buffer_mv = NULL, *obj_buffer_stat = NULL;
8616 struct buffer_store *buffer_store = NULL;
8617 unsigned int size = 0, i = 0;
8618 unsigned int frame_mb_nums = 0;
/* PreEnc requires the VAStatsStatisticsParameterH264 buffer. */
8620 if (!encoder_context->preenc_enabled ||
8621 !encode_state->stat_param_ext ||
8622 !encode_state->stat_param_ext->buffer)
8623 return VA_STATUS_ERROR_OPERATION_FAILED;
8625 stat_param_h264 = avc_state->stat_param =
8626 (VAStatsStatisticsParameterH264 *)encode_state->stat_param_ext->buffer;
8627 stat_param = &stat_param_h264->stats_params;
8629 /* Assume the frame type based on number of past/future ref frames */
8630 if (!stat_param->num_past_references && !stat_param->num_future_references)
8631 generic_state->frame_type = SLICE_TYPE_I;
8632 else if (stat_param->num_future_references > 0)
8633 generic_state->frame_type = SLICE_TYPE_B;
8635 generic_state->frame_type = SLICE_TYPE_P;
8637 generic_state->preset = INTEL_PRESET_RT_SPEED;
8638 generic_state->kernel_mode = gen9_avc_kernel_mode[generic_state->preset];
8640 /* frame width and height */
8641 generic_state->frame_width_in_pixel = encoder_context->frame_width_in_pixel;
8642 generic_state->frame_height_in_pixel = encoder_context->frame_height_in_pixel;
8643 generic_state->frame_width_in_mbs = ALIGN(generic_state->frame_width_in_pixel, 16) / 16;
8644 generic_state->frame_height_in_mbs = ALIGN(generic_state->frame_height_in_pixel, 16) / 16;
8646 /* 4x downscaled width and height */
8647 generic_state->frame_width_4x = ALIGN(generic_state->frame_width_in_pixel / 4, 16);
8648 generic_state->frame_height_4x = ALIGN(generic_state->frame_height_in_pixel / 4, 16);
8649 generic_state->downscaled_width_4x_in_mb = generic_state->frame_width_4x / 16 ;
8650 generic_state->downscaled_height_4x_in_mb = generic_state->frame_height_4x / 16;
8652 /* reset hme types for preenc */
8653 if (generic_state->frame_type != SLICE_TYPE_I)
8654 generic_state->hme_enabled = 1;
8656 /* ensure frame width is not too small */
8657 if (generic_state->frame_width_4x <= INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT) {
8658 generic_state->frame_width_4x = INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT;
8659 generic_state->downscaled_width_4x_in_mb =
8660 WIDTH_IN_MACROBLOCKS(INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT);
8663 /* ensure frame height is not too small*/
8664 if (generic_state->frame_height_4x <= INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT) {
8665 generic_state->frame_height_4x = INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT;
8666 generic_state->downscaled_height_4x_in_mb =
8667 WIDTH_IN_MACROBLOCKS(INTEL_VME_MIN_ALLOWED_WIDTH_HEIGHT);
8670 /********** Ensure buffer object parameters ********/
8671 frame_mb_nums = generic_state->frame_width_in_mbs * generic_state->frame_height_in_mbs;
8673 /* mv predictor buffer */
8674 if (stat_param_h264->mv_predictor_ctrl) {
8675 if (stat_param->mv_predictor == VA_INVALID_ID)
8677 size = frame_mb_nums * FEI_AVC_MV_PREDICTOR_BUFFER_SIZE;
8678 obj_buffer = BUFFER(stat_param->mv_predictor);
8681 buffer_store = obj_buffer->buffer_store;
8682 if (buffer_store->bo->size < size)
8684 if (avc_ctx->preproc_mv_predictor_buffer.bo != NULL)
8685 i965_free_gpe_resource(&avc_ctx->preproc_mv_predictor_buffer);
8686 i965_dri_object_to_buffer_gpe_resource(
8687 &avc_ctx->preproc_mv_predictor_buffer,
/* per-MB QP input buffer */
8692 if (stat_param_h264->mb_qp) {
8693 if (stat_param->qp == VA_INVALID_ID)
8695 size = frame_mb_nums * FEI_AVC_QP_BUFFER_SIZE;
8696 obj_buffer = BUFFER(stat_param->qp);
8697 buffer_store = obj_buffer->buffer_store;
8698 if (buffer_store->bo->size < size)
8700 if (avc_ctx->preproc_mb_qp_buffer.bo != NULL)
8701 i965_free_gpe_resource(&avc_ctx->preproc_mb_qp_buffer);
8702 i965_dri_object_to_buffer_gpe_resource(
8703 &avc_ctx->preproc_mb_qp_buffer,
8707 /* locate mv and stat buffer */
8708 if (!stat_param_h264->disable_mv_output ||
8709 !stat_param_h264->disable_statistics_output) {
8711 if (!stat_param->outputs)
/* outputs[] may list the MV and stats buffers in either order;
 * classify them by buffer type. */
8714 for (i = 0; i < 2 ; i++) {
8715 if (stat_param->outputs[i] != VA_INVALID_ID) {
8716 obj_buffer = BUFFER(stat_param->outputs[i]);
8717 switch (obj_buffer->type) {
8718 case VAStatsMVBufferType:
8719 obj_buffer_mv = obj_buffer;
8721 case VAStatsStatisticsBufferType:
8722 obj_buffer_stat = obj_buffer;
8728 if (!(!stat_param_h264->disable_mv_output &&
8729 !stat_param_h264->disable_statistics_output))
8733 /* mv data output buffer */
8734 if (!stat_param_h264->disable_mv_output && obj_buffer_mv) {
8735 size = frame_mb_nums * FEI_AVC_MV_DATA_BUFFER_SIZE;
8736 buffer_store = obj_buffer_mv->buffer_store;
8737 if (buffer_store->bo->size < size)
8739 if (avc_ctx->preproc_mv_data_out_buffer.bo != NULL)
8740 i965_free_gpe_resource(&avc_ctx->preproc_mv_data_out_buffer);
8741 i965_dri_object_to_buffer_gpe_resource(
8742 &avc_ctx->preproc_mv_data_out_buffer,
8745 /* statistics output buffer */
8746 if (!stat_param_h264->disable_statistics_output && obj_buffer_stat) {
8747 size = frame_mb_nums * PREENC_AVC_STATISTICS_BUFFER_SIZE;
8748 buffer_store = obj_buffer_stat->buffer_store;
8749 if (buffer_store->bo->size < size)
8751 if (avc_ctx->preproc_stat_data_out_buffer.bo != NULL)
8752 i965_free_gpe_resource(&avc_ctx->preproc_stat_data_out_buffer);
8753 i965_dri_object_to_buffer_gpe_resource(
8754 &avc_ctx->preproc_stat_data_out_buffer,
8758 /* past ref stat out buffer */
8759 if (stat_param->num_past_references && stat_param->past_ref_stat_buf &&
8760 stat_param->past_ref_stat_buf[0] != VA_INVALID_ID) {
8761 size = frame_mb_nums * PREENC_AVC_STATISTICS_BUFFER_SIZE;
8762 obj_buffer = BUFFER(stat_param->past_ref_stat_buf[0]);
8763 buffer_store = obj_buffer->buffer_store;
8764 if (buffer_store->bo->size < size)
8766 if (avc_ctx->preenc_past_ref_stat_data_out_buffer.bo != NULL)
8767 i965_free_gpe_resource(&avc_ctx->preenc_past_ref_stat_data_out_buffer);
8768 i965_dri_object_to_buffer_gpe_resource(
8769 &avc_ctx->preenc_past_ref_stat_data_out_buffer,
8772 /* future ref stat out buffer */
/* NOTE(review): this condition tests num_past_references while binding
 * the *future* ref stat buffer — likely should be num_future_references;
 * confirm against the libva FEI/statistics interface. */
8773 if (stat_param->num_past_references && stat_param->future_ref_stat_buf &&
8774 stat_param->future_ref_stat_buf[0] != VA_INVALID_ID) {
8775 size = frame_mb_nums * PREENC_AVC_STATISTICS_BUFFER_SIZE;
8776 obj_buffer = BUFFER(stat_param->future_ref_stat_buf[0]);
8777 buffer_store = obj_buffer->buffer_store;
8778 if (buffer_store->bo->size < size)
8780 if (avc_ctx->preenc_future_ref_stat_data_out_buffer.bo != NULL)
8781 i965_free_gpe_resource(&avc_ctx->preenc_future_ref_stat_data_out_buffer);
8782 i965_dri_object_to_buffer_gpe_resource(
8783 &avc_ctx->preenc_future_ref_stat_data_out_buffer,
8786 return VA_STATUS_SUCCESS;
/* Shared error exit for all buffer-validation failures above. */
8789 return VA_STATUS_ERROR_INVALID_BUFFER;
8792 /* allocate internal resouces required for PreEenc */
/* Allocate the driver-internal resources PreEnc needs each frame:
 * 4x MEMV data/distortion surfaces, BRC distortion surface, the MBBRC
 * constant-data buffer, the 4x downscaled current/past/future surfaces,
 * and a dummy coded buffer used as a status-buffer holder.
 * Returns VA_STATUS_SUCCESS or VA_STATUS_ERROR_ALLOCATION_FAILED
 * (via the failed_allocation label). */
8794 gen9_avc_preenc_allocate_internal_resources(VADriverContextP ctx,
8795 struct encode_state *encode_state,
8796 struct intel_encoder_context *encoder_context)
8798 struct i965_driver_data *i965 = i965_driver_data(ctx);
8799 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
8800 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
8801 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
8802 unsigned int width = 0;
8803 unsigned int height = 0;
8804 unsigned int size = 0;
8805 int allocate_flag = 1;
8807 /* 4x MEMV data buffer */
8808 width = ALIGN(generic_state->downscaled_width_4x_in_mb * 32, 64);
8809 height = generic_state->downscaled_height_4x_in_mb * 4 * 2 * 10;
8810 i965_free_gpe_resource(&avc_ctx->s4x_memv_data_buffer);
8811 allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
8812 &avc_ctx->s4x_memv_data_buffer,
8815 "4x MEMV data buffer");
8817 goto failed_allocation;
8818 i965_zero_gpe_resource(&avc_ctx->s4x_memv_data_buffer);
8820 /* Output DISTORTION surface from 4x ME */
8821 width = generic_state->downscaled_width_4x_in_mb * 8;
8822 height = generic_state->downscaled_height_4x_in_mb * 4 * 10;
8823 i965_free_gpe_resource(&avc_ctx->s4x_memv_distortion_buffer);
8824 allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
8825 &avc_ctx->s4x_memv_distortion_buffer,
8828 "4x MEMV distortion buffer");
8830 goto failed_allocation;
8831 i965_zero_gpe_resource(&avc_ctx->s4x_memv_distortion_buffer);
8833 /* output BRC DISTORTION surface from 4x ME */
8834 width = (generic_state->downscaled_width_4x_in_mb + 7) / 8 * 64;
8835 height = (generic_state->downscaled_height_4x_in_mb + 1) / 2 * 8;
8836 i965_free_gpe_resource(&avc_ctx->res_brc_dist_data_surface);
8837 allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
8838 &avc_ctx->res_brc_dist_data_surface,
8841 "brc dist data buffer");
8843 goto failed_allocation;
8844 i965_zero_gpe_resource(&avc_ctx->res_brc_dist_data_surface);
8847 /* FTQ Lut buffer,whichs is the mbbrc_const_data_buffer */
8848 i965_free_gpe_resource(&avc_ctx->res_mbbrc_const_data_buffer);
/* 16 DWORD-sized entries per QP value. */
8849 size = 16 * AVC_QP_MAX * 4;
8850 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
8851 &avc_ctx->res_mbbrc_const_data_buffer,
8852 ALIGN(size, 0x1000),
8853 "mbbrc const data buffer");
8855 goto failed_allocation;
8856 i965_zero_gpe_resource(&avc_ctx->res_mbbrc_const_data_buffer);
8858 /* 4x downscaled surface */
/* The three scaled surfaces are created once and reused across frames. */
8859 if (!avc_ctx->preenc_scaled_4x_surface_obj) {
8860 i965_CreateSurfaces(ctx,
8861 generic_state->frame_width_4x,
8862 generic_state->frame_height_4x,
8863 VA_RT_FORMAT_YUV420,
8865 &avc_ctx->preenc_scaled_4x_surface_id);
8866 avc_ctx->preenc_scaled_4x_surface_obj = SURFACE(avc_ctx->preenc_scaled_4x_surface_id);
8867 if (!avc_ctx->preenc_scaled_4x_surface_obj)
8868 goto failed_allocation;
8869 i965_check_alloc_surface_bo(ctx, avc_ctx->preenc_scaled_4x_surface_obj, 1,
8870 VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
8873 /* 4x downscaled past ref surface */
8874 if (!avc_ctx->preenc_past_ref_scaled_4x_surface_obj) {
8875 i965_CreateSurfaces(ctx,
8876 generic_state->frame_width_4x,
8877 generic_state->frame_height_4x,
8878 VA_RT_FORMAT_YUV420,
8880 &avc_ctx->preenc_past_ref_scaled_4x_surface_id);
8881 avc_ctx->preenc_past_ref_scaled_4x_surface_obj =
8882 SURFACE(avc_ctx->preenc_past_ref_scaled_4x_surface_id);
8883 if (!avc_ctx->preenc_past_ref_scaled_4x_surface_obj)
8884 goto failed_allocation;
8885 i965_check_alloc_surface_bo(ctx, avc_ctx->preenc_past_ref_scaled_4x_surface_obj, 1,
8886 VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
8889 /* 4x downscaled future ref surface */
8890 if (!avc_ctx->preenc_future_ref_scaled_4x_surface_obj) {
8891 i965_CreateSurfaces(ctx,
8892 generic_state->frame_width_4x,
8893 generic_state->frame_height_4x,
8894 VA_RT_FORMAT_YUV420,
8896 &avc_ctx->preenc_future_ref_scaled_4x_surface_id);
8897 avc_ctx->preenc_future_ref_scaled_4x_surface_obj =
8898 SURFACE(avc_ctx->preenc_future_ref_scaled_4x_surface_id);
8899 if (!avc_ctx->preenc_future_ref_scaled_4x_surface_obj)
8900 goto failed_allocation;
8901 i965_check_alloc_surface_bo(ctx, avc_ctx->preenc_future_ref_scaled_4x_surface_obj, 1,
8902 VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
8905 /* FeiPreEncFixme: dummy coded buffer. This is a tweak which helps to use
8906 * the generic AVC Encdoe codepath which allocate status buffer as extension
8908 if (!avc_ctx->status_buffer.bo) {
8910 generic_state->frame_width_in_pixel * generic_state->frame_height_in_pixel * 12;
8911 size += I965_CODEDBUFFER_HEADER_SIZE;
8913 avc_ctx->status_buffer.bo = dri_bo_alloc(i965->intel.bufmgr,
8914 "Dummy Coded Buffer",
8918 return VA_STATUS_SUCCESS;
8921 return VA_STATUS_ERROR_ALLOCATION_FAILED;
/* Dispatch the PreEnc kernel sequence: 4x scaling of the current frame
 * (plus past/future refs when present), 4x HME, then the preproc kernel
 * when MV or statistics output is requested. */
8926 gen9_avc_preenc_gpe_kernel_run(VADriverContextP ctx,
8927 struct encode_state *encode_state,
8928 struct intel_encoder_context *encoder_context)
8930 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
8931 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
8932 struct avc_enc_state * avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
/* NOTE(review): stray ";;" below — harmless, but should be one ";". */
8933 VAStatsStatisticsParameterH264 *stat_param_h264 = avc_state->stat_param;;
8934 VAStatsStatisticsParameter *stat_param = &stat_param_h264->stats_params;
8936 /* FeiPreEncFixme: Optimize the scaling. Keep a cache of already scaled surfaces
8937 * to avoid repeated scaling of same surfaces */
8940 gen9_avc_preenc_kernel_scaling(ctx, encode_state, encoder_context,
8941 INTEL_ENC_HME_4x, SCALE_CUR_PIC);
8942 if (stat_param->num_past_references > 0) {
8943 gen9_avc_preenc_kernel_scaling(ctx, encode_state, encoder_context,
8944 INTEL_ENC_HME_4x, SCALE_PAST_REF_PIC);
8946 if (stat_param->num_future_references > 0) {
8947 gen9_avc_preenc_kernel_scaling(ctx, encode_state, encoder_context,
8948 INTEL_ENC_HME_4x, SCALE_FUTURE_REF_PIC);
/* hme_enabled is set for non-I frames in preenc_update_parameters. */
8952 if (generic_state->hme_enabled) {
8953 gen9_avc_kernel_me(ctx, encode_state, encoder_context, INTEL_ENC_HME_4x);
8956 /* preproc kernel */
8957 if (!stat_param_h264->disable_mv_output || !stat_param_h264->disable_statistics_output) {
8958 gen9_avc_preenc_kernel_preproc(ctx, encode_state, encoder_context);
8961 return VA_STATUS_SUCCESS;
/* Top-level PreEnc pipeline entry: update parameters, allocate internal
 * resources, then run the PreEnc kernels; each stage short-circuits on
 * a non-success VA status. */
8965 gen9_avc_preenc_pipeline(VADriverContextP ctx,
8967 struct encode_state *encode_state,
8968 struct intel_encoder_context *encoder_context)
8972 va_status = gen9_avc_preenc_update_parameters(ctx, profile, encode_state, encoder_context);
8973 if (va_status != VA_STATUS_SUCCESS)
8976 va_status = gen9_avc_preenc_allocate_internal_resources(ctx, encode_state, encoder_context);
8977 if (va_status != VA_STATUS_SUCCESS)
8980 va_status = gen9_avc_preenc_gpe_kernel_run(ctx, encode_state, encoder_context);
8981 if (va_status != VA_STATUS_SUCCESS)
8984 return VA_STATUS_SUCCESS;
/* Destructor for the VME context: tears down the GPE kernels and frees
 * the codec-state allocations owned by the context.
 * NOTE(review): only free(generic_state) is visible in this extract;
 * the frees of the other sub-objects appear to be on elided lines. */
8988 gen9_avc_vme_context_destroy(void * context)
8990 struct encoder_vme_mfc_context *vme_context = (struct encoder_vme_mfc_context *)context;
8991 struct generic_encoder_context *generic_ctx;
8992 struct i965_avc_encoder_context *avc_ctx;
8993 struct generic_enc_codec_state *generic_state;
8994 struct avc_enc_state *avc_state;
8999 generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
9000 avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
9001 generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
9002 avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
9004 gen9_avc_kernel_destroy(vme_context);
9008 free(generic_state);
/* Gen8 kernel setup: loads the scaling/BRC/ME/MbEnc/SFD kernel
 * contexts and wires the generic-context function pointers.
 * Differs from the Gen9 path by using the gen8_* curbe setters for
 * 4x scaling, ME, MbEnc and BRC frame update. */
9016 gen8_avc_kernel_init(VADriverContextP ctx,
9017 struct intel_encoder_context *encoder_context)
9019 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
9020 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
9021 struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
9022 int fei_enabled = encoder_context->fei_enabled;
/* FEI mode uses a different kernel-binary header layout. */
9024 generic_ctx->get_kernel_header_and_size = fei_enabled ?
9025 intel_avc_fei_get_kernel_header_and_size :
9026 intel_avc_get_kernel_header_and_size ;
9027 gen9_avc_kernel_init_scaling(ctx, generic_ctx, &avc_ctx->context_scaling, false);
9028 gen9_avc_kernel_init_brc(ctx, generic_ctx, &avc_ctx->context_brc);
9029 gen9_avc_kernel_init_me(ctx, generic_ctx, &avc_ctx->context_me, false);
9030 gen9_avc_kernel_init_mbenc(ctx, generic_ctx, &avc_ctx->context_mbenc, fei_enabled);
9031 gen9_avc_kernel_init_sfd(ctx, generic_ctx, &avc_ctx->context_sfd);
/* Curbe setters (gen8 variants where they differ from gen9). */
9034 generic_ctx->pfn_set_curbe_scaling2x = gen9_avc_set_curbe_scaling2x;
9035 generic_ctx->pfn_set_curbe_scaling4x = gen8_avc_set_curbe_scaling4x;
9036 generic_ctx->pfn_set_curbe_me = gen8_avc_set_curbe_me;
9037 generic_ctx->pfn_set_curbe_mbenc = gen8_avc_set_curbe_mbenc;
9038 generic_ctx->pfn_set_curbe_brc_init_reset = gen9_avc_set_curbe_brc_init_reset;
9039 generic_ctx->pfn_set_curbe_brc_frame_update = gen8_avc_set_curbe_brc_frame_update;
9040 generic_ctx->pfn_set_curbe_sfd = gen9_avc_set_curbe_sfd;
/* Surface-state senders are shared with the gen9 path. */
9042 generic_ctx->pfn_send_scaling_surface = gen9_avc_send_surface_scaling;
9043 generic_ctx->pfn_send_me_surface = gen9_avc_send_surface_me;
9044 generic_ctx->pfn_send_mbenc_surface = gen9_avc_send_surface_mbenc;
9045 generic_ctx->pfn_send_brc_init_reset_surface = gen9_avc_send_surface_brc_init_reset;
9046 generic_ctx->pfn_send_brc_frame_update_surface = gen9_avc_send_surface_brc_frame_update;
9047 generic_ctx->pfn_send_sfd_surface = gen9_avc_send_surface_sfd;
/* Gen9 kernel setup. Selects one of three configurations:
 *   - plain AVC encode: full kernel set + all pfn hooks, with a
 *     platform-specific 4x-scaling curbe setter (SKL/BXT vs KBL/GLK/Gen10);
 *   - FEI encode: MbEnc only, with FEI curbe/surface hooks;
 *   - PreEnc: scaling + ME + preproc kernels with PreEnc hooks. */
9050 gen9_avc_kernel_init(VADriverContextP ctx,
9051 struct intel_encoder_context *encoder_context)
9053 struct i965_driver_data *i965 = i965_driver_data(ctx);
9054 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
9055 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
9056 struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
9057 int fei_enabled = encoder_context->fei_enabled;
9058 int preenc_enabled = encoder_context->preenc_enabled;
/* FEI and PreEnc share the FEI kernel-binary header layout. */
9060 generic_ctx->get_kernel_header_and_size = (fei_enabled || preenc_enabled) ?
9061 intel_avc_fei_get_kernel_header_and_size :
9062 intel_avc_get_kernel_header_and_size ;
9064 if (!fei_enabled && !preenc_enabled) {
9065 /* generic AVC Encoder */
9066 gen9_avc_kernel_init_scaling(ctx, generic_ctx, &avc_ctx->context_scaling, false);
9067 gen9_avc_kernel_init_brc(ctx, generic_ctx, &avc_ctx->context_brc);
9068 gen9_avc_kernel_init_me(ctx, generic_ctx, &avc_ctx->context_me, false);
9069 gen9_avc_kernel_init_mbenc(ctx, generic_ctx, &avc_ctx->context_mbenc,
9070 encoder_context->fei_enabled);
9071 gen9_avc_kernel_init_wp(ctx, generic_ctx, &avc_ctx->context_wp);
9072 gen9_avc_kernel_init_sfd(ctx, generic_ctx, &avc_ctx->context_sfd);
/* Curbe setters for every kernel in the generic encode path. */
9075 generic_ctx->pfn_set_curbe_scaling2x = gen9_avc_set_curbe_scaling2x;
9076 generic_ctx->pfn_set_curbe_scaling4x = gen9_avc_set_curbe_scaling4x;
9077 generic_ctx->pfn_set_curbe_me = gen9_avc_set_curbe_me;
9078 generic_ctx->pfn_set_curbe_mbenc = gen9_avc_set_curbe_mbenc;
9079 generic_ctx->pfn_set_curbe_brc_init_reset = gen9_avc_set_curbe_brc_init_reset;
9080 generic_ctx->pfn_set_curbe_brc_frame_update = gen9_avc_set_curbe_brc_frame_update;
9081 generic_ctx->pfn_set_curbe_brc_mb_update = gen9_avc_set_curbe_brc_mb_update;
9082 generic_ctx->pfn_set_curbe_sfd = gen9_avc_set_curbe_sfd;
9083 generic_ctx->pfn_set_curbe_wp = gen9_avc_set_curbe_wp;
/* Surface-state senders. */
9085 generic_ctx->pfn_send_scaling_surface = gen9_avc_send_surface_scaling;
9086 generic_ctx->pfn_send_me_surface = gen9_avc_send_surface_me;
9087 generic_ctx->pfn_send_mbenc_surface = gen9_avc_send_surface_mbenc;
9088 generic_ctx->pfn_send_brc_init_reset_surface = gen9_avc_send_surface_brc_init_reset;
9089 generic_ctx->pfn_send_brc_frame_update_surface = gen9_avc_send_surface_brc_frame_update;
9090 generic_ctx->pfn_send_brc_mb_update_surface = gen9_avc_send_surface_brc_mb_update;
9091 generic_ctx->pfn_send_sfd_surface = gen9_avc_send_surface_sfd;
9092 generic_ctx->pfn_send_wp_surface = gen9_avc_send_surface_wp;
/* Platform-specific override of the 4x scaling curbe setter. */
9094 if (IS_SKL(i965->intel.device_info) ||
9095 IS_BXT(i965->intel.device_info))
9096 generic_ctx->pfn_set_curbe_scaling4x = gen9_avc_set_curbe_scaling4x;
9097 else if (IS_KBL(i965->intel.device_info) ||
9098 IS_GEN10(i965->intel.device_info) ||
9099 IS_GLK(i965->intel.device_info))
9100 generic_ctx->pfn_set_curbe_scaling4x = gen95_avc_set_curbe_scaling4x;
9102 } else if (fei_enabled) {
9103 /* FEI AVC Encoding */
9104 gen9_avc_kernel_init_mbenc(ctx, generic_ctx, &avc_ctx->context_mbenc,
9105 encoder_context->fei_enabled);
9106 generic_ctx->pfn_set_curbe_mbenc = gen9_avc_fei_set_curbe_mbenc;
9107 generic_ctx->pfn_send_mbenc_surface = gen9_avc_fei_send_surface_mbenc;
9110 /* PreEnc for AVC */
9111 gen9_avc_kernel_init_scaling(ctx, generic_ctx, &avc_ctx->context_scaling,
9112 encoder_context->preenc_enabled);
9113 gen9_avc_kernel_init_me(ctx, generic_ctx, &avc_ctx->context_me,
9114 encoder_context->preenc_enabled);
9115 gen9_avc_kernel_init_preproc(ctx, generic_ctx, &avc_ctx->context_preproc);
9117 /* preenc 4x scaling uses the gen95 kernel */
9118 generic_ctx->pfn_set_curbe_scaling4x = gen95_avc_set_curbe_scaling4x;
9119 generic_ctx->pfn_set_curbe_me = gen9_avc_preenc_set_curbe_me;
9120 generic_ctx->pfn_set_curbe_preproc = gen9_avc_preenc_set_curbe_preproc;
9122 generic_ctx->pfn_send_scaling_surface = gen9_avc_send_surface_scaling;
9123 generic_ctx->pfn_send_me_surface = gen9_avc_preenc_send_surface_me;
9124 generic_ctx->pfn_send_preproc_surface = gen9_avc_preenc_send_surface_preproc;
9129 PAK pipeline related function
9132 intel_avc_enc_slice_type_fixup(int slice_type);
9134 /* Allocate resources needed for PAK only mode (get invoked only in FEI encode) */
/*
 * NOTE(review): the embedded original line numbers are non-contiguous, so this
 * listing is elided — the return type, braces, one size assignment and the
 * "if (!allocate_flag)" checks guarding the gotos are missing.  Code below is
 * kept byte-identical.
 *
 * Allocates the buffers PAK-only mode needs:
 *  - a second-level batch buffer used for the image-state write, and
 *  - the BRC pre-PAK statistics output buffer (only when BRC resources have
 *    not been allocated yet, per generic_state->brc_allocated).
 * Returns VA_STATUS_SUCCESS, or VA_STATUS_ERROR_ALLOCATION_FAILED on failure.
 */
9136 gen9_avc_allocate_pak_resources(VADriverContextP ctx,
9137 struct encode_state *encode_state,
9138 struct intel_encoder_context *encoder_context)
9140 struct i965_driver_data *i965 = i965_driver_data(ctx);
9141 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
9142 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
9143 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
9144 unsigned int size = 0;
9145 int allocate_flag = 1;
9147 /*second level batch buffer for image state write when cqp etc*/
/* Free any previous allocation before re-allocating at the new size. */
9148 i965_free_gpe_resource(&avc_ctx->res_image_state_batch_buffer_2nd_level);
9149 size = INTEL_AVC_IMAGE_STATE_CMD_SIZE ;
9150 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
9151 &avc_ctx->res_image_state_batch_buffer_2nd_level,
9152 ALIGN(size, 0x1000),
9153 "second levle batch (image state write) buffer");
/* NOTE(review): the "if (!allocate_flag)" line is elided in this listing. */
9155 goto failed_allocation;
9157 if (!generic_state->brc_allocated) {
9158 i965_free_gpe_resource(&avc_ctx->res_brc_pre_pak_statistics_output_buffer);
/* NOTE(review): the size assignment for the statistics buffer is elided here. */
9160 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
9161 &avc_ctx->res_brc_pre_pak_statistics_output_buffer,
9162 ALIGN(size, 0x1000),
9163 "brc pak statistic buffer");
9165 goto failed_allocation;
9168 return VA_STATUS_SUCCESS;
9171 return VA_STATUS_ERROR_ALLOCATION_FAILED;
/*
 * Emit the 5-dword MFX_PIPE_MODE_SELECT command selecting AVC encode mode.
 * Stream-out (bit 10 of DW1) is enabled on every PAK pass except the last,
 * and the pre/post deblocking output bits mirror whether the corresponding
 * GPE resources have a backing bo.
 * NOTE(review): listing is elided (return type, braces, part of DW2 missing).
 */
9175 gen9_mfc_avc_pipe_mode_select(VADriverContextP ctx,
9176 struct encode_state *encode_state,
9177 struct intel_encoder_context *encoder_context)
9179 struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
9180 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)pak_context->private_enc_ctx;
9181 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
9182 struct intel_batchbuffer *batch = encoder_context->base.batch;
9184 BEGIN_BCS_BATCH(batch, 5);
9186 OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
9187 OUT_BCS_BATCH(batch,
9189 (MFX_LONG_MODE << 17) | /* Must be long format for encoder */
9190 (MFD_MODE_VLD << 15) |
9191 (0 << 13) | /* Non-VDEnc mode is 0*/
9192 ((generic_state->curr_pak_pass != (generic_state->num_pak_passes - 1)) << 10) | /* Stream-Out Enable */
9193 ((!!avc_ctx->res_post_deblocking_output.bo) << 9) | /* Post Deblocking Output */
9194 ((!!avc_ctx->res_pre_deblocking_output.bo) << 8) | /* Pre Deblocking Output */
9195 (0 << 7) | /* Scaled surface enable */
9196 (0 << 6) | /* Frame statistics stream out enable */
9197 (0 << 5) | /* not in stitch mode */
9198 (1 << 4) | /* encoding mode */
9199 (MFX_FORMAT_AVC << 0));
9200 OUT_BCS_BATCH(batch,
9201 (0 << 7) | /* expand NOA bus flag */
9202 (0 << 6) | /* disable slice-level clock gating */
9203 (0 << 5) | /* disable clock gating for NOA */
9204 (0 << 4) | /* terminate if AVC motion and POC table error occurs */
9205 (0 << 3) | /* terminate if AVC mbdata error occurs */
9206 (0 << 2) | /* terminate if AVC CABAC/CAVLC decode error occurs */
/* DW3/DW4 are reserved and written as zero. */
9209 OUT_BCS_BATCH(batch, 0);
9210 OUT_BCS_BATCH(batch, 0);
9212 ADVANCE_BCS_BATCH(batch);
/*
 * Emit a 6-dword MFX_SURFACE_STATE for one surface (identified by "id",
 * whose parameter line is elided from this listing) described by a planar
 * 4:2:0, interleaved-U/V, Y-major tiled gpe_resource.  DW4 and DW5 are both
 * programmed with y_cb_offset (the Y offset of the U/Cb plane).
 */
9216 gen9_mfc_avc_surface_state(VADriverContextP ctx,
9217 struct intel_encoder_context *encoder_context,
9218 struct i965_gpe_resource *gpe_resource,
9221 struct intel_batchbuffer *batch = encoder_context->base.batch;
9223 BEGIN_BCS_BATCH(batch, 6);
9225 OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
9226 OUT_BCS_BATCH(batch, id);
9227 OUT_BCS_BATCH(batch,
9228 ((gpe_resource->height - 1) << 18) |
9229 ((gpe_resource->width - 1) << 4));
9230 OUT_BCS_BATCH(batch,
9231 (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
9232 (1 << 27) | /* must be 1 for interleave U/V, hardware requirement */
9233 ((gpe_resource->pitch - 1) << 3) | /* pitch */
9234 (0 << 2) | /* must be 0 for interleave U/V */
9235 (1 << 1) | /* must be tiled */
9236 (I965_TILEWALK_YMAJOR << 0)); /* tile walk, TILEWALK_YMAJOR */
9237 OUT_BCS_BATCH(batch,
9238 (0 << 16) | /* must be 0 for interleave U/V */
9239 (gpe_resource->y_cb_offset)); /* y offset for U(cb) */
9240 OUT_BCS_BATCH(batch,
9241 (0 << 16) | /* must be 0 for interleave U/V */
9242 (gpe_resource->y_cb_offset)); /* y offset for U(cb) */
9244 ADVANCE_BCS_BATCH(batch);
/*
 * Emit MFX_PIPE_BUF_ADDR_STATE programming every pipeline buffer address:
 * pre/post deblocking outputs, uncompressed input, PAK MB status (both the
 * write and read slots), row-store scratch buffers, and the 16 reference
 * picture slots.  The base command is 65 dwords; on GEN10 three extra zero
 * dwords are appended (the cmd_len adjustment line after the IS_GEN10 check
 * is elided in this listing, as is the "int i;" declaration).
 */
9248 gen9_mfc_avc_pipe_buf_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
9250 struct i965_driver_data *i965 = i965_driver_data(ctx);
9251 struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
9252 struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)pak_context->generic_enc_ctx;
9253 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)pak_context->private_enc_ctx;
9254 struct intel_batchbuffer *batch = encoder_context->base.batch;
9256 unsigned int cmd_len = 65;
9258 if (IS_GEN10(i965->intel.device_info))
9261 BEGIN_BCS_BATCH(batch, cmd_len);
9263 OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (cmd_len - 2));
9265 /* the DW1-3 is for pre_deblocking */
9266 OUT_BUFFER_3DW(batch, avc_ctx->res_pre_deblocking_output.bo, 1, 0, i965->intel.mocs_state);
9268 /* the DW4-6 is for the post_deblocking */
9269 OUT_BUFFER_3DW(batch, avc_ctx->res_post_deblocking_output.bo, 1, 0, i965->intel.mocs_state);
9271 /* the DW7-9 is for the uncompressed_picture */
9272 OUT_BUFFER_3DW(batch, generic_ctx->res_uncompressed_input_surface.bo, 0, 0, i965->intel.mocs_state);
9274 /* the DW10-12 is for PAK information (write) */
9275 OUT_BUFFER_3DW(batch, avc_ctx->res_pak_mb_status_buffer.bo, 1, 0, i965->intel.mocs_state);//?
9277 /* the DW13-15 is for the intra_row_store_scratch */
9278 OUT_BUFFER_3DW(batch, avc_ctx->res_intra_row_store_scratch_buffer.bo, 1, 0, i965->intel.mocs_state);
9280 /* the DW16-18 is for the deblocking filter */
9281 OUT_BUFFER_3DW(batch, avc_ctx->res_deblocking_filter_row_store_scratch_buffer.bo, 1, 0, i965->intel.mocs_state);
9283 /* the DW 19-50 is for Reference pictures*/
9284 for (i = 0; i < ARRAY_ELEMS(avc_ctx->list_reference_res); i++) {
9285 OUT_BUFFER_2DW(batch, avc_ctx->list_reference_res[i].bo, 0, 0);
9288 /* DW 51, reference picture attributes */
9289 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
9291 /* The DW 52-54 is for PAK information (read) */
9292 OUT_BUFFER_3DW(batch, avc_ctx->res_pak_mb_status_buffer.bo, 1, 0, i965->intel.mocs_state);
9294 /* the DW 55-57 is the ILDB buffer */
9295 OUT_BUFFER_3DW(batch, NULL, 0, 0, 0)
9297 /* the DW 58-60 is the second ILDB buffer */
9298 OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
9300 /* DW 61, memory compress enable & mode */
9301 OUT_BCS_BATCH(batch, 0);
9303 /* the DW 62-64 is the buffer */
9304 OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
/* GEN10 variant of the command carries three additional (zero) dwords. */
9307 if (IS_GEN10(i965->intel.device_info)) {
9308 OUT_BCS_BATCH(batch, 0);
9309 OUT_BCS_BATCH(batch, 0);
9310 OUT_BCS_BATCH(batch, 0);
9313 ADVANCE_BCS_BATCH(batch);
/*
 * Emit the 26-dword MFX_IND_OBJ_BASE_ADDR_STATE: programs the indirect MV
 * object range (backed by the reconstructed surface's MV data buffer, sized
 * w_mb * h_mb * 32 * 4 bytes) and the PAK-BSE output range (the compressed
 * bitstream buffer).  Bitstream-offset, IT-COFF and DBLK ranges are left
 * NULL as they are unused on the encoder path.
 * NOTE(review): the early-return after the obj_surface check is elided in
 * this listing.
 */
9317 gen9_mfc_avc_ind_obj_base_addr_state(VADriverContextP ctx,
9318 struct encode_state *encode_state,
9319 struct intel_encoder_context *encoder_context)
9321 struct i965_driver_data *i965 = i965_driver_data(ctx);
9322 struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
9323 struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)pak_context->generic_enc_ctx;
9324 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
9325 struct intel_batchbuffer *batch = encoder_context->base.batch;
9326 struct object_surface *obj_surface;
9327 struct gen9_surface_avc *avc_priv_surface;
9328 unsigned int size = 0;
9329 unsigned int w_mb = generic_state->frame_width_in_mbs;
9330 unsigned int h_mb = generic_state->frame_height_in_mbs;
9332 obj_surface = encode_state->reconstructed_object;
/* Bail out when the reconstructed surface has no AVC private data. */
9334 if (!obj_surface || !obj_surface->private_data)
9336 avc_priv_surface = obj_surface->private_data;
9338 BEGIN_BCS_BATCH(batch, 26);
9340 OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2));
9341 /* The DW1-5 is for the MFX indirect bistream offset */
9342 OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
9343 OUT_BUFFER_2DW(batch, NULL, 0, 0);
9345 /* the DW6-10 is for MFX Indirect MV Object Base Address */
9346 size = w_mb * h_mb * 32 * 4;
9347 OUT_BUFFER_3DW(batch,
9348 avc_priv_surface->res_mv_data_surface.bo,
9351 i965->intel.mocs_state);
9352 OUT_BUFFER_2DW(batch,
9353 avc_priv_surface->res_mv_data_surface.bo,
9355 ALIGN(size, 0x1000));
9357 /* The DW11-15 is for MFX IT-COFF. Not used on encoder */
9358 OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
9359 OUT_BUFFER_2DW(batch, NULL, 0, 0);
9361 /* The DW16-20 is for MFX indirect DBLK. Not used on encoder */
9362 OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
9363 OUT_BUFFER_2DW(batch, NULL, 0, 0);
9365 /* The DW21-25 is for MFC Indirect PAK-BSE Object Base Address for Encoder
9366 * Note: an offset is specified in MFX_AVC_SLICE_STATE
9368 OUT_BUFFER_3DW(batch,
9369 generic_ctx->compressed_bitstream.res.bo,
9372 i965->intel.mocs_state);
9373 OUT_BUFFER_2DW(batch,
9374 generic_ctx->compressed_bitstream.res.bo,
9376 generic_ctx->compressed_bitstream.end_offset);
9378 ADVANCE_BCS_BATCH(batch);
/*
 * Emit the 10-dword MFX_BSP_BUF_BASE_ADDR_STATE: only the BSD/MPC row-store
 * scratch buffer is programmed; the MPR row-store and bitplane-read ranges
 * are NULL because the encoder does not use them.
 */
9382 gen9_mfc_avc_bsp_buf_base_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
9384 struct i965_driver_data *i965 = i965_driver_data(ctx);
9385 struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
9386 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)pak_context->private_enc_ctx;
9387 struct intel_batchbuffer *batch = encoder_context->base.batch;
9389 BEGIN_BCS_BATCH(batch, 10);
9391 OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
9393 /* The DW1-3 is for bsd/mpc row store scratch buffer */
9394 OUT_BUFFER_3DW(batch, avc_ctx->res_bsd_mpc_row_store_scratch_buffer.bo, 1, 0, i965->intel.mocs_state);
9396 /* The DW4-6 is for MPR Row Store Scratch Buffer Base Address, ignore for encoder */
9397 OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
9399 /* The DW7-9 is for Bitplane Read Buffer Base Address, ignore for encoder */
9400 OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
9402 ADVANCE_BCS_BATCH(batch);
/*
 * Emit the 71-dword MFX_AVC_DIRECTMODE_STATE: relocations for the direct-MV
 * buffers of the reference frames, the current frame's write MV buffer
 * (slot NUM_MFC_AVC_DMV_BUFFERS - 2), and the POC list from
 * avc_state->top_field_poc.
 * NOTE(review): this listing is elided — the "int i;" declaration, the
 * relocation offsets, and several else/closing-brace lines are missing.
 */
9406 gen9_mfc_avc_directmode_state(VADriverContextP ctx,
9407 struct intel_encoder_context *encoder_context)
9409 struct i965_driver_data *i965 = i965_driver_data(ctx);
9410 struct intel_batchbuffer *batch = encoder_context->base.batch;
9411 struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
9412 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)pak_context->private_enc_ctx;
9413 struct avc_enc_state * avc_state = (struct avc_enc_state *)pak_context->private_enc_state;
9417 BEGIN_BCS_BATCH(batch, 71);
9419 OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));
9421 /* Reference frames and Current frames */
9422 /* the DW1-32 is for the direct MV for reference */
9423 for (i = 0; i < NUM_MFC_AVC_DMV_BUFFERS - 2; i += 2) {
9424 if (avc_ctx->res_direct_mv_buffersr[i].bo != NULL) {
9425 OUT_BCS_RELOC64(batch, avc_ctx->res_direct_mv_buffersr[i].bo,
9426 I915_GEM_DOMAIN_INSTRUCTION, 0,
/* Slots without a direct-MV bo are programmed as zero. */
9429 OUT_BCS_BATCH(batch, 0);
9430 OUT_BCS_BATCH(batch, 0);
/* DW33: attributes (MOCS) for the reference MV buffers. */
9434 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
9436 /* the DW34-36 is the MV for the current frame */
9437 OUT_BCS_RELOC64(batch, avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS - 2].bo,
9438 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
9441 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
/* POC list: 32 reference entries followed by the current frame's two. */
9444 for (i = 0; i < 32; i++) {
9445 OUT_BCS_BATCH(batch, avc_state->top_field_poc[i]);
9447 OUT_BCS_BATCH(batch, avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 2]);
9448 OUT_BCS_BATCH(batch, avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 1]);
9450 ADVANCE_BCS_BATCH(batch);
/*
 * Emit one 18-dword MFX_QM_STATE command: qm_type selects which matrix set
 * is being loaded, followed by exactly 16 dwords of quantizer-matrix data.
 * The qm input (qm_length dwords, at most 16) is copied into a local buffer
 * that is zero-padded to the full 16 dwords.
 * NOTE(review): the qm_type and qm_length parameter lines are elided from
 * this listing.
 */
9454 gen9_mfc_qm_state(VADriverContextP ctx,
9456 const unsigned int *qm,
9458 struct intel_encoder_context *encoder_context)
9460 struct intel_batchbuffer *batch = encoder_context->base.batch;
9461 unsigned int qm_buffer[16];
9463 assert(qm_length <= 16);
9464 assert(sizeof(*qm) == 4);
9465 memset(qm_buffer, 0, 16 * 4);
9466 memcpy(qm_buffer, qm, qm_length * 4);
9468 BEGIN_BCS_BATCH(batch, 18);
9469 OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
9470 OUT_BCS_BATCH(batch, qm_type << 0);
9471 intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
9472 ADVANCE_BCS_BATCH(batch);
/*
 * Load the four AVC quantizer matrices (4x4 intra/inter, 8x8 intra/inter).
 * When neither SPS nor PPS signals a scaling matrix, all four use the flat
 * matrix (qm_flat — declared outside this listing); otherwise they come from
 * the application-supplied VAIQMatrixBufferH264 (ScalingList4x4[0]/[3] and
 * ScalingList8x8[0]/[1]).
 */
9476 gen9_mfc_avc_qm_state(VADriverContextP ctx,
9477 struct encode_state *encode_state,
9478 struct intel_encoder_context *encoder_context)
9480 struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
9481 struct avc_enc_state * avc_state = (struct avc_enc_state *)pak_context->private_enc_state;
9482 VAEncSequenceParameterBufferH264 *seq_param = avc_state->seq_param;
9483 VAEncPictureParameterBufferH264 *pic_param = avc_state->pic_param;
9486 const unsigned int *qm_4x4_intra;
9487 const unsigned int *qm_4x4_inter;
9488 const unsigned int *qm_8x8_intra;
9489 const unsigned int *qm_8x8_inter;
9491 if (!seq_param->seq_fields.bits.seq_scaling_matrix_present_flag
9492 && !pic_param->pic_fields.bits.pic_scaling_matrix_present_flag) {
9493 qm_4x4_intra = qm_4x4_inter = qm_8x8_intra = qm_8x8_inter = qm_flat;
9495 VAIQMatrixBufferH264 *qm;
9496 assert(encode_state->q_matrix && encode_state->q_matrix->buffer);
9497 qm = (VAIQMatrixBufferH264 *)encode_state->q_matrix->buffer;
9498 qm_4x4_intra = (unsigned int *)qm->ScalingList4x4[0];
9499 qm_4x4_inter = (unsigned int *)qm->ScalingList4x4[3];
9500 qm_8x8_intra = (unsigned int *)qm->ScalingList8x8[0];
9501 qm_8x8_inter = (unsigned int *)qm->ScalingList8x8[1];
/* 4x4 lists are 12 dwords (3 lists x 16 bytes); 8x8 lists are 16 dwords. */
9504 gen9_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm_4x4_intra, 12, encoder_context);
9505 gen9_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm_4x4_inter, 12, encoder_context);
9506 gen9_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm_8x8_intra, 16, encoder_context);
9507 gen9_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm_8x8_inter, 16, encoder_context);
/*
 * Emit one 34-dword MFX_FQM_STATE command: fqm_type selects the matrix set,
 * followed by exactly 32 dwords of forward-quantizer data.  The fqm input
 * (fqm_length dwords, at most 32) is zero-padded to the full 32 dwords.
 * NOTE(review): the fqm_type and fqm_length parameter lines are elided from
 * this listing.
 */
9511 gen9_mfc_fqm_state(VADriverContextP ctx,
9513 const unsigned int *fqm,
9515 struct intel_encoder_context *encoder_context)
9517 struct intel_batchbuffer *batch = encoder_context->base.batch;
9518 unsigned int fqm_buffer[32];
9520 assert(fqm_length <= 32);
9521 assert(sizeof(*fqm) == 4);
9522 memset(fqm_buffer, 0, 32 * 4);
9523 memcpy(fqm_buffer, fqm, fqm_length * 4);
9525 BEGIN_BCS_BATCH(batch, 34);
9526 OUT_BCS_BATCH(batch, MFX_FQM_STATE | (34 - 2));
9527 OUT_BCS_BATCH(batch, fqm_type << 0);
9528 intel_batchbuffer_data(batch, fqm_buffer, 32 * 4);
9529 ADVANCE_BCS_BATCH(batch);
/*
 * Build a forward-quantizer matrix from a quantizer matrix:
 * fqm[i][j] = 65536 / qm[j][i] (Q16 reciprocal of the transposed entry).
 * The assert guards against a zero qm entry, which would divide by zero.
 */
9533 gen9_mfc_fill_fqm(uint8_t *qm, uint16_t *fqm, int len)
9536 for (i = 0; i < len; i++)
9537 for (j = 0; j < len; j++) {
9538 assert(qm[j * len + i]);
9539 fqm[i * len + j] = (1 << 16) / qm[j * len + i];
/*
 * Load the four AVC forward-quantizer matrices.  With no SPS/PPS scaling
 * matrix, the precomputed flat table (fqm_flat, declared outside this
 * listing) is used; otherwise each set is derived from the application's
 * VAIQMatrixBufferH264 via gen9_mfc_fill_fqm into a local "fqm" scratch
 * buffer (its declaration, like "int i;", is elided from this listing).
 */
9544 gen9_mfc_avc_fqm_state(VADriverContextP ctx,
9545 struct encode_state *encode_state,
9546 struct intel_encoder_context *encoder_context)
9548 struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
9549 struct avc_enc_state * avc_state = (struct avc_enc_state *)pak_context->private_enc_state;
9550 VAEncSequenceParameterBufferH264 *seq_param = avc_state->seq_param;
9551 VAEncPictureParameterBufferH264 *pic_param = avc_state->pic_param;
9553 if (!seq_param->seq_fields.bits.seq_scaling_matrix_present_flag
9554 && !pic_param->pic_fields.bits.pic_scaling_matrix_present_flag) {
9555 gen9_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, fqm_flat, 24, encoder_context);
9556 gen9_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, fqm_flat, 24, encoder_context);
9557 gen9_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, fqm_flat, 32, encoder_context);
9558 gen9_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, fqm_flat, 32, encoder_context);
9562 VAIQMatrixBufferH264 *qm;
9563 assert(encode_state->q_matrix && encode_state->q_matrix->buffer);
9564 qm = (VAIQMatrixBufferH264 *)encode_state->q_matrix->buffer;
/* Three 4x4 intra lists (16 uint16 entries each) -> 24 dwords. */
9566 for (i = 0; i < 3; i++)
9567 gen9_mfc_fill_fqm(qm->ScalingList4x4[i], (uint16_t *)fqm + 16 * i, 4);
9568 gen9_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, fqm, 24, encoder_context);
/* Three 4x4 inter lists (indices 3..5). */
9570 for (i = 3; i < 6; i++)
9571 gen9_mfc_fill_fqm(qm->ScalingList4x4[i], (uint16_t *)fqm + 16 * (i - 3), 4);
9572 gen9_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, fqm, 24, encoder_context);
9574 gen9_mfc_fill_fqm(qm->ScalingList8x8[0], (uint16_t *)fqm, 8);
9575 gen9_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, fqm, 32, encoder_context);
9577 gen9_mfc_fill_fqm(qm->ScalingList8x8[1], (uint16_t *)fqm, 8);
9578 gen9_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, fqm, 32, encoder_context);
/*
 * Emit an MFX_INSERT_OBJECT command carrying lenght_in_dws dwords of raw
 * header/payload data into the bitstream.  DW1 packs the insertion flags:
 * slice-header indicator, valid bits in the last dword, emulation-prevention
 * skip count, emulation enable, last-header and end-of-slice flags.
 * A data_bits_in_last_dw of 0 means the last dword is fully used (32 bits).
 * NOTE(review): "lenght_in_dws" is an upstream typo, kept as-is since the
 * name is shared with callers outside this block.
 */
9583 gen9_mfc_avc_insert_object(VADriverContextP ctx,
9584 struct intel_encoder_context *encoder_context,
9585 unsigned int *insert_data, int lenght_in_dws, int data_bits_in_last_dw,
9586 int skip_emul_byte_count, int is_last_header, int is_end_of_slice, int emulation_flag,
9587 int slice_header_indicator,
9588 struct intel_batchbuffer *batch)
9590 if (data_bits_in_last_dw == 0)
9591 data_bits_in_last_dw = 32;
9593 BEGIN_BCS_BATCH(batch, lenght_in_dws + 2);
9595 OUT_BCS_BATCH(batch, MFX_INSERT_OBJECT | (lenght_in_dws));
9596 OUT_BCS_BATCH(batch,
9597 (0 << 16) | /* always start at offset 0 */
9598 (slice_header_indicator << 14) |
9599 (data_bits_in_last_dw << 8) |
9600 (skip_emul_byte_count << 4) |
9601 (!!emulation_flag << 3) |
9602 ((!!is_last_header) << 2) |
9603 ((!!is_end_of_slice) << 1) |
9604 (0 << 0)); /* check this flag */
9605 intel_batchbuffer_data(batch, insert_data, lenght_in_dws * 4);
9607 ADVANCE_BCS_BATCH(batch);
/*
 * Scan the packed raw-data headers attached to slice 0 and insert the AUD
 * (access unit delimiter) NAL, if present, ahead of everything else.  The
 * NAL type is read from the byte just past the emulation-skip region.
 * NOTE(review): this listing is elided — the continue on non-RawData types,
 * several insert-object arguments and the closing braces are missing.
 */
9611 gen9_mfc_avc_insert_aud_packed_data(VADriverContextP ctx,
9612 struct encode_state *encode_state,
9613 struct intel_encoder_context *encoder_context,
9614 struct intel_batchbuffer *batch)
9616 VAEncPackedHeaderParameterBuffer *param = NULL;
9617 unsigned int length_in_bits;
9618 unsigned int *header_data = NULL;
9619 unsigned char *nal_type = NULL;
9620 int count, i, start_index;
9622 count = encode_state->slice_rawdata_count[0];
9623 start_index = (encode_state->slice_rawdata_index[0] & SLICE_PACKED_DATA_INDEX_MASK);
9625 for (i = 0; i < count; i++) {
9626 unsigned int skip_emul_byte_cnt;
9628 header_data = (unsigned int *)encode_state->packed_header_data_ext[start_index + i]->buffer;
9629 nal_type = (unsigned char *)header_data;
9631 param = (VAEncPackedHeaderParameterBuffer *)(encode_state->packed_header_params_ext[start_index + i]->buffer);
/* Only raw-data packed headers can carry an AUD. */
9632 if (param->type != VAEncPackedHeaderRawData)
9635 length_in_bits = param->bit_length;
9637 skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
/* Low 5 bits of the NAL header byte are the NAL unit type. */
9639 if ((*(nal_type + skip_emul_byte_cnt - 1) & 0x1f) == AVC_NAL_DELIMITER) {
9640 gen9_mfc_avc_insert_object(ctx,
9643 ALIGN(length_in_bits, 32) >> 5,
9644 length_in_bits & 0x1f,
9648 !param->has_emulation_bytes,
/*
 * Insert the packed headers for one slice (slice_index — its parameter line
 * is elided from this listing):
 *  1. all raw-data packed headers except the slice header and AUD
 *     (AUD was already inserted first, the slice header must come last);
 *  2. the slice header itself — either the application-supplied packed
 *     header, or one generated by build_avc_slice_header() when
 *     slice_header_index[slice_index] is 0.
 * NOTE(review): this listing is elided (continue statements, several
 * insert-object arguments and closing braces are missing).
 */
9657 gen9_mfc_avc_insert_slice_packed_data(VADriverContextP ctx,
9658 struct encode_state *encode_state,
9659 struct intel_encoder_context *encoder_context,
9661 struct intel_batchbuffer *batch)
9663 VAEncPackedHeaderParameterBuffer *param = NULL;
9664 unsigned int length_in_bits;
9665 unsigned int *header_data = NULL;
9666 int count, i, start_index;
9667 int slice_header_index;
9668 unsigned char *nal_type = NULL;
/* Index 0 means no packed slice header was supplied by the app. */
9670 if (encode_state->slice_header_index[slice_index] == 0)
9671 slice_header_index = -1;
9673 slice_header_index = (encode_state->slice_header_index[slice_index] & SLICE_PACKED_DATA_INDEX_MASK);
9675 count = encode_state->slice_rawdata_count[slice_index];
9676 start_index = (encode_state->slice_rawdata_index[slice_index] & SLICE_PACKED_DATA_INDEX_MASK);
9678 for (i = 0; i < count; i++) {
9679 unsigned int skip_emul_byte_cnt;
9681 header_data = (unsigned int *)encode_state->packed_header_data_ext[start_index + i]->buffer;
9682 nal_type = (unsigned char *)header_data;
9684 param = (VAEncPackedHeaderParameterBuffer *)(encode_state->packed_header_params_ext[start_index + i]->buffer);
9686 length_in_bits = param->bit_length;
9688 skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
9690 /* skip the slice header packed data type as it is lastly inserted */
9691 if (param->type == VAEncPackedHeaderSlice || (*(nal_type + skip_emul_byte_cnt - 1) & 0x1f) == AVC_NAL_DELIMITER)
9694 /* as the slice header is still required, the last header flag is set to
9697 gen9_mfc_avc_insert_object(ctx,
9700 ALIGN(length_in_bits, 32) >> 5,
9701 length_in_bits & 0x1f,
9705 !param->has_emulation_bytes,
9710 if (slice_header_index == -1) {
9711 VAEncSequenceParameterBufferH264 *seq_param = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
9712 VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
9713 VAEncSliceParameterBufferH264 *slice_params = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer;
9714 unsigned char *slice_header = NULL;
9715 int slice_header_length_in_bits = 0;
9717 /* No slice header data is passed. And the driver needs to generate it */
9718 /* For the Normal H264 */
9719 slice_header_length_in_bits = build_avc_slice_header(seq_param,
9723 gen9_mfc_avc_insert_object(ctx,
9725 (unsigned int *)slice_header,
9726 ALIGN(slice_header_length_in_bits, 32) >> 5,
9727 slice_header_length_in_bits & 0x1f,
9728 5, /* first 5 bytes are start code + nal unit type */
/* NOTE(review): the free(slice_header) after insertion is elided here —
 * presumably present in the full source; verify against upstream. */
9735 unsigned int skip_emul_byte_cnt;
9737 header_data = (unsigned int *)encode_state->packed_header_data_ext[slice_header_index]->buffer;
9739 param = (VAEncPackedHeaderParameterBuffer *)(encode_state->packed_header_params_ext[slice_header_index]->buffer);
9740 length_in_bits = param->bit_length;
9742 /* as the slice header is the last header data for one slice,
9743 * the last header flag is set to one.
9745 skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
9747 gen9_mfc_avc_insert_object(ctx,
9750 ALIGN(length_in_bits, 32) >> 5,
9751 length_in_bits & 0x1f,
9755 !param->has_emulation_bytes,
/*
 * Insert all non-slice headers for the frame, then delegate the per-slice
 * packed data to gen9_mfc_avc_insert_slice_packed_data().  For the first
 * slice only (slice_index == 0): AUD first, then packed SPS, PPS and SEI
 * headers when the application provided them; the elided CBR branch
 * (internal_rate_mode == VA_RC_CBR) presumably inserts SEI/filler — verify
 * against upstream.
 * NOTE(review): "inset" is an upstream typo for "insert"; the name is kept
 * because callers outside this block reference it.  The slice_index
 * parameter line and several argument lines are elided from this listing.
 */
9764 gen9_mfc_avc_inset_headers(VADriverContextP ctx,
9765 struct encode_state *encode_state,
9766 struct intel_encoder_context *encoder_context,
9767 VAEncSliceParameterBufferH264 *slice_param,
9769 struct intel_batchbuffer *batch)
9771 struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
9772 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
9773 int idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_SPS);
9774 unsigned int internal_rate_mode = generic_state->internal_rate_mode;
9775 unsigned int skip_emul_byte_cnt;
9777 if (slice_index == 0) {
9779 /* if AUD exist and insert it firstly */
9780 gen9_mfc_avc_insert_aud_packed_data(ctx, encode_state, encoder_context, batch);
/* Packed SPS, when supplied by the application. */
9782 if (encode_state->packed_header_data[idx]) {
9783 VAEncPackedHeaderParameterBuffer *param = NULL;
9784 unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
9785 unsigned int length_in_bits;
9787 assert(encode_state->packed_header_param[idx]);
9788 param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
9789 length_in_bits = param->bit_length;
9791 skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
9792 gen9_mfc_avc_insert_object(ctx,
9795 ALIGN(length_in_bits, 32) >> 5,
9796 length_in_bits & 0x1f,
9800 !param->has_emulation_bytes,
/* Packed PPS, when supplied. */
9805 idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_PPS);
9807 if (encode_state->packed_header_data[idx]) {
9808 VAEncPackedHeaderParameterBuffer *param = NULL;
9809 unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
9810 unsigned int length_in_bits;
9812 assert(encode_state->packed_header_param[idx]);
9813 param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
9814 length_in_bits = param->bit_length;
9816 skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
9818 gen9_mfc_avc_insert_object(ctx,
9821 ALIGN(length_in_bits, 32) >> 5,
9822 length_in_bits & 0x1f,
9826 !param->has_emulation_bytes,
/* Packed SEI, when supplied; otherwise the CBR branch below applies. */
9831 idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_SEI);
9833 if (encode_state->packed_header_data[idx]) {
9834 VAEncPackedHeaderParameterBuffer *param = NULL;
9835 unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
9836 unsigned int length_in_bits;
9838 assert(encode_state->packed_header_param[idx]);
9839 param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
9840 length_in_bits = param->bit_length;
9842 skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
9843 gen9_mfc_avc_insert_object(ctx,
9846 ALIGN(length_in_bits, 32) >> 5,
9847 length_in_bits & 0x1f,
9851 !param->has_emulation_bytes,
9854 } else if (internal_rate_mode == VA_RC_CBR) {
9859 gen9_mfc_avc_insert_slice_packed_data(ctx,
/*
 * Emit the 11-dword MFX_AVC_SLICE_STATE for one slice.
 *
 * Derives per-slice-type fields first:
 *  - I: weight denominators forced to 0;
 *  - P: weighted_pred_flag, L0 ref count, inter rounding;
 *  - B: weighted_bipred_idc, L0/L1 ref counts, inter rounding; implicit
 *    weighted prediction (idc == 2) forces both log2 weight denominators
 *    to 5 (H.264 8.4.3, eq. 8-279, as the inline comment notes).
 * Slice start/end MB positions come from macroblock_address; for the last
 * slice the "next" position is the bottom of the frame.  Rate-control
 * counters are enabled on BRC re-passes (curr_pak_pass != 0) and RC panic
 * only on the final pass when min/max-QP control is off and the mode is
 * not CQP.
 * NOTE(review): this listing is elided — max_qp_n/max_qp_p computation,
 * the "int i;" declaration, the grow/shrink/correct[] initialisation and
 * parts of several dwords are missing; code kept byte-identical.
 */
9867 gen9_mfc_avc_slice_state(VADriverContextP ctx,
9868 struct encode_state *encode_state,
9869 struct intel_encoder_context *encoder_context,
9870 VAEncPictureParameterBufferH264 *pic_param,
9871 VAEncSliceParameterBufferH264 *slice_param,
9872 VAEncSliceParameterBufferH264 *next_slice_param,
9873 struct intel_batchbuffer *batch)
9875 struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
9876 struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)pak_context->generic_enc_ctx;
9877 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
9878 struct avc_enc_state * avc_state = (struct avc_enc_state *)pak_context->private_enc_state;
9879 unsigned int luma_log2_weight_denom = slice_param->luma_log2_weight_denom;
9880 unsigned int chroma_log2_weight_denom = slice_param->chroma_log2_weight_denom;
9881 unsigned char correct[6], grow, shrink;
9882 int slice_hor_pos, slice_ver_pos, next_slice_hor_pos, next_slice_ver_pos;
9883 int max_qp_n, max_qp_p;
9885 int weighted_pred_idc = 0;
9886 int num_ref_l0 = 0, num_ref_l1 = 0;
9887 int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
9888 int slice_qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
9889 unsigned int rc_panic_enable = 0;
9890 unsigned int rate_control_counter_enable = 0;
9891 unsigned int rounding_value = 0;
9892 unsigned int rounding_inter_enable = 0;
/* Slice start position in MB units. */
9894 slice_hor_pos = slice_param->macroblock_address % generic_state->frame_width_in_mbs;
9895 slice_ver_pos = slice_param->macroblock_address / generic_state->frame_width_in_mbs;
9897 if (next_slice_param) {
9898 next_slice_hor_pos = next_slice_param->macroblock_address % generic_state->frame_width_in_mbs;
9899 next_slice_ver_pos = next_slice_param->macroblock_address / generic_state->frame_width_in_mbs;
/* Last slice: end position is the bottom-left of the frame. */
9901 next_slice_hor_pos = 0;
9902 next_slice_ver_pos = generic_state->frame_height_in_mbs;
9905 if (slice_type == SLICE_TYPE_I) {
9906 luma_log2_weight_denom = 0;
9907 chroma_log2_weight_denom = 0;
9908 } else if (slice_type == SLICE_TYPE_P) {
9909 weighted_pred_idc = pic_param->pic_fields.bits.weighted_pred_flag;
9910 num_ref_l0 = pic_param->num_ref_idx_l0_active_minus1 + 1;
9911 rounding_inter_enable = avc_state->rounding_inter_enable;
9912 rounding_value = avc_state->rounding_value;
/* Slice-level ref-count override takes precedence over the PPS counts. */
9914 if (slice_param->num_ref_idx_active_override_flag)
9915 num_ref_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
9916 } else if (slice_type == SLICE_TYPE_B) {
9917 weighted_pred_idc = pic_param->pic_fields.bits.weighted_bipred_idc;
9918 num_ref_l0 = pic_param->num_ref_idx_l0_active_minus1 + 1;
9919 num_ref_l1 = pic_param->num_ref_idx_l1_active_minus1 + 1;
9920 rounding_inter_enable = avc_state->rounding_inter_enable;
9921 rounding_value = avc_state->rounding_value;
9923 if (slice_param->num_ref_idx_active_override_flag) {
9924 num_ref_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
9925 num_ref_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
9928 if (weighted_pred_idc == 2) {
9929 /* 8.4.3 - Derivation process for prediction weights (8-279) */
9930 luma_log2_weight_denom = 5;
9931 chroma_log2_weight_denom = 5;
9940 rate_control_counter_enable = (generic_state->brc_enabled && (generic_state->curr_pak_pass != 0));
9941 rc_panic_enable = (avc_state->rc_panic_enable &&
9942 (!avc_state->min_max_qp_enable) &&
9943 (encoder_context->rate_control_mode != VA_RC_CQP) &&
9944 (generic_state->curr_pak_pass == (generic_state->num_pak_passes - 1)));
/* NOTE(review): the correct[i] initialisation body is elided here. */
9946 for (i = 0; i < 6; i++)
9949 BEGIN_BCS_BATCH(batch, 11);
9951 OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
9952 OUT_BCS_BATCH(batch, slice_type);
9953 OUT_BCS_BATCH(batch,
9954 (num_ref_l1 << 24) |
9955 (num_ref_l0 << 16) |
9956 (chroma_log2_weight_denom << 8) |
9957 (luma_log2_weight_denom << 0));
9958 OUT_BCS_BATCH(batch,
9959 (weighted_pred_idc << 30) |
9960 (((slice_type == SLICE_TYPE_B) ? slice_param->direct_spatial_mv_pred_flag : 0) << 29) |
9961 (slice_param->disable_deblocking_filter_idc << 27) |
9962 (slice_param->cabac_init_idc << 24) |
9964 ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
9965 ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
9967 OUT_BCS_BATCH(batch,
9968 slice_ver_pos << 24 |
9969 slice_hor_pos << 16 |
9970 slice_param->macroblock_address);
9971 OUT_BCS_BATCH(batch,
9972 next_slice_ver_pos << 16 |
9973 next_slice_hor_pos);
9975 OUT_BCS_BATCH(batch,
9976 (rate_control_counter_enable << 31) |
9977 (1 << 30) | /* ResetRateControlCounter */
9978 (2 << 28) | /* Loose Rate Control */
9979 (0 << 24) | /* RC Stable Tolerance */
9980 (rc_panic_enable << 23) | /* RC Panic Enable */
9981 (1 << 22) | /* CBP mode */
9982 (0 << 21) | /* MB Type Direct Conversion, 0: Enable, 1: Disable */
9983 (0 << 20) | /* MB Type Skip Conversion, 0: Enable, 1: Disable */
9984 (!next_slice_param << 19) | /* Is Last Slice */
9985 (0 << 18) | /* BitstreamOutputFlag Compressed BitStream Output Disable Flag 0:enable 1:disable */
9986 (1 << 17) | /* HeaderPresentFlag */
9987 (1 << 16) | /* SliceData PresentFlag */
9988 (0 << 15) | /* TailPresentFlag */
9989 (1 << 13) | /* RBSP NAL TYPE */
9990 (1 << 12)); /* CabacZeroWordInsertionEnable */
/* DW7: byte offset of this slice's output in the bitstream buffer. */
9992 OUT_BCS_BATCH(batch, generic_ctx->compressed_bitstream.start_offset);
9994 OUT_BCS_BATCH(batch,
9995 (max_qp_n << 24) | /*Target QP - 24 is lowest QP*/
9996 (max_qp_p << 16) | /*Target QP + 20 is highest QP*/
9999 OUT_BCS_BATCH(batch,
10000 (rounding_inter_enable << 31) |
10001 (rounding_value << 28) |
10004 (correct[5] << 20) |
10005 (correct[4] << 16) |
10006 (correct[3] << 12) |
10007 (correct[2] << 8) |
10008 (correct[1] << 4) |
10009 (correct[0] << 0));
10010 OUT_BCS_BATCH(batch, 0);
10012 ADVANCE_BCS_BATCH(batch);
10016 gen9_mfc_avc_get_ref_idx_state(VAPictureH264 *va_pic, unsigned int frame_store_id)
10018 unsigned int is_long_term =
10019 !!(va_pic->flags & VA_PICTURE_H264_LONG_TERM_REFERENCE);
10020 unsigned int is_top_field =
10021 !!(va_pic->flags & VA_PICTURE_H264_TOP_FIELD);
10022 unsigned int is_bottom_field =
10023 !!(va_pic->flags & VA_PICTURE_H264_BOTTOM_FIELD);
10025 return ((is_long_term << 6) |
10027 (frame_store_id << 1) |
10028 ((is_top_field ^ 1) & is_bottom_field));
10032 gen9_mfc_avc_ref_idx_state(VADriverContextP ctx,
10033 struct encode_state *encode_state,
10034 struct intel_encoder_context *encoder_context,
10035 VAEncSliceParameterBufferH264 *slice_param,
10036 struct intel_batchbuffer *batch)
10038 struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
10039 struct avc_enc_state * avc_state = (struct avc_enc_state *)pak_context->private_enc_state;
10040 VAPictureH264 *ref_pic;
10041 int i, slice_type, ref_idx_shift;
10042 unsigned int fwd_ref_entry;
10043 unsigned int bwd_ref_entry;
10045 /* max 4 ref frames are allowed for l0 and l1 */
10046 fwd_ref_entry = 0x80808080;
10047 slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
10049 if ((slice_type == SLICE_TYPE_P) ||
10050 (slice_type == SLICE_TYPE_B)) {
10051 for (i = 0; i < MIN(avc_state->num_refs[0], 4); i++) {
10052 ref_pic = &slice_param->RefPicList0[i];
10053 ref_idx_shift = i * 8;
10055 fwd_ref_entry &= ~(0xFF << ref_idx_shift);
10056 fwd_ref_entry += (gen9_mfc_avc_get_ref_idx_state(ref_pic, avc_state->list_ref_idx[0][i]) << ref_idx_shift);
10060 bwd_ref_entry = 0x80808080;
10061 if (slice_type == SLICE_TYPE_B) {
10062 for (i = 0; i < MIN(avc_state->num_refs[1], 4); i++) {
10063 ref_pic = &slice_param->RefPicList1[i];
10064 ref_idx_shift = i * 8;
10066 bwd_ref_entry &= ~(0xFF << ref_idx_shift);
10067 bwd_ref_entry += (gen9_mfc_avc_get_ref_idx_state(ref_pic, avc_state->list_ref_idx[1][i]) << ref_idx_shift);
10071 if ((slice_type == SLICE_TYPE_P) ||
10072 (slice_type == SLICE_TYPE_B)) {
10073 BEGIN_BCS_BATCH(batch, 10);
10074 OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8);
10075 OUT_BCS_BATCH(batch, 0); // L0
10076 OUT_BCS_BATCH(batch, fwd_ref_entry);
10078 for (i = 0; i < 7; i++) {
10079 OUT_BCS_BATCH(batch, 0x80808080);
10082 ADVANCE_BCS_BATCH(batch);
10085 if (slice_type == SLICE_TYPE_B) {
10086 BEGIN_BCS_BATCH(batch, 10);
10087 OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8);
10088 OUT_BCS_BATCH(batch, 1); //Select L1
10089 OUT_BCS_BATCH(batch, bwd_ref_entry); //max 4 reference allowed
10090 for (i = 0; i < 7; i++) {
10091 OUT_BCS_BATCH(batch, 0x80808080);
10093 ADVANCE_BCS_BATCH(batch);
10098 gen9_mfc_avc_weightoffset_state(VADriverContextP ctx,
10099 struct encode_state *encode_state,
10100 struct intel_encoder_context *encoder_context,
10101 VAEncPictureParameterBufferH264 *pic_param,
10102 VAEncSliceParameterBufferH264 *slice_param,
10103 struct intel_batchbuffer *batch)
10106 short weightoffsets[32 * 6];
10108 slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
10110 if (slice_type == SLICE_TYPE_P &&
10111 pic_param->pic_fields.bits.weighted_pred_flag == 1) {
10112 memset(weightoffsets, 0, 32 * 6 * sizeof(short));
10113 for (i = 0; i < 32; i++) {
10114 weightoffsets[i * 6 + 0] = slice_param->luma_weight_l0[i];
10115 weightoffsets[i * 6 + 1] = slice_param->luma_offset_l0[i];
10116 weightoffsets[i * 6 + 2] = slice_param->chroma_weight_l0[i][0];
10117 weightoffsets[i * 6 + 3] = slice_param->chroma_offset_l0[i][0];
10118 weightoffsets[i * 6 + 4] = slice_param->chroma_weight_l0[i][1];
10119 weightoffsets[i * 6 + 5] = slice_param->chroma_offset_l0[i][1];
10122 BEGIN_BCS_BATCH(batch, 98);
10123 OUT_BCS_BATCH(batch, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2));
10124 OUT_BCS_BATCH(batch, 0);
10125 intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));
10127 ADVANCE_BCS_BATCH(batch);
10130 if (slice_type == SLICE_TYPE_B &&
10131 (pic_param->pic_fields.bits.weighted_bipred_idc == 1)) {
10132 memset(weightoffsets, 0, 32 * 6 * sizeof(short));
10133 for (i = 0; i < 32; i++) {
10134 weightoffsets[i * 6 + 0] = slice_param->luma_weight_l0[i];
10135 weightoffsets[i * 6 + 1] = slice_param->luma_offset_l0[i];
10136 weightoffsets[i * 6 + 2] = slice_param->chroma_weight_l0[i][0];
10137 weightoffsets[i * 6 + 3] = slice_param->chroma_offset_l0[i][0];
10138 weightoffsets[i * 6 + 4] = slice_param->chroma_weight_l0[i][1];
10139 weightoffsets[i * 6 + 5] = slice_param->chroma_offset_l0[i][1];
10142 BEGIN_BCS_BATCH(batch, 98);
10143 OUT_BCS_BATCH(batch, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2));
10144 OUT_BCS_BATCH(batch, 0);
10145 intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));
10146 ADVANCE_BCS_BATCH(batch);
10148 memset(weightoffsets, 0, 32 * 6 * sizeof(short));
10149 for (i = 0; i < 32; i++) {
10150 weightoffsets[i * 6 + 0] = slice_param->luma_weight_l1[i];
10151 weightoffsets[i * 6 + 1] = slice_param->luma_offset_l1[i];
10152 weightoffsets[i * 6 + 2] = slice_param->chroma_weight_l1[i][0];
10153 weightoffsets[i * 6 + 3] = slice_param->chroma_offset_l1[i][0];
10154 weightoffsets[i * 6 + 4] = slice_param->chroma_weight_l1[i][1];
10155 weightoffsets[i * 6 + 5] = slice_param->chroma_offset_l1[i][1];
10158 BEGIN_BCS_BATCH(batch, 98);
10159 OUT_BCS_BATCH(batch, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2));
10160 OUT_BCS_BATCH(batch, 1);
10161 intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));
10162 ADVANCE_BCS_BATCH(batch);
10167 gen9_mfc_avc_single_slice(VADriverContextP ctx,
10168 struct encode_state *encode_state,
10169 struct intel_encoder_context *encoder_context,
10170 VAEncSliceParameterBufferH264 *slice_param,
10171 VAEncSliceParameterBufferH264 *next_slice_param,
10174 struct i965_driver_data *i965 = i965_driver_data(ctx);
10175 struct i965_gpe_table *gpe = &i965->gpe_table;
10176 struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
10177 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)pak_context->private_enc_ctx;
10178 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
10179 struct avc_enc_state * avc_state = (struct avc_enc_state *)pak_context->private_enc_state;
10180 struct intel_batchbuffer *batch = encoder_context->base.batch;
10181 struct intel_batchbuffer *slice_batch = avc_ctx->pres_slice_batch_buffer_2nd_level;
10182 VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
10183 struct gpe_mi_batch_buffer_start_parameter second_level_batch;
10184 struct object_surface *obj_surface;
10185 struct gen9_surface_avc *avc_priv_surface;
10187 unsigned int slice_offset = 0;
10189 if (generic_state->curr_pak_pass == 0) {
10190 slice_offset = intel_batchbuffer_used_size(slice_batch);
10191 avc_state->slice_batch_offset[slice_index] = slice_offset;
10192 gen9_mfc_avc_ref_idx_state(ctx, encode_state, encoder_context, slice_param, slice_batch);
10193 gen9_mfc_avc_weightoffset_state(ctx,
10199 gen9_mfc_avc_slice_state(ctx,
10206 gen9_mfc_avc_inset_headers(ctx,
10213 BEGIN_BCS_BATCH(slice_batch, 2);
10214 OUT_BCS_BATCH(slice_batch, 0);
10215 OUT_BCS_BATCH(slice_batch, MI_BATCH_BUFFER_END);
10216 ADVANCE_BCS_BATCH(slice_batch);
10219 slice_offset = avc_state->slice_batch_offset[slice_index];
10221 /* insert slice as second level.*/
10222 memset(&second_level_batch, 0, sizeof(second_level_batch));
10223 second_level_batch.is_second_level = 1; /* Must be the second level batch buffer */
10224 second_level_batch.offset = slice_offset;
10225 second_level_batch.bo = slice_batch->buffer;
10226 gpe->mi_batch_buffer_start(ctx, batch, &second_level_batch);
10228 /* insert mb code as second level.*/
10229 obj_surface = encode_state->reconstructed_object;
10230 assert(obj_surface->private_data);
10231 avc_priv_surface = (struct gen9_surface_avc *)obj_surface->private_data;
10233 memset(&second_level_batch, 0, sizeof(second_level_batch));
10234 second_level_batch.is_second_level = 1; /* Must be the second level batch buffer */
10235 second_level_batch.offset = slice_param->macroblock_address * 16 * 4;
10236 second_level_batch.bo = avc_priv_surface->res_mb_code_surface.bo;
10237 gpe->mi_batch_buffer_start(ctx, batch, &second_level_batch);
10242 gen9_avc_pak_slice_level(VADriverContextP ctx,
10243 struct encode_state *encode_state,
10244 struct intel_encoder_context *encoder_context)
10246 struct i965_driver_data *i965 = i965_driver_data(ctx);
10247 struct i965_gpe_table *gpe = &i965->gpe_table;
10248 struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
10249 struct avc_enc_state * avc_state = (struct avc_enc_state *)pak_context->private_enc_state;
10250 struct intel_batchbuffer *batch = encoder_context->base.batch;
10251 struct gpe_mi_flush_dw_parameter mi_flush_dw_params;
10252 VAEncSliceParameterBufferH264 *slice_param, *next_slice_param, *next_slice_group_param;
10254 int slice_index = 0;
10255 int is_frame_level = (avc_state->slice_num > 1) ? 0 : 1; /* check it for SKL,now single slice per frame */
10256 int has_tail = 0; /* check it later */
10258 for (j = 0; j < encode_state->num_slice_params_ext; j++) {
10259 slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j]->buffer;
10261 if (j == encode_state->num_slice_params_ext - 1)
10262 next_slice_group_param = NULL;
10264 next_slice_group_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j + 1]->buffer;
10266 for (i = 0; i < encode_state->slice_params_ext[j]->num_elements; i++) {
10267 if (i < encode_state->slice_params_ext[j]->num_elements - 1)
10268 next_slice_param = slice_param + 1;
10270 next_slice_param = next_slice_group_param;
10272 gen9_mfc_avc_single_slice(ctx,
10281 if (is_frame_level)
10285 if (is_frame_level)
10290 /* insert a tail if required */
10293 memset(&mi_flush_dw_params, 0, sizeof(mi_flush_dw_params));
10294 mi_flush_dw_params.video_pipeline_cache_invalidate = 1;
10295 gpe->mi_flush_dw(ctx, batch, &mi_flush_dw_params);
10298 gen9_avc_pak_picture_level(VADriverContextP ctx,
10299 struct encode_state *encode_state,
10300 struct intel_encoder_context *encoder_context)
10302 struct i965_driver_data *i965 = i965_driver_data(ctx);
10303 struct i965_gpe_table *gpe = &i965->gpe_table;
10304 struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
10305 struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)pak_context->generic_enc_ctx;
10306 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)pak_context->private_enc_ctx;
10307 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
10308 struct gpe_mi_batch_buffer_start_parameter second_level_batch;
10309 struct intel_batchbuffer *batch = encoder_context->base.batch;
10311 if (generic_state->brc_enabled &&
10312 generic_state->curr_pak_pass) {
10313 struct gpe_mi_conditional_batch_buffer_end_parameter mi_conditional_batch_buffer_end_params;
10314 struct encoder_status_buffer_internal *status_buffer;
10315 status_buffer = &(avc_ctx->status_buffer);
10317 memset(&mi_conditional_batch_buffer_end_params, 0, sizeof(mi_conditional_batch_buffer_end_params));
10318 mi_conditional_batch_buffer_end_params.offset = status_buffer->image_status_mask_offset;
10319 mi_conditional_batch_buffer_end_params.bo = status_buffer->bo;
10320 mi_conditional_batch_buffer_end_params.compare_data = 0;
10321 mi_conditional_batch_buffer_end_params.compare_mask_mode_disabled = 0;
10322 gpe->mi_conditional_batch_buffer_end(ctx, batch, &mi_conditional_batch_buffer_end_params);
10325 gen9_mfc_avc_pipe_mode_select(ctx, encode_state, encoder_context);
10326 gen9_mfc_avc_surface_state(ctx, encoder_context, &(generic_ctx->res_reconstructed_surface), 0);
10327 gen9_mfc_avc_surface_state(ctx, encoder_context, &(generic_ctx->res_uncompressed_input_surface), 4);
10328 gen9_mfc_avc_pipe_buf_addr_state(ctx, encoder_context);
10329 gen9_mfc_avc_ind_obj_base_addr_state(ctx, encode_state, encoder_context);
10330 gen9_mfc_avc_bsp_buf_base_addr_state(ctx, encoder_context);
10332 if (generic_state->brc_enabled) {
10333 memset(&second_level_batch, 0, sizeof(second_level_batch));
10334 if (generic_state->curr_pak_pass == 0) {
10335 second_level_batch.offset = 0;
10337 second_level_batch.offset = generic_state->curr_pak_pass * INTEL_AVC_IMAGE_STATE_CMD_SIZE;
10339 second_level_batch.is_second_level = 1;
10340 second_level_batch.bo = avc_ctx->res_brc_image_state_read_buffer.bo;
10341 gpe->mi_batch_buffer_start(ctx, batch, &second_level_batch);
10343 /*generate a new image state */
10344 gen9_avc_set_image_state_non_brc(ctx, encode_state, encoder_context, &(avc_ctx->res_image_state_batch_buffer_2nd_level));
10345 memset(&second_level_batch, 0, sizeof(second_level_batch));
10346 second_level_batch.offset = 0;
10347 second_level_batch.is_second_level = 1;
10348 second_level_batch.bo = avc_ctx->res_image_state_batch_buffer_2nd_level.bo;
10349 gpe->mi_batch_buffer_start(ctx, batch, &second_level_batch);
10352 gen9_mfc_avc_qm_state(ctx, encode_state, encoder_context);
10353 gen9_mfc_avc_fqm_state(ctx, encode_state, encoder_context);
10354 gen9_mfc_avc_directmode_state(ctx, encoder_context);
10359 gen9_avc_read_mfc_status(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
10361 struct i965_driver_data *i965 = i965_driver_data(ctx);
10362 struct i965_gpe_table *gpe = &i965->gpe_table;
10363 struct intel_batchbuffer *batch = encoder_context->base.batch;
10364 struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
10365 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)pak_context->private_enc_ctx;
10366 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
10368 struct gpe_mi_store_register_mem_parameter mi_store_reg_mem_param;
10369 struct gpe_mi_store_data_imm_parameter mi_store_data_imm_param;
10370 struct gpe_mi_flush_dw_parameter mi_flush_dw_param;
10371 struct encoder_status_buffer_internal *status_buffer;
10373 status_buffer = &(avc_ctx->status_buffer);
10375 memset(&mi_flush_dw_param, 0, sizeof(mi_flush_dw_param));
10376 gpe->mi_flush_dw(ctx, batch, &mi_flush_dw_param);
10378 /* read register and store into status_buffer and pak_statitistic info */
10379 memset(&mi_store_reg_mem_param, 0, sizeof(mi_store_reg_mem_param));
10380 mi_store_reg_mem_param.bo = status_buffer->bo;
10381 mi_store_reg_mem_param.offset = status_buffer->bs_byte_count_frame_offset;
10382 mi_store_reg_mem_param.mmio_offset = status_buffer->bs_byte_count_frame_reg_offset;
10383 gpe->mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);
10385 mi_store_reg_mem_param.bo = status_buffer->bo;
10386 mi_store_reg_mem_param.offset = status_buffer->image_status_mask_offset;
10387 mi_store_reg_mem_param.mmio_offset = status_buffer->image_status_mask_reg_offset;
10388 gpe->mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);
10390 /*update the status in the pak_statistic_surface */
10391 mi_store_reg_mem_param.bo = avc_ctx->res_brc_pre_pak_statistics_output_buffer.bo;
10392 mi_store_reg_mem_param.offset = 0;
10393 mi_store_reg_mem_param.mmio_offset = status_buffer->bs_byte_count_frame_reg_offset;
10394 gpe->mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);
10396 mi_store_reg_mem_param.bo = avc_ctx->res_brc_pre_pak_statistics_output_buffer.bo;
10397 mi_store_reg_mem_param.offset = 4;
10398 mi_store_reg_mem_param.mmio_offset = status_buffer->bs_byte_count_frame_nh_reg_offset;
10399 gpe->mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);
10401 memset(&mi_store_data_imm_param, 0, sizeof(mi_store_data_imm_param));
10402 mi_store_data_imm_param.bo = avc_ctx->res_brc_pre_pak_statistics_output_buffer.bo;
10403 mi_store_data_imm_param.offset = sizeof(unsigned int) * 2;
10404 mi_store_data_imm_param.dw0 = (generic_state->curr_pak_pass + 1);
10405 gpe->mi_store_data_imm(ctx, batch, &mi_store_data_imm_param);
10407 mi_store_reg_mem_param.bo = avc_ctx->res_brc_pre_pak_statistics_output_buffer.bo;
10408 mi_store_reg_mem_param.offset = sizeof(unsigned int) * (4 + generic_state->curr_pak_pass) ;
10409 mi_store_reg_mem_param.mmio_offset = status_buffer->image_status_ctrl_reg_offset;
10410 gpe->mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);
10412 memset(&mi_flush_dw_param, 0, sizeof(mi_flush_dw_param));
10413 gpe->mi_flush_dw(ctx, batch, &mi_flush_dw_param);
10419 gen9_avc_pak_brc_prepare(struct encode_state *encode_state,
10420 struct intel_encoder_context *encoder_context)
10422 struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
10423 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
10424 unsigned int rate_control_mode = encoder_context->rate_control_mode;
10426 switch (rate_control_mode & 0x7f) {
10428 generic_state->internal_rate_mode = VA_RC_CBR;
10432 generic_state->internal_rate_mode = VA_RC_VBR;//AVBR
10437 generic_state->internal_rate_mode = VA_RC_CQP;
10441 if (encoder_context->quality_level == 0)
10442 encoder_context->quality_level = ENCODER_DEFAULT_QUALITY_AVC;
/*
 * gen9_avc_fei_pak_pipeline_prepare() - bind the per-frame resources the
 * PAK-only (FEI) path needs: the coded bitstream buffer, the status
 * buffer, the reference-list frame-store mapping and the externally
 * provided MB-code / MV-data buffers.
 *
 * NOTE(review): this listing is a mangled paste — every line carries a
 * stale line-number prefix and several lines (opening/closing braces,
 * early `return va_status;` statements, the `dri_bo *bo;` declaration
 * used below) were lost.  Comments describe the intent of the surviving
 * lines; restore against the upstream file before building.
 */
10445 /* allcate resources for pak only (fei mode) */
10447 gen9_avc_fei_pak_pipeline_prepare(VADriverContextP ctx,
10449 struct encode_state *encode_state,
10450 struct intel_encoder_context *encoder_context)
10452 VAStatus va_status;
10453 struct i965_driver_data *i965 = i965_driver_data(ctx);
10454 struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
10455 struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)pak_context->generic_enc_ctx;
10456 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)pak_context->private_enc_ctx;
10457 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
10458 struct avc_enc_state * avc_state = (struct avc_enc_state *)pak_context->private_enc_state;
10459 struct gen9_surface_avc *avc_priv_surface;
10460 VAEncPictureParameterBufferH264 *pic_param;
10461 VAEncSliceParameterBufferH264 *slice_param;
10462 VAEncMiscParameterFEIFrameControlH264 *fei_param = NULL;
10463 unsigned int size = 0, i, j;
10464 unsigned int frame_mb_nums;
10465 struct object_buffer *obj_buffer = NULL;
10466 struct buffer_store *buffer_store = NULL;
10467 struct object_surface *obj_surface = NULL;
10468 struct avc_surface_param surface_param;
10469 struct i965_coded_buffer_segment *coded_buffer_segment;
10471 unsigned char * pdata;
/* refresh the cached picture/slice parameters and validate them */
10473 gen9_avc_update_parameters(ctx, profile, encode_state, encoder_context);
10475 pic_param = avc_state->pic_param;
10476 slice_param = avc_state->slice_param[0];
10478 va_status = gen9_avc_encode_check_parameter(ctx, encode_state, encoder_context);
10479 if (va_status != VA_STATUS_SUCCESS)
10482 va_status = gen9_avc_allocate_pak_resources(ctx, encode_state, encoder_context);
10483 if (va_status != VA_STATUS_SUCCESS)
10486 /* Encoded bitstream ?*/
10487 obj_buffer = encode_state->coded_buf_object;
10488 bo = obj_buffer->buffer_store->bo;
10489 i965_free_gpe_resource(&generic_ctx->compressed_bitstream.res);
10490 i965_dri_object_to_buffer_gpe_resource(&generic_ctx->compressed_bitstream.res, bo);
10491 generic_ctx->compressed_bitstream.start_offset = I965_CODEDBUFFER_HEADER_SIZE;
10492 generic_ctx->compressed_bitstream.end_offset =
10493 ALIGN(obj_buffer->size_element - 0x1000, 0x1000);
/* the status buffer shares the coded-buffer BO */
10496 dri_bo_unreference(avc_ctx->status_buffer.bo);
10497 avc_ctx->status_buffer.bo = bo;
10498 dri_bo_reference(bo);
10500 /* set the internal flag to 0 to indicate the coded size is unknown */
10502 coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual;
10503 coded_buffer_segment->mapped = 0;
10504 coded_buffer_segment->codec = encoder_context->codec;
10505 coded_buffer_segment->status_support = 1;
10507 pdata = bo->virtual + avc_ctx->status_buffer.base_offset;
10508 memset(pdata, 0, avc_ctx->status_buffer.status_buffer_size);
10510 //frame id, it is the ref pic id in the reference_objects list.
10511 avc_state->num_refs[0] = 0;
10512 avc_state->num_refs[1] = 0;
10513 if (generic_state->frame_type == SLICE_TYPE_P) {
10514 avc_state->num_refs[0] = pic_param->num_ref_idx_l0_active_minus1 + 1;
10516 if (slice_param->num_ref_idx_active_override_flag)
10517 avc_state->num_refs[0] = slice_param->num_ref_idx_l0_active_minus1 + 1;
10518 } else if (generic_state->frame_type == SLICE_TYPE_B) {
10519 avc_state->num_refs[0] = pic_param->num_ref_idx_l0_active_minus1 + 1;
10520 avc_state->num_refs[1] = pic_param->num_ref_idx_l1_active_minus1 + 1;
10522 if (slice_param->num_ref_idx_active_override_flag) {
10523 avc_state->num_refs[0] = slice_param->num_ref_idx_l0_active_minus1 + 1;
10524 avc_state->num_refs[1] = slice_param->num_ref_idx_l1_active_minus1 + 1;
/* map each RefPicList0 entry to its index in reference_objects[] */
10527 for (i = 0; i < ARRAY_ELEMS(avc_state->list_ref_idx[0]); i++) {
10528 VAPictureH264 *va_pic;
10530 assert(ARRAY_ELEMS(slice_param->RefPicList0) == ARRAY_ELEMS(avc_state->list_ref_idx[0]));
10531 avc_state->list_ref_idx[0][i] = 0;
10533 if (i >= avc_state->num_refs[0])
10536 va_pic = &slice_param->RefPicList0[i];
10538 for (j = 0; j < ARRAY_ELEMS(encode_state->reference_objects); j++) {
10539 obj_surface = encode_state->reference_objects[j];
10543 obj_surface->base.id == va_pic->picture_id) {
10545 assert(obj_surface->base.id != VA_INVALID_SURFACE);
10546 avc_state->list_ref_idx[0][i] = j;
/* same mapping for RefPicList1 (B slices) */
10552 for (i = 0; i < ARRAY_ELEMS(avc_state->list_ref_idx[1]); i++) {
10553 VAPictureH264 *va_pic;
10555 assert(ARRAY_ELEMS(slice_param->RefPicList1) == ARRAY_ELEMS(avc_state->list_ref_idx[1]));
10556 avc_state->list_ref_idx[1][i] = 0;
10558 if (i >= avc_state->num_refs[1])
10561 va_pic = &slice_param->RefPicList1[i];
10563 for (j = 0; j < ARRAY_ELEMS(encode_state->reference_objects); j++) {
10564 obj_surface = encode_state->reference_objects[j];
10569 obj_surface->base.id == va_pic->picture_id) {
10571 assert(obj_surface->base.id != VA_INVALID_SURFACE);
10572 avc_state->list_ref_idx[1][i] = j;
10580 obj_surface = encode_state->reconstructed_object;
10581 fei_param = avc_state->fei_framectl_param;
10582 frame_mb_nums = generic_state->frame_width_in_mbs * generic_state->frame_height_in_mbs;
10584 /* Setup current reconstruct frame */
10585 obj_surface = encode_state->reconstructed_object;
10586 va_status = i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
10588 if (va_status != VA_STATUS_SUCCESS)
10591 memset(&surface_param, 0, sizeof(surface_param));
10592 surface_param.frame_width = generic_state->frame_width_in_pixel;
10593 surface_param.frame_height = generic_state->frame_height_in_pixel;
10594 va_status = gen9_avc_init_check_surfaces(ctx,
10595 obj_surface, encoder_context,
10597 avc_priv_surface = obj_surface->private_data;
10599 /* res_mb_code_surface for MB code */
10600 /* PAK only mode must have the mb_code_surface from middleware,
10601 * so the code shouldn't reach here without an externally provided
10602 * MB Code buffer */
10603 assert(fei_param->mb_code_data != VA_INVALID_ID);
10604 size = frame_mb_nums * FEI_AVC_MB_CODE_BUFFER_SIZE;
10605 obj_buffer = BUFFER(fei_param->mb_code_data);
10606 assert(obj_buffer != NULL);
10607 buffer_store = obj_buffer->buffer_store;
10608 assert(size <= buffer_store->bo->size);
10609 if (avc_priv_surface->res_mb_code_surface.bo != NULL)
10610 i965_free_gpe_resource(&avc_priv_surface->res_mb_code_surface);
10611 i965_dri_object_to_buffer_gpe_resource(&avc_priv_surface->res_mb_code_surface,
10613 /* res_mv_data_surface for MV data */
10614 size = frame_mb_nums * FEI_AVC_MV_DATA_BUFFER_SIZE;
10615 if (fei_param->mv_data != VA_INVALID_ID) {
10616 obj_buffer = BUFFER(fei_param->mv_data);
10617 assert(obj_buffer != NULL);
10618 buffer_store = obj_buffer->buffer_store;
10619 assert(size <= buffer_store->bo->size);
10620 if (avc_priv_surface->res_mv_data_surface.bo != NULL)
10621 i965_free_gpe_resource(&avc_priv_surface->res_mv_data_surface);
10622 i965_dri_object_to_buffer_gpe_resource(&avc_priv_surface->res_mv_data_surface,
10626 return VA_STATUS_SUCCESS;
/*
 * gen9_avc_pak_pipeline_prepare() - set up every surface and buffer the
 * MFX PAK stage needs for the current frame: reconstructed and input
 * surfaces, reference list and direct-MV buffers, the per-frame
 * 2nd-level slice batch, and the PAK row-store / MB-status scratch
 * buffers.  Delegates to gen9_avc_fei_pak_pipeline_prepare() in FEI
 * PAK-only mode.
 *
 * NOTE(review): mangled paste — stale line-number prefixes remain, and
 * lines such as braces, early `return va_status;` statements, the first
 * scratch-buffer size computation and the `failed_allocation:` label
 * were lost.  Comments below describe intent only.
 */
10631 gen9_avc_pak_pipeline_prepare(VADriverContextP ctx,
10633 struct encode_state *encode_state,
10634 struct intel_encoder_context *encoder_context)
10636 VAStatus va_status;
10637 struct i965_driver_data *i965 = i965_driver_data(ctx);
10638 struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
10639 struct generic_encoder_context * generic_ctx = (struct generic_encoder_context *)pak_context->generic_enc_ctx;
10640 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)pak_context->private_enc_ctx;
10641 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
10642 struct avc_enc_state * avc_state = (struct avc_enc_state *)pak_context->private_enc_state;
10644 struct object_surface *obj_surface;
10645 VAEncPictureParameterBufferH264 *pic_param;
10646 VAEncSliceParameterBufferH264 *slice_param;
10648 struct gen9_surface_avc *avc_priv_surface;
10649 struct avc_surface_param surface_param;
10650 int i, j, enable_avc_ildb = 0;
10651 unsigned int allocate_flag = 1;
10652 unsigned int size, w_mb, h_mb;
/* FEI PAK-only mode takes an entirely separate preparation path */
10654 if (encoder_context->fei_function_mode == VA_FEI_FUNCTION_PAK) {
10655 va_status = gen9_avc_fei_pak_pipeline_prepare(ctx, profile, encode_state, encoder_context);
10656 if (va_status != VA_STATUS_SUCCESS)
10660 pic_param = avc_state->pic_param;
10661 slice_param = avc_state->slice_param[0];
10662 w_mb = generic_state->frame_width_in_mbs;
10663 h_mb = generic_state->frame_height_in_mbs;
10665 /* update the parameter and check slice parameter */
/* deblocking is enabled for the frame unless every slice disables it
 * (disable_deblocking_filter_idc == 1) */
10666 for (j = 0; j < encode_state->num_slice_params_ext && enable_avc_ildb == 0; j++) {
10667 assert(encode_state->slice_params_ext && encode_state->slice_params_ext[j]->buffer);
10668 slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j]->buffer;
10670 for (i = 0; i < encode_state->slice_params_ext[j]->num_elements; i++) {
10671 assert((slice_param->slice_type == SLICE_TYPE_I) ||
10672 (slice_param->slice_type == SLICE_TYPE_SI) ||
10673 (slice_param->slice_type == SLICE_TYPE_P) ||
10674 (slice_param->slice_type == SLICE_TYPE_SP) ||
10675 (slice_param->slice_type == SLICE_TYPE_B));
10677 if (slice_param->disable_deblocking_filter_idc != 1) {
10678 enable_avc_ildb = 1;
10685 avc_state->enable_avc_ildb = enable_avc_ildb;
10687 /* setup the all surface and buffer for PAK */
10688 /* Setup current reconstruct frame */
10689 obj_surface = encode_state->reconstructed_object;
10690 va_status = i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
10692 if (va_status != VA_STATUS_SUCCESS)
10695 memset(&surface_param, 0, sizeof(surface_param));
10696 surface_param.frame_width = generic_state->frame_width_in_pixel;
10697 surface_param.frame_height = generic_state->frame_height_in_pixel;
10698 va_status = gen9_avc_init_check_surfaces(ctx,
10699 obj_surface, encoder_context,
10701 if (va_status != VA_STATUS_SUCCESS)
10703 /* init the member of avc_priv_surface,frame_store_id,qp_value */
/* the last two DMV buffer slots belong to the current frame */
10705 avc_priv_surface = (struct gen9_surface_avc *)obj_surface->private_data;
10706 avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 2] = 0;
10707 avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 1] = 0;
10708 i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS - 2]);
10709 i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS - 1]);
10710 i965_dri_object_to_buffer_gpe_resource(&avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS - 2], avc_priv_surface->dmv_top);
10711 i965_dri_object_to_buffer_gpe_resource(&avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS - 1], avc_priv_surface->dmv_bottom);
10712 avc_priv_surface->qp_value = pic_param->pic_init_qp + slice_param->slice_qp_delta;
10713 avc_priv_surface->frame_store_id = 0;
10714 avc_priv_surface->frame_idx = pic_param->CurrPic.frame_idx;
10715 avc_priv_surface->top_field_order_cnt = pic_param->CurrPic.TopFieldOrderCnt;
10716 avc_priv_surface->is_as_ref = pic_param->pic_fields.bits.reference_pic_flag;
10717 avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 2] = avc_priv_surface->top_field_order_cnt;
10718 avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 1] = avc_priv_surface->top_field_order_cnt + 1;
/* bind the reconstructed surface as the (pre- or post-) deblocking
 * output depending on whether the in-loop deblocker runs */
10720 i965_free_gpe_resource(&generic_ctx->res_reconstructed_surface);
10721 i965_free_gpe_resource(&avc_ctx->res_post_deblocking_output);
10722 i965_free_gpe_resource(&avc_ctx->res_pre_deblocking_output);
10723 i965_object_surface_to_2d_gpe_resource_with_align(&generic_ctx->res_reconstructed_surface, obj_surface, GPE_RESOURCE_ALIGNMENT);
10726 if (avc_state->enable_avc_ildb) {
10727 i965_object_surface_to_2d_gpe_resource_with_align(&avc_ctx->res_post_deblocking_output, obj_surface, GPE_RESOURCE_ALIGNMENT);
10729 i965_object_surface_to_2d_gpe_resource_with_align(&avc_ctx->res_pre_deblocking_output, obj_surface, GPE_RESOURCE_ALIGNMENT);
10731 /* input YUV surface */
10732 obj_surface = encode_state->input_yuv_object;
10733 va_status = i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
10735 if (va_status != VA_STATUS_SUCCESS)
10737 i965_free_gpe_resource(&generic_ctx->res_uncompressed_input_surface);
10738 i965_object_surface_to_2d_gpe_resource_with_align(&generic_ctx->res_uncompressed_input_surface, obj_surface, GPE_RESOURCE_ALIGNMENT);
10740 /* Reference surfaces */
10741 for (i = 0; i < ARRAY_ELEMS(avc_ctx->list_reference_res); i++) {
10742 i965_free_gpe_resource(&avc_ctx->list_reference_res[i]);
10743 i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i * 2]);
10744 i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i * 2 + 1]);
10745 obj_surface = encode_state->reference_objects[i];
10746 avc_state->top_field_poc[2 * i] = 0;
10747 avc_state->top_field_poc[2 * i + 1] = 0;
10749 if (obj_surface && obj_surface->bo) {
10750 i965_object_surface_to_2d_gpe_resource_with_align(&avc_ctx->list_reference_res[i], obj_surface, GPE_RESOURCE_ALIGNMENT);
10752 /* actually it should be handled when it is reconstructed surface */
10753 va_status = gen9_avc_init_check_surfaces(ctx,
10754 obj_surface, encoder_context,
10756 if (va_status != VA_STATUS_SUCCESS)
10758 avc_priv_surface = (struct gen9_surface_avc *)obj_surface->private_data;
10759 i965_dri_object_to_buffer_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i * 2], avc_priv_surface->dmv_top);
10760 i965_dri_object_to_buffer_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i * 2 + 1], avc_priv_surface->dmv_bottom);
10761 avc_priv_surface->frame_store_id = i;
10762 avc_state->top_field_poc[2 * i] = avc_priv_surface->top_field_order_cnt;
10763 avc_state->top_field_poc[2 * i + 1] = avc_priv_surface->top_field_order_cnt + 1;
/* recreate the per-frame 2nd-level slice batch buffer */
10769 if (avc_ctx->pres_slice_batch_buffer_2nd_level) {
10770 intel_batchbuffer_free(avc_ctx->pres_slice_batch_buffer_2nd_level);
10771 avc_ctx->pres_slice_batch_buffer_2nd_level = NULL;
10774 avc_ctx->pres_slice_batch_buffer_2nd_level =
10775 intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD,
10777 encode_state->num_slice_params_ext);
10778 if (!avc_ctx->pres_slice_batch_buffer_2nd_level)
10779 return VA_STATUS_ERROR_ALLOCATION_FAILED;
10781 for (i = 0; i < MAX_AVC_SLICE_NUM; i++) {
10782 avc_state->slice_batch_offset[i] = 0;
/* PAK scratch buffers; NOTE(review): the size computation for the intra
 * row-store buffer was lost in the paste */
10787 i965_free_gpe_resource(&avc_ctx->res_intra_row_store_scratch_buffer);
10788 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
10789 &avc_ctx->res_intra_row_store_scratch_buffer,
10791 "PAK Intra row store scratch buffer");
10792 if (!allocate_flag)
10793 goto failed_allocation;
10795 size = w_mb * 4 * 64;
10796 i965_free_gpe_resource(&avc_ctx->res_deblocking_filter_row_store_scratch_buffer);
10797 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
10798 &avc_ctx->res_deblocking_filter_row_store_scratch_buffer,
10800 "PAK Deblocking filter row store scratch buffer");
10801 if (!allocate_flag)
10802 goto failed_allocation;
10804 size = w_mb * 2 * 64;
10805 i965_free_gpe_resource(&avc_ctx->res_bsd_mpc_row_store_scratch_buffer);
10806 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
10807 &avc_ctx->res_bsd_mpc_row_store_scratch_buffer,
10809 "PAK BSD/MPC row store scratch buffer");
10810 if (!allocate_flag)
10811 goto failed_allocation;
10813 size = w_mb * h_mb * 16;
10814 i965_free_gpe_resource(&avc_ctx->res_pak_mb_status_buffer);
10815 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
10816 &avc_ctx->res_pak_mb_status_buffer,
10818 "PAK MB status buffer");
10819 if (!allocate_flag)
10820 goto failed_allocation;
10822 return VA_STATUS_SUCCESS;
/* NOTE(review): the `failed_allocation:` label itself was lost in the paste */
10825 return VA_STATUS_ERROR_ALLOCATION_FAILED;
/*
 * PAK stage entry for encoding one AVC picture.
 *
 * Prepares the PAK pipeline resources, then — inside an atomic BCS batch —
 * runs picture-level PAK, slice-level PAK and an MFC status readback once
 * per BRC PAK pass.  Finishes by releasing the 2nd-level slice batch buffer
 * and advancing the frame counters.
 *
 * NOTE(review): this chunk is extraction-garbled — original file line
 * numbers are embedded at the start of each line and several physical
 * lines (braces, an early `return va_status;`, an `else`) are missing
 * from view.  Only comments were added here; no code token was changed.
 */
10829 gen9_avc_encode_picture(VADriverContextP ctx,
10831 struct encode_state *encode_state,
10832 struct intel_encoder_context *encoder_context)
10834 VAStatus va_status;
10835 struct i965_driver_data *i965 = i965_driver_data(ctx);
10836 struct i965_gpe_table *gpe = &i965->gpe_table;
10837 struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
10838 struct i965_avc_encoder_context * avc_ctx = (struct i965_avc_encoder_context *)vme_context->private_enc_ctx;
10839 struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
10840 struct intel_batchbuffer *batch = encoder_context->base.batch;
/* Allocate/refresh all PAK surfaces and buffers for this frame; bail out
 * on failure (the propagating return is on a line missing from this view). */
10842 va_status = gen9_avc_pak_pipeline_prepare(ctx, profile, encode_state, encoder_context);
10844 if (va_status != VA_STATUS_SUCCESS)
/* Pick the BSD ring: force ring 0 when the platform has a second BSD
 * engine, otherwise use the default atomic BCS start. */
10847 if (i965->intel.has_bsd2)
10848 intel_batchbuffer_start_atomic_bcs_override(batch, 0x1000, BSD_RING0);
10850 intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
10851 intel_batchbuffer_emit_mi_flush(batch);
/* One iteration per BRC PAK pass; multi-pass PAK lets BRC re-encode the
 * frame with adjusted QP until the size target is met. */
10852 for (generic_state->curr_pak_pass = 0;
10853 generic_state->curr_pak_pass < generic_state->num_pak_passes;
10854 generic_state->curr_pak_pass++) {
10856 if (generic_state->curr_pak_pass == 0) {
10857 /* Initialize the avc Image Ctrl reg for the first pass, write 0 to status/control register, is it needed in AVC? */
10858 struct gpe_mi_load_register_imm_parameter mi_load_reg_imm;
10859 struct encoder_status_buffer_internal *status_buffer;
10861 status_buffer = &(avc_ctx->status_buffer);
10862 memset(&mi_load_reg_imm, 0, sizeof(mi_load_reg_imm));
10863 mi_load_reg_imm.mmio_offset = status_buffer->image_status_ctrl_reg_offset;
10864 mi_load_reg_imm.data = 0;
10865 gpe->mi_load_register_imm(ctx, batch, &mi_load_reg_imm);
10867 gen9_avc_pak_picture_level(ctx, encode_state, encoder_context);
10868 gen9_avc_pak_slice_level(ctx, encode_state, encoder_context);
10869 gen9_avc_read_mfc_status(ctx, encoder_context);
/* The 2nd-level slice batch is per-frame; free it once all passes are done. */
10872 if (avc_ctx->pres_slice_batch_buffer_2nd_level) {
10873 intel_batchbuffer_free(avc_ctx->pres_slice_batch_buffer_2nd_level);
10874 avc_ctx->pres_slice_batch_buffer_2nd_level = NULL;
10877 intel_batchbuffer_end_atomic(batch);
10878 intel_batchbuffer_flush(batch);
10880 generic_state->seq_frame_number++;
10881 generic_state->total_frame_number++;
10882 generic_state->first_frame = 0;
10883 return VA_STATUS_SUCCESS;
/*
 * PAK pipeline dispatcher: routes all supported H.264 profiles to
 * gen9_avc_encode_picture() and rejects anything else with
 * VA_STATUS_ERROR_UNSUPPORTED_PROFILE.
 *
 * NOTE(review): the `switch (profile)` scaffolding, `break`s, `default:`
 * label and final return are on lines missing from this garbled view;
 * comments only, no code changed.
 */
10887 gen9_avc_pak_pipeline(VADriverContextP ctx,
10889 struct encode_state *encode_state,
10890 struct intel_encoder_context *encoder_context)
/* All H.264 profile variants share one PAK path. */
10895 case VAProfileH264ConstrainedBaseline:
10896 case VAProfileH264Main:
10897 case VAProfileH264High:
10898 case VAProfileH264MultiviewHigh:
10899 case VAProfileH264StereoHigh:
10900 vaStatus = gen9_avc_encode_picture(ctx, profile, encode_state, encoder_context);
10904 vaStatus = VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
/*
 * Tear down the PAK side of the shared VME/PAK context: releases every
 * GPE resource the PAK stage allocated (reconstructed/input surfaces,
 * bitstream buffer, row-store scratch buffers, MB status buffer,
 * reference and direct-MV buffer arrays) and frees the 2nd-level slice
 * batch buffer.  The context structs themselves are freed by the VME
 * destroy path, since VME and PAK share one context.
 *
 * NOTE(review): the `int i;` declaration, NULL-guard lines and the
 * loop-closing braces are missing from this garbled view; comments only,
 * no code changed.
 */
10912 gen9_avc_pak_context_destroy(void * context)
10914 struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)context;
10915 struct generic_encoder_context * generic_ctx;
10916 struct i965_avc_encoder_context * avc_ctx;
10922 generic_ctx = (struct generic_encoder_context *)pak_context->generic_enc_ctx;
10923 avc_ctx = (struct i965_avc_encoder_context *)pak_context->private_enc_ctx;
/* Per-frame and per-stream GPE resources. */
10926 i965_free_gpe_resource(&generic_ctx->res_reconstructed_surface);
10927 i965_free_gpe_resource(&avc_ctx->res_post_deblocking_output);
10928 i965_free_gpe_resource(&avc_ctx->res_pre_deblocking_output);
10929 i965_free_gpe_resource(&generic_ctx->res_uncompressed_input_surface);
10931 i965_free_gpe_resource(&generic_ctx->compressed_bitstream.res);
10932 i965_free_gpe_resource(&avc_ctx->res_intra_row_store_scratch_buffer);
10933 i965_free_gpe_resource(&avc_ctx->res_deblocking_filter_row_store_scratch_buffer);
10934 i965_free_gpe_resource(&avc_ctx->res_bsd_mpc_row_store_scratch_buffer);
10935 i965_free_gpe_resource(&avc_ctx->res_pak_mb_status_buffer);
/* Reference picture surfaces and their paired direct-MV buffers. */
10937 for (i = 0 ; i < MAX_MFC_AVC_REFERENCE_SURFACES; i++) {
10938 i965_free_gpe_resource(&avc_ctx->list_reference_res[i]);
10941 for (i = 0 ; i < NUM_MFC_AVC_DMV_BUFFERS; i++) {
10942 i965_free_gpe_resource(&avc_ctx->res_direct_mv_buffersr[i]);
/* Free and clear the 2nd-level slice batch buffer if still allocated. */
10945 if (avc_ctx->pres_slice_batch_buffer_2nd_level) {
10946 intel_batchbuffer_free(avc_ctx->pres_slice_batch_buffer_2nd_level);
10947 avc_ctx->pres_slice_batch_buffer_2nd_level = NULL;
/*
 * Report the coded size of the finished frame: copies the PAK-readback
 * bitstream byte count (bs_byte_count_frame) out of the coded buffer's
 * private status area into the coded buffer segment's base.size.
 *
 * Returns VA_STATUS_ERROR_INVALID_BUFFER when either pointer argument is
 * NULL, VA_STATUS_SUCCESS otherwise.
 *
 * NOTE(review): return-type line and braces are missing from this
 * garbled view; comments only, no code changed.
 */
10953 gen9_avc_get_coded_status(VADriverContextP ctx,
10954 struct intel_encoder_context *encoder_context,
10955 struct i965_coded_buffer_segment *coded_buf_seg)
10957 struct encoder_status *avc_encode_status;
10959 if (!encoder_context || !coded_buf_seg)
10960 return VA_STATUS_ERROR_INVALID_BUFFER;
/* codec_private_data holds the encoder_status block written by the MFC
 * status readback (see the status_buffer offsets set up at context init). */
10962 avc_encode_status = (struct encoder_status *)coded_buf_seg->codec_private_data;
10963 coded_buf_seg->base.size = avc_encode_status->bs_byte_count_frame;
10965 return VA_STATUS_SUCCESS;
/*
 * Create and initialize the shared VME/PAK encoder context for gen8+ AVC.
 *
 * Allocates the five context/state structures, selects the per-platform
 * kernel binary (SKL/BXT, BDW, KBL/GLK, CNL — with a separate monolithic
 * binary when FEI/PreEnc is enabled on SKL/BXT), sets every generic and
 * AVC-specific state field to its documented default, lays out the status
 * buffer offsets/MMIO register mapping used by the MFC status readback,
 * loads the kernels, and wires up the VME pipeline entry points.
 *
 * NOTE(review): this chunk is extraction-garbled — original file line
 * numbers are embedded in every line and several physical lines (`else`
 * keywords, braces, the remaining free() calls in the error path, the
 * final return) are missing from view.  Only comments were added or
 * corrected; no code token was changed.
 */
10969 gen9_avc_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
10971 /* VME & PAK share the same context */
10972 struct i965_driver_data *i965 = i965_driver_data(ctx);
10973 struct encoder_vme_mfc_context * vme_context = NULL;
10974 struct generic_encoder_context * generic_ctx = NULL;
10975 struct i965_avc_encoder_context * avc_ctx = NULL;
10976 struct generic_enc_codec_state * generic_state = NULL;
10977 struct avc_enc_state * avc_state = NULL;
10978 struct encoder_status_buffer_internal *status_buffer;
10979 uint32_t base_offset = offsetof(struct i965_coded_buffer_segment, codec_private_data);
/* Allocate all five pieces up front; a single failure tears everything
 * down via allocate_structure_failed. */
10981 vme_context = calloc(1, sizeof(struct encoder_vme_mfc_context));
10982 generic_ctx = calloc(1, sizeof(struct generic_encoder_context));
10983 avc_ctx = calloc(1, sizeof(struct i965_avc_encoder_context));
10984 generic_state = calloc(1, sizeof(struct generic_enc_codec_state));
10985 avc_state = calloc(1, sizeof(struct avc_enc_state));
10987 if (!vme_context || !generic_ctx || !avc_ctx || !generic_state || !avc_state)
10988 goto allocate_structure_failed;
/* NOTE(review): redundant with calloc() above, but kept byte-identical. */
10990 memset(vme_context, 0, sizeof(struct encoder_vme_mfc_context));
10991 memset(generic_ctx, 0, sizeof(struct generic_encoder_context));
10992 memset(avc_ctx, 0, sizeof(struct i965_avc_encoder_context));
10993 memset(generic_state, 0, sizeof(struct generic_enc_codec_state));
10994 memset(avc_state, 0, sizeof(struct avc_enc_state));
10996 encoder_context->vme_context = vme_context;
10997 vme_context->generic_enc_ctx = generic_ctx;
10998 vme_context->private_enc_ctx = avc_ctx;
10999 vme_context->generic_enc_state = generic_state;
11000 vme_context->private_enc_state = avc_state;
/* Select the kernel binary for this platform generation; unsupported
 * platforms fall through to the error path (the `else` line is missing
 * from this view). */
11002 if (IS_SKL(i965->intel.device_info) ||
11003 IS_BXT(i965->intel.device_info)) {
11004 if (!encoder_context->fei_enabled && !encoder_context->preenc_enabled) {
11005 generic_ctx->enc_kernel_ptr = (void *)skl_avc_encoder_kernels;
11006 generic_ctx->enc_kernel_size = sizeof(skl_avc_encoder_kernels);
11008 /* FEI and PreEnc operation kernels are included in
11009 * the monolithic kernel binary */
11010 generic_ctx->enc_kernel_ptr = (void *)skl_avc_fei_encoder_kernels;
11011 generic_ctx->enc_kernel_size = sizeof(skl_avc_fei_encoder_kernels);
11013 } else if (IS_GEN8(i965->intel.device_info)) {
11014 generic_ctx->enc_kernel_ptr = (void *)bdw_avc_encoder_kernels;
11015 generic_ctx->enc_kernel_size = sizeof(bdw_avc_encoder_kernels);
11016 } else if (IS_KBL(i965->intel.device_info) ||
11017 IS_GLK(i965->intel.device_info)) {
11018 generic_ctx->enc_kernel_ptr = (void *)kbl_avc_encoder_kernels;
11019 generic_ctx->enc_kernel_size = sizeof(kbl_avc_encoder_kernels);
11020 } else if (IS_GEN10(i965->intel.device_info)) {
11021 generic_ctx->enc_kernel_ptr = (void *)cnl_avc_encoder_kernels;
11022 generic_ctx->enc_kernel_size = sizeof(cnl_avc_encoder_kernels);
11024 goto allocate_structure_failed;
11026 /* initialize misc ? */
11027 avc_ctx->ctx = ctx;
11028 generic_ctx->use_hw_scoreboard = 1;
11029 generic_ctx->use_hw_non_stalling_scoreboard = 1;
11031 /* initialize generic state */
11033 generic_state->kernel_mode = INTEL_ENC_KERNEL_NORMAL;
11034 generic_state->preset = INTEL_PRESET_RT_SPEED;
11035 generic_state->seq_frame_number = 0;
11036 generic_state->total_frame_number = 0;
11037 generic_state->frame_type = 0;
11038 generic_state->first_frame = 1;
/* Frame/downscaled dimensions are filled in later per sequence. */
11040 generic_state->frame_width_in_pixel = 0;
11041 generic_state->frame_height_in_pixel = 0;
11042 generic_state->frame_width_in_mbs = 0;
11043 generic_state->frame_height_in_mbs = 0;
11044 generic_state->frame_width_4x = 0;
11045 generic_state->frame_height_4x = 0;
11046 generic_state->frame_width_16x = 0;
11047 generic_state->frame_height_16x = 0;
11048 generic_state->frame_width_32x = 0;
11049 generic_state->downscaled_width_4x_in_mb = 0;
11050 generic_state->downscaled_height_4x_in_mb = 0;
11051 generic_state->downscaled_width_16x_in_mb = 0;
11052 generic_state->downscaled_height_16x_in_mb = 0;
11053 generic_state->downscaled_width_32x_in_mb = 0;
11054 generic_state->downscaled_height_32x_in_mb = 0;
/* HME capability defaults: 4x and 16x supported, 32x off; enable flags
 * are decided per frame later. */
11056 generic_state->hme_supported = 1;
11057 generic_state->b16xme_supported = 1;
11058 generic_state->b32xme_supported = 0;
11059 generic_state->hme_enabled = 0;
11060 generic_state->b16xme_enabled = 0;
11061 generic_state->b32xme_enabled = 0;
11063 if (encoder_context->fei_enabled) {
11064 /* Disabling HME in FEI encode */
11065 generic_state->hme_supported = 0;
11066 generic_state->b16xme_supported = 0;
11067 } else if (encoder_context->preenc_enabled) {
11068 /* Disabling 16x16ME in PreEnc */
11069 generic_state->b16xme_supported = 0;
11072 generic_state->brc_distortion_buffer_supported = 1;
11073 generic_state->brc_constant_buffer_supported = 0;
/* BRC defaults; overridden below when a real rate-control mode is set. */
11075 generic_state->frame_rate = 30;
11076 generic_state->brc_allocated = 0;
11077 generic_state->brc_inited = 0;
11078 generic_state->brc_need_reset = 0;
11079 generic_state->is_low_delay = 0;
11080 generic_state->brc_enabled = 0;//default
11081 generic_state->internal_rate_mode = 0;
11082 generic_state->curr_pak_pass = 0;
11083 generic_state->num_pak_passes = MAX_AVC_PAK_PASS_NUM;
11084 generic_state->is_first_pass = 1;
11085 generic_state->is_last_pass = 0;
11086 generic_state->mb_brc_enabled = 0; // enable mb brc
11087 generic_state->brc_roi_enable = 0;
11088 generic_state->brc_dirty_roi_enable = 0;
11089 generic_state->skip_frame_enbale = 0;
11091 generic_state->target_bit_rate = 0;
11092 generic_state->max_bit_rate = 0;
11093 generic_state->min_bit_rate = 0;
11094 generic_state->init_vbv_buffer_fullness_in_bit = 0;
11095 generic_state->vbv_buffer_size_in_bit = 0;
11096 generic_state->frames_per_100s = 0;
11097 generic_state->gop_size = 0;
11098 generic_state->gop_ref_distance = 0;
11099 generic_state->brc_target_size = 0;
11100 generic_state->brc_mode = 0;
11101 generic_state->brc_init_current_target_buf_full_in_bits = 0.0;
11102 generic_state->brc_init_reset_input_bits_per_frame = 0.0;
11103 generic_state->brc_init_reset_buf_size_in_bits = 0;
11104 generic_state->brc_init_previous_target_buf_full_in_bits = 0;
11105 generic_state->frames_per_window_size = 0;//default
11106 generic_state->target_percentage = 0;
11108 generic_state->avbr_curracy = 0;
11109 generic_state->avbr_convergence = 0;
11111 generic_state->num_skip_frames = 0;
11112 generic_state->size_skip_frames = 0;
11114 generic_state->num_roi = 0;
11115 generic_state->max_delta_qp = 0;
11116 generic_state->min_delta_qp = 0;
/* Turn BRC on for every rate-control mode except NONE and CQP. */
11118 if (encoder_context->rate_control_mode != VA_RC_NONE &&
11119 encoder_context->rate_control_mode != VA_RC_CQP) {
11120 generic_state->brc_enabled = 1;
11121 generic_state->brc_distortion_buffer_supported = 1;
11122 generic_state->brc_constant_buffer_supported = 1;
11123 generic_state->num_pak_passes = MAX_AVC_PAK_PASS_NUM;
11125 /*avc state initialization */
11126 avc_state->mad_enable = 0;
11127 avc_state->mb_disable_skip_map_enable = 0;
11128 avc_state->sfd_enable = 1;//default
11129 avc_state->sfd_mb_enable = 1;//set it true
11130 avc_state->adaptive_search_window_enable = 1;//default
11131 avc_state->mb_qp_data_enable = 0;
11132 avc_state->intra_refresh_i_enable = 0;
11133 avc_state->min_max_qp_enable = 0;
11134 avc_state->skip_bias_adjustment_enable = 0;//default, same as skip_bias_adjustment_supported? no
11137 avc_state->non_ftq_skip_threshold_lut_input_enable = 0;
11138 avc_state->ftq_skip_threshold_lut_input_enable = 0;
11139 avc_state->ftq_override = 0;
11141 avc_state->direct_bias_adjustment_enable = 0;
11142 avc_state->global_motion_bias_adjustment_enable = 0;
11143 avc_state->disable_sub_mb_partion = 0;
11144 avc_state->arbitrary_num_mbs_in_slice = 0;
11145 avc_state->adaptive_transform_decision_enable = 0;//default
11146 avc_state->skip_check_disable = 0;
11147 avc_state->tq_enable = 0;
11148 avc_state->enable_avc_ildb = 0;
11149 avc_state->mbaff_flag = 0;
11150 avc_state->enable_force_skip = 1;//default
11151 avc_state->rc_panic_enable = 1;//default
11152 avc_state->suppress_recon_enable = 1;//default
11154 avc_state->ref_pic_select_list_supported = 1;
11155 avc_state->mb_brc_supported = 1;//?,default
11156 avc_state->multi_pre_enable = 1;//default
11157 avc_state->ftq_enable = 1;//default
11158 avc_state->caf_supported = 1; //default
11159 avc_state->caf_enable = 0;
11160 avc_state->caf_disable_hd = 1;//default
11161 avc_state->skip_bias_adjustment_supported = 1;//default
11163 avc_state->adaptive_intra_scaling_enable = 1;//default
11164 avc_state->old_mode_cost_enable = 0;//default
11165 avc_state->multi_ref_qp_enable = 1;//default
11166 avc_state->weighted_ref_l0_enable = 1;//default
11167 avc_state->weighted_ref_l1_enable = 1;//default
11168 avc_state->weighted_prediction_supported = 0;
11169 avc_state->brc_split_enable = 0;
11170 avc_state->slice_level_report_supported = 0;
11172 avc_state->fbr_bypass_enable = 1;//default
11173 avc_state->field_scaling_output_interleaved = 0;
11174 avc_state->mb_variance_output_enable = 0;
11175 avc_state->mb_pixel_average_output_enable = 0;
11176 avc_state->rolling_intra_refresh_enable = 0;// same as intra_refresh_i_enable?
11177 avc_state->mbenc_curbe_set_in_brc_update = 0;
11178 avc_state->rounding_inter_enable = 1; //default
11179 avc_state->adaptive_rounding_inter_enable = 1;//default
11181 avc_state->mbenc_i_frame_dist_in_use = 0;
11182 avc_state->mb_status_supported = 1; //set in initialization for gen9
11183 avc_state->mb_status_enable = 0;
11184 avc_state->mb_vproc_stats_enable = 0;
11185 avc_state->flatness_check_enable = 0;
11186 avc_state->flatness_check_supported = 1;//default
11187 avc_state->block_based_skip_enable = 0;
11188 avc_state->use_widi_mbenc_kernel = 0;
11189 avc_state->kernel_trellis_enable = 0;
11190 avc_state->generic_reserved = 0;
11192 avc_state->rounding_value = 0;
11193 avc_state->rounding_inter_p = AVC_INVALID_ROUNDING_VALUE;//default
11194 avc_state->rounding_inter_b = AVC_INVALID_ROUNDING_VALUE; //default
11195 avc_state->rounding_inter_b_ref = AVC_INVALID_ROUNDING_VALUE; //default
11196 avc_state->min_qp_i = INTEL_AVC_MIN_QP;
11197 avc_state->min_qp_p = INTEL_AVC_MIN_QP;
11198 avc_state->min_qp_b = INTEL_AVC_MIN_QP;
11199 avc_state->max_qp_i = INTEL_AVC_MAX_QP;
11200 avc_state->max_qp_p = INTEL_AVC_MAX_QP;
11201 avc_state->max_qp_b = INTEL_AVC_MAX_QP;
11203 memset(avc_state->non_ftq_skip_threshold_lut, 0, AVC_QP_MAX * sizeof(uint8_t));
11204 memset(avc_state->ftq_skip_threshold_lut, 0, AVC_QP_MAX * sizeof(uint8_t));
11205 memset(avc_state->lamda_value_lut, 0, AVC_QP_MAX * 2 * sizeof(uint32_t));
11207 avc_state->intra_refresh_qp_threshold = 0;
11208 avc_state->trellis_flag = 0;
11209 avc_state->hme_mv_cost_scaling_factor = 0;
11210 avc_state->slice_height = 1;
11211 avc_state->slice_num = 1;
11212 memset(avc_state->dist_scale_factor_list0, 0, 32 * sizeof(uint32_t));
11213 avc_state->bi_weight = 0;
11215 avc_state->lambda_table_enable = 0;
/* Per-generation overrides: BRC constant-data surface geometry and
 * feature availability differ between gen8, SKL/BXT and KBL/GLK/CNL. */
11217 if (IS_GEN8(i965->intel.device_info)) {
11218 avc_state->brc_const_data_surface_width = 64;
11219 avc_state->brc_const_data_surface_height = 44;
11220 avc_state->mb_status_supported = 0;
11221 } else if (IS_SKL(i965->intel.device_info) ||
11222 IS_BXT(i965->intel.device_info)) {
11223 avc_state->brc_const_data_surface_width = 64;
11224 avc_state->brc_const_data_surface_height = 44;
11225 avc_state->brc_split_enable = 1;
11226 } else if (IS_KBL(i965->intel.device_info) ||
11227 IS_GEN10(i965->intel.device_info) ||
11228 IS_GLK(i965->intel.device_info)) {
11229 avc_state->brc_const_data_surface_width = 64;
11230 avc_state->brc_const_data_surface_height = 53;
11232 avc_state->decouple_mbenc_curbe_from_brc_enable = 1;
11233 avc_state->extended_mv_cost_range_enable = 0;
11234 avc_state->reserved_g95 = 0;
11235 avc_state->mbenc_brc_buffer_size = 128;
11236 avc_state->kernel_trellis_enable = 1;
11237 avc_state->lambda_table_enable = 1;
11238 avc_state->brc_split_enable = 1;
11240 if (IS_GEN10(i965->intel.device_info))
11241 avc_state->adaptive_transform_decision_enable = 1;// CNL
11244 avc_state->num_refs[0] = 0;
11245 avc_state->num_refs[1] = 0;
11246 memset(avc_state->list_ref_idx, 0, 32 * 2 * sizeof(uint32_t));
11247 memset(avc_state->top_field_poc, 0, NUM_MFC_AVC_DMV_BUFFERS * sizeof(int32_t));
11248 avc_state->tq_rounding = 0;
11249 avc_state->zero_mv_threshold = 0;
11250 avc_state->slice_second_levle_batch_buffer_in_use = 0;
11254 /* the definition of status buffer offset for Encoder */
/* Byte offsets into the coded-buffer private area, one per field of
 * struct encoder_status, paired below with the MFC MMIO register each
 * field is captured from by the status readback. */
11256 status_buffer = &avc_ctx->status_buffer;
11257 memset(status_buffer, 0, sizeof(struct encoder_status_buffer_internal));
11259 status_buffer->base_offset = base_offset;
11260 status_buffer->bs_byte_count_frame_offset = base_offset + offsetof(struct encoder_status, bs_byte_count_frame);
11261 status_buffer->bs_byte_count_frame_nh_offset = base_offset + offsetof(struct encoder_status, bs_byte_count_frame_nh);
11262 status_buffer->image_status_mask_offset = base_offset + offsetof(struct encoder_status, image_status_mask);
11263 status_buffer->image_status_ctrl_offset = base_offset + offsetof(struct encoder_status, image_status_ctrl);
11264 status_buffer->mfc_qp_status_count_offset = base_offset + offsetof(struct encoder_status, mfc_qp_status_count);
11265 status_buffer->media_index_offset = base_offset + offsetof(struct encoder_status, media_index);
11267 status_buffer->status_buffer_size = sizeof(struct encoder_status);
11268 status_buffer->bs_byte_count_frame_reg_offset = MFC_BITSTREAM_BYTECOUNT_FRAME_REG;
11269 status_buffer->bs_byte_count_frame_nh_reg_offset = MFC_BITSTREAM_BYTECOUNT_SLICE_REG;
11270 status_buffer->image_status_mask_reg_offset = MFC_IMAGE_STATUS_MASK_REG;
11271 status_buffer->image_status_ctrl_reg_offset = MFC_IMAGE_STATUS_CTRL_REG;
11272 status_buffer->mfc_qp_status_count_reg_offset = MFC_QP_STATUS_COUNT_REG;
/* Load the kernel binary chosen above (the `else` line is missing here). */
11274 if (IS_GEN8(i965->intel.device_info)) {
11275 gen8_avc_kernel_init(ctx, encoder_context);
11277 gen9_avc_kernel_init(ctx, encoder_context);
11279 encoder_context->vme_context = vme_context;
11280 /* Handling PreEnc operations separately since it gives better
11281 * code readability, avoid possible vme operations mess-up */
11282 encoder_context->vme_pipeline =
11283 !encoder_context->preenc_enabled ? gen9_avc_vme_pipeline : gen9_avc_preenc_pipeline;
11284 encoder_context->vme_context_destroy = gen9_avc_vme_context_destroy;
/* Error path: free() of the remaining four allocations sits on lines
 * missing from this view; free(NULL) is a safe no-op for any that failed. */
11288 allocate_structure_failed:
11293 free(generic_state);
11299 gen9_avc_pak_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
11301 /* VME & PAK share the same context */
11302 struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
11307 encoder_context->mfc_context = pak_context;
11308 encoder_context->mfc_context_destroy = gen9_avc_pak_context_destroy;
11309 encoder_context->mfc_pipeline = gen9_avc_pak_pipeline;
11310 encoder_context->mfc_brc_prepare = gen9_avc_pak_brc_prepare;
11311 encoder_context->get_status = gen9_avc_get_coded_status;